From 61eb46e6dbb4e68a6166ac02a0fd5ee1c552fdf2 Mon Sep 17 00:00:00 2001 From: "Rebecca N. Palmer" Date: Tue, 25 Aug 2020 20:07:50 +0100 Subject: [PATCH] Import pandas_1.0.5+dfsg.orig.tar.xz [dgit import orig pandas_1.0.5+dfsg.orig.tar.xz] --- .gitattributes | 16 + .github/CODE_OF_CONDUCT.md | 63 + .github/CONTRIBUTING.md | 23 + .github/FUNDING.yml | 3 + .github/ISSUE_TEMPLATE.md | 29 + .github/PULL_REQUEST_TEMPLATE.md | 5 + .github/SECURITY.md | 1 + .github/workflows/assign.yml | 15 + .github/workflows/ci.yml | 159 + .gitignore | 120 + .pep8speaks.yml | 4 + .pre-commit-config.yaml | 30 + .travis.yml | 91 + AUTHORS.md | 57 + LICENSE | 29 + LICENSES/DATEUTIL_LICENSE | 54 + LICENSES/HAVEN_LICENSE | 2 + LICENSES/HAVEN_MIT | 32 + LICENSES/MUSL_LICENSE | 132 + LICENSES/NUMPY_LICENSE | 30 + LICENSES/OTHER | 80 + LICENSES/PSF_LICENSE | 279 + LICENSES/SAS7BDAT_LICENSE | 19 + LICENSES/SCIPY_LICENSE | 31 + LICENSES/ULTRAJSON_LICENSE | 34 + LICENSES/XARRAY_LICENSE | 191 + MANIFEST.in | 45 + Makefile | 27 + README.md | 237 + RELEASE.md | 6 + asv_bench/asv.conf.json | 129 + asv_bench/benchmarks/__init__.py | 1 + asv_bench/benchmarks/algorithms.py | 187 + asv_bench/benchmarks/array.py | 23 + asv_bench/benchmarks/attrs_caching.py | 36 + asv_bench/benchmarks/binary_ops.py | 191 + asv_bench/benchmarks/boolean.py | 32 + asv_bench/benchmarks/categoricals.py | 312 + asv_bench/benchmarks/ctors.py | 118 + asv_bench/benchmarks/dtypes.py | 65 + asv_bench/benchmarks/eval.py | 66 + asv_bench/benchmarks/frame_ctor.py | 121 + asv_bench/benchmarks/frame_methods.py | 635 + asv_bench/benchmarks/gil.py | 304 + asv_bench/benchmarks/groupby.py | 629 + .../benchmarks/index_cached_properties.py | 75 + asv_bench/benchmarks/index_object.py | 262 + asv_bench/benchmarks/indexing.py | 348 + asv_bench/benchmarks/indexing_engines.py | 71 + asv_bench/benchmarks/inference.py | 123 + asv_bench/benchmarks/io/__init__.py | 0 asv_bench/benchmarks/io/csv.py | 407 + asv_bench/benchmarks/io/excel.py | 73 + 
asv_bench/benchmarks/io/hdf.py | 130 + asv_bench/benchmarks/io/json.py | 241 + asv_bench/benchmarks/io/parsers.py | 42 + asv_bench/benchmarks/io/pickle.py | 28 + asv_bench/benchmarks/io/sas.py | 30 + asv_bench/benchmarks/io/sql.py | 146 + asv_bench/benchmarks/io/stata.py | 53 + asv_bench/benchmarks/join_merge.py | 386 + asv_bench/benchmarks/multiindex_object.py | 163 + asv_bench/benchmarks/offset.py | 80 + asv_bench/benchmarks/package.py | 25 + asv_bench/benchmarks/pandas_vb_common.py | 80 + asv_bench/benchmarks/period.py | 104 + asv_bench/benchmarks/plotting.py | 97 + asv_bench/benchmarks/reindex.py | 163 + asv_bench/benchmarks/replace.py | 77 + asv_bench/benchmarks/reshape.py | 266 + asv_bench/benchmarks/rolling.py | 146 + asv_bench/benchmarks/series_methods.py | 280 + asv_bench/benchmarks/sparse.py | 139 + asv_bench/benchmarks/stat_ops.py | 138 + asv_bench/benchmarks/strings.py | 186 + asv_bench/benchmarks/timedelta.py | 104 + asv_bench/benchmarks/timeseries.py | 431 + asv_bench/benchmarks/tslibs/__init__.py | 7 + asv_bench/benchmarks/tslibs/offsets.py | 90 + asv_bench/benchmarks/tslibs/period.py | 70 + asv_bench/benchmarks/tslibs/timedelta.py | 61 + asv_bench/benchmarks/tslibs/timestamp.py | 137 + azure-pipelines.yml | 92 + ci/azure/posix.yml | 96 + ci/azure/windows.yml | 57 + ci/check_cache.sh | 27 + ci/check_git_tags.sh | 28 + ci/code_checks.sh | 342 + ci/deps/azure-36-32bit.yaml | 26 + ci/deps/azure-36-locale.yaml | 40 + ci/deps/azure-36-locale_slow.yaml | 32 + ci/deps/azure-36-minimum_versions.yaml | 31 + ci/deps/azure-37-locale.yaml | 39 + ci/deps/azure-37-numpydev.yaml | 22 + ci/deps/azure-macos-36.yaml | 36 + ci/deps/azure-windows-36.yaml | 32 + ci/deps/azure-windows-37.yaml | 40 + ci/deps/travis-36-cov.yaml | 54 + ci/deps/travis-36-locale.yaml | 42 + ci/deps/travis-36-slow.yaml | 34 + ci/deps/travis-37.yaml | 26 + ci/deps/travis-38.yaml | 20 + ci/prep_cython_cache.sh | 74 + ci/print_skipped.py | 38 + ci/run_tests.sh | 32 + ci/setup_env.sh | 157 + 
ci/submit_cython_cache.sh | 29 + ci/travis_encrypt_gbq.sh | 34 + ci/travis_gbq.json.enc | Bin 0 -> 2352 bytes ci/travis_gbq_config.txt | 2 + ci/travis_process_gbq_encryption.sh | 13 + codecov.yml | 13 + conda.recipe/bld.bat | 2 + conda.recipe/build.sh | 2 + conda.recipe/meta.yaml | 40 + doc/.gitignore | 4 + doc/README.rst | 1 + doc/_templates/api_redirect.html | 10 + doc/_templates/autosummary/accessor.rst | 6 + .../autosummary/accessor_attribute.rst | 6 + .../autosummary/accessor_callable.rst | 6 + .../autosummary/accessor_method.rst | 6 + doc/_templates/autosummary/class.rst | 33 + .../autosummary/class_without_autosummary.rst | 6 + doc/cheatsheet/Pandas_Cheat_Sheet.pdf | Bin 0 -> 345905 bytes doc/cheatsheet/Pandas_Cheat_Sheet.pptx | Bin 0 -> 105278 bytes doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf | Bin 0 -> 420632 bytes doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx | Bin 0 -> 82563 bytes doc/cheatsheet/README.txt | 8 + doc/data/air_quality_long.csv | 5273 +++++++ doc/data/air_quality_no2.csv | 1036 ++ doc/data/air_quality_no2_long.csv | 2069 +++ doc/data/air_quality_parameters.csv | 8 + doc/data/air_quality_pm25_long.csv | 1111 ++ doc/data/air_quality_stations.csv | 67 + doc/data/baseball.csv | 101 + doc/data/fx_prices | Bin 0 -> 16177 bytes doc/data/iris.data | 151 + doc/data/mindex_ex.csv | 16 + doc/data/test.xls | Bin 0 -> 30720 bytes doc/data/tips.csv | 245 + doc/data/titanic.csv | 892 ++ doc/make.py | 361 + doc/redirects.csv | 1403 ++ doc/source/_static/banklist.html | 4885 +++++++ doc/source/_static/ci.png | Bin 0 -> 35295 bytes doc/source/_static/css/getting_started.css | 251 + doc/source/_static/css/pandas.css | 36 + doc/source/_static/df_repr_truncated.png | Bin 0 -> 8040 bytes doc/source/_static/eval-perf-small.png | Bin 0 -> 25314 bytes doc/source/_static/eval-perf.png | Bin 0 -> 18603 bytes doc/source/_static/index_api.svg | 97 + doc/source/_static/index_contribute.svg | 76 + doc/source/_static/index_getting_started.svg | 66 + 
doc/source/_static/index_user_guide.svg | 67 + doc/source/_static/legacy_0.10.h5 | Bin 0 -> 238321 bytes doc/source/_static/logo_r.svg | 14 + doc/source/_static/logo_sas.svg | 9 + doc/source/_static/logo_sql.svg | 73 + doc/source/_static/logo_stata.svg | 17 + doc/source/_static/new-excel-index.png | Bin 0 -> 11506 bytes doc/source/_static/old-excel-index.png | Bin 0 -> 11570 bytes doc/source/_static/option_unicode01.png | Bin 0 -> 10736 bytes doc/source/_static/option_unicode02.png | Bin 0 -> 7878 bytes doc/source/_static/option_unicode03.png | Bin 0 -> 5342 bytes doc/source/_static/option_unicode04.png | Bin 0 -> 5481 bytes doc/source/_static/print_df_new.png | Bin 0 -> 77202 bytes doc/source/_static/print_df_old.png | Bin 0 -> 89239 bytes doc/source/_static/query-perf-small.png | Bin 0 -> 21731 bytes doc/source/_static/query-perf.png | Bin 0 -> 20351 bytes doc/source/_static/question_mark_noback.svg | 72 + doc/source/_static/reshaping_melt.png | Bin 0 -> 52900 bytes doc/source/_static/reshaping_pivot.png | Bin 0 -> 52132 bytes doc/source/_static/reshaping_stack.png | Bin 0 -> 54479 bytes doc/source/_static/reshaping_unstack.png | Bin 0 -> 53895 bytes doc/source/_static/reshaping_unstack_0.png | Bin 0 -> 58533 bytes doc/source/_static/reshaping_unstack_1.png | Bin 0 -> 57978 bytes .../_static/schemas/01_table_dataframe.svg | 262 + .../_static/schemas/01_table_series.svg | 127 + .../_static/schemas/01_table_spreadsheet.png | Bin 0 -> 46286 bytes .../_static/schemas/02_io_readwrite.svg | 1401 ++ .../_static/schemas/03_subset_columns.svg | 327 + .../schemas/03_subset_columns_rows.svg | 272 + doc/source/_static/schemas/03_subset_rows.svg | 316 + .../_static/schemas/04_plot_overview.svg | 6443 +++++++++ doc/source/_static/schemas/05_newcolumn_1.svg | 347 + doc/source/_static/schemas/05_newcolumn_2.svg | 347 + doc/source/_static/schemas/05_newcolumn_3.svg | 352 + doc/source/_static/schemas/06_aggregate.svg | 211 + doc/source/_static/schemas/06_groupby.svg | 307 + 
.../_static/schemas/06_groupby_agg_detail.svg | 619 + .../schemas/06_groupby_select_detail.svg | 697 + doc/source/_static/schemas/06_reduction.svg | 222 + doc/source/_static/schemas/06_valuecounts.svg | 269 + doc/source/_static/schemas/07_melt.svg | 315 + doc/source/_static/schemas/07_pivot.svg | 338 + doc/source/_static/schemas/07_pivot_table.svg | 455 + .../_static/schemas/08_concat_column.svg | 465 + doc/source/_static/schemas/08_concat_row.svg | 392 + doc/source/_static/schemas/08_merge_left.svg | 608 + doc/source/_static/stub | 0 doc/source/_static/style-excel.png | Bin 0 -> 58167 bytes doc/source/_static/trunc_after.png | Bin 0 -> 29195 bytes doc/source/_static/trunc_before.png | Bin 0 -> 50913 bytes doc/source/_static/whatsnew_assign.png | Bin 0 -> 13462 bytes .../_static/whatsnew_plot_submethods.png | Bin 0 -> 5579 bytes doc/source/conf.py | 720 + doc/source/development/code_style.rst | 129 + doc/source/development/contributing.rst | 1506 ++ .../development/contributing_docstring.rst | 1006 ++ doc/source/development/developer.rst | 185 + doc/source/development/extending.rst | 504 + doc/source/development/index.rst | 23 + doc/source/development/internals.rst | 108 + doc/source/development/maintaining.rst | 193 + doc/source/development/meeting.rst | 32 + doc/source/development/policies.rst | 57 + doc/source/development/roadmap.rst | 193 + doc/source/ecosystem.rst | 395 + doc/source/getting_started/10min.rst | 813 ++ doc/source/getting_started/basics.rst | 2364 ++++ .../comparison/comparison_with_r.rst | 553 + .../comparison/comparison_with_sas.rst | 755 + .../comparison/comparison_with_sql.rst | 493 + .../comparison/comparison_with_stata.rst | 678 + .../getting_started/comparison/index.rst | 15 + doc/source/getting_started/dsintro.rst | 840 ++ doc/source/getting_started/index.rst | 672 + doc/source/getting_started/install.rst | 308 + .../intro_tutorials/01_table_oriented.rst | 218 + .../intro_tutorials/02_read_write.rst | 232 + 
.../intro_tutorials/03_subset_data.rst | 405 + .../intro_tutorials/04_plotting.rst | 252 + .../intro_tutorials/05_add_columns.rst | 186 + .../06_calculate_statistics.rst | 310 + .../07_reshape_table_layout.rst | 404 + .../intro_tutorials/08_combine_dataframes.rst | 326 + .../intro_tutorials/09_timeseries.rst | 389 + .../intro_tutorials/10_text_data.rst | 278 + .../getting_started/intro_tutorials/index.rst | 22 + doc/source/getting_started/overview.rst | 177 + doc/source/getting_started/tutorials.rst | 109 + doc/source/index.rst.template | 131 + doc/source/reference/arrays.rst | 522 + doc/source/reference/extensions.rst | 74 + doc/source/reference/frame.rst | 368 + doc/source/reference/general_functions.rst | 87 + .../reference/general_utility_functions.rst | 110 + doc/source/reference/groupby.rst | 137 + doc/source/reference/index.rst | 72 + doc/source/reference/indexing.rst | 490 + doc/source/reference/io.rst | 152 + doc/source/reference/offset_frequency.rst | 1522 +++ doc/source/reference/panel.rst | 10 + doc/source/reference/plotting.rst | 26 + doc/source/reference/resampling.rst | 66 + doc/source/reference/series.rst | 584 + doc/source/reference/style.rst | 67 + doc/source/reference/window.rst | 87 + doc/source/styled.xlsx | Bin 0 -> 5682 bytes .../themes/nature_with_gtoc/layout.html | 108 + .../nature_with_gtoc/static/nature.css_t | 356 + doc/source/themes/nature_with_gtoc/theme.conf | 7 + doc/source/user_guide/advanced.rst | 1226 ++ doc/source/user_guide/boolean.rst | 102 + doc/source/user_guide/categorical.rst | 1179 ++ doc/source/user_guide/computation.rst | 1067 ++ doc/source/user_guide/enhancingperf.rst | 828 ++ doc/source/user_guide/gotchas.rst | 343 + doc/source/user_guide/groupby.rst | 1462 ++ doc/source/user_guide/index.rst | 45 + doc/source/user_guide/indexing.rst | 1902 +++ doc/source/user_guide/integer_na.rst | 153 + doc/source/user_guide/io.rst | 5737 ++++++++ doc/source/user_guide/merging.rst | 1462 ++ doc/source/user_guide/missing_data.rst | 971 
++ doc/source/user_guide/options.rst | 578 + doc/source/user_guide/reshaping.rst | 849 ++ doc/source/user_guide/scale.rst | 375 + doc/source/user_guide/sparse.rst | 367 + doc/source/user_guide/style.ipynb | 1244 ++ doc/source/user_guide/templates/myhtml.tpl | 5 + .../templates/template_structure.html | 57 + doc/source/user_guide/text.rst | 747 + doc/source/user_guide/timedeltas.rst | 481 + doc/source/user_guide/timeseries.rst | 2436 ++++ doc/source/user_guide/visualization.rst | 1643 +++ doc/source/whatsnew/index.rst | 229 + doc/source/whatsnew/v0.10.0.rst | 531 + doc/source/whatsnew/v0.10.1.rst | 262 + doc/source/whatsnew/v0.11.0.rst | 462 + doc/source/whatsnew/v0.12.0.rst | 518 + doc/source/whatsnew/v0.13.0.rst | 1335 ++ doc/source/whatsnew/v0.13.1.rst | 477 + doc/source/whatsnew/v0.14.0.rst | 1087 ++ doc/source/whatsnew/v0.14.1.rst | 282 + doc/source/whatsnew/v0.15.0.rst | 1242 ++ doc/source/whatsnew/v0.15.1.rst | 318 + doc/source/whatsnew/v0.15.2.rst | 257 + doc/source/whatsnew/v0.16.0.rst | 690 + doc/source/whatsnew/v0.16.1.rst | 484 + doc/source/whatsnew/v0.16.2.rst | 178 + doc/source/whatsnew/v0.17.0.rst | 1177 ++ doc/source/whatsnew/v0.17.1.rst | 215 + doc/source/whatsnew/v0.18.0.rst | 1304 ++ doc/source/whatsnew/v0.18.1.rst | 713 + doc/source/whatsnew/v0.19.0.rst | 1583 +++ doc/source/whatsnew/v0.19.1.rst | 77 + doc/source/whatsnew/v0.19.2.rst | 98 + doc/source/whatsnew/v0.20.0.rst | 1780 +++ doc/source/whatsnew/v0.20.2.rst | 143 + doc/source/whatsnew/v0.20.3.rst | 76 + doc/source/whatsnew/v0.21.0.rst | 1198 ++ doc/source/whatsnew/v0.21.1.rst | 187 + doc/source/whatsnew/v0.22.0.rst | 262 + doc/source/whatsnew/v0.23.0.rst | 1480 ++ doc/source/whatsnew/v0.23.1.rst | 151 + doc/source/whatsnew/v0.23.2.rst | 123 + doc/source/whatsnew/v0.23.3.rst | 16 + doc/source/whatsnew/v0.23.4.rst | 47 + doc/source/whatsnew/v0.24.0.rst | 1936 +++ doc/source/whatsnew/v0.24.1.rst | 94 + doc/source/whatsnew/v0.24.2.rst | 108 + doc/source/whatsnew/v0.25.0.rst | 1272 ++ 
doc/source/whatsnew/v0.25.1.rst | 119 + doc/source/whatsnew/v0.25.2.rst | 49 + doc/source/whatsnew/v0.25.3.rst | 22 + doc/source/whatsnew/v0.4.x.rst | 69 + doc/source/whatsnew/v0.5.0.rst | 59 + doc/source/whatsnew/v0.6.0.rst | 72 + doc/source/whatsnew/v0.6.1.rst | 58 + doc/source/whatsnew/v0.7.0.rst | 311 + doc/source/whatsnew/v0.7.1.rst | 41 + doc/source/whatsnew/v0.7.2.rst | 38 + doc/source/whatsnew/v0.7.3.rst | 101 + doc/source/whatsnew/v0.8.0.rst | 281 + doc/source/whatsnew/v0.8.1.rst | 47 + doc/source/whatsnew/v0.9.0.rst | 107 + doc/source/whatsnew/v0.9.1.rst | 170 + doc/source/whatsnew/v1.0.0.rst | 1294 ++ doc/source/whatsnew/v1.0.1.rst | 79 + doc/source/whatsnew/v1.0.2.rst | 126 + doc/source/whatsnew/v1.0.3.rst | 29 + doc/source/whatsnew/v1.0.4.rst | 48 + doc/source/whatsnew/v1.0.5.rst | 39 + .../whatsnew/whatsnew_0171_html_table.html | 872 ++ doc/sphinxext/README.rst | 17 + doc/sphinxext/announce.py | 162 + doc/sphinxext/contributors.py | 57 + environment.yml | 109 + pandas/__init__.py | 407 + pandas/_config/__init__.py | 28 + pandas/_config/config.py | 869 ++ pandas/_config/dates.py | 23 + pandas/_config/display.py | 59 + pandas/_config/localization.py | 166 + pandas/_libs/__init__.py | 11 + pandas/_libs/algos.pxd | 21 + pandas/_libs/algos.pyx | 1237 ++ pandas/_libs/algos_common_helper.pxi.in | 70 + pandas/_libs/algos_take_helper.pxi.in | 250 + pandas/_libs/groupby.pyx | 1511 ++ pandas/_libs/hashing.pyx | 189 + pandas/_libs/hashtable.pxd | 54 + pandas/_libs/hashtable.pyx | 173 + pandas/_libs/hashtable_class_helper.pxi.in | 1128 ++ pandas/_libs/hashtable_func_helper.pxi.in | 341 + pandas/_libs/index.pyx | 731 + pandas/_libs/index_class_helper.pxi.in | 83 + pandas/_libs/indexing.pyx | 23 + pandas/_libs/internals.pyx | 449 + pandas/_libs/interval.pyx | 531 + pandas/_libs/intervaltree.pxi.in | 411 + pandas/_libs/join.pyx | 975 ++ pandas/_libs/khash.pxd | 152 + pandas/_libs/lib.pxd | 1 + pandas/_libs/lib.pyx | 2475 ++++ pandas/_libs/missing.pxd | 16 + 
pandas/_libs/missing.pyx | 488 + pandas/_libs/ops.pyx | 292 + pandas/_libs/ops_dispatch.pyx | 94 + pandas/_libs/parsers.pyx | 2218 +++ pandas/_libs/properties.pyx | 67 + pandas/_libs/reduction.pyx | 620 + pandas/_libs/reshape.pyx | 139 + pandas/_libs/sparse.pyx | 796 ++ pandas/_libs/sparse_op_helper.pxi.in | 309 + pandas/_libs/src/headers/cmath | 36 + pandas/_libs/src/headers/ms_inttypes.h | 305 + pandas/_libs/src/headers/ms_stdint.h | 247 + pandas/_libs/src/headers/portable.h | 16 + pandas/_libs/src/headers/stdint.h | 10 + pandas/_libs/src/inline_helper.h | 25 + pandas/_libs/src/klib/khash.h | 569 + pandas/_libs/src/klib/khash_python.h | 124 + pandas/_libs/src/parse_helper.h | 95 + pandas/_libs/src/parser/io.c | 312 + pandas/_libs/src/parser/io.h | 69 + pandas/_libs/src/parser/tokenizer.c | 2008 +++ pandas/_libs/src/parser/tokenizer.h | 232 + pandas/_libs/src/skiplist.h | 279 + pandas/_libs/src/ujson/lib/ultrajson.h | 318 + pandas/_libs/src/ujson/lib/ultrajsondec.c | 1202 ++ pandas/_libs/src/ujson/lib/ultrajsonenc.c | 1173 ++ pandas/_libs/src/ujson/python/JSONtoObj.c | 596 + pandas/_libs/src/ujson/python/objToJSON.c | 2464 ++++ pandas/_libs/src/ujson/python/ujson.c | 79 + pandas/_libs/src/ujson/python/version.h | 43 + pandas/_libs/testing.pyx | 217 + pandas/_libs/tslib.pyx | 856 ++ pandas/_libs/tslibs/__init__.py | 12 + pandas/_libs/tslibs/c_timestamp.pxd | 19 + pandas/_libs/tslibs/c_timestamp.pyx | 408 + pandas/_libs/tslibs/ccalendar.pxd | 12 + pandas/_libs/tslibs/ccalendar.pyx | 230 + pandas/_libs/tslibs/conversion.pxd | 30 + pandas/_libs/tslibs/conversion.pyx | 855 ++ pandas/_libs/tslibs/fields.pyx | 667 + pandas/_libs/tslibs/frequencies.pxd | 9 + pandas/_libs/tslibs/frequencies.pyx | 515 + pandas/_libs/tslibs/nattype.pxd | 20 + pandas/_libs/tslibs/nattype.pyx | 800 ++ pandas/_libs/tslibs/np_datetime.pxd | 77 + pandas/_libs/tslibs/np_datetime.pyx | 179 + pandas/_libs/tslibs/offsets.pxd | 3 + pandas/_libs/tslibs/offsets.pyx | 1133 ++ 
pandas/_libs/tslibs/parsing.pyx | 986 ++ pandas/_libs/tslibs/period.pyx | 2554 ++++ pandas/_libs/tslibs/resolution.pyx | 353 + .../_libs/tslibs/src/datetime/np_datetime.c | 768 ++ .../_libs/tslibs/src/datetime/np_datetime.h | 80 + .../tslibs/src/datetime/np_datetime_strings.c | 907 ++ .../tslibs/src/datetime/np_datetime_strings.h | 82 + pandas/_libs/tslibs/strptime.pyx | 754 + pandas/_libs/tslibs/timedeltas.pxd | 8 + pandas/_libs/tslibs/timedeltas.pyx | 1555 +++ pandas/_libs/tslibs/timestamps.pxd | 8 + pandas/_libs/tslibs/timestamps.pyx | 1040 ++ pandas/_libs/tslibs/timezones.pxd | 16 + pandas/_libs/tslibs/timezones.pyx | 358 + pandas/_libs/tslibs/tzconversion.pxd | 7 + pandas/_libs/tslibs/tzconversion.pyx | 541 + pandas/_libs/tslibs/util.pxd | 249 + pandas/_libs/util.pxd | 99 + pandas/_libs/window/__init__.py | 0 pandas/_libs/window/aggregations.pyx | 1951 +++ pandas/_libs/window/indexers.pyx | 109 + pandas/_libs/writers.pyx | 167 + pandas/_testing.py | 2755 ++++ pandas/_typing.py | 73 + pandas/_version.py | 479 + pandas/api/__init__.py | 2 + pandas/api/extensions/__init__.py | 27 + pandas/api/indexers/__init__.py | 8 + pandas/api/types/__init__.py | 23 + pandas/arrays/__init__.py | 30 + pandas/compat/__init__.py | 140 + pandas/compat/_optional.py | 111 + pandas/compat/chainmap.py | 33 + pandas/compat/numpy/__init__.py | 77 + pandas/compat/numpy/function.py | 405 + pandas/compat/pickle_compat.py | 243 + pandas/conftest.py | 934 ++ pandas/core/__init__.py | 0 pandas/core/accessor.py | 316 + pandas/core/algorithms.py | 2067 +++ pandas/core/api.py | 57 + pandas/core/apply.py | 466 + pandas/core/arrays/__init__.py | 36 + pandas/core/arrays/_arrow_utils.py | 124 + pandas/core/arrays/_ranges.py | 190 + pandas/core/arrays/base.py | 1246 ++ pandas/core/arrays/boolean.py | 791 ++ pandas/core/arrays/categorical.py | 2712 ++++ pandas/core/arrays/datetimelike.py | 1714 +++ pandas/core/arrays/datetimes.py | 2156 +++ pandas/core/arrays/integer.py | 811 ++ 
pandas/core/arrays/interval.py | 1361 ++ pandas/core/arrays/masked.py | 251 + pandas/core/arrays/numpy_.py | 462 + pandas/core/arrays/period.py | 1054 ++ pandas/core/arrays/sparse/__init__.py | 10 + pandas/core/arrays/sparse/accessor.py | 328 + pandas/core/arrays/sparse/array.py | 1552 +++ pandas/core/arrays/sparse/dtype.py | 348 + pandas/core/arrays/sparse/scipy_sparse.py | 146 + pandas/core/arrays/string_.py | 323 + pandas/core/arrays/timedeltas.py | 1090 ++ pandas/core/base.py | 1498 ++ pandas/core/common.py | 480 + pandas/core/computation/__init__.py | 0 pandas/core/computation/align.py | 192 + pandas/core/computation/api.py | 3 + pandas/core/computation/check.py | 10 + pandas/core/computation/common.py | 30 + pandas/core/computation/engines.py | 136 + pandas/core/computation/eval.py | 390 + pandas/core/computation/expr.py | 791 ++ pandas/core/computation/expressions.py | 252 + pandas/core/computation/ops.py | 601 + pandas/core/computation/parsing.py | 190 + pandas/core/computation/pytables.py | 624 + pandas/core/computation/scope.py | 314 + pandas/core/config_init.py | 655 + pandas/core/construction.py | 626 + pandas/core/dtypes/__init__.py | 0 pandas/core/dtypes/api.py | 45 + pandas/core/dtypes/base.py | 317 + pandas/core/dtypes/cast.py | 1592 +++ pandas/core/dtypes/common.py | 1891 +++ pandas/core/dtypes/concat.py | 481 + pandas/core/dtypes/dtypes.py | 1161 ++ pandas/core/dtypes/generic.py | 84 + pandas/core/dtypes/inference.py | 424 + pandas/core/dtypes/missing.py | 617 + pandas/core/frame.py | 8480 ++++++++++++ pandas/core/generic.py | 11368 ++++++++++++++++ pandas/core/groupby/__init__.py | 11 + pandas/core/groupby/base.py | 188 + pandas/core/groupby/categorical.py | 99 + pandas/core/groupby/generic.py | 2090 +++ pandas/core/groupby/groupby.py | 2571 ++++ pandas/core/groupby/grouper.py | 660 + pandas/core/groupby/ops.py | 947 ++ pandas/core/index.py | 31 + pandas/core/indexers.py | 414 + pandas/core/indexes/__init__.py | 0 pandas/core/indexes/accessors.py 
| 338 + pandas/core/indexes/api.py | 301 + pandas/core/indexes/base.py | 5576 ++++++++ pandas/core/indexes/category.py | 884 ++ pandas/core/indexes/datetimelike.py | 908 ++ pandas/core/indexes/datetimes.py | 1295 ++ pandas/core/indexes/extension.py | 295 + pandas/core/indexes/frozen.py | 107 + pandas/core/indexes/interval.py | 1383 ++ pandas/core/indexes/multi.py | 3519 +++++ pandas/core/indexes/numeric.py | 531 + pandas/core/indexes/period.py | 915 ++ pandas/core/indexes/range.py | 811 ++ pandas/core/indexes/timedeltas.py | 509 + pandas/core/indexing.py | 2489 ++++ pandas/core/internals/__init__.py | 47 + pandas/core/internals/blocks.py | 3219 +++++ pandas/core/internals/concat.py | 493 + pandas/core/internals/construction.py | 626 + pandas/core/internals/managers.py | 2027 +++ pandas/core/missing.py | 684 + pandas/core/nanops.py | 1424 ++ pandas/core/ops/__init__.py | 857 ++ pandas/core/ops/array_ops.py | 393 + pandas/core/ops/common.py | 66 + pandas/core/ops/dispatch.py | 126 + pandas/core/ops/docstrings.py | 675 + pandas/core/ops/invalid.py | 56 + pandas/core/ops/mask_ops.py | 178 + pandas/core/ops/methods.py | 233 + pandas/core/ops/missing.py | 179 + pandas/core/ops/roperator.py | 60 + pandas/core/resample.py | 1799 +++ pandas/core/reshape/__init__.py | 0 pandas/core/reshape/api.py | 8 + pandas/core/reshape/concat.py | 701 + pandas/core/reshape/melt.py | 474 + pandas/core/reshape/merge.py | 2052 +++ pandas/core/reshape/pivot.py | 707 + pandas/core/reshape/reshape.py | 1080 ++ pandas/core/reshape/tile.py | 599 + pandas/core/reshape/util.py | 59 + pandas/core/series.py | 4576 +++++++ pandas/core/sorting.py | 411 + pandas/core/sparse/__init__.py | 0 pandas/core/sparse/api.py | 3 + pandas/core/strings.py | 3568 +++++ pandas/core/tools/__init__.py | 0 pandas/core/tools/datetimes.py | 1053 ++ pandas/core/tools/numeric.py | 194 + pandas/core/tools/timedeltas.py | 157 + pandas/core/util/__init__.py | 0 pandas/core/util/hashing.py | 359 + pandas/core/window/__init__.py 
| 3 + pandas/core/window/common.py | 326 + pandas/core/window/ewm.py | 403 + pandas/core/window/expanding.py | 259 + pandas/core/window/indexers.py | 122 + pandas/core/window/numba_.py | 133 + pandas/core/window/rolling.py | 2123 +++ pandas/errors/__init__.py | 184 + pandas/io/__init__.py | 0 pandas/io/api.py | 21 + pandas/io/clipboard/__init__.py | 667 + pandas/io/clipboards.py | 138 + pandas/io/common.py | 528 + pandas/io/date_converters.py | 64 + pandas/io/excel/__init__.py | 16 + pandas/io/excel/_base.py | 915 ++ pandas/io/excel/_odfreader.py | 181 + pandas/io/excel/_openpyxl.py | 542 + pandas/io/excel/_pyxlsb.py | 68 + pandas/io/excel/_util.py | 229 + pandas/io/excel/_xlrd.py | 106 + pandas/io/excel/_xlsxwriter.py | 237 + pandas/io/excel/_xlwt.py | 138 + pandas/io/feather_format.py | 103 + pandas/io/formats/__init__.py | 0 pandas/io/formats/console.py | 91 + pandas/io/formats/css.py | 264 + pandas/io/formats/csvs.py | 356 + pandas/io/formats/excel.py | 738 + pandas/io/formats/format.py | 1992 +++ pandas/io/formats/html.py | 614 + pandas/io/formats/latex.py | 377 + pandas/io/formats/printing.py | 530 + pandas/io/formats/style.py | 1528 +++ pandas/io/formats/templates/html.tpl | 70 + pandas/io/gbq.py | 220 + pandas/io/gcs.py | 18 + pandas/io/html.py | 1101 ++ pandas/io/json/__init__.py | 13 + pandas/io/json/_json.py | 1180 ++ pandas/io/json/_normalize.py | 363 + pandas/io/json/_table_schema.py | 338 + pandas/io/orc.py | 57 + pandas/io/parquet.py | 312 + pandas/io/parsers.py | 3671 +++++ pandas/io/pickle.py | 199 + pandas/io/pytables.py | 5123 +++++++ pandas/io/s3.py | 49 + pandas/io/sas/__init__.py | 1 + pandas/io/sas/sas.pyx | 444 + pandas/io/sas/sas7bdat.py | 732 + pandas/io/sas/sas_constants.py | 253 + pandas/io/sas/sas_xport.py | 507 + pandas/io/sas/sasreader.py | 86 + pandas/io/spss.py | 45 + pandas/io/sql.py | 1782 +++ pandas/io/stata.py | 3333 +++++ pandas/plotting/__init__.py | 98 + pandas/plotting/_core.py | 1678 +++ 
pandas/plotting/_matplotlib/__init__.py | 83 + pandas/plotting/_matplotlib/boxplot.py | 436 + pandas/plotting/_matplotlib/compat.py | 23 + pandas/plotting/_matplotlib/converter.py | 1132 ++ pandas/plotting/_matplotlib/core.py | 1517 +++ pandas/plotting/_matplotlib/hist.py | 413 + pandas/plotting/_matplotlib/misc.py | 431 + pandas/plotting/_matplotlib/style.py | 92 + pandas/plotting/_matplotlib/timeseries.py | 311 + pandas/plotting/_matplotlib/tools.py | 378 + pandas/plotting/_misc.py | 487 + pandas/testing.py | 17 + pandas/tests/__init__.py | 0 pandas/tests/api/__init__.py | 0 pandas/tests/api/test_api.py | 332 + pandas/tests/api/test_types.py | 64 + pandas/tests/arithmetic/__init__.py | 0 pandas/tests/arithmetic/common.py | 89 + pandas/tests/arithmetic/conftest.py | 248 + pandas/tests/arithmetic/test_datetime64.py | 2397 ++++ pandas/tests/arithmetic/test_interval.py | 273 + pandas/tests/arithmetic/test_numeric.py | 1291 ++ pandas/tests/arithmetic/test_object.py | 365 + pandas/tests/arithmetic/test_period.py | 1450 ++ pandas/tests/arithmetic/test_timedelta64.py | 2176 +++ pandas/tests/arrays/__init__.py | 0 pandas/tests/arrays/categorical/__init__.py | 0 pandas/tests/arrays/categorical/common.py | 8 + pandas/tests/arrays/categorical/conftest.py | 7 + pandas/tests/arrays/categorical/test_algos.py | 200 + .../arrays/categorical/test_analytics.py | 380 + pandas/tests/arrays/categorical/test_api.py | 511 + .../arrays/categorical/test_constructors.py | 644 + .../tests/arrays/categorical/test_dtypes.py | 173 + .../tests/arrays/categorical/test_indexing.py | 280 + .../tests/arrays/categorical/test_missing.py | 135 + .../arrays/categorical/test_operators.py | 442 + .../tests/arrays/categorical/test_replace.py | 48 + pandas/tests/arrays/categorical/test_repr.py | 525 + .../tests/arrays/categorical/test_sorting.py | 124 + .../tests/arrays/categorical/test_subclass.py | 22 + .../tests/arrays/categorical/test_warnings.py | 29 + pandas/tests/arrays/interval/__init__.py | 0 
pandas/tests/arrays/interval/test_interval.py | 232 + pandas/tests/arrays/interval/test_ops.py | 88 + pandas/tests/arrays/sparse/__init__.py | 0 pandas/tests/arrays/sparse/test_accessor.py | 123 + .../tests/arrays/sparse/test_arithmetics.py | 495 + pandas/tests/arrays/sparse/test_array.py | 1249 ++ .../arrays/sparse/test_combine_concat.py | 31 + pandas/tests/arrays/sparse/test_dtype.py | 198 + pandas/tests/arrays/sparse/test_libsparse.py | 601 + pandas/tests/arrays/string_/__init__.py | 0 pandas/tests/arrays/string_/test_string.py | 271 + pandas/tests/arrays/test_array.py | 385 + pandas/tests/arrays/test_boolean.py | 931 ++ pandas/tests/arrays/test_datetimelike.py | 813 ++ pandas/tests/arrays/test_datetimes.py | 418 + pandas/tests/arrays/test_integer.py | 1096 ++ pandas/tests/arrays/test_numpy.py | 250 + pandas/tests/arrays/test_period.py | 414 + pandas/tests/arrays/test_timedeltas.py | 293 + pandas/tests/base/__init__.py | 0 pandas/tests/base/test_constructors.py | 142 + pandas/tests/base/test_conversion.py | 439 + pandas/tests/base/test_ops.py | 899 ++ pandas/tests/computation/__init__.py | 0 pandas/tests/computation/test_compat.py | 49 + pandas/tests/computation/test_eval.py | 2051 +++ pandas/tests/config/__init__.py | 0 pandas/tests/config/test_config.py | 470 + pandas/tests/config/test_localization.py | 105 + pandas/tests/dtypes/__init__.py | 0 pandas/tests/dtypes/cast/__init__.py | 0 .../dtypes/cast/test_construct_from_scalar.py | 20 + .../dtypes/cast/test_construct_ndarray.py | 21 + .../dtypes/cast/test_construct_object_arr.py | 20 + .../tests/dtypes/cast/test_convert_objects.py | 12 + pandas/tests/dtypes/cast/test_downcast.py | 99 + .../dtypes/cast/test_find_common_type.py | 122 + .../dtypes/cast/test_infer_datetimelike.py | 23 + pandas/tests/dtypes/cast/test_infer_dtype.py | 198 + pandas/tests/dtypes/cast/test_promote.py | 631 + pandas/tests/dtypes/cast/test_upcast.py | 71 + pandas/tests/dtypes/test_common.py | 757 + pandas/tests/dtypes/test_concat.py | 78 
+ pandas/tests/dtypes/test_dtypes.py | 956 ++ pandas/tests/dtypes/test_generic.py | 86 + pandas/tests/dtypes/test_inference.py | 1485 ++ pandas/tests/dtypes/test_missing.py | 586 + pandas/tests/extension/__init__.py | 0 pandas/tests/extension/arrow/__init__.py | 0 pandas/tests/extension/arrow/arrays.py | 190 + pandas/tests/extension/arrow/test_bool.py | 74 + pandas/tests/extension/arrow/test_string.py | 13 + pandas/tests/extension/base/__init__.py | 65 + pandas/tests/extension/base/base.py | 9 + pandas/tests/extension/base/casting.py | 34 + pandas/tests/extension/base/constructors.py | 85 + pandas/tests/extension/base/dtype.py | 111 + pandas/tests/extension/base/getitem.py | 372 + pandas/tests/extension/base/groupby.py | 91 + pandas/tests/extension/base/interface.py | 95 + pandas/tests/extension/base/io.py | 20 + pandas/tests/extension/base/methods.py | 397 + pandas/tests/extension/base/missing.py | 129 + pandas/tests/extension/base/ops.py | 178 + pandas/tests/extension/base/printing.py | 43 + pandas/tests/extension/base/reduce.py | 60 + pandas/tests/extension/base/reshaping.py | 326 + pandas/tests/extension/base/setitem.py | 345 + pandas/tests/extension/conftest.py | 178 + pandas/tests/extension/decimal/__init__.py | 3 + pandas/tests/extension/decimal/array.py | 206 + .../tests/extension/decimal/test_decimal.py | 515 + pandas/tests/extension/json/__init__.py | 3 + pandas/tests/extension/json/array.py | 207 + pandas/tests/extension/json/test_json.py | 303 + pandas/tests/extension/list/__init__.py | 3 + pandas/tests/extension/list/array.py | 133 + pandas/tests/extension/list/test_list.py | 30 + pandas/tests/extension/test_boolean.py | 351 + pandas/tests/extension/test_categorical.py | 286 + pandas/tests/extension/test_common.py | 81 + pandas/tests/extension/test_datetime.py | 214 + pandas/tests/extension/test_external_block.py | 55 + pandas/tests/extension/test_integer.py | 256 + pandas/tests/extension/test_interval.py | 166 + pandas/tests/extension/test_numpy.py | 
452 + pandas/tests/extension/test_period.py | 161 + pandas/tests/extension/test_sparse.py | 379 + pandas/tests/extension/test_string.py | 114 + pandas/tests/frame/__init__.py | 0 pandas/tests/frame/common.py | 31 + pandas/tests/frame/conftest.py | 330 + .../tests/frame/indexing/test_categorical.py | 398 + pandas/tests/frame/indexing/test_datetime.py | 62 + pandas/tests/frame/indexing/test_indexing.py | 2345 ++++ pandas/tests/frame/indexing/test_where.py | 582 + pandas/tests/frame/methods/__init__.py | 7 + pandas/tests/frame/methods/test_append.py | 195 + pandas/tests/frame/methods/test_asof.py | 158 + pandas/tests/frame/methods/test_clip.py | 157 + pandas/tests/frame/methods/test_count.py | 36 + pandas/tests/frame/methods/test_cov_corr.py | 272 + pandas/tests/frame/methods/test_describe.py | 333 + pandas/tests/frame/methods/test_diff.py | 120 + .../frame/methods/test_drop_duplicates.py | 420 + pandas/tests/frame/methods/test_duplicated.py | 100 + pandas/tests/frame/methods/test_explode.py | 164 + pandas/tests/frame/methods/test_isin.py | 186 + pandas/tests/frame/methods/test_nlargest.py | 211 + pandas/tests/frame/methods/test_pct_change.py | 96 + pandas/tests/frame/methods/test_quantile.py | 492 + pandas/tests/frame/methods/test_rank.py | 331 + pandas/tests/frame/methods/test_replace.py | 1373 ++ pandas/tests/frame/methods/test_round.py | 217 + pandas/tests/frame/methods/test_shift.py | 210 + pandas/tests/frame/methods/test_sort_index.py | 320 + .../tests/frame/methods/test_sort_values.py | 518 + pandas/tests/frame/methods/test_to_dict.py | 258 + pandas/tests/frame/methods/test_to_records.py | 360 + pandas/tests/frame/methods/test_transpose.py | 43 + pandas/tests/frame/methods/test_truncate.py | 89 + pandas/tests/frame/test_alter_axes.py | 1568 +++ pandas/tests/frame/test_analytics.py | 1286 ++ pandas/tests/frame/test_api.py | 572 + pandas/tests/frame/test_apply.py | 1414 ++ pandas/tests/frame/test_arithmetic.py | 782 ++ .../tests/frame/test_axis_select_reindex.py 
| 1154 ++ pandas/tests/frame/test_block_internals.py | 624 + pandas/tests/frame/test_combine_concat.py | 798 ++ pandas/tests/frame/test_constructors.py | 2572 ++++ pandas/tests/frame/test_cumulative.py | 146 + pandas/tests/frame/test_dtypes.py | 1231 ++ pandas/tests/frame/test_join.py | 218 + pandas/tests/frame/test_missing.py | 985 ++ pandas/tests/frame/test_mutate_columns.py | 262 + pandas/tests/frame/test_nonunique_indexes.py | 526 + pandas/tests/frame/test_operators.py | 911 ++ pandas/tests/frame/test_period.py | 156 + pandas/tests/frame/test_query_eval.py | 1178 ++ pandas/tests/frame/test_repr_info.py | 579 + pandas/tests/frame/test_reshape.py | 1161 ++ .../frame/test_sort_values_level_as_str.py | 92 + pandas/tests/frame/test_subclass.py | 559 + pandas/tests/frame/test_timeseries.py | 549 + pandas/tests/frame/test_timezones.py | 215 + pandas/tests/frame/test_to_csv.py | 1358 ++ pandas/tests/frame/test_validate.py | 41 + pandas/tests/generic/__init__.py | 0 pandas/tests/generic/test_frame.py | 282 + pandas/tests/generic/test_generic.py | 976 ++ .../generic/test_label_or_level_utils.py | 339 + pandas/tests/generic/test_series.py | 263 + pandas/tests/groupby/__init__.py | 0 pandas/tests/groupby/aggregate/__init__.py | 0 .../tests/groupby/aggregate/test_aggregate.py | 997 ++ pandas/tests/groupby/aggregate/test_cython.py | 238 + pandas/tests/groupby/aggregate/test_other.py | 644 + pandas/tests/groupby/conftest.py | 124 + pandas/tests/groupby/test_apply.py | 811 ++ pandas/tests/groupby/test_bin_groupby.py | 172 + pandas/tests/groupby/test_categorical.py | 1378 ++ pandas/tests/groupby/test_counting.py | 222 + pandas/tests/groupby/test_filters.py | 597 + pandas/tests/groupby/test_function.py | 1621 +++ pandas/tests/groupby/test_groupby.py | 2032 +++ pandas/tests/groupby/test_grouping.py | 966 ++ pandas/tests/groupby/test_index_as_string.py | 82 + pandas/tests/groupby/test_nth.py | 609 + pandas/tests/groupby/test_rank.py | 445 + pandas/tests/groupby/test_timegrouper.py 
| 757 + pandas/tests/groupby/test_transform.py | 1170 ++ pandas/tests/groupby/test_value_counts.py | 109 + pandas/tests/groupby/test_whitelist.py | 436 + pandas/tests/indexes/__init__.py | 0 pandas/tests/indexes/categorical/__init__.py | 0 .../indexes/categorical/test_category.py | 994 ++ .../indexes/categorical/test_constructors.py | 147 + pandas/tests/indexes/common.py | 892 ++ pandas/tests/indexes/conftest.py | 52 + pandas/tests/indexes/datetimelike.py | 97 + pandas/tests/indexes/datetimes/__init__.py | 0 pandas/tests/indexes/datetimes/test_astype.py | 378 + .../indexes/datetimes/test_constructors.py | 952 ++ .../indexes/datetimes/test_date_range.py | 963 ++ .../tests/indexes/datetimes/test_datetime.py | 445 + .../indexes/datetimes/test_datetimelike.py | 33 + .../tests/indexes/datetimes/test_formats.py | 241 + .../tests/indexes/datetimes/test_indexing.py | 784 ++ pandas/tests/indexes/datetimes/test_join.py | 144 + pandas/tests/indexes/datetimes/test_misc.py | 384 + .../tests/indexes/datetimes/test_missing.py | 62 + pandas/tests/indexes/datetimes/test_ops.py | 546 + .../indexes/datetimes/test_partial_slicing.py | 481 + .../indexes/datetimes/test_scalar_compat.py | 338 + pandas/tests/indexes/datetimes/test_setops.py | 596 + pandas/tests/indexes/datetimes/test_shift.py | 117 + .../tests/indexes/datetimes/test_timezones.py | 1251 ++ pandas/tests/indexes/datetimes/test_tools.py | 2324 ++++ pandas/tests/indexes/interval/__init__.py | 0 pandas/tests/indexes/interval/test_astype.py | 223 + pandas/tests/indexes/interval/test_base.py | 88 + .../indexes/interval/test_constructors.py | 423 + pandas/tests/indexes/interval/test_formats.py | 78 + .../tests/indexes/interval/test_indexing.py | 438 + .../tests/indexes/interval/test_interval.py | 868 ++ .../indexes/interval/test_interval_range.py | 355 + .../indexes/interval/test_interval_tree.py | 193 + pandas/tests/indexes/interval/test_setops.py | 187 + pandas/tests/indexes/multi/__init__.py | 0 
pandas/tests/indexes/multi/conftest.py | 85 + pandas/tests/indexes/multi/test_analytics.py | 356 + pandas/tests/indexes/multi/test_astype.py | 30 + pandas/tests/indexes/multi/test_compat.py | 123 + .../tests/indexes/multi/test_constructors.py | 725 + pandas/tests/indexes/multi/test_contains.py | 124 + pandas/tests/indexes/multi/test_conversion.py | 197 + pandas/tests/indexes/multi/test_copy.py | 88 + pandas/tests/indexes/multi/test_drop.py | 190 + pandas/tests/indexes/multi/test_duplicates.py | 276 + .../tests/indexes/multi/test_equivalence.py | 226 + pandas/tests/indexes/multi/test_format.py | 197 + pandas/tests/indexes/multi/test_get_set.py | 417 + pandas/tests/indexes/multi/test_indexing.py | 528 + pandas/tests/indexes/multi/test_integrity.py | 294 + pandas/tests/indexes/multi/test_join.py | 105 + pandas/tests/indexes/multi/test_missing.py | 143 + pandas/tests/indexes/multi/test_monotonic.py | 230 + pandas/tests/indexes/multi/test_names.py | 143 + .../indexes/multi/test_partial_indexing.py | 96 + pandas/tests/indexes/multi/test_reindex.py | 103 + pandas/tests/indexes/multi/test_reshape.py | 130 + pandas/tests/indexes/multi/test_setops.py | 363 + pandas/tests/indexes/multi/test_sorting.py | 276 + pandas/tests/indexes/period/__init__.py | 0 pandas/tests/indexes/period/test_asfreq.py | 149 + pandas/tests/indexes/period/test_astype.py | 128 + .../tests/indexes/period/test_constructors.py | 520 + pandas/tests/indexes/period/test_formats.py | 211 + pandas/tests/indexes/period/test_indexing.py | 731 + pandas/tests/indexes/period/test_ops.py | 347 + .../indexes/period/test_partial_slicing.py | 135 + pandas/tests/indexes/period/test_period.py | 664 + .../tests/indexes/period/test_period_range.py | 99 + .../indexes/period/test_scalar_compat.py | 17 + pandas/tests/indexes/period/test_setops.py | 375 + pandas/tests/indexes/period/test_shift.py | 119 + pandas/tests/indexes/period/test_tools.py | 381 + pandas/tests/indexes/ranges/__init__.py | 0 
.../tests/indexes/ranges/test_constructors.py | 154 + pandas/tests/indexes/ranges/test_range.py | 742 + pandas/tests/indexes/ranges/test_setops.py | 244 + pandas/tests/indexes/test_base.py | 2841 ++++ pandas/tests/indexes/test_common.py | 354 + pandas/tests/indexes/test_frozen.py | 104 + pandas/tests/indexes/test_numeric.py | 1243 ++ pandas/tests/indexes/test_numpy_compat.py | 130 + pandas/tests/indexes/test_setops.py | 107 + pandas/tests/indexes/timedeltas/__init__.py | 0 .../tests/indexes/timedeltas/test_astype.py | 123 + .../indexes/timedeltas/test_constructors.py | 212 + .../tests/indexes/timedeltas/test_formats.py | 90 + .../tests/indexes/timedeltas/test_indexing.py | 384 + pandas/tests/indexes/timedeltas/test_join.py | 49 + pandas/tests/indexes/timedeltas/test_ops.py | 316 + .../timedeltas/test_partial_slicing.py | 90 + .../indexes/timedeltas/test_scalar_compat.py | 71 + .../tests/indexes/timedeltas/test_setops.py | 260 + pandas/tests/indexes/timedeltas/test_shift.py | 75 + .../indexes/timedeltas/test_timedelta.py | 300 + .../timedeltas/test_timedelta_range.py | 80 + pandas/tests/indexes/timedeltas/test_tools.py | 146 + pandas/tests/indexing/__init__.py | 0 pandas/tests/indexing/common.py | 224 + pandas/tests/indexing/conftest.py | 23 + pandas/tests/indexing/interval/__init__.py | 0 .../tests/indexing/interval/test_interval.py | 149 + .../indexing/interval/test_interval_new.py | 248 + pandas/tests/indexing/multiindex/__init__.py | 0 pandas/tests/indexing/multiindex/conftest.py | 30 + .../multiindex/test_chaining_and_caching.py | 64 + .../indexing/multiindex/test_datetime.py | 22 + .../tests/indexing/multiindex/test_getitem.py | 252 + pandas/tests/indexing/multiindex/test_iloc.py | 171 + .../indexing/multiindex/test_indexing_slow.py | 92 + pandas/tests/indexing/multiindex/test_ix.py | 63 + pandas/tests/indexing/multiindex/test_loc.py | 470 + .../indexing/multiindex/test_multiindex.py | 113 + .../tests/indexing/multiindex/test_partial.py | 194 + 
.../tests/indexing/multiindex/test_set_ops.py | 41 + .../tests/indexing/multiindex/test_setitem.py | 453 + .../tests/indexing/multiindex/test_slice.py | 740 + .../tests/indexing/multiindex/test_sorted.py | 97 + pandas/tests/indexing/multiindex/test_xs.py | 245 + pandas/tests/indexing/test_callable.py | 260 + pandas/tests/indexing/test_categorical.py | 822 ++ .../indexing/test_chaining_and_caching.py | 394 + pandas/tests/indexing/test_check_indexer.py | 99 + pandas/tests/indexing/test_coercion.py | 1087 ++ pandas/tests/indexing/test_datetime.py | 372 + pandas/tests/indexing/test_floats.py | 925 ++ pandas/tests/indexing/test_iloc.py | 690 + pandas/tests/indexing/test_indexing.py | 1186 ++ .../tests/indexing/test_indexing_engines.py | 163 + pandas/tests/indexing/test_indexing_slow.py | 14 + pandas/tests/indexing/test_loc.py | 1004 ++ pandas/tests/indexing/test_na_indexing.py | 90 + pandas/tests/indexing/test_partial.py | 527 + pandas/tests/indexing/test_scalar.py | 247 + pandas/tests/indexing/test_timedelta.py | 120 + pandas/tests/internals/__init__.py | 0 pandas/tests/internals/test_internals.py | 1285 ++ pandas/tests/io/__init__.py | 0 pandas/tests/io/conftest.py | 83 + pandas/tests/io/data/csv/banklist.csv | 507 + pandas/tests/io/data/csv/iris.csv | 151 + pandas/tests/io/data/csv/test1.csv | 8 + pandas/tests/io/data/csv/test_mmap.csv | 5 + pandas/tests/io/data/csv/tips.csv | 245 + pandas/tests/io/data/excel/blank.ods | Bin 0 -> 2813 bytes pandas/tests/io/data/excel/blank.xls | Bin 0 -> 23040 bytes pandas/tests/io/data/excel/blank.xlsb | Bin 0 -> 8908 bytes pandas/tests/io/data/excel/blank.xlsm | Bin 0 -> 8418 bytes pandas/tests/io/data/excel/blank.xlsx | Bin 0 -> 8379 bytes .../tests/io/data/excel/blank_with_header.ods | Bin 0 -> 2893 bytes .../tests/io/data/excel/blank_with_header.xls | Bin 0 -> 23040 bytes .../io/data/excel/blank_with_header.xlsb | Bin 0 -> 9129 bytes .../io/data/excel/blank_with_header.xlsm | Bin 0 -> 8813 bytes 
.../io/data/excel/blank_with_header.xlsx | Bin 0 -> 8773 bytes .../io/data/excel/invalid_value_type.ods | Bin 0 -> 8502 bytes pandas/tests/io/data/excel/test1.ods | Bin 0 -> 4440 bytes pandas/tests/io/data/excel/test1.xls | Bin 0 -> 28672 bytes pandas/tests/io/data/excel/test1.xlsb | Bin 0 -> 11359 bytes pandas/tests/io/data/excel/test1.xlsm | Bin 0 -> 12091 bytes pandas/tests/io/data/excel/test1.xlsx | Bin 0 -> 12074 bytes pandas/tests/io/data/excel/test2.ods | Bin 0 -> 2877 bytes pandas/tests/io/data/excel/test2.xls | Bin 0 -> 5632 bytes pandas/tests/io/data/excel/test2.xlsb | Bin 0 -> 7579 bytes pandas/tests/io/data/excel/test2.xlsm | Bin 0 -> 8086 bytes pandas/tests/io/data/excel/test2.xlsx | Bin 0 -> 8067 bytes pandas/tests/io/data/excel/test3.ods | Bin 0 -> 2889 bytes pandas/tests/io/data/excel/test3.xls | Bin 0 -> 23040 bytes pandas/tests/io/data/excel/test3.xlsb | Bin 0 -> 7553 bytes pandas/tests/io/data/excel/test3.xlsm | Bin 0 -> 8063 bytes pandas/tests/io/data/excel/test3.xlsx | Bin 0 -> 8045 bytes pandas/tests/io/data/excel/test4.ods | Bin 0 -> 2992 bytes pandas/tests/io/data/excel/test4.xls | Bin 0 -> 25600 bytes pandas/tests/io/data/excel/test4.xlsb | Bin 0 -> 7646 bytes pandas/tests/io/data/excel/test4.xlsm | Bin 0 -> 8360 bytes pandas/tests/io/data/excel/test4.xlsx | Bin 0 -> 8344 bytes pandas/tests/io/data/excel/test5.ods | Bin 0 -> 2906 bytes pandas/tests/io/data/excel/test5.xls | Bin 0 -> 20480 bytes pandas/tests/io/data/excel/test5.xlsb | Bin 0 -> 7824 bytes pandas/tests/io/data/excel/test5.xlsm | Bin 0 -> 8642 bytes pandas/tests/io/data/excel/test5.xlsx | Bin 0 -> 8626 bytes .../tests/io/data/excel/test_converters.ods | Bin 0 -> 3287 bytes .../tests/io/data/excel/test_converters.xls | Bin 0 -> 6144 bytes .../tests/io/data/excel/test_converters.xlsb | Bin 0 -> 7810 bytes .../tests/io/data/excel/test_converters.xlsm | Bin 0 -> 8467 bytes .../tests/io/data/excel/test_converters.xlsx | Bin 0 -> 4810 bytes .../io/data/excel/test_index_name_pre17.ods 
| Bin 0 -> 3699 bytes .../io/data/excel/test_index_name_pre17.xls | Bin 0 -> 26624 bytes .../io/data/excel/test_index_name_pre17.xlsb | Bin 0 -> 11097 bytes .../io/data/excel/test_index_name_pre17.xlsm | Bin 0 -> 10896 bytes .../io/data/excel/test_index_name_pre17.xlsx | Bin 0 -> 10879 bytes .../tests/io/data/excel/test_multisheet.ods | Bin 0 -> 3797 bytes .../tests/io/data/excel/test_multisheet.xls | Bin 0 -> 24576 bytes .../tests/io/data/excel/test_multisheet.xlsb | Bin 0 -> 10707 bytes .../tests/io/data/excel/test_multisheet.xlsm | Bin 0 -> 11313 bytes .../tests/io/data/excel/test_multisheet.xlsx | Bin 0 -> 11296 bytes pandas/tests/io/data/excel/test_squeeze.ods | Bin 0 -> 3218 bytes pandas/tests/io/data/excel/test_squeeze.xls | Bin 0 -> 26112 bytes pandas/tests/io/data/excel/test_squeeze.xlsb | Bin 0 -> 8567 bytes pandas/tests/io/data/excel/test_squeeze.xlsm | Bin 0 -> 9122 bytes pandas/tests/io/data/excel/test_squeeze.xlsx | Bin 0 -> 9106 bytes pandas/tests/io/data/excel/test_types.ods | Bin 0 -> 3489 bytes pandas/tests/io/data/excel/test_types.xls | Bin 0 -> 26112 bytes pandas/tests/io/data/excel/test_types.xlsb | Bin 0 -> 8053 bytes pandas/tests/io/data/excel/test_types.xlsm | Bin 0 -> 9042 bytes pandas/tests/io/data/excel/test_types.xlsx | Bin 0 -> 9010 bytes .../tests/io/data/excel/testdateoverflow.ods | Bin 0 -> 3422 bytes .../tests/io/data/excel/testdateoverflow.xls | Bin 0 -> 19456 bytes .../tests/io/data/excel/testdateoverflow.xlsb | Bin 0 -> 9856 bytes .../tests/io/data/excel/testdateoverflow.xlsm | Bin 0 -> 9374 bytes .../tests/io/data/excel/testdateoverflow.xlsx | Bin 0 -> 9351 bytes pandas/tests/io/data/excel/testdtype.ods | Bin 0 -> 3196 bytes pandas/tests/io/data/excel/testdtype.xls | Bin 0 -> 22528 bytes pandas/tests/io/data/excel/testdtype.xlsb | Bin 0 -> 7697 bytes pandas/tests/io/data/excel/testdtype.xlsm | Bin 0 -> 8517 bytes pandas/tests/io/data/excel/testdtype.xlsx | Bin 0 -> 8501 bytes pandas/tests/io/data/excel/testmultiindex.ods | Bin 0 
-> 5575 bytes pandas/tests/io/data/excel/testmultiindex.xls | Bin 0 -> 39424 bytes .../tests/io/data/excel/testmultiindex.xlsb | Bin 0 -> 18853 bytes .../tests/io/data/excel/testmultiindex.xlsm | Bin 0 -> 19133 bytes .../tests/io/data/excel/testmultiindex.xlsx | Bin 0 -> 18845 bytes pandas/tests/io/data/excel/testskiprows.ods | Bin 0 -> 3235 bytes pandas/tests/io/data/excel/testskiprows.xls | Bin 0 -> 22528 bytes pandas/tests/io/data/excel/testskiprows.xlsb | Bin 0 -> 7699 bytes pandas/tests/io/data/excel/testskiprows.xlsm | Bin 0 -> 8281 bytes pandas/tests/io/data/excel/testskiprows.xlsx | Bin 0 -> 8258 bytes pandas/tests/io/data/excel/times_1900.ods | Bin 0 -> 3181 bytes pandas/tests/io/data/excel/times_1900.xls | Bin 0 -> 16384 bytes pandas/tests/io/data/excel/times_1900.xlsb | Bin 0 -> 7773 bytes pandas/tests/io/data/excel/times_1900.xlsm | Bin 0 -> 8282 bytes pandas/tests/io/data/excel/times_1900.xlsx | Bin 0 -> 8266 bytes pandas/tests/io/data/excel/times_1904.ods | Bin 0 -> 3215 bytes pandas/tests/io/data/excel/times_1904.xls | Bin 0 -> 16384 bytes pandas/tests/io/data/excel/times_1904.xlsb | Bin 0 -> 7734 bytes pandas/tests/io/data/excel/times_1904.xlsm | Bin 0 -> 8260 bytes pandas/tests/io/data/excel/times_1904.xlsx | Bin 0 -> 8244 bytes pandas/tests/io/data/excel/writertable.odt | Bin 0 -> 10313 bytes .../io/data/feather/feather-0_3_1.feather | Bin 0 -> 672 bytes .../data/fixed_width/fixed_width_format.txt | 3 + pandas/tests/io/data/gbq_fake_job.txt | 1 + pandas/tests/io/data/html/banklist.html | 4886 +++++++ pandas/tests/io/data/html/spam.html | 797 ++ pandas/tests/io/data/html/valid_markup.html | 62 + .../tests/io/data/html/wikipedia_states.html | 1756 +++ .../io/data/html_encoding/chinese_utf-16.html | Bin 0 -> 824 bytes .../io/data/html_encoding/chinese_utf-32.html | Bin 0 -> 1648 bytes .../io/data/html_encoding/chinese_utf-8.html | 26 + .../io/data/html_encoding/letz_latin1.html | 26 + .../io/data/legacy_hdf/datetimetz_object.h5 | Bin 0 -> 106271 
bytes pandas/tests/io/data/legacy_hdf/gh26443.h5 | Bin 0 -> 7168 bytes .../data/legacy_hdf/legacy_table_fixed_py2.h5 | Bin 0 -> 1064200 bytes .../io/data/legacy_hdf/legacy_table_py2.h5 | Bin 0 -> 72279 bytes ...periodindex_0.20.1_x86_64_darwin_2.7.13.h5 | Bin 0 -> 7312 bytes .../io/data/legacy_hdf/pytables_native.h5 | Bin 0 -> 74246 bytes .../io/data/legacy_hdf/pytables_native2.h5 | Bin 0 -> 12336 bytes .../0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack | Bin 0 -> 118654 bytes .../0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle | Bin 0 -> 127923 bytes .../0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle | Bin 0 -> 127244 bytes .../tests/io/data/orc/TestOrcFile.decimal.orc | Bin 0 -> 16337 bytes .../io/data/orc/TestOrcFile.emptyFile.orc | Bin 0 -> 523 bytes .../tests/io/data/orc/TestOrcFile.test1.orc | Bin 0 -> 1711 bytes .../io/data/orc/TestOrcFile.testDate1900.orc | Bin 0 -> 30941 bytes .../io/data/orc/TestOrcFile.testDate2038.orc | Bin 0 -> 95787 bytes .../io/data/orc/TestOrcFile.testSnappy.orc | Bin 0 -> 126370 bytes pandas/tests/io/data/parquet/simple.parquet | Bin 0 -> 2157 bytes .../io/data/pickle/categorical.0.25.0.pickle | Bin 0 -> 578 bytes .../data/pickle/sparseframe-0.20.3.pickle.gz | Bin 0 -> 625 bytes .../data/pickle/sparseseries-0.20.3.pickle.gz | Bin 0 -> 521 bytes pandas/tests/io/data/pickle/test_mi_py27.pkl | Bin 0 -> 1395 bytes pandas/tests/io/data/pickle/test_py27.pkl | Bin 0 -> 943 bytes pandas/tests/io/data/spss/labelled-num-na.sav | Bin 0 -> 535 bytes pandas/tests/io/data/spss/labelled-num.sav | Bin 0 -> 507 bytes pandas/tests/io/data/spss/labelled-str.sav | Bin 0 -> 525 bytes pandas/tests/io/data/spss/umlauts.sav | Bin 0 -> 567 bytes pandas/tests/io/data/stata/S4_EDUC1.dta | Bin 0 -> 2997 bytes pandas/tests/io/data/stata/stata10_115.dta | Bin 0 -> 2298 bytes pandas/tests/io/data/stata/stata10_117.dta | Bin 0 -> 2298 bytes pandas/tests/io/data/stata/stata11_115.dta | Bin 0 -> 810 bytes pandas/tests/io/data/stata/stata11_117.dta | Bin 0 -> 1268 bytes 
pandas/tests/io/data/stata/stata12_117.dta | Bin 0 -> 1285 bytes pandas/tests/io/data/stata/stata13_dates.dta | Bin 0 -> 3386 bytes pandas/tests/io/data/stata/stata14_118.dta | Bin 0 -> 5556 bytes pandas/tests/io/data/stata/stata15.dta | Bin 0 -> 3183 bytes pandas/tests/io/data/stata/stata16_118.dta | Bin 0 -> 4614 bytes pandas/tests/io/data/stata/stata1_114.dta | Bin 0 -> 1130 bytes pandas/tests/io/data/stata/stata1_117.dta | Bin 0 -> 1569 bytes pandas/tests/io/data/stata/stata1_119.dta.gz | Bin 0 -> 269559 bytes .../tests/io/data/stata/stata1_encoding.dta | Bin 0 -> 3507 bytes .../io/data/stata/stata1_encoding_118.dta | Bin 0 -> 5587 bytes pandas/tests/io/data/stata/stata2_113.dta | Bin 0 -> 1490 bytes pandas/tests/io/data/stata/stata2_114.dta | Bin 0 -> 1786 bytes pandas/tests/io/data/stata/stata2_115.dta | Bin 0 -> 1786 bytes pandas/tests/io/data/stata/stata2_117.dta | Bin 0 -> 2228 bytes pandas/tests/io/data/stata/stata3.csv | 204 + pandas/tests/io/data/stata/stata3_113.dta | Bin 0 -> 12737 bytes pandas/tests/io/data/stata/stata3_114.dta | Bin 0 -> 13255 bytes pandas/tests/io/data/stata/stata3_115.dta | Bin 0 -> 13255 bytes pandas/tests/io/data/stata/stata3_117.dta | Bin 0 -> 13703 bytes pandas/tests/io/data/stata/stata4_113.dta | Bin 0 -> 1528 bytes pandas/tests/io/data/stata/stata4_114.dta | Bin 0 -> 1713 bytes pandas/tests/io/data/stata/stata4_115.dta | Bin 0 -> 1713 bytes pandas/tests/io/data/stata/stata4_117.dta | Bin 0 -> 2185 bytes pandas/tests/io/data/stata/stata5.csv | 19 + pandas/tests/io/data/stata/stata5_113.dta | Bin 0 -> 4628 bytes pandas/tests/io/data/stata/stata5_114.dta | Bin 0 -> 4924 bytes pandas/tests/io/data/stata/stata5_115.dta | Bin 0 -> 4924 bytes pandas/tests/io/data/stata/stata5_117.dta | Bin 0 -> 5366 bytes pandas/tests/io/data/stata/stata6.csv | 6 + pandas/tests/io/data/stata/stata6_113.dta | Bin 0 -> 2752 bytes pandas/tests/io/data/stata/stata6_114.dta | Bin 0 -> 3048 bytes pandas/tests/io/data/stata/stata6_115.dta | Bin 0 -> 3048 
bytes pandas/tests/io/data/stata/stata6_117.dta | Bin 0 -> 3490 bytes pandas/tests/io/data/stata/stata7_111.dta | Bin 0 -> 1024 bytes pandas/tests/io/data/stata/stata7_115.dta | Bin 0 -> 722 bytes pandas/tests/io/data/stata/stata7_117.dta | Bin 0 -> 1159 bytes pandas/tests/io/data/stata/stata8_113.dta | Bin 0 -> 1439 bytes pandas/tests/io/data/stata/stata8_115.dta | Bin 0 -> 1624 bytes pandas/tests/io/data/stata/stata8_117.dta | Bin 0 -> 2063 bytes pandas/tests/io/data/stata/stata9_115.dta | Bin 0 -> 2342 bytes pandas/tests/io/data/stata/stata9_117.dta | Bin 0 -> 2342 bytes pandas/tests/io/excel/__init__.py | 6 + pandas/tests/io/excel/conftest.py | 65 + pandas/tests/io/excel/test_odf.py | 46 + pandas/tests/io/excel/test_openpyxl.py | 124 + pandas/tests/io/excel/test_readers.py | 1056 ++ pandas/tests/io/excel/test_style.py | 169 + pandas/tests/io/excel/test_writers.py | 1294 ++ pandas/tests/io/excel/test_xlrd.py | 43 + pandas/tests/io/excel/test_xlsxwriter.py | 64 + pandas/tests/io/excel/test_xlwt.py | 67 + pandas/tests/io/formats/__init__.py | 0 .../data/html/datetime64_hourformatter.html | 18 + .../data/html/datetime64_monthformatter.html | 18 + .../io/formats/data/html/escape_disabled.html | 21 + .../tests/io/formats/data/html/escaped.html | 21 + .../data/html/gh12031_expected_output.html | 22 + .../data/html/gh14882_expected_output_1.html | 274 + .../data/html/gh14882_expected_output_2.html | 258 + .../data/html/gh14998_expected_output.html | 12 + .../data/html/gh15019_expected_output.html | 30 + .../data/html/gh21625_expected_output.html | 14 + .../data/html/gh22270_expected_output.html | 14 + .../data/html/gh22579_expected_output.html | 76 + .../data/html/gh22783_expected_output.html | 27 + .../html/gh22783_named_columns_index.html | 30 + .../data/html/gh6131_expected_output.html | 46 + .../data/html/gh8452_expected_output.html | 28 + .../html_repr_max_rows_10_min_rows_12.html | 70 + .../html_repr_max_rows_10_min_rows_4.html | 46 + 
.../html_repr_max_rows_12_min_rows_None.html | 78 + .../html_repr_max_rows_None_min_rows_12.html | 269 + ...l_repr_min_rows_default_no_truncation.html | 105 + .../html_repr_min_rows_default_truncated.html | 70 + .../tests/io/formats/data/html/index_1.html | 30 + .../tests/io/formats/data/html/index_2.html | 26 + .../tests/io/formats/data/html/index_3.html | 36 + .../tests/io/formats/data/html/index_4.html | 33 + .../tests/io/formats/data/html/index_5.html | 40 + .../io/formats/data/html/index_formatter.html | 31 + ...index_named_multi_columns_named_multi.html | 34 + ...ex_named_multi_columns_named_standard.html | 29 + .../html/index_named_multi_columns_none.html | 23 + ...dex_named_multi_columns_unnamed_multi.html | 34 + ..._named_multi_columns_unnamed_standard.html | 29 + ...ex_named_standard_columns_named_multi.html | 30 + ...named_standard_columns_named_standard.html | 26 + .../index_named_standard_columns_none.html | 21 + ..._named_standard_columns_unnamed_multi.html | 30 + ...med_standard_columns_unnamed_standard.html | 26 + .../html/index_none_columns_named_multi.html | 25 + .../index_none_columns_named_standard.html | 21 + .../data/html/index_none_columns_none.html | 12 + .../index_none_columns_unnamed_multi.html | 21 + .../index_none_columns_unnamed_standard.html | 18 + ...dex_unnamed_multi_columns_named_multi.html | 28 + ..._unnamed_multi_columns_named_standard.html | 23 + .../index_unnamed_multi_columns_none.html | 15 + ...x_unnamed_multi_columns_unnamed_multi.html | 28 + ...nnamed_multi_columns_unnamed_standard.html | 23 + ..._unnamed_standard_columns_named_multi.html | 25 + ...named_standard_columns_named_standard.html | 21 + .../index_unnamed_standard_columns_none.html | 14 + ...nnamed_standard_columns_unnamed_multi.html | 25 + ...med_standard_columns_unnamed_standard.html | 21 + .../tests/io/formats/data/html/justify.html | 30 + .../io/formats/data/html/multiindex_1.html | 32 + .../io/formats/data/html/multiindex_2.html | 34 + 
.../data/html/multiindex_sparsify_1.html | 40 + .../data/html/multiindex_sparsify_2.html | 46 + ...tiindex_sparsify_false_multi_sparse_1.html | 42 + ...tiindex_sparsify_false_multi_sparse_2.html | 48 + .../formats/data/html/render_links_false.html | 24 + .../formats/data/html/render_links_true.html | 24 + ...index_named_multi_columns_named_multi.html | 88 + ...ex_named_multi_columns_named_standard.html | 72 + ...unc_df_index_named_multi_columns_none.html | 62 + ...dex_named_multi_columns_unnamed_multi.html | 88 + ..._named_multi_columns_unnamed_standard.html | 72 + ...ex_named_standard_columns_named_multi.html | 74 + ...named_standard_columns_named_standard.html | 62 + ..._df_index_named_standard_columns_none.html | 54 + ..._named_standard_columns_unnamed_multi.html | 74 + ...med_standard_columns_unnamed_standard.html | 62 + ...unc_df_index_none_columns_named_multi.html | 66 + ..._df_index_none_columns_named_standard.html | 54 + .../trunc_df_index_none_columns_none.html | 39 + ...c_df_index_none_columns_unnamed_multi.html | 58 + ...f_index_none_columns_unnamed_standard.html | 48 + ...dex_unnamed_multi_columns_named_multi.html | 78 + ..._unnamed_multi_columns_named_standard.html | 62 + ...c_df_index_unnamed_multi_columns_none.html | 50 + ...x_unnamed_multi_columns_unnamed_multi.html | 78 + ...nnamed_multi_columns_unnamed_standard.html | 62 + ..._unnamed_standard_columns_named_multi.html | 66 + ...named_standard_columns_named_standard.html | 54 + ...f_index_unnamed_standard_columns_none.html | 44 + ...nnamed_standard_columns_unnamed_multi.html | 66 + ...med_standard_columns_unnamed_standard.html | 54 + .../tests/io/formats/data/html/truncate.html | 86 + .../formats/data/html/truncate_formatter.html | 36 + .../data/html/truncate_multi_index.html | 101 + .../html/truncate_multi_index_sparse_off.html | 105 + .../tests/io/formats/data/html/unicode_1.html | 50 + .../tests/io/formats/data/html/unicode_2.html | 14 + .../io/formats/data/html/with_classes.html | 9 + 
pandas/tests/io/formats/test_console.py | 72 + pandas/tests/io/formats/test_css.py | 232 + .../tests/io/formats/test_eng_formatting.py | 235 + pandas/tests/io/formats/test_format.py | 3288 +++++ pandas/tests/io/formats/test_printing.py | 205 + pandas/tests/io/formats/test_style.py | 1789 +++ pandas/tests/io/formats/test_to_csv.py | 599 + pandas/tests/io/formats/test_to_excel.py | 315 + pandas/tests/io/formats/test_to_html.py | 788 ++ pandas/tests/io/formats/test_to_latex.py | 884 ++ pandas/tests/io/formats/test_to_markdown.py | 55 + .../tests/io/generate_legacy_storage_files.py | 360 + pandas/tests/io/json/__init__.py | 0 pandas/tests/io/json/conftest.py | 9 + .../tests/io/json/data/tsframe_iso_v012.json | 1 + pandas/tests/io/json/data/tsframe_v012.json | 1 + .../tests/io/json/data/tsframe_v012.json.zip | Bin 0 -> 436 bytes pandas/tests/io/json/test_compression.py | 113 + .../tests/io/json/test_json_table_schema.py | 724 + pandas/tests/io/json/test_normalize.py | 740 + pandas/tests/io/json/test_pandas.py | 1652 +++ pandas/tests/io/json/test_readlines.py | 181 + pandas/tests/io/json/test_ujson.py | 1079 ++ pandas/tests/io/parser/__init__.py | 0 pandas/tests/io/parser/conftest.py | 123 + pandas/tests/io/parser/data/items.jsonl | 2 + pandas/tests/io/parser/data/salaries.csv | 47 + pandas/tests/io/parser/data/salaries.csv.bz2 | Bin 0 -> 283 bytes pandas/tests/io/parser/data/salaries.csv.gz | Bin 0 -> 302 bytes pandas/tests/io/parser/data/salaries.csv.xz | Bin 0 -> 336 bytes pandas/tests/io/parser/data/salaries.csv.zip | Bin 0 -> 445 bytes .../tests/io/parser/data/sauron.SHIFT_JIS.csv | 14 + pandas/tests/io/parser/data/sub_char.csv | 2 + pandas/tests/io/parser/data/tar_csv.tar | Bin 0 -> 10240 bytes pandas/tests/io/parser/data/tar_csv.tar.gz | Bin 0 -> 117 bytes pandas/tests/io/parser/data/test1.csv | 8 + pandas/tests/io/parser/data/test1.csv.bz2 | Bin 0 -> 307 bytes pandas/tests/io/parser/data/test1.csv.gz | Bin 0 -> 294 bytes pandas/tests/io/parser/data/test2.csv | 6 
+ pandas/tests/io/parser/data/test_mmap.csv | 4 + pandas/tests/io/parser/data/tips.csv | 245 + pandas/tests/io/parser/data/tips.csv.bz2 | Bin 0 -> 1316 bytes pandas/tests/io/parser/data/tips.csv.gz | Bin 0 -> 1740 bytes .../tests/io/parser/data/unicode_series.csv | 18 + pandas/tests/io/parser/data/utf16_ex.txt | Bin 0 -> 11406 bytes .../tests/io/parser/data/utf16_ex_small.zip | Bin 0 -> 285 bytes .../tests/io/parser/data/utf32_ex_small.zip | Bin 0 -> 251 bytes pandas/tests/io/parser/data/utf8_ex_small.zip | Bin 0 -> 201 bytes pandas/tests/io/parser/test_c_parser_only.py | 610 + pandas/tests/io/parser/test_comment.py | 136 + pandas/tests/io/parser/test_common.py | 2072 +++ pandas/tests/io/parser/test_compression.py | 151 + pandas/tests/io/parser/test_converters.py | 160 + pandas/tests/io/parser/test_dialect.py | 144 + pandas/tests/io/parser/test_dtypes.py | 584 + pandas/tests/io/parser/test_encoding.py | 199 + pandas/tests/io/parser/test_header.py | 573 + pandas/tests/io/parser/test_index_col.py | 186 + pandas/tests/io/parser/test_mangle_dupes.py | 132 + pandas/tests/io/parser/test_multi_thread.py | 146 + pandas/tests/io/parser/test_na_values.py | 567 + pandas/tests/io/parser/test_network.py | 240 + pandas/tests/io/parser/test_parse_dates.py | 1518 +++ .../io/parser/test_python_parser_only.py | 298 + pandas/tests/io/parser/test_quoting.py | 159 + pandas/tests/io/parser/test_read_fwf.py | 618 + pandas/tests/io/parser/test_skiprows.py | 252 + pandas/tests/io/parser/test_textreader.py | 347 + pandas/tests/io/parser/test_unsupported.py | 123 + pandas/tests/io/parser/test_usecols.py | 572 + pandas/tests/io/pytables/__init__.py | 0 pandas/tests/io/pytables/common.py | 82 + pandas/tests/io/pytables/conftest.py | 17 + pandas/tests/io/pytables/test_compat.py | 77 + pandas/tests/io/pytables/test_complex.py | 185 + .../io/pytables/test_pytables_missing.py | 14 + pandas/tests/io/pytables/test_store.py | 4796 +++++++ pandas/tests/io/pytables/test_timezones.py | 386 + 
pandas/tests/io/sas/__init__.py | 0 pandas/tests/io/sas/data/DEMO_G.csv | 9757 +++++++++++++ pandas/tests/io/sas/data/DEMO_G.xpt | Bin 0 -> 3753760 bytes pandas/tests/io/sas/data/DRXFCD_G.csv | 7619 +++++++++++ pandas/tests/io/sas/data/DRXFCD_G.xpt | Bin 0 -> 2195200 bytes pandas/tests/io/sas/data/SSHSV1_A.csv | 1427 ++ pandas/tests/io/sas/data/SSHSV1_A.xpt | Bin 0 -> 23920 bytes pandas/tests/io/sas/data/airline.csv | 33 + pandas/tests/io/sas/data/airline.sas7bdat | Bin 0 -> 5120 bytes pandas/tests/io/sas/data/cars.sas7bdat | Bin 0 -> 13312 bytes pandas/tests/io/sas/data/datetime.csv | 5 + pandas/tests/io/sas/data/datetime.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/load_log.sas7bdat | Bin 0 -> 589824 bytes pandas/tests/io/sas/data/many_columns.csv | 4 + .../tests/io/sas/data/many_columns.sas7bdat | Bin 0 -> 81920 bytes pandas/tests/io/sas/data/paxraw_d_short.csv | 101 + pandas/tests/io/sas/data/paxraw_d_short.xpt | Bin 0 -> 6960 bytes pandas/tests/io/sas/data/productsales.csv | 1441 ++ .../tests/io/sas/data/productsales.sas7bdat | Bin 0 -> 148480 bytes pandas/tests/io/sas/data/test1.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test10.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test11.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test12.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test13.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test14.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test15.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test16.sas7bdat | Bin 0 -> 73728 bytes pandas/tests/io/sas/data/test2.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test3.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test4.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test5.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test6.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test7.sas7bdat | Bin 0 -> 131072 bytes 
pandas/tests/io/sas/data/test8.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test9.sas7bdat | Bin 0 -> 196608 bytes pandas/tests/io/sas/data/test_12659.csv | 37 + pandas/tests/io/sas/data/test_12659.sas7bdat | Bin 0 -> 131072 bytes pandas/tests/io/sas/data/test_sas7bdat_1.csv | 11 + pandas/tests/io/sas/data/test_sas7bdat_2.csv | 11 + .../tests/io/sas/data/zero_variables.sas7bdat | Bin 0 -> 149504 bytes pandas/tests/io/sas/test_sas.py | 26 + pandas/tests/io/sas/test_sas7bdat.py | 216 + pandas/tests/io/sas/test_xport.py | 141 + pandas/tests/io/test_clipboard.py | 256 + pandas/tests/io/test_common.py | 359 + pandas/tests/io/test_compression.py | 144 + pandas/tests/io/test_date_converters.py | 40 + pandas/tests/io/test_feather.py | 150 + pandas/tests/io/test_gbq.py | 235 + pandas/tests/io/test_gcs.py | 101 + pandas/tests/io/test_html.py | 1214 ++ pandas/tests/io/test_orc.py | 227 + pandas/tests/io/test_parquet.py | 723 + pandas/tests/io/test_pickle.py | 498 + pandas/tests/io/test_s3.py | 25 + pandas/tests/io/test_spss.py | 73 + pandas/tests/io/test_sql.py | 2851 ++++ pandas/tests/io/test_stata.py | 1818 +++ pandas/tests/plotting/__init__.py | 0 pandas/tests/plotting/common.py | 563 + pandas/tests/plotting/test_backend.py | 104 + pandas/tests/plotting/test_boxplot_method.py | 442 + pandas/tests/plotting/test_converter.py | 365 + pandas/tests/plotting/test_datetimelike.py | 1518 +++ pandas/tests/plotting/test_frame.py | 3314 +++++ pandas/tests/plotting/test_groupby.py | 69 + pandas/tests/plotting/test_hist_method.py | 464 + pandas/tests/plotting/test_misc.py | 412 + pandas/tests/plotting/test_series.py | 938 ++ pandas/tests/reductions/__init__.py | 4 + pandas/tests/reductions/test_reductions.py | 1278 ++ .../tests/reductions/test_stat_reductions.py | 270 + pandas/tests/resample/__init__.py | 0 pandas/tests/resample/conftest.py | 158 + pandas/tests/resample/test_base.py | 269 + pandas/tests/resample/test_datetime_index.py | 1583 +++ 
pandas/tests/resample/test_period_index.py | 872 ++ pandas/tests/resample/test_resample_api.py | 606 + .../tests/resample/test_resampler_grouper.py | 288 + pandas/tests/resample/test_time_grouper.py | 278 + pandas/tests/resample/test_timedelta.py | 127 + pandas/tests/reshape/__init__.py | 0 pandas/tests/reshape/data/cut_data.csv | 1 + pandas/tests/reshape/merge/__init__.py | 0 .../merge/data/allow_exact_matches.csv | 28 + .../allow_exact_matches_and_tolerance.csv | 28 + pandas/tests/reshape/merge/data/asof.csv | 28 + pandas/tests/reshape/merge/data/asof2.csv | 78 + pandas/tests/reshape/merge/data/quotes.csv | 17 + pandas/tests/reshape/merge/data/quotes2.csv | 57 + pandas/tests/reshape/merge/data/tolerance.csv | 28 + pandas/tests/reshape/merge/data/trades.csv | 28 + pandas/tests/reshape/merge/data/trades2.csv | 78 + pandas/tests/reshape/merge/test_join.py | 884 ++ pandas/tests/reshape/merge/test_merge.py | 2172 +++ pandas/tests/reshape/merge/test_merge_asof.py | 1343 ++ .../merge/test_merge_index_as_string.py | 188 + .../tests/reshape/merge/test_merge_ordered.py | 117 + pandas/tests/reshape/merge/test_multi.py | 839 ++ pandas/tests/reshape/merge/test_pivot_old.py | 0 pandas/tests/reshape/test_concat.py | 2752 ++++ pandas/tests/reshape/test_cut.py | 627 + pandas/tests/reshape/test_melt.py | 992 ++ pandas/tests/reshape/test_pivot.py | 2643 ++++ pandas/tests/reshape/test_qcut.py | 300 + pandas/tests/reshape/test_reshape.py | 647 + .../tests/reshape/test_union_categoricals.py | 348 + pandas/tests/reshape/test_util.py | 51 + pandas/tests/scalar/__init__.py | 0 pandas/tests/scalar/interval/__init__.py | 0 pandas/tests/scalar/interval/test_interval.py | 259 + pandas/tests/scalar/interval/test_ops.py | 64 + pandas/tests/scalar/period/__init__.py | 0 pandas/tests/scalar/period/test_asfreq.py | 780 ++ pandas/tests/scalar/period/test_period.py | 1567 +++ pandas/tests/scalar/test_na_scalar.py | 294 + pandas/tests/scalar/test_nat.py | 510 + 
pandas/tests/scalar/timedelta/__init__.py | 0 .../tests/scalar/timedelta/test_arithmetic.py | 758 ++ .../scalar/timedelta/test_constructors.py | 276 + pandas/tests/scalar/timedelta/test_formats.py | 44 + .../tests/scalar/timedelta/test_timedelta.py | 823 ++ pandas/tests/scalar/timestamp/__init__.py | 0 .../tests/scalar/timestamp/test_arithmetic.py | 228 + .../scalar/timestamp/test_comparisons.py | 192 + .../tests/scalar/timestamp/test_rendering.py | 87 + .../tests/scalar/timestamp/test_timestamp.py | 1087 ++ .../tests/scalar/timestamp/test_timezones.py | 418 + .../tests/scalar/timestamp/test_unary_ops.py | 420 + pandas/tests/series/__init__.py | 0 pandas/tests/series/conftest.py | 33 + pandas/tests/series/indexing/__init__.py | 0 .../tests/series/indexing/test_alter_index.py | 570 + pandas/tests/series/indexing/test_boolean.py | 627 + pandas/tests/series/indexing/test_callable.py | 33 + pandas/tests/series/indexing/test_datetime.py | 772 ++ pandas/tests/series/indexing/test_iloc.py | 32 + pandas/tests/series/indexing/test_indexing.py | 937 ++ pandas/tests/series/indexing/test_loc.py | 159 + pandas/tests/series/indexing/test_numeric.py | 313 + pandas/tests/series/methods/__init__.py | 7 + pandas/tests/series/methods/test_append.py | 168 + pandas/tests/series/methods/test_argsort.py | 63 + pandas/tests/series/methods/test_asof.py | 178 + pandas/tests/series/methods/test_clip.py | 99 + pandas/tests/series/methods/test_count.py | 38 + pandas/tests/series/methods/test_cov_corr.py | 158 + pandas/tests/series/methods/test_describe.py | 69 + pandas/tests/series/methods/test_diff.py | 77 + .../series/methods/test_drop_duplicates.py | 141 + .../tests/series/methods/test_duplicated.py | 35 + pandas/tests/series/methods/test_explode.py | 121 + pandas/tests/series/methods/test_isin.py | 82 + pandas/tests/series/methods/test_nlargest.py | 213 + .../tests/series/methods/test_pct_change.py | 79 + pandas/tests/series/methods/test_quantile.py | 216 + 
pandas/tests/series/methods/test_rank.py | 565 + pandas/tests/series/methods/test_replace.py | 369 + pandas/tests/series/methods/test_round.py | 46 + .../tests/series/methods/test_searchsorted.py | 55 + pandas/tests/series/methods/test_shift.py | 275 + .../tests/series/methods/test_sort_index.py | 168 + .../tests/series/methods/test_sort_values.py | 183 + pandas/tests/series/methods/test_to_dict.py | 20 + pandas/tests/series/methods/test_truncate.py | 78 + .../tests/series/methods/test_value_counts.py | 205 + pandas/tests/series/test_alter_axes.py | 352 + pandas/tests/series/test_analytics.py | 270 + pandas/tests/series/test_api.py | 747 + pandas/tests/series/test_apply.py | 789 ++ pandas/tests/series/test_arithmetic.py | 205 + pandas/tests/series/test_block_internals.py | 39 + pandas/tests/series/test_combine_concat.py | 267 + pandas/tests/series/test_constructors.py | 1406 ++ pandas/tests/series/test_convert_dtypes.py | 286 + pandas/tests/series/test_cumulative.py | 181 + pandas/tests/series/test_datetime_values.py | 689 + pandas/tests/series/test_dtypes.py | 489 + pandas/tests/series/test_duplicates.py | 92 + pandas/tests/series/test_internals.py | 244 + pandas/tests/series/test_io.py | 240 + pandas/tests/series/test_missing.py | 1650 +++ pandas/tests/series/test_operators.py | 936 ++ pandas/tests/series/test_period.py | 170 + pandas/tests/series/test_repr.py | 489 + pandas/tests/series/test_subclass.py | 37 + pandas/tests/series/test_timeseries.py | 767 ++ pandas/tests/series/test_timezones.py | 366 + pandas/tests/series/test_ufunc.py | 304 + pandas/tests/series/test_validate.py | 20 + pandas/tests/test_algos.py | 2295 ++++ pandas/tests/test_common.py | 131 + pandas/tests/test_compat.py | 3 + pandas/tests/test_downstream.py | 180 + pandas/tests/test_errors.py | 66 + pandas/tests/test_expressions.py | 384 + pandas/tests/test_join.py | 346 + pandas/tests/test_lib.py | 196 + pandas/tests/test_multilevel.py | 2473 ++++ pandas/tests/test_nanops.py | 1074 ++ 
pandas/tests/test_optional_dependency.py | 52 + pandas/tests/test_register_accessor.py | 92 + pandas/tests/test_sorting.py | 454 + pandas/tests/test_strings.py | 3606 +++++ pandas/tests/test_take.py | 461 + pandas/tests/tools/__init__.py | 0 pandas/tests/tools/test_numeric.py | 629 + pandas/tests/tseries/__init__.py | 0 pandas/tests/tseries/frequencies/__init__.py | 0 .../tseries/frequencies/test_freq_code.py | 192 + .../tseries/frequencies/test_inference.py | 535 + .../tseries/frequencies/test_to_offset.py | 176 + pandas/tests/tseries/holiday/__init__.py | 0 pandas/tests/tseries/holiday/test_calendar.py | 100 + pandas/tests/tseries/holiday/test_federal.py | 38 + pandas/tests/tseries/holiday/test_holiday.py | 268 + .../tests/tseries/holiday/test_observance.py | 87 + pandas/tests/tseries/offsets/__init__.py | 0 pandas/tests/tseries/offsets/common.py | 26 + pandas/tests/tseries/offsets/conftest.py | 25 + .../tseries/offsets/data/cday-0.14.1.pickle | Bin 0 -> 492 bytes .../offsets/data/dateoffset_0_15_2.pickle | 183 + pandas/tests/tseries/offsets/test_fiscal.py | 692 + pandas/tests/tseries/offsets/test_offsets.py | 4415 ++++++ .../offsets/test_offsets_properties.py | 140 + pandas/tests/tseries/offsets/test_ticks.py | 322 + .../tests/tseries/offsets/test_yqm_offsets.py | 1464 ++ pandas/tests/tslibs/__init__.py | 0 pandas/tests/tslibs/test_api.py | 47 + pandas/tests/tslibs/test_array_to_datetime.py | 197 + pandas/tests/tslibs/test_ccalendar.py | 27 + pandas/tests/tslibs/test_conversion.py | 100 + pandas/tests/tslibs/test_fields.py | 31 + pandas/tests/tslibs/test_libfrequencies.py | 104 + pandas/tests/tslibs/test_liboffsets.py | 169 + pandas/tests/tslibs/test_normalize_date.py | 41 + pandas/tests/tslibs/test_parse_iso8601.py | 72 + pandas/tests/tslibs/test_parsing.py | 227 + pandas/tests/tslibs/test_period_asfreq.py | 78 + pandas/tests/tslibs/test_timedeltas.py | 30 + pandas/tests/tslibs/test_timezones.py | 108 + pandas/tests/util/__init__.py | 0 
pandas/tests/util/conftest.py | 26 + pandas/tests/util/test_assert_almost_equal.py | 361 + .../util/test_assert_categorical_equal.py | 90 + .../util/test_assert_extension_array_equal.py | 107 + pandas/tests/util/test_assert_frame_equal.py | 224 + pandas/tests/util/test_assert_index_equal.py | 172 + .../util/test_assert_interval_array_equal.py | 81 + .../util/test_assert_numpy_array_equal.py | 213 + .../util/test_assert_produces_warning.py | 22 + pandas/tests/util/test_assert_series_equal.py | 196 + pandas/tests/util/test_deprecate.py | 64 + pandas/tests/util/test_deprecate_kwarg.py | 90 + pandas/tests/util/test_hashing.py | 383 + pandas/tests/util/test_safe_import.py | 39 + pandas/tests/util/test_util.py | 78 + pandas/tests/util/test_validate_args.py | 67 + .../util/test_validate_args_and_kwargs.py | 81 + pandas/tests/util/test_validate_kwargs.py | 63 + pandas/tests/window/__init__.py | 0 pandas/tests/window/common.py | 386 + pandas/tests/window/conftest.py | 89 + pandas/tests/window/moments/conftest.py | 20 + .../tests/window/moments/test_moments_ewm.py | 439 + .../window/moments/test_moments_expanding.py | 409 + .../window/moments/test_moments_rolling.py | 1529 +++ pandas/tests/window/test_api.py | 344 + pandas/tests/window/test_apply.py | 165 + pandas/tests/window/test_base_indexer.py | 82 + pandas/tests/window/test_dtypes.py | 242 + pandas/tests/window/test_ewm.py | 70 + pandas/tests/window/test_expanding.py | 134 + pandas/tests/window/test_grouper.py | 210 + pandas/tests/window/test_numba.py | 74 + pandas/tests/window/test_pairwise.py | 191 + pandas/tests/window/test_rolling.py | 447 + pandas/tests/window/test_timeseries_window.py | 745 + pandas/tests/window/test_window.py | 76 + pandas/tseries/__init__.py | 0 pandas/tseries/api.py | 8 + pandas/tseries/frequencies.py | 538 + pandas/tseries/holiday.py | 534 + pandas/tseries/offsets.py | 2838 ++++ pandas/util/__init__.py | 30 + pandas/util/_decorators.py | 342 + pandas/util/_depr_module.py | 107 + 
pandas/util/_doctools.py | 193 + pandas/util/_exceptions.py | 19 + pandas/util/_print_versions.py | 150 + pandas/util/_test_decorators.py | 264 + pandas/util/_tester.py | 30 + pandas/util/_validators.py | 378 + pandas/util/testing.py | 12 + pyproject.toml | 34 + release_stats.sh | 51 + requirements-dev.txt | 75 + scripts/build_dist.sh | 18 + scripts/build_dist_for_release.sh | 10 + scripts/download_wheels.py | 47 + scripts/find_commits_touching_func.py | 244 + scripts/generate_pip_deps_from_conda.py | 139 + scripts/list_future_warnings.sh | 46 + scripts/tests/__init__.py | 0 scripts/tests/conftest.py | 6 + scripts/tests/test_validate_docstrings.py | 1467 ++ scripts/validate_docstrings.py | 1062 ++ scripts/validate_string_concatenation.py | 129 + setup.cfg | 340 + setup.py | 781 ++ test.bat | 3 + test.sh | 4 + test_fast.bat | 3 + test_fast.sh | 8 + test_rebuild.sh | 6 + versioneer.py | 1748 +++ web/README.md | 12 + web/pandas/_templates/layout.html | 108 + web/pandas/about/citing.md | 123 + web/pandas/about/index.md | 86 + web/pandas/about/roadmap.md | 195 + web/pandas/about/sponsors.md | 41 + web/pandas/about/team.md | 77 + web/pandas/community/blog.html | 14 + web/pandas/community/coc.md | 63 + web/pandas/community/ecosystem.md | 365 + web/pandas/config.yml | 130 + web/pandas/contribute.md | 55 + web/pandas/donate.md | 14 + web/pandas/getting_started.md | 51 + web/pandas/index.html | 114 + web/pandas/static/css/pandas.css | 66 + web/pandas/static/img/favicon.ico | Bin 0 -> 1150 bytes .../static/img/install/anaconda_prompt.png | Bin 0 -> 1373 bytes .../static/img/install/jupyterlab_home.png | Bin 0 -> 1962 bytes .../img/install/pandas_import_and_version.png | Bin 0 -> 2252 bytes web/pandas/static/img/pandas.svg | 1 + web/pandas/static/img/pandas_mark.svg | 111 + web/pandas/static/img/pandas_mark_white.svg | 111 + web/pandas/static/img/pandas_secondary.svg | 1 + .../static/img/pandas_secondary_white.svg | 1 + web/pandas/static/img/pandas_white.svg | 1 + 
web/pandas/static/img/partners/anaconda.svg | 99 + web/pandas/static/img/partners/numfocus.svg | 60 + web/pandas/static/img/partners/r_studio.svg | 50 + web/pandas/static/img/partners/tidelift.svg | 33 + web/pandas/static/img/partners/two_sigma.svg | 1 + web/pandas/static/img/partners/ursa_labs.svg | 106 + web/pandas/static/img/pydata_book.gif | Bin 0 -> 5862 bytes web/pandas/try.md | 21 + web/pandas_web.py | 286 + 1709 files changed, 621319 insertions(+) create mode 100644 .gitattributes create mode 100644 .github/CODE_OF_CONDUCT.md create mode 100644 .github/CONTRIBUTING.md create mode 100644 .github/FUNDING.yml create mode 100644 .github/ISSUE_TEMPLATE.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/SECURITY.md create mode 100644 .github/workflows/assign.yml create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 .pep8speaks.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .travis.yml create mode 100644 AUTHORS.md create mode 100644 LICENSE create mode 100644 LICENSES/DATEUTIL_LICENSE create mode 100644 LICENSES/HAVEN_LICENSE create mode 100644 LICENSES/HAVEN_MIT create mode 100644 LICENSES/MUSL_LICENSE create mode 100644 LICENSES/NUMPY_LICENSE create mode 100644 LICENSES/OTHER create mode 100644 LICENSES/PSF_LICENSE create mode 100644 LICENSES/SAS7BDAT_LICENSE create mode 100644 LICENSES/SCIPY_LICENSE create mode 100644 LICENSES/ULTRAJSON_LICENSE create mode 100644 LICENSES/XARRAY_LICENSE create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100644 RELEASE.md create mode 100644 asv_bench/asv.conf.json create mode 100644 asv_bench/benchmarks/__init__.py create mode 100644 asv_bench/benchmarks/algorithms.py create mode 100644 asv_bench/benchmarks/array.py create mode 100644 asv_bench/benchmarks/attrs_caching.py create mode 100644 asv_bench/benchmarks/binary_ops.py create mode 100644 asv_bench/benchmarks/boolean.py create mode 
100644 asv_bench/benchmarks/categoricals.py create mode 100644 asv_bench/benchmarks/ctors.py create mode 100644 asv_bench/benchmarks/dtypes.py create mode 100644 asv_bench/benchmarks/eval.py create mode 100644 asv_bench/benchmarks/frame_ctor.py create mode 100644 asv_bench/benchmarks/frame_methods.py create mode 100644 asv_bench/benchmarks/gil.py create mode 100644 asv_bench/benchmarks/groupby.py create mode 100644 asv_bench/benchmarks/index_cached_properties.py create mode 100644 asv_bench/benchmarks/index_object.py create mode 100644 asv_bench/benchmarks/indexing.py create mode 100644 asv_bench/benchmarks/indexing_engines.py create mode 100644 asv_bench/benchmarks/inference.py create mode 100644 asv_bench/benchmarks/io/__init__.py create mode 100644 asv_bench/benchmarks/io/csv.py create mode 100644 asv_bench/benchmarks/io/excel.py create mode 100644 asv_bench/benchmarks/io/hdf.py create mode 100644 asv_bench/benchmarks/io/json.py create mode 100644 asv_bench/benchmarks/io/parsers.py create mode 100644 asv_bench/benchmarks/io/pickle.py create mode 100644 asv_bench/benchmarks/io/sas.py create mode 100644 asv_bench/benchmarks/io/sql.py create mode 100644 asv_bench/benchmarks/io/stata.py create mode 100644 asv_bench/benchmarks/join_merge.py create mode 100644 asv_bench/benchmarks/multiindex_object.py create mode 100644 asv_bench/benchmarks/offset.py create mode 100644 asv_bench/benchmarks/package.py create mode 100644 asv_bench/benchmarks/pandas_vb_common.py create mode 100644 asv_bench/benchmarks/period.py create mode 100644 asv_bench/benchmarks/plotting.py create mode 100644 asv_bench/benchmarks/reindex.py create mode 100644 asv_bench/benchmarks/replace.py create mode 100644 asv_bench/benchmarks/reshape.py create mode 100644 asv_bench/benchmarks/rolling.py create mode 100644 asv_bench/benchmarks/series_methods.py create mode 100644 asv_bench/benchmarks/sparse.py create mode 100644 asv_bench/benchmarks/stat_ops.py create mode 100644 asv_bench/benchmarks/strings.py 
create mode 100644 asv_bench/benchmarks/timedelta.py create mode 100644 asv_bench/benchmarks/timeseries.py create mode 100644 asv_bench/benchmarks/tslibs/__init__.py create mode 100644 asv_bench/benchmarks/tslibs/offsets.py create mode 100644 asv_bench/benchmarks/tslibs/period.py create mode 100644 asv_bench/benchmarks/tslibs/timedelta.py create mode 100644 asv_bench/benchmarks/tslibs/timestamp.py create mode 100644 azure-pipelines.yml create mode 100644 ci/azure/posix.yml create mode 100644 ci/azure/windows.yml create mode 100755 ci/check_cache.sh create mode 100755 ci/check_git_tags.sh create mode 100755 ci/code_checks.sh create mode 100644 ci/deps/azure-36-32bit.yaml create mode 100644 ci/deps/azure-36-locale.yaml create mode 100644 ci/deps/azure-36-locale_slow.yaml create mode 100644 ci/deps/azure-36-minimum_versions.yaml create mode 100644 ci/deps/azure-37-locale.yaml create mode 100644 ci/deps/azure-37-numpydev.yaml create mode 100644 ci/deps/azure-macos-36.yaml create mode 100644 ci/deps/azure-windows-36.yaml create mode 100644 ci/deps/azure-windows-37.yaml create mode 100644 ci/deps/travis-36-cov.yaml create mode 100644 ci/deps/travis-36-locale.yaml create mode 100644 ci/deps/travis-36-slow.yaml create mode 100644 ci/deps/travis-37.yaml create mode 100644 ci/deps/travis-38.yaml create mode 100755 ci/prep_cython_cache.sh create mode 100755 ci/print_skipped.py create mode 100755 ci/run_tests.sh create mode 100755 ci/setup_env.sh create mode 100755 ci/submit_cython_cache.sh create mode 100755 ci/travis_encrypt_gbq.sh create mode 100644 ci/travis_gbq.json.enc create mode 100644 ci/travis_gbq_config.txt create mode 100755 ci/travis_process_gbq_encryption.sh create mode 100644 codecov.yml create mode 100644 conda.recipe/bld.bat create mode 100644 conda.recipe/build.sh create mode 100644 conda.recipe/meta.yaml create mode 100644 doc/.gitignore create mode 100644 doc/README.rst create mode 100644 doc/_templates/api_redirect.html create mode 100644 
doc/_templates/autosummary/accessor.rst create mode 100644 doc/_templates/autosummary/accessor_attribute.rst create mode 100644 doc/_templates/autosummary/accessor_callable.rst create mode 100644 doc/_templates/autosummary/accessor_method.rst create mode 100644 doc/_templates/autosummary/class.rst create mode 100644 doc/_templates/autosummary/class_without_autosummary.rst create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet.pdf create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet.pptx create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf create mode 100644 doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx create mode 100644 doc/cheatsheet/README.txt create mode 100644 doc/data/air_quality_long.csv create mode 100644 doc/data/air_quality_no2.csv create mode 100644 doc/data/air_quality_no2_long.csv create mode 100644 doc/data/air_quality_parameters.csv create mode 100644 doc/data/air_quality_pm25_long.csv create mode 100644 doc/data/air_quality_stations.csv create mode 100644 doc/data/baseball.csv create mode 100644 doc/data/fx_prices create mode 100644 doc/data/iris.data create mode 100644 doc/data/mindex_ex.csv create mode 100644 doc/data/test.xls create mode 100644 doc/data/tips.csv create mode 100644 doc/data/titanic.csv create mode 100755 doc/make.py create mode 100644 doc/redirects.csv create mode 100644 doc/source/_static/banklist.html create mode 100644 doc/source/_static/ci.png create mode 100644 doc/source/_static/css/getting_started.css create mode 100644 doc/source/_static/css/pandas.css create mode 100644 doc/source/_static/df_repr_truncated.png create mode 100644 doc/source/_static/eval-perf-small.png create mode 100644 doc/source/_static/eval-perf.png create mode 100644 doc/source/_static/index_api.svg create mode 100644 doc/source/_static/index_contribute.svg create mode 100644 doc/source/_static/index_getting_started.svg create mode 100644 doc/source/_static/index_user_guide.svg create mode 100644 doc/source/_static/legacy_0.10.h5 create mode 100644 
doc/source/_static/logo_r.svg create mode 100644 doc/source/_static/logo_sas.svg create mode 100644 doc/source/_static/logo_sql.svg create mode 100644 doc/source/_static/logo_stata.svg create mode 100644 doc/source/_static/new-excel-index.png create mode 100644 doc/source/_static/old-excel-index.png create mode 100644 doc/source/_static/option_unicode01.png create mode 100644 doc/source/_static/option_unicode02.png create mode 100644 doc/source/_static/option_unicode03.png create mode 100644 doc/source/_static/option_unicode04.png create mode 100644 doc/source/_static/print_df_new.png create mode 100644 doc/source/_static/print_df_old.png create mode 100644 doc/source/_static/query-perf-small.png create mode 100644 doc/source/_static/query-perf.png create mode 100644 doc/source/_static/question_mark_noback.svg create mode 100644 doc/source/_static/reshaping_melt.png create mode 100644 doc/source/_static/reshaping_pivot.png create mode 100644 doc/source/_static/reshaping_stack.png create mode 100644 doc/source/_static/reshaping_unstack.png create mode 100644 doc/source/_static/reshaping_unstack_0.png create mode 100644 doc/source/_static/reshaping_unstack_1.png create mode 100644 doc/source/_static/schemas/01_table_dataframe.svg create mode 100644 doc/source/_static/schemas/01_table_series.svg create mode 100644 doc/source/_static/schemas/01_table_spreadsheet.png create mode 100644 doc/source/_static/schemas/02_io_readwrite.svg create mode 100644 doc/source/_static/schemas/03_subset_columns.svg create mode 100644 doc/source/_static/schemas/03_subset_columns_rows.svg create mode 100644 doc/source/_static/schemas/03_subset_rows.svg create mode 100644 doc/source/_static/schemas/04_plot_overview.svg create mode 100644 doc/source/_static/schemas/05_newcolumn_1.svg create mode 100644 doc/source/_static/schemas/05_newcolumn_2.svg create mode 100644 doc/source/_static/schemas/05_newcolumn_3.svg create mode 100644 doc/source/_static/schemas/06_aggregate.svg create mode 
100644 doc/source/_static/schemas/06_groupby.svg create mode 100644 doc/source/_static/schemas/06_groupby_agg_detail.svg create mode 100644 doc/source/_static/schemas/06_groupby_select_detail.svg create mode 100644 doc/source/_static/schemas/06_reduction.svg create mode 100644 doc/source/_static/schemas/06_valuecounts.svg create mode 100644 doc/source/_static/schemas/07_melt.svg create mode 100644 doc/source/_static/schemas/07_pivot.svg create mode 100644 doc/source/_static/schemas/07_pivot_table.svg create mode 100644 doc/source/_static/schemas/08_concat_column.svg create mode 100644 doc/source/_static/schemas/08_concat_row.svg create mode 100644 doc/source/_static/schemas/08_merge_left.svg create mode 100644 doc/source/_static/stub create mode 100644 doc/source/_static/style-excel.png create mode 100644 doc/source/_static/trunc_after.png create mode 100644 doc/source/_static/trunc_before.png create mode 100644 doc/source/_static/whatsnew_assign.png create mode 100644 doc/source/_static/whatsnew_plot_submethods.png create mode 100644 doc/source/conf.py create mode 100644 doc/source/development/code_style.rst create mode 100644 doc/source/development/contributing.rst create mode 100644 doc/source/development/contributing_docstring.rst create mode 100644 doc/source/development/developer.rst create mode 100644 doc/source/development/extending.rst create mode 100644 doc/source/development/index.rst create mode 100644 doc/source/development/internals.rst create mode 100644 doc/source/development/maintaining.rst create mode 100644 doc/source/development/meeting.rst create mode 100644 doc/source/development/policies.rst create mode 100644 doc/source/development/roadmap.rst create mode 100644 doc/source/ecosystem.rst create mode 100644 doc/source/getting_started/10min.rst create mode 100644 doc/source/getting_started/basics.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_r.rst create mode 100644 
doc/source/getting_started/comparison/comparison_with_sas.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_sql.rst create mode 100644 doc/source/getting_started/comparison/comparison_with_stata.rst create mode 100644 doc/source/getting_started/comparison/index.rst create mode 100644 doc/source/getting_started/dsintro.rst create mode 100644 doc/source/getting_started/index.rst create mode 100644 doc/source/getting_started/install.rst create mode 100644 doc/source/getting_started/intro_tutorials/01_table_oriented.rst create mode 100644 doc/source/getting_started/intro_tutorials/02_read_write.rst create mode 100644 doc/source/getting_started/intro_tutorials/03_subset_data.rst create mode 100644 doc/source/getting_started/intro_tutorials/04_plotting.rst create mode 100644 doc/source/getting_started/intro_tutorials/05_add_columns.rst create mode 100644 doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst create mode 100644 doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst create mode 100644 doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst create mode 100644 doc/source/getting_started/intro_tutorials/09_timeseries.rst create mode 100644 doc/source/getting_started/intro_tutorials/10_text_data.rst create mode 100644 doc/source/getting_started/intro_tutorials/index.rst create mode 100644 doc/source/getting_started/overview.rst create mode 100644 doc/source/getting_started/tutorials.rst create mode 100644 doc/source/index.rst.template create mode 100644 doc/source/reference/arrays.rst create mode 100644 doc/source/reference/extensions.rst create mode 100644 doc/source/reference/frame.rst create mode 100644 doc/source/reference/general_functions.rst create mode 100644 doc/source/reference/general_utility_functions.rst create mode 100644 doc/source/reference/groupby.rst create mode 100644 doc/source/reference/index.rst create mode 100644 doc/source/reference/indexing.rst create mode 100644 
doc/source/reference/io.rst create mode 100644 doc/source/reference/offset_frequency.rst create mode 100644 doc/source/reference/panel.rst create mode 100644 doc/source/reference/plotting.rst create mode 100644 doc/source/reference/resampling.rst create mode 100644 doc/source/reference/series.rst create mode 100644 doc/source/reference/style.rst create mode 100644 doc/source/reference/window.rst create mode 100644 doc/source/styled.xlsx create mode 100644 doc/source/themes/nature_with_gtoc/layout.html create mode 100644 doc/source/themes/nature_with_gtoc/static/nature.css_t create mode 100644 doc/source/themes/nature_with_gtoc/theme.conf create mode 100644 doc/source/user_guide/advanced.rst create mode 100644 doc/source/user_guide/boolean.rst create mode 100644 doc/source/user_guide/categorical.rst create mode 100644 doc/source/user_guide/computation.rst create mode 100644 doc/source/user_guide/enhancingperf.rst create mode 100644 doc/source/user_guide/gotchas.rst create mode 100644 doc/source/user_guide/groupby.rst create mode 100644 doc/source/user_guide/index.rst create mode 100644 doc/source/user_guide/indexing.rst create mode 100644 doc/source/user_guide/integer_na.rst create mode 100644 doc/source/user_guide/io.rst create mode 100644 doc/source/user_guide/merging.rst create mode 100644 doc/source/user_guide/missing_data.rst create mode 100644 doc/source/user_guide/options.rst create mode 100644 doc/source/user_guide/reshaping.rst create mode 100644 doc/source/user_guide/scale.rst create mode 100644 doc/source/user_guide/sparse.rst create mode 100644 doc/source/user_guide/style.ipynb create mode 100644 doc/source/user_guide/templates/myhtml.tpl create mode 100644 doc/source/user_guide/templates/template_structure.html create mode 100644 doc/source/user_guide/text.rst create mode 100644 doc/source/user_guide/timedeltas.rst create mode 100644 doc/source/user_guide/timeseries.rst create mode 100644 doc/source/user_guide/visualization.rst create mode 100644 
doc/source/whatsnew/index.rst create mode 100644 doc/source/whatsnew/v0.10.0.rst create mode 100644 doc/source/whatsnew/v0.10.1.rst create mode 100644 doc/source/whatsnew/v0.11.0.rst create mode 100644 doc/source/whatsnew/v0.12.0.rst create mode 100644 doc/source/whatsnew/v0.13.0.rst create mode 100644 doc/source/whatsnew/v0.13.1.rst create mode 100644 doc/source/whatsnew/v0.14.0.rst create mode 100644 doc/source/whatsnew/v0.14.1.rst create mode 100644 doc/source/whatsnew/v0.15.0.rst create mode 100644 doc/source/whatsnew/v0.15.1.rst create mode 100644 doc/source/whatsnew/v0.15.2.rst create mode 100644 doc/source/whatsnew/v0.16.0.rst create mode 100644 doc/source/whatsnew/v0.16.1.rst create mode 100644 doc/source/whatsnew/v0.16.2.rst create mode 100644 doc/source/whatsnew/v0.17.0.rst create mode 100644 doc/source/whatsnew/v0.17.1.rst create mode 100644 doc/source/whatsnew/v0.18.0.rst create mode 100644 doc/source/whatsnew/v0.18.1.rst create mode 100644 doc/source/whatsnew/v0.19.0.rst create mode 100644 doc/source/whatsnew/v0.19.1.rst create mode 100644 doc/source/whatsnew/v0.19.2.rst create mode 100644 doc/source/whatsnew/v0.20.0.rst create mode 100644 doc/source/whatsnew/v0.20.2.rst create mode 100644 doc/source/whatsnew/v0.20.3.rst create mode 100644 doc/source/whatsnew/v0.21.0.rst create mode 100644 doc/source/whatsnew/v0.21.1.rst create mode 100644 doc/source/whatsnew/v0.22.0.rst create mode 100644 doc/source/whatsnew/v0.23.0.rst create mode 100644 doc/source/whatsnew/v0.23.1.rst create mode 100644 doc/source/whatsnew/v0.23.2.rst create mode 100644 doc/source/whatsnew/v0.23.3.rst create mode 100644 doc/source/whatsnew/v0.23.4.rst create mode 100644 doc/source/whatsnew/v0.24.0.rst create mode 100644 doc/source/whatsnew/v0.24.1.rst create mode 100644 doc/source/whatsnew/v0.24.2.rst create mode 100644 doc/source/whatsnew/v0.25.0.rst create mode 100644 doc/source/whatsnew/v0.25.1.rst create mode 100644 doc/source/whatsnew/v0.25.2.rst create mode 100644 
doc/source/whatsnew/v0.25.3.rst create mode 100644 doc/source/whatsnew/v0.4.x.rst create mode 100644 doc/source/whatsnew/v0.5.0.rst create mode 100644 doc/source/whatsnew/v0.6.0.rst create mode 100644 doc/source/whatsnew/v0.6.1.rst create mode 100644 doc/source/whatsnew/v0.7.0.rst create mode 100644 doc/source/whatsnew/v0.7.1.rst create mode 100644 doc/source/whatsnew/v0.7.2.rst create mode 100644 doc/source/whatsnew/v0.7.3.rst create mode 100644 doc/source/whatsnew/v0.8.0.rst create mode 100644 doc/source/whatsnew/v0.8.1.rst create mode 100644 doc/source/whatsnew/v0.9.0.rst create mode 100644 doc/source/whatsnew/v0.9.1.rst create mode 100755 doc/source/whatsnew/v1.0.0.rst create mode 100644 doc/source/whatsnew/v1.0.1.rst create mode 100644 doc/source/whatsnew/v1.0.2.rst create mode 100644 doc/source/whatsnew/v1.0.3.rst create mode 100644 doc/source/whatsnew/v1.0.4.rst create mode 100644 doc/source/whatsnew/v1.0.5.rst create mode 100644 doc/source/whatsnew/whatsnew_0171_html_table.html create mode 100644 doc/sphinxext/README.rst create mode 100755 doc/sphinxext/announce.py create mode 100644 doc/sphinxext/contributors.py create mode 100644 environment.yml create mode 100644 pandas/__init__.py create mode 100644 pandas/_config/__init__.py create mode 100644 pandas/_config/config.py create mode 100644 pandas/_config/dates.py create mode 100644 pandas/_config/display.py create mode 100644 pandas/_config/localization.py create mode 100644 pandas/_libs/__init__.py create mode 100644 pandas/_libs/algos.pxd create mode 100644 pandas/_libs/algos.pyx create mode 100644 pandas/_libs/algos_common_helper.pxi.in create mode 100644 pandas/_libs/algos_take_helper.pxi.in create mode 100644 pandas/_libs/groupby.pyx create mode 100644 pandas/_libs/hashing.pyx create mode 100644 pandas/_libs/hashtable.pxd create mode 100644 pandas/_libs/hashtable.pyx create mode 100644 pandas/_libs/hashtable_class_helper.pxi.in create mode 100644 pandas/_libs/hashtable_func_helper.pxi.in create mode 
100644 pandas/_libs/index.pyx create mode 100644 pandas/_libs/index_class_helper.pxi.in create mode 100644 pandas/_libs/indexing.pyx create mode 100644 pandas/_libs/internals.pyx create mode 100644 pandas/_libs/interval.pyx create mode 100644 pandas/_libs/intervaltree.pxi.in create mode 100644 pandas/_libs/join.pyx create mode 100644 pandas/_libs/khash.pxd create mode 100644 pandas/_libs/lib.pxd create mode 100644 pandas/_libs/lib.pyx create mode 100644 pandas/_libs/missing.pxd create mode 100644 pandas/_libs/missing.pyx create mode 100644 pandas/_libs/ops.pyx create mode 100644 pandas/_libs/ops_dispatch.pyx create mode 100644 pandas/_libs/parsers.pyx create mode 100644 pandas/_libs/properties.pyx create mode 100644 pandas/_libs/reduction.pyx create mode 100644 pandas/_libs/reshape.pyx create mode 100644 pandas/_libs/sparse.pyx create mode 100644 pandas/_libs/sparse_op_helper.pxi.in create mode 100644 pandas/_libs/src/headers/cmath create mode 100644 pandas/_libs/src/headers/ms_inttypes.h create mode 100644 pandas/_libs/src/headers/ms_stdint.h create mode 100644 pandas/_libs/src/headers/portable.h create mode 100644 pandas/_libs/src/headers/stdint.h create mode 100644 pandas/_libs/src/inline_helper.h create mode 100644 pandas/_libs/src/klib/khash.h create mode 100644 pandas/_libs/src/klib/khash_python.h create mode 100644 pandas/_libs/src/parse_helper.h create mode 100644 pandas/_libs/src/parser/io.c create mode 100644 pandas/_libs/src/parser/io.h create mode 100644 pandas/_libs/src/parser/tokenizer.c create mode 100644 pandas/_libs/src/parser/tokenizer.h create mode 100644 pandas/_libs/src/skiplist.h create mode 100644 pandas/_libs/src/ujson/lib/ultrajson.h create mode 100644 pandas/_libs/src/ujson/lib/ultrajsondec.c create mode 100644 pandas/_libs/src/ujson/lib/ultrajsonenc.c create mode 100644 pandas/_libs/src/ujson/python/JSONtoObj.c create mode 100644 pandas/_libs/src/ujson/python/objToJSON.c create mode 100644 pandas/_libs/src/ujson/python/ujson.c create mode 
100644 pandas/_libs/src/ujson/python/version.h create mode 100644 pandas/_libs/testing.pyx create mode 100644 pandas/_libs/tslib.pyx create mode 100644 pandas/_libs/tslibs/__init__.py create mode 100644 pandas/_libs/tslibs/c_timestamp.pxd create mode 100644 pandas/_libs/tslibs/c_timestamp.pyx create mode 100644 pandas/_libs/tslibs/ccalendar.pxd create mode 100644 pandas/_libs/tslibs/ccalendar.pyx create mode 100644 pandas/_libs/tslibs/conversion.pxd create mode 100644 pandas/_libs/tslibs/conversion.pyx create mode 100644 pandas/_libs/tslibs/fields.pyx create mode 100644 pandas/_libs/tslibs/frequencies.pxd create mode 100644 pandas/_libs/tslibs/frequencies.pyx create mode 100644 pandas/_libs/tslibs/nattype.pxd create mode 100644 pandas/_libs/tslibs/nattype.pyx create mode 100644 pandas/_libs/tslibs/np_datetime.pxd create mode 100644 pandas/_libs/tslibs/np_datetime.pyx create mode 100644 pandas/_libs/tslibs/offsets.pxd create mode 100644 pandas/_libs/tslibs/offsets.pyx create mode 100644 pandas/_libs/tslibs/parsing.pyx create mode 100644 pandas/_libs/tslibs/period.pyx create mode 100644 pandas/_libs/tslibs/resolution.pyx create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.c create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime.h create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.c create mode 100644 pandas/_libs/tslibs/src/datetime/np_datetime_strings.h create mode 100644 pandas/_libs/tslibs/strptime.pyx create mode 100644 pandas/_libs/tslibs/timedeltas.pxd create mode 100644 pandas/_libs/tslibs/timedeltas.pyx create mode 100644 pandas/_libs/tslibs/timestamps.pxd create mode 100644 pandas/_libs/tslibs/timestamps.pyx create mode 100644 pandas/_libs/tslibs/timezones.pxd create mode 100644 pandas/_libs/tslibs/timezones.pyx create mode 100644 pandas/_libs/tslibs/tzconversion.pxd create mode 100644 pandas/_libs/tslibs/tzconversion.pyx create mode 100644 pandas/_libs/tslibs/util.pxd create mode 100644 pandas/_libs/util.pxd create 
mode 100644 pandas/_libs/window/__init__.py create mode 100644 pandas/_libs/window/aggregations.pyx create mode 100644 pandas/_libs/window/indexers.pyx create mode 100644 pandas/_libs/writers.pyx create mode 100644 pandas/_testing.py create mode 100644 pandas/_typing.py create mode 100644 pandas/_version.py create mode 100644 pandas/api/__init__.py create mode 100644 pandas/api/extensions/__init__.py create mode 100644 pandas/api/indexers/__init__.py create mode 100644 pandas/api/types/__init__.py create mode 100644 pandas/arrays/__init__.py create mode 100644 pandas/compat/__init__.py create mode 100644 pandas/compat/_optional.py create mode 100644 pandas/compat/chainmap.py create mode 100644 pandas/compat/numpy/__init__.py create mode 100644 pandas/compat/numpy/function.py create mode 100644 pandas/compat/pickle_compat.py create mode 100644 pandas/conftest.py create mode 100644 pandas/core/__init__.py create mode 100644 pandas/core/accessor.py create mode 100644 pandas/core/algorithms.py create mode 100644 pandas/core/api.py create mode 100644 pandas/core/apply.py create mode 100644 pandas/core/arrays/__init__.py create mode 100644 pandas/core/arrays/_arrow_utils.py create mode 100644 pandas/core/arrays/_ranges.py create mode 100644 pandas/core/arrays/base.py create mode 100644 pandas/core/arrays/boolean.py create mode 100644 pandas/core/arrays/categorical.py create mode 100644 pandas/core/arrays/datetimelike.py create mode 100644 pandas/core/arrays/datetimes.py create mode 100644 pandas/core/arrays/integer.py create mode 100644 pandas/core/arrays/interval.py create mode 100644 pandas/core/arrays/masked.py create mode 100644 pandas/core/arrays/numpy_.py create mode 100644 pandas/core/arrays/period.py create mode 100644 pandas/core/arrays/sparse/__init__.py create mode 100644 pandas/core/arrays/sparse/accessor.py create mode 100644 pandas/core/arrays/sparse/array.py create mode 100644 pandas/core/arrays/sparse/dtype.py create mode 100644 
pandas/core/arrays/sparse/scipy_sparse.py create mode 100644 pandas/core/arrays/string_.py create mode 100644 pandas/core/arrays/timedeltas.py create mode 100644 pandas/core/base.py create mode 100644 pandas/core/common.py create mode 100644 pandas/core/computation/__init__.py create mode 100644 pandas/core/computation/align.py create mode 100644 pandas/core/computation/api.py create mode 100644 pandas/core/computation/check.py create mode 100644 pandas/core/computation/common.py create mode 100644 pandas/core/computation/engines.py create mode 100644 pandas/core/computation/eval.py create mode 100644 pandas/core/computation/expr.py create mode 100644 pandas/core/computation/expressions.py create mode 100644 pandas/core/computation/ops.py create mode 100644 pandas/core/computation/parsing.py create mode 100644 pandas/core/computation/pytables.py create mode 100644 pandas/core/computation/scope.py create mode 100644 pandas/core/config_init.py create mode 100644 pandas/core/construction.py create mode 100644 pandas/core/dtypes/__init__.py create mode 100644 pandas/core/dtypes/api.py create mode 100644 pandas/core/dtypes/base.py create mode 100644 pandas/core/dtypes/cast.py create mode 100644 pandas/core/dtypes/common.py create mode 100644 pandas/core/dtypes/concat.py create mode 100644 pandas/core/dtypes/dtypes.py create mode 100644 pandas/core/dtypes/generic.py create mode 100644 pandas/core/dtypes/inference.py create mode 100644 pandas/core/dtypes/missing.py create mode 100644 pandas/core/frame.py create mode 100644 pandas/core/generic.py create mode 100644 pandas/core/groupby/__init__.py create mode 100644 pandas/core/groupby/base.py create mode 100644 pandas/core/groupby/categorical.py create mode 100644 pandas/core/groupby/generic.py create mode 100644 pandas/core/groupby/groupby.py create mode 100644 pandas/core/groupby/grouper.py create mode 100644 pandas/core/groupby/ops.py create mode 100644 pandas/core/index.py create mode 100644 pandas/core/indexers.py 
create mode 100644 pandas/core/indexes/__init__.py create mode 100644 pandas/core/indexes/accessors.py create mode 100644 pandas/core/indexes/api.py create mode 100644 pandas/core/indexes/base.py create mode 100644 pandas/core/indexes/category.py create mode 100644 pandas/core/indexes/datetimelike.py create mode 100644 pandas/core/indexes/datetimes.py create mode 100644 pandas/core/indexes/extension.py create mode 100644 pandas/core/indexes/frozen.py create mode 100644 pandas/core/indexes/interval.py create mode 100644 pandas/core/indexes/multi.py create mode 100644 pandas/core/indexes/numeric.py create mode 100644 pandas/core/indexes/period.py create mode 100644 pandas/core/indexes/range.py create mode 100644 pandas/core/indexes/timedeltas.py create mode 100755 pandas/core/indexing.py create mode 100644 pandas/core/internals/__init__.py create mode 100644 pandas/core/internals/blocks.py create mode 100644 pandas/core/internals/concat.py create mode 100644 pandas/core/internals/construction.py create mode 100644 pandas/core/internals/managers.py create mode 100644 pandas/core/missing.py create mode 100644 pandas/core/nanops.py create mode 100644 pandas/core/ops/__init__.py create mode 100644 pandas/core/ops/array_ops.py create mode 100644 pandas/core/ops/common.py create mode 100644 pandas/core/ops/dispatch.py create mode 100644 pandas/core/ops/docstrings.py create mode 100644 pandas/core/ops/invalid.py create mode 100644 pandas/core/ops/mask_ops.py create mode 100644 pandas/core/ops/methods.py create mode 100644 pandas/core/ops/missing.py create mode 100644 pandas/core/ops/roperator.py create mode 100644 pandas/core/resample.py create mode 100644 pandas/core/reshape/__init__.py create mode 100644 pandas/core/reshape/api.py create mode 100644 pandas/core/reshape/concat.py create mode 100644 pandas/core/reshape/melt.py create mode 100644 pandas/core/reshape/merge.py create mode 100644 pandas/core/reshape/pivot.py create mode 100644 pandas/core/reshape/reshape.py 
create mode 100644 pandas/core/reshape/tile.py create mode 100644 pandas/core/reshape/util.py create mode 100644 pandas/core/series.py create mode 100644 pandas/core/sorting.py create mode 100644 pandas/core/sparse/__init__.py create mode 100644 pandas/core/sparse/api.py create mode 100644 pandas/core/strings.py create mode 100644 pandas/core/tools/__init__.py create mode 100644 pandas/core/tools/datetimes.py create mode 100644 pandas/core/tools/numeric.py create mode 100644 pandas/core/tools/timedeltas.py create mode 100644 pandas/core/util/__init__.py create mode 100644 pandas/core/util/hashing.py create mode 100644 pandas/core/window/__init__.py create mode 100644 pandas/core/window/common.py create mode 100644 pandas/core/window/ewm.py create mode 100644 pandas/core/window/expanding.py create mode 100644 pandas/core/window/indexers.py create mode 100644 pandas/core/window/numba_.py create mode 100644 pandas/core/window/rolling.py create mode 100644 pandas/errors/__init__.py create mode 100644 pandas/io/__init__.py create mode 100644 pandas/io/api.py create mode 100644 pandas/io/clipboard/__init__.py create mode 100644 pandas/io/clipboards.py create mode 100644 pandas/io/common.py create mode 100644 pandas/io/date_converters.py create mode 100644 pandas/io/excel/__init__.py create mode 100644 pandas/io/excel/_base.py create mode 100644 pandas/io/excel/_odfreader.py create mode 100644 pandas/io/excel/_openpyxl.py create mode 100644 pandas/io/excel/_pyxlsb.py create mode 100644 pandas/io/excel/_util.py create mode 100644 pandas/io/excel/_xlrd.py create mode 100644 pandas/io/excel/_xlsxwriter.py create mode 100644 pandas/io/excel/_xlwt.py create mode 100644 pandas/io/feather_format.py create mode 100644 pandas/io/formats/__init__.py create mode 100644 pandas/io/formats/console.py create mode 100644 pandas/io/formats/css.py create mode 100644 pandas/io/formats/csvs.py create mode 100644 pandas/io/formats/excel.py create mode 100644 pandas/io/formats/format.py create 
mode 100644 pandas/io/formats/html.py create mode 100644 pandas/io/formats/latex.py create mode 100644 pandas/io/formats/printing.py create mode 100644 pandas/io/formats/style.py create mode 100644 pandas/io/formats/templates/html.tpl create mode 100644 pandas/io/gbq.py create mode 100644 pandas/io/gcs.py create mode 100644 pandas/io/html.py create mode 100644 pandas/io/json/__init__.py create mode 100644 pandas/io/json/_json.py create mode 100644 pandas/io/json/_normalize.py create mode 100644 pandas/io/json/_table_schema.py create mode 100644 pandas/io/orc.py create mode 100644 pandas/io/parquet.py create mode 100755 pandas/io/parsers.py create mode 100644 pandas/io/pickle.py create mode 100644 pandas/io/pytables.py create mode 100644 pandas/io/s3.py create mode 100644 pandas/io/sas/__init__.py create mode 100644 pandas/io/sas/sas.pyx create mode 100644 pandas/io/sas/sas7bdat.py create mode 100644 pandas/io/sas/sas_constants.py create mode 100644 pandas/io/sas/sas_xport.py create mode 100644 pandas/io/sas/sasreader.py create mode 100644 pandas/io/spss.py create mode 100644 pandas/io/sql.py create mode 100644 pandas/io/stata.py create mode 100644 pandas/plotting/__init__.py create mode 100644 pandas/plotting/_core.py create mode 100644 pandas/plotting/_matplotlib/__init__.py create mode 100644 pandas/plotting/_matplotlib/boxplot.py create mode 100644 pandas/plotting/_matplotlib/compat.py create mode 100644 pandas/plotting/_matplotlib/converter.py create mode 100644 pandas/plotting/_matplotlib/core.py create mode 100644 pandas/plotting/_matplotlib/hist.py create mode 100644 pandas/plotting/_matplotlib/misc.py create mode 100644 pandas/plotting/_matplotlib/style.py create mode 100644 pandas/plotting/_matplotlib/timeseries.py create mode 100644 pandas/plotting/_matplotlib/tools.py create mode 100644 pandas/plotting/_misc.py create mode 100644 pandas/testing.py create mode 100644 pandas/tests/__init__.py create mode 100644 pandas/tests/api/__init__.py create mode 
100644 pandas/tests/api/test_api.py create mode 100644 pandas/tests/api/test_types.py create mode 100644 pandas/tests/arithmetic/__init__.py create mode 100644 pandas/tests/arithmetic/common.py create mode 100644 pandas/tests/arithmetic/conftest.py create mode 100644 pandas/tests/arithmetic/test_datetime64.py create mode 100644 pandas/tests/arithmetic/test_interval.py create mode 100644 pandas/tests/arithmetic/test_numeric.py create mode 100644 pandas/tests/arithmetic/test_object.py create mode 100644 pandas/tests/arithmetic/test_period.py create mode 100644 pandas/tests/arithmetic/test_timedelta64.py create mode 100644 pandas/tests/arrays/__init__.py create mode 100644 pandas/tests/arrays/categorical/__init__.py create mode 100644 pandas/tests/arrays/categorical/common.py create mode 100644 pandas/tests/arrays/categorical/conftest.py create mode 100644 pandas/tests/arrays/categorical/test_algos.py create mode 100644 pandas/tests/arrays/categorical/test_analytics.py create mode 100644 pandas/tests/arrays/categorical/test_api.py create mode 100644 pandas/tests/arrays/categorical/test_constructors.py create mode 100644 pandas/tests/arrays/categorical/test_dtypes.py create mode 100644 pandas/tests/arrays/categorical/test_indexing.py create mode 100644 pandas/tests/arrays/categorical/test_missing.py create mode 100644 pandas/tests/arrays/categorical/test_operators.py create mode 100644 pandas/tests/arrays/categorical/test_replace.py create mode 100644 pandas/tests/arrays/categorical/test_repr.py create mode 100644 pandas/tests/arrays/categorical/test_sorting.py create mode 100644 pandas/tests/arrays/categorical/test_subclass.py create mode 100644 pandas/tests/arrays/categorical/test_warnings.py create mode 100644 pandas/tests/arrays/interval/__init__.py create mode 100644 pandas/tests/arrays/interval/test_interval.py create mode 100644 pandas/tests/arrays/interval/test_ops.py create mode 100644 pandas/tests/arrays/sparse/__init__.py create mode 100644 
pandas/tests/arrays/sparse/test_accessor.py create mode 100644 pandas/tests/arrays/sparse/test_arithmetics.py create mode 100644 pandas/tests/arrays/sparse/test_array.py create mode 100644 pandas/tests/arrays/sparse/test_combine_concat.py create mode 100644 pandas/tests/arrays/sparse/test_dtype.py create mode 100644 pandas/tests/arrays/sparse/test_libsparse.py create mode 100644 pandas/tests/arrays/string_/__init__.py create mode 100644 pandas/tests/arrays/string_/test_string.py create mode 100644 pandas/tests/arrays/test_array.py create mode 100644 pandas/tests/arrays/test_boolean.py create mode 100644 pandas/tests/arrays/test_datetimelike.py create mode 100644 pandas/tests/arrays/test_datetimes.py create mode 100644 pandas/tests/arrays/test_integer.py create mode 100644 pandas/tests/arrays/test_numpy.py create mode 100644 pandas/tests/arrays/test_period.py create mode 100644 pandas/tests/arrays/test_timedeltas.py create mode 100644 pandas/tests/base/__init__.py create mode 100644 pandas/tests/base/test_constructors.py create mode 100644 pandas/tests/base/test_conversion.py create mode 100644 pandas/tests/base/test_ops.py create mode 100644 pandas/tests/computation/__init__.py create mode 100644 pandas/tests/computation/test_compat.py create mode 100644 pandas/tests/computation/test_eval.py create mode 100644 pandas/tests/config/__init__.py create mode 100644 pandas/tests/config/test_config.py create mode 100644 pandas/tests/config/test_localization.py create mode 100644 pandas/tests/dtypes/__init__.py create mode 100644 pandas/tests/dtypes/cast/__init__.py create mode 100644 pandas/tests/dtypes/cast/test_construct_from_scalar.py create mode 100644 pandas/tests/dtypes/cast/test_construct_ndarray.py create mode 100644 pandas/tests/dtypes/cast/test_construct_object_arr.py create mode 100644 pandas/tests/dtypes/cast/test_convert_objects.py create mode 100644 pandas/tests/dtypes/cast/test_downcast.py create mode 100644 pandas/tests/dtypes/cast/test_find_common_type.py 
create mode 100644 pandas/tests/dtypes/cast/test_infer_datetimelike.py create mode 100644 pandas/tests/dtypes/cast/test_infer_dtype.py create mode 100644 pandas/tests/dtypes/cast/test_promote.py create mode 100644 pandas/tests/dtypes/cast/test_upcast.py create mode 100644 pandas/tests/dtypes/test_common.py create mode 100644 pandas/tests/dtypes/test_concat.py create mode 100644 pandas/tests/dtypes/test_dtypes.py create mode 100644 pandas/tests/dtypes/test_generic.py create mode 100644 pandas/tests/dtypes/test_inference.py create mode 100644 pandas/tests/dtypes/test_missing.py create mode 100644 pandas/tests/extension/__init__.py create mode 100644 pandas/tests/extension/arrow/__init__.py create mode 100644 pandas/tests/extension/arrow/arrays.py create mode 100644 pandas/tests/extension/arrow/test_bool.py create mode 100644 pandas/tests/extension/arrow/test_string.py create mode 100644 pandas/tests/extension/base/__init__.py create mode 100644 pandas/tests/extension/base/base.py create mode 100644 pandas/tests/extension/base/casting.py create mode 100644 pandas/tests/extension/base/constructors.py create mode 100644 pandas/tests/extension/base/dtype.py create mode 100644 pandas/tests/extension/base/getitem.py create mode 100644 pandas/tests/extension/base/groupby.py create mode 100644 pandas/tests/extension/base/interface.py create mode 100644 pandas/tests/extension/base/io.py create mode 100644 pandas/tests/extension/base/methods.py create mode 100644 pandas/tests/extension/base/missing.py create mode 100644 pandas/tests/extension/base/ops.py create mode 100644 pandas/tests/extension/base/printing.py create mode 100644 pandas/tests/extension/base/reduce.py create mode 100644 pandas/tests/extension/base/reshaping.py create mode 100644 pandas/tests/extension/base/setitem.py create mode 100644 pandas/tests/extension/conftest.py create mode 100644 pandas/tests/extension/decimal/__init__.py create mode 100644 pandas/tests/extension/decimal/array.py create mode 100644 
pandas/tests/extension/decimal/test_decimal.py create mode 100644 pandas/tests/extension/json/__init__.py create mode 100644 pandas/tests/extension/json/array.py create mode 100644 pandas/tests/extension/json/test_json.py create mode 100644 pandas/tests/extension/list/__init__.py create mode 100644 pandas/tests/extension/list/array.py create mode 100644 pandas/tests/extension/list/test_list.py create mode 100644 pandas/tests/extension/test_boolean.py create mode 100644 pandas/tests/extension/test_categorical.py create mode 100644 pandas/tests/extension/test_common.py create mode 100644 pandas/tests/extension/test_datetime.py create mode 100644 pandas/tests/extension/test_external_block.py create mode 100644 pandas/tests/extension/test_integer.py create mode 100644 pandas/tests/extension/test_interval.py create mode 100644 pandas/tests/extension/test_numpy.py create mode 100644 pandas/tests/extension/test_period.py create mode 100644 pandas/tests/extension/test_sparse.py create mode 100644 pandas/tests/extension/test_string.py create mode 100644 pandas/tests/frame/__init__.py create mode 100644 pandas/tests/frame/common.py create mode 100644 pandas/tests/frame/conftest.py create mode 100644 pandas/tests/frame/indexing/test_categorical.py create mode 100644 pandas/tests/frame/indexing/test_datetime.py create mode 100644 pandas/tests/frame/indexing/test_indexing.py create mode 100644 pandas/tests/frame/indexing/test_where.py create mode 100644 pandas/tests/frame/methods/__init__.py create mode 100644 pandas/tests/frame/methods/test_append.py create mode 100644 pandas/tests/frame/methods/test_asof.py create mode 100644 pandas/tests/frame/methods/test_clip.py create mode 100644 pandas/tests/frame/methods/test_count.py create mode 100644 pandas/tests/frame/methods/test_cov_corr.py create mode 100644 pandas/tests/frame/methods/test_describe.py create mode 100644 pandas/tests/frame/methods/test_diff.py create mode 100644 pandas/tests/frame/methods/test_drop_duplicates.py 
create mode 100644 pandas/tests/frame/methods/test_duplicated.py create mode 100644 pandas/tests/frame/methods/test_explode.py create mode 100644 pandas/tests/frame/methods/test_isin.py create mode 100644 pandas/tests/frame/methods/test_nlargest.py create mode 100644 pandas/tests/frame/methods/test_pct_change.py create mode 100644 pandas/tests/frame/methods/test_quantile.py create mode 100644 pandas/tests/frame/methods/test_rank.py create mode 100644 pandas/tests/frame/methods/test_replace.py create mode 100644 pandas/tests/frame/methods/test_round.py create mode 100644 pandas/tests/frame/methods/test_shift.py create mode 100644 pandas/tests/frame/methods/test_sort_index.py create mode 100644 pandas/tests/frame/methods/test_sort_values.py create mode 100644 pandas/tests/frame/methods/test_to_dict.py create mode 100644 pandas/tests/frame/methods/test_to_records.py create mode 100644 pandas/tests/frame/methods/test_transpose.py create mode 100644 pandas/tests/frame/methods/test_truncate.py create mode 100644 pandas/tests/frame/test_alter_axes.py create mode 100644 pandas/tests/frame/test_analytics.py create mode 100644 pandas/tests/frame/test_api.py create mode 100644 pandas/tests/frame/test_apply.py create mode 100644 pandas/tests/frame/test_arithmetic.py create mode 100644 pandas/tests/frame/test_axis_select_reindex.py create mode 100644 pandas/tests/frame/test_block_internals.py create mode 100644 pandas/tests/frame/test_combine_concat.py create mode 100644 pandas/tests/frame/test_constructors.py create mode 100644 pandas/tests/frame/test_cumulative.py create mode 100644 pandas/tests/frame/test_dtypes.py create mode 100644 pandas/tests/frame/test_join.py create mode 100644 pandas/tests/frame/test_missing.py create mode 100644 pandas/tests/frame/test_mutate_columns.py create mode 100644 pandas/tests/frame/test_nonunique_indexes.py create mode 100644 pandas/tests/frame/test_operators.py create mode 100644 pandas/tests/frame/test_period.py create mode 100644 
pandas/tests/frame/test_query_eval.py create mode 100644 pandas/tests/frame/test_repr_info.py create mode 100644 pandas/tests/frame/test_reshape.py create mode 100644 pandas/tests/frame/test_sort_values_level_as_str.py create mode 100644 pandas/tests/frame/test_subclass.py create mode 100644 pandas/tests/frame/test_timeseries.py create mode 100644 pandas/tests/frame/test_timezones.py create mode 100644 pandas/tests/frame/test_to_csv.py create mode 100644 pandas/tests/frame/test_validate.py create mode 100644 pandas/tests/generic/__init__.py create mode 100644 pandas/tests/generic/test_frame.py create mode 100644 pandas/tests/generic/test_generic.py create mode 100644 pandas/tests/generic/test_label_or_level_utils.py create mode 100644 pandas/tests/generic/test_series.py create mode 100644 pandas/tests/groupby/__init__.py create mode 100644 pandas/tests/groupby/aggregate/__init__.py create mode 100644 pandas/tests/groupby/aggregate/test_aggregate.py create mode 100644 pandas/tests/groupby/aggregate/test_cython.py create mode 100644 pandas/tests/groupby/aggregate/test_other.py create mode 100644 pandas/tests/groupby/conftest.py create mode 100644 pandas/tests/groupby/test_apply.py create mode 100644 pandas/tests/groupby/test_bin_groupby.py create mode 100644 pandas/tests/groupby/test_categorical.py create mode 100644 pandas/tests/groupby/test_counting.py create mode 100644 pandas/tests/groupby/test_filters.py create mode 100644 pandas/tests/groupby/test_function.py create mode 100644 pandas/tests/groupby/test_groupby.py create mode 100644 pandas/tests/groupby/test_grouping.py create mode 100644 pandas/tests/groupby/test_index_as_string.py create mode 100644 pandas/tests/groupby/test_nth.py create mode 100644 pandas/tests/groupby/test_rank.py create mode 100644 pandas/tests/groupby/test_timegrouper.py create mode 100644 pandas/tests/groupby/test_transform.py create mode 100644 pandas/tests/groupby/test_value_counts.py create mode 100644 
pandas/tests/groupby/test_whitelist.py create mode 100644 pandas/tests/indexes/__init__.py create mode 100644 pandas/tests/indexes/categorical/__init__.py create mode 100644 pandas/tests/indexes/categorical/test_category.py create mode 100644 pandas/tests/indexes/categorical/test_constructors.py create mode 100644 pandas/tests/indexes/common.py create mode 100644 pandas/tests/indexes/conftest.py create mode 100644 pandas/tests/indexes/datetimelike.py create mode 100644 pandas/tests/indexes/datetimes/__init__.py create mode 100644 pandas/tests/indexes/datetimes/test_astype.py create mode 100644 pandas/tests/indexes/datetimes/test_constructors.py create mode 100644 pandas/tests/indexes/datetimes/test_date_range.py create mode 100644 pandas/tests/indexes/datetimes/test_datetime.py create mode 100644 pandas/tests/indexes/datetimes/test_datetimelike.py create mode 100644 pandas/tests/indexes/datetimes/test_formats.py create mode 100644 pandas/tests/indexes/datetimes/test_indexing.py create mode 100644 pandas/tests/indexes/datetimes/test_join.py create mode 100644 pandas/tests/indexes/datetimes/test_misc.py create mode 100644 pandas/tests/indexes/datetimes/test_missing.py create mode 100644 pandas/tests/indexes/datetimes/test_ops.py create mode 100644 pandas/tests/indexes/datetimes/test_partial_slicing.py create mode 100644 pandas/tests/indexes/datetimes/test_scalar_compat.py create mode 100644 pandas/tests/indexes/datetimes/test_setops.py create mode 100644 pandas/tests/indexes/datetimes/test_shift.py create mode 100644 pandas/tests/indexes/datetimes/test_timezones.py create mode 100644 pandas/tests/indexes/datetimes/test_tools.py create mode 100644 pandas/tests/indexes/interval/__init__.py create mode 100644 pandas/tests/indexes/interval/test_astype.py create mode 100644 pandas/tests/indexes/interval/test_base.py create mode 100644 pandas/tests/indexes/interval/test_constructors.py create mode 100644 pandas/tests/indexes/interval/test_formats.py create mode 100644 
pandas/tests/indexes/interval/test_indexing.py create mode 100644 pandas/tests/indexes/interval/test_interval.py create mode 100644 pandas/tests/indexes/interval/test_interval_range.py create mode 100644 pandas/tests/indexes/interval/test_interval_tree.py create mode 100644 pandas/tests/indexes/interval/test_setops.py create mode 100644 pandas/tests/indexes/multi/__init__.py create mode 100644 pandas/tests/indexes/multi/conftest.py create mode 100644 pandas/tests/indexes/multi/test_analytics.py create mode 100644 pandas/tests/indexes/multi/test_astype.py create mode 100644 pandas/tests/indexes/multi/test_compat.py create mode 100644 pandas/tests/indexes/multi/test_constructors.py create mode 100644 pandas/tests/indexes/multi/test_contains.py create mode 100644 pandas/tests/indexes/multi/test_conversion.py create mode 100644 pandas/tests/indexes/multi/test_copy.py create mode 100644 pandas/tests/indexes/multi/test_drop.py create mode 100644 pandas/tests/indexes/multi/test_duplicates.py create mode 100644 pandas/tests/indexes/multi/test_equivalence.py create mode 100644 pandas/tests/indexes/multi/test_format.py create mode 100644 pandas/tests/indexes/multi/test_get_set.py create mode 100644 pandas/tests/indexes/multi/test_indexing.py create mode 100644 pandas/tests/indexes/multi/test_integrity.py create mode 100644 pandas/tests/indexes/multi/test_join.py create mode 100644 pandas/tests/indexes/multi/test_missing.py create mode 100644 pandas/tests/indexes/multi/test_monotonic.py create mode 100644 pandas/tests/indexes/multi/test_names.py create mode 100644 pandas/tests/indexes/multi/test_partial_indexing.py create mode 100644 pandas/tests/indexes/multi/test_reindex.py create mode 100644 pandas/tests/indexes/multi/test_reshape.py create mode 100644 pandas/tests/indexes/multi/test_setops.py create mode 100644 pandas/tests/indexes/multi/test_sorting.py create mode 100644 pandas/tests/indexes/period/__init__.py create mode 100644 pandas/tests/indexes/period/test_asfreq.py 
create mode 100644 pandas/tests/indexes/period/test_astype.py create mode 100644 pandas/tests/indexes/period/test_constructors.py create mode 100644 pandas/tests/indexes/period/test_formats.py create mode 100644 pandas/tests/indexes/period/test_indexing.py create mode 100644 pandas/tests/indexes/period/test_ops.py create mode 100644 pandas/tests/indexes/period/test_partial_slicing.py create mode 100644 pandas/tests/indexes/period/test_period.py create mode 100644 pandas/tests/indexes/period/test_period_range.py create mode 100644 pandas/tests/indexes/period/test_scalar_compat.py create mode 100644 pandas/tests/indexes/period/test_setops.py create mode 100644 pandas/tests/indexes/period/test_shift.py create mode 100644 pandas/tests/indexes/period/test_tools.py create mode 100644 pandas/tests/indexes/ranges/__init__.py create mode 100644 pandas/tests/indexes/ranges/test_constructors.py create mode 100644 pandas/tests/indexes/ranges/test_range.py create mode 100644 pandas/tests/indexes/ranges/test_setops.py create mode 100644 pandas/tests/indexes/test_base.py create mode 100644 pandas/tests/indexes/test_common.py create mode 100644 pandas/tests/indexes/test_frozen.py create mode 100644 pandas/tests/indexes/test_numeric.py create mode 100644 pandas/tests/indexes/test_numpy_compat.py create mode 100644 pandas/tests/indexes/test_setops.py create mode 100644 pandas/tests/indexes/timedeltas/__init__.py create mode 100644 pandas/tests/indexes/timedeltas/test_astype.py create mode 100644 pandas/tests/indexes/timedeltas/test_constructors.py create mode 100644 pandas/tests/indexes/timedeltas/test_formats.py create mode 100644 pandas/tests/indexes/timedeltas/test_indexing.py create mode 100644 pandas/tests/indexes/timedeltas/test_join.py create mode 100644 pandas/tests/indexes/timedeltas/test_ops.py create mode 100644 pandas/tests/indexes/timedeltas/test_partial_slicing.py create mode 100644 pandas/tests/indexes/timedeltas/test_scalar_compat.py create mode 100644 
pandas/tests/indexes/timedeltas/test_setops.py create mode 100644 pandas/tests/indexes/timedeltas/test_shift.py create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta.py create mode 100644 pandas/tests/indexes/timedeltas/test_timedelta_range.py create mode 100644 pandas/tests/indexes/timedeltas/test_tools.py create mode 100644 pandas/tests/indexing/__init__.py create mode 100644 pandas/tests/indexing/common.py create mode 100644 pandas/tests/indexing/conftest.py create mode 100644 pandas/tests/indexing/interval/__init__.py create mode 100644 pandas/tests/indexing/interval/test_interval.py create mode 100644 pandas/tests/indexing/interval/test_interval_new.py create mode 100644 pandas/tests/indexing/multiindex/__init__.py create mode 100644 pandas/tests/indexing/multiindex/conftest.py create mode 100644 pandas/tests/indexing/multiindex/test_chaining_and_caching.py create mode 100644 pandas/tests/indexing/multiindex/test_datetime.py create mode 100644 pandas/tests/indexing/multiindex/test_getitem.py create mode 100644 pandas/tests/indexing/multiindex/test_iloc.py create mode 100644 pandas/tests/indexing/multiindex/test_indexing_slow.py create mode 100644 pandas/tests/indexing/multiindex/test_ix.py create mode 100644 pandas/tests/indexing/multiindex/test_loc.py create mode 100644 pandas/tests/indexing/multiindex/test_multiindex.py create mode 100644 pandas/tests/indexing/multiindex/test_partial.py create mode 100644 pandas/tests/indexing/multiindex/test_set_ops.py create mode 100644 pandas/tests/indexing/multiindex/test_setitem.py create mode 100644 pandas/tests/indexing/multiindex/test_slice.py create mode 100644 pandas/tests/indexing/multiindex/test_sorted.py create mode 100644 pandas/tests/indexing/multiindex/test_xs.py create mode 100644 pandas/tests/indexing/test_callable.py create mode 100644 pandas/tests/indexing/test_categorical.py create mode 100644 pandas/tests/indexing/test_chaining_and_caching.py create mode 100644 
pandas/tests/indexing/test_check_indexer.py create mode 100644 pandas/tests/indexing/test_coercion.py create mode 100644 pandas/tests/indexing/test_datetime.py create mode 100644 pandas/tests/indexing/test_floats.py create mode 100644 pandas/tests/indexing/test_iloc.py create mode 100644 pandas/tests/indexing/test_indexing.py create mode 100644 pandas/tests/indexing/test_indexing_engines.py create mode 100644 pandas/tests/indexing/test_indexing_slow.py create mode 100644 pandas/tests/indexing/test_loc.py create mode 100644 pandas/tests/indexing/test_na_indexing.py create mode 100644 pandas/tests/indexing/test_partial.py create mode 100644 pandas/tests/indexing/test_scalar.py create mode 100644 pandas/tests/indexing/test_timedelta.py create mode 100644 pandas/tests/internals/__init__.py create mode 100644 pandas/tests/internals/test_internals.py create mode 100644 pandas/tests/io/__init__.py create mode 100644 pandas/tests/io/conftest.py create mode 100644 pandas/tests/io/data/csv/banklist.csv create mode 100644 pandas/tests/io/data/csv/iris.csv create mode 100644 pandas/tests/io/data/csv/test1.csv create mode 100644 pandas/tests/io/data/csv/test_mmap.csv create mode 100644 pandas/tests/io/data/csv/tips.csv create mode 100644 pandas/tests/io/data/excel/blank.ods create mode 100644 pandas/tests/io/data/excel/blank.xls create mode 100644 pandas/tests/io/data/excel/blank.xlsb create mode 100644 pandas/tests/io/data/excel/blank.xlsm create mode 100644 pandas/tests/io/data/excel/blank.xlsx create mode 100644 pandas/tests/io/data/excel/blank_with_header.ods create mode 100644 pandas/tests/io/data/excel/blank_with_header.xls create mode 100644 pandas/tests/io/data/excel/blank_with_header.xlsb create mode 100644 pandas/tests/io/data/excel/blank_with_header.xlsm create mode 100644 pandas/tests/io/data/excel/blank_with_header.xlsx create mode 100644 pandas/tests/io/data/excel/invalid_value_type.ods create mode 100644 pandas/tests/io/data/excel/test1.ods create mode 100644 
pandas/tests/io/data/excel/test1.xls create mode 100644 pandas/tests/io/data/excel/test1.xlsb create mode 100644 pandas/tests/io/data/excel/test1.xlsm create mode 100644 pandas/tests/io/data/excel/test1.xlsx create mode 100644 pandas/tests/io/data/excel/test2.ods create mode 100644 pandas/tests/io/data/excel/test2.xls create mode 100644 pandas/tests/io/data/excel/test2.xlsb create mode 100644 pandas/tests/io/data/excel/test2.xlsm create mode 100644 pandas/tests/io/data/excel/test2.xlsx create mode 100644 pandas/tests/io/data/excel/test3.ods create mode 100644 pandas/tests/io/data/excel/test3.xls create mode 100644 pandas/tests/io/data/excel/test3.xlsb create mode 100644 pandas/tests/io/data/excel/test3.xlsm create mode 100644 pandas/tests/io/data/excel/test3.xlsx create mode 100644 pandas/tests/io/data/excel/test4.ods create mode 100644 pandas/tests/io/data/excel/test4.xls create mode 100644 pandas/tests/io/data/excel/test4.xlsb create mode 100644 pandas/tests/io/data/excel/test4.xlsm create mode 100644 pandas/tests/io/data/excel/test4.xlsx create mode 100644 pandas/tests/io/data/excel/test5.ods create mode 100644 pandas/tests/io/data/excel/test5.xls create mode 100644 pandas/tests/io/data/excel/test5.xlsb create mode 100644 pandas/tests/io/data/excel/test5.xlsm create mode 100644 pandas/tests/io/data/excel/test5.xlsx create mode 100644 pandas/tests/io/data/excel/test_converters.ods create mode 100644 pandas/tests/io/data/excel/test_converters.xls create mode 100644 pandas/tests/io/data/excel/test_converters.xlsb create mode 100644 pandas/tests/io/data/excel/test_converters.xlsm create mode 100644 pandas/tests/io/data/excel/test_converters.xlsx create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.ods create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.xls create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.xlsb create mode 100644 pandas/tests/io/data/excel/test_index_name_pre17.xlsm create mode 100644 
pandas/tests/io/data/excel/test_index_name_pre17.xlsx create mode 100644 pandas/tests/io/data/excel/test_multisheet.ods create mode 100644 pandas/tests/io/data/excel/test_multisheet.xls create mode 100644 pandas/tests/io/data/excel/test_multisheet.xlsb create mode 100644 pandas/tests/io/data/excel/test_multisheet.xlsm create mode 100644 pandas/tests/io/data/excel/test_multisheet.xlsx create mode 100644 pandas/tests/io/data/excel/test_squeeze.ods create mode 100644 pandas/tests/io/data/excel/test_squeeze.xls create mode 100644 pandas/tests/io/data/excel/test_squeeze.xlsb create mode 100644 pandas/tests/io/data/excel/test_squeeze.xlsm create mode 100644 pandas/tests/io/data/excel/test_squeeze.xlsx create mode 100644 pandas/tests/io/data/excel/test_types.ods create mode 100644 pandas/tests/io/data/excel/test_types.xls create mode 100644 pandas/tests/io/data/excel/test_types.xlsb create mode 100644 pandas/tests/io/data/excel/test_types.xlsm create mode 100644 pandas/tests/io/data/excel/test_types.xlsx create mode 100644 pandas/tests/io/data/excel/testdateoverflow.ods create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xls create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xlsb create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xlsm create mode 100644 pandas/tests/io/data/excel/testdateoverflow.xlsx create mode 100644 pandas/tests/io/data/excel/testdtype.ods create mode 100644 pandas/tests/io/data/excel/testdtype.xls create mode 100644 pandas/tests/io/data/excel/testdtype.xlsb create mode 100644 pandas/tests/io/data/excel/testdtype.xlsm create mode 100644 pandas/tests/io/data/excel/testdtype.xlsx create mode 100644 pandas/tests/io/data/excel/testmultiindex.ods create mode 100644 pandas/tests/io/data/excel/testmultiindex.xls create mode 100644 pandas/tests/io/data/excel/testmultiindex.xlsb create mode 100644 pandas/tests/io/data/excel/testmultiindex.xlsm create mode 100644 pandas/tests/io/data/excel/testmultiindex.xlsx create mode 
100644 pandas/tests/io/data/excel/testskiprows.ods create mode 100644 pandas/tests/io/data/excel/testskiprows.xls create mode 100644 pandas/tests/io/data/excel/testskiprows.xlsb create mode 100644 pandas/tests/io/data/excel/testskiprows.xlsm create mode 100644 pandas/tests/io/data/excel/testskiprows.xlsx create mode 100644 pandas/tests/io/data/excel/times_1900.ods create mode 100644 pandas/tests/io/data/excel/times_1900.xls create mode 100644 pandas/tests/io/data/excel/times_1900.xlsb create mode 100644 pandas/tests/io/data/excel/times_1900.xlsm create mode 100644 pandas/tests/io/data/excel/times_1900.xlsx create mode 100644 pandas/tests/io/data/excel/times_1904.ods create mode 100644 pandas/tests/io/data/excel/times_1904.xls create mode 100644 pandas/tests/io/data/excel/times_1904.xlsb create mode 100644 pandas/tests/io/data/excel/times_1904.xlsm create mode 100644 pandas/tests/io/data/excel/times_1904.xlsx create mode 100644 pandas/tests/io/data/excel/writertable.odt create mode 100644 pandas/tests/io/data/feather/feather-0_3_1.feather create mode 100644 pandas/tests/io/data/fixed_width/fixed_width_format.txt create mode 100644 pandas/tests/io/data/gbq_fake_job.txt create mode 100644 pandas/tests/io/data/html/banklist.html create mode 100644 pandas/tests/io/data/html/spam.html create mode 100644 pandas/tests/io/data/html/valid_markup.html create mode 100644 pandas/tests/io/data/html/wikipedia_states.html create mode 100644 pandas/tests/io/data/html_encoding/chinese_utf-16.html create mode 100644 pandas/tests/io/data/html_encoding/chinese_utf-32.html create mode 100644 pandas/tests/io/data/html_encoding/chinese_utf-8.html create mode 100644 pandas/tests/io/data/html_encoding/letz_latin1.html create mode 100644 pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/gh26443.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 create mode 100644 
pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/periodindex_0.20.1_x86_64_darwin_2.7.13.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/pytables_native.h5 create mode 100644 pandas/tests/io/data/legacy_hdf/pytables_native2.h5 create mode 100644 pandas/tests/io/data/legacy_msgpack/0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack create mode 100644 pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle create mode 100644 pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle create mode 100644 pandas/tests/io/data/orc/TestOrcFile.decimal.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.emptyFile.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.test1.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.testDate2038.orc create mode 100644 pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc create mode 100644 pandas/tests/io/data/parquet/simple.parquet create mode 100644 pandas/tests/io/data/pickle/categorical.0.25.0.pickle create mode 100644 pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz create mode 100644 pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz create mode 100644 pandas/tests/io/data/pickle/test_mi_py27.pkl create mode 100644 pandas/tests/io/data/pickle/test_py27.pkl create mode 100755 pandas/tests/io/data/spss/labelled-num-na.sav create mode 100755 pandas/tests/io/data/spss/labelled-num.sav create mode 100755 pandas/tests/io/data/spss/labelled-str.sav create mode 100755 pandas/tests/io/data/spss/umlauts.sav create mode 100644 pandas/tests/io/data/stata/S4_EDUC1.dta create mode 100644 pandas/tests/io/data/stata/stata10_115.dta create mode 100644 pandas/tests/io/data/stata/stata10_117.dta create mode 100644 pandas/tests/io/data/stata/stata11_115.dta create mode 100644 pandas/tests/io/data/stata/stata11_117.dta create mode 100644 
pandas/tests/io/data/stata/stata12_117.dta create mode 100644 pandas/tests/io/data/stata/stata13_dates.dta create mode 100644 pandas/tests/io/data/stata/stata14_118.dta create mode 100644 pandas/tests/io/data/stata/stata15.dta create mode 100644 pandas/tests/io/data/stata/stata16_118.dta create mode 100644 pandas/tests/io/data/stata/stata1_114.dta create mode 100644 pandas/tests/io/data/stata/stata1_117.dta create mode 100644 pandas/tests/io/data/stata/stata1_119.dta.gz create mode 100644 pandas/tests/io/data/stata/stata1_encoding.dta create mode 100644 pandas/tests/io/data/stata/stata1_encoding_118.dta create mode 100644 pandas/tests/io/data/stata/stata2_113.dta create mode 100644 pandas/tests/io/data/stata/stata2_114.dta create mode 100644 pandas/tests/io/data/stata/stata2_115.dta create mode 100644 pandas/tests/io/data/stata/stata2_117.dta create mode 100644 pandas/tests/io/data/stata/stata3.csv create mode 100644 pandas/tests/io/data/stata/stata3_113.dta create mode 100644 pandas/tests/io/data/stata/stata3_114.dta create mode 100644 pandas/tests/io/data/stata/stata3_115.dta create mode 100644 pandas/tests/io/data/stata/stata3_117.dta create mode 100644 pandas/tests/io/data/stata/stata4_113.dta create mode 100644 pandas/tests/io/data/stata/stata4_114.dta create mode 100644 pandas/tests/io/data/stata/stata4_115.dta create mode 100644 pandas/tests/io/data/stata/stata4_117.dta create mode 100644 pandas/tests/io/data/stata/stata5.csv create mode 100644 pandas/tests/io/data/stata/stata5_113.dta create mode 100644 pandas/tests/io/data/stata/stata5_114.dta create mode 100644 pandas/tests/io/data/stata/stata5_115.dta create mode 100644 pandas/tests/io/data/stata/stata5_117.dta create mode 100644 pandas/tests/io/data/stata/stata6.csv create mode 100644 pandas/tests/io/data/stata/stata6_113.dta create mode 100644 pandas/tests/io/data/stata/stata6_114.dta create mode 100644 pandas/tests/io/data/stata/stata6_115.dta create mode 100644 
pandas/tests/io/data/stata/stata6_117.dta create mode 100644 pandas/tests/io/data/stata/stata7_111.dta create mode 100644 pandas/tests/io/data/stata/stata7_115.dta create mode 100644 pandas/tests/io/data/stata/stata7_117.dta create mode 100644 pandas/tests/io/data/stata/stata8_113.dta create mode 100644 pandas/tests/io/data/stata/stata8_115.dta create mode 100644 pandas/tests/io/data/stata/stata8_117.dta create mode 100644 pandas/tests/io/data/stata/stata9_115.dta create mode 100644 pandas/tests/io/data/stata/stata9_117.dta create mode 100644 pandas/tests/io/excel/__init__.py create mode 100644 pandas/tests/io/excel/conftest.py create mode 100644 pandas/tests/io/excel/test_odf.py create mode 100644 pandas/tests/io/excel/test_openpyxl.py create mode 100644 pandas/tests/io/excel/test_readers.py create mode 100644 pandas/tests/io/excel/test_style.py create mode 100644 pandas/tests/io/excel/test_writers.py create mode 100644 pandas/tests/io/excel/test_xlrd.py create mode 100644 pandas/tests/io/excel/test_xlsxwriter.py create mode 100644 pandas/tests/io/excel/test_xlwt.py create mode 100644 pandas/tests/io/formats/__init__.py create mode 100644 pandas/tests/io/formats/data/html/datetime64_hourformatter.html create mode 100644 pandas/tests/io/formats/data/html/datetime64_monthformatter.html create mode 100644 pandas/tests/io/formats/data/html/escape_disabled.html create mode 100644 pandas/tests/io/formats/data/html/escaped.html create mode 100644 pandas/tests/io/formats/data/html/gh12031_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh14882_expected_output_1.html create mode 100644 pandas/tests/io/formats/data/html/gh14882_expected_output_2.html create mode 100644 pandas/tests/io/formats/data/html/gh14998_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh15019_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh21625_expected_output.html create mode 100644 
pandas/tests/io/formats/data/html/gh22270_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh22579_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh22783_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh22783_named_columns_index.html create mode 100644 pandas/tests/io/formats/data/html/gh6131_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/gh8452_expected_output.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html create mode 100644 pandas/tests/io/formats/data/html/index_1.html create mode 100644 pandas/tests/io/formats/data/html/index_2.html create mode 100644 pandas/tests/io/formats/data/html/index_3.html create mode 100644 pandas/tests/io/formats/data/html/index_4.html create mode 100644 pandas/tests/io/formats/data/html/index_5.html create mode 100644 pandas/tests/io/formats/data/html/index_formatter.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_standard.html create mode 100644 
pandas/tests/io/formats/data/html/index_named_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_none_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/justify.html 
create mode 100644 pandas/tests/io/formats/data/html/multiindex_1.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_2.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_1.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_2.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_1.html create mode 100644 pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_2.html create mode 100644 pandas/tests/io/formats/data/html/render_links_false.html create mode 100644 pandas/tests/io/formats/data/html/render_links_true.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html create mode 100644 
pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html create mode 100644 pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html create mode 100644 pandas/tests/io/formats/data/html/truncate.html create mode 100644 pandas/tests/io/formats/data/html/truncate_formatter.html create mode 100644 pandas/tests/io/formats/data/html/truncate_multi_index.html create mode 100644 pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html create mode 100644 pandas/tests/io/formats/data/html/unicode_1.html create mode 100644 pandas/tests/io/formats/data/html/unicode_2.html create mode 100644 pandas/tests/io/formats/data/html/with_classes.html create mode 100644 pandas/tests/io/formats/test_console.py create mode 100644 pandas/tests/io/formats/test_css.py 
create mode 100644 pandas/tests/io/formats/test_eng_formatting.py create mode 100644 pandas/tests/io/formats/test_format.py create mode 100644 pandas/tests/io/formats/test_printing.py create mode 100644 pandas/tests/io/formats/test_style.py create mode 100644 pandas/tests/io/formats/test_to_csv.py create mode 100644 pandas/tests/io/formats/test_to_excel.py create mode 100644 pandas/tests/io/formats/test_to_html.py create mode 100644 pandas/tests/io/formats/test_to_latex.py create mode 100644 pandas/tests/io/formats/test_to_markdown.py create mode 100755 pandas/tests/io/generate_legacy_storage_files.py create mode 100644 pandas/tests/io/json/__init__.py create mode 100644 pandas/tests/io/json/conftest.py create mode 100644 pandas/tests/io/json/data/tsframe_iso_v012.json create mode 100644 pandas/tests/io/json/data/tsframe_v012.json create mode 100644 pandas/tests/io/json/data/tsframe_v012.json.zip create mode 100644 pandas/tests/io/json/test_compression.py create mode 100644 pandas/tests/io/json/test_json_table_schema.py create mode 100644 pandas/tests/io/json/test_normalize.py create mode 100644 pandas/tests/io/json/test_pandas.py create mode 100644 pandas/tests/io/json/test_readlines.py create mode 100644 pandas/tests/io/json/test_ujson.py create mode 100644 pandas/tests/io/parser/__init__.py create mode 100644 pandas/tests/io/parser/conftest.py create mode 100644 pandas/tests/io/parser/data/items.jsonl create mode 100644 pandas/tests/io/parser/data/salaries.csv create mode 100644 pandas/tests/io/parser/data/salaries.csv.bz2 create mode 100644 pandas/tests/io/parser/data/salaries.csv.gz create mode 100644 pandas/tests/io/parser/data/salaries.csv.xz create mode 100644 pandas/tests/io/parser/data/salaries.csv.zip create mode 100644 pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv create mode 100644 pandas/tests/io/parser/data/sub_char.csv create mode 100644 pandas/tests/io/parser/data/tar_csv.tar create mode 100644 pandas/tests/io/parser/data/tar_csv.tar.gz create 
mode 100644 pandas/tests/io/parser/data/test1.csv create mode 100644 pandas/tests/io/parser/data/test1.csv.bz2 create mode 100644 pandas/tests/io/parser/data/test1.csv.gz create mode 100644 pandas/tests/io/parser/data/test2.csv create mode 100644 pandas/tests/io/parser/data/test_mmap.csv create mode 100644 pandas/tests/io/parser/data/tips.csv create mode 100644 pandas/tests/io/parser/data/tips.csv.bz2 create mode 100644 pandas/tests/io/parser/data/tips.csv.gz create mode 100644 pandas/tests/io/parser/data/unicode_series.csv create mode 100644 pandas/tests/io/parser/data/utf16_ex.txt create mode 100644 pandas/tests/io/parser/data/utf16_ex_small.zip create mode 100644 pandas/tests/io/parser/data/utf32_ex_small.zip create mode 100644 pandas/tests/io/parser/data/utf8_ex_small.zip create mode 100644 pandas/tests/io/parser/test_c_parser_only.py create mode 100644 pandas/tests/io/parser/test_comment.py create mode 100644 pandas/tests/io/parser/test_common.py create mode 100644 pandas/tests/io/parser/test_compression.py create mode 100644 pandas/tests/io/parser/test_converters.py create mode 100644 pandas/tests/io/parser/test_dialect.py create mode 100644 pandas/tests/io/parser/test_dtypes.py create mode 100644 pandas/tests/io/parser/test_encoding.py create mode 100644 pandas/tests/io/parser/test_header.py create mode 100644 pandas/tests/io/parser/test_index_col.py create mode 100644 pandas/tests/io/parser/test_mangle_dupes.py create mode 100644 pandas/tests/io/parser/test_multi_thread.py create mode 100644 pandas/tests/io/parser/test_na_values.py create mode 100644 pandas/tests/io/parser/test_network.py create mode 100644 pandas/tests/io/parser/test_parse_dates.py create mode 100644 pandas/tests/io/parser/test_python_parser_only.py create mode 100644 pandas/tests/io/parser/test_quoting.py create mode 100644 pandas/tests/io/parser/test_read_fwf.py create mode 100644 pandas/tests/io/parser/test_skiprows.py create mode 100644 pandas/tests/io/parser/test_textreader.py create 
mode 100644 pandas/tests/io/parser/test_unsupported.py create mode 100644 pandas/tests/io/parser/test_usecols.py create mode 100644 pandas/tests/io/pytables/__init__.py create mode 100644 pandas/tests/io/pytables/common.py create mode 100644 pandas/tests/io/pytables/conftest.py create mode 100644 pandas/tests/io/pytables/test_compat.py create mode 100644 pandas/tests/io/pytables/test_complex.py create mode 100644 pandas/tests/io/pytables/test_pytables_missing.py create mode 100644 pandas/tests/io/pytables/test_store.py create mode 100644 pandas/tests/io/pytables/test_timezones.py create mode 100644 pandas/tests/io/sas/__init__.py create mode 100644 pandas/tests/io/sas/data/DEMO_G.csv create mode 100644 pandas/tests/io/sas/data/DEMO_G.xpt create mode 100644 pandas/tests/io/sas/data/DRXFCD_G.csv create mode 100644 pandas/tests/io/sas/data/DRXFCD_G.xpt create mode 100644 pandas/tests/io/sas/data/SSHSV1_A.csv create mode 100644 pandas/tests/io/sas/data/SSHSV1_A.xpt create mode 100644 pandas/tests/io/sas/data/airline.csv create mode 100644 pandas/tests/io/sas/data/airline.sas7bdat create mode 100644 pandas/tests/io/sas/data/cars.sas7bdat create mode 100644 pandas/tests/io/sas/data/datetime.csv create mode 100644 pandas/tests/io/sas/data/datetime.sas7bdat create mode 100644 pandas/tests/io/sas/data/load_log.sas7bdat create mode 100644 pandas/tests/io/sas/data/many_columns.csv create mode 100644 pandas/tests/io/sas/data/many_columns.sas7bdat create mode 100644 pandas/tests/io/sas/data/paxraw_d_short.csv create mode 100644 pandas/tests/io/sas/data/paxraw_d_short.xpt create mode 100644 pandas/tests/io/sas/data/productsales.csv create mode 100644 pandas/tests/io/sas/data/productsales.sas7bdat create mode 100644 pandas/tests/io/sas/data/test1.sas7bdat create mode 100644 pandas/tests/io/sas/data/test10.sas7bdat create mode 100644 pandas/tests/io/sas/data/test11.sas7bdat create mode 100644 pandas/tests/io/sas/data/test12.sas7bdat create mode 100644 
pandas/tests/io/sas/data/test13.sas7bdat create mode 100644 pandas/tests/io/sas/data/test14.sas7bdat create mode 100644 pandas/tests/io/sas/data/test15.sas7bdat create mode 100644 pandas/tests/io/sas/data/test16.sas7bdat create mode 100644 pandas/tests/io/sas/data/test2.sas7bdat create mode 100644 pandas/tests/io/sas/data/test3.sas7bdat create mode 100644 pandas/tests/io/sas/data/test4.sas7bdat create mode 100644 pandas/tests/io/sas/data/test5.sas7bdat create mode 100644 pandas/tests/io/sas/data/test6.sas7bdat create mode 100644 pandas/tests/io/sas/data/test7.sas7bdat create mode 100644 pandas/tests/io/sas/data/test8.sas7bdat create mode 100644 pandas/tests/io/sas/data/test9.sas7bdat create mode 100644 pandas/tests/io/sas/data/test_12659.csv create mode 100644 pandas/tests/io/sas/data/test_12659.sas7bdat create mode 100644 pandas/tests/io/sas/data/test_sas7bdat_1.csv create mode 100644 pandas/tests/io/sas/data/test_sas7bdat_2.csv create mode 100644 pandas/tests/io/sas/data/zero_variables.sas7bdat create mode 100644 pandas/tests/io/sas/test_sas.py create mode 100644 pandas/tests/io/sas/test_sas7bdat.py create mode 100644 pandas/tests/io/sas/test_xport.py create mode 100644 pandas/tests/io/test_clipboard.py create mode 100644 pandas/tests/io/test_common.py create mode 100644 pandas/tests/io/test_compression.py create mode 100644 pandas/tests/io/test_date_converters.py create mode 100644 pandas/tests/io/test_feather.py create mode 100644 pandas/tests/io/test_gbq.py create mode 100644 pandas/tests/io/test_gcs.py create mode 100644 pandas/tests/io/test_html.py create mode 100644 pandas/tests/io/test_orc.py create mode 100644 pandas/tests/io/test_parquet.py create mode 100644 pandas/tests/io/test_pickle.py create mode 100644 pandas/tests/io/test_s3.py create mode 100644 pandas/tests/io/test_spss.py create mode 100644 pandas/tests/io/test_sql.py create mode 100644 pandas/tests/io/test_stata.py create mode 100644 pandas/tests/plotting/__init__.py create mode 100644 
pandas/tests/plotting/common.py create mode 100644 pandas/tests/plotting/test_backend.py create mode 100644 pandas/tests/plotting/test_boxplot_method.py create mode 100644 pandas/tests/plotting/test_converter.py create mode 100644 pandas/tests/plotting/test_datetimelike.py create mode 100644 pandas/tests/plotting/test_frame.py create mode 100644 pandas/tests/plotting/test_groupby.py create mode 100644 pandas/tests/plotting/test_hist_method.py create mode 100644 pandas/tests/plotting/test_misc.py create mode 100644 pandas/tests/plotting/test_series.py create mode 100644 pandas/tests/reductions/__init__.py create mode 100644 pandas/tests/reductions/test_reductions.py create mode 100644 pandas/tests/reductions/test_stat_reductions.py create mode 100644 pandas/tests/resample/__init__.py create mode 100644 pandas/tests/resample/conftest.py create mode 100644 pandas/tests/resample/test_base.py create mode 100644 pandas/tests/resample/test_datetime_index.py create mode 100644 pandas/tests/resample/test_period_index.py create mode 100644 pandas/tests/resample/test_resample_api.py create mode 100644 pandas/tests/resample/test_resampler_grouper.py create mode 100644 pandas/tests/resample/test_time_grouper.py create mode 100644 pandas/tests/resample/test_timedelta.py create mode 100644 pandas/tests/reshape/__init__.py create mode 100644 pandas/tests/reshape/data/cut_data.csv create mode 100644 pandas/tests/reshape/merge/__init__.py create mode 100644 pandas/tests/reshape/merge/data/allow_exact_matches.csv create mode 100644 pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv create mode 100644 pandas/tests/reshape/merge/data/asof.csv create mode 100644 pandas/tests/reshape/merge/data/asof2.csv create mode 100644 pandas/tests/reshape/merge/data/quotes.csv create mode 100644 pandas/tests/reshape/merge/data/quotes2.csv create mode 100644 pandas/tests/reshape/merge/data/tolerance.csv create mode 100644 pandas/tests/reshape/merge/data/trades.csv create mode 
100644 pandas/tests/reshape/merge/data/trades2.csv create mode 100644 pandas/tests/reshape/merge/test_join.py create mode 100644 pandas/tests/reshape/merge/test_merge.py create mode 100644 pandas/tests/reshape/merge/test_merge_asof.py create mode 100644 pandas/tests/reshape/merge/test_merge_index_as_string.py create mode 100644 pandas/tests/reshape/merge/test_merge_ordered.py create mode 100644 pandas/tests/reshape/merge/test_multi.py create mode 100644 pandas/tests/reshape/merge/test_pivot_old.py create mode 100644 pandas/tests/reshape/test_concat.py create mode 100644 pandas/tests/reshape/test_cut.py create mode 100644 pandas/tests/reshape/test_melt.py create mode 100644 pandas/tests/reshape/test_pivot.py create mode 100644 pandas/tests/reshape/test_qcut.py create mode 100644 pandas/tests/reshape/test_reshape.py create mode 100644 pandas/tests/reshape/test_union_categoricals.py create mode 100644 pandas/tests/reshape/test_util.py create mode 100644 pandas/tests/scalar/__init__.py create mode 100644 pandas/tests/scalar/interval/__init__.py create mode 100644 pandas/tests/scalar/interval/test_interval.py create mode 100644 pandas/tests/scalar/interval/test_ops.py create mode 100644 pandas/tests/scalar/period/__init__.py create mode 100644 pandas/tests/scalar/period/test_asfreq.py create mode 100644 pandas/tests/scalar/period/test_period.py create mode 100644 pandas/tests/scalar/test_na_scalar.py create mode 100644 pandas/tests/scalar/test_nat.py create mode 100644 pandas/tests/scalar/timedelta/__init__.py create mode 100644 pandas/tests/scalar/timedelta/test_arithmetic.py create mode 100644 pandas/tests/scalar/timedelta/test_constructors.py create mode 100644 pandas/tests/scalar/timedelta/test_formats.py create mode 100644 pandas/tests/scalar/timedelta/test_timedelta.py create mode 100644 pandas/tests/scalar/timestamp/__init__.py create mode 100644 pandas/tests/scalar/timestamp/test_arithmetic.py create mode 100644 pandas/tests/scalar/timestamp/test_comparisons.py 
create mode 100644 pandas/tests/scalar/timestamp/test_rendering.py create mode 100644 pandas/tests/scalar/timestamp/test_timestamp.py create mode 100644 pandas/tests/scalar/timestamp/test_timezones.py create mode 100644 pandas/tests/scalar/timestamp/test_unary_ops.py create mode 100644 pandas/tests/series/__init__.py create mode 100644 pandas/tests/series/conftest.py create mode 100644 pandas/tests/series/indexing/__init__.py create mode 100644 pandas/tests/series/indexing/test_alter_index.py create mode 100644 pandas/tests/series/indexing/test_boolean.py create mode 100644 pandas/tests/series/indexing/test_callable.py create mode 100644 pandas/tests/series/indexing/test_datetime.py create mode 100644 pandas/tests/series/indexing/test_iloc.py create mode 100644 pandas/tests/series/indexing/test_indexing.py create mode 100644 pandas/tests/series/indexing/test_loc.py create mode 100644 pandas/tests/series/indexing/test_numeric.py create mode 100644 pandas/tests/series/methods/__init__.py create mode 100644 pandas/tests/series/methods/test_append.py create mode 100644 pandas/tests/series/methods/test_argsort.py create mode 100644 pandas/tests/series/methods/test_asof.py create mode 100644 pandas/tests/series/methods/test_clip.py create mode 100644 pandas/tests/series/methods/test_count.py create mode 100644 pandas/tests/series/methods/test_cov_corr.py create mode 100644 pandas/tests/series/methods/test_describe.py create mode 100644 pandas/tests/series/methods/test_diff.py create mode 100644 pandas/tests/series/methods/test_drop_duplicates.py create mode 100644 pandas/tests/series/methods/test_duplicated.py create mode 100644 pandas/tests/series/methods/test_explode.py create mode 100644 pandas/tests/series/methods/test_isin.py create mode 100644 pandas/tests/series/methods/test_nlargest.py create mode 100644 pandas/tests/series/methods/test_pct_change.py create mode 100644 pandas/tests/series/methods/test_quantile.py create mode 100644 
pandas/tests/series/methods/test_rank.py create mode 100644 pandas/tests/series/methods/test_replace.py create mode 100644 pandas/tests/series/methods/test_round.py create mode 100644 pandas/tests/series/methods/test_searchsorted.py create mode 100644 pandas/tests/series/methods/test_shift.py create mode 100644 pandas/tests/series/methods/test_sort_index.py create mode 100644 pandas/tests/series/methods/test_sort_values.py create mode 100644 pandas/tests/series/methods/test_to_dict.py create mode 100644 pandas/tests/series/methods/test_truncate.py create mode 100644 pandas/tests/series/methods/test_value_counts.py create mode 100644 pandas/tests/series/test_alter_axes.py create mode 100644 pandas/tests/series/test_analytics.py create mode 100644 pandas/tests/series/test_api.py create mode 100644 pandas/tests/series/test_apply.py create mode 100644 pandas/tests/series/test_arithmetic.py create mode 100644 pandas/tests/series/test_block_internals.py create mode 100644 pandas/tests/series/test_combine_concat.py create mode 100644 pandas/tests/series/test_constructors.py create mode 100644 pandas/tests/series/test_convert_dtypes.py create mode 100644 pandas/tests/series/test_cumulative.py create mode 100644 pandas/tests/series/test_datetime_values.py create mode 100644 pandas/tests/series/test_dtypes.py create mode 100644 pandas/tests/series/test_duplicates.py create mode 100644 pandas/tests/series/test_internals.py create mode 100644 pandas/tests/series/test_io.py create mode 100644 pandas/tests/series/test_missing.py create mode 100644 pandas/tests/series/test_operators.py create mode 100644 pandas/tests/series/test_period.py create mode 100644 pandas/tests/series/test_repr.py create mode 100644 pandas/tests/series/test_subclass.py create mode 100644 pandas/tests/series/test_timeseries.py create mode 100644 pandas/tests/series/test_timezones.py create mode 100644 pandas/tests/series/test_ufunc.py create mode 100644 pandas/tests/series/test_validate.py create mode 
100644 pandas/tests/test_algos.py create mode 100644 pandas/tests/test_common.py create mode 100644 pandas/tests/test_compat.py create mode 100644 pandas/tests/test_downstream.py create mode 100644 pandas/tests/test_errors.py create mode 100644 pandas/tests/test_expressions.py create mode 100644 pandas/tests/test_join.py create mode 100644 pandas/tests/test_lib.py create mode 100644 pandas/tests/test_multilevel.py create mode 100644 pandas/tests/test_nanops.py create mode 100644 pandas/tests/test_optional_dependency.py create mode 100644 pandas/tests/test_register_accessor.py create mode 100644 pandas/tests/test_sorting.py create mode 100644 pandas/tests/test_strings.py create mode 100644 pandas/tests/test_take.py create mode 100644 pandas/tests/tools/__init__.py create mode 100644 pandas/tests/tools/test_numeric.py create mode 100644 pandas/tests/tseries/__init__.py create mode 100644 pandas/tests/tseries/frequencies/__init__.py create mode 100644 pandas/tests/tseries/frequencies/test_freq_code.py create mode 100644 pandas/tests/tseries/frequencies/test_inference.py create mode 100644 pandas/tests/tseries/frequencies/test_to_offset.py create mode 100644 pandas/tests/tseries/holiday/__init__.py create mode 100644 pandas/tests/tseries/holiday/test_calendar.py create mode 100644 pandas/tests/tseries/holiday/test_federal.py create mode 100644 pandas/tests/tseries/holiday/test_holiday.py create mode 100644 pandas/tests/tseries/holiday/test_observance.py create mode 100644 pandas/tests/tseries/offsets/__init__.py create mode 100644 pandas/tests/tseries/offsets/common.py create mode 100644 pandas/tests/tseries/offsets/conftest.py create mode 100644 pandas/tests/tseries/offsets/data/cday-0.14.1.pickle create mode 100644 pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle create mode 100644 pandas/tests/tseries/offsets/test_fiscal.py create mode 100644 pandas/tests/tseries/offsets/test_offsets.py create mode 100644 
pandas/tests/tseries/offsets/test_offsets_properties.py create mode 100644 pandas/tests/tseries/offsets/test_ticks.py create mode 100644 pandas/tests/tseries/offsets/test_yqm_offsets.py create mode 100644 pandas/tests/tslibs/__init__.py create mode 100644 pandas/tests/tslibs/test_api.py create mode 100644 pandas/tests/tslibs/test_array_to_datetime.py create mode 100644 pandas/tests/tslibs/test_ccalendar.py create mode 100644 pandas/tests/tslibs/test_conversion.py create mode 100644 pandas/tests/tslibs/test_fields.py create mode 100644 pandas/tests/tslibs/test_libfrequencies.py create mode 100644 pandas/tests/tslibs/test_liboffsets.py create mode 100644 pandas/tests/tslibs/test_normalize_date.py create mode 100644 pandas/tests/tslibs/test_parse_iso8601.py create mode 100644 pandas/tests/tslibs/test_parsing.py create mode 100644 pandas/tests/tslibs/test_period_asfreq.py create mode 100644 pandas/tests/tslibs/test_timedeltas.py create mode 100644 pandas/tests/tslibs/test_timezones.py create mode 100644 pandas/tests/util/__init__.py create mode 100644 pandas/tests/util/conftest.py create mode 100644 pandas/tests/util/test_assert_almost_equal.py create mode 100644 pandas/tests/util/test_assert_categorical_equal.py create mode 100644 pandas/tests/util/test_assert_extension_array_equal.py create mode 100644 pandas/tests/util/test_assert_frame_equal.py create mode 100644 pandas/tests/util/test_assert_index_equal.py create mode 100644 pandas/tests/util/test_assert_interval_array_equal.py create mode 100644 pandas/tests/util/test_assert_numpy_array_equal.py create mode 100644 pandas/tests/util/test_assert_produces_warning.py create mode 100644 pandas/tests/util/test_assert_series_equal.py create mode 100644 pandas/tests/util/test_deprecate.py create mode 100644 pandas/tests/util/test_deprecate_kwarg.py create mode 100644 pandas/tests/util/test_hashing.py create mode 100644 pandas/tests/util/test_safe_import.py create mode 100644 pandas/tests/util/test_util.py create mode 
100644 pandas/tests/util/test_validate_args.py create mode 100644 pandas/tests/util/test_validate_args_and_kwargs.py create mode 100644 pandas/tests/util/test_validate_kwargs.py create mode 100644 pandas/tests/window/__init__.py create mode 100644 pandas/tests/window/common.py create mode 100644 pandas/tests/window/conftest.py create mode 100644 pandas/tests/window/moments/conftest.py create mode 100644 pandas/tests/window/moments/test_moments_ewm.py create mode 100644 pandas/tests/window/moments/test_moments_expanding.py create mode 100644 pandas/tests/window/moments/test_moments_rolling.py create mode 100644 pandas/tests/window/test_api.py create mode 100644 pandas/tests/window/test_apply.py create mode 100644 pandas/tests/window/test_base_indexer.py create mode 100644 pandas/tests/window/test_dtypes.py create mode 100644 pandas/tests/window/test_ewm.py create mode 100644 pandas/tests/window/test_expanding.py create mode 100644 pandas/tests/window/test_grouper.py create mode 100644 pandas/tests/window/test_numba.py create mode 100644 pandas/tests/window/test_pairwise.py create mode 100644 pandas/tests/window/test_rolling.py create mode 100644 pandas/tests/window/test_timeseries_window.py create mode 100644 pandas/tests/window/test_window.py create mode 100644 pandas/tseries/__init__.py create mode 100644 pandas/tseries/api.py create mode 100644 pandas/tseries/frequencies.py create mode 100644 pandas/tseries/holiday.py create mode 100644 pandas/tseries/offsets.py create mode 100644 pandas/util/__init__.py create mode 100644 pandas/util/_decorators.py create mode 100644 pandas/util/_depr_module.py create mode 100644 pandas/util/_doctools.py create mode 100644 pandas/util/_exceptions.py create mode 100644 pandas/util/_print_versions.py create mode 100644 pandas/util/_test_decorators.py create mode 100644 pandas/util/_tester.py create mode 100644 pandas/util/_validators.py create mode 100644 pandas/util/testing.py create mode 100644 pyproject.toml create mode 100755 
release_stats.sh create mode 100644 requirements-dev.txt create mode 100755 scripts/build_dist.sh create mode 100755 scripts/build_dist_for_release.sh create mode 100644 scripts/download_wheels.py create mode 100755 scripts/find_commits_touching_func.py create mode 100755 scripts/generate_pip_deps_from_conda.py create mode 100755 scripts/list_future_warnings.sh create mode 100644 scripts/tests/__init__.py create mode 100644 scripts/tests/conftest.py create mode 100644 scripts/tests/test_validate_docstrings.py create mode 100755 scripts/validate_docstrings.py create mode 100755 scripts/validate_string_concatenation.py create mode 100644 setup.cfg create mode 100755 setup.py create mode 100644 test.bat create mode 100755 test.sh create mode 100644 test_fast.bat create mode 100755 test_fast.sh create mode 100755 test_rebuild.sh create mode 100644 versioneer.py create mode 100644 web/README.md create mode 100644 web/pandas/_templates/layout.html create mode 100644 web/pandas/about/citing.md create mode 100644 web/pandas/about/index.md create mode 100644 web/pandas/about/roadmap.md create mode 100644 web/pandas/about/sponsors.md create mode 100644 web/pandas/about/team.md create mode 100644 web/pandas/community/blog.html create mode 100644 web/pandas/community/coc.md create mode 100644 web/pandas/community/ecosystem.md create mode 100644 web/pandas/config.yml create mode 100644 web/pandas/contribute.md create mode 100644 web/pandas/donate.md create mode 100644 web/pandas/getting_started.md create mode 100644 web/pandas/index.html create mode 100644 web/pandas/static/css/pandas.css create mode 100644 web/pandas/static/img/favicon.ico create mode 100644 web/pandas/static/img/install/anaconda_prompt.png create mode 100644 web/pandas/static/img/install/jupyterlab_home.png create mode 100644 web/pandas/static/img/install/pandas_import_and_version.png create mode 100644 web/pandas/static/img/pandas.svg create mode 100644 web/pandas/static/img/pandas_mark.svg create mode 
100644 web/pandas/static/img/pandas_mark_white.svg create mode 100644 web/pandas/static/img/pandas_secondary.svg create mode 100644 web/pandas/static/img/pandas_secondary_white.svg create mode 100644 web/pandas/static/img/pandas_white.svg create mode 100644 web/pandas/static/img/partners/anaconda.svg create mode 100644 web/pandas/static/img/partners/numfocus.svg create mode 100644 web/pandas/static/img/partners/r_studio.svg create mode 100644 web/pandas/static/img/partners/tidelift.svg create mode 100644 web/pandas/static/img/partners/two_sigma.svg create mode 100644 web/pandas/static/img/partners/ursa_labs.svg create mode 100644 web/pandas/static/img/pydata_book.gif create mode 100644 web/pandas/try.md create mode 100644 web/pandas_web.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..736fa09d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,16 @@ +* text=auto +# enforce text on certain files +*.py text +*.pyx text +*.pyd text +*.c text +*.h text +*.html text +*.csv text +*.json text +*.pickle binary +*.h5 binary +*.dta binary +*.xls binary +*.xlsx binary +pandas/_version.py export-subst diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..a1fbece3 --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,63 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. 
+ +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing others' private information, such as physical or electronic + addresses, without explicit permission +* Other unethical or unprofessional conduct + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. + +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +- Safia Abdalla +- Tom Augspurger +- Joris Van den Bossche +- Camille Scott +- Nathaniel Smith + +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident.
+ +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[http://contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct + diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 00000000..2e6e9802 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,23 @@ +# Contributing to pandas + +Whether you are a novice or experienced software developer, all contributions and suggestions are welcome! + +Our main contributing guide can be found [in this repo](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst) or [on the website](https://pandas-docs.github.io/pandas-docs-travis/development/contributing.html). If you do not want to read it in its entirety, we will summarize the main ways in which you can contribute and point to relevant sections of that document for further information. + +## Getting Started + +If you are looking to contribute to the *pandas* codebase, the best place to start is the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues). This is also a great place for filing bug reports and making suggestions for ways in which we can improve the code and documentation. + +If you have additional questions, feel free to ask them on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). Further information can also be found in the "[Where to start?](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst#where-to-start)" section. 
+ +## Filing Issues + +If you notice a bug in the code or documentation, or have suggestions for how we can improve either, feel free to create an issue on the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) using [GitHub's "issue" form](https://github.com/pandas-dev/pandas/issues/new). The form contains some questions that will help us best address your issue. For more information regarding how to file issues against *pandas*, please refer to the "[Bug reports and enhancement requests](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst#bug-reports-and-enhancement-requests)" section. + +## Contributing to the Codebase + +The code is hosted on [GitHub](https://www.github.com/pandas-dev/pandas), so you will need to use [Git](http://git-scm.com/) to clone the project and make changes to the codebase. Once you have obtained a copy of the code, you should create a development environment that is separate from your existing Python environment so that you can make and test changes without compromising your own work environment. For more information, please refer to the "[Working with the code](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst#working-with-the-code)" section. + +Before submitting your changes for review, make sure to check that your changes do not break any tests. You can find more information about our test suites in the "[Test-driven development/code writing](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst#test-driven-development-code-writing)" section. We also have guidelines regarding coding style that will be enforced during testing, which can be found in the "[Code standards](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst#code-standards)" section. + +Once your changes are ready to be submitted, make sure to push your changes to GitHub before creating a pull request. 
Details about how to do that can be found in the "[Contributing your changes to pandas](https://github.com/pandas-dev/pandas/blob/master/doc/source/development/contributing.rst#contributing-your-changes-to-pandas)" section. We will review your changes, and you will most likely be asked to make additional changes before it is finally ready to merge. However, once it's ready, we will merge it, and you will have successfully contributed to the codebase! diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..27dfded8 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,3 @@ +custom: https://pandas.pydata.org/donate.html +github: [numfocus] +tidelift: pypi/pandas diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000..e33835c4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,29 @@ +#### Code Sample, a copy-pastable example if possible + +```python +# Your code here + +``` +#### Problem description + +[this should explain **why** the current behaviour is a problem and why the expected output is a better solution.] + +**Note**: We receive a lot of issues on our GitHub tracker, so it is very possible that your issue has been posted before. Please check first before submitting so that we do not have to handle and close duplicates! + +**Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. + +For documentation-related issues, you can check the latest versions of the docs on `master` here: + +https://pandas-docs.github.io/pandas-docs-travis/ + +If the issue has not been resolved there, go ahead and file it in the issue tracker. + +#### Expected Output + +#### Output of ``pd.show_versions()`` + +
<details> + +[paste the output of ``pd.show_versions()`` here below this line] + +</details>
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..7c387047 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,5 @@ +- [ ] closes #xxxx +- [ ] tests added / passed +- [ ] passes `black pandas` +- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff` +- [ ] whatsnew entry diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 00000000..f3b059a5 --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1 @@ +To report a security vulnerability to pandas, please go to https://tidelift.com/security and see the instructions there. diff --git a/.github/workflows/assign.yml b/.github/workflows/assign.yml new file mode 100644 index 00000000..019ecfc4 --- /dev/null +++ b/.github/workflows/assign.yml @@ -0,0 +1,15 @@ +name: Assign +on: + issue_comment: + types: created + +jobs: + one: + runs-on: ubuntu-latest + steps: + - name: + run: | + if [[ "${{ github.event.comment.body }}" == "take" ]]; then + echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}" + curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees + fi diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..d87fa520 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,159 @@ +name: CI + +on: + push: + branches: master + pull_request: + branches: master + +env: + ENV_FILE: environment.yml + +jobs: + checks: + name: Checks + runs-on: ubuntu-latest + steps: + + - name: Setting conda path + run: echo "::add-path::${HOME}/miniconda3/bin" + + - name: Checkout + uses: actions/checkout@v1 + + - name: Looking for unwanted patterns + run: ci/code_checks.sh patterns + if: always() + + - name: Setup environment and build pandas + run: ci/setup_env.sh + if: 
always() + + - name: Linting + run: | + source activate pandas-dev + ci/code_checks.sh lint + if: always() + + - name: Dependencies consistency + run: | + source activate pandas-dev + ci/code_checks.sh dependencies + if: always() + + - name: Checks on imported code + run: | + source activate pandas-dev + ci/code_checks.sh code + if: always() + + - name: Running doctests + run: | + source activate pandas-dev + ci/code_checks.sh doctests + if: always() + + - name: Docstring validation + run: | + source activate pandas-dev + ci/code_checks.sh docstrings + if: always() + + - name: Typing validation + run: | + source activate pandas-dev + ci/code_checks.sh typing + if: always() + + - name: Testing docstring validation script + run: | + source activate pandas-dev + pytest --capture=no --strict scripts + if: always() + + - name: Running benchmarks + run: | + source activate pandas-dev + cd asv_bench + asv check -E existing + git remote add upstream https://github.com/pandas-dev/pandas.git + git fetch upstream + if git diff upstream/master --name-only | grep -q "^asv_bench/"; then + asv machine --yes + asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log + if grep "failed" benchmarks.log > /dev/null ; then + exit 1 + fi + else + echo "Benchmarks did not run, no changes detected" + fi + if: always() + + - name: Publish benchmarks artifact + uses: actions/upload-artifact@master + with: + name: Benchmarks log + path: asv_bench/benchmarks.log + if: failure() + + web_and_docs: + name: Web and docs + runs-on: ubuntu-latest + steps: + + - name: Setting conda path + run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}" + + - name: Checkout + uses: actions/checkout@v1 + + - name: Setup environment and build pandas + run: ci/setup_env.sh + + - name: Build website + run: | + source activate pandas-dev + python web/pandas_web.py web/pandas --target-path=web/build + + - name: Build documentation + run: | + source activate pandas-dev + doc/make.py 
--warnings-are-errors | tee sphinx.log ; exit ${PIPESTATUS[0]} + + # This can be removed when the ipython directive fails when there are errors, + # including the `tee sphinx.log` in the previous step (https://github.com/ipython/ipython/issues/11547) + - name: Check ipython directive errors + run: "! grep -B1 \"^<<<-------------------------------------------------------------------------$\" sphinx.log" + + - name: Merge website and docs + run: | + mkdir -p pandas_web/docs + cp -r web/build/* pandas_web/ + cp -r doc/build/html/* pandas_web/docs/ + if: github.event_name == 'push' + + - name: Install Rclone + run: sudo apt install rclone -y + if: github.event_name == 'push' + + - name: Set up Rclone + run: | + RCLONE_CONFIG_PATH=$HOME/.config/rclone/rclone.conf + mkdir -p `dirname $RCLONE_CONFIG_PATH` + echo "[ovh_cloud_pandas_web]" > $RCLONE_CONFIG_PATH + echo "type = swift" >> $RCLONE_CONFIG_PATH + echo "env_auth = false" >> $RCLONE_CONFIG_PATH + echo "auth_version = 3" >> $RCLONE_CONFIG_PATH + echo "auth = https://auth.cloud.ovh.net/v3/" >> $RCLONE_CONFIG_PATH + echo "endpoint_type = public" >> $RCLONE_CONFIG_PATH + echo "tenant_domain = default" >> $RCLONE_CONFIG_PATH + echo "tenant = 2977553886518025" >> $RCLONE_CONFIG_PATH + echo "domain = default" >> $RCLONE_CONFIG_PATH + echo "user = w4KGs3pmDxpd" >> $RCLONE_CONFIG_PATH + echo "key = ${{ secrets.ovh_object_store_key }}" >> $RCLONE_CONFIG_PATH + echo "region = BHS" >> $RCLONE_CONFIG_PATH + if: github.event_name == 'push' + + - name: Sync web + run: rclone sync pandas_web ovh_cloud_pandas_web:dev + if: github.event_name == 'push' diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..6c3c275c --- /dev/null +++ b/.gitignore @@ -0,0 +1,120 @@ +######################################### +# Editor temporary/working/backup files # +.#* +*\#*\# +[#]*# +*~ +*$ +*.bak +*flymake* +*.iml +*.kdev4 +*.log +*.swp +*.pdb +.project +.pydevproject +.settings +.idea +.vagrant +.noseids +.ipynb_checkpoints +.tags
+.cache/ +.vscode/ + +# Compiled source # +################### +*.a +*.com +*.class +*.dll +*.exe +*.pxi +*.o +*.py[ocd] +*.so +.build_cache_dir +MANIFEST + +# Python files # +################ +# setup.py working directory +build +# sphinx build directory +doc/_build +# setup.py dist directory +dist +# Egg metadata +*.egg-info +.eggs +.pypirc + +# tox testing tool +.tox +# rope +.ropeproject +# wheel files +*.whl +**/wheelhouse/* +pip-wheel-metadata +# coverage +.coverage +coverage.xml +coverage_html_report +.mypy_cache +*.pytest_cache +# hypothesis test database +.hypothesis/ +__pycache__ +# pytest-monkeytype +monkeytype.sqlite3 + + +# OS generated files # +###################### +.directory +.gdb_history +.DS_Store +ehthumbs.db +Icon? +Thumbs.db + +# Data files # +############## +*.dta +*.xpt +*.h5 +pandas/io/*.dat +pandas/io/*.json +scikits + +# Generated Sources # +##################### +!skts.c +!np_datetime.c +!np_datetime_strings.c +*.c +*.cpp + +# Unit / Performance Testing # +############################## +asv_bench/env/ +asv_bench/html/ +asv_bench/results/ +asv_bench/pandas/ + +# Documentation generated files # +################################# +doc/source/generated +doc/source/user_guide/styled.xlsx +doc/source/reference/api +doc/source/_static +doc/source/vbench +doc/source/vbench.rst +doc/source/index.rst +doc/build/html/index.html +# Windows specific leftover: +doc/tmp.sv +env/ +doc/source/savefig/ diff --git a/.pep8speaks.yml b/.pep8speaks.yml new file mode 100644 index 00000000..5a83727d --- /dev/null +++ b/.pep8speaks.yml @@ -0,0 +1,4 @@ +# File : .pep8speaks.yml + +scanner: + diff_only: True # If True, errors caused by only the patch are shown diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..809764a2 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +repos: +- repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black + language_version: python3.7 +- repo: 
https://gitlab.com/pycqa/flake8 + rev: 3.7.7 + hooks: + - id: flake8 + language: python_venv + additional_dependencies: [flake8-comprehensions>=3.1.0] +- repo: https://github.com/pre-commit/mirrors-isort + rev: v4.3.21 + hooks: + - id: isort + language: python_venv + exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$ +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.730 + hooks: + - id: mypy + # We run mypy over all files because of: + # * changes in type definitions may affect non-touched files. + # * Running it with `mypy pandas` and the filenames will lead to + # spurious duplicate module errors, + # see also https://github.com/pre-commit/mirrors-mypy/issues/5 + pass_filenames: false + args: + - pandas diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..a23bc8a4 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,91 @@ +language: python +python: 3.7 + +# To turn off cached cython files and compiler cache +# set NOCACHE-true +# To delete caches go to https://travis-ci.org/OWNER/REPOSITORY/caches or run +# travis cache --delete inside the project directory from the travis command line client +# The cache directories will be deleted if anything in ci/ changes in a commit +cache: + ccache: true + directories: + - $HOME/.cache # cython cache + - $HOME/.ccache # compiler cache + +env: + global: + # create a github personal access token + # cd pandas-dev/pandas + # travis encrypt 'PANDAS_GH_TOKEN=personal_access_token' -r pandas-dev/pandas + - secure: "EkWLZhbrp/mXJOx38CHjs7BnjXafsqHtwxPQrqWy457VDFWhIY1DMnIR/lOWG+a20Qv52sCsFtiZEmMfUjf0pLGXOqurdxbYBGJ7/ikFLk9yV2rDwiArUlVM9bWFnFxHvdz9zewBH55WurrY4ShZWyV+x2dWjjceWG5VpWeI6sA=" + +git: + # for cloning + depth: false + +matrix: + fast_finish: true + + include: + - env: + - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)" + + - env: + - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)" + + - env: + - 
JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1" + services: + - mysql + - postgresql + + - env: + - JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1" + services: + - mysql + - postgresql + + - env: + - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1" + services: + - mysql + - postgresql + +before_install: + - echo "before_install" + # clear the non-blocking flag on stdout (restore blocking IO) on travis + # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 + - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' + - source ci/travis_process_gbq_encryption.sh + - export PATH="$HOME/miniconda3/bin:$PATH" + - df -h + - pwd + - uname -a + - git --version + - ./ci/check_git_tags.sh + # Because travis runs on Google Cloud and has a /etc/boto.cfg, + # it breaks moto import, see: + # https://github.com/spulec/moto/issues/1771 + # https://github.com/boto/boto/issues/3741 + # This overrides travis and tells it to look nowhere.
+ - export BOTO_CONFIG=/dev/null + + +install: + - echo "install start" + - ci/prep_cython_cache.sh + - ci/setup_env.sh + - ci/submit_cython_cache.sh + - echo "install done" + +script: + - echo "script start" + - echo "$JOB" + - source activate pandas-dev + - ci/run_tests.sh + +after_script: + - echo "after_script start" + - source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd + - ci/print_skipped.py + - echo "after_script done" diff --git a/AUTHORS.md b/AUTHORS.md new file mode 100644 index 00000000..dcaaea10 --- /dev/null +++ b/AUTHORS.md @@ -0,0 +1,57 @@ +About the Copyright Holders +=========================== + +* Copyright (c) 2008-2011 AQR Capital Management, LLC + + AQR Capital Management began pandas development in 2008. Development was + led by Wes McKinney. AQR released the source under this license in 2009. +* Copyright (c) 2011-2012, Lambda Foundry, Inc. + + Wes is now an employee of Lambda Foundry, and remains the pandas project + lead. +* Copyright (c) 2011-2012, PyData Development Team + + The PyData Development Team is the collection of developers of the PyData + project. This includes all of the PyData sub-projects, including pandas. The + core team that coordinates development on GitHub can be found here: + http://github.com/pydata. + +Full credits for pandas contributors can be found in the documentation. + +Our Copyright Policy +==================== + +PyData uses a shared copyright model. Each contributor maintains copyright +over their contributions to PyData. However, it is important to note that +these contributions are typically only changes to the repositories. Thus, +the PyData source code, in its entirety, is not the copyright of any single +person or institution. Instead, it is the collective copyright of the +entire PyData Development Team. 
If individual contributors want to maintain +a record of what changes/contributions they have specific copyright on, +they should indicate their copyright in the commit message of the change +when they commit the change to one of the PyData repositories. + +With this in mind, the following banner should be used in any source code +file to indicate the copyright and license terms: + +``` +#----------------------------------------------------------------------------- +# Copyright (c) 2012, PyData Development Team +# All rights reserved. +# +# Distributed under the terms of the BSD Simplified License. +# +# The full license is in the LICENSE file, distributed with this software. +#----------------------------------------------------------------------------- +``` + +Other licenses can be found in the LICENSES directory. + +License +======= + +pandas is distributed under a 3-clause ("Simplified" or "New") BSD +license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have +BSD-compatible licenses, are included. Their licenses follow the pandas +license. + diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..924de262 --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/DATEUTIL_LICENSE b/LICENSES/DATEUTIL_LICENSE new file mode 100644 index 00000000..6053d35c --- /dev/null +++ b/LICENSES/DATEUTIL_LICENSE @@ -0,0 +1,54 @@ +Copyright 2017- Paul Ganssle +Copyright 2017- dateutil contributors (see AUTHORS file) + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +The above license applies to all contributions after 2017-12-01, as well as +all contributions that have been re-licensed (see AUTHORS file for the list of +contributors who have re-licensed their code). 
+-------------------------------------------------------------------------------- +dateutil - Extensions to the standard Python datetime module. + +Copyright (c) 2003-2011 - Gustavo Niemeyer +Copyright (c) 2012-2014 - Tomi Pieviläinen +Copyright (c) 2014-2016 - Yaron de Leeuw +Copyright (c) 2015- - Paul Ganssle +Copyright (c) 2015- - dateutil contributors (see AUTHORS file) + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The above BSD License Applies to all code, even that also covered by Apache 2.0. 
diff --git a/LICENSES/HAVEN_LICENSE b/LICENSES/HAVEN_LICENSE new file mode 100644 index 00000000..2f444cb4 --- /dev/null +++ b/LICENSES/HAVEN_LICENSE @@ -0,0 +1,2 @@ +YEAR: 2013-2016 +COPYRIGHT HOLDER: Hadley Wickham; RStudio; and Evan Miller diff --git a/LICENSES/HAVEN_MIT b/LICENSES/HAVEN_MIT new file mode 100644 index 00000000..b03d0e64 --- /dev/null +++ b/LICENSES/HAVEN_MIT @@ -0,0 +1,32 @@ +Based on http://opensource.org/licenses/MIT + +This is a template. Complete and ship as file LICENSE the following 2 +lines (only) + +YEAR: +COPYRIGHT HOLDER: + +and specify as + +License: MIT + file LICENSE + +Copyright (c) <YEAR>, <COPYRIGHT HOLDER> + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/LICENSES/MUSL_LICENSE b/LICENSES/MUSL_LICENSE new file mode 100644 index 00000000..a8833d4b --- /dev/null +++ b/LICENSES/MUSL_LICENSE @@ -0,0 +1,132 @@ +musl as a whole is licensed under the following standard MIT license: + +---------------------------------------------------------------------- +Copyright © 2005-2014 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +---------------------------------------------------------------------- + +Authors/contributors include: + +Anthony G. Basile +Arvid Picciani +Bobby Bingham +Boris Brezillon +Brent Cook +Chris Spiegel +Clément Vasseur +Emil Renner Berthing +Hiltjo Posthuma +Isaac Dunham +Jens Gustedt +Jeremy Huntwork +John Spencer +Justin Cormack +Luca Barbato +Luka Perkov +M Farkas-Dyck (Strake) +Michael Forney +Nicholas J. 
Kain +orc +Pascal Cuoq +Pierre Carrier +Rich Felker +Richard Pennington +sin +Solar Designer +Stefan Kristiansson +Szabolcs Nagy +Timo Teräs +Valentin Ochs +William Haddon + +Portions of this software are derived from third-party works licensed +under terms compatible with the above MIT license: + +The TRE regular expression implementation (src/regex/reg* and +src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed +under a 2-clause BSD license (license text in the source files). The +included version has been heavily modified by Rich Felker in 2012, in +the interests of size, simplicity, and namespace cleanliness. + +Much of the math library code (src/math/* and src/complex/*) is +Copyright © 1993,2004 Sun Microsystems or +Copyright © 2003-2011 David Schultz or +Copyright © 2003-2009 Steven G. Kargl or +Copyright © 2003-2009 Bruce D. Evans or +Copyright © 2008 Stephen L. Moshier +and labelled as such in comments in the individual source files. All +have been licensed under extremely permissive terms. + +The ARM memcpy code (src/string/armel/memcpy.s) is Copyright © 2008 +The Android Open Source Project and is licensed under a two-clause BSD +license. It was taken from Bionic libc, used on Android. + +The implementation of DES for crypt (src/misc/crypt_des.c) is +Copyright © 1994 David Burren. It is licensed under a BSD license. + +The implementation of blowfish crypt (src/misc/crypt_blowfish.c) was +originally written by Solar Designer and placed into the public +domain. The code also comes with a fallback permissive license for use +in jurisdictions that may not recognize the public domain. + +The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011 +Valentin Ochs and is licensed under an MIT-style license. 
+ +The BSD PRNG implementation (src/prng/random.c) and XSI search API +(src/search/*.c) functions are Copyright © 2011 Szabolcs Nagy and +licensed under following terms: "Permission to use, copy, modify, +and/or distribute this code for any purpose with or without fee is +hereby granted. There is no warranty." + +The x86_64 port was written by Nicholas J. Kain. Several files (crt) +were released into the public domain; others are licensed under the +standard MIT license terms at the top of this file. See individual +files for their copyright status. + +The mips and microblaze ports were originally written by Richard +Pennington for use in the ellcc project. The original code was adapted +by Rich Felker for build system and code conventions during upstream +integration. It is licensed under the standard MIT terms. + +The powerpc port was also originally written by Richard Pennington, +and later supplemented and integrated by John Spencer. It is licensed +under the standard MIT terms. + +All other files which have no copyright comments are original works +produced specifically for use as part of this library, written either +by Rich Felker, the main author of the library, or by one or more +contributors listed above. Details on authorship of individual files +can be found in the git version control history of the project. The +omission of copyright and license comments in each file is in the +interest of source tree size. + +All public header files (include/* and arch/*/bits/*) should be +treated as Public Domain as they intentionally contain no content +which can be covered by copyright. Some source modules may fall in +this category as well. If you believe that a file is so trivial that +it should be in the Public Domain, please contact the authors and +request an explicit statement releasing it from copyright.
+ +The following files are trivial, believed not to be copyrightable in +the first place, and hereby explicitly released to the Public Domain: + +All public headers: include/*, arch/*/bits/* +Startup files: crt/* diff --git a/LICENSES/NUMPY_LICENSE b/LICENSES/NUMPY_LICENSE new file mode 100644 index 00000000..7e972cff --- /dev/null +++ b/LICENSES/NUMPY_LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2005-2011, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/LICENSES/OTHER b/LICENSES/OTHER new file mode 100644 index 00000000..f0550b4e --- /dev/null +++ b/LICENSES/OTHER @@ -0,0 +1,80 @@ +numpydoc license +---------------- + +The numpydoc license is in pandas/doc/sphinxext/LICENSE.txt + +Bottleneck license +------------------ + +Copyright (c) 2010-2012 Archipel Asset Management AB. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +google-api-python-client license +-------------------------------- + +Copyright (C) 2012 Google Inc. +All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Pyperclip v1.3 license +---------------------- + +Copyright (c) 2010, Albert Sweigart +All rights reserved. + +BSD-style license: + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the pyperclip nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY Albert Sweigart "AS IS" AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL Albert Sweigart BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/LICENSES/PSF_LICENSE b/LICENSES/PSF_LICENSE new file mode 100644 index 00000000..5cdb01e8 --- /dev/null +++ b/LICENSES/PSF_LICENSE @@ -0,0 +1,279 @@ +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see http://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see http://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations (now Zope +Corporation, see http://www.zope.com). In 2001, the Python Software +Foundation (PSF, see http://www.python.org/psf/) was formed, a +non-profit organization created specifically to own Python-related +Intellectual Property. Zope Corporation is a sponsoring member of +the PSF. + +All Python releases are Open Source (see http://www.opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? 
(1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.2 2.1.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2.1 2.2 2002 PSF yes + 2.2.2 2.2.1 2002 PSF yes + 2.2.3 2.2.2 2003 PSF yes + 2.3 2.2.2 2002-2003 PSF yes + 2.3.1 2.3 2002-2003 PSF yes + 2.3.2 2.3.1 2002-2003 PSF yes + 2.3.3 2.3.2 2002-2003 PSF yes + 2.3.4 2.3.3 2004 PSF yes + 2.3.5 2.3.4 2005 PSF yes + 2.4 2.3 2004 PSF yes + 2.4.1 2.4 2005 PSF yes + 2.4.2 2.4.1 2005 PSF yes + 2.4.3 2.4.2 2006 PSF yes + 2.4.4 2.4.3 2006 PSF yes + 2.5 2.4 2006 PSF yes + 2.5.1 2.5 2007 PSF yes + 2.5.2 2.5.1 2008 PSF yes + 2.5.3 2.5.2 2008 PSF yes + 2.6 2.5 2008 PSF yes + 2.6.1 2.6 2008 PSF yes + 2.6.2 2.6.1 2009 PSF yes + 2.6.3 2.6.2 2009 PSF yes + 2.6.4 2.6.3 2009 PSF yes + 2.6.5 2.6.4 2010 PSF yes + 2.7 2.6 2010 PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. 
This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 +Python Software Foundation; All Rights Reserved" are retained in Python alone or +in any derivative version prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. 
Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. 
BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the Internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the Internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. 
CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. 
+ +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/LICENSES/SAS7BDAT_LICENSE b/LICENSES/SAS7BDAT_LICENSE new file mode 100644 index 00000000..8fbf1940 --- /dev/null +++ b/LICENSES/SAS7BDAT_LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2015 Jared Hobbs + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/LICENSES/SCIPY_LICENSE b/LICENSES/SCIPY_LICENSE new file mode 100644 index 00000000..d887ce5f --- /dev/null +++ b/LICENSES/SCIPY_LICENSE @@ -0,0 +1,31 @@ +Copyright (c) 2001, 2002 Enthought, Inc. +All rights reserved. + +Copyright (c) 2003-2012 SciPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + a. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + c. Neither the name of Enthought nor the names of the SciPy Developers + may be used to endorse or promote products derived from this software + without specific prior written permission. + + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. + diff --git a/LICENSES/ULTRAJSON_LICENSE b/LICENSES/ULTRAJSON_LICENSE new file mode 100644 index 00000000..3b2886eb --- /dev/null +++ b/LICENSES/ULTRAJSON_LICENSE @@ -0,0 +1,34 @@ +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. diff --git a/LICENSES/XARRAY_LICENSE b/LICENSES/XARRAY_LICENSE new file mode 100644 index 00000000..37ec93a1 --- /dev/null +++ b/LICENSES/XARRAY_LICENSE @@ -0,0 +1,191 @@ +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and +distribution as defined by Sections 1 through 9 of this document. + +"Licensor" shall mean the copyright owner or entity authorized by the copyright +owner that is granting the License. + +"Legal Entity" shall mean the union of the acting entity and all other entities +that control, are controlled by, or are under common control with that entity. 
+For the purposes of this definition, "control" means (i) the power, direct or +indirect, to cause the direction or management of such entity, whether by +contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the +outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising +permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including +but not limited to software source code, documentation source, and configuration +files. + +"Object" form shall mean any form resulting from mechanical transformation or +translation of a Source form, including but not limited to compiled object code, +generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made +available under the License, as indicated by a copyright notice that is included +in or attached to the work (an example is provided in the Appendix below). + +"Derivative Works" shall mean any work, whether in Source or Object form, that +is based on (or derived from) the Work and for which the editorial revisions, +annotations, elaborations, or other modifications represent, as a whole, an +original work of authorship. For the purposes of this License, Derivative Works +shall not include works that remain separable from, or merely link (or bind by +name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version +of the Work and any modifications or additions to that Work or Derivative Works +thereof, that is intentionally submitted to Licensor for inclusion in the Work +by the copyright owner or by an individual or Legal Entity authorized to submit +on behalf of the copyright owner. 
For the purposes of this definition, +"submitted" means any form of electronic, verbal, or written communication sent +to the Licensor or its representatives, including but not limited to +communication on electronic mailing lists, source code control systems, and +issue tracking systems that are managed by, or on behalf of, the Licensor for +the purpose of discussing and improving the Work, but excluding communication +that is conspicuously marked or otherwise designated in writing by the copyright +owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf +of whom a Contribution has been received by Licensor and subsequently +incorporated within the Work. + +2. Grant of Copyright License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable copyright license to reproduce, prepare Derivative Works of, +publicly display, publicly perform, sublicense, and distribute the Work and such +Derivative Works in Source or Object form. + +3. Grant of Patent License. + +Subject to the terms and conditions of this License, each Contributor hereby +grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, +irrevocable (except as stated in this section) patent license to make, have +made, use, offer to sell, sell, import, and otherwise transfer the Work, where +such license applies only to those patent claims licensable by such Contributor +that are necessarily infringed by their Contribution(s) alone or by combination +of their Contribution(s) with the Work to which such Contribution(s) was +submitted. 
If You institute patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Work or a +Contribution incorporated within the Work constitutes direct or contributory +patent infringement, then any patent licenses granted to You under this License +for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. + +You may reproduce and distribute copies of the Work or Derivative Works thereof +in any medium, with or without modifications, and in Source or Object form, +provided that You meet the following conditions: + +You must give any other recipients of the Work or Derivative Works a copy of +this License; and +You must cause any modified files to carry prominent notices stating that You +changed the files; and +You must retain, in the Source form of any Derivative Works that You distribute, +all copyright, patent, trademark, and attribution notices from the Source form +of the Work, excluding those notices that do not pertain to any part of the +Derivative Works; and +If the Work includes a "NOTICE" text file as part of its distribution, then any +Derivative Works that You distribute must include a readable copy of the +attribution notices contained within such NOTICE file, excluding those notices +that do not pertain to any part of the Derivative Works, in at least one of the +following places: within a NOTICE text file distributed as part of the +Derivative Works; within the Source form or documentation, if provided along +with the Derivative Works; or, within a display generated by the Derivative +Works, if and wherever such third-party notices normally appear. The contents of +the NOTICE file are for informational purposes only and do not modify the +License. 
You may add Your own attribution notices within Derivative Works that +You distribute, alongside or as an addendum to the NOTICE text from the Work, +provided that such additional attribution notices cannot be construed as +modifying the License. +You may add Your own copyright statement to Your modifications and may provide +additional or different license terms and conditions for use, reproduction, or +distribution of Your modifications, or for any such Derivative Works as a whole, +provided Your use, reproduction, and distribution of the Work otherwise complies +with the conditions stated in this License. + +5. Submission of Contributions. + +Unless You explicitly state otherwise, any Contribution intentionally submitted +for inclusion in the Work by You to the Licensor shall be under the terms and +conditions of this License, without any additional terms or conditions. +Notwithstanding the above, nothing herein shall supersede or modify the terms of +any separate license agreement you may have executed with Licensor regarding +such Contributions. + +6. Trademarks. + +This License does not grant permission to use the trade names, trademarks, +service marks, or product names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. + +Unless required by applicable law or agreed to in writing, Licensor provides the +Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, +including, without limitation, any warranties or conditions of TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are +solely responsible for determining the appropriateness of using or +redistributing the Work and assume any risks associated with Your exercise of +permissions under this License. + +8. Limitation of Liability. 
+ +In no event and under no legal theory, whether in tort (including negligence), +contract, or otherwise, unless required by applicable law (such as deliberate +and grossly negligent acts) or agreed to in writing, shall any Contributor be +liable to You for damages, including any direct, indirect, special, incidental, +or consequential damages of any character arising as a result of this License or +out of the use or inability to use the Work (including but not limited to +damages for loss of goodwill, work stoppage, computer failure or malfunction, or +any and all other commercial damages or losses), even if such Contributor has +been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. + +While redistributing the Work or Derivative Works thereof, You may choose to +offer, and charge a fee for, acceptance of support, warranty, indemnity, or +other liability obligations and/or rights consistent with this License. However, +in accepting such obligations, You may act only on Your own behalf and on Your +sole responsibility, not on behalf of any other Contributor, and only if You +agree to indemnify, defend, and hold each Contributor harmless for any liability +incurred by, or claims asserted against, such Contributor by reason of your +accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work + +To apply the Apache License to your work, attach the following boilerplate +notice, with the fields enclosed by brackets "[]" replaced with your own +identifying information. (Don't include the brackets!) The text should be +enclosed in the appropriate comment syntax for the file format. We also +recommend that a file or class name and description of purpose be included on +the same "printed page" as the copyright notice for easier identification within +third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..cf6a1835 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,45 @@ +include MANIFEST.in +include LICENSE +include RELEASE.md +include README.md +include setup.py +include pyproject.toml + +graft doc +prune doc/build + +graft LICENSES + +graft pandas + +global-exclude *.bz2 +global-exclude *.csv +global-exclude *.dta +global-exclude *.feather +global-exclude *.gz +global-exclude *.h5 +global-exclude *.html +global-exclude *.json +global-exclude *.pickle +global-exclude *.png +global-exclude *.pyc +global-exclude *.pyd +global-exclude *.ods +global-exclude *.odt +global-exclude *.sas7bdat +global-exclude *.sav +global-exclude *.so +global-exclude *.xls +global-exclude *.xlsm +global-exclude *.xlsx +global-exclude *.xpt +global-exclude *.xz +global-exclude *.zip +global-exclude *~ +global-exclude .DS_Store +global-exclude .git* +global-exclude \#* + +include versioneer.py +include pandas/_version.py +include pandas/io/formats/templates/*.tpl diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..f26689ab --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +.PHONY : develop build clean clean_pyc doc lint-diff black + +all: develop + +clean: + -python setup.py clean + +clean_pyc: + -find . 
-name '*.py[co]' -exec rm {} \; + +build: clean_pyc + python setup.py build_ext --inplace + +lint-diff: + git diff upstream/master --name-only -- "*.py" | xargs flake8 + +black: + black . + +develop: build + python -m pip install --no-build-isolation -e . + +doc: + -rm -rf doc/build doc/source/generated + cd doc; \ + python make.py clean; \ + python make.py html diff --git a/README.md b/README.md new file mode 100644 index 00000000..1130eb30 --- /dev/null +++ b/README.md @@ -0,0 +1,237 @@ +
+
+
+ +----------------- + +# pandas: powerful Python data analysis toolkit + + + + + + + + + + + + + + + + + + + + + + + + + + + +   + + + + + + + + + +
Latest Release + + latest release + +
+ + latest release + +
Package Status + + status + +
License + + license + +
Build Status + + travis build status + +
+ + Azure Pipelines build status + +
Coverage + + coverage + +
Downloads + + conda-forge downloads + +
Gitter + + + +
+ + + +## What is it? + +**pandas** is a Python package providing fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way towards this goal. + +## Main Features +Here are just a few of the things that pandas does well: + + - Easy handling of [**missing data**][missing-data] (represented as + `NaN`) in floating point as well as non-floating point data + - Size mutability: columns can be [**inserted and + deleted**][insertion-deletion] from DataFrame and higher dimensional + objects + - Automatic and explicit [**data alignment**][alignment]: objects can + be explicitly aligned to a set of labels, or the user can simply + ignore the labels and let `Series`, `DataFrame`, etc. 
automatically + align the data for you in computations + - Powerful, flexible [**group by**][groupby] functionality to perform + split-apply-combine operations on data sets, for both aggregating + and transforming data + - Make it [**easy to convert**][conversion] ragged, + differently-indexed data in other Python and NumPy data structures + into DataFrame objects + - Intelligent label-based [**slicing**][slicing], [**fancy + indexing**][fancy-indexing], and [**subsetting**][subsetting] of + large data sets + - Intuitive [**merging**][merging] and [**joining**][joining] data + sets + - Flexible [**reshaping**][reshape] and [**pivoting**][pivot-table] of + data sets + - [**Hierarchical**][mi] labeling of axes (possible to have multiple + labels per tick) + - Robust IO tools for loading data from [**flat files**][flat-files] + (CSV and delimited), [**Excel files**][excel], [**databases**][db], + and saving/loading data from the ultrafast [**HDF5 format**][hdfstore] + - [**Time series**][timeseries]-specific functionality: date range + generation and frequency conversion, moving window statistics, + date shifting and lagging. 
+ + + [missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data + [insertion-deletion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#column-selection-addition-deletion + [alignment]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html?highlight=alignment#intro-to-data-structures + [groupby]: https://pandas.pydata.org/pandas-docs/stable/groupby.html#group-by-split-apply-combine + [conversion]: https://pandas.pydata.org/pandas-docs/stable/dsintro.html#dataframe + [slicing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#slicing-ranges + [fancy-indexing]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#advanced-indexing-with-ix + [subsetting]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#boolean-indexing + [merging]: https://pandas.pydata.org/pandas-docs/stable/merging.html#database-style-dataframe-joining-merging + [joining]: https://pandas.pydata.org/pandas-docs/stable/merging.html#joining-on-index + [reshape]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#reshaping-and-pivot-tables + [pivot-table]: https://pandas.pydata.org/pandas-docs/stable/reshaping.html#pivot-tables-and-cross-tabulations + [mi]: https://pandas.pydata.org/pandas-docs/stable/indexing.html#hierarchical-indexing-multiindex + [flat-files]: https://pandas.pydata.org/pandas-docs/stable/io.html#csv-text-files + [excel]: https://pandas.pydata.org/pandas-docs/stable/io.html#excel-files + [db]: https://pandas.pydata.org/pandas-docs/stable/io.html#sql-queries + [hdfstore]: https://pandas.pydata.org/pandas-docs/stable/io.html#hdf5-pytables + [timeseries]: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#time-series-date-functionality + +## Where to get it +The source code is currently hosted on GitHub at: +https://github.com/pandas-dev/pandas + +Binary installers for the latest released version are available at the [Python +package index](https://pypi.org/project/pandas) and on 
conda. + +```sh +# conda +conda install pandas +``` + +```sh +# or PyPI +pip install pandas +``` + +## Dependencies +- [NumPy](https://www.numpy.org) +- [python-dateutil](https://labix.org/python-dateutil) +- [pytz](https://pythonhosted.org/pytz) + +See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies. + +## Installation from sources +To install pandas from source you need Cython in addition to the normal +dependencies above. Cython can be installed from PyPI: + +```sh +pip install cython +``` + +In the `pandas` directory (same one where you found this file after +cloning the git repo), execute: + +```sh +python setup.py install +``` + +or for installing in [development mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs): + + +```sh +python -m pip install -e . --no-build-isolation --no-use-pep517 +``` + +If you have `make`, you can also use `make develop` to run the same command. + +or alternatively + +```sh +python setup.py develop +``` + +See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source). + +## License +[BSD 3](LICENSE) + +## Documentation +The official documentation is hosted on PyData.org: https://pandas.pydata.org/pandas-docs/stable + +## Background +Work on ``pandas`` started at AQR (a quantitative hedge fund) in 2008 and +has been under active development since then. + +## Getting Help + +For usage questions, the best place to go is [StackOverflow](https://stackoverflow.com/questions/tagged/pandas). +Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata). + +## Discussion and Development +Most development discussion is taking place on GitHub in this repo.
Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions. + +## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas) + +All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome. + +A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub. + +If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out. + +You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas). + +Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it! + +Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas). + +As contributors and maintainers to this project, you are expected to abide by pandas' code of conduct. 
More information can be found at: [Contributor Code of Conduct](https://github.com/pandas-dev/pandas/blob/master/.github/CODE_OF_CONDUCT.md) diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 00000000..7924ffaf --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,6 @@ +Release Notes +============= + +The list of changes to pandas between each release can be found +[here](https://pandas.pydata.org/pandas-docs/stable/whatsnew/index.html). For full +details, see the commit logs at https://github.com/pandas-dev/pandas. diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json new file mode 100644 index 00000000..cd1a31d4 --- /dev/null +++ b/asv_bench/asv.conf.json @@ -0,0 +1,129 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "pandas", + + // The project's homepage + "project_url": "https://pandas.pydata.org/", + + // The URL of the source code repository for the project being + // benchmarked + "repo": "..", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or another value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. + "environment_type": "conda", + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/pandas-dev/pandas/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["2.7", "3.4"], + "pythons": ["3.6"], + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is not to be + // installed.
If the package to be tested is only available from + // PyPI, and the 'environment_type' is conda, then you can prefix + // the package name with 'pip+', and the package will be installed via + // pip (with all the conda-available packages installed first, + // followed by the pip-installed packages). + "matrix": { + "numpy": [], + "Cython": [], + "matplotlib": [], + "sqlalchemy": [], + "scipy": [], + "numexpr": [], + "pytables": [null, ""], // platform dependent, see excludes below + "tables": [null, ""], + "openpyxl": [], + "xlsxwriter": [], + "xlrd": [], + "xlwt": [], + "odfpy": [], + "pytest": [], + // If using Windows with python 2.7 and want to build using the + // mingw toolchain (rather than MSVC), uncomment the following line. + // "libpython": [], + }, + "conda_channels": ["defaults", "conda-forge"], + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+ "exclude": [ + // On conda install pytables, otherwise tables + {"environment_type": "conda", "tables": ""}, + {"environment_type": "conda", "pytables": null}, + {"environment_type": "(?!conda).*", "tables": null}, + {"environment_type": "(?!conda).*", "pytables": ""}, + ], + "include": [], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + // "env_dir": "env", + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + // "results_dir": "results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". + // "html_dir": "html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache wheels of the recent builds in each + // environment, making them faster to install next time. This is the + // number of builds to keep, per environment. + "build_cache_size": 8, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark.
+ // + "regressions_first_commits": { + ".*": "0409521665" + }, + "regression_thresholds": { + }, + "build_command": + ["python setup.py build -j4", + "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"], +} diff --git a/asv_bench/benchmarks/__init__.py b/asv_bench/benchmarks/__init__.py new file mode 100644 index 00000000..eada1478 --- /dev/null +++ b/asv_bench/benchmarks/__init__.py @@ -0,0 +1 @@ +"""Pandas benchmarks.""" diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py new file mode 100644 index 00000000..0f3b3838 --- /dev/null +++ b/asv_bench/benchmarks/algorithms.py @@ -0,0 +1,187 @@ +from importlib import import_module + +import numpy as np + +from pandas._libs import lib + +import pandas as pd + +from .pandas_vb_common import tm + +for imp in ["pandas.util", "pandas.tools.hashing"]: + try: + hashing = import_module(imp) + break + except (ImportError, TypeError, ValueError): + pass + + +class MaybeConvertObjects: + def setup(self): + N = 10 ** 5 + + data = list(range(N)) + data[0] = pd.NaT + data = np.array(data) + self.data = data + + def time_maybe_convert_objects(self): + lib.maybe_convert_objects(self.data) + + +class Factorize: + + params = [[True, False], ["int", "uint", "float", "string"]] + param_names = ["sort", "dtype"] + + def setup(self, sort, dtype): + N = 10 ** 5 + data = { + "int": pd.Int64Index(np.arange(N).repeat(5)), + "uint": pd.UInt64Index(np.arange(N).repeat(5)), + "float": pd.Float64Index(np.random.randn(N).repeat(5)), + "string": tm.makeStringIndex(N).repeat(5), + } + self.idx = data[dtype] + + def time_factorize(self, sort, dtype): + self.idx.factorize(sort=sort) + + +class FactorizeUnique: + + params = [[True, False], ["int", "uint", "float", "string"]] + param_names = ["sort", "dtype"] + + def setup(self, sort, dtype): + N = 10 ** 5 + data = { + "int": pd.Int64Index(np.arange(N)), + "uint": pd.UInt64Index(np.arange(N)), + "float": 
pd.Float64Index(np.arange(N)), + "string": tm.makeStringIndex(N), + } + self.idx = data[dtype] + assert self.idx.is_unique + + def time_factorize(self, sort, dtype): + self.idx.factorize(sort=sort) + + +class Duplicated: + + params = [["first", "last", False], ["int", "uint", "float", "string"]] + param_names = ["keep", "dtype"] + + def setup(self, keep, dtype): + N = 10 ** 5 + data = { + "int": pd.Int64Index(np.arange(N).repeat(5)), + "uint": pd.UInt64Index(np.arange(N).repeat(5)), + "float": pd.Float64Index(np.random.randn(N).repeat(5)), + "string": tm.makeStringIndex(N).repeat(5), + } + self.idx = data[dtype] + # cache is_unique + self.idx.is_unique + + def time_duplicated(self, keep, dtype): + self.idx.duplicated(keep=keep) + + +class DuplicatedUniqueIndex: + + params = ["int", "uint", "float", "string"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10 ** 5 + data = { + "int": pd.Int64Index(np.arange(N)), + "uint": pd.UInt64Index(np.arange(N)), + "float": pd.Float64Index(np.random.randn(N)), + "string": tm.makeStringIndex(N), + } + self.idx = data[dtype] + # cache is_unique + self.idx.is_unique + + def time_duplicated_unique(self, dtype): + self.idx.duplicated() + + +class Hashing: + def setup_cache(self): + N = 10 ** 5 + + df = pd.DataFrame( + { + "strings": pd.Series( + tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N)) + ), + "floats": np.random.randn(N), + "ints": np.arange(N), + "dates": pd.date_range("20110101", freq="s", periods=N), + "timedeltas": pd.timedelta_range("1 day", freq="s", periods=N), + } + ) + df["categories"] = df["strings"].astype("category") + df.iloc[10:20] = np.nan + return df + + def time_frame(self, df): + hashing.hash_pandas_object(df) + + def time_series_int(self, df): + hashing.hash_pandas_object(df["ints"]) + + def time_series_string(self, df): + hashing.hash_pandas_object(df["strings"]) + + def time_series_float(self, df): + hashing.hash_pandas_object(df["floats"]) + + def 
time_series_categorical(self, df): + hashing.hash_pandas_object(df["categories"]) + + def time_series_timedeltas(self, df): + hashing.hash_pandas_object(df["timedeltas"]) + + def time_series_dates(self, df): + hashing.hash_pandas_object(df["dates"]) + + +class Quantile: + params = [ + [0, 0.5, 1], + ["linear", "nearest", "lower", "higher", "midpoint"], + ["float", "int", "uint"], + ] + param_names = ["quantile", "interpolation", "dtype"] + + def setup(self, quantile, interpolation, dtype): + N = 10 ** 5 + data = { + "int": np.arange(N), + "uint": np.arange(N).astype(np.uint64), + "float": np.random.randn(N), + } + self.idx = pd.Series(data[dtype].repeat(5)) + + def time_quantile(self, quantile, interpolation, dtype): + self.idx.quantile(quantile, interpolation=interpolation) + + +class SortIntegerArray: + params = [10 ** 3, 10 ** 5] + + def setup(self, N): + data = np.arange(N, dtype=float) + data[40] = np.nan + self.array = pd.array(data, dtype="Int64") + + def time_argsort(self, N): + self.array.argsort() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py new file mode 100644 index 00000000..8cbf8c85 --- /dev/null +++ b/asv_bench/benchmarks/array.py @@ -0,0 +1,23 @@ +import numpy as np + +import pandas as pd + + +class BooleanArray: + def setup(self): + self.values_bool = np.array([True, False, True, False]) + self.values_float = np.array([1.0, 0.0, 1.0, 0.0]) + self.values_integer = np.array([1, 0, 1, 0]) + self.values_integer_like = [1, 0, 1, 0] + + def time_from_bool_array(self): + pd.array(self.values_bool, dtype="boolean") + + def time_from_integer_array(self): + pd.array(self.values_integer, dtype="boolean") + + def time_from_integer_like(self): + pd.array(self.values_integer_like, dtype="boolean") + + def time_from_float_array(self): + pd.array(self.values_float, dtype="boolean") diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py 
new file mode 100644 index 00000000..501e27b9 --- /dev/null +++ b/asv_bench/benchmarks/attrs_caching.py @@ -0,0 +1,36 @@ +import numpy as np + +from pandas import DataFrame + +try: + from pandas.util import cache_readonly +except ImportError: + from pandas.util.decorators import cache_readonly + + +class DataFrameAttributes: + def setup(self): + self.df = DataFrame(np.random.randn(10, 6)) + self.cur_index = self.df.index + + def time_get_index(self): + self.foo = self.df.index + + def time_set_index(self): + self.df.index = self.cur_index + + +class CacheReadonly: + def setup(self): + class Foo: + @cache_readonly + def prop(self): + return 5 + + self.obj = Foo() + + def time_cache_readonly(self): + self.obj.prop + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py new file mode 100644 index 00000000..64e067d2 --- /dev/null +++ b/asv_bench/benchmarks/binary_ops.py @@ -0,0 +1,191 @@ +import operator + +import numpy as np + +from pandas import DataFrame, Series, date_range +from pandas.core.algorithms import checked_add_with_arr + +try: + import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr + + +class IntFrameWithScalar: + params = [ + [np.float64, np.int64], + [2, 3.0, np.int32(4), np.float64(5)], + [ + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.floordiv, + operator.pow, + operator.mod, + operator.eq, + operator.ne, + operator.gt, + operator.ge, + operator.lt, + operator.le, + ], + ] + param_names = ["dtype", "scalar", "op"] + + def setup(self, dtype, scalar, op): + arr = np.random.randn(20000, 100) + self.df = DataFrame(arr.astype(dtype)) + + def time_frame_op_with_scalar(self, dtype, scalar, op): + op(self.df, scalar) + + +class Ops: + + params = [[True, False], ["default", 1]] + param_names = ["use_numexpr", "threads"] + + def setup(self, use_numexpr, threads): + 
self.df = DataFrame(np.random.randn(20000, 100)) + self.df2 = DataFrame(np.random.randn(20000, 100)) + + if threads != "default": + expr.set_numexpr_threads(threads) + if not use_numexpr: + expr.set_use_numexpr(False) + + def time_frame_add(self, use_numexpr, threads): + self.df + self.df2 + + def time_frame_mult(self, use_numexpr, threads): + self.df * self.df2 + + def time_frame_multi_and(self, use_numexpr, threads): + self.df[(self.df > 0) & (self.df2 > 0)] + + def time_frame_comparison(self, use_numexpr, threads): + self.df > self.df2 + + def teardown(self, use_numexpr, threads): + expr.set_use_numexpr(True) + expr.set_numexpr_threads() + + +class Ops2: + def setup(self): + N = 10 ** 3 + self.df = DataFrame(np.random.randn(N, N)) + self.df2 = DataFrame(np.random.randn(N, N)) + + self.df_int = DataFrame( + np.random.randint( + np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N) + ) + ) + self.df2_int = DataFrame( + np.random.randint( + np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N) + ) + ) + + self.s = Series(np.random.randn(N)) + + # Division + + def time_frame_float_div(self): + self.df // self.df2 + + def time_frame_float_div_by_zero(self): + self.df / 0 + + def time_frame_float_floor_by_zero(self): + self.df // 0 + + def time_frame_int_div_by_zero(self): + self.df_int / 0 + + # Modulo + + def time_frame_int_mod(self): + self.df_int % self.df2_int + + def time_frame_float_mod(self): + self.df % self.df2 + + # Dot product + + def time_frame_dot(self): + self.df.dot(self.df2) + + def time_series_dot(self): + self.s.dot(self.s) + + def time_frame_series_dot(self): + self.df.dot(self.s) + + +class Timeseries: + + params = [None, "US/Eastern"] + param_names = ["tz"] + + def setup(self, tz): + N = 10 ** 6 + halfway = (N // 2) - 1 + self.s = Series(date_range("20010101", periods=N, freq="T", tz=tz)) + self.ts = self.s[halfway] + + self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz)) + + def time_series_timestamp_compare(self, 
tz): + self.s <= self.ts + + def time_timestamp_series_compare(self, tz): + self.ts >= self.s + + def time_timestamp_ops_diff(self, tz): + self.s2.diff() + + def time_timestamp_ops_diff_with_shift(self, tz): + self.s - self.s.shift() + + +class AddOverflowScalar: + + params = [1, -1, 0] + param_names = ["scalar"] + + def setup(self, scalar): + N = 10 ** 6 + self.arr = np.arange(N) + + def time_add_overflow_scalar(self, scalar): + checked_add_with_arr(self.arr, scalar) + + +class AddOverflowArray: + def setup(self): + N = 10 ** 6 + self.arr = np.arange(N) + self.arr_rev = np.arange(-N, 0) + self.arr_mixed = np.array([1, -1]).repeat(N / 2) + self.arr_nan_1 = np.random.choice([True, False], size=N) + self.arr_nan_2 = np.random.choice([True, False], size=N) + + def time_add_overflow_arr_rev(self): + checked_add_with_arr(self.arr, self.arr_rev) + + def time_add_overflow_arr_mask_nan(self): + checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1) + + def time_add_overflow_b_mask_nan(self): + checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1) + + def time_add_overflow_both_arg_nan(self): + checked_add_with_arr( + self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2 + ) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/boolean.py b/asv_bench/benchmarks/boolean.py new file mode 100644 index 00000000..71c422c6 --- /dev/null +++ b/asv_bench/benchmarks/boolean.py @@ -0,0 +1,32 @@ +import numpy as np + +import pandas as pd + + +class TimeLogicalOps: + def setup(self): + N = 10_000 + left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool") + self.left = pd.arrays.BooleanArray(left, lmask) + self.right = pd.arrays.BooleanArray(right, rmask) + + def time_or_scalar(self): + self.left | True + self.left | False + + def time_or_array(self): + self.left | self.right + + def time_and_scalar(self): + self.left & True + self.left & False + + def 
time_and_array(self): + self.left & self.right + + def time_xor_scalar(self): + self.left ^ True + self.left ^ False + + def time_xor_array(self): + self.left ^ self.right diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py new file mode 100644 index 00000000..1dcd52ac --- /dev/null +++ b/asv_bench/benchmarks/categoricals.py @@ -0,0 +1,312 @@ +import warnings + +import numpy as np + +import pandas as pd + +from .pandas_vb_common import tm + +try: + from pandas.api.types import union_categoricals +except ImportError: + try: + from pandas.types.concat import union_categoricals + except ImportError: + pass + + +class Constructor: + def setup(self): + N = 10 ** 5 + self.categories = list("abcde") + self.cat_idx = pd.Index(self.categories) + self.values = np.tile(self.categories, N) + self.codes = np.tile(range(len(self.categories)), N) + + self.datetimes = pd.Series( + pd.date_range("1995-01-01 00:00:00", periods=N / 10, freq="s") + ) + self.datetimes_with_nat = self.datetimes.copy() + self.datetimes_with_nat.iloc[-1] = pd.NaT + + self.values_some_nan = list(np.tile(self.categories + [np.nan], N)) + self.values_all_nan = [np.nan] * len(self.values) + self.values_all_int8 = np.ones(N, "int8") + self.categorical = pd.Categorical(self.values, self.categories) + self.series = pd.Series(self.categorical) + + def time_regular(self): + pd.Categorical(self.values, self.categories) + + def time_fastpath(self): + pd.Categorical(self.codes, self.cat_idx, fastpath=True) + + def time_datetimes(self): + pd.Categorical(self.datetimes) + + def time_datetimes_with_nat(self): + pd.Categorical(self.datetimes_with_nat) + + def time_with_nan(self): + pd.Categorical(self.values_some_nan) + + def time_all_nan(self): + pd.Categorical(self.values_all_nan) + + def time_from_codes_all_int8(self): + pd.Categorical.from_codes(self.values_all_int8, self.categories) + + def time_existing_categorical(self): + pd.Categorical(self.categorical) + + def 
time_existing_series(self): + pd.Categorical(self.series) + + +class CategoricalOps: + params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"] + param_names = ["op"] + + def setup(self, op): + N = 10 ** 5 + self.cat = pd.Categorical(list("aabbcd") * N, ordered=True) + + def time_categorical_op(self, op): + getattr(self.cat, op)("b") + + +class Concat: + def setup(self): + N = 10 ** 5 + self.s = pd.Series(list("aabbcd") * N).astype("category") + + self.a = pd.Categorical(list("aabbcd") * N) + self.b = pd.Categorical(list("bbcdjk") * N) + + def time_concat(self): + pd.concat([self.s, self.s]) + + def time_union(self): + union_categoricals([self.a, self.b]) + + +class ValueCounts: + + params = [True, False] + param_names = ["dropna"] + + def setup(self, dropna): + n = 5 * 10 ** 5 + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] + self.ts = pd.Series(arr).astype("category") + + def time_value_counts(self, dropna): + self.ts.value_counts(dropna=dropna) + + +class Repr: + def setup(self): + self.sel = pd.Series(["s1234"]).astype("category") + + def time_rendering(self): + str(self.sel) + + +class SetCategories: + def setup(self): + n = 5 * 10 ** 5 + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] + self.ts = pd.Series(arr).astype("category") + + def time_set_categories(self): + self.ts.cat.set_categories(self.ts.cat.categories[::2]) + + +class RemoveCategories: + def setup(self): + n = 5 * 10 ** 5 + arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)] + self.ts = pd.Series(arr).astype("category") + + def time_remove_categories(self): + self.ts.cat.remove_categories(self.ts.cat.categories[::2]) + + +class Rank: + def setup(self): + N = 10 ** 5 + ncats = 100 + + self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str) + self.s_str_cat = pd.Series(self.s_str, dtype="category") + with warnings.catch_warnings(record=True): + str_cat_type = pd.CategoricalDtype(set(self.s_str), ordered=True) + 
self.s_str_cat_ordered = self.s_str.astype(str_cat_type) + + self.s_int = pd.Series(np.random.randint(0, ncats, size=N)) + self.s_int_cat = pd.Series(self.s_int, dtype="category") + with warnings.catch_warnings(record=True): + int_cat_type = pd.CategoricalDtype(set(self.s_int), ordered=True) + self.s_int_cat_ordered = self.s_int.astype(int_cat_type) + + def time_rank_string(self): + self.s_str.rank() + + def time_rank_string_cat(self): + self.s_str_cat.rank() + + def time_rank_string_cat_ordered(self): + self.s_str_cat_ordered.rank() + + def time_rank_int(self): + self.s_int.rank() + + def time_rank_int_cat(self): + self.s_int_cat.rank() + + def time_rank_int_cat_ordered(self): + self.s_int_cat_ordered.rank() + + +class Isin: + + params = ["object", "int64"] + param_names = ["dtype"] + + def setup(self, dtype): + np.random.seed(1234) + n = 5 * 10 ** 5 + sample_size = 100 + arr = list(np.random.randint(0, n // 10, size=n)) + if dtype == "object": + arr = [f"s{i:04d}" for i in arr] + self.sample = np.random.choice(arr, sample_size) + self.series = pd.Series(arr).astype("category") + + def time_isin_categorical(self, dtype): + self.series.isin(self.sample) + + +class IsMonotonic: + def setup(self): + N = 1000 + self.c = pd.CategoricalIndex(list("a" * N + "b" * N + "c" * N)) + self.s = pd.Series(self.c) + + def time_categorical_index_is_monotonic_increasing(self): + self.c.is_monotonic_increasing + + def time_categorical_index_is_monotonic_decreasing(self): + self.c.is_monotonic_decreasing + + def time_categorical_series_is_monotonic_increasing(self): + self.s.is_monotonic_increasing + + def time_categorical_series_is_monotonic_decreasing(self): + self.s.is_monotonic_decreasing + + +class Contains: + def setup(self): + N = 10 ** 5 + self.ci = tm.makeCategoricalIndex(N) + self.c = self.ci.values + self.key = self.ci.categories[0] + + def time_categorical_index_contains(self): + self.key in self.ci + + def time_categorical_contains(self): + self.key in self.c + + +class 
CategoricalSlicing: + + params = ["monotonic_incr", "monotonic_decr", "non_monotonic"] + param_names = ["index"] + + def setup(self, index): + N = 10 ** 6 + categories = ["a", "b", "c"] + values = [0] * N + [1] * N + [2] * N + if index == "monotonic_incr": + self.data = pd.Categorical.from_codes(values, categories=categories) + elif index == "monotonic_decr": + self.data = pd.Categorical.from_codes( + list(reversed(values)), categories=categories + ) + elif index == "non_monotonic": + self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories) + else: + raise ValueError(f"Invalid index param: {index}") + + self.scalar = 10000 + self.list = list(range(10000)) + self.cat_scalar = "b" + + def time_getitem_scalar(self, index): + self.data[self.scalar] + + def time_getitem_slice(self, index): + self.data[: self.scalar] + + def time_getitem_list_like(self, index): + self.data[[self.scalar]] + + def time_getitem_list(self, index): + self.data[self.list] + + def time_getitem_bool_array(self, index): + self.data[self.data == self.cat_scalar] + + +class Indexing: + def setup(self): + N = 10 ** 5 + self.index = pd.CategoricalIndex(range(N), range(N)) + self.series = pd.Series(range(N), index=self.index).sort_index() + self.category = self.index[500] + + def time_get_loc(self): + self.index.get_loc(self.category) + + def time_shape(self): + self.index.shape + + def time_shallow_copy(self): + self.index._shallow_copy() + + def time_align(self): + pd.DataFrame({"a": self.series, "b": self.series[:500]}) + + def time_intersection(self): + self.index[:750].intersection(self.index[250:]) + + def time_unique(self): + self.index.unique() + + def time_reindex(self): + self.index.reindex(self.index[:500]) + + def time_reindex_missing(self): + self.index.reindex(["a", "b", "c", "d"]) + + def time_sort_values(self): + self.index.sort_values(ascending=False) + + +class SearchSorted: + def setup(self): + N = 10 ** 5 + self.ci = tm.makeCategoricalIndex(N).sort_values() + 
self.c = self.ci.values + self.key = self.ci.categories[1] + + def time_categorical_index_contains(self): + self.ci.searchsorted(self.key) + + def time_categorical_contains(self): + self.c.searchsorted(self.key) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py new file mode 100644 index 00000000..7c43485f --- /dev/null +++ b/asv_bench/benchmarks/ctors.py @@ -0,0 +1,118 @@ +import numpy as np + +from pandas import DatetimeIndex, Index, MultiIndex, Series, Timestamp + +from .pandas_vb_common import tm + + +def no_change(arr): + return arr + + +def list_of_str(arr): + return list(arr.astype(str)) + + +def gen_of_str(arr): + return (x for x in arr.astype(str)) + + +def arr_dict(arr): + return dict(zip(range(len(arr)), arr)) + + +def list_of_tuples(arr): + return [(i, -i) for i in arr] + + +def gen_of_tuples(arr): + return ((i, -i) for i in arr) + + +def list_of_lists(arr): + return [[i, -i] for i in arr] + + +def list_of_tuples_with_none(arr): + return [(i, -i) for i in arr][:-1] + [None] + + +def list_of_lists_with_none(arr): + return [[i, -i] for i in arr][:-1] + [None] + + +class SeriesConstructors: + + param_names = ["data_fmt", "with_index", "dtype"] + params = [ + [ + no_change, + list, + list_of_str, + gen_of_str, + arr_dict, + list_of_tuples, + gen_of_tuples, + list_of_lists, + list_of_tuples_with_none, + list_of_lists_with_none, + ], + [False, True], + ["float", "int"], + ] + + # Generators get exhausted on use, so run setup before every call + number = 1 + repeat = (3, 250, 10) + + def setup(self, data_fmt, with_index, dtype): + if data_fmt in (gen_of_str, gen_of_tuples) and with_index: + raise NotImplementedError( + "Series constructors do not support using generators with indexes" + ) + N = 10 ** 4 + if dtype == "float": + arr = np.random.randn(N) + else: + arr = np.arange(N) + self.data = data_fmt(arr) + self.index = np.arange(N) if with_index else None + + def 
time_series_constructor(self, data_fmt, with_index, dtype): + Series(self.data, index=self.index) + + +class SeriesDtypesConstructors: + def setup(self): + N = 10 ** 4 + self.arr = np.random.randn(N) + self.arr_str = np.array(["foo", "bar", "baz"], dtype=object) + self.s = Series( + [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")] + * N + * 10 + ) + + def time_index_from_array_string(self): + Index(self.arr_str) + + def time_index_from_array_floats(self): + Index(self.arr) + + def time_dtindex_from_series(self): + DatetimeIndex(self.s) + + def time_dtindex_from_index_with_series(self): + Index(self.s) + + +class MultiIndexConstructor: + def setup(self): + N = 10 ** 4 + self.iterables = [tm.makeStringIndex(N), range(20)] + + def time_multiindex_from_iterables(self): + MultiIndex.from_product(self.iterables) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py new file mode 100644 index 00000000..bd17b710 --- /dev/null +++ b/asv_bench/benchmarks/dtypes.py @@ -0,0 +1,65 @@ +import numpy as np + +from pandas.api.types import pandas_dtype + +from .pandas_vb_common import ( + datetime_dtypes, + extension_dtypes, + lib, + numeric_dtypes, + string_dtypes, +) + +_numpy_dtypes = [ + np.dtype(dtype) for dtype in (numeric_dtypes + datetime_dtypes + string_dtypes) +] +_dtypes = _numpy_dtypes + extension_dtypes + + +class Dtypes: + params = _dtypes + list(map(lambda dt: dt.name, _dtypes)) + param_names = ["dtype"] + + def time_pandas_dtype(self, dtype): + pandas_dtype(dtype) + + +class DtypesInvalid: + param_names = ["dtype"] + params = ["scalar-string", "scalar-int", "list-string", "array-string"] + data_dict = { + "scalar-string": "foo", + "scalar-int": 1, + "list-string": ["foo"] * 1000, + "array-string": np.array(["foo"] * 1000), + } + + def time_pandas_dtype_invalid(self, dtype): + try: + pandas_dtype(self.data_dict[dtype]) + except TypeError: + pass + + +class 
InferDtypes: + param_names = ["dtype"] + data_dict = { + "np-object": np.array([1] * 100000, dtype="O"), + "py-object": [1] * 100000, + "np-null": np.array([1] * 50000 + [np.nan] * 50000), + "py-null": [1] * 50000 + [None] * 50000, + "np-int": np.array([1] * 100000, dtype=int), + "np-floating": np.array([1.0] * 100000, dtype=float), + "empty": [], + "bytes": [b"a"] * 100000, + } + params = list(data_dict.keys()) + + def time_infer_skipna(self, dtype): + lib.infer_dtype(self.data_dict[dtype], skipna=True) + + def time_infer(self, dtype): + lib.infer_dtype(self.data_dict[dtype], skipna=False) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py new file mode 100644 index 00000000..cbab9fdc --- /dev/null +++ b/asv_bench/benchmarks/eval.py @@ -0,0 +1,66 @@ +import numpy as np + +import pandas as pd + +try: + import pandas.core.computation.expressions as expr +except ImportError: + import pandas.computation.expressions as expr + + +class Eval: + + params = [["numexpr", "python"], [1, "all"]] + param_names = ["engine", "threads"] + + def setup(self, engine, threads): + self.df = pd.DataFrame(np.random.randn(20000, 100)) + self.df2 = pd.DataFrame(np.random.randn(20000, 100)) + self.df3 = pd.DataFrame(np.random.randn(20000, 100)) + self.df4 = pd.DataFrame(np.random.randn(20000, 100)) + + if threads == 1: + expr.set_numexpr_threads(1) + + def time_add(self, engine, threads): + pd.eval("self.df + self.df2 + self.df3 + self.df4", engine=engine) + + def time_and(self, engine, threads): + pd.eval( + "(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)", + engine=engine, + ) + + def time_chained_cmp(self, engine, threads): + pd.eval("self.df < self.df2 < self.df3 < self.df4", engine=engine) + + def time_mult(self, engine, threads): + pd.eval("self.df * self.df2 * self.df3 * self.df4", engine=engine) + + def teardown(self, engine, threads): + expr.set_numexpr_threads() + + +class 
Query: + def setup(self): + N = 10 ** 6 + halfway = (N // 2) - 1 + index = pd.date_range("20010101", periods=N, freq="T") + s = pd.Series(index) + self.ts = s.iloc[halfway] + self.df = pd.DataFrame({"a": np.random.randn(N), "dates": index}, index=index) + data = np.random.randn(N) + self.min_val = data.min() + self.max_val = data.max() + + def time_query_datetime_index(self): + self.df.query("index < @self.ts") + + def time_query_datetime_column(self): + self.df.query("dates < @self.ts") + + def time_query_with_boolean_selection(self): + self.df.query("(a >= @self.min_val) & (a <= @self.max_val)") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py new file mode 100644 index 00000000..2b24bab8 --- /dev/null +++ b/asv_bench/benchmarks/frame_ctor.py @@ -0,0 +1,121 @@ +import numpy as np + +from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range + +from .pandas_vb_common import tm + +try: + from pandas.tseries.offsets import Nano, Hour +except ImportError: + # For compatibility with older versions + from pandas.core.datetools import * # noqa + + +class FromDicts: + def setup(self): + N, K = 5000, 50 + self.index = tm.makeStringIndex(N) + self.columns = tm.makeStringIndex(K) + frame = DataFrame(np.random.randn(N, K), index=self.index, columns=self.columns) + self.data = frame.to_dict() + self.dict_list = frame.to_dict(orient="records") + self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)} + + def time_list_of_dict(self): + DataFrame(self.dict_list) + + def time_nested_dict(self): + DataFrame(self.data) + + def time_nested_dict_index(self): + DataFrame(self.data, index=self.index) + + def time_nested_dict_columns(self): + DataFrame(self.data, columns=self.columns) + + def time_nested_dict_index_columns(self): + DataFrame(self.data, index=self.index, columns=self.columns) + + def time_nested_dict_int64(self): + # nested dict, integer 
indexes, regression described in #621 + DataFrame(self.data2) + + +class FromSeries: + def setup(self): + mi = MultiIndex.from_product([range(100), range(100)]) + self.s = Series(np.random.randn(10000), index=mi) + + def time_mi_series(self): + DataFrame(self.s) + + +class FromDictwithTimestamp: + + params = [Nano(1), Hour(1)] + param_names = ["offset"] + + def setup(self, offset): + N = 10 ** 3 + np.random.seed(1234) + idx = date_range(Timestamp("1/1/1900"), freq=offset, periods=N) + df = DataFrame(np.random.randn(N, 10), index=idx) + self.d = df.to_dict() + + def time_dict_with_timestamp_offsets(self, offset): + DataFrame(self.d) + + +class FromRecords: + + params = [None, 1000] + param_names = ["nrows"] + + # Generators get exhausted on use, so run setup before every call + number = 1 + repeat = (3, 250, 10) + + def setup(self, nrows): + N = 100000 + self.gen = ((x, (x * 20), (x * 100)) for x in range(N)) + + def time_frame_from_records_generator(self, nrows): + # issue-6700 + self.df = DataFrame.from_records(self.gen, nrows=nrows) + + +class FromNDArray: + def setup(self): + N = 100000 + self.data = np.random.randn(N) + + def time_frame_from_ndarray(self): + self.df = DataFrame(self.data) + + +class FromLists: + + goal_time = 0.2 + + def setup(self): + N = 1000 + M = 100 + self.data = [list(range(M)) for i in range(N)] + + def time_frame_from_lists(self): + self.df = DataFrame(self.data) + + +class FromRange: + + goal_time = 0.2 + + def setup(self): + N = 1_000_000 + self.data = range(N) + + def time_frame_from_range(self): + self.df = DataFrame(self.data) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py new file mode 100644 index 00000000..a3aff45a --- /dev/null +++ b/asv_bench/benchmarks/frame_methods.py @@ -0,0 +1,635 @@ +import string +import warnings + +import numpy as np + +from pandas import DataFrame, MultiIndex, NaT, Series, date_range, isnull, 
period_range + +from .pandas_vb_common import tm + + +class GetNumericData: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 25)) + self.df["foo"] = "bar" + self.df["bar"] = "baz" + self.df = self.df._consolidate() + + def time_frame_get_numeric_data(self): + self.df._get_numeric_data() + + +class Lookup: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 8), columns=list("abcdefgh")) + self.df["foo"] = "bar" + self.row_labels = list(self.df.index[::10])[:900] + self.col_labels = list(self.df.columns) * 100 + self.row_labels_all = np.array( + list(self.df.index) * len(self.df.columns), dtype="object" + ) + self.col_labels_all = np.array( + list(self.df.columns) * len(self.df.index), dtype="object" + ) + + def time_frame_fancy_lookup(self): + self.df.lookup(self.row_labels, self.col_labels) + + def time_frame_fancy_lookup_all(self): + self.df.lookup(self.row_labels_all, self.col_labels_all) + + +class Reindex: + def setup(self): + N = 10 ** 3 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.idx = np.arange(4 * N, 7 * N) + self.df2 = DataFrame( + { + c: { + 0: np.random.randint(0, 2, N).astype(np.bool_), + 1: np.random.randint(0, N, N).astype(np.int16), + 2: np.random.randint(0, N, N).astype(np.int32), + 3: np.random.randint(0, N, N).astype(np.int64), + }[np.random.randint(0, 4)] + for c in range(N) + } + ) + + def time_reindex_axis0(self): + self.df.reindex(self.idx) + + def time_reindex_axis1(self): + self.df.reindex(columns=self.idx) + + def time_reindex_both_axes(self): + self.df.reindex(index=self.idx, columns=self.idx) + + def time_reindex_upcast(self): + self.df2.reindex(np.random.permutation(range(1200))) + + +class Rename: + def setup(self): + N = 10 ** 3 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.idx = np.arange(4 * N, 7 * N) + self.dict_idx = {k: k for k in self.idx} + self.df2 = DataFrame( + { + c: { + 0: np.random.randint(0, 2, N).astype(np.bool_), + 1: np.random.randint(0, N, N).astype(np.int16), + 2: 
np.random.randint(0, N, N).astype(np.int32), + 3: np.random.randint(0, N, N).astype(np.int64), + }[np.random.randint(0, 4)] + for c in range(N) + } + ) + + def time_rename_single(self): + self.df.rename({0: 0}) + + def time_rename_axis0(self): + self.df.rename(self.dict_idx) + + def time_rename_axis1(self): + self.df.rename(columns=self.dict_idx) + + def time_rename_both_axes(self): + self.df.rename(index=self.dict_idx, columns=self.dict_idx) + + def time_dict_rename_both_axes(self): + self.df.rename(index=self.dict_idx, columns=self.dict_idx) + + +class Iteration: + # mem_itertuples_* benchmarks are slow + timeout = 120 + + def setup(self): + N = 1000 + self.df = DataFrame(np.random.randn(N * 10, N)) + self.df2 = DataFrame(np.random.randn(N * 50, 10)) + self.df3 = DataFrame( + np.random.randn(N, 5 * N), columns=["C" + str(c) for c in range(N * 5)] + ) + self.df4 = DataFrame(np.random.randn(N * 1000, 10)) + + def time_items(self): + # (monitor no-copying behaviour) + if hasattr(self.df, "_item_cache"): + self.df._item_cache.clear() + for name, col in self.df.items(): + pass + + def time_items_cached(self): + for name, col in self.df.items(): + pass + + def time_iteritems_indexing(self): + for col in self.df3: + self.df3[col] + + def time_itertuples_start(self): + self.df4.itertuples() + + def time_itertuples_read_first(self): + next(self.df4.itertuples()) + + def time_itertuples(self): + for row in self.df4.itertuples(): + pass + + def time_itertuples_to_list(self): + list(self.df4.itertuples()) + + def mem_itertuples_start(self): + return self.df4.itertuples() + + def peakmem_itertuples_start(self): + self.df4.itertuples() + + def mem_itertuples_read_first(self): + return next(self.df4.itertuples()) + + def peakmem_itertuples(self): + for row in self.df4.itertuples(): + pass + + def mem_itertuples_to_list(self): + return list(self.df4.itertuples()) + + def peakmem_itertuples_to_list(self): + list(self.df4.itertuples()) + + def time_itertuples_raw_start(self): + 
self.df4.itertuples(index=False, name=None) + + def time_itertuples_raw_read_first(self): + next(self.df4.itertuples(index=False, name=None)) + + def time_itertuples_raw_tuples(self): + for row in self.df4.itertuples(index=False, name=None): + pass + + def time_itertuples_raw_tuples_to_list(self): + list(self.df4.itertuples(index=False, name=None)) + + def mem_itertuples_raw_start(self): + return self.df4.itertuples(index=False, name=None) + + def peakmem_itertuples_raw_start(self): + self.df4.itertuples(index=False, name=None) + + def peakmem_itertuples_raw_read_first(self): + next(self.df4.itertuples(index=False, name=None)) + + def peakmem_itertuples_raw(self): + for row in self.df4.itertuples(index=False, name=None): + pass + + def mem_itertuples_raw_to_list(self): + return list(self.df4.itertuples(index=False, name=None)) + + def peakmem_itertuples_raw_to_list(self): + list(self.df4.itertuples(index=False, name=None)) + + def time_iterrows(self): + for row in self.df.iterrows(): + pass + + +class ToString: + def setup(self): + self.df = DataFrame(np.random.randn(100, 10)) + + def time_to_string_floats(self): + self.df.to_string() + + +class ToHTML: + def setup(self): + nrows = 500 + self.df2 = DataFrame(np.random.randn(nrows, 10)) + self.df2[0] = period_range("2000", periods=nrows) + self.df2[1] = range(nrows) + + def time_to_html_mixed(self): + self.df2.to_html() + + +class Repr: + def setup(self): + nrows = 10000 + data = np.random.randn(nrows, 10) + arrays = np.tile(np.random.randn(3, int(nrows / 100)), 100) + idx = MultiIndex.from_arrays(arrays) + self.df3 = DataFrame(data, index=idx) + self.df4 = DataFrame(data, index=np.random.randn(nrows)) + self.df_tall = DataFrame(np.random.randn(nrows, 10)) + self.df_wide = DataFrame(np.random.randn(10, nrows)) + + def time_html_repr_trunc_mi(self): + self.df3._repr_html_() + + def time_html_repr_trunc_si(self): + self.df4._repr_html_() + + def time_repr_tall(self): + repr(self.df_tall) + + def 
time_frame_repr_wide(self): + repr(self.df_wide) + + +class MaskBool: + def setup(self): + data = np.random.randn(1000, 500) + df = DataFrame(data) + df = df.where(df > 0) + self.bools = df > 0 + self.mask = isnull(df) + + def time_frame_mask_bools(self): + self.bools.mask(self.mask) + + def time_frame_mask_floats(self): + self.bools.astype(float).mask(self.mask) + + +class Isnull: + def setup(self): + N = 10 ** 3 + self.df_no_null = DataFrame(np.random.randn(N, N)) + + sample = np.array([np.nan, 1.0]) + data = np.random.choice(sample, (N, N)) + self.df = DataFrame(data) + + sample = np.array(list(string.ascii_letters + string.whitespace)) + data = np.random.choice(sample, (N, N)) + self.df_strings = DataFrame(data) + + sample = np.array( + [ + NaT, + np.nan, + None, + np.datetime64("NaT"), + np.timedelta64("NaT"), + 0, + 1, + 2.0, + "", + "abcd", + ] + ) + data = np.random.choice(sample, (N, N)) + self.df_obj = DataFrame(data) + + def time_isnull_floats_no_null(self): + isnull(self.df_no_null) + + def time_isnull(self): + isnull(self.df) + + def time_isnull_strngs(self): + isnull(self.df_strings) + + def time_isnull_obj(self): + isnull(self.df_obj) + + +class Fillna: + + params = ([True, False], ["pad", "bfill"]) + param_names = ["inplace", "method"] + + def setup(self, inplace, method): + values = np.random.randn(10000, 100) + values[::2] = np.nan + self.df = DataFrame(values) + + def time_frame_fillna(self, inplace, method): + self.df.fillna(inplace=inplace, method=method) + + +class Dropna: + + params = (["all", "any"], [0, 1]) + param_names = ["how", "axis"] + + def setup(self, how, axis): + self.df = DataFrame(np.random.randn(10000, 1000)) + self.df.iloc[50:1000, 20:50] = np.nan + self.df.iloc[2000:3000] = np.nan + self.df.iloc[:, 60:70] = np.nan + self.df_mixed = self.df.copy() + self.df_mixed["foo"] = "bar" + + def time_dropna(self, how, axis): + self.df.dropna(how=how, axis=axis) + + def time_dropna_axis_mixed_dtypes(self, how, axis): + 
self.df_mixed.dropna(how=how, axis=axis) + + +class Count: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.df = DataFrame(np.random.randn(10000, 1000)) + self.df.iloc[50:1000, 20:50] = np.nan + self.df.iloc[2000:3000] = np.nan + self.df.iloc[:, 60:70] = np.nan + self.df_mixed = self.df.copy() + self.df_mixed["foo"] = "bar" + + self.df.index = MultiIndex.from_arrays([self.df.index, self.df.index]) + self.df.columns = MultiIndex.from_arrays([self.df.columns, self.df.columns]) + self.df_mixed.index = MultiIndex.from_arrays( + [self.df_mixed.index, self.df_mixed.index] + ) + self.df_mixed.columns = MultiIndex.from_arrays( + [self.df_mixed.columns, self.df_mixed.columns] + ) + + def time_count_level_multi(self, axis): + self.df.count(axis=axis, level=1) + + def time_count_level_mixed_dtypes_multi(self, axis): + self.df_mixed.count(axis=axis, level=1) + + +class Apply: + def setup(self): + self.df = DataFrame(np.random.randn(1000, 100)) + + self.s = Series(np.arange(1028.0)) + self.df2 = DataFrame({i: self.s for i in range(1028)}) + self.df3 = DataFrame(np.random.randn(1000, 3), columns=list("ABC")) + + def time_apply_user_func(self): + self.df2.apply(lambda x: np.corrcoef(x, self.s)[(0, 1)]) + + def time_apply_axis_1(self): + self.df.apply(lambda x: x + 1, axis=1) + + def time_apply_lambda_mean(self): + self.df.apply(lambda x: x.mean()) + + def time_apply_np_mean(self): + self.df.apply(np.mean) + + def time_apply_pass_thru(self): + self.df.apply(lambda x: x) + + def time_apply_ref_by_name(self): + self.df3.apply(lambda x: x["A"] + x["B"], axis=1) + + +class Dtypes: + def setup(self): + self.df = DataFrame(np.random.randn(1000, 1000)) + + def time_frame_dtypes(self): + self.df.dtypes + + +class Equals: + def setup(self): + N = 10 ** 3 + self.float_df = DataFrame(np.random.randn(N, N)) + self.float_df_nan = self.float_df.copy() + self.float_df_nan.iloc[-1, -1] = np.nan + + self.object_df = DataFrame("foo", index=range(N), columns=range(N)) + 
self.object_df_nan = self.object_df.copy() + self.object_df_nan.iloc[-1, -1] = np.nan + + self.nonunique_cols = self.object_df.copy() + self.nonunique_cols.columns = ["A"] * len(self.nonunique_cols.columns) + self.nonunique_cols_nan = self.nonunique_cols.copy() + self.nonunique_cols_nan.iloc[-1, -1] = np.nan + + def time_frame_float_equal(self): + self.float_df.equals(self.float_df) + + def time_frame_float_unequal(self): + self.float_df.equals(self.float_df_nan) + + def time_frame_nonunique_equal(self): + self.nonunique_cols.equals(self.nonunique_cols) + + def time_frame_nonunique_unequal(self): + self.nonunique_cols.equals(self.nonunique_cols_nan) + + def time_frame_object_equal(self): + self.object_df.equals(self.object_df) + + def time_frame_object_unequal(self): + self.object_df.equals(self.object_df_nan) + + +class Interpolate: + + params = [None, "infer"] + param_names = ["downcast"] + + def setup(self, downcast): + N = 10000 + # this is the worst case, where every column has NaNs. 
+ self.df = DataFrame(np.random.randn(N, 100)) + self.df.values[::2] = np.nan + + self.df2 = DataFrame( + { + "A": np.arange(0, N), + "B": np.random.randint(0, 100, N), + "C": np.random.randn(N), + "D": np.random.randn(N), + } + ) + self.df2.loc[1::5, "A"] = np.nan + self.df2.loc[1::5, "C"] = np.nan + + def time_interpolate(self, downcast): + self.df.interpolate(downcast=downcast) + + def time_interpolate_some_good(self, downcast): + self.df2.interpolate(downcast=downcast) + + +class Shift: + # frame shift speedup issue-5609 + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.df = DataFrame(np.random.rand(10000, 500)) + + def time_shift(self, axis): + self.df.shift(1, axis=axis) + + +class Nunique: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 1000)) + + def time_frame_nunique(self): + self.df.nunique() + + +class Duplicated: + def setup(self): + n = 1 << 20 + t = date_range("2015-01-01", freq="S", periods=(n // 64)) + xs = np.random.randn(n // 64).round(2) + self.df = DataFrame( + { + "a": np.random.randint(-1 << 8, 1 << 8, n), + "b": np.random.choice(t, n), + "c": np.random.choice(xs, n), + } + ) + self.df2 = DataFrame(np.random.randn(1000, 100).astype(str)).T + + def time_frame_duplicated(self): + self.df.duplicated() + + def time_frame_duplicated_wide(self): + self.df2.duplicated() + + +class XS: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.N = 10 ** 4 + self.df = DataFrame(np.random.randn(self.N, self.N)) + + def time_frame_xs(self, axis): + self.df.xs(self.N / 2, axis=axis) + + +class SortValues: + + params = [True, False] + param_names = ["ascending"] + + def setup(self, ascending): + self.df = DataFrame(np.random.randn(1000000, 2), columns=list("AB")) + + def time_frame_sort_values(self, ascending): + self.df.sort_values(by="A", ascending=ascending) + + +class SortIndexByColumns: + def setup(self): + N = 10000 + K = 10 + self.df = DataFrame( + { + "key1": 
tm.makeStringIndex(N).values.repeat(K), + "key2": tm.makeStringIndex(N).values.repeat(K), + "value": np.random.randn(N * K), + } + ) + + def time_frame_sort_values_by_columns(self): + self.df.sort_values(by=["key1", "key2"]) + + +class Quantile: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + self.df = DataFrame(np.random.randn(1000, 3), columns=list("ABC")) + + def time_frame_quantile(self, axis): + self.df.quantile([0.1, 0.5], axis=axis) + + +class GetDtypeCounts: + # 2807 + def setup(self): + self.df = DataFrame(np.random.randn(10, 10000)) + + def time_frame_get_dtype_counts(self): + with warnings.catch_warnings(record=True): + self.df._data.get_dtype_counts() + + def time_info(self): + self.df.info() + + +class NSort: + + params = ["first", "last", "all"] + param_names = ["keep"] + + def setup(self, keep): + self.df = DataFrame(np.random.randn(100000, 3), columns=list("ABC")) + + def time_nlargest_one_column(self, keep): + self.df.nlargest(100, "A", keep=keep) + + def time_nlargest_two_columns(self, keep): + self.df.nlargest(100, ["A", "B"], keep=keep) + + def time_nsmallest_one_column(self, keep): + self.df.nsmallest(100, "A", keep=keep) + + def time_nsmallest_two_columns(self, keep): + self.df.nsmallest(100, ["A", "B"], keep=keep) + + +class Describe: + def setup(self): + self.df = DataFrame( + { + "a": np.random.randint(0, 100, int(1e6)), + "b": np.random.randint(0, 100, int(1e6)), + "c": np.random.randint(0, 100, int(1e6)), + } + ) + + def time_series_describe(self): + self.df["a"].describe() + + def time_dataframe_describe(self): + self.df.describe() + + +class SelectDtypes: + params = [100, 1000] + param_names = ["n"] + + def setup(self, n): + self.df = DataFrame(np.random.randn(10, n)) + + def time_select_dtypes(self, n): + self.df.select_dtypes(include="int") + + +class MemoryUsage: + def setup(self): + self.df = DataFrame(np.random.randn(100000, 2), columns=list("AB")) + self.df2 = self.df.copy() + self.df2["A"] = 
self.df2["A"].astype("object") + + def time_memory_usage(self): + self.df.memory_usage(deep=True) + + def time_memory_usage_object_dtype(self): + self.df2.memory_usage(deep=True) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py new file mode 100644 index 00000000..e266d871 --- /dev/null +++ b/asv_bench/benchmarks/gil.py @@ -0,0 +1,304 @@ +import numpy as np + +from pandas import DataFrame, Series, date_range, factorize, read_csv +from pandas.core.algorithms import take_1d + +from .pandas_vb_common import tm + +try: + from pandas import ( + rolling_median, + rolling_mean, + rolling_min, + rolling_max, + rolling_var, + rolling_skew, + rolling_kurt, + rolling_std, + ) + + have_rolling_methods = True +except ImportError: + have_rolling_methods = False +try: + from pandas._libs import algos +except ImportError: + from pandas import algos +try: + from pandas._testing import test_parallel + + have_real_test_parallel = True +except ImportError: + have_real_test_parallel = False + + def test_parallel(num_threads=1): + def wrapper(fname): + return fname + + return wrapper + + +from .pandas_vb_common import BaseIO # isort:skip + + +class ParallelGroupbyMethods: + + params = ([2, 4, 8], ["count", "last", "max", "mean", "min", "prod", "sum", "var"]) + param_names = ["threads", "method"] + + def setup(self, threads, method): + if not have_real_test_parallel: + raise NotImplementedError + N = 10 ** 6 + ngroups = 10 ** 3 + df = DataFrame( + {"key": np.random.randint(0, ngroups, size=N), "data": np.random.randn(N)} + ) + + @test_parallel(num_threads=threads) + def parallel(): + getattr(df.groupby("key")["data"], method)() + + self.parallel = parallel + + def loop(): + getattr(df.groupby("key")["data"], method)() + + self.loop = loop + + def time_parallel(self, threads, method): + self.parallel() + + def time_loop(self, threads, method): + for i in range(threads): + self.loop() + + +class 
ParallelGroups: + + params = [2, 4, 8] + param_names = ["threads"] + + def setup(self, threads): + if not have_real_test_parallel: + raise NotImplementedError + size = 2 ** 22 + ngroups = 10 ** 3 + data = Series(np.random.randint(0, ngroups, size=size)) + + @test_parallel(num_threads=threads) + def get_groups(): + data.groupby(data).groups + + self.get_groups = get_groups + + def time_get_groups(self, threads): + self.get_groups() + + +class ParallelTake1D: + + params = ["int64", "float64"] + param_names = ["dtype"] + + def setup(self, dtype): + if not have_real_test_parallel: + raise NotImplementedError + N = 10 ** 6 + df = DataFrame({"col": np.arange(N, dtype=dtype)}) + indexer = np.arange(100, len(df) - 100) + + @test_parallel(num_threads=2) + def parallel_take1d(): + take_1d(df["col"].values, indexer) + + self.parallel_take1d = parallel_take1d + + def time_take1d(self, dtype): + self.parallel_take1d() + + +class ParallelKth: + + number = 1 + repeat = 5 + + def setup(self): + if not have_real_test_parallel: + raise NotImplementedError + N = 10 ** 7 + k = 5 * 10 ** 5 + kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}] + + @test_parallel(num_threads=2, kwargs_list=kwargs_list) + def parallel_kth_smallest(arr): + algos.kth_smallest(arr, k) + + self.parallel_kth_smallest = parallel_kth_smallest + + def time_kth_smallest(self): + self.parallel_kth_smallest() + + +class ParallelDatetimeFields: + def setup(self): + if not have_real_test_parallel: + raise NotImplementedError + N = 10 ** 6 + self.dti = date_range("1900-01-01", periods=N, freq="T") + self.period = self.dti.to_period("D") + + def time_datetime_field_year(self): + @test_parallel(num_threads=2) + def run(dti): + dti.year + + run(self.dti) + + def time_datetime_field_day(self): + @test_parallel(num_threads=2) + def run(dti): + dti.day + + run(self.dti) + + def time_datetime_field_daysinmonth(self): + @test_parallel(num_threads=2) + def run(dti): + dti.days_in_month + + run(self.dti) + + 
def time_datetime_field_normalize(self): + @test_parallel(num_threads=2) + def run(dti): + dti.normalize() + + run(self.dti) + + def time_datetime_to_period(self): + @test_parallel(num_threads=2) + def run(dti): + dti.to_period("S") + + run(self.dti) + + def time_period_to_datetime(self): + @test_parallel(num_threads=2) + def run(period): + period.to_timestamp() + + run(self.period) + + +class ParallelRolling: + + params = ["median", "mean", "min", "max", "var", "skew", "kurt", "std"] + param_names = ["method"] + + def setup(self, method): + if not have_real_test_parallel: + raise NotImplementedError + win = 100 + arr = np.random.rand(100000) + if hasattr(DataFrame, "rolling"): + df = DataFrame(arr).rolling(win) + + @test_parallel(num_threads=2) + def parallel_rolling(): + getattr(df, method)() + + self.parallel_rolling = parallel_rolling + elif have_rolling_methods: + rolling = { + "median": rolling_median, + "mean": rolling_mean, + "min": rolling_min, + "max": rolling_max, + "var": rolling_var, + "skew": rolling_skew, + "kurt": rolling_kurt, + "std": rolling_std, + } + + @test_parallel(num_threads=2) + def parallel_rolling(): + rolling[method](arr, win) + + self.parallel_rolling = parallel_rolling + else: + raise NotImplementedError + + def time_rolling(self, method): + self.parallel_rolling() + + +class ParallelReadCSV(BaseIO): + + number = 1 + repeat = 5 + params = ["float", "object", "datetime"] + param_names = ["dtype"] + + def setup(self, dtype): + if not have_real_test_parallel: + raise NotImplementedError + rows = 10000 + cols = 50 + data = { + "float": DataFrame(np.random.randn(rows, cols)), + "datetime": DataFrame( + np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows) + ), + "object": DataFrame( + "foo", index=range(rows), columns=["object%03d" for _ in range(5)] + ), + } + + self.fname = f"__test_{dtype}__.csv" + df = data[dtype] + df.to_csv(self.fname) + + @test_parallel(num_threads=2) + def parallel_read_csv(): + 
read_csv(self.fname) + + self.parallel_read_csv = parallel_read_csv + + def time_read_csv(self, dtype): + self.parallel_read_csv() + + +class ParallelFactorize: + + number = 1 + repeat = 5 + params = [2, 4, 8] + param_names = ["threads"] + + def setup(self, threads): + if not have_real_test_parallel: + raise NotImplementedError + + strings = tm.makeStringIndex(100000) + + @test_parallel(num_threads=threads) + def parallel(): + factorize(strings) + + self.parallel = parallel + + def loop(): + factorize(strings) + + self.loop = loop + + def time_parallel(self, threads): + self.parallel() + + def time_loop(self, threads): + for i in range(threads): + self.loop() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py new file mode 100644 index 00000000..28e0dcc5 --- /dev/null +++ b/asv_bench/benchmarks/groupby.py @@ -0,0 +1,629 @@ +from functools import partial +from itertools import product +from string import ascii_letters + +import numpy as np + +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + period_range, +) + +from .pandas_vb_common import tm + +method_blacklist = { + "object": { + "median", + "prod", + "sem", + "cumsum", + "sum", + "cummin", + "mean", + "max", + "skew", + "cumprod", + "cummax", + "rank", + "pct_change", + "min", + "var", + "mad", + "describe", + "std", + "quantile", + }, + "datetime": { + "median", + "prod", + "sem", + "cumsum", + "sum", + "mean", + "skew", + "cumprod", + "cummax", + "pct_change", + "var", + "mad", + "describe", + "std", + }, +} + + +class ApplyDictReturn: + def setup(self): + self.labels = np.arange(1000).repeat(10) + self.data = Series(np.random.randn(len(self.labels))) + + def time_groupby_apply_dict_return(self): + self.data.groupby(self.labels).apply( + lambda x: {"first": x.values[0], "last": x.values[-1]} + ) + + +class Apply: + def setup_cache(self): + N = 10 ** 4 + labels = 
np.random.randint(0, 2000, size=N) + labels2 = np.random.randint(0, 3, size=N) + df = DataFrame( + { + "key": labels, + "key2": labels2, + "value1": np.random.randn(N), + "value2": ["foo", "bar", "baz", "qux"] * (N // 4), + } + ) + return df + + def time_scalar_function_multi_col(self, df): + df.groupby(["key", "key2"]).apply(lambda x: 1) + + def time_scalar_function_single_col(self, df): + df.groupby("key").apply(lambda x: 1) + + @staticmethod + def df_copy_function(g): + # ensure that the group name is available (see GH #15062) + g.name + return g.copy() + + def time_copy_function_multi_col(self, df): + df.groupby(["key", "key2"]).apply(self.df_copy_function) + + def time_copy_overhead_single_col(self, df): + df.groupby("key").apply(self.df_copy_function) + + +class Groups: + + param_names = ["key"] + params = ["int64_small", "int64_large", "object_small", "object_large"] + + def setup_cache(self): + size = 10 ** 6 + data = { + "int64_small": Series(np.random.randint(0, 100, size=size)), + "int64_large": Series(np.random.randint(0, 10000, size=size)), + "object_small": Series( + tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size)) + ), + "object_large": Series( + tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size)) + ), + } + return data + + def setup(self, data, key): + self.ser = data[key] + + def time_series_groups(self, data, key): + self.ser.groupby(self.ser).groups + + +class GroupManyLabels: + + params = [1, 1000] + param_names = ["ncols"] + + def setup(self, ncols): + N = 1000 + data = np.random.randn(N, ncols) + self.labels = np.random.randint(0, 100, size=N) + self.df = DataFrame(data) + + def time_sum(self, ncols): + self.df.groupby(self.labels).sum() + + +class Nth: + + param_names = ["dtype"] + params = ["float32", "float64", "datetime", "object"] + + def setup(self, dtype): + N = 10 ** 5 + # with datetimes (GH7555) + if dtype == "datetime": + values = date_range("1/1/2011", periods=N, freq="s") + elif dtype == 
"object": + values = ["foo"] * N + else: + values = np.arange(N).astype(dtype) + + key = np.arange(N) + self.df = DataFrame({"key": key, "values": values}) + self.df.iloc[1, 1] = np.nan # insert missing data + + def time_frame_nth_any(self, dtype): + self.df.groupby("key").nth(0, dropna="any") + + def time_groupby_nth_all(self, dtype): + self.df.groupby("key").nth(0, dropna="all") + + def time_frame_nth(self, dtype): + self.df.groupby("key").nth(0) + + def time_series_nth_any(self, dtype): + self.df["values"].groupby(self.df["key"]).nth(0, dropna="any") + + def time_series_nth_all(self, dtype): + self.df["values"].groupby(self.df["key"]).nth(0, dropna="all") + + def time_series_nth(self, dtype): + self.df["values"].groupby(self.df["key"]).nth(0) + + +class DateAttributes: + def setup(self): + rng = date_range("1/1/2000", "12/31/2005", freq="H") + self.year, self.month, self.day = rng.year, rng.month, rng.day + self.ts = Series(np.random.randn(len(rng)), index=rng) + + def time_len_groupby_object(self): + len(self.ts.groupby([self.year, self.month, self.day])) + + +class Int64: + def setup(self): + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 17, 5)) + i = np.random.choice(len(arr), len(arr) * 5) + arr = np.vstack((arr, arr[i])) + i = np.random.permutation(len(arr)) + arr = arr[i] + self.cols = list("abcde") + self.df = DataFrame(arr, columns=self.cols) + self.df["jim"], self.df["joe"] = np.random.randn(2, len(self.df)) * 10 + + def time_overflow(self): + self.df.groupby(self.cols).max() + + +class CountMultiDtype: + def setup_cache(self): + n = 10000 + offsets = np.random.randint(n, size=n).astype("timedelta64[ns]") + dates = np.datetime64("now") + offsets + dates[np.random.rand(n) > 0.5] = np.datetime64("nat") + offsets[np.random.rand(n) > 0.5] = np.timedelta64("nat") + value2 = np.random.randn(n) + value2[np.random.rand(n) > 0.5] = np.nan + obj = np.random.choice(list("ab"), size=n).astype(object) + obj[np.random.randn(n) > 0.5] = np.nan + df = DataFrame( + { 
+ "key1": np.random.randint(0, 500, size=n), + "key2": np.random.randint(0, 100, size=n), + "dates": dates, + "value2": value2, + "value3": np.random.randn(n), + "ints": np.random.randint(0, 1000, size=n), + "obj": obj, + "offsets": offsets, + } + ) + return df + + def time_multi_count(self, df): + df.groupby(["key1", "key2"]).count() + + +class CountMultiInt: + def setup_cache(self): + n = 10000 + df = DataFrame( + { + "key1": np.random.randint(0, 500, size=n), + "key2": np.random.randint(0, 100, size=n), + "ints": np.random.randint(0, 1000, size=n), + "ints2": np.random.randint(0, 1000, size=n), + } + ) + return df + + def time_multi_int_count(self, df): + df.groupby(["key1", "key2"]).count() + + def time_multi_int_nunique(self, df): + df.groupby(["key1", "key2"]).nunique() + + +class AggFunctions: + def setup_cache(self): + N = 10 ** 5 + fac1 = np.array(["A", "B", "C"], dtype="O") + fac2 = np.array(["one", "two"], dtype="O") + df = DataFrame( + { + "key1": fac1.take(np.random.randint(0, 3, size=N)), + "key2": fac2.take(np.random.randint(0, 2, size=N)), + "value1": np.random.randn(N), + "value2": np.random.randn(N), + "value3": np.random.randn(N), + } + ) + return df + + def time_different_str_functions(self, df): + df.groupby(["key1", "key2"]).agg( + {"value1": "mean", "value2": "var", "value3": "sum"} + ) + + def time_different_numpy_functions(self, df): + df.groupby(["key1", "key2"]).agg( + {"value1": np.mean, "value2": np.var, "value3": np.sum} + ) + + def time_different_python_functions_multicol(self, df): + df.groupby(["key1", "key2"]).agg([sum, min, max]) + + def time_different_python_functions_singlecol(self, df): + df.groupby("key1").agg([sum, min, max]) + + +class GroupStrings: + def setup(self): + n = 2 * 10 ** 5 + alpha = list(map("".join, product(ascii_letters, repeat=4))) + data = np.random.choice(alpha, (n // 5, 4), replace=False) + data = np.repeat(data, 5, axis=0) + self.df = DataFrame(data, columns=list("abcd")) + self.df["joe"] = 
(np.random.randn(len(self.df)) * 10).round(3) + self.df = self.df.sample(frac=1).reset_index(drop=True) + + def time_multi_columns(self): + self.df.groupby(list("abcd")).max() + + +class MultiColumn: + def setup_cache(self): + N = 10 ** 5 + key1 = np.tile(np.arange(100, dtype=object), 1000) + key2 = key1.copy() + np.random.shuffle(key1) + np.random.shuffle(key2) + df = DataFrame( + { + "key1": key1, + "key2": key2, + "data1": np.random.randn(N), + "data2": np.random.randn(N), + } + ) + return df + + def time_lambda_sum(self, df): + df.groupby(["key1", "key2"]).agg(lambda x: x.values.sum()) + + def time_cython_sum(self, df): + df.groupby(["key1", "key2"]).sum() + + def time_col_select_lambda_sum(self, df): + df.groupby(["key1", "key2"])["data1"].agg(lambda x: x.values.sum()) + + def time_col_select_numpy_sum(self, df): + df.groupby(["key1", "key2"])["data1"].agg(np.sum) + + +class Size: + def setup(self): + n = 10 ** 5 + offsets = np.random.randint(n, size=n).astype("timedelta64[ns]") + dates = np.datetime64("now") + offsets + self.df = DataFrame( + { + "key1": np.random.randint(0, 500, size=n), + "key2": np.random.randint(0, 100, size=n), + "value1": np.random.randn(n), + "value2": np.random.randn(n), + "value3": np.random.randn(n), + "dates": dates, + } + ) + self.draws = Series(np.random.randn(n)) + labels = Series(["foo", "bar", "baz", "qux"] * (n // 4)) + self.cats = labels.astype("category") + + def time_multi_size(self): + self.df.groupby(["key1", "key2"]).size() + + def time_category_size(self): + self.draws.groupby(self.cats).size() + + +class GroupByMethods: + + param_names = ["dtype", "method", "application"] + params = [ + ["int", "float", "object", "datetime"], + [ + "all", + "any", + "bfill", + "count", + "cumcount", + "cummax", + "cummin", + "cumprod", + "cumsum", + "describe", + "ffill", + "first", + "head", + "last", + "mad", + "max", + "min", + "median", + "mean", + "nunique", + "pct_change", + "prod", + "quantile", + "rank", + "sem", + "shift", + 
"size", + "skew", + "std", + "sum", + "tail", + "unique", + "value_counts", + "var", + ], + ["direct", "transformation"], + ] + + def setup(self, dtype, method, application): + if method in method_blacklist.get(dtype, {}): + raise NotImplementedError # skip benchmark + ngroups = 1000 + size = ngroups * 2 + rng = np.arange(ngroups) + values = rng.take(np.random.randint(0, ngroups, size=size)) + if dtype == "int": + key = np.random.randint(0, size, size=size) + elif dtype == "float": + key = np.concatenate( + [np.random.random(ngroups) * 0.1, np.random.random(ngroups) * 10.0] + ) + elif dtype == "object": + key = ["foo"] * size + elif dtype == "datetime": + key = date_range("1/1/2011", periods=size, freq="s") + + df = DataFrame({"values": values, "key": key}) + + if application == "transform": + if method == "describe": + raise NotImplementedError + + self.as_group_method = lambda: df.groupby("key")["values"].transform(method) + self.as_field_method = lambda: df.groupby("values")["key"].transform(method) + else: + self.as_group_method = getattr(df.groupby("key")["values"], method) + self.as_field_method = getattr(df.groupby("values")["key"], method) + + def time_dtype_as_group(self, dtype, method, application): + self.as_group_method() + + def time_dtype_as_field(self, dtype, method, application): + self.as_field_method() + + +class RankWithTies: + # GH 21237 + param_names = ["dtype", "tie_method"] + params = [ + ["float64", "float32", "int64", "datetime64"], + ["first", "average", "dense", "min", "max"], + ] + + def setup(self, dtype, tie_method): + N = 10 ** 4 + if dtype == "datetime64": + data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype) + else: + data = np.array([1] * N, dtype=dtype) + self.df = DataFrame({"values": data, "key": ["foo"] * N}) + + def time_rank_ties(self, dtype, tie_method): + self.df.groupby("key").rank(method=tie_method) + + +class Float32: + # GH 13335 + def setup(self): + tmp1 = (np.random.random(10000) * 0.1).astype(np.float32) + 
tmp2 = (np.random.random(10000) * 10.0).astype(np.float32) + tmp = np.concatenate((tmp1, tmp2)) + arr = np.repeat(tmp, 10) + self.df = DataFrame(dict(a=arr, b=arr)) + + def time_sum(self): + self.df.groupby(["a"])["b"].sum() + + +class Categories: + def setup(self): + N = 10 ** 5 + arr = np.random.random(N) + data = {"a": Categorical(np.random.randint(10000, size=N)), "b": arr} + self.df = DataFrame(data) + data = { + "a": Categorical(np.random.randint(10000, size=N), ordered=True), + "b": arr, + } + self.df_ordered = DataFrame(data) + data = { + "a": Categorical( + np.random.randint(100, size=N), categories=np.arange(10000) + ), + "b": arr, + } + self.df_extra_cat = DataFrame(data) + + def time_groupby_sort(self): + self.df.groupby("a")["b"].count() + + def time_groupby_nosort(self): + self.df.groupby("a", sort=False)["b"].count() + + def time_groupby_ordered_sort(self): + self.df_ordered.groupby("a")["b"].count() + + def time_groupby_ordered_nosort(self): + self.df_ordered.groupby("a", sort=False)["b"].count() + + def time_groupby_extra_cat_sort(self): + self.df_extra_cat.groupby("a")["b"].count() + + def time_groupby_extra_cat_nosort(self): + self.df_extra_cat.groupby("a", sort=False)["b"].count() + + +class Datelike: + # GH 14338 + params = ["period_range", "date_range", "date_range_tz"] + param_names = ["grouper"] + + def setup(self, grouper): + N = 10 ** 4 + rng_map = { + "period_range": period_range, + "date_range": date_range, + "date_range_tz": partial(date_range, tz="US/Central"), + } + self.grouper = rng_map[grouper]("1900-01-01", freq="D", periods=N) + self.df = DataFrame(np.random.randn(10 ** 4, 2)) + + def time_sum(self, grouper): + self.df.groupby(self.grouper).sum() + + +class SumBools: + # GH 2692 + def setup(self): + N = 500 + self.df = DataFrame({"ii": range(N), "bb": [True] * N}) + + def time_groupby_sum_booleans(self): + self.df.groupby("ii").sum() + + +class SumMultiLevel: + # GH 9049 + timeout = 120.0 + + def setup(self): + N = 50 + self.df = 
DataFrame( + {"A": list(range(N)) * 2, "B": range(N * 2), "C": 1} + ).set_index(["A", "B"]) + + def time_groupby_sum_multiindex(self): + self.df.groupby(level=[0, 1]).sum() + + +class Transform: + def setup(self): + n1 = 400 + n2 = 250 + index = MultiIndex( + levels=[np.arange(n1), tm.makeStringIndex(n2)], + codes=[np.repeat(range(n1), n2).tolist(), list(range(n2)) * n1], + names=["lev1", "lev2"], + ) + arr = np.random.randn(n1 * n2, 3) + arr[::10000, 0] = np.nan + arr[1::10000, 1] = np.nan + arr[2::10000, 2] = np.nan + data = DataFrame(arr, index=index, columns=["col1", "col20", "col3"]) + self.df = data + + n = 20000 + self.df1 = DataFrame( + np.random.randint(1, n, (n, 3)), columns=["jim", "joe", "jolie"] + ) + self.df2 = self.df1.copy() + self.df2["jim"] = self.df2["joe"] + + self.df3 = DataFrame( + np.random.randint(1, (n / 10), (n, 3)), columns=["jim", "joe", "jolie"] + ) + self.df4 = self.df3.copy() + self.df4["jim"] = self.df4["joe"] + + def time_transform_lambda_max(self): + self.df.groupby(level="lev1").transform(lambda x: max(x)) + + def time_transform_ufunc_max(self): + self.df.groupby(level="lev1").transform(np.max) + + def time_transform_multi_key1(self): + self.df1.groupby(["jim", "joe"])["jolie"].transform("max") + + def time_transform_multi_key2(self): + self.df2.groupby(["jim", "joe"])["jolie"].transform("max") + + def time_transform_multi_key3(self): + self.df3.groupby(["jim", "joe"])["jolie"].transform("max") + + def time_transform_multi_key4(self): + self.df4.groupby(["jim", "joe"])["jolie"].transform("max") + + +class TransformBools: + def setup(self): + N = 120000 + transition_points = np.sort(np.random.choice(np.arange(N), 1400)) + transitions = np.zeros(N, dtype=np.bool) + transitions[transition_points] = True + self.g = transitions.cumsum() + self.df = DataFrame({"signal": np.random.rand(N)}) + + def time_transform_mean(self): + self.df["signal"].groupby(self.g).transform(np.mean) + + +class TransformNaN: + # GH 12737 + def setup(self): + 
self.df_nans = DataFrame( + {"key": np.repeat(np.arange(1000), 10), "B": np.nan, "C": np.nan} + ) + self.df_nans.loc[4::10, "B":"C"] = 5 + + def time_first(self): + self.df_nans.groupby("key").transform("first") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/index_cached_properties.py b/asv_bench/benchmarks/index_cached_properties.py new file mode 100644 index 00000000..13b33855 --- /dev/null +++ b/asv_bench/benchmarks/index_cached_properties.py @@ -0,0 +1,75 @@ +import pandas as pd + + +class IndexCache: + number = 1 + repeat = (3, 100, 20) + + params = [ + [ + "DatetimeIndex", + "Float64Index", + "IntervalIndex", + "Int64Index", + "MultiIndex", + "PeriodIndex", + "RangeIndex", + "TimedeltaIndex", + "UInt64Index", + ] + ] + param_names = ["index_type"] + + def setup(self, index_type): + N = 10 ** 5 + if index_type == "MultiIndex": + self.idx = pd.MultiIndex.from_product( + [pd.date_range("1/1/2000", freq="T", periods=N // 2), ["a", "b"]] + ) + elif index_type == "DatetimeIndex": + self.idx = pd.date_range("1/1/2000", freq="T", periods=N) + elif index_type == "Int64Index": + self.idx = pd.Index(range(N)) + elif index_type == "PeriodIndex": + self.idx = pd.period_range("1/1/2000", freq="T", periods=N) + elif index_type == "RangeIndex": + self.idx = pd.RangeIndex(start=0, stop=N) + elif index_type == "IntervalIndex": + self.idx = pd.IntervalIndex.from_arrays(range(N), range(1, N + 1)) + elif index_type == "TimedeltaIndex": + self.idx = pd.TimedeltaIndex(range(N)) + elif index_type == "Float64Index": + self.idx = pd.Float64Index(range(N)) + elif index_type == "UInt64Index": + self.idx = pd.UInt64Index(range(N)) + else: + raise ValueError + assert len(self.idx) == N + self.idx._cache = {} + + def time_values(self, index_type): + self.idx._values + + def time_shape(self, index_type): + self.idx.shape + + def time_is_monotonic(self, index_type): + self.idx.is_monotonic + + def time_is_monotonic_decreasing(self, 
index_type): + self.idx.is_monotonic_decreasing + + def time_is_monotonic_increasing(self, index_type): + self.idx.is_monotonic_increasing + + def time_is_unique(self, index_type): + self.idx.is_unique + + def time_engine(self, index_type): + self.idx._engine + + def time_inferred_type(self, index_type): + self.idx.inferred_type + + def time_is_all_dates(self, index_type): + self.idx.is_all_dates diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py new file mode 100644 index 00000000..10314154 --- /dev/null +++ b/asv_bench/benchmarks/index_object.py @@ -0,0 +1,262 @@ +import gc + +import numpy as np + +from pandas import ( + DatetimeIndex, + Float64Index, + Index, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + date_range, +) + +from .pandas_vb_common import tm + + +class SetOperations: + + params = ( + ["datetime", "date_string", "int", "strings"], + ["intersection", "union", "symmetric_difference"], + ) + param_names = ["dtype", "method"] + + def setup(self, dtype, method): + N = 10 ** 5 + dates_left = date_range("1/1/2000", periods=N, freq="T") + fmt = "%Y-%m-%d %H:%M:%S" + date_str_left = Index(dates_left.strftime(fmt)) + int_left = Index(np.arange(N)) + str_left = tm.makeStringIndex(N) + data = { + "datetime": {"left": dates_left, "right": dates_left[:-1]}, + "date_string": {"left": date_str_left, "right": date_str_left[:-1]}, + "int": {"left": int_left, "right": int_left[:-1]}, + "strings": {"left": str_left, "right": str_left[:-1]}, + } + self.left = data[dtype]["left"] + self.right = data[dtype]["right"] + + def time_operation(self, dtype, method): + getattr(self.left, method)(self.right) + + +class SetDisjoint: + def setup(self): + N = 10 ** 5 + B = N + 20000 + self.datetime_left = DatetimeIndex(range(N)) + self.datetime_right = DatetimeIndex(range(N, B)) + + def time_datetime_difference_disjoint(self): + self.datetime_left.difference(self.datetime_right) + + +class Datetime: + def setup(self): + self.dr = 
date_range("20000101", freq="D", periods=10000) + + def time_is_dates_only(self): + self.dr._is_dates_only + + +class Ops: + + params = ["float", "int"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10 ** 6 + indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"} + self.index = getattr(tm, indexes[dtype])(N) + + def time_add(self, dtype): + self.index + 2 + + def time_subtract(self, dtype): + self.index - 2 + + def time_multiply(self, dtype): + self.index * 2 + + def time_divide(self, dtype): + self.index / 2 + + def time_modulo(self, dtype): + self.index % 2 + + +class Range: + def setup(self): + self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3) + self.idx_dec = RangeIndex(start=10 ** 7, stop=-1, step=-3) + + def time_max(self): + self.idx_inc.max() + + def time_max_trivial(self): + self.idx_dec.max() + + def time_min(self): + self.idx_dec.min() + + def time_min_trivial(self): + self.idx_inc.min() + + def time_get_loc_inc(self): + self.idx_inc.get_loc(900000) + + def time_get_loc_dec(self): + self.idx_dec.get_loc(100000) + + +class IndexEquals: + def setup(self): + idx_large_fast = RangeIndex(100000) + idx_small_slow = date_range(start="1/1/2012", periods=1) + self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow]) + + self.idx_non_object = RangeIndex(1) + + def time_non_object_equals_multiindex(self): + self.idx_non_object.equals(self.mi_large_slow) + + +class IndexAppend: + def setup(self): + + N = 10000 + self.range_idx = RangeIndex(0, 100) + self.int_idx = self.range_idx.astype(int) + self.obj_idx = self.int_idx.astype(str) + self.range_idxs = [] + self.int_idxs = [] + self.object_idxs = [] + for i in range(1, N): + r_idx = RangeIndex(i * 100, (i + 1) * 100) + self.range_idxs.append(r_idx) + i_idx = r_idx.astype(int) + self.int_idxs.append(i_idx) + o_idx = i_idx.astype(str) + self.object_idxs.append(o_idx) + + def time_append_range_list(self): + self.range_idx.append(self.range_idxs) + + def 
time_append_int_list(self): + self.int_idx.append(self.int_idxs) + + def time_append_obj_list(self): + self.obj_idx.append(self.object_idxs) + + +class Indexing: + + params = ["String", "Float", "Int"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10 ** 6 + self.idx = getattr(tm, f"make{dtype}Index")(N) + self.array_mask = (np.arange(N) % 3) == 0 + self.series_mask = Series(self.array_mask) + self.sorted = self.idx.sort_values() + half = N // 2 + self.non_unique = self.idx[:half].append(self.idx[:half]) + self.non_unique_sorted = ( + self.sorted[:half].append(self.sorted[:half]).sort_values() + ) + self.key = self.sorted[N // 4] + + def time_boolean_array(self, dtype): + self.idx[self.array_mask] + + def time_boolean_series(self, dtype): + self.idx[self.series_mask] + + def time_get(self, dtype): + self.idx[1] + + def time_slice(self, dtype): + self.idx[:-1] + + def time_slice_step(self, dtype): + self.idx[::2] + + def time_get_loc(self, dtype): + self.idx.get_loc(self.key) + + def time_get_loc_sorted(self, dtype): + self.sorted.get_loc(self.key) + + def time_get_loc_non_unique(self, dtype): + self.non_unique.get_loc(self.key) + + def time_get_loc_non_unique_sorted(self, dtype): + self.non_unique_sorted.get_loc(self.key) + + +class Float64IndexMethod: + # GH 13166 + def setup(self): + N = 100000 + a = np.arange(N) + self.ind = Float64Index(a * 4.8000000418824129e-08) + + def time_get_loc(self): + self.ind.get_loc(0) + + +class IntervalIndexMethod: + # GH 24813 + params = [10 ** 3, 10 ** 5] + + def setup(self, N): + left = np.append(np.arange(N), np.array(0)) + right = np.append(np.arange(1, N + 1), np.array(1)) + self.intv = IntervalIndex.from_arrays(left, right) + self.intv._engine + + self.intv2 = IntervalIndex.from_arrays(left + 1, right + 1) + self.intv2._engine + + self.left = IntervalIndex.from_breaks(np.arange(N)) + self.right = IntervalIndex.from_breaks(np.arange(N - 3, 2 * N - 3)) + + def time_monotonic_inc(self, N): + 
self.intv.is_monotonic_increasing + + def time_is_unique(self, N): + self.intv.is_unique + + def time_intersection(self, N): + self.left.intersection(self.right) + + def time_intersection_one_duplicate(self, N): + self.intv.intersection(self.right) + + def time_intersection_both_duplicate(self, N): + self.intv.intersection(self.intv2) + + +class GC: + params = [1, 2, 5] + + def create_use_drop(self): + idx = Index(list(range(1000 * 1000))) + idx._engine + + def peakmem_gc_instances(self, N): + try: + gc.disable() + + for _ in range(N): + self.create_use_drop() + finally: + gc.enable() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py new file mode 100644 index 00000000..087fe391 --- /dev/null +++ b/asv_bench/benchmarks/indexing.py @@ -0,0 +1,348 @@ +import warnings + +import numpy as np + +from pandas import ( + CategoricalIndex, + DataFrame, + Float64Index, + IndexSlice, + Int64Index, + IntervalIndex, + MultiIndex, + Series, + UInt64Index, + concat, + date_range, + option_context, + period_range, +) + +from .pandas_vb_common import tm + + +class NumericSeriesIndexing: + + params = [ + (Int64Index, UInt64Index, Float64Index), + ("unique_monotonic_inc", "nonunique_monotonic_inc"), + ] + param_names = ["index_dtype", "index_structure"] + + def setup(self, index, index_structure): + N = 10 ** 6 + indices = { + "unique_monotonic_inc": index(range(N)), + "nonunique_monotonic_inc": index( + list(range(55)) + [54] + list(range(55, N - 1)) + ), + } + self.data = Series(np.random.rand(N), index=indices[index_structure]) + self.array = np.arange(10000) + self.array_list = self.array.tolist() + + def time_getitem_scalar(self, index, index_structure): + self.data[800000] + + def time_getitem_slice(self, index, index_structure): + self.data[:800000] + + def time_getitem_list_like(self, index, index_structure): + self.data[[800000]] + + def time_getitem_array(self, index, 
index_structure): + self.data[self.array] + + def time_getitem_lists(self, index, index_structure): + self.data[self.array_list] + + def time_iloc_array(self, index, index_structure): + self.data.iloc[self.array] + + def time_iloc_list_like(self, index, index_structure): + self.data.iloc[[800000]] + + def time_iloc_scalar(self, index, index_structure): + self.data.iloc[800000] + + def time_iloc_slice(self, index, index_structure): + self.data.iloc[:800000] + + def time_loc_array(self, index, index_structure): + self.data.loc[self.array] + + def time_loc_list_like(self, index, index_structure): + self.data.loc[[800000]] + + def time_loc_scalar(self, index, index_structure): + self.data.loc[800000] + + def time_loc_slice(self, index, index_structure): + self.data.loc[:800000] + + +class NonNumericSeriesIndexing: + + params = [ + ("string", "datetime", "period"), + ("unique_monotonic_inc", "nonunique_monotonic_inc", "non_monotonic"), + ] + param_names = ["index_dtype", "index_structure"] + + def setup(self, index, index_structure): + N = 10 ** 6 + if index == "string": + index = tm.makeStringIndex(N) + elif index == "datetime": + index = date_range("1900", periods=N, freq="s") + elif index == "period": + index = period_range("1900", periods=N, freq="s") + index = index.sort_values() + assert index.is_unique and index.is_monotonic_increasing + if index_structure == "nonunique_monotonic_inc": + index = index.insert(item=index[2], loc=2)[:-1] + elif index_structure == "non_monotonic": + index = index[::2].append(index[1::2]) + assert len(index) == N + self.s = Series(np.random.rand(N), index=index) + self.lbl = index[80000] + # warm up index mapping + self.s[self.lbl] + + def time_getitem_label_slice(self, index, index_structure): + self.s[: self.lbl] + + def time_getitem_pos_slice(self, index, index_structure): + self.s[:80000] + + def time_getitem_scalar(self, index, index_structure): + self.s[self.lbl] + + def time_getitem_list_like(self, index, index_structure): + 
self.s[[self.lbl]] + + +class DataFrameStringIndexing: + def setup(self): + index = tm.makeStringIndex(1000) + columns = tm.makeStringIndex(30) + with warnings.catch_warnings(record=True): + self.df = DataFrame(np.random.randn(1000, 30), index=index, columns=columns) + self.idx_scalar = index[100] + self.col_scalar = columns[10] + self.bool_indexer = self.df[self.col_scalar] > 0 + self.bool_obj_indexer = self.bool_indexer.astype(object) + self.boolean_indexer = (self.df[self.col_scalar] > 0).astype("boolean") + + def time_loc(self): + self.df.loc[self.idx_scalar, self.col_scalar] + + def time_getitem_scalar(self): + self.df[self.col_scalar][self.idx_scalar] + + def time_boolean_rows(self): + self.df[self.bool_indexer] + + def time_boolean_rows_object(self): + self.df[self.bool_obj_indexer] + + def time_boolean_rows_boolean(self): + self.df[self.boolean_indexer] + + +class DataFrameNumericIndexing: + def setup(self): + self.idx_dupe = np.array(range(30)) * 99 + self.df = DataFrame(np.random.randn(10000, 5)) + self.df_dup = concat([self.df, 2 * self.df, 3 * self.df]) + self.bool_indexer = [True] * 5000 + [False] * 5000 + + def time_iloc_dups(self): + self.df_dup.iloc[self.idx_dupe] + + def time_loc_dups(self): + self.df_dup.loc[self.idx_dupe] + + def time_iloc(self): + self.df.iloc[:100, 0] + + def time_loc(self): + self.df.loc[:100, 0] + + def time_bool_indexer(self): + self.df[self.bool_indexer] + + +class Take: + + params = ["int", "datetime"] + param_names = ["index"] + + def setup(self, index): + N = 100000 + indexes = { + "int": Int64Index(np.arange(N)), + "datetime": date_range("2011-01-01", freq="S", periods=N), + } + index = indexes[index] + self.s = Series(np.random.rand(N), index=index) + self.indexer = [True, False, True, True, False] * 20000 + + def time_take(self, index): + self.s.take(self.indexer) + + +class MultiIndexing: + def setup(self): + mi = MultiIndex.from_product([range(1000), range(1000)]) + self.s = Series(np.random.randn(1000000), 
index=mi) + self.df = DataFrame(self.s) + + n = 100000 + with warnings.catch_warnings(record=True): + self.mdt = DataFrame( + { + "A": np.random.choice(range(10000, 45000, 1000), n), + "B": np.random.choice(range(10, 400), n), + "C": np.random.choice(range(1, 150), n), + "D": np.random.choice(range(10000, 45000), n), + "x": np.random.choice(range(400), n), + "y": np.random.choice(range(25), n), + } + ) + self.idx = IndexSlice[20000:30000, 20:30, 35:45, 30000:40000] + self.mdt = self.mdt.set_index(["A", "B", "C", "D"]).sort_index() + + def time_index_slice(self): + self.mdt.loc[self.idx, :] + + +class IntervalIndexing: + def setup_cache(self): + idx = IntervalIndex.from_breaks(np.arange(1000001)) + monotonic = Series(np.arange(1000000), index=idx) + return monotonic + + def time_getitem_scalar(self, monotonic): + monotonic[80000] + + def time_loc_scalar(self, monotonic): + monotonic.loc[80000] + + def time_getitem_list(self, monotonic): + monotonic[80000:] + + def time_loc_list(self, monotonic): + monotonic.loc[80000:] + + +class CategoricalIndexIndexing: + + params = ["monotonic_incr", "monotonic_decr", "non_monotonic"] + param_names = ["index"] + + def setup(self, index): + N = 10 ** 5 + values = list("a" * N + "b" * N + "c" * N) + indices = { + "monotonic_incr": CategoricalIndex(values), + "monotonic_decr": CategoricalIndex(reversed(values)), + "non_monotonic": CategoricalIndex(list("abc" * N)), + } + self.data = indices[index] + + self.int_scalar = 10000 + self.int_list = list(range(10000)) + + self.cat_scalar = "b" + self.cat_list = ["a", "c"] + + def time_getitem_scalar(self, index): + self.data[self.int_scalar] + + def time_getitem_slice(self, index): + self.data[: self.int_scalar] + + def time_getitem_list_like(self, index): + self.data[[self.int_scalar]] + + def time_getitem_list(self, index): + self.data[self.int_list] + + def time_getitem_bool_array(self, index): + self.data[self.data == self.cat_scalar] + + def time_get_loc_scalar(self, index): + 
self.data.get_loc(self.cat_scalar) + + def time_get_indexer_list(self, index): + self.data.get_indexer(self.cat_list) + + +class MethodLookup: + def setup_cache(self): + s = Series() + return s + + def time_lookup_iloc(self, s): + s.iloc + + def time_lookup_loc(self, s): + s.loc + + +class GetItemSingleColumn: + def setup(self): + self.df_string_col = DataFrame(np.random.randn(3000, 1), columns=["A"]) + self.df_int_col = DataFrame(np.random.randn(3000, 1)) + + def time_frame_getitem_single_column_label(self): + self.df_string_col["A"] + + def time_frame_getitem_single_column_int(self): + self.df_int_col[0] + + +class AssignTimeseriesIndex: + def setup(self): + N = 100000 + idx = date_range("1/1/2000", periods=N, freq="H") + self.df = DataFrame(np.random.randn(N, 1), columns=["A"], index=idx) + + def time_frame_assign_timeseries_index(self): + self.df["date"] = self.df.index + + +class InsertColumns: + def setup(self): + self.N = 10 ** 3 + self.df = DataFrame(index=range(self.N)) + + def time_insert(self): + np.random.seed(1234) + for i in range(100): + self.df.insert(0, i, np.random.randn(self.N), allow_duplicates=True) + + def time_assign_with_setitem(self): + np.random.seed(1234) + for i in range(100): + self.df[i] = np.random.randn(self.N) + + +class ChainIndexing: + + params = [None, "warn"] + param_names = ["mode"] + + def setup(self, mode): + self.N = 1000000 + + def time_chained_indexing(self, mode): + with warnings.catch_warnings(record=True): + with option_context("mode.chained_assignment", mode): + df = DataFrame({"A": np.arange(self.N), "B": "foo"}) + df2 = df[df.A > self.N // 2] + df2["C"] = 1.0 + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/indexing_engines.py b/asv_bench/benchmarks/indexing_engines.py new file mode 100644 index 00000000..44a22dfa --- /dev/null +++ b/asv_bench/benchmarks/indexing_engines.py @@ -0,0 +1,71 @@ +import numpy as np + +from pandas._libs import index as libindex + + +def 
_get_numeric_engines(): + engine_names = [ + ("Int64Engine", np.int64), + ("Int32Engine", np.int32), + ("Int16Engine", np.int16), + ("Int8Engine", np.int8), + ("UInt64Engine", np.uint64), + ("UInt32Engine", np.uint32), + ("UInt16engine", np.uint16), + ("UInt8Engine", np.uint8), + ("Float64Engine", np.float64), + ("Float32Engine", np.float32), + ] + return [ + (getattr(libindex, engine_name), dtype) + for engine_name, dtype in engine_names + if hasattr(libindex, engine_name) + ] + + +class NumericEngineIndexing: + + params = [ + _get_numeric_engines(), + ["monotonic_incr", "monotonic_decr", "non_monotonic"], + ] + param_names = ["engine_and_dtype", "index_type"] + + def setup(self, engine_and_dtype, index_type): + engine, dtype = engine_and_dtype + N = 10 ** 5 + values = list([1] * N + [2] * N + [3] * N) + arr = { + "monotonic_incr": np.array(values, dtype=dtype), + "monotonic_decr": np.array(list(reversed(values)), dtype=dtype), + "non_monotonic": np.array([1, 2, 3] * N, dtype=dtype), + }[index_type] + + self.data = engine(lambda: arr, len(arr)) + # code belows avoids populating the mapping etc. while timing. + self.data.get_loc(2) + + def time_get_loc(self, engine_and_dtype, index_type): + self.data.get_loc(2) + + +class ObjectEngineIndexing: + + params = [("monotonic_incr", "monotonic_decr", "non_monotonic")] + param_names = ["index_type"] + + def setup(self, index_type): + N = 10 ** 5 + values = list("a" * N + "b" * N + "c" * N) + arr = { + "monotonic_incr": np.array(values, dtype=object), + "monotonic_decr": np.array(list(reversed(values)), dtype=object), + "non_monotonic": np.array(list("abc") * N, dtype=object), + }[index_type] + + self.data = libindex.ObjectEngine(lambda: arr, len(arr)) + # code belows avoids populating the mapping etc. while timing. 
+ self.data.get_loc("b") + + def time_get_loc(self, index_type): + self.data.get_loc("b") diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py new file mode 100644 index 00000000..1a8d5ede --- /dev/null +++ b/asv_bench/benchmarks/inference.py @@ -0,0 +1,123 @@ +import numpy as np + +from pandas import DataFrame, Series, to_numeric + +from .pandas_vb_common import lib, numeric_dtypes, tm + + +class NumericInferOps: + # from GH 7332 + params = numeric_dtypes + param_names = ["dtype"] + + def setup(self, dtype): + N = 5 * 10 ** 5 + self.df = DataFrame( + {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)} + ) + + def time_add(self, dtype): + self.df["A"] + self.df["B"] + + def time_subtract(self, dtype): + self.df["A"] - self.df["B"] + + def time_multiply(self, dtype): + self.df["A"] * self.df["B"] + + def time_divide(self, dtype): + self.df["A"] / self.df["B"] + + def time_modulo(self, dtype): + self.df["A"] % self.df["B"] + + +class DateInferOps: + # from GH 7332 + def setup_cache(self): + N = 5 * 10 ** 5 + df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")}) + df["timedelta"] = df["datetime64"] - df["datetime64"] + return df + + def time_subtract_datetimes(self, df): + df["datetime64"] - df["datetime64"] + + def time_timedelta_plus_datetime(self, df): + df["timedelta"] + df["datetime64"] + + def time_add_timedeltas(self, df): + df["timedelta"] + df["timedelta"] + + +class ToNumeric: + + params = ["ignore", "coerce"] + param_names = ["errors"] + + def setup(self, errors): + N = 10000 + self.float = Series(np.random.randn(N)) + self.numstr = self.float.astype("str") + self.str = Series(tm.makeStringIndex(N)) + + def time_from_float(self, errors): + to_numeric(self.float, errors=errors) + + def time_from_numeric_str(self, errors): + to_numeric(self.numstr, errors=errors) + + def time_from_str(self, errors): + to_numeric(self.str, errors=errors) + + +class ToNumericDowncast: + + param_names = ["dtype", 
"downcast"] + params = [ + [ + "string-float", + "string-int", + "string-nint", + "datetime64", + "int-list", + "int32", + ], + [None, "integer", "signed", "unsigned", "float"], + ] + + N = 500000 + N2 = int(N / 2) + + data_dict = { + "string-int": ["1"] * N2 + [2] * N2, + "string-nint": ["-1"] * N2 + [2] * N2, + "datetime64": np.repeat( + np.array(["1970-01-01", "1970-01-02"], dtype="datetime64[D]"), N + ), + "string-float": ["1.1"] * N2 + [2] * N2, + "int-list": [1] * N2 + [2] * N2, + "int32": np.repeat(np.int32(1), N), + } + + def setup(self, dtype, downcast): + self.data = self.data_dict[dtype] + + def time_downcast(self, dtype, downcast): + to_numeric(self.data, downcast=downcast) + + +class MaybeConvertNumeric: + def setup_cache(self): + N = 10 ** 6 + arr = np.repeat([2 ** 63], N) + np.arange(N).astype("uint64") + data = arr.astype(object) + data[1::2] = arr[1::2].astype(str) + data[-1] = -1 + return data + + def time_convert(self, data): + lib.maybe_convert_numeric(data, set(), coerce_numeric=False) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/__init__.py b/asv_bench/benchmarks/io/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py new file mode 100644 index 00000000..9bcd125f --- /dev/null +++ b/asv_bench/benchmarks/io/csv.py @@ -0,0 +1,407 @@ +from io import StringIO +import random +import string + +import numpy as np + +from pandas import Categorical, DataFrame, date_range, read_csv, to_datetime + +from ..pandas_vb_common import BaseIO, tm + + +class ToCSV(BaseIO): + + fname = "__test__.csv" + params = ["wide", "long", "mixed"] + param_names = ["kind"] + + def setup(self, kind): + wide_frame = DataFrame(np.random.randn(3000, 30)) + long_frame = DataFrame( + { + "A": np.arange(50000), + "B": np.arange(50000) + 1.0, + "C": np.arange(50000) + 2.0, + "D": np.arange(50000) + 3.0, + } + ) + mixed_frame = DataFrame( + { + 
"float": np.random.randn(5000), + "int": np.random.randn(5000).astype(int), + "bool": (np.arange(5000) % 2) == 0, + "datetime": date_range("2001", freq="s", periods=5000), + "object": ["foo"] * 5000, + } + ) + mixed_frame.loc[30:500, "float"] = np.nan + data = {"wide": wide_frame, "long": long_frame, "mixed": mixed_frame} + self.df = data[kind] + + def time_frame(self, kind): + self.df.to_csv(self.fname) + + +class ToCSVDatetime(BaseIO): + + fname = "__test__.csv" + + def setup(self): + rng = date_range("1/1/2000", periods=1000) + self.data = DataFrame(rng, index=rng) + + def time_frame_date_formatting(self): + self.data.to_csv(self.fname, date_format="%Y%m%d") + + +class ToCSVDatetimeBig(BaseIO): + + fname = "__test__.csv" + timeout = 1500 + params = [1000, 10000, 100000] + param_names = ["obs"] + + def setup(self, obs): + d = "2018-11-29" + dt = "2018-11-26 11:18:27.0" + self.data = DataFrame( + { + "dt": [np.datetime64(dt)] * obs, + "d": [np.datetime64(d)] * obs, + "r": [np.random.uniform()] * obs, + } + ) + + def time_frame(self, obs): + self.data.to_csv(self.fname) + + +class StringIORewind: + def data(self, stringio_object): + stringio_object.seek(0) + return stringio_object + + +class ReadCSVDInferDatetimeFormat(StringIORewind): + + params = ([True, False], ["custom", "iso8601", "ymd"]) + param_names = ["infer_datetime_format", "format"] + + def setup(self, infer_datetime_format, format): + rng = date_range("1/1/2000", periods=1000) + formats = { + "custom": "%m/%d/%Y %H:%M:%S.%f", + "iso8601": "%Y-%m-%d %H:%M:%S", + "ymd": "%Y%m%d", + } + dt_format = formats[format] + self.StringIO_input = StringIO("\n".join(rng.strftime(dt_format).tolist())) + + def time_read_csv(self, infer_datetime_format, format): + read_csv( + self.data(self.StringIO_input), + header=None, + names=["foo"], + parse_dates=["foo"], + infer_datetime_format=infer_datetime_format, + ) + + +class ReadCSVConcatDatetime(StringIORewind): + + iso8601 = "%Y-%m-%d %H:%M:%S" + + def setup(self): + 
rng = date_range("1/1/2000", periods=50000, freq="S") + self.StringIO_input = StringIO("\n".join(rng.strftime(self.iso8601).tolist())) + + def time_read_csv(self): + read_csv( + self.data(self.StringIO_input), + header=None, + names=["foo"], + parse_dates=["foo"], + infer_datetime_format=False, + ) + + +class ReadCSVConcatDatetimeBadDateValue(StringIORewind): + + params = (["nan", "0", ""],) + param_names = ["bad_date_value"] + + def setup(self, bad_date_value): + self.StringIO_input = StringIO((f"{bad_date_value},\n") * 50000) + + def time_read_csv(self, bad_date_value): + read_csv( + self.data(self.StringIO_input), + header=None, + names=["foo", "bar"], + parse_dates=["foo"], + infer_datetime_format=False, + ) + + +class ReadCSVSkipRows(BaseIO): + + fname = "__test__.csv" + params = [None, 10000] + param_names = ["skiprows"] + + def setup(self, skiprows): + N = 20000 + index = tm.makeStringIndex(N) + df = DataFrame( + { + "float1": np.random.randn(N), + "float2": np.random.randn(N), + "string1": ["foo"] * N, + "bool1": [True] * N, + "int1": np.random.randint(0, N, size=N), + }, + index=index, + ) + df.to_csv(self.fname) + + def time_skipprows(self, skiprows): + read_csv(self.fname, skiprows=skiprows) + + +class ReadUint64Integers(StringIORewind): + def setup(self): + self.na_values = [2 ** 63 + 500] + arr = np.arange(10000).astype("uint64") + 2 ** 63 + self.data1 = StringIO("\n".join(arr.astype(str).tolist())) + arr = arr.astype(object) + arr[500] = -1 + self.data2 = StringIO("\n".join(arr.astype(str).tolist())) + + def time_read_uint64(self): + read_csv(self.data(self.data1), header=None, names=["foo"]) + + def time_read_uint64_neg_values(self): + read_csv(self.data(self.data2), header=None, names=["foo"]) + + def time_read_uint64_na_values(self): + read_csv( + self.data(self.data1), header=None, names=["foo"], na_values=self.na_values + ) + + +class ReadCSVThousands(BaseIO): + + fname = "__test__.csv" + params = ([",", "|"], [None, ","]) + param_names = ["sep", 
"thousands"] + + def setup(self, sep, thousands): + N = 10000 + K = 8 + data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K)) + df = DataFrame(data) + if thousands is not None: + fmt = f":{thousands}" + fmt = "{" + fmt + "}" + df = df.applymap(lambda x: fmt.format(x)) + df.to_csv(self.fname, sep=sep) + + def time_thousands(self, sep, thousands): + read_csv(self.fname, sep=sep, thousands=thousands) + + +class ReadCSVComment(StringIORewind): + def setup(self): + data = ["A,B,C"] + (["1,2,3 # comment"] * 100000) + self.StringIO_input = StringIO("\n".join(data)) + + def time_comment(self): + read_csv( + self.data(self.StringIO_input), comment="#", header=None, names=list("abc") + ) + + +class ReadCSVFloatPrecision(StringIORewind): + + params = ([",", ";"], [".", "_"], [None, "high", "round_trip"]) + param_names = ["sep", "decimal", "float_precision"] + + def setup(self, sep, decimal, float_precision): + floats = [ + "".join(random.choice(string.digits) for _ in range(28)) for _ in range(15) + ] + rows = sep.join([f"0{decimal}" + "{}"] * 3) + "\n" + data = rows * 5 + data = data.format(*floats) * 200 # 1000 x 3 strings csv + self.StringIO_input = StringIO(data) + + def time_read_csv(self, sep, decimal, float_precision): + read_csv( + self.data(self.StringIO_input), + sep=sep, + header=None, + names=list("abc"), + float_precision=float_precision, + ) + + def time_read_csv_python_engine(self, sep, decimal, float_precision): + read_csv( + self.data(self.StringIO_input), + sep=sep, + header=None, + engine="python", + float_precision=None, + names=list("abc"), + ) + + +class ReadCSVCategorical(BaseIO): + + fname = "__test__.csv" + + def setup(self): + N = 100000 + group1 = ["aaaaaaaa", "bbbbbbb", "cccccccc", "dddddddd", "eeeeeeee"] + df = DataFrame(np.random.choice(group1, (N, 3)), columns=list("abc")) + df.to_csv(self.fname, index=False) + + def time_convert_post(self): + read_csv(self.fname).apply(Categorical) + + def time_convert_direct(self): + 
read_csv(self.fname, dtype="category") + + +class ReadCSVParseDates(StringIORewind): + def setup(self): + data = """{},19:00:00,18:56:00,0.8100,2.8100,7.2000,0.0000,280.0000\n + {},20:00:00,19:56:00,0.0100,2.2100,7.2000,0.0000,260.0000\n + {},21:00:00,20:56:00,-0.5900,2.2100,5.7000,0.0000,280.0000\n + {},21:00:00,21:18:00,-0.9900,2.0100,3.6000,0.0000,270.0000\n + {},22:00:00,21:56:00,-0.5900,1.7100,5.1000,0.0000,290.0000\n + """ + two_cols = ["KORD,19990127"] * 5 + data = data.format(*two_cols) + self.StringIO_input = StringIO(data) + + def time_multiple_date(self): + read_csv( + self.data(self.StringIO_input), + sep=",", + header=None, + names=list(string.digits[:9]), + parse_dates=[[1, 2], [1, 3]], + ) + + def time_baseline(self): + read_csv( + self.data(self.StringIO_input), + sep=",", + header=None, + parse_dates=[1], + names=list(string.digits[:9]), + ) + + +class ReadCSVCachedParseDates(StringIORewind): + params = ([True, False],) + param_names = ["do_cache"] + + def setup(self, do_cache): + data = ("\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n") * 10 + self.StringIO_input = StringIO(data) + + def time_read_csv_cached(self, do_cache): + try: + read_csv( + self.data(self.StringIO_input), + header=None, + parse_dates=[0], + cache_dates=do_cache, + ) + except TypeError: + # cache_dates is a new keyword in 0.25 + pass + + +class ReadCSVMemoryGrowth(BaseIO): + + chunksize = 20 + num_rows = 1000 + fname = "__test__.csv" + + def setup(self): + with open(self.fname, "w") as f: + for i in range(self.num_rows): + f.write(f"{i}\n") + + def mem_parser_chunks(self): + # see gh-24805. 
+ result = read_csv(self.fname, chunksize=self.chunksize) + + for _ in result: + pass + + +class ReadCSVParseSpecialDate(StringIORewind): + params = (["mY", "mdY", "hm"],) + param_names = ["value"] + objects = { + "mY": "01-2019\n10-2019\n02/2000\n", + "mdY": "12/02/2010\n", + "hm": "21:34\n", + } + + def setup(self, value): + count_elem = 10000 + data = self.objects[value] * count_elem + self.StringIO_input = StringIO(data) + + def time_read_special_date(self, value): + read_csv( + self.data(self.StringIO_input), + sep=",", + header=None, + names=["Date"], + parse_dates=["Date"], + ) + + +class ParseDateComparison(StringIORewind): + params = ([False, True],) + param_names = ["cache_dates"] + + def setup(self, cache_dates): + count_elem = 10000 + data = "12-02-2010\n" * count_elem + self.StringIO_input = StringIO(data) + + def time_read_csv_dayfirst(self, cache_dates): + try: + read_csv( + self.data(self.StringIO_input), + sep=",", + header=None, + names=["Date"], + parse_dates=["Date"], + cache_dates=cache_dates, + dayfirst=True, + ) + except TypeError: + # cache_dates is a new keyword in 0.25 + pass + + def time_to_datetime_dayfirst(self, cache_dates): + df = read_csv( + self.data(self.StringIO_input), dtype={"date": str}, names=["date"] + ) + to_datetime(df["date"], cache=cache_dates, dayfirst=True) + + def time_to_datetime_format_DD_MM_YYYY(self, cache_dates): + df = read_csv( + self.data(self.StringIO_input), dtype={"date": str}, names=["date"] + ) + to_datetime(df["date"], cache=cache_dates, format="%d-%m-%Y") + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py new file mode 100644 index 00000000..80af2cff --- /dev/null +++ b/asv_bench/benchmarks/io/excel.py @@ -0,0 +1,73 @@ +from io import BytesIO + +import numpy as np +from odf.opendocument import OpenDocumentSpreadsheet +from odf.table import Table, TableCell, TableRow +from odf.text import P + +from pandas 
import DataFrame, ExcelWriter, date_range, read_excel + +from ..pandas_vb_common import tm + + +def _generate_dataframe(): + N = 2000 + C = 5 + df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + df["object"] = tm.makeStringIndex(N) + return df + + +class WriteExcel: + + params = ["openpyxl", "xlsxwriter", "xlwt"] + param_names = ["engine"] + + def setup(self, engine): + self.df = _generate_dataframe() + + def time_write_excel(self, engine): + bio = BytesIO() + bio.seek(0) + writer = ExcelWriter(bio, engine=engine) + self.df.to_excel(writer, sheet_name="Sheet1") + writer.save() + + +class ReadExcel: + + params = ["xlrd", "openpyxl", "odf"] + param_names = ["engine"] + fname_excel = "spreadsheet.xlsx" + fname_odf = "spreadsheet.ods" + + def _create_odf(self): + doc = OpenDocumentSpreadsheet() + table = Table(name="Table1") + for row in self.df.values: + tr = TableRow() + for val in row: + tc = TableCell(valuetype="string") + tc.addElement(P(text=val)) + tr.addElement(tc) + table.addElement(tr) + + doc.spreadsheet.addElement(table) + doc.save(self.fname_odf) + + def setup_cache(self): + self.df = _generate_dataframe() + + self.df.to_excel(self.fname_excel, sheet_name="Sheet1") + self._create_odf() + + def time_read_excel(self, engine): + fname = self.fname_odf if engine == "odf" else self.fname_excel + read_excel(fname, engine=engine) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py new file mode 100644 index 00000000..4ca399a2 --- /dev/null +++ b/asv_bench/benchmarks/io/hdf.py @@ -0,0 +1,130 @@ +import numpy as np + +from pandas import DataFrame, HDFStore, date_range, read_hdf + +from ..pandas_vb_common import BaseIO, tm + + +class HDFStoreDataFrame(BaseIO): + def setup(self): + N = 25000 + index = tm.makeStringIndex(N) + self.df = DataFrame( + {"float1": np.random.randn(N), "float2": 
np.random.randn(N)}, index=index + ) + self.df_mixed = DataFrame( + { + "float1": np.random.randn(N), + "float2": np.random.randn(N), + "string1": ["foo"] * N, + "bool1": [True] * N, + "int1": np.random.randint(0, N, size=N), + }, + index=index, + ) + self.df_wide = DataFrame(np.random.randn(N, 100)) + self.start_wide = self.df_wide.index[10000] + self.stop_wide = self.df_wide.index[15000] + self.df2 = DataFrame( + {"float1": np.random.randn(N), "float2": np.random.randn(N)}, + index=date_range("1/1/2000", periods=N), + ) + self.start = self.df2.index[10000] + self.stop = self.df2.index[15000] + self.df_wide2 = DataFrame( + np.random.randn(N, 100), index=date_range("1/1/2000", periods=N) + ) + self.df_dc = DataFrame( + np.random.randn(N, 10), columns=["C%03d" % i for i in range(10)] + ) + + self.fname = "__test__.h5" + + self.store = HDFStore(self.fname) + self.store.put("fixed", self.df) + self.store.put("fixed_mixed", self.df_mixed) + self.store.append("table", self.df2) + self.store.append("table_mixed", self.df_mixed) + self.store.append("table_wide", self.df_wide) + self.store.append("table_wide2", self.df_wide2) + + def teardown(self): + self.store.close() + self.remove(self.fname) + + def time_read_store(self): + self.store.get("fixed") + + def time_read_store_mixed(self): + self.store.get("fixed_mixed") + + def time_write_store(self): + self.store.put("fixed_write", self.df) + + def time_write_store_mixed(self): + self.store.put("fixed_mixed_write", self.df_mixed) + + def time_read_store_table_mixed(self): + self.store.select("table_mixed") + + def time_write_store_table_mixed(self): + self.store.append("table_mixed_write", self.df_mixed) + + def time_read_store_table(self): + self.store.select("table") + + def time_write_store_table(self): + self.store.append("table_write", self.df) + + def time_read_store_table_wide(self): + self.store.select("table_wide") + + def time_write_store_table_wide(self): + self.store.append("table_wide_write", self.df_wide) + + 
def time_write_store_table_dc(self): + self.store.append("table_dc_write", self.df_dc, data_columns=True) + + def time_query_store_table_wide(self): + self.store.select( + "table_wide", where="index > self.start_wide and index < self.stop_wide" + ) + + def time_query_store_table(self): + self.store.select("table", where="index > self.start and index < self.stop") + + def time_store_repr(self): + repr(self.store) + + def time_store_str(self): + str(self.store) + + def time_store_info(self): + self.store.info() + + +class HDF(BaseIO): + + params = ["table", "fixed"] + param_names = ["format"] + + def setup(self, format): + self.fname = "__test__.h5" + N = 100000 + C = 5 + self.df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + self.df["object"] = tm.makeStringIndex(N) + self.df.to_hdf(self.fname, "df", format=format) + + def time_read_hdf(self, format): + read_hdf(self.fname, "df") + + def time_write_hdf(self, format): + self.df.to_hdf(self.fname, "df", format=format) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py new file mode 100644 index 00000000..f478bf2a --- /dev/null +++ b/asv_bench/benchmarks/io/json.py @@ -0,0 +1,241 @@ +import numpy as np + +from pandas import DataFrame, concat, date_range, read_json, timedelta_range + +from ..pandas_vb_common import BaseIO, tm + + +class ReadJSON(BaseIO): + + fname = "__test__.json" + params = (["split", "index", "records"], ["int", "datetime"]) + param_names = ["orient", "index"] + + def setup(self, orient, index): + N = 100000 + indexes = { + "int": np.arange(N), + "datetime": date_range("20000101", periods=N, freq="H"), + } + df = DataFrame( + np.random.randn(N, 5), + columns=[f"float_{i}" for i in range(5)], + index=indexes[index], + ) + df.to_json(self.fname, orient=orient) + + def time_read_json(self, orient, index): + 
read_json(self.fname, orient=orient) + + +class ReadJSONLines(BaseIO): + + fname = "__test_lines__.json" + params = ["int", "datetime"] + param_names = ["index"] + + def setup(self, index): + N = 100000 + indexes = { + "int": np.arange(N), + "datetime": date_range("20000101", periods=N, freq="H"), + } + df = DataFrame( + np.random.randn(N, 5), + columns=[f"float_{i}" for i in range(5)], + index=indexes[index], + ) + df.to_json(self.fname, orient="records", lines=True) + + def time_read_json_lines(self, index): + read_json(self.fname, orient="records", lines=True) + + def time_read_json_lines_concat(self, index): + concat(read_json(self.fname, orient="records", lines=True, chunksize=25000)) + + def peakmem_read_json_lines(self, index): + read_json(self.fname, orient="records", lines=True) + + def peakmem_read_json_lines_concat(self, index): + concat(read_json(self.fname, orient="records", lines=True, chunksize=25000)) + + +class ToJSON(BaseIO): + + fname = "__test__.json" + params = [ + ["split", "columns", "index", "values", "records"], + ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"], + ] + param_names = ["orient", "frame"] + + def setup(self, orient, frame): + N = 10 ** 5 + ncols = 5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + ints = np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + "int_1": ints, + "int_2": ints, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + self.df_int_floats = DataFrame( + { + "int_1": ints, + "int_2": ints, + "int_3": ints, + "float_1": floats, + "float_2": floats, + "float_3": floats, + }, + 
index=index, + ) + self.df_int_float_str = DataFrame( + { + "int_1": ints, + "int_2": ints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) + + def time_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) + + def peakmem_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) + + def time_to_json_wide(self, orient, frame): + base_df = getattr(self, frame).copy() + df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df.to_json(self.fname, orient=orient) + + def peakmem_to_json_wide(self, orient, frame): + base_df = getattr(self, frame).copy() + df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df.to_json(self.fname, orient=orient) + + +class ToJSONISO(BaseIO): + fname = "__test__.json" + params = [["split", "columns", "index", "values", "records"]] + param_names = ["orient"] + + def setup(self, orient): + N = 10 ** 5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + self.df = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + + def time_iso_format(self, orient): + self.df.to_json(orient=orient, date_format="iso") + + +class ToJSONLines(BaseIO): + + fname = "__test__.json" + + def setup(self): + N = 10 ** 5 + ncols = 5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + ints = np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + 
"int_1": ints, + "int_2": ints, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + self.df_int_floats = DataFrame( + { + "int_1": ints, + "int_2": ints, + "int_3": ints, + "float_1": floats, + "float_2": floats, + "float_3": floats, + }, + index=index, + ) + self.df_int_float_str = DataFrame( + { + "int_1": ints, + "int_2": ints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) + + def time_floats_with_int_idex_lines(self): + self.df.to_json(self.fname, orient="records", lines=True) + + def time_floats_with_dt_index_lines(self): + self.df_date_idx.to_json(self.fname, orient="records", lines=True) + + def time_delta_int_tstamp_lines(self): + self.df_td_int_ts.to_json(self.fname, orient="records", lines=True) + + def time_float_int_lines(self): + self.df_int_floats.to_json(self.fname, orient="records", lines=True) + + def time_float_int_str_lines(self): + self.df_int_float_str.to_json(self.fname, orient="records", lines=True) + + +class ToJSONMem: + def setup_cache(self): + df = DataFrame([[1]]) + frames = {"int": df, "float": df.astype(float)} + + return frames + + def peakmem_int(self, frames): + df = frames["int"] + for _ in range(100_000): + df.to_json() + + def peakmem_float(self, frames): + df = frames["float"] + for _ in range(100_000): + df.to_json() + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/parsers.py b/asv_bench/benchmarks/io/parsers.py new file mode 100644 index 00000000..c5e099bd --- /dev/null +++ b/asv_bench/benchmarks/io/parsers.py @@ -0,0 +1,42 @@ +import numpy as np + +try: + from pandas._libs.tslibs.parsing import ( + _concat_date_cols, + _does_string_look_like_datetime, + ) +except ImportError: + # Avoid whole benchmark suite import failure on asv (currently 0.4) + pass + + +class DoesStringLookLikeDatetime: + + params = (["2Q2005", "0.0", "10000"],) + param_names = ["value"] + + def setup(self, value): + 
self.objects = [value] * 1000000 + + def time_check_datetimes(self, value): + for obj in self.objects: + _does_string_look_like_datetime(obj) + + +class ConcatDateCols: + + params = ([1234567890, "AAAA"], [1, 2]) + param_names = ["value", "dim"] + + def setup(self, value, dim): + count_elem = 10000 + if dim == 1: + self.object = (np.array([value] * count_elem),) + if dim == 2: + self.object = ( + np.array([value] * count_elem), + np.array([value] * count_elem), + ) + + def time_check_concat(self, value, dim): + _concat_date_cols(self.object) diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py new file mode 100644 index 00000000..4ca9a82a --- /dev/null +++ b/asv_bench/benchmarks/io/pickle.py @@ -0,0 +1,28 @@ +import numpy as np + +from pandas import DataFrame, date_range, read_pickle + +from ..pandas_vb_common import BaseIO, tm + + +class Pickle(BaseIO): + def setup(self): + self.fname = "__test__.pkl" + N = 100000 + C = 5 + self.df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + self.df["object"] = tm.makeStringIndex(N) + self.df.to_pickle(self.fname) + + def time_read_pickle(self): + read_pickle(self.fname) + + def time_write_pickle(self): + self.df.to_pickle(self.fname) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/sas.py b/asv_bench/benchmarks/io/sas.py new file mode 100644 index 00000000..5eaeb231 --- /dev/null +++ b/asv_bench/benchmarks/io/sas.py @@ -0,0 +1,30 @@ +import os + +from pandas import read_sas + + +class SAS: + + params = ["sas7bdat", "xport"] + param_names = ["format"] + + def setup(self, format): + # Read files that are located in 'pandas/io/tests/sas/data' + files = {"sas7bdat": "test1.sas7bdat", "xport": "paxraw_d_short.xpt"} + file = files[format] + paths = [ + os.path.dirname(__file__), + "..", + "..", + "..", + "pandas", + "tests", + "io", + "sas", + "data", + file, 
+ ] + self.f = os.path.join(*paths) + + def time_read_sas(self, format): + read_sas(self.f, format=format) diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py new file mode 100644 index 00000000..b71bb832 --- /dev/null +++ b/asv_bench/benchmarks/io/sql.py @@ -0,0 +1,146 @@ +import sqlite3 + +import numpy as np +from sqlalchemy import create_engine + +from pandas import DataFrame, date_range, read_sql_query, read_sql_table + +from ..pandas_vb_common import tm + + +class SQL: + + params = ["sqlalchemy", "sqlite"] + param_names = ["connection"] + + def setup(self, connection): + N = 10000 + con = { + "sqlalchemy": create_engine("sqlite:///:memory:"), + "sqlite": sqlite3.connect(":memory:"), + } + self.table_name = "test_type" + self.query_all = f"SELECT * FROM {self.table_name}" + self.con = con[connection] + self.df = DataFrame( + { + "float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_to_sql_dataframe(self, connection): + self.df.to_sql("test1", self.con, if_exists="replace") + + def time_read_sql_query(self, connection): + read_sql_query(self.query_all, self.con) + + +class WriteSQLDtypes: + + params = ( + ["sqlalchemy", "sqlite"], + ["float", "float_with_nan", "string", "bool", "int", "datetime"], + ) + param_names = ["connection", "dtype"] + + def setup(self, connection, dtype): + N = 10000 + con = { + "sqlalchemy": create_engine("sqlite:///:memory:"), + "sqlite": sqlite3.connect(":memory:"), + } + self.table_name = "test_type" + self.query_col = f"SELECT {dtype} FROM {self.table_name}" + self.con = con[connection] + self.df = DataFrame( + { + 
"float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_to_sql_dataframe_column(self, connection, dtype): + self.df[[dtype]].to_sql("test1", self.con, if_exists="replace") + + def time_read_sql_query_select_column(self, connection, dtype): + read_sql_query(self.query_col, self.con) + + +class ReadSQLTable: + def setup(self): + N = 10000 + self.table_name = "test" + self.con = create_engine("sqlite:///:memory:") + self.df = DataFrame( + { + "float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_read_sql_table_all(self): + read_sql_table(self.table_name, self.con) + + def time_read_sql_table_parse_dates(self): + read_sql_table( + self.table_name, + self.con, + columns=["datetime_string"], + parse_dates=["datetime_string"], + ) + + +class ReadSQLTableDtypes: + + params = ["float", "float_with_nan", "string", "bool", "int", "datetime"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10000 + self.table_name = "test" + self.con = create_engine("sqlite:///:memory:") + self.df = DataFrame( + { + "float": np.random.randn(N), + "float_with_nan": np.random.randn(N), + "string": ["foo"] * N, + "bool": [True] * N, + "int": np.random.randint(0, N, size=N), + "datetime": 
date_range("2000-01-01", periods=N, freq="s"), + }, + index=tm.makeStringIndex(N), + ) + self.df.loc[1000:3000, "float_with_nan"] = np.nan + self.df["datetime_string"] = self.df["datetime"].astype(str) + self.df.to_sql(self.table_name, self.con, if_exists="replace") + + def time_read_sql_table_column(self, dtype): + read_sql_table(self.table_name, self.con, columns=[dtype]) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py new file mode 100644 index 00000000..9faafa82 --- /dev/null +++ b/asv_bench/benchmarks/io/stata.py @@ -0,0 +1,53 @@ +import numpy as np + +from pandas import DataFrame, date_range, read_stata + +from ..pandas_vb_common import BaseIO, tm + + +class Stata(BaseIO): + + params = ["tc", "td", "tm", "tw", "th", "tq", "ty"] + param_names = ["convert_dates"] + + def setup(self, convert_dates): + self.fname = "__test__.dta" + N = self.N = 100000 + C = self.C = 5 + self.df = DataFrame( + np.random.randn(N, C), + columns=[f"float{i}" for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + self.df["object"] = tm.makeStringIndex(self.N) + self.df["int8_"] = np.random.randint( + np.iinfo(np.int8).min, np.iinfo(np.int8).max - 27, N + ) + self.df["int16_"] = np.random.randint( + np.iinfo(np.int16).min, np.iinfo(np.int16).max - 27, N + ) + self.df["int32_"] = np.random.randint( + np.iinfo(np.int32).min, np.iinfo(np.int32).max - 27, N + ) + self.df["float32_"] = np.array(np.random.randn(N), dtype=np.float32) + self.convert_dates = {"index": convert_dates} + self.df.to_stata(self.fname, self.convert_dates) + + def time_read_stata(self, convert_dates): + read_stata(self.fname) + + def time_write_stata(self, convert_dates): + self.df.to_stata(self.fname, self.convert_dates) + + +class StataMissing(Stata): + def setup(self, convert_dates): + super().setup(convert_dates) + for i in range(10): + missing_data = np.random.randn(self.N) + 
missing_data[missing_data < 0] = np.nan + self.df[f"missing_{i}"] = missing_data + self.df.to_stata(self.fname, self.convert_dates) + + +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py new file mode 100644 index 00000000..1333b3a0 --- /dev/null +++ b/asv_bench/benchmarks/join_merge.py @@ -0,0 +1,386 @@ +import string + +import numpy as np + +from pandas import DataFrame, MultiIndex, Series, concat, date_range, merge, merge_asof + +from .pandas_vb_common import tm + +try: + from pandas import merge_ordered +except ImportError: + from pandas import ordered_merge as merge_ordered + + +class Append: + def setup(self): + self.df1 = DataFrame(np.random.randn(10000, 4), columns=["A", "B", "C", "D"]) + self.df2 = self.df1.copy() + self.df2.index = np.arange(10000, 20000) + self.mdf1 = self.df1.copy() + self.mdf1["obj1"] = "bar" + self.mdf1["obj2"] = "bar" + self.mdf1["int1"] = 5 + self.mdf1 = self.mdf1._consolidate() + self.mdf2 = self.mdf1.copy() + self.mdf2.index = self.df2.index + + def time_append_homogenous(self): + self.df1.append(self.df2) + + def time_append_mixed(self): + self.mdf1.append(self.mdf2) + + +class Concat: + + params = [0, 1] + param_names = ["axis"] + + def setup(self, axis): + N = 1000 + s = Series(N, index=tm.makeStringIndex(N)) + self.series = [s[i:-i] for i in range(1, 10)] * 50 + self.small_frames = [DataFrame(np.random.randn(5, 4))] * 1000 + df = DataFrame( + {"A": range(N)}, index=date_range("20130101", periods=N, freq="s") + ) + self.empty_left = [DataFrame(), df] + self.empty_right = [df, DataFrame()] + self.mixed_ndims = [df, df.head(N // 2)] + + def time_concat_series(self, axis): + concat(self.series, axis=axis, sort=False) + + def time_concat_small_frames(self, axis): + concat(self.small_frames, axis=axis) + + def time_concat_empty_right(self, axis): + concat(self.empty_right, axis=axis) + + def time_concat_empty_left(self, axis): + 
concat(self.empty_left, axis=axis) + + def time_concat_mixed_ndims(self, axis): + concat(self.mixed_ndims, axis=axis) + + +class ConcatDataFrames: + + params = ([0, 1], [True, False]) + param_names = ["axis", "ignore_index"] + + def setup(self, axis, ignore_index): + frame_c = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="C")) + self.frame_c = [frame_c] * 20 + frame_f = DataFrame(np.zeros((10000, 200), dtype=np.float32, order="F")) + self.frame_f = [frame_f] * 20 + + def time_c_ordered(self, axis, ignore_index): + concat(self.frame_c, axis=axis, ignore_index=ignore_index) + + def time_f_ordered(self, axis, ignore_index): + concat(self.frame_f, axis=axis, ignore_index=ignore_index) + + +class Join: + + params = [True, False] + param_names = ["sort"] + + def setup(self, sort): + level1 = tm.makeStringIndex(10).values + level2 = tm.makeStringIndex(1000).values + codes1 = np.arange(10).repeat(1000) + codes2 = np.tile(np.arange(1000), 10) + index2 = MultiIndex(levels=[level1, level2], codes=[codes1, codes2]) + self.df_multi = DataFrame( + np.random.randn(len(index2), 4), index=index2, columns=["A", "B", "C", "D"] + ) + + self.key1 = np.tile(level1.take(codes1), 10) + self.key2 = np.tile(level2.take(codes2), 10) + self.df = DataFrame( + { + "data1": np.random.randn(100000), + "data2": np.random.randn(100000), + "key1": self.key1, + "key2": self.key2, + } + ) + + self.df_key1 = DataFrame( + np.random.randn(len(level1), 4), index=level1, columns=["A", "B", "C", "D"] + ) + self.df_key2 = DataFrame( + np.random.randn(len(level2), 4), index=level2, columns=["A", "B", "C", "D"] + ) + + shuf = np.arange(100000) + np.random.shuffle(shuf) + self.df_shuf = self.df.reindex(self.df.index[shuf]) + + def time_join_dataframe_index_multi(self, sort): + self.df.join(self.df_multi, on=["key1", "key2"], sort=sort) + + def time_join_dataframe_index_single_key_bigger(self, sort): + self.df.join(self.df_key2, on="key2", sort=sort) + + def 
time_join_dataframe_index_single_key_small(self, sort): + self.df.join(self.df_key1, on="key1", sort=sort) + + def time_join_dataframe_index_shuffle_key_bigger_sort(self, sort): + self.df_shuf.join(self.df_key2, on="key2", sort=sort) + + +class JoinIndex: + def setup(self): + N = 50000 + self.left = DataFrame( + np.random.randint(1, N / 500, (N, 2)), columns=["jim", "joe"] + ) + self.right = DataFrame( + np.random.randint(1, N / 500, (N, 2)), columns=["jolie", "jolia"] + ).set_index("jolie") + + def time_left_outer_join_index(self): + self.left.join(self.right, on="jim") + + +class JoinNonUnique: + # outer join of non-unique + # GH 6329 + def setup(self): + date_index = date_range("01-Jan-2013", "23-Jan-2013", freq="T") + daily_dates = date_index.to_period("D").to_timestamp("S", "S") + self.fracofday = date_index.values - daily_dates.values + self.fracofday = self.fracofday.astype("timedelta64[ns]") + self.fracofday = self.fracofday.astype(np.float64) / 86400000000000.0 + self.fracofday = Series(self.fracofday, daily_dates) + index = date_range(date_index.min(), date_index.max(), freq="D") + self.temp = Series(1.0, index)[self.fracofday.index] + + def time_join_non_unique_equal(self): + self.fracofday * self.temp + + +class Merge: + + params = [True, False] + param_names = ["sort"] + + def setup(self, sort): + N = 10000 + indices = tm.makeStringIndex(N).values + indices2 = tm.makeStringIndex(N).values + key = np.tile(indices[:8000], 10) + key2 = np.tile(indices2[:8000], 10) + self.left = DataFrame( + {"key": key, "key2": key2, "value": np.random.randn(80000)} + ) + self.right = DataFrame( + { + "key": indices[2000:], + "key2": indices2[2000:], + "value2": np.random.randn(8000), + } + ) + + self.df = DataFrame( + { + "key1": np.tile(np.arange(500).repeat(10), 2), + "key2": np.tile(np.arange(250).repeat(10), 4), + "value": np.random.randn(10000), + } + ) + self.df2 = DataFrame({"key1": np.arange(500), "value2": np.random.randn(500)}) + self.df3 = self.df[:5000] + + 
def time_merge_2intkey(self, sort): + merge(self.left, self.right, sort=sort) + + def time_merge_dataframe_integer_2key(self, sort): + merge(self.df, self.df3, sort=sort) + + def time_merge_dataframe_integer_key(self, sort): + merge(self.df, self.df2, on="key1", sort=sort) + + +class I8Merge: + + params = ["inner", "outer", "left", "right"] + param_names = ["how"] + + def setup(self, how): + low, high, n = -1000, 1000, 10 ** 6 + self.left = DataFrame( + np.random.randint(low, high, (n, 7)), columns=list("ABCDEFG") + ) + self.left["left"] = self.left.sum(axis=1) + self.right = self.left.sample(frac=1).rename({"left": "right"}, axis=1) + self.right = self.right.reset_index(drop=True) + self.right["right"] *= -1 + + def time_i8merge(self, how): + merge(self.left, self.right, how=how) + + +class MergeCategoricals: + def setup(self): + self.left_object = DataFrame( + { + "X": np.random.choice(range(0, 10), size=(10000,)), + "Y": np.random.choice(["one", "two", "three"], size=(10000,)), + } + ) + + self.right_object = DataFrame( + { + "X": np.random.choice(range(0, 10), size=(10000,)), + "Z": np.random.choice(["jjj", "kkk", "sss"], size=(10000,)), + } + ) + + self.left_cat = self.left_object.assign( + Y=self.left_object["Y"].astype("category") + ) + self.right_cat = self.right_object.assign( + Z=self.right_object["Z"].astype("category") + ) + + def time_merge_object(self): + merge(self.left_object, self.right_object, on="X") + + def time_merge_cat(self): + merge(self.left_cat, self.right_cat, on="X") + + +class MergeOrdered: + def setup(self): + groups = tm.makeStringIndex(10).values + self.left = DataFrame( + { + "group": groups.repeat(5000), + "key": np.tile(np.arange(0, 10000, 2), 10), + "lvalue": np.random.randn(50000), + } + ) + self.right = DataFrame( + {"key": np.arange(10000), "rvalue": np.random.randn(10000)} + ) + + def time_merge_ordered(self): + merge_ordered(self.left, self.right, on="key", left_by="group") + + +class MergeAsof: + params = [["backward", 
"forward", "nearest"], [None, 5]] + param_names = ["direction", "tolerance"] + + def setup(self, direction, tolerance): + one_count = 200000 + two_count = 1000000 + + df1 = DataFrame( + { + "time": np.random.randint(0, one_count / 20, one_count), + "key": np.random.choice(list(string.ascii_uppercase), one_count), + "key2": np.random.randint(0, 25, one_count), + "value1": np.random.randn(one_count), + } + ) + df2 = DataFrame( + { + "time": np.random.randint(0, two_count / 20, two_count), + "key": np.random.choice(list(string.ascii_uppercase), two_count), + "key2": np.random.randint(0, 25, two_count), + "value2": np.random.randn(two_count), + } + ) + + df1 = df1.sort_values("time") + df2 = df2.sort_values("time") + + df1["time32"] = np.int32(df1.time) + df2["time32"] = np.int32(df2.time) + + df1["timeu64"] = np.uint64(df1.time) + df2["timeu64"] = np.uint64(df2.time) + + self.df1a = df1[["time", "value1"]] + self.df2a = df2[["time", "value2"]] + self.df1b = df1[["time", "key", "value1"]] + self.df2b = df2[["time", "key", "value2"]] + self.df1c = df1[["time", "key2", "value1"]] + self.df2c = df2[["time", "key2", "value2"]] + self.df1d = df1[["time32", "value1"]] + self.df2d = df2[["time32", "value2"]] + self.df1e = df1[["time", "key", "key2", "value1"]] + self.df2e = df2[["time", "key", "key2", "value2"]] + self.df1f = df1[["timeu64", "value1"]] + self.df2f = df2[["timeu64", "value2"]] + + def time_on_int(self, direction, tolerance): + merge_asof( + self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance + ) + + def time_on_int32(self, direction, tolerance): + merge_asof( + self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance + ) + + def time_on_uint64(self, direction, tolerance): + merge_asof( + self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance + ) + + def time_by_object(self, direction, tolerance): + merge_asof( + self.df1b, + self.df2b, + on="time", + by="key", + direction=direction, + 
tolerance=tolerance, + ) + + def time_by_int(self, direction, tolerance): + merge_asof( + self.df1c, + self.df2c, + on="time", + by="key2", + direction=direction, + tolerance=tolerance, + ) + + def time_multiby(self, direction, tolerance): + merge_asof( + self.df1e, + self.df2e, + on="time", + by=["key", "key2"], + direction=direction, + tolerance=tolerance, + ) + + +class Align: + def setup(self): + size = 5 * 10 ** 5 + rng = np.arange(0, 10 ** 13, 10 ** 7) + stamps = np.datetime64("now").view("i8") + rng + idx1 = np.sort(np.random.choice(stamps, size, replace=False)) + idx2 = np.sort(np.random.choice(stamps, size, replace=False)) + self.ts1 = Series(np.random.randn(size), idx1) + self.ts2 = Series(np.random.randn(size), idx2) + + def time_series_align_int64_index(self): + self.ts1 + self.ts2 + + def time_series_align_left_monotonic(self): + self.ts1.align(self.ts2, join="left") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py new file mode 100644 index 00000000..0e188c58 --- /dev/null +++ b/asv_bench/benchmarks/multiindex_object.py @@ -0,0 +1,163 @@ +import string + +import numpy as np + +from pandas import DataFrame, MultiIndex, RangeIndex, date_range + +from .pandas_vb_common import tm + + +class GetLoc: + def setup(self): + self.mi_large = MultiIndex.from_product( + [np.arange(1000), np.arange(20), list(string.ascii_letters)], + names=["one", "two", "three"], + ) + self.mi_med = MultiIndex.from_product( + [np.arange(1000), np.arange(10), list("A")], names=["one", "two", "three"] + ) + self.mi_small = MultiIndex.from_product( + [np.arange(100), list("A"), list("A")], names=["one", "two", "three"] + ) + + def time_large_get_loc(self): + self.mi_large.get_loc((999, 19, "Z")) + + def time_large_get_loc_warm(self): + for _ in range(1000): + self.mi_large.get_loc((999, 19, "Z")) + + def time_med_get_loc(self): + self.mi_med.get_loc((999, 9, "A")) + + 
def time_med_get_loc_warm(self): + for _ in range(1000): + self.mi_med.get_loc((999, 9, "A")) + + def time_string_get_loc(self): + self.mi_small.get_loc((99, "A", "A")) + + def time_small_get_loc_warm(self): + for _ in range(1000): + self.mi_small.get_loc((99, "A", "A")) + + +class Duplicates: + def setup(self): + size = 65536 + arrays = [np.random.randint(0, 8192, size), np.random.randint(0, 1024, size)] + mask = np.random.rand(size) < 0.1 + self.mi_unused_levels = MultiIndex.from_arrays(arrays) + self.mi_unused_levels = self.mi_unused_levels[mask] + + def time_remove_unused_levels(self): + self.mi_unused_levels.remove_unused_levels() + + +class Integer: + def setup(self): + self.mi_int = MultiIndex.from_product( + [np.arange(1000), np.arange(1000)], names=["one", "two"] + ) + self.obj_index = np.array( + [ + (0, 10), + (0, 11), + (0, 12), + (0, 13), + (0, 14), + (0, 15), + (0, 16), + (0, 17), + (0, 18), + (0, 19), + ], + dtype=object, + ) + + def time_get_indexer(self): + self.mi_int.get_indexer(self.obj_index) + + def time_is_monotonic(self): + self.mi_int.is_monotonic + + +class Duplicated: + def setup(self): + n, k = 200, 5000 + levels = [np.arange(n), tm.makeStringIndex(n).values, 1000 + np.arange(n)] + codes = [np.random.choice(n, (k * n)) for lev in levels] + self.mi = MultiIndex(levels=levels, codes=codes) + + def time_duplicated(self): + self.mi.duplicated() + + +class Sortlevel: + def setup(self): + n = 1182720 + low, high = -4096, 4096 + arrs = [ + np.repeat(np.random.randint(low, high, (n // k)), k) + for k in [11, 7, 5, 3, 1] + ] + self.mi_int = MultiIndex.from_arrays(arrs)[np.random.permutation(n)] + + a = np.repeat(np.arange(100), 1000) + b = np.tile(np.arange(1000), 100) + self.mi = MultiIndex.from_arrays([a, b]) + self.mi = self.mi.take(np.random.permutation(np.arange(100000))) + + def time_sortlevel_int64(self): + self.mi_int.sortlevel() + + def time_sortlevel_zero(self): + self.mi.sortlevel(0) + + def time_sortlevel_one(self): + 
self.mi.sortlevel(1) + + +class Values: + def setup_cache(self): + + level1 = range(1000) + level2 = date_range(start="1/1/2012", periods=100) + mi = MultiIndex.from_product([level1, level2]) + return mi + + def time_datetime_level_values_copy(self, mi): + mi.copy().values + + def time_datetime_level_values_sliced(self, mi): + mi[:10].values + + +class CategoricalLevel: + def setup(self): + + self.df = DataFrame( + { + "a": np.arange(1_000_000, dtype=np.int32), + "b": np.arange(1_000_000, dtype=np.int64), + "c": np.arange(1_000_000, dtype=float), + } + ).astype({"a": "category", "b": "category"}) + + def time_categorical_level(self): + self.df.set_index(["a", "b"]) + + +class Equals: + def setup(self): + idx_large_fast = RangeIndex(100000) + idx_small_slow = date_range(start="1/1/2012", periods=1) + self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow]) + + self.idx_non_object = RangeIndex(1) + + def time_equals_non_object_index(self): + self.mi_large_slow.equals(self.idx_non_object) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py new file mode 100644 index 00000000..77ce1b27 --- /dev/null +++ b/asv_bench/benchmarks/offset.py @@ -0,0 +1,80 @@ +import warnings + +import pandas as pd + +try: + import pandas.tseries.holiday +except ImportError: + pass + +hcal = pd.tseries.holiday.USFederalHolidayCalendar() +# These offsets currently raise a NotImplementedError with .apply_index() +non_apply = [ + pd.offsets.Day(), + pd.offsets.BYearEnd(), + pd.offsets.BYearBegin(), + pd.offsets.BQuarterEnd(), + pd.offsets.BQuarterBegin(), + pd.offsets.BMonthEnd(), + pd.offsets.BMonthBegin(), + pd.offsets.CustomBusinessDay(), + pd.offsets.CustomBusinessDay(calendar=hcal), + pd.offsets.CustomBusinessMonthBegin(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), +] +other_offsets = [ + 
pd.offsets.YearEnd(), + pd.offsets.YearBegin(), + pd.offsets.QuarterEnd(), + pd.offsets.QuarterBegin(), + pd.offsets.MonthEnd(), + pd.offsets.MonthBegin(), + pd.offsets.DateOffset(months=2, days=2), + pd.offsets.BusinessDay(), + pd.offsets.SemiMonthEnd(), + pd.offsets.SemiMonthBegin(), +] +offsets = non_apply + other_offsets + + +class ApplyIndex: + + params = other_offsets + param_names = ["offset"] + + def setup(self, offset): + N = 10000 + self.rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + + def time_apply_index(self, offset): + offset.apply_index(self.rng) + + +class OffsetSeriesArithmetic: + + params = offsets + param_names = ["offset"] + + def setup(self, offset): + N = 1000 + rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + self.data = pd.Series(rng) + + def time_add_offset(self, offset): + with warnings.catch_warnings(record=True): + self.data + offset + + +class OffsetDatetimeIndexArithmetic: + + params = offsets + param_names = ["offset"] + + def setup(self, offset): + N = 1000 + self.data = pd.date_range(start="1/1/2000", periods=N, freq="T") + + def time_add_offset(self, offset): + with warnings.catch_warnings(record=True): + self.data + offset diff --git a/asv_bench/benchmarks/package.py b/asv_bench/benchmarks/package.py new file mode 100644 index 00000000..8ca33db3 --- /dev/null +++ b/asv_bench/benchmarks/package.py @@ -0,0 +1,25 @@ +""" +Benchmarks for pandas at the package-level. 
+""" +import subprocess +import sys + +from pandas.compat import PY37 + + +class TimeImport: + def time_import(self): + if PY37: + # on py37+ we the "-X importtime" usage gives us a more precise + # measurement of the import time we actually care about, + # without the subprocess or interpreter overhead + cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"] + p = subprocess.run(cmd, stderr=subprocess.PIPE) + + line = p.stderr.splitlines()[-1] + field = line.split(b"|")[-2].strip() + total = int(field) # microseconds + return total + + cmd = [sys.executable, "-c", "import pandas as pd"] + subprocess.run(cmd, stderr=subprocess.PIPE) diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py new file mode 100644 index 00000000..6da2b227 --- /dev/null +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -0,0 +1,80 @@ +from importlib import import_module +import os + +import numpy as np + +import pandas as pd + +# Compatibility import for lib +for imp in ["pandas._libs.lib", "pandas.lib"]: + try: + lib = import_module(imp) + break + except (ImportError, TypeError, ValueError): + pass + +# Compatibility import for the testing module +try: + import pandas._testing as tm # noqa +except ImportError: + import pandas.util.testing as tm # noqa + + +numeric_dtypes = [ + np.int64, + np.int32, + np.uint32, + np.uint64, + np.float32, + np.float64, + np.int16, + np.int8, + np.uint16, + np.uint8, +] +datetime_dtypes = [np.datetime64, np.timedelta64] +string_dtypes = [np.object] +try: + extension_dtypes = [ + pd.Int8Dtype, + pd.Int16Dtype, + pd.Int32Dtype, + pd.Int64Dtype, + pd.UInt8Dtype, + pd.UInt16Dtype, + pd.UInt32Dtype, + pd.UInt64Dtype, + pd.CategoricalDtype, + pd.IntervalDtype, + pd.DatetimeTZDtype("ns", "UTC"), + pd.PeriodDtype("D"), + ] +except AttributeError: + extension_dtypes = [] + + +def setup(*args, **kwargs): + # This function just needs to be imported into each benchmark file to + # set up the random seed before 
each function. + # http://asv.readthedocs.io/en/latest/writing_benchmarks.html + np.random.seed(1234) + + +class BaseIO: + """ + Base class for IO benchmarks + """ + + fname = None + + def remove(self, f): + """Remove created files""" + try: + os.remove(f) + except OSError: + # On Windows, attempting to remove a file that is in use + # causes an exception to be raised + pass + + def teardown(self, *args, **kwargs): + self.remove(self.fname) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py new file mode 100644 index 00000000..b52aa2e5 --- /dev/null +++ b/asv_bench/benchmarks/period.py @@ -0,0 +1,104 @@ +""" +Period benchmarks with non-tslibs dependencies. See +benchmarks.tslibs.period for benchmarks that rely only on tslibs. +""" +from pandas import DataFrame, Period, PeriodIndex, Series, date_range, period_range + +from pandas.tseries.frequencies import to_offset + + +class PeriodIndexConstructor: + + params = [["D"], [True, False]] + param_names = ["freq", "is_offset"] + + def setup(self, freq, is_offset): + self.rng = date_range("1985", periods=1000) + self.rng2 = date_range("1985", periods=1000).to_pydatetime() + self.ints = list(range(2000, 3000)) + self.daily_ints = ( + date_range("1/1/2000", periods=1000, freq=freq).strftime("%Y%m%d").map(int) + ) + if is_offset: + self.freq = to_offset(freq) + else: + self.freq = freq + + def time_from_date_range(self, freq, is_offset): + PeriodIndex(self.rng, freq=freq) + + def time_from_pydatetime(self, freq, is_offset): + PeriodIndex(self.rng2, freq=freq) + + def time_from_ints(self, freq, is_offset): + PeriodIndex(self.ints, freq=freq) + + def time_from_ints_daily(self, freq, is_offset): + PeriodIndex(self.daily_ints, freq=freq) + + +class DataFramePeriodColumn: + def setup(self): + self.rng = period_range(start="1/1/1990", freq="S", periods=20000) + self.df = DataFrame(index=range(len(self.rng))) + + def time_setitem_period_column(self): + self.df["col"] = self.rng + + def 
time_set_index(self): + # GH#21582 limited by comparisons of Period objects + self.df["col2"] = self.rng + self.df.set_index("col2", append=True) + + +class Algorithms: + + params = ["index", "series"] + param_names = ["typ"] + + def setup(self, typ): + data = [ + Period("2011-01", freq="M"), + Period("2011-02", freq="M"), + Period("2011-03", freq="M"), + Period("2011-04", freq="M"), + ] + + if typ == "index": + self.vector = PeriodIndex(data * 1000, freq="M") + elif typ == "series": + self.vector = Series(data * 1000) + + def time_drop_duplicates(self, typ): + self.vector.drop_duplicates() + + def time_value_counts(self, typ): + self.vector.value_counts() + + +class Indexing: + def setup(self): + self.index = period_range(start="1985", periods=1000, freq="D") + self.series = Series(range(1000), index=self.index) + self.period = self.index[500] + + def time_get_loc(self): + self.index.get_loc(self.period) + + def time_shape(self): + self.index.shape + + def time_shallow_copy(self): + self.index._shallow_copy() + + def time_series_loc(self): + self.series.loc[self.period] + + def time_align(self): + DataFrame({"a": self.series, "b": self.series[:500]}) + + def time_intersection(self): + self.index[:750].intersection(self.index[250:]) + + def time_unique(self): + self.index.unique() diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py new file mode 100644 index 00000000..5c718516 --- /dev/null +++ b/asv_bench/benchmarks/plotting.py @@ -0,0 +1,97 @@ +import matplotlib +import numpy as np + +from pandas import DataFrame, DatetimeIndex, Series, date_range + +try: + from pandas.plotting import andrews_curves +except ImportError: + from pandas.tools.plotting import andrews_curves + +matplotlib.use("Agg") + + +class SeriesPlotting: + params = [["line", "bar", "area", "barh", "hist", "kde", "pie"]] + param_names = ["kind"] + + def setup(self, kind): + if kind in ["bar", "barh", "pie"]: + n = 100 + elif kind in ["kde"]: + n = 10000 + else: + n = 
1000000 + + self.s = Series(np.random.randn(n)) + if kind in ["area", "pie"]: + self.s = self.s.abs() + + def time_series_plot(self, kind): + self.s.plot(kind=kind) + + +class FramePlotting: + params = [ + ["line", "bar", "area", "barh", "hist", "kde", "pie", "scatter", "hexbin"] + ] + param_names = ["kind"] + + def setup(self, kind): + if kind in ["bar", "barh", "pie"]: + n = 100 + elif kind in ["kde", "scatter", "hexbin"]: + n = 10000 + else: + n = 1000000 + + self.x = Series(np.random.randn(n)) + self.y = Series(np.random.randn(n)) + if kind in ["area", "pie"]: + self.x = self.x.abs() + self.y = self.y.abs() + self.df = DataFrame({"x": self.x, "y": self.y}) + + def time_frame_plot(self, kind): + self.df.plot(x="x", y="y", kind=kind) + + +class TimeseriesPlotting: + def setup(self): + N = 2000 + M = 5 + idx = date_range("1/1/1975", periods=N) + self.df = DataFrame(np.random.randn(N, M), index=idx) + + idx_irregular = DatetimeIndex( + np.concatenate((idx.values[0:10], idx.values[12:])) + ) + self.df2 = DataFrame( + np.random.randn(len(idx_irregular), M), index=idx_irregular + ) + + def time_plot_regular(self): + self.df.plot() + + def time_plot_regular_compat(self): + self.df.plot(x_compat=True) + + def time_plot_irregular(self): + self.df2.plot() + + def time_plot_table(self): + self.df.plot(table=True) + + +class Misc: + def setup(self): + N = 500 + M = 10 + self.df = DataFrame(np.random.randn(N, M)) + self.df["Name"] = ["A"] * N + + def time_plot_andrews_curves(self): + andrews_curves(self.df, "Name") + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py new file mode 100644 index 00000000..03394e6f --- /dev/null +++ b/asv_bench/benchmarks/reindex.py @@ -0,0 +1,163 @@ +import numpy as np + +from pandas import DataFrame, Index, MultiIndex, Series, date_range, period_range + +from .pandas_vb_common import lib, tm + + +class Reindex: + def setup(self): + rng = 
date_range(start="1/1/1970", periods=10000, freq="1min") + self.df = DataFrame(np.random.rand(10000, 10), index=rng, columns=range(10)) + self.df["foo"] = "bar" + self.rng_subset = Index(rng[::2]) + self.df2 = DataFrame( + index=range(10000), data=np.random.rand(10000, 30), columns=range(30) + ) + N = 5000 + K = 200 + level1 = tm.makeStringIndex(N).values.repeat(K) + level2 = np.tile(tm.makeStringIndex(K).values, N) + index = MultiIndex.from_arrays([level1, level2]) + self.s = Series(np.random.randn(N * K), index=index) + self.s_subset = self.s[::2] + + def time_reindex_dates(self): + self.df.reindex(self.rng_subset) + + def time_reindex_columns(self): + self.df2.reindex(columns=self.df.columns[1:5]) + + def time_reindex_multiindex(self): + self.s.reindex(self.s_subset.index) + + +class ReindexMethod: + + params = [["pad", "backfill"], [date_range, period_range]] + param_names = ["method", "constructor"] + + def setup(self, method, constructor): + N = 100000 + self.idx = constructor("1/1/2000", periods=N, freq="1min") + self.ts = Series(np.random.randn(N), index=self.idx)[::2] + + def time_reindex_method(self, method, constructor): + self.ts.reindex(self.idx, method=method) + + +class Fillna: + + params = ["pad", "backfill"] + param_names = ["method"] + + def setup(self, method): + N = 100000 + self.idx = date_range("1/1/2000", periods=N, freq="1min") + ts = Series(np.random.randn(N), index=self.idx)[::2] + self.ts_reindexed = ts.reindex(self.idx) + self.ts_float32 = self.ts_reindexed.astype("float32") + + def time_reindexed(self, method): + self.ts_reindexed.fillna(method=method) + + def time_float_32(self, method): + self.ts_float32.fillna(method=method) + + +class LevelAlign: + def setup(self): + self.index = MultiIndex( + levels=[np.arange(10), np.arange(100), np.arange(100)], + codes=[ + np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10), + ], + ) + self.df = 
DataFrame(np.random.randn(len(self.index), 4), index=self.index) + self.df_level = DataFrame(np.random.randn(100, 4), index=self.index.levels[1]) + + def time_align_level(self): + self.df.align(self.df_level, level=1, copy=False) + + def time_reindex_level(self): + self.df_level.reindex(self.index, level=1) + + +class DropDuplicates: + + params = [True, False] + param_names = ["inplace"] + + def setup(self, inplace): + N = 10000 + K = 10 + key1 = tm.makeStringIndex(N).values.repeat(K) + key2 = tm.makeStringIndex(N).values.repeat(K) + self.df = DataFrame( + {"key1": key1, "key2": key2, "value": np.random.randn(N * K)} + ) + self.df_nan = self.df.copy() + self.df_nan.iloc[:10000, :] = np.nan + + self.s = Series(np.random.randint(0, 1000, size=10000)) + self.s_str = Series(np.tile(tm.makeStringIndex(1000).values, 10)) + + N = 1000000 + K = 10000 + key1 = np.random.randint(0, K, size=N) + self.df_int = DataFrame({"key1": key1}) + self.df_bool = DataFrame(np.random.randint(0, 2, size=(K, 10), dtype=bool)) + + def time_frame_drop_dups(self, inplace): + self.df.drop_duplicates(["key1", "key2"], inplace=inplace) + + def time_frame_drop_dups_na(self, inplace): + self.df_nan.drop_duplicates(["key1", "key2"], inplace=inplace) + + def time_series_drop_dups_int(self, inplace): + self.s.drop_duplicates(inplace=inplace) + + def time_series_drop_dups_string(self, inplace): + self.s_str.drop_duplicates(inplace=inplace) + + def time_frame_drop_dups_int(self, inplace): + self.df_int.drop_duplicates(inplace=inplace) + + def time_frame_drop_dups_bool(self, inplace): + self.df_bool.drop_duplicates(inplace=inplace) + + +class Align: + # blog "pandas escaped the zoo" + def setup(self): + n = 50000 + indices = tm.makeStringIndex(n) + subsample_size = 40000 + self.x = Series(np.random.randn(n), indices) + self.y = Series( + np.random.randn(subsample_size), + index=np.random.choice(indices, subsample_size, replace=False), + ) + + def time_align_series_irregular_string(self): + self.x + 
self.y + + +class LibFastZip: + def setup(self): + N = 10000 + K = 10 + key1 = tm.makeStringIndex(N).values.repeat(K) + key2 = tm.makeStringIndex(N).values.repeat(K) + col_array = np.vstack([key1, key2, np.random.randn(N * K)]) + col_array2 = col_array.copy() + col_array2[:, :10000] = np.nan + self.col_array_list = list(col_array) + + def time_lib_fast_zip(self): + lib.fast_zip(self.col_array_list) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py new file mode 100644 index 00000000..2a115fb0 --- /dev/null +++ b/asv_bench/benchmarks/replace.py @@ -0,0 +1,77 @@ +import numpy as np + +import pandas as pd + + +class FillNa: + + params = [True, False] + param_names = ["inplace"] + + def setup(self, inplace): + N = 10 ** 6 + rng = pd.date_range("1/1/2000", periods=N, freq="min") + data = np.random.randn(N) + data[::2] = np.nan + self.ts = pd.Series(data, index=rng) + + def time_fillna(self, inplace): + self.ts.fillna(0.0, inplace=inplace) + + def time_replace(self, inplace): + self.ts.replace(np.nan, 0.0, inplace=inplace) + + +class ReplaceDict: + + params = [True, False] + param_names = ["inplace"] + + def setup(self, inplace): + N = 10 ** 5 + start_value = 10 ** 5 + self.to_rep = dict(enumerate(np.arange(N) + start_value)) + self.s = pd.Series(np.random.randint(N, size=10 ** 3)) + + def time_replace_series(self, inplace): + self.s.replace(self.to_rep, inplace=inplace) + + +class ReplaceList: + # GH#28099 + + params = [(True, False)] + param_names = ["inplace"] + + def setup(self, inplace): + self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(4 * 10 ** 7)) + + def time_replace_list(self, inplace): + self.df.replace([np.inf, -np.inf], np.nan, inplace=inplace) + + def time_replace_list_one_match(self, inplace): + # the 1 can be held in self._df.blocks[0], while the inf and -inf cant + self.df.replace([np.inf, -np.inf, 1], np.nan, inplace=inplace) + + +class Convert: + + params 
= (["DataFrame", "Series"], ["Timestamp", "Timedelta"]) + param_names = ["constructor", "replace_data"] + + def setup(self, constructor, replace_data): + N = 10 ** 3 + data = { + "Series": pd.Series(np.random.randint(N, size=N)), + "DataFrame": pd.DataFrame( + {"A": np.random.randint(N, size=N), "B": np.random.randint(N, size=N)} + ), + } + self.to_replace = {i: getattr(pd, replace_data) for i in range(N)} + self.data = data[constructor] + + def time_replace(self, constructor, replace_data): + self.data.replace(self.to_replace) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py new file mode 100644 index 00000000..441f4b38 --- /dev/null +++ b/asv_bench/benchmarks/reshape.py @@ -0,0 +1,266 @@ +from itertools import product +import string + +import numpy as np + +import pandas as pd +from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long + + +class Melt: + def setup(self): + self.df = DataFrame(np.random.randn(10000, 3), columns=["A", "B", "C"]) + self.df["id1"] = np.random.randint(0, 10, 10000) + self.df["id2"] = np.random.randint(100, 1000, 10000) + + def time_melt_dataframe(self): + melt(self.df, id_vars=["id1", "id2"]) + + +class Pivot: + def setup(self): + N = 10000 + index = date_range("1/1/2000", periods=N, freq="h") + data = { + "value": np.random.randn(N * 50), + "variable": np.arange(50).repeat(N), + "date": np.tile(index.values, 50), + } + self.df = DataFrame(data) + + def time_reshape_pivot_time_series(self): + self.df.pivot("date", "variable", "value") + + +class SimpleReshape: + def setup(self): + arrays = [np.arange(100).repeat(100), np.roll(np.tile(np.arange(100), 100), 25)] + index = MultiIndex.from_arrays(arrays) + self.df = DataFrame(np.random.randn(10000, 4), index=index) + self.udf = self.df.unstack(1) + + def time_stack(self): + self.udf.stack() + + def time_unstack(self): + self.df.unstack(1) + + +class Unstack: + + params = ["int", 
"category"] + + def setup(self, dtype): + m = 100 + n = 1000 + + levels = np.arange(m) + index = MultiIndex.from_product([levels] * 2) + columns = np.arange(n) + if dtype == "int": + values = np.arange(m * m * n).reshape(m * m, n) + else: + # the category branch is ~20x slower than int. So we + # cut down the size a bit. Now it's only ~3x slower. + n = 50 + columns = columns[:n] + indices = np.random.randint(0, 52, size=(m * m, n)) + values = np.take(list(string.ascii_letters), indices) + values = [pd.Categorical(v) for v in values.T] + + self.df = DataFrame(values, index, columns) + self.df2 = self.df.iloc[:-1] + + def time_full_product(self, dtype): + self.df.unstack() + + def time_without_last_row(self, dtype): + self.df2.unstack() + + +class SparseIndex: + def setup(self): + NUM_ROWS = 1000 + self.df = DataFrame( + { + "A": np.random.randint(50, size=NUM_ROWS), + "B": np.random.randint(50, size=NUM_ROWS), + "C": np.random.randint(-10, 10, size=NUM_ROWS), + "D": np.random.randint(-10, 10, size=NUM_ROWS), + "E": np.random.randint(10, size=NUM_ROWS), + "F": np.random.randn(NUM_ROWS), + } + ) + self.df = self.df.set_index(["A", "B", "C", "D", "E"]) + + def time_unstack(self): + self.df.unstack() + + +class WideToLong: + def setup(self): + nyrs = 20 + nidvars = 20 + N = 5000 + self.letters = list("ABCD") + yrvars = [l + str(num) for l, num in product(self.letters, range(1, nyrs + 1))] + columns = [str(i) for i in range(nidvars)] + yrvars + self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)), columns=columns) + self.df["id"] = self.df.index + + def time_wide_to_long_big(self): + wide_to_long(self.df, self.letters, i="id", j="year") + + +class PivotTable: + def setup(self): + N = 100000 + fac1 = np.array(["A", "B", "C"], dtype="O") + fac2 = np.array(["one", "two"], dtype="O") + ind1 = np.random.randint(0, 3, size=N) + ind2 = np.random.randint(0, 2, size=N) + self.df = DataFrame( + { + "key1": fac1.take(ind1), + "key2": fac2.take(ind2), + "key3": 
fac2.take(ind2), + "value1": np.random.randn(N), + "value2": np.random.randn(N), + "value3": np.random.randn(N), + } + ) + self.df2 = DataFrame( + {"col1": list("abcde"), "col2": list("fghij"), "col3": [1, 2, 3, 4, 5]} + ) + self.df2.col1 = self.df2.col1.astype("category") + self.df2.col2 = self.df2.col2.astype("category") + + def time_pivot_table(self): + self.df.pivot_table(index="key1", columns=["key2", "key3"]) + + def time_pivot_table_agg(self): + self.df.pivot_table( + index="key1", columns=["key2", "key3"], aggfunc=["sum", "mean"] + ) + + def time_pivot_table_margins(self): + self.df.pivot_table(index="key1", columns=["key2", "key3"], margins=True) + + def time_pivot_table_categorical(self): + self.df2.pivot_table( + index="col1", values="col3", columns="col2", aggfunc=np.sum, fill_value=0 + ) + + def time_pivot_table_categorical_observed(self): + self.df2.pivot_table( + index="col1", + values="col3", + columns="col2", + aggfunc=np.sum, + fill_value=0, + observed=True, + ) + + +class Crosstab: + def setup(self): + N = 100000 + fac1 = np.array(["A", "B", "C"], dtype="O") + fac2 = np.array(["one", "two"], dtype="O") + self.ind1 = np.random.randint(0, 3, size=N) + self.ind2 = np.random.randint(0, 2, size=N) + self.vec1 = fac1.take(self.ind1) + self.vec2 = fac2.take(self.ind2) + + def time_crosstab(self): + pd.crosstab(self.vec1, self.vec2) + + def time_crosstab_values(self): + pd.crosstab(self.vec1, self.vec2, values=self.ind1, aggfunc="sum") + + def time_crosstab_normalize(self): + pd.crosstab(self.vec1, self.vec2, normalize=True) + + def time_crosstab_normalize_margins(self): + pd.crosstab(self.vec1, self.vec2, normalize=True, margins=True) + + +class GetDummies: + def setup(self): + categories = list(string.ascii_letters[:12]) + s = pd.Series( + np.random.choice(categories, size=1000000), + dtype=pd.api.types.CategoricalDtype(categories), + ) + self.s = s + + def time_get_dummies_1d(self): + pd.get_dummies(self.s, sparse=False) + + def 
time_get_dummies_1d_sparse(self): + pd.get_dummies(self.s, sparse=True) + + +class Cut: + params = [[4, 10, 1000]] + param_names = ["bins"] + + def setup(self, bins): + N = 10 ** 5 + self.int_series = pd.Series(np.arange(N).repeat(5)) + self.float_series = pd.Series(np.random.randn(N).repeat(5)) + self.timedelta_series = pd.Series( + np.random.randint(N, size=N), dtype="timedelta64[ns]" + ) + self.datetime_series = pd.Series( + np.random.randint(N, size=N), dtype="datetime64[ns]" + ) + self.interval_bins = pd.IntervalIndex.from_breaks(np.linspace(0, N, bins)) + + def time_cut_int(self, bins): + pd.cut(self.int_series, bins) + + def time_cut_float(self, bins): + pd.cut(self.float_series, bins) + + def time_cut_timedelta(self, bins): + pd.cut(self.timedelta_series, bins) + + def time_cut_datetime(self, bins): + pd.cut(self.datetime_series, bins) + + def time_qcut_int(self, bins): + pd.qcut(self.int_series, bins) + + def time_qcut_float(self, bins): + pd.qcut(self.float_series, bins) + + def time_qcut_timedelta(self, bins): + pd.qcut(self.timedelta_series, bins) + + def time_qcut_datetime(self, bins): + pd.qcut(self.datetime_series, bins) + + def time_cut_interval(self, bins): + # GH 27668 + pd.cut(self.int_series, self.interval_bins) + + def peakmem_cut_interval(self, bins): + # GH 27668 + pd.cut(self.int_series, self.interval_bins) + + +class Explode: + param_names = ["n_rows", "max_list_length"] + params = [[100, 1000, 10000], [3, 5, 10]] + + def setup(self, n_rows, max_list_length): + + data = [np.arange(np.random.randint(max_list_length)) for _ in range(n_rows)] + self.series = pd.Series(data) + + def time_explode(self, n_rows, max_list_length): + self.series.explode() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py new file mode 100644 index 00000000..331086b7 --- /dev/null +++ b/asv_bench/benchmarks/rolling.py @@ -0,0 +1,146 @@ +import numpy as np + +import pandas as 
pd + + +class Methods: + + params = ( + ["DataFrame", "Series"], + [10, 1000], + ["int", "float"], + ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"], + ) + param_names = ["constructor", "window", "dtype", "method"] + + def setup(self, constructor, window, dtype, method): + N = 10 ** 5 + arr = (100 * np.random.random(N)).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_rolling(self, constructor, window, dtype, method): + getattr(self.roll, method)() + + def peakmem_rolling(self, constructor, window, dtype, method): + getattr(self.roll, method)() + + +class Apply: + params = ( + ["DataFrame", "Series"], + [3, 300], + ["int", "float"], + [sum, np.sum, lambda x: np.sum(x) + 5], + [True, False], + ) + param_names = ["constructor", "window", "dtype", "function", "raw"] + + def setup(self, constructor, window, dtype, function, raw): + N = 10 ** 3 + arr = (100 * np.random.random(N)).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_rolling(self, constructor, window, dtype, function, raw): + self.roll.apply(function, raw=raw) + + +class ExpandingMethods: + + params = ( + ["DataFrame", "Series"], + ["int", "float"], + ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"], + ) + param_names = ["constructor", "dtype", "method"] + + def setup(self, constructor, dtype, method): + N = 10 ** 5 + arr = (100 * np.random.random(N)).astype(dtype) + self.expanding = getattr(pd, constructor)(arr).expanding() + + def time_expanding(self, constructor, dtype, method): + getattr(self.expanding, method)() + + +class EWMMethods: + + params = (["DataFrame", "Series"], [10, 1000], ["int", "float"], ["mean", "std"]) + param_names = ["constructor", "window", "dtype", "method"] + + def setup(self, constructor, window, dtype, method): + N = 10 ** 5 + arr = (100 * np.random.random(N)).astype(dtype) + self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window) + + def 
time_ewm(self, constructor, window, dtype, method): + getattr(self.ewm, method)() + + +class VariableWindowMethods(Methods): + params = ( + ["DataFrame", "Series"], + ["50s", "1h", "1d"], + ["int", "float"], + ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"], + ) + param_names = ["constructor", "window", "dtype", "method"] + + def setup(self, constructor, window, dtype, method): + N = 10 ** 5 + arr = (100 * np.random.random(N)).astype(dtype) + index = pd.date_range("2017-01-01", periods=N, freq="5s") + self.roll = getattr(pd, constructor)(arr, index=index).rolling(window) + + +class Pairwise: + + params = ([10, 1000, None], ["corr", "cov"], [True, False]) + param_names = ["window", "method", "pairwise"] + + def setup(self, window, method, pairwise): + N = 10 ** 4 + arr = np.random.random(N) + self.df = pd.DataFrame(arr) + + def time_pairwise(self, window, method, pairwise): + if window is None: + r = self.df.expanding() + else: + r = self.df.rolling(window=window) + getattr(r, method)(self.df, pairwise=pairwise) + + +class Quantile: + params = ( + ["DataFrame", "Series"], + [10, 1000], + ["int", "float"], + [0, 0.5, 1], + ["linear", "nearest", "lower", "higher", "midpoint"], + ) + param_names = ["constructor", "window", "dtype", "percentile", "interpolation"] + + def setup(self, constructor, window, dtype, percentile, interpolation): + N = 10 ** 5 + arr = np.random.random(N).astype(dtype) + self.roll = getattr(pd, constructor)(arr).rolling(window) + + def time_quantile(self, constructor, window, dtype, percentile, interpolation): + self.roll.quantile(percentile, interpolation=interpolation) + + +class PeakMemFixedWindowMinMax: + + params = ["min", "max"] + + def setup(self, operation): + N = int(1e6) + arr = np.random.random(N) + self.roll = pd.Series(arr).rolling(2) + + def peakmem_fixed(self, operation): + for x in range(5): + getattr(self.roll, operation)() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git 
a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py new file mode 100644 index 00000000..57c625ce --- /dev/null +++ b/asv_bench/benchmarks/series_methods.py @@ -0,0 +1,280 @@ +from datetime import datetime + +import numpy as np + +from pandas import NaT, Series, date_range + +from .pandas_vb_common import tm + + +class SeriesConstructor: + + params = [None, "dict"] + param_names = ["data"] + + def setup(self, data): + self.idx = date_range( + start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s" + ) + dict_data = dict(zip(self.idx, range(len(self.idx)))) + self.data = None if data is None else dict_data + + def time_constructor(self, data): + Series(data=self.data, index=self.idx) + + +class IsIn: + + params = ["int64", "uint64", "object"] + param_names = ["dtype"] + + def setup(self, dtype): + self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype) + self.values = [1, 2] + + def time_isin(self, dtype): + self.s.isin(self.values) + + +class IsInFloat64: + def setup(self): + self.small = Series([1, 2], dtype=np.float64) + self.many_different_values = np.arange(10 ** 6, dtype=np.float64) + self.few_different_values = np.zeros(10 ** 7, dtype=np.float64) + self.only_nans_values = np.full(10 ** 7, np.nan, dtype=np.float64) + + def time_isin_many_different(self): + # runtime is dominated by creation of the lookup-table + self.small.isin(self.many_different_values) + + def time_isin_few_different(self): + # runtime is dominated by creation of the lookup-table + self.small.isin(self.few_different_values) + + def time_isin_nan_values(self): + # runtime is dominated by creation of the lookup-table + self.small.isin(self.only_nans_values) + + +class IsInForObjects: + def setup(self): + self.s_nans = Series(np.full(10 ** 4, np.nan)).astype(np.object) + self.vals_nans = np.full(10 ** 4, np.nan).astype(np.object) + self.s_short = Series(np.arange(2)).astype(np.object) + self.s_long = Series(np.arange(10 ** 
5)).astype(np.object) + self.vals_short = np.arange(2).astype(np.object) + self.vals_long = np.arange(10 ** 5).astype(np.object) + # because of nans floats are special: + self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype( + np.object + ) + self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(np.object) + + def time_isin_nans(self): + # if nan-objects are different objects, + # this has the potential to trigger O(n^2) running time + self.s_nans.isin(self.vals_nans) + + def time_isin_short_series_long_values(self): + # running time dominated by the preprocessing + self.s_short.isin(self.vals_long) + + def time_isin_long_series_short_values(self): + # running time dominated by look-up + self.s_long.isin(self.vals_short) + + def time_isin_long_series_long_values(self): + # no dominating part + self.s_long.isin(self.vals_long) + + def time_isin_long_series_long_values_floats(self): + # no dominating part + self.s_long_floats.isin(self.vals_long_floats) + + +class NSort: + + params = ["first", "last", "all"] + param_names = ["keep"] + + def setup(self, keep): + self.s = Series(np.random.randint(1, 10, 100000)) + + def time_nlargest(self, keep): + self.s.nlargest(3, keep=keep) + + def time_nsmallest(self, keep): + self.s.nsmallest(3, keep=keep) + + +class Dropna: + + params = ["int", "datetime"] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10 ** 6 + data = { + "int": np.random.randint(1, 10, N), + "datetime": date_range("2000-01-01", freq="S", periods=N), + } + self.s = Series(data[dtype]) + if dtype == "datetime": + self.s[np.random.randint(1, N, 100)] = NaT + + def time_dropna(self, dtype): + self.s.dropna() + + +class SearchSorted: + + goal_time = 0.2 + params = [ + "int8", + "int16", + "int32", + "int64", + "uint8", + "uint16", + "uint32", + "uint64", + "float16", + "float32", + "float64", + "str", + ] + param_names = ["dtype"] + + def setup(self, dtype): + N = 10 ** 5 + data = np.array([1] * N + [2] * N + [3] * 
N).astype(dtype) + self.s = Series(data) + + def time_searchsorted(self, dtype): + key = "2" if dtype == "str" else 2 + self.s.searchsorted(key) + + +class Map: + + params = (["dict", "Series", "lambda"], ["object", "category", "int"]) + param_names = "mapper" + + def setup(self, mapper, dtype): + map_size = 1000 + map_data = Series(map_size - np.arange(map_size), dtype=dtype) + + # construct mapper + if mapper == "Series": + self.map_data = map_data + elif mapper == "dict": + self.map_data = map_data.to_dict() + elif mapper == "lambda": + map_dict = map_data.to_dict() + self.map_data = lambda x: map_dict[x] + else: + raise NotImplementedError + + self.s = Series(np.random.randint(0, map_size, 10000), dtype=dtype) + + def time_map(self, mapper, *args, **kwargs): + self.s.map(self.map_data) + + +class Clip: + params = [50, 1000, 10 ** 5] + param_names = ["n"] + + def setup(self, n): + self.s = Series(np.random.randn(n)) + + def time_clip(self, n): + self.s.clip(0, 1) + + +class ValueCounts: + + params = ["int", "uint", "float", "object"] + param_names = ["dtype"] + + def setup(self, dtype): + self.s = Series(np.random.randint(0, 1000, size=100000)).astype(dtype) + + def time_value_counts(self, dtype): + self.s.value_counts() + + +class Dir: + def setup(self): + self.s = Series(index=tm.makeStringIndex(10000)) + + def time_dir_strings(self): + dir(self.s) + + +class SeriesGetattr: + # https://github.com/pandas-dev/pandas/issues/19764 + def setup(self): + self.s = Series(1, index=date_range("2012-01-01", freq="s", periods=int(1e6))) + + def time_series_datetimeindex_repr(self): + getattr(self.s, "a", None) + + +class All: + + params = [[10 ** 3, 10 ** 6], ["fast", "slow"]] + param_names = ["N", "case"] + + def setup(self, N, case): + val = case != "fast" + self.s = Series([val] * N) + + def time_all(self, N, case): + self.s.all() + + +class Any: + + params = [[10 ** 3, 10 ** 6], ["fast", "slow"]] + param_names = ["N", "case"] + + def setup(self, N, case): + val = case 
== "fast" + self.s = Series([val] * N) + + def time_any(self, N, case): + self.s.any() + + +class NanOps: + + params = [ + [ + "var", + "mean", + "median", + "max", + "min", + "sum", + "std", + "sem", + "argmax", + "skew", + "kurt", + "prod", + ], + [10 ** 3, 10 ** 6], + ["int8", "int32", "int64", "float64"], + ] + param_names = ["func", "N", "dtype"] + + def setup(self, func, N, dtype): + self.s = Series([1] * N, dtype=dtype) + self.func = getattr(self.s, func) + + def time_func(self, func, N, dtype): + self.func() + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py new file mode 100644 index 00000000..ac78ca53 --- /dev/null +++ b/asv_bench/benchmarks/sparse.py @@ -0,0 +1,139 @@ +import numpy as np +import scipy.sparse + +import pandas as pd +from pandas import MultiIndex, Series, SparseArray, date_range + + +def make_array(size, dense_proportion, fill_value, dtype): + dense_size = int(size * dense_proportion) + arr = np.full(size, fill_value, dtype) + indexer = np.random.choice(np.arange(size), dense_size, replace=False) + arr[indexer] = np.random.choice(np.arange(100, dtype=dtype), dense_size) + return arr + + +class SparseSeriesToFrame: + def setup(self): + K = 50 + N = 50001 + rng = date_range("1/1/2000", periods=N, freq="T") + self.series = {} + for i in range(1, K): + data = np.random.randn(N)[:-i] + idx = rng[:-i] + data[100:] = np.nan + self.series[i] = pd.Series(pd.SparseArray(data), index=idx) + + def time_series_to_frame(self): + pd.DataFrame(self.series) + + +class SparseArrayConstructor: + + params = ([0.1, 0.01], [0, np.nan], [np.int64, np.float64, np.object]) + param_names = ["dense_proportion", "fill_value", "dtype"] + + def setup(self, dense_proportion, fill_value, dtype): + N = 10 ** 6 + self.array = make_array(N, dense_proportion, fill_value, dtype) + + def time_sparse_array(self, dense_proportion, fill_value, dtype): + SparseArray(self.array, 
fill_value=fill_value, dtype=dtype) + + +class SparseDataFrameConstructor: + def setup(self): + N = 1000 + self.arr = np.arange(N) + self.sparse = scipy.sparse.rand(N, N, 0.005) + + def time_from_scipy(self): + pd.DataFrame.sparse.from_spmatrix(self.sparse) + + +class FromCoo: + def setup(self): + self.matrix = scipy.sparse.coo_matrix( + ([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), shape=(100, 100) + ) + + def time_sparse_series_from_coo(self): + pd.Series.sparse.from_coo(self.matrix) + + +class ToCoo: + def setup(self): + s = Series([np.nan] * 10000) + s[0] = 3.0 + s[100] = -1.0 + s[999] = 12.1 + s.index = MultiIndex.from_product([range(10)] * 4) + self.ss = s.astype("Sparse") + + def time_sparse_series_to_coo(self): + self.ss.sparse.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True) + + +class Arithmetic: + + params = ([0.1, 0.01], [0, np.nan]) + param_names = ["dense_proportion", "fill_value"] + + def setup(self, dense_proportion, fill_value): + N = 10 ** 6 + arr1 = make_array(N, dense_proportion, fill_value, np.int64) + self.array1 = SparseArray(arr1, fill_value=fill_value) + arr2 = make_array(N, dense_proportion, fill_value, np.int64) + self.array2 = SparseArray(arr2, fill_value=fill_value) + + def time_make_union(self, dense_proportion, fill_value): + self.array1.sp_index.make_union(self.array2.sp_index) + + def time_intersect(self, dense_proportion, fill_value): + self.array1.sp_index.intersect(self.array2.sp_index) + + def time_add(self, dense_proportion, fill_value): + self.array1 + self.array2 + + def time_divide(self, dense_proportion, fill_value): + self.array1 / self.array2 + + +class ArithmeticBlock: + + params = [np.nan, 0] + param_names = ["fill_value"] + + def setup(self, fill_value): + N = 10 ** 6 + self.arr1 = self.make_block_array( + length=N, num_blocks=1000, block_size=10, fill_value=fill_value + ) + self.arr2 = self.make_block_array( + length=N, num_blocks=1000, block_size=10, fill_value=fill_value + ) + + def 
make_block_array(self, length, num_blocks, block_size, fill_value): + arr = np.full(length, fill_value) + indicies = np.random.choice( + np.arange(0, length, block_size), num_blocks, replace=False + ) + for ind in indicies: + arr[ind : ind + block_size] = np.random.randint(0, 100, block_size) + return SparseArray(arr, fill_value=fill_value) + + def time_make_union(self, fill_value): + self.arr1.sp_index.make_union(self.arr2.sp_index) + + def time_intersect(self, fill_value): + self.arr2.sp_index.intersect(self.arr2.sp_index) + + def time_addition(self, fill_value): + self.arr1 + self.arr2 + + def time_division(self, fill_value): + self.arr1 / self.arr2 + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py new file mode 100644 index 00000000..ec67394e --- /dev/null +++ b/asv_bench/benchmarks/stat_ops.py @@ -0,0 +1,138 @@ +import numpy as np + +import pandas as pd + +ops = ["mean", "sum", "median", "std", "skew", "kurt", "mad", "prod", "sem", "var"] + + +class FrameOps: + + params = [ops, ["float", "int"], [0, 1]] + param_names = ["op", "dtype", "axis"] + + def setup(self, op, dtype, axis): + df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype) + self.df_func = getattr(df, op) + + def time_op(self, op, dtype, axis): + self.df_func(axis=axis) + + +class FrameMultiIndexOps: + + params = ([0, 1, [0, 1]], ops) + param_names = ["level", "op"] + + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + codes = [ + np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10), + ] + index = pd.MultiIndex(levels=levels, codes=codes) + df = pd.DataFrame(np.random.randn(len(index), 4), index=index) + self.df_func = getattr(df, op) + + def time_op(self, level, op): + self.df_func(level=level) + + +class SeriesOps: + + params = [ops, ["float", "int"]] + param_names = ["op", "dtype"] + + def 
setup(self, op, dtype): + s = pd.Series(np.random.randn(100000)).astype(dtype) + self.s_func = getattr(s, op) + + def time_op(self, op, dtype): + self.s_func() + + +class SeriesMultiIndexOps: + + params = ([0, 1, [0, 1]], ops) + param_names = ["level", "op"] + + def setup(self, level, op): + levels = [np.arange(10), np.arange(100), np.arange(100)] + codes = [ + np.arange(10).repeat(10000), + np.tile(np.arange(100).repeat(100), 10), + np.tile(np.tile(np.arange(100), 100), 10), + ] + index = pd.MultiIndex(levels=levels, codes=codes) + s = pd.Series(np.random.randn(len(index)), index=index) + self.s_func = getattr(s, op) + + def time_op(self, level, op): + self.s_func(level=level) + + +class Rank: + + params = [["DataFrame", "Series"], [True, False]] + param_names = ["constructor", "pct"] + + def setup(self, constructor, pct): + values = np.random.randn(10 ** 5) + self.data = getattr(pd, constructor)(values) + + def time_rank(self, constructor, pct): + self.data.rank(pct=pct) + + def time_average_old(self, constructor, pct): + self.data.rank(pct=pct) / len(self.data) + + +class Correlation: + + params = [["spearman", "kendall", "pearson"]] + param_names = ["method"] + + def setup(self, method): + self.df = pd.DataFrame(np.random.randn(500, 15)) + self.df2 = pd.DataFrame(np.random.randn(500, 15)) + self.df_wide = pd.DataFrame(np.random.randn(500, 100)) + self.df_wide_nans = self.df_wide.where(np.random.random((500, 100)) < 0.9) + self.s = pd.Series(np.random.randn(500)) + self.s2 = pd.Series(np.random.randn(500)) + + def time_corr(self, method): + self.df.corr(method=method) + + def time_corr_wide(self, method): + self.df_wide.corr(method=method) + + def time_corr_wide_nans(self, method): + self.df_wide_nans.corr(method=method) + + def peakmem_corr_wide(self, method): + self.df_wide.corr(method=method) + + def time_corr_series(self, method): + self.s.corr(self.s2, method=method) + + def time_corrwith_cols(self, method): + self.df.corrwith(self.df2, method=method) + + 
def time_corrwith_rows(self, method): + self.df.corrwith(self.df2, axis=1, method=method) + + +class Covariance: + + params = [] + param_names = [] + + def setup(self): + self.s = pd.Series(np.random.randn(100000)) + self.s2 = pd.Series(np.random.randn(100000)) + + def time_cov_series(self): + self.s.cov(self.s2) + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py new file mode 100644 index 00000000..d7fb2775 --- /dev/null +++ b/asv_bench/benchmarks/strings.py @@ -0,0 +1,186 @@ +import warnings + +import numpy as np + +from pandas import DataFrame, Series + +from .pandas_vb_common import tm + + +class Methods: + def setup(self): + self.s = Series(tm.makeStringIndex(10 ** 5)) + + def time_center(self): + self.s.str.center(100) + + def time_count(self): + self.s.str.count("A") + + def time_endswith(self): + self.s.str.endswith("A") + + def time_extract(self): + with warnings.catch_warnings(record=True): + self.s.str.extract("(\\w*)A(\\w*)") + + def time_findall(self): + self.s.str.findall("[A-Z]+") + + def time_find(self): + self.s.str.find("[A-Z]+") + + def time_rfind(self): + self.s.str.rfind("[A-Z]+") + + def time_get(self): + self.s.str.get(0) + + def time_len(self): + self.s.str.len() + + def time_join(self): + self.s.str.join(" ") + + def time_match(self): + self.s.str.match("A") + + def time_normalize(self): + self.s.str.normalize("NFC") + + def time_pad(self): + self.s.str.pad(100, side="both") + + def time_partition(self): + self.s.str.partition("A") + + def time_rpartition(self): + self.s.str.rpartition("A") + + def time_replace(self): + self.s.str.replace("A", "\x01\x01") + + def time_translate(self): + self.s.str.translate({"A": "\x01\x01"}) + + def time_slice(self): + self.s.str.slice(5, 15, 2) + + def time_startswith(self): + self.s.str.startswith("A") + + def time_strip(self): + self.s.str.strip("A") + + def time_rstrip(self): + self.s.str.rstrip("A") + + def 
time_lstrip(self): + self.s.str.lstrip("A") + + def time_title(self): + self.s.str.title() + + def time_upper(self): + self.s.str.upper() + + def time_lower(self): + self.s.str.lower() + + def time_wrap(self): + self.s.str.wrap(10) + + def time_zfill(self): + self.s.str.zfill(10) + + +class Repeat: + + params = ["int", "array"] + param_names = ["repeats"] + + def setup(self, repeats): + N = 10 ** 5 + self.s = Series(tm.makeStringIndex(N)) + repeat = {"int": 1, "array": np.random.randint(1, 3, N)} + self.values = repeat[repeats] + + def time_repeat(self, repeats): + self.s.str.repeat(self.values) + + +class Cat: + + params = ([0, 3], [None, ","], [None, "-"], [0.0, 0.001, 0.15]) + param_names = ["other_cols", "sep", "na_rep", "na_frac"] + + def setup(self, other_cols, sep, na_rep, na_frac): + N = 10 ** 5 + mask_gen = lambda: np.random.choice([True, False], N, p=[1 - na_frac, na_frac]) + self.s = Series(tm.makeStringIndex(N)).where(mask_gen()) + if other_cols == 0: + # str.cat self-concatenates only for others=None + self.others = None + else: + self.others = DataFrame( + {i: tm.makeStringIndex(N).where(mask_gen()) for i in range(other_cols)} + ) + + def time_cat(self, other_cols, sep, na_rep, na_frac): + # before the concatenation (one caller + other_cols columns), the total + # expected fraction of rows containing any NaN is: + # reduce(lambda t, _: t + (1 - t) * na_frac, range(other_cols + 1), 0) + # for other_cols=3 and na_frac=0.15, this works out to ~48% + self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep) + + +class Contains: + + params = [True, False] + param_names = ["regex"] + + def setup(self, regex): + self.s = Series(tm.makeStringIndex(10 ** 5)) + + def time_contains(self, regex): + self.s.str.contains("A", regex=regex) + + +class Split: + + params = [True, False] + param_names = ["expand"] + + def setup(self, expand): + self.s = Series(tm.makeStringIndex(10 ** 5)).str.join("--") + + def time_split(self, expand): + self.s.str.split("--", 
expand=expand) + + def time_rsplit(self, expand): + self.s.str.rsplit("--", expand=expand) + + +class Dummies: + def setup(self): + self.s = Series(tm.makeStringIndex(10 ** 5)).str.join("|") + + def time_get_dummies(self): + self.s.str.get_dummies("|") + + +class Encode: + def setup(self): + self.ser = Series(tm.makeUnicodeIndex()) + + def time_encode_decode(self): + self.ser.str.encode("utf-8").str.decode("utf-8") + + +class Slice: + def setup(self): + self.s = Series(["abcdefg", np.nan] * 500000) + + def time_vector_slice(self): + # GH 2602 + self.s.str[:5] diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py new file mode 100644 index 00000000..37418d75 --- /dev/null +++ b/asv_bench/benchmarks/timedelta.py @@ -0,0 +1,104 @@ +""" +Timedelta benchmarks with non-tslibs dependencies. See +benchmarks.tslibs.timedelta for benchmarks that rely only on tslibs. +""" + +import numpy as np + +from pandas import DataFrame, Series, Timestamp, timedelta_range, to_timedelta + + +class ToTimedelta: + def setup(self): + self.ints = np.random.randint(0, 60, size=10000) + self.str_days = [] + self.str_seconds = [] + for i in self.ints: + self.str_days.append(f"{i} days") + self.str_seconds.append(f"00:00:{i:02d}") + + def time_convert_int(self): + to_timedelta(self.ints, unit="s") + + def time_convert_string_days(self): + to_timedelta(self.str_days) + + def time_convert_string_seconds(self): + to_timedelta(self.str_seconds) + + +class ToTimedeltaErrors: + + params = ["coerce", "ignore"] + param_names = ["errors"] + + def setup(self, errors): + ints = np.random.randint(0, 60, size=10000) + self.arr = [f"{i} days" for i in ints] + self.arr[-1] = "apple" + + def time_convert(self, errors): + to_timedelta(self.arr, errors=errors) + + +class TimedeltaOps: + def setup(self): + self.td = to_timedelta(np.arange(1000000)) + self.ts = Timestamp("2000") + + def time_add_td_ts(self): + self.td + self.ts + + +class DatetimeAccessor: + def setup_cache(self): + N = 
100000 + series = Series(timedelta_range("1 days", periods=N, freq="h")) + return series + + def time_dt_accessor(self, series): + series.dt + + def time_timedelta_days(self, series): + series.dt.days + + def time_timedelta_seconds(self, series): + series.dt.seconds + + def time_timedelta_microseconds(self, series): + series.dt.microseconds + + def time_timedelta_nanoseconds(self, series): + series.dt.nanoseconds + + +class TimedeltaIndexing: + def setup(self): + self.index = timedelta_range(start="1985", periods=1000, freq="D") + self.index2 = timedelta_range(start="1986", periods=1000, freq="D") + self.series = Series(range(1000), index=self.index) + self.timedelta = self.index[500] + + def time_get_loc(self): + self.index.get_loc(self.timedelta) + + def time_shape(self): + self.index.shape + + def time_shallow_copy(self): + self.index._shallow_copy() + + def time_series_loc(self): + self.series.loc[self.timedelta] + + def time_align(self): + DataFrame({"a": self.series, "b": self.series[:500]}) + + def time_intersection(self): + self.index.intersection(self.index2) + + def time_union(self): + self.index.union(self.index2) + + def time_unique(self): + self.index.unique() diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py new file mode 100644 index 00000000..ba0b5192 --- /dev/null +++ b/asv_bench/benchmarks/timeseries.py @@ -0,0 +1,431 @@ +from datetime import timedelta + +import dateutil +import numpy as np + +from pandas import DataFrame, Series, date_range, period_range, to_datetime + +from pandas.tseries.frequencies import infer_freq + +try: + from pandas.plotting._matplotlib.converter import DatetimeConverter +except ImportError: + from pandas.tseries.converter import DatetimeConverter + + +class DatetimeIndex: + + params = ["dst", "repeated", "tz_aware", "tz_local", "tz_naive"] + param_names = ["index_type"] + + def setup(self, index_type): + N = 100000 + dtidxes = { + "dst": date_range( + start="10/29/2000 1:00:00", 
end="10/29/2000 1:59:59", freq="S" + ), + "repeated": date_range(start="2000", periods=N / 10, freq="s").repeat(10), + "tz_aware": date_range(start="2000", periods=N, freq="s", tz="US/Eastern"), + "tz_local": date_range( + start="2000", periods=N, freq="s", tz=dateutil.tz.tzlocal() + ), + "tz_naive": date_range(start="2000", periods=N, freq="s"), + } + self.index = dtidxes[index_type] + + def time_add_timedelta(self, index_type): + self.index + timedelta(minutes=2) + + def time_normalize(self, index_type): + self.index.normalize() + + def time_unique(self, index_type): + self.index.unique() + + def time_to_time(self, index_type): + self.index.time + + def time_get(self, index_type): + self.index[0] + + def time_timeseries_is_month_start(self, index_type): + self.index.is_month_start + + def time_to_date(self, index_type): + self.index.date + + def time_to_pydatetime(self, index_type): + self.index.to_pydatetime() + + +class TzLocalize: + + params = [None, "US/Eastern", "UTC", dateutil.tz.tzutc()] + param_names = "tz" + + def setup(self, tz): + dst_rng = date_range( + start="10/29/2000 1:00:00", end="10/29/2000 1:59:59", freq="S" + ) + self.index = date_range(start="10/29/2000", end="10/29/2000 00:59:59", freq="S") + self.index = self.index.append(dst_rng) + self.index = self.index.append(dst_rng) + self.index = self.index.append( + date_range(start="10/29/2000 2:00:00", end="10/29/2000 3:00:00", freq="S") + ) + + def time_infer_dst(self, tz): + self.index.tz_localize(tz, ambiguous="infer") + + +class ResetIndex: + + params = [None, "US/Eastern"] + param_names = "tz" + + def setup(self, tz): + idx = date_range(start="1/1/2000", periods=1000, freq="H", tz=tz) + self.df = DataFrame(np.random.randn(1000, 2), index=idx) + + def time_reest_datetimeindex(self, tz): + self.df.reset_index() + + +class Factorize: + + params = [None, "Asia/Tokyo"] + param_names = "tz" + + def setup(self, tz): + N = 100000 + self.dti = date_range("2011-01-01", freq="H", periods=N, tz=tz) + 
self.dti = self.dti.repeat(5) + + def time_factorize(self, tz): + self.dti.factorize() + + +class InferFreq: + + params = [None, "D", "B"] + param_names = ["freq"] + + def setup(self, freq): + if freq is None: + self.idx = date_range(start="1/1/1700", freq="D", periods=10000) + self.idx._data._freq = None + else: + self.idx = date_range(start="1/1/1700", freq=freq, periods=10000) + + def time_infer_freq(self, freq): + infer_freq(self.idx) + + +class TimeDatetimeConverter: + def setup(self): + N = 100000 + self.rng = date_range(start="1/1/2000", periods=N, freq="T") + + def time_convert(self): + DatetimeConverter.convert(self.rng, None, None) + + +class Iteration: + + params = [date_range, period_range] + param_names = ["time_index"] + + def setup(self, time_index): + N = 10 ** 6 + self.idx = time_index(start="20140101", freq="T", periods=N) + self.exit = 10000 + + def time_iter(self, time_index): + for _ in self.idx: + pass + + def time_iter_preexit(self, time_index): + for i, _ in enumerate(self.idx): + if i > self.exit: + break + + +class ResampleDataFrame: + + params = ["max", "mean", "min"] + param_names = ["method"] + + def setup(self, method): + rng = date_range(start="20130101", periods=100000, freq="50L") + df = DataFrame(np.random.randn(100000, 2), index=rng) + self.resample = getattr(df.resample("1s"), method) + + def time_method(self, method): + self.resample() + + +class ResampleSeries: + + params = (["period", "datetime"], ["5min", "1D"], ["mean", "ohlc"]) + param_names = ["index", "freq", "method"] + + def setup(self, index, freq, method): + indexes = { + "period": period_range(start="1/1/2000", end="1/1/2001", freq="T"), + "datetime": date_range(start="1/1/2000", end="1/1/2001", freq="T"), + } + idx = indexes[index] + ts = Series(np.random.randn(len(idx)), index=idx) + self.resample = getattr(ts.resample(freq), method) + + def time_resample(self, index, freq, method): + self.resample() + + +class ResampleDatetetime64: + # GH 7754 + def setup(self): + 
rng3 = date_range( + start="2000-01-01 00:00:00", end="2000-01-01 10:00:00", freq="555000U" + ) + self.dt_ts = Series(5, rng3, dtype="datetime64[ns]") + + def time_resample(self): + self.dt_ts.resample("1S").last() + + +class AsOf: + + params = ["DataFrame", "Series"] + param_names = ["constructor"] + + def setup(self, constructor): + N = 10000 + M = 10 + rng = date_range(start="1/1/1990", periods=N, freq="53s") + data = { + "DataFrame": DataFrame(np.random.randn(N, M)), + "Series": Series(np.random.randn(N)), + } + self.ts = data[constructor] + self.ts.index = rng + self.ts2 = self.ts.copy() + self.ts2.iloc[250:5000] = np.nan + self.ts3 = self.ts.copy() + self.ts3.iloc[-5000:] = np.nan + self.dates = date_range(start="1/1/1990", periods=N * 10, freq="5s") + self.date = self.dates[0] + self.date_last = self.dates[-1] + self.date_early = self.date - timedelta(10) + + # test speed of pre-computing NAs. + def time_asof(self, constructor): + self.ts.asof(self.dates) + + # should be roughly the same as above. + def time_asof_nan(self, constructor): + self.ts2.asof(self.dates) + + # test speed of the code path for a scalar index + # without *while* loop + def time_asof_single(self, constructor): + self.ts.asof(self.date) + + # test speed of the code path for a scalar index + # before the start. should be the same as above. + def time_asof_single_early(self, constructor): + self.ts.asof(self.date_early) + + # test the speed of the code path for a scalar index + # with a long *while* loop. should still be much + # faster than pre-computing all the NAs. 
+ def time_asof_nan_single(self, constructor): + self.ts3.asof(self.date_last) + + +class SortIndex: + + params = [True, False] + param_names = ["monotonic"] + + def setup(self, monotonic): + N = 10 ** 5 + idx = date_range(start="1/1/2000", periods=N, freq="s") + self.s = Series(np.random.randn(N), index=idx) + if not monotonic: + self.s = self.s.sample(frac=1) + + def time_sort_index(self, monotonic): + self.s.sort_index() + + def time_get_slice(self, monotonic): + self.s[:10000] + + +class IrregularOps: + def setup(self): + N = 10 ** 5 + idx = date_range(start="1/1/2000", periods=N, freq="s") + s = Series(np.random.randn(N), index=idx) + self.left = s.sample(frac=1) + self.right = s.sample(frac=1) + + def time_add(self): + self.left + self.right + + +class Lookup: + def setup(self): + N = 1500000 + rng = date_range(start="1/1/2000", periods=N, freq="S") + self.ts = Series(1, index=rng) + self.lookup_val = rng[N // 2] + + def time_lookup_and_cleanup(self): + self.ts[self.lookup_val] + self.ts.index._cleanup() + + +class ToDatetimeYYYYMMDD: + def setup(self): + rng = date_range(start="1/1/2000", periods=10000, freq="D") + self.stringsD = Series(rng.strftime("%Y%m%d")) + + def time_format_YYYYMMDD(self): + to_datetime(self.stringsD, format="%Y%m%d") + + +class ToDatetimeCacheSmallCount: + + params = ([True, False], [50, 500, 5000, 100000]) + param_names = ["cache", "count"] + + def setup(self, cache, count): + rng = date_range(start="1/1/1971", periods=count) + self.unique_date_strings = rng.strftime("%Y-%m-%d").tolist() + + def time_unique_date_strings(self, cache, count): + to_datetime(self.unique_date_strings, cache=cache) + + +class ToDatetimeISO8601: + def setup(self): + rng = date_range(start="1/1/2000", periods=20000, freq="H") + self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist() + self.strings_nosep = rng.strftime("%Y%m%d %H:%M:%S").tolist() + self.strings_tz_space = [ + x.strftime("%Y-%m-%d %H:%M:%S") + " -0800" for x in rng + ] + + def 
time_iso8601(self): + to_datetime(self.strings) + + def time_iso8601_nosep(self): + to_datetime(self.strings_nosep) + + def time_iso8601_format(self): + to_datetime(self.strings, format="%Y-%m-%d %H:%M:%S") + + def time_iso8601_format_no_sep(self): + to_datetime(self.strings_nosep, format="%Y%m%d %H:%M:%S") + + def time_iso8601_tz_spaceformat(self): + to_datetime(self.strings_tz_space) + + +class ToDatetimeNONISO8601: + def setup(self): + N = 10000 + half = int(N / 2) + ts_string_1 = "March 1, 2018 12:00:00+0400" + ts_string_2 = "March 1, 2018 12:00:00+0500" + self.same_offset = [ts_string_1] * N + self.diff_offset = [ts_string_1] * half + [ts_string_2] * half + + def time_same_offset(self): + to_datetime(self.same_offset) + + def time_different_offset(self): + to_datetime(self.diff_offset) + + +class ToDatetimeFormatQuarters: + def setup(self): + self.s = Series(["2Q2005", "2Q05", "2005Q1", "05Q1"] * 10000) + + def time_infer_quarter(self): + to_datetime(self.s) + + +class ToDatetimeFormat: + def setup(self): + self.s = Series(["19MAY11", "19MAY11:00:00:00"] * 100000) + self.s2 = self.s.str.replace(":\\S+$", "") + + def time_exact(self): + to_datetime(self.s2, format="%d%b%y") + + def time_no_exact(self): + to_datetime(self.s, format="%d%b%y", exact=False) + + +class ToDatetimeCache: + + params = [True, False] + param_names = ["cache"] + + def setup(self, cache): + N = 10000 + self.unique_numeric_seconds = list(range(N)) + self.dup_numeric_seconds = [1000] * N + self.dup_string_dates = ["2000-02-11"] * N + self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N + + def time_unique_seconds_and_unit(self, cache): + to_datetime(self.unique_numeric_seconds, unit="s", cache=cache) + + def time_dup_seconds_and_unit(self, cache): + to_datetime(self.dup_numeric_seconds, unit="s", cache=cache) + + def time_dup_string_dates(self, cache): + to_datetime(self.dup_string_dates, cache=cache) + + def time_dup_string_dates_and_format(self, cache): + 
to_datetime(self.dup_string_dates, format="%Y-%m-%d", cache=cache) + + def time_dup_string_tzoffset_dates(self, cache): + to_datetime(self.dup_string_with_tz, cache=cache) + + +class DatetimeAccessor: + + params = [None, "US/Eastern", "UTC", dateutil.tz.tzutc()] + param_names = "tz" + + def setup(self, tz): + N = 100000 + self.series = Series(date_range(start="1/1/2000", periods=N, freq="T", tz=tz)) + + def time_dt_accessor(self, tz): + self.series.dt + + def time_dt_accessor_normalize(self, tz): + self.series.dt.normalize() + + def time_dt_accessor_month_name(self, tz): + self.series.dt.month_name() + + def time_dt_accessor_day_name(self, tz): + self.series.dt.day_name() + + def time_dt_accessor_time(self, tz): + self.series.dt.time + + def time_dt_accessor_date(self, tz): + self.series.dt.date + + def time_dt_accessor_year(self, tz): + self.series.dt.year + + +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/tslibs/__init__.py b/asv_bench/benchmarks/tslibs/__init__.py new file mode 100644 index 00000000..815cf55b --- /dev/null +++ b/asv_bench/benchmarks/tslibs/__init__.py @@ -0,0 +1,7 @@ +""" +Benchmarks in this directory should depend only on tslibs, tseries.offsets, +and to_offset. + +i.e. any code changes that do not touch those files should not need to +run these benchmarks. +""" diff --git a/asv_bench/benchmarks/tslibs/offsets.py b/asv_bench/benchmarks/tslibs/offsets.py new file mode 100644 index 00000000..fc1efe63 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/offsets.py @@ -0,0 +1,90 @@ +""" +offsets benchmarks that rely only on tslibs. See benchmarks.offset for +offsets benchmarks that rely on other parts of pandas. 
+""" +from datetime import datetime + +import numpy as np + +from pandas import offsets + +try: + import pandas.tseries.holiday # noqa +except ImportError: + pass + +hcal = pandas.tseries.holiday.USFederalHolidayCalendar() +# These offsets currently raise a NotImplimentedError with .apply_index() +non_apply = [ + offsets.Day(), + offsets.BYearEnd(), + offsets.BYearBegin(), + offsets.BQuarterEnd(), + offsets.BQuarterBegin(), + offsets.BMonthEnd(), + offsets.BMonthBegin(), + offsets.CustomBusinessDay(), + offsets.CustomBusinessDay(calendar=hcal), + offsets.CustomBusinessMonthBegin(calendar=hcal), + offsets.CustomBusinessMonthEnd(calendar=hcal), + offsets.CustomBusinessMonthEnd(calendar=hcal), +] +other_offsets = [ + offsets.YearEnd(), + offsets.YearBegin(), + offsets.QuarterEnd(), + offsets.QuarterBegin(), + offsets.MonthEnd(), + offsets.MonthBegin(), + offsets.DateOffset(months=2, days=2), + offsets.BusinessDay(), + offsets.SemiMonthEnd(), + offsets.SemiMonthBegin(), +] +offset_objs = non_apply + other_offsets + + +class OnOffset: + + params = offset_objs + param_names = ["offset"] + + def setup(self, offset): + self.dates = [ + datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31) + ] + + def time_on_offset(self, offset): + for date in self.dates: + offset.is_on_offset(date) + + +class OffestDatetimeArithmetic: + + params = offset_objs + param_names = ["offset"] + + def setup(self, offset): + self.date = datetime(2011, 1, 1) + self.dt64 = np.datetime64("2011-01-01 09:00Z") + + def time_apply(self, offset): + offset.apply(self.date) + + def time_apply_np_dt64(self, offset): + offset.apply(self.dt64) + + def time_add(self, offset): + self.date + offset + + def time_add_10(self, offset): + self.date + (10 * offset) + + def time_subtract(self, offset): + self.date - offset + + def time_subtract_10(self, offset): + self.date - (10 * offset) diff --git a/asv_bench/benchmarks/tslibs/period.py 
b/asv_bench/benchmarks/tslibs/period.py new file mode 100644 index 00000000..9156c4aa --- /dev/null +++ b/asv_bench/benchmarks/tslibs/period.py @@ -0,0 +1,70 @@ +""" +Period benchmarks that rely only on tslibs. See benchmarks.period for +Period benchmarks that rely on other parts of pandas. +""" +from pandas import Period + +from pandas.tseries.frequencies import to_offset + + +class PeriodProperties: + + params = ( + ["M", "min"], + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "is_leap_year", + "quarter", + "qyear", + "week", + "daysinmonth", + "dayofweek", + "dayofyear", + "start_time", + "end_time", + ], + ) + param_names = ["freq", "attr"] + + def setup(self, freq, attr): + self.per = Period("2012-06-01", freq=freq) + + def time_property(self, freq, attr): + getattr(self.per, attr) + + +class PeriodUnaryMethods: + + params = ["M", "min"] + param_names = ["freq"] + + def setup(self, freq): + self.per = Period("2012-06-01", freq=freq) + + def time_to_timestamp(self, freq): + self.per.to_timestamp() + + def time_now(self, freq): + self.per.now(freq) + + def time_asfreq(self, freq): + self.per.asfreq("A") + + +class PeriodConstructor: + params = [["D"], [True, False]] + param_names = ["freq", "is_offset"] + + def setup(self, freq, is_offset): + if is_offset: + self.freq = to_offset(freq) + else: + self.freq = freq + + def time_period_constructor(self, freq, is_offset): + Period("2012-06-01", freq=self.freq) # use the str/offset prepared in setup so is_offset matters diff --git a/asv_bench/benchmarks/tslibs/timedelta.py b/asv_bench/benchmarks/tslibs/timedelta.py new file mode 100644 index 00000000..8a16ddc1 --- /dev/null +++ b/asv_bench/benchmarks/tslibs/timedelta.py @@ -0,0 +1,61 @@ +""" +Timedelta benchmarks that rely only on tslibs. See benchmarks.timedelta for +Timedelta benchmarks that rely on other parts of pandas.
+""" +import datetime + +import numpy as np + +from pandas import Timedelta + + +class TimedeltaConstructor: + def time_from_int(self): + Timedelta(123456789) + + def time_from_unit(self): + Timedelta(1, unit="d") + + def time_from_components(self): + Timedelta( + days=1, + hours=2, + minutes=3, + seconds=4, + milliseconds=5, + microseconds=6, + nanoseconds=7, + ) + + def time_from_datetime_timedelta(self): + Timedelta(datetime.timedelta(days=1, seconds=1)) + + def time_from_np_timedelta(self): + Timedelta(np.timedelta64(1, "ms")) + + def time_from_string(self): + Timedelta("1 days") + + def time_from_iso_format(self): + Timedelta("P4DT12H30M5S") + + def time_from_missing(self): + Timedelta("nat") + + +class TimedeltaProperties: + def setup_cache(self): + td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) + return td + + def time_timedelta_days(self, td): + td.days + + def time_timedelta_seconds(self, td): + td.seconds + + def time_timedelta_microseconds(self, td): + td.microseconds + + def time_timedelta_nanoseconds(self, td): + td.nanoseconds diff --git a/asv_bench/benchmarks/tslibs/timestamp.py b/asv_bench/benchmarks/tslibs/timestamp.py new file mode 100644 index 00000000..8ebb2d8d --- /dev/null +++ b/asv_bench/benchmarks/tslibs/timestamp.py @@ -0,0 +1,137 @@ +import datetime + +import dateutil +import pytz + +from pandas import Timestamp + + +class TimestampConstruction: + def time_parse_iso8601_no_tz(self): + Timestamp("2017-08-25 08:16:14") + + def time_parse_iso8601_tz(self): + Timestamp("2017-08-25 08:16:14-0500") + + def time_parse_dateutil(self): + Timestamp("2017/08/25 08:16:14 AM") + + def time_parse_today(self): + Timestamp("today") + + def time_parse_now(self): + Timestamp("now") + + def time_fromordinal(self): + Timestamp.fromordinal(730120) + + def time_fromtimestamp(self): + Timestamp.fromtimestamp(1515448538) + + +class TimestampProperties: + _tzs = [None, pytz.timezone("Europe/Amsterdam"), pytz.UTC, dateutil.tz.tzutc()] + _freqs = 
[None, "B"] + params = [_tzs, _freqs] + param_names = ["tz", "freq"] + + def setup(self, tz, freq): + self.ts = Timestamp("2017-08-25 08:16:14", tzinfo=tz, freq=freq) + + def time_tz(self, tz, freq): + self.ts.tz + + def time_dayofweek(self, tz, freq): + self.ts.dayofweek + + def time_weekday_name(self, tz, freq): + self.ts.day_name() # day_name is a method; call it so the work is actually timed + + def time_dayofyear(self, tz, freq): + self.ts.dayofyear + + def time_week(self, tz, freq): + self.ts.week + + def time_quarter(self, tz, freq): + self.ts.quarter + + def time_days_in_month(self, tz, freq): + self.ts.days_in_month + + def time_freqstr(self, tz, freq): + self.ts.freqstr + + def time_is_month_start(self, tz, freq): + self.ts.is_month_start + + def time_is_month_end(self, tz, freq): + self.ts.is_month_end + + def time_is_quarter_start(self, tz, freq): + self.ts.is_quarter_start + + def time_is_quarter_end(self, tz, freq): + self.ts.is_quarter_end + + def time_is_year_start(self, tz, freq): + self.ts.is_year_start + + def time_is_year_end(self, tz, freq): + self.ts.is_year_end + + def time_is_leap_year(self, tz, freq): + self.ts.is_leap_year + + def time_microsecond(self, tz, freq): + self.ts.microsecond + + def time_month_name(self, tz, freq): + self.ts.month_name() + + +class TimestampOps: + params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()] + param_names = ["tz"] + + def setup(self, tz): + self.ts = Timestamp("2017-08-25 08:16:14", tz=tz) + + def time_replace_tz(self, tz): + self.ts.replace(tzinfo=pytz.timezone("US/Eastern")) + + def time_replace_None(self, tz): + self.ts.replace(tzinfo=None) + + def time_to_pydatetime(self, tz): + self.ts.to_pydatetime() + + def time_normalize(self, tz): + self.ts.normalize() + + def time_tz_convert(self, tz): + if self.ts.tz is not None: + self.ts.tz_convert(tz) + + def time_tz_localize(self, tz): + if self.ts.tz is None: + self.ts.tz_localize(tz) + + def time_to_julian_date(self, tz): + self.ts.to_julian_date() + + def time_floor(self, tz): + self.ts.floor("5T") + + def
time_ceil(self, tz): + self.ts.ceil("5T") + + +class TimestampAcrossDst: + def setup(self): + dt = datetime.datetime(2016, 3, 27, 1) + self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo + self.ts2 = Timestamp(dt) + + def time_replace_across_dst(self): + self.ts2.replace(tzinfo=self.tzinfo) diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000..d992c640 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,92 @@ +# Adapted from https://github.com/numba/numba/blob/master/azure-pipelines.yml +jobs: +# Mac and Linux use the same template +- template: ci/azure/posix.yml + parameters: + name: macOS + vmImage: macOS-10.14 + +- template: ci/azure/posix.yml + parameters: + name: Linux + vmImage: ubuntu-16.04 + +- template: ci/azure/windows.yml + parameters: + name: Windows + vmImage: vs2017-win2016 + +- job: 'Web_and_Docs' + pool: + vmImage: ubuntu-16.04 + timeoutInMinutes: 90 + steps: + - script: | + echo '##vso[task.setvariable variable=ENV_FILE]environment.yml' + echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' + displayName: 'Setting environment variables' + + - script: | + sudo apt-get install -y libc6-dev-i386 + ci/setup_env.sh + displayName: 'Setup environment and build pandas' + + - script: | + source activate pandas-dev + python web/pandas_web.py web/pandas --target-path=web/build + displayName: 'Build website' + + - script: | + source activate pandas-dev + # Next we should simply have `doc/make.py --warnings-are-errors`, everything else is required because the ipython directive doesn't fail the build on errors (https://github.com/ipython/ipython/issues/11547) + doc/make.py --warnings-are-errors | tee sphinx.log ; SPHINX_RET=${PIPESTATUS[0]} + grep -B1 "^<<<-------------------------------------------------------------------------$" sphinx.log ; IPY_RET=$(( $? 
!= 1 )) + exit $(( $SPHINX_RET + $IPY_RET )) + displayName: 'Build documentation' + + - script: | + mkdir -p to_deploy/docs + cp -r web/build/* to_deploy/ + cp -r doc/build/html/* to_deploy/docs/ + displayName: 'Merge website and docs' + + - script: | + cd to_deploy + git init + touch .nojekyll + echo "dev.pandas.io" > CNAME + printf "User-agent: *\nDisallow: /" > robots.txt + git add --all . + git config user.email "pandas-dev@python.org" + git config user.name "pandas-bot" + git commit -m "pandas web and documentation in master" + displayName: 'Create git repo for docs build' + condition : | + and(not(eq(variables['Build.Reason'], 'PullRequest')), + eq(variables['Build.SourceBranch'], 'refs/heads/master')) + + # For `InstallSSHKey@0` to work, next steps are required: + # 1. Generate a pair of private/public keys (i.e. `ssh-keygen -t rsa -b 4096 -C "your_email@example.com"`) + # 2. Go to "Library > Secure files" in the Azure Pipelines dashboard: https://dev.azure.com/pandas-dev/pandas/_library?itemType=SecureFiles + # 3. Click on "+ Secure file" + # 4. Upload the private key (the name of the file must match with the specified in "sshKeySecureFile" input below, "pandas_docs_key") + # 5. Click on file name after it is created, tick the box "Authorize for use in all pipelines" and save + # 6. 
The public key specified in "sshPublicKey" is the pair of the uploaded private key, and needs to be set as a deploy key of the repo where the docs will be pushed (with write access): https://github.com/pandas-dev/pandas-dev.github.io/settings/keys + - task: InstallSSHKey@0 + inputs: + hostName: 'github.com,192.30.252.128 ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==' + sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDHmz3l/EdqrgNxEUKkwDUuUcLv91unig03pYFGO/DMIgCmPdMG96zAgfnESd837Rm0wSSqylwSzkRJt5MV/TpFlcVifDLDQmUhqCeO8Z6dLl/oe35UKmyYICVwcvQTAaHNnYRpKC5IUlTh0JEtw9fGlnp1Ta7U1ENBLbKdpywczElhZu+hOQ892zqOj3CwA+U2329/d6cd7YnqIKoFN9DWT3kS5K6JE4IoBfQEVekIOs23bKjNLvPoOmi6CroAhu/K8j+NCWQjge5eJf2x/yTnIIP1PlEcXoHIr8io517posIx3TBup+CN8bNS1PpDW3jyD3ttl1uoBudjOQrobNnJeR6Rn67DRkG6IhSwr3BWj8alwUG5mTdZzwV5Pa9KZFdIiqX7NoDGg+itsR39QCn0thK8lGRNSR8KrWC1PSjecwelKBO7uQ7rnk/rkrZdBWR4oEA8YgNH8tirUw5WfOr5a0AIaJicKxGKNdMxZt+zmC+bS7F4YCOGIm9KHa43RrKhoGRhRf9fHHHKUPwFGqtWG4ykcUgoamDOURJyepesBAO3FiRE9rLU6ILbB3yEqqoekborHmAJD5vf7PWItW3Q/YQKuk3kkqRcKnexPyzyyq5lUgTi8CxxZdaASIOu294wjBhhdyHlXEkVTNJ9JKkj/obF+XiIIp0cBDsOXY9hDQ== pandas-dev@python.org' + sshKeySecureFile: 'pandas_docs_key' + displayName: 'Install GitHub ssh deployment key' + condition : | + and(not(eq(variables['Build.Reason'], 'PullRequest')), + eq(variables['Build.SourceBranch'], 'refs/heads/master')) + + - script: | + cd to_deploy + git remote add origin git@github.com:pandas-dev/pandas-dev.github.io.git + git push -f origin master + displayName: 'Publish web and docs to GitHub pages' + condition : | + and(not(eq(variables['Build.Reason'], 'PullRequest')), + eq(variables['Build.SourceBranch'], 
'refs/heads/master')) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml new file mode 100644 index 00000000..437cc9b1 --- /dev/null +++ b/ci/azure/posix.yml @@ -0,0 +1,96 @@ +parameters: + name: '' + vmImage: '' + +jobs: +- job: ${{ parameters.name }} + pool: + vmImage: ${{ parameters.vmImage }} + strategy: + matrix: + ${{ if eq(parameters.name, 'macOS') }}: + py36_macos: + ENV_FILE: ci/deps/azure-macos-36.yaml + CONDA_PY: "36" + PATTERN: "not slow and not network" + + ${{ if eq(parameters.name, 'Linux') }}: + py36_minimum_versions: + ENV_FILE: ci/deps/azure-36-minimum_versions.yaml + CONDA_PY: "36" + PATTERN: "not slow and not network and not clipboard" + + py36_locale_slow_old_np: + ENV_FILE: ci/deps/azure-36-locale_slow.yaml + CONDA_PY: "36" + PATTERN: "slow" + # pandas does not use the language (zh_CN), but should support diferent encodings (utf8) + # we should test with encodings different than utf8, but doesn't seem like Ubuntu supports any + LANG: "zh_CN.utf8" + LC_ALL: "zh_CN.utf8" + EXTRA_APT: "language-pack-zh-hans" + + py36_locale: + ENV_FILE: ci/deps/azure-36-locale.yaml + CONDA_PY: "36" + PATTERN: "not slow and not network" + LANG: "it_IT.utf8" + LC_ALL: "it_IT.utf8" + EXTRA_APT: "language-pack-it xsel" + + #py36_32bit: + # ENV_FILE: ci/deps/azure-36-32bit.yaml + # CONDA_PY: "36" + # PATTERN: "not slow and not network and not clipboard" + # BITS32: "yes" + + py37_locale: + ENV_FILE: ci/deps/azure-37-locale.yaml + CONDA_PY: "37" + PATTERN: "not slow and not network" + LANG: "zh_CN.utf8" + LC_ALL: "zh_CN.utf8" + EXTRA_APT: "language-pack-zh-hans xsel" + + py37_np_dev: + ENV_FILE: ci/deps/azure-37-numpydev.yaml + CONDA_PY: "37" + PATTERN: "not slow and not network" + TEST_ARGS: "-W error" + PANDAS_TESTING_MODE: "deprecate" + EXTRA_APT: "xsel" + + steps: + - script: | + if [ "$(uname)" == "Linux" ]; then + sudo apt-get update + sudo apt-get install -y libc6-dev-i386 $EXTRA_APT + fi + displayName: 'Install extra packages' + + - script: echo 
'##vso[task.prependpath]$(HOME)/miniconda3/bin' + displayName: 'Set conda path' + + - script: ci/setup_env.sh + displayName: 'Setup environment and build pandas' + + - script: | + source activate pandas-dev + ci/run_tests.sh + displayName: 'Test' + + - script: source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd + displayName: 'Build versions' + + - task: PublishTestResults@2 + condition: succeededOrFailed() + inputs: + failTaskOnFailedTests: true + testResultsFiles: 'test-data.xml' + testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }} + displayName: 'Publish test results' + + - script: | + source activate pandas-dev + python ci/print_skipped.py + displayName: 'Print skipped tests' diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml new file mode 100644 index 00000000..187a5db9 --- /dev/null +++ b/ci/azure/windows.yml @@ -0,0 +1,57 @@ +parameters: + name: '' + vmImage: '' + +jobs: +- job: ${{ parameters.name }} + pool: + vmImage: ${{ parameters.vmImage }} + strategy: + matrix: + py36_np15: + ENV_FILE: ci/deps/azure-windows-36.yaml + CONDA_PY: "36" + PATTERN: "not slow and not network" + + py37_np141: + ENV_FILE: ci/deps/azure-windows-37.yaml + CONDA_PY: "37" + PATTERN: "not slow and not network" + + steps: + - powershell: | + Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" + Write-Host "##vso[task.prependpath]$HOME/miniconda3/bin" + displayName: 'Add conda to PATH' + + - script: conda update -q -n base conda + displayName: 'Update conda' + + - bash: | + conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml + displayName: 'Create anaconda environment' + + - bash: | + source activate pandas-dev + conda list + python setup.py build_ext -q -i -j 4 + python -m pip install --no-build-isolation -e . 
+ displayName: 'Build' + + - bash: | + source activate pandas-dev + ci/run_tests.sh + displayName: 'Test' + + - task: PublishTestResults@2 + condition: succeededOrFailed() + inputs: + failTaskOnFailedTests: true + testResultsFiles: 'test-data.xml' + testRunTitle: ${{ format('{0}-$(CONDA_PY)', parameters.name) }} + displayName: 'Publish test results' + + - bash: | + source activate pandas-dev + python ci/print_skipped.py + displayName: 'Print skipped tests' diff --git a/ci/check_cache.sh b/ci/check_cache.sh new file mode 100755 index 00000000..b83144fc --- /dev/null +++ b/ci/check_cache.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# currently not used +# script to make sure that cache is clean +# Travis CI now handles this + +if [ "$TRAVIS_PULL_REQUEST" == "false" ] +then + echo "Not a PR: checking for changes in ci/ from last 2 commits" + git diff HEAD~2 --numstat | grep -E "ci/" + ci_changes=$(git diff HEAD~2 --numstat | grep -E "ci/"| wc -l) +else + echo "PR: checking for changes in ci/ from last 2 commits" + git fetch origin pull/${TRAVIS_PULL_REQUEST}/head:PR_HEAD + git diff PR_HEAD~2 --numstat | grep -E "ci/" + ci_changes=$(git diff PR_HEAD~2 --numstat | grep -E "ci/"| wc -l) +fi + +CACHE_DIR="$HOME/.cache/" +CCACHE_DIR="$HOME/.ccache/" + +if [ $ci_changes -ne 0 ] +then + echo "Files have changed in ci/ deleting all caches" + rm -rf "$CACHE_DIR" + rm -rf "$CCACHE_DIR" +fi diff --git a/ci/check_git_tags.sh b/ci/check_git_tags.sh new file mode 100755 index 00000000..9dbcd4f9 --- /dev/null +++ b/ci/check_git_tags.sh @@ -0,0 +1,28 @@ +set -e + +if [[ ! 
$(git tag) ]]; then + echo "No git tags in clone, please sync your git tags with upstream using:" + echo " git fetch --tags upstream" + echo " git push --tags origin" + echo "" + echo "If the issue persists, the clone depth needs to be increased in .travis.yml" + exit 1 +fi + +# This will error if there are no tags and we omit --always +DESCRIPTION=$(git describe --long --tags) +echo "$DESCRIPTION" + +if [[ "$DESCRIPTION" == *"untagged"* ]]; then + echo "Unable to determine most recent tag, aborting build" + exit 1 +else + if [[ "$DESCRIPTION" != *"g"* ]]; then + # A good description will have the hash prefixed by g, a bad one will be + # just the hash + echo "Unable to determine most recent tag, aborting build" + exit 1 + else + echo "$(git tag)" + fi +fi diff --git a/ci/code_checks.sh b/ci/code_checks.sh new file mode 100755 index 00000000..30d3a3ff --- /dev/null +++ b/ci/code_checks.sh @@ -0,0 +1,342 @@ +#!/bin/bash +# +# Run checks related to code quality. +# +# This script is intended for both the CI and to check locally that code standards are +# respected. We are currently linting (PEP-8 and similar), looking for patterns of +# common mistakes (sphinx directives with missing blank lines, old style classes, +# unwanted imports...), we run doctests here (currently some files only), and we +# validate formatting error in docstrings. 
+# +# Usage: +# $ ./ci/code_checks.sh # run all checks +# $ ./ci/code_checks.sh lint # run linting only +# $ ./ci/code_checks.sh patterns # check for patterns that should not exist +# $ ./ci/code_checks.sh code # checks on imported code +# $ ./ci/code_checks.sh doctests # run doctests +# $ ./ci/code_checks.sh docstrings # validate docstring errors +# $ ./ci/code_checks.sh dependencies # check that dependencies are consistent +# $ ./ci/code_checks.sh typing # run static type analysis + +[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "dependencies" || "$1" == "typing" ]] || \ + { echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|dependencies|typing]"; exit 9999; } + +BASE_DIR="$(dirname $0)/.." +RET=0 +CHECK=$1 + +function invgrep { + # grep with inverse exist status and formatting for azure-pipelines + # + # This function works exactly as grep, but with opposite exit status: + # - 0 (success) when no patterns are found + # - 1 (fail) when the patterns are found + # + # This is useful for the CI, as we want to fail if one of the patterns + # that we want to avoid is found by grep. + grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]} + return $((! $EXIT_STATUS)) +} + +if [[ "$GITHUB_ACTIONS" == "true" ]]; then + FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code)s:%(text)s" + INVGREP_PREPEND="##[error]" +else + FLAKE8_FORMAT="default" +fi + +### LINTING ### +if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then + + echo "black --version" + black --version + + MSG='Checking black formatting' ; echo $MSG + black . --check + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # `setup.cfg` contains the list of error codes that are being ignored in flake8 + + echo "flake8 --version" + flake8 --version + + # pandas/_libs/src is C code, so no need to search there. 
+ MSG='Linting .py code' ; echo $MSG + flake8 --format="$FLAKE8_FORMAT" . + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Linting .pyx code' ; echo $MSG + flake8 --format="$FLAKE8_FORMAT" pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Linting .pxd and .pxi.in' ; echo $MSG + flake8 --format="$FLAKE8_FORMAT" pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403 + RET=$(($RET + $?)) ; echo $MSG "DONE" + + echo "flake8-rst --version" + flake8-rst --version + + MSG='Linting code-blocks in .rst documentation' ; echo $MSG + flake8-rst doc/source --filename=*.rst --format="$FLAKE8_FORMAT" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check that cython casting is of the form `obj` as opposed to ` obj`; + # it doesn't make a difference, but we want to be internally consistent. + # Note: this grep pattern is (intended to be) equivalent to the python + # regex r'(?])> ' + MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG + invgrep -r -E --include '*.pyx' --include '*.pxi.in' '[a-zA-Z0-9*]> ' pandas/_libs + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # readability/casting: Warnings about C casting instead of C++ casting + # runtime/int: Warnings about using C number types instead of C++ ones + # build/include_subdir: Warnings about prefacing included header files with directory + + # We don't lint all C files because we don't want to lint any that are built + # from Cython files nor do we want to lint C files that we didn't modify for + # this particular codebase (e.g. src/headers, src/klib). However, + # we can lint all header files since they aren't "generated" like C files are. 
+ MSG='Linting .c and .h' ; echo $MSG + cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of not concatenated strings' ; echo $MSG + if [[ "$GITHUB_ACTIONS" == "true" ]]; then + $BASE_DIR/scripts/validate_string_concatenation.py --format="[error]{source_path}:{line_number}:{msg}" . + else + $BASE_DIR/scripts/validate_string_concatenation.py . + fi + RET=$(($RET + $?)) ; echo $MSG "DONE" + + echo "isort --version-number" + isort --version-number + + # Imports - Check formatting using isort see setup.cfg for settings + MSG='Check import format using isort' ; echo $MSG + ISORT_CMD="isort --recursive --check-only pandas asv_bench" + if [[ "$GITHUB_ACTIONS" == "true" ]]; then + eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]})) + else + eval $ISORT_CMD + fi + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### PATTERNS ### +if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then + + # Check for imports from pandas.core.common instead of `import pandas.core.common as com` + # Check for imports from collections.abc instead of `from collections import abc` + MSG='Check for non-standard imports' ; echo $MSG + invgrep -R --include="*.py*" -E "from pandas.core.common import" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + invgrep -R --include="*.py*" -E "from pandas.core import common" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + invgrep -R --include="*.py*" -E "from collections.abc import" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + invgrep -R --include="*.py*" -E "from numpy import nan" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Checks for test suite + # Check for imports from pandas._testing instead of `import pandas._testing as tm` + invgrep -R --include="*.py*" -E "from 
pandas._testing import" pandas/tests + RET=$(($RET + $?)) ; echo $MSG "DONE" + invgrep -R --include="*.py*" -E "from pandas.util import testing as tm" pandas/tests + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of exec' ; echo $MSG + invgrep -R --include="*.py*" -E "[^a-zA-Z0-9_]exec\(" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for pytest warns' ; echo $MSG + invgrep -r -E --include '*.py' 'pytest\.warns' pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for pytest raises without context' ; echo $MSG + invgrep -r -E --include '*.py' "[[:space:]] pytest.raises" pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for python2-style file encodings' ; echo $MSG + invgrep -R --include="*.py" --include="*.pyx" -E "# -\*- coding: utf-8 -\*-" pandas scripts + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for python2-style super usage' ; echo $MSG + invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check for the following code in testing: `np.testing` and `np.array_equal` + MSG='Check for invalid testing' ; echo $MSG + invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal` + MSG='Check for invalid EA testing' ; echo $MSG + invgrep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for deprecated messages without sphinx directive' ; echo $MSG + invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for python2 new-style classes and for empty parentheses' ; echo $MSG + invgrep -R --include="*.py" --include="*.pyx" -E 
"class\s\S*\((object)?\):" pandas asv_bench/benchmarks scripts + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG + invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for incorrect sphinx directives' ; echo $MSG + invgrep -R --include="*.py" --include="*.pyx" --include="*.rst" -E "\.\. (autosummary|contents|currentmodule|deprecated|function|image|important|include|ipython|literalinclude|math|module|note|raw|seealso|toctree|versionadded|versionchanged|warning):[^:]" ./pandas ./doc/source + RET=$(($RET + $?)) ; echo $MSG "DONE" + + # Check for the following code in testing: `unittest.mock`, `mock.Mock()` or `mock.patch` + MSG='Check that unittest.mock is not used (pytest builtin monkeypatch fixture should be used instead)' ; echo $MSG + invgrep -r -E --include '*.py' '(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)' pandas/tests/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for wrong space after code-block directive and before colon (".. code-block ::" instead of ".. code-block::")' ; echo $MSG + invgrep -R --include="*.rst" ".. code-block ::" doc/source + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for wrong space after ipython directive and before colon (".. ipython ::" instead of ".. ipython::")' ; echo $MSG + invgrep -R --include="*.rst" ".. ipython ::" doc/source + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for extra blank lines after the class definition' ; echo $MSG + invgrep -R --include="*.py" --include="*.pyx" -E 'class.*:\n\n( )+"""' . 
+ RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of {foo!r} instead of {repr(foo)}' ; echo $MSG + invgrep -R --include=*.{py,pyx} '!r}' pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of comment-based annotation syntax' ; echo $MSG + invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG + invgrep -R --include=*.{py,pyx} '\.__class__' pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check for use of xrange instead of range' ; echo $MSG + invgrep -R --include=*.{py,pyx} 'xrange' pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG + INVGREP_APPEND=" <- trailing whitespaces found" + invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" * + RET=$(($RET + $?)) ; echo $MSG "DONE" + unset INVGREP_APPEND +fi + +### CODE ### +if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then + + MSG='Check import. No warnings, and blacklist some optional dependencies' ; echo $MSG + python -W error -c " +import sys +import pandas + +blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis', + 'lxml', 'matplotlib', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', + 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'} + +# GH#28227 for some of these check for top-level modules, while others are +# more specific (e.g. 
urllib.request) +import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules) +mods = blacklist & import_mods +if mods: + sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods))) + sys.exit(len(mods)) + " + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### DOCTESTS ### +if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then + + MSG='Doctests frame.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/frame.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests series.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/series.py \ + -k"-nonzero -reindex -searchsorted -to_dict" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests generic.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/generic.py \ + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests groupby.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests datetimes.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/tools/datetimes.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests top-level reshaping functions' ; echo $MSG + pytest -q --doctest-modules \ + pandas/core/reshape/concat.py \ + pandas/core/reshape/pivot.py \ + pandas/core/reshape/reshape.py \ + pandas/core/reshape/tile.py \ + pandas/core/reshape/melt.py \ + -k"-crosstab -pivot_table -cut" + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests interval classes' ; echo $MSG + pytest -q --doctest-modules \ + pandas/core/indexes/interval.py \ + pandas/core/arrays/interval.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + + MSG='Doctests arrays'; echo $MSG + pytest -q --doctest-modules \ + pandas/core/arrays/string_.py \ + pandas/core/arrays/integer.py \ + pandas/core/arrays/boolean.py + RET=$(($RET + $?)) ;
echo $MSG "DONE" + + MSG='Doctests arrays/boolean.py' ; echo $MSG + pytest -q --doctest-modules pandas/core/arrays/boolean.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### DOCSTRINGS ### +if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then + + MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS04, SS05, PR03, PR04, PR05, PR10, EX04, RT01, RT04, RT05, SA01, SA02, SA03, SA05)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA01,SA02,SA03,SA05 + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### DEPENDENCIES ### +if [[ -z "$CHECK" || "$CHECK" == "dependencies" ]]; then + + MSG='Check that requirements-dev.txt has been generated from environment.yml' ; echo $MSG + $BASE_DIR/scripts/generate_pip_deps_from_conda.py --compare --azure + RET=$(($RET + $?)) ; echo $MSG "DONE" + +fi + +### TYPING ### +if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then + + echo "mypy --version" + mypy --version + + MSG='Performing static analysis using mypy' ; echo $MSG + mypy pandas + RET=$(($RET + $?)) ; echo $MSG "DONE" +fi + + +exit $RET diff --git a/ci/deps/azure-36-32bit.yaml b/ci/deps/azure-36-32bit.yaml new file mode 100644 index 00000000..cf3fca30 --- /dev/null +++ b/ci/deps/azure-36-32bit.yaml @@ -0,0 +1,26 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.6.* + + # tools + ### Cython 0.29.13 and pytest 5.0.1 for 32 bits are not available with conda, installing below with pip instead + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - attrs=19.1.0 + - gcc_linux-32 + - gxx_linux-32 + - numpy=1.14.* + - python-dateutil + - pytz=2017.2 + + # see comment above + - pip + - pip: + - cython>=0.29.13 + - pytest>=5.0.1 diff --git a/ci/deps/azure-36-locale.yaml b/ci/deps/azure-36-locale.yaml new file mode 100644 index 00000000..81055463 --- /dev/null +++ 
b/ci/deps/azure-36-locale.yaml @@ -0,0 +1,40 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.6.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - pytest-asyncio + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - beautifulsoup4 + - gcsfs + - html5lib + - ipython + - jinja2 + - lxml + - matplotlib=3.0.* + - nomkl + - numexpr + - numpy=1.15.* + - openpyxl + # lowest supported version of pyarrow (putting it here instead of in + # azure-36-minimum_versions because it needs numpy >= 1.14) + - pyarrow=0.13 + - pytables + - python-dateutil + - pytz + - s3fs + - scipy + - xarray + - xlrd + - xlsxwriter + - xlwt + - moto diff --git a/ci/deps/azure-36-locale_slow.yaml b/ci/deps/azure-36-locale_slow.yaml new file mode 100644 index 00000000..48ac50c0 --- /dev/null +++ b/ci/deps/azure-36-locale_slow.yaml @@ -0,0 +1,32 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.6.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - beautifulsoup4=4.6.0 + - bottleneck=1.2.* + - lxml + - matplotlib=2.2.2 + - numpy=1.14.* + - openpyxl=2.5.7 + - python-dateutil + - python-blosc + - pytz=2017.2 + - scipy + - sqlalchemy=1.1.4 + - xlrd=1.1.0 + - xlsxwriter=0.9.8 + - xlwt=1.2.0 + - pip + - pip: + - html5lib==1.0b2 diff --git a/ci/deps/azure-36-minimum_versions.yaml b/ci/deps/azure-36-minimum_versions.yaml new file mode 100644 index 00000000..de7e011d --- /dev/null +++ b/ci/deps/azure-36-minimum_versions.yaml @@ -0,0 +1,31 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.6.1 + + # tools + - cython=0.29.13 + - pytest=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-azurepipelines + - psutil + + # pandas dependencies + - beautifulsoup4=4.6.0 + - bottleneck=1.2.1 + - jinja2=2.8 + - numba=0.46.0 + - numexpr=2.6.2 + - 
numpy=1.13.3 + - openpyxl=2.5.7 + - pytables=3.4.2 + - python-dateutil=2.6.1 + - pytz=2017.2 + - scipy=0.19.0 + - xlrd=1.1.0 + - xlsxwriter=0.9.8 + - xlwt=1.2.0 + - html5lib=1.0.1 diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml new file mode 100644 index 00000000..dc51597a --- /dev/null +++ b/ci/deps/azure-37-locale.yaml @@ -0,0 +1,39 @@ +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.7.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - pytest-asyncio + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - beautifulsoup4 + - html5lib + - ipython + - jinja2 + - lxml + - matplotlib + - moto + - nomkl + - numexpr + - numpy + - openpyxl + - pytables + - python-dateutil + - pytz + - s3fs + - scipy + - xarray + - xlrd + - xlsxwriter + - xlwt + - pyarrow>=0.15 + - pip + - pip: + - pyxlsb diff --git a/ci/deps/azure-37-numpydev.yaml b/ci/deps/azure-37-numpydev.yaml new file mode 100644 index 00000000..2ff9121e --- /dev/null +++ b/ci/deps/azure-37-numpydev.yaml @@ -0,0 +1,22 @@ +name: pandas-dev +channels: + - defaults +dependencies: + - python=3.7.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - pytz + - pip + - pip: + - "git+git://github.com/dateutil/dateutil.git" + - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple" + - "--pre" + - "numpy" + - "scipy" diff --git a/ci/deps/azure-macos-36.yaml b/ci/deps/azure-macos-36.yaml new file mode 100644 index 00000000..90980133 --- /dev/null +++ b/ci/deps/azure-macos-36.yaml @@ -0,0 +1,36 @@ +name: pandas-dev +channels: + - defaults +dependencies: + - python=3.6.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - beautifulsoup4 + - bottleneck + - html5lib + - jinja2 + - lxml + - matplotlib=2.2.3 + - nomkl + 
- numexpr + - numpy=1.14 + - openpyxl + - pyarrow>=0.13.0 + - pytables + - python-dateutil==2.6.1 + - pytz + - xarray + - xlrd + - xlsxwriter + - xlwt + - pip + - pip: + - pyreadstat + - pyxlsb diff --git a/ci/deps/azure-windows-36.yaml b/ci/deps/azure-windows-36.yaml new file mode 100644 index 00000000..663c5549 --- /dev/null +++ b/ci/deps/azure-windows-36.yaml @@ -0,0 +1,32 @@ +name: pandas-dev +channels: + - conda-forge + - defaults +dependencies: + - python=3.6.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - blosc + - bottleneck + - fastparquet>=0.3.2 + - matplotlib=3.0.2 + - numba + - numexpr + - numpy=1.15.* + - openpyxl + - jinja2 + - pyarrow>=0.13.0 + - pytables + - python-dateutil + - pytz + - scipy + - xlrd + - xlsxwriter + - xlwt diff --git a/ci/deps/azure-windows-37.yaml b/ci/deps/azure-windows-37.yaml new file mode 100644 index 00000000..6b3ad6f5 --- /dev/null +++ b/ci/deps/azure-windows-37.yaml @@ -0,0 +1,40 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.7.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-azurepipelines + + # pandas dependencies + - beautifulsoup4 + - bottleneck + - gcsfs + - html5lib + - jinja2 + - lxml + - matplotlib=2.2.* + - moto + - numexpr + - numpy=1.14.* + - openpyxl + - pyarrow=0.14 + - pytables + - python-dateutil + - pytz + - s3fs + - scipy + - sqlalchemy + - xlrd + - xlsxwriter + - xlwt + - pyreadstat + - pip + - pip: + - pyxlsb diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml new file mode 100644 index 00000000..869d2ab6 --- /dev/null +++ b/ci/deps/travis-36-cov.yaml @@ -0,0 +1,54 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.6.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + - pytest-cov # this is only 
needed in the coverage build + + # pandas dependencies + - beautifulsoup4 + - botocore>=1.11 + - cython>=0.29.13 + - dask + - fastparquet>=0.3.2 + - gcsfs + - geopandas + - html5lib + - matplotlib + - moto + - nomkl + - numexpr + - numpy=1.15.* + - odfpy + - openpyxl<=3.0.1 + # https://github.com/pandas-dev/pandas/pull/30009 openpyxl 3.0.2 broke + - pandas-gbq + - psycopg2 + - pyarrow>=0.13.0 + - pymysql + - pytables + - python-snappy + - pytz + - s3fs + - scikit-learn + - scipy + - sqlalchemy + - statsmodels + - xarray + - xlrd + - xlsxwriter + - xlwt + - pip + - pip: + - brotlipy + - coverage + - pandas-datareader + - python-dateutil + - pyxlsb diff --git a/ci/deps/travis-36-locale.yaml b/ci/deps/travis-36-locale.yaml new file mode 100644 index 00000000..d0bc0465 --- /dev/null +++ b/ci/deps/travis-36-locale.yaml @@ -0,0 +1,42 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.6.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + + # pandas dependencies + - beautifulsoup4 + - blosc=1.14.3 + - python-blosc + - fastparquet=0.3.2 + - gcsfs=0.2.2 + - html5lib + - ipython + - jinja2 + - lxml=3.8.0 + - matplotlib=3.0.* + - moto + - nomkl + - numexpr + - numpy + - openpyxl + - pandas-gbq=0.8.0 + - psycopg2=2.6.2 + - pymysql=0.7.11 + - pytables + - python-dateutil + - pytz + - s3fs=0.3.0 + - scipy + - sqlalchemy=1.1.4 + - xarray=0.10 + - xlrd + - xlsxwriter + - xlwt diff --git a/ci/deps/travis-36-slow.yaml b/ci/deps/travis-36-slow.yaml new file mode 100644 index 00000000..1dfd90d0 --- /dev/null +++ b/ci/deps/travis-36-slow.yaml @@ -0,0 +1,34 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.6.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + + # pandas dependencies + - beautifulsoup4 + - html5lib + - lxml + - matplotlib + - numexpr + - numpy + - openpyxl + - patsy + - psycopg2 + - pymysql + - 
pytables + - python-dateutil + - pytz + - s3fs + - scipy + - sqlalchemy + - xlrd + - xlsxwriter + - xlwt + - moto diff --git a/ci/deps/travis-37.yaml b/ci/deps/travis-37.yaml new file mode 100644 index 00000000..682b1016 --- /dev/null +++ b/ci/deps/travis-37.yaml @@ -0,0 +1,26 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.7.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + + # pandas dependencies + - botocore>=1.11 + - numpy + - python-dateutil + - nomkl + - pyarrow + - pytz + - s3fs + - tabulate + - pyreadstat + - pip + - pip: + - moto diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml new file mode 100644 index 00000000..a627b7ed --- /dev/null +++ b/ci/deps/travis-38.yaml @@ -0,0 +1,20 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.8.* + + # tools + - cython>=0.29.13 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + + # pandas dependencies + - numpy + - python-dateutil + - nomkl + - pytz + - pip + - tabulate==0.8.3 diff --git a/ci/prep_cython_cache.sh b/ci/prep_cython_cache.sh new file mode 100755 index 00000000..18d93883 --- /dev/null +++ b/ci/prep_cython_cache.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +ls "$HOME/.cache/" + +PYX_CACHE_DIR="$HOME/.cache/pyxfiles" +pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd" -o -name "*.pxi.in"` +pyx_cache_file_list=`find ${PYX_CACHE_DIR} -name "*.pyx" -o -name "*.pxd" -o -name "*.pxi.in"` + +CACHE_File="$HOME/.cache/cython_files.tar" + +# Clear the cython cache 0 = NO, 1 = YES +clear_cache=0 + +pyx_files=`echo "$pyx_file_list" | wc -l` +pyx_cache_files=`echo "$pyx_cache_file_list" | wc -l` + +if [[ pyx_files -ne pyx_cache_files ]] +then + echo "Different number of pyx files" + clear_cache=1 +fi + +home_dir=$(pwd) + +if [ -f "$CACHE_File" ] && [ -z "$NOCACHE" ] && [ -d "$PYX_CACHE_DIR" ]; then + + echo "Cache available - checking pyx diff" + + for i in 
${pyx_file_list} + do + diff=`diff -u $i $PYX_CACHE_DIR${i}` + if [[ $? -eq 2 ]] + then + echo "${i##*/} can't be diffed; probably not in cache" + clear_cache=1 + fi + if [[ ! -z $diff ]] + then + echo "${i##*/} has changed:" + echo $diff + clear_cache=1 + fi + done + + if [ "$TRAVIS_PULL_REQUEST" == "false" ] + then + echo "Not a PR" + # Uncomment next 2 lines to turn off cython caching not in a PR + # echo "Non PR cython caching is disabled" + # clear_cache=1 + else + echo "In a PR" + # Uncomment next 2 lines to turn off cython caching in a PR + # echo "PR cython caching is disabled" + # clear_cache=1 + fi + +fi + +if [ $clear_cache -eq 0 ] && [ -z "$NOCACHE" ] +then + # No and nocache is not set + echo "Will reuse cached cython file" + cd / + tar xvmf $CACHE_File + cd $home_dir +else + echo "Rebuilding cythonized files" + echo "No cache = $NOCACHE" + echo "Clear cache (1=YES) = $clear_cache" +fi + + +exit 0 diff --git a/ci/print_skipped.py b/ci/print_skipped.py new file mode 100755 index 00000000..72822fa2 --- /dev/null +++ b/ci/print_skipped.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +import os +import xml.etree.ElementTree as et + + +def main(filename): + if not os.path.isfile(filename): + raise RuntimeError(f"Could not find junit file {repr(filename)}") + + tree = et.parse(filename) + root = tree.getroot() + current_class = "" + for el in root.iter("testcase"): + cn = el.attrib["classname"] + for sk in el.findall("skipped"): + old_class = current_class + current_class = cn + if old_class != current_class: + yield None + yield { + "class_name": current_class, + "test_name": el.attrib["name"], + "message": sk.attrib["message"], + } + + +if __name__ == "__main__": + print("SKIPPED TESTS:") + i = 1 + for test_data in main("test-data.xml"): + if test_data is None: + print("-" * 80) + else: + print( + f"#{i} {test_data['class_name']}." 
+ f"{test_data['test_name']}: {test_data['message']}" + ) + i += 1 diff --git a/ci/run_tests.sh b/ci/run_tests.sh new file mode 100755 index 00000000..0cb1f4aa --- /dev/null +++ b/ci/run_tests.sh @@ -0,0 +1,32 @@ +#!/bin/bash -e + +# Workaround for pytest-xdist (it collects different tests in the workers if PYTHONHASHSEED is not set) +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') + +if [[ "not network" == *"$PATTERN"* ]]; then + export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; +fi + +if [ "$COVERAGE" ]; then + COVERAGE_FNAME="/tmp/test_coverage.xml" + COVERAGE="-s --cov=pandas --cov-report=xml:$COVERAGE_FNAME" +fi + +# If no X server is found, we use xvfb to emulate it +if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then + export DISPLAY=":0" + XVFB="xvfb-run " +fi + +PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n auto --dist=loadfile -s --strict --durations=10 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas" + +echo $PYTEST_CMD +sh -c "$PYTEST_CMD" + +if [[ "$COVERAGE" && $? 
== 0 && "$TRAVIS_BRANCH" == "master" ]]; then + echo "uploading coverage" + echo "bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME" + bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME +fi diff --git a/ci/setup_env.sh b/ci/setup_env.sh new file mode 100755 index 00000000..ae39b0dd --- /dev/null +++ b/ci/setup_env.sh @@ -0,0 +1,157 @@ +#!/bin/bash -e + +# edit the locale file if needed +if [[ "$(uname)" == "Linux" && -n "$LC_ALL" ]]; then + echo "Adding locale to the first line of pandas/__init__.py" + rm -f pandas/__init__.pyc + SEDC="3iimport locale\nlocale.setlocale(locale.LC_ALL, '$LC_ALL')\n" + sed -i "$SEDC" pandas/__init__.py + + echo "[head -4 pandas/__init__.py]" + head -4 pandas/__init__.py + echo +fi + +MINICONDA_DIR="$HOME/miniconda3" + + +if [ -d "$MINICONDA_DIR" ]; then + echo + echo "rm -rf "$MINICONDA_DIR"" + rm -rf "$MINICONDA_DIR" +fi + +echo "Install Miniconda" +UNAME_OS=$(uname) +if [[ "$UNAME_OS" == 'Linux' ]]; then + if [[ "$BITS32" == "yes" ]]; then + CONDA_OS="Linux-x86" + else + CONDA_OS="Linux-x86_64" + fi +elif [[ "$UNAME_OS" == 'Darwin' ]]; then + CONDA_OS="MacOSX-x86_64" +else + echo "OS $UNAME_OS not supported" + exit 1 +fi + +wget -q "https://repo.continuum.io/miniconda/Miniconda3-latest-$CONDA_OS.sh" -O miniconda.sh +chmod +x miniconda.sh +./miniconda.sh -b + +export PATH=$MINICONDA_DIR/bin:$PATH + +echo +echo "which conda" +which conda + +echo +echo "update conda" +conda config --set ssl_verify false +conda config --set quiet true --set always_yes true --set changeps1 false +conda install pip conda # create conda to create a historical artifact for pip & setuptools +conda update -n base conda + +echo "conda info -a" +conda info -a + +echo +echo "set the compiler cache to work" +if [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "linux" ]; then + echo "Using ccache" + export PATH=/usr/lib/ccache:/usr/lib64/ccache:$PATH + GCC=$(which gcc) + echo "gcc: $GCC" + CCACHE=$(which ccache) + echo "ccache: $CCACHE" + 
export CC='ccache gcc' +elif [ -z "$NOCACHE" ] && [ "${TRAVIS_OS_NAME}" == "osx" ]; then + echo "Install ccache" + brew install ccache > /dev/null 2>&1 + echo "Using ccache" + export PATH=/usr/local/opt/ccache/libexec:$PATH + gcc=$(which gcc) + echo "gcc: $gcc" + CCACHE=$(which ccache) + echo "ccache: $CCACHE" +else + echo "Not using ccache" +fi + +echo "source deactivate" +source deactivate + +echo "conda list (root environment)" +conda list + +# Clean up any left-over from a previous build +# (note workaround for https://github.com/conda/conda/issues/2679: +# `conda env remove` issue) +conda remove --all -q -y -n pandas-dev + +echo +echo "conda env create -q --file=${ENV_FILE}" +time conda env create -q --file="${ENV_FILE}" + + +if [[ "$BITS32" == "yes" ]]; then + # activate 32-bit compiler + export CONDA_BUILD=1 +fi + +echo "activate pandas-dev" +source activate pandas-dev + +echo +echo "remove any installed pandas package" +echo "w/o removing anything else" +conda remove pandas -y --force || true +pip uninstall -y pandas || true + +echo +echo "remove postgres if has been installed with conda" +echo "we use the one from the CI" +conda remove postgresql -y --force || true + +echo +echo "remove qt" +echo "causes problems with the clipboard, we use xsel for that" +conda remove qt -y --force || true + +echo +echo "conda list pandas" +conda list pandas + +# Make sure any error below is reported as such + +echo "[Build extensions]" +python setup.py build_ext -q -i -j2 + +# XXX: Some of our environments end up with old versions of pip (10.x) +# Adding a new enough version of pip to the requirements explodes the +# solve time. Just using pip to update itself. +# - py35_macos +# - py35_compat +# - py36_32bit +echo "[Updating pip]" +python -m pip install --no-deps -U pip wheel setuptools + +echo "[Install pandas]" +python -m pip install --no-build-isolation -e . 
+ +echo +echo "conda list" +conda list + +# Install DB for Linux + +if [[ -n ${SQL:0} ]]; then + echo "installing dbs" + mysql -e 'create database pandas_nosetest;' + psql -c 'create database pandas_nosetest;' -U postgres +else + echo "not using dbs on non-linux Travis builds or Azure Pipelines" +fi + +echo "done" diff --git a/ci/submit_cython_cache.sh b/ci/submit_cython_cache.sh new file mode 100755 index 00000000..b87acef0 --- /dev/null +++ b/ci/submit_cython_cache.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +CACHE_File="$HOME/.cache/cython_files.tar" +PYX_CACHE_DIR="$HOME/.cache/pyxfiles" +pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd" -o -name "*.pxi.in"` + +rm -rf $CACHE_File +rm -rf $PYX_CACHE_DIR + +home_dir=$(pwd) + +mkdir -p $PYX_CACHE_DIR +rsync -Rv $pyx_file_list $PYX_CACHE_DIR + +echo "pyx files:" +echo $pyx_file_list + +tar cf ${CACHE_File} --files-from /dev/null + +for i in ${pyx_file_list} +do + f=${i%.pyx} + ls $f.{c,cpp} | tar rf ${CACHE_File} -T - +done + +echo "Cython files in cache tar:" +tar tvf ${CACHE_File} + +exit 0 diff --git a/ci/travis_encrypt_gbq.sh b/ci/travis_encrypt_gbq.sh new file mode 100755 index 00000000..e404ca73 --- /dev/null +++ b/ci/travis_encrypt_gbq.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +GBQ_JSON_FILE=$1 + +if [[ $# -ne 1 ]]; then + echo -e "Too few arguments.\nUsage: ./travis_encrypt_gbq.sh "\ + "" + exit 1 +fi + +if [[ $GBQ_JSON_FILE != *.json ]]; then + echo "ERROR: Expected *.json file" + exit 1 +fi + +if [[ ! -f $GBQ_JSON_FILE ]]; then + echo "ERROR: File $GBQ_JSON_FILE does not exist" + exit 1 +fi + +echo "Encrypting $GBQ_JSON_FILE..." +read -d "\n" TRAVIS_KEY TRAVIS_IV <<<$(travis encrypt-file $GBQ_JSON_FILE \ +travis_gbq.json.enc -f | grep -o "\w*_iv\|\w*_key"); + +echo "Adding your secure key to travis_gbq_config.txt ..." +echo -e "TRAVIS_IV_ENV=$TRAVIS_IV\nTRAVIS_KEY_ENV=$TRAVIS_KEY"\ +> travis_gbq_config.txt + +echo "Done. 
Removing file $GBQ_JSON_FILE" +rm $GBQ_JSON_FILE + +echo -e "Created encrypted credentials file travis_gbq.json.enc.\n"\ + "NOTE: Do NOT commit the *.json file containing your unencrypted" \ + "private key" diff --git a/ci/travis_gbq.json.enc b/ci/travis_gbq.json.enc new file mode 100644 index 0000000000000000000000000000000000000000..c2a33bbd6f26383bd7e8a7a504e626284efb5fd0 GIT binary patch literal 2352 zcmV-03D5QoiY_vZjh&7QCFrhKcFBG@`zj6HxkUamBtL*$SOfIYLQAnP$$?HCW-UzE zqY3S}bS_tytBr;XZgqTWlqlC0A?TtDDzJS4<-4yF+82AKZYaOSzyy z)LIN&*Phn|s>u2rH)V_1hyj-xu@)mBOg%_tj5_Sz6kyK>B5Gj0bp;~khYB=Ul|&X? zUFSM`<{}P#4_#PMfT#y?P!&Q=azAz#tG@DOU=aLF%RTb9pTg+mwrTZ+`_vBO5^xdb zCk{k&n*k1|x?M-4M;q$_?J$Z=GMNDL*;ETHrT|OpFalF9aJ;1NN8;rz^YfzF2c#MtNZvI;NuIJQ-M<=GHh=X9{ian$nm(H@?nOf1bgG`&RpLSr<5g9xf z2teKs?kATag6a+LsF}ejFjmcfSCRZKh(1~}uiJ(Qc@Q;)ValsMLtF!2X$O%Cb z2KMdb?&ns7GPy+RSdg<1=+QLqzgq74x1J+)2!4_{d|gtTVv9I=qfT>YNLb!NjSeg= zF|Qh88XA3rHR)>wth;QO_M(&hfA8)$QEpGgANx7DK|J`dW)T_`Xz_E!NK^R8RZg$y zc5}UIuDBt}n1#0!5GPf8Jbgag71LqHsVxL^@1qNIX|Dy=0vXV0(4^j2t$?ktEZdd5 zu_ckdLNK1WUPlJaR4^MLsqCIlhr=wrO2O}*qt8Z*MskXFh93(O!7RnBrwEDnT<`it5D0Mb#*2bx#aqC@LEJC=x_>Rx<|ygktaBRpWD z4#{MIj?XI%F|f1Z!qi;RP!vt6Ble@nmfAd}TzlXws1BJ)f5{5gri+aezIomN6ImrH zx}$i#tM@W$hzh(j)Gt+D=6S|?h}()_-~|h%S3)QyM`7f{Yf{v>p$dbYb8XdaAwacm zYIgF03~bBRJ?Q|Rm{AoSq^LSBkDa|`3tNoi02mXu+-Du+k_EUwoHMFk922)^pS;_D6#vtq~4S z0+*&E9tblkhvce%@L*}odrsPg ze1D(imA!lhnI7E+EDFG9720>Y4#l_d;0oNsr)BvjIN8`WGnc1$a?%?ycY8#Jhm$-C3s{t9ZH!5Tdr>`t41 zT)!t07R`S+w73>s@5X;v4d{Zrz<~%E?>$ry4A?zF{TOsf3y|_$p=_p^7 zyHtMEaO`#lEy8g>>v{%h!1*z-W`(rGI}x7M3P7v}4?u6$pF9q$Z>h4+;M|XMMXn-` zt;L)h+N2X->u!;3$*+|@qIVFK-FHTOWzOKyOMLi?7uHQUumZzC>x@c?*cS{IeR9pz z%j|yMgIP(6EQpB4%%ANMRmAGv^MZ8l-{UC8Un6k3C~MltE7?VC^N!9xT725P)|Gtf z&Y(8ua0ZUJO(-Sc>1rq^R0ra;Wa5&>w$UCFV36KRm<$T^2(h&JMd-wYacGQvViWbN z;Sj}nB6rj56!|*PGf00&z+`c`4W3nX4V>s9=aCW8AGAn)EiROzk#ku76;QET`eHgm z(nw)$QzY5E$?_QwzB-{3OpF_c;7(A1@_v7pYaO5JgoY(y&*&O#VUKi8dkA)N#1BEo z^s5wOm{@=f>c|t#|7>EeQqHh!uRXjICpE`%G!Z+Zt<^J-#-9iG(VG#%Nv?sI+ 
zbc`m4USJyzcgu?tl;%C}Ez6G@|f#&^hF+`g-yrj{hmY4yhlk+b#gV44cV?S5r%;?ge?g z#lzI?kuY1oXLg&XxdkBG8g*9plC**(x1xRs!fCuZZfAb#o*pyTq1{n<-CM+4c6lHo zqhwh;eK)Jl1X}YUP)?=oto!8X%qgNi1g>n7$x+*H3lrxcs&2-MENP(#=M;+oe_zRD zmCP_qF1Fe;UFgs(|6U79ig}b`dz4{4Eh38)&RvnO=3V=+bB@oe8weiJM6CJ5c%GQ-iz&#q=Du>_LJKa?c5%>1J4;MeQNYk^_$~ z;|WA1#Nz81yr8Jafys`4PisrSy?Jw~yQrKw#cLkq4Jq8We*d_mk#2#X^w3p=gJB>* z#!GJ%sBPy+SR&x<$od^Zj0! zidEfbN|w72WG4PR*<}{0X+HTW38KvQlnKe|LO@K*{nS!xOGu^})|VMf4R={d{^$ZY Wc%~RC+CiWM`BrrE1b(~# literal 0 HcmV?d00001 diff --git a/ci/travis_gbq_config.txt b/ci/travis_gbq_config.txt new file mode 100644 index 00000000..0b28cded --- /dev/null +++ b/ci/travis_gbq_config.txt @@ -0,0 +1,2 @@ +TRAVIS_IV_ENV=encrypted_1d9d7b1f171b_iv +TRAVIS_KEY_ENV=encrypted_1d9d7b1f171b_key diff --git a/ci/travis_process_gbq_encryption.sh b/ci/travis_process_gbq_encryption.sh new file mode 100755 index 00000000..9967d40e --- /dev/null +++ b/ci/travis_process_gbq_encryption.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +source ci/travis_gbq_config.txt + +if [[ -n ${SERVICE_ACCOUNT_KEY} ]]; then + echo "${SERVICE_ACCOUNT_KEY}" > ci/travis_gbq.json; +elif [[ -n ${!TRAVIS_IV_ENV} ]]; then + openssl aes-256-cbc -K ${!TRAVIS_KEY_ENV} -iv ${!TRAVIS_IV_ENV} \ + -in ci/travis_gbq.json.enc -out ci/travis_gbq.json -d; + export GBQ_PROJECT_ID='pandas-travis'; + echo 'Successfully decrypted gbq credentials' +fi + diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..1644bf31 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,13 @@ +codecov: + branch: master + +comment: off + +coverage: + status: + project: + default: + target: '82' + patch: + default: + target: '50' diff --git a/conda.recipe/bld.bat b/conda.recipe/bld.bat new file mode 100644 index 00000000..284926fa --- /dev/null +++ b/conda.recipe/bld.bat @@ -0,0 +1,2 @@ +@echo off +%PYTHON% setup.py install diff --git a/conda.recipe/build.sh b/conda.recipe/build.sh new file mode 100644 index 00000000..f341bce6 --- /dev/null +++ b/conda.recipe/build.sh @@ -0,0 
+1,2 @@ +#!/bin/sh +$PYTHON setup.py install diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml new file mode 100644 index 00000000..47f63c11 --- /dev/null +++ b/conda.recipe/meta.yaml @@ -0,0 +1,40 @@ +package: + name: pandas + version: {{ environ.get('GIT_DESCRIBE_TAG','').replace('v', '', 1) }} + +build: + number: {{ environ.get('GIT_DESCRIBE_NUMBER', 0) }} + {% if GIT_DESCRIBE_NUMBER|int == 0 %}string: np{{ CONDA_NPY }}py{{ CONDA_PY }}_0 + {% else %}string: np{{ CONDA_NPY }}py{{ CONDA_PY }}_{{ GIT_BUILD_STR }}{% endif %} + +source: + git_url: ../ + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + host: + - python + - pip + - cython + - numpy + - setuptools >=3.3 + - python-dateutil >=2.5.0 + - pytz + run: + - python {{ python }} + - {{ pin_compatible('numpy') }} + - python-dateutil >=2.5.0 + - pytz + +test: + requires: + - pytest + commands: + - python -c "import pandas; pandas.test()" + + +about: + home: https://pandas.pydata.org + license: BSD diff --git a/doc/.gitignore b/doc/.gitignore new file mode 100644 index 00000000..e23892d6 --- /dev/null +++ b/doc/.gitignore @@ -0,0 +1,4 @@ +data/ +timeseries.csv +timeseries.parquet +timeseries_wide.parquet diff --git a/doc/README.rst b/doc/README.rst new file mode 100644 index 00000000..5423e741 --- /dev/null +++ b/doc/README.rst @@ -0,0 +1 @@ +See `contributing.rst `_ in this repo. diff --git a/doc/_templates/api_redirect.html b/doc/_templates/api_redirect.html new file mode 100644 index 00000000..c04a8b58 --- /dev/null +++ b/doc/_templates/api_redirect.html @@ -0,0 +1,10 @@ +{% set redirect = redirects[pagename.split("/")[-1]] %} + + + + This API page has moved + + +

This API page has moved here.

+ + diff --git a/doc/_templates/autosummary/accessor.rst b/doc/_templates/autosummary/accessor.rst new file mode 100644 index 00000000..4ba745cd --- /dev/null +++ b/doc/_templates/autosummary/accessor.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessor:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/_templates/autosummary/accessor_attribute.rst b/doc/_templates/autosummary/accessor_attribute.rst new file mode 100644 index 00000000..b5ad65d6 --- /dev/null +++ b/doc/_templates/autosummary/accessor_attribute.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessorattribute:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/_templates/autosummary/accessor_callable.rst b/doc/_templates/autosummary/accessor_callable.rst new file mode 100644 index 00000000..7a330181 --- /dev/null +++ b/doc/_templates/autosummary/accessor_callable.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessorcallable:: {{ (module.split('.')[1:] + [objname]) | join('.') }}.__call__ diff --git a/doc/_templates/autosummary/accessor_method.rst b/doc/_templates/autosummary/accessor_method.rst new file mode 100644 index 00000000..aefbba6e --- /dev/null +++ b/doc/_templates/autosummary/accessor_method.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module.split('.')[0] }} + +.. autoaccessormethod:: {{ (module.split('.')[1:] + [objname]) | join('.') }} diff --git a/doc/_templates/autosummary/class.rst b/doc/_templates/autosummary/class.rst new file mode 100644 index 00000000..a9c9bd2b --- /dev/null +++ b/doc/_templates/autosummary/class.rst @@ -0,0 +1,33 @@ +{% extends "!autosummary/class.rst" %} + +{% block methods %} +{% if methods %} + +.. 
+ HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. + .. autosummary:: + :toctree: + {% for item in all_methods %} + {%- if not item.startswith('_') or item in ['__call__'] %} + {{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + +{% endif %} +{% endblock %} + +{% block attributes %} +{% if attributes %} + +.. + HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. + .. autosummary:: + :toctree: + {% for item in all_attributes %} + {%- if not item.startswith('_') %} + {{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + +{% endif %} +{% endblock %} diff --git a/doc/_templates/autosummary/class_without_autosummary.rst b/doc/_templates/autosummary/class_without_autosummary.rst new file mode 100644 index 00000000..6676c672 --- /dev/null +++ b/doc/_templates/autosummary/class_without_autosummary.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} diff --git a/doc/cheatsheet/Pandas_Cheat_Sheet.pdf b/doc/cheatsheet/Pandas_Cheat_Sheet.pdf new file mode 100644 index 0000000000000000000000000000000000000000..48da05d053b96a1235566aa45c14e8cbe6b2c08c GIT binary patch literal 345905 zcmdSB1z1(xwm*!72ojPaF@fxi zEYVT<_`oXe_7Jd$p_8Gtof%lc&g;iz%qumX4DW{232Jm5LnsC(b?EZ#SsEg zva@pnv2)*4ku`L*lC(9o1G<6TLQFtFHOP}EU|EQhp$X6@h?$!SMobB;W&v@9I4U?o zOd*aCTVsF<@aTrnlPBn?5L=TQeqdkx#@G!*U=cfKTPF|;SlYq_stsa=bqHePgk5w& zzcz<$=* zxSg#N@KBr?bVCg+&I0-|a&cA=^N)dvvw@gzI0B2agP6H)x)A38vHrZ{1hFyw_>K$2 zc0&N@R>9E83F2rA^sK8;Y>Xj&{b%5GQRgfFf80;^qXF zv;kle{&5lcagl^g1X#${*3Jo-A-0>bg4lk}6dSBlT@ZkZiG`uCotrjLW&-$UVq#?G z0CBN1F|x7%k41pN0uu!V-P8w5INCYe11#PM$V~yP3|4V8w1wKkCe+v+ETRk+g}7K4 zLzEh^lsTl>{=Rq1Ly`O(9gwFRfj%$#7jI{_+B!`@T&ZlU*f(b0@! 
zvQFW!&$5rKD0@4&4n&wXn)zO?B7uak@8hR@2^niY!fi!7Abat220IDtd%5rAR$uQ$ z>vMX|*)b>Zy^y}gMl=4cqA;#-?26i`SY~h0%Z~PMUkAQjq%59%9nkx>Afemfd3D9A z*sygNTS*;<0r9v@84$SKd*iEiMcPBH_buz}us31O_iLA3_~BMb_?-78x`|zcUvp9H zS+#9A@>wBmfsHC&WpQ;Jjcr4qsWxX2?N(7>PTh>!*{C($P8#Da%}L?&B57`=u$ghj zTWRxFyT-FPaUGM)LY4h$G$|=Kn?*4}*YN>tV)82S-VLI;h+U6~nf(W+5VdhGOLs$i z0%kt^%BL0JsUm2z<=1vh)n7L&q%@~VH@b5r7?sz1yYl%e+F(@gv5XDf zEyzTT@!`*pwg5>We3$In7}@-Ku+ z)@EJovW7mgUFEul*G+KMC#g0ft0-bFT5l6pr^sboyFqF-`i_Y+2E8voy}3>FR;-nc zZ+nFphl;n)lSOE~O=V^f3Xa^DZL=AiIDdB=VcDK3ev4-a0hdwZiR)4eRDC_vXAfAl zpTBOIKNq9I*9$c3MQDg{}=@u#rq1=LAe@Xu^?su4HRm(n;>b>)g zLTWc>8cHUZmeP+=w+Vj0jlUx7)Dmncf%kT5{4xAauTs+H7FujpISRW!d;}{`XO><1 zvn}*ob_LWBXrsR(BAf6FX*|t4-4(2qy6Q{$b zorG5-xOwqL?LFcv_;Jyokc#dc1OVNJ`Vfx5wyh_{UIu}RFK zAfaX9o#$awEkYU5t388oCm@Agi{Z`k z?Lj}451Pl*U+-ggQYqSgSv*|Zz23n?)8?uZAlbDibJBh-ZLI&qK=@#YG1jkrFY3J< zd;sCNG?E zM@{Ci=rXUoyYkg)!m%e@o!-qhWm1rA^YY}|&J8J|pO7$MdIpjxBOm(}GQD*_o)098 zVB_SyW>MTg@^7dpE#Lwd=ThazJ4|gu*r@W2xyRBihq*z=>qt(hL(f6gK ze}}Kf_FCBTW%kEx+bZ^t6_y=p^BCUs3>$2c?8!KU#LP4qW!!8ipV=QjIK_%z3BC;a z;6CzVuAOC$O#}Z)btMIp>}g~+UC6+oRyhMY8kR!SY6)9~ViYS#1SK01hH6G|l~kQU zSYPW-Dcv?ShL5AQGgcK)$xc}_WfVZDT8*83iM9pOqCPqngD1PZP=lMqb~+X#RDWqm z&rW~8;X*BddwA@AFIkw5_w05VJXt$`V^dSzJMhtQke#Biy8hGmnim&s)H_ewrz#dM zT#t())HQYMtGyu$cZ&&%>F>2JP8L{?DQKU>A!)tsR&_Zi-E~rXODT7aE+c1v&Y2c> z^-ZVc-Nqpgr~FH4szMgOJHccmVn_a~X_#!`(~aD_&=$4VCTdK2B1;l}3$G7L{VF?` zFp^GgtvX&7eC4S>rTF~X$kBNG{Wx9Dh=J%+R>LbHXm8gd#`jCNyRSYG%T+pQps_iq zR&{^8^GVuZcJ|;Jz37Q!Zv5WXgGQ2FU$tz}OZe9$EmHB(2!#HxW`a36=D`n072*=PsgI>wuv>LU6#%~m=Ia(nb*u|G@i7WeN6~&29YjxbprWJ`jQMZkD zS#=k7S=rZT)Z9gP*Q+lhGS^YVsA;pr*~g3Yb!RV}$9hVr1>7r%)k;}>u7l$Wj&n`h zEf1{Ja}0yd%1ftIGN=3=C-``;PZnUIaSxZg@;tE&y5#g}G#F;QP9E|svDI>Vvo2Nn z#@^ZfQE~7t&TbF|~58+z~HF_~=Vy!$4>^dz^Q5f{;|=hO%eW=wvN z?`APsrkXdAZFZ@V?3xxyxO2Mt?T(IU@Z5IjklV{oGM`jUHqteNXah3wckqK~4VQuSo?yZj6iapql=Ie>v?MU;-sxz@nV|Q z7wq8k&~kOf_cGXNtzpy$>SNl4&#{A9iN~m0lO(?;)KR%+uVGJ^WR>TcO5>c8Sp_bl 
zZpsyr#O{tx-C9A^M*f!Qtzq=>Yr&GNIYONl5A^PXV-=hX2@=PT54zoh|*W@4GkowM5fPs35xD`7Q4Q4o!ha{J8^0RNAXs? zI}c1=-Mz-4oNKJiiBx~umz^E(>A1V^g=u@a{IS+xg7}E3@$_pa!ZpY7nh~rg){c*A ztnyjd+~6~fkxep1==vyl2U|W=(64PWHQEyD9EprhDZ(unDNI)ipn-_&W zm!(`)%z0-nR9-W^UBo8hv-(PBfFoX$$WF>A_xv@}##-tkT25(^O3g?l6(xk7k7g(? z85)N3iZ|2zp^N{_0_0lvl{j@C^GmB<;#1t`ubw zq7qXaiROjOgkvwO=1P08B(Pv}8%Nu(-43Wh4;s;T)v@l6NDiswRgs* z!(|)tQ$p_arC4T?ge~cZ3B0jQ7;mV2@b$H>(p;G#)IM(mhcx@}TW0d=^y=P@Ar{&u3jYx zyM;=Zxuv`L1_8*di)umJ@5>mLWk2bEgPa~!_|`BmKJ-Uiy%n8M&9%|efv_8f(ta(n zKvpKp<$+>qWy8wu3Q9yw>B-4OckCZf(rU+3f9}~7rSCRwgT`#KbMLd;1bU2 zJc@)sYZ}Qt6Qy_kRfGyL3x}W#7q`;q-)g7>su(lP`~8{+!zxeCdr?Ts_uU#FFx8MN zlSc>2A7Arj@q4&t#ePw|vcq0dS02GndGBkHfhmHFrc(LRyeD9M94Q&jV@bojUXIM{ zRjpQNE$tPBmYHjgJu>AB$?@R$*|X0zy}R%GY=*^xtgoyq{q%yM4>H%Y;naHJ^7M<` zisK!Vs;cM@w&JJ@gyT6YQU?QUVFfxxFy9w}M!f@_JP*nqsZqN40t995axP5Wjie5T z1%B1&&djo)V-@cW(ztgB7IebqE990l0l>C`*7ja>7!GlqjbGD;Cb0`_W~&wD+CWgT z6c4MgHaS&9Zej*(W1^hIechP<(YjFW{cZF1Ym!yPrxSy5`_vhXM2=DSzAHTS(LG?& z=6JD)u-R!Cx|VPC(t}>gs(lxUzQrvYS1@s)G6x}IO$si;OzIZ>M^gLjP3JT0BzV-8 zTQbXG2&3DBi)?;*G7f%uIw*eGk&?FN#Ey5CYAso6ghtQ@x`MbkB;Xwb?Q7)+AEy~48jU1QNo$m`H+_%NVHLaK*JM; zbjgiT{#UIer)nuySLMF%T_-mCDO1~s=a!hq`x!`~nT>uk#I_rKVs^Wz@7x5TS@qFQ zO{6^isjSl@TDFFZ{+oD@H$o@VIPi9-#ZcJo8zp_H*B3tGgEyLMU(tk6rqQt_mBB3~ zDZv?bo68{s5Wo=(#KPanRDJM~JynL=D^w=qV=Pi)Hy-kHge*Qh16pNb16pdau*CnE5ncFSL|Ou|!le)T6`E+2EpSFjF}- zw!V#*7Bf-;a(655V%RgE5Z^#{YlDmrE@tN`CU?FZ@bM>gL?6uD)Bez&m8H$9t;0d~ zykKju_=AwIt4iuBRHbP(vkU=`Xit7KL7@lfz{jDFjpU{DlC$rsx_%!j+5^p|yO z#-w*TAIDe>vJStJC?W9jyHJ(QobHQq{EBocb^V%|EGrRkV9PPn)T8kV zQgI^pGm+lau=xG~4g94fS#)chWIm<-Cr6@GZh1N{LhhB1Q71MWQ(DErdhAGfPaNuu*Tvrv9F1G=sA7hXsW)c8wFvMqSsJijl%M&0vl3L>^L%>N*=e+@5-WKIX3R5-7#t(exo z9IH6iyCz~Eva!%|6bnzHEyQm?`6#ElW4EL#!L>SpEu_EhrJkia0T=fRw@|i#jpNlZ zJa`W}#xvhgU*AviBySvoJo{J=ZJv=2${bvukjhK#ZqTp9mk?QTLboxcQ0wq~Pw@xT z9IFxfSzjlky~re>!x8z$V6;24>U}EcO1SjI#!T~Ie)5(7WU$c3H#S*st#JKBi(??S zJ-p@3P&JvDTJp2jaiGX}cbQ($vE^z60(L(zQvBJzGFB%v>mCg7#j)d&e-z$I^!{0B 
zgXbo1q`5(;R0roxXcce3;XJpPdf|50Zt?eHmEy!(c%$W;9yCdFgNYV^>8Q#%0*=x| zJBBxJ>1s@EtX|YEjluB`fIY(&!Y`I>>V5=;@LR5Gc^K>gL?I&4~(l*DN)t zpYzwu>d@|@!~#|;%w6U6pd$0g1-5Sg750|WTMO7S)K>H8T~!a;RY6e^d+me(lx{f;i_l&4rhP^&+(JJBy0@AE=$>AV8iGz#D1uImDw;hY`NiU{iXXR! zKf9an=Osdc75BBrM9D{Ll;akZCERoVr>m$=9F8f8eYkwv$g_Gm;ix2G)0XU1=%K}0 z2Ra!T^P7Nc<3w%y!^43N?{9754aDWEzbzDPG^!;gcpW@NHkLtz)}T`(u`+n3QuRJ9 zpU!)qL9bjJi~JSbv!yaw0!4P)srg(&-zfv4mi84b6F=!8D`1g8bl4b{C9)3XFbwRi zv3S-Fc{vh1hM}i8CpfKUPR{S_;nCBiT0Zr{iosIHRg_8Wq=+$W7L5Fa7=Mz#&~(Fp z+l1=r=a=sTOm4sK>^yyM#L@cIr7U@kcn-roaurAMifY#c9aP7#=9vA8l6Y+&Ehz^L z8(fDfB;@xVyH#)A#ZK2WLP$Yb4?#HS^eEZQw7OqPPrjEC%0R7Q_ZZ6BQ;2uL>YRk6 z%N}j2*FT8ZJ{|jVS8H^nv)-fLyTQ|~jqzabp$oO;j_Dh#Di?O`S89WbPZ4Lf`X4OT z)^;`T9p1jX!z!E=nHVf5Ld2qoDhfHTMm`#GVK1A1v&+>i6ef@4Nf2bVSaSOtzWIP1 zuY;^VUxA=mmum^9#Mz^7K0D`V(+DB1xSof{?w^XZ>yyVT+X7$sBY0^(mMNHQ z)G$O1pz92lrq+I7P%xWp0l_m)dbJZiyR<^;`$cbMsk*9QHU%pyok$(aN#DDgg04a$ zAb!ucKsU4W8nfE3?`V7ZyT$kVY}@QwwCP)hF~PPdSA!|mbJ{!aJ{|KXMj*Mtd+!{!5Fh8dp1dz}<*kR>b26 zKF`|&Q12k5x%xgb+v(;~TQWxBpINjc75YSEkY{x5^D?lQV=AMbPU{GHt4u}oGpQS` z3)+2RH@ATOKo+T2O$`yn9Eg*e`uAp89_aSNf9Nq+{k*QlTqxrY4eofAbH@F#4=LOy zm~#^AtZK4{;3SCU{I$loahBYRCi49{=a;ECWgA=$@eCbvgM2Xt0(uIGgG9pw4>3$c zzA+B0jRjLtJtBs)z-FQ#$G8?{W13i9eJ5PV@0D>rfq4Dy)IYz*qo#h~t+vgTrDgof zdpw>WgYok?tE2T7zLz-SD3@~K@2l9+h@{jRTKi7THF?%$??i`}f3Fi5q&?uNZluLW zM!=9!!b3`WKF^zChD@D?gXuOJFEyV*lstlDdiXA;rh2aYt<0H<+ceHxAFAW`*M7}Q zMSj6G$+;cym7C&)vC}>gh?Cu4*!hFqtqEOZF~3*hTE8G}Q{P32-X#j^gyXmsy_@1O z{o*tFxqs=K&9q!lPajI9s2M@m6QOGDlgQ|1nv1t|A#WIqtS%TA%ymaxkE$?^$SzZ) zYmeF!zi)1mOiAH~x3>j19MYjVXQZp}co6a?eLuJ+O5|;SeOolq$oXnIrHenZ9c+%7 z?RZdYSDxYXVexq>Eu41y(ff!tpU4G7rpF;uULUwv+#AO`BG|J!Ydygb-2lVpQm*ha zBV$CdyBk>%6fC1@i4NVtj3JS01_I;!+(J;4)bv`!m(jc0^gOzTsfFBpsfC&qSKGA0 zbB>s=CB2 z^uz#fLk}vW%G6iQwEPw)H9b??=+I8jjQ;6QLrwtg&DQNWNl_@7q3iWIxE}avvA_jH5rseh5%*+~v?+f+9hlT|(=k$&@ zZakhW=QdSECvBWTi#fvDBx(1^Ck1@nq;cir=N`0dxgGPB^lgHaSu-x={NzSS8=+_J zqNBB=rJ@MYJaSj(}G)YP_V+pJ4EJGswbxkZTXoxM8=B?}FFNTmJ#)nO19+w|pa!-kJ-`)bzIVgyY2 
z&lFSr;8z=#<+$%GvD|Ifb~KC)P9|nfdX;nw8YCro_?dV=30%fB6>8ttlCju^)v35* zf5u!`WKaL0wGAuSu#D-0D3=ET^TM5s_ib1KLet_s?-NiJP02onrW-T_bOZ$S7SWiQ z;0Gt8N|8zkE3|Pr;Y+aN#X`6=6xx`Q>CyP{Vlg`5g-}`vP^GHqE?X0e(#;*;HpwU` z>=w|Nso)1E!5Zo)kaKO)N14)6_u9v#iyBmD66~lb@DSwOmAcodpirjk?ohG3v0_;p z)Brj+LU*k-ayVMb6DyhEDEIe{)0JSvu;XLj8gq!s$Yb86uxXyS{A0^1g|HM3okc1TZ3hn!nemCKzJ}QeHjo!jse$C*0F`J$AazS-A zW(sL-UC9}%2Naa)cDDul#fEfncXZO}(%hI7EbJ4g?;JV8sl?t_=tiQvbN!ROeX}fqT7u;$+(5b-p?FpnvXDVycKeWjL#6c3q){p(fgXdF z?JrnmN+9;cJPY$8Li$~~3N?7zkyYuvx7E8*!n%i^lg|a_fj{F0Fp2p$C9^_uELAPS zMq_hTUz58Nq>G9P<#Rnea^gNOv!-cN-0y8&g6}PHd(u^c7__3Ezippx(8-rOhVqL7 z4`E}E)A3ApqJ)_gDe13jWl`6tp@cDvGS%}%igrnxWNx@^! za1cdK9`xE*te1m=DLk80i3goXS`sql%<&Tri>Oz&%X zaW59u_RT~6R@5G0Vl}`#z#6k5(NW)Z&=5jd;5z6Yh|V*>BNEToFPsqEKaOReM5h*r zf&aKgeos!X9dvk@pL3Yp^&$qwL~Z7t+~mhC7GgC$fD7d4PMH;eCAtzaVyZMgMwb*~ zdv+QD*p()*rbX=!}$38o0Wd%~PsL5+D!#M(ZGHEB4H700M|vAoVB*N%0- zl{;0fF+|xP?x}3O;b(Mdz~ve)7`l}@dNj9b79Gw{FXpd?eLwyCc6pa+V@bv`im5dBXQwooiNN-7PT8Pn2}d{oGILsFuE{%HcQeJ;*F#=TGfML_`&&($ z?>d&DpOh%l&5m}Dca^8VFsM8oXn?1#f%n;K zlYxi*^e$|85d_)$!#d9_Io>rkbC>8eFx4?>W?F?1H?7qirOb=SN2%h@Rl6M-FL{4< zN>Yx8GC~<$c=0|weX=Qvwj?3w;wGH_*(-p(Yp=s-Pub9Kov1YO))lGz$QOz#idMd> z1N=sc-2+jEE#tRP3A`qJI>H&q!eGML^n5GsQJ77TTe^b-0-9OD?!w{M0rQiUwCJ;$ z^k;4o{n>-N(kG!kEe$6RT)%LaOUL9|%;pGg?$D2Z>nucjM#`Mo@pRx$YO`Q-MWdGM zEJ;tFL*dzXesF;2*KluW(C%i2NLaRTJEPNcrlxNv{2q30bxrlOgB~j>TeAx~Hnv}j zzlg3C+~vPmB-5bVaO)?TfU)CPZ-R8eKpBg3@0f|unj1^7|DL>aVZV`e-iwVmXF zc`F@)v7>acR2q*R9j4pHE8;0r6xf4fddt9fI3EH>Vu!bryRX%6Q*XIQ*jq;wUnYX>auxkEtg7X^!C4trEfA zdkRUI!l3*0)5)KR9?7-zo-Ykd#PXT!M>0mB5$YyzfU<^chCJ{~*7YQN5rtHh?9UPX z--#R|3I+AFZu(*5($}Q*WRNH@x21KHpIF4aDjPw6YGUqK z94xfXfaeMioEeL&(Axt?4iULEAAuJE`?(PZwJXV93L+E}c7p0+DfbF98Eu|xsl#4! 
z)Cd7Jo@;paeljD=vut?u)bx1yD~`bM%d7_2>myX6@9Pi;l+UD!HtpA{=t-T1T<#aB zJQ!4JpE<7HP~1cBBv>qI;5UpI?7oi8?zkhfzCkOO&TbfmL%@P9cdw$K1BtBAg5)qa z8Tw-aTF9eum=4c8!{u?zsuQt;jV%& zD?ir$HJ3n1epKkleE*|5KS4$wNdc+ESRSbay8?glgA4demksF;g{nJ>u$MP-4t3bb zVIzvpJ)acMdl=JI)U=T@h0TX{%RAUd%TKlNFYC}Iuq1VOk^C1RGsH1tYE4q?>(QL> z%bPnV@+MCLKGl||k55l5x7@>0(7zvSZC&P;hErl>`8tDIs-p1Y{Vjr#JzsYZ!RWgB z-mjKlN0GwFi0+^bsW?1;T)Gj7S(u0TV%VZJEt=1Id}+46ol>uAmZY4B%#l53sW|@W zlS;W3t0v5l9H{Gs!vutLVZ{T#sS2Dauu8+WX891IOQ*3pXJ@5-W!FOODF=#`39qBq0{LTrMldgS<|0~hILOgHg>OTj`Bv|2AQbS z@`PljQe75SWjo0scej<%%_-KIy?IH3$@M5|nEx{N1Nz3r-C{glQA97&VGYIIP;20w zT9^)SmSop*)EgjjF9AwS~||iDvm+QIYUX4<+I`oQmoWYXG2_V|Nl{ zpEDGV4R6%EmT?kdj3tNCFqeM2!jZW&zivy$Z*N!_(B?=A(D!ZC#3wxCd*M7dYqQ3; z&Mw1fmEBOb9CJHdge2}$Hf0tiB;RmDi< zTmtuIQ1rcBJV>|} zx@bh+`wip!mp5O9FOlfY(ss+J$3(OJw8Kq$a?@qJQ-c&_O~T0NV`!9-G>?%R>;pI= z>ZwddL=HstnvRI`hH>)B(6m6RSopfBBlJx{NQ8v9GCCJ1oEo+}o3N+*8E|60wI)5; zMNKF3sDkgM>dWM*AmqFtckZ4}WpGKc?3{(q%j~vIx5YIfz8&b|7j^E?PnOFI2Qvz%g134*ykeVDlnaF4C1wa` zU%q@NUJ+&SrH7lVe^YO+HBXfWjV4nmKszg4&56WhdGAGtFEaZTDRM4Fq&l~W<6AlU za+XaK`2PG@P`#dFfnG-V7{fR2L3>mI+XKw13oKug`_lvaZiH~wJ&5xS-!!(rj3Tze zosyRYR|^O}R~+5_a>a1h>fA*V4qwKb;{9qa#++xKDv4U1Qoiw~W#X8G=;rsc=Y$k@ zA?JEe;bLCYkVn8>e049Myym`}oAk*kUe*a$50oO>x3d%)jxMsoPGu=Ata8nI)V$E-&l=5ho}GHQnByIV6lWn7>NEU@x(bIW~9seg(jOM3tW8Gr@5{A`o=yBqKg$2|Wi$d(rmwgvA<>T=B8uugGjAr8Bib*>L zwKcFa70y3^fX+b30 zX5aDVLo@_b!Hx5Fol|D@nL<+4PIL)8L&FTDCDyk{*wZT7UytH@AiS$L9ERf4Kr)Vx(if0p#R{e4aqt^}KSjp@6ukq}FA2J1UL&+N^PW96RR7GyxZpXX2 z)ov}YFm!fFH#OKM7mrXq^V|$x?yls0NWgqkN0HR>5k^Vh_c-2W-FGh#NrygSI$n}I z6j<>CpFU(l!pIk(<(P}v(5Iq~{;YOeL(Y*pL4Wm4df_D0i~wR{$g$m$O3_j`iknMH zf-%JWPH5@w26ogN-|>oNQnQ!yJg23-OIZ^G9yFhy=?MszdljPWiz~1DxD2Mb zm7O0fNgRgl`_VehlpdM(=G-S7pcj1R^FioQ!!y48vqNSZB?sl67YeU64(>zYO1M5e zFwYz2L)fR3H9J~83FWJqcnF@G8>xuXHgyeRI8llhZvLP~v(iha_5^BK&u2d{N&&U# z)?XB;c2^@7N*Q*)t99p$!X-XkZ9v_m^nSa77;PQ(r99-yh$=*=0nhgGNr}E_2u=Cq zTPuS>gUM;^0sAPfB3v}WvM6-TwPl*spy1kZ{nCUd8k+9p$b7-PoN{AsX~z;?-~A8o 
zl%M0+M!r{iQX{!7=C#UXS%Vj{QkA`h(Mz-oUiRuaekE;(>d}Sf$IdM)yp9!;b1K2B zp(xP)aDn8lV%F?>^m8!*`=uR`2Whzjq34jz+hxOe1?BgtH5XOnruwJ8I z&pn0)6|xdlh$)9!N5-x{+EHOkXOQc9>mNx&zwlH!N*}ce_oHdV+i@~(e|fhn;AP(| ztJ77ODfEg53;keAg+5+C)SAXevNbq9=YZ}7T}O#IM^_2X(7k544x{t9?)b%odj#y8 zDn68sS!a0tt7p#Sgo~f^H0YbLCzY;fk@WO<ik6}CQU5SJ|#8PC4QG=Tz zj@g9u6Y%OyP4b7EOW^_gIhoPH*IcfQOe2e}$;t3H>Jr_w-r4nm_9WWYg3VE|caf&R zYw=cA?ljh^YI9?MsfLc@lkL(nF;>@C-Q$)b?b=8DX))Qo=+3(sj2CKYbL>1drPdg> zM~%;jz??|xN?gsa7R1XnYYtBFZQst|>NFYqu*{fq+~xfCrA;WidK_ z`UW08r2a|c_gKc(T4@JvzEc{KNa&m4is0+d8mBZxF>caju5T9a2=6Kfo2%DLm=AWt zH$FvP^u^5qmdou)FH|&&mwiYlv_uLbRRy|!oN=LL+heN~nl<|Ty3q8OV=2)S}T>-bci)2hn!DLTyXv01dh8Zme7v6l&vWCk<~rPJfV+`S@BPTcdW zr}srMy>9u)pNhjLbPOdNNX(4xd9#fKWp{bjS=hGcKV%%(Sy8S#417}a;<0P92E)k* zk{x&8M@X^O1IppEb`qXn=nYCfJt0+?@a-HKYba5uoOs~Edcc6PY#xUfD0yPYt}V1C zgFWhjkx!(V;om#MF^j!dwx5P``e$2X;vU8pV`mGzwF>peo*Y-{@~V$ zpclM`reHy&t}OcUi`5jT{DwCwl@kIk6#^(EgsM5$G+bJ@&|S!)WcO?KN;WSVgKoEy zh*b|dbwwFt#?0PDYI5tMSl(^b%dzHY9z3Sv2^IXz8y&(+zv&`Jk6byydnzi8P@9b~ z!W2z?0>ges$0QineQ!IwT%USsb&}ZEz_L_+SMYm*LxxFqwPm)qohzUCZRXu) z#MkW|(dSP;OP|c92kEz9P|{q4+&2!?8JkZ*<+VW%yR`Rof7fjo9fGMb{_WUZFFlQQ zTz09wRAIOrO3n5Y zxl}Fb`f%sHoPE|sSJUEnzJW^52|Uflc4;py|LlK5nh%%uD$xeKt7h&%n1q?@2n5a z)pr#3RWv!(9QA18PN@_APLCDH8>Qe7e?RwF&6VLxNZ2i&OZV~h>Iq&m8Qs=J@qQ(B zN33Mybab&aaN$XeO{Esy_<%gva8RoD}lZ&%;?!;C1d5PPZ!7&^xs~&;)g| zZ7R*_o2TSzImRJJsLG1$MzwuuCT6%Gfg{i84pp)f7*QbRKh-0QOekwOWfb`GRd!|V z^x!IAat(Gl{EqKxQ*YDGbfnIcF8GSHJ)$pysKu(v#@+B9XMULw)KS3sI_|VxS8~^DJ%GXsUuHsMfUUfS1$@4 zBiV1_g-6y(+h*oRICfum(=+uZ+h)Z~Q<+EDZVa6t9Egc@!CCr!BR=J!ic#8$za1#3 zsn)6)@YFfewd)+!zze2zu&It6zQ@+@WlZCdd&3?Z8cU1MiEGJ=x;-T~wIdRZY` zrU#8ilv&Bq;Mj~4)VT zjFaz3FKc6UE145SM&s9*M^CgdUuTJx?&QCxo}%ST_PpTpUnr8!JWaMn7KCe#2#b2W z<@BDM)m`lcSHGu%d|pD)LWl^%C($UH_WmSZ*Y;J4DMZ>&l22)q3{vi|M{hf9)D0HZEXS7N`thMS*teMRl zG7_KsF0fvc5g8A`l56?x>N-{hzv$b%4nrUsy@H&{8X=Mk!bPZ!pS(>n{Rmlhl1iy* z&xl6J`v|Ke%H=RGw^ey7?yRPOVS<^52qQt-u0BDPZ|k~h!O<*UsC40w`Onf7BeZ$q 
zQj^*ZD;w?}iFQniCEhD*Vx;IQ`uHyeiDvlROLIRU*{9oxOPS!*J#ZI~_1xvxB1S`< zg5(s((66tqAzS*%W!+9WaB!9sd>5{lbK+RGSx^n_s?C`5^T<9CBl2EOz|U+qYJ6X? z|IROTtkUhv=GQ{B)uVd&uP&`=8(bixREcnlM298D#;+L;6qy|?M>P&#*@$c$Bp=CM zb&_R{xiJSX6eBosa^6|6Jg*me5#k!(8mAW+YBlLF9zA+_k41~`!}_dhd>39XYG|q+ zoY2&&oGId?rQ!!iF^}i4LggD4LRz`rHD0)WK0`qf04;ypv(({Tdgn9Nl@h0M{3(oQ z@Ysca?yGa*Ggl#UicgATDklydTcbP6s18iJRTl%~^WR30=Cf^a?i8LZJ;zpj$wpcm z^?-0A7zghjoi@Y|Uweu1q(IEi__@p6v_?q5D28^IVf&(fNTNhTX#UFzjr9d9@hX4a zO?`UPHtWpiE}Wkx#O<*_%?<{aQc;V~buQ3duM;D$ki*8kmTw}l{|LMNBPbgdp8QAD z?oBZ8KjNmr!m?mFJ4YKsYp^j8>-}q}FD!!kzilKU^m`*%$n$TFVDZ;rWoIKNSdh93 z5Los4g$<#Qn;OC*qGDoVbRt0Xva_Q*Sj-j(C$_LP1FKuu3fV#}{!xZi z7KK2K9WCsg>>NQqL$oa%p-v*^hK?W(b|C2e=RGGU(Avbw9Eelr;`$l4{b!sytmR)A z{j=wrxbYh<|A{jo+Sv>WVr2nK0`c}1#zMAc)({XASO{tii%{q0;s64wf7~-LbF%|s z`SwpC7G~y7AWk-*mW$a9OCUB5HXz>K?MH!`jhXp|p8AiPY)njGji0Sp*qMNEY>15- z(B!6$@J;kL12YFZ2u1+J%)-V2>l=vvhRqESlMP{k4ftpL`=0}bMZN!Jz<-Af%Wuet z!;k?+YvgFbAZ%xC@^8b#{4;R=ANT2m){|+bC-*A%n6Q_S4q8~{W#Q%GUxNmCy#w+Zbzwr7u5dF&Be+V5G=3mia z`!VzXr4q5T{Duw_>mTR}dlXD+FHOeD!^3vCSS#GT>K*ogEo+A0pJKo4e^WO{A%rg zHL$<(@`pthkragyh8bm+A9*5RMKJKSik*a{sH~wq_z!NuqUsKx74O@Sh@n6UP|6d9GmkZaQ68QTH%l?}% zO2UNE&K7ECZ3zAM4aCoMDgQ7KY`@s>U)Nafn_MNo;ruVxoWDWmH|G8ucmP$0^#T(I zz`An&YQBL-f0}Q=mjAiRGBa`Ch{JzvAlPr}{9I+(SXpiefH*igfkl?}pR70cjk5ka zViuUi_(LAWoo$VsEbMGy$^`;AwzDy`u!UuAVFvoS@n|4`{=&+Cet;*!K&l*Ak{l*R zCKeWESZWzY*oT{wlZy++z|UTR8Z0-O1!DP;D~9D}{y{7^O#Pl!=ceMnWQ*mW*b-re z5oTlMhIPuw!N~>7K*Puay8^UDnEtD$a=*KRH*EdKhyorB3t&?ixdCShPyt3D=M6V( zI{umHuLk;$sk#B+UytlU3nM3x5Q&MM8OSO0vk1&Huwwvr z6WTZFp>F>BMHE3dVh?0n`b`ynHS)hr%KwQf+^jbsmK%Qx#ByV^K&&_G_dlrutm1!6 zD*q12f0oK$jrMPI@vo)w@4EdVm47F*|4j5(>-%pI{kv}eN2+i$<^Nw#?@i9FKZyQ^ zQ~}sjv2!v0R0Iw{FJPtrRu6vex_;FIK#BfF;QlZ4fc3{BzhhcN{%S-2!SUaCuzzLo|HO^u{BaQU(~bR`Q;6T)SWb=~ zZY)swU%9bAt=6Ag$eVnAyQhEMV9G7+it8m}b_1+st5W4l#6snnNH? 
zjP@p`e;pLdZ*%^u@Bc3b#m&LZ2uSqLF)=f-0k?o}!N|$eT-=1NVG>7};k zm6#0_mE4JBC4D*SE<{J(73^062rHT}>GjHy2hwoG>)^m)v_sGO%gbnkvnr;!cD)31 zvvDOooQ4CgHfFHH1!6A$^VucW5-|D^zcoGyq>Sd&O|CI2YB#7z85UP;1>F4T5%g+) z)LE)~{2|2ke#4aO_k57sy_)j=RFlQj{iI^)fIcUrJGEoEVSnLTS4nTyg_4Ao zq3)7QP7T53B_g#mR!D#Va^Fd z$%Z*7XHgj`=xPuW+U=$hlH6SeT8#Z1lzNDL7>myc$N2S7Mjht0!NT`; zgGk8he$901A?JEv;+L{$uaasNJ+6_&!v=cOY(T0w1DE{h&l-eAaDM>**WIx4>)X)W zOr`CTH+>PsbkZlq(x7^i;O%KPO0KWW>QWOyVK9?~8<4uGj}Q{5=*b{4VfSQ(viMRsywVZc4Ez5HtZU7P4fGdDh0kDe4tHp2}x4I^eO7 z(|0foL)OoJ!49ClEHI*r)MusS&*FDR7BbXy>CjYEK^O>Y>Gf)BCU|v}x1iY)xDB748#nkU+x4toW}b(K*RazkyzLMSD0lO-sxmDG*Ud_q0p8swhgnQ1GnuW;Fw}+O z`3n)!%&2QA3x3AxU7?zp0}LMIzEuEGGOpsv_NqF>@-bhL?`YV`AOeb7^1)USG~>;m z%AJSK9a>+zuj$z0wqPmCs#OAG2$8lN9)!+4!wQ>M-X3dJ*B+iUhnfvHv)vc&uYLV; zvHRRgl6V?humm`R$iq=Fn0uwrDA1YnumnrlX(gZXRLXd-P1WO4&F&k}g!N^pf}YUH zcuIQ}%8kOhkXaMFY0QwbAl7hcE1z-4x3>7Pd<(VB@h6Sas4rW(}R zRlVBKi%#T9qGloAr?)wxATMz+Bsv#R6*(rf;6VSCTtd+AI}TV}SMPfqFLL*!rYr5r z^}!r$9oKjFcZ^{%H6JJ`1*|F~Foey4D1m1o&&F>LetHNYp7 z=!mNW%h{(D931l9)djGhrQg-HR_E?>>p%SKo{$}Et9vO7zb?f?ErrwlKH*k$$u!9T zniji)`%Ml*T8T+#^aU);*wO^HqW#pB^J#AbMb;M_M==7OXi3MbFRFa0fPQ75WWWtp zE7h)?bG}c|hyB`|?g*)ZvZ`s)iHtO>b1aq~$ zGS`K&R+KsNY}$RjGid>*wMQ#zN$NX-dT?soh80o4wChO6PuEY-p8o!F2qZa*#cXzb z^+rpecl>UXOER~4F@bi|jWMks2)ujdqFLtK$w`D)aMhWgi%l;`3f11bFr^(d2b3+8j zz_j{4hOlwL;7pH3ez3uc7O@227ReXTxpqC9-qsTM4983_R6K1yr`EXuH)FKr1vO(C zLe{Fn^uy*?6qLyo+I`JgJ9YlD4yIiphGdW=xx7bf@j~IpL_+tjwl|nIT=n^DJ%$K9 zj29zs%xMj;=obXb-KPFq>R)9)w7*QV&*<5~Oj^s@`?pVmxlHED(gc-+Ow)grbgoVTQ10qUK+EnagfieOy?L`U5Pi;2UO z_!~OLUG04xqU)~*d`z7$3@mMWswA8DSGGEXo^)B94D(i!{lF+EUQtEPej=J0N>!$a zzNXW!+G{8q*-dsV5uRK~gwr|yg)!S{vZ7hZguR+UovWH*!AMWH+#VSETALBho$eSs z98W5Jh0Rerk2{>~Z17`jWEWy>WI1q_Lgt32CIKzQHf6dMjqCkU`3Qhf+x}7MZGhb} zS=*!M(ThH?T2N3gqtDtNx<~f_xZq@+PHAWX6_&JzV-YUr-mdKiOGB>ecGemL#(m4M zq8+yJ_hhTp&pvvcMy%(dFV3Bfve=B1_ODg}EjG;#ea;K+Rh%|Ri`n92^NcCG;W=6z z{RhdEr^b_NrBM&@fhV1X0nY^NKLE@5W>SV3Uz8E~3Rg!cRq?Ze&YI>enzcIUb5<*w 
zdgr7A*q+We{Jz_6sB^XS{s2D*&#)uWu?2a<*+{TMu#?m&b6Y?cs>-Ng=ha(B-X7qf zHw9t6aC)DwN=nRJ6n(AdW7cxm1nJvCa*zFKbW{G5u~Ms}AcwPiCk*o`XsBkb5zR(( zaW0?YDuyr6zCEoop!<=q803o(KGG9k7{^-D8a&wwyExBayS*|^J%g8d#d#BP`w=&B z&s$(cfMwBao__^Yn{#sFQwIUmMQ)q~Bnd&xh7UtNBGf8);3R0FIWV93fowNt%5=Q6nQ8(NhC=g-~5 zshZ8W;ZeUjHo$A@2s@Ou%<`6Km6RJVAtTL~C2nm8&OCM%z4AqS8=q#i)8KLB&x@mp zv{@`9KXoe1NnF{Vpac>@(#~5K5E%so9?_S-94_w=4lkB1WlPR7{gQN*sI{!|;Li9d zjLeQG{Jj%-=pKeT|5EvNoxu2qM#cT*r^zMZchGNrZ@zGaSFWw^VxPhu-477pn#!j& z+;@c!-mdvgiJHe+gbc9?=C`>Sd52z~1>DbZiiXtUezNw{iTDnQS(9~-wV*hpBi_=E z{*2BVK8xV7{cg#HyjG#npJ|y|N-d~$g;90$hb(ZGRIKvnE3mYuiri0#<)jqHu6oQPjEE^=(t-X{KZ%(M3otGyYt ziv8pXcTZa!r0$40&bOD=9)IqfX~2)W`-u9$tyW)yU6_{O+?!$7cr!f_)p$qc5t`L^ z@Ac#;JF|uB6g6a<$K1*BksHG^C{FvC4?;g292AL?Z4BBpyvjy(q%i0qE})jRK;wI( zp?)$+m4j!TV{6Z0WrrDkjWF#VFvX4IZBVrKyQt(9Tx}Xvw+uzphmCV*8`e)VYp^tv z=vl=@q&>t*Jlea1w~;v14YD4b@EVDHX*Mh+`o5TFHcM9LBN?X$RycKxDNevVp2vSF zG;tiryJf)cCfqChw8&^L^s)I)pZO#xKnjXq+*E;{W)8+c*7{cT^LL2H`fQ|U9@V_5 zViG%~UkEhy+V>k0^Ye)(MCNt#kL<-eUEgW5hmOFq*r@BPZhqul z;duqDNH4Snc<7uEaiK1jZ*5CmX@|@q#72yx*V@X=#)WG49>#>O@BFDT^aKCkLqOMR!rWy^pX~Y@LC1-RcK^wP| zdi>OY?ZBKhh)2onNro|erIG0h_=gO;3xhR%cw>t@2&h_}B*E2j#h_F>E{0_v<)IO- z5bH4R)F_Rm%&pJL9m74D&^UbVD42v?@Lv%Vg6BfES zlzAEwr?E|c64S%~x5QN$fwHAW)|4t6^Mu|;Ry~Kt)O=fl`eGK)Ok1!>;Z2?~Tu*{j zd2AJwp`6I>2zE$iMKe@Lwcf*Ww18vS3En+>QSsZE`~qMT7Hb*tdlv^g;h!heIdmQ; zhhNkBjEp*BRVaPcjPgsMG!b>EbqocA0z!``bt6rb&fq4zv5FSj^{Y{a{1(+w){D6bJswwe&vPGXT7wo@pzf%|RDVLPbk!-!HE`Kfa^JN8MZgdOT!3Q7){vgO=`^&@k{!ZgRiLEFEjc zBL{55xR%tw1n=p5eFw|1dc@t{EH zL(i+=xu5B=B0iXBN51}e=~q6*b%n?4&`0EGdUNIP0@|@4X1F*XXPSI@(SkXt!bGx? 
z43JM&)ITQNHBfWJdu8IHP9ypb)t=Vvm660FFF2UbTvSA~N|Dzl8WH>s(w6)?(Jv@z zC7zbTIn(z^gsJIFsV9E~bP<1xQd}WfUUL3xpunfp?9nQ;f`P+|0Xy$+X1C9zB-D7d zY?LK0pT$^?ybWriK&Sb|L*x)?m5r>V_%X95Ef_?4lX6hC}2>9IfE-U#`+`zQGe^~M*p-Zww z^EJMOszS%sR?G4i%rmUFec&!S@}T?MMN!~#5@}ME35M55=(ruW?HD^cM~kfa?wB)W&O+UJG95WXDCMR(puyOs z;pbOBm!bV2?6y*uJV0Mpg|W+`;gbrIiVR-4Btby zZ;qFek0C@V+kW6`ZL1nH{kRdX2Z}v>h9LGaF5Br>sMJEU_=<#O$7Q7mYfJ>*6lGZG zvb5~i$S|~wi*5!DII8r?er;+A`s@+k_&ykm21m>>6gVo@9aBf(Lu7{qf9s8sPJ+&M zuG4zS6nGH*@Y-HS&js% zv~o1+G;3mITg!*t0?U>;i7!pgWpbiwA!bT{9{;f zh`MW{ea1Gr)L6E5a0kIvb~}wNO$^!h&COvAtMkpS8dB2Qs%(9D{@V_d8GiBx!Ew0C`3h)iM^Sax)mK5fn z0z;<4aNl8Q!};?{zy%Q@qQ_EJw2wUL_~h$g6Jz*}4M&o!7Tk`pZ*(5b&O8+~UJ=v> zq|A6!{IA}ytu4Yx(#!NZPfAQHI_;qz8?w6h7&PyD%30305AqF<_W{TypKH!?+k}@s z^@THF)#g{rkCf-|!(Cx@YJlh#U?F*SC-Vk;T)MC%tM`VNS41$12xd57X{PhE*n2XO z9M%}tPwpOEEZ_FrXD6gnRux(r8qN#0Bx(;;%_Vkhfk)M8$S#}2qYW>ZJ$n{G6fmVO zPpK-BJ&xFW#|OFO*6o0~#IeMH00DPe3dGuj zOz9T&nFd_`Y6!5`*J@N54|JM6AO{jJ#QCnhTeOOJ_F3`qir@qM~3(!Dt)h(S}$*=62PkWkCI_C;Vut=6s& zNXqjyprTg;5OL`kc^Sao#H>7f^q%agAJr`ke;6p#p#MZ+nD0HyrbcK8*!7pmR zwb~(X7U~)z&V}MW@b~m64CwNOv3vh{BYZT7fK8r|aFM@IqXzJS?u;(k^xit3`SuG2 zhsgJPlnHHL*@G_rdrO1f+VUw>Rd9%*<8s4&EsGMO76q;!3tSi#KkJH#sVeLM+5Evw}(**KX z-x?(=G7boZ8j35H#1uG`@rPe(gLumvFuh)nr*WHASuD)AR@ws0vdeVy74zCEfd!Zb z37!%Ro>pH)j=<)4z6lp;$bSswVI1+NHck+P^1k20-(qckLa3P5GJg_^AGwu{t{^kO zXbniGW2-M$I?inWIoiC!D)ulEUF~3iopl=tRv2(#j;l(x!th8qnS@$c@57KD1g33@3xY2!K*UXFrahKd z{uH=2eB@|YTtNMJQayDzY}c`3siZ)eadCS4CWflV`D)(517iQCYDT~1qtX2NHSN68 zKyaX99NBUTN0wSIMe~!@rv%a52zw0&%eAqluJaZF%R6_+wHs-cm`s(vFnjeqo0y?N zP~R{zm&-(!=Dqv%^@L(^atQ!(3)-+aA*tB;aQA_Xwk!Q{LxLq z1#jDQnsw-V2iA?<7My)4Xv(L3rvjVzYrE?yB&p?ajKj>wBbCxw`*Fw9P>PSGc*Um= zoG?Ju2HOgURzK~}2*+xdD*!_fZiCtvU;yW^z+2j@uOiqkx8}z7D3uGw@+%p#jRmHc z@dCrH^@QI~;G57auhV#lZqCW69$H_Lsb$jycFm0!i>`ki9w93Vm`~#ZX6rL0mUuZ1 zyM;`CKXEDXixJ3R1^Fk7{up&wi1d8P7i*5ixQsf)u-+0eaO!U9d`BYqx(Tg9;XoYv zBB5@X0wXkoN-$%{a3XPMBUcAcqw3}MiXS@3jlRGg%jDvbiFCX>5mPXHWO) zXtuj!R^pvYwV0Z$dEGLgP8L0JYdI$G&#) 
zJz;mJsjjqL_B+1phG(;7!saI@45Cd;y+h+BeekT$dwy)-4$W-PUr!uG%6X}}#EsDU z2Hyz=1PCkzM#tN!^nfFP3{k1l>f#O|QIoIVQ;?@QJ3mmKFnE34o`ul^Bu9g<($*q1 z)%Pw3u6`?j9BI{nMNBb4_s=QA6u2&Y^yRaIOlV&A#8*o1@7b*jG7Ev+ zUQH=*M1!hK#QPzInVhs67)_&)VEM>}S1^)~jHHU0q51ai(B9|q%3!0S*btD)Kv#{g zOU8lZVV0$X^Geo`ky&i)BV#YtV3Vq^pk?l;l+v#iXSVUWA-fNfuwU+d z7tOnwGXqxzY%`r<$=d`vp8#K!zW;J63X|eXv_qCpC^26{hWJ3dPl6s0j%^cQ(57jJ zPR#)GA`!6!!htD9Z#dB%b8xGp53Oo_pchf^70VcmGl|7=L;46$+||2h12T9s2oiRS zBfMH2+kQRfVvEOL+O{sOEsw3`51a$k)RdeFxl?r%FdYL3Q?W8{@3TIx9zpevdFgcNp@3r?3}&6M9PQ06}N zOY6?Xx;pU5)+DVSxWMs^!GZI_HovBV!RA zwf0@u5jpwRn^+M=kw%h!JV(ek)9uQHK^NZ3629eRzLrOi6kuHKbx#juIDr-)>9?g` zH6YIo2#7-t;*BSRRSlJ+5Jexy5-XB6$5&1#jRU?$I-ME_FWDhoP0f}2Av}zE)jJA4 zwRVz%GuuW7h!0dOK>vYBGSH2Hh{61|rtV0^&~%`w)eUV}TUTyPVS0{3Vcs1NlNzPCkrq3Hnc)S0-5pHK;aa&*%QI91k+6=Au8I40a*i2!qzUugC9cdxJ%>rB_#Bfd zF6GxYMQ$Y9%&uR;*WJo*y>|xa4eqPIPD@iatAjuaI`8opMayJnD}+tny~4Kk1)I(<`yC`Nq#X;@$HO<=z`gPHQFNU)6A%*r25Z7 zn+Xa?rgk%1g14gOFhX|NS1I3*kU{FRaZ|o-FQQEK8cZ%bk2~b`M`g9?{0`H7X`ipK zee%G!w%wk|YcwQLe~Oh~k@JC^c%jNQQ%k#Z(?CYhm=a&wZ;EN;;Vzw_>H zYx94kd3g%@D&~9bYYdw8DIbZfYfcdI%=`1+UcV3gJJqvr)z6j|Jm@>we9TQT5J~Mvc5HD=C_l5WpDYJzlA_WzBL7Jj3 z4%9(Cx^nWH1T3&yd@)hd-l!ah^a5?XL;6yR!FwA*AvqIRWZ#cRg$FE7gBE5kJ9+D2 zZ@hBi^aw7dD#++ABpXH|N3hNn=wvLuI4^x1^9B78Q_LQxuQ-$PxVwlWkJFp7m{L)= z02s_d8pV~*01|I!Qf?Eza+r62bd^wrZKhkXqtjp#8eFsb7E+eg#26f`XMrxto$Oqyp#5SI#E4E-hy+iVnrK! 
zI}irG>0{})1CIcG6b8X+!$NoD=G~ArB&Tqxp?XP}6|EoZ%Dw{CCL^m@uZYs_oh1b( zq+dO(%fV@eEtz8Mjax-}i@m-eSqTLPMfxV&hx~vteGZX8< z#1#BO_ysPu>BCVR7X;qj=~y%EV>}=4|#JD zT|lyh#G%-L(!l>nfk?U0+_u+a8z4^i8plx^0DPFqQ`0$BAJ}NMNMoolqK2882gM%U zO!V!CUkt`tQL&)j0QWYNspb9LUhha#JfQ^`U>-mk;9^aSR3Q;dl&b25QT-N1!|#jF z#3MJ8C_GZKiV=WsDi&u3BjOqgLjnIaOEpcLn|getA!m%-)231fZE0@tchg{<_q%DJ zISMB&bQ;&~=8q{(buOrL7_Urmwl8I23tj?cRbd2BGaQ`4gf!yn;wzz1az<>Hz9_C2 zDV5UbYVYL4|WXc z`9*KMI?fM0g;!L5a0|DrffrXr>GW5ERoU&l70^$*MGSbQX$9vGgJFkIqf==n?KrDj%K3Bi9x zJgFOYFli|H%;t_A(kS#~ax8wepPYMjR`t%<#Mv*zMFr3^<649%R)bl9Cuor}a{rl) zcIQ<~nO0WT_0em-2G^@KF=tGbzA9K!JIIre!B$4O1iJ8{&tX~<3z5l&@yBx`gQ4e{ zxhdACefx_8@dRM`8gyWwH)W;wq%UAvjSJ~RekSO>X{nW6SvyY>yN>KuDRLR5n2*z9 z;}4_)jEa0D<=a3#zNEPn29+XomhUuLTyK`(k&QWqJ zU9_F&h3SCZcAXE;wb;kJel?JOYpq@}tOk?O6t$RiZ?$9LAZOL&Sf)!ppP9pRMNaqy zwAsGT_=`DQoRSiXajJQA@Y4Rr+U8v&DhgZr#e&U)&5p_n-1&tG^TSl^q>VPVw zZ{9|dk#|;9A-0oR-xkQ-{qxTC%fXNtE4)r2osQj#Gs-N6G|9PyNxkif5xt2l+>R|6 z^Xx8#a&q{b@V?V2-~2U*Vq9*{9uEjqRJ2DE>AFO zSPu2@!~HO%ZqJgkjSe1UtJ9LsnD0}4nhTqh>!;}WJzG!*(5;2~1kt$W9A6)Qa^8Or zW1p2HS-~CNWQ`LluNC*OgyY%p1vH2*@Itnlg{>Lv$3w)IRYCG=3qB$h?*zCe!pms# zdE1NJD}|4b5Q{l+Lg%4Hc2Dm<9X@?65qwv$=grUY`;@t^pMC`YA}0dhBfjFHi@k-u za0w2oQvLR6xR;p{%Z6miy>~`JL~jpToc$CRT#U=WIkZ0oRKvJ4hIw$F3f1KbwQ9aW z^}~u!@~Ves2vAGaEtJOIA+6?a zr6y@SbOz_n5dJ8)miII6mUIgnwC*B9L;fIHk97*!LdgB=e7Eu(pB(v-=3i?!hpdC*Gvb1neU}+1(8kS1W zd_wzsUM*!72O2(o#?L>-B_3DURwZ;1T!-&xi%mUj7h?EFN(~LyJul6Ib3qk%Ra*c|r z1SZI{G5GYFJ0vZ08q^M^P?`ZA)=m|&^xK($`db1I*L>6W|Jt8=e5W9087Pt>X zj<(*Pb~F+L=)Z!2MlK{ScP{N_!4mvRH{cwl$-H#$6g_(Ql)Z)XH5ay2&7o;K2`{f1ekkTF53Wl?e{b*}K|%Zr<|0>}vZ}Zp%oOwqtWQv6 zOYn>?Vbyd&yvcb}v{h5Wv3Inw_9O&xnag?xv!qZMU#2`rmd(6fxR>sNQ z{rh=k;8hL_7KS^tDH3-OFTdFf6Yx zk<&l@LWu;JpjzHknHwGxo9)1>e}ZPi#qejaq(FzX`h!YY6^b9~QTB=%gOU%0EixZ_ zHpxO|AhYgyi{Krp8%^wus|FV^PA~_X6ZyIPFoKN;L`Fe32&kB_V|b2bN*)k}wLfXW zRg>Szk!giQ0{w3N!rd1^Z5?Kq37D2k?s)F6XThUbzKXxsJ+=50%K)X|ToGFqEV$h* zw4k`X52uSUrB-4jaX z`S98uyaU=$IWJk_EY}TcA2}z8EvAcXnx+!;v#`KWM~HP*kJri&b>%=y-dc5glT!V@ 
zIE*os$s_FyDQ3F5-n%PG-@%%a;8tSiAkO#_^T^UPJ!2N=l$Ky_R>xpjSiMdN8_+eu zrnB|4yDki-d)CHi=w{GJ;tZ~T-wyB@4s#&pFuE5Qe)2ke@KhUi1bzw(;ua|D5e970 z1&=1&g?C{cT^1FT%It>l7W9Tdh@O=z!jx7w_S8o|V|R^%c5R>+Cw4HdNHmyu2E8p4a!8E6h10y;W6ilp+;|Gs91?%%ZKX~5 zP9+9E^-(Yvo`t*cv6FOV+xALy$~n8vmnD1ViJx?(H`D`ru{Y)*x)1*7Oak4KGNwgU z(}STNPw06fX}B;hZP75oN=6x);!MZ_#5noIBYrXL{f4oJ&{?=S>)`VGS+^ z%RWDMu8UsMt}4EPbxzH8W!o(!a?yib9wLA|KZD-4k}T3Xv-veP@s>N)e%_IIK;K+r zc|iPJYg51Cv}bYcG#3_szDW@L5bo^}3%731V~k?SCaO|3ex@$Tuzz)Y*BEyEqeX~_ zkuN|-V5j%X{?Cs6`@_@75fad^3+(t|g37|?d?;j~7$A4v(WEC=AipX|$zcEHzZ#JtDk-r~?z+Ph$8qN~uZ@!)=DfUrR_YVwuq8$EP^5&;omecawI%V~2+T?CThmk5#pOZ$4! zx7Otu=X@Ysny#+;e4Aj{;74yLybr#cg-0cX9SmK?al3GjYm*{)IFd+iO#%d$9-jH4 z?r6f}tz69O4*oM?$Nk#k?&F)#BKmJx)ds^nJFc$930?(y>DW)nD;him^mKh_1~83XQ1$iGo$)kqt(H@l^mYs8+X@m@tIqU)NR}7U&=X-V0x43X zXPz;eGfOpcJYfUb7dT#j1}`=6jRK#yRbk{>(SwxT@ z>@f^nMOT)8D$b|}(d;xk z+ocGgPU6H}ciHHH6(KSThb=f-SBL4?Vp;Ti4-OZG-*MDdTq6k7yn|a*hy3Ege+XQ( zCd$j)|1hdA@yf6xfI#A+FtK)R{!x-JS$JR4D+{-Y5Yuscs3IMxy#yS$_b6-)XE-F9}fh%-9##Y2u zwuml`U9WZycsaGZ3r_JJ+IDhc@^+7vo1-HL4D9Ct>m@^4>K3|RR#=9%Gj2zl`uJk; z@KCg}ZMWo7N2xo{2gF$6Yt_`3yWt9_k0bW$JLIleVkf}OvX_vGi<6**9! 
zUhueWRny+moJ;J27Gvqlq-*N8^7r7mhg{###b|k42vT&K_oat#oYoBSi$fawAA|cw z-Z>yQN25+hr3Z=FqK1C>j;yNo5O$Xb#Y*Am+q6AT#jLJ&Rm+hIVB$}pLQ@kr#5z_3 z*E+_-mxWi6$>UTK`N_9XzfAGWHH^0IYXaL zvtd$m1R@ae!$9OpJZRMTlA22Hc0WWvVl5Y*Z2J#pNGU3GU zIaMu}@jd;hMvbC*S)EhSvrPQaSM1Tb1@|RWA_z}rU|CekswY}%({0CZb~?@i;@64^ zPPcD`NGmaGJfAzX+zuk-SCgnqi`5S-ep7kH-gJmogkFejp-c#XW>rUhJ5wdBi>fG% z-1Zt{eyC)Gymt8@b9FK8Euex7%oNqg?E`RYksVa(?>$JbU{@=^y+xbio2RY0jUzVU z4fVm4j=n?MQKPh%TlY(~lWt&M`SPO6SF_1`YkLClZnY}itRjrBSS}H*Y&k_|fF%hI z&-V?}LD6prdXU8|<_+Mq$^%hNSo7r^+sR}IB8ScU@+ai>y{ALgbqjo^sZvLI=(%O4HH0Zdi zCwR@`E5o*tSn0g#!*r^*R#U2X@u+)9ihMyR^?A=_XPpx&bC}z*Dv~y!ew(X#_;%-7 zuAgRcqd9RCi}p}4$Q|kISMAOoA@NzH=}?!ojohm1SLsX_49hcuMy^KHc+7+S{j2d- z?(#b%3#V#XxrH~8diz%P^lnfu;`53h>gPn24qER$bl+XmT0b>3%^{fwZL)rTmnBU3AeKrQmBGEW1o$C8b zx!SJrnNwtx8Db~Cjo+JFK66^xD%NTFcQHbQz(Xu6d@swVS8W(a>)vV}eEMltWdekQY`MML{;Zj?wxiP1e@5*QSv)j}envbf1ajvdv+hU+ zc&foC#{m+2N6fjH=D({sKM$G(?oBfw?KSV}GzYBTR z$M1CBPP8A7qv3 zk#@MMZhB0x34;$-+RxsCIHh9XdCE-V(b4cCu+>|V>#A4G-e9@p?`Y`UzK28)@MtPD zEIqJ|0iek#L{map5yoRbI7Xy4j4E5Bit6l*!!cz`N?o4^lC+Xa+eO>c?2bQP$+T~v zL?U#O!E(Yxb3P15FW~BTS92du`$i$^{xX_uMTd*gWRDH|#7ue3u7kdTxC~kMCZQUw z71m&Lf`>E}siGPc6A$scD-f)lZSx4iE0Gd6mJ$~*=TD?;LOY0(rO6^*;y6aR<1WH0 zx1Qy#X^`mMB90avtsRlq-eo>#XikNZw~xIVBQ&uzJ=th-S8Mv6Qs5@+4gda7Xk5p< z>r4X!*Oaq>SfNhZXE0shR39)va?2LxQdmaSklKXU4KW#!ilRJW{A;)Q8kc?PhA^;% z%IXV@r)##(qv7aU*W@3J$6s5H{~`dh|0BrcpCC9uzO0ou5Jbnp!V276{lWYEG53D}$^FH*1MVAV`OUlggLxzd0#yIy zc4;m)&flA?|0=m>w%_~%R~9Jx&~-p{73Npj2c?Y#>vQ=Z%c00Pq7YH?kcq^hjD_a` z^Ouu;2cf)Ek1r|7$(@&7-~Cur+Bt|+{o1-EKA*v7i(W&c6xKb-GA`1U}815jyID@ zv<$NZGqhl^$iYhvP9KufM|wim?ciwTs=OLI5Q%PfZ@W4;wo?UWTyV@Z@kUQ!5KW8= z-X`^!j&5~FwlxE0c!jot+a4|6!7_cLnI{=Jr;N&sy~_wU587!e=e;$oDaG%;(Uz}Q zr0&_2I##82fE`Q;XOJnKLZ_{UP~00`Yn3izK7E>*{ygJ2Bh@+rJm_Bkot+*wViAr3 zj|g}{qId}*dqupBtaZzGo^1GTbUc-Y36w&-i!CUzFrOwyoM;On2C%EqS)xo*u zDyokg{xNCik{>tysoe?Lqn;@fEmzx?u(1%XJ4)lXq}%l)^w-NV+DcJ)N;f+)QetTn z!(f$R-WWJ@cKS@7%RO7^KNx#}ktZrQ|l?`tv?)TBg 
z9!-?9=I}rN)b}jQ#@!=uv59S2gkgsL0yALz0bYhG&63rk^wz94<}(SKHRRMXjv$>8 zN;`_iK(H3txQRJ^++E!VjZsl;x^eTtcAK=RQy35yV)zfiM#xB$FhL}op%Kt12_a=M_u;DHeYjsXI$B#jpr(HzfQ~{7j>F* z+M<>!T9iEIK2M@iks8wuyL^Zt=52;?)}SCy#a!G;+K<9Vj!*tszJpQSZ#DGjG_jxG zFrP}L3QMC}QBP;7we;v3Nv1IqSkr;F`B!EC%S`_;7XD&B{5KGsIDizt-@FDE4n`n* zixbEq0kXjUb_cY{|0C1yk7E1>fD^}WLHMh9|Dz)Rqay#KBL82i$UkzR{u@+;4TuK) zqavJ)T&%?GJU}t|ts=kg{wo#vqZt2DMgEb|`Uh!#Hx_b3g%sPM#oGx7QrBTgqscL`--N|-1RK2Fo%*kDv<6H?UL zWB|HdHFB8jx5#wp0c7YXlZi>;-R1CaeanGSOxFy{%R?>pc-DqX8I0Q1-2~-N^IjW0 zl9v$$^V$9#?K#zR=jBh@m>_@uO3%sWb}A?+*q!>qC&C2=GCgT%YroY^;7SGpvXSr% zJTBq+;c~0f>$c71h#3Kd*pK+taw@ycVXOP*<_3z$x3<2X^*9a$B$N1csZtwAf>HpU ziB`GXV5vAw01Cp+^4RfSu3q8(V;RL&5(GqEH~?fe4UO{#tH*RWPF+(IY6FECY?Z>< z#pNWj)^?>{K8te(w4koeX)2os008{7yPhvo72$#gk>Y8w-_WTw5|NaIIuY`?Sl`{< z{a>`b1yt4Dx<0xn=`QJzP#TeLNu|4_yBnlKK~e;ikOnD1X^?JEI;Fe279HzO+~2p) z-shbEId}ZWy^NtQ@E7l#^Uddd=451K#EFmG%@7hehKCjs5fL%(e{HWzNlN;)q{PwI zb_R%iyw)otExl^0tEZPZvkHM!R+#;YixIi{dG5;d6H098d$id4<``Q73jqOvgoGq5 zE$#hXjPRMZzP`R7)u!D3J1YN&Zp1db^NscQ17#bVEH#!GVtw5HeS=l6y4bLJ-tgAC z(&wO&my7FUEzxlPhw~$0bky{e6g_S2q)(sRSAXJe#EQahPF7+>?|RvrFD`Zq*Qcv& z51L{AL;3|}mV@cwqX>9%W*Xxr(MRJ6E{dzIN6ZCWJp9`Wb94GCD!4#^p{xuBU2)GW zX=`iiKd-mk#1@-|ztyO9Ug_Ez&4081Jle78GL*+1Jk| z(@n7C%HlySdC=Y0sOad^^K<065K=JcEM=>np18O;5-v(2+HRjY*TI7Yp9A3lpFEv} zW(1o+@C?IGO-&*m&Dj$Z6G~HDTND9%RTFP7lu>$Pt`@%O)m+VR)jMhvwWntVc$+lc zi6MX+T3cK5^71n4m2+xqy?Tg=PEKJ<@2GAllo58lLxBWQQKi>}jTIH~B2^3}oQAL> zt&%Ij%mN8 zHI56E6cm_s=-khvn|S(PPfkvLe8zTi>Y0&iC-O%)C={A18gRcN1%-vq`=xY1z8smD{~{k4gqxci zB9L!xZcai%B7P#N1a`VJar^fuF|e=mv*ikXyh^J$!W>AZ>OGOn58=;SNV+V zJJ`=4w;Edq2PH*C=wKM6q@>>7-hXDK!gBEMH)?<-Y;0@ z6(1&!s~?V!kF&C}CiuOl;-(JZ$#=GT*4wm#{Ot=iZ*Nnig@Qsfd4xdh`Dh-TwRY|C zjAQes?O}q=)nD;Y64_pNclYC28q&*-aQyOdpp=os>^(7O4x3N#N1kusa$0hMlB~rurO|WVT^Z8-bG>s!|ZO`>!WY1IIYrkTZ ztZ3w*mh>`PGpE_!3&Ev%kB(cOn6igF+Lm;?!298DpeXOi`lLr)+?%j+%Dm|G^wb4m zWaoCfsAN^|g=4aiUZ`#^rmWtta!19-urTSAU~dOU$3zMKtq7>WknP8k&s~Wn-p9(| 
zFl^w)LBhuvz9!UA*G{{h4O6h4dO;ok=_{yR=*^p~4a{Xov4@(1Xlorm)MQzaKtiNZ8@KIh>(y7TQ_*FIJ7jc)LBE zF^5|DZ;t2Rt}Q_`5rJ+FvOR4#ndE{hDfLZISwXIfg8#@B*;jh+4K)5uoUQQ7D+Elj z01cdzESUh0Sd#mPH#5O?`3}vf8~6*cv$M;@78e&28@x6?{=B=pt5ac-T>RHG3}xW1 ztgMtY97o_BpbhWW`V%ed#Q(#UAwE389S$g|OJA65QZjIr2_ug`wIoh91~YTT)%NCY z|EYdJCfw#A$|m*=(p3@OB4L0v_ob!hY3lu$2T{mxgPMU<@KKkCnJ_WBw0F^e&)Hu; z3}~cS?|c#9ajsr=KPr--U!sSJ1X~Nk!)5-rS5p<1AYRXv=b&!%C95ebms<3HR?Ao9 z$6hH22;`)qggO#sXC(gh$b9H#v;7Q@3=Bk1B4XmigapMV4J?^kg!`8VCu99lbe zUNjX;_90Ov$yrDIY^LQqYZ{_!;=|jX-Kl1+Mm!oOkUQ}CU17jC=^*>ck>64j6h%d0 zY;>uS$J~Kcc+Gb;DkG`I6mu8y{H8O88MojikHf4LXVm>-re^1eS}UsEzN7TsjFq9B zs_3KS4}Igf#T*jF)nF?BiUAd}caYj~)d3dv z6)(zgyWHPO8KB;_beI846*cXOuYN>ndntT=vp!j7I$wXBDcXM{&@W)bQkggFlnA7r`=d6lS4H?tSlIR~3KRV-B7c40obrbA9K3xG}(yj`>~e z9jS?k@9)1I!~I|uJV3N+x&z+hPi$1@xWs-1>`EAJ0VFy*zrXeV`la*3X(Q! zP*m}1JWI45--~Vwr3Roif*Llil zqR^0w9{-1|Fgi*5FB;Q1+MSZNv3 zbtsCqmFT_I=9@DW`Hf!IjY3R31*>QaYbA@L7Vrz9?xAp zT9-ggzrsdymNI92CJx*2H4!1dwH3zB)eGqGecY-RF#c3Lz%#j|`Mhma6z-3b9K_rd zq?;vYismF?%fKqq-;I0|H4*u^SQs#QSFd;c&lemV3w?CB#XZSFRJzsZMZBFeZ@QaD zkB1?O!3r)&PmF`_q7<*(q3kFB-DNU=E)xsr48CItx>GqijS$6$Ztz`-Zr)415+O?e z`mZHDIXySvyJ7^o-8qqjjSsaXI9b{ZXg^V|JC*5shbMHZ6@AGe9w5z0_5SYVLTPrw z|02`<1Oy!ZCDYr3t9R{7AE+=eFj(~K#`6E;1>=9e;BGtM?uNJdYFwV?j;=$E(zYuK zmdjIGNtxc@m%C7T-0Phfd_N-o-$CXn*LAu(fZYIYS-?Zgj|;N ziqgbbwKS()O?7`*!^92KDLfx%Hw!M-FEq_J({2SilDeh~F9xRd2wRjCN6OPk??D`< zY~pc<_d;gQ9<3eKnU1hWKf9y4H_qw1&N!(S&#Z3&`DusDyi%ViCYaxS4a0E%bl7M)NQvm%9mIo9v7c5VD-;9VkQ1?q2!mLkjQZxNduIe8 zD01>7VEGyTOrnJ>->-@iP_OEHeQz7Viler$!|tc}pZEOHK~{XIXX53R4?FoSKIGPv z0IJ2bIfw_=%c(O%bt$4575saztN-abb#h=pOKcs!@_~P}1n7z<&AKW4e`}l4oz1Jr zoK$LPoK*NA83T6xAB*$yKdlA?8lw_+sm%2M^2tkI40GFFG~+LRG5oKF*ZmlXGyW{* z@`uZsjsIN_x#-XSeqrB>QrGSjqo6xZAkRzgep8X2o}RbbY?6_F6Rmtz?868QloC|% z$M1uK5Ws{mz`!+(u@$mmg|wz3X(juhc|$yb59{UoQsj?%ExQPFR|m(d7p`6{bq(KWQ@WP4GI1_&I2} zwI=WgtOrOJ)h)VwKcd7sT9VPodC${#RCrLwo`2VUtG&OrzdR%=ZG2p! 
zZlQSh0;Z);EI6y*xU#Sc&hlCFo02Qc&=@kzmV>c%#vtKCMeV7c{3!&Ry(wQd-~EE7 zzQ3xesbM#0-~(AGGqW56rS7X!N5W;bPg(LqFvnOi`Fv3SMs8$5cQWJ$w+dhYWB|yG z61q$brGBkT?km*W@dby?3}0~=;^r_gY*wA}FL`;P37|-`BBh=H$Dz-9F8B=!pQo}n z2M=V38iobn(5i)rzWxp)z`uEi6W}r~MB|Td?^mpA=$s|Ms6^|s z{r%HoW1S~@gl-Po0o)@_aofJ+WNP}TA8r~BGHq>Rv%R}3Uv-^uI|oa)OwT&FbKCT6 zx<&x#6}2!4B_$iE3ZFc=T#Z)(Wpl6t+0*R2fv(qAIvunio$#Y)9A3b0@nMIXJ=orH`i|m%`X7_%y!~Ct9~8yWKa~l zF0EszX4rLOKp03C)kH@r=uuo-Tbq~1@aR!{NwZn*$Kl~&X07i-j39Hix3FN~f9NJd zM4@Epy_E|f6QjN9s?Z%OtalmS+l5_S(z*=iS|tV)ZZf~hotB4PXPkp*_01l9HJBuTp>rO@8(1qPO>|@L#{+ zH(+q0=DIomefR5M&(i0{Mfat*sz$Dyod+th%DQI)8UXcdaxVM@AeJMED>v>c9Va*u4+{ z^Sit8a&#>b6&yG!s0cWi$Ie=97! zJe@Gy+uPe1%m9el^~I4X&~S$%(L^5y0F{bX&@Y<8ko-V0V3H?|mE*71a@u%UELK zUF1BNE)2{ay6V0=SvEEXSlG|;t9>tB#=;QCBODGvF%GO=C1vtet!Or;f}C7C_GI$z zm&MEWpBVvnwce*TLLbucDEVF9rbkT|q7U};WHXk$!=?iE5tH69z4c9ccTZ1Ock>ss zkPx{7hOn1#z+Gzl(jp9M#>iwvw0n87vGuDU&4tqG!*eBrNv<#X`J?e#f{o*0w9n6@ z(i=)+zJ33m$H-2rZEWS>kjMB4F@?lFhQL5&=pR>k4gVGz`skVwIRe3D38I$5X~jcP zMf^X090~%M9T_j}?%v+*4Hl<;BPzIbC@3juayW+{;<>u|%H!bfjY@wMN`lyCU`Yvk z3r(O$;42CWifIdZRD?eludc3Qiwwb|sH;B3#1d&Ou;@Z;H3>--^c?8x+dbQxxwr%{ z{?0^+r>kpkUtdSFi2uzbP{XOI+Jb^GPZV4habKhQtCPX8H^|rkfPaI^taND{X5{MP z0{RZE`Yq|Pu__uGgj7_`0Qg8`H`LY9nX7eWq@f8CT?QjFHflp`by;FF+VI0fN0&fN z1h74cb1+H*2ZwQ<(J*gyY3b{uWl%JQsA%CbDwn#h_wjo}t&EJQJ*U7}W5iKk)q7y- z)yKrdfF48#5;&BiJDM+us?_MQDhnR3_UOG5y{g5dq#U4f4%XIwUSo9qgoVXyI&w^; z4H3OJn$K7!h-zqT8JUqWKQJ&bAtB-T*u@bo-OHXanBGms#%B8ZU{FJS_2xKI)7V($ z99b4s9>B3$-~Sqpv^&dRvr{QBzZWt}AL80{lLG(f3J1pvr2uF@FX=&3FDI{|p4v%+Yj8)0kD>piKu~nFtDm351|nV% zSR+U)9B?{JfqD>37ndC`ABJPGm7+pxG@KaJ)io_pJ-qyqng+B%PEJluRrLpZ9t^~u zzP>(SQYN`oYJ!-;d5fOBc+{WqihFBa^-(k>+CP4djzTSVm~VIz>*5iL z8Z1RmDeCV#Rc=0eTB`N^RatpCXk5*Jz!hr#jl&s~a0R>Sab#Q&S*c0f05% z&BWN)>YkP?qTHsSm2_`!@329G_bHm5u8WI{ni_rpyq#lG;6xwj7wj)ID{(7+=1JVe z7)6Y9H!(6Anw$)4PWt?rfq?=1zYBgQ>bkod-7=#1+ufOsqA!&Mi@cbaipc%P1;xdm zMFaQ)PlvM5l`^ohYTwF7@(te^wwf4PT3aW_$G@Q;8yy8Nmq3rCM1T1jSYbySo+K8} 
z9edNzg969}S<6l98)SWkdf&^ZEf1KqorhET1#XrIHs7KW5xEoN;guV;2Gh$WybTTQ z>h9jz*-1%BK}JGyexs6EC{@ubdI;1zECQ&zRAdx{4Q>^ij3+-cGqcz1#S83@pAz8+ z5K=jw>arGwD*<&EBpfYFg-sO|IG|8Lh&%uW2lBI~aamklsV`I7+@hyqi*R{2aS(%vfYTpVyq@7HE4yF^LD^0#YA?#h5on21+E{DiJrv>i6Ll1G`kRC)ayX zC%Fvnpr+HFNls42#l&l+ns%FeZyU2f3kl#0s0qYWXP5I(3X84dW}8;ctF=Udv5$1mpbI)WIvSdq?ji{1X(Hz* z`*Zbh?^V_U0x99^M!$b2BP6UcH;-0O2-s0+9g@~kS9cNx)tzE;@NMAfWZ4NGKE997 zw?^Pws3DbKJ|1WdSWvmR&$Up9MpR87txlB46cmz>kd-JZ1&l2z)qCt|sj4DhAW@4; zuLs5s@36iZ9u{_?GindTEJ$l<$@E;%7qIzQowMT2!n#&3pt^SM?s?wAm6QIcX#Kuzs&wz z_iXubO+2F#{Ec*N4xyD(nR*J3eSEwtQW&|Yf&Ewk31kXV<>ErR$pvaUyZHu$UPt4l zp`GI#0R|;$83E+Q+J*+*5(?|sKbiwFupUkMQJ+Js?4qehrtk9h>JJj&FPI)r*3~(P zzhHY6=I=s7zx3_R1HXuQcCoF+RwfX8X`cl6`Q1XdbU#uV6o08yE@$^jexl7X2qG{j zwA#|jRdsa2_hmr4P^V=+Lo987y2|m&Z6I5!>$OCP5SpXF^pBR7mfpmYOgxpC`FS5g z!U;&~<;9Bm_1UE_On9SWkk99w5@$(n1YF~PPGQ+Y(0F_8M?bUu@bM%liU@KtGBTp0 z`$4if6kk?ZIa}kH5flV>KuCsLTp4)&n3;L!>U4*ne^6kSeK#H%TY5zsDe#SEg@KZV zm4iPCski_6{v6-aPL|SJsPlOd8}6vJ4;sRK=*}-G$w*7v+u88}>Dx?=qvPxk+yKzM z6m{Pw;Kjr({@Kcz^YhlgZZ%&igM`-sHL$xQ+y)fd8!%ftyW*%m8ygYh^;CXxHwWji zm#|ryt>B>G&ieJGr80|tQhoioq@<+MQVuFAsvB^UQKXe+t+jF~Xi@Y})-?J4`rrr2 zCEh*H&M~d5!u(eJdwqR9V!Kj7L1BBl4nn1ZPv{irHQw17A__;S-|nuZ33+8-302C(d3@r=XkBHmEhp)HiTAcW}HN0utsT7&GmW`PuWvWh7m zKO&am8Holmi!}uY`-!y-V_BCT5fc(-ZML!ziSuZj$JxxlT@EO6{l_K8!Ck)N2@i|4%NV#$ z3KvI3yy7^`BS-^#Menlk2;I>!dvfKLHg(bjiKlaEW}5uoMMd$7B{wvH4MgvxR2Lmu zv5-6AKS;T=$ao<<69CC&eF%zX<=FUmXIGcWw}g)$L3xth*!#Z{K!yr&2M=s)uT09(X)ze`gEXy@0Ak zzr!B2?oC7n={557Q&(3X{#sU+-{FIzTH=IqU+Jv$;J&MdvN-gA=GM<^9dTJ`AJO6H z+i!idda^NSX}KZ$w*$5p)ppA`?ZE89j9D3W@)hU^2*F*SJO75PEwvfm; zrUwWhI39{Ej89HVYo2)qp2Rb-#w`lLJu}KYmpLYp=2JDTKjBMDDlmgnZ?5_?wE)?T*2H6x+(B_y2b@5hYNCnYw+RD^8CnE>^s zJl%pxC(H=>sXaj)D}t^7HntKb=KG9{03|~>!$xoSA!^q^RifU7*47t4ZNtOERa8g1 zb0F{m*ZrDx&sp$Vv9zCZWqjAEcu;gSoYNi0gfK78Uh>WE%p>r*lvMAB53a%jvkMCu zpOc2M4&~+Kv}7AIMEppN$lTSNzJ7iG0l(-WJnn7_qTUv<;Z2W;6r{uNVoB%)w=TKV z=cFWv(3e%$dt^jdpdmBUs0)J|(hCenMy4CZOWoRfYf#j7QKkLeV6CqXd_2|F{gXfz 
zZU(iY2z(I8U~aybolX1V6CDGCM_*DWWQS47U>aS|P)jQ@HMOr9iYF@c`mm+56A5|6 zaeAVvsR=HIiH&W4Q3_<+ygWQ2`J0!yLvZ)fk$p{0j!vBgv4b(-c0o%eCPsz3Oulv?1v*wWfWEWL^Y zW(?p#j~?aF($XfC_Nl$SuW|Oc*Gv=pKIe`O4FyV{jO5Ds`}@ON2xhi^N}NFci6B-+ zym;ubtL-k3vy1}jz^sf|Afne(EIXygkBd*Cvq#(GMg1wBOia@dyNqXBMWvg2OP>{@)j62ehnTe_R+cyCD z^(sgpGDm01gUow*Wu>Snr2G}|JPZCsaHU>9aTI*6-Fp*~t*wf~q8sOO5EjZ;ut48~~R^O-+qeD#j%X^8s4*moEUX6mjO^(LP0PQ7V< z7aB?_bdj+Ec|566q-hKbA7|EQZ$f@*Y)t$PQxDPTWX>~ERaLdlkDLGx4}MW9S~>&X zS1LrJ0Z^-$gy&2U#%p}e_lI*V;b%=NV%T*|+M1sd*ZZ-co(Kj&;YzDOCm*cnQ-#t6 zz>*Ur)M%NXrxeZ>y}LkYP6ws-;9yM31EhM8v5$ht#_zg*WE$Rh&WM7|!=s_$Af(C+ znG=6A#_Iqj@Rc<&Ssu#jg&gY?r$N3#*bf&9RCFhwd#eO~{3y!Gs#RZGdqVZTv9TG3 zi-7P30U3*kD&X;ek-h{UXh835gRL%0tyPoUtB~S_L&3+u{mGwd_!;*5)v9p&*pk_PkRHU6(% zdjRMO8yM)hx(Ye~rkRGh=?b6SBu1bN#8U(Y_JwoLrQ={y&0D#daB*>!krD4zi?p#c zm=p97*wx^)zWe|p6EAlXC|{@oUJY0vS4=Tl_6=$raXIUJZ;yHK#F%nV;s6w$&QL|v23_NiFnE)C8ek8ja&`kik-KLE$ z9y+bc+S`;)@!2KucveIWIIWB>7ib2Hv;J^efYWbyeecawhf~9ha>K5!LLHm+drTN0(Cd!G7k76%Qe1cF$aQPXv^Gg+yHqet9G%k zGc+R(&(7A?R(UAg>x7Si;WsTt96KF1_n_#75oHS|2?-TN+QN^t&<{(bH@7iAe|bUF zdw!B1LU%<}d12PTj+vS3@fPXDx{^3Y8ReC!col6=EZA@2Q&PxU=(n@_^#MLW!;X0C z1ucS0&pKgYF6xhT2-1ikj2!p5xj;SIBVlC%VJIr9cztjJLZ>3ETdp| zo2jYLW`x|qhm}=V6TNbOt?P4r`g(eCQEif-u+}o+<;%L2F4hL8CEA{7+nk&nQ^0+c zf#3%|lafLfZI%G%_kmhp&#@4LBpU#V-S&Ih{>7@r+QkONs~BIM$g>|E0HAFDon2H^Z#|lqno1sKq(wOigx_|-cNB-O3Ax4`qI@cL}; z?7R#0piqBBJ2o*fn3N^ancO-u-PJV$eXBDY+?f*Q37WsZ0TMWrB>@uU$@Oc0-501t zX{v;TCOe5F3#TvHSEQk+>aM^NA*lCcEZSO?Sby8g9>J5=9Qjrb#>l71Y_#-rz2E)KSpa7*d#b3m`Wmv+uvc!xb8x4&wDR2P>$Yo zDXNWr=&vSy7PhjqL?7o9G@-4k8a&?X6Za;q?|4mM?5W1Cu&?#ZCy9rpl>m;!rV{zW z)$;xa67$z@LjoK_20H)*c}S+$cuHj1^O2tKeV1-km41GKRhvm|e_Bz*fRj`R3I;j0 zT?D9)H`g^Z(Y({6OlRJ{eLFT*2@dFB!QivWvmo&CyQ_VWMmElgf2=FRgo2J9|4AQv z2nDCvDeGz^H*clOS~2ya3i$R=NvRf}pvDNkE6ZJBRn4CrqJ5wXG3I>U8MYTsdUo4kvUP56ZNs1rFH`Wx22Im#&7sPvD z^B-qKgaw2fGU4W~6>d!~X=VAKzK`ihR0~gHH28Snn-2`i24Gb&!f@`=VtBdl!MzlN=-BknmTP?Sz>bc|kH^ zA@GqN{FlfwQ?o@07X}EbmcyiDGFcq@B*R#2x3sRh>)0qAC-&Eyo&sRb4^cbWa~bnsoN 
zo|^qST4D4_^s(f4BrNP|Za&a_$1TdlROcGp(9i&!edr|?Rg>(ti61;0MwC zk?!OZwLH7IIzpsgzh5E2YpnZXtpmwi8=J#9z}*Jt!wt1J`kXI#ZCD0sjOG`aS)zX? zU*6e4cM9`<5nGg#GrITcNu}oGbV#adNMmcM@d<d$?vr@n9k*fD4z2^_zM(A%R!8!)aMVWuIAC5 z1YN~cVIQK#C+Lwd1thbNAyV}gHZ~FS)Z^4|01p?5uE}MsXR(!Umf582thUYpXO~Lk z^#{<66@}XjfZfL3+^{t)V9hr9rJ%K#z z`*gbJ6LVvuXSjxBY;5eymk-7?uKD<&uhVdYkP%o}Vlp$SpA~|*U+;M!t=3CLm!J~d zX<|3~BfNheS@~~c9HC#C8wZxF4s;jN=@z*YorwMuW3=baxqHx-$$ZykqeNhoY zXT*{2RlI{GcFsh%o2D{ zA(;22^1TKyiot zy*aP1R-6Zi9gTQ(eE;RwNeF@z?=TF z#Mr3`nq!Z|OsU_)!&x4=;_sG9OG*gdVtkSgGeQd75cOcu)*->{|Lxc`jI++qiqM)O z=(b7D^j)qEy44$koni=Rv%uQ}e9ObuV5YEcy27%g;YJ`ZiPIFiyzE__^*~(q{z>gE zA?UTO+ru_?n5d|@wjL(g1qHIw79@nfN}=u8RgzWO5a#Cg2=^OO9Gu17U2+l6m1B{=gHKSul#QuoSPYKKuBI7yuzMbd1GdLxS^EW`2Ia?G0BM=4KhGt zmD0YpndqCAeV0$Emh{fw;C}Y3FS#APX_XRCFt+e8sR3NlTd0;_pd&pQhpw*SezEZz zP=~R}@)TH~2XMN4;epv9euN)6_4MdMQY;3>9zWT-izN_O$7zj70Bq_( z(Fw?8(HAou9H*$}w^sI5m3`*sygz&DwzqT+qYeNeIbU)7?oeWmLa>44wUj;}5`2z} zb2;4}2aOo(C$6wSfTkG5y11lgrA0^9&t1-$ySw-2PP{$YwFbx=v0-N6OXSB?&t#Eg zK7)?2SpDqeq}xhYG)M`WpFAK1eL%6+>Z&Rb8UP|;ytDQaFNN2il^teiIiTHo_HlYz z_gM*q1fqe4*1SJ`+8rZG+Lz&2oNxKHzzTKoq@UNuswyWtTe(@dQ2`KBo;lKq@^IPNxXbIiG??0lY1;wdK z^TjFXLxG&la!oe?&rYzaBk$|iuRnfh8~Brm35&%>^|6p7`$1m7&I3eyfA#13+fhD~ z)sw|0!Q7cJv{h{zBp31Ju{ZvXx*S>-?038xo}5hbc>=(dmDPJ=>X&}kXU?#m$}oKW zk;uD_>1joAw6|%lO=L-6 zAVHB6lz$IeOy0fuGFMkuvlNMVczCb$^=AqfnHqit_0^PWKbPwr2N?Z5=H*6RcS~Wx zXJ&3b1*TqCH!nXQJs{nF#siD8K~h?Ju)|Cv22IT9DMn?f6@|#v<|AZeWWbH8Ob2Bi zpyTM*IHYA|(Htx>wIfBI@_X#s1xga_tS)zk`e9#BNe|~plqyNLe@;tdjs0ea3JS*H z&IHXH!ygsbL)a`&o-D;Y>~d)u&EfZm(l4&<8?r3Kh}}CQw+2BMLJL*_f^(waaIz%ys)ZBNeP0AZ>D*5+sO>)hlS6W9$o)-Nh8PCEiTsIubaRX6+h2y#XY`&C6 zrbf+_d|}O5w=+k%yuJO>9(f82CptGb>qFYFzCLv|y`$5KRL~fboB1Bb_g1I01!s}x zJ<0>j5#NhL8sSSUfV_pQHmk+CK(<#`pMkbzy)YdMi(LnbYoIGcZax6^ZZv;zWcOi? 
z_2@(m$zTVUii!%@Bji`Cy*ulF6^?NDFjGhKH{X*dh3uA(4gowO z6cp6m9b@>^crmqu%QFUj*!sLW`2d4XXX-wWq3SB zhWQi@W7NRjj&QmCg@d9FwpTx(B>{yPWO<P_fkQh<8T6(V^um`$O*<7+@+1_V z+L@O1#l^>WYE`q<`U{&tH>b+W$=%%%T4MqM0|CxK1 z#q?uYl(jJ`=*qgL0Fs`X7B2;N1ebZlHgGXMo_PBXq)}fro+E>oAicDh<+BeI2uwUa zZeuH23Yt&_F3L6;84>a4&s~TWe3(vEPp}>KgdtD?!U6!KF!R?&+f|>>)=mq%1Ue^e zEiIk?Voc2lSpV_`%79|*bco8&?-{5|w38hjjfra)AjO0j*(nhM95I*BGUDf-p!Wti zSpWq>RRs3*`NIbsQc_)Q?YCOE#KfRTKszL-b^$TGv9w}*oSTT~J(XWaGH;B;N$(^0 zG!Dbk*>M5T>w3z;vB)iDfu<>2I{6M8`SBBHh(GMg7Sk==aX#wfCw+-Mq0{`YUyTn8 zUS(^1`}!5nB88tt474TzeKLSb_5)fH`y}ARfGqzeuxkhGy_}`W)xD?hB=N=&Pk6d& zYd`dO$-q$b4tt{kc9+d-Z_4m6oRF_SP1sPAMp`tmF3#QfXZc3CoRN!)ipA*g8N!F#7M9m zu@$UMG>It$hx2vmTI(Bsc(ga6sxMwpWa3fV9GN_7<%$5=?}zvq6lGAtIlH<(`b-Nf ze#X{ge-_mVZ;>)^x8UZh7M-}m(I+zz0e$BD0Wp>CqR?&D62wWPfsqJ0#D%n1uX<)vuFSy3v*5t;W02nnAs?PowhDlCNP?hnPjCRsr4D5LFy-5QlGch|Uy*^R! zYntFa3IOne4X-pbG!)gLbT}PoxOVKXK~mD-C17`U;Nn6V8mhVKhZe+>#4rhvjG>_+ zuuBx=XCPO|zC9I;G`rbeTkC`+sFerz4-O_KPCR&xs?6(<%9AW6n+XtO9#&S&+dkky z4rXgBs;hsGjX?psMqFGRa5fvv;t1^kUGK{abe-Z}JCDQJT5mH?9(a4%SEZW?+|RA|eG-j7sU+dU`c#axbf@s=P+TT7m61_+H`##D>Xy2KcAuUPM8tgj_z}>&OwAWNt5 z8a{|}h-t3D3tz9kxw#pn9%lN>&6h2~X#(zOQo4qQF~Rm$R$_?{HGIJi<=gA?6Rg+^ zH4}JoTG?6peMnssXt&mYz29zbLayr=cfrygpmFDOw!1{d1bX2Q807JbKL8TZ0|r2$ z2qpk)N5}W03$ALcQ;Y%11>##)ULNSNZczCz@(Fm4{r)`?|0HOL1Q6XOYfn5$NE`#v z`1zGUBIf;+9tY?cGX~)NL2x1VESgF6^@765cqnPW*@i-)GF1M2#4S$8t089RkAo4x z590ta2N1zYd5UOp5JBIZS{C;&A^!QXRlXH0%|kM>jkUFgBh9&Fhh6U%7a2prj-%3pOkRzOk@ZEurKc9 z=!g?D@ntS8pF+EoPgPmjCI8^9kC&Hm9iO0JP7pXkVP-~8dKmyZfD%12qS`)V^#di_ z6g3=1f`yIk>EYqO2*cXn+~niuUkV(R19TELwgha(k00*8IG7%W#@2vT2c(Y$_u3EQ z6x2~oE&gyYI`$IK#W$_Ys*c`D0H7F5qogEayC^#`5m8%vdl?|x^~y$p{2o~3#>R$n z3nAb>H8j{^WCIurv^0uS($g)t0Q(8Fa1V(M@CUdB1YW9x92Q7*Ql6jxP8d|MqQb(2 zKc7DYok8jkoJ)~XU}~&*WdRhXV!yCaS6A0FFd$9+2+z>a(73e)D2dslu@2EOz{?fg zW*S^*-NLo@^z;N3nRjCou+{SNI0Rl^-e)D+-vPv(&`HeA%}q#1XhO^@mOm<9+1Ln( zxaUwP0nnc-(C9)!M0~hnLc7nfih_WsA}`;!x=M#^Q>7~#MvPV zRnrHqGzd$o`!A>^0Gs0A-~b&ReM)r=uG6eiI>#FD5JeInm!7C^iHf%H!@L6&ds|zB 
z09|xLX}5o1U_|eUNP{?wAZ7P8i{e3KWF&t5g{2Vz{&NtN=exgmcXz*ghx~ltu=U5| z(xu=&>FELHvqa(BVFBo+Lo9Dyxp{c5 z8gjC-f)d^aA!46AWL4rLW}#yt8Xm$Va`*5UF}`F*y|;;oNECGk+N)dtS~?z{1bEp# zX2X6;GP2&D9=y&|Kc~!cw9*K0ZF6wP{6bUb^u%Sqx-^m$`>Zamje$W<*M|vYY$+MD;)5egt-8 z-94^-vzt8we#Kf=n+Cta#x_M?3yOA^6rz-&A=OLG*Uk_S=*DR6vc6R$Z=&b<(d`Wl zeQ;t^=kI+V73I_-m#29gwl8soVrGmGF2=~n=uK|ER3QbH2ZO0ne?;e}U(#D%74f+) z9JJT6vM3e2qqzD}wLhn(k}J*qai02(-G=|Cx{b6nVKix1bnMb%BSTXkBzS=26VyKZPTd zq@<8Oz4FoliC;3|4&U?luUmA+1PJhnjz)}dTYnt@TMbdyZFpn5_{X?5R#zW8w_PHf=?duxjz|QVu>|kc>%I4%{X6$Cn=ICO{{)8IbtLEn7 zZtA9N>~ep|&7U)C{&{j3&;KkL7bh1R-+xHQ!@Zj zql2T9o3*{QxAFb@{2AhZ74^^7kbjEG_b;NVTf5nrKYl{ZF7EDT<>+$%Nx}t;8$3%A zoJjOoQUJ^jKgUx}P7Z!9a2^{g2R9=J2P3#l!O`r0xkA~+(ahb{+~x5d459$BhL}QJ zAdV1M@W%q;2Kf%5hA4y2o)B~J^ZQTM5Qo2hZUMe?0spTAuC(}br#bkJ3&I28{#!sG z>;Jcpz280a_Z0E`tB(Fre9MQUyi4sbvd&?xF>d>sGdDUsaV|7k}~zy5+F z9k0uoYoO5D6UVny+3wU_tQgs~!LgPmiS}sZ+&65N z^Rp|bFNvJ1QvEQyRBChntY~LLOX~Lr{9kT)o(z(sdZzjuewKFS3|L*UynfG`Rv3ei zeDfJ4UU!haYJoTL{WV>`Gf>4``6zw;nUEdo{#5y;%OD&@3SqpZfK^Xf=(PInCq_bDF z+~paT({37ed|pAjav_D;&TmY4SmQ~`eLK)PWcp&dMSlx4ZC3p35+qufHfuO=%HN0# zd#AI9*hC&fyV-hFfaBC%{^UX&W%-f1>7}9)OTwC9_<|ip^RpmMo&+kP=8u`WO2^pbOjMzaeT=KzIT2h!-T?dY3f$axeB^&vh#hmmh@`shZD>ml@gRS0Zz zit(cDNUB37K?2=oX>kYDI8YX?n5wflzyy zT`deke7;HvzH?v@V`Et5h~hL0KD&1s3n7ALpRZbr>5(6@{5 z5WEU*AtJF-Cz1K*vtI)T<0=zaTwOre7Y<+-@!j5GnM4#o5XdWgLLG?^&agYu8X_$2 z+;bF=*2Sm4D+=opdpaN}fEVdz&)+15>HAhD3IMEke%d6Lz#kE4gE?h-119B@GB7YG zg3~{q28Z1XO$_ilKvEoY6o3?Q6)aUSw-pBE@7q+f8M7e`2L@8LKc0^7G@w$0|r_e^=q?O6xU#b+igY^FP>dwFHXPuYc)JZWa&L&ET}qR9Ik)T)?d;H zcd>i-7yu6MiSmO-IcMbub4BD-qv;*a5nc-kI?%GDek;EtUSDucYxty6XbysGp&*pBi3ZvMKHMuPNk zt>D2|YsYx;E94|XmeZefG+@<;hKx&-b2ZE;n%?;$B1{30>_9aklzQ0O^`G0 zB91`G?jqhq&nECF;((mE;e!O*c|ZV?!t~+8AeArS*NgD2;Oep%k>aNAphqy#BFde4~C3Yf;1r~Y?%j4On)3O==X8esVT zSEF-$0N0Rqk({Wx)g1=3XHC!Nn|}$V!DE= zhe)s_6&HvYuA2)KE`; zz|126tP&gx!TG z9aOVcNNmo0fq1z`0k#w7GnzEzU2}3I zi%1PR#3IapBpH80^dvME7(~nK9(|%G1Q#1r9lj4=%?I!_=;aom?-woACgR~vG188` 
z|Al%#@{F(HsVBD%J`X<;m&UxCu*hMs>qR{TzwC_F2nCkdFtPSVDw`(CJ4PV;k z*u*uMYNj_j0hn{F9S!kE8a|bLARnh6r2*$q&u&FN=@8{LJ3HW?KCLvFp=K?THk>ZoMpEVXkVX+{9mW&*#Iae zzT1#UHe>JSH~-9OvcYg|Om**w%k8?PJmH)Rozea`G$li0npi4A0%}hY8v)n*+g<<6 z>$@GmwZ98y+1v$CwQM!>x2Pj3(@0UhpEyUBdEls>zz|VssA{Ni4c3bgY)XPf0h0EW zK=J`bM59e0Vr-^@d}`f<`eQ(R2#z-WPWC|*CeOY2_r!S#kfZ*=Xbh&>-$3pXBcc3cmiXOSg4w%eQ6D{_AB zeqFxr`?Wp1+}=)Y-Jb6wDa9XiJYV0@WAMIrLpwh@cq8r%F))n}DQ00COGoe* zIq-7bUz7gfsDGV9MqlQ=Fuh-uEO07JaKhL-*S~0Dny2QDlaP+cw*YV#kTkO|SQ|a}tu0->`*256ooN*ow0C z1FxEM*+)pSb+@b6z#R__dHP_ig~RLrx(O2#t+jFBR&w}#xXup_-@iCF&Wu~=(a!OE zl{RKjM;YFg7)=#S5k-kOot+GqM{>UB3OfjO%hJ$qTEdoQ3h==0jx;6x-YVY05_AS} zipU<9vVB4aMo=sb>lKxR#eF;UbgHCq)EVJnY^T=;%u5NQ{6xKAWm+;XSr34OnFmo> zd-cWO)+s&GLQ$Ot7&7#)+tQ0HV2)$Cv_5M&{p^@T(X_&I3rpN)jzqNQl-}n%x3`!$ zaM(I%^{%Hnp!Q80zEEQ{5ou?&GaLvWPxKiZPlkJB%yChcl;bb*l5g7kMH(oCgZ`AX z@>@@hsK&b+#I_%9%VFG`a6R1VEe&ZWd!wpk1T~IFdQ7%fFer zxGEqlj+IHvoL^b?1ZYCDa_W?o+1O4_onO6oGLa8-WN$88!?Byb*lIzJ1JsbYH+OV) zttYQl!_?d2nQw9g48DjD_cVXfUSFnAH?65#AH=Ox>whZ6%YiBKcZ3F1-x$5VT&YAI z)!q_)T7dDf1a@Kcz0ysKf#;_fY06l>oSSy}aVIB+K~Lis2*~rb*UG=Y9HQk}GjH#iQBY!;r0%nsroIecN~ODi_4T z@LR6?ys-ULWe=O$V4ZNcE4wc2YzRPp z_!}ij1Wg(rk}xU+Ss~v~l~@ZkG0KN6do_q=ngV`PMI`TM7w2m8@%Daf=n`GVnT!-M zY<#G=(5WRsfZ4v7_^U%UJC(3nTiVsDJ#eNk=_R8sx>{&PCZ!M!)DN!-W^CFtq0v`N z{N}~t1|FU~4G{?M`=U1d;p-ZHX_R1?XZxSlJzci3c6D?Wgv}*hb;+vHYl_yUBwpyk z_tunD`8Ax}5m$e|7Sj}`<%2R|-`7ADHf$Bv%FsDW9=dr4vex5Zvs$p@pR186FT9JP z+bSPi$>+;A98Fj$Mlqeb8O=7bI4dls)?DqTpB)5Gt`IWTYiYCi^NGziVAU(G$G)8A z+O)YQTRf~gkW0qn%dRTUh&NUQtA*!359xWUjcwfks@h*v9Ym`w=DlSeTKO_;UKTHN zvlf+q;!Cs#J`+Xf^3x9d+$X8j5v+)&1nNK(VWJE*8Zb_NK7vfuQKVolXJ$$SV`a8u{R_$>A)EX`zOm^Ft=BpCRee~*11j2y9A``Cp1RrAAkhxw<;D-)k^$dh0EVNghd zxr|Oyq$t+(!CWqWN9yx7QfWzs=*$ZI$9)OrH#mz~K9p0@Z{hrDY|7T})q=(~0??#X zX@SSb(b?t8v|I_S4W(f1Nm~q*v&=#qy^%Sc|7yl#J?>$k;-#n>8~t!ZpP@vK&g}*U zodP|B9lazc>D@SPPCwvo)>$9Du|g$51q@SQk!6kQckZ~c6so3ksD>uyrv!!0zZeFljR-+bM4-u^lK zk-4%(ygyPPr96mCKkTV)v4cYw8xiZ#b5gM{RW{L=SK>`y)~@VaFc~8=8A+2WQl!$F 
z(eCIwS@insHVu*$xegRThkfJgkZ>f+B(1IS*tX^Ey z`KJZ?6A{BoQE^qlX8fm&-Dqvkb}_fAQ*W~y=<)09+im|Gj}x7TLtez=J5T^#bhPI=)vbaa4U}d!~ch3Dv2|>fNZ8q*vwHp7JFe`oR1m4|(i{?T(zE*}Vqx zO}v3cxv2~-rGtUHAL-75TD)qK4?&0Ky*XiK zY|84hVshw+ZKm_h)b8 zwZayDjnZ$t=#-_-R-C`UZ|IGFR-W%zKgeNdgd<#D|zw9exxN`ss2diAwK6>{v& z_HT)+2)Rq_tEW%12d|F~2zW%5L+#)yY1tL4$HG05U)k^E#_Qk~PJ9B&ng{7{h&$ye z1hhZe5%5GpJ{aH9N_XCtSWu*LtR(pDIxtL5i_z^ZF%lZI;mVJvMura(c~zX8bAz{K zMrNE?_Z*V~zPCMF?XsA0r7IfR5k=>gN`B+5eORpHR524XIeTvnl*}E22jY6g_uGo# z>$sKpu|iF3i!c68%rTcz_KI|&`rOKG)lGmceSLJiYVRXvkRJDtI$C+@JVO2pbWo(XVi(!)N zl@o=8&%V(W`&fpU#doQgCA&|l5{--eN7RVPUWS73Ijv694vC#q(HpG7qV2nU$!U~Q z$3g?er*zKEHHeafuj+qk*hEd0Y>P^)VL3T>$DrU zL7Uyn+dQeMr62L`9GYgpEM9ZOE3sgfZnq4^LX=8Ww?}0k#84GG#)NhMd2D!Xp%vwvUC;&` zhl{bYRHUc|sVtlLm@_-*I3&M3AWxa?yPo_N|Mz#}iabBWc+r+Q-m^pFYIWQU0pUNg zny_D)Fm@Drh@BRn`3jBIh$sCs zcInn7;go{!;xikAhLiHDSQqHpXhI)t@)0wU$0Jv@6CP7p)7c=CZGS5 z$85rujO=fbgCKc|O6? z$Bq(r5+G{g=AW2vS=mbwXuRa2&C}>=#Hy_Su5EVF7%pSJ)6`xrls`iH^Zja?;C&mB zJ-aBKAg4WtjFc{M-$&w^pu{Y}M-hBH6JIzlo&R5lEA!vQsY25aO+I(y_$Ak8`Ibb@ zE33EOc+!~lXi`P?WVH2_`!Q0_`zA=8v9U5KnHb#Hj*W?dV|W6uKwcMqT-zG@k{yPB za@cbVJrAas<|#0W@`aB=rJsCQuvd?a<|ncAayd9z__!MjGchC7?7CGCON;r5a{L;` zojPl!EiwlDKV7f|_wkhYg4d_7rF6UFCq<_gd59cMKu65dXsj%zIr2zi=0Bq9@;|Mb z--11!!97WG%yUW~2x~{eGLMh5dHOSM=tWQe^=i<&<>v($?c(9Qhi?(6Q}cSdnpH!4clT!Hi~|JF^mA!V zVx8pI_}6>r-R@v2J{=posgq0BPM41GUB$z1+V|1%EuNWd=GF8H`BOAg`qYoSKE;kC ztO>O&26p#X@S9w{}GKI<6MeB#(fK zR3sEH;C7W%lqD8kaV)MvTq$-BAcu^J7`clv1ejW z&l7dX*j{xp2d>}cp^`jbV9Qx9J$X)GeQE9fOVJMMdh}rKb$<5e%9ZDjFPGuWSXlPy z*_KiY44$Q=9*-~-U;DyZ5W`>!%KpS)?v^ji`SHbIg7@?&lGN34MKJ{3T1UQpNGn$B z5`IN~g*v>Zzn6Z~$sw>3-edbMB6y%`#b|Fa*<<~Dyuy`{ECp_?Qu(7XiQ>XX|0BPu zA`{BgO$AKR4C*+2RY?5;MI&~LV3+V+Mh^VP(wYH}1yf3w+Qn0!_VxB|$*pC69Q@I2 z&yjTxXC)`)vZE5;iJ=7iQRmd`bz5le_x+N4{)*g5i?)AG6`V`-#F&`aDP+!e5GLkZ zav{x-bB7?-;?>^C1$Fi36v?6a@dA0rmC4tUGkH#GuHVR-tycr5-i_V4f%bwyp)6mF zz5?LT)P_tTLQ~v~Y4@&%+>w|l25Zv-`Nml5{q-y9+BjxT-qtDXxlS_ua*9>V=1K1jvAN#k}ioaVW1;y_!edDKbXabTGizv&PoZRo86b`>@%YE{cgb1GIMq*#Av 
z+7qC1qN@uBW`>nzu5SODf&6UcM-G`OS@4-D7iE=dQd2MNyHQh7-*r_{TfKEwXfN#U zv?IAx&6>~ETC*e}z;={Nj$%18X2+$&cSus0%+{K7d>X^9TJ?nq*QojfhUvD8x{=S_ z@*6;j2G$GG1BYsYbE*Dg>UJPG6&Q)n9r(0-oZppzLRf67ECdlmN}B&UGzoxQ27~?O zR2dh*VpS*DD5+sF#cSSTBP^x{3`-X>dbgGps$k931_(Cws>e77S1Mn1L`p*PtnQnU zvJ4|tD`Eh{YEg#x#d8G*5C&H(1OK(SE!4CxA+-2|Y5~cmX#k;75a40;SGRbmljKx= z2#kUorW7^XAsOC^VugP}HJX6Gz6?u4sl_@DAHk*+x=J~uFGS62!Cs!UyqJ_9XhvQW ziv-JNZ9*m5f)N^KQ(Z8SfdqQWTwPdL=M=2GoD^GLMaI`0HXrZ-7KO%&52j1VHL2K! z5_Yc1y*N)9yR@O|jY0a~_zJ8|11Rn%c+hYzE$v2K3cKK=C8|u=j{65eSUQ(5{ZyK~ ze0t!nv;~Mf6;|@YNT#GmH%Q?e&(Jum?mI!`nKr3dn#Xv;Do#GZfY%G?Oc9G z9SVTGCqA=I^f7N%g|;cE@R zQJ}O8HXwHrZzulbF2K>)?zyEKp;$yw3QMAKcqo5N>OeIx#_*EM<8TK0qF_ylL8dmv zf$+MkMjH|4UzV&D`rL&emgA;k|GDk5}iPsT-Fy6!LSs3;HGz&Iw5HSwF^k~ z`5Z8pS-4dT?Mr?LVVUb^9u5-?3=CNM8`{7Yhjgl#k{&;XXx;<37WSn9<^H{8Vr(yY515j%_% zgvAci=7QA8zSpTRh)MCleciNi^2<%e_7&l%5#q6Ef6TCb&9w6QnXq0NQDD`CB6aqT z6H>#IO@T;PTMQ!4Lo$9*C$p{6bSWto`-|rCQ}^DHzD+S@XD!#zLj%$N0GuQTR>xul zq2<9NnL?A^1Bqr6hZZYq591Rw0FHKr=`a9~Yz)o44*_F}!6Fz@WzsWF z?EQoREU^5sjZr?O6M$xRf0FitsiWCmsw|ZjLA!|PRBM9KJT<(+Iv#z3kq7)2LB&1mmiCpKDmvLUoKXF4;IS~CM!g2KdSAngR&aX@e!uejXD2}89h zflI<81QZBWBZHqQQZDq_{CMFVPH&q5!XW??hzR3?zf(ccT2LGRf--nXLU^{cHIA0F z=Wcg+MqF!+L`h+lRPzwZx0(QEHpnzduLa0qZoeH9 zl1i(Qf1(I)#bZ>ZZ8^wHsaldD&pz3Rq1L`Q0uhKdMsT~;R0Lqw+5qOP3x-)BE;Jd} z)E{8Tdi{Nf_|2(FPjsLAm9saD%dM~&QSD;5!}@Uk$bAH&SC zSThw>5fT-J;A8Lr26Z}P*@D|Y0La)Pvgmdbz)3I<^h$!uT&-D%t9?Xcsgq!sori-W zr1QeViaGhB#U(TG!oNbw9Au=1Ao`-+`V=-uhgb>#6RBVIRIM4WOJPz$Y_X%ZP^Bg_ z#O03B)Z4ZN^flG&p5r*_>(0<(n|Pm#Q1m9IFiQYZr#m?IXt}g~UUF>SP`j|~@M!1=?@Uv24{9)RpQmy@Y+cD{8M~HA z#k$P@u^uRaRYKcqfXr$Cxje~0`%`Avy|zhbTRznO{=M~W+fZGdJ-Pe(RWNMr z8Rx1^MK+_WM3C$YclI5m?w51De%vv>`V{88LbkCKA(ZEfcXT}Oe6G-@k!H4yIyz-q z6``(Y%2m^Z2WxvxOQ^4RMx-d*RrOzHS?<0v?Ibf$|Au@7uHFYNf9SJb0^_wPpr!U zN?1K|vMpaQPBd}0wkkB|i+EHv?>yPc^WeJY{Fg^8svs-Qj18-HR+;V1Z_Bm8r^N?e zC!IxtkqmsfwSIDO!4U!{hbXL9!}D}Lm)Fn5p+SSMav42x%P>yl!$PDcTSRE=vodq` zV(mU}2HP69O8#80uCB-bLb<*-d(-qG^+STn5Ou9J9Z*u_0&UA|f07 
z%Tl?Pj@!o8);`}jMVei0D;F7YJgA1MQHcrRJpnf2%b3(t*Tw(mKY36=)2Ei!6e$5Y z+WBoldAd0RSa}h4!~{i{)|t3~o!cZMdgUmTanbWf@~l;DT5)iR4VUeZqk7!Gf@!xL zrnUUOl}gH9#niz?^UgzCrKt5iOb2x+0WNSTu{*;DowA5H7Yp13byqrivK%B+WgEE! zq+Ay41i)!-t>BvY_jM=IRq^5Xa%Lfx&K04`9U5y;sB41qEqpx%3p})LC$2BmwpX)l z_(ro5xNyC)AGj_b2Kgx&>d9<(7Z1)I3U)Jw>;ov)uzvuTE}w<#NZb2cX2E$61>@yu zmK2C_N+FiJaCt`A$=;u)zI2fZIAeR)Pkk6JB%AZ)SV~*+#opT}QJ@O2vf%G&mJAp$ zHi@+^pSiD=D-ed!YGv0sMxZquDF%!=6%wH&WaBbGTLe#l%G0z?p7e8PI&@nP*vDZ=lJ>^-w zm~UF*&-qz-^UL{p;u$Bv9gG~C($kq-z9Fh}hM*yH1+9#ZN^>UQjHn}|N^@rTrQ%e$ zGLNqb%HX%`fS)qAk9e4$cN>+aQpuHX<(rKPM+c)0DwS&lSI(I<#cWzLGkcXMFbpS* z{i)8ulw|L&4bh47h{dV%9-xsUKX_Ht(Uk{BoHjtG8FBc*I#yXVfM*HyS<@+3WHNh? z^*Rt>h(HBPqRAst4GkA2Dchs)*sp>j2He!unTFyq5AW^>au0GnZ!P$AKh9q*RL1-G z%uFba#Vmd;zZo&E9&2i5^w&F6W*=)qre0o{u!Oks!)GHW^gpA*s^DGugFPCRev;#x zp(s@vOy7b{)ebXscAF1Ef5H>N5s+=8f!4Z7W<@LfN`-iTC- zt+7rc%Y0x-mHpPHms=)=-r4TW7sB_BdyuL+M>s^*QDe&J=M4G>xDxo^w{t3chs|qM!Cm>ZvRjXH0udREoNwQz_ZI3Fp*x-zI81 z4768VCA<$F4?mko`t~)Jg{(*U2pYOXxB~Z*NS)B9#cajM@t>qCjiTWtf{^IeqA0~pu8fwh{6A7an zC8Z9Et_(OSCRq%+CIrEUcuL$Y!XA^1Y?hM!iWt+lxjDF2a#xkQ8aXpPvpwE0Gri4b zMo&D&GFmn_&_Q+?iIEgbQ;y;i6g{0h(I;Ee@s$Cm@vNnBSNpzE-0m4z>GoEu)p=c= z4YwRO+(Nni;JcK#PP|ypF)M|&6;UQgPe0hkW2XnFDRHndI4R^Za(Gc)+zRsr=t#4? 
z^kv$z3XJcX#8Klp)Hpj)0jhe>h(_zG;**=hQZGA2gzuNWut^3xF*K+P!o>s|F2;Zg zSu|Ihnb&|hUqVl%kfX+ksKx{vJtSf_+Ep-UjAb5S0jtrHK;9>Xyi2;7!I;-?BNVa` z4AnBG)+(dbUJf5R%`LQ4e(}7yul2kHrvBu4ZS%dE@nPEK{kt-^k62T;i#57?AQ2rw zcl@~%A96=7LMdDbc1MZmBGbEMwV_o1$c3xhM}Ml10EGcK{9N-5Ydl59ihhoswZtyq+M_!9=xVdRi>)yA;~HpDY2f?t8)1AscaI zdhBe)97sL~FG;?fqs+LZQyc;()Y-#{1kT-;7I+ic7M#Zu8N->Dt+@GE`I$Q$?hR}G z+n$uLLXu&A%(l*N_4SOT!>0GcY?Ory(X`6`GT(F+bwy?Y)8Fr%9XYh%jKxkldFmO6 zM%${PxNnS%$Mo#!2FQHGq8TGJkUA@t0;B6az9f^&XWkS_6g3vWLAH2Rb+g=FPJ zw)7s3hA1bo8Qno-wIzx;x(UZ}qk8wJqmn<1gvO zAldmAuM#pes%AF5x~3u24S1K^?TK*Z_vfMuC-8}V$hcC`GJFrQSf|r;y>G}+IH$yJc;XYlNHg+94QUA_1+Kx~R?oO7{ z{D=pP2)TS(Tr30gq%9v-glz7vR@D~|s^46vKNvH-F#K9X#r=4u6S_IM$IQ0BW6LV4 z-73}`J#zoDK%xDUwjzQxS7L_gj-#(xFPNwZBCBTq_%eN}W12^&CfC4iqt%L&32{;( zEd5g$3xG{X(eC>gX&$b?t$;(x7J6YeLl}wzZ;-;(=}UJgM2HGCA~9g0I7fcz-Qn?r zY0DegTLghY)~^7Ec!)3h|JzM@q%iDe@;YrF4`*j`E-cB8SQQ`Cu#;6%#=uGzd*` zXeZ-#?SVMs)~i&0jO=rJaL3EngyWlXfJZqe^*$^5%WSs?($QjoXWibi26lOg?T)!q zk3I@{`yKrzdr+zT+vNSrth5x=BiMf2r6H&NyyIHtNkvcMdT6a!!$J&RVPk;F(G|^TpN9q^hiFc^qM1EZQD90mjJpLeG?HPb1FFaB2Oc>(MTj7|8^^R4QW z*k;mkxSm7#(tAM7Q^drr??g++)}{*l#n24A&e2eRjvlaCTLv;2iCo5P>Qf@uay_(N z9x`SreK88V<9^@3Kgd5pXqb&OClpvwd@|jHtgvJ0utRQEqVlte!(o0{mBxth#}P2; zs0$<<-?rRZe;URKgc!R3z=(ptkoTcM)vbi+T-2$5!i(--$V0X8F*zv7oWot2fGLIs zm%IGHEivurid}EV{iEb$CHN}c`;W}9ufsM5gucT_B6s30Vcoh{uh6Y3`99c1=R?i* zCbbhSz(C2=TZq#n?@TU!_1&YTV~wu5K$1yZQTk4Rw8TI{eN}^dAEbFvz?XT0*0$U? z;^kw=3QgJ6tNhSZt!uwS62qx~Y^#F&l}NYedobW@g4z#xXDvP;#n3WHTy7wxR3n*wUBl zgC`(z@Xm+NjOEJJUzj`dXY!c(2dg=^rHZg7dK(yWIsW+TZ(A_7n~p2%SP|=1q6a6>3|}!ceaZhP*op2!gV|29^u^ht8Aw@%vyNAH+Qsu+7k8zr=}CWqGRwMh#B| z%rxqP4?MLpm`6le)`P%(CBH-;?hHPg)X#Ki1$jqxMRmu~@zl~nMaegN>hb4Zab(LG z5IANd)iHrcJD_f!VRF44=hW7pr<4vj#@lb-w$tM!U*Hyq1Jm9sWgqsO^PZFJD}hU; zE$5B9d@j2%>yEDh*XXtx2*|!yuYd0hBswr7WBkz8hn=3Ejek~o8V!&aLuKT#*#vD? 
zMGkkK&iN1EYGHDyyxH~|U~~Ss^C5|A`~EpQpTF`U*fm<7D+C;e1;9aCvbhvyjJUli*-_-+cu;R$uCTh~P@BW77;G1528UEIx_fXIq zeeUdLxS9~Owg}HQwg!QWgeUy;(x(aHzp_}Rc%Zm3K7&IB{_aR!Z^9&?DMtIZB|ryk zI&=uOWs++yDUc_ErHgRA7!F;ki2j-YgkA4RCJ7m0N?;eLr=oa>0l7iQGBS!$ATKf) z&N;VJVD?8iCgN}ZC`37mj#{Mf?6`gtmP}mcxE&jZO1tEbYn^VTAyjX4c5pvaEB}yg zcf@{#oM5G?9sW@v^Df`esCc@v_sYk{+6E=(8BDue?Ml?Dl2STlRr>%%DYHG_(3p5W zi*AN5JygtyKvv%a<+f;47bWZSO2e%oa#r8Hp(J?y^zQ4TH&A-i;{2973Txl@bSbZjhaREcdycr@fOaUHI z>#we50eWD)p+{fTQ)7fvjBei@THBlcKBa#Fd5$h%Jj|G-7}K(azsO0>_ItTO&?kgC zrFO0MtJ5J|VbD*uC1Ue=`}~U^OQtSAwKt!kG^!F=eiNc2=w~%-8`YX_cKh6HK!Iy8 zfilvLg%~#x&|3qD=~2Sy$es?*X0qknUK+g6%eKt2J|Ipv8XZ%wPK;gpS8NB;BqMVy{7;(+@!NPb=9MZuO_+S!nV!p zxns?7<9k=i(J{OuKex z&aTJhsI3GTM$AeG1mYK~rr)Ay=IKuv%#B|`26m4rzjN!Sk;`qh>51e@hTpKJKF8@E zle)I_)mN5yO2FZsU2w4s)#5=WwniVEA|^AfT0PfT$r;^dI>e_b1|=Wt_2jJ7;*cM} zo6lDv;1`4=J_SYeL@v8$Ely9E7pV(xy}}JtOsd$joGKE;Vtk7CW`FFEA7tZUW<=>i zz-hRH?n?wNTschQg2EZZIDWRuUL*_gowGhypu2Y;t_UM`X|eDKqB1{EfM&_RNn#42 z#)edsCpDXnw}ZLny+T18dRT9ibFZY$JcHpjIBhs#Zw=c%)!?d_jX)Lx_ zHKd=yxdIe8I2HTaA+ov0d~{5GuB~-y-QdypL`<5*SZp6+G@}2cUEud> zwd&MuE4MU!q<%z9F2w;2-+Ohx*1d8pdI@RKE`t+I5I&Qby%mD;C%5~fV=_dKIz&Q1 zY*RUML(qYL{kx3z5V`C6*0qmM(VnFsbMtG6ZFB_8OI>cjZ-L(`z;a6Eqbz~Qqyjn- zPs8BHJAvM?bz|0gCWo)1x@-nWKL6vR`!^*h)Jaf3!in`5Yv}3nMQ5N#>%Es#cIY4; zirtrIjFTfZTeed2$zydIF3>xp+B0-HHV@E@3TB^`m ztee^+!{42Oo|q&j0)6o##u;;pgMigzk)pVEL2yRu4$B)ToG%Tp$& z6+fu-2{PTMy65)!u|boU`X60KeQoLM-F?$zJlVd%dl?(CxMT92z!|`GhK}HdGunZasp^f-@m|NTcOX_H(gmx`Y8Ftf*bUG77n=y&=!@}b zIQvzNiW=cdE&U+tz0FJ&W8vkwJ2_pmhFC+wk zouMTx)c?fU_-}v)Secob{y&i?Y;5fRSL8{Sj!Y~bdrUVy!%v=V8bE+fg18R=GqABM zTbPbM0!bUG)9IkBb*}@stJ(0{!1wdf0M(L}v2?^$WYu*`L_%rUA@N-i*Hx3vNAmNq z*3@fGvoaQziBv&Be2caQR+BC1K|#0O(W|-j;$u~}1EqOm)pgv8MFmxDN>x&X==S>T z%EB`B-d=ljC4RIG4u08UaPntr1XLXPJjOiMZ$j#;^C)_Eb%~WRVj)`;a+@WWT&oM<6)2j1nrvxOcGOb=Y=tt4W~(z4odGpbWLcLKJ}j%Ac`>>7veAq8vd`2o zN!iXM9a8dyWXWj@Y6(&^jkKt=$l=1G0!bfa71YLYRf;{Ov(7kQDtPqew24N^DXKr& z73vYnBDzMukLWVVV-mm8A!!i^Jgjak&`9o06)DsXAdyIsW#AnMqcH4CLvdte0m>Ef 
zxX_Jg5+!FZwx}Yxbvm0wgR&GX=N7N{q_hs|3hNW+y&lZ-wu$(J~}BIT}YUb=sdGD4r!r32u3#RG98ogoeZj^Tu>edewHvm{X<`F0D zz`94m46M0bVK}Ys#S}umfJKosb?^=Vo4IUjkeAy255B$vxN@CYGtA7)%*@Qp%*+#J znlLjnCllsOn3Qi<(@;x_Pj)iv}8-{Dy4{T1x$ZT5`_Nhh2z5b>tFl3Xt9ryh(6ZNDbB z)SoSY*Mg*~G-&B$8CH$C$}%8q9V0nT#v=zo!piWxqJ`(6-aN=zv6LciriSAZ3d=91OBwMqpAOjZGw$?4z3AZ*U*%JQFy+^a+eeR1Q|||qq#3qG^JRm z8RmYC$Yf;DH^%$O-pbu%zZmrl6@ZPHfR`$-mT#I3cR)&pitIKFjJSIE_Qbgu+7Xz( zy?Dh3|B+EV5(j|QXoKz4R6HXA=hLNmg8{+4jY(aBj*v2j3B@iZ99Z6yHTQ|IqCIC| zp`VHji=$#NAj{`S6a|BosAkU!@;3e^##Ga0x}^~;8t>qafOevQ7*Sw~kA_Tw&?5o{ zW>q_KfJ31JV*fhho%tdT_A0hyTOuz6UBz&yqq7j@dS*l+XA*=K%wmO81chv+@H2=@ z#$kZ%>a3130Hn8SoE`+UhMbAtS6$fvMNG%gCD>&p|B2J5x_WzAMg?GJneB|6yM zc81gV(Pk0>$*l1d0NJZeDGpg6LKZFr0*h%w59_-6H@%Diwu^Z|Xxb1;;ha7N*b3i;+&lid`UJnJs zjw6eW<%@g40!PKOq5{Xl=dK57WPt*rb|4RRj&mSS!BPnuI=N87Xxb9sy3sTQsNUXc(i#h4O&JNmnNc2E zfZhnUo|ovYI^rg)eotSK1!~V>i4a0Q^X(wGeob&;TSak79Zjm`h664qM(7ICmG7e@ zj@w=b^eHI|VFTp&L4NdTfG8M%2gijj%|r603&yG?G10=IuLT?TIa1#*>13|$MQ4t( z4+b5{7|w&yA$Wx;$M?a=MT{p1%b!xhh2$^eR*9Xl2a)9*u@QtJ32UnDx)3;2A&I5; z9lmoMXCzn#-J9e}%)(pIu*<>Tlal~e2rVFXig4qZ0L=nHW98HwUmPMmse>dsQipqL zgRBXOT3lxpU;$%40R9tVDJ-z+fb;l`%rNIyQV>(xc`A+4N=@qghSm8AVxCmapP*Oh z=!>mz5f!suPt`jI-Y%5RS_T*faWP(oShypiPz(my+DLLT7qle;yi$_(b8v`G!ymFq z{3bnU^9urExf6AAs{^PMYs2NKuY|T*m}48*V674xIMtS_V~$N{;D?+PYxE`~_Au@T zO3@yn^@GXx4&iRWFW84kkD!lh9As?1mv`*V{V69ZX)M0@crlP#ZyPW&W$9K+Od}O# zJ|o6HKWY0(T`-5>Tj5Ys#9(?t#M= zC@=6|#A>c4=Doyq9DNHN_`yu~<8BD!CC-swDq^n@^#cCU-p+BLC;PhEFmn~!YAHe1 z9`Mq4l!reT@%^K(dec>k+KXv|nQ{63g$D5v9n#Um;GaaiDPL2HMKcfR#z2{Pl8VcM z548Tt<+^x-b;J~7AtA*G1+haQZuk5`5rw8bAy))ZHcDmuYC=AM=#*0M%?wwAdfU%1eC5BGX(uhY5wYm&L z)4?c5GaNAwCAe{rr;PUzbVBpbC_<+kUKXjgVi@kcXTDXqLAmk9q8$2M^5{uoups!k>k}C&xJW-;IL6w`D0F z$j=Mr?{CB3^PFNQeqA?w@AUh0y8Zq&Jlx>b(C71Y14z7m_>ntBKZ~ZRq-q@KO<=}9 z$McXE?*9Dw$uADUOO_GC17*`Y`}?{9f%5`?U8T+^u1H9@@Zu_??50M&r-wZbOYQVd zf?@R>P@P#sw6rqZNO2raB%mPk2zOTOH=uJ-?B}gBRIR8(DBgCm7<%z>S*5haDEqBg z2^qaNV&xnxRQtkVZkrtXiKQ2lQeN%hiA6vmPOu~r0BT+p2>_VF)|i*q7wi4D5%u^w 
z)v*3au^P@NsDOY}wG0VWx1xL9J~(gYOQViFn$p_sD*`o_JpywEAh?pg01zCE4_)X~ z*;f){gr+Uf9^brc&&S*F-EUfz8kZ18ChV9eHOvf%SwugO#(t||Pk^zQU(_SZkUtjX zk|ce<8Dh{c!OBtuM*RK@M?=P&Wk?Ssl6@HFiHI^9`Lr(Zs>_v-s(+I?js=43 z>))-Oo04TF@fn&Nh4Zxlqe{6OKaT#>d*Fr5KoA2Nowu2p5Bh!4*ZJ2NoYuIUp*ZJA z+S$)fryqQq8^{Op^nJj~kv38PNl;5k#<-$OK$gI~Dln(t1eB2J>M!V>MUxz`$h(oa z7CKF-_3!a7q|w{ib&PMG*rM#=;1`X1KF-&{OsW29- zV+Zd}=a&(|T^a3B6hY71GhbhcT(w!OXEF9l&1YF*{m`RuXPC>{p_qcnXGO#&t!csJ z=4L5z`8<@z3MC3Oke&@^k5e)5LZd+yDwG3m;3%{8XvW$Q0zXu)YKh||MBuGoPhlJ|Pd9h*IGxO)C_A!lge>Cx|{t8y0C zn7gGb%QmnWXE6CaY~6G8mh~J%bFQOf8s>bU_`A=%0Hd6i$OHmF`Kd`=x$a%k)$xXF z!o$*|MPUvLj_;fh`>YWc`6&!=7eZeIi;Q~QBs*`#@|YK3m6OlzTE~fSHYo25`FP-a zlU{zDnq4688Na!7Y2H#eYx2?gI>W(li^w<@ceW}y5fkYxTW?S@D7eosdvMmtP{QwX z*U@ayMpvEn=Yy}nc-ppzvnke_9m#3#r6v({7YvC-m zA#24R1UME1}~vRMD6W@(e-C#Rl%;Rlqp}Bi{Tbs3_JpUOq|B>vvimL1TG>}XV6cU}Df_+r# z{q|FhNM0&ZbV^=V!n48qN1BQSGm_Yv@7=?{3nUH+qctOUsKv zvxU~EOQR;mVikiIr3!Fg=X;d+OKml(TdZ)Sv{#HDBht4uaW+G4d-jUnkudV|^ruwU z2CeopV>b0yaxB-GUa`r>lz16Ng5EoQ)~DyIvNdPn`F%#c4*CEai|@#$Sdk}^4lgmD zaFJ&GeoX;&z=;YVSmIVk#)>p5ONz-9qbtTxyH}I37wn)dD=)wIfpZ0(z)7O83GTAz zGDf~n#Iz9q?-IrMu~;D7<=fj^ZehVZW$W2@odd7$c_j>E_*fpRizmBg@us?k-_+A2 zdxlEtb2joAno)_@(k#L+yI|IzLUwlAS74L)L|3R*AJE*&C1*cJKl{HvCV@*FA^(R> z&%b}>3wz`X{a?=WA~7Abf8WJ4%e~r(rae0f?+bq19Ufm6{^Wl7>)ZYLdh&JJ`8)}y z3qab__nSVCv=rPi{?$xit+O_(kzzS$s;gW*^DI%BDk(nzVPV3)0KM}tFWv@`Ur>5?nDsV+CL zP8v1iiHJrKJz*;6hCVkquqlIGQEN=%H0vx+QfRb>h#o-Kwcpub20ZS-bO7eGa+u_9 zks56y-c7x(IYst*82-G}?k5aX2z>qe0*pRCzYTBO7O#EIUjq23 zVK9`MzxMth%%fv_w+IbCjY#g+&UXcJGhH?dT|#YlROUYEuD1)B>8LMBr$Fbl>9xLW zW=UmMEY7kqJzV^vF`%hj^ZI)^3|%+hMdZ$l%A6_-h`Z_6-`}m#-WI+!`uy{8ZM_vi zs~#DV66QwqMuOG_zq+_u9Q$PWj>)>^?TL z@uXa6Os|)v#!tt~GWtz>TPF9#x>8^tJN+v!M&Cq7c0;Bm~fVgtbKOEwU zyX%zG7uEl6$;>EQyiGYdEwOZ=CHa81g3)K_x|R9gVh;ZmqwR_P&$ETH*2)LoN7fE* z{p-`Cdbxx?c1!r7IT@q+lf23D9y&6#_Vr|CMZ0oT4WTw+QpXU9jw7jEk__$oryzfRF|Y3EJGl9 zfvHmmFCC4o@}yQWLR#tTvlv8Xu+E94atzDBtgHk$y81AS^H`>3Oldiyw5CnVqh#il zud!o#NNK^Szs)s{YpPc)m-Hd_IS#;}yCrYvVJ?D3MwZu^t>$vAUq|p6(A}2XYsaSX 
zXFs_v8}16{V!G5veLgbueR#N@Uf%~aiUq#jEPq!3MtC#c>6K;wE@$+WHuSbUKVQc) zH!@eOyIwaIzPDmy$cmTwba!}T>gVz@f4aS!9PjdS7G=^1)wMfqnr_zjj^m07{PD?e)5`(>dPlH?;cocQY+Z|D94&tq}{BV2~!kr5)uY`Ievz={7i283>R&N=~j|o^-zw9pHdmUdWv+*$d z*n&BI5->FG!(_6%HyS%6{R~IUtMNKv1Ryu&j?cMppY!1{c;%UgtnKdhRt55fqQ%1# zuP}w`pALY;l=G5sv&su0AAUxGarSoLPGO-%n||g#Z5x4M?a8c1rn4#2^_R+2d~X^M zAVbY3i4o9pcn(*`-`&B$a%tJs5yo;^-n|xcC&&7x6=Gw;rEKEEnN2X)ISXUI`!O1u zhwsPBjdZ-TG2MBuiEdX7{Ekvs%wL40Hy0i_a*ErSc6YJ4oG|LLuY+y?b3?3gEqHCr zTmm@HeUs9;>|-`M`lqK0&FPrYTzrF?ih6#*{>fS2SxnhE5KnM>bg;()ID8_Cueu`TCD;Q!qM#mUX` z|Fl4{v;AXs`X|&iJ3IIP&jQ8G#l!P|`aahg>N^p1HU%Ol{S~>2B{VVTbW&uC0>7f< z-AuJ;X?!6;z9hks{bt)-ikqLkxHZ=xtaw%|x%HExB+PEI@W z?2ojw@;6%w7mF!A8Z#($Dqh`uTKt;Y$uz|1*_;u|lZ=VjIQ^;3>{h+P}k1c&e9KA@HTD9Ou zn>c}7RwyK_I5poxrg_g-0~x)RD?p~^?c|Y>{6j5JF7)1!Y0*olU6lEqwOw@g$=uPr zj7!XMSRJ4Br65c?|MUwL`0d;I@(5>(EKCVV*1A(SE)$(lbi@}ugq|G=B8z$G)S=PJ-lljzbGFT$5e0m%Of_Xja zgbCqG(3LA1rY)MoXi?A4ppUv=D}E%l!g5%S~fqfZeZe;jP81+ zJ!!cDb2Sgd4Kc0Eio@>AoQ*>LrADhm4Y@}OUtyshX21ywYSd+jc+pbjAp@jZ z70$PM(Qg#F{ql5(7IqLI;OSW}!3^11fDQ``TBJ}gUH@h`dr?a6BRr$>IJ)2@Qp*V# zi^=Fw#c~;xv1_*0G8^_@)$%yX;IFdU=wQ-sI45MPgst=Q=TCzyAdrq8ja2<2;CMSp z#xMpJaR>}agQpc)nc(F3>@G6o>L%yswO+x*s@b_FvUY;~13dN{ zc`c*%{49oAT;BrfrM|aLW8hS_!heKV7CqN)28&W{GX+d59t}cSh0iUZ>fl+pc22mX zfO4-#qj zv@9Q+0volcT+I_;@Hq?I>IZ9TC*Z^Upj7<>Y9u2TT?mj$r*F}VOSlD*&A9xzxh6Ya zO!m0*Yjg9fVA$$`nbzVz_34&~u}`Qv+7MEMCHa0?93;F3w=v$S^vtEqr&bgFh8LK} z&5RYu-l~Pzp>}ek_St(*_!}XR_`aiebnZ7J_$$Gi3!Bc0**q@G&wNcN159yt$9cw0 z4@+w%RWHLuianLlvjEBa1ldO`JDtSpZ1x1hV@#4rr9h|IC_Ik4({)1KVkThXEVG$| zeWN0+Cfs4S7I|~QmxEme_XZYygvKO2y}QR{%4f0#6i@w+>_g<&F#lXS}f zA={hj&qtK~9Z0Dat~3`#$jHEamf15K*|vWx!`=i9I-wieiF5%XKU>Dg)sjSp;}LIA z%m*+1cdZ#5x9_+iW{34sQXyGC!Y790!4vt}3@B&q(6haB-Ud{Xn{~*@6`FmrYPBYt zh9rI#9G!wnFpcd%EYG4%p<7N>VByNEInw;5v|LDtqYNu}(|^c}2wr7fB4?l3C9UBY zo*_&L!EMf#8s6U$@E(G8J<#b90A*XdgNLBE!a22zj`hI^ld_svn?WkZwYuS{$Yo0; z@C)69j18kaLP}u2t*Q(N%*>Za&6d#-8)cB0M>6-!mF3aM3zyU}n}}+D&)%4y8@j)8 
zjjV+WW3f63poqrBBKQuBss~jkLs0aySu1H#mGu%ly91EXY15gdt7vB#XL@W_O71?J zPr<+%RyA{x9H3j6Zer2Z(kN9iT>ybum1=dOWCGXn1?3oTuac}k-8dvj>BU1&Hjd({ zMGl5C_VB^_yjpmMH51bK-C_N|G8}ZNCm&vhL<4t{PtmIs?w+0qzCKB8k#-$}z zxBT>=n;6R2n-m-9r{`Sod#ItBUoli3qYaCVlR)swGITIja9O4!X&bNVhEt%HBN=}KV-R;&upUnI@Bcc~C>~Ueoirkj|kC8jtw{@6%A(qQ;#G+DdON!$8T+;?wj^Mm8=YugxS3OMP(wdTro# z{gr2CXl(`EdZeilN`Zlho)vh~YPtDUZzOh>k;PJzOn1fQ zK&*-T;&Pl=e;LLA9gP}Ywep%amog>KKyc3syz&hT>eF?+T^PeF2fDG2k-4$7>CPS@ zA)ULy6`E{FArAMD<;ZFOq&cA!Q#oSrPXp-Z;w77XDG;OEPfin2A2wLRo14h z;zKl-u{K)L-31M$rDvJS;;Oth!z2}Xb&rOMK1LXw`!k;M9V-cWO^?j#i->9_b}?>^ zboC}hO-$AF`n54s{+RC!I57KYs#<3y`)_x?%{R^s-Dq&{rt;aj3?-cX(YXq<4oc6T&fCvW5|DLt zrXb8zoNp%je5SsKq~}-WZzj3F$5QlWQ|JuqSWT}itnb%>Z5mqJHaC}GFS|B2H4wT- z<|6(IcI`RCeC5=Kf%c?q9EIAShh4))mbE0Fh%aaAsL|8QqMzHv=l3rDX*aQ^L^!Zq z6-ko=jE(lpLRbGXGCy@SE_J254&__Gn8k|cw0b-%vhO{Ra5Hv zbH*a#Kfq>>oyVA=I;M7BeXGPV=(5;0Fu;6P?{r>&yJWb(WY|x^4y5~pq^8xY|5VZ3 zZm1dUm-qCQ_e{sx&^9n9%S!qPVobN6bTn}dt=FMjug! z=jej%^MzC6H15q^>7HIM(sa}4whgEXORIYG-7Wi4@3OQ@P!etr1P3-DWsu+k{S#E) zh$?StK2VFxk{FSJ6ltU%*yQJ?GJd(LTIcBWW7Vjx{led?hl!znws@B@kHX1rS%tf9 z^|{%-7HVjBJS0=h`11spM*EwyET%=1O&<2X#-0;3q>ZdS zp>14m%NBH{sa?gMVq;N6m98&$RqD+~PLLD#{Kvz>=&YtB-?hxri%6nX9(*pTEzsa#;4Q1* z__{dv_8J)1CUk%PCVuf!2Nl!KKiL+C5hq`R6L<1_+Wfnx`0mwEAoQla=J@o{`3Fp} z#zac^e)jRtYs0SRZzK%*iACl6*ky!6qG(*u8e7vS8jhj1B0u!jd8U`{ZEa;{&)n|Z zYrvcG&=U1u0gp~RTn6{fwi|5sTil7;cF7B9Zb7o3K`7s3G$l+K>5W(#j zU|z1x(@0t+C`Jbv93`_E;7+i1Xr%PKHBHNGb6l%jp0UkLI*c2R_98Y7a#e5EZaJBkpwX3(7kNSBF1*(9kjVu zBrT9sLMT~cZK+#%{33uN#Inus*vg9 zu+cQA4%DL#MbM--2D)RzKu@~|&8@{4JNN#ciloA`TUb=Q4?sucOuemm3674)ne{^V zUyzW!S>9ffLWJttp0KMLQ4(#ldfC-bp2D>|*;A|`k*?GQq&V_{M1Y&{fJB4}A0LeK z(zu$d_RklmW;Y#JHchbonez5$?g?z$tlYJav8M9q=rQ(LUg-8)pU zA8dgr*)QVnX#UCw}NeRY84>w zIC3-F`aMBtZJ;gJ24IY+-%WcCnC{-zzsMz0n$j9gqa+$ilLGOu=M;9owR?5x?;AAe z;#)HC_4A^5-BFIb1cq7-a@01pui7rm1xMLt^3{h>fzmzKeF1x(^?gUBD#NQ~vXsN4 zX8yejnm8Dy0exHJCqoTn7wuyj&2r zWxIcT!>L)r-S_w6Lj>^4B?qzgt~R1nKHn}+=9+_wVg(HNZ<+aY5uIRc@7_q_k+-6g 
zHg`MzQ0njKMqYlsnpxAA;P&)@x?%RG-HZdPfTDGz<;+bN@LW5UyEJ7WYf#^=ZK90$qdU65icFhd2}|w0NRH+bzYe^w_`Qfd zMN0ok8bN#377xre#Qy8U<;Z{LEWUV!rKKAk{_jBAWg?li3U4X84crvfxZ* z{!tXXbdULEl-Ejc@S^+$Vyu~-p@F;|0%45;!8GBe9=pSYh_`NpD!#c>Twhm+&w2v) z9~R0!tCXILL*e)WL;%C$NHD*e}gQj>=R|HX~T`GuXZVI7TL4k4f$ zd>ENWRihzdNdD7_!1_i*&>=9YF7;-SWA;*zcn;y^@+szFp2Oa{>}&f^lCJxf?t$I* zTdD4$5`6zuQqGmFgVNcS-vEl1Q0G6qk#p>!Ar6OJo^8mW zHr2?sb4x!nq$-~UtowAA-8;AXhGU+9F=Mn6Zu|MR_r7smq@+P`Y49flt#oT^Asf9A zN+CYIdk#t=RisYbquR=DA;%Q?cp%J3D0AuGc(s4-cXsX8GEsR}?4ge^)V|A-YV)%- z?Htb=>Y;uz>uKW{OX-=5FFWo@cX>A$y#Stxxw?Pd9|f%=X^n)l-!yrQeR9N5CE{7{ zybmB=<~4cj33@VvrXud5c@-EoAg54ywzZIbO56hUuUlo@uUAAyrd%Be%@ganL%!JK zoxIwNcPrPsKd++aULyk6L;{)$3-J*p@=z1&*#9=YPY6=@cIXf$PAGS6MP~(eH5Bv( zd_k<(4eTpk&!#f(2kRjI?2Z=v?)rID=E6B+)L}>BTa>X*1JdU|oqX4ev_7p%c5&$2 zCLE*h+pTx&mgn28@02K0xr#K%kO**V_=wvmL&c9g^Gll2RN8%9HQdVGtb9KR&ipvs z@Bg~Rw*D^sw>_mBg~;q~1AoPcII+E-pa0j&`o-PVv*B~B=X(u1s_Av@j2QgwtmoWL zLuAumkKWHW(@j%dfrOy6HJ_J_-k`tcM%tYPTGAdJ1yn}6f;;suzrSeX+M!s0QI2^C zZkg8k7ivTmxCN1{SA?Hyt~C=}McN~{cHR3;r7VPkED&9~WHX}v4ze>4)7$?xRZ~{Nl;+jQI~p-b$rF8e(B)#FEKar9S4;VQDtiVDpBm-}X2`&V5ZdS` za^WF@!YhJ?S0jp-JHyG9ZEvkIFuk!@1E96{o`U*@>(I`|gJ9pZJ4L#GITIAJ;BqHK z_&8vCy^|iB;{sk3+5HP}PmD}u7RBM^3Sj9wi@pG&{y=6;1+jG$v>3Ab*3CF}j)g~E=^ z%ETr~oAqA;bxc1#4NmxIzPFd-I{W(c9u_=c8-I%y`wE75>k+OuZm3KW{xom@`{v*N z{&uy!J4$%#KD~bw9mDS<@V8_V7)9PlO358VQ|jy%r0LqD&kITV2OTYe+8h4G8Szn@dl~TKjU3ig>svOn zWG*CHx#6Za85VdJin}uyRS4y8VNtxIK@jhxzh;V%!god?tJB}#9Lk6sD?;x@3Y%<+ z&qe9soFu!GIYzU|D}@)}piNB>W%5WgA^e9>A%bAR{BY<7;NEANW(m4y0z_fnko{@6 z$D>$>)oIU_-jKJ`o?YT09Fqqq(FVW({mI>+aSTxS$)TvcIJ&`HL*Gk}1w~QFFXg0; z%q)UuVfOHnQa@B$R-B2_&f_6N?xYHgTNfNK~KykVs!K?IV-0zf!MG zK+=e)9tD*6uh>CKawwaTTo&>RSwA*toF+nYSN1~_ylm7s?{q09WHvHwa>t}#pJ^p# z@@K1&gSZ(Lse!$yp7@FFs8!tTth$X{%uYRCD+kF0ts~3P`dS4+t*fz^e4XS-%szcU zJLZ?(qBZ?T%MdfYfAbJEy^2M+v6g}Be+nN>Y~8wmSj;{hAmMKu!lw6c0+Ifg(F9KY z=LW(l?~L~Y${SwkaSgVBhqw1kLOK7e+)YjSaW1-m*QGX3gx(*C+V>E`BL6Hn)rAlD 
zN2*I#)Js`k-0~#PKdgJcCO_)CO9|qhvrue;JF~w=uK>u%-m_TjFK@Nah}3;q#eHnb{lkFUn0Zc5kE`jWARB0r?PnnlXafLnyCYV1?x5 zd>YLgFgQ&=bFp)9-v?1}iZQ3k130W)AhF2g=(8M?r?zW;M@sHw`i@QAQHVo0fAn#L zHRTIAeP@pL{b0SgwVrtC;29xr@ zv0aBgVx|>fj=>;19KXfcG-D^T-1!)cNqxvr&NV${ND-k!3cCI50F7{`IF#P~s2(r; z9q(ueld_$YW`{3Wp4#~o!Z9c5of4xba7Tdz8=a<;QIr%)1SSXuBog&_pl2vsf}my$ z*N#`L&bbJC-1``#VaQaDpm+pUj-Y7dO_o=v3dQ@NCohj&oo79yREXz1v=Jir<+vy; z835hQKnppYZMAVE`TK!rtu9}a_p9tKS~5AY%Q<{3!y5caTtmKekh{x2bjNg(Mno1tV~9jH$VP+wY9)%D6HI}wh>tBC z3RVmvQUgSE0^WrPQv~IKjYq-MR08X#xhVfC2QiWw)JeOrX1u1W-X8w25a;v-us3#sXc35NR9`#fFOsa4C_K&~iFj`nKZ*ww zP&;Ea_eKL`5?`9jL`RX;n*MOV?}Q*bB3Y5fj|{@LRd?obaQ#x1ccor%!hb66+${45 zM<|{6T=rF>_gdFjv${!HbCB%3TQWF5D#FmlQh_yNNUb zQfiw0rs@DH`6nN2#OimiR49c`K)e)unv(%Bmf%WiI7Xzbc|! z@x?nUid+!-34)o&=Y)UHT=`LRseXpG#9x$(`(Kp)cDyJ>H-tOQ$C$&eN?4|YqZV8F z427juQ~a+U(E;c z4e#qy!xu8qptIip-Q>)}^FP?+`Xtpyu+qSl1hNs>fnd68s)!~_?xY(gQo7cX(Ztl}%=8=eb5A;7{|R{C|9t-Y z_btcQ&~Vq#P!mW$sYv2b9>TFhUx#pIX%mF>p~X57D%lm|r7ovt)nianguz6 z8)^liQQ@wnq7WO}=IlS{eqY&yr7h&y7OpSo*?*XyYs1(ok(J>XS#zxsRQ${jo)QL) zs|647t@DSxg9~wWH!L;3Ur2XHO3NR(Z9v3Iy?1J9cqYYwUA^j%`ejxlVEhyALK-yK zo{cM~p`c$oUZobaIH^=Cb{BvbZHJ0?O1`51&B}iFXggH2I2~qkDX9owNipE}*Mk?G zX7)a59T@C9(6D_k)^}?+UE6o+d#e`M0P!gT8#e;&`j*`%5Y|)iEV%v>^zi+R{J^Z zWs|awlT(&gls!pBLqGf#fsD^b!9}GMAnT|1&^4fzD(M(?a zyL{~lo2r$VK<1Crym8kj1MULOI)uA!x2NO0-(-V7Q1YTFA&qGyq{NV!l+`n*d0j9w zPiLH}E+=ZHAOnFIeXftZRt;g^t<%%^Doz7=Z@!mIdBRBOVN>(?y|H3&#z|8?xkB9Y zyt0M#Q3qw_4{@AxCAOA6om&$V4`JTabFAkz`7*+LRWam>DoD6vw9_tAlPm_kMM!$n z*D=B?F52yQn%bjpD`)m`Zc}-!O$Io_!gIW$g_n7aWzwlA`|<6VnY`(|yEvGc*W&G+ zTb_z`)3c1&Qt%VB`oA^}rl*5##ry{!n`M&p3cawY zD_MKvY(Ny6s9?h|Y(jEMhO7b#M5=P;dDH*g|LID2(`3;#B~;?*<#hGh_!bdRue|up z$3WU8TWBe}d^Ho_ZP*L4IN`j`j|#U#c72UvJjM+yqDvGT}{u{6t)!GMTe zdsP!jav)kqR4Tg*iEQ?s>}VXC(TQs1Fwlo>P(5L=E$%DVY3> zInal3lt$Hp_2M=u2<~R-)>Y+pEZ@at4loR!8bGkoTwKkdGqoDc4yn^VL_REs%S-Y53!Pxc zZ=>d{(EON!Y$ns64 zfR*?v~ z)p7$`V>=qoJbF6DW{iAw*V9+G2se(~@fw6cs8yH#Uv#+pW;VsIlo6B~`tsKBt3S)! 
zU_E^3wE}(NtO@J{_vG5X-Rvmb=OX4-{ha!#%}>v7Z0R1CplFt&BPD%qBNyqOwPZ?I zr&X;f%M*Q)mPZF~z1WkYjd>`xRm4$A=R#>%#-OHqcKqneHxg2lrdfTwQQca-_KV9h zw;JAyj$c!V{~qfYYgP10gHQ7oATBo?EMBdA=W^_lo~Ts+td!!mr|Y`ct>SoBpN8$D zf26rLeXMo+^uTq?&Aw^fnp@wWv+c3h zkGqqO_kt6A0sH32Z-GNZFNorz21%rqj8UR*Z;fPNAD9u*jV%FPer9kM#8I2Fto)=_ z<0DsPHwLlplb-)(DScr?DiqML{Ot?gFaeqh?+z_(zlUX_8iIMa} zIHQ^14~#|Ae#w2ewB|0bXPMwHOz|2r5G1)Zptx!sbEFRuSbq?co#OkMFIRb#4B0 zzk#a~1Q#=IpI3SaM*^qdRWXjqb1d01?P=PBba+Q-fqyerEasE0wGsRxN+4(K(XKUN zSaZX~EFF62=+E(q!$f1Z3vERF6=Lv`Z_uuT)Ea^-a)BX}zdy+jD;w&CUHlFoXI}$V z`c=K_Yzx(Y9cm42;Obbrk2HsX(bd<{vaZA(5qj;y9Xj`}t(VV7b1!VV)G-ecNu zA3Pk(9Hf$-l%K??A)16<{SoFCuTiecMWxnFEKI3c6?}yZCGl+4y}O8zxa&CX8M({4 zF0>9x;SFKumN&e2)fgIfSYvEbd}fmnYoE|UPMxlOdIW-CZ|W7f9Lvy9pKF%{qx#3- z8JQ+ZFwXr-F6^9;hJaK%5&us{cVBRq%tXUN4XWAv-VwwuNTaTVVY~N^qG3&CGPiJq zU+ZPxv2HI&0O$e~3${mzw>y|+@wZA0M)5=d;0&&H0$jES@QG)@x0H}Xy_l<$7_+EugWN?-=yombS|;`7+{az2ckssJ3+6KLUC5Mj5Apyrvrl^eL-R zh4w`ejO#M7`dMtfV6k8{=>$~-dgL*KID;#+g7AWWeDmbso z9IsYjauQID`b^#j5FCU{2pxasM5#2O=__EZ+omDCyaw_H?ZAbf_x1mf9ps2 z;|{fC1V<2Bx`V<8x!8B6Q3pSZBjvU&|AbsOo#>A1OF#%gn++t~zFiV$RwXJfmQXP+ zzgA^n|E_{(KYFTR8rZS<5Ux`3TPaZZ;2!Zh8?BgG8V*c-=(!CP;#a_fM z9;~G4t_LJ@!z3IDlctmb==#Qpjg(7?XJ!p3%d`!I`};8Pj)4r*I-PCDR2|@&_Mj|t zj6r5TfR>$lt^}OG+B}yD>Vs)j6x1kN*7VBi~36-!!Gp!m+e zRrR-Xp?LG+n2Io>_C)>w{>eob=$S*JHp($OAbQ4IlOO61>q_%N@^=~yp8{e|6slXBAxgh9El3zYTTjjq>0wm=-J=-Ix)l`m3QBou~GTix?wCS}s*EKtfZ36D+ zJGrpP!OZSqc0Je<+Af;I73$Wus*XSVUm(Q_nh z@M)sXna?4~R8>VpLJ1-}B(#~rEEYc#rM2c#l&^~4VJ&T9J|UwA5t;l==G%uehIpQo z^I#ntosqj3^-mn-v4z48|Cg{(2wImDsqC@bxns~3XDFw%f`dwDVVYzFaWO7XXJHVK zj<9d&L*}IID9ycoSQ2cNGv@m%2~6Pol`4`Aj#0mOKK=8cV&E5!fem}XH~TjX+@zaR zqi_uz=|5aGe~)#PH?8NF4kR!)6Gc_M=|7~C89yZ2?`>WEGj9A#f#?v#3YRe28pSRjX0B60o)wR zgr<^~I(aP=tkb+L?A&r(T`goTpVQd~t_fdzO1qXD1$(%<^N2?5>imcZy4`lJ_TB;m z13h2PHzc;TH@xe#Stv~CqvTeB?ZU}{W8~u;ARV9?5FBv&y%W?VIDqR)G&dnG4yw(7Tv-#Xx=wQa)_SUs^k>eLs9( z7y+n$Gm~}+*Hm>T|Tn7r5!O7~D}xk}e5LTUSlO#G<4K zA09oR++gsj%A{V=D}X#~_({|%FBx;C81(Wyk(1Xoh@CtY>1q4?W^y(denSpF{>+@| 
z$x0xH$3k~c@L*2}j3|g08QFk!vpHh7(_ZvqGtm$hp0GdY&sPpa-zH}Sr$rDg&6$oE zcvNm%b?GFIoCVn5<;BIpqv4Awvmq>)BdB00@rA(@FPzn(g!~S>xs45@Q?BI77C|lO z%RpKp3!1P`-OrRrhg@zcY_gIRBRLlu3H41YdUL~Iq-C@Tc_ z#v&*omP71E_8%@!!k^(Gm9;B0WCK5c!DuSRZY6+);z;INR90D zWuaq5K~G^+Vagp_2vk8jNo*le#SRp(7hZ{XAqx8YX=G$GIlcimxW^?*?t}LpdcLxqoK%-UQ){90P&Guez{)u9 z3auq`;bV+uRCkg_Hg@P*Gf2H?P+;ibZ-`Jv(@+RupDUtz0-PGYLXpRS_9&kR2a zUt!?DA*~J#a|L~xr&z2w_67OcA5She{d~9HTlFleP`o+eTk4hz5ynLh)mdMd2kOno)em6+7{TSsA!-=qF?q`*Js|a zgAJc#G@%viqO@{Y+9=aW)bkpbFZnzfqang>{+)ATCY7e=x)L``P*SloRhu4yanR}$ z1dp@~6t$2*#X;erN=gZy72d4xms@fRD-r8NfE-{Pgrx;FejDDN571SHs(hhpmt(+< zs*Q!Ua9?H{&pU1P3pu9nKmy)#dym8AW2=H>5C_Doic67w4p!95Mvl# z+cR%z7MNUE!?v1;mRJRyQt^^iSS#r8{DHq{ZoOYI*=MQOy>8eNpmQ&uHEQjI zMVz~7UD>)gp9y$)7(71kn1q+m7HkB2tYe+z=Fq9%#2CjYb@762zrzGt46(tXDr#5Y_J?r}66E2}>#>Pe^K}yDeZjB zP`SQQ7}6!FN}fDNdx+@(T!hdBWr!lQBqGCOA=(fQ(~(jkL17rkyJi#_g!oaJ4GETv z84_g62H`P?5y;{J9Y@Cn@&&=g~~2~72| z$xCJ78l})HFnC}#bO1&6QB0&v2XmBbL5C^Kc;<=OHhD1wVFuDC*={jRGWHLW_ubd~ z2IS*4-vyyN$uxGz5n~GDFmC|1<`!|floiPev=4apM1tD0l5wJY zx{ye`*{M9g+8%LLt7=<4{8g{RZ|yLK2j5&&I)Fl%CYy3h<_Pf^6bmwYrZEV1h8XNBnF!^j`od zAA-;kcMmP!iR*zyyxQ2G^R?MHcMEo~mtQ=R2$K>AhQvPxFKJMUB(!ZO zlk7r*Bskr@oXbqanl@FUlC(1vYqa8(fAkEw-_91B7yFO!gFZ7#MsXR%n1(1xSNfAd zP*Z59@pbe=gOn#U2CJ7nc32W;j5=vGG;Vmp z8zC@r;#U*auJz9D__K6VltjA={J?uzf0nx5mJKG%9X{R8m7&CN_l-J~@xO_v5yzO5 z+?3A+hvftOHAn-NMYnaqXLgIZ^1H9vUJ|>fk3HI&6#;L)x|@Qc1r*=kFyf!}g-qvI zDLM*2g=AryY)EM9(=~Lq^cQPmsZvYbI!VjTKLZ-OS9fg>MEw*CK|a&OoO>M(qV`;Y z`N2Y*s2jF6gFlk6>N9IHzvFg%Fl-%IQ+l-_c~}*73-O)O--Lbd{q?Z^Hpot0wUC0V zKx*sDIaMgmqZoE5M5?*KsXd%iYNV*W_|aN~Uw0EtnpWh~t3~4>Lv}*yK6B)YK#ug; z7{CKF6?0s7SvoK!0OLq72GrNKSD_IGzfuTdLn|A|`^l5V8T_itgA*>%cl_fU=c2dt z^r_2r)X0T7qkD1uU^DJwk~;;POt*H};(~1bKsU!k9xdgc$;G3?4GX_fgwqIayM4 z4?*o?&rK}ebqdJn_^qMvm`_5 zoJKZ1CL7i@`(e1)L6ZF-Zj~Wbi$l$!QFQwoe6}Y?%KWj+P1cZv^IObdDnu-YE#lqT zf*gs~8#}FB*JNk49Vu@@w;$pjSuT5GcActL5a_wSm1rZ{PY5Gzc+Kbnw|V_Te1si* z31I4}Aa&cc-u*27^m#1jN*~p7evUb{){B<51|LaWy!#rGg?uC_UBIvB)aAY0UiG(K 
z2Z$cp2v_G+!OjweH00^ph`mk{)P6En@Ch(ex-Qnd=J+f~t1#8&y@PqWbA_fm_3U!s zXL-+YkTl8W(*Oq1_GfWC<-}kuRAmx%Xl+j0iI8<~OY7b}L)A<2(At zkijWIe3RD>pYe#i)F~yWU z+m=~qNbr0Ka(at}*-7vSl5hu8PFEr63q&g(%>7W2{1r8lcAr}-g`=g{~TK{bdxV6DKUW4FOVtw&0XD8C^r|I>y3VZoVR6%zfR49Bl z7XNzb<$0O3%HYjTanfrl&_s6=AF>c?n|amu*1am>d)3IY{piI?P-1R>bgB~w(H=$a zyPHC=fmAo4-_3AxG|C^~&^Ueg;LhlK7jB~7GH!$HN8Rh}yr0f{nd$9WaXp(u={rWK zopi8nlI`KLezi}0>1)fgq{Y}u$-{|o{&cZ3$E%=SE1wk~>-w?zP<^ZH^xYj? zjX2LsnU+Z+GzDwXyWWe}X7+b5wxF#Sv-?#_bG9)e+;SZY)o$d^ON3RT%IqgFUH8Xl z;Fo~Rl#d|w-4(yS;OEh-7s+P#77y$F%``7(ic2QSR81$3oYO={&5;}CrJw=VyUfJ% zHF;@G&5MMq?YZdosJi*W=xD4M;%A~-r|%H@Ve$EoDKMKaFSGIims;#`*CmCIK1I4W zS#Urp0XDPT-aG#}U2XIF5()O~B4|V@g`;L-iRqU;fjwk8!Ux zFW$rZY5DoN9sK6g+)aDoqBBIcK*rMBWHGqe@I{%Y37>Dr)$y{Ti8#3%IN0aphsIz; zv)r2gZTV>Qsf&0xE^G*6O0{!Tx0%ly;8#OZON-m>`SdMBIw%Rx_*g%GzoGkv?!onm zxuJ@I0v$!?{faa)m9%TF7!rku?J+L%x2cGk7 zf1?qgeLR|GBBHeoG6{60cEVO~b%Pf~7@Ett9#JS0eK}y1)9Bs)ar;wo?B(6FbUWMi zv-X&=X8O^G(Rz$l-BurXO`XxvjEhjj=lr~J!Q9_KEIX{sawoPIk{qCC$M@8pe+SQT z@HUnGaqH$SaF7lAQuzp9$&{qy;rRQ2lvL6z4c+w>m73C3!0U=B^LXs_#_dor!uL~m z&S~si@Lk-igmcyE;~fbc)By8UISQ2Kjz%jp-6Qn#b+;y?=WWp52dR!?_I)aG2=0!}aNb&ky@#Z`h zb|W_ZLEiiIyUoe+aCPd5P3%c2J?sj>u=VH1v-UNjcNWHh{jSHQsGGir5bKT+ zXUxX{$tSH-g-Qp(y{{Rb*EIg=CXNrsP3Cy<<8Q%U6JY#_te~H}^F*2^uLQ>8qd2E& zx69pe?DFbomiOZlL$k|AK##wO{Cj?G>|xe*?hyJ%v*+s6co$OKu+P{pazFW~|v%6j?cYMu9a^o1z-oAe; zDJ5LJGw`qG!=6{3OC`NsS+b5jXkxipXkAq2lI%B6FuMtQPag@<%=QUk8JA7@@tHg) zyC-#56|Doxt}wFTU8T)}o;MKD2}B45&;4!?L*x~^9UlZq=&q0;iRhvn?H))I61Tt6 zVmv?CjBb$iu{H=*{E~1ax@1H{3(zZ;@WMZ9N5GPte1#VC$n)-KFZna8x|=d_s+O=W zBfq`n9hBiIprT4T(O3fU;IqUAu<50ueEn2)y4Y6Q_rv2nNH8^@iL)yFtvjC1{ENcT zP6tRMy0RwIzNco(+cczVy3oWRF3l_&T~uWfGWs5GBwtWu{tIcSg}7tH#cbYn;r8|N z@%4Vuqvz0T%46!;S1@<+@zyu@7LS;HzNaKSDU1Rol5Lciq_r6qq^i%14k;Oz0)9ty z(Kw6ZayIn9H@A~GMzWFGon?hdt}R3$otsVsAcOCMQD?{PMg>KX2jHe{}{VPfQm%+VQgoiD_0AbrJJXcN$@~ zs=Ey-#1RrxSljNVo5Yl1=E*xb8u=83G6c=^gTXr>apl6yV#I{LsGQP1y$VIKSwDL{ zQ$uRKH4Ynfmv?}YAPOI!wUy{>SaNA4?>>)b=3~4|Pri!wOY}CU 
z?Nfq-o1^;cR2d58fkUBvgXNU%pKx96(;OFVJUl%!GLAf6S46XOuS>RETJ#=FZ|Mh{ zZe47edf!T7&Ezz9oEY@0PTtB0u+<*&(HLi4?+_?1gnX(&F=bqIU8_WVXIGy3J^Wec zXS`kRGW=A2iXVqQ@c5=;Wc1R>jxitmE6cyFGux;FiBqLgl5zHk##u4~aoBeS7gJ2E zQ#aZb<>||);rZi%gOxR!M(s6+X~J6aF=Ah@zM?#d+KDjhcw$ceOoaC^-l|!JM8ku4 z4AyqIY#}^`?tsv12Sy$J%HY1ptf~RIYE6w*S883PNO#(S{&ZP)pzx$A*E0GN&RBoD zqZ>S_O=e3*DPaIkw?yF?kVz>NOcKoGb&FtX-)qA1H!=nEW3C@%8DbNjP3&#}M1qU- z8xspKGwjHW3XmJGbP<*zL~`5vPSj>sjQoU)`5J37zh8v|BnPif^)wBeJb*O1hjq{5 zbuNc6nh#3M5(p`5FMYJpPHjZX#(Zc^k}BMT2#;>C{IM6VoEy`VGGUqL=BwR^QS)B5 zdgY$syilZ9y@SMs8(+kuDU#=Oh*VJ$pP|lor{#RJr&8|tz5W?PyKi`!Q%to$@~W0H zdNzABw3GmJ-?E#4Y08yWqn6T6q^}~L^!&$s-`ak#8LbLfzhHM*fQqweZD(uIk`x{} z;VwHVCwGF!-Gm0@ihVwLm4&b+*^Kex$Eh%Z^(hA)@{uyZans-V;t8{>MToLbi(1j4OWYPOv|FY)RKtC?mBEG)mh6~VvML=^>fPF31P=*1Qu~8s)rZ1l zmd4CMhP)x$q5CR&C;HhvRd5rrgiE$!g;X!NCg`cIS`$GL2nlP4ugLY}*KXgO!%;s=o zV@2y)*`Pj3hG0a&TclN2MMVCYN6#(6%}Ppd{uV!4q?^0u zs3II*_HZy9J;oq5^p`Ko4O5LeKikA`)Cjj)^8)55%2zO(>1$?l-nNT z61}Y>S3Vzj42ZIsA9U160Ty1pki`X7Rph)~UKn@6r~83(^*R?}L=$-l&4F%;dP(VZ zxgfXhWOo{_F?H;{O)aR|DH4p(1|BvU7e!|8G|vLrt-hCP9<-;KO&(U7YtT3sLq+b0 z1k=vXZo)nZ1U;Hz(39393i)WTHVZGA7#tNW(`iX>PJcKusonfA)^L9$)D9CzeKyus zz-lH`02r7M&puYt-BS1a(*NXT{9GoW*0CHA;MPGog>`4Wf^MtrxmdaT;16DD?Wr=d zqTd)v+uYMgXuZ zGJ1ObQvROsTKPWU^krQ&LpEYW1lc*;B|oa&T6^ZeJ*iwQ2VI^U#h$mBFDhfI5I+m? 
zXk|FC6*uz3IS@l)Ja84yFIs&pjKK;R<3@0D(#mK}&0u%;7Ja64(nrfhbEEfdfDBi6 zuN$XnbYbhP6!gVa8xEXGMO1FAkWl5uN!YK`%3rwNG$)Zz2xna{wxGsNoTeSl& zUp#cYKw)wrn09t?k>Z$q1@DCFrIh7~5`>oSxs2mCpH1Ehuu5kkvd>pf+im(A5rPtB z2Y;wp#LQ8ngPg=^8BCn+glrEF*2p3n!y|O6k9=^D=M|;9eB_fbiDMSZ;YWBws0fc_ zPW#EZ9&-hyRd2*7W}r7tv8O_b;{6q&(4{+`xX5#SsFR(7o45Wuy(1{t#=nO3rG9R% zezd9r$(~{`pJAQ7jDHrW)7!7`a91A;g6-`o$a%*_@96SLgLz&G=~{bH#Rkq->UelL zYXzm!U1jAA21Rw_ljO_$^Pr)CkvqZ8qKv%Kqf}%e83|vU#mZ75a1$>*re|4S;;|_m zftGt!S#9;9p{A`{{&yMUHCrhd#b(X=L*F`ZfnATEy$~(>dHjkV52dUEa&|jP+EP61 z{kyX4%<=z=()|x}`(G?=?JvUhKQ!(Cxv?^{5dBmBtB&PgL~nK`PNIJ=|JCB(_Z&fwL>#P4L~LAKL~QK;9CLn^e@t}#+1OZEiMTk~h&cW^!}*0^{ijcM4pt&I z&acA#5AU3vo0*9FE84&J@P+1P`$BLMu`zS~YtPQa`js`u7xq8uS=jzTv2igG{d2^{ z%=#a#SpTCn6VaD+3K1(eHxVn_*SP+P^VKIO8~eZV{3ix0H>WPq|Dv;VeZ~9NMf)=U z`G3>d**KY5xc{5Z&dkEW#me$;eD-W;ouq}Ar`+3~OE@N2xL1}5CMG8TKDbOM+bHY6 zus|k`w!k(ui1Iy^5|_Cs7TVQ$nN>{s&_8f_G+=u*5dRw<3Sl^DWNQ8fQNNWycb#5se#ID03ef;tm zU7BsA?AZcose~2sF!%1JcbnVpKmKM7-(qQ*zPZ-&Ii$`AmevX;6hH6SPx9lsaEcM6 zrib9PHhrDL?BhH))%rWvC5C_A755=S_BvW7JLXo1yMMQ#lNtN;o8dU(V!j~!3vF5? 
z#CgusB9nf|^IrQcCO!6LonXkbBFKS`kFAxg(|f<{;`hpNh!MuD&8{MP7g0HiltM+O z$=v3$=WO2H;qR}fXdAt^8HdxJdM=Vgbw4$75*Pk<*vCsERyy!;-oDttnl``x^7AF` zGr|;_hwL-X$9ub5x}M&bL&Ww4Gx{Ddp2NuC8Gn14A7rD!Y!%iFq4BY%1RTGf$d=?@kn&f`f7?(=OCU}2lGe<;2_(7W7z@rk5>FA8*OueDd^}5BK;pB6qfgI6D-Kbw8Ig*g2qq|(3BeMd_{6e*wTe6+dqXP< z1=xd8K$|Fq;nDEHiZ0V4F|L6!1DVqXQ~sD0)gjJd_K_XyRK22}AXDv}@3Hi~DPc{; z81Y(@u0@|JAyRDn&U+EzjNeQ!>o+^?CEWaJI_Ou7xI zM7jwU!cT;vc}=>-x6lXx?hdf?;gxw3-tY;EKNZ*DM3qtt{ucZ#G|g829GhzMEA6R; zmwS5$!c#K))}s%<-<1wiyL692)k9hm}yOaWjans zJDbS#+WLYtKrEr!kHQRtGw5Uxeg$>nlGY1_w}YoK+gs!BVL_U5eY4t6>@B%pfU#QD z|M+!xi(V^r<}G^v+w=U-kR$cqb})PYHTp>X4?Ac-!ld$>9!Aei*8MxiF0(Ng+tSVJ z+ei6l-MQDCW540M{{{Fj?DH%SxY*NOj(5BD_$MjN^4?c|Q&D0| z(i8J}=CT%6yYu$T$BWqina97O|1-P)tR>5l{5&Ffixs=GYDgBDV#T{`iz3 zH-HR&6AWpJ13NYd_AUcxnfo^y5H$6(EP?VGaE>$o!l??u6YVi9Gi>X(rD6t8Uy+NH zOHHp)@VHLfi~mGLdSMdAc^L^&X2PK86EhESG5~q8e|#R{*u#eTGdvA+(DJx&f$==N3TB!_1DuIppsBNM2k&*F;Wl2rWBsKD#tOicnu9x|oO08Qyk8z3QZ|-B=U;6ZzeOBhF6Diqw z@Zs-KPb`OHE3vitODY_l#XU4FMyb56N00|X;pu7cRG5}bC&%O(i^wt)QH)4LT!^)r zX{eZl%HzZWgiYJq3r6=E*xJb=+Z$LS4<{$h%`eQ%9n)u2nd@T4=gHJr;j1i$Ga41k zEJ`yTdNSn9a1aI>S>*Rk6HkTGm*pB!iKrr4&oc@jpv`YB z49vw{9wn76Q<{1uigMOTHcB+Fj?}WcAPtOjd>dwMv-9cgwYwDz_!H)1GZSpn);T-Q z;gd^cqI8!>ZENTm+cKP~?X#&qctN5$`(tdW$(<|{M^&C8oA7C}@dd1zJTc!~H`dHEtgz*UB($n%K9_SukXv zEo@`w(tTi&TgvPIuz#HTv2;nImH??&VPm?VJ#gX$yI9TA_r`Ry92~)vj zYj)Rex867D(l5D*DPcJwdNiqVx(MN$&Y$v5^pzyA<&W+ntSM8;LX-6fbvm$C7fJM` zsF_}-C>8i)VmEljo{`;%^GuiSQgbKj-YNDR<4B9Q3f=6J{58TO)7aIHA#7LN+8bOAfKe(jC(dJ_lzDlGRAYf}9b>V{TT)$HPHs6~#6E!!?m!y?b~l6%$ub9D`;`l7t!SmU`aIsA7=RXr(gOfsW- zM$)7moT`zT*OH%8u_?Dc%VVe!mEJ=oF+xr*K9Fv<4?JL~0>WJ8#l;oRW57CZZ#G(c zH&%7FOnIIk;*w6OX8Tbw3Z31B)m;`BN*4nnJpV?!uOiJc@|?l|(Lg_AW;`=X9w&d;2yLD?Vw!5BW1nDjU=-%iNzXaCH|M0h zyB1UO#ljaxQy}+c#bk{IOEn-^9tA7@UCc5~<4`_HzNR;EKJ-i7(d!!n9L5k6m$%}H zIm@mU<>6*|I4#3QF5R;nX5PmY5U^jw@=0gkW9X%5xa&g`aFnLLN= zW8<2KfikyBS*|zbbyv8{IK04#_M67=*g%?n#~7kB+Lis`x;(y#AclwED8* zQNW*+ykde>p7UZFeofQu6e>;A(TZ6w*Z@RXIc2M#HAJ*LgLXM;+RvorQqbnsMN{ML 
zaB~K;R9yGpK`%Qe#p%Z<^JN|*O4&&uq=*5YCL3{Bi=Ydk*f62GAfE`U zN>89b3KNrsrR$l71+4``k185GFwAF{6_^p?OqQc}?u&Jm8>vQe;lT_UZXtz|M8*Y= z5RZU5Dqy#{lpVCNy80k`Na&0qXf>Kd^zb8N+S$NgE1*1 zER0w>Rjm?D5id*4*NgQ;y62r2I`c&FoEYY1v99*UN0ay6RKCfNAJlWC3z-ktfP+LO|^L zZ=ylTq}RBsfK{Xru04)Hk_|Up>##?tOOy@bF8QAu1#6jrUepbuE`=xDZK7>O zhMf74S5StS+lD{|AVOf~%yG=LcpKE}`n5+`C=?<}3vvqzf&K6y(FNt2e1=;|#p3Fc@G0C5H3GLNg7MOj3^0p45}D_ z0;CGeR+7V##uCO-z>>g{p>iBXBZh$%g#rik1H45Dh*RT4M^cDc+@jE(I9RlxH34!W z5Qa#^*>Mt5Co!E9cs;2|;-e@;K>MYbkanOwMevAIkVL~M#E?mj z5s;lj?O8&QMWA^^por7*$jzZF10#^c1inLq{8B|h0}CfY0RtuVhdUs?L-B+XM2$g? zLGdMdlX?>Fp?}pQK;{+seF?RR(nIVm&n5R1am6UYxh)I)zcc9ZwUl=(>Irh&*bR+= z=>d6L)J>Im=R1(tO_h4b$4wP`hacDpa9dN}4{KdlMh|O+vP1b$sM-9QOH&^bQwS(} zq+K#oA2R3vF`exHHJdCS`Px5Vx4BQ=8UbcUzJ9Kf$wUu7f=Z7i6qRdp$;@7%6p;uh z`6QNPmqgu@bS^0tb@yJ79lIm1m<%b`%^~~ckBU}1pjM#~<4+->SE00_5s@z3BzJno zFPtP}MAl^6hVeCn+E($o5gbx^9r<iD%!&x zamx=NfRpoE6u{_<5v@V^`8Q9XC(vcS*sA=1UDXMH+tTe9z7hRGSA;!z{s8{An9g>L zxmcBhq@1wMMNC?R#(4}@kj8#YT2x!q*oPQ9V=+zYQY$H9XcDD2x=y;i6f46v^*p8W zW4=IsPTH2lxtL47t)vgsz3{wEnYJ>aP&eVdz`U38rEoXyJ;OY&@}*EWz6HMmy z6Dmi6txz}iJ;gk(vYk*jfeZhMOW9(99x5BBA^yoinU!*_2tGR58y#M7cpMR)=u|LR z3lM*ba5!?^(A$EZgw8qirnuSPULZeuMusbvDc7E18xOb)*ajHd;twl{Am_(10hENW zkR8{Fy|WLH8lVjUN`Wy*B2X0O>s-{ja-vG8ssJJpBI3U4{U6m$iV`2N&A9in z^NdP@lJ9V(w?6ZaN}J+7koPw8txB7N7SFsVxMi`00wEokE{G?EWvx4^Yk@06YAQvk z?V`^K`ZE2RA{ugS{xb?xrk-$t?R)Iqr~l^5ip zFoHx$pMh+Cu1Ya4X93v~dKY>ZiXBi7umoa>pywwrWhe<#Q6?cLK!wkHpb`fdkYPbx zgYgr`AjyX18%m%SoMp!GmkAaUi1J6gpekI&mI>zh#(IK%Ew^?KYQ4hmIQOXYegz-j z?~UhUl?27!znMLLJBch~%oB)x!JI!-$`$v4xSuY2RnC<-7x4iz^Ts?GDC1Rj%d_2S zWV};~N=0QLnQ#i-Q4X6VpUAKK|12Yc|GkDRL8-yGU=Sz931f*#Q5GSqKx2r&Ovx=rJ-`AGa=JiCqpg?$Xp3pFpZ?pW# zxS}+bZ)sInGc6ow=dCTFVWYD2l4a8Rzfk+KHYQP)yLLI-5j5X zx(A>kI_+w$1tv+Ij=w7ga5DV-WGg~E85R$=mBC&L_68Gbphvq8f!;vm0aUFs&mzFT z*_BR8p>nGkk})iza)y3H&p@9jM}PUme0AjnYjb6#qrk=K6ty}6z4}xu_&g2etXg;_ z;+E!+cO!S&pmqxc|3sjF7vpEO7eLS8DtX50vCfIn9b?Bt}6p@&A+%g@Cuj`amII&mYL^8MIw(Eqg zBL(pot7aPG+RzJvGFs}A4Mnt%#*~P8=Lue9j`EeD 
zMOpzc#HS)0q1K4V!=ZAB&oqOmBsPeKt3kO?zT6sLDhvE=>>6;;2Oi31h%E=I0CUu) zhd&(ETx%ute`6LSPMaobnh(^Lnn%mghcd1+j&U3sLzdZ7?jmYc36c%e4g1xFbBOv; z2Zer7{PsUaG9sFqRKQ4AA{O}l#3+m2x5PdZr@W57m9BISQ-$?P`z3gHYC9^HGkt6qt%)@iA}9j?WSp+p_HEwUZa@V zcm?5bXbx`a#u z$L$(h``EGfo8o>G>9ao6E2-VbXX`hHUp*C=5Bg!=%v(IVSgbx7Z-)>dtsr+bmyhaD zsauQmb?5{klH1bm1bRVs=`a}v=zJiA{fAfZcmszV{^AYpDZZwD#|{*;uaraHGZC*s z(0{(=6y{8OUOswfg0C?&Zz@` z)?w6yxY)z%z%KTCwh`F+<8D*5VRQ6JECC$4&~1YzH&}9d;d^b9WIaal?Ge`ySL57( zy7ruFu#bixAWj+4eR|k+;ugE3)y|e+ z5KA!ZcGQWl`wuTJqT#wiR34*mY0Qf3#&W*<UTjSM52hW4gBO zqiXKrE^;yQN4385#xdrAU!UOqh|*ozo%@}8bZk9BEXyM^QwyPyc7%Z4`1$+lRYNP& zC-wz`zEKf(Z=Bf<>{Airdw6RX?gzMjKk~CDR?qMEJ?$ri+s(p#rUQkR9P?+8USB}& z7S+_&^voRNqLYX;;@uA3E=Ulx9AIO&d{<4Ayu?6eZ4Oiu=?63XT!X>0UFDbWxbkB% zAV;-;TU;~F8rpc94)k%{9AjW7jo?xKJ8J`2%1FXX^3X$aTHWA8f@VZ3os2E^>LLe- zmDpot=2XM7QP)lHVS^!0=ze3rH59FgEXIKovmn}dgGq`I{e-{McG=F4ZSgz8fKHji z+Vy61L?JTk>$fB7NF$uALi1bi=>$G~S_!Y7q!`bh zU)(vDLgco{X1h*T0c;oyz#l|+3}~Cb@wEZpr~+}YAmP1)$8Cpfe5kw(%``O))cSVQ z(EA4U{Drc5w9mH_@)k}bCxepJ!(H-!4uPmc!z-=bZgfs>?F(tdpMU$Jb-#PI)*PnK zkZ3>lU!37#WMI))iPa$?<*8;QRspwEWrg+hE~E_Ydxo;m=tf%J0*Oh7x^xPOB%bI< z!E?-ZRs~}*$GsAQi^B&_oY3e<$$5zRn3Uw0EPvd{5ihVG1U7L6qm`kd2Y{4=lba6j zFiSI*8>#HVmow885fd!mCXqZHaYk$zn@O7tKfvT-a7$9qrdlopl2Bd!Wi$TesPGnM zX%nV~TooG=i3kM;uLfUXeX)#`ic+m2UOG5bYOhbtKfpxtC#P_BE&@sl6t-)8dIaIn zR_ejK;|D(QcSa{)?g~6#a>1Vr<0hn`z(P99WNvBjg!JGHdg`aY7Mv>!d4`4s5{Z^l z9GTHHHfU*yMxa1AR3IJ<g?AnsnfzSn{gQ{+e=0JnUm- zz^pEkLJ6KqPTRmDcDSj!14$}&>ueP>f^^O*5_Z*RG%rB&loSD4IZ}|b7R5bSXeGJH z9Z{pRvU!mf+Jf@-^j%l5l>@!Ms(x)q)QI#K;0zP&*#0T%NoGig7eHIS` zx_+12-X0j4o5yf=6yxawk*=;U-1PqKqNkV8_ZY?+-oiHC?~seSgVyWqEw(zjhT?4k z_u0KuvaSlG3>KqVX}_@XGW6q8!$89Ktk$R+yekbjLT|tceEF!+HDpR#s<9Tm^o`Wi zNNDxFAD;(|tBSEjOHxYjZ6il(aOy36lr_IpupN$y#;bDhDa^q0>UIDI#{QU!w0oM%?}d~!3KS7ZUnN08wW1(`vNg5Z^ZmEhq0MCt zP1-TlTxC*05ex!Mz1n0&ndZqzQ?`qt{LmRqrdly{(!+3u@jyW$Y9Yni^E$p8tGl^M zT7LmzWa}U<7De6{wocmk$;KR+(#u!qRS|G9s zP?U|Jrz-gLj97TP#2z(!ezIiOE68{8aw|S1#C}%vmjWC=ad^;=V6c0gK2p*u1Q<1Hv^FA7r{#TN 
zVm^G7`Oc*7u{kIEIrSfLs`cd(YcC7kuM2;k3=P87?rYSCi*8DPRUeiu1zyVpOLkqu ze7%UMX_d5qH(&!R3a-oW$BIsghMy^Cex~Au1@ecw8S?P)sV(nwH+{FF+V6*GtRG|G z@F?XkXHeUaaR6*wn_Q)xBhn{)H${AZUwf!7{BXGOv3(&4|E_8rg|obpXV5I7z&KB~ zSE6+jr4?nw+A8CQHD}D$?N)9y8}+-1%cWMi`Uh>Yd%W$4+|OYIpvF%Rfm9CagnX=x zB8~H~Sj6yczZQ^kVlM!E+;pnor3c)Dcc|LIHa?C*N(U=-`3Q$2V&itY=)Nu<4PF|5 zS($){9vzK#f+iZCKI)j(O7&PR1eegK*y5(&%Lq|?pp{uo}0cRibX|S z9G-)uV|JBxL!~Mj52c&3p~or9ZomGSFsPLDpZ^C?K(D_LZ;4i`qY@%|dW>0d@e!)V z`@h||xujJa$J)4uokd`1`jO}JRhP-&BArrZN8+mLTk*mq~;m4z400&tzoS(&yt#J)Uc1WhP+fuu2Gw;(y5e6ya?)^ zBfhlsj4#!j!ru0!y?bT@#xxG9d@+s0EkF%HArm6y0Y&oPLmc0>nC$@H?d_*4AGn?M z4T3+44Y)U$>5ZrH)Of`Bt8f`_piEgZgGyYqPVX zZ8nuPCMK>+n7K1KN25_;;t@zLsEgI8WA?Bt48GNKc&E-RjLpG^!*1fKXdS<5=^Bks zl2w_IR+r%Dn-PBW<1>k8zT8}z-_SfAzl0Wzu;t~2=HZ|p{#|b}VCKz?A1v{R$;<=x zF!_wh^7v^}j->K@wIMqtIXhk>^j;x_C1fWhWhaD*y*CPKqYdo_bwSoPYe}|A!(t?s zq)kXKOzG68#vWf5lHu6L+EA6K;QL$rv5)%fm*f?Mn2QGe+#|9-v6c5zLh4e4 z!3fwvv#={QL}d*LiQs=BWVGfYc7m~RLOPE_ScpX!o1SJ((&%_fqgIA=vE`58rx4%4 zt^Ai}zNNySf+9SE7G>ZTRCN5AvV~Hx-M`(XFTxa9izU?@6%~1Mc*Biibe^T(a+SJt z`jqfCb6!STZ13aoL!$*rQtPeBW^0^kNZMLUetL5F&rum^mM|uYns{q+k~LN}B^oz? 
z;FRKAVQRthVU_E~_DojM$B0V0(q_{d2dDO?X4Kc$rd8ZrE=*VHGzx_VpFsg?J)bFJ z6)6yopTvD9u?XQQJc$|cs)E?#=ZPu!;MJRB=>3L2c#4UqIEkHMgMNFy??qESi1j;= zDq?GI`)tEaAKX}t+_?UOn`(N$G}WwZYQD5K(KL2tGpBIl?Y*5-$KP>u>#fIJ)5hKL z!>-x)E*@32?9Rz&-@l}|@}j%>X%knGh|h97q{GGD(?+s#kFXv>!5}aUgKECPJ3jX#&-aaKd|^}@t(o{qjtU_n@>C`Nd1qXV%epStvpvtd#olW zjpbPD7cFkc?Cs93s7Y_Tpx99mFRq$@-@@U&Eq%|=RW@5d^vLNeW|TPxse7xEMmW6V zuEB8^KpAYJaqrYwL(^4zglBMmhX|Y8>BXV)Ux8Y6zOL)=-D`*49zDWm7=f=}yX=W| z(Y~F;Hyrsz?qIk4FcFl~rGitKS+c!5_r__#&2v7)OfL5)}u9z|; z!=4do8$WaI%yHT5ve_Hw?|}PWTDw|NCz0`p3@DsWLv=@G{g-9 zB;8DKCy;>vhn$lMgXciVK$BSIx%a)QQfZCj&@-pbQ9V7E>fQUV-~ZkJUWdBk!C1%I zBg12#7>h{tR<%^;GFp5_#_TX@JSj_%|M%O0)EQ7Y{1#ZX6B(`gG9mz#!<1ejP}ant z&`4MM&>X6tLq)at#G#1aa)r+G5JNx!RC%7_3n2=B43<8`vlhRB zdH-tNkW`GpZ?OgpGz9Ws>{{TF{{S{1Y>EAMv^Ka6M)U`vDfX_k~pnVU31_+Rp})pAxi4f4c59vRz*LfLM zGJ_3Y)Un_W%>o$t?0JOsSOTCxE)hQx(3$qO@5 z0MZ27fD`|$TtyKmA!HQHS1KjAGyV+t@q2=;GK6c-vdK%>EeMU+u&3)3;g7TG5N*82 zi+X?gMCisLfgVrjkv^9(f{~aw-y<;HtJSOhsG$TC}%) zYa!u~x>@{j)@|+CRq(FP1Q?OGYaM!*C1RIf)3~CQ9rmPKRe4A0j03yhbp3zfA1B(7 zCZyw>9`Rnneu6NF7HyigTTqLW>sH)_&7jH)(Rf!Ji-%^={bz-Hk*?V*V^_H!RUXf? zppIRRio-uHwj>Lin~r|#OriYnKOYH>=3C4RDUyg7ck4)|b6dC5*EiXjDz|tUAtk`S z?2lQ@dfBRnUV8AMS03$?>a6B?!oun)lid{Gba!df{i9xk#URr9p+y2M60Jaskb(0; z?(It)_jV8D-q6@N>4a$pjh`iXEmgtYip*K-@KgW(Smg~)1Fes~{AjWACr9DPw*C7z z+*W2W=TkqvuZx9@cM3!bDkrjp_oc%5wDAo!t2?G46+>5RjNyP*q zmWnEu(EGtHa=Xc+qxdn4wXE6X)=_VQ3zN~K6;~=^od-t4GawsLVDv5|d6ooN>gkJS znl>S36A4UVN}KU~CY;12;$aVYyc_0PkftF%wnp|^f5>`0^ zAs>f*XGqh1CDJvHN5V=4Izo-bxPwl8r>SP&rumLc!UzITwHkMNB7?U%Hw5~ z*`f|twL)k?yp1BMvygW=#q%ytPb0d(Br}5^$co+W1u?dPSh-r2@rgjg=A{1#1%7$MHh3m3^H=$|V> zXzSb@|EHL@dU2(Mo?~Ppf&|_ol50%L%Ci--Os|w8e3Zf71@qLyeXuHM2|~#vF!jt? 
zf{Q0Swd8^RxQIN$-c>1QD^@>0JR5_q2G1oX(-9CHUZ_d<+GR@bt4fr75k!O*q%a-Q zguTG&HfF_6Nt6nQohgv&_hc1_vuPr%rEv>8Vc8?Eij)I3oDnKB9gykQLk5v zn(vpQYIvvvu#Sd&kv7FVY|Knym7fqTUG}h9hF4z0z+IM&ny3i-^>8*~V}3^bQeq3X zdjHWInlqaCwI3>E00fHg=4<~}zkALfk~=fL*~?fa(CU(f{53!822yQ9B4-^+xB<#c z#K0Rc7<2O^9nitRhwpAQn3{(PTmY?jaY&4)d3JZ+A+gE+$eSZOnA#Xt1)+A$PEQL!Q5bOjCKwf+zrK);v!RJS!($XhE|<7C7t zi@LI9@bQ9cXk^HZ{&Q^^1LVfFAZnflQ4?|^UdV=gm19G`2H6l0B4xW=&~C`Ri*n-% zTn2GE|HzN-zxQ9xtnLHvCtrN1P^c6qL3N8U3RlvcWLlhuAN~TDX z@xXV@1k;dmuGDeWV)_AA*kof_jbCg#{FU4HabB6UFcB6F`AY7dJq2&&5Ald9VAyvw z+3r(eZ;XF(JXU$3QK|wVof_P?Er2L50sLxXobfI}7u2C5PZrE!#J3oGYdDy) zOYySQ;)!~Swdn!8vS#v=ds>Z611Uqm3)lKI|~QXc~ z!Ox8~?HJ0aL;?&KF%%QcZ|n5526VojH5=A+``UK@%}``rE+H3^I4-2cbf9A>=197g z{@%44*Y^0)wt@>-3A+s~?b@-C`ojt`PLjkGAAeZW#2BSkQ z(iwGfvsdeku}*Iyza2=#1Z-g&umw9}JBtt?B~NK(gnR}~Pnjkt&R}b$I>bf|+I7t< z#J4Hr*I~Nz#x0WTA_>%pM1*4^1|*2@w1ls{Sl3bq--iNwNW9?r4*=ixfru4E+|MI+ z>?H7PEq3ZW#ZJm6O>=JDwa~5SGD6j%8{(3$^VrvKJMfRkIv7jB0~bWBnZZb;oHok{mMeo{@r(V4lsP+-9XVD`R{n)1HR*V)*+LV4!TwB0 zEU8d9p%T$SxthX3F{{KsJ23l6m6n+|eTD4WEcU)aO%nvIlF#DU zH3bj_sSa@1p9Qx3KY%Qf$lB)-H}*xui1?r&5{6{L2wsR3?j6^|RibCJa)e=7COpY9 zD%+&05k(oI$*3Nvkkev@TjS@+xrW{!uMn@7_xPMVbm_VnaQr(4)Z7(}x7ekE8Je0V zrIsdlxYL5^P|+V$w5~H9ZMI2-UzO6&kWzCzl+37U#XFNRw?aUO6%w>$Hdm$?IYkH* zt{{3D{j%Gy0tidaR9>@MQMXhH{zdI~R>r~1X)yB_IX)x$9HPT+Jttv|aHqOk4~qX& zSutaq)C-i8f|{L1XEWUVU^`mfqn0+)YT7YNDwkCfb$dFJwn@nErdP5IOdT3wAYlAuK$cgR+B= zlbJG1Q0xra%9ptbF7#Zj7L*P3B#b&~)X}&W&BIt&pbZ1GDVp8p^%IO*fj(I|pi#k< zM(hE(UM?hPwY+i!y`WG|!Vb8gH>h>k-A1esO`AdVq?86(ht_6w8D=lSQ&#fV-^TC2 zUxRrP^;Y>ou@0ok&C`h2i?q&QtFv+iu0#K*Lv=Gu3cZ#>Q*h^x7t`a7h!>xs;!MsN6Drt`l{X` zxHv83Gocn1toR-3U?A*Qq(8APe{6lMlNPaSl7-{Qcf#q0cFl@J=nJQscoS@ zZ|hjsY{+*tZH(BYa-mQz&A0V<`fe!<+}`hQ4s-?7W~W(dWOX*T#bs6b*Phw(y3(C= zv}Bu85dGYJ{cQpv4GZjHot*lEHRHKW?`JGRuz0ZA6f6KysrrKCJnJQ&v>Ym`yr(X6k5W-$-2QZ1z=tx+aPN6D=TdV2U)2mg6CA_93{3G%GGwJL7`&*1F4yYHQs` z`l5L~b~XGZ;Nh2G9;@fTNuj>AxIVJ`i?^hEkAHcGuiV?D6_dDHu3%Gz#Ew12WI37Y 
zZ(${3Mo4_a=+et{4x>E#vFRgcUOd$))mybPz01%V1!niRj~8w&dTgwXGI8q*KvF`2 zJdPYiZlBscKD-Zxj#zpaG0$M*Q(o`5`UPwp5dkE61Q|zys3l7u%%}dNwd;?GiNe}T zV6r77kLI$}rZQs2oh2z;#4Vr2iyS0^$!hk>)vhG`!Ph+yoG0G6!Yu)U*v^OZ##)Y> zzi{pb%U0niv9x$J^Mt6?v20bGxQ6(zJ!guWKUQ`!=D6DvH#17Mg^h1-uKg-D)2Y6Q z*6g7KxLP51CRWF*vMe^afhMXZJm@TJDsZ^v$=QJCWYh6HwqAvR&PeN*cOvKyg>Y;0D05_SWF!!^qvp$!X-+-a&i@+cI594@v=4AJ5Xsj^rSC9aFQLX0Rh!>R9Q|BkSKOo-7=I zdYVAS(cIK+K&IUZ({V=| z`kPt~o=vSP#78EZTK0`5WX`k?O05W_b^&10Ad*7yYWJy1$A`WVrqw_8E{x+L6Om zooMBV2`2eiz(au)G1Z(mvNLlV1#B0xv4XuMFVvcE5-@{bPR?FVUgo;Cs2ls6g`K*M zv1g8{sRtZ>^#}V5A)){ZFgSH4uaWtZmb}XBdavC2ouwp*a3g(;@uPUTU?t;8@ z1mNYf04_u5)5VpCkd1_(`*M;)J|IwMIEKDqQLF(&3j`ldO1f(DTn2LsjwN>j%v~<5 zax+!TO)x^r;}2=W?Jnt)e0+Y2vBc*I)wlliR#Rt7P%p(%u_5kvgB0xUU0v|);4!*? zRl*QYq;1UrqpuDvh0%#K0)a?~D?RQ0WmQq$^zh~|Ary-lu^2?_l>t4kEzt|)2}FrJ zgM4xN$tMp!1GRMico4i!A~V?O3r%NEoqVS71)#9$ zzsViic@k<&ai(xbX>Vy`VRfmCrh-IF3M-w~G70q?D|&LH;Nxcvc}|m8IURQKDoyYU zKglXQ;&N<;Z@uMeTwb*c6oD?IFwKWH0mrh|z&E|sQ?y1nqwrUYsnm|97*N#v9ez%; z>5yJRpdwS;SFgTye0;sHu|Bg|tG2@WI9riDMM-H=qx^IYl2y)wD^H`FrR3e#VL0~lJsfKZLlhxWwLjfNMr@`BWa=v#`WsrlaaC%R1Jm-axb2Ojsg?>nt3mv6!!B8zi z=3*u+L0(SsD>zWbT<-QPUnJgi~yYB7~X7<rO&I@(!Y4>Q@$r*@{ec-YBw z?gXzxL8uSgi6e)eEIbBn{8VmY{UN9a*@?ow(pafn&>1r&4`hPM)5fAAmxp1Htc5|y z%PtoL7l**51-(L{|5por4($Id73sPk?BloqON{3NJQ+5&qp8M10Y!^lq2;4p;s#ue zkS!J6ZiFNFD}*^NN5Zc46Nm~~iwvDzja2W9QQmH-herhFo z@kvKS(Ne9;81oVW5`;j9o0piNpOe^0lq404ir|HmG=oSDTp1(kaz$?R@$a`{2 z80HIcZ%5{Mp(?d0lI%SKJ!xltOJM`N=*xxe8JISD3&~PVp!Ku?$*-}RSZf5!zjD5# zY{Y~s%6J8F6HBC##X1o5OC0HdKkbl89ch0c?EpHcdO>CS18jaZTQzxuL!E)y1*k|9 z_0FltqHd0%@3Y<;R?#x%EtM|HJ40gAO8hOs`rM?JPjPat)%vwjlz<4WeLl ze^fYwUAUl#g4Y_?40dtauywLTmITL=3FlZcoEFjU zP*5{JQzaHd`;OK?HB*s?V1&B%hb4F`hL!>>zmOpN@F}aBrS1ei_G#ql=aAFbh4Y^r zA8*^=4&zZF7}R>Ws;ljmwkIwD%{_q7Fk@-&)V6_FfdcBvGRR1&@BmFd(swsJ6xK3* zTYi6GH@w)8YmXM70-2FQPpPToDJbetqV9BET~#N<|nM>n$d1LCoS!YbLnt_ zy@>9F8ucd~DnaG7nll28QXXETH=L3@^D`2k2Q$3FjeH(NMIvBMu+LB;lR{B2rbjkMU_NA+T0UIT+ITC$e-He42u8|y&7{SYX4-yZ0?=0L>xc&C{E-2}>`GEpR 
z5tj4m-2Eo07jl&Gf}^A^JqXltR#+wLxE;<_gx1B^HVum?3yYijevBK3WlO2(!)Z>) zLRqKH{Xo{0uP^T(kQ4P6+>jG(q)^d*Pw9>kHs^)v71BVxdWY5w|6zo-@ZZ&_UW0;L zusPAFkCnY93w6zVO@prU<;@Ro2}`6>z0YEHsX!nU$UW_Wwac3Tf+MEN6Zo_EPmoSz z02xP->YlMdSxktxxQfZ*i$&a4M2o-v0fXupl=;CZY8^%Oqv+@#Ue=&G4T@;w8ce3q zY-_>)(q0JILp{&;U`P+z^Kwg3wi%V2f40>@vUQBSI5RcWy>{*z)5N@ z8`tb$%C2pEFkM7H_}O!hZtr=i2P1k=*-DS!IyZoM&mXIGTL4R_VM^930DxYrtL|~F zRSRjjy4Rept?_Af0O+GBw%)%2cR#USuN1HThCi#7@71bO+aF#N9MCYTWaMY1W5dDL zBVRvy+u!YuDjYFeFxnh+1=2en8VQsgs7axzd~2xB)8bJM<=GaGs;$s9WwfaTlUp(a zF*Uv|7SVS&2JRdUYNQgk*5biLxTkk(d(Y8z33qm5lcPPI(CG%FZQH%B9eo3LtqoJ+ zP~|U$Aww`@TQ#T;q-WQKV;HG&*{$+KO2IW{5BUxk`)APAS;|$S{-jp9RhVk#`mZi3H3gOaFXSNHSrt5d zwZ`ZwlF7#2B+pXau}$-KrC_?ck1k$X>%Q}!ZfV;)+N6NXHUdV-gbI_rt!tW1p6>3| z-r8!LFP|&;X@k#Z^XsWa>ute9U)UC+m1>DxXVX|&jZmpmY7@hmbdiq_76^Z(7Qa;Pc`Qury~@?B zK%X`C0o52W4B3DEJ@gag0HQ%0NN??I0xkA!#LQKM6kpk3c;d( znm?!N6jTt?KwpEa-6s}~Th+N{ZCl6MHSP70<0k;n1>*#Bh?QDf`bupXUPI^!{Cl+> zJKLry?zzkF*je!e9v zpE;+f>yoRKxb3x3uJHqgKYFQ8HSWP*aq|ve-{6<3^$n<4Ut2g8Sdcj4pdhg^pdfi4 zjevq=ss4;Pl`maH)Z_~0uR%JiSwkk#pfrur6e>YbngF0|+xaXd&+&6XqfpIUo8Xvf zs&;ot#jl`(xq(&3AP|F?HH7?qb+GZ-*z62s~@TznD#m2Mj%0NBc z#m-!~r7v7EGzxz1@Qe(%{}mo<2aNZleAfr~?sCqoskY+Q@VuBY!wp0hTH$K)gts(v zL@N-J!rAPOY^l3AW%S$Z0UcF*Vko^~bwc4Q_xF1@p6IvN#fd4x3!HuNojI%DJv5{X zwtIq|URC?B^fgS)RTA|)rF z>e(GNF@K0Zys?<5`Jk9s7uaVaIzJ1&C4;6dvp1I?;Z#`J-zI*20o`WM1|KPzqcn|vr4#Ixf%2Naa=GfRT>u9EG z{j#T5>X+@44}R2s*$?=N#(vo))W2N6EC35zz1%w|`(X%B{t}r||bC z3~|^myDb%89nzpzkA3@0L1v5CDqHHO`MgmxioL!Lzq)+p+_B8wk+=-5%D&#)ml)o~ zdkl7o>m$Fl>M^VgE*&w5QAXLQD1}RK3iia%@RTxiG)rZJMOLP<_i0M}krJHAkHcP@ z%T-^YKJeoUT0@p{4o*4(>=Gf8P$IR#s?-F+z(p3ghO4us#VoPf^)yLfc)vShq+tEw zZV$~~S?nAK6Wy!~7m6uH!=IJ+Huesfdmr-F+?gzq`k5@NvLJgB5j65<8YB_wEk!yD zBS6}o#>=6ECQaP#vi9L#%<3U9}CLi zQp?!9qgt}||J;Y�f6gJDEL?jwUu2Vp@g}F=9HHUEAzz@~S-@<>7LNC$aUxHG#ow zNF^e0T*!#TY^FcvOxWdY=iu;QCyQE3M+Uqyon8|TSzH>S!D^HmeMW20Zgz&Uo4T_5 zO94iyk;ybRoyn;dYV;bZ(WSNp>}E$OyOGxs`33SY@;LJ8ZzC;O1erv(V5^XBt#|LnPVsdT|9XzdwD^rAu`Y4am4xGHx}CkfZWav0KTu7X8U 
zyCH%4)fM8faW*UDy#qw%m${^toK0L#C~BuwhHv6K_$5VUN;ESCz}z&v(M^ zTgbw|wpl24T0ou%MO6!gz5;*3$%?d-$B_wS>-mxH?!-ir^Ns;Cn?Mpy@RDp8m?&)B zDoC;eaAF$@X)v)1g>tB5E@F~OWK~PzWoY7+ z8;xt?!qcAe9V2Ys3;R_>Vu8@**P49>NkjV!$GlhmFKu4}A4hTJ-Bmr^Gu=H`&(U-A z-1mJ*nvq6_HIi&gzGPYQAs;dr9~f+7$uh>^n9KGDAshx42;nea67q2buqD}y!2y4Z zA)65Af*;u=A8fLld;}7+A%w6C)@ZAGW~8w!bA(+6yQkaIOx3IR{_m()?`^9h(1oc< zo}9}0lj2e;<^mal)j`NlAvZBGf$1xoO?ATncnq0Qa_E;b?j^d*Qiw;}h}My0n-)hX zqYg(@gVQd^Oth^_8^w&(l<6`cdXM@SF;eWe_4FBB!?Bd2ic1ROO~vZUf1jUtzmK>l zBZJ$k2*m1q7bK9+bM~>B$q+YQm<%N@Q^T2p&i7jLK{6G_Rnp2W9R|N9mN2N5Wm;8{(B>92D)ohk_)(?m z*GCsXJoz_}6yHwVG;+z#4ou&*Hv}0yhShv}Xc>5XiJIp@_|Euh?_N zp3OL&H_zVOc|+IEu8yE}EhhZtu1-w&{zYgRI*&FllCpG55?+dax0Y+SU`a12xvJoI znJYglk9TH9{oL&9KL4#u{tXT8l(7<~Q0WS$&`%&KyPUXm#5rrRw8?aN z&E@LL2L>)zlVUe!i}p0$YZ~#3eR!)$y>XX0>KBH) z>a8b4`K$5i5AqeFhPbTk6LG=P(GMqD1dTpEFUV1rdF49FIA!jfQyQh}CCF)(_7K#c zDBY{!qVL+B2h+4=YPuGzcDEs+g5MN(qhy86nQ4o`JVVDncu!N~_j z#atf+D;HlzYX*uAK$`9j;ov+gLKanb1 zGT={IS@IC2<^}cHU*bxGl+vgrg9ewH#{0b#!z;&!EEaf&l4mFxt3mVfPJB$ghUPVg zxVLOS+U&YSET)YVU`L*_Ya7&B8QIvVZ7#rQUX>@cZS!j9X;Yl0tFbWg(@}Jdp=(}^ z-!y7TlYgFRA+xYjUN9Pso>QApDos$SQ&6hBC+0QyBWL*K))IYad8K|q%jN@T;TEr#M2sgh^$9&!xP#z(9#SEdFGqb96k0k)6%tgO{sfWvuB zUTt;Gv#|Pk>;luAK2_(ahas2|?=hfj|%h>Zo+P$rNuP< zBwT0Z6$-&`K{J~}5b!K>pc#SXGQ4 z7wsgYQG)K`DZPn8x&*q_29;cr6cl4`IG*PTy^~ePp7sn`wyB3}4^unKj%8`5 z=}cu#lLuj@tGyE6%FE$%V<|ccpLgfiG&$;0F~OI@ouybIV6*!LMPylR+p3)9RmSAE z*=l``j6?LBIr77<;ocZ0c+D9E@(NB)YH5}xN%U9oZLd4kyJfB;8@0Hi_xXI5NFCzn z2KYT_rEEls$c&f?e*qlJt6Ag2Kf!f7zpWV}A4R(0B;Hq!g&vphl=hX+sn{u1Bb{7~ zs(>0@S`led&`MGYPk-*VyW%~A9r{SnFL0$YOA3`MZEu*>-0aN;lnf&SWJYJ`hU`HNc<3;np{r9B`p z|LDj%#gLg}OhKF2;D!HIqJB1>N-x>kjMgOTb_1ilv=s8Bpi0Fw{=5h;? 
z8b6UB+KwcU4SRS@4<2vMzOWVam-oD9G~z#P8YHN@O+-Z@q3QxcqtQ(dl{ zIe89KFrKqSQ>QTDEuvU_Oyg|`hudo12-;aOkewTUeq?vt1|1+Bz|0RYUS4 zr^6m|Xc#4H^7`y*v~G9a-*shAG}O5+XKHBCIU`oYOE&m2c!jnSSt2>6(-CT+0FLDO zNWi1`X6g#hqr#(+?Y3d{cF7q#Q%e6DuQ@f94n7%F5oXF=%uZz)fma|Ur)am1H)-hA zYK>YozzBZ3-EU&r)#$d(Qn#euQe$OlNS!oTR1~Yw*-c4DBsii56QlPiWyp)+rt z$+ebihnCzDtrTIE3ITm)9!&PUG#K1iWfo_w zq3$cY7jBv3iU&oz-(hk1+t<|FYlRb>`kh$F=rFLcpwVeyodNG9R=%#r6S1ws~4ihL%KfUY$tO&aZnzDx+QF4JeIb zaf2WLnbD@Tm{^?&+Y77U%kU9|swpCQB&5S)Y$G_tIvT>phfL}t(IL+^(=at$%7LB2 z({oCY1wK6y13rRe)ow!^5{gLYDKt|Pgj=4`lTFg$Fk zqF1@7W%5iZ9pO{hZ~8r6;?fSdP{|{Z@r*HG!z>vLS!$e><@blJHIw}5O{P4lb(yZ^ zW=lF*V`*NJHeQ%Vr{6=P8vWOhO4LNm9uwzl>CKh7GjEEoc@l(>KCT!}%!6Bc? zHCUOlPC}@1QoyH7c4~51(hlK@(6&HH(i{kwZKatcy{Yxt=Dr%!IWwq7BYy0>5ldr* zL=Nf26KJe@q!~RY2m?5Vck_{r!myS@nj!DEN>94$-F;Ve`@+3@ z`WEl%4*nBoOL(10hlUdq-ljJ4!@0w~iO|Bqd2@FxiG>z!U+8U$SxnL9K%gmNT8Jfn z75ET7k4A(o!1@tqip2ypN*17jFtVwnQ$}GUP#n}=S#1QGW;O!PVdF^}o^J%UUescV z1dvZs<_m@1X-avM8(Q+K8?2HMC}us545BnP0_P1ci7~vD|1;jo!pX7lZ8;JHMA<|t zxnyf6wgElidwu?r4Txp!Ey-pNjC!nu2FXNTX=VR!aLA)EItGoyM49RX8KDXN0wa?v zbS^Z?X2c5=8f7an=QxoAcO7-bTrs7k0G=2ll#$!hxX6W0uym!f2Q4|uKg1s7g@cr2 zvPz|Xu;n^KjnJx{$t?0NWan=5lr33@3{s&v7w0^)(oQ8Bt%7D8k$SH;7Hpo=6!PU) zW}OW&J0dZyplC5vYY(_1EnO{Pa!;}|W#QBsrQM-7t0@hyHrq^Aqbb~$jkU*33Qon@ zoO+W=rqn8J2D8;DgxVxa>I8U&Iz-eEu~EY74B{2h>NT8md+<>U_o#k*^a**1^G{1B z8%~X%db7$H&rCCzs*I|VRgRy5R}?CCkhB3@ubWyog%TRzPY!?#F!+MXTlLny#w*TWQJGl?p!M6 zo7I;C-I(i1^jGoc^$k7`SW&r`PMpA3;H_xHK_We(b2|%QWK8c?xRFM98rgSlx5_%G z8YTuyIEZ5sRbfAT6s|go6$qRgxq@Ld99Bj@f;-N}q8W#Zd_+!hrl7-tyk>H%Om->B z3VzA}<&4%W6l1uyB=)<(>w;N}PL)@D=hCG};bg*|RQpT1X}h zaco|5ZiBALppdt;7+SFn%F7Me!DNep6hnjJaAg9e^4XK6MQ4cFO`lQl!jr3dOWtpn zB=jDVvnB(okXFRrtZCG}CiCummELvTH0gpkJ*HUJv( zyh?@^=DQG@>{=Q!I=*-cfDRlur8J^|WvdRp3Tdf>Y3ewk1s@)B7|@IG6kssVF>e0b zmhBqvqtp%r&L`3T)zL3-PTWdA)#egu7R;bhJb^}1t|;!*86nbeFO8{q8I8=#VI$9K zUi<;lArxse+IWT4>Qd=-Iu2;OesnL_+kB$iSp0z2+ORGs$dghIHAg531I|u?1{#kZ~G&bz%z;8-p`{prD8$qMHj6g%vo|!gmWf7d?Tpe<8w`|AQU`*y4kg&csPJYD@*rMU>(S7ZdiB{lf9Z_1KTwCS 
z*m~JJ@d5Y_2qEe-BMiBaXgysjp+8CR@3bvlnr~aymp?d=Z|z^3Z^azCrFaP1sDp%; z@VrP^!N*8lX~BOIG(p0hqedrpfM`u3bLWjSZ%8tOOh}zcFqBIUCrF7oOL7lQOP8zF;a4dGuRU<+9_Fy0n37^7`5M4phtA2-Wn_dZm7`smT(;X_&h;hvnK`=35Mx@OJj z;ivkMokhx32E1PD(RkjK#^c2u*M$+IhIk%sA#?-D-Gy#kjFuScMZ|@IH|It z5hVn=efI1Qnli`zj-Z)?v37*a@1t*jx*s91Os)~M4}&E~kAmKZkbFo6UjI{vM<*r_ z2c1P6bOzGowZs6rUx)5|yPWUD~wr$%s&e*o+jBVREV?Sftwr$(C{TuE4-+lMP ze%VT=Dyiypa;LjeoxU%)E_}HT0VM(|$KmfZmekNK#T7#}m*CHh|4PHTSn(#X<<=yG z@g?=idL9NQS_2svMH5s;sFrF`kLcC_#xPo0AZpc~u+aO!?gm@fJiSdnFuOF@Q5agX zKc*~13omm#BwD0bSe8fkBa$h7H7^e51PZi*ukvC^m&6Rp;sz^XzZiENCIxRS&$t|< zJ1}-zTdSryt1Vr5r8O|wZUuaA7j`=RJkWa>+CLyaQm+kCqHEh~?ALpiBQ<|qk74X$ z8qgBKPmxhI0u*9Ux)NyvE+uR$%bO$Y4oi#eu$NCIj@b16g)VUxE(Xg>Q-O~*a_XQ)!rsV_}8bt>xp>sX^lVh3kb2dlkvj3FEqep<}1 z(^Pn9EnQw~FX0&EgEZ}dUjNRe1iL#`>6IGlwIRaKpYMj^ZuaI7qR2WMyd7c9)2}wF z{d8t(M-@OTfw31>8&*hw*lZZn4&;WPzmkH(MmBrYwCKf_?8Fo=<#YlfF<&%;mZT$) zLdlvienvD7X@2eLce}NZ{%i%|ZI+cKL%+6`dTrhG!TY=5S}2l{?dD$1?;#b_Z7EjcD&u~&m&jXd!Mbs6Rn33X(&o}MZHj7rB3hWu zULXv`g)dO}YqvysU>U2W>zB5L<&d!jVt?mNU*EGGmBB$jQ%;<%-35LrI~pRr!Vraw ze1v{1j$&sEZ1ByhI?Dg_6u5%kbb3MbM@hggBoLHupgzgv;c9DZsT>C2YMw0L9@P0k_@B4)o zPWXP+-!)vO65<&CRRf;W%sheR5q^#Z{jCj2lRXBEf*I;d>$JrD$j!3l9bP<-bd706+!TVHqg}ZEl zIsxZ6MI@$D8!2E)eyKWLqf%9J7=Uei*|+mVrKcgx>Atd3p|;NtAf*E_wtbujnLX?)eW!x`g1h{o^9|k|NZma z7!1o}&+YqhlIgu&kN)w&_;rDzP~}e;TTe_oNFgJyjg1)7K~8(vp`=qD3D?51(-ohh zw2l=P49ZyqPO~y;>ETohI*UmJTLnZNy|*Z z=S4vA$vP5p8W?Lqf;AVf@!-yeZxtlxgBnLHoVH35{Z_aF-qHya7AYqTTttB@w69){ zE@Nyct^(>~2>|eZT-X~dd5nLu%_lf)414eD@86P@hFoJ{Z1huq|GEf6oEF+m*`{- zbJtK$B!)Z=^0ul(OjQCCFx%MBhD)63O{#Ojo3wPyB&=!-{$^A4=Nk_qY7k-?K8yc& zSSxK6PHzh+${v^ykRspRm%5=?&p3DD@dwV64}|KX z3WqzC%;eK=#T?yqh`e|$S6V}_=pk}1$Av4ernF)pqaMWagiA+D+f=KYd#PC8U@+mY^ILE^Vfs^t|Rgi)Os{0W1r)0 zIu5|!POwtss5o(KRd%U0C@8Gvyt6P=rw!V!XdNx|bb@v^`ORA|P>o1RLm;^pf0rjJ zL{YItX$CY;=aw{0dndNEKVj|0(^6+ml7%jYqt8r`J}p><1~VE=RHsBMx(`=>*?QZn z8vkn!Yy7%)B)`#wIj-_s=~Hv&*l8uU)p9W0^xdmnL)tb%Bo$dgd+bQ^tG%Cf4rjc$ 
zZ_hNs2%jB~l&alhdF%+-)$a7N*SKtU=c|7PwuzR`hS()06m7g2^|QtC`{A2@bd!3l zif5bMF!m8UrK3$RW*Miu7WvQ2Gu>NYvG(g>;-vqCpRy*q+t?GdMT%XUf!%+xNX5Hd zIxe`S%W1MmrL%&yI#p&uoOlYS&FMwVy*Agys=tODvb7n<1|en!!+Es3K&GiAUV*oD zgaR{2zH>JZ$@)Z`FCtmKSLH9%+O%Dg`h#Vfes`Xwib8Nkkutw&?u&7HE{mk*yz;<5 zJ@iUdjgfz*W4{=-d5|@(iyPCl6kgl!ZbRZn+yn#b+Iw(^b$PRe+U@YK4N=k;Ygh5B zRXGwjaU__Ht$cZf`fIEnmJUZgX0O$v$vsf zW1O~#2+%?2JAxp92mkDWH;9Ggz%!rM5Q@=xTdU^yxY$4gTkXoz`}A44!NlaV@yeqt-rysP_f42DB;3s3>6Cm7o?>o)l_HotEL24`271(7rykZvcL zJwo^&EMK{0&V9x=Ji>5~n}WcFQ8-x&$sfc}pt1KPVxVS_aG+mQ5Gn~ECInLB zd{ORZItUY`J~j{v$Tpw+sc=ssv^{ThfFHu((@-aQ>Jiec12Wta`jkMfKf`3@1eHGZ zW}Q16lN*5#Y1N+v6DHcChn)D` z&^uT;=$6|2l=()gFfi%Dsj)I{$Mr1Cj9sRqzIkuu6GOME8;Jx-i$j@T`i(`hHSO2q z`H&L{@LP}Imnd`E0(ymKU{|_*)fIH>H1Q05gtCjwv;FQ!fX}~hKLI$c+6k$OF;J1{ zvk#X4N=0XFLUYtIsAkhtk;A}*Na$Cjm*W-H=0h*O6o0m(|P1 zX39FL|EU(q*g&JCeI%m)Yk9rqjXvo}?m5|r_+kiylSsRQ9N;~BJ`9}8>`%&W8@DWjm}pI&w{ zex>AGkNwHJ@Fm81)RIpV}@}FQ?`w(YV~aX8ZKJK}5qiwA;i7^xf3o zedPV0f_2S{fShmf#`~Ru%G!fv59B9NhtbbOpl`RI%e6tLz0{t$xIy-$#^nNz4|Zwo zO7+){m_ct9aqIg|VDqcqPJHK8&hvKK5m^0w!OP7v{8YtRs zf>xrCZ8%e;_%YmJV*q6;6y&<8EdhQ+Y@u!zLNGqBrJHRsd!6OOl&Z%l6kkR5f`Q}3JZjaT1Ba=pohuJH$w+sH%T znAS0QVPTYb({+pla?)|fsl*Ud!wE$gYWl_Fdoz6lnRF0xXc?RVb*j1ahduA(<~t!B zK#Kt{Jw1I>aWUh}N3Qd2iix?WWv^Qg=VOt-(=GRDj`z&Z>+VeZWIMrK3k%MNR7ZCu zmL*Tm(voQdBKtD`9KZ5&979^0x7v5z&I1ug(3afwm3Jg&cjkAGIPX%u>&8L!TN|&`oUQK<`^lfT6Ivc6_d#g?5 zb^5hgc#HbeGEbJCH?OXAwt5XmgPVK3crEmj=EN0EHCL1O=fp}Ker!y~>o+d2KIag>U~fTDiS58$|Gb-lTR}z2yANW{!kN>@ z9YkVUwwWK0Ay+xMX(8s~*9H%Aa21@VGnX6)#A>F76{A`uQ9_xo7kj&RTk+yy8=2v3 ziP){BP47kc@J?)Iw0QRJ;J#^^{gEiaT@Oglk3cW5-mgE~j>4CU4p)1tgduH--e15v z(%+Yel{^Y{$?6$f7%i%dRw22PCRFq-{l{G35-zjPznm~h%AP1Pbt|=B_s9P@d*$}e zWTJ*;U-xnCK)zOTeB*jTzdv-dyhDcCCE8va5YxVc-$rEo8F3G`=!;3u$FIwKjAk9Y zHfvns4#88Z3&w65E7=fox6d1C%Jw4%J8^N`^jcjnpS^>oy=$;7u`3$^)~y`&>1Fde zmn}ib6El9IzJcFl`cZ+0TjdNXYW|{9`%CUQ@}GGcbn2)puS_B8*f}=2d80_EwlYx2Gw^k`L*5XI zjaRH#7j~{C+l$1>YJ+mFD!Wg6xQgTN4dHLSWI^Jd=L$9JZU|ic^vYGd0Zcr<+RB

q%QDxiZ5@vtOx4IguGE-dgc`HbtDoF!n69>Tbg{YEP`RLh`C7gD7zb$>k<&?OZ z($}99Z|7G<)3mfIV*fY-5V*}bV7y(wXF%3{Vh{xI5*E;u;A(V;bY)`33Y%J|kEkUa1sT&)VS4i1n~Td0 z<>ft_D!1`JCJwF?O6s=ha4uKZE|>g53To!X=jmbhB}VPy_IDjxN!q3A*!bGPi$zG* zSK@_o^c8gUG>aF_MlsW6?A9hQjY6j7Hx*QNWe($_FAqd-kvCToPb6QCD3H9IL;by% zELsy$IS5@Sd7!U%{de&PVY*CPAE%-smht*aWgSNx0yy8g`ANUCf}&cMu*9&23bVKl znS34Z%Kycm-iUL`S=Ml~ zRagq!7xWpt9#neDM+)DUc3b8`^DR-C%Q_-i^Bv`v9MT=J`-;Bq0KJgU-K68vZh@WSPfab>7zHSJd*pjHtTt2B1~MZ zu&h#HskG9XjeVqcaw_(Po#Q2D#;hhvCW>*>n3g~aA4_n~c{)E(1YUzY12uO0L-zwN zA3qh}O-1b&#LgAlHL~Iw+i6wn2YHAY9-leUTB|p(}U&M{|jDb)L**X5x zc_JJZEGBP2g_^xXT&P_z!2E%Fh)cX3kzJgfpN-s=ur2$wh-u#NOyt=q7W;SD(RUAn zA0Ok}aZcv@(spfmFXa^jC*zqm((6Is4?Qx*Sm5I5uRjSX;;|z=fzec7EIcldM99CSgTphCfN*;uu!z@WwkNs?$5VC1*7+!ZXyw> zU(;Oo!lO=+kO6nmkh3i}$yo+ZkuEy2vaVIB&cJDL9OImO;J|_r^mtEqQz`XL*6$nd z7rI07jZ7mPXlyhsby2Bjm82Uhdr=!P>@_daq5`C@kb8XFb0Bz>*2>q$W}iausfNM0 z(+@hyQF!u7NcHS>40&uzpHdaWQ@|b}sHW%CS~{Rn|ME|~a%FDU0nL+1V)OhIE9T92 z?+k5rzxC`jq9kX`X-nck0d`c3AZQPpc$RBOFz~Z6!us}dipuh~-NqX+;lJInlU=fR z%m}azp~u6#cm1VD&1YYsWlVU8wi1EA4G7@(&CTN0crZ9Y5Z0GzJa|TgR6lLcxqT2b zPVRS`T&!g^C$}&(k= zcevFL3DRn@Ox(%+1{JW?RTlTW?ZXGH<7ZM{iHmdg zi8ZfW(a#)7%j%~$5QLO74I@X-q|yNzW59=ndQB>`bkbUd0Bin&q@{L++9G0Q6-r}P z*Ex`nM!<_kB)bplzBO2&J)`JX4TE*Mc47USsx&If zl^V$MZ?En_<5oM{Y6ztFEs%nL%72Q` zdS8l>jHe!aMs2lZpO}NBN|b72vimQG-m$=pF`{VZ&!8Lo&%J}ZP^sf%ye>of^QgF` zzVc+k(acWENlo2T=S@u)4peY#f_hYxcfi`4KWw)$nzV0bZIEw6s4dI)lDf3%Qk?+T zhPAdFFwp;9POE(n=WEyy#9g-+657|C!+kZhH+Iz1YtFN-aICBh@@=<|p1W}AK4E6Z zrfV)HtM%T-`R$7IH@D&h&#DI@ykUW&m?a?29{LB=y?40^-*8orY++=lLT#P=O6s>Y zYSWqU7*tTkZP3%-|M2{PnVn~9Y~$P5`1LPzRkqgreVkb#ncq4vH@D)lBWl_uu-s`+ z*L&pQxF(*D7Uet=%8X0jqAYLikX6oHDG^g%JSVN;)oYq0XUS1F%GoLvyCj1mWyG8f zRUkYXz;1~|VI}Mmvf>)hvt_ChV-8+2cM&hNyT%k`#~N~K{wz&iH#F2`C&vrF*}!?R zbB|drPQ#<^_ZyMmZGyJtS5j5A1|}e+7Ar$LaoN3OS?b9Bqn@48qK#X*kz_S>^;yrQgqp)xNn37&w2KC!P_;>+mcK- z+o+at*(_d|ny9_~#(uAtus(Zjzn$FWZuV)#A|3haU>qy2w&Ch|*}OVtBBAB)8d+&^ zt<|kno63`Mdb{I(Y<_snz#{B;2pf6AL~AW*mIAtQsGSK$D<0g`!kM@*sN9C}_g~DK 
zh+4!xAKMtIPmGTgW9NfduHclRs*r$Cug3g_`uAT?YaKB$9a^#tZQ3*UX0Ptl3f&Ke zoyFb7v*(OvL@M^3$5fC)7~Kz^-ms=Vj6{^ixgG`pIMxv3N+!rTipwr!hoIG?Dm&+Y zLYL8{EojMKbaro3Uv%Nvaf)R)B#B$WnlV*HLnQqZJNWw$3113Nz3@9J=#hw$hR-txH2BIVRc2+2M8y6l7MN;0ZdrnRooL;OnnkPwq zuDYF^6IOG|?;J?-)y#J|%bK*cy-z?B1^21dw7XiGk)HFqmn!I<@4N;JX{>}@DW#8J zHYm#&MK&aT-<3LB78dq~{zD;_Q8xfN<(t)gLS#C|NP5T1(TiSQJ+ekKnc%9(Hdc>Y z%=c9%Ns<gN2vvvb328f&2Udoqq4iz|+kjVBmt% zb(p+6%EsOW^-deBSN)<>!0`(}gZ(CeYe@{jO2-IrZIA`%{}TxO1a7 zy=riJ-JbP}D1r^tycKdC-CCe5YmLWgoG<=k^=cU@y3bQRnnW-5Z9@A4ie7x;!+kbVEX836t5^=PgLM5+!iziWbLH7K0NdC-oRpAxY`n|-uq0ww_ja|K>>An%z%qT7{ z|ItgdGw}nSY)Bug;E9^=lxx5J6mOgzEv_!f#0_UJJ*B3fwn}CWQ3cOFj1@ill2NIR zDIWu8SAL=N&)<@|vT@bk>b%&WLPc8a&Gig&^K$h&#?X7^JkL_j=T>t`3FA9@TQ=$+ zzxeq*jm`9@4Wt&5H(Vs}Vj)kl#uS-;sIkMhi3oly{kd|M_Q}sEuS-uu^Yba&eA1weaYA@b)~bl7_0^pk4AQy zY*S92*tDD-;gocZ;22f40H)4gHJ0!iW0Mn>mJ;QmxgVY1aYRBs^rKKiZxt=gT*!8D z#z+IlQbx60v4XZ#`!|fn=*a6iBjZ1O{ST7X30WE&iRa|p8?@^n?myHG{4rOuj0-IM zWneExW~h!eu-pAfP=xAdF76VL5+s?|H8*E-D`ONNlL4x zRcUl&h*$UqCuz9gksUe^s!4wc3G$D*S5=;oEqWI;sp`I?)vZBHWkhCVgsdxmhx1G{ zo{a3$q4vPcIS@b%flOn-Ue{vBxq7rN)g#TLJ6S537J56lIN8(cqS9=9jN))~1|Q3( z3lpzabUkOMz{+fjzld8%jONh)N~$N1*^s-nuH;?Y+1@tQ%Uj!5`Zpq1bqyUWPsDf~ z_uQ?3CrKg?r)tQvFANqis|NMnIKqXgl)N&hjRHHHj#39u4C9Q=yhoyhNKWj29ZZ}=`(EI3o-pKWdf5N$GhEIMXXf#nc_M7Rv8kP z1*TXz0lD19%pf~*qhetKyW5W5vJj!x7h)5bXOq+|;JPy51bWVwQ<+siq}kr?pr-V0 zxcQ7USiI)xCbF1A@HtlNk}e^mQf4}W=fm_Klq8!BY8AA>n1*@4;AV)>0~3yN>{Gka z`RATjJ6oO>n#b37*$vW9Q~)(eGS)>`DwndJ6&bYGC~x|LXxG9K(EFpuDMR`g&7NjnS`1%kS5tP-`JdS*v?d6d&O>+16HUfvNl zr@{4LkXx7`f(I_k$4}_tO1g}&;-~pK*%8N~{B;O_`m>F@m0C>57Hf9y{l3Ju@nq4M z|8Dr0NuQ}BV3TdvUHY8EQ9(cVqS#sHTlIDSGRjHKfxrGVauPO8cv~=*=fcgJo-)I($ZP_5t$=7%jvXaeI@tO zv+ZuY8|SpNJJ-AQ(-^3Lm^D7Xx3=dSY=H0MQ$`h6gX3k`=JR&^3o85XWu1tH<`gv!~Y5-K=k71;N~J!~fzo)32q84qtd%{Bh3AH+LBo z17TG2K+w;AG&Y!_pP=Rl-C}`!zgXZEG8sGFF+tgYTwGc`+;QiO!D;^gs%3xvuS2A{ z@H9xq-s1dJyP$7!=%U6It@agug2@M~~`mG60Ei&%G zlG~^__hb>~Kf(rI>Uo57V;}9%3KJTE>J>?oX&>$2gVbpcdW>%+mdUhtw$P%L@OQQt 
zQ8&_Dv-suMwcccAvv=TXI?}SpV|Sqzl+Sb49SJE<;se=o4(Qq>bXG^MLTq#T|(-Li210 z==M^;hHgL_^1`w&TR=efex7kJ1037U+QcEN=8D z$>$Si8e$xq#Y+H<@*)xhm2j{GNdDy)ugNBhOD6B88=Qygr#8xHBe7$Ldv?&NjMoWN4Lc)-e={S(r{pV+ zpkvy>vN`pXa;!FkqNte#6G9`-qL5^?EGYe3kit#X|I#K)2B5eW_tYDLqL9b1G(xwu zV4vEgU0FjhV0dbPx*m%3N%derRcw&-I^K{)l{)6?<7Yso%d&@RIQED`t?W;&>(gO4 zu|mEG%m0BJCy%HB?>sRXc^)`t?mv`9S)Yl|;EsJn{#%YucIsI6DkAX5htnYQxTsld zVSpxKkma`c{Q;wjv0sN}WcHj~tV*bEo@$=GWviGDCwA1cM;^y%^{(0+2)r~OREs_| zGR6>5Q#V{Df)9C*O!mLHyD!KY)clRv{>W}t=y$EA;8vXei_H8`hzQrq%T<$0gSG!y zL*lfIvH_)ETHQxo&s%HUvzz}Zw{WzW&A^~23QAv{?@~h0!YI87%Ayf~%afD^dFYe- zXNd(5(Ikd6!p)(q`O(k8q+HhX##Gn!| zLAK)vd!IJWv(-P62W2M(IS{N|FpzjM{8xn{<(xQf(oDaEVI&q3ce?iYB?{5DgJO^V zUhf?_#@;^)SCc}Oi}>isBV8aHO?1?jN$dlqy;D{X=MP~nSne!+(t!1uh6df1V3az> zHW(+F^XQkr@;Sr~ky6OJpIiPFH@6Qa=1jdh+q?uo!@;@OQ-3xH z-`TBsXg0WwAkl<@fPxEL2D)&Is+T$E3*99lMG;^)>J)Wf@0a?z3?=v@kEH)NI)#;< zUm9uqz^AX-8lR&rI1}7L`)IAoC)iD5iI*P;3-;elZKE!ntJ;+&{S?4`{ul+bw)bIyV$NujrXUvL#9MH zi{kt&Z2v96CA5s@xuCs#`SAOQGbY=F^T{J7QjI-xBA7d_A`N~5#vFOzk8Lpay=uaw z;y}!e<6(t9L_X*3EW80t;qRkgi1AMzsk6X2i*_WWTP${sm|QsI+`1k3FG-5_|DJZ` zn4Gs_QQ<%xvIZ9ve!okQ9#y*hbh-GS$W^3MyHtt@YMcrENc$9&z4~hkAg66#LR>s@ z;t|kXjd4WTBny|PreX83iMj~rrRDlT5HjeKd58%`iYqo9Rm_yT?`^%qa*t6C^A=+%^lDOxwph#7Sl%^DbgCry4rI4teRU+dhyTudQrLXp@%ZCSBhN7 zQsOTn=aNpu{Rs54C60oNu=Q-Xv-ai;=|&JZW;sS zRGnFXn3Ak-nFhz2g6Za0Q5tf<-s+WIMwO#Phkp+XxP=&uo!$38UG zX;IAmsUbQjqLhe|1z!5Oi^^kY@F>5_r~lGH!}5=+5MeYjv<(tUEtLlum)!}K*2 zwscMX=#o4$o(y96qV-}`l%~M;(3QjcBbZL00aN2BnSIHy81gl^RCWi5QlJsAr<5#X zbEC(izcq!jPlgckvDa}BngnK^8G3M(>g!`HWyy!QQH;RC@}a;oG^ryfF=3eN$K+e^ zOxU)q*MwsJEbrXD@ThS7@Qc&fNws=r6EGU|V`T zW^eP!eq$}avvFb0+K8)1YBsBOCMfC&i6WIk5+cEngA`9AMc;QJw@Ad7B~MBZl{G8x zZgDd;C9k8(!VI`*PG2rQ@}cQ!s?)|DpCU(1XER{$!Gr$&b*xFdr0f3WamNMT?->KT zX*gjQwM;MsW1H6X9~jX^LzQu&J~uYjKcQeTmo62n!o`Z`Nu#~|RWxO5#+o$AYfo1T zl4g=Z!d#PQAtyU=K>T*D52l}DkYMq%Nrc?f6%>#-19E9a>@aFLFiC0SFVRLPZow8!U%2QK!#Qw%EP>a#gkWF^EOMO zjr3sQ>(ux9E3ay(2L!nkjyjo69s$;<1c$n+6)}>wB7WA)nz03xrmn_`VAVHz+V;U?>Vjzjj@tj6@(dOzGmd 
zBwh5iF$Z>FUCLNUI{+st*o4L*>T<3@#i!}pjeOrCRa8_S@w8gPh@N4Fl~d5(4&c4- zzz5+r@f@=$?O2HZ^d}37^y$!pAan483VCdS1xT6^DLk~qJhlF@#F5yZJfIFEptO)n zmNLyqSGAj{GHXyIEo<>EkwUhLjtrAA2)VQ0i3WH|R0_0FDxxqXt+F5?+R%`j7veBC z26@!_RF;n~i4SQS*9CPP&JCL(Z`6bHgf3JtS-;PCK-MMuH0&{;#NqWE4od}2H5uQR zfdc5tUgYa)X{hgM=0;JF_t(KAjKU4>_4&2_cv~f&@wJgRyfmDmMWIoVbctV}J1QH? zuyfSCUMGC?qKeywv@DJ#hcraq9$#ZDU&AExaV$yEV=&d~N(Xe>}3> z@2m$g1D}w95K4&g%lPYXk?AT{1bed=l;vtk@%4=AyuzE;fso|$Szh)A1|AVXu2BTF z(DzGmZ4oic^)>p%Sr52uXZ~iNS#Wcee}#WdVJ0I$UAPEmc6%n@PZ{jXb+C8md_U3= zuF&1chX+hcFb_`7Tw$A=Z~4}|Q)Ey@lL#~)+JYbobp}8b)8~2krAw>1bj1_%2R&<7 zU^(}&xVG0DZ6jlV@>Bz%jL{T|{Lu8;#)Sc{G3^gmw@cmKUBTVmfkM3P{{FDR?S5#` zCnI0qTiiQP@S^@cvH1TfBO@029`W_xLSaPN1B223yZpQom{El^Bj52_i?rrFep&dj z0~gR3{@{{T@wUw=kUf;CqAle|F*f&T$6T^@$W+%ZAN)r$U%aw}77+JM=*Z@g- zK0^K{Fn3%33^1v!-o^hjj50}L<+dRz{8hcf&IGwi`+5`+RMfSzaoJ!S^HQwF@d zFJ-gI`KGQY#w`plvATdi(``cmAb}sz40C{|p!3{6hTl-r%1+58HUaGfOmRRb_yL{>Or=2oQxBofhtbdv>3?p~3l7)?nYslzX4{^MhQ2RxLg~tlrx$1wuesixQM0pm+f-Tva-UjzIxx@#(n&kqvAR9 zkoD-9`{?H{aBAsg0yNbQmXh$*ZL11eA)u10U zaY5>vf&}mboiGJ>V)teJUIa9S33PG^4DT;P-6Zk14#Ey-Ix60)FQhp8szEmVHs3!norgxI10wM&8p zK4A%XC+qWpHv9%P^aE#_9M7e~F~K$kdS^9-a#-ZU+QoXZgUFtA#llNP!5GO4Y>=e^ zH&Dh7{>Nld|7;Y0VT|n((jPBnEu`EuHe_^1{#`>f(n)TB6ji&tLU9Q0AcENyB_xEU z%ACN1B^%OTfx!>*o^%#_)^mn3Ci+^caJ$p(R-KAfhM_S2hK3NB#nQydf1Cs`bW zFKj17XOln9OtQcI2hlwr2~86!m>ykbScwEsjSR3<4H3WGgTciOePimY5$A%80m(4R z&#w32$8-bEzWc6^0V<6bIyX~f5()>RU_y;a-CDz<#o%x&i(48aBbIH@j7BO$motQz zn;;^FYcS#-VuKJi_O%I$WP<6{VJ4KELlC3a1jfP6SJIBag;voqs>k3d&Xgyn8@S`e z^^S(X+^Mg3z@Pw-(9y{%;lhfWm+=^a01N0twgKsg3S*=Bpco27hQtRn)3FTXVZmhm zt#0f?V*J;KE;(a_oF2_mCMG5}l(V^S$ zP$Q+gg1EIzl}@?`Ib%#aSQ6fM+m{+KEP#74fR17T@P~kdqyI%WlQR8B{SEyW-3qwx z%?ASVuY*;+Hl27Kl8HAnL<7#>bheOR0gwH=C9Eh;2d*rWwRl<3r6Sm3r+Th*scSRk zx1e0dUmb*^-K110F@OU))C$c;a+4`B+lvPB5_-b@d4v)gYcQEfnUyKbh});6{uVb> z@>S?pambg|l_^M!E(oo^L=zmJ3nb}Jz@RM31Q}PAxDkg*V$e|hu!mN^QPv0&GZvgX zk#Nu-;jX*_=AFdBeISBL7-0m;@UxNvX6(NS;Pi&7G~j=gw1LiflQlG9lOmbYJBSLR 
zL9h>`#45y=Nhsh=51NL1(_Ivupa$gNodU=y_Jada!}!mgnEQ?2-N~7g0$H@=1AirN zL{Y9nb~P$phGc0bR3EhIj2o0n#9<}vb?l%r{`AnbL9hq}94Vbvm(aqyqI4Z0Q3O&? z37L}2+z#N8(Nw6VZZ07czCBk){Mu@uk}-I_hLgmlrvQtFv_41J+ax5mRH<2l84t zACKWW?T_^nEJ+p<)bNpA%=rkx=2dZUg z#sS>U&V7aBmP6>RNg=Ha`g(k#ZQAg5VC%j-4KrpXj83&Dj3aL%1MqePlSn9g2M}#P zvI_Mhi2_vzc~v2x7yoc5hr$+&RxPSptR%PsO~@8PSoEZnR+`UYT&tR7s%?#n58(><-+r2ihP86pvyjTQ58+vqeIX=5nxrgkQtyH+Nu6P znNuPcF~b<(zsvf3d*0CHu4+qecHjwI@20OWf>3~cJ3H3c6h~_B35`Bi>Ln}6#K6>HERom)baVm%qa+?l zPEB_4-Xmd!WkZnSM%H;Y)fj%Ty&`LYxqY=5y7U%QWrr({_ex$*?ox7rHW(8peeF8S zYb!J(r(wz0^Ld)hAk9vmw$-9Rg8?))$QQ!c4cJgRa$&k0uyhn%%!**F_h`V~=#YP{ z0~yOir6^R50Bk6)13m&Ha_EV@d0N=;twB+5QS>4^TN zagI{tF()pCNi=blSkU{WBN!SbSQvm<`Q)Gb+dl_J3NUCx0A#i4fMSR;+Eq3y_{#Fp zk?naTP{f$DivN6RP0i|sJsuK&u9YIj5A+shq^BcD2A~Qm(hf^0NlL#2H-VfOI&O#s z&X&qRnXzHLF#UFoB62&v60O8JysmQ^b(k5LCQ}b)$v8wF4kri6q8Lx7j*E^99|12x zj}jAF4&{l{iPQFTY`+8#9>aFa;txvabx)_)JjYrnx)VIxof*oTN~4W?v3AC3WERFFH=Z9%a$_DK6D_?-{p zF|-hx`Ma_PN_$%^^VN=|-Ea_wlr0iWc`qr{Gvd>-h2F%IReJ6l&b)y9!S1tz>xD&-4AesYu=--Gt@WGjh%g&9IKfp}VG)XaPXfsit^ala6s2=ZK@ zKx`1doO-H%Hp83b!zacm@Y`&cRe3*e2~dP!e?1srFD}2Jbpv;1C|T2U3AA*^o>w{v z;#x^|C5HDwKh&y3NUiRk;5Z~_A|bwFNhxiAfLE*DVtDXjo*_eiGQds>8MCOK_8K|< zu7e0t*x2xZ{>P99o)=VAq4{DC4Mjsf63@9AyBhs+V1Lu|U;uiFIA}1`%E^ zlf6H+B>FxSI+W8!-S92w_sl;ymYELyG|Gx1n>pW~q6NkUq8=}4j1@*!kMGB)()?uO zU`>gQtpewB*b=|BpD3Xu2JpOM-Fxp|ZLTDY9?}S>CQ}@2IAAfJt;OljkGhR5H9=_7KluWDHs;KS&S6?E#vIQH~Lo|#Ru)J!Y;_Kz1g%jXb-nSA)%i12)?9UXN@Eqc4~noYvaag=P;0P4#ii4g2!lCW`|x@>PM6NAt0+i3-xWk;O*>xU+c`meB^in$UAxnf zz(zyRfVC%%ot|yZ#blpa?{GI9{c*cfKz~{9PtWvT`stCXqOp9S=KHpS{c>Rbd(!j) z$+O)`(YQZwoa=ClN{8wCI#RkFi?A%;(eU%*VV|5eZsx)z2~IsE;y93EVXo+|@W@n2 zPXE5@r19c3qodO|Ok(_#b2pGn{)YfMJoYKoEK6OdjlM1B|-!HLkm=fYU z^Q;!i#%{l$*JFAW|3xAb_I?3(Z&*fgeq0lSz-myKdC*=GBadS9i`)EC^DjE5@#$g; zOL1Y!^H45cTa)Js;NnROucT}BcT3jBLE7ZiNO_xvaEu|@*e(G zr>Z^2-cpk#pbSB$#~;wc{f;VpDA}gxV+I6HN1)meHUpKN=rQ6Vy%H{oEq~^NAcoJ_ z-Jz7BN3DC$vF3u2`8Us$zk=NC*wJ`gX~#YTLR3YwX8ON%x+JW({q(ZSJ`%NJqlBbu 
zT=3vKmJwW;9!Pakk`CSrUaR#H`HM_wJ|n7``$N$RxD*{w;5hE|aALYkAUUzRibndg z(+LL;uPd12?7mqns({cwFkdmw_dcgTnm4I!?nipjUS28-KwUTa5T3dz~?xAiyc z@@HLWaRN>ajl~}@|5EqfVUCY#i|8Py5}4XM+cck!>BKKL0P4+-F4yh9h9ZIMROT?m z)R}rg212|WTqSzmf6pQP&hFA!s!`fmSz=F|PiucpCKsm)4mcb)pABZPWuD^DA+6m% zu5{`7Zu^r zXp$=qs;btJv}Z_a{8tG5j4mc#*kBc;8W5O?m)A2Pd}eaO)94f+P0;gyvznDRqfEin z2Z@(Ie512L+|`Qyc3{FO(v%;F#rYt@iN&eD=!4L8PfZv3y^Dc~etL9tk&^z25BrFS z@CKCWMfWc3&&RiGqV&qh^uz9Xit(z1h?bI*=rJgOF<>ZQAfP9%8~iE^+T0@W+7TP4 z^Z>%baJ#1907CE64WbzPjY);vkifEqGr~D-Qm42e4dX(L%(?h;k_B^?jSR~tZJ1(2fcnWl+c#FQhPh znxb6x0V}<}^;ct6QtSSU`Uc$cM}nVg3Dt`~j#0h&p_dR}cToqoq;4jfBMK!Zf`Y-UVph*Mam| zuHyw*3q+zRfrBpPG|%_jiE|U!^|=whCbz?&UM+;dvubotB;P^i8^n)?Q(%`$MeGEI z?7QzQ%}1O4*efQu_n@RM8mDSjxMnWu7p7m&SVA@5B}#`46G|(_KFA{>OUd!+>|$;V z|Hk}6`lfx>!gyM)kjp6ykLYzysw57r9VhT{86DlJ1gmx_OXm7j6T6GH#!6B|&UZ37 zExgirY~mb~c&xT`DU$!8b67{)V4(CE?>3))Gsexuygy*gLeRFW1Mrw_iOPHbl-KYy zs`D{uBKhh_Z2pd`zpKPs=Q)fw^t@!SCcB*H?gnXL)i#%zArWZ|ZUUJS|G z!8TF%F*-VA7S)&YXU*^4K)C!+Mj13L5QsrmDrAzdzTcMFCMUU)!yF~69TSB~+oa~H zoA2jmw*QtEf!1}+i0|&z-nTLEN;SJ!Ml5-@YeAoL!6loT#au3cg9nj(52AN>!B%=# zarMxF$S!9J=J-J9N1zzLf&M*jX4SoPqVjY+14oOofXBjv`(?lD#Qs>W*>R`OL1TN% zD(LZMm8lA8{PZ?fmx)+%<=)d_4|ugTk+I=3##SxF;#|)zki_E}igL)Z@^)IVu1oKw zc4%w+nwr%9oIDkMqhiX{$cIbiQ|RO8Jh5>cBb$MRo`$ z=6e|NfZNCD@;8Uegkl!K*+%HP2XrnQJ#d#TdH0*K!)(?LWysW)7MYxIZmu-Wf7K16 z)12Sa>(QcoFX)HP@fJ!DCe1*DDp6QOe6Nu8$6HB>%u&~U12slZw`>J=blD#`UnR|8 zLo4VI2l=sq*~JJKc9dd-EQ^c-A>0B6;?2>U4c6D|oSkifz)xO*Id2N03l09V(!Cms zjxF!DCNt^-IVflpL{NUgwK!wC$>(#c<^rO@a#$4s%D1@%h#=x`6MA=ZXQM+`U-|unmrkb2jwdP+MFYE}rd@mIQTCT6_ z8zV3Nl;_&}N#eIAyczdj^$yk=2#pdWATV$U&a4?vIdwKwM;Z>;@o3q8+O!M|SYXA>nF7GnY@%SAK659a4GnC za>jJ#IOgy=i2EL{CrE~veH!~XC!TdoW*WGU9A7gA8WmIg!LIa<4fZpJI=y_PKe!XBr|pUr9Ceo;}?$P81YP ztt0et!kTi#uXb*#tMdVPyiYoA6o`?{#B^yc-#Ypppj2!b*}C|?MWQlIcDSczY}q%U zc_I^D2yz=vj-@+12vV>6dAi7|WmrK}e%&pNo^+6@=FIl%Fpf8IyPoaYDu`@+%n@R) zEmRn9UQmyhY$uZcJH^t9q28$HpqBeRp>QC6#$tVB1yId2Wf3Pxjzhc6e9X&n2 z%;W8FY_8)#6ICy^#X^kXuj}7k!bUjQ@@biPq#o0C+S~jpq<5qQ{Z)MQyrk 
zO`e(-B+Df<_25#K=Vqy{9Zcxh+YXwfQ_3zp#QL&TxV8NzcB1A5skby!YmV)t+)}O- z!n#EBIit()hYHMbYZP^!t(y;V84yRO&5aq3%GT>iJ2Lo6N zw;ig+vt4>V%ws}sVa8^?bGt{0r#Eg5pv&v2H`9*9U`oc??v<>sGqQeeb2tabpMMYc zb&)E+JT_=VMq$=++<&)nI#^WzCtUKKJhat!RkuxICJ(T2)3vr9Bvv*QvQiaw8W?2N zOPqZ-s)j_uYRvGoMH#lnXT40BF{R4QFsUCzrgUGIx*_%)+EY5?ue7osqcxK{T@fw1 z3!rq|8Z8%TwvhERH>kMp1MSj3?uD2x4J{syvVd19iNaG^~}W6`Q_f9uHTN0k8d_f zN{=sQR=YQ4YHzeT{d7Z`yWKfYxF$ny@`*@+LwX-?JcMhqb z?#LN({fj3RUe)Tw)MD2Te99HhWdhQc-mN@&h74tPp1Ix6dl}+9ojh$4^#xIBFmpmf zv)^H@&(=nhu(yL9^L^_r{#XYeV^46JtMZl|KPVW8W!9pK?GzajC~#%QdIo7r$nxJS zJ}+44$SJvrNOW*8aZ+&z@Eux7e!x4cBMJA}-c=&re^(&KfEzNnfbTK5Ak~MFg(~(K zu!+boPvu}hy7zk^!TMmVFUm9X+7TYpiH7?(xVG%ssT0hF6*Km_Z%#_p%*)<0yDm)` z-j13)<7$8AytvPDNOiHb(ON_E)KP0E{n0Uv+1g#9Hr)Owe>*wwsr*HguO_*W}=h>SPsE$UIV{@|xP&3Wsa6Wq=EC!uKnL^ALUPkv;nP<^7 zeYar*knEL^?6oQo6tTAGIJ1-S8MDQIrqx)1&=@wBL(sW2&t;SGmw3y%_PA$0;t^N} zdr*jkjYW{1-ATd=yb}uor32O?k-3}!#D4`Sq8O^OZt`7y%k5>i40L5u1s#cq+H_c_ zsHjy6u=#0)ZvPQy*SWt(G-ovFGmLb99CjF{N`BlKXzqut_8y8cDg zj$>*Q!@WiLI$0ORMMYIagtp5FmA($y$SW;VwsK6d3Ua(6hRoVcH?pqKr>o(V1(~=N z$MiF`)HwMq_X%+a7@ZfuHir_pxr)tp+#25pS#@sz9$hJViZE8EEveR-j-R2UyjWtXiCjC}|hi6#`G*Oq-V_*jbl(6<0+jVS}VO=ywSM&0X=g9H$BxC5hub z*QURwh-N&Fi@Ng4x#TBYKkH$ybN#1*gCm|e8rcbFIVgxq%&q0WN9X$d9mnhI{;g=O ztXv(w{f_t4lPuwZuk#6W80MX<9`z^8t?fDvlcXF@Zm@B@q2iP6@{?j-&3>$DdZBc9 z^*Zi(j&76KXB@+pW4oW#*8m#eo4GLgcwWz;L-MV;*yr5h{_CnXQZOX78>z0JP*@lI zRWza6M?8PNu-F?6LO!$sjPx$9IXAF8 z(HyRb_`%{WJ-e-B@<@0kthp{sSkf*jY(T#Qinpf-iH2KEJCv?iXR;49bjME*Vmup{ z!3u9wkfjx>dv-!;1^nWh_al4U0Dz+ekc4RrFkCxNKl~Xi4mzQ6-tOQQe<4&J=Kmw1a&j@Var~!*Sx8toSvfd3|G!c`^M&&0ml&6D{To!gCVLAXHOp>A=0T{vD_KNi>nxfiP>&^3Efeu zQEk}X6!~+rc`5@5PQ3N9bL-asvs)|u(q~KG``qOQ>NHrT3-jtv-k0~MbW+w0ORKTNm|R7OJL=ZB);W)C2jeE8e_#T_hlJ&3*7kU z^z!2Shirtc3CgyC;2W|=1d9os+giam%fLwLS)RZ4p67BJkK!A8*W|2p=1Z#TpFYo1 zv7`o2H%Mtnl6#%mrXLVV?c+Hb-4)jfD>jNBL6(byZP04TiOvok!65v5v3KXM_H@!b z$xXmcEXXzmtASlN{cF6@RCXuWb^f<>hYPxQdl!L4&<+$Qi8a4^6JFHmD*NxxQmzrO zm*}H17h1_?%6YRc=n_m&liufVh%!Go$h?kX=UGCE1l|_PQ^~tK|4m02pb*+43VaW8 
zR;IQBg}RBjgn%w;Cc24u8+h&l;>)64*J%Yt)Z2d3H$=HKG$>+KQbiC3+*+3|$jLk6 zrqh5rM%GbiRMPJ?tg*dqD0>)=$5*b*VYI=Y_t0qeAricFoD=G z=8Zj(A8tO=^mg2Sbj}x)u(C@Ny1ZSNSaZK=5)t9P+^4E$+YoImnR7RLnadiQmu-CM z-5oUIkLIM-S(;|3IMFE2;h z+$a?I0iGKL{;&!*LjD7U3yI?w`%%FkOO8t?k9Nx19db6Gr9yvLcI3)j@}DG6bJufVhmU#anRp=Adi|Hb$C_81#t8Nylz)CbTS76kf4O~9O#=E|ttQ>X$RiV=M| z)lT>M*Bt-e}J-0$ue>bFfb|g;<<%;rGsZYW! zsP*Z7W}=-`?kifH%kJStu9x61t!uyeKaWGv8-m+70I5DYgSf&Aye~o18y)@sV|`P$?H)g>#T@Q@M`Qi6M=yEPx%eNP{$EW0E4iF&vidUu8V2H67$y$h1(Fv+okaiSbB4#-3Wu2x z=Ted>4~iO00*tXtVYBW2;&Y_+|G%04xx-()`5GGq?<^j-KZjj}c&{B0(1h}#iit@Q zJ&0pSo#NAj@<>NSHmsPVfnNr7xkAi_^owG<29>)R7@#Te4se{vN@DhFbUKOOyL7`i zw%)w~dl`E*b&aXcLDv6RL*?)FT0q>8JZ_S7zg1Lx$?Wj1O=5{VpgW0R4|kDKq0_g~ z7xL!&&7Vc$HQk?Wo6znR0B>!k*)SzHo9^nx=69ZD*U$0yN%7n*!${x zhdYP+%e%{a{}Z7jp%aUQaAWF}1m;cLdCbDN->8HV&|eVcI7_UwCC93A^;U2T2y$Ww zky2P?1(|6iqXXCzPE$WcW|#OJVuH#WsP%3Y~fj< zGk?nF1DRD?X!9L4npJHa3iPo6BW)$2%jVM|)*DEmZF@UrsLvy*cV+s<-`r2md7fN$q2 z?6VpD6-BV3l0umLOSQ>S)#LmZW@GI2DqR$__p1~(sZDI8i(}ELWl_y?*z9yp`oqt# zjW4{#{!P+R8UAT}cmgxGOA*mnjs}{3YO#X2lu)dPzM>Uqx9ZXwY_G_aNqgD~YT>Sf zs~R6kKRL}QS!-m3FvPGMH)ncVtc_4OQOgZ@SS}k3G#Z7|g*gPezn$m+{@>7x4GpaQ zlFt&14vJk|%fn{5WUsQ=Yng2B9D1|H`RSOGf2<{4_f=yxs>KQ^J8R0UI=Kdq&o8L2 z+zhU*&=^|>GmojM?|2EN)cLQE;6j$N>>Zag{m5N59j}DF6^ggelkA`4i;_}3T+$S) z+4CKaepE3r$UW@uU|~;)^{#2|{Hx10*Y@2TbxQGUgx9KsT{naL#<<^>E@&hNTn3b)X{FKI8Y^C4U( zW48R^al!%cZ_*F_bm$<9h1{+g^Jcb47_!`PIEbElR2EO}PUiYsBm6L?zpSd%LzvV` zK#`L@U+rq(?BDLHBXpu*R!6)E@>x%c>Z4R}tz1$!svNx4vU${;>!@NyG)V7PQK7s{ z=-)Z-LMf49o3w1`Yb3<34BAhIEeTr|$WUWSbVoq;Mn#V4$SkR#)8UXs%K}w$M;hWBgz^9!WV=dO|rwKBf3~<~clPVG;9Yy;hYzLgqFOP}pG} z)>2cnLcy4&da+{lqs~C{XMy3#k6ok~7E zv1p0PkzY9b^;hAnQRuBTbQRVtj?psWncE0P24s@%w~}FbgQPY@ zq#aVjrKUENJUvy+^>iT2lZa_j4#YB-=WWk#wfR9!;(w%!>?9_`L)*pKIm6m2_@ZUe z-{YhIBy|lI3f}34HcE%wUjJ^7)eCH2uV)B~+u|lARlv*n`szGFuLNltG{z1c)K<=b z5v;9ViljMeJ|P<1q465dh@!E$a^&oNC-P z-%SQRhG{*jj#-O4iyIr~6CdjC4YlwNW>xdncadCo_RZyqqgznXFn`5fajL70`o*o0 
zt-+MUmeclksKTMLj^alk=6@jmp=Mnje*S_v^jM@N3=P4Tku*tf9;Z?GLVz|bvGe~r_cBc6Q5flVC-IB#Kftoq zc90}vSyvH@yTK)3)e21wuC!vSMyIJvGZL^ejxF z3*2IiLH|Ao+E?)DtN9^zV<_^A?-_?~8aP86MZ4bjZY3A9(@IqrT{^zd?^oE^_B>|y7us?+-qAj*gN?yXdTgHkzEL~I*{Fzi#-m5H$2;%A7n4ctk&qm^ zldjiD9ievd4604ZW|eAyOQ^OY^Gr1RLks60?qHtiuKv1nRyaTkXA3jueN6OILe!Zx04kA`4hnL3+@9$Yoy2o;SO=&?{skFV) zU)uNn1EECsu6jMdI6`0;Z1~>32R_v@-RK$Saz+H2f_4UGJ*sW>fZ&1>jR7?JlNV(Y zzE1Ke+^Jh0Gm{{A%hes}~CZfsZwl24ad# z?FFP=^DCUo@1JkHFHK69`DN-+$29LAJV*UhR#R3}ipVPs7VmGU`<9o9I^HO6$WMAg z-AH~2ZXRd)y|7O88s!_@#4s>Z+3#*#IG zpcMeUT7gKo7exwntyK7L0KKAd7us!7smcyL#Q`mgWcUhTT)99!9FL+rxm4L0h?Wdc zrDhQg|H2kkF)l|_Nut&$kcEpyv!G?sf@?-A2FNNK7obr9WEG7&&~}q>{(^8QB9d_` zLDUoyv@AcvNh#oysTD~L!ucse>HaB`YJ?jBvhqQS6rt2Cn&BdVJ!%%kaBzSr-HbuF zAK*@{AQr7Nd04q16-^*{ShcTA427bNZbmM=3SdedUnPc3K>%=3CzXS1N8_YWPL@(C zNJLYoNJu^^6-z)POp;P16$$sC*h%`MMk)api8cceqd_eb3qkt_2%$kO7yEAJJ9HEu`JhyQPd;cX`AIi?D(Oiz{2lNn7p{=BTmsUk5LDkGqYzZxfuz{f z*b$)ERNEn>*i_$Ppx9L1L88#t*x{hiSKGm)c%Z(nLt~=3Eo1(sM zKx3lmkU4wfH$@9_T(0saK7Xg(eSn87P)YptbTrL{6w|>B%A+`EPanv zd?o(BS@8dKXARZ5nQY!(vUDRxv7IpAjqm5VTgdA9x5hQp=d~is8OEz(>vMWNEhPIO4gE2M94b%kURMj(!*Rw^?B+vw%_NDiw{cL3np-H0|UUGyO zK0DQfU}ZTzn{Odc8zz|DceffQC0nAPFyQ4@B^1GstgLqGr z*<$k@x?@=K?JurQM`k75ZmUlF=$&Lr?=pPhn>!@9!A%w1pY>yWk3L2R`sy~P9-FxN zf$o0H!OC7b4}0r+r5+O+gt3V@yzBkg6>~#I@ zxWn`ulf!!1DQ1C*hRkD{7@q_g1?F>OudGud6R)J~SZ0CoyYF-IK&+Vc-eTBvBE!|m zhKbZPgqvryl7Ni#4%tVxjUgANIdvd|?Bf$!t$t*;Q%ZaK#ZDVv`h{Uz_Aw_=C*9s? 
zb=-xGsy)1Zeb@zZNpH%9aE=ma)7s0S+m>-m3#4nkfSYz+VABU_(Ifxo*#Xs03MIdjPlSr%Bb2iQ}skef5V zCZo6d*n~}5IZs`L6{QM=3iKQz@YrW&uEjzn9e5?4>U_c~7$0^ldvReZ7@t|6#42Z; zF|4{1=^jY=w6l*p_`0*dD=>-eFKAi8)J0{97>2&aR8x$8+nF^8$?8++EsK++idICkV?J9n^*s4t6I-VfPeUzVF098l73BrZ7K zQRH<1%#-n`$cpY#Zdoo9CHj0S+3S<>-tYT|f#KHlulbw#xW{J7%EP0D0=&qoz*o@# zA_83uq6mr^WsxUW(w%{htb7)=EX7Xk7(jVh^{+#O7{M`i#e^wV!uei{^sjXVU>IQ$ zVG^+q!?NDfH)#o^21)_H#ULIIGrNVd{$_?|gLW)RCzCsyPiLp>6{}i2O6LqGn^9}( zpIK-xDNr1*jM1o6ao zWTVp%cCX;!Nay?5Y!kXX=6U4qA`l7V2v%l-cCVsvhG}Btt9SJ zdK<21p$%bZC~zo1k_>BwC|uLlo7*`2JpYf^ntUGgGWch34`k_DOq<=+bAWjd!a8x+ zx|_ih{95O~zqUI48f#4<)wLUtZEo2=f=WbVaJ(ZTu2p-N)T1}bx4nbY0 zE#NO+du_B8PO1bdU6cf@CZcOl|qS6F)=(DdlS`Po#9I zf2Ln&R-1{Xpt;_%+`h~XYWLVOj>d|tNmW&uJSJ@o^3+FcZ~gUKFjGbiDCbujv)3`7Njsq zpa5<5)eguSybjHKnUN;bN^cj*c%iOB&FSl`m{hmX7= z<)cW8j#PJ)r2nj?99#Eh#XP$Arnk#H9Ys;&wO{f1uKrn_`Bcw$mFw`v#M@LFL+ zL-2w?Y7(?RV10qiBpBbog91r<5Ys}NcS<4gNWcoL!E?DG2w?mm{r;$b8XSq*p#R11zJ1S13^ z)FR|*K%=Tahmzq3!nl9nGZXVC^g(ye8+O_!#I3>qINg z3}3)quqavy8@mqIkXE?rQ6)?>X)1JJ)!5+Yn{|0grqQ4s(*QM~oHoO$>`l1oAJJbK zb~P&A>KZ_AbDCcmjkN{6<+Ard=1#~3L!m0KVo57k2wx7mAM-C{gC%puTA1O_a7a2R zw8x5Rq@EDQNWm5*j?@-L2pmvVKx)7cLdZ{jlBu(gSF+Emzpjr;nSCyC0 z>vsmGgePuI=wxp;PwFIFlJP^90`|1_dgHyz552qg;>ehC%C{2aN==UcDLX{X1>k9J*t#^qHd|q zh2K{B2`sG5jJ~mR;oAuGc1+F3s6UiHYCp0cKlipoup5|S*-&TMl6=H-pOl%{E3|7M zbiWZ<6=4Ydd_0jq*gAN8jq!4u6}@B5i5s%SR{sKFE98&udu-WiU%PUAmiB@-!yPEL zIMS+6WSWk=L+kvhP}nfUEf_ZY_l~zyC%x50m|Ip0|lCh(B0EdQJh13OJ2Fp zE!~Y6E@xJX>4QZB?J4_raboGl0u21&$`+ZmV>BO=Tz`7yW$N)eu85NJN}*$w-VNk# zhaBI%#YKnVC^hHs4_>#V}(Z!A~(e9OVx8KB#Qxl|u2Ov?%F)%{z5d9GOzDJ)e z_lLxNf3C7j#A+qu;aRFB2j zH062ZjvriUS4jUO9m(>YxELCC2$qNUakj4A9PwzW`jVNU-9NO@Im!I#1qFv&_Hv0! 
zZEiQ&B8_tFr~iAilkeL<61!+JcPAldYbO^U(>SfuB#bp@%Qmywq`Ul1<)Hg*k_`g1 zbvr}-!=1Li$xGOwB64ydC^Q5FB&RF_K@orS5V#(BJI_}?1KSL$=YvCKC(a8D@UaTg zgMze`6L}wS5;(tDXANKwS%ga?!o@^~AE`6nU$uqEvr3kPKux-O(1*^=cnvoU5-4QT z1PdnZb|tno_|*#Zkq&ZezmXOe->;qj`~G$CAkvah3}_xUDRPWrj;XRI)l;_!jjYup zz&#cc3y~yum3Isgj}TLV64xYccqNSG(|0_I`6el5I||ZlUXg2UZLJN4F*B3P#F{fw zL^PEZ8td(1d4IXXnzZ_8?w+%9EhaS+dqHuvb8ebQEJ7oc+ZDrF@3;ejNOogf7v}D$ zH@@~MCOyKv&1IrBq6%yrA>U-$CQ^v7t+jtW-L`sZW)-Rcyc)it3aXe6Rh2yujVJ+= zjjKIwqQq>Hq)}qNVT#7^6XW4pvr1~A%XM9qe%C_@N8bA7L6LJ%DMlH1$eWy%Bq}sk z>q@bfKyO?zfyRIm&rL4AZ8#(~@_BP{l9ILk(>Dm=9uDqbIxDvdjK%1-EN&J}FdK1S zs-^YDJs7*$UBB;YsMJ;STERPk)s?~;DdLB*PK)WpPQ*Gv7o9GG!o`bdu3)cTl+Y>D z@fK-cX0Qy+qhS?`2~2o}RW6R`=A#@H0uN+Xu;v&XM#_50DT=FSBSNNF^NYtU#pAOFhh za=9sDI&Yx0z%|3uA^9-hXD^9Nb(j?Ua6XyKowwU^hiZ|0#={Ou%>9v)#bUElLj>1U z1-xxrWtGv0I#=T*rQ)Rb-tmz$jUKshG0iX;e(EPidO&O>964tHRq)0&pIo_>K{{Th z=@8Xuu0w^la~E4MCls*_>4ed1Fs9`4B02M%wutiAHetAcee`>JSBo?-OdMqtxLKg# zd}K;5=s=Y8U5j5grPxj+!iGT~^U+J!DqU~1Ys)x%UJUB^wfkEKb%t@! zGV*YCspwiRRaE^x&CzVUnQ;!a_zFWjP&E1}6XcV^G*C|P<3e-h?kx z(rn73$0e}2nqgnEO_->XrU4ruRk7|o8D+W>g0&gxFLbJz!Nzv@`9Agd-hG*+q>EQj zr4sVz7`)((WiHPfCYETSOqZU-1K88f@~DI!5TC0o8_esm$0A;GEmw?J8(=L9S9^i` ze4rD%za`c{`})wzgd)Fm;q!L&7<=LGqmbh9w1(ZqQ-o%DcX~*)%Hmu4Sg`DLwFRcq zGd%KcxQDrn{%c)Qk0qLl#ZL8qq*~mc(WtE%**0(%EJ5|ebl%oho!KPSO-aLZ~a zHbgNZJ+*zkn=e9zh^$Snkd0MOQ7UycZ3Rt$9|WsFb7c7)A_Dal(u5Ie4}x~jn%R#M z1+hPXrNAww5k}W(coagcZMwaL0tLqwg{;LcL1%vIJ)VV?m+(nk0KE4q$kD*_SnfnZ z56ou3@9!i^>=1qb^N+72`pJOru?%67{DUiMnT>`2``7^Iz;K$D=txDr>0(E$)~%p9 z5(S%26^UBpQjnEl8vO6NvJ{mT@KS2X9PSjtCWtL3b`FRYDE_$8AmH;2n~6zD zcF-Yreox|&w(xCB!5E(T6(}izP`;OqP(_)Hwo;yIKv@j?DZkANbr$i3CZztGz(>bn zK`A~>ip5hgzP*9_{cf)1dB-tdq8t~~NG_^40EP4@mAiew&`Pu0tl*$m$lurWjIaLj zVUu&^m{k##y%^QlV_3Maq}I+lW|wO})DJl6NJk_@H$GkBuv zS0Cmqj#MQ?1m*8e7zUOF?cgv^Bz^th)Fr858}dC44^K;+Sh7BmF~?~qf(8Ouh9Qxu z!Cst`9w*N49?ocQ6n{K~QXq3pkXyK+1p}(7dMsjYetajOb2_l_e4`*b#I`16Ua-({ z{H``7oM>5;lY3)wvAZwYv1-uk!F>8Gjt47ux7a5unZ;P#j7Ma<-T5<7dq6w5j?U2R 
zFeW$!T{)}qzW{1LmA{5$XVA(L1BmpX$*Z%pPxH)Asawx!~9tO;OW>o)~1ctrj6F7jn<}()&`%&!Sv=K+YFx6f)bv3aej2>AsT*E3^xnIOp$TWt@eS? zp`SbLNE8@`zXXMHpVcktK@fSOV4MqI(~BOJM|2XzjVA0rkpC1mwd`skjMdeSRLW2# z%dVM^C??#Y0nT9qoWlkCvj^uMR)Vx%^S*HD{bN3&rnsBhOnn)GyDY(Oba}0fkV>p5CQWo9WqpzW0VudYREB! z5rp{zD5o+o)i5pUAZtAen5s}lqp`5vZo3SiKGKnimhh80V6?f6^{!YrY|C#Cw0Q`E zU?smS`z>sRYsee(dqup#TbHhpaiDx8ztbMDu-#UWJagW3jCeKs$;O@sdoTX2C;}dj zhYXH{f9@x>X`;ZVi2|PnzV8%BNeq}1NC5M27|vbY$U*@%IS#BcK1pogKYLUO95lnY%luV^@B0*RC&a zOhngxVbAV|H^hktzkKoN#?i;#ee|LCzrS(hn}7SxwLiLZ#p=6GZQJwgodc`y`60?C zfJS^C%ykczz`lx9VuaR)N^3)Byb#UAV1$V8=E6iRs4Cv5K#LX~nj#2zmeJN3LzkR~089pbX<^UB7R1gw5lQvK}#HtH4eubhh&LEvcw@-;$XZv0~YX9dk%U?Pg;X8twEU9 z*pk-RlGY$h!)H`2L4&ap62UQCM%6>g7$IfOya=Jn&hw_I7_$JaDbUFQd*$>B=$E66 z#(KmdBjS*7#`Pk^4W|7%4X_wpn#F3#zYbqSEHG*VPg}3J{!mNJgX0B)-TCkL^hk-8 z^5xg|#I0=pdyBcY$Ke<0VC%XjU&Y8{e}DAh^N?_V{qo?EL%S-QmW9kxn0WQtAKtlQ z`0l5+?|J4fz`h@8?4vlqzB;S}`w>FfCsc{`EP$pSqFj#{RS!|GhcMRzyiO)yT_({g zLTo`x)G!t`nu;1tMGa#SK78P*5+Vpk5&D}|5}Bdf>{aMbSbky@?AA=&MCZ8R2*7P4WEwnluepp~Qc z+A3RCH(pm@FUXFinl@M8bt_`w_Vw97ZDrhgy@}1wb*yr>X72lT$9TIRgcqPS^uV## z)Q+}>=YCaWHOPi^q;cuU&Sh=eRyJ5o$)*)G`QJypl1?Xbt)x1UhVBYoCSHa!FiU^Xd z=t%{gH33h1QbFg{!4nr9GU%PuPCf6MsRh!pwkRbRmxpL_eA(WwjkWC@ZFF!z7P6+y zs@=Uy$CgFXLp!eBK9p(Pao_6X=s=T1Qv^wK443K{Yp7dQ>q-w_b?sHd8T{%iKR2GX z2SRe0-|n?Ap?KI=zbaF|qOm5^vU>N*!COWu%}&3>iLzwzNP17$>#c4tt6R~O&NL72 zhAPwyy!UyL$~E!Qv(s>%(H2+~@M>N?4N`@-5xmbO zh0_q@W3<K`2FP-`O+TQPjY^1d4elL)mSNKIb(#FGcxTlSsUONI+bUHt31 z!&g*Rti5fNbd=_dA>e_x1H>ZOVHL3mbiVNj?t-sT7an)uQ6paA#4BXnIiooT)PSy8 zF1!O9ISYKtDLdt8nSV&uS=1cTl5G_&xH^5{=&`Xed~9qiIhJ&vEFLDK7lYO7J9n{-no5<=-aWCrz$IAXwoslljMKT#N?%5s4x2FpgVEy|v8e z30n0eo*-~9NlRg`FD&4?s7ZtxSu~OV>td?VroQWNLEuc>h0|06XNF0wnY-{~s?h-2 zI+M!*BDerN%O9vH;7=Ue4N;ty9Wg#?oFVX>-VqIe_qnKHhG+!oMzqXZ9(xbgP9`ne zb=$St3oMP{&Xj!SEO4=wtV{T<+JO0n0DADT_mufQTD=9wmRVVHG@l!y3{u474VwrS-$C(m%;yZt7LFx9NqisQ)?|m{rFs4ApY%MWiN{7h z$C(%sCw0JYAIuxzcjbN!{Q5c-(AJW+BF!AloFYh!!~BH(I9x26A)3K>Ialm=H8o7x 
zYr;h49cEkUn$DTCpmA0>n^spqYhu4c3pi!3DZ&Jkc}D?9f25@w^0G@eM67sxxVm@eq~8V#*1faM>^9X!5bvM z5%h*S#@k{Y4V5OgeA!z3;c!FTo`1$&*_7`~wpYpdZ`hM9aQ<9#=`Hehsv28@T@6t< zCS$P~q9teG?H5wsRxjZV&EOUg#Y!k-9(o!fm(y{Bym6C+G?XEiRGuM|}0$ z9vs{9;6%n3_R*pTDp$yYi+}}i6{ba9BbSIQTxcPzIlW#!Z<=tQ*KI4Le(F*;Z<5a| zrU{+?`H@t$!a_8vXYOFZ?W zy4HbMDMej>SJ3jtdx~*_2D6YZA?3$d)i6;EN6)F1=5@1x6OeR-uvUL^v{whvVFs z$E`ariVo^RV8XF|;Zj9mSY}aoUIE7hs0#`@YC(cY|DzQOJMos;oZpAOGk03Q`=*lGpy;b`>^%|g&N>Wl#%5-uC$IA{1B1$AhEJ*Jd~ zt<|v4F{OG(!B#Nte%N9`cwT(LdJWB}A*V34$TxM2o-)_;U(-Kw$L8Al@y`xcZI1o9 zU_*SPJs^m|RjWr5x4dxI@|E|#aPzV~YwE2A@-E3Ou--Cn(^U^#vE{*SOYAn>2h0ae z%6R?xajTcHxFoLst{>lY%m27{rOoe`{F-fn1dL)SY#p*~n1_#(J0cwX(jv@JJ{f@y zLjQpff=X6t)xyWiC;5o#M?M%fK2D9E{3UAx)(u#hybo3Rg0My(l$`-9i~o+X24rW@ z#yW~<{5aoGXp?^}@_Zcsa-pq_5bpycv|$mg`VB^A5GK)BfNkLb&|e=~X74N9iyqzq zxR>G`kbk4q77f4QTm)BIulp1c#hw6ei6){V`o{80Z{c4B*No9dY^M4g-M8%pRW%aiV8ZEb>9EUH$C%oGQ%@D;}a@vw@hRf(+eu}83k~1$pyilKCT%Fg{-9M_%+iki@ zMhZBb0SiOq@1(+Uo5!Lj^A8b>CE#@WElgBa{1ri2k0&TR%{zk$&sJw-e%9>0c!=i# zS!r_r#m^SseLfU`W%;>U;wQdx7Z(T>{L@>&B8^x-@=rkv{MrK#RXv0M3{cTiH_4Sd zU{oQul~PgjOgXOf^93xVp#V@T=T>#ycF|}hyWNqgBIk`sz7=v^=Z^~cbMcii94Bx_ z^w?!D9PNJ5Z57#kat#pq;2&-AIApI#w}%4$Ai?!L+#l-g>kZ94T{>3QEDQO_;Ma!Y zYu1d!@po8t4U2_ZXX~XmsSYZQs^&d#+|OWEq7gV1ANUPx-R5KFt>GEmtoe_k;6Vj$ z^OR!V3Jcukq9gf81@4ZvTYh-cO($<|YCrhHn{JripF0-py>ac@eSP6TA9%f~KS=m) z`=9r%==k(c4S~AcMS^*gx zXHO#Y*thcV(Bld`R&G6=oUa^f^Y1d1W57pP$y{W#XJ`{A?i*hjN*o-x>Eynm?JQ+= zp(QuAnN0bAFIsUqc)htlwBF{kRW-MU9g&XD{Os_7p8-pL`uRi4ZoYb5q^iwEml1ty zk6Z(+`uSDszo0a=@48>J>Lb9a8K9&lRNrVIY*RHtQmlnv%oA-yqR{L~imx>`IdI>GlKvWZWm!oY|DA?GF@^jaNpvIfK)9J67cIrG=Wia2B1Ov<`77#9ypps zeT_;s*>YY%-6hyxTVOX9ruS*JV0t?FVS3>o)JlnqjaxrAoY~mFL|}9T0kkt$*|oW; zvcKM+>{>UruDiT;{r;Zvp=C8D^tqnVCz^*cv0Q~*(Y3Org)%{&8y1iZ1GuudDI zlftBX-W9hNT=6vg02UnQz3@jZIFfHRU;C9U8z0%(0O%{bf)X3<+?dU7=m@e>K=%42 z27mbaFYH*FnS9_DVpl;z%ssMsvLh7g7+*{5EKznrfb7fQ-a}X)su^GoK%Ku#ha3Te z!v<2JoPl$AC+#{-5nDh2MYSQdFodc?CySpiDjHs^%uXZwkea_ z0nn-gmcAdLmBDUMSsEdhVlFIBU^zqDnMu3A4`za25GM1eKqz8-AR6T>C!@UNpOi}F 
zyfc+axnw{9#DQ!|Rg;1$?Mpk9imLfd<}qe|mw5?Xj`|`>IC-m?v?TogsNJCZBgg%b z;;dnpKW-sSct!sAyv`C2dqXyZ?pJ313}ujjT8^4&F8})$mu%FLKq%p#bU5<2v#@Sx zlx6(4_|H%^5s2&jqb?V|0ag@glgpaV0&HN_^g2{E?N+_CgOH9Hc^CZNvWSeq*rn(F zlQM0Yqzls^^>+3baQtuv6hA7?fRg;>1B5`6z^Ru)4kwtr`6-?;M?=0co1VHz{3nTZ=uct-vH3*63VyIC z{OUkM069tLgdvLj5KL3j$XTA%%1h8=2AuGyX$2gni>h7;B`#^zcNS|)&|}u)3pO>X z`52H|;5NXm&Rr+_DONDzzsm;&0cuu4;U%6XSu>vx5}3){X>kjz-xoC79d3zuF$ni9 zGK|@3Oqgvpr!-d+g34TQ0RAE2;QVJ|Y;KsvU2!&qS>$?4>Vv8=W)ISu^AhB0@} z$T{#@$8!oiUkIJhu3#bDPJwe)dEZc~dimy;6?k4bRkA}F?I#^m^AH+NlO;<^1Yc%} zCi1j*C=?VJHDcRg8HsEaJZ_iY)Og=u_jQAnE!Tg0$9{Xwifr@dnXeG(O)GA(Cx;`v1aT9T5SN{)DW!G*5>N8wYoYDb@(?zy>147FXy%; z;eL%|06s|ot(b%>gGm8BU_cmhddz01TN0#nK<4O9M0?#`LVp%CkLm_cCA3UqI^tHP`~Q9odqI8?${d^s3&wkO?uO-Hv4BFEZozjwP3O zb;VeV+h+AxXprxMgN8KSQ-{q-2Q_bI&Yh3wv@N0^A8P=HjUp5)x|o1ae#aQTY~)rX&(<- zrB^p-zN&4^JU0cm>s@C1{hJH7-Cue`-HuOiIb+I%o1Cxva|UCNKQe<8Q&KPary95d ztvB|-r2+b=egLY^`eFljz}Djw$*9 zRfeJ!gUdg_L93#ZUa(*gAasngsc&s+^MjL1+IBy5MRKrXiL9pyi_siwTGO!imSAqI zDLc}d69PsZnr+WRoOYV*X$}c=CzS+t(%|$uKKo*Df7@mOr5^%Q;{V}WAla;}ZZ*`3|34J#Xn>i%3m z(a_(}-`#pHJ<-z*hL|&~8^AndsNdA@gt(Eth@dd=&9-LLT@YhqS{@EdVCh%RoH;A1 zYifa=$ysG*x4FUJKw$ksKf#me>=Ui$6mb4^=mNY_yaMS&pn_{49>a?yTQll@2;e=& z)NEdjIHiRPgfBDyMl`C4eCqP>+ZGtf?ubS;bCEXc#_oIi*4^9}(py0Oh7AB zn>$%l_K{dFY;GQ1>W)+*TN%r;gIUd1Yg)^Zt)OKa`$myvbIRvd*eWE4wPx$XI})9B zAtPDWx4e1lXE)Ei4A+a{!#ad{U_(b_?V7ne3-3_>n;`skJ&D$VYLn;^V}4)6ui7|_ zY-|&p7T#IrL2NkmqZ_jf<5^bGzGrofj^Rv3ZD!HafCGE6Un35TtjJvfIpB}x?AJd{ z#IQ+>2Mn-dTM6aFE<5;HVK_w$0%>0ZDB)i*$?2|M?wZ}u-I`dLAgUX4jYOg`(O6e? 
zE;!r+TpT^Ud_e5i^`kVmln1R^?tfN$mPL3w{i;wxi86HN4X#;bL$^8MPY_t6&e z&OTgqP66j%4li&*(XN1!x<;=n_T+T%%c@Rlrp$;blO~ zs~ljL%_`thsJ;i7kzU9T=p%||1sYDXDXgW~OK3o8gN#m%a#_M zN5RpTh6O;Lm9``~Soy-lO_Zcn9+1p*aoL!gGL=KY zBKHDWOC2{2AU`sTL{ICYr~g#dlQR@L%CP5@BKMkL-zf!khGh^WdKT35OO+*<>GMT~ zqK`n9ttTI0M2{6dCEfkdmE(7f#?xEw+qm-f9AouE<)eRW*(W<%fzAQCr!Cl=>xwxG zGH36=$iVGKw_JbfPqrS|L@59tk&}WIa1UbzL|1+E7ioW>G|o?VB)^d);6tj@P(#*Inn0p79fbbo z(5ou`DTsG4Sn>Qp>RyU?mcn16aEkJzUXJ$4Z)`H{G7%>I8=e8J1ULT-qAE3in^Xl6 zdj1y~AVgI>uY3ZXBbs^{=uVUThJu*{6JaJz9{n4NM^!@T=_J^1thmbkp)&~l9jimO zV91AJ&bed0u3dwCzcW&pm_B1r@esuh*2R2ncM(*3&yuKww5JW84 zcjHKv&1GXuPK!}8^PE$bT5g)T>G~&c?d-VjOKYXuj#Twe*28F6cIkcMFo=dt*k@2B z$Sy#EiG-zF)A8nhX4ZovY`%oM ztTteJf#vi%i}?ju7?VL@xea+q{_PA_K$8@8yPb{nwW!H3+CeZgV<=^kU|0T3xJzn+#5> zXWNWU=ThMLdVRABIAce{tWr3q?)rt<4ubR_B|S9#@sM_CI^aW+B)S1O2$&>w##sw= z93n&MX?>=;#?i8>N_{0A3 zWbY$Kc0LO2JiUM8#@;K&VQZ(fvo*;`3qlk+b*d~pAxJ&(-Z)wOGU z|LozOPxSBTzoMtJzm+p2scI86Q^SA`yqv5CDfjTKvl|gX^Ykhs((1}`MpKMPAY{>R zbt8?o{Uq>c&s~qqD)35&_s=Seuhn2eVWDB58eT5PDe!{M;aLS-60t;!4=P>K+75$7 zhQ*5nSd=E)J~pR6$n2b3j zLrBOraAsaY!s(On@@60l;{8iicS~y72F9|~>Q>cXRd@UE{qMcs|Ewf?%ff0Y4D*L7 zsf|`Oun%gT(&~>p$A`$5S4jpH+%tdw4Oos;YvkzWhGeSn5}r!-JeJI!nZCViC3CHm zZiAy6R5D<6%zh8BP`4~@7wxp{J3t!u;0odMl}d0+7~0>AmV@Hw+Zr?lO+=w9qpaUp z*}_lPi{%iPtrz%2&0W(XV=Rgkw8)pcE<=&(LP>E^j@&mY>jFHtI9(Uu*@bLfXq<0v z%12R}za&O|C@u$@AIrW23GV}h-|zx96)(I`cAbuZPm`lo;8W5J>vt<50Y5O|_cT8f zc_5dA8Q?JSMZ|-yu0B`<@`v*`dcC922%I~d?gK9uCM%=+&2VW|Ua}q--&Ky)w^aH@ z>y!M3TAFu=v0>BZ!?5s57fyvjmf&vm^I(VrXbh(S2iX#<3t$)!N(e0j*^8}aensrp zCi?%t=3ZP_kjpU_EDEj>`y-}&P%B?~cw)!NTEKCYi2s9{cD$G%@=aL+x^o7v`ka68 z!M&&VLF>J^Wy|p7b~uCYJ+rqPS@B`*@Luq>n1pMU9^akE7f$;SQl0f?}Xz6YYnf! 
zRO$lLu-4*5T5R=d(|qk^p#L-*9iW!mnB>aC<2&w}2q;Z>fz+OjvGSi8~hRwxy~KD+FgqRd-} zqIVJJke+Vdoc<`JXsvS$ADFfz!fp*iQ!=?)u5u)Np0vmCVYKj% zf8Kd~O9q^z(y%ORa7nTa9}W3!l4}C&Iwn z${xxpE>Xe-V-|dE4PByUOK!))+}8g3z6!s!zQ9k_hWU<~JgoKBv{j-kRiHE$lp2l| za;x6TzuqDy`}*qw7-nH>U06Giy*ZM!+O)FB9OpM0Q7!qQd}n$FvI*uj>_LY;$ickE zH_1hEy2MLHEQ&51LtmpZ@s=ZSonawuzJX?@Ei_vm4le z$B0*PrOEx2@XW)zb`2gHf<82#OmY#l3N|PVKKdNs@qL5_deYp-xIys6t08+M#B7bf zPs4n=`th=_ep}@be`|fNJe1|@v;4+dPc6b58(~pnx-J&Q(5UG}16Hr}qj6(FOdR?) zUtNHM&)r%V;PA72U087e94<9x@8VU`Wgpl@ZaD{xC(1>0T>wW* z3mE8ldm~U@5hH)lkeL3;htOOU`#L^^UZSP|Y;FGq=;j?5HKwH6vZXWY^Jx&>p4pwv z4jo485_jzCgoN~zOGCE-Lf%r@#ott~l{-U2^)laEYoTaU`5I)!ixPmiM$t?5-&Vc_ z5Ezb9yQwa~amqt=p=BidN-cXo1pnZA#r%t;JTz7w>xWbxz_55rQ0w?l)Q&cbhAT>zh7IP>ms0~+Z@pp{;Yg z93>3%*c~nw@oWkBcp0+MBsE~3-Gv5l3XKum>AC>N;IwrCjbaxWh=)n8tZr<>n%1O^ z#GGrUn8|}{k~EdWzv>#_6Z`FjD3t3_+!U%x74>)rg*;I zSH9P!kJWR0pk}E(0HYU$RMeJlhNwlna@0~4uy*MvERMYGmbw7P?gJAY8a)@Q3wV6R z11PdlG%DZn&5%{WSS^Yb z0^s$v3-C`x#WeV}q%)Yv+O$h%=%l+83YgDkQ5N<)B^~%B-KB836kTZ<%-oi`Vd{1c zUSiJAg45jGl1pkxL59E5Gakb+=5wuq8mNx`A_z?YT0S@24@l~OJ+<()n zf`3|Sgf8MjSN8cvQW6M=#cjDrX-9WpxWCKcsHHaSjz;D;PTsjStx^V--`aVrzrPyaaf`2ye|KS^NGToZ zMBF+u>ggOtbtOPPzDWNgu>p8K51{OO-n$uE6GE>i%CJg77xxge{nh;NX0I$X0(Tcp zkAqm3ekN42zsX>>;fx`QonOCA`2G*j5c|5bj?A)-wwm(q_8z%34^5$dD-diz71`~%>xM4@N{5C2p zDQ>!F-;gOWnuBxc6Tqwd8)k{fV~oM`#cr~r39T%aG_+tYix|vczO{KVye$H{MHsiJ z*hB;WrpCy&-VT)BxtJagmm3Kkpg|4nQ(Tx_z_7KBG|Jj*W`LUECp@{q6`y)BSWC<$~AijoCYeUnaqam6TE9h6? zE}7BmYpd{?Fdox$)Dqz)tn0z4ie$F}QeIH|AgVvy(D=NAts)w}QcsF?He!tvhB$Y? 
zgPsxaQGj&MCp+6U<(r{<2rUIL5#;b{+L%*=14;PZpbmYXgb_=24s;|6;2X;g2++RO zf(<~vb6zdppo9$|-bj0FfHjwHxgro&ng(JE=Z6P@{yz?=UM02>d$5##lNhHK&uxzH zyaZlu@D2kyA!~q=QyO3m_6~bPW7S<-Ak}tEW~znN*5H*q2#U})4^4yc^6M;yKG8G= zn1mx-5a1A@LR+Kod_E}+bS=BErfsdSNsJbrFfG5+f6;y5uMXrErgJt0MadYoT$R|g zZ>+d;gEzRTJRaR~N5w16=$7QhtgEnLEGD^t@BRbeV@Gmuu!9jIbL2uC; zUBGDAkkPm~(i=7o-tyoi)g{sX-G3d}H5xW1ik;MP>9d9eNZ$nY-Vc`_CHDdTgo#e# zNmMI5orgPlpmKWf`hTHpN2IbcxWxw)@(5&IQcH-(wE!q%*>4~zLzb!?4kDj{dXNG9 zcslN@TA*8qnF+K3a}m`}VU2+La2}i!&mffmComS|=ZSoKK8g9otboVkR`wFEP?Q;O zXrCMh+=^(s4V@dN>`hTodAc916n*kaW z#+{L8weV3dfG3Tvcsx-;A?^_R)5-_Aj>ovhtzZ3)#Tnu*Fhg_`Pb1$Iqo&Q3tbAQpqt9+NWpET#wJGM^i$}z@{~?hGiv>5N(3cvi|}3&PCBF zL|&dcJ>KC6S0#4AsH%yG&#vH*>nwH*HA2fdI7M+yeyk+_`mSMv(2q}E!!=w%OhCll z&@+U6Xukkdgs3AgAUe8>2{LXX6Gn(g8P$?eB^gza9SF>@4L0|wZyW=!eShZ~J~$0RnSiT!aGF0j<|qHpk#Bx#V&lip z-v(aC!RuYsdu}dt@49Dl{GL69?uC0G^`2NhP5%&34|YnPl{zH@y=qa`)S|4ZB|E4V z>7zzrh!r|S_I)=CYg$Q&nOPeupPg$1!? zXRzOhJA+9(hDkevsY_~Wq^jAF^vT!^q>ppFANFLhlhA!>-;*J=WJu>ZGpP2~InMT@ zmJD?6ik6Hf^uzVX*ea;$K8vDJBAwrRYFE6xsnP-Wva_@csh7`7lqIhu3L%5ko3S#A z{&(D*flFQ+Q8e{6)Q^qpP8WN~s6iCl<`z7n_!>mHHbC(;wZQp8IVPMysv{*;ay)9V z`m5F&Ax>q;^+`74<+Dk2pjv1iX%}ayRowyY>>(0NIrTM4u2L#(p0L%?*)tGYZ4DzM z0|k#R5cX(gG)dpgd5tQSN@>Z|`rdtNtv$TAcPwV0m1?z05AnEW-m6nD0-97w7W<9a ziQ>fc#3v@6oM2i2yZ=K3>PCV#0^4`ZtAM-_fH(QGk`JSHBfM@XwjyXZY_Wir(e*6( zA830HHFP1hCFE9uw^1-&QTwEp(q?|%r+(A8*|^)dXrwVX_fH{$_Bi(jHV6$^ZV{Aw z9zr3vfO5A)+oecf=I4S@{iZ+|S)<=b>uC{~`=cy^w zd!{5WkXhaOO~_JJu8T z2Rnwg5}2FJVdg0D2=RN2s?|^ajMz`?1T-HZ7RlGnhZE+L_rqXqz~C?(9@#%)HWR4%vk(TzuERED z=eN{h+20Za#6!q_CJ19FE!u>%Xe-j9tw@WLP+GLBX_Wl4>C*KO@6pw5e6E=?s$YSI zvrFs*ymA?}=7XW@0yx%IvgzuAjaTGaQ$R^4d!@JnyLU~BXC1Clhn1{9k+GHcl{_EE zJo_(F$n`6j@dniHtJuPBi;__>E7L8Q)>lcCAR5X3#C(+4N1R4J((K4ceqR@Sc*m4G znkVu>@THrZ+Q%;}$h)Fb^*O!|W-rzJWUA)oxw^a}Iz2Ft2Gd)BGMB~d*%h(F0j13h z;t+FFbzzRjqv!-YjGU_r@`~gE!E_lM3EH%ki7s(EUMF{b^)_0G$n-#nKU@ut!d^6# z=1g{?EY=G+xA~9KbhVspA?x})pf5IC)lk~Xq*eg^ z0&E|dMWwfJRT6PT2M+B?)`h`R4J&HWf7M%Q zWcF|J1X#UVq4+od!yJ5?9FG17ZIm!K0XRLwQzR~Cf_!m 
z*vL8|%c9YHqB9$Nq8u0P**L=x2Nq5qIeFxOy8dA0ZvHqw8gnkx4?rXk*}1t%SVsKJ z&}5_U74t8TvqFG#&?4day(OhDangwfeo$Q(8bcpgs0#=9xe8+C+~g1(|AHW10OLL+ zaZ0}^!b@LI=WKh`xo(AOVSgk0uh=bPRC0wC<>7sXM(?dDfHV847H%qxfmj>qP_lt* z$_Du|tO$sV1a;T$-d#s&uTz&>g$J2cl=c@bqI?Uoi083IWGKlZ%140aAH?g(=X}G{ z$PgaPAJiY5pFgOlT~n|MYhwhOB^9@&ubUpi$NH z{<(i02ASO7%3`B$9gEv!F*WM3p$aaD^_^`Io2F5rGwW38T#bqATlCg4AHc`c5a)sZ zR%6A`SL`M=F&LG_U{n@^d$GjOVnU2Xi@4+;F5vjqCwg)|(USwO@1Ten{_-^1%v=nS zePZ0_L)~WPbTy`79M$k8l4*iHV8CKABzw7KQP)*ci6`j8XknoV@vsU>yMgeqDmB;J z%*@eWRG2(go5v`hd<-oOO6fLRmhT)sxk+L1!DO4Nv0`|~wyB|8AG(BYZzNlv7OReZHHm)+5;sj++5 zd%r97!L7%PWYkE;STfF#!8rI}Ly!yy$pHMd7$Cy|(vLp%lVLv@Gmy6j$N+>VP#LX! zzz^c(0Nf!*1)S^v#Fc>WAin}|d@VRJ5U&O_&Z?#+!TllH6UlkBvY5pG;l4TZm^*MU znWXC(B0#bXx{wB3s4)h{1xx~U6OFT?vC(DAN{@tq=%whBBt=upR}faDH}25OmS2=H zaK}ZPCuCO1mSyyxD787@vU!aP`mbdwwN~-x{|>PsWlFu8-mW#NXdw3#_@R2&snt@y zgm{OPQUm$Y^WGns`v6}ykvYtl%6s1_Z2&~*howk~ezFg~MlxhHKt}y!)JI0WWYj~( z++y*%iHm67$UBm<~5>54{C<=(G3||It?1vC2 zC%Fq=3HYB2^LUj`-e8@CV~IgO+i4u_WhHbp&2>zoO%F9zN% z{{h%G;GepK!0~*QqW(9fG69|QnUvIzDDo9bWezy)9>{5h#n9w4^%<4w-D6Fz8!QTy z1~6Tr158(`0MlV{9Nd}MF1@3aY9Ij#pqUdu0CIY(h8Uasm%jkveHpD;PhKRyrZ?+kv|O!NK1)u^ z;ZCs{i(z>I+O_4=^j#>cy@aXf0+i^1HpxXMT%;YP6YXSF->au$D$)s;=?0vnqaTvh zLHeo=wYjREkWCX4Vj2(bN|nUs1ez5Ht=D>xn4E*82+)`kedZ{{r|)jS>zZ*Jnngi% zeN>*yJN-sVewT`+m;YYLhP~dPMa7UL{f^ui^t;1G`SLTYktwpJwq!E zx_2|wD`pJ?=R{~Jb`S*hBl;qd#MtKFBv|mI9Nbkbigtp`f@8W>V=77&F1s1Z#%7 zDN{0yQLBKTEuSF2ss!;s8G?$s09in$zXL%%$2nN)U?2hko8@}W=-1nAPWIi$6s(J& zh%xHBREcpBX<~r*OXRaDtOJ+G8_pB3()tp)qvSC}&iMT<>nVSR?97xhR3@W#ory2? 
zJ*qxV9~0OA(U#>#gjL^KdKd9W&IsV-jP;a2WZ2B_GPITk=f_=Vg!q!W?@j|=Bl82PiC$lPLKxku^L0fdLus#4XVMSu+b}j?aDHkw zJ{UFSQ>kK@`d6)1qw9<$xKyztvnkDmTpezm$!ZL_%@(iS(>s~{q?Yq@u~;|;Xe9tz zeMxR6qC_9D6VXcT>wJ#f4pV~Ur%Oh{>{IJgPX(78hxNz0&N9cP1XQ6QLii)1z@vhx zrv!W)({)w=M@s3Y!YX*0)xHascZHY*wc=zG^(BZ8=FVr*c2|()^o*Q!S}abM$?xbY z-PrH^tj?DYN6OiFB@xQ|So-bq(&nU^3)zRXI#`-WyBUaH4t^}Zkd9}p z)TWWs&^#&B@nWD-n;1`yL^TQph_JhLCY{tU^3U|KXWEFerT)=0-*WG&YdX41?TfEx~ZF-pc9{H@?P4p5spmk`IiO_SG$hnfSBN|jb z)_ElOIQMwRQTH+ZQM7(h$BiRzg9G3ANVE{qno@79jdJb! zsCt{d1mv@nOis&_HW=-l(CT%XJLGzo#R`}`r2+P7%9b0?+j3SFLoxrXH>oL|*45$Z zx4S*|CCg0F~c!zFUdIR!BZjhTzsO5RY4M`Wi9N8>Z6OV-U+%F3TIWE6BSaBH+$ zf9X{3l7Bn7{Zz?M`XOMhQp5XiCyL4W`Vy8A5S-UvnXeZ_$k&&D29Z56)aUC6F}##3 z$W9C30@j(+0%2vrCA3=plz^`C8oEkv>68Gk14PPC>gbjW*-;=NBFKSDCE4falZ6Hr z&(&+_d)JiEgKx#!5_-8@d`IoqsU}SN!0-({zJ8Nh7xh1ysdc+U)uR*qEhFAk%O))^YKafM`-?{di*`(JnV89gyeXE*B!FCX-JDVZzAh5QD) zAMM{peUJJ$n2|)gd^T=^9CQ;JAkGHDZH(LWXOm08!?t70F{v=S(7K%rI3lS(D>ROg zs<8_z67rBbSF6URJ`S7-iy2Hfef=Rm!Qk0PF3m!j8#@PP@*MSV4QcJ?E7|n&8R>oN z{L>K+4{s>|OSK2+DaHH=RbrR0^m61+z{3&X5h1vU^SpyyM9=k$<_MmYpn1=fU9OZ<6!>wm!|P3S_`C%5 z-jB(*NiDd?Ah4zrVvJAz;~R~pEEY>$cGhoi0$_Yn}#6(;gsA-NX*~EF&5sNB#xe-3$<%0}u8L@DkPJCAh~K zti)V&A2v#IC5jc^ z6P44b^k4a&M%f}<>E(ZJBUr&#ymJ0Xn=hf_C^i)!X z`(gY9ZKdeAt zs5g;cOdPYdP*<#T%)VX#{~>{GBS3FQmJ0r}!paj?RWe>hi&wvg5{1j?xJGa9Z@_~g zY3lCrkH+N&r`hTk|r66etNMQjMYv| zjUTB-K4XY%$aHLoTj1-|Hu~++#o2VMcDOQnbS9OUI9!cadc5xLN~)vW?OA|#E0FI{ zUjlavqnw-ZDt@OLlHZD=91CHEA&$B;>yt6AMfOA^(1CSa_0~X#!%y+yI9GA zA619y30Rct%uXLdu}-r4+vyC9bwE@Ap8rnq`BBt=&ZFtWQ9+h`uw*uC`eGiLKD5~J zlzplD3;N^C2^<&{QK4ZYFyN%)>>W=D_9cDy7X)-Hjtz<{3k#)Y@Kr@`oK>+pIKYMm zsNh^rX-B^^FtU5tkxpgYasy`!IB<9n&}%^43&R70^vt9Z1a@MCfFeIkf^V#h50K`O z-$&~KWz^>pbP@&_7HFO7%dw@P!RlSK9&6UCy>%HQw(DZ*%R=+-*HNX0)eUcrO@1E) zcZ^a4rY{5VrnpwiV(*BC%=!SQ0MW<~jCQ?(kuw^5+_U`kmHJlk#cfKNQm%)4{cNPZ zOV+_t6fu_XH;Dqd^%;N4pVB%mk=sfhLfdibwcPJ=RIc|CM}dhfsZV{!_>z$_a*r^_ zTX!6rU%BO2DU$0rCE(4YBfXCZ=v<@v6ej9Ny=>JmI#BHxct_4e~4mytM8~e_W1uN?MvX> 
zD$ae+oK4a>lFm8W_tlmq%aT{wmTg(Gyv19*#Yw!yOA;pzO6e3LS36I%A+ly*)Q{9IPW7aeVKaoR-nZoT%iMngi%KDq}^(~QC4<|a37 zWLZ|(LuYm0d*Bs&kE__?qe+^=3Ep6WpobjW27E~SxtWqvBniK67ImXPlvi*hs}X7X z2l!D+2QiwoL$ksw4QD%&CW2Q%1m=zNuBY=VetvrbY5S{e0;TKeRmy!H&aBn#S4}b* z*32>CN6lW1QAL&QtgP=ZH_#9->&!e|6LoJ0-v{2y7v38Iwg)ml9UnTR|&b+ZLzxb3Va{HcM=L6y3#SkVoL_tqvN_Q$8z?h!6}(BysAs=JA{&=X zrT74i2JkLE16#nI+XD;(7tYD$j4=)FI`1~P6b4-y+>=l;2HU(|!ObfU5Z7mcr;ows;1`a7!%VVPC`Z4$`rH13WFFWG^es;m3gCES32LF*@ z6>i~k<~h#$2z3Aos^{SUH5-{F)NZrzxfD!6c^^r^G>{EH2mj|JP=1C%>&?$FKsk)) zUu3<1MZ08Y?&ctAGO{bbX4N9Xo!|wAd;2|c8&El?Q^^c^6T@*l3%V^Hi?o73@t1#bSyHVQ6+}Xer)>Gf{YR?-9O26^DP0TE!Lv|{)$45< znE>Q^ug&V!%hd|A-{}mP6be(o>GYcw;81Q=0`ZJeqoioaEUvlkc?AcA%yA>bFjtmGMrVmal9E_;!Q zWlcqPyWh+(W<6=0`{Mn6|;-7iZ$9W{$&(1;~blS=a5@lE@!8}WHO2O)@bup zS%8>MxQkM0&04|80P+NDDzKOfjO=~Rs&L5sBQ$)_Brnh&v$~Kb?t<-fY5867DdIEY zYOM-s&s1F+zv&|5@S3|R4pQOe_+`URf2l2#N`R0V`$0mmTtiAv?!I95Nw%;VJ-K;mSM4gR7X0!{M-d4=z-MSjiCw# zzlWMhks!Lo%1N8zt&sA*(#D1``rF%H8g7Muh=&vthrj}L18i?=w!Pb7S=r`p&y@rI z$=0TP&dSn|=|?i9IH#@v+FL9x4NYrY$Rn(n_z8UGD>zMzV&kw5AoE*-p}qs@)F{3g zPhq0tF*-4cE_%l7qq7TiW#1rGI=kNN){rzlPO7vHz1gK9sXwbYIVn?VRrF0NP5~dR z6ZfuiE?42_$XEV}@}DDESiZ?6kOcS>WVbf>EPsEo2ov{;TdTs+hstjSi+C81 zVNg;^dmzc!DudmRn5Uk6~%kSXBu=KZKzZ}F;@V!oCz7GC3br0s1MnytTAZG^x zEv_NNDQ!aUWKUpmPEG%NX86^gW=39A3M>Vo4>j}SpS`|+|7-v8lU*cD6Eq`A zz#Cx=e+z5q#+oJWu8};VG}*NR=y)F0RD(7GM9vtPg^_9;tjJaw-N;rMQF+d1qFNKF z!t>a~paH+N)%I5tO07j>v8w!a|%qdBu^X3;g%k!xBO0S4mf-}F6;z-N$Qva>$ z!JN9_&U>ed6P;F0y^JXh8kW@sGSl4iDLP*dF{VJ?zj4LWQCw1ffz=q5%W4j71O52p z(s5a7)UfaCth~(X7K|KCec$F1j65w9tXfoutt4y-k^We)EjalF~!~{5T z77`bRT!8=l>Ko!t=V#!xG+cAErMoI^eXqN-yMt6YOd7M6B}$SZgDIRW#v#DgL(D~z zpME^^jnii`-~1XQFciXvo4$4W+}QZy)8E_#wP%82Q1r4JVeOs~YgZvrmJ{53R?lIV za1ExxD0snH7_oY=a=cVOc*9ise<36X0iH3IBuj8s14-YZ0%T{$hHjGJ>_(^~$noN2 z*aY4fJ9qk9uriR8U>&~-HlH~Swmz;>VEU-MOee9mtEj3@)YFU5s4QF5W&85 zL2kIjnxB(b&#odY6jwx4vyqBjkzKTe=*j@zQnPOwJRZZF5Z^)m 
zt)Pz|XVs@1HoJ>bdaShCz;i}5{lN}#e}`SL9*>CisLp!$fR_>wk86~djBLIl-JXF?k|d<~oFt?Y{2ECpVYr-C6?kNvNux2VWkyEtHkzPz2Z%fJ3rGK_7!*Q*A{FB3 z8TcdOGxC_x>T5?2ieDDN1!YZ$*9bFSuDJU1*4{Db@!R3()@V!|ZDe$)0y+iAd~(gb zGT-_dp|ec++Iu|YsU(Ii|ABH+o!BO9H}(naW-KN0ex_rg%5j5vM8@qyv2!-|1Oi+X zZQIz=v*Y(%TW-c*T;3~cfcf+rQHF~tF-J>Ois;nR!|nKeifhYH_U9H)YtcBChI5W4 zWe&pFNJLhQK+?U+ifVE0R?@!30||6uNQIp150P8^8zx`^ka8q;b@-!&eDV0n-CJ)R z3i>wPKJMw;GFqZ@nUpf#X*M`@3az``5o#`VGK_|W<4@_b=*kj9(chm34KP%*eSAwv_15lIZ{z+g zmA@x5|^9IW(UW})!v4%r!HVL z1R8s%3EWy6YYPTi6Nv(QBw(_J8kYQ3o8w-=9x#Tcrl-QLcsxOz$o3z(Ox}uhVH4PX z>_9G#Lvr_CiezH3ezPr1R(F2~ufZ5hha0fXSTVSgU|h^*muoXaR8OYyx`YlZEQ}?0 zhi!Y!RKL7^^d0WNj-6?sI@>;5o(7RKu~})g&C962VgI4IO3}%l7nffO#%QIGr3Ay8 zlDN@#Qn0LcRM=vweKv-QJDZBlW>?#MKPVM=4O%c5jMdevX@Qm@Q@=zCL|f5pjTZt)3YV*uj=_85v2vg=nY3nJp;oVYs*3gOQ}a^@K-NgEZI=)sPOxHx_bQ9it=*&g>0Y8()2Ry?c>-(*u&VL zr4`mQ$8Op}ZkpZHb(mX7&F~Jc!LgeiS@+Vq^*q$qy1rZff=f$DNp+ zBD<`Yz}r{|s{-|7UW0aRBucehckQ$;{nn0&Qjxf&Xs?zWg_LOd4V*Nw*-Tjvoe9Xw1bB#b(_m4@Bi(ZruqRy57_xw0X^Wf7BO5}xy2{G&_TUGPknZ-mt)1SA9aF=; z$emyNx4EwYaC)?I>-C@fbZgg4lUE@VG=f^Liw+-Zox1dm`*zpO{jbc+-?`m;YO_ZhHvm79Zw1Tm6F;SN*f5l+M?@O5_YG{0b^LN9LXT9*Mt|uj z791n+Q_wt6Fk~Fc+7i*)oJzEvLG}aje`h%PcxP;_*r3QZ2eyZ(e zFk$LaD7s9E!b%z|Zrj;i)AVLvDcKjM`rowpRF*#Yan)yX= zxCcEaP>((21hjye|e&QKF;90A1OJ%+^;H->`u7|y2P?;kgK z4;g8=>C(~LE?!@Qj<=?-uQ^-LeZyeSLa)D|`|x1T!CpVE-TVA!M+P2v{!j`XPdMG!_Fsd!e!rez0g7KfcJi zTv0e$q6tN{g4?h|)W45wI%stjs5so+#V-#${QHI;W&Hty&wW09^; zP;#Mu%Yo$3@zGLC%fi;*;pK9Up*Co&8p!`0CcVq7RlMCW+1KhWN`x)0A{XRf2B^lV z_yUiqsC%xhYI^qiw(lyW9sBw-8R7_RpAqanseLN(j)baj$lsUr_x1Ucgxa3QXD?!E zZA=?8L0)1?FkM68WsisI8Z)JV`3yBBg-5Rh8lZ8b|Bm z`vyyo_TRI+!QgbVD!s#?HFLDv=khdeuB_e~_sDpQ9zr_W8y~I?bW}UN0WZZ{4V*zx z>k7Tb@TNm;vDtxIB~Dcg?1Q7;v&@hckZ%`b;jAx7*v+SLda;Nku}~VPE*gTodo*k< zN(1A0ew>*k$Fc0z11N+pBu8YTMx*MYl#oX$FQl?bP1(H#vf*02K@BAzrtUkw5IX)vzXGA z2H3f}I;uJi>d%&p7*>cX>r%eT(c(-UN_{>Blb4|Mi>Z5u3b3I$RrOz!NAU)LM*bltGoSz1(}wV9lD+@Vmjlunyz zaBnD>du+Dy{B@7d)pB~1uE@ukd^Dg(#zI`E6as>`*~#F*Y^*6Vm$5>w57dA 
zPVFn|ZYmYzfL1CPUvy*TwmUWl2}^S0*0Mb(M~j{|)s7?!yW8UycVZ&ZG}-6?Ump76 z4ITc@)ZJg*Hu&V1?%CC#P-_IcMPuRRYF^!Q>>Fd8!^A~*-ZL4SNP1O9r{>tV_k_y( zc1kNU8R96U9Vgb3o%O_bpQCA`kj6XDVMa2E9o~4ObHFk?d{lR6# zHe_fKTqmQoX^IMIjSa;r-VS^8CzKn?@d(z6T_?@Kg>f^c!~D28!8katIA64KWZT0Q*UH+dh?742i z8r>4xxi9E#-PIbOj5|Mh;+8u=_t5Plp+Nr)y|J0iT^?s^-&l3?;gQPH{=JRW6Pq?T zy`4i7_|&GU>7k|$WUIMCc0HL$|DHq!3KSV)36TCv#$+au{$*3^>7QKB z5^B~4m0RaGNu+hv=gORCwM?$!zse5zT-UmmDs#2#Gj(mCA zf7;VLm1x@5U^CZ^Cc8fcUK;$?qxa4~yNMtaoQs{ZwXp}Ia@v4~6n8Sp|e+m;4&zGm3 zI(q99I|5~UpE`Q%iJgI`O|jYj&fQIRQ-gSP;F?qx6Yl*X!h|mz-TLrcyl&3}TeC+v zV!sWC!lPsW%C&xJ#0GFnf)gA93qP1itMQHo;KB-uLo0=?yJB>ai)ZLlx9@2r7@Y3rD-UTfO-vf)R;6Tmxg{@u2xVjC~6fH-dYXR zit*oxAkrw91eSg-2O>ZGghH?&UUUGy*%x51)Jfj22uWUdq0|dMSR{=fS!4~RG;IFp zx#BTCvI;|Hhx94{2@@8hl;=}0DIHk@9aryAycRe5R>2%{9c=sp1v%AHM6Ks!1j8wT zsdJ>9pV}VV-c_NZSOvux;v)<3;S<9p=9YsaSMVx`de%e6_*7q8LC=WG@0JTTt;KC9 z@S6QysbtO0Jz2O2uw4)>o`h|%HHQ|<@y>*5~c{ZV}_577RB5h)e>49bd@Zp!~)g|`fH)Z>i;F!NDUvH^KIj(X3jrUxbQP{gJw#&h zClbHXX->+uZWNqEvGfwMoT8X?;0md8g`Dm(CwG!JWWEeC4d4gsaH7Mgb689&qFbgE z*XGMVyep3rcS3pakcbm+T#FNt2D0Hi)IhYR>Pw&=E5Yg@1cdrS$;dhdgqM`HRd{g` zW)gwo*_5Xa3Wtj+G2ch^j@A0w)szkoeNE8#P}{LHsfN_12#=gk<*bY;ZlB%IJl`KK z>i@*%*jAqi8e>RH(`hnxZ%5hOsk!Lc-Cvoj)0j;vm0%GxRzYrdn4QVposAQ5r*bW5 zr0~ep-H;ZpgL2_nh#9M}4cHH}dm$`8moRd8&txYE9*TpRIB1T8syHZ!gLoQmPUw_2 zoASm8*b@O=5l|Na!3cods`3fik`Peu9A)%DUyUGRh|b z(3Xz?(BSyX!QlA#8)8HW5sQjv6==ym6v<_E%PE*$>&rpXmX85R>5l_RpM?T%SuRakE_~1GJK{C~Wod|h%{ss6MdBBO17*)$@K=8tHzh0x8^h*CfvwRO zLdmsanpzYwMsEi_wPMEXO_Z_r8N3<8mR~-H$aaN{pF5|^9{KE1imLzIB60{xqSzb^ zDkE723r*%Koy=7_nXPmZ4IY6(5h|ESj0!=B-ns^&XJZb%ECsn)6(?3a@T;Oqqw#_0TO>={S~TChScup+D)R9AmzM<>%8S_W6rYrUH1-b{`~cAP_4TAD{T4%8zpOcm|n!BWLJclDRi}ZF6r`LXnbkAY*R?BKBQb zV{i28#@?ELZ0t3_O3GF8r90Jn6-_Z3)lXWsR_ctQ=5Xbvcu;`?>)@nZsNOgg*?jXr zz|wSZ^y}a?jnF1oGzuDGM!myqRQs5_V*B6|FsC$ zXDGvaIr3bK)xrNaRxhkeJu+3Zt)rZSVqeL!#mVtl-Ef1$)iT?504a7Pqmrkpw!}T! z;$&&%*47XTa*P_3uYnD^@o;~@5$O&!OePCJ@rLUMLcGy}Lc{7@W(dP9rAdErYuHSy z%{q-)OOn4YO|Kp^D53^(|Mbx8!LvILkvL`Cx_vv_AP%%oFEGzKrPE! 
zb<5mb8Z0jsQ506122Uh7Ly@)YT{7h6dB(ky*)1-ZE7e31US9*&6FfxfW$&g|B{k*J zLcpGz7W>C&-GPg6{e*`5qD<#Bpjd*L5~vdyjZVstvQsfs*QM9izIhkJEWBknE?Ri;RTkdbOSm2i^CWJ77_v4QizF2d~I8=B=J{6#BBf0R`34o^z_N;GG}6< z3Bo~hXzZSek>gu}rmDgE1RMk}PVT7aEHmoLdh4gRxXd*p$>dNugl(H+&BGBrV4L=I z`U_hp8^V1X5>9h8(N$8ry)#&t9IFj>ChKjshPGbthj@>^u*&JEC@C?AMl-yxw!F+@ zuc@lCxoZ7JTZtHLXaKfkH*Co&tWAoMeZ~p3Qtcwf^H>~>3Qa(-x77VDocv8efh;^@ zc~`ccYpgQ=8erOR-QQ9L$={^JeY4YkRR!r^0sMP-xsPqxhsHWUg^IHG+ORXtVa>79;PLlqv4 zfAaQ2TO&%IXBkeT(p%J|N>J5I9&1;@p%5N8*tX%e>8P_|%l$}2C;osN6!-P_>zMB$esKUQr1sRgpM25Y^ zV-Nlx;=Th+j^bLkt9yD<_jH(^oO9gCGn;0^taex0ps>nel`~>2AtW#&NC*VB$+iJw z{Jzil*_S9JE5VWsn6WX~@&yy^d;MWQ+Zg*Z*Tx2OFGj0ZRo$~Yy9&U4e(wo;x~IB& zw!7+7oxjdGb^dMr+ZT04#Q5m#7jFIXiul8h=K0OtBN?N!G{2>Lep)1rSG;)JJh+73 z^VKC+zHr;Tp&MVkVb?ty(p}X%mcnDIvw8;{$v4A+(ge|AnwVF&{~90lh$3NQB)Imi zJQCa#7e7$&7K#n1VWo${1Efk2)u`yJu#Q^%U;<$^gb7Soe+%cd{x;D%Mt$mWnnASJ z4E@$`L^G>Ga~ROlaD8vId5r3(HSlVW(avi$yxnND3mWq8*XRK6wTc#=p}$S3D3k<6 z^YJdI>`tF1Ho>SH@p_1-(hyInsh4#~p4;L4RXx+63+h(7?_kZJ{!o?t zbeca6HGlH|K=aFXx~jzQ*GYb>%`0ifyBPsxiqfgUm1?7>$%}XZ_Z!UPW90aKU=$?$ z{+E>qBhAY+@3NWPHVaFxXGB>Qp1QzC4xf4r$AJHtN;T^IGSvBrVxow2eygnW+p*41 zcl{<1pwk;|AAFL|Pq)iDKhX7?D!zrbeekJu{@HxGSm`VFxm5XccdzSMGc&=UBycLI z^aCBEJ?*Pzggi4g4|sF&AX57_5^?om;=}Qkj=tFH+ty^kq9xajW;l~c$BGUUZ!u_0 zP@NU$w`OK$?J5;z@qsa@_8}k;okrdGe~@Hyy)vJv_8%Vcq9q6K6M;foh5c-5o{WjHo?3nD+M$aU zwEB4=+Pn3(b>aS&2&<+66b_BXSDMu{cl#_KmAHP%=oPbl-!Y|zn)`PpoXzt~Epu}g zun3WN1-nOgUOU)(!?hPLE@)Yv(HkUIa`0-#z_hKudcdB@EZ#km9xS?|_Vw2<2!%W5 zAT7Tdj*_0K-rS1f&FxaWxus){vub&VtmPf&)$$GA+#dKc^!P5?0M&dTDyYGCPQ3?v zRwZ)e*9@Y5LeW3Ouo^|t>m{dAR`nDyANJoI*ndGHNA%P+eFB^&j6?|bp5A5fx{UBY zZT<<8f>Uw_NmWE`zN00R2&t*)Yu3%(uT!6(>Fc$v(buZh%~bd6Rce~1SD#hUtIzM# zo%6$|m*2ED@9kRE)jU#iCYN5na>dmPqT4>#wYuB;{EF49M$PHr<|Rvf&ekR6((IgV z+l~vj0rPEN^Uv7Wo1H(a(`jq#ncY<0Fp!A!tt>6MF6-*(n+x9Qo-?Q7DP)q?$og?} zuq~f;STp%@V0iwBqN}Qf9by<_@xL#*NqPUXs^wg2C=K*?A}>hW2$= z_ahLSUbJiG;C?&~b{xoUfSNnpF*lYPEVzR8m_FK%U?DLi%Z{1MKr!)>nWKc>F$VNU 
z`Gp~PI6^N`^mpyoa(oMqt{$)Q3sK~U{%93eIG26Ld}df*_ui+jy7I)Ot(|+H+;KJ+$nyA%A>i*YMKIhW+IJJKjIIX2}B|e(SCe53gDB;D`6Hx4pQ(Jm*VK?NpBS zxIWrS_=$o%PW_}<(OE@?Wn2qr6v&IbwkNUHeuQNzxDfAr{-=cVu{zbv+Zl@WSdBQG z53a8OqBW{V^TC$wBCe?0c19;{J6Au-h^=k1c_)p{I}4F{Cw4@=sL{ML-5>j?d=oPA zq!lC2LTu!T9j(GU8;v|?bIwYiBB&2}XItw*eIG5mpha}SRa5)gfs7u$Gfk>>V*8Tq zU8`?imA1?}@VS#@203Vlj4ny15j+-?$7(eI{mQRgwmzAh(;f&!{aVp&=By$oghP(f z%FFw^uDtEg&ewHDg?)|M16!%VOItj{+Owj;OUqy{?cb2qW}yXsRmAesdTANiOZzu$ z_F0I$w9JIy{Jw zj-JskO$PLVZS!x~QZ{*foWbO=1fAaL@qD}{P+8W3;`st`KT0-Zuo`)zKjh3T+C8iD zg86L>N#_^I@q8btRI~}>zs-~Q&rrvI|2SsB9$MJY3a(qAUwlpocue-vG8LxUQ2T5s zuz_LXoKbx$lkr8Z343d`r{123>U)tk`O}_Y&da?hNhqps6x;{)$GqcjAw8)=`{w;9 zBwF&ioFe&w)}WIE`+n{xUq3ahNZB@;gE;Wgy6vYnfdlgswx6boF5UGv1nTD7ri#`% zpdh}7g;4Ns7z)luzS^ns^qJA1deQ=fBmBCz&C76T`zM@L^s(Mev(6LaCwhHh8;bG+ z-@fqMJK6-Rm0=~9Nw5oAtIHPX-8R&_k3Wl{QFInzarhe5O$UBh-PXJ zBkgX)Zus}Bfy!U8l3$42IJ%=%&DlEj|6z<4`S`+v_uRClgAvR|x5Mld)Hp`#zLkOt z#c18Q0LN&3`!~DxeOHdr>R8tsnvBtcEg=y}z~Gj!Pow+|*C~Hfh)N-h{G}<-K<0~> zu}z3I#f03MCL20?f=r3YA zqJwv2^z-y2fmuCxOty_(q|aclKA|V{sV1k4LBb6f1o01G|0SXw5PrR?k&PKUF&lV; zopq2B{v?nBJ4@AoQB>6Pna_Q9|K;~>PG+m$z4{7x{4VEAcFf5x-qvaH^lTbvUECS7 z>Bt-I`1_G{OCR{~p4&gf$AjzczI1VmedMMmt6%={)$O6))fZn!)Z@({R!IXE_d+P- z0UGtS9zyvKic*i{=5RoaQ`&e*6To?nIIy^zv z5QQ}5WU`hlV50lv4-^OTBzwoIx~Zi5L>2yo8vVv3QaPKkQwT0ze(UBIv)jwz zphA}~4AJW1e9N**16p0)5<|OBVmPRfhsy&AN=kasB??;AltxgjMiwHFS7fvbaAtx^ zJ0dkofoKy5ML{S6!Y&YTf*_tg!!{7Mf`|n~%phU{5Jo^D3DF>=0!b&pbE#3DOH&pb zJh7lD)}n|3Iz9?}-{MNAg|X8gSKP3MfQG$*WJ*BPVFFTu0!lX{JVA;?lu*etsR}AE zH0)KFJPBS_sj`{#!)S#=C#kT$Ci7z9BtA}#{AvIGxnP@m1%*bMWNOk=e`Ey%jq;^| zpV5-1$>Yy?gs+I^@o&js`yzj2{4aItFaV*4$0i}4K4nDdzG#TJKmK8m{KKhsIXd!P z>Q0CnyNHkFIW^h>qRj|rL?}F`J|a)4Ey}#vf+Ct2IILP8hiw{zofLzmF*F8a+`Rmb z{JuPuccYGQqmFRn!RQ#B5%H4qaVInfKsUlVb?eABK034_I*UkX^m^%BDF#jjVXEyVn zB|a0}v-N-OXcz4^jujn75d~#(sWBtF% zbFv8MSiR^6vqeG12=6G9Ach;FLbe8v|A{4jHzsov#_D$of=nic?-G=|@MQ+^#a$wx z;i*=r;{0`~@PmecxHoYioLNC7W`#b^to$ZC{^|t02fJLSEJI{if_pHjaJ$th-W2=n 
zC$3B;nTyU9*huR+ogCUYxiaB(%E}}Lmu8YqS(zx&jj*MELdYJ)cd2`omC5h%2$AAx zkf1>vGqWWkAfg9-XtMI5Ch3DJ(NNch?#pwaEjO6EAV(#00IEf}W+gbzM<542p2*}7 z!{cZql#%X(x0Vr}GosrsDTC(n%<|?k6)J=B7@4eanJ@@f{_68-n%_^LMUhrn6e(6u z95f4u1#ChkZZ_XIHsI)}@IJUyLJ@Cp)s(=?CGEs=>AWV<^M$-M*CXDN^Lv_1Iu!B7 z#5xxgK1Iaa@J;07iw=JC#;s*WFdDrMi-V(iBaim8c(dirH!VCP-mO$#-xp4yz8WBF zNLBNs#<8xP<;KEKw^Z!{p247Fgi7!lFi!r0oQL(N~_4zx}x?zlRP!SB01A z%ck&|%cd!&UGf*v1y?Oue8os4HXj}5MZfCE^fl%C6OuFAm(2GijZdz;eOp^;%N=W0 z+_9~_dCMJ}7VhYEM+dg{z@sZNuodTXKAi=Zkc0Id+m8``aOgNT?2ZBHnCmk73T4N( zY}{3LY)i+h_|n9VZTfuCdzWa9PL!5ea|dVCBafrzaf*Sc)nqrQ#xyz_q8put+&!Wp z^=NynK~L`hDiWYqedQVUv}|CJSh%Z@6`eJEM6$pVJU&6emI-E*%hw>Pnb33tTt%@ zSrQ%Tw@p*>P_Nkb7wf^Ep`y_kI9$*TQ0`H;y$o4DRxT@kb1(^^Awb!mjR|UBtcq%P z3z0fRD%gT^Ot<82UBcJ@pr~;K*e}9vCW4c0-6kg{?5;@>Ds$9cZ@R8m5p3E5&VQ@N zMTdLIfA*UaX6IjK5i`A8@-FCwz+DC#bJsg4rs?(hKpX zV|{L#cJz|<9rNh6V?Fq;VKKB~{^xe-I~=v;mcQWH_VP^y=17Dh9-3j z%U(XTZ#UK!BdQbdVN+cN`W{hagd-W{Rxyxm8;X$lZ0|&F_O%jzdsEEx`bH2y+XzKa z-r6|EFAf<(@|;QnOT>P)v)*;Ff8sXd7cb)5ZbkL-$>T0JkU7~Gq;^YG@otckalHJZ z&J}WRU17apWa(0^cwLp`?t2Tw#YP$dG9k2x$pv&X`1<0v3pBuB*Lr~}JLZO7Z+AA_ zXx4BMQor?YXRQWp_fL~XHf!SV8u8Dc+T$^L6107YJ+}YyAv~7nGTj5_O_Mdo3Yo@1 zLnQ7ah*uBkKudcO+f+k3e1K zi1q-(Z4{DBcoF<%`WbK5PTz$E3AY`Y{EjlU4*X44503QIho{@^+K>P#$H2)w{WK)~ z_ex|9;J#~tF#frwmW3uC-Ddkv)P(?@)&&1q@t^QWP`)j*&xcPPln&1ZO)4#U;^O$Fv zVvoD@1;0j>ux2acu93RlJg8%+C4wZCBHny<#`p_{fM!K5__yIT?9j4bWk$5Qqhj*M zZEZ~xO>7e_b{0r}!Aoylb+~k za91c;jGPtoxC%#QKdCY26}PC)aSOh%ZDrS zs#bT&P#V?=R#p2#yF;bQ8>Gp+?x`haxh|Ew_aSs-MV&1jeBia{_Ve2ps4t|8NZh{& zK?0c(;;uz6r)03@B`B(DYLM9mP)h@KzR*mnl40eQO6!M?dz^;{VQKdcnCb0KhQPux(~?FJBcg9$TwJp zX3u%k4h|Y}NfOAwCWq`0OOj^B)im#By1cr~PQvR;dx#Y2SO=UvDtajbEvwU zaVvrNvLy8OAv^RNLkXY<$tPu_E8^kJAR{#D&_@1NsWU}Dd)Kp_PmOG7rI3Xh7s!_i z{ootRWC3n)N0`>u8n60d7E+wvl?cBGLls^wUgZWi@J-`Mz8Ey+UzQFwC`05FqvTwe zq#K5eDzSa^{POtn%W}cLfZKZJFmmmFz-u6P^hh7axz()zfFLGTuL5OG)n|lpXOJmF zrXnA6)g1V7or{Xo&?zaij~%Yg^YY5_1Lh$cmLntsaD-Rxl#0i{bYxaD6b*YCw=2DzFlG>ieED(n3u=4g0B2w;aGmxfLJrY^}m-Jnct^-=$ 
zqE$A>T+R&^88D#FB~xvm0v#}j18{rmDTV)GapFMQrvxn@>AtV7Ieh9EaNn(hIIzjrh{>EbTiV{cWdnH|g_F|8}j2W`%gYkLn zqiM%%6m)3jOq;o>JMU5hsO17LC|J&WJ!dbY_%eZ7;v4$oJb9Q7;pJAZQc7$M0vCe%>3o*^6?15Uto`~@v^ z1V6?DjTEpM7g_DPiAJh9=WD$q1ne7G5%444h&*{ughcH(kv~y!HZ+wBG@>{odzx6G zUzYfMkIaZwpJmv(aUl-dAjojV+;hd8nzpfJsyyin3@ke8H<+0ZY8=#E0V^)r;5T9B+ItkgXO>4tiTbR_^Ba*~V-RooiaZf-uInkl!4Kn65QGHWDWW zRYF(w}s(oVo$MUQ9W*yFN;$wRb4poyU9FatH zslm!b!3>8%48Y&YjI}H0V=NATx%=wH;H29YuR0I#RcNO3ZxVFYq&s}kH- z1?1chcPl>K9}}+MDYN~c?bB0}1@05Ep2H!TC8NqYzl^VCW{I@2BoGoW@FaX=H+M>> zR|uU`hOwABXV1*0FJ3%#!IO!CJ$2ENkhkPUhe!>}jKHi=TGSB{{Tqx~LtH2pYC+l_ zp*hHJL{k^KB63b5sCW+1z*6!bscC!Jn%8Oyp~f)d+D7Agi<_}Up_>6GUuM}|`Ae|P ztZJ2BoA=&%w^5z_!|;ZNI!BEuoo!8i6jQeMNi_Iz>QClYbC+j_@sQQD)m|V4kix_} zwOuw>%K1e0(N4dsh%Wn5tKRE%QR%8N%UdCm6KX zrO(YXi1{9a=hM}W2%`oGcHXoKlIl-AKBdQnd>fWrGZKs6D#bqp-4v zi{t2awzyu!8s-m){CIB!zpwg1RI)R*x?hEQRi5Qe7QXOzkYzF?3Kq|`Lg#b_*}ZzQ zXv+1%Z3%q)`V9MyT|i>}R2a;_3T;;g z8|vA{(bnRW%yu*}^vX0B`T*x`g*n>!N^9mE|CGh`I7wg{Z;k)nL|fr6 zf?uoF)N$4!h%bV*kTLAar}Nk77y9qYF$r4dESzN#!Eb#!1>QxLmy3lVyA{@l59bQ7 zys@)9I3oqI6H*-M0NG$HE#Oh{|$XU^H3Ye!StFz2I zagw)V=KGgdZjJb}a`3Ux0H#j>AZRr#L+2~{HDv2oT7W?7b;8diuzs2hRJwrGM~ha84X`;p|RiQ8x`9~^2<@E&#iWxU_8z=Z&jzEVV#UNU4X;oRxNOU|E6 zBd&$8oR%vquNvKC(CG`n_}B{)KX#_IzY4=5A2)l@&;ztMl3^nnTs@zI^GEd(aIhQA zJ3h`07{;G$2SY4L_2J@ChxN0mv-8QGpcF1^0qBfRWjIo+W;^NF7l=w?n;5R{c$F&T zqE_gv*uT{>VG}CF1!0mE?;P$*HOPWxy}E`LMrNZsFdW4?-8`y2VVkqzn>W7waho>w z)Pi5XL;+3{?=-vgo(UEBBfDC4OdM4?_s+U3h&fvPQew?!lDS00KV<=QTqoM%y&5(K zyg4Mk{)NHw|8vU1x)7$tf&Vw%rZd)Sheu|BuS$VS)Ou7%hTf?mK{3;YK4eOPkSgpx z3s?ZiOz$l_7^(RF?)@~uw9S$?(I)mA7EBi+DcY2mX&PhqWH1tqo|S^41iq87DO1l@ z?^CZ0Fpq*;swnfPRSu{v2&ml3o%?;gV#6q0ox$?q22+ly%bA(_m3=0Och<7mLrkE} z;#P62$2l+-vQfhK9M&kbps^s}tyrS;hjfab@8eTXKW1xKqoy6axSY;k^LqRzPojQ8+(kRV)Vp`55|V<-m1v#@ z`P&RWt?J@j*)v#qz}BblrNl@CX;Uz&)65v`d)I=VGJY!XhkRxn@6j!Mt#tuv*@9E3 z4X;|p%~Xj0AX;-A-Dv~yoS_-U+K*m|Un6+YPRAt)pvNTqrYU(425>bf-|b`yubgTkFR9l1D-aBa17x<@tmYHl4{OwZLz zHksZoa~`JyWLJm)XBfe{X@)+y;He-!~}u>$0k 
z+5PWsvPRRa7AG#p4l?RkN-7=Ymz3c-pK>M&`X7~BwBvik{w%NkT$hr49hV{b!?t#Q z;y|!)M#KI0OW`3;6X}B1G8d-zT=_8gTYHbkDX!uL8%M#>ye>KN%KA_!POuLZ5l(0pLRvzik zzHgXK+sV;HVy-7FGU0`6YCRGk@Uz+ZI0($%ZyxViZ~wYsy^Oh<(S=mva$UN`z+jo#j4kpB^#x#l$ca6d;pRa}C}&46zaM zVGV!A_sKtne?bK)M>6ZHA0p9G8~xyO{{fF!qDJXYL#TmZ_jVL>y_rRE*izAm7ZiZn z+Ee@PwTup)GHLfKQQ{yPN-NUy>F2iwcm+O!N*l$p2BT7!}- zE{k6CS0O?Xf{{oN(~R}tPOK*r{{D|0ohXQcy|}3Q`$0rL+jI?YT*E{d;-Z9}2Ui?% zy-Rb33J!?M%O&ucmsTnv+T4W5o}^`-`W=EleLMK~`3{Q)do(J&sV+{8u7G|@|qZ_cO)>VUypBe>zOQdxrOz_^?eW(?~O_j|{ z88BF*;;xGYp{z%kcXx!Nlp#OS z;*mPQD3aI9H}MTGleln!4R6*OLf_?)1M_HPSkh9=sCsU0Ka;Z|qd|6$sUz=aDSVfn z14yu85A&-Y3kpAK)tS8TY$g}cd`5y``$t*}CD}G~m*sE;jFGN=b*w9ut+}g*zXTA) z+-eA0ouCWZm&ogQ=_F0R$q-TOimIpQRN?74$dZKk%%d}dR7Xtbq&6Neh9tp3^q_$1 zgUj3e+m^cP)pLhMs;y@8fp42z=WuK)cO4;L1jALV#y$mK1UcqnB|Y%V)Ao$b!WWzKOKIuifqkuY#BlP8P-@AIaL{e;!f2v+c>m zpVoRh@{Aj8rO;FDen_)yJRL;KI7^DJ*=iPp6JNdHD{s*-j+WNeBC$SDT)MdC6EPe^ zT+h_8R7)$c1vxOQdUc9A&`LrrO%G4BG&e^Vj1qhb$Qx#*+@Q?KtK6exnaNE3F|P| z&?0`${}gW}Yx@n+W|$yk$Q1HXmG-Ad3cr&_T+T_vM_r=IDw~nQ+R}_rlT(f)+0cW& zWAN9kW{MSknRa4wQaavxjhY`{Ej1-y^+2LF1x{;j#AVq$f0AY!x#P|%No$KTx`%pF zOMO#OSskm8L{fr&`g_Vg)QsY=BVU&cLjUmpp~j$DnHtgpl~1N%(VTxeJDoI%O96t z7up~`RveXtfIXG`T$kzKcQ5?e=_+H$CmWii=vuE5luDGPUMkii6zgQsbNrif^zmqe z@Y$jt?=F;$$+EMUK#}fohKr7hf%9`3l_B#CG!R}?1P^2}`~9ST`Z3yVIQKHQOMPL{WW^kJ^l5Ue@d{VhL7;A+b||Q$&uC{Xk&j zmQ+)ZRHka1k_9CIEig*-80*(ZI76G*CG@j=ryvX6YI1hj5t5@fx2}k~j0_TOEe$}_ zO-l=llm79h#kqppWK1kLeC$y^YsV_SvrTCI(ZW`d7P^J1^Y7;fX6Kd$PY+HfHEPKXTx9z}f!>VSCm7LgkiQtUu zE|OSp*LB}Z4%ewy+x08w+M0FG&nbpwWcqTv)y3}Tmw>m51YDzPt>(~5m47huVt(Ar z#J)3eagU18n@Fny>A%J~W5mKgLbR?&JnteHG_Wi6EtYY2@AQ3XdGxSv>nQ~oxSqoW zflb8}6vf4vtrtlw(mXVj_jczqu1BZxI8DEGhg|8*KqHf$KXRsJi!g4UOm~8kF1((O zQSD_&TuLvdU%^PSV)uJ|CYYoDcFc?({RT)OI!V*&OfhKRoq` z(TiFIF1Q0H^s?2L0KXRXlZJ|yZ0*M5v;+WQD9X=>JIJn|B{i-$nqyvFyqfRq zbt>X3WPY3x{M)5Mb=owzF^FF9yPT=`a`>ZD<1+f(#=pOQcqIalPij5(wFN|--q>W`Ix}iI+m38$&dn(XP>|5CtA4=832D5oHX^orV(; zSWbRY>W4oBOT^?}^KX4RcoQ6oz&h)B2;I&1JJdX?)!O>0W=GJ5&n7~ 
zg!V^LjYx44AWle1CaU?%aA_77!(re42uF2U^Gs(OX^^&&{Ru&}BKXX~4Y?y@&b=-)5SVdO^26AtZ|z1cv(^2D=nt34 zPgW^;#f1~s6o^9%KieDx(|;FL4htqYwO#km%E7Iw~WAI4xT!==O(Cs(u=YA zGn4^L$GOS8(i7&h)lg?H>FnlMTHQS&+#Q$L^E$*oQ0~dM)EI5dLZj#`#7op%^Zwi9 zNTSLbDz~Ag{5~UuuaSjiP4tm`u;Mc-Jm~=IIqI8qqGK;TIPSQgmz`W`j6dF}58POEO z(Z+46BiB+m$nYO`xh`C?8AD(5jM+d}JGJlgs?Mb0ud*)xQfniT`vMLZ+d!w1B= zbW;%pTN(u`<}QkqO6QSbZ+r&r)%2Uo4EnCVaP8&fz^B2d!wl#KQXZOaQ#B_;GoL`3 zzWA*8G9?PU`?J?fu0h8P#J|%{hmacJ^UBI|qG8RzXlIu~ zHg|{Mu4^a!ua1}pTtg&{kUA*(RQFdNyNUSV%Xm1T#{hSau{&D4^D8}9rQz+`!z4$E zYJAG>OT1LUs$y=JVIV0Ce} z(Z7O0ub36bF{)S%yTz_bW^}CPq(W~#J!ht~j+3Lw1gcIH#yJbTQ)`l#wy@W!^vO`s z#&QgwfPkafy}w8&S*`gbmy)B4QEC{25Z4>BQh4q77P&9C*G4>Y678s?YpGnHP^B#& zUMuoT*7zo(E3e(Y*i@)qzt}QtwaDfrfDPpZLs9p0zqo#jD!@HJ{pq&ij zLC1Aye1R_%#(asLqegdwBYR5w_Eb%a2Q-9Of^fxTMeSD`RuyGd?7xa%1_I5RT*B3w@%0d*i z)7|Hb-w?i|z-nU2T1}2Bwl$IW6e3*7pwqL_z1y2Q7k$zxJj%>^ZXQQ%uQo`xHKObB zhb4uGJ(fr9dv?{Gpu&v=+Q$HQr=TYTo62)H4I5pt6;zFngfE{)`3LOb5&;CjNoD9L z!)d)u_sCA&pFQ?<@JP%s8ymeQ$D54Yr-!_!?H2~8spBs{39%+0B~Mih4r0pBs_Wpf zk*KH#iHn$a-i?uh<1IMp2{&m$Jvk%u-8n`YcRji9G@%^>`x*s<+E@uAG4jGZ5(mpi zW8PwDTBJ;rooQuoNuts(%#J!@dWv2f>?SX5)MKyx``!gbstmL(rC87hV~xRi=U5?r z-1h~EeWpGX74tFmI_{+ieM^eM<=%1x2u&e)@2YQ&5}YF%9~W2aOaHpMqmQtE=L^F*XQ zS~^z9%S1KSwchf{YKkF6AIk;o6Or}prmmk*H%l2K%iZ-4lefj!neyvc=+2hTn!ohy zEaAuN94s#^1Qyk2A1q$y9{i8F7wTqsW}a0wOjEng=3Sdzr(KraKjU&Ha;Ush?>5(7 z^|o~1rQe<3h2FKnzwPoEX_cPoMgw47ksrfjEDkVjJd zSeo7D?i%RWTg-Ol%XD+Q^jYHB_OY&TI;GI)IkT=&+-ULU75H>n997fMyWdyay36pD z*>Zp9Qv6)jb!HP_6Ky!Ja(j?$C^i3deRyA0Jj8IWa~^Z%aUOD3OI(<4n%EF}yj;}K zD{M8p%uuY^@V|(wz14Y)pzc_{=yrdES?1FJbTN?JzC&EX|8P|t$ho@O=ybQaTG-U} zuHEA6d5rCPy!5rW61o(qd)z9#d1T;f_Nj5%;`Z1(1^ zzwO(a+jG(btMDEq?GeE?FSC)IGD#ocn3ca&sm8{rSegjz z;g~zsy>YPuh^80hJ6++7x3Pm9=VR9`(smUuoJA7LNXbnP7@sh*1h^0cFj7dZ74} z(SHKd&{pi<4AV{L|F8Y;)<40+$*ATbOBjw47B>O-Ggs?=upicin7p_SWel~fM2!9YU<)c=k+(Q z`nO{>PS~BHv73zTaz1B7;r==M-7VA+TkswdFU*nnJ#HxhWC`xQQWmLXb+X7l!I0md zIXNiaarUXTV;|FYw$>%tIJv?_oMOuDCvt%$u!}I2$cS*Qvr-M>ofn_2p?L{2Wj?<| 
z4}7A8lXd{nqf#bsUX)GO4F9O7P@}`P3^8@%%qsV>96_Ha;MyNvBOEXCwr-9Ea%>6d zPRgsPfbhf`sR=jvZ2?Je!XCqvYH>_fGya+PudB!piiguuXS5NQS=`mK5=P$0xV7>q zEmH!?a3KYiK*6ijvYu&wx%PFn3L zQ`&v_K2QD8a$f4e-7PEQ;fWvbA-bb2qDX?LVNO>3+Zwu!JikU`E5gOag8K*O zvpV)-g`+TPZ!%oKk`0O4)H_BPO?UyA-GTY%>g)?EJC~>P6A!n`f+BS&i_I zy{_$14+kXE)D*ZuHN$Aud)dl3oZQ`3(>}+k+<-K3b80I5n9`xNNhvnh87b$vG&HhL zk%xUeM^hS!*F;!ky-s(fu%2XF51;VL4wC9=KPw*XL0->~?;@4E&t4qC&tK9bxUn5u zNRDLe9gbKemi4eF$(Ev~RPO|=gEbq#GpB5`vRO(*;&_Eh6Ln_r+(=%Qq=% zK_;dZ%bc8iK6scW!MX~}5!igVjJQE(Oy%75DR4H0@?B`XxCIx9_3crWLTVcxZ2Hmt zEZHNe2~?LSOl*W(Vwc39Z0=$;^8JletsRrv&L>()dM=b`0{s;|vBs8TC^t)b7V8f3 zj?`Hm@8U(tO^xu^0=TiMdg3U*gM)R{M13|w7p}{~ZkiJh$Mph!oVKZfH44M%;r>gM{=J?0S7Q-(4yUuB z2JKbPRe3Z2k3|J9jcsoG&3e6j0RXj?2wj5B0INhdADO$M(+1HYDUsLbLD2^%ZgYR6 zR-Cf#`J4;~VWX)BBdRTcRP&Ey;UMmUaO|0se!EY_4qYcV_-_j3T&XCz8TCkc&rIx% z6|?*mqd4mWGve^E2CG>(7^x8yQcd_FyKl^L8#jwPN~h;5hSgFa$u-Fxb=9ux5)-eH z{`Zy5GaEo(DyDZ(*e24rSjK)ew5)R`PJfCxwJF)PAnuJ_c-+so)w2Mxe_x$sS!n*6 z?zXUG+|)Qjrprx95~bx}2HB)eU^|rXqY)>03R_9Gy7cT9)f(bFhr;vV!#x>&E+~_R@u&=PdI)(gXGU`eC-Rid( zFN+ug!r{lSjrc`-nI~D|wy(zoYmRzZviOU?qjeZ3)odi6qNU8kW5WFe;%BNRl7*=j zDF#+agzom{Dj|giMA0qWG?*bUE}HOTvfn zPJ3zucgB#o;pHW`Ad{7=`LZT5c0ZqC5rd_LU(o!+989jQb<96S>?&|S$hkHyXU>mv z7w>l3Rq#;n9%+0RX89vl(gwdK>U^=E!bt2~Tu_K!^F@R)xILYf)C&<>vKuf>&iSe3 zo}d2;hR^UHRs24b+nF-|9uSu9s|wK5RHM4%wb5&t7P!RL`lA=OZhg=9r^c!8+yx$M zWUy$93UDsi;hySS#os(Hc-k}#`%cthZ82qx?N#M`Y1y;wqR4F%IC+tNn@9&a=WN>a zOR9Sj2!~ZU&qm{-rOVl3OBGw=tk@nrb;3f7Yi=>OgPyhX9^KI;(qO!B0@BP5WdE_& z%2KxNtmtj)7Es1#e7_cAxtz8^_e6CeV!s?evjL{WV~Fcw9aurf6+_bIHSXEf9r4EIL&=n^9@{%)#QG|891av_fb)I zXwnp*PLeGt$@U9_(QPj&J=bfzTYYbY$lbnfH$*I8@N(zfVCyH}hL5F*TRg-jVC+)_ zh_dMf^0YGpy>)P06_oP8(=e0MW>~pIv8EF)M|DlEj9&dP|Qon#YHT8)We-ZaJWVlf7=Q1JkV0 zyOh9Bs2xAMupw~+wQQTqu2u(!iY!@|+x@9;rEyYU;*TSC4FlQoC`7LyTN2W`NtziO z&caSTPwCDp>BR=4hfH0e*!L*V;qi|0TBH)Be%OYuU+n-TZxR2Od(pO9Xqmi~03{T3 z@)Qs0{pdDx9fcNG`otVHF{tT&7Xv}%vINu3F26t~NTB+)9 
z3@y?;#gK=^cEZNLT^c;lbm%?1NSlsH8M-BzuN;gOAQ6qmA_gTKB}=3OS$`-E4+$H5jE`}10p2lYQT^&Tti2bKf%8l{ckPLd$xp}G(LP7lE zk@Gr%`Yy4t9lkOUY>#c%M~ZQmyZC8ipLVy3lD zD=0Uv@1d}rimPEl5Q#<#lO>eMOvtsDGO|dbLC@=9C)dp*4gVR$|FHIIR;ufPLWxIP%vqEIX0%XRI@6EF?*q@%rj4pTncRNE{ zM*|46Ub8Sb|8E?b-yVxbkA~r}W*-Md|KIWXN@Q}pzZKAX)}U%8)UXnj`Sj=Utf4s6pKI=OH4( zqIO%zY}8(f+QJ*^<8iK5;RhAG_&<8GT<8K%ykQX@UsnuUp!I zUr~*uVNvZsoxTAlOD&rQ>nFbSySs%a@VZWBc$yMKpgoAUa$v*1K3dXnwqoNZpgj>= z4^fr5F{lT&Gerg=J;0us5DzEpowL6ZbVR-9p1YnSI~=lmBfK(t$G@X{hpW^-I$fW` zF(wI&Vb)DMgIDdH8P210h+pAb()CATZU?gB^rv+P?6)6oGuwhXllUgQ+X_2#Us-!H zy|a39ykn6aKBIa9yfcORy9WFNUgLYhUX%SJmv65Aj=gofp?y61P<+DpA^L~vkMxYx z9&H_sJRv_-J|RDEyh*+>yb(WpKQ+7sy^Xx3?!YLD)$ixNAZQXnfsGBG=>gr}fkMz= z{h$Mw3G(*)x;le&Bp{}NCNeO)3^A}6lRn$PlLN2`33LxSNOjo90SLkXYXc854W1D| zZvp_nexiMLgLGKns(~gvki)_uXKD_`d-kgASBHK4|p0U%^QR z&v<|wXfWFwYR*niuk}w#BKn5DdJOur-g-oO27@-le-3$IfUkWwj3A#c^gZ8ziU9|= z29HkUvM-rZtY8cf{THwmh&{+u=s!RI^D$Sz!G^&z3P2D7$P{pZ+;@{|;2GPNxX5N~ zg9pG0IFJFIf0e}twD&o20XDw_^&!DweK*n|pRe>=U%>MQ&qQa3IBpqippI7l2i}@2 zf!@W276)B}wy*Rfy>k%2j=meS|BewCcmN0DhXU3H98CA#v=}^-0{p(A!EYk?-`=MG zt>FLg@`EDYUVp~lwwHX826e*I*FvX57(7FPd~oPkkzoEi zVO;OccYxopjnRBxJ>-9xgP@n}okIlP8hGN+|BG_Zz!QyL|0_5*@ZhTV27Rf|Qj;nb z2`JlFPekwC=R^ir>zhOV?{?fkoyhdWP+)X}XMTVmF}-5Y0r5WtzqI8bWwf3E?59Z| zRDgP*DI35o(3BXU^`F|mWOwB~q5wX~0Nt=aAxN-(z(Lp>F$TZCL_%**>FB|hhUyJ5 zHC|smGJQ*6F*?0@pA#{FDAEL12xpiTg8p9=U+FvgoY(-Dkl=TNXA%Gr#DA%A1^I-X zm@Q6tamKBVGQxoUK!2ctts&;MFyMcYLIS#>f%ZWMSA93=oS9LEQElvQU;c0gtgHXy z`{oMx=cX0{yxx053i2VK@A-E}fd}gb&-Mn-k`IuzMv6BP=eM1`ewq4X-J@I$wIr0w zq?BQv$bhK;iW>@lIsnmEu%z&nwSnh1`W}e?{CK~ih9rIl{)p|7Oz0OX2K4n5S{!Vt zeZTI}hz=+wrhg1LFa+_#0~G@gXh3g*fDbhK>E8#X02q!0dil%&fLVYk55O(xfc&3} zgGqzut-c!!z{eMQe?*|a$=V7zsOp=n%|x#QI{p{%Kj5uC6!@t3rqtk>1?)Ih6#zEh zOW2!hQwI@z1+#$uKmkWn|6upsuz>jCfcC!+nEoZS1iirlx*>p{Q}(~8pC;i2uEgZLFpE3&m4>d=7p|A-yq

zCHDxkKmcL-ZeRg|Ux3pA2a0_+x*(nH&ph_O=E>habDR)AyX)gVf40SZ{yNzaQr_(S zRPvPo0^!O&Lzc6F&F&3+$mXT z#F4>v`}(&Z7iuaC4)QM(iW1z6nQL`G@B6FPnFSQKk4()Rs7eAgUJoy4mKyTfHPIz6 zSs`oI0uXZZ(&5-&%F>~Kcp(yt7IPUuLydhkZ2w#Vq&ppWs0#} zq1=^GF&8XU`74#Pu)O=|RdagB#-HY`pbFK)$JOWTq|Ur4)np`!vvP@XP#{x)N|r|u zw*iJGAN=7SNaoOo9QkJQ@+MynEHS>74!D?o<5w3Yl2oIw+=VdF$2GX#9fL#U$=6`j zDwaSeW{S7__7ylXb<199gi)dBO5pVaW~{P2LR~tbvo}Unaoj;6a*^`HH8Bi}KlYZN z|1toEXa;4Z{QSJCk%0erv22@gXlNM=Ig1b{uSVpna&VcA|L=_5Di`QAIQhOv$-;yT zHFc!#1iT1T1>F*aTD&UDV!9GlS;a|Ymn5-!a5>k%_6Bp+e__q&3L8OTC`N@)dJGJS zVkuNp?qI`8?L4-?YBE&L3K4q<{-qc-guRX#oq;m9)jcEXWENui+F;~C!Pnn5n z6Fas_sZu9|jnhrR>9`A~#Od50d=NDsg@Y;-|I3KgIYL=Q9) zec$bsuG_>Q8rGtTZX%CP_-xWb8!mL$!x{No_KTnpEz;OaBG$O>#}Er`KDPt~#t3<5 zprhOr5Ut(jSjKJuU4G9|%uF>H{U3$Ry5`_-g`rdS*1snzIQI+l`^l{X>5nri@FD&FD5Xd*i?r(U z4;Z=7-U_{&lQ?SiIYPM*SVnul$8;exS8=<>`v$06s)d8)xDXZq;fGiM-uRGB$l z51zvq7L&4yte{CFr~bX9eT5y{=1T)vDBybW<>k0O@qtsrf5VJswc9t6su z1LQ*K7_%Joo)}NxKp%7L@A2LlCEOs$St-bVchGORK?Tq@H5(t?&uMB_56EwF6sCbeG6ChP&XSTSZ!MG*BSsG;#Z5}GVAq*N zHIt|xb_P7CTxk$LaF=w~XR~Ik0>qOrU0rl^RiJ5-QSBkm^ZpNIZvhlnutp6AcXx*n zoZ#+GaCZ&v?he6WkPzJ6-QC^Y-GjTs4tej@*1uI-wOdm&^L6*9r_Y_5nZDH>Y5{VI zJOkG1fQ-k2UEkDBSuH(;L?U1YP9gw}3|NBEJVQ&bHs&r?M8^1Hgiey`C#SC~9$c)L z3kJ=8OO!Q; zqGkL>N$Mmc#A8Q>3saCS64M8-5r`u#_Q+{!c2ovB5h^GQSpz7lz3CF7e}A#~h-G2< zD&5OMFNA;)Y>s+!OH>U>K#gVSw&^>LiyN5>s5;Q=3ci1+$`%kAsbV24km!O8f>&jc z#8VE%kl2F!BCg)|heLi3`VJvRxHm>PF2J0{aR}v`8npj`Q1O&WC{jmLSHB;pg_#+= zB?lrm7ngx5-O6;%GsR93dO?!FS1LBxg0n1+QHVH_aPe>h=%34O3{Eh``Q<~Qff|g# zR{duVaeayvW*Jo8Mm!=Ep&lR&VNL?Xw*?)sjx1eMMaTfjkO2b;f^S+Ig2aV1wISD5`+l3teHR;p43 zZxf1P5|rE~^Y4SA5?o?8ro7~4TJ~>a`lblh;L(ep_>7fU@+CBk9#beX`q(cOd>~AI z>Nqc!Bn5oH$QS$3%y|KsjroYj&zHy9%4Ui)=T{&g7D;UH8;vyAkv^QUwXwCRoX(d; zKa{Cq27+D#jqfd}rQcs{I>RLFx022yCDRKdToNc({r*5l_IaV(6miOn9S=l*WIOQb zVXOB^{@e;F{_ST7d-N?S3PeGq=^GdPEZJd#cy>4X@g~IG*tf92Gcj@8CmTyiymiFfuf)kyDQlDPhPlpCEDEh{z+6`3ZBl{1Ml19Eu-ejd;>2~vYT zMcEa~_y}_xg41p%D^?Ih9^_^cZsi3%>KNA*h%Eml>Hq}9o?{E0QvK-qS~Dt2qSwzO 
zNjp>kpIrP#2r_Dc|Jw|rd~}ny_;%y;dzPvI_7{AkNC#w-m_B^;j9mP@Sz<03+~2|E z24Ve%S4#mh`uo4=e2&nQWC_YDD=UF7#l^)$c~btMtd!w{!QiN22$FNe(yqet2$d-m zF&riCtZgLFHz_16{GsuP08Dt8%3d3_UD0LOlMdRJ^zWglMOO^lX6e1Z5ulGa^sP2A zz7Ua9g-5SZ)-i_nK?pmUC(*4IuNAMEF6S(^)HoaWo*o>m+>Rcz-r{mCs266*O?@?k zD8|Y2h@peA0G|itLMULK%ToD@0r=LSAOEFDpol?QOecJ>pHc!9rTt~nS0~yMm-MYo>1_y{z zi;914=|c(dEC5pwL@9Wk=4w|UkA#mwn=E!cm zx<&Nt%YIfg#MEJoKXOWee;)krKP-s>1S*7Grvf@YO=a7Xc z34c7bhZB<^FN6uU9W&G9#dK4QPVlOg91cDUEPxZA#8+x7|7cML*i3B~G zM0mysHB1)DnJULZObUVlg#7oj{%!IagB+%DXqEO}p%q&IhJk4gSiE77_okL$;Qo_Rcm!$wzz`|US zqU49*<`Ad6P2pvww%9wQ7?js`fT~zv=tR64+$6!YW2@Q67v2J4t6NFl0x2iEB*J^$ z6hg-aU+KQh;i~7KWeAt~HAt_2u-FQJ11?oue&-dCx03rBbQx)r=#inITZ2xgi3;ca z5YaK`kqv_;dH+^I2a|-b2@iz^+aA*#njsQ)?W2;xl%&AouJ8MV9rpH}={d1x!Axk5 zjGA<4)3uV&Gh?QGFuYygXHt_Wy~2atheU50ZySw{#OrlhhzD_CdoNNDYt&b(u-<}P zJ@6+1ojCT+MnFG!U|DZVSf(8D@b-ac^D!fyi&oZ zba6Zk=x3HKdps@dO)g8a;wvZ@V6TR(R9L~7(W1XRJ!xvKp ze~n~oE$ViIWwEGzobzUMglRV3aI#E`t$MKPn~wP_n{$Qa70AV4^T;4A^28DgCq7}* zGzcFp1dEV}vY(_%9?UNEH4akaQ+vHj@r0-6M$wH!=t7F6lIc(Kyc`L6k%6l{RvF0i zNyn(1`z0TpmwM#~CxCe|{0=}jODyTvn9v%m4s7~w0+Gs02(il7?Yr8dD*3uO`CIZ^ z3*jr4oZ3*CuMCo4b}&1_IbS-zp#s0aCIQAow8gKX>6WkQH*I@yp`+eYQw)(g>pJ70 zi(tY_`jmVw#d$2q6cT9(@Hdv9I^axv);M~b#yl1u>2dIL+akHm&kLXe$`RkI^E#^e z&(8Go?=MIYK%3DzKYWfrYmhOJGzp zbv*xVU$9%catisB*Nmb12ui<-At{;UfHnaI^miDtNv_7X1ODU8A5kB0Py92K#+J#d z2%MyuSfvac$-LOV+mw*Y|zt?=; zWJPV8(gX{klpaI|Duv~b+Tv%VB%^dyFUJr;w}j1Jv*HH|8?nqSavTyQHuG4_F+3i# zD>Rv^7r2e*Vw&y?#v>*s(SIuoE>v~?M*5prw?EkprnatSTkFYIDV0(Cs&V7QD5)eW z|8~DVr&`zA(32t3)~UY<`fK-4QdJ=Hr=Bsr#!<(%^B1nRDG_lhpI+Z1W@q7c|4v~l z|HgQt!m7`a#?0g2RsaO`<-To2$i55iVNokCg7(Y`QhM{zd=fTeKYR8fPryTVn^x4l zNuU@{R;(L|4S9AG3e>?ey83Cc8;myVI>L{Zi3T@bd zZ^VvDn1h=`Lvw*>j|8APqyZV8sJ2(pQS8jK99Zv`ySQr=onl*ZFc>)XxVgj(So(uq z;r<}G_a`Afv*Kg7D30*{@2Jpe(SH5m;GpU6)&3~3?=onN&;(~q`0z+*cF?!KxObm8 zu)hM7EVbN*YXxTwKeW%=GyHAUXfB{Pyfd1z21`lig4_xlieHBHGcM>l84tKXbKZ&XvXTQB3lF0%SH+_mCU^IhuqXt=+MhY#v>Z0)=#EPg8|?j0-} 
z(0TeJmU}v{+Hs09+YyJ0fTJ0LI>S{LpJ}hGSdI{kwR5N}{tXkiom0!=s9B$?ScIfq zknHl+;(4~!KF+wngqwg(vTmY5_ozXZ*||vM{(9jMJ*e5aB|@VElcvU(HwiVN9PO#% ze(yxG@M;|79OkesUc2ECGX(By-3e6PJg!HQxEa|+#q@P;M%fZxiaN`gE{CJpP5O39 zO`5r)uBW9MxRv;GO?XHh zn8pI0XgOL=1^ca@=aXeP#DOOkfHO=}@dme<#%PW=Qyn5TTZED4N}B7~EoEN5&f3w< z;C{^ack34iOT3xXuR4Tq@HHNi?;L{fO79~yQmb{~Eb3qT5HsbPZ`z3(QZ>Yu8;+e4g$Z2zIIcXaqme{mPE1+#nEg{y%U zbG3;f83w>e%C3k>)y!Q>!@_~tf{|wbgNA!Vgf>>Eg;vYM#Kl668N_W6oJ2f8OR8kp z2imnB8e|MWVR`A)68b>fgbPsjAX!yQk1Basrbnz&N7U{qFbeDU&*Y~0>` zN3W!;+v&tFuUvjS-*gr+>s*{*T&O9#igxeIS9hHlBEh1OtUxkUkyhw=| zNhszdWqrvdL;f8@W8^4A{<{i5LGPV5*`Sk^e&h9fZEJ2&xl$)0gMb#6=jbCdl|qm0 zzON}KIjEn)r1JilViX6QG@~bLE7*@i`AFU@(dy=6vEP94+5ZRSNVVVAcNAo86w^M1 zuf$0oz6*Uv9Qc9t@1u7NQ}tF83n^r7gXU*CrgCMLt(e@dh}s6owIl3lbgri>{dYLV zkd1@qa!5#&TA`K>rX%hL^@GcN8tOhH6nan9uxVaxX)k=``JRxk_rv}0)YU&zPvZFK zgIuBwxij0>8g>vXpG6lue{Y+3Ek+e<;wtEUV0#UWGwXYO7ciL>_jl^v)EgLeB z3-wvoK3)WovKrVgV6@JNiI3!CuQjPK);JcM#nlzpK(H+san{axw!LUoV|xd9t`u-7 zts6s?Y$#i0EmwA)VU)jPn$sRa(q9D)D(&an^2JQPnLTk27AkLE?jMd1k2_+ut>DGi zUpMuiuKM<=5~xJNCC#+@KA@w`aOL~asFrM7yz;FWw_l$i=1=mw+HySHZdEcC7@s?r zvJA@VO4X(%c34!`{_+wvKR)IpL4dgLZBuvN;DZRf8Syz~$`AI&{reqJr$uM~tzz7x zQ48Q!OCjexf35ZEA$XB3?(T_KD6UFwHW)L|rcChh5cm~fCZJJJZ zKuGg_OgGob_Ykw06*pL{Q}HXT_bi19#yvS}$)Jo<_G)>*4|}|rnCr}} zC1}AwM9%$9j=iHOKZu^-W)uSp5_sI%Dq1+f+K(FoIHHHcC4XTE6l!eu-+2KlwRwYlEJO7Att?S!E zC73R6N*eb;S}!N{FKD`3WUgjZ?2p>GoZfQ@&zEvKe}tY7a}+B*)L^mJ#E0sn)mKIy z$PijSUSs#)cogqG<(Jn(KC%ub_?p3fFnCK$H~AdGr5^|mk_JdOhrogv-`Xy}GhN=I zB-@)xSXHn7NsNpLayX$g(YW*RE@@R)wVnV?f3u%XH!RF_)mm7Y-K`?MdVgj*dm1j>^Hbgn|;R%;lhj$ph?ZV=&p18<+f>uBkE$2 zT%NK#&CT<@ED=+bNhieyBiuOZN_MiuGG~`7SI@#(ne%Y|PFGQPDWR#{GYZe)ap^wF zW-~zP*I6N_T!|y<(`>6S*j$=@Z;TwE6(J(f;P zw@g;!JS9Z7F4E=kDLSsLOWh16i-n;ebhF}eHdLN!9rYphGrwl3c8Sg$FDChPwzZp> z9!w6`|2Wy7o`O`@o=|MtvbeoF=`jg<{o;udP|Fw6UBX*Oj9GuMzmpTxa2P}@=O-8( zaeCCrIC_7)+a4{$ODQHFGWcmHoSEf;K0%che|rDnaRl~?>Gs`D^{Su`iSeG9si$X+ zDU5*~=}KgctS9ZQ6VB0B$rt{&yq`;k#E&6C)S8$;0s}Ift=K@3K9f97e4dHN%mxj? 
zSpU-QA)(&4Ttl#2RXo~g7;&q9j4fm|719D&7CfO3B+T1=cmC6T_wA7HeF1A0(}M!w z5&ZDIe#O_d0QI4ly`44L3w5$zTIGx+xUJ^PhJ;Y(W3`hTcFYMG8U&}NS*E=5Xx^ke z_D4D6G#0y&)QO}}e1Q!Rs^&?UhqMT?BWp1Sr<|Sf9n9|qGqn3i3HRnVnzFudo)$7} zNi{D)^L;z3hsq1a8a&6aEjwoYfZ12##%*~M>79o6f?3oL1R@@xyo+;7>@8 zo^lfHrK*N`k9n1{A1N0dwKkY@9)I(n(I_%An>Pz$m==cz*UaJ@W(t1$ocW7;%qC|y zn_}kEMThcu@f8dt{NMtYo3B1t9U9-B0?&8kn|6w=$x}Hey`-HiBE6db*F}?dFrsWLR41(e?F6&do`}^TD;ey;Z?bE7O z+VY!wg{q&f6GWm?HCoxNq7(5Ee0%6p;|}rTyT$baN`EC?uvR(Bl~~I=n|*x+wbKT$ z7IiEJJ6g=H`Ofi*V7d8e4u?}&we!yxP28))YlANmEa>!ZcIFbh>)2T~PNr}QA!7-9 zPR|GkEPKFQ)z_C=yVi5V8|8CDxd(pmRxCzo8r0v)v^+FHg z&k#*~?T0(+H;WqhE%q%RHuT^K0FEh;y1L43atue(OsMJNI_HHZx96cpgZj-LF1`NC z6qWJP2KxUZF_%a?7%At8`>j7ohtVQ{CU@MfZwBN1-tIuAF_6} z>+-cb5J}IreI3vJz=i(yp9;p=$B3pd^Km*cJeaqGS-hz0^1>QuJPg)IEaxEpw_Sxi zB@b1z#Q~+x;+1p~{1#R^6hoMH4pc<1Epn&#@L%I!e3Mz1_e+IA&#a%vE|d7R6Y3}T z2#Yjz%;$JV&AmdsE+>!((H%3x0H&~9%-kuslkA=At%fgn-M3y(YPx@H%Ri5U0Ss|o z8D4l298Ro`Zz%ZA^Uv%)1%ecezP|p9Kh#@I+&KQdjNZ9Ct~*O=Ui@|+=)eEn;~i(J zY?m8Q_|&>^hPk#aIXhcGd#mI1^TGW0tjPKi?8E7ab4BJR)|EY%zIzTU7w}gGQQ?>H z0sVsJH{S-7k0<(-Z&nUeIh5wV>!$Uvd$R1S1BZTh6XMrKN~@cQnUEhud>F-Anlr)8 zfG*l0pu;zobs~Gv)mp9G5;aH{l~Yci4HT$o@Uh#;Obj*O%tU-&{VB-ln0KFJAMFWl zTe=XmB@P-?$z}eebS|T8(k8uftP`Fq1AU?Fu8bkWcez*c+`A>J;c|8N3_?SjQT{Qv zi8+0rLrHTZzw+{efrvk$|ES?Ri&qn*2D?hjWfSj;c8||9UFK=?^j>`-+yQaAOC(LU zWy@8i8I$VBR@7YBt~-<5$?95;>1&_F_a+jV*#GC3uG%wVM%3B{TNVAqde&@fM0+C} zrHX}|u3NS1IFs(tx=;M+5W>vnt(n%u{tQ-mS(Q-;dfmG2e5ZXym*Rtsn1$pn*S$cf za5KR)TiF=_yC6xOxuni2dUrb&&eH^u8kz9a2Z)w5-Xj96v5k?VlY{Y3>wluHfdv9A zBL^oVF$3{GS&NuKi04f1?``DR*(LcR$0{sG`03}8SV5Te_pDK(DpV3%8?HCz> zE?C*vfo7lXnLcB({HuV8oe?;~#0m`juLKTY4#4K85(gVliSx4vCQhKp%<$RE%>3EQ z{#gt&3sC%zE;9=-9VX^a-^^@4hfK_$xiGW+OY5H*+h@K1QD6r`$j-_6KU0o>3H*bW z6FBl;hn$}Y1JeR}26|%otcscQAB2pbBJ-zH7UoYl{w2%8@(*VwV)lRdv#@?fW@ZM0 z@(*mbf99Vy9IT%c$oiQn5MZFl#_`{I;9&i%;XfnnpY?KlVh{X|`C0mZ#80RH0O9xq zoSA|7zfX4H`)2{1jKD0Im>7ZiG6LuD6M0TnV0r&B0!|Ik3do}WFHap}9pe9ki%kDF z7eD#O@Gl0?F;HUojP)<>|5Ezw1D5ic_-A#Wwfuhx`cLN*kx%>oJ%CLAAICr0{mJY9 
zG5V9s|MAbr3DgI2_kXPY|HsjP7C>5l^6`H}{BHyxbU^7pub(l2y!GH{we{B{Z20&)~j}pK!AQ%1* zc$tBb|9h#k0)hMg^Z0+)2afzd!m#`=3=a>zn7Ngcu>-xB)lVm55o1GJBV&4LV;fT^ zGh#+2HYWD}Hh@L2vNJRB@gcze?-R-`Z7mH(`_DY#!=(Gc&rLBA!9oMO9%T{bwV@yo z2nA-re#oy78hw5FYt%Qvi_*Q8sb|SE4%Ukx;}I)4#-0rq>@=&b<`MDnFgNe-lOOol z@?$m+?GJBlq??Bi{3nk0jv1x}f#^aa+Xk=?k7Z%GqNX5&e?cx0G8w4yKOSLL1A=0( zx^=X}+nYo}spw%*Vssd72TozUuYFK+h;qblQxu%- zMRy+IVjCzSmsyn)692qak!R!ki+5*9x&`h9zDK?u`xL58=-N$}vm-uYp1TGfr7N*E z*6saCtC)lX?nrLPK2Fzcri*2Y1-4Dt=Q=^BCqHwT4I)ZeiFZEiC~1=DyKMg`O;SRS z@%8q=-SLp#>{&%S;u1TY|DxyG-Mh8pH3^y20}iFp--0>-`BMGRcs3{a?VFIiikFMe z&XMTz0xNeVcT3Avu%AYEwH+j!@FmnSXou~qt}H7}RE5n5T$1tr-UAG8s#nhj>f3b1 zEn}cgUTC>lkZ}{ex0Ojyn;3ZLmY)`wVyrPw>6wbc&(JMoQd)*S2I&D+lkS}AdmvA z^$8wfZ^0r2QzXSbI>Wg|_kRW$A(x*ssvTeW*F0_*^RX^Wc2s`x8+=aD1x+8@3-iey z#3!!~F8)J#U+%0*`;LD(Xr%K;F(EyAM#XDUI+`FGHthL3nDMmttP-O(RsB4P8BUT9 zs{41IVb6@BkFXn+x1f?;Ho9or4!tyY>zRdyX*5B@6*%5pnb*UWzS#%eh%W1IlDU|$mTR=9>@Zv2iU9}oaW*8bMVY3s`2%5*u2Jr)(v{I@Azda zx|M2gG8t&f7@u`&t9rd}YxcC?h`FfOyUM z$>S33QRNLa>W`<90{6J$o!Jzp^5GM;-^(=#Sa z@9>``@8q(VJ#2lZtTb$$bP$WZr1u*|mdz#2$G)prOV}GWr*)8HGz`;Ef~yd%pVym= zz6%@Um8>K^ADdJu#$VBPK3geUGcQ}=2#5Tsu+_Ogc&267dQiyD)G56#xHd{Df0&B! 
z*Bbv`A3U?Vs z1psP%N%_(b&0c>-O5oB|3LkH%>wTNQDZ)j=(~2QP{HV+Wa3TubF#@tBRm9K8&Dv&P zedZ^l|CUWg%~4<#HDFh)=uAFkP}2bl3ENz>`WQR(u+JJ6IEc*I>yve(A9Q|GYd zN&^5G0WQOTV>R1 zH-&2VBkSe!uO4QN=`6a#bJx+K4!>-6W43DCr3jqx5B$F1G+r!jfmgAwpV8zOS+*XC zIn~#EDRGud$;|Dt-V&;acrC;w^6pwDp4od6I+WKel~ps`*Ns1fW(_vE*4=2kZ{S6n z^1kbMPtEDv*f%ZAKLFVPbX+u1v{hH2+28^tLM1fFn)QgFGx8+O>mnZLOJ$o#e`|>Os?7qg^U~m8FLfbp6Nh>$%P6 zE#Hr5cUYcI751=XO_8{Ar=CSmaLZAg&a-YFWo!3eBi%YoWpoE=^nIE$swd7Bv1%+TqROz{cDcDNVM>p?#JPK_hc8NOFm7Uk}~ z`f1^!6i4hSp4%llx~itcYrT!l={~P=)w7j)X4y0m5Y^>0P_#CFWrB&T;L}XN~Bs`m=HUjv-SSINur6!-by9~Ej85HkMtZMh+ zWwzgMoMONB?mdt_M+&-se~4(*jej%fY_m{FzUAih)#^0OHuc5*{UX!cdYDOT0Sk8%&aHEb#n<=B&*Ilz ziC3}7=!=y_bg9bghELy%KXVDIYO(ZRoTbK8lwWhL1mWx}d)`TZ<8q(R=xU#|XAyJ9#5X-(;lNZzj)#0-I-^)*3nQ4keQD(FJ20{JTE=6(#*;r`FV2yX#XLiwma*|&K4XXXGMoBF z9pl5YlY>`lXDVgs7WAPxx?@-2u$Me7*4wSXvd2C3pzwllJ`=n0mT(@oG27%O?NR|x zoaz=khY{D5@I?o`| zDduesheO8SL1XggZG~Zqh4>C3#wH@D^a*d2^uF7i>kh`O$3{)h*;?=dbFxW5i)~p> zbhwn~6)8D?<+OG;j*{$-ed(sJl*fjLsK585$6@*A-BE4JN3C?~A&m!;VfGEB&9A|_ z481ZYq{C~?T`Q&`4$Kqnoyj4zm>I;d!E8%zqXywshdM0!8}NM&!u8ed$%Nc?FP*a~ z-KT;2-eF+Fg|D4pg=TR1Y^ngi&PKyBkuZ*Bas>Tva`FZeOC^lQ+RTG!gr;HozS-L- zV>B;KkzoB;A!H!RWRx>`X9`!NNZB_; zDgRG~j#>I~Wd@YF_aExWg|X&Wf;d*aP1R#_?|j!|;qikD=`v=xYcrt^nQ2O}g?94W zpJ)p)VyBCMC7P5f6Skl27A3c(a%QP4j!`nRp^jRl_ZMPi?EvmS2GQ0ctE>h{)_pj) zM|=e+FCM%7)@$l|c=UTMuQqEh0=DrV-qvdmpg`@+#y^{|;_u?Xt;1|CYpYe+cTT69 z5@rd5yl+|Ku@;`iI&V7TZE7SN-#N}rzW^@sHlt5~cir4x@UOH)OPzNz`b=Kd-T^nK zo^<7GzU_GU^X9o3@{y`;YH_O5vG^g7A}5RNatUt!(pYyxWZrqe$GMAC@P;I?*+08H zz(uqwlQAfn+Nq~gnB|;tkN>vL+MYRODYtr2V2(hircd|Mp??H#vR~R+I`6K~mi^sb z{bE`qA~QaC?fpU}$bDy%#w&lWVnk%>S=^Mu<0HTneKYazd$P%>ZFP}$s; zg=r%d*Gqq~%31c}Jn)j$O({QbRP=BD`=UE-)ZjH)dXvrZHkNQ3QJ$!IC<;srZK3 z%~e@Yd~@<`p2CT*U25JvXZgClQr>vZns_Y1zF>1)LFd*}SQ0DiHTvPLh@S<~?4q@2 z&r~0`vaS*1F=(1|pAkcB^3U^Y_Tfav0!zrWqh4qxw^ltF@5Nq8wX`WCSHzL&Rkg_| zVfl-FPD!N)yTu)6e(O*r!Ng4jjdEvv%J^n?nq7LU=scLC!pu$oJQvb=X8{jJMMrPm zp)Mz^<5{w4s0jVW_&p&KWV)7wad|& 
zwk(5o1#*PKFE54QlX(65q%VsWSMij{=2NTv4uQrI4zXu9VHm1q+V)`>ozi0itU6XG zy!H_+vEOacH$%xasMvO!|#|YcL~_ zvKe!J7=;SCm%`&{yO{_t`B+B|82d89IqJ~2fKHcCDk9$-az%rW$W0CJtPBaq7 zfl?c$CW1%CC9(g*LZ^c+1QxBKKZ9tz3L>``=uc5%QTn=Rx4QA5fdnN- zFFqPkaLMHby=Nbx$;8KuTi!6`^|6x5aOL;u!N^<#QaSJuu!P;0hR*unRr$^_w!Sd_ z8T!rzkWya?v~t51l-e8_Y4$-U_Q5|chYXCg&5g{X`?u!=`wnbFJv>OKthw3{vKNA* zgtp5juA?X*;zFQ`9JM>t;+*X@OUe7=y2=`HDm<`H^FcvTP5@dYne*>|g?jaayRYx< z#!KUP#eR%3co?N?n1*gax0Ej?B&jGKjE~qfcmYZ+IW7h?AGPS^}Q3|`o3*%mP5IDZFQBEdNg%U z$YhtCZBxJl4E2o8bX*QJ$d!O+YNb%*37mYz7u84+BQ+Ifdj*pcPcrnG?3=uXDtlAE z6Gz*;JE04Azms^oUEwzjH-G``a=tLvzS56+L~~i)Q^~A~5o%T62R=&)AlCoudP`=@gvDOHNV)9?ANP`lKy_d7|0-Sts#! zNA{pE?G%Z|()Ox<7a46N#p)_(mnga`)rcuHr)jYE=QjBPcU7FM)yaG2E5X}xvXa06 z69I__-oN>cE8kNUP*~Q3r*}%(`&ee@fe)kW3X)RUwNFK#rR%WV&dc9(hFZq0SuEek zR_$z5;x2DcHEYOW1u>^}X=k##GlC!dg}Y_cK{46{(a|GM_{gj&B3w|l)N8AXENU#9 z7>|q~dG`n9kF80V@I85B*dMo$FZ;$J;*QCWd}o*dLnVI?uG$gtCyXkU0+13v>PFyp zMp+H=>|){ie<$z>jHLR;f-?QBXzBmZ+8w$#?m z9{g*~DoNhxVE>5h;w}mrP~VFk#+a18H??ljEUtBKVk(=u#mJ@3!KfqO+jVXzsfrXv z7_+u2N>*M>Rf^Uig1Z*QWsUr{cO*Q`T{Jj2`Hp!Qi%qV#Dx z3|qBAEokQIcxYYCf1xMnRo%zs)OG9gb8i;4I+L=?(&QtMhcr1`q4{*5s*P8B7AmbR207Pv4dq@7Wths_Q$flk2*vx_pJ0bWLf|~)n6tl5TzVq3 z7Img7i>5MW(f6(MuL=L3w45j7z!&@e8!d|%y`yDRx|mL9(MaiMeeD4i1LdC~!ww14 zz_aJGY6_bC7SN_~a2F}e{LqeqE(u3g zn!%RgI_fEmAv`@K?)S@-Ql`Hq*_Y$07VWEuL265((LLHoB$m!WQ3tg%QBZkIW=V#-DisWNklF^L@vSY^m1+^7KU7usF z?Mmr(sT{IT59^+Ehjc2$%SWFRX?@ljMKSR;+mZZ@{6|Lmx+)q>cCE}0kBSnXjNX}~ zsBUT4&ELb$qL-?6k_%)VbAw)IXuP-YAl&)=G zv-s=QhKnt>?;D6;Hs*XWZS@J^%!#1$m-SC~N;X=MfZ*@a$D8%2kU zt%eSi1XmduyS^(lJ5LC4mePImZwWKN6I4M=0qmxj))^Jm&I8L536fHl11(jB*kRyN zd~DL0;HDZN{X(&h<@IIjKW6Art=Ci!Snsn}$l@coYX`V<>uYP4lV!Htl9oc`Fp6Ny z>#=DLx!Y8*leu8;LTd-iUDJj=YSXaeu#0C|bAB3Y`aX{A8fWLEfTpG3iuc-7ud>Sc0IX zmjlPEj8|m5vPGnsdePUG`q~AD{Yo1OmrWxSzR*782b+ax#}D@NzKMp>7Msh~3E?7i zDks(jl5^{?n@t!Va5EeTs|zt4oS5YnX2KT2l)}$-(|G_f>(>GR>B^%DUZ>fvsWtw> zfZWQX0v^BFZVX$;;(+VQqblB}*=`maqS>yc^{W(~p82knwO2M^vi>TWjc4M2ym^xielSX7|>dAQ^K<~+jX{XD*&j~Uxl;jE(}Pm 
zER_PV>aPOWG8P9MSKL(bCe3!ut$FeRyen>scyeZjjchLFyY|*R1px8-t57x<3&Rk$ z2FqMQypO2?Hfx?TK>5m24#2~D&b+r2FuvmU0}sh!7sPt467W)IU&(epJpimt2hY`V zSJrwh8=z@zH8pUyqFxACt+Ov>vzZte954?!_>K}M;b&Ps= zSQdsAYzJnB4QwTKj8b@qmWBmv)b)(Qco-{`i(<)alJ$%_cw{S-3j-|H((?oEE0i+> z%`22s!Qyy_W`^Nx*Y%7Vcxd&pg#c}9>8XL`73@kt^$K<_z{=Xlf>apK#yZc^u!$|$ zf>Z`ixQ?XQe7ZGiMMP3Jdv7>!CWN_8)U&tH3adl0-gxF2} z*L=QStj=|H18q$`OrrSd)h2ZAv;3yn{H9S8ZGP}S_lkQw<4xe_Qd=1&+VCd^>YKP& zBzw3gT#GF$#}>3bZ&1`k5JLdhuniwP+FgF~U2$=b>Mq|IMDd75cES=>E* zBtCXt|Loiv7;1hLKV1byW8vPw@>8=Ed^84$Zzd6+7>#Xp*jvLto`_u@)_BVOEq-81Zo1cs7s+`?qOiOQpf*(X{%~tD)v(#5 z%QcYt;dW2M4S1rgS#8pn>g-;I?#O!o6L<>QtLm%S=$3}7ZuC~nOWcEcY-ny^Zp1lq zt@TDF5kpGbS@YU+%{=-uAyeI_%r#HIN1wCrns?NdcC*H-*CA&A%jhEQhrv_$zElFe zI-luN#)(r6s5T-B+`a{o2Qs3C214Np%^ zoH5j>OG5B-6Z{ardP6nOSWUb!oD{DVK}4c4^C)>jh&rK>yWhS~!i)O3adU_<>FB1$ zxk+=xzH9=wx`hT?RhemXx-r41HEmq$Y45H6cQS&Ol?V zCe)Z@6g1(97OO_Piz(WeZWKRZnO23iR(--Wd026n=~sS0ep-HH{vTs5DHtinQI>>a z+QVw}>aS_9pPB+240W{3U?8Re(fO}}Mohy4of1J3U!yybaG@QCRJh8O_0 zWqC<|iSdZ)1;PiF=`Yt!v1M3A(5R~G0+L33svc_ z)=jfze2KaQrwdN+ci8Q?rEv-GhRXwS??=&%rq6D-*#@^b*loC_a0$HxT6;R@e9zxgBW}h&Iwfz)4sA7Oov@6LlH655gl}m23P>i>fQ@ zfphV>L0(_}piF3O|6oC1!Qcv8pG_3&T6NUpDMo{M{#|D-nxZZ#IR(HH`dSA65MhFM zkw(oOjbeH}UF`@#dmrN|=+NVDURz1mvrE^if5&C@8qGo(N});AO$OJnl5J@@FKexS zT4Rb;<7$n!5u6CskL#w~gMQ_~Hq;x1NR4%cg%#spwu_ja7ahy;uI!#(ji>Gk*H@J8 z&U?W_SS9nfnt_`ZY2-hRA` zzgNOt`X2r&$gGrh8V0?EjD�B>(Qm0@a;V$p<%7-{qjV_ayOprxZL{wT;vJ1r=I> z+&c?~C1A4)<&x$R;uRk8S7ulA7L*;d7g7h9uEQ5A4^u-411h|@`Y6sVZvj%Ph&hB^8t!YQlgsBZy0jcKCngyv1 zqv{|3WwOTsq6vRXLY)pQ(ZAC#e`^N5g$@MkS45YAU!iRCmoX@;fM$KfB!rSKT06cZ z80v0#d5{r797>2;K^RKN*)C-}LQ2rX0MIRvLq95FB56o8LBb&jw60}+9&ES)KdTrR zCO^EqU)%bWKfr{$fQLrHgM$fBA0a*acMwC|q8NXqCck7pqrU>D!<2uarJjLG78*s_ z7;jB^?!Z(RO*ANp-l7j+kn5%dIjS7_>&YOf2qU%6=Bn*c7IH*sU&Hs^)N(19;FgAY zlU*k9$>-({o@4G-LrFoujGkf3S}*eKO9(_6^0o{{N3TpGYx~KGIp4DK9-2Q!M@Gj5 zd?WM%XOff%%c~De5wtproo!cX5!PFS3V8VkdpIkhHG|BbT!ING6$8S$A88Z}iG9*d zbON5V$Ps=0JE+wnv>1+k21j~*di@UeLFy0>UXON1#s)ZEYMPzXZ>j2}6{u>xhm~fN 
z@uAI{Puei;E8$Y^Wdd}6at7$f>kYnp?3vg{G^wnSSNOkhJ!;e2_+7}1zn}D-sg5nY zpU7hSwlDd0vgTU0qg(0e=QtJ_^?E$4)HQMuY&#CLt3pXe~& zk@1T49f?OCvJA}0P^^C4y2sV33=H~EwSLPT=h3%{M|CgKs8FPStfLU`qqxbw!yT!s zdX6t2w*9UEh57r$JYxcz!E~O8gPHA`%md_15=&14AO?tcT3DSS~6&OYOwP({3?q zu!;aTCpRyXLL&KZL#kNcxQpli#n?MWXBK>azH!nW+eydn*y>nM>?gLZj*|{Mwv8vY zZQHhO>-Nll{qD@%x%aMlQLAe2+NbK>S#?hB&;CaLY;s0yA4x(z=tqBABsz?AEReK& z&$*${MiMV7BuJond+ci1`uvz9oK<=hd>H)n@g}^Kdkjr4i)pk<)+%jXg|I6XekDrq z=Pf7=(Msb~BS~()FQ{as=t#)AS35v8DeJpH zGwjTyw77ZV$fWIBie+ti%@jzJEkRWj}x9XKZ1ccDAQyg^tLFfV8OB6SifGa|`#VsLT~vkxk!5ho(WMJP$?YL}CB z$UIUdD+HuP0#ejT#;Wi1Hqu60TPyVBr0(POdgl>^oh~-lzl%1Sv8fXsK`W2L)%JqX z`7(~-kA*31ZXP~iTfW-;nmanjBu@I3Mw1zc7a*Vr=CtK#+Nr0Jaz+tQbE+?T)juaD zfl7b2{umR*EFQA0Kf}!U&Y58O=hw~LOrRR#=Ca)po@fad_)eFqX3egGbCh_Yd?Xa z%O|-F)gE*n3ItJNg#KIi*Jc`%zPF^uV_quv?Si_&$0Vspv#yWEi|$M*vbX*qm@VRk z8mF$C06brfy20Od;th2dV3u=Y`^q+rqFfPNDyCY=mEsD??bk54oeZR_*a>tcu`Lj6 z(zECC`TNMq8F2rIp^7lLut!x*)RN4KT}M`B3wc&U-iMlodZ(-2TIhVE5F~uGz41vL zYd3Y-bY~d$$L^btT?k>)Kb&F%qA;%b0wsETCMu&Rog!-KPncFE3CJNq(3IIkl95D2 zd)-|=AM(iOn2CT$>szV&3&pjEe1PXYC&Q_1eL;a4r&vjm2X*cr`_m6z=gbA z!DjTS0DnT5X#_YtuHr7$r=AIZQyE;YmvV=WDTa)M6Iy~4ry1^#PkG)Y;Uwms0SZI=N@UuZ`EPvrgO#P(L)NSpDXkScJK?tm)Gjej~;bY^yGbMEK; z(?jIK11<&MdcPjoPd9&HOd zKIVPf5RMdVDKiXDW2Mf?UHz1G3t6l_n?N7-6o-)TFng>wDsC(DVh5PGi9HuPx$drx z)Ag`-RF>_P^kKn1*5UAVJo83=27U?Cr4IN-&R;BC7Cj~N*))%45i-ZVAMJeCyDCNe z#PZ#KOI-Jo;MU%x9<>tsHW3ymCqlU9;B|93A=dlEzDiN;$^ypoTB{3jQ8bq&=N?o^ z4g=K9JOwgVwsEQHKyDu^XJR%80=#YuJPHm6=Ax@X; ziQQt+_L*;3*S8SoL`~-4p<6nEAv`?M#k1@r_KHshrykXb&Kr*3!E{UOuC?Jk4U1u6 z5tl?w)O*k-ce`JEE{$EEG)dyJad=fvi8rTPomyXMjSZs5hV1@q!LG&za_F6%Hcz*j zMqcq0tc`geSsM32bB-@v^_FV_bu|MuK$YEkWp!K+uB!m6xp5e$iC0s z)QGBO;yt?Bu4w4|=$dYyglZ!qW#kibyxQp#_7zqfjDT7sep$8)h9$^zWye+f7+a3^U^Y&4`xhBMqx-1(BIJW{vk_PY za9!DtnFMgL@K^uKH1%Q`e>P6J8DZLsHyAwPl@yjr_$6YFFb!Ui%T^oXA9g+R1Ai3+ zra4?7feU2&1&tBF8!rhPo1-J5nGoP9=26(I8FWX(MVrGd=7|!{qw#|WkAL*KrbHE* z?asb&ny)@Kb|jOazVcEzNjJH>EX@^f(!Dv!fIQLzMI>Z91DYWdsB2*sg07Qq$2xzP 
zhh7*c-XjNLEVwL{4_;aay^49d#yX!RMfgFYF#1{!!si0Pu0FF+t zlEaR8fy={T;MB4J^n|%TqCUHG@hq!RDL_a?Nfl=$Szw4cpHPCK81;mnq)1~Qo{PdFAIG75AN zqgikU8(c9mBN^pj6TBpAD{&G^+0#~Ii$P>%=;W~jjm||@81X&V0k+yC>qFX``b_5Ut}}`hTx3(4t9&zBjwKo_ zvO3C`f2sQmH1o+14#HGuOSgIZMbaqNVrG+Q*DO_(>hns*5{v&ydydqzHK)U1Vov_@ zD*?JsH1@1(2MtzQSv35z?upDZ)?kkzArn<+F$M-JNv|pXRe<--FOatZtN4-V1ms!T z)=aI|l{}+>zEybIq&~*9__an_^1^*>B1K5gH>y=yX&yvBQek6Z5qDPzF8S12;V|0m z2a~^SnFTaCCRZs|9@uOmA%!kR_u5GL5JcLitF-&)u z=eJGljt-PO@I_c9tyIGU160IAocK8AYVff51r}TGf%gAJfU*CF*Z2qS{KG>2)BbDp zKkWbb|6BjRdjD7Je~$Zq8~^Xo|K<75s{gV6^Xz~8|F!Rbpq2kI`#<~tyfiV%{7KbjVe)PKN$X7qU;IyR=hvP0)ZN#Y+4IF#Yx|jwRmZHgH&iDi)8ghMe8Iw{XoybQi6k&Wy3NRM%xGiYG6@lJu<0j8SWMd=&9S zhRx^4ZqQxn5u@W~?DDS!MR5%MY~Rl?_p6EPO;MGI+oP>cecOeMy}G?N5p^;qV{kMG zLrCA?PodB!-D>j@im^)p*w%qH(Gkup-<|W)khk*;;d@a8MdQ1?Qkl{=8>BA8mudM) zF{fk|#Ae@V=e%f_{vvIK26Nk&$xYWhP7>ngs@L8Dnf8xki~A!R8a?lu_0ueMS&b(M1Bh2y!h-csH;I`fpSu&<>I#rK{jv}mH^`}3@%+NiN|;$rIWqKIH+0xZ8vGhi8%=Bv2l18!Y((t2JgyeLh!s}H-! zR z14}&)THAmlGGEKZ?{cdQA|r0ls4O^Vm4dVwvOhKi4z2r zLDCL?cEzNJXt$a4k6zom{$N8Sza2N0eWn;y1MND#hF#dCSF~2`N-Xt~{+!D%lpZF{l3YR)LqjA)=#c1{3C3R} z3#69cWyF5^#!fyl%uOAv_qMZnRafERqtEK8m3ESsd`iA$l?~PuN4`t552h!+d)w?! 
z@NXQoG9R5-ZEH3h`XFSexU+bv(*>?Gdfy4EH0HOk3B3$L93Y!|WuE4rXHT`7^%JYR zR@0dfV1NtjV(y$Ht~%jB)OQu5x7s8;q#Oi)kL(oHWT!+-|EDGoHOa27+rK8AM19lrf5s7{3)7*iv9NX&S=y4M}uJG{SM3K+rQ{BxZvHL5O<0I zc__X_wvwAvaD;~6}^n*Q_Jc^f{-0HZ*}KxxA3vP z7O?IDuIO&85f9ZBKJI!S(^M0)BaJ&R;s|QP3JR{6`50KV3Dg;dOuD%ty&6HN-*dmC zBD|iRpj4G|Xivj%W+E{TF1%&!y0t_lJ^J=qFihRgMp^pcoH#PH;`)7!dd56)bR1X< zYZo?MW;5%xFP*hV$v@7NTzcxBC_Czoc&2IZO*kc;c${_EwD&>W=CEA@P8MMf!jkI;HvOi<~myE`H$xOlYo z*i31HTxYQkS7rbM9c!`7E@K@o*{;?$l~S49RF+j0b*r&A{MfhLZ#N!mu{Kwl8Y`>e zN9c@?6`PTzo2g+Yo%ebzM>^-Ma`SH&BU;-oDL$ z+y&~Pnw89aPpj_MxgZ5?JU90~fI)MrZ-3T?$9M{ek$2zxt@G6@3I{)Sep|JRneo2Z zn0OsTxQN_xu^;lT+F8Lr(!L?s?eVrXmosIH95_jWkYzqxGoRSwmgrh=GM_kz%ek&Z;tqDpC;iSUr1mTG7sGWmty$KHty0X(;Yy`@C$Id9a@3bFbc+w;Q*6A6 zVZymMv3ITPA#S*x#_dFDmm{NRz!Vn^&J%aZp5cOP%VklSEB2dy74oQ8Z`=Lio%lCF z6L=>ACDqpZWQ>I*XA`93ZhOm7P@$82hFz zln>gWttA zE_D@dr5zQA-_IRYt}ijD7?kYEshW~SS1=QqXFJFNMw5mx3uG0G?5Vl213vS!2H1Qa z;~qJFJgIdgMCm0?z}({C8iBnH-{D(G^d&J;ciGcwJ*hw1nc{qsYuc|x+*_&Y6l_AK z9Bm1lqiuryTF9h#~CrRbxI`M z>fl^-621kwc|OG8ZLle!yj^hQ%^(+fU$@tbk@TwS&_}kDF7b)O$(QnOKg~TCn6I4J zNm@AMRYiM{RK^eQ%w`mkZm(wt?A}Rw_QD)`X5;mG&8JJvBuuvG{Q#kQZiw>@aS}oi zL)#NumBQHWJ4R}Oo%5#I6PxoaQ6X(v^y00NXMdTx3S+H5XUukmuPvYrb#|3NHPzP=N}g?R=0i0LvpJ;l9K9JLGh7#EHf7z3JM|FI!{}Ta;Z^Bb3ITtV+Amy`Z%N{o!MbKl6XF zU0Cm0N!XyG0UWuZ4!(sFN>{y9F3wXNc)d?qRqA}G5SX+}JS^5RqMzcfy?OC|@b9BH z(hN69U)I4Czg9Qp&6E<~W?r;NPkO;xWo?u{xeHotHUT!uFdbFi#@*@;J-^=(jMNX& zUWS90+D!E|+=7mM^8O)Y#~=U2dGzgtAZbA^)uojGkhMbLcOmrA;&87K;wQbsZr{CyuvotN$(3vztep7O`-z8q8NvbpLV-gR_WMhGOpx@WuYI`g`nu##DfeZ8QMIEUXf$@G34E3Z@w0$dO+miWP+O z$zOFCf+L(HK0BZ;nIdgl&xDqXaw^tQLX4PWfD=hD8loyO_d5+DHMKtGXVHeU;6<)C zsqCG(XZ`DP`|?c9o)Ogm)=<)bT=)iN8?S5E+-13IrM<1QuDA4^M~9@0 z9O;$=1`flX@jjjJKHMKoThUZr|L*1CW~h1=y-(E_2D&jKR45oM_HK?Kv9o&+T7K1t zsaVDjFxjAa*+njsj}9O(>!g=4Ok-IdSsqy&S%ufb*TXkhG9z|8nR^acI}~yVX%|K= zP|jktrZvv8r?f21x;XROgvf9F;_NMQo1LhaDaRiBS6(lpQL$`>Lo?Vay@6qMVO8w{ z&nDfq!Zp*iqcQWnUtV0s$MS8=p`ZPM*D2EHiwyV%v11`=euCO7o*lDkTNP9J3XRyB 
zJ46fz_J?{oG{k_i-iI`@Wuc?+RLu&-bYHbT>hq6#9XM+{%-uiMIG+`#?Q0~uFy1kG z_LFt`wf8G~>uYM<&JlYu!(sB?0`tJ6P+2;RN&v+bk;E~t#qt| zIwo4F7aA?U964sz&pih3l%ALn$zXn8(D-g?x#-y&&+o1ClGT~)gBSU8 z>fzq)0Q+~Gjsizxb0Ne|4dt@VWE%0eNH@Z_*ZTz8#NA~5uUKOR*P4NULYIAkjk!U` zdS30;V;~RC3E}=aQ%BsPOtvvaiOczY$7nSgC%o|t+d>wz$kaKd_E}8EkkTBb4+bri z3r*Is)LG(1&QG-K5U--4I1O&AM~D$sCoAoj7waY(6CyVOh)ddqr9Z3wB{5Z{qEuMY ztbm_)1+At0Iyx=%eo#dO0YMZd;;5?QbL>+hiFK+p@|HrK*2#4_AeL-=YL^zuAX|>#Ud=80G+}Bg?lPnVLLpn%ziEX2YNn?tREBxMq z!Y()Bk>WJ>6_NEc^q!a5`c6p}NHQ%14CGw!4h{!Vzk&(-OhgYt52NgpzQLv%7K=#@ zPg{7P)R6CEj?jnGr>XCUGDaUwU9pYit{Tf;FMBz!#7@srRu=VvYU-^O^vrtJy|Q7l zK=Y+||306$UXH^{!C5GXm9-8`US?`pWZ+0pNw}y(=jH^^l+6Vusi$h` zuHn0zyk%%CN2s+c>dx5Z>e4On zW}W4n@GWiwP{VY=-V7@k>?dQ5oHSqlq>Vaq*HNSGg$nMVR>tnnXp>n59b%tmdbh zj-Qy|5C*BJqBbdhi!457))vFwHv7%$jI9mIXA<5bCqVhokbfTi2oKJ=7!bEnOAZwT z5R2k9MG4c-C59c?2<>||qeAmqpu*ac z6{eG{uNfQ3!S+J7h91Y%`khWN8Cf{Ys#tTG5vIAvWy9?AL)T+7-I@G?biI%)Qoc>?J zPW3L+63sl|oC@E{!CfH{yCwW0A?oQ!JTZ;+22{5_pdBmyJ}a6>)yfANlyn=|W3&;a zI4c*xvPSF);Zz{7Y*aTgGG;w4Zd(cLv?o<;DEb*$@f>gie+EpO{7xwzbm6=%MU@$F zq0E_~YxanowzysWZr*|cgxLimnh!2OdckU#lU7#YoJiEmOwY3qx_}%g+!vCB`W*8K z$6`59mSBFd?z-wXHLiE8JJt;UZJUn6bMsF^Z(=eLI(biUFLY8qOqi5QRRt#MYT~J< z0!tUOip> zxf-9O89E<+YPbu3)==XjhrMo!FlP?RYi$;trUnUv{o*%Hv$e>HW3_uYO3)o0$WpObG)BkQ(%NoQS#1g~= z%?iy7%>vD|lJcJVo>C6ri4T%8LIW7bT(hvVvNN-@WE#FEo?wIG0fMX_{r6;Z-#~i+ zK_>61d&0RUz!nps(QDd?AV81Vd*mK^?g;Q@`WkVf49a(j+R2em5iorXJ%O7G0T45F zOx+XBwE?b}IwtP1=F$OQh8_VYE+Ai)j=_7{IXVC%E1{{!-xDzqc1o6+N7RWX$Sehk znb6oH?SuwIm5P^=rSFk(!VH20yrmMEtPZJ6-NViG0g#z5Oji3;hE&G(4LqtVC5Vo} zL2s#-Mz64Q9ROdm){qlhP+Q8SacjVd9iWMIW3WE`L=B{s@;O=`eqsvB1aLBM3_C;4 zA%QLcORPFW&NezvUeg>PtCU6-or(G&3)VSG5FRKJfSQX^0i7c`u zXB_w{y(;5usTN8)>g$vJqM&bpy|Y$wzrh7D@_<2>NCUhHm43Fcl9&=dnrxb?p9E!{ z!zQB!j`*HTu@h`@C-fjxKwoMwvk5bz$*3`2FI$QQiZYBck22oeFOUid96$s3NR3N< zWqvR@?MvuS7*6P(PaheW52wi*8NnEukEcnZ2_EU04=CYPW>dycCRgSJRRVNUNK<-K z&jAL2TmT862OtFScU;qAgh;Uld;)v`YLuBHy#>tQC>=d}&?bJNPs3$E_(1!B%RvZ2 
z3W5tlQ2Fh3BX>V_yLC5pb9OUtTWt$=t9K`NV|O2Qcll5G$@}MbQ*{q@OLU`di)=$~ zdvEh^>ueKl+iWvz%Wms+<9DBR3;Hklsf$nzc0jZv5Wwg{=tAj&Wx-}aW{dLnRvox>7AKY~AWlXdrXi*)PiBP+np2Lqz-z>p!3!JZ(W zz`wvQp+Aw5k%MTjp zAFz&WVL$A_IkODm+I?QkZ6S(dmJ!AH>7N^f-Zph! zDH~Q9u}Hm&zJX#>5>XkIbV?jWl%&PQBt4wgDk>CBRZF6j59cZB(w_#$k8S6d5 z`F*uC$*x>~D`2*DEq z+-*6=AjlxsAr~0cE`a(feH^P`<$Bydotm3ek=qn(nKTr5bCn<&YGp2cyX@g>ccT$) zGs_meye&l?tNyhAJ$eg<52y~HE5qD9y#?kT$le5wk&%#f+Kyp`O%R^_WuPgmJKrQs z*AiH)6A#|7B|A?v_#^qUOCe)MFP8-_ouuxL8XliaPG;ax`}(7q+v`3 zQURbY%Sy+rqOQ{xcWGUAaocw++k6oLD2c6vs&jJp^EbSXaPll|zdDI(r~I02yHs21 zr?}hnLVY~PjOjJUY7kwQ-Wu57wqv5@Tiw>^%OcoggHq1TdZL`6W2t2)&RJr=e!*k4 zz{2wm@zH?&T%J`CPsLqSkxz#R3CK}CS=bX4ewnE`_>jT}?kkV;guw%U@8f5O#MK<^JqclM^s!OIm)X7D>z~d% zyJOzhtf;wf`Th9ReG~#5l9f(8l*3!t07U=isWSNDRFrmgPf`&f$BLf(vl6$_@sIU(7zzWO0X)${4W`>1_B z*?k^*WSf>V@`&Nya(ca`rr#{qi1jy+BbBnLX*3trPqjoYp*}Tu$GdMP19b4stC_GM2LP$|zz*Kzoh;V^ng`{5+Q zhsF!#2Q%I}9O4yXR03^#N@9c%~E|2xC9 zA{`w>ZBbg(^~%y~N1In_sly6r;sP@sXnpdVI5D-}o= zCRg&iE?rMF!;I2bhx{|CLEvSwQ1?D_?Kqz|JZi;oW!4pP==LX3+VC&&@h~&@kQj_v zImX{nF+DF_wUEDt2386b)oWt*zjElEB+h`*z!1ITZvVfJ=_10;T{=k_qTb2F`RAbI zy67OYm1r^J!cOb2>lDe4?d6c6bJ?jtvqB=zJ&o00s6@oJIabAow{Ds&vmyVAY5a0( zfiWw1nx|)JSOT-T&WTNnINQL7)v8ScSz=1%V;?>*KvfZB;-~8*CMUYzWWvwM2R{MA zBF^Vn`P$90hldNYG!%S4#!tCiDv5kfwJvJLPu%~W1t#(Sur=R0#-F8RFmEMeVP3S2 zmJPHA{QeDyd$);>+ME);`k;xijpI$5WpQ)Nd1s9O1Wy|Q-kIGBqfh@a>|bKy znpmO)?3BG|2?@@}QD6oLT8mA#>F`*bzu5RN!l{72R_qiXc4x|w;K{Wyf}qQOr4>P& zwU=RM;zjU-9O=Elgq`2Yg^h4Y*yjVmW}W2gtaxS+&4esmF9)u=SNyWNyL%hOYFna_ zj~}ixzOOGnYa%LX^?p$34L{fpx`8eb%NduR0SX}sdCLHI@jcd_Lx3#9nBnGb-H*-Ec(`YZN;*XUp;dLF9z9u<$_%$!;?2;F&}@o;2k89V^{_6EDc&cFw^^(zbxuKqMPSEp5cKj`S#M=l(FileIaA{R^WpQ5U9 z1I%hbfFlHApDB(q8LidvbT017${}C4K#C6&KOY?(=3PkFES|qv`BaJ(mT&l2oN-Na zYrosy(SGPDEAW*ySKZVT;+Hfn(Vvs!+9U+rtBSGr+D=)&&sz@3WEIsU?vj>}{w@q^ zOy2rfSk!|Qv26B;uB4>HazB9dF1c(IT|a+N-YCul#I+-OijalHz4nD77rN989<4sY zW1C79A1fU-9^#AJc--N%)0CY|7>|dTF)8;&eyhnw`wE0{5_pmZ;H{&VT}hTVK)#PZ zaFnirg_o%pFIHH|3(D;Emm&X5-C{E8&S-H`f;Wqd7%A6i_i_+BoR&cgC9 
z*UpGhW-gaFtRJAmk{`oJpKLSn@o*k@@B}n%AN%xaLpsgAA>eShyd>PP-%Z^jf3Nc< zSZ5z!WH6BQ+S)1cR+a$h&@Ki4y^f+bf}UhQ0u^Hyt&{L9hJw|4?KU2oA9|Vj`hrey zHMV0L-3NvJ#K0;m7dSYs>iLS~xm5o(FO3grd5iU*wwDHRtUET$W*$9ulKAQQ2OzKS*3 zA7I65ys|QI2%7CAtn^XQr_Vo?tG+SnVW)DClEoCrMXmVmkVe%AxnSSkTZFoE1pqxG!N|pa+G8j3ItNi1tFTH*1xaQXV z817@QW z8&#NZzO5IRh@BB>v6`%KEoGEUya?q&j;ZwxExLEr@Rh6$i_tYByHs9ZzOkP;yYBdK z(Q7X=?C2b{SPzr0GO~~?Dhl9Gq*aM9!$?c33e^=s~D>^^^39> zvad`4#^S|zi^|QP$BJ*#kt!Uo$dShDd;{>M4rcP5LR%C7N-=>$kUDj;!^IOCcAt2{+7@wSyllvUmE? zhZ5DeBGp6Ti)9oHtVFC=9p08YYztZ5F7la2SN|S!yl`3oWcy|4BC3K`lFx16R?;)?W6EUY-T$LpDb(TBrrfY2#()_EEoGf#wqT)xLs8rP4A;!rRCm#;=CRew%WJOS9UUJy#`hoZScZIf1LzF$t?TPZ$G z2EK`b?0roH^b*Nv>ZtMgJ>RZ$>b{EQJa~xxeV26+yGZ?Lbq8|%^eUBgT6z7>S>mL% zJF)JM$oC57HjevqednJy@7sI-PS;Z^W2v&uN?tH^zqYZ4g-Fc7!n@Vq&rL=k?qIeh zV26e4cH!2e|D=gOlQ4WMfSR4@3rYfEsrZo!%bGQ>ntp_63WxlA! zyG!P05#-t`<|sKox!Gd3wfZ)#<=f#da&@XLt5ms0JWWo1w(;3}5dH7XPSGo-c+`4u zPyvlq*EId1_(zLT+Q#NlZj8x($~dTuqwrShbmlJZQ_ z5iQ(-M!8cR`khmq>}BD@1h4I_l(Bcd|9dqBgLSxD`W~GB&yhsYLC(h2TABDXEgOFp zV=K2@i=m`pxWj?>pqM-CVFE%3L!*4{h~<(mlnQYqq484^Y!(dp=7>*X?y!Rk?(*>N zbsZH^$Ih5%Q!>ZzzHjO4a_Uo)C0fcyBt##oL+8homauo{e!PNW#2U>N%sluNjT>-# z6qbUwmf!T9=&|C_pv^JUVrDJS%3q=CxP|O6m(M|gvCrZMVCA`c2??2`0P;foT*QYA zJg=|{-l3;RNo+3x)r80uq)m+`2V^b--5*HM%YJ^(y$M;#DJj{uEQLo@l!t}sDKg{O zfw-*_zTh%6L9C1$)^bx$Ja;VPhY@_@99$mO_9#xJ5;5#F;!Z!HW!&W`lHmrZa|UQ| zNksR97pKW%IKs}v9)u;orwxmDkKmLzGe;G94RatE^hT9K%C98~dhunU56i*gBSIch z`M%*>hxmXXk$M%ye_h~s<6oM+&qSl}$i4ES;8z|^KcELfO>o4uD^bTvnm~+%7NbT? 
ziz-`JIEY_B3HNK-pSU3P)Dk2U^0AZ9^xX8&p_1dk-NtIq#31aVBE3)=kUG9GU+ zhS*PZxPf&_FQCsnAAr z?1#xO%VM9E-Nl+l zZ@HGp)KvIZZLPh}vNW8nppUQ4Nn=w}g`lh5Mg4`{#>1VS@sy_WYk1MSldaJU^BkF^ zzRB5Nhxkse8pv?8iCP20W6Met1lhvGeohuEhlzDCBE^%&T${r`e_}pg zO8VnY{9T5$-7UlcKbO89dpvT<<^Cn_kT%H~B%|DurRRvVnnTgOETB?x3m4anqTUMp zz}%CLY2H$VR1lT1kPVoj7Gv$B>q5@o!eK|0FOOgHe-`K2o;j4$e3#^wV$F3nU2*ap zl@N$c3AfVfZ8*c@C$Ax$jWJ4F`l~+Kmql7d)4V_OGs1^)_k#-6L-6;XrA|>+E@5(T z3Q3Z3Z^Jq#FtqybhI5qJBTkR=CwWMwc;Y5%@=8A=B$qiRe4fp-no z=%gj6DE+YjolwvosgdZE^JLZ7FZ~j{hXiCsB2v%)O}eW z6+@OdiF)g2z7 zpwLTzYm9a@J~yz#{rRo;liA%JEk91?pKvO8D0W_rZug4G1Ar|~TPJpt_Cr6O@3so9C=+4`#5k%BI_}5w%r(|^h{QX;dl5ShBtyf;i z#5XiMW<6HAoKqEt84GGrb6@Y~=wgWTFL`Jv`0LN_zY~YdKm|%9DzyG6{GyhT8W>bm zs5epZ2oV=&WOW50Lq5m}MqNXwI5eok2f~p+`qH8wD7A19d@#i6g$S@dY$q#kRauFI zgeR!6ax8u!!~C&SDOX9nnKCKi$e-@5JPmi(-?JO9(KT92L)_6LUpeJBVFSkhZpa(R z!QVV#z?Dufn{3P~ZRX2Z+WX!5ug##X@&rB1Y&GFo2c* z$Suf${9Zun4w zFU~esvS3(i@p;EdO|a4bjsW^RfiE`53NG8RJ`{uK9s>}U8x<(RjCF^SDwbzsRfi-C zWvt{aan~D~vkht?LKZ zW)eIC<@yCJtt|QP=<%R);Qro;H1iW-P81+MZ#fDoJoIW?&pK8DsbZR1QK@(xJnr#= z6zAWdf);@Vo1B%m>yragY=+;C4<+a^(Iw}Yg+e3-1tda3^)H2_@~272m)WBIkmYOp zLghnZKaigxO66-E2G6MdzS?0oP2mjDDnsNx{x8DLDMl2aY13odwr!hdY}>YN+qP}n zwr$%yGkg9`zTNC*ankBFR#9Zd@SZ`)U3{wx?2?0)HraNGp>$#DWz`B@`z$P~h0r@nVco1q@uDF@(kz z$(D;vld5IWyRwd;z-lM5u5xT2mqkx=qU88EZ)StqxTvWm|BX4i7x64IQl;C)dxk40 zK5?~Or*>ndC9%jowF={*VmO(fR9$68qnnsjnng+!Nyd%kk-`|M;~jk@uJXkOPBo+* zI|3scA<@tt>|q{X?j3XG@aMb8fY~t!g{sC`{=q)y31L2B)6!Ufx*e9OIV2L<_c|Tz zfvPlztfNGgucghNg~!JfBPSYAn6gIJh85P})5g0WIFK-91F~|eE_E}I$cvoGa%6!I zwZUWxCHM;Cv0&I>3o+tTfv7CAN|a*E=j+3oqR2^8^Dmc-&cS`K1_yX?@{%8N>Len_ z0<`eys9H@|LvQXGOQAi*_ktu(NuC=nN^1ZG`zU0Iju&O34eSwqLgY9o8FW!ZlL@== z81CZ7j_OOuIO<7?xhnod<55;WAXg%4FaMvfF)qJ?`V))K($PT!2b6LrTP z5Z>GV>KwoA8HJ2~z8voxPk&BWFz-&(z0$${Jer)^$?NuE=SI* zYjbqut7RUfV`iENDSC@)!WU|;y#KMcy;=&X3hM~gcZLv`Oq#FqFyv(wAnJQ@J;S*% ziA4rX;pT@GG~r`D5&=oG)z?FO-!w1CAH*8+&`Dk^%!~w-u;%SN^iVYfl>g<5CyD@6F*0Um zmW*WEsG^VyqQe^RG(0Zw&YUHCHC)0^+Gh2@!aF*q#YAa)vX4+w5(PWj04(43u*f|{ 
zDaIA|{0Puan!x0ofvbtaUdv)KxnrI_`X)%w-cTT#wFBquktD{;8K6Vq%C0O|3X>!)mV;Iu(5Y_Fbp)6FI15{SMfR> zB@y8D)q&I=KuUl4l>-_sNe7t^D<>iYWPsj~#LYxyPJhW%redo)Y&HzOW);QpR6kU} z&~O*{iXDh!*!5Ofh$s*8^u=gf+5iwb4`2e-v&;vR>>J+fgnGZLvD5h39{ zXKAdD0n$1BE*?KT#gvf8q{ZWa!ADx@Zdg*eBI9*55y~e@nZU;(Ve5V{U*ajiIM@!O zo2`Ga^{rEktcg&dW6vnT$a}<%d4QN6Gm(&YmS4&*PivC3W}-nsF^$7^pmRN8yNxC> zqNk>hn6naA7n_AZgwm35$PGxIe34`n8$9(vfT<7)d~qjhTva#JWhmOaYPsB2*H-P2 zPrrzY8Z&GJ;zmjurCm%Yt`OuZ6pOr($*RX32}dbpk9R3Y48EcQj(Or~m|zWUNB5DA zZ0M*Jc=Bpaqgx||IJwxw?Q>ShzBgp*(R|aogfNUJIGsV%H^@CW@k(3+L|R`6Z&k<= zz#0c#8o&+vBj=bQAkPHwD`Ad$E=UBN#x)QppXMwieT6y* zaDga4vFW&&3G83I-`GM4%~=DSn1PZDWg7-%$DWUmv*X~Kx`N7TDKs|#ZZhO=12>ztA|LS;}6MZoNB2J{8yehhF3h6F= z+FZ}uMoYu+=@`{O?o8A%SurC3`q+OBY+hXe_ZiHsj>`EP}0H{vzp=t zn&C(LXYS2Jd?_nlb4d3|TH3d8%rG#rT(;hVGB(ShE6{$?)S3`eQLjtOX{EM?Pyi*3 zzV3BBO6xpwWCRjDvaP$N=b!08gx6-^{dZQ>YHvGF6YEajIDYLMoAouJZ=!N1!L=Qg zeW^0-tk-W0I&@JNTJOKwSodWHZN)}Ilh}24k?CaTB~Rj^MAT@uG#g^=dWO;z*AB8q zv}@7d)v*E|pbZ;40&L9hFcZN)Wv=fQ4U$y0>QbpMVH3KUsIP#%2*vx;scp5fbGQAM ztl$Bqn5>xVhOTZXH_zf_we^h7`!!Zlpnt|_4?zzaBGsbZlajgDP5ol=AgjijIRV1T z3vdjdQ0~Am?U1xM=P?6<6GV+zJG983$kHE4qNG4A@(lUVXR|^8!cJ3V-V|E3q(u36-I^2CzJ*sk zB~uZzc`GP|vO-0WJN1gChYPMiq(k_!rg2)8E%;ed~}HY>|lfHA4Rm+lkqxj{>CK7{D4 zg5fRKdFcHMH-V*zllD-cj!a1tgilhpy%D{~fvW@T74?Fjh1l zY0L4Lnz!yC&<44Y$n|up6)m&b;#TvEr+nF8{w4Ru!Jz9i)n8R_}5WQ04L`arJtdC;{r%tXGz6-0sj@x%&<69l~W*ISf|)>qmdckquw*{%uC zbO`TsNfi0nohRt+$9LTQ#$SDF7{XuJ4kZl2V0-3skxpkcpWHd$T_CQ7@o#B(4e1v^ z#QK+n_RvQb+JbUKg(m%A^;!D11G&)kOKmpgKwqJO>_FX8ab+G3MUL=ULb0>?hcka5 zLNB;PoPtzj;{1k^ie#F zyF;UwAuKV}MvG1hNUDNCf^(WX{<+cJzqf%VBO@KmjnBQYp8N zEgB=za}RsWI#NGrj8$Z3E44jRf#NZoGb44&&T5{+kKlNwlBQ<_)B*Rog6?}~51uh> z5dss3GOZoMzwv)yAnqK4(~@GjLfy_W!q~BanT8n}ZT-h;PjvwMwDaHm`n;ByGG}uR zgx%k2(?y%yfg$YBN2Qc}#^!kTRz?m@eimLpfUF_d(1itO8Yhh%wt?Y)D~;jAqFIdK z6W@KRhus=89l?Bj^Zt6Xa|3fjvVUM|{#wer{~-u=kfdBp)mzYnM42!#6ske0lHl|N zfYiJvuTK)rrOTS)Z45esRw$67er(?%B*r8V7K=pkh$n5-?ZC{;sI%)evZwshhe(2ze9*MHLL^J!zTs_SW74l`?Mf*MgX>_r!8~Xl_v@HM@S}s 
z**H@1%u^4grFuS|OGKc<@HZx79?bXm9WrQ7T8G{21C)T+Mrk#$mE^we0J!p&V*P7;b=KLyiNzWwl8 zJ-n=Otp7~_^E&S;=gKi)IbU|Z^gYgF=~BU!!$+!;mzj}KNsf#v4gn?eC!PQ<6PdWe zWp_v(lxfUY20OX4Qu-ePByOOdL{J*C#UdB!FU%4^Gd_b>4Zt3t^}MX`zI=4hxHseM z{bAe1(1`pCp_crVsJ^5b1!kMG$i~c@zdN^TNw+(?AI38E#!li2x0huMo4N%EN&!2SU9Os82(Z}`}Arq2<-CIx;lKXF06S`rrbsbOYo=yL1Rn&dwrkpIb z^|cQEP4xQYEh+iJa+16iG1Pup)@K_c5=NJ9hvzn9zU`iuxs%k9%B!RV z&WeZP=zxe=AI4||yM;?AEiW8gV|b>8-SN=V0vTrqLz3Ex5{#bOv9XqFVav3VjThl! zzfOE{DqaTjavuuJdoZ85`-1(s8a%|?!*Q6!GbwVtG6#rTKj73XV%eK-?WU7&8{3d| zMS50BG=}^z=da5Qx^@_t4{jz$fgV5V%IQUV2>5QMDW$I^HGaIzsYrqVuwcj-(AN_H zhTHJP`>v0Seb`Svro4cBP4@`=~`sQ#Q&@x>Hdyh_@{})E1SU1K{$)vn-G&$2;J&@J2;4g??%7FB&B`QEF7!k#G$v?97dN@iK+1M zlhd1T{erH7N-Ba!`|AR^-{&+20i7s)i!;vV3P_&r=X51cfV@zSYPlCzS6BF2uVJrT zW*hk2fT8ix^mRK>!#|~<(kB4UEqax*AxmQ)j(xb3y!Kt`3fWt|AS%9GMV}Jwt4{kQ zy#e$Cq}hf73o^BkWLJUiQIPP0Q-Xh-r^t5>2GMSKq@mcdcFV^~i5a~HR2SYae;3|H z-**`_lc>mS?8VMkDk!E>KHo9LEc|6n!l}Kg&N!Ig?vJt@qH}-piQV7F+K_-ZX473& z8zyccDLmaw4eZ7#7O;<*u2WB(kC^p;a)+rGUTm#o`$qQ|e}O+h-3!-OWuQ?SRpXhW zA~v!F8%9UYIem{O1n$V6*r!@TI6ZOQYWrWWERQ+YIUwVy?g9|f;97<$eY)-d?E(OJ z#U-Zs%w6;U&IvVJmBN)ntzXS z>d(uRL&hu%IfgC_FzNKLNEPEhBY+$fZ}Zq^lv+Oaw(L9#$oG`k=(n`=_MvLtPbO!5 zsEHyxV230|GFE3rX>%e#W0K2w^t)Zq$FK-wKtU%RTUDn5ys!E_^9i6e8hstIQdy^G&!u$9^e|A5j)HV~_-cBLn@BQ2IRj9~0WUPvnIjjLp-Yb*K$?D_o1~rL zpdJ~kIV_%otFzY%oB8_d%(kD+w4^UjuZQSyx_Xc~cz8tXly-GWX}D^*zM1jo>Ad!v zdENaypEZ>gs0n9+%~DZMSbCYKY*rY)q8}Hvsl|XdO=Zw@O|KfbW&kxFbns~b_8%`k zWIP8tOG{Ff)GVebF4Db7uuDl)EMN29T|(vL>1RW$?O|cB|6-hFb6sA^chFB;ggb>g zgfSduTbz=YLF|W+#{JttvfkP0JzjX7o)+yI<0R*QEuP3$qLN|EQ*-T@9s5JxwQCpb zvJ0?Wq2xr%l)3^L9B4DHCGJT&F69^VL3aFp%8ui9ZQNl*&lEJWc91#tt&uc-#JAq> zy$$fz6E#l!aM;_%kC)>Erei{V8f*UTDnH&hDolpzxsC)D9tj-oL`s15o*D8_puQU* zp&0ET>8>=#Y4xv(N)%>Ozt3jH%PN+@%!Tm-nf$;&dW^ zm+Ew4-;)K7Tu{_BLPB#h-Gdv_h==8cyUp0(3bHbNs$qZr&f2ywcS^oYVKa@e*I}P- zCXAgO*K2h^O{D~ayu58U3&?mftT~MuWuBi>E+z6pnOonCOGD{FWQ9_)Qn*`(S7Tz2 z*W*lmIkPjqmy@2)ZToR^I-2Nnz4tfpyzkiebVPRNfBlYA!_|olDv09WBp^i5+iQ&t 
zK^*?6Y#A#bmeIUnW%jEeNl#iDr$kGZ#bLmA@=btZP7A+Qo_$QV3S=aT9Z{FAyTSIA zkt$-Wp$UgliKvfHkv|->LBl54y4Tvhe!>-1r>%NE-g9zzokt}rh=_>md_6{UiTiGl z2N?tQa+==loOA5N=nwg=WuE7l@V^H?C3n58ZmfTGUFQCtV0l?y)Wp%gvZ9ibYT0_Y z>oz8t2^v)#zx{a?iWiZBBs3I2kqdx>-~!AwiTyEl`VfmG=h9pH&DDv#lK1XaI&gi?Ch{P`nOp%$1{KI`=c# zDy{m^f+9uh?;G#OMm#}C`gG7^nk|X-qc3TC48}1NoWNqJIs}Qzj-2L%CL>q3`>43p zc;*GAGDN%%ufg(mw=3Bi$CRrQS~Zrj3BLZsgmhTW*{sKgZZ zPSb0>TUiO(#T{56x@8&Jd8W2Svt&w3S<%h@<@O!VytSHnHc!2g`+P00%a(Sg44*`@r%$`H3BwiCZ)d#o+nMC+dM?TL1; zP?vZ4sQd)5_k@GX6u$+zg=f4)fVd%9d#d&NH0}fFWpkkQJK3%y+LP8m3lFQ$6TKs& zXLabPXSG1L{RFZ1g!7Q-gRDf1*6ZEsL!Y}tH~fKT6yFK2C3rn`%mF&H3($Dr6?p}D zqo}BwoIkQjJS^F!R#^G!_&UKW5+7aW;pq6MyB6GRZF?mlZS6(iDf)M7H%*{T<)83c zp~-kAkyTftl~kjZM5+~Kt}%yW)B)#u8X?g=9{g}U7F_yIrT`dTGKWllIw48$0dui{ zo;5imq%bC*JYK%B$O}z|{Ulx!2n+6{z_@Y(62rWAz?epE}~tDEfWcdOk~9w$QBgtIK{>2rcIr%>T-S738jE6V zJ$~OJ4mKov$M^QPLkUpg8=I4dt)(TsRQcy^1R;o(nAK0AXOQF{U&UO?)^I18!h}wA zv|$!s@Dg#Vx!g^IT@z9s9T@@*C+}0I`012Q8F0;0v*Rhu&!ry`*m7H2e4=g*^qK_| z%#y?V-~8Gd0ioh_-o~-ylG0Un|Eh&WO9*x48m%nLJIlf4<#J*7WaQ;{X~U|%kB()` z4fYpBXi45mm9%Y9Zd3W)lJOaaqoZ_Y87da$O*A_yt)A7gHGt1QGh)ANs-Y$q6UBl= zD$?r21mt5H53&jq^$rnM8L9!fqT;NI7<8GHgSgY#zFN?JK-w!-52=uj%wnVGhX(j} z#>Qxv*oUUN-c9@Wdxu9OJ^|17ILPj^S=l&1Rrm(;P17cU`3If?Vw~e3i5^i4Ulf@| zvzjc_vgAxk*QLP=Opf>5`YY1A#T3}=KM-VQ$hp~Lm0-7Bv&z~?%pYHqY_%)sy2yEj z+uzaXyrkJ}?NzZw8jwyxwQ!*~cs;@1Uas0&-nf}reqfDz@XZ?$0Ppmt-5QT0m>Q~R zj;p3gVqr9EX0H6U%l=WRtTh|gSxDf;`EO?~?-}jSmZ9gFwC!#*tOqyRheh6s-6yHL zdW{BSi(|dm=Y9vd2VT@#LS&-~bGTi1V3-4Ox^d3`XvcqPeI{=Onpa7rH;F zKhTZQ{IakKVQ05xn!2KIw7#H!_h5?{<0uHR!cYGxT|WQ{$R8)b6QRO3kLH2L1q{m~ z6bnOna;6hklEoh}+3cqmb>HkWu?NxWq*T$?JzTY!x`_Hv8bIV~R+<5KIwq>I=ewlp z`brKG$UEdC6!ebW-c_UZwm3Ginm>uq#Jl@npAy$&*z^(v4dzy>9ElNImQPxh(=-CJ z11Pn0PS36At7%^?m2V3x1IHsB;bc+*GXMChs@gWEZkszOXS7=vY1=lQJ-Tr|Ho7oH z?A|tcJsEq3Xu{+i6WnSj2W^rQ7M{8|7DN z9Y2Q5OrcBDp_`-o7M(kYbxgaa^pS;O5jLUS+g4vvIGfS(IfEtj4O^oSy>hQ@6ykn9 zji|#f(IeCwdefIL!|B4ImlJBkKC(>fNEcwJFp4xJ+xt{hQS2OkL4^to7K=e*X}xh)!@6-MVYSZowu|lFMViX)nm&!j<@Nkr 
z+xd8N_FBDV|JAvF?dG9$zVEhozI*?;oFGc>-O~kM=l&`;mB9B4y)O~&o*e4I6Xb8U z59L109Wc0=6laYI$YTOA_X08J$Ng^-JOt>yjK>yvYn_#JKd%iJ~0 zfI(IZWDf}cgOu+O(Hq+PxJ}+F_!X)bl<$#D+X?5UO{f=?3rO{r*%JnzlScQ6xx57* zze9`Xk#zZTMEq95D9p;$NPACqEzs)qi1^&6^ezbI4Owr2zX*GYDOnzb_(UU;9BmT8 z2Auf7kH6=jYU~1b`KGRSa_{-FD!gsqXeo~QYJLl3AmoeQFN9tCeF6z&${8CT zq2Jh5x{K3Zp4SzI2fqi%$}diL2_l=MdieWOaNst3iDHK7gpWkdh~W&|5d4i>adI+A zNe+mtKKLjLRxBhpV4?XU`|zS%mX}K*<%$PCKdDO^>12CNkR6(svvw{2mkgU(%9L5c zO?OHiDLjo0#P# zxarOnU>TtOWJIA73MelScPkfU(iez?Bt$>eesl=4hnf7TfPnSn@!(hX@?&3LUUrJ| z-y?!w^2>1u%Qg5LRpBnlLKF$77Z%DisoBJ;f~FsXh<;)MD9rukM?$~5HQ#1uLI^?c+;mcI(L znYRcp@(7;*z<~DqxaY>iAG<^FPoVHpmf+wYBf`S_#!1HD`Vc^1srGUKUCdIooHC2Z z_JRF1Vg-~mkRtS&kwn~h1jNyh!Jh*ZyBUx#cEO*Xg|rzViE|hI8H!ZR$+BhQb>IXL z_SsrVSfK8ulCYB)Wy(mAAvB2Wyo46w4@iG7&S0WO07!3=BzySslEF?I)sZ6713p7C zXKXI?1{o`PJIbiGwP!MT^tbkn-@x&m_lEF`W2d**UC`3Obtv?Y5zMlQC&ND}^Wp^P zB^WRw51?_dI<^%q7u41D4hp>E#A`W8jLa&w9&XV{a?5LB_|N0iBOsAcAL_>6QYkEP ztE(_5T2fNdHXbE;HEzutYsoH8Q>`u7#EArh2r;0t9Es>+s8g%!B0AEcA6AjFG@Im7 zOX4eGTx(LAFl%N9EA#sbmS^gjOv6M7jZikPX_A%<8#|-snG1`tIr>oIPQEGsky;%k zKqdVo7?5eCOq>HzRs@wfl9z5)!S{;4k?!d$0&TVgLh}TrisloeA##M|eiap;7r`+n z&l0&nU(*aHqWeJvOHPhZ@f}KfU0%`2LxDqt;|Nyv-)?)NaD*u{ zE3rs2GBMKeyA8-_GcfQZGJKvKk{xdrgRPJl_pn?t;p}K?)KTYpk8-taDR)~*9%DK$ zcTWS6Z(vt+?V48vd$R63w@ajaZnm1Qa9h8eU9}{vvh`$4QPI~OwQ(*J5>y$dsG5ANZT^WR5Z*gaD%h4@+MsMQ#w(rP)ceKl7qNTf*ig(JwYv^FDe> zl=}Bb?TubbXPK?B+bCraZU2WP2`dt7LHn6?eshLR_&ZCShHt`OYlGQFu_Atvw8ge9 zf-^ZS*$V6Fv6$0sE1s{s_^q-9GnU9IhE3*R0TL@TvEwA`;NFsU_FuvayIyd8H$5E@#5aS8_W&_s!^gCWdmXNMSLzWR2pOf?{b0>$>9Y44ldtxTQ^MIy3))Gg;8Ws$*=)*qQF3 zG*yz0)zZA8FxXM2Q_y;O_r#bnWQ2VDzWc7Syjl#pFGjxrPxPCD5dC=_ zPThfDkm`ME|0jof6^5bEhZ|T$xBEAi=CC!+E@sDsV+eWUJ zI6N|&nve*~m=@aZN=OR7E$c}713U8H3W1C0;|Di+Bdef@L?AuGBhy~2m>T|}^-U+suq7$*{SCw%VLXGi2)ACD9Z->(_3{-xnff2#@l%<=*`hIIXr4}T)U39 z12g!~BRrWSB)`ET0~VRBH8DJXPgrlh*Yj#>)x0hPU~fY$i60O#cW8rP3?|N!RPK0I zMFPv&bZMwA7sVPM!1gkbTgvh+Q=Ho^R@P{TmDv=)$dBmT3hHMNtuf7hd$M=V45Yf}znPvvy5tE6 
zriHFt)X>#1je?3z8iYv_EsGK%ngxmhON~yW*6AeYFfoNmvY|zPPU^ehf+ORa3dV+z z$HQ1KACj!D2CblNd%il(WaCwT*4Vl0VifolD8WJ{b^?xe*WkPxR3aS1;x`C7PON$F zVY(%W(@`h&<$EB!l>$R=M7lE<%w!a4GSMfdyVZq5mvl?mc`*ZTF+*SopC7~4zfl_& zTU0p7S3fV?k(awxJ~(F|T~LpEzWKMc5s3R>dEOD8e*yGU5fYS;QUmv)dK@B4C7xfY z0~51_?ZSuaT&n|ep(TruzX9&QVHp>{4xq>IIEUtVr&gaN9=)903VzQ0n+WS4WeWjL zyf68PCx{xKldwn|&t0L zUb2Jj5W}@fx;eL}3l$Qa9QN9ly}IbEqJ=Xgf)gCFz$nb5|&vR4iMf3d(6p zH!aIBJFXOyK=;6i)mjF7c%$26Gjg>1bI{z?6PSspay=Q6q5Fs6$yC?xh%A0@Gt>9s zsPF2jHAVQymvpy$1MErQ@zyt+IwdX<&)N$p0pDXECKhL=N}e!5)vz;}>)&RV8Yk@s z+@awyqbd-u;oW;KVq%|%cEC?a{@_Xu^vMM3U=WMLU+>=q{5;OP#(^k@hUY0y%q8p` z))PyFMCUA<>lO9_=5%3(?sUIsGi=B2WV0)dO9`q*+ZVl_q1d^Q2Cio#G$tL@t4Z!(rc2()8-xP{et($~Cyby%p-3RM~QhX&WyccLc@# zMoE0x5g3AZ*nN#5w*ZAoKSoHeNH(`Yn107hkt8xwSB(VHh2#+KG9{QcA%xgwtx2BH z*S{g7FwhUU5qp5d)8@j-AjpcntqkE86{No@u{Z`Kekt>Fzv*(j_Gx;Vn70Xo2h;f4 zX6Xp`i_)B}1A+938}?GrBjhi}2bhc=PE^ud)SpK*55|mP*m#~kM}f*gL^PPUbh+m( zl9!t#UB?#q1P87mLyp}y02eA8`6gZmAp0XS3fwnq(`#UyQM+yhn`Oyn|J^r}4X8Ww zx4@-<XVQv$Q%g5=do^2wDdUF;Og0m;(Y|Azrz2*)oUTEzlSPoa38WY$q6UxRu@J z+Fl)nFVD68X^AuT37s$bMr+?5zqGyek@bqlrD9~Iz};MlWFC8u@xCTBGSPb$Y0f~} z!EDz>>Zvlydc#97%7S33-Ie!ueM~TH=;Vdqvl0VUqzwOHhl`V$Z7NvI4eeY(!OQfz z7E+YVcxW>#msn`oB)0|9Asp^A%eMMyIBIAEoRm`v8Qbe_3jUNVn=9=`b+xJ4t|2U; zqIh|^2z#5e?M`!LANo5vr7)orseE2)J2=LE*4!D{G9B`j$guxT>Ii*a&$MMRJO70i z`*j;x-POp(B8j?hL}}04#zwX(4YKoXPjjV(MUQf?ioI5Id);t(Uw7)l|Cro;cbea( zjVJ(PP6(tl%$Iu`nM+2DB2XDEQ~CI%)L$)<$J=ZT4BX-KwjN>B^SII_n#5=tA5P4M3`TQ4b>b!AB%(3^V=@RD>jwX znz}={bKH;6Kq^YDlGhBYM64M!m(|`K)}6I?Cn`108{aNZ1(;$*aD^+)i9Q6^d6>Z5 z`k>O=e=K9%_^Ua^F9=pBb@XtuoQI>vW!h&=EgJf+c;@eO@i2UE1jYe$7l~F0RI04G z1G}@gB<{^9BAtlUmXSCZBvqrU@fG&K)t9xW!IT|K4OBGadB708TT{8}@)sgx&dGWE6N zg~^jLmN_BD<)pB*H>2BzYPQpr5vE>J)}QVC?HtA?8+5xiN6KJTo4GQIV&EQSZSqDg zrYE6|jXDG=Cj_X}=e2;Nk-@q0COr_*^73BuS8DfILZ0QbW_c)Kl@j7UO*kx%T6>B1 zQBtaPrqaezQsw^tdWI)s9$DgEULq&z}$tsq<2Psv!>#DK5&`=mrlJ7pV(D;K&bvPr-7ski4> zq<@W7{|^Pk8f65|pw&f`fAGjo!SL-ltvg-6ffx}PtGt}|?|cEenePv%Ge9@MC2jA~ 
zSl@|X$W1up=z=)Ikya_@2Oy<9pC}FhaijS7OHg+kOU`lMj_|46V*)uT`^z*gdU=E3 zUl}C$Xdykt33D>*>?K3yEJBW5Zj*7EN`pEUw53Sx(O3F{CprpR7&iYaepV9;Yca<( zjjHE?H2{;&P->We9)%rR`Oxggr|(gezGO=I<8*=r0wv(x-? z3NvD}g@cj+n(}aH7g+rfhuBNWzCtnT`dJ94!1^X++UxGa-bTM| zVLKtRE;2aZ;H*|v(yX6vcXg7i@2`750GmQX%gU78jffAH;@^|&zg6K9< zRKXq*CcCA+^EeRs8WN`VSq8eA&@3tdGJx~o$x|$M%Cft`6Xn9s?l{N@UGj@j=ZIoD z7Keu{_ZQAN7a_QiF9=1hCup373@Yx}w${!nnp+=bDyD6WQ>Jd1&$-v`#68!;mLVhg zU`84mIhN0q#-SZRk2U%pwyBHPjKAq7s)4#RLsn~?<|Q;0^8vK;6s(RNU^9D)102L? zGopKdjfa?eO2&vT{*K%>kaxUZm|9ZaFP@=2rPUk=Cwj%@qPN@smb84K&jnC6;gJiB zRVC~l;-M;W#8RJR+i}e`6RXHh*PN{ur8>9JXpU#e+ot=hH_;Nc#G+DdsA}(`NWh|| zkR;_qjd3}caa&BRoFrG#QB$aPqJ&6c1%aQe4PxZ#l(!9+x1yteg8o{j^J(!n{&V8jb^AKLh>vj!N*6Oyjxan zm0epBuH^k#-0@od>1{YB7SHlEzzm@bwxgtmLor&;*1g2xF!>RS>~j__DwI{ci!)?fFFa9*PI#eY{_C1VdUJ2`=PM zZw;j5(o}e$AozM)AI^nkY)Hb+mw@pI@J?Vjh}9SwIw%=Zqe0^!(YDE7upXa$7^Ch< zs9yHO#$>y3FJbtZn{N`D5^5ogJf9U3Yd4g9t}*hq^?+%o2GG^RJg>Fa!e4>Zywb zi{!QG#2tfMjnKU==MgVQh|h2)thg&#b%j;s(W{t#>yGw7(8kh=FL_y5B^etaoG<58z;9;TOhb%N~fg{_y^SexUt|Get4BB*;{-{e0*i zoS1ZpzOw=`K{3GGbB!2MfYCDoJ163SrvH2am*#o|w$y)Z@Jn!&U>DC#Xai`OkRN+I zI()#RzC>b{2!0ets?;d)mdvX0mpqt%{zBi2rGC&0?d+l8EhlciPVEckd@`w14-SCd z7kSL1N7l;(!-Fzx+ZO*oBY>|PdBg3q9&wXFhSvC@UU$Ki{__mAoQwRTYDy(?f26k-+h z4*sVKAVdz}ySW^2Ulx%rxVvBm! 
z=Uj@7rVy~p95CS=NLVq{kf?f~+ylQddP1oUk#} z5KSkU`c`Ws~u=>LD6D}#&;2Lh#F#ucX_ z{|uKkOvfM*+waT52r+t*$07^8H{-^+2wDU{3}kK*;RgU8`wGV^M4ye>*Q0!w9e9(u z-vCuxJ;Ev<@ihn3SqkIdI`Bhj+XPj!4Y@Nggr58WPhZH;lt^6gH(Zj3xaNzAha5M*thqU&;!_#F7OVHYXS#A66iSPWnMH z+6m!JA4MPHJ``}2G1z}r>H;d_Ijo=?_o);7$pk`k9ZsD4z@5Dm=;8fPb!n{Y;1iA} ziZ>-zh4%$7M>Am)55MMR(QMz2ty@yF|jG0(Jfc6T!d*`r% zBj=q$j<5FP(955`d5Q~!566;KP^_P(?i{-w=(gn~%$Xu}$DE7>zY@IS20dA|5g=Rv zUc^tP$cK^}C`-L_D|8&pF<$)P)C2kp-7itiltD*K6#fuh$P-2$-BEiP5gX0S3)lIS zzw4RbUtCn`-GB;APc>!YfAYpniuuEL;ifGFTYu}W`@3Nwj(qbO`N5KlGH=|McfMKB zV^HW1Q{PlQD-N<0bqYQQ0uly( zUJ+~Ug00TP!nV2;wYI|A?1Of;&Fw4&rFe&ejD*ka&-PafUITiU|4=wJW>2vwv9Is7 zkg@UrL3D^vAW7s=2$5%was9m^2CStXp~c0+!MVZ3#ifD|8yg!F)>^U&$^UzQ{#r1Q zd+=JYdvh?ma61sNfcT)Od~17$ak~{*n^k6OY;9iNN53*s04=orIlyRZ_2p`7O#-^( zUt2J=eNM2(nQe(LgX*Z`LoEFFW(&IGHGt06yd8elb&!k5bV7$L2|masBOY@WS8=Izzxl`h{J2U0)3 z$*&q(B(cEPKHrb~><-$#q6SxB>icAVP_Gb8Z+C@#K4_SSYkMtXm_H6cWv*EzfOG#2 zn5L+r=8szF7l75;{=2}hzSr8wwiUX13YORo;6uUS`uY4i#UB(GWSym~OZ?9CqI`GY zt&yn&fqcPzIXgXZ{CS*DD`x;Jfp+SZ9llk5EK7XMPR^UM!M!!ULzQbt-5>En-k)Ge z8fZV?V@lT-_~&qwr*L_(-SQ4s_bUNHwLYKrJ_Ht)mXdfYTYTL1$Aw>D+Pemp{~I!p z{XaCj{}VFsKXAPNiPZalA^rXn{a@#QkNe-ic^v=w>i-GOW8`FEVEG^5JQikV7MA}N zoVV=_>8>og`dYEo)JrIK3)WhCR*>*x2EtNX2{QtOJdOqO;B zzBrr$6+R27h-wVB2u8?wXYG#+mXgx8+4DvfkBBAVA?%WzWj6cYlnyk71yqq*@Y1}7 zezk?ZHB1p=bmR`XT-LkyByVdBy)SRL8g533~?Z)6I>(2R^GjnrJCmw`G#d1tHBi6 zuibG=-r9S36l-@rmUBsa48*))*P*2a-mUIAf)t0}0;2+K1;q8K7IVRb$Y|FX(%SsV z0|fgu`^9>(2d++V+noNS)6T!(p@QD-WhvHy{RN!HC@dIDa)dDP%IG#8opF>*d{9DU z5HOxc^HaCudwc;li@J8_@s!?W`=9}4V)A)JxFjR~5}rUGRWRi-%e2S+bZWT3|CD4W z_N{^B55DIg2oX!Q%VH{mMU(KD>3R48)bD7>6TXgW63%zQhrQJUVwi9K5O;atXoZ5y zEb9?;=7`RMeKh$-#C&FIotHZvI4VxsT-I6-^@FDoFVdpjvj041`-qNq;h*O>3r%<` zxa_!Hv#8E8|DOCg1)}wnw~HFUk7&cL&}VbT5R=gLOvRLxJUD zs`Q1^Lqru{|5&ii5&jL5XY8kku;Z7nQx0&hzch9JA{}fHKpi~qy974HR|3n1=ZS5$ z5y(!s_!#Be7gk{!G#4H)qZkjJ1&Rezw^O&t?vn0U0-V-6lPWSgP5W;ESwN=0@$MYp zQiMaMU+H%1TxCDG0+?mk58PzN@R77j^Ub_GC2GCkcDI}ImhqQ7M|i$>G`>8%R)lr^ 
zvn^`|%ZTlMz3JzTQjhbXDYrw?ocqlCM~$73FucC&{_uS0 zY2w+&Pleb2ucGQ@d+923PC;f4`es4S3U=qbZuFIFz;*E4Yj5Td?7;cO)bm$_cLKb= z@qE5d;PCv9wGZ6?^ftQ8H@>Z((I5NiBaQBQM`57k=f3E9;x)44yk+7W*q5+fY9Ge^ zL0*OZfGUx3DZj*Nj{)SZcz?ith3CpCM)!3__ZI+5fh9(k9IrNDTm8V+bAc{5a=BP! zp!F6s<8KCTk+`$!*U)kc0A~O>K&6a_#u~7!P7<5a?{lPodK@f+IFXfJcgKkhX}|Nb zvAx$#e-DRFAz$lilsy@^5^3iFn_J@%hQ8E2hTj1&HuVf}0)VloMElV9zXp+gI7Nk{ z(~9lZz4>o}qL|vG4KiR`x?X&P4zCan`{e`dFB-q~F1*jEYZH!63-7B+4+74^es5Q6 zIKtt2O3WPomkB>&c)9dDvZrC2=Y4vgU)zvMFm}spG1gY+U2uOu*)spXM;hDVF~Eto zoEc^qTjDw0#mI4;E&Ij6#=hs6b(s5{9wqzmULg(qyum($J_hUBAiNh%h{_XHmsRo` z_5bV7h5sm@8W$Bmgzz8c>m#<}yk`6o`$?X&JQo*9+!#48xD8OMq_x?9igy2U)(1A0`*f8=qXPde>53fP+ug*$?WP$2#>SX)J-oiqfg}G%N z28^^LzV%nOgE6V~(iSxWUm!kJ_KoT+d8hOCKprl?!`R^n0Q1cn3ZOp<^GKZ{<5nF6 ztYeFDp_I{kLY{M6Hcj5?{T;6hZvoh^(fhKl@_L}xriB2`b0J=zus={c%zhbjRl$y| z9Agik1H9+O8mS;tD=sR3m`^nJaKG`_FBn^a{uTPBPBnfVdq6c`=B>i~<~_SQ7r=a1 zY_HD(`(@o%^#ImgHQe--*Fj!0c#i_VYC#TGt+97ryLl~wPV^p$*KymVL%!M<@qGi< zHi!KU`^I>g8paHjX{4D~SjV)lH}^2Qohr%id~Wci!Hd94W)1pY z_Il1wcI$d!5Pesp`<7_(nKj1wxV3H_{|?w__P1@7KJ&Z` zePPCicZ50Td|`ar_p+w)Jmxta*;7MjAABMR{3PFCM_v~M->CO|T zO><|&9KxB(`N3|zGqNPkGH3mVJ_FtqS%yAr4QoB}nPIKSEqq_6!F%hUr7j+agZUA< zL-tBK-(DGyZjbGk_hYzzJ)|CPFS3ss8f7PS_JRAMQX_|NO*<@`Oabdf zpLdF7EphI(54dB|V>wE6q~(uwSKrO?zWfGb|69$Th0nVq4Yj2`vc#S&kRmt@lS{^v!?|b)J&>6~Y_|?K9Zh8rJuXA4q<-YkkKSwv)>wtmkE_ z)ZtVb`-ZP~%ETtZ2*i!<@>?Z};yj(;QJ{peq)0sNx) z1U*kiw}Rt(p0NE6k$$mHc3jBsw93rcV)AUaTklKFovE&~SbPA>%XtiV#@NA8#%{Y9 z*~&l5X<2WeuQOV@ufv^Nby? zHRB1l8$BP|hI7r{rmvhYwM@*}p7q5#)OnMPj<~Pz;o7f6>&-F!W_hnR`9_-g@T@?= z7dT7Jm=>8be4oJIEj(oMv29&$WR+m%KlU8@oXc}_o#;jT-ZIk{9(PC4lU{F|OdA7C zne-^s8eq)|@wst^!G$7&@ceQfwK_A{RGRR zvjN*G?=O!v>HNOj_#fUc^13Yf7y}ydMyV&-;9&@+*d85j#39rxApyv-nU(B z?guge*rFP0_C#eSEg(A9;PnoA*Y^yWW*>$1Qb8_?_sLw>(XwCG_3%Cl`$EE+ulh+? 
z)fzyD`cAdL?BUo3-Z1ULo)qS#sxe`l*%aiU_`U)AJU(lw>88IfQ+|l_htKrH`xm}% z=6S?t%Hx42L|*z1I!pA*`*OXGu%EzOaj?GaNBI!W8zGi)n#^^5$IxW_GWzJm+L80G z(^2M?_BTbQKEAho%WnOyvQYbrcAbxcNb#=S+Y;ncS<kgcpc&W`eM_bxgS^X!B#-d9C{DW_toJ$AGZ7JJp<2#11Kh^slo!jf4F(GI85)Spor~MxI)jVaz@+Q%i=v?B2_pAa zd4kA2038Gs0_`W+-+W@g>oK2~-BElS|Kh@&dHDRmdmcVJe{9CgKAg{wye{%xv-yrl zpObja=d&T!jPuj;dsIp2o%7@zYy zcf7w>hVLRnV*%ci@j0v-;5C`o@2i1ZvF@;4pzk3*_d@6UKSek};-PPiZS4KyynH?m z70ABU>1_66ya%6Z;@EdMm)nu=E%^Q?1fRa2_qtd!Ly+r!K4)|P*yea&44=85WyI&+ zy8!gT;Wg}Ak*}^}tkl8d*7a0NJ)v-2d?tMGkMlpjm_qw_y169sKoB?kc+ zF8l*f8$MKU{+oXvyubKWy7^vDkM;NN+e_sPqR%0-|2*_v&wHZNgU;nPi2SzZPvnpI zs)4Qk=>VS;<%c4^=kzxFfLhu6=sN;F&$Cb9`ylM=oc}WZsACj{*pYi`hxdKF{|`PbM2mDURF?Y+l zJ(6;I-o9cVXdB_Xo44%)f6u~a^>IetKiZMAwcaQ5+RFE{w@5#kf3euehwp$MH?o{zYzc5~iSmnlR`|l$(Mj2->P_oN4a<-vM3dp7>Pi2ja%?+*B^$M^4iwpd`x-6GE!-k0|>`w{G=te(JM zS;AtO;-(Sn>ttBL|dF>XCS?lbEsaCILgb1i zA9ym2ARA?~X%YAiDy2eauPRk8yep)kbRI1R&coYobQIa>SvDn56P-;rI(sMushyN^ zenx!*oxM~`0UF`l@ARZ@=;hy$vPl0Pw=?=siV;mv7m`KFxz}0m{Op|X+~!gpMMaR=zI+0GHGiageZIxOZv&*wbWRJ=o zmtB`VF?)6PTHks5EXUDxB}1>H(9#@~IGIi}C7MxUm@aW#q=Z5V<@|to^Eg_G{5jWz zR@n6hkJ`d35Kau;n|CN?ZfvTxf zCQq6;p}wwme9gGA)ni7FI%ee2BZd#Fs;npKFyPwoR|`lzYz(IQ3(w}ACjsL%oFrA z=xn|Na|V!eR-2pw$r*^`R36_T8dTK7SLyc!w^sOlEo#EpI)s;3`0IVaT{=8UhwTC# zNk$|$7bSd^S+gsALDk@^3|5^nyQ#9F0`+Z3Oepu4&rB%lK^qbh5lloVc({Mw26gyg zr32RCl}Bu_C@z_s4Z89x8)pQo$JSL=cyn{>bsClHx`Hv~!B}0FZw~jFmijjIc%*4r zi-)E)6ei8^&uE-f7j!kEP?M{&scA_ttuWZlUlHtf_QzQm^2}fle??`m(2vSSj*B#) zg6@2e-`Dg5q4WM-Umlp!Xi{SGJwFf!d594);e9wv=o@;7@#W@nUzWB6Xc{8H#bfIv z?xShmW(pJ+)(5Qye)CB9O@~^3vpD=FQoO;R%c`nuF#pb&ofTX>%~#X|(%1j;@el8P zL03V;wCS^%8)r88D=I{8^;hs7|qE?DfJ7i{kzDjAUA zV;#(?snbPFv0(f1AT>-krGmwk72GdhWm7|ibdc-vkFDE8CC>XB`ue;ZORxmjb3cL^ zDp)nH5nTJfKpy>7c3YxBF?aEKI{F z5y$Er9)9J@$IL~Vg=%yXYcb0gq-tNCf2P0Q4;u^+KYy2a}PN>t`HdZqJ zfS9}=5P4@pL8}}FSXJl^>r}^w>3AeMyv^$o;n%*VIRD6+Ca&3UDkLAK4@MkQFrsnk zfb_nYmsN0W{;Ed5&*Q6VYHV>9Pixu`2sF*BY?yrn*Wn*Aqsd=WH^{5IHm>d*@7df` 
zI*n8##}6&)0mn0RgI`@Vc0)j2R5PJ&6BZ)hMdRz5EoGHA46WbL1@G!M`QX@fs>P|C z$Pph$xTbN4#OeIrO#!0C`mL=KbbNY?BApf&PE$0!#ga5nIL$(uEolLr#vf=XYc}Ky zUt8&$!Mgdc`q@nl^*pmE1A4?i6;%Gg6tw(JwY1oKLTo(I^6}_P;q@zy1$Q}n3x9%`AeAM(Ko88lN2waXslj@6q|&e zPxs^1^adZKZPWA`*ts~;1h1sbgg>+kW@;pO%}e58x$+Z}6N2$QQ6c_uII$~@I(RjL`+|Q<&{fX*C|5JKg~s_8!mV(pbxpB&9Zb$2 z(Fo5k?FpQ0XqUK!Xh$-4=LP4!J} z6M_>9i+aShO-~YaTCC>g)T*2zv#fim$hrq7vmMrb zm<5mM=MDPl!*elUnfSa&d>%?DD(0sZMESd^*18LPCwL9bu&zgagr~BuMeG_Ky9OoV zt*a2f8u3&-m41!^Rsh=on_jZ6MEZGnt^l~@Rfsq6)4Bp>FXuBPp7Q{lK=AAZGypE^ z(&iaMYFn(!nuj|I)g z)KdcsJLguNriSF4;wsKL*&32Fr)YNV@kO(0XBN$aZEg{pg6WTxh7!5TSrj znb2UNql5+t9Vv8#&_JO9LZw3eh58Bg6)F+xBh*`{mr$`#&t`8wP?1m%p+ceVLfwQ8 z7wRfhAe1lEMaVCdCzLDX6Uq@fOekBZvyfM)lTb&YETK%H451D}?Si5>NjX9xq5VR?2<;QvEA+F_9-*IveiZsa z==x8Pp^ZY#LK}pF zLU#+@)!eZhbf?fALbnU87rISoozSg9w+Q`1=w_jtgl-i2yU<#p8-&&fT`#m+=sKZm zg{~30y18RI=qia_DYQ!H3Zct|E)%*`Xr<7K=Bx_PB|^)EmI*BtY7)9w=pvydLKg~M zAoMq(zcyzc1Nw{5`9h0@&J#LU=p3Q{5?Um5w$NEZ3x&=UIz#Amp#?(og-#PXRcM~j zDMBX;og{Rk&YW<_OIeI$mg&&`hBjLerZ&Ob1O9Y7}Z{?l2xSRcMMNP8OOZ zG*M`RP`yx{&~ZY?3e^gYm&$6I+hd_sSs z7aAs1)trXqMO6w_2$c&B6&fNG(0e>pCNx;+D4{_@M+zMwG*D=OP^nOVp?*Srg-V3_ z2=x}~B~&cbQ>aL&hftwVccE@VhYNKTDiF#S>LTP9$`i^J@(JY#9VV15)LFy>;v`!KLdM!pMW2MAAs+H?|^TCZ-B3X-Kjluz5;e3{3Y-O@Hy}q@Gsy~ z;1ggc@G5SJdUxs;1Olo3 zkElvb$w{$>@N-Mbwv-(y?y{6IDN|EiD^hMqv0M)+{lD>$QY&eE;mDR)XWYnOT=m4D zx+s`m!_UCj3Bj0)a6+Fjv2KG>m(*`iR{8i~8s9AG_yx|F=8v|Jnzdoj;$k z0G~o=n?r^3TQkhpn9m=6$FT(xT>xv5*Yi#B!on;fiapU)8k*GR%cPU(T)LEQqb>A- zYNrOMIcf>eodb)$|q`T-oYC+msq}@R`BKJDFovuN-)pP?=Z$+G$zrlQv9->F-F_gIn zrS7LqI_+V4M5jJZPe21tLkG{%^Yj93r59-%$~}m;PyH(WcC>aE1);TO+KBq@)it)D z&TvZ)L&MQEx7P7bdWl}9?cmqwb$Wx|q_^m8dWYVn9rQkZ02|mzpVDV0kK}k8?><2K zC-f!lqOT(T;F?~E%Ez^Y>(~zavQ(T(Rp|&7s(wI;I#QLXfGStBVR`4MCF&xYPiND4 z(85x!lLd4pcqz@JQ=y3^(9a55qh)vnw6X?rzn}gA9k68A>(M`;brGbSpp`p~RMz1s zQh5L)e;nh#N6V6>@~DnKMbAJQ&zdpt2(}>oX~?H_WZSjOp8wToUq$ZsAQ6@aOXF*d zTx9nlM*az;_j;6UStjpAz5iI3`;`6#8GlY+7>VviE#J_$^j#aBycf~R!OL-}@978n 
zk$$GV^b3WsblJ+SVpJUBA!V7jKhYkYW1q=mDOWh3idP9LQ6-r^{6>Cw1}&NEaeQS?YK-LqBJ$h3Wzo126Eb>q`cQ-Iu$LvW|gQ_?jl+?~+gh=Jb`np61px^@6o$ ze<(KM0pV+sVtU0SQlfh|(2o+_Uh75YN3}g>E5*C_+Z`PJYmt~B#UGMin-AM~g*_Q+ zCbFGR@o2$Y7sXN@^`#59kY7Dag_Nu&lZP_YnSrFZLQm(oxXzwJ+Z{V3L7h$(xz!om zs+x#`wwe@3fm&ROtmNxFp2Jdm;Sdx^Iy`o@RqkDXn0qx1p|btwyqC30Kli_wKCpO~ zDt=<%z~W-hF8NE#L?j&T-Ak!lrP}4XRQnXmAL}nI=`+~s*SDwT=ip(MPR=Xn*LSeR zQM)Cy_m_Su)*AKl{$pRZKFJ?@Ze8xhU1#+hR+!$QZ+*qUoWj$ts_PQEs-3S}M$v$F zzU~*e%<0WcRGY zv(r4CyO468hP1ARw9=`CH%_&6fv!ohv2k&U?o>O+F1B5%ww;oQzl5|jrP7H!R(wKC zz~#d>@BKg~q!bft#iZG8+nwM^8RlMtofoUauWLN>up_ao~+-w5r{ri{n$;d2qrMO@yDK1q|;Ll63JG9SWqbXH$ zuefI1>W@~AI;-fec`-@Jo`k34GOH$^s!sQn7YsXLbje{!r2~4-o-!;sv*osHlxy>x z4k`KjxB5n$aP>(o{t<(^x{gUqDyf-!@(JCl#~;1mk_!IyRgv?R?Z*u2K;7va8UH4v zcMYUwB=~ZYlX8=iauXAByL7~PJdm2=ZdaXDlUS25wu=*Rhev`>`aq0C6>lx+<2h$Z z$`emKp|U)$!uq|DdokMHENrs*Kvfag;QtR#S^A zxi8hP(p>!Onf}e;A?$w*whV9Z+AgjAVhs7gKUolC&rflQ6R_C;Vx#olztnYR*V^%; zeedq{SI%9&;e^Ro_AeT+Y)I7!V@vZ=7oJqLY)DaRmM8A1N2}Lfe(vOcozix8_IInO zn=-jz*6iF)#=@6E!yTyyEeKn9Vr;BCDL2>d1PkBTA-5nKwi!rGAD8S-8tbmHYYLnI zRY!D`KCn!yH`=_L53LX@7DH-R5;khTv2>TQ>QWi3wWj~Lyz1{BjI!&Y+T6Drn=Z~6 zUMNOxCnUtErN4}CHEd(<`>VyScgLk_GiUp*!|2=FDU?ga^pMeTe0+N67WLgdR1D|Q zqP`ClrC*+RQ{Kir*Sx&Nc~&5=I{M6Y*RI^$|F?{C| zNDgapSJ^I}$-AJI>iRHo8DcMG-vU!tZ@~klZKU~s&z>@?Iupo z%y>AXbFV3vOl{h;`J}^#&pc|>IrY8o%Iey;bHBmes;2ZK%ego-$=>R|nhK~Z-EM3s zCo4H8COIbFi@)@O?6`QRQ(|&fYD|vb>rM5O#g|e3W79X|9M5yXY7V51kBn{qQ&D3UD_DCwj15Mf8ppM zJrkFO-t4LhvJ3aURd#-#t|g}qJY{}Hwp!UWH1Fb!iGR7JvHMY-&U`l}v-jx!)|Q7a z`{(m%Nn4UrPM>?(!~2gpZFNI;m&<)6Tik!Q_a)$MU1g%@+$&veS6BOPNmsiUS&}7N z@)lcmV#kgXJI=njiPyx5V>`0lG=$PXX&Q*y&?SMUbQvg60`!qRO-l-GnhpagAJa~m z&Om{d;qmw;GX?sfltg*|xmU6sCoTPknUC+yq+0Nd{(U+3I&y-nNJjCYBs!Xx z6|2!!CHn*;eRsM?5Rrk^g;z(+X{CAv1pO!* zO{_fIv9a;Cwy}@cR34pU(dxeVOsdMgIX6AY2Ok=)3WSdF?%^%v>!M{X5SynH){m~w zQBPd`KlF>zr;r1D_)sB76;Z3wY>mo;iM6|u52c=1V()pXgYBO9MAi#2wM(z;Z3 znV|)~m3hy4130(Tz?=4sOFBKSiC#EQLYk1Ro4QZE0za=WcG2~xUx80=0DQsSZZeX?M8Qk&e&+spu* 
z=M1&-bQ#CJpwUzrA=ac7X1Pr&q1jb2a`zxeBjQ&)au7D@r3E&%BBvyxL5h3zl2C(b zFXX-SwRC?tOmi$-SXFtb_L)8+!frgZgl?DLB5@ zb~jhr?C)50G~@l{+>fRssxI2FC3*{OI63%r2%p; zX{L@Qg;(BrZ~VPbUJGU<$)`(+p>kFfli*i2er{WBVBzMwzqs+1hX8BcviFHAz3Y=} z7FK$7eem82@0RGs`}$UW<+FG6REpT4Jw26Fw!gn*!MeWwaQ{F_%i6vHqNPsIQa$kD zn${9)42nK9S4~llGM9T|gX`P$?eC3{VoP;FX>M7!WB%&h3)B{B`@Cfprml~FWlip$ zypCQf?rIEGZ+%$g4E*Cqz*!U>7dbnsV#pIJAwPLEC9UV^u-zcxLICEdn=ypV=dc{~ z8O62GWLt#W297ohoUzlF!^`adH*F{sj3fm`qh&$|op&a&+0$N%!~djD{EH?8v4}~H zG~c0MzT-AqOroSrMi*r`-8^pVcG zeft)F^4QSC$+my>`xn(1Jua8N?Edlej+GTU)k{vjDOh5#_<3Gmwzq%n$5)k@^h9>M z!MdxY^B{{I%v(iJqlesJG@u`TCenk5Ri#iY83HsZkQf&wbRZvZ`BeSDffXk6 zy4>3VtgXVbX1qU;n_8#D&e~}8XoAxj!*1FsWRy#si#&6xFDNYE*$*SLt%#G8#z?0$w#|z1Wq8OQvP7n{5EY+^te`fSI-_70AI=;M8LBIPz z?#?ZwD3-d77S3A=vcPq1g3yo4HTgQg5c;|Mrq5y9<{gM zEkC~&RqL=YP)R9N3>hdOD&^dP`C$@TF3u+)2#&sG^sHfIjEt6xQwfMlHg^oF7Fvl9 z!wt!9G})~47|4-&_KUe!axddwv5Pmmz40q;~S8`ByUCd2L z`k*%=e^Gf)y0WOqVRyLPyp30?v^u>)ueT}SgQ=v1RYM`6F=@0~N6}T6idXn;dK1g~ zO(oA@hU{!&>a>4_(X>*{btA!Yp{<$I)94cU;oYXMU+_)>|Km=7#iQfHkvzr|hC*O^iqslchX59Gdgz}y`FjiuD@R2+CD^oYfL z%ROf^-j22=`=1__d{eqw8hvO}nN+Ek3Xe%m&7Er*=Dg?v29$1RVs-BBArIAK|bE?!Y(Y=xU_O!9+T5@mt@?7 zJsz6`hId1&PNgjdpHg*|SGzfv#w<0+;aiDeg=8fvq&|x%ALwNgVtvyC;_1oD=e#H# zsqhXZzct!Js|K5Jd4{3TL(|ZmFAVvs%3}jCSYDgC`h1R9WY5J2#+fc- zR--}``vKj?2Xq(LH4|t3;iKPy=tX)5`ixLlwxl6haZ;sRDmR%)*8l~+P~K|_5qgZCuFRWvBZ1oK65T-K6eiP`rH#E2m8u&N|H^sLe6{> zcnqOEX=}(8av7bx*WvX_9jsC&Wm&1zC^wL(VT956RLBV6w-86sYn@8hRbIwAv|g!P zPHR{Ai8VpeZ@g5f5(}=PE8Mma^D5z0CzcTB&rkCk$Q#)Vllh8Clz?c7v@0@_dh#J} zTkf48W?kmNdyj4E-d|C=Z)t4f@+OTHp9$vLTJ*s>@2apwrTykpxcA)Qk%#*uChkSf zW{LN9eDqxID{DI;&w+l=fS)`Lova=O(Tb;$0XluosgcPr)~mG|O{mxl9cEf#)ys1- znN*_%22ZN7O5Y_Vtg7;?LSaMnS?KaFUL@uj6?UbuP}ofS57FV=9{nB#wpEsPt&B}o zKKprobME;NRlR|9=l()_SxdU}Xj7@)_AQn6=oj(j=dkokd zX$4jT*}%p$z;^KhG=W_(m8EWN8Wx!Dq2omclHu;Ke8j!&WX>nH(1P;kj+bU4T zg~m68<^dWIh;gsINbp|Q9I3qb%h}P7 zc0E^!(mw(@8Bsc|(d%VKBi3kSI+;nQ(+k$5!BS(F>;<`+Fd$tu%bKDjMC(W|DF*1k zTv}=UjVW_nPeNLmCo~LZMUXw5(psvRXau_N9V)P8++lkcc=T?ZgEE 
zk@!R`ic~DZU1Rtl^fl1d07%=a`}Lp3;Srx3pLJl>w{?UTF-Tx#MgQ z?=1~&x$P5aUwaW|0)%VZ$kI-tEBXtzQIqSitvkg=>) zsWm7Od3M)oCG@5hhNY03G+oMWp}MMRyd;uIvP7Zj!ZZ5|Nqs6IBl3iLQqrFMO#8fj z9~-bNd~d7HEB!lj#~WYr&#z7CX^YkoojSiG_gTua5~o(a^U1x8Tm@YMdJu9Qk!J#p zruCsxYot^eQ9|LBka4_*rP`|+O$B?^K(@OJ zX-<%9ewT4N!$r^Q#9n#2ahz^=MVm;8JjiAzIS@Z%B)bMF>UXh;^_6e*FPOMxQ7WKdl@OShEQdrFTmOk}+;dAOMV+H4zOgsm zxT%R}xW;)j$gSd8ol0BvDq1e_v&H`K1BcfpV+{>a+|d@ z<<7su>MQ66Bq?Ea87hOWkP4q*q%@|^P~o(KrsWDLmI+H=6!X%R7qxHv!zFQP@}g`n zF2D>no}Z5y`T_jg9EX2PDibBOlYUzohq}_A)|gZ(mQ%ZVjEoXHViELQ7onYjImt`l zAS@5}%E!`P+7hT%nX7VYHPk%cAwmqAGSBwG)4%s@5Ee*Uoe1{x=>q zRkUqx|EQ&+#O`n2Sf6V}SFaZC^-~PG6WT5M>T8f44og_@ibZH0kUO*eLvFQ!IZtm0 ziGoduyGE&*%_ENt8^t}PU?{}V2Jx;){yrT2Jx;gqw#U{c2ZMHx${H+s>B(=be&G47 zyZ_f%!d=x83V0mhRFTfnzWedj<;#5d4*%tiTL$;v+t%@#t=wn)ZpWgE?izPTdmN5* ztGUIuA$v>Aq%&2zIh#?9<+3|>_3m8}@HVcmi7u{>=OAE^W~A`C&fzhnMM0c_RXx-L zX|4J|%u8;a(q6B`q(5XkLmfDdOAdf%JP6KS14$Y(TF<3MVH-FhG{rzps&J1qnMfo- zKg7dBqKBJ|G8(d9;chB*PLb@NSKbshef^0siryMqAF(GUTa)XX0?PLBtxZcxJiZkk z>gANr<%l%}Gvg|=biP4%m(HOt@2D<|=01}^|$l|CW&;=_bwz`AwItz zia(uBD?fvGoVM$=dcEmEH4uNCa^EjKOC1pY610(ULeg|JLH2y3VplKR*aK!{LVTUJ zPJ_X0o_>soL}6-Q!dSi~+0#=u^4axupQ!t)#ZYA{ZL4%K8i&bMpDa@|$#>hr;oF8c zeSWmgY@-{ml*pKH=azb%+sZj=Hl%^_cIv#u1e6<50ClBVNx)=8G)2=0LpIxSk~LW$ zgai!2m+%iLo&(|2h@u`q7^oLHfWQPyKx;u_6M5u^G!$e<0ys#})A#;G&yf|jUGw)5 zw;eh9h{hDHtn;q!t=fF3*B9oT#-y#PBWm{Yj)t0Y1(Uq|xju8qrJLNk`r(NMGWs(z zdF|@{((+4U6jMM4+Y16ktAd8l@4NY!8x?|@ngb|f}Gf(k;j?zK}A3NPwdFqE*@+|ju( zv$Tv+H4l8U^JBY9*JXRS-RB?+G?%R>b zrKFSs3*Z0=;arBnCosM(W#B&>=pazOAZo1|dT?WD zhj(f(mt3A%v|v0|9$ynLtt+*k=e@4RRH@Qst+Eu)D|2dP)bj?9%dL0CJbJ#aJ5JQY zv4$&KW6SHkHp5*;t=19zLnP#`UJ|{JQFtnw8mo!E7K6TC1OL}S1PDHh)Zp`aWCx{5 zO^1|3Kh8s_pr8Cr9z4SiQH6LPr(i|Y+Ijs8gNfb)Js-;^dhg9tS4CQ5*2)@kq+W2= zt*KjbXsYd_JC{6=X~p-{RF}7|ElV}W7OjJbw~D$zzsWGjh2m))bZ9QUm6dC>#)CSY z>L5pmaU7xx#3Z7ZF2+d=JfDv?(?*gan@A~Av&p6(*P!pO7_Y8gUC*|y*5exg4G+tZB^mAi`mD0FJxcU6st;o3N_HQ0OWH~Iv?3k{4}dVScOTC zvg4SX6@nn8KLqjPLE+EWh4_&Wih`O#Ko&wEI2_sSW(tl5_>xBr#gkpl-P)ETEu7cr 
zZ%71{)kmn8GpW{&mFXpTdG4wCC2kYZBo_7;`l*-cH-Qc{3Zx+^sG%Ofh-ROKb>$Gh zFuaKO<7oVRoZKbE34?{CZ-$cjaR0MaT%s*zXHJ?QJNyml{M^G6v9Voy(x-ASd;;cH zfZW##Ily~N=aCbawb8`R4;)h;GS1Te`I_mfljP=Wd{Z=UIvrSFmRMHfGKUhbNRO{5 zHc{U)x}>CTrW$%H~uuo**wfs8rhMgF$e`W443ZGt{RGHRlylVrt1j zsON>vqr!elMa&=ul-b*_*FTK%CwsBBzLo2K&iNdtj&^KcWUj0z)3~aZ*x{@S)6U$L z6~cK-rH$&9(&;q5KfADTQ<|5`l+t~yDxI#)=6Zz_60(=_8~h{s$4nP8K~ztqH8zjO zfc1KWj0TnFF$Q=zVRjiF0XMrJxOn3t5%u|aTXZJ#=8k2$#knuJ@VBe0O#ZmDw#pbtNF7)Hv2E)@pG>8eeTFrzSdMSK z4_8Kf*09GI@!7&20}|~0W%^}l5bQnjG}7SAX(dKTX|+MDe&D$NQC4=Ax(}=kC~dq* z@;$g8BuWw$#7Ys!lKGN_$PBjk%;N@QqPD@ke5K}y(#^Z)RhB5rzbbhgUf@pMBqNabq7?1%nci^Pak~QcdO=nNLvrCfd^F@$=SjLy*MCuSi(>8Ri$DzaZj|+p?VDER{tpb?AiKn^oqCf?g61Kv2F| z4H*)$*-ubX`6nnT`kC0uovr@rigN2i`@+SV(pbW}aPgF);{C38+Ph84#9D;f@hzb1TZ|t3)_DTaSr2$@_JmfQ)fuNr z5@qzkF8ftiySJpd+^Tn#SOXnJ=HP}{YE6SDw(_>NV0%!yU#W0J8vJ~?NUc(B<+RS~ zuK0rSrD2&0~ zY3Y!FaS_P5uFyypDh+0)&|qZLdJ5Oor@Bi_(gGEQdD*iHw^VFv-7nImh3q?H6=`A}Y{wJ!FRHDd z-tB8Ea?NXKsAknawf%Gnn`qmRw0b%EBt=!E@T#%;?VA-$hAT_fkL4b(U0PwH$iI_< z|Aua+|DG{|b;VKWq&JZD@J^sDtq^2bp4FT3a(p9BXcLK>`{dd+O1{oDH?yVb4fD$S zN~?TLd`%@Gzp;knmassyAWBxGxZrTMinQw)ep6snz-t& zzM5UF>x}iKjf=P%xg}7%t`=)FcCS7d)50+nk-lj(uj|jf+PFGtO%EDf&5hU@2%3%9 zY4F;(xX0+Ra&d@06bll*k9nE=S;x~zORSg5%2m^nB|;UnEGs-&MIF&C(g%twTy#xe z<w=qhsDq}LP>8E7i6;De{^b25fE>v{NVAbm-S?eePfmTa*7W`{t*?z4eq8GR z2G!V#+?FSXepb*bSWXxYYfw?)gDg<*l^4mPa4w3sF*nto><}Nak7TDec$KuQ+EG)S zoSNdBsjrewiqeiW_C&vG-YSwE@C#hg1CfjCw^CHeR;wIz?E`d_V1bFwtzU046CT(d zmsT%YT0=3K3-fdb@6%XP-qj&x4tbkpvv=}ya*3kexV=x=LUoj`C@X4duW2LesTFb5 zu%~FG=4cT!lv{JR8f2SK7b84thAK&2qMBiicsQ+>ECGn@gGcEb^~U5>L=Z6^ujGso zlfDc>miuIt%ON0@?A~7`$v*P}3QuVA3=}SKIkEmMQLyNCHcCIhPq4FD2 z)cS_u2*l~8r{>8Kpdr*hV^*;4@%2+tRl;VGx?8&0__h?w^|BWJb5lAK@mXg>_|hV5 zl=w%W3ko$SlnY#%f|IxtowQS3$evIOZa}3V81!pW=xP<0C-TCJukEsz{MTK<=wlGQ z{`y87uj3*2EzBVA+n_)87x6E{Nq%EQLFJL;I;F%>tew4yfKYE<5d*BF5e5gy(H&3Q`ctCMb%q60mR2~-fh-{~jM?^oN zJS^l9)k!W3kNW=zzN-!waYPnCslcuyu|$6;g2fa-y2BPoaV@ zu+N-_0DwOWTQF+|?ae~qgB|=N=-z`G1H|CeZx5X20Ah}392#su*Gn3#24-;N?lzxi 
z;}BOP+p`I1VlTC9`s)EzP8EF{_ImTT16>UZjUs$|e{z$De@$tLW*^wUWhh;*f5N2- z1dr_D>wyB4c@*GOPoK}{`??Ap*yTH4mZ=b=77xK)lR|-+>@OQ6100GdpeqytAl#`E z&V&;hsCQa^$V}kys9~t?pH}=?ct{k)3w>_ugD-^WJLKkOThCr>Nf)8`^Y;FMe1f<^ z7?CY&(1^#tT-Slm_`9ePEqKOVhLGod=@EMrGdIDf3JDvC~x&xR6G);4ze7Wvc ze*P(gpQiF62SIERzNA67<4V4O*ub1{M|>1#;zd$DM#6Q8(@tjLC7xjZs|35?zOqA8?;T)I;Ydmg{ zFU(*{Sg5|R&96S3sJ3;lSXEdkPfV%&kvb7?$@}!mgDJtAfo#x^@d?C2*&cq_sKy5t zdd$pC=sYGI^rlAq!Ml8di-mT-O^5w5FyT<%2zhxxlM^XDfonT2s(g4=<)>*dujZp< zqoHK)@5UMT4KjDff7dO7?XfTqn1{03WaR#43`7h}*8~A4yc-uG8x!{+fBgm7o02#4 z1P5z-HEX>$7XCU;9nfU9osE`_Nm}|g-Wu}~4+AGBTGC&ASXyFcdbk!<_7UYpF7*~5=xbek|fkF%ahY$uzVz-j1n6UOGA^9tsZb&O|U+hnscS_Ii6*Mg|r@ zzMqtRNSZ<)byY9gP?NMP3?E8+Uc79I()q9IRpWR zh8m`hf4CX5rx~ib7+Rg2O{(4YwJx3`3$dmVTm^hUGBR=UGC^XH*e23xgNTpTq)@8= zIIH_|C_>0UJZvy+v%|26^F_l#pdsL5;9TZlWkf&8MJG`~K=wUi3(XZ>=!f*4@<%5N z@rIfC=*v`21SBlXcx?0hV`PEYPuk$aK0dmciVYg%25k*~9qvvnVCQ}N`GRP?|Nk+` zIR7UoOq+m({Xf(&Rz{Y8=l_1NGBW=Uo0;Xm+W(6EqlNt+%rQ=e|M=~VtYD%3ugozf z26jf~|HT|*W8z@p_&=Fr*V*9S+ABS8b3N)_n%a$~lIp3X@E+Xvk+{^m#6xn`B9qDH z+4(lyz=;NCfg8a+uTo*v0uJjTD4RkA%{=Etro87@ETEg+LQ>biciVZ#>xRkVpB~M> zadAGkui1O=IeWZkZl%eB7((G_ctl9Gbro0V8clzBh4f&N2Q{y(PuKW-H4W$q>u`vJ zdyEzmzY2BDdp_6*r+v#&Z+GWA-XHe-s0fRf;ZOQ*cKQ1GJ6}uAAR*xpiP&Cj^|}u3 z7jo*5loBMvxqNI*)c1(->-sX{Rn?IXyF2^Zn$F(t<(~}XL0_ak1oM6S|3Eyr#LM69 zG1NEh)s*{x*+XXFM%!H8+ISVe!wS9l^SVui&gNt(hdnvvcjG_3!}b8@Y`CG*5FS70 z#NVy2cNDy47^g)eJAMKXhMZ)v+hV8BCqftSD^~V-Sl^vK?%@TQ9C-^lhXj8JF@0{} zchPNhkySi!NTwoU;e8I>qEqU3Jq^E;o8@>58YZV8q>d9B`FkQU$)yuVZplw`6$CpD zZcCJcbVOgC-;yB`g^ml#(oT?nLMTon?oJ`d9UIsBD`mWaIWVR>6Lc6K3#E=jWRaL@ zsudfE-XV+x=N0FL+>x3LAmx@(dU3W2dQxA34k$NAdkIw+@q%7qF(H?NOkA1Vx9x=f z>IasmP3L8vg^mZ->AvbXj`H`3>f@3iE&eq%EyR|1%i28g6~^kb-0@@iLoe5TZ`-q=s#Wl-@t$M{~yNRJ1jFk^|}At z{@=X++xXh)A2Fu-@uG2rJSG^qB_V3Wd@w5njQ~-yN)$tj*K16*A<~|Z^Lif+RuGoe zA-KyRGbf8hW)l3>+quJlO&X;68d_-g@!`p}5`cEYwcrR3elH?;qp0ngbQE24RdWUwuhe1({kE|tjr4BL9XL;Hz4OXYVy00rJ^wcF^nJPIAh zyjpv%yScj2YVur}M4b{I5Ex4wxv`Bxqf6i0T-;t>SI}42W}egMZL4&Y6ZO!voVfn# 
z-p-b(Bj3q6ZN_rhBxzA1Op_!vLL6fvDck*r1TEVAqjK!R&U&)g-GvO2qYQFD0bdw; z!KKJYQ+~h1`klYwmRX9=`kj;+omxkd(!uH7lNE{4PGGHEFQEv8hmj2e8Gu53kbdUfbowlb{6nrk2_gKVs5Zh{{Cgg^>&g9 zTN z;9ehvv-LILr{?(h%^ih52<`O8a4(_f6df%K{}weDx`P-Y+Vd^TO-R#CrwZ{=a7}FV z!Xi+hUO>FtOeu=_!9S3iT!pMD-ti3y!C2j-wSvk^*8-uB%WMlov1&E8jU7UCF!~E5 z+TF57@+xd2sc0Zcwu7(UmP-m*Y!p#-cQsjy{gz@Q_h6$nJ`UWPmr-v~1)Wn?iE5mTv)x1+6E0o%CUhRU(monVJ@i<5_ThKhgv9Lzi){QMI^7`i>pHGEAp zD3P`JI^4(G%6QuGT%2rnUG}F;U5;+g_{V4UHTz9G85q(Mdi%y5z?iCUImn!s;7SIC ze++&DrIapFkznSTQIL)zBu0L+4f_s;fh-{sBslI8$0Qmzfy5N<6xU3)ycg9w5cO@( zb&iXyAI_Uc3y=wIBwU`2ww}rtk(S0IrRi~85P~5N=31{q_%!wH&>yMP{6mR4BJeq-u8uk%3MA+$tz&x$`W?C5SMHkU{+c9qxK~?$ZbyXzg z7K^H`^ua1wx?Bf}IggQdYqcvXcm{m6bFe8+2+o#R zI*_745KRwFh+I;2{=tqN#U;!mn^@X__QSuJc#DCzdRIB3v0*2U=%Iy{rKazs z4KS<&tn-PXEnw%s+eQZI)vZFr^dv}BF0?O(Iv4f!3*qu@31f^B`_e&~!FYBn~J{p%tFj?xp&VMxJkvmjjdyo?heLvW-cyz20Tymr$7 zd!y7PCn=&Y`@4cBnMOz|Eu7dE64jtoVFYb2g*9c6(58pk{eyu~*rA03bNd>6)1;X7 zqhc#D20mVaFLRBRGz^9saKso;> z4CxwRn*MEX5$ZzFg=h;Q79uQ!=kU(-8cYT3rWODzghzy*^cqx!m~`x>8E_RsDnus) z)Ns;p$7an?3Q=Ry@QrXyizW=k;AbdA&~R@NJRr9I!yBKX`j;Wf6XuHW1i0he zV;m}0yJ0X!4vHi23-Cm{LpVSn3P&6CP_&+Wrhnocc9lQi4hs$kg@Yr!8uxtG5B5-W zgFCPv(hcW?b0f47+X-)DF}ncT32x&sy8zooW_SXykIMA;V;hz7^*@JSA*>PA3hKmg z__tG;U4rODu+<4=YQ&5m52 z5riU`6Tc9>5V#W03T0z559Cf?53*m2EO8@j-NOd0rveVMdGD!J_kuQHHb6EYHUJ|2 z%Zkv|jJwkgJCDnd0W@qZgflernBE5nMi@vJ1k?!Cz+&!aoC7fe$Kwer9<)kl# z2Kr*ug;2mK z2Zu%kWyEzc;Uj|IN{kvGAbYQu+z6+!QTjKHu@6Mu5iTIu)g**v(b*!##5nGK;DT{k zl@ZyhbN@4m6_)}TjDck#h+CKZ=@*HeHRPitge(xOR1^*6Fqdk9D2z5&06b*6zzBpT z5Rrm0SVhYx3gSf>AP(~Y`rcK6CH~zmbRI3*Z31;}iY>PWr;X?+Jd<4(|zk*~kB49e&39!oTSw?1A>U5&W_Z zw-NM0-m4?{LEgj8Xr1|4edP4{zi|z(5&VFU>w*5@9L^E+0=*merxEso+*1sr6ZQh! 
zmm&B8-H%-|XU|m;&z>!B%;E#al{;n61zuxH{3`SeT;#cH_3XYUd*(b!y7WClQh4a- zo_oU_79qe_wQ`87YUjk2w{rUBWBuWSxhD({Bftla^_N3Ad?oaSKXfDX1wRxFhZph| z=vedUdyqNvnbe*23V5Y(WE!!Zl@|1c9ERqId-SfI_P|p< z@3b3)+ z>SAH-TxB8We8sJ;*_fgjEyv?yQ8lvw@N~CZxyoh(X#-`$*G^gAU@vUI%koEeV^iRF zpe3NUF<0PjAXmUmQ$4?l21i4=m8$+@zp{U_@gx8hJPlmPbfBMUUSDe!5d!BLf`-x> zo`!E7PeY}Zp&sEFdQuDc5xx~nYcdmP20RNW#uPTd^PER=rSzd9JC)7Akg7R&Jv|dB zYwsoYMmQ^cy|A^+Hk*%fBxBHwl$5z8DltQL!kk<`cbZgFzqQfVT1`$k@x*j$E0teU z1gwvgx-KBS#cX#v!(wk5nhOg=ZH*Ax9NNbvb7wb{1UbpXF?0QcKO-F)a_ldRFb1uE zQoyWm^u$I#uiUSn91$6e(}=_vQeK^l3RgorE0wi2qUnn`uk`uSUM~;T20m%P33VVb zjFYCsTPvegu|7KVJ2YL7^YHXCf|soIaxV$SkzL=897qbNHKbBIM05c-aIW20udqE`G{+giv{vnIG69KQlSG zT>T0QzoO9NHJ=1IBr9LyF4xcZ{l%}F0d$5umxMnJ>6An`4G}b9Yl3T|e>IV{1o;IS zZ*kgFLQ8R8HLzcfAdw0121G-g4N=wvS|h4AhXB}ws|5ToY5Ja|YXb8`F2@{E7hw-{ zt~pdi&#}$lFU}+$x5P4%vjk{@T=+TY3BJZW9gyO=i^hu%Lr2CAkkLmb$9I#D2m0j2 zC)8uCaP`D0gXOx=r)P#2_Qb65nZPH2j#M4;V>@Cuq4$<0XUh%BZ|(7#d>8HxV3xbU z4W~WJPL)Tt2M!OQ(`Bb)$#h$J!And>jK#V5wh8WIp9ky*?Fa7Bv1EprW8(+(2PXGE z_-;9$UQPGfptDWMxI2J7GM+AU9=09$ts~R(IVVtl(n(kUQ$CCsr?O>Cv{6dKC@XRs z*=O_{u+^jxhX{prD<{_I7 zEE``MeB+mng=j1zaN*GVQ0YEaB>aZ)z;i7?ACEewct@PhISY9M@_;-YqdqpTxBiao zM$HqfgJd3`c^hYZh#Z&pC0u1Ny0v_O>59J&{<^w`Wk3@7;#=rf4T`ou+V$TFB35D& zUk3#%LVpW{Kg)EQX`J+cGK*$Grd$%%7-UXEz}%Mg{PIM+qD=X7!`5Qw#J!|PZptn3 zoMqqBaaqG22NcKjZXesjR?7RnncmyY!{3KctC1-JI{y;X*}?@%HT(<>B^*6lf?TLL zBvpbG!A%=SOG`^hNk^ACHJh6<*15!j(9>i-V=8^-I3fzCrXVUS2sNoP*>k@EWw#&+ z>6O~#do<|0YPp1=Y7~mjP1{&0>Jdt^c0DcQ*Q;C^UvHEllB%khrqGZgZiiOR-i#8uN95{o|niZs{L`Pj`<{14mmO)hXWa8agtxl^&>WsSW z^vb}*S*D>D!}mZiZ2YH%77Z=vcyAek3S4nrd-s{r_HW`A2p2qZ{qY|e`=1e?W}t6T&nfKlktPHDZG?Vb^Nu=`UZG+EaRZjijTyqjDRW zri+ncqY5c%)B&N^XPY(4B?)Tu-g8ppXEa=pPEKORhh})!T&m&HIIJu3w|3McBqHWP zGAlB?rm*-KgGAMrXVgxCqBe_F#<#I<+>!By*7=dJj@#U5BgGy9q`u=7sn417+8Ndcx3*pH%iu02l5Asa*On>$ZKsWohk4Lz6Z`{O z4K_#85APIc@SmXhBAmvsm$A927_y8Bl1EP!wUiY1V|>ph9x-D2Pl~FsC&b^5*psxY zsj>7gVrN?GI5br2*K2OaVAn;XWIv37PR+(RB2C`ziz0~Mj@reK-ickuTB4;Vk;30v 
zB4+mS$P%a+ZyugBP3yqtiskW9`*QPJ=EY_$*N0cCAT1g$ni&bR7O9T#q|9owBF8V$ zuxSg{tCXYHb&^t(DnU}&Mym?$=J{V{GCqWsaH#OSZRt;Z>6ykDCj9r4jSWqu*s};j zlu>m??&qLupt`oke9zZlzp%%C+7q z>M}V+4`u%Fd{Ro*=RY7es!GQ2qEw$yP$MT%_fWz zG}k_I`b)uEXD8D8-X{}a1FF<0I_{*S(W>-r{%kdAdxD34Jzrt-iZLH8TcBKcH&VNF zPo-12cu9ZO!P9%~e>kblyr7W>>0^`0h3f}-s0!L4ovzpvmMc_3t$>DrdaQa&HUxkz;e9;1_Jt4?+c$%m|FY#d#$y+(bOy#cyGt=9$TQ;+5;QyM0H>}_QB zo*7tOmLluBBxS{FQmHXIBHOocxgePZ^S)EdSvsf7vh_rJgyR~Xh^4V^0DjIgBk50e zCP%CE9D5=@W~aU$bA2Y|FhS!G5+U-m#t)`nfI?f4#Hl8O}K7X3yX? zdDTGi#oTflrWf245*ZP`lZg1dqez?~QcH%FMXqCNPS2-__N^r4vLPD2Kw{5^55SGU zi^BLQRk~3S@!E#UW~fDQ66dhGL=J>5g`HC5L zcH%Vki}Vf<)E~^J?`3XMYT7Vj%rZuMZ3{+LDf^FzDJ*N3-X7sy03x# z{t#?-KGrhiQRZ2dwY(3;jQCapGrNAadpA)jG{_v1j-j(Ra@s5+`Nmh#Y+6eC1eWRc z$4Ev6F0aW+g4F};PQ$!Oxa?K~O5M%{ z3uSUiN?bFHf8?Lik13140v$1u>_AxsEo$ByD|Gw&;0U=XATNX=|H6`R?(OS{sMm|- zpIB1Ao65l+N|#JetAVnFWi;6la>N3$!jyeYPiG_6&InSw;lXQUQnmUKe|1C3qvt~@ zwxxe5c_)q+8>J{U=_e~LRjsobO*XM(B&_JowS64tS^RFP6>*9f?s6B~0g`Qh?8Uo+s?eKEqo*aW_uT;7(&T5x5gND{`SS=uR+R~)* zVuzI*qIDvT+BlVM#Q8pxNA567RQEGTt1Ti(Zj{ryo$wM4&v~1I68 z&q@xjWPbYA0XlT(6U$7hs^apB=YtAzjuVTx>~1bFt);y4t4wv~f)%y@>X!l8Kds;5 z6K^uDuQcr$u$QSsD|tn08!=Qeq+Q*nyG|>iKTplpn$ytK&t~svr6+2N^=dKs`+K1E zYDsQjUO}C-b$O~98!LNeaqxpgIu!1XW9?N`;qXy$mWb5?ye_4RQ zo*s@684~z@lO#TX8cuc~G&0!4mXqllIB1+5!b85vhOT5`qUynE=Qa6x({V7t& zR;6?jnyj(e_0aVk71a_G5v5bKI;eV>Sn5nRx@kh_WGienIvToL`nXWjgrt|HeKl^oi$NHNg_6ESSe&F&79)IKSR}oCRG&sRoBsLA#F5LuC1wDmq+6Gy9~?n5JvxvlLXe3n%mk$}slr;i;<^QNN|3ahp7ueGo#tz%-TnxZ#`FYLWG@-jzvB zJLmXaOk5|H++JosD)+cDSF)su@~749wsmY+I;v!{p!Gy1INm;>*>4Vp#5C#7E`oYIe}0cr z#kI6I2evJHG6hbdsg_KcX6|W6F(pB32VuS(!Z^7OO(w-;oMh9qv@@*fu#7R6;i)|d zt+VR~pvvC)b@BHVUXUx4>C07XQCsgq9( z)Kn#xZP?PX>-N68TVeFkFvaWofoV-%^eAard?2_>U6G2+D5jaM(1Dl`pcjI9EI4opH zPB&5EEfY$6hLZVb@$^>2}vs_!Gudbeq z`@m_4ki-2(EQwZfxSeK&_6C!yZHOr04|^01+-_-@*IxF|RBE(+BBkknUK*hH?S>X& z4sWa9Pb`fpbch1xo{-1(SKXLjIi}OL`0p4!!DWzc{cGoT09gld=Wo{DXg6k_q}HnQ z=mpD$?fv%CzUbS3eG(cOib_#s?>CdH48MSQZ_Fg-mM_qUwuzBuvkzoWo{?@h@AajZ 
z)#8S7YmZQ-UxtuY_d}3!lDo1OBvj1OW)4=ItWLl0$zlXaOZ9gccaGX5QjiifO4b+9()rt*X( zT-bmaM~Y2`Rd*CmNLX5{{o z0$QsY%xaXf8&96+2UjaAWtAj(siv*ez5!P5ejSVFiUc`G>X>d$~?j1!^RFtq$>e70}odG;_v0kSv+VpNfG)6hrk zrF=(N35vQ|S=!5sZOIAm$7pc)=Zh+-`+cb>ybK)LXV;TtYNv!Qh+Z!rQACu=ewIc& z3ag`txQ*{pp$u(++-|`5z8xo}6>J;u2ow z7{B-qs*9Bn3-3O57=cyYhfF|-FzQ{VVD0zaDA=5E{=e>~eYS(!zMfC_lUe`r7e&sA zEWeOfpepr++7>61Bcj3k`o^Sg@t`OsDVGk z$4p-nMTeC(E*v4x1dI&t01A?#;?HPcVE#Hgx;Vl`!$nTVd%QwD!(0?a8y0eKF)3ai zlX&1Bx}CR6bjq8cA@^C#J@&)jicl~{c1V$pmjr_!$kRXRyRHefl`r6Kv&}NfvF`dEIiQ4y zw0DBFBmX2);QxL(At^Nydj@v(S&IPdQLkUGP<9aT>k;Qh2T({xn%ft8h)BC);6*Gb z-lWSCo7@HLZ{gjn%wk9CvWAf(NCqbw;O8Y5TQuM_#+Jb}!i@=lg>-pv-b-d!J9`)A zxye&lyn5jA{p8ZgDQ1{H$iEo@#*rgDa{efh*9wqeoS;ijh!EkObAvtg48A(ImkH7d zK1X{uS@Q-h13yRW#NJw_D`GNvX@KS|do1(Dh#NRl=_Ckw9VCiwLZ`_~{kxDmcHkBI zUI-#~d~?hl4tjWmbUcNi*K#lR3h!&fPd6U7)@ks{Z?g}F79ak8-laD(XUv;Ie~86D zgMP=sgOR+jZHNjVJAi)ca&r>pDgT&7Lh0eY{T=kk}CSc<{Z~H zIKQWM2pqfK?oTbx?3NDs9>lLd17@6;9Dz$p=lIa~kf~7bux9sq-V8+#l=5v*I2B)S-75|wP5(4< zoMVn9$q2XfKEL#6dAk$ooDsSnp)U&f0)bdp8WqYXUe4ne;suKf-7NcbDQA9*aj2Y$OC2{xZw zd3(WQ*Rsz0dt1YtaW?v2ckGf!MOi9jy7antCO3<61*c!Z!rU7>om+nPPLx;6u|H=l zYkWI3UEh9-AHUjKdTv*C|16ib+tr<)qOFC0O{eyM^ZhsDr9QN8hb%S7w-D@mg(UDO z)V%%8JVxg)#4~3w@*Rxf@0C^#bILO1U@u&A-np^G-w80O1D?Q2O+pdki$kQRc z(WgmaxCz6~tH$5ezN_*A~H)4-SuFc+tD8c32i!MZvTkUrpaRSlZzoAHi`L@>i z!Vum->wV}B&d7~Ec?L)1M_=B9lk_RJ{)qw7Y@s7z`eObY+}*?8^wVf?rY`nIdOXop zPl;&Mo@&dDsQZhp(Z`%{?T)wgPUxv%SKKcqk{s-S=Z$%t&)B?lZaw3I+12bLIw#}* zZuT}s(Yxf`FuUrBDu@sXB14K$JwZY877)?|N7N5i1=bJf!2NiMnSIrUV)8yki{Mn7 zAc9$-6a;cU-AJ(Y3lZabUjlT(?Y011$Q%pT;g~s#@Cu*+v*ONE5Mw#S5kdJo63^F+ z;K0sT4JRu6720c9hG=)iQ|X#!NdW1|y(vNwihZvN2*fm?+^+)T3l9w~H!}nuD2Fmz zu%aC|w_PiP@f%7)mHx}%@sI6IL4-YeQXgoxvJREwVsf6YAxk-3f~Ll*q2J|l1-KSy zZUrb6RUjp=*`UkJ&zTYcCE}iZuF`b zhG($@rj&25+8BuHSkLBJ9nq@`&)RJT5R?$VARgnPP+j*dfT*b2v^T#DQRI2PQIB7p zuh01et3d|c3c9Ecf0t5Sf>Fs3{g9rK)8v`1k!L`?kB!dr`(#i)b9kEPiov)+1w(I{*r9uvQU#ei4j!Ww<_ zch}{~jHFa8h?2#Kd%3Y|d13eEoVhU}`pNvuXkfu0P7(KoHB!f^i!C}jg(ezR3|aG% 
zZ9ec`LANVynL}(IuKc1njrY+)0EzXq&hUMVmq57WS5SVG-}M*Xlz0s9-N#>w=(uZ^ zqao)HUN5wb1|Ds!5{b4;mminw4Zf2FDfZ*3s|Cu63=0OJUf9v4A33^oXVbl(EE^qw zB#Q{jZotAdmnh7zqTyiT#$R^0Qsn#jONXc-%yy^a;f8xO_-`>B{uGYDc>Q_$`226x z08>GSrHo3zDSwi4|N12_g=j5@?5u{{Xw+->@hal_lMI^WaM~G-(bZd9?q*(di}mhU zCV!kQfPsvS@;}oDW;W`?S=<;@T&~q-k1cTK*u1Op3AH#=hOdt5aT^*iWWF?h=@k}h zfe^c5XFDe0(46hn;Slsz}$IYIrOFg@nVBF;yxqstY#W`eF zB7ok`Oj!Lq;c@rq>7*c)>% z^6mBrJ;RX2QD@yeS7#}zxR1Omj9u#U_^#D%aaJ6!FP%lN zLwUon8>(YjmE+_u)o`+^qv2m^pQO3hJpQ^i2gNkQsl&&99yDAa;;Qqwo`ajSd!${P zwfU&q0QVZryDeK1H_afLxJ9$7O3$F@<0K}|{M#+oiP zq*S)kGWRAqkmh}fwjC% zCN)IQ3Ifskit}(^GvN`iz_kp$gnLh$_|mTS>)kJMn>w&6upC4`?kl$;u0eREICQ%< ztwn&JB+qK7Wm9A5r!;x;x1UzxA1X?By{O@oh{@z^i28L?{iqZ7>+L;2g6h$dJs7aS zRF*)^Q5ePs(o_bf6fjJNZ0Mv%fP^c#@*;UzVblu01F(avafF6K?M4~!0G+k-1P&Ur zcmnN5`J)Z0A`CIYgDOyu#8b(1A(%n$D-R43M&qPo0vRvkhc^)n!TYpvIKdW}SQ8Oj zVoodHV7aDBMR0DCQU@nMou4&TX$ojZToAq^{_B+$OELb^R5KIXBCZm`uVU(=XaEg!#;&9U2)|%X))L?ozC-1^Sfqs5#>bUh)a%3j zv_2!I{|IQ%4xtueFgP`omaKb1t(r+}0r6bUN8YQzt%7NyZV;e&@(;Mh}|Iq#;pN>Hb79^Z>{g za85yD*RxqzWEG>86Vj-9Q7&YPshYob9<8PeX1a1gp)1=VSG8hE3Ct{Q>7o`RYA<$a zh^Lh!Tf?q#TkIm2uLHAGlC^fX{!<IIDr=#66vT|-(q*+hP)jy>6AN~?7)(osOgm-9 zOfD|2uC6Yws^Mq^(7n56!KSjgd`30q9!oo9!lJ1$KU6DbGm;!Z2b|fuX2WJ)7@(kq z!z#D}jb=?^MTlZUVZPAP1Oc{vy);Q#t7io!V47pqW?`rjOu+eX1(@3<4XbAs?05>h zQO&g>Bw5fKqvmxE)Ue=>_zodna|>1$PKYV){v={-4&YlwuT))yV|+DU`eh83Fzs~! 
z+=Z-Icshj-OA{|nf52c-v8gNaFwfhbRns}EjKL=6zI*f1WItN?A~5hqCkZbxQzRC-jaN2rFPa2F%Zk4RQ#D>tZUEDf3H(1x*5)x#%c7eDKgank;v4RyHgbY_ltWhm!NVTf_5YWYgR7^H=4ymSN2kqcH9Z$H-|MxcJx z7&d(lq=QRGh1{8fdkh)&CX-ALNA1$ADmR6d;B^ff1Q~C8c@Gfp zB!ETV33d76VRxgUCKYH2O^KAsh;T|rqz)BR70PIWqg8q%O`p32CMZH>bzvi-iUVGX zDodcJkY+M<8Oo5fO5~zarG_Xav@}#QRJt673g9sk*-WMmh)Tp|?d39{ z!kiWrRMz6Sr8KJJ6*MW0D3~PjsxZe1oPb3b$bj8Mpu(UTnU}|i0D?z^C6RENj;NZT z0#zu4+(2WT{Bn>Z&rEt%RSH9pWOBBw?9TlC=f3{;^6RaifTMY@0K$R#JMfG@-1tTe zyH~#P3*Q7qpMqkqLD97v-JgbHXJMGGSN~6YXBvpr+PD2C4I1nub0zI&T-J51Ym%Wd zG-#qZGGp=gjsQkvT-Z4}LORHPD8geDZ3qBNl>DZJ-u?|naeJCFB$-uwCRzF+P> z?9J)_UuzxfJpb#r&NUp%->Bftnxlnn6RVBt*KZ1MGuZlFTv=m2BeWuxsXd>*c;Zfr zh7zTYO?TqkYkXf%kVbbgb|0qnvP2~#*+3`t)c8#uRX&wvrN8KBet!6 zMJ~2S>&(7GH@zPCMy&1IwIsGdbHi5Mi{V>F#twg!qY@q#vSUo;08O*5W`l1XyixJ| zdc~&hp6V6OT|9?Y4Cq-}xQ-cGp|#XI-RD+w*U}^F4)so1yw0k3%7`utdns?p7994O zw!Qn=VHIZGJ-u$_$&||98qz;y{<^|0-n*|?*m&RS-^E&5alGqUdGF2L&i3@qTgUXN zI8q<^fYGby+11nb){q`4@m&_mr8KYmE^l|aYpJ`!4ozn4uB1axW{z|9+j{M;zpJL< zSotj`C%&DzG#``-?z#nEV@_S*jjt_ zvQCokvN0Mmlbsj%8?Wnm;+nShnOdEGGX0F1%_E#u`0ANk;)}N`OqhG#hJQ9;=CrXV zCe1TNG0M7a6%76sGcA5G>C+L0aIdtKn6y9f8A%(>}5Z`!12`Gxo}77nz+7T@xNAJQ>_);)t%(cXrX} z*1KS`bnDd7%(PuG?Ik67dD;RG&EaF^oAVyteEj6eBaOk)nioQ45++7e-3%&yw61%P zt?OU?(x=bBeck(1-K~-Ic^_sstwnvb{hIdEi)Wop*sa#ucT?zh<=^cU?6P91J?wb;|DB`UBcOl-*dUHR^6$`0-!w9P0gRNafysE~mcsb}_8e zYG`ggRynMT{OYftHmbW=m}WLspBWG=TjLh>%x?Y`Fy4=0}iPgh|osXZXvnt1rRPS6C zwnwpV3!`HlWfXh#X}4*S>t03YjaA=Ov|zg2@tOgG^$)@hbsL@&w;*zM!l;n#Th7|G z#+z-{`=vVA#<#iQz%n*zR^+|d$MqvOzCSNDI(McdInuNCL*k)pr(UF}@5+cCxqi!p zV?OP(4bmGM9xSRjeA(~DaCB5WVDt)xS4Y( zaB9BKgz{atgO@iPx61!gmV8yyAar4E#n=2{q08^=-BBbrckP6LFA1gw=gQmcE2=J; zzx?t(*14bl+L~9hf197?`N2!u_NrrUd*t{Hwfkl%4C%l3@YK**E{t z>)^X_ZO&hc2Q4-Zjq|!`bG#{JRdbq^Bs}a+@XgDCyIq1leaN2|?3{BYxBT6ja`j7< z6YKNN`m1#7T{-yC?1KAO*RR-iLi*mZ>-NlonhWc$E!p8*T{8N~vzC=&O`gN($#b&0 z_BOEA2n)@5-FMoqg%joCK7VlhO>m{&ICg?vPrU~t#=LM<*_GT&Zli|zuJLa#4!bsH zdh3+;mZ2A~hS|K2PJVM`U`evk&^Zc5ml8V6lj~Q=H#%=RXFFu>pz-Pgr>3yTD_5D> 
zPvv&1bSppiviSV#*H_cS*IskX9NwoWv)n1G&*!{1^=8L6mOZ#pv8MKc(sbUM=DhWj zijw7a6&d+m*YoVzd~tXF>VxBZsmZ5R#YV;or(d5Fkb1#&RYvOX8LfvqK0M>~`mz7a zv;*b)3|mSYmcB54kRE#TwLLNJaPj=ml(Q(g>1yi7#NF{OOKq;5cfR}Ow+9crrgYq# zqcYG{g(a5D8zywGO-ZP4(To4~A*Lw1$Fl(+3g^__aQQs1aB5B3qd9f2u0MdauS_}V3=PKW zwA5@WKVmcZ(G1UnLAh&3e|3EsxLNUK>tUs)YK5FO-Reif#(y8Ottm};u6vH{vbPW8h^ zz3+Hrrr-D@hiB9NaYja((pswtXRnl>O%FdkJ~sXJoCjHp-#t9NLpuM<>iYDyfIIKK z!s=}g&k21mwK)92Jo)yw#I+hxuar`UzYp~Z|6G=6JKp4B`j_qrUmZ)}_Vw^}uypruw~W2J zXoY)yxNLxfL;Wk+nTZMqyq?%-RmQs8eAvB9{^~L3n!+UyOE)Lp>g7CUcgeO(8sUBK zGA2w9-EHL;-e~Wewe{FsAK7^m5^fD=)fcE+9<3V2KkssT%K7t!v!;5d*reuH7ntn- zkh`LGXZWoNuRhykxBuXklziYa#>PY#X zk+Si78oA*LUIa5%c!L}tM_^2aYZjQ)Y)NnkBfo~h%<$ZtF zZ|l9rvRy{U?B8xqKVq$at6q>_x8KIva&Y02y%*kk&$Y-mk1ObwS5{=xIBv#szQ1+j z=nd_=S|7EgTvE%vd2nvqnvAX6C!BPdC7N;R^v#PuzK(Qh%TFjPylMN`*1xeXN3G~% zh~xLUqig$z`c*YJ4(vnlc(7hx4mz$9k6iL z+_mkkVH>76s|))7aQ7;WS+2IWWMYx^kaO0-!8T>lOXD_t<$IpJbiB8yzd>c-s_Z=J zlth{KV)lsE8nNB3fX^o^?7KhO{Qb!Cwws@h`kMYwGO&3guFW`=bB)*QLrBSqv)4MB zH^gS?)cDxk?`Upa{IJhQ{+Mo)$8;+?sHtJGdf~|G(qYBTJtin7&fOZcGQaNJk1<8N z*1WXdlGVfLdE(=Q6oW;>`#By+RVj=X&mT4D*V(<7hHkWpzgm}cdWqHEoVa9R+^boM z1v&nflO5`R)$6}&wL`a|qY7PH)&_>RPx%&_W0{fV7yCN@W3zuiO!>ka57&meR#r6Z zx-u-LS$mszTYLQAVS`U!Dctit@Xn40wX4Ry?y)T2CfSZH`|M@XoILx(b-kr;Rz295 zT#?mOJv@7t=5g)z#@F{Jemk6~bEW6fYxA129)(sOn_~N8rqARbl?R%KdETql{@$#e zF@A5XNoe%K>!i1p;y+bsmLFN7Z&KsGetbvhkJF1U`#hgL;!S3{!D{FBS})_*MytOk zydPco?2+oz3ELK%f7&{!`}ook@#7afO!uCy>sD~l)@nhDT$6&B=XOn$P&xG4m?RT+ zqj&u0vn$$G2fd8iaKXv^SugRjkxz!LZMM#OwQ%1_sa05v_~o_cx1&C7I6wOS$Bz3( z=VNz0|NLF`{5{#{R}88f+;l=DuRixWqGQoIz_+GRqy6|J&u)y>pwy|Xalvjy-D|;* zv{pTen0Z9UOT6QOx~jCpL9XPmjqSAQucJ~LYAy2)y)`n>KUAh#w2C+6$<>e*>jPTy zCohhP(y`wbwri8#lYNi(2?K6b$40!^UnqAnZeCR1$p?)m-AnxZ#(Hzu&er^xtz;M-T*kJhe>lwXIK2fa*vob1|Y{#5=!=?e45 zHm4#D*y5V$-`;e6+1KW1&lsiF(e11B?dL3=oUR%7RON@uVUN?q$oHI9d5za&hnL2?9KL+cJu5sqSbAiF*z4t-MZwFaeQ7(MRdBp0?ael? 
zO9ojj!-5U8nlfS@F#7|4#H_Vm)B5=C@vtLlpGIUwrSh9vt=8J6w76b(D@vGp=vLeJ z>jehqMD-C`s{^ZB);!+->_j=Evp4OEOY<$Yp~r9Jk5(0xO%S)-i){Ehex>$+-)eua zX^L2C<9+H#6iN8m#)f<2I)d#8CKJn?2Qqj3*9hXWTFS}yjxOKc~)>R9^ zs`qX;Uiz|4xq-Jc>v^p<-)giiU4GLEt=-aoPnUFn&u04cc~y$a$zSGHzjaQ&^Xh}q z%?bMc(^gJA_x8aE^95dm5?Vj+c@aCILLo$Stkq!0>|Dt7mLi4n>4_~x9o&2&Q{%T-`)e6<3;<*RUmA1Fm zPky~)idbi%nXT{3vQekX4HCJ%*{tb2*cgZ)G6toAw@rmfU|U6T`QaHWn+ zt7DtXCDl1ziAu>W9VzUZyBim)TzI!@)3E2yJ8Y^>b6Q7Aa-1!Q|;$?D&OCj`EYNX~lMGUtj*R*c((ubZB)ifrD#>TA%ldG0%8 z+%~kv1g)xmAF#f1{DS$`^EXF!jGKNf$m89&p|g+3hQ_78t=MPZT{A^&_?u*0dv(LP zyxG?WJUmq%a;U}q^^2I=3$gXh&r`=&H0>O%b4)uRP5o?X-3{%pZkZcHdua_yzihot zl04e0^7nV&yKfFP{FZrY^S zOLIyso7jKC{_u->MNf?soU+FcHTTdPZNK25t^PMt+og+rB$e?E zbvaE_kEi@TAt`gk#uc+?%x|A74z(-4bGB`5cjpB`iNC(QC3$(Tw{>3T@cDj|dUox( zX6n`ng+=BxOf^woR%w3DT%z)L4;PamHy!2L-{XCwbFzX6y89 z)e?v=B=N5uUBrw3wdht?&n~;yZ;go#)7l_sJkD4z$ii4Z$fEnQfip(#T-a~vk=?Sk z2?>cB5obp1Yd^7~F0$9~y_T!?d+kXWpI3WiREuu#qZx^ASN4w!9;H5Gd(q!OzU@n~PQ zc4wojjzy1S+Hx1Hs@r?!V$EEwVQDj@bq-b*$9^nMSy>XktM(-?@#%rm%L4-Xytlcy zYp=_f7m>CqtL7@XmMnLg5;W?8pL4Bg-hnR(-yW=Q&-Z-(wSAFo@$y!~*#Tvbo8Nlz zO51Xi*BsAkQn?v&wKP_ucP?j`Xjf2EWzxrMZ-TA#wXa3_4AHx~vh49pu~)>(9@otl zDGxqr{?zM7TzmDAsJ)YGBWFe*Rtf(0AbG#~N;@a3tfXW1May{a4`qegU8*nI+}6zh z7I)?Bg9k_ST#xA(s;ljJvT~8A#ADTqHmlgRA@Wud^R~*8V~=^M5i?hYsOK3bmPMV3 z`JnkC%Q84Rbn8|tqf+Upyo>cWt9=Z&2Y$6|E}zkHRoD4>|4qyLmVGf>bL8fp$1jZS zoHzWC>6sU==EA!YsJ&lcu+ptC?%VwN)6?~j-#^#>;cRI`z{?$NB`?wvjaDsPGQ7EV z@3-9Ped;@kMB`od7+Q6_oBeh9$4v_lK6}@poa)qAcVlfjD-MqPka>+Ua3TrbU9Gevf2*TIGbCb>Bt_crJ^cJr4B%U*HM zWu4Y|_oI;o_Z)g_J!f}E9Vs|DJ=NmUqvbZQdwu?d(Q|74IQEp$f-~pmlv|e#J>E8L--ghlvJ=TV zUW%>h9)A6lC`@*&y-QZWo*PIR+x98*eQ|J-rk?4K3pDd5Nu^WS)gq?37>RGpedV zHLYW}*2~}{Rr^bfHrL-b*}Apxt=!#2n~E1Nf{V_jSbi!sA7PZ=YPGjW{l!Ba4~Iux zM6>7p;&a!z`0lT=3i3Th=iTi3Zi{2AtMT;R7kbLyo3=}hvA@^*vbs)&`#rg#5)sR1 zSTSGRRWzt@?lPl8+YhcU>wEA_$lA~^hs!<-oQh1D%Q}O`oM~9(nivyx-Kyr3_w(IT zK6RWsIrPSnTxI{6(O+ZVRy9@k3P1E)8_(^zt-jPWX7%(ty*>`Jn%R`=eBrL1j|^{r 
zarx4dpS_%-Uhen*etK5Pj(#1^47-J&TK1?eG_$5OS1!=AcdA^;N2bW@@S2S|aU&O> zTHY41>s{=RLRmeH8g|EuI~^}3tJ%g#Pb5~wE*KcJ;`zQ_B>`UIp?*v18z&f#+T*q= zC(lYa_IPV}}H-JK)CL&5iV z{Af$NTV!hZT>Q!7yGf$)>5Eg2wB;^Yarwei+qC*;{jzuM=yStVM^9kdck!ob=hkM| zwry`LfB0Z`Jzuv=mtVF{$;F0@QeEb6E{gMly`6Y7o&4>W+56k`<_0+Ua}A4T&iKKg@l+dD3I^4D;1a&hxC(5%0auBqX+%s_LYaHj4|Z(}D{x2daqo#w8Z zHP3O?a!2VhO(R47Aiba!-YdvzG=f%mc>3uD8ED!s^Kx*~BR_M^JWY+CA^yt^GzB`0 zwpf?ZG?XRow$6H%CT4#ok>3n7UH$#N^?1C%z(AcqfsU823y+aXr953ekI&a85!!yi zp8obh+Ma$R{#t^Glb@rno43E4m!}4|M0*FX0Dl8bO^rX7>;1Rodiv@7+%O$SuN6G< z2Ogs%;BgBgKkLo$^;#C-=;UiS)6LP>%g@W%|IS&BIbMNIzH_|XJh@+-y?ixhJ3G5M zI%x<*LVa!t|COcBV1{ z=jG+`f8QCM6ZQYq;ACR&?_|i=Wu)4=d~JrArz_PH2=y4Dwysc5S683cDe`CLOs{2b z&cXjTnSYD?pN;{t0W)X*pZCp*6@TrXnKSiFyc`3#(J{@$Fd)EfnVwi^#+OK?d~Lq5 ziLN#)nq;g!nK71XGpxxZvq_|F63hIQ`OjQVC~2AI>F00n>FD%7S=!%nfq0Go2mfUM zAFM;;KP&mS_VRyr*MD}`zqJSct>FKRuK(<=e`^o?TfzSsUH{(h`UCsqJf564{%ay& z_+JwVCr>ha2qe>l|J{Uz|F;Q?iJPCdhkYuQiU{G7P?30cTbXLv@c%X!HE z$O)#2lcU!%r+-bXghKw` zCssi*!WT|4^PGldSGIL^7`-w=HY{9YTf50XfEA5Ut^zCWqc=Tx8x~|GwGeRYRR{)wb9p=WFkKl++Ce%v!%8FSfj+qEP~ z(w>gKlRh7j(Oy!$BW#~}-zD31Tn*xm=8a0)@J(h#vbJk-S7*h`vGWRUzY92EQE)*q zYq0;xO?5K80`I&Jd$_%#$Bp-qi)EG`_ACmPDf`WS$)vBU2doMlr%uYBB$MW@*z{%R zOF!AxNMhKmxK^>rYC%CqYgnOn-z+U1pu|Mx{N};$_Fh%ZD|e*EvZ(X%*i8Lf2>6 zqT+ss9Qy9b_b{KA@vK<(hMm>>$iwG{T+JR(Jn%-`sFlY3-*0_eJY=<<*OM%pq>ZB_ zU79WA0xWucj~lLPH;m0{FP7V0pwM9~xNJ6FeanUdb?pNA4+V-V3Wg3T7+6-=|FE~N zUVPu{aYh3s$u*hqn#_hZnHs5YH}+A8x@#{{pHdN|zInYwvAcJ_S(#%cDx)h+V%A;q z9`*2+MoiRW?*TolJ^I}amMAGzb&J_y?#<}jIhI16BkQ-nTGl!uIz@h=y~M0S)q49o zZv)R;%8^;yl{X&p7I@x1lQJaPUhCtn!w0t9@KMUL@AmO_P-(<>Z=<2New~rI;Xvfr zpdNCS_WIl0h>u01jI%?}q1n5Gy5(1m)!)82sOP0db(^fuL0x%OBXcvHvRl++s{|pNd5zjPZ!c_7HE%T7XU8ZySWLPf z@pk%;$k>JvYi`xI^iQu+I(^GmD`S|=@79UwjY{oz$DPhv+oIGFbiL}ByaG>NB{QPR zv@vdv#nCsmO}tj6wulDPmbm1?7rkx6c~%n@;)9~|w>X$EZxvJ`t4urMHd&8hj$c#7uLA%bvW9?xH}Bc~%6AxaEV7?j&-g>p*YDYxF!CxV$j0{M!g|~K z3%G6XXQmK;e&8NXI=55Cm=WoP=}GpSl(vX=Q-u*5h!h)t0jDg*w0}I=mMsq9ygN!i 
z7e#j4*50;Hbh%ujThpa5{f+Hki|R>&Q@Gt7r>yd)j-S7_DrJ)0(9?1MPGRM#Ckgtp zru|2R47{0Sf0?Pt%CQS4yFnN!GHcW%DqO8ih**_euZPD<&oZtjZ3{SWAj&c;{zc8s&tucf3w^HM*_X6Qn-p56s93f6ykO;>sN$|qFQ~1| z)JfWyHfPWcN4fV~Rg)t0iq)=m;00&e@!r2i z-P$VYnq95j@74w7E!Dcc6RX*n@ZCPLvoeJ#s;?`wVz%FwD807t{V-c5rJGWfrgfyb z_gI~bIsGzfQshVY4Bq6}<7{R?%FwUX&ZXj*32ZNtt@<)_!!kWzdmSY+>=DN>6m3`l4hRd-Lrvepts!A%6PfXvEzo%?* zNYJrV8W>eYMjaUsz005PpPxF$MuE5O=YY|SPqs*XLk8P(GBSVGX!IqnXxvGaWRcn? zC40r|@Do$r_@L!RF^ZY4KCF(O;t(<}`^RtpEAx-TK+wRCW;*E60}{M4n~ zk>$kCLI|-STW&H!Pq8gRWOgOKlB|%F82@L*k;72*?JW+=borz%NbZOYe^fE|>(BBm ztmBSKT#Y{=2dPbkSxHNp$gh=TQT<1_L|@`c{83RaHeJ+`_R51wGR+GF#I3w>8+%_<)FVUF5r2f6oAZ-17d@I3GB z?>cYoyjYX&?>Dr%Zptbi_Wk#P?VFFghIKDc)Gko(RXOqUq~(_cr%#j3k6~6s41Sv8_74DIDdzY?z z=QHGNW?o94c<&Ke)hkQMZXFz0H6Sumxpcz~WS69(Io7;=aQh~PR8mOv$vWd+(_oima77VKe&^i^GRjG`XqY}3xDB|R1{aZ zLT*HBZipr}AKdWg*x90a+#YughNHi<0Aj9@roco2EEq4e&-8n^1bbS@K6QsS>^!&M%Lav0e$iXGx4;*s8 zl0%b2# zwkBna#~!|_(soj+YVnTTK|q1W)n=Bdef%DaD}Nl#j*(N0MoXO|mS#UwibtAw#gkNS zeN_vFKGKYhzU{E?)RHv)9j0$Tt#+Hb+~CTcr@JyG3*%(NrYA+7Op2PxonNk6q-3~I zt7nQlS9uZc4F?^WTYHQ9y|7#Eu~D1g?(Tr%p)c$gz7IQmVbFC)uP3*T6nDQ7FLJl% zJuw;aWQxHP(;@F8Ho2|~tx$dD@XPo@ivPsb{-#R)S+^yMZG~!S1%nK{d$<11YqjeVb*oa+Z*!IMPY8Nu<`DRK>(3%E z?Bi0Q5VP&Ew;p+!K4$ABZ&A-`^_YkVQij#tthYpX%jn$x>$G#6_1062`UqY5qAJzW zu%D-$f1Gn>Ij~t3qzET?kN$Y4U+D&)6t%C8`lIYLKW2so{UT>4Kgv<|c=d4o$SJym zcQ~jH&3GInOJ1Hma%=a2%`Uoq^Q#x@Z!QT^;Z>NXhF?3dA+TYb&Fw=${m)hR*SZyw zx;?pJtl{k&L2~ug-9xtQXkc#MacPmat?>I1?$s!~d3$om=K2dgBF-xtIgBMIn`5TD zb@{Q$woz$8m7G?F@9A5yi#9%OQR}F*{W15Kt`Sd;h19R(kh3;ZW5MLH@te4FEfYpH z`Adg8ca{`&rj2)qD+14fnXdEaY4aE5`iy)fAm=y)1wZS_b;{)ts!D^m$4zPEs_jWQ zVM4q+cc@k%c=>r|6t|Wfo?V*AVZP10`cdt&iOp5=N4stwTzWp$Zr%ssytk5@M;~t! 
z&iiPQesrOamwAoh3rE-64LxN>Y+LO;?wtA-ndM23uBY5cn0~10_~CKMipgK+9W1yU zSJmOEYT61Xw?V#6&i!R|xp^r|{{QDcp;SX86bghI+~5BE$A~q4{-ojguQoewV= zKpP*l2|ybQ+JvA@1lq))O#<4$@su1pDIH>*1_n~IvBzQdc8mgIhIgy4A4QCN5=sj za|ZC70X$~_&lxfYqqYU`oB=#%0ME&Tq}1}r z%m=j5bxvkJAdapNKHxc-4S`&`Kgf5@K^)y5d|jXe@SMz~=;Z;P^U2rQJMU-mTsCL} zIsnhfEDYoV9SqO`c+LkrC$lwr9f0S2KF|SpP9}d~c|Zr5M^XC<@SG2L&KH3G06gce z@lfj!03Cqmd;!n_c+Lkr=L4Se0nhn>=X}6(J_~dJp7U8St^m*ZEa(rwb3WiXAMl({ zu4z&G8StD>=Hhf5=nueiKHxcD2y_6R^TBmOKHxbY@SG2L&Idf_1DerfaiR`b3WiXx%fq|1Mr*=c+Lkr=L4Se0nhn>=iK$H&Uyua=K^v~ zkdC8u2*`CuI*#rS0pPg+%+CdY=K?T47XY4f*ZQb!0X!D~o(lla1%T%QhOTq+IuKYd zJ+1^`el7q!7XY3M0M7-0=K{cU0pPg+@LT|RE&%g$0pPg+@LT|RE&x0i0GEE(I06Z4}o(lla1%T%Qz;glMxd8B70C+9{JQo0-lQ-4qJ_0-!0GEwa{=JF0PtJ@c+TBJ>%7g}{YJWt<~a*^ zPVTmWTv`XY0|(-04zhsfEZ{i{c+LWzvw-I;;5iF;&H|pZfaffjpR<7HEZ{i{curp8 zruP-#ISY8s0-m#g=Pckk3wX`~p0j}GEZ{i{c+LWzvw-I;;5iF;&H|pZfafgWISY8s z0-m#g=Pckk3wX`~p0j}GEZ{i{c+LWzvw-I;;5qlq303ES=Pckk3wX`~p0j}GEZ{i{ zc+LWzvw-I;;5iF;PVVs2dI8V5XAP+R0(i~>p0j}GEZ{i{c+LWz6ZZysTL8~lz;hPx zoCQ2*0nb^$a~ANN1w3Z~&so587Vw+}JZAyVS-^7^@SFuaX93Syz;hPxoCQ2*0nfQ- zPN*>i=I1QnISY8s0-m#g=R&}9A>g?XTyGZwo(lobgg?X@SJ;MliH_%=R&}9A>g?X@LULZE(AR1o_(XX1@K%5 zcrFAy7XqFO0ndei=R&}9A>g?X@SMCPN$UVS7XqFO0ndei=R&}9A>cXpY#y~w0ndei z=R&}9A>g?X@LULZE(AOm0-g&2&xL^JLcnt&;JFa+TnKnB1Uwf4o(lobg@ETmz;hws zxe)MN2zV|8JQo6<3jxoCfagNMbMo{hJzfCM$rG7$9N;;5N)yC^`T#r^0-g&2&xL^J zLcnt&;JFa+Tm*P70z4N1o{Ip_$ph)sJ`e$(ivZ6>fafB>a}nUV2=H74c+Nco-uW0I z0zBuQQK#c*eux0iMS$lbz;h9ppNjy`MS$lbz;hAcxd`x_d(NEF26)cBA3(>^=OrS* za}nStPT9^knM@LU9VE&@Ck0iKHh&qaXeBEWMI;JFC!oH*gp+5pc*fam1p43JBo zCyD^iMS$nr`y^Dq0iKHh&qaXe+_UWT@<4q6o{Ip_MS$lbz;hAcxd`xF1b8k2JQo3; zivZ6>fafB>a}nUV2=H74crF4w7XhA&0MA8$=OVy!5#YH9@LU9V&b{kF^*P|V2=H74 zcrF4w7XhA&0MA8$=OVy!^70bB&4A}3z;hAcxd`xF1m@==z;hAcxd`xF1b8k2JQo3; zivZ8XfalyhHJ$rX40tXEJSQ)Ifn54{B?def1D=Zk&&7b}V!(4T;JFy^TnuCfzcrFG!7XzM)0nf#N=VHKf zG2pov@LUXdE(Sao1D=Zk&&7b}V!(4T;JFy^Tnu+#&m~}fE&)830G>+#&n1B8+#&n1B8 z62Nor{e4OY;JF0wTmpD50X!$h3-mew&n1B8+# 
z&n1B862Nm}lt6C_;JF0wTmpD50X!$hA7CAzKLF1;pN`IbE(JX2zW6}L(en?^IhBs1 z`$GzNE(JW70-j3&&!vFpQowU5;5jj_ptl+Dob$Y(v;m$=0nep?=Tg9PDd4#j@LURb zE(JW70-j3&&!vFpoCgV|7x0|8+=4hTt^m)afag-cb1C4t6!4t$B%-z%@LURbE(JU% z#yIry0MDg>=Tg9PDd4#j@LURbE(JW70-j3&&!vFp#Ndb426!$7JeLBVO99V`84Xwm z7*~MjQowU5;JFm=Tnczj%!}x40X&xio=d^}TnczD1w5Amo=XAGrGV#Bz;h|!xfJl6 z7&Fm&0nep?=Ta~~mja$k0nep?=Tg9PDd4#j@LURbE(JW70-j3&&!vFp#ITCqSAgeI zz;p6R6p#xz2zV|9JeLBVOTqkH3V2SwW7K(Dh@A{o=cJ9|IccMGkTyyOX`{v!X`||# zv{7|V+Nkpq(nj%vv;iG7&lz2s=cEnjpm|Q>fDW4HBo64Hc~0Vh4w~o0wg>bF&2th5 zbkIB}aX<&na}o!1&^#w`KnFcPCl+cz2hDR52XxRpCviXr&2th5bkIB}aX<&nb7GGK zbkIB}aX<&na}o!1&^#w`KnKlp5(jk9JSUb;KnKlp5(jk9JSTBL2hDR52XxRpCviXr z&2wTy1$59nCviXr&2th5bkIB}aX<&na}o!1&^#yBSwIKPa}o!1&^#w`KnKlp5(jk9 zJSTBL2hDS0_XTv&JSTBL2hDR52XxRpCviXr&2th5bkIC!bZMTGHo!re=Ohkrkmfmw z101AzPT~LuX`Yifnu7$66ypheKpWj38zZ3gYPgAcF|R(fvV$&~bq04B$Bf zc+LQxGl1s|;5h?$&H$b>faeV0IRkjk0G=~|=bS%lr}-+83;F}_oB=#%0M9vJPI?`n zKLF1ez;g!hoB=#%0M8k~a|ZC70X$~_&l$jT2JoB#JZAvU8NhP}@SFiWX8_O1r)TMX z4|vW1o-=^w4B$Bfc+LQx6EjjU`1kvMu>0e?6WNB0LG z@SG2L4*cQB@_-J&b3WiX@P{Kf2RZ=Hfj=C{1^oed&Idf_1D^B2{2chhk<$$@t^m)0 zKOD&g{Q-Cm{NYG0ecT29a3qedbKnn0PLY5Pz;oaaM{=w1II2c+Lkr=L4Pte>k!Z&>w*3z#opBc7i$w^K;-2M{>dO3h*5G!;xHYyaGH2 z{%|B0j4Qx%;15SmsX>1Lo&$e4k_+k_@ErKVkz6pY0MCIx9699&^#OPe{NYG0s1Lw% z;15S~!MFnRbMj$KnmvH$z#onQ{&1uXj4Qx%;15S~!MFnRbKnn0a%mmFAC3Y3aHNeM zSHK^R#L@i${NYF(tpoVOkvMu>0e?6$^`q-t0OsewACBbG{Q>;pNE}@sz#opp(c=pE z!;v`9AAsk;AC63GL4N?A1AjP@3v>XU1AjP@3;F}_9QebLX*XTxz#oppf&KtI2mWv* z7mO>wbKnn0azTH9`8n{1BbN_Ae}MTp@P{M0pg#c5fj=C{1^oed4*cQB}L zIq-)gxpaR3e>f6H*9Y*2BbP;i4lq9l{%|B0=m0zi{%|B0=m7I`;15SG+tT9-_`{Jn zy3T<=9Ek%u0MCIx9LWVb0MCIx9J$mCbO4?Me>jp$A9sO29EqckSHK^R!~q@PdOPrk zBbUd44lq9l{%|B0=m7I`;15S~X?_5II1)$KIq-)gm-K-Sz;o_R z@ErKVkz7z8faky;j^u)I1$Ykp;TYf#N7{f6aJ?P)!;xH|16*$h{%|B0=m7I`;15S` zTLFFmo&$e4k_+ks@ErKVkz7z8faky;j@(wG>jU`1kvO_PfIl3Gqjdm(I1)$K2k?g@ zaddwGe>ie05a<9r2mWv*m#%Z*4@ctYaRvP0NF3cCz#opp(RB{|;m9pbdRzg2I1&eR z0G!=m0zi{%|B0=m7I`;19l4Nm%+G~@=fEG1JgNb70G^Yt_)_P) 
z=w9(@V_`{JnS_kllBXM+n0Dm|VN9zFoa3qfA z2k?g@kI4WXV15q#;YcpsA0ohW;15S~fetV~2mWy6F&=tc0e?6WM~^Gu4@csF4lq9# z0iFYYII=wYcm@377~l^_+CYB*o&$e4k_&VIo&$e4k_-9+@ErKVk;kP#e*m5Xe>jp0 z>I2Nrfj=C{rH@y@ACAO<{s24|f%!S`hhu<09BBhO!2F!d>#5@!@P{MI1N{Mb4*cO5 z;15UIz_wbKnn0a)A!ObKnn09;E~03h*5G!;xIjAAsk;ACBaLaRqn| z{NYG0IPLG{SV15q#;m9Y3 zX$}H^I1)$KIq-)gaddwGe>f6HA9sO29C@6T9#_C0j>G{SV15q#;Ycp%55RNa4@Yu= z4lq9#gZVk|ha->b0v%v}4*cOrF3jp0#ueZ>@P{Lh z#)EMMuD1h!IFbv-6}a9G{NYG07*~Mjz#oo0x(~(`;5qPzBe`H)0iFYYIFbv-72rAW zha7oya3l`s0N2}r zKOD)W>jU`1kvMu>0e?6W2l@l>9QebL*FS&`z;oaaM{XU zOThdb_`{J`gFt_P`8n{1Be_5axZW-SJO}=8WO<-I0MCIx93uhqbKnn0;=u6=%+G;8 z9LWWB4(8{;ACA0Q1&&u>eh&QMNG=#xV16zEJO}=8WO+ab;5qPzBd>!29f0S+ACBY# z9W>90MM~#&5H~V%{>S}6SHrWv?9U&F8yPu1|D%n#WpK~Fb#CM4y8mn=ccRHX&40F$ zJIrJZ{iBV%xWs)!wNo2Go`7oOo`>rk$K4nIXB&Aso3NsD8+Y&IpKavII=N2x&o**p zoxH#L&o*+#K(7CFZsV>4|D%mO1kZhAvr`-S{BNgR^1wScm;6_pzpuTUhm&uAS$%!p zT(?zD8jMtz6U4NKlMPiHR;u4lNPO|0~Z`>yk=pd%Qv26%!(#6#>A-2 zWRlq=V-wv;EMxfJM=aGaH0&?y=Wp-pALQ%g++UVFHBUbD*I#zzNYmM7{bm1XBs-_S p?Ejz5I5B9FJj#YJJMSjSt zSedIbB4osYL68B!03ZMW00;okt=GZZ0RaGBVE_P-03d+W1+1+c46Pir6#(W)!;Vq}i~Zyl6$d--USF#K0|% z7GuoFpIz}F2tqrGA;ZLR7p=~dr20dqj>NUsgF2MT^!QBOVw^F#VJ+Ao`}t1_rifGQ z#J`9W0(>NT1c-z9nIe)i50C+MZPP%7oI|$zv>hgBHQPPf}AmEaKod8WNn55E(9^RpV1!%_7 zR)WH(_BpaA&4^JYs^V~y&!KbduWIS^q`{1gzJf4pV08iX{km=&Z&60oM_yQ!wd~2G zj(lL@ILImU$__>-wL#AB*JchM$j-%Mpgt0+@UI0T(N1w(rX2HMtsP0~aMnwkLGG)C z=ldW82`w^DXcBVxZPpQ~Grm)d;1_tq3>16>XE4>bhTpi77V8P+;w%p+QtdY4R($Dh zB+q15k2?-Fa2`1R)-5q`9@UpZEk5j^$X|4ZNY5lT>>$ryWCWRNB|fZBp7i5XfrM>z zj)rD)0rq@~wwsvbG)(vxZItzA78QNoL$8p49dI=M=!oPTPx2$pcX$2z0tS%zUvl2$ z*&!GBJLh-5QyTg^=e6w&E$nHi|Hl89>;D%g`2Te16>)1~z;p;fSD;%2D#e3uzwfLc6H`%>*`YZb&iko;B67AT0dGLUnSJRYW3EY@x?6ls%%lX zSmB{IqjwsLl9Gcb z&W4AiNZ_A=rk1H35}fINV={!1SgrMdS^6bu^5UF3?Wi>8In5!Gb<|N5YF&)3Iw;tR z9DO3s;@~Mou*%>naCo1u8<8z+cgv6t|KH}!V1=Rk<9EIY!T|svf48u)aiFoWGqnG{ zRO&jIT3h|SP@X4mnXl0z3~hnG;YDbRn2jZQ2NDvg9?>1En5^>fHYWBfBt;Yym=1kD zWf~IM%7X=VzK)P=Y$(r)0BuQD!86%EDwK&`mxfn)Eai3}7!I(=Kkabi6;|al?hxtv 
z&6U3KC%SXJy_?QPV2Pp}q?!D7X3^w zj;T&Xd-ghCDA=uesP4RKA8dTnHGjH523K=MD=w43&wm5-i)W;ckeO^E z1zIvOGJKBo!Lu8B3}Nu1M=;Sr(n(@#&R^>UP1qP|c9JT2R?S)bgm#vjbW{YMqCV3L zVvS**S3+|CVm^=vVy#EtpbD7oY2Wq%3QNE=%@94RFf}xHKZ#~PzBS_UvK7bA9NN4W zSpCu^w>_NAQovUML2uYKug>$@W^1q-q7 zI~Tsu^{WU!*@gCmH5tBmhhVk`_u+$=3|-llZ81)KerLn~d2KL+`)BN*7_fMjDp4sgU#vOh|E<2a#N<^&I7X-BB032h#fQX86MVY*@s&Sp+ zb#js5BO=?v0I>o%)GuM(!M&MlTW1-Z0r?QR{IJdgpacXOp(rS1e=n2Pndfe+OgK3Jd3{CWG~{blVPH8qzL z-M^vhcB#b}q>7rIomtqK%VIY-NBYQrvxYAwY(?Bf>}>qe?}=O)<;S3h{%*1Vs{5;( zNA=*-g!bBf({R2W&sDbwOSGeBLU3v>7q3{z{D!=8AT&A5XnsRp^HnRN!WrrQz!=k8 zlv5fv5FRi7nfQ}UmgnpK+TQQIE2SC?Y9lr!X;Af3z$^6?le@&fe)Ph5AlLnq@wf1; zd;~0)CS(6T&sewdZN^hVZWi|nR`vz#7yHrb=C#@6g7Ep5 z9jj=4v=$@(?RH-5u+%6n^M3SYUom4mxbuexg4Nd*EF(IoA1B^AfbJYA=6E$177LlR zM1Iee_8VVL*YDMvHa@@p&b&%3=@@7wX{bvP9$`;UErh7pZV+3wE-c4YYN>3=hJx-$ zJUgZFr04N0U}+{quSc+5!w?LDQ(f`bSh$U@V(E0~>|)FWW%z!Ev8Fq)!{*BLk!s~D z6Iz6NjZKRV4|~^OH#_7gYup#zgmH6xbRJdhYLeg55*}Gxj(MN;9al;oP=1niIar`! zyBjqu5g;M=b%fq1rcS@C6}0spMAmn}oOk+#w3feCB-f{QlY0_vjsh#ayMNd`rKh-C z&4Hc1i8A*})Sk-D{CId|nGi_4|s!4_SF3J^hm5?Re$}F8|e?ENIF&Eag5!I&@Ue>UJN;dW#~c|7*zgA&4p% ztDi0v^CtCF@uqa8_Sh{dWx-YvKX5HYZCN2kkDYh{g2|UOWu@PNF=u(bpe3a-FW1A3 z1de87e1V~{iFa)OOd%k1gB;`s-fkwmhc9h*z6cSeRUrOE^a9E)s!$2fefQVLs)e&n zG6$6x(I$L*ml4%a(Grz~Gc}}BRTU`rC&7u$DYGL#H<|bTQ2*5#;EQK3-m$D6O)bd! 
zp?!S*ehOiRc)PyLo*d_rRWrJgQh6j*xzJj2e-2Ji^;!`jB--7b7IFe1L07df;)|fD zn@;?_j9xJaj1cr5Tvp<*k{KPbdyar!6KYLvbV+y65kpAKFn1~upO7N!0X4;aEu35w zNs_y33}{DPplro({$yy`M{IZCW|HkjivAhT+jZVrEa6gCT?Bs&jd$y^uYMRz{Y}TIMm4oF|i9ot`9;(v!j{C&f1?+v4Yi7gN*~ML+ zKXimj1QwE|ZiINqva9>j8u4@&nNu1cy^JhzPtlW?5q=Jtt~{UC4N!9kl0p~FncYzv zHW_NK7HH;3A#z>9IbPej#+;PO;``&ovtqI?x!sI8a2ua8MJslJ)>|PXRl#>5gJ;U| zY1L)4bW&_P+HvKqZ6%awV`4=dD+=FVs+feL*1@9!(Yt^BaxKB&rkBFns_7_|vK zR;_dPvNZHY9PFHI%~6U03G9S6u3G7-v<7)ki7f&nqcLQIsB@^H(vr2h^Zm?JkX`MP z^PJ{`-Rf>fyp~gNR(fuF?vdeDOEO#8@d{^T_M0C>fv@8xluOi$C*2#(jgjTi_)ova z12?VLIv6DTA{#>>Nc7@}6ii6;TscvdrFcalU>nrV(s~m}@nyMNP44M7jg2Yg zFOw~4s^hsR-f@R!Rjp}QO}e_^*z-PY)<-#;G5$6fP4r{@DxC#Xe>g+P_A~W`L*`vsv(^a zf!y}Fn!R57@g&(`lvZ&&zW2fD6si3e0)0 zw~sJW%n)5=s~w)deGv#!j}kJCI0B1xo_h=g_PN<|v>-P7N0V5I2(QSDXJVH(!>#Kk z$rPq6HsITU13CY1p{Ek!2MR(9urCMg&k))Y?QOUkgc<~plX2+T6Rtp(*3t$Fc;j(% z!-`4Cq?XSOT7e6}2w2B_+Iu(E1boh@&5`t!+H@(#&K`$R?DLSXp(1(1xwINCS*!g^ zNGtt_L{qcc1I7ss^+&n2VAN$kKVOSq0ZCp5gD<6jXSYKU^nmC;{%`B1 zRqaF4+5OpdxeW($lNsF?%)Z>mF?TsLiBxhEjlh@3JXvfP`X453bcTvej3=}=f3rs! 
z7Dd}#r~^k6bwkavPuf=i8dVBj4D7Yr_2YuQF@7?K^bs+$+Hs7Gd&2*9Cf^0Z4zx2; zu$_wqA}e;=Kjb~Q=UOpht;+V+p%EttPOL(OE z6qRTE@?+flTCg2S-Aj6D<-Q5Pow*ld6xDh?SIl%(Z!c`?CBqCy1Yb z3mOQ9z%u!@#0LM*B?VS-G}$Vfs1H8yY8ylg4lp`L=1?5cjDXP+;!bZ|WGNX|-6I0h z417+$9gsjI7ck^W{vFCb9=kw$>@(YID3apw{-|6}d=42_GQ)(m92>tskfw^fJ0Lrd zP$Z-oJq#imslJmS_OqIx_Il?CM$7C5QK_if(N2eZ7Nl!)YKs)JJmo43Oi29~V&9x_95qFe#;lzAcfe6BE*(g>**NX}w z*E~_|SW}IYQOGudI#T*zO*y1RoFw}orO1%lJ8aDpU3=C4o?b~a!-s??Ol4#ME z70*~g&JcZj5~u|Hw{GPk?Ww(Sa(AW5R0mcF$0h+WAHcdF!LSgxj(q z3@jjo3y;zMKnsKF z#00nOJ?$pxFPp*Ut%BV*JndSolH_RDg_=}gpOil0?v+5I^zNE{ibGdU!hPRT)mFv_ ztKy<2TN5AWe>o1dqw#Y-)tL$z{Yj@Mz?~{W`6&-d@4qH=)yznc7NyHpHyuDvpckK( z7@@;V05?SyvI}rlv7*BV0{v43!C5Wkg^5w{ZaC8F4ZB;yO9SUCz}iI)P31feJRdJyGP$J=a{qhSvuo8c)eNV~jTH`~2 zrt*DS96FC&#Kwj?z!UK*kIB1LB0Z?cP8NKT+=uudMa`=mFw^&wWKY*a&IZI=Xibs8 z#z5b?AdOGQ6?;+}i`NtNI?Xt{RI#>w&FcrCK_@}Jc}QodqQg01^<2xPDyE9;1k#sV zWFQ&e(U%kdidbnICaNv%bBG%Wr6kSJi|vX6 zw}CilOG08wC4zpFlEtXqZ(8rrA(H2~gU{0`q;IDtJ7@{cps@XN*aw|^m|VQP%X)p) zZP&VVHG1<`lQ?y@kGrh+-OAwjGA*K<*ncj>KObC%Y!ncnQij~UzObonpYbRPdU-M~ zK2-a(gwMakVwp$rWQEgWjInIw+J^L}R>(mRALNp68~1+;fBU>b89f`0l;+uI#Vzxrsz4o>CVLNq`!ixHz!SblS7boU-JYajSN+_u@vLw9c~ ze{|ulzKk@~AvdHgkVHsBU;Wm=ACI4x9>J%k=RAT;EAQ#n+TQfTRFIQ?Q@=sEVjF?o{f!a)^(GqdybE)cca|qUYu_gboL?o>`GN<7kPtqzs|_T zZHH@Lo#2lbQV&t|k`0GO#w^aqrj!j#L| z2I+>=S&|^hTdK9@TG-B)o~NI#vOX%Lvc0+cXXy3ZiUho&SR<)m~4NoX;o zLULH!T9q&k=$Uq@mN51UNDl~1L4r#CM41YnZE?N`Fa6ptN)5nsyzLdQ>ki}b=UrvU ze=qS3lD%E)xVv@h_1#?(CK_aYCXgViq#J9(B$eNi;Ky0z9hsMIj^_z{`h}7jq4;%+ ze-P?o$oM;NhDLVQdhBR4qET0x^CNF&EN)kvMf|={cJ^-7(vdO;->F`DM{=EFvx~5u zM!LI2G$Y=FIOU-D*ywl=iIFF&d2#79#2kZ`ONB^BZ7}YvpsmiIhPa<-oqF1YuV7D4 zWru<9!b6$wLV}cW-3GZMWtl!im99^KuY8HpWFh=dmU?{2WAz81)g$=+n$({Enl0tB z@DR8NaKoX(ct=}@CfQJ8hZF@Ywf@Ws^(7_cO2sp*B4&U7HI^EX4))mM0(!GERvNKx zP(JiEogY;g3igCE*5S}LGe|Q*3v90?gq45|mfspeM66B(h(2Kp?BDL-Tl)Qh1ngf2 z{(pN&?kxf>)3=9QWUAGvB3>=q%Tg(vK>KJpB#x*^s!BS`i;emk-C5h3c>P+jpF}QV>Hpn-_ z3Ze41!-5rYPcx3vUtAS9_OfS@OU^e*h=o{|>xr$Uqgnis6n6Lo6J~21AN4Z95aP3_ 
zq4DtqJcn+trNMV!ngUs1t&WJrTCscB7(H|>)ij;Za6n8e8|8i`$QZGr(!EN}{j2-pRN845jmEcHYcc0Gn*y%$_VdIzI1 zoPc)YC0E~GHE!xy_><`i_t$#^#=8dW6g-X&{^^@8{b2u_cmk*A5QcbldzbZ9oOtM* zq6h^+`dZ03Qf5GXYOTDa;f$0>uM=yUG};#1KzYo-88}o2yW?gE zq-;YeF(~OBtYQ2!R;LW#>4JbJB1zI_tvKcxQa1#0(@pIgeYi() z#>$sDhaSdF>AV)D)un+Yw-c@Gj%#YHEJo?iiTKvN(_FucCWyLcXR_gn`Njg!J1(&X zrpm~@fWVnf-W2XM8!C(BrkK?t(=l`ajAjxz%5s3b_2prUp`j+3*n%IWXd zY;F?8FW#W>ACKqzl)5BBpIG(r7dF1W7LXil3M7Z%PC!nG=l47t{mJP!%}0e*sIJum zQfEX(I&-dM$dPzLYEZj6!Vu13lh&z;Kv)UvQIq5n642=#yU_hfmSjD#E_!VHl67(> zh#Bc@esor;ZX7Zn3?dyzy%m{kyIk-_7vn#!Txqe4qiRa16cQU*93zDk@b~EdRCKyY z^mF<$C9M};(pl{mTI$XZ)`@m9S-c;N?I%mU7|H>dQys>8v`3XP5@UGLBACnBvW5l5 zi1xJ)2GC}07$mK4B$@f{Kh>HsxTY!;P;WStRLk=m_-U4DothlCew5c#C;dT-q=o7X zI$s(-W0BboeEprDa|T|nQOZ~*-CE={tp8py^n4IA^ehxzYd@@i-mxS|N7K8ZC*&qn zLt&|Si7k$@jr(nrZf_G4GUzeNd~Et0bsFX$70f10X-SK~8vo*GV~nXbpjpRZ)b}y^ z$Dy)xa0^jUWq{GYJHSkz!^){%{(wp5%K=4z_=igeLI`XR@4tpky4*m6Sr`?mf$f1* z;QsMB37arRI}QQ^{11*zrdHVh+PB|GG^7y_2IQ~${josQGbBDyz=eOATjcEf=>BV{ zo!uD#2=E{9SU}3_C=jwNK0$x!lxbv9$)%Ay<_&YY8l+A*-nNT42tksDTF;V#~EEAuqAcA9YR_lwl7~0CC%mlC}wU;!@hTp;;>usFESaVc< z*NtV%b*@$3qszmZ_?7p+9wpU@-h0F zl$D#jz9uro`q)}{m@gt-ccqCz=xVG1+7R$3u9uc$D8sm&#t4=yagb=tu;V$W@_|6v#1loDUE0k$urSddJp+JPgk zyU)=0%FTXrpANRjxqVl*~zz+HVmj24|K;K@a*8uiny|0t6Ho)Ct zPBw>#fPsP{CC$mfPVGGbw1NLeHEK(<8UjcFH^4C9Uzq=e&xdpy@!M=zi^3YeZTuu? 
z>&Y3h(}2qz!_~z7BKREEHq|y&%I2o4(_wOEu?!bT49vIljbAHbZWzFf|Kiw;xI^W| z|Ht-^H;cp6u|kJ@9^j}6<;p-pARyr5^PXyW z&iz+k^`Et0d1Ce<_{YuUza9V`FD)MNQw>+-ctc##|0F!|K(Y*w4zp!|`?EokN z{EZ_*#sSU*>|dA3=kDM=2>(I*hTF%FHjVHXHr-hGgP2G})m-}0B^8Cgw3l*Ve|h1S zBj5A&4slhIr*l}{RW>LqGy*(lfgEzF`Tc<&)L!1WyEAFk&1xumGhu{7*`fPPTSZcp zbX0bVn5^9!saC#Al6$t$>41`0F~_>jyi(#^sv-R@Y4d!G9XlNzaZo-*vHc(!)hrnb ztf=4TvCuC27huEo$jOfy#IGU(cl ze(~G|1novAD9PoCz*)pk7g}Xu4!)C_YNrPgkv1Bt5?j)pVpK+XZdf+9r`VsB&0T0H zZaydGrj5V33B^X}?>zFgOrtnB!RH z*4A2RNDn`^BHblM+j4uDg2eomQRJHOPY$_+61rfyDMKPVnf<;m92$e$3vX8fXC&dk zC9j=g`s0Qu_5Hx6XK9Vp+s@(e}*(zzc_9xv?ZMr9v^{&>AdQ!{v!3i2msQvY@ z;E{LO{QGzxf@fvWP!m9dInS3Yym>8uCDV#rI0|^Lf5I&6ey|=UgQJSshNt!QKxgL* z6<#H_dr|e{y0k9Iiy}_6L)sYsn>{n4t ze!Q|n2j9c0+`81~DlVy>hK=Uu*+nOQEseC9n$?xH7X^Iv0PQFZ46tto<0}qBj{+@X*0FWlBW{JGT$|%XmgS-ijbd9eWZ!fB*H@o=#<>yUn$QE zQ*M3c&iml}5^}bAEDrJW^44B@H$PxS8mWsrw5h)JxdtxGFX z{K@(TS2s4~S!O_2tFqm*7F)c@mpIKV<-;LAX@|NRF#xZ^e~x;0v#f)$i)dq*rAhX8 zGZl*#D(Uk@sxzUMhn^GLnQG>jbvFi?B*7(sIp^raAT%tx7M*GnrAyIa|W=E&Jwj5ix&ellT+XhlF4(Uh>8Rp z3Lg?d) zIBoXO`&)Z`xUgw#axNXSc>ezb!+IDz)xJ9K2A-W+_*``MsQ!BgTL}Y~Ldo|~S|;=~ z$8GrDG+4r(^Eho@$HC5L*F6pP?((=bN!I{dao`q1w#e`h$b|~&NM)QocZ%HuErt}- zs^O6Py_!6DersRu|M?Tn=u>05?CSs0h(ggzoW(=GbysZhO5^tFBd`@GkZg;zow@H(XE~|3^B^-G#S5kkXP8d zY=$6w7Iyxcv-L;MRF*6wZ)eF>CTHCDM*PQk;<9?hZv6P|t|Ugp zd4J*05*otKy-o2f9stdbQNph=&BtPBLN^;ZX`s}lGuN>bGaH-dYujB3$I!^3UNSkS zTw4RNhbnpy)l9OVK|I7a3NhE`_4|1mUZ}Nxr=-yh9R>xG6=PE6_M*1oj^|cmMV9Mr zhsw$u=70Lj)P>XrQ!=CTyfDIXsxfsjM5472MZ8iZ!BoxMk&ICpERwK76r01QewN5S zY2u8-Rp`m;tk=*k?o4E)CCr4W!pURVldwoa+ikaIny$-X>~W!dRvwknuYS+$phE-3 zFiQ-r9?|?E$qv;o1M8JE@>FX7G8|*~5Q~)XE~E0Rpk;cE3Ag& zze2TkuHlq{vem3{xpfgRsD(!hYn{*l! 
zI`Eok4$qycQ?krRi!3LoU&1F9>hkOG!&&jeK}(*h{k}~U1eM{CfcOg{q-|*)U<`g4 zKxdxTtmX+@qOEgN-X;D@^<=cqOd@i8I8Wcqs;>XxDhNZ4>_LVFN}xJf2(oVBye$2e z*oe7Cg0WKv9-0Nc$zVSwf@G&ivg`**g1iMHj?FFqW-{ZI>532zZ<7vT`ApCP zocL-0R9^pn0X=9AtX(sC z)n&pBZNyZF95Hp*i;^n-{FEn9JIwccd*9rnbEn?r<9KNGR7IgNSf|a3(b!_%mN zr0ZC|93y#=e3e#P*fVYniZI|}YHEcRY6jew$kphoHS~(PQ4(~AX2BU1p+*toct-sCwydEK>;zAiDAkTM$p zFn};yQjTwAqcKm>?kv2jAgfpasNWVBeH5287lRGCI)Kjz@Y%n;awzlh5ND;kLZTz| z`at-A`aE31hP-XXl{k-Qy_WL8##|}fz1N76e6XC9;n%(~HCcD>6jaqGgN~2njQN{@ z%sivEc{t$Z$Fj6N#5o)fsr>70N!T%i!`@7H$ZE|CFn$m zix(v&4zLWMENU$Q`$_tPu>)Rd%39f;;x_AurR{?wC5|r~dbrXDNFe4Uq8o*$22?>7xakgqeorQu~@`?RRr)4vBM-8Qx;|~n3P7By? z@Nd=9Q1CUH{uh{kf4(|E=`y?TVR!jEZEl(k-YI zvoBxf@G(ZD=_Cj*bCL{YjF_hDu`x;DewCSWEpgcf>g&(p^S54wibF7CP-GAb$;UzQ zxXnO}_=gMcKF~j$Zh+HKust3n56|fmYByYFUn+9`>X_00*-HE|)R_)JFY$LG+`uVe zwm^IU3c-2gXbICaf#x-yKjqNYRM~w*t35+#AKI-pxT; zt*e~KoZ6FpQ=xc|9h6Y9jBxAVsaQq}5yfMS{i!?qn7ioE$eh1zN`MUiDMnBzEtUKV z-LYJTm8xYlWjibT^&Jo8=id|uS3D$kGw7!dHw$kRy--`KqwIPeMt-NOB;yc7ky_P8 zZCI`}M5-^npO(2xqUVE%_af^%afiJY>L6|@BT3la9~0T`L-Qkn7)gv01MWxItTDT8 zx>My0;wU2WjV!*@tNehAGW=fPBF1V%9_1p$tYfBsxva)))3?*=g4e77)j_6O^8ZyN z{-hX?pE5aV&^xmDL1x$W5?wMEfyuXup8NNt-m=SI0$*=i6VwF!QjO}13nyTpYzja8t@!$9F#a{lZb>w8=`2BF`wcZ+ zCPdvdXH`ttd}RUo`UoopM6V{nngus9Bi6Y9b{?GwJK(YR82+TASSx3KmaBx7&3Wt= zc6Ul_!S^c{7iIXLl#&^r!xn=PgDfAju9Q?6n({GriN}N5(8fmN=KdN0$fcEk^Kw`X>-XfmcuSV&esr@P%M5 zo9TSOHesqb3e@}n!2)dP4FJgDBZcfix1o}N=s^d80%-jM1ziND(WTzv;r=~5mK9$4 zJp(JFLuqHfa|Cgz8_TF9Go*Gz;&<-SSRTmAs7aaCXnKn1IP=WC-?{}l(jOQzGk8)K zOMZ$MXVT*3aju+!H;tG?s)=^BN7>>d^_YArh(d}G`8k)=$GYM_V6=`44(z>*ckB3A zNiV>~b=`$=rMH49o4n(-VusBzpeWcPyEha&?XAj2@C574D_$>ou0G91(Kr%8rR&oHVQI;tgrsVb#@v;lIDWfS9 z5Q7@=7{vU|(`-OzSwJ#y2Ny_9)fpw0R?biJ6V=+%O=m}0#Sb%A#(nXJqT7Mar}&Yb zSyzfk^PN*8+%J!oQdaLMBd*N#1<%fVpnkt*j=6{5S+8Yi5LbCq@B=u>zH&|1N`2G!ai?XXka>FQp~g>g=D>pRG>ge&2z>^+VMX zjEnBR7v8UTaNc0zJnW&aLOrTxMv$pqsFh?Q-VNcZ93w04m`>=|)R@6Q3~#am z(lqiTiq!~|z9({EFT-firc0wL?>guDDqki 
zQ}6eBH|tF!I{sQh>5D9p?-I|qX19a8QBB83LorIc<0Q?(9H>NXB_BT9zOPr&l=XKv zlJ>eRw!%Ax`XCx_qfki`!~*J>s-5N#ryt&#Qdq({hs^lj=GVO*NT1QRYuai)=$u1~RexS{BnI))!RNASt%#NB zF8)&iI8D;=w0%!%)0XHH1w~DMYc_U+A%*&fo5TtxoH)&AfnU!TMuNVN%lN|J-IZaqR((y&q_u6w-oYLfdK|w!xSf@eNRya7%@m|6Ual2LE;0gc1N;L{ z+SZf6_K+KSsk5{k@AFpLd@!*yRLe7;VgBRJx%dkzjR^u5ffL`w47K!n{%+SInV^Fl z%f}2SbN?D5x5KC4ube+hgt>gkg7ASk2NQBT=%R-pU}kBWkk&?Vty!K)a;^3W|?{%b)oS`{M<*DmNA2o7Rx zmbEXu#QISt<_VsQeN*o9-4nBC`=g5P)vpR_OeOA3GgWqHZpk%OJU>c zU9bMzC1ZOoWt5^s000TkK>zJh82W#g!kAj`4JJK40Uq)oWWir}?F2T~s4SoxZx?dJ zqlsD-Z9Hwl^V#r)8-_y)krN&qz!`bH&GVZjQ>rFV$`u3m*Nqn@)i0=K-Ccd%tcPcc zq#nqoX<+9vJPpT+lECKg@9Z8|Z*^td{vx6ABZS#yj71Znq33yxzWRL7{_OlBixv>s z$lTBN3(vQbz30iyKFX{*2YpdpO%8os^Gtj2|m*%+5jUGq+>>J&iLB{z$%=M*+`Ydg@74 z`b)vyK8q%5ez*JhEBowf-+ss8JUMfwIrCEcP-E@HE6e`D^}+A1Hu+R_rFTMVXlTKs ztH_@)A@@$vY5tI>fA(g^Cd!_K)$Mh?lz^Y~htW8U{Vu(=U6O{&`q6R}u6x#%{oS7( zY(MD_N}8@@)2SIe>L_|HL9VV}A4A(WLmkt2xPR=R_@3Wl?YF{FY`MW|{W+79;VG-R z@VMl*Qi^Sny+U`F-}A^sw1!giFBPXw9gu7B?}}We-^gGup295F?V#tE+>FPUI$mEr z&P5-aZ<6~**smtq`A9F8Hp!=WnrT?8n*={7we(E2-xW8@ag@)ZF6vo4DqVBBUbQ;i zp3iRq@3JlN%Gt!8AS1gD` z)J1|`D=~(E)Mc)`&U@Ob1gIzRBXia}!N%97w3CcjC8m+BG~+gg z%SUA^Q3V#oz+O~wB<L2$lzVS3AzV8z1V2JUa{DkK-M+Qsb;9V9UCcPnJ2N zd1uJkIi-97+};rR@AIM$8}_yQb+Ch1l5~{YOHT4W#}Qy#f6nT3mfVoU78V)c9jgwY zjxQN%i0Ju&JzYZBi5rA5mmojKaVkc=K*$e>@*L|(G}@#97G+pT_=c(akt z3yd1CaU3G`>~1JoaJ8>Rjn{=qHvFQn4NLCofM++`sGbwdd^E4S7-*J>R{ zIVEu_6~F&l(9X}B`#`+UQyGIX)NyL#OB&N_cuK{wywP;w5fV0sa~A1)f9RVF-kM~_ zI*n3R8cFhf7ZDNN!%*6oV&eW}rIVl_P$Jwl)jc@&UUIvXqCegk85h0Er1TukEG{`_ zr$2VcqqI+*k}1r^T?EiuyE2Kt&|XjxvV<&^QPzT|bi{&ZV;w z5+iQuXflOx4dS5V#oQudNR?IE(y;8;1v7nn#j5rk3U9(xo~Ij`4clZG&bteD_XRn#FujRjS=f( zluy{s6f02Ps1*dSWc~k*Y*8W#H$LzZ6=ca zXa_&5!;LDvM93VvIqcxyERh_R0|%n{>^f2BrrmGnH_&{2O`KU>Ir zpB??Bm9!{1D6$xy{2YISxVOv};sWVN#|M&V=!F$5PAf1j;9=WQ029mG0`iCG3X>YJ z*hBrHSTq)_n7UtbGajT@Xi4kT@~!q^yX1)%o7(()K8_pw$Md?Yl+{>WgZYQ;YtMK& z@SSBW=!`(K4K)K3(-FHQ;tJX9I2|~Tz4(hyDrRFXI1QnSXwM*6gR==VU!a#QXbivZ;(ilE<+d&|O 
z&YudIX14Ba`@pmbO*D@vB@?OMVy9Gv`KA_RIf-QcwEk;$F#^cEk_xa#&*<7zyX_mb z-)M@;4f&{c>^QB}e1mpOvG;UPhL<3xgXetgt&|SJA4X(H2OM_5RoA=g@NrSSuxqq| zt7L7yn3~!~d<2<{vS_4;6UZa%-h!ejXTHgIVpm?u>G2$AN(PqgS;Z34Q_>m#gS>YP zvaMOxg}YbVw(ZrnZELk{+qP}n-D|aN+qP}<_WSO$&PK3 ze{eQJ2N{1+vn6}TU9fsfT9X#fDUL93`>gr|?Ll^#cJ@y@KCr&*bc$w{=cr#<-d~n zafPjb60xLl zu#A&?h^9e9&6=y5nTNB2vu_*zg8vn}X!v0xp6iDWR2>8YNZ)vI3-^1>*JmdXL~_oO zKy~51i%bX_ljVS#sIx#O(<If@Ic*h`;S~X`5t?)C*V+)T$JnedZkYrGJ|7gB}*9P4@l}p zVd-UfvAi}K^*TV=m=9W$sWxSi@|X4j$&KKcSX32I zSR9rl*{jhA-o0;i_)`TT#<_^NI>rpmp#hs(%`BmJ9yJa`sLzUXDi7zPXn0sCh@OJ+5^h+<86MceoLlvIIb(e&heE2aK* zXC)_#pE32(YWenRxIWw#>(6g88T)(@N3y;5T}k*Od**u;RoB9M=*qEORdb2tsw{E} z1v0x!&@5g<`X$#2z=kC0h3mBSi-6J{!AQA7_i9x|1iXWv{6-}%gygp!h}}*0u=G3j zg4S|`m7QkKtU>va-M+~zc51*TbZV1m6 z`<~4g&mR{o9+VrF+*d;c@VYy>hDgeGb{rvhcjvFs-a#Wp;+rl zJzUf#`=WuVWNf;$CRY7PhIQg+G>5Q?^-kpayzMamdu*guJv!71_$)=}5hDDHO{3)U z2mT4NJjcqr2|eU3BIu97MyyTRmEFcGzbFOBZny*4 z3o2uDS+yXY7Z%8lfYN2&4?qhJUrx4CEBOQ@=OjVm+yPjR#5RJC`-6~F#Cl&j9PFP|nVd(hI*TN|ea49v=i~GroRWX;Ev#xdU;>i8{Sv-(#-v{kkxtbq(gu z+Ei>HkJ_9woz(*OD1Tx~KLuZ_Yax)rF$Zfw&^IRW>9Eg(o`eQ`&h^H^TW@ZlPS!rPMh zassYH(Abex0@_bE3$Lg7gBFH9HUmBW{$pho@m!@90X`YZ?IKd2meL78>qk}id_}N1^WD6@>8xpySB*z zV9+07KqMUq-ahSjYPkV5ABaG<5#YI5%L4~gYF?CUH(Ow@{WAY^+iugFHnt}A$u5k) z=GjC>J7|nfK-^UMQnD;?@sHwkqw=Q9V@uetbWQfaxw2F3VdBKewKHvG4>?&p0O_3w zNnmCaW&`L^`X%9kbpEij3Lnl2cZxW+M!7oy9r0POgegn1ms)<;E}4$e_OQzXU{>Qe z0{n?sVbUH`7F~pWw;yNs+Ay3%%~vUqAioT0)%Ib38S+Go^9jbb<`MvtWKzscrn*5B zoywXa2mw2LX|B5QAnh_A7n2IKS|VCw#9vwE&4|@bzkHb`?o%aGI%X%V+3(dn45fA8 zp^~kms_EjlA?uN%q6J4%qKsykJo=pH?GfwZ*L|vl1%{)86KZBqr=zny2B!Mo+c;nnC+k_x`8-2cjC1BQ?yxdx*F8&bTe|ggM#eM#fMfH!B2uNKg}(--cr$nY<)Z0 zK9aV3aGrrE?Ki|kq7a;#E)?5|l~>g;Bid2?G)AK3j$E8I2MZnYlyC8}xMwwp_5}Pi zzGe;R!g!yMD}nelye13a#R%8aP<0HP)MT>_5X8iul|$Cd;l(n9bgl0xfEk$ChTKf1 zLXRXamD`Hv_4?JI{W>RVbXGOB<%~ffAb0jy-IH@kkb5Smp#PKgxzy6yuTYAckN5y>-tHJV{^+>*oOej=GKg;|QKD4Hm1g8C1ssuSBS3|M{xg31tNkY@1r_5&m zg8W^O4vA-W`h&v*9jU+F{F8>SMQ0XDA50qObw^Fs7|vcosj~;LHJ}IAhGi6o_QFy! 
z?L%_`sYHH^V=dTFHpM9#(lbMBdB!*EREO28^%;KbZMQEhffNlddnP0%f~+8@OL>2| z1Uh577qf#~B7mRuKrTi;K$&t%{*}Syl96O;Y5i1qaQjt=k(;kX>3IaX~GGUMMiEbx0By1;MF?yb<&Zx{PFl|LfJU#)x*I+pXh`8EPM4`W!gUXfN#t% z*&;PLB3{V2ZP8v&4%lQ;RSozwg(*y@vQ@Eby-0XTes8jgXp3JgAQqGWMxa$sfCC5l zin=g>Z85LTZ(MfVgr?>w?D*l8XD)5=xW0Pv*q>qNG&Dt|kHBBgFgGOk>JICbr@{vL z<3!OcD=LRVZT2}W{XVpb$vfNfiP|wND-ZCnwKJ+&rrv|4g`yG6Z>S;@`uOqv=SH>@ z;GQc#IgZnpHMe>yUuOh3e?wl7ywH01J%!Ut;&Z|H13D*qJzb>|9T^P;_{R6bP=JSI zPL$hp0sn44!47;bD82$lnM*Y-WDhX$za2%sFN$Dv&TJQOUg2!L?c)*E4*-LqqTEZ( zDTjF7;CG&&V03K0J%G@#y0!oTK`c$K*&x?bArv z7DRGT630v{@ay@$1coM+43jzwd{fyoYKfvFH(4+5H%(Sxd$eP=Z89;d1)AH|a%>yh z@*IBB>eY6xSD84_3)~lFmu8WMFFNovNckjES;SVUfohB|X3JO?793&%7#uoDJ-M6J zI5q`5W)2QI+OnPmnTeBRJCKLcqF-TG5Gtgd@Hz6ql!3{@gORuexZA_|^fGb3GX>#4 zz4zCjcDHxMRzhHJHSb36b{Z51rzZ8dlTD76iH_p!_BZeoA1%C4v%WfCAh@UuKhHV* z$Z$K{So}<)Q8;Qjv+z$G=Pb`{8)hLa(^&c<6Ix>F9hS|weHvMV=w#u*^l;lU&2iC^ zVOSqch|uhIpezl;W4L-f9UjxKrXL}0>DcOf@?c=CY)u0Mv9MQT<$*DT|Jl$@!dnLF z8eD+)a*o8@iJ)f>=4?m7bFJmz9T^ns=qmiMxF*A=OkDqyTC^<-;Pc}dNqt-Pcl3#u zQXR}KTIHY6ZMfw-B&CKFE1}XYc0uf4Q<1^0M9$9M7L2qP%^n9mHwuED1e|BQbH>{F z-I6f({nRpvx54D{UHt*;H>3pli&OW9scsxk;%_K)cMxeJAh$u1qwD11>`h4?5sC%w z{4)zg8p&rJk&z;%97w7IBN^6AW@52d^*Jvxk`5Wkw%zcQXLsK&UFUWLj>U{$00z{7 zwlq-ieiUhs9rAv$s`>?Ptp{^w)VjfqY@F~>C18mA>Moo6%&za4k>%;jP5baa6mg&x zJDfyK6jzt_B{;OA_c0cJNwOo}vQs4??`e{gX6?#Fvlkovs5yIE^yo~`FC><3R0`J%;WlyB^lQ8Rqp9 zJ-izRf2`e&L%2RDCT~u(?czEE-$zhN?goIoTqSRAs%48R;4QpM=^RI-Mv@kBx&#C0?{7iv`+i0LVL1Li z$?&L=5H!}7U<&wFD%Tq1R_?Dx34adnROP#2_yB}x;_!v(VIKur29GUxs1oz+fbdTV z#cv?ov45Bn9R%X~0w9H7@aFy3MTp%Ezy{z8v<(1y32aNoSIgCm)SV^w?PpB%<~spP z^sXG1Y-FE(Ydu^>E-i6k%gx3m8y3zEqOTfa9Mar!Jq*O}9gyU5m@a_ia@bA-n!}CYZX1^Zik11~%dA9^cP$j+7C@9OuRTjbsWXZ@O!lSo%{dp`ORMy=(jO4DgpwKZ3 zH|5UY5gTPn=Fq-E?$TagH{kHrQciZ(nB%r(lvUNVI%V|He8D2B;}k(Or%ZnQWE6WM zBH&^+Q&o{M{d&(icSkPP+bY7?cHwdcEedtQI-(9oIq96AJcU3clRWJa_VMyFNofhjxy7fdiayfXGqQJ24^CCgYdz4&5aft*#JOVi zglXz~(!l-E!B}Y6{@&&`($xj2k zv&7zJz%zI>jA@UFe7)2ccrLT9(~`FlI+6k}*K2Ld-fsry8!(V_0d5wcTdPFss&BNI 
zqgB+s$KG>0;|&B$0iSw;rgJN})0TpniUQoKslWV&%E;c+P^UFZ-IVD{fFm9_`D!5E ze>{h-4Tx$=b*Iw85_s3JG+rJ5Ui#h{T{~N;qM66< zR6Gch)nObP79^Pa$_%C|Y!w($)JHr!QZNmMz9l9$OrI+FQ#l~VCa?yoLR?MZG7WZX zF#=ddU(J(bG4ZIwdYN)EBZ~o^mQs zc5J_}xkleY)u%h#1*u+)wkkcds|@WFvU!Eum*GCO_q(o7HJtX#f3YpH!=sf9b7S1U zog&#cb=q##ma3Yy=gMJzJM9t+M-4Zn_vp4#i$3I4C&G(Fy_i`I%nluhHC1YvdV5*3 zL3B^7Y8?~X;UvE|^L}wOkHs`~N~E}$F9-DIYQXLvvgGm3nT*#|Orbw$O^K3N<3G|9 z4*GDy;+($ur+DdM7zHZ%)uLhsZ6z|3=LBf;s>DCZP|ny?cHvLXQ6NpD`@W>z1>`ST zCsvHvyXjYsjNaSNH!Z-F@fc7X*qK{9T>5j3%1Q-*c!?Uz=$+PzCZLvEj~Nhi`t7oi+S!6a~t!6OCKGP-Gqhc@N87X%{V5?@CNksL7+zwL*0kQq`-J`ty z@JI|inJaWt#$&idT+Im!dUhHRfY+-p^x|)huuOHzd|YtJOv!^ESPOq8Kvu}0Qst>A z36j5^hBmpJe2)sdYf(>|T`6JvWv21*R>@kND%Y9CZwI=$No*=S*1f*F%Oz!V>YgX9cPj)?4vcaAKf50jHYKV(7>j(Wq<%-H=3FMdKw<( zx>#Q1ieXVgxV}{D4z&09c84GVj-Q~4*dDY+rDOqf0Y9}LvCAKU*yfQ+P7A*B=sw>l zcF3xO{u!mahE90zt4xMt(EOcZ6voZw&)1~B%r+xGURQN4oRNU7g0beUTPEG1ptak$ z$&(55#`Mp$;*vu*^up^UQrN`j$`GCY@bMn^JD;h0?j+|=>LJ6+8hXG z2k{&4a0RZb-%Nhoea~5!0h&`qg%l{~Tv1JST@8K`gqgHwxh&gGimDxsZDFZW<~6)< zvQ=K2Pq#Py*qmZEg!-8M$;_DBU@>}GGayo3Q-zDk+`dJx8fA}%mcRiSZbuD`=6br( zjrO<9^Aa#2Ago=_SD1x9SOGNa&I ztL9v2`fy@V^cWQf@S>s*HE5dSF}P?=JI}OiDgJnMoB8uC!xhyEy`K?_=X0|dmbV+4 z=tudaijQ-xY^@8~0(dQ6uR`>JFzEtrq|}HE1#7Sm8=lX!zJVbAsMlEzJhom=D*lR? 
z9AON?7A=(k``q9&slPor&Qyp+hvP^{i)#Xdd_ z{A3hehFDlrMt&P|!gl(Us}-qfMPyn~3K^{n{5k!?$M3!9Lto7SGtyjpn&Wvh&HFq- zeCTW>$L6Lgp(PggR4|T;+o|tiB_L9BB#9`Qfv8@f(}0=%fX#Gat9GSZFrT2SRrD*hjcu*dWxZ zJ2@sAkHyu9@Ff*=O(ira=D=MyxHQ*)6yQsfG%%Criux38A%4eQCb+71ip*x?I1PFcxql%Oev$}>A9|4% znD-ev+~(i%752SvA?G)n*Z=_l;D68fi?9A8z4brX>pzoR|IJwK2@~dhbkM$60Ud(Z z+|H{X4C#-xFpz6Tj&Qn`B=kfPvN?usjmz~cZ@%y_t?|;X zJq)&x4R%f=o2pYU++&2|B}KwuBv|;h1Mj!`kDw|Fkl|4`^pePEl8f*!Uc~xm=M2l9F=;eTBfSbbe=M0@*@B{!jgYzWPt|Q5@q1ItMcxv&WpdA3BqUAOJP@Ay9b{lF2=y2x8R99&|KK0Cw$FEE}FW=0Xg4OjlNud zZWeK7D_}FW@yG%Vom}^@fds;Gr!9I~12xszx2Ne}WsGr{)7cWGD{5FR@ePc5l^(MeZ%Cx-P>)2;{Ng)kV zq!Yegk_3z+qO22Guldy!<1>wk0f{P=ihqurozi!Zi1S$!+f@0*L&Fd2d%Mwl_$3=~ zA!8m0AX1Sg7VE9uGpdcoKNqzV zj+onCr!-bkl5B5=%LMcQ5v9)BIYZm)03`=16!TMlwrIn5e_X7ZymgjLQV{`E{eewVxoQhg`Zdwn$g6=t z0*3``^DKY65551J_02a+$+_fQi?XuEs??$CfT3TPM^CMXSTy@FJA z5pp@zl%T!KQH4)UW;Ag;?Uh^gwz0z7>9z*F%JAh^?{5!g`P-Q<_);L$c>1bo#b2e+ zOOZ8^f2;Bk1;KXg=p!>hr^i|O(Q7K=6@?>0OU65)A)w=sW{bxsG7HPl zmNlxg!%%jW#aM%E(k|}-p0xM2n5SWH+K78}>*+igOzkLJbFKD<2FLd45Cg!qt>!^0 z4V30m?!*&(>oKSA{N^${klOyS0Y_-eUlUHFE|ozu1d%-_0vp?Ok2}FG9>p5m`PfTE z{$anN&!IP@Y?)C(fq8Yp-BnT%PE<#$Yf)Ht^9s#3quv$tVSy;aB3(z_dACfPLhJ0* z3|*p2anjLx>Hkz~!chQLimeHB@T=ufcadhk*BgwFK>g&9GEX$96ikeXPN4XeAWpb& z5dTHeOx&@T-u;O!@zR~b6dGekDIvfM-XwS`Y=64KWKMic^-zh(lcO?g5UzZITE(R2 zyp`|E@tKU3zCi6F(VBmNL`_MF)8yJl{yYZ*n#00YmZC|WZHd6!qfftPaeW0PV;OMg zA9^8FgMHo7lLocy{SI14dN2-LCU9BG3xuc!O-hc5C#O)GspY-K zLL5*DNm!U{z;omzMiOSgT8zz#WRQf!GApq^ZA%o~*^PWdXh_QX7Itt*Co+6mWT3Fx z$oXtA2y$;KDgr~d+V|Z^oUOg5KX~_{P9I3rdza)SZo8Wvl3t*OmF>Lq*`tLw52?Piyyru8WaWhqI9A%>*7pQjz}(!!rlG$RZgXaIc~s$1`gPx7v1JqiZq z^buXnj*B}F!_jKoIzr|+7j5mX z$(rnD85I;hOEjv{z0$Bpf4!?+l&sQVno0S1)9P#Gd`nms9=ie=ZO zJel;&Z+u-~=?8;Q&*73}GQ>)aR9yz?oaA>FgEZna_YC3{W%LpA&BjxXlxmNXu_xv^ zlUDXIE*!Os z%bOX8O&eRKL01T~%7OmI;LSZ47LvUYnv#@w{ z*p8pLJii4!bwi%Jqwn~cKVS}r`|M6%bij`73Y#)$0Ufg1bl46NO#|Gfd$ILJPmaI8c@Fl_EEeB! 
zwW&*HoMig-$|wU;nz3}i|JJDkG`o3moUYD8lWeQh63u0w7Fj2oL?t$_ZMDR_vz&lf)nhKr_4u_*{WMN%W2A9KgmY*VgU;KO1 zAL!^uW??q%*NVgKbe@)DDlr5{m63+W}p6TI(98_ky%4Ae6}op{Gaq z9x!@;6?BbwKmT|wW5z#V&+Fq;v(87lz*RvXtvy&>1%Eh0H|vUdy?ST?71-n+b!c0> zF9ZGY!hS~6v|lsY)izF8p34H=KAN*=8JdY;41Od<9BnqRC9zBuVFJfP;jDbM^xle( zlW}0gT#QU3p!xfFhCxWRLojR)7wPSbXyERQ8V8YyM9{#z;v)Wmi%+A{7$RV~)%^L! z<2P3S?}cjq&#CY?jQJZ}{@+7|cM^;DwQu@3eV2bE{CB+YFI+(XHv#HZH=QN{+A|W?CSm^_l$tG8W1yrdX8l3S;WM`*jelT`RrO|Zsgx$S7 zdmcU4O;jvp#OE<1#NV2mx~WzNi6!0nt2_yweQ?zoK#2!c{9x3M%WEK+b@%!>qpdmh zGaP`~yiF4Dl^+4bRhKL}_1hcv6qlf`ah0ZgZrslAhtwYEPy~O6_$_((T#dy}Qk=99 zjdm1y6ag|Z5_nV*uT&0*5U2<8eh|M{D_AkbUjY&aU?9Ra2r?i9;+O5xw&cbHpPjB3;>v%b4yX6i%cd7qqEc7RpCz zQ7XhYL!G#zFDItFuIIOfQUa|>MunGR`fOvO~DwUop6;kDJ0V4g6chfLrImBB= zY|~Vl)?L$P9yJ`}#XjNp{#vbWg5tX8ww&h=Hkco4vc3JV36AC^{HLq@hr)aE$<6XC zA0?bvP-4n6@rL)_B*PaHcuqsf_-f_!M9r%xL9LHCQvr30l8=`FK_>4nIo0=2)<6;Pk9sL_J=*E0?y+_-t_YS z3~g!rly^karsZnAO=d#VybWp{bz9lY%1ZYk%s-sh%;9e8xUT-Q`NfSUBlaEl{1dRD zn%S`7iZP3)UaGz4;bOHiZWdd|U{1O3p;L?S4L-~L$z|si>wDq3U%y4$%#;o%=tms*$~;a-|{L~wfxBr z+H@XSfCto!>=b~Hg}v{0OuIU@(poMkxYAA-a&K5fsb`!)wDt^Wc=|za$= zs;UfT7Y!}A%q=o&tMq485-rgb8{d-ttlVjMe-rV)lXnj>L;P;#VBGe4^<*#cr1#UplapJ7B%gkN}B1ks0eHU2ao@ zmP}0{v9OwRy0M+rd4D0rErCvF4o{rTCXY%{U7jX#C2s3Bp)HQO2{r9$NZ#9y_hC^r z5pkJ=LTx(i1`W#P+L(@y+Z?n|noa_ZhNLLHIX%9h3yBfqwO3q~sc=7tg=tkb4Mt6< zlFv&(j>E0^pNd(y0Du=13iGAtqZ*8<XE^G;H%o$@f<9ZI8h-!=ntnNnZJ<<9sRVaZ{f=_PwntBQ^H(L{U8 zo^k)gVKk?3HH35F*&0}5#*2oH)PN7edtOC+&tAN0s}j}@7p&I7lFHX>YZQ+JnFUr| zC$q)|&m60Ubk50z3(~Vkg1Pd=Zf6RTv;Arm0H^YXyRgGVZA@=j+HAo=)2%bN`b4Xx zit`fV2+V}DRZfOPSGEZ&5HOuM0t*AbKVoQaUlbs|^e$6lcmeSsI1S3XvPPV|`x*&bW)U#V7Qr zPx{(sOeMq*(Am{DI-8P&l)c(v9*%f+CfdTgXmIfs0|BLue8_yiSzVZ*#TT3%p&DJh z!+nqx4{z4*K>fvIpOxFvI?qn+`nn7)o&~2r+V#~H;`PcyZ8h(~znyqR#L-EP!``f1 zeHTCAK_CK+PipMGLXuSjbuv;)OC6FVr5@;@QO~fFIAL^Y2?+yRr`Lo7%YwO4c921K zZjSL;W;N;$)g0#w_yZH@LZgjC#MgzRLDX5!SM+@MzAb7KLG&j}nEG?qm@HjzAV)$C zAR4TPbMReq@L8l4Ow$$dsbIb$09(&g>{{mO+Bt6L@Bnz8izk&B`OY5d<#>xH3J6%#dH_WAN=&V8BjWzPh+Pk0dFnDLJZeRkC+zN$x!= 
zR$$AI*oljGaH@J z7vI7zA+iD99uGjubX8?U_NZg?x8;(k8H{uPnnwo!!zek_z7^fvFa>zOlu<;%F!853 z5FVuTV86wB47&|RIrIkp;Zd*>tKhXm*G&KkXzfs^rlFd?V7_;x-bE?-8O_v$g+!br z-f1v5Uka5O71tTDxTytB+x2h75-&jPPVhG%)Pee+#nRtqLA}aa^coAi7wxnU{=7+5 zWdtpQK3K9P3JA-JaQh2DwNTZv9HHcFbXGo}cg07hr+9Fp;Z>DJ(^+RI8VsDtBzFAW z;YGEvsbpb(dm8)aKsSHkh#P-KTiV?XsRJ3T0wybnaXSH{iEQGX^XDF%O~2BgUBs&w z5u$F>dE}`~2@CoMt|ZmM!Bc;z+3v3RF-!~eQO=s!XGYznaVXG$I8}pHwhec;QFrZR zCNjxv8}r#?R3XG*=I0tpN~IW58GfXAa^F7{c&&v`Kk^Tnx;Tb0M`)nEOknMfEL3tq zm+0&SnTf!c`6+Ce@YR4wtceMVmn?mf^=Yb$w4S^@Xb@oNdNiq5*TyW=QuDNdHE4QJZ>2VM5M6ay+@{0`)}$`LV^# zgNQz^z`>A|58RMJ56v!9XD@FasD0T3G7`t>JDEzEDBCjE95QBqIMXxzWc2*EY6D_F zxjRIPleHdvH+{l1*!kg_Be%tshL1)ZlR~jl4+=_F^RK+Kx+!yeX_%jBojw(<;Nbd zMkb?Tg-GzjGX5mjGQ81Etol60^ZOyNP)~}{u_k8UK%eqHmx!-?mWZ&94I7jVmVMP% zusbw#?h8e`f5dBSv}{7Mx1L%3=Jtod-AY@6X089du?Nxl*ui0t>YzO`(p_XR5*4^( zYrZMQ6(c(Ujoc65Rn7by#=&@{nzYBTc@*{19z$x~lsSB3z@FLpLq1bRVe4t{U>{$@ zh|s3HO>yD%;w*>P>+N+3quxBD6VyNBQ{kjKge6`8UmP*U-!Z;94A02AK62=i5DR9* z5?w?-T?*9<{0MIGd4?&af6=51*~{oiRMxyL{V<+N`!phRNh|Ic;xkRGr>qL;gI{&U z>Q76xs2y8q@Lqs_Pg7^ZW_o{(^!qf!3v5J16^gjkR?WN&1i2w+6t=X~xtAW-*Ef3j z@7Ut1e zsg;`I>Md;0rUxMzMXow6oXL32orQu??h1j!7>DU(2pm+e zqwGKwL|0}0P+@62(NHguQehExJC1&mh*R@69k#M$ZiKZ7^w8Eh0(RCyJ%N<(w^#y8 zY-QlhJx8{PS!89>dM1sg8Z%rRF+wz}&FkC5T6$9+^frsL+UAgOCV*f2P=2I5A)Mtc zS6beKx95GfsgnP&!j&P6=58hBWPc&Lw%o&o8Ag0G*{)PcLWGFZCRi{yEr+*+C^Wus9LG|SjQ5olEdIZI%q{|N?O(FG6^XlLR@ zrMv189`*9a9WfEo`Y@|>{i)6w+KYwq_8w@K8BwizhO@dpc31?AloXvFw1L^8DwMMA zFdoa_%Yb~WXj9VGfct{GUWPZzz$e21Y3$6uL|-0F?j{tMXoIvD7TVDMKC@CZQG^F7 zdz9lrkUa8uy3&Vcp>+SYS0=N4R2P9>x}u>yFlpBOLfIcLIPRnJlyIupO@0}zjwwu@ zU?v<*?(VD;^{KFZbMk|sR=RZ$Jb|tMo(~;65Ughza-3@8Q3!C*>%%grE6f6&Q`09z z@|`$;nKv?}cbIn20-sa6aj%FCLLVD@cr5paPWTd^bNhEVej;6PpX%`U<4xP4v%3Bv zHt(Qz(AvEy=l00%RTe|U2rs;{rrd>EWBr8!$3qqV0#y*Xoz)mX1P&y%^R zzUd3HhwL@n>|T-Q$#dO%rI1qFsWD_W3;Kj=`AGE6yfF3bINr+oXijx2XQk+;Z_yMr ztD9$BhhXrP&ld<>n`e-FiEs;SKg-5ztpo0a-Oa;g=lkDf3As3?ddXb zy9^m%wpk*QUcR^@wh&8++=Rl%NA6jzQ0}>bIEJX49m#%u-oDKFh?Xk99n)6)e$^!3 
zXqZ+w+#a?~4Rl0?+aSVu=FSQB6k%U-{`p@l%|gYH)euuV*=Fps;?)#Vcg^8f_jN& zRi)A(F49sIZLAonXsZ@+CLDbFq>Zfm`nOPo!g+ya@*9eb{%=@FOn>LnQT@w8Lhi)- z?Sfx#A-vdSIRBGy1rxMTny&3HAy%10$R6)}4g>V6zP5KSE}{^h)o;C9f2n076i&#S zedl!)%4M@@jS0ON8j&6!cc2YT?JFi;k-i>P33t~kLyb>NjMAT&8`mJ*>(2T4Qo?FU zZtDcnaD8h=rZD5|#V}#L!P|TnVzX3J`h=B?(i@0^#t;*)y*%~jk?%_=X zrd`z^`;N>VNciPazZY&0b`pi2R9XKCc7ayW#V&gm1J?CQ*RIZ;+-fF)rdj(|xDBQ| ziN)eQE;a8ZmmS->?I>#$BwfbZL|u&!$9wcuF}cQo1MhlGZM-y%pCzKkoFAZ*0n!bs zRdyklt{t3r>iWRzS*jf!{#rA)1ptARKJ{-^9&H#^Q>{j0+0HNv6BfMblbfm0)GZ^X z4GS6T<%`h01gQ3;P3UQ8#dw3z)#n3pqRfPkA!=iO*hkGjd4GrlmYYLt@)IC{8T+6- zLJA`h!q9+%#GQiLKffW-$t`H|6{97Ak5;MN+kv;U%HTv}N81HB4?2XMypk!#me?QRb6BLxtub=TgFLErX= zeg_-m^9Le<>rVt|FcV2&qL~Ur0^I)2GZ9{f9Z^!xFDdXhsZ_o&+#)~vCyl9P33V_s zL5nlB$@QRCd^6fll^1xjw5v63%tGf_c(C-{`bU-wT{5-pTxpePR; z;RR<&oeDS-KZJ-JBJo#9g0MZJ&&uC2GAXI8mTW*$d$S6wF9T zF(nhSH5QG;s?~j5?;h7q#6mH?C7?P~UGU>8Mr-S-osP+*$iVVU!=MV)yZA$u=I{!? zO*;eMZ2ToDgP_=a@TbV5V*QmD496$lzk@Wa!x>=!AOHY0=>IdMVg6efq^;Q(u%P>1 zan8Bu9y0^_u?^Ra$atPQl4cm1UjdQhgSGmW5_3K}bJW!=@J#ZElK}Em?U6)3mAB9{ zh~<3V#T>X?r3hP7gYRL`&`R6xq=Hfyq#GXW6}{MTG!o8j|44zDG!y?hfwFV>R*~c5 zuKM7by}pf-1trKJvDuemOewTbXN6l**!}(;XplH%oQU#kxaNFD2HiXUoUlo22;%&Q zNzqEBiPqxW3U$Ot?BJb$Vtz1PyaB<%k(tU(vWiola-?n8OofXuVuge`RrawXyH~yo zoKI?S_BiS-3tdg&=MAxJBqG}Fpf4(lF-A#|cW(s+kxLk3-mEbMGmVX9xJC9>L7$A*1d@hwhm4@RvA$+ z*r`P`%JqX;PHp@^6qGZN;$+ zEkn>>onVadkzq7PO6ub(Tr@X!U#=Ek* z&;<9+T8Bj2z}j*i(9Pt5dL80mF&NE}ij-%gf)kcmB|{O|!$u;0D^ajtl~RTja)L0a zTchqJsw9TRT)n)Kf}s4Sm~d6u77{>?tla>@etXV^9NxcWl}m$9txCHE^Uhp1ep?@N zzWo|7|7{!KgtIRiy@ddk*A=5&33_IuCkUK}lo|cq$Fls5E0nz_RAPWq(G!eC4Ga#P zYk-g4OQ{kN)%Y91UK2oO=({hBRe2>U58Nu!3s|YYuvb+$4I*ttNLHEAKL(XKs-?B{ zEaUgT*n7)>D%Wmnm~N0Rkp^iHNl_^Q=@6v5K^ml`1p#RRNdf8ZZlpoFq`SNGyB0g1 z{XBcW?>Xo1d%m^R&8n;C7;}y}?;E~!`{oLJbQAjTseGf7F@eLmG3?TTN2+gX?U|pK zzjtOI!SWO{i|J(>M=kFxT<7AS;JvCffhC>>XdpO3rs`g}r!q?j=d<<1 z*85<6?^KRCJSQ-Zd}-UJzjH=o{aNK6whw_`7>-6kq>x567gWU-RM|lY?m~cv%(29X zi=k9@cY#Ht@7jKkRFN>N`aEC9W7dN{XJ&7;AgMzpiuRp4r-@QYdyLxs=i%!<85jM9 
zrI9V~ajV1miM7>RQy!{X`~o%I`1e5%=Ru4=IC)R_tk6^wc=~ci)R7BjuW8{G9O`PKaVHVi-T15gCJ;-zYIO zpPb5#e#`qI^*Clc!M$m8MhH6xhsQd?EL$OM+ zwxaE`A#{6Ahqp7~g!F2O^N+O6;hw}2FBDe}sRerrd|@2YacF|qEvj5DX(rZMk(Lv` z#?#~G2ym&_9hi@EPHc1w;@4)`is*H!wnQ4Xz!aw&3u{yx6{6 z2W{ezPtR*?zeZusz+FV>f+L{KV&X?=sk1PoLSb(7KB_9pHqXM|wle|S^?sEoma}9Or1N_^Mr|E#11+lK z8ucDk!FoUTCswquWnFlMA=4v#?%BPP+lI_)g{McdPk7E;hZuxkO7vrs{vc7a*7R{` zm%YJ;MSE@{?=rDyZGf2KhTp8bp0=;^1PAr9WO256%XO|uk5|%i8H-i;YJg(7Xr_uO zW%vs+6tdKNIY*Qtucisx;{!gLQu&B19WIxCjMn`56g zlk^2r4WpU+g&2-IO&x66Pn#aE|cTjqQ){FHu6^_1T_P%Rh7E=bxUtnG(*Q zYAz=@A4{g2=^IcRgQD&k(v5ojHQCQ*p`-M8AQ`{aOB?OdCc(IiW3TlFHkpm|OTYBxdb*xG}F8!@b8VVEyY0j4j#Voe@6f&klS-%dQ@zHMiF$ zJZDpI4*YS^KK5Flzph#nd*6THOJ~|AuiaZ7+J6>L&&Xd>@4-MpnSqqqf0fhWzPB=^ z1z=^Ex4_CgC#)pUIJ~mN@gjhgO_`XIHNi)I^WK@1iF+3}@|)~Z+H5-hUcqDe0awS& zMZ+Fnzwuga#`}s_oq>blK{74{!Y2KJfd!(weGaQX3aIch;0uUos2pX#yS}_VUkH9+ zsHrjk;ujsFtsHL~m190G*=0lT`1j^RUmKwX^{!Vwcy)naw2|B%RL4&}aH(&Yn-8qf ztaFlm&e+57_-V8iv5->yt1x25EbRhD77MQ1q<0#a$fi=qjWpRW5X&k#GaL)_r`zz+ zIv>Fvx~URi{6HsESW5XEp^YkZ9Ae3j+OUkO>&9A%x}fM6vZJTbcr{S{Rk?=q;ho5N zx8;Gga-e9TfLH0m>Te@j{O%T1d@IZOPdrLiw{p4Nj!v%8t>#GQ`S}#n%z|9e9~W60 zsbCq)FD2xuYGN*YgB4J(Stej=jKna6l72c!`NFAe|D=2T(TSgT`xP|hVvYTR03AWO zpPF&0b57}!{Eoug?UR(sB4R4iF$Lz)2HI7>Eiq&iGhBW|akhYAwqtK)hh&4 zo@O(B{m^fHHbkG8sCsG`0_>_yEw%TtNqq=nKZadOo1yKAS^jb+Y0Er%x7cSSxGi$@ zVNu%X$jkq-izA8BH$v6TL_ItFP1bJx%;ow1)$melGSzX8>b8zAy#s|(r5#r)47q9C zv5#qj3wNhg!uJ2 z+ae)q{tC}-@S08tVwu~YC7XMeRU*7S6#KlyM)%luWpL<{$8;5w+vmCDOCoO~jy~T5 zuMuxs;T42v+OXF`DJFQR%)??WpN@6ndtX0gspwYEe(m7Pus)1;YVYUplYf;ut zi<-@#VrMDvU3jK+IH9EOkj9W((BHx~q4n!>LiO&GfFFbJe>{i$*tS-LK0kqyW6;QY zSYA;7L8J?umYi8t`98JDWD(A-4OLnWT|={Fd%+`&;G8G2Pha-2&n6D=$?_Q4VLVh; zZhtEw%WgEB=6jdOa

thHtGNr3k|^Is>hj)T$5_8j%4NgtzK?@-k0lC%fakOS$e-LoD?!Ix2^Wl z^Hsr;1J$eIOyd14$_n2CAD;>2XV?h4(Ik6a8MMJgdwdJ(NM~i`G{u$_)%I;0pU zM9gC|G!0h>PgSsEvL|8KzgkXYrqEL!`0j5M3Q%b*H6Bu&PY5|hI$AHZksh=sQj91O zJ*8!A&?KO7h#50vaC#Fvx8~c5R>NgHYoPrmelI@!wXs2jF+Jz*+K(eGt&!TXbm#0N z+^o=y)!zP#jEYA;EJ=dfE61$Y+oSnWiXHe!MDS4#$Vth4_t~^6EtsiTn`;YZKt4ws zlFvDFTvu`!?11_9(y`oxS-<}&()T(4abLmd%F9kN^;=A_u^k??Je&E8+izrZ7|&Ha z*bpx2ecw1(9!TcMJ`eM-UX?dMHs*{E%=N|FS@@(`{4+|K>CM{XPE*$l*JJ9{IEJ7ZG zaI}_>x|p^37yBnm$C`}Z34BuW8<^*z(WZGT)INyZ;?!KhW)D88n7>hwh$vZDVNO%5 zINT}B(}?Dla;Tr0Wn=zClOwurbR%YwVR@8q78_bK$1Tl1m9Iiuh>2z4SRlN3dN>~S zb6v(n{)EjeS#)d5_+8q`HIaF#vzPO+!iExORp#!*XTHi$ud_d5XmKvBtQcasFR5vH z@(%Y!wZLiM?^^aPT{ve{-Qp*2;%s-Fk!X!P;HnKEg-~MoNBNTP6uIg z|5Vd_rV;b`_B>PsBO|nPQqWD2I*Jz_57LDaJ=}-+k0FOkn(5?POz_N9+X~XT)S-e# z25+Fbl)t6tj6eL68uvcv&}Lozy=>AWI!DGN-;w8;D-=j!7Q8o}Y}wxUbo8797G4#8 z^#g}4Fw8%#ydo4}>#1}+HOCKSp(P~HkJY~|>2;K}yzvBY_~5To2D^$rDi$=UVE8@VQ6z5v+`xfCVB$mJH zBaVHzO*Kd&5K+?vO`VPU??1%Qh%YwJ7nQo-B}vGSV_r%u$z=#_n?M2;SR zj$pA&p?dzJN z8)NTg+*p3h=e%aGdA%#RUC1p&b7F(Qk@U$_Lj1$)sH!4q-}xid1+HB-U*)j?9A$*Y zuI^MigZ9v))bG1#&Z@|7X;wqgb@xBga$9X{%|*uRQeksosb+{vmFG&oKcizY4lJfwg^w!Za8D4ZK>z; zH{2cdgznv8$mMPhk>Gk>YS_wGmZ3{Fg?GVKAP-yXNrlz2O%q{&vc$7szqP7ed+d-* zj9&hApK1BFtY=;4%$EHEVOPtz)RN#As_4=5r&8vIzUeJ-`j*Fp?U3r7cZp^osZj0K z;*V8AGW{L7eN1E2#BecY$fGJ_8WaYoCVP=CM>?1LP=$heBrl zs96EnxW}yi*hc$RATeow!c!7Z!(1De!W-s7-hZhD$agqZfRIL9A>^9GU?@n&Pfam3i=Ms@TT*>;y}%c)0avu`UB&p zg0BQ<2*vj`D<7P@KQw3!d-pmQXN#w)V~P`w%{S&`$-Ribuop3l8h{@# zn49R6mpI)LxFER`IE~C;2fS;V{h8GD<#+z9U;;J?;3Y9oyydErh=PLranKyw6aP{QiXjiH^_qDF`8QQ zp`{`VUslES26{G8SXwdPF0>W-(ppX7T3E5ZQ9|QjF--{F-%0A+!U-{m%jjufPNcxY ztp0){E!;p~6jl-c!_NJp^v}2P#+C^~=r!IK;kx zC$9e?f%9qm!Wtgx2WBcWGrk|k3VQT;C8AE&cZInWIh9b?v$~{4t5U*Krddfk`&Mf~ zhJFca1|G!HkB<>;#1|0CvUi&mm<&gdc>Oy<3xR0RwSvWRGR582ipX6Hs?rQu+1T#7f{v8JcJ zOkY!pbp84c`Fv7}g~D!>Om6>)`Wf%^rAu^N@kc$Y)bk^Dbe2dav>Ecx3fvNj zP3U&~NzzC5m}^0emmXrsDQ0Ss3A2{e)w8kME&AeNIVZuOpoPUR2@VvrGD(nlzjkPkd(-K`w3a3e<5!~ZC@kgvf 
z``L_YF9UT$%~{csz-vJu9)?D6F_QSxr|mPo&8Ey+kmPviuqykJJAdM1|1u@oLxeg$ zPW8qQ7gsCetak|%$~yy&QgR!GQM*PaRP=Bl0YuUp(c5=T=a}Z~vLSQiw7%NP7HFi( zp6F+NrxQx{8Abe^`kDD8x9^AJESfx1it0skclrIAE#?=^{5bSH%=Y0Tt6Hz zeVcU%uWTOUoR5W$sZ(7KwLPGUv?{Keouba(xidG4fje&047smq;eT;K@K07g`aiyE z<-W4z|FP>e{^zT3pBevOzY6y$!Tk-|La%bK2ZLDcomp| z$Nc9#O8&UANTcM_%M|u)s zI#jwDV}>vANftK2_r+HKLe@|*6agDDc?$BTs$E+?V=dVSYogr%q#sDHZ+!QflJJnj zcNKG|WAr*%2XSdiEQD0i{PIeQh}oQ&cG|yBWsRzh5-zsEAP1*U#z!KbV)67!snz-& zP=w!MH$U}~oe(r4wirgrIN5Gqo2Ee{5gaNtu!4T^^3~%O7&u#shc0iWL80pI%BI4n z0OlH+`tVEUV|9N>{`^Q2C8@^y=1 zKuQxb<`J&+H`YNGYNuYdiB?_elKy!N@AOlRmCx zJ9S~6K7w_jqeOSt$3W(oM|gcUuSxsV8`;U>y*8B_X7Sf1g!vP?G0&^02x!ChCz?Mm z8C-RveOLP@^MY&97s9ix9GM$hRT`aW1EV$i7@fK4qhCYp&vG_tFkci}*G z6>R$qYn~JVHsCW0Z=1daSAJB%f=augB_-Wb{)N#U3!$7I`(6swBYI_cYTC#E6U#aj zW0-c)b~akWWm4SmPs_~-7jdoCVc{59DLRDY{nF34CpFSsHCHk1CeFH^kA+LkXCqf5 z(lb-Nc;-Mwm?;YG4A?ypdafuVk7OyYOD%YU7cpu`qEo^-?f7J<#r<@ufoeF@+1r5m zsgW9Xz~+{X3@s-8(rEejM@!4SXtysNS&tZ6+kccM6<02d9hFI{da{0qJDq2$_FL|D zN=~0M-ni3`o~=mw$`C%VuAa0A4{?jdVK}M55sDa!OH~hbUp9 zB~|N+t$t`Z>^h2W-Xivx&5G~eD!=e`(|xu~p01(wJZi#3-GZ*vDH)-5=g111%6)m+y;s$jb0``uO69O`4iJk5sXVJRBYGsLB zsEFb+100*H#li$H74H_gpdfyxMq|KkwU|r%(-U28wAL*njh~N)h(8V44~s`-=)%F4 zvAkSUaa5)lT{kS4_&&H>7o=jGZNSVO_NvfJzj`$;gk!0-%vgF^UJidracW8R)R$Vy zC7KmK91V=gLxOu>cw-Nl&#F2CVqo>CMmYx!O@B3e;ch7B5c zyA_L+>*fQ*Zdt@gV54>sO}OM-3MkW1z7SNLnGpexW_<4l>^`E5iI z1(X!h&Hi(cwuSutgEcG+4NP@eO)Yed4WF=BTN;0olMzElA%=VjUHtVc1t=&OaK8y? 
zCnQh-0i{d4?Fjw{ZKoh61Xa*avH=Cfaw`5xP|-BnRTH(lJv@VBqzi?bDT zn z@;D-VwB<+$Q9{Tcl!S5$m)(0GS+VL`*~;wl*1GOqBzCba82x;Cck`n5s&UO+Rz^(B zU(W{$jvNXa$%%HL!%iGU2nq(%6AG?@+|zF00pxXoCNk)RRx(xz;rn;`1S0@_usyEjn%|^eUQp=-{d^W@%_7)`(jXQ`yD|;2NZHzTCw=7MS@#}l<72k)4$1hetW)z zhljWC4Fv{%2?bX}C8wftzQ%v&JeH^GalJ%VSi4(8BN^R<3WM3iyAF%_T2f#)AN%L) z+l#BYyW3_TME;9qoG?N1RXFRASVyS1Iasgj>uUq#>M4rGo-Yrw{_UNm5WWIQWUVyu zOfN8?|NfFKTUUq5Kvo?JrUB`kXSHfA$(5zyeDU_?>H#r_$VAZVMWE()+pS((W*Qqv5jOsr)Mq}DI)OKU23l>R@|q>F>l8ULFRVWW zYj<|^lRp`$)OMx2uC9(k_+#D8>95L}_rJs+Czd&Ejj`;#iG=sq+}Z-e-JYJ-2wkK@ zxF1_R6}c6h(^{fq`{##_-SnBq1s``$u^;Z&&NwZ7*28u^+a2AlvRjim2sTb~&gjvO zVbLV6b20OUoKLS~ID^F#zE)TIy{lOL{sdn0@q#b8E7^?A4&lr(0=o@YyALg^C{B); z)hjgKPa1M^a<+a%ry9(LTkTJ*wO$a4wWol>w}^p4Re9czCoL^KoFVPH;77nxf2rt7#2ffV&YfP*ISHwBq%-EjXWH~Ru7v~ zX%O&>oJb^6WU5~lg!QCb@$Rcuyi;xbaI=h{F^O$87R8m`0>t6lqbq*$MCbfAT=%9u0EcCTj-VhSr z!;8UR#~Z`x3f0P+8=1+`g+jJSlrbG-Zu`w|WM7Q>RfvGBP9X<_g7hX_P&AiHlb$VM zM$k$tuSQt>7vc<=#JDdz-9_>`gD`NYD(obxUfUsdaNbYSBw|rxQR^JZT~O zQukw#s+)ZT+Y^=ujPT0&i&EoukhLtdN*OOSs)`0-9*_SGQ)f==sYq2O6w_3KCp;4i z+S`QI5BNNGeP7M@>ceJ*tPW;R)!BjDP&`uROnTwUXJPpYe(J?>m_R&@#a7F-0$ z71!q9DY3F+@3X{nTVYhS=VcYRGTD|5nm*(#!H;WBCPp^96?n{b-gPpA|M6l~7(53A?EmJCs1UbI-ifZG9X^xer0S}^`hfc zz+OXwqxy0`4=gYb(XNba2&=$Qc>Z=U7#JC!PVq}RA_Q?2Lt3FC1+Dm1a19r}DxWiY z0H){%2h4@=MG8SPuaMZZzGq-yAZbrq5tK@qI3hSIvFO$q1SOsb5NwjC^t5|LuWsT8 z4%R#jAgd4fYU=7ce>+&c3B1F9pO3S1_21^>=j{!1MLGQj0Dw#r3_bw@WB-B>{=hxT zN&xt$GepgY%tr&TkO{KK_H-L5gZ9N zaIyG*3{;mNRYXMOKP||Q=^Hd4RDmdPlH|Mb)gX*fB>)9k^_rSze{Z5EhPCo3f4{$*JJ`MfxrwmbMi09eLj;G1A1t+dNlex<_0XI8UyPk%mt%F1fBJyFc}emejg zw(Q=~@FatInV=7#6&%-J6&4`#a$}tAPJ8x+?4=tIR&Gg2NlndVia)8#MtaQU$riWe zbd@as%}6L(_x;xe43N&PLmnr)9cJ?1Y!vCXHr(u3@t;oU*E;T5nwY%iIEG*zgo}ae zjmlCr|LJkkM6DyMqo>mz%#R;GW>Gsbsa49V5g;cDu%}qg^aK8Dw#Sc+n>?Wp7Ya0M zPj`Mfu6$9ZMd{qglVqtj*c!{f$5JN-m3UfUS6Aohl}^TZue)Y>#Kgop`nczwyBx1C zwE71j6{Fi=RiB6BQaASad#wiv)4a))`7HeOrUHveO%ELUxfEQ{i&>tI?a9(Px1+wP zsVOZaV*U?xOs8L`ML!dNanUlLsd3PZhN<7kh*!|AV=5eqg2~U%?;1E-`x%qV)OFX* 
z%Rvt;H{p8u_G*unnOXX_GCyA$<=t!@@5QHS^0^hXrYQP1ax`ulMkkxOg&Fqz&g2udMX(UH2ET7>J)boHAM-U_R^r0eBC#)2@P2$oS= zmcY)&h8B537&RDeE&WKDQQdw2mr@?VXkK!e8Xg@aB`rsrqq)iMr>ajvzjLA!vA<%n z1Qh&97F6cySKn<8NwS`7Y8JD`xIp2$65hcBV>AXprx8oz#^ayxU9H9*TmjIt-0cH#NiHw^GSJ z?Dw~0(yCJldE6dML?wGoQ}KmSnU;Z>kug&2g;8G|jiXYRFsbZ|lM&>{3G+fnlqo5W z%E6R_Wm2!aMI;|E>2w*Wz?T<3SmUu>?pzbIvULg{k=N2<3AO3u9KF1}wEOvjo6g-P zON_KG^gnjpnXDg6)M@iKKcy3R%#<WF&6POgn8V9RXCIuNGav!#XU|G?HqPKf@Y37qr)Y~lm{vPj+EU#$A65DyDE_4D8F zPrY~o6=tv^3~<)SMFA`l(!^&#>=MZrsvirfJ(xk<{h_5Op5b^u04*E{Xnk7(phYe@ z{OfPXn3a|FpYX#~Z9zk_QmuU*D}fGb$F!$aHu zz$}1}@bK{eIlKQ0LV~DZ;%@^sdH1gD?+9r!NckVV{RJWaojCqV$E_bKXr*Fx{*8kB zHrChK(u8-B>6w_`nV8H|Ot3VK=c_9#DjLwDQix2}yE@J@9#{a37|D{~faB)ml#b_; z&PGU=dg;aL*sQwSsyd+FvhJ(H8*P^Lh6Nls?hxffy@zYb{CV{=-SO!9Ya|Do>SbK8 zFe%RvjdR&M(Q4mghoya%`IWWt9ER=mh=1N63ho^^pYOtdHdM7flO-X#B8qT_GsS{S z6@qX__C0rKC_>>&zg=gZDnJ2-MR@P-SY8~ifECVyVmihzyonH8{-P+bVjbcsU?kTU zh{wo2!58ERA4FRXoT*U6poLFR;9CjSklm(~1-tErVhVBMp<98Q*xH1Q2svSg5SNjj ziG`(twM7Kbpn>0P>Q0W%ljq(x;4_>HsSU@kP2ke%DJ5U4tTU?>=^$q}rsv}SHk{3o z?0+1N0y-pCm;iLE7imTeIbr1FfQJRhpJo{yt@aUZC;q7Q7Kzkw399Ju`LX}X^Dj(n@m2 zVqt&+-mrP20j4tzo*Zui6?vE3A0gHi@Ce$Y-)4Ov1(mgK-wTnnv#V>vdlTUMY@^3rL;vUUL-&)kp&SZ@%Odm7h!M8@0lxLjR5^)~OY_G4cQy#bB$UZ+Ac_X_u-EcWtNNg$ky>(@6 zO-#+hWU)C_P||Afl)K263+)8&$}YsR4MOie2+mVUom$i`vzz;dFJjD-zX}x}9MZR*g5O79jo`5`PR% zZ+Q<Q<2RbCZ0 z#L3CYVaLd$6{OX0F;ejllwV$Z!6IT%^MB>Gn)QZ9jkvQgH4TRWabB0~ZtY%!x>Y^r zg?6dntUbFS%UfXg_WeSS4NcV?wifAs5{b4CTlT?0YiyCo@eU@>2K5Bu%mJizo2Zh z&0HNv#m7H>q|Pp6B^&a_=p%LDvK!5tuTqxm*|ZsxJ*GahO9_rXOOIUem0%5=(_uCQ2XKL86yB z9D%qczsIcQ`sJss4llk+)*H)x?QgU0XEQ#C50Y%!iC(@Tn_DzAG(50yb7XeJ51ROx z5cBlZE=qDxy-X8<>EjxvM?Xa-D`5HCDf7Zl61v`O3r*2`ts7m(sFlLmY?|nR7GWFk5{M9gO_v<-V<6PEI?dofhIy= zJGxT%UcWt4g^fVkPq$nzxOvJ~hL$6hBE`0o(=H#g7yNBbvIdW;9W|k(@qQ;W#M-i27fy^ICdX1CtY!3C)&Bw zaKYQ&T?wnfLxiA7OOqu-*pKS3dG19Uq-dxPlIFwjT##pz09cEXmwqF&f12&#Cd(Bi z1k7E6#y@nCA4)KMv;7?k2UA7<5ejAQ`w4#x0dyN@R7Ff@7~V&Z 
z9vKl+#1t*VucD))A2=HspY6_QMGt@FbKGt5L-pxy0f9PtVFG4w+&{0+Bfnu!864q!-AQO$QGe~pe=;-n7Dr}a_e34pWuz&)u#o%fbXFToPw`h2 zhwhbkN-B1>Bo<|rJ?c9K1>xaV+nJ)?F1!QCxs!rp>bM4mlM2f6SSlRVa6tMc=A?T2ut^$6K^@Rc-ghsZhJVB5>{EXmLRVFl3T*Anx7d+D`A|nXj5G9x$ z1$|wKn^L>F4p)FrcXKZQcquSyU@jyf6q(qT=)nVF#RC%7*?Gc1Hf#_Q*D`8nLE>7S zWJnN*NOvD8GJ_>Xy5IHZYlVX{d0%d1PNXFBs|uxDO3>c<9KeLf6YRvn!M!&y3XM8 z_Z$=^onn4SRLd)uGxo=a%gg_4+e-mo>(RPUsp}9JPDl{y$G@blV&0jKfnTGIG@jxUsho1!YYjoBfXsi~91H4O9A~$L}@7fA%wWq&%I|qc$fH{Q4h%q{PF39jTMxuvs&MEqGB(3RhlwVHn%(erqzL=7)a|z*k~)z; ze*Bm-;Ml0z`fNb>LH+TNcI;YMfB+SD8tRb^Y2@HSy7 zC%2&HsGsk#Ov1$AV0g~WKgPmVnwp6>?INOrA_h^!cqsVjyMZWRY1Vm05RTSJrWoGu zOqBxxk+HCj{a_rZ(Au_LH0qU><#Rl^kZF1fAi>$zki2k0qZsF1&Jrqce(n6^6A+Pm zY}&sG`e4_}eq|RyVFHSCdBtB@K2)AklW9;3m^}OBtY|ulxtZBV^oK^lT5sMAmzfR+ zSK4PQJV7?2$Q*Kp5Pp6^JAyo+S4*Pp4_=qzE{71n_>bRDmly_}`;xq;-un97`S4_1 zdoEq=04;@T*Vdw&3q)l~xo>GC$HvEd$lf?6yl_3G3Vj}N{fZ$R1`&hg^#@AjRRSiJ zuxl$x$spXYYNm@Wz=e)Lu%NwTj4}J}sj(^hq2%3U3DHxd?+AD|Uvn>QrQevACrp~l zZ>puoJ@b&gp3WS}*S`HAkS&}D&jmv2jFI4|yKa`qFKS;7#ogCfx=>bPCM9kZvY|q# zTt`E3m`8J5s6{eyaVCeBzaReQBSa{bNHH)@GB$69$b^6TRz+|!u@X( z?fQ9v@;>F*aFo~<{M4v7Hi)IhWTC~6#Lnr8mc`#vY$j_g_Y+_7o5gsi&kCy{yiqPC zcBBuk*K=H%4uFFZF~`PZSK9q}P^`MTno+r6gw)R0!1Kkk?-wwi^z&F7aWhGYNUmHLgY3xp7l}xH5U^R~ z#a|27c3>+E>=svHax~2jN$WSR*JBaqV?~&~W08;&qzFH7#AV8K0e%LHKLMr5T7Tl< z6+~Md}AD%~;rJN>{zlIL>QXy@KC0c71Uq@-1(UKJ&!*KF)vi((ODuGwIoaFHQ@d z%nl?9A=mlj$-h>$7|00G$DO-RK|X{wDkdhTXGiHUHbP_NCGdrrIsJ?7KaBYdxD{A6 z39$$&*u(G72Hq8!KzoV!7GNh;Y*qrqAX##0P3IhstU%bkHDHupQx#L|T<_ zRLbe$dvP=dASPY;0BNWlPxR%S*m?dRXU^#V*dgZI~o1>*gWCdTwTz1@#K zFsND29&U+5)Fu$Yy}TJBrl}Ecu=JaXVCj({OaFgb z#$AqPuHPXZD5}IvfaiX!BenlF-|oN6H#-OBUrU$&*QHZO{e9{9q>G;aMj{|-A>u>B z`hZDPjICc6m%AB4um;dT1V@c{jRSc?NCSdM$S!36wJ9}}e>LUy*QNm7iHcb>B`g8A z_NMY+2PE^7o~kZZ8|0yQi^N-kvmO-%@OnAyrhjmD7Mx_qtA}$y($eRMgo8L?I)MU^ zH1AiH&!0d0`S}4S42T~_l(eZN&z-F2o4h1uQb&*?ZN5a%O!!*(DA4|@Rm?VpLuVd5 zX#SNILw3O}+o1;QRd<9sT3{48tvb6k;X>G_|$AT%hrX zOB`VaS>Cpfz?q3x9F~K)P|(1`fPwic&-sODC$Suo3{e9Mruf$T$2ugGhwMW9Uv^<= 
z4B=n9fby4JNKb|NZ)~WZJ8lIFTvO@}Hn5Tl+i(hma0+0g6azCKzi#~Id4S}PNl|YC z^1r&V{cAUMB)@m#7^g!UqwAPbPs-^jf{Fr;=f}C3`C>$4n8XF6EIBA~HNA>Ct=r>7u{7jFmu?LjEWmnQ=n0@D7kQsdBOT1*t+EI;zZ%+(QuL%(C@A0b)Pd-Fb&?wm!%j$iBQ-MkI49 z0xE(94oG|RQy4zvG*i^n)GS%9aoAc8&E}i6AC}H`lxU~nK|!I! zK5sIl4mh87Rw^|<^?_EZk1b@Ery+#*mdXL{UClMS ziN6yaS|LH#<{nDMtEMaZPbM$%4buWMw5eK8AJ6^k;$=dI#aOt%wpik;^KMH#LBM~M}u>MAp#uFiJ>8&zDJ8k(w=}6 z36R%F@#N6%w>VXVoN&zvxKcnjNk)70?D%r4FvitR+wI`T8yQ|2HF7yrz-v%2q{sl! zZ4E_xmjI;Mdbv!N7z%$xJ0MKbON85;|3(EEr4dl2j7`5Y1`cFc0{B4UD_2_hy8cW?-(tG_`rO)&ugPw!czVX>dv9)kpQ+N2(%M$jO;`Ruv#$7#lF z&+Y{hXs_NZ;-t#)Mc{4Mg)rjh@Ba}kXWvIN?*MBTi*uqwVmC}ofd1rRL`tpZzsk=+ zKCy6#;lzSW>nueB0R%m0^Y9hsN;;f|N+LRg88~28N=g9f&LJ+qFkJ5MVcR_onwkHL z2BX*hV(H5(UcYk}Eg>WlWdeyw13_$47>J0P24)Cpit(>aE&Gk_Isl&bh4A#>2?xSm z0E1}$Lg13($ST1hI{)T?h~ZO*)82=D$NWW9tK9RRzO6#cVGJcLgyet`WK!1{RRCM3-CKg9;v?$0*^^^l=lB6~}!T`8wf;Vx70AE>hiRH%%NjSUQC7OJfJ z=hbC%|EXCxLo^FX(TftQK)V33xNF{q(qxHTS2UL<;jg5O;<#A{BqREMT7iDJWv)yV z@H-9lEO6*WmG;9bsA=U8p<}#7QkN#nm+Ltz5fZ^>RL>+s>zq~Df>F(VnEXlEDxB4M zXU8^0eOuM7XYc3At)sYaR`|Eo2lmqsEpKrbNN%0{-qbu#FiX>7R4F6sbzuVf{{H2S z>bghFDg@>w0e6e+y*xuJgIiT``Nu>#UEbK^&wSOTIkLJGUXdV^Re%(L< zK`6N2YQIo2k0Y#eKD*@W3>slV;^wk@i9ljE(fAL$1utrXf(E;9Bb0xhmOmwiKP3*a zvO_;L2vD3N7g;Jf@#@Uv>`zGa9jFR*OGq}VROV!47)`L*J?I9dJJRt+A4WM*nWfQ! zi^hBOW^#g~oj$eHD2h4L^`Wmw_Gz8@;$vt&DKFfTi745?J*151) z#oTma%dpr|DN9>ZGs`XHk2BG$b0L>aRj(=3y%+JO>+mj8@bNY$J|pwzTNhO#_vEuu zyDWr>&tY^Vb_TcQ9IC?a8b2X+7RBc@J!DurpT`SMDHJ6ETZ4Cq*053pDdg}{0sge! z*Z0JbsyT94kZ3^Ihxt{`QWRH`Ls?`pjErA2LAG|UzbO67VSoq~=tI2t%xWA4oneJ; zD~wcPA#N8dF+1EW>?ksygB8+*np1zE12y82|CA0OV31(}kxn$InNZetKdGI&*{!l! 
zauQ6tIZpjOW{0s0@k1r(2a?3)@rJNJ2Dqt_4Jge^3w=rKt&?+~nS}0v^R@{AHCa-- zj~WiU8FBKvy1Fg~LqC9;t+zd&sh&fy1<6-{Jk}xi_Tr}p2I+H^yu}Df7V&`VlR}Sr zpxY`QO#qTGjL|f5q9Kt#1O)tEZHhIjh7HbhM$uV+Es*$7Wjics@m3-G%7tO+V2_pV z_-(EAI!0OJbnlY}DSEgvNr?43!x7OPBOb28^ zTGC^*Hc`_vTRxD&%BsKLo2|D^I9Lo~rIm~VB~=cGBafn4zWDu`fDBlmi3Iwq6YSUJ zT1D*MC;kcf5vUJ6E_$?qz!)aLuHPOEQISE)KwJp%0cl=gA^+8Oshyo&AGnJAw;eRG zvhn~mY0F0G5IVDl0%+A4L50Sv;@?y=KcADKzkY_ngo3iq~ka?@+)Uicy%~(Vos6` z4w?$zTOJb5gFxj`l_hFF1-EvWMXfE>7zo_yb2Y`OJdWb$;AV^H-qK+_b3RM$1JUyks7^ylGoBDC~?ahtC})&8TRZ z{S4F%uE>_u?LPRh4_m!^2a0FtCun)q;|Z83Zx1@il7TjVwb!`!PFM(U)$pWO4xUG= z+KnPNH&>E{d9zCG>EUYc*V6YpYSk?iJu6>75$4nBcZ5K)gJf=UvyGS&_}x$Znfjcx z;+SEcBc0r<5#*i(_9I&V4J z3seB%|9U;nxNO$paeI`kb0tOBKUn{YcZv{5S=Rd3H4FMfrgy7Otn; z;jmK9vm!FbJ**{N^|4qMlC&kNFOF1PhH$>~*PTw8A7&=>S156~6_W*Ta~^o-J2uRa9x_oGrckOu8P9 zvQoIyqkcQ~#L&QDfrY@IdPQ~4%mQ{rT;RfzqR=*c^a{i>D^5bMElxlMNDLu;SAJC$ zA(Lh-5()1lYIO3=wlVN>!xyL=V>+`Lc+DOph|`Vp!fgnZ!>I3_UGV7 z5Y%i-08#Z3hQJX7f6i$=?aoEpG44Gv8Q_UIBYwMpJaGdfbfeP0uh)J8l0t2Dumf&? zpT@a|>dJFI+w~87P~aghBJ-+1NXJ`F5*7{x@&b5JKyeAr-EC_^K|%h4i{GIPtn}q< zt*q_9C0Y&^dX~DT?^w*N4UO+6?&hH|B}BzVp`f9mp>)81PmfB3j-g2FVEKO-uioky%h! zR9sS8R$fux(Ad=6(%RPkr+;8@Xn16FY<6ybVR7l-^2+uOcz19A;PB}9^6L8L_U<0~ z@c19N{sZU#{QVDL{|7GIH(cDrZ(J(?Oa!>M!9&0WNC2MI zYfuLA-Z|b-5hz(UFWT#<5!su>I~tN$a?&KKjOFSu?uz*ykX)2i+m3Mo#{_xGNfhXb z%*S2<+ujAsYnuwCvdvMB^`CoefY%Z^d=5mrdmwjIMydd~p>s~}{vD5tuvhx`{5dWV8@x?>^{04L5?$3 z(tSir))$#^csP3ZG|C^OC8U;zRQOdip#LB%N|>a2edPg}wn>hRWExeJcILr7AA&SY z4uW%=X8SKiRNB&*@*6wSY`+=eiq>OW;bICrcPYQ!v)DtB?eF!=j&!hV?NL5i)7~5C zXb~=uL#mw=;2H#+7TRu@SxgG$^!Wyp|50o&+p?vV0!FZ{Ga#Gf-A0R$4YJq{lmpfG|&tAxSm@zKL-(L};CKu2F#9gigS;it9dT8P$&T)UJqh7AoZb7#pLi>PPSU}E|FPZeEIgkS@HOQlL#^791{mivQH8$^Sm@t@AJsZz=hMUI7>>!&EnX$Dn)I(ngC`qOty{ zlN+YjJ9&XlwO^K|hmu>160i^A#IJySH-)!POxy#_yn&E}VbTktH@0PYb?5E+w4P!) 
zXv%!_@KMvZf3Y4ghO~*gyaJ?^OJ4z?Nvi+W3P{;WK=%g8KsL%j_SKC}rPToN3;0vxs_4k-euc71x5 ziEAHGK?x7JGXE9C-~ZZJsx$VURn$e7-nzgr#QcTF5#J~4|kX|=5(JLxw>h2X|P92XKWkwE^D zBZ4jJK8c;0F5nZvsJ~YIEbRTSm5rmo_cQ0EX=f&uSxRznUxXPkLKbAqwFUY$jLygc zHg|!FtOIj_xAxDkfPL?pyq#*-O#=bn^Z_hc9b=X7TXN&#+)V);2wI9W-KQ7RZ&ThB zJPs=-q)mVH3jGZ;=(DBvCI=)U}aiyvrNNvr}uSKaUILO>wAMpJ- zQRtrhOLH?0eTK-2k?uLKgcsE#PjsIzVAEM8mrHSAD})_X2g*WKFnRXn=|{hv%9a5q zvap~8iV^?W%7hlGA4$0ZfVX4kl-Cuup5zfHw-i<+*y+u>DfToIC1|Z&;rfhzedkX4 z8V~-%<2c>_)VSj8j2IMp)+rRlDfYLT+Xu<`}@v*ntd>G-K6++#-4I0^ehv=bnPw9M5rK&@{!l^ zc?5EP#MSw}N%eB%=$I)C#wgY=B~G}0G`ikGZn5hD4)Pv-u8nvFt^iYY)6`q4{fibiYeVO?5 zBzAs1A0$t~o3M9ctUYSJ&9poWqZautBx^YnE=)4=AWRA7XU|R8)?b%oNBIs8`0Kyg$8?OHBHbL; z+A!9DsW$zE5tSaz{qxqBOs#QTL?b;=k=2?1uIT@ZlG}j&K7pOj7}e+m2ePVsi8*te zwuoV0U~8wFtR9}|h-d<8)WG{RC67}0WR3O;_;dn!1$=lJ9t6!u!$1?4I8cLJp*J!r zReuHa>?i!UR@c+e26ZFkr4FmQG(_}cl6Y=J5Gg&rx}K1!Nv| z6T@^blXnNY3Tf4)VP16j=jwj#{sm$NJ(4{g3ZJ)qLhZRtmJ4xp8!D!-| z?crEV?D@l|^6qykrO*PAlLt}ZLLGx;g9m37b2Az`^9y(%f}F3L7(!oIu~Y2FVR6iR zI%P@o=51Wu21?RH3g15WOK15j52w!V6vVQ+WaxP3V{8bI~$FOX9 zZ4~f3MT6K%J4#1j9<%LFITi2FfUA&8>`s;o7|W&cn$CzKlesAFt6%3Z(-6%rPvnyT z(b_GmA#Q2Ci^{$!OO8g~nj&NLE$a5He++7iku-u2Mq8-Ec|~6W5`o6^S!vRg5N0Cu zZhv=(cBV#SC%QuW6LVYWzkkJ zq0H7ZT}_t8r$CpKpDv6JURIuJta^#a1q?JJZ1;dH%qM-eke^}8+GA}%^#2J6mx#{u zZ7YY?zl>dNQ~$NhTzog-Xu(N(rh%=(9Pc_NeB249`R&daK(rGdrYErQg7UU{zFYkJ zF>8!`r(bFDu8ZQ+&|IXY#ltuO`*(O5?hoeF!H6c-U6766D|azS*HXVp6cgBuJ-POL zx7>!eXB$s)zqvSa<~Ldt{YY0&hSW5AfUt}l7fqL}Lf<)_ZE=Mo!KufG=C&CL*XBrA zvnG&kc!z+id|RTjw30Um8}w~Y_&X5mCCrL9Vyo!Uw02KzeAY59pEyWA9R{aS3SdQO)rz zL~I_Gj}W-c__G)RQ-BcvIE1NivKz}7dZ79o?y*`~OJrySO*w6!XRX-ee3|eq$rn5YZ>>uY_!earpKegqx8^=4R*q33naGchZwE0-@WMOu z2KT|}p%hyxSTEDQ_x(EJ4C0OHv&T^!t(gT6i~Cy1CfZn`%FXyL{!~=M=^_CoLyW&@75}94=hFT1DRffz@HNmhAw^vaQC$sR!l}HlWM*f z;EFsu`zzJBJyvsifOtbpfo(q};i%Yl3bGCe^yculxA_}47*z}9R#JM6-~*J)Fa zCaUdQpB)lzNd$mjRoH^->#e%Ldl~3yD2;5KD_Rp>{BxK;5XjbUog~dlfI)YP>IerG 
zazqvSfbh>>(8C3#F9%*#RM*Arj<2^pPNM1OsYAdo0KUZiZxo@jy?J_pKg3S(qioD!_-tT!i6cIReQ=jUKXw;=MxwwZ5Mn?%~X&~ z-E6*nKX28sFGH^Yn&i@C`5{?Bq$8!K7Rx0s_RYAcWcsMk=x{`~!WwPrbT6ZfX#;I( zqW)FyVh;Z5fN;7$-Q}!E9K{MGVxNR5L#@)NB7RGIwn(h%olKO|nRFNIn)XhQ&&9e7 zBUjTkO6#$b+dGEy%TP*0oFSrFpoayE1daCf(Y*q&QD}1krPAc>v5Bi`&|q}lj>g(L zjf!w;eOe(03xM!g$eOn4D**j*ldI)!v8|hbh3rZ8YYUs}M(F`9<8R7Tte$=m5p~1F5>h*E26`e;JKmaLU4XWu|1-FlFNKPP_$9BTYtKH;>+)#oG-(0HGbe^9kg<^+BSkmO78t3F2e1DBBBYL224@}>q9~*DY(8EYCpGbb z$#_wQ^Kctg!tV(0edlL~DF$*i>@h8uBS}QD7X~MvzIoGZNR!vwakn1VTJ_5jeKZQ8Oimhi0lLreIWeYro{@hKxO`ZG^V!ZLH1tVl{PSpk%=2~1I@89m`KM@I zO6>d23?WrG1cF=QuMBv_#8Z{`IWMzDS%v+BW_21TU+@>B*U4^=eW_sla={V@UH32W zd6$JRq=mhC*Bei-05}!s(vRb0yN?dj$fX6|o@A4WsNl}|?Qi5Rqiq`02zwuZ;=fu% zFArK)Jes<@+}u3Myw%oAk`)F!GuMB!)V2Z;1lAB{&W@W7)yJ1GrxSSl_l(9xei20YRlOn^!=18H5pb-~E7LRx& z(SuB+d~s7DQpIDxcdQTF`P0R{{7%~|0BV;251RJ7fFVhMjX+yeVeK2K z+dYv2OImk#>UN7#B8nbAg`Osd99V~W^sRWC2MBR}IDVhuEhPXfjSXqPv4OvoY zSu$DmHJ5$7IBG|8ut8b4jU0;&y-motoE&oJ!=^34_9JlGSA>LKSwf;?@Jgb+)m|C>`r zo;8f->J(Mmsu%HOR27a|L?>r;9&D@T1-4DtE_UWUGZ9muamt-LAz>8UiKZWTXU*Z(CE&LfU zvN#x3;Q5a;fJ*SIHr2ed_U0>q(RkP6*^dO+bEW-a&InE2qWH$a&;}_DJFmgjGNsEz z`AMiELSm*abQNj1?nU`0k7}SNLc9HkB#dNRmo`_%_U_Pc&sE-f`2598rw#G`LsGJx zVl~QuG3T-x7N2C2cGmCg( z1uI%iB4;57UrcvVQ#Ndr>Nh^4f06c{a8d+qal$4OP+tCK8pTwvl~kG*9xXDgn{yT} zD^c_-P!jT4S|XU0d^(^FLS}9F?>57~A5#Ra7oz*NZSN++9CN1Rs^`RdTCxT|h^=qC zrl0@8CHlZ$2{Jv0(m?fS(-m=BB9>@aa+ra1EZ$i;QLka)QKo~YLZtvCiy}vBVlAsS6wvN9a(FEKM zf6=cE^W9mmN(;nqiKbU-O;#F=SKZyk9hw#EC+#_fwLIj#AmNGk{B#=Yfe`u`|6e#R zvLDi-tDk|Hi`y(P%Vs{NA3l~krNd1!igJU$3DtN!8{otbB&?6N?qgfhd5~uQfK2W_ zBsqm+aHaD9_Ve!X#S~Wg#+XUxRR0$!^o)I*cwfgm`aMN7WbxT|IMGihv#}MC`^4&c z4-xM-QDKrk?^(RHRl=5Aob(q?x)jdl5F9^ZW=b?%Z61K2_|hlK30Ji+q}7~9aiM`Z0MSRRU^kINx8CnC@3MZ@8k%QDq#jt!5tjze>Qq0=?tYsI-~fD zi!EugmT_QVDM38%qiNMI+219vvCOyfqWMv`6oJTNy>iJzB28uw0E?8HM_2EvX{hxp zfCMz1)ANE@3SCSZcWrJecY5*xCE-o*(%qDTQeFYP|6rPzA=@Z%rgagz4V|6Q16)li zG@sw$;X5f+3AC;Vv_7Qg6j>Y;{~vSex(CMl=0kf9rrzuaUj0AWA9R@xdsux1Y<&U6 
zNt~|Lz68Fo!lM3%wJ-grwbTAL549K(@d~h+M)pq8tDimNa2#A%DKU+hJ6ZO9B{>R3n6);?IIXfW zKYzn<&&Xn~>CZ72wbJ4ziEHtqOXJ5AiF=C|TOE&8MbGkzOxk*}Hl6rP z9~9|JvIs1qNr5xeqh+a}gOCUdeF%S)E|CNu_6l%52bWDZ`Mt$iLJ1F#FV{nlRT7D{ z5UTC{6v*Alz`G;fc{Q-4;Tv9QT@lCG$Hl=))8^%;7DZ`5QHg(2_pPeNhxmk&{X114B7q3=Ji>?Qjp=gV%XyvH3Fve1X-5n|G_@BMMf6NI5a5l7$*7>_& zQ>l^m7}{t+7QW5VwQGsU8JO~{s36cDvbfow?&MVZv+ojXV8@)N4EJoMLxz_tpu4lSlI)whI`ZLKY#XEHyVY^^%Le2Leff|tP#_W=xV8T74cNk3MIJ3 zQ}^(^b5TJy7~|@e-0p2ZeR9>#gu*(`r;m)z3XE&=e;Kt8{SD*b>c!cjyuE%1#NaD5 zoF9@L!s;+AEnGhLG_y518h*|rIHSAQvYW1WAU7UTL9lJ!z*)F3ZOJ&NOGSIwHF0A} z>E%DjMS=R29(%56?%rury1Q->@G=o6CiOY_-sUBwmzTi!7aZxnD=b)s;>U{(>}yhO z5Dfd1T=Ef768vVhbfG*62k@DgzXj#qi~JCuVh5F`{=O$Ny`xGuV|(MehT;(ShvS_# z77%^bT-PfA!D*){VaTs?8rqWC6H?{S(yPfoj*-Ez-IwlGl<%231yEXV!RZvSzd&B~ z{!S*`5txUI9BJm2Y(c08!<(mN`Im=eF=QdV2~LpU{o=l!0SXj35W;dlF?CyGUj zDUcth1ebV|(b85zg!!dxQYD5+otLdB&(M&qXSjkOW-Peu!l+5cXF1Rqv$?Yl->_&0 zT+kd>`behSk&7+F2q|~-F`i-NG?+D8qV_}E&1G`@;O^C&k)Mb)`gi`j1cF{_dX0`! zh16Zz{oGNCB0|ObDk6{EH6v@A{Pet6t;)GgF(uZQE`Cv+U4X!0P-i|d7Do?pc7oa})4dZi^&g#)(U+KRw z!4}Mkv+U+s{4gO^9!&g?5~y|pJa)JoIaQm3d@7C&2GDpp5A3}Ru$4%GX&8$(qO;Hn zx3wfBt#Es@afiZKTjzjBpF&aZNjfMDKImMKDTQso`b9U$+d=a!>~CSS=m|eiz4R;? zVhkr)9Gk>cp2ee6imq6Nk9ERC9Y$eqS0wgVG%vMlUh-Xv8Kh(qWqC`*XN$^4%_qej1;SNFpmy zy272SRnC|`g1YePL(?8t{O!AWnnftaYVt~JY1micuA+`n5#3s2$u)fa#c4a*ug;qi zh@az;!v$jh=?+Y7IWkV5C5IBAI9*ye_R{3^p5GXn8+0!)M5xYy8mi{G6ocK9(@eY{ zbmqahIde_ESNA0JN#Z`iO_fef?X@WnDmsjc3JDt+Thk}Da zGAut0korD6YDiq(+P(seaY2WJg}*%bVV0O)R94YCi#kQ|n%-;hRi!o`Lguv@Uje8? 
z@$Gg;C&kOdW_1eitHbaWY>r&VzpxRoe|gmO41!LoUIFVV55=auNNNK?h2@OFy0cJR z4{-t4mC}9?j#9z6z2){KHZwTaWcr+O*d_Q>#V!( znS(<5NGk;TIbND{bpPv@U|P#4GVZH9RVB}E`b*Jn3Y$H-vY5?{33UGOoA5qq2s#nMEX9vk= z>0^V|toY13-$+x=>1R$AkEALpL-ya}kHvc%-EV^YTxoL-hxuQy>=h&O^80aWu*mO; zVRY>SgWQadyavWc8%#W_B`sw-b_MGz!ap7a@l~s!?z%}V4_9H zb}=;kM%VlbxVn4=7~s6be%TcTmf1ZTFdTeon*-wd*SU!olGh_jcWD$1!}P~Iz$7)@ zn=eKZC&90PzflGUk?&RUMB5#lM7Pb=@uq0V6#TQ3mOcb}5GiKVpROkhm34xuj`c!^ zri~!S%WO%bq*?koFDmp`DmY0&H65~E&c2#?*wEu6erEbbf4O2MGmQ#KdwZ680>tDg zxrqUwV>A~w{w{&#T@#qhP;Nwt)?Ej&ZC;g9*4HeuNt@w`AEu1M}~^n)$z zqOmvff`wU4KpfvR()3s);Uh^rrN<(L;AHKx=iIcpueWb1(|j#{o5|^|!bwSt*Odq- zRJoCTu6A{v53T!?xl5BPvafIE-&y_>3Re<~-)-^7LYwBP6l5gB5_ zHvLEgaxMq|(DymSX>FR9T=cu9n)VgqT7!~ChZzZ0^}0yWZ%dM|g`2bjZUnQoYs2!4 z0&E*uc`G`2n>N$0(>{E6J4Yj^xg&fG%+I88uB0&+v4`BL>vG5+?Mihp)L_2P4M zTt^+-k6n%NI7(Ut9|M2q5~oXN&UaPkR=Yw*JzR?jFn576D041blp?N=8Z(EU<4&d0 zTVxnEU#q&6OU^|na-x3~#GVgbWF;s|Qd~r`-`1WX3f#Rj?t8aRevkFelJ1?$5Ab(6 zu%;;Pb7d9%;lj-cP}q#gjBuYBAq|sEf{`5VA#?a7I(v~qp~n8Gx)FwXKroRF_MBLT z1?=E75?9ywzo-3;oitiKRyxjt>K~~bXeNh1d>}lw5JMG7G%F|6u^>u~G}}Im&uv$A zLJLF+wvrh}JV2hu1~`xB8tEM zw6XSyxnRCkwod)>rdnDuw6B;C(=*pMQk-0T2!Af|N_Pf?Z|}BX#eSz_!Kk!wYcxSv ztY_$ehFAGlv-a~aGm_nkgne*)tJ)_mT4j$IXsYkD^k`@o*w>U9(6v$mGkRXmV`uxG1f&^1z_PC1Uf6K!bjGSv6u>73D)YhF zmG=1%O|zPE!IS?X5T(?yalnPwyZ_sp_!%*n_O0_v-5UQvL=B~a1-jyIG?pN2jV**A z?qnsN{sAL8TRpM_B3s_CX0maGki7}+?>txBSR#%{$N9R{E(&iVxZcNm>m|Qf)$l1gqR0h3T3i%$Plfs>pr}T zEfkpb86iOW@qRE97A#szc6eIoCy}w8m1f?SWhF2L9~{4xjO@12uDD6)qvWiwD-(_p zlzS#`->P(MA_B6S-1P|!{*CGQ z0o!Fh8T-?Nyhazg-KTzo-WXdLy(Xq=QH zQ*6pTdszm$$24=d%W)@#O(_6^^mBeK)kABaJ@TIFL3}k%stu&!d2V(V$+)6QxXH8; zhP|;N$Gp1gP~2*jxLiZO8gDe}=AO)9{WM|oR5h8V9_#11P?o>JosQy_;X?{X88xCZ zTS8|}3~9r9(f`&QYzya_5Dw%KENTYWWDTFN2HsD>70nLb2n6!60?U;P$SyIdR+89w zQ`1!QiGzST*+DAfA<6oVb`p_`E$NB2t~~avrY2X#C9`eNZ5YDf)`Xo#YF9e?>6TG z3mvOes}xOfG_Klyt`%}enMiTmzZdbIo~_nIWh#h9RCIa;a8lj&z6tCDRX}b15J_7{ z4nnKAP}@k=28#?F$<^rPmN#=vc-#Wrm}%;SpNt2$c%6g6B9|9 
zYTYKDKP$4t#y@2T+#i?t!MC*AJyBJJBuMAwmI8U!fVh@v(YlWXOb2bFV)*b106cNC zkm}n#_ckYv%+trUTc)xAOK@a$7w}yN}X(GwnOwG5a#-or0z*1r8w=!&&Sy@E}dq z{8i&qp_t0a!;U3Y1(nLK)udfoTbPu75p@KeWPxbE&Ydbnrf^Vse*9qcraX`a=Dz|c z1to4Ldy;BmyR#3s!Dy9YAl_20O7wqG%GgJ%S3LZAql*HIBn2fLQNKC4FIt9g#m> zdZIT8nW#!NP0_wM?JMrA)cE zIDfQr-?_va`jTCGM^n&#_`QL@V?0Nkp|#<6x_VnY3ipcCVI1>XHnJ;zfWp~kvF$w0 z8zIcR0+s|{Yz<4PhexXo^p{u0L{Y@1S8ypob(0o<2Zqy-ay8a8Dt3co!6ZX=3`_Kx zL&6qNkPrR89eb)9DF`E+8?8JIqb*%U+mP6hy&UB6ib7U5MHaGB2tQ6!)i;pkOdOvZ zbUot-N%j}Yl;G00$)))A5%mD;z;p@mY%5>}Ds)3Y`omxrCdpAvZgtV8R8rZ7msMVE zp*rNf@Y~D_<;B;3Rbi!Pu=hkZrevsrEbI~2hqFCKG zYY*H_85PDlA11#)8nof;-^8lTmQAq3T)sv%ARJX+I<9;`HxtYc7x5l&k*)hc7bXL6 zb@isaIqP>Zi0ie8y$*Q=9808{wgkJQ(dDcvG_&)Zr7+)zOTL&tJkFzR_k`=-=MXOB zm75hMd!CI7?{3|epvX=TC4V9r=7athy44|~M4I8K!Mr~5y#DHAONSIgDQxDOi^7;W zH{KMN?cFn+*m<_@z*_U3)+hQ3pwu5v=`f|$crQ$*PsozPO0{-i=>?>&`#6rXxAKo3 zQ)83IK_xt`X|kNFvv^xoM*^jd^Xj@$9+}|~?b@adkk%!fyLIH(PlX|8&=$gFN9D@y zRfo~g*?y_p7WDMO%$|%BCw=nKNQGAcaVo;m!Xa{wL3B@!P36*Uaos*mK%m^k#rgNA zSvvh)B&m)qmgc}Cx-z)Bak%+lBfcI=QC#3&Dm0a*q5FVQ-i+S-bLVIM%#x?rn>~ zq)><0q*K5*_8wR#XbIsJP>9!-M`)8!t82JAD7MSXW=m%_bbCfAOLkZIC6A|EXi9>1 zwy{&Dswcvqx90fUK!bvPsUiz)GX8JYSa5D@ZSGA^d@l1PVYR*GT>!kWwJGVihtmg{ z$DD(knoe=-8u~BJ%j7>}E!jk3tvf^>{8=lHNhFrLKh$f)jK--CT)YDe{h}!QH?o4P z5-^;-+t!KTCYVRw{BaJ&hn`ow+rm|kmjwMRJ=VPb-A9i@;c*Z#OsXyOivF)VK{DJG zTiPT^M(2DiOGM(kf!062QJQ%N33EB1NnYWUlrv^Av6F13s+I_`c_Q&7fmuH&3z}c1 z%R7Dq-dNr~g2rC%_i9$OAh99@stjIxqU_{|0N0Y7EIO-T^@M?K^95Nk)2<@k zMSV3g6=@E;hBNNUg%0zRc`xmJ8K%pF#Mu740lWNvcLTbuSRVHL`Krt8rN&58UT~(9IzKHE!0{v$tzhTX*vA0u)y>C47mNj#jrpZXyZq zpiy4L^oU@3f0Tf*g2YUnR} zStM{b*H;SlJZ25!$YRgI<_g6WiA|<(#!pt3vy*7B>&kqZ0olwh$=&mo)j_SAXJtg7 zU)I*7%4WWLj4BR{obe#Rb5(qfAfb>Z-*x#m4Vm3%Eop0poR!ZPJ%f*pSFX_z!)1Qt z{S@EB!wk7f8Y@#(5N*uE>-4f)?wGmJ*GW%LRx}S&?bEH>-`z@@TdZ^J7V}OUI@kAU zoWxYX__;1qc#DGaqNh2`Qp*`Wy&suoDNBSx-T{mLyHmhu9+)QMMPO@iwsS%*bPVI@XBglK)9hUl;1jti|vH4MBIl=@ z$qZP_p^&GA)RKh^ZN95vJIHYtLsYf0x+ zNR#UH^XEmDM2v@WF-YZiTZt)>*B$*ME$K#Ez~q*4;Qy0Y5z4+J`!FW3=XKI9Yvwwf 
z&2pW{O7ub~I1t6Ab-Xbw-4RuAy)-6=IJ`#})@zIW2@l{h6HXDQ1O!hV2;DtuE2HRU0;_Y)_fY7l)DS~TszTyW4v_KmGYEP(Sv=VrLbjjiFr3yL$ZJQ z^$M<&0rG7W#TF|#uE+v6B-+sU&&SP34h{s}6H>wKdgLs{OSBeED-EOtd~>fc?v4+5 z2R%ZQ;ESkNfOWi1V-8KgNX-r(9m9d8vj3^TS#hwMEVi$ftG~r%{jlc}bN(`hu0^yt z8{TiJFoHHY2iUBv(3F=iyj_~UmOkH$P5Mu@_@mmSBO@Hh#GQm~{PEb`EHimWK&6IY ziK?pH9;`Rju%bGQb{}w^HfbY{b<|vm?`)q-AH67y7`|q&Uf+y>q&656hSXxh^$;Xh zIsI1da`cxH06OzMlLs9JYYEVuYe`%$eTTm)MRX)r4P^J1)mfA-vB@X#+6C_!29G*z z`u}2O79JAB`KIzD(Bg5Q53>1t`m*o)lD^eJL3upo&Cu@I)EvDmdgPUcTNO?6FK%Ms z%Q9VdBF?wWL+V#RLZ!+hH0Bi`%kF>ccUd@`39Py{)U|#0(}(+~L2}20gZGacXB4nj zAf>L)WH2Skk8sME`ZUjoHo$EL^I$maj;>5R!GT}0j^#>|N z@Bf*TRefChU-5yC$IhE_d8z?j2KV5JzORC&45_dETOM_>Nt7ew^P=JH!~`P^n=iIv zfWm#d785GzH}d!vokvC8io8w|f6}M+_+)Q444biT-hYIqLHhAVg}&nO_~3e`eud?K zIiU7Afocma-fL4*TrxXt;3psA6EzIcJSYOGIEdY{a*;` zk-y|Zh_xRQePFC{4bLf}oTY2@gUu~F7(O$57V$d-rmNp+YCl7kU||W*&Q8y0_7zp) zEymbuwhBd)ft9lDYL#S-{wJ4=+iO(VYz~gqw-S{0lcKd$>7~c2;R%d>+-o6~fWU%Q z93E;KS-!j|Z<+VhFMpf=BnAbEE8_qH32vx{;zU&E8>@xgAL!#2t=P{HDQ`fKeoUSsr6~8JC zO+`dw<&h-Jx63CsIR`M3uz|>zRJNuuSG_j}4y0REBJ~V-*q&-GJTTnQ&U=GGVFNg` zwSwVqUFi(3I9_)(yreP~yaMJvZbI@xJh~K(Ivd*-nv?`E${0h9bPbb4k$wbSrp1<- z{!0uDZ?25Hrvo!>8r8flZph|R-B%b~omox4^ZVixOp{Z|jvzZwFh z+U9%r|BCYNB+m?-As2so7{2 z7=zthhQ9NwC0RSH|Mr@L9wY?;b+N*g@Mwi+)8(_ z@#dLA<+jmT+x>MinfBH~<*M~6DDepZaa47C$&-B;pEdm~t7I{B z%xRHuwG&LE)4D-VibO=kvc8~YsIHR|IZ;A4<{=1|FNap8C--rYg8hqrQ&BXTt)_v% zo{6{S*-9I`$eku+KS?-qcz}|&AtT*18KW#I8sd^f^cz;WQdU(CuH(x;-4;kU|MHVf zOyPP8t>Iw$ZAowwWO8YlNb7R3kfd3QNB{WU=}PfkDfit&&qd*}{gg*JUzRR=_4(Ng z^%stqfzO$#HSl$0_2^e<=1I4;Wu2O2{QagnF9%S!i&Oi7PJ45a<4n_cYuQxWSkG^M z|Ldf?$z^Nw&wo;Goenp1*K1pYrp%HY^8di1eC^7`W_8P}{!+~?szCqZd7x*NHC+w7 zxh=<{U?KyGPiMI-}`jrM>Cz_G+TfLFVM1GwH`+)c{t=<9Vbob2hXIh=zg(+>1-^MIa zs&2I?Uo|iL;uCsKtId$Emus@^N4Ay}Z=>VQ@xN7$MK&Bg&EGaQ72A$GG;?JMrIA@19!626ajiYCVNwRL3_5?5i09X-VF!9p2w+$!&@}yQ zE=H`c{26SfyoKS!A^@hNSidxEswuYhpJ-lEUE`_fKrO-wb2tuo#EZRtyKn?cSd(#SxU|E(+$7*ii1`Bhf@g&Wn~x z9QUz#{{8?IIt0GZA1*Cn;RjM&VsPf|^2KdGr5ZkYk)OwsJ0qW((_!8YF%-u!4ma0( 
zC%??z8_yIqrRE!cB-8E9PUk=u;!qSsmnD#6tik3x3#5jc-4^HvfK)Q}Z2sbM;5YDv zBC7IcCyx)eR*6tWk;>+K*b+lFBiZ+o3UI~tLqt)7r{HWU;C~VCawEDVjntINH_e?_ zo6i55D{M->#M^G^E$8?W>oj}`7rfL2prm_Qc^Pv$g)@IHvWR7#k9nXLOyEk)PHFH& zAqO)O9O&H-tosfY#rLa2zFz5{f4A(Ow`xwRA(SLD6N_t73@2uD8J_T6DuAXabuZkM zG|x5Jx+|6>3lJs#Nv^MGWy?1pc=$muN^Z@+*G-pV*qXU7f@N(y7fWCLHUUsG*B`2C zXz|i5MdpKa5@n^|UqP7QZ{CLDfi_o{e-mM~AIMu&KkUPHEJ+e45fmY0I<$S&j4aFI z9Y}qWXE#@zr49M&>z>ZWXwj3=^5;E3j$x)inwAj}w=uV*UjIqgyz9q_@AV|+>Squn z2HA{^L6lj&O?HVaLCUvGNW=(-h}0lG-8XEbrE1lH+ormPj}{P#6pMsTM@~r%GQZ@#pO^6_sLcpMD9 zuTE^fTq~4!$=;TnHh<9YWhgeUiqgXRcs$RH6t)+d_7v~u^er2A_1|LY@`r1L2kr|L zXJE(&FbS*-zPYU=$6TSUfoMt3I=fX+OSzoS{6lh;Bmf)>?jaUk`YDZ^U)c1L0pyHn zVp1Oqr}(mX#k{K)M{ChI%SGSsV5Vx0Dko%l<3uD~u;%UfDDkclt>DL-u?X_`xSWkh z@LITJ2l#3yZ>&N+Oy?TMF^n-^0cJEZ9wkikHKwNcRbR0Q-iNM5Ab1LyagY4Mm|5L~ zYOAF|*1WHEb@~Sam%VHtBN{t`Ix{sp12rgP`T(wnc;Lz6$BlBv6?P6$w?ZXRJk~y& zJ2h#{(BF7wN&`6~#4NuJ5`s;u12aQI<4qA6BqaBLAEO529^C8$Hs1! z6R{l@T~McQ*kp%^{7LK~Y;NO#*7qfYmKpa8VNq)@KZ<{8 zTA6-U8Gqj~$zJV-i&eL_tV?hSZ?XFdL{uwQ$#JJEcF(RbuGT-O679mqzWu`;u65C< za_m%HN{X2jREVE*#)fQ;ofBmzmb~M=_nvl}CcnWC$Nwe+_>ae}`)vuhq4mpY-Xv>q z!uD`zekNKWq&qrAG8jLm^be`1`-<>K|Do+IgX##nc2V4eTX2`)?iySJ1Pu~`1q<#@aCdii zf_orna0^cG;O?H=ym`+#-??@F-l|RQVrKSq&vY+)*6Mz+o6Ht-GB#6Cv9YF4tBuft zDpk`G$QCaJ%WlIs_j$+6MbxI9R^j*DdxeembUbe+?&22Zovh*zia!yKKzzOU@}BSD zd=7G>l2-iGG3NJy#yyIGrTka^#^4>H*#-38Ael{GNM|@$MC+U??zEBg4W6K+z07v) zs(~a_!wVvomERHg?2v1K4T{}?{x)^}K-Nt9OusRX_~$TrWUaoeRO;Z6(R>qRyYWE-u4aVT`{R-i1HkwG03<4)t(U?ay8O-0JLLrEdN)#LsGVY>G~wb9kYKVo zHo9LSefDPQAjEA&jVyDmU1aDGkZt~D)FM-CxdiFNtM=J=v95Ex?G5|X7iuwR^M?+! 
zphwa&SJO+b0Di8(ukG^Gi4n+1RB}{9RrCEa%!SINF7Q>Bc=ERk>ybRlQM5D6HsWg?Cp*e4Uuji0&{)- zwC4kb7VW^i3RRjp6rPN)7j&CjLL94w90U9Vfw`S?R-4iC(Y+Y+4P&`X$I;XyHdbXV zvf64pKj}kmzL23nhn=%wB=SeGYka=4J&)KcmHhphiRwBnk9lcr#H82WwLRlVB@VlZ zGRt0A_iLXrX~0QQoQk<+rQUoE#t+9VYLgL47if5L?-OEsH$hEOGup48RvPVHU$bb) zQkXTIPQMvOky99?Varb3+_g(2gPw37OC}95ovc`r|2`SV)Sk*n^MhQ}D4NsW7}a67 zr}Xs~qnu#L(~SE7*ao~g&XzoKb-WKr?+)r6VwN-F_=w9tD4*(8WtVK~{$SM!-1SR>S_gUqy3gev;m^cv*jnB|%{W{L+WuGyB zlv`k#Cy^K{-LS4S; z)3r`J8cXejtDV*+Xf)E;N?pAcH+Y9j^$zWVR42c{iouGKp({dsXgz&OOq}`CTW(wF z06W^^CMb<5bwKfzQO4<6PJXCp6D47Q$+I)(-)6_S7KMt_VeAc12a?jj3=ICf;gkt`F{IXP_PaQm*) z-&8Z&Muzi8SrvizlA^Kln4-NY%#}(zC-Ln7mDOu)nV)1)Ibo;aik3fgd_tJECs_m$ zi!5-z>zdo)`c94&iSCyZ`2}&j7W2#E$Y4Oa8i&q*|)-c%bbHo;uYjEM0TCF$_oWnCg;O|v5?>jMFsR+zoR zfoa_8uM3L--0i1?xaiy+v+I`A0tep4#$QL3$e4|LxeJEa@rm$b$K3vQW0RVXv=5@6 z-HWh#+!YTo7n9PI>prJBGLDRmjfrI#Z`EOTJR6fdPIoS;a)`|_H0XD{scuOXd!bGhg8$B|!l!*11W&yWK={m4uj677lv z8;VMl*KV}WKc{t^VR3?%m%V;}*SD2fL**@>_MVJ?>sX3uF4}L$uaf9m1eBouLKJ3XBv84NEZ}_VS?~0V2xnT!UVm+^`SiJ=ArxWEIr7N9%ONU1 z!!C^PfAPw?sYF&&i0X2%lpx5b4f*^R#%qsD^ORaP8>LHK%pO^~P&#A~Tra z1Ah^Ikhnln?lQO}*Y;UW>AY>7;$qKkkTY=q3%1CL;whN>{TBja3+wvwR7U+(o~vIv zQ7j5H?ep{OK!A@m8iMO^AAv@oS^k0TZwnrJOYHbQ1WD76kXABEe1{AM&)C_%ox9?- zV3!CspED;T#aqR--*_6+)fIw-=7w&t%PcrUlK5;*DeV#5pA!`d`$|+3?>&YeyV?4S zGKwm9O}&3*A$OiRMFmpFf5_7cPc`#M4Uh{BIq!>xKylE2yJi4qxq{W)o@qFFC15Q+ zet6!ym|;;XIwf`TlQu|wDR$b~LvtFeg~D?RC2idv+wv(8H;vBo+8tp2UdBfmc2@qG z@hUtqXn9vHx5>?WWQ8aeX9F7lF2%5AUHKiM-!COX)^d5&5~>WJwbhoH8-D)pX6SxraqHyIn;jECx4!r z$v$jk^~ygCGYZxUqo-t*_@r)M1}XKLG5tLz99vk8Ly<*X_--Lsej7Ze*6hs0`-@y! 
zaWdf{*Zp*$(XG<5(m*L_b2Q1CYPs&`w1OZHTf$EVe|A@a*KkS>Uk}2xtH^)l1a9iV zJf0AJHw!jlr@RYjgHVG1xv*2Z6Ek1-JW+CVTs`nOcpt!ZlfC{drQpn8xz(-JF0`JC z>AX@i;POQ!VI(>b((bte;RZ5;+Y)|Y74-{PPBHL6LoA>i$%HFXH|X{yVuZ?s5h_b+ z8#z8cO8`RISDSRhrMGw{Tr-;maw(9(7Ny1GrWA{u7xZW;x%emC4HQVWSd!=>)sFA67%>y3on@CDU^NsYTSH?YGJ+Po+DQXTL6)U$P~P_5Pkz3kPxlnqQiUHH7}|ls zeRWeiI)=*dM+8IqEQ(HNWn6WZv;1}_Ris`}#bWcLS{(dODf?oB;|v zfIIa$bM)z^e318<&{X{k@uQveQBU()NaP*~b?X^t((~?+id%HD>b2Agwg7rPFi7PUw_(Ue^cC;oE>$0 zE+Puz&!y?bK#%HM3g?>s4_;NubRySSHGh)$mbMRmKHYS7s;T%CbH)!7+`26f5HNOr z{`i3~RO;#MPRezsWuu*zxl$N9GhzI6{Y?_V7!`+q)bG+6$D%_l%~7RbzNF$_Y2^OakmpYO+bmOGtIFKPGZM{4g$kf1^}eKC4COWPf9#+D5q4Y->?lHebAijS6F# z|L`tks3iR%2gUBE;F+GFdr>AuIaO_I9jt28zSt)j2=ALxvgq!Tz3&)J&4K9}lT5Rl z9@e4+m*GX!uu9l zx!IOQ^Al6jtwjkHjY3%`erQRk^g(FjFGNAlg{)nR^a)z+jzeGW1@$N2lJ!VIAJ$~>(uDvf&FtG*WGNmsiL=rnuea>mUQlCnuB*H=+Vs*n!e zNPTeFS$0UGFB^0-vbrY4#Kg9AHn(9ug5rB8rmfE>zZu{586x(kCc^!D-G&}zPdT#1 zVr5cNok%i9d$Wv0r2mqp-D_SAUShw?G7Vwj)0L7MuAq+MeFln9oQf({yKg(Fa##Ut zj|0)}9SD36A|w_BUXc~hKfcp`*V>F?<3P6Ix-WHMVkPZHTcJAMWpEHXGZ}00{5HIP zq%0e5Trldh<<~+)rYlW(zQD*41wTh(;AUc2s&8>KYv;=RJVf1uwc=x?D}kN5WXKpc zh)1^mK1*pz6e3fR{{W=4dY&w$>O3k*73ZoeR}X6{rbnpUjAgMwiYNxxY6~3{U$cuc z!&e)dz21)ann99jIo800rg8Qqvt&tkFO7IMsnnTi@(VLct?n0whGxYugOT4syxp5c z(uV6##Exxrnm?mvCB=gnoDE4helTR)XAjtUS$r^ajg{=Xg6a5`X-b@~ON0 z^}#huH9}nPuA8DTW+(TqW_*ro#Z~!P1XMd8WEhOeU4+(G*K3Qto@T0Q-!f%YMnGhi zdv)kN;%!ALweNQ9wmQIakULEp=%XjkjeB2Poe7(iO7!Czw~2?!ipa=T?B}?G^HCOm z-{;22?ptoYf%LF#$L1?*656KPsY)v*c^j=Gy?oeE!cY}cIQNX3u02i1-){}l_TX~i z7nm9|PhcuJpvOh6-(yTS9~_No_{=@B6^@~;U-kX|-nJPWe7J#jR36PT=9c8OGd#8d zj~MxVD{FqIq-ho?vIbpg0dqsa#?PK@<#~6{m_uqsuDl}ax;*}u3tt2kH_KD%<&kf! 
zH8yZM+PlXY43*=04y)AJlE&QQ&t0m5R=Y2Ht_y7tce!I*cpn7CMh2iEgXiOkP@MR> z_Vlz0p6c$r<$v}^cSW|nlJ0f+)|N9}On6GLvay$7tAw~T`0zc`;YeqZsI|P5zn20l z3b$WAlyAKJ&Gj`{b_UnuQn)m3?qu3ZWbr4jX zOB6Gjc}L)nI}O_$+)K^zS=)##YAO?VWDFHU?Ax#l%h5s{iO@ql>!F!0?X_lR#D32% zGPje+ceKWDejCU3195=Jthx9g&2nFpC)aVw%fzMRFGOg9n5{Dn{G)p(PHE0?S^w11 zO7*-|uDZO+r;0n3*r9YM1c*fln44m%lP8~;c$z7yUkb_4@1+p9@0d*IV6xhp;D~%s zS(UvL3tQ2EsTzZpM=n9zz(6?6ADjht+DBdX*(qHV%5#{cu%JH_RNuR7qy9$gv%3i1GFIucR z=8E?;^P8cf26bnF5h(7Z*En@9rMFeQI}hB!hMegI=e@0dqS!DCQyk4T>}JQCpNV$3 z`foqj2WoUVfR(L@pCbb2-?~JA!Sqb@vNO?C0H70 zckTQkl0KXRN^errJ=r9pO3v7`;Bna(ve>TVwpJjB?Q^Tu#N8CES_yko0=CsB&nluu z`(`qos}o z3U1S6YVbmsjBML+rhNwo1^h;J9h?kZ-!1D@Y;!MX z8CxtVA^aSF>+i#%5T(cc8dv5pjt?7FQ!4=-?hwKYY>|KIrHHLj^me`VTay`^PKD%I zbAbg%L~Mqa@{U+*jd}Z~1t2!H@#mzQlGu=yIR}U)W)=HT+_7Xq+;mEa{TI=*?Ta>8x2E#En2Iz7j4F<4=3mS8uSs>dq>D; z%+g+nqo=hmgTMG%fK#0;$HWYyCvT2<$G4o5vo z(bK2xd@u8&-<300n~2LRw2TlCcIbCfH~sEkY)%MfTjFJ&C#m-AW$jEyjGUX5Vu$E=kd6^OKrjCO~S-4@6a*eCg2kzV0nqzrVFv(U$8A;?en+`cknD$*AIuM z<~P3|{tdQxqaurtoVL6@v+c<`{bP0J5>7s|!_=||c1aK3<9ur{B)dqd{u zXIE!PSyEbf;)h8|4+b>(J7uQ1(&halZ;pBG`2i&JCPp&Fua4nA-^23_=z3^!c$`7? 
z9BFb8Mq@~sn%U4WiHXx}3$hHyaAdfn7)@ZSM&X9f3|yd9s4s4fpJA?!r1=QwpM!O; z<bf>#Y3_&W07chY<+mw91#bxRzkXMr7+iP85yA+w~^#N7J` z+6jS%?Fsu3l&gXn7qYaG^#=@m)_nsZjT}h;n#ARUwjI><`}HIXM&@JySxQB5;6l(% zv!r0kEXS2(#G;XVvDG522hJG|xx6q|t7?lXGRI*q!XDPyQAM9gtK?UL#+;Cx^$=p+ z--Z8sgZSeCXX*iQ2#A+IY_Qw7t*s-AwTZF0A&a@Sp_vIgv+WnN(05AGuaF2{c9nl6 zDL6aeARSbt#UZN3i1r{L#QkI?#NNB=A7vt@;PkcJc;HJo z;{>5R(3s|F!kQUlgwXXwi8oi#ee9*u3`FnmrB;U8-GrM&{s9}wrjOy+6+-c|>sv|J zj98Lk8#98xZxnP0B@AAG-^t0|#A<3P$IhkdZ}pic5wwiros6ooo2L$^{i8DX@;66- zR~*iON)+nllSAl7(2A(wChH3$13qigP>vrf;QyV}!OVf4p0dJD`_Cy~)Ci}47m3P) zi*(W=#p59mV;~SG{Ay6a7x;XZ^+i>XuT%MqNd^f%{H(JtU*N+8E;2XgMpFDwn`8@q zmHHj-fSsL5r@llbzaMa-mo0A2ez57&j2601xtalt6$25k<6~nd0k3O$^>_Eb&rkIf zFHOXV!XU~xe_ZvvP%Ku;1Z<ej`$Nat3ZJlLWi{uKe>+yrc6yq-?Cr|UF| zX(Oo|X7k?+ZylBzt#`6Kjsby9tK**dhq(VdtVEda7KxTq#kz)cB!SbMp34uBzSTOz zo}kTv=$-BDG*VL&lZG$9TT4plRJrqO|GiSOoD%4CK{xwKoreA>;tzu|fJpkb9sFo^ zCp9(o_FEYf;HzKS*!a{A-1h&BkT+J$)K(d9K**O_98KL9g7PB8bSQVgOIktR`}p_> zUX>5vKTBe31?;(PE+@gH-pz4tFI4L!Lhi<9W^>gBtM?XNzK~B3H#bW$BiS#5DXt80 zAt0Z`So2HJehc9q<)r;W2~cH!ohJV3^rjuX_YBaerN_N6>nJKKSG(;8R-e)?<9`SB z4LocgoOdV4<+r)57uTN4-n8Ftg#QpTmi^b+Aip3Unc%!nXcZL|hv6*WzrA_e>HWk? z>I{DP!q9j9m3M?>{&1m=VKmQNb70~jgWEdI`I9CNFHu;)TVyR-qOf!kLE# z2mPQQo=$wC>B|AboVt5aNxcM$H!6p*R=4MZNivts3K4SmFE_Ycs(%mZhY@qw<$ksA zw9y-S-aH4GH#BAM0OLnmc-P3>UU4=cJc;%Jj7fj*=Q>}wik`P5#+@G$sZmNlbItf&s*CW$$DCYe?8TD>FKq%XKsglBAvv#Zw&>cnq`V+Z+qJ5 zNGkb1IS_=TS7i41SP}5ASO0$rt0;Uw|9@2lHRvkSs8sGhLE7!of`3sNzA*JvqVPq9 z3v`_S=@H_RCTpzpEz)S)3 zK2Mj$B7IKU+Sr3-h?@Wd%LpHta@&Im#91mrwy*36O9rFc~? 
zhw~T|Ok8S#s0tikRdj)VqK6q1emK->Dc_JY2t7b0=9h^L4)h04QV#j@OrVvL$mb{c z!s6)gQNN=;5BK|%(?PzSjCg*wlha@b;uI0xR0hz_F2dH%IH2E7H;mwnm~|cC5=P8S z;-{zj|4?`e_5;zRoe#%tFO*XER|lKr>0es+USuW*2(IHu3ft?~uU`>xHs9};-<+&g zW7!PJ0d>vf2Y84r2je6|w(02Tx&*4uLK_zY#B2vq4*~Ph-mlW>h}RrbQ&Wr?tKJVs zKve)eRu7cMoW2L(H?juoTMgfdMZ6#LB*V8y)0-@2$^i2b;O>5d;+%sDW>Ok@SIT;q zA2gqRiGRzKQkGz;Khdt=XtoGxM@?CGg+b?c%eg9`X1*-c{sAOO>73^5W@8yZfvqdu zHUd#_8BOETYjrG5_}1LaOG|48cy|^S7T(awyndN0WW6v2oGo!4ADu_;0EKqGkMnk_ z*@h`E6$^Jcikw#Ho`|^@#-E;7JRgNomI2w7g#`QO#f2y{nX6AP`|X_gaD-BhW>Ca!4z#KjSz8MO5BIdET@8(oM@N~L(!c!jRvKc|-18u;f3~x> zY*>4e40YP$Xu0JAkWpFuDv6~Q`pWBHO~DxZA_%IOA)nCUyLWkPRw8oTCj8pExmtBk z^%0`rqX5|wsem(z7h}7vp9kxM(=b!hvfTi&P3IFg6u(q7$?tsW=rZyUdhH81OftU- z^fgGmT(&|ZNpvZeGgy%Tdgk7V57jTE)PV3R^6x~uev5q}{m#v*kBIZ(e19aNkuC;u z>iQDq5ZVnKu4{4$aX1Ym=4oOVphm4pji1?HCy;i&)1ly~j`B^Q(M%+l7>MFghoXO4 zcrSkD0VE(6##K-)zMt8e`?;a6%&C?^F)6%zmseY(nl zHX;Td!9*hXZ)(Qgf2#-@3np#>yk{6C(M;&pW?UxQ&A(b?#3|T^j0rF@qWF+4@Q#Lm zUgzD}vA>YKvzCP()Yh^awzB0HzyK8__A;%yg!~A<;Dj{pw%_dgk zCl`g^4!}L32AbE87fLsrrip*TZ}89f6woc4G+hm!?IWe(X_7_cX+fV-Am;K0xk!jp zbe|bBLDNPrKbUiM{Ahny=LlX=2cj)gn#9ATpKQQ*9=Jh0ZO)hu=aCA(rZ4IUA4k!P zr{fm}vsEP-Uct57MPcdwM(IVzyJG=2YRNkx>t%{i$orx)))p0Syv)}e9-vV>j->a% z-P+MlYZ0rH>-CLIb;5z>VSonu1RK$HAz)5^k*@Yy(TtyUb%NapbR_WKQJuipU0MR- z6mRbQUT`;({~nG0#NkH$;>BTtgRzurCRq9`=9KV@W`TQMo^T_Fpsip3Jvu{~+awW3 zJik9v9*pvO3g`p^E(=DjZ)912dPA{*1Et^ee2a#svYB*CMc;S0}4~fv3W1 zga7}fi?(R;+(GAs|NBaWOEA|NmCMfjp*^0i=9sjBAI9yp|NdeeXtS9rJ&)t%mTf2C zQR#bKsb7=>N<9R`RgaU7GC8aHZ;%>&Z3Oo!NZ_^MAd*q!=j1?~Fb7=3%}UyZ(Z)_~ z$sPMqd)m)8SZbP^n}7LIb!QDU7I3a~-oFnxxH_0474craf5X5~4)oHtre`d$Y`$tx zh&<~JE#iE4aNoH(?B1}RMAPQvL4Umw?|j3-F^!2mcZ=?Gywrpy#M)e6W-wXxGS+Lc zzA-pM;_PQ7G7@p+yInAOD9}w9Y z*sBOc;2Rxwlpb5$E==Vcf#Xc>4DADG-O@uedlgcy)YG}GpMk4kkE(znpo)Syk=)g2 zz4S@()T)QqZGV;V@#u4qf(#8m`xeoB=aUO~HlX-gKlLG9oB)z$N`-tt9NIiKI9RG( zS9FE5Ka?fp!Q8TiiI|lw`C_QU|8*M?Pm{DERxluh)%~&__w$+Ffe^PDNn6`l0E=BxhTLROo-}pxjQN3ez`kU7#(*v5EvW+o5KHu_J7u`*)VmGA0Uh~|kljNx_f~lEn 
zSqOSguc^QV@LVu&As_>6MKtCh=5Bk(1L&fUqb4~_QFkfvY}7OJCA8I9oz`O`2E#1b;ABWp_}e@u~L7a->>p!F0r zPw|kQffw?Sc3Xg4_lg1Xck~Lc>uF9~WO9zHn3!L_k(}VHpV|Y~MhUi+J@3+QnqYn7 z7-nC*^`nBssjXUg)9J|zZcOyIWZygUJQ6a)kMmcFV*`GR?XBNV!J@0V%nSqnN+|tZ zUJPc*qO+N?aj&BuPVt$XuFLL(m>1kJfeD`QI87K;mCF5Be@93w_Fu=`{H#X`QhrX< z?;4J>Sl3tPe@->e(|7SAqP_jl*+V-orR+tm$2oNDnWcy=tDApm>$wLtZ(Oo!?F`4` z)Gj^bTM?B*=fDb6mVT_$^d+;+yx?Q6l4@8uqTq-}Uz*H_c&6ahiS;AM4F3I{&rcs1 z>n)Y+2q}oEl&UKiKTMM1Jx-LMB@H!mrHM+!yY}>mN-wI*I8`=}aW}*YN7!$I{tP_& zD__X3!$U)dbaA#x3??q`ta!aK`O3owQ4}BqP#^eZ!16pg6A@+L)GHe|+K{ct zLS-QYIH3=!H=w#F4RgW@%)Ov-~0UWUmXyozF ztXwJ?z9ov(*qYN(uF3iq?)kk3@R)E6&nJJ=c+wF^HCY~4Ym9>Lf_2;JY1a=wt_vxF z>B~pm{~+6i1)0T(<~ay0TIK*J>zQSOsqOlj?qpAMotup~<%7L~G?XFj+5t_lY~DGz zLgcR-i^<$bOUNLK#}%SP=eMXqxz{nT7!>w#xS*V&_5)Z;nIWhQC4!cwU4R~ei01IW=ZsEkG7IaDm z3^qBLP!4k_hb{IczMyhn-vwF-aC`LNuh+h!ahp#WpKbI3CpHUMxUX=pNPHgc;-3L3 z!e-d^PmO^HpdQ>$I-0};JI_Zwo-f$f_vzKz3}Q)m1;<3qcUilS`(u89#P0{+D}2@* zU>pA4W&eHQHEpn*OUo|(tkcc1^;@$a^3ts1DFudJiMFJE=r$plf+x;x;j2M9q+OH0up0k_qQnR0C_ z(TQ=77iEXq?Y%q__;`2DHJgR^5*SIuaiWwMB~AtUwxS*axT58_^}zeOE*qdA48RkA zygi#U@Dcjf?E&s+Vrr@rVi$?#M6Sg5R;p|6oe)b!MUC64A9kEhn=y^T|dKjllNts z)%v^H)F>NW{(l)Z^-n{F_T_)nQ<4e2XF;rK7LBm3DJokePKh&Cr4Q<+^@2uTNLK%G z7(D(nZbGl+v(n}VO$Qg80Q=_QQN1@#`RIf^UmgLbK039Z8%#1ZG{pW_0^;dcsoLnl zKkwn;PqKk2c3vAn*ZX6H60EZ1ch6z8HiqZTv4$TZpdVh10gD2ox%J-i@8#7X3%~kA z^uybe^vzs0q#E&J-0#F4%#WxTQhygMs}<9^U)6+c6YturGjK$B|0+>o`p_uR{z=BY z_x)QeFunJwVKms#p?_C@31XXK)!FP&&i;Z=?ijW}>*Xbszfc<0eWGkHQ-4}Zk(Nt3 zPAq;7z!@2ReSHwO9n4lP<-QFL#iAtx;b0DunUmNKKRFCddQN1$u z4uzwP7Qc8Y1s*;fv||<*-WJ&8ailR=$)CJeayq^VP2oo>W8E!0M$eNLklh=1Pe>iS zq9A*MDBEfdII&7lHNcA(BLDK|?O{5Wh9FQy#3}mLG==IRzP4!V!C#SXe&lLxaB zyeH8ZA-7(CT2+AG_IQdt=Ur>lU=$UCQWpvcj|y?OP-jpPkKUz5e;d97;R1(LZc_y9 zph-$pkO%Dm5#EGEu~tY)Ap?WBfUswQa7ZsF`c5&H{CZTKD+z=eZ83a#VqEV(m_bcO z@MB@-;15}&{UO1uykHruI&}Q#EPE!5DWgQTjIl23xU&*E$RI`MRYy??Nt{9vl!L#B zg+7Hb84HC*IXh{w9+Qg7NaS3AdyXj4LK$a!B5_BP3u8zt3XkiItk#)>0FHJKX60DC 
z9xfFH1{cR4CIBPERa)Ygu`-0fH2k}OZ#sPuW#Ib$MKP#ugm07^7=_iejVZOT?*gO| z6uxAG*xyT|Js!pi1APXgMqP`H2X@u~@guP~ge1%-EZlQS;Nj0ggTTN75oY1PC%&=6 z0-%&pW|2|e2FlM#;@1ysV;^c?Cj?0B+^C~GGDKo6usVjg0~4W^azkCiZV&pn8s-}+ zxi3ZUx|vcb(7uJHy#g|1KchnU>;Lz;Uu*{0RJqXM{)wgev`~;P5`u$oIPE|sHpDDB z%)5Yhl&p2eFAmDSuK>mVJmu|kSiTd&aS;_UPHq$vovipv*nm$J<1czjjNzm} zQP>%0Pwam~KbRnyLKAWmd;D0)UxZRjwV#nmsewr!JRPF$*HZeHOqCW(qpxbwP)~ns zyk5KX4e`-zvt7|2hb)oe41u+^(;tLLjpjG-+Q#eX5%R=dmDsnErs=#3~Dm=-@Y9%c%A-Sh3Us$a&M4Ukobz3sr9 zBj$GuA@iJhUj(f-N|6u@U<_D4Dz!Kq>A(B0`{iksh=8d=YvKW57Kls*Y#Hu<@&`>0 zviA3GcU^A?2|c-lY>K)pD`~V3XhO*ed;_*1mQvc(*!UN4m;sk@Utb??IFM{^05oRb z94#e#xfzrraL_{H!L%!g6*4jpf;TkT85>m7uXidbN!n6sx49VPxBO3m+;SJjw38!1 z4N19eXqN(vr)k-K7~n}FPnXlc69-Vmbb|Bw)?mpGWIX2OR;L3%e!bLW^Wu%mg~^2O zy~m}2rS$7Uc#kC7g{;!~{{Qm@Hjvs%_2Nni|`jRRY?o(6_vz&ZItK6~c zoKwlfTtlnD#JK&!$Er(cn`c;?!`~_cY$_^w-dCqH!20cR)BunjfE)oz_=-E|{}G?P zAWHw@Gi(W$M3Wd%Ed*Dz8F?9sbqWBTzri?h8K>4Vza z+sW;!A0Iu7`A)qt5lA7)bKL>3rBiSvbvTwYr$R1gdv`7lqdSlG{gmsJ%CiCCAHJmF zAGME?s}WZK?1-(-Z18SmgVh42Po7y2$vkigDyu=*400Q;V}A6Oa!5r4kR#B2iv$kd z2z6?a6=xlnmr^Nj&navRy(le#0notBd$SC$<{CSobSmlON2@;+siQOIr+&H$VlZBu zcbA(x%|Y@#lK&$dJO<%lC!qcQ-R6?K_NYyTxGPImm3_(9O@Q)t>sG%|QU|z1yTDKB z%@q`QD3-YBQWavbkt|L0QqXGpq6Y$S68Bwl#tU~afB=w>CMl;s0`%bkQmFs`fEXM4;&C1Lb zq)#g2&9S&=gh=c;MCz-{PZ6_zKKAw!3%Dz?=P*oVz~mlIg9Ow9YvTq^|&s7a;IXZo0@ zslMSdU!vg!&r3K`I5*&v?sZGOMzT;xq?<={5FqiaTVw2@$9k`3@ww4l6{?~#-*x!< z!Dth8xW?4OCP~86pwrZ0Ds(xeGi#x))`s7VRs}<rPZ1dzHTQ@LRVyBr#`!3h#jj|-Zx&5T%EvkqO!Y`-SYPps*eZ#^Gda^O zbVws9wsoHg6T(30XJQ%eP{!Ik%b$~^5YKJc-pEfNB;b4a)|=4Yd81t z7X$_&pm!elzNzF(RqD1(WgqxZJqOwVx4s@ zj#~aOz}1x}V3al}-M-m=_I8)g|HiZ}inw~d6la_KQ1EjxN?XXVak!zQ9B&uhGVWlZz0OmBV{G67 z9mCt=OXI=4JYDcAXyHxi5hn*gA`ysjL3s8thibQ(##*+jo{`42nHuh zRpt!2)Lf^pnPb6E%K`IrDm2ku!AyFszrIs-pVU{Uburi4G8vnE&dy>))JtY49K=Bn zyaj5QF}!1HdKzR#>zxiXGQ8QX7dhF)XhsT={OC@E2=EwqIpCz`#jZZ_VA&H)CVxw# z9ophwc6+?F3l%|rm|^+Zr4%YkgkznZm2Z{rQD^BBKG_X&awey1KT;5f2M68uON&9M zWxRd8UtXQ%ww+q#fE)L2Xww&c@W_~=vF&5A1kQ2k?PaUtpHzHuu-1nCtjE^5EW5V* 
z0kT&m%Xyb?=v{0&0Bw$gU;O-t;MpacPA{O@6A_XU6B~dnW7eSOgx28ugCf^;eygXO zc=uH-DEC-QgPZa`?k#|9l$|jI)VJefhD-^+lU66t%ymF}fT8>Ta4cYQ5R9ERG&Q00 zEr8>%I>~+~5@QuuaPgmR*pav2P#yZYnU!dw{%0~N*fpzuelsS4d=@wUsCg87dANWq z#DTLBgt>upp2I{HzgAgof5YZVkjnyckMi)i_wI$M{<3-FXYWK@j+gfv%O@1Z5kkss zZrGsuF{<;h;c3W_*jgc3f9Z1i?Q=hU06;9j9qMXptJ;oRXUaA6=3teB(vumr146YX zRar&YV0z)4!(u~Q0swfd)qGgm!aXCIz3P6D@@IHX%Q7iEtWo%8S>aHxyrmUTber&K zMSno?_|)6`UuW2|(7;Jx0uQg6fyHNp^$TZ&hARt8w!W6uH?Ifh$^-yFa4Eb5>*I|~ELNM(Uz0Cs_76{n5!P*Uw(fAhy1zMz77{D?W|ZrV zUc!H|+TyUI^?9Q?AXw<0{Ogb#(n5ip2`oJ!;yi6mJ+y%R*3S?O$}F#IZ7T(nQckb? zOGGRhUAsPMW1WOijgv!g>6MUeAlK>YHItm0lYz4vyqp5%d>yt2pS^zy5$1Wr7KwaQg0Sv%vhP{vx885eWO%Nt)uwd@kxhmTKm!u-z zgZy4z+S)2a9df`+mS~a$UyF#>mP|l^(5zvt!Tyh&f)@Z2ko8|R&Nta){|T`BU2i_U zpCz|;9n{-#esPh1{YY^dy3~0J=>qK~tDw#H{)FV>r|AVTa$YbRX)INxk40FeN7Q}3 z>11mO4KB9BMJPnt^zAH8WTi4J^)NU3>R~f_fK5B;jX2RFC=3z4U4u6MRiRy1s#cuF zw*kXW1Ra@6e%*yhf&0+Q#Lc7j(T}*-70M32npc>6Bq|M&XV^t|7+U$GbVFC*CqHs> zvO@Tx0n_iZ=oAW(toOhyF=>7K2zE$*!KXjvfRx*mAN3=~dp^+v;|jb_YYr|V1nUps zvD>Po065MFeQ{pXl5j^1d+Ye%fuB|^miE?ipI2xCeB1O`VXBN}N@`Cf5u&=z zO)+C+KoJndBP;fiou%-chs5BgonM>xt!{!hfpO}uw5ljT5Q@zfQ1z+#AmRO#hRh8e zzQ2-C)c}cwec|Kr671K6!Wdsm;Eq&HvaNR_@*XA7A&l+OWq3s&)C8vmBvyL~z*?5U;B=6B7`u`HwQ4{`)w@pV^?=KFSV$+9t4j2QD)^WsQQ$H2oO(`z)SDNt z1*p5juj33M@@<8$-aQ>GxvdY@Q=wBq@_!>U7ocPC4Vv-eB=6u#QQ^&YFL}N?61T7} z!+D8{-w<%61gT4oeodtRslqGbULt&Tf)GH7-OMbNdBowR_0E=)G5X$~Vk$bdl=T>-gRI zjb3z_;x8k?E(MX-j4M^NXU0ap2sN5=~AA9ko2@*>nvHf8#0isjX`D$h&r&TLDA|e73 z8ZO^~`||{uLh<^2nq`&2cTdHN>c;;ToRq>qi~eGzDI|jwdMP3x@xEPW)@$XonrG$U zZ~z5jpBFbmmHwxZV+rcM^d+{SFAu*;0ci|Z7ZAw_*nDrPXn(AkmoZ{!Pfk{5VPt%X zny5uIH8i&2S?^^;O4yY#W8R96>dBJLfC`Kk*ceUt_C;NNg-81`T{H^PXqIvUz_JFI zGwC+l0!$F-G3(XuPoM^jfu6#|0L#P>juO8g?%{^ic*aP#TCa`!nwI}ViUrr-C-Q5= zd;MA3X71WLf#PDG+E$ZhDf^mF4sRL3_D+O|y)R{E1UmC;pke!^zf&S3L1jW#iOnDM<~sdmTwdOhS2bGdvSgkxoVFibQC^F9dyZ!bjSHLz;zO(;kD*{0C9&B? 
zqV<@iqkm4Y{Z!#{vMlV?O5VK!uvX$ZWf39DqQf%YM9mEXvFkiC+qaA=RH|06lOc*<|k4?knTwe7Ewc1BK`wYK5d-> zC*E_fFd1a{n^>;z0aP){{O0}xn&7GLYbgJTBE|#DS+?%Zi26pMP*wi&sbu znfRh_W{zHfk-YW|c6+%-^@$wW{8V`{s*Timb4!d=i2A@2Wa@2lgYP};T^SYQ!U zx};mWl?1$o1Qd}5L6Hy#>fxO4ocG`NU4MY= zGS|-BGjq=!*FBpQ>>VP?c@5;``Jf*>j9tAXpJ<{iNWB^Va33lTjt}Ng>AENC!&168 zY-5D#3P&x;;JZ2qnva!(lpOxaqYd$SUU|LjmqHx+D|iHzJi9TwHf#nTb-5l>BO)^0 z%X9VSLW7=%SPkD|2PCE9JtUY=*zFCK$@IcYU%pFA(U=5f5(e5E6{tXx)}(+^6}5Gk z1Bm0@`WA^3dPn}&*ewL{F{RBXNqe?1OJ&40ppjq9(^1|JoEVg5C*dfY!aZfY7fL)p zB}ah=jt|Yyl>JX4Jjo0so86D-?MBL;#C#7yMziC2)gn(d3h9l5N@C_9;fQf02p&cI zR1(J353{p!#=OkI!M_*5nch+47rQ)~oTS4T8A04A znpH)ZK)Qq%kJKXsIU}ZfojrhrLcxJ>f0u5PDkH^TsS##RUp_rCXsVa2!?L++ZZ431 z;1j;X|YQQb* zk0BhCXfU84=}4Ze6CFIa@O8ITEg`G9r!sk4c`qnmfP=AMt1=0O6`z(6gbB_@arST; zoN5D{`;TF(g_7N0O6^s9@Rh&|gSEbOy6CIv#VXpeJhc3VM*erz+l#>*MyI~g2=@W6 zNJiM_HwL)($}0`SHF9%or|c5L2R+%3k};G0ZgR`{kF3(DkeiyF^LrDZt3ZM^64x6_+fUgk;vT-R#fVu;C)eku>pVWm#Pff~}eWxBFYjM#1}vGV<_zwATh)LXch6(A318vPx*Z zp~5FinEry{`XEE_L^5(xPz7|y&6tp-^F=g>*W$bCKS)hou|>H2SsDCY+zW<#gAByl zm!cFAv0+P#tu&&hiFjzE7&-*P1b9`X>Ym(4sS&S$5H+cCvXS1tel4q_G7UV70<;Q1 zYDs@r>+S6=F=x2Hzh8zR4hywRU`LBhAyuNJNkc=kmj7Md-t>}s&rU?>_k>JAa!GKG zw@@WJ&W|DxoQ_ySE{l)ubXn=?>t`k>-yM6px@sT)N{Wv!eDEFC^1H`K4H{Z}AZ_3e zA}GWD83}q@7+MlSAdC-O=9C9{h2J=1?k_L5d>Ja5NTQ9_OitcP2y#bsP1i@xyR9A- z%I;GJ$pC&E;4$rGOdl13sMO)^x)rXeykndrCRq@@poGjy{YxT_h!}DAh`1-Y(m`sG zp4pTYP2b05$o<^`eVD+&eK3_p53tVxkDcDce?GS5rAwUZ`? 
z9f2hR^DDTOHM@>FZD1EhlqW&=Ukiwq?aTr)yLa6KJIka8+Z#?COMq|m17sJd4R%ux zc0Fq221Ia{XvKn9UI9>%P=OsLQrz=KV}|h2*$X{lz3PC*rAY-gVbTr3*Wd#@zY2lm zFR(q_5=;B;8-91g&w6~E>rD@^bwC0ttn(>e_fj)wOk21MfjfxylAE>xQbgv;b-#iK z8yJslI}ZV#xHQn^G$`e~(me0kM>@sXs~)V`mzfg-60e3(80rk#z`G~XjewC(0nbc{ zo4%_DpAdwR*UC;7jg0xMU0`I^lo=~J&Y+mlPk>>Ts6jPK4x9q*VgQxI^GGdP!^~YI zj)b_Yz$MCr01ZjcGE_#(_Ka5@E{2MqrE45FXmb0GVu6-RRC(u(n|_z#OFjTfO%Qmq z_FJf!byxx_uKM;@?Abn5-z67zc@E+8^DD*zL$rofq2d#R>#d{+*9pY))M-Hij}hKQ zo$I+o>k;>zqY2nN2}-fs-AUDvgKaC?y9$G)pBtMeFOoPb){z%lNhFt{aJe5JNnMRLRtOA?~|7&XPxX|1e zYq7&2V3eRZd`l;#gR?C4F;C6$p7M->a z7u_=agoUJ8%@kL+jlu)fFW&EW7!AlFtP(r_CboQVOR)bx&EpYVVNu` z=-DJzP^s$Cx7XD1IcYb;m>C#~Z`UjYBcb++%ue(RZY%B#l$3xd3<{*}<=nV8n6eXB z>W+sNguWYbfXyYOw{S*m{|>iWv{r+)wmOXhk3I6Ft#bnX9?yHUArqLJn?DTv4!DXS zX|~>S+AOe08mBoKK?P4K15%$dTJc&d;NNa`;;2med?_z91=I?l2UC+4ZV0=Vcs^WP z1RvD*l+J?^0lP{?G!E$yBr$ZT2bqL~NKHIaci^J}_I4!*=>Zdd;@0(({{Lo=?(7$5 zM;)pbJ5kHaBAQSIEpRt`NSR%0^t2(I!)X3+xB6DQE0$m6ZB{=mb#+%zZUEv>$AKq{ zfn@^pfbtD^ETD>HI~fQRch~9sAi3|_A;ibWN0{2z-+#qWEL9fHPEDi&4*vZiB)iFa z4&^r4J>uiS`0n@QBeRhf)gRcLR*ZgJ0`zF>dfKSnK^qFn_ z<$81RSj+)ft8fC{IMU1I!i=7IL!HK$7G}G)(zXavK3C|})9V_Atp$S#aO$NfA{I^S zh)9TZQ4joAOcb!AOsc3n&Ir3OU?D{33_jt6!-WQe4e|TplBEa%`c?W(QzJ-K>(cjb4e%pGq8T!$}!XjkL z@+SN}bC`h2z07>4<1#f!Z<`U;n9&FHb$#2IrRGk^pn{JjC>y-nQ9cuuQw#|{K7Psr z5lccIV(Bwqv74Sm0axsSH|U|YBG`-`^ZxL11}wH%oLYhk7xna&Tn%AhKwxjc0}k2l zQos3sAG7LydK>S-*g4-$@_X-HcxT;d!6+I&W7Z9}oaXH8xK#61HF>G1oRSK+p{15Y zEAPXnq*Vn7DBwL_%AJ6dRp3Dpb;)*E$HEDfV%}VEs~yhYCVhyKD)6POCKZIBt>Z#M z5F6)-v#lw3q<~tZKo9}yZ(r_~^(R6ucmz@AN|?BXB@hHLoC`hbg$wxALC{<<7UrIj zqO~&#aR3R$#>x!B*Azt$5Js-O|H>JG`hVC+bv*cZqvPr$evcARV+M{{=B|rWlepQ@ zP04|Qf#)s{P@acMBsyCMuV7!1Dh@+w@e`5K91m_20q#2|3rimrXgQYxrm8{wv zTzve1ok1D4=Dj&&0{wR_fv14TcKU`sJ5&VyA#pU|&I9QtD2uA0K69FGo<3UOsj(WU z&?r(Cq;_wdY2f7K?7ceM0dgigz?D;2ltqBg7vNvhfx5DnfD#Pq3BE_OHWXvW0lTfY zSMiI)HZeJhVl3Rq0;@-sJ^^1ul;9zE;L$vJkz$&99k1dY|Gn7=Pz&b$`Tng>0f!Eu zPJRc=MIcl^4T8I21cs4^r(ar5?0~0DXuv%b0BX%57h1EIqO^LxPTiUQI+%xg;0&tc 
z9F2YU)l>})KY><9ulZ&KJ08pO)&?ziJciZ5_HJe7^Kws7OUHNti>%?!I$5TsvvFHX zGYFnK3^!tW??oFO4WON4+rGEhh6^FX2ehZBJRbKm^>z}47 z3`aXWU3fre2}`L0ZuXhRp+GDCgoK`i{Pw&c>|JCKiV%COwYui{%dNoGL=I0!KJQc{ z4W>A1czwLl@yab7A#IQb0%dfIz^NAUTz&XTv9c+sS`FV+r;1b#yfjj-NB@4S`SUlh z?7=m?pejelQW%in<3&z*n;DoxMKP`@4guXoD&V^EXkNR9|hLxJ9;$PSTK7) zYD5;ghUg3I;pph-It}Qc9uA!%d*BDafbp6MM&&Ni1c4zkPIGu}UNst@S;tsr3q;Z5 zh`3Pb?9QtKVnyu|@y4|b(dt@n21FutQ%wOfibNYIZbGZ}(EcuFjb+{&z-~eLApkH? z-acUKW~>fFC&(pg+U^(V%LN5E10ttK0mla>zP^(qAlol;ysXkD`#CoSSlXEZ2g{>1 zgiKH@dv8F@CgpW*l@dmvC*pOUK?h}2Add^-?7wXNd#J1jq5@$tP5~qvuwQV$VwHHc z&hfE0ejTs#3U7S^)8Q)3!v`u_N6{4&-Xrgl;qgc>4yrQIT zSdWkchNwYp(C9|<_ToUqSs6k(@x$L1oxhYnpNqKLN3v1t@`m;S$odW0G~opVinCND zlc#H}3Is2H0E~)CP%{M%)MrswMA0Scvq~)XfXR{xbabI6GBUCbrNfJ0z#TaNYgF=7 z@kW}Gx9T;p0<8?^(QvQw>Imi(77xoIf6%dsv*Otf2f9b;5&v+b^l&$J6F4(V$_4=` zxib4?r-(_rtbXRueD{2{Q5Q#b4@JKNS>BuuArR(E0bILMK8G*L^sqKR;_-z_ocGM5tKXa zKbH1I=qPs_t)c|pVG7B8;B5(mdaVkd)A!g2!ZbYlc@i-OOf0eNU9hX=_0oi0ZSDJ@ zNE5o2nt@086O(yLAxuA5f_L{ZsO6lWi z;X)+i!fTH82-_xwHi+-U6;Q*Qf(F&f584qI_Y#7^8YbE1DBgXDojihi&$*3P1;Sd`&vGmi>9g}Aq;bixpoE~E)F;*2My0Ff0l(g~3#;ZGA*fLc+E_|n&c#9zWe<+v z;NfL&)UMFy19i26YIC=Bs9AzYkLItMx8CS?d)H6Gn@Os&YfW`JFLE;9b<_1Rl_TTatEh7S+K*``+mKukmworRR}W_0HVpLPOM?$#^7 zL}Y6r*<>FO+ST_f5EHyQH_gs(6#$zAMrAl(Yr!~rEFD=U$C}fnaA};6!C4wJ!LUXG zx>AtGW{H{78>2|Q>M{Nv)}qp@z*LH5Nus939vVqR(8wMenn&dvOivSA3-Kdi482Rv z%rrwh>#(Q6=0=f#H{h0vPv|c;pf12N7pN|`Hy;u2*?UmXYVNflI`MgyVA)K4$k!yF z7?HYbl<_(R3+_F8EC!TD0_`Z{G%Qy3D|XGjM;xW0wCKRR6IBRhKgd4%jXv;XI~y6F zM7&^N*h5XRuwj6)*JSuTQG!Gmjqa} zD&mAkov>^D-#c!QsBdK>1xV@~1<8m)-DfCO!q^W&bty(L6ec$^ql{|5`O0*^mj`yqZ{#oN7Q7|tWi1N zpnTs#l$XkS*PhB3BWKai zX>nC@d~(Fc3c{c4{n*`qqXyK-z-`$0pj0>GlUd___(?B{*Z{(QL$B0RlHrDqYz`tY z6SZMyN98Zq3jEJ2l@o$efxTJFeJ$*~3%^$>!VTRdVCbe==DnMx4$=sy>e~cW0!qX1 z>U!X+k{%{TsA$+qBId%Jxs1+|30|~Sz~VDHnj-*8k@pMSaszzDj=>H?E(;%Q7#nch zH^x)bZDeNvC;jVFi?_}J3vj;BEQB=EuH-Q`ZWMxkK;LT4I*h$)q`SvdgC&B>I>!MbUag+}^c${pH)K{V$(b zurFn~q+|8#TTgORv@-vOWr9O+LUNdnBu0lBjrkeltEy_bCK 
zeT5FYbrXZd1!JiWwuT*^+(pZmQ5^!UG0INJM@x+r9JhAucGfyx1+OzCR{M2e?cWb- zPekK>cU7Vn%8uLV*K?f=RkJN@K8*7%4~NwpLU)iPIsskirsmkddqoWEOIcVRCZcM4 z&~JvfjK&skGO~DC;0X~G^q~a7m=xILw~=2}tE!+ZYzd@2zr0`2Zcax%Fe-`lnwK_F zrP#DFO!5`>6VrmlwJ4coA=+e{Sazn}kbxD6>yJ?NVObwgYrd=KNK+jZ2p3-xt@bOs z_NBV4=jp**K`GA>)v%z{I*e*U_L#|6|2@-YDynA=0qjx@bAW#hG7{fijQb&9zrPdd zdUpcrxKZk7u{QsFUelzszqHcZQ|-ET)Ut5E%UniAoHuH(dFbh3`mfVWy&Di3Dn9d| zF15Xw93~gw(z?e@$9EbX*+6WY6^tq@l2L4Az?N#3dE&$8$xEiGW1*|{Y=Gs}#Pcst zY8rQpd~-jJ^%#hE7e}^Jlc-ytyJN>s!9l4G~Xcm_98uahk802>fQmmAAg3od>q+Jk6d>ApZWq0rP*rZkN zE&&8`dWr=3d9B*@P?|hKfIzH}AP}tI*Q%!tsL*nCaz0zBF&b^k9qbrQ)SFk;*50Mw zzg*%03!Q04u(|7-;$@(kA?boL)l9}S)u0sA$x=P-`@JT4*}QFgBh@dg^3tUc0$}JBJp{PW1_TKXXL zsy#ErL7kWqGc{vlU4WlJWv$essA$I693jBd=Dj;nxp7D?in=0NSxrT_Ty!2TWej7| z$$1i}PJtb}B_36qQXmvig(o8QfQ1XV{3BD1n zy0FDMgfgWhTq+!E2;rt8DqDFL=3!7iPeaMjwKT?n;z>AVYF~F0FDU*A*>R$NLR$-6 z?W21_ml|q~M<}I|E%xvuBPt(g7vD73yd%c^?g{xJYk2zYK@qO`K$9c|x@JkTkDv1* zDk~-TuU#~!j%05Ql);>afb#?k0z)u&GCP}2HZvzz3;0Pwkqbp&$bo=A`QQEUOH#4} zdMbSV?s3G7?;P*(sBIGNv%#TDcRlP^E$!=WM3b8NDjlvb(m0HRB=!5l6|tm81G^y)D`h4DAuXwu>r*Aw z-~$DCb2teyeU_OR-2ARcbh5A;O*9MA%%p`td?vbtJ7b3Zyj6Z@L$;waltPNmen>_ zlriIfL6ZeJR{q#hcm!0(pB2kibb|#2Sb!185D4avi_pZ``Ty47ANwv-v(0gi9kZ#j z_SE>ZA!6}c?br1s#h4LKrKsEo90e)0Nd=x>*8BeD&YY-hgWB-T2aRdoK4e0^;h&oE zqexo$ZFZQ6FNb3xJR+w2=7{f*{AyT9GILSa*@C+_m%4r_)&Erb<4B5e6I4c3{N?Qw zUnAxD(E>i)$;(w`)k#Ep&hd|@UeUcZzqCs?MUdk!NOpyuqtWLbVyD&^OuBAvXQqXb zQBn8XVpfvbJ)aml)s!e-=Yzo9-DN+0WB(YV+lnbgky&?XAG%s?h1}3CIwoJ zwLy+&Nl$L{n`dQ)eq6XZT9-IJRIeo2(rOY&sT=xz%x}H%y7i<+uASq4{}$(+Ri7gq zz6TE-Q*eWn2KPdj*ycAQ9(@dAEx^oC(Ner)+MwHapIKrSW0~&rrG+J;xAm^peY9c^ zT1$7U7T!~B&lI7`HdS}NY1yNq!}4CZ!dPTA7_9fEF5Xs`6j~;~$f8I1m2`|BliM)~ z;n@oZ=E|{nQPB<7>Jk2_FA;Sz+3&7=Pn7gQUc-@zIgE#iZD6^4oIQx_##o=+B@7t4 zDY&CLVes57;-F;8@a}^C8!M*i(nQo3Z~aA(jFvu;9ok6?Lhzgg$Ia;S@p8ePWcOq@bi`dbE;OYO)NKOAt17VyYT+wC=fbLqP_&0`vBwIQTSse!T+Zx;pFIU z;plFp>FsRcW&q!4stP{?Sns(%MgjnHU~{7X)OrVOWFuD#dp9;#`1fBMc9mK9eg};t 
zubkh__eWa>)N_XgZa9E9H*vRda=hrR@e{6Ok8(mF`6^Hd@N<56sDj)4;4Y7wy^Xnr z+xZWUiy-E`N|@znA&`_k#9tuvg#HY{c@YF7m+&YK5P%jG0wMpQ4-m*l({qS(A^bX; z3xz_m1Bf~R%n8Vj;wLaRi=X;)j(Ik|vw>ZRhEpjcrVh~jLDzmpE4KVAw6if@h=xo5 z8r=^}jV=&5%UnB23{}bfD7U&VJ-O&V?8V|5rsD9`%*jp|aAfHXod2cx`lz~mI z5hWfNe>V{8&oUtP`~mMjBK<)IKM3~1$(komlh*;$ehcibpC`-v&JW1{k?xN@y=ak( zKoVxd;rkhE@t@%^{r(>A!acM8VtY6O^bQU9dq2Ys2mU=A*G0X%?m5iT4|>=0r@>}~ z{yiM`MQ}W?Gwwlvol!~nYwwC9{~qqfMQ~YdiedDicl97N^YdUAWB(rR=0$M36tXny zKyTWDHS;r^QsUpk@mvIVEF`kD0jA;(_OEchDSr>gdl8%ny48nq0C(?Ca3yJf565>A z+-UViWFe4UDp+?vZ@7-EzlY;szXqk^74O3G&$j{{JGg z{B&##J^3fd=WF?2LXVgHBj~?A?6T~P@u4UXhy(Gj2HI@tIkdaAg@Xm#+4mogH~3A; z@CPMC5q7*G$OeJHzu?y||M4JrdXDhJWy~D#%b3rN#6J*jN=%=w8i3CS0$(6__`@S? z76SQiB*NX=!@<WbS5-g;iv|L@4*r<}JEQDN%+IpZi0ko`4%+FO7jP)J#fl@$y1AU;rg)XQU zP!Y>h5f>LCoW6zLvp!Ef|Do8kLe%`0b{4h(I%+XvJsVX3{gV`c@mWiC!0+NuCI2Q4 z8WgBUkoc1rwT!-vr5zB|KS*amM_XZ8TWwo?(6O*ABY^R_hOjIXfa&Q7G!7w43tLd3 z5G~->D1_($zeX-Z51@UTMHy-#1_14|M$|%#0NUpfQ428v=>EFK3}E={hy}p#s{Kz&Hj(*QuPrzWN7nE;^I^C1Hr0QCB$jTF#QS61Ivl^TR6wVb}A zEw!jQ2r#~1AN;>QM4zUFn%Ba@(iSvV49|K47@qY4Fgz=-4gle!XROU<>8J{7nFcfx z8X77Z1^^2KD-|mfsF)u#Y|xb102rQ?5C&S>S%Fl1HkIcSYFTPIptgmL)zj4KI#Kh> zQVZzY8|&)J2=jq@m($j<0af{BG{2_b20-&WQcn&4jg*kF8E6iuh0H)m2giL9 z>suJw8Ubh-8CmF`#B6MV`r77*aE{3vZi|*U%oW=%o_DXBuU7YrwB9qfe)rPlsmN}u zjMp-x2is~NMtUn+!x+w8LctDeaQtwnDo@b(eUYH?)zs}<2rf4DyxF^QB?@=c;1P+V z_ZG`)8k`Jy+)ZW==ku0xxr?>t1I1d!+Qojv-<7@}vM(+yyWS2hKTf({UnUtoo-$80 z-QOKD7dy@888e#teI#@~&$Dd3(Oh<)Nan{Ks%&K%N#nj<-Ao%vyzCC%`f)%=Q0w1r z*vsm^S5hA^^LhFp$E@m|OHSeyGVkFR3xWAVR<49*Z{nUy8NWPoo@sj>>IR;A{Lgdy zJf6gks*cK!0#5ZCBH>@>#I|JFy=TvRT0ho=8>0Y$lHt?3)*}vqN#PBKnlGrVe9!tV65$1!#!wZ^e44~7y*3PNR zuS24}dn&_aBYF+xH%^_t*_UqcX$az`DFy{(TvkHcHfaxWC!J`(n{-f+IYVM4MY zFJW6FDqpC9Ya;s5y3FstEHgpEBmr+PuRtc3=kF37!806L-$0{f;bk7TS9wDl zE`-O!E@FR4H{=&^g$Z<{Mj(Nz=Dbez=sk?^g1x%uX&)51sl=&uaBS_^mYcuVuuR2-wam{1PpA= z9z3{v+^ilh9-hjU-=5fmH5PVJVOm^LzW1perR8zMgjgJ0J@IIEbDY86U(6eLH4~SH zX0`9U;7oR2LO3gPoKW0$f3`TZ*?B74Vj9}GxV*Huz(JwKjHP3^h?`Dde{7Q8>H=6v 
zYjL?m$O&EMJpmpz#0zcs#$b_WZTdd09`!$55X$-KX1^oR|Xx}o9E>_;3ImQ;!2_1!JLqS4TgsL=z} zhmH@AU*QNj?+`Zxf3#_l*A7w>?yBO%lU4BNX#j*ybF6UUT4g_A-C{<2DRx7noG{ z@9Z>RVw;;GVO%3bvSfJ1nKUjheQD!Htd%Y=SwrKRAxc11R>-Ja&gYPoO(GQA%rf>1 z(OOJllt@V;MHecH)NZz_AqolJ7Feg`0e6$G;63z?wfd-d-$RuMWh&SywT~O;mHFew zV-@>2vnzidO#g-eurmkWhfqn!Kzq8EN~UqQ&1yIC7Dd> zjS4uO_j($8T+sH;@+p!0D9yVlEeB*lO%Vcg&S+UWoK$t`h!xo%`Z2}(#wBBQ7a1*O zZ~RSh!r;j3=P82VgDp>$t;3+^l0*8*ov={f(Vudo!IO7LNs`T_2g1Qay7Fr(jp9BY zz?0jF(7t_FRsO|PfiJ6f{K_xponm=F1s}0^Yp=dv+;y_6vbrQr3@S?*S-@I9lT~!^ zZ6Gqi`y_Nk#21s;rpZGA%xH_G>x-}mU2QBT?UIrjaP3q^LHlYVm>N`O&YU`R@&0qb zDgpX@YIZ~)<#;}(LUq_JX_W0nIOIAojPnwaQjeHlpnq6uB+}e>@`l=pF`w+( zGv~_HHl7}fu?K43<@50Oab^hSbaN`_^Oy%|iNt_|InxTi6!je4cao%kVEkw-5bAso z?@SH3a#9M0H1qwcboZPt9xcPzTE<%(7qONT#^FRI@~z$m3ox;Sh;X{<2WE^?fIs%j z*|+9uIshw9(sF|b#*8j4B5YX_THtJ57me_)=7iyJ$3p`(EsYaHOIKgU`Wt?sP<%xl zPEy*$VCkR;E;8D=8tEV}KptUiPb*QhlG4be%j;1CBRQRG{5(QvB3d%qv(S$r*jaDZ z_k#&baBv32eb?T^$ss~8?nh3WeZ!tC`Feu!Hsf5%{=G+rpLddr+S!e>lbQXi+Fr?v zqji|93eDabdyC9UXera|D{Bhn0McodSZ zT~X7KVI>E5FD!L2DCaKVU^b7!LFF1W;^^HPAFXwUG`v+%QT3rra9o&D+t=y|Fm2va_rM=7MFzI|F#; z;G$LAE6oDNGN%Srpjy)ai`Ad*N~r`)C7k9zZnQHs$?=EHl8DqQA`15;h*ftUyV znC5fEz4f=fo94A`89Zp{785nK2)5z zw@wNX!`_A7-ld|yEMXpn31iPt$cH%8hb;GETqwH`$L568BqxZ|8>f`1Wr@n4=ug+BstNYyHaCli=RB%xIxH*Z@C#EZ|^qDb{fi6xgnfkwhDaOWeg)EAc|#E_NM7w zoN=nyH!t^DxQ(c(vbz?)FWAejNW(Q&(Y-qAm|b}HU2h(4qKebxwMxL!usWctQQT{) zJwyqo{dgOa3Wi|~=o=_TItNEbr5c9QU^aD(XgBVZjGz`uvkGMWFq8lCX?y020@|Jp zazz#5)YRjka-I@z!^LG=iwPFbEqp6Tl&+hU%`)NAm!9H~1ieQx3&HZTyRS6bc`Ph+ zNi(!lYiG(*9g1~QzbcleniuOL>1K1{{t$5ieC^ksIpDONE zn68}Dc2pVRsp{AJK~ZfJH%32A))J@wCG$d5EODAXM((Sj$~2>vepM6}X%cE%noLb9 z>p0U0$9PRDwG8tJvrMfgy#hg)Yo=A~aW(oWYk%~}CYEXPfJ3o0XN$-tM@we^?wpf? 
z%%&3jc?*@{eC*|@-Jwc`|LW1fBofuYgbsXK>B~oBSkXfkHRgp7Z5sYi#1NTJ))eNO z`*RT&*a5pz@N_J(m#21zZF5tDlQg10qBg;}oAEW^qQLsLp+~c;V_p5@sG1w}yD_G; zQmz#e@I=nGJ7m$81a>qZ_er`M5=$@+> z;_ZV~bm*ge>5TIH{v*0EUgv%oIpZ>zCo#-1`p~cw54F>JSUp-E5bc?SZH3;}RiU7D zE=XZ29JrC|yPUtLPkHDrhn0@168cig7A2^`BNdQ(p{g+9&32wvg@E6OzNYg!8q>g~ zj;^d$`Y}c&8SUd5?-E~O=tMuXdl?nW)R?~p9`cNTEdR-b?+R=RZ%|CdN7j=Yoz>kU zTe49twiEAex)c98r5ib%9J-Xt4?zR+ePWq?l$7VTZJ2RI;ffh*TAWj>oBerYG0$i9 za-8Ubd#CQcC5OQonK9oo3ykgx3&&g4 z$6{HtcK8l3rhx`Xq$Ux7PbG|Q<0Ob#$D!S^Nk$J^i>U*Qqr4jY=0a!Q*@q$;q2aM; zR$r`~Iw0eUI$p-%Zy40!ztsj4#*;a%5><4M#Q%h=;@{ZyDhxPOvQ#7x^!2lHJ9}-M(xiE*WNO!x= zj}Kc%V9Zn+$0f@Y7HIK9_IiGRFjgqKXxQa({a)Jhz|JES5}ECcgObqhqxHqO^*n>C zRQm}uhx9>vq`Eq1;z~X?G%**l+@it;j&dfR^W@SR>m$w^)$q~bF4t%ojoebav=QyHKG0y|}Yf%7omx(c= zrWcvwCSOn0h*Y*R4lYfn*HO-~b0%?H*4p^y#yNyBd_wz_g+iL9Q8lAOlC~?it5BNl zF_FtQWOH(nakaRlVF6iDTmj&Nd1NmJiOIlxtyPjl8PkLyUHt?KjD2C0L}Fk9`32lb z+GH|0+GGmBc{u1|C1MfongJ6Y?n^*;5-lL`w0Zkd&KREQ! zSB!5~y_rme>p1X&iY$-udZk@nsWorHtCepieGxauDm67~H(phj0vHJ(D$@vCy#aui zxLP5MxNb+Wc<;SJ`+x$IC?If+DQYNRz43tE(v#&J+MYrgUBz2af4Xf(KY&?dKg^{% zEmg>0>C=*_FakY3V|kfo(dyo%|0Q6iu_(;p`f%X(RodzCmGJe=#o5D-=s9Yu#*$n8 zX{&2(@oIo-pC9^rSu!!|vp1 z3#Rp%kj;$Z(fC88X|2(UVsV9H@7bZPo7ufeDYDp+0|OL~kU;7)5D;9INuTbjch96^ zQS;`H2tqdNmJ{WZH+1?`w&AicyVVzZr-o5Ys6Q9b4Wp{FO82C^0OLbp+LbeQ0EeMT zBioi3jgdutU8A(;gZ#u4Ad4wQ?GPplJd2Qy23#fnvKQ|2mX1CzBBzn0i>Nz#;avGhc$Ap-$WR;kvy-!_uJxuuuDi zLAhdh8!S5GR(Grp@ShN&b>>iuRN@ILwTcUKob+A*j8A>hM(e~SR=DlZj&}%L63D1s$7woL*Tj1;U@$mz%8h&t55F zlut@TZn1x75jM}3a?4IT6}p`2eWl2d;zPPzWO{e$&uQ(!>i*Eg(LZNVD;NHX*16P} z9>PUvyZpTsgR2uSm}snSfkV=#JQ{GUC=9c=akkoW3+A<6bNw9A?iHl5+X##HSovz} zL17`b3gS$ShfGquK^0O8gBUgQJTb%28w@=rc&hE}HG3g?{n98&@UzTBV@jp=0&;JU zzJ` zGJa+JP)Q@zYUQoGG2C@vrOjyI)We&1Z095E(MmCAN?STR$-3TKP%ZSeb%AtfhMsk(qnJ<7lXEV*(m`Zp zdop7@kRl7o`Q*bvRwG}Kx!#XUOC{4vwjC1M9-tyHw{@W8v6L(J9lWG8M_;;tgpFbh zPLiehw2QD6jOEFK?jRX$dre7@$MM>8nkyxm23t6w=40U>opot35BK=_P#lBac`hi(nJSGlRjv~uQ4OC`}x z`#~)=ylcvO8O#qG2UJdg26EsT5|kfT=10M?M?V>78Evzqk&VsT3LAch3yPGW3<*Vz 
z25pr*>U1(Tkam#W%HaxR5ZmI4q80mZI%7Z|8ETj5OU%*aX@znnd>VehO^`F`{E5q$ zj&;$fp$uK(NwfvmKW1}LAGVZ3f?lMEx6zTcz^OgBNv;P{DsUjw$`2D`)jQkOcK~_3 zP6?e6QjG!^^p7b76P!vFld-JEzLtKKC^O?>lvHE5Szx*tgUgu8qtR{{&&;~puR1A3 zwEZ-8r7G5R`?i}AKCpOkP#p?iRxAOm)&MG^I_YcGTF@p%EZi}~v_;M6XDW3v#Q@WV zS4xd3>z^A*@bPm|6Pr`E;Uq%OWJsxrLTUYCrtS4ZL?yIEW3rV3lEvVlzwOw=`KHte zYNIj|#8Iu_67}{)dV}w8LDD0IvdjNh9R|^VVSaSmkg}U8rB;Cv_ z3*8zyR!NWW_o}si_jquW?0xxodhupnwYKr*&RN*q_LzHhdp=8_u&9OF9v1@*O&+0v z$Xxr)tifs3R|S_MnL35<5SQ_z-=j=t5fQ2)!BwOs!(f9r{lOr>5`0&wFWj8AbQH*q z0{e^WPQJ)516iY_10w9k>Dby{ManOmvqmWxGL(?#WLN9xewNqNWsM7|#sHXB8CJ4P zsP~fi%gMO#m>zudd-bC}pE0by9*6+f!=w&91*h*c_$fvfTP8hD_#@lF#*dt`T=nRF z185BB*F-)Be*JRG)Cq^ZD!emIGe2}Cskk*4S7u(#)V^ODpIljsQ&U)4)FMtjHY0r<-(o>CbPJF zdRWXx1+%eXD-zaQTX{0NUXc9m>+X4%fj!P`6LvJJO4TA15Ql0{Pj%YsUgJXF8Wuk6 zys+5wT}%@-EEvCH6Kg`k!$F;T6$$^zAqc7n>7xM7I&Nb zGt|fr=uE|B40S^aeSpa^&?;MmGD=yo4FOPQqeA%X!H;Ya;gp^Z@F}34n=Hb#TWRXPq0Il1$^WunTI{l}8MKPm@ zx2ir;VJYrFqEA)2#V%5pDf;~4T@6Xv&^vE-gT+v3A+B}p6Fi^9J@$4WC65YlTf#ox zV*JYUlV@)5eqx)w{}LmZF=RtA#j=Ou3`tER<{;zeYAZ#?s6EpT;E-N5%`0OTlg6bE z#$*V+KpXGMxKn-RV7kQyl#Isus82gTAJ&P{JWuwrRDn*9tLInOSEyeb?>4tO=K~ym zTznhzXsyqYk9T+_ygzJPwijHm)i3~s`y~$jQp=(&o^ytV3Hj)OkI=UVYx5*^5*4%Q zyIPhhA~fVD-C~Su`DM8n?2WSG>*!05lvhS4A|GtseXfXX$DvT{Z|6`0_z+uB%`O~JtW)S*WZ0> zUfpx?m$}+l{m!uy?7NC?X79W`@OZa*K6F1b5HKG$D(imTWM*4PogD#dwjoKaYqp9h zV3Cck=~HJRUH&sPF?#336D*2#j4~;6&(tKc!rRNjaONAF8=0L4Gp(iMW3IC zafSMaglj9&j`2N$SY-Rqw|UXh>D0Le(>@?m4i71cnk~MTm_bV3l95Rd zR>q@D8u~d=E(_QxBz+}cGz~}dWy?&DjXK1j0VO8Sh?0S8e(V!9i`olyW?S}D4J%p0 zVw?7(%3UpQK8MD4EDi4zMLs*_C3ip=NI9j5#Q~>~;?H-%x4;8>)%qKKzYAh)IV(>djy`oi|lnyK$l26&ZP@-7UzF0KxeVh6aIhWx99>nP4S6gC(9y1hPCP;9h%BIXI`XM4%WG`;K!Z}hv=Uv3) z@si#Jfme3BL+1*FFe>{rRc0X$_B&hupsoi}O#zwrB!oruq68+$qW2c!3Wkv)2OlDT zdPXUyhypP72X*#qu;w&M>Ct`Nj^Ob@t-UYn-xbFU(h!h2W{H_dV2HFp{uWy^vnvkw zP08?`s)OD2veN=JH2&LplBQS(cb}{< zlE~}ZUV68r2>mwBo;AVeTN=aKwjI*Oly%T{`w9X9Og?3bSYaT?a3Md+$a;z$6a}M8 z+@gW%(K~91kznKQ4n;fcE?;(1oAQ^DIw}gOdn5s*!m4P?058Z!c-%uI$dSuFn2hQ- 
zElI(GW9Ex7*CZKO4un(M$c8khku<)>;{80Wz?9yo9!LI}$SKE)K+w(RE*cX=ne%9M zB1gDJ1ema|zO~A=$SbRarWlTx4V5iPR`+v-T$yaSIyIrR52l4-JS;QJt!~FZ8Fz5h zh`rKu1c?_ox4@-mfGAqZaVE?=Bo?F6*c1#J<=Xbkt!&=Mqn-yxJFssOvot-eAnTxT zb1NE)veIZvwj;~erZNCP&J;*pMu$z+iHXz0O!n(ksxO=2Sxl~t=}4m{RYU!T-8C%B zRQW!rP!y(B2JFNdP2?1%i~(0>MNGh1T(WlzHCa4Rj;j^P6a}^ZOp@o=314xW>}K9B z7h@boY+>*a7~t*8WN5i(_xttR=v^}1B|-U#pYEP)ZGBG~8{)d!+Bgb8Q{+lPQ}k*N zE{e7*lma`;mxAFL&N~l>N5BP=?#pn}obixNS*2#F<#!~hXTTw}NI8Y#st0I*9}64# z?KwnoM5h{izN2Mj;FNqtrE}`2w$gru?+Jm{qqgSe(ru1KWL?nrbXQ>2ktz!uZ{uZm z1e=i#=-XD5Lw$c3=w5);O(7CtNMQ?%r0Oyvv}VN6s~?!!nP<@6<=4(}1P2felx?)a z(FCNK5rNa4O_}~WO@gq%=^+E_Y)+UMH1mo*njyT%ANSo9^URr~GT`aDSS5@WJ zQ-pt=oo=)sG03Yt$GdLcj3T1b9Z5z8p>GWTrM4R4=j1q3iGr7kanhOXW7$jnfo27v z_BlGK7AjtCRjO;30+E=)^nF1R)uS5c0+I+!1uAo&6JcNCex@yk=p1zk4V(xPsBY$` z6LX1&z<7c0J;~m{SuWzzP-(eQEZ!Ma+=j7l?~x-&ikOW_#Mo1s*xpI=T8>M_*09~+ zXCmug5J7F0HG|qo$=CWh&U+Orv3R-{F93xq!LKa0x>xq=XwGpnEKoFBeihVDilPQi zY<_wNDM^5N^V!}BUl4g5&Br&#B)O*GC=XFRuc=fxHGOLqG3$r}5d$AKO#T?!apl?&pTYfy>Tj39C*6?#xOOi8+?6m-02p3RJA8tN-JkMn) z4b_Y`9f(}j|!diTI;7Id(@Agh;n_2_ub z2NQ5?!9}gJbihLI5Z7$TLtT5YMrwQ^}ki0Zkx7n}1MRj8&wr803x z`=m$$5Br70&SIeH}s4c;djPt+u466rtijZ;e}po>#iB@9&U!!=qxOx%!FwvE2+8F#vE zq}bNfYLU!#0;6r{=cwHM&C2DkoG8?(6_>2gSk9+%T)p>dk5`kAR%F;2`y8ux3&_J$ zI_Iz5&daJ8=d9Tr{VW#y0^>r-}a_+uIz z{D}KWWx#6Ubh5!67p-Ql9qL#82Lf9(LA#6a;pMGWY5RwtZ+(w7S8>&#$BehRl@cc4 zT9r@GIB;}WD9cWqG|z*6Vggf|a!Z4=w)U=dD@&kfl;w~xE_>0wqHd3lkQ^AIbC;i} zhr2*kiK5*GtC351#j?F;?`c*}Mi0GGpHCJLZe|X=c&JlF{uHbjK%5j5V`_We7d%A; z^<{O+t8YuG>CC#}hu%9o`KALkJ%aTB6iOjcos!R}a^+ro=8|uwhSXJTjyTMv; zyizJww)2OdJujnVImdJoAYBwE&K&v59v&XaK{$ghvgBK-L)C(Hvxf)&>ehP=2)8LO zFby0?O5|a4OF)KvL3lbkcYN$yT6dbSz8AE<+#Zjmg5mRDDOZA!7tvGVfjDl2zE`zz ztqb*2VOxzoLkHByVbIj`-F9VreMYQxG)kX}*&Ul4G%Z5!3fIOI9TwX@w+%G0>8Xsr znpNalx?|D2oqODl(=Bi0Jzlz9&iL7!cRV9@hGe1fV)fR)(=d3${)P*nfunEUqS}^k zO^WVR6oXfn7n(@w@X>4Lrca1utF(^^t~ba>GNH^doa&K%37SwYh?l08HBH|W7PWy& zA0xdF!R?|rDY(|&SB=hNzy#Xq^2(EAWF=x7gX4^Xzfm!fLqzm!v>+rL{s`o*0V5B% 
z;ZH>zKEkO1E3^$LxlSS9mmzvMe0t*zPtYq7^qaLhmd%rRQ|~zeBlM0NPS9Z-12oN= zGyL96B|9?TUqlFBO+YJg%7)o=ZI&adfgJ=6Kw#H_uf@X?k~+_Os-e4mgnW9W;Qi~7 z!Yb%KA?Wrp*^^6I`#`;~p_b7vtlxEkha5MC5~)8qxV()8w0(NWG71s^wnXuBh%ro! zD<#*%dnI}v@9bE;bxLi=AEATXxM6k-QO4S)-SW6XKS#jTjm6U%q=Q5l-Mc58=OkiU2tKq%5QiuW z9Pq)YgSMq;5MIFBtN#!<)_CH)JbPqBb-M95djwgs1bRm-!cNhwVD1riba!h6p~n#u zSYIg6Z?IG(dKJ~$dXd-I!Uk6)pu8|3SR8lW!`?00XTEJ6eMjV9lFNG=+%t-_KOTRD zoFR#k#D)qLUxyqX51*!^ zxFyxx_V@ARdNx;+8qBJtU>T!R4>gY)0omO7qJaQyQzyY}_lLlf!keF-ryUEFxAm&a z-Ub=7Vq-Oi5=;9vOSXrQnt56(Nq*ls{A!&Pt8T+usn|Id_48vLt;dSrP zq>%E8(rDAnigYSxsfE>vofX()QcIEw@TBk8{PH_e#N>PF?RSJkA4FGuEMd_y8?SJsqmWrdU; z)sX=$tkA#5cQQ^}GxU|4Pl7Vx=slI?adL`ghSXI14>-7P66Z5rxk9!w4c?r{dl7SOFunyoWuTF?`?_<@;~woO=MG^P((mtAkk?BxW+Vd=Y5zj5bn zYSGMDK&GVja_F;+Bv%xzbRE6)m( zdQ)?Bbn_w9IV~6#3HDI#b?D?_Bedbfw)kF6PGefvcr#cN+x3ndpUh}Am?-R6@u$)3 z!&W!YZ}*&+gJ?hKvy^b;=ywn8ifto%hjQ(?ekm@5h}fzM+K*;xR*ICeLmy9O%6PA@ zk<=&5TOt3Z@Jd+3R91y=MoYc#LQAso&dAg#D-Fr44dv$8!wNyqsR8F}-2yJWu!aV6 zK9Z?Pp33YTpJ+=1k38}vR)9UMMssBcWGYF!YE9V`vbbPwb*`=T)y3Hnye$HAuZf@0 zo+2YrAe*1MYKdLnWw&fkS}od6cCaSJ;OYX}zDLDgmj^Fe+NOTh-fWJnw#lZDx2U!T zPUSnS5bzw7bQ?H)6)LpWT|G&IY=o(8_#K5@u^cGdK}r0t?B)5mHpTYs|f{EKY>S2)n&x;FwkY> z4QJWb&pQbyjb)qDsE;vmXx1RAegbPUf58JgZ{=(;FRTA98^2K3)4+`HBb#i}2_Ay+ z`;LjYC8G2a$~h=$97cbM_BZl8+MZzWG|*eBm{e(2qNz)#)s zf78n8Ug$8Q5S{<39%e^5Ahfd3@i3H zAwWKkd!1Owb)%4A`Db*5Y~q=N6}_5+VA|63xJM;y>P>LYY`C;YkXl^>R0U~TqpeP0 zidv#;jaKa>SE#k*nOd1Uwk8>GYJ3f6uOV*`z}fUbY2mm%3ld_J7>=7Ec{FU&-^1vy z+Z-uThftQyN7O}CY5MH$r>#zSfCz!6VtcRYe?h)yJswi**{E&BPmb|b&+J1F;_WAL zs8kizoJxL8n#y_?glXuSwW0BoTsCYSICdp1d|(O&LbVaQ9BXW zgK1S#V_rtZ(YH0=hzKK2wDM#U9^vsjBx*ur z3AEms(W5WlY9Nahhf~;vcZpFXk%=7eF!1UGWLm!=&nEkbP?#a4bE8KX1bUeBh8^J( zbveYCW6-8-3_*`7_AV|(IbVf$S4Dw$7m@%9fjtQ(aXDXMWY?%-1)q+e9#oLNEL0Fq zAd$#{6oZ7$ce#y;Ni@{3H26egzlvIsonxQ$$P4j|Y8ZypDuJMtj-@ zLYCM+KUP+g7Y04ca~k@(d&?erl?%`8qOoXdAK8e^Po~um{7qWe9kREj?@)`$c!A=^NHpm(hRRNt86(|1q zPF3aOhV6^xj<+pgEOjnl_hDJon`pS+bNAc^&T>_D;`_s*t%i++Ms@e1lrA?p+x^%- 
zyy$GqA{KVAeT&xO`e18g=W>3tvz?Pwm&vBo%zlzdoL-;%+AEho@AI7jTI!u1v5B=y z8Jj}0NM_VgpCMdIUKOotE?JAhj;XSeYW$Zc+Q+$tUEMlP8U)Zxn_8DDo{gxsZ{oS>q3OaKcBm< z(IW3vQes&;h+yyXFS_`=FFjd*IeDsh?IETbYN3CGb0_?Bq$&wNw$m|a&+1Yg&ZBo? zMI9ckpVfMi+S^$`sUCv_o=zJCRI%2z4h+Cn(LB~4oAvO$@%yqXC@nq+n#S` zJIjeEV5eJK5qs(Fl=W^@h4fnt8hos(QA|n(i&A5tk_yx4RK#8s=&sD?h~zGX`0ZYS zytIASf!D%9Dg#?LwAMQ)S|-x!c|JJWcZR13P**lKI(laHit8Kofc)mwD-T?FS^_)qKvwE#2L51IK$p&Au6 zC&b7DHhR*j{V_y>)Alf;QJGLRNdztOM4Pl3)!Rl0P&oa8rhk^vW%ZLd(q~*A@M@-A-#R zevAngIb+#eo$gIJNQSzv3oql`6NSAqw4NHPnfN4h#CX~564QkI#qNkJb~%>H7$pq0 zDFwYK->qo8ObVw9s>e~KcwAV`DH6RTo^yZ)Q`BU)MD{d3dE%OsPR)R3|Z|nXCY&;3%9%N5H*t zk8mB#`zrd$M>T+!IkPgUo48Q9M=?1H){2%aA`O#mWBm+xgXV;?_Er}8re3@9ATu|H zAy3B@4~vE%#!gr+%2cpBoX`D^_ua?I&yir&=z@KF!V?lwleWp3_4F+v!QV^`tSAlM zRQ%k_Cj%VWJsiTM>^gOO@=3U*MAp%wDQc?7Gx|8^1>xdI%NOP~Qu)8BCMZI5`tcA5JkEU|F6drMj6FEkakX+jI@Yjt zKGa^{pWRXEPJAV6D3kmk2hY!R9zuLKPkU=W*i5)({_&uY{>3^?=7|oqMhnl{@FhJ#S(8+JOV<8C1ZiTc|vEza~etY5k4_ROdithDo=i$G&^8o)nXpIIZ2}T zczSWHao6_FqHb??(U#4}no zS1?3qP$8>UWAVD^R38pQsWTy>f<=PXgLm3CK8#OYe_*3g6Tsv*Dy62wlwSJ3`2-YLoEj~(pmz?5SI(d(xSX3THK5~G~fyj*A!1OTB7wWot;H&nF(+&@J z?oRw&DOH5dr0T&=Of|E0m)B*-kwsrj7Wb*@4Iu_zIG}4BQe|&Yeu^-FJCqBEtAxXe zk>TZ$Q1EzreztYrqdiBls2U?eKWkorQ%{w<`sozM4Ku7cgFTm3Lq@Z@%;8J6T~#19 zGqWhKiF7EY*~_lkGwHWlSWVu4_Drvq~G@zNq4d5F)d8P3u7pm)TiNmXDmM11yS zxF7HssBr^x+GCf=?|kJHZ5sj(s&bItH;@ZWW5Z zQ8dLnpxFpYk?enVUUlh%1Rq;(!~com-kkL=oV2JIx)WXjwgb2PW4-a!xR#4*k{TH) z^x6=`xJiYGL{)?!T(c%)>r2ura`^W&vhAza-Dpz-?>MezV># z&r@vRsk^iKysMD44>iF^WYEx><@B(1sCe5`T?>{PQ!}R`o~u>egZ(9^F~ygpYN%pv z+?v>+I@w6KsQNaBU5OI%{hen)n(g}vv+B9B$x(lI@ul*wsb;yvR>?~A@<;gz?!DYI zC*;c7Du|OaJN^|&8g#guG_hH8=z5{XeclpWnZOEYELnuTq#L2*8UX4smgyN{z1Tc&)Xhmc43u$=7c@>c$Gr8-bmJ2X&m$I9>(O1bvV{Qkt7e9cO^YhH#?QdQ% zDvy*YRaqcc2gsLJCT0X|&diTQPjfCVQ{ZZlv^O@s(W8$YT*9FieZtgW>Hv+eU=rEOE2np;81OdJFrxfvyABl*Q#7z$uIc@ zb}9#pR6ZM)5`Z~c89ai_)f{g;>7TLu6{>2|>v)AbD4l8VC&seS<Z-+n$~oMwvgq;GmiIUNQx3DqVWQJHTp=VWI)|p zsJP@N=_v~Ps-E#9eF2q^Fj3U_@j8>>SoUhcun^QTYzN7KQt=-OE-H+5_FzLZu-0wo 
zeJ!?ji5>a6`%Pi2XC)?t-fNb0rj9!$*eRWWZQ&@{&X%S%>vDPZUVZ^m!(KT)d^x)j z52M6Ubqu>$PnJ^iS83NS8fHa6X2S0<_-RVZ<8?736dq>vCTFoa-mk5(s1t~;!P>66 z>0BvjIdnRyL(;JzTb*Ai^WQP?PeJ2Br1||kjx&y~wC$tQ-w9ox1{;J79Lqo0fmlPc%4JZa)-&_GC@?3}S zIpm&_mWdJY)B%8&4m37U+n|v9r?~+I!t2vL4fr`||93c_;`aY-!2dU7=>9-P=m{Cn zXmxr1`{*OA22chw=gl%{|*x)Eh~T$)By-4CVBv<+y4sFb2f!P zhW&4eHU85IqW=RY;eT-Y-$V2(r^WvbBBp;JVth_Q@rSNYvj1C%epl}Q5jr3P`ezX_ zGyIzQ|G0=4>Ha{6hW>AK7-;?*bQqu0bNn&lr#bky5&yG_KqN==2P7gONIuC@3R~J5 z8SDN7il`olEsSlQC`JCVLp^QK+6Ap&5Lx|7m?38=Z(;nD$O5#O8J{oxp~t_HU#NmI zT!3_vQ3w3PZO>Q!*7A3X{ZpI2S(#r{;Hm2;Vy63*pyS!(K$qn#g+&EEX1GHd71!SFUK*aQvy9Ge|E7Qo6*gp#tK>I7<2}pvK@!$COS>$gWC4qYSKoH%N z{M`>IU5t#rp)tshIg#*!^1A3h<&UwmvNF>*f1=qx@_;;d^LI%Kw~! z|7?dWjI95#1c;=6V~MAte^>%EAOF}PEA#)x4rzY3Lsn*H5ScU30$5p@0HCJ-3rR3O z1N;a4|E>4>4`wL(-!a4All|Wr;ja`me;DCk`DT7Ivwyi1RRG;{>F-APeC2OgemBD3 zb@_j1f^>hS()rT_>3*@>vkCr{0_Pw7pGE%G@c)wuK6mqXOaGGz{yzTyY=Hl3H}(HN z5kBW8`nM7PV}XBe^qBtO#E(zLXlY?%X{K%S4<-H&J3YEzwD^yo0&S;$+e-ZIr~Ym0 z^DO&s%KpPte=x)!+WimsfLzXR+X&|02;#{b{X-C-J;gr+K}*B>+~I$ARE)n91Oq+Y zlcQn=FoBo?)bxKPh=1EW(fz*T7qYX^wKcZ1crr`?NMlQLZDWh4v`w_2cAj_E`XDdz zbpFp9RKf?!nDvw^3zXN1j*j-JMXIMah^$yxo)q}2RZxxp!`@kk#g%P|KEWZlLvTWH zx55+LA!u-ScY?cnaM$1v9D=)hkl^m_&a2R!+sWQi1#A5g-vH0k}7tF-S_Fc7TSXcqy z5m+<~-;cnu`RM+mPbGg1NuI>|w-pKiuzbHE4I9vD2Ue5@z)Z*Xy*d85(4Q^zf1##- zciVrhrvEU=-+^O4_xk@}^F9H`el+jzEVAFbRMt@Idk_Z{hR?#(!dli+OGgi=Fuy~- zXn+6s_lx-~EUdo|H&#|Q8sHEDjxyl513DYbj5PENzn=oz85p?&qgs_GO5D?bKMh5K zr(q9-uKhe$GJYS`KjizRQT``X0jQ`zXN-XH$yX*|d@@=Dj87Bzud48L;a`gNKiOA5 zyRW~r#eb*JzZ7zTZy&GgWvR_W!I3-wpH+`F`pC{}EOAZ608JngZ?3g2f1CIHY0 z`NyR2{p>$ggx}r4pGVviPxFU#-{+qvXY230SJq#=SFxYG*MBV=#`MoF?629dpIlg= zwga`~7m2@r_fHc4jSKr{5&uX0Sk~V%lE3@0f0GXT)sJOm`OS|7Ui=q+EX$ul%qQIG zpQ`KMqs0HYqkhZS{+A<;-*U#Ze?%N?tUpH_bbn;VfT_KI?ym11@t@@Wo9_BM zLI1MTsDZiS-;>SXV~l@2`~bSEKZhUxm2P8X1^nJ^z@Yxm;RkSv`seEZJkk7!@x_hI zO@LV)9a|uNmyQvr#;h~|0!9EE4X~5xnSo9oP?LFOfzG1h_xuj6AOREY+aJW(w@(2K zkOwO-Bl7)4W@u|`$xcgasb#LKWkX~6;X5gq#=_cwmi#-q_GfDBuff0*#r8+X{DovJ 
zbj&nB=k2>_05%31w*Q`FHbBuF92{tX%My0FMiw+W7G|^#mee}HHelZL`&trd4As`|7 zyYuk9LiBV@zz~WBm|>)0{f_AUR~!3#jVys_IvqV*3v&xgTO%_gC#~;+IpOly$s6T(?4d|JE(&8sC{pYrQ`tqM- z{u?d+3nBkg1^%xtL;RB8VFNJztiXT0TJ&q8n-v%s{aA+ht3)@;pVG5WXzX9A{vD(J z@8Dqn!Mgqj2>TDf^&d#=KTy|yfUto}Ld;K2wpmG%!>8Te1}RD=Pu; z=gBV7zL(NA2HG9q6;H5hpt=6dV*h^1MDW9c(f&8^*uaH2U?~K@H6buT^Zg8{T>n)( z_HU~ywErGHHq$So_m^PrKLBI{taExUfuV9m{4d_2B$S2vyBol2VT zWJ7g`ZC_M?x-*#>MB$9B5KIbX`h=9^&u#Q_Gd=3lI(7DpU>;mtsJVpryvu2uwWl26 zDjs7rtprxANggG4p_rC{T>RJ^5nt2i;Q-(5cbw>-5_w@nA=P;yqT!4DU>Adq@*uZ8#Y-(PR584V$lRhENeb2117zp%^Q+$NZMNnX5WkkJ)YwGKI13Og#tWzO8FpRTaZ-pIuvrWi%eK;%4U1ii>-rH04P zbega#H^bS1QKB7%+N*{OU(8_0Ult+KpVRR(4GG4Q0sY`28J8}a_j&luIg~FSMDB&Z zP#OAeIP*1XUjpE`*LjR+Z7dh`_JNK5Z58a7Zi}M9d`UvS+&4Tzc)>5B{e0JP=MNz1 z+7*pKtGE0ww+=96K)H@O&Dz&DAQ5w>?~Z(22aGIrcpyuAST(Z<81rd`=}8T zo)Asxa=ktpdV}C%YEdp&wo$%xLItf>Q@h#BBOwPjHjS+{d4`@;E#qUNsAQ*)Z0_hM zSinA;a|PFN8{}qpH+Kt3k20qaNcEjnw96*za~B#`LkG%*Y2KzUSh~$Lnr%Ig z*N-c?`edT^FqfoI61mEqIF{R88t&kJ>r~GlV|?F?ktfn@OE=0(`pW6K%$!<-Lhizw zl;bJYb_dtqp<&l*#?m^@P~~;v(4Za%l7yYN_ zj1RV#>(>W9!TW=8C_~jrD81E(JcQrIAsEd|Ex5|4=DW}=5+Pn*UR%Cc>JnLOZ0MmTiDfQeSc zA1+G`@}Ny_K**m$Jpq$A+rfjQ?va*e^ipdB23p667xi%lphe}WUoF@FS&15D`S7z6 z;(EO;xe1BHJDmzDPOa%8m;6}KZ<7OxkC!BT(VqeLQ^NV_E!BKPq0H-*B==d{hAJRd6aiG<3|K617`tzN*v@0$xX9z3 z=aSde7uK`Qa?atWpdOj@4MB=IbO5DDqt86CuzIu3NF?uQf~zraGPjBsRKRt>Cm7|N zF5}F61H8`t1@JnnMFf&qOrBN`jf5I%VH4&v5GgpMl7tM&q`vn;lEWWzU+9;{V%5_z z7nemPkF8VmQX2K4b}OGyM3j&SEh5=+Wl_@aVR^NR#d*6?xA?2D?My`$>`p}*mud(V zlmK6iZn!UJPSI4Fx-n4EoaG3KdtIkWbwZu**=6j2FNvFT^;#7F9@GR&Y50Illv2#B5mX37r`)e$#lKbwOq`F{~gmZU~~y&W$u}+?aCo7<92JK=|0nQIq0wZuR*rt}M$cCRGq?6`#K=umZE%Xj+m*uTao|MUG6*RQ z;xIqI=#RHGhy#dHbS!4t4GR@zRj39t^4(yU39&W;V`6d3uey#Eke{iR^RUa{e46M z;2Uk&OWE*S`QTHTZHUzJl19!FGjO-jp7THo;%A-3SX7 zb35QyXWh#j;_tR_2v)mre?wxBTf0R)7!&r%?wy!J%J14lwUv+1+ey9`04*_2 z5phDR=q%e)Wu>yL*jdr&$Bu~+XO$y^HXQ8a?lo(lN%jCNdMbLSx1?t7Q}-H693t)) zGj7WZPdMpl2~;-jsbYlk77GbCN_1m;zNYD2zDN_bI1X56jf{@YgihY}@P3fp($@NV 
zyuRD+<;&W*)56DuUS3%VKw=@Vmlz}fwpuIS=FW)vc-r+uT`;oh3tgieto+ptXsRTzkHWaa{h{KU3zF4 zzUr96wbs?aA+-LjM)iwI?Fo9nDHuH?8_TNwZ%SRALsB8IUBp1nxA{yaT){cJ68+0S z#ycJQ$Nk;&)zf({(d&!-3pkUbq>)AXJ5#EM37*(>jplVGT6ef+9IabgU!Bd&;^^l5 zbA6?2v8j+4%N6hv9`~llDgZoP)kAOShw<`Jtc3;B#nt}D8%J*xnf;4#oxnn3d#`*2;L-9?>XmDtgt_oXAE`9RQ)0%IeU-j~6FWQMz z4%@8~^0RaCzZHDTM=9^jg@E3n27icG;;uxrD-X%{cCz2AUP&n!w>0M5$(*VOTl6>+ z*R{4${cFvXt;*YlzSg{2^*N_0>xCdPeI9G2SH|_J-6xq{&xFH3(*yG!-R>*jj8(gl zx~q@*UEJfK5F9aL=6pRjtxv6gu`+3DjGC)+yy+m;(YWCxjmlc9qmGYo`XajSOq;to z``WR@>bb7Mn%IRUImt19s?qU#a!8ttscH#?cM%JH*|7dCR2rgeb6%XM&M#JDzbVVs z;}Dk)F|n(OzBGPm1xwh0S6U|%Ikk7_IptDp)1W+F>3^C?iS}Xe0nc%8ioKiC+>||e zN;SX7nKlx=I4+QE_s< zLrDx{=Z13Ni)*yv0rcSF0hB9+p|2cMPuG5}U41-K&=2lZ*noDSG}uWo0p4*9HWZ%} zC-HgVoUQXImowLd$KrFeV*tU9Bi@K(7z968Fc(^olHEqQB>oX*Wf^qLRQL0kVa#{e z51i>ISi}^I_z-g3I&UUUGAeH#=nS(w)jyL`r$w3)e?ep{qSp<%Ih5{ z2qbc%R$XNZ@Rc+tp6VN`rsz3Bd=u@-VBCITPeDZ9?rXNl?`vOqkh0z3& z8clkHfacgS`hNXhLw~onI3=L%ImW)zH`3q1Q z{+|7BmENiGDLl{Oc+`BeN6H65(D6*q>rItA>&xX;Mz;s<5w$rtw}d)gdlb8xZQ-P;t_;Yt@%uuZuHt z45#KAa{wvB;-@toGLe zjiViIrAfi^BQ<(mFS2d;8^mYIBAA>X&&8mZ*>ES8TDf8>0Uz<%$&p3A9mOx?9;@Z% z9&fv1Kk_){^t&yq z2AwD?-2ZIZRJc!P7gB|bS<*eD?_!v~#U-SnyQGmGmz2T2V$3)cum>!B=PSL;{vSsn zw287T>;fHBv0+!*T+ht6@3#F!Uou3Q?R}-61hQ`;3xL8hzw2(?_G#VQyL@!IzjL`g ztF5~Rd8KU8G{@Pd1K7cVl3OVPDQ>Ir4!Pw9ZKN;W$lT5_RDg5$(?Ygsht04&S#B!L z?7y0xr$jpx6V>=wsRayZY6tNjE8@jPEAhD6BsAXxBpP*84KE1#eGO5jP-;)r-0!j- z*RqS#+*27S8m6qPdq`H=sapJ=qnC4y-=yYeA0Nik#I``89>Ji{;z3M2j_U2LxAF zA&}KxEIHCs+dk_Kjm`Db%vO_saP9*kdbM@dE6iP;A)gxgIL1lj?%ab?gVtXs24~78B2#KLiqgq%SMi`w|dgWN(1lU&Pd4Vq9}>K(7ij8ng% zhEd}|agaVLXQk|P;07HH(gc)k7;Q`K+Jr-F_7Yr-HWX@#zSVANqQ|qgb|E*SXffVe zITvtK5x2F**E+O%y@3}J37~NuOHnN({zx0o2|hO5v_*Y28EIkaKaU2Vuf8#k$|gt| zybOdL$VTH5r))0{YjD87&=%<+nw(3)Vjdn-d$#(z!;(w;lDcfQuq~MK?ME=9i#HaQ zkP)HuF|hRVbtpwIm=>Zo#l#S`$S&ykw)HOCnbVqTG23(nY6pxA0*ylWroVwO2{chC z*OkE=bTe4!ki+9?pyhGeYjRdc0#2pK2-dZ}76h!-n!U|~6NbEzJv(Zb?n{uqZFFz0f<5LfXPQ?L0&rZ*&Q1fD*7@r-U 
z(c@vtv0rIAUax`O-x4(!B0-qwBbVB?nBPfv@AYgJ#Hrd)&OHoksm6~@T z_XAsK&FS0WHV$$eR+=-G?nYeMxQt6RFo&_{295-5IAN(tP%3JBkyRz06}^%=i{L)v zwbG35@FyuOJ&?IGkR4iU8@;6N3^bZej~zf4hnG+&{QLrNvs}!nay;StA(ZE1P>y0a z6il}Y9t6jkpSKlsf5w>=*V|i&Sh-Q z6b+ONVTKZOiUwwCO_t)`E=AahTV_tx;4o(x2r$JKoK{Yh^gW zCdXW z1>)FEYI2L0DYfdG0I?k}5_#9Hu|)5poy1eOvV2VJSEY=S%JPXHp1l~;z%lDMO7+)9 zF>X{J+rh@4%ub}+yK@LXG^c10 z$(kKmNZ1Xrp+#V^PMDe$M5HohvS^I>>KBQX&Xoryki}a^p{!nHhpGqiLAAmoVaKR? z*4a1#{jg&x6#aC_4ESh)&i>SV>|M_7u6qttYN`0K6KNpFkFznc-}2BhF|JoSNzE_A z7E^x}k>gIXeUHnuk8VJnp`)aWKMm~lyXwhLARm)T5p3Uf5T)Czhnl3K+G@fI5}EOi z9PFsRck3!C33>rJ@oYm$%gAr4^R&iRVNJ)1v4bA}gO8z{ zvZaGqDd^I~`!yRcG3P0+R=BtMVpMpmuVn75j!?!&Fu*Ubx5qi$C*3^PCg@g`f@<-Pw8>6DO@*;qGZoAV9r(0?!q=sZkC+`N)Ye#TB-Ewx8Eq3VYOr70 zkWp{+y>?HY=i8o&tiih_vZ3LY_WN z%h7Qk+aLpf#Xy9e%(DQ&Ju6~4b9~Oyu38KoHQNEA_sLa7#|6_CZ{{^^*Sxbdf{ulm z1@EJYpW9aVM3PzCcKF322|2dBeShga8R7vaV}m)V?5TZLwyPOUZtSi)+nS*gQb;WM;$Piy6b6;)~C zQ=PeblOBNsaKQp_XO0yBhvn8_+r;w6b+t=?+d0m+j$WptjDwtnQi!PpXQtN!!}Eud zPP>bvg$p-2XhcB>MohRxC)-GHRkm;Vz8~~}hv#rCxvQis7CqsaHi}#l!nuz2ZO%pS zF>-{McoE2bhMbCwJsw3^EwrM;k1}J4g*TCC`thtXP6J3pMwA6&WkhR-$Bf+?eVatz zW{4Ccqoyb@yBYAZR5p{g_aF!eh8?eb{6-Kh8jJ_3T}RHSH)R(>na1F4kfR!wP)NjB zcx@9M#0$A0(_@K2B3RH0sBk426fP^Z6;77x!}_d9pnmn)tx~gnS`3V8DSmlbhQ6KN z1#LW}2}<3ZL!$ljDBO0p5hg~SR_Lm&5WZvBcKCela2n+~1PKoDy~S8dcKw#7sEU}w z#``RgHQQPCxhxEZgIid$;}ggMD1k2_;oh8q&Mgq($5`IoWG^LYx`@=ey0!sH-= zSEAKnKoxcdLF?oS$k3x^IBjAN}Ey1}&+=t`pw$L^n^jNRTjmR;Ok*_ul$gxYfk&QXm(izvP(vl>V* z!ai~Ai^{3SP@<4TmFL4JB~`IY$XcN*!#;PUcr}P53sG7wJ?AavY7MY8}ZY z7&QM0I~TxOm!OpmfXMeFlWfBFew`yhFN0I4u!>@!A(0rFc_Y`3WU!&c=z`kaKk>e! 
zx0Ry__w$G#Nxg{GWR04gGI3J|6QiC@`A#JSjXwEDoeileQm5P8zTT~+H{j-KyX{y@ zj+l0@bcPV=2I7)mP;H96_NE=CYMVx(GZoa&%f36x+2R3 zDl+YTK{yw<3T+WgnL%Jq)G6u&ol@O&MaR2dr2u0_K`|UQLVf@!7qKj8DC-?H!er$B3o6EUo4Cj zaPTou#SqK93ss155rHy>(kE>dHUbxtTa_bGkIEK<8Vk9DF2xlGcXytX1C?KoCTo`v z4MdmF^s2YPOCBFCFcMr2S-;Nko02Yuq^DcNylEPTgO?y` zoKwh#=~R3$W)II|>(zC8IyZMqPiXi~{z}DV-Ydd5pG)M-tAbEsB^u)bLyaxSRcHBDr>)_15EJnhSxEx<8NgLyo97p#kGmXtZk3^19P;q<>7+k z)g;?=jzL)MadFJ#SJCwqKUVh9%N}rd`?!ryyc+7=$xa}LP@j!go8m|PY(trXNC*@K z$(8C^SmORq%LSg`<9s5HJa2|5^+^lgfq$&{2&I~!ubc5+A$jHU6ZN3PXr(;pxx|nT z2b3DXJhhVCV_^6&(SJ4<0k1KT<+QElkcDRydjE0}?ORb*i!6(k#Dp1a13$Ok>1uA; z_QAQup+zA5m!kXGLzXl=4cA}|_vJ-&gpa|-1kI=idE=ETFE0yUjXJb>34G`-%Y)bu z+XvWpt%T?V6|;(!U|>x2xQGxc&8()bL9ovT&YW0dlbX{=8Ulp}&9@1sV{p?wWRRblNWD6?%Fjme?Td zUg&wDe}-Xk!YE5O=j&=CVGHwBH9^NPaz{aDcU?{~r;cr;sTigRg9$*HgC_6D+>)8z z7SJ9&5~?F#JL$Y8hiMU#)P`|R;c^*}7QW`62CuLz-zX-nMQ-__LDE+dyeXI>Sr-ba zDk!A{d&ykTS9T63uWjO)gAGOU$0)|VK2;)|^lpNW`sr=y3$aJNS2<;-mLEIXY~HPq zF*Rf>SD?;Kl^MIDN~((%Td}Y_7nsUFC*`$tEs@U*ftY?mmDSN_}I_L$R;lT>0$HyRVcDFNwQ%Dfj z<7W(bu(rp3L9KOf1|szV>y5UZaM5@cAAFWSdz2kxTD5yQcR=TI3b?^Rq*AGbWj!X4 z4!`p;c+XkDavQ?gd8V3V5uUDoOy}s+ilkig`OU|zj0xfOEm*sr>0B7= z!$QnMXsqRudy64d#Crk=&`Cf}aGP$VL-XPzLih~hnD9GUDY z$WBMgt+TiXmx@+5=d4g^jVs?HJZXqkLJSXwtrwE#f&wYWO>A&c&`M>ak3c($BvcGh z3dxMQryiM8zN}*L4q+4{T`GSq+S+Q}kSm}yZ`(*Ph*D?*)4P{27=0}nxn{DJU|j+; zkE!{{@6%2bfAlP!r6>MEYV?5&;)J1(bz2z_lW&bZe(Fcp<(}W~eP=Aq%dFKs5>GoA zmgY9IYzEmWHp0*RW|^O#di+CbHP13y;h|)#@OHr4hX7*F_)sOn<#cW?Aw|@g0&j;A zux(l)Nu#!A4F|5KR$2#*yMZYcRHJ&=8fmlwtF7{a^lj?{WqE*hcB+E!D-QqsHY8Tb zPx6G+nV@pZ0hG;A1raD?{Al(MPPE(T`QE#JXT55Ecr!xUu%BEtQ5iJF!!19~4 zak?VPure*g)q3GtACL>_WbsKRBSMAmew3S+;L-7@s$PE14a&Bjbj7*M`=MYs*E~XH zr6>20UgGY?)-=dnM*N)stE_Gn1cW0~BT>6cx9+D^R?1xO*H+5G#3IYEW+bV+UZ?0u z(f3@8c~DYSwp=C;`E`prAIywS&8uAwF5jzPe_Z)?QnyI@elt~_A#8M^ed+nFe;?cs zG=vf%+TFkx;DWWqtw)~$un%7V`!Fjj)BeK64sMYHNJ37Gcz(%E+}o&2bpU(8><1i* z1soiBS28yK-JMR;aRCZingP3pmj_6z+2_)9NY{7=sLLhf}=>(H@ zK%SUEy7Q7H{hemLA5(Oy^{g|6#&}Z}k$MBale!E0X7(iCRQuZ(^O&xcAC=9Nayr5L 
z?cqqP@Isig6e$R`?u&|RN%s-1)6|VBi6Gn zH9l=Wvtx6jAMt@NBq!uke6V!mjm`b3e@k=t_we8)L+FygS5!p_&vK55-@eKRg8{Xp&mCYwb4BtLuO^i*Q2#Q--l-_aCpMzrK5Rc z-|F`^7G4_mD`vxBi8Mt7!I{8f5jQo+K>+RK0N`zj|FKXU&PvtHh!2n?Xe|)JL%LOk z0ifvYuon2tQ;b9%#o-@<%PtzROL(WWRV==aNiL=<;KLJ|1s2vG)6V*~2My`Li+?PH zDxqFvR{2YfS39J{nU_E=G#tqcmvh%Or^L|0O>S`PYru_{`Nk~ z8#Qc2i^@g6_LoIPK4Y-N2vyHWyiDg;9r$o?19NzG6 zBZXqjd-NFW$%40@$q{B{os<@AHh%2$aOeh@Umxc43V!i@(>V}9n^q=;#y=zL8&agA zgk&R10FSFkLqs)k6C*#GTSQYK$C5>viD=kR6(V+YJuZuy!;D;qx07QBbGX(=C*vYQ zN%RQb-DWoEldOt#L@WFX7@>w2`JehDdzOKBGk!)Zu@|syCWvGr&VgpDBWg%faGb4R z)Wpp#{lr8_I@Nyp0=btiDltL@+r zc*9kc-Y-L(?;NfiPLIcjOPlU}G@7{=xsj5GLF54-{iMY(0Qf>#U*aR@v4RL)y#$IP zha;jq-k>;dxeRO%>OvT{YISFWFz#>j}8=>+OW{DNOL>S zO=!BO>`6)wFlY6#IWqQkcLzR1_HbSLYOVWL!@Xt%UEaHE9b1#oO_F(Oemr;BqK5U= z@+~k0hO^#Z$l9@?fuDED=~{b#(G5<8{~RrvDGfXB_7fE?UD^bqHQQ-|J_$}W9m5XK zG=##}$B72<{?i6reM!Yr7V1<)!~PXz7d!#`X{&Jo1)NFiM4C<=Eh?(Ao663?WL2U= z42R%-~6a8$?5We;K_Iw+Aiu|XjL?#({7 zu=Qj`$Bm1tWXiZ1ROD;aN|aqo;QNEeY$}VktB$%T4j5lQ1B0Wm!EZMwrVQKucmCy1 zr*+nh7HXA5i-iVP$<(avo2GSG)W`w&@7gz2>)5yrcKe&`Ims2W_$$^orjsdIFK4Xi ztNe!RtZiI|CgU(juQIWz6RBCd^_%7#Q53!^?px{|X_Xp}s^Pw@s>s5Y`zmWSCq8ja zlYF@u+BV$+oWUDh@uh(dV@yrSZ%?XqnLUMI3)SC4FmCygy;KTa;8p5yz|(*}huz;+ z+9kiosF4)!S0pdIB`5k9Ztk(TfZkm*A=Kg}$ugs$%e6YwQoq3(x;*=pHa61xai}4q z(xUSuCwU#^v}LItO~2!kwh&>7+IyM!RHgrflq_HNiZs0Qtoch6b7Q)ueV)>_Ggf3f$4W2cYA-b8Ul#_ug4 z$}DVth|X|?4mxeJp$oyy5C6qLdjZ)NW-}qLq-3=d20SojYF#AAFV$qfD1WSKgY48$ zBQ91f!_45+M*(e_%HF-OtdNhr!)X*F{!%KRRA94sY&Q;haqrvi1rs)DS``PSsI>$J zng-JfVywIz?aa`7Nrpz0gUpx>(No6#Akz}R zjIM;vQH7|%{aN56cHnA9IWydxsytfmcXq(%{T6lL^>|j&UT&ogU#%aYWbb=1%Qa%J zBznAiB~>22Ys%|u?FyCz!cT zQpCh#@=dh!>$OhsdTR{r?%;!L?0WV>#sD^+=OXYKByB9IqsHR99Xpq> z?hD#4N-@%ohvB1h4XXCMRIk>Uc+=Ml1ydjMjSLe?{1Be=ykr^1G|sw`G$fOp_-e?S zri}!N?&^d!|Ghi0pHXmzIP;OULM7t!#BKYzq}%#oI{0)o7M?}!Mv5T&s>mRjSn~k! 
z)ARD49%I0ODCHM(mO&_k#p~lSgXWw~G=mrHONqIce2~!rj~`*=iz=rtgA}8Bjn~${ zk_j*4I?{BnGv z+_TT&R)A|xj*ijE3&B!)yGHo!0A`U$QGCpiNOh~ilJgy0L$Vx{#psbbN=(~vC*=nk zM+a)Dz^VxvhlxZ>dDNkCNxnWvFbkfJV0mKtTwRDp+=ZextkZ3eW1hZ1d16)rf&Kwo zsJi2DeQ~8yjwv435A~YSI#bE*Cnq^-YO>(S#kDJ<6=ZzjaoU4KSS356Ov}~FpUSuA zAs6D_9O11>3E_cyUuF#Q>sEjeU&IV+GHm59$5pZQ;c&hN+!|Al&TXx1(Y)E*P9g?M5 zBHbaA8nsGvw32gTI20jzdO`)?JN5x3eP@+;kx2{PdlG6_rlk|pc} zLRru9ZB1%0fKQ9xhcJRoDUP6}mEcRBT~tpKPMvuQj@`K5n5DxRC>ju$p_#Ht;%@b4 zf+^y%`aXC?2$0j7(O$}BbST^UZO;!>dh|2nmbq<3VZR+fyysN`G0CN_A@t<1u$rIh zgaxi%Tjc|EmpTO-oW_Onjg_eS?kQTss|5; z965>ce>i57VS)uN}Usz|IO*^g(U2wap%X>Iq?uySq z2U#TK5xY>JO;Ig?2uz_7ppcgTLiC*0O_WMt;o;;uQS!2(-uZ6H!iHSPY+|~s*ABI# z4JirTsUl@2@3@l3&m8e}+Kc{SpD&;J{5REV(jq(W_|V4--x+@FR{(@+F-pv+d-dQ; z9j&0%H9Gsa-DZ#-GV25oSg5{9@#>RO71s1GG(f~iv$!-YG-%4DY7h0R0a3kA8fU2(!au( zWmC@xcZQvc=u9HETMO=KS!rBXxgzVnpHmduSdn9f(ziBSR1s>JdzS;z6+Cqz&y4&@ zg0S)px+317+$#y9IJ}y@{&^mAj1JraY;?x5*9(>6qb0fI+%RE}4j*lRJ1iSo>n$WEN9e1VYt{A~9?Q|(vme5*ny}saOA9q;q1Y&2Ughx8e7=AD z+u7Qcium^a5^Z^IU(l(Rkl_|jlcfoR4*M$uJMw}8{J7>TxEda22*rtj@Q;M0AdsMT zY@CO4G&RyvbLt{VvqE2xUROR&k%i&g#S zpAWIDuGa(8`b$1zt22;OG(<9Vk8gt%LCM20B%mT?>eM9%pr0KuPjSijTcWgxvt}xR z7s6o}JT^D%?n=CbSUJ2!{Xi*9LKv0;7{3^)Dv&cm7-umWF4FVhf6km!wCHxpG5Z`@ z2->rT>scV*%Bh-*LzBBgQ7$&Ago~t24Ojn}tBz!U=%2)k&W&O9{zttaPT+U-)32%3gz08Cs?Wd;R zo=p}gcR=H(jf!5*kr0--ToK#?NX>Hcms2^#6M(4U)nfEPtYQ(IKfh&sU-}%R<=iul z@TE<;{^{hU|O%$V>TGs0(ah4IXEPGmQ`BmTR$eig}A2@DXPo2&TAMZo$9NwTpQ6YwtS`gH-mbY?P_B}*CS|jwW!=5a^vnsN(_c~5 z^#TajKz&nr7$6m-@Z(3$UoPrbPK|vvfpCw57W0%P| zA)+4GibA$X(cyGzecUV6gATNP*VB6FFxUHdE2#(r|7C_~38bcYdrGz8 zGK^BUv2{$ZGpXQ@N8F=sDgux>qd|2EDIA7*6xDhE+Ip3RM4W4aQpI>8k_| zR^oA=&k;~9%%`qQCAGvrLqs1Y4+a}9aHji7D31QThGw!WTU)ZA4yxeRK+CaZrFd|* zWoDV=;v@oo>7s^xvNfiNO${s<12taM_*JOw2dzu(^e*%EK@|GA%Ijwys|t#nZSCVCUdW4 zfzP|pv}9{x`GwRY;edIxiamG;4S%4Y`!uRqr+2X_pspHxl$~eMtL|F=5hT$im~N1U zWk@q$%PAjkVf_nv(N`x05cc5Ztyh$!JkKgA_ zqu&ILj>L^xjtbZIU_Y3uJu*?j_)or|eu)qKfk67egZRP!p=Y*9qG5a9WxoEp0CLpp 
z!0@f&83}vVbxN6w&7LpYMFXhH@J#1;bup>oyjw`@zL4R98<{;GQ{e6ENQ0TWDNGEw zbMEZiH(C+mfrC(c*D(t+BU!j**iLzg>^NUIB z&<^x7dIDH}FHJ;*L1<4x79W2oq*p8ac>(CpiS>O#dyqI+grJy1J7iAtl}IZ2&f**rw-WU<@~$&Jn>&$hoEjOVGQ}7QB~q%&Q?+@U#|Wa8iNJm3K;8V4 znttCi|AQ{@Z=eG)Gt<)mi3Q9+)EEm38x4@d1^gALjsGDm?gv%(mp$`8b6ft`e*CZf z_+R_+|E2x-nJ@Hjupf-9tiRb0ApD1cfQb!w_-;bJpZ%v+q6YD=wXZBNeEl?E`{SmI2 z9y^SyW&-U46*vO@B7s{t%D-u{SV7nezI*c;I`A0>i;h>Q7c!kT5%TAc;HYLLh`wSS z0r61nh){vr1A}i?ru{lvr#%8`kEy4pyK2l|8f#1?&?uEO;TGI{{V?Awbm*V?HQBBH zOKQpZe!-0r`m=xhi%(4kSY>8rt_-Dcyk-4Ati1(TmEG1ZyeKK@l5SACJ4Hh2F6r(D z=@LXj1eB0&5R?XKkWlHAZls%qz#`U}xZihwd!KKgxX%Bt>*8Xuc%J!;Ip-K-j&YBB z&V>jW88mTpUTUNCq=!Hj-d=&PD7iX6Tx#(>ZS>gW!h+BQ(%je#Wi+}hwVj-tpi&1^ zR#)?Ey@5bJ(A-Rw>w^QZRQg$U%BO2=R7WTgAbi}H&G3)fW!`(!c%JeQh_X~LWGtCL z;Dn^z;@cZ-U0u`#JTAmpMt2X7?Wjt}nQG;9fl-9Ksw%gk3}F-$6sXhjM5(5;ukl1aZL2A^mIc#z0~*bz1F|uZ6-*q|^bB;}|dmx+>T@) z0@^Tn-_|DK-JUl!HKjVkyG}TF~D}@AZ z=czNZML$rQY$1!AW&T^zYM5D+wGo8PThmw|I+?Q zy}3`&)%5iAJ3fxnGoS22M~OeeL7~tL$>6&gDJd;=JE&v;^5x6N|2O$SCp* z3kxzbGD%aZ74Wm&shhuhc^@0wxY3>QZfX~*fkvK?ot{;E`gGKQ>>*9TTFb4k| zaiSe!<{SHw_VV(9f9t;#3)BZS0O$Vwa>E7=W@ej}uMw!&6s}p=FjN#Ms!nwLtxAWF zudgrI@e4-c=*ZRD+S;+TBzF4et(0%$8UA+I|LP!~s zAxbwUA?)yz&2PZO37cAiO;yFg*7l3%c47cH-mzyeUGP_U*~gETWkAEK{x_PO`JJCkMWv=BEi3D0Z{u>u*~_{ZxP>*xwdot*jhJ5iOE%K3;@ir`LEg=hFUu zXOwVj{YMg%RH5I?%j;yGmh7_oH9_?xP|6q*&c5UAgp((fup>_nZXut8jDSfnyiiEv z$=3+X6HDG%&x!Z*gmH0kClyIhRWg&sEih}!i^Wm(w5?-bzdkmElRwUQqxYQsNlhyk z%@beCUskj``w@7wZ!qzyQ_}ZQ$G@iDED6579V#n2wma?9lJ+C2ov|uAJ3Dhn8r!|u zDXUmFeCm=WW*BKuh^1ioquNE~Av{VhJ>_<6aur7AML-J^4N(v&9I% zSc`YZ6!k}QqL_VhkW0a7`|um42yVZRWwWxkw>Jwk8~;cgxX5w(jjro_$>#6(W6Xvh z!44!Dm(#r^KcJ8Jz-42I-@kufb$bw>iX#rIxr(#`XWO)2|5Mwg2VMcSlC{>m?+-iy z&4x_;?H(O3Tco%Q9NHal6M8SFoh9mWvWTKQkYFp`b8wzgK$ zauSVuh%tK57(~3PpY#t`hWy|ZZ#1}~DQjuEP1V>#K8hmg%!VY*42%qn_iOm=0gq8| z4#L(j+7`|=3aku&nJCzf|I)_iEaR@v!)O#Yq3u8_#MsND9Jmxi=Bv2B$Ly~UgIg&# zdY%S*pKDd!^@=p~hvZQT;r7z#XjRY+Y^KHrtk(cV)an?$L zL%6A_pf1FD*(rZDSq$B3cbXHBhlACVn1m!HIa#Gm8(aPc>F(jd#kk=u(0_=oAAbwH 
z9#Cp0Cnr&#gXe7l!lVq;At5e>A&0y+LnA+H&JKxIEuXYvK7QUn68W+Om(Gz>5JTOD zb3{`0$-E0e9?R;j9WC({$hZ}w_Z z#NUEhcp(HEmw%$m5`P=<__`;a6|dx(fb+a9ckJC{X5{5d+No&XyrS~kN|2|TskI{r z9Qn1eK}<|+ZDmC%?B4DB;ID2-QdWP{EQjl4N5Z6b{!);jOPiOS?blcTSTz1ky(H;f zp{TcDu|?s5i~~Nnt|U ze=DLcV@E0u4$OokFaGZ_mkIvshHvJcp0zbKH47+|u;C2hHDhX2zfjGSx}9{ zm*U?_8Lahn`% zV;+y?%Digvi2&+BvPJ#=3k=S)jiJv0I~Cnd`OCtni70N6{#c4U3QPv>34zHoe!dmt zx}3_9Y%bgUW2hy|Thw3fR(eSmw=-&w1^%gk8%;`31^Rmc;3eIY?W$3SjoaBfsW|X| zD!V*N61?M&O{l*vgXiEX&qGzc242BoHGkP-KE5s-Z>nNzWA}e^W3USY>#KxYGINQ5 zpMN`syUr|mgVn0-7I=?8Yoiv|CC+nTSK4KRe`iSio;&9qw<8!c7ApR&5lu%_=fZ}l6UYpt(0In%Px zv)Zj}`<`z2;| zC-{4O?$*V{wP*Hq(k*!)*$%-VtOlw5TAp!KKbVq5G#4Gl8-3{tZ9njRxB#QykU`Ia z;b6GQS~2rU!*>JBB`9tj#BmA@gS!JBc4`Js@<<2!q*b(^e_fYE_@g9;@z*6;mT5VX z`KdVa@S3bwW1l3=CEhKU22bBM8=n000f#4I@0@S&PIHk}Z?yZ7Zx*cDZWl3=;K)+& zk_)m^v#{G(6_^*4^YlMmCjaL$soNdoAm(52+ooz!d5l8i6@n$pv= zb7LVGGtljxsZ<;Ss14!i%2sgKsan&S{J<-Gu`}JcOD^ePId1AVx6hU;^OFA$GTjY8 z@X=o~y*z}y>RNeAegFP_HlwDA;{STUa!P(MOVA*268c`&KE?=q3$RfUc`>#geg`2sayI^s>>&!n!Lo)lstt-l7lxXGlAC-BRBf=j?4yqHKpx?x*15%p+Do+4q!Ftgk*s-S_$t6nCOc9pAF^ zLuRz>HU#S8;xa{i)M_(Kg&goLQ~;CmAhSY2EFQQaR}ymxCaD2=t*vpdo@gSFauJN zc2u4}mz7eDRlG+<(~z z-S$yPejgkZ54foG_?B)Oddm&udB#6zA<@^@_fn7JNlf5Wr%)Z|C=wHu3^n4(o3KzM zFd$5Da1ArO{)<_B%bQUa_HWkv%yQfgI@=$Ba(7vCly-ODz{JJ zx<(|T^G2=Uyusj<)?a}$ily1wBPMx@aE_jM6auKE6ZNA%iD0`Q)wA~7AJB|9u!e>P zPU99K5QVa`Dl$lh+S%Lh?CmMlT_xWvz|(B9au07kw|v^JkN|o` zBTh<1#R0OyM~^PolT<<49OlgY2zGTPd9_yql1~6upe`sP_V)H-Q;I-0(p|E|d=qYHb0n@o|g)yb4{EuqamB(^0MO#r$y~^dEpA4l16A34n z2lW6FSMM}Oj1j>nww5XCBXO}De*LuN?580;Jw2cFLW2wS&3H$wmdJzF<6iP4fb1k? 
z$jZPcI#u8x8CH=f91KwCU0ht;+1UZksj0A#u|z-t9u^jcMJ_P%4KI)p{Ua8mLhy&R zPtP0OH-|C>T^8@}ly6&_3SJgFDN;9jKKN8plEbMl-H=2m>b>{k_l%9Gh{!!uRIJ8$ zlYsAam?RtkQ+a!RmkQHg@Y-z#b+g7thEEMf#JwLLs3OV{F~?LK!moy<}AsZJ5~7aFT_naf~1u} z{(mgR{yF;m{~j0yUs&)rFf0td5c5BMt=IoYV3?bSM?mln`~SOW{32s76J3w++wX?q z`(f~k&cN$&jwJ&YzD4UuJ)n9rFeIx32lj^#62byOG~ z@)Hz?2X}L{hO-(^wDqgyo_Abo0Xvm16QZ#@6 zdhl&27#?_g?d#!jdU5gMfD>RKIyyRFW$qaU;OOVipE)=H4O z{oV&E(N$4l+&bGH++JF`9f3)5#LGew5xd%hbD=#J!HA1iAsleQg+bgyPD#1Hx#_dq zfk5c!>)V){Yi4Lj&ZMTR>tbuW%;fCsJTyG)lG8C9jE8nZ=_y)OH#IqF*8i;npJ>+uyC_l2 z+ZNmYH$yG4wiR+Br}#eJ_wL=x4*X48{~N%j%9$c{<>g9w=1vtA73aTy|2{tlK#%NT zp4mdZlcAcLnuEhGaH61Re!u#NtVD8X$uf6H2i3u&glg9-00hjI*QOJ|g&PN~_2Uf41KK;B=G7n2x zQLzhWI&H6D`LgSKcJOVZ-tqz&UFl~mN4o`%m;s5|X9GzA5 z(k#lg7uY^FOFA1(iEYti^r^1>9H{&><6_qHKp;H0ey{^ zMt|Pyjb+#zA0H2D5Zx$-y1L)ughHt+O&@Bo;S1`G-zm}aMtmx&A!@fUJ-x>brdN;H z*eqvbCM3QhWA?`hnaPJzk4&thGBX#4goGp~C!d_SyI^GbIx&Yaddl0|&t4r4Yiq4v zpTy{xnW>+nDxfRjI&#v4F`_}ltKPHfBF-b&n$?_N!C=ptp3sG(C6B;u9UNqE;(A07 z)Yv%>$tWn;w((!zr_+1fUQkf*MAPFJiHYmp{(`-O!<9P=1fKe0k| zh<6K8Qc?|b$9!l1YL@R#>IIA z2Fh$Ag92u%t>(`vbw59^s;&lwt2wZ6MV}w7bg8JQ%twfcMc{Yh!xa4d zV7exy@#NpQd26=aF$kKv$BWpzkbnME0f-qirBX4KMvr42WpDaSUr%p+b2DG7%5-LC z2E;1>uok+WnwVJM*Hu7P+!D2w>+kO$HE!`c!!R^(cX!v+BnU=ybV&`F8UXczgQa#= zew7abDSP+Fkz>5fO-)Cpr=!|aKYUMV5ZKH@8_tWMuERZ@as@>FMdHC@5~>>N%ydHT{xDK)s`)fx64a#6ma_)^W%MigR*u`YoS6 z#d-HW1%VJLgNw(2z4WyzPpTN%Ln<;wx!nuHpHA8dV1yTEG$KhFU20vO=mp@!wE5vqgbL8_p8mzt0sAs zPvwimf#8aS^n3L>4_N#Njuh%y^WntwURQOssHbPpu1x~|C2bfZ6zSyj)XK6J!2n_p zzf^UiFnrqdWEI>&!=N{pmX?NxhgX7KG#LQW-OcQ>O1)w$%YbAKeuC>KXapY2qh~1P zp`i94(VM-E0tBXQ^kRj5Y6wDcg`c-i_P`FvBi?3Y#AeE*G*S9we;;q_bWJT)g4KR(bOTkOcws~ks(L}5e zjS`Xs8&ED~DcVzuot>3)1V=T5^EWu19vn@9`UN>TDm!~PhUC^vLrMxs=KfpI)sXr5d6zcnGJDuhLrHd%&+PPT z=xRA7Utj}iyyks)Jt#RiM;JMarw;5_ipaz}20rMLzP`S;j*gcE z(s`!D`RTzzGr}A7U%??%h)v_4Ka&#?)md4^DJuu>s&|ga>1t`YN`mZ8B`xeG@HRco9x>jkW3pgbud7wm5NE$m$l#oJZAa(BUWLvx-r*m9v zLF#ugTN&9s$roW#m6I1iU2be?F{q%lOZcNXAOh>tQ5yF@!p-6;r9Zg7dfT2mpfU&+DrE+I56-Ax^3`|+PJ)+R;`-TH|>!g 
z+b~#xL8A3Fvsy!6|Mh`9C>QE?EM`k(9?aIcz&wZYWP4x8M2KOyh|GTJ=;-KAsmQ@s zk6&E$CnB1HWL#dXSzZ0U41kMo)(i{zpHtzkD2{<^g3c*z`UqQZt^yh7b{;&O21yZN zes*?tT-+cCHb;`GYHQ~kT(Uz$5e|vS@ycsM&L6U}?!wM?g@uPj<~jG0P;une^iV>= zb!v=Nt!Z=i1HWgK|pcK#ShcT0D1MMZXI=Kk)kKM3FE8eClF zzu*Of>ZPRD4xu0xUitSjJ#*XX7s@bH1&gdcEx?kIaM0*qN;OJN$J78}wVYZPW zOyTM57WE81PrDr!8rIXiv9eNSJxFF`w2+#bT3N|OO-+4$v)U75Ytv|_RtZWJgVPOd z0YBabLJCRt&hzpuYU{8*mH*t>*ofY#RaREs*=d4Ms}m5phWJkQ^h8J^5E*qpS64sW zZ`h1@nE|BxSo*`wXNhg#W19DlI41T7+br7&744KKenflfjoeNk0eiqWy}D`+0_N+t z$;s9>HYus8psF5iqFzr+3r8Oe4a)>r_Kzgy(G3YdsKUrL+CnH&Tmrp~I*#rzr-5Y% zWs!nP`nz|?mH4KTA*@nuVPSz%9i!NGmB%DRM7djEzjE9stHsE9JGj{BTVvDErU|ll z|HDBPE|8GR0vAd5sFEwk9z-gl{RWMqppvfFD}G##e;fDi!+6_vC+`j1nB059lz1xGw0lsXT`kvKaR zerVc+!rw(3#nY{NB-{)LMyuXCm$U-p@0h2~J1T%E*P~TYrr>dV7419Kdf4HpyH0cUiduEi=Ldc79Tbxv zluS5cewB;mLX9F5feKQbd#b>%eLsErw7WNxfkj?F(infwo4)In z17z<4su}kRXWXto1r>zXsIxpC9{$3O>})?@4@>Gw6DfXVur);ZtP#oPJotg%sHk(y zeW`0qpKIWtGkpDYbJ)gaQ=#AzuM_{pGMD8T17(Q4y_iEr76$_6*I%$Ra*N&vh#6G; zeHULNViH;IA%Vs5NNQ`o{M+atew;Bir5&y= zWqlgo*4(1|m2(s#sWWva^F#O%P04{k%qKJVG>x`3Z?6N{aGS*By3bq|(j9oQ zBKDNufI{{|YAQsmVBO;m84(w0$;mP8z0VKn2Zkdr{|(Jo%g%0lSkhrxz3bI*V}L$l z5;g3uB@l&MLTxES?u9T}S?%ZL(LH_7z{KP|klF*;WmYww#WXb0)lJFB7-)y$ONzZX z>gee~L0xm1ovLeVLx^YL;5b;81<|&kfPh5t)@9)c!ku*FAJfv}GUh>g!)#>af&Rww zn915^^NGBCe!vrVnQV!{4_qs{A%2Jj$y`g}S%y41=6&6jsM&OZMyi`5AQncxSR~Fx`iRWftO?&9_}v zS&3=%`Fj^}VNMR_Tg;rtH*m-67yK(GM5Fh3QDVFfwzszd?sGL}_ErKtkUt#1Hp;yXCo5dw1 z$#2p^vSt<*Jia2gs!CiQgyZl@+-QAUYF{&#V(O6UYc@tzr>hFh`nf@IOBo&9_0Ex2TEd)Hnay&O_21$cBC@8Vp6KtOJ#N zFrz%ml}msnCrYHPf$HuY6>(+B-G#mi| zZEa^U4OYm4bohjzGn6nu!Q6axB)1=OqFGksRy@xTcEq+*sY_5HCK!0-_!{Pbj!b)6xC=%e8!ok@T zR)10%%gD(Y3~Iv0#)cjqqRxP6rfp@hCgeDMKSUnlBLRO-iORo_g^NYKXzOXt%gb9u zPO@Jw@s+K`ig19?u>rU9*;}wO3G%0c^o2V3`H(eg%?z{6z{r+ZV@fo+hqm^lo=lgR z#O<5u{#i<&Lnfuv9E3`M2w8D+S0!?&oP!2PP*5U37K)9Io(tz@5=aEd1c>+#VmLj4 zZUWHlCUboG$aP)8$-Z(%$T3yGrzUpDb!~jLL?=v|{fp}of>FzhJ8z~jnh|AM7zKkx zy0jbhnNudF_|#RqA8f-MYMnu{*{W9;7dbZPKRi*IVG^_gaN*&1GKPGX-&gzko{8Cj 
zRr)mnGhhH7@itX}#jU^ZmV>^=;n*ArA^wgSYBMa5DiF7}>#smYlBkG5H*Fk3&-q$R zT}f4yWW&llC5PR^efHw!JmWp<{-w`T;(O)$o#AyQBivI6*6K;PEcbA3>sngGBqVOL z;oXw^HuBo{REUY`C*A!-P6mGdVaW?qstzntQfkV~r7xM0Z&%2! zZ{olI@P%mgeWy5r?n!6}!tH<^v$8hhFEdK@rgD!nE2&WnsyLilb6zK@BVqxS=CrRDRIX93S8iOs{_Y*2vH8LL zy%kjzr0I0voFGE+g_Jx`-q-(t@O|k90A<(Dyt1-pyYZro42pydO^q<{6=Q*qZTfNu zhqFG(z1kL)?=!7EHml(w`i2!ZOI0vzMsvQp}iqJhDOkarKU3=QRp zcWJx~T2oUW7{9rmim9d)K9RTIzsyRYmlSjDC@$8y*Xp_G{c?Yz%jQnPkA9gTsP4SE z0iG{i(0^xl_f@1frPg!0iK(gK)LfaKw9c{F-rh0jOa1w zZ(Ic#JVhtY)F2|FsQyGD;CPV6YhyQ*iRyjG!V<~$(+z?Z67X?wPt>nA{m0G=En{Qj zhE7JV&apxfxcL47AR79lu>m&^`IcJ&v6r}zx(hE-NV-sU9_g^@aO&{rc!~Z>Yi??) z(p!Z1_VvL)v0=}jBaiOk!xvj4myiv6`z9BIN1X1quCIZWW@cv(dreoSs3lKc?#~$?N zy#FE_?Kr$wMRWY^U_BXM)Y{4lW1@fPl%9r0*kr$dVtD4j$uE%!9_>Bx0K2*OG7l&H(cB3vWJy8r}vKyKK2Mr_mfj#21MuR&d%QE`dwSM$cf%(Cbq zSJ?=(`xH2i(I7wG+R)a)@XLy|n0xv1<-|m-@nH|!eIffIYeN6P+s1pDF8ID1IkJ{0$(kBENM zIZRnvQk0|-l>#5>EqsY8KQmvJd|`~FfrW%JH$T6(vJx6O`TaZE-2193?(?&3Q0}wv zG5wa9^Fc#XvpF*_R=v?xcg7}3_ri5=vrVwz#i!~nWk`f7d#=+q)h zI(w`^xB3Jg)I63yj@Ov|Q~YhX=*7i7tgMFGZ}}xzSeiV-T3T9wvyZ&QrfyT%G56uc zxQFo@F!;f0PH!;%NVCXsp@|5kKk!FH*e~`2sm`G^-p#GieBf@wiV;S-TK&(Ld^c@E zw8x9fENw79P^|9mV!B58JxwUf&mZ4^{-{=GdNv|MBcipl((Dw%0U7B2s+3kO#Cr;# z8)pG^hmJVWV~ZgQb(QN5jQIt?lij7ilJG!T}E#g`v&;SKo4{&^)Vo z)$hh8XWTt%i5G7{HC7T~D;R#0czw;$vV=X~7MPCFVUF-tfoNksnceV$fPtAAhcr*F z#}1v3zpnRVo8Q@N-+R{9R-f0}PZAOmo;|xap?xJJ1bvZ-7mA9+&K94OL&H}J*8OIm zLpjZUYKCO>upV>A`7f^r7g5zd=9v2IOxPj32DO%W3Un>aH6h`~)yYgT-@eh&V?Y$n zd>`{^c3>bjTI@GTjF&C^mg8r(H$=50FAs|g-4vBr&UlgZY8`v?J{Fdy?o>ma*H;ca z6bbhN><$^yv`D5sxmRxWvRokEUyrs$w0|TsYo;(t(@b>rN2w0R~aGmg;4h& z3k$6tVbReTGBUe|?VDd3eJ7{ROg!7iT`&X(=6$gq(Cf9P zexGox4o)et#*O0#G;Sg&^2o9E#HVf5yYL9qmfw(BA>;CSvoW*m=%mmfz*lOOZR3k+gY|8>a|6P>=&vgJ zXj_^ZQ)vsIPOme^7(^>-`m(ZlGlp^0@60?sW8>q87n;${&xlmB#BRn_?_rXBm~Ud- zWBO>T0WyK`!QUwfGgGuD-YGe!hA=x)XS%5876r>^#jnuq{s^3O6F{2<-X7pv-VVldrA@OnHWe+`A|a{V7SPpI zzxv#J(h7Gib#+8wHg@ie+B;&QqvJVvo9C63D9Blp68)%zcHz`Lsndh7ws%E)U6bMB 
zF7NG8kdn$$alA&S(?d|ZdMI3}cZHCJMP3vHJYkhJvjiJk`LA`pckf965F*dOTn;uQ zxXt7g7YE-pP=)T)b#Yj>8hI3tIUCc;@`iq zv9aIi>(8t{IwY5F(9x%weXrvem=ZG0>m0GITaY8IF1 z%S|U@YH532Z2kn)VI1-zW%kFx-0lSea7W~K5G28?AOgMp+uP8-J_AU)_0Yt_N87gv zgwk5LozcmFO+73*1(_}@u*Ai6jctEv>r_`eU}Yuvy{~C!TmLBb5D=1!RVHtbWELnz zTS#BX8UZ4~hr~qpvzV6WV@6X+k7msMPEK06tuUk%g@VGYZq@- z2E-IT7Y1ou!TW@4^=3FYH~{b?ARuroE76%K0z4joRk}1M2hmUeo*I8U6mVVD+kCh) zJtK@oEkQynls`uz#5 z%fPGOZt%X^C<3FgnA`5zSruusnYXko)zmC&at!t38o{I9D@rqxmwGs&ZW) zGBeo|J~^U;gfXlqS?Aj1OU=~?4%?$gEAbC{T|2EUEg=tF1*2gX7W-MwIZ8Aw==U&m z9xyQK?MXFZSO9Twm9MsKk?51+09P^~0D+`}tFcLTBW;?(wmY_25`G-|HHWP3LO7 z5Hw)n_@3X}FXd2a&~u(Vwc~EuU7%Xs*?H!KI)jZHS6G<)HuJ~8fR?7=@!3=cC=4mi zeU1`(sbATFyDabq?H<-xz{L@*_$4+#-Xhl9H51(-JL~IwplsPJ&cMdz*p229;sH@y z42Hh~2ZxUBJ;=8kpK5@7L003peVefS5LYh^?BG&YR|k890?YOG=7V6bk&YhZ=ZLyiCV=dShyv{w z*H{JSF2S;lh|kQln9gN}9@5|aTJd1%u&kTo`4?zKaOr*WB2ax)ERv7LrJJS-4etcC z@Rf0k9+mVW8D$iUdq@*6*dXB~*adYU_Om#JdmlslJd__d#sNf+e z&#dQ#oI*rGQch03a+IxvPH6xa<&cJojQsuk9>f+gO24iz%n@hG1gHQ}2>?=9g&X4> z>(A#KXT{w^+)}q!R?dDfXJiL&6cj+2(9B$q(1nG4LJWv^^Wx&L@En6>S&(CTWFmkg z<`r8-{_q{t-T)^Hpg`y?x7vG3j+ z$?S`r6@UC_c4+)OPy5rykNDQO;pMW!%p2eXQ>F&VgzUC7U_zbp-c6K7{hrN?i?pEaD)-TP;5%q&!gWWQ2!QEed zw#nCj{Tj|_RFV!o9hE=rnIPd86l`vEGHA>2fZF1UJX-x8M#GePh~J=@O=Y(#2gfwt5?*}3(f*w5Zqcx}{`mwA(%7(1+Y zY-|nhg+hd>XcuxPn436K3kWXvi^^Y};X#NP;gK3opHk-F)7T%I(|7VlgXs5d(j1x^ zNa5T(Jm^2r0gIn=@IIJFcg0_(3YjfYKNWCUyk%m{W1nk>FQ=He6SMO)2E#*_$5&e` z>%1~0C|&yP7jZ+RmLG!~%`MhS8lx}~%n`C!S$3A%gH5DhO-&-S+4Q+JMuvupd6f7f zcMnk00Z5r<@eq;%l5OI9Bf#F_>^#A}hxnv2a2@#6!}bc`ZWcdk3jpPLtHnJ+k#yOG zgg`gl7B2G3sg@ZpF-cQNSvM`+FwxcKhKf$luDG<+ZuN<0KLLWd#FdgPoisaItu!+0 zJ|Pj&Bi3CtRx@(B_@A_9?_h#YD#ug|4b!Wt3d_siw_LL*DvD|_-;mQy2T6e)PXX7N zAY~@uBx5up4tqfxwoeHFUa;YnmX?;XK9T{q8w1ag^92Y>T6{$ue;>NLQ$PA^XN*AfyEB{R&bS$c+s4VsG2>gwuz$D}%e?Y9J6;s+;0$$tR+iOET? 
z(~UuKESqOEQ0!W(=~<8lGm_th=R?u)@7}!wG%v*z4q;(ovtt0Ye+iQf){~ciMPe39 z?zYO%12)JqXum)Vr5a&bXz?X5Y;JFF2cd_h(Q5l;M_8ta7ly2XiAj8zldY{($^-2H zutWLg>iiTt;X>0KQJPL+p78+E)CS6}4PfuLr>B_5#{JtcId4$7^Z&iKLd^nd;rE!7 z2+H3A645;-K%odD1arqA^raV}ZlXtu3CauBx7?y4P-ESs4q6rx@tgShb1dmm=m;qw zx=lBp`jC>kgkT5@tAaqxkB1Q#=ossL!1;sVL7dohQk$Da#nbT7GJ&&=ghJ)1gM>&r zTu;^`EYBZ?A%hPS0Wk*C#9w2A z=(R2s=i2Yh*U?ct6jqwmmw?Udo$FM)e0Zp;DyIN8ECar=xNjjqCm0w2FLiF?+4=bRfQ-zqwGG&6b#)?=pr9aMh2CcXaVPhX@bmK%5fPb_ z2uc-?OV_rx0wV4?6iNv62L=jVD9FeU*39V+nAXvdkkyrx2G-XZQ0?ms6k-FVO-)T5 z99S9ADvV4_)_4SJzy{d1He(@@E07vC)Yf*l<(|whEX2Qg14bXve2}Y`X;?x4X(;Wg z=Pnc(3Z*H?wXK^y^q@sr(KvWYBLmnJhlhuln3yvfzYuyXYvuCo01r_j$!T_tuAQd@ze_tPd&sm^r2%H2_aO{9iXU((f$k4 zo+R<&<(TrsYomF{?88%Xl+ftDbqKw-0V(ZEX0|xPLX89=fNgg_c&=r4zhH%IP4XDn zx_VfY;B|!05#ICr>y}ln98Ki=KsVnfBR`Uy89qDna(^b2l2~cNM)u&r_u=8q3a$I0 z^e<`69@P(YX(7p=u9&*I9-WHIO5*za`-9S^EuB^6=F2oG5D{J$9;qg#;lH*dR#i~g zIyfM1{(|r&q&xTaVPp7S-W=VVhq+(1g_SpVCFx+{>aE%R}L-V{x`9)t{sX+Iww&FGB7ktGo;s2%*@Pw6jm!WvS51dv(y@m z>3`^-#QtyK^%PY^lI#J~WKDT*AZ(vc7nKE%=$ znx^iv4i0YTUe!l9E>G=3VB6AX;GLytv}BMAk%hV?M!m3pdlgVtFJ+3^MN8QC4bu~& zm*Ltfmi6^_@n}_9S(NwBef2@$mqxTJ^!Uw-4*dxcLIUFBF;l$GABVtJBeV=!!X1|X z7}w_d`a`#`u=b^j7u9KCFL(QTmlz|%e|srK058*O4FCPRXncEpikpEUNC;TmV;LC` zM^4z+C45GH3VH|X?yrD9O=x4I0iglz{VuXNxk8Pz-@k9CHtCLCj!5u@A2R&s9SZ+? zSlr*I5GmO?J8)`PS$G=K2yyVy@Nw|a2=m9MAm7&uIbOG1KEj)sp@ z8vOO=v>#~;PdgW9PEAg2ji+}%4{bd?T}3!KUCo>=%{(|=rKItQ}`hPA^wSaL};_Cl43cnZd=wc_%%)k2^TX($UV@hTX!} z%FNTl*2>D0!`0IIzxU|RQI!Ae(Z8Om^&fg9Ajr)j$o;2B;GiQ8UjDltaSQxE4E9}z zT;1)QEv!6UoLyW!?VRlV%yPAs~m?JfJ zD-SR*e_DG)!ztqq1^_ht1e{6q@QDZ*CSfifZf-7NUI89Kb}oKqE-q$pnX-%J|KSQX zcNa@93oG}Bw{VCu#13Ksafi4-Jiw1N#1rxvLIY6)?|mRv;Pbn8b`a;kKDP#cxr6_! 
zf-9~6+-U{=;)Mu6`2QBr<^S(y|Ml!F!GBfMKQ^FhZm;R-#Hm8V&&36fXacH5!zaYe z^S?Q*NswQN@89?23>uk4S^v-$r0XR@?tB%(Y5ddn@ENaWu!}Z(n$&7jx20fB+Ds+3 zc>gr-&7e|q>q2H$)(?(;rYFcrjw2)HgvU>x;86#B&wokd>w3DJPL3N(b3vniB%S>= z@$7m>^z5u5Nz-dLDF3aMe|z)!)y}+!O6I9(@s8Fcmb01+UpV!v5>%&a+{v76o`Oyx z(QdcO=%5u;(IVQW&J|{FSMK{yu@)O?i_9jV{X(%9PalL8ZY10mwo56GzukMw7nmx| zTKH;xdz=74(AMc2EefoFb6kM}876@>O9qp`p3S9n+dw44b6FfWvO*QNxBtB5!hjvM znA4@uK!o=iTQ#rUvgDEY=)+pIP!2hmaZjhMj1OO6A#IgFICnmRWO!9~64OqvUEZ_c z-W5DQir8=Te50i72PK`z;l)Yyd4<&EO$42S;?oRj0+D<|t9W*m48%s=on)1EuF@j( zxu_15K}tLIjlz%SL#gde#S3T(4REjwWCK@@E$ANPAe&D`O2y)#3W&_Rg)6It)OjK# zV0mToEWZ`uWF5QL^gvozqXJ*^!xQ&=eoSZogRgG@&LnEPoH!Fs>||ndlDx4ov2EMQ z#I|kQwr$&QY}?7^`?q#$ck8dMI(?hB`{}OR-52M14m+&!C}_r?mKp!M?bH@Wp6w~N z79=80-jw^?1w1+u3w?`&`b2%&;ww;<@k-1`PSczGG^y7&DhIg;`NC8G;0 zv)J(9xTEHkLpJveZI(Ls=`@s16UU=IEOzWs1#(hU_+Y4LxLPqU7V=A&U|o62p^AB> z{Ic7@=WNFwxj`|D64oei9TD%evc+^C6FVmZMz*Qa*nY**^R_t~n})7>BvvQq(hHH; zbm&A~*rTJ@vi36i!~NGMpjt88BgfWre-&+MZhKQ6B!vtPgEzw5LcvMI^qX8yQ%rXv z??nS{aQYx*l+8v$^T@T~W^@C%$=Nt(85aZ@h1jrUefF6QT^W1XN4{%q;q}7s!A&&fxBR_2+>R`;l4xG{5B^kBT8 za>8doDu(x?K;AI}Ai?rkLNzCU=!rcnptl%-ctCd9=wU#Ln3dkd_{LWRJp0#cK;c09 z{6T~(e&g6P5^;jj#wzo5i=p*58gSTgWR1fx+^+w58x#?^&#=m=*@V0u0=8s?_^*Zl zjKGo1=p0Jf?=9kAK#XHzEz%MyNE9&*xgQW(Cp5%LPbeHbXTZUig07bX?2=R!<#!KQ zgVfmdPd7gH=r2G>0Wj_zl1_RSlh!95KPuJkpAh};JU71z+=)@m646Rf?w$tWoa@l% z`jEJQR8(_&-N~g?`wHRb(GuuR4c$poF@q#^q#2?K+37gm_{(IMQ!#;ZkVWE$ti>N1 zE)Y%>x+?irBfU6Ve%eH4G#xZ0&{RI!;P~l;7(mO?;o~sFMl7{lyK>*Qs0!*~IOIsL zU>~v7AHw4Cy;vPR(_mUehoCy6!7+xv_KBFlu=0qcRG%Fc7oyR<@If+`j2Fo0N9AIB z;!1j^^XJ(e_rl!9`lj>A*Xw)y*+6eVe{lRJ>VGm2qJ!X#{)yW&O(~DU{p>1WY_N0m zlMrH>uw&}pXDo?pqVEYMFDOukyodP`WuKD-9;+c%zh`MrH4w@L?EV6pD;OhQ1LVv>0Q4D3Ktr{|K8MRVq4=|jc+Jfe z`CpaDJx_;B;5+jM>>g7+$+eveWeU0bBjp!-gOT8;F?G!t?V4%Sf) zlW@V8NJoj@k~K^!XZ%_rVj2S1*7p-KO^>naJsZu0uj3hj417n7g$_kV#tV~PSRx!! 
z(@j(eAv_i>{xul$M)Z8c?&3kN9Z95JsiQ3{v0t`db6w!KF9leA?_~Lm&E(f`D0r|b(E)~ zPy?8;c$~o$#52;GRL_yDMXN(mfrG4<#X{F4}Sygj2* z|IU3U_mTeDP~ysvE_A{&_g5)<6pj$Qh&}FH!TLqL+(P22MgG~r8wA&P+q2!*cYL7p zXnE%;*+K3j5V-GLvZowB>M6z!n{t9z_O_2euzdWpMZf=9_CgS39M7&o+Pm9FNNVZ+ zIwC@vP&$z{Bv4z?H6YI%TEZLsS3Xe)pGcz50#UuhA49!Qe|Y+*PN?)|{F;zD5qYCS zEeCt2%}j&cLrP#|{Ikj7z!>=osPEz4xd^@cF;nx%zsr$Kk6^O>dk}o@G)kA<_}{25 za8@L@V8WvoaBoM%u53_8n62Ukq$iO9$6L$ph|2;DLx0pw+99QoMeuyb!e2*>b@J>} z@iO|Bf+{4rDj~fg;M#-)J{azq2D}epq5*O}V%7Eoyn31UMHwu_?^nK;mR8_(KAeh{AD!4P!`=EEM~%H%jK% z!9zGujzOQb-(L@Wdj2KSC+KxBaj@+4TKw_oid>L|tVbC8@#9mbIc_^@VIKz@2oF^e1{rV_Lgl=$FVE)#o`#1}V(TpBhlu>MZ&*${|w=dsAR z4g^J@A=-ukUV`1CjrbrT8qw9580O+X-{vA}d1OH@`D0gUcDUW7esDg?pkwH`T)5cX zoSuw+XcTwwVdZx{Ai|+^d1UppwPkl_l~!2Z_|k zzXL2r_~nSrcua3nM*kLvi=k?MOg!Cm26;)i0W7w^z94c5fxgWz8Y|T732q*IY z8ZR2O0}zF#VU*D0P88qf+l$-#!^zu&oe>@puV~(SLQ$RS5iJu@mPbcTo@A~N?$bLr z-6dGxAuSc**&;J7-%mFOFHdYAF7@r&3=KAXBF(dlRz`<5=jZo|>9jRZ_xf%u+!tgp zqNH1b`yja%FFL@mk{$;#N5>5H(n!wa2*ocL6jqC~2|C)JH~pKF*ZxL}n?JK7FzJ_^ z7K59(9U49*Ld2u^FX0X?>>Qje+kDc*qwIl<(MDQ}GDUaw`8d9uywqb;x}Ez5uGCyL zhTL+)3!C=&>Pi~8MR`68u6aqc&p2xT3g6X}!db=49!qy9mvQ0bhVXdCy zKSb)cOdoN$Z%srute>N+$Y?1vt<5Jzf*ceRn^UunOFr(iuv4T5ZPH!w%3ReJ zgW88A4Wtzg7NJA@Y_s{euo;=mQ)+2tShVv#xoCY#({RToD>W0@QPS6dv8q^!BWxz= z28@d9WgX5DLn-5thRseY^_|2__?ehk304l0L9QQEVBA3#D5kEsra^s(79|?%hwzr`Kap3*%f} ziT?9&W69m^cQ~i>r_`qor`Yoea$Akub9OlncpqyF(kjl9%V%2M6ZRvsLyeu|-0DPU zrw;gd|A>Yfvx4-0oS0GMS7#ffR#;(;w^8D4b~LlO*0{HMJC4W!$3hR`fMMMpH)=%` zW@w{4rnw11x5q(m;7eFr5m1qLirJ|8=|Xc%bcsi_xE6nZSldF4Ad8LN;GzgZ zL-|U9K&iXXy2QkRhOPYt>mkA|>HON^^nl*;!&W0x>ywUGYKq*1E7I~^SoK-FoJo2j z=UEI}vDEX`UE>1{YpJ`$CYJG?RUx_7;E&XJ38Uz|^TMqEm!3J3{zOjG8!V!iXs5@4 z)dTK{gXjnUY)N&F$hx$sNMx*>f#Cusdpou(8$0A@MwWs_o*1D*wYKCiCcU6&$}F)2 z1&3w^uw`^DJ~lu7@PRxfDgu4ev5Mw$bCVm$+fmQDxND?yb#x(CMC^V8*O?fKPVav8 zj{m%V-WYt!yO>?htKgjrYdPJpvvo#g%?Roqb^#NOV&~-FB$!F)P>Wcb0K9ftO$qJh(o{;ny!xN56KEkVdbbHV#KIs+UYh4*=^RqT@Rw z`pVrjg^%DNhww9(k5rTkm(!-s6sXJnEToR`_M*$C&d^jgwb=I9OXZ?uMp>P-t5Z&t 
z;A;!-6ti_z7xSq&s+ZxMmMOA>ycR%WmdZO#7V`J<58~|R#Nkr*` zv$U~e7&|svL|zi)Cu8OP7D@0DZ;nE^?6y~Bdit$@fZJ8&;X@f(vM5t}%r$McznYjP1eM;en92p4H*A$dHVa}>dZJXMcoVNJj*&89+*O;w$TZHR3v6n7 zUwAirp1&>LlbOyY_U6HUwZ~h%Ux@Yw&!HW|)OCl_b8n`{z2apfved}y?Uc|!{3p}Y z6b#i4^@Q>Ah0qjMD2zp9=}=;P8c1p2?r`id6VBA)sG#m}K(RLtSM)8tj-}H^S&2&H zKMofKvn!0H*+$t3U=_mBH@;TV7fc{D?kX~jNX@5&>{PtT`I&E=QWL?!dQG06Vub1t zp&um|QSNp)TjqAYB@85RO%P|YZ2_9WiuYLK;Q`l3Q`T17E1RFJ)04wUNw9utI?#c= zg_!5;yF-~F(vnLuWjMz7+Y1zt^os!sSt2Bu@+Nf4bT#9U3e)Gxx(#Vk^NZ@MKX-Aa z|FB@mkhyKu1MDYfZ2t=`kJev*8!eiyh}NUKlADqZ-n;8fzl$csPsFC+O_irBt+~Oe zhB#kOOBtRBB_S4rE0>!cry=p6Zfm!rk(|Vcf13qBoaM$iMhi$2(i#g=e`P@FomV`e zMO4$$4*_bd=AsFoJ&xs%iIFNUdrd^#<+H#Tx`6FEV%X-~E%U`JKeNUS$CD)G^s3Fd zTPiOUH_=pP!&YZUdp84if!5D~CF)^kWkFP0`cptI`+>zNr<3HdmgHNE#ppW7>u6{UrK48QC7+_c(kQiq5)f=Tn%*D$YIh!UvUu~hvh2rV%ss_(s6?Hw{Bc2!5!JrjlySs!-4~v4z|)m)wm#3E=yV~y0DCPhVPd0D)% zKO5fF@c301)BBNcGiVUZz-!Zz@#x^IZ91DC7M3i+H{&Y-Q$|%nTSXI1+6E0}07Q3} z1Y&y6%i?C%vDjsvr+7E$Uk8EH4s04Ac^$j0*$ZV``@&NWE^a%op&Xn#^hF0~gYikf zR?S!2STq1v&Gj8yQ>Ui^g^Qi>Ris&$1edhhon1xYQcRHLopxKt=Ygd z_;KolH!&kpH1)jrY5j2GBlkYv4ro{TD_>ea+F;m!ONxK!EQ}2`$-~`YdxfZ0J<=Cn zql7Hn^2ZTVL*U3Z@0X;6Amgp&&&Y`Ay|V_FQeT1g{N{wMNIAAzOZNYTrL(%F`z4SO zu1D#psOj)TPK{|ggsTkT<{I#IMgMDRbG}kZi*H;ajeNEGKi)Ihd0h{mnY?oJWjNl? 
zY7Zw$&f^yOZ=*b@i~B~i4lSC(EUre5yrmAAV|NETEt1UOw(Y@^YOgGedd+9N$nItdLY6=@yc(E+rV+dR6o z&Vj9=qdyj<9Ia$Y=)Et<8<`kgrH*j37Iw(J&&$86rmR0p-7XcjROr$Bh_qGLX-L)R z?$!S%p8lrqsAWh+@@qSrwF5jpx3$hgsN7=hiqcoc()Jre9a_{uk~-AwAZ4g6;zY6k zxX5SA>Zq$PEM9Fq=L;x54m~QXonaE<5<@K`RvO@(l@haBmUSgAy=uFTpOD|aik_{5 zE}fMwJ^hv48aMkeXL+BupMNdcc#^ie)uyDS?`Y_9)eNe%@ogxFPm|g?OC2q^R${LG zj!LQ_?1JUJ{=lq3U%s{o>`uBi@O)+`eODy_uVAGbf5Tj%koZDXHllRfi>7l+=$~9c z02pjS4@GgaHFbH4%Ta4(@e&$MsMpzR#|KuZg+vMj;jKVG>Xw6rGi=$x_;yrSSj z-8t~VPHTs;^c>EDuM&Occ$9|G3Eo^uU$sywNUryp*_hWqOTRpSDQXY8|Ms?=p99!B zgYvqdfoPbVl=RhA%mDvdhVdLImUtqRR$hhGbCbuey$-SAkYBfWVU}L-L@r?L9NbrW z{w}RthP0ZFEokaOvR8k0n|5C2{MJ8|W>VlpUUmjwTR;i@ep2Lt)yo`KPg6XdVcGVO zYru7rGxs;kQ?|YFCBT!E#MWOga@&CLZOhEUe3)@$v1QM&LL<*H&YzlZ&*H^pOFqd| zlBe^F)1?jNT2Xa8TAu0oXTT!>`p#g%QX}BC^;Lwom_wk>LUD&Iu=>g42`RD^n%+3C z%JX;rJp6;Rkf92%o}KzL)o0tKdIH^z0uW-i!AfFl#(TKS^3Z|!1ShfOC$3&H&5{~N zS`^#cwa{IENQvT(|xVO+<-&iJE2YJLs(zy)EY9m!!(Jg!n@uTye7es?1K zoVojq8;%KacP;k}Zy+P1O}b&lDf85Q(PXQ!h)AQLX6i+6)^hlMrC2OIUO$MSYI#Z zI_j_l*LtW6t?zv9`hy%~Ji-Ab?>aw+3aCQ3*n|o*?r3WeD0hfr6)1O88f(1_-B!Br zbhdC`X$$?@P%m#EUk@Q;FHgUZ(3k9$BewbcHnE9$VPxI;utupYM+De4FH=UbI)dYT zcunG865D(u`3SVn8@D?c`=8)eKM1=UjLT0PC@CN&eND!c_i6Rf>&^888GH+y#Apol zo34z&a(#=lP#MzfWIA-CGN{tVazJ@NQmKw;ZB1k(wrR4z7)vSt-YsJ^u}Ez&-?t1l zv@w4Cf17WoFy^>SmO4_FrQ@fl#=qAnf|HXQv^5tpGPqQuwW*Q6~VyL2ZjJp@^l@IQKvfOB`YkYT4kW9&?dr%>^h*Cv(70gii&0&<^p~)JWqzP z^x*;p#Pg#;vb*fNkghU>K8}nmk2`d=ODpmz0t>I!=~cc3qhprwa#xqPioWl!^Qird z*Wml&e>=@YjCJVmUFMNeQzs(>sxi7foCAr_b_OFBnso$^&Z=|oxWA`|NagS6#a;Er z;^=1dq%VHDPM>%8U>|~brOQc%V*xTIC!P9SH6 z-)XOCg@LmIXt#AMi`qJPeH^Fbnss+SF60GGz5Sy2G{XHs%XqKjK z%U71EQg46a#B=vf4T}1>O%wL>&eiVXo`}0prfApXvzart5xL&kMLv~{iTXa<4Y8hk zD{aYc$!{rp7k|ok=6|*yD?-w3;bS^?TvQacySdg#U@#d*Zmv$eO3WSza=(UVcB6f) zO_z_K+7=$>-~PY~ZzH&_F&Gu~n9zT&h>6=VfGqv4SKcBEJs^*I33~E(+7$WgfkinBf1Gk)sr8*mMj16dEzg^)FB*6r|n%mPo+@_}^Mk|BDUr-;D&bGqU}EMuOScSpFaV z%0*A92qQ6NZM{g7ArT*p5IyuHx;!c6KkTV?#Fw2&vsvaA#%X%GW#wh%FlqKiPG?Go zX1c=8f8+(l;D~kguzr%LGDuuqPxm$bLJnta0*v6$q{goNc4e$9 
z|L}auY)MVTv$NTl(B2k)3UA{jm(E~2;w0Z4XjRitDamVUx^64cQcGa@JWxoMsW82Q~**+D^9iQEogL?U-|gyLW+l+Jwnda)nHd z$@|t-&zhPum*p+6nC5rSCBS1>QIscziGty>y3|)#L_$7WGH;oO&6UBZ@U6PB@~t)^ z`qu$iu%&X9Mm5mk>%uS`SFf>jOtlhC+2B<7QtO1k`mMyZj>~#0k(o*&`N0It_=^9z zl=uEhM}~(glL}q#+(mQT!Ey8j!8Ag}X2EvJ2XRsmaEW9MoW*WtE-0g|P6%X-k;&K6 zW7`+eVE%m};x7#!Yu^w=YzhZ7UkU8hK(AlO!7(6g5_#)myS2PdGQq&~F8rN7q-fgE zEGfr7(Zg?6o=YgqE#YHd#p3L%EW>JQmRca@EryOarMw!E^c=1P;QIjeSvaJ87 zS&3rg(Vt34zuL6Z@bSo8<(pLH@|@pN{jD~9#XB?t4O0C-gX~#5%6e_)*}{TU*7WXn zg8GMWl0NwHu*YlVhAk)8UHY3;ZqP-~7#1TTXrbawpR>uQYe>sS5RKQUZs9H(j3$@U z`^-&24on+~Bim|%-UXs+NUlLq8qfwoLs!z~emEpyi=s71xx<3fss-9>K;a2Zo2sgl z9=mh}9%bZV`-7=a0sZWfwxg2Hn8})abJEf6wOp&`kv{6+|>M^!YWN;*o+(-etx4RQU4NV3dan6&`|9 zG_gtOHYj4}H@0X~&3C~t%cF7nS$%sn-BimX5m)ZCD{{jwf@IaY_w(SoGN1P=WCbPk zYViw#T+6WrRn}$N-piTG2X!U<;Jt@Lp&2hD=?tQ!sHb(Uw1((E3IRuF4M!(SHdjqT zPco+VqrO>IKJV9ir7gmkvwntjn}^9itT%PlXQeocJB8MuXgp-Q)>+lh8zf2Q6O#>! z0^vl}pw)a+CZEl#RT}|W3e2*c@vyAWnINKRk;{RQ}so~ zb@#Yg zfEEv-1vC5AVT(9d^vK6ELn7MQd@r0}VHS#5Rj^l_5%L{XxZSSCdEiHo;+p9XCc6ee z4J==t6sH@HXgZ|aTtCh;Igk{eP2q+PEF{cFIQ<`5!}x5`j;2fG^sEf$S{HEx+pcf^ z1|DPIEHLA+y@!Qs*dhx#FB9y21(Y6tDG)s$wF5pyn4pJuBy8;jzEGt60ZruCbTjMq z$n+))DV12nMWBYb>lr=7Y&7fEvBBCA_S5tR=pfW!SOW4ID8w8i+kAu7mme3aK8bz+ zTKPk$VHRhX&}igO41e=q=-?i7gN~qO^N!&XLF#XgKZ1HCtPk$YuBHI>FW1HMNXC@m z^VT3eEXLVhIz8X*e*UF2-C%SeEfy*JeI*EV5D>C5EI;5PKhOksMnEuhHuykmD1K_m z$|V>`lTDnbn>Vw&A+IoF-!a6)b(C>K7xWXMr*bI*2~#i+0ydu@Fu90Koo9G^fsson z=dY*=++tfgNUAZ2gN#JnPHy}rjAj*jQ9gR zYg7L70M1C&^jdIeVpcjVrJ6H4g&fMyVZp2Q^pJMJLMt5Ox7vg;{6W zo^KuslqI5IMnE$fy`*P}Kh<`-g-rJvhBnYomU}y9m40%0_BWT*nw)f25hASu+(--D zZis14irT$Ie&EUr?(20C{01MQpi%f)A@<#|*=rUxm(2WAL@oE^W_1~gLY&1c z3!E~z4VEK}qUaRGWw=rU#28mcmskk?t` zTEfm#F=*w0o)%}N_9*`WytnHMhhs1WgwLE;$XJ|VShtWe3c_g!0<`rzYzziy(b?VI zUxK>f8F@Ae$aN<*x>pHkul&qH(%f&R?*0has-SNEggCl+MvCK&BrpDYKS)k`D3O`5 z^lU35e?{@5sQ8L3TN#$OTwAXBDBrM%{4!O(uy4@Ht1n+dq>LCaHOERjp16j8wi`5F z7akq7SZ(O^rGK0#KaNaZr${f?5P6DvqD0aJSDUZ6T(*2oq9HIP=945!b0j1+P@1PR 
zu}h?}U1%w5RW_y>O(f;eg)VIwt8&EIbjZ}QV^?4T<*!#8d@@xoCsSExAqHQpYHp>M zwRiF{p4S$(QNm%JQ_$xJhDDrRhB2tl8RV=ssLX0qVvhV0l+tRNs|SYq|{P3EmOl!u2oT5*PjIj$c^{|^1pcu`5DmBy8p<#WB#Nv|MmND44zsRb( z4gd*ls*f6}Gb?P9Bxx5KV7!WK(V~)zl|(6tl6E2eOSVtfvmePo04@pPLh0W4>>o^S ziz!pofe_9(DI2(Oo7KHkMK4EUZmqe?Vly6K14^!vw#RnHk_84+9~Ue$PS%%f89I!@ zk0MqQnMv2PNNcoqbuLg-5;o2>45H*|Y# z>i(kOJ|CQ}v!RVI^=K_kKucfM$-}h2t6$6rrHwZ(A2U2lfp8~7KShOQ?HIAXyyOk*0ybFcPer~j-CC%0 zm;XeRN9aGjiWfcWH1Ivf~AcNHLZ zd$ao38NWY&dgieBbP}xMCF`lwS=^PYy_Ko?vUS=!DN}Cs@Ym#op!mI#r|>;>EUg$4SVWEW(JQtdVy{q?Vx08_*-$&v(OZ}E+r-1R5EO-Zi&hcS>DFE|%srB%= zw1ilitw0aKFZ2P8a@6Vfm)fVw0X%lekDtGwu_qzb@3~e&@80gnNb%}eYFhI-Vm96J zF9Wk%JS4h;NqH~*XM~%QmXZyw1^>oNTA2<4w?eFMRo~6(eoWH@te`r3(7x;gMQ@0$jl1{N@s5Wi8SlEQ|EDvS{kOmAOztN|Nl z;ayul(@c(bPqz$vbKZhZ*u5l6mp^Luz%8^|<@^%|u1jS3kiMi+&SX6Hn6l9$x!N$^ z(E2(i26G$Io@^;E`=RwvIHmE@;$Qm_W&@$3g{@*01yXS5(67(^_9;N@vId={{?pph zEj2z9>`frJ+)m}})s7_-%t8~PswiGP66cpG2iS-MV(XaGL3NdfcMKnwfbB=w!CcCz zz2aD*lDRCXS>o7~6^O#o+jxAAxguA?DA}rSnVg+YrWB>m_uTpDjeilgWbDv1e#(S} z2TIWsTQRtZDl_OiqM$4b)#X#2SYyuGMI9!ZvsahS9ck`BzTs%?fvOL0JH&Ine_6re z;9q}8@T7d4yH(0;40?!O5r2?!H~z)4`(DsQkg`6L_YW`MHAnm@W0OR6+08U4B%zOP zrOq~^`JbfaOpf9y-09G997f}s+pnPy-mRZguNG&VPpo8j!;%BdQhw)*MZ&~b8jt#E z7o#|dSeRsjZ1K8dBtff8fcnkDC^Gj!!>)SmbAl=iTb{E);-=pe?^l?+yCyPi%+dl} z-0><0A;rV5!2|J+bYQ9^%6N*L6Nb92ykaSA{wT=Wdczzv?}9nlH7pTl`I!yO)m|qi z)i&ClZz69zY>!c`E-?>0MEJ({1H2z0nNg>{WctT|Zl=veSNN0Uqj3LUmMSVCbZcE1HW zJMCJqXNDd>`zMgFQkB{V>;Zl`jZVD$-=32)@ zml?zL*iW{rjzEOAU&n)Z{VrX*&}sC2(tiL5|1Kf&?5HPe0KH90L5-E|njhk*X5e=b zD}_kynnvAg>f;XUM@x4QG&W5E#xq3Thu4+6x3l`}>srYM?IPbCkYbu?a+>M=*B*=D z`jfUZK^xD*VTYJ8anG1(>-~$Y$?{$ zMd6K{&VFeFv$edQX7eW!8V3YY9#;zUKOQ-va#jbFOY^5LPS|3iwl7{yKug(zjN?!9 z9PyxLk+@39mw6o=&Sbe)KNl{xl&j0!hfPK!qVi8>xWX2d^&8{v8@_Q;P+WN@^AD1{ z1H~LWBGel0V19!aE?}1e;MW?N)XYcM#>q@FsE#W^8=_QPnLm|z!DNdX>}$y}la^_V zJeSf^Yl6aQWJG4&KF);wonyzT_y;^0kzi41Yrq>VlN~lcMsz%+awjPo=$}z@z zg65Q*Fd=*FUf+3eZ+o`8w?tbX(R^1HHjXXK#aa&FDe@y#6?hTFcplb80DH; 
z7p-jy{>f_ZLoCklWT-4BvOP4ve9R5Rf?ef!DRBsnqa}B@)Dz!Vb2bBmT~ZH|)aihE zY(7X30QvSI_y&ofMW;{t0*{*5oyIrlZweO#)F=ZQUnC@75gdt)B4UtbP-NW$y(r!_ah71+gawbOT(T5gC_m!iyD*nH)-9W&LV^b4oF57?+%Gxno zn7@V!GETOh7%uxLMw_E>(L7dgu4Wta(#aLv&97JIqAmN)ANAHVZ`6uu&!Zb9tIo&RfVR-X}rT>eJk8Aj$* z3BKdW+}sY9WU!lH6XlVBW?SA$>IQ8`u_e;}GK?Jq`WFl_ZfiA#+eoTK(>CbL5*o zoMp7#p<-#OJWWzPskUDWsSyHJ$#f%l8DoyLmpNj8>9z`Gezu%*OgAoDOCplF_Y&;} za#eM&%`0(77-k?lot#RmEQ0Av;wb#XzB>u7=6YE=iiRh;bPHT;XI5 zZo$FlZ<@y41y)>g51f)b{0DQTDk>(p=1wP|{k^V|M`KWv{K&X{`WbH<@nRO$+--Pq z!#Bvq9_l;qDjj*`+0vfH4yreEm?}?MKkefvoxX7*Sz2y2#aukuIgVOf#BOyW^@-qh zlaGN9(O$M|#AGm1x~bt^29&#UDAqf`p@&+t!$^{9_b5h4*!?tzOLlmOdp-KDm|2ty zbYLPV-uR&|Cq9v(3el9rDrFjyXthZ%I<21a2#cw_AyA38%{c!;JY@)T>`2dC(R8;u z+vR<${k_p*Fhcgvz{%shwekqyWluX3;U4H~g+(djbaJb>jMnxj6?1_k_XVY_*;cn0 zSwb0@&E-?P(Z$gJg)z*E=WId@t@Bc@!JBuePyCeK7|T}mlw8FjcG2p1QZ`Ph<#aN) zs25vokV%+c1Nw0yzqlg~x$66~yz|#Cd~uzrsL(J(UG=1R0BEV^O%~W(d>PZZ>4m?k7V!su9NDK~Erb)xzgP|8-?NmG(`HjP;qm zn&$5E+Lu^y%?(U&FV?StSda90k93LTZjS7;FzJ-#clpvN_DHa+AH_Tu6`t1L?l8Cm zv+U_pV?xo9I|YOpKLI43Cvz-SbnQSu#wNugth!TCc26uwQ(P_9*YV$~iWJrM*IW9@ z#dlQG0T>3z)Z3GUQi40$^%vm2t#g~I0Q)lROO#u2-pyPD|5V)Zm8b_lI&5@vOG&q_ zquv?>H3y85!3dGi&rFoz8s|0ChOY^C+*;-Q>@;ww)k%aE@Ao->VGBn>GFv*s%ZbX}c`@8h)9kRu?Bo=BI?x<`I*8f~Y#BcH zAe#QPkVc4kp|$gl?K1NhM$AuirT=Pcl+NYEg9~B4nr_ebozdzO;xOn`N;y9l`Cbyw zM6${QvT&8@3JZ!7;?m^B&BZB8DUCwq#?(&ar^J)(f7GM)CL;ey-0hHf5eY))Hv>VY z^|08{2;Xk-AX6VV$!nZwPjX$BXDKZ-mzP*9NXkf0x-nJ5He1bKVB*mV*2yInhF5Po zZLnN4d--lx?WjD*tj^jq2#otG8uVR-b;Sw=BY91jrzhG0PgOp(B%DlW-xvC1Xg|iY zw@ts0Z%zU4c5trR`bwoWh)Q7>Thp2!V`_|IJtMKtw(lerI{q(%lU(3xCw08O8H=p< zC4lii-KBdr#>FSKug}q^UnRV9cTZjiXW22HK=4UmMCUR&tA8BWcica3CSh+#F~y%% zcguS7HU!@4)jZ^l^t=vz%daKGwGcdJ&nZkrZ(}$(@2wD@ynS4i!|}ToFtrlYjO|rt;-qp3U6kW=;radpQZUeJ=5);Fr8mZ)tFSymSklg}xF$ zyr~9#k?+v@nhSQE-~;WOIUkGo{4(7$Q;=m@2E-A0lw03Wc!6Fn`_#el!MDO2Oumk* zsG;Cj+C@D&FTjT_q`aR!IM}%XwMx-d(a#eP;z~!NcdOr>C@AV_Ju`%d9%-d+1MKBr zrJtxV*Tbig7SniV6cS1@A@sgeV7OV5`rZNkNN)t3^Mj8&=a*u!q}QGyx%lr>pS!oo 
zWZ|p-YIjd`+1)MFX_Dd(ENe5Y=)>IKf(ISzJiAphOX!;DcKc)qE(!2FPVAm&mr%nH z8N7imBIsD4dLVRuMi5d6V<<%we~nA_fb_~98`Td3**|TT_SFaY-v}vEO9Q?}$c8%a zSG^Uybvke8S-T$8pF{(YXS`YQQMS2*hItjeb3Y?r!F+t!$TNTJSmtS9aH?V`s8(PA zih}D;h&&cV$Qjt%5gE%Lc8l@le!rWeeeF&^d~ka;mb_s)fNF`39F0A7f8_%{Kho>2 z@=$bB=+lp)yT?2}TixsKOhVkFzrr%bawJ$M2WR_(?(FN?^ikc# z_Luw)X3yzgZEwJF*U6I>vQLAQs0-fiSI+CKE$2Iw)AiH#>xwTW?v{^)Pv!;jtonFc z{Iib3`qvbw%ulE-5cjJ9f#|#V&caV^HaQPz{bGH=JI0l9f1PYRD=keWnHw37lB>7Z z#?39IHvliv?aI;l-l}@pX~&~O$3oSV1>0uQhbw%LKC1i?OKPIbFa0By$YO%7R}1qe z|10R$=w5*mvMbqKL0?%X?KcBFmzR*xWMWJ0s2W%H zzC)w@m2r{UWPb*2R{T!O61P>^hq~NTBf-Vs51UopM|-#tiNNk;7;k#cC*V)+)sRQ~ zh`#ZmQs<*hVNz zT+UXRM|g;5t*dauOQnCT|Dxs&4@C;bmJSzl5&vz-x3foBO9xks;K#FC(uPGc1atGZR5+RzI4&}7yF@p(dNaVs^!I?$}7h{3AQl)rMY#8 zmeg)N4|SJiUe|{Mg+5LcUB6J~$35fArnUWT-7CA~<&eESXnVa@qWj(706*=NAuhFjsSBp?+j$I;n3? zN|Q$=RM_i1?VN+Bu-N^AY6a$jZpa*Q+Wz^fw-yiS>t*)g_e zd$7Gd4(Qn3=A8(ahEi;CeGD;w>Z@#2*bU3~h79*SP5b~P5lG8$jFUw@VNDbeQ*Us& zn?9`VE@pd)sN1zn>()&FsQaS0(}?dk&|5_XBHqpwA9JY88UXFuJ}T^26JW9QXYWtE zAG58sX`H1)s4oMUTuu}m8!yOZX{Gx5)j6e-ta z%f9JLinRoP%wAJxH}b%P)rPh^PD&Dd;x6(l?^g`(@b4z?`_|c)_;Jy;gDks%izN^X3_Tfkd#y!!x+8kitDO*7`6HdC<%*I`JAs?hT&yML>UN zgZbYwU>F(L7=JVT{=ei-{~HPn%O4iN|Aztt_>Lm+e+NUT)RKxQ8kWP)!zBXWh28)P zlB~<`UR<8)po-cvbZuuxc8-wXk7Hj=eY$kkFS)SN@H1i{_+t9ffIyKT*pmOvuos}f zK=^G34N9lU#vj#6lAyeBoBE-qsuLQ=l^q96Umu-3^v7HND}rh&fU< zy~C&dG_knVR0I9&r^U~xzSZ2jG4@JHm-y|6>P0IQ?ZvG@EWk1i6Bq!{l#&?1@;AB* zNFrRVkHlqfm=kbG(MZ$~?y`anq_`auNVvATh8_G(JNy{wlKdrx1$IwpFL4h`BSUlQ zs;XGeg@JAko++r6r%`yl@3Jyok3AbcwV#aXBG8KFavJ9$A`wvzX%$MzM~#kFpX|Jc z3@dpQdXPwyZAXB%;`7|{IDSgj%rpGUN8nv;8{KxFn3e`J?bad``8C8$Os+tvyMV-D z|7HFB;E%`x$JofNxe&D-!K8#upoSC=ti(TvI9F)3UuW?;_8~?P^2YJ`Xvw&lTq-T3 zaSGa)bTbLF6DZJ9YK#&e-$?Yk}p@LZie%GCWV69SK} zvR0dU)TXwcU%^E0VzAIU)nR9?WdOiO-2$7*1;&QOBps&j62$X(3Dx96OHV)kr=FA} zk~TM3f|LW6S4dNPxGivCi^QDgdY?TEc60lPMI2)#S*P}vev&o!P00 zVi-dBlR~1ujBs*1i6tHNx)N5X^*kfJ=EXFF2?=4)vWC&X%G9K8s3VE0cF4G(BAJ^7 zW{0_rkZyLd#eVk_nZS~5f_6LUqW1#~gx#G;WQo=DGoDSzz4C#s6W}as*Zaiau5(v~ 
zuGcJo+*IqGHGRS*!XBtY!cmqzZgODa6=@ELDTv{rSxencN=~^XqifWV8&rHlkcB0?ST_X($m^Smw7Y1N>s*k76;w4P|tt2y# zVeOD5BTG>OdrGcEKzGS?m)=#8;twgGmQv2Ovs6bNb#5{!=4X3BqBXzqynHnX&J?XZ5VkC4i+H;SRfYton+|aRj^Ox-0`H`Jw8%aG7g@g zBL*#$pa&yOa>)cGiankLehrhD?~TJuA}iManABStlYnBxMO_I02TX<6iAXYHmnwpZ z9Z0p!!Vb!D3-$&^C`491I3HrVcq)p$AJvA!G_m?4SVFNJQIS{{2AVVxMd(D5Z=onM zgW`92&2&_URbk|Qs(61YNYlvT*o{?^5(uoEB*;&yW`w7oA*t$;>p5>bz+on%LiHxT z`Xni@Sg(9^K=k^AoKQwP(X??P_!yeJoxHw{rFr7y!rw?#>Rpv2rC(v4R6LG?*<1Ts zamd;C#ceU0QTJ(cI*FTb-c|ED!VXhF8O?3wJBUEJ;VjNztw9>a7@Bg4fc9hi{cPVvw*WEg=gX&4CTk1#V{pMgBcM%Jf1)yjL68~o8V3T#gURh9JI4C~z4FMUGk+wIV12rmv0Y<{m%32w*hD4EQA?rro zki9{}&@KXXiC0vK+$Y`bC#XD4TU4`!d0!lB3#YVughm{#EA`E4NI#CeiH#?IdI!l~c@# zF#MY&E0f$J0JK@hgJ$qbOSNsJ8e{-PN29DWW6ScLkO|PInzTvH(#&9y+ZpLgE`j_@ ze~#~&p~?4)X)5pbbqiQ3WSnmPBM2qWQ@fqoG!h){FGl_1{?BSCD}W}Z@xknO&e{g! z9ZN`6%GDgl!HssAq!9+rj%c@sK^ZT$(2@q$znM^pOHx4~g0r(J45q;%Ai<84`(mB_ zOQAEg{2}W>1C+K}&uUWVzK+va>KPrYg?zvG(b@zQ=TeMV#53;5P)BE=v@ppzph5{Q zOgn_#vC+oRO4+uVBFixF7zWk7hJ{f^T+_0={6KmzS7k`<)%$;=7qms?XTi*H2qA$c zWI5t1YKF;RBu(}twiA{{W@+s^JCL2xxB^v8{Xn_|$n+f7cZP}eMESCcqaiw^Iwcdb z%ns4_Yo!|_i9w1CpNy9vz71nc=}FLNAuS|>&noibzu}RHWm6>uYE~R>N!kjiRddSn zT0oly_?$83JfCU=+#Gd{ccObeu-@+T zubTSVT{|P1`+c6*#cy48hKwzShIJA<*IpI&d0YzmHis*|#ahh$bs^yD(DfGZdY#kf+;<@0>~JOOPhTByc|9DWxQTafP3*gNf8|_Xi*UWX+K6BOdWeIXvlG}9*yyDON1m>}95@-68vqV$ z1P_%VUlQOGWfQCmvl_5=+S))jmRYKQU*&6)n_;M0-+6p)j zTwm@I4>q*L>SRX472Wf@?|Oun9J_Sb2!}7mk^u`K?=?QOLQ$;Ae z^QG|)f#n|zAv6j)R2;a3!mYg|Sl_8*Ft$!WJOr=EZ1u5ad8aGgC9YZtwf`Mew9*>x zPg{zF91xkV%jn3~F$rugo9BYlc2|4j8y-WHS#Uvm$QI#0n&mng-{M+T<>Tk+@Oi#} zTb{}7a(~@^*4-IH6fZtto)E9ubR~Li>!c~7GEED!gUwyj$%EXZqLGtW)dI8f2e{)~ z>-#15yrwjbtcuOV6 z>R7IUjUSrfWpcj7#8^z3@>c$nNpn#WV0lvbR?E~OXXcohu&gfw$!g)7!qVXW;f{TOna zL%Zew@=ro9%cwcaN`=j?a)ak7j!?gOnn{29er`!%wf8a5&7Qz_ucFFe-6|!#!M#vv z$ylxx%}4Wi8VCu0pj4cDTC~HhbF)rAOntI0It0U|A{Fe~3doeZ|2+A?!?I&TC}s_1 zQ?;P2)WGxWI$C;fFW`!2wv)|opd4G^_B;hLKkWbZ?w{drJ0t{i3E+APZxY9i#uP>N zVHqznRg~nEn@dm$8sVsFB}L`O(ep97ZatJ6iAo7Cz%x#%mCnEhMEj6uArwrXETCjg 
zrVx78^>BE0iPTv7Af(1T`u1kIq?oo9e)eapfr~x-aG?qH<*qHrQmNW_>%2NNBP@C1 zf#p{l8g3=3TNC~c%R|B?EUmBPU$5}_JCzyIw=~6VOgVLSXp2jc+4Q@qqTdqTg(EhG z&8GBXt}=#8Nd}$Q^0(!jDKxvH>_z@zEKareiAC9I(FlZv-uSyW=h3hEorF~L>1{UI zW^<+^`PIaT$}E?%ZAl-SlkiRgC_WZ0KXeii>AvEJCi1 zbDrX$S6$(K2+Dh`JMaekMK_~XV%t+ehmiciwCaR7xX?{|z*e`-RSww4GpkaOIsYJi zGsxOzL)J*+bi+334^$7JSR>H#n2RvdLzno|jnO@k+MCrA!}OjUF3>~{G;98&=0#I7 zKU9tIgjq_RYq1X>@Fs@vGr|Z8i>%2m|Id&tAPEl*VW{_{^{BY9A=$VUqPMYVB8TLE zso4`FqU)TfaF)Tv@UfQtXajaKcsHyW)f2)L*F}R_{U)C5q_Pz2>mZPv!eiBZAQ_Nf zoa3sVe8lSHhCT}vdadM72!m~`{uiv?3!m~|eTZ0cQ;Ju2`_`ajfE8M9M54VU#%o5j zjHE4WiBzR;2q$ee4_F6$Mc2&2>+y!KgfXFh?B?hoe>-S)33xjh`FMzqPeE1`;c@bc zi^n)M!sG5@AGhkw=ClT1CZ`s5y9lJ99z~-kC5>9LB8gRvgqq4h=lQ-~*5$sxw0-vX zyyaBs>~zJIYh><+u}vc`DQPY7T?EQm~r3Ia>_NRQ-Z< z@v~SEcRX||9v|fAOoMxwZSlWykRY0&IW!N-QBg2dsyP5M&gHH!@Oyf$2>tLbK2xQP z7fv|}G#`(H)w`Kjgoi|0E~(wnR>e(=09Nx4bl{nLYIUgw=kv z*GFoRMkhHGMrL;IDzF0G7Mzn^ey`Jl>Ex0TGfsaSM$o}_J@^p=4#Er7_ayY7T zzh--5DZsA(5vVX*m8vqH^^MBo3Ysuhsq8F7>St$t+fet}?-1Sk;B}mV%e33<*Z>hx zrgWYPyl<8}>f`Q|W;7@-ct-M?N_Nf=_1VHZ?X#ku->*tarvae~+L6&gsp$e+7d+c< z7_R!#!Xgs&*UIzq%}T$oQRb~}Mb-6B^V~Mjz{BK)ElP{kSgACBQ(HkY1e3Fg{zi7z zJ9EcoawW;hNOE_MZk!eoiq(S>yp@ke`nk_6j7uQ#axVG zvQMEAP0)PJ-xmC%qbzp``I}DVTQ{*ol`l5@pph#*I(ZL@^gkm~=`_Hx{Oa6->?c!F z%6!UtrA^aN>mzKDP6?GvNxL50~yO=vSbw{ z&UwDrWkp%Z1|aq|Fga&Bz6YwGlIT|E?y~yXMX{RpoF}KEj^{pn;Nc{!4>qP%~k+XbDYu3%`gK^s| z{MC6FFQC&UtNiAAq02tNPg?F&~h!bH~H1E)?n!ebN?k|N2R{}_>_O? 
zk*SCB;xd#)+ASQNJ;38*j@e_^aq${3j2y^DC0cMX&H_RuUS-UWO;cCbU@KSjKh{T~ zg{(&EylkdSAz?6moHzsu7%RTzSrFMUNBNf7DN) z0sr$W%}67FK%u%r^dav@YX}GNK_W$l9&a}~FlANhzgLdk|6I4gwOGn{8R<%th1|Vn z@O$w8YkU~TV?G8u%Tv2{)%BP^g|)Fa$_R@5*|@FQ)7sTGT)Qe^V9~p*7hADwyn60R z=IpkaUPnGGN%m0w!;N?WNkL# z?#3JkkJUz|*0433Kc?7=rwJrD3&*5jsjbP2Eopv=)r6SV2Y1e~-8K7H>7J(p#SZAw z&4S4v4FpyiHtFddluYmz#x0k8Z$4u!t_borc?54~b0wd706|D4j!_+osi(qvATTor^LPCzAx{ZX|hLj=U{oBsXzUg;O%NMnZm zzk-I9YT(|tmcUuMXeSpRN?TXR9=#RrZ?!X5B`affUw1di>v~V@p!bn)A6;8R$kBQr zkH>|NFQTrwa{*h_kGD#ylg@{?``9Z*v^z&UHq^yhDiI)WpTSKMU_P!R@4B4i(c94J zH7>oM`bUe5oojr1L;>!-M9D4WCF53Iv&LiGoN;Z5vGjfpC(iI|I>hT>Glw8}DXkT2 z=WHyygbRmT>s#$aT>*|?Xa2jNmxV7-Uc_?KH{=KSc~ibXSLEGifdr;b6T#K0xE^el zPGUj1$~dVi9txF3W-GNZSAXugA}r_sRQN15T&c&~A1WkQn#@Fk6j`HhVNU-8X?*Dcj0wH!x>J#Y)j~lxjIq|1XgX)c+=${$8%|j_Oav zT8jy8WCk+JeeCS+RS>9;zaw#YfvG`elq^<1ILd8-<`*K<1el^3+{|GE;9Yl0_a zE~|L)DLuzJENRy1rMv*qpYS=~?s+MI%bBx#SJ`MuvYL2X))sHuSu^{fa}@{9(OrYN zif0%R9q_nrI$m{RO%Jdlv^K#;5YX`dyU+V4Els}C9y=eeU)l`3?{NYWq|J%#0(Di_ zBDRmwF4#JtS*)PQ6i*w>+ntZ;J)B$pbKE#|PT%BUPatuMT|FoJ-J7HL?FeQS2 zvK=-qZb{&DT_sXn)SI$kwY?%uS}^v02AKTwk@2P$zShz*+zk<^1!;QU-gMc?wzy+p#cB9!_?hR(_nVu@dP<`b~S z`Q5=IfziFhK~iH-STswrpkcJ1KN|wcwHhKIs)YblFb%)xAt01nnMBm`USIoiot{p% zl4Bj-1dkRfYR8txS+C|lj8a1U$vKhyrfYn()l>%9o zrf<+|X}HhUY+}_|QjW$jc{b1DH*nMD<~NvHpy8+V zuXf(Vpb68iK8*}H4r0`Z++;H#_f|3f#Gi}qRV;W@K0$5RQQoh*|2-M_awqOJwVT~t zs(z9PKY=%+;ikGdtW4>W zN7T7rVhfL46kWdVR8+HGEroR|Vd9EZSSZ{E7TXgaY7fc_O^>hffO7P)n0stwimU=N z<1p8TPJ5Zfop0)9Vl?8M;62EeL{xm!QR0xqkUw`~fP_wUGCm& z0tBX1_dDJk{GD1MC+5WdGxLv${TF5yGj&T@S?;8~u|JZiz?r>2m^dQyfnF35lA|eA ziQ2xYyuayue95*Hrhm}5=9$FD@BalhZAd5!!P$x=LZo5OB2>{eyeLjqV60j^=V45$ zvD~L%Ba?P$PqcnN3}>3{0=12w4H%N1P;rmI<{}|3}+Z z1YvH?Qoz2;z?x)f_Udef*nLzrHXmzbgii8A#eD=JX+Is0@iCo-R*UwGTxX%27ra;B zTx)mOdS2!Mf1J}&Dpq0>-h~X&N`4EYZ6jpTb_Z4nrno{0UW}bL4QIXAnWp~0W4VeK zWEZayT3?Wo2o_oFx%j>LCBj-yMuw>fd#P)wSALW&>4;wiu#PHQltO-5-(He03WGiQ4!0^=J|mzC-6Oe0?$`9IS#skDEc}Ef 
zXJ0JCDp+3PmI*krY@&qtB#eYy>EyJ6ocB^1mKdQu%*zG*;9(B{i2E#bV@EZxeHA^`P5=XQU1msU0}I zj}v~PQL6KY!ufn#Wlqt&Q!9P!*^WEihg*MkJ#c)_ooNJ9M%PG-CvuBW8YDpz-bfwZ zgZH`PZhik}w*9s_4i{QDOu>2@_UE`O38#$fV3I84_``b?i+K}e3E1{XI2tru$V7=2 z)RgM+G^w^%H3B`|Z?9&$oV@TmDl|aUhq@^@)87Q{JgwlcSiKjSH~hhPv8(Wu*=;*8W(r|S^HEhD zWp+&D!9fXBW5ye#vOYvpan&r5!t}#pe2n5PEvfrFDl$ z&Np66VXTo}dO)EvzQ3`QdK2ik$kU)vn$R(pfijBbo#sQzc{GN01fud&Jl<>a_P{V9 zNM469t33lAd6?&gL2&d0f?UnEID`qpW2BY)<{BTVfk?4w+n6KUydF_Gc`I$TpOq3` zxQWqONOtN)UtUm_CRda(H;~Px5Fj*>?bM2@+mWu4u&uR$pP;m~E6wVZjF~`>zPFN| zDNFa`hTC6fVD|jC=FQ$WF!+%^T2^O&q_GmIyoxIdE8!1Wxo6w(s9YY90hxs?ynoRJ zL(j`XIZ)fua-y95RmIru(1OyFPm){}YslSFA5A-d`JNVd5v);IGK|N|n#2O>_tD?R z#iU&(xMtQPBQg&67BHW4ib!;Q_wwNm{4VrBh@*y-bW}9iQwx9dtJiD=n}^fQ*q$>V zP9j0wiw!PbZb7cMm7CLnO-h5kv$(nd{R_Q587r8!fJcjfc;{+e;{W&^B81G+G7Q z)`0ykHfpj`lJ;}z%#~Z=BAc!?l9ky# z-)VITcJ7DC!g~R!6Mi`OZfzmHzuhU+KnA@`z?t^Pdg*&i7vtHerqh2;iwpGIDEc`) z+78Nz?4IL_Yob|Dq8vdEGjn^67bGHIt7>Z?^;)3&lQE*1=cys};>plu{O9_otkCu> zni`%pNJ?GxLa%!g9y1EL(rbrhlvpM+tzgcAJg7I%*GRU-n9Q%Ast}B#$m6XxB>X1G zML#F9)@qxL1^Q5mLt0x9-O2Aqi2J7mX z1VSF4nf^5;u9w+W$+wY|TB#05+GpE~!5azpsUXV@CSaG=2Binp&Ejd4&Jkk(WB=AT z8fixW%JeaK5XA~q7h8OTO0R)D4?Svp>Dh`er;zhGqKd05(ndn6e8Z0pWDq`@s z(VI}eW_k~Z&0tJgUGWXP92UlqlV0+`G&{Ww7Jd91foM~8Y^8H(h>4ef<+YAr?H z17P6#aG-`4&aSY>lpNDOLLIWs#zb_%{j%tM_op3X(ShrGtBt9TL3 z!r})P5+m6_pDZcVVNDi4_wXiQo~=0n!v|g}+8G5ZZ?W23PEB10fUS2Z+HH>OCA9Ei zHeJrkf%-7cVsTDyHFA-pF7s`@i!*2Y38tMPj*H?ZNEFqRnprM!Td<;2i<2PYv98HP z`=3clK0U`yI&4##2sIo^UbLda5+x!d-q2=>5vzw{;Q*w{oO6Y_$+Rj4oxA`0-L^c^ zoQxo6{*Cq~O0*Ezw0ZeXF{#OgZ@( z{@o2b!SR@$&`e8-pg$`*HQ(LfB+zE5(o#uXv`_$7CXl23REw^;I{w*(mup!WC_~B| zVL#x`+q75GXe?yWQ&MxB=DMV_7&Qbh&CbjeJ50<_O*OI^rHrqw6xD-S0xEpp|7Y<} zu~9ct*nzi6Hc^fhz}OHsQn*@17t!f8SLr!1qjW`QEo!Z$CXCBxQrpS%1@hBOCvsqL zSLU3qH$}PN|Go7aR zx6)(FSxVrNJr6o*9u3yR*1_6)iTbSmjM>cg-Q^G4&ou1H50h$j7=^WDHPSq*I=@?!&J^{thXg;hOSOz*si7g#!neQ}yTtw9;@ zpx5ypN+RNRk?DQu>2`DupY?FFchstXtgYBKZq67IT_iHOIFvj!q@#hvF?NK!RJN>- z?#>wCKWPXrn2vo;uQSsA2D7If!7Cfxebc*H|8#QdM~{L}(HRQAJlLRz^Iquh1&>V6 
zr-LClni1hdVVoT)LT$h(4d0&&GCp9PU}!RuaSB}Ys`OF^^1X2&Ed!3*!2w^fp9UWq z-Zfuz)XS_ZHx=ooSD)wQ(ug!J@{E8^q7u>9vgXg|)X4BRk$|=X!dvi!$pj zybYGwL9q7`e`n4GGtd_v_;(P&vW|uhNn@<%Bkr|920EQ1J#H~Rxaf!FmC*en(cIaY zV8xaFR2(3LaT}6;mcW02QZt)T0IKJQ`(=OqC+i3ONBXP6L*7~6qQl3g;OR`DC%g#O z12~`1hLk8!^_(7mbO&=G+T5+%+t1DSf;;L5gQhxsLps+Lef20Gu3@6=muNuv^-&ht zUx(wbg|F&9#A)sNK5!?NwIZ1_nQ)yZyMhA-e5_#1pn}kZE)?Iki*R)=ZrOXOHDvnd zl~5pjD{><}!b<457$=-uo&k$^%oz^2-Kb&qju92^o(O00jlV~__@8&U2-UG_7EkYh zB*HDv*NK{ks}JP0(X02vbir%rfNpOXE%s~M?8h3~vLj@|^E^IvkLhJ-3yCZ@vC!H6 zCOz!sEQ9VcRO~RQKQZWZR8ZE4mMf7xZ?Cj{0j~aM6R}^Pv0vaDP(0_NPqm-fpZXtO z6W)E~re#p#G0A@OIb~3M=fj7uLG*HnzSxw{N{ZG(fccM?y^xZlMQi=yT4V+o-O zNWr0$82``r9g*Fs;~D`Sre-SUmAcT6$24daJ!L{{djIXS`OyC_Eyi-uG7jJ>?YD)~>u1$fTEXi@qJwd%0LTryz;mX#=tMi~oiil5)3>^wDG>ES9XJWW znt+;+V$S3Ye8y6ovVrvnoUB*twvPgR-*OKYhFKE>c!blH6ND=4A;xiv_GyQBvzSvq0R99Bvrp`=i%vZ5vRtQ z(oh@RKb7<4jS~G;$EdwCa8AKrTyAlM0~+v|KVESEEwB6)6K!6kt(A9jRdf1cFk?Wv zW2pG;iDmxRv4CxazT3y9Cjhijn?1*>z?|p7x>SV_u|(g?$idIw7r|UxWSWVeaLE2M z?}9{bRdnA5hi!$fH5txMR=%?+11w$OPsmgcujF| zG<<>jf)LkJ`vAeKoi3Z~S>d(sp3_Eji^dw8QY*1a?QcE22XsKd=U?JwkZc03;PBQc zxLW|K?J1tM^4W>%8!s$aO6) zY5hL14}!-I$9*M|*IN&LCv6PYZGkG2?>$?#KZ`)vo}>qwR*iJ2;JEl#1bKCG);n)U zTPEWf1izmD9r&cTomvuGaeUNsK20>=Id;6RT~#CbKfL+B@G6w&tL-(6tCYF>sX|^? 
z$s@k`pxlJ+HPJ4|m#a|H8t3xdutnIW_}xdo{AeiKyFgJFYzZk`f6l-P*jenES^q4| zw4lqa1mlfzdYYal^mlcLx6pV}s1xw@Z4YP-aC33?K$f--bpAqoi0n~7i*>x=7CJ18 zTYAEK1ijGCH?O1flX0r3&>N?t7J{Lt+2_ICI8Y*y-i zE*9`(|?bq^dw)=b3I@T57G!iRZ8E==9v-QE{K*`g%N9R7L4hS4(J1 zc+T$)^Qp7K{T>znOBp^bjGDl9$)f6Eb(7m@`*Y_ccah@Us%y-hiqT4q?tI$52+a^$ zI^GaFezsYU`$k|B2K;g8by{C z0(HcD)c{n9*>aS>bNNPd{VKn87Nzv9uif_!N}ycnZsFlKbk33L+gH84x)6ULt_P1O zhF1?4tSpfhJRFPlGlH&pfw$g4^#l3~#mf~6uFy4e`cEO$DeHLx7h$XA4c)-1(1+2C z@=z7T#+d+N%moo#T5O6!zfUhs|A1t4q9Ql_o3*+f@8|ai4Tr6i#j1OMZ&W_)e?jmlBe@@&nDTbU(gNg0e@j-I} zgnmuQVRvkQU9)iE;N$hG&y@_uzC*64Rwwoep+ftD{vO}*9;U4q`vg-2!R?RnZVi1G zGees#kpCIoHwd)oR5ypOIfgy9W3Qc=(V`BlFT|qtu5VOYo_Mw}N-0F9eXS3|Qs=Hx*A6tM^=>NU{BIZvPGuz1#v!lv%*W){9&n%~H&WOA3zxkH z!7~{CT%F1eNlOc>e%>!lo`uQTS`U9?LHT(;ET0`CEC0S0xxsZ~L+Kto_g`lYl%E&F zaoNu;=mGA=3i9W`NPg}HQp(3Y5HDjv`SUR#*9pXNcJk41ZgvQy{P{$r-hagI``>L= z647~`jV0tA2Vfbv8fYos?z2~7(fd4%IpxoXA_=%|R48BF3l%86T?!c~y>Cc782wMh zYW4Sz#r*a655?^N1Nxi0y%|J|{zv~)Bm|Fs&cWjLN{mm3nTYo79v|~rXS_mv@l1@z z`oEy>dKX%Me@{%{8%Fel$0;LxgT^Ug^dS+VN#co}Vdl4p-BFACgs&+g{2&^li62pl zw}ipQh(D0$uZTYogE+q>Fw+nN8N|0RXiv~Mfe5*nA z7FO&PZ@+GstdP>TO!pZirKm9mnEL@qH2jrn9E_)(Hcj;8J{Fg@BRg($kk3&WBxt1+mV`ok_ui znA48Ks6b~xV-k3JYBomoez9)okko;#a;OUVL5w^(r5_DIgU$_<+$l%Tb|neFr^5FnvDl@`29i)VQ0<#Yj_toAlT$`gnOaCr zCu9{s;9;(CDS<-MZ1X{K%L^9>=}a>!DwowL0^;DDF%pXW@elTt*=mbrRSO(3-KwZ` z#1?6y)Rmf|lGIeIr5}m`Mwr&%O1KfP3@IK3oib557}9#h-I6~M#gss-!4%Pmv;JVV zVoW^RhvtN?C{X!$rrOe~t{g}B#jPAh=*7WCMn2=Y`vA=uLq#<>KB-m##R-=amw2I} zwzi%9x$x)B2_KknXX}lBYU(=~Y);{%n&^CKs%wp%Fm0;FE6k{sfjl`$C4!-y-B+6)r+hRKA8u(kU5Y z8Cpr%bkkYiY>`yl(Mfk9M+f}7!&=!~liR7x?8M>;q{@l~l+!sQ9WKMg8MEj|)4S_h zG2}9eIU|-Vw;Lhr>bY2?Kw~nk7_w!2&CM>kFy=D#zdM=G%MjRiy0~hGxE=N4Xw``i z)(c|R`#rd@b>bDJqqY^|NY%{rB?R`P=PT~DC=UX-8TH~c)y%x5-+Ll~jRBDaBiL_= zu{tL{=NJDO|0)P$}7U^s^G4y<<>S!tsj^s#o@xNFU9jkdAD0Jg_TOMHw%BQm#%;eKs|(yvoG=^XKj zJ1RGllIddGGG#+0s2?pkdKG_Z-TX`B%ln%VCPvS)qXta#0%LJf!11tkavYhNID)xf zd7w?|%}tmr^vX<5ePaR%mT)q7$W08xq1-hs%ubtQgmYAL@+(-`+^k3YS5Eh10vi^s 
zGI)qoD2-1MWOJ;USFCJLo4Njs_LCvp+ZU+7k7yl(xloVVQ#LT+AsK0N(NXXrm~-Uv z%rMv*rVSZt?+xXC{Z)kXkb$%g$%NlpdgBwA?pl5UlTgxNP1~Mgv z;uUW}d9}j20a~Z~2RQ&a(V?E;<|c{iD&UNvyu?Esk`0V>#KO1Zxrp=T)(c(+?_dqG zLLmFSP*My6JTpy0P>6q^!9gjA`yqbHq742`t7Ec5fcR}_vUWh*{zvsF*T`ht`i8pm zmA>^ASoaF7dTsd5zJp2aYay{C!U~rssq`!!F}O^RA#jy{Jp~fVdWR|RQ#Y2gJk%_K zrI!;)ZO$ajxAKxYUylKlw-D`yIrX zaood%a`u~Qb+gCnAiVeM3lwI%ZT|n=d4!vd<9|7i=n!+VuzVN(d*k5X{6EkCQ^U>1 zrbql=I{qu_otc~MKl1+-0WWT4>uToAC~j-yY9?xC;$UjVC~IbK;c7|D%*@Wk{67U| zVip!oCZ_-0&*a<()>l<*-t&p~aGX6kT9)PKU6Ryb;@QD36c|$gQa=c6LIMN~(3It8 zAQ|f)Sujv6v^0@mfar}7GZkE+KMTG!2&Su=qHvM>Z@YqP;2?c?%Q6zS? zej(CfA8)=zq03CDM-H2&gX|?q26VT52S&4^o^L-PEg6Fe$ng$}sLP>Nbs5O;iTEaJ zS;0@j{P1yD#4Tc^6=dY34wKQ)clW}O@p#BMD71+%fhQzT;Wk46_MgE)!C>I;i!_1; zL=KE400tAB%t0yvg*<~1Zj-8P*=qmYVTgiiPOyPv;c{nZp^3iM*yJjbsdgJ3+48Ib^)>O&SKa{ym@*-`eG^MfvDE_)5ab(hP-LG~TVKp9GQBsru3m8duXnL$A% zeUjS=GyUj~L&e#6)dZw}S9g!|J-2yXh-dTUB%Y!}fA)*_8AG-p5_-tQ3|>z(0L~y` z!YfOFbB0?gk2Yev*yJ{beY()f%&X(y`1oyzC*?HrNmY)d;ATY>shlDb&M5Vy(*%%7 zwd4&ob}`sJ(f89IJM5+s;-R3zSSvr!OC1in2UXe`k@vcb@$|CpcTM(;J#5Z zK|9X_i?W=#C&m&)wt)gR1j8~YvvAPdFIT7{bA~(h_2#uR?nRAB%ZNapt%u#&W5q7a zuU28s&P!LqDMMgBqhv81&t=F1vLNoH#)kr@U21uWZ1_s~Ejn%UpezZ6$UtFu-9UnA zUbkP!x~-ytIME-iJtCRaiC8LQQ)(mzP5)4-e25J$b3i3-pzy)4uM+9O(uiaQyCwJR z87YwTn9<*heuPTNl=<{3;TPQH{FSrf)o6~B)AY^|q(b6R|3Z-Io}`y3z&E-{$U)6z z8jJ~th9+44peVV2KQVETuI_>(-@s(=Uh_|MI*E&O3>6J};+W@ct)=48Um%wL#!!pzLf$t0ODGf$YlFef|zeS6>TUTJq_S#4KWmCI7Q z-Lg-e(sbX_KvYXXl4~p`Gnzf}RCmYjHn3mzQLMdL(@>r>nXY!JUb^CK(Y#>a(lCq| zte8=s$DJ9-VbzsOi@`1}1)=*@$n=Y|I(G7+BAHULD3+80nROGcRfV7e%@}S#BUGbUK5b}=(tfPKI_hV!o9@G^LPVHgSX9Y|o zloi-1cnACdDdCM2WX~?Lw0)MlxE6%6$5-Ta$d9xE_x9`RVFVQlx>k8wjRh|=&c}bp!$$OcTKk$Db8UHZ8#ScYl$!B`d zVb+akVynO@D~GB$Nx%kO-_kK?P-iBxcybXcj*s>y^E2~z!~T9z zgt=9)4@i5Ek+HUNi*UH?a;ZLzkqtFR4jnB+6MC+i%gKvtxGVe3ST%eAn-`)Y=rE}wm)rJd~|$9=5Dd^ z3Z}{0l2T_@vJq~-7EB0gHLEmaxTB6!b7a64mr)UtKd9o`7{LiZZVHAmqjksc{OhK#bk$r#j2Em&PUE^35s=3 zipj1!Ixaii$_~J~R4i}3zZyG}N18W}cbqqj-sR7Vub4NXF6Ga@cVavF;|abIl|N~{ 
zlj&t6&UCB-!(&e4HzPPMi3LxPb8^eh!lB!Z6F2xzzC0O zX!h^&HHH026uBr-*+IC7nyg$}J*m4yza+1gZ-+5*M245OKu1&-wx z@i*ok?7->|BM?6jCy+h=AT1C?&>qkRIAgYf-XGNFs1HYqazGMTX49OKLkE6$is1D!`&j@z~(f(o~#NtNE zR2Jbl>ERXIjq5F&c4>OMUBgihf{h-t%_%;BC5DyvEE~h%K9Xpj^!WD&_y9=o6Yu(W zp{RG7=34Nx2)?Y5d%NbiLG?8wlT_$|gCF}RHY2soHk2XJSBU;|u70~VVoL~)@Hx74 z?(PI1j7+E}R^dBbtZg-9@mC#}iC+xV$ugWQX4;i%!R--`>xduH(G>)#mNnVr7?P^q z7|QFIxJ|nCGaIx}SdU5lZSZgmQ;>36Vs0XXx=12g)d#>ePQ6rzlTx*tNQgqCGWZe+ zQvAuHdwU)}VcTKMBVwCnRd5xA+zZ^sHD`F|vLO_3P;F#fcxn|NV;kR0MwO;@^7jY4 zt%+yEVhnwKUA9dkw91?Q6B2c#K&;!PEZ~fQ8lOZf0q-rnyDzv)dc1zF8pU*W=P&s7 zj|LragLbbi1%v8}Brc(F-MC{O`1%+0)x-p1Fj!qzkVi`sF9cc2xwWnYYP_mG|u}FWy_(VB$quNw3 zmISC_e&i?a)Q1^ zUEN|P_csoBf1pwosub+YTe%|>NR^9Z=R4yDAQFvbP$O`JpO?X{^VAuP!;KDl9aX^U z7*Aogptlz97In|@wqSJhhxd)Jk0S>`9hz`MlppZgjp7B*o-7&LaHok+m!X~6Kg2vp z$>Y8!Ug*t@!@0piz=Ri|vtwQh^B4!)EP>}}h1$1;{N)pF&X{A_;~`i*@bSY6fWH~> zy&#I%M#X6Y%-uvN0cnvy>U;-T64AOSp`(l~_#1GT_TpR?nI+MxY<8y09*EQvoEO!7(pS%R!ncnqm$C~ii zC`KG7jhC6lO3f{O9brw9aaIhDo0k=yR&`49ieu&yA!*xEiAGsgB0oOX{hYe%`**Qt zVFAY+A~0Zjd6koe!`n2#W&=ENO0-;Mwy}D=E5C6J3!=zj%M?15rLZq_Nl;n_X~RzA zkin+rx{MMJG%XvYHr%*1@483_1EE~00RR(LSnAMyRdvXnej1`)gjXz_UHoQhsz$!R^2q`c z@UzAQ?J%G?z+a(Aa{OSW!YI%nFEa|&gqMz(76~^rluW9mtRw+Hi4zc?IvElwuKH)( zjiLtN=~5v)le*T;XRT=W%g@@;Gta}rTK48OlVkXn=)JGJXT4ddkGr>kc-*lekeIO7 z^8onwBP1lm=k01oa$j%9uR)LX@>p!%H~dBB(ymRTmHSk2nLs3g8L2T^7SmDP9`-G% zH0|=I1*@E^jiJ4=XBCb+7bmW%zgw_OuugDT@C8^msCig0H&84;IWZBs!}cR{H@NcRn;$uGG_8k+^!%M4^#?$d!5QF=o!IDn~Cvpc>OTH9ErN4 zK$v`G$@XC*l*ebKe|pIvy-k3BlN>V1l4S z6f4i0jUIkh>DYAdCW%=9I6V|3CL*8{h^w$8E?Xh0Vk`4U!j>*yHlT+8j=Z~1h@@Ak z=FSyGD;mf`ULy~sq0REf$(N-4!PN1Y$N7Q)yHlg?ZSJk%?e5KDxW1!~F+xtT6d@|U z31^xUj(2Arnw-EXu9iE`?L(vy6<%GxK}$ZY@e@`@DPb=a1P5eNX%z0KW>2D`8$Q;k}eJI;@RBon`R@oE+YCi4qcB0)2 zJnEc(L}-SdDbfYbx`x+4iy3#UARnRqTg1U9ETk8C#17R4-%(d5wAZL3Vxh=Z#uldA zuG)6Dv^yDSnREiS13*&c%t?Y6WQ&3k``F`3)JtWDx-=;twQlB*fjro){7PXb_CX zO-g4R89_GH=atp|4mn#4A5mpIg%-g16N{gUWJw8zCw|s*2tGOKwzJlaXT(hY_S|qs 
zZ9sF<-?A`!(7xr^Ym~7|A(WA?e^h=icSi?4q}U%CkpDvC7O+;Ion;HGQq{(Q!nhf% zZOp#o03SQgYC$hGKyBx=u~(s&Y~(YmT=Rc1Lq~$!U0HNxA(LU?21=MDE2}zMXwHtp zI_nMyLB!aEh*?Xb;iB?Vr>2K3i0n2F%CC7wmWg*GLJc#Gz|n!4eT?ld1skY9SHDqr z$}{4{)+fMOdu@zE)uvb`D9B4|Rf9&9D&QwiBp5Dr_mf5u3Apg~4);4Q2a92-HQ%wR zkwHZ?(h8QphW&2;w`O4ls8Dq39H3Z4rs6vZeo~=wcB7ScP+ooBkaHJqv$72Kml-YA zpRGRRCrf|Gm!Q4}Y6%2w2`QR&cu%{1H=^!Ak>^N)I5A;5|1L&=^Zb>9%6MNex!=dF z_VSorIKh>z1T&M|-_a%SbjJWi7BUIJ`$WQZ8*eoSG<$v1p7Mi`OnY1 zi>L2DH!*G4%DJ$TJ#nFA7`>t_wWo5E7ISSziH2obWU_;y@}m9!e*U+bpA$h2Zbw0``kMeAB@ypNn3X~y>4KoX(Hj73|vJ;QO9 zIOEvXU#qrO!Rh5q9h{&8Y{1jg$l0;?G=ij_P&4>*1KTub_W%nIOPG{e62!VP8EbOAM6Gg_ zWbGQ3pO;`NQzDb}^&5`U0W)YRT4@QOQz;!vBYl|mSzem`D;NSbX`@3Sx|P93s6a_2`{KGzoR98K)ms*ZZ(A?;#R%#mtq0+8HW7M9IxVhq9MOInt;(-QOI*ak=u>cacP z1njptFgA8DLn){lK|NspwheEznTSLnbb+ZI@n{_hIpALdaSUqXpeB?>NbaqIq5R{J z%-ooewpJ0PT@JGWiw|y34^ZNmz(US;wnDuXa-P9TVwqa#P!LBHVI+Hz>lMc$=lCN1 zJbZs_LOI(CSQdUjd2B$-iqckp;N(k^`o#e?ZboSq=?B2k*(1r6wk6$w@rTHrO4fW; zHcRo!6c$agyigF({Kw|juHMmz0PJ`EtryAo6p)Wb<NYt*Orv%RA@GI(YfBFWZ6D zp=gY@Nlvqd1eG(%eUVQoyjuh)o%f+!`xsO-;%TO{=e<*q1UZk^{1* zBJU-AfGcmR{q!bo;SZK~WPlY#k{nQ<>=t6a{Jj2NkT0_vWET(E?lm#|k96BBTBGSC zXc?e#0zyyHJG%ZLVF(`mZtrx(-(m08E7ZkNdHqZ(LPio2ne^K=jRUQ6yDw3}3y)&f zmcnP2Y{wd+u9kSw%~YVB!r`G3fcH1kJ9lbgfCA$7+h}yxdxS2qQ(aAZozgl->2jTT zH1jsufPTakBCx4wo*HI>G#NFjJkt67EzO+NmNn|Z7Cc^NBpl3nZAGb)+y~~x^z_7m z$!k+x9a=LFTvY+|tQp1-S{-rc^`}4F1AoC(Q2QR+P`#peVi~f{t-{CCTkGWG;^=xy zvXyC?-Oh&4Cf7#u3#B1XL-bbl&egK%Ut0>5A%QaEV$6hBhxAM1`$VaS2+0M*)DdaY zh>nA7@@q-buuQK?Zc9-+x(vx`vhE1%iK=&jiA$70M+Y1}oOzKaBNo)G(hABy+G3nnEfL*jw{cHR(mpVlY`#u7k*QqqT4c2_cu~pIpDe|?S(%gxFMivR`qcC1x^mmJ zS=-!q!r)dU@8)bh+uglMmzfr*|4Q`{ggnXxu{QL#?J#2<@Ede*jHX0+?KGfHuB4zv zpJ7e&P`wtNR1OQ&ArIS>+&6?>I>vukm z{p&GQ7$(exw&P$wqLzx?m|d3*hS&AQxOZ$v<=cVmZClbS%zwdf7YWw?^J)8IgoCDL zIUP@t%prhlrdWbkDe^>^OlO%(Z!E9eR7r33tG$Gv;VzEsSBZbWF0HpL`8k=_+-U$J z1@d=uFfZ&({8__I`S6S&tPA0vAtOB}Ray}UpkgRHdc|;o25$~m*t-EQZj|J}nc5Go zRX>^83-{aD@oOt4uhOKER=m|zFG_aVUcJcGWx2-TUe2ikI;#2U)zgz5>%a-bi#oY3 
z#baAf4G>DX_5(^UEwt?Dz76@E1DRT#1$$Z@Hn|es?!N}^$tA6y-R*OhkhgjwUls>Q z)5+kRG%FdX3Req)re7K7+Dnc`bWyt=sBq!P-V#I(50Tzc%g0lL7o2*GGCerV+NLLCaYbfxla~mG@RNc_rD<)`tqc$o+~`%Oh~zY#E)b^CU80+ zJzOr!lj?qO(8>2qch@?S@g?^LBK;-L=SazIP}dCwy)v>DYev_AG}S}Ui7WD4GCCnZ zJRp!9%D51s>6kTqTws{JO5oZUpk6P`v!c;`)78`FCrwQ9+CsKgh#{j7{`s1*{x`p0 z>*LTFvX3s>!!1LoyG-#H%4~h~-)>U0L2@>TDR2w=9=3wkq#P*QNcHu<$4m5AiY*UX zxs^lTm3?QSGNijN!-H5;h@)t4SnR?=`We>C+Hs(y=e--i6Sj-;@+6hw z{$Y$`FysY+m}T9zJX?{!I)bs8b%p3qwca>xs+WjA1OS-;@4Lax6%O7|G zbop#|8(p<-)>>cZvVJiIsFk(N8hy+R?d@<))*iE8F!ric|NqWEW^|?viWc1^pJny#n`cj+yqE$WqRh40ChbuIY5LvVjxzafvi*FjZWr>X z0{#%`r5Z@}d)?Rm zAOyNx<#Pobv?;pJiX0*(Ouc;UJYVSuJvcDxzuHZ__kK}oE5VP}7hu^H(#WgWj3)o) zNk8|6d+y)KzSl{#xc`n(gcr2(c8N7-X_j3$$Vzuz#F!VL{N>MZjg=MI~+CMe44wpgi{!nw7HortHDvP@WTz=qsSOo+W;7qSsx-#)jVXzf*8 ze0o7>k`(x=&@oF!q+_r8HTv?|FZ>R}5pwWl@dBo|=9(eFuQ#w#AI50hAZ`*@=e~d& zcs;)kU1fOA_zTfI00oSMzE2l?Nq2kqc-bEBX8E|%-Lp|=>biO5U8lP0PCc=1gp9dA zWv5214c1UO0U#r!0o@?J}Zaa>v1MM))c=6 zl;}Uql1E2AZ)e)I=>2Q=N(&uZR&)8fhk$lprwoCUxZZYm!3Vd^>Ay0bYA<>C3t!g# z&HC;5@Si>}DzB~_5q4h|?>dWD-5_%Xv(`SQOCc=Bt}A@Z`2)IuXX{Gl5)_^g;NR0< z+9T1e@>@ptl@oCn?h;XWaN$hpwQjM!mi`}rpG~Q4ZJzg6v-gmhprpK$e+C5xO}ust zj&IMcOjL~(=_$J(H)K$#WjrG0=j~3?m}x@6Oi+jgwS;*r=$WRPJ(tg;ydIIfv9<4q zUke?6qZ6WkJzHcWp?8ch3-)Am!`1HfLKH=tSjl>vQmPQ|%NADB8s5))&R3oJ`t_~d zFZ5{CpE1?VKKnD-{h`xxFdDk0$?9swLoDHUd)>Ka8Ehhv8&hF@5dRyV7HsIq|I%6b zfWUeDF_Zgs@98IaoD26>{ftn}oT~5b^81*KOv>^XhQ~V^4V8zWFOWI=?9cn1=ZR2s zz_-Do>z^y3PYK^Lu1(voPh<#CW2|?TSWwyrTHWkS?}+dB!@8`#j}a$-W$FFYI8s)$Vn#u{)+dz?`z!qswWu|@`0lwqx%8fz;B>KF`@5fsL&oT9{ zxpwEf6|8qop_#aD|BXW=p}sy^^TGE+Tit%2wximEgud5g$U&%a+d-?I{=byr)ik9Xo!B1h4Ab!^}+b1QZB_Eo$T&sK? 
zpR+I)SEyDdQ^BscKd$z8?>S@X`5S~5*(tA2av2zU#V-O)8je@{m=JYu;<_bDw^vGc zR|#-C37Jm{e(zxR=j&s&8Rzy1=j9A=8v^FJ7rA40-(RnKw@7|Dn8!|s-Zx^NM&80~ z2c}%{U&Ex|bgosZU4)MTmiWH^su=C!`g7i8PnJIa7V0-2N;;Pl3iNWD`lZ7siMjeL z!DZ3w{&1GCzWJTw_q@i~>i!kn7bL3iS(u-2l5?9sit*Lzb2d{hmM~H}M&4r@Kwm&6 zX}$egbLLH(z{5)CuD+P+w0n-#OW1$$OoVQEL@(dvZLq0i7hD0v%t7#wu?&6PLBb#uB@(&{yh93CQ0jGg5+bDsB0~|=$GSK^ktL_> z|DeNseX^g}As=9C60Z6w=|X(NgpM9;SSsmDaM_87Ej9fPBko<`*VS1zzo34YK6Rm) zye+G+zvma41)UR(HMLS3U~F=Q>Wdu$oJ>E&gpVnaT2# z(#27Kh*oT4OSbc`rUPHgsJg{+3!{V#i&$Jqjd}RQXOgKxQHj-0gu5owEC6DF% z_nYVU$5rpX6Wok}YC;LxgTuCoVtsdcLwtJk#VB&bL7u5v5IPz7 zTx(2QgrV9A=uwS6~$=xVo77mqajz&JBViI3siC!!BUPN4%+C(7n71$FkeK z`o7XY>3%einf6w8fs!JLoLzR5>F?NZ>!uyPPG%QkeoE5xa2CAmgQs$fyKv}JcS%V0 zxBfb!f`G6>3(~JK5iWp$M*jiFB{w+bu{oeKDb{>)acOE6bGEI7?&#H!X0x{HH;VHiY` zo9qWO>ri&&sU1qGe~dd--1 zpB0-xuPnFa62sa9WNy5q5?&qA0@oAdsP1q( zdaAgE+=+;CLeL|dUP9&>4?697Do4?QswA>2wWWcrqcBT4Rq#Qf-pF88H;el2_L4Pe zdK)5kk8QPJEP86~eQYm8H@tOW-PG#kcib zeBj5=-^+qgwi|W(uHQHK4Merfb4AuRKR!wj-s)nDLg)cxrd`|`RnjVX{b;CiML;sO zC(d0r4LnOcH|9SvNZz*TF4lT(K_j9nLXgSh=+UnLiq7HKE@1!O(JDlE=&9yBvh4sW zv*j-qPI8nTxh{iuu?Mm5-kGAi=rug@4O`?!p)DaFE%lOMY6J0g(kXO*lQArvzDRMh zsV&*|^`yL8spN`|vyi4uSBR10KZWJ>gKT=Q5ISM8c89Kemg>oWvT)IFtb<1HC|}u~ z@9b^qJgS?tCdd&@Dfvot8)}Ftz6%(5Bzf4#7_2^$CQ1zMv;h(j`C^I{P+aF3w$B5V zbXhe-W9#0|CKG3vBqm%wWi#<4Hmd_?km57K5nkfx)Cx1A-Mb9N-*PyeT5f6cg1|A z4SU>C+7EKOO}3c3{(j6XYdR_sj?;yl{Nt{T$vtRY7~-%6E7v*h{AD?PQf;M8>sAUK zb08WCN0)`%qRug%sfvSx04TC`5DE3-TM<-)9ar)H+@^-QD}DS`HC zrl*MAN~8!dHYZtluBN}I84O_1;A8s!mv5zSGc3rXk9q;;#Ri1wsPDVpc=!?s+i35r zF}0!mxX_B!51S3;i$+BuY}jQYO}sPDgf0vjfz^yAz!59gY@PBRDYq=#w_@h6XpOmJ z`(6(74aW6EVzuvAH2 zg?V+eoY_m81>l`YpfI1fOP5w{zn1^N37X+Wb8*qh>dwsJ^!As0r*|{N$;a_v4D5i6 z)%I?jp7+6un&p$l(b0sB*XeQPL@5!D#d)3B<}qsR240^BUo30#T@=Ii92}NI(-B2N zP<#>_=ZYA#Pe1A^pQ^)mt)qIH?10L&djoT1Fm~aUN3R5D(Bb7UsGvP@7$iN`zk6*v z0XJ{n`o5sB`H;;22>6i`nFE9#gd3$*6o`|B*B-gel6GIsKZ>x+7a()5HZMBu2b&Q? 
zQ{+a}G_B(oX)r*}6LpQJE)K%?$42VpkjxMeyS2u@I4KHBGTpxlNtq?EiscES{UB9E z#0&t;`+>33Ui;VY3#p zSZSSZszPz38!u(q<*pN4hUoVXDn30lhJoPt`3P}6a5K2Lf74=Jl|y;dU)Qih@Rz%s z+$`8atM=B|x`9Je-}$Eo@M#=36*2K7J6cyzRC||;E~gY^rE(J~h>K^eX(WV7g^52dCVv+kP6@9$Mxw6yPXEk9(a2GT@_DR6#B5$I949 zM1F#)$jzPozd^dW{~MXD_YZLWAAI-!ePd%`CH~L;-!j(!0(x^Wa}oc~{l7K-XUoaV zNzB2?Ld?O+L(IwcpI&wjp8xId-x(|0zcbE%GET04nr#2M76<2lWb8Z~|E0(C&%^)p z`k!9*f7)Ec>@3{>)p9Vi{fm^7n4Rl?$yxsktIkaPpO%}2?SJX8{V!c+Vm1yAVm2Ng zVzz(4^#Ah0#YW7<&hcNd{qyiY!0Z2s_x~Tf&d$cg#s0s+>nyCy9NhmCy`Br>r>3zo zFzbIqVcpy!v)W?4(NeFVw?Mf~&7dd76>Uyu-(YUP4p1opO_M-z18h43_10(wCABDxq?M7r1*fX+8VC>wDd~+4;Pkq4lSJ z;Kx8D7ED(4A3D5`&E2UV9^X&iuHG{0BX;*t6 zf26O(@!e2^{c1-S#$dT08f|MD!$Shq?}q)X&sp@D`H0I;H`~6S7vg`*xlf;7zgV+_ z?4W_)`x@zN*0b>M-rvM)5D;k7;>SV=H;}0}=|jCgb3I`)e4D;o*kt}@8Q@vXYd5x- z9G!ItKw6t~3o1%}WsuWjbL2QNe${_ypU&^?IJ)vp{ta^Rue5?^z~sj|lNDDOh;1%t zV*X~A@%%v$#LAvu-TqfCoMn5!O@Il?Kr-KtgB>VBxW@s@`}Uuj3JLgc9OQesB0m@abdpXqyiPIv z0b{9Z^m&^3`bb6fJh-VkO3-dyCIWixKS}ebm}NWXA)}P#4|npVy?g39G5e@u1ex)0 zb(7i{dlopYc(Gf~upfuEiE(eD9_X(AuG9iIoBN`#fB5naWVGU zi4^a-p78n@`9c+`<_~s9^As^a+4sZQasv@-pN1N&Z1+Z`KU~#N$r_AV2s^xqOdII}I@`HkZ-A0N(q~sGkn`IJeQR|< z@TFA>B{>m5PEI~_1)N<>pAOYh)T11Z{?0}DWJO^Nc2&_+Y~&jBTgntV_M{{=^f;c~ z|F%ye3~C(ee(}DN!=@q(fvvJ!5frIK^*CZ9VCH^U4)a@unKyv64J)QQPB-$_ zdan$w6l%7&ZecpuwlcICICOI!mKLiDzv>CEMI$sdr14`1*Ty2YEr6a3irS5?mZzcG znR_|X(b?HHVhm_%@7dz-iaQ5&;H{qhXv2Y&Y*=suWgE3|=7#^cjP5fs$Rw(JbwU`3 zsNThcME>ZOzBUFAFHVPAOyka?5yGx-{FC%gL^he|p%;LFdqh zCa`_Vb3(4f<=y2gkU7tZ9)5hhR`oO6)2P($;%CY^E=zBlszZ}^cj^N+1;ct;EC^d= zrS`C}G||LcMkBF23HjrmCf+jFX15LV4q^imnH@lwp%!(4u2wC8@dmGfR-k{$#LZNEJa zo~#~GHm3DOHPnlJifSKOpK7kjO25jgtk7Mo84h2q>4u|YbQ%*;*}Jv%h#U zn$11B^>t*NZLJ=<=vek0U}jpY$Y#U77^b#ug8!Gb~YM%$%dg?a6<-#U98(mJVJYFx!N zAA6}~F*G{`&NOo9s`g$}!W*S*ad$`6C)+((5Di?brV*y;a<`}S_~yJ$ap0J=Ne+^7 z-Y$!V*5Wd`yeiW(AGpxy^c5Y6*~d+R3OKCE&4rdLT0yayN=hIwp?WVz9j&?z!@FUnzy?j^3p5j*KyX1$99Qv% zIqFG_9u>eT1bsGc9bRazkqszGzL{RhQRlBJUt1xhTwQ6u|9xZ}YCOybO;y{LVAI(} 
zUIpzqZNtC)_Z)NGqD7;<-3?N6+UmxsdsDAE>jd%Ge!SS?-w*mYF@`_dRP1#P^d|-q zA`I2`)U(rr&E-8}7FsLr9qVb2DY%}vAa+`u>~G+d>g*X@?j>eAhl+T4SJ4`Knht+) z@MeMOl*W|ux9yiBQcJX@>ZqsWKO zdgj@q87Bv2ombNeAIBlw`m!GuhLlK9^~6Xw#v!^l%YT|DdwDx^HYYoJV=rwSR`-4z zJHv+#&z3w{;LOr&DaRR`v|vc=l+>~zTUp6(Tb|j-j@EKQxT_X(Z3SEQ(;m`4;7zpR zOq2CiTeJb4gUXXumw(1)tphgO%0=98ju6H$he8<&q*_%Y9Hna5(N-a`lq>sT*>*G( zb||54TAU42nsVa|E0UlNo7A#t`EJISmk24asGli!U^imvxQCqWly{{u;Vc@PDPfZ( z6Ug#u%;u*o@Fev@G9NpZ&*+zR$64_b#3uyxRB9=-q5X7C!)FQ}!}%ITK8~$(Q+AW9 z;1$%+5DX^_P}~Ok;tUDm$9Jew2Vt=Z=y6BKLUy(GlshjLdosHee-Sf{ri3BsTFZov z8jAg{z~uj^>_cR5kx@Eg4v;(zvA?9K4xiOeaK>W8L zUtS7m8su9~%0}~OS}yptxj78~sa4<=hT{_@2@H$Lk86m==s1dqBm?@i{(@>Rh9!<- z{1w)P?JiVBejE)}0IoWcLrie+!$?N1NmOE4x_FAYUPqR;XvUHP1!Y>-cFjQw~ zuaUuV)t@jf$W7Qh$T|Re@jv72KWP^r37sN8H2E`(MorYiJ4vkMu%G?Y~(2DePq zVN6HlE5tvcHAYk{i;#3Miw)y_$j$?nhAT|CAK2|0p?!a9uZHh%KD3#{aMqX%<8ms) z?Y<}1(&P?#_Wgu9&@*yDJir@T1q`b{VI1|5`-yOizvKWN?=DI4$qy|7N-+7zf|MUI zK&2reXrkBTVH6_OYp|hrL0j~LpxLY-uUSiXk{ya3F|TYOW@nE~XFu8w@kbnxcfTI+ z30Q>=B>Bh$((YpnO|nP&12UkQPz?(lzQywb$@!JM;(<^@hC|H&OK2suC*-^$&U|uy zk>A?@1Hc#zA51P_3_1aokmQosj_L)PpsxfJkkK%m@vq#}zmhs8bS7~7&wV9+BcTAO zik(luJ9IY8GwL;oUC}un3?8&6+BNBxOh;%|U#=6LMt>TRZ-@bq4a23Gbinlup^(tph3F>ZUN4KMgR+79-up<15g3r z4oQo06hy_#~#j%n-ao`}K^^y9?yo5MK0(tg#0gOW;L;u$hdV4Qt zQk47%1tu{ea$&zG0)v_Az;6ZjgP7_-Z}XVyP;VLc35GWNI%r4q`Z{bzvH}0bQbF0b zq*pHPI~_+`J`i`GX=yFLa;F-g1B0s;FQW0~$nr=tN{cQ@? 
zzoyLS9zRl~?GEL4EZ?&K>rJ=sslJan^v~cnGP~j}4)6%Tvyxs-IdT8n8Shlf)P~rY z&i`LS?s=4*wC-+{ouuwzR9-}XKPEF;`!I$Fu5km44ab=$?K7R9i7Z2IdowYDv|3d# zp+bhIfGGP8{(P~*S2=${LH@STqrgX{y-+ar4f$MiiJ_uj${+a=Wa(YyMr=FQhv{6P zB3qfO$U(Rl?~(CpD|J)ZQR0)}oS}lNcn9!rn($e|Q?8flW%P6S!@FdwvRY&q%1f%x zO^F{Il}L&&F%yQ~wtzM5I40+(;%Cc1Nq?95vZ5>95i}>OEzF(IobN=quRr7nEe0dz zCmBBoP!)wSK`#8ooTw~LO^*IT`YJe0^%tcGnldB_StgPg+pmKuxQV|+F@wB#F4|CAx$Puv6{Y zUlkpAq!`p&Q3TDN9s~W@a*k&8^Z(AK{h)3%}4Fi7JQ+10vr*&fhz_-j$6+ z{ZSqvmV^qwP#(RPPL#icy09K~mK2x9oJNTx{K4I!&OMekU(eyT(GUMK)!h40eo&7b zqs|mIOp#1cPLWQLPZ75uXTdDOEJCZns6nT|q(CbGB!|`kQQ%00!F#0KG)tWT{E$%Q zphSgn(x0f80C?SOw-9+{6!H*hWa!vyX(vZfNafE8A~RznsZY9puZUIU?~pgkB`p;r z5q})8UNraCUr8>Vr}F>uxytl4oBz0;rEBD+anSK+*y-%g*J3vnd4yHynTY7Om`+Bw zYgRDT>0EbOO}Zqs@LO~i7-%++G`mfrCOE(>8fh;cTU)(XTsdW9FRsjSR@*yo#@WBm z7j(AUOKt9OHnKI05iD!6miTCFy4#Dwr^C(tChg>DRlri|?XW1coKFuUv^JR+H5uOz zyBrXu{LAq35Q~!uea`lM=%L2uTENLf-?Y`)WJmN_^fn|5B5#EOj{gvoUc13%a2qa9 zw?MbkcpKCNuMGS-#7XS&mE&IEX4}0NJpmaQwE@u$F2wwz59|iJ)Q2B%WJbQv zIq^Dn;2C2Z?@XR5pH!bkKOvv}%aFpaFe5p;ekwG-zISV9 z4P4^8;74m)^8U>;=JUJAKel;=%}1{Ggx)gFaw#2m_63)7LFcLxHAW2x13K}toz+A8 zs@zm!+t#KVrg;H!K~lYz8eim93zJR{^xUX3?ml4* z_Xc{og#F@X0|LG9J?ZF_>mqu_V(YC@+TzULev-@8%a8B&9}YkJs<9roBmLO+1Py-X z2`aoEfleFz_R0$b2rgVRoMABYZNn|mIyhq+EzTLkonuid~4 zi_+8#yO8%ik+tX^sOzw4Cn5fz*Il!R_Mu@bGPolm(Mia)$d&iGKfN9HcmdvpV15k* z9(!3TeN?M3zI!TO@SSK{u2`4hRC_Y|V*(vbv(%gpXA}85DRr|$hA&|JP_x5C`$`>b zhJXxXq8|AAkD6qm2VJjgjTg2Li06%f7rGwAz|j6-Js@Kb6x1HrmOGRZO(2-Sj(EOj zh~Z7~s?Z+V!=!G|En#DFaJkUyMv{mS&ajCIJLV*m zZgA7Psr@=eHdCqcS>CaFj7*m8!#4l4XPYYgJ1CGgsqj?g?Do_-Az>~;Dzn|$)J|xi z3*Fx^WAlAw-`vsk3yq%u(X@m|NKNyA?M2J+&1mqB@rBC|(THwU3 z_KR=Xr|C%V;|;Mdf~+HCte{(|?*0D&Q9!Q0q>^s%;~L`!3??uU{Kr$rt%^!a#ngDF zg|Tp^WqM{{5z6mXX-+E9^_G}UDbe>+7Oq&LSTu{l^D>^9St7IGxoKs|EQO`Av_54C zy(Km*ky$#+2$sbuH4}R4rl3cBAKz@2$Clug|2~%s)A_834P=8@DOz$dUx4|+SeB=- z9LB>8I1i4fJ{DfW_G9o=>G-*)w(9o~3mXF81u2m$CafQ>m+Ke^*{UGo_jojfoI68ns$Za|#19?Zt(K1;d4bMRqwY zLQrf)#e;^6g$2oi7}H}JF7P}P-#RuQS8;7T> 
zmlM)oxBg!I72J9Lq3wA>l^*lp#h481`%8z7EF#MyOPIZX?a%uKR9 z1K&E`$!;C!S(K>PXIj%`IazCn13AsCiLy?J@~AvuE-NdGiZ3lO6`C-x)2B@>OzbLP z39e~V&HD-pS6s1oFEj6*HkFcj`S=Oia^e^s;fY_+S6;ru(UcJvM{AZTrfMQYIn8b_ z9>i#&;x)3EDs|}9aV7bMrOA5fY=7be$zUDma2Ca=^=zHmC=V|jQk`j1Kf@kl3udGb zh*gQgFNz}#0 z>VW^7V~)L$|0|p>F%w_Bx98_;(mR9mPREyZcet^x$joy^{I%Y#X~6joo5;g*)<)HM4*Q&DkYDh4E$@9|`j6*&c2Ht`J}eLN`@ z+bgEFt{m%sAT>8Pl~pa-e8Et&v%)cGYE_nht2uAfu+{5JD|6#2k_S(!x#P(}wS!XF zNBj?7E+Ul*TS!GHU8 zT;MZFuafbezKWvfO9ID}!t)RTX4V3!U=PmH&VTX}T`zUANskT8Q>L27e>CF`pU9`$ z?bzQ{#xEGJ{kXv_yMJbVj8sNU1l{;WQN_i$b=0xJdz|0>u`$U44-nr{l3rA6vbOu?F1-J|fvH)xFtdaIHap*CdvKRUK?*FO zC_M$y!tHp%CJHw|IGBTTGp;Ll^rtrp)WR1^dE9eziHX(tIoS|X1;KnuX{r+ZHBtLKfPzlU5iH?yRPJd z2XNKb29Ag`aYmW&7|o|c6WuA?=#Dd3A=#?QiezInX1#%p));k|e4L#P(SbvcVmdlH zLERZRupK?r$_)^~Myos7*bJ-ISB5OBldo1t2!y>759pdd`lVv@{^(ZstQ&Lfirt9IMO(`_V>fySJt08j=g`nFYxPb_*I*=O<@d^th6@XpLI68ZSpb*Ck}rI;tQSv%76^?EHP9)tAax&*z>yo=?68=QedJB+e0I=?e; z$Zg}Pcq6|W=o;_J%Xg|%vg%Siy(d0D^7$(&p7jLaRdcl8{6qS;fmX?3Ha#;-EX|OTU6RpZ$xJY$_e58+9{tE*(uyj+50Z|4*lWMEusBjK z9dN8$WCI4LM@C>RpjD7MRZ%buw$my+ni;8eMn=Z)-wa5079e&&a>;~j9*3w%yD&OC z%bBJ(@|0euj_hPB9?R4w@SX1DKP>OrDj1E@QhfB3I`BDL>Y$io#9bx|w(plW*%M(3 ztlgd|$Hm2-8s6w+F}~2=XS+(BMoUI?t6b>FO7K6KG&o+6B%Q^XE;|#ogR<7zi?Y+B zkHtB%>`_b<^-0e3G-rZ#N_@Im7m-m`AWSV@F{EbQ=^I;4`eybJFzrfTf;K=$!kPj2N~2p2@#?aA{f1#hdw426vH&_i_?s z!zIB}IbFR=SPv$!3QxPk!IYs45A&bt^Y;dYw$4x`hQt=%O&hwD2?xZRgs zW>S9p%W3Qxx)DEfZFjrlhxHhS;m58=-Kll)Jts-ebd(Qp`*GR%hUXvPepVxWPNGcF zDV`FXxcFo-_<1fqJ}!+|uOCDS>punn^7RP4gy9Z3@2^&7P#}$@a#;!Qq znd+W4)LNLE6+J&v>wjo)c}!vMg=G*TFHyWuucZ%{n`xKvN;r`wHX~rMB{@vOLtxAO#@w#w) zu1TA$*$MiiUASmm%~1#IC5)1XpQkRH@mxUr9%m(!Ov< zp0*}m{NS@kl*OK>5Wdd`F!@`H6HmS&!lOHFBSD3AlDpjP_EEXMmDc)*S z($c*2>^POEi;X^(D!vhIHmD?nIp$cF__i_Lpi;#-GWe%eE#6PVr5s3yEU10N9G|JT z8#;x$$Kvg1(b@6Cz@|sQo?#t;FEU1VxLzM+ogFn-H5d4FakJxnqJ)Y+C8A1=?-29y zkH6bxp8Ren(Yu{P7Zx{UP<~00P8#8#Gfc(b?Q-*@H99s{ZAu?rm^~yr(WI506Ru|& z)8#CDyVFD(p6rakN3A+;K$>`~FS7e6!=WMS{ecaVVKVhnY)D7Ko4C?jS>@b 
z;7KL{X0wDLo9qiocG$;bUOX>hn6M$Jz28?=-D;gSFthvmJ$0$v>ZP}&)=nQ((dGUUvd-j#_tK1KPh56yy=3*DI9Ff95Dz#_WExF_0JI7WZ z{K2uvSgF+}BPDW)%-1$r=-S@n2@XtlWvi_LnHbQkuY{6zn@PFqffw(8;N^!06h?_53F zJ-Z{i;ckE{)TbD@lPJ;&i9B}KGg6nugnc;+ud^fNT^?6Z4bB=%; zd#O-JBsiV%FT6XRgXo*+2?A^Y3s$1j8wny7o8dDsXEg32*ILnIp|GX@?BlyiKjb zf0&EcD5KvBZ?$$0#ea;#pJjY8cy}AG{R`nBIzZk(Fy(M4O>hnx2`Zn%o&svzjcO1Q z@uJd8W{}94lN6kZYTKimK|9VT_X{9%=&E?#g%&Dk!90C!$>+RtAqM8EA~Elg>pD(Y zNhvK-XNGPW96Y@#om_vik0f zbM-5~cLfl6VO`|&z*1Tfb~>t$1%NVcDoQ=_1Qm;UvLFQRE2PK{%|uj)gG&)nJ_P_dfS!DwwkG{d(nevmUl6U`u9!`U*M5nC zvka54QgPAfNDDuKkwGSv2GiY1aCz-Vr!YC!$#;3@V&h173OK42=G28fJ54+gFr7{ zjlp2d#PKHH*AroW8cJ$@<1RW-3`3=+u*VZKOXJMXHBO5``ZIr@3qvs!m72pIZ^SHJ z7m6}r^vC_5>T&e5B}WZwUW`<0*VVQTKf-!PMn`<;4`5CRGe!_I)&XyF7I+iL_yUmY zeU0OKUx!=|xDMqWFVq`y|Du?&0#hT-mL7WH-ZOuHZuJ0oKmGgzZ0#*;*W^HDPq)?D zwR@nvw`jxM=YH`>rQ_cJ{^*&Xf2!Jb?~6~2+`T24pE$jG-F;gU`H8z>JOh6C8K7Nu zkd>s)Gk~Mr4LBOOxeJIIgbn!=psjxRuHP+da>cJS@{Psh3i1cCnKa*BE(F2^N zXY8^#gGPfDU++FPQ2SFVt__fxVB2*jRD6TdN;V@+` zyxQa%0)ZvSP$PxnsnA^R^oGpU7K>zE3cRYY`xE|heR&{`ubuqFzBWs4C~FS;J*st- zr1nEK)6sID}wl*MO@fvVA%%j?u!&NshYITnRWL< zv!06xfJaGm) zP>&b0UmIJSx7zbVgV<#K9FGrS1NY}aQWp>-h#iG#kC8I!XRy;niZi)uiJC&QCGshvwe8f9ZBXLH1Shx z8mDcUXcQ)%{E}s69d>Z`6Iz38-tyJT_hzxbs&zC$&|1|jj=iS_P>{MXmlHEUm;Wcg zB7>}Z7V%+EA{HbFfk>M(X$yEE3b-TA1Z&`5Dyk3}!^mQjj7;mC)Hd=lPKcR=35bxB zBCu`n1KN5s>%{|Nyg_^MIXsN{1`Ig~QU^2@LrT})#BalY38M}g5c-Q!BM3TMrjbAZX`8`T7gcK9i2j=qHO|Pg z=zCc-3#WsnuuGK9#(TmusHtfEod?A~;`z7-D|$ve+da6_Don^l_P-kjn^X2+r$?`M6?$vioyomriU*cHTaHVuL!+ zlkDDA$*5>GEuw7w+YYtwe|&rNo$a0LTg;{I+@`owp%RN#ic)*8f8b_z=;)xYCEOj> z**rFdg)usPcCSMlT6b>iuQk4myS3Pog}KC?*WM%$@-PxcI+4c&sZ#Ei%h+~6BqG>- zMJb}ET5@iJOxA0M$r*I8C}+ynepRKwRig`V3q_Hv3Z?dL*eN#_ge(b);?f8Fil&l% zK)*P2@Xo8A8j95h1D6^i*`1%*5*sS5_R;3B!x=JDGJDeRPugXkRjb(Gu7@@RYkyUT z`!eQarp1xlkxQ+L>Ciu%cY>&m)cY`<1lNe*aKkG_Ma|&t^n?G9#K-J96Ws*wj|u5a0Xc&H28QU)dR~ z_T>yx64$BJOqNaW+-J#DGugpbMlO|!iKi`IlhWw6sEQw%K6dW;ySo%7he2ubn%fdU z?LPAedrR5xWSo?hs{jL}!~}T~IgT8i+B>o40CXM6{2IhIgH22Y0u#FDun9y0^15Tl 
z1QJ2*Mfy-F`+wTH|1G_nU3VEQwt|$<{vx%xir8>ZMFAT=)$@3nOE948pMB+OcLx68 zSp-aV)33e4wF`i!5eR6R0V z%I8nnWE!TGN$qH<|0=c7*@3vh=BGrsPOb8!SEmFl%Q+vwQbORQ#LzmhojYG-_T#xYDdNg z#8RSCUQFceK_iuot&SQDfr2;EQK%G{8dpe7GPEi+fe%RkQnU%)Ir0j~ z8LkTH!}yVH@Oz+zlad&}6!Ojf-r7Fi)4O{h zsRaHD#|VkMZS%3-6H_PKyKej21Baj7lYATBGLbAL%ozG^JX#p<@o0@&vBqsSI1NgL z$*Ar)_1%x0`2N}c-s4Yga~-(d*EyB|UdVjyUF@?UPIMqg1eq9Ah!qBjWY|y5p!TVj zl7*R>8%A?pDkfQ1Ma9+|y=fQ$u4HDfWUg=%0@RKDO;ftT>0-0*nd8k3hr`#XvCk3` z3PcgJ$7l^$WM7tYuH?%yTiWMK+3CYtEeTE?@>Nf)3G@eHp8b~Hs}+kSVzs|LGRnJz z*?8TFV=wc5U~tQWTjFx1+#CRUYQFZS=mbDy5;=+7Ivoz{eF+Fhf!cZ)yic#u>kE5k zK(a<7tGWu*k;7XF@+OdCP46hL?{`4J%B6T4SaVUjip^Ghtg3Ph%%_cea2WZ*_F3 z9FI6B5=q3k#@`WIRut9E4{nJOVyRRnl>%?Qaze;UOY$6f3eg}>Ax};}_Sm7Ppp-72 zo|q_a+69jeX%BTr=oxH#QCfDD4}sTd!1K;t%ALFW^i%8?fWW3d)_-`{X(%!EIrg^7 z{>mnHb)}o8B1BRFGv&4_3FVtC`}@bh$IqKfoFuPuGVJC>n&!*!8KK0*#n=Kr=EZR& zUbG7Yfi5F3{rgq{_p;i+4_Gupv|c=;4hhM4c4saL1ohX>5GUDu)FdZRi8U2$WbZpa zx*^z{-#Z6+$us7IkMD`g3^4zq)_OD^4N}O&B~V2+m&Vv(c+lPNH6(zp zJX2sp6YLP>Re)7Y?<-IA2cYL1E9Lv39a2@cvl3&?6_DI?H~Y*&0U8-!Plp(D zvD?2q$@svYQ{XS60S?xrgVy*V#mLuD<{f6lIL~1&Q2nJGX7Bw2TTfR#o)w722X&G> zXF+)UC7%R({W$W;=?5Oz_1JFc2M>>pbyYXONVem*aa>P^a@1r zKg{*Hv43C6)Aigxz;Q`?lIN2C0+ZaCt5-3AprzjE^4>0e1ExkOm-22m!ZH5?+#DAp z;e_NVM2oCLM$WHBE@R(CD8vbDaorjh)Qy&8bPZdD7T8g8MSBPJT;5Ic?BGd(gAI|*@`j*drf2Fb&-m#8Rs6}YFqo!%OQ@j}|ySvbiGs7;Y% z-!W)OyGmQxjqqTgKjtdHu+hh6Ds?~`VFMOV#_O=w2Ri*$dUZkVy>oYoWS#m%NQ&Zf424R2n+| zir}{VA3CDof2<>j(-PVjjp(l%Iy&3J&7s5dAtQtk8APo>uTLPi6s2Qh$%NBM^O$T; zBsvblH2K1bi6lJHU+lP*18I6sX@cDX4=VjhZwD8j``MOC6JiZTo`a2#&j38N=6wP9 zezmv(u&A}+vMDJg0DQwP+dto9hw7obIV1jXZdtxqZ|4OBPC9=8`6y(b6X%O%=v8Es zUN4fJoRsx%%pfp!8&tBP8MIWSCaOg?!?v{XgQyfIUv519NZd(%=^eQAcB<&)#*=>Z|MbJ?`r?SiHmB3hs zPze;1wAPbovB63p_VzM7o%aCEwEicQ3T{+VQ9luCe&YjkH9w7#7BRyoN70}12#Xn@ z|DQxYI^ES}8G?n5i-E0M|_ z4Y4dZxR~$1*QyA>+D3|XS9F#8fmqIq1(MJ0!Vx@sH=h$V6<-%7MU83G8+YeRiRk@F zPRu~8^R_{Nb?w{B+Xt+rgjgS}r8Jjb&_RFYwo#@(0L3e&fp|@BgAM+}C~fDzt698e zHCO08)hv&dt)>WN&09^AtV`7`4{nXg6@X~iZC)*Kgd&x{BfM^T6+m#rT6+}#68;OM 
z3mHNtP(+xBA66#Cc&oRZDL-Gvon^HA`=82ClMI#pbPRQjp{6l(>`yQ0QKKG3^eR23 z)a$pm;(zI2!>(xWv%MJ7i}t?MT2^jBRrr<{i>_g=n>Mj+y!-0-ILL^(<|OP>f?vm9 z<=(g^o3*&E`NDJ=z5cc5F5S`lVlPJYqRN#nzisXY=3Rfh-sJ)7pT=)(Dl0miKpv~@haBXBrFVkk?zp9*A z6KOm4?c=xp&EACCopeSLEfH@xzw?37aMg`k)%x1^Mh5(?e$7aUY4vN{+3qQeQ!AR> zS{O>|@a@UCsnb1l`&dM;koydFKPJKbecL*EkFQVrikouoj(pl^98R?F2zYl64BfFV zMoFW!zpx|bNWr;k*c8ssu8$=#QtNd&ROzgdNpO{Wpb0PHzeGAgUNCVX?HHNCwoM}n z1=0`8&+?!h8EqX%ca1m*Zx4I`U93DfgRU-8-U{_+oz|ztv@_R!_l!o*YE6G3hvD3K ziDglCvafAzi`C!L zvpP_((g#cZY=|}ooz9SnT2!Zx9R9-gD6P@SRYs@Y!RW;rqsEY4Q&{gwJC#FcpWbo& z+51W=Ut4&hZhp1bR+Um~c4u1;tVt_9EdiJ{+zE22m&wCO8o8;ylamJ7R5}cZ{q`wG z*xbA`s9Ka3V`X1IN8Y)uiQHx&ehLn5zPq81&)dPch!QrM(gJ(LU#|Nvt#2NCtG^2z zZw1=j+<&e!X$-mP+Pmn-$st6KxRJj4{y78oeZ+?7vF(sVp?6#?npFe*&A)27I=549 z(Lc>!lduFW%xR#l!7kaUh4-!PTDz{jbKTmG#+~D*C?&j4u!dx%t#zQ%Uf?B!p2B}z zpGV$4O>x&~3b!yP3qkutQed+q$lnV4us~W0QXz*U%_sF;0XRXCMpR8&bz!jF z{G5SmQBmwEclF0w2V#{uRfB!(#sr1rG<;#(UkPFX@uSJ+6EA05eWT%A@2tAm4qO{WH34LC7%Y=N>b zJ~Jovy(1&WNQXbt70`C7e;|Q0a+e^1uogn@;M@S>0QUUHDH^SEb}g&hH4W@K zZ1xQ_AkuIE3HvMSFu#vKyfL4kzn)KM0QR}D>-dCupa3v-0FS_4dy^ml77KRN9_Oa4 zkI&yxYbi>V<;DhCThqwRGCOKl`hy$TQA-dVr)KUr@$|9Q&Qs6aaq{V-t+iQwdaSz@ z1Pw!KZC7D!iv@l2)@L6m_ue^k+pXWbzua@@%$eRpqw&!2p%QqHg@z8nIi=d;1OnFB zJg4Mtq3Ux=XIGk28c+>?*g2)^d`0t|(h}ldZcYgV%WZ+4&JI^Y56mI1pAi@w8cT3J ztasI+zO*?7=ajZ*Q>&wT^y-Q4pJSEIxU;skvE%x+x>D>9c7}A-a~Dn&_K&8Nu$%Gg zzJc_bJ-o$Wm$~`9TLgwbJeOLZyZ;Ls6amkEiSlIYA1jXoR$CbGaZyj`uWHo3!PqDcb72Yqo1 z1@muTM|Ad;#l~?c-NPtxv6PbO;d37buDyx91=c=*yiv~axg(YZlL|iscw79vucvB@T)995-z0+v*=)`)HUSaX-oMD&E9W8F+Wkh~~Jc2xe zeER!HD;7s4k*(M&qz5^S^-cRi+B?p2ELdq)9_*Ry(JGbNo=Kv52C3cwAGm8P+K%_P zZavT+cq2ae#%LV8He_CB4wN^%)n7fUgcEaS_Cd&pr5qpT3Tm0Tod9r0IaezJq}T|~ z!Lea}D(N-o9aMbEB3gdv2wRA#enkm14Ua8DhRH8rB8k!yhoy&-cfUqY(n{$*ZJ^-y zwuKC4n-rJOPz;r3F{~`a+`60dI*-z9%I*LBo?VY@Pb@`;Z63W`(ICY2p`=wYgPv(| z#ab=d&=T_NTv_^k`&mCp==zxAEu-<~NvJtW3^c%a?GM<)#CfC*`4n%ZY%_7LKQ z)$9vKd93c1W3a5`W$p6%2|1LnkxjvYv!a-{4&a$z;(}UcHhm?{m;8XsihY3Z;G_KF 
ziL;f&hdoSdyh1JGCVqnKz``m795;io(<1UzG@)Gp`U>>}4lB|{oD?f5d_EMTFJYIn{-^S1^#;7@!fVN0F|ouc(2L=fi^PtFl%~qjKRR(mM%1yWN7S30HyM>+E(9-!J*Hru!wDNlM zZ%Zrc4Xdwv@gX680#<8b8P{Ot-GDGbCusS>CMi%_-%?VgH2&+#6nzxB9X}gTO7ZK> z;5C~_)X*@I8?>A`q0e;(c%LGAj`JzgyiXxhb)O>o2C%XN$leRiu3@gpav*&`abSG> zfC9G;LAB^j!CuP+zjX|@S`@phY^94$v8}C5~A zBWYy6z=`MpKQiqELQraB0|A;e5BN;=I?Md&W_}MgAkAD2U2y}SUTPA0!B)tY{M=0R zi;xhyLnf;HxzVj3sk%AX04d@ILU!cqv(TRAn!vsX_h5|^HAz2LK)}{CRnQFA-o>ZD zs-wt|z@$}f=L~k=wB9XpduOomB8?Q?!2vgI8K4C+2e;K(Ouw1i9BF-K!3PA=A#Oa{ zt*NA3-&p6s@F^50iQ3zwI?&gW>tobp?QM}5r9sFK!KW&{AQJrpF1IBZf5JlHPm&6? zLGj+N)Or~~%JeEcpmQliu-{2a)w0 zzmh6a;2h%nhKaXMm6_;BQet+Cb;}>x38yKwE z`4d(Oe~NZF=8(i3Qt9mfn#0xZrYShdNI_U-G6*XKSxL>_ObfpeQYEw*7to%M1C$fU zSsvwQ5fAndVnTdaxG0UAzzZ?bvKeegk$V6xZlDv0*Gp3~*u|m(Dbk?VB@DjG0k8{i^`(H z;v6tj+5u{i7)A!`#o9UL;%p2)R2eQ@w&Z8Q%H25cLGoIL37|~2Y4;~&mM4z0oeUez4ud&2 z+WIkx+N`lVY&NjE&0uwxMKXjz@<{dqFGIo?)$z+Gtx+-b97jeZP}J3AMzm^;{$_jELq0b7-Jio zKnM^6;aVVO!;&xDwj~<@PGEx}7Y@mWAK8QrNp|yn1hP&zLP!9QzN(%XNfw4c*p+^o zo~{{nzk2oR|Bin3$`Y+cl2`}74Xu=&NE0>3%tT-uoG55GaO0J|eIT+KzMRaLIGFzoV8J#H&*G(^=Q@vh$PW^G>I&pFF=Z+Xpc@b3G@n$pivL9Qo!K|nAkQAI&HO7FRZm>tQ-xgXAKrL#VK_T zQ_2~Qc#0P>JdP&t47ke^i8_DDKE_4KKXS*nP8*WXRkR)w3$7 zQn?E^=Z2Raa_^WNF{J3vymltvQm%VdGP<>L{7_!3|97(-avw`_K!41~lN@Ned6&R!;FlpG##ElHD1LQmGVqL4*`EM;iT+&Kid?SmOz_TUAwdZ?`bU`Z;S2*m8Ao}_%~wYkRr zjOm;n)FO(%^)ZTNS|O1~I^-mxRgbiY_atEeC-8G!q|aH-a#$Gl-CwduKB;gPbIt=< zWX8o8?V8_z`Fww*Z+HKK9rHuq=j}x&aK8+~C)>7>;SDM2NG|JYJfiQCEluJQjgUL3ey==O{WIDgW zbdDHL^6=vZQ_JcmOEieA)3RJB^)6G|ms;0USW#z{4JKR8>#RYX#s<^uk%e(qw2E)x zhZ;Cd5Wdc!X@EGJOs5uZ?!xwxH*&i_P_mcsTzgHnZ-b*=D9MFV%C#H9^7)=9e2l7mT~aV7@>;ZR3-1|C|SCz za)&H=Dlp6)5~V|wY~IP0udtPOo|;$Fo>AqHC9XI(oC-*VXI+r@Hdnb+h*~Ah zI-|8dUp&+}t0C+!EX%p-;ts@OT1nBiaMls@M4P&sBINE=SK7jB1eL?7H)|+C)R^ri ztI-r`&BfaiCMB=t?Jm7Ztx#!Ic7xe!l)|mD)o~KMKpiGBM0}L+xk7kEw0ePeZ4EtO z;UCa%jXlJaB>%LW<9+Jvsb5VqTx(_+7t;)ok_C&~zza&X*&EbfbU{I*)@WMrM8)_s ztU;&StR9Ptq8J354u@CGGSr&2A7R^KI}#*?0=;aKifl)>Dv1DY(G~_-uX|%S;>aG+znrX653kXT)#Dm~{-n7}RQzkdmHuhyqtVu?B 
zHWIbeI+6U#CTqn;&8O4;=KegGkENbOFU5bWt@C@q(#pAZP=a0>c9ye~{VR^wEY z_c0W23OSu26Gv`VC@vy7DL@$@!)nb^F^+2`QJj_sFH2@Eny)0SVvD!A-Cz@|qDdTi zfG?0AB5K=-#ZS0BRZ=QtNMKWx_ZV~y1|`$fWN5+mn#dS(L#ZYMX$ue8Mk*aBm4lot zm35xz&CDK!KDl$#-ja7Y$>B>~h>(Ig|PGJiHF^Cq?w1 zB)s7FI3|Pj#8gZMyfG&6DV`e#Zz5gi<0C^po^lL{le$jk(DPHnJS_qPDsh$&Q;D;L zf@P+d${RQ_47{3#QX=S!nK`(41&M2bhmsPc@4Ar&#(2;dHFIQNA0BvDAZ7&ALHGwA zIYD?UrpF(rS=1|~ng^N>IDt_>nq|Izm}V)|sNw&DBjw;-cq8hehiJqW(L19`C1*k+ zFy@h1$uJI{D5yBeK4fHtA$A+NgJ}79SvMK>%5jt0X@R1t%3t6{y3pv2NRXQIGmg~9 z0^X2WMGwQZkd?w-Z^Qs-D2i$Y8kk2QG&!_1WOZWk6aeiwghmNA32RA$d{)lWus)?54gob)BEe*ID-o*V*0B z+gsn*+nX<5LnXTEa$&&Z1c38h*`h- zam9$J$o*7G=($z}_$}ZpF_$}P;eb`;stJdyU2010$;s>F1!*9s6ed7eLjUwtUCJ<@bQ!_0b zatbGBQ1>0q6mff^Cf3T@YhtmigH<`Rp}BhplQ%}3RN z()zDJuo@q+U9e~3UGPm1M%-tQv1AX?a=KJ(b&}!VY+bad(7L$4aA>g5GO()9f^l_I z@i4SghX^0xeTJ}t50E0zg1-?oLBj2$Mi+k#(Skhtv#*?aMV3Znoas!GpfawUTB{%x<6Ib1Fa zUtOEi)l|dDnJ(#|&RVk|M+&yEJ(7^<{-sM6Q=}ykaD}Zrxp6bJ4!`>IpKZh{Qi(`9 z1rERZOK|u(wWLABD5-xd_9J?>Oz6O$6+c4!@jOFlp@cYNg~(?_{MmF2@rOv2*z@u< z_LR}*9PWTWws&;4B4kMh?P03`6RmNBG1eMK>jl`BV&JUW0Z7?M#N@OZ67^t>t49?4vIV=e zU1&C+&d4O6Z^BGk-(;|4bi?OZqt|LfO3AFU#og|>jVo?u4L+;QYg7Ugz>Td{2_#dXl9ZWX*C#CUi03^|-SAAosCpATI35)BDI?bD z>7)mkF5qS0S9KfOKVK|{F;vyN>NI~)y^s>@%HWxo&`vS;FkZI6&X9Y-cw`oio7J}2 zv<5f!5nG70NWd^+rDt~=3^YI0lpTCluSKFB@RyBDL_{l~Xuebiw!1mBw$N{^DIX&{^v7JnzLLk+Ni|adCeGmz&ek&AzOE4drZe zu|jrI7eZ0Nxy)$iEK~PnGW8b#r*JO~qsca0lFZwoj^EC-Z?2JHx5_5yYrZ>`+Khvb z5WN(Cqp`uWcGbMj(qL2vh?1iQ*Ol*yuQM|h>xy=W5CAErGb3Od>d}>BtBNqX$Nu!_><6H*g3c(^2bow$!lA4~-WbvwE&v(6bmu!2O8_Z#~UD z$omhEJRigU;697Ij#_^Nel9BIsCq@1#Iw)q=-Y4C^%Fg3ILz&3%A>=|;}UY+dPw|A zGOA1OTaI8FoXA!t>iP-KdTf9!`zYlG%w_@B{(h^S?f|8QYBS1jH6p&Ks)5l!6e-FBc?R&is0#`D| z4f7L{WP!Cv`xgp99R_UT>W2p!zAiVVM=cU1DftJBT1aqYL%x-B&SHv#_ONZ&dDy-zvA5_av94qlOe5Eof^wv8sx2bRPfpAChtbcYKQ9=+*#8U7o7js7 zV?Jq->w{EpFyPPdSb}EvD2XC)VVxZan+>|Jf?7p7yTVd7c=@#x|2fhZ#Amz^U52L$ zoOR^nD*Veaa)S?Jqo#IvJ#lf9`7_*rK@Tp{El_3}98relLO#Ts=fz8HipG9jf-$)l z^wAJV6Z`4(#GxmO;CbRCp7xS9?!EIJW(Bv#{cOB)B$P!?ov+Po`?h*@PhY30>r}@s 
zdsDN8NV%bQ385O&&+~o*P}AvtBO}Pq)^h$f|5FJFm_gO|wBIRUWPBKgMA$h&uV=aV zyHJ$ua9t+R%<>eoCUZDEGmcB)7nrQ5UB_}UIu5mKBa{-Q-Rbub0rH z;>q>bog0~d?+eQ=YG;{hUAtgqCzQGHz->J};;Z^p zQ=?0bz(mgC-79#9HyT8RqgoW)YOIk)4MdIzqlpy__xZ(06Ki|og@en--w>JHR~<{E zk1MzHiHDzR0DlRn5&0nm4Iu~B=DHR6a5Yq~kYlm2p*W(a4e=(}l%HuTItUh;p6>Bz zc*DO4IjO$!>R1ebT=6z?BR7V0=|Q8ps0HND!f|_HTEYBssY4(Yr%yVd_{Lh=7DuF97 z$|ip$Whw#x)d9)=@<(S{I4>rnccw0z;%vSzqv?R zfy(`lR4OKo6^oN~Tew%Yy7~3?Ub<%rBZ-TvJ_S^Xg{X}@pgX{bN|L1S{X|h7W>5B2 zud|e4k6P}_1SI#Fl;C;vSTLD(o`OPv$Ssv!&RA+*M`?*|N3a%*m_AVm1?`f5gL?RyxuGT(vp zi!RAQGzW0%FU4&BB(Nc)_Pu!};Qn91br)E>g83~B?wE5NYPBb6v)6=>k+DiL0&?qE zGy4mA_gz8D{2i5m%ZzK?`$IEV;?kwf`ht8L)x-g1p0(X3x9`h;eF0%#o-bR71cW+Y zv+w%q`n=8L>4d)KxI7eR&HNg-mej0soKKX{h@Hi~8KQba>+fYlT(xdmfYv)P9WBCG zPLG2)nfD`(i#Z5ufy6vKQflyD8_8~Y2|ikb`KCq;OB~<)#Wr75R+*9V;V!QU{cRd* zT%Q`dbiC4x836m6UD&p!dgQb6)}|ht^GN6tcNsFy*_7XPI2aa+^4omwuT_t3Rh9LV zU8WQGJ2NXtn+M@UA`58GKUHnHx7E|}i(S47A{%=T9q!uWlxKCWBZD*B; zHr=?mg?Bk@c5%;ndWZ%+%`m>##u>+t2}hbVRynGagMX(rby#H3>owP6AEpxOA3k(@ zo+4>8y>Q%eJ1dOW6L_o>VYCGJ%?65;y&|+?gPIF0=Zlm(IxD3IM#sge*%9@Y>fV-` z66NVv>TYIJFoHIzPc9r3`pulyt1J%YT}>~d&>XozAd2bdu^&m)+CdnKiBFikWVKdH zbsMd02ST@a;H8SaCIl`a{{sDqt1BoEM(3qbD+ffON5+d)92)H#e3q5QYU^wzo)Ut? 
ziuHznXh{qc4w4V8NcB7cJy|6_uGB1I*Xh4QX@EQ}6@T|byW3$0yvvN$$&p##5M<%Y zR6bNRsK8wDy@M&Q2BG@ZHH6;SjAK7^&$#x`iLTK8gakXSW#Y2~++E%)@X{ z<@rJFTHk|6E7kZuzWBc%^1Z$752JBAjN3n&!$NBWyQxjb9;!Mj_WYD7$e<6iTVX9Z zKvtN3hXTz8G9fzj_!Uvh=Z*=9w* zsVL>v&6n1@TNNz^F&iQ6ydi-*k-?FiaLiu?+udL@gqher8l18O1xK?t0L1M4Z&S%NOwMaV60CYXStrxI~kFeIagP zAx>2E-A$$V=>1_KEx65m>Mk;mVw1pCo0P85Y)TL=i7Wo>#|N*Ho#bD(>i|U)R80rd zq465Ga!eO>>NQJA9z*98;PZClt`9i>XRHgNHBO6_r0ahU_Hj2JM-QpGe*?+tJgIyG z4@S{LIG{hf!*X+nWQjytx2MS>a+~y$UU}n`K|vEvt{O}%l%N^eH0LZyRY@_qDRmeO z`EpX0H82E936cTKgUqcqH>_lmv6uI%4> zwpI}RUatdgCi^0Rl(SiB`6wW*lsHtSD3cj18zZ)85%M#|{Tq;z@KO z%zm(bxN2jN;2#0JhJZzHz~-+8suwCj5GjP?ptn?nnap;o97kRh;Ra**kZ&RrYO)oI zWJmdUrXi$ObyrVbr8P4wPrQkfsRVEz{L|$h#)XOUiwatp8L2TS4kMV2s0`wmSgPcJ zb-R`=<**~kFU7|cbH|NqM$h&zaCB@(BW{;WAu*!twf-M z`&u}uaeZ&Ft?tv`zcb@sv#aU8-)sLEUWZiQbdb{h+FM_m?qOnFJyUO)($@})sjU4x zPm>Jd-N8HX;EFp2&Y`ofKfX=zp=|2FJ5JC=JX7A&*+Ec)neY(e6)XH3$TlJnILVYc zv^!5dOoJ^`+olJ-C?B~Mh~!GD^6M9g9oqaXUwn`kl9{9EWnwTe2Z4e+3IPc(LMo)%ZXlb%`g7!K`2LPQEFZdX~PA-!k7EeF_KX9~IsMIlVQ{ zX+=z{Muv!^sbuqXtV`APjk(gqk!w(ywA-Hr1&eI8h7sE(z01nVzu19N*@~rX?$!vg zeI(ETZ@Y2MN?bXv7QuY)F4gFfwj1s#g zG`N(jj0e(cr-JVbXQk?n71DwSDvZGLyeE_4?b6{!C)PS<&eiW7G^RN$%w7Abznj@( zZ7u{Uiof)RxKN@4Qe1Ugsh0*T?RUk{wNI{O)xGWIfmIwj&P*(0Xx4b)E+nkVr@3aN&xpN-)6uFxf5S%;xtz6878c+d? 
zL)XS0*gdbwmf}*gt{jZ2Srdi)W;tsx}LIsBljUPxJ*}bo*M1ZRDu|L#AZ<}1X zxetH3w5mDxL$bDP>faw>+@06Gmc38AE@^-hr+(AFKjE9(3D)}A(Msh*cx;+r7MP~W zMiZuncSue90vU6?18=a=XtbelbA9Fq(kh=KYABVI3SP{4HVBO3>LjdF(p3D570$n3 zof^f5JVCv5x`ceXwCYnN`-`cv{_TCqer+>w0!#hZdb8sj2xog?Fn1?)CtI0|lVX2E zgL>bi=33zFY$oS6%Kl2Go13&Nvc+Ykt3d*T0oS6TJC7yICM8d-xDJ9oCNBcC0rNsP zXN<1H@oIygwxKOqeX=||H6^ONt?dXL`d?B`WGh;F(-FoKGT)9c1&YTmZfz98B7Ad2zgsCoFe6k(*n5LkQx@eBd^fc)P2lCS}<#l>x+hy`p9*sN24{ZbZPes+$=+cjljp+-l8e%IDYLo|J>aC z@>B}Bq1PEjs>sNIieDYkoko_6L|=)^zq1Ly(Q=LOj;8<2CRv0d)$*?Pbg|Bj&@D)De zOaH}otcuBHbIW;?drkQ4xvY|sRh18xx_H8dyJ*l1ax2nj?|jQcS&JE;ofycwlS+kI z?*u2=Tq8yg+)B8ips5&Hw~-LK-0%9ztJF3W_Eo8E%bcv;XfL=C$ZCOZx*=>H0YB(C z1T*9vNDS65+KQA>6s}beazcWEBonbXC^o3J=V-)uE4@3n6`hG`16cR?v!A@1e|dX( z1-taROx@w7!Y`UH@-JUb9k+s0zs*NiLBhpFVy694-PZI1ly>Tgd);+%`=9HOYPR|x zhBh|0a+g<{V;bFU6btAoyOPW{bEU)$hsEN`6gJAlDH%0g&qV}^)?Hz{ILU1JY>WwX zu5oZTflu(A?^&Om@7~sv`y~}K*8Dp~eSwVek@4fnsQdJh^jy}tw&MWo<(iFHZ*E=P z>@OT&4?*qZ>brfnp*6TS4>oPp}6SjLn&asDTLMBeOCGjikjnw&j;eY8@L9eZBExI-5`A!S^bK>f_2} zOi^rFX?AGLKg%0TOlGsxSnl8uih7qYe{@#4sV$~x{9I-MM=nu$$~O=~Hxy?g6Lvb< z)hF5x%S#loqc92U39hsj!+FY(Ybnb*&HB}>R2Id&HV&C!DT~n7kD=?*pJe21xy9|` zyt^{?#E-oF;$*Q(84fq@LN*4P{dvA=1)@X7N2R}yMY$AXn)l>FO6a8@<+RtSONUdQ zVp4a6yN+tE?+QkHaL1R$H%x04W))k8`1J2XgLe01aD=J@X0&a#V2*hY;@wO5P#9@C zSwd#irln{fd3e6nU%i<>7*<8Pf~IpTy%7i#d%iyoUMufjVZU>`7=i8fTj#2-hBx8; z!z2UU?vW60LBt}8$fU{KC5r|ph{!c#5CefEg+YM9fW+oX$i&k}&qc?Mxf+@itXZ~R zDZ<>GH12pDyvgE<^Nqq{vL_6e>P6Pm-SO6;`B&@Gnc=T0D{S+mqzK3pFRmsFa-mdI z)z!Pce-0Wv_;^o(pNu}r6zM_iDQ=1k$U zrJKS8llw9@2vaHQODT)Iq)pFecX3HE<0!RW7bmz$J)AG*?URv#Ju!veUZURS?p;i- zx2s$%)`p#6an54Lsos0QmAf9Wwf z<}724y3p(^`lFdWhIQ#dxV=qNhf$vuKCbvIVnN+T9?Q%~1X|CON+>PGjK-me)1{AZWpf)MEVsXCmVmEdS-6p=}BD` zyK?57EI&~+*+COi`b0fTI%2$Wfs&eC# zx^Y5MffBd3z~pY%+B4|cPMmOnhTr9-=Dc}frg)jqvccAtXCaoUtind3&`IPPhVFxrErxJ?mUJw!60M2 zsAG4o`;Di`nIm@~U>jcI7+eAzZH0W;r^np3YC8Bc&$ljK9;##gAPMZ=pHR06JBQX3 z(@lHe1uL=!as}%z_ZmcU`Rgyoa^-TUn(-?HIg~fvi2J=0x`+Z{;~%ZNpO}wWSd7@U 
z;6~HFTg8n>z4)*x4TD&qY3))Oq?Om02_VVXdg@L?%eShF*KuVcod_+kHdVI0sVj?k zn%bLf!p@?P>Hzi2;gx;A$4=V{pE~E9KV|48=V0rpOp@u%w?&4ez(Xa{QfG)8Pmzy` zoxq!F4?8y!vh;M`%?CfSJpN6lPn9o?G}JyXQYCv5o_kJc&(%~fFG_ynHhjQ8%q1kS zPe7<_Ig{0&jsgiLAdOcsPEsR(CWp@9|(CjG8 zkN%8~S+2NQCh6(!;&|ifa~0r>{3Exe?YW_ei=R6sYplv0QoiPN;8&8e1d$Xo*dPg? zMj*eZ+AmDQ(Het;*KB~f-9uHUzNFp`560;7Ttwm1;jC@E+M={$P??~#=%=gt@davf z7Br-)hAh$Dc$9}<)<90FOI5*e1YcGXUQ@ms$z^41h>;YzpzSnK2b>tM1w)7@IOg)A zzRqFx>tg>?AOEwoRn=^&2AbOMlj4h&Yo%hNWufF^ygz!$yTS5;gMoTR3obG`&ofGO z(Qb+{py_A@ZNx0;AmR@;pPP5+e6bB$<+v(dhmrgtH+rt6sQ@ZIXYrk7jDnm|kBQ_z zA&F|uaGbQ3XsH*k>#^aocn?TFNWI+rbs__TMV4gihBG9SClZrag7Jg3xO6Kq@SCb5 z*SMYbC0dVY?{i0|?x`-bng(jtnWL+X!x}?!QkzxhYhcWhYOyU z-KDCID*+_)sV}gtG3F`jnDLmqxWlTrTQJ$_LhG~~(s?^uKqRG;tcd%me2TS&mpF=7 zOdnd;ra9X^<+ZFFGuHJ!Y$RtfxCbdKxvA-2F$^^`OcrTJCwNjGcsuuRbBrT3SYu-= zm6m?1#t$cPj#DqDVyMJ(MermI3rA0hLkNo}h_`0x3;0&g*QuZt+J}rPb3Y|5r6q~= zW^0?trW6yaN08jX)P+l$EnfpKTSu|y7^eB&%=Eb*KDt(&&HDOebt6e% zJ)@Ep>%la?wYklk&VFD^JIi`$I@2ve*s(&~jCypEvoy^ndOefg$W~n2_>VG8Z6^_9 z>~bl^eGAW1w9RkqjWmzqNsMNh#UPlQ&wDdN?!&e2Wc4&~-n=Ra;0z2eqUJ=17yLdz)KXwV{l-Zpj!+H9-HgSgW~OLXK(L>IzHhReZM!m4dhibwSx08E z+4)G`_^(hCF}qgIR95U(u#2TAVuNZHB?0^*68sbr{G!28%6VjH#fEN7Gk=S1!@-qv z<8_;j_OONWbje1x+Q*905XHKnqjL0P398TBeL$B;^eJkK+pfgTGqH0#Iv3@*nC5Ym z@EaQ>s!@=AOMYIh^#B)RXJTebPGaUT z-&&oaS&5h%X_fBVGv*xg#8aM(wE$;K1=AT?nq064?M`}h{?FPO$WVt$kviT@CNE7+cm$oXuBFwlmd3 z0XDOlEVfq!Pfu3@&a|xB%Ps8d2J%W)oCnsU%o)+}?3w4BCT1nJ4YpM>(sI({O(v&P z;G7$?-c*{!+p*hnVhXkJ8#XB4_kV%cTUz_+az;MvAii2Y{(aa<+D&h3B%u1>@>L$a z%HSxQT71RSn+Dy0+<(lZmrogj&fxab^d+bnZT4QEPI@wMQ=k3RmKNj=@%^f-KBL>| zMouRH-CmE2rxjm$x6pq}6Bp^zd$|d#O+C?Hv!(aGh~S;iW@dqpRI&>{s+b#89D7o{8@GbXHJKa<`XU zuz6cEIwK(I%nP92UOJ{8q#jFD|4Y1jGnUpF5Hh1d8De#iC!B+fgYI~SQy)I$9!b^{ z6Im{rP^g~F?c^q#BR#IiIF?~GuCA1^mtYC@DtBhb;TPd7BL z!f_QTBh^Ay^P9S>^da)Rn6CQbdV-Mg`^$6np$^7b^;Yd@_sOg=^PSaF<_ z+;`{w4d6j{1no}N$-+s_N#y<*{Vp?QJf&vE)B1RCj%T)g;Ys#g24Da8Xew{_>aFC+ zYfNcPT&}aumTvoc!+RUQh}U#Ys=Th5-|;c8{+Z?J+u{~K%;~n5`Sr~fsn-DXYb@!z 
z{AcA|xzDp}!zM(W4-EFn*YB2wcS$#>1BAF(lvmOJrQd#UkDqVWJn!u1xS*WUUv!}! zoNkrA&;3~OdKFwpd|m#($h1(S_@CR|ef9$f@W(ak7gau`pZia4kUx;p@KM?S?fn04 z{7QO|`27$3`}x6|`0JIy8x}cJ^one6^x85_GMco9DPNhNF_~MX+VeL}Wt!LaD;a)NzUf zIlKO6quro?#eS+Ztdg0xc`ByyG_0yLPE@3*Qh(43N<`wi8ZN%18q_+%KLcAlQgdq`x>Dz{a5z)p?3Z(i)fIqtz zkbtN7(R+%Q)mu19U?geKVy61gGQh`J0H%I&jv?t%LdK=X28Z7#T;HDB_+Da zbjKFOD`fg0Y>{M6(w|HCCOgn{hi4>fMq4q^GlE>no+r)H11b*nOCR~jrI*hgj*^Gu zaTB(Q*?h>1@>kDqxEZ2GFDQs>_D5`DS0qK+g|J;XTNfnSsv*gdTNz0)mI<&d7nII= z7s}l=B8zl?Jq9Co>{=JT10KShP{7fVSi@={Z8uPz1 zgO$==h{0Y$^DoGGz$56H^bq-Cq*DqCL!l{nN@Ix}QRI9~Qjl(yF>lAA8Vv;Gkzhkd zTW!ZM2?8V5NJKS|0~PU6^y@}Ilsfn(xT5ySpP(hR-tzurO-hfzA2WNldOB4$bmu)gc z6F{IXh)5XgIU>dEKw;?z$xQRf&khP5xk=xv>ywMv^ng&t$O|s$p7;;qq(43~^_z~m zSiP$cz3FFu8Ww~_)}o#}lO3hHH|nMHN{|B}jk=gKsbRfmx8+_Q{3sxCe?(;4au`<- za#>|jqIzx4dZJ!Xv>&1kOp-vY`k*XQMHwbrO_Sv*FKVEN7X7)3{X`e37Un|^?844{ zotnHAsVDyI;m>g+9XJvq!NEW2qfhg*&Sd+Ap^319L$ z;492Hjr_??U?cV1B-Rru+w!ZpyT(itY(yc+CR_^7Q9Zrux>{%OLxr2aDe+jLT3l0BI=neC;Hu? zed>hSXE&v5YS@I%Lzu2_${$kXRssLm;4srFcV_twq4_hCyr0NM++k{feF@^z=&e*Oq ze-7(>#e~?CHXMt|^`*mUM{3XgRg!LzxTaTQwbFAP_`xg}W9JFu@HS=+!cdMg%5-+; zk-cQKc&-jX^N>VMk4seqFMuEhH}Mcb$XjN8Fa4BQL$ku27kM9Ek{Y3hzRUP;j9WsC#V-e zM7EXY(OYl%O`y>+9n>;`&f!R{$4<4s+dd+4tIp$iU1`=b?y@S^{P|Ydb&worhHtA` z^FlyJzltG^y#OXlR< zYYtjenjN(XTD^I*LHszN-Y7KTTeg2kkN|$_-DQvN1OJb_IznY$$Q;ucYT0s8%ruGp zJRk#q_&RBN&ESH8gZOg{=u0cQ^hv%_dKgvwU~y^JW9_6Jqp(BQV`^ zU54o*^d3zH@*mKQ4!vRVXRQx{q*-ljt?wq0g9?XHkbHsMhOf;}i;Vr-=iXJJdf|t@ zp-xaIID`nwIu35=IuBX63BC={-7dK`i7M8k2L!Cfj|>DkUP0wEO60vFoF z3@nvHh~daQf4zNs4o+z=1FLOGIr==|2Ti+|M!7h?b!vq7YZ$F0zxu98=G^ux#=eOB zO9+rSwXJZ62x3`J5ML$wjWPwXZXoduW4G1m*E;q7U18(HhKY9Y9v~37bW}uVU+w03 zDP6#}`UK4bx2#;IJt+i>)%Y#!qh){fIwR>5@5NWU7(x45mZ4oD*cr%PIKO219wVD? 
zo)g|jWTOI*a?I=}QBk_a9mGF`>K0iq9=Y)K6f=}P|H?Zfwdv5<}&UWA#XXIV*NxQ6++EF9)g2~b5)2MlYp;oc1OV(rk90s)54Tso&t z0lx14 zd4_Hnviy>S1N+>8ucD8-dVHfCVrBxIU?Xy68UEWx9ave)!`1=8@?lE#6UBRqN(eX= zHGEdVVtB%HP>)2JoQvbSx{-^VhPJ0(#Lb+66i46zl9@&A$8g~t9*O|Ufe^TsYY2$`Xh+G+E5`5zp z@~|H&C;v|Rq8EHQm{li&+5-(xJ-9rI>K{un!Z*zvVw%WQK63mZ5A?mi_U_Z0!lJ+p z4=?71wHwq}91rFVxgFLo6q}f!l;dx&NMHq7_bVOJG@!Njlq|e(zuo+ z++P<6aw7~Dbr;A4Bo%jH&}VpKt5+*h*cRk#?rMdY0EN~#$4~+5m};M>jBjrQIY>1{ zR+k!qS2(Y78*>7q9(A0Iq19(GF^4qAfBn!wI=A z5+1bh-T*O39~VqoU38z?j$*6M1ENU(ynByMaLn(~>jy3-K^Jkk1-l4=2-XwqWYzTBfYKKY_(DW?vkFs7>4tk1k#4mC z1d1q@A^G%i4)VtZ1q8G#P~I#Sv<8-^I|!s56sI&Da4m>bDmlf-$3CGya#)|}EaZ6W7!ZTtfnvdgVGAoAD&4d(QO|@3h#hZntMQ88LbZmo z&leK{cE&8f5|(m_WT~*RyBwBlO@vMuh&hr1Aq;3b-%i2G*}V`%@9okmF~Y+o0iSt; zTevQ$QBT+PFLf`84h>6Oe@}~I_gbsk2D|S(DE^D)tv(T>ZQFlKUXmNji|cySt{W?M z*H~Cc?-xeu3Vv?tkc}>n%`Vq=*Jqog&HXN2KDEtmLS+4UK|Ulozi}!mje!uC<_+!6 zgDa|=Dm$!heiyfPSk+?2w&6lFMSOC+8>p{;*=FiqY};L5(~1jA z3*C@XHucO=c@chz;hi+PxFwcbzYBTs{;R33`Ht(dQaH|ASzX@T0*Ijkt z&s%G^(dADz>Zl%RE)CLIge+W<;*{zL%Em8(K5FReY3>7b>%kRP*SO6hEK2{fAdJ%W zthLH3RBBx9fT$I6U4J)lx0Y`$h=l`LesrBxtSuj6C;0J<@Ak_h1;}`XRpSoh^kUNT zs^iv}L6e{ViJsu!^WT(dB1KD+_I8hdx!H@Ywb>hp?AZvzgt$w60ova_hQwO%ia=yW zAv7Wr7?ku5uM^0m6d2Wb7Yen`9t^{dKx9QBH2vs#HVhT&<^+jRh2;qWxYo{Xf`wBD z9fOl?yV+NZ6w{!Ag~l^<0=~paPkZdUKBr7Ka=mUm-zmv=zbd|+yFL>W9sC|=AD#-(R58e#Ef<;!-0+0cQ)@I*NCV@3A55$BG# znd#mTjKwk2$sR~4V0BIJX3q^?zlR=J;ha#7jcg6;u?3Z+g^ruinbj7TO+)9)w!43E-km{QZ6HuR3XHlio_+_GMw-2y#AI$}LCy%JwR zpD`+TeJ4sBdyK_wBdYhd{O!-0R2tw#Ef&e&ymU;bUz*`f5 z)dkG;!CPH{0Jgm@NcQlc41zoD`1U?<_M4!s-9W9~Fs=-vnvx_dh4w zX~(tqLALjS*CGJd5d`_4mjGU7NgOdxXQ4p1M+X6D^)7-yy#Xy(`=1Z^pJVN`!`f#9 z>-a&o5`gJ!gX<`O0*--~&;8GH+q$a$b|8N^W=-wM*P{DZLfg9l0f2iCfpmO(UEu6} zK6Dvc0MhtvEo-w!I;kJeWH?5Up3>I@>>;hNPGQE&u6#oPis} zP6X>7Gjar0N8ee4Xl(~tj`p{LvzG$_@B%L%11+WkFLL*~5ZLpB()R%A)BppvMJ8xi zM@pDdSNM8Wf8qxMaQ!cS_z*h8y)G#BJ~;N9h^^hwt=+J#IS@KCpaAuyJr+2c|AGe7 z+XQNl52Qote@?aY#Wk2-2dZ<;LYD~AdIy|}Zhr};)8&6I;y(e~Y6`SW 
z3A~I6wEUB{A36fwjwiBx7kKMe$SYpu9|F_)bSUk?EVF9=N>KY%;AN&A7+}CU@Ny~8 z@-_(I8=;i|R7Vg-;<`)Qb+GLkOz#3<3xP zT6UU!!`S&tf|(fIFH6322BHHAoC^70Fgk#ief-aXd)XmdF@XNd1^*G%%N`aji(B2| zq(yUjTjnLO4k3t+1kf_ue+d`bG9dh)U#6bwfKV`JLWu-*;rS zjeL5nYa#@AnQn&%zIA%%p=KU%)`%gad^>s@kDLv0HgSJ{W9O?`D$XOf^N!5bO*m^2 zFWTfau59r+Oef;a_3tb9Nuvnou&Q!u5OGCrIaG(v0 zJMnBZT{MMn1)72!xwP;|NZFR)n;@4IMcAOl#SJggQLUa&6N-4V{ELkkp1jAC+(O= zOgiEfDsTIJe=c;H{BRdD*KypDw7o)dxQ**H!bm0@$qnkV@;7mC?K*EK{|*%lA6fkH z5E{$4i`2yiQ&z>=$i%~QadEr!?3``oOAXOF`;*HfH98`dTMGuJk{qG})mur@y);YB zoIjE~jhx@OYVuI_li9XESa#8!==Go_DJM0LtGH>{D4epc;{^d@|_XBz(07Gp26DQR3Xe#@4J>(UJfcW-7 zc3}?oxqW0wYXwD4g?Q;G>w`%PA)w|V8s$Ly4A;o`Xb}iob7XPV$~d7Ws0Ez_nI*9< zg40KCD#*nk10>4X*VX$5PxwO00}?+^`h|?ph>8KZ){;_acov+ia@`2Yh51VCF4?Zp z`W&-|VKJ^Py^nvhutW-opGcxL#t)&eHD6zpj2}CzGMJLp76%{#;u%J$s360Oy>MiF zaoFaInQ>drLajtPJp7)+9Xq<_aBL7&YSj=Hb;SM#tPt?VvqRwxRjRC787V!4M5QJK z(=+DmQEHHCdI7yGW${Az#(PK)3zlXWP?&w-RwG%QwNXOo#>S!*<6%p^=HTRn$D}3{ zr$!!29z1}Jn0PDIC^aK;YAY!EKK);?JBKWa)$wplj6$;@H*MUKxY5yC1QxH62A28< z_6ISNm)4hb{P2`rTiihfl|z<8(i7)M#8d+y5JKS9y?Sd1ibOyb3$4Epxbl@)A?T%^ zSC~;Gjx8=^$eIANPD#b_?$I|%3v@6AcQol`omvz$)S8&^5GG7eRES_ThB2$CM1u!f z!+H7vDiPlWw5=6ZJYO&ZkK?x#K{3o&Y6YoG9SnV%H#x50C+)H)(iQa$YHDyG#!U1O zDLXGiK^i$t0#-ws(|Kla9}^9BK_4Io*l{vMn5JV9x=3s_(>bXJi>s)$nGqJ?G}VeI zI((dFv&2ERE{i1i5Uea|Ggb^)jyVMBgy}W<<{6e5^&0eQ(kbW$r{i!nf86<(?pd%O zSxg&TVFomENRHX;@yL&3l#Hr@vUhpd(`p@HxJ0BtE6iO+>IzBX9+u%@E2_!aW>`7{ zXM_I@W16ZbM}-1E&Mg-4i6o^l53Uua-X?RsVWgl(>HUS%Oob|BPjFR^HUVNm%bO*x zMo8$FR8AD5C8}=Rc`Fc(udU|Z5MZ48{FKxYGWGR`8X&tU44lrmz^XIC(X zH#C}^`fY%P7~_3DhjRG%*ZC<4f|VC1TGcGZO5X71_mqX@o#%0(qR_0PWZnh=&Rj)Y z9zoy@kZ?|mbK{Lx zT>#6BlLP4r=!eKyX6i&>6%ZyMX(cG+YrGW?9Wbfb^zCexWZKtxg-l@}EhE^pvmG7(uRQxgv-6e3H5 zNNn|MbYPy<$&oFNf35$H1YagPo=zgAgldo>lFqlc`1rO()o2Kg2l3=alV3SZdp4|F!d8o; zk}b1lARw_|eZjIffmpD5^YWtdqQ8#wN8=0(zAZ5S7bz(gZFPLd`>i=#YFPEP5MiC*tFFwGT1{bzw$9Q4nZRh+h7q#i{kU^rc0 zXuZZ1XUqd32A^{D6_xiS^zy|DQ_bOw*~pDr#7eSPTY41U9PEJ0Qds0B99>_Gm5O6i 
zrm`l1ej6N5(^9NREQ-!pMItFo&*0(cv5v!x{O{m!L^-%LfgOrHJp{F|TZtC;&dD%} z)BJU^$cIK)S$0!%)J{|tLUrY`*|2_}sm3}>(W@xyJ zp&k1?*M4>;D!{cG$OxFZzKWS3=C8Y~Qh*%#tz>^38x2T8mU_^m%Rva0AzTPygTFzo zEPZZ%l7=ohJR6mRig1vcZCroC1Jhyggdwy;gzTBSPR5>t)C3np{2sr_2LaFd-J5u^ zqkMSEy2C1lGo1Qgn|UF-;#SHN0VS#KWld^V-u`Yx9*JSIQM{YfcQN;^U!@aW%bqOH z1kt|p<B6hL#?cdW8Tc2kR6ZO?(`YkWfWtN?Z+bmv8?^2&>`( z@?*PuQz!p2jki(CFyA{(fv+MDObn|4Bc{dBXM|K=qhchBQtQ2LvkuoTXPGP9>kYp- z7IXe`&pFS8ce`h&9SM#m1eMMc-Z!0@=@U@ff*Y|)<`Vish%SnVj1AvpisVhf-CD63CUF_$F=xfOGS^GKnnrWy$+Ejd93+08o)Ht|Z69m@!&`5jI?l5$bassg}E+KYFly@EMC7 zV5vMlR8L!ZSVZ6!;H9-7#!sS-@#y<^k?1|ZCkG5RP_&rd^)Ghf&jTY$ziFt&V%O8e z`#6t-y6HLvL(*_~RXPL3q*iY5^x3@)MQ)B4F>JIxAY5;W*&h1;u=b8YngmgsXq(fv zZQI?`wr$(C`8KC*+qP}n#fpv$D=P&!t zp1}@+P=IqG?XNU24w4WcSUV|A%KS%#4Mw4wb>XKaMI0pPc9*VLjFy>Liz}gsP}m8e zImkIDKy~+mq7zxRK>L2XB9X=hv7R9K+5S%-ges&8aPrh*dL$9dF2Qb-OXut2VP;M2 z^Q%()O*+^$@yBF@{_wWwP#|*P2dn`Q$;cZS5fKvRFICVPpo!j1Akw3uTz-O%&A{f( zAB%O+fWy&A&|%?Q??kX6fj1Ihe6kvgAC2*$jzWTRf$XlLw0Mkex8+McOuF?2QbR+0 zkmvm~qlXN7@mz(J6US)TS%8$}-_a)aAS$YxqWf4+tagE;?zfQ(?%)Bg$Z#BFH7D0{ zZb0M~eA6*J_AN7a#U@R$M3$ZwHXr|tGVA`fzUN)^+wXTaBT&U?;K9g`F4L(FO63eP zDxO@V2LFHqqW+ZILn~ESMT(ldW$V%&RvQ7WYfTvh7>Hd+AvrK;2-}GnpfZ* zzEX1@4SUn+8f*K`Oz{A14IgAIr_?D@AXa1mhyy`AAM=MH2>#F5YtJgmr{tG@gW_6@ z)irv-SrQOIhpb+sY%62Fy?z7oAaYVgLx;?uXlxX*hP|hMr&3=Su==|j(+ZLc^>hna zt=(_Ul$ykA2X5n3o8rxX-p_CAYI%-MDb6?6XT+!bR0kv$>ULuJy(40W>YhEJp)wiAG_Pxc+R?gA2xj(8UMbA+YF-vVr=_O znTCVOf*2mzhx7$HQ9lA*&94)TSJ{l@Nxu0?M5OZsPiq2DIb9rJorSrCUaTUDoj(3f=ywz7=9l_~ClY;YO0yTQw`fAHXd z@0lG<;BVjV{Wa51TaM`e$dPggsOf;93-X>KHFvYgjQQBGhON}vlMI0Gc}GMY6WYePpvWyvxFtm35 za6J17Wl~#O%(1^>UPI$@5&AytRQDha(I z(8%E`>JQm32`JDrT^Lq@-+|`9TV}^V;MCuykiCt2KW77=grf+!@DMk@iGa>1+{ycX z_Yq?9ZQiz#$MRgJLto7p#UM!cy> zTe&s9i|~H01WihLk{@J(Y%){s;TUS4swmLTBq>w6O>Pmvn&2WaHVf}8v*-S{rNygU z>+xD1DrQiOfB3ho2)9&x#wTv|b#3Ex9d>Mj-#xK-D2AtK9nvta+r?YKynQ`=WMk50ikr06bncKql+%T* z;?_%FIZ{g@D6qMj7j@DkN-#Qb0#GAeuZW0NjSY!PPqA4p$DEdFh=A}9u9deDo(^A3 z)Y>HJ)RBwh6B?RM^`!&PI>A-BFGbWlNlVp^NGJ(s~>O+r>BD8_GKWX 
zfo#+0VLDFWa~NR4eVA-kBv9OkQ3$;Y-DItmYty>6j`1-u!&VCqXi$<$dHU|t6@~B7 z%Yj8W5ZBdFfE1lPx*2tD&3-<>uehYjNlF@d4%#z7YX-*6q3q2?@Pgoe*9s;0)*Tp7 zwvA?P33w=v4ugy5aGq^!u4!j@Ycx*kKG!)rHon~UC-M*iM*tT{B(+fz0!Z9ZomNr7 zZ%=w#L%(VKX8ZJLaq!e)(b^6oIGNQ1zI*62Go>&5UbG?L>?-7EA957hY6W8nzRphv0A*@a`I1rl^8XgB?ZS&GJ!{Ocw(qi`^;0sICWe`9 zE9+sS&z6bG+|buB5*?q&kX;x)VR8pn3`rryIGCP{4$6;F*{LoX_q( z39mAL42*`wV?}?4`5Ch}NHZQ<|mn#bj{Jb+<&o!$93Z{{qukY&Vf zKQ(V`9blr+?_3et@>uoR28vZSsjE^I2hQojAH6cTsnJ|;tl`T|@+miUWa%K?oh#0- zs!&>38xy}~ZbI9z#zD)TvBD7v`@V4Z9`vv2?;fXxz(WsiF+dasC3>tu@G$j3KtK|o z+qf7Zpt`VrOKXNO@s!V|a*l)UqGFOP#|Q|-$23o||BUVf@}Jw~!44LCkqzS=@wBXFulh(EvbupRNuvu#p0IuxOC?1>rTbUju4yp>3f)v`LbLTZOL+p5f-y?(kJvGTvo zt6Q7vkzN_8=uCI5H;R%`5Nv-@hlrM})Q0meul%H)V7u2#C{_Pd${+GU=tmX8x+}*; z_IF{@aTjT3uQ5iM$cI>Yqf(Qvp4O=K%+9tj{^=o0=6@le3prMSIosIjtR6rw^a6imIM*sH+j5?vQ?`YD4+STna^r*d8n7NwO6i5k8k$%eN=~+Yr(C7>jpX zMF%8&1Zx0A=a&jHk=CP2Mfl3~3+mU0co`-*kT<;N;|=FgTm%px5snoGF?9 z&5FuXl~ChSTIrs7-=`FdWWOoqR6GfuA!ba4s9sHexSTpMUk$APSg7-uzV!HTMB|-7 zitT1jO>{=>V5dDjMP-0NZb#2sKuCni;4-n$$tALY2<3Vik)pfqPH*l{hOBPxPfhJ6 zPsX7tHAdLUDViO4_dU%Etf3c~GP>nGlMYLoBQ%YZAF-HJo5oUJwUimoQ$f)0j;bzF z^sC)ebi$^BeE%rntWwd1_45q7X}pRL@wTAOo+ZSPEgd%9%Vo>xXxM7+q5Xd2iN2pb zDguEF3d#Ga`Wzb5`r#=ZPjGhU6B~HrPio5&!$V$j22K@I7cH9EaVHd7{uef6tUa$A zE1}}*Bh`8c>`Iy-1DSAwdDSJBr(D(Sgdrta=kg^cmE4X3&iSQ-laCqONbe>^s&?D_ z^tUaCxoIv}G{Ra&E0-C{LX!vE@@WrcqrBbEy{_}5iFsa5f4d#k%>V=r+Ai5i(IEYt zPUEe!;n^S0o$^otEz0+z5VOU#C*}QWQY+gpE*N6`yUH2sol6KI2b9KFR4m!~z13pF zaPbAU9@3;0DpNg~b8ikz^!D292hOXk@6gXL#993jGVy1GywKm{nv#MYomfaPgzY~o z%K{Pi{Sl+Gubg05g|~lF6pU@iZ=mnF)Y~u(+9>Q^Y05Iv&(kZ2sDD;P39@z%y3kLn zJ_G?&gjSJ4R})*IpvJ{{oE&0)asQ zH}twg0xw2=tEQPA8~u&tVezJL<{*$(cMr)9M{#iK1%@IVi8L|?E)T2{Z0Q`QDEW7K zk4y1$PJVe~CqMZyV+Ca&s|TiCDM`pJ@bD3c4Mhi(9< z;kJL7hDXY!BBa@~$TJ!jGrF-!OJYQNJXU;cNZ1=IQL*oS4PueMGlAaqx!E+GNHF;x zB6XFrY#=+h6>LFA9eg#)bZAw4x2H}hij&MF*k#C}^=Fg8D5H zis2#USlWxTqfjeO8bu82)nI1%sV@t2(qF{N6w8- zL`R!4iYzRq&rnM@Z>;<}l5W!!X<(<<{(8cM(qyP| 
z4Ndk_iew$%K2)o57(z5hs8;^{>0|^Cs9<(=0@7wUv(VPGimfWZsC+%_>2zoD@#X5J z8f`?g%Q}J4kkq$!sZx@~&F#LN=HwuWzZ}f#VA1F|b$=5cZ=H_NYzV;eE>Dm(z^_1O z|{T3}v}`7B*0mtCt{a!!3@OF|Ag}NQeq3Z0UbKrRusGA2u`|J;#XFC5aj1o&u@_4gW*(1l%nX z2=uLg(suE6x2&SE19UC$WYhiS{;Te%Xj29GexWn1bqD}R(|`#LV^a{xT1e#oS}=#JtP{G@Id8m z1P1*0AtJmtwcD3&$9(ewgxvm>s!=P}j<5J8{`6>)w`sXu25uxw@wf+r(75&*MQv5b zumPpSLIKaCfW698d5HYYb8m$CZQM>UnFS3jeMA#R2!hBpV03UhBqVD^4Z z>??@=c*-xTH3yS+I&%e|ZJY;bUNp)nR1hls@8!CWFg$E&QZiEBa2O&q>^ql+B1-D zF6SveD&ME_Sjec9LF?v;3HRzqB&4|7z}aC5zlqkP@(8T(6mGGTAN+<7_HCKb%BUP} zt<6^*K?P>&sO1N?3s~}lcf+)=LF!EwM#XurI`hnGkQsLBH_!z+w}0kI#6lg+^6Rh? zVI*q7^T;8N0Fz^ExzX{=%0?fz=cUBa0L_YKf#T05O(ifB%k^XlD-`TlT`JO*dfscL zcC`q!a%yzo?$RKn+UclfE{ZpHr)w8uR(uAnqrxhYZ@`(PyhuX!I?NA@Gx!Uv&k^M< zZE4n>MNnA3_D)k@FVktx*a$x>dzWTExQ6>A!DLPT91}E0!Q5C*4$+I*UL50ilxvfN zEt0=_6gql$<1dge$b1S0CpZ{WI}>LYCsRY)|Ah8NR&X#(Tx^VljD-J*d3hPcENxs& zofyPy3|&k`OpWbLOc`WM?aW;)2w6C|`1#>r{#$*IEZ>AptKi>=VK=^!-7*Y8oflL= zPyof-0!XV5AhA)EvdXHHfz}&tO)b zN|^NYgST*aWdKK-SWUPXsz)7o22;qXktSQcOb?!E?Zk)5*EC22WX(B7RsBKEB(yW7 z?Ec8Mb|ZAtlSn#+Ct)2YDIT_E^>U2%L~O28J7k6$?9ZJu>}Y z>5-j@jf?TWDwT*D3(bN8mz z{yyt8-Q8>_oyFX2rYbB13@P1_Nzqi$&TqX>+75xN`fF0;%8zyoFZrc*A+1z0Dq9{^ zi_6W&EVq*2#yLPDD@rSQ*}3Ax+$5FIenqKZZ{VCP%9iJ}H(S(YaZ^MPK{i*wc;+18 z30UO84M~*Ce2FgedwRt*3UJ*P9b)-BwNa6g%kceXI20w)_06&$(a{+kog3$Jn(mCw z5CdDi75<1J!ot2hU0LIF##Sk5BkI&aM`ip(WxCqDJ*~KiBlVu~l)|g!fm-nJC zOmU9%6{T&*yA=F#hUXtY+N04NTKP@ygO2`wiLV5(@7tznGzr3KhSZ({^t@9X&N6zPc~tCa4x`OXUQo zM+JnxWX?-=C9-25+zam$hZkCudY;`Y7Dq67g{({V517uYbS1Z&8Or5U2t36jsZ`7} z0vd9$DE~bD`jMA-DbT5pyKIPUiLH9)NXoA+soC&qumYugQMtM5U1qG6ynL(lMyd+{ zr!}zIXeRzF@=SKgv&GJ>@4)$QGfTvkmxnadYS>xij;3A@Zv$<5I5ngL5RAA z2Y%eu@=JIf{74tR2sqv_@P$comaW?Y`FNa%-DBUrud?xW_l`J?ucvXTka$E!N&*Q0;Of0 z5KV+D1D^q^2vh>~#LSFY(i4c9oqku7S~XWn?g>6Q7fmYP6(4>VR*7U8;xr&puukfX zhX7^?pf-R}AfCis?X4-@>eZt-CzvrM8DiiR_9)j#S==EBcjGx36K7TIOnbK=P-ben z!vfMN=$0vs3NJ&x{Y>xJcuJ!4E85Ybf@8WDR|-;#;3iv=o-C2u_#457T`%9Pqbz-w 
zn+?gi8^h?a96!sMhEuq4M&;iU699hsQ<0AU@$$Vn3Xu7T=eEdQBcwGM5P!IGv3;cP`17-*+~vB*^86na%i~O%I$+`(Mldm#Y5R0D(`cxBt-b|E|EL z|7z14D&#L9+;120JeoQDIv{AJpK|t+MFj&G#+BS_=p>-GK8HTCCxCV9)|>eT{Qumj z|Dk>V3%op9x$o284*nrq{5tFNr<1s_|HLC?HvJ zuOeb>ftx)4mo=Vzkv2nq(vr>g-=~*2kLb-(^iocFxAI4cSuwnnYw^j(fQ^}US9Let za*BPXy|$UQHP38IxpLNFsJ<-S$S$-HEW49}`b3_eg(0REW+=MU7oCbC9vyE@sGb9Y*~i}flyRpzjoxTV@9_cVxe^}lw$`PcIok6-V=U)EpRUp!ylU%BrH?{OWU zbjMnu#Q+W~*Q zG%tA$vV0VV%s^4POQPf@8+dwXLdzHwVzhd@zWkMP@P*xMnlZ>vapSn5rNL<-yW_AUJhwHK&EN-xf63}D|D z5991mY=q$WUfDb1=c1%FzRlP-l3%rUPWz`NxtylaC!-W!MF!y#GVp7U^aHq^dZkvo z%VIz1?crpmoX7aNDxIZdbwwtV+w{H&fcDQqfBkhK!O?g$8ef&S!D8rkAn3De%djRZ zzZ4FO*TUdA;9E1D&)dXB_FuSr_Xcu`p$y{!;({FzeD&O=YZp2tI9=q4qbrs(w{P_TJ-(TE%J!78{4d!#I1#?%Bt_^*ERO zFvYqX>-iI;b}`mqxVRb)%Kp!wO)J~C#97x@-*+LOs-Jw_;3tVBm3_Ty+Min1eETz^ zYX&;m=i^T^3|ZlUO@)Jh`(Y)TK1UNSDc;Re>5bCIq#dSm!4sC%dY1BuaTR$BKiwu~ z4XuV5)#4ChyRUA0IB)E=IeJ{xUL{8r8)Dvm{BhdpqZbq54ytcA&3WbO$IEiMi11fU$^AP`}su}tX@_n-qpz|*i*BA+2hEjFQ`XvlQ;RAkMF70r+4hU z=XEmX3lw2r++-|OHFi8BNS03KX}#Kgv>eu89P3Zv1;eo7CM6~)GI)kgkkFNe=OTMU zRK$k`h1`!~{%CeoWpE56sO5}2^XN9bexM%RhP41p!wQ?_w`L?eM~Y3uJJw;W9yVQW z%kl)j;ol3h+j28-Vf_cokf;exp^}Sv>YVl$5l0-xOVex~7C19U^{g3Mjp5rzJde7^ z>)CVc?>xL6B|}F&Hoi5L+*BeT8o7>(SxqWKQo^}3&YH?bt%|~N{;nKF7&k^12=-yN$Dt-ELS*`DC9P#l@5F>k z>@3q(){dOmW*Ew{xS%cy=Jiw-EeLL~jF`Cf&oxao={I8wjTCiKq7<-?+1H2L$zBSWbX4}bP&!R9p8^j=g+$A= zkSBm%!XP+>b`0pmYbAvdCDRk|BIMbm<=Ms)OKD^vHZD1EM1}pcf;KHhDqqrX046K0 z&0h}BnKZHm$>-~Fe2M3AQ1@sNMLxh$PZ~=O(@f(D<>+i<_A)@$RoEP6;&KjPjK{59{G?M9kx&=Zw1*!#eAkSkH&h zJi3GmwbmW1Vy)iPfF_~Mr#Q3&eN;EZ*)5H;AYg-W#5e9B=5diZ&(1XXEn702G+}h{ z%`Z(U=twtE6KX9qDt{)GV@#+*HBb2*p8oV2?jEgQR*gnV%nuzxeOpFYOd5Tom+#_W6lHKT> zDiYPxgOMYEjBnntM>ndbY*IV2qLJk^vBIj6tg+*rs(2^+phX{kV5uA8s#ahhqf%t4 zOwjEhcBPf5FK;Z~LflQ@MTqM^&mZZtn^@X3ad^eJYDWw}<;I$-lS1!4WItDEd4}ZB z%g;57Z40@qV&Y*%b>9M?5+NMM}OQgxh1gwp;iEZ|Y+4LvgJu?=o z%a;vRO7+Csj1<;15QYj;CX_pY9RwM|YyL?MxSYl{Upzt_MT)!Qn;j1ojww6SEFa~O zLt|P=M1{@LtJY;_KAw(RfQj2EbxGF^*>^Qm(=@ToRHu1(C5Th(l~8LggB)4jW7;A5 
zep-HSQM}2QM34R}`Q^cNHaKEEYay@5uGw7r^$BcheUqT=4f%=wYAetS?FahF{mOP* zxu@P27eoLo@XIe^>(${K{|R&gs9SXzs1>9ZRL}*27&`K|uORfsVG_a;P*mV#0AZ3Z zm}e_-0b!=w%yXRy)4wUwhHT{@9TPH~RF$GpGje5;M)?p7Sqe?P44_ylt9%HV>^xbc zYKWK&osvl(;3+kwY7~p?DwUN#WJ6{~#iR_tl9Ev`Py=9-RV1gCh$JH?OUbAjMIt9A zrxgu}kWnX5st*(n{gskY7^oIW|7S@~sahZcaFmKxHHt(t-?{bXF^pg8Y?qRw+V2=0!6v z4Ui%8qMjE9{36Sea#1hHMAl6{D-h{Gh9~2uF;p)|M7ES#ORA%uR|Ke#aZ?&95{m+! zrATS!#I0-K1AlK(N%OG(bP;RTGe#^r{FTNO~ar_PV zlJY4Y5=eSg0x%@K>HubwUOSOjppS+>&aOPxg=iKqh=}o-sAx`0xAb&N-y8Je^d7P^OGcu=~h;*-D`6RiQ2SBpOiu4jDRVGztS<{SxCvs;ip%&a^_ABQ!_GdC33jjG2{E;>sT_VBB>G{h|>nr2RIp$s6wdLR{d}($5wm5 zOpF%R%N?Z{;{^*s|JEsr;2td5%k(ni9d-FTo@12cru!Ma&!_4@y!WT_;y=pE`OZG# zEw*O8z?JrxarZUlKJ*y9L-O>Rya)YtE}3l`+vR$qDt&f5M=6O*e|Fjtmmz3)04;e* z-!b+|jp`aKZ%yC1Z|1>!EUDcFo5PW*ZG2!WQBT)1^h%HF8fps9W?=5_d*oeo);ZEF zF<*OQT~wBlXTCD?3XUB)wNx$Cf+Dx}I?nwTNUXqz^XS6Zp zLIT+tSg14Wf_JJr=>oU7ETiriO2g#}zIZH?>bO&kdyHHX#dSkm0-MgxWNXq9Ae}Q? zE>`l8HeTmCGoBccsTuO7_UsJvpa=@uwAUu2y0GxQu$Eo9!Sgs z*%&1BiWt~VI(J8kR>M%tS@w>Ab3v*T%M2q0JcI7 zLq2fQMHCGfoUs$rF(cg5@ctLQ+~E?Uz73ZJ<07GcRTegAdV_EMpdhGEJ-&j!z705K z)WbW|)6=k7z`MivPLGDCako#sldT!^)6qF!Tj(!x3doC@rI=t>)ANC(nWNj`3B~g% zlhd?pPBU!CMn_Z{W$~KSiKW2tfuw<@9rK)DQqSq70UK{q%7T(I02yeRI~hkEq7<66 zb*w?uY1CoVaWaxN6HcDNG@*2%w4inq#8lRLW%GqJFqC#JqY_Or_)!&YgyAI(|KM*l zPN+}V9{s%!tXJq)q*vf?e~?|08=j(G@E+hE2>8BUf>#*NpWpA&d$4=XdyQvUE4|{g zO@1(!p!~r6VfGBKBzwYpphdfoy^vk7U7%ffu3=BrWjiYz{q`cun)-24tvwK3Fkezh zi|@?0$aRo!3UI6XCYaE_4almOjo>#Q?zel zH7vKW>nA$MjGCY5_w4(!`|$d%`fQQ-s#=5Wf$hYyGy5`0X6kmJd_f`Ku zY%a9igFmY1;I7%ORj-$|)!Iqa01>Jws>79PRT~ZG-KsThd$-Bg$Jc1uDV?e9vn~wV zP9-0}wtTXyEA3%zsW#>Andd0y7^@coOHp)B#Jzft6&TpdnHRQCUPneRLN5h$u-i=z zTLWRCJE0~0JE7hfR!s^a9IcldEprR?h?iRJj1xL-eSEWg{*$~fTYsIm8t3~p*IBk$ zTBjW3XKNf+?aCe1&E=idP1QR}&E(zW*O}M(s5%Gu9Hm1$c{dOF@s24>Qrow+C$+~-q}uJ_kNuR*qWT^Zf$T<4}6K#jTcC`^#r zyJ_hs^(I@hbrLXv{ zUSxTcyDD{Cb3&6$90#Xnv6CgqCvvyE;qCWHH{XZsD2iCHeY$$_OV5Nlc?=RpA$5hBpQ`H!l86rphup) zK+}ssC#L1MjAQBaUE|0!w*?T5Nkj*b4U{4+(77r;1&p^pjQE;gh&!V2Z5Y~=!~FDzgZE% 
zML>{#b0R{7fDrv=L8>#U=zQzy{9c{DlkR2kHk75Ap-z2kZyo2jU0j2lNf? zgWv<{1M36o1BVOr45|5Bd%7^Irxeh-VN2U;+>Vpc~K|;2V$|AbU`I zV0#dIpjyyc;98JcAO=tdVDA8oK#YLXz|#QpK=Xjoz|sJU!0kSizQaD_KKebyJ)}Lu zJ)%9qJzxWhxnGw+*O1qsEufn~1an{>f!=}M0iA)>edv4t!{E|a-AC6q-6z{;wkNg+ zvFEkNy{EN@y=S#Yy(hH?v*)zOyr;B>yl1pWyeDLUQis$A#7STQHUl~X&JJ`D=&JIM zDcIl(w#q;7iJ|Tre5JSV9Yf;>pUa-1@gC|19IZGH_@$l}-1{ zXx%SS-RCZD{dRd>DEvm7DC^tsvRiNy*=uE+zh0?3ugRQJEFEy$+_ z#zdqepI2ahA@Yj6 zF7!Ur>)6Gdw2H=#i@;9(JAyegidi*;m$O|m%Ijy|61!rx)!Ef5_LEH?>TjEgE&X{& zDV-COfE$_{9Nlo97T*T-()T`w=XmaP_#Nl589Q;~x$L2{YGzWyrlf>-!I$0>R5J?G z1+C6ip|t&L`dm!7^=Wrb%dIiKC)BQZ^Op9S@!u0|!TUmP1Dbs?Z1byMxeGR_PrT3U z4#z#q0X|JJEr*gj@;qhhw?BG2fX~fOKmAH`3clY6Mq#`{opp=6u?)wam_6cOJKJG# zi*bu65|c_Wc){iL>L0+LfLr;j^xQ|q#i;O;> z{y>L5c(VCaKZ%{2_2@}LfTs52)D#wNzj^UF7sW8pBQ@UQD;Hq?`dL0DB@L>rM`*IS z=ak$>$-SEJ9k>k7*OL@`%73Kp|6*KRI&p{2{l=CxK9joPEyk8pHkd+GDmh!Dqe8qKB@Vn~e;-=%DbPZj(=;=(1 z@q$QCIhCI|Ure}PIFUxyE8W{8WTV*rAXbNlkb-UUJS{bJ6usPRw$3WhRqA(iD>s?F zzCRH#s@!ar7M4&@&ML^3gnpmfqrI;Q_itEv&;0xAi>GXaTo4r81V8S`7sfetNWmTU zC<8Yy9Q2eo_ooKbi3e>J&Bcp@l%0)Aq3m`&Ni!FlxPc^k46O*R6+kx6_oT`!qW1lK0^(XOo2kG^yRqirf7yv-zJVVO7YpOSHj-z&CMH3Bm{XMu?Dpq*UU49xh8&O zd0Cay#7!&&b9+7K`jv>8n7LiKf{#~`Zl#d+zD`SgBQM|py zm6YrsLAfX>{X75fJ$iZl00$8$cOrJ?m{lO4nT0Q(MiXRvaN{s(JT%TtooC*0Az`g+hNLURHVYFF9 zu;ya}|KJGdE21o2=XG$&V~>a2rULTkRSTwoIM(N4i$rkm8ak0COJ@PQ_+{iAu~Nt9 zT&KH)`C;!<>Z3I}k}Q#Fk~86`1{0|0$x&Pk(al3gVyF)AOkmY92#oyS?c;Fn5XMQ& z&5J2ABId|ic*8mer)U_^;|$3N_Vq>XV)NnWi(L*l<7`R{d2!vxi}LlWdZ*(df_QSn zX?*SOyt7-rs}5IdiUkaPwQY)Vt}5@9qM({^gklr*=*O!WnyX#EXyaEbGH-bJ=cD2% z(b8#3ybZqjjp3%2D7HeIx3e76!N$XZMyXRPVrLb#@WI5Z;8FZ{gFogGPY!P3z{U6a zvg*w~){(!?Bfdt~tXo|3`sIEoxxWgbb{NEwKAUkxOZ`!^<1BpZC&{B^qzYZ)_oE2x zy%bzZY0t^L*h6LeP9ENYq2tE~P5qoX#Hy(1+BHq=Dy5_3?a{F|FAhe{(=$Y0Y+W@C zyN`ZcDuiW!ZP}HEWYs{=_uIO~?uq=!5x=Ajg~Vpy(s8-U#%n291X-U)7A()|b9#NE z)mDw?=qg%3!!5|>BjwNgj?iQB7#dFpRGmkW$LgspFp!h-YU~=y#YV}Utqd!f!{O^(W+PHBD*0*L3 z{EZ%mmp#v(iy4226c95xd?1$E!JLo>{__Y*H)KUKwEr%eq_5Fhbgy6pX<5cd<}4jN 
zgRd-xpT~^&@QZG3uO&D!0da*c>n|nGTTmr z4gto<-A8Vj>~d5PB047?f&%`MiM+opT`ogE@(m)m)J7{*_NpZgM>PVhldzlAkvJ2eDARdYoXi_$D@!?J)tAJz(nMW8Lh(m1p!C(%03lB)F8aBf*_|5d*ctt+&Cct zPD4F5GsOWq2{4{;X#&b%k%oP^iadiEzVJ*2W`QCQbMwkUurlnUnAzz4wqR9OEZk4%(j#tKCy0kddB8Mown5E*a33DGXRU{-<9i~Fr(i$|t`v|~re^VopF z4ovf@H2l1*&Q-9c@*hDQqEV1>MV7%cM#)sA8Y5Pxq|5IxasQ^FX8xHzS4pZ(I%CGQ zvOLaD7t9iJIw3k>Ai@yc7OKY`tX}hQ*nX1tA>Qu&$!x@HWy6qv;4OwgdX(0xlr*;5 ztTQdS>lg5~H9F4>9SnA#e&%6a66>d{+Yh^~@b+uF=woqzY2SXW4n$M{n#9&EDcFPR z1SRHpx@wL4$DZiU!4t$58(XIa&gSq)A>HbIU1_yLCY=%svU}a< z7i+WSp}TrbFN6`dX~ZNOOv#NdK)fgX@DhT}B?rnU^8X4+1cVhZV^%a?1?duV1)6O~ z+S=N0?@IA*YpEnATM$|XXfTBXs(=fuNjm;zENOD0@GOv|pGe~f>0nxAbHs2|q(7p( zZNHciwTBY_a1aYFD6U*KZ+?PEqb3%u9I$Kin$amv9RP`37RVc%XKni2{V5xHDZdrg z3g@udwRC{lN>H`oa}SwkBn2mC$$LOZjuX(sIlx_>c}P4u%+cypqidsUfCth)t(UuW z9MU0%2-Za6d1s}}<$io)NA1)^>cj$U7g}0K{H~`E0rdf;Adkki2~gjD>im{3kT;lPR9q(!HWSQg$yHi-5txn0x9=~5jl z!DvBc_u$$I6=VQjOX($RlWjbOch}+BHNBl+f$E5u4OY8-&Qh<*BH_?S7l-`g&ndl!x^tEf9wL>J4s0;+x&iF*v;8u!eSEV5XMGFl`EPH`c!lS=BRglTP_fzv<~T2%hD~PWeV4dXmAN ztwWt{MV)O!o$W)P7ds?Ks^C98D7P0BNIvVz!DQCof1$H*XqN5U3L%##`>*2gcP}iifPOUOV!{J;fzfdnKye% zkTKLltI_PB#)b@%os<<%6U7bwvu6FEebQZRx}h6KW0cIV-(+fXS0!C*Bs#XPsAbD5kaV!&ND=8y>3JxvnO$HS=vURyDqR zA>w%Z`{{-wjPdaOyxh0jhyi=lG!a)lRC&xf50~HsD$&@9=IBOabgkCat&L>;Ladky zAZHH3nx$ao9UDf;2RlVI)TaQ`E}e(k{f$APlKt=@LL#ztD2EE0%Vnp|KYf1+4H|dK zRI^XT8`KQafiz>`sZX<$G4JG&6YkB)TDLsWdcOiMG1-w_k>NT@dwBi&11~E zaGuJzA1a2xs=iA4Rg^0l&(=18=YkX;G2vDcIz|gLoR2EsQPshq@7vT5#7jImKa~O~hN8}}N~ zB9hROt}y0Fwz;{=&WUuyeXGux7ogw!4NEBd1U^JvyKHL&vMeZnac*)^@jPO(%H}O5 zc9T}dvDO`wf&q>)R;MG_%Cn=q+)(k)p2GTee+ zy}z;g;!>Bx);Z^Yg}j3!QdbXxA|Xm60g+IPTC)}b+n?13--_FW6k(`*G&pfmKO z{tYcr2Dgh3jXB5EGZhi9xD9~9VFT8A>=*MT(IXhW5ccpzw8yK&gB@u$F@=U6%3_6j z*+WQ)NlIlgZhPEP3J^l7z&#&VqQSMZEQfkR`+opfK&QVgtCeO{2Pvbg#^a5;^i=U3 z;(b!@sPXyhTn4iE1da1ypEo2h1Rlh3GpROLd0c+8n#5}e+)dI#(CrBtaaCB)!|227 z$^UgeR%%nwUPHY&g7moZBuQNt86it7~uApc=`-*8dJ() z{I%SGk=zJ+aFO?lPchecNR79mj=HI 
z-o8fpluFthiqGNXQv``=F)v{~4ohW=L?@W9))Dz#Vnqb`KszQ$vDF#-<81GAE|IfUa@dRe$AEaUo+iZ{2%5}$c$HSx_zQf2(<*G z!>PcZE%p5!-&{y_S6kYht>b+^{7rLT(v2sZCi)WrquWnD=647Brn@73ZS{J#ws$T5 zaIme~T724B-%%Wj_B4pa$E?vVh!Z<6zD*va;#f0wJ(R*R5sNGmT^S8$Jr{GQ-Gn=^ zh?@W~&k^tW8sm*bW8EU&e3aP^i&9es^ajG{X;_dt;kKTW+&t=_k@&er;ytO6f#)BS zK!;0eQOh#AtJR{&gRG-zu6R*{#Uo(;dcqy&%JQhr(XQRpwZ=b1pOul&w4|i)~ydtZFz7e=?QvhkbD5UhgNVN zSOKg-b;&CP0+E7cCc>OitHrbW8RuEm)>7q2F7$JH@vNkuQ8~{_s;$c_MZqgqipcXT z?|J_3&LK7O_x$t0zVBAA`HXbm;kiTC))C%&Up&+;Ypnn7r*9v-cWc}Ew;E^v9_j|( zpB~&_9k#YqDFQcUR{I2>fT1HDD(Mxs-GB>%>gRD{rJN{Eb#hv#g-3$%9 zkghPwk5SXnQ;e3r@H(6WVPdol1^#D>*Wp_kEv!(3_CxqTf=EX70ckVxd>EtCWU?FB z;){&YA@Fu1Q~Ww(w4<3_e4lt5%q)nlMl)0KaAqc#R_nxEPSshSw^8SOGq!EZ3pJ%_ z8FnLr^!e#{h(cMU#xy9(-3q9ok@zd8utK4*MLF;@eBQf<)wMrIO%M zIE9%I&B!6AU&eSwr3{IE}2k%H6T!for!sxV718!975IP^FC9Tbwq#8IwcM=I?!O{~bR) zJYw;B1+Su85Y)q13|ohE8|2`_bheNd{$vAYtDOx&fKXo+LQpc8kTra~c9sjtIC6Q| zSe{o`D!5#WC^yMpuoj?QpP4CoQ2q?`45)@^_nBGzcZ}I5+Wi*RR+h$d#kNwL{I{~5 zPviekYAZ9uhrkRiSV(5j7?_#HIE&^2bPFed{<~^PXQ*^9YIq0WUdnes{yQzVX!u*N zJh;*Z)u)Lt_9TEMnh5g>E)cJP>x)>Ee0>2*meI`uv(X==ww5Y5@IG|26g+xNqP8v- zZ|E!3ZirWmhN9OTDobYU+o|6PIYfOjxoXQ_{fi}=Ni zI-rYb>gxkd8u%ZN1)68-tyCT1!zF z!w1z;tQOTQBlv7~pNS!gpQnP=7MDp)79S!Qlh1DVnwYRCdF%Y58n2=71ZVfxxaRDk zB~9!*e>caGw3;Rloc~<;=#K(ESVXzdL_F`Qb!dHolFh#jG}4ack{(GCpN1Ku^59XA{^ z%mo*5!%@}tWkK|)!48hjONKcW{2F9k+m}buiUn>INGoUo)z^K;5BBdrd0R)%oj=%r z^TL75G5_E#YuDa76!Z;&&;5Bn;koO7-nXjnGe16b`%8yc^&NWdi)-(fIx@TNA6@_8 z&i0<&55U+DG`0g+up2PIRWkphpCZf{4{p8#sEm)it5R`Qf3eJP+bWAIr3j>o0MJ%( zma4x<=#&pDr@(>$Q&|NdcQj3ZarQ_F2*M(+3-wo!j`^rW;@rUud z*2jwXRjPz$xa6wY!Q2*Jr7G_9whm#+d>#SwNrJdEBcpj^@a)t6oncLcIR%->n zB_CleRW7IBq9G>?p?G)G*3$ez@O51G>Gg4UbH348U*$Jmt6~0Ri4SES_(IpJgk4~O z8LJh6B0UvED{K`d-bbYdXi0kxZRfmtD7UUA7L74Bj+S zXL7oXTBF5icAM4ipxav09syJMx(X4XRRuQUzkAXK$*6jW}_VYWchm$olseVxU}#9 z%MHX0l*ShJ25Lo)Dt?Kk3?ZL4Xa+fQf&j5lZ4S8IejUYlMu=f6sB z(CA=Z?X-e>Z@u_F&47FFz-~pgTy;#tVIA=}msrH#&S*L~o2aV_2D!i@Va@O&*E(A} 
z8xO*w$rAO0HIvee>@iq15RG!%lrkx_;L?6qDz=bUD7FYmK`FK}J_YL&OTqfmIV&Mw7576Atjg z^zg?fL=Kf(c#QLf!(9Drm=nCSLYXWh6G+A!A}|1C1F4wIql^fBQ28r3kaM zW2r)1w$`$gaPm_I(p2O1hOHXaAGF#(P+D`);jK22dc3aqdroDl4!Q#tjp`Kxcbd`& zfG36xw6^&BE{CX7k$@uLpS0PE2U%E!(TO7dYy2muHUns7@ll5Z-w5k7wBBJZrhsf< z4Qel{L1kr>wt%;1HE;m^=$do_4o}|n^PcO^20T7hB26RH3o~tE?j5~Ku)pYVu4JPKSrG5FUY@r zh8}NEtcTR9ZA~194>!TGe-kYBHyPnFK+_`rpBX(Ci5M`P!{B-j)~2i?v?*(dZAxJo zexhZEwnc)?n0ec?Sd+1dXn&>&$C~h_riSj?MI5;P%KE9u?O6$q7eE7AB`knjDKKV(`%S4~kn4O^ zg5!#@irp?~Z^D$!z-j0v&CT@E=BZ?}!bz1Q6bcHA9I>r1DJRoLm($_Zx8FCK-8)*} zb<;PtAFwvAN_B2t*~qb+nqr(i6LU?QKYLC1vA^q^>G58>x_eir$Z<5yaqH3pRReR~ z`Q3w615K-&ogiAXM!Ui8b_Cr*-J094Ic2L)*9?sJ^Z^|`1a$N>)gG)C>%>l=+ATm+ zjlV@H3EY$BbH%yQ*McVAjKOum#e#r79wB z#BzD)mC?JD12DYRifH7|lGBH$qALdmBCN@2F}qAO$n-$kYGSL0a=Gd)pP#IL($X}M z@pWbTBYg*YyVkba@z-yD>h1wP+*We~E7yKl)rwL&bKws)D}u&V2furB|6MbkrrMrF z@uBg_j_KPVyI2Qg>?2>mnz82*_i>?BEP3b0ZPzw{{WQ zj4r0f_4e1j8I3OI4K3otyf8@qu@SD&sC7A5Xr+&;havln7SLOH0haC!EVM*qdMBwO`|*D?^woLM={qTe)f1IvdR1u zeimZ^W7lRJo1%qX*>u}T8xhZE@qiWa0@Swow*jllB8Ji(D@^qKS<5*U9Sx&SAY zuRz+7P{B2j9m9)6(@A-~5TM7DoX5%$r!r+n@@19|gu@ESr>-o1%Q7WdZQ-z@F498X zlKtY)y4!{VY7@wxyk^E0&o*}5*3Y7XhrnushR(?q&QLwlm98o^no@MNCS8kk1vT3~ zG>J5uk-o6XQYYBVjdKrgujy|I=*X6#m7Q~++kD|wRtr5u%Mym+jeViDYc71gbcFif z1mSJT)ue~xdfvfDyq=I()^QN&*kZJsID3@~S;O7`bW4h1T)lNYH(t}IVzhdl;#sPv zRlBg;<(=a5*uL!}Bs79t>j>PJc{Za>b|y!_Yo9s14myo>UwUk0jEK)>54xq9FFn~m*8KCm3yrnw{@_FNPAgw>eBU5k~ z#=K($He;LtMu5gYF!{O!u5cr|giZq2=oAYQxJo^|48XSfVYas*flHwZ2C##~a%+IL zP}UrX)v{)3m@``|G)*a`EhVm(W6Vc352Dp-ZdiZU`1mKsqOZXi!y8{~9k7L57FNa5 zB%^ml63*;&#%T1D=|>JVl<&Q z*(=RNh5~aXXaF-=K|}6w9l+d2R6DUSmco93xQ930j%%GM$T(7PS62$|Hb}u(m4cxw z1)&1NVlpDf6d;Z%Kpaz8NK66Z7z}TKv<_%f5htaug@H*N1oVYEt{+zAVJ?e;Tl$h6 z!*X=XXi6PL*mGVI2lcS;yo5T#bQcA;WmU^c1_7LTDVZ$80McwL`3S?i%<##I>_h9P z@13kpY`JgK$iWO__QD9Ke!TaSeQ6NSKz!@=cV-46_EI?8H#{+X@aUGCp1M2R-%Dsq zJC!c$YnP{*$ER5c6(g_AmYZryC z{fql=`rfDd`}Tfmt#H?ohWuHGzAHq z84njE;!Om*jON;t&O3g|T>QoK;Oe{R^yCtuh&2fCUf{1fVO%$<7XynbOugvLKLV19H9+J 
zY8JZ%TR;(_w$>7gL3WVRY}gHd5qs2PNzK8?pvC&SQkp%xH&CkUA?q}69-J8P6b7>m zZH2yEQ$Cb4=5MMbuSifO4tpwzctP?^LSBx5OLHgX(Ke&u91_iS&cWElqzsN!w%+d^Ivmw*i2FWVpYYH9_Fu8A?s7lkrAd*XjoTdvYlF-ldAE zojf$OtS&F7P^lEfTJbxB1YW2OiBmQn+_u_w<3nq1-3(+w<4v`txb6CQ8NiP)%*a zD)4bK4pOV}f<222K|y-iA`9yMQ zC@i!M$KmBNS_xjzK38L*UUWGNucN+R3q&qf#lCSu6;G{E+(!1hev2cSUl zhVPebuP9w=g`wQ_*}5K!2H=B6$69>Vj=>K6^d*)-1U657dP24%^<{RnrEHmMxm2U3 zo|7%Jd9-!A4QdL2j^J|xgqqtuso*sA46w${*db(#*=%4u1o5|lvqJOp zUK20{Ojs(}z!uhJ$8#fv^gz%WE2MHYc~_o8V62!FMC4P+Q_!T6XxUG(BiA~%Ai=)r zk%9z!r(y-E++SR>k8~-1l@#?iY1zMYn0f{Z-U|fZ`GO1;FMLdO9jO9GqrDcuDM^*l z=VGA%|7yW+UOJ}efTlqN*oi%YbdXH8!e#EW^XojGUI+rmcGkCoj}yb$-dO>b(CCb1 z+u){wNMStN+FKaTb>{1H_B`Tr6_}q&K`)n=yr4?tCs^je5Zllg4har29w|s*7?4Ux z6$6RsN|;You7+J9%loV0UP&NnTGoP@v6gc@q?ErF;?;aoT8lHp^eENauCWPW$T-P*s?T*5ll-s-irrk$!hYNQO z%uMbs?9c7UPvwVmHb*L7m5&3}oOBHG1KEPA1m3XRdo}339JoD4a(5msNO17|y9?6t z;T>>%fY$H|s%$}0mC;(cNCj433e6v$?*D(oXg^WB0wia@IC$-WAwO%93t?iz%6Qjp zeZU~#!z73j)VKakuzZ9PuLQ#%p-=$uSfl=1@ffm$_sJ8$rUYz_y!Yy8KlT)H1E#^e z00-8L`k*ygTHl_(F{oN{;k+F=|LK>Eg_4b&N&Nbq(`SAPEy6mQ>liLb z;QW@lf@H{)dO$Bf?RY6#()cC&63CCBxMg)U3*4VYo={sO9(Tm1(bytxPsFM=eSj9S z+xx6_A*Y6>2%rkyQRV7yBN&IBdeIezBZOV9D!W>3uR7QGLGqNNC#%+IjUw-JF)Rb{ z*(u5rRlS2GdH_3y{B-r|`dgqJ=j*2GBtWZnozT>QSIP^a_P~aE6!&^NQ@7*{Dpl`I zh3V{u+}gs*fqGx6(3?x=YGo&D`VrH+rR(yO_P?y?|y1&t$ICLaUwbTzgO>D%iK4rrimSKEV7 zx8_tbHZ&j<(%GO)Wb8RrNug!mIbDWR%g{ht0L&#DED;xUzE}>B%VOhS8w$zCO2Q>r zMjLOATR%MJHAgCAp8anOZL;K4{{nnvpZq*mMm;_rk9oaXB)5g@VzG{$h>z^s6o-Oz z4`e!S1cIE)Zpy7M15$C0`-YvX3l@RVE`qxV+*Wz8eSTfPmpB`Sk?}jx*3v zkSazJ)0MvQu>}5cU1KTSss5$p0wl|DvF5nt;{+D(^0=ZXthyqY*(+nm$LI1_aE4`3 zOJ*p4roN483-_Z~k{N{bgPRQov8LHsZeVQ^gDk~rRRusIUV{(gm<+GCy8}kOnywn!yNb}uTA^PD zUUnFJ5#du;mtz&Ic`ZaXY|u%17?~U+j$02x7pv9n)H%Q_atGEJjog}(a(nkS&K1^V zSLJer)`5dgeWcKs^XJX^&j1A+WlD>diKWn#mKkHkYQW``F{F&VX09N?v0HNm360(e zvIBX1243Kp#FSSy6HB40v4D5rZ)>kU@gMD@EipDazaq2UZ zHF~{r7<52~r>|~9@QI%0;{D|3$mg*xY!$W%w<3hLnCi1|eIUzn;LT?g@O(CzUcC4o zEU2cHl>zYi+Y|7sG&2I48J)p|=SQ5BAxMRt;jLqax4fG{}N 
zF$wS>6DHw3U~DJeNXQW0m?ePn5ndJ;l6>$DkU%~?$iO2&NbB8ut9p?vIbkfTTUA|s zmvhcN_uO;O`LB@((656jyrjDyvu(4;RqKLMh;j}UcvGIu!vERo3zEbnE>|4+kFw}= zZh51O3SUY6ri_a3Uj5T@_lS1oPs^|hBW<8x4`h(PQ*%Z_0Z~<5h!RN_nmVe>$W*P` zQC(=SseAMD@$&X?Uw50wGuE_fXCgkadi~9p zG;`)~<*n`4YTd)B%dZJ_mEN7~?I+Ei=5)$AuqM#bhxe19e0+}jKCuc~J|D$i_d;+J z(k6t}N(`di2}>$KjCT(g`ZfjW=m0v2pm}5zzMYPax!+`Tw&|WirL7I*e{mk`2)t`( zY4~O{`s%52^GFnZxyY5HV~qRFGGj1B_xr&T>htu%P|%JA=^Lc0HpDO7C+aFcTyFh` zEu9WCMba!<;0=FZZSktrT6$z?C|TV)N)N3YN>@F@7i=EI_dd9@Nw7IBlI#$X|IY5Q zdspoq+nw%-N$c-Ce)Y_gcMd7>73sa49+1gZ-p1QW{p;@7wbGVe(~9QO2ccE@MP`~P z5TD0dyT2V|>M$(ERz?Fpi-Fey=UZ1$K%032+FugTPLqH)nFRD1PZSZ1MlgfjI%k_l zVuLkD4je%sZ-8j%fhTxp?um@28T)Uza_w>57`45Jn`ZGsp8Dc~mGmuZ^*PaQsz#a& zeQNlMi@Q-8!%M235t6uj{;yy&lP8=+n0QRb6Nzq&?vGNc5h0}-bi&tm{En57YG^Fg zAguZnIZgNp=W?(f2HBmElxO%5LI6XDpPcf@!F7iU56!X=86%(VIoTHR@1znRHs1Kud2X-2;DhxdDCDHySc&z@Ja? zMgyyvG)6;~&ZN1Z>V_Jjuu>EhTcTPMlu#$JxrX7gA7i*IjoDJA9&rx0Xt3I%I%dn@ zf3I0}q0n2!{0M5LO(2M8ET0X5`C!DW1kP7Z;$-EiaWHrxx{;ox-++Sj+uEoK237cH zFj&y>PcRY+20_7t3$6FgZbBLSoBA#UnbM2B0F3G>@Mk!@Wxzf@IugUN5CbE9gU#K; z&11DesfXLtP_}vuTm@)H8F~0}Xn>>zToRRJ4Loo7Uud4%EgsN{dkgvT5v}hPy9c+afn_9vzBp zyJ)2I`1x5wg<_q^H;-gIvEeGakPTAPp;g2XgG0WPx*8SA4d2m2 ziQkhS3(3E{BCQen*!uTSqzi}%iMSK#Q52Q;8l)o1K6)0@(chm2DH}*DAYlOs5hTnY zVFDSPC?7NeLV^ZSgGR(c(1=(F8bl2uQy`e<0q;PukPbvr2a1Vsph!svqO0Rcl1I1} zXC-3&6lA9dol_)@M9%@z*jq-CdInjmA$vN60RE?^KY>$aX#}M|k+r?!_mloaz1BPP zwd?o)%f8N@nXk^k_pX2S_3haRC;qUnf6vC!p0yhI)xNKPWOVhdr>=+ZBk=u};XAJG zY2R_j`jI=X>S>?615xk6$`R^CNIeucda@cf8t&u`S>p{^m0x_qjrK2~x zc%;S$dV2zva4aCw6riqFg9^uSW=C$U>)jI#uB1jqxMFU&v;pCK%m5%mpSSZe?? 
z2E5D!w3Qm+Wg6_JV!SK&YFXjmEE9^XXbNSe42YSZVSJepMt!3y#x-ym6rJ?n!IvSQ zYn!}dL(A56EhC(-`WP1G2c&yZ$cEW`_W#U;b(m9;Z6w}UB zas1LP#U!w&zeEs|Mz388b7*~{!F?kE1BXMT2rSBTxjc1ZI9Vha5HR4i>jbT{(S^hzFS@4)T> zyCm5McGK%`C)OV-h2{u=sEq-fGFm$KV7KWD=WY6$*`^0+$giOkO+5{!{ZQ#(Xs)bRG(HMBGcJ#7qWo|Mr@ z!|Ww#lNd^2zgzH-WJCBkH+yoc)2THr-e|x~p|rn9$4DcSFD@d@n?Bg*$=Xy`%l11j z8NcO{%&!sJPSw?CxN;O#5d^8~%xR40Y#mG&NBmlF3F+v)xNP-awJyQ3?owj){>=-5 zNbBw#loR(maxeWgqL;WA>(4ZyM4Jtr&}`_6Wx;O&FpcPUn45@Q#1TAyj}HtK zcD13-dp7tJ1)>mvKbCD9c9kY4*|x-nxoxE`zl^>C3TJ+GA3l?5k-zlNSX77 zA9U7;QNa#S$tYsmhPm>#l3t3XtQW(ob7gj@Iy)oBCn^c)w1qB9!+EjHF2uzHp{qou zdZVSi!;v*8nh-mzvn|*k`K<1x{(g13s$?6;x;zi4v-My@?0~ch%Y-I2x~s#=vP?pV z4Uj9;@0dK90AD5u!bHZ&|AFA@@%>kzL=WiuZja{&WVEu(27_4%X-Sdl$PSd`?A(fu zk^_Yy$JcXu81gLCeISoFPm#k3bHoB4?a0oRR}?!+aR-h(#?Rx(W4vdLI8bOrMtuKN z{&G;QrV&)~?~{@~@86F7{q!=V_<8y#aO!x}po*<o4v zlvb?JN-ZU_w9T5G>nPdKK#i~8-Y~$CET5my9sB3eCQL2L-yoewFNP?|B~qYD9owvk zkdTZvgN&x_3x+=uX&J>4FXhQsFNh>wilTKm5@jhpK|gO+^gWgSU}d0z;3Q>|f~loR z4!&9S;^SC#$b^uTXS*y%1t#3>_p}zSf@f_yV z)d$ufPEFpPL~hj82Up8p#Ii)P8G9SK!UbgbUY5Z6r}+IMK*b^P2tL${X>l-4A@ zlXKUhoE`e{ZE(c^=+i6L*CLm7{uOlp9^@R-Nc#NVusKuZq!$-_4?kC~6@A_0T=}}v zwjs>QZR=N}^3Rlw3f#Je(l=-N4OpG`6Z^dM(0Shjs)7BreH_zHn&DWJ6Yr>mq+0Z} zEv)a+G&OM3q=b`8NQYStzo%?~7tr2Ld>~?Z@A&Iw2JQPC%Zvm(7fZ?wZ1gg zS86R~vs!mcH`%?B@Xf_bG*SUh!&EdBK)*iMBh86r2n!F!sosT)3YP0-5#4e(R0rMh zjYQlxSB{sQG}Z#nswG(00k!?vzn}Z>Lm`9RjVx9|%V^v>i^(+s7v76U7)SXAv8gK+ zD!0sj>&2M3yhd-_u!Vd)f--feZ`8b5q5gItB+;rYiB@Gv1nru{e#fMY9gN^FXY}=L z$gt!>h9w8z|BP3}=#MAx5f;W0Ib^K+LO5Q~-aMQX7|(F*9AN6{Iy$J1u_Sw;!QtpD za>f^|F&vg%PtvL3>@3RBS%m%2*;UWh`JBlXaJm8tyZ!;}wyJK0y7Hx#zC-Iwju6^8 z=W5=p8!z3k^4fc^CL`5l@w z2$t~>2<@_zX?nU;$-wIsI<$A^1A)dZT*XHCenA=}l`wM{oRk?Y`H z*BG`RUqm#YIN#qt3u&%%Ca?P}2#jf3^Gaft6axdpk{pszRkWzrQ^XZ*Y629l#0D!# zG<7zSBotLzuLg4LMU?88-GOR?_UGh;9-hb6SXV`y(BV0j;9|p{%w057d7HAhQo&Hh zBT`S3b z4k6NPIi%B8aPx0uVIhx<`B#gw~ey za8ytN!%EFP-_x(yfS&S3AXcG)qmLcPDWUQ41t1@eD#*u@5FZTyA8X9^WGEPQ3iMB2 zrUhrjABZb}1MbS(W?)ZhfvAJ0XMaTVO2`+8+eohRmnN%SWGIA&vAgnF1jNILb}Kjw 
z9<$mlG{y3!%1N-1McDI#L#j+7y;eCw-2$l>(~)JrPWT`tI*?BCfwT{}@m7}`B&?lQ zGRXlia?|yCfu|dh)dNDq9^O98kJ1~7QDch zdT1L@ZFjn@3}u!q@8-yt>;i-4M5HM$CkXOo>MW5Zb{Z%ku%{|QGf!Yl^E`Z}omIF6u|SgWdNR=3M5zx#km z_7Nnpmi#tZWPC(3(Mx;++iWIh?>TVUDFVgJodcH_15*5`rum$QwH#>46?0@R$NP?^ zrn^4PAE0K8tXh=YIj3MewZgdHV4fXn{~ymRcj}D zydN1)tw`7kO-=nV@;^mUu(ZU}YEyqEx2{=@`Z9it&8bBFc1O@1=v<%wu&8QkG8s!k zT9qNKKF`{T1kpuo$F$-@El+{1XuAr0tf&z75N~Zd5t;Vvwa&DiWDZm}M|ye;m^n-o z^i-tjL|K21X**elPgb`=dKM)ZF1B5Cc{dq5bWVLEg#0|p@b(vS&2C>rR;>&xdmRq1 z%oHwfD_+s<{iG#Sh{Xrc{S=@DOymF3FT)H&KP{DJlh=4Z`!jq zzP3Y4cRrnK4z;gAXgILvZpK5j5aR}-aQqzXvBW!%1tOo3rl|*;QlB#&Xgt*l!v`1D zfb&>6AVoe?mZqDi2g^;S)aS~k0}FtIF@|!DqYaL6R%a{jG3h30Q{;*jANg2z^twJr zHkEJ*EQQpQiBI*1hsH)nvI7ahWP&T~c8krzyTcFMw{d1P#tMqUTWwasVdLqrYv)xv zuM9*v1x3o2V6Sdw6{yELbd;aRIZww^U>jV@G=qDKvJ%?s;i%LJb-M6bu|d{*biA=D zH_=0C>O@(c77L#(i;V)^(_hc65#gcVg{^H;50ePxeV8`#m`I-W8~n4|1H zsi9O1t}ex3ArTwSk-yYAIFN76Z~VZ@!I_QOL;~a(Gflxck*RFS<+S!S(b!-|INO2t z2!~*w_rdWVCvwCsI7T)HS4AH^=2;vCpobP+!L5v!HU9oA4PwmW^lHq9M%AE=@c zPuGr1tOdgsLst8dGW-smgTtH#J_>$MOW$ACrrq4pvirb-Q>Pj+eQSB!s&j!5)U&q9 zeO`Ozur0m1HPK%P^Ss#_$+mWBM~@~)KDc%eE`&ZxuUi{!kJ(9@@OqM~GO8encCX)K z6*=bqql42MGO5AIPGxY^m1++nogO290G?$1L?>|>_CwojgjGEUwiT63B4U1^Wnbi& zdMvZwKV#jG{TFk32=H5l^Lps0qs9C{xn*A_a;%(D;jjKAc0@F8_tt!)>QWK$o%N_t z@FYpI6q|LS)!sqTY7uT^tv-hnGJAsn_0tAd>qx=X>f{)b`9D@0Pg+D@CeZEn2i%o@ z$a)&_AAHLl@Vh&^#yh-b&TMfI6p;aMle^)VT}g})R}#P0cXT#`wM2@rfpIvctpi)m zwr1f^+_&l+*z&lESdWyMkGKR}R!qg|BR%_5n~xNoo1CPxbXdxns7`3sh@w_J+&Qgn z1zQgnHJ~AA8?#W-ZXo)z6LZtLjKIbD+)ER4Jq9?-3$Gwp7Mvy`{{mxs6F6#(&+jvVt`*d!s-5(v^KU%tGAlQ`jYf-Q2iS%u4 z^XHvs1mWr4E??Sb>23FCeU@BD{-aU%=-O;=RHA?8QSDiGu2k?^MBXLa+$2jn6Wx*2 zn)ZO2=+IIFK}+5nUExxDviVY*k7e9nXz5Y{Nk>b&9Eer+1cM~)OR7;#a%&j3n*0HI zE1Z!Gy?iodLmc!I0u*P7@GB{o^<;KBvez}k%v3|{dKw{5;S*Ww$#U(PYFu68ECgqt z#Kq(4$XlT?;jqIAr>i?!N;CTG!&YaZ+!Zaon+q!W%bK)ymxl7qm7~?`#`!0ciuG;j zftu+q#+Ut2!!?%mMpSYA{XUz2VT z{#)Ia>&p8R9wD}ByxSNThak0@3>@H?QXdyq!f!p1oQ}D)sVaf=s3|lmFlnqw>xpv1 
z6Adv^7okSI_h>{MLJYOTu6A}uhf-A{c%orXVQ1Tl@zw>SVHkDk$OT=(oq;1^9h|ct zhP|=FS=%_%zya<>n9R^h;dsvj(scAQy0zJGrh(*n02*CP^fOiS^1VZmI?0FA8?P^I zoEnZK#|~~7**6@&Pl~U~Wmcse=zYVb)Z1&O#+#F4dxzHS-`tcQ-8-Bb>InMVhng~j z?SVk{yH-naF%gK14IVDfwW{P)0%2bB%_ws$0qR zM;mQr^mJp77KaVhcyXbv+&{U>+muPUsshMZRXO5qyJ}_KRxUNC22-&%WGfF%Z_IM` zfTQv*gYb%3I1kUE;R#vP+LGV+L2N5$+n;XE>9#VIJ@7nUQ4L}Yw1}6GkYOcy`SZ!? zh~x}TIcMq-KX0Aa0n02&{(QOq`TL0T(FIOFR0TW_&11$aNKQq9^O4#(w%nOS)NT!{ zCTP$7SaDlT49f`aRG{+Zg%URuO1aFmnYAKZ)79j+!5r*Gzm8|~b)pAca$0NBnncez zaA`3>h?&DbZ~c8M+1h!(r-zA8^M{{Nex#6!dOvfZ5nXv=A-r-i-kLdF){h;Dcivyd z-&Od-IK&3(3x!edH+ZZA{a}#giKQcII^lwlUbrW1tQ#Oj=3@PqcW5heA(5pR6U_%w zo$=OgB4d#)X>TyTm`6Bf z-=BTZq>s{BoE~`ot#7}GS4qy$etYs**<&x-g=1y=epB{AW08bI6{@+)*g#%vKQ6E_ z(Q*mh1IJlj4ThzimuwIOL0r!otA~%nE8*i>#+zVimLVxwbqi*e-aZKu$bS3~V`gX? zzI+HdT}6(x*z4qta8ZDeXjUk<^dP45*28s9)m& zQ9IGFJnFAn8SHXydUb)9p1jo)wYe-z%kH)nn_E>DS{x3K%y#vJOX;eztu2QNSb2gm zRv)r#<$FWJ`DReAUc)|x$R9%c*g+%>jAmOHnhy7zvWLxKIJaI=6iiw;Z1GN6W{4?$ zk^r8(*NxUaHH_xk=lT?ZH|T?;gRS{R;o5GYTa*kre3F@>pQLz4#P5$fdFm;K;bgx< z_1jqLev-O}G|N850WA$tvQ(^Ql*wln%;3L7MKnX6q2BDU+rh&oGfTmKG?71~{vGyX z4RI5Wg4e>WYCi91hMg%2VTWyn!)#i$%F&N}If}{AIh9}KpnRz*-%Pb<`r!>VxMmE| zT*Ygyp7~T6hR+joONnQ`kicTg$vNn)uww7+`B+Erm&=OR#tnLJ()1b=ISznUrppD2FRM%9kiRt zn?6KwHoxlCWcF*1ut3tho`7Pe4$M$y$>;QjB$E9C`6eftXc7+RAN&BSIUL^##ERJh zZ?Y!l>3^e6bM)GjtOw)3A34Zodfrt4yzicI?jQQ7e!w6PdPbh zio2e=!7!Z`LYM1Wo^N={U31 zhGm~8NWjnk3B1BgLSrvY#BmH86Zfr`2cfLIil8~pF!3V3MoY6-pKCNpsYERgX`zt{ zo;KP1PN&~yQh?bR_4}euvz7CtLZP&q`DCOb6!D=)&^`vMM85XL`cY4BEpCI_M z;c&A@5IoI@vUBru;5fYlx3F6u?JBv8&@w0^8~!; zexub%wJ+D`qb4{KRiDfPg|)~0zKGq#acV5!Pq;YFmGB2*DhE0cA}0l3$azub8Ag!A zziEM_TM*pIKp^Gec~1(;bf)qem?3^n_=o_Ws|1&ykY7OgqfZD$_&3zn-dST4!DJj3 z)n%_WWd_=G%Ds%m?y)N_9?*9S?wHpTa|!%L${d5O+y^Nm5NW-QEINf09d^l4V+3ko z_FITdT#*{XjWdqmsfqHweBa6(`rSK}&#i;MNQZoaItX51Zi4joVS2Ob5G)Io!xq({ zYB^v*j%}B&#~dawCc0xDL|&cjqTOS&xdfUXyv?U6EURd~V0$j-e$LFBFmyhuF#~Iy zX{b?fRcnL{uGjc?@|Q`3+Vair=S(8r2jzhMHm_!5S(}EUcs@b>820i-lBKR7Cg61j 
zUeDm`D~Smx0ryZhQaRkFE+UG3rEEkkz;?CGBA%aSBSm!S?r-zX7M7+5oVS*t68Wg$ zin`qqRbVYH`J;?z^VnpS2TaAa#0O4DzM)%%YT`Y##pXs$r0(KfQI9LC3YCW%K7gH~@h7NH zOfm%9=$=JK(4pW2pg@jbq3|yE0p!d*)C-Ukh%=LzQ(5W-*ei6s19A!wpz=D!Gyev4 znhTEtnIUkPFZ#oc1L9mp+l_-tO<$%hj)2n>w$Us(L0jxWr$@8V%v%=8Oq(or3wy{S zagc}(jOiSjZzrFiU;I1De})oZ`EE_2De!wZHTz**3dQ^{bkGQk)=wMpkt0x3qw?2* zQ)G@@ML?@c$BZ@!b2NcZK14GSEzFewOhB{05t@H(-#{O|ctbR2Q!0}-_z(QE*}^g4 zFUepikqEMi7t(1RxfYf?OPZ+b;cDem1c0;C98Hq9!kQBL$t)(}>%dRcD?3RG=Yap@ za`n-q-Tt?JyUk{&zQsu#Lv|*j(L_ATDZcsn`9G1D!_t2X`Er0L!+zaec?|r4`8W~P zHE7XEq0j;frLs_X1DSG$gu97;6cni+Y5X|4cK72LMwWCsY_8e?A2|w-Af3SAcRMC_ zT*&}yz+>~;MXGbW+aKx~Zv${Cp}PDs$z1(xd4}{cHxeP;ayjk!p5*;5;$D6Z+nti1%Wy3g6pvt^WSu8O)g}lwRRjbpm}$xS$Rp^z^0%-L`L)?B+Dd0-m!kRq zx{{S#80Eb33^>!!4*7c?>AP&b9Ul36We+QTx6Ibs!QeB*s?mh?xI&yKOYn^kvib%unC zF(=Uv7r2dlI*2phq`3(3pl=E26vdi&rFcm6#DDM*1QT}`W-NC8d^-k=50Bg3UJnvw!8 zxSlkVnbkQr_{H`oj(-(428t4FyrP zt_u{n+WFxbv-*_ad0Lk#NITs{!wc$DWr6lQ-5^+uew1JB;6A5*NF1^%eq|MYsYW8| zFX37M&b%CZ*Ik13(V#z|F=E8WT2)zcS=o1XVLZ%hVLKke?dUY(X}}MQaP4LjpK=Mc zhMd|!s{$SWge-7FW zlB`-;Uj*3$chA#MTfG6b*R6P#xfx!+j<0XS*Wa8Ez_IrNy#6r0zG6NA#{jy%9nV6m z$akoJr}x5{c7twL=b>pv5O%PyDEbS9@PAn|e`Hh@&K+%edG3X|LcZp$mI?)H_`k|l zhROV~j2>~rXV3}c1uPACa$$8xwc#!@q6PG{E8Wpc{co~)aC=M3U}riWiDi6|e9#in z93F{DtliO_AL`7+!m*4$oDW*Vn%xU6bK0`@nsrfUv?-Sg*#a$n|DU(-0FSG>);;^o zIn&EI(|hlu8I8)Qk7m@ZW;IK)Rjgu5mgKSJN;4Q6Y(jB@Lw}?|LIQysiftKBAcgNj z;66&;O)hZr__&Xd0LddEg$p_DYFx}+Uxg)66oS*hHGNh3qnz%(UN#0BEU%9)n_?s9ACsG(rJV{&9_-C}cS;7dc{u9ZRa^g%Q8g|(_H&1LHAsqZ!X(ELsA6EXAhB$&+&5^sagiPU+y$(r6%vXRsT8KN%(l%zZ`>{?#YU6Ku9AwCo~oF;B4p5qsyoI7q@^s`7zs6`(_UL5 zWVA%9=7Yt3DGzH48KPt3V=-qcl@=W3=|?S6hly5VgxF2&$>kHMo!UWD(qyvBWQ|cJ zZ9gVU2`Ql^^+X>L1|OuQPHCUh*(Vj0jAqG-w3hJslC@JY>kbp$C2ku0M73w@1gc)x zG+ls-eJ8o%ovBw5y&?Tj*cJ|`XYf%=)=*x=@+(2V>P@P_PcrabP3UEFnWpJv0j>p} zkTC(E7yWhgxoURQlOTX0(su0P{OZ-s|=-^YON8wDu3|0p6>lE zUVG!7-r9SH>|UC+84V7txFp&$rhy7y#u(djX&)$%lWpNJC0K%6#echZx$ z33eW+mL8(|1Mo9&^M^a{?z;SkKtc=fL0ka&i220PUGQ#x=pl=Eu> 
z-LkobaY9?1g}=g<(e6yI{UtkZ{xAc7&A{bZAUTy8*ie%N@)Ma2oWeqDWchcQnV;pH zJG`VQCUHO%#h^a`!fs1?dI9+#0)_^Z2vEtAQO_opoZ zi$;umW`Lq3wn)O(_49o*zrVch!1RVHSGaL=QMx5K_|!R9~{fpe6VQaWncPA zL+fOXMi_V<`(ct+Sq(B zsnTdvH->J3V$#!xtYEF8p}qy<>D?4zwW}*dPr)qWnY$(L?+@qQR5quFe`NZOMX{ z&iMSX{8GwS265_{w1TGEs8%IvH=4S8YAItKs!bhjE%K#}trAJAG3_f765+;)w$hq+ zI`gT{7~S=b*{?8n!jH3Z!a1dxx)2E=;N(x~<>z>R<$svM=*68i?_}_s&0X(g*1Y;6 z6c*+P3~zFFl~UKz-4a!EH>Etr6S+!xx4tMx-jCF(JKRSR#qM$dA&W?>76SXiZ{bgj zjr+-QgG#Md-R-^^b-UN@%7FsG(v#k{D|*`Zb_Bg` zm-n>q>j;vX9WUH7(0%(0`!ncz+Y8tB-@Pkcwf(mK{=0U=>+Q&oz4$39q=JNtsKS;W zgDgv$PZ6wKmIbw^tb_DsxM1dA!IjRDbyA+Tq9=11{0KRAmkiOIJ69wWauT`J$8#tt zlIK5s5Ce5&#R=gs4$4Qdi7ZAu z%RZkuI3k9J$TGPxLNw@P29Cl(`Y6tj#khu#)aMPFzGaZYlSjCOc0{-c@=dfyqt#_Ki)+GZPz$EPoA{E39Cu;X5oQiV{0gozk; zZsBCi#bin0T!5m8XqKeU=p&44FlGs4f#EbWEFGnW2|jf(O2G#w2ATB{ea~r9zQE-yA#lz2 zYHpwN1ELiC`{`g|uuyKvl1(QGxibrhQxz3-NfuO|%p1^Wf$0Q2mfP>Fsr_AndY{g~ z?s=&HTqbXTMk636jxFtVR^-ZBoBdpU37Z;1G8l+olzTbF%V%Itp;cP zbaQIB+Ag3evyWt)PO-+S@dg*}qy!~nH=-4AaPb4d8?=)sBpQirn2$rrELlG%H=E@} zS+d~_A&)JVmC;`K=bqGTs?7pjP8KasN-nxvQWjgxz|N=|B{7%LY@$*BW#P0G(7iMz zRMDEWwxTr&oN|e;-bn0xXm|J3!zn+j$?v%G!JUD&ntWCagmQ`0U%tL*^9_9wfw^}5 z#`w%_gMlZEWdpUowx*QXl^#jgj8@yh{k`9~qB+=-x%oSrdmj1bE!(OjN;PXUtIdp9 z$tc?oeQ!u*H>%1fZW&FE)Or*Khx*WCGtq+13GN)ARB#Zk9S2dLtMNN^N+>k2S+eC6 zVW8P8s5)gHkd5+X(3jlXCuuW!HkZK(>u}6fv$?WqS8MQ%y))bQTFU#A6FVcGhHVY0(Ujv$N3XpBwDn#;5Dj%* z(UF|&YjrysI)_T?E*~h$@7hsaGO}Tv!_(3`LXK@18}AL&CClwahvvT(YfV?XoXOhe zys?=XT<4e&WTB3T6Sc$y<^Wp(;sBHywi0r{lY#Vt+bt?V43IZuL<}HWDHZrrOnCjf z(+e5c7l{P8{_}=JM!fz-V{6wxwN@mQu1@^ACfZVv)fV&|cw|?ov%WwhrG#>^Bv{>( z7&|r)CCxQ$eeoT43qm?*9stMg(1_d2+eURaAx8ZbI( z!05!lp%cS-(TO34P7Hq+IuX23ka_gr;fJ?|;yWHac zg<{l3wb7{-zAsiv=z5eZNyL~nYPdfme~;mefi=S@Oy`nLb|#r@#DEc!)`?Kr=i`uYfYJV;J>3oq=72iB%wGWo4t) z2*!U-?dxR#^K}|Zs`-RcrxFRIDmgH=3=}Y9TasH^3l+3XLQD0jfxW3sM>pk}>h}$N zKo*NtvbEqPHP+eaZ69z3U1HX%F}w6$k15!isV$wD;eiWDIH5fLHIG|1FXI*ik6XaU z5VuHOg8@qntwRc-5h$69&I?;`KL&L?o#A>yFZTr~*G-gIeqct=()?2--w)d_GzH`AtixWdcg4yD)0GYBg$6 
z+Op)j(}Yr^F_t_HDv3Oz0s<1Zo6Z|BE+ZEtq8JXuu0Stya1e$qCo}E}D5}n6@HpDI z62Rbcn=5ea+EmqCu=E8X%w3I#PGqVw8xjl(9~H|)(s0xCy1HFmu|U_=eaVe}3}KQ8 zy=v79HcxYW_QBcmlT+WCtx%hc3I%Is)fQH4vYQ;WQ!UjaDTjPDgrUjA*v)V)Y==_n zM-agj6YGfQxjAZCd?syBk?o@`AhJINCR3m;1&UL^n*ymUS(nzztycL}2{4lYtqD+( z0FeZMyi@n&E&@2=q@%<5DlQ%H8Tc9^E(h}L;-_gT{6Su^7?0C_4i43y1BW8RuSO!n z!*AnL^%xo9yFzqq87hc)#bO2y*Z6Y)ss0iGlJHUh(vP6{-`#cfP}0Y!V;z@2xYO5` z&Qppg5{P6HnZKmH2-P(am}}bm3Z{>5@;_!Mfe@*s0YaqI=2UufwGBMbd*79-A(Bch zQ>Zjb43b!7+o6YfkaW{%#Yn9efuzHaO-JKh6L8YDK%RUYu2kDFmK^JX$1y~T(JF)2?Dn&y5WGKsyG8>6(uWrSNI|b3!y^IF4Oh!k z7#)=oO@KesUxE^iu%N93WgI~+>_nDtN#t6Jb1lWWmLlZ$W&CuCE zdl??cH===MPmy&5vH0pKv}~7`@iV8i{FULaT-NJnaM{^fl%GI~-ccKeFRjge>Dt_v zuI0aUEn*&4icaZE6G=J>0b|1|z{uxad6i4_u_9L{zA!7Yz9N1gBK$uHE8*)*Q4A{^ z{$5y_3*`@!v7zqzv_*t)Z!Y6#-WEf`nkp+nG3v#o(&+RAzonOTS|t0ppG zcf+>&)JW2C^vI#>LEDDw2V&vwE82Nv*)>#BfB8m;EO(}gHg9NlVq`fIu8*4#wyY|5 z6d#=bcC57~>2g$KY`GoErfw*`AH>+Qp0hx##+G$+J|w=OEJ#bj=|!hg6kZ81R|xNA zkcnh>7!Q$Woax039<&-~u8`d4g_WajK&d=IPoIl7l@M=EBD|@D@^t#r@#dT5dmfqH@jxa~vFnjt==xZ&X}qF-qRt&? z8m~lG(s=l1$J=U-ymajF&u(o?A9?wXecu=jR?OTr1h1jWnY&OL=*3ULn^b^s5fw|; zFwH3!wuaT5vQE&*d^-O;5_^b>C*{Z(Rx_8uPms)e{sOizKGH%5-jr&cMuS5qeuvS? 
zX#ph_|4pVbSky)bD>90?usyM0^stmv*bJB3ngtSzx_(lE3BCR1>7c1J3ibzm$5t0(KCF{o$eb;n{%r*N49|CWx*+$l^ zmIzfAz0PhjD1Kf)JJ9BGSLPWlZZoQwMdSt{yc7^!PQI)x8o!R;_YlB-~3nt>QHem1kB zt1;IXLdl#V`{Fp#PGTT=RIU1kNbAs}OkWFm5JuAo|AU)^T1Uk0j@cF8fP@w<+($0l z17<*_%l&pPH27_ru^H7?qd`e_$&^wqq0QwY#zc0`{|ceWvBf_Lj8I3{aupQOS*-_` z&}0RNCd;ybNz3!9-*r0aysgH+alCdBAc+Ss8E5so89anG{w>FVt0mESFr@!nGZiaT1q09EbLYo%r^DV z@hu6^*MD>VRW|B^SvLv7QF_&x<^RI$@Q-xb@9ctd)Q{iFQNaOUaBlen#2qPwiQnA!kP#a!9 z+vFj2rFAWVjn{Su9vRzaOI8-^?4@1B#p?@=U_;xHky2M`VAl;TO<%cgS5I6dQ!yqZ zIw~cVODo0>*30#*G{1Xa=g>9@t5!D{5>!F9|!+OQ5v7Qp8dR^F0DfJ$cE5Op=yYpvRR?U#dQLv)m4Yk!|kbnND z42Os^GQqUp>-Er#1zEycAuXpNEfLmhCH8ZcusBI#OBlJr5?1y!se@{!mefOZQ&yWy z#DhXQ@|tO~={26XIBWfR%wgJGZ&T#8jA@c?dToiet8>*~#3sh+qLtu5&=;R7}yKTSf+s^hb=vJ(X$51TT$lEo_M!wDFG0@qVYNbfC7jw*Uf)zGU5| zgbv7RW?F*2hS926=eo4RRGx0lE8Ef%@zoBMMOtbrt=6i>4)DiRyUthauovd#nWBRW zjK8cPZnl*c7h9cWL4!39=K|@5GuZ}bvY2S(GT5GQ!2ObDj$jxfg}6c;(&@|E3gZf=D$rDsGGih zYj}NiKq(ZGlt@ZT{N?TO)*UTAQeRfv?AvsxClDV!KH50jQ{`hHwwHGn7Pl9h#wYB_ zs$#M{^`-0gZ%oJ;MkZCM6*{w$Qm~5B(L+rN$O^IUeU0m`8!vZM_3ys1Wc*9r9#7@^ z$nd08rH6CW3FoK-&Jl|Es?MDjDS^w%N$S!Px#cW7>HF&%;F6ZJma`dllJ@=eJeY;L zAZFr2AJm_Li(f{uXVL(j7ou}GpR||h4f=_GICY4B}GO5)jMnrycodl>H23D_<(GLhH0ZOt$1`nZ)A78SLKcBA)D|y~h6s_EG?^xAHV_1e#GEq>;yQ>CkDu!#l_S(sHj)IT} zsd*zQ5XmIChSb84xjwl0_(&XV=sP-?uheQKa@MR>=@lX^RA0qiW%-S96GaL1PLPFa z9s&T?qLwWDkt8FXyU=s$`Ybf>0g*1SR zLnLx0n)24~Y;p?1H}nl2Xmfr?8*M3R*c-N#bR|mH7wEwTROZE#8r*$-bKT9?@98ZP z%T+R|LL=9hRYI9URz7}cohh8(yRS3WTx<=P#&78M`l{NIR^JS{q-M33E6sbks(CM0 z;*|NkTHVZPb@N5F`U)>s4g6#1g}t-_s&#jO5rXf`{{>Q(VwL3G3Rb$L*#Ai;7x9W+ zs#8K66Ux2tw*UA(I)X)%`9 zwB=Q9Sr_)#4<-7J#;sNL>%s3*?d@rMQGUeWA79XU$_wLWLw;eUyS1y6*G<*@dhg@x z5OA5RPb!HdTJ6d}?YnNbsO-JGN#lE>m5crDSIha8?yh|oySB>S%j8XpjPK*=a54+Oy6Bw8g2G@W{38uITVr##(l5D0M5pXMyuo zw!T28$!{%doF&U^Z@F$?U!g?FC`>jKx-DlE6=R1Q5NM5U*xT5A81upGk%BEyQ~Rpc z2cykJR!=VKhISzsNVITkM@QzvW#S<7PZCmd7D!L3dc5#*g6`w>^>c4?et#8uI+s!P zAdk58WCnJ)kflL)Zs1Mo;E%4k`t)UG$%8+<_8NFS8SdEEvhj)*SEzGuYyTCkF7oi5 
zAANtM@8PrGy65cUBYlsYysVyt$zVn;U{BGjQZebHn+ZR+>u) zyFp&7vRv5BryHkBSu5NnRgA97m%_i9CWR8VqHkyQ<}VM&^i4;0yh7$9$5o5ks*#8o zyIyNI7!*J{blVlT->)Xk?9MQbq{ znOsIAiX0mG?35mRu4HK`k21QYy%z)|r#SDGEG^5d=zTs+vLgHV1!He0t@=f=7JIS8 z^Y1Oi-n>X_T`{|-z@dCmqd~DZgWw@>IOte-3rTGXod|ZJ5IT*+YGKKb;f9it~{6kDfAj$$HPDlhHG%lhMO}NaP9ioZ4nFDL?y}oIyu3kz+-ov1 zosT?$-~FEn0!LQ-L6C*g07X{x;uvA-02Uo z%IQJoZOO@u=zK)|{}@R@KGpO6`;Tp_k}*29&8)L9LL5o)&=6xqkrcOd<4B4JezW)5 zC%8z8s&qcx(t zr^zVVUUCJHoV**#$yWk(@@e(q^>^r$yBB3(B%^7G)CmcaCIqOsUs?i3^gb|dpij!A&ur)IF0CAp%GDTYV6lRDXE#ahgVjS#Lk@+^$smLmFfE;l?jPO ztOA(t%vWDJRN<}Lyyqy9iwc37GX~_C3SO@rcx}LI1s)6Vnt|5@ zP&#S@2x01|QHJg~uKtY(5(fmDd?LixoC@(%9^xw*g!m~B@hQMVS*Vwlb|V^Ol%fA+ zEbo(pSL~x>d7s>h_Gi#5aPoc$9C1GjShNvOo$_>tm@E+GlqK4W^9l7|i98o6`U}4O zDeqWY&g?#%+o-2bXW%P@=pD;?^L$(Ne|SKP;Fotwxquc1!3Bx}D3blNTTws@AJ2j@ z!;6?EickrN*g?pKa_OhDIahuf=n*&Qv9D9_#2MDf=n}Z#C{q3uc`YI1rcj15=gUw{ z)Jp~($Ysgnj+~1U=g9wm4?)%h&V1HCU9Pq!I*XHI%>{C_?`s>w#Yf{8|8&gI@)_H1JX&VgZ;#)EtLI^+tH3cQSnCYOZ4Sug;)~ z*49{z$u54Lw!t0@A|``G8-t34Fi5m9=*Tq&A)#vgcAH43Ix(}#mMI0hr1FQ<@W)7hH!j15=23KJQ74fFJc6@1#;Ts7{PtWAvkvNcWEi>0&OhA zN&esQk{>G(mM^aYuYbiWejhI1!en!!7=|k!f=4m@DEwH4D#Yw(5PJ&w;xMkLfBRdAs&9$KoD zaDhC_cvB_e@n#SLZjFS;n|v$}oavVd&P(xu;6Wa5{uXnHCKLl<8ia5ymarfArJx=O zD<_(gdQc#SQ&+$(`>P5-c|mi*bb%mJ01C1s+=&oMrISETK`h9)fSNhP36)5}!=aT( zORLfF{gt4kvaxcqQsAuwm02>9R_6PFFa4p@DJuCWjG(TVhq}CF1&4@1wt!Q}M~M}# zwQr~t99ga^8WIJ*Tt5;B5Xd9EB;-Dln+K2Ol3cb;UJjM@8AF1xkagm2W)`LG%- zIV-fPyAq8aRkq$=U*%TFlyX9@3)gptn%6YBw2`5ntH68>bHw44X-phz(sSxNWAn4s ze6+z}GRPS-YqIGqM%ta3U*+vsI^I4e?0xc|kKw2B_6t_1z_G%d&<$vXijO=i=_gmK zuD#L<70*VV&6Om*K$ZD?dzy(Pep;j5t6+tSljk2sR+Dg&k^=`pTYMIK$fDV+rt@#( zpL}Yz+V3h@Oub8{F&h|*(Vzm4$|<>E)FdBrgM0GA7_0-fk0p3B)M8c^b~GO<>_Gu9 zWf_e2JdM7>daY_7|E$zbF8cl|tFQAsjlRCBs(vk3wU6YVEw-E4-(7t!EkUyXj4!EL zysOjOk`8EOG6LCCr!3{o(O^r2H&k>b11YPH(VM_7sLjb}ZT<~?gzp^h@Wiw0+q?>z zQ>mc(4tptz(b0^jI^>SIXob!M=9vu&+Dtp#kKn*nzZBK>Fr>lVke?%PATG=RP6lhv zfFC}h;(1l{3^;oO)l4KbZC~k(xDKp_-8|4#dXREp73W3XR5O~Vf5?kWKv!QR+N=w7q#Q(t@GOjlM(Saq-EgiBZ2 
zQ6lLb_*#Nc$nw7B0*qY z){DRI1>T2@` zLdj_r-j<#`C&|b3A$p``ks=>rCA2x9*4HPOoOPTJWxxE)4_9VYNCe4oChI z6tLQCuykwlC%_2)YQdftvNm>5ur_u*vNi^eoie)Ahq30V8FOPNQ3T&SC0hPPhBpd^ zj%S-2TU%e(kdXm3Z)A*&68EOPoX(dj0}8E~(^)hG{^;IMZn*K^_M;IsE+b`42M^uY z-hTMdL^BRG3^?wCxmyNvw^GPms$gyGpkQqbHeqCK?D|uTRV8K*nHv-Ivy&)B%%6Y8 z-dL)px*94CrS`^F>bz+Wp=3<7iB}W#wYBv)uHp5ZiIxLT+Nb|<`;CVVHo=_XXuyBX z-wxmK$zJ4Q6(>l%dE(GvSd*FaAK+v7oiqAlR%gJRD%JulCzuHrlpiO^8({N*A_F#! zXJ2_%(jQOcmp!q%)qZcPjMc+(gpw{T z=$dzMc_K1&-9XQQ6{+gsYkD$6bq;%Gai)DZZFJTzh7^sT{{-BKe+BA~nS%a!BC8S< z$WQGPl9014wNOx?2V#o^VS&c0z=#H=a2Vd5t#}sk8(Wte!>A`3wfupwrkA?d4_7 zZS$%I1TE5V{u8_rzg^HGM`yIin+saxL{?`C+FTJ3c~H_052_B$D3Uuax!38zTuIu` zsY&WfG)V$!l8qX5pXB<7%D;sc4S|f3zeCVwFK-W8RDCM#JNWZIJ7cx)kSj#P>pNty z<*+6ls{&WaDAA!tezzHPiysBhVEflld6eLn}&mq(B1%C93$4;I_f6~E;V%u*>$vouP5 zdEcgNxFuqT3`)x7N^_vD%9RS3x_ctEW{%Mtz!J4qqsjkGA7vX>wRm1iEv|DZwOW;# zN2XOZj7Dnr&cV+sMXvm;4nN+Z{k3kx! z*cgc29}1DI8@Rs@#YYBYesC!o6#Lgu?EV}~&$z#jD=y3L2Gi;~`I5cPvznn5U%dPRYupYSPS2Zq5ns19XxLWLtv&!zMu+gtpEpGN# z4D9ZichmY>l2WTVmNjT(6eV}2Is!U_E8QDU*Xo%>Z`7=FS4duzaRze4%&uhcvZO!$ zDyzYaB&!3ueGECK5b?ObXr_z(z#!}O<*QlSk*o29yLGg^bzRnN3}n2{m`7);U)|O+oUwK{bS%C%(oh#qHyHdjmSa6h zk2&Pxl=eiZHT_ViBVjjHWW)AYIKVQlV8r2Rjyd>>I&W&e!?g&=yy0@MtIDD>T5S1$ z)I)`a*6=o$j&qvn3drSNNHI60SQv7-I99S$oB;z7nmRo4_wIOGrRVxFFmJrkvAl_tNZF@_Egxefnyyd z?~S^vTEcqVJ+HEHankH*S=-#UrqTWntgFIq4LcaMCFFFrfz`c-$Lc7xM$6G|i%PA6 zoE_8|9O>?GL}dQjn%<~^b5~exx^S>dsl4;CX&<6D9pAkMM)JxwR>J?~3fd%svf^leoEWK6CG7(+0 z=L!lQA3P*&up3M^HK8C5leE!hG}<(TLZMVrgaWzWQ7Y7Of}#y-G}n0kH{^DhgCL|4 z3-bhzgc`ObN)`eK;E}{uo_~w<$g;v-?MA4Lk72iqEfE9XgClP>xbH_s6@+_HiwfL_ zrYc~3H*BZh!%SETd@swnhmOu$ceHI&?NM9wDC^$f z(9ztywzhRgb8VY!wMTO{zdw33C)~@!n~@FEX7q!mrY<4?`5CI*XQ_H%SdJ)Yc4#35 z2jq;=0ja6Qsp%)nTF2V@M(cUBzp!Vd(RzojtkUX^*l7+*Vs90c)?H<7qiy{o4ZIs3 zdqx{t2=^BNdhF%`@zS=G5lsYswuI-t-jC(xB4eG%d(b4jNA(CA zHgRYd#OED35#KU1(;CKP;P8)hhSD%Wgj}wW&*RAtA;0x7YZ!}dz*fkHW%Dr#V=y!3 zf+K(^R)aNTUDyI_2wQ_~#I|AkunEvFj0?67jOUi*GFKmHI1t*tyK=XC?Wk{5(cQ1< 
z$Fl9Rb~ctWq;dy#kM_5xQtkbtyAR|j`?8h1y?5u{`FmThIoNqHan;u9t=1K*ovU?= z22F!_U6Z^?6^_u6y$83h9&Cz4ng&;IJ-C+&Y*_CJV9^(%FK`7@Z^%%_3yCZL0Cb1$ z@3{jtl+3>`f7t-666@caMWOz@o^&di2uer2bTmuH!gcC`^9zp`UZ+g6&;1uXzwkcd zl~^hj`x^TB*JL~y_n}a}CISB*PR8R&d=dJcwxS69#$wv(!ukHu2BTS}CtVBk;a zwqK`WsR#@}nQF&hz+WN%8J8`#x81DNvXn|=5^3=g{((S?N3p{BGU5D;K$|-t?OMoxf|5q`iAHLXhb@gk zi5bW#t(KEb@n`UxCON?)nl#lt#ZC(}(P+u398AN@rgDW;d~@?G-^e9y-AV@{c#De5 ze{i1FYT5J7U;rvsV3bpl4>(-EGnF|!`PXT^p2mOTGB}WhZhQ)VlYAfM`EjWXF92Ss z?9qqtwE7U>O=C0fCZ zzL>ncb}}d3aGEq1T|kOkb84eAy9WZo#1i5e!M-$F7zobj!U7`|_(+^VbrN`8xM!@s zVU?ZMDG9UMs!_XwF-Lv3IgqsZy6b)Z))m#Zbi}7tDNGu(+R#*0oi+y(c3*FupEy;! zywPPtrp8QM8%I$rqpD9?gHEf43#9w1Qv=m@1*2D~Oon!vR6EiEYrtiJ*Q$FV?PQR4 ziv-#|FVND1pOZz&Kf?2$3G3cc+r&JWUmW>7>YMU9G#Z@)GR!Gm(dQT(A&4#qG`^|a z1qt{<3W2MNo+N^0vvn~@2?~zpe@+6mljrTMT-KU@y8%~lcAj^s0SQ#NlCv32PELg{ z+3<7xuQaQ~0ZGYEO(Jg%go4xJZ&FGbj>}Z?yYs(*BO*zXU{0(8j^zcIXAo(;H z!qk`x+yb?uPrR3UudJhU|+&(6umtcgZQ`g-b6!JeoASX(jp32r{p`O zm1J0SGrULCdfIgQW}?8gGjEiO1B_G%hI&q+;c1qq)SSV z`)K-%$HN%Zg7t;`&p2LZVidKTzB0V}o0}V0lf~y3doE`b5u#7)5|7Bzlmp=9tWB?TB1d9Qo!-XE@!ui0AZXPe|7#&6aT@G>FsnFcKS_Zn zz;dWBn;>g}DXqX-@SQIRFNKk(wE!}b$Uo@K+zhh_+#^~2HUlGBm{`1nu_+jv0cL_E zUz^?qYoC+2;|(bzLOz6E533i~=qIZzXTY+P-k1sMZp5r6oFNo)?&6AmwUOUU zD4r7Ry9pta=}V=YcS_ysw z0LAh~7AW^l0ci|WO5k{|D+iIE2|Mc zf}8#j*Xpc{oYZhy^xd%7-^01FiY z#48dBE=p7C=u(tcqF>OU7xu^8#btLA=42o>!%q00l(26O=xjXCyu|wSK!#Ja)nMRh z%E@I60E<9$zc!;m{T;?`wQ#UQVys5i%24>H$czBRxp?qJM|)*zVJJULB4?K}1&P1T z8|Ab`pMS{?709Y&n+rTtkRXD57Bn2pj+IL^@W~NZa+m`O1{KEnrsQ&scgl#gHchE+ z_ijPAlf!-(rZIY_a)ng0bUQ?Y3(GAlmD>mXUF$oRC}`x9o}sMVgSJ>pSJZNcqoT?* zzdw-l=w#DPBOSr~d&TN}-C~f@flP1OpX90hr$&DYtsaWn8CH$8i(Z^hMVhf1DBw<& z+nYHg)p>iQc?u4Uk@ICY_dD)=j^sFV)07+4Uco!MAl-MMj(B>CedS%LA9h5zW+>uk z-K!+v-R`ET+|2!gtzc2s@2Ds>oe+eY*+q0fid(^Fh*_AoM9P}fIq#QgOgh$K)5?2Q zW=Ghbfz@p=LJHZKW6p9W+8yUPf5vNY*?DcJl6=k+)Hp3&i^}3|2LG*C#VW<)_Lx_n zf4WGxH+UTZ6rOZ@d9XR=(b~nS4~`?FhW6u?DJnX4PQtU zD>&XkLaM}}Z^3hGvgpn?S@hvbtRU2Shso&FtMM=4H{dFr!)$iJlNV)xaat@6Ru1ka 
zj^J_{wp*T&D}a!{a}AJLj^gj&o9A!H;GO`@C>r<;(8 z1r(?e2fV`C8=lds^PMV&Rv&-V#Huvs6jn=_=}xs4j93f?i`9s)9oHCKW+QbF>L#k` zIwO3mAJpP~vVB+-bDd%dk6xgHho~xjIZ{+g-#%;U6P-&7fYQ<@-lwJW7EVtaj;1@y zx!yi+W8AAWQnWHu*A;46+w3wVS9IPEuCsw0Z!@}_Ufq4^!LE8+ZJ*I%g!-q3G8!3A zZGS~@{))|Qx4^n`f+~EmY!j5fZp?Ses#8NzaxSaP5e}1DO_``OD4)@i#Y$j6OO?V1 zIW1)x6e$JywhUiv8E`rr*8DSSMxg{+MIc;NK45tR&jSoMRUJi3JV$k9E*}Tp;Iu3~ z{V8}RFU(VhfE(|Ut-_L6`DD}v^Km*CjgtNffpUJ*Sm2+RB+8ZYP%scEt}|3$xS9xu zp~bbtuBs*bdL!LQms~+nDy2Qr7G1q{Y`4F`rDKgcaFfN7Kf~8KdbalX@nq|UPQRLF zWF&9rObo{?9PVGB)LFoMgE1ceJ&t=Chae3tklzO(zgNJ1ynHek7jhm9lAKG(IY-vc z=KR??7uUCo(=J!sOE&23+S0dVYu2K1CI`0lHq29NDWzI&@~3^x%Q9BNx23gXO|4}Y z7pSkOUFkK!)>73R(&4qWBYhQh<42YzH;*lA4d^HZhnxVa6tRVy>rH-tc3G3ZK5TND z7vx&)&R8?54I2pKOJwUYGiE$V7~pu~+{v6l;)e=>5#*HMV0yM?NfW+Asmh7=Gt{;r92o<{sph6?KJ54D zv-!!)ymJ3QO=DXtCh&n@;Dd6g*~U)twpxJ^Y&Oy#ZBYp&@Z&FIB&ILn22oF`EwDqN zl#ef7FPpvRYS6jxOFQEW3_1;Ls+M3)pk=t;hI_}_y4TlPJYBiIrCYPsy@2tzRY$Tx zj`cT1YFFUz%>T~eC5%d8;;nj>S`{1ERBI^nWtXLE1RlAgt!_hql{3~<=BuwTyHUBC zuoJK&x=N7eZYa;C!Z7&jurOY!Fu%gYA@XMm^`NbOOGYdbA+gAID$AD3s6UD@%8MsdP`` zHKrwItIe2yLrqhPj}`7s(2mp>+&sGaB>n8)E$aYrv73rS(pwRzr|_vs*)J+WMPYD8>k2 z)t84K$2ST4E+OZaLC!D7YO%or-%m1*S#2Oas|{Qv-to(e_gPy!${h8>O`XHl zM!seDs#u#sMJbe|-dW*H_eRXXIb2gaKW1E8-yQ9D=={|_f4YnTUx_Ym^u-4Ec4cqg z(!t8*3I~8fz~vu+ECi0SG?6mdx#?fiDvri86$TUf!~@G>$M688ng#Qr z`pznqX0JCCBG>}eSV%Pji%6}tNVSWE`ixAUwW(sZL=POiX6J#$OTKJ_^;x?x&3}36 z;(d+T=B9?8J%*kI-7Q^xJ)L0F%JqXwn}z}{Ie&A+%7^PbTUr83fx9x5sw}T}r}F!KV#nWHbwub&~vOD(Qd<7Jk%t5=6xI9jDp z%8g|aS9O2Pj5`La8y3cmj`|f%ohz%2BN}(6GLSB#>-u7Sc(CQ>EuC5g1;rE#`|(ig zP>i!V(>;}u#Z8bO_GMyAvwlk`WrOvP>K!}3Pxc|K5e>#oYVarU=U^!`_%j$1*~T<# zLBcYS><^4m85mkSmb&G;7S2jAZX1Vz^AC`>%UJS5OpVc#ay6vJWKNC=g_U+{p(Qxt zBO3nw+lzKP`JvJ7FvnBy&c~@Ec#QfBtZv05BrH<4i85nZC%$+3R_ZSsgb~D95g>Ot z;C1*fXFx2F@W+cG8zDm%;MyzV)fvdYLA8Dk{2W1XM1=B&68!bbP<``CP;-~Xof<<4 zMkTy-F|<+b*#hX8fUyg(eHJEXz|=C2%z)Kh2G<~Pl!OOM@HY{#ZRr9O-Tl`y=FN+Y|pIwOmW_#1&bG+^RvFjc8*Dprd9an;YYyGu%)!kP2xB5N-*Eg6O 
z4mP~nxD?_{(?HYN=JMv-v(fAiTZ}DN|8K|rEw9XB4*zkq`e$K6!egzEwQ1UFAhx$X z*mkn*@pgOrMEe)ok6aRG+utjo!`9*JSllsu5gzK4b#`~&D&W^$;jXP+C%Yc+`f=Ak zcKz~Vbhkl#y=PDFw!TyUUi9zj-!pG;-Xrtn^Y5I0=K{@wZw!P7u3yM6yl)Y)X#1iM z7k#*R>PY^sD9|SWk!fQmb;dpTtTjQaJXUk8!PdZDv0Y>=~jJf)rYHZ zUBj)}vF7Qu3n9*}TeEKCx~JCtbbap#J@WeKS4Y1(wq-+hL-&TiY?$77ZsRK(-`w~% z#Csb*9tY#9as9X*!ap7vuNlvd-!=Z{O~Vi`UbX0|H#f&NKc8C!ac%C|E$o)HTOQl0 z*m@A+qiuV(O>bYY{ZBi#?YMf!p&hqEe09gYJMPpLlEHt?NVA-+28yhn$Bd z4*dz?211$FLV3he@DU4$j|Be(UFrNPxDJ&~V zLt+|>S_;cc;wh}&VwS=R%xJnt3M(T)a5e^yV=(nz_&W+yYO!t@S`TB!uzfJa0{FBs_>3jk zej((>`r(uJ!{_c1-k5`-jY3Y`@P8Xjvmf4Dc!#@~=NOhmU+ospYOqS-{p;Wpw!^e; z_}+E!J?I%D*k&nhFFYHEvFPd^n7>`c@+^Vzn}jmtE}NeX0v+5~D?DEhub|j<0u^SL zQGCuesibbh6R0?i6D7z*M6G)!Z8^}swK>TDA3+bYntUbufu zNHKp-wl!ycBA0rX1+{ZkUTr@XlS33T;AZHVGx!R4m6j z_`e0-@jux+6S%0#{r{gcXZ9Ho#f(byxTL5E2Uk=wklhfF#kHFZv%u&uGtCSFWjZ1n znkAa$ax2knyQO8DmMw!AnR?r^y6TNG>sDH5cB3Nsf1c-@Ss?Rz@9Xye{eG{P=O~fi zcIsT43Y&VV(dj%CsuH(Iwy(mk+3^w7e9mkcyUNCs4NvCCh1 zjL^hD*EdS*$IJJVQsnp}e;lNYqW_!tzZdBqx~7RnB3WfYzz4z1 z{B4~qlj7PLm`x&A^tc`K%`|rY|I}OU<%;`%rsC#66C)Etvuv0&&NG=bDjZZZ4^Af{ zJ052hJ>zJV?HW08QoP0Ex`xJ28I2&~mm2hv9I{YdNZfRlYNI+34T3ZL@-7skZW`xK zYP(3me;@JcY)W3NG&Pj8NX8TJwtu$2E*PcSO|m?eG9dGuQdET2X(#1sm+FHk#VoZA zQcX6~iejY}irSS@J^Wto?lF&ahZsMu-}G1lHpE_1B@lN76J_d=b{{c6#dTu?>vXq3 ztfEzvlac1rg_u=ImRf11*=SCQng3h%=nKoMee?LaWDsGoN6O``H5NpU&MLDf^R*5sI>S45Ank5_j zPZ)m6U86dddd5JnO^pBfs(YR5o(VDC>Vlcp=_>j}VWr+DUNxYP5GtefNF0loWX|hW zJBLZx8>|+=rvbk<|IY4oiMs*cx0{R{zYIP)5Y6+^C;G`BHR98WP1;3*(S5NU&L5S( zzsnKN2-7~H9Y1~a?Q_}JGeT(bD zuKPT^JuHNgDdY_yG#@Tm9us`4@z1_+b&Ks*-Tdsk-)*CB1=enM`}GPwg9WoDxD)w( z7|ZzI&d6JszikeQ5GQ^Ave9@e57}WwtxKscW@)FY4EQp%mWV!#mptyKSz-(5;Lq!> zv2^KFLw9)5%FvyrYfJ}8R8fu<|DhlKyFn#=yA;WoLo_hapJdG-m-(1s4B0z87pcc$ z+wBi4IzEF8tJOr+b8dmkE78^Y0TEswrLXa?@HN`l?j2`@(y!;deah z3Rv%^QEI2s#Vq>mo9}-b0sj3hi%F-Wc}xacGqGD1(4K7c@u;E&7@2`q8hX>vGZZrl zrSYNEQ#0v4l!bZ4bdM8d6yRqr+Ouhm%V2ogCaoJVKNqsdz3I#>x<)#r71EgnRQ5FV z=HPd_G>_boik@P$$#o{Jg5r9)xJ!J8$d>j*QPU#y^8pLHYL!h_^lLT^tpdo;l15V? 
zJDbWQ`jhoD=yh&D&kRY^6v~K5CNfi@Q4Vd9-eUaD$Gk$iehRf&)Havekb!a0mUOBC zSvgL!Rh&oo%#cQikBELbID=NDPzG64n;^eb{LY8wM1Celi|7uKhda}#rG=DUy2OlF zn?qYct3*$wQfmlR!XORT)9^Plz^;J)6m=;Gk=B*nOgbK%CEAxF{Y<6g@@Px+Mk;L; zQD2eKSjm?K)Q;|}&ZLo;PG_Z1iwgr0l0l~VaG&Bhm4%gQ}&zQF7@yB3;Faba2JQkS`k&vTmXMKw+{ zpJS+TRC;)uqs(gLjSgpxi`>Bz$--!U2>CTWmM<{aoaKC$!ESUI7oc~lqukDCRhryn zouYE9o419^u{d1(6lFqu6DtIZvkYOq;LT~@M&fv<2t zAl5P1-H_t4T6l}0!fLDGtE`@K-d$Phv6*?71M6DtWl#!JJ?0ABX*U6Y%WihL@)q^#R?pWSkVE|X7!HH{fOGTy4V|7BLy|TjW!c4c>Lj|~br^^9v z5N#pdW^+{W<$%juE1U+Shqu~!4`B_>a0_g-V=ae;FSV9Y!Qw(5bF~LITNjw)c*)G6 zZoa}`ui=fAaH*&_!Dk0#mjSl8tZqWoY^dNXon#qEEJKgmdJCp|9584hG0DKg0Ttp} zBzTPF1{V}FyW$GWWtBFAD-c%`{Yy_I@i2A<5D^!QiyM{D1--{*Fqtb1t_8#n>e4`D zmVvmFbQ>Ko-)=R#<8msEF$VWAc#+R^IUJsHkHWRA&EWw$vDrmpl1A}T}{%z^vJMWx4yDqu1b zGs&ECv(4F+DC!boSDa29Vx@UfZY{NXNPP+`g31;L$rhrrggTZlHMpUx!yc#h6T+5BId$H@IuAwU1>%RneiUMJl_GE z2pZ&)4TX*3?ph`Z$}WX6EG(Zm$4!$5b_0PKcOkSv#uUq2Tqt@ZMUaSPu$eFhD|i&Q z^Nvy!J3E15poQ8WXP0gTQOMwSJB(HXi4T**h>~UZ7{q#NwSi0wk=VsrzEG;v?+v3G znP@c>{cyo~w9b<5kZ_BY!i{L}AGTQ$r{dZ~mP_2JumVjYVqq+)9Ru^ zGZkQRpoUUi=9`Tke}Dz!7{PC{(!`r6hPa{BvCtf{;o2P@k}_g3w@PU&Mv64#E+_jz zskuw$8bS%YU1d|mm3hX==13>aEE1fUQ%P5+eQjpGP7xMW9c{8%p($n~%DTQbc zjpb)%7iHxY7x9=;kdj+8o6pPOQ*vkXH)Q9g#q#O1@(a=n3;DbPK6_ezPIfx_vU5{& ziqo=lGx;gFFE*0Ms`syk(mLZ zDSUoPK~Z*UaZXABpI=;%pI4ZU<FAu24$V@g&)!J^jJQnAiIzN$tcK!c!Cpm zFLi!I1evdBzts6Xdy@I8*h`%s8HxG$&5&`>|>^hK*$NS)QH4j%5GDPGlWyCR@oCu?yLG>}~9Pb|qS?*;=-N zy_4O{Ze*Whx3fO>Nw$f7k!@iQv9Gg7*u(4zZYujNSIqvx&6IK6ELk`=N7kF0E9=YM zBpb}llf`m2*%;0xOXEDUsoY{&KDR_RkMqjx+)~+X+&!|@+y>d*+(y}-x!tlyxl^*| zxYM$I+_$o~xHj1_?u_g!?yT&e-1i*Eo#P_7AGm1lM{YRR&W)B&;F9DiTsHhYy(=@W z_}^r<3d}Zu*>*771!k{+*%1+#C!>?ihWRt{#pz$_ZfMu1r&m}P)jAt)KZ%m!vv zU{(ueYrt$X&WFJ4X)t>k%npFr$6)q3n4JN$4z5_H1+!jY77b>Dz-$DVjRvzxV3rDI zg{sqArv{T#(7tBtASvz+R zr{gwok)Y5Q%*KOR2ACCtnF-9?U{(ueYr(7$%$@|ZSHSGAVD>SXodmPeB@w8bsf_+Xl5zI2dtO(4kV73s< zR)E>vV7491c7WN7VAcd?2f^$;FgpfjC&8>8%+7O*IXSn4i{iXoEVq%eRqm^}?<`@rm7F#8P5&T{AEGVTX?FYZVAK(1XLC!ZjnC{K~2cFAw*%B)v= 
z2(xG~ivzP8z^ok1YQSt2m^}h!d%)}sF#Cd;$#Tpbwl{MVn*e50z^o9=oM5&X%o@P# z0WfV&Dcn5n2F`|27q@`(a5sZlHJIHAX4}AQH<%p; zv-iR5?`ZwXosuIrVH~KA$@)zm@x5z8cK#2eX}E_9~bi0<#an>>QZ= zN}ey&D*TCvh>1yCR9mZ7uxeFnLql7AeSI5gD4q3Q9Q97Ml2xnQ>g#Y|M2?X*uNQy3 zT^e3GEiujOy{|4UQO&YyxmP;KG=+5CrdG3RZFBQOI5usf_w3)lef#?Lt5?%S>GI?r zq7H2|)YsE>N*cV$d_+TuTEVE5?YwxXb&Oh9$Jg;wlcy$6$H{wnuTsG(Rc-3(`ub`r z2CLQ+NlH1ZR5*ziPTH>~Q!t6nch74`M``8==GFiK6mx87Tf$~h3HIEIn@dUbGGaxz&!(TWRiYlxbRmyF~**wX*yvj>B?|jg4v*0z=5k5(-wY zcBz%{cudTqnEdwqeCmFRlEOrzq>+X~yEFpj_++sm2>JO9?GX`T$k5=E?nz9hF(EeE zsX4^oqG)Gegj`jtBU z49q%aJ+^Nh)7v}Po65+E3$V+#Ycz~Tqh@;JBwk8kme8otC|Qk~tlW+aYbR!^lcrE4 zQdkM#X4U=Od&;qhmE;BoS9hshHp; zsi;!0D&mJ~M6OoJYSj?BXFmk)-$RDP&AZ-7hvag(XElaadsIqRrLL~4>+pIPY2}Po z5y;16%v7ls5vO=Dp}LDqXh_YMa?-11S#1!8SEXT9`e&HKwDO22T}Zmuzi6E(R_fos zhukS=RZ@o1Yf?i>A|hHztx)(?6Q}@sKP(`2lk`LWskDqrmzJECJlsppo_O99hcTF+ zU#|_Biom1t+agF-wP{(I)?Ywj6w13yPEtt4vMLxx($$OSUzwbY!B&!U;sqwj6|7d7 z)zHv@IK)L(RzsMQ(J9IQ_jpL!vT{W$t6~(Y^I;sTRq!Es%hMimAs!gglb+Tf(pIs9 z4;-zW5>a7#cB5R5)v^_X8b*t34(9U`WHb#o6|2?IWFdLm zK@8VSP7y(+Or{(yD8bsAQuOT-In8k5Bc%yecYvz)2L-)e+3 z0&3vEft>@>I@8k9XfTR?psrBJv}!((`_d>if=?EcB`T2pw7CezS0kRJJs>GriVv|F z$p7TPLe+JF(Q4{FNN-bdj9xA+i}x8liU7IkqGdp(CS^dUWpr9S-rLDJ0B5pyi5E&F zdy{o4Rwrc^tpqwX+Wib7@?=9l($==G5Nkp0igp^{q!viUAlU8@L!40U&y;wR8N>&( zMwH_%@%CrBM?bkWSQ;QIjMeqt4pxoYMcstg){0fs zUpKsZmeq%%^Qv{MI{XD+3ttyFn;?iPb@N63THsh}%R1|%XM;XzDPDiBrpIH&J-gZ4J(+3{+ zx>OQ1DjAzm(|xr9@9lNw|v9t7I~NF_CJC!e2{zaIBux5`Qtl6;J!< zmA{xIdud;nYT|7b`$Ee}mwKXCk$R%nvU*qqmipek2dAagb)u5caj~pm4vA{C#kvxdcV2=4 z8iDoF#2>-+GbUnC^IBz861`Pw@q+i(bd* zb>U1na}7Ncyb0bC-x6%{WS`foS-rNSrKM$mM@w^ab4!O_gO2`;(_6y$Le3KO^k*o; z^Gq{dy?r4^legJ>o}o!{p0qn?@A+WId2t@Sy}#F)EXsaey1m5b^!4}ZWr(;?spNKb zNc+4#oYhCP_HFIkmh^V)N4Ae_2XYP{-oN_w)y?{5J(b<+YxBMB`v~V@oGm!_`I>!A zKFLaIpeUPI*}=3h&Gc+x$hFvN@>0E%EIwarb>DEM@^H0Y&FD3qQNcssgVpzJQZ^}@ zR~T0tS6dES4v+h2%&es9C?QIqZ&8a{l*+{|EpILiQ?p@OvgENZn#u9SG4bupLiMmD z(QyvPq%t~QHi^7MVfmI8P%BLeQ^MOxB_$>8CDNfM<4fSOmPNkBxMOkmC7PSVWNet+ 
z=VO=vJgFz-(CArBk7KQ03N2ku`U)2iL|aJ~dTmM4hm3u3jL0BQ#2>Z#C6YJ1>iq

+KE(@Bo;=J_gL~zI7Wt?<8b!HsR_p);!a~!*+$dGF;6A6MU_OA;2Bw? zZnjvGqLM6@X8ncljlwA~REZ9KRFqIs;_C>L$-+W{3Op1XD^oxppBMXqMnNDkgcI2u z6m&RD!6P@sD>_l3V#Cw}Er$;uR;ZXTwZ&pNT;Eg0gsYTFy&fw?2{_11A1h}S$~Kn# zTS})lg5;^#FU5IDi9kE)m2{AC+S%5{JH9rtQ|NNEapXZ8RZW4YP#-2on2rq#o6LwP zEKCxU=(8d2Ns1bMNhM&V*9@Q!6`fx4a|+H*?-cr*riJ8Pzk~JK#Ojayao$Ng7PTNe zVJnb!(!>d8`Z0Y#eFzrJK-zSAl?*xQ?8U^}iGkitXiXX#Rm8`KgdKtQwVB1X42^ythL^vnATEPeg zrFMi8(NjBK#>yKDh3P_UNKfBw{k(nQT5{&m|C-7{|NBPrU*An8r!XKyn7sGLxlO%x zt54l`UCE5%(+g+CeASX&+gKY_B-G0H3ANl`8aWv&lSPh(ihG~08hrCqp5dv&dxfxo zw%D2(p^B=>70Z>8vf{#oNTDZbs3WyA4eoNQz0BjVCqxM0q*E2CDlnTW9ClMeKcO$_ z(MDcz!LO4h3=sO0aV|0{I9_C}Fpn(s7%H56erk%)@5-=*al!;)e8Tv#iDSmkK{GL= zDJ*^Z57A67Xvw%Pl1s@;O&BU%E4KRCQ>{+&Ewi-3bUwW>ccPG%o;Y$;TI$%5qf=8R zBwQ;D5)JBmL4yj#@9GJ)?7&dKtb*Zc*&Yn~wX#~4W%hi3a{SqSXOfS;lz7^C|Miw( zpMLjK=ZV)IdGCrvXMUJobFTK)E$xS1U3%cAPvYDo53cTg`11`v_sD*G{apj5#-7-@ z_@PNV=KnM=#xf*oMPkpRTShEv<_13VmFe)*e|)v-x{bHIcB1T|dA-kX`|{QyzYaS07h~t7=Ub+1S(MQRYP#yiygGHZi?EClx z!P4uFNEwGTda#z&fQUkf2GZzoc^`T2HOKCp_wD4&rcK}O9@YAK+2G1Ihu=U$D0+}Q zO6cS5J!nk(#|0Tq?djxS7XI?gh^LyzKGQ=eA`|+{rwKO**^QZv=_^vDue2Clwr*c( zaW1fu-gxQTmG1a}8;KXG6A_egm?F$ls*#rp>|HI-*qxVd{=gObgSVnDH^T8dtV;?kM zxBZ_twj4Zn$G06X4}GERfG={#3oktUH}7qK8#+ElSANyApTF_V!1f;3*1q(k{_a78 zpWFNLf(KtZ-*ZmOb=x-8Uw`jQOM1=fyleLQuq6-7i+FXUWzFUzOTYMgF_RIuAoKVw z6Ld#E&E6Mr?4D)&`fvWaY~OvEu2Ocor6Q*>Zuo)?zrLkBI$1mPwXHk97?d=2V~T4* z>Ys0&+-J^*?>x|OV#g|VzjIUS+49->n}i>J`Xr;G{)*f9IqyA_UO8=fwC7yi=9{0n zxoT=}d)=&l*7?5qZ@#tj`s4iZ2`6ryJn4{c_T4MKzH;E<1$QzXi9)TCMHTxhq>AnT zYGwN^OY^_#q*ZKxC^a3b*y2B=R>TNH#H8;ZI&L!ah1N3q-AXu?e5pQx7PIj}VnV_w z0p}R8m<5}H=MT|O8s~mz{9jh1`c-=dH>=ic_SRg{F{Gr!RUiBFxd%7Y-m15$_fKGWKtOy?Nai z{W?$Gyx`Ch^+Hc;#PqE9&oBqFavoHTJvlq<*G2ao_+rk9DmILA$RTRGm_>v;7P+XZ@yCeGhByd3$r;G~uy_`n>*P=G-4fjJadWnpxwU zbNfCy@LAnQOhRZJ9%by)u5HZJOL>938LlQ4I}{CgkW zUccbp7{{6YJHLK8{mR3oxl5lbx;pc&jSp4$>_c1HhxR{kntx2U>&%v}hB?ERQ;#o_O2n;tJ7oEr69*7E8VZ~ye}{nuahaqpF1 z-npfGaDDlt$6oj54*60&AjkOYeRp3mZS-?9O7cFO`aA@Foee2#C+i#h5s8?p`y92)KNZOyE 
z`!Rm9>7ntClKks;r#0j?>h5@D@vMKoUUs|T=mQ(~A6R|Jk@=}FZvE+9|9ncQI5j`} z(XZ}Zc;IFA{_~T5*y$dxe0s)PSG~XYhxG?7Be-|(sSrA%cC1Ue^0q|>b9Ti9_g9c zTT{EG?sReWd3JMjzIr(mA!MB1{r5o`d-snl+VW(->Xd}4!+-u{(z3g^8f4E!hwbY4 z@y}1QZw|bnsPiX9^FChZukVk8$Rk3JK*doA9Daq=^9w#*lJXthLoP@Btq>xV8fj0s zf+anSuymtXo0m2SOYijd4u7JyBzeZr4JWUS>=^#3w(y?WpKsr4+-~^8!?QM`W=Ef` zQyU-Jk>j3qP8Atv7V^bSAzQ#c)R@|svf}zne=ZITlmC$wQp)LuP!zHuWC=!b}D5{Ikq8_FT7oJz|dutebnWiLTpHF>T z7Y^0D)N6_pn*-Ww@ou&@O#3+axw>EQr@ue`v%6)boczS6tC1aN%qMgFlh2o6PWOHY zq23Yq$6R zPn*vd4Q1rCtN7Iur2<&@%ms;2vq()+X7|g_sm4%(obLHj_=>2vbQgI2)~}(@SNt}7 zrZrBy&AmQHIEvR5tSVP(l~Da!Y`m(h-Q^zMjHOotXqR>Cf z=c*&m?)7ft%U=gbY@q;&#rY2ua3ywcqJa6&GJqKUe_4vr0Pxiz5r4%Yw-o`3femFe zpoIAfjU$^y6tGeJ+Z785{4G1c8|(lx*un6*+Y6s@JO_|>=)Si|JO+w2i}x+X@-&5{ zy{D{=Z&kA@!ywq2%gvkzUmo>t;iW?tlpkiWmbGH~nD|keJu$mOT%&$Sc9b|@nv2bz z^0eHoNh&%99W5=#P-hfww)E zp7<=O*i!Z`>(q=WyO#L6D&dRBF5GfGC;qYWYKU!jpKVAX7BR8Afy@`&kgPht`bx%{ zN5sG!ej^k&!mIKCYfqn@)w+IV;8Mk@;JN67^4W-B1PWnb>cC(y9$=H2Wr2e zdn%b#%PUHglz-^XA;`WrG`}vUbhsJg>w4L=CY$@te({JW%QeiAKYnyr=rp0mB}dgY ziUk~Y5QN|Fkb)~8PRE=&@+_=8>FxpX;+yU>P9o<<;n*C9=&{3gJ!1N5CXcEt)Fqe~ zx1(K=JMP;TfB}(zTam?qf}9#7ndNv-%TQL@^({s5sn%@aCE` z-l0N1;<-h(Ej1Vpe>dyX3{Q|vIC0ddY4qyJm`A%#(|QZF5q=vV&^P_om1bz-!?qIt4sE8HQ)zBI1h4wOgFPc zwLlvG%GGb6yi@Vu#uOzh@cvg4aoYD*#9Z$!|+>% z*D{=SOO!YO3Jz%WCg$vSV9o?E1`Ky#PV=u}?mzS&01)#wCNByhrUArcfOviDAt9Lo z;uS#skKw>fdlmkh%yRW}xyZOPFCQmdh>QP4q-TH+aAKo zaFc~IYu59#Co+XBtuF0tiFFa#<>hOgp8ERAa$~}hj?B>7vCev>dEdi#$4jMVJ6|qw zm1e-x(hay&p^NM>{UXh1!LbFJ+%Gn{_plg_7LJE4ckYf?3=9fHpV78dMqiMET`pgi zS{ag65%o&1u!`~Y^(%f5(0qc0wTMX?u0~SlGjNACRvNq?O}Ko(Pas^oIB5KYv|E0& zor!Z~v#`sNw8-HXODl_eDVb8EPYcsJ=j>cGUfHl^N2##}vv#qT2a51FI5|}<{MsS} zX&Bac%mtqta+9A)`#`ZxrGxq@I`vjdX@$G=^>{J}BAp-vtvstxT|4-oQcL%Z8+oCj zhd%42h}>S)6(_vUU7>nE&`%qC8;FO^Odrh%5!Cy9RzD09G! 
zEKPua_uL<|Z;$GvND?Iet!JBUox3FZCpJS!OrRjCj0$l6y*%eIE4{AC+3JWzm`#1k z5-4^w>fOuw{dDVjZ~tnqIIVk-CP`f&0|ol9rU~F$o7n^1R*qzY_P&6yzObzIpD)%f z&3j6__0%@*mVz3sF}2Y>oqfq+=1!KeD{HOOXMIWCRV^ZPF7A|1zz1FXm#6Ug)Qk1# z8GY@29{g@eOGfO#32`gQiRr;QB^|F-fU z^F4(pWouz{a4VG_Ty`J2OiP!?M662?4I(@)ObOS+U%HN}d2Md%-)VVFQDP1Xr*||d z89KL@_eT0oh&edZuF0h-XLO z2}u#-k9TlBA{kyDIU1|mreP#R&^v4vfrAAioVkOUi>tG_vHd^N(ZmK3j)jMVm4un( zAFWHmtV_bd%%x94!lp~Y%F0Q?!Nmituy6sJ+1W`r*w}zIR(4$yP8N<&id&b2otyiU z{gdM0A>m|W0oK@o{Hz?TK#G$U*v`uQNdY;yIXFl-**SqV?oWvVD*!U8_3Vj^Jx<+ z50GSI{%mDq`)uX@1&-a&vt;%Ka~{pOIi={tx-7 z=U*haKSRsL%=SMYF5vlJ=s)N6KO8))Kr1-8Kc|d`1L*L7T7mNb90&x&e?P!(U>neu z|E=wRr~mbT2JAnsvM~Rz*#OeOX#bO|Px61jkL`cK?-O#&z#kwR@CX$0X%mq8ukD|G zpJ4!c_!*f`r~h~Lz~28C!~Vtce@XtNfYAOgs6XNS3FQAm_@9aG%=szAHJpY|P?fPu_&+*^rPaXe(>3_~ZzkrDRgyO$C5OM!TexCmaa=>H+3{KXOc5_uynN|VF4Nl^<3PJ0dD`iXJ3CQRnLzmwtV$>^o+a60+EWA5@ier@g3(P?){ z{Lry?^LlsRs?V91#)SL=U&PfBaWRg+^Kr4av9s&^rQ6TXUz^DB;ZYG=Q!{JaaAf~> z{Mzl7^deV~Nk~uc-FKZ~B;qY?GGc4|V*7VZ#DL-WUVXr7WX##wq(d8Vw0|extLtN{ z9b?`3-lW5XP-mwKpYm*Bd6+TO#;JD&& z6rwT|l61wjm$43Choxjn(JiOQ5hPTrJ~Ye!DwNl$;gQ-?60l~3Tj*!wv1duSAa#lf(i8iI{0J<pAg;**uV+&5|JCjd;56qs8WUELk))XYRfDI2}3g+ zR222qSbDVv-WusC8lTx-e447UNpRq0@u$&O5ORF4dO>W1YKw*;yM&#JgtH-36NW8a z)LwUnf=t9N76U9&wr<-AzpDrj+m)C_QmC2zirj#eviN(>+h`_qHtH4R#?z~q@SU9P zZ|bc=Fr9m!M3)H1%jy3iUP%UJta;30Rxc{WiWwW z4s~I?hr?hIw2OXOc|hsS+~iZTifXAQ_mB+zg!Mw}Tx6dvrtq=#=2X^O3ad(x9+-NS zi?M~YcDBbAo;0iqIWEdgIsYb>;Er4Zo;=B&!9g9eU=~c_ee8TbAS{e7pVIY7AFQwrY%1ER$9(e{4&cO%;AIxA8cRqLTb;ECIn+#=B zZPyYRJn*QCY>a2P7!)v$MIwazJY0@>x7ED#Uh9V7H1LVOt~Ld?35_oWH{x672MltFnn1&#l&~WV30F_*PZ#jZBX^jhm0f@FGkn;?lMf8+Hw?91|Q z!`O5D?kGQ54Nn$P_(f~_WMW9BY?_+)^&iWQNF=PJnuo~Qnp1NuV9KYtCso*W(J z2-yy!GgaFHC`@2F`^dHenL;!tCEP}*e8aiB5b6ebhM$So6F^YY@=cV{%-6{eSHRQ@+Go^!z@+gG<|xSj zzY8}a=u6$q*iumN|D)_yLL&o=ClrAD>XZmdf*+^xi!B|QD|g%_QX4Rw!v`sXfh2J||3Nd$)FTs9^AP~6EK5X=f1@4o^itb(pR zpZ^QF0&|Wm2XZj50lzHsb&EhgZ@?F+rLsabZ~L#!%`^AR(U`!_oRg4)2SUSq)4)l{ zc-_sjut9Tzta9#!o4e3{L+h3$Na=xL!09dSAA=hA4B{Sw-);gEZ#%DDRF~7QO>+uY 
z(7Xpuc@N{ir!som6=w@}=kLy36F#gSmHu0@aT-SoB-=)~DIQ+LzPtfn6)Jt4G%cg{ zZ}g#25H!jz?&PnwJWDw8LTS|$Fz8Z%+!i=JdOF8`??AXP^+9-q%`1vWUy7fe&l^!5 zDZk~T7=g9Qa!xvZ>mP{=-WA){GD$<%%exw{x=L)1QP9{ZW#!F#Cfdo}i%awMDkt`4 z$P>C`yC_<)^!OD(d#!oY?5BO~YvEtN)U!8q;wj&}B2Mfx z0)Gdb zFEAtg8q|AyXXHQIO6MN|cU~aTs-PomM+(C_8;kzP2Wo3<*$VH;pWxhG0ev^*U*#o0 z%scv^E6C@@YhUyD9WgMrNuT>`tJI_E4Rq)vPSvYgNy!y6Tjq!+57RJ=Qw2EPu9Uv3 z{%L=oRk3qL-&2u*d3ZG8?{L|p=TY;H+PbQ}tGz2;^J+0L46*og-VF{laV2#|cDmL? z4LqK&AZckni-H@$*;X~L`YBK^n!^1Rgo4`}Els!06RW@CqgGzPOZ^l@cX1DkbPsWU zK6jkR5jW(}3GCkq!J%s~yOKk*Ghvbcqd1xQDq9NPee}QK7~Xv_K_0jmnmw1&#~-Z5 zG2QsgSHB}HS$Z`exV$qW-s2jI%s5@);ty0tSN1fIr^SMbU=e zmG=Yb<2su!2kt}YF)i-9eHKP3^zqfB>L=Q>FS)e?j^Ixbb7H*}sT=d;eV0BWlHWgm zNJ4J_8GHqOS9?Sk)%`7F{vrX<^#3f;s^~^xe z&qrWT4U6ZdC^)TG=#%d29&>4YyXq+F42qZKESX;?&b=ldunzpn8^P&xPt5v#3=SJW94m|xtw;Ex3gh!di z!o@B)eb&n?^GDvhrtV9LyQc9Cd8>QF^|RSikM+spUyGNB#}yZsamQ!om-5G_w3k?}_aCiX zMI7NBm_JYUHiMVek2a&1_>VWkmtN<)2=#B79+B?iU@u!{c$eQebqu#1M9+LWwnWa- zT0IKyiCaAi5ZisY)|3u5L#n$5H{z?`Dt(yNJbT|5*PhaSh}WJ>AAPL$6xUy7hq>0F zXG-#4{2FP`FNL$vE<>#5aUI~-7PkCg?0H8vqBtkN5=$&=c67|#ldo?aLv3z^IJ7SM zL2Rs?Uy5@XUR;WDIl4U-{k-aco?$R(!Q3cpPbk!-Y8Mi23Evo*v7hb`5^Gs*6#}$y zwFyahNL}t7$Ddp(Izz2?t`UY{y>IGAPbs$3&pv|Vtp)!v)RE6uw&K(wy2>$T-JA%V$e&1@(9TfKV99{Zpv`b$n`doci(x&o zWSr7VdA2wVpV-LoVSBXHVMVrlFh?>YntY|X^*qGRpk|x4Fc^L1yxq#sWOFoo&-7xt z&6~AqWYrL9C(^_r*rsQ{KGl#(PIr~TJv*zS9*M~hnmjOXdTn$0M&ll*&t|(Dz zhxDhu;=f~aW|q!o&fd-~V~PqH><4SWjLgr+1Y10Q(|$g5eD}fDY^C~q7l$V&xZ-SY zY?2dtoVO1dTbYy--WE$&mbMLfnQXRGb3E8n&%@cNJTHfEE?apKhw4*VELRVZOpFLj z(M*|5Rs)5Gd5`N!6_mQE9E@2lzQ>TnKWTJleTMo|G;4hA4}Q~c^)&weQN)PvzB*ai zs(!7`Q8`?#)8;*fNo?@EwD!&RsK(Dar}pxf8~(CDr-2@^j$zmPGooxwN5GyVXLrN( zfc%&|Jg0u=$>dpH`$=Ke1FH+0tdSn$bk(HY)OyBhZW~hr_^`R7gC0?c)3dgNoKN5#h~ouO!ct zv96GvAFO|_y}*2%WIe?H{{8W-MxcK88)<^)W8=m1bZ+Tm)v9U-b=}As`6a6Rhtp%~2G@DFnTN?T zU&rS9{cyF=xwrc=TZj9VPPgr9wRXqw#p%bN%n0nOSF0miKEt)=+ngi8yIs-8<_T-u zmu+I=FI#m!BcgoW9YtP}1MzEw8PK_Y+vU)Vo<{v|(;FY>Z;BfqgKsb0EgwEnFaEw) 
zD?Slh_}yt+?;RJ@_8-p=zndL5y1iaHFE~65J(stx{@&_4uHTn-yq{HvutI& z=N#I9Y(CW3Upc&7G>82zvB&qZAU)Nxr)|ej%a*du%5tvpUp*n@H_^%ErlEtP3MBZd zmW#W6&L07mcEkf(;OPj!+8h_w#-$Mr#rd*Fi9w1sjzEHeFM>pd;S)#zpm_lE!Yc5D z8r=~t(>#ZFMkwG$+2LFIw)#sYGTU@;u3zxOY{J{3V)3wX#u|qd(}vM1igxQ zwI}fa(Ix3Y+#d+t6(D&8dlY&1Awfoo-hzInDR?3o9SIZ)eibDlrpE9HdBy#Moft7~ znM;9jeUu9{{ciLX>IrN=59C=FfV%Ypy&WL>j+x(0on_x0Pca!f};R#PtXFq0FIO8kQ3lWIywTgs7_)?rA9i3 zrwb?(gan4|FLvlKBE`aXuG1}SQw|9iy3GgV4LG0+rhqC}i3uE+V&O~#3+2@4R{lq* zw_@T!DDR5b{8xbAmr-RPX+Q4_@M9VM4Re1_(t>(wlqNrodYVA8aE@~N0e&+QDtLT* zXo&i^cQY%G`tB#hGZ%n3nJt(6PK0nz9I$mk(q|eAowptGcUTkcZ!gk_q3p2_G3g_6 zDAW_gqW}Q>aBZ+46zXny5HWPvKam&(kGcB$DeNRO;5#MYRwEIq0QT10Idmp8XMLX? zNSiow0niWU_o-pQ!(ilt+Dqkv`B$EkZDGNcmXmd18CFi@pCf021%*Z=t>mFs(`LYY zcpvQn_~({%vQ{Ap>E56Ph%R5(IamW&DOxe8|UUe5*RBp0Awp*_KfCbn^A^eNE)lY)#9A{uJZSrr1OnOpi!yRK7T|q z!SgXCU?^m%J06vp1+qbN@7Kaio znHEnb7vg8g8Ox0L=CwK*YF%cFDX9hJ@2j3&d#<0z5o*4jrWr^1C4#5HhSHd)v&9?M z7_xCE8X1pi>{X{j^r z3S~(|$nG@}SgDnl?)Og5%fdog68%zlV$$y&iO$(2t%dx4`?nMZ-vnmE71fwLc0=9+ zjT7M05=%A#(5pB9VvMz$#LxiSP`hdG70e9J>v#a`BaGjyf&KtuSnEb-j2sEMmM^r{ z+jB49lwYy?w4M5N*LID??yurBo-vCx9XsX%rQYIM@jzzr)p_);p+XmDv?0Vtt)(^* z^%k3FRQ9P5n|1S5oalzXn?_h-%n*aLW;X`SEciKwbzk(U5Db?-Ksl^4j>#r5YM<#e zF=01GaP;^&#((tXk2pB1#CsvE7rG&WT9YS1Q9=7q#m!TI)`p2D>6dIe#dWFSNzyS= zbgKJUEX8UESd9Wvrm%`#(XVNZgO#y2)ZUYO<4EunO}yGoK6^*g`1>;r;*tLWmr zy}dPBFw`(LTQD>P_M;Gq?od=gV0B?&H6fz7kYEiFq7nne3I?*~hOJntL z;IFB=gps<0iMoWbx`e6vu#x((iTZF59ZcLx%-^^yh;kN(IRnMtnOJv|RtnzTJUnZ` zOL9bY%ORQ6O`baZ%krD6N(poxOYRZHCHNHTDT&ExB1v>%6d8dO1`yIB2GW)mI$$%fvzL0d1=Fh$y6#(GYp-_wY4$r=X72+GW&k_qD z^Vmlo05gp?mjx-QaO4oPV-zVU#L(gZA*sI)LB$?SQr>2DCIZUH$&=X5P^WR;2e+1m z-DQe`LLm2&Z#U0fQB4+ou#b*L^=s6Z{@ib-@z8 zvD*CJ3{mhTcnC$C`-*GTl9uKf{&RF`5mvgOpn9r$(QR{@?$@5KHxH7pgs4X^(e;U5 z*XFuq{VFdhJw<~KcdDI2<_@0?%O51Q@E6U(Jb#&3)Yj@IDilMQqsh_j)pm%hb=wOSXVtNXvqQR(V!!vhU+fU^3}Us?k`7WK3w+t=f^F+JzcQRWL~!81qI4p%3CBc|2C=cLpG~4OvGJ 
z@u1>F4>t&^vp9{aU=EoA+y*vUOH__h$DF?UpqD;Oa3WhhQT`B|XyW3`9dj358BBeu_VO}tL(`^i{YSkPX0p}DLK9Mdps%l(+c})PG80q17-E3Tar>Jfp0wn zH#kTBLThdF5pj>)(iJr&Zk~@=&X+D@nX?_KG>F|IsyqM}8krONdlN8kr0^mPOypB! z59?bhc;J$getf)OU4MQ%=FAwzDV0RBKc@vuvZUauhxyebSP;^lI9UEFf(RDPR?Jxq z%GP%SqRJ$apbNc6hEVvMvvX{z7vvF~u4IpW~6wGUWm15&pDB`FVfj;ye2 ztBIwZOc7**K#Vg`8}3%w(7TcF6dBW>Xn@LU8l4%(Yf#5QHFL_cmvp%5;Q{=HD$n4TUZ2#+cFtH8j6S&Zc|o&SOeyfJNmqmf(z_H{x!x zYi(Y$I@OF!vv&641@)&J!%bGbVRAuNjIYw(jW z3yEtp&?i0f*23i0Rp+kJ)DWv#&Sa8YRNzlwzixZh%zi%8D zN#fkz!%T=s+t#`qP}P8b*+J(pIo5B>Nfy9^- zZ9^qO7#WuMJNc)oX26#>MbLcx_0jRcq6R8s^As-%GB2jI4&p~*3EREZ67s|_5rzQuE z*mQ1V3>0>>rEh*i_DP~yv`v=_#+VZ{(=dAS@D0M)h%<@0%L$b(411>HZjuB&hRPlXLI7-fD>E|CMqf7wIDgxl()pU zS`g|!MUfYpO|CRo2Sy;2MUN0}EHhg#^=%J$M(erm3k6=JW0f#ZQ zYLwWo-dReQ!S%)SBxt6$K6-qrh>t4rf*s)Q-7e7FAq zf4KKmNMj!y^70dtFmW9oLGg=Z&8#r>^W<+pR*xEq-WOnF;jCZl!i05Ox)hnAE8QRr zr(E@4a+DQ$BTJPgs5oiU>M;;ou8|eoV4@2yzy~-~TNrWjN4WRm5ex6ETM5I!7NndY zKk+crYX$dJ)jtoBIVy3+(;Wfd{KUQkpkq0LA#!)7AS-WcKd$ByM3d+}$91rF+>G(92P!jDs%XC27ic3`WHG{ZR z8$(M(5ONzxfxF)Xi1dV`R+E|{Tzi3J3-=O_4jma?Dcy_IafgEN5Y{&v@o_#h=D|!d zEg(p%tE;M~ejS$=&twj|y@Of6g6mwa>}B8Wa&9XA`&#h}AhL-Uu6WIv#+uV!Y=BaO z?Wuj!zmP_;3Ev#)_R8fAYWM94xLKifKt|7;;lcDt2Aw>*0P-A#tn0+2>N~SN2|cJZ zQ&I}jJz4(OY2lx1ZMY~`cQ?58JIKdW#kYL4tI+gU$cxm%UOU_FLu%qFZV81I(cu>o z5pL)93vk~Quk`5UI)AWJs0QPbiYp^TwdIh2^NHx+morcPefFx^e#AxLBZHD9K++yF z!y7>`f_0<7JH^3Ff(jFuHDM%8fSK4Y`C+pPrHUm9f}cS&U>Z;Gh{Tq&!2BoF4Z(2d zV)9QD3jY2Bc0l;bc`3~zs(p!cYix+6F8Ap4USPOb#Hu*ux)D0SsI98=3PsCUeJt(U z>6bi3HT=r1X3WX;fwih$$WA$By?y&s#EKHeXMWVFn^ogH8o0uN`XC2SMhKVd9VX+J zX!60=^^y1po|Ti{=)7M)Z^snQ_}w8ys?;~m)XPILX!Xyzl_@c{O^`-(q6fW0KpD~W z<}1@{sul^fh)nPMGeU4s}0P(O+ zJVdmSVz1#$Yl{5yDz39|p`|v>Uu68r^^Rj$n@3daM6t!ukVEss91A-{4vR8|r>38)wWX6^=ka;H4Ps3rPs7Tpp4gJ z%$sVy`V%^^pnRC-Cz3I5EVm!0SAgNJ!3;Sds z%;?qlkgZ+y=A%)9cBG7wxX;GZ7l>uV+tu&C*NIAgxpqjHDN4*)BK^#@)s*`)Uc#V> zyJF7t9?A%Qqf_#y4Yo>rC8%f}k1*)zgyGMIh!&}>a?fkU4zmc<)eq+`@JcAp5CvuD 
z2xvJsIXqTNXVJ8TQskRLK1^DIuMQu)?{V);-WV1z@j^F#37x@dCj={IId(Y=u50j?ssK^b+8-rSF(vhiwb77p zE*=_4f|YU%>|{jLN`^Wc%i}OzNhcE)2}bZMKUa)?I~EGmLTUJ$B)pB4Bazd)>Ri>Q zg|fqF0mP#&)pxW8rPicZTGtj!D%_F9e7_)HO3@-)tCSU2(4fZu_(dMRrBNl!?(jQy z|DZ{j0yFf9mm9yO1H^Z$V>{q`?b7|p%0Z&k>Go~baO2Dj(YqP)X=sQ>-W>^S1AkBf0xzT=g9 zW}!7p$4q5zrv23_ERBAdA)t*S#~N}BSA_JO%|JvE@MSX)Zd8GQxM_D!yHjR2=ZSG1 zW*_18yWc#B{&!QK?BKNO5N~r;FCE%ATbqR?3hkKa;n8oZrGVnOg@v8~y*e?iRlf=v zFT5Xt=osY8_auxd9Ezx4Fr=4E<`E1*3Ly;KvAf7{YG|~ds+unJtpe!JNxa<+Sdeo4 zpUoTI*sE9&absegEXUUjjj+eYlFEwzieQe@svl^{(J_V!R2Dw(QpQq)5K*9{+V9~J zlKC5;dCtdHLZ)Q4q(Km~^g&{XLvHRu)C@QDsVE>)@`N)rLr$s;)m|=p#j6*q>|PP9 z)nk%o5zWJr6<=lz` zKSI`qgWw|#qaDl%!4&n%f)*gP%`LUBzG6%pPO;z|2dZ3V!Iq6|;ge&ZVlm$8h#j_w z)`2NFd_%@0sxm4ajX8(W4DzKh=>RQz$ZanSWI}*`^2c|`$*^!-cR2%Q^{2!t+b?-B zEbpM*x7cO)=ff{e2@9lb0s)CDEMEzI5O+DB??yxy(>vb5N9rb8HL6~Gg8)OYy$j#i zt)q1FcR>Nd| z!4)A~J-uev%)rOH0F;fzuy-0Pz6%v#Qjd1hhjh)w)XJn5!K{{NaLP4*Wk-{jkS#nh z(pI`tWA`V(@gUGNZvbA_5;$cU;f`PPi`S8%tgZ!58>$#R;g(c?t1Q3gjixA}Z9^>mjKunKN3@)I(^c&F(=Uc=0Cc&C>) zh1crjS!lzAXb2m=Pp5KN4w8)Rc+l$6EK}O8L#s)wj~>Tgu$9{cHtCIO8fl$&wx;{w} zdC{hpvawQzBs*kQPDAo7)A*iwC{`~il|p`2UAw{z%ZXsQXQpOVWZ-X;aUat67 z$v1mqvGIOIYcr}Tk6kl!%T#wmtPGE%C50Da`{4|1^phB8w^9;1LvuotG@D~IF=?X?XITz z?zctm|GNIz#Y$hvN>6Wla#?YDd3tHnv`~pBEoH;NFbhUOY75#(mdIUi7C$LM0&0RF zWoJ696^OEDjAFqYi;4p=z`R0m?qVTfRQo0Lf@LH(BNzKRKtmlqXnMbVqN=+3uh8yL z%;RO(mAFnJy?3EAlIV{xe5!j#38Hpy(g~1<)4_P=n(L>%RH2v-gYkuqUnDc?s$BQ} z6(AFYAgR?);SMiuBHaNGcXf5k9_z{YYr@~WqbbOne)Ddr$}fwwZ*V7{cLi+wL--p! 
zW1hX^{jQi45-u^ea9?_D;>W4wRAboqWpN`i=Fzo4Dk@r%fZ(!UGc~GG42Z<4FrV}Y z(l35W+-!gnyyG|OA=FDL5L_XnQk^Mc#+>_u5Hxil&6`qi$ zbh$+E6**93lv58mDT-2>L&y*lVgg>+AF#PUvYjACF3K&}Pn5s*w9F2QZWhCRRObkwb&qX{-W8>Mg$oEO&?P=Rq?I<-&zg?&Uy6pHvX4e7$tdqCT2 z3FUk482CF>@0oz%h%J>t%1D~(M7Y^B45pvGN9&>}OpwZ7kmUS2O;rD9!ARJ2d36~k z?=sP8{@ff!jafY|Wm1^%ZkBY_W$v41*Dxx!cLBNfsq%^% z9DgXoL-YV|yK#V&N$w-sqkik$K5x5CnH*E0DP_(WYdxl-A@nLny^7j}qicd^bovzk zSKHLuYo&|Z8qzDDb&RelOEdi$UParAo%DE^8Z2_Ggu>DL z&V!vtVi^bc>|Oi8KX$z{1O8zCu6BKZy!2Uz28D_@#f)rVU*V&Tp>0Ge7GG2szzoAV zh7820wHH$XBV_L#!YQtGN=QfOaFK*ZI7c|O~QT(0yN=ogPp%KBQ0NtwkF@xbCuH7pH?q1 z1~72XMxiS!^lO+3f-?$WB@4E3h(p{C0zxJ~0$3Qc_YPOy7^c)c3EfSnQcmVvI>}^m z6f4`gn@&NI$1R+}P(4w*&AAlmEV9suWNjz8H{M^w%?AwA9^j#a@CcUfePvwbHB~OL z7_;w7AHf&&K^W;SGl#eta13HhhEceWdq8erKvUDWI^fAT3Kg4Rxl9y6q?|5e-GY2L zi{FM1=FB7WZHMNE4XlCpxqkhuzTizg!!6nCB=(<)IQLc)bO^IBgkQA&woJlk@|!>c zn7n+2*m)ay3({v>`c2!}JZ&k@mwX#++NlX!^bG~#8&2q=tp+F-yVdc(ag7+tpUVJ zh@7&i2M`+Bw%mxU&*a&HCnP&!nW%2xp7THlwUpRL+e{+7aWAHD5QT1MB8no2+S6XE zrL~AfF@2xJYct{O7&IQgyM+X<6>s)Q5Q5)b$h^l4EJsziygdbhxzFQLna07p6q;9+=$aXnW)F`1A1~ zIeQjV8x6#Yg+#6DDC`qVF;(v)rdj88rz4eXHK&MXY|dRu9TMvZ7kADpB3G%Cd`#^Q zNb~c4!edm;$UP=d$v29@M)_v^Y2heZ;--zGrbY*vAuCF)iM;SXafOv!6e9W;spBFg zULd;0IVb&%53gSy;}|}opD+~PVnR_VoDL18Um*4_X}P92Z0YOJaC{W@tru}y#yXLD@_nx`?tlc;uS~T)*#UzeO~~I} zv}1oZoR5u_OjsPxATq($3+q;p>`+*7Z@#eP<=?kH|6Ih`a{HS#;@#TdEjjGAtsK#4 z##0Jgr@7|fwL?P6V`8LuR44K!WcBG+!v}%j@PH3@|3jOBywmQ2r?KKKnMn2QeYO8}@O zKjbx2-*jT*NxBJ&_y82Ra#mSQxt(|%&a0oUJC^$@7ko=o=!MY|7EC|Pk+#sUp(R1g;yfA)5Uc-Ma5>sTK`d&h(XAA zLon{LiKy9KBjUXLgAkpy5KAlkz;W{^udT<3(?R}Hm9FL2o#3QI%u>_`M^SwEHE>%p zj?~wQ0o-RnoLTCCpRo49n|jpGY}f0ejL3o?b}Hi=sYg@a0OZVpkU78^*!Hr za5zqznj~x&x>ymnx~lYu&S9n!l8VuFv5~?X&(Od`@I|&4I}bX!Hf6DyNm0+)zrDe5 zog;O6i`1~zMN1XRc2~!!WZ_RmXDZ8l0#AnU)SxLm$yU#xpLv6!8NV^5w7(r76oHom z;E24l%@_2F&Mld_ApqIE$mftXlVHShUV+ z#$*`Q7>|U^q8;_Y8h07;oAbzW)Xsq=hX|gT0R7ER5f&m#!rk+P))CH{4zk)a<;KL` zm22Uy=k#-Fn?S#5?V9bIr1pJKJ3j#|0qIf8N#>gxWneP#+;3{$@({77mg_G@$Z~~f 
zF-y@^6n|m`7k8%Le|PPwx5UsBwPmjZ_HYj#gm!Z+SOVre`Xa+j`E>mVXJ5sdz)kx- zB5E3m{A!43GCd~Cp^5kBB_pkV&7Jfg!E_zceeJAq{`ht=a@TBtKfqf2vpFn%ezv!; zzV~&=&y$N7Z_Sp>x?(Rdke@Bc+vdrS5mt;zU!FySZ|XAM1VqYn(to(?@x1ha*7&=# zeas~Bj1!;#x(;J%Z`_;i>_d;N7Jn!GfT&MSxRI%9K{O8C3V$o;$E>J}7Ng}FnuL0l ziYf?ZJk2zrl2!c?1q-uw{ChTT7JAda$k99D8+~Er;UNyajWT=t~(iNWL_y=c{{>;=iI$G$mC9!ZYK2qx(Xq19t zKX5S*m;rc0Dx6=BdLmXtoWL$@Pe%xGvWIPxsBD|JVR8>`tH8;(3Hu88Lg!!%!N__; zXe*7y^6CIA5UQby27dR8j@ka*k0{-L?Q^CSo&d)urF=Qu4_zSsH6;c(%td6le=yJU8M6E*tY)pDuK7X3z+odl*CW?&JgSMKdD<0cKmo@B{Fc2QO_J|$A-V_{oCa3sF%(_VL zg&NMO!5=v*^`=y`s+}A-3HRX&gj0>KRcisHFi&!UDF||f5Yw5F?DG4lse1Zldr96Z zdtu{zFKSWLl*;~B8}V$t1pz;T7a`2(de3e%)e5o5^xm50)f5P*VU#@Z6 z;No%0JN`nVyk||9?zq6*uX2?h^QhhP<7S{i-a5$;@lpRC%J}pPWdnBSO=AI`&7FwF zP`WSOdU77zqncWWg4&yGo;}E5h61H7U#KAVmYgFhz3~nV`g zM2DAbG(0$2N%g>>I@FKA(-6h;S(w9SS7DM|hnLO9@^2;{+;GQ^&}J$rb@=cc=7z zb+tsyYH8rwvl_T{n7}hVj*6#`ouB{Z74)oi;Pxlt%x{OUELm0({mO*seofua!Zck7 z%Il2*igSI+7CjhnukahG-3P=vjobiZPY@a0`B8SAi$q;H&Ytcv!;qsYHpkjEGw10X z1yi{)MF;X#NV@ZuE=KhlW}47wE}sX9T4sOw`esA{%3CK{sL<10HUT6AubyVQL#LSM zvB!ma!qShZfRIf%e$%}Lb(^YIy$<116mF658Z8nt$mg4z9O@# zV1|XtxC#u(Xo?q=YI9)X1s~N~g|e2~V~q`mv!8X9zYm4u-ur`6QUrWzcOc8mL!;@e z9^DDSdMv@82TESJD~eckJPXmdbHv3lV!v*E#pN_l#-w=nb428?h&Ztf0DE}DLNM!{ zbdg<$^FYLRi&|qRG%qYy_E@+rebWxuMtQrZrh{i(N4PcOYS{;ul8taB~A%UX=dWEFDsg& z=4zC){)A^G`c07zbboEx-9FyM^_J(xPs@^WHTX!Es@8WmyY9!&xPPu9|4QhhtnFV5 zCzENrGcZsfca|C$<%-OlI+j8V5&kW!bUlT~A6;##`n#+)l_}U>ulg70-jVy)Gik#> zWBiSb&53?brwJ1kkOC}pf*p&B6ukID30HO4*4+i)H1S=Du2ly&s0LAMUFO^`X0aC9 z_qd99REI@g?GF?|vFy$q+WxAx>^j=Qm*$cB^Wv19o*JiBy6V$SuBEnPxCY%w{7X}C zL9%R{Cu{yIdW7hm_jNw}G+dnbdNJrSxDuMB!}?aR zj9Rg*SW(s>i52FnphDDj3m|T@r_bU@VFmnO082o$zt1W?w1D7iPEfj|0}B+|u>ATQ zSK?~0Q@h*@x5}M~J)bM{WkD0QqE^*vTGMD5Q%2Y5YctyH%3vk-d{LHU3N)$b=eUm9 zu{y<0sZ-`_e4TIc?Mk=OtISkv@P7|-({L~+5J=24O;HC(;0c@c%eO;=z%xcO z@cyz8_UgsGkL_JpPzxhzv6PvWyV%t|#gy_|9fJd3R=-Yltfm}6&Zc#%l}iair)FzsELlbwiVQzgD9p5d7d5z>u6xPnhQ}jq@dq(xFS- zw3~5zv(vMXFINLp#^tJC^P7IlpYpq42ntEu6iG#?Qfh868c;|Q1Rc~Vdo{0KX;s>d 
zZbNfBpM})N_QK02bgtuvYO?O^xtV$Q7&_o5#j7Z=s8`KimO5|%_V(JP%k*(8eEZ;o zpDgmq`|mVWtGy5WvW{-M4WS$_6I2a?itocdIPyVWz8mTEEU%HAA#?yA#E~CxCU?&k z=0XvUmctz(#JAN#IbBJ!t=;T2dsEX>v&}(s4wh;$y_9yq0#B{2>|UXj_Xb&xmUD%C zp)ymQY0NZdIx|x<{nQ{eO3kOnsio8oyb}jBD}XiJX)o{*KoN;#Gr3oQZb`kF?UOCQ zzlEl$H$-4Nx>cI+6MDR3UDpk)=uTZBs+JDD^n`E{^wAHeCHavz znP%ACqX^+CtuPy15?v6c{KXVP$2B9mosP{t`%jR^p992Hk1N@$MlmeGGSK4JVMWsH*do4z5gcm_^9t!QT4=Ms@fg5N62$Me^OI@k$WCENIN;BP@(LYHUrRc z;Z&H)q_U|(u9&N7b*&||1vWm%7XvYia;PpgL|Vv?Q`qKf_1E&$MNM6X-Md5y>e6od z`hJ?U*O&2AygYw|mk%!U@=^WdPF}vybWZW|!b$Uz&Di`bSd-%7a-x*cbNTe-zC;}l zQWO<}!QZIhJkb!w)ui3II+~>ASl#@JBxDO>+UBJ(uO4vMRG|@#ytx!ov^&Qclp4ME zXovV~+ISk(+ZM`Bq1vJ)@ux}~8l*sq)m^|#Kjr=uqH2a!(AHu<%oZAjsdl#=<6kaN zg2^fPaj z-}fNnPY%S#zu~?E$ZyQ;i}Qg-+oO~~sr9Go-TJs6^Ct&`E181G!t$Y~os4 zz$+KG*6m%z8*6zn8=llu9r^z0)2+(hC2}>JgWN8^Ec&srQT^xwR+aHWyI|&KJU}kN*dc}nsd~i)^mGaZ`$me z3dK6GZaj@}7=_tJjwuh-#N8(8T^&wbkt89VS|R@E0lIf@ZRkCtD+vz12IqwZSqQQs z(fJ!aA|Vkb-%>GJ+qu42M#Y^)`{Xgbd^m-SA`-vE%g=mL{_WqbwcD|d@0(z^q*fYd z=5rQxyycXmIG07kj=V5s@M>*-zKC2GLu#;E?qizjMF7O8PrebAA$;h*DoHg&9H1r6 zsVfUnSm+*tDS1eICe6!l{~Z=fL#C_!Inyf<*91n~=a#wSvBpNZWftc!Ju668uZ=sn zi~D$HxIAPU8=-MC*UGm#(^J#kQm;glGR0t zxKz(1Q98;Nb47+qGgQ-*z-)$MDwM5NYSkKrGgMvlv{28~vy7>gOO<$;t6>DQ!pU6Y znbA_}c1qTWWd>%OSZuheOCR`zhvi@n<9P@L)Nv#I(DQp0EilcxsRDw#62lHz5^FpD z%|kxA?`BVYKv3@Y(dQk69K$dpgaDy(Fkgr1AJ4%|%P`j=x5%w?x5e`X%#@)kUCr0> z6p)Qa3nL)dD=^G6V6rCh1y z>bY*K*YaRL8%yiUJXu@a&BrD=2dHkLglMzfW>lTu5>Q4W4{tomS*xLlW!pD+-p$C!G;bjfp;Bnn~`&6&^2|4KIa@|xx{=N5n zTKW@S{)!*>@ns=z{}trzM6Z2XP-%0TxY?LSVHuGki8iJoT12Ah1_*jcp+s7GS#R2= z;toHvV1pKc1VPBPq>9H2!tN{7?B!%AC{ssTE#l?(zrN05j(PSah^fy)%&9}v zIg)s$ukspCmF>J=sKz|GRcyze8E^(~XohZ7n$>2b&}OKS?o;+z5Q?lq>|y(ot--eZ zkSqh<9P4PDSe|}>&g%-Z{&c-gcN-*8Pw)&;s$Ya>FrInHKQVIW_Is$u%gs|Q*9V53 zyZ#R>@3A+(nR0*c>`x$WKFjUr&f^Yq$GKN=4>6n0jECd7u?X(fPzq|nD3}HNh;zg( zcm>)ebFg+^?Sk6j+Ofj%!pXv`3IU@=GlSW|j$J! 
zKja*Cj+}q|{1eWr9NPruM2jx8gRmS_g0|k#HR^`!IM?#h4r+qIn5w6VGfq5`bsDp3 z?{>!w#~}kclSp+A1fm?fneZ(7{uc8MyreAk`w{6aEv3`q?1C=pp+6o^PY(uzJkO_S z8KEoi>8u!8yqMQ_E~{!_9OkO zoRrXt)y#^zdEC zD@J+$3jjjhRG<)dLo-tJ)0Y4n{TyPj3b|*@t#Hj4eLA#a4y#T4C&)Q{Zf~JtgYkKEsJ8{X{70wZRwhGzoA0+xO1Q)Gz77fjj8_Mnp$(K~&4TeeDb}Nh` zJLMU|-h+@!714FPb!kfQuj=L8$gX6vIGye7BeE&+#sh&fEaCiI8oOao&k+%RINkNg zzL~`apwS5>97AVBu6??SRC?(BwM zhyk^pS?X7Bs#FH$@@!2~(+nTJ$h`o3-eyQ5LHX=_1?*eO;fa%WG*$}sQhti+yHxLU zSbvqKvVtF7b%QmY(GMz!;;y6OG4TAWr~U{=j`Y zdL0_THC@+LJFLz1xE;yKKNtr+*U<5&d?i-hJ7I^N{f9ikw}h4eMz?A0ivaHv zVS!jP71(jRVJA6RnF!32Dl;uezROsY(mh{wGL=#u(& zj?(jW`uSNVsw~R9{9muW!bE+_NekfT|DAh=W@hm9Ja;hmaoX`9>9m=`)V+|}I3LCw zhcX{}<_{uQh6XW%(?q~xGEaTmfLy#;7^7J>OlS>Np2pjN;$U?3!UDbmUv}@k@B9G% zQzs`8>B4q<(Y@%%U8wiWcm9N;S!yQ)cKROJNoML}TrqMc46LrTwAfC3Q7D>Ct68VV zZ|SH!*N63%`M7hgN`uF*U*p)0;qcvfw@ds+?=t*qTu5vEuAagvdrXL=N_&J*Qk7S~ z`W>0ja`?&_-8$;)kzF-BWP7=tvc&6>EAiJZ#&tOdaQI6MhlhZ4I*I1wa1I|UB|<2~ zl@VSNBnmUBECH0TF{zAiKX%7PJ-?kt#j1p&{_)L+gIn&Bkf5Zi_-1^^9=Vl1Q7UVP z(!i~TwX+3W_h*1<3cz(cvG(+2 z0k|L$EYV6?X{%VH-3X0ZyVNPg^$ZT!pvM>ZYOB_27CW?_vFb^1Tb9y90xx_*2$kQ! z2)ALfpbzn6*s^%F(CZaEJ8-cZxpnu@DLlq_{3cYC_&|08`Z+Bqq@M(5Tzy3$3aY)) z(>D6I_uUuUJ_~dC9}v4^pN=LS3REML44L&I*~B3p@d>sq1r}Wg zs+cA8M&0)P>4gRAy!U5F+G}!;KchR}{|?268Q4?Lb6=s=L(XTP7?gAt zDW2{bp6NM2D&knB03S0oOQWCa7Z?luW`BtGt1#O20dfFL7bq{u`1tKjw zSc1D@u~15M+zB9^Fr!4<7*nLatv8&4_sQ8`mIANZEs07#Lt80wxy+sUNX$B-bb%Q< zH9gb}Nx4MXl}0vBJF}uF^=ncC8#+-gAalGtO*Yg+#Kk+O(y5?5P(;XE!;^S-sAH8Z zd%6+nK}grj2Tt+=a3de`$_N;0DcYTx%)`~H@C`_}uuOgBBecJ& z>8g9rJ?D4M`TfpkL)Zsr4bAL!x6_5gkRV-8sKi1|)X6#pr|3-L)3^!|saCD`djr~A zX|==B{_}0V>XM~%((4X$oAGH0g%pHHt9bB5;qdUKKYww5A6=t7d4 z2V)`h@!%QG9gjH!WfA^zCuYgSU!#%Ou#uX*$h~SJ_T7_GsmvW;#cQJCHTgf-e-+7! 
ztjD%u9|-Hc%Lzbk;03@i7&brkg^U3;TV|8oQ!7Bp9yO4fg$QLm+@z41Nj|%$3bSU4}Pmke!TtrxBMNrSw?NeaMYA}(S()|-0_dDrkftc zVK0hZ&F2gwtLquVi2VCsVE_InC!L{ZVcf)yKGp`<&raF3+4^jAwlyQ(vOv0i*gzoB zb`?)aDk&we6qM#lYh|=DUNPY?O&0UT`gCKuwYt3u^}+;(D;PXsxOg^IzzABz%QIQ_ z#d+aO>c*c+jo|y+j-DTt{-M6U9dN!<`_c<6`;e#YID-U=Q|uf3foa)2zJ@a?Ra{W4 zn9BlZC6j5r=hha!%_YqM-W+%2BVn+sBdy=WjxB+BBfwdnbP*9Q5P$7j$L<;7d{h^& z7vw~2EEijg?Z%E{m!nBdtF_fmy$jXvHz9o{)IuAoPqlu%583g&zuJeKpW^TtgVtE3 zTy3v(=m^f`*9y!M(N)h*5q&Pqk55W-&sPYFZZ*mSL+Z;vl>35VPZR18aBzPj66D_>M zS&Y+x8{uGo+Qk`gG3vfF3$4PCAMqqfXmqjVw{aFU9!*$3O465a_jKdl{!xfsdwwJ< z>K&&rI+KR9DT>oi^V?a;iMy#@Lbj!(JhvT&=NIs2{nuZ27J^_Krj~GKiac;vFl2)g zAbnr+c>Ku}VO=s6O;C{Ez;99F@E=_a_7ftnYS(l#A@hWvi(@3r!Ei!`=r?Gwd7=r9 znAofa%92g(Ocm|z+Rn&A%?I552Vg?)a<6IoY3nYRd%+vg{EBy@7|&h2o((ZDV9B>) z@mLC|xSMPJvJabutR)6aolN@tg6Z zbtUg;H0Z;7BJT=Z1Nep?TnMy>!hQZ-xlN?VTZM=5<5yQ!Hd_cyS5cr((Ls|hAE1$W zcQ_o97z#Xrw=i}{V2`*R+mFpc%(J?=v9?&--rU*T-3$@0(@L}&t!8U=XKiP9vA2Nt zr??K#6YI4GKFi~3b@03rpc>}?XAkyCL1ZbZgc{GXje?K)2Pw`5mAR~{Ym#dDwj(}T z@8*UAN0;)fVTwUXHsw+- zBZ|T3k?e|6!ZA@?lr*v2L4F)>ylt~GyOp16uRKW&1}8PEw{Y%UjU9!EV#$F`Co@T? zbK)I)vzRC)i1~O+v9m_$6sVLliEM(@4~X6fwlHG%p5ctPS7C(OI{A1@KHn#g3n&W6cKnWTl)}klE?PlvV#dpkrhXAm14e>uhwhzm3%utNDLFuD}ds0v(chGQII^>2R6%zuzKX5 z?pR3N2K5zr_D&ya>`6-8RyASwh+!cI`{*YW&zZcyn7mwBEC{gK+mo#e2Oj4dahq{m zBq4luEmX*U=+_vHr!xkc;g8x!Y08XxUQnBR3vYvkarxcQuab2Kwi3p7I`oKv76465 zLjU)mEkcqu+qFw}*>2e@Hr|#ubMZVWLYo+`z?2&g1(~9Kj`jO??uNL#^Q~b%KaA7-8cLuIQMR}MSr^QO?JHd)oXZRK z=Bm&2G%gM)I`>&ezCC;xZV*GbLMlj8GYMS=7i| ze=;hRCTSXG?Enw{f8sa?`o-DOXQZ;tY%ALv^+&U8=rc&}S*zB-4wvv+#%oTI);NU! 
zcg&T-&i#^Qe#X8|J@McfGA+d`=vm)NssT>?90B9ixrj_(FiGBFX4nL~_H2Nkw5OA# z8A!#H<5q3Quq-z%vvk6bdw~-8e;;7aC$YVK)Id#~a2sJPH-GYD>>K}(XhMQW>jw~; zX#JL*3|S8h>ZFaaoM{m<0nfCD%%X&yj+*~A3hXS}#|x~bM6W(v$LY=5QBJ3Tt>?TF3}_p&&rD1ejZ+&bSj+cOa5O4$R?f(mJ~cmn=0sY5 z<0tcTByr@s_9v!{% z1&00T^xD@ijO-knZ6OcYNR(yG$O#PX;3pnZ1yn+)l#Pe;(s11WaqQpWlp^%Rwr1%T zG!U{@&MK-UmBf1w92V(Yt7$c-=G8iO1B6eA69#-bXb`GmyTbYAS*1hxzL!0+=g4Cb zHh3)X*zNM%$3AUJE731Bm2(uAc#JLo;DaB0cpGUb zJqY>#4yhskKDK?(J60nln59@aAX_2hWtkrm7c09mR9=>p?&nhAu=ep)P4Y!q)OMdd z!3o?;FF6yJ_Z0nUGA*)nkVv6>2!CDsos=hM5%%4{OZ$JZe@S>%MZB7W*hZx8u^5ZB z49S#yD=sA^G1S>aLcC=4guf*d zDCi`UtsahFqSe=D7Jf{19&mDgL)}chh$aYsJ$q(t=8Rza!o*Ev44O`-8bQ&d>8h{N zOfT-myURM`4`VRtWi_@LyA-<>&CK84+Swu#f7Ux29a8_^-PxnOwaxX##wMI#?+msk zDI&PdQ=#PZ=!6uL=8)C0AuRw!1eAs>TBOS53)lPuZDW(8DT z6Gh!OEOAThGnSy}Rstjfid^N@$Bv{*c^AJ-P>V{&vnfH5T^?zsW12yM7FeG$P08Zi zK;~#{eY}-aSSpccB~i6fn$BCC%rFVlI!?w^a}sx)l+KZ>IBTSr z@D|L+!HBg>7~E^IC!y~E?LoSjDRzq8BJex)-fTD<&8D-}{#u`hdXJZcOgu-zQmUE) zINMDT)mm@TpNwYX*?h1%fD~TCr;lW-7~7t8?e2M92rO*%oK-E)*WQ*N{C1lL3-qLcvHZx zRS;+aiPV>&zK6PK72&V0<7kC0Mj2^}`!E00P%I2_Xf~JPg9`p!BI!;3IQDNzO>+}x zCKqE@V>e^3#2$s-2x*S?^v)}H0R71qjxHWuKe%zg0vF`T{g>{Od0B_oj;`G}xOu=T zjd1E=(J47sE?vC@I}EMnmtT5H{4Dmjk6(O(pRrNh^R&4cfxQ7402@Yb%J$ znhz2Qs;f&S4qiS0_rT>PVyScC_Qx0;`-Ym-0>fe+;yC#M3P~cB5mdvF)B=9fiind! 
z$xu{*qJ9D`?PcW89uLPC{ zN|B2f#hhx$mVUpYuyi?>jmx!W)<%A!FqMkC2#NJ{Kc^=v?*Yt*q)y_~aGeePMDO~e z*dN6TVcx!}`$Aku3MnBgk1@4PIxv1xxcS&vzWt%VnylPjM~O zkdbm31D7q4V_c~lt_N`Fz85RSwqpmehh*Oz6bG)!pgOG1s`Ki4b)zZ~=Czj9vyiJp zhZZNhWyntE<<;_Tb}y>~p9gyfQs-9{&Cr3+ztU>&9PIAz9q^~5*QJwqe*Rb(c&qQY zEX$X9gcV`vStWroNu{Cb0wdMU9+gVtWw~~pp0GGle9}*HIo-whEudJjlF8sd`*|VH z%YDbtJ>5%5$h7_XLCtJFMRkfRq719C)GF!;NWQ2ec3l+MKpLYi#WkLm$#^)jR)0Za zhJ`b@NCSgyM1hRjQh-yR_X;5w!fSXT(~w0R!i$b<{8xY}3wd3))~k`~foWK|Y@UW< zha}3$M97gP8E43YiI5gl_+fbO$&#F}a=-7xPMX@1kEl#d%R84olgSY1n&nDSDf7)9GCa$rrF4EF@1HA`)JpZVkTb$Jkc|*jugQ({T`cci0DUV_q!N z%C<_aatlt$bZxe_zP7QpHQ%1^t{tqIq3%saav}*jZE{kQ;KlUW_I!I|f3d$i-J3!H zK@M`$smxN647BL8NBz9FhjPL@7{Teu37MIvw2EKOv{gv0M}{k!U)sLyY&)+x+uOgx8WvA0tRV^p=a}{}#rF;!Rt47q)vi5g){`paa!+A#)5lmU}q(oCilc9WZ>R;gR+m0;~Qwinxn z+eh0M*DkGj5Q%GzI%zIJB;KsRydI#EC)gyLt>&ujVz1Z-iT7l0vOhVR98WGyAbFE3 zoqDI$S?Lfu0+|90bDfA0{Zt2240Y#36%k2hMv=1Y#50|&a=5f@(8|XHCS@Yd#Hf_z z@|nzU-f=$h-lA137Vi|-iBBS;X(#KvMR650lx*c9Hn9ysWaHu-eN)jt^~psZ-`aQs zGILa+xE$P30^h>EiAr)nsO^M(rv0eXWP<{)maeDU=}x+z9;Bf+sjn_J7F%oEYkO<^ zYXqlemO?y`qe00B$lIM>w>Rs}d*Gh*_1%r##p?FzPH(?QDC#)-#Th>h+@xhO>z(UB zOS0NebxebBw$dc(#(hd~7hl}ybR@mb)OtzV(#o7GNQVv^C#5`}rulp-g_kUyZomKi z)}>3A3aX0ZtA`3S%BLhVwZgGo4fQ1m2Wk z^ujbYBWe+lFF-)fz#^uMw6UdaYe(9#cH`d7doO4Yw1>iDLC5P?z&;B{>~5!buzz^# z=4}oZ692B*XyIq?diaRwHr%t!lfjvWK;-^mbji>1`Tj}m@I*(0ox=pXict{=LX&%* ziD-`PG8)67y0d3bq)t-X;kLPLht@PT?lA#zi4MbX*(3n(_gbE8m=G6=zG|_$JrRVa z`D>Qb<7h?#eq8FlSrmM^T$LTMYVL8WCHDkdx@darI*-GrI6^&&Z^X^CYKV@0>{6T& zH#oLXH)I(53@HQAQ~w!E8jRTagUGYKKji2G-P*+oK_DcAQlr|amFwkJd8OPf_sTl> zh2eBGosH*X;umx%=7ie9d>jf^2@uuQOO4V>tzGL0ePNUyr>Dc&5PI(6+*=rx5n46m zTtCBjT|!Pdnd||qoY1!p$gC~ARo_mg{1>(*>;>e07;0KV^^NP6l_y`HK*$_?m!~z!5e%Ha7$_vbQRsYI@rFBUT8Os!dOHYzKXcE8sjc19hQ ztX#~aJSe>YSrJyQkS*m(g-kJ1#qq3BX;#{mPG!&d`V zPNy$rGS()9u>GBURjMYPL_-+bi_dg;OT26$W{1Omfgcb$ac?{}z4T;a`o3SCOfC?* zd`d%BYC6p5HcFVwS^(&H6XZ%6yRk!@Azq8U5ytsUC0i*}iWP|Q!}(}F9nZ!)t=-na 
z{BV9ezc4q6s{m^6Wpo8WYqO9{r_$McF5k&_^P{8j(K>kL{BC}4b~wARe{r8IzA%wzIx>vd!{XS+VzYIbvPINDJ%&7oB-p5{Qyb>z=c0ZTTV(%Ba;ynH7z1V zVfZ~ndq(c{pU)?grYI!fp2!r3)@{XRltQ95#J4Xo1#-%3t9)+DM>L z1ECL^Ni$1v0o={x=h$H7WA8J}-%2KrO|xs3+RYrSs}W&CHkQR_y^UA(<=ATxi&nmL z^@XbsuD*Pg9AlYE(J4?3l1T~8VQUCdvGw7`@W!>9*B)Pcat$UW6dpc!*OJ7^W%))O zI>^0l9~&ZetuH=$EG>Be<9B2LkhQEVpM?G6vrWY1QlIH0yt9Qy+4~ zepen^7R~ckhIe_1;dZT;M7**#EA$nSOS7DA%5tT}Gl&)VnZe6^A9tC|ucJm(+bJpx`vCc+mFVxJ0k zsIk@D+L`apSqb|RX!W)7E7+oY`9*#+fA!|Io3{_{91ve(*B6ZiNNYBmTg~0(UQ<-? zZp9+#vC?UGCY@>L=-~L^(uKI9z?-Qd#_&BE|ct4~ps1RV2KL6`aLw#DgshR>mRUd-7#&&hIJbXZ~46)Y<& zW&C1wIK&&%@0ZKAGYPi*!D8{~`z!cYjr+fM{VT4i2Bw*{Ro;efO=aDh|-WlM%!jtHbG zWfD93g+XDQnPh<31Z0S~AJ9u7yN{uZf653AE9k}ZJ9<73UZPJjE0G+(SL^QBpryr? zjFawX91|HS)L0qpE5|L+p=f%p*J|m3s%tF)iLAqVqc>Q~;aRnbKK8N7YI-eU*iHs} z?ov1hpoD*v@GErjRgKqlKCIsWPx~#5zq@2-K;lgLb}$`|5FnvdU?H0*JNR(uGY*n4 z2={qb*4YpAa}Q-UsBuk!77w)}{W3@0e1bRW=^L-#FL}oPX4|dZvb`G3=fZ0)h8&Q8 zb25eD7TJ{)FjiH;NSKKfToA2MY5*s%j*qHFa&1Dz$F3N72l;xbUM{YU4SnSQ6r(Zj221A?W$d~n|8b0DFfgqWJ@yJ7k`7=!nrsb zN=%mJ>E(nS5^$F@M)02i&GN6VtOT3FD$+Dt)J1CO96kO1qhn`6DTS;5WaNzT%^Ago zz*Sw@P%J?Pul|aQko%hIAV>In7=!p5@GZo_fI0pcpRoXM+$j!TL^xP)O;@J#_G){h zy=X%Z!b@b5nQSJPDW@ta7%T%vE6h{CN5sWI48g}JHG+x?D3e>W&a7J-)P}uDZ@Rv^ zzP7Sh0k5lIC*lTxKA5u}gXtCnmNn5r)eow4S838iG9+aVk8k&jG@A$QW|Aa_@50Ybpf-%&MMW_esxIgLd$mH`8+b*pKyfMj z(07BA+9~vCeY}6usIM4Zcfb#?J0LGbq>x4qBdM=7Rl zRLhoUmJ!tRQNi%7+U|ekbe2l!h{Cp%l^ogE%!Fw<9URU?Q`a;-Jg+|>_3YDJBsZiTNUJ))vJzEb6poXu%9CKvoQB)D|QfjF;o(N;}+lCl&>6KJ-mB(?-05~Hl1eD zd|F5Y0dFn6o+cb{`PQA=cOKn&@eZ8Bu+7x7 zw8%>uJ~l}m!yc`*+Uaa>3yLCJoG5EbQqOq`VpP^ju@0{p8cwg^;zf>P7%3%_BZ;nM zdN~YeTvHNNL6ZtF)DS;ICXZigsT>8@NVUAe)G*n+Wr$oyRTWvE=(?jx?|nUGTctLc z!$4+B$}s`k7tG}o2qP%$%0f;C9lOsGOEY!HaMCze&6C=p0GaL#c+*=OTibg(dk3qB zt4E9D#r3TlTLhg}z%eYx&Ol44nyqC=c#V%QUbuMq;+2axF5bKdwL^Qpx4O5nx4{3C z-M#(U!3?yuFfTGSO!s>O#uHr${}`6U!jSHrwqJ;>K;#%rDr zdB$(*jKM^F)&!FPZ~+z_#!zgvfEgk_E4jG#va@q`#y@%ZN3K49MZWAAcf?1YT~+&T z9D5(>-MquezW>Axw8zPWd%Ty52M^-etXJG-S<18oi=!$*bPb395&Jf_gorl+{1>Pq 
zhtq_XW8A^xIQT_Gv2(8B96|oXAK%#8y8q{890wYYzKu4}0YV?3xYxc+YK4DE=KD=z z7jVvhH(UYy$#?hn_7C@u_Q7f2nBJU%)7E#kcaM)QT)%q*r<helfq*v$`ou>;_nXb@| zEK%#sf*Mm}dW%}noUUkZ2-{jk(=?MROJ1vX&AGjWut?*j4 zJdVvjBz*5=q?CnIyBV%yBW3_T)_9!fA<~Zqf~^CcA8=fmVbeP68AM@%%@!Fmh8CF{n@F^OI7kg!0SU zGR!Y7k}DxjV!hBZt-uaI5ffS_G%BEjn{v`l&di(GGXEJqJz+V05?yS8z^Zq|Av|F_ zgZ?SZbi(g{;9CXT6@ zvMRDkDqli%1la_BDCV2(w8$FqK6Kkmy=JdOQuj6WDXT{a+;)1zjLD3 zsMIRJ5F~L>(q&y0HIkt^qAQX(=!uTxXtFMYjLtPZv$Zl=Ar#Y2yGucsgA8yGUY4CW zMd3_|)sHSJBb=m6_ynDd!EquASokU(*`|gl;SuL#3mZzq#0i9|P@b}d`CZJMDR)5a;{FHzig5RA|G1M@#f zI@0C3q9R4ZD-+_qKaI7ql`lIomm}Vg?3q4>Oi7chO?o}EzRIq#o9q_5!|t*nMj&!! zD{g^DEiHM+%UOX*f)v)Ib!m&+=60z)$}$??#GrpUvDJP4UR;iOIOBYy@|JLNbzxXm zcokHyg=pD-SD4Ow__lcTp7%JfzN*N)tYmbX6&Nlra`^F=;0hpGM%5KtdoSq6t4|UM zJ@9Gn_hi#DP180#)NUj1vMAZurLmNRLq2|};Pv9IqGGFp`BB?0n5Jdz=_x&9@Nfxg z!p8~u1~_$%Z0jg?C3Y*COajH$VTmDhR3V`z>iI@~m>=Z{9d*zjk_&G%8?6)RNQ3xX zd`yUU72d8HafI`LdMS=NAs-3D>N0x4uR_+%eOd{416izRaMbOxVVHs}7X=KUnHp^f zA`1X$KGcS48>us8E)>!JH?jF?`U8>nvHSEKhyQ?12ef?8^5wYw0bY`2!CX@X+0b7% zks*qbmDyleN@pd8ku);%x0ON3ZYyu+R@QT0#sN<-z8d$Lz)SnO7cgGH`vFFsZbAwB zDLV1J|B76FszP*|p(1TRsZN}c$TdW((ZHwE%gZc_2Vjpv zgy{c$OUl3EY&o)45i1|Z0WU{;67_SeC-m_lHsv=oG2oQlxa#Wm|7YyWVTyuEFIbynZvrPG{cgs9^n;h&_fq>-zq9UL%GdPYduf)yw!^u>1j z*>)$F)3m?;d5Cp9Y~%hr-g~J?MdWQCKc6JDl|aW1ux!z+EKoTxRZE4D3P5LfPfFHJ zs|0Zi?wlm9VoSXH8!vd*t48TNNv^S+9@J=NgHCX&NAf23D4PIF!5Zw^U@`!cnn`tnPS(%)#Y`zv zE>sExAYYg(^a^U6W;SM;>yuNoi!%-UdS7}m)qx>|+moHH;|62&Qv`$%&s7Xd1I5$x z`SP`5CR1RWHJbjtb8vOgL;QiJy_Ds7E}2X^cxCm#EHm^F4G{k5iKd2g{9p0?I{7f9 z71`xm;+t;{1dg*fO>oQ@bMfbiZzt@S@l=kHdbxxG7E@wn)>fd~m8LRO9%q_>g^o4y zU7S_Btm_1RkOKZwPzZ{0Sypfu2YZ~*HAF!|mQ64wQ*PFA)$3THV47Ka@#~M)?d4?F zNdFrn!={*DSzcZSX{Ob4=jQWx=Y9BFhRZx#sf?!|)7S`VyWfueX_{2IZpOSChGM3t z6DmA`;h2t{_ws%|l?QnrV4>8V87J!3ffz%knyo^4l^j#kw|po^rgA|JqKuhbBPZ$c zP={k3MWQAENk{xTO*+8o{s>xYUj{e1lfQru*L&X6eWCl*e|b;nI21Wk6twyQ_u_(-6gQA#VPmj-A&VWP z-EOyEOXqAq^SI%0+)YWn$#F<8A(iI^l`AHVB+i#9#x}#*uoL#e2wj_yAcnN1>RfG(s4KU-5JG(&pJxzB_2N=d 
zgmTh>poBd2zV#CCSxGq9aN)yTelX3>|1VJJd|mR^H^uzzm^cx9kZBw0$;{ zvPx?_lhlki>&SZLCXS0N^Cn7(45ga?NU{GpYN2_YuPdZ~^vC$VT6ia1P25a8ns_qt zOyYLp{Sofs?cSCCm8961G=g%vT*`!*#rdWA$^82KrJc(=5AQs(^VrjmKmF9LXKp=v z=k}eKo_PNgHb8G5S$}jrDK{q71a!9XX-4R%ou}?RedpObx9+^~ z*o%*WU~Vbp2gO2ZVZ3^cxTpro z$L~jF&}kLs9_*PDg$(>C@@=D|CU3PQDiihm%80X@q8xJFO|&vpBLY6&^oIrJY#XH z&X}AaYjxego>G-1HB;gQkzN->Ft#t{zSe3b2v_5;2tLFib#a(zupjH-($gpzv}7IH6;om$ zX2qOX6ibz6CDNG%+hgJ6wO*YeM$%F;YQ_~jAtyQpuhQ|!l(6xn^48}PHl z4-+#uw%tn{;OO>R;@2W{Bp_RWfmPx|Quo~D_xSVt0k5!)ug+P`iCZ{ZJc|Qdf_^(x zFRKrz*VL%e57nx}a~ID64!uio!~6QaWgr8zmmu@V?dSIk`=HA{*YEWq%G*`WDp!;v*+du?Zp&;)R@jja+kNksRn zkT1r%U&D8k#W`MLKCThPD@>MrbmH;cTRY(6cVQzxDcXQUl4RM|bSaG#^Oks?}`Gw+5|oeWAWwUMWvbt)Dtw-Y!FLz#{1~tFS(sVl!-(B_&!y zZi>bStgTLVw$CtAasY_|c#_?X?t6nfo}6*t8r}=6$Ln5!krLb_lwC=e4E?qSSs-`N zQC-zyxQ?kVeEjoIY;WV$+ueO~)NC3)V>4U2WPK&X39)J*#xrzXbkb1YG)&GDgsT@_LN>q(%X>F~m&1qw9 zf!ms#o_Mjw2IXQ88bUk2HXY0nn}gbYOL?Li5&6`U>ke#|Sk!3h0JO8Fv4^7@TacN( zS!$Iu^^P)VR^!O&S-)*jZ4W;0X} z=YEyu4V^)rru(6AT1~0&k=JAG5uNa(0dojhavfXbS`?Xd`(474GkJ?o;tT;{#PGwcz zxHry|SrrlK71|xahO@OSiFqQuuI+IyfoY(F1rG{qkNVRm6-Bb9d7(B*J^dfIcVqTEzTJ^#X* zELd6x`g0*8_*$OD>~}La3fWkN8=#hyGKdZ-VVj>A3qPx>s);skIN*dLt!+3xC8`() zg(B9A&7@DHa%?jpN~M_rGqL#s24JBcHZ`njNJXEAokc4Pv4qAB^rpT`8deM$RN~Oj zetaxVDV)jc;=4_ln=Q;YU9QwWJ0hxHX~!t|&PosCqWE^#*;IfOS^l{ciqN0A#TmQv zII>O0;--2lwV(&OL<^>0mAn%X8wb0Gf;w{R?5M5f!i`OA55n%mQb|!aYi#JI_4t@N zhaOPOK_16$GLvjRAa&>5u^#gNl}`|1(%r|M)TbhA8hy(x>HgS{+HW4{c zOkn2i2gAX%mMy#>3o12GeEYG^P41VqwYIgk&9NFk`U`+j)7`02DTIuo^qE1va1b;z z#)?s+dc6)f>RBoBxQ-zI$*V(s*YY|1$=$DiUu10fi z?dBR+)vJ4FA!wjP0>!Q$Biih>H|#Z*96ZaK9%Ig=yo=LT%1Xm<6f&C zYYK5d`FN6tL7mie!;r}^^>Mo6;51U8I~oQVqs~tL+LnB0VYd5JIA+rTrX6Yu;M(e7na?Vb3 zcKc@0R3#<+l`m4?m=uL94mVzH9IGK(Er>_<{`jet7gdwq*M38goy|*7k*OrD9!zcou=~x6zI(JHFHFY;KQ1w#e-FI!uHOK$pA86S^H{pXqfeo{!!UNTFrs6gvvA zSS((A#OttdlWiN1=Z$(Q-WtLqurABcxjjdfEjz+0tx?SNWcmUUzNgyCDK`fzWGRKJ zag5f=DJB3huT^^^`INlvtXNNIFu?W;kaB&C<#s-!umiN9;l4-96nNwSgJ|HMG-i2H 
ziYsdOK47SIv{SS$-%`ZueR{iZ@d|IGK|^sNi(Ov<)$lgO56L<%+`rx-2}wy(5fBh# zG!myTTedqZM7YqCnYRaKw5ei`FgzuHL{esTDcp0FEIX%N8$Ce6byp9g0KHO z;G|775numTsr;BqLwq^V4zm(qx``5HN;%-}%y@z#ER}F+NXB$ZzzBSf__2Z6S2(;I zg*+pMmk$eeUD;Gt-(AGr>+!)1^Ca^=Q|<~)VS&RUT+UCq_>-M&j5m@XTEd5<6=PAlo z_z@`J?-uk=^5j*!hBL+VGH#no9OK9)pq=b;%iK5R;Xn3&in+Sb>JGmj+#P0Ti_tk^ zGCD^v#hbVGk;vdqlu(9e&9KsBNhQYz`|5*P#Bbw|@N;G|E3-Qaa>wHT3ho#r;I&^t z!VAxV6Bq9P3vvRhf@PXvs@DiQm`A*3PSZG@!{K|B=e*kfw|&0d<>(FLJZu-pEP0{u z5+RA%0X~$4u8G!})|oE*GWT5f1n-INN_(xN-PIc|P6Tx1E{G(WP1wCf@NDCnR|Uif zyptkmv`Y^7VDM+Twsb4Fy!SUJb~hgG=e0MMAPo!9Q6#9c682&SCK-J_wXb|#0%SJZ zy@@L~>o48`%ZLKX)8guMG89v$?b%l;IsKZfDkKAf>8k{l;e4R@;$OkPr?H%znKfPl zgjVeX%Ie0GCq0#VDIHE|2m!tPriq-r_hq&9v*Q zHQ>~O@W=!NAX-6-ptHG);O6qZTaU5XQ>(SQ@BG}Sf*6@^(!)c5lf|cg z_*|M8q<6JNz+ZK1zeWs;JHhX8t#`0wNZ>_SfJdpH;!T4>!2|>6%ya}LBWsGF8oajq zd1tCuBdVt}c<}8g8iybm>-AyVC`u!IvQwFDz~55TXLM6Rh?a(%<%HI0!xk0VN$hA% zISYu&3koK76EiGSCX6ls$>tkVUG5-dI10=x3StJh^NTSpkl`$Z`S!WUIAi`G#)@iV zjxtv9o&{d|*O|!ctKX`6W0Qf=Lt8US#bpp#|LQcK%^Qz#D6H3t-*Aw-;u13L(_&7Y z{Ovf!ZQRa2(TicmOf<>^v#p-$$_oF=eF*2G4lTS8ab)5<;X$RM>1(>#qq+XLuas)Q{?hBCk^yujFq zTi`zAHRLR3yc6+M81%e9Q+Hi#IQZnUN2{Lg(4Pl09DoQay+ENxLd0AyI(0$=qY^>| zMHNX!sy09?t^O-AA3jr`w2GL1(!X1Mpbf)nZP$%CCu(IR^Ir2F7eDXVBPm)*RelpB z_}52->gB^S>xHtJJs{3YwoF&qpX@7cn|5o=jB}^9#(@ORFRPLlsNmIdrR1ZA5z7eBkEH$@QYV6v z>~oZh!ITA0M5fMYZFPCsz24jnO{C(szV$x-R8oJLJ=vWzEFO`PJItBxB5Ora&O4p8 znX<{va#Cb+bt{~yJ@w7hoa+HnDW5O3j9EiV!?~vx(xyy ziG^ctS^#6Ec?C3HD_xLCKXz6UtnAIUPE<^Q;8AE@Vs zjW^S9r7^~55$VHDJeNRsJN23D%?VYl^z)yhE%Va@ZIVElm5IG8I5kpU%k?TJvm6K|ENW811mZZ|110qMz=n`mp2ZrAh z2_)8vGg;!Wp;$SMFy1Lf6q@ixX={h-&?0>j%5tjtrtbpiQ)3*tVJHCajbAmk_v|{$ z7k#6(Z|-7%T?>1tMZ8G=hU(+X6gqIZY7;7UL&ztlFwGWc$Fs?_(fa)9(sE;C17wBe zI%>}PlYd8P%fHe_x#Goz&E6}Tb@VgBXj*Vk#w@G$>JO5sp|;f+g^=Z zTgM#tCNXc*FPaXo^0M+LydtabG-RHeR$Pu|CK(!EB#{p^>xsThI8>aDi85G}VVD z4vNz&zdcfqL9j+rRLaQTF&u}vsdyWGA***MCu!_uXJ?2sOv>u%Dq?I}84=+jbupmA zXfkkm3@{9KbsgsWXnw6*Z@kRFV=jVL$1;HiS02uc{gp_&m+zl@qhA(_-W!NbK(Vgv 
z9zOl6KsGYtz)@|M6cwS}ls~|qV%x}D577&YT++fSYb%Z642A|v2Y^W|$Y~k{J-&)T zw{JRc6*JX9!fI4;Vdia*(^#CZPKiKW(;KObjP0Nov1J@^LarjKEV2$s|BOCACZY=*Yvy?aYtNou-M2>=Vlft&;EM_`f9Zq85;Y_%nDd5p%=8UfA z?)u28Ts`ft9XDX5>S*kXAV3e`pu)610PqaMJUcgAtk*DBZv!=?6FZMjPosi@f=L9o z4>X2feicHkXYsfApts06S2J^<*JA}&;njU4QUdrJjpIh-A+)Iz9Gsd66gqKG5=uK| zvRo2S1pzo=f=m(`s>V96{be?;2p#lFM@0!2d#$CN^OrvQ0_z)zs5dN`EjQ+?iC)#! zcIA|50R|FFll1R)kGKw0s5E|$0#1@D`W0YdH3=x#zD8TO+Ag0ju zQ4OH8PD9&eQg-bjFKsg`77Dyqh+0c{iHRUhIZO*)TIHk32`C!Imctfm)|ShJIwAp~ zt4XHD;4eSB_bAphVbU0Q+uO+%r5ME~9=yQlE4N0=w;O<$ zy_%{0oZ2b=GC!yY0uCv@{lb zP*$aYjrypL-3LAnEGTizHD+POcuP5v%=tq&HfCI}y}f)U*BUHW$iJecpkoi0&Bu)W zVaZa(CCjaeLr=mtXu2k21U}x5Rd}-97o>mr<4St9- zrF^{4MNmMpKv?SPs{<7;q>Sr5<$}Fs`Vu%lE9sl$?mkRMt}eBJa!!Iho1(0~V9eq< zp5rCysywl3K03t-8&z$aM@1@WoMkH&Qf#3TX;86RbYC8GwIzZ@)25BQyT#m3_m&uh zalLxwG=k^^9D@ijhf;6cOu9To4!bAFt88HbbCWKwPR)c;LOTgg3>%P{o#H5q`^^r{ z=@F-NxkZ5+@%OF96S^kGhT9x(bys_a8fI!->H$n>Lg3r#eMfJN(Rt(r$?Iu0#XAM< z`*y|69S28av^kjW=pOFC;oAN;l95S1s_2V5J=tGQ!60j5ok&JO9aJM6b#NCu)I)iQ z+&#+je38Zg{skf67mXUO6W9(YZA77;1va2F89dL+Ac`=)*_{8l$(bgHq(ao#HNj=B_Uj-O6Wx4#lJ= z3UbKPQ*&1v#;^N7dhiS}aFdYTsI|Th%$*SCNfhi`l zpZuK#oTW7*pI*N4oe7Czs-{Jlevu`xh#HEEgkcig^qP8R;|{QqqMi^TUleE$OYj}c zBdp@0T5t*(m>atr7j{92*R|wZ{cL%$kU04(Lw**bc{YkiNI5qOk(zFbi8>i9HpXx1e!aM%Bx9kMmdDs1=LMp(hgZDF>3aIae;5qU!?hd#wY9T20)8CA@9>I{W z5!Fy9KX;4D-*P}GJ6|NOPlqwT$S7YHqL>kV5?;*r zI^1vwKV1THR?GuU2w{Mtm-)9}**hfOIcRDq5OP=w4m^o5pJc_sHa(f>t7d}3Nqp@| z67iVil%VlkI&$AgN&<54 zZofXHtcdzw!bO&!A4lcg*lgSRF?@Tb>%W6<7k2WBfm=om{ zfl3jRk~H$rU@+%y1|e1EED*GmY{Q~WO#iB2*NYU$8(bYX_bOtlL-ane6EW z|NI&52t6wzO;)7by%-g1B$i~ck0B{)Ebw2 z%h7v}+&O#cvazO^zlnJ>DbeOl1v{GumhIKs@bh=nTw0aZsdL}_;ZNxgHPPv`bOwhu zt)v<^VI+v7H{nq~bP(8^{=94(SEkQNFO#Hj`5lRvBLJCs%;wO|2Q80owi7@|Y+D@k zIzZ?}PiuT?aT;D>GEbd4rz11S+0LDH63_H{b3SoWo>URa$5CFH`}7TOjR)t*cxkx` zO00}caogQbNr=6`|#dJMOe*=afPs zA(ya*ZdaKxvUSw!+!Y=l`M(aa=aqsn4m^XGu0Ksb(8`)O3Na|vg{oJ+!Xk;n`Y{E0 z*O?KzLdvZe56@)Z%t#O9!msRu&N`4pzDPKdG99FJl2Q+TyUXyB*p9N_`Fgti#UKk0 
z1c+3BHuVz3Od`+XreennpE94Y*=Tq|$6GZvmPwb#;>XNFKdha zevfd~larH&l#B4zLOya^H*wSdCTvWu*7d#c${&ffbn!{N4t=Z3z17d2n%xH06KpnVDR02-4LV&&YM8ifnhUfEIM zp8$6^nAJ3p-~FcM;};vzEa?u@y?8~S=>iMZZH0Y%06W9-Mw?pe8iZ8&{+JAnX|ByP zR!Hu`WS1pMPJQy(}2I$;$jiq z+tnwXUO*C1f>*21@+5bitG=bp+vBJYHv1wx1E)#B_=MPVRPSz;bNW7FHAGN$-c>;i zrlU)TdWqjv&}8J+_=vBSsba7nA*}_|S)6I0Ud9kjx@FqXkYveKQU<1|_gmmkFx*(N7X0h&qKuON(k1sp0Wq$TsAYy`h7YU-zLovH=sdT9;Fs*1d5-P$LN|?{FY58x4r?px^OhbF zNhth5(6qr3gbkx*kP)C?FbKj#X@&+OKxKGA9LlV%hBV8WWvnv3B-R)6a#=`akP?q< z=O02+h<#Xl1Tsmbh;Mp+7Tek}e_6tZ8{n=cv)oONytmWcU0|jX4caY0YesTgW`L8(N7Xz7TUA@lTw-z5= zKsZy-FAO>fyD^iIHgdA^YJ6zXmY$u_`t}h&DH(wBYt$Q%EWR&5}j5UhEydSzfryFPI zaC??+bh{h}9Iyam~41`X1KuiVSy;Y5M}snDRc zc4vpHab3P^xD3X>a4T!B5GMbvG%dZ{k6)@$0S6NW1F4+U6c;p!6kvH$KTVFE_5vrIqADtZ=lxB3ZRivK9AoY2*iQj z1OyV9{C_uyhMi16hLBXztdR9!^HA*~cu?Z-ZjfN@gZ(&r&;6HsLHxOZ`M^K$#nF;* z8l{HH?*L-~wi5jqazNFhTz6dqtNLO=dPMVB%c*Dw41HQl#}=|?Wf!aO2kPKyiAfH` z2=Gpa-RDX@tN2bhx_;*Zmo8s}WbICUKt`B+)qpYu4?v~v;_p=MRw{svvaw>#V9W%I zy4$Iqgv^owl6myuoUzi-SFx~U@-lcMvi3!O)WYa{4(NcjLiEcZm+g}5cJ%8)+urLV zyJGaC{9xK?4|-%gh$=5KyY1h^LV%(jn6||wjP^_Z+~|D=8-_uV)jzxpYryyNSJz z-YTgULI$$eJ-s>)fQrwhn`B>VFa?7u#LV;XVwCN#9K{=B{dElxqG2&zP#Z}cQdupC zl5LjHrW!0JNLwRf%f#H1=uE26P)(<8cI;P^oee6FI6+)(@7-l}qG8J~@xvZOWs24_ zyXWA>H-!BfRj{X5UC0=TX87!AIwFl!t*mj*D=V=VTd^8&(?6=)@ffh4 z8W5Y71W}FH2<$fEF5bxu=NKCpuA7L@WCQY|*E+aVwKLSWNeI9Kx1~D4dfoj$0u8Xm zR)GPwUI98{fq}b6(-~l=8o|*F2?VLAX4JWc+fHY}=WVyIt37=i$?iZVZ?&%@7w`K$ zk|J<3cKBk`htmJ~d#}37d-tlq67-q^9Xc{O^iCdE_i=wM-f%3y5;vFnwMg$3pw+EP z{}dHug%$D^yzYCv&MFw_|GCJvo|W!X)c<$l|2ohAbkOGUc7^%=a9dPi)*ir{>tAV0dWH~JQbeDd5DZIZHJl%_29!IXvpf%;vS@7vD`aD&9 zUZ}pW)jT!mUYho-&-gUuexBjJqamwS($Z*>{AISkJJ^s){hd@rtEod#siUORUEkO$ zuBOvm?oeQ7x`w&CReT=IQV|(Eo-n`kP%p1eN?k#&qoK|%Ku)Qx-R*Gt2GKwNTSU-Z zgkq5vSdyVgN)_sG2<16NMMcmx_&ndcuD0Gu{g!ebO}P?voYI4FVhkKfTvCnKahz&A zVwt)k3%`=KS`EsYERK~2IH?-(ijG2CWzTf&liuvK+?Z+w83paNl1{V9p-YcO207xI z=9i|I(}_O5B}G@M(-M!M$nDv9#Jq7bJpDF6QCmxm?E<)(lz!6qp6A2_Wr|8tf2oM^ 
zbB%_2b7QINLyBZRWi|d)QlI=yXGy)6OtHMO^k3qvguUd$E+S3^aowgZAu=Q&96>Kh zNhqnlqwo`W#93q`p~N}pQI!1RQpB&#fI9`$jBtl|r0Qv5V>7}GKZm?(a0j5Ga|I4c z(Fy+!XvO{>q*aVrNk89~LMLXd`QS!p+Qv4VslX4blQP8QJVi5--g^r-9m)3!zeaG6 zcC>lO2VJ`=Xb&{<4)pmA^GwLr_Kaoa{w7Z|cNn$#0UglOE_s~@6~G?!R;0dq1HEbi zD)X7tXGbZ#W*R?MlwR^+%Zj`!(i2lgKl+}f$hB1MiZYo)dcwMV@M`KmpUU&An-bQj zrGL%|w1GbiV*e-#d#oO4U_6!sp&OY*=%bEOBj|8En(=fAevs@Z0XHlzxTOC{HIQ?s zxa`RCaG|TEG{-UjEq-|t{vdgVV7_FSx@f=>?J#7ln&Ro5Mqi{(_2-b#tO0r)$*fb{ z*OF&n_9lktIWK8?=rd*87om zjedUh&4aFhXA^jZ@`SUhh)Il)u&S+_`vl|Dp`-h<1vHE-fiBsp(*bj;xY2iPg|O9u z@3AxLs@;EiYhI_KJZ)Mk{aF1@a=gZ0k;5Ccw27x-Z2?Zdx`?akn?=%u9LM3@xa;{M zPEjst?2X_FbzWS`aU(V+nV4qoShPy6DZAlwet9j`L0~GhS#|@@`rX>Mg~!%&{RyZ( zg*bok%GOyzry%;1)A5~Ohk?Z(LB;|4hFJSg?OfW&AQ6QRdZFwJ=sP% zi0-ON)#uiL(p6d$;k=5qrU6x9o2QlQ`YK9%1@5_2w#Now%?03tQBXUdX#SRC!gpZt z3^#n;p9~#{3q=M+$@|6<;IG}&`#WVy&Ra0`yPBRP{oYv zAvjpl5BwcR*;e6cGH};242oj+@$V$9ouk9DrJ$PW`(rTAmp*Og?Z}!r_->m&e7c*( z#y>Zr_(p=$tswC-tl-mKK- zxz9JzqW!t@+mYMmMW5$f=G|y~%t<%a*KhYQx@qYD4R~~bEgk%uG#{;JLHM1`Y2J&@ zM-6>t}pesTwFKQ@jAaE)VDGv3Z$|(AelxGDi9sU zc7T!{rR;!9R*dPMG&j`DuxwVa?EuG%3pd#DUYi^47pzVg+V1sU$4?z(2Au$?TgFaA z)t#kQ#F{=!dz3HU>S3(9Fv}a%4*;KEd=Yp9T(@wy`)+7o3^x6qH#%MbZ2P&px%+=^ z>28=_QeRYGKt3!#us(iXpyl?`?I*sVe0X|6_JnWu`wo7i^+bLLd`EnTe8+tIZ-eEQ z;$29l<5Aw+Kfn5p`tp27e<%&^cYm7tN;hK<*J5xXbo+e5b!r}s(0`f+1aK+bJYM&# z#7=yKCi{SKuYe$fAPc+^t5|-6oQ}nfJm3dPN9s32JR~n=K#E?5E@H$ZJQ5Zq&rOVQ zgP!iLQXJXGH4l?Tf#gH%Aj%`2==KKP*6rs8Iqeo{C%Ou0aIid;po>4t`I5a-h|3kn zNXhs$GWl+E7i=Todg76=mIEV%tl+T_y1rt6H1w&z!DX_dcc zS62TN+5tVUZdqISTdkh=8e!bUg_5|lt zQo5ain7*w*5F5;mbXP*FV!9-=AhWVrDK&R^dUSfVWWDg&a>cPJ_U8Fe@nHG*UFECQ zBeh+$UHFYM$#yCAZmF&dI$$;63LPDyi^a<#is@^_t;cQl5q$eP%aH#^a2xt_5t&~l zY?1W?HKNv8pDt({=}YfrU^lm)vp`F`D9SQwG^~})z;W;^3>X1V7*&im zRtu$#+{PRRl$>t)RJoz0mp*u)aCt&Sji;3Jj2hA}pFF8p`PODwhM1eYPv2xM(HMXc+%w=_S50t6kY zm(gd#V?h%W1Ghcm1z`AldDO(bH^EZV*jZzui;RcKyss6(PXJVe)po;W15R@n^Q+1# zUy?1n@yAYCosw?)+i}@<7ApN^ih24%b3D$`?w8nf_Sf{o_hcH&0Eb~;)aKaX7+kzI 
zxqUR%0qcAh#nRIFepU~iudgjkda9JmO;a&Zem*V^M2KTqi7gI9_SG$onq#=bhZC_= zN#zyjTznXuk&mu_j4;W>8)it1aOl`NY#z}ZUz~ktJ`n>;eRJT+sF}|2wUC%7U5fqN zT>Ko_0q;`{kzp2Nb#6=)MbZ*X0g++5(FdRtu}f3dMo-LOPV&ad0)l_?$m`oILcae? zC5)g&tM1ZM6;VGz{ZXSv(XtQiXKs+3SddbEJC?9mC13ojHwb5fGQqheIwzw~Pvs>5 zC!>CT6c;qk>oTqShJcfiL4MSBXpFR%_VB5g7%5H4`)W_jYSh0Wtd>li{lFOG#P9{c zN%vBlzSI>fIhe41I2}!Dx?|VH<78wB9%K>@X12q_oo|Ve)&^_4iGweKc+L8;g3+0a z`sNRU7SYj;ixz;VU?d6B09-MQF(t7Q;@>=!3_($4i=F_X0Mw_vjf$HE*fzBu+ige`M_h|~kPr&mH_jF3KI=gQ(x zorG6fVvJ@2wTfgz8{1*WZx`Y(lK(TpRT{e#!zpOJ=gi-sNWf4tjLL_bP`nj*b;?E_biQZ2?ekoG z*Y=S0&8g_d@Fx$24oAQ71*!;7O$@!NJJ8{W8rw~n5+a?Rnt&L$cTE2nw+&bUvCh8; zCt)p@@Xmu1c>H3WSprUj$NM{+dxuZ?L|g3yXJM&)zi@8t!B!A*qf^hr=>CC2j^94N z;TGlmE3qXtoBGpy=kPy{k8xFs$&Eq&J$hyXk5d*mascUMBvV*fqkl5`IWziCWaqTw ztpl_1*D~=mn)U4hPDP@;R{)%Q3D<#u zPDTsHKd6in{|=u^i=k`Vk$G}-H%9a9YE(0a$^I}!%{awW8BU|N{Mv7;GG6XaJ0m{Saqt2y8o%BKD{!Q z-jIGiFb4g7Iescuh<46Q*kj)`Y5xyVX6(d2RIh!3j6F5{Vz}SQ|KXBT`OkS|)c$dH z!9(^>H5cD)|CEcpO7>4SavxGxhTohHb`4SIe~7@giUj=lYVy-ai%-Md@rxZ1eH>#n8Xa&J;bX=~n;nfF&)~>7xsRbNj>sgmc@B^TS{p z(GQ^;{}JRD@40guvFLELThIFpt4{_ME$xEB~oDK#_lh;(U@!PSf|77X@`Ci}rqpz}eqrX2y z;(ZhS!}vj=@lTgq=`{Z^UUw7y)1~|M^FR0eFixoaXQ~Zg#xDTQyIAUf&cRU?{}1DW z@sEG%a`vM$r^>JP{`~gy7H9D~4Qjduo`$VlSNw3+E&3mEvj}vq|8Xgdb9?vWV|1qQ zIJd9ttv|TEfAne-_M=cZ0Wq(?KafrL4>|WD|JjauF8;>fIfy05SCZr3WmljJ2sPWP zKPvZ*{t>rzXdaQy6}Z#jhRc8be_GS{oO}27c0Uq%|K}h7xj{h8OY4tTW`-x62homw zoP$Kf?XKOwT@W~0Fh@AyB}J?HrNz@!R#XrmE@8-Q#6~?_VhynbB@_vrwAiZ=5l}5g z!3?wM(#D3cDFkc=1#rkn0=Pa~AqhNT&Q}BEbCUF)b5PL+Wh5)FcrXA7B^eq!@DVg} zA)rwOur)v!Yc9EE{+?O^$V|qqp2ynH{t@C9Ld@@CK)S-dq^tGEG;85~SBMP5?qP}f z?83+w@`-b%5Geo5$s2>9%2x7M)K^|50^`8IDQD2T~=r`E77D?$SND?x*n1%8!V zipV=v!MXVOTsnLL!nHnqEb2MRNljCN{P`?JwN^xk7UWo*bG5KZwkmN+ha-agh?@F! 
zCHNzdj$T6}dun@jY*}VjB}oML4dzA1oTQrmF<4K=D2o;@>^RPB=3um_10!^~O9`43 zi-})=E|!qhNA5Jzx%4ABY@sheAtLz9CKLm;^19LPIVR zK;~s@Nld1IxYSSh@&81GIpl~@hZ0HBhuktTs_mgsJLS|6u4|wuAo|l+$JBBX<#{*F znN16bYS;VVj1UkE2_BDRQ5Kg*1d5^5r!h`qha|Vc-XoxbA(E=3(e$I=@ft$y3Us_` zl;OynQDM%~@Utl?TH9M6Thgw7N=tugpknx^wZEq6c^ptEa~`LckSkSQPM+pyc@}jiA;b zw{F;E7sIT{o}h5#Wn`CVeiH?o{RWUw@{Tb@m+gDvNv;C0KIr1ZnA65#G%9N*Zog;{4>-ui~ zjbh;KsH3BTamwf`O8Dk6DVUV$`>Jkj)zgzJ%|~Vec0wDaYb<3(lFHSf4GnXOuGl~c zeNqhU*@;5bdh&_EH6-x`0+m49v7&#W1NsX{Ez3_`!yoA#y{tHG#+Z!t(4q#+Sr1vJ zeU;CmUx-xVzr=Si8e_cWLb6CxmY*iwoo27|v*HLNt6TzKfwQ!T^d@jz)~fX4sh#)H{_gNI)HJi@ zt^`vXFVG7Fv2^RL@q3_|Cp;zA9lM2JKilhLZH4s0;=?_zkTWXg!7?Hzp`7 z?~-D*VecH$8w6PjnglR-D{Zq2vZ@=1+vOiWp{B*F#oDpWqr7^`g4jV67%_*VZfXS)>LQj2onbs zcG;|bY_7K$3ondKAAzLIl8%E(w8_89ZXFV~Gsv$QjLtt$joy{Q)&;mFC$z9eWSh0& zEJmS6s%0t=r}Mq{1qRCHn$IYQszBKJh(v0+lyx|Pte_DL=Q}3_YZxcGRY^E;F(yHM4R!L_eoexu{n&jU*oKvpBPIQ6b)$YB~O_(FL{`M zv}@G)9xiKm3EUV12PS*fYSSbHljxYSmQLUXX9D4@^7}Nu$npJIP5V?>d-|s$H(k^O z<4hph((q^Kbb8A*kCBa^$k)f#?QzLR>DpNY(p;YjxfPegjj8DlG2Y1KiCrZ`CeI2( ztXFH}>ug1Cq#Ro&mznUU2{h7aK`|KN@?C@{vS_ColU)qR*0n^fh|ofcCZb4l)0;}43(qU@m?#Bx`tpWO?l z^UxNo^WGZblmq8wrOy43YlnX?KpG@WAK!^R2^&w6v_J@x(@b{-h;y#0<<7=;(motw z-ox(ohweybp8ij9 zx_xJ&+uV}|=T!0~@D@(#rAutp_TaFJl3PKm)m~gRx5lb)Ywwiv0D;{4!)o1*`cqu@ ztwUe}w?{8%lyOK!IX+2KzZM^&ao7q~M!&)4TS*?xXZxZ`2xBR#NS9gLWg#l4oIF(> z)tge|9I*)}VR-n^QhJ*Bip^scAivZPG|{PTBg|nh#KeefzyB5teUvE!O_t%=igS>p zRIc%^zI^y#MVRVgx$(^XwUvSL{UnRPgH`$caXHv1y&LaJfM53Ae%X{v*QA4wB%R`b zGdJj}FBz2T!`5JFwSMs-hKNvAH_|Urgla+nGh9WlpW=7quUHiFF?26|FVm7>nMvbLj7RPbsr6u{?&=hKb z8g3>bWHS3vTmWP4vymD{R>&hF+Pk;+7|`VBuGjvg6Euay(x?O1k7% z(NoS*b?UcV>QJfeqP+K;dsenHTHmVh{6h42!YF^M1d%RPq%RhBKC}buhuP4Z>TFKc zn&a`(VUK-HfrTnIgj;!OeH$#d=-S}dZ*^gyf?R0L0!vWfhmAb5@aAHeD?i^Gxz%xOpm;C&>J_G6gA1qZ#G8sfc3lYrGwhOB~0s)#pc6e-(Og<6vnzt zjswG^b5}$WX`QmD`^`>M45tiZXsMRZ8 zhtqv?uDV`yj>M$T4Q*+-oJEl8)ar@_XSVqt&C$D=&$*Ns({Wt96$0*GUEy4;2fiwa zjRGIS!ZS;kYTwIpEwUPXcsgjKbAH`d1JQt8Ecl^t>~sl<0Xtmey#m3{Tx|3Y(mJ## 
zJ64D!V^~!LV%jar4dGe%+5t|}Mp>_0_lS{yO#^tu7>J`Gdb$wY@7|b^lL?H8p!zQg-@)&DCK?CZ5%`%is5@C{t51MK*!X*DB*(4JmS=L5^w` zZqD*C8M`!1u#4{m4qw#-$U$d;3jR*80giYBnF^5)&W~zK;TWz3g$x_@Y-HUjAHH0n z7qHVd`nw|*sD1w6^NpLNf=bW~ftoB1aWHlZScx9M6C0WcGkIs2F>q!x8{LbkI6o6e$tAV(x z^f$c|5)}h0{HU4zHW4-_l<9SYEpEbJEPBEQ_N?wD#}e_^U*ET=%ND6~()l$5M^e2$qDN0|UQVTMSrWuFszbev?F#I;JQpF#JLwH03X+9pT?!M<7{;3;Gm`3D0wJ{K|8tax)C0 zp!!+Jeo)xN!baV_M0ZN2_rvsgC{OOxk(VyAnKnxcw&NdBRSnjCARBgxQZ%cc_IY>? zmXQvug!YpRnxy4mmZx1v5L^dJYHHP}?6`!#(Gb~XC-;jeXFlSW%Aw+x5enW6RzO!{ zl^QrGct<$u^TmlYqgtFMe)$$h4`4{qyL%>Q;nbq|rmsoQzQ1G{oPXBa@iz{C++`N#zi_G-4 zh*}L`pWc8hTkiunNfeR+MQvFv&Q?5}?zOMvS7F!Ae1ev(_4J5|6Aj%|^JD~AkDxis zncK7rM&VI~kB}hAaZw1oMBte=Oid)~EAf2&b3nwuNs|y>5d8$r$Jy9mO4OSy7I4{B z4G(g2MS;?FN@6%z^mJiw;25GY8g}9gqLZglWbg6605w3$zv9!R_Qc0~#jd7->J>vK z{SgZ%)PNy&&zRjUB!FhdPB$=l8yDOe!-_@Zy&6&@x<*0HTwMGc$LRJUwogW2MIgYU}RHF>3`B$L3{noo{mdk~X3)@ahS2 zo!f>^I_RSoFKI;(uGww%Z*pS@p-tj+rE^`cSl~G?gIl<;014(Z&i&_JJe1{@4oh)E28I`EIWgW1BF?)o97H?K#<=*#tZ|JEaYn&H;%w)tFT@$+ z#u`tLHIfz6g51`|t&@UUEpAnhqB$9JWt60uk?c@B4n=@B7}{1qdZ+c0$N_1@x1YpvQyy+z;?fJ((v`&*#}U zFG_H}^Er8QUhn)3K(@+w3@ib>lw^_d=F(_~jOTzd`YMdMSz71E*SoWDUYF&od9;o$ z>^@gRt2S5w+LgkFQC$lC!!xwi=-p1pjlyh8m%K;w^`@*y6ugs^g;Oy8=auQ$pmpAu zZUK)24>H7qDJwc@p-;h53kd2*Qk%n{e})0lXy~~PQ3LgCJaFCUTAS+f+Qoz?4Ndcr-EA~!sIX^Q(bFDh)2v{=BK-c zl3lZhL)pv;ECJPau4vt|m$u*g8&FN2l$GpFB^wJJuCqPyILjWc=y6wQN}r*K1i)D5 z`+Cn3?OZy!kjwHa0at1f=6)<2ZZ7jJ?%hHGF~37NFmPo0o}dI_1#l=i3QL@NPg}nb zoZ}diF!{iDxf*OJmW0hH;JjL3v0#jVgRC_vUweyW{v@sp=#a@kUq(CR%c||C_ew2t5z3@*HIpgDc;C~JhG*#Yf)&^ zV5G8g^ijdE|Br@_z5re+C_grR!+)SV7xoXO{Ev;1&ISD^^ZM`~kYRMjEx;sy+cN$Z zFLlmi%GXmM++Zcr^wHk*|K`Wk(Sep3+`v#0%T#kt$8;A*9RwLD$M0Kll-NN^Ed?h!U9f`qIO1 z>AssVuL<8p;8pi)Zx8xTm_C?Sr-{hKIW9^7o)(Cj;8VZ}!@n49qBoY5>zeZYAsw-}t6>!s+s2JR;+mr1 z%f;tN;l8#uFm#*a$%Qde6JkKi!k-@91$I}o5NM|7Q|UXOOApNM?V_MPC^`9^Psp1W z;b$xXw@WPODT6rz!@O-c1S&>VoGCXf4Sp(p?VqIowj|QI)=9X_QC2sV&{LLyUdn(L zji>JORO2d;?&{H9Gdc2FNB5Suy8q&MxA%?DRWR5X(ap@UOBwe<=2*do2MO-X4F6wg 
z)C}oSlQb!5O0Hc{vcu7gD?U9Ow@UymvZDgy1f4+CgO#N7eZ6k_(2_CaFObBtrgv6! zMZ+ILTFQ>Fo378jeSPla&}qgt+{ER(+?BidXm!a}6p;mb*xw3yKu4wTPC7ScpoPo=SYC%loMUP>Z)bWtSB^8Y4rB5`qM-FzIC%R1v zoB*!mt^ohH3||-csIH>1@H|76WU4xgB{VgjIV?W9V$haTC%b3c3#2_2oobE`4Dao{ zsLr)$odui=>sp7>9^9Fh(??|BsRF^1$|`rtQ0Q|1LyS4hAqZSO%)} zkm7+nCfHZCxH1f+8XvNTUMWBdb!xWf)F_;tr}hl^elSKjHza|Qdjy4^!-xngm?z>D1KZZGDJdBHE41v-UX?(oUb4 z{ds|`y0-sxxBqzhTbcCul2rk}xOPE*^F95|3nMRg?OwLNf0RO-(A@4{9(RpTx%OU) zW+#9XoaH%3NpX}LyfJVo`oyKt+WDD!h(dNDK}RpF%sM`kUs{}AqU7x*>Bgt>+XcCO zWw!m2?+9cURH!BD7=mztjQ{a0@n)+j%;|35njs_)A+h!L1e=uEDn(#RewJk$l9S6Y z;f*71C-sniG?os5f}b~{extFpl3i4*FQf=t4fnTEWrG}}9ux8W;us3sSPoz)!ZxGu zg&H-<<6kYrPfh$~_SRjTck4g=&7bXkYcxmj6BN5%lJ)eq9II+NDpC8x5BxS)CP*@0 z-S%pdO38;j)|ciEvz2Zi3n9I{>a8!5aDS)-4#?$rVz2%H!LX|IawXsKP+A|7pK6sH z^_EFNph02@YB18mw0}PV{j@nO++eadT2r1<@(*B9^Eo3%Ev8-5*G*;mkkP zBt+$iS9%5~hkDz7%qv+bDB~q1$zEp99lArQK(+?*!a0x|7kX6_B-&e3l5IX>;2bDk z*xV(W6sk1Bi;`*zlFFbl!e{PK3TwEAS=db2s$qnm{BkL%!CgXKsQtX3Mqo?3=)c>q4Q&Zf(XaCaU(6Kzg$WPjFBFz!rlALk z2p5@$s2s63qSOGc{ly~KLFOdMnD_$Lwm8`kX_xppZX z5Geg(Z>a6v4x^X9r5!OhaHmojSucHxfW}2>r?Hg_HQR{tl5I5SiTu&0XHmi}h*wIRHN|F~ip(?S_RNbr+!DEu3F`x6gI~H*}`( z^rq|n%wE&aQyXJapC@N{3>a?%iX1fXJ(E0|pyX*BN;D znkJ#hZP-@c&i8rf0g%v!e*yw3Hsu!uHbV(F*+>m_K6;#I#Fl3^=9r@x?)({1Xr)ua zNWk`sErn1rCDJp^;)sSfsWr>9U4o}-asze0kVrv%_eC-1Lc`$3Tb(jRQWR88_RuMZ zt+7A}WbmqwZG3)#tJoG${$ATDZCEqkG9GTGHLTS1t7^O?)db}Iwhx3b_u*9hiRIWxhT#xW!SDfEer$ z0SnMxEFFXQiH3u&_5_CS3CwWy;u5nYh-Og->x=ec>2 zLMFPw3yFw;Bwri@bW>uZf0cxY(b5=M5^lP!6mWu=q~W5iGmbRIi{qnd8zpt-g7^-; zcw1Y0QVwG?sm~A(IqADYDbqtkexOt{u}C6j@0LbOw*436EiuAxEVXY0v#Phv7)EBm z!tnM|${XUBOX2YOkE6$uTOP}fY5|ePNlnPDHiHv~0g!bP#sCHQXWW`GQXHUUCQ#cs zw7sDfSaVD;5R|th8%>2(lzU4LwvB5%7pNnZh5xW^emL#DMw5jRurt|U3nB?f87>k9 zo(g)!++k8fi9I+BEGZ$+Byd$xWZuG%T@~x5XeAP<2Td4mUt_p{t|IyJ!6!C)6sdRR zC`xv}Tos4!tm0nk0e(!{V0npDY==?#K!rK29i1wmz=<-<&sjoZ^`L}m0Y#nHMn|*L z)2HWfl6lr!pk(R5BGR#PL#~IMa6S3xuJ8Vv%yjB!9ne%51PKiM`#upyfXbqbu<^s0 z;M6V=1fU+o2P21i^;SxV*O+$E;ZLj))oiNN^b8V=l&h>ZYkMI4>yRjj(lE&Nl8jg$ 
zY985JLPf5kRuQONBBRc{msO_179HRv&)B=rmU@0L<$LjGFDq|75gpP-X;#RML4KN4 zJapzg%sn<$R9*(d!5Ho?LT|CHiP7lHf+QzJ@!cn(ZOH`}oqO2>!feO@wCKnaGc6x}sGN@jk2vS;;Vdhbw<)L$7Ri4s7?16j}jWHfODu7Lk}+73XjKryXFzA@sGw*Msvul|wkahTbXD_AJECMnxj+Gh zs+u6-*ItcFsg&|Vxog|dXe29U3`;P^&{qPd6i?9Qi_Jgb4SGaKr$9p7t4t~B&I?j> zP0uP>I@Qz*ZCmdK3BjJv#OTtl?8r326-wtiVq$D9nvG|b86~YmWLGa6Lo{3_PE(m$ zPAkvLF|YV8mfn4JZf-s3>W^ysJ|r&{ri9X14XM>Ux&Rs&6pRmnpe(#S26slY(JYMO z_>Nwfg_ul*Y3HXGx?^iY*?7j999Raq5G6!op?j+*52+_)Hew2&_6kG`ltgz`RR7C; z%?`+`58!7xZxlz&L*4k-{hBIlh znxbJ&Oq4Xi+BZj&1QFG~=n>biYW)&9{TuaMambi!!C5$~rkd5KVir)x0RY^YRrIW+ z;HEL*$%nrCbcFeG5Z59iDfuCxh_>FaB6B2P<1?F<@(@TvJoRvbA#SrFA- zq}~=B&rv-?U3GthQut*jrVau)Vs10%#92i6e#u!#b-kNv^ekgm zyKCzc!We$P03<_#vw~r;!m=xj3_WM$9Wb)T?%kY8o$rV!Eh8U|A=8RG%HYNKT(MI7 zS?Pk!FxxRO+;im!BsoA2;s*d=g=^nYi1LwpZXmjY0ns`K)&$<&Q3f8p?*_O_=PJ<2 z`vr!+?Im67_dAe(7ky&kJeHo3x6|SIKfmvLnbcg*YO9#}9qM5*xTB=<HuTT*?`kMa+5G8~^d}hsaU>gY&z*(fvYit@ygU*$cvHv&N`ot^5T^6{oi+KyPCc_sncr)p3ekzaH`S$xsLM?msOu!hx&D}+au|i~?m#Rg`=FwXq#s-SkWLXYY1;a+~mmPz=Sx5EPxu3fd}29__V5TRbfcQ3p#EM+E3EU~;tqL7RQ zagWCF>2F2E*$<^-sn8?4yqvEWtQiLGvp{hksS3FUT+#x}ty%<4BwAvlLzTVRxtGf} z|8i%G9x3Lowiqu3DoUi{Vhrv9ME%egO6o5x0e}0{Q!M%Poc;XeZ~V;tkKI#`OP>HG zd1EMn(f-Dq%Z~G?->^#@p4T=PGpRCV`tAMpb}_iJH1yTg52wqyWLTv8Ktk_i*b06+ zFK%8x|D*UUdR7!>9C577WkhEM8AeJJQar`Ap62-d8#G}Gfdck62-J^$A@h<_J}-01 zre)oG;x6@spd!;cs%)K=DA8ux;;6bj*4QmXxyY15sD40Q+bk71O6VxVOggRAwINC+-@6zSYjSI48D>*FVzMnWt~CzfJ?5>Aj~6CC&AqfM|-?4{>EC!YT2eeRzQ zoW1PPUoEf~4Zs7~csFE0ln*OJZLW^k`5}*62smCKtprgM#ZdF^#iZEQn@98=7j~3` zeB#fBe*bHmVPaz#Ap?JJbU>{x+_<1t*7U!dd-b0vemJBo(gg1cd@vCx^2YwwATrG0D*9}G^t+jt zn*3csX&UlEz(Y`Ul=wEuo0~qQfIIl?QZ9_1G5%dLgQ;G)UJ7Oh3EwMDr-1De(~(-K zl?&vv|KWqVL*MT{{%`HJudPXwzY20I&(YHh(Vo@dSDLU?x^{I+iD^d@BP^w4#} zyal5Fk{&2bIZFapu``fB!El0el|_h~hF3l6l+bSLSEHwX=eg|Iz2tOHF&`f+9{@vgwj^`IHsP`@|-f; zfga%Ds{Ulcq=AAJsB9wB;#3Oc4y?`I1-RYUo&MUz*(a{%f+2jQeWNJ!-}Hq}|4?-Q zR(pIc-v{X$A#bk={pIEE(;m^ipxsy{zOUoDrMBN#+w)WH#jL&msMa^tZfyh+PC(#-@rl%66&pb+WSnc#U?*xU)lrh 
z{N{cE;>@duzs0MP@v89j9rc8Xr*sF^5=cP}Z(}#bzzs17K_X;=e)h*l2-_ORyhkv? z+>lEj@c8tn+C;W(z+s)F21$yO1Xapuwau{m+22tk4rA4}D7G8370llI3p1_dMMDFQ z+G3B6I@WN5Kg^jeLexWbv-JR0NNgL`T9s0=l+j>lF51onNh6WS9V>X^nkJMB$}5iU}? zQw4`{0}D7(7W|aL)lq0Dy|x(-dZ$T90P=RlA>MbV3J~S4qFT12);E)f5qa#ZsY1d# zcdS5?f}vMScquTI)?WimK;L-*!YDHms^mb;9?k+ndhiVVEE7wh()ex2IoQS|iCn&3 zGQLHqPX!i;2kXUwPpz8!f1wY%LW5<6q2As(=*iy%-p5?Hp+s>csNT5(hWm-oED>-g zN!pqB7z4!CE(i|SJ2o^<6arFxu)OXOQ(h?=2?~~nP6+ioKu4akZER`h8Bc0JDm3Qo zAYPJ!;JN`jHPfn?6hKb_a+v>$GgWV2fI(7eV$Iu^c_`9dgAWdOk>~q3idk(hfzBK? zl;~W%dG|JtuH>?&?h5T}wx2%tm1+>`=URma-E7U&hMR$? z)O`+9WKXwvuE=h%Iftsp`INvO1@$`WDzSHs!3RZQm!zmi>^tCI=YMXpfO97JRr{8~PB)X8ee08yT4I^QbOn&yroidW4n^bbpT+mFus(L+*?Y>cVYb{uXlU2Ln*P ziR`y=#15N$TgeBlH2|>+9v{px9T~Nl)0klW0Oz&6?hBjmXvi}5p*Y|+JK#X8K$$aI zNEFPN#3yO_21H?53!RueD%0d8HV=NprBb(T54iExsPKnMt*JdKa1r*bZp5P+j?$Ma z*Z~8ib%vOzG`6Q-=S_&&Mx?gSKh}s|j&eQBs=VRhOn=Q#C?q~EQ3Enw+rR4k#WOUZ z6kC)%Z~Sc>k*1pa$p|r;8Wk{H)lvF#1sHGw(T-eF3-GtaY>AfzOR~d0s~X{UqWT{RA8{Zp7saBn*)HdwaC)HDgvhu)zA_eE^G*=lUSCq#0v z=)FUQL~`66E8u8y_!wo>!p(*wP-YtEX%h?1jk<#E^7aMDwP07(=1NHlQ8aw+Onu_E zK8&YrYseh=LeheDf8n+SwdK|WUu1t;^Ft07RgKo{y<4u@a2#i?JhCAI)@E6Q1BMz8 z(3X_aJW|T^Nn`dMb&rHwU?Hb*4N;`#oTklV6P3nBzk{$>%Ih)u)*ZEJI;!N($MA-f zj?}YAmdiAJ$EdO=?bQPO=pdUQq}>X;sM`b%H*BDT(`sqM1gkb#vP(G6sIyvZh)S%A z10BBU(LD(r6>)%d?yRCRL_HWNZ|uVuOImF5*u z>M7wPfhZqN)BWb_OW3ru;f{SvqN}q1NLEf*R56b5lKAd}ebBjhO3|mh=VBeo^vk1K z%%!MVegvT9rBPGfRC!vq@Pd<*T|!7iC8DVE{_MT zGDGHIn)!pMb{`}u5^Yb6!H~j*hRqYJWUvZiiVSz*0s|cGK!Ry_8I@GU7fzVHq!ctG zC{=*fpSPN`^=_Dg^b~fx`MTy3mF-hrJ=Uf8GcshI0)Bjfl4C2N4aqXMT0fJ!#1#H0 z_abOZijv8WN0YK}EGtLT3eA{DL5Z!d`5-i_SX#UFBYE_Q201j(KA_(c?a))+%8;vP zIhTb?h~~`0SfR>pn0wsg9lYRC;xLAYN7xO0h?{PPrf0HZY@Mb~e@J2_>|@+td!;2@ zuK6v1m<_j3W0LS|-e@eSSZq{E=Cz|R+UCe2Cxk0fY`sI7$&SXZ2gS+R)n0slrGqiG zryiPwi^|4oX|#lW1Rd95NmY^^U)grJ&uE=kAY<(N7#%D5DPE6Bv&oibNF}nV7D+b& zC1tK>VQPyW^C*KyyrF1hvGoYA{*lxEEORe(q>rTOiL! 
z2%jr&usOl!k<1U!wAJgt)o-pr)DD$qW$Q>kFgK5nDeW3I+6iyz2mrd+#V zSrLt3qKx00B9MMgyjN{jH!o&>vCx{~KsWjO>ns|spk_wU)gnAE39yi_&J@`sYOGw4lC6xpaQgRWSVoT`&#{A=+cp z5`JwhyCcV=1xUEU{9LsC+^(qQ4DHAQUol|4N5D;w&6A24q!Y3b?V>rB=oA{!0EP<= zZkl4FbDJ4RH?f48x7^&l{!nFLYb`VbP4MBLRB2^1ksrfQg>U2<1V;v%}E>eHtr z=7N~qC5j^EHeMC(Fg6Ajvbk*Tb`fP$S<_s-3dtxDkN=Rto=Bt>@!6o~g^NY&piKW3 zX8yYeL5c0FVl*tK+E+04NO`rXF z-<+IF^SfNizZ02h`NXe4$CGD0eLAaXQJxVQ_00$SVT{Ml&h=$v)iO1g@GMlonsI)6 zzafj09~*sTs7Jzk2EY)cDKqh_D0KC4^zY=(5)zR7?NSZY+Y!``#4o69ys#8?6Oyf5tVeseP(81$D=Mk^|=NP<_qnsEXe`NDFA|5eRJ|1j6(yCv?ik<6C z!!!H)roNuO$3R1K{Q`4NP#eGl$Q5SFsTd$-R z$<#xkP5xRRhMk3T`4%I_%e{x$SMC6drtYCm@=RO10TbwUY_%(xaO{JDVZ$BZEyqq< zQgHQ5^XU`P>(5STce%1OkJ0&%JK@^fvFG={AzkGhISpZ7>9Sa0aa6^WCc}!4Q!y8X zILcmns;BrC-w)1ASnOsQ+3RayGy1PqAwHfgNSTP)DqX|?$wXzyOaVe?xB@xHMAc$z zFBNjM1O@E?Pg-h_gZwDC~H{}-t!Y-$}3wS>aSLTkiEMPLZ~|KP5l+7dtMzYSmbkK z(&H#&R+toOEb6#&8L`(6m!Mb10-@ruKEffN`c&i7u!9PMgm=3MNaa?YHY2P9$wDeXMAB#7~nI`gUL-uQ9v zSbvVB zSC`cQ)nC1mihAVT2ic~fvw~Sq6>Kji774pxdd|7ev*k+Q4~?~vgzkK69)%HJ?|pqI zCp4WuhyijW={{{rn@j1fr+=1xpp=#||9TS1^4Q8E4pxu8CNKJi z#{2LY4Ch`T(v1@5P7$euiHt0jRQ%R@_xr#~Malg>0CEjh7F01;K_$->AZaB-YkkBW zCi<#ZOAE@gfAr&*$@9mi$S;e;y#ltoa((En?zxj|etaGic;6zWq7hi>a8%Z`e;s>T z`RhNv!d`_$kAVeL`%Teb#eJM+X%XESTpV|JuS{!uT%nLUYi^q+$DngHXR1JcWWwaT z#S@T3YogDtFjBBu6NJq(^QxyzrCRX!-{o zp`Xk~>^>eVTIvcfx7~n4h3h5|kCbH3( zEaNLe9%4DGAxKy(h!!xM`^kycSLUIQf%QF2`(D3yZtKkBwf1gNnT+_1Mab^4{x1)QxHH&LzX+Gl2T)`{wm^J+e9O_WpmyzBkB? 
z59HomR_xkN^okIZw3B^?@e1f;qBrnd^f8CmqWEYJ zF8y=6Ss%`1x%7~|Niv^JxA%7>o+d&nz@L9?HFwsx+wVd88KjL3e7CcX>CvNYNJk${ z8-1fDWWo*5c{anjU^exht2|eM(QSx__&oC{xkK`RcH#?bYorNzL>3A;*JFYSZtu4> zPoAFEEaAl*>g9U*Hy#e|rAO#ddXyci(BbNB%-m*>smd0yl0B*%QM=e$ZyyJ`s>~rT zs$Ze(A?o5nrgz9z!jyyXoO{Bvm(!Wit1U`(m|;K9%G9DRW;VAZ$*nT4vVI_Z%(icP z4RV&K`%rlq@*V17r9Y|S{@auFJ`jxO28>~k3`QF+PtGeZN6JG<+d*EzfoOa_0t%!V zS%*}_G$p3)SP!X?jzp9oV&>^yXPJ}>y*tz+uYO+dN+yWHCSQX2Kz)Vey6G7^GKI`I zXN$RHd_D{Li|e8PI3!-yoAemuNhqcCkm+^udK}%M9?63_jFVEayOX{Jq`sFS`Q?^8 zPi3I7tVeMvkkdG`LArK>G$7lzJV0Pf#&~+|L>}+2<8?@qbA$!kJgJ;U`ul*<^MlbD03 z1M&qqk$R98;m!2#Uw@Dq`qTO|`eV9FAEUKOdRdGDmf(G&l(@Wpl~I)`%k)Pxgq+NA z_xu#TDA}Ir-OpRw=!3;blAub&F}OmyWN41W0tp ziE-5$`XRd${*++IgZ{2cz$ZA?g*X~kSh2|n|D^eVH6(Rl-|R&@c`#9;Z>@o1yGkfM z0v`~C^eaOK?zhyqVQVz|vH9l)crKVxrGAnhS!tp4GUPkdr>Q;|S}&R9|1r&ySqLkE zii>dmkjilO-00k1(kp6&2dfm6SJ)y=$q#wC;G^MzPKHC|hWI8Jm+MYwpGc!C^yD|Xh~>0d|$t0jNsvAqOzV-Z>>d`Zn3 zWE2%J-h9v+5m14j$cqfhUsn@4r=&7`G|*TtgQ`LivI8H;P=b{Vr}{cfO4kz&8LmIQ zV9j@>aT!)ggYaf7(;_vP!5fRK-uMENEEkAovgnO&wyO72HzUn>B(1HXs*d=RjJ+f9 zkjX|q-zm{J+I?t*c;glbbJ{kJk|UnB2-IccZPePT14R@$XUK4-q>w_uYG2W9_g~J{ z2&(^9*q{x1+*0|e52YWzH=UoVRXNqtWPCtd(>9+t^}Xk|{=I8r{sR+_=%$WaY06d~ zhy*~>B72h=Oay`@SzbgG%v48{;|4eR{k2;2vY*9gdW)hkgb|(+r^F?2d!eVXq{ip= zfl!b|b_)8x298$a7HN|1G2p9S?rUX z#3hM7)c;nZuLp^B>mvGl8>E|F)k^n6fhb#mLceckHHI0Lj=CqgL*yIf#Hx3 zo2KcQr_9&QyvaO((8sL`MkhvvWRL!DU)D6fD z{Z%+WtDlB*F~{)PYku9l&up53&M@PH-SxBPV^Gsd`WSx0`NyzcP#fv6LLJv}SKQ$@ zb?B3O@qDJo8{P&hwMyy}Z;(<;$MaizM(7%cI>n3$T@f&Z44-Bu74~q9vre)hvjce@ z_><$OfmU&ct+YzVOY$RU%bbEUSSJbQ!Fuy2YbU!G$wlz*4g5c35y|m6J>BwkYpksXw7BsZtI{HQ-6E28jzIucu<%&3IXy0!-KKJV@H#{9TGfJLON=fe}98|Krf3 z9=-4(exKV4T+HXT@PCjcKIzgssqz0N^e%c8_3&!q;}^p5{(tpbX=x;W17ZRyp#|Rq zVHjWG4tFB>%sCG`4>`{`3r@#@qmn+YQ0++^+j5h1Y)c(M`NsLR>3J)#JogRP_t$9K zUw7ZJaDEW?65(Msq8o@8SA;2aF*WzlwJXw|%t@Po?=iI0w@Y8yJMyAX?hq1NArpn3 z`os|jZ28r16)K~C%z$#t3~s_l03sNG1rNXez6C*$ug#I9A8K1o*A1M9x)_B#45|h8 zb*Yc|1G-s7gxxe2pQ*hzOZQXU#R0tWQ1vjb=yJQ}y-%`YuiXnZnq`lm7ue0oCsT~9 
z6;c{$jnqyyoba~!EwisOVx7`F$_rr-Qek6&q$fc$0jAM~PrpMFGNjnetU zDLU{@`UTq1BI4aYNX9rqVS|VkqbKphXlTTHnLIYw58_A6qO5n};c2x?6}JIgHQLKkE%U7uVVH4ybc?^pHF{8IUW@1X&+?nAG(YeZfC&?Yy`yId z^O{!EJhKh;0r8<8se%|I@x>M2dB%HfExV5RQqg*?9kIn|g-BnJd62CeXnBQk=n;S0 z2Y7E{!yDq>tg6PBwiZPBm5dya*7uD zc^$9k8Chd+L!zkwhF^qEZ<`bY1D%tVix+hE0JB2%X4BH4dv&>Qdp%8=iS zdPuKBNHcTu`4xo^M*jwwHt-8BJ$mUkhfEjzkwwul*dJ!4{R@RZ2-Q3viLR-!VJ>3* zIbGHdL@)r}%>1?a%=v z7YCt!@5({Qj-Qy_cuz@Ov0s$cr(oazstf|T^kAdS+|MaMgl(0>t3 zeIVu(F9^gymSEJ#e?^2*9}P4GU;a_)_4X-iqCVKtB!@%OJ?xsZ-{2q5>hF;q64eSb z-h@42PYUqq*yXH7)Dr|=fK_F@y-U44sx`KFu%hbIan;CJi+zmh4GNOMKZ{T^mc&cZ z3*v2S^R3OA@z(zR@(w=0Y_a(n@ZP-iQn*C^Xz&*KX|!pB6uZ_|;@>03X@jlu6k`>_ zcC#b>*HBUi1{vifeprZ_VtJ-JXE&E0@4m4AjnhB81je``4%+Xn>8k&Fgdt5~000-n zkwO|x6k&xGs3TBt1Wz@S&4U)OcawuuW^j$~Nj z{9qjfIC9&|ZE#75mDH!3lGRAXcglNE`=FjQ;8OSKPzzBHJ9C|DCB2m2M-mPy{*C`_ zKwh02kH4P%^y%``<@7tX|Kw1sTjY~sBo*Tlf%6k~xZ=0{_A9!thmLPT#!_Pc1zA4r zK6c_s*Negv?upsgYwQ0V=VSpuDh{Tx>F&QH|2vivaG~GfqYsAPIKRrF-*hqh7}zu6 zL+(rAKXw0IxCwE_ObADz|BCpHl+6L4hCyEpU!-q`Z_|r_H-KvrKM$`oJFg6-8vzIU zMNJpDN3QBG4A7fOKU7FxQ`CgQ-=-HtPwN;nHJyPe%(wKn^i2vKNc)NMi@hVAOCsOY z*IOTh4%dq8Bqd`Sh7Wow+bozk6kpNwV&x=7`9VALcoc{QX?vRBlNd06Sb^lU9DmRZ z&hp=fS_2~68y3`)x5>rCKaRt|2j5&GP<0mtk^Ue?uJs*r@pDIW$l(>@*npwmSy^-- zJ$YF9x2?Vs!dw5U_ztzJ10~ROj`nl7%dkX-mMx_(Xt&Rra|>6+U2iNS{bz**@efe5 z7!}QXqnFLM;akNB#y8MD;p-zl4<~6cEW(Oku-^&q^Y=G__*V~PM^RrZM9;Mq;wA>* zwSB*rSz69Z{SUbx`S{I4_zT~a1#7|DXp~;vr|i$N-;s5e-vm*<>Mw`Q=JI3B*IM$1 zuw`h1=I95JD&w~+vwZvlSq=#W2Zk==C&YcBWv~Zs8#Gz|Ug&h6bixg-^zuHPd_N>U z2~Ibg3r*4d_4jf9;`9OuKBxN4)9{~{L+yli;^px83Cu6!7Gm&y8259KNcc-${wuPP ziB5N#fAx6K{PD~GR>cP+8-N@1(}S=^KFILByodM(^eZXb6YBnU>N_-zSHDBfK#z7) zxPX5V?pbLI*P2EX!N_ujt?B_Fy_rD?!oUp3=eCT*O$ypYlJga60)kRE7k&uQzpx*O zP%B3D)ktR=#}@zVQ_H>g(|gPJern~DdUD_w+|T_2)a6p{7uWcK(2$?~Nj>myC3YN~ zQxh#baaJ6e{s1J^M)KAZk9pierWz;Xt${>M`QZfdpLh7;wAfqe0sX_!_tnRv0mL^K zK5RoYwnlybjpyYBb9-P1;dy_fxmoL6{oz*wel`p`?{v;YUV^_r2#?U?^mzC*d>;vq zh$H4dDK{#O7aIOi^zZEc=6U`myF~x%O~j=ig&ljNxedW4xP)E2)f&9s@FaN`Y+D^a 
zp|ZeVt6kfJ`j0l@1V158L?_zA4}81Cf8v$4jBg{1`IaGubwQE|{`IYJmSmr}f&LBI zfb+r20pIXMxB>M_8)k|7#44VzkL}BjGD~-1)kdq>RV|1Z{`l*5`?Wh#o|GWp`n1K} zsb65pE+pTz`AX8=U$UreW<|2sx+7%; zU46zRE#z_mbQgfmOU6l(J{FJc%wL=ey;{YV@uR2Pv{o6vg1sv-`G!SXaCteUE2sU> zEbBHcA;@{+55M`f6F>X*!HwhAMl+c?p8L^y8)}^`B0cV+U`0x~UY+a$RLF$X~_vnki921n|nVebm+_+m^b^l~)owMCIp zN8?bimwKb+!hj;@SK&ApK7)S-&Q<^usH-JIBZt5{mI1EwT#CVX#(JP?G5> zk^_u5n(Z~gagVn&oG|BQ+of26h~a!=w_HqsGtWu;YPpiEqRI5%4T^oyu(AT;2yG~h-VCv z<-@q37L;k)7hcrq=dPry$2&T9{u(XQ%VqaTT^|w&g=$ZtP-&`OA9Ur!cxq6DIzdX? z(ya|W-T{5OeZy3z@$kGi4KvE5n#dDg_lFHV(E%*`l7@1G(^`LtWu!XTAnFVyJlE6n zqz#ive~YdtyEfP+Nw8H1g4bh zSUl7zS;%cVdwILfJ8NCCBf%O3IFO+Coa^isRZWQ#50U~n=&JB9h|y*DYpgrujn>Nq zikuCI0qUKH_VU%M_G6p}%+j-3s;p|ETFg+DjV>4j7Z8tnqDLjL+9H)l06|HL;$Y&u z=krt4^3K{;ZIbCn0H18Ix4mJH85^K7>AFLzTM5JfIY4c@Lv>B1IZO*`F)Kv$*nW-K zqs4R+n3ip3y3_dyP4j@93gW;jq)Z%*BnvP*hioD|@&pf)b!VbfT(GLz%>w~+96i(4 zip24Tb4=tQJ6C;>EQkur>lw0Y19Vuls6lAR^NIaR)HUgItVc-*%`14L{_c?`4h54K zb9{-tKuj=Qy8&Q$aw66Rq6>}UJ3bu7e7!N|;^g9cuh8 z97&p;;MNq$dwG!`V~{NDkVmw>ZHYH}I@?{=b$61#`cC`QMqoddRBbhsa{y;bXiRGl zK`K4F70Y_v(**bo4q5aTRaI^W10l+dl;xFD=t&%)$fDKs=?-mT+RV-j>cdsW)&4G< zk|edjOsHW`HWvx070DQYb3UQ6gZG7gdBZf-!`oS~43LMXO*C>@$j8p0MX#kupY*nu zoZiu2t4GF0ls|+-He3KfA2mespc3c8J2zAbbbQE}Ry>j(J6ZcJi4#-MKpP`?WoU-M z`Dl!s547;!&IK)wRy>YQpo2B=4|d?ySVNmN@j1O;(ok}yf9r(7M($FClsCGTA;-Yz z>=ed&(nl5+LppZlOB<>L=3_{>u^4a{d1M4Ml1Fy?hFNUbA(by}C=(d`4AYD|nz$i> z;*@I?EVn_jUfOspokr83Sict?B6ZKvK6EECW4(^8RCdTU^>S;l#)UH>AB7;JCghYF zah6OHDs-B7(zhh+dZJ(TL0%^ne(wTOTuOIm*r7(L@8)&-6Mv=dw`g`c#a$+j?NWnr zMZo9UFaS;fMpdu_ZZhsuQ@=|adt%5g%#qHDjr5@vt+ESJ;=WL>0a(>hp%Cjk%#5Ty zCAS!&MToq6w^A{kMg5DOf3PdD`1TEVE~MJKB_PNQ?_jFOy`gIEA^IW+rSNVwa%oqC zm7geY8&KGitO$Gx#gJh^mp`9Lc(r!x(A9X06?NJ?TQc3?ylFjkgDd|_zSVv> zkygxwiw*5n4$fqM_Z%f7OcJb{|A_tM=%dA-`)?X7a7-Tj#1!~5W)|oMN*7azZ1b5o z(6Zp>a2aUIeuun7uM7USs&iWgtz*2V-m7ANeh*#Oc}CC4+nc%Wg2Uw6GcohChTZ|G znT)rS9gTEr133mRg6!!)%t%(4fedil$rz^2fMO5e9Obq_Kn_F@P&G5A)(l@y<`#}k zGF^2%kZxZ5o>T1jmyF5yicWk_Y{IEUX5Q+ydH5h})0=L0=M%4zDkFTNn!K#C!X7OAo 
z3106HhxTjxacE#syedRfF7Va*Hcs}gcsobhn})@L_LT}AA(D<%s}FR&18^o$v@V)S zCbl)PolNWrC+5V)AKT8vw)w}lZQHi~*yf#c&O3G9z3<&xwX2t^diUN{yQ^0B`aXq| zl^`{SJ9<_%^=-?UmOh~NIj?kEsa^gU`~SrrW3{E**D#ACQn^EK3wN& z-yVqHS}qs%)@6_waFo%bVsRX{t`(d4EvVAx=e08gI1IFLx$(wi^mAdcd8uDDFlbi# zGz_$u&8xAX^TyX4y(v+&+|o;s0))=h7nAz>9z89tbBgj*`s%v>_2a)ayq8+}<+n&?f-tsTqp&~JG1%LA%hJvOs? z#XTF%U;cGPmRK4sRVb?MO22}Ku9_YoKI)TK4e_V^e83WSv77$ZhO7k`wcu!pOn}>` z7td=(Yg58E6_Sb>lkG^0fS&unQuoUT1aN&zNIug>?oGDTeQT8NMpTc&H+&>)%qqLr z8=uJ+{j8$Nuzo!>z&gxkV-Ei0&x0^MDWjt3*n)t4P03xQcKQ(k?PreV-(&y{|F)W~ z_LvI(DuE~LMNX60UZsfN(-U+YwN}X@h;JZI!e_@eZ63=&*0H|UcJy((`NiAN92R)B zkoIbeE@t@<(r&0ivH$lb^jO49(1m~;LiLEST>$A{b1t|Whu9qw7rMhf)une&(;TAV z1DxvNkNl~Pj~AX-wM8LAz4G1X8QhJ*+fdXx@ehcZ%mgP}qq6-DMs6D^9k_X5O$oP@ zDy49Qmn`MCBY;}1wREJYVfBxDAIASGkFc#ir=v52JRq!H?p26zQ z*GV%LZV2?s>b%d}k}lXF_YI_oYQ@`+>(|b;VE)lN0e7a_svx(X4m7Zvw$Wm5J5_6? zCyPCxe6_>3mg(&3>-E{K?{+Lh5mR#Z|+G z@g8&>Vf_;kfd+*Feap;TJr#^4K}O{^@ZV8su~kC?gV~cg!0IlJJJnMO7CUSs#vx-} ziJhMCSAY4cbua!dFze-j)H|Q0mjJ$C(~?32w0Aagb}}NDsaepUDo^739IWYgAOvad z%x`d~StvuL6>t-^{W(R*U7LgqZxE@CfS1W}hIApb#_W!Uq{wj8%OMNfS$aW|eXmqd zH2F8q+Nvs9i;Ig;D4&zQkN{^H< zLIJp~a=`Z{no!P5kLZgc?PJcKwuNwZ3w$pgq;U7w@25cBg&AdwnZZRdQvOoON~>nE zJKO|zv#(M*_8T7H#6yosD|&S=0{)duwWOL~(^_bcDVdH(G=vcCjb6?u4+=D5M_12b z`L;?TRhq1l0WcAJ51R@y7bR zi)_%*8nOBDt}N`>3#Ng7;tFb~8JS;^UhVyUS}8ltu0|bG%0ND8N?TKN+;8p3z{{CJ zil|3?pO?MtRl#huDRAE`9?e-BsO|Q13LBD@Noov>I}Uoz#|a-xslTm^^G!=N_`MP3 z;d~F{k-Ti$ki$7MuAYiq?Vilro-D6pRG#|Tdxl)`l-e|9yNqerD*4vAfeWRS+U>3JmF)6BD)T7p40@} z9|hQM6fC`PoUj$66}|_JgGzQYnHH~Iu%|1OM*NgR0-Q5*#~QnoJ6g9`4ql&aY`IF8 zTHeh0V`<;Wd;DuGGsy)YqmdC&)6pZvcM{&9o)R>xt9yPwhDLuA2tqm1j}=zyE+%>n zhUJ;nTYqhIarav6*HNjI-Xb0D9k2(6IjiCZ6`-DY$i9(Zws|MeFM3!K(#l64XQX}L zK4xai5<>=ym$PIV76VIwq2o6wx88?n(L|dhbkCO~9FB_eI_flq@mm5#oF{(qR&k8id zDItfXz$k3HGAo|G|IQ?1#ost~0V@7#5bOA*zQY1=x@dV^)684`b@=V)?q5xTr?ZNFSmqLK&o>>JI3oRFI6XX&x4wAZWRi{ z%OQ?$Mg^Pnu#zTd!i9B(i_5o3?b)0O1}RzEn#b{!$hdHlQw)XrMDjs%t-ghq&8nH} zoI~K7^19xhEwZGup^@?XHG$f9ghGP%|uQIZ{8gku$I$}?QQ13EA%)n37nR{ 
zhF~uXI!F$za_k-Ea8koHVpwR^>MPPCmeEVgqc{btV-Z8(S2Gz<5BLY~;QYB7TlU`+ zzi;z8?Y)kj1s^K^q-eM+AU$?C!z+>A(J6IOL(c`<4290q`4#rrGv01IpYch=GB|7; zEd*!S+{uB?uA#eZb>-!jpde4SHoqGt-WtG!THKK|b+cki{Ca;E{kWjt-+}vVix-4H z9gCjb$juG^iD6qPLMD>HeOiFxPlcj~^-iPGBWqL$=ciHj(IU8#I5npaSwlWzEnnvh zQee+HBDWMw^C-xn6hUoijD>;ZAyS*k@0TuXk?pr^sF1yEf%PZCE8b82I%YtTUf@BV z7LXW4-`X0>Y*?m{!q+^yB5R$uuexeM27Xz5eC)S;peUF7>GFM{#u0)o!vVczD47og z=Q+(<8$)C#Yy*b6B)@(H#OTRO-{tmM9Qnn-w-6T~^wUHA($9W_$vfk31Y{NXOy5st zf$e0Tx$+c27G($|gMsa+>~?R^fAd;CBN7@|fz`skGP)xY$<@uyoXIk25IQOhRO=n7 z3PRg)_>$LM?I-&cnS{Zq`lm#r(slIym&fpMn>Kuyjta|joz4N4%?N#YGrVf~&%99W zX_WIg`NdLpX;s{Y^G}3+?oU(t;S*imd$aSYmukh z%{}}9#!02KSRKWb);bQMWlO(a_7+??C4U}lb*lo0`2{)2Rjghf-x{S6Vht;84#T9a zbB;lc#VusFmBfM^onB^#EL`Y#O+LnA%|v zo-^FiHC4GgF*0y%Pdqi;4Yc^V!_gIUNF;>yavxalwz3 zpXi>q@fy^XWL=ABPP~L*tJ>b-@0PYzb%`x-pr(mEFmZoy8V77j)G_Z?afqQVFG5Pv z>eq1E{Nbvkm38AA;i}zfm6;ZC9wo3%(ap<^ zQ0!OPZc4$IHSbkU{OxN%^IoQPPjUhOqOSJNlTHAUm8^57^!vp%@#WNe!xu+TUl7B! z7dX@Gnd>an3LV3iedV5Ui~rWK-&q0-`r;*xeZ1}+xvA7je$BpWG;d1L6(TMN-`yyv z1zN2WA=`0Ag}*2T6udkTyldtlf|O(fxAm5wJ-4>W*qKGziK@TyJg%I=CK-0_qx(Ui z^zBJ8IAz*J2UBkT+NYWE<<~miMW~Mu-uk-uONUN^F{oW0Ym{CFr6%@CBBA^$-CE{<|nV0j2- zo_{1F*Z?Wa&3KM1U;dx*tjOQc07aES-A;2wisUkr>m#Xx= z=A&&MSvwi;$JMyr%T-pPH_Fri+WNNOkwS z7T9{5D3e2w_CKgUvNeswEr^(#Osk{SM6C>CRFWAs`){(pM( zkI=R@oYO6`O_@0il@-Y5FoHI39{&-0Q%+HU+fejwBgJk0Tvu)+`a0)RMK$ z2g_Oz_eVz9=gK_$h@Ptc(b<{zbUAH(O?@YFLichew6&=7aLxKA%3sVFvSQ>UWIWU- zsuJg60+Y=f!l6V}J0kE$eV*Sf<7d_f38P^PofBbfX1mg6B?D)vUaT>p6qqfN6`nSS z7C9A+0@eCpEEgy9-s1Z42I!KkL?d41L}}#uWB;9kLR6M5nH$>=i!fs4cux1T89ik= zwZbHsdHI||Y+C)^WM+uBq2ITEA==@lN5=lCzE3DNy_V`vJ8oqtnz4nG_@ngNJ?O;L ziCX48!H1E(y9LQ`VVm!F@-}%NOom-GNNp+>1HNq^o?i#U&t2A*b)0KS27oPsj}VvI zOjR&Kx(HuWCEKhPAPnmuR5pZprNmsBi!AlaLF(^at|T!63`@_|{5~1JtNGr1O`Wm9 z9gLkXylDLvwJ@_G8<4@i9M?MI5yq2x*#~zGv9jrQ#VD$6MNj1!`rTA$ccvguva~8D zvK?BmzK74uUTB&-*(Qp$G*+g!ZpS^sCyIq%iKGr;kbu5_V6e1-0Vx!~wCOkNPgsZ` 
zlc)20BU&kynb%>w&|o^^-XG1I4V7H{4jrYKzP;a;!xk4lVqR##s36aL6nVaqR~90gAelXriT7ziIOAn02|+PO-v0GIxtJjM6;4Cw?Tv3x3R1>b0k? zq}@0ziLZGgA3}Yka3=rI;Wp{@A2AFWC;+tAS+QEoIgd{$+0;dSmgM7E_a95p8k9Eb znjnnTgrQn>_u;l!fFcFAZFbfB2t{#r5sY;{lsuiFrBGxtkcGh@~2O^E5MEtP}rT$4VB4s*$zCOB+77!sseS!*GxrDT0TI^rtY9<*b zW93<+@<-SR$LXfXs9~j8qeBiz(Spg%PnB~q*3=j4M>Fu^Zzca*MR=&NSzC^tQbPrb z6z5NcN&q@>LKsk!S~Z%IV^hRD!b7i<&y8JI>`0I~2a-gB?M~8NwZF3}JY`h8(+^LP z1}dMKO{hXYAv!py*lMj@3ex+r4{MbCcNaco(ZNRT$<+mOOYoIh^ z+0ArZqiSS2UV{~=R$kkSo(1Af4KOJeTz>F|m%?wjsHADZ{d;vx>HbDk4)+T$_GLT9 zbM_zT=`cBeSU%QAlooz`wNxOfFlo%8uSNz01~n1aw;-nFaZ2Y8joh_z z`s2_=F85q*?H}aHE(re5Z#l0nz zWZLif6r$b^PRG@Fws8=O9pEuG&3P-H*1ImGgMYV-IvTl#1bSSCiPGaPY~Kk8k_z?w z9X5m54OOqy#Wy+F#O00Z4jH3zSl_V*ALR^dnniSIXZch(^W_5=If;q_dPA|pvq|K- z8*r!C_^nB@?+&m>bqgP2#7ZHljwM~wZtA+vm&xYbY*jLYb1uu4U6T*r_`-$Xyj@h8XsE&QgOq2liao$Tw_5%Y| zx!<^Crs^V>y{msIHAmN=eH<$xPkf*mhk^h=!iAXne!>!n@@~mkpWut8%altl?npf&W^zP-q=1M@;Usl|0PZPCA-7b(*2e{ zTL+6GCMoF5J{dwDk9q80`PCg!6K%NHxR3+up(~i+cSK%H%ZQM2T@9j}OHNX{+hU(C z=GzM0Y@hPEdB#z(fATj^=dpXZv(3eQsm4|X*(ti-eiX;$)6f7cE%XbMLo5Gwj0=E& z73t^22|+S4$h~T9LV3zTJ_aqti5BHib5LlZoa$Bc%TY-Ay1z#0d{cH*?tHQG@E!8% z;$swkOIt(x`e%r+d9(i{sx<;S7iz9{u~i<*CU}6v+VDxTlED}AZL?BuOz>XTC34|r z*af^M9tN81Uu+RO>@uM0s00&!E>@e1QcOWR+Woz(&ax@JqzPHdFZb`fa50cUV&_$U z2jVIWo~gtxhS2=HzrtKqA{?8sEp*!-wJn3&^8k)&P1i1PkdNxA^Xx+%9)NnlY_nU~ z`k397;aTi8&=}XCExcLT;^NQBkPJ|=d&o<+3}1119^YmhB!I>cru0&rS2HqM2ep}Y zvnqhbw>G(1UOC#NW}8$hmC3@3HkNc%U#C}|!K#pP>dXoKbkN=&0@=a5$21rQFD+1ub5XtTZo zRvjZ!`9?DPo^gWu#h0v`(TNdFHc@B>-KD+LtWI|dMuF>QHFv`A#nDN~kmw?+Pq1$9 z4{9(KwdP2vvj`|s>AFPd%<1f1EuIn{@Fk|pvY{^^C? 
z#AsT;8u4|$!No(L!o#W;qlwDriXcs0nr-u#1sK-ZB~iVS)$)VzN7lH9zQscCc2g=+ z*0Md7#Uvlz{zbg_#c%ONW+1z!Vx4?mrs&%vpPD~js`7;Bd0nu!CSj;1@ z;^s_WrHzAp!hH;+GOhEhNBi37VRfGOd(P*ocHTCsw| z#QAJFf_d`c zXIPmf;w?^r=^f;qbF*IEqLzkCJYc)C*I0fH0ZfUWlu2!E%X z^jS^Xh%J=Z1Dj)PO7L<@x!|)BiAUQor7Vr)a87 z0^;UrreKiZ+>-$l2xytft?^mTXU^)nB7D}qr^TMl=>D~H8GDj+RCgNV@q2Fdv3M4h z*XQ>|SG^UpYua|W104VrXs#pCM13pRDP8!e#LitpEA7{C&z!-v%gpR0@!u55-(oY7 zKlBOBa0>U{^COeX1Pe|5D0}r2;qNZ>-im+pp->n2seAM3zZsL7 zJL$cc4O&@$LC|xj#dJ&=dFhQ8h_9K$PnqoAdD=#-WdDR|*qq?vqiNB0cN*F3f-;uI zQ)#)h@a-_M9(l7Q2PbTR{3ej?mlh!|(=c_7+qlNUUPd5R1d_V)f#6uQh{?!LQrTZ* zCaly}%hSQ&LQ7`&8EneG1&&`U#rX<)XBKCR)N zL|1Saf0885DP7_t&A_z{!v2RsbyK|5J$`B1et{K)8!q4XlNL0#e>0-yp(`lC`p z7bO=F)xMSV`7rtn?Py zJtwf9VP#IfoCaA*PjsWlskYa+OhoT%c05B9$p7g!US?9&n(1^CVRWX)FJ*07nL*;0 zV;)q(cBku9Dn&eYixk4*5wZQ1Qj904J|!flt~}EMxAiw{@r(SSYseO5k~>md3orQ1 zOBz4SSP{jhnmIV!$2M4E2=`I~mLo1Moti}`dCfvYr!)J4lr5>s`d1UW`xIA-BEkrq zTOuIuWL7nMmK6h~Z3}$W`y_Xlpb0h4U)(MVUEt$MU3A`YpyUL}oDVl6aq2t4* z<|wyfhNr3ET8X}$A|jycATMLT6rr`Jxn#4HZQYcu3!d}Wsa&~=h8=G$CV>kd-f!cb z{a|s6oRvJayp`5me$0BF%dMRk_NTvkx&j*VrAI10O8hpnM6J(mR&C-It#e~gVilD<$Bn&s;sSo zJ^g1!ajVbbgo@a)GlLbX$ivW!fA_ZsW6JayeF<}0Fgkd7a0 zYOjtjhadhR$faOPzNsI+UZ6{!O^arge>MmfGn}ZX(9h4?3P*f!*r0uM@*juiXu@~n zNBWT=IGes_L*mwTxy6cP`R_M`1a~1qqau9nfgXl1MSF(5%ZLxO>QPn61)DShxW~!P zcx}d;coiNk5m+ zFy&5EgXgaNv93Xi^vazMiY%}JbQ3Bwa#&2U#|DMisDsHv@FxggozPnqjwTcXINQe45D@ zbN6$xYB_naq|tOugeT+xW3iJP5e|N+)IIq`G>sZ-iZily`!Ke4ts^7yPvU}{B2kNt zwiWK)9jp_!N;w>%I?8AN!Y_b6T1sg3dxxN7p>Oq7lJ~5-Y1cKsh=7^Jv5d+7Ahwye zchPX@g?p<*t1FWqlA?9Az36fc@zz`iP2YmzdB|xBIx3LnzCaYLwBp%G(1BX*GXW6D8C2XXU+ZPxDF`I0 zKk&M#rgtX+X5a;K%(`$)z5-VgKVjN+}w^sr6ELH(tXm$Ms;2 zU$6_)#`V0;*xOySUS^bi6T<0#1Q;}BWJ=TU+igNrz&tU~=Qc=ieVcax@^8$V$s>Hf zQ1A7Vzc-kitYNiYw7Cm`<%#J{xVQ;|gya;a)rJI%bf1~;uZMtuI{I6gRjQ0wu_XLn zw=|~_AR5JN-qnVCX&rojgbN`T31jxfu@)@*w@bwS8$U};C8fto`5nJE2*JCy5wWb6 z>bU86gy@X%>w!#5XuMVO~Bu?V555Z|+vV{P*wh<(1+ z2n5bA6uxYNC1OeSvaY`?A`Kf&K!b82{s~K&2g^R7Fp=vUk7JPSgHGsJkx#NdPG3yA 
zgzLgq2nDm26zX667JXrxhGR3>Vw7LE^qD#==CJMmm^rD`N0~9EtWg@ zISXy9hrcuOMLh`KX?ZD+7?~G^2&6ENG^vu#pz*uT-i#QQ;dNmWP8IRD&tA$@Ko4MK z+wKq7qSad_)`!g>9-F>y6%456#%ngqK%@j*zRt_ngbK0WWi``T7GXJr;`bn;0Fm+E9i83q$(O8-0*Dn}UN$2CCQ*ExW>J1R}RR}YN&H9=Q_Pb4=k-~UFdDdwyIFAF;|512%2nCpJ81s)ORl<$Ncw)+W&~$mIY00jLbw--{_9T zX1Pg*8|HOL)?%XN4o8cD*$QPD^BPta$X$BbvzdFFCirCe+o#CfGif{@qv$1R+m^c2 zMT<%84kt{Q1rlCYKvZ{j^C_C8WoRY*63l2K;(FPY&;C51BJ^Jf5%7K$YB|g=7JRpF z+5g-}<_AOeet#Aj-t7w*7JNYQ+E)|!2t&Vq8d<{c?FjYHW-^2PD!EF|Lt*243*r1% zuvk7m8~?hn`~-dEaxQBQM4$?EDBtaFrXy3a5&5Xx?V{=kc7=Sxf5r$zG|3YZPi1;t z=f(sNzxfRQAoB=pUaZ{?G5+wIACAlZd@e>B<&`7wJ&yIljy8rGc9odgd%kX8Zn=!@ zND#Nu{1M{*qcQ)5rSye_U2x0XA4CjOU3YoCRf!n>IRN+7jIPGrBVhl2KWtE4L?Mf8 z;gOMTIlnda#rx~ZlG-f8)X!&;jkFgU(KQqCW8f|P4eFBB3+ndW#fSHNx84hj^5q9G0ROrD zxqV0cGvXb4ck-Rl2OHyg^WDY`zB^>y>=r-2J--GrpIm-51yT-N1 z+r4R3+|E{SMpNxso>|Dp9#?i)nK4|*O7fZW5?@o?`}DgYhVxnPDY93aC+^~`#QLHb zAN(&LqN_V@P{b6`nf&MZ*WsD3Y?qaXEX+!;6ElJ1BHV|H?)sO1Q9HXT={8_?f22_q zZL?UgRnfOy-j~!&NMc>yDV?Oy7WtHpioJ96eM!kzYX*3}_{YpcVjfM`^5j!d3$OHK zn%$%C4dfhA#Bj?bIOolw`o`fctE*?*u_F7*Rej}1eD0{~*<@ubGFyh$N%13SCZD5| zVEKc^2*U$IL$*x8wh z|9de1bN`q8e?9*l`F~&kBkTWP{_m0h$p5eC|C#e2_W$_)NBiIB_kZ*M*F643|BpxI z`ggzoTn#=xCUJ8sCx8Q!xRrquKonqPYYbqL0oa&2nGv&aaC371KL!gi+fOb|4gmo~ zxc^>Vx6FeqSnb5c&#!6sBiu)TD2mfhC@MQ4Q6XgqgI^>IGQUdwW`1F@C{kgwzD>-W|)$a4vF z4&s}hD91644BE?M9d3!-fY-o=8V&&UaGJdb4iwn$yJ}D1bmLv-#p4bxZu-^{nN$Fy zSmtNrx$CV2b??`_h8RCGSsYE=9~N-;-wuBm(bHts?g%K;fs&vr2y~`a3UnhMD$1$p z7(W2+GPt)quv684Lwr?xY)5>FH$W;Asm>2;yilJAPtcJMlVugOG(Ja57qXF?HBq%A zldR#@RhJf5JTSX*1I?4v87doBwSGa=Ot+@!rdZR|Z&i2K=8=_j0D66AXO+o{WZyb1 zoOS9;)AFveiZ8ZKM8~s?JzBBdp{Rg@~M> zA#ZNhlf!ot6BD1K#v@~6Tv-c`ge1#}X+8M+n`O4mf)gMs5hvb+>=VFh<9kL_?t2sF zAE<3%Svo_ne$y+mhZvLclG2eZZoeB;IPe}11sIdRDZK)*NM$|w`n>AUUCV*K;0w!cNc)rvfo34}SlD3yw=ZFhKil`o2r0B{r_>D?GTadzCcs1f>4h(U zI^&OjJxDIJG3`Tia?b4y4;S|`p)42vk5<@Uhb}h@ooWWop+e~_b)ElcEd@-Kv25!o zI6oviGCH_2+%(XhZ-i#F7Y5bV_I$QS-1@$3s-1jzq9l{H4_vJ89op?l&M`*2S&^o| 
zyJVS`nUPw?$G@oTzRA{;XW1`{NnrEgX61Hntt*Vz;k*8H8~;^m&b3{RRy2$qUC(X?I z(RAu?-2@;&!oM>&G!x@ulC5o}HJgzM`waV;oo? z4S4i%fR2%J0erx^%mTmBamm7i_4t-1P|RapfcCoX46v2QcjB?m??h-f(n_(c&=qoo zahLLd9Is9C)>KZfR%f*iiN=03(85DKEL?H#=s|KC`H#HJle9vysrA^Cg+uBq=y{Ki zYKUkE2ixe-x}=4337hfMT`4*&im)Z2?~Bg7UmhaCn|??ks3PA1e!va;?oF0SP|?)c z1;g8+&?;iLZgVm7ieW$eW+7t9CV;x{gEx0$2oJhNeK;=JH9^AF^4WwNikvh?mD3V` z>6B`oc3oosqIe3IhA@(A>n-C+0Cd4Aq9l1%^yX}SMQSZuzY-AF?9dEv`H)fRIi+3? zz_S%pITA(||8ahuso>+9#O2pHI|+6L`#~H^BYxBP?jNwodYhTR1w`ydkBzs`Fy{`(BrY-j_%` z`RW3a$`)8Pm#(|{Xz}P*d$-A#24D8sp#c1w{C*hE#Dj1}NU4L&doY(fm?h(I=0Q^Y zuNl4IDCm>`Okx~jL7O-OEZkra^OkgWuAG8LewJ_XvrK|hgM2rm`F4}mdY=Lm=MX$o z{6t*G?qP8W$x6&j+767>p!pc_sa^P&VpzJZH;fUZRl!^MdrK;O_pj#UY0QoXbcq1 zujbFtX=m~e;EGk~8S0>026DNE-aRvJI{Jw7JxTLa?jP3p&YbZ3jZ}xPji##IB*xSR z!}!h;?a}{j9T<{cz%@=S$mgC#+UJj?UT?kqkt%#~@Ht6mt5RJ0N@KaKkvo*EPDSf9 z?cVUgT0@(V3N5erv0;ex7;7pkZcW`6^_cXQvtH@>xb|>yn=vcLE4(P3WW8g)H}WJp zue*qshNL!9?7NyXfJbhKPw2oSt^)s z!L~nh##=tkiX+y2>cCgJ3Ljt%ntX%bPn_fy z=?-hfCSN~vc8BWY-_YVY9{$2;?|2o9JVTt@oNX`luJcxMp62URZvuTd0ME$Y$i}6` zJ`D*77*sOtr;;vJ0^NLe_s;$6>)gKd&*{=UGMvJtMq`vHs}X#wZ?$hAE~vRlc*U@A3i7Jrb+ctWMLont`6v z^W5=lSz9S3tidCeacUh8b?0{-w`C~+rGGGe)(_cYd~XpfE%H@Dkc1n&owth9-|{9p z0lw6_kHV3~Arz!vFS-cH-X*U>kx%$e({HA=O~%LB4`Pv58V!2Xn%&)QqD{RX8ePVl zcLG`s!&122)!4JTJCPQVDvFdZ2AhNr;|#MVUt$!?{mxy zKqjQena7C$@w#i;@U)v6rw-{O){T#4j&Y`%=LXZ8NBen#?SZ%IxPwg6!;1U|O}1^C zv;b)DB|M+-9R05k`xa679q$o&w@mR}PK+0&hx}S6%wxISl{7P@TZ<^4X=l-Vy;G=Y z2lORbE%?pWd@lZ5sl6`xcH~3BdnDgri@R?2zMKLd<&;FXvd4{$(7~txIB@i5Ox%s{ zb@==``tOP`hH52X`=Be|t=Eb_YaRZy69boR#LJ0~!XwpCxm%1yVH$3u6?s5ZGAY}un2`-k3S6d%Ug zwb+8C^LOc~bX-QauxvJqZ?=rt&0Deh+pJse<*xjv6_+3M^OrAD9U&|V?ih{zrTEiK z;%Onu?osv}s0CT!_KY_s2e=aFe^=&bDk0{0bG*5E!c%;L9WtX^ke*$~XBE31k&a8e zjCgmf{u#W$&rDA+hrVA562aTL;xe%2#p&pHx#&5giz3B95k;j&fM`19%$5D&8T= zG%9@Uq0W?&0?6SHapTOjCaSZpGDBY8dPUf0KZ(0fw zHR&Uo5BY3HcIH7^n!qoWC&N|0es7Rwy<1?L&Dz1P#5ed0{Rd%I^z&e;s=#vH)#;uX*I~-KXhBjeeIm1X^wF0E{YahF0_4 
zF$?)&-ww2XGF3cG{(~GR+Z0W+`7qyL0%z&k|Kh9C>MB<5{xZ7~U|9X?d%idUlwT1K z2y8!`&$SE2KLWjpKA8+NOKV+Qw?E;{yZA&_a#;&xZyWCP%)6HaPF}F*KcP{!2P5jE z>;;;R*+ZdPyqTk%=;wqclqb+U71iUEFS z7jTpDWDm1gw<$zZjp{7BlTCTcHtf6OqPOT=o@WdvTnn2x)^F);M*#Jg^)wGHH{M>$@y_V*;)zG!PQtgYlJm^4e{6?plkXgk1#;<+8g z!w(U0L;Kj|Rn2TK=dKwXW-8_Uso-d8tEJgL;0Wwa;Z<%bblVzDHqyu({6~Mo9}zRl z@7ZuVtgWH=ehz`f7YEWQNc7#9JGNTNkP?N0uI%h=tI zF&!YjzWcKH+;xe!={EI17;}mBnnganPv%BlwYSaF743Yz`?a$yaaZC=cGe&5f^ioK z%(%W%+^7BgXa@q$(NxD@Yn@yKq}-Ynr>^)c?3(GnXqsN1Y*YHqL|iGx_lqguvAgj- zIFry3KoP6j3&b%=dmiZOLqq+Ik>1m{8gTbEO)v&&6_J3pO{-c|x(pqo0VV*eoh(30 z!*+u-2yn~0B3}`tRx|0v+@6S81py8LIlIUK&#G0y>sfQl7KMpXQSG5!Wqt1F(5jyidL9nL{_ zjQ3>Mk>{Y`Bd^|g`U_!K2nAj(ng+5!a?IYo8x+w{$lWYoh~Aj#j%H8${u=VmlftOE=KTz9J+pK+*x}<1-by7jnFSC5s^VH(Gr!TmWVyhPOgFTf;fP3hmVb zPs#_gJzZzm<@(#2>P6UPemiKb%o^UBZo`J`Nq->+`)oVC2*ccjbW*#TO+!2#vguHw ze{#9jt$@_}|;%vY)It68A>-IsGLEnokpBE7t- zqAHy`-{BQ=p0+4CB}uN;>$ae89PT#D9i8gn$T+6k?W`qYC>l4**}!C|B<{pKdI7aR z1hJDCFtT$8P4OswyiUjX$mbbzTyia!JhRgS<}0X2RBtoKb(Uc;O<&i#7q0D*$xlZo z)N(O+P5pRF?XbF&w8{}Zu4*h=-x&3;m-yC&g#crI7B4}|s(+;>u&W87eX`LuadT+^ z-XXIlC8iFvt{sBlnQ3rlA54Z5or+UX0}xe-tZQ|PW9U!fF3w}jH^aEKgh9YDC(^)* zok?N;{=E+&@bDB+ZL`xUZr4BK0nnbwVd>Vl*Gi$`$ZuIl%G#g60Yrt3a=x!fkTIo9 zOQclCrC6s^vuc@G9=Eq&Uddu{6jBS6Ea+-#A3LRm$CXSaDylUPEYfT(t_zGUzNJ^^ zDIES((|apHQ0pK_i<-3V{N|cw#NeW}$r9YRj{LE*RIL8`JfIoPVh~Q-#DU(IG^xOm zi1H8-h6>PFI?z|%v0FUi8nrf3u85k5!vS8+g^P~ZV4}rQ30tU0Sl*I)Mjn?%lD=>j zla8tI4kpiFSK7W0?ji4Tq|j8ENl6CH?A7tSl_)OQpl2}-Q{zM%YAi+C){g@Yky90!=AXNsexO<)Sl4Teh@)2bN`Q7bZPiR;W#)8>qfXYX(H z#%}^S5gIg_DUJLOBOF@VB1MY_01MpG<2tiqOG=T`4jJ=!`wj>BFSi#{bzZa5E)nHV zpcb7-``=`kr%Q}Y;q(@l_-Ce$7OKi!#|&;OKkaLakwyMOZUVT(Vv3OKxm%M;%~~QN zNh&yAdJMt|(kT38NKZ!SEoFQVyr&e%%jpximkvQmDUgJffV?*+b+&K@xP|p(MUJuZ zzt%t5$HB)oMej$37} z@+zY~33z)2IEs7B|FMTsps%-3d%?77(?j)+nMl-TV@Bxg98y|jxDh%D$D7t^^4S5S}GAYm?y|m4%>%@LF+oodlxKu2e97w@9GK@ z19@?&ooVV|7ioho8H33hY8wYtSxU=0b_*(|7%JL-)09k!bTmzYJ3ma=%M#AC^*hBA zDI=o}ji`#q=%NZaS6=2ap`i76a(i0)`%L{0OD8so%HUJyYf6RrKh487Njfz{U- 
zNEvpanGUToIY`66@QG1d2+;$$c=5sUyxrS%g zcVyj~;hWM!k0POBf#=36nyIJ~Kq$)e4OD%XXYO|bA(#K9&rEB?z21$r zzkH5}SCqQzJ6AQA%noBoES9Ls&x=j3xd)`_YT_+KDtyz*0Uh9=CF0=sC_2M}2;?Ea zA4qXbmsA9;Qk=NuBeez0!-$7K=icK%B3Ju2n4`IKG6h2*vD)8G`b|tTdF;>9KEb4S z@4#-RBk-%f*;PZ_D!Mu&^PwLOKk15BP0=*g(l;){rc%9^L|#}@b%JtwPHtm{T}-11 z0^G3S!gsuL%bN<+69W#p#Wc59W+`4$OKC+!aW8IvBYpma9vESp3&jl;rNFTqbmII2i!ccq@qh#&kV=(15 z=z(~noG=tjwPYm)BjQHJ;3k~bMBh|zY5Ey+hLdo=s{@G97k>^yK|!~?zmbT%!}8Vu z)q^H#yIx-Yxb-)RiuZIfRQ*b%o+_sSfHxrdi$~P$*usX>=3Sz$9menihf_YDh(QaqxM8|F+HF}=Kor$gs%){?We5;aUm+`Q^vpuFYc_)fGz zpeG$JH$K_vU;$^At2=FLc6HQ1*j}&S7j%7kD=pkCYAFnXf5X3-MsjKeukc+M;Wb3; zBOV4XMb#R~&l*B0w6)txSkJVT*LmnP zv(mtS{4d7tDLB)h+ZT8+u_m_d_|yob~+8rK->Vqy|sgoTi~%iXjMUrd&w zq*Qc_UF7=>CBies+%nw`=CqaRvgLNh*;sny(lRjdLZGmt&fBaT?ZjSNNbuCX;#ep~ zqSv9DQxA`<88LK(*q13g`#8JMR-=e#9NouTJACWaQZdulP3lV9hG0dj87Imntp=;y z+@848nH^Y=vDZhV_#~jeDh{uP%?=dx7-p8t#5ZKoRxR-pv)KmY+I1lv3hCz^HQ!1i zWu$UB8x&}wC1hMH;3^x7myBTYLbpZYY1{vW5~r>u+KOvn>g)@X0x@cQO+}GEO~gdA zzzSGd4sa=6N4$N3y%nC(mlj9SNyprJ>on2K`Jql`T{1Cv!GcO&c{+Ql7b#Ld;>91h znJTJCnx~89TqmxX7veFbsw)Pmuzi7iqBFJOnmw$WJ`OalZur?gndk`FeVSQvoe|`g zf9c|jt)Go8HtlpdJoPG(C$tLMtS^2(Go+PPht&w-WSeiC5Dt(2Qe!i7G-I-IYz1hi z%vS*gQk;abzg0(WCSK&QM^l|-v3qF_G0pJQMv%?$)JJ9~UZk)iQtk=?{)!_4DRSi5GEf1KLAcGv9I`V~Ud^c0TPPiz!S|OmlJ$O6*BG&z z&@2Y(q&md_?A1r4&8iefoXwV~4=K#()J9Ouw#tCR6Kz7+F)8(m*sHXM+-7u&Bh3>T zwLq_lHf?N|RAve6#uTMOU}cI@F)(u?<2O*+OjctAeIla-2x}&*Cf*L1NKvW)qL|4l ziPr<7Q=HlX6tpHu0N7NgOu&8WzUoNvM4KqK*F;7cP}l5;ic|;tFx9CZFhXqt1c=j; zs$ic@NUDU3V3$u|)c}7_NGgr|G=okdE9J5;GOG247Yiz7t>#&poBmkA zpnIvHPCQJYNIyuUlpLW`q92oE6k}MWQ_9W6FnE>-Rx-SF33ieZGcvNpE68Rfq;m(| zF(d9A}7K2(f84Bh#PVAReWbT)dbVR)F{O@#uxlwe377El-N@#Hq9#J}gJ z@U@HCyP2tRuFt|ZyLD)H&k0jrM;NLh#oR45?D3g6QTsL6^H7u5i!yO$uaIL^z+oIN zci;x#_wAD~lFXBm$pTpOP?5NV?iG@DrTj^`tKv2!N0>>4WD=5y$se-!sN=** zgi`)QuTbO9WV?yJGWQ7Mt7Ha=zEbyMCauH?A!PZgyS}3wMkvV_keL3GRCAX!F%3u{iIXLJo0ss zhU_Ekq*AFo60amv%yIc7d@{LMC94EV$(CeZ`G#0iS}9T~@TeG5xN-BOQAq-Fuh1i- 
zqzFm&L|(~;#3Rt8cd2XA_VgoyB<~~yvJ0te3M~m|%yI1`HnK{oYqIu`Blo0DN%h1n zX=k|cX);S8 zQ~q%_vIof;icA?mgem(tT@p@GrDP3RIYl{1rW7EgD9Mz29Gz@Yl18#5DJ3~2F(oM_ zL6(@Bf|{f~#}qJ5mL!}sDG4iiFO?IkcTbd_q$`{x0Z23jlVXrUlwu$EOp1{jNz@eu zq?y8xTPD#Z!ASw60FvT~Kr$&xDN?6|q9h;@wmbl6N;M8aMolJyLs}vTnVh2lh%_aV zayAiVCyO!#@p__^B0G~vn!}1C&wVD3a=65UI}=&xWR|TO@0RpV8cG~W%8@XpJQG3@ zF}As6g+`HcW{`RopQIY&B2!PYC)1NEkR*}nNDxg3O?IV#Aw3f{CWax0A%>xVv4+Qo zgCT<e|OJfN-y+`%}o&)mzqc>!?&&d(~1J)l@q957u zpF`yj7SR{K2X%{d%lCrl3)cf`5K`Nhz6a;{(-)B+JUxcdi#=z5l{aZXwj7!kCgCEKoL<)h==irJReTdOkBoPQmF=V1AUdH&YHsxOsFj zgMV>9*)uhq-eF7`SxH*jcigx%d=Jau!tU5ZvisuRw=CtmZ_+ozh}$3 zyy~GaOL$R%-ZE&kW^cqIIv(%AEnGQhGq?-=&8u5`HFZJXHQw!0S?usFTUHoB-XUS` zMioGjQVYNSvw?i5iTo(@=T&CHst7+EGncS?KxUhj%NMPL6(TIQdls2Cb8LC!c8a_ne`yyR3SJY- zK=2XicHE{w^IIrR&@UK&sNSH%J**e>53smk(mkdZ?2jLaL4^HCdxU#FxA-p@zTiEO z20@Jdn0rJoc)pPQ5Kw)TdnUKYFF$=j{>8uqdGzz_+1+AzqIQAmL%sL&-{N?pcY*0c zG6rGx!|i$1Ve&%dLHP99-@<$1@IvJM=~p{6T!U;07M}yrHH7no=Y=Hz zMGA@NqqrqngC+oZ4)N*d`$b?2;t}Gu$54k+1zOpkyNBq=(}qw5)*7tSH*H9+37Qtd z?TD%gn+C!Q_b|K%8Axf=^HUe81{|)h+3J9sa0*-_q#Z;d*B7G=#-}AoVXSw4lj;QzjU+;7LJ#$6%R$av~(zAMHd?rXhEG#!QGn&_W>! 
zNhqs6Q00C{CbXhJVnZfQLU=H6A%G+pxL`m6^jrPHcW&l;{@c;F1LofDC)UNk5>Sm~ zI22ns${2MT1Rggys}G*9;oFk>q`%*p0z+_GGL{>sZIOJ!@K5-=c)sI57G`bHd$U}= zmz=xbkvtq8=F@6*VEst8`A?p=_e%(=r@AsAno~OCTXlEue|NTi>B{`heog80=|r+Y zJSlaaJ8fio`Lt)?U}Sdy^hS9{?Y;wc|9%VCkNiepurRZXpcN#q4994YvTeBA?6P(z z-!Q?*9z3bHS@UfPoHlj&y;|Q}FLo*3f_9DbqWI^pEwOJpR!-3vMTVk4MW}t%(ON^X6ME zkIBbhTky9h0(ZtvLcX3TKA=B^K>RG}rBX{6Ti>_-paqK$<~L#k#uF6JxWBmslRTIw z)=l_4ASZtCTxo;`s7@nVIc?ggg}zZaLN)rg3WE*JO~p# zD?N+15k!0vS$`+kCXW6*5Uy0*8jW4c?iy&xLD?8`Z_vAxdO|Z-M%@IJ(M{c5w5@eb zwn6eifP%6sSkNWO<^0+Xs0&vUx&ykNfQN z5B;9KWa1f!D~y*fsFgQ9iT0NkRB#){ixYDw{;)8=4>)!iE!W>b`^tLqafxxbpb3BN zonY1dKw0<0Dy&VYwxkaBNKgpK7kq$un5CQJjfBXk-thk??6%BmOzl$Y#yb*Pq&H9p zs0Xh6t-w3{7qR!djyq~shsOy$59JI{sxpdLhikqAnNAhfLs^qSW>E3Pa1&&t%p6urLxE{~!jv4>nvvuVQRr`i&n6*o2}|#> zab^mO#u~ZSVuk4i8NZs2_F1t-{HLA8l@tU^t7!b6cvf(5F>&xPQ9yPb1z)?Cuf!6C zSCj51Jz;(A*}s&SjlDd(4dy}!c7^BRo?U3adjZc*R|Q7 ztk<)d)2pKbeYH^$oHO&X6yN$^{ItyoIQ$*83R&w_E;Xd!tSZjNl!uIL*yuTEX#g^= zd1tC94>?)+Ym8<_tNx^JpLW$IZIeKAQyirzSwH!4`U*RDBkJ*}J1NU1tWN3?)6?-> zIg0Mv^~o#&#;tzb0D4}>Ko5u41zSF^_?Aj8kq`d>kOpj1V3M48ZU+?9>nj9gXt*07 z6YH5dBHSI3<3g!NDQj>SdE@A-Z=P+*;9*(Lzek9lKgcLIIAj3~42;VnsKCB~YFyli zL?t{@!MT(v4}iBEvyq)bQh{xj7>IJ8pk&1z6{y{HH=}acL1Y zwd?HN@;zjSUnFMZIdm#R-n`7EjH$G6e9r{<6`PHY&rYqR-NxeHl?CPH?0=0nK|K)V z4A%)OU4;Jsy@@b5nSYeO#|r;+uG%Eo7)Yg zB0A1Pnnr#uN8~K2o1mIv>y`Y1r;T*%)-44g50fxf}W@t)-gP zUSbV+U*T$e(b-3qE5K+}iWw}yaA&xfHjmb}MSzVql=FRG`4hS0jT$h3j66^2y(^u2 z;hq1mbD4(!VV`j0B*Q>)?h_$uhC+!i>=t*~jK>+NMPrRtEOApQdTD>!t+h_iL-n#f zK@@Z2q^hWT5%#jQLXCZWh)YTX5XQw5`io<#`d4@Q*>`H1fHWk3ywux*ZBx~@w>@`d`=0^qNBp6C6BzZsJxX~UUO5aI3u2c$x6B0=!$Bcr6bIK56t$Tt*dSK)iZ6Ad78ykq)pm&=#L3$-h?Mk+O!Ji}t44=bNg!Q@B9 zH&f;<;p#`(@e!lSY*NR?kKHqCRvKllHP5n!^kj`0V7i>A$f4AxKLt7&N#Czkp7|O# z)SbWWzIDxOECgN(!3CB{q~$K51`&|1Mrx&+JKr;Je)3{~5Qqgv5a< z`UYyPtx9QOMTgarD$XZgU8scg8Kfny=Pa8+I)#-=G%9Fnk~SX4#a|H{Yb?D);+{!T^#MqLri3)V>XZRh`15WAEc0-G>cbZX+fk77_qLYyz>M-$jQ9p#i;;i~2v+F%Vgo7>nF_1Hr z;HL)31dJ(2`IUCH-YJu{=C;@%z;)cdy)RC_Bsu}jReAJRClyZG`aJpkj5b5$Wd?N$ 
zbTUV#*VCn_J{u}FZecpzkwmBIPM7D4BLPwZ0e6;dy=*xebeq&LXiQ@b=klvm!>91M z^I!73?q4cvVFqw&2zl3jhlLzv`iSU@=2`^{3<;MXqO$VM27#i18U`r+-3*Z(p=b_c z6~CGDoCy`c|5OcD(n@vx@T*5n8F@75{p}g{OR~dAfDGeMQYStE}T#^a#of z`qtd}E)bcarP^XfR@F$@1Z5AUATjI!SrSq()HXT@;*6pXBr8w_52BzgC3LRo$+Z19 zOb)-sd8Le(Te)Dk+}>$^Fuau|T%u9m{+tmm^5Bv4nR4zK!{{tiewQ#$> zCz7FoTg@nGx$QBy!i|&JaFaX}G#d0;s?jpx7qdA*yah!ohYMDD%$v!yY4Z4AegOgx z51k3Tw2g#;tkjkW)AF%aK)6t@w4{kL6?H7g`u&V~_5%I5@f12Z!TYE)jDqOGqPRrW z;Hjyyme|N9lFU@<&cbqeibL7^y23{1vv5V*-!3OrR)|LZTQo9)wz_J9B_|Wb=nV2U zOS~5D8812Ol{De4s0`VC_RuHCe?SM0mAdm7Cy!zNKuX{!hB2lyvZo{8+7hUC@qQX- zT?nr@50ZC31$!;!SIqW^h~U>#;P{pzjni;SXZcvZyUvGz4NopEFh*Nx)xa6AICt+S zm2rp!vd24Boi8;ykQo!8$E0-<3vO!B?@|$7iS)sEI}h4Cr&=^A7xwJsJ;`Bt)(*{*oLgqA;JI8ni;P z*hx{pmX~%;FD*_#r64~}bLnIZs5oHU%}LQo#0W@HPn2n}(52b#V|Nk|6IYdd;j&#~ zIzsK_cH^-IVF--y7#nzn?hgMw-ZlnHs>YSLY~%t+CkqgJPsBH}+MJtQKzJb<{Ok94 zH`-mjAV2=VWA!(NDI9hrgV_lf1lrvJ+8mD?qDwgfQoaPCVwZ{m+`KDa$d22>_P;Va zW4|t4$l4c(?0x?%HL&)B5crGx!LY*)0CVn|V82s?k8dr+XjAY13-8?x-VB0tQHAvoz&s+A}p$6u(R?0Dm25n%~A<6J7HF8X1GOh68BScwR!2C6`T^ z$1)B0o{<3;8qyhe;83d5cZ@M_DH8r5()5JTo|NF<-DCxBQZ&J0MIw~Q zTS|IroUU)X*AOw*Mw@)v<|UgLWKw>vcqueI>a#t`l%5>2uJTcH+JQRiuf7LA!rOcP zKFue%ekHI^$n!c|iIDsOv*%Xfdt#E|RTKX{YTw3mxE(<+{6=IB?)~tvHMyn0nKlW? 
z>7$bt-fw8Y#dUfS0 z4#qB1PK|ImXJ@R^Zf)~^s>;b+B(&IqLrq^+$?PZR;ZQq~tISMmrS3AdDAeux4Z)jj zly4!j)VOTfGSv?Ijj3Dr(hJ5N%9@~CGe9TQ^@b+)+U)^dJ34j+LMx)}C+&1$`!d`s z8$c6c6%5|0L(?xn*?SbQsR?Bxjj92;uF8I2z_|Fjo+`rMSuz=6)$tuEqH)a|wW!EH zF?a@ANjthf0d*iflby{n<6`vBaS-!Mb4hb?Jv(XbIwf5@<{5|b6V=!uVwFcZjGZPw z6?0)pchV0tx7RrZ|du=bd~s@=+E zqXfTsuFcC;aT%pObfgw!QG&}2sF$X|PV9t;0+!ScgQpq zdM3dtr|tf;lwg2(Z-=r{0a5P;A-7mbX7cV-w5*2sZ+!F|NHRu*m?fC-D+=K7*e?AYlO3DgSuS?Nmr_TNlf|ONDu<{2EoWsU+7v6}kC0KJ)<Pvrn!b0;Wu!z5or>nAzxW_%P9&g7yp7_^h?=Rbq7QM=+yj#rkJV?nrzmio*sW1IY z;?7Owbt&J=IUVTDb)s z$h)4y9R1ZqvR_mc@^W0q74lqRdOPtu#BT!Pc|Z1|Xul${>LaR)Dj`v26rCHqws)`d zly;mfNALIaE@Gr>du@_ck6zFFZ`f-1zP3cvcw!1VpXqHpO+9@0g${Uc4RHoT_^D?^ z&^-UNksvAU{Wjr6%Ctq%Q=2jxFR=iv0VOC&_u%by(Ymd13yp}iMZdj0D3Rj*$%FTr zusv%&?jAYwljp}|bc6gl;yiZW1Z-M+IBn?iHY69$`EAW0q{sr%4tgDTZcU15TOFP% zi#2e0`u@83IA``W74^o2|AwoBT&SX)9!jh@uZvMj_#BzJrinJF`ty9MJXI~TI-9Wa zc5~~s*ec$s1|d_7x4LS(_Ov zMQq*0%(V%(uq<;f%tn^b|1~wLlSoIdg;*;s*8RPh@SVs1yLq8nM@{N^cNQ-p?Lhnt z#yD9#!4O~4P=ts2+q-bYa4z{%tqON2uh5&;rcnrRiJ-~6(ufnAd5Yj&xop_yo#}?vMW@4`Guc*Nzb1LNjmnx0_+6m^Cg)cvv zKun)|oOF7g$&(nh9^^XZUS#I2grVzlG`syroyPR%n$PvsZNY3W5p0Vf`kYuo$ zJL*(W(VfKu($<_k#%!#AGOms$tr?mTY>>fF1Hl3=vfu~~*CAXGiW`>#h0u6AWGYue ztn3Em^6O;v#J%<}ratCfwQjQ!v*cuOlbkL{Cb5Ao znFLs<1rjudty7S8yyYbnFl4G_&VAg4j=hR5qEK3Ib#u?}V84UjrYl}xVuRob?Dt_&+E&-LHh;gd}g|5}z6liSjzHP|V6KRQ0^A_Bc-LFx`-{!X}3)mCNPAh8U{ban_BPBdO#ze=HJmMv6fJQCN%2`*fI?@y!Aix_hQySqRC^K(DY2H&u zVD?R!VSMi8E`M3)9JnF8`ik(^IS-tB^b@6ENc@Jtu>;#$p!yM;Le9CJ86s(FVW92! zvR{j@+gACJNWV*SybvDWm+~TKUT!+VQ-uFQYP|iR=FOh{txx4gGLvx70Uo;^z2hf~kB7V9FnupLULD4pUq#?5>6V zEZ*Hz)syE{$4T4R#>rBdF$G0y2oE)u6WV};jW-t{Xw&-W0_0uXO3bhvPy1~QtYcL? 
zQLv7qWv`e}Peor!D{X$82!%MV?lLhm>sjpVqs8xcv6!aHrN^gO@C4bmVnX>rm*Tp% z?ht;BWbxMpP2JH1>`xK;x+X_%TJJBb72wFm)(cldMUAIB|YS6lO>fyVnQm7-aByHBM|$B<#aqO{MOh^Ux}j){VOZ z-1Ym^yk(Axjf^CePM%s$@&O*4($v;)o6iIiQ>hTs95gHo>YG3>DQHVI3_cq`xm{NB zFDA&G$f;iw`t0z+lmg9N$$Dbuf;^qBcX8UuY>t}+icu@t8U_xFXt#(k-=5OTI-a85 z-d0x*{(7B18h4j2Nbe)Zjio~7!&9KT<8 ze^NSJUiSKIW>*wx7z}HAf4WJ1eX~AKhN@}&X8Z)7_nFcT(+8s380t{rMt}9B)$0Bk^Z1p~GG!Ngck>&W-d&Ue9+Gc&R+7&=>x-vaq!I=OVJbZ6+a7 zTApJTY5a|OR5%mJkqp;~#W6{~YnO7}dbwBoBX?+}5>2w0!{x3l^ON3ZaIj^AXp4c{w-87coDJxNO^ z{@}9?@S~f5{yUuEJ4hlkN$gE#2Bdq8PQf-6zA1QVojxx7R-dR~cH=x6z$l)IZ?6?^ zC7Zxeh36YLqyHBLs=QJNsau+Y&^KEsy*{=R^{qyo{qt33A%)WqL}R(qJu%kBI%^C7ns=Vi0RiK7o@m)LoL&JT@>ox?G6zC0=YRDD|-|y&f6y zGMd@|Bextn)_Pms1e@oqHKM{W={aGBt%rvu|7_45Qm32o9yb6>b<^%r zCINZ7!eIBmomGQMgsiBef0-|Z6_nxn>nOrsFE89ETojEGZ4vL5t_1PknlhjS{b(Z; zsxCvwb6c!r${BRr0UQ0fR8>qb8Bg*kcrN;f8M>Ew!v@;AWw0I2P8OovMw?31-n zSi2PZI)cgp&MH7@!Unz!_3aVtukt;J1nTG`RgU-XEeR#NVrW-kBtOSi?CaLKdYG(3 zpV(c8pf@?Rztd4)HKEEJVr8ggF^Y1~Q3R1sD=I%AqJ0Q+e9Bf#x6D3=Y?6JHqM7IE z>w}nRim?*BbJ5!Qy(4}pjGPs1joW3X>JLmb+k)nzSp+Kp`AMs@s0f|9V^j;6mgA<< zw+gb)yKEM6(?a2k4aUN`&)g!<*~B-8s?`ZvS(#@e+~04f&wC)_AkT3?Wr-(M(2lKL^VurtkN_a<;RCK%8*3<)an#7 zyC7MjS=9X90dnXg<0MWnPq*qlRV-+}nNPoyI(mtB6pe%yP5YL}XK_Re+15xi_D#eR zn$=cYN9;s>waEOG<&6_k6crWfJMjm*QjyvASHN)4Jl0;Z^Ur+BL|_xVa&70aPH=6P zaaJAc!WQ~fT+2}ChNXr@6D?MZC=c&|t`b@YNwr|ah=ZKTsyy=6HS>W z(v@n}e~l+Y6%A9Ca*hdQ4+V=-ESQKN%KfBO@lViK{rta6dEmQ3c#m3s#=uVN?HkhR z(u9(1=Ni}5szSSmhR)PL^V)^e7;%DwIk#c=$aE2 zr^*E?T4=B?E#lFlLpRN}q%=h#{XF~Wy-6}{XTo_wUgp6X!$&+P68vr%u||m|9v$lF zgL(_l#`k`qv$y)ISX7w$hkXUzih`REKaf@7ku8}-5i3Z^c;DTaNDmo{4ejdcXJP-G z=VLHHb)SI*u`C|V+c=kxd5#J>dC3YcCif5-!sg+{&ev`l3~^1gvU41=#fzH|E70v{eO8CEX+(Ob_ z7ok9o?Avngk>(T$lMM%zH zs+3)xn;bLtGui=yND~KWspJ@Q?ZQaa{$f;WJ?6F5R#Cn>Kh<^3$7e^{n8TN09v+gDVHq}yp{^P~K7YWA4>9~NrC|SWQsno|5XV2FSP@$OxeYqn_>T6pZ|jjH3b1{;rPKm`jh*R*)h_5a_z8 zUf0*I&h30eNUBYcIx5qDPae1^EqDqPHE4~8z+|B#nZHBdB8+_dg2iHLb-j1R%5*-! 
ziOyDJ81wbseGkE(!Fc^>1=}o?u2S9n*mZ7(KBQAx8M8Xu+{Z))OH6z;OnfKInqyAX zukZ{(J)w@L-!B+y4KDG2%o=^6CDpzUZgu#t$_;jm8V1lYDk}678v2a;Kv{&xy@G<( zRlaaJZm}o4uoVbv8uDF1@2ZqTdL2o%aK#rr{;0c^!yR5BAVAH=<_0xJO_pA!&NQVl8ahxgZK z%}Box(@EdFGKAtRbj1BArIkjA+?LoKUMP)aKc}{e7>B+R0V*Qf{4|R!k2H@$85Tw6YcLl_ zZE`6u^m-58M2nlFnBE=(>`@R6-|AN2>zFsNgqhLu-W+ZRE}&O`$n&Gm$a-s&w-{hj zko`5yW3*m>4%hLb;5{xOV|oiiS7Rip_Oct z-*Jwlk2RPr$U88@>0%C-A%=UWuEOU!RSmh&%%X<2HiO@yoGi;|GJRztm6FKf!>>_z z&PJ@tY@$>kqb|k0L%iCROkB`SCuc6~@cbLtJ$r`DqI6OYcU|}>VwQ9BT8;t#<{;82D zq7|W9akDs94h_ee)U~TJ_iuiXOL5I_Lis(ujDODn%}Iyo)Xk1IdZT90ORPL@`0k}+Peifi%j z+n9F3ntitOdo7iqakUm9-htdZi3X9cr#g`G?^hPAh-)%iq7{UAw4dKir}pK(wC_h~%Vsq4l(a z2S3Red?Te^*)zzc_0$W#g%B7}zDs^SXgxQ*s4O5-SUsRS>yAfQ;!}K|(O-!?;YQkR z3Im8%3`VLse3-teD)63pP$wK5qv^Z@pJ$twF!(Usawu=zeg<{3# zyXL|Ct+Ek@((Wtpw})X6Dr0kXR}6p7_2N$6>&oh3DUNT2S8IerZWkBbT`+n6kc zQQkFl)2ncZJfj!&E)2ev$;QMg4c-|j;eIGUiEKYaMx9RKHftO(f5P@U`twYOHQ=s8YPGk*^Az)o zo@>9+<6P>6A{`VBtj}47IU$jLsONM#fqV6d)YLUYwnnkPp4D!r- zNMgsFAF=b#bWj3!88JupA3@g^l*c>w$Wa74i(;`u%Hhuz`KbyVM?0?7L)OhKjs#!=sWO(zGR+>ZgQ>P*F)Rnuhs+(eeZUv?z#2P3srWWyN*;? zYyael+jRNQ0Q9cfTIG^5=;nvtoEa=nnU-4ooe3rFluUbSvIm+Jvc@~Q<0l&UOsl(x znszgPyXT;@c`QHFLm=RnB0cPb_+a9DBgji))D;LExR}++%%C*wj4zmQ`#gtShrMpLq_}69U=;QY2 z^4lNkt9?(q^e#Dmq&e)_(nzhTtJ%FHj(wV&PT$##$Hv;coO(CN>T*|q*J|&l%tU)A zu#J_6RE4)1c|hk<-w`R{&e_B` zfw7jwj(mzuy^|0>{<24y8)cpuTp0faj5p4rG1vQa)Ack~E80(feX<+OR>>0oB!inY zc1_An$!AsX@DK3~lj_SwL&87&(r!@WOw~FZPMuwwSR38J*8=sfcra?zvvyLMNeieGr?E}eS`B7eE%UVAn5G)Z;0Y5Zi4 z)W`Z#KK6Tc!CN)l;}&zkegd%OQfTo@ue<_v-z|IPO#V7RriL+^?zGYf*vu20*{Jc5 zmhxV;??H&dm57mqOT9kxn6mOILpC_=eCLjCz0(;p4n63nD!l?vDm*8twE@Yy=ci>cOO2-tR~Hx)OlNHpY3pMqm9bDQ3OtmM zR+GsnrYcn`YehN9px=qJ#;^iDiQQ~-I~Bc4cQH-tiUg!~@N@jG{vz&6wyzD3aQ{#* zL#qHJkq1iSe!Im5hZMIUU!-680q<228|}|j&4*Q`P&Qqj8A4xM$1d8%DSTtI@xgg_ z$6BFdUu+w*hOS3>;JLe8=aCm+ew{+{9Dlaru8*8&=HqI9?jn43Uj31;ZNzn5?J2OI zTXc7|I0fDQ)#)yK4Vk+^tP6C{FY@4YA^RtVB)p5%mWs&x7&JZFT3(0>g~SQ? 
zA{D^X$+#H)oUOif(Q@G8yGeX>2;CLmyi9C=>|Cw$v+c%m@Lh0onBVC!mtEIn^Ute; zIg)9jjzP+shg0XqVcU%Zt;E*i{1^KcYSX@75xU(*OF!ku&jRTu(kTDq;I$AV9Kx>e z7P)_fXk`)tNT*n^LK|f->e-K*WWa~c%^-}c z`u)J`G{;rDKj-}5F^)Q{=ms!{QgB zS^$Rr;e)?gz_WFt7Yq5HF5n9I=XOO;0E<*-x}@x>2lB>yB))R(SB?6rm^1&^>GEog z5FpO1YjioTkrDqMd-Gix^ONrqm0fC1TkO4xxPgC5r{8lO$=kxGK4o#Y|J;J(!f$8x z{Fz?wnI_71o&R&Y>MKuJKw{ylKIy$6$W@=#uJU5uZeJ(jYeqCLOp)tKZ_ey|-EUqBw8>ZD^sQ$#VG-sz9PXK>M+4JV3k#u?N{ytH+(Ot%KU zHcm@p~`lk4IkbQ@PnllOnlD z!s|I=f^<*dd`)&9Gk6`o1=a??T^nBVzVlSXBtQ9>i=MW`VG-TfzqBX2U!SsBk3mKa zxi7kxe8u-)S^^47SDtwnV7b5HE=$`qORD$p9>yA}iFb{ni2(swyuG7-;aKDD%TJ~Z z#$UNbZHiR4aaiO9%uN!PLH7IyX7z%Q-zVM3wF++J$9_xi;vP6nv#^?on@zO)z3*B- zh8@{Be}fnk3s;4Zzu?1Sd+I@E-)05ZlRILGi{Rax_ifWmm!dSA z)jZh6KC~}new4^#*r73cQxwz?_?rG(s7MAD1H;ebNY_MCJmaJc%A`AOhPMMZw32uX zG2;d-(h4zXU-|*jK&nA!it1_*$SBJ%zFL%E<~Pt5*oqedRGR)Udf<>E(_;iE6LfeV z6<)0Em`G(@gcK7*iLnFb09zdtL2!4SK@p9;S?vN|GA;|hxw5R3td^|VR8xi?vyU<2 z8vT=`Pp)6PU*YE)OY~}GGcnd>85aR$p#m^53vT}8xXcSj`vgVKMWyp(ROx|pNf($^ z>b}sm=HKl1fE|<-_6&8Y5PqGobL!ihF-N#)E>g%zg6QL*!dQHhz8LVy$Ns8$N7^bF3tV^{a<9*tWAk7#X6C7iZZh_wGg2$J$n=lvxuz52@PCI z*u?Mz`9$#q$uZ#AxmaFS1h*~K&$vaiQWj&CT1=YRHt52Lb}NwOMZ68hKK$yBPHSKF zpG}^9_cijBm}27)fYN#yD^)gpVe0e*#<90qrE^GwT)R@cRJ+zXqW^(JHMxNM7X;S` z`m2u%#LOo$zMs&(RmR*7DZGu}z$1fC*ja-?nb7j%3n|#opr1vj{2W`OSk>VX004BmWxxdc9Kr38afp$_ocKoV^>M~ zv79*TLiq%oh4QxPtuJ4%gNxyC?rrWB4NQJkJrdu)`bFkFBX`yIG&Got-s^pIyVwiP9az8olOwK8*Dy!-S+h-rq z%6SSW1i`)Gy<1+VZUzi9&fJ45CUmUXQ%tQ62HFi`7=Pm-T2GdkV@?f3`nXwsym_NUU2dt1fhCu3&g5?8 zdh80zrvCy?K(W6t!CrtDXU-VRu;6fMQ5w;lG4@Jf16>SjkrEU-OAkg2ak+(*WuT(D zg*Hc(gFokJl1;YB80R+m=(1o!dz-kPZkxu^)SyIr{@gsD2A-wVC>toXb=^*dY#}Sg z5+)kXLOsh6hcg$`6jY-n(I`gR-B4n$bk22FInjq)C6&}Svb8q8Mdi+-(&|cWw569! 
zIV;BbBir+2$gSj}0~RFN^fA@qRAnqE>}Zce$9eWq{BIWPJ^?&a*R!DT3O{v+f`Im z7!--3DLKi5apB2nbczk7jV`gFyt;C~1;D;xN@bHGl-!z}s>T7BSGnJg3%i!8P%0%- z#7+^?RE80gCU4vC0$8o(s#<~;pWQ4#OEc+d0%kWWoMzF}6iib&&84N$2S+G!0ooVW z+I;&Q8k?)D7BtjU(aeHy97p^UZs8mYZpAsaQ7C>vZnJZ4jyuqqL+M$Rp2g{YlmN6Mb`|=pt`J4Wy8}=|fYg8z1zH*MMJzNM#|8&bE58~e?$uT9 zswAqka$%L`%N92jI!EEKvrc0?Rg_ZI5avwNCMM1UbKH8O0BjqSS8|FCqgYuL8_7Qi zTRz)~S+i^GXl)f{m!qHaLTa{g%3NFz)%dv@%r=<`)Gb3?V6fR8Fa#CiAB6*l(0nw; z`BznOB0%#q~8c%J(zxCKu5`Bi$lr}?|^GaM+sL`4xf6R6V z$BnPWm7mK8Vv2L5Uf4tnM4;4hh*SNk=Rs(9L)?^R?|shY4xdl*U?<%f$T7jT9~~R2 z8agMqrzIs1Hg!%7){+|)PXWSVBMwU6ox}{&uKPb|T+~{70ChtV4Z(S3AOms6nho z>_N03K1P@zKv|2j*P-kicolfP7IUse{af)$tyqKC8oDa0QF0YZy6{?uz-KeO4n@=; z49d!;IoTD>$~8^d-q`G^$_f;}T5H7BD0DRnU5yVFb$H!_cteY&;B_WKf!Fcs)k0^< zE6Y*KRj6esUh5Ewc6B4ZMHrwMudi$AN0lqFsjIc?OzjGERhFWLWm-FyVcJljpg z7A4irol`}OoNybnFcZ?FvT;)F%4jHGWBP2 zFxi=6nffusFhw^-4@a^w^<|1;ie!pl3TNuW)SIanQy7z#$-)%M6v7nD6vPzBWM&Fr zGBNox`7s%pR3-zH!X%hLBjIK8FtstYGM!=ih3RLe(@Z}x{mAqK(WX8Mxp3#QMR{>Ahe)2B?wn{1^fA*>rjM9DWcq;VpG@yF zy~p$qrgxd%Vfs7M+e~jU{f+5OrZ<>gXL^n42-B-fuQ0vL^b*sHOfN7!&vcmS5Yux^ z&oVv3)WY;M(^E`OG96?(!1M&uex_!o$C(~udX#A&(_W?~rbZ?=(<4oNbCDiqdWh*k zroS>h!1NcU`20@{qPM-N3Y!>3XIuOq-cDF*Pu4WZJ;Ap6QQF>zJ-%TH6$riF7T;)-**D zt!7%qbPdx=rmLA&FkQv8oasuYWla$ik(M$oVOq>o&-4eTE12q-E@!%oX%W+b@c}#Pe<}l4>n#EMx6kd%~!!(m=2Gd1M)0w6*RWnsFRWe=3G?l4>X-ZR{ z*+}I~WlW_^B~88Yb|#9MCNoWH>J^7Hu_-JGsfcL;(|D#rrg2OKO!-WCOu0=~T=hi` zQ#O-J+k=TLrm;*HFpXgv%`}QBlW8PV2GawQykL(CMQ#WCI^$9DVC`pQw&oylZ~k_QxsDqQv_2uQy-?@Oud-Gn5;|| zrckC3reLNZra&e$ZF`$V0F#NypUIEO$fPnEm=q?#1THT!!GmZ+v?9(RenI?m>M*<{48n`e zftRodyqH4@ilb&mDJ79LkqSh*BU>U3HIa3Zr2etUQ8@)eBLvmdD+2Yn!&|}?2(yIM zg{=-#&6vJ7tRL>bG2a>%(YL^E8ESD^3}CszqJ&VcC2hEtn`KE%E(i^c4ONOmXNF!L z>J3#xLwAQh68ba-T%jXI6@-SwhA7!|eLCdzkfR~StdQc6nIVR)A-h8q!;?b$_evnd z4KO9CsM+6JR^&F7OmmA3?znQgx=O3vejD)7x_Vk=qY&3uH3}tnirY#@LRx&?mMsvI zQ{;{*uWT~x*%OmfRpeexVV6q_dnpVkQdPfr$&C7>8AwTVrHiOfTAYLl^-1!B!Np07 z>lZ`P{|z7UzukcU#Se}kk7|8AVKIJiq_t2|`*4#l)YC`f#pD?*gdcPdt+XtglteG@ 
z{4X_Rc&7jA&C@lo4gLa8!!glIj1&vSdRPxP!vS~}o`?6~ICw>GQ7CG}b?5a)tBa4( z2H^nSe|s+Wtn(%szxRCRneG?i^)tR}{LFBg^Nj1Dm#5zQCCa?#O;lg^O!tD{Ja3}+ zTO|{$x(0Oxgj1UFV&ghvlkqLXL{A^8!T*lm(X1J;5H5#8SnKmncne$!SHVh*uf$Y( zpSE3llXg4&8SaMbVJB>Y&B)te7hd`O+6I)k7w$(}J^&BGL+~&>2F=v^_a(~F&Sy#a5+Tkv;y2i}E$;CR0e|AY_VL-+`e!pCq7 z$Nw|<7kmN#hOgmU_zq6ODXqlYScXd+(K#q;sAj*<(RyEDyu+fOY8Lw(zC^3)CCIklqjf*hN_`@wsI0HlH=5ij_z`}B zU!WB{cn?y9DvZK}Wif_%KWMq9;b)XMgEDR4!8uHF22IjWm_&du%lghGuSM&O8!7ZXGZj&TNtM~Si82G-|gQ$q`Z-w3}z zFdKh@Kw~agj5g&p?~mdwzYieLFGB6(J=K$i0au$a)cduk9{uAjT%p2@)LIoL<0T4n z4d4%U7zWc1fZlQOPGe5jEZ9LGiLky0~l1*kPDGn=_%Iql=PIWbnJ9k=E(HElTM^r!$f9ON_u+V z$RlYh*B?40B&QCwxgyU~SL)CzhrwZRqHhZ1vO>?u38%zbQ7P88z3`xzDin{0x~?X* z?ZF;R#U7<%kAffqa=Awbv8P4|#v%N{W%lcJ+&{bbahD&}hx)T{l(ez2xdMKbg>`E~ z<*+(%JdD~ONUPY7m+7tE*+Ox>B{!^uL3;STE#!zj0y~A>?*{Yx2V8>^hH(Ztw zgP*@*3^W?emY~3Fvu4@RzJ%K5u>6Tr>C^ zoQ4F0!P(2vD!~6^U z14B*6f_jIV`oxWJB#ur=Oc^tE39Svq=p7N-3Y??QP0=jQxPnz&BLcwiiOZO+=<}WZ z{rR@%(E^Wm&eIkGTm;b7mBv=jPQ#1p6-KW>w_xX@B|IJfG>7U+jC{r+95#bP`Mc){&lMu#!*LV-Dg2DK))O9& zuo_fcl^3B$yr+i107!+Kq)+ti8|Z9S0{2=)TvD?V;ff8kdHeQ_u)Hb;K)fYhF&X0H z`wb|KIMuIYNSn*AOkb4c;u4lguAr^nX@_Vfq!#x3Ju0XN2_c5|TW`8{PwwyMKYYXp z@{2xJ=@#L*^k*1Me)fVXvqw)}T9SCd$^+M|3s0S#S^Y@a_^0Pzao57MQ`^T*9T8QM zpMF`+y8fAiB8HSLE-1XJGVOwdtih3qanHrXCrw>5wQYiUCUQ_l?AYuH`E(!NeqvaY%TU%nQQO|ZanT0#d96$5R}Om`=jYe4hYFAA6CW?9 zv_jp?Fg#PZbIS6TKaQKU`S9}D>qm^;mYsL$goHtpF3quz8<*cNAtpNK+LmjsdVcf7 zK9&=){fCq-DHy-FG%3Iu9*pBO5&if>9H($dfZ5!~{Sb~$+)LOa$ccjk2Z}!7rG3hs zt(KB#YO=4}q2CG1JDS64x2toDcJ@o`4vwGDx-ufe$~`lf{p@*@t47d}6>o02B7Rc8 zQ*D<=B(} zr3yGl?T`rbPS?PTwxK8BdR(m{$zAXuqPk8`_0l8&4k@lfnqFvVi zqU}onqo}gA@2%?UJ-zSg^pZ|@XX|u2olbXWt%PhOWFv%42uorDfe0875fFHQIO4|O zIH)*-%gm_HEg@h8{x}W@Z${CdAJJiaZsVY%7*J-$$Aom{zqhJ72?3RV-utPftGiOS z>bvKjd(L;xJ=Ofk$*f)qMNYm$`iWh&5u!X!K?Q&$W9@0ROX-?tZ}QGtxHxO|z~=Ju z4ZYpVf4wsQGIWHerq8=gM%0)aT~j(LQjC`0$utU4*_OeL8{XX2Fy;1N(}i>Lt?O6v zHSK~#Y7a;R)-fGbNfHK#1X`m+o>-|&vS}p4O08UOQX~#Qh^%akHqB5paaI;#S0m`r 
zAa9C~z56zre8)I%dt>czuX&}^^RzAGjE^rABxbHF$*9S*!v~mE-!W&Qx1{IE_Oi9X zE9V3U6yzsyt0fo;wiKBzg4iK+*D_&Gt1X!-X;D+T|4s z3N2<|k^B85w_cRCpv5;O)4;uwm9DzfmVcY=(<{_z7L~)Sf)c`AzNonB&iQ%X%0>CE z!f>v|Iy)!1ATP({UfR&I{U*Oe);AI?h*;7iwnVo%fe`3YTq$H?(*j#gSxUmo3B z7ZsD7FC@O)fSkhL|1P)FH8XSTSJ!)qhkw&PGq#c1&OANfE=bh}=WlI_%|x?p7s7an zdJ%cGB(NzSi%E$>Gz(2g=`=}JeUjOnq_+xec2m-DVv}+J*3t<|%1#j`lF|*+h6YXz zKEl?WQ+1Zmiju2OKvq5QhS6KtOzI%E(VnF32#MG76HD^D|7$CFWDY z4BaEYgk_Y%nV(@cXXHDb`I%;OWLP!dl$tJ+b7^!p#j;H*{HXOr*9a#sSwkyB?d^{$Q9S$}c2Fy?X z{&(00157&{gd>cA+CK6Ie31#n-bd&->2F7DxPe30PY@C`+DP(Bp+q8;2_(a!CNNAB z&GEU$8MTuL5of`r*Et2ta!hvc^thx1CkC%XkzgPwKyt}I)+imU=XKL;z~u}&r{pmF z4GQtn;4`!t$YdFr=}p2BPosK-wl$ia24jPvNjBbLZvL~Fu*T-RJH!e|cLR+$?d2(s zYr?yr5i7ERdqvD{wm@#w*$X|V1agzkLVHgi5EwFZqA4wNgeHHmAZ0}t(eH_Rv@>pw zb&tOzFF)j?(=>A$@atoHV&3?SME=tNZkjYB^@OY+hz(67%i(BMqf9Jm;3k_U%14x_ zgIKnSeLr;C)pOD~X-b|J+NaddjZB!5%z725x!Cr{+eVxGO*1h{@%;-#^$Olq87U!=2=oN$e7Weqw#Ryji62c%+L z36k&!Y5&TM;LPsQ+u z5_3siYou;%lb0mAqm8+yIURWBJmf9*nLC(2&XQOQ}lA)+jy%^MPdax3ol|u9nFEE3RZX6~Uz>xM1fP%a_4-YoK}NQk3JHj+G5x9N!)EBhN;4>vDdpbNJ*)vl^>M^^lL#lnZ8 zvmU9fy-5>VP(SIZ35-fE#Rf(MAfAZE1_5W6)zeW=nj{TZneQRaH}(C{S?tW? 
zz#$LriidV;xUsmTh_&C2vbvkX^=s5u_>;v-2nMTzch(n(>JqHYZ3N zWxim#q3U!=@fj#YXn%XCkEjOh|5pVB^YHo#c$vp%rle9KR4RcGZ6dWsEJ~1*hgG@#MlE>?30UOz|h%A^to}M$GK`*=@R2e&t;a@ex(m43vxIUK)Ml{o@ z)nc+~{_B@09NozL9H#x{Q>bB{gsb38<~ahu%S<1vXQVL(%fN^tQLLX9u!omHg4v?i zIkaMEG$vXN1EZhNT7yxi)gmB?BxSLf^|}U$0+*xF6c_cyj${P3J=vv9uP(~bj;r{JZ6~%*p{REV z3eB}>gMUorxs{7ksK*k}#rIe$CXl9Mf({Rleu7RB!Mp>KCZR(}uxTCvAD-ky5TA}9 z`yccNRUhWyjMotWf4l`=W(Z*tu7^(j!|+=sk@PYac;Pu>>b~VQp3SN^p*E`+h>2H`fzj^%eDR&;Y)mc?pW;N3lRrYA>6c6DZ@e!wE z=y!10EqmLuC~1P|4T(Z3H0DmvU(q=cGtSOb(-M_RktCBrNdm^KK+5$Qt%Di`n`be} z;$c^fL3Q_#<*o3;c6{h|bkkV`i)hQUGj6`ycYV7w@bC`Wj` zV9r6W*I`Rba$reFbyATSZK-Km4ArPJ##B<6lT6t&$req6DAub^pJBA~r3Fy^4v7Q_tkzf0|a*y;f zCy1cRU678TG9R09B7#dduzLi)_bE8?T5O)bG)=2i2&PU-Mquq>@guF&0)H=yo7QTD zLY@Bmu*Pv-7v;66CSyPnTZis=ft6SgG`tF=NMgX1f$yveS}eJtl$gsg>`lz@8rjT@ zlA=xaYRcQFiv_c<7T;lX(k-3d&VA_S z3>)tj^eUy;ni%DBlUa)KDc{r>55odnmIOryG>wV|IqSqG!a_k|&X^JBVpneA&PX!$ z@`}>L{6z@M@L3^3VJj`kH;_`fUMa>%*t`#dAyr7Q9#2Qs8cWOvruF-LR-GG*%PJQN zv-}B(SWH%6M`F^rMLLo+HA;uoNU?qC<2IuNkwWwavJN$ha9w4fah@kWxW+KI)5oP4 zAL~jqzDIiL3SE*=o}u0TD;LEHFD}!ob-%TqAO;Cy&MW~I=2>38r+6J8fgSZ|E42}% zgFG-5+;NcfdddZY(gFACxYjZYoTKgr_DLfS13Kd>%KtDLTEB^{I z|5}hy;LnL=G$XZ!z2@nd{KSsO1r=4DWYba{H{(_X;e^H5A~LMLcD3fJRa--lD5+Rp zxw>)b>Mqpte(vV}=&_nzrrEFd#&|K(D ztzMk}&TG#V#dgkrc5`ih*U#tIuAg5lci+=F>)y7INbK8L*^%j&CZ%bWl79aVsUn?}DRz7P;B({S}tX8i21(xt->TiK=~5!Uqr zjz2=4!o_dJOGrEEGZFNZWm~P;Wgbsyz-kSYdM14q5Y|AMJATkz7O=(|&^vSl{}~_2 zwj!_6jc{Fps}L^35kL{OK#8Q%*ek{33Gq7ZWFUkuy*gyGg{sq2D|2nO+)B909;!}D ztqR%gp{mrh>X04Fbp-a4UBrE8#;W@WK)lMDWV}dxGLVfcvPOtoVE@^(qoZJi2$Nl; ziV(q8fWOm=fO`<8caWzekp*=f0R)kO-b#o_pBC56P2I1UPGDabdmH-Mr6A}+k^5wr zdr!Gx;{A?jy4jo_b)db;lpci-lNYb-MQ@x=?f|<-`VY;>-@SqEX#rWF0!adX0>mJZ zDBbUN>%aiv*{{%v(g)x)TI!%KE5u^GV>%kVl3sl?;A%>uDDpsn-eZ@y<7`I67J70^ zj(nx(HC*tG;DWoFrKdEz0#!bJ+N`@;3o0J{{Lwn*KP;Jr$;kzo7E5M9a&lp&g*-iD z`=UbG5qP^byTp~o=`AD&+VZ~lwh3|Eg=^&mK zcAz2BzKWdgnSMab2eSZP;bO~PgKBv6Q>H@w)Dvj~+XAl$rN7{3BHDfyxV~Zrb@0 
z)Ah+`%={OS&YeTop37d_`YMpaPiP6cv{E9T3A)*fAA2_^2SYMw)H*^a6nXPBpMi}t?>&b8QK#Qxyv)Tt%%yKm&E5GD-1Etw;nOdI(NTo_cjOLXln}yApabdc zIrMoh`*}0yKs^gEJ!A>h2SmWuN8)C{jRGng}koaq^)jt*Xa9pMZx zXUP(w3E|p`aiVLUC3WoQZ5SumgYI!3x&KFVQNS$0hlvSZwrN&T27U zHgJfsE=RFNI(|&`9=r*%~z{Iw%mr1f>*d z%Wr6{1d8M`F`-t8)ubG4K!S|RN$gOOSIyz_oNVX#qF3nBio9-j*hmJ+u#uufUS##G zS8j%8=8K!JSWhL5v@?H&ruGrS4MY7eZrSo;KMXN%a_%_kM+|+ z=pY8*URqA$9HmGM0U;+I z#kSya^TSd>n3SUkqVNb%!Uqwxu)_!G1hiKHhtsaKEA`eRFb7a*uI8Yy25I&XOCQFl zxoSKp6yP+Di?f*myuu*}JF3^rh}=|Lk{_{WW}lq!$>%5DsK0Mnu{&Iv=dNE|e_t1R zONZSJi!rXXqiF&=>s=f)VwCuC3K9w^vKh_+Tv;ujRz~-RE(qI+!xv(e)b@WqEu64HxHiVa!f-X@t0tKfB%LP z3gNnA=oMd#QQ)t9B*ENKu|Q0nL(VJ!B47_~_vS>BtOk9mK0ScUqgI!u)9F*w^l4dH zX?ikQahRacfP@I*fH^suR_{9mRUm8hx3od2tIi4pa{e5ONRq^qP@C>8)lV}GK>L2P z)e7*_#s=*a1P;YhC~zpj*<}1SW2gkG-eZe*+3F1}SQUOA0RL0*dlXR=lgO@k!(0&$ za+_gxDa)wk7O}$NAhIo(8+zG#AA^yIqM2%#+T@+Gqr1FzTYs1P<&=B3RXtuCD&5px zoVV(wb@rF-p-z9^x`LM%)RmW>7FyGClA@Is#uS~gwq?<-wmjkRh$Q7JqgM>O4AJJb zGb-)fOQkof`2Ko3RXAM}-0v^*%@gIUoui#@c)lg;aU)YdSG8B|j|~ zwcCsa#2zKmoOGQY>H*XnAaw|)0wEHeRH?5PN+iMaX_f)#qpd3vVWQ2JKBP1Ha_rJ* z9wA19kfwu%X|RqB;qipWaRw6K{2>)5Mdu)f z1=I6cNrq6A5ugHyZQ4Nw_zAz(W&-#tN*pOVypv|S4#Ny!0!EldYf^NM6cgI0#PiBF z?|GWKIbL_5;q4-}61eP)uMjx`T>r1FuT#N5>2yXu9YE!JLXcnc4^-~>-Nxkyx7AeL zb!@}(gIjAxh6(tUtEWD)uqusE5scZfx-PP)DwWv%1oP6oX-_e|?EXpSpw6X%i`y1N zZ|l2L?NTxO7PaQD*?*Npyr2LS9#B$BDmCXYJP&L@2hZfwS_O5 zZys~asNZ-^oL&F9mZ?*dJ2e)7axoYYv_~*Ij@TVxOoqdRt#ZfF4P5_@Zp8f?1<7Tt zg^~H?Zg$@~#Z9J}GotYZ4tq4i{P3i%9$LS@ClX%WyKa5o>PV!g7e|*9dzp;A`gx!O zHh7+vNX-|OZLW*d&2e^fy+q(-nxbgb!@7Ar5|cTopUXu2|o28+IMXsiwSc{LIK6SjT+S z2!DAJ+Ru{J`%Zjt<=|O1zl-o)Msjn+D<@tBB6xAX$z-<;5dMA@a6;Pv%%mkrgoxBS z+Ldzlj@q{P3K(mJSp$5M^*D*WH`0JxWWpwyOW%3Nr8lmr9VEPnV$8a;ZHCW1C6F{F^6Q2LfrSsY*6-N7x$3~DxKe8LYxen& z2sjk)kpq57Cj@dW0gojRuy{xrG6!GNR;?`Rz36dW%*?f%(*m=@oNXpeq}fbmwxJo} zrDZZ&>#mk5F2<)dUlW0=MWaJ(av9?CQ{L}rqAIpT#{A#C=mc}YRa;C5@Azs`p(w=H zXX%6@8%d5-|Lo6?B-hqdT1y^pX}-PQ?WteaylZ=XXOU$|cm19w-}LD-e9NC&5$RrB 
z(lONwA6WEQcRoppsCVT`Yp6buQ5i@)WY4YgHa6SLCzU#dFstQ`+O@mA(m-81o?l)7 z1OmCVr1jYY`=o>`tn{=#sD1Z7pqia)U&VlJH)x^1JF&Y zP3|FsDlw5#f2&&Tzj&2j!WQB~&a$IuCSLHAenYLsI)|d?VgVoZi1?>IM7$9d#f_-B zdeh5G&t0+bEX0_C4Pi$~K}c_nwnVb)!#4PfwkLXu zp#!EaKl-b?<_F)&oPJx4$?OO(d1z@?UDz(u800taob7cKG-1otg)G;1Y-~}=$>J3P z0wD?TqPD-E7A$71B5M{AwK<7okB!rH{>qo0;05n5nXd_P>^|~df&c3>$ak_&Eg)Os z1z|81Y>rnM``~`y0zr6ymYRXi>_VH8It)Jt6`%-yPHUAl6-7mnRG&ba{kg4;{(_c{ z3li6uPbgv^V+e_~`iC1~(NC&CkJn#uhSSS3T)qrTq#*U<#pt9wFlUOldegtR-STWt zfwz24j<2E674H7kijIBV(d7Kbp_Gy=YsbRYj^x57%3Be%-L@#Rz989CHk;^AENxj< zKIiUvS^k-K)J|DCvpmUCT)(iWYGZ3&M&pVp#qCX{WjbFT?JX^@tn?HHGykT~&acRr z)~N9mq@_=ZR1Inh^=lT3cG7<03sK9%2#^p{qca?`(7DYRN~^2MY! z3I0YTTHHQC3!^8wM*woL?yC91fBBEL;q+F(o6vB0;&VJU!aZI~8phIMau;lS;+4fe z?_2ZuBEOgj-CeO@@2cXeO)o9!cwti&lZdr-Wm{rVQCM#b%_Kf}^xTt63Js^y8g8pI zn;n@8A6dTmiCatZcYgknBflx9t5d2mCd|2ees-LV33l=PwCbSClwuU<09GCcqGO-a zCPYs!nv6r^%yDY!^<6q1yf8gW2cYpMYnN!f-kgkC<%1g5s?Ov&Ns+boG3((uE`h(f!be3J_ z=rFrX1v&71pREvJa_RV3dQmzmC6&?z(iKwDF7-<{NlB^H?7QIV3||;$oby-R8t>Tu z80Vba>+4XtPtECwb4GB32;!XS=Eo-q;MpY)FV4zZu&Zs!!);kv3wMoiPQLFc6FH|; znrgnF>~vj-lTLQN-w5lx0jQIQ=2~Pds}l$XTamYp_DYbK{sQw&$nniDIqulOa>rg| z21~BPHIE0fCy>W|u#x$o5$<~nkw@aWeQ$g)GV?6D%NEQXQUC3rh-HljO)9k6k_L#% z{fKiVAsYCKg+wgS-i}FNueK9#>wPR?96HIRVDO#X*I|M}C4q@xuxeqx+2E_lnzlM` zQ`3nxt&goJ-u3V%8zJofE;6sVDKoj)pO}~ZXk&~2rX91^ezmgIj9K8lqePnZIhB>cvwG7_|GE zM|~#~vz1lU0U zepOUN1h-9X-?Jq2ZBk@9*Xwefr^t6ZmbpO(~UfH{fe!HgU{tAsf zt?brE7WGenAJoqs-Bt94_BT@kHk{ z<`PGA-WPOy{mvNE+0I(=2GDf`gy8{TMog!lmb!ERQyiCa8g^3@pplzOQZ`PbBbfr5 z6o6EFp-iW<%j`Ly+m^{HzmQ_71yage3Bvr_3)&uEo?o@`nMI3U*j&pvJGz%IV#$g=ID;rS#{9nXrk2y$Eo&;A zc3ZUTkp(MWT1S`M|Ce)HR&?JXQ5zLqkG5yyRfYkglYEidLx_lbk)^LkN4_A|l545^ z(UJT3Bj?y7_?-LsBVVIdJ=CM<$OHV56X=|M)DCpyLGB1N5=+T7?2%pkIiI3d+u0*O z<&RuOt(LGyc8}e?libAK>!Gn$Yf-EJ+uoNzwRK&K-cj>Bqwxv>k`PEj0?A+o0XATa zalpny;@V~qLyQ44ggAC>$FcL8*0^?4lfjAO48KX-B&`A4cu3L=tI6w&^WJ)WuS1%o z)9PybSJS5H@9_J|?ce9zD}jgK-~ZlUziV}wy^xNM?m2s(z4zIBpL5UMLNzDEnlDiw 
z)3335sCih#6(PIn-4aC4BjPo`gqkF-c@))v?B>COy+k!H?jBvip8N{hJvy)L^qpt7 zE!{oJzs`?Kuk+*dOBH>`HmttCr=p_o^Bpbsrz?JH@h80QM9^yXC%vvj&^F(HvcD8X ziv5qld*>sgtGC1rmc*8Z<~vFamL&SgE8?f1QIk{(Y7+ zE)p*Cy%d~&zoE8+vcNu++484diqt%KU*k^g${PgF36R0 z^tMIEjotRHkI)_SYj$YzO%ic&{(szAfsV&M{J@e(mYO^6kFW6SbVX~cH?2rE_%)I0 z>h!Yw%H`Ds%@b{*`nvQ6`=<|43`@UgP?GW-t3wfo*B~LkF+Zs*xgGiDAA;-; z18oi>2=R+0f1Z4KPNvnOU~N8Sa?0}j7SATNagSRDG3E@d;Xo^I?#7KXQ-A+B|eSHQ(RRTU0cB+R~f8{7HGA3v08Gg)mdfc za!hi$*^$S8&Y7R1mq`o`zr&X=GdKl}2!cj5<7lmFR@f)FGNX&;BaJZHBD&lem#**` zSocd^or+oM*6cBvtLTOWPTi7K+U&}|>?V#>%-(@K>ES@b)}41%9DbsqBWhxu&91s# z>!M|!+Sk;$dvlzBu6&u<8?q?fZiif+Q}pA$<6RYQ*>i^6IB15-RYMm>yLgVCe(g^` zp&jU-lwDr<{w0{Z5#CJirdv=Bml>l{H&0$p$y2OSF=XV3VWP&KbJbRpbCtA3>_T zpMME>k|G|ZaS_ij;2F9J9`R0~u=nIATgL!TIl^-X!t)Z{0(jm6JZAwsR%5hJh%Ioc=$?X0XFG`!TLBW?I2Dhj9)1Eu4J=kmSv>@0dH* zj&0lOv2E`dJGPA-?bzt?%#LmD*tTt3Z@+u)ch5QZ-5c@ZMMrmMW@Tn&R@FbEqq-`8 z^L9aBkkI_{{3JO|O{jn{;v;qF3Uo=tJf7mcpGKaeIoZG_#mBA>9>2xYD$)%=l$>Cn z4H&#pnvtbhc3BQNbzWA9hYFulUtlzaly+lvlIIDtzJ#w$VY1S&${WaL&`MW;Q_0o? 
z7HnwQpUO9M!m#I?Zy#zN%~uc9H%2#PR-d@x$pcGYjmS~4 zU?Dv$#0yy_6T+1LKpG?)2OzaF%1~2FNw>jf(ji4z(xGC>gZ=(F%Q?WR(D-_nM?L@Q z>m+|dt&r@*8@5UFU}&7yMc3Dt(Ol}E;C)OlgTrUj+~qBlsBzzAo8SJiD><37f@i?huh>1ANZ!g|M=dK9_WhZ3HI(f!Evw65fUl4@4=BKLSsYGVZQpxunhskTf zLt)qJ(C@~Es>}&a2nR4Ez(rzdBak?iC*R+IVZH+6sZyMuq$gyb(8`TJq44DV`{=h) z6C^FBw2pL&6GveI!z0B)HcT*Dp&zM;Exe-q=)uh0q9Axr^}NfXzNZnCPra064SjuY zKjKheYNcx7nSw#UW!%k(*3u|BGbxX$p8~r(26AcXU_}{Mevq^g5a&l>2fwU4c*CKu zY{H%+h11y$i$BHRgDDswj(8e1D$1=Sxci891dSavfa)IPFV5hW?7^m+$^K`D}SuCr~hrVI|J5OHwzY7igK=M1_Wz=t*Sbuq!PYph{*FUBHs%p#!n${ zDU6F1NR~GUPn@u96VWF;&zaQtJi^qUz%}M$CqqB# z?=3mI51vx-S|Mw6=)_Ph^zye_Dh8(3bBTTeccds(v(<^>GO$E7+l?ppXwqdz9}TF2#I zMLfeg>gjvjey+p=yecz9NLiHZW;@~`exKvTiDH(!Ef`i)BNy*++}2BbMXZPnl@7Py zQL2}MYZfxy2Uyh)FrD9Bz1~-zck2 zU~afrR;N#J_S1_C$Yhu zd2U0>_keplftnpUa3j;ra;-vubjpkepV@(?uXLs5$YjcaZ%@pfFgZVP!Ib+)IXe$q zh&iPN#J69fCm5QkYzDYy(rBqY&WvK$)+Xn_@Jm-{be4f-2(Jr}kt-DGIO5ttj7l!F zqo;ea-KY(!3qdvU1hXfR3+wC3oGqnn&7S7A>o+fO;rs%`U1FEU8ht##Y)u)wOnQ3q zZkr-=et#ph1<#UbgwY`}AruV6Iuj!j5Gu|}p%|&v ztVkQvau?r%W<-@C=n7{v8}2nfw~sFmu6`&AGW#5lCUS8}$ldkE{VF~lrA5t-; zPTCY)pDW)OqNp)gq;OdEU`&}9)hdmwq0*#22o_LgalT&o>X6)1`5+@Gn{ zbyNasA%V@-IiIzf{l!NX+QrP6ZP|B2;~Xj5rMm?k6;xUkH4PFAoZ$PL)v0K-&#%LN zG^(w6&u``>5z#;sOQ2wj3$H=+!NO;7O9R?KUdS>=x4M;hab2ML#K@Ofs{+dh%N{P+ z46AF&hYL+{g@QsEmbWQh_*Ke9*&G_13>*II-;omFuvJBtrbNf{Oz8*fxcQTOopAa& zzE&IfRgUMZc5N(l#QG2RZB4i|Fty(o9uT4|-#1)@ ze>?|Y{Gz=HU7^tKv8Q;3OhdiAdyjhI>5w8JrV~9cSBF=JS|%#(&-*!Cz;j#=s8B2= zvRLLdXO>5EP8MgDq$Fi7aOBd32J}lDPURm-p^K!;JyEyF#YKlecV5lNm`mb**0t$1 z^_M=X{QxQ{`|Z_VMw51}mWp~v58?e>!)YXNhf*qp9fLDvE7OPSq!#XbAkiPkuAfDZ zd?QJ_k7PF?LsNTUnLu`){BxNKxX3qvSstD8y?lhfZrWfl3%F6iFyx|Uqb;Ak^|^h~ z%k?a_)#H`>DFIR!9G;y}jZRlNRi!h{7U63dnM;;3U8S^>6IUJ(2QQtEwF46V7VR+e zxRIoypudHsZ*uyK{@f5U3A{cEs57d*VaP+?PdVuznnu;`s~3;>v&rd;z$xZouI0Xa z`z<_n`$_U_E^TO_!&>?>io!Fn^CaBqv{lG0^W{?fJt3BTvyM1#QSi80=Lar{p0(3i z@5rfMxP6)7e9cLwEJcs7rJZ!?Oxc_jhOmOp`0bpYljD57Xr!;Zc%>DXU?);@^jAD> z0A8}2=ucJ>+Jt@P47YCSSjxqfOA<@p?GOzEgi7r4U&{jmWy%C4T)l1yv4oc8%h@|$ 
z!ddaA=0M&;U8JPtWC|UzLmqIk*_awlCeSs=N0q@5u<_w{<2kbfc; zZgmL<>ZKA5#-42x24CMyx#d8phDv6t4KpTm?9`J=g+yg79O(lE5Q5rpi`Mh*by?Pb zuIsWXQ%MV}M_hOTn|qtM-+JL~9y{^f0`SXpYv}5uK3VkcuUjuV^+-0@a7i4)XX+9W zw#SK8O(x5=vy6kLGwaF_W)tsF4JrLQ{GGN>uhYfK2W%Et4GyRhHUyNK5WzTK zop>Yy`n6FJE%XiX^|DFAM)X#jMl@_REY^uB1%pi9A#=AgxFp^d>gvJhQ}?QwRH&yn zh*GcNX_7<-s>{*o-N})^oM!}?GQdZ6`_xINeW6(J4Me5DT(Yef=t+=?)u0$E%5SrZ zi=E0#d6;JV;UD)hyV-ueKDqX&zFN2K6-$Uv6BDB4i2cdY5NZI8`hpWNnb#@aDIo}! zm=>ON=v**eLOJz?HLZL3Zu}Snt^*duXzuv#N}%`;{1G`9j?fD2Htv^K|f$aq{myLvm9ns*l?NPGig=X0+JHs7*n=ep)SJ36?<8v-T zA;LSvXTjF;`8mo#l+8#e;0^{*UH%_Xzmm2gzFVw2UiEC9@LG?iuR~UBydN~IVbKfw zB|5~hvw~UoiD1+gpd1d)FEEA5`&&F9afG=S&#+rvs)ZVUCm6s}IA_8g(CF=^r-vF% z8En|TwUXh29Q7(0>KAWQ#n7BzZXsdwd3bZusFrM3xfAm>6LO%JhFYiaLB~oNXB@HN zVd^7&LoHx!o*7T{_oO=90gc5)953lv|9KTMB(xUq<=|H24G=C=S0so;PfQ z)j}-yrDK|4a*}!{YV&#GT7pna_G)k~UQan>5!=Jf%DbG2ickg9pDm3%OOfBZO=2~{l>?+2M)PkyVKqzhdaXz(8g%NT zeKSJ!j zm%}E$I`4PRvvNkXW|Dr!e{(_br! z*ng5f4;3Y2_W9TonT2RTU0EvEkJ0xHGF%yG0Aluj&3ja(yr(>SMZMVqt z!9CTle8<_n+fS#)$>u`FZP{p_QT$}KBoFcxKTVRC!7Zd z7E=|%e#g7BCRfA)H{~V+SSmXFP#vpf5!aXZw?6Oh!CdJK)q-%nKJp}C`we%dLcKfN ziaYjS)X(6rSV*x`a7r29cWaCpC*P0Um_Wk8N}8T4FW&<{sT)bnQY8re2b(5XZb+4L zy+gUc#P@Q4YQX#{2+cDrC?JU`dpm7T?siX~iCI!3^7aSY%mJ#0~QmN{zGq-p5j zy&1*aRvMZ=%8I*G}D{%woEb#*TAqip&cdigPO&mBGZX)m1rZ zYo?N8I;S2qk-f`8usZtH5uRiNlk9_|p|4yil^fnKE>BxFQ!OM3q&^pnZic2iVuAAj zyDt%^rdxpg3Ea@swZ(q6f2^!MX}#2oV94&M^7(F?F~;+U?nyetZzQV){WM!RJOSM# z+bhD(PZVd&nudN%Ith+6eT3|A(l3Syk;;x?rqaa=rK&ta-k`E9v?=Bs$k(o`@x%sQ zw!<5c?C___in*nAYuYwko=6FctzR;gedBtL?E@HAG#IenP)EIkUJjEDkv+)Mrj0?n z>QOAT5!oPx2^BZy-ke&VRm>6yrgvTBdsaf?^+jTtPDkPwi!vt*pvG;^BufUrHt<$0 zVS{UITQ}DY>sg-epY*y>0YVAJY%R229UrQGR=&@DCcJ@pv1Y%~HsY!;i|0(!aIO|{O{-(rcKof8Q7$$dy zCL>QdiJ?lQ!GNPi9M_UlvEWVwauJ}gOWNm{_6DNjX;PF>-Blcfdd3j=T%F7Zl4w*! 
zIt=Y+G?t8I{xbgjQ3^V{vXJ2uobc}Z>HHX^Z1D)vU^6t6xj|4*G z{id6u_QW%MxI{|zJdi0*;vf6apP^qT@sT=!TB7D9olH>}=_)PJ7^99JF;^?MN;u(m z33%a-)?j@y@UOC__d_}PD7q?%nRD3yi9MO;>=s9fVF3{8uc5mPh z(JeeH17erD!7RgchQ58=1(RaypCf#AUh_o%i;kDhIGI1$PCK%^a6$SsK1){~N$8@_=iX}*pMt*ihRAc-+rS&z za~A^>R_0CJ*PHi@ca;yzPqdH2H}m(DH@m0J>(91NkxvCXL{Mrdzuap~x`4^I_3l-e zKy;z6hF(`(p>;iijDT&$?H~|?Hpr$e%(XzBt4A_IuuABz?aZs6Gaf z4V)dKg0cI#9RiAY@WLnh*?2Ess308j_b7tDP3_@rd+47&Aq1(elh%}OoQD~WdzSi_ zCU^0KjN2In&_2a1eUI&rX$!mPtQp#xY>Lj-A(Fz$TG2j#g0_F^N!}c}f!nu0cjd82 z9bHkX7QtW-hnuE{qh3auhE}8HgqtQ;$I;Z#5ucf4YbK+*nZL%NgdAynx7U)lH)+sR zN5!?rv{6+D<#+m~CFd9VrbuTUF5IZ`q~`V%S@TJgECrx2`pqK76o3CMjlw2AQPT{j z;g6jeY?4WWX0nh^>>sB|%f2STY44Sq$!I*o3^nCsfOxO|+8<0Z`pBbJH*&+)-?T)? zOsEp5hlj*`;Y?9f<3OJmi8BdI+$QR`G-&D-9Q9XW)%!n@c zia6xrSJsqsUxX~fVfr{5eiMN}--C-1P9tqbZ!1{3EA+kUU9Es?EbL;I3<@8C>aqG8 z@NWCzJ^qnh1V1W$gC2Q?PMzj_SM+j9f4Mq@&82a2;cUMQp3L&M1W`fmlp2L|;9FEy z2^;1A4JCV@$_0p#1Q06upX#8S>(qlFtc!vyLIK5)ikeY&@dil0LVpEqNAZ{XVw8L9 z+1g9;jzV=g(0qfLuXEew=xTb(^ zBQ)9Ucxn}1@_?;-PIoPO0kI^#6MGr%K&Aw$M0y;>4<@837t&OXHV1@w*k0DaQnVG< z$c}>k$v`skY6xV%for!CHLN1ie1sr(QA;|c00v5Y}OEd7d?*n zgWp+N>9*)cc3ONPz%|V&m~&dU?+^Gg#wPrUo2`(q?!xQ##5eLef|}9&Pvd-nS~KJ~ z`(_>RY;WcsZA)HA7p0868G|)@t#x&uW~4)gU2&EBGR`2;qF+b322yHhOZ<$=`4w3s zBm|=9`wV6}CamjeqW4+oI^ptBN6_3mWw(;USEATh-*ai9YuKMRPfHB1Lq&(_9buSf}`T_&C;hC+3l2 zW4^??#vpw`3TavC5w>;Ct@87Z!j+3Hl(>|B3_cAD8wiEw9c&I@u1vrsJU^4VJi}Y> zL3niBeLp7WzmG;#h{`>=*v=x&5&=K1v40r-1IQ+~eq}frdUy1dXfJ--g6&p}`6bUZ zk9Ue2bI)@|clYS(%Kh`{Nm7iAg_|3DBkh>>%e%yYTkMy|XxEs#%NXa_M?|FP*oc%o zM?3Syqi#g!brKh2G*Z$a@M8i*J>&xpBn5k3UNdcm;66Xw|*Hx2sKh|3_=$NS|Q8Goyjp*LRZ=}Po66W3H00l=pR%Xsa|qG&3T z34ML2JZ^eBJL2M+1knL|1rx}D4FN8)X7VZI_K7+|5eE|53Qg%LM5c8T76#?!6c9j^ zvat+VPp6N_|3+?gz0VIW;v+e>n-9J&`4$PIcxz)Y$_xz#?r+N1fp$D{1Y&rOosEZs zcX8H|?3D%fYbM`@{~0$c zGuz(*v$FkH9IR~r$mee*_J1bL{*TcA%Aft8`t1LeHY?{p9diBK5V3OoyHoCeCh(Ux z?*H<}{_l$H-2dqPUo-h*z`^tn?|+fZ|3Y&7*UsyzsoaR;0x4tw3^Gx!Bu*^|2{O!dFuM_&y@;^bvuY-`7&r_=GYQKH)y@J$ 
z3K7i(Z{AKbt?f}W(FKX*<|V&N8Lu3Af7++l$LA+c`(s6!&M9}XJpDRSK{yo#QU+K) z%^*Vl7l6t7qNE&-iprY#-SRI!Q7hsdgjofvOwRYD7EGmS4ADyHqMVv;jp?pMY*A7y zv=)VI_RFUPZ%!;ez+788)rQksg!y=DtM4H;Rb z3=;68Gu$oBXC^S@)Xmn9;F>cWj*#m*@i;o@f^a$KsP{2~g`bk0A#-tio2^~NW$2RE z@*r>^+wCDM_`}g>SHYIg@<3ygenrfge?`XKZ-e?4i;Me?YoqJ;^I;Z;lm5bo(<3VW zv`}JlA8Dax-g-tdhJqaa9eDpBiXVsH9=)OO_x*!WzvZ@1+(%=F$YC)DlQ?h1iTCjj zyE$~P1x7Cs+ZF zE7zarvdKG)#Jv%g;AMrLtgpC&lz%@4Mh4glNf^@1WJ3Wc=ogt%n*1pPguAu7#XE6( zPWSPfoFr4}r|tA+c7d~T3WC`eyLhOq{e%=paH?eSlnYK{qCq&Z1_D_{kKuLOHS z#FOnZSn?4tr98$vu0KExTWWGd&LZnXa$N`!E)2k!rs|(1T&}qq;Ske{JA|FNqB0O} zOy5wj?^qhA6!v;{3ldi5be2N>kmw}xb?8^^UyeAQqoP~{rv%MI<8FZ`Ef@GW8YJlGrrXUt7>1Rj<$Uqk~G42h-Z zY1=H3&mcwSZble8LB%T70O#rxGv`*%eaM#Im2JB|;dm1A#n zshdP*GGE7DC8D!_?oEFjYm#bQaK4718irb0w^uB}^M^5;XYqmz1z>_8h*!p@e>^ff z@*-@xgzdu^?0x|^|M21f4)!%(L#f|7lite4DTw-h(N|DlH8!wah~o#N@ALbLeZhXS zYQD$WGgMXGz<;O{tmQfhza)P5)Qr#ByIR1oE^!odkm{LtD7mEJ#_F@xs*(C~koqKm zX4cidfa%94b_e`!l)HqPvg=CP8Fmli!~4tBW~CEuoX4@~Ncj~Ap~Y(ve$@XgE!6k@ z9Lpk}`mqR`5W9o5ZvyEmi!(#O!1@FxY_pzJfT_Ln^yA9=$Z}_SKp|xDvnubz)a#hW zNnY3t@x@Gd0?zfY-S#fYy4&Sy&D!n&`K&x&y1U06Tl-Jd4Tfx7FrWi04k zg}Yz3Z^+>YwfLip_#+mGOaQrfGF}yjq0W2vf>Qtkr(=tzN$x!++pZmyZCrER$>NkF zJcv06w@_hJ*Z@v-xjbsd%%xL-pC=w&5LaB1zT02~f@RljZB56boF7t{Jh8#4RU$uj zGpgkwXok)%N@(XCf9NJC6UY4*docf+64BSG$1bQiA1<+w{MjGp3mwmP0ePt_1qdvL zI<5NNL5Nkzh_CzLXN)*q_YCcmCDv#$^6;0xq>;cT`h4v)thsu(J*CI+K2l%>zqz;E z@zxd_WgPl=<`}I0lxA`rwPs#WKh(4L6Iy2Z?2^D)-Dd8NXw1Ym`>M0Uz1~0MkVXI6 z-!ss8nCg=Hnn^(ZdZkZ{@YdcV5F`L~xQuZ+SlY0MPfX*{s1;l_Vh z$PMIsm_F7NKN!64YVTLI#u}G1Pz(4grYpRwh#K0n8dY8$@A1+SaIXsF7wnyT6hOWM)Z5WW-aU8f@LAYDVERtPye28-6NC#lzv=<=TNtZ)z zNMscQPQw(`a^cd;l0WWtG9Kc`wyk$21&+exh_rX0^0DF?j$upg(CNQIVx z#xGTY?ZLk5dXe#~PQVAoOOZUgFTR%$yTL(4@7n=?8xK~uw}wsCB-pFzy2d@of*$+Y zUDBekKC62ySeKPBqIyM*n|_-IeW3guLByiNYJo`S3f8p`_yR}RJHqRLdm3)sJNpYR z%PXLxDD0GP;K;HdO+$0AzKZjasWppc_|n_O2u`(jh$siwOfp6w*ZDd^}n46kEK!Dyq_>C;=up)lx<0MkQe zyPbnRyN@pEr3-elLfI2p)<<$ZuIJ($tQ4Rozg0UHEs{+l$TiXnP$gW?{q)0{#Eg0P@xlva@kX`W4e>{#2_^OQ&9 
zc6kgjV(;Q$pL7*6#~H78?;7h4{odHyW!W(3^hdf~vB05srTC@d4ZCjyS1(@OKCt-- zaR{-c=NtJWF=r}w1bnNBmUa>k_!G3Qr|Y8a9+U4Ub|ap0diV%W_^*#w?-K4vvwxz? zNi`qskmH(gL+39buw|@`#lbP9ba@DP1uBz<*~#(JWype_isiWdFr|06Xh3Q5FO>$F*1S!If5zy=gFR3tpak?S?+JomLA)t&*E75N9Wl0%y?F5Wu2xs$mpoY7x#gs9;E@>wy(>E_)brm-U^B3%gN1p z8!0!Q=sz7Sa|ZWiqI{oy+_PEC(p%zfZ<)Dzl86F5e;JsvyIyq$>iM?*H%wiF^NlYi`+hxe&g3G4eQO)5Pj>BEIIE40u zkOW=5ITAFHy6Lw@vqs}pPZn4a;uX_-#J28$c;j!v&m=F z_PugnsqNmE?o20}{o&1jTI24S*PFxaT_dfL2ixG=#HM!kHPo(Ha&QNfkF5!uU)hHl z(*Vf3Mn-g}Ji5oJ)9G@NY-5}omNB?it-`#UG*?@RRGvzO!h#)VS`HkG;%wA~yRQ#v zt8!3O%o2w?{`ULJx7q9#LXE2)b)gA7Po&Am`E6dp#@Ew*T`ca^ZVC~*VH`{zz?pas z^>)y`2dXt|juwflI{6_5zi;L|hK;b1FLh_+VO_u4KqF81_fsEVwh=$*erVwjyWCk} zraK~w)!3kY+!x&QPXai^z|VR@=wFL|$av*`q<=63&Ifz;*7k|GGzG;8A-=!X$RO|V zN_2kiU3$8|FWEL_r+MT|SdZyCZFJ+5PN2W;@`hFm&c7053(^lHKkI{9r|i^eDb4YO zb!2rqWzm)YIm^F`00no4jT1hW6}h^_6BN19U2u9CnU{M2OY*4h5jeyTf)9ZKc22px z>HV$ylsPA5UCA5wbEh}cC&7LEu#R{krux%f zfaO(d!|RIAL@+8SkVAuFGrWN-;tHtRe`NR( zws$?LwLa@B#NIz%tjg?v$(tmu`{cXRoAkmrun5Hg3zgXm0-g)F|m+@BI zdwnwDb&TzE^LsgS3Z&nJ-n4!QEBD$std0GWvu$NkRobO<2G==MKTQj#CeF})aaP|i zk#T9;a9RGdqqMV%cb%wbuFqUbps)M!)Jarx;7mqi75Ytnh}mtll|SCmbEEScp#|=C zTSn74xZh;2`MrbqhTuwEUBd?D?aIhwU6Z6>y~6H&#Nrm}^lVOz+y{BqE#gl?cC&{7 zACsIiKfju#p|DGT0?yh@BO6$I+lZS;Z}q3%rG3D3!w0^oId)ZJN{(y`ZWR9ez+(Zl z|9+1rhZo03NBBpK|0Uv6`QYQFccZ4^=sL+H_D0Gh%PY!RVR?49!Cv+{K){Dm-|R$| znWIeh$!As}`7`NT&I_Vz=CK5@W}j@BFfPdW?VaHI`3=9BVenDUso%AJf;WT390yO7 zbD_kb_}Y$zq-%Nm57>C@A{M^i%wC(2$)FcBL6?Bv6?aaTo51|uYvfyu^1Vvfv`$&q z31oS{K|`xhh05q$K1_v*LK*hS<%uP`rknJsL#7yAp8-YITro}-$8qROo6m?UX3%I90{kIV(2ZN=?uu; zl9We%y1FvMhD-j%Tn6S0+n%1hgtEk;4^Iq*q5wh>SXn;bA@-h>Dq#s1yZU<7#sF)a z?oVf^hT{BEMi<8ohl>KXL-zC|UCBX3gb~CZixbOQC;DiSPQcy|azb~OXd+zGI!lx% zM4Cc?c@>QsLpyB&(3yZ9YNBt|)){^INQ;A95iLuOlgEn+vNS8v(O29DJgY-h1{7y| z?eea`_Go`Iu2%Aspo#TG@dt{WB38WENXw$u<=a{2<=TO+Fn(!@Q-xF8ZNzP49a4p# z3@nfkK*(K}6+K}&VLBn0d0)oqQ(>i6!M=o(=bKkrmYA)!siSZH%ZcTKz@DQ7damMx zl&`4=^#gI^m%I{gIhjk|N&$h!A@RPi<|zEsM@Ds?%Jliqfhi# 
zzyjsb=dbX`z{OzVBQdq1-%};nB*Iah>(e-XgltlO@}altUE&E&(Re!b>CxlilMXuv!(9xRS|Hf{cS%{Q$COD~suN*cR8$mMQ7ZRlzYQptC&NPUh3zB>n`$bC zvdMS2@k7>y-9Cz*omRgZvMeOJl<5!aw znW|XM?0Jd_ziLY@rI;$1mc|WG&Rib`wZ7hr86x?KnO0B5j)=!ZvgHO|c*kxo-hl&e zKlBU;QfDlbzuoW)B13zGqcCs#aLFFwdWK)`A2=E$H$qt8ko5E$pORhVTW*-ap<^vT zS3vpRNm|(v?JiR5H_}1*lGzmwb(RnR)OJE-4|9O< zFFYtswIhU6)xof1cQT&#@l%xT#m*wG&!KdR%4ItmGQuJxaXb~KJcncdyUx@UHT&-$ zW#V0vyT7cgY#cBZtXGg>*v5a^r20TCZWA)+4;!d(4*hE5sJy_tTwYVo3h5LLroBE^-L} zef+i|2b$OaQc;2C?Clvzl7)#Sufo>RAJ3-1H4aFpw*<`^Ahe*e=GcQb&^gZNX??DG zS(Zb%=c}Uv_xvr0q59E-UjG)vNkvORow}97&6avZIC5;7sZVjtoJj!xOA#Bc*C=dm zZYwMykbOQ*BLcs*S=u0SH>zrM{bXG&2c9N5 zcZG)mH$(87C%+AcE^%!X%s_sqy9&6Y>M@H=Bo!NeYXF$bbALDl)*|#Ne9Lqz_x(yT zzc9{)AedDO&pPCqqJxi0YgcS`YAQ7BvrOvFG-p|90y<-!MY(=3dPZ=z!sG7z9TZ`~ z_t`s~r53Ikh^m6IV8MELGb=LpahV;XmK(GLw7_eACE^bUt{Tco2{&IGpAKjJ!Q|90 z7OQBoqOov!qLy$%Jnrm)uYrCtQ)&}0R2r;}Ap_4?y%qMwE^;;Q__7m((=d~a`)F}H zgvbndgNP*4bJxKI12;nnL~a@%Pnd`5pIR+SIe7%W`q)cM!)>ska`$rI(E`!nBRzjN z$jL1X%gH$mcj*c3VWi+0ku_@KX7C3&YL_1z%#~#l*Ohyf4*mI0v<`p4ClxaG=n&!M zxX+2%FzxA2Wk&*ODw>004AJMd(uG!dZ%TKB@#;zmpWx!!O0hE>{5B}^or+(`T{zdO zd`Z4Bpz0v2AQ}3#m7*^m?v)_T`)c8wSF^2Pri_QIWZAbMnL<}*kZd3fr!xoGB33%( zqv>H>+#=HDe}0~#E=OKx!r7bBPLD}Dg5VuosV+F~wL-KEY_%X%6NQ^vIM(>e&nxT{ z&j)A9+$$!>Lsw75C1mWX{Nm7})TU&69EPpe#Ih58iyL3?PK47ewms9+4w+K{hvz+1 z&ngPt%iN)v;9kawXDzmHJ--&KvogmVFUitpt7xoI5cw`I;t<5%Fqyc5fu?#7(cl$$ zBYVUSGzvUj%3E2Hu$tdlNwKQ97GrN%wNkAXuP9N&s!hf&)mU1h~%PxW@RH#4l+l?re^VD$^<9{KntN@Gj{`@ho6VVr*tk?teA2K z;s6~0#vx@P12EB;IONVzQT5am>&!U zi%;zwJS7AK1r!3_LtbF-F=$XuNau7)QWR?xC(ZtBWydKZ0Rg}ZzPNW1st{LrAM$#cBN-rLumIMJ?2!Q=2hNA2UggLb7#l2r{vveb1H=bh!gkQ!#Z0XN z>|i^n??R_e0adW?Do1L-b3heL2i;x#6bBG1q!F%z=&pUr2`~xAPkL8Br2ZYl|_@5lw_1 zb-nZv9B?#*0j*WlqHyZRK{?zMIbbv-8I~Ty3bj@9hy+Luz=G2wy5j_PahMZ;zeG+w z10q8v(0DYC@PMDF+GNg+Q(6G{kO_1ikt1gyB;Yng9*sxMB72G&Knb5k=A1VL3vkL{ zrSLz18OP+&w@99n1!4ncGk9g8<8>2>x0~Wb&BlWNbRhVrp<|FiG$T7&s*8Q(k;(6l|JD zR6t>X1ROpwo1%rkVyU7Z&==qnA|3n!bB}pKu1YtjQX*LrrU+DY1KI+@;9W3ikVmji 
zNcONs&_*zBVE1A0DH7@Dw1#Nsq)PG?lNE~;LlvQc4}kOFnUI-aK0qme8ZZoy0z|au z*ebySt^p2!8UQoEmRM(q+ys`I7>>(O9A*jW4eSj30Zb6w%Rk4TFo4kiGT_qRF2K&e zDq#K3?ZHkkSK>GFE66MEE9@)AEtM@aL&_`WE0inZE6^)}YH)WXcPw{kcU*UPcQki! zcYJq*C(0|DYJ7J{cWigqC8%~78%TYSZ0Kz8Y{+cTY^ZFoY+3AtTZk*JtFKp7$WKQo zd`%!o;7FiIU>O0>{&GU__Ja0=_KfyO)hHHF93aA5wp$Ea@>_^o4|`JFrva<}F5MN~ z^xcB~837soxB*?@)&aQw?A`a>Cj~9si=c~Oiy#`{8lW0rzx-+aM*~LvWdf4A;kM#` z*EuFr66DlpFsR=THjS%Ot@(Z)|tw-ZbVu+sO~5^1?%shZF$Cz+APM11Ot+C*k*6%}1tWn0M_ zT_o!FuIr!5w3go6L&=)vYeR;*_rG&nsZtI4CcBqldr~9q0Xa-Hwn54%b9RdA&rgOp zn{>xdkfSLo4^weHXX&kGiz^MwBBv5iSWaBXQ$}HZ!F2bxSeXK*zT4=E#DdR2I4pZ1 z0eE?duMRYC1~*EdO}tm1E70j#f;lxHIRJ|ck&s&**0IIZNkZSj8c0+<5bL2n981qsKxt%ORc?`f%jIgV(Zv;Cj+5#z-`v#&qKN5j1GFXVPLA3 zJm;acHBn)Rbv2d}RB_8;E2BMUTT@#MtjiYWx5YQyb)Jx-DZCf!y^J{uZShe{V&w_p zKjjZVZd#($k1v&~@-vQ6-{nqew4NrUKGpmoEv$H77NHL2!ShzKu=EJ-hq8D#InR4 z;SX<8c&B-Ht#`$O1U8thguh(Ovp42W*vlY^^i8ujEnF*jM(7uvZ!I3Vcm{10G#!2S z4BIHgzZ38Q!CjKdnK7q`-E+si6cBo$ydyu)t0H;dO4pAV8EgnXT|#`VRn3%EJBGn1 z^A(EVQBErLr_rc0@;EFZKOsq1FrW39>Q$~tfOMvCrykE&0Y;EdO~PsCo4P~R*FrgV z<$QqRSDP%JP7CI`pcG`}Ch{>_ZnmiJVf4~+zdM4UvQCy03d}^k=CVoI-`^g)8 zjkpk}-<)c?)5Fbb$C@7L8FeVVJcfRW^DXMQgAb{8ULL9Vri!;iY?{_2I8BgcoHJ}p zt-+P^Lj#sZi9AibqA`;Z3fN9sxD=Nd8Ry1gk3LzJ{I}%AI~u zXxT<}0O=fzOFfNviC}N0HZZhj*ZzcxU7Nj3>%^U1M9}$p^+e!_Tz45U@}5PobcirOrNYl|KotbS#J`$G7pp9cK0A&KP_31KXA zX#oYz0ENTm=80^;Dn2inaF#q~%RKg4S$cS2V>-HIfVS?B^1V~O>^^0$ZDe#w9zNFw zwF^DJR4;Y}VD0;1drE_h;A2;+K(%`cjqkUu?wxOw49_y7qGWjI>oTRfg)955s;}sy zgr|Qh(6KI^EH)MIA8r{X%TLfAExJ|rE47i=Kc@K7Ea2SVd>9)v3Jeo^+3H>N>zUu< zs;6!M?R@&XGka$%%T7^S%d)C;#HamTJ1?-YpR<(LH#&INH+-DV>wVfP++O7g^}IhA zwIDlqU5=*7jn%HhBVHL#m*`iWOdiYo>Sp<}+I~7_ht^ky*3XoNaLPBh1XN1Z)Hpaq zT&VI6GBoOyHfF_#cLmq870yKPse1!tyZ8GzF5iVH3eeH4`wNzBSF^UHaN`rX$bgw6 z)W6Eh%L~fMvLNHLx$V3CLCc!=A_TxAWnK!B3W!^b9n>APhv%phWM=LRF7-FOKC-O5 z9?K@bFET9bySccuywNo2nNbJZoa?Ew9M)OGtBhQ@YpIp}4xFP__*JQ(O;}nv89XvC zG8yc8HtFmoGHT$foYmw;Ip>#no2F+*sf77u--a~e=`Jy`;9$tlqn!!>ZQAuMXmibBEXW|$3zo!5}4elxn=`){QjD9 zH#|h=QrK)rMa`h5xHTzL!&^UTm`|+c 
zEVn}TB^e!t%fNZeK%yALb2^8ShNJPQ#VX0_Aq$T3?W6TwG|T=;g7zCRu%jbJLYk^P z-RmB5t6~Jddp7~<`$;AKA^B`#`dgY>uv8*?ey;&NWX*P5lh&N{}S@VNz=9 z6Lr|dQKdhDr~tyU&Ps@2=EJ54Huy?slE7 z`pXdJ4Zx>?U^FA=5RP2824qmH(QGn~q!$`1FZ3J&0wgVRE2xf@zn zaZ$3ZT!TXItP-{oV7Fk3%iyP8%3@vA)LTJi0jY>NC-K%vSF){lWx`h@P@@T1sv)sj z_SQS6)}Je4RyCLWv~``|;6*iD%)N2WD>TBr==b)m1s57AJ-z7ge}Oa9oke&P=+q7dq8arb^qaH@+1OgwAkVE^<6| z-;AHHw>tF#wkk>a0#b8ART;kZqF zMO!r2SeS42qr&~#i|aoWvZuY-$QoZxzn4=&=|4^$EF`oNlw@(KQlCweHchTn$HJkh!LN+QU}{!;QEawwQzF z`7N-*)@MoomN;Wl$OAvd^TnJV&gdAc~?bo%z;c;R#ms1g^w%O~N?NJXujQziY%f_KW|6HI_2Z@r-HDOUN8 zm{ob1huRmnjQn)V63N%EUu=lt2}BHvP+4df)Q5$F+E-$^#T290_DpsshHRaGKyVD7 zT zyE)i+#=!BhYM_E*?ymP}kN!cTXc00?k(Xm@W)$SKzxW;_=S$;2ykF%B#ZVP0=dI)Z z5~Z?OGNaZvFuC7J#4b6nl@Vj-(nPYjhR=5>yQb%Z^(1@Dc!IFEF7{nOo@Cuk>W{~vL{&+~rB4zhxifuB;?9Cs7N-BhPDu5%uq_%>$cm7y zXW(6OH?X<8DLCh>sA*YJMzFYlv5=Z}86G4c?J)J+7tS28{(1_H^$A_SvZH8SlT@Vp z>2mC>W`#VoZOfM$~u8*zIVnP>CT%|#w1)WLN#rdp~_!V zVQ_pQ5GUSO<90+~Ju3!dZ6+!uDG-S8dPP!NKA3YZ8j1W-%wA7pM*ypc!GH zF-^`m+H}O+ix7Ck(E>UtCnTiFNS!$2d|Le8<@>}dT}KO% z%B6f zThCag{xt0>TD)ALtEi6ld})5Z9lv~jQvgS+N1dYAE<1ju3p(~;k-Ra)(Dgp& z6(w<3jn&sy%SM0ixU;wIE+rJmYOCQToVxoW(F#q~)5Yg@8|o(Ibok?4)#s?u?NK>m zHJ^tkS-GXR*BH>z^KJjbFEO#+^Q)j(c2<$+b}wTY$?C9*F<4Lp3!2=+9JF4M$coNJ z%aYlC_O>B}P8ESo;+>^nRKyLLnr^NtC&cw>Mn~k5Djg<*msw3lA%-#4%P{a_b?4e_G<5yL6e}ickUtFeU>EQIb`IC9T z0_H#_THLTW5_82LYw7abl~gbH(){h-hhtGSAgj)z+RP0 zP<_~LtC&(2VrZ;lCwlT)pu>&X;?e8Ot=3wnCuNcNbu~G6h8b_{+e=mH1$rGUS5Gpr zS7n2HK_s;p)gmUI%Dr7$;F(PwEP-*0u|4ZR_#6Vqnt=)PLXne;?PkfRWcYsMu8cU2~yl{PPpk- zHVh3$KJ`?+7q97F^!h8mcCQY$9mDC8eR~t{{iFX;&8U2t;Ys2#th#VHqJfMb4j z`oh8~(%T^{$%z-K&Gy^hBVb_QCSX2xL1+me?lm(YyWdv7lo(zW!;{kBY19?Xve|lv zbQV_96|u|v%3I+Nd))dqaektU7jKkt+({n8hP)?u%$T-gTF5?f@shoj0F;SIpQY&s?lBPA&dd+$p z?O+wEaA2Zr(Ha5X?W=w|nw7RDM_s(c1;q3W6de6#(+)hV^E&92H zJ7i?Wyips+5JhG?Y^^SS-NY{!t$wvJX|CHA3^`EQucmAG= zc!Ix2$*Z*#5o$On7Hd;f%V95TR|Q1bc}P=2nz7YY+0q&J@*G=DM~1HiSEH0+lR~h zs&~Q05-#b&LWn&8im)Bk*S0mv9JfM%&0}6|ysl421gm#PKA3$9yKVcIM4q$QAX0MU 
zs)YJMuXA1}Ix}83xS#h;WMc4-ac9W{nHIrmZ)A6vd>#v{QB+6F?5>r)CrR8YMKTkO z@;h}~8Mu{o=3$rTmdVa_Jqa&1{FwbyVKfqAw|l; zXw+QD<8{qP0Ow6bopABju~OU1=3ad02Qk^T$h2wri>vpe_BFK?7vaob-x>d0Zn*3! zJg~Wl()eGTKGWs#V}F(_o3UidR-tiCH4T;~oEX!U3&DQNRn$xFkdjta+*b)22t8_? z-qY)n9Al!o7T3$KN`Ju5?QPUc0$ zD&KylO@2N_zdJgq`*@F0+y!&2!CNX_TA;y*0$#EJQw3S;8)iN-CYLJmL7QVMcnv|B zCWJ7C#3lZ>>B{+KlX%1Hl}qGve*LwK5~qs)_s1qqymq|9#`r}TB;Nm;NcUF`?d6z` zKiwTsn6#I|zr0jeWB%aeWe)h~{AI zTM_0WkS@sW#CJfQiuQ~>Dj+=XRGh#m?|PY7n~QbjQ45>r1LnX~;H;jzFcBqQHaF%> zmndC;UM>PC8l_%>wP*Yx&|#U#C?FBO5T~d7ecGq3Y4H_J>7*?uwq0@!>=V2|HVP?T z(57lIQLB)4?HT<9h4q2^9Ep|K<$Nv2B5bq^x=ZgS2`?05Eps6rv#8ISOL}rSR_*>H zUEUoR2Gx9@JCQ79dNMxKC0A3Vo4)N%tm&Tl8A%vNooz0Jg%m58uXNT_r907}$#Zpx zLYJ8BlmjR~n9Tw4i9dKQ4)B!4?re$+V<)^C1p5{LDohihOw+Gp^h)r^GP`_e_YJAQ zn(V(nNIY?pmVci!ADr0Qr1qBls|r%r0w$;Zz#8F(7S^Wy3fV%>3sI+FKLWi9A16GQ z$1P_4YjcOKs=Q^wm|%-DuN&9ARS>LTS&$;GzDlK>g#yvTb;XkOu>>`wI4+Y}XWTZw zqr#$#BtfDuBAhl-H+KFCvXKIc>Vqss>$yN<@&u5z0Ci_2*QTkn3RR_4qi zav3q3vzPcqW4r;WRJG?}tUF2@Ob6H#!4Ws!3^=;vDPPg5NEuT>a4$K|p)2Gd7r=68 zcK3%f`4&j|6*4>|tV-D;d49Thy%I|X%xj6ir4!mhHqOd7)q!(BWaeT}*Q3KB%hp{a zQ#_n|)Rde9fzODf^Ec(WTYdN;ts5QdnI-3kThH_-PPKCvM-@mb>R7 zRBt#&ubm33dt14F4KwiN+z^^q_~#>cwXe%Dvzhs5x_985`V^dm(!d3iDzGLFDp$XV zxnx>?uPLD92#PWVQc4fyVLlnX+3s_I)WPDuz?z<}4~8`-Hx~2#*XIUKsGk%SGHG+7 zuNGEl$9(Q`nle?2Qm02$6cU#q;-fZte)eCEF{#}{A2;+|y>jQ*sG~ik-01SR4cBn( z{3LvXrexZQf~ScBDp%ZQWWU0SWt@nWX{Y0FHar%UCL7zIvf_flYrAdrTq_Z^TD0pY zC72H6b=voHt2kUZDfQb35p&!)uEsAR+vdLADjub>(W4VcW;lwyswhy%udPz+dF{yNLWGX}-HE_8- zUF#s;vewjw)78YDUO@{LXVb#Enpv6bn4`O!M4XLoB4(&ky@fmXo;j+g_ z!TFnULrm*}NMJE)&J>FpV6zi45An-Z*vB%bi<1cO4Gstugl0xDVm0XP*;2X4#$V>D zZKJUIe1$VaY-4HR*OQZJC0=9#k3Zj4%ScIb_n_jPA0^Ekg|UD!9Z)qo_XxP9PUq9# zl+i^V^Ttk@%;il`zB-^jwLXH8g*(r_(2-~n^7i6I@MX(V^98zPwOllNcAp8bAoyiD z?$Gl{;Uat^HO1Qt&O0*;JbM=!s?l$*f|wc$dS|!D1_G>ee51mI4J`Hg)q6l%aF@ z_kl|DTL~BNJMSrgTC#h~_bFTeQV^fp@3%qy3gHfl{iiy`zbhgR5 zKA0XlJjLk^^;&}mr)})^xdmR@4J50X>fn0^HJTbn$>K55^f~LRA>A%f82!AR1eZKy zg_sMu4FqXbaUTFOmdt+CC4|$R`z>z?VlkJ_FHd2uxF`u2dKUCnz}jn 
zq`PKVxqRNFx&dW|+R4&n@!@yuM77%xw`Wfcm+ay|eF(Ld(ZoeJ>wu1E^)lk85XoVU zE>d@_f;&|vR%Lh(T40YZ$j^!ZCz1=gpL|nuw;+@;H`bk*@u6b-9+KdBs&BS1Na$#n8*5a%yW0 ztrW`$T6JJpYeWZs@7SNw;C%~RXLGvlcAT{d8VuL)wXLy0<5Tk9#rv{@^scH*GPOJZfIOy$cl$!DbQ9@ph> zq_)U&Bxuj*@8zjyY=7Zcpo$17%kuI1!c236U5*fX|8#}D9GHK4XKj)WLIwsG*@Yy$ z+5a@EXcy3$n#pG1bA7^|eQpjQ*VpTK&6hgTdjlV<`fsj4arAjfpTr}lHqQ-C!{aF7 zGfvNczh#_NBpx1Hc`Gs7apJ1$rKqbOFoq)4h{x7auqx~@b`%xa9`1ImcBCCj(Oi^Y z(a_gCTdRJ z{5JOkBE+XRckN5Z!Xk^pAq+)#B2Z5e?(Fce!!sr>=45JHtv$Uo+4ID2z;TR8$513mr8({9NT2SKGhY|OS z(QUVAE`jBgP{vHIvr$npBJL5!1q1R0Y7~#jxLtiHDl%N8m6V085_V_$J%v<@Qz~_9 zdF(Cl*Hr(Av&r!Md)~AG+t9?9zIt|#@XUBh>@$glD4BvO2HE=j(cQzcy-?iXXsx*V zJLWNZUN6CZ>_$V@9!;_cjmv4!MyASfcz`Yv9KU>zIxHqq!y<$L<-cy{&o zd4%JI&`1AJ#=Lrehy7jY*pZV>?O=n2_%z80Nsa*T4!rf`(}k(vN{N^ z$nkE7x}{7YdCPrEi@uk9hhDk8FxIDcn|QUN%~8|zdChCUU9mwr)Dg=w_%V1Kvv=8R z5Fl&GDy&~y1JfCSS$)C)!v&(MIFIEofnvm@;>J_)=xUFbS$XwnZNuce#vr+wuYIwL zrWFi}NAx(wCrI2RJRoMX48}t?xqWMXzbaZ>jBbi*)#5D9-vM#7y=GP%jSAMZWLDpJ z(GzNAskB2%`ba36p0kvB@90e|YmOYA`Ej_;BH1&|!q!-P2KzAnE?Ma<<1Irxa+-Yo*Zm@4wL(ja z$K_eZuMj@10QOh)+LdhZ7GpTWT)Hzx)BvXqg+;jf_&&BOGQGk$Y^#(>eXx$L*{s&q zKJj8NINB}fgVmk~%M7Jlx@P*3^B^#eD#DfZkGoJEkbxN$>9t_cU^bWiqS;^@dWtaeiQt{OBe zX4hI?jD>y%l=vPC=Gu#y?)5fK3mJGUovG+mOo)u=Rg2s8wziSedPj#aUKD7go>-6c zPmPfUajB}*@o|-f1X4lNCK_sP4H4Wq+IjE@YHep3y`x#Ag%~-B=~f2GYAPefnzK^l zT~s+K=|^aNCpu;4as8zWQjH5Hls7TnyELXq(4M?O{~^A*PaYgC%`3XINg6C1`s>LmdU{x8a9C1AKbKhw z1KPRZxsdGE#b-OqG)=u|?D2^&mg*jRle(Jd!H0#uFEI6uh)p?S=qGv2{Kfde(0XNM zUdVj49y#Gm3g^N&YcmtA@piERy9hRV{TB&~$}Lre@reoEsk%z3#7M0OBTw{;rj0@w zfM39pE~sp5pQVCwmTE=6Yws^dab}pPoVwLUQz5|isFYga!U~GvOsJVnw^sHKg*53(r4L1_&Ke4Z%Y@XM z#I)tm$_sQ7tU9un4HUbDb>gY~+`fofDd)NZ94fy71T?v@Q+$+!slio!vuT&PAJsu5KeF@3J&59bkYtr zK|4ZCIDm3~5ZLFq;Ked7cf^?&Z|ivv`H#E2;d?)N-Q|@SUi*E{Yp8zimwsQ&58~~g zqr8Tu2w%wten$yMiH^`aU}GR(IxEE8H1G1yj|kBrpNNm7CqKfjqVsQlca+`iUbH%) z%ic1sn!vB~+(|Imv8!EK;4T)BjByQnKRjQW0M`S*U)-L^-{63qt)L&V!alyAX29X}!1oX6YkPVxB-w>_tO^dWH!us=JDp&aPd#Z2txu>4Yw>IDpSUY#i8l) zkpm&Zmmh3)?! 
z=-PoA`Gvuu*A_6z4$KtCFwkF*uAA6j1@gy{tIKSbm0e7gY{&X>|5}s z;UQ5F&-A#g_C67!ZjhUrY&?6gC0qpKPTTqbc?7ZjFv1&FVBrq|Ij(?@q&yP^xdLA3 z!m)M+0rEk#uql{=IsyaVGz~NDJlU=mKE4V4@5Zu12LWth-o$>l!wTNN zF#*|Oj`%3f#Pg)O41}xV#~n;U3*13`NmJ3#x!|DPvQR51RodEY+}+dKa3NqHYm}qq z_%UoLYtt4x7{Lm}b<_OYel@8)?WV$g5eD4#^!t219_^dvuJIv1W!7e<_z0%!+*c=w z0mtv92mR6P6ji=FIxdaY{`N%ojFeSsVeFWDE8K37&rH7+u~2ZNZEZ&%HO?65HR zmOheAH={yiBjRr4&s)%)2{}_w@ZYU3rY(0yLZ62z01YOa=_r}# z#Dy=TjS(-gP;hdh1-<3_g$3r=Xqelp#BSn&ugc#{$aQqN5rc-oHxpwo`zw*9?~z{Q zlCOUSd<{&ktsN&058Z8;JDg6YN1yu#5!Gop+pWBiQdj0F9)xS9K{FW38}WWWEC?Zjpj$WWINud2Dk3V`PEa zja%cxK0LUYi1zE}1a9_)kFD9|9(3rvHN` z#`@n6{r_oV?0=Y9{ufP*g`I{U1~#mD=#ha-E^-pei$=KF-jcEgaieR0Yge5 zrNFHD*OB&(f`T%xi4z3H1&e@{V`HpXZdH3Vr_dz=NZXVvl{Yk7m*KUvDyx0nL}fT&C)RAix>cmZaFk=LCN2c;M*EB5dH>t*z1X zHQltA{^~aXa*_0!Ikt&~x$%X}!x@rMVs*Q+xbd?e1g*=5?sKgwq@91?_VSY%zKf#x z(+PrE;&l~TuN(bDiM#RcMt`|%V(`TV)Yz}Hp+z)xVlC{`V$|7dt7Ysa_s+YKxw`%L z{@1^>z#&@_eR#_n3bNhtJu9zNrHo^Bf+^e)zb z!ve~(85sSZ*-6j)Z0sJ-6|#x07XlS<8lKNir!;Py&dXP@5wje3(cFSeSRxXGf6xl% zKT!P&Up6CVcx?Jy5N!QHNkvACEuH$bWd>ODMC$nva6qRg_a0zz%D?s`t(VH}o;XiO0{$FXXQ1o9S!$Rni(;PTb&E_23>#I+cq-c%rR<-Y ziWfmjJTO+hH@Uoak*HwPeEe&OC5VDF<1dwT{L1)Vb12puDDZ2!IUy})%-dU7CXA;j z=dRn&y`uKTrut6q@YYRyMSfi;PvLP@RCNcp{b;2~5)c2c_)$i(3kc4=%EAY`8;LXjc+`dnb|hXS+NCQFPJlT?a?l(Q2#&# z{rTnT`gCo(yV~j^Wjbc+%4RB))t24qt7F*SeCk{U_K8}JMz5vGc5GjynPo{O89ERZ z`uLymr>-ie*>hDvf57Kv)+OaPO)b}uX>%IPJ4v=`p{hgy&>PG5IvGdC0*cDfD>^-$ zzKb6yW;|q6Nm*3Y`WT}^dwAqnB55GSns0=097*6oLV!vR_yg4a5mqmbPs=(zJoE)t zzqp?2m0eOHahC41g$i74THXGG0et)LmIAFOT}e6C2sYzs<)@0xdw@5_ zJa!y20qWG#3k+mCG^lJ?JIj#}(W$icdRdYVvPJYVTaZ-+53e5ai2y1-bwzoE<(s-A z`2r?geCRzsewXtq)RfFGqsm?uM^knxk-CLEWhQoa0!wJ&htH+CuEuTW!ZDjUZ$B3q z=G_rMN)1*ne^^ChwXer55C72@ac}NqI0cSO%M*`236sO7%qm{#FBc)$Io{mZGsiJ| zgp7KmtsR7t4w$JN4A4m6XodRlW^Oz?hamNdZ}$oJ7qZ@1S{0eZ`^Lrr+^roN^uxlJ zH#ydC{Z5=@$BYauP7KlY-(O2hhpjAo=5<)%`oi_irskfA<6;@JVY0^|A@)t9JIFG3 zY6(D$#x1|ID+jtF=V>z(QG-j+iq)FRN;1w!S72CG5(BJh{DfX1lA^kKnU$V#=>De9 
zjy{HQR@y5#TK*oWzEZGDEJZzN)PeKzp3^7psGY;NwKA?eY!!Xb9Xa#%7Tt!n;S%Ab zo1|GWy*R8pjmKKEBowL{3hi~oV^w!(cTcW1KjP+~aVMY)X{11k%3y|5ExN22s**6& zsYLVgj^>rbFH8`dlgP$hlEIjWh;qm_&xuuzE=m{laFo${0tF+7tx!c#7Uk;BA$kaA zWO$yfKhnmrOK5O7A(9(3V2`XW(25c4#(8di!~Vh$0=)lejDE|gy_sM7E{476_ij$6 z&|oSTy((6Y41mu(K&^)|s$<;^r>Cx^-+#J0-~U%G9L)TZt_1*=3F8_xD7->IWy>n* zETU>)nBNPhe)YzlX~gjrffU-wD4||2X>wnGHY4_0CtHCN4;X!9f`Qpp3O5sA`@*IcgaI7$3!GV=4!LjPU4g;H%Sr2+D4aA{}4`F`_-RHqif_UzB$Ofp3va)Q(SzY4^hp&thhX{JZ~c^mbMP zTJWMjG}`~*fm~9#AxPo$x(}oOqe#@& zaZi^Md7{IfnbJQsld}`<2(5lT=u%|vix#m2JgC2_meor_iuiis)t{n)BqNBJgpxQ< znD%Di8QawopdR_k5pK=R`OGGY4PrfSZ$`XRyG%jzoGQk`!Cb-_DxAzh>*)^aczBAR551-jslpymBo@j+L5eH@yvf4P(mt={>kBA!8mtQ zXMh!AknG2Xmo0618q(J{ z;%>BI;uncyJN#$lmsRF(EAX`m3n^iTG)AQ+u0Iyyn~1<6uyBKu;n%gPnOH|;-jt|V z{k*y0FN6h~F?MC&5S^gsV>$0GpQNOWqXF2rxW#kGOZ&--coq*Ix7T3}jiFsL&dQY7 zzqtbgzX23G_01Gf11Gy7TKzzNp1|MY*j^sCW;fF|U3Rn;6Ws%%4~wr3=&gjAA4s^N zp}I_7O)w2=53n-79)oQDc*xUX4-UA2Y-3ksr7&gOCK$TKPo{G21F$#^Apcp#@ROt^ zO@UIrrn7DU3t=~{k#l^SdHDyqcq=bjtFUY0qwjHrBXR*N%&9~~Pr4f{b=ZFfC8$3j zIolx=jt_WFC^*QdYv67BL0+7P(_^>@!S(VaWwZFs4BF)}@tiKi9>SAK4D4&vPl9b5 zIKTt0)k}UEpaZ7WO*#631o*-qhZs+Y9)}rE2rmgv30LtZvSMdt>Y_QF5v`+5XoOb< zXE7(rgipgvlnJfL%*oA(%}LGk;ieV|1!skAG9g%p=dBP}5waxe{uMO^hy~L0X#tp^ zgaIs|l@M4E=a8yMRbjj6rXp4-m&lNyt@X6om{@kjJz7jgY2@lSK?t5E11&Fac+vBA~&bV9e^g zF7Wcb7K_ECTOsNa zzEU#y3Ena?#3Fv81EfIt$l~!8u0$%j`ER)ya*5qjc0)mD5I+4$dgX3;40&a4Q2=g` zF~eEzbdYkxS*Q?l2;St!+OfCn03*;HGWVoi#DGvpyaHb_h>-9%0ssi;3-YFzeHwx2 zAK|ZpqKVeBXPE%IxX~3zI7IL0UEP3M#a^$nfvv$$D1Z*A8`7qLJr}?Rv=wO+$%&cR zJAL;!tq^RlB&`s2Z#bcr-~ytCv@K*;3RDeQ8`Vh|p-0?ESHehFz(`lt z9uwAw(xoj(KaPZ+EIyDy3}O!m))f)fl@I_%;=vp3i0ASH4Dbzbf_z52BxMiXwGL=B zgyT*O=3ySN#qvZ4$YvP$JD-uC7QT6wf4~Lc1zdq{fo?!_K(r)KYMJ-Q2E?QpvG$t= zJgbj;K-koypmkQ)CpjT%8d6s2F)_A_+i8T{L|3|zVof)u7BA3Ewnbs739lQAn@vj~j`Lv)MCRZ*l&)oOzY zR){IcG{nb>0i1?(gzD0D6e&?JCGt{4EQ3{KWf5y=gv!I#)=G3yYyA*_v}(kBB2%r< zI)8oN$>9PQJb-q9V?aY1t-m~0;}P*>_}VyvDe`2*+Nv;T7qM+2{36fEQF=ya^gal%7 
zQv1ZB$UX@uP6$PDIWCzbY5hW9Rbc49>3zaPq7|5i%dimdX)eTqijy6X0%Ccjvj}Ap ziXsU}3?jgPfdY)ej40S5+<_;+)wf3+aXkD6V*Etj>D9lZj(BMN8koI)!j5#LB46NB zRByP{w}S1m92-^a|H5RlOlc=<2#d z_f9l?1NRXgjW4Cga5r*wT~S9EDL;bSQek|<_rx^xhF@TVC-vN+M;bJIgZD5g`1)_W zDL3gYp1G_p9=VJ%55DMkWt2M~B=F6F%y2Mm!FcL@{6;?G0yV+lkM`eGZQGoLo9%UBL1!dx%W4|gntTohkFXiYwdCK zvujdgVO#(3{6~EV@O8hu8|coN)EwO9=q9teikW?upUJ<h1jfsE>>xk& z*m{T&1IsBpafnT_mUt)$_efFBw=}7!<9XU#q*f%2{P1YSa6UO&(bS&l)OG5FU?C*UWBEa{RNAHLyQjXpnL5ep8SyIi`&v{px7!if?z-+I4|BhcO+ z{b^>%=>Kj{TcPi|Y#=O^hr~>lD#WZDU^r{pV5GuM6@6ji#5jD!DEuXY^;azQeA4bbFRR_Q5OF}TATmi5=o6a0onRcRIw0z9sdr95DQd|}_!*Vpp(`PH@5D`4n9 z0U0&s_V`vca`#;zysioxLS(-Gc^XOo!r!@I$_$dz`ulTxxZM>0bJOsQ=08_S{>A6Y zWBr$RHTI3D4W=e!WY=sK=7iV8MC#>m1c-V^H|N%(-@mq11% zBch@=!;Dp|a=Y5mES-|PW0;LG-7Zi!)pM02RfPi?#2RbE8HL z9d7@o;Fs6H=af0S=xrv6Y)Hg zdRqC=Edp#QuB&i7bs(N|p@9cz#z!ilDx#$c}=_uov)*hleiqSdc zE34>+kvPvUu{d8~-N2PSnOBz2-@eqKL2La)?&#m3Glef3a~G2*JfCafGgAk6JHb8C zUat6_)(!ZsR5ph*4{f>oHl6bKKsCc2$nP-sE)lUqMF&PQkLZoz8qNt~Xu3Sp92PH! zx+H9KW{EuR#3$0WJWGwpXB&JCN3yR_DV?`t%X^EOS?^TW_NqOc!oI~(ia}^142DexPQk!JozETIBpw><0@$4D{12G zKO)~U=LpX9!S89j(hW~9ba8{s7u8A!U6%SqIn;y%>4#(Riyge%V(VQ350VpSkMm$y)%?#;TZC{3hVweZdiHNxq5GDk;Iv5P;8rBVLT zEWQ44GNYY8tC63IavG(bLc);_imB#t9Q>juR-Vv`VfA@HBKVaNjFQU9| zRu{-qEJ8bPMeB$fD(DO>pqT%UI2hQGh^Y5j)|$M!ena3kB{~K!<(o*4VURrBp*TQ}WjFFX z*g=lXL}6cpR^!;9xeIRo0Bf=5P$KO`G21I>cyd~k0u5fQ@S{0b@-DaK(4hTRukV+*sTXqguOSXdV9xQ+aOCtZp$jNhf~O+-75`Qn6g7QhArZ{`t*fnm818 z;r7LTlanOvl|V_$_7y04hBu=e%Q2R-r{A032E(5m&{~DuV1TTZO+1>7FYRh#oFPNk zD3N+sQ_b?Avt`g!$@ZeEaI7PGgg|P#zT#?lD7C(X4?fuwBI8>#IKX+X6v-er+oGN;>UQ;4v*WUC6 zP0IpCxsU6@rst)wp&iY!M7DJ_L1EG3Eu~S!dJ@w1v{6JTuRd4KvF0C|#?klU6tG818ut z!z}$fZO^Ms8WV7AdD;d<|^-pZoZ9u$E47e`wQ1AoQYyrbO|a zMUfM4n|s!eb4r!liHF=IlHTa3!^IH|8n%p)T{)KW&~5;cxp9v-Ogl6HR_WFv2u1+C zvGt_h&u7k677jk&JP3R^jt3<3=Axn1Dt&XWMj{~XKtZ51!;+hFx$;DQOkyG6+FH z8Wg0vr3EAvBt#GeB+uaI^OSp?-|sxM6yj#p_QE zLCbF+XQaN7DVE@qU}`lw@C*ec-T<6=h2lDt6O8s2nq^WmfCjGjM?Q$#h@>_7U> 
zf4M`>o4OWLuKkvkC>bg-VRPY98_p&6>$gWmGI^6nR&SdKp6}=%mF$+_aQ90PNMbkq z_9{l>(_r;-6lW$E8&kjC7~ThJ?aiZ>>Z<3n3kn|Ho>lufmU@Ns!tg55IMg-r{BU??|Y=_#O%A(FjSbsoUjl-QW!lfqh0>x(U$w8tZfVPCt=jX-#>qP z@3hRV6FlocdG$`>LVBRMMiYtqS~l0FgwNW#KQ@`}WXcnil}_KKJmK>1xdl~Sio?Mj zqQ_3F9-jjV(vlT=_wQs>XJiW3x;_3-hH$McdYR+>4FlnW_(|gRna$^bdHcs|;JW;4 zMDl4>^xgxz$P%j+au2g=KoPH%5)tlSpe$-CkjImYPP!zOLK~8XImD%>jZMJkR`{{CzS9SSds=>~6~?=K8b z7c4hr62z9?@%m;8e2e9^7g?=QsV>N+80l@PE14d0ckxck_v6W@&i5aInOUsXFMqc9 z&^BtE{nVH`zwh9*2AkCC4EndIuPU^ zw8{lByb38)4q7R1P;n5(OS9}7ka-ZOJDeyc%P@T`a<%BmGM_1MyUuGQ_SVp9s6b=? zA#aLSJJ?w2!IB!G0fnP+2;7cA|_9fX_4zt?)j~_nI zns7JNt&h4a9uBkc6RG-$W!%VF_$C-&?Mb-Fmm=c{?1r5cgCrJ0s6y z#__`=mx$`X`-VS;NgF9cw4Rg;IeshO7_yvskCJIOBH1MMFw{r+KYOaE!4;EZg zAuF4y_d0@=4tEU%d>1w>MlvfxDw6x13)j9+yf(a>HukZ4Ymp|pu4W+XdIf_Tztv{Z z!A0L&ZR4;H(QT> z+Czk_WTMq9kbLlt(|Me%sH2^`{Ou;r2L!Z@2yL=1q|lAm<%L+SEro~hWPLlf%}J8M z4_gjdo$fOR)1Mrh700(vO4NNX3@;j{Zs%{ftp1=|-n2-cT2QQb+4fpNX7347CCQNG zT^>y5(z0J*+FXg|dhxw=q)`WHofhQ~)rZ**hvSDFAD(JmO^E&Ggiy&`o6zqQiJms? 
z>FyWMA&}D&Jebcby9`sPiaaz9`JTlmdQa!|-gR2?l@N@fJ(z@B%fTW1dhu5hBS~e^ z&Q^gpeLa$U{HgV{O5)OlN++MHH%SvVXl3zejThVR&!#3OC0S_W6BQEVG4CLq3Nwnv zzVwMOXSzn+O?iPT9l@W|sM0fgtXC|!CjOGYt6bYkfbGLIRZhpL}X{mGWZe zJX7Qk-JW5nh5A3lz5D*Y=nXw0&-Ygp;fk91%DU!;NSDud#W+c6uKJKkyl?E;@K?>G z7S&|_I`$%8Z*4S7jVzFp`NK?iH*bQx4mYKNZpmuEn03iRbER8(E|}_X*&80W?8KDo z$y#Of1ed|QrTlJs)E#I)XT^m)}4=H;;AEz=p~8{IPhk zba_6*&!Be`LtH424`@`J_G`l9V@f$YJZ!^n)T?D(RO*EvqNCD_&lSX>#|zDqn5WP{Sb{(TR0 zKQa8r#y@la@gY!Sxluu#zI?90B=F8vDXx{Vm1^&QriV+zfPPkX1^3tvjUqBs2c zMDkU0@j;@b-NQhGXH})@Up1_06xHWm4$0A1I2F0-@y!Z|Cu0l@1?WGE$Bm z8GVCSj1{qSGfWxNZcRCC9RKH%8x{S-dygcqHa`-{8DcGdaxl7YbU%wHF#9;yRr3mG za#4fEv+hBugzQLG2@<6M*0du<x=Wj0s(wexP#c=%-a@b&Ev zQIA=5Vl)u0Q^pkXMQ!h=oMTX>5$bU6)<=8uwOD=~yg34HO6J@fzHfa?o%{%W3|3ZE z?`sTrI}m=_`Nbq8Nz~ILo%x_wQFMGL(c#?Dy-->eHhpz(lXqjfR~Uxha5p$#&G-BwYtrOI;l}vaHf(EGjNT=9mhJ|O-uFk;5WQ1vFJAqW zv_DaG+i9^oyEjr zo%ojc!E%Dm)y0$5JCz$U0|Me)u^&3cJk+oR%!q2jc4cO2a(pS`smXC^-1t+r0-_Ix?E#x5Qj*w&2N zqBOZ~Jk4CU$fkmCdu3s#hJA-3YdMtj^kHVD`1VhUTag*wp9VYbL~Yln&*U4cWyjBA z9~)$A(dH`F6fdp2xlRP^U;0?o<~LZ2jJAymcYNZsjeMOXf7@yqc{e<*@&@l* zu#bG*rD$&r`<1kxJB@mG2&GfJwhg*e=DQ!MziiXLJnZXzWWRj5ZQO#$^}~tn%G=V~ zR8d=siC`ONHAeB-RnyA&P9LJ0%zEN)omy3oe6y6FiOr0BNT0HxT$5jajkvCyEhKLEKhMxQqoyOsi#r(Zn8M`i5 zze~KAyCEpUwP(}lig&4OEIHrElK^I_e z)hP4!tSCjzkHjmB991-A)=GR?PEM?=%YJRpn{)k#2EOuaO8y$kOY#!GLKpeHga_G) z@0OfcijVGYF0b7F#Ke<38Lb$_DMD1urBWXC1*5^}i=XXhFN1v7>LDS;^62anI+_>m zQKv?}a~NY9!vYzh#3Ijw81EaL$7T+_dL9FxnEoL>GXrkF{B6wh!-|rc+Bc+BwH`@F zk!{dC^6)5TC)XG+vD4LVzgtSNaXOQJ9&`O|!`!dE`Z=#S1G&vU(VZV~H@2Gv9dN_R zs>_VSx-L1-e<|m@M?4@m1tY3-2%Q&b=`7{n_Ux#?)LTdzbjR zj|je@!vocICaurG;!?Lbti{#heQceT*B)K}h%NrGZZo!aEylcxqSNU+*+cIf{r}`LaRlzO<0OZSvMr zRPPI)m7r<-M&6kJ;0IIpPWm;5nxxkyC>`xlcTL7(f=J4*0|XzQ*-T5)5IuU~i-4^; z6^M{&^?O|DiB?rFfQvX3q$`XH{20|Sh?I$}O~7$5@Y>6DnkaM((^e?Zw|RT$p3GXV zDkgst;W?@ma>;$AU_3@O0t@3HYG`9vfjVe@eUxxa`+J$l4!$aGC7F{{4fg;RrG<6J zFGxYu_(g}r_aEzQ*4vU@<@a>uk=idE5_29M_M(#a1j#ksjaysyVeSbPh4jv{1S 
z2t_F1Z4u;&nYPrzPdp`~XLe2$YhH3$4Ytirz}eJsli>sVCi~D;+}Q$ZA#<6FUm`Ia zK8auJzi)f5Yc5~?8^f2hw3o|I@DH9TZ*!hM6?yt>#(kCV{Bg8DF8hfye~&cRd6K?M(k!2!PQ9>o55Iji zvifa~nm*_G#0&2^pNFVA(_sg+UiavfF&e8c^(gh2FuW!*i=CSk|J;Y*CaP0P-_|bl zDG#qLm-&;IWh_?{U$Bh{jgl0wlVT^PjI1)4yxc2%GjT6J?Sgs)2}-Q3`fa`SJL&`Z zn3rYNoo2|SlUH|%i`7*#>HSf%Mx59{99LZG}Lg6`>&oI?; z@t(1ZP(+b#hpFW{Gv|=vF+5|IV<*0d{ZQv4K5nRdgCw(T+bx@mp`2N_iq4KvG&Z|Ap2)bY-7y9H1?GRTs8_r%Asa&`Y%VVr3!iFI{{R`JgQ%2!q*H@4?QNx({Acit5cZ_m`LT(r(a7o#;HhCiSaT?yh9`trSvS}hIEF9#sE&Bvv&I{$zqY#E8$_MM`0O!hYO~j8j*mg8XU<`YdGeYC zpO{3&D$2`mU->F}<=mde=PnBlyev^vXTodwBsooaf|o)egX+&|E*en4EW!M6&@STvp-SBnOvfo66V%H$E|$ z+^@w;#8`$iy*W*02GWjeTSFK)lnoulB56T`SchDic$ba7rs!e5l(2P)l|s#PVrty5 zr{|CkmU?(2iG{4!`~K$CQh_G6=%oi(eGxO}FF(;=y%_o+<)UJtD)+aNI}fvih+Z{c z4tX%-El0)@sc}0CzJ!^SU~zktp4(1CDfF?oeA6q%sZ>~{jPuz3eO39ME4k61B!eHw zKITvpeU!u&l#$cTV8@FTS=*IxubkfI-8v{g>PNF^Q%O1TnpKBBPB z_pOx12Og8O)H32F_V=oou2^Ijo4MzwZXhrjpA?kfY=Gy{j!8O1FUiczC~Wv7rXK%JSQjGb zMvn&47-KEx60>*W?fedfpwO%v&bh>LWAHb(Gj&gsG(DQI9}5EVnz4SK9Mq@P)oy0mR*ihARNckrdG=VMjtg!tr=I7T_@)l%_rsXWvuY+7`_ za-=a>aH>%}4*kfOoX_+Uli{@nhSe1e5)ZAXuQ|>|3R06T^0(HykP47JccUjyr&1uw z4VUC4CA6+1ekLT6+z|sk$?d+9i8BekC8d-J^9{2vF{+GGu~uaT*9hCpZ~b7`V$Q@n z_!9H5D@9d==^EaJ9ZE&jxmEfr7;3yEk{4}N-xpjvuS=RpBZ;LX&oB0PK#5+`#GMzb znd-trHG1-}{@jQ*DYZ#ruJC?S*Ox(l>^FL!B3dSc@K8}b^Y@2)&-oLh$dUAfu~(=f zA3tw9xA}oRkU>!kGo4m>NfK#k&Q9ijK^?>D$^7lYG*V(t=EsupjFQiuu*(aPWwxK! 
znPg81uM?ss`-=I4ectF|@_;$LX(&IHfq?au;3xKuE2Bap5%Qtz_98fUZd0b?4l%8r zN2IV{;g7$mX?9bCz@y^gxtmk4Yqk`8&otzjo7!)(u45+jHROgq|2eEquecmcb}2Id ziB+IP(|$nEHrPti zT378H-&i^f?%2bMJ@v<-?nwJ4=NEt-;J`0!bo9E^In9aOxU^)RMyB!ER#mp@J5F_V z?x5z5K(6{!Dm-jQxw}=!l*UH>KchGu>h;dIsZ*Pwu=hf!T+th+BK1rWRk2GbV!2s=q{&!F;S{~fJ#wP?PImnNw|B_ zlCrr^k{@g4oEw9=?E@65#k8z8Xny|w{T8*vXBgu{)}&MHeKJ2|jb(?@*A^6U^?tAi z_-7Pg#A<0-PT||x1|Kytz6g(&jeL}?{(D`si$ z*ZG&v>zqi>^Zs6LVj0Ea7nY&JAg+#R zbrVHI`-JPfb;`>Tvl>6+EE-GFd$9MKiI+aE(7)O`_vt>3$HRr<&E2&THX}~UY{jRV zFapLO3*J7pXb82 zQ0Kd#Qrgz;+?C|)lw)FTbFEW7FDi$FGxdo*MmkgGg`^Ai&u%a#;B{e{T+iv896pHd z>@}C2#Gsm}s9dF9iDH%{ii=`|53ev=l#5FWs-rpgV)ZM zcvqR0Sd;FNT3-BsOTIqH$#w_mwDfi*6-ZTw8J6?7?!xokKyEc*-(1_5)QlZYuIQLu9Pw5E%8+Om%Z1n4`4_^%2xLff&BUim7PZOuaMwhZYg2eM6Szr%m zQJU>;rkU6Wels4visrDCPqMJa>PI3HR^wW)z1b2R-WNm%a=YrE{Q zD&eKmFN)ZI*+{k&-(R@1n?0eYmsH(IE!=jHFw>D)zm|-gNck!xMl8dHS1NOZ>7Z0h zwcwp_1K(_+G()Yr_$o6?b@ueDT!Xh`qd&7}pYoc23#|!0T-Fu#@iH)t=#HHZnYC(g za-SRC7@5RhW;{o@&+dIm@ioEgT=Fn$T8AC7McVl4&#R0%6Y6d~g+DfsOs3MK51KgS z&Yh2Aqr6``Iw9C(y8X!cyy8f;iNX7qjsBi-SBN;SztBEMEjGN z57&9N^~7HtUZ0~f+0u^NOtXSTm9?XT;dWiz0o69>R%rB%cbh*&$j$F(mr`M*dO0Rf zyQl2P)$+Y}MaN^S2PLS8n4f(P{R%2QMsLKftxJ4SpDdoGC>dOsX>iFGUhE#YXgXf$ zwwj}6-C)z$deiPj``z&@64Bm{)X`FXH%gWdiDfP?rf1*ze6+9g!+KNbDfOdjU+g+f z!e&3Ms}yP3&zAI$$1;v3>-3flLI~&)r&H8&Ey~@;nlz5z?7j?mMm=~ORe|G0^pc;h zY<;=wek_9m+muBc;ec}4hY1~Xp`??@C2KfSg^+ous{xL0*FgdAMAj%zQGHmft|`qG zc!rqE!bAVCLy650{EIpd+c($E%R=@PL|k`t_at8EZagg+SG+-fxM0*xRG*kKc|=!nXM2NYp#Q=S z`SNF-K+4DaH}5F=?YT;8x>!u^l6A5rddE@9Pzy;MejN26#GZ6`)9K)~PJ{33{c^eA zxBhr1wIdrxZaphMM*Neh+w|B1S;PI7KzFx%51YErlGzf+pA0^9$)>-Q;rBnC^>L0} z-R^tg1O=d$nOPOD79T7qzVx5h%eG1&GSD`GUc$Q5c@#lWE^)l?sBXIbMXXW8Ub%3} z>t*RP`<<|RdlfT#;>m&O1_4&{E3yeA@hXCS8iq9w@-ltHZJtS&@Nd1zG~RVtTTHrc z<}~2vVPe7;@#d8i+3s}U!X(^x<57GiD(Q1;;WeAPG&mW_X4RXYucb41TunZ0Q%T!1 zonTT(h?uf}f`f!>W;$MATFtBAm!&qn&ZMcrW=PZcP{SMk{{TzStcD?qkYwOC_QJ}XNK(cj+%2! 
z*IPT^x$b3E*x#*pLbR3y48fx3c*%8&c6{FYsP>AkjQw&ec{BM@zM2o0-al?6dWtXe z&ic=-6&V$+jq{Qr9yZZpfpx<_mcwQk-i2$aO?dZTe0k}?3oGG7Kig6v_+j-kj~@(s zWuIJ5Y!dCX$@8_}_GHn_92pZ;8QEQ73cNZY;OF?0Q!ZijvvUM=!{GJGft-a8xCvoo z4$v+9h_&!j-eB>5(bG3RvSZFKR9+PCYU>xvlT438G`}SqR=t})z3HrR#n{_-Ehb#4PJ1K6wH)qg;drzyz946HE!&UMuYa{miIZ~ecxYbKD;V|8^oH}V<+bgthrKC3 zeARbFIbmOZc3d+$uFM%**ymr-Rmf^oEMp2tajls=X*TSLdV8##VJ6Y=A`2;kDmxiX zGCulV^Qv7U_h|D#meNY$eZh2z33GC+bRT8)Dv#LWVP!d7M`T?Zv;NaZB8o!lZ^Bx~ zLoOZP!xlW_LXQzu_EZEUm$PQ3+j)E(#)>OKj~;w>!k_p-SOe$O(VC=9IL56T%CZn5 zF~f+caylL1u%Ea|v{`--{DIU}YyG($lhu(%XRAYnpxyqK<$JA-$eU)h*>h`xbU})h zzx=U=x~OZltvcRE{A@~B7C&8FyCOHY`){Nm1Etq6-7BwN{po;6kUJ zqFg(%*pWBOcr|Nq^aCjGwWd_iFO2tyoXF2MZf0SV3#T89OOKXJkru=R+8E1SY2o&` z^R)FT;aK>nS`=oZzu}BwTmwN z(7*IkGQ#NH=}uk|7n|$z7o`NuW1n2EsP_`E?JB3%gt(UsepizljuV=3xnSO1_$KY@ zD0$5*@qlL^34HC}cNl&cT*tutt zS%{yTgLG@6w@*%fM(BS)F;@s)aQOaFAcnnma7!2Pi0+8&gWpiA1kRHNfU&WJImt7{<4h`F~TE6ziL$s%DydZ+ORgO54e!V z#V?rEm4_{LnjBK;r)Iyg*F3fzecinsOPl32H^6Nh8~3h8>Mq0V7doF!kAa4t0&nCT zRg(BzcUa9ou+q?osV=H+jeff5GSU68W7>&MY1-56vtHsLVaKy>W&d#Vv>4N5W!kIc zrd4%iE(#j*z2)ePWK6!I~YB)SLo;9fiC6LG1gBnzo6N@P$JNw02zQ(cr@sn}lQeZGnk?t^y=CTL5&kvz zyKWLyi0HdO*fF5K9krSsFzVVaTiZlW!8@cfoP@bkjHLF2gXY!GN;gb@+ z(g)vXe;p-zcUm-Kh3C!D_3AZ-%v(k0f6JJ)7uhOwI#>{=P|7->dC6dJvn>%{#fX#5 z1M9uqckqnyy{R$citpo4b zkH%dhA1L4Yx3q^azl$L#;EPkZ8$)R{v1c(kupV$fRxMa%cTF#VFTUcYgvZR$NYlpA zeuk!dO`WB$9Z$usR!qS6le|M6{D_x26QAEit^3Q^=d09D&eFmIJ90Vn^GOM6NrzNL z7VhF!4DI@4TD26HKcY>KXs4T^-RfSqe=++dy!hwm0ErCU$FhFy4>G@fLDYUQ;!GL% z@TPVm??gGQ(`a*)>tMS-CqIAkP@c>3tD~z1L6-T_fx@fkS99~Wrw!5wA6&m{MxBP>v!#*Q1a54cj? 
zbGzTV3B5m#s7c zx7T9SHAN?_V?5mshp)cpR~lKyIP?=iO-S?eY+&(NfFwCw} zNHbiyyn_7;zBiXbrRO@t{(b0;CPP*swo?wx2UXGcu9A!Q!!Y_x!jfx9_YU1SmMlIe zeKe_BTtiIoktTbN4|u9x{}%Y{$b)QfvBgrdLuvb7NpwBJ(v$C( zRSeCft7N#jYc%yBSCmgr(?2uaOhLr=P9og;k(|Z1t4H6{=6h_cTorqF@!1|dz@X`m z8hK2Wy?bF6Auix*wEvcjPfC0L@WQX6!Zdnr?sNgR7&5o_m40hf(=bmy8SFum?5Xdt zW6K~6=-1ep@3FKpr!BKJ7v?;ToRRMCo+Tsry*96UH4p)QK7JQ@?Y{(nAg|LZ84)xb z%{t&stbFQeJwNorTV$^JHZzUBp^sfhZ=Mfd%gpH!|MyYaY_{R%bf<~B%IvoXE%gl@ zRfQrwewE#Bt&i~u`|k=ks1W&NP}cmgdfT{dvV;{O=+N-)=P=p&qP1;(_Jldb^eO?L zp~wkyT#(e5K8d_qZaHdNv;XEn=-A}55)AC%q&wZLO0jodJNBhJvQ=cs7$*2!w|?m? zy>s4E?M{VQptt7xs$M>kti59WFRn{9nwt5vD)Z`&aR?nzuAqm~LQ2+M&ZSemOAfvV zoeLQI?}{zSE?=P*RqQj}(NDq|EwwbZI+dIc(@;Yz zTdALWlRqyL-jmgzFJ17jRiAy8OLI6}6JKEKoASuv@Jq0hQe1z^AO&@%8e(&_@sNMz zncOVa`>q2b=j-CGn^ksHS5A)J=O?$pQx3l`UKq33+jZ^pmxMW9^lPm4_#Ard(H_0X z+6lKF&6|G_4ez8{c|R<%%st&G%P4ofM@uj5b9n$0#e7W@_I|7oH~E0k)*P>aQNXoH z4*|9GKw>#@YBqskrt~%1`g6Q*^;pf_bi78XJ=fA6u!W2+C%M10u;%@?IO=g(K>^y8 z*T>ka!mvfu6}!v$Ydo#9)+r=Tdym((_twHc374HVe;5A`r}QxRv>$oX=_Oo!JzBJ!5gG< zD%twU%q^~K9jLgOTOgQ=u`*u*V@M2m)Me64}(Hx!_+pf$0=zceZ z(`nAyI;zV@laMzEi56xZl{QG!2Y zJ{Jo_46fkNon*#aKu&px^p@^<#M*nGst@59oFA|C15t3>&vv-1KOO1E5Fl9!vx@C+ z5T!w6O>X+V2=-MXIYyY`-t*;EItrdF=bDiuegR)gUp}MMY7=Vp z=Ins=R#ndghb=Y94=Y^$)SFebEW(I{iWhtuCw^kb5xe27%|9#m3Or%8oMXAY>Mj~b z^=`WAoO}}5Z$&Lf=~6o#t)SnG_X%emUG4AU@)!=1%4_YizF8Q(M9fm+zM3z06|SEEl1@3{rO)ac!9K{&F)ipBG+n9?nYNM3+m7H%`Slisb=W3yTPg%q4o}HPnIirNQ zsJDo>gR=wF486C5y`!6mw-}>|xs$1d2=w_Zn~#zHw~4!*7$brg#v{lNV-&}Oo+V}} zq9!HtcOhs?jM3WN-C2Z>&&$h;*9*bxSIfnR}R7xQZ*;n7KN+Ia#{5l+dd>d0DusI@vg$ZCE>;Kl9zZL#l4zPdbX8)A!?BQzvTeP_upM|}JgN38J8`MSE?=G5~iMZRi+gphL z9=DtJ?tgu-w>i@(VsGMTCC2E@V{Tz-;$iR3DE_aG)2q5#xItB!oDI=my|lLx|Leo= zYWV(D_5Y;?X!rk9#D9#jw}~?(fd97u$VlXttA&ZXldFc4ll}ksF#lOl^q(0PQYP*e z;&6VLFb_YR2L{vN7ZyPXh`GOY5^4~1~Z}%?L0VSpX z@3C=k_{Z=lDTzoqnR%S;cR49>4-Xr25os7S)1;+QJp6*vQanh06rAVg&0B&z2)HCn zNLpG-N|;~rx6J=6dnOdhk#lr&H*qwx_`j*`-^v2S)BivC!~1{G2L1mu@?VYe{|wjv 
z4A*}(0{@lq|IDuc8Lt0o1pX`I|CwF?-EjSdeQ1RaEf)W|#25cph+yFe#RXna0Pw#H zMTGt>6p^xVbGA3}fz~$~XRBCxcUKRKvu9~Sl{lH&65zpv1%&_2{CDJ{qh?{rr*3o4 zg8uAVK5cz^7(J36Zb;9k=H%oKZSd2p@yR(_I?)T9H3fZ^(ifFRp`;N4(lBW$enAu* z$$z$7`uEk6`2W8J)PRhQy}N}gpNu`Uz?8Bub27L1XUHTV#Q&cU0Yy!Fq@X;gf)>90 zV#m(CnR81`PCBQqPMI$XG~k604Ht)RDTR`hJkuZLstj z91ynJOIz6+5Jp(`b#+G234He{hna~l!jF87H#o2q=I#q#NJ_;gy2R~!*yLJA#N9a3 zgr`nWB!r>dl8D7s96f@gCGKLJ$Wca2ORMA25j@7)-jDUlNUXF&zShRLAKQ7MNpEDa zBe?3T)7LtjaouYZmx``m-=x-Y!uK{_pKd+aEe#g4cRp+-vICA+m?lZBkAC zs0{8;YS+g~Zxkv0T!oc?SJb&f&)akUno8H3oy7~HaOc^ZwO;c_(-$1Q-RFNDU$~oc zZSYdP_e|HEb6?6f#TWg30ngS=rWnHp0$Iw6j!ua;on2q=zO|X8{4jQW|-tUG{<+YKfP^+6z_dTH6(p8LJ=EpjC3T8gs5f-J~S}jpJ z7+hv#MNw5{{?5LLW|vX6Br z{nfw~KI!2M{K3(B3+LL^oT_8B2= zF0=!k*tNvT^J)g+*-Ll^ads{&B`ssM7bMyTxA2dyOf1}+3AzqFVZ80@KAB?&Pc2!^ z!bB~J*Mj4b<+foD0$cp|c>*HlmI9720^6{>iHR0WMDI&1(e2~Ywwvv9(RPyT zU(IS$_ofc)l-Ry>x2a4 zI87ig_RI{E#RJy}o+8EiJ7GUdOw&^GY~{?aq_VS0*%(kKLze$YY}3xl$r-{^*jXRp z-htlBl>3Ckq9yqfYZR`fTJjc4NTey0)XQHf+_!I?^mGO=@Pw9}oGVv%1MwO9Qcgjo> zUp4l(4_7L~73i2KC2NT4FT?)rZq@DMlNSz6tu{hw%CLUBSVQnPVo?LhmUGp2Hz9OP)7-9p$B zO+?#FN0j-C$fNUC^Vc#WE1U&>ds)qrsX-{03odWFMeQ~1`TSZu?Imi1`0NF}qic;# zd@eKrVOO1*99v$yARL>EYY7IIlxKqHmUw4k=jLf=q7S$9j|c;sj=eGe;bup;fgMc$ zbhCv0t0%GfTga2Rzq^I1!Ux#0yS)X5pSW*m2I zQv~v!aNLEb6tc9)nXf&8rl^ge!~1w5ps8P{*eq^L+;*u=EwGD{s{RwNrOA$cft&Zl97(H%t?}lGY5vggM8c$N&PnUQl9PEvFZ{l zG{|QI71FH>|HsxP)=FHX=a9VL-$5oX+zh^_dfcQ=?#Oj{v7-Y|S!8)6_lf^aY~2_B z2eHKBX*0`pzI~MXOUB%m?c9_^OJ_=E(qI?E7!Q~de-*DsSBX2s=p~N?3@Vji(0{I(xt2XrLx~4@Pas^Wk z?pTXbDy^395Zp{Pi()iOMYLMeAh?yjx>t<{l55%?|{P5@6O7;nNn>Hs2aL&l)YsPmi=(vn-w;fxE zI$}Zf+WioSY;Bi?uFZ9B3lTz&=SRf0Ae9l6Js+`;q1@apM3BGOxqo?}Qx_uJyn9B3 zn+xGUM_BE}$~7@wq({~)hyke37MOB6up1A?R!Mn*dF42F0hSm%jyW1Xo*2Iq;U}@8r3*2cSN8v z?{b6{*g4{D`}F;bt@VAutk#Nsin}e*-WY>RsQM`FGl!e@#&8be&gFej^SxAEWA+d?x9`r>TDJt|O#To&?0)4!9#BxN>e zYwZa%*KEwDQVMg^|4JYSv`wMSwOzHGf*jEHY>PVY$zO>E__efr(>S)YtqqgHkP2J2ay&>`esgog&k@_EXM1OY#rNB5;g1}Suw~euRKos7`tQUf?&>cd5@YTb>&#h7s(CFVrx-6a zLg-MrWMU8#A_u$7+Qznxt9U 
z_*y8I%eCK57qOsoE#!p@X+ThdGoy0Twh%Vk(&0WWZ?o<{{H~2kh(@BhU5IF9@mfY~ zv@-*5)2~{R%#H z*5Wiaf2^h0Td48IZ0!8DMZeieweD*%YZ2Jx+!6f8#uNPTj##hPeL(Z0zP3e&15h16__O@wHqEF`o_nQc0uI@eK z!BA)#?)bePBWc$_7ARa$uP5b$)>K!XxAFW^JUu&x|GjdfQ4o(X@j3r*Yq7UyCeVC2 z&%L1Vd!gl`AWqWI#^a(uI`ZNSG@XwJ|E~J?GLWR^6M`#vlpm!?aJDc!Rv;MZ_yILG zqt^P@QtxcFc(mUx;BpB4x1gXsoJh*&ptwEUCe}{YGwpzT#$yT+rWx%6*K}MDf^$8u>MS(HQ+jop!3?NSo67lw z24y`>iA4Tw-Xo0?{7chV2&cFO)4Xj@9}DA&g~fvH5`UlhYX6RyUlJ_*%}{6zg~)F1 z(1jC+e?`mQt`?RAc>GXYEXWU~fB!)dv;Yc)K+&Il_xB$Rx*mA;NALL0JeVN#PK-bD z;0WkV8GqynprDBSKk{G#2pAM;|CuKUMdE+tA>l~0Je2UCZNLPC5NLML)xkge2NQ&& z{?q{zM8eVfCn$j47eOI@^s+(-v^*hxel#6WQRsCd|EWy~Fa|BXa5i| z^l?VOh0t^$Pzba!MF;lSAq>zV4A3DA&;juXtzGC?hn@$}Aq>zV4A3DA&;iZyKihyo zghtOp*Ky`==yvEjVEpJh&W;=CWzluOAYP)|q3eM019U*gNwl&sfDRZy2lTcs^s)dQ zFn|siKnJw;L9Y*>13EsV*+It_^gMtLh;Qh203FbJ2;B~#17bJ29Y6=f3UoVw4g^34 zblgQNivZ|=Sc`54&;cqfL0b-JEG?SbO-=+ z2mo{l0CWfdbO-=+KHU zCJfL4;5igr1KI%S0Pq|R;Q3ih^5^)$0X&EEqt7cSk^+=P*8xRR06TOYaDH?hP;`Y} z7Qk~jfag%e1t<&90mT()V*ube9KdrZ3Imh{=m78>4&XTyt)bTk;5i%)&;j5%91hqY z0G`94m;`MM06d37=Qq&o06d2ROn@C=e*kz6ht9vCw*k-r#ggcD038Ux{s8bCx-kR2 zJ^;_*0G`7EJcmQ)RnYne;5i(?b2xzKZ~)KY0G`7EJck2#4hQfY4&XT)z;iehhoWf% z@cir?8M+-{e*kz62k`tXjz_Bxz;if&=Wqbe;Q*e)0X&BTcn*i|@Imhjz&`*yhXZ&H zooqp`4?53))&_v*aOj*Hx*fnj06d37=N{0@0(1a)4hQfY4&XT)z;if&=V#}P(E0+1 z&*1=`!vQ>p1LAWyfah=k&*1=`!vQ>p19%Px@Ei`{IUK-qI3PZU19*-A@Eif)IRe0Q z1c2uV0MF0PpZ>W&5CEPd0P#5jz;gtE=g`SR^fmxIhkh6YutWC`1c2uV0M8Kso+AJ} zKRZW@HUO(RBcLjsWl+0pK|T5T7Fe zJVyX{jsV2x2msF!fcP8%;5h=oa|D3r2msF!0G>l{XGb3^0M8Kso+AJ}M*w(^0Pq|E z;5h=oa|D3r2msHa6Yl6b06a$kc#Z(@90A}t0uY}g06a$kc#Z(@90A}t0>EJaLYiMZ496)@I0Pq|E;5h;ipCbS~ zM*w(^0Pq|E;5h=oa|D3r2msF!0G^*+|3Di90MDVTMd)?_o+AJ}M*w(^0Pq|E;5h=o za|D3r&;>3)|IpW0&=oE;yR&Ole~tkXz;h&k=STq0pO)^AA^|){0(g!D@Ei%? 
zIdoMHy6@=STq0kpP|}0X#iXfz;hHJK1Ts~jso!f?2b*eJ_F)& z6oBU_0MF0vuS2U3z;hIU=O_TrQ2?H!06a$lc#Z<_90lMx3czy|fafRx&rtxLpWVZU zHUx5oIGaGo!V4J;i4BSfA)h{P%UNXZ`-3dA8;Wy!`rNAO=T>*ubHVcqJioy63p~HT z^9ww`!1D_{zrgbgJioy63p~HT^ZUKX`s~5;3p~Hy|Lfdd4_@H;1)g8v`30U|;Q0lf zU*P!#o?qbk1)g8v`30U|;Q0lf-|uJF=Leo&;CX%~8M){W@ce$~^YQ=Y%}uoRI%B~qJfGnC1kWed=My}Sq@<JfGlsHYFh! zb%5s+JfGnC1kWdUKEd+|o=@<6g69)FpWyif&nMRB6Fi^b`2x?g(W^e^1)eYPd|`dQ z!1IOm`2x=uc)q~%dmqr_7+>J|0?!wCzQFSZo-goxf#>&LvOd4QUM=u^f#(Z6U*P!y z&$DT-&jmbR;Q755s_FpG7kIwF^C*D&c;NX0&lh;U!1D#3FYtVU=L9^97zS@O**i3p`)o`2x=uc)q~%1)eYPe1Yc+JYV4X0?)Hqy82^-=NmlV;Q0p6 zH+a6m^Lu~y<2c>m`3BE7c)r2&4W4iCe1qp3Jm28?2G2KmzOg>v;Q0p6H+a6m^9`PF z@O*>k8$93O`3BE7c%BXS-M`@Z2G4K5K#dLXe1qq=BcOBpy1T*i4W4iCe1qp3Jm27X zqyv3^;Q0p6H+a6m^9`PF@O*>k8$93O`3BE7c)r2&jrI8k&o_9!!SfBCZ}5DB=NmlV z;Q0p6H`eDHJm27X1QIk8$3VY`2o)lcz(e11D+r7{D9{X zQdDgRJU`(10nZP3e!%kxR{H;f=LbAL;Q0a14|smS^V@InIQ|ZJe!%kso*(f1faeE1 zKj8TR&kuNh!1Dv1AMpHkz0_v_o*(f1fakYMrH|M1{D9{NJU`(10nZP3e!%kso*(f1 zfaeE1Kj8TR&kuMWiBNx5;Q0a14|smS^8=nA@ce-1w;!qc5$^!>bje@ zsv~VRKhjp?Ds9ytX{+mQ+E7RHe7tL(rww&9&l86_n&*i_9nJH^p^oNxv_0sL=6T{! 
zNAo;!sH1tFIMmTRPaNuKo{x9U^R!`HHO~`=an(Fe9L81iJaHITeZM_%7+1~nD3ee} z^E`2=qj{b<)X_Xo9O`JECk}Nq&!c%l9nJH^p^oNx;!sEPJaMR_d7e1b(L5jTn&)Z5 z{Aivh4)dee=ZVAoXr3ny^P_nlbr$A%^E`2=qj{b<)X_Xo9O~%xdE!t<^E^&P)X_Xo z9O`JEe|9uJa4>BcSIzUpVO%xO6NhotJdd^v^P_p5ILwdcdE!t<^E`2=qj{dVt|M?P z#s@xV>;A|E5OF=OGBFX?<0^v)aXqe5NXLQa13Vw#`2f!ccs{`M0iF-=e1PWzJda?s zYXi>*cs{`M0iF-=JaSF^U-SofKEU$}zt*y@hJRjgW{BXH!;JOQ*5AZzJ>OL3ne1PWzJilGGRU3Fd zz;pQFa`^*4z;pQFl8bo`o)7RGez+VD`~c74hsz}!b%5va!zCAWfambTB^Uhxp2H9K z*;)PAm+-?SuIC5*aEa?W;D<|G&ky+F64(6^`*w{#_~8cpaA`vw;Q14H4nJIuhdRLX zC-5A8xS+e|`4e~!KU{Kqp2H89xaJ`IaEa@21wUNATA)9`^C$59{tbW{f8hBOc>aX- z`4f2l1fD;E=kUYjD-!wxJcl1Hx#$n@9DcauVqAge@WbUR9Qp%1haWDv=nwE5ez@df zT!H6L;Q14H4nN#y&wGs*@Em@)obNJzsi~a!5pTP4ctk23Ot7&F3S|mbMPE~xa4A7f#>kU zB^TogJcl1H%c!2`@WUmpzn8!dm$_~DX^{=ofq_~DX^I>2-I;j$FRxB}1N zhf6N{13ZTxF1Z+2;5qzoxmAGv0MFrvOD^UIcn&{Yaxt#JbNJzMD+BWbJcl1HxfoaA zIs9HyE-hf6N{13ZTxF1e@!Jbwny;fKpD zY19Fp!w>iQH_d9Cg6GdzpTiHAHyE-hf6N%0MFrv%cBX*bF9zdhf6NzIo9X!!zH(`yYRy$4&w?u zhaWDFSkNEfIs9!w;8Sj4SXQez-hV>+kLG!zHfA75s3C>pI|vOI&|XgdZ+(-5>D7 zHyE-ha2$2r44m}=kUWN7j=N=7uM(S!{vAwS6H9J50}UGr~~VB_~DX^IkU<@J*8kNmQ4tzqGZOK$ha1kd4z zOK#TzKV0H^e!vfxS7J~Hcn&{Ya#07?=kUWN7j=N=@WUmy#})i=d9?@qf%Q53aLMg? 
z4nJJtPzQJpKU{KA2Y3!YTwYP?`2jy%;!p>84nJIS`+5aGT;fm%cn&|@fFCYx7+1L8 z4nJIS(I2?q4nJISQ3vj~!w;8N!O$PzIs9F0Zel z4)7d)xa6V^@Em@)vQ;8ZrE^(*>Jcl1HuaNdUhaWC+J+9z~OI-H{{BViudA`7N_~G&s z$mkF79Dcauq7K||U*I|Xa5*080MFrv%WJu)13X_?pTiHA<6&H3eGWfda#06(4nJI8 zB}N@spD*wnez+VDb%5va!zCAWfambT4fx^GhWP=W!w;8S)B&Eu50_lj0iMGTmshsY zAK*FsaLL8|0MFrvOD?Wg;Q0d2;fKp>=BNWahaWDvr~^EQA1=A513ZTxF0a8i2N!q_ zKU{Lb5AYm*xa8ux3!cLdmt61zJcl1{zz>%;)B&Eu50_lj0iMGTm)x!cezA1-mI13ZTxE?X)3dIdjR;(C6-50|*^5BT8{*Vilf;j*ox z=Lh_7i9;Q@-wr=qa?u~)Is9&tJU_VK4nJIS(I2?qe!z41;j)bs z;|e^7A1=A513Z@>u02nwHrKzj=Ly-*uU~!sBh}{oUbIE_aPQ4{Y`dR%y===HJ9)3_ zvF+~57j4<}c|VVOXv>zzr)s-@XX+s?ua!Q=1%chv8e1CZn zm$v)4#>=+6&7bFuk8SrezQ?xP8TZhZ$9@l=X$7hk;p{cm5t`n549`TEuW=k(tG;p2yYy!rL4-u~(P c4?q6#)8F3Yr`q$$J)C*oAnBx?DAN-_N|WX1?$J2Qw$uip(9E zxz5U+JL1ZWh*XpT0YwFX10Vqa01-e0%a3CM2mqLd0{~C}kia@34)!i)_AUmhoj)BFaT*2J5HEKHUcr=Q4=X9mzs z!sp}8l%U)>iHWW#Di!#g#@@I-c8)sY3~7)0P(w3au>hAkp+DPxR8HBGQ3q-RD6?o+ zKFeg61lJKfOdKe>SJA{37XIiv{$^DcWAdTOMXwPO$)Jfwl9a&~!r zBd-MCAMe)}2te`wumJZq3!#6$Ta>;P9Ol~s7&w{PIy2D!!~dV=|6i=`|J&4SlIEmA zm=MEngKiN|c5>~&kgVD;ki1Z|2n;37zxM2zNsC(?98g6utj`Bb$opRIW)iM%7!^)( zRq#z38tSYWadjU|&gi;Z;;VTWwWvI@j16HVW*cnk>f7dc6Vu9gW0K?wLu`!{Q@w|; z!1JgylsHqe>B}-(a8=@kZ&JNR<@9!qUuOc$2Uxuwh^{~2GX7HfK=HV3pFIuo@;PSqumMrGG&@N@9DbHCCNIa=dVo8-yTlO!BXHUg`=Am1t-<5>Wlk?x+XL3@s ze9ocKp$M=OMh@(MBoAdsNX$){BU9T;bi9g4U`Ou@sWiSdv`x-${APYmW;WhxVGah5FSoWg|0fy7sbXH2x zWe1x`EP9yTBWG&Q*p!?Id!qD$c>(O^{G zI)YrZNA(mv&F zZeSn3S(akuwz$0Q%IL!ZCrYUgc^NC4(mxn0c4%yDYdZ$=7bxyl=s6^j#Ak-KpC)xX zG?hnRch;G~ibFSAU^=!1_4F3PV|kfL%%mD_V63Ox=r%5^FGo_5_T>?#>-|BHcITRS z5q3w%pZ)tK5kF6{0y9qLaeJ!w+d+qV1XOn=WV12%Li4B~tO_=#-OcBeh^2|VzL6R-Z7X#LrGP?&&^T3tZyA|MN^RlHtjK@s9vL+bi zlX!7L;$)05QIqUq;-W!qg9}x9ARmye|`QeLq`sK?7`UM99VXh4<%l zA#JfnROysg;OMICanIIgAyc{vyvY|K9&wv;*#wy{yXO12vG8X9@BQlkAIDJYC4`i} zTYNzQ0K)&NrOviire@9z|ADlBADI79)_=jRdwE-Wh!M5_kN1E(WrxF}I)~WmwRmXY z&bUuNLQb2}48%mGp08Ik*TNteKN;yAuW}p9CK12k{Z=A`iaX6 zrXDgWL3>h%+Q1}O#BpOkPbTlcG!tG_>|mR?<+iI!F_I9G=g9kuJG8P@(V4Vy=(n6HW 
zgs49|^c5|v2jN+wnwjo)F<+>#3e}MFo(*|$jr?lj*|+C84>D}Mq_t3FdezK7t~V{- zddp{NNC35s@TuyzUBRlFGresBI#^@~IG_&CN_R)FM-3cr0lBJf-{ zMauU=)lzJMz7|@T}f{{8(;7FpS?b12dJdno3%NOC(b&(2K>&iE@XXX4mznF z${TLy7C2-MTX*L&4)-nt&Gbv<_B`c{v^M4KwME%5W8T{_40g7t_kW$J`kL*!dOad6 zc+wOs^f#!}Xcm?xw|Q%?Hqc%%`MKFjn_bLa*vk7q)hAE+w#+qdN^iZ7Tkmx6zn=RH zyrz<)z@Y1MjX*-toA@=RwOT8lva_q5tvGlYp>TEr>t{C*PUCj6;0{v4z_-A&S z>z|eNyE7fSvv<8%4tL&HrAN2Riy1g`pOxFFnL1*Q332Jpbgjnd*1^tma?88=&exNQ zMx&>7jSl~0+K`mhOlHkm#6m5q#w=ZmHpw!wqcsAAVYO7Imzir=%<2#Pcj@h9u8jFl z8>O1CPS(R$OV?=lyVhw#q8p$ECZ%w;;78_U(nyHZ*asoPp7QK@li+Gna*T~y&p6*h z+dF!XJdp9Y@^$pJv~*X71}e93H9BP};$5{)nk_$I!7j&jr@cnq7TURRHx4;D!4Rh` z-f5ys2i4r>xIbjtCp>3r=Orp1l(;5jdK-Sdt4As;?>vDT=0^3M7`WPg7ESvvKxodE z(byZ`qF%*1Kg>)joR?YRX*5jw!`JP(JP-mYHGZ|}cud{ZI0K73!^(4@53f;H3{X3u z-B;spH%2UC-{KA{A{(7~+T?vK5jbJS&Y-T3Kn{`_4N^XckM-cKnNWaPb9wTKpyJIX_3cr z_L#Xf6nW0;Q|EkRF^i-%f-BTAlPd31PhZv&(b3Fgqe7>zpR6tmn$B;|m4Z8TXmEh; zyL;1bBt%!GrA@^RyBhBAlz!$=UJnr+D;UOjWpUonOo{B~t**s{X@*vQ*Z?cJY&JE* znle=qs)o6k+psLjok-7FGo}h^0WsX9Kj~2}ZP}kYmf0$Vi~xuYgrq3O{dg!bOePtX zC`b8-rR38cz@!u3TZ7VqrC5fKM*4|Sm~$(o<5FW!oih1?lhcyrX{<3VUj!TF6vH-o zhREbf8%+`R?vN`r{kJ=QUp+=I*CYQKjafb9)x5?XTDD( zW?upEMY)5ruk(j zdno!mMIH}cLk>|fmO^>#;I47`v1R(m$jg~=qJc|Kv(&J`f<_D4HKG-PA+r%97T2AF zGcMo`LKPw-$M(A(0RCU~_&8d+af6ImVP|byZ~0#M4?!zuZyt*T}1_sGkI7yW{Eime|_Yo z@CP!d$h`Z~Rq+_kT!LG{&}OJz?kzCTO?kVzI|&)aCohsJX`4QF+7g^jm_uzd?e!sR z)W$YZw*>N|d6TbtZxD^PCd=P}AWP>9(WqM-aU0KPPJboYoNDtVa`vQPxV~I z;YrZm$5z=3%}>-6nV3cn-?zw8bRf&nH7X8m;2fUwDY0l`5G?QEcl3g_u}j~qA6wAy z)Y#MCJiclHA4!IbIe!7fbcFJ{DeM#E>XjxYqu$TL&L=jxXx)eH=cg{w)fU3UyO>ye z&a@0wfgL~gVzcTdvYSH!C_ysNltiKec6LfiVX3vm1r5I@6VI8N9S1DlhCHAB8(W6d zpBYkMXbrbNWMnyQVQFy_9BBu@uPcP2D~!H+F5e8c(?7kg(~*;zbk(4%6rS?@8~hJ% z1X}{l8l! 
zYx<(7OrC~hVvm&{=A)$~CJoQHv-AjZ4h%9WcWrUuNN(rJx#~fEN&a>vb}4QJ?w=$5 zNOl*wF)QZ5EVMsfzh)+Hf>RI45y{FG8v zu^W^no#Nh4L7K{ArXn)X>@3{uH@&KfxQ!(8g5$fMRLKbyy8cC zf!pQrsX?#L9e!&5Bf{7F!G3X|WJI13+W=SAY$L6)4w2F@yJby5qnngfcyXih2df-4 z9^0JasV#A3jZyByI*7G{hAEe9EBaU^PGYNmNcmHKgV9-79n2sH;)0`PNmRwIVg(@9 z(qM_LHkl1#`jHRu-jLkO&v!y-f0i7Mfi{E@)YJu}OCI(}G)GPeiM|g{Re&qGETYiN zW3LPrfwdbJK}_RZ0F0TJa0ukTmOZ19Btu}Pyo7zA3V24KyKqL}+iMYfxFsHX(6I^0PY98H19fqm3EZS_V8xGK)kv#Y{{@6|ZX%~jbS94(&z$P86 z1ml7y${>t;u*pLTPM)j;r}&N@H#Sencwq% zkAhz1bSR|g+w^qpaGC$bU9p2;-5z|wA@uk`-}7}n&lxuyw~!Ie@HCeztHl;E-k^wD zGb#7!tGC9=W~u<0i48=HKZCloidkEYCpOU3-O|*uIPta!$LBM)N$>9t>47i7-i|sY z9QQZ&xyYpw(v~T4)VLA>yv@?cZV#Rlfw`kzj@+@|{!$KKb__im_at6PFMjWY(r^7( zWcQd;qxg{?q0FeCWXFs{rV2sP8cD};4gvfrpMdfuOdUsa?TT>5MGv}^75lW)>+XN&YI&=uURx`HUB zj-vOp*(z3pE-Zu|hYGxScq7(su6N&eg&r{;@Z>KZ`Y0F#UaY@1`j+B}#eWO_2s}|6 zsos6dI?e%I5op-$O0jznv|G681MljBb!pXRjU02qXTbk-{JKUkct5dwm3!2A3ckSm zdrThxsJn`ml4W{upgTh!ker!-27 zy<)jeEuF{J-=ta1j%r|~!>VeKwcNavB{YcBLz6MG z!OQWIcp2hb7-p0)UKXUgUBXtVCUru5q$VUo9imCxHrM6iaBsCl93nnpG5IuAd_}R* z;rYx%SXEQSL?N-TVf*jmEGP!N11u@3~7=(Hv)B-D9U=%DhbH%u# z7@VppVrK#IP}@J9xtrK1Q#c){p?N8SkF^QYm`R!o@X>*+EQf8Oca#(66El%(NQy-E zUVf4VQYV&q2wu{3r2MLpPTNd>rL27_<+oOTN6vrD6MS_KrZ8`)c_)9xq-6qWGzSY2 z1KgXEYH=)|h{rZrHSwz_M6=d`A_sddLy@w;mP-J0CDcfGSQ~zUCb_Ue16xh59)cpb z_5y%`fVp^qCtL;}B?%#v-$d&s39->vs!jQ(l~(1n9EcGW8wE$ONziEI2e{M6&oDT7 zBC^C(wNTi&V9}TY6K9DQ&L@T;aVV5CgJb0*HEo@I>}y#X6`9dwVkt>{5#&p4kTw)M z(hJOb(fOuF#(vF6Ka`?X>Q;Sw=$Fqi>L;gkL7P>K$UNICt2(L0TV(#420!PA?RPPS z>U#``vSHhW?>h#mK@Z{NroN8+P%*=tM${yaBEN?NMTI~W07rEoMWs;cPL$?lM3KO> zDXXxAq=cvRv%D_yjv`mga#|G4jNNyc8WLR{%Bs^OD|2J?-C1r#QYTi{)FW=Rk-(Av z!HWc{sR4=p%=C$OC+^9_=RS^eJmRuM3tbnIQ(mVid7={;Wl}Iiaj=C$8jT(|sRF)HC(b__= zS00YoE1PElF=45)lV?aciw2;JHBbhLH9ABe{K4ASY({4sjAfOD&HMn#cz{4@V2NF5 z!@n+^6osyi%K}O�Ugq`J{x#amKtph0o(p{7h zic%!ZX;9l7GnUl4fvz!<%cuiNeSWEK`%xpli9Kd39Vi@WV!k0sg_z*a1r^`0zls7Ph)P>BwSjI4_TrVEIS zI%_VX$R&}hGYtROLP7;6F#dm7#7Tf9{5MNrG$ig94Tuz!#AHN*gbWNogoFb5ccy@l 
z5Fm(v?`O!s1jao;KTrUOtf{1CJri7PuM%-t;li!Sd@1Ienj+*`P zcNnd*1T${SG%F*MI^Ay7yK*seOEbTVq+_`mM$DV__f!_P;rAuZ!xlG&%qVSNH_7vI z$2aT!s)ew0zbZrH2HGFu)zJ5whCQyYR36s-Dj<2-k^*3-;G7)dTOB~^K8#v{6VOGy z1IBPi2>*~ToU`OgRt@A3%(JW(lB${s*^#z3lIqAjY6Ecq8c+?Sp%{P$SQDu~3ZM?k zDc;Wn9EbERx`X(p@H-{J&wr?AIgUF7QGt&cGw<+EZggI1duiB8L;Wfdq!snTpa)4v z!fYh_LZGlvXHl3?Zd2g}gfYlHPi%i5C4NF(Bch&`&Vt%!6plSnPZF(r@;;ozzH+GC ziIY$UZ96oQP3cpqy~fb*fjsa9t0xxeL-Y=N@yNdYMNQk1VPF;Kq&a`@d#-qh^IDkk zG`f5t($1qa25Le~un`T^*B$v%O{d*=N6=zBX{A#cpOm>1;`>a|;a|_Jea-sQ44VRS zJJnfPn?L!y(uYF(Vog!m@vS~VO^njaWn}r*9g_{_F%2t1B+~EBW>;CPVHsUXSmnd> zE3lE0(Vj^8p#ziO&-%NW`)VFZP#6WpX$T34pCG|Zlkf)u%-G<ILbs-KAuq@3^i0H%jO3(M1n9=!s2pt5^qO#qK9)fN+*M?nF z54uMcPUUiDLWP!opj6$mVxSP&q1Z z=^hoB0+omKfC_Y;azkoREvoH5qJI||LV+ql;2(0?0Q%3Wqx1Z;<|wkCc(0eZa!q8; z`=z=OKI$TlL>H1uk|ZJ0D@rPQ-HZds((HjSe>C+X#!g*H_FQqqG9Ip7L7o;djou_H zGDyIaZ`5Relmeak`{y9Y z6SYYA#~V0}u+X#e{$#H z)6U*$Xqv_R9jtM8;QK!`)?C(x?kNP4mOd+#c05$` zYkJ!0jYbV%fBadj$6g${#}*d}Qq?0;d_|M)!#6%(!XofeYC>lSZEf0DP`3+LwrB_e-Y8?I_Btcu}-0bNj) z6~!|aOi&U{5U-B`4`Q7Ze+nkdvcjfDD%{V66oy=EECLFO0Sg*d`$U0Uq=aUq41z>j zIwF!Mi7+N6QFjCq5{DPU7CS9&2DA%4#Ntbmi>jd_?}vx2FrJf+BFvOnOfm)q7*=`F zh>Rj|!pW{%6*MquP-mZ(0(wq&!B^sgFDN2+ z5QVNi8{X0lpGF@(>+Abow;8;$1WDj(*{t$od@~!VJv%w0ZtttpR2sK}rKmf>wjMoy zB}zY-hiy9TdhNDo-0lX-!EnqpXUb-v3)V>9FV2(NvTI=jA&mmY0Y3H3FeVmnN-@|rrw-g$2wh(q zy0#+{rxNVi= zB}y0bV^w`$g3HQe>)Xej!X+uWB>EV)^k+tnqGT3HeL;%E)#A>O?6~&M$3A$`de`E| z|L#43w>654T@JJ}2@WFWeQD)-Zg9r(y;5+aotrI<7xJ4T`S7|6?P~5(Js7K7R?7Dc z*D;1DkEKVDKQslNKXhVrsMD~$MK&8v(SZi*cy+nmXw~S-S2l0?nuQR0l>d0mr<%f! 
zkZq`GWQL4Ug+)~>u}pxrQNzf}M~l&vnh3N7v5y2&z_igeaXzSn9}Nbj&+}0kEL2vN zPyy}hJu3M7kfKfoURw40ZP?WqKc_)tW1B70g{*_5a3vSk!2=dy@`t{w`424nN#?X6 zZjP$i1A;8r?kFkMAQDG(?|X#tAJbI{xC#3{KS+l*_E-bm8HNr?D~c2ME5(9gA+Y$B znR(&QjmnbV48p?db%d_Xr*kfDdo{kye=Oi>|HWy^op(C-HnJRG4Du0l$^WrXi`#K_ zBZ}uMd6mcP{>WJ*VtT`_U5AkMGukwFyeVQ}D5{PH!;afggF`bXk5FNIcr59scgRF% zRMB~rn{bk=#P^Pu;xg{e*h_vD?|9@Px276J1mp8SB!=)5jfU&#mLpDg9br)OZZ!IS z0welQbP1vjhUv}E`X`O4BpYm>n!`MtJ*0mMf`1Eo_}>xmCh-*QnY)x%XIh%WA?>MT zLqLJzz=5yw*L=pf+7P(FxY&r{cL5?42;rk`!~Oe~E_w+-z|5F1zBe98@y$q-qIh76 z7PN}xZ@;!kMDXwwAr#96mW7!2W}$|18*77{Y_)N~u>rNU0-u$@7XfG22(GY#c!Ih7 zt?Ugf0jqA^p;>VTHY9=|1+9-`0&IeKh&>;LaZnW?6-(u-;BMgaoA6pTHYlL(oX2dB zKPfeImCF~7+w&{Di>M=3ruL7WSc75`st$s<*F~9Wj)sW68;i0P^--WNV6z|?W@L^B zb#;XdJ5Ot#lyuWjzusvrj!Re;!NjtZEJ&{pVq6B%DUVlQfFH3YG7_hgfA;el5GGlW zhMavpAIw>-zfoA{cH2Ufj=;EtdWaK?&X3nIv5x>JzK&<`*nM6G>&Aka{(p7l{ISw)o zg4K~A2ZhxUAd-;iJ0Vm;^J8$vP;6c!o@f zNQjuhDi04mBE|{C0{EA}hhiG#*5#{?kaI`0H;OQEdYx4_)qq-PBte3*!jYJ7d(sm_&))^+dUr z3tAA=v3TaCjRV|2I6O)tQCD5MI)uSYQnXe$(7TI{O;dIxyYs8M5gT_qY4o9xd#O5;BOa@l$iVML%&RH!D-R9Aflwv5Y?NLf50#pOAodA+2XzUt_tT zr{FbXVi}HHC3v%^=ak@T_h+`!2^|`6N{(RHtHW`tBFHCy7jFe<@^g*mW??dwWGqYZ zXe@=%3kN@E@r};A;-^}s7t<XFD3uax4>CY2=g7I z1-n^bTmSVFR(Q~EUUj0JR<&m(`OS5=nipI#dnl`{QHAsJ!|ZH$gxvch5;ERzWSS=W z4>CFB!#@I0WgmaX#3(WSZ|?nBX7N!i^MBz9wUrB{fduBi;7I|}Z%XCyzu<`?38dVv zeRviXM=y^`n8b{g^Ri$YJD~cVV(HU00S#f_HipC)(oF!`dd~BFB74A|t%48U?F;$J zj}1}HP^&oH#7~jj+BDWq5qz`>3qLuM9!I+6(Kj#YFs7+(Jo{($5EFNx7sZ#NZw2Ub zaz_=}%U%mTD|4@oz5X9t(p1qFo#p+4B{*`&0ZVPldPAMyGcu#o>Q#^K-FI_obnS*}LI2ZnkKqMh zCGaYz=*v;TFUPcVfidB{H09ur_RnU2Gd z4c(Cf?^*6+w2#eS=6a|4fID+NzKC(15|7v|_#R?qsoG1I!{gjORure}4e6=WizGiTKafTXu_Dv{T zjWVl<$QBv|EDcT$R~>nVIwFvjg_Ya7$#XGVP4PM#u@EQDzd7n33p_*pk0*+w8uJ^G zf7KYLU@W;eLt}}RMIv3ZB95JoHg-7b*8O6KbqBT40MP`{0?+|d>l&o_sFjIMD&sOV zG7@=Lz>Gi{09kSzauXdgcjS?f#?FR@ZqR?i4&UN9K zhS*L-1xYDuC@DfbL3%Lb^U``EvNTd z(QaRZ#0$KsBlq!`Yr6GmNr-H$*dXa(@oX#V!YQPcR90crRxTV0G&^zD)O~`n6ylzDE$%#Vkse5+m+A z9iciA1$B 
zV$*neCxIOW&VE?Gj#tS_YTm1^$cm}5+F8m(1)`=>S?TI1QyE(g%KMcVp)ZOjUg(@= zz{{hxyH3%CtVv+`3z-g~T5^8=!GKhtzlJsd(x1H!M2aPdS4WmGvuW&?mY{H zd$YLu53Y|n)$$Zcs`k3lx;yh*bTtcT%CHJrjbi+nDDgdC=f05(T1k+>xBfD?_lSpO z;(3od^lyh~E3a_jL%M`905tOuo2MhYW_<8`@~^_i51A_qU4zqDd>(fRJ^Cs7dS5k2 zuVv%`X|fS(vO)j)<8Xq27abnx+K(fV%hCJ%PJRm`mojje3-Q=cQW^byVcZU^kSEIf=uUEkl|5 ze0AV;n{gNLSkT4w>o!G=78XqYONc9D^)~X^WRRub)7a}aPH#I3hvgJ>9#A2|>yx7f zS`O=n_vh~!8o>yZLSGGmSX$ZwFA(Wm+LbQd0`B&%Lry)b?b1hrz)83eN006_BV$G1 zX>8Cn`@nZ_F)z@|I(cJMh_l81-SC0kr{2%@9*e`Vb)jf*y3Szq>ogz5;y`9`+1M}- zM8#r>YWhdfvETq~xOZPm>j$0!aS-NMK8=>PsomU{e3mcP`*@bnSgxraZr>;aMK$ae zBf;NBKVV$X=r# z^s=Kdo1|11mOv(`l_ED(?FU&Cs1GYD!;7dDvu${-o$)9KFz#1!Uvx*duTrAwtxc)4 zl`h(HpFi^;^5tW8oT6^x({qeJ$CBK`jM|^HiHt!S^^xqO#(VN<4CoYr55%OK;2e@% z{Hiq*&(O7dRX3XK45&L(ofev57GKrQbw_roP-T$o!|o9Bj_nt@Y^7zc+#KCyx^~Y7 zY46-i4b=+;yL?{mawycN9iN>%*$te0Co2_hQ*x8kDi~qa6{en9K`yV3Aqkst{cs7L zj|atJaLPrAG#}0nF-;Bb>80v#Ro@RzVp#nMV{Gx5Ab0Pg?05dk(Az|*Z8-W&JuWFt z4$+W>WMi|#)u$v>W`|9_+BObGXGDo4Z8Si^?9@$=)3Ev!;jklN;SAz90%Nns{x!G-j9Db%ouOtk{7(GHkTOu6Nb>l4J2OM#*1yGm(h+D-^#t( z5au8TOjQJdRUqy`%Z-q$VQK!8Rf4EZl|frcb7x#a03ut!0^JX434vW3cm}!vb%bCH zYzW)}QsitYCMD`XizMPzvx6bGJ+IpEA`KY;@_Z zqNa}h4hx5miAzwHNQp0nhoWR#8p=d-Tmq2>!H189b?N_QqW*E#4-_GXK$?VE@I~kS zoFL&;{}||^k^r*r|N2H6#1CLvBt^V$2nwtKH;nv`1^1{9R{>Rw#ywjh!3#i=kfD=+8U#8}!u@6kI$^9=F5*z5X{?l6UlQwCB5^~j^X63{yE{uPE zHj~-64BtM-D65K+W@(*sM3GV0>!-8kDjKQ~>IGMv3!g-Yc-!?;D2z&aG?>4QwfSV@ zww#|5)z(XqUS_8UQ?b4Nn;3qL_5?N0Vt#sj-Rls|JUI-)8AmL)kRwh?pE`KN)Bp^ zza-q1Ez4>#9cQd+rUxh1nN|yt3BsbXgb}lesJU@n>L(Oo(p+EaI~e!ODvEFpm?(Gd z(l0O;=4Ewzl`JY6DV4KMt^SZLB@Z;56+6(@9QBk!sVMG`Rv-Sp(p=XT$bKU!jj|MesBdL2qS@>00tw=HY6eZi!Jp`zh{| zX%Nb6rZoVgX`r8@r%SF{MvE=e46Y^ET)jCtxk#;!8ibTgWk#*sCKI}zrF^6Ak|L9C zSFcOm$)Aklq-KgFK?JykS)O zYPGfLc=2gOrzb1&zISavaXLQrW8+Bs{Hav)8(%W863F+AW=-zi=sCc@;OL5HPcI2ouv%UA}Zgf22sTv#XzeIzO!#Dcgh_n3Fv723mIFe|Yk z_Lp9AB@N#&Q0m82m!)mS@#xQxa>*5?>O%zO-+WOy_b5(;_N~?2G8@a=`Caqg&0F4v z@R_PP-5|pRa~>5Y#;a+5RlVtObvYGp57Fw4vF<_--sNvdh10bbw7t9F9Q9lgy>~N| 
ze{4|iJ7bQi+E=mrF7VjJQI^Qcu#o*FN8)7lL=Y$*d)35K>?31J_b87vvlmMyUnF4zz;k^Qa;;^y<&>k(PA5a_ zvXiCHZuqsf_O=-i%!TaTe0bYTC1oY&EjA`2qT? z;+zwWSf8d-sdOfF?UozSG-Jg)#Has%n;JeFMC>|60|0El1OKgvK zn;IUl=-0q&rt{N77f`2E_Ap&Wom^sjI~}fC1w$+piHB?om~UfdQz1VmmA0*Ky;+Yu zbkV<##Pi<#YrDuJUvBr6E|`+z-}n@WLDeg&d~jL8gHEXr_|6NT?Kt)Q{3M`1aUS%X za%=|k_5;sFwg8=yM_bhL_VqjQ&(-0poP(^h*tBBNXsCV3l$*+Z3D@|1^M89gu;NHZ zAKz~C-ptA0_}zVQ-oeGuYIH33q*`dXanvO{Fp-V_a#k}}igyp}Y0b|;5lNgkm96DX z{&V;I|`$D|T>_2o5#!ih!Ob`C!Oky{Hh)k<__w~7%}8qy8 zS5+$}NQ!1uz`$GcUZFk$^I{S>oTplRnVDypxv%fS@6l9%kYQZv%SU2!+!51x*$R7uk9Ngfs+4WOX(qWOw=7?jXmUB<@j`^%HLDUt&9e}0X{$A*T!HUK??0vyEUl7+_cYykBqc5oAw-9cT&l4Tb zN7{LY1*cyF*7OLBvCUOTG?L@>n3&cD3;8<_f<{*u+Pt8|zU36H(OIuuO1pRB*gWjEW#q5r?J36G=$0mYis& za*LSvvU#xYCwo4I9BcP$HwaKvphHvM9Bz61wc8yc^wx8|m(k=;GvCn~2#ILS!-Tv} zB5`UJO(f#1N>n70^_%u$2_iG(MpBWI)I6!=S<4L3sc|Uv7QhL+g4b$&rLUB=|l5hihhv;cjx`oN7 z$dP)TnB}RsNWuGF6AeDH4LtYL33}dpyPuYhd(VWBnbNT25}Jc|B(2&E~~H86(3l(h7iQ%3yB7-q61x#S$1C0)Xuk3L5RM^AxMVQV&wq+Zb63|&h3nq<_>g`Wbl>-+`x9+WeG-v>YQzJtyR_On9VPB!}L$N5(U zyQ$J}7P8fx+#Idd`erjc1RL)14@GC=0+`)$zS|G8uC^YqO_rS50=}__AA2MnaNC9B z?Z||o{qmtZx!zGxsR28r!k*I|N~a17uz1cvBG9Yj3jU~TXP0M!iYW5Jbb3sBiGthb z1A*Rc83*lUJJOy$`r>%1!9Y;hLerAS3y_afTS4twEzIUodmG9Fe++oYhcDqU}XZsdqv z1gd-=)_N}IY}|W`2xvIoCq?@1dJH}m>e-9$q;_(0%;w5iuH8k(kzEJKWq~5YsBL_} zK(=$gh-kC(IxZ07Bx6%|^^f0#+baJ~*q@A*iTN>x%Md>Nhl*2<)h1SWFtSjMCt$S78@GvuDLXuXkr1A zXqa9OOE7K&H%jyIOSQ}z8JRJ^x4+5dh{oAKC0U%GMf@`B3qj$koXfP~l2Jq<4?D7x zqlJrAuuE`tDN?#bZhw!IR43JCxI&bkC*^BEt%!;PxhD2gY)BaZdj3__IbE!^GA)!P!u7d-sLuk`LSlYhqueAo=&r0LRFh%xN>)%jhCn`ut&5RcqElhEFmA}kX<}7FDfbrf|Lw$lbuQ1@PoD7g0Yk zxf=lFzfNix=;&!dWRn=RboE!&)kl-a+7hq+P25U@q;=fXG{$)YG>HBuJwqNx#@58F zgf>Z$8Rnj&E#3%;@G zItP!zAdxgOS~S}&X-_Z-!xseK5)P~=`4xvM0vYbKA0nfnF=i#;Sn_zv@y=S=mu+?` zbPY9bviK&E7(|QgbTneE+qU*2^38^8FIa2M(>d*llB;J_GlgJ1kNNpwMODy7d9cCG z1Y*Avg!3nwMqSbNKSIZlBbh@?P3@lpL^GjeQmoL0^h(s&_4?jDo_0<@OA+4hf_vWL z{oel4^u1F)SKam3@7&QSd`$9T!x?O6#onO@jaxs3{8)8rLVv*<*2qUxr(fKHLIk^$ 
z;Zzw%iZaOG_VhYF{+P2_qwyyA#pCy@{@41)P;0J*!Rzss-?6lwtlKj}s|T22`EF+^ zFP9Q~sO*G%BX&-6_vDmAd+%1yO9_7sPuvN`ZAWj<{X2Z@W}j-Eoxxukh4%{)>|KM8 zX$FBBKEGLzZ}jPvQ8Cl({+hzbZ|dVOU+PkoGPU7-uYDidDS!I7lk0O@Ver>`J1O7% zi|+ZbW6zyE9;fV)|3y2vFtgI>NAPf^gCfKDXoPckGf*bbgH~p&CeRiW?*I zIL7H=-W7v}^v5fN=`aDZSaGGnt1Mz-OmP%sl!+=PIMo7gfE9HRDlE@$kW-4pJLVa} z2#VpBK;3@3!X8G(6?Y@n)5K_&{@979@7??Qrq;4Jju6dBwVlSHw7HD0>81yy6MDZ`4W>wm0wISMZ})Z7z~&V;H>FtTcQCR%lL z5?gh*@j_M;*5H3KW#etLxIInDv{I~X0@@88bEZbeSyKJM&Mwe~d z?y_yW%eHOXw(aWjm+yP-opZlAXXbo=&t$C?DorwOSXgoEha z8z>~3CQj(Jlm?kz2-stYwUY;iJUW7fcgn;*w9N1ZNDwi2$B^XJ6P>Q)vVD$^h(d`(!W(nD;&fz*g|xnlWxj9}CZn$^Gepa3uWy#tdg_MmVV zkONBr@^cRm8Pdp;F;&JfZOrpa7`1mQWL*-jrqwGY>WZXl>or)h`$&)#;sn=?{hP5n z7I!KkY#&o;i{L>am)}O(wu+b1V7h(*!3MIxwToj1viKT9R3HID6r-qp1#kYBNJqfK zV1Vxj1Ym~20=s$`fdSw5chKe_)z^-YgWMPJ-vE8%Adlt|E|ZDkkhoBr!LO|^KhCpH zbxEHI2!9f1BCS*xdCTo#RBE_x&Fc(&QhqaBS-2mfOfNBvd>Nt)&pWSZ6fl%|t63vp ziVYWf53=x)yS%ouq%UMHmR_E$TCazVHl8}3c-L4%olBcP=vJ)Od}?dHbv-!q(MT+A zq@QE?vzHC>VXmCdqCOQCo%?v>>izPZRh%KX_Q@2#ezD^{cVL$K2~jb3;8JHlCe{xDST^);@UalvXElNNHbx$4~d;Ddw@kx zgVF1(Q)x7V>_nLDG*B-G$>AtH$?;fqGSkvo^IC>QNu4{;!`2y(-oI?q#rnK|ZTRci zuB=@`L8yl(q6k~Orz0FGkPuP3;KT3{YGG$q8%Jv8N-Q3pS-4z%+p z$(fl_0&brXkq0J>s|>+6HS`pfWEG(G-dH%1GQe^JHo$MfZ?SxTg~h@VfnPuYPRiUF z+3g?~($-$v9^!Y1J@%fu3Tn|A7Myp89d@8@;6Ly^whVK|gg`Wa;i7(eKqW9ap!Zh- zWUB8r&XU!9}toO+rEVn2m`jc!QTK;^c+R|ZPexkNgtje+hT@Pw8=Wirhi%@R{5zmi0`_OFe6i2XgQxl+Aj-C9AL`oslV zm%-$y!sW~I-8U4Ew0sqoGJ%FHf!Y1zZOKX~j>U+E%~G0QpuwsQ#}oH0Jb+7{OxIR{ zj4z>9A<>)jsG)-GXT)y8@L%=}$W`0gy-vB{L!ZW7fc6+Ynp)*AkZe@_^m}H?Y5ns? 
z*ai)+qH&8y;CVz_1S!^L&~>V})BInrz!|h=M3t|LfK#hhck`|s(WmplOIP?;dkIPCi)EJS`HCPU zO+pKJ6MnQ@r*wbEaIQ?uuJ6){@uBA1{5YWQ_METQBJ&YA7+seVbKuEh3X^oL9L&N}vDVJ71~G=^>B&Nx8K95`#29E=R?xPE<_y63LA z$4T3>{!$pE|B92Xx?@xa>CTvFZ}Li$O`;E@Io7|tb=;z(dtut5J3EZ{d7d-8eVxD*&q>aB-Iv*F?=#ge7jZi*WhXo7x6Rd#Lu=Hz`)-|A9_!UAXRGko z{_4*iRUgvf`6>FGx5F=H&GoLa80PM~k?POUYLySF-grnj)vH_bk`VB0#@;y3rEFq2 z^Dsidv0ECe(j|=rWlSkN(#n)nA}{TJf1IfhGE|Tdh-w@06}$zVo`b^KfiF^&!EID0 zOqZqe$-fXEE5uXgb+2gsE8=Ot^CYEO^xfcjY5HJ)YW}B7wl zKfKT^>xQV8fEr3|?;?hO5`@BQL%9E|L+IJ~9}2SX?K6_18CaxBQ89>lrVEYgwkxG0 zs!ZPN=~RcC3f?XouFluR!@u~n$~Rj)a-Ab@iVh|tzSU8->*|zkc8ZBx8Z?nx{+wCj zY5gIfsptfA3cZSR_eDoG4O%og4sU*sa_9tq{99o-M8TdfCz5pj!IR#B$l9YuwP!%YbLX`Ze)F}s zD`_g|Xy{m=2eH+trek}kqF~jE*&VwvCZMz~VEZ>W(X7F%aZPY4e(*CJdC&2h6|u*3 z$82hY4cLOG6FI&!ioTPKpIsZerU2b)`iVje@MYqDm7&zs__oB9F9Xjg0SlpK;@}eFz%|`e0U<@Xrsjs z+V=kE^e#X0Pr2`(Zx!S8pnIUh1c=&Q+B9ZL-%cvEM$uXVTr$GkygQv(I38rVj zvCR>26YZY`mkX4f_}LJz%#LfPmJV-j4EjJy<=fW?M$h^We>xwT;)+^Z;?{`F=OaPW zr^!yX5F5w#q5*d)I@E)|O}YwU9zxS*QP;G0w7`UsQ|n`Oy75oRA}p}PLB5g?1(Rq* zMjF~@zzfyc+)Mzd#+*GJYc2xdMZyb$A$nw)IY!hzgR6j?eSc?}&Uh%7YV{V<nQ>A((2iS1_`*raWj z*IF9!%F3!jbTUY2%ygMMb9EGD7D!bwq}t$gynq^T-2Tu0`YX%hZ2*rMzc1*0{w zDGlk=@%Bn-jrX1j`P`GO%=K2h2poFU2qUZfU%1p|vFJsZqxK>jTclDF@}(kyHl^Xp z+U9!d;RQ8qa2*#YoXlo`_Cm_cg!F{f_Jq}qezB@bTP(!YpJ#8F z;T4A3$4m&xv3abSI^t}kg35jaw3gw(53Ectwbo`-6HBLyj~@INTb(|?o~xUmx>_z$ z2rmA1n)L-II};-QVeU++FqFWTORq6vs{nq7ss*7YlGDf;b7niyA^o zt!)88a((BFmMej5AoSH*I+EwGh6Wn9XvSG6u;11Tyg_$xO%Z$o-mgh)VZn;%kB7tv zF{a3)q2ft{ISr<|_n3wz4;Jp9@u%^b%MnM_oBSPNe{!Z7W zNW7LBNTmABfpr*y#WvLz4ufNynmVE!8?5^&eWYw)P`(v7sn=l#LP;V4uC^0cc^P$p z9R;pLj~^WLj1aB_!u7=u7;Vm>HyDO(fwB2FJ}-NK)p0c2f+De*n4Sn;9-6b67O+CE zrl!0fwJ)Btn~PGxoUqnN0;HjS^^jmJT2HhG1Gj~cx({a+%}g_%GMm~ghlN-YwCTJX z`&9v+vVt(#Npd-MTQG+>`BHQlrCJL4U^G`>1ZNALGr$yS$6J(RwfPRhO{MnM6AB}` zaLby95RfE5NClSXufftrzCd&b1|=NCuf&iW9SQsxDoqC8`R$;kV;M-AtV~pfF{2?6 zQo%__mC&3jjuD4nh73Z%EG~4EEeQ!pTJk;+kQ9bk6hEsiJza!y0tU>*n=09(1hUBr 
zXo^r6lKA(L8IpWU!KBG%62Ki?h)gm1Zpe8}F}PTP&xF9v7Di$8FPBIx4GV>>EBeAG zCA&W$7*LdH&7Siu8;;zCVo;$Km0+au4EuIcw;?RpVgx6TxKw7;8e6yia7@G>^&Wr- znJ{`$*>avQRisU2A}x*Z$#Bg4p_PL2VS{?+POfL#s{JO|o6YBfjK{s@5+t}JtI0-$ zS3s$@H}!kyZXFnJu`|whs(OP^?qNLU{Y=%n@l18x`=v>@_3m!H-226Rd~ekdY?Bvxq2K2jH$WkDmRd`{8MytSSlWREdO}im(WW&|^v}4p$MW`O z@27FZ4+pY=Au;cl!K-{~?-%Cocdr;z30P^ccO2_?>co!z;qEoaXJ`Xto^tIrDWteP ztwT}MKyI_+n377P0+z>Rk+E3hq0r`5`8c|<4sWc6RzEg&NDIe(RomAjzL&)i@53Nn zFTk<=K>ZUU_)*@nkNm?1)dcG-1PD%hU)x40=!wS4%%^e$n>pqN#-|LQOF4IRQUJtc zvE>5~HdT3#PUHh};;aq1DNRX9eNlbOp(CIy&#WN^S&CN0X~NabVBeTbCjPcyU>uN( z-xih(hKdad1Y5ZAjWV-)@h%dm)o_NmGttaL?V^=Q{>d#MW7NkSKuh2uUYZFZk z=>${mFfuTWDrr<@z6-aTTGT0vcGu!y9|NF;DwbqZ?yD^`Xza}|840xB>-kP&i9Pox zlx-t(Do&Qcrv1&-f-NsDi~^vvlr}=Tn8)sOQHnGo zLtu^T4#Qjd*BMnHt0nTAh4eOBx`(b`|m9<|&kLH`J6@m^AX3$YY1fhM>AtArf~|1ftH5xj1H z{lN^=h#i}2@@y}>4US93Cl_mb>W7ff#s+tp?hQ zNo3ll<0n0veNd@7fU)oiVuCt=lUH&N%?Td)vJ)~1u8_j&Gqwp-Esb!gaX*k`xO5I@gbRVC%bEyk7&xkpL)aD=T7*{(!0Z9t6kb~SpMU^3 z`u_z{HqdD@7YL@6#R6_1 z85#pI{a8YLZt+G7gpL>F{}lqTw*FfPV5tkuHYNBPOA}XQNCUY$q_%*~?M)R=JSqUq zrm={vAO?}fC0NYPXNX~Cd2};YD?hDGo$xC~WHmTLaYhEYWzxw6$o@mVwmGy5bNeEz zE7m}~t!i-hJ&F~ZBlL{mQcv_ym2#+vk(-Bs09hiR_3t?KP{?}LjS*; zL=Fy9~Q+z%bAF}F?O0JmIM?3w~9%WVuJGz=tGjjG&pG`6%zKG*%OE$(4iYeLMCW% zo7s!uN--iuB8V>`471n?B@jhIT=ra)TT#N((pR+Tg1P61(_TU?je3zBDhuxQ1V5s_ z84P%VnNZz{fV@Xbt4QvzNr^IpGPq?%tHNu?v zgjm~tSY9ef{gKq1>1^r3oW)7hku!zMm*`EN(GW7~b@wN$c>8MWHSLL+Qnl^GmHAH3 z3Z}{7nICO(y9@bo6 zX9ewZN%T(2Ot{Sot(K?52G!+-3y2BBp9VKoha}q&LW5^jkfkjN9KE&NlKX(rI}lpN z4_$mbWF)m^^s}uz9KAB(OA|(uugaoFhYNRhxEdax&aY*0ALKn58hf@(ZwGjI|eFKX`{uC3m|;o!dI@{xrm`JZlHD|Sg34;9VRr(qM;Fv^$bq~9_GS(9_r%fAEC*s zv2Saj1WCr^=QSVS5(i^rg{DZxOy!pkaekHe22QI9`a8rBrK`SjDyb=9j}ft69z~fy2Kib$C+)$u z^evC{^bw}P+wWh$hLOm}_vXgt#)M5NrI{hkbi`mdfO?EjIQW}SvF9g%5$Xl7?SOWG z_T!pCaDi&@{$^4@fad7|=7$ABV3@@v9I)8|(t`#=V2Hvc+>qD-$^{34Ocg~y3N{0Q z1fm5{FR(ZOp~E5K0vQ25MsW;SE%-sc$jefGzq$l=4iFPcItdDVJ2x`dlPMTnI)R}TpSK%Ai05zvp=ONjfdgm~^-$K3RMMkg*7UuGvoG>6(*8yzStY9=YS3r;DH 
zjB~~&fHfme1)5$zjgD2>GPs~g88baz5@5lghdb5^a8NI036{1rakh)j&?}EjI%$qZ zy@Mrfcz{A%$lqTsOXstQfS@pVjARs zQZ$KxlL&52v+;pC2fW+_J@oy8c;_Pooup{)k7mnA4Fn0`?=qzAaKQ0B^AIpI`Tg|x zJ-Hv4S#5PPS?NtS-4`x7sj$l`@cKB%)cV=I(=t5*!Fkp`?i4Psc&XZzZ5FDw%Xq&k z3?KJL*V2^eG&9M{S`R03g!jS}YQ?K!IWDZ%00Fz^^>rgf@69%WB9TYTd0b z$IDra$VH!@g{f3=mR&`2@`s~9nD?I$IQ9_@I+)CCa59K%d8OjLZ)X65LjVkS0;|60 ztPNFT+>a5|@Q2Y*W}_ct%)wNu<<&*av(YvESM}2$_YfHl3ZJO(rjU2#^J>4iyzuyN ze~=b&`M3?F*0PawlfZX0L*;ovU9g+w(HT_3_e)vDj=Q_hW32>|GTjnz6zOAP@MZ8c zbhk-*K1AnUJ~@(YcD}`?=69*XovXY3f?nh2+b{BIXxr{n)vEZoiJrbHYSog?U>uzh zTH&;g7u^TyvS^i2u`?9CBA-5B%XQU{yW@(00nqL{36>}U*0`BGkA=4kakJ5zeeZPf z&^D^Fj_TAReG%(5VL_NzN{aN6_3%qeVxrPIWMQLb3t|D=s%rLa=Woy&TsZ0C{NJF> z0C{IbXS6~Hp-#IXrjDRz*m1f0McO0==s?;ext0SXPEq0teSYi04Zm4=B~Qb_c0YOk zh!07wcxTOE@A-vE7<_~FfcRKOo?}HltP3E73l0`~F4Mq>D9R$gxzecSr7^Xb;}cd| z@65r!{;;#Yju?-7z~}PEckqd?L6P-3(b>Tlgd()t8IjwdJa9nmz0^14P^GobQ@-1e z(3m`r>!Qrd*}dmm5C`U+a)F=j^d@_bTNdEmQ3sHA#8#@tPHRgRmhNdMF z>8*K(DxE?RIzWL#8P9#W&t*uus2ATJM~@eV7Yvq8&8~REzg)~dsQv49^6IIv<=Fgi z=xX@uG=(n^zejH}OUKfNx(u_#*wrUg$cM-Xh7C^ps8v1j>M5r`3<^?PIJMnQUNiNmwPwvSW%-@b9K491{zM8xzF_xvQ@$~_=2_?#MJ;gl>y4xdw?8}z%x)0I&5yzrm&uO(gW@hC$<9O;}D`C5) z>NF1(qOjB~$FVCKvsVmifP=!oNwG+*$@^*Vy%u&@)@q7iox_DAx_dLjr;N99>FV80 zPLO7}$6ckW1C2nOxdc7?;lc-H!PE{Gx!2iw&_a8a9!4@-sEs9U zi-;gI(I+Y7a)QIdaHCcerX<>fhMlbD3v0y0m^`gG7{`w(ja>`n6%0dZ$ zQZxT2RClV>kM>7k8$yxdM`ViWp0=`4KUKy9Eb+lQQP^k($M2$Lo`Fel#1E7k`kCmlI6=upmxWPT%f-lJAFNO|%mT??V9Vioqr)2x*JCL>L;W zM1R~4<+eTfaNgnSa-=#>A|$qb$=*h@xw623!nT8{ zlM>~!-bf#&drjYjPa?AS`_U-E37OsE&Il1+2*EImw1Ap$I5GtHn_Df8tn-;5#pHC$ zlSeTE>j901g(P92*}I=@w$w(az{tyHp%&XCo&GjzEA%yjb+PQs)g)(%hp(f1%S zuFM!4tU%T_2H2QnuGCgAGYmQ{j>8)?X5210o;0cz{M}!UWW?g><|zg>T;C-fQ7gXF z2Lf@9?hFGtl%-BJgtC$VO`vct6#05ha?*F`ICNgTh9yUr&j#sH7>OWxj*qTUj#IBZ z&^SqPPB-v2Yp%y?@*Jz7V;D^x%|)^wXZo;2*k+r?e4hwn=pcLgK9&lS_8ngkU(%&- za*l(qmo}dBh}J~981Xye``5qC{%y!=z9$3_bzcC8x(R@wfd8V?{|7_&|D9C-zl%@+ z1Io_;Cj9^V)d3*n8u+XH3ZMvsL$i5#6>-LuNFXm%-XpL9w{?Iy3X^@Kt?f$E8lBWF z63A9&~ zUkLF1l> 
zt_ONZSf5)q84~Ii_C+zowH&kF4mZV{JxDYg0m)sBn@rgOQVs;oopC@S@S`o16U z^7mw+871>jjo(7O+>q+Yg5Pr>YN6spLflYKdhvea4yX?iFHw;%$@U#5FuUBTh?zt0 zaghf)`-&muH-OoreG|k%bnD0s;~2-p7%=fsx&>4&`d2o1+y~zRZorqSK&m1H7rWL@gXI~`vws;x0UM4%3MtkScE!F84-f?1y;zAKfG91FX!S_3Z zCkPb_oRt%*g$(as(c5k3lgv9LP(hn(zsPN9>HfQOEHL; ztpuWvw3oK&31175OJ;6ype_&8V=q^qTZP=&3b@SeKYoG^pWgIxfCnP*rY(8efHv1V zbfg(vXH0Yhmi1p3zP7HcJOmOz`~LH<@>2g};brt4os1nE|M4sQWAG0O_)N{1bzxNB z?YKXP$?n4DJWqenSvAQtL6VvZ&*=E{_Bg`Sw=sV2lxE%b2z(KYt6>V^xxdr6Z@rY{-z4RewN zL2a)dK9675CU5@g9QYx?oD@*h2~_|>5{wsQ1R?Um?*x1A45oz-ch9m{hP2qT^0SSo zWDWOewmNd@xBU-+u$QpKKMO*+?ipoQvY)cpuFsr8f37x7ma~1=AOvU+9BIC)TKFP@ z1-0fw@U^6fI+d_7ZJTJyjC!}%)QuTUM)67w%ifKx}BDp zxk$Fy#}KpYIYB3o^<%}T%%cVNFmc>um%Lcm^0h^msB26%C$|i1jMvW~S&$-qoEcZ) zbD4g}-`3TSz-I?7gapCrd5piy4Y_W(=JOLA-c`w^@ru`kj#gOVY?6g(a`z}s&t~*; z=DaeK(SGX~C7E!Xoy9y`FJH7aZ990dnF;8*Rq9MLXf{Lg=9~8Pb|Jp~EV%{)UU3hV z&z}Foop=0P|3WDG4VfObipef@vv5Ts56D!6jhx+|hB6>lRI&zr8YuC0qF*2qrMh(E zzOC8#`WTPt+B9ErDU)bZk!O#)f_%osuiCQ3!*@+1Rd4!8C~oPOW5c{SDW`MPuiS(x|BM}5k+cP)oc6USGy4p;}r zn>hwUebecKBwVj`DjQ`@*CC6*rCXEvs>229f-TC~^5Gw1W7hXE`1bd(if3}jBQ)Gp z8H5UIj+%7b9%>F>8H9^UL(HlxNtW;L6W<0^eSGf6b#=eGB9Mv_3V+oHP8H%iKTexK z&HGZ2q(5IY5@V-E?}LmVGs08+&NkPAvVhg-(hrMmvO?@>#+;R@?GbD&FLt6T)g@WQ z;Dn9qJwn>OlYqmdp~#*|tKhIljV6#oJ9pYiQr_nAyZ>{12UbOk7e*d_4%O=5C40_+ zZ=a6;Ys#D8Y4&!s{=%7U5 zhuJ@z{Kc!08(6o2?M2A*l0e!g>IguaKe$&dyCv?VKdR!e%8Dpu}$ z7B!7jO`dG4da%?@JO?;dm*JIfFq*{%<0QGgOO-3H)isZ0hP?^G0k>;CqXi!BJJ6!$ z$hBf@l_RCjDA9q2Ovk-yc4?De^oPMT@-o z%PcE;@u}-IkdqIDm-~h;6ioa2_@Sg|8;GGJq@Nm+T38SLwJc;o3`r)Ig`JE!h}JdK zQX$Ogt+xVq-GQfZyFj-YNE@*dGvo=$nPBm?3{4x@`dPT z+efL~=Pm5ciKqmz3wO)72P%S%r_M-i9hVN9V~o$sOqj_N;dk+-=MOQ#A1vM|qJuf+ z5b{P76Q6%9XyKzy9uAt%8P)94^@u3;7VfN3YDJcy&`JHXTe72IcZo_Tssx=<`Oj|t zKfC4s>=ysCTm4_%#6Ly9>x$CvZS~n`YBXk=)m(M+s>!52@gV#e_(jPw=zI%Tbs1hWu~~X(t`gu>ue2L5`s+Mdp)^4KJshX z-{gF$1bmh3R_+ga|3ISyCUJaOkGGsjvdN>~6jp7kSKj0Dw<5U9>7k5771ivBXg3nE zFW^|)Yt-EFc+gkeTtXFF@6@-{s5<|Zy(ezpusGg8?o|+wolZa*-So?LsSa=WdEp)K 
zV3WEIi37;P!W%IA%3AGYt?8O>nGsDaJil#l^vr|wYZ(584|MMfdZ2sf@&f>n{`C}J zoQ=Qj0G!_J00;%B;qp)TB(3jm>+JNumDKsiTa5q2Qjyc0FntV2kT=>lczu`Tl>wnF z&d9yJA`igP2Wt@Pqz8E?>lw5D0sCGbpIY@kGWl)_1{fW|8Y+Y%8G2dQtQ%Fsi|Amc z59q@?68#yNPnQm}T4n=U(QbBeBJwqXgzZA~P z)=D0%`MH?~M=iv^YX-D*k$f12R5=DC^zxA3y@&E+{S$9=iH8DUG85 zmsu}dY(D+lWe|`J{%Qi$#cKorn*R;V{P!~a4JrRI`1{i0fbyixJU>zw$t^!-MlH>0l$r2>!USc2ZNHkZaIeaRW*1=$5I0PvXy;ZP zF$#m$p9qon{auEe6OW51k})L&Vl#2n19~yw6f$gGuqaq6;32oKwD@#8y>f*O@S-ik z2V@fD%nNX;0;mW6k5tMVR*3j(XvxQLvJ8L3KUECg>!Vfteu#a0eFU0=A$=28>IUdb zl+Pay5}8gE#Jr$hiEV()>^w24oLMCzdcKD;McW3);7ay3ui!>yh#GI$GWEmJ51C?l z`WK`5>(vJ@wqJ3RKTPXQS0ALOOkO-K3fDp!6HB1icBMkoEw(>$U)!%UELW?2c_ z{+_R{bb2_VCEh5p4|N(80si<EiQi%19Zg3G^iO3&`b+dZYvP~Ju%QPV4yc*-=weV0?|iRL33}G$Je{# z^PhY=UY(JG{*XSua0U3>F7`qUiBZu+w6V;q8>)hD1TEpSoiskOH|v)zFFz|yE=@L@ zEVEq7RnMK$JJEjS&s#Jv=OkaZ!-3{7?IdoG-1g2!?FZN{iU5_l>!)=~fwg9I&mC_> zrrojSI^&wDrV81I+++=y0ck_AoM(_AXxGGcFagU6q{2Dz3C4rwguVtf-Mg%Rfd41i zr};mUefIyWsMh~?L?1s^og)~a6h2}8v*`b=`WjRwZE)C;x-d?-X;$j%zI+4^a_i2u zw1z`BAah+QEoZgqq!h{$bA9!JKR6#3fPg({U`S=AMLbU1-DV$Hxel%*liMAXm0ClE zt|mO0w`23&|Cs3<3`N?-B{}(CK%z52$&9h&r+_}JzSz?X2FIoranMs@0jdt1%=U&5 z@ECc;6@Pb=mn{VTQF5{moGw24T0|KSO@|ITnAF$ zgC!EhyFJ*28&e;Mn>5-$sI(-@FWVWiLPQ+Tq{fbD}#CtdBS(Rw$XzG1nYWm23x4Z+~>Sgze9(Mf-eRil8{WXwl- z*z+5%)>>NnNPlrt>kxqx@G0`2O?W}x2i&Z{HYdZic-Aw%Z;`Mssfj_q1A+HQw&|q%V~e+YmPU2GCbz1$3m`4{)^uY)#Hqc+p@kz1uUbSiT{zDBeV*CusW9vw zMa_kmAXO&B++|?HCWjM^X-+CkN$&#}BOdh=$@up7A`lCP51R>wr;ZN;s@z}OADd#U zw_EFg%CIq${cA@(tGxpA3hj6;+#EBuHW{F{kdRwm2=8Cp_ZyFVxGb{(l;Zuoj9C5k z{@Lu+z>}IOTP&hFKfkcG{jen!Fo!dFi+O}Q*n3aGf$Jf3U4nt{eeq;4e(xcV3HM9> z0Kw~;1E%lhq})Xg#R5AoR#7`Hqr915Q|qIP4$N&aDz(Tob-(y=puUjFqg0G)a9=#F zFa7=}<1eOp+#U|dOuRD^hV?t5mlKXx)C@XsREN!u%KY6Tf1{Oe-T9D#@ zmn9^!7Z&fBD$)8w-$>yTu-cf_w4)TKK)1EJ2{6jJ_gkHRWsPP)1@5_)$2>f&} zAnhI`iXo5>=jbaxharRV4Q4C6whJUXxRZ1aN^-X#L};{PbC{w}defn8oFN$%Ytyhn ztT_#34cD5PHkz=^gR%nWP-L{I$*2HopZ@kET=>#Hr_cd>yU|`O1Xbo>Io8yTrlE!d zZd$g#R|VddqeZjdS_vl7QL<1q$o{L%5FC(&H#_})BJ{&6?PvGF{PD|d^ZN;A6Ehz* 
zBFE-PpO_X2mKz1A0te4NMINClSBW4&1XzmaU?K6;M0u3VOdV~t-I@dFxcxMoh=t4JT;*D)!6K|j;_DL!-W(bL#Zv}Vx2LodrH&R%?Nnzc~ zCMqTr(RH)+*DIJjUq8=J*D+omEPr@GYNUS;A*d#9W#Vshc)3Ml1P%i8F!o56XfajK zzT`+D+o;oa`}XN0`KN=%lF|UD?8E>?pgpc29{UNwWmwX}wj~>xaotL=TKS-LSik-2 z2ZD%3Bh^Na>*Ud#2dP`>aMj{`1L(9rwuVZaFg+S)V>k`7*^7y#xd=+9(n6W)0eG|{ zcsS{6;p3aD{o;c4BQUtH8u|zikR>qC;>8j03M^0}d?S93I#4dnjEC>iU8m(!v-0Fk z!dYy!clXQt`QvlBnKr|t6~cVmk@CA3owd$a;e1I{xIMuTDpo*oSNocC+muGXW~U)L z$h^&A$gTt&B7r9rlY@hs=l;RXusSi2va=G3HA%2bxVf>c z$$~$$tAH!|2wFpu2ILKOzWf|C2*V%0=X2@guWmqIa`hej2GD?gQ0&``? z_j-Z8hte0Ke&aYVrV#)Eu)M-xtwCOpHd!o|S&~r@n_Nw!-~N32py;tQXOLz<#tz)Y zgl^#@5KhD}b3$erV}Qgw^!k)E5cI3W%&^wOe%PBKr0~ny{8%_C6825u+H*?Lr!h5Z z=rxPEVmPd2UDFpu^)E2FGS_$!uqjG5Dj;71f9^C0Pv8F`bpSOGWj`MHE`6{Z|I3Xg zlHV5M(1u~9Yk&=Nqjg}73Pi0NEUDt*(7V|14Lrx{l{TYm++@0bwb<&3CBu8FXt;)F zU!7sci5VW>A)uf~4kiyeN8JqVKpZmYDlXLCrWkRtpd(wfC^7;*lROyURt^SDLtPNg zm!V^6?-M36<6A@Bm&KMXI0VI1hLB@E%KJ{vr~*sHM-^=^{{V9joGA!$|UAgnH@}SZ^ zZ?S&+@p$!CO{eJOyz$}dW4iXTfMkJBLTLn(PWG*jphj>Yi+i98aG(o! 
zpeYcbDIVWfUDZ-WT3w#R56DM!op-m#+qZH0W%+^`(Wc-qx_L370WcHyLMrpGbAnvN zFR#*G!|h9#%;4dJ*6ah2f`63{Zc%=u3cz`m1oxligY|FuP#w2f7e(E~7~w;j(Ac&7 zgF+0Mq;0y-C3@i9lZvY@JhE$KQXokQgNZ&fTvm=wF=H&BE73+bC-XHZJik4XMMSwu zcUe*zX~Tgy#cO)i+Loe}QP~NN$`yJMZ_hdh1Qk1W&{$W3QvY z#DF238sl>1^$`0gl8wJ3!kGl2CpS9`>PSp>u9+-OUeL2AFbWmqx*w0B))$|nK{=cQ z>Jq0}Weus~$85p&~j+|ncO7IIsBvo`DWQcd#;UZeDeGm%*H|3;c2hYsVC@$er8_E%akIjU`u|!X>?bxZ^iUdu-~wVqLw0_r22UH zd5Qp|=~H&kSAL&Ueh2_`%8l=3Twj%3KA_`CIhBz9c!{IT`ZFjzO8yH7)v^mzC2=wu ztITN$M_L~WKbMxz7swH`=x-Y5y+il@q)SFyzOjV|ta&X*3m&|w3io&o1$j!3^~ojp zjY<&9G{%@h3NDGQlf244%Bbm1g&4QVegCPqz&QA!H4>+FAWWa?aiTe%l~JI0qmV!m z(-_q_wB&RhCc7YdeEG9_`}4-R(Nmk&2Mqmq?;^z-Ow=9nac8i{^T(hTiVaBh+*7|l z+NoYEFbwitmFRd39`{Wjms&JV2!+!%g&IWBXhGnaqk``5sEq7s2gr#?kbBT9E-abf zdL^Y0snI&P(~XOxPTd6#jU0g%rNxw3eBeNZX&5WMc5`Emg0`U>vU!BjAnU6r8F^+a z9kOi05kku@&BDbT*jx(|$7{6&tDngysA59LuJb{Rf|~h?xR391h(`|Dx~Lvi+y0y(7(vPWMg;W&VHWA~v2Qb`Q^$&AA)-%jC4$)Y zB>#f~s=t$d{Zsh>YXI4^Ea7(VV)7%Ro^Od0ssu+&6w$f4+zFRb5!3OL(&VR!2Ixth zK)rV7VD7jrsV)JqTy*J(Qnxh}#3w4Lb%XXt)`~m8x%9819%|}iW(-gQMgSZAKOKc^ ze~Y@(Wb{14->#=PPFF>jcI!|!?b+nb@w!Sgcc6q8nLtCFb$ECv^F>K0*-ST3I$w@_ zfaA$3&<%b_cJ8L0`pI{I3_r%S>-Je@2r{HRkqax1YL|w;*>!D%h~_}P4;|QpgWJM= zL>{{yN^wfu*CB64|LZ6Ube9@CYbTSw5foGUg@eC#sn1m#ZHRQxA_TT?$7;$roYm^E zFN)L~W`)w(b^Wp`V=wSGm9EbBoM}J~xG0ZSQQ#?xFiJS6QW8-hw@|1cj zh^vg*iB;_nf3v2aCyfYFc}csSVUqC22V!IKP01$Gtu7Lw|DO2Pr33Glm@y(&vxcCT zL&dwuN#;T_F}yVMQ67rzW*w3Gwu;-_+jRJuSGa5j?R#yVw7k9T{pacEzFQ9Y=-w%< z@6Qj|nFO-rbaAktU5u5F>$NHU+(?M&&MUhaEGN;*t(z;IbJ_NJbN9}x+(Y+Y0UuLw z%a@O$^k?=;EMZWfvc{%#Q&YCunO^4Dz$GqpYAn4dd=#eV;ZrMxxH|d!3k7|Whh8*8 zX^TMk@H19s+93Fql6T}_{A4CnjME|cU^fyHO6KiecyE0F7Onx`7qBn^_UJc2CdYr; zqksE|A{8ec{@SC={oHN?oX%$R18!$dc7nest%DOGk?K%>QAr6ZL}~}8$^P0K2xH#? 
zq3=l?HV@uqRg-Y(`-#};rh2|<+ily(#Ze)JuB3D-cmu|}d7AI*KwRff|Lx)Ry#(Vo zKh6a61XwY&MTuz0{%{O}QH8pTKTa(Zu$~>>KG@;V2nQVNsCn39bZlaOcaR(zo1M}) zgW?}Ny%d^_%jXdNFxcodkH=V#S~b)_S#b5FL>$CCihhVc=N++aLJ5ch3E>vL_KQA> zOi^HsMWTjsl1DVK9CSqSOcV54a^sF7rH^jT6$e^ViQ~MLCR7wmnB*3gax0%^^BZxh zQ_x?Ox-N9l=Rs7J;G+Urge_@9-a-*(#@ik5mo-6H7u+|x1jNHlSKB-M3w~wWvrjxbu^V=DzGTOCmYR6sm%Jq^EZjYNk%kdy(}!Tw51-ya}nw`%}xGk}M2`+KS`7N_M&+6w2KFo)_S%yoIS)DdMj_~08 z*$1I!-gGo9z=avX)Z5L)%Y^oFg5(JMNT9_{vS|~wT~%)atzovmg<8 z8~7$J&O=O^%aecC=d&7m$IkIIaWE&F%s-cMTkAOGN|)nxvEjo&>tOAVH7z+eU-_%3 zN*^UfLbS@`)fA&ROK!P^pH#qI!tQ5|eia|b>rKF0!}$FH+~ip83{b!g-Pvg=QiCNX zkF34lpU!v#knSi+6+&P|7h_U5UhoTIcB}b^C=g@e>dRHSr$e*E!9N9&!{Fq3Ao?6) zAbZE=qV55ruK%#Qz|d-#MhqBD)PALhDRGM`X;9(G(qj%O z5GRO5BAohC&fkyvnyZ5}E?PoumiUw`E+k4P>U9`ermDckh~|AXwyTqQM;Fb->^ zF7Ys{`b{a?f)=Yn`IWC>d$}KA8Q+Myl*<;GUsPIZOnoJ{sJfO!e-h8mLcUlw_+fB_ zC@g?wVrV5aMD~-C5|x_oU{S(JuL&I|tD(O#6;3wo_I59We@aM3!m4I>^i1Be;{FCQ z{gFA=QwuJCCcF=IX40!tQ?K7!x5smdYN?;722A;9Q(qH!r3TKM^3JrFn%XC+r8-vx>rl??zE*SYnEKMf=`ojwIJ}KlV5KUG!Pk zit&&34C6IFPY99uJtFdym?tm>0S5Z3OHvIGuy$eW@f0^$PAim5m1A_ySBmA~1*hhQ zB}T&8pxtN4KiHpYSJl!DQjWvjLT!^l zsqW1pENnxiYj1Nwxgh#n2wEfVpESDFjg7$UmZgospC0bndY^A4&tnc%FD$wVvP`?h zGwjUPZ`Hp@Qk$n}mQxvDaNVFdoo#R*Mh>IkOYM3wt3Y^mr9jB%`Q;J%6(!GknQQi; zz^k{IylSB{dX3i`Cl}#(*giE!S($;G%#2$+F&`hSZL5e=D5|0_aH4batIOTdJ96sQ zV1VkuT1@mHvq}*agjd*{X1U=a{pXBjS8Z}&9q>i9fK5^VQ`X|QR2|!cpW1;uMC+@k zIiOJQ<#kG`l6f|kD=i0{kp#@?FYSO3(O;DohvE;dPnh?;!(!toC^|Bma7NE!j?5YD z^>SV7ujAfIjUD4aQ4$S6Gg@0}58#uGaV@1Nc|Ob5ypHGj)M2{t~hKGgE8H?@RK|Zrl~u1Ut9tX<`kUaQ9L3D2Zp^p1V`vNUWSkG%iEql&l9Zkpf#TJ@XKAOQ5zG`_- z^kwpZ!~2zJ`9|}R)2VF?OT&0jNy<=9nuwaqWl2g^Z&#ZBjs?YDx{OwBpooJFshYw?bvoqgb!8o9c`lT_ zXzs-`9Od#ww+C#hrvjP8dWn>LLd(W#lXNpe70=_e$s753yp9Bt$xO;WW0@XU+_ai@ zd}G_Se~HzES$cUL?1aJq}7ArxV+pQzqB?U8oRI(>$;S24aIll^JD-2KF@ zu->^`OQ-0obK!{1@v~Hg^3Nu=FSK_~E=CSx;|&*75K<1`knT{-@l4;Sn!|L?y85CxZ1R|(h|=FFz4OZ-Z^$u zg91I;Qm(d8sr#0jW1K1z50Hl+PzT9h*|^j$eIq6a_NP}sw%Ii9JAONUY_&DIZeJDB z@l6=$+*RR8Zu&@@BnX%nJR%;d*@9 
z9A`q=W;Ihbi?2|kbxI3g)ln$vWe%wg(zlWtsGgihwI#slunp@pacs0@NIrq#KYaic zaic<#*MwG-q2ui|87(|J?4hfnXfA|1gRmdVP6H7ihloaKus(V5{-u(CE-g{sYkR7z zk`!w1QRCO7B5>BDqs{rH9>flqZwCE59wzJ|TwTGJirkF*S-cl$I@@{D&A@5{P39;` zpX~94Zm;}X$ny){WWioK{jzRkCUbE)l$70%&9Ft0cj#p$96HK_pCa%Mds+Ia)&%$J z=XKXL1T1k7qkqXA$J^1KwS2kjn)x={%x)PKbDju6 z8VYom>MpTfEHxE93Fd{?M!q__1 zJFmLQ^DEZ3XAh>5*QdVoBsT;Y=VeP#6txh^CdiNHD0g_MOLi!8c&K%DpJ}`qdH3rP=+9U0FW~g9J+8iW z<`-+&b7t`FpyO=lh9_UNLL0%9FMVA5yfK`75zeZPS@ZhN;_G#r%GAnowOre|3P$u1 zdj`E(YYl-fHWMkMWo{+?i*3H@t1eqvpeqZ%h+jJ@LyUDUi@Q< zn}x4UVI1(t3)KI2;KFmuBSCWatK@BR7i(LKN`j?33yU{rg^yr@jrtUNe5L1ZPuk9| zY$VQ|2TbWg+3&Q|g~KI$E5B7svw9NX%Im)`ms5^W(Za^L6dhnOZ%9qFHRr*kU_R_i zP+=J^BTTwNth5}YW>~JMB1}q4B|9#$M3Q-vU}7o~v=)VO^fSK;8`<7zqBZaVnlOMc zA|p|{8zGK1(|=7|fS!nX;cLYTZ)C{|fqGx}!={0Uy`|RkJu=z}Aivw$kfJaWOddDH zlpRnY!qXoNcOwzSK9pJNIY^idV^DD{AHNbMQsH1oF8pEWZpd4|m6C@nsL1#o<*IZY z2@gzjo#6Pvm$v052(3MDQK;s6l$ho~^Oowf8LXjpe~_d+Ye+SI0e2s(b|f0U%Da4S zlO|0xdCF(G@8YK4qgsj|n~m;Bc1JVUC>?g=TjxJ&n&wfJP~$(nv4hEen_Y8JTXRvZ z?Wt1eRjjVBrs1usQEX5hR{mDFWSRZqDWL>1cAozzbTQ{9Qx3PBHjHxOkYw?7PQXBY zVKuU6=kX)57lW8$p*Cj7u9bU}3S{-mz6gi5D?Vt11cz+Qj$+7S(Ie7H&(iwB0}9l1 z@OG4UkVv}Pb3)@~k4Wbt7?j0jkhw4GoT+dfsN;#o4h6QjuRC8{+=*#-w8QwyOl1X6 zYfH#}Jm`G5P`ScLlFMx6{&DvC>d0Qg*>(_=+e{TraPxoH)8`Kw$^#5c~se_{~% zh`~!WHR>-$kh*yN)IEa)0m-uL@?DUqP*u0H0isP~xK$;rknrT#=_2{07v`H!8$K2* z{uE;L&V&X|QX|xXUJp6kZ3y|C^)H`EN;uK=oRQ-~^2Nn_4u0qX=exc`Yt=2{>HhDJj@Sd( z6jvAEPdaHhM1$%Lew2Piwf1-8*_V10IvW`jRzd+6q+cnP*`;iNWt1nkcX2o-H4*Ra zgol{p{_ws2F=g%{kJ88efwOG?M~PdcP4uX|BwNe~?_We8D!>c#-WSi4R1td+d@tKR z`s;C@*FH#h|AXRYnW;~O2g1%yz##vv?&VkI`p=tGZh7Qy_3O8{544TRgWCtN;1@hI zTppB%)AGNLj_I267fsKPA{W+Nrg$bNsVwVx`6f;j&Cr4>{cTg!@!GO=g^&k^=b`Vo zlO$@FG6wQyPmTgN%-q125azoBa)leQ;HZNqPhZKajuFl#L z^0i4n$n^*FrHpo93V~rrk!Zh zecimDzEhXz#+v>f$(#E${ES9DC}rhUS&@Z>#>3;_lUScwrIZyK4XejICs3)pj4hoy z(((Go*!}mC!hc)dU$u0W(O+idTD{3_h$^#0>l z;WpjzAHNE>^~V44t8mNC|KnHTR_p#>ybAx#TM%w#z5iv)U^iW_}YCE{&bcXR?Y) zqP@C^tf{bkvVB-dDJ-sZr(cZyvBRO+2x{5d^ov&o7AJgJ{=IG%eARgypOZ1AXlXmx 
zryj6L>kTn;fA9a+OK|TQ9b#|4xf#<0T%GhoV?66Zweq`cPQC7CdYEtIcyuso%<(JT zVmXqo;P&dJ4>?lrKH9pAetJAVLlM_ow6+&`QM5knb}tFl%(tl`m3=7rg2YzV5!PmA z=N?rmjNu~!^rDbh@5VK`VNQ}Rl_WERX0w5ZLIg<#i8&iaAjJ577r8^ErB`y+Vn8Ws z3$<`l@?%OcP|Q5!`Njd$uk(>dMB;7;8;KL|=;y#L=mZax zz^!J4Z*`^)?eoO#R@sL?Fef~f?bMh{b>J1*GtWnh{W64{T8yi{tTFVyQA)aCdfa25 zxSLf-NU^?4I>N?qc;Ci3y2VVE#JxG_UHTIxbgJ5GgDpAZdP@#_wsYyxKAV9+vyeDc zP2IOsj|urd>om_wR!f^WwsX{;$S=fnFPpCKVXk%!*E^VCES|2OId-r;Bg8tzyRz@( zcnm`~oHA3Ho%CjK?=x*fUE)n+Y`!gr8*yJ(MsIP*v63 zu&IU4t%kQO?NN$<&mu)8T~K65=PQAgNf(}kn5;A!n)xv}iTYWG(!;PMnbF7emJ;`D z4V9NfyB)A7>ZxTLkA41<84^cRtAYKA28Z7xVtXNmqgjdjPKNR1m?u}O+ zd*>^7|6{BY!77ErivR^BrvU|Z=Z|E<3ky?gQ`XzRe||q%ik9s%7naXF>USX|H|DZ1 zk2d>~0qRcT`w`A>?@FXuzAlS>5+gfY?d2I1WO6@~d+K@Rj`KZeQu0eZ*$_T^WZ$=Ezjg78Q2jl`XA+LY z!tN}XT^vvEtC4?qV-GZc-pslF%;d)wH5z zQaeZ0Fhgsr?70*I>r)wBtcUiwY9rRnQOVLbkw(=#hSXKG&xv=F(n?&;8Z=NJjKwA= zQrugIi%RyA+O;U~w(^wp$t{|(NWcAG8(V0WmHP_;UO_V6slW(%XD4p~vNVj$I1jO# z^snoY*Vf%6RLP`n9`Mk78ZE zNr=*oG5q9W`~{_Pi$&&@4W8=AA$oS#+i46F37pOQHleJWiVN%g^;2-G=L&UiqZ#2z z?rpW7nsBns4ZypM*s*Vhw@}ipay&Ma^D)u$6YP#R6rRKx*wmbY7Hq^%3w^ni%`i=7 zbL7@5e*Xg>t%IpfV%}!@a`N@5l-N$tXCfc-hpt+z2$w^Rlc-18yq`GaEek4Ro|io_ zk9}!1uUD1o@<@m2nDQn+`FdY1#M!v3(mS`b!_SiGefNaZB5krpYs!>X&DzELJjX9K zDzr-Z!i`Yh6N9JLEE*YD3{GCTg`UoQrD-mTP#YR6{?C3bQ9I$cp+9Lo+#nLR%UW0$h^r3A-fFoZZgka zPxW(chJQ`oaZC=Z-S%ki$fGdE!-MX1=oDY2s@>vt;eGT$Wa&k1GxM7jLl%zYqGG{7d@W@X1p@E(02XF>wZ|t!=b~TFiz<7 zTad~d#3d6A^)V$8H4G@CcoHYiC4S&<^EaYHm1!|9nc%7IdH;#QEb;5i4t;-HS3h#Z z;0p}nFRyvHWhw0qaflQRhzpDy@0LA)T4XG%^6(E6zr2j@Zkps~zu2@hSwSKUP})T8 zVE zx*7Q>I{eqa*&;V4er0G@d4KL zCr~l%2y^XnqvuVw5mY+KbIsngoH)JT^mf=hF(Xci-=s7p%QyHTGnn}fCRDGUB@H2k zu;c?lpU_|bD`6IV5(GbbmA?dwp4*{wx%OUE=zAyH)9-LXCO#DGO_)RrRIf$gOyFK2 zfrzpdKS#?F+C%K|%ifay8bbCYFY*mez*mrfe*BHs-INC`h5B5JTRCE+Z|j z3=M^P(=fz-=LsaPGrPIRA1`tq@vrYwNG_&(qix9yl4*0yF7qcK^iV9PW4s-Tz(_ob0Hyuq^ZO_A{9JEB< z4KVVAeoefK=I`@y2w7r~pWN41xY z>sE>iQc^)iub|*4p`ejo(hYVx%Akls!C?77!8K6&I1XY$o)>PSfKF+pV3ifSePuv| 
z0=x`f@P@O)@UEjr2UkGB@F05reBTf3Cf*+hnH;xGE|7k|eHCk83Tl17Gi3OH>WQA- zT()A`>Abg)p!~-ytbJ+l3ep>+Qksa7>h_{34ni1mqy- z&3-c?MPCBj@=CPg_4Gsom*ZOB$;n9-c6vP)PaM>MNU@KwHcVO6_m%EwOftR-gJ!?` zDjx-|&zAA<@b>+nz`!q{;A&`|XlkCV3*ESm7if9^Sf(hh-L0XOi)%uK!D`~&fW?xQ z6W%Sv`67LNemQ@0-TVqs=zIk?LWFV+&Mqv$1uAJC*7wJcA127v(^QSUv6wmk_R4Y? ze~}!r-g}vBUofEm{*tZORfj7;Rvika0qKlqt!n+9J4?ga()HCPCNYP_@CO;&`eSu= zWqbS50;$xQj&O=SIk*5S$U7)S;XHUcKguV|%F0$+f35Voy7(e^S&);nvA@{b{ZY^J zc-A?)2!3P#RLX!)i}mT^*HJVBOWp*Uz)gUuEv4Ryb{qv+Suv!+URTZGqu# z&&)gxU!q629a}vOr7hgc^;DORwt6-|BvU?=r!7Acfazq9`_hHJ7p3DUvx}c(>tl zm&B%u>iCFRr}C-ulqn}CXKNrj%}_4f+CXZp-J)p13o0ml>v$+sO@RSCd3pJfkMiz| z0R$}d-<88_&*<0M^`cB|YQB9eAK;TykZ~%Xc=yPC<>aY-Jc}tYF|l~uk1a+cQk34@ zMjnn4TaxBXS_J%(m!z^83e|!|5xrTqy!%>}&$Sx8u2vA9PT|;&$8dj)jg9S|tzl+n zCgFM>@p(SQWzN;Xp+Cs8y*6j2lhhXKV*HgvFFcchy%~OouNdIYw^iX z3FEoC*hh;SuP@qO<9-pk@!%EXG-^Yt*7WfM>nQ0rVM z-B9b>-EEPJ?+##->pTkt+Y^xrjPTMmR;_V6#7+@fvw{~IRr4uf0Z-5sQ&(Q=i9}U4 z6w`Ey4?GhKntDpB7e0?;f1=f1eZ-uo?ZKQ*3*$HU<48R@%jeHyNKjS*E%Niz(`X69 zlE#^=cmG^)VoTW7wn+T=q`oeQHR(70ymSk>QjH-@;oFaE7hl(Lf63#OGq~oQ9WS3H0_6oSSF6r@!sl&xZK)V!@uRlP0id$4! 
zIo^PDddEj18MnlzJ)rrGxWa8rdmPMhKD;02Dkaf1<7Z z4ANCoWWI z>~P2bOVfihGtd9lbVflz-(N;I`9;*p>60h+?tQ2bu6y7Rp}xXfFG+*9p&wm?Vlp@o zKPRP018bo_yh=y z1B;^ggSRLv3*evL6txgCpQpe}PJU^LqLusmG3MannkHC$2UB9<2!mfz^+3}e<5Ekr z!Xol7z1dNrT0lH17A*{qR7_cbI%G;Lzy&?+f+2=DVDZ;b1BOC6xO9(8SX&;!8w{<% zK_H-H2^@(B<^!aE8K|KUs)U5ZUo9wrDIFR(RN)wKl9YPzwILp(N*D^V>NPcde{Z5U zp7rzJdMhz%NBPU8zby-JK95dk91q$FfS2(eOc#l!lXu%H{H#pEXFFH-@Qc7hR#w~X z$x?pj?O+_(id&-LLjfb0Vho@a9M`Xv);a(V&C@*TPGczqJ$SJ5%gV}XYA!N@$lbnW z#a|q6aofyPDGFVUhJ)x2!Z(CXkk0JF?xlGgW(!??D=}}59)vTySfSf4cJ*DzyFok|RxOdOI z$p`vyu}HV}Waq2PYODqwO4qjnIhJaZt?|NJEPctKnM?=l>hdh>^GkC)>FzlmDJiMW ze(rezx9=N^twAA3rRet9)j&SD+t?fAyAdKrtCFqoR_x(bB{q|`5jgbo8FwVl=Xg4| zr^@F&j{2vkr}dDCg}myRP7-G%-x9~V=~>OzIO)d0)PMV!tgK(hR6HC5Q&?EoJ$SVK zB|e|2`=*DN<36z5lpibCmwT+t%<|Wt3k&5@p3l|sp1+=m$DQy79vGuJ0J^6Mv>rS`DIc_J zUVJMv?Okd3dX=iC(Pr5>5}C(*j7b&HWJJ~|#n zzL&BfdPJR(;i3^rJyh{n+)qLdlgXglL=*l~G3Gjt!%Ek>l&!%#xC>#h_7*pe?dfAW+rz?iAy+2J7cy5upi$P_5#w2P|Oo}2^p z0A%qX@MwGoVyL4!DLeW0lE7Q$Le_!s{jImOw)`L1`sgvyKQw`R_SYu7@K+@2{{Cdu z7v*GF$f;lWaC_=yQfRP3Rbc>Gqm%@&NJyLf5n`7}{!jzhNFAXJGM*$hl6a;Q0|2ye zA)uAM3_y!gZshCVkTEAG=dbX_Vgij0LD@`ksP$mw%0UQWZGwk^%(|ko^6%i9!0Y(& zFW?GR?eNgyFPH@o5*{A@ue1BVAS8$iCjT~Ii|5ZP{*I6qL)3ri?Kgz{cgOL&J8t!= zqAsH&RKp;Adq)w{baFdkR~j2O*P z`Uc0%$tj=AC7+9s^;poC)umZ$w^eIUr)9(6fH&@0jtUDna@=7WsYWF0X+j0{vpvb^ z#_Oa9n>rO-urL{Xi01k1U1+t=I1%sPDtyiPCIIu`>#^Mtq6A81H}?T;-OmsO>AvK zMueQOLkMNO&&0w~$=V_T+@MLoT;@)m0h@rIJ-CK*F|*;jv<2MzdTKdo%?)Pl5(DJi z#;kn&pTpT4&HcyWD5FDSg-PIUjiR3sLrxeaCGf+-ln-;vj@J5#wo}_{{UoBFx`kAB zzG~kW_xX+Vg`hM6Hag8pB(5g|uZppbgk*v+Hab3TR^o!chk`Hf@Qrzz923747ka>u zu!`=fp58l8Y!(J6z=o|FO|V>P@t*Le&`?IG!a};`g8RV>iQd3Dc<^$60l-&*Rym!F z4EaKgV$|2OgJombh(L&S1(QL057=)EW}vdx?fW80?hatA#N}*qhTqqot)KFfb7CE>{L|#J zx1_udV|i-bQOC!}hn=Hjt4M2+QsgpT)UndOu!tCUg%Y`K=ltMNqi(D%EhAw-oY$?m zTf5hwV_VO8u3xQ61HYo|eHd-p8`qz}M#H%QBn2f}VS%!Xt3B^|-r>8A4jAQnxmNt% zGS_|Tn~^a$vTXP5C|9FhWtpYVZRN~;KgKBTs%(zO+>mcF*-6O`n#%U`@NJl6sb_iM 
z#N;8yqHI5yz5E`NoP3W=hh5ZGF-*lQ@a~%x4_cMP$2RP_bRVaBSsQiY4nZLlM(D0j}f%zL(?bARw6FM@WX2-uB%ZetTy?y>vrFps{%ILs`orrBsUfd;!m z@_uwDsZr)g6ylcB9=y?EUVP0Q6`Oti^f}Md*;j~|@9aB>1XU>JmrPAf53D_0 zm|gHgCIeIAAD%eI$PMXK=prx$u48!*P-U|MFMmC4Rs33(myxz{Z>n5MFs1^i4e}T{ z+|MWNW}PNdLD;(7@%G_N)J9-_QBhdY*@M_u5@PYdS{)3K9q&HdJXt3#t~>0JAke)V zFSaV%!TfoKdZK3iOPcqLeP{jk#g;?WZXE%YGg%cwu2;Mb~dx1327Z(G_U$AOY}qi2Ykx<^QAo6RAD~ z3KrVQz>;orrVLBH#Mk9!qnFf8ylAi!{*0^;A`b8_)0zF# zY!5eCE~z14?iMoP)lGRQ%aHEy2NVuvO8g@fD%|!Hc@G3&f&m;5O73F_*3@vpAEGK^ zxx(<0k&&4ZQ^l98z^|dBqaV1MnV;^?>cx#D^1JM|1fag^X#s&cdSf(iq*fMj49dw= zJ+f?}Ot@*T@`xzXv?qB1Tr5BQHNPEz#S$}lZcV>UDeQbircOGhnfn?kt$lRwsH~&C z`~Azwv$C>ixw*gmIEzllnKn7XP25RoTh)>F5jvh7SI1Qt+&$Job{lQVEiUFxoXKf} z=`Bq}aq3xpuBPc&OKM&5xmRb$q$o1db~{_r&y9BwIe$uIT<4C7>6EgD61FBsHC)Id zMwp{Nx`*aEFR8H*P8d35r>g+(u{&4hhq#e#8Xpj3kMI$QS7k#pC8dmx`NA`uATomR z4N;caMa18oxGA%{`*0O-x~p3Oz*m{^Dc0gUgc1vfG9!2(tawAhI!7NE$c7C;;#x-i z97tSikOm0?5$SIuMP{(XsOJpMe>5m#&-Uj#)9)z4Ta`oMqvMmuji2I?7jpAQFOr(M zZoQXx`qlqv?w|cbqufTAA%L$HdSu{3_RkYy6QuN?rr@!6Euli_?j>`-`qwPfswrC@ zp&JaXe8@v#GAI>-M76w6^2UE%{OQx5ZTl+Y>yd4Umb(vw;bhbal=x(2XW(AX+lTEF z;ob(L&VX8!F{V`@Ko;^0EaX1XDQQN%Hbms9ld4-XzVpaL0K2hW&gEdF|ogGnA)!%KhzNa+2cad z3PJerA=byKf8BqT*7Ok#-fQ1H=r-=Kh{+29#PI9ey2 zW^mq_{saU>#^O5mg9)HQ>pOJQ>U_5OG|zJfGEE<0Bshl}(nl}Rs3v$UBD(|%6HuH#RsQbf!|s}k!!IoAKu{v|ZAo=#=XERXb_xR?|DN+qnQ6$GKdJNGtwaIW z!{Z74`7G@Nv<#YE2kRa#5S6LrtJBJjPfYYusJNs&azCL77l`^H&JYQMh(RjtMXj+$ zz@!=R!&Xi%87Gr+4HF~qK9T55b&-N^Di9aRV+WH zOj#*yYG)y$sR4#zxEQ&6-$NZ0wMLs(a@Ni9+v1>?bt&Z&vlk=l-2kt*(>E- zs4yD$v2a}Gu{^iC64`ffr-oNP9RB18QEFw685pM+n>WK0B44N18;_x?sofX(l*j&M zp){|JhIxg9n_E#bIy6K&wE1{zLdw(($rcioQ+9nwN-zWD;E+{PT1<@9?2Oz6c_Be; zKt0uQfEOt5(=H81scoST&H55TSZXX5TLMTOUtZF&1ldT<=8WgR<}X!QN`CoPc`b}L z#;wee9P`IUo;%Y4AQ(|2)v{!qW$YWwv)c^w4j&~p(0o&FHXJ^i^Jr$O=5KY9E-9F& z*mqD|?+5wIdww61s;;hP)F>Jycl0;$dBpeO9Okug0c+!(Y;q#fOZTM^M@pe3BJx)V zI4nvs(jv8;ILd>&rBzrQO>@KY#*G{G*u;g{QO};U$UYXKiac<+!<6j?cm_)l0ky^Y zKiwrw~ss@>JDb>1Pdtz+}N_zuzDcN^bC@#~B~I}{CG{dj?liOyglQ=;KIzDK_#~rWAOo6 
zfnAf5fS`#p;(R*zyuGMRXp)nuFj^pD6hnm|gv>~_VX@1A zs7Hohw?Kj=*iI1^B7%n?^kzyJu; zFc2>w^TTLSy3!cQN8;=Krp)FjP0!eVLU0k`rVUKL{ZGUPYB7iAS|4=C5O$~|VS6f_ zW+W>0%*d^{nc1wY3Wxt561BZaHh@f~%m->95?T}qo~!!;PdJPB`^4Vh{f{Qb^oM)9 z9sQe-W}_Ey%cPR_*~lHs#o7-5*iS5WKV11U@{$~T+;6d0FiXTVJ?aORep3}JJrZQ; z|Ep!(A!6$Itop{5Id*JeYqiUE%Lbr-S;(((%ie2>f&+AZa1- zik20VNmA;;fZ-kPW{86|fd(QtYLqlC~O%H#K@fV-2FvSUhF z2DJ9-^T7^C=DmBUwN!0VfZ`{SYy-}EObo#5m58f>p}9G5l3gxI=7FST%n=O-Az=n@ zgdu6(M9sHv-v$H(01^ho4>M}IOj3cDb_-3uva^|^NYVDOQM8l()~}T5zSgSdTEd|- z4;?gr&4~#ZBGz77(zJ(!T0x2O0%B$NZjzAWLx+J}Ic25pG_XwP9kv(BSh&k(5go+- zNIi=F$ue(FCrbSt)cY2vnxZJvp6wpLg1#h_E(CY z{-VTD&mha&76`~pvg*ha2!(k|BVeb^1M^Y0@sweg9EJO+#!+*A(9FhDb?U?VB)u*%mXBUEK2%P zkpI<~D)7l5&s24Fb<0+2oVHfNbNT09jL7G@$agtCxF1m?*$+@&MSru*{nX}G zBrjN545AR!eqFH4N~K)f#-x#Vw!y9vb5YEYHSVl>JV^V%626Tp98|UF#qlRjLmY%7G!@M`w>q_1<~>U3UTJ864Ib}ozR7022X%5AQ`kc@_g0UHGlK@%O+EqFz!}C| zpPofhXy3jZ;&-1XmH^wX3uDAD-2W9V=iWv$&w7Y@hp$r|{3b0&;69OYfVn8I8&$Fb5Ox6di zHDo`MdO}YIrc;|U<(UEDVPFJn1w@ov`qKZKI|mWczubAiZ|JYA1pNmr|6)QvJ*OE+ z<>KSxb8&FoxpODsbwaSi{z5ZQ4;enm=59UiP|GV;zR6bo2iI8xDpbbD#|MYAi#6AR z3hHvX|I{p8A)1ApU&q#wKuha zg5PPVXMsa6`TSx;6ZQQkh|n?7B4lu?{Sz;?dp~9W9`O+OI1} zAP5Ebv-S%m`+JmK-dndqgCR34NZed;D-p=quIRBxRLtd18y<#om*bwko|t34)QfZDS9dKg z)wHsl+%he-QOnWS)y?q;`*kMz49=hA($wpU_UuKebRXVCi`?7h#AjrFd+nx0PmXf5hJn9mQ5oG+5Ns9Gb{YC9xgaIN{pbzunGi!60bVU?< ztTNI_g?XH>#_w>mu%jrv4OMX^SE~YYPZUM z`K8GFtM8eA#OyGRVF9QF13;3v`2I~S2t&8-r9CLkdmk=H>}Qa7o0){}h4Zry12tKX zcgY$Kdl+#GySuy3hr+!;&6axaTN(ifwjlWnQzkg&U!Q;R#vm8aELe(?W048|aa`N9Wq2_kPyQOR8+)Fox<%2y| z`s4J^>Ew*xm6<-I&l)2G3c2G%W#LnwmE2J?sLblgs`YU~v!FuNWKVfB?8$m-8=nEl zg0y7C>usWDWw&^d!z$`H@6FXaq#P`Tu+quJfRZYw!%?z0me_!=laK)mH<3aozJyI& zsa3`4V+%?tj6x;xKJV2B0%L?QyKzS-L`4QE0~t|7O!9)_Vxi0Jaz{tUenT3ZpF3z_ zYwHba(pJo}Anwc#3ZPY26b%}$n$uqWY0uu0!uk1mXDn84{dxWl!kSOb8PD_&5RYc6^8OcBk8r zH0{FT;^NY7eyq>`SmR!moU`@Z(pgJGPgls;xW=zrUw;2q4$|RWciBxcK+OB4oj`vEB$`BCA;>3jd@KAshr(;Xkr#iQ zW^&L}>}+#LxBvnbvML+Y0V;0&ZtGeHnsE@g-_O^Tq47RSo`;(&p;xEJ;B!4((f=Vz 
z?h=07v68NoeoqfqR3i3)Iu~4Yrs28ZHq%;JIDi)6*2Mbh% zl$I*t2V5*-dh>w%zUw;y113WOy^Z_y^ zhiC%E1q)|HcQqRkics(~6@RU{b%BDlfkeDmZ6MXI%c}7*6 zTs}}YxT;uIw~Ohu4_m!^1Bz$vPtx(~Boi=EUmtW*qycUIa<6gkxtJ*4n(1-h6L=oI zY7eUX{CqhU=FKYYhlgu@iRI2a+SM&oy{n0@2@C0sJHsH^K?;v2bB$P&_&sbvO#LtQ zl9&xi94e?P*^JtPHS4)cq?mk7IeYK`WBgoFVh1VqknGYzI)4;HfF=$;(mong=dHx~ zf(jt~uhJ8YE6+N;uaD9UE+5km4AqPCP7?wti@V_fU2A1i|K=jTz>!v`r0|V}wfo6- zB#+N#kX`o3Fu2~d zL8?dY=d?YhmJ2bL8ez9=-}yYxB4xlH-8F{6D{~Dtub1&1d+w8R+mg@ETyy1j`Q+>2 zs6UH!dDm|z9GjXrEwT^<-Cfn1e`XE4DkFSuLsje$Id%zRnbnt~($>eI0wkXBes^J2 z6(N&u0um|j6lz@B)wVfcxsi)BF7aKt47|@UQ)K8TcwxA>^M2N0$0KR-=&ULq4MD0Z zKoKxVBsD~q!a|@Y0T(!rIcN((2vW=gQa9fhD zDq})mj~mEB-0aQ}IKpxSuBVS5svqR{DHm=%JrO?yJc4-vRb3>Fm9Qc0$^gUi0S8$2OeOA9yG?c_1bJ8Db!X6JLvKE zX`Fd$tqSlvY`bg->dCKeScNkBPZasSD(5QiaX;4qFvINWop2in_vgi zw}y#U710~C-PH)d>S&`?gxOd|7(#7^f6Jza{h}$Dx~r)qr)f+i&ajpR$0$DcS2OMR})J^-h~Rd2aa(J0NY0pnq2w2J)Lu;!9kE z{Mc2-2TmINE=a+=FepI$`Jy&XI_${s^E*kmFZp1EI{myzSPopIo%B{T8VWfA3VG1C z6a)MNel28tFdoa5D_P)BLW5tv2jh@`z%Mhn$kLJvMgF!=$|>JMy?Qs0d}r3K$WeOV z0aD(1)63&Jw(T0>k4_Sf#{eLKO5*6q$RQxx=NC7)-CSQ)P(!+i5QamRb~XStYVz3% zsX$DQjEhr`y@u3wgN#CReY-cO*N-!a5d3~nHwzpmDCN`8by9=tmyG>6#o;?ssMxaw z=SS=Bxy=$+KrwjZN|YQ5Dk>s6F_2V2Fv;WLbhf`pC!f@p_|SZ^#PG}!6a#|F_g%nG zK$(}QPSovW#ozd>kZU>>EUHVrC3XU}Ud=-7^W}!Ef#B0)*VopPD9lVvKiSOI^)Um9dHMHQir_~qi5N=F48R*){x_UeulKqmCn2-zzq zhB~aa0}v%AM?r{_yrN>U>t;al{=FH(2DhVrZZ;j~jX^58H7@Jvh3hMz;^I^ zUcA_9{c2N-o>q4}uCp~!ET#Vm)cqrq33gB!YS$s2^DIbdY3R9TQA{DEne*I14#F?- z;<=N7-+U91&!{9D&Bz-JtFVz7!qU^-?F)MeDk#G5=K}#j&Fy1Og*ciQ#zPa$7LO^@ zcuw0nQq-1FSA>kaKPTyn9kJcycD(5XK5O`}Y61YOhRitt^rVocz6nsV4yl9p288$q z+_33F1c#Bw5j)_D2BBWNe4i?&4G}21fSA7qf*< z1(qW2RgnvN_=~%0a{TJ!1D#J~YfFpY;bAA*vSh18!0F-Pw=;j^*SS;Y&WL0QwibzO z05gJ-JOKA%vu1fal1otJvOS4S&CID1fbsa@E2XCZpYFpAfV(|3tMiv+^TLdRcratb1+#_`?1`wxKk*r79=qVaOK77{UlYU1VmWN-rq#Aecg;l2QO z6cG^-p%ZgdU2Nuq66RuD+a6f}*UY{k&1BUbB@w(vPfy>(Uv%)ga@t?St{Y<;5ZiB~ zxg()O4~K_`nNsGxE_WamT=Gd305m4_+yQ;H2Eo;*1~MV9ADI#%YyHWe!PkobVhd8U 
z3~0{j7~mu&VKpTzf+7fBhaA7^VTE@BxqhS@zWr%J6!oQfEd~0G4dB)lNI~;d*(%_` z$sCsK<^w4Jz!nwGDuF4u^e1uZ*1X7#DXFUB0j0};R!vP!G0;mBLCy+g8{7wu>L_;u z=e~20A${-dyq0LbWX5wB?ZFy=OyttL4d50WP|f0excKPjxi+LuuYqk1oq!NJP+?>s zh&BUa8LP3pu@XbEUIYkgK6p%*!SeHJXC(TaropM2wIr*-tCqWoWdDb~?~cd1Z~He+ zoJQFrMfTo%?@jhfh(eNNZ=$U1J7FFbw5|v{anxeyPw~` zzx#Z>t}D*dd3-qfvL-n90>e(~Z(Pg~~h!0>Qf?DaBs)HnB4(T1-3e~Ijy?5rz)adDLH^eYs?_$&At z4&$W>%+hzo?o=~yC7?EfK1swCBAo6eKygLq`ium(b_=(dZ(1T_abm%iJ}>D>5Wv;u z9PGz;=TUfFo z>^!|Am#R@YNAPE2PDa`+n_Y|8Kckpyo~zTqNV;lHjGr<8hGc+n51Ya}t&}E;1)XFw zr3)h8wHZkhEA!NJ3m~5p3wRBjkEL&&n?R(hRxG@J3P5>kzyP zI4oqWf{f@S$VfE8CA`RnmQN$WWjD@hs9-R~hrnA`c5^YW(tt%Uk^;YtO}O6qSd2-E zNPG?BjL?>QfZ5VS!R`Fv#FUOEootmW8KEXeh2D(gZvE*@{&yDvR1Tl7{N9(kIsxkz z{(?dTs33XNvg(o!KI=qvuhBk?{|(=_Ihv`S@Hpoj8UqP>Ih@51iVbGayuo4^=20w< zB}Rh#KMjw8ZV_VXYP5HaQN>|NlhtH}Hl?FY<@XEI(xjXJ;CcX$zKZ(7l()N;>-Tzh z7@~GuXYNd;!_i>%kl}m~(7>Ae(oASgPr@YV^q{z4G0M^jjzoN{QjV$=Ug~1F;me_o z((%5eU`D+OOwL9v5eChm(_D4MB@3wankxz#wS7%>8KkulZPwvdr!`pG*qKFv=HbKy zeL{xo{W@eybB3+%+l6ZqeD6mDu0lgXf4yrKny|!5O_LrdQqT+1X#aRO?uwufGs=o1 zfj1Kv=WIzWd9O3#%v=jy?()YTT!m+i|MBhwW8Pz>M4D+pzK0W>83Ua_E$qN$T0@up z320kjKv_O^dJ9`)eY&>iU}b5v00I*I&!5|mkg*h@$MA&MXAgpSX4kNH2PWr5fdctdth3WYdB8Vpc_AJ zr9RNZgJC1brDG__%|+Yb3Hh2hsq7NTnlScJV?CJJ-*vk2hROBo*PWcIzTX6m1;&+` zp<&1u063hN4V*myf=~tMr9~6}1UT7(3@~s-=Np?9#J^*AhzPoH@NLAg=H_GG-Bak~ z5EOhxLN>KebbI~HV?t@ZriSv%qgAN6o=XUdCm&D{*eX?1N~~DwLgWv>c%c{{fl@Y< z>EiXUwUB4f={b`_ZSnzoQ?m7tlll^_0H;6|wJgpf;1wP<`+T+2d<2Fwy(i{v@Y=H? zOnSA_ca5?{ZlcJMuBWw#CG`nNM$)=~aCzmzrelwo&&qqQqc!_5Ej1Pifsf~=JCD7f zZ=m>EfIRbc19EVTS_KdnZhG0@Uv{y+c$a)-0MHFQ%^yi{vvZVD2X)l1X5&(toNr8! 
z3W?zJ-Dv>Z4?#Y<)xGh0==kYU$ClhqB*pDwZ^IepHX;H-hTB?T5EgLE-@!7jx z<{wB)eDL~vujh?zq>X)~9-H{4j>aw&Fc$h6`UMOJ^bluR<0-VM{od=N3IZ->r8|d3 za5#k%Deur{;GlGW1ika*#cSLb>zS8XfuEA zBWLo>JEV#ChUysn&eE9LT+n%{E{BO>PF5M5@bR7+4ojT0CTL?dWjcnaI|kNsd-qU2 zlN~&OO(W>qc}@o^+unRkgFZ5{5QJJT13C|$U5uJG_*=s8yPO_MP^on?anxz8GX}cx zNK&NOfHvir)S)S*W`APRX)Ow$R80(7602m3ZC|=?t5=r^@y4)_ud8EEbJ<`QNn{cu z<&z&&$JCW7KN&kuR*0Ybny{z1X-t%E3kKvcbH5P3{uC$r+qidP-kZ6tF=@Hp^76q2 zS2d(2gAEQz7fQ(-UB%wKWeK;miRTHToL?(U8T(j=WA|$Gs|X2^B3(ykzF+c#sP%&P zGzlSZ<(KF8$FPVDye3|NeI=cxn;%b7I^%iG#-`WZg1lr)!`y3WL_Pp}o!XXCW|$?C zsY>tQ?j3h@M}hHm5edFEEtOyoX5GfMY`*z-)6d76rrEp1a0wMJb@s4MtLOwUTL=xT z`)6MuQ#UWzarR%vn6@c>f726N)T2#xAgD4nm(7h2QJ%GK_Soq`>-9pbUTyuzC|s#Q zzo(gMg9;C%b~kPw9q)CXye*Jm4^0SENpnY+BGJw;*GPInn;tlPF4P%2>tGhfq_-FMCVjaTQ6Nlu|N{>il}}9Ei_4j z9ycA0&hq6``m{Jgw#v1bbn$gjL_q}IQO+)|SIt!mA6w6rKD4n8PPs##gpi);y+O0X zd=5*Xfi!8k0xgiuR#pd$dL5fbx9HxfV`L*kJc#=B3d~18Kk>VG^5fA$b|RkeeKoo@8RZc?5y*vR`b%@uzbat`wrE+NYS%}2>tsp<;vZ#)%~+=NiKSy zVcS?XnHS-wNx$T(rHWzAI`@SoTs?mq{sP$TytY$=%@O?se!I9zoi(xj8fW59acZrQ z3uAaqLCd_%=G&RGI~1cT%LP^&SX1OQaVqcekF_mv&RDG+uU73 z5xA^=YvCyvvDt9K0fm6}_K{1{4~UBJTLae`rN+PmXaIStakRE%`L~Ud#KVJvj!(@^ z;Fd;J3XarvsJmFLybJ~c{}b_3RD8K`Hk^FVIYEW38*fZy7L>m#(NQH_1@f8L$z9F#7-vccI zyrOwufFnSJ0Pt+Z{nR3ksAI~zwbU%$H%U$aW2tv%P15T7w(w*&CCV{UN)#d}y_zx> z8q{)n6~YzG>n&dbzt!hJ6oeBj@uU5%smr&eYr6eFqOF~sS%garp$EOTxS~qel&&-{ zTw-YAveohzPbxGNYq7w(+hjXP!ghDJJMy|Pb8Z!87DQwdh=-7!uH;IcUD_M24t}uQGu^a1q zsMNxJXzIQjAnTTbt#wBa%1x>*mvWd17P{`7aDK4dj2KwaeA2*3CvC`!TiYZTeY3W> ze1;}H(MF#=teZ6e6aRIZ(UaBi;pd4X7R~nBA1;}=d69>>HoX|K;P5CQ0$l15WEF6C^$w$K*Cp)|Syb?wuafgZ(%nB)B3ZM14jwD&_ABJ@?HyP(eF56TWqe?m`hF?!$>+LDmKE`!H82&A#RD8 zNHOX&nin=+#jRpYHVNUn7?tkgMz(a=I51=)Vma38#Xq z(HbwFFE4C(=^Dmpv;-+hF4e$I!cVj$55MyESFO5Rd8%*8kf>?X%@WV6yMKOnub)$e zEufy^Av#SuLs%}=3@OnV@n`}DA>-tz!+dm%5GdtRo8i*Dg%RwbH2#Fl<~DPX2NInuFLo`T4wuH1tWt}nodj9O zxL{=LvW&T2ji@i*e@(j#v~Q7JucR8?^VkT9hX49(oc}TzQyym2os_ZyRt$76 
zC8>T6A(zeL6tqctgl>o~BVWA@%43pMqr(agl4Mw!X59Y~S>SMG&8G~UaC{`<&7jIY`!sEE(CVYU5y*+S80PZf~Ad-0-#l7hnVeTM}NOF~M>FaIhQmu-UBg)6|6 zr0{;?j*O!5sA?byqu2Fwot91SYOkuAMmSVLfT*vpV2O!H9)tZVBPaLFrAMm|fa@l{ z!e_LTaP1jgb);fD$rO!*djy^TgkdqJ!4N}oID|20 z1hc?1m2~DjcrFn(+RxY5yV&(@L0bCQE8>Hk*Q~a2+cOgk4apif3lbPr?QCqu;iQ37 z6;Qt^qF`r>K%g;aZ|zN5fY0R$q#!FgC8AABzKDybA4X`jGa;c$MtA{jtCF2|gvyEP&G> zxB2GrEtEB`5vg`>*^q(-i!rzhX^1EH_|D76H{TfR>Y_bctYBheo^@) z|FRefUEpz$!wp>jr%McO>=&x$6-KNQXCclEslDsnb^mb6D*klJ+^wEWhZcq03p%e| zw8HT41@FdP!iS$-I;Zp=kjxR*k`V7ZiSwDCb6G&lMaAXmqc~>_y2zd)f@mFo-i9Ba#!)24} z@+-AeU?$-n>z*ki{q!jg>rw1LCjE<->-V!|VxLgr8H#o)=}rYd5OZN*W&I3vch&vA z>Z{d&kDFl+LsgY=sXjXLFA?7iBtf1>r_b|+z?NCzuFC4m+$lYJ4d?IM3JCLQ!j4Vh zO(y?FVoT4-6Zhc8DL~RW2Uu`&zAp zjrOQTQE<1+-N6TY&+VPh=i?9OPa;I$fLlnU6<2+*d+3ssUQUssU@#P^T*jc#rKjbu zTtd>jpZy?DiaoVlEZ5o|*4*5(-OLhZ=G2Bx{ppfBv_#8OO4X&1YQ z7NU&(QuoC9rp%seJf}@3+SboB2eYAvPezkQjgAPTs>InmWCbf#>TA!4Z~(dzQZ;xRq_zAkESs45)YfOd1q@F+rx-|#7k z3-5YEHk(Ul{IVpLV1htFDILy3#Un8aW~*?k=~`e z%KwlC`sB&>x?L6TLTmSy%vl76swsn^=7Q<}E*v(#b5Io{DTKE{WgSvull|sh%z)U!-4^vM(WMjd)=bW$msZ*2OkU(O=|h zb{yiK5c-9UQ?X>nrhhr#9EO4w$BrF$6tEtbU>iqA6Wr&%cRmRJ_$f<*fNJ91HyFZ`_U=?S`D3Eg%-tabNWJ z`v7DcTrHc$Z$KE&_F2rRa-&Gp9rzGL)W2rK@Tk?QM2UR8Y=5Wa!lw)>dcqlqp7l6; zmgm%UAENi_WqU4(59lUjLC|JB9A)Q6hflt_)foVqJq4MXlF|rg8ILi$5T>#CCjyPG z?R^XLl<#-t!}8YUjYSv_5)~n2qvJ^|X|$LWKVWyZ<9u~8%Tzlis;(B)A#m zFgBuH6C!wGM7z?7$5r(NOYUbgTNnXD!3 ztiHyV79?K?BUvEd&gEuGntNF7y2tNLoyQBgM@<}x1K*;IJL>tvTe`9Us9t$gF`_k$ zjHtM^$B)rRT%Avb!$OD0--6CJZY~tOB618Qx3>^?sIRL7&QS~S&GM(Pbi%UIxGsj= zGac3EljTSBVtYm=+-nYj;IV1bYFV?$gmTV%-!JK(UQC(ZNR5ho1SBD?Rf~$|mh9ds zTT#=vO9XybdVBvchHn&I_K+N*q^#!Ty*U6~Syn+E*Z%jXOfrZa6H#xoJ6;)i%Vru#&xEQg6B;fB1As|NwN^45M9+tF-j?Wy_E24?dbkz)r6ebtnAFkezBi{!OiZB> zk(kvP$810m8gj)V3W;>z23vpofuhq-*A^&W`xQ?{$e6c!-fZ{XJ=oibla?>6xtbS9 zR4VcHeY4wVQ~QOckZ|b(`nv-@c&`gJY_ZtS;!d;XHegBu7cYXCF&j#~IBMCJ2?28p zmz2fa)8Hv-3?0ua>fp&%z$o{bDHAAY(B5ImqR9_&oK_Bdm0sg8}7YCm4(RIwnv zsDGE9oM%xg^@|)p^w?$vN4ZXDUCe0TQ6)FwE?RF3MsN@jb<$l@iiK!y=(X;CE<3w4 
z3(t#|mb5&81+YN4cU`?0Y7S-#g#}7eeiAHIuOwWlq(@9F6z1BF_2xJGX00<`FTb_C}{rFbSrx2I7z>ZwW)|mSi z*4w`Q^=rYmBZkd;Z#p;8y)XmRwJ!1-ZqR;xZugZ2KR=xCd;-(M+YXTr{bCOa>_%Hk zSt!WdzTQL2e6>GG9`jHKi}{5J(fidVqL5o{=l2xO%>Xb&W`7RTW*jO{lo^y{iZ37p z&SA#n(SPeAVW2qd^~P|)sTPwF8H~-u6@B4l{sL3SO0}^o^hz*2JzWt_ z@iV)QNg@WV7`_8Imbgu8tX`yYqvF$7av|k57R-D^Y$z`G#io+-%AA{*EcN_h6zl7j zsRto;)|ALg}w=M@fSaya@O0P+jeYG$+b@R1`MwHY zA7k3-0b}J=^<`PD7pz(=p~Vi32K%I5iN{YZ+%-a?D2GoBHILKMF?;s&K6Tm6Z+v`K z&7}8=HSVnG(ERDG;JjEfc~B@yK3gO7=UX)~Z!CHS>JMvqr|mQa^Jl%qv!d2-|Jl)#r-}iucfAqME4qKSQRxD#zHsqJ$g36jGAV? zNiugweM_`PD{z+C)jj_5#%I32fr*F-U808Wx+EKCP}oGU5PiFFx}I2$Z|RTSH3f&8 z<>V-@n1fn4J~wL8KXnvCds(4E=(ue`SLrz&onmFCp1qlB-G zi=Jam#+35R|y|hEdoez+-;8GK3gb2UZQ@mX7o^l3(#{y;IcqJCd zxJyS6G|dLL*QfjQlz5-s$F+|jcIUz{xoj+6s67{Pa<24(am|P z?t(`FY!4JrGa;aYmfwUN@k(GqQ&uoXD0(Y6nwPVrLc>k7%RetJEWcAGnP4yFKCkFK z{)n*1AOb#7=tIhpfjI4?5P?ZXv{${9^Ik}uu9X@wlFfVYxsDb2(B#us?Z-?6KPYTH zgs(CxUw`JWCPxM{rpMIS{|r7GVv61+x`ca#oG0d_cVk3vMvM_9u$y4yXz=pzKsrj^ z=9;JHBOPV?2WzOZXRFhrd5Sme?u`K(>08$w(FPLc7Vj3FY$l&d)uUc2RqB*dbh0~$9f;@@cb=LU$!#%>i;j*4 zuyKK#i2VqiR>}N9=8dY$9sU<8s~`OVI4MJ5%8m1}6w#qeqd){G$TW{di;&|qpP--{ z0L1KP7Vc^Pe#r5Lb?*exIIvel?WRK-!`llyImufc&6RCOwbN=>Ios0Gba=VAP+k+W ztckJlA`ahynrx|n4oSSC+>nJj-2@;oP+?<2nSGSH0?NMAtKuO@qutPugp)PoI=6X~ zGsFZzkKKIVaSRwRPId;{%cQo}*o;(t*oS-PZ6>aoS6j4-d}EcklTVU8E)aNvDsjoNzOu%_b90hChjQa{;1SV9TthtC(Oc))bLcfA2B5qeri2o5~$p{K% zt7{hGskdyR5EC0bRcmUbPUnvha<=j&I3pYAKb$Q+EV&^?h4v2rB?0NxRDS0e!k%kc zME1$fS1P?WW@Q3e=)2!Rtd(X;M^>7#c(q(C#bkk6{#!nsvm%!sQwAY!sUp}bo`-9X zud+BoESskk;OM9B`9bBO*5@JWZA7Il$b%G zB;FEn+J?aa%%J@TP{bI6=xO*xz%RanXbU*gmt0}sNTq84k$5R*XJrE4Lg;01vk3F) z+-{qcY1|Tx?n0^QN(VHgjLSp4?vRqjF22^VnuLpiz4s!>)0-25R1k%O!R$1>=o=Gt zwweS#4C}5;(8U5xG2`^0vV{pZA8%2PTV6p56WaM@WvOCj^oO{=f+<%%Zh{f-59JCW z!~dA~+W~|shIlv*E)hy?2Axrnbr@$86Gyz#TxwsrT5;bxdS603prBv@?Zpk8316R(8Hj5&G>IO(=k+UdKm@*H`dmw~n%i89fE1Zzk4I zfsy>S1^frxU-ISvmC4P{{$y;iyIUc+*1=cG9hsOI>~lv>;4I|i#S!BkR<7PkWro;` z%KbCrDF_8PKDa_u8q4%2l+A=4qNN*)r|oD_yE+I_NSuqA6!y;J3C81shfQiXRMQF}kA 
zP?DdcXuCQQNgS6EA4CLaqXJ(T6KSmlX-Xb)wOHxdmkLMS0g4Oo!DxeDqZ$5cW~q*` znkch`<>i38`rV~4hf%3d4Z?ik6Qyv+!nQ#7L5&L&D2?0^)2p_`HhxKMN-28wrJMS4 z;e%@#3|Q?Q&V_^Vh*cQDdPR1Ey%3L$d){xvT*ta&${VLBweOxultY6N!wBN)L{2EQ z2QZLev96Iy*lA)&pdien4zaezg5z&$snto9dex&s`)1YTP!@(+)^OFPzCt@@c2wk@ zq(?Vyy;H>)rGw-Et|ZR66>eJ|jmV@}rM0|7rk}=!5jUe*6`f*x(V#NcYN7I*@b3Tz z5n}<25miPpoMgM&VrpzIJREAQ53dX7PBRg)V6;foKta$ti&jsMmGNar;($u%wjsIf zK}^2#z$RXI!94lA_Kb-epBrojUPObt6(qur0gQV4eZbz$*(RXC#sYBnWcvkgyRMa4 z2_lhzL?b7PX3Yw!}u$ku)a_!_a0jY~cP=nmkj(*3kXgZdy-pHO_hvGFl-z=44FL{eTXmZ=R5tqqfjl*4INL(Tzvh{hhSa@LV|# zQPC+!W55zZcGbYpknHWX_%2}`aW$&UW;XUww%|8OScySZh#fa8D(>FqD2UfmxSFS> zrmi@kpZcT?e;3=#c5jr8+88fV3xfo?^tp>k?n(kV?lgfZr3@u`4UN915N2xlQy4@; zhTI&?7tf!o>FB%%&!Pyo3P3HHBMm-2K8g#LKw+vxS3ri?#q;6DB-31=XUf3Ewb%GV z-HLcCkMW(M@-L&+R9cc+5G&TniS?(u4W}dVD;HF`?rWFK&F?2A-F;oRy6PDBR9Rd+ zY;=Tpc_a_B#lT1$(gpz#L7521iodi?V5BGwVSI3zlhX={cLkzTmX|xfyefH<$Q)&u zl(Zcm7LWTb8d9q!;0tLth2OP*l$cxC4Bc(!zZ-mHQH)&jCnzMU;P|URBN;Eqjw- zZFzpV%2}zvTcN9$DVGnOJDg~3Gx=3cBV)$!9`a0qBFleSK+L>9EugFS*gZ(Htp`Ya zP{6Tx)J8wRazR`tX&)eI!fD$?lbS!XVoR-|2~Z>)u)}0a2l@@>s2=?MU_gFQA7EUD z46q3kg{7V++E1)WL-H4F54VJhp%lw+ZWP&h$N4_2AlnoYP@%nb(Ha=XsM(g6ifl8h*iq|?URp&)E|Cv= zg+=`F;>yPXQWFFZ>o&n5Y9yA_aW!{5?a2GC_U?HZm&a)0{{E%J&_#P>4Gsmh!H?U@ zR2!5E1^Ud8z~ewQ(Ud*^%tm;MGn6~`q^uz6){sk4qr*Uba^vGt?;-B2B45-1%ZXp9&(WF@Gpc&1%iRKvfnEY%gN_N2U%$@Hgk3<*G?xL-x)$PK(-purUV{nNEC~{v zBU9(f0FHZyy8DP!6fTMl<(0T#7E#0<>K3B1j4<9PhwfH|>v_CCpo&sNx}(8s@7fPd zIuDEh+YMVSD&spK?)E@$q@p4aG*Hb0;xYg<^95oAm;>E+4`x_D$&k%t#p40)sfiiz za)(gG1^~w20gZDEmGRO6Rfei60{pLjL!xyDsM!n%>B~U(10F`s?H}JEMbInI_h7Hk<*KQU*|;Hg4&97vOKVpmOCbRF^Kk0?-Q3gQux1Ea6zcF$zIwLJ^E%Z6LC5&rT9a;9V*iMXJpM-d`mR7XfcVq# z?H`|SS4y%1c_xY^5VUg%at_jT~ zqQ0v}=YTMDM>#ofAZiCxVY5bui8eqw4rR{26(V4H`Of>pDXkQ1dl$e{popv+P!F^8 zb@TzORTO}32D23kQV)EvG;NFSs;z>A-xGUmq9bB@}RWurpf?mpD5Hod6%HB~a z89@C?gbh2&x`x5yCtw!|7D80+Xqf;?Tu3l%h(8vW8r>b(*9`XVm!KSY+EF1V!>#CZ!0%5^leMD~rB?ar zD?VdAr%5KC<^wIX*MiOH@gV?3XCUT{A#I?na?#IR$v2Pz1A=4|6>!LLSN<6A?e(>1 
zbr;E@;MY&Qozl0#yQs#xGea+0J`pvJcAn(!?6}m5*R*@<=$z2VJi$#ZkJA2*nvyOZ z7@)v0@?@{TAFJR&o#|8Sv5!FkD&>6yQC2$&zfF^lohtdIvOX0;(Dt!VDz)tcb&frq zs4}QEI&@i3f8W(j?B7udq9TZ@r$ZHN?O-6NQCR3{Z!*AF2O;vIFI)qYCF?&&h#y8M zCPsA>wWjC>U@-C>16F11w|kfDC31o zRIn(`M}WEvK(>>9)|?Po{B-Ilz|OhS@hkOtY^@lSkp-(q zjWwP^R*o`V2zA9wiB^VwqnMUg!2WzVaKU__vi!Ti!08g9BLg-Z>aLpnXyKg(EQ0nULlu;ZMpe820VF4Tc<9X%JHlj_M+ z6Ke=s?#V+oBKJ-17m5kJWmJhqU7*t9wLyE|(AnxqX9omNZ%(uk`RqqsKFYV7#kX8* zc%^;#iT5_HIB{LDQwH-Un6;bSvzLs>`E)4GDWxi*_9BxNmA&^}cQ#jFtNv5Rudn4m z&9v#ZlEOkndj%@f09p$ES-NileS%6aq2i~$L6t)kRsjsO@bHS{&tUixZ;b*@dZzJ4 zz2&dL+1HqZ$#@oYCi4prDh|zb{TDeTYP`P#ga2)nh2#w+vGvYU@RKBK1c{R)XrqW- zIj$>KZKg!1k&O>xK#-kUD4o0dVKUg-comAigE9MzkF$-kj~sNCjIu6ZvwvzFI^5EC znD~iUFMbiiMk0c+rTJp)^$mZ0+y$;CR#+z1uSP{M5h>t=HN{;%UNJG1Hik41(C9vc zQ!VYen*LO)x;+TU=T}T?Xw>1O$%mI%$F@5@?ZUEWv}piVj;WmtfZ*d~e~5LkafQec zoYr~;x{F5AbDt&jc{D7^=>91nboUGx@^TXmKx#w{v5w&f_Ha~GRHFf66m`)9-|cZg zz*H&$Y+c!aT~it0wXY= zF}wvZisBO>H{sTL7=M$uC$vxJF8$1Dko9|Yy`4PpwgPue()mn-RmtGb@Q!WkEQbkD*Hyk_|UmyA%%R4TW)bV40#-1K_vHPNqO zpnEPnkeFywnRrIh4rgY0U+}t9asXF5RI=ej^6^kZ6A=5Pj!_%J-^it%iLJWPh&6@x%(^v z^o`;+M98joGMuX@d_b|)*~4@+YSK4ye^;}AV88)Nb1!s~83=#fi?u|r8uiD@RenxG zWp^>#djWUH=0XSc8dmh%M|^V^uYbPM{)oHJU>b^)^B=eXawKC@uUU8usn}#HAIILl z@2M)u&1ba_b;k3E&4JGg#lOR3jv>j^Ao%mj5GWcAw0PMb#Q<&X(<>7|Q>%VJA;L-W zav>slpZDW~VGW=A%3r#d zPLecCSMr{fOG zyzINoJYEFA+AGFXUonc$E@~2EEIC@Z4GDWIJm9~o63=jbxeH2N%37H>t>8a79f8lc zMZb;5DC>Q6^5*>yJ9H?P!Jh3@DLZt-pNh#P%a$Gu_eWD=gF|GdZUOl9SNe$SN2q|K z7PPsPw46_jEyoue&Mzv)*JfO0E)Kj}DZPb@ri|P{G)Er@ZMuYCx{bU%Cw8AkXV0GD zA4S1>3jY?^vcEDDb{t4TPA896mP< zl}J?YxNkS*6``)vU?TE%p4;LZmfkb>FOrucTzJnXaV-!z3r1y_cvsO3ehdrkeg5@Z zcws7}Q^DB=bHT_qN|p-9V{=Epz4V$cmC$2m4B4qOsL5GDY&$!RsC0f)`ip~!2@_Z*4Yu-hn&Bds!sdH7D>=omcWuLxnr2lGx& zGlcIT=jbCVsu~0q1-A$%L6#L(I=M=mE|Lp}_)^IEX6t;c`X)d)*Uf#H7OAIG(o?|a z3Y8C$?q0HH+3i`n(Ep);Xm<=F*m_A!W@kh(ov<&0jW!;xFKHCS@sz#^)J9-p)sUdg z`yk7nN!{TwgE}Dy9R;eRUgX)A-+F}4>2K#?1}d2z1*ys*+~??ZLir9t%;+WwG-o&O 
zM_z8&^;3QJLg)hQUB2iaP`W%WhgPxZNsh5*O!?iX>SEZW@9;m6IA0GGTFBq-Rj`xd zWrTaqo^x?ry8M+t_*v)N-G?OLLWIb3i`j9Vm)FgJVBg?58eh(6+Se;ZSTo0PcMaMg z|MX_u?IT9oD4xjNUBvB%WxykIF~K>N-$X&BR=rcub?<92d3n+~)g&t_OvMY{b6pB{ z1oU{7gX?t~rC8al3r2#~{O_D7h=V9*2dAFB6s`x349bR$3rcmf`Y~_y5_QtcVYEQl z@6}WNuPg=NneASAI^b`D7?@-8qNJ1<#{Az83+sr!cY*|Qz&A1 zBIl4rP@v!hG$khwkMY+^XGyb7S`uU!+UKr2^Mm?oT@>i|v&_N&gIE&87+-62WvS71 zBc?Hzh6b6a zh(**eu-oTwdT-rV4E>(^jHUKVhHxxKp(~vgx5WCB6lTWj^_oQZPiy{NJx zHPd#+Jl%HS=E$JtZanoxBQjNrj#ETulNzMMN4_z~J8kX9t%W^dYZKeOKU#dA)pX$E zU@ieQDUQZezZqwyv{k-c*85(ZKX2Ona|r$~mpcFK@9KXpb^gz#&i}d8`9GIB|L0QY z|6J<)zrED?&!fgnz?}UGoV4GeRuu6cr>*C8;F!AJa(3hKzUdgP|H7%8kEqRLMb@-K zkp#i6#AIIntvipT!Ry6qgl6o`*Vu`H0hBoQL=Ss!UKW%*G_9IZNyIc|Z5X&N5X+AK z)Pvpd!K>l5H}=~1zG|gtIbeyeb&VQr9W@BOmhal?Az{+xxhnqF=T;q?t3S>ux2reh ztgFTzS<_lvQZ2GoWpc6+y7xiIAzgPT`f=QxhY{}S>dcL^x*e>wbQ!8!POQ^KH}lLDKixa*Ga4FG*DA1fs<@!WU3{Hde^fdyDL>?L z+AW*ce!c8=j}G2O3Y55vbm8#5WFpf{ZBCtgK;g~8rMWPIUGO$Qs3l*k_+e=%!%c0~ zes>M^@IISI={aApV^*YBE=k_jHvhEi|9Bzg+uWm(gC|la8rr@$72EHQyub1xcSm3- zMY3|Y$|EN04OQRP6Qt@UJ}p^JnipmLwdKx7`!MYPxMlm#2_f~O5GI4k*h2g{ACu?#Yoh=4%?Z)F*J4`(LnYu(hHE0N8g6SFc2u(7UkqdeIj=rxl z%$62k(LG8OeZU<`E;?RA^>6$oTa6c$dc^ACXkHyV~}LrOL-W0pBj$zL=eN zc$q$WVznS@(I$gCb-QfnJnY#2zJx9$670&u^4YwOh6cB<|Fodpt*tc6@AOqhq^5Y`3i^r zYx!>auDV-aT4=J~U74WA%$B3ygFE_)>dBP3U{BozN}u9(l|cc7Sv?wK60w4Rl!l6tH$Kcg^4pu1oEGyXe%6w@^W~eB`N!Q>w#mfJ(NV!&FFjAFFCO@zr0AP` zQLK|YZ##dRg!cY+Zo!LpC;i2gscS0KPdy&LYz$wlVDJ<)dQ;wB@T{!-kSJ0J*EZ4f zv-6^cXZ78Rdh5fwa!jMO1DCgXp6M;@QMb2ia^e?{SBT^@W;g6kf1*;bSp6LHk*KS< z^b13lX!2*O#5RrYrN=%!;<)+m{X0Xv-<_{pi#OUzYK`i^WF%S;?0C}~I3u?BW#v9& zR@nW*R*Yvm>vs-s2YN|%(ngx%v#-tPNwvl$ElPZUdTlN>&hE|YG?#lfuiXv0*(GB2 zMqzp~)oygOb;hZnt*`_TrQP`Zk+!cLB2hy-cwU5S~DWsT#5`iej#IBUHY!NaY2`K3D6&_l&N;x zrzw}Jj@eVfdq1BVB76pG7dg%tgePaqQX1roNWm*rMN3)cObK2|AnInhe-Lqy$Y6&f zvmo*$Vn)iUN?A$)_`Oc@{D@gyxz>JW;0Q3m%hKoR9j>DNr0gf%THl{2 z!OlmWhO2qRINu!yTtRRa^YbYuI$HE_yu!AFT!Z7_*MF z&TaIB_D1l=6QVf zU2_HL=*bYP-cn|w$6AzU!j!qMAk+HMGV$bIV#J*LSgB`pcjM6uflNFVnfHnY=ryOH 
zb7w7j6;#pP$9JJlVMnSA8O>2#levQah^U@V^?ntzfL0Pd z{jnV07hmN9b*7isbA{E}lH|VlX^gmq2+%aJ;9gU(zd$woSfS!=_1LlYu3!IkxnpPh z3ABCmBeX!Y&KuY7HhX^LDxJrU|5_LCwyL-P#DOGe0ZON)jhsc~QlUrUGmQ3!KQ;_!;Pb&h6ZgcDE&-04+>Md7Wlodgjk3eV3MT0;2|NG^ba9Oj5kL>At ztWL##zqI0{yu2YfTRKNw=lp$&u~wexBl>>etYl3-l$gCL*6HWE-u(0^cgBYBrDlPq z63ZD4IeY`nx5-O4+toJo7BHuTZ^eX>&^##+Z*6A}De4w0yi=hJQHTc z-e>73?;?9IBzB=s?5U!E3-%?R4sT4c;kq-%MBYP~&)TgWR8`&UDxcb4_o|{Ha0t%B z!jkaTVnLkLq2J#&!NLASSY{(;))bljG(pB&{Eo%cg_h-@z9Yo#%GBzf^0M&&6AU6w z2l=}M+%*I4a~zBwY8*H+EQ|E17IO(pt&Yuv6{d$O3di0yyiU9xb+g~;vM^4j4vBI6 zUgyMo!sM5UGQcr>u+J1$B3qGNw72}~t8cJrc0#z&3j>A8kFna&4KE9Oq9KAA0WjJ5H`aJPS z8L|F`-KfaL*$WH{Qd4(03#=(_E zWpMhAAHf=r$VCLw70Y)(4FF%E?P`PK@-d#~ft#63=OlUlrod zu27=rIuMe<*?C;sUHfEsKB7;!n9#P&WTffLYAlfH`>_k{fm^QX15=FvR$9H7n6S}UVN4rOH8D23wQK#-v>h0YIF+?dbynP z^p1#S2Vs*sf>7+S3j^eXqa>*=pU{flAS^JdWBrDTloy}q5XSpny_a;d+4YRr=hJBp z#%rU&m--vy9L#7Cl^UP9FHx=0yp|vma!Nw?Y_{N1T~(!{+fO#V!R(DchrRaX)XWo^ zM78R!wk##y3DVIFmcNXB5s3ur;>NaVn?CYrCR$q~Iy2P@W;Aii2V4LXV z>LfqsHtM91m8-ckTOKJp-25)sUhF*ngy5FGY?dS6`tBA!uR>DAWQWi2^lexezf4DT z3~s0}{EzUf;+a6ve$dl{u-8z=<6mEw6mL1X+d8>lHuQ0}b+bTiFI~;Q3ix_sG;|BT zr3YIQ|1aOh0Gsqz@b+i0_Mg94C`jDmj){gAaqd^e<3GR41W{5{7#QUT!NaZHuitX| zyGN5&tIF#`+XLW1bbo#wej0$n$|wnLj@NB${}Dp|`HKKb(BHNvY0mKS5;q!}mJcQx z`Cp`&5&T8kPdPt>vj5zrmv969dg8#KR;lC3XaqYIfEws)C?0~FIRP24|J&2KyIH-+=JO6-5$%MO{p zN&ZL2f0O*{t^Zl+{Lhk|v;G~)Keyojvdy0N{~gIccjNz3vj2mBNAk~&`M;EWC;Q)# z{Bw`~FC|Ch{5z8WXA9tep267Me@F7qrTo8ab8_ClBl)LJ_`gd2>nWO<|L;itr5^rP zwDVi5SP|IMIba(8bx-FN{1W|_JpFIK{H==k=PdW1T8yIPmpFHO+nct$KmY&RISgmn z&lf$K7V>ygiWdzH^&jevj6Yw&4}VGc?Zym#)QuUxwDO-4{`r=TKPCL1y}yHD0{G4i z^m2U*+HwVi2`}Us!38H$IKWyhNJ6;F58UDN8rUENwPumR2WtqUPwpVx7N!m{0cENO z-D~Jm5eV(iH6YqiCMM9eqYpzPv}5O@Y-70{~m_5AXm0 literal 0 HcmV?d00001 diff --git a/doc/cheatsheet/README.txt b/doc/cheatsheet/README.txt new file mode 100644 index 00000000..0eae39f3 --- /dev/null +++ b/doc/cheatsheet/README.txt @@ -0,0 +1,8 @@ +The Pandas Cheat Sheet was created using Microsoft Powerpoint 2013. 
+To create the PDF version, within Powerpoint, simply do a "Save As" +and pick "PDF" as the format. + +This cheat sheet was inspired by the RStudio Data Wrangling Cheatsheet[1], written by Irv Lustig, Princeton Consultants[2]. + +[1]: https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf +[2]: http://www.princetonoptimization.com/ diff --git a/doc/data/air_quality_long.csv b/doc/data/air_quality_long.csv new file mode 100644 index 00000000..6225d65d --- /dev/null +++ b/doc/data/air_quality_long.csv @@ -0,0 +1,5273 @@ +city,country,date.utc,location,parameter,value,unit +Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,pm25,12.0,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-06-08 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-06 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-04 01:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-06-03 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ 
+Antwerpen,BE,2019-06-02 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,pm25,15.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,pm25,20.5,µg/m³ +Antwerpen,BE,2019-05-20 17:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 16:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,pm25,17.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,pm25,28.0,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,pm25,35.5,µg/m³ 
+Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,pm25,40.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,pm25,35.0,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,pm25,34.0,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,pm25,42.5,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,pm25,56.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,pm25,58.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,pm25,60.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,pm25,56.5,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,pm25,52.0,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,pm25,49.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,pm25,42.0,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,pm25,40.5,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,pm25,37.0,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-05-17 01:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,pm25,12.5,µg/m³ 
+Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,pm25,11.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-05-06 02:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-05-06 01:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-05-05 02:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-05-05 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-04 02:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-04 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-03 02:00:00+00:00,BETR801,pm25,9.5,µg/m³ +Antwerpen,BE,2019-05-03 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-05-02 02:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-02 01:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-01 02:00:00+00:00,BETR801,pm25,28.5,µg/m³ +Antwerpen,BE,2019-05-01 01:00:00+00:00,BETR801,pm25,34.5,µg/m³ +Antwerpen,BE,2019-04-30 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-04-30 01:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-04-29 
02:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-04-29 01:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-04-28 02:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-04-28 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-04-27 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-04-27 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-04-26 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-04-26 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-04-25 02:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-04-25 01:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-04-24 02:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-04-24 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-04-23 02:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-23 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-22 02:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-04-22 01:00:00+00:00,BETR801,pm25,32.5,µg/m³ +Antwerpen,BE,2019-04-21 02:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-21 01:00:00+00:00,BETR801,pm25,27.5,µg/m³ +Antwerpen,BE,2019-04-20 02:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-20 01:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-19 01:00:00+00:00,BETR801,pm25,20.0,µg/m³ +Antwerpen,BE,2019-04-18 02:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-18 01:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-04-17 03:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-17 02:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-04-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-04-16 02:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-16 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 15:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 14:00:00+00:00,BETR801,pm25,25.5,µg/m³ +Antwerpen,BE,2019-04-15 13:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 
12:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-04-15 11:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-15 10:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-15 09:00:00+00:00,BETR801,pm25,21.5,µg/m³ +Antwerpen,BE,2019-04-15 08:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 07:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-04-15 06:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-15 05:00:00+00:00,BETR801,pm25,23.0,µg/m³ +Antwerpen,BE,2019-04-15 04:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-04-15 03:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-15 02:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-15 01:00:00+00:00,BETR801,pm25,25.5,µg/m³ +Antwerpen,BE,2019-04-12 02:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-04-12 01:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-04-11 02:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-04-11 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-04-10 02:00:00+00:00,BETR801,pm25,26.0,µg/m³ +Antwerpen,BE,2019-04-10 01:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-04-09 13:00:00+00:00,BETR801,pm25,38.0,µg/m³ +Antwerpen,BE,2019-04-09 12:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-04-09 11:00:00+00:00,BETR801,pm25,45.0,µg/m³ +Antwerpen,BE,2019-04-09 10:00:00+00:00,BETR801,pm25,44.5,µg/m³ +Antwerpen,BE,2019-04-09 09:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-04-09 08:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-04-09 07:00:00+00:00,BETR801,pm25,46.5,µg/m³ +Antwerpen,BE,2019-04-09 06:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-04-09 05:00:00+00:00,BETR801,pm25,68.0,µg/m³ +Antwerpen,BE,2019-04-09 04:00:00+00:00,BETR801,pm25,83.5,µg/m³ +Antwerpen,BE,2019-04-09 03:00:00+00:00,BETR801,pm25,99.0,µg/m³ +Antwerpen,BE,2019-04-09 02:00:00+00:00,BETR801,pm25,91.5,µg/m³ +Antwerpen,BE,2019-04-09 01:00:00+00:00,BETR801,pm25,76.0,µg/m³ +London,GB,2019-06-21 
00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-19 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-18 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 
23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 
18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 
14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 
07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 
02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 
18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 
14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 
09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 
04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-06-06 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-05 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ 
+London,GB,2019-06-04 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-06-03 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ 
+London,GB,2019-06-02 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-06-01 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,pm25,5.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-30 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ 
+London,GB,2019-05-29 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-28 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-27 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ 
+London,GB,2019-05-26 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ 
+London,GB,2019-05-25 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-23 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ 
+London,GB,2019-05-22 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ 
+London,GB,2019-05-21 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London 
Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 
04:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ 
+London,GB,2019-05-18 00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London 
Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 
17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 
12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 
08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 
04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ 
+London,GB,2019-05-10 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 
04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-06 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-06 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-06 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 
23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-05 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-05 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-05 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 
19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-04 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-04 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-04 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-04 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ 
+London,GB,2019-05-03 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-03 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-03 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-03 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-02 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-02 16:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-02 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-02 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-02 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-02 12:00:00+00:00,London 
Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-02 11:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-02 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 07:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 06:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 05:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 04:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 03:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 02:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 01:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-02 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 22:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 21:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 20:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 18:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 17:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 
08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 07:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 06:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-01 05:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-01 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-01 00:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-30 23:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 22:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 21:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 20:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 19:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 18:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 17:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 16:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 15:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 14:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 13:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 12:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 11:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 10:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-30 08:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 07:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 06:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 04:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-30 03:00:00+00:00,London Westminster,pm25,17.0,µg/m³ 
+London,GB,2019-04-30 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-30 01:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-30 00:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-29 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-29 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-29 21:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-29 20:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-29 19:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-29 18:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 17:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 16:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 11:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-29 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-29 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-29 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-29 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-29 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-29 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-29 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-29 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-29 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-29 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 23:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-28 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-04-28 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-04-28 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-28 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-27 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 23:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-26 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 19:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-04-25 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-25 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-04-25 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-25 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-25 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-25 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-25 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-25 03:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-25 02:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-25 00:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-24 23:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-24 22:00:00+00:00,London Westminster,pm25,23.0,µg/m³ +London,GB,2019-04-24 21:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-24 20:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-24 19:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-24 18:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-24 17:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-24 16:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 15:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 
14:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 13:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 12:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 11:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 10:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-24 09:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-24 08:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-24 07:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-24 06:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-24 05:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-24 04:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-24 03:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-24 02:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-24 00:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-23 23:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-23 22:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-23 21:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-23 20:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-23 19:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-23 18:00:00+00:00,London Westminster,pm25,33.0,µg/m³ +London,GB,2019-04-23 17:00:00+00:00,London Westminster,pm25,33.0,µg/m³ +London,GB,2019-04-23 16:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-23 15:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-23 14:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-23 13:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-23 12:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-23 11:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-23 10:00:00+00:00,London Westminster,pm25,35.0,µg/m³ 
+London,GB,2019-04-23 09:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-23 08:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-23 07:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-23 06:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-23 05:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-23 04:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-23 03:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-23 02:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-23 01:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-23 00:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-22 23:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-22 22:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-22 21:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-22 20:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-22 19:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-22 18:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-22 17:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-22 16:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 15:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 14:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 13:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 12:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 11:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-22 09:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-22 08:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-22 07:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-22 06:00:00+00:00,London 
Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-22 05:00:00+00:00,London Westminster,pm25,33.0,µg/m³ +London,GB,2019-04-22 04:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-22 03:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-22 02:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-22 01:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-22 00:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 23:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 22:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 21:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 20:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 19:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 18:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-21 17:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 16:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 15:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 14:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-21 13:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 12:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 11:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 10:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 09:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 08:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 07:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 06:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-21 05:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 04:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 03:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 
02:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 01:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-21 00:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-20 23:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-20 22:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 21:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 20:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 19:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 18:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 17:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-20 16:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 15:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 14:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 13:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 12:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 11:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 10:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 09:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 08:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 07:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 06:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 05:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-20 04:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 03:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 02:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 01:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-20 00:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 23:00:00+00:00,London Westminster,pm25,27.0,µg/m³ 
+London,GB,2019-04-19 22:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 21:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 20:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 19:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-19 18:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 17:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-19 16:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-19 15:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-19 14:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 13:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 12:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 11:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 10:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 09:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-19 08:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-19 07:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-19 06:00:00+00:00,London Westminster,pm25,31.0,µg/m³ +London,GB,2019-04-19 05:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-19 04:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-19 03:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-19 02:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-19 00:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-18 23:00:00+00:00,London Westminster,pm25,45.0,µg/m³ +London,GB,2019-04-18 22:00:00+00:00,London Westminster,pm25,47.0,µg/m³ +London,GB,2019-04-18 21:00:00+00:00,London Westminster,pm25,49.0,µg/m³ +London,GB,2019-04-18 20:00:00+00:00,London Westminster,pm25,50.0,µg/m³ +London,GB,2019-04-18 19:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 18:00:00+00:00,London 
Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 17:00:00+00:00,London Westminster,pm25,51.0,µg/m³ +London,GB,2019-04-18 16:00:00+00:00,London Westminster,pm25,52.0,µg/m³ +London,GB,2019-04-18 15:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 14:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 13:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 12:00:00+00:00,London Westminster,pm25,54.0,µg/m³ +London,GB,2019-04-18 11:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 10:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 09:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 08:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 07:00:00+00:00,London Westminster,pm25,55.0,µg/m³ +London,GB,2019-04-18 06:00:00+00:00,London Westminster,pm25,54.0,µg/m³ +London,GB,2019-04-18 05:00:00+00:00,London Westminster,pm25,53.0,µg/m³ +London,GB,2019-04-18 04:00:00+00:00,London Westminster,pm25,52.0,µg/m³ +London,GB,2019-04-18 03:00:00+00:00,London Westminster,pm25,50.0,µg/m³ +London,GB,2019-04-18 02:00:00+00:00,London Westminster,pm25,48.0,µg/m³ +London,GB,2019-04-18 01:00:00+00:00,London Westminster,pm25,46.0,µg/m³ +London,GB,2019-04-18 00:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-17 23:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 22:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-17 21:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 20:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 19:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 18:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 17:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 16:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-17 15:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 
14:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 13:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 12:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 11:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-17 09:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 08:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-17 07:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 06:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-17 05:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-17 04:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 03:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-17 02:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-17 00:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 23:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 22:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 21:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 20:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 19:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 18:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 17:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-16 15:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 14:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 13:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-16 12:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-16 11:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-16 10:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-16 09:00:00+00:00,London Westminster,pm25,37.0,µg/m³ 
+London,GB,2019-04-16 08:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-16 07:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-16 06:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-16 05:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-16 04:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-16 03:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-16 02:00:00+00:00,London Westminster,pm25,31.0,µg/m³ +London,GB,2019-04-16 00:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 23:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 22:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 21:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 20:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 19:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-15 18:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 17:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 16:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-15 15:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-15 14:00:00+00:00,London Westminster,pm25,28.0,µg/m³ +London,GB,2019-04-15 13:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-15 12:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-15 11:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-15 10:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-15 09:00:00+00:00,London Westminster,pm25,25.0,µg/m³ +London,GB,2019-04-15 08:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-15 07:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-15 06:00:00+00:00,London Westminster,pm25,23.0,µg/m³ +London,GB,2019-04-15 05:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-15 04:00:00+00:00,London 
Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-15 03:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-15 02:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-15 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-15 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-14 23:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-14 22:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-14 21:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-14 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-14 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-14 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 16:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 11:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-14 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-14 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-14 
00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-13 23:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 13:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-13 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 04:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-13 00:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 23:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ 
+London,GB,2019-04-12 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 15:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 14:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 12:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-12 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 10:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 09:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-12 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-12 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 15:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-04-11 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-04-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-11 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-04-10 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-10 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-04-10 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-04-10 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-10 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-04-10 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-04-10 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-10 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-04-10 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-04-10 
10:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-04-10 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-04-10 08:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-10 07:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-04-10 06:00:00+00:00,London Westminster,pm25,21.0,µg/m³ +London,GB,2019-04-10 05:00:00+00:00,London Westminster,pm25,22.0,µg/m³ +London,GB,2019-04-10 04:00:00+00:00,London Westminster,pm25,24.0,µg/m³ +London,GB,2019-04-10 03:00:00+00:00,London Westminster,pm25,26.0,µg/m³ +London,GB,2019-04-10 02:00:00+00:00,London Westminster,pm25,27.0,µg/m³ +London,GB,2019-04-10 01:00:00+00:00,London Westminster,pm25,29.0,µg/m³ +London,GB,2019-04-10 00:00:00+00:00,London Westminster,pm25,30.0,µg/m³ +London,GB,2019-04-09 23:00:00+00:00,London Westminster,pm25,32.0,µg/m³ +London,GB,2019-04-09 22:00:00+00:00,London Westminster,pm25,34.0,µg/m³ +London,GB,2019-04-09 21:00:00+00:00,London Westminster,pm25,35.0,µg/m³ +London,GB,2019-04-09 20:00:00+00:00,London Westminster,pm25,36.0,µg/m³ +London,GB,2019-04-09 19:00:00+00:00,London Westminster,pm25,37.0,µg/m³ +London,GB,2019-04-09 18:00:00+00:00,London Westminster,pm25,38.0,µg/m³ +London,GB,2019-04-09 17:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-09 16:00:00+00:00,London Westminster,pm25,39.0,µg/m³ +London,GB,2019-04-09 15:00:00+00:00,London Westminster,pm25,40.0,µg/m³ +London,GB,2019-04-09 14:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-09 13:00:00+00:00,London Westminster,pm25,41.0,µg/m³ +London,GB,2019-04-09 12:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 11:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 10:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 09:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 08:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 07:00:00+00:00,London Westminster,pm25,43.0,µg/m³ 
+London,GB,2019-04-09 06:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-09 05:00:00+00:00,London Westminster,pm25,44.0,µg/m³ +London,GB,2019-04-09 04:00:00+00:00,London Westminster,pm25,43.0,µg/m³ +London,GB,2019-04-09 03:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +London,GB,2019-04-09 02:00:00+00:00,London Westminster,pm25,42.0,µg/m³ +Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-06-20 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-20 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-20 17:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-20 16:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-20 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-20 14:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-20 13:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-19 10:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-06-19 09:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-06-18 22:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-06-18 21:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-18 20:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-18 19:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-06-18 08:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-06-18 07:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-06-18 06:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-06-18 05:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-06-18 04:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-18 03:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-06-18 02:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-18 01:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-06-18 00:00:00+00:00,FR04014,no2,66.2,µg/m³ +Paris,FR,2019-06-17 
23:00:00+00:00,FR04014,no2,73.3,µg/m³ +Paris,FR,2019-06-17 22:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-06-17 21:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-17 20:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-06-17 19:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 18:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-17 17:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-06-17 16:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-06-17 15:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-17 14:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-17 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-17 12:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-06-17 11:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 10:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-17 09:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-06-17 08:00:00+00:00,FR04014,no2,51.6,µg/m³ +Paris,FR,2019-06-17 07:00:00+00:00,FR04014,no2,54.4,µg/m³ +Paris,FR,2019-06-17 06:00:00+00:00,FR04014,no2,52.3,µg/m³ +Paris,FR,2019-06-17 05:00:00+00:00,FR04014,no2,44.8,µg/m³ +Paris,FR,2019-06-17 04:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-06-17 03:00:00+00:00,FR04014,no2,49.1,µg/m³ +Paris,FR,2019-06-17 02:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-06-17 01:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-17 00:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-06-16 23:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-06-16 22:00:00+00:00,FR04014,no2,56.6,µg/m³ +Paris,FR,2019-06-16 21:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-16 20:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-16 18:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-06-16 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-16 16:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-16 15:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ 
+Paris,FR,2019-06-16 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 12:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 11:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-06-16 10:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 09:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-16 08:00:00+00:00,FR04014,no2,9.9,µg/m³ +Paris,FR,2019-06-16 07:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-16 06:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-06-16 05:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-16 04:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-16 03:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 02:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-16 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-06-16 00:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-15 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-15 22:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-15 21:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-06-15 20:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-15 19:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-15 18:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 17:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 16:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-15 15:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-06-15 14:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-15 13:00:00+00:00,FR04014,no2,9.0,µg/m³ +Paris,FR,2019-06-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-15 11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 10:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-06-15 09:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 08:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-06-15 07:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-15 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-15 02:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-06-15 01:00:00+00:00,FR04014,no2,29.0,µg/m³ 
+Paris,FR,2019-06-15 00:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-14 23:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-14 22:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-14 21:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-06-14 20:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-14 19:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-14 18:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-14 17:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-14 16:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-06-14 15:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-14 14:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-14 12:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-06-14 11:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-14 10:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-06-14 09:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-06-14 08:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-06-14 07:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-14 06:00:00+00:00,FR04014,no2,64.3,µg/m³ +Paris,FR,2019-06-14 05:00:00+00:00,FR04014,no2,49.3,µg/m³ +Paris,FR,2019-06-14 04:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-14 03:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-06-14 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-06-14 01:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-14 00:00:00+00:00,FR04014,no2,74.2,µg/m³ +Paris,FR,2019-06-13 23:00:00+00:00,FR04014,no2,78.3,µg/m³ +Paris,FR,2019-06-13 22:00:00+00:00,FR04014,no2,77.9,µg/m³ +Paris,FR,2019-06-13 21:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-13 20:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-06-13 19:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-13 18:00:00+00:00,FR04014,no2,24.0,µg/m³ +Paris,FR,2019-06-13 17:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-13 16:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-13 
15:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-13 14:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-13 13:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-06-13 12:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-13 11:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-06-13 10:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-13 09:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-13 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-13 07:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-13 06:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-13 05:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-06-13 04:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-13 03:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-06-13 02:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-13 01:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-06-13 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-12 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-06-12 22:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-06-12 21:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-12 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-06-12 19:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-12 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-12 17:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-06-12 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-06-12 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-06-12 14:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-06-12 13:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-12 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-12 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 09:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-12 08:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-12 07:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-06-12 06:00:00+00:00,FR04014,no2,38.4,µg/m³ 
+Paris,FR,2019-06-12 05:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-12 04:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-06-12 03:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-06-12 02:00:00+00:00,FR04014,no2,34.7,µg/m³ +Paris,FR,2019-06-12 01:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-12 00:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-11 23:00:00+00:00,FR04014,no2,41.5,µg/m³ +Paris,FR,2019-06-11 22:00:00+00:00,FR04014,no2,59.4,µg/m³ +Paris,FR,2019-06-11 21:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-06-11 20:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-11 19:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-11 18:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-11 17:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-11 16:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-11 15:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-06-11 14:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-11 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-11 12:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-06-11 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-06-11 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-11 09:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-11 08:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-11 07:00:00+00:00,FR04014,no2,58.0,µg/m³ +Paris,FR,2019-06-11 06:00:00+00:00,FR04014,no2,55.4,µg/m³ +Paris,FR,2019-06-11 05:00:00+00:00,FR04014,no2,58.7,µg/m³ +Paris,FR,2019-06-11 04:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-06-11 03:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-06-11 02:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-11 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-11 00:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-10 23:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-10 22:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-10 21:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-06-10 
20:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-10 19:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-06-10 18:00:00+00:00,FR04014,no2,18.4,µg/m³ +Paris,FR,2019-06-10 17:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-10 16:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-10 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 14:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-06-10 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-10 12:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-10 10:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-10 09:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-06-10 08:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-10 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-10 06:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-10 05:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-06-10 04:00:00+00:00,FR04014,no2,13.7,µg/m³ +Paris,FR,2019-06-10 03:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-10 02:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-10 01:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-10 00:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-06-09 23:00:00+00:00,FR04014,no2,39.9,µg/m³ +Paris,FR,2019-06-09 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-06-09 21:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-06-09 20:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-06-09 19:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-06-09 18:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-09 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-09 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-09 15:00:00+00:00,FR04014,no2,7.2,µg/m³ +Paris,FR,2019-06-09 14:00:00+00:00,FR04014,no2,7.9,µg/m³ +Paris,FR,2019-06-09 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-09 12:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 11:00:00+00:00,FR04014,no2,14.6,µg/m³ 
+Paris,FR,2019-06-09 10:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-09 09:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-09 08:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-09 07:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-09 06:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-09 05:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-06-09 04:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-06-09 03:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-09 02:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-06-09 01:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-06-09 00:00:00+00:00,FR04014,no2,55.9,µg/m³ +Paris,FR,2019-06-08 23:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-06-08 22:00:00+00:00,FR04014,no2,34.8,µg/m³ +Paris,FR,2019-06-08 21:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-08 18:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-06-08 17:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-06-08 16:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 14:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 13:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-08 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-08 11:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-08 10:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 08:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-08 07:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-08 06:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-08 05:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 04:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-08 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-08 02:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-08 01:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-08 00:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-06-07 
23:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-07 22:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-06-07 21:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-06-07 20:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-07 19:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-06-07 18:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-07 17:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 15:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-07 14:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-07 13:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-07 12:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-07 11:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-07 10:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-06-07 08:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-07 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-07 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-06 14:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-06 13:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-06 12:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-06 11:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-06-06 10:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-06-06 09:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-06-06 08:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-06-06 07:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-06-06 06:00:00+00:00,FR04014,no2,40.5,µg/m³ +Paris,FR,2019-06-06 05:00:00+00:00,FR04014,no2,40.3,µg/m³ +Paris,FR,2019-06-06 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-06-06 03:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-06-06 02:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-06 01:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-06 00:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-06-05 23:00:00+00:00,FR04014,no2,31.8,µg/m³ 
+Paris,FR,2019-06-05 22:00:00+00:00,FR04014,no2,30.3,µg/m³ +Paris,FR,2019-06-05 21:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-06-05 20:00:00+00:00,FR04014,no2,37.5,µg/m³ +Paris,FR,2019-06-05 19:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-06-05 18:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-06-05 17:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-06-05 16:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-05 15:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-05 14:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-05 13:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-06-05 12:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-06-05 11:00:00+00:00,FR04014,no2,59.0,µg/m³ +Paris,FR,2019-06-05 10:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-06-05 09:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-06-05 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-05 07:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-05 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-05 05:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-05 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-05 03:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-06-05 02:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-06-05 01:00:00+00:00,FR04014,no2,10.8,µg/m³ +Paris,FR,2019-06-05 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-04 23:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-04 22:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-06-04 21:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 20:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-04 19:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-04 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-06-04 17:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-04 16:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 15:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-06-04 14:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-04 
13:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-06-04 12:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-06-04 11:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-04 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-04 09:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-06-04 08:00:00+00:00,FR04014,no2,50.8,µg/m³ +Paris,FR,2019-06-04 07:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-04 06:00:00+00:00,FR04014,no2,47.7,µg/m³ +Paris,FR,2019-06-04 05:00:00+00:00,FR04014,no2,36.5,µg/m³ +Paris,FR,2019-06-04 04:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-04 03:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-06-04 02:00:00+00:00,FR04014,no2,35.0,µg/m³ +Paris,FR,2019-06-04 01:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-04 00:00:00+00:00,FR04014,no2,52.4,µg/m³ +Paris,FR,2019-06-03 23:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-03 22:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-06-03 21:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-06-03 20:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-06-03 19:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-03 18:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-03 17:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-06-03 16:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-03 15:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-03 14:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-03 13:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-03 12:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-03 11:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-03 10:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-03 09:00:00+00:00,FR04014,no2,46.0,µg/m³ +Paris,FR,2019-06-03 08:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-03 07:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-06-03 06:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-06-03 05:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-03 04:00:00+00:00,FR04014,no2,11.4,µg/m³ 
+Paris,FR,2019-06-03 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-03 02:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-03 01:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-03 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-02 23:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-02 22:00:00+00:00,FR04014,no2,27.6,µg/m³ +Paris,FR,2019-06-02 21:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-02 20:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-02 19:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-02 18:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-02 17:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 16:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 15:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-06-02 14:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-02 13:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-02 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-02 11:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-02 10:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 09:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-06-02 08:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-02 07:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 06:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-02 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-02 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-02 03:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-02 02:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-02 01:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-02 00:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-06-01 23:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-01 22:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-06-01 21:00:00+00:00,FR04014,no2,49.4,µg/m³ +Paris,FR,2019-06-01 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-01 19:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-01 
18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-06-01 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 16:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 15:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 14:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-06-01 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 12:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-01 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-01 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-01 09:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-01 08:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-06-01 07:00:00+00:00,FR04014,no2,46.4,µg/m³ +Paris,FR,2019-06-01 06:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-01 02:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-01 01:00:00+00:00,FR04014,no2,74.8,µg/m³ +Paris,FR,2019-06-01 00:00:00+00:00,FR04014,no2,84.7,µg/m³ +Paris,FR,2019-05-31 23:00:00+00:00,FR04014,no2,81.7,µg/m³ +Paris,FR,2019-05-31 22:00:00+00:00,FR04014,no2,68.0,µg/m³ +Paris,FR,2019-05-31 21:00:00+00:00,FR04014,no2,60.2,µg/m³ +Paris,FR,2019-05-31 20:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-31 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-31 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-31 17:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-31 16:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-31 15:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 14:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 13:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-31 12:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-31 11:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-31 10:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-31 09:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-31 08:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-31 07:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-31 06:00:00+00:00,FR04014,no2,38.6,µg/m³ 
+Paris,FR,2019-05-31 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-31 04:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-31 03:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-31 02:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-31 01:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-31 00:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-05-30 23:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-30 22:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-30 21:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-05-30 20:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-30 19:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-30 18:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-30 17:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-30 16:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-30 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-30 14:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 13:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-30 12:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-05-30 11:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-30 09:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-30 08:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-05-30 07:00:00+00:00,FR04014,no2,18.3,µg/m³ +Paris,FR,2019-05-30 06:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-30 05:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-30 04:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-30 03:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-30 02:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-30 01:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-05-30 00:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-29 23:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-29 22:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 21:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-29 
20:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-29 19:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-29 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-29 16:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-29 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 14:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 13:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-29 12:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-29 11:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-29 10:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-05-29 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-29 08:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-05-29 07:00:00+00:00,FR04014,no2,50.5,µg/m³ +Paris,FR,2019-05-29 06:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-29 05:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-05-29 04:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 03:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-29 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 01:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-29 00:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-28 23:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-28 22:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-05-28 21:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 20:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 19:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 18:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-28 17:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-28 16:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-28 15:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-28 14:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-28 13:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 12:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-28 11:00:00+00:00,FR04014,no2,20.5,µg/m³ 
+Paris,FR,2019-05-28 10:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-28 09:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-28 08:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-28 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-28 06:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-28 05:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-28 04:00:00+00:00,FR04014,no2,8.9,µg/m³ +Paris,FR,2019-05-28 03:00:00+00:00,FR04014,no2,6.1,µg/m³ +Paris,FR,2019-05-28 02:00:00+00:00,FR04014,no2,6.4,µg/m³ +Paris,FR,2019-05-28 01:00:00+00:00,FR04014,no2,8.2,µg/m³ +Paris,FR,2019-05-28 00:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-27 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-05-27 22:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-27 21:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-27 20:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-27 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-27 18:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-27 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-27 15:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 14:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 13:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-27 12:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 11:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-27 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-27 09:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-05-27 08:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-27 07:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-27 06:00:00+00:00,FR04014,no2,29.1,µg/m³ +Paris,FR,2019-05-27 05:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-27 04:00:00+00:00,FR04014,no2,6.5,µg/m³ +Paris,FR,2019-05-27 03:00:00+00:00,FR04014,no2,4.8,µg/m³ +Paris,FR,2019-05-27 02:00:00+00:00,FR04014,no2,5.9,µg/m³ +Paris,FR,2019-05-27 01:00:00+00:00,FR04014,no2,7.1,µg/m³ 
+Paris,FR,2019-05-27 00:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-05-26 23:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 22:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-26 21:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-26 20:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-26 19:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-26 18:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-26 17:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-26 16:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-05-26 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-26 14:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-26 13:00:00+00:00,FR04014,no2,12.5,µg/m³ +Paris,FR,2019-05-26 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-05-26 11:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-26 10:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-26 09:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 08:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-26 07:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-26 06:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-26 05:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-26 04:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-26 03:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-26 02:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-26 01:00:00+00:00,FR04014,no2,49.8,µg/m³ +Paris,FR,2019-05-26 00:00:00+00:00,FR04014,no2,67.0,µg/m³ +Paris,FR,2019-05-25 23:00:00+00:00,FR04014,no2,70.2,µg/m³ +Paris,FR,2019-05-25 22:00:00+00:00,FR04014,no2,63.9,µg/m³ +Paris,FR,2019-05-25 21:00:00+00:00,FR04014,no2,39.5,µg/m³ +Paris,FR,2019-05-25 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-25 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-25 18:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-25 17:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-25 16:00:00+00:00,FR04014,no2,31.9,µg/m³ +Paris,FR,2019-05-25 
15:00:00+00:00,FR04014,no2,30.0,µg/m³ +Paris,FR,2019-05-25 14:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-25 13:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-25 12:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-05-25 11:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-25 10:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-05-25 09:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-25 08:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-05-25 07:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-05-25 06:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-25 02:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-25 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-25 00:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-05-24 23:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-24 22:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-24 21:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-05-24 20:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-24 19:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-24 18:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-24 17:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-24 16:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-24 15:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-24 14:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-24 13:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-24 12:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-24 11:00:00+00:00,FR04014,no2,40.6,µg/m³ +Paris,FR,2019-05-24 10:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-24 09:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-24 08:00:00+00:00,FR04014,no2,45.9,µg/m³ +Paris,FR,2019-05-24 07:00:00+00:00,FR04014,no2,54.8,µg/m³ +Paris,FR,2019-05-24 06:00:00+00:00,FR04014,no2,40.7,µg/m³ +Paris,FR,2019-05-24 05:00:00+00:00,FR04014,no2,35.9,µg/m³ +Paris,FR,2019-05-24 04:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-24 03:00:00+00:00,FR04014,no2,19.4,µg/m³ 
+Paris,FR,2019-05-24 02:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-24 01:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-24 00:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-05-23 23:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-23 22:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-23 21:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-05-23 20:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-23 19:00:00+00:00,FR04014,no2,28.0,µg/m³ +Paris,FR,2019-05-23 18:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-23 17:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-23 16:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-23 15:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-23 14:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-23 13:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-05-23 12:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-23 11:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-23 10:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-23 09:00:00+00:00,FR04014,no2,79.4,µg/m³ +Paris,FR,2019-05-23 08:00:00+00:00,FR04014,no2,97.0,µg/m³ +Paris,FR,2019-05-23 07:00:00+00:00,FR04014,no2,91.8,µg/m³ +Paris,FR,2019-05-23 06:00:00+00:00,FR04014,no2,79.6,µg/m³ +Paris,FR,2019-05-23 05:00:00+00:00,FR04014,no2,68.7,µg/m³ +Paris,FR,2019-05-23 04:00:00+00:00,FR04014,no2,71.9,µg/m³ +Paris,FR,2019-05-23 03:00:00+00:00,FR04014,no2,76.8,µg/m³ +Paris,FR,2019-05-23 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-05-23 01:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-05-23 00:00:00+00:00,FR04014,no2,53.3,µg/m³ +Paris,FR,2019-05-22 23:00:00+00:00,FR04014,no2,62.1,µg/m³ +Paris,FR,2019-05-22 22:00:00+00:00,FR04014,no2,29.8,µg/m³ +Paris,FR,2019-05-22 21:00:00+00:00,FR04014,no2,37.7,µg/m³ +Paris,FR,2019-05-22 20:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-05-22 19:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-22 18:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-22 
17:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-05-22 16:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-22 15:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-22 14:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-22 13:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-05-22 12:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-05-22 11:00:00+00:00,FR04014,no2,42.6,µg/m³ +Paris,FR,2019-05-22 10:00:00+00:00,FR04014,no2,57.8,µg/m³ +Paris,FR,2019-05-22 09:00:00+00:00,FR04014,no2,63.1,µg/m³ +Paris,FR,2019-05-22 08:00:00+00:00,FR04014,no2,70.8,µg/m³ +Paris,FR,2019-05-22 07:00:00+00:00,FR04014,no2,75.4,µg/m³ +Paris,FR,2019-05-22 06:00:00+00:00,FR04014,no2,75.7,µg/m³ +Paris,FR,2019-05-22 05:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-05-22 04:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-05-22 03:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-22 02:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-22 01:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-22 00:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-05-21 23:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-21 22:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-21 21:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-05-21 20:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-05-21 19:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-05-21 18:00:00+00:00,FR04014,no2,54.3,µg/m³ +Paris,FR,2019-05-21 17:00:00+00:00,FR04014,no2,75.0,µg/m³ +Paris,FR,2019-05-21 16:00:00+00:00,FR04014,no2,42.3,µg/m³ +Paris,FR,2019-05-21 15:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-21 14:00:00+00:00,FR04014,no2,47.8,µg/m³ +Paris,FR,2019-05-21 13:00:00+00:00,FR04014,no2,49.7,µg/m³ +Paris,FR,2019-05-21 12:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-05-21 11:00:00+00:00,FR04014,no2,25.5,µg/m³ +Paris,FR,2019-05-21 10:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-21 09:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-21 08:00:00+00:00,FR04014,no2,54.2,µg/m³ 
+Paris,FR,2019-05-21 07:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-21 06:00:00+00:00,FR04014,no2,62.6,µg/m³ +Paris,FR,2019-05-21 05:00:00+00:00,FR04014,no2,38.0,µg/m³ +Paris,FR,2019-05-21 04:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-21 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-21 02:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-21 01:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-21 00:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-20 23:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-20 22:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-05-20 21:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-20 20:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-20 19:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-20 18:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-20 17:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-20 16:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-20 15:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-20 14:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-05-20 13:00:00+00:00,FR04014,no2,23.7,µg/m³ +Paris,FR,2019-05-20 12:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-20 11:00:00+00:00,FR04014,no2,35.4,µg/m³ +Paris,FR,2019-05-20 10:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-05-20 09:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-05-20 08:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-20 07:00:00+00:00,FR04014,no2,46.9,µg/m³ +Paris,FR,2019-05-20 06:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-20 05:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-20 04:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-20 03:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-05-20 02:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-20 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-20 00:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-19 23:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-19 
22:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-19 21:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-19 20:00:00+00:00,FR04014,no2,35.6,µg/m³ +Paris,FR,2019-05-19 19:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-05-19 18:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-05-19 17:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-19 16:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-19 15:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 14:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-19 13:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-19 12:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-19 11:00:00+00:00,FR04014,no2,32.6,µg/m³ +Paris,FR,2019-05-19 10:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-05-19 09:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-05-19 08:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 07:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-19 06:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-19 05:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-05-19 04:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-19 03:00:00+00:00,FR04014,no2,36.4,µg/m³ +Paris,FR,2019-05-19 02:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-19 01:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-19 00:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-05-18 23:00:00+00:00,FR04014,no2,50.2,µg/m³ +Paris,FR,2019-05-18 22:00:00+00:00,FR04014,no2,62.5,µg/m³ +Paris,FR,2019-05-18 21:00:00+00:00,FR04014,no2,59.3,µg/m³ +Paris,FR,2019-05-18 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-18 19:00:00+00:00,FR04014,no2,67.5,µg/m³ +Paris,FR,2019-05-18 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-05-18 17:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-18 16:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-18 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-18 14:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-05-18 13:00:00+00:00,FR04014,no2,10.5,µg/m³ 
+Paris,FR,2019-05-18 12:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-18 11:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-18 10:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-18 09:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-18 08:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-18 07:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-18 06:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-18 05:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-18 04:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-18 03:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-18 02:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-18 01:00:00+00:00,FR04014,no2,37.4,µg/m³ +Paris,FR,2019-05-18 00:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-05-17 23:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-17 22:00:00+00:00,FR04014,no2,28.2,µg/m³ +Paris,FR,2019-05-17 21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-17 20:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-17 19:00:00+00:00,FR04014,no2,24.7,µg/m³ +Paris,FR,2019-05-17 18:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-17 17:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-17 16:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-05-17 15:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-17 14:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-17 13:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-17 12:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-17 11:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-17 10:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-05-17 09:00:00+00:00,FR04014,no2,60.5,µg/m³ +Paris,FR,2019-05-17 08:00:00+00:00,FR04014,no2,57.5,µg/m³ +Paris,FR,2019-05-17 07:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-05-17 06:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-17 05:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-17 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-17 
03:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-05-17 02:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-17 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-17 00:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-16 23:00:00+00:00,FR04014,no2,43.7,µg/m³ +Paris,FR,2019-05-16 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-05-16 21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-16 20:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-05-16 18:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-16 17:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-16 15:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-05-16 13:00:00+00:00,FR04014,no2,8.5,µg/m³ +Paris,FR,2019-05-16 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-16 11:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-05-16 10:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 09:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-16 08:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-16 07:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-16 05:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-05-16 04:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-16 03:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-16 02:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-16 01:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-16 00:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-15 23:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-15 22:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-15 21:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-15 20:00:00+00:00,FR04014,no2,30.1,µg/m³ +Paris,FR,2019-05-15 19:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-15 18:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-15 17:00:00+00:00,FR04014,no2,12.9,µg/m³ 
+Paris,FR,2019-05-15 16:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-15 15:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-15 14:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-05-15 13:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-15 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 09:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 08:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-05-15 07:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-15 06:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-15 05:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-15 04:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-15 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-15 02:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-15 01:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-15 00:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-14 23:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-14 22:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-14 21:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-14 20:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-14 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-14 18:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-14 17:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-14 16:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-14 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-14 14:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-14 13:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-14 12:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-05-14 11:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-14 10:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-14 09:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 08:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-14 
07:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-14 06:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-14 05:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-14 04:00:00+00:00,FR04014,no2,31.6,µg/m³ +Paris,FR,2019-05-14 03:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-14 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-14 00:00:00+00:00,FR04014,no2,20.9,µg/m³ +Paris,FR,2019-05-13 23:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-13 22:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-13 21:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-13 20:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-13 19:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-13 18:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-13 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-13 16:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-13 15:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-13 14:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-05-13 13:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-13 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-13 11:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-13 10:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-13 09:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-13 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-13 07:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-05-13 06:00:00+00:00,FR04014,no2,45.2,µg/m³ +Paris,FR,2019-05-13 05:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-13 04:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-05-13 03:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 02:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-13 01:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 00:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-12 23:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-12 22:00:00+00:00,FR04014,no2,46.5,µg/m³ 
+Paris,FR,2019-05-12 21:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-12 20:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-12 19:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-12 18:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-12 17:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-05-12 16:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 15:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-12 14:00:00+00:00,FR04014,no2,9.1,µg/m³ +Paris,FR,2019-05-12 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-05-12 12:00:00+00:00,FR04014,no2,10.9,µg/m³ +Paris,FR,2019-05-12 11:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 10:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 08:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-12 07:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-12 06:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-12 05:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-12 04:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-12 03:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-05-12 02:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-12 01:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-12 00:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-11 23:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-11 22:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-11 21:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-11 20:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-05-11 19:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-11 18:00:00+00:00,FR04014,no2,33.1,µg/m³ +Paris,FR,2019-05-11 17:00:00+00:00,FR04014,no2,32.0,µg/m³ +Paris,FR,2019-05-11 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-11 15:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-11 14:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-11 13:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-11 
12:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-05-11 11:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-11 10:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-05-11 09:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-05-11 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-11 07:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-11 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-11 02:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-11 01:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-11 00:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-10 23:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-10 22:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-10 21:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-10 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-10 19:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-05-10 18:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-10 17:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 16:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-10 15:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-10 14:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-10 13:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-10 12:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-10 11:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-10 10:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-10 09:00:00+00:00,FR04014,no2,53.4,µg/m³ +Paris,FR,2019-05-10 08:00:00+00:00,FR04014,no2,60.7,µg/m³ +Paris,FR,2019-05-10 07:00:00+00:00,FR04014,no2,57.3,µg/m³ +Paris,FR,2019-05-10 06:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-10 05:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 04:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-10 03:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-05-10 02:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-05-10 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-10 00:00:00+00:00,FR04014,no2,22.7,µg/m³ 
+Paris,FR,2019-05-09 23:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-09 22:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-05-09 21:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-09 19:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-09 18:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-09 17:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-05-09 16:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-09 15:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-09 14:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-09 13:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-09 12:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-09 11:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-09 10:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-09 09:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-05-09 08:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-09 07:00:00+00:00,FR04014,no2,49.0,µg/m³ +Paris,FR,2019-05-09 06:00:00+00:00,FR04014,no2,50.7,µg/m³ +Paris,FR,2019-05-09 05:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 04:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-09 03:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-09 02:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-09 01:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-09 00:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-05-08 23:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-08 22:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-08 21:00:00+00:00,FR04014,no2,48.9,µg/m³ +Paris,FR,2019-05-08 20:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-08 19:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-08 18:00:00+00:00,FR04014,no2,27.8,µg/m³ +Paris,FR,2019-05-08 17:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-08 16:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-08 15:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-08 
14:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-08 13:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-05-08 12:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-08 11:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-08 10:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-08 09:00:00+00:00,FR04014,no2,19.7,µg/m³ +Paris,FR,2019-05-08 08:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-08 07:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-08 06:00:00+00:00,FR04014,no2,21.7,µg/m³ +Paris,FR,2019-05-08 05:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-08 04:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-08 03:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-08 02:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-08 01:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-08 00:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-07 23:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-07 22:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-05-07 21:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-07 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-07 19:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-07 18:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-07 17:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-07 16:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-07 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-07 14:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-07 13:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-07 12:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-07 11:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-07 10:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-07 08:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-07 07:00:00+00:00,FR04014,no2,67.9,µg/m³ +Paris,FR,2019-05-07 06:00:00+00:00,FR04014,no2,77.7,µg/m³ +Paris,FR,2019-05-07 05:00:00+00:00,FR04014,no2,72.4,µg/m³ 
+Paris,FR,2019-05-07 04:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-07 03:00:00+00:00,FR04014,no2,50.4,µg/m³ +Paris,FR,2019-05-07 02:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-07 00:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-05-06 23:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-05-06 22:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-06 21:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-06 20:00:00+00:00,FR04014,no2,35.9,µg/m³ +Paris,FR,2019-05-06 19:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-05-06 18:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-06 17:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-05-06 16:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-05-06 15:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-05-06 14:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-06 13:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-06 12:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-05-06 11:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-05-06 10:00:00+00:00,FR04014,no2,42.4,µg/m³ +Paris,FR,2019-05-06 09:00:00+00:00,FR04014,no2,44.2,µg/m³ +Paris,FR,2019-05-06 08:00:00+00:00,FR04014,no2,52.5,µg/m³ +Paris,FR,2019-05-06 07:00:00+00:00,FR04014,no2,68.9,µg/m³ +Paris,FR,2019-05-06 06:00:00+00:00,FR04014,no2,62.4,µg/m³ +Paris,FR,2019-05-06 05:00:00+00:00,FR04014,no2,56.7,µg/m³ +Paris,FR,2019-05-06 04:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-06 03:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-06 02:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-05-06 01:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-05-06 00:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-05 23:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-05 22:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-05 21:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-05-05 20:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-05 
19:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-05 18:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-05 17:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-05 16:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-05 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-05 14:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-05-05 13:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-05 12:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-05 11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-05-05 10:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-05 09:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-05-05 08:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-05 07:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-05 06:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-05 05:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-05 04:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-05 03:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-05 02:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-05-05 01:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-05-05 00:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-04 23:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-05-04 22:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-04 21:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-05-04 20:00:00+00:00,FR04014,no2,33.1,µg/m³ +Paris,FR,2019-05-04 19:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-04 18:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-05-04 17:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-04 16:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-04 15:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-04 14:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-05-04 13:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-04 12:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-04 11:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-04 10:00:00+00:00,FR04014,no2,25.8,µg/m³ 
+Paris,FR,2019-05-04 09:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-04 08:00:00+00:00,FR04014,no2,22.5,µg/m³ +Paris,FR,2019-05-04 07:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-04 06:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-04 05:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-04 04:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-05-04 03:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-04 02:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-04 01:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-04 00:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-05-03 23:00:00+00:00,FR04014,no2,31.3,µg/m³ +Paris,FR,2019-05-03 22:00:00+00:00,FR04014,no2,43.2,µg/m³ +Paris,FR,2019-05-03 21:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-05-03 20:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-03 19:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-03 18:00:00+00:00,FR04014,no2,59.6,µg/m³ +Paris,FR,2019-05-03 17:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-03 16:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-05-03 15:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-03 14:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-03 13:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-03 12:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-03 11:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-05-03 10:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-03 09:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-05-03 08:00:00+00:00,FR04014,no2,46.4,µg/m³ +Paris,FR,2019-05-03 07:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-03 06:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-05-03 05:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-05-03 04:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-03 03:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-05-03 02:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-03 01:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-03 
00:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-02 23:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-05-02 22:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-02 21:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-05-02 20:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-02 19:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-05-02 18:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-02 17:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-05-02 16:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-05-02 15:00:00+00:00,FR04014,no2,41.4,µg/m³ +Paris,FR,2019-05-02 14:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-05-02 13:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-02 12:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-02 11:00:00+00:00,FR04014,no2,32.6,µg/m³ +Paris,FR,2019-05-02 10:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-02 09:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-02 08:00:00+00:00,FR04014,no2,55.5,µg/m³ +Paris,FR,2019-05-02 07:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-05-02 06:00:00+00:00,FR04014,no2,49.4,µg/m³ +Paris,FR,2019-05-02 05:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-05-02 04:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-02 03:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-02 02:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-02 01:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-02 00:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-01 23:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-01 22:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-01 21:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-01 20:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-01 19:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-01 18:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-05-01 17:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-01 16:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-01 15:00:00+00:00,FR04014,no2,24.4,µg/m³ 
+Paris,FR,2019-05-01 14:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-01 13:00:00+00:00,FR04014,no2,22.5,µg/m³ +Paris,FR,2019-05-01 12:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-01 11:00:00+00:00,FR04014,no2,28.2,µg/m³ +Paris,FR,2019-05-01 10:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-05-01 09:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-05-01 08:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-05-01 07:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-01 06:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-01 05:00:00+00:00,FR04014,no2,28.5,µg/m³ +Paris,FR,2019-05-01 04:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-05-01 03:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-05-01 02:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-01 01:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-01 00:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-04-30 23:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-04-30 22:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-04-30 21:00:00+00:00,FR04014,no2,42.8,µg/m³ +Paris,FR,2019-04-30 20:00:00+00:00,FR04014,no2,39.6,µg/m³ +Paris,FR,2019-04-30 19:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-04-30 18:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-04-30 17:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-04-30 16:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-30 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-30 14:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-04-30 13:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-04-30 12:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-04-30 11:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-04-30 10:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-30 09:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-04-30 08:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-04-30 07:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-04-30 06:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-04-30 
05:00:00+00:00,FR04014,no2,37.3,µg/m³ +Paris,FR,2019-04-30 04:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-04-30 03:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-04-30 02:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-04-30 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-04-30 00:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-04-29 23:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-04-29 22:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-29 21:00:00+00:00,FR04014,no2,31.6,µg/m³ +Paris,FR,2019-04-29 20:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-04-29 19:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-04-29 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-04-29 17:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-04-29 16:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-04-29 15:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-04-29 14:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-04-29 13:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-04-29 12:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-04-29 11:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-04-29 10:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-29 09:00:00+00:00,FR04014,no2,28.5,µg/m³ +Paris,FR,2019-04-29 08:00:00+00:00,FR04014,no2,39.1,µg/m³ +Paris,FR,2019-04-29 07:00:00+00:00,FR04014,no2,45.4,µg/m³ +Paris,FR,2019-04-29 06:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-04-29 05:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-04-29 04:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-04-29 03:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-29 02:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-04-29 01:00:00+00:00,FR04014,no2,25.5,µg/m³ +Paris,FR,2019-04-29 00:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-28 23:00:00+00:00,FR04014,no2,29.8,µg/m³ +Paris,FR,2019-04-28 22:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-04-28 21:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-04-28 20:00:00+00:00,FR04014,no2,39.3,µg/m³ 
+Paris,FR,2019-04-28 19:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-04-28 18:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-04-28 17:00:00+00:00,FR04014,no2,23.7,µg/m³ +Paris,FR,2019-04-28 16:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-04-28 15:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-04-28 14:00:00+00:00,FR04014,no2,18.4,µg/m³ +Paris,FR,2019-04-28 13:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-04-28 12:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-04-28 11:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-04-28 10:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-04-28 09:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-04-28 08:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-04-28 07:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-04-28 06:00:00+00:00,FR04014,no2,13.6,µg/m³ +Paris,FR,2019-04-28 05:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-28 04:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-04-28 03:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-04-28 02:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-04-28 01:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-04-28 00:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-04-27 23:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-04-27 22:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-04-27 21:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-04-27 20:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-04-27 19:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-04-27 18:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-04-27 17:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-04-27 16:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-27 15:00:00+00:00,FR04014,no2,13.7,µg/m³ +Paris,FR,2019-04-27 14:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-27 13:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-04-27 12:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-04-27 11:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-04-27 
10:00:00+00:00,FR04014,no2,10.9,µg/m³ +Paris,FR,2019-04-27 09:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-04-27 08:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-04-27 07:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-04-27 06:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-04-27 05:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-04-27 04:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-04-27 03:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-04-27 02:00:00+00:00,FR04014,no2,8.6,µg/m³ +Paris,FR,2019-04-27 01:00:00+00:00,FR04014,no2,9.3,µg/m³ +Paris,FR,2019-04-27 00:00:00+00:00,FR04014,no2,10.8,µg/m³ +Paris,FR,2019-04-26 23:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-04-26 22:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-04-26 21:00:00+00:00,FR04014,no2,34.8,µg/m³ +Paris,FR,2019-04-26 20:00:00+00:00,FR04014,no2,38.7,µg/m³ +Paris,FR,2019-04-26 19:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-04-26 18:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-04-26 17:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-04-26 16:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-26 15:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-04-26 14:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-26 13:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-04-26 12:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-04-26 11:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-04-26 10:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-04-26 09:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-04-26 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-04-26 07:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-04-26 06:00:00+00:00,FR04014,no2,61.8,µg/m³ +Paris,FR,2019-04-26 05:00:00+00:00,FR04014,no2,70.9,µg/m³ +Paris,FR,2019-04-26 04:00:00+00:00,FR04014,no2,58.3,µg/m³ +Paris,FR,2019-04-26 03:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-04-26 02:00:00+00:00,FR04014,no2,27.8,µg/m³ +Paris,FR,2019-04-26 01:00:00+00:00,FR04014,no2,21.6,µg/m³ 
+Paris,FR,2019-04-26 00:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-04-25 23:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-25 22:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-04-25 21:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-04-25 20:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-04-25 19:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-04-25 18:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-04-25 17:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-04-25 16:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-04-25 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-04-25 14:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-04-25 13:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-04-25 12:00:00+00:00,FR04014,no2,29.1,µg/m³ +Paris,FR,2019-04-25 11:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-04-25 10:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-04-25 09:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-04-25 08:00:00+00:00,FR04014,no2,37.6,µg/m³ +Paris,FR,2019-04-25 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-04-25 06:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-04-25 05:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-04-25 04:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-04-25 03:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-04-25 02:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-04-25 01:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-04-25 00:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-04-24 23:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-04-24 22:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-04-24 21:00:00+00:00,FR04014,no2,40.3,µg/m³ +Paris,FR,2019-04-24 20:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-04-24 19:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-04-24 18:00:00+00:00,FR04014,no2,22.5,µg/m³ +Paris,FR,2019-04-24 17:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-04-24 16:00:00+00:00,FR04014,no2,31.3,µg/m³ +Paris,FR,2019-04-24 
15:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-04-24 14:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-04-24 13:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-24 12:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-04-24 11:00:00+00:00,FR04014,no2,22.4,µg/m³ +Paris,FR,2019-04-24 10:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-04-24 09:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-04-24 08:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-04-24 07:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-04-24 06:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-04-24 05:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-24 04:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-04-24 03:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-04-24 02:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-04-24 01:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-04-24 00:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-23 23:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-04-23 22:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-23 21:00:00+00:00,FR04014,no2,41.2,µg/m³ +Paris,FR,2019-04-23 20:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-04-23 19:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-23 18:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-04-23 17:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-04-23 16:00:00+00:00,FR04014,no2,52.9,µg/m³ +Paris,FR,2019-04-23 15:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-04-23 14:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-04-23 13:00:00+00:00,FR04014,no2,53.2,µg/m³ +Paris,FR,2019-04-23 12:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-04-23 11:00:00+00:00,FR04014,no2,51.8,µg/m³ +Paris,FR,2019-04-23 10:00:00+00:00,FR04014,no2,47.9,µg/m³ +Paris,FR,2019-04-23 09:00:00+00:00,FR04014,no2,51.9,µg/m³ +Paris,FR,2019-04-23 08:00:00+00:00,FR04014,no2,60.7,µg/m³ +Paris,FR,2019-04-23 07:00:00+00:00,FR04014,no2,86.0,µg/m³ +Paris,FR,2019-04-23 06:00:00+00:00,FR04014,no2,74.7,µg/m³ 
+Paris,FR,2019-04-23 05:00:00+00:00,FR04014,no2,49.2,µg/m³ +Paris,FR,2019-04-23 04:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-04-23 03:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-04-23 02:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-04-23 01:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-23 00:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-04-22 23:00:00+00:00,FR04014,no2,45.6,µg/m³ +Paris,FR,2019-04-22 22:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-04-22 21:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-04-22 20:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-04-22 19:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-04-22 18:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-04-22 17:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-04-22 16:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-04-22 15:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-04-22 14:00:00+00:00,FR04014,no2,8.9,µg/m³ +Paris,FR,2019-04-22 13:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-04-22 12:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-04-22 11:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-22 10:00:00+00:00,FR04014,no2,43.5,µg/m³ +Paris,FR,2019-04-22 09:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-04-22 08:00:00+00:00,FR04014,no2,63.7,µg/m³ +Paris,FR,2019-04-22 07:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-04-22 06:00:00+00:00,FR04014,no2,65.7,µg/m³ +Paris,FR,2019-04-22 05:00:00+00:00,FR04014,no2,69.8,µg/m³ +Paris,FR,2019-04-22 04:00:00+00:00,FR04014,no2,80.2,µg/m³ +Paris,FR,2019-04-22 03:00:00+00:00,FR04014,no2,87.9,µg/m³ +Paris,FR,2019-04-22 02:00:00+00:00,FR04014,no2,88.7,µg/m³ +Paris,FR,2019-04-22 01:00:00+00:00,FR04014,no2,99.0,µg/m³ +Paris,FR,2019-04-22 00:00:00+00:00,FR04014,no2,116.4,µg/m³ +Paris,FR,2019-04-21 23:00:00+00:00,FR04014,no2,105.2,µg/m³ +Paris,FR,2019-04-21 22:00:00+00:00,FR04014,no2,117.2,µg/m³ +Paris,FR,2019-04-21 21:00:00+00:00,FR04014,no2,101.1,µg/m³ +Paris,FR,2019-04-21 
20:00:00+00:00,FR04014,no2,75.6,µg/m³ +Paris,FR,2019-04-21 19:00:00+00:00,FR04014,no2,45.6,µg/m³ +Paris,FR,2019-04-21 18:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-04-21 17:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-04-21 16:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-21 15:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-04-21 14:00:00+00:00,FR04014,no2,9.3,µg/m³ +Paris,FR,2019-04-21 13:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-04-21 12:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-04-21 11:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-04-21 10:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-04-21 09:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-04-21 08:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-04-21 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-04-21 06:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-04-21 05:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-04-21 04:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-04-21 03:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-21 02:00:00+00:00,FR04014,no2,28.7,µg/m³ +Paris,FR,2019-04-21 01:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-04-21 00:00:00+00:00,FR04014,no2,40.5,µg/m³ +Paris,FR,2019-04-20 23:00:00+00:00,FR04014,no2,49.2,µg/m³ +Paris,FR,2019-04-20 22:00:00+00:00,FR04014,no2,52.8,µg/m³ +Paris,FR,2019-04-20 21:00:00+00:00,FR04014,no2,52.9,µg/m³ +Paris,FR,2019-04-20 20:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-04-20 19:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-04-20 18:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-04-20 17:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-04-20 16:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-20 15:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-04-20 14:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-04-20 13:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-04-20 12:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-04-20 11:00:00+00:00,FR04014,no2,28.6,µg/m³ 
+Paris,FR,2019-04-20 10:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-04-20 09:00:00+00:00,FR04014,no2,44.0,µg/m³ +Paris,FR,2019-04-20 08:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-04-20 07:00:00+00:00,FR04014,no2,64.5,µg/m³ +Paris,FR,2019-04-20 06:00:00+00:00,FR04014,no2,67.1,µg/m³ +Paris,FR,2019-04-20 05:00:00+00:00,FR04014,no2,45.9,µg/m³ +Paris,FR,2019-04-20 04:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-04-20 03:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-04-20 02:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-20 01:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-04-20 00:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-04-19 23:00:00+00:00,FR04014,no2,70.2,µg/m³ +Paris,FR,2019-04-19 22:00:00+00:00,FR04014,no2,90.4,µg/m³ +Paris,FR,2019-04-19 21:00:00+00:00,FR04014,no2,96.9,µg/m³ +Paris,FR,2019-04-19 20:00:00+00:00,FR04014,no2,78.4,µg/m³ +Paris,FR,2019-04-19 19:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-04-19 18:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-04-19 17:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-19 16:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-04-19 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-19 14:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-04-19 13:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-04-19 12:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-04-19 11:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-04-19 10:00:00+00:00,FR04014,no2,51.3,µg/m³ +Paris,FR,2019-04-19 09:00:00+00:00,FR04014,no2,56.3,µg/m³ +Paris,FR,2019-04-19 08:00:00+00:00,FR04014,no2,61.4,µg/m³ +Paris,FR,2019-04-19 07:00:00+00:00,FR04014,no2,86.5,µg/m³ +Paris,FR,2019-04-19 06:00:00+00:00,FR04014,no2,89.3,µg/m³ +Paris,FR,2019-04-19 05:00:00+00:00,FR04014,no2,58.1,µg/m³ +Paris,FR,2019-04-19 04:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-19 03:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-04-19 02:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-19 
01:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-04-19 00:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-04-18 23:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-18 22:00:00+00:00,FR04014,no2,41.2,µg/m³ +Paris,FR,2019-04-18 21:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-04-18 20:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-18 19:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-04-18 18:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-04-18 17:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-04-18 16:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-04-18 15:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-04-18 14:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-04-18 13:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-04-18 12:00:00+00:00,FR04014,no2,12.7,µg/m³ +Paris,FR,2019-04-18 11:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-04-18 10:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-04-18 09:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-04-18 08:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-04-18 07:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-18 06:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-04-18 05:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-04-18 04:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-18 03:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-04-18 02:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-04-18 01:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-04-18 00:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-04-17 23:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-04-17 22:00:00+00:00,FR04014,no2,24.7,µg/m³ +Paris,FR,2019-04-17 21:00:00+00:00,FR04014,no2,37.3,µg/m³ +Paris,FR,2019-04-17 20:00:00+00:00,FR04014,no2,41.2,µg/m³ +Paris,FR,2019-04-17 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-04-17 18:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-04-17 17:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-04-17 16:00:00+00:00,FR04014,no2,13.8,µg/m³ 
+Paris,FR,2019-04-17 15:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-04-17 14:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-04-17 13:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-04-17 12:00:00+00:00,FR04014,no2,15.8,µg/m³ +Paris,FR,2019-04-17 11:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-04-17 10:00:00+00:00,FR04014,no2,46.9,µg/m³ +Paris,FR,2019-04-17 09:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-04-17 08:00:00+00:00,FR04014,no2,72.7,µg/m³ +Paris,FR,2019-04-17 07:00:00+00:00,FR04014,no2,70.4,µg/m³ +Paris,FR,2019-04-17 06:00:00+00:00,FR04014,no2,72.9,µg/m³ +Paris,FR,2019-04-17 05:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-04-17 04:00:00+00:00,FR04014,no2,65.5,µg/m³ +Paris,FR,2019-04-17 03:00:00+00:00,FR04014,no2,62.5,µg/m³ +Paris,FR,2019-04-17 02:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-17 01:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-04-17 00:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-04-16 23:00:00+00:00,FR04014,no2,34.4,µg/m³ +Paris,FR,2019-04-16 22:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-04-16 21:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-16 20:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-04-16 19:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-04-16 18:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-04-16 17:00:00+00:00,FR04014,no2,44.0,µg/m³ +Paris,FR,2019-04-16 16:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-04-16 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-16 14:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-04-16 13:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-04-16 12:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-04-16 11:00:00+00:00,FR04014,no2,38.8,µg/m³ +Paris,FR,2019-04-16 10:00:00+00:00,FR04014,no2,47.1,µg/m³ +Paris,FR,2019-04-16 09:00:00+00:00,FR04014,no2,57.5,µg/m³ +Paris,FR,2019-04-16 08:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-04-16 07:00:00+00:00,FR04014,no2,72.0,µg/m³ +Paris,FR,2019-04-16 
06:00:00+00:00,FR04014,no2,79.0,µg/m³ +Paris,FR,2019-04-16 05:00:00+00:00,FR04014,no2,76.9,µg/m³ +Paris,FR,2019-04-16 04:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-04-16 03:00:00+00:00,FR04014,no2,34.6,µg/m³ +Paris,FR,2019-04-16 02:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-04-16 01:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-04-16 00:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-04-15 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-04-15 22:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-04-15 21:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-04-15 20:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-04-15 19:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-04-15 18:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-04-15 17:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-04-15 16:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-04-15 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-15 14:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-04-15 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-04-15 12:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-04-15 11:00:00+00:00,FR04014,no2,13.6,µg/m³ +Paris,FR,2019-04-15 10:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-04-15 09:00:00+00:00,FR04014,no2,28.0,µg/m³ +Paris,FR,2019-04-15 08:00:00+00:00,FR04014,no2,53.9,µg/m³ +Paris,FR,2019-04-15 07:00:00+00:00,FR04014,no2,61.2,µg/m³ +Paris,FR,2019-04-15 06:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-04-15 05:00:00+00:00,FR04014,no2,52.9,µg/m³ +Paris,FR,2019-04-15 04:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-04-15 03:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-04-15 02:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-15 01:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-04-15 00:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-04-14 23:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-04-14 22:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-04-14 21:00:00+00:00,FR04014,no2,34.4,µg/m³ 
+Paris,FR,2019-04-14 20:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-04-14 19:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-04-14 18:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-04-14 17:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-04-14 16:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-04-14 15:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-04-14 14:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-04-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-14 12:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-04-14 11:00:00+00:00,FR04014,no2,19.7,µg/m³ +Paris,FR,2019-04-14 10:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-04-14 09:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-04-14 08:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-04-14 07:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-04-14 06:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-04-14 05:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-04-14 04:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-04-14 03:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-04-14 02:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-04-14 01:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-04-14 00:00:00+00:00,FR04014,no2,41.1,µg/m³ +Paris,FR,2019-04-13 23:00:00+00:00,FR04014,no2,47.8,µg/m³ +Paris,FR,2019-04-13 22:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-13 21:00:00+00:00,FR04014,no2,43.8,µg/m³ +Paris,FR,2019-04-13 20:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-04-13 19:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-13 18:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-04-13 17:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-04-13 16:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-04-13 15:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-04-13 14:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-04-13 13:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-04-13 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-04-13 
11:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-04-13 10:00:00+00:00,FR04014,no2,18.3,µg/m³ +Paris,FR,2019-04-13 09:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-04-13 08:00:00+00:00,FR04014,no2,35.2,µg/m³ +Paris,FR,2019-04-13 07:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-04-13 06:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-04-13 05:00:00+00:00,FR04014,no2,38.7,µg/m³ +Paris,FR,2019-04-13 04:00:00+00:00,FR04014,no2,31.9,µg/m³ +Paris,FR,2019-04-13 03:00:00+00:00,FR04014,no2,35.2,µg/m³ +Paris,FR,2019-04-13 02:00:00+00:00,FR04014,no2,38.9,µg/m³ +Paris,FR,2019-04-13 01:00:00+00:00,FR04014,no2,38.9,µg/m³ +Paris,FR,2019-04-13 00:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-04-12 23:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-04-12 22:00:00+00:00,FR04014,no2,42.4,µg/m³ +Paris,FR,2019-04-12 21:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-04-12 20:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-04-12 19:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-12 18:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-12 17:00:00+00:00,FR04014,no2,25.9,µg/m³ +Paris,FR,2019-04-12 16:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-04-12 15:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-04-12 14:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-12 13:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-04-12 12:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-12 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-04-12 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-04-12 09:00:00+00:00,FR04014,no2,36.5,µg/m³ +Paris,FR,2019-04-12 08:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-04-12 07:00:00+00:00,FR04014,no2,48.3,µg/m³ +Paris,FR,2019-04-12 06:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-04-12 05:00:00+00:00,FR04014,no2,39.0,µg/m³ +Paris,FR,2019-04-12 04:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-04-12 03:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-04-12 02:00:00+00:00,FR04014,no2,22.1,µg/m³ 
+Paris,FR,2019-04-12 01:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-04-12 00:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-04-11 23:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-04-11 22:00:00+00:00,FR04014,no2,42.6,µg/m³ +Paris,FR,2019-04-11 21:00:00+00:00,FR04014,no2,40.7,µg/m³ +Paris,FR,2019-04-11 20:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-04-11 19:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-04-11 18:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-04-11 17:00:00+00:00,FR04014,no2,20.9,µg/m³ +Paris,FR,2019-04-11 16:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-04-11 15:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-04-11 14:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-04-11 13:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-04-11 12:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-04-11 11:00:00+00:00,FR04014,no2,25.4,µg/m³ +Paris,FR,2019-04-11 10:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-04-11 09:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-04-11 08:00:00+00:00,FR04014,no2,43.2,µg/m³ +Paris,FR,2019-04-11 07:00:00+00:00,FR04014,no2,44.3,µg/m³ +Paris,FR,2019-04-11 06:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-04-11 05:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-04-11 04:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-04-11 03:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-04-11 02:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-04-11 01:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-04-11 00:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-04-10 23:00:00+00:00,FR04014,no2,31.3,µg/m³ +Paris,FR,2019-04-10 22:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-04-10 21:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-04-10 20:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-04-10 19:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-04-10 18:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-04-10 17:00:00+00:00,FR04014,no2,46.0,µg/m³ +Paris,FR,2019-04-10 
16:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-04-10 15:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-04-10 14:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-10 13:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-04-10 12:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-04-10 11:00:00+00:00,FR04014,no2,34.4,µg/m³ +Paris,FR,2019-04-10 10:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-04-10 09:00:00+00:00,FR04014,no2,41.1,µg/m³ +Paris,FR,2019-04-10 08:00:00+00:00,FR04014,no2,45.2,µg/m³ +Paris,FR,2019-04-10 07:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-04-10 06:00:00+00:00,FR04014,no2,40.6,µg/m³ +Paris,FR,2019-04-10 05:00:00+00:00,FR04014,no2,26.2,µg/m³ +Paris,FR,2019-04-10 04:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-04-10 03:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-04-10 02:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-04-10 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-04-10 00:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-04-09 23:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-04-09 22:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-04-09 21:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-04-09 20:00:00+00:00,FR04014,no2,39.9,µg/m³ +Paris,FR,2019-04-09 19:00:00+00:00,FR04014,no2,48.7,µg/m³ +Paris,FR,2019-04-09 18:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-04-09 17:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-04-09 16:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-04-09 15:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-04-09 14:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-04-09 13:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-04-09 12:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-04-09 11:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-04-09 10:00:00+00:00,FR04014,no2,67.1,µg/m³ +Paris,FR,2019-04-09 09:00:00+00:00,FR04014,no2,66.5,µg/m³ +Paris,FR,2019-04-09 08:00:00+00:00,FR04014,no2,69.5,µg/m³ +Paris,FR,2019-04-09 07:00:00+00:00,FR04014,no2,68.0,µg/m³ 
+Paris,FR,2019-04-09 06:00:00+00:00,FR04014,no2,66.9,µg/m³ +Paris,FR,2019-04-09 05:00:00+00:00,FR04014,no2,59.5,µg/m³ +Paris,FR,2019-04-09 04:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-04-09 03:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-04-09 02:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-04-09 01:00:00+00:00,FR04014,no2,24.4,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,no2,41.0,µg/m³ +Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,no2,43.5,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,no2,39.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,no2,36.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,no2,42.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,no2,28.5,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,no2,10.0,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,no2,52.5,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,no2,11.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,no2,53.0,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,no2,29.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,no2,74.5,µg/m³ 
+Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,no2,60.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,no2,24.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,no2,38.0,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-20 00:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 23:00:00+00:00,BETR801,no2,16.5,µg/m³ +Antwerpen,BE,2019-05-19 22:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 
11:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,no2,41.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,no2,26.5,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,no2,10.5,µg/m³ 
+Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,no2,50.5,µg/m³ +Antwerpen,BE,2019-05-06 02:00:00+00:00,BETR801,no2,27.0,µg/m³ +Antwerpen,BE,2019-05-06 01:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-05 02:00:00+00:00,BETR801,no2,13.0,µg/m³ +Antwerpen,BE,2019-05-05 01:00:00+00:00,BETR801,no2,18.0,µg/m³ +Antwerpen,BE,2019-05-04 02:00:00+00:00,BETR801,no2,9.5,µg/m³ +Antwerpen,BE,2019-05-04 01:00:00+00:00,BETR801,no2,8.5,µg/m³ +Antwerpen,BE,2019-05-03 02:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-03 01:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-05-02 02:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-05-02 01:00:00+00:00,BETR801,no2,31.0,µg/m³ +Antwerpen,BE,2019-05-01 02:00:00+00:00,BETR801,no2,12.0,µg/m³ +Antwerpen,BE,2019-05-01 01:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-04-30 02:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-04-30 01:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-29 02:00:00+00:00,BETR801,no2,52.5,µg/m³ +Antwerpen,BE,2019-04-29 01:00:00+00:00,BETR801,no2,72.5,µg/m³ +Antwerpen,BE,2019-04-28 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-04-28 01:00:00+00:00,BETR801,no2,8.5,µg/m³ +Antwerpen,BE,2019-04-27 02:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-04-27 01:00:00+00:00,BETR801,no2,22.0,µg/m³ +Antwerpen,BE,2019-04-26 02:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-26 01:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-04-25 02:00:00+00:00,BETR801,no2,12.0,µg/m³ +Antwerpen,BE,2019-04-25 01:00:00+00:00,BETR801,no2,13.0,µg/m³ +Antwerpen,BE,2019-04-22 01:00:00+00:00,BETR801,no2,24.5,µg/m³ +Antwerpen,BE,2019-04-21 
02:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-04-21 01:00:00+00:00,BETR801,no2,18.0,µg/m³ +Antwerpen,BE,2019-04-19 01:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-18 02:00:00+00:00,BETR801,no2,35.0,µg/m³ +Antwerpen,BE,2019-04-17 03:00:00+00:00,BETR801,no2,38.5,µg/m³ +Antwerpen,BE,2019-04-17 02:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-04-17 01:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-04-16 02:00:00+00:00,BETR801,no2,21.5,µg/m³ +Antwerpen,BE,2019-04-16 01:00:00+00:00,BETR801,no2,27.5,µg/m³ +Antwerpen,BE,2019-04-15 15:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-04-15 14:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-04-15 13:00:00+00:00,BETR801,no2,31.0,µg/m³ +Antwerpen,BE,2019-04-15 12:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-04-15 11:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-15 10:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-15 09:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-04-15 08:00:00+00:00,BETR801,no2,43.5,µg/m³ +Antwerpen,BE,2019-04-15 07:00:00+00:00,BETR801,no2,54.0,µg/m³ +Antwerpen,BE,2019-04-15 06:00:00+00:00,BETR801,no2,64.0,µg/m³ +Antwerpen,BE,2019-04-15 05:00:00+00:00,BETR801,no2,63.0,µg/m³ +Antwerpen,BE,2019-04-15 04:00:00+00:00,BETR801,no2,49.0,µg/m³ +Antwerpen,BE,2019-04-15 03:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-04-15 02:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-04-15 01:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-04-12 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-04-12 01:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-04-11 02:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-04-11 01:00:00+00:00,BETR801,no2,13.5,µg/m³ +Antwerpen,BE,2019-04-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-04-10 01:00:00+00:00,BETR801,no2,13.5,µg/m³ +Antwerpen,BE,2019-04-09 13:00:00+00:00,BETR801,no2,27.5,µg/m³ +Antwerpen,BE,2019-04-09 12:00:00+00:00,BETR801,no2,30.0,µg/m³ 
+Antwerpen,BE,2019-04-09 11:00:00+00:00,BETR801,no2,28.5,µg/m³ +Antwerpen,BE,2019-04-09 10:00:00+00:00,BETR801,no2,33.5,µg/m³ +Antwerpen,BE,2019-04-09 09:00:00+00:00,BETR801,no2,35.0,µg/m³ +Antwerpen,BE,2019-04-09 08:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-04-09 07:00:00+00:00,BETR801,no2,38.5,µg/m³ +Antwerpen,BE,2019-04-09 06:00:00+00:00,BETR801,no2,50.0,µg/m³ +Antwerpen,BE,2019-04-09 05:00:00+00:00,BETR801,no2,46.5,µg/m³ +Antwerpen,BE,2019-04-09 04:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-04-09 03:00:00+00:00,BETR801,no2,54.5,µg/m³ +Antwerpen,BE,2019-04-09 02:00:00+00:00,BETR801,no2,53.5,µg/m³ +Antwerpen,BE,2019-04-09 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 
16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 
12:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 
05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-12 
23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 
16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-06-10 
12:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 
07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London 
Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London 
Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London 
Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London 
Westminster,no2,32.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London 
Westminster,no2,6.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London 
Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London 
Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London 
Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,no2,5.0,µg/m³ 
+London,GB,2019-05-29 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ 
+London,GB,2019-05-28 09:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,no2,18.0,µg/m³ 
+London,GB,2019-05-27 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ 
+London,GB,2019-05-26 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ 
+London,GB,2019-05-24 21:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ 
+London,GB,2019-05-23 16:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ 
+London,GB,2019-05-22 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ 
+London,GB,2019-05-21 08:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ 
+London,GB,2019-05-20 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,no2,49.0,µg/m³ 
+London,GB,2019-05-19 00:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,no2,36.0,µg/m³ 
+London,GB,2019-05-17 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ 
+London,GB,2019-05-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,no2,31.0,µg/m³ 
+London,GB,2019-05-15 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ 
+London,GB,2019-05-14 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ 
+London,GB,2019-05-13 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ 
+London,GB,2019-05-11 23:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ 
+London,GB,2019-05-10 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,no2,65.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,no2,97.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ 
+London,GB,2019-05-09 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ 
+London,GB,2019-05-08 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-06 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-06 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ 
+London,GB,2019-05-06 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-06 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 19:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 18:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 17:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 13:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-06 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-06 10:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-06 09:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-06 06:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-06 05:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 04:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-06 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-06 01:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-06 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-05 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-05 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-05 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-05 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-05 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-05 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ 
+London,GB,2019-05-05 17:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-05 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-05 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-05 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 10:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 08:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-05 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 06:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 05:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 04:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-05 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-05 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-05 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-04 23:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-04 22:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-04 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-04 20:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-04 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-04 18:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 15:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ 
+London,GB,2019-05-04 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-04 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-04 07:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 06:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-04 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-04 03:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 02:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-04 01:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-04 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-03 23:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 22:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-03 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-03 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-03 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-03 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 15:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-03 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-03 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-03 12:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-03 11:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-05-03 10:00:00+00:00,London Westminster,no2,46.0,µg/m³ 
+London,GB,2019-05-03 09:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-03 08:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-03 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-03 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-03 05:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 04:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 03:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 02:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-03 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-03 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 23:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-02 22:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-02 21:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-02 20:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-02 19:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-02 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-02 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-02 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 14:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-02 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-02 12:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-02 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-02 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-02 09:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-02 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-02 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ 
+London,GB,2019-05-02 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-02 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-02 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-02 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-02 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-02 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-01 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 21:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 20:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-01 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-01 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-01 15:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 13:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-01 12:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-01 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 10:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-01 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-01 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-01 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-01 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-01 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-01 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-01 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-01 00:00:00+00:00,London Westminster,no2,25.0,µg/m³ 
+London,GB,2019-04-30 23:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 22:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 21:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-30 20:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-30 19:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-30 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-30 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-30 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-30 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-30 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-30 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-30 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-30 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-30 10:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-30 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-30 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-30 07:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-30 06:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-30 05:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 04:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 03:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 02:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-30 01:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-30 00:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-29 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-29 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-29 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-29 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ 
+London,GB,2019-04-29 19:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-29 18:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-29 17:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-29 16:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-29 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-29 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-29 13:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-29 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-29 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-29 10:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-29 09:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-29 08:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-29 07:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-29 06:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-29 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-29 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-29 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-29 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-29 01:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-29 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-28 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-28 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-28 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-28 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-28 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-28 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-28 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ 
+London,GB,2019-04-28 15:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 14:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-28 13:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-28 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-04-27 13:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-27 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-27 11:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-27 10:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-27 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-04-27 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-27 07:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-04-27 06:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-04-27 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-27 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-27 03:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-27 02:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-27 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-04-26 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-26 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-04-26 21:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-04-26 20:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-26 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-26 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-26 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-26 16:00:00+00:00,London Westminster,no2,20.0,µg/m³ 
+London,GB,2019-04-26 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-26 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-26 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-26 11:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-26 10:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-26 09:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-26 08:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-26 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-26 06:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-26 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-26 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-26 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-26 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-26 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-26 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-25 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-25 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-25 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-04-25 18:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 16:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 15:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-25 13:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-04-25 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ 
+London,GB,2019-04-25 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 10:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 09:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-25 08:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-25 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-25 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-25 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-25 00:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-24 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-24 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-24 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-24 20:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-24 17:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-24 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-24 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-24 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-24 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-24 11:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-24 10:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-24 09:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-24 08:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-24 07:00:00+00:00,London Westminster,no2,50.0,µg/m³ 
+London,GB,2019-04-24 06:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-24 05:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-24 04:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-24 03:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-24 02:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-24 00:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-23 23:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 22:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 21:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-23 20:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-23 19:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-23 18:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-23 17:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-04-23 16:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 15:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-23 14:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-23 13:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-23 12:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-23 11:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-23 10:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-23 09:00:00+00:00,London Westminster,no2,61.0,µg/m³ +London,GB,2019-04-23 08:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-23 07:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-04-23 06:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-23 05:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-23 04:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-23 03:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-23 02:00:00+00:00,London Westminster,no2,51.0,µg/m³ 
+London,GB,2019-04-23 01:00:00+00:00,London Westminster,no2,75.0,µg/m³ +London,GB,2019-04-23 00:00:00+00:00,London Westminster,no2,75.0,µg/m³ +London,GB,2019-04-22 23:00:00+00:00,London Westminster,no2,84.0,µg/m³ +London,GB,2019-04-22 22:00:00+00:00,London Westminster,no2,84.0,µg/m³ +London,GB,2019-04-22 21:00:00+00:00,London Westminster,no2,73.0,µg/m³ +London,GB,2019-04-22 20:00:00+00:00,London Westminster,no2,66.0,µg/m³ +London,GB,2019-04-22 19:00:00+00:00,London Westminster,no2,66.0,µg/m³ +London,GB,2019-04-22 18:00:00+00:00,London Westminster,no2,64.0,µg/m³ +London,GB,2019-04-22 17:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-22 16:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-22 15:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-22 14:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-22 13:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-22 12:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-22 11:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-22 10:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-22 09:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-22 08:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-22 07:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-22 06:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-22 05:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-22 04:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-22 03:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-22 02:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-22 01:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-22 00:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 23:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 22:00:00+00:00,London Westminster,no2,46.0,µg/m³ 
+London,GB,2019-04-21 21:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-21 20:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-21 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-21 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-21 17:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-21 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-21 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-21 14:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 13:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 12:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-21 11:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 10:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-21 09:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-21 08:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-21 07:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-21 06:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-21 05:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 04:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-21 03:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-21 02:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-21 01:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-21 00:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-20 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-20 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-20 21:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-20 20:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-20 19:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-20 18:00:00+00:00,London Westminster,no2,53.0,µg/m³ 
+London,GB,2019-04-20 17:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-20 16:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-20 15:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-20 14:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-20 13:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-20 12:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-20 11:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-20 10:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-20 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-20 08:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-20 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-20 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-20 05:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-20 04:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-20 03:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-20 02:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-20 01:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-20 00:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-19 23:00:00+00:00,London Westminster,no2,77.0,µg/m³ +London,GB,2019-04-19 22:00:00+00:00,London Westminster,no2,77.0,µg/m³ +London,GB,2019-04-19 21:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-19 20:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-04-19 19:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-19 18:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-19 17:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-19 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-19 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-19 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ 
+London,GB,2019-04-19 13:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-19 12:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-19 11:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-19 10:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-19 09:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-19 08:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-19 07:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-19 06:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-19 05:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-19 04:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-04-19 03:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-19 02:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-19 00:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-04-18 23:00:00+00:00,London Westminster,no2,61.0,µg/m³ +London,GB,2019-04-18 22:00:00+00:00,London Westminster,no2,61.0,µg/m³ +London,GB,2019-04-18 21:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-18 20:00:00+00:00,London Westminster,no2,69.0,µg/m³ +London,GB,2019-04-18 19:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-18 18:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-18 17:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-18 16:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-18 15:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-18 14:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 13:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-18 12:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-18 11:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-18 10:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-18 09:00:00+00:00,London Westminster,no2,53.0,µg/m³ 
+London,GB,2019-04-18 08:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 07:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 06:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-18 05:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-18 04:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-18 03:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 02:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 01:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-18 00:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 23:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-17 22:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-17 21:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-17 20:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-17 19:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-17 18:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-17 17:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-17 16:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-17 15:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-17 14:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-04-17 13:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 12:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-04-17 11:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-17 10:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-17 09:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 08:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-17 07:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-17 06:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-17 05:00:00+00:00,London Westminster,no2,50.0,µg/m³ 
+London,GB,2019-04-17 04:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-17 03:00:00+00:00,London Westminster,no2,72.0,µg/m³ +London,GB,2019-04-17 02:00:00+00:00,London Westminster,no2,72.0,µg/m³ +London,GB,2019-04-17 00:00:00+00:00,London Westminster,no2,71.0,µg/m³ +London,GB,2019-04-16 23:00:00+00:00,London Westminster,no2,81.0,µg/m³ +London,GB,2019-04-16 22:00:00+00:00,London Westminster,no2,81.0,µg/m³ +London,GB,2019-04-16 21:00:00+00:00,London Westminster,no2,84.0,µg/m³ +London,GB,2019-04-16 20:00:00+00:00,London Westminster,no2,83.0,µg/m³ +London,GB,2019-04-16 19:00:00+00:00,London Westminster,no2,76.0,µg/m³ +London,GB,2019-04-16 18:00:00+00:00,London Westminster,no2,70.0,µg/m³ +London,GB,2019-04-16 17:00:00+00:00,London Westminster,no2,65.0,µg/m³ +London,GB,2019-04-16 15:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-16 14:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-16 13:00:00+00:00,London Westminster,no2,63.0,µg/m³ +London,GB,2019-04-16 12:00:00+00:00,London Westminster,no2,75.0,µg/m³ +London,GB,2019-04-16 11:00:00+00:00,London Westminster,no2,79.0,µg/m³ +London,GB,2019-04-16 10:00:00+00:00,London Westminster,no2,70.0,µg/m³ +London,GB,2019-04-16 09:00:00+00:00,London Westminster,no2,66.0,µg/m³ +London,GB,2019-04-16 08:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-16 07:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-16 06:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-04-16 05:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-16 04:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-16 03:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-16 02:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-16 00:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-15 23:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-15 22:00:00+00:00,London Westminster,no2,47.0,µg/m³ 
+London,GB,2019-04-15 21:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-15 20:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-15 19:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-15 18:00:00+00:00,London Westminster,no2,48.0,µg/m³ +London,GB,2019-04-15 17:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-15 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-15 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-15 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-15 13:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-15 12:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-15 11:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-15 10:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-15 09:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-15 08:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-15 07:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-15 06:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-15 05:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-15 04:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-15 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-15 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-15 01:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-15 00:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-14 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-14 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-14 21:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-14 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-14 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-14 18:00:00+00:00,London Westminster,no2,23.0,µg/m³ 
+London,GB,2019-04-14 17:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-14 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-04-14 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-04-14 14:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-04-14 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-14 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-14 11:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-14 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-14 09:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-14 08:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-14 07:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-14 06:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-14 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-14 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-14 03:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-14 02:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-14 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-14 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-13 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-13 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 19:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-04-13 17:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-13 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-13 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ 
+London,GB,2019-04-13 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-13 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-13 10:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-13 09:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-13 08:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-13 07:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-04-13 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-13 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-13 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-13 01:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-13 00:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 23:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 22:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-12 20:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-12 19:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-12 18:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-12 17:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-12 16:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-12 15:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-12 14:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-12 13:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-12 12:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-12 11:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-12 10:00:00+00:00,London Westminster,no2,34.0,µg/m³ 
+London,GB,2019-04-12 09:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-12 08:00:00+00:00,London Westminster,no2,57.0,µg/m³ +London,GB,2019-04-12 07:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-12 06:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-12 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-12 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-12 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-12 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-11 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-11 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-11 21:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-11 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-11 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-04-11 18:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-04-11 17:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-11 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-11 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-11 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-11 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-04-11 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-11 10:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-11 09:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-11 08:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-11 07:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-04-11 06:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-04-11 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ 
+London,GB,2019-04-11 03:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 02:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-04-11 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-10 23:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 22:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 21:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-10 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-10 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-10 18:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-10 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-10 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-04-10 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-10 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-04-10 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-04-10 12:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 11:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-04-10 10:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-04-10 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-04-10 08:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-10 07:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-10 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-10 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-10 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-04-10 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-10 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-04-10 01:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-10 00:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-04-09 23:00:00+00:00,London Westminster,no2,38.0,µg/m³ 
+London,GB,2019-04-09 22:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-04-09 21:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-09 20:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-04-09 19:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 18:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-04-09 17:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-04-09 16:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-04-09 15:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-04-09 14:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-04-09 13:00:00+00:00,London Westminster,no2,56.0,µg/m³ +London,GB,2019-04-09 12:00:00+00:00,London Westminster,no2,55.0,µg/m³ +London,GB,2019-04-09 11:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-09 10:00:00+00:00,London Westminster,no2,50.0,µg/m³ +London,GB,2019-04-09 09:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-04-09 08:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-04-09 07:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-04-09 06:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 05:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 04:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-04-09 03:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-04-09 02:00:00+00:00,London Westminster,no2,67.0,µg/m³ diff --git a/doc/data/air_quality_no2.csv b/doc/data/air_quality_no2.csv new file mode 100644 index 00000000..7fa879f7 --- /dev/null +++ b/doc/data/air_quality_no2.csv @@ -0,0 +1,1036 @@ +datetime,station_antwerp,station_paris,station_london +2019-05-07 02:00:00,,,23.0 +2019-05-07 03:00:00,50.5,25.0,19.0 +2019-05-07 04:00:00,45.0,27.7,19.0 +2019-05-07 05:00:00,,50.4,16.0 +2019-05-07 06:00:00,,61.9, +2019-05-07 07:00:00,,72.4,26.0 +2019-05-07 08:00:00,,77.7,32.0 +2019-05-07 09:00:00,,67.9,32.0 
+2019-05-07 10:00:00,,56.0,28.0 +2019-05-07 11:00:00,,34.5,21.0 +2019-05-07 12:00:00,,20.1,21.0 +2019-05-07 13:00:00,,13.0,18.0 +2019-05-07 14:00:00,,10.6,20.0 +2019-05-07 15:00:00,,13.2,18.0 +2019-05-07 16:00:00,,11.0,20.0 +2019-05-07 17:00:00,,11.7,20.0 +2019-05-07 18:00:00,,18.2,21.0 +2019-05-07 19:00:00,,22.3,20.0 +2019-05-07 20:00:00,,21.4,20.0 +2019-05-07 21:00:00,,26.8,24.0 +2019-05-07 22:00:00,,36.2,24.0 +2019-05-07 23:00:00,,33.9, +2019-05-08 00:00:00,,35.8,24.0 +2019-05-08 01:00:00,,34.0,19.0 +2019-05-08 02:00:00,,22.1,19.0 +2019-05-08 03:00:00,23.0,19.6,20.0 +2019-05-08 04:00:00,20.5,15.3,20.0 +2019-05-08 05:00:00,,13.5,19.0 +2019-05-08 06:00:00,,15.5,19.0 +2019-05-08 07:00:00,,19.3,29.0 +2019-05-08 08:00:00,,21.7,34.0 +2019-05-08 09:00:00,,19.5,36.0 +2019-05-08 10:00:00,,17.0,33.0 +2019-05-08 11:00:00,,19.7,28.0 +2019-05-08 12:00:00,,33.4,27.0 +2019-05-08 13:00:00,,21.4,26.0 +2019-05-08 14:00:00,,15.1,26.0 +2019-05-08 15:00:00,,14.3,24.0 +2019-05-08 16:00:00,,25.3,27.0 +2019-05-08 17:00:00,,26.0,28.0 +2019-05-08 18:00:00,,38.6,31.0 +2019-05-08 19:00:00,,29.3,40.0 +2019-05-08 20:00:00,,27.8,25.0 +2019-05-08 21:00:00,,41.3,29.0 +2019-05-08 22:00:00,,38.3,26.0 +2019-05-08 23:00:00,,48.9, +2019-05-09 00:00:00,,32.2,25.0 +2019-05-09 01:00:00,,25.2,30.0 +2019-05-09 02:00:00,,14.7, +2019-05-09 03:00:00,20.0,10.6,31.0 +2019-05-09 04:00:00,20.5,10.0,31.0 +2019-05-09 05:00:00,,10.4,33.0 +2019-05-09 06:00:00,,15.3,33.0 +2019-05-09 07:00:00,,34.5,33.0 +2019-05-09 08:00:00,,50.7,33.0 +2019-05-09 09:00:00,,49.0,35.0 +2019-05-09 10:00:00,,32.2,36.0 +2019-05-09 11:00:00,,32.3,28.0 +2019-05-09 12:00:00,,43.1,27.0 +2019-05-09 13:00:00,,34.2,30.0 +2019-05-09 14:00:00,,35.1,27.0 +2019-05-09 15:00:00,,21.3,34.0 +2019-05-09 16:00:00,,24.6,97.0 +2019-05-09 17:00:00,,23.9,67.0 +2019-05-09 18:00:00,,27.0,60.0 +2019-05-09 19:00:00,,29.9,58.0 +2019-05-09 20:00:00,,24.4,62.0 +2019-05-09 21:00:00,,23.8,59.0 +2019-05-09 22:00:00,,29.2,65.0 +2019-05-09 23:00:00,,34.5,59.0 +2019-05-10 
00:00:00,,29.7,59.0 +2019-05-10 01:00:00,,26.7,52.0 +2019-05-10 02:00:00,,22.7,52.0 +2019-05-10 03:00:00,10.5,19.1,41.0 +2019-05-10 04:00:00,11.5,14.1,41.0 +2019-05-10 05:00:00,,15.0,40.0 +2019-05-10 06:00:00,,20.5,40.0 +2019-05-10 07:00:00,,37.8,39.0 +2019-05-10 08:00:00,,47.4,36.0 +2019-05-10 09:00:00,,57.3,39.0 +2019-05-10 10:00:00,,60.7,34.0 +2019-05-10 11:00:00,,53.4,31.0 +2019-05-10 12:00:00,,35.1,29.0 +2019-05-10 13:00:00,,23.2,28.0 +2019-05-10 14:00:00,,25.3,26.0 +2019-05-10 15:00:00,,22.0,25.0 +2019-05-10 16:00:00,,29.3,25.0 +2019-05-10 17:00:00,,29.6,24.0 +2019-05-10 18:00:00,,30.8,26.0 +2019-05-10 19:00:00,,37.8,26.0 +2019-05-10 20:00:00,,33.4,29.0 +2019-05-10 21:00:00,,39.3,29.0 +2019-05-10 22:00:00,,43.6,29.0 +2019-05-10 23:00:00,,37.0,31.0 +2019-05-11 00:00:00,,28.1,31.0 +2019-05-11 01:00:00,,26.0,27.0 +2019-05-11 02:00:00,,24.8,27.0 +2019-05-11 03:00:00,26.5,15.5,32.0 +2019-05-11 04:00:00,21.0,14.9,32.0 +2019-05-11 05:00:00,,,35.0 +2019-05-11 06:00:00,,,35.0 +2019-05-11 07:00:00,,,30.0 +2019-05-11 08:00:00,,28.9,30.0 +2019-05-11 09:00:00,,29.0,27.0 +2019-05-11 10:00:00,,32.1,30.0 +2019-05-11 11:00:00,,35.7, +2019-05-11 12:00:00,,36.8, +2019-05-11 13:00:00,,33.2, +2019-05-11 14:00:00,,30.2, +2019-05-11 15:00:00,,30.8, +2019-05-11 16:00:00,,17.8,28.0 +2019-05-11 17:00:00,,18.0,26.0 +2019-05-11 18:00:00,,19.5,28.0 +2019-05-11 19:00:00,,32.0,31.0 +2019-05-11 20:00:00,,33.1,33.0 +2019-05-11 21:00:00,,31.2,33.0 +2019-05-11 22:00:00,,24.2,34.0 +2019-05-11 23:00:00,,21.1,37.0 +2019-05-12 00:00:00,,27.7,37.0 +2019-05-12 01:00:00,,26.4,35.0 +2019-05-12 02:00:00,,22.8,35.0 +2019-05-12 03:00:00,17.5,19.2,38.0 +2019-05-12 04:00:00,20.0,17.2,38.0 +2019-05-12 05:00:00,,16.0,36.0 +2019-05-12 06:00:00,,16.2,36.0 +2019-05-12 07:00:00,,19.2,38.0 +2019-05-12 08:00:00,,20.1,44.0 +2019-05-12 09:00:00,,15.9,32.0 +2019-05-12 10:00:00,,14.6,26.0 +2019-05-12 11:00:00,,11.7,26.0 +2019-05-12 12:00:00,,11.4,21.0 +2019-05-12 13:00:00,,11.4,20.0 +2019-05-12 14:00:00,,10.9,19.0 
+2019-05-12 15:00:00,,8.7,21.0 +2019-05-12 16:00:00,,9.1,22.0 +2019-05-12 17:00:00,,9.6,23.0 +2019-05-12 18:00:00,,11.7,24.0 +2019-05-12 19:00:00,,13.9,22.0 +2019-05-12 20:00:00,,18.2,22.0 +2019-05-12 21:00:00,,19.5,22.0 +2019-05-12 22:00:00,,24.1,21.0 +2019-05-12 23:00:00,,34.2,22.0 +2019-05-13 00:00:00,,46.5,22.0 +2019-05-13 01:00:00,,32.5,22.0 +2019-05-13 02:00:00,,25.0,22.0 +2019-05-13 03:00:00,14.5,18.9,24.0 +2019-05-13 04:00:00,14.5,18.5,24.0 +2019-05-13 05:00:00,,18.9,33.0 +2019-05-13 06:00:00,,25.1,33.0 +2019-05-13 07:00:00,,38.3,39.0 +2019-05-13 08:00:00,,45.2,39.0 +2019-05-13 09:00:00,,41.0,31.0 +2019-05-13 10:00:00,,32.1,29.0 +2019-05-13 11:00:00,,20.6,27.0 +2019-05-13 12:00:00,,12.8,26.0 +2019-05-13 13:00:00,,9.6,24.0 +2019-05-13 14:00:00,,9.2,25.0 +2019-05-13 15:00:00,,10.1,26.0 +2019-05-13 16:00:00,,10.7,28.0 +2019-05-13 17:00:00,,10.6,29.0 +2019-05-13 18:00:00,,12.1,30.0 +2019-05-13 19:00:00,,13.0,30.0 +2019-05-13 20:00:00,,15.5,31.0 +2019-05-13 21:00:00,,23.9,31.0 +2019-05-13 22:00:00,,28.3,31.0 +2019-05-13 23:00:00,,30.4,31.0 +2019-05-14 00:00:00,,27.3,31.0 +2019-05-14 01:00:00,,22.8,23.0 +2019-05-14 02:00:00,,20.9,23.0 +2019-05-14 03:00:00,14.5,19.1,26.0 +2019-05-14 04:00:00,11.5,19.0,26.0 +2019-05-14 05:00:00,,22.1,30.0 +2019-05-14 06:00:00,,31.6,30.0 +2019-05-14 07:00:00,,38.6,33.0 +2019-05-14 08:00:00,,46.1,34.0 +2019-05-14 09:00:00,,41.3,33.0 +2019-05-14 10:00:00,,28.8,30.0 +2019-05-14 11:00:00,,19.0,31.0 +2019-05-14 12:00:00,,12.9,27.0 +2019-05-14 13:00:00,,11.3,25.0 +2019-05-14 14:00:00,,10.2,25.0 +2019-05-14 15:00:00,,11.0,25.0 +2019-05-14 16:00:00,,15.2,29.0 +2019-05-14 17:00:00,,13.4,32.0 +2019-05-14 18:00:00,,15.3,33.0 +2019-05-14 19:00:00,,17.7,30.0 +2019-05-14 20:00:00,,17.9,28.0 +2019-05-14 21:00:00,,23.3,27.0 +2019-05-14 22:00:00,,28.4,25.0 +2019-05-14 23:00:00,,29.0,26.0 +2019-05-15 00:00:00,,30.9,26.0 +2019-05-15 01:00:00,,24.3,22.0 +2019-05-15 02:00:00,,18.8, +2019-05-15 03:00:00,25.5,17.2,22.0 +2019-05-15 04:00:00,22.5,16.8,22.0 
+2019-05-15 05:00:00,,17.9,25.0 +2019-05-15 06:00:00,,28.9,25.0 +2019-05-15 07:00:00,,46.5,33.0 +2019-05-15 08:00:00,,48.1,33.0 +2019-05-15 09:00:00,,32.1,34.0 +2019-05-15 10:00:00,,25.7,35.0 +2019-05-15 11:00:00,,0.0,36.0 +2019-05-15 12:00:00,,0.0,35.0 +2019-05-15 13:00:00,,0.0,30.0 +2019-05-15 14:00:00,,9.4,31.0 +2019-05-15 15:00:00,,10.0,30.0 +2019-05-15 16:00:00,,11.9,38.0 +2019-05-15 17:00:00,,12.9,38.0 +2019-05-15 18:00:00,,12.2,33.0 +2019-05-15 19:00:00,,12.9,35.0 +2019-05-15 20:00:00,,16.5,33.0 +2019-05-15 21:00:00,,20.3,31.0 +2019-05-15 22:00:00,,30.1,32.0 +2019-05-15 23:00:00,,36.0,33.0 +2019-05-16 00:00:00,,44.1,33.0 +2019-05-16 01:00:00,,30.9,33.0 +2019-05-16 02:00:00,,27.4,33.0 +2019-05-16 03:00:00,28.0,26.0,28.0 +2019-05-16 04:00:00,,26.7,28.0 +2019-05-16 05:00:00,,27.9,26.0 +2019-05-16 06:00:00,,37.0,26.0 +2019-05-16 07:00:00,,52.6,33.0 +2019-05-16 08:00:00,,,34.0 +2019-05-16 09:00:00,,40.0,33.0 +2019-05-16 10:00:00,,39.4,32.0 +2019-05-16 11:00:00,,29.5,31.0 +2019-05-16 12:00:00,,13.5,33.0 +2019-05-16 13:00:00,,10.5,30.0 +2019-05-16 14:00:00,,9.2,27.0 +2019-05-16 15:00:00,,8.5,27.0 +2019-05-16 16:00:00,,8.1,26.0 +2019-05-16 17:00:00,,10.1,29.0 +2019-05-16 18:00:00,,10.3,30.0 +2019-05-16 19:00:00,,13.5,25.0 +2019-05-16 20:00:00,,15.9,27.0 +2019-05-16 21:00:00,,14.4,26.0 +2019-05-16 22:00:00,,24.8,25.0 +2019-05-16 23:00:00,,24.3,25.0 +2019-05-17 00:00:00,,37.1,25.0 +2019-05-17 01:00:00,,43.7,23.0 +2019-05-17 02:00:00,,46.3,23.0 +2019-05-17 03:00:00,,26.1,21.0 +2019-05-17 04:00:00,,24.6,21.0 +2019-05-17 05:00:00,,26.6,21.0 +2019-05-17 06:00:00,,28.4,21.0 +2019-05-17 07:00:00,,34.0,25.0 +2019-05-17 08:00:00,,46.3,27.0 +2019-05-17 09:00:00,,55.0,27.0 +2019-05-17 10:00:00,,57.5,29.0 +2019-05-17 11:00:00,,60.5,30.0 +2019-05-17 12:00:00,,51.5,30.0 +2019-05-17 13:00:00,,43.1,30.0 +2019-05-17 14:00:00,,46.5,29.0 +2019-05-17 15:00:00,,37.9,31.0 +2019-05-17 16:00:00,,27.0,32.0 +2019-05-17 17:00:00,,22.2,30.0 +2019-05-17 18:00:00,,20.7,29.0 +2019-05-17 
19:00:00,,27.9,31.0 +2019-05-17 20:00:00,,33.6,36.0 +2019-05-17 21:00:00,,24.7,36.0 +2019-05-17 22:00:00,,23.5,36.0 +2019-05-17 23:00:00,,24.3,35.0 +2019-05-18 00:00:00,,28.2,35.0 +2019-05-18 01:00:00,,34.1,31.0 +2019-05-18 02:00:00,,31.5,31.0 +2019-05-18 03:00:00,41.5,37.4,31.0 +2019-05-18 04:00:00,,29.0,31.0 +2019-05-18 05:00:00,,16.1,29.0 +2019-05-18 06:00:00,,16.6,29.0 +2019-05-18 07:00:00,,20.1,27.0 +2019-05-18 08:00:00,,22.1,29.0 +2019-05-18 09:00:00,,27.4,35.0 +2019-05-18 10:00:00,,20.4,32.0 +2019-05-18 11:00:00,,21.1,35.0 +2019-05-18 12:00:00,,24.1,34.0 +2019-05-18 13:00:00,,17.5,38.0 +2019-05-18 14:00:00,,12.9,29.0 +2019-05-18 15:00:00,,10.5,27.0 +2019-05-18 16:00:00,,11.8,28.0 +2019-05-18 17:00:00,,13.0,30.0 +2019-05-18 18:00:00,,14.6,42.0 +2019-05-18 19:00:00,,12.8,42.0 +2019-05-18 20:00:00,35.5,14.5,36.0 +2019-05-18 21:00:00,35.5,67.5,35.0 +2019-05-18 22:00:00,40.0,36.2,41.0 +2019-05-18 23:00:00,39.0,59.3,46.0 +2019-05-19 00:00:00,34.5,62.5,46.0 +2019-05-19 01:00:00,29.5,50.2,49.0 +2019-05-19 02:00:00,23.5,49.6,49.0 +2019-05-19 03:00:00,22.5,34.9,49.0 +2019-05-19 04:00:00,19.0,38.1,49.0 +2019-05-19 05:00:00,19.0,36.4,49.0 +2019-05-19 06:00:00,21.0,39.4,49.0 +2019-05-19 07:00:00,26.0,40.9,38.0 +2019-05-19 08:00:00,30.5,31.1,36.0 +2019-05-19 09:00:00,30.0,32.4,33.0 +2019-05-19 10:00:00,23.5,31.7,30.0 +2019-05-19 11:00:00,16.0,33.0,27.0 +2019-05-19 12:00:00,17.5,31.0,28.0 +2019-05-19 13:00:00,17.0,32.6,25.0 +2019-05-19 14:00:00,16.0,27.9,27.0 +2019-05-19 15:00:00,14.5,21.0,31.0 +2019-05-19 16:00:00,23.0,23.8,29.0 +2019-05-19 17:00:00,33.0,31.7,28.0 +2019-05-19 18:00:00,17.5,32.5,27.0 +2019-05-19 19:00:00,18.5,33.9,29.0 +2019-05-19 20:00:00,15.5,32.7,30.0 +2019-05-19 21:00:00,26.0,51.2,32.0 +2019-05-19 22:00:00,15.0,35.6,32.0 +2019-05-19 23:00:00,12.5,23.2,32.0 +2019-05-20 00:00:00,18.5,22.2,32.0 +2019-05-20 01:00:00,16.5,18.8,28.0 +2019-05-20 02:00:00,26.0,16.4,28.0 +2019-05-20 03:00:00,17.0,12.8,32.0 +2019-05-20 04:00:00,10.5,12.1,32.0 +2019-05-20 
05:00:00,9.0,12.6,26.0 +2019-05-20 06:00:00,14.0,14.9,26.0 +2019-05-20 07:00:00,20.0,25.2,31.0 +2019-05-20 08:00:00,26.0,40.1,31.0 +2019-05-20 09:00:00,38.0,46.9,29.0 +2019-05-20 10:00:00,40.0,46.1,29.0 +2019-05-20 11:00:00,30.5,45.5,28.0 +2019-05-20 12:00:00,25.0,43.9,28.0 +2019-05-20 13:00:00,25.0,35.4,28.0 +2019-05-20 14:00:00,34.5,23.8,29.0 +2019-05-20 15:00:00,32.0,23.7,32.0 +2019-05-20 16:00:00,24.5,27.5,32.0 +2019-05-20 17:00:00,25.5,26.5,29.0 +2019-05-20 18:00:00,,32.4,30.0 +2019-05-20 19:00:00,,24.6,33.0 +2019-05-20 20:00:00,,32.2,32.0 +2019-05-20 21:00:00,,21.3,32.0 +2019-05-20 22:00:00,,21.6,34.0 +2019-05-20 23:00:00,,20.3,47.0 +2019-05-21 00:00:00,,20.7,47.0 +2019-05-21 01:00:00,,19.6,35.0 +2019-05-21 02:00:00,,16.9,35.0 +2019-05-21 03:00:00,15.5,16.3,26.0 +2019-05-21 04:00:00,,17.7,26.0 +2019-05-21 05:00:00,,17.9,23.0 +2019-05-21 06:00:00,,18.5,23.0 +2019-05-21 07:00:00,,38.0,30.0 +2019-05-21 08:00:00,,62.6,27.0 +2019-05-21 09:00:00,,56.0,28.0 +2019-05-21 10:00:00,,54.2,29.0 +2019-05-21 11:00:00,,48.1,29.0 +2019-05-21 12:00:00,,30.4,26.0 +2019-05-21 13:00:00,,25.5,26.0 +2019-05-21 14:00:00,,30.5,28.0 +2019-05-21 15:00:00,,49.7,33.0 +2019-05-21 16:00:00,,47.8,34.0 +2019-05-21 17:00:00,,36.6,34.0 +2019-05-21 18:00:00,,42.3,37.0 +2019-05-21 19:00:00,,75.0,35.0 +2019-05-21 20:00:00,,54.3,40.0 +2019-05-21 21:00:00,,50.0,38.0 +2019-05-21 22:00:00,,40.8,33.0 +2019-05-21 23:00:00,,43.0,33.0 +2019-05-22 00:00:00,,33.2,33.0 +2019-05-22 01:00:00,,29.5,30.0 +2019-05-22 02:00:00,,27.1,30.0 +2019-05-22 03:00:00,20.5,27.9,27.0 +2019-05-22 04:00:00,,19.2,27.0 +2019-05-22 05:00:00,,25.2,21.0 +2019-05-22 06:00:00,,33.7,21.0 +2019-05-22 07:00:00,,45.1,28.0 +2019-05-22 08:00:00,,75.7,29.0 +2019-05-22 09:00:00,,75.4,31.0 +2019-05-22 10:00:00,,70.8,31.0 +2019-05-22 11:00:00,,63.1,31.0 +2019-05-22 12:00:00,,57.8,28.0 +2019-05-22 13:00:00,,42.6,25.0 +2019-05-22 14:00:00,,42.2,25.0 +2019-05-22 15:00:00,,38.5,28.0 +2019-05-22 16:00:00,,40.0,30.0 +2019-05-22 17:00:00,,33.2,32.0 
+2019-05-22 18:00:00,,34.9,34.0 +2019-05-22 19:00:00,,36.1,34.0 +2019-05-22 20:00:00,,34.1,33.0 +2019-05-22 21:00:00,,36.2,33.0 +2019-05-22 22:00:00,,44.9,31.0 +2019-05-22 23:00:00,,37.7,32.0 +2019-05-23 00:00:00,,29.8,32.0 +2019-05-23 01:00:00,,62.1,23.0 +2019-05-23 02:00:00,,53.3,23.0 +2019-05-23 03:00:00,60.5,53.1,20.0 +2019-05-23 04:00:00,,66.6,20.0 +2019-05-23 05:00:00,,76.8,19.0 +2019-05-23 06:00:00,,71.9,19.0 +2019-05-23 07:00:00,,68.7,24.0 +2019-05-23 08:00:00,,79.6,26.0 +2019-05-23 09:00:00,,91.8,25.0 +2019-05-23 10:00:00,,97.0,23.0 +2019-05-23 11:00:00,,79.4,25.0 +2019-05-23 12:00:00,,28.3,24.0 +2019-05-23 13:00:00,,17.0,25.0 +2019-05-23 14:00:00,,16.4,28.0 +2019-05-23 15:00:00,,21.2,34.0 +2019-05-23 16:00:00,,17.2,38.0 +2019-05-23 17:00:00,,17.5,53.0 +2019-05-23 18:00:00,,17.8,60.0 +2019-05-23 19:00:00,,22.7,54.0 +2019-05-23 20:00:00,,23.5,51.0 +2019-05-23 21:00:00,,28.0,45.0 +2019-05-23 22:00:00,,33.8,44.0 +2019-05-23 23:00:00,,47.0,39.0 +2019-05-24 00:00:00,,61.9,39.0 +2019-05-24 01:00:00,,23.2,31.0 +2019-05-24 02:00:00,,32.8, +2019-05-24 03:00:00,74.5,28.8,31.0 +2019-05-24 04:00:00,,28.4,31.0 +2019-05-24 05:00:00,,19.4,23.0 +2019-05-24 06:00:00,,28.1,23.0 +2019-05-24 07:00:00,,35.9,29.0 +2019-05-24 08:00:00,,40.7,28.0 +2019-05-24 09:00:00,,54.8,26.0 +2019-05-24 10:00:00,,45.9,24.0 +2019-05-24 11:00:00,,37.9,23.0 +2019-05-24 12:00:00,,28.6,26.0 +2019-05-24 13:00:00,,40.6,29.0 +2019-05-24 14:00:00,,29.3,33.0 +2019-05-24 15:00:00,,24.3,39.0 +2019-05-24 16:00:00,,20.5,40.0 +2019-05-24 17:00:00,,22.7,43.0 +2019-05-24 18:00:00,,27.3,46.0 +2019-05-24 19:00:00,,25.2,46.0 +2019-05-24 20:00:00,,23.3,44.0 +2019-05-24 21:00:00,,21.9,42.0 +2019-05-24 22:00:00,,31.7,38.0 +2019-05-24 23:00:00,,18.1,39.0 +2019-05-25 00:00:00,,18.0,39.0 +2019-05-25 01:00:00,,16.5,32.0 +2019-05-25 02:00:00,,17.4,32.0 +2019-05-25 03:00:00,29.0,12.8,25.0 +2019-05-25 04:00:00,,20.3,25.0 +2019-05-25 05:00:00,,,21.0 +2019-05-25 06:00:00,,,21.0 +2019-05-25 07:00:00,,,22.0 +2019-05-25 
08:00:00,,36.9,22.0 +2019-05-25 09:00:00,,42.1,23.0 +2019-05-25 10:00:00,,44.5,23.0 +2019-05-25 11:00:00,,33.6,21.0 +2019-05-25 12:00:00,,26.3,23.0 +2019-05-25 13:00:00,,19.5,24.0 +2019-05-25 14:00:00,,18.6,26.0 +2019-05-25 15:00:00,,26.1,31.0 +2019-05-25 16:00:00,,23.6,37.0 +2019-05-25 17:00:00,,30.0,42.0 +2019-05-25 18:00:00,,31.9,46.0 +2019-05-25 19:00:00,,20.6,47.0 +2019-05-25 20:00:00,,30.4,47.0 +2019-05-25 21:00:00,,22.1,44.0 +2019-05-25 22:00:00,,43.6,41.0 +2019-05-25 23:00:00,,39.5,36.0 +2019-05-26 00:00:00,,63.9,36.0 +2019-05-26 01:00:00,,70.2,32.0 +2019-05-26 02:00:00,,67.0,32.0 +2019-05-26 03:00:00,53.0,49.8,26.0 +2019-05-26 04:00:00,,23.4,26.0 +2019-05-26 05:00:00,,22.9,20.0 +2019-05-26 06:00:00,,22.3,20.0 +2019-05-26 07:00:00,,16.8,17.0 +2019-05-26 08:00:00,,15.1,17.0 +2019-05-26 09:00:00,,13.4,15.0 +2019-05-26 10:00:00,,11.0,15.0 +2019-05-26 11:00:00,,10.3,16.0 +2019-05-26 12:00:00,,11.3,17.0 +2019-05-26 13:00:00,,13.3,21.0 +2019-05-26 14:00:00,,11.5,24.0 +2019-05-26 15:00:00,,12.5,25.0 +2019-05-26 16:00:00,,15.3,26.0 +2019-05-26 17:00:00,,11.7,27.0 +2019-05-26 18:00:00,,17.1,26.0 +2019-05-26 19:00:00,,17.3,28.0 +2019-05-26 20:00:00,,22.8,26.0 +2019-05-26 21:00:00,,17.8,25.0 +2019-05-26 22:00:00,,16.6,27.0 +2019-05-26 23:00:00,,16.1,26.0 +2019-05-27 00:00:00,,15.2,26.0 +2019-05-27 01:00:00,,10.3,26.0 +2019-05-27 02:00:00,,9.5,26.0 +2019-05-27 03:00:00,10.5,7.1,24.0 +2019-05-27 04:00:00,,5.9,24.0 +2019-05-27 05:00:00,,4.8,19.0 +2019-05-27 06:00:00,,6.5,19.0 +2019-05-27 07:00:00,,20.3,18.0 +2019-05-27 08:00:00,,29.1,18.0 +2019-05-27 09:00:00,,29.5,18.0 +2019-05-27 10:00:00,,34.2,18.0 +2019-05-27 11:00:00,,31.4,16.0 +2019-05-27 12:00:00,,23.3,17.0 +2019-05-27 13:00:00,,19.3,17.0 +2019-05-27 14:00:00,,17.3,20.0 +2019-05-27 15:00:00,,17.5,20.0 +2019-05-27 16:00:00,,17.3,22.0 +2019-05-27 17:00:00,,25.6,22.0 +2019-05-27 18:00:00,,23.6,22.0 +2019-05-27 19:00:00,,22.9,22.0 +2019-05-27 20:00:00,,25.6,22.0 +2019-05-27 21:00:00,,22.1,23.0 +2019-05-27 
22:00:00,,22.3,20.0 +2019-05-27 23:00:00,,18.8,19.0 +2019-05-28 00:00:00,,19.9,19.0 +2019-05-28 01:00:00,,22.6,16.0 +2019-05-28 02:00:00,,15.4,16.0 +2019-05-28 03:00:00,11.0,8.2,16.0 +2019-05-28 04:00:00,,6.4,16.0 +2019-05-28 05:00:00,,6.1,15.0 +2019-05-28 06:00:00,,8.9,15.0 +2019-05-28 07:00:00,,19.9,19.0 +2019-05-28 08:00:00,,28.8,20.0 +2019-05-28 09:00:00,,33.8,20.0 +2019-05-28 10:00:00,,31.2,20.0 +2019-05-28 11:00:00,,24.3,21.0 +2019-05-28 12:00:00,,21.6,21.0 +2019-05-28 13:00:00,,20.5,28.0 +2019-05-28 14:00:00,,24.8,27.0 +2019-05-28 15:00:00,,18.5,29.0 +2019-05-28 16:00:00,,18.8,30.0 +2019-05-28 17:00:00,,25.0,27.0 +2019-05-28 18:00:00,,26.5,25.0 +2019-05-28 19:00:00,,20.8,29.0 +2019-05-28 20:00:00,,16.2,29.0 +2019-05-28 21:00:00,,18.5,29.0 +2019-05-28 22:00:00,,20.4,31.0 +2019-05-28 23:00:00,,20.4, +2019-05-29 00:00:00,,20.2,25.0 +2019-05-29 01:00:00,,25.3,26.0 +2019-05-29 02:00:00,,23.4,26.0 +2019-05-29 03:00:00,21.0,21.6,23.0 +2019-05-29 04:00:00,,19.0,23.0 +2019-05-29 05:00:00,,20.3,21.0 +2019-05-29 06:00:00,,24.1,21.0 +2019-05-29 07:00:00,,36.7,24.0 +2019-05-29 08:00:00,,46.5,22.0 +2019-05-29 09:00:00,,50.5,21.0 +2019-05-29 10:00:00,,45.7,18.0 +2019-05-29 11:00:00,,34.5,18.0 +2019-05-29 12:00:00,,30.7,18.0 +2019-05-29 13:00:00,,22.0,20.0 +2019-05-29 14:00:00,,13.2,13.0 +2019-05-29 15:00:00,,17.8,15.0 +2019-05-29 16:00:00,,0.0,5.0 +2019-05-29 17:00:00,,0.0,3.0 +2019-05-29 18:00:00,,20.1,5.0 +2019-05-29 19:00:00,,22.9,5.0 +2019-05-29 20:00:00,,25.3,5.0 +2019-05-29 21:00:00,,24.1,6.0 +2019-05-29 22:00:00,,20.8,6.0 +2019-05-29 23:00:00,,16.9,5.0 +2019-05-30 00:00:00,,19.0,5.0 +2019-05-30 01:00:00,,19.9,1.0 +2019-05-30 02:00:00,,19.4,1.0 +2019-05-30 03:00:00,7.5,12.4,0.0 +2019-05-30 04:00:00,,9.4,0.0 +2019-05-30 05:00:00,,10.6,0.0 +2019-05-30 06:00:00,,10.4,0.0 +2019-05-30 07:00:00,,12.2,0.0 +2019-05-30 08:00:00,,13.3,2.0 +2019-05-30 09:00:00,,18.3,3.0 +2019-05-30 10:00:00,,16.7,5.0 +2019-05-30 11:00:00,,15.1,9.0 +2019-05-30 12:00:00,,13.8,13.0 +2019-05-30 
13:00:00,,14.9,17.0 +2019-05-30 14:00:00,,14.2,20.0 +2019-05-30 15:00:00,,16.1,22.0 +2019-05-30 16:00:00,,14.9,22.0 +2019-05-30 17:00:00,,13.0,27.0 +2019-05-30 18:00:00,,12.8,30.0 +2019-05-30 19:00:00,,20.4,28.0 +2019-05-30 20:00:00,,22.1,28.0 +2019-05-30 21:00:00,,22.9,27.0 +2019-05-30 22:00:00,,21.9,27.0 +2019-05-30 23:00:00,,26.9,23.0 +2019-05-31 00:00:00,,27.0,23.0 +2019-05-31 01:00:00,,29.6,18.0 +2019-05-31 02:00:00,,27.2,18.0 +2019-05-31 03:00:00,9.0,36.9,12.0 +2019-05-31 04:00:00,,44.1,12.0 +2019-05-31 05:00:00,,40.1,9.0 +2019-05-31 06:00:00,,31.1,9.0 +2019-05-31 07:00:00,,37.2,8.0 +2019-05-31 08:00:00,,38.6,9.0 +2019-05-31 09:00:00,,47.4,8.0 +2019-05-31 10:00:00,,36.6,37.0 +2019-05-31 11:00:00,,19.6,15.0 +2019-05-31 12:00:00,,17.2,16.0 +2019-05-31 13:00:00,,15.1,18.0 +2019-05-31 14:00:00,,13.3,21.0 +2019-05-31 15:00:00,,13.8,21.0 +2019-05-31 16:00:00,,15.4,24.0 +2019-05-31 17:00:00,,15.4,26.0 +2019-05-31 18:00:00,,16.3,26.0 +2019-05-31 19:00:00,,20.5,29.0 +2019-05-31 20:00:00,,25.2,33.0 +2019-05-31 21:00:00,,23.3,33.0 +2019-05-31 22:00:00,,37.0,31.0 +2019-05-31 23:00:00,,60.2,26.0 +2019-06-01 00:00:00,,68.0,26.0 +2019-06-01 01:00:00,,81.7,22.0 +2019-06-01 02:00:00,,84.7,22.0 +2019-06-01 03:00:00,52.5,74.8,16.0 +2019-06-01 04:00:00,,68.1,16.0 +2019-06-01 05:00:00,,,11.0 +2019-06-01 06:00:00,,,11.0 +2019-06-01 07:00:00,,,4.0 +2019-06-01 08:00:00,,44.6,2.0 +2019-06-01 09:00:00,,46.4,8.0 +2019-06-01 10:00:00,,33.3,9.0 +2019-06-01 11:00:00,,23.9,12.0 +2019-06-01 12:00:00,,13.8,19.0 +2019-06-01 13:00:00,,12.2,28.0 +2019-06-01 14:00:00,,10.4,33.0 +2019-06-01 15:00:00,,10.2,36.0 +2019-06-01 16:00:00,,10.0,33.0 +2019-06-01 17:00:00,,10.2,31.0 +2019-06-01 18:00:00,,11.8,32.0 +2019-06-01 19:00:00,,11.8,36.0 +2019-06-01 20:00:00,,14.5,38.0 +2019-06-01 21:00:00,,24.6,41.0 +2019-06-01 22:00:00,,43.6,44.0 +2019-06-01 23:00:00,,49.4,52.0 +2019-06-02 00:00:00,,48.1,52.0 +2019-06-02 01:00:00,,32.7,44.0 +2019-06-02 02:00:00,,38.1,44.0 +2019-06-02 03:00:00,,38.2,43.0 
+2019-06-02 04:00:00,,39.2,43.0 +2019-06-02 05:00:00,,23.2,37.0 +2019-06-02 06:00:00,,24.5,37.0 +2019-06-02 07:00:00,,37.2,32.0 +2019-06-02 08:00:00,,24.1,32.0 +2019-06-02 09:00:00,,18.1,30.0 +2019-06-02 10:00:00,,19.5,32.0 +2019-06-02 11:00:00,,21.0,35.0 +2019-06-02 12:00:00,,18.1,36.0 +2019-06-02 13:00:00,,13.1,35.0 +2019-06-02 14:00:00,,11.5,34.0 +2019-06-02 15:00:00,,13.0,36.0 +2019-06-02 16:00:00,,15.0,33.0 +2019-06-02 17:00:00,,13.9,32.0 +2019-06-02 18:00:00,,14.4,32.0 +2019-06-02 19:00:00,,14.4,34.0 +2019-06-02 20:00:00,,15.6,34.0 +2019-06-02 21:00:00,,25.8,32.0 +2019-06-02 22:00:00,,40.9,28.0 +2019-06-02 23:00:00,,36.9,27.0 +2019-06-03 00:00:00,,27.6,27.0 +2019-06-03 01:00:00,,17.9,21.0 +2019-06-03 02:00:00,,15.7,21.0 +2019-06-03 03:00:00,,11.8,11.0 +2019-06-03 04:00:00,,11.7,11.0 +2019-06-03 05:00:00,,9.8,3.0 +2019-06-03 06:00:00,,11.4,3.0 +2019-06-03 07:00:00,,29.0,5.0 +2019-06-03 08:00:00,,44.1,6.0 +2019-06-03 09:00:00,,50.0,7.0 +2019-06-03 10:00:00,,43.9,5.0 +2019-06-03 11:00:00,,46.0,11.0 +2019-06-03 12:00:00,,31.7,16.0 +2019-06-03 13:00:00,,27.5,14.0 +2019-06-03 14:00:00,,22.1,15.0 +2019-06-03 15:00:00,,25.8,17.0 +2019-06-03 16:00:00,,23.2,21.0 +2019-06-03 17:00:00,,24.8,22.0 +2019-06-03 18:00:00,,25.3,24.0 +2019-06-03 19:00:00,,24.4,24.0 +2019-06-03 20:00:00,,23.1,23.0 +2019-06-03 21:00:00,,28.9,20.0 +2019-06-03 22:00:00,,33.0,20.0 +2019-06-03 23:00:00,,31.1,17.0 +2019-06-04 00:00:00,,30.5,17.0 +2019-06-04 01:00:00,,44.6,12.0 +2019-06-04 02:00:00,,52.4,12.0 +2019-06-04 03:00:00,,43.9,8.0 +2019-06-04 04:00:00,,35.0,8.0 +2019-06-04 05:00:00,,41.6,5.0 +2019-06-04 06:00:00,,28.8,5.0 +2019-06-04 07:00:00,,36.5,14.0 +2019-06-04 08:00:00,,47.7,18.0 +2019-06-04 09:00:00,,53.5,22.0 +2019-06-04 10:00:00,,50.8,35.0 +2019-06-04 11:00:00,,38.5,31.0 +2019-06-04 12:00:00,,23.3,32.0 +2019-06-04 13:00:00,,19.6,35.0 +2019-06-04 14:00:00,,17.7,37.0 +2019-06-04 15:00:00,,17.4,36.0 +2019-06-04 16:00:00,,18.1,38.0 +2019-06-04 17:00:00,,21.5,38.0 +2019-06-04 
18:00:00,,26.3,40.0 +2019-06-04 19:00:00,,23.4,29.0 +2019-06-04 20:00:00,,25.2,20.0 +2019-06-04 21:00:00,,17.0,18.0 +2019-06-04 22:00:00,,16.9,17.0 +2019-06-04 23:00:00,,26.3,17.0 +2019-06-05 00:00:00,,33.5,17.0 +2019-06-05 01:00:00,,17.8,13.0 +2019-06-05 02:00:00,,15.7,13.0 +2019-06-05 03:00:00,15.0,10.8,4.0 +2019-06-05 04:00:00,,12.4,4.0 +2019-06-05 05:00:00,,16.2,6.0 +2019-06-05 06:00:00,,24.5,6.0 +2019-06-05 07:00:00,,39.2,2.0 +2019-06-05 08:00:00,,35.8,1.0 +2019-06-05 09:00:00,,36.9,0.0 +2019-06-05 10:00:00,,35.3,0.0 +2019-06-05 11:00:00,,36.8,5.0 +2019-06-05 12:00:00,,42.1,7.0 +2019-06-05 13:00:00,,59.0,9.0 +2019-06-05 14:00:00,,47.2,14.0 +2019-06-05 15:00:00,,33.6,20.0 +2019-06-05 16:00:00,,38.3,20.0 +2019-06-05 17:00:00,,53.5,19.0 +2019-06-05 18:00:00,,37.9,19.0 +2019-06-05 19:00:00,,48.8,19.0 +2019-06-05 20:00:00,,40.8,19.0 +2019-06-05 21:00:00,,37.8,19.0 +2019-06-05 22:00:00,,37.5,19.0 +2019-06-05 23:00:00,,33.7,17.0 +2019-06-06 00:00:00,,30.3,17.0 +2019-06-06 01:00:00,,31.8,8.0 +2019-06-06 02:00:00,,23.8, +2019-06-06 03:00:00,,18.0,4.0 +2019-06-06 04:00:00,,15.2,4.0 +2019-06-06 05:00:00,,19.2,0.0 +2019-06-06 06:00:00,,28.4,0.0 +2019-06-06 07:00:00,,40.3,1.0 +2019-06-06 08:00:00,,40.5,3.0 +2019-06-06 09:00:00,,43.1,0.0 +2019-06-06 10:00:00,,36.0,1.0 +2019-06-06 11:00:00,,26.0,7.0 +2019-06-06 12:00:00,,21.2,7.0 +2019-06-06 13:00:00,,16.4,12.0 +2019-06-06 14:00:00,,16.5,10.0 +2019-06-06 15:00:00,,16.0,11.0 +2019-06-06 16:00:00,,15.1,16.0 +2019-06-06 17:00:00,,,22.0 +2019-06-06 18:00:00,,,24.0 +2019-06-06 19:00:00,,,24.0 +2019-06-06 20:00:00,,,24.0 +2019-06-06 21:00:00,,,22.0 +2019-06-06 22:00:00,,,24.0 +2019-06-06 23:00:00,,,21.0 +2019-06-07 00:00:00,,,21.0 +2019-06-07 01:00:00,,,23.0 +2019-06-07 02:00:00,,,23.0 +2019-06-07 03:00:00,,,27.0 +2019-06-07 04:00:00,,,27.0 +2019-06-07 05:00:00,,,23.0 +2019-06-07 06:00:00,,,23.0 +2019-06-07 07:00:00,,,25.0 +2019-06-07 08:00:00,,28.9,23.0 +2019-06-07 09:00:00,,23.0,24.0 +2019-06-07 10:00:00,,29.3,25.0 +2019-06-07 
11:00:00,,34.5,23.0 +2019-06-07 12:00:00,,32.1,25.0 +2019-06-07 13:00:00,,26.7,27.0 +2019-06-07 14:00:00,,17.8,20.0 +2019-06-07 15:00:00,,15.0,15.0 +2019-06-07 16:00:00,,13.1,15.0 +2019-06-07 17:00:00,,15.6,21.0 +2019-06-07 18:00:00,,19.5,24.0 +2019-06-07 19:00:00,,19.5,27.0 +2019-06-07 20:00:00,,19.1,35.0 +2019-06-07 21:00:00,,19.9,36.0 +2019-06-07 22:00:00,,19.4,35.0 +2019-06-07 23:00:00,,16.3, +2019-06-08 00:00:00,,14.7,33.0 +2019-06-08 01:00:00,,14.4,28.0 +2019-06-08 02:00:00,,11.3, +2019-06-08 03:00:00,,9.6,7.0 +2019-06-08 04:00:00,,8.4,7.0 +2019-06-08 05:00:00,,9.8,3.0 +2019-06-08 06:00:00,,10.7,3.0 +2019-06-08 07:00:00,,14.1,2.0 +2019-06-08 08:00:00,,13.8,3.0 +2019-06-08 09:00:00,,14.0,4.0 +2019-06-08 10:00:00,,13.0,2.0 +2019-06-08 11:00:00,,11.7,3.0 +2019-06-08 12:00:00,,10.3,4.0 +2019-06-08 13:00:00,,10.4,8.0 +2019-06-08 14:00:00,,9.2,10.0 +2019-06-08 15:00:00,,11.1,13.0 +2019-06-08 16:00:00,,10.3,17.0 +2019-06-08 17:00:00,,11.7,19.0 +2019-06-08 18:00:00,,14.1,20.0 +2019-06-08 19:00:00,,14.8,20.0 +2019-06-08 20:00:00,,22.0,19.0 +2019-06-08 21:00:00,,,17.0 +2019-06-08 22:00:00,,,16.0 +2019-06-08 23:00:00,,36.7, +2019-06-09 00:00:00,,34.8,20.0 +2019-06-09 01:00:00,,47.0,10.0 +2019-06-09 02:00:00,,55.9,10.0 +2019-06-09 03:00:00,10.0,41.0,7.0 +2019-06-09 04:00:00,,51.2,7.0 +2019-06-09 05:00:00,,51.5,1.0 +2019-06-09 06:00:00,,43.0,1.0 +2019-06-09 07:00:00,,42.2,5.0 +2019-06-09 08:00:00,,36.7,1.0 +2019-06-09 09:00:00,,32.7,0.0 +2019-06-09 10:00:00,,30.2,0.0 +2019-06-09 11:00:00,,25.0,2.0 +2019-06-09 12:00:00,,16.6,5.0 +2019-06-09 13:00:00,,14.6,8.0 +2019-06-09 14:00:00,,14.6,13.0 +2019-06-09 15:00:00,,10.2,17.0 +2019-06-09 16:00:00,,7.9,19.0 +2019-06-09 17:00:00,,7.2,24.0 +2019-06-09 18:00:00,,10.3,26.0 +2019-06-09 19:00:00,,13.0,20.0 +2019-06-09 20:00:00,,19.5,21.0 +2019-06-09 21:00:00,,30.6,21.0 +2019-06-09 22:00:00,,33.2,22.0 +2019-06-09 23:00:00,,30.9, +2019-06-10 00:00:00,,37.1,24.0 +2019-06-10 01:00:00,,39.9,21.0 +2019-06-10 02:00:00,,28.1,21.0 +2019-06-10 
03:00:00,18.5,19.3,25.0 +2019-06-10 04:00:00,,17.8,25.0 +2019-06-10 05:00:00,,18.0,24.0 +2019-06-10 06:00:00,,13.7,24.0 +2019-06-10 07:00:00,,21.3,24.0 +2019-06-10 08:00:00,,26.7,22.0 +2019-06-10 09:00:00,,23.0,27.0 +2019-06-10 10:00:00,,16.9,34.0 +2019-06-10 11:00:00,,18.5,45.0 +2019-06-10 12:00:00,,14.1,41.0 +2019-06-10 13:00:00,,12.2,45.0 +2019-06-10 14:00:00,,11.7,51.0 +2019-06-10 15:00:00,,9.6,40.0 +2019-06-10 16:00:00,,9.5,40.0 +2019-06-10 17:00:00,,11.7,31.0 +2019-06-10 18:00:00,,15.1,28.0 +2019-06-10 19:00:00,,19.1,26.0 +2019-06-10 20:00:00,,18.4,25.0 +2019-06-10 21:00:00,,22.3,26.0 +2019-06-10 22:00:00,,22.6,24.0 +2019-06-10 23:00:00,,23.5,23.0 +2019-06-11 00:00:00,,24.8,23.0 +2019-06-11 01:00:00,,24.1,15.0 +2019-06-11 02:00:00,,19.6,15.0 +2019-06-11 03:00:00,7.5,19.1,16.0 +2019-06-11 04:00:00,,29.6,16.0 +2019-06-11 05:00:00,,32.3,13.0 +2019-06-11 06:00:00,,52.7,13.0 +2019-06-11 07:00:00,,58.7,17.0 +2019-06-11 08:00:00,,55.4,18.0 +2019-06-11 09:00:00,,58.0,21.0 +2019-06-11 10:00:00,,43.6,23.0 +2019-06-11 11:00:00,,31.7,22.0 +2019-06-11 12:00:00,,22.1,22.0 +2019-06-11 13:00:00,,17.3,23.0 +2019-06-11 14:00:00,,12.6,26.0 +2019-06-11 15:00:00,,13.1,35.0 +2019-06-11 16:00:00,,16.6,31.0 +2019-06-11 17:00:00,,19.8,31.0 +2019-06-11 18:00:00,,22.6,30.0 +2019-06-11 19:00:00,,35.5,31.0 +2019-06-11 20:00:00,,44.6,30.0 +2019-06-11 21:00:00,,36.1,22.0 +2019-06-11 22:00:00,,42.7,22.0 +2019-06-11 23:00:00,,54.1,20.0 +2019-06-12 00:00:00,,59.4,20.0 +2019-06-12 01:00:00,,41.5,15.0 +2019-06-12 02:00:00,,37.2, +2019-06-12 03:00:00,21.0,41.9, +2019-06-12 04:00:00,,34.7,11.0 +2019-06-12 05:00:00,,36.3,9.0 +2019-06-12 06:00:00,,44.9,9.0 +2019-06-12 07:00:00,,42.7,12.0 +2019-06-12 08:00:00,,38.4,17.0 +2019-06-12 09:00:00,,44.4,20.0 +2019-06-12 10:00:00,,35.5,22.0 +2019-06-12 11:00:00,,26.7,25.0 +2019-06-12 12:00:00,,0.0,35.0 +2019-06-12 13:00:00,,0.0,33.0 +2019-06-12 14:00:00,,15.4,33.0 +2019-06-12 15:00:00,,17.9,35.0 +2019-06-12 16:00:00,,20.3,42.0 +2019-06-12 
17:00:00,,16.8,45.0 +2019-06-12 18:00:00,,23.6,43.0 +2019-06-12 19:00:00,,24.2,45.0 +2019-06-12 20:00:00,,25.3,33.0 +2019-06-12 21:00:00,,23.4,41.0 +2019-06-12 22:00:00,,29.2,43.0 +2019-06-12 23:00:00,,29.3, +2019-06-13 00:00:00,,25.6,35.0 +2019-06-13 01:00:00,,26.9,29.0 +2019-06-13 02:00:00,,20.0, +2019-06-13 03:00:00,28.5,18.7,26.0 +2019-06-13 04:00:00,,18.0,26.0 +2019-06-13 05:00:00,,18.8,16.0 +2019-06-13 06:00:00,,24.6,16.0 +2019-06-13 07:00:00,,37.0,19.0 +2019-06-13 08:00:00,,39.8,21.0 +2019-06-13 09:00:00,,40.9,19.0 +2019-06-13 10:00:00,,35.3,16.0 +2019-06-13 11:00:00,,30.2,18.0 +2019-06-13 12:00:00,,24.5,19.0 +2019-06-13 13:00:00,,22.7,19.0 +2019-06-13 14:00:00,,17.9,16.0 +2019-06-13 15:00:00,,18.2,15.0 +2019-06-13 16:00:00,,19.4,13.0 +2019-06-13 17:00:00,,28.8,11.0 +2019-06-13 18:00:00,,36.1,15.0 +2019-06-13 19:00:00,,38.2,14.0 +2019-06-13 20:00:00,,24.0,13.0 +2019-06-13 21:00:00,,27.5,14.0 +2019-06-13 22:00:00,,31.5,15.0 +2019-06-13 23:00:00,,58.8,15.0 +2019-06-14 00:00:00,,77.9,15.0 +2019-06-14 01:00:00,,78.3,13.0 +2019-06-14 02:00:00,,74.2, +2019-06-14 03:00:00,,68.1,8.0 +2019-06-14 04:00:00,,66.6,8.0 +2019-06-14 05:00:00,,48.5,6.0 +2019-06-14 06:00:00,,37.9,6.0 +2019-06-14 07:00:00,,49.3,13.0 +2019-06-14 08:00:00,,64.3,11.0 +2019-06-14 09:00:00,,51.5,11.0 +2019-06-14 10:00:00,,34.3,14.0 +2019-06-14 11:00:00,36.5,27.9,13.0 +2019-06-14 12:00:00,,25.1,13.0 +2019-06-14 13:00:00,,21.8,15.0 +2019-06-14 14:00:00,,17.1,16.0 +2019-06-14 15:00:00,,15.4,22.0 +2019-06-14 16:00:00,,14.2,25.0 +2019-06-14 17:00:00,,15.2,25.0 +2019-06-14 18:00:00,,18.9,26.0 +2019-06-14 19:00:00,,16.6,27.0 +2019-06-14 20:00:00,,19.0,26.0 +2019-06-14 21:00:00,,25.0,26.0 +2019-06-14 22:00:00,,41.9,25.0 +2019-06-14 23:00:00,,55.0,26.0 +2019-06-15 00:00:00,,35.3,26.0 +2019-06-15 01:00:00,,32.1,26.0 +2019-06-15 02:00:00,,29.6, +2019-06-15 03:00:00,17.5,29.0, +2019-06-15 04:00:00,,33.9, +2019-06-15 05:00:00,,,10.0 +2019-06-15 06:00:00,,,10.0 +2019-06-15 07:00:00,,,13.0 +2019-06-15 
08:00:00,,35.8,13.0 +2019-06-15 09:00:00,,24.1,8.0 +2019-06-15 10:00:00,,17.6,8.0 +2019-06-15 11:00:00,,14.0,12.0 +2019-06-15 12:00:00,,12.1,14.0 +2019-06-15 13:00:00,,11.1,13.0 +2019-06-15 14:00:00,,9.4,18.0 +2019-06-15 15:00:00,,9.0,17.0 +2019-06-15 16:00:00,,9.6,18.0 +2019-06-15 17:00:00,,10.5,18.0 +2019-06-15 18:00:00,,10.7,20.0 +2019-06-15 19:00:00,,11.1,22.0 +2019-06-15 20:00:00,,14.0,22.0 +2019-06-15 21:00:00,,14.2,21.0 +2019-06-15 22:00:00,,15.2,20.0 +2019-06-15 23:00:00,,17.2,19.0 +2019-06-16 00:00:00,,20.1,19.0 +2019-06-16 01:00:00,,22.6,15.0 +2019-06-16 02:00:00,,16.5,15.0 +2019-06-16 03:00:00,42.5,12.8,12.0 +2019-06-16 04:00:00,,11.4,12.0 +2019-06-16 05:00:00,,11.2,10.0 +2019-06-16 06:00:00,,11.7,10.0 +2019-06-16 07:00:00,,14.0,8.0 +2019-06-16 08:00:00,,11.6,5.0 +2019-06-16 09:00:00,,10.2,4.0 +2019-06-16 10:00:00,,9.9,5.0 +2019-06-16 11:00:00,,9.4,6.0 +2019-06-16 12:00:00,,8.7,6.0 +2019-06-16 13:00:00,,12.9,10.0 +2019-06-16 14:00:00,,11.2,16.0 +2019-06-16 15:00:00,,8.7,23.0 +2019-06-16 16:00:00,,8.1,26.0 +2019-06-16 17:00:00,,8.4,29.0 +2019-06-16 18:00:00,,9.2,29.0 +2019-06-16 19:00:00,,11.8,28.0 +2019-06-16 20:00:00,,12.3,28.0 +2019-06-16 21:00:00,,14.4,27.0 +2019-06-16 22:00:00,,23.3,25.0 +2019-06-16 23:00:00,,42.7, +2019-06-17 00:00:00,,56.6,23.0 +2019-06-17 01:00:00,,67.3,17.0 +2019-06-17 02:00:00,,69.3,17.0 +2019-06-17 03:00:00,42.0,58.8,14.0 +2019-06-17 04:00:00,35.5,53.1,14.0 +2019-06-17 05:00:00,36.0,49.1,11.0 +2019-06-17 06:00:00,39.5,45.7,11.0 +2019-06-17 07:00:00,42.5,44.8,12.0 +2019-06-17 08:00:00,43.5,52.3,13.0 +2019-06-17 09:00:00,45.0,54.4,13.0 +2019-06-17 10:00:00,41.0,51.6,11.0 +2019-06-17 11:00:00,,30.4,11.0 +2019-06-17 12:00:00,,16.0,11.0 +2019-06-17 13:00:00,,15.2, +2019-06-17 14:00:00,,10.1, +2019-06-17 15:00:00,,9.6, +2019-06-17 16:00:00,,11.5, +2019-06-17 17:00:00,,13.1, +2019-06-17 18:00:00,,11.9, +2019-06-17 19:00:00,,14.9, +2019-06-17 20:00:00,,15.4, +2019-06-17 21:00:00,,15.2, +2019-06-17 22:00:00,,20.5, +2019-06-17 
23:00:00,,38.3, +2019-06-18 00:00:00,,51.0, +2019-06-18 01:00:00,,73.3, +2019-06-18 02:00:00,,66.2, +2019-06-18 03:00:00,,60.1, +2019-06-18 04:00:00,,39.8, +2019-06-18 05:00:00,,45.5, +2019-06-18 06:00:00,,26.5, +2019-06-18 07:00:00,,33.8, +2019-06-18 08:00:00,,51.4, +2019-06-18 09:00:00,,52.6, +2019-06-18 10:00:00,,49.6, +2019-06-18 21:00:00,,15.3, +2019-06-18 22:00:00,,17.0, +2019-06-18 23:00:00,,23.1, +2019-06-19 00:00:00,,39.3, +2019-06-19 11:00:00,,27.3, +2019-06-19 12:00:00,,26.6, +2019-06-20 15:00:00,,19.4, +2019-06-20 16:00:00,,20.1, +2019-06-20 17:00:00,,19.3, +2019-06-20 18:00:00,,19.0, +2019-06-20 19:00:00,,23.2, +2019-06-20 20:00:00,,23.9, +2019-06-20 21:00:00,,25.3, +2019-06-20 22:00:00,,21.4, +2019-06-20 23:00:00,,24.9, +2019-06-21 00:00:00,,26.5, +2019-06-21 01:00:00,,21.8, +2019-06-21 02:00:00,,20.0, diff --git a/doc/data/air_quality_no2_long.csv b/doc/data/air_quality_no2_long.csv new file mode 100644 index 00000000..5d959370 --- /dev/null +++ b/doc/data/air_quality_no2_long.csv @@ -0,0 +1,2069 @@ +city,country,date.utc,location,parameter,value,unit +Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³ +Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-06-20 19:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-06-20 18:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-20 17:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-20 16:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-20 15:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-20 14:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-20 13:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-19 10:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-06-19 09:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-06-18 22:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-06-18 
21:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-18 20:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-18 19:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-06-18 08:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-06-18 07:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-06-18 06:00:00+00:00,FR04014,no2,51.4,µg/m³ +Paris,FR,2019-06-18 05:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-06-18 04:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-06-18 03:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-06-18 02:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-18 01:00:00+00:00,FR04014,no2,60.1,µg/m³ +Paris,FR,2019-06-18 00:00:00+00:00,FR04014,no2,66.2,µg/m³ +Paris,FR,2019-06-17 23:00:00+00:00,FR04014,no2,73.3,µg/m³ +Paris,FR,2019-06-17 22:00:00+00:00,FR04014,no2,51.0,µg/m³ +Paris,FR,2019-06-17 21:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-17 20:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-06-17 19:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 18:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-17 17:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-06-17 16:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-06-17 15:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-17 14:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-17 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-17 12:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-06-17 11:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-17 10:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-17 09:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-06-17 08:00:00+00:00,FR04014,no2,51.6,µg/m³ +Paris,FR,2019-06-17 07:00:00+00:00,FR04014,no2,54.4,µg/m³ +Paris,FR,2019-06-17 06:00:00+00:00,FR04014,no2,52.3,µg/m³ +Paris,FR,2019-06-17 05:00:00+00:00,FR04014,no2,44.8,µg/m³ +Paris,FR,2019-06-17 04:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-06-17 03:00:00+00:00,FR04014,no2,49.1,µg/m³ +Paris,FR,2019-06-17 02:00:00+00:00,FR04014,no2,53.1,µg/m³ 
+Paris,FR,2019-06-17 01:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-17 00:00:00+00:00,FR04014,no2,69.3,µg/m³ +Paris,FR,2019-06-16 23:00:00+00:00,FR04014,no2,67.3,µg/m³ +Paris,FR,2019-06-16 22:00:00+00:00,FR04014,no2,56.6,µg/m³ +Paris,FR,2019-06-16 21:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-16 20:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-16 18:00:00+00:00,FR04014,no2,12.3,µg/m³ +Paris,FR,2019-06-16 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-16 16:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-16 15:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-06-16 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 12:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 11:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-06-16 10:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-06-16 09:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-16 08:00:00+00:00,FR04014,no2,9.9,µg/m³ +Paris,FR,2019-06-16 07:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-16 06:00:00+00:00,FR04014,no2,11.6,µg/m³ +Paris,FR,2019-06-16 05:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-16 04:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-16 03:00:00+00:00,FR04014,no2,11.2,µg/m³ +Paris,FR,2019-06-16 02:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-16 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-06-16 00:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-15 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-15 22:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-06-15 21:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-06-15 20:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-15 19:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-15 18:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 17:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 16:00:00+00:00,FR04014,no2,10.7,µg/m³ 
+Paris,FR,2019-06-15 15:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-06-15 14:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-15 13:00:00+00:00,FR04014,no2,9.0,µg/m³ +Paris,FR,2019-06-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-06-15 11:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-15 10:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-06-15 09:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-15 08:00:00+00:00,FR04014,no2,17.6,µg/m³ +Paris,FR,2019-06-15 07:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-15 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-15 02:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-06-15 01:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-15 00:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-14 23:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-14 22:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-14 21:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-06-14 20:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-14 19:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-14 18:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-06-14 17:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-14 16:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-06-14 15:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-14 14:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-06-14 13:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-14 12:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-06-14 11:00:00+00:00,FR04014,no2,21.8,µg/m³ +Paris,FR,2019-06-14 10:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-06-14 09:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-06-14 08:00:00+00:00,FR04014,no2,34.3,µg/m³ +Paris,FR,2019-06-14 07:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-14 06:00:00+00:00,FR04014,no2,64.3,µg/m³ +Paris,FR,2019-06-14 05:00:00+00:00,FR04014,no2,49.3,µg/m³ +Paris,FR,2019-06-14 04:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-14 
03:00:00+00:00,FR04014,no2,48.5,µg/m³ +Paris,FR,2019-06-14 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-06-14 01:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-14 00:00:00+00:00,FR04014,no2,74.2,µg/m³ +Paris,FR,2019-06-13 23:00:00+00:00,FR04014,no2,78.3,µg/m³ +Paris,FR,2019-06-13 22:00:00+00:00,FR04014,no2,77.9,µg/m³ +Paris,FR,2019-06-13 21:00:00+00:00,FR04014,no2,58.8,µg/m³ +Paris,FR,2019-06-13 20:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-06-13 19:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-13 18:00:00+00:00,FR04014,no2,24.0,µg/m³ +Paris,FR,2019-06-13 17:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-13 16:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-13 15:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-13 14:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-13 13:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-06-13 12:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-13 11:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-06-13 10:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-13 09:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-13 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-13 07:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-13 06:00:00+00:00,FR04014,no2,39.8,µg/m³ +Paris,FR,2019-06-13 05:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-06-13 04:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-13 03:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-06-13 02:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-13 01:00:00+00:00,FR04014,no2,18.7,µg/m³ +Paris,FR,2019-06-13 00:00:00+00:00,FR04014,no2,20.0,µg/m³ +Paris,FR,2019-06-12 23:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-06-12 22:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-06-12 21:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-12 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-06-12 19:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-12 18:00:00+00:00,FR04014,no2,25.3,µg/m³ 
+Paris,FR,2019-06-12 17:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-06-12 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-06-12 15:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-06-12 14:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-06-12 13:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-12 12:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-06-12 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-06-12 09:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-12 08:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-12 07:00:00+00:00,FR04014,no2,44.4,µg/m³ +Paris,FR,2019-06-12 06:00:00+00:00,FR04014,no2,38.4,µg/m³ +Paris,FR,2019-06-12 05:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-12 04:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-06-12 03:00:00+00:00,FR04014,no2,36.3,µg/m³ +Paris,FR,2019-06-12 02:00:00+00:00,FR04014,no2,34.7,µg/m³ +Paris,FR,2019-06-12 01:00:00+00:00,FR04014,no2,41.9,µg/m³ +Paris,FR,2019-06-12 00:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-11 23:00:00+00:00,FR04014,no2,41.5,µg/m³ +Paris,FR,2019-06-11 22:00:00+00:00,FR04014,no2,59.4,µg/m³ +Paris,FR,2019-06-11 21:00:00+00:00,FR04014,no2,54.1,µg/m³ +Paris,FR,2019-06-11 20:00:00+00:00,FR04014,no2,42.7,µg/m³ +Paris,FR,2019-06-11 19:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-06-11 18:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-11 17:00:00+00:00,FR04014,no2,35.5,µg/m³ +Paris,FR,2019-06-11 16:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-11 15:00:00+00:00,FR04014,no2,19.8,µg/m³ +Paris,FR,2019-06-11 14:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-11 13:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-11 12:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-06-11 11:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-06-11 10:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-11 09:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-11 
08:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-11 07:00:00+00:00,FR04014,no2,58.0,µg/m³ +Paris,FR,2019-06-11 06:00:00+00:00,FR04014,no2,55.4,µg/m³ +Paris,FR,2019-06-11 05:00:00+00:00,FR04014,no2,58.7,µg/m³ +Paris,FR,2019-06-11 04:00:00+00:00,FR04014,no2,52.7,µg/m³ +Paris,FR,2019-06-11 03:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-06-11 02:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-06-11 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-11 00:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-10 23:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-10 22:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-10 21:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-06-10 20:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-06-10 19:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-06-10 18:00:00+00:00,FR04014,no2,18.4,µg/m³ +Paris,FR,2019-06-10 17:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-10 16:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-10 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 14:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-06-10 13:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-10 12:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-10 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-10 10:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-10 09:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-06-10 08:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-10 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-10 06:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-10 05:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-06-10 04:00:00+00:00,FR04014,no2,13.7,µg/m³ +Paris,FR,2019-06-10 03:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-10 02:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-10 01:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-06-10 00:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-06-09 23:00:00+00:00,FR04014,no2,39.9,µg/m³ 
+Paris,FR,2019-06-09 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-06-09 21:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-06-09 20:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-06-09 19:00:00+00:00,FR04014,no2,30.6,µg/m³ +Paris,FR,2019-06-09 18:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-09 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-09 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-09 15:00:00+00:00,FR04014,no2,7.2,µg/m³ +Paris,FR,2019-06-09 14:00:00+00:00,FR04014,no2,7.9,µg/m³ +Paris,FR,2019-06-09 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-09 12:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 11:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-06-09 10:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-06-09 09:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-06-09 08:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-06-09 07:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-09 06:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-09 05:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-06-09 04:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-06-09 03:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-06-09 02:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-06-09 01:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-06-09 00:00:00+00:00,FR04014,no2,55.9,µg/m³ +Paris,FR,2019-06-08 23:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-06-08 22:00:00+00:00,FR04014,no2,34.8,µg/m³ +Paris,FR,2019-06-08 21:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-06-08 18:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-06-08 17:00:00+00:00,FR04014,no2,14.8,µg/m³ +Paris,FR,2019-06-08 16:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 14:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 13:00:00+00:00,FR04014,no2,11.1,µg/m³ +Paris,FR,2019-06-08 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-06-08 
11:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-08 10:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-06-08 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-08 08:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-08 07:00:00+00:00,FR04014,no2,14.0,µg/m³ +Paris,FR,2019-06-08 06:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-08 05:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-06-08 04:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-06-08 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-08 02:00:00+00:00,FR04014,no2,8.4,µg/m³ +Paris,FR,2019-06-08 01:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-06-08 00:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-06-07 23:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-07 22:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-06-07 21:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-06-07 20:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-06-07 19:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-06-07 18:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-06-07 17:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-07 15:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-07 14:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-07 13:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-07 12:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-07 11:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-06-07 10:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-06-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-06-07 08:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-06-07 07:00:00+00:00,FR04014,no2,23.0,µg/m³ +Paris,FR,2019-06-07 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-06 14:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-06-06 13:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-06-06 12:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-06-06 11:00:00+00:00,FR04014,no2,16.4,µg/m³ 
+Paris,FR,2019-06-06 10:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-06-06 09:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-06-06 08:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-06-06 07:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-06-06 06:00:00+00:00,FR04014,no2,40.5,µg/m³ +Paris,FR,2019-06-06 05:00:00+00:00,FR04014,no2,40.3,µg/m³ +Paris,FR,2019-06-06 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-06-06 03:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-06-06 02:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-06-06 01:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-06-06 00:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-06-05 23:00:00+00:00,FR04014,no2,31.8,µg/m³ +Paris,FR,2019-06-05 22:00:00+00:00,FR04014,no2,30.3,µg/m³ +Paris,FR,2019-06-05 21:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-06-05 20:00:00+00:00,FR04014,no2,37.5,µg/m³ +Paris,FR,2019-06-05 19:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-06-05 18:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-06-05 17:00:00+00:00,FR04014,no2,48.8,µg/m³ +Paris,FR,2019-06-05 16:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-06-05 15:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-05 14:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-06-05 13:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-06-05 12:00:00+00:00,FR04014,no2,47.2,µg/m³ +Paris,FR,2019-06-05 11:00:00+00:00,FR04014,no2,59.0,µg/m³ +Paris,FR,2019-06-05 10:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-06-05 09:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-06-05 08:00:00+00:00,FR04014,no2,35.3,µg/m³ +Paris,FR,2019-06-05 07:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-05 06:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-06-05 05:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-05 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-05 03:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-06-05 02:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-06-05 
01:00:00+00:00,FR04014,no2,10.8,µg/m³ +Paris,FR,2019-06-05 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-04 23:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-06-04 22:00:00+00:00,FR04014,no2,33.5,µg/m³ +Paris,FR,2019-06-04 21:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 20:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-06-04 19:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-06-04 18:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-06-04 17:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-06-04 16:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-06-04 15:00:00+00:00,FR04014,no2,21.5,µg/m³ +Paris,FR,2019-06-04 14:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-04 13:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-06-04 12:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-06-04 11:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-06-04 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-06-04 09:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-06-04 08:00:00+00:00,FR04014,no2,50.8,µg/m³ +Paris,FR,2019-06-04 07:00:00+00:00,FR04014,no2,53.5,µg/m³ +Paris,FR,2019-06-04 06:00:00+00:00,FR04014,no2,47.7,µg/m³ +Paris,FR,2019-06-04 05:00:00+00:00,FR04014,no2,36.5,µg/m³ +Paris,FR,2019-06-04 04:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-06-04 03:00:00+00:00,FR04014,no2,41.6,µg/m³ +Paris,FR,2019-06-04 02:00:00+00:00,FR04014,no2,35.0,µg/m³ +Paris,FR,2019-06-04 01:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-04 00:00:00+00:00,FR04014,no2,52.4,µg/m³ +Paris,FR,2019-06-03 23:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-03 22:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-06-03 21:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-06-03 20:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-06-03 19:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-06-03 18:00:00+00:00,FR04014,no2,23.1,µg/m³ +Paris,FR,2019-06-03 17:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-06-03 16:00:00+00:00,FR04014,no2,25.3,µg/m³ 
+Paris,FR,2019-06-03 15:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-06-03 14:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-03 13:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-03 12:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-06-03 11:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-06-03 10:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-06-03 09:00:00+00:00,FR04014,no2,46.0,µg/m³ +Paris,FR,2019-06-03 08:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-06-03 07:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-06-03 06:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-06-03 05:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-06-03 04:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-06-03 03:00:00+00:00,FR04014,no2,9.8,µg/m³ +Paris,FR,2019-06-03 02:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-06-03 01:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-03 00:00:00+00:00,FR04014,no2,15.7,µg/m³ +Paris,FR,2019-06-02 23:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-06-02 22:00:00+00:00,FR04014,no2,27.6,µg/m³ +Paris,FR,2019-06-02 21:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-06-02 20:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-06-02 19:00:00+00:00,FR04014,no2,25.8,µg/m³ +Paris,FR,2019-06-02 18:00:00+00:00,FR04014,no2,15.6,µg/m³ +Paris,FR,2019-06-02 17:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 16:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-06-02 15:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-06-02 14:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-06-02 13:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-06-02 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-06-02 11:00:00+00:00,FR04014,no2,13.1,µg/m³ +Paris,FR,2019-06-02 10:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 09:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-06-02 08:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-06-02 07:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-06-02 
06:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-06-02 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-06-02 04:00:00+00:00,FR04014,no2,24.5,µg/m³ +Paris,FR,2019-06-02 03:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-06-02 02:00:00+00:00,FR04014,no2,39.2,µg/m³ +Paris,FR,2019-06-02 01:00:00+00:00,FR04014,no2,38.2,µg/m³ +Paris,FR,2019-06-02 00:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-06-01 23:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-06-01 22:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-06-01 21:00:00+00:00,FR04014,no2,49.4,µg/m³ +Paris,FR,2019-06-01 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-06-01 19:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-06-01 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-06-01 17:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 16:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-06-01 15:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 14:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-06-01 13:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-06-01 12:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-06-01 11:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-06-01 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-06-01 09:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-06-01 08:00:00+00:00,FR04014,no2,33.3,µg/m³ +Paris,FR,2019-06-01 07:00:00+00:00,FR04014,no2,46.4,µg/m³ +Paris,FR,2019-06-01 06:00:00+00:00,FR04014,no2,44.6,µg/m³ +Paris,FR,2019-06-01 02:00:00+00:00,FR04014,no2,68.1,µg/m³ +Paris,FR,2019-06-01 01:00:00+00:00,FR04014,no2,74.8,µg/m³ +Paris,FR,2019-06-01 00:00:00+00:00,FR04014,no2,84.7,µg/m³ +Paris,FR,2019-05-31 23:00:00+00:00,FR04014,no2,81.7,µg/m³ +Paris,FR,2019-05-31 22:00:00+00:00,FR04014,no2,68.0,µg/m³ +Paris,FR,2019-05-31 21:00:00+00:00,FR04014,no2,60.2,µg/m³ +Paris,FR,2019-05-31 20:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-31 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-31 18:00:00+00:00,FR04014,no2,25.2,µg/m³ 
+Paris,FR,2019-05-31 17:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-31 16:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-31 15:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 14:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-31 13:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-31 12:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-31 11:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-31 10:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-31 09:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-31 08:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-31 07:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-31 06:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-31 05:00:00+00:00,FR04014,no2,37.2,µg/m³ +Paris,FR,2019-05-31 04:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-31 03:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-31 02:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-31 01:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-31 00:00:00+00:00,FR04014,no2,27.2,µg/m³ +Paris,FR,2019-05-30 23:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-30 22:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-30 21:00:00+00:00,FR04014,no2,26.9,µg/m³ +Paris,FR,2019-05-30 20:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-30 19:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-30 18:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-30 17:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-30 16:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-30 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-30 14:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 13:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-30 12:00:00+00:00,FR04014,no2,14.2,µg/m³ +Paris,FR,2019-05-30 11:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-30 10:00:00+00:00,FR04014,no2,13.8,µg/m³ +Paris,FR,2019-05-30 09:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-30 
08:00:00+00:00,FR04014,no2,16.7,µg/m³ +Paris,FR,2019-05-30 07:00:00+00:00,FR04014,no2,18.3,µg/m³ +Paris,FR,2019-05-30 06:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-30 05:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-30 04:00:00+00:00,FR04014,no2,10.4,µg/m³ +Paris,FR,2019-05-30 03:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-30 02:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-30 01:00:00+00:00,FR04014,no2,12.4,µg/m³ +Paris,FR,2019-05-30 00:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-29 23:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-29 22:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 21:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-29 20:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-29 19:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 18:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-29 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-29 16:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-29 15:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 14:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-29 13:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-29 12:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-29 11:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-29 10:00:00+00:00,FR04014,no2,30.7,µg/m³ +Paris,FR,2019-05-29 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-29 08:00:00+00:00,FR04014,no2,45.7,µg/m³ +Paris,FR,2019-05-29 07:00:00+00:00,FR04014,no2,50.5,µg/m³ +Paris,FR,2019-05-29 06:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-29 05:00:00+00:00,FR04014,no2,36.7,µg/m³ +Paris,FR,2019-05-29 04:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-29 03:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-29 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-29 01:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-29 00:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-28 23:00:00+00:00,FR04014,no2,25.3,µg/m³ 
+Paris,FR,2019-05-28 22:00:00+00:00,FR04014,no2,20.2,µg/m³ +Paris,FR,2019-05-28 21:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 20:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-28 19:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 18:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-28 17:00:00+00:00,FR04014,no2,20.8,µg/m³ +Paris,FR,2019-05-28 16:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-28 15:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-28 14:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-28 13:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-28 12:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-28 11:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-28 10:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-28 09:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-28 08:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-28 07:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-28 06:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-28 05:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-28 04:00:00+00:00,FR04014,no2,8.9,µg/m³ +Paris,FR,2019-05-28 03:00:00+00:00,FR04014,no2,6.1,µg/m³ +Paris,FR,2019-05-28 02:00:00+00:00,FR04014,no2,6.4,µg/m³ +Paris,FR,2019-05-28 01:00:00+00:00,FR04014,no2,8.2,µg/m³ +Paris,FR,2019-05-28 00:00:00+00:00,FR04014,no2,15.4,µg/m³ +Paris,FR,2019-05-27 23:00:00+00:00,FR04014,no2,22.6,µg/m³ +Paris,FR,2019-05-27 22:00:00+00:00,FR04014,no2,19.9,µg/m³ +Paris,FR,2019-05-27 21:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-27 20:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-27 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-27 18:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 17:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-27 16:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-27 15:00:00+00:00,FR04014,no2,25.6,µg/m³ +Paris,FR,2019-05-27 14:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 
13:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-27 12:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-27 11:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-27 10:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-27 09:00:00+00:00,FR04014,no2,31.4,µg/m³ +Paris,FR,2019-05-27 08:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-27 07:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-27 06:00:00+00:00,FR04014,no2,29.1,µg/m³ +Paris,FR,2019-05-27 05:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-27 04:00:00+00:00,FR04014,no2,6.5,µg/m³ +Paris,FR,2019-05-27 03:00:00+00:00,FR04014,no2,4.8,µg/m³ +Paris,FR,2019-05-27 02:00:00+00:00,FR04014,no2,5.9,µg/m³ +Paris,FR,2019-05-27 01:00:00+00:00,FR04014,no2,7.1,µg/m³ +Paris,FR,2019-05-27 00:00:00+00:00,FR04014,no2,9.5,µg/m³ +Paris,FR,2019-05-26 23:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 22:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-26 21:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-26 20:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-26 19:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-26 18:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-26 17:00:00+00:00,FR04014,no2,17.3,µg/m³ +Paris,FR,2019-05-26 16:00:00+00:00,FR04014,no2,17.1,µg/m³ +Paris,FR,2019-05-26 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-26 14:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-26 13:00:00+00:00,FR04014,no2,12.5,µg/m³ +Paris,FR,2019-05-26 12:00:00+00:00,FR04014,no2,11.5,µg/m³ +Paris,FR,2019-05-26 11:00:00+00:00,FR04014,no2,13.3,µg/m³ +Paris,FR,2019-05-26 10:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-26 09:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-26 08:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-26 07:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-26 06:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-26 05:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-26 04:00:00+00:00,FR04014,no2,22.3,µg/m³ 
+Paris,FR,2019-05-26 03:00:00+00:00,FR04014,no2,22.9,µg/m³ +Paris,FR,2019-05-26 02:00:00+00:00,FR04014,no2,23.4,µg/m³ +Paris,FR,2019-05-26 01:00:00+00:00,FR04014,no2,49.8,µg/m³ +Paris,FR,2019-05-26 00:00:00+00:00,FR04014,no2,67.0,µg/m³ +Paris,FR,2019-05-25 23:00:00+00:00,FR04014,no2,70.2,µg/m³ +Paris,FR,2019-05-25 22:00:00+00:00,FR04014,no2,63.9,µg/m³ +Paris,FR,2019-05-25 21:00:00+00:00,FR04014,no2,39.5,µg/m³ +Paris,FR,2019-05-25 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-25 19:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-25 18:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-25 17:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-25 16:00:00+00:00,FR04014,no2,31.9,µg/m³ +Paris,FR,2019-05-25 15:00:00+00:00,FR04014,no2,30.0,µg/m³ +Paris,FR,2019-05-25 14:00:00+00:00,FR04014,no2,23.6,µg/m³ +Paris,FR,2019-05-25 13:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-25 12:00:00+00:00,FR04014,no2,18.6,µg/m³ +Paris,FR,2019-05-25 11:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-25 10:00:00+00:00,FR04014,no2,26.3,µg/m³ +Paris,FR,2019-05-25 09:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-25 08:00:00+00:00,FR04014,no2,44.5,µg/m³ +Paris,FR,2019-05-25 07:00:00+00:00,FR04014,no2,42.1,µg/m³ +Paris,FR,2019-05-25 06:00:00+00:00,FR04014,no2,36.9,µg/m³ +Paris,FR,2019-05-25 02:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-25 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-25 00:00:00+00:00,FR04014,no2,17.4,µg/m³ +Paris,FR,2019-05-24 23:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-24 22:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-24 21:00:00+00:00,FR04014,no2,18.1,µg/m³ +Paris,FR,2019-05-24 20:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-24 19:00:00+00:00,FR04014,no2,21.9,µg/m³ +Paris,FR,2019-05-24 18:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-24 17:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-24 16:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-24 
15:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-24 14:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-24 13:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-24 12:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-24 11:00:00+00:00,FR04014,no2,40.6,µg/m³ +Paris,FR,2019-05-24 10:00:00+00:00,FR04014,no2,28.6,µg/m³ +Paris,FR,2019-05-24 09:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-24 08:00:00+00:00,FR04014,no2,45.9,µg/m³ +Paris,FR,2019-05-24 07:00:00+00:00,FR04014,no2,54.8,µg/m³ +Paris,FR,2019-05-24 06:00:00+00:00,FR04014,no2,40.7,µg/m³ +Paris,FR,2019-05-24 05:00:00+00:00,FR04014,no2,35.9,µg/m³ +Paris,FR,2019-05-24 04:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-24 03:00:00+00:00,FR04014,no2,19.4,µg/m³ +Paris,FR,2019-05-24 02:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-24 01:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-24 00:00:00+00:00,FR04014,no2,32.8,µg/m³ +Paris,FR,2019-05-23 23:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-23 22:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-23 21:00:00+00:00,FR04014,no2,47.0,µg/m³ +Paris,FR,2019-05-23 20:00:00+00:00,FR04014,no2,33.8,µg/m³ +Paris,FR,2019-05-23 19:00:00+00:00,FR04014,no2,28.0,µg/m³ +Paris,FR,2019-05-23 18:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-23 17:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-23 16:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-23 15:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-23 14:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-23 13:00:00+00:00,FR04014,no2,21.2,µg/m³ +Paris,FR,2019-05-23 12:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-23 11:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-23 10:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-23 09:00:00+00:00,FR04014,no2,79.4,µg/m³ +Paris,FR,2019-05-23 08:00:00+00:00,FR04014,no2,97.0,µg/m³ +Paris,FR,2019-05-23 07:00:00+00:00,FR04014,no2,91.8,µg/m³ +Paris,FR,2019-05-23 06:00:00+00:00,FR04014,no2,79.6,µg/m³ 
+Paris,FR,2019-05-23 05:00:00+00:00,FR04014,no2,68.7,µg/m³ +Paris,FR,2019-05-23 04:00:00+00:00,FR04014,no2,71.9,µg/m³ +Paris,FR,2019-05-23 03:00:00+00:00,FR04014,no2,76.8,µg/m³ +Paris,FR,2019-05-23 02:00:00+00:00,FR04014,no2,66.6,µg/m³ +Paris,FR,2019-05-23 01:00:00+00:00,FR04014,no2,53.1,µg/m³ +Paris,FR,2019-05-23 00:00:00+00:00,FR04014,no2,53.3,µg/m³ +Paris,FR,2019-05-22 23:00:00+00:00,FR04014,no2,62.1,µg/m³ +Paris,FR,2019-05-22 22:00:00+00:00,FR04014,no2,29.8,µg/m³ +Paris,FR,2019-05-22 21:00:00+00:00,FR04014,no2,37.7,µg/m³ +Paris,FR,2019-05-22 20:00:00+00:00,FR04014,no2,44.9,µg/m³ +Paris,FR,2019-05-22 19:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-22 18:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-22 17:00:00+00:00,FR04014,no2,36.1,µg/m³ +Paris,FR,2019-05-22 16:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-22 15:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-22 14:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-22 13:00:00+00:00,FR04014,no2,38.5,µg/m³ +Paris,FR,2019-05-22 12:00:00+00:00,FR04014,no2,42.2,µg/m³ +Paris,FR,2019-05-22 11:00:00+00:00,FR04014,no2,42.6,µg/m³ +Paris,FR,2019-05-22 10:00:00+00:00,FR04014,no2,57.8,µg/m³ +Paris,FR,2019-05-22 09:00:00+00:00,FR04014,no2,63.1,µg/m³ +Paris,FR,2019-05-22 08:00:00+00:00,FR04014,no2,70.8,µg/m³ +Paris,FR,2019-05-22 07:00:00+00:00,FR04014,no2,75.4,µg/m³ +Paris,FR,2019-05-22 06:00:00+00:00,FR04014,no2,75.7,µg/m³ +Paris,FR,2019-05-22 05:00:00+00:00,FR04014,no2,45.1,µg/m³ +Paris,FR,2019-05-22 04:00:00+00:00,FR04014,no2,33.7,µg/m³ +Paris,FR,2019-05-22 03:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-22 02:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-22 01:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-22 00:00:00+00:00,FR04014,no2,27.1,µg/m³ +Paris,FR,2019-05-21 23:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-21 22:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-21 21:00:00+00:00,FR04014,no2,43.0,µg/m³ +Paris,FR,2019-05-21 
20:00:00+00:00,FR04014,no2,40.8,µg/m³ +Paris,FR,2019-05-21 19:00:00+00:00,FR04014,no2,50.0,µg/m³ +Paris,FR,2019-05-21 18:00:00+00:00,FR04014,no2,54.3,µg/m³ +Paris,FR,2019-05-21 17:00:00+00:00,FR04014,no2,75.0,µg/m³ +Paris,FR,2019-05-21 16:00:00+00:00,FR04014,no2,42.3,µg/m³ +Paris,FR,2019-05-21 15:00:00+00:00,FR04014,no2,36.6,µg/m³ +Paris,FR,2019-05-21 14:00:00+00:00,FR04014,no2,47.8,µg/m³ +Paris,FR,2019-05-21 13:00:00+00:00,FR04014,no2,49.7,µg/m³ +Paris,FR,2019-05-21 12:00:00+00:00,FR04014,no2,30.5,µg/m³ +Paris,FR,2019-05-21 11:00:00+00:00,FR04014,no2,25.5,µg/m³ +Paris,FR,2019-05-21 10:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-21 09:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-21 08:00:00+00:00,FR04014,no2,54.2,µg/m³ +Paris,FR,2019-05-21 07:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-21 06:00:00+00:00,FR04014,no2,62.6,µg/m³ +Paris,FR,2019-05-21 05:00:00+00:00,FR04014,no2,38.0,µg/m³ +Paris,FR,2019-05-21 04:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-21 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-21 02:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-21 01:00:00+00:00,FR04014,no2,16.3,µg/m³ +Paris,FR,2019-05-21 00:00:00+00:00,FR04014,no2,16.9,µg/m³ +Paris,FR,2019-05-20 23:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-20 22:00:00+00:00,FR04014,no2,20.7,µg/m³ +Paris,FR,2019-05-20 21:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-20 20:00:00+00:00,FR04014,no2,21.6,µg/m³ +Paris,FR,2019-05-20 19:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-20 18:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-20 17:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-20 16:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-20 15:00:00+00:00,FR04014,no2,26.5,µg/m³ +Paris,FR,2019-05-20 14:00:00+00:00,FR04014,no2,27.5,µg/m³ +Paris,FR,2019-05-20 13:00:00+00:00,FR04014,no2,23.7,µg/m³ +Paris,FR,2019-05-20 12:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-20 11:00:00+00:00,FR04014,no2,35.4,µg/m³ 
+Paris,FR,2019-05-20 10:00:00+00:00,FR04014,no2,43.9,µg/m³ +Paris,FR,2019-05-20 09:00:00+00:00,FR04014,no2,45.5,µg/m³ +Paris,FR,2019-05-20 08:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-20 07:00:00+00:00,FR04014,no2,46.9,µg/m³ +Paris,FR,2019-05-20 06:00:00+00:00,FR04014,no2,40.1,µg/m³ +Paris,FR,2019-05-20 05:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-20 04:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-20 03:00:00+00:00,FR04014,no2,12.6,µg/m³ +Paris,FR,2019-05-20 02:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-20 01:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-20 00:00:00+00:00,FR04014,no2,16.4,µg/m³ +Paris,FR,2019-05-19 23:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-19 22:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-19 21:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-19 20:00:00+00:00,FR04014,no2,35.6,µg/m³ +Paris,FR,2019-05-19 19:00:00+00:00,FR04014,no2,51.2,µg/m³ +Paris,FR,2019-05-19 18:00:00+00:00,FR04014,no2,32.7,µg/m³ +Paris,FR,2019-05-19 17:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-19 16:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-19 15:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 14:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-19 13:00:00+00:00,FR04014,no2,21.0,µg/m³ +Paris,FR,2019-05-19 12:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-19 11:00:00+00:00,FR04014,no2,32.6,µg/m³ +Paris,FR,2019-05-19 10:00:00+00:00,FR04014,no2,31.0,µg/m³ +Paris,FR,2019-05-19 09:00:00+00:00,FR04014,no2,33.0,µg/m³ +Paris,FR,2019-05-19 08:00:00+00:00,FR04014,no2,31.7,µg/m³ +Paris,FR,2019-05-19 07:00:00+00:00,FR04014,no2,32.4,µg/m³ +Paris,FR,2019-05-19 06:00:00+00:00,FR04014,no2,31.1,µg/m³ +Paris,FR,2019-05-19 05:00:00+00:00,FR04014,no2,40.9,µg/m³ +Paris,FR,2019-05-19 04:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-19 03:00:00+00:00,FR04014,no2,36.4,µg/m³ +Paris,FR,2019-05-19 02:00:00+00:00,FR04014,no2,38.1,µg/m³ +Paris,FR,2019-05-19 
01:00:00+00:00,FR04014,no2,34.9,µg/m³ +Paris,FR,2019-05-19 00:00:00+00:00,FR04014,no2,49.6,µg/m³ +Paris,FR,2019-05-18 23:00:00+00:00,FR04014,no2,50.2,µg/m³ +Paris,FR,2019-05-18 22:00:00+00:00,FR04014,no2,62.5,µg/m³ +Paris,FR,2019-05-18 21:00:00+00:00,FR04014,no2,59.3,µg/m³ +Paris,FR,2019-05-18 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-18 19:00:00+00:00,FR04014,no2,67.5,µg/m³ +Paris,FR,2019-05-18 18:00:00+00:00,FR04014,no2,14.5,µg/m³ +Paris,FR,2019-05-18 17:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-18 16:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-18 15:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-18 14:00:00+00:00,FR04014,no2,11.8,µg/m³ +Paris,FR,2019-05-18 13:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-05-18 12:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-18 11:00:00+00:00,FR04014,no2,17.5,µg/m³ +Paris,FR,2019-05-18 10:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-18 09:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-18 08:00:00+00:00,FR04014,no2,20.4,µg/m³ +Paris,FR,2019-05-18 07:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-18 06:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-18 05:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-18 04:00:00+00:00,FR04014,no2,16.6,µg/m³ +Paris,FR,2019-05-18 03:00:00+00:00,FR04014,no2,16.1,µg/m³ +Paris,FR,2019-05-18 02:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-18 01:00:00+00:00,FR04014,no2,37.4,µg/m³ +Paris,FR,2019-05-18 00:00:00+00:00,FR04014,no2,31.5,µg/m³ +Paris,FR,2019-05-17 23:00:00+00:00,FR04014,no2,34.1,µg/m³ +Paris,FR,2019-05-17 22:00:00+00:00,FR04014,no2,28.2,µg/m³ +Paris,FR,2019-05-17 21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-17 20:00:00+00:00,FR04014,no2,23.5,µg/m³ +Paris,FR,2019-05-17 19:00:00+00:00,FR04014,no2,24.7,µg/m³ +Paris,FR,2019-05-17 18:00:00+00:00,FR04014,no2,33.6,µg/m³ +Paris,FR,2019-05-17 17:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-17 16:00:00+00:00,FR04014,no2,20.7,µg/m³ 
+Paris,FR,2019-05-17 15:00:00+00:00,FR04014,no2,22.2,µg/m³ +Paris,FR,2019-05-17 14:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-17 13:00:00+00:00,FR04014,no2,37.9,µg/m³ +Paris,FR,2019-05-17 12:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-17 11:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-17 10:00:00+00:00,FR04014,no2,51.5,µg/m³ +Paris,FR,2019-05-17 09:00:00+00:00,FR04014,no2,60.5,µg/m³ +Paris,FR,2019-05-17 08:00:00+00:00,FR04014,no2,57.5,µg/m³ +Paris,FR,2019-05-17 07:00:00+00:00,FR04014,no2,55.0,µg/m³ +Paris,FR,2019-05-17 06:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-17 05:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-17 04:00:00+00:00,FR04014,no2,28.4,µg/m³ +Paris,FR,2019-05-17 03:00:00+00:00,FR04014,no2,26.6,µg/m³ +Paris,FR,2019-05-17 02:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-17 01:00:00+00:00,FR04014,no2,26.1,µg/m³ +Paris,FR,2019-05-17 00:00:00+00:00,FR04014,no2,46.3,µg/m³ +Paris,FR,2019-05-16 23:00:00+00:00,FR04014,no2,43.7,µg/m³ +Paris,FR,2019-05-16 22:00:00+00:00,FR04014,no2,37.1,µg/m³ +Paris,FR,2019-05-16 21:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-16 20:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-16 19:00:00+00:00,FR04014,no2,14.4,µg/m³ +Paris,FR,2019-05-16 18:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-16 17:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 16:00:00+00:00,FR04014,no2,10.3,µg/m³ +Paris,FR,2019-05-16 15:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-16 14:00:00+00:00,FR04014,no2,8.1,µg/m³ +Paris,FR,2019-05-16 13:00:00+00:00,FR04014,no2,8.5,µg/m³ +Paris,FR,2019-05-16 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-16 11:00:00+00:00,FR04014,no2,10.5,µg/m³ +Paris,FR,2019-05-16 10:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-16 09:00:00+00:00,FR04014,no2,29.5,µg/m³ +Paris,FR,2019-05-16 08:00:00+00:00,FR04014,no2,39.4,µg/m³ +Paris,FR,2019-05-16 07:00:00+00:00,FR04014,no2,40.0,µg/m³ +Paris,FR,2019-05-16 
05:00:00+00:00,FR04014,no2,52.6,µg/m³ +Paris,FR,2019-05-16 04:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-16 03:00:00+00:00,FR04014,no2,27.9,µg/m³ +Paris,FR,2019-05-16 02:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-16 01:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-16 00:00:00+00:00,FR04014,no2,27.4,µg/m³ +Paris,FR,2019-05-15 23:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-15 22:00:00+00:00,FR04014,no2,44.1,µg/m³ +Paris,FR,2019-05-15 21:00:00+00:00,FR04014,no2,36.0,µg/m³ +Paris,FR,2019-05-15 20:00:00+00:00,FR04014,no2,30.1,µg/m³ +Paris,FR,2019-05-15 19:00:00+00:00,FR04014,no2,20.3,µg/m³ +Paris,FR,2019-05-15 18:00:00+00:00,FR04014,no2,16.5,µg/m³ +Paris,FR,2019-05-15 17:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-15 16:00:00+00:00,FR04014,no2,12.2,µg/m³ +Paris,FR,2019-05-15 15:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-15 14:00:00+00:00,FR04014,no2,11.9,µg/m³ +Paris,FR,2019-05-15 13:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-15 12:00:00+00:00,FR04014,no2,9.4,µg/m³ +Paris,FR,2019-05-15 11:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 10:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 09:00:00+00:00,FR04014,no2,0.0,µg/m³ +Paris,FR,2019-05-15 08:00:00+00:00,FR04014,no2,25.7,µg/m³ +Paris,FR,2019-05-15 07:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-15 06:00:00+00:00,FR04014,no2,48.1,µg/m³ +Paris,FR,2019-05-15 05:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-15 04:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-15 03:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-15 02:00:00+00:00,FR04014,no2,16.8,µg/m³ +Paris,FR,2019-05-15 01:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-15 00:00:00+00:00,FR04014,no2,18.8,µg/m³ +Paris,FR,2019-05-14 23:00:00+00:00,FR04014,no2,24.3,µg/m³ +Paris,FR,2019-05-14 22:00:00+00:00,FR04014,no2,30.9,µg/m³ +Paris,FR,2019-05-14 21:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-14 20:00:00+00:00,FR04014,no2,28.4,µg/m³ 
+Paris,FR,2019-05-14 19:00:00+00:00,FR04014,no2,23.3,µg/m³ +Paris,FR,2019-05-14 18:00:00+00:00,FR04014,no2,17.9,µg/m³ +Paris,FR,2019-05-14 17:00:00+00:00,FR04014,no2,17.7,µg/m³ +Paris,FR,2019-05-14 16:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-14 15:00:00+00:00,FR04014,no2,13.4,µg/m³ +Paris,FR,2019-05-14 14:00:00+00:00,FR04014,no2,15.2,µg/m³ +Paris,FR,2019-05-14 13:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-14 12:00:00+00:00,FR04014,no2,10.2,µg/m³ +Paris,FR,2019-05-14 11:00:00+00:00,FR04014,no2,11.3,µg/m³ +Paris,FR,2019-05-14 10:00:00+00:00,FR04014,no2,12.9,µg/m³ +Paris,FR,2019-05-14 09:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 08:00:00+00:00,FR04014,no2,28.8,µg/m³ +Paris,FR,2019-05-14 07:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-14 06:00:00+00:00,FR04014,no2,46.1,µg/m³ +Paris,FR,2019-05-14 05:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-14 04:00:00+00:00,FR04014,no2,31.6,µg/m³ +Paris,FR,2019-05-14 03:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-14 02:00:00+00:00,FR04014,no2,19.0,µg/m³ +Paris,FR,2019-05-14 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-14 00:00:00+00:00,FR04014,no2,20.9,µg/m³ +Paris,FR,2019-05-13 23:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-13 22:00:00+00:00,FR04014,no2,27.3,µg/m³ +Paris,FR,2019-05-13 21:00:00+00:00,FR04014,no2,30.4,µg/m³ +Paris,FR,2019-05-13 20:00:00+00:00,FR04014,no2,28.3,µg/m³ +Paris,FR,2019-05-13 19:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-13 18:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-13 17:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-13 16:00:00+00:00,FR04014,no2,12.1,µg/m³ +Paris,FR,2019-05-13 15:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-13 14:00:00+00:00,FR04014,no2,10.7,µg/m³ +Paris,FR,2019-05-13 13:00:00+00:00,FR04014,no2,10.1,µg/m³ +Paris,FR,2019-05-13 12:00:00+00:00,FR04014,no2,9.2,µg/m³ +Paris,FR,2019-05-13 11:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-13 
10:00:00+00:00,FR04014,no2,12.8,µg/m³ +Paris,FR,2019-05-13 09:00:00+00:00,FR04014,no2,20.6,µg/m³ +Paris,FR,2019-05-13 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-13 07:00:00+00:00,FR04014,no2,41.0,µg/m³ +Paris,FR,2019-05-13 06:00:00+00:00,FR04014,no2,45.2,µg/m³ +Paris,FR,2019-05-13 05:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-13 04:00:00+00:00,FR04014,no2,25.1,µg/m³ +Paris,FR,2019-05-13 03:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 02:00:00+00:00,FR04014,no2,18.5,µg/m³ +Paris,FR,2019-05-13 01:00:00+00:00,FR04014,no2,18.9,µg/m³ +Paris,FR,2019-05-13 00:00:00+00:00,FR04014,no2,25.0,µg/m³ +Paris,FR,2019-05-12 23:00:00+00:00,FR04014,no2,32.5,µg/m³ +Paris,FR,2019-05-12 22:00:00+00:00,FR04014,no2,46.5,µg/m³ +Paris,FR,2019-05-12 21:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-12 20:00:00+00:00,FR04014,no2,24.1,µg/m³ +Paris,FR,2019-05-12 19:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-12 18:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-12 17:00:00+00:00,FR04014,no2,13.9,µg/m³ +Paris,FR,2019-05-12 16:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 15:00:00+00:00,FR04014,no2,9.6,µg/m³ +Paris,FR,2019-05-12 14:00:00+00:00,FR04014,no2,9.1,µg/m³ +Paris,FR,2019-05-12 13:00:00+00:00,FR04014,no2,8.7,µg/m³ +Paris,FR,2019-05-12 12:00:00+00:00,FR04014,no2,10.9,µg/m³ +Paris,FR,2019-05-12 11:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 10:00:00+00:00,FR04014,no2,11.4,µg/m³ +Paris,FR,2019-05-12 09:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-12 08:00:00+00:00,FR04014,no2,14.6,µg/m³ +Paris,FR,2019-05-12 07:00:00+00:00,FR04014,no2,15.9,µg/m³ +Paris,FR,2019-05-12 06:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-12 05:00:00+00:00,FR04014,no2,19.2,µg/m³ +Paris,FR,2019-05-12 04:00:00+00:00,FR04014,no2,16.2,µg/m³ +Paris,FR,2019-05-12 03:00:00+00:00,FR04014,no2,16.0,µg/m³ +Paris,FR,2019-05-12 02:00:00+00:00,FR04014,no2,17.2,µg/m³ +Paris,FR,2019-05-12 01:00:00+00:00,FR04014,no2,19.2,µg/m³ 
+Paris,FR,2019-05-12 00:00:00+00:00,FR04014,no2,22.8,µg/m³ +Paris,FR,2019-05-11 23:00:00+00:00,FR04014,no2,26.4,µg/m³ +Paris,FR,2019-05-11 22:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-11 21:00:00+00:00,FR04014,no2,21.1,µg/m³ +Paris,FR,2019-05-11 20:00:00+00:00,FR04014,no2,24.2,µg/m³ +Paris,FR,2019-05-11 19:00:00+00:00,FR04014,no2,31.2,µg/m³ +Paris,FR,2019-05-11 18:00:00+00:00,FR04014,no2,33.1,µg/m³ +Paris,FR,2019-05-11 17:00:00+00:00,FR04014,no2,32.0,µg/m³ +Paris,FR,2019-05-11 16:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-11 15:00:00+00:00,FR04014,no2,18.0,µg/m³ +Paris,FR,2019-05-11 14:00:00+00:00,FR04014,no2,17.8,µg/m³ +Paris,FR,2019-05-11 13:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-11 12:00:00+00:00,FR04014,no2,30.2,µg/m³ +Paris,FR,2019-05-11 11:00:00+00:00,FR04014,no2,33.2,µg/m³ +Paris,FR,2019-05-11 10:00:00+00:00,FR04014,no2,36.8,µg/m³ +Paris,FR,2019-05-11 09:00:00+00:00,FR04014,no2,35.7,µg/m³ +Paris,FR,2019-05-11 08:00:00+00:00,FR04014,no2,32.1,µg/m³ +Paris,FR,2019-05-11 07:00:00+00:00,FR04014,no2,29.0,µg/m³ +Paris,FR,2019-05-11 06:00:00+00:00,FR04014,no2,28.9,µg/m³ +Paris,FR,2019-05-11 02:00:00+00:00,FR04014,no2,14.9,µg/m³ +Paris,FR,2019-05-11 01:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-11 00:00:00+00:00,FR04014,no2,24.8,µg/m³ +Paris,FR,2019-05-10 23:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-10 22:00:00+00:00,FR04014,no2,28.1,µg/m³ +Paris,FR,2019-05-10 21:00:00+00:00,FR04014,no2,37.0,µg/m³ +Paris,FR,2019-05-10 20:00:00+00:00,FR04014,no2,43.6,µg/m³ +Paris,FR,2019-05-10 19:00:00+00:00,FR04014,no2,39.3,µg/m³ +Paris,FR,2019-05-10 18:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-10 17:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 16:00:00+00:00,FR04014,no2,30.8,µg/m³ +Paris,FR,2019-05-10 15:00:00+00:00,FR04014,no2,29.6,µg/m³ +Paris,FR,2019-05-10 14:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-10 13:00:00+00:00,FR04014,no2,22.0,µg/m³ +Paris,FR,2019-05-10 
12:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-10 11:00:00+00:00,FR04014,no2,23.2,µg/m³ +Paris,FR,2019-05-10 10:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-10 09:00:00+00:00,FR04014,no2,53.4,µg/m³ +Paris,FR,2019-05-10 08:00:00+00:00,FR04014,no2,60.7,µg/m³ +Paris,FR,2019-05-10 07:00:00+00:00,FR04014,no2,57.3,µg/m³ +Paris,FR,2019-05-10 06:00:00+00:00,FR04014,no2,47.4,µg/m³ +Paris,FR,2019-05-10 05:00:00+00:00,FR04014,no2,37.8,µg/m³ +Paris,FR,2019-05-10 04:00:00+00:00,FR04014,no2,20.5,µg/m³ +Paris,FR,2019-05-10 03:00:00+00:00,FR04014,no2,15.0,µg/m³ +Paris,FR,2019-05-10 02:00:00+00:00,FR04014,no2,14.1,µg/m³ +Paris,FR,2019-05-10 01:00:00+00:00,FR04014,no2,19.1,µg/m³ +Paris,FR,2019-05-10 00:00:00+00:00,FR04014,no2,22.7,µg/m³ +Paris,FR,2019-05-09 23:00:00+00:00,FR04014,no2,26.7,µg/m³ +Paris,FR,2019-05-09 22:00:00+00:00,FR04014,no2,29.7,µg/m³ +Paris,FR,2019-05-09 21:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 20:00:00+00:00,FR04014,no2,29.2,µg/m³ +Paris,FR,2019-05-09 19:00:00+00:00,FR04014,no2,23.8,µg/m³ +Paris,FR,2019-05-09 18:00:00+00:00,FR04014,no2,24.4,µg/m³ +Paris,FR,2019-05-09 17:00:00+00:00,FR04014,no2,29.9,µg/m³ +Paris,FR,2019-05-09 16:00:00+00:00,FR04014,no2,27.0,µg/m³ +Paris,FR,2019-05-09 15:00:00+00:00,FR04014,no2,23.9,µg/m³ +Paris,FR,2019-05-09 14:00:00+00:00,FR04014,no2,24.6,µg/m³ +Paris,FR,2019-05-09 13:00:00+00:00,FR04014,no2,21.3,µg/m³ +Paris,FR,2019-05-09 12:00:00+00:00,FR04014,no2,35.1,µg/m³ +Paris,FR,2019-05-09 11:00:00+00:00,FR04014,no2,34.2,µg/m³ +Paris,FR,2019-05-09 10:00:00+00:00,FR04014,no2,43.1,µg/m³ +Paris,FR,2019-05-09 09:00:00+00:00,FR04014,no2,32.3,µg/m³ +Paris,FR,2019-05-09 08:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-09 07:00:00+00:00,FR04014,no2,49.0,µg/m³ +Paris,FR,2019-05-09 06:00:00+00:00,FR04014,no2,50.7,µg/m³ +Paris,FR,2019-05-09 05:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-09 04:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-09 03:00:00+00:00,FR04014,no2,10.4,µg/m³ 
+Paris,FR,2019-05-09 02:00:00+00:00,FR04014,no2,10.0,µg/m³ +Paris,FR,2019-05-09 01:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-09 00:00:00+00:00,FR04014,no2,14.7,µg/m³ +Paris,FR,2019-05-08 23:00:00+00:00,FR04014,no2,25.2,µg/m³ +Paris,FR,2019-05-08 22:00:00+00:00,FR04014,no2,32.2,µg/m³ +Paris,FR,2019-05-08 21:00:00+00:00,FR04014,no2,48.9,µg/m³ +Paris,FR,2019-05-08 20:00:00+00:00,FR04014,no2,38.3,µg/m³ +Paris,FR,2019-05-08 19:00:00+00:00,FR04014,no2,41.3,µg/m³ +Paris,FR,2019-05-08 18:00:00+00:00,FR04014,no2,27.8,µg/m³ +Paris,FR,2019-05-08 17:00:00+00:00,FR04014,no2,29.3,µg/m³ +Paris,FR,2019-05-08 16:00:00+00:00,FR04014,no2,38.6,µg/m³ +Paris,FR,2019-05-08 15:00:00+00:00,FR04014,no2,26.0,µg/m³ +Paris,FR,2019-05-08 14:00:00+00:00,FR04014,no2,25.3,µg/m³ +Paris,FR,2019-05-08 13:00:00+00:00,FR04014,no2,14.3,µg/m³ +Paris,FR,2019-05-08 12:00:00+00:00,FR04014,no2,15.1,µg/m³ +Paris,FR,2019-05-08 11:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-08 10:00:00+00:00,FR04014,no2,33.4,µg/m³ +Paris,FR,2019-05-08 09:00:00+00:00,FR04014,no2,19.7,µg/m³ +Paris,FR,2019-05-08 08:00:00+00:00,FR04014,no2,17.0,µg/m³ +Paris,FR,2019-05-08 07:00:00+00:00,FR04014,no2,19.5,µg/m³ +Paris,FR,2019-05-08 06:00:00+00:00,FR04014,no2,21.7,µg/m³ +Paris,FR,2019-05-08 05:00:00+00:00,FR04014,no2,19.3,µg/m³ +Paris,FR,2019-05-08 04:00:00+00:00,FR04014,no2,15.5,µg/m³ +Paris,FR,2019-05-08 03:00:00+00:00,FR04014,no2,13.5,µg/m³ +Paris,FR,2019-05-08 02:00:00+00:00,FR04014,no2,15.3,µg/m³ +Paris,FR,2019-05-08 01:00:00+00:00,FR04014,no2,19.6,µg/m³ +Paris,FR,2019-05-08 00:00:00+00:00,FR04014,no2,22.1,µg/m³ +Paris,FR,2019-05-07 23:00:00+00:00,FR04014,no2,34.0,µg/m³ +Paris,FR,2019-05-07 22:00:00+00:00,FR04014,no2,35.8,µg/m³ +Paris,FR,2019-05-07 21:00:00+00:00,FR04014,no2,33.9,µg/m³ +Paris,FR,2019-05-07 20:00:00+00:00,FR04014,no2,36.2,µg/m³ +Paris,FR,2019-05-07 19:00:00+00:00,FR04014,no2,26.8,µg/m³ +Paris,FR,2019-05-07 18:00:00+00:00,FR04014,no2,21.4,µg/m³ +Paris,FR,2019-05-07 
17:00:00+00:00,FR04014,no2,22.3,µg/m³ +Paris,FR,2019-05-07 16:00:00+00:00,FR04014,no2,18.2,µg/m³ +Paris,FR,2019-05-07 15:00:00+00:00,FR04014,no2,11.7,µg/m³ +Paris,FR,2019-05-07 14:00:00+00:00,FR04014,no2,11.0,µg/m³ +Paris,FR,2019-05-07 13:00:00+00:00,FR04014,no2,13.2,µg/m³ +Paris,FR,2019-05-07 12:00:00+00:00,FR04014,no2,10.6,µg/m³ +Paris,FR,2019-05-07 11:00:00+00:00,FR04014,no2,13.0,µg/m³ +Paris,FR,2019-05-07 10:00:00+00:00,FR04014,no2,20.1,µg/m³ +Paris,FR,2019-05-07 09:00:00+00:00,FR04014,no2,34.5,µg/m³ +Paris,FR,2019-05-07 08:00:00+00:00,FR04014,no2,56.0,µg/m³ +Paris,FR,2019-05-07 07:00:00+00:00,FR04014,no2,67.9,µg/m³ +Paris,FR,2019-05-07 06:00:00+00:00,FR04014,no2,77.7,µg/m³ +Paris,FR,2019-05-07 05:00:00+00:00,FR04014,no2,72.4,µg/m³ +Paris,FR,2019-05-07 04:00:00+00:00,FR04014,no2,61.9,µg/m³ +Paris,FR,2019-05-07 03:00:00+00:00,FR04014,no2,50.4,µg/m³ +Paris,FR,2019-05-07 02:00:00+00:00,FR04014,no2,27.7,µg/m³ +Paris,FR,2019-05-07 01:00:00+00:00,FR04014,no2,25.0,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,no2,41.0,µg/m³ +Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,no2,43.5,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,no2,39.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,no2,36.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,no2,42.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,no2,42.5,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,no2,36.5,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,no2,28.5,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,no2,10.0,µg/m³ 
+Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,no2,52.5,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,no2,7.5,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,no2,11.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,no2,53.0,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,no2,29.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,no2,74.5,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,no2,60.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,no2,25.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,no2,24.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,no2,32.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,no2,25.0,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,no2,38.0,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-20 04:00:00+00:00,BETR801,no2,14.0,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,no2,9.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-20 00:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 23:00:00+00:00,BETR801,no2,16.5,µg/m³ +Antwerpen,BE,2019-05-19 22:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 
21:00:00+00:00,BETR801,no2,12.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,no2,15.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,no2,15.5,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,no2,18.5,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,no2,33.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,no2,17.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,no2,16.0,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,no2,30.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,no2,30.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,no2,26.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,no2,19.0,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,no2,23.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,no2,29.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,no2,34.5,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,no2,39.0,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,no2,40.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 18:00:00+00:00,BETR801,no2,35.5,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,no2,41.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,no2,28.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,no2,22.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,no2,25.5,µg/m³ 
+Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,no2,14.5,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,no2,17.5,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,no2,21.0,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,no2,26.5,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,no2,11.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,no2,10.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,no2,20.0,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,no2,20.5,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,no2,23.0,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,no2,45.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,no2,50.5,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London 
Westminster,no2,25.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London 
Westminster,no2,20.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London 
Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London 
Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London 
Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London 
Westminster,no2,28.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London 
Westminster,no2,8.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ 
+London,GB,2019-06-08 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ 
+London,GB,2019-06-07 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-06 14:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,no2,10.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 
19:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-05 09:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London 
Westminster,no2,38.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-04 05:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,no2,14.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London 
Westminster,no2,16.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,no2,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-03 01:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London 
Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-06-01 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,no2,4.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,no2,11.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London 
Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-31 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,no2,8.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,no2,12.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London 
Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-30 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,no2,9.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,no2,2.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,no2,0.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,no2,1.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,no2,6.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,no2,5.0,µg/m³ 
+London,GB,2019-05-29 18:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,no2,3.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,no2,5.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,no2,13.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,no2,29.0,µg/m³ 
+London,GB,2019-05-28 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 04:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ 
+London,GB,2019-05-27 09:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-27 00:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,no2,15.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,no2,17.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,no2,17.0,µg/m³ 
+London,GB,2019-05-26 05:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-25 20:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,no2,25.0,µg/m³ 
+London,GB,2019-05-25 01:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-24 16:00:00+00:00,London Westminster,no2,43.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,no2,44.0,µg/m³ 
+London,GB,2019-05-23 20:00:00+00:00,London Westminster,no2,45.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,no2,51.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,no2,54.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,no2,53.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 11:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,no2,34.0,µg/m³ 
+London,GB,2019-05-22 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-22 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,no2,28.0,µg/m³ 
+London,GB,2019-05-21 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-21 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,no2,47.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ 
+London,GB,2019-05-20 08:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 23:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,no2,49.0,µg/m³ 
+London,GB,2019-05-19 04:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,no2,49.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,no2,46.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,no2,42.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,no2,31.0,µg/m³ 
+London,GB,2019-05-18 00:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 16:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ 
+London,GB,2019-05-16 20:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-16 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ 
+London,GB,2019-05-15 16:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,no2,25.0,µg/m³ 
+London,GB,2019-05-14 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,no2,31.0,µg/m³ 
+London,GB,2019-05-13 07:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,no2,23.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,no2,22.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,no2,44.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,no2,36.0,µg/m³ 
+London,GB,2019-05-12 03:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,no2,38.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,no2,37.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-11 20:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,no2,29.0,µg/m³ 
+London,GB,2019-05-10 18:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-10 11:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,no2,39.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,no2,41.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,no2,52.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,no2,65.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,no2,59.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,no2,62.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,no2,58.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,no2,60.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,no2,67.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,no2,97.0,µg/m³ 
+London,GB,2019-05-09 14:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London Westminster,no2,35.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,no2,33.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,no2,30.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,no2,25.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,no2,40.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,no2,31.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,no2,27.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,no2,33.0,µg/m³ 
+London,GB,2019-05-08 08:00:00+00:00,London Westminster,no2,36.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,no2,34.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,no2,29.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,no2,24.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,no2,20.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,no2,18.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,no2,21.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,no2,28.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,no2,32.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,no2,26.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London Westminster,no2,16.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,no2,19.0,µg/m³ 
+London,GB,2019-05-07 02:00:00+00:00,London Westminster,no2,19.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,no2,23.0,µg/m³ diff --git a/doc/data/air_quality_parameters.csv b/doc/data/air_quality_parameters.csv new file mode 100644 index 00000000..915f6300 --- /dev/null +++ b/doc/data/air_quality_parameters.csv @@ -0,0 +1,8 @@ +id,description,name +bc,Black Carbon,BC +co,Carbon Monoxide,CO +no2,Nitrogen Dioxide,NO2 +o3,Ozone,O3 +pm10,Particulate matter less than 10 micrometers in diameter,PM10 +pm25,Particulate matter less than 2.5 micrometers in diameter,PM2.5 +so2,Sulfur Dioxide,SO2 diff --git a/doc/data/air_quality_pm25_long.csv b/doc/data/air_quality_pm25_long.csv new file mode 100644 index 00000000..f74053c2 --- /dev/null +++ b/doc/data/air_quality_pm25_long.csv @@ -0,0 +1,1111 @@ +city,country,date.utc,location,parameter,value,unit +Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³ +Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 04:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-06-17 03:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 02:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-06-17 01:00:00+00:00,BETR801,pm25,8.0,µg/m³ +Antwerpen,BE,2019-06-16 01:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-06-15 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-14 09:00:00+00:00,BETR801,pm25,12.0,µg/m³ +Antwerpen,BE,2019-06-13 01:00:00+00:00,BETR801,pm25,3.0,µg/m³ +Antwerpen,BE,2019-06-12 01:00:00+00:00,BETR801,pm25,16.0,µg/m³ +Antwerpen,BE,2019-06-11 01:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-06-10 01:00:00+00:00,BETR801,pm25,8.5,µg/m³ +Antwerpen,BE,2019-06-09 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-06-08 
01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-06 01:00:00+00:00,BETR801,pm25,6.5,µg/m³ +Antwerpen,BE,2019-06-05 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-06-04 01:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-06-03 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-06-02 01:00:00+00:00,BETR801,pm25,19.0,µg/m³ +Antwerpen,BE,2019-06-01 01:00:00+00:00,BETR801,pm25,9.0,µg/m³ +Antwerpen,BE,2019-05-31 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-30 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-29 01:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-28 01:00:00+00:00,BETR801,pm25,7.0,µg/m³ +Antwerpen,BE,2019-05-27 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-26 01:00:00+00:00,BETR801,pm25,26.5,µg/m³ +Antwerpen,BE,2019-05-25 01:00:00+00:00,BETR801,pm25,10.0,µg/m³ +Antwerpen,BE,2019-05-24 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-23 01:00:00+00:00,BETR801,pm25,7.5,µg/m³ +Antwerpen,BE,2019-05-22 01:00:00+00:00,BETR801,pm25,15.5,µg/m³ +Antwerpen,BE,2019-05-21 01:00:00+00:00,BETR801,pm25,20.5,µg/m³ +Antwerpen,BE,2019-05-20 17:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 16:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 15:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 14:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-20 13:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-20 12:00:00+00:00,BETR801,pm25,17.5,µg/m³ +Antwerpen,BE,2019-05-20 11:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 10:00:00+00:00,BETR801,pm25,10.5,µg/m³ +Antwerpen,BE,2019-05-20 09:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-20 08:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-20 07:00:00+00:00,BETR801,pm25,23.5,µg/m³ +Antwerpen,BE,2019-05-20 06:00:00+00:00,BETR801,pm25,22.0,µg/m³ +Antwerpen,BE,2019-05-20 05:00:00+00:00,BETR801,pm25,25.0,µg/m³ +Antwerpen,BE,2019-05-20 
04:00:00+00:00,BETR801,pm25,24.5,µg/m³ +Antwerpen,BE,2019-05-20 03:00:00+00:00,BETR801,pm25,15.0,µg/m³ +Antwerpen,BE,2019-05-20 02:00:00+00:00,BETR801,pm25,18.5,µg/m³ +Antwerpen,BE,2019-05-20 01:00:00+00:00,BETR801,pm25,28.0,µg/m³ +Antwerpen,BE,2019-05-19 21:00:00+00:00,BETR801,pm25,35.5,µg/m³ +Antwerpen,BE,2019-05-19 20:00:00+00:00,BETR801,pm25,40.0,µg/m³ +Antwerpen,BE,2019-05-19 19:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 18:00:00+00:00,BETR801,pm25,35.0,µg/m³ +Antwerpen,BE,2019-05-19 17:00:00+00:00,BETR801,pm25,34.0,µg/m³ +Antwerpen,BE,2019-05-19 16:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-19 15:00:00+00:00,BETR801,pm25,44.0,µg/m³ +Antwerpen,BE,2019-05-19 14:00:00+00:00,BETR801,pm25,43.5,µg/m³ +Antwerpen,BE,2019-05-19 13:00:00+00:00,BETR801,pm25,46.0,µg/m³ +Antwerpen,BE,2019-05-19 12:00:00+00:00,BETR801,pm25,43.0,µg/m³ +Antwerpen,BE,2019-05-19 11:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-19 10:00:00+00:00,BETR801,pm25,41.5,µg/m³ +Antwerpen,BE,2019-05-19 09:00:00+00:00,BETR801,pm25,42.5,µg/m³ +Antwerpen,BE,2019-05-19 08:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 07:00:00+00:00,BETR801,pm25,56.0,µg/m³ +Antwerpen,BE,2019-05-19 06:00:00+00:00,BETR801,pm25,58.5,µg/m³ +Antwerpen,BE,2019-05-19 05:00:00+00:00,BETR801,pm25,60.0,µg/m³ +Antwerpen,BE,2019-05-19 04:00:00+00:00,BETR801,pm25,56.5,µg/m³ +Antwerpen,BE,2019-05-19 03:00:00+00:00,BETR801,pm25,52.5,µg/m³ +Antwerpen,BE,2019-05-19 02:00:00+00:00,BETR801,pm25,51.5,µg/m³ +Antwerpen,BE,2019-05-19 01:00:00+00:00,BETR801,pm25,52.0,µg/m³ +Antwerpen,BE,2019-05-19 00:00:00+00:00,BETR801,pm25,49.5,µg/m³ +Antwerpen,BE,2019-05-18 23:00:00+00:00,BETR801,pm25,45.5,µg/m³ +Antwerpen,BE,2019-05-18 22:00:00+00:00,BETR801,pm25,42.0,µg/m³ +Antwerpen,BE,2019-05-18 21:00:00+00:00,BETR801,pm25,40.5,µg/m³ +Antwerpen,BE,2019-05-18 20:00:00+00:00,BETR801,pm25,41.0,µg/m³ +Antwerpen,BE,2019-05-18 19:00:00+00:00,BETR801,pm25,36.5,µg/m³ +Antwerpen,BE,2019-05-18 
18:00:00+00:00,BETR801,pm25,37.0,µg/m³ +Antwerpen,BE,2019-05-18 01:00:00+00:00,BETR801,pm25,24.0,µg/m³ +Antwerpen,BE,2019-05-17 01:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-16 01:00:00+00:00,BETR801,pm25,11.0,µg/m³ +Antwerpen,BE,2019-05-15 02:00:00+00:00,BETR801,pm25,12.5,µg/m³ +Antwerpen,BE,2019-05-15 01:00:00+00:00,BETR801,pm25,13.0,µg/m³ +Antwerpen,BE,2019-05-14 02:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-14 01:00:00+00:00,BETR801,pm25,4.0,µg/m³ +Antwerpen,BE,2019-05-13 02:00:00+00:00,BETR801,pm25,5.5,µg/m³ +Antwerpen,BE,2019-05-13 01:00:00+00:00,BETR801,pm25,5.0,µg/m³ +Antwerpen,BE,2019-05-12 02:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-12 01:00:00+00:00,BETR801,pm25,6.0,µg/m³ +Antwerpen,BE,2019-05-11 02:00:00+00:00,BETR801,pm25,19.5,µg/m³ +Antwerpen,BE,2019-05-11 01:00:00+00:00,BETR801,pm25,17.0,µg/m³ +Antwerpen,BE,2019-05-10 02:00:00+00:00,BETR801,pm25,13.5,µg/m³ +Antwerpen,BE,2019-05-10 01:00:00+00:00,BETR801,pm25,11.5,µg/m³ +Antwerpen,BE,2019-05-09 02:00:00+00:00,BETR801,pm25,3.5,µg/m³ +Antwerpen,BE,2019-05-09 01:00:00+00:00,BETR801,pm25,4.5,µg/m³ +Antwerpen,BE,2019-05-08 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-08 01:00:00+00:00,BETR801,pm25,14.5,µg/m³ +Antwerpen,BE,2019-05-07 02:00:00+00:00,BETR801,pm25,14.0,µg/m³ +Antwerpen,BE,2019-05-07 01:00:00+00:00,BETR801,pm25,12.5,µg/m³ +London,GB,2019-06-21 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-20 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 16:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-20 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-19 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-19 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-06-18 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-18 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-18 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 15:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-17 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-17 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-16 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 10:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-16 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-15 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 06:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-15 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-14 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 22:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-13 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-13 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 16:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-12 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-12 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 10:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-11 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-11 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-10 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 06:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-10 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-09 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 01:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-09 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-08 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-08 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-08 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-08 03:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 02:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-08 00:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 23:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 21:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 20:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 19:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-06-07 18:00:00+00:00,London 
Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 15:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 14:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-06-07 12:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 11:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 10:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 09:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 08:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-06-07 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 06:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 05:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-06-07 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-06-07 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-07 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-06 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 
14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-06 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-06 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-05 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-05 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 
09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-05 01:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-05 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-06-04 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 
05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-04 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-04 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-03 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 
01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-03 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-06-02 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-02 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-06-01 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 
21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-06-01 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-06-01 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 22:00:00+00:00,London Westminster,pm25,5.0,µg/m³ +London,GB,2019-05-31 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 
17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-31 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-31 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 
13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-30 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-30 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 
09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-29 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-29 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-28 
04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-28 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-27 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-27 
00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 20:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 19:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 18:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 17:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 16:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 15:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 14:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 13:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 12:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 11:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 07:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 06:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 05:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 04:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 03:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 02:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 01:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-26 00:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 23:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 22:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 21:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-25 
20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-25 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-25 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-25 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-25 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 22:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-24 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 
16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-24 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-24 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 
11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 06:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-23 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 04:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 02:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-23 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 10:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 09:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 08:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 
07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-22 05:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-22 00:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-21 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-21 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-21 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 06:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 05:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-21 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 
03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-21 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-21 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 22:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 21:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 20:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 19:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 18:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-20 17:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 16:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-20 15:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 14:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 13:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 12:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 11:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-20 08:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 07:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-20 06:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-20 04:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 03:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 02:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 01:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-20 00:00:00+00:00,London Westminster,pm25,19.0,µg/m³ 
+London,GB,2019-05-19 23:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 22:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 21:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 20:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 19:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 18:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 17:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 16:00:00+00:00,London Westminster,pm25,20.0,µg/m³ +London,GB,2019-05-19 15:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 14:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 13:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 12:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 11:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 10:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 09:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 08:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 07:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 06:00:00+00:00,London Westminster,pm25,19.0,µg/m³ +London,GB,2019-05-19 05:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 04:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 03:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 02:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 01:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-19 00:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 23:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 22:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 21:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 20:00:00+00:00,London 
Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 19:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 18:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 17:00:00+00:00,London Westminster,pm25,18.0,µg/m³ +London,GB,2019-05-18 16:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 15:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 14:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 13:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 12:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 11:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 10:00:00+00:00,London Westminster,pm25,17.0,µg/m³ +London,GB,2019-05-18 09:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 08:00:00+00:00,London Westminster,pm25,16.0,µg/m³ +London,GB,2019-05-18 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-18 05:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 04:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 03:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-18 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-18 01:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-18 00:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-17 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 
16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 13:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 12:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 11:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-17 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 04:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 03:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 02:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-17 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-17 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 23:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-16 22:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 21:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-16 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-16 16:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 14:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 13:00:00+00:00,London Westminster,pm25,15.0,µg/m³ 
+London,GB,2019-05-16 12:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 11:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 10:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 09:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 08:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 07:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 06:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 05:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 04:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 03:00:00+00:00,London Westminster,pm25,15.0,µg/m³ +London,GB,2019-05-16 02:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 01:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-16 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-15 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-15 19:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-15 15:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 14:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 13:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 12:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-15 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 09:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-15 08:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-15 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-14 22:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 21:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 20:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 19:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 18:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 17:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 16:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 15:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 14:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 13:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 12:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 11:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 10:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 09:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 08:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 07:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 04:00:00+00:00,London 
Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 03:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-14 01:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-14 00:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 23:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 22:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 21:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 20:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 19:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 18:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 17:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 16:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 15:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 14:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 13:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 12:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 11:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 10:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 09:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 08:00:00+00:00,London Westminster,pm25,6.0,µg/m³ +London,GB,2019-05-13 07:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 06:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 05:00:00+00:00,London Westminster,pm25,7.0,µg/m³ +London,GB,2019-05-13 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-13 00:00:00+00:00,London 
Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 23:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-12 22:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 14:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-12 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-12 10:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 09:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 08:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 07:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-12 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-12 03:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 02:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 01:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-12 00:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 23:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 22:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 21:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 
20:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 19:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 18:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 17:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 16:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 15:00:00+00:00,London Westminster,pm25,14.0,µg/m³ +London,GB,2019-05-11 09:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 08:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 07:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-11 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-11 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 02:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-11 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-11 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-10 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 20:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 18:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 17:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 16:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 15:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 14:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 13:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 12:00:00+00:00,London Westminster,pm25,12.0,µg/m³ 
+London,GB,2019-05-10 11:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 10:00:00+00:00,London Westminster,pm25,13.0,µg/m³ +London,GB,2019-05-10 09:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 08:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 07:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 06:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 05:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 04:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 03:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 02:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-10 01:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-10 00:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 23:00:00+00:00,London Westminster,pm25,12.0,µg/m³ +London,GB,2019-05-09 22:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 21:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-09 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 19:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 18:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-09 10:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 09:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 08:00:00+00:00,London 
Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 05:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 04:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 03:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 02:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-09 00:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 23:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 21:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 20:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 19:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 18:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 17:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 16:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 15:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-08 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 07:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 06:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 05:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 04:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 03:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-08 02:00:00+00:00,London 
Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 01:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-08 00:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 23:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 21:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 20:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 19:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 18:00:00+00:00,London Westminster,pm25,11.0,µg/m³ +London,GB,2019-05-07 17:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 16:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 15:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 14:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 13:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 12:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 11:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 10:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 09:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 08:00:00+00:00,London Westminster,pm25,10.0,µg/m³ +London,GB,2019-05-07 07:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 06:00:00+00:00,London Westminster,pm25,9.0,µg/m³ +London,GB,2019-05-07 04:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 03:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 02:00:00+00:00,London Westminster,pm25,8.0,µg/m³ +London,GB,2019-05-07 01:00:00+00:00,London Westminster,pm25,8.0,µg/m³ diff --git a/doc/data/air_quality_stations.csv b/doc/data/air_quality_stations.csv new file mode 100644 index 00000000..9ab1a377 --- /dev/null +++ b/doc/data/air_quality_stations.csv @@ -0,0 +1,67 @@ +location,coordinates.latitude,coordinates.longitude +BELAL01,51.23619,4.38522 +BELHB23,51.1703,4.341 
+BELLD01,51.10998,5.00486 +BELLD02,51.12038,5.02155 +BELR833,51.32766,4.36226 +BELSA04,51.31393,4.40387 +BELWZ02,51.1928,5.22153 +BETM802,51.26099,4.4244 +BETN016,51.23365,5.16398 +BETR801,51.20966,4.43182 +BETR802,51.20952,4.43179 +BETR803,51.22863,4.42845 +BETR805,51.20823,4.42156 +BETR811,51.2521,4.49136 +BETR815,51.2147,4.33221 +BETR817,51.17713,4.41795 +BETR820,51.32042,4.44481 +BETR822,51.26429,4.34128 +BETR831,51.3488,4.33971 +BETR834,51.092,4.3801 +BETR891,51.25581,4.38536 +BETR893,51.28138,4.38577 +BETR894,51.2835,4.3495 +BETR897,51.25011,4.3421 +FR04004,48.89167,2.34667 +FR04012,48.82778,2.3275 +FR04014,48.83724,2.3939 +FR04014,48.83722,2.3939 +FR04031,48.86887,2.31194 +FR04031,48.86889,2.31194 +FR04037,48.82861,2.36028 +FR04060,48.8572,2.2933 +FR04071,48.8564,2.33528 +FR04071,48.85639,2.33528 +FR04118,48.87027,2.3325 +FR04118,48.87029,2.3325 +FR04131,48.87333,2.33028 +FR04135,48.83795,2.40806 +FR04135,48.83796,2.40806 +FR04141,48.85278,2.36056 +FR04141,48.85279,2.36056 +FR04143,48.859,2.351 +FR04143,48.85944,2.35111 +FR04179,48.83038,2.26989 +FR04329,48.8386,2.41279 +FR04329,48.83862,2.41278 +Camden Kerbside,51.54421,-0.17527 +Ealing Horn Lane,51.51895,-0.26562 +Haringey Roadside,51.5993,-0.06822 +London Bexley,51.46603,0.18481 +London Bloomsbury,51.52229,-0.12589 +London Eltham,51.45258,0.07077 +London Haringey Priory Park South,51.58413,-0.12525 +London Harlington,51.48879,-0.44161 +London Harrow Stanmore,51.61733,-0.29878 +London Hillingdon,51.49633,-0.46086 +London Marylebone Road,51.52253,-0.15461 +London N. 
Kensington,51.52105,-0.21349 +London Teddington,51.42099,-0.33965 +London Teddington Bushy Park,51.42529,-0.34561 +London Westminster,51.49467,-0.13193 +Southend-on-Sea,51.5442,0.67841 +Southwark A2 Old Kent Road,51.4805,-0.05955 +Thurrock,51.47707,0.31797 +Tower Hamlets Roadside,51.52253,-0.04216 +Groton Fort Griswold,41.3536,-72.0789 diff --git a/doc/data/baseball.csv b/doc/data/baseball.csv new file mode 100644 index 00000000..aadbaced --- /dev/null +++ b/doc/data/baseball.csv @@ -0,0 +1,101 @@ +id,player,year,stint,team,lg,g,ab,r,h,X2b,X3b,hr,rbi,sb,cs,bb,so,ibb,hbp,sh,sf,gidp +88641,womacto01,2006,2,CHN,NL,19,50,6,14,1,0,1,2.0,1.0,1.0,4,4.0,0.0,0.0,3.0,0.0,0.0 +88643,schilcu01,2006,1,BOS,AL,31,2,0,1,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +88645,myersmi01,2006,1,NYA,AL,62,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +88649,helliri01,2006,1,MIL,NL,20,3,0,0,0,0,0,0.0,0.0,0.0,0,2.0,0.0,0.0,0.0,0.0,0.0 +88650,johnsra05,2006,1,NYA,AL,33,6,0,1,0,0,0,0.0,0.0,0.0,0,4.0,0.0,0.0,0.0,0.0,0.0 +88652,finlest01,2006,1,SFN,NL,139,426,66,105,21,12,6,40.0,7.0,0.0,46,55.0,2.0,2.0,3.0,4.0,6.0 +88653,gonzalu01,2006,1,ARI,NL,153,586,93,159,52,2,15,73.0,0.0,1.0,69,58.0,10.0,7.0,0.0,6.0,14.0 +88662,seleaa01,2006,1,LAN,NL,28,26,2,5,1,0,0,0.0,0.0,0.0,1,7.0,0.0,0.0,6.0,0.0,1.0 +89177,francju01,2007,2,ATL,NL,15,40,1,10,3,0,0,8.0,0.0,0.0,4,10.0,1.0,0.0,0.0,1.0,1.0 +89178,francju01,2007,1,NYN,NL,40,50,7,10,0,0,1,8.0,2.0,1.0,10,13.0,0.0,0.0,0.0,1.0,1.0 +89330,zaungr01,2007,1,TOR,AL,110,331,43,80,24,1,10,52.0,0.0,0.0,51,55.0,8.0,2.0,1.0,6.0,9.0 +89333,witasja01,2007,1,TBA,AL,3,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89334,williwo02,2007,1,HOU,NL,33,59,3,6,0,0,1,2.0,0.0,0.0,0,25.0,0.0,0.0,5.0,0.0,1.0 +89335,wickmbo01,2007,2,ARI,NL,8,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89336,wickmbo01,2007,1,ATL,NL,47,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89337,whitero02,2007,1,MIN,AL,38,109,8,19,4,0,4,20.0,0.0,0.0,6,19.0,0.0,3.0,0.0,1.0,2.0 
+89338,whiteri01,2007,1,HOU,NL,20,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89339,wellsda01,2007,2,LAN,NL,7,15,2,4,1,0,0,1.0,0.0,0.0,0,6.0,0.0,0.0,0.0,0.0,0.0 +89340,wellsda01,2007,1,SDN,NL,22,38,1,4,0,0,0,0.0,0.0,0.0,0,12.0,0.0,0.0,4.0,0.0,0.0 +89341,weathda01,2007,1,CIN,NL,67,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89343,walketo04,2007,1,OAK,AL,18,48,5,13,1,0,0,4.0,0.0,0.0,2,4.0,0.0,0.0,0.0,2.0,2.0 +89345,wakefti01,2007,1,BOS,AL,1,2,0,0,0,0,0,0.0,0.0,0.0,0,2.0,0.0,0.0,0.0,0.0,0.0 +89347,vizquom01,2007,1,SFN,NL,145,513,54,126,18,3,4,51.0,14.0,6.0,44,48.0,6.0,1.0,14.0,3.0,14.0 +89348,villoro01,2007,1,NYA,AL,6,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89352,valenjo03,2007,1,NYN,NL,51,166,18,40,11,1,3,18.0,2.0,1.0,15,28.0,4.0,0.0,1.0,1.0,5.0 +89354,trachst01,2007,2,CHN,NL,4,7,0,1,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89355,trachst01,2007,1,BAL,AL,3,5,0,0,0,0,0,0.0,0.0,0.0,0,3.0,0.0,0.0,0.0,0.0,0.0 +89359,timlimi01,2007,1,BOS,AL,4,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89360,thomeji01,2007,1,CHA,AL,130,432,79,119,19,0,35,96.0,0.0,1.0,95,134.0,11.0,6.0,0.0,3.0,10.0 +89361,thomafr04,2007,1,TOR,AL,155,531,63,147,30,0,26,95.0,0.0,0.0,81,94.0,3.0,7.0,0.0,5.0,14.0 +89363,tavarju01,2007,1,BOS,AL,2,4,0,1,0,0,0,0.0,0.0,0.0,1,3.0,0.0,0.0,0.0,0.0,0.0 +89365,sweenma01,2007,2,LAN,NL,30,33,2,9,1,0,0,3.0,0.0,0.0,1,11.0,0.0,0.0,0.0,0.0,0.0 +89366,sweenma01,2007,1,SFN,NL,76,90,18,23,8,0,2,10.0,2.0,0.0,13,18.0,0.0,3.0,1.0,0.0,0.0 +89367,suppaje01,2007,1,MIL,NL,33,61,4,8,0,0,0,2.0,0.0,0.0,3,16.0,0.0,0.0,11.0,0.0,2.0 +89368,stinnke01,2007,1,SLN,NL,26,82,7,13,3,0,1,5.0,0.0,0.0,5,22.0,2.0,0.0,0.0,0.0,2.0 +89370,stantmi02,2007,1,CIN,NL,67,2,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89371,stairma01,2007,1,TOR,AL,125,357,58,103,28,1,21,64.0,2.0,1.0,44,66.0,5.0,2.0,0.0,2.0,7.0 +89372,sprinru01,2007,1,SLN,NL,72,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 
+89374,sosasa01,2007,1,TEX,AL,114,412,53,104,24,1,21,92.0,0.0,0.0,34,112.0,3.0,3.0,0.0,5.0,11.0 +89375,smoltjo01,2007,1,ATL,NL,30,54,1,5,1,0,0,2.0,0.0,0.0,1,19.0,0.0,0.0,13.0,0.0,0.0 +89378,sheffga01,2007,1,DET,AL,133,494,107,131,20,1,25,75.0,22.0,5.0,84,71.0,2.0,9.0,0.0,6.0,10.0 +89381,seleaa01,2007,1,NYN,NL,31,4,0,0,0,0,0,0.0,0.0,0.0,1,1.0,0.0,0.0,1.0,0.0,0.0 +89382,seaneru01,2007,1,LAN,NL,68,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89383,schmija01,2007,1,LAN,NL,6,7,1,1,0,0,1,1.0,0.0,0.0,0,4.0,0.0,0.0,1.0,0.0,0.0 +89384,schilcu01,2007,1,BOS,AL,1,2,0,1,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89385,sandere02,2007,1,KCA,AL,24,73,12,23,7,0,2,11.0,0.0,1.0,11,15.0,0.0,1.0,0.0,0.0,2.0 +89388,rogerke01,2007,1,DET,AL,1,2,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89389,rodriiv01,2007,1,DET,AL,129,502,50,141,31,3,11,63.0,2.0,2.0,9,96.0,1.0,1.0,1.0,2.0,16.0 +89396,ramirma02,2007,1,BOS,AL,133,483,84,143,33,1,20,88.0,0.0,0.0,71,92.0,13.0,7.0,0.0,8.0,21.0 +89398,piazzmi01,2007,1,OAK,AL,83,309,33,85,17,1,8,44.0,0.0,0.0,18,61.0,0.0,0.0,0.0,2.0,9.0 +89400,perezne01,2007,1,DET,AL,33,64,5,11,3,0,1,6.0,0.0,0.0,4,8.0,0.0,0.0,3.0,0.0,2.0 +89402,parkch01,2007,1,NYN,NL,1,1,0,0,0,0,0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,0.0 +89406,oliveda02,2007,1,LAA,AL,5,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89410,myersmi01,2007,1,NYA,AL,6,1,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89411,mussimi01,2007,1,NYA,AL,2,2,0,0,0,0,0,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,0.0 +89412,moyerja01,2007,1,PHI,NL,33,73,4,9,2,0,0,2.0,0.0,0.0,2,26.0,0.0,0.0,8.0,0.0,1.0 +89420,mesajo01,2007,1,PHI,NL,38,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89421,martipe02,2007,1,NYN,NL,5,9,1,1,1,0,0,0.0,0.0,0.0,0,6.0,0.0,0.0,2.0,0.0,0.0 +89425,maddugr01,2007,1,SDN,NL,33,62,2,9,2,0,0,0.0,1.0,0.0,1,19.0,0.0,0.0,9.0,0.0,2.0 +89426,mabryjo01,2007,1,COL,NL,28,34,4,4,1,0,1,5.0,0.0,0.0,5,10.0,0.0,0.0,0.0,0.0,1.0 
+89429,loftoke01,2007,2,CLE,AL,52,173,24,49,9,3,0,15.0,2.0,3.0,17,23.0,0.0,0.0,4.0,2.0,1.0 +89430,loftoke01,2007,1,TEX,AL,84,317,62,96,16,3,7,23.0,21.0,4.0,39,28.0,1.0,2.0,2.0,3.0,5.0 +89431,loaizes01,2007,1,LAN,NL,5,7,0,1,0,0,0,2.0,0.0,0.0,0,2.0,0.0,0.0,2.0,0.0,1.0 +89438,kleskry01,2007,1,SFN,NL,116,362,51,94,27,3,6,44.0,5.0,1.0,46,68.0,2.0,1.0,1.0,1.0,14.0 +89439,kentje01,2007,1,LAN,NL,136,494,78,149,36,1,20,79.0,1.0,3.0,57,61.0,4.0,5.0,0.0,6.0,17.0 +89442,jonesto02,2007,1,DET,AL,5,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89445,johnsra05,2007,1,ARI,NL,10,15,0,1,0,0,0,0.0,0.0,0.0,1,7.0,0.0,0.0,2.0,0.0,0.0 +89450,hoffmtr01,2007,1,SDN,NL,60,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89451,hernaro01,2007,2,LAN,NL,22,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89452,hernaro01,2007,1,CLE,AL,2,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89460,guarded01,2007,1,CIN,NL,15,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89462,griffke02,2007,1,CIN,NL,144,528,78,146,24,1,30,93.0,6.0,1.0,85,99.0,14.0,1.0,0.0,9.0,14.0 +89463,greensh01,2007,1,NYN,NL,130,446,62,130,30,1,10,46.0,11.0,1.0,37,62.0,4.0,5.0,1.0,1.0,14.0 +89464,graffto01,2007,1,MIL,NL,86,231,34,55,8,0,9,30.0,0.0,1.0,24,44.0,6.0,3.0,0.0,2.0,7.0 +89465,gordoto01,2007,1,PHI,NL,44,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89466,gonzalu01,2007,1,LAN,NL,139,464,70,129,23,2,15,68.0,6.0,2.0,56,56.0,4.0,4.0,0.0,2.0,11.0 +89467,gomezch02,2007,2,CLE,AL,19,53,4,15,2,0,0,5.0,0.0,0.0,0,6.0,0.0,0.0,1.0,1.0,1.0 +89468,gomezch02,2007,1,BAL,AL,73,169,17,51,10,1,1,16.0,1.0,2.0,10,20.0,1.0,0.0,5.0,1.0,5.0 +89469,glavito02,2007,1,NYN,NL,33,56,3,12,1,0,0,4.0,0.0,0.0,6,5.0,0.0,0.0,12.0,1.0,0.0 +89473,floydcl01,2007,1,CHN,NL,108,282,40,80,10,1,9,45.0,0.0,0.0,35,47.0,5.0,5.0,0.0,0.0,6.0 +89474,finlest01,2007,1,COL,NL,43,94,9,17,3,0,1,2.0,0.0,0.0,8,4.0,1.0,0.0,0.0,0.0,2.0 +89480,embreal01,2007,1,OAK,AL,4,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 
+89481,edmonji01,2007,1,SLN,NL,117,365,39,92,15,2,12,53.0,0.0,2.0,41,75.0,2.0,0.0,2.0,3.0,9.0 +89482,easleda01,2007,1,NYN,NL,76,193,24,54,6,0,10,26.0,0.0,1.0,19,35.0,1.0,5.0,0.0,1.0,2.0 +89489,delgaca01,2007,1,NYN,NL,139,538,71,139,30,0,24,87.0,4.0,0.0,52,118.0,8.0,11.0,0.0,6.0,12.0 +89493,cormirh01,2007,1,CIN,NL,6,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89494,coninje01,2007,2,NYN,NL,21,41,2,8,2,0,0,5.0,0.0,0.0,7,8.0,2.0,0.0,1.0,1.0,1.0 +89495,coninje01,2007,1,CIN,NL,80,215,23,57,11,1,6,32.0,4.0,0.0,20,28.0,0.0,0.0,1.0,6.0,4.0 +89497,clemero02,2007,1,NYA,AL,2,2,0,1,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89498,claytro01,2007,2,BOS,AL,8,6,1,0,0,0,0,0.0,0.0,0.0,0,3.0,0.0,0.0,0.0,0.0,2.0 +89499,claytro01,2007,1,TOR,AL,69,189,23,48,14,0,1,12.0,2.0,1.0,14,50.0,0.0,1.0,3.0,3.0,8.0 +89501,cirilje01,2007,2,ARI,NL,28,40,6,8,4,0,0,6.0,0.0,0.0,4,6.0,0.0,0.0,0.0,0.0,1.0 +89502,cirilje01,2007,1,MIN,AL,50,153,18,40,9,2,2,21.0,2.0,0.0,15,13.0,0.0,1.0,3.0,2.0,9.0 +89521,bondsba01,2007,1,SFN,NL,126,340,75,94,14,0,28,66.0,5.0,0.0,132,54.0,43.0,3.0,0.0,2.0,13.0 +89523,biggicr01,2007,1,HOU,NL,141,517,68,130,31,3,10,50.0,4.0,3.0,23,112.0,0.0,3.0,7.0,5.0,5.0 +89525,benitar01,2007,2,FLO,NL,34,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89526,benitar01,2007,1,SFN,NL,19,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0 +89530,ausmubr01,2007,1,HOU,NL,117,349,38,82,16,3,3,25.0,6.0,1.0,37,74.0,3.0,6.0,4.0,1.0,11.0 +89533,aloumo01,2007,1,NYN,NL,87,328,51,112,19,1,13,49.0,3.0,0.0,27,30.0,5.0,2.0,0.0,3.0,13.0 +89534,alomasa02,2007,1,NYN,NL,8,22,1,3,1,0,0,0.0,0.0,0.0,0,3.0,0.0,0.0,0.0,0.0,0.0 diff --git a/doc/data/fx_prices b/doc/data/fx_prices new file mode 100644 index 0000000000000000000000000000000000000000..38cadf26909a37e119c1c1cf93a7de94a9020d26 GIT binary patch literal 16177 zcmeI3hgVZc`}h;Oh>8%BP(`qS1;w(;T4q2673^X`QK^C=5(#$23Tr{6H$e~;v7@ZL zqSzbQ6$^F+k&*yu@O$pe?7rvy{R!XK>^Xim$-Q@;dFJUktkVjJ@Cy(0i*yQDyd=me zc!}SlAj6Sxvl=)hBhNkt_{n*9K|HXVi@}?8;OR15zdgXw 
z3$#BU0&bOrS^N#OG+dB#3FzDL@#CAon!-oZ%Ye~t4_?0l+IMd+{Rk|++_t$I*my(Q zyAc>WcS%tz@OJR)F9y0io=@wG1t!3%v}ZMzz}~x2m#)A|gN7Y&0`k!QH-mr)&-49U zfp!sjpT`1kOs!lq1vvfZTfQ$aL*_9k7}z6j)cM80u;in)%Ym9+hou{UJ4y?iw*epB zGjU1*`UM%E$N}but@v619H6^wVG*!2Tl(u9Fmyw4>@}d$;zZl~z@c`QuFrt?&xuRl z0XvkeSAGNL`b4j-2Rb*b)=~o_m7fRc>hX9wNgvO*2fAl^)ro-5`;Cq23=FyJq38je z#QdN`Rl(=o(W7e|N6`yXn*39PZ%)h?6{p#z{W|Q zf@t92nZ;xN0^Y7Gx}N~_*?MeJ7O*A!*Y|v2y#CKcM}TGRr{$jl`WH;;eg(Mq=!Mz0 zfdf6xy)Osecy~Im0%-8nHuEzuBihF17w~mV?389;Sf|a;`TC&G(;H^B1?KH4X)p!$ z8+c`zHBdU#@klqI?$d70eSsbo@xd;@7t**NZotrX+rqtohXk^GZ{W})b=~Iz*B1Nu zh5$Q^n^7qN=6?1LS_K?=Cq=Un_|!jXeH>7sE=eOM z0|%Dxyg3W_^3OsiuGeD2|!Q)DRFIof2?gCeT$`5@4Y_}%w=NsUJO@{Vgf$yyh z3TlCNT_hC>;K>W&^R(N5KE1;}83AKvKQ$KuMe_3OPQaP!A&(t^KayRh_XDo#J?Qli zU?-Dhkt2b}4@#TH0qcw2^qLObIQI4NdBD<1Bfo?Kr~PnU5DEOc0oAMqI&M8+9|JsR zx4$4BsCMk|CLI`8V&u06Xfw<3^C4izvbT4OfqOzLCR_r3iWs%|CNQeah&E-w)5eE~ zzXFafEG+p5eCb-?{u5|9#&~rj@Ni{29?uZs&bOrl+W@22OV64BjV+c8vjP^k6g}+% z9F=x#rV}tWM;t#0C>|srr-`-R4&w%wqi%s7H2aL2H^Bq`PVR646I4w#%Nexuaobg?!EswXY-g{Ad z;C;oVJtE+o#0%Xz19LOF&h7zp?q~nLKd|JEZQwAVPWhG$cVNnbSnG+v=aSmVGl3z7 zHP8Hkiak|6VZdtf71t=>nxc!PYk`-~bW_CwCr;`Tod8Un5m%fEw6EJbcrUPc%U>6c z02_DK)tv&yI{u2f0=(UK#-ZClpWEJj9s{fVrktz*#=9qZeg@jU+xf5t=>J8b(+q4_ z+w>P7GSa}9d6x`lLmWx1FsnczMKQp72Izh0!%%8Cs6`?akBrdmB3IhXRl2_0q4eXhrE1v{hlpEe-7O-V-*wH{>d|7C}aA4Vs^7+ev{*p&k>wtTg zyL8?P9N2Ntz9is{!U2xizy&*`SN8!`-Ik0x1{~(}dgU45gBveet^$o8xDL1r%nL?m zo&fu`4juXyDAn69`3ltCoiDEidh9pqsRX_lZg@<)J?Qhfe*bpBb{9PEngMgBj+)R3 z_X8WRsGSz^A>8-Nyq(16Hh>4m@ygY3n@TkH_x@EC8+w zdvhidXr%jk=o;Yh?-oxt16?2b_S1ZHwOw0uDP{EzG9I!7HB+EY&jK} zS7)}{7dYyf_n=_lv+ybB7X!shFVrpv7Pdbp-2kjIvp={ExcaEAQwp%1hs}u`;Dqt9 zUkiZmzie7q1hlL9nR^a+GN!7_b>PTO-w)gee$#ezeFj{yt6S+i;Q9P`sR^IiGUjgGom^JOD(S+>;arse9375@T=E_6T^Vp zoYiC9fmU}E4<-QTl=8O?*(pM8Gqpja7}GM{wZMlEq>jv04MIeGy68M(&5(oa-e-*XJG}f z_?A=VXW%HmUN*mgu^}l_nt@_|(sO=Ch&u*qqqe}RT*XcSaJ5U5zy@e*Gw4V+V9}`o z&V7Lm7qf$1fSaae{cr=`@YS*P0#0w_@AU?L+tJ#6F0e;xh;In+yi;(c1gIJKP`C=X zygGl7xR%T 
z&4G7{TwCmbMrRN9?*+`Bbl~(rpwo|iLl7{cuDxV5kiXSPJ_$G?A*#nL;1kElqk+Jn zl8Aocz!`%_nJ)u=f8>_E4%qX>q4F&zQg5A+u2V-zO#f+-{%KGa2M0$6Nh1x%Ee@B4 zxCPA*4L6hspzAVQBM~}y>(8a%OGE*ItcNk=8kxYLFsYxRL=4Rc&yxpDlexFf!exPx z5{ot7{BTcxl>hkH82=z&i6yi|q;)fdevq_m603i^A8XQzLH`rO^*_P?^glMF0sH^^ zAFkuc?aOsM+^aeIH-GElw;lZUgWqxBcO3W~2mTWWQcoC34sWYRX!@+8%hh$r?P2iK zuM6tXPUmI4J9nx>1LvF@W29AwR@Wb&xG=L81rB*PAug>J^^6t1e9%;jR^D{{Gx2UM zDvjrEs6#JmWn*W?)S@J@&b0QuYthSLD`p%R2>1C`xN9%}g-&wIy{ouYp>6vh! zR3W<^Q4-5`)u_9(!C@<(YE)l7&fj>(PvoB5s5dvg2Bn9aY`k*k7b0)Hexbs>_w$5wdlYN&p}&U>yZ5Q^YxMLb?m-6 z#7Q%4O+6Zx<-PmRk$QA{>80Hked>{s7Iy-5XhS5oay|OG-Qj1uOZCW}D54(StGRv6 z?MMTvbd_*CDiT!y0I6q3YplsEt6 zkI27esP)7uupAlMpLHEH-+;!KCCqLa(STNb?*7}#`^c!7=g831kNZF(Ir`g7b+_iW40Yzr;-?HPUateD zEN9or(e|?o+bDA7=(K+{n6ezr2k+?FC`ZFNv%W89=hlckLCeod8XaOjaIj2{$`^BY$k36{x00=3C)`u~qh+Wm>iw>c`($Vg z=iZGV4}AJ^SR*24{6da`h@r{Rp5j5xC4q88ZhI+5B)o(-qAjlT;0T)#1QBrEMnu9# z+a~lpKVq2S^F|~k2iM5_M~>RP?+qH1BQOq#WO76UmmHDX3uNf4HD{=DM9j6b97T}; zC1?I6L)@t>cQnS67G^HnbdUX6V?3U7BdIZ7%Nd4?F(&?> zZj2)y{8j!o*BEnO7aMfM`H6q>$6fD;iGLsy9J;u6^X+64Ow7K*6w^3pim4t2_;UMQ z{rAYsFtN*AA^y{T?5d4@M3~t3PZ18h=j(YZR){;^Ja;B8Mu@3>3-KJ!Tl&LZ3$ZyD z2q%efcs~Dvu89~Itl$E(7`txxa5G&c#w6Z}#rR+y1gwb`SZ{sGh;3n(m=pKbDND9r zEBqO}c=2v4oD%d`fNzBrKFPW7LQ72aF0#bMGr34+iDP^oU-&4n!o(1o^`gdH1OOyb_dPPl>#b5m{c0?ur%+hQ*2Z0Xw>lQ^);77wxj zOXu5Sa9+3JqiylTxU_JSkG9y8yWp8EPUg&ab!YngQ=PFd=O|fr_*x(sR=FL1#f60+ zJ8WhRnL@{&J}1N;Lr~Kx?b8K6gJ^KRdl&o;qEF!mJDkI%hbB8r?CPi;Uf8oS?aj%~ zR4(JrnEFX)O!AGbEhheu-wBiWe9;Dz`25ov6aUMz!uqvBh>(_;#J9&*n8d39D=OzI zE9^ELPWBvp&)NA03!Dn4@%6j~_P%-mGKK~2lGeI&?J5gQ{n!E@>-uS9&V4b~;R5|Q zFgZ3`rX--5e7$oobHDxpY6)94oo> zJ;ef(IRDB5lk~dRocWUlCUwVj3!J;B24oj*#Cfar*Vl8 zb6Ge~S47XbK#2Q-8r*|~kc+Q9?zvBh!P_A6m|?D(2{$prq`uHM!@sz+87jcE?h)XJ z5cT%W63}y5C&1vvQ?EJ*@O8NAZJik==}t$4p@=9nohZUnQakU6au#FGUG;~F@m3Hw z<%9^&=EAm-2={<$DcV4Udx{|q*0=S^{@)b1Xe;wlhF)mFgtE1-Tl1kRr;hZhR)UC4S37X_H)vr__0 z(zK6&#wk|;Ci#D}0F!dEP=KLWgA6Rdr2c9a;9Xq#a{$hd)B!>aUQ?D(0e;rVB}x${ 
zbwi5~ZyyT*4&pdhbxj!{#-#iOzbAF5wwT6U&@Y#j#`PEB2rm7WiSR2+PD5f$(uS89 z-&h2WuujbEQjEFiaqyuCll*VQ;)9i#?x#_N{kQ;pTZlz%UWP9j1bn#g=6rR&{G3`%va0YbG9aidMYEOEYwCSt$ zFez74b@B57zXp%GsYC15-nw`>*QTj;F_~li(8X=YKu8aVkb*;>)z=2Bk1?RnQ|seR zTzY(JfQcBsZE!Vr919Iu9b|~PZua6;13ZmO?C%U{-q~P)p*k8of22Mp{p(CUnwN_8 zum?%#x_BQq{<)@uNjo6cp>_3oZA|*#0xeb#Yq5S$i}f#j+AqE3)4cZwpZ1Z_JX&8( z<*{?%;R8^PU3cbTG7fssidsf+g8~hk?`TlM&(&}O8a8KBBf6j(k^03#gJ_?qL0q{u z;g|+-ZN)Kd4I<-|hiU{}8C0-J){iUE#mQWOt3=!s>P;6Fn`bE5+)2siKPvPQ(&j8% zHKOxH4T9kc2-1S?aOL$|4I+K0u?EpTNyX$*BPkiAX%MNqvowg*x1TkL^p`(0h>St( zThU`~Y*g6F<_az73QQAwJ!wJEg+c+ADKK(?fjRk69Eip@#Yi1_7BHS&V#-@MN%l+Br$NQD}p4WFy8Vsj1!o5Ly)H?6wn zreJzeAaXn@N<{rf$>x{}#7+6;I4amYtqD1D<4uPqOi1dHK3O1)x zAm@x8(3vRM+*652{F$LdwC`82Igx_Ro0RA;XcN~*st_5ITB#7JOB)m@k{q~_ox6(J zp$c(r{_#o`B7MPi6(ap?I~C&E$$h<4$dUv=CE}*GnFdNk`b2?(y;mR-7H>Br8V{RU zJZ)xrS0FM6Sk=tt$jxjX)rh#ZFUY(RanbBdTqB}$jYiagG=hz6e%*vfT|T>s&HWnL z9JLV@ac!JY6Pr^vp{Lw9ZFv(S^M$=lh|XP_+4(jzy)+{lUz-sLqi34f+_4#vK2g$) ztVmqSXKT9i7{%+1ypb=DQk1>vA=V2Wn;~YQ(hxUDDKS{;XnieI z7{h1%9;{2|qkNXf{_odGG++PT=Wji58u;I{12Qk}x&buyJY&kOA3`QBZYonl0w%pN~MfWrO$6jrSiQ>q5EH%f=QZsmqOQ@mP*$z zN~O>Hl}hLCS!r}WC{JViOUImeD-AQK{O8lDeDgD?Tx~Nc4`$MNPMb`s*TEU|c^Mg0 zkLxq3-d1JOeaz0Ha+GD!=MBlG*B{EJ*I8v#c|T-R`66?ue!uOe9F|MhdzXXn--8!_ zH+^nw4n2<|cAtAT{rn>1XSQx|7JbeQCbv3+>giEBz1}&UKG!#$UiTr5>RXmZ^fXIhpFyJ(=3!{A7Aw{gUZ^dnVI$5|gPMU)g!{Qs_CY zN~X{CNv8Xrl|-NSfZ3aN5n)Q()zsUF^>)9ZQZ)L+}D(e*Z^(RFSyyX}xc zuiwP%TATUtKYltpldcz+N%u3I`LkgrT{nbX|0A9H>-%)7m(X;24rf?ASjGr`L+bF$ z>GXLq8B`xm8Fb&1GpJnzFuyyWNxy%bLG5Zc^TTc#biY;^bbkXg==ljVsGdKhQ~i8p z@#z}d?_X@6ARpPowKqviH&K`4vpQ zuuQ7wm6>#3lQOB@!F$plcx2J%3wBYx4b7tV^@&}-DUfOUzA1dZaaHFgq`0MrZ4SGy00fp-{zTAAN(w;*E@`(cG2f7-9?|dSVW^ucMy}!od@!Z|?KCN7O-HBYf-zW6*3hP`V-@>!G zRL?DK9|ZlI*;t{%lSMiUwj@p2m4#> zzKmSD&(rLF{X8OM$m@TvUi`gpP6PjY=Pvd4^ObZ8(F^&{pPbv$KRFLE agg+tg>=0rU(k`TZ2>X+9zgT~Nr~e1LGyn$x literal 0 HcmV?d00001 diff --git a/doc/data/iris.data b/doc/data/iris.data new file mode 100644 index 00000000..c19b9c36 --- /dev/null +++ 
b/doc/data/iris.data @@ -0,0 +1,151 @@ +SepalLength,SepalWidth,PetalLength,PetalWidth,Name +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor 
+6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica 
+6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica \ No newline at end of file diff --git a/doc/data/mindex_ex.csv b/doc/data/mindex_ex.csv new file mode 100644 index 00000000..935ff936 --- /dev/null +++ b/doc/data/mindex_ex.csv @@ -0,0 +1,16 @@ +year,indiv,zit,xit +1977,"A",1.2,.6 +1977,"B",1.5,.5 +1977,"C",1.7,.8 +1978,"A",.2,.06 +1978,"B",.7,.2 +1978,"C",.8,.3 +1978,"D",.9,.5 +1978,"E",1.4,.9 +1979,"C",.2,.15 +1979,"D",.14,.05 +1979,"E",.5,.15 +1979,"F",1.2,.5 +1979,"G",3.4,1.9 +1979,"H",5.4,2.7 +1979,"I",6.4,1.2 diff --git a/doc/data/test.xls b/doc/data/test.xls new file mode 100644 index 0000000000000000000000000000000000000000..db0f9dec7d5e42c87dc0b0d297b66305f7af4225 GIT binary patch literal 30720 zcmeHQ4RloHnf_)n2>~nwMF`qz3{voigalBALcoA2M2YEup$?c3C&Q3V$Y5q@z`-Wn z)-K)EmTp6pny%Q>?%H&N@w@kafA9Cc-}`&-dmWzn*|Zan{?lbARmxGOveZCgqRMf!yKv84dw}InSwGE1tOrP zpY9scyJbi=s)%Y)VRaX-5nM-mT&&zK%B{g6lljDO zr-fOzTCaAh?f9z)9=p|A)vUH^oY)dmjks?f0q8uePg1Bq#9@S?gI)7*ca}nrBU`P; z3IUBn5G%z-iO&+OZ)neSeZ8=?{}Xee#W~q(w(sbKJmpq7*vyl-KH>Qx&Pu(AvRUn* z?r#i63)Tg0X>4u|?k@OaALSZ{dKYn64i39e??lOiMPVEGj`Jn@{EvFn4`eKQs@3lj zyP~n%sQ>g(Z@E_LaS!NoA2jg# z2V50LN|$^1Sy{u+8g{LNSYcQ+Jm3W}B$6p;c+~QtT~~PC4=IB^F1)&M_Vu%8FQ~ib 
zL!0NB%I0gX@LUCp58G^-(7(wSY`8OdP$3qB@iv2s79_(#ouKBzX%wc8N`Yn#y%BaD zmb;FqTw7o;v#Oit>4weMe8_4joL>mM>gKJi!zxAU8vJ3ZbKbQD&Pmr6B+Xa>=K(k8 zTudlPUQeUuSaQV*d(j)~+Ak_~I#B?Qnd3+S+sQ-S{#It~ zl)iN1hifO)bmK4Nc9$w3dDbEOoDJUTedteZzh;>B>xXH-Hf=j2rMGDNQ~1azDqVXv z;;MA`Fm}@T)1~R-NWmA~9+lUgmEd(+dyc+d-0soWOWG&s>-6@C`g&=5uD+^vrLQyE zvlD*<8!ncR?&(*#IqJ&qFj70F&c2zg-pKv5Qr~)2?c-_7!CgVlpDq6nN&YJHsO@(=&r=!y*py+HQ&yI zEVw^-^YcyncRky*{~_FaHtsw~)EJJx)pW=H{5^@oYBvgwZ{Pi>O99Sf_r_E|;M@{@ z)P?(lgzdfuV{oM_KGCtj!!^0K2lL9B@7UkIcIUzFJr88N@>JgR+v{id1LpK^T((br z^MfAFWr)OU0D%<`J_0=a2>PF&Sxfi3v;G6Pf@aeczgl8+4so)9Ya6GZXd?tqDcQ^n7{X7>DPOLMee=) z16q^Smbo78huMEl(UCdkX$Fu$9uN* zqyMFpe(rzn!O7VV{j%|D>eXKc?$x+(ZO$iOh&S;gzfW8Jg!xevBQG4g_vf=*Tc%HU z-G_U@9s3Kj#If5ROC087xrFa|N#XlsH_f{ol$o3HO#S(Jd~|2XdlAkJczoymG@OZ7 zt~&dT*MGk5uId$ay8T++ew{x5g>Grr(kC$IXpQ%{K8c}YC?#tN!$FTZu|Dui#v#W(HP(G)B$UcF&;LG`U`*Ka5QjN;;(s|pJWx5Z*RmKGPc zwzd}4b5v0vvYi8?#nmm59l@5^?ppzDJ|+}3#2N~*j6hF92dxc_fmqq3iOa&l-DLn_ z0o&2i*c>aWt`7w(A}!nNal39!q^0qmNOP>d$rp?+E9Q7iv!mOK{m#M+iz-Vso!k_t!EV|VWL557#Qa!i$H*M zQ&~yrf(6Tp^(`Tfh*LCH9}8AA)rSUg!gy?BFcfSl0~B+sQ7kq%E-N;OFEefD=EZF- zjX`iC2Dvhe4HBJp_W8hxG(>U7&9;>!@8DQr#ri;f6NLG~VM7W*wknK4J;3V;NcIJ0 zkQk7%}sdi9CRjCbUg8=uL0QZ)h<9AbqMw+3UeAfgXD zvM}>g;~mROrgP-BA4>7$69?TkRvh%@j#dlP4ULgdz25QjcejV3!LirP_4Av?z;>rH zHYwt}so`Tgqp@HE8-2k*?0l5`cSax?<gTLg++MvWR68nTL=Jp zl=p_@0NSY|@4XskxNR_t7r0=gQaDsHH23B&OyQoz)#2JpO1%pfAPq2hl%|yl3`%Nu z*v+vZ6WE-!aG-eRJ+JNcP+=d5FmAl%A;+u{xADKdx&DH*;u%pmR9WAJ8NYF1TD!gY z#v9HL6?IAicJIQm+v!CimBgX4AcnhSNfP^v-A>yLULAQLQi8%Q~x*4r|mJ_PU>Q&(rCW22@-sD~oAIS6c%2CW*pf zYFnfofKaLYC%+;lCM-Q?wZplb7=OQ0c=DKyx z2cO?~WNn<;#!6}%Gck4>GhTT*p>4OxTqf)6wuiDgE<34B<}zh!k9$Oqj#(8*V#(SjG_I$$8CfYd`nKVM66D%v4(}sLWAKRsTEX-k+~} z>NO43jS*8-;CStS)x7`f8ipGfO;w+mdUs*@p{J}CLcMfv$4k#F`i~WaN}Ok^b_;5j zhWgoEjp>8BEgTA|NO%5f9db z{2L9Gyqu|ySWvSy)bH(m?d${Bzo7BR)=(c-FD%-$_C*aNTkk;C4NrzD{{4(KhEN~* z)UUU{y65{EDy3njDkuYy!(G=M+AtkWVr)$(iz#=iTM_#JOTTXR0%@A9tp3$8)CXQ^AZ# ztQSMckM|?_%5<+9Z@@t#- 
z?28)1i!rVg<2lpR%r7;@daxb~FBGcR4U=!QP4h@5HPvqq(q=!4r~T|Ky}7sS^iY?P zlu@%Wv%JQzG&uHp%)V|nuPE?#Z9HcxpB9pBCb$~UnL6bn^~JQU3S(YLZG({JvsYX4 zQz@h@BxlpdbEX&Wi}-ZO;Pb(Vr%xkYNMF8WMBz|neHUhYJZCy4f~%o+JH04mYPi!U zN9U(+9?zND|3k;0B(xr$`{OxN>uJTAiA=*I6YW@9vq67BJf1WCljKa9b75{$a-4Gi z$I6-RdsjJA=3AL^vh%1WJId@AkIeI^94VPo<`SjGj2ZVqYRthgcPBMw%9wMrGo>q& zGNsP^t7J-@`B%x5I`gmiUy}jRnSYf`>6WBSsWbm7nNnx|HET%zRWhZ{{HtV2+mkY- z&it!nN}c&vkA~{Zzj}t`UnNs|Fey{&%)d&e)R}*kOsO;f3NB&?ocUMDlpe95N~YAA zf0azBGyj@1B>yUzQfK~EGNmT}Dw$Ge{#7!i&it!nN}c)F2}AO)ltVlJnmZ)_nmZ)_ znmZ)_nyaDO`PUih6A*%f$iJq^!0OK#R$|q2XJGXpU<8dGl7W@J@$(qi0WydmT~P7) zJ6TjJUdN9=j~y7m=S*3XgO6LGj8?flG$+)n-NGUB|h zG0+l;Mz+QZZr&9LHWlb~w1zCHSX<5yzI1z$ZZDGGSs~o>UXjWf4x%lpS&=h=$ij)P$$o-QRs@cq42i=+fm5huc46NPoPi^et^PXn0X4t$#*a+ z>Bq%T)$%Z0JAb&u=F7VJ0N!OpqT7T+=v#^M!i;+#Jihp-i}&sS+@^nbPUK3d52Fp= z^=U3vuU)m_D*08QN+3SXow!IP2GCF9Sa`69#}y!ZQoE}ipA+JP!Y}hnz)$K9YlJLd zX5wd1AZawHdD)X5JSzinHqhky5fzE|M>|!>f4a5X744L3cvH0V_pROTXlEcl633k@ zvL_tph`_A6eAYSAj0i174f8UnCy!IojFU-P&Cl zD=&?7R_!mF7D3%w!Ce_GFAaDJ(O#o!e`Sm<)*3F~p~#-M94q(^h5Y`0dxg8K75sgR zp9%TPODkhs&#X|F-M4zpsPy*9aEw*6y0)t-ZiU@bJWYE$t61#hJuMI}P!mYbf!Dxoo`G6Ku?foWNSUfrbFbj@Uo|Y7l|eZi@NKgZoBt z1#qd)1UG$ciJGpEw>&T2>+h+GmgfcXtD?1iZ3*7Td&!0JJbzE6FATK<+ttflvG98ABp=q0{ImsVSiP$q9o+4D2c?EpAv$x+RWO%@;u_t_DUal?Cap1iW0)8 z>8kXFyliD3=>mr?ZSAfo@pZH%!hX)GD1nW@m_Rnl2%d2#RP|`bZ@EA#~U%B1Ixal>9w5k?~$a zps3b#Rg@6vnyyHkCJ*)n#6>%aE{U!vA(6muMj*eok2APAT!ZUYl#s-#s9;#0$JHxJ zNG4`vh5dvH(*E1xX>Gs1-{=DOnrIR&Y9NUYd5Jp7l>2yZB+j--ob5s`wSxSWks+`2 z*L0D{ke5jEjwKSOoN=8%zQ0H2&;r!<5njAE4%`470anGZ)mBWp`JS0h#wfVKpw zq4Ws`yeCDxhP-5$*%4}#j3SmIdxlwo{6N0Y5cU(#crPVZ=2O&ax@0Xfk*yF%KxR$) zM8e~})Ee#$@#7wFXKMQ>gv6@0PxhQ6K#j-X3{wBcVt%xfo)Q;^fZa$EjvSO7@y)&=*BJ zc_p_gSJ)s(EjNK%K~<#XA_oYA$R?+DcSH=xoLqwN^iZIjq z(I{}9ST2szM2OhyAvMzjLQWooMYQ-o{N)y^RZ_1d1LA zog72QW84(irCf7w$sfumXR}5+|F10pDG&SUb*ZL=$GN0coSlJ0w*{^t@S>D(4aArd zU}xmKPFei-_xXW`@E_-yB5{;9Vw91jXLIjG(zS+6p&23-wA4a_P;AS&Xf-W2b-*dv zq7SqK)Bx7FFT_mrfxAcj@pTYW1`Z4gs-ohoMDpn!WOt;MxS3GRjo5CzAs7N-xiAJv 
z1ZcY}Bk)69*d!VqLoON0Ie=K(H&N7twmyUhL8`xTU}6d?N^1MOPm6#vv?Fo!7_`>J zK;mPKT^3u(f&pJgHlIis3@(1WDvJ69;zkoFY9pk|+L~0uTgXOFI4-;uQfX`H7^pd! zyDwRWo|$vFAtamH%sW~H_Bn}-cJir$PKt;M_a%5{*c0}14nuvzTei|m$P8k~5uEMs zS^PM+LLTLvXxvWaS4e`Gk0S|nMQ`HY#bM_d&p{Fin?I?IZ}GTj3t@N+z);J0QC(axd;o)SS=B=ZO$F6x_t9<)t+}p`}8oQE;Owf~+VY!IN>^`sAlE+INxD zcrTxh5(}w#;HEJQ~*~xeNp;T(r0n zDpE9Pj`%dlM_VD+q2z|I;Z1lRd(z2Qc^)HSv>yaRgeXTG8fWvoLB5jDbZo?iTEwR>*2F>Zc~IsFp@K2nM)&IE zDY~_IuN%A42h*n?+Gr6p)EqHVq{5BtA4dbq1m}FKO}~?}7z$`yOAAD{ z8Ef}}M}rUqx*mx)PFzLb1J5&n@S5eEquu`F0WaoVF1#W8X?dt`be+^MUggb&m|=OA zpy-j;z{h%nLLH+(F%;vj^W_A^oSuhoN$h73tT&UdA&8Sa%~X_dBqf5O94%I*k8=n~ zUM_GBaUq_3e?w!>IsP6Bervb)X}S#tYqXEvr@1SbgO^X`dAO=qtsu>J2pDT>sh(5< z`Z=MS*Jvl-`iL4(euy3Sw2M zE##nh4QeR$!)UYp-hi4zn=647`{_^UH$)qyMlChe8ttWl;waHc-bqkOU8eUk+DqQb z7zui5A-Ez7y&%^Y7cVtyhu(2$`fE%2{l}~J>$itm9%Q#kCxd)3m6U0bD`HIFM4Kd! zS(F^wHfg8qvX59(dsq`X#PHH;i6J+kDaVp4T!U-V!!axs`^B0q3u5poKior(7wMp` zh}6h<2DIEA>bSAr{{F?UMX3veQqLD4c666c>Zt)qQa?pv9f=?4Lg<_bgAc2)h!rK- z)TiaA1|Pv-pNKzkuIWNd2?*pSSEB^es(7E~8w>lD3bl?B8uHR+@cpZ3r|qXXT3+GE zJ2h>;`TiQ@xhrGgO||__LWzJ)C>cfZk=7%d#d+tOb6@~!|4GY^O^5j|)*HR3G zkuO^69PQTcP%Kj|;vwov!Qp5ME$7+qRj8a2?1`{SC`du2D6x+Ql{FDD#vsn;OHp?T zyu?kLFpNQ#sEN2(!V$woQQ;d+F#{YW^pUk>Fh>$gQ8=M-RF9TL7Q=mqY7T`rf#Ik# z4HWNvNuPP&+AU{lf*Bu}RddMSm-+a}p4xu#nj^y>Rqmy9lE{mscaZi$Fi(jXi2%;w z&M_BPb11abcS!azlQ{uz2${zC3za(T&1{{(tbjNA6@Op!E9YKkRGeFoBZPnmf9sRw zrRU~_nL#oTm&eXM!)#Vveqa_}*{+YjaBd>Z9IUdmwKp(JupD`_Mn$+;fP}+%9|}PG zsR@~K5Sn&~0bj;*z(HpB{aQ+>0 zk-`|>G%BS0{^ONVX2?gwMUIsKeq?ER$3_S)0Wr(yj8Mp5H>>r@Q4pdzk53gc5c{l8 z!elFczVT;&xYR2A`-9u@FZ|txe}nO3quvkxJA4VpkqH0~25rb>V1{EH%l}DudOolJ E|39OKD*ylh literal 0 HcmV?d00001 diff --git a/doc/data/tips.csv b/doc/data/tips.csv new file mode 100644 index 00000000..856a65a6 --- /dev/null +++ b/doc/data/tips.csv @@ -0,0 +1,245 @@ +total_bill,tip,sex,smoker,day,time,size +16.99,1.01,Female,No,Sun,Dinner,2 +10.34,1.66,Male,No,Sun,Dinner,3 +21.01,3.5,Male,No,Sun,Dinner,3 +23.68,3.31,Male,No,Sun,Dinner,2 
+24.59,3.61,Female,No,Sun,Dinner,4 +25.29,4.71,Male,No,Sun,Dinner,4 +8.77,2.0,Male,No,Sun,Dinner,2 +26.88,3.12,Male,No,Sun,Dinner,4 +15.04,1.96,Male,No,Sun,Dinner,2 +14.78,3.23,Male,No,Sun,Dinner,2 +10.27,1.71,Male,No,Sun,Dinner,2 +35.26,5.0,Female,No,Sun,Dinner,4 +15.42,1.57,Male,No,Sun,Dinner,2 +18.43,3.0,Male,No,Sun,Dinner,4 +14.83,3.02,Female,No,Sun,Dinner,2 +21.58,3.92,Male,No,Sun,Dinner,2 +10.33,1.67,Female,No,Sun,Dinner,3 +16.29,3.71,Male,No,Sun,Dinner,3 +16.97,3.5,Female,No,Sun,Dinner,3 +20.65,3.35,Male,No,Sat,Dinner,3 +17.92,4.08,Male,No,Sat,Dinner,2 +20.29,2.75,Female,No,Sat,Dinner,2 +15.77,2.23,Female,No,Sat,Dinner,2 +39.42,7.58,Male,No,Sat,Dinner,4 +19.82,3.18,Male,No,Sat,Dinner,2 +17.81,2.34,Male,No,Sat,Dinner,4 +13.37,2.0,Male,No,Sat,Dinner,2 +12.69,2.0,Male,No,Sat,Dinner,2 +21.7,4.3,Male,No,Sat,Dinner,2 +19.65,3.0,Female,No,Sat,Dinner,2 +9.55,1.45,Male,No,Sat,Dinner,2 +18.35,2.5,Male,No,Sat,Dinner,4 +15.06,3.0,Female,No,Sat,Dinner,2 +20.69,2.45,Female,No,Sat,Dinner,4 +17.78,3.27,Male,No,Sat,Dinner,2 +24.06,3.6,Male,No,Sat,Dinner,3 +16.31,2.0,Male,No,Sat,Dinner,3 +16.93,3.07,Female,No,Sat,Dinner,3 +18.69,2.31,Male,No,Sat,Dinner,3 +31.27,5.0,Male,No,Sat,Dinner,3 +16.04,2.24,Male,No,Sat,Dinner,3 +17.46,2.54,Male,No,Sun,Dinner,2 +13.94,3.06,Male,No,Sun,Dinner,2 +9.68,1.32,Male,No,Sun,Dinner,2 +30.4,5.6,Male,No,Sun,Dinner,4 +18.29,3.0,Male,No,Sun,Dinner,2 +22.23,5.0,Male,No,Sun,Dinner,2 +32.4,6.0,Male,No,Sun,Dinner,4 +28.55,2.05,Male,No,Sun,Dinner,3 +18.04,3.0,Male,No,Sun,Dinner,2 +12.54,2.5,Male,No,Sun,Dinner,2 +10.29,2.6,Female,No,Sun,Dinner,2 +34.81,5.2,Female,No,Sun,Dinner,4 +9.94,1.56,Male,No,Sun,Dinner,2 +25.56,4.34,Male,No,Sun,Dinner,4 +19.49,3.51,Male,No,Sun,Dinner,2 +38.01,3.0,Male,Yes,Sat,Dinner,4 +26.41,1.5,Female,No,Sat,Dinner,2 +11.24,1.76,Male,Yes,Sat,Dinner,2 +48.27,6.73,Male,No,Sat,Dinner,4 +20.29,3.21,Male,Yes,Sat,Dinner,2 +13.81,2.0,Male,Yes,Sat,Dinner,2 +11.02,1.98,Male,Yes,Sat,Dinner,2 +18.29,3.76,Male,Yes,Sat,Dinner,4 
+17.59,2.64,Male,No,Sat,Dinner,3 +20.08,3.15,Male,No,Sat,Dinner,3 +16.45,2.47,Female,No,Sat,Dinner,2 +3.07,1.0,Female,Yes,Sat,Dinner,1 +20.23,2.01,Male,No,Sat,Dinner,2 +15.01,2.09,Male,Yes,Sat,Dinner,2 +12.02,1.97,Male,No,Sat,Dinner,2 +17.07,3.0,Female,No,Sat,Dinner,3 +26.86,3.14,Female,Yes,Sat,Dinner,2 +25.28,5.0,Female,Yes,Sat,Dinner,2 +14.73,2.2,Female,No,Sat,Dinner,2 +10.51,1.25,Male,No,Sat,Dinner,2 +17.92,3.08,Male,Yes,Sat,Dinner,2 +27.2,4.0,Male,No,Thur,Lunch,4 +22.76,3.0,Male,No,Thur,Lunch,2 +17.29,2.71,Male,No,Thur,Lunch,2 +19.44,3.0,Male,Yes,Thur,Lunch,2 +16.66,3.4,Male,No,Thur,Lunch,2 +10.07,1.83,Female,No,Thur,Lunch,1 +32.68,5.0,Male,Yes,Thur,Lunch,2 +15.98,2.03,Male,No,Thur,Lunch,2 +34.83,5.17,Female,No,Thur,Lunch,4 +13.03,2.0,Male,No,Thur,Lunch,2 +18.28,4.0,Male,No,Thur,Lunch,2 +24.71,5.85,Male,No,Thur,Lunch,2 +21.16,3.0,Male,No,Thur,Lunch,2 +28.97,3.0,Male,Yes,Fri,Dinner,2 +22.49,3.5,Male,No,Fri,Dinner,2 +5.75,1.0,Female,Yes,Fri,Dinner,2 +16.32,4.3,Female,Yes,Fri,Dinner,2 +22.75,3.25,Female,No,Fri,Dinner,2 +40.17,4.73,Male,Yes,Fri,Dinner,4 +27.28,4.0,Male,Yes,Fri,Dinner,2 +12.03,1.5,Male,Yes,Fri,Dinner,2 +21.01,3.0,Male,Yes,Fri,Dinner,2 +12.46,1.5,Male,No,Fri,Dinner,2 +11.35,2.5,Female,Yes,Fri,Dinner,2 +15.38,3.0,Female,Yes,Fri,Dinner,2 +44.3,2.5,Female,Yes,Sat,Dinner,3 +22.42,3.48,Female,Yes,Sat,Dinner,2 +20.92,4.08,Female,No,Sat,Dinner,2 +15.36,1.64,Male,Yes,Sat,Dinner,2 +20.49,4.06,Male,Yes,Sat,Dinner,2 +25.21,4.29,Male,Yes,Sat,Dinner,2 +18.24,3.76,Male,No,Sat,Dinner,2 +14.31,4.0,Female,Yes,Sat,Dinner,2 +14.0,3.0,Male,No,Sat,Dinner,2 +7.25,1.0,Female,No,Sat,Dinner,1 +38.07,4.0,Male,No,Sun,Dinner,3 +23.95,2.55,Male,No,Sun,Dinner,2 +25.71,4.0,Female,No,Sun,Dinner,3 +17.31,3.5,Female,No,Sun,Dinner,2 +29.93,5.07,Male,No,Sun,Dinner,4 +10.65,1.5,Female,No,Thur,Lunch,2 +12.43,1.8,Female,No,Thur,Lunch,2 +24.08,2.92,Female,No,Thur,Lunch,4 +11.69,2.31,Male,No,Thur,Lunch,2 +13.42,1.68,Female,No,Thur,Lunch,2 +14.26,2.5,Male,No,Thur,Lunch,2 
+15.95,2.0,Male,No,Thur,Lunch,2 +12.48,2.52,Female,No,Thur,Lunch,2 +29.8,4.2,Female,No,Thur,Lunch,6 +8.52,1.48,Male,No,Thur,Lunch,2 +14.52,2.0,Female,No,Thur,Lunch,2 +11.38,2.0,Female,No,Thur,Lunch,2 +22.82,2.18,Male,No,Thur,Lunch,3 +19.08,1.5,Male,No,Thur,Lunch,2 +20.27,2.83,Female,No,Thur,Lunch,2 +11.17,1.5,Female,No,Thur,Lunch,2 +12.26,2.0,Female,No,Thur,Lunch,2 +18.26,3.25,Female,No,Thur,Lunch,2 +8.51,1.25,Female,No,Thur,Lunch,2 +10.33,2.0,Female,No,Thur,Lunch,2 +14.15,2.0,Female,No,Thur,Lunch,2 +16.0,2.0,Male,Yes,Thur,Lunch,2 +13.16,2.75,Female,No,Thur,Lunch,2 +17.47,3.5,Female,No,Thur,Lunch,2 +34.3,6.7,Male,No,Thur,Lunch,6 +41.19,5.0,Male,No,Thur,Lunch,5 +27.05,5.0,Female,No,Thur,Lunch,6 +16.43,2.3,Female,No,Thur,Lunch,2 +8.35,1.5,Female,No,Thur,Lunch,2 +18.64,1.36,Female,No,Thur,Lunch,3 +11.87,1.63,Female,No,Thur,Lunch,2 +9.78,1.73,Male,No,Thur,Lunch,2 +7.51,2.0,Male,No,Thur,Lunch,2 +14.07,2.5,Male,No,Sun,Dinner,2 +13.13,2.0,Male,No,Sun,Dinner,2 +17.26,2.74,Male,No,Sun,Dinner,3 +24.55,2.0,Male,No,Sun,Dinner,4 +19.77,2.0,Male,No,Sun,Dinner,4 +29.85,5.14,Female,No,Sun,Dinner,5 +48.17,5.0,Male,No,Sun,Dinner,6 +25.0,3.75,Female,No,Sun,Dinner,4 +13.39,2.61,Female,No,Sun,Dinner,2 +16.49,2.0,Male,No,Sun,Dinner,4 +21.5,3.5,Male,No,Sun,Dinner,4 +12.66,2.5,Male,No,Sun,Dinner,2 +16.21,2.0,Female,No,Sun,Dinner,3 +13.81,2.0,Male,No,Sun,Dinner,2 +17.51,3.0,Female,Yes,Sun,Dinner,2 +24.52,3.48,Male,No,Sun,Dinner,3 +20.76,2.24,Male,No,Sun,Dinner,2 +31.71,4.5,Male,No,Sun,Dinner,4 +10.59,1.61,Female,Yes,Sat,Dinner,2 +10.63,2.0,Female,Yes,Sat,Dinner,2 +50.81,10.0,Male,Yes,Sat,Dinner,3 +15.81,3.16,Male,Yes,Sat,Dinner,2 +7.25,5.15,Male,Yes,Sun,Dinner,2 +31.85,3.18,Male,Yes,Sun,Dinner,2 +16.82,4.0,Male,Yes,Sun,Dinner,2 +32.9,3.11,Male,Yes,Sun,Dinner,2 +17.89,2.0,Male,Yes,Sun,Dinner,2 +14.48,2.0,Male,Yes,Sun,Dinner,2 +9.6,4.0,Female,Yes,Sun,Dinner,2 +34.63,3.55,Male,Yes,Sun,Dinner,2 +34.65,3.68,Male,Yes,Sun,Dinner,4 +23.33,5.65,Male,Yes,Sun,Dinner,2 +45.35,3.5,Male,Yes,Sun,Dinner,3 
+23.17,6.5,Male,Yes,Sun,Dinner,4 +40.55,3.0,Male,Yes,Sun,Dinner,2 +20.69,5.0,Male,No,Sun,Dinner,5 +20.9,3.5,Female,Yes,Sun,Dinner,3 +30.46,2.0,Male,Yes,Sun,Dinner,5 +18.15,3.5,Female,Yes,Sun,Dinner,3 +23.1,4.0,Male,Yes,Sun,Dinner,3 +15.69,1.5,Male,Yes,Sun,Dinner,2 +19.81,4.19,Female,Yes,Thur,Lunch,2 +28.44,2.56,Male,Yes,Thur,Lunch,2 +15.48,2.02,Male,Yes,Thur,Lunch,2 +16.58,4.0,Male,Yes,Thur,Lunch,2 +7.56,1.44,Male,No,Thur,Lunch,2 +10.34,2.0,Male,Yes,Thur,Lunch,2 +43.11,5.0,Female,Yes,Thur,Lunch,4 +13.0,2.0,Female,Yes,Thur,Lunch,2 +13.51,2.0,Male,Yes,Thur,Lunch,2 +18.71,4.0,Male,Yes,Thur,Lunch,3 +12.74,2.01,Female,Yes,Thur,Lunch,2 +13.0,2.0,Female,Yes,Thur,Lunch,2 +16.4,2.5,Female,Yes,Thur,Lunch,2 +20.53,4.0,Male,Yes,Thur,Lunch,4 +16.47,3.23,Female,Yes,Thur,Lunch,3 +26.59,3.41,Male,Yes,Sat,Dinner,3 +38.73,3.0,Male,Yes,Sat,Dinner,4 +24.27,2.03,Male,Yes,Sat,Dinner,2 +12.76,2.23,Female,Yes,Sat,Dinner,2 +30.06,2.0,Male,Yes,Sat,Dinner,3 +25.89,5.16,Male,Yes,Sat,Dinner,4 +48.33,9.0,Male,No,Sat,Dinner,4 +13.27,2.5,Female,Yes,Sat,Dinner,2 +28.17,6.5,Female,Yes,Sat,Dinner,3 +12.9,1.1,Female,Yes,Sat,Dinner,2 +28.15,3.0,Male,Yes,Sat,Dinner,5 +11.59,1.5,Male,Yes,Sat,Dinner,2 +7.74,1.44,Male,Yes,Sat,Dinner,2 +30.14,3.09,Female,Yes,Sat,Dinner,4 +12.16,2.2,Male,Yes,Fri,Lunch,2 +13.42,3.48,Female,Yes,Fri,Lunch,2 +8.58,1.92,Male,Yes,Fri,Lunch,1 +15.98,3.0,Female,No,Fri,Lunch,3 +13.42,1.58,Male,Yes,Fri,Lunch,2 +16.27,2.5,Female,Yes,Fri,Lunch,2 +10.09,2.0,Female,Yes,Fri,Lunch,2 +20.45,3.0,Male,No,Sat,Dinner,4 +13.28,2.72,Male,No,Sat,Dinner,2 +22.12,2.88,Female,Yes,Sat,Dinner,2 +24.01,2.0,Male,Yes,Sat,Dinner,4 +15.69,3.0,Male,Yes,Sat,Dinner,3 +11.61,3.39,Male,No,Sat,Dinner,2 +10.77,1.47,Male,No,Sat,Dinner,2 +15.53,3.0,Male,Yes,Sat,Dinner,2 +10.07,1.25,Male,No,Sat,Dinner,2 +12.6,1.0,Male,Yes,Sat,Dinner,2 +32.83,1.17,Male,Yes,Sat,Dinner,2 +35.83,4.67,Female,No,Sat,Dinner,3 +29.03,5.92,Male,No,Sat,Dinner,3 +27.18,2.0,Female,Yes,Sat,Dinner,2 +22.67,2.0,Male,Yes,Sat,Dinner,2 
+17.82,1.75,Male,No,Sat,Dinner,2 +18.78,3.0,Female,No,Thur,Dinner,2 diff --git a/doc/data/titanic.csv b/doc/data/titanic.csv new file mode 100644 index 00000000..5cc466e9 --- /dev/null +++ b/doc/data/titanic.csv @@ -0,0 +1,892 @@ +PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked +1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S +2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C +3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S +4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S +5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S +6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q +7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S +8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S +9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S +10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C +11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S +12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S +13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S +14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S +15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S +16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S +17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q +18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S +19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S +20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C +21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S +22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S +23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,,Q +24,1,1,"Sloper, Mr. 
William Thompson",male,28,0,0,113788,35.5,A6,S +25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S +26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S +27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C +28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S +29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q +30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S +31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,,C +32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C +33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q +34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S +35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C +36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S +37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C +38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S +39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S +40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C +41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S +42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S +43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C +44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C +45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q +46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S +47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q +48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q +49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C +50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S +51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S +52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 
39886,7.8,,S +53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C +54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S +55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C +56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S +57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S +58,0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,,C +59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S +60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S +61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C +62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28, +63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S +64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S +65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C +66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C +67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S +68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S +69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S +70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S +71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S +72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S +73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S +74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,,C +75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S +76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S +77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S +78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S +79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,,S +80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,,S +81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S +82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S +83,1,3,"McDermott, Miss. 
Brigdet Delia",female,,0,0,330932,7.7875,,Q +84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S +85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S +86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S +87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,,S +88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S +89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S +90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S +91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S +92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S +93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S +94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S +95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S +96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S +97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C +98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C +99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S +100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S +101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S +102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S +103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S +104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S +105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S +106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S +107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S +108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S +109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S +110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q +111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,C110,S +112,0,3,"Zabour, Miss. 
Hileni",female,14.5,1,0,2665,14.4542,,C +113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S +114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S +115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,,C +116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S +117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q +118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S +119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C +120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S +121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S +122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S +123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C +124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S +125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S +126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C +127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q +128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S +129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C +130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S +131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C +132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,,S +133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S +134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S +135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S +136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C +137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S +138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S +139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S +140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C +141,0,3,"Boulos, Mrs. 
Joseph (Sultana)",female,,0,2,2678,15.2458,,C +142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,,S +143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S +144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q +145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S +146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S +147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S +148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S +149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S +150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S +151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S +152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S +153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S +154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S +155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S +156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C +157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q +158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S +159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S +160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S +161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S +162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S +163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S +164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S +165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S +166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S +167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,E33,S +168,0,3,"Skoog, Mrs. 
William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S +169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S +170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S +171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,B19,S +172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q +173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S +174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S +175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,A7,C +176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S +177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,,S +178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C +179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S +180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S +181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S +182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C +183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S +184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S +185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S +186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S +187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q +188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S +189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q +190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S +191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S +192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S +193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S +194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,F2,S +195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C +196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C +197,0,3,"Mernagh, Mr. 
Robert",male,,0,0,368703,7.75,,Q +198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S +199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q +200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S +201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S +202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S +203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S +204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C +205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S +206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S +207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S +208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C +209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q +210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C +211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S +212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S +213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S +214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S +215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q +216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C +217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S +218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S +219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C +220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S +221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S +222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S +223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S +224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S +225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S +226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S +227,1,2,"Mellors, Mr. 
William John",male,19,0,0,SW/PP 751,10.5,,S +228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S +229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S +230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S +231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S +232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S +233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S +234,1,3,"Asplund, Miss. Lillian Gertrud",female,5,4,2,347077,31.3875,,S +235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S +236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S +237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S +238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S +239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S +240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S +241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C +242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q +243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S +244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S +245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C +246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q +247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S +248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,,S +249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S +250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S +251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S +252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S +253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S +254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S +255,0,3,"Rosblom, Mrs. 
Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S +256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C +257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C +258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S +259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C +260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S +261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q +262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S +263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,E67,S +264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,B94,S +265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q +266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S +267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S +268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S +269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S +270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S +271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S +272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S +273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S +274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C +275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q +276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S +277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S +278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S +279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q +280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S +281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,,Q +282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S +283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S +284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 
10482,8.05,,S +285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,A19,S +286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C +287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S +288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S +289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S +290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q +291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S +292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C +293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C +294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S +295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S +296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,,C +297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C +298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S +299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S +300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C +301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q +302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,,Q +303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S +304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q +305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S +306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S +307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C +308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C +309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C +310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C +311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C +312,1,1,"Ryerson, Miss. 
Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C +313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S +314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S +315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S +316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S +317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S +318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S +319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S +320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C +321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S +322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S +323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q +324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S +325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S +326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C +327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,,S +328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S +329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S +330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C +331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q +332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S +333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S +334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S +335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S +336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S +337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S +338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C +339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S +340,0,1,"Blackwell, Mr. 
Stephen Weart",male,45,0,0,113784,35.5,T,S +341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S +342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S +343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S +344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S +345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S +346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S +347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S +348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S +349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S +350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S +351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S +352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S +353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,,C +354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,,S +355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C +356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S +357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S +358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S +359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q +360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q +361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S +362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C +363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C +364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S +365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q +366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S +367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C +368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C +369,1,3,"Jermyn, Miss. 
Annie",female,,0,0,14313,7.75,,Q +370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C +371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C +372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S +373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S +374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C +375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S +376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C +377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,,S +378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C +379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C +380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,,S +381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C +382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C +383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S +384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S +385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S +386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,,S +387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S +388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S +389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q +390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C +391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S +392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S +393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S +394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C +395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S +396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S +397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S +398,0,2,"McKane, Mr. 
Peter David",male,46,0,0,28403,26,,S +399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S +400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S +401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S +402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S +403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S +404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S +405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S +406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S +407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,,S +408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S +409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S +410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S +411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S +412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q +413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q +414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S +415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S +416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S +417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S +418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S +419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S +420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S +421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C +422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q +423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S +424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S +425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S +426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S +427,1,2,"Clarke, Mrs. 
Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S +428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S +429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q +430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S +431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S +432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S +433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S +434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S +435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,E44,S +436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S +437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S +438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S +439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S +440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S +441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S +442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S +443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S +444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S +445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S +446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S +447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S +448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S +449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C +450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S +451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S +452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S +453,0,1,"Foreman, Mr. 
Benjamin Laventall",male,30,0,0,113051,27.75,C111,C +454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C +455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S +456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C +457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S +458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S +459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,,S +460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q +461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S +462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S +463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S +464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,,S +465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S +466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S +467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S +468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S +469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q +470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C +471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S +472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S +473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S +474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C +475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S +476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S +477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,,S +478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S +479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S +480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S +481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S +482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S +483,0,3,"Rouse, Mr. 
Richard Henry",male,50,0,0,A/5 3594,8.05,,S +484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S +485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C +486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S +487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S +488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C +489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,,S +490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S +491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S +492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S +493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,C30,S +494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C +495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S +496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C +497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C +498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S +499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S +500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S +501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S +502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q +503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q +504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S +505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S +506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C +507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S +508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S +509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S +510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S +511,1,3,"Daly, Mr. 
Eugene Patrick",male,29,0,0,382651,7.75,,Q +512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S +513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,E25,S +514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C +515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S +516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S +517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S +518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q +519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S +520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S +521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S +522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,,S +523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C +524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C +525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C +526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q +527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S +528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S +529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S +530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S +531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S +532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C +533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C +534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C +535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S +536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S +537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,B38,S +538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,,C +539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S +540,1,1,"Frolicher, Miss. 
Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C +541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S +542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S +543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S +544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S +545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C +546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S +547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S +548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C +549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S +550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S +551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C +552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,,S +553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q +554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C +555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S +556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S +557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C +558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C +559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S +560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S +561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q +562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S +563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S +564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S +565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S +566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,,S +567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S +568,0,3,"Palsson, Mrs. 
Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S +569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C +570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S +571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S +572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S +573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S +574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q +575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S +576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S +577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S +578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S +579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C +580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S +581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,,S +582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C +583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S +584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C +585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C +586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S +587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S +588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,B41,C +589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S +590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S +591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,,S +592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C +593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,,S +594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q +595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S +596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S +597,1,2,"Leitch, Miss. 
Jessie Wills",female,,0,0,248727,33,,S +598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S +599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C +600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C +601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S +602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S +603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S +604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S +605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C +606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S +607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S +608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S +609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C +610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S +611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S +612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S +613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q +614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q +615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S +616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S +617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S +618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S +619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,F4,S +620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S +621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C +622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S +623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C +624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S +625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S +626,0,1,"Sutton, Mr. 
Frederick",male,61,0,0,36963,32.3208,D50,S +627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q +628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S +629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,,S +630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q +631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S +632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S +633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C +634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S +635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S +636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S +637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S +638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S +639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S +640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S +641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S +642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C +643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,,S +644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S +645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C +646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C +647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,,S +648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C +649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S +650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S +651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S +652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S +653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S +654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q +655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q +656,0,2,"Hickman, Mr. 
Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S +657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S +658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q +659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S +660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C +661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,,S +662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C +663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S +664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S +665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S +666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S +667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S +668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S +669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,,S +670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S +671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S +672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S +673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S +674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,,S +675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S +676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S +677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S +678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S +679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S +680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C +681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q +682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C +683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S +684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S +685,0,2,"Brown, Mr. 
Thomas William Solomon",male,60,1,1,29750,39,,S +686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C +687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S +688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S +689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S +690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S +691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,B20,S +692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C +693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S +694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C +695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,,S +696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S +697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S +698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q +699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C +700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S +701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C +702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S +703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C +704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q +705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S +706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S +707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S +708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S +709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S +710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C +711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C +712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S +713,1,1,"Taylor, Mr. 
Elmer Zebley",male,48,1,0,19996,52,C126,S +714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S +715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S +716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S +717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C +718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S +719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q +720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S +721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,,S +722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S +723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S +724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S +725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,E8,S +726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S +727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S +728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q +729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S +730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S +731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S +732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C +733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S +734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S +735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S +736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S +737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S +738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C +739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S +740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S +741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S +742,0,1,"Cavendish, Mr. 
Tyrell William",male,36,1,0,19877,78.85,C46,S +743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C +744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S +745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,,S +746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S +747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S +748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S +749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S +750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q +751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S +752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,E121,S +753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S +754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,,S +755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,,S +756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S +757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S +758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S +759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S +760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S +761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S +762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S +763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C +764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S +765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S +766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S +767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C +768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q +769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q +770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S +771,0,3,"Lievens, Mr. 
Rene Aime",male,24,0,0,345781,9.5,,S +772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S +773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S +774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C +775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S +776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S +777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q +778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S +779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q +780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S +781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,,C +782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S +783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S +784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S +785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S +786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S +787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S +788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q +789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S +790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C +791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q +792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S +793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S +794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C +795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S +796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S +797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S +798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,,S +799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C +800,0,3,"Van Impe, Mrs. 
Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S +801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S +802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S +803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S +804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C +805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S +806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S +807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S +808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,,S +809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S +810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S +811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S +812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S +813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,,S +814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S +815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S +816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S +817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S +818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C +819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S +820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S +821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S +822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S +823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S +824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S +825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S +826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q +827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S +828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C +829,1,3,"McCormack, Mr. 
Thomas Joseph",male,,0,0,367228,7.75,,Q +830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28, +831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C +832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S +833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C +834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S +835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,,S +836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C +837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S +838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S +839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S +840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C +841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S +842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S +843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C +844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C +845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,,S +846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S +847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S +848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C +849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S +850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C +851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S +852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S +853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C +854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S +855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S +856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S +857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S +858,1,1,"Daly, Mr. 
Peter Denis ",male,51,0,0,113055,26.55,E17,S +859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C +860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C +861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S +862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S +863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S +864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S +865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S +866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S +867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C +868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S +869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S +870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S +871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S +872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S +873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S +874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S +875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C +876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C +877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S +878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S +879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S +880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C +881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S +882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S +883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S +884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S +885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S +886,0,3,"Rice, Mrs. 
William (Margaret Norton)",female,39,0,5,382652,29.125,,Q +887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S +888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,B42,S +889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S +890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C +891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,,Q diff --git a/doc/make.py b/doc/make.py new file mode 100755 index 00000000..cf73f44b --- /dev/null +++ b/doc/make.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python +""" +Python script for building documentation. + +To build the docs you must have all optional dependencies for pandas +installed. See the installation instructions for a list of these. + +Usage +----- + $ python make.py clean + $ python make.py html + $ python make.py latex +""" +import argparse +import csv +import importlib +import os +import shutil +import subprocess +import sys +import webbrowser + +import docutils +import docutils.parsers.rst + +DOC_PATH = os.path.dirname(os.path.abspath(__file__)) +SOURCE_PATH = os.path.join(DOC_PATH, "source") +BUILD_PATH = os.path.join(DOC_PATH, "build") +REDIRECTS_FILE = os.path.join(DOC_PATH, "redirects.csv") + + +class DocBuilder: + """ + Class to wrap the different commands of this script. + + All public methods of this class can be called as parameters of the + script. 
+ """ + + def __init__( + self, + num_jobs=0, + include_api=True, + single_doc=None, + verbosity=0, + warnings_are_errors=False, + ): + self.num_jobs = num_jobs + self.verbosity = verbosity + self.warnings_are_errors = warnings_are_errors + + if single_doc: + single_doc = self._process_single_doc(single_doc) + include_api = False + os.environ["SPHINX_PATTERN"] = single_doc + elif not include_api: + os.environ["SPHINX_PATTERN"] = "-api" + + self.single_doc_html = None + if single_doc and single_doc.endswith(".rst"): + self.single_doc_html = os.path.splitext(single_doc)[0] + ".html" + elif single_doc: + self.single_doc_html = f"reference/api/pandas.{single_doc}.html" + + def _process_single_doc(self, single_doc): + """ + Make sure the provided value for --single is a path to an existing + .rst/.ipynb file, or a pandas object that can be imported. + + For example, categorical.rst or pandas.DataFrame.head. For the latter, + return the corresponding file path + (e.g. reference/api/pandas.DataFrame.head.rst). + """ + base_name, extension = os.path.splitext(single_doc) + if extension in (".rst", ".ipynb"): + if os.path.exists(os.path.join(SOURCE_PATH, single_doc)): + return single_doc + else: + raise FileNotFoundError(f"File {single_doc} not found") + + elif single_doc.startswith("pandas."): + try: + obj = pandas # noqa: F821 + for name in single_doc.split("."): + obj = getattr(obj, name) + except AttributeError: + raise ImportError(f"Could not import {single_doc}") + else: + return single_doc[len("pandas.") :] + else: + raise ValueError( + f"--single={single_doc} not understood. " + "Value should be a valid path to a .rst or .ipynb file, " + "or a valid pandas object " + "(e.g. categorical.rst or pandas.DataFrame.head)" + ) + + @staticmethod + def _run_os(*args): + """ + Execute a command as in an OS terminal.
+ + Parameters + ---------- + *args : list of str + Command and parameters to be executed + + Examples + -------- + >>> DocBuilder()._run_os('python', '--version') + """ + subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr) + + def _sphinx_build(self, kind: str): + """ + Call sphinx to build documentation. + + Attribute `num_jobs` from the class is used. + + Parameters + ---------- + kind : {'html', 'latex'} + + Examples + -------- + >>> DocBuilder(num_jobs=4)._sphinx_build('html') + """ + if kind not in ("html", "latex"): + raise ValueError(f"kind must be html or latex, not {kind}") + + cmd = ["sphinx-build", "-b", kind] + if self.num_jobs: + cmd += ["-j", str(self.num_jobs)] + if self.warnings_are_errors: + cmd += ["-W", "--keep-going"] + if self.verbosity: + cmd.append(f"-{'v' * self.verbosity}") + cmd += [ + "-d", + os.path.join(BUILD_PATH, "doctrees"), + SOURCE_PATH, + os.path.join(BUILD_PATH, kind), + ] + return subprocess.call(cmd) + + def _open_browser(self, single_doc_html): + """ + Open a browser tab showing the single built HTML page. + """ + url = os.path.join("file://", DOC_PATH, "build", "html", single_doc_html) + webbrowser.open(url, new=2) + + def _get_page_title(self, page): + """ + Open the rst file `page` and extract its title.
+ """ + fname = os.path.join(SOURCE_PATH, f"{page}.rst") + option_parser = docutils.frontend.OptionParser( + components=(docutils.parsers.rst.Parser,) + ) + doc = docutils.utils.new_document("", option_parser.get_default_values()) + with open(fname) as f: + data = f.read() + + parser = docutils.parsers.rst.Parser() + # do not generate any warning when parsing the rst + with open(os.devnull, "a") as f: + doc.reporter.stream = f + parser.parse(data, doc) + + section = next( + node for node in doc.children if isinstance(node, docutils.nodes.section) + ) + title = next( + node for node in section.children if isinstance(node, docutils.nodes.title) + ) + + return title.astext() + + def _add_redirects(self): + """ + Create in the build directory an html file with a redirect, + for every row in REDIRECTS_FILE. + """ + with open(REDIRECTS_FILE) as mapping_fd: + reader = csv.reader(mapping_fd) + for row in reader: + if not row or row[0].strip().startswith("#"): + continue + + path = os.path.join(BUILD_PATH, "html", *row[0].split("/")) + ".html" + + try: + title = self._get_page_title(row[1]) + except Exception: + # the file can be an ipynb and not an rst, or docutils + # may not be able to read the rst because it has some + # sphinx specific stuff + title = "this page" + + if os.path.exists(path): + raise RuntimeError( + f"Redirection would overwrite an existing file: {path}" + ) + + with open(path, "w") as moved_page_fd: + html = f"""\ + + + + + +

+ The page has been moved to {title} +

+ +""" + + moved_page_fd.write(html) + + def html(self): + """ + Build HTML documentation. + """ + ret_code = self._sphinx_build("html") + zip_fname = os.path.join(BUILD_PATH, "html", "pandas.zip") + if os.path.exists(zip_fname): + os.remove(zip_fname) + + if ret_code == 0: + if self.single_doc_html is not None: + self._open_browser(self.single_doc_html) + else: + self._add_redirects() + return ret_code + + def latex(self, force=False): + """ + Build PDF documentation. + """ + if sys.platform == "win32": + sys.stderr.write("latex build has not been tested on windows\n") + else: + ret_code = self._sphinx_build("latex") + os.chdir(os.path.join(BUILD_PATH, "latex")) + if force: + for i in range(3): + self._run_os("pdflatex", "-interaction=nonstopmode", "pandas.tex") + raise SystemExit( + "You should check the file " + '"build/latex/pandas.pdf" for problems.' + ) + else: + self._run_os("make") + return ret_code + + def latex_forced(self): + """ + Build PDF documentation with retries to find missing references. + """ + return self.latex(force=True) + + @staticmethod + def clean(): + """ + Clean documentation generated files. + """ + shutil.rmtree(BUILD_PATH, ignore_errors=True) + shutil.rmtree(os.path.join(SOURCE_PATH, "reference", "api"), ignore_errors=True) + + def zip_html(self): + """ + Compress HTML documentation into a zip file. 
+ """ + zip_fname = os.path.join(BUILD_PATH, "html", "pandas.zip") + if os.path.exists(zip_fname): + os.remove(zip_fname) + dirname = os.path.join(BUILD_PATH, "html") + fnames = os.listdir(dirname) + os.chdir(dirname) + self._run_os("zip", zip_fname, "-r", "-q", *fnames) + + +def main(): + cmds = [method for method in dir(DocBuilder) if not method.startswith("_")] + + joined = ",".join(cmds) + argparser = argparse.ArgumentParser( + description="pandas documentation builder", epilog=f"Commands: {joined}", + ) + + joined = ", ".join(cmds) + argparser.add_argument( + "command", nargs="?", default="html", help=f"command to run: {joined}", + ) + argparser.add_argument( + "--num-jobs", type=int, default=0, help="number of jobs used by sphinx-build" + ) + argparser.add_argument( + "--no-api", default=False, help="omit api and autosummary", action="store_true" + ) + argparser.add_argument( + "--single", + metavar="FILENAME", + type=str, + default=None, + help=( + "filename (relative to the 'source' folder) of section or method name to " + "compile, e.g. 'development/contributing.rst', " + "'ecosystem.rst', 'pandas.DataFrame.join'" + ), + ) + argparser.add_argument( + "--python-path", type=str, default=os.path.dirname(DOC_PATH), help="path" + ) + argparser.add_argument( + "-v", + action="count", + dest="verbosity", + default=0, + help=( + "increase verbosity (can be repeated), " + "passed to the sphinx build command" + ), + ) + argparser.add_argument( + "--warnings-are-errors", + "-W", + action="store_true", + help="fail if warnings are raised", + ) + args = argparser.parse_args() + + if args.command not in cmds: + joined = ", ".join(cmds) + raise ValueError(f"Unknown command {args.command}. Available options: {joined}") + + # Below we update both os.environ and sys.path. The former is used by + # external libraries (namely Sphinx) to compile this module and resolve + # the import of `python_path` correctly. 
The latter is used to resolve + # the import within the module, injecting it into the global namespace + os.environ["PYTHONPATH"] = args.python_path + sys.path.insert(0, args.python_path) + globals()["pandas"] = importlib.import_module("pandas") + + # Set the matplotlib backend to the non-interactive Agg backend for all + # child processes. + os.environ["MPLBACKEND"] = "module://matplotlib.backends.backend_agg" + + builder = DocBuilder( + args.num_jobs, + not args.no_api, + args.single, + args.verbosity, + args.warnings_are_errors, + ) + return getattr(builder, args.command)() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/doc/redirects.csv b/doc/redirects.csv new file mode 100644 index 00000000..3669ff4b --- /dev/null +++ b/doc/redirects.csv @@ -0,0 +1,1403 @@ +# This file should contain all the redirects in the documentation +# in the format `<old_path>,<new_path>` + +# whatsnew +whatsnew,whatsnew/index +release,whatsnew/index + +# getting started +install,getting_started/install +10min,getting_started/10min +basics,getting_started/basics +comparison_with_r,getting_started/comparison/comparison_with_r +comparison_with_sql,getting_started/comparison/comparison_with_sql +comparison_with_sas,getting_started/comparison/comparison_with_sas +comparison_with_stata,getting_started/comparison/comparison_with_stata +dsintro,getting_started/dsintro +overview,getting_started/overview +tutorials,getting_started/tutorials + +# user guide +advanced,user_guide/advanced +categorical,user_guide/categorical +computation,user_guide/computation +cookbook,user_guide/cookbook +enhancingperf,user_guide/enhancingperf +gotchas,user_guide/gotchas +groupby,user_guide/groupby +indexing,user_guide/indexing +integer_na,user_guide/integer_na +io,user_guide/io +merging,user_guide/merging +missing_data,user_guide/missing_data +options,user_guide/options +reshaping,user_guide/reshaping +sparse,user_guide/sparse +style,user_guide/style +text,user_guide/text +timedeltas,user_guide/timedeltas
+timeseries,user_guide/timeseries +visualization,user_guide/visualization + +# development +contributing,development/contributing +contributing_docstring,development/contributing_docstring +developer,development/developer +extending,development/extending +internals,development/internals + +# api moved function +reference/api/pandas.io.json.json_normalize,pandas.json_normalize + +# rename due to refactors +reference/api/pandas.core.window.Rolling,pandas.core.window.rolling.Rolling +reference/api/pandas.core.window.Rolling.aggregate,pandas.core.window.rolling.Rolling.aggregate +reference/api/pandas.core.window.Rolling.apply,pandas.core.window.rolling.Rolling.apply +reference/api/pandas.core.window.Rolling.corr,pandas.core.window.rolling.Rolling.corr +reference/api/pandas.core.window.Rolling.count,pandas.core.window.rolling.Rolling.count +reference/api/pandas.core.window.Rolling.cov,pandas.core.window.rolling.Rolling.cov +reference/api/pandas.core.window.Rolling.kurt,pandas.core.window.rolling.Rolling.kurt +reference/api/pandas.core.window.Rolling.max,pandas.core.window.rolling.Rolling.max +reference/api/pandas.core.window.Rolling.mean,pandas.core.window.rolling.Rolling.mean +reference/api/pandas.core.window.Rolling.median,pandas.core.window.rolling.Rolling.median +reference/api/pandas.core.window.Rolling.min,pandas.core.window.rolling.Rolling.min +reference/api/pandas.core.window.Rolling.quantile,pandas.core.window.rolling.Rolling.quantile +reference/api/pandas.core.window.Rolling.skew,pandas.core.window.rolling.Rolling.skew +reference/api/pandas.core.window.Rolling.std,pandas.core.window.rolling.Rolling.std +reference/api/pandas.core.window.Rolling.sum,pandas.core.window.rolling.Rolling.sum +reference/api/pandas.core.window.Rolling.var,pandas.core.window.rolling.Rolling.var + +# api url change (generated -> reference/api rename) +api,reference/index +generated/pandas.api.extensions.ExtensionArray.argsort,../reference/api/pandas.api.extensions.ExtensionArray.argsort 
+generated/pandas.api.extensions.ExtensionArray.astype,../reference/api/pandas.api.extensions.ExtensionArray.astype +generated/pandas.api.extensions.ExtensionArray.copy,../reference/api/pandas.api.extensions.ExtensionArray.copy +generated/pandas.api.extensions.ExtensionArray.dropna,../reference/api/pandas.api.extensions.ExtensionArray.dropna +generated/pandas.api.extensions.ExtensionArray.dtype,../reference/api/pandas.api.extensions.ExtensionArray.dtype +generated/pandas.api.extensions.ExtensionArray.factorize,../reference/api/pandas.api.extensions.ExtensionArray.factorize +generated/pandas.api.extensions.ExtensionArray.fillna,../reference/api/pandas.api.extensions.ExtensionArray.fillna +generated/pandas.api.extensions.ExtensionArray,../reference/api/pandas.api.extensions.ExtensionArray +generated/pandas.api.extensions.ExtensionArray.isna,../reference/api/pandas.api.extensions.ExtensionArray.isna +generated/pandas.api.extensions.ExtensionArray.nbytes,../reference/api/pandas.api.extensions.ExtensionArray.nbytes +generated/pandas.api.extensions.ExtensionArray.ndim,../reference/api/pandas.api.extensions.ExtensionArray.ndim +generated/pandas.api.extensions.ExtensionArray.shape,../reference/api/pandas.api.extensions.ExtensionArray.shape +generated/pandas.api.extensions.ExtensionArray.take,../reference/api/pandas.api.extensions.ExtensionArray.take +generated/pandas.api.extensions.ExtensionArray.unique,../reference/api/pandas.api.extensions.ExtensionArray.unique +generated/pandas.api.extensions.ExtensionDtype.construct_array_type,../reference/api/pandas.api.extensions.ExtensionDtype.construct_array_type +generated/pandas.api.extensions.ExtensionDtype.construct_from_string,../reference/api/pandas.api.extensions.ExtensionDtype.construct_from_string +generated/pandas.api.extensions.ExtensionDtype,../reference/api/pandas.api.extensions.ExtensionDtype +generated/pandas.api.extensions.ExtensionDtype.is_dtype,../reference/api/pandas.api.extensions.ExtensionDtype.is_dtype 
+generated/pandas.api.extensions.ExtensionDtype.kind,../reference/api/pandas.api.extensions.ExtensionDtype.kind +generated/pandas.api.extensions.ExtensionDtype.name,../reference/api/pandas.api.extensions.ExtensionDtype.name +generated/pandas.api.extensions.ExtensionDtype.names,../reference/api/pandas.api.extensions.ExtensionDtype.names +generated/pandas.api.extensions.ExtensionDtype.na_value,../reference/api/pandas.api.extensions.ExtensionDtype.na_value +generated/pandas.api.extensions.ExtensionDtype.type,../reference/api/pandas.api.extensions.ExtensionDtype.type +generated/pandas.api.extensions.register_dataframe_accessor,../reference/api/pandas.api.extensions.register_dataframe_accessor +generated/pandas.api.extensions.register_extension_dtype,../reference/api/pandas.api.extensions.register_extension_dtype +generated/pandas.api.extensions.register_index_accessor,../reference/api/pandas.api.extensions.register_index_accessor +generated/pandas.api.extensions.register_series_accessor,../reference/api/pandas.api.extensions.register_series_accessor +generated/pandas.api.types.infer_dtype,../reference/api/pandas.api.types.infer_dtype +generated/pandas.api.types.is_bool_dtype,../reference/api/pandas.api.types.is_bool_dtype +generated/pandas.api.types.is_bool,../reference/api/pandas.api.types.is_bool +generated/pandas.api.types.is_categorical_dtype,../reference/api/pandas.api.types.is_categorical_dtype +generated/pandas.api.types.is_categorical,../reference/api/pandas.api.types.is_categorical +generated/pandas.api.types.is_complex_dtype,../reference/api/pandas.api.types.is_complex_dtype +generated/pandas.api.types.is_complex,../reference/api/pandas.api.types.is_complex +generated/pandas.api.types.is_datetime64_any_dtype,../reference/api/pandas.api.types.is_datetime64_any_dtype +generated/pandas.api.types.is_datetime64_dtype,../reference/api/pandas.api.types.is_datetime64_dtype 
+generated/pandas.api.types.is_datetime64_ns_dtype,../reference/api/pandas.api.types.is_datetime64_ns_dtype +generated/pandas.api.types.is_datetime64tz_dtype,../reference/api/pandas.api.types.is_datetime64tz_dtype +generated/pandas.api.types.is_datetimetz,../reference/api/pandas.api.types.is_datetimetz +generated/pandas.api.types.is_dict_like,../reference/api/pandas.api.types.is_dict_like +generated/pandas.api.types.is_extension_array_dtype,../reference/api/pandas.api.types.is_extension_array_dtype +generated/pandas.api.types.is_extension_type,../reference/api/pandas.api.types.is_extension_type +generated/pandas.api.types.is_file_like,../reference/api/pandas.api.types.is_file_like +generated/pandas.api.types.is_float_dtype,../reference/api/pandas.api.types.is_float_dtype +generated/pandas.api.types.is_float,../reference/api/pandas.api.types.is_float +generated/pandas.api.types.is_hashable,../reference/api/pandas.api.types.is_hashable +generated/pandas.api.types.is_int64_dtype,../reference/api/pandas.api.types.is_int64_dtype +generated/pandas.api.types.is_integer_dtype,../reference/api/pandas.api.types.is_integer_dtype +generated/pandas.api.types.is_integer,../reference/api/pandas.api.types.is_integer +generated/pandas.api.types.is_interval_dtype,../reference/api/pandas.api.types.is_interval_dtype +generated/pandas.api.types.is_interval,../reference/api/pandas.api.types.is_interval +generated/pandas.api.types.is_iterator,../reference/api/pandas.api.types.is_iterator +generated/pandas.api.types.is_list_like,../reference/api/pandas.api.types.is_list_like +generated/pandas.api.types.is_named_tuple,../reference/api/pandas.api.types.is_named_tuple +generated/pandas.api.types.is_number,../reference/api/pandas.api.types.is_number +generated/pandas.api.types.is_numeric_dtype,../reference/api/pandas.api.types.is_numeric_dtype +generated/pandas.api.types.is_object_dtype,../reference/api/pandas.api.types.is_object_dtype 
+generated/pandas.api.types.is_period_dtype,../reference/api/pandas.api.types.is_period_dtype +generated/pandas.api.types.is_period,../reference/api/pandas.api.types.is_period +generated/pandas.api.types.is_re_compilable,../reference/api/pandas.api.types.is_re_compilable +generated/pandas.api.types.is_re,../reference/api/pandas.api.types.is_re +generated/pandas.api.types.is_scalar,../reference/api/pandas.api.types.is_scalar +generated/pandas.api.types.is_signed_integer_dtype,../reference/api/pandas.api.types.is_signed_integer_dtype +generated/pandas.api.types.is_sparse,../reference/api/pandas.api.types.is_sparse +generated/pandas.api.types.is_string_dtype,../reference/api/pandas.api.types.is_string_dtype +generated/pandas.api.types.is_timedelta64_dtype,../reference/api/pandas.api.types.is_timedelta64_dtype +generated/pandas.api.types.is_timedelta64_ns_dtype,../reference/api/pandas.api.types.is_timedelta64_ns_dtype +generated/pandas.api.types.is_unsigned_integer_dtype,../reference/api/pandas.api.types.is_unsigned_integer_dtype +generated/pandas.api.types.pandas_dtype,../reference/api/pandas.api.types.pandas_dtype +generated/pandas.api.types.union_categoricals,../reference/api/pandas.api.types.union_categoricals +generated/pandas.bdate_range,../reference/api/pandas.bdate_range +generated/pandas.Categorical.__array__,../reference/api/pandas.Categorical.__array__ +generated/pandas.Categorical.categories,../reference/api/pandas.Categorical.categories +generated/pandas.Categorical.codes,../reference/api/pandas.Categorical.codes +generated/pandas.CategoricalDtype.categories,../reference/api/pandas.CategoricalDtype.categories +generated/pandas.Categorical.dtype,../reference/api/pandas.Categorical.dtype +generated/pandas.CategoricalDtype,../reference/api/pandas.CategoricalDtype +generated/pandas.CategoricalDtype.ordered,../reference/api/pandas.CategoricalDtype.ordered +generated/pandas.Categorical.from_codes,../reference/api/pandas.Categorical.from_codes 
+generated/pandas.Categorical,../reference/api/pandas.Categorical +generated/pandas.CategoricalIndex.add_categories,../reference/api/pandas.CategoricalIndex.add_categories +generated/pandas.CategoricalIndex.as_ordered,../reference/api/pandas.CategoricalIndex.as_ordered +generated/pandas.CategoricalIndex.as_unordered,../reference/api/pandas.CategoricalIndex.as_unordered +generated/pandas.CategoricalIndex.categories,../reference/api/pandas.CategoricalIndex.categories +generated/pandas.CategoricalIndex.codes,../reference/api/pandas.CategoricalIndex.codes +generated/pandas.CategoricalIndex.equals,../reference/api/pandas.CategoricalIndex.equals +generated/pandas.CategoricalIndex,../reference/api/pandas.CategoricalIndex +generated/pandas.CategoricalIndex.map,../reference/api/pandas.CategoricalIndex.map +generated/pandas.CategoricalIndex.ordered,../reference/api/pandas.CategoricalIndex.ordered +generated/pandas.CategoricalIndex.remove_categories,../reference/api/pandas.CategoricalIndex.remove_categories +generated/pandas.CategoricalIndex.remove_unused_categories,../reference/api/pandas.CategoricalIndex.remove_unused_categories +generated/pandas.CategoricalIndex.rename_categories,../reference/api/pandas.CategoricalIndex.rename_categories +generated/pandas.CategoricalIndex.reorder_categories,../reference/api/pandas.CategoricalIndex.reorder_categories +generated/pandas.CategoricalIndex.set_categories,../reference/api/pandas.CategoricalIndex.set_categories +generated/pandas.Categorical.ordered,../reference/api/pandas.Categorical.ordered +generated/pandas.concat,../reference/api/pandas.concat +generated/pandas.core.groupby.DataFrameGroupBy.all,../reference/api/pandas.core.groupby.DataFrameGroupBy.all +generated/pandas.core.groupby.DataFrameGroupBy.any,../reference/api/pandas.core.groupby.DataFrameGroupBy.any +generated/pandas.core.groupby.DataFrameGroupBy.bfill,../reference/api/pandas.core.groupby.DataFrameGroupBy.bfill 
+generated/pandas.core.groupby.DataFrameGroupBy.boxplot,../reference/api/pandas.core.groupby.DataFrameGroupBy.boxplot +generated/pandas.core.groupby.DataFrameGroupBy.corr,../reference/api/pandas.core.groupby.DataFrameGroupBy.corr +generated/pandas.core.groupby.DataFrameGroupBy.corrwith,../reference/api/pandas.core.groupby.DataFrameGroupBy.corrwith +generated/pandas.core.groupby.DataFrameGroupBy.count,../reference/api/pandas.core.groupby.DataFrameGroupBy.count +generated/pandas.core.groupby.DataFrameGroupBy.cov,../reference/api/pandas.core.groupby.DataFrameGroupBy.cov +generated/pandas.core.groupby.DataFrameGroupBy.cummax,../reference/api/pandas.core.groupby.DataFrameGroupBy.cummax +generated/pandas.core.groupby.DataFrameGroupBy.cummin,../reference/api/pandas.core.groupby.DataFrameGroupBy.cummin +generated/pandas.core.groupby.DataFrameGroupBy.cumprod,../reference/api/pandas.core.groupby.DataFrameGroupBy.cumprod +generated/pandas.core.groupby.DataFrameGroupBy.cumsum,../reference/api/pandas.core.groupby.DataFrameGroupBy.cumsum +generated/pandas.core.groupby.DataFrameGroupBy.describe,../reference/api/pandas.core.groupby.DataFrameGroupBy.describe +generated/pandas.core.groupby.DataFrameGroupBy.diff,../reference/api/pandas.core.groupby.DataFrameGroupBy.diff +generated/pandas.core.groupby.DataFrameGroupBy.ffill,../reference/api/pandas.core.groupby.DataFrameGroupBy.ffill +generated/pandas.core.groupby.DataFrameGroupBy.fillna,../reference/api/pandas.core.groupby.DataFrameGroupBy.fillna +generated/pandas.core.groupby.DataFrameGroupBy.filter,../reference/api/pandas.core.groupby.DataFrameGroupBy.filter +generated/pandas.core.groupby.DataFrameGroupBy.hist,../reference/api/pandas.core.groupby.DataFrameGroupBy.hist +generated/pandas.core.groupby.DataFrameGroupBy.idxmax,../reference/api/pandas.core.groupby.DataFrameGroupBy.idxmax +generated/pandas.core.groupby.DataFrameGroupBy.idxmin,../reference/api/pandas.core.groupby.DataFrameGroupBy.idxmin 
+generated/pandas.core.groupby.DataFrameGroupBy.mad,../reference/api/pandas.core.groupby.DataFrameGroupBy.mad +generated/pandas.core.groupby.DataFrameGroupBy.pct_change,../reference/api/pandas.core.groupby.DataFrameGroupBy.pct_change +generated/pandas.core.groupby.DataFrameGroupBy.plot,../reference/api/pandas.core.groupby.DataFrameGroupBy.plot +generated/pandas.core.groupby.DataFrameGroupBy.quantile,../reference/api/pandas.core.groupby.DataFrameGroupBy.quantile +generated/pandas.core.groupby.DataFrameGroupBy.rank,../reference/api/pandas.core.groupby.DataFrameGroupBy.rank +generated/pandas.core.groupby.DataFrameGroupBy.resample,../reference/api/pandas.core.groupby.DataFrameGroupBy.resample +generated/pandas.core.groupby.DataFrameGroupBy.shift,../reference/api/pandas.core.groupby.DataFrameGroupBy.shift +generated/pandas.core.groupby.DataFrameGroupBy.size,../reference/api/pandas.core.groupby.DataFrameGroupBy.size +generated/pandas.core.groupby.DataFrameGroupBy.skew,../reference/api/pandas.core.groupby.DataFrameGroupBy.skew +generated/pandas.core.groupby.DataFrameGroupBy.take,../reference/api/pandas.core.groupby.DataFrameGroupBy.take +generated/pandas.core.groupby.DataFrameGroupBy.tshift,../reference/api/pandas.core.groupby.DataFrameGroupBy.tshift +generated/pandas.core.groupby.GroupBy.agg,../reference/api/pandas.core.groupby.GroupBy.agg +generated/pandas.core.groupby.GroupBy.aggregate,../reference/api/pandas.core.groupby.GroupBy.aggregate +generated/pandas.core.groupby.GroupBy.all,../reference/api/pandas.core.groupby.GroupBy.all +generated/pandas.core.groupby.GroupBy.any,../reference/api/pandas.core.groupby.GroupBy.any +generated/pandas.core.groupby.GroupBy.apply,../reference/api/pandas.core.groupby.GroupBy.apply +generated/pandas.core.groupby.GroupBy.bfill,../reference/api/pandas.core.groupby.GroupBy.bfill +generated/pandas.core.groupby.GroupBy.count,../reference/api/pandas.core.groupby.GroupBy.count 
+generated/pandas.core.groupby.GroupBy.cumcount,../reference/api/pandas.core.groupby.GroupBy.cumcount +generated/pandas.core.groupby.GroupBy.ffill,../reference/api/pandas.core.groupby.GroupBy.ffill +generated/pandas.core.groupby.GroupBy.first,../reference/api/pandas.core.groupby.GroupBy.first +generated/pandas.core.groupby.GroupBy.get_group,../reference/api/pandas.core.groupby.GroupBy.get_group +generated/pandas.core.groupby.GroupBy.groups,../reference/api/pandas.core.groupby.GroupBy.groups +generated/pandas.core.groupby.GroupBy.head,../reference/api/pandas.core.groupby.GroupBy.head +generated/pandas.core.groupby.GroupBy.indices,../reference/api/pandas.core.groupby.GroupBy.indices +generated/pandas.core.groupby.GroupBy.__iter__,../reference/api/pandas.core.groupby.GroupBy.__iter__ +generated/pandas.core.groupby.GroupBy.last,../reference/api/pandas.core.groupby.GroupBy.last +generated/pandas.core.groupby.GroupBy.max,../reference/api/pandas.core.groupby.GroupBy.max +generated/pandas.core.groupby.GroupBy.mean,../reference/api/pandas.core.groupby.GroupBy.mean +generated/pandas.core.groupby.GroupBy.median,../reference/api/pandas.core.groupby.GroupBy.median +generated/pandas.core.groupby.GroupBy.min,../reference/api/pandas.core.groupby.GroupBy.min +generated/pandas.core.groupby.GroupBy.ngroup,../reference/api/pandas.core.groupby.GroupBy.ngroup +generated/pandas.core.groupby.GroupBy.nth,../reference/api/pandas.core.groupby.GroupBy.nth +generated/pandas.core.groupby.GroupBy.ohlc,../reference/api/pandas.core.groupby.GroupBy.ohlc +generated/pandas.core.groupby.GroupBy.pct_change,../reference/api/pandas.core.groupby.GroupBy.pct_change +generated/pandas.core.groupby.GroupBy.pipe,../reference/api/pandas.core.groupby.GroupBy.pipe +generated/pandas.core.groupby.GroupBy.prod,../reference/api/pandas.core.groupby.GroupBy.prod +generated/pandas.core.groupby.GroupBy.rank,../reference/api/pandas.core.groupby.GroupBy.rank 
+generated/pandas.core.groupby.GroupBy.sem,../reference/api/pandas.core.groupby.GroupBy.sem +generated/pandas.core.groupby.GroupBy.size,../reference/api/pandas.core.groupby.GroupBy.size +generated/pandas.core.groupby.GroupBy.std,../reference/api/pandas.core.groupby.GroupBy.std +generated/pandas.core.groupby.GroupBy.sum,../reference/api/pandas.core.groupby.GroupBy.sum +generated/pandas.core.groupby.GroupBy.tail,../reference/api/pandas.core.groupby.GroupBy.tail +generated/pandas.core.groupby.GroupBy.transform,../reference/api/pandas.core.groupby.GroupBy.transform +generated/pandas.core.groupby.GroupBy.var,../reference/api/pandas.core.groupby.GroupBy.var +generated/pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing,../reference/api/pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing +generated/pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing,../reference/api/pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing +generated/pandas.core.groupby.SeriesGroupBy.nlargest,../reference/api/pandas.core.groupby.SeriesGroupBy.nlargest +generated/pandas.core.groupby.SeriesGroupBy.nsmallest,../reference/api/pandas.core.groupby.SeriesGroupBy.nsmallest +generated/pandas.core.groupby.SeriesGroupBy.nunique,../reference/api/pandas.core.groupby.SeriesGroupBy.nunique +generated/pandas.core.groupby.SeriesGroupBy.unique,../reference/api/pandas.core.groupby.SeriesGroupBy.unique +generated/pandas.core.groupby.SeriesGroupBy.value_counts,../reference/api/pandas.core.groupby.SeriesGroupBy.value_counts +generated/pandas.core.resample.Resampler.aggregate,../reference/api/pandas.core.resample.Resampler.aggregate +generated/pandas.core.resample.Resampler.apply,../reference/api/pandas.core.resample.Resampler.apply +generated/pandas.core.resample.Resampler.asfreq,../reference/api/pandas.core.resample.Resampler.asfreq +generated/pandas.core.resample.Resampler.backfill,../reference/api/pandas.core.resample.Resampler.backfill 
+generated/pandas.core.resample.Resampler.bfill,../reference/api/pandas.core.resample.Resampler.bfill +generated/pandas.core.resample.Resampler.count,../reference/api/pandas.core.resample.Resampler.count +generated/pandas.core.resample.Resampler.ffill,../reference/api/pandas.core.resample.Resampler.ffill +generated/pandas.core.resample.Resampler.fillna,../reference/api/pandas.core.resample.Resampler.fillna +generated/pandas.core.resample.Resampler.first,../reference/api/pandas.core.resample.Resampler.first +generated/pandas.core.resample.Resampler.get_group,../reference/api/pandas.core.resample.Resampler.get_group +generated/pandas.core.resample.Resampler.groups,../reference/api/pandas.core.resample.Resampler.groups +generated/pandas.core.resample.Resampler.indices,../reference/api/pandas.core.resample.Resampler.indices +generated/pandas.core.resample.Resampler.interpolate,../reference/api/pandas.core.resample.Resampler.interpolate +generated/pandas.core.resample.Resampler.__iter__,../reference/api/pandas.core.resample.Resampler.__iter__ +generated/pandas.core.resample.Resampler.last,../reference/api/pandas.core.resample.Resampler.last +generated/pandas.core.resample.Resampler.max,../reference/api/pandas.core.resample.Resampler.max +generated/pandas.core.resample.Resampler.mean,../reference/api/pandas.core.resample.Resampler.mean +generated/pandas.core.resample.Resampler.median,../reference/api/pandas.core.resample.Resampler.median +generated/pandas.core.resample.Resampler.min,../reference/api/pandas.core.resample.Resampler.min +generated/pandas.core.resample.Resampler.nearest,../reference/api/pandas.core.resample.Resampler.nearest +generated/pandas.core.resample.Resampler.nunique,../reference/api/pandas.core.resample.Resampler.nunique +generated/pandas.core.resample.Resampler.ohlc,../reference/api/pandas.core.resample.Resampler.ohlc +generated/pandas.core.resample.Resampler.pad,../reference/api/pandas.core.resample.Resampler.pad 
+generated/pandas.core.resample.Resampler.pipe,../reference/api/pandas.core.resample.Resampler.pipe +generated/pandas.core.resample.Resampler.prod,../reference/api/pandas.core.resample.Resampler.prod +generated/pandas.core.resample.Resampler.quantile,../reference/api/pandas.core.resample.Resampler.quantile +generated/pandas.core.resample.Resampler.sem,../reference/api/pandas.core.resample.Resampler.sem +generated/pandas.core.resample.Resampler.size,../reference/api/pandas.core.resample.Resampler.size +generated/pandas.core.resample.Resampler.std,../reference/api/pandas.core.resample.Resampler.std +generated/pandas.core.resample.Resampler.sum,../reference/api/pandas.core.resample.Resampler.sum +generated/pandas.core.resample.Resampler.transform,../reference/api/pandas.core.resample.Resampler.transform +generated/pandas.core.resample.Resampler.var,../reference/api/pandas.core.resample.Resampler.var +generated/pandas.core.window.EWM.corr,../reference/api/pandas.core.window.EWM.corr +generated/pandas.core.window.EWM.cov,../reference/api/pandas.core.window.EWM.cov +generated/pandas.core.window.EWM.mean,../reference/api/pandas.core.window.EWM.mean +generated/pandas.core.window.EWM.std,../reference/api/pandas.core.window.EWM.std +generated/pandas.core.window.EWM.var,../reference/api/pandas.core.window.EWM.var +generated/pandas.core.window.Expanding.aggregate,../reference/api/pandas.core.window.Expanding.aggregate +generated/pandas.core.window.Expanding.apply,../reference/api/pandas.core.window.Expanding.apply +generated/pandas.core.window.Expanding.corr,../reference/api/pandas.core.window.Expanding.corr +generated/pandas.core.window.Expanding.count,../reference/api/pandas.core.window.Expanding.count +generated/pandas.core.window.Expanding.cov,../reference/api/pandas.core.window.Expanding.cov +generated/pandas.core.window.Expanding.kurt,../reference/api/pandas.core.window.Expanding.kurt 
+generated/pandas.core.window.Expanding.max,../reference/api/pandas.core.window.Expanding.max +generated/pandas.core.window.Expanding.mean,../reference/api/pandas.core.window.Expanding.mean +generated/pandas.core.window.Expanding.median,../reference/api/pandas.core.window.Expanding.median +generated/pandas.core.window.Expanding.min,../reference/api/pandas.core.window.Expanding.min +generated/pandas.core.window.Expanding.quantile,../reference/api/pandas.core.window.Expanding.quantile +generated/pandas.core.window.Expanding.skew,../reference/api/pandas.core.window.Expanding.skew +generated/pandas.core.window.Expanding.std,../reference/api/pandas.core.window.Expanding.std +generated/pandas.core.window.Expanding.sum,../reference/api/pandas.core.window.Expanding.sum +generated/pandas.core.window.Expanding.var,../reference/api/pandas.core.window.Expanding.var +generated/pandas.core.window.Rolling.aggregate,../reference/api/pandas.core.window.rolling.Rolling.aggregate +generated/pandas.core.window.Rolling.apply,../reference/api/pandas.core.window.rolling.Rolling.apply +generated/pandas.core.window.Rolling.corr,../reference/api/pandas.core.window.rolling.Rolling.corr +generated/pandas.core.window.Rolling.count,../reference/api/pandas.core.window.rolling.Rolling.count +generated/pandas.core.window.Rolling.cov,../reference/api/pandas.core.window.rolling.Rolling.cov +generated/pandas.core.window.Rolling.kurt,../reference/api/pandas.core.window.rolling.Rolling.kurt +generated/pandas.core.window.Rolling.max,../reference/api/pandas.core.window.rolling.Rolling.max +generated/pandas.core.window.Rolling.mean,../reference/api/pandas.core.window.rolling.Rolling.mean +generated/pandas.core.window.Rolling.median,../reference/api/pandas.core.window.rolling.Rolling.median +generated/pandas.core.window.Rolling.min,../reference/api/pandas.core.window.rolling.Rolling.min +generated/pandas.core.window.Rolling.quantile,../reference/api/pandas.core.window.rolling.Rolling.quantile 
+generated/pandas.core.window.Rolling.skew,../reference/api/pandas.core.window.rolling.Rolling.skew +generated/pandas.core.window.Rolling.std,../reference/api/pandas.core.window.rolling.Rolling.std +generated/pandas.core.window.Rolling.sum,../reference/api/pandas.core.window.rolling.Rolling.sum +generated/pandas.core.window.Rolling.var,../reference/api/pandas.core.window.rolling.Rolling.var +generated/pandas.core.window.Window.mean,../reference/api/pandas.core.window.Window.mean +generated/pandas.core.window.Window.sum,../reference/api/pandas.core.window.Window.sum +generated/pandas.crosstab,../reference/api/pandas.crosstab +generated/pandas.cut,../reference/api/pandas.cut +generated/pandas.DataFrame.abs,../reference/api/pandas.DataFrame.abs +generated/pandas.DataFrame.add,../reference/api/pandas.DataFrame.add +generated/pandas.DataFrame.add_prefix,../reference/api/pandas.DataFrame.add_prefix +generated/pandas.DataFrame.add_suffix,../reference/api/pandas.DataFrame.add_suffix +generated/pandas.DataFrame.agg,../reference/api/pandas.DataFrame.agg +generated/pandas.DataFrame.aggregate,../reference/api/pandas.DataFrame.aggregate +generated/pandas.DataFrame.align,../reference/api/pandas.DataFrame.align +generated/pandas.DataFrame.all,../reference/api/pandas.DataFrame.all +generated/pandas.DataFrame.any,../reference/api/pandas.DataFrame.any +generated/pandas.DataFrame.append,../reference/api/pandas.DataFrame.append +generated/pandas.DataFrame.apply,../reference/api/pandas.DataFrame.apply +generated/pandas.DataFrame.applymap,../reference/api/pandas.DataFrame.applymap +generated/pandas.DataFrame.as_blocks,../reference/api/pandas.DataFrame.as_blocks +generated/pandas.DataFrame.asfreq,../reference/api/pandas.DataFrame.asfreq +generated/pandas.DataFrame.as_matrix,../reference/api/pandas.DataFrame.as_matrix +generated/pandas.DataFrame.asof,../reference/api/pandas.DataFrame.asof +generated/pandas.DataFrame.assign,../reference/api/pandas.DataFrame.assign 
+generated/pandas.DataFrame.astype,../reference/api/pandas.DataFrame.astype +generated/pandas.DataFrame.at,../reference/api/pandas.DataFrame.at +generated/pandas.DataFrame.at_time,../reference/api/pandas.DataFrame.at_time +generated/pandas.DataFrame.axes,../reference/api/pandas.DataFrame.axes +generated/pandas.DataFrame.between_time,../reference/api/pandas.DataFrame.between_time +generated/pandas.DataFrame.bfill,../reference/api/pandas.DataFrame.bfill +generated/pandas.DataFrame.blocks,../reference/api/pandas.DataFrame.blocks +generated/pandas.DataFrame.bool,../reference/api/pandas.DataFrame.bool +generated/pandas.DataFrame.boxplot,../reference/api/pandas.DataFrame.boxplot +generated/pandas.DataFrame.clip,../reference/api/pandas.DataFrame.clip +generated/pandas.DataFrame.clip_lower,../reference/api/pandas.DataFrame.clip_lower +generated/pandas.DataFrame.clip_upper,../reference/api/pandas.DataFrame.clip_upper +generated/pandas.DataFrame.columns,../reference/api/pandas.DataFrame.columns +generated/pandas.DataFrame.combine_first,../reference/api/pandas.DataFrame.combine_first +generated/pandas.DataFrame.combine,../reference/api/pandas.DataFrame.combine +generated/pandas.DataFrame.convert_objects,../reference/api/pandas.DataFrame.convert_objects +generated/pandas.DataFrame.copy,../reference/api/pandas.DataFrame.copy +generated/pandas.DataFrame.corr,../reference/api/pandas.DataFrame.corr +generated/pandas.DataFrame.corrwith,../reference/api/pandas.DataFrame.corrwith +generated/pandas.DataFrame.count,../reference/api/pandas.DataFrame.count +generated/pandas.DataFrame.cov,../reference/api/pandas.DataFrame.cov +generated/pandas.DataFrame.cummax,../reference/api/pandas.DataFrame.cummax +generated/pandas.DataFrame.cummin,../reference/api/pandas.DataFrame.cummin +generated/pandas.DataFrame.cumprod,../reference/api/pandas.DataFrame.cumprod +generated/pandas.DataFrame.cumsum,../reference/api/pandas.DataFrame.cumsum 
+generated/pandas.DataFrame.describe,../reference/api/pandas.DataFrame.describe +generated/pandas.DataFrame.diff,../reference/api/pandas.DataFrame.diff +generated/pandas.DataFrame.div,../reference/api/pandas.DataFrame.div +generated/pandas.DataFrame.divide,../reference/api/pandas.DataFrame.divide +generated/pandas.DataFrame.dot,../reference/api/pandas.DataFrame.dot +generated/pandas.DataFrame.drop_duplicates,../reference/api/pandas.DataFrame.drop_duplicates +generated/pandas.DataFrame.drop,../reference/api/pandas.DataFrame.drop +generated/pandas.DataFrame.droplevel,../reference/api/pandas.DataFrame.droplevel +generated/pandas.DataFrame.dropna,../reference/api/pandas.DataFrame.dropna +generated/pandas.DataFrame.dtypes,../reference/api/pandas.DataFrame.dtypes +generated/pandas.DataFrame.duplicated,../reference/api/pandas.DataFrame.duplicated +generated/pandas.DataFrame.empty,../reference/api/pandas.DataFrame.empty +generated/pandas.DataFrame.eq,../reference/api/pandas.DataFrame.eq +generated/pandas.DataFrame.equals,../reference/api/pandas.DataFrame.equals +generated/pandas.DataFrame.eval,../reference/api/pandas.DataFrame.eval +generated/pandas.DataFrame.ewm,../reference/api/pandas.DataFrame.ewm +generated/pandas.DataFrame.expanding,../reference/api/pandas.DataFrame.expanding +generated/pandas.DataFrame.ffill,../reference/api/pandas.DataFrame.ffill +generated/pandas.DataFrame.fillna,../reference/api/pandas.DataFrame.fillna +generated/pandas.DataFrame.filter,../reference/api/pandas.DataFrame.filter +generated/pandas.DataFrame.first,../reference/api/pandas.DataFrame.first +generated/pandas.DataFrame.first_valid_index,../reference/api/pandas.DataFrame.first_valid_index +generated/pandas.DataFrame.floordiv,../reference/api/pandas.DataFrame.floordiv +generated/pandas.DataFrame.from_csv,../reference/api/pandas.DataFrame.from_csv +generated/pandas.DataFrame.from_dict,../reference/api/pandas.DataFrame.from_dict 
+generated/pandas.DataFrame.from_items,../reference/api/pandas.DataFrame.from_items +generated/pandas.DataFrame.from_records,../reference/api/pandas.DataFrame.from_records +generated/pandas.DataFrame.ge,../reference/api/pandas.DataFrame.ge +generated/pandas.DataFrame.get,../reference/api/pandas.DataFrame.get +generated/pandas.DataFrame.get_value,../reference/api/pandas.DataFrame.get_value +generated/pandas.DataFrame.groupby,../reference/api/pandas.DataFrame.groupby +generated/pandas.DataFrame.gt,../reference/api/pandas.DataFrame.gt +generated/pandas.DataFrame.head,../reference/api/pandas.DataFrame.head +generated/pandas.DataFrame.hist,../reference/api/pandas.DataFrame.hist +generated/pandas.DataFrame,../reference/api/pandas.DataFrame +generated/pandas.DataFrame.iat,../reference/api/pandas.DataFrame.iat +generated/pandas.DataFrame.idxmax,../reference/api/pandas.DataFrame.idxmax +generated/pandas.DataFrame.idxmin,../reference/api/pandas.DataFrame.idxmin +generated/pandas.DataFrame.iloc,../reference/api/pandas.DataFrame.iloc +generated/pandas.DataFrame.index,../reference/api/pandas.DataFrame.index +generated/pandas.DataFrame.infer_objects,../reference/api/pandas.DataFrame.infer_objects +generated/pandas.DataFrame.info,../reference/api/pandas.DataFrame.info +generated/pandas.DataFrame.insert,../reference/api/pandas.DataFrame.insert +generated/pandas.DataFrame.interpolate,../reference/api/pandas.DataFrame.interpolate +generated/pandas.DataFrame.is_copy,../reference/api/pandas.DataFrame.is_copy +generated/pandas.DataFrame.isin,../reference/api/pandas.DataFrame.isin +generated/pandas.DataFrame.isna,../reference/api/pandas.DataFrame.isna +generated/pandas.DataFrame.isnull,../reference/api/pandas.DataFrame.isnull +generated/pandas.DataFrame.items,../reference/api/pandas.DataFrame.items +generated/pandas.DataFrame.__iter__,../reference/api/pandas.DataFrame.__iter__ +generated/pandas.DataFrame.iteritems,../reference/api/pandas.DataFrame.iteritems 
+generated/pandas.DataFrame.iterrows,../reference/api/pandas.DataFrame.iterrows +generated/pandas.DataFrame.itertuples,../reference/api/pandas.DataFrame.itertuples +generated/pandas.DataFrame.ix,../reference/api/pandas.DataFrame.ix +generated/pandas.DataFrame.join,../reference/api/pandas.DataFrame.join +generated/pandas.DataFrame.keys,../reference/api/pandas.DataFrame.keys +generated/pandas.DataFrame.kurt,../reference/api/pandas.DataFrame.kurt +generated/pandas.DataFrame.kurtosis,../reference/api/pandas.DataFrame.kurtosis +generated/pandas.DataFrame.last,../reference/api/pandas.DataFrame.last +generated/pandas.DataFrame.last_valid_index,../reference/api/pandas.DataFrame.last_valid_index +generated/pandas.DataFrame.le,../reference/api/pandas.DataFrame.le +generated/pandas.DataFrame.loc,../reference/api/pandas.DataFrame.loc +generated/pandas.DataFrame.lookup,../reference/api/pandas.DataFrame.lookup +generated/pandas.DataFrame.lt,../reference/api/pandas.DataFrame.lt +generated/pandas.DataFrame.mad,../reference/api/pandas.DataFrame.mad +generated/pandas.DataFrame.mask,../reference/api/pandas.DataFrame.mask +generated/pandas.DataFrame.max,../reference/api/pandas.DataFrame.max +generated/pandas.DataFrame.mean,../reference/api/pandas.DataFrame.mean +generated/pandas.DataFrame.median,../reference/api/pandas.DataFrame.median +generated/pandas.DataFrame.melt,../reference/api/pandas.DataFrame.melt +generated/pandas.DataFrame.memory_usage,../reference/api/pandas.DataFrame.memory_usage +generated/pandas.DataFrame.merge,../reference/api/pandas.DataFrame.merge +generated/pandas.DataFrame.min,../reference/api/pandas.DataFrame.min +generated/pandas.DataFrame.mode,../reference/api/pandas.DataFrame.mode +generated/pandas.DataFrame.mod,../reference/api/pandas.DataFrame.mod +generated/pandas.DataFrame.mul,../reference/api/pandas.DataFrame.mul +generated/pandas.DataFrame.multiply,../reference/api/pandas.DataFrame.multiply 
+generated/pandas.DataFrame.ndim,../reference/api/pandas.DataFrame.ndim +generated/pandas.DataFrame.ne,../reference/api/pandas.DataFrame.ne +generated/pandas.DataFrame.nlargest,../reference/api/pandas.DataFrame.nlargest +generated/pandas.DataFrame.notna,../reference/api/pandas.DataFrame.notna +generated/pandas.DataFrame.notnull,../reference/api/pandas.DataFrame.notnull +generated/pandas.DataFrame.nsmallest,../reference/api/pandas.DataFrame.nsmallest +generated/pandas.DataFrame.nunique,../reference/api/pandas.DataFrame.nunique +generated/pandas.DataFrame.pct_change,../reference/api/pandas.DataFrame.pct_change +generated/pandas.DataFrame.pipe,../reference/api/pandas.DataFrame.pipe +generated/pandas.DataFrame.pivot,../reference/api/pandas.DataFrame.pivot +generated/pandas.DataFrame.pivot_table,../reference/api/pandas.DataFrame.pivot_table +generated/pandas.DataFrame.plot.barh,../reference/api/pandas.DataFrame.plot.barh +generated/pandas.DataFrame.plot.bar,../reference/api/pandas.DataFrame.plot.bar +generated/pandas.DataFrame.plot.box,../reference/api/pandas.DataFrame.plot.box +generated/pandas.DataFrame.plot.density,../reference/api/pandas.DataFrame.plot.density +generated/pandas.DataFrame.plot.hexbin,../reference/api/pandas.DataFrame.plot.hexbin +generated/pandas.DataFrame.plot.hist,../reference/api/pandas.DataFrame.plot.hist +generated/pandas.DataFrame.plot,../reference/api/pandas.DataFrame.plot +generated/pandas.DataFrame.plot.kde,../reference/api/pandas.DataFrame.plot.kde +generated/pandas.DataFrame.plot.line,../reference/api/pandas.DataFrame.plot.line +generated/pandas.DataFrame.plot.pie,../reference/api/pandas.DataFrame.plot.pie +generated/pandas.DataFrame.plot.scatter,../reference/api/pandas.DataFrame.plot.scatter +generated/pandas.DataFrame.pop,../reference/api/pandas.DataFrame.pop +generated/pandas.DataFrame.pow,../reference/api/pandas.DataFrame.pow +generated/pandas.DataFrame.prod,../reference/api/pandas.DataFrame.prod 
+generated/pandas.DataFrame.product,../reference/api/pandas.DataFrame.product +generated/pandas.DataFrame.quantile,../reference/api/pandas.DataFrame.quantile +generated/pandas.DataFrame.query,../reference/api/pandas.DataFrame.query +generated/pandas.DataFrame.radd,../reference/api/pandas.DataFrame.radd +generated/pandas.DataFrame.rank,../reference/api/pandas.DataFrame.rank +generated/pandas.DataFrame.rdiv,../reference/api/pandas.DataFrame.rdiv +generated/pandas.DataFrame.reindex_axis,../reference/api/pandas.DataFrame.reindex_axis +generated/pandas.DataFrame.reindex,../reference/api/pandas.DataFrame.reindex +generated/pandas.DataFrame.reindex_like,../reference/api/pandas.DataFrame.reindex_like +generated/pandas.DataFrame.rename_axis,../reference/api/pandas.DataFrame.rename_axis +generated/pandas.DataFrame.rename,../reference/api/pandas.DataFrame.rename +generated/pandas.DataFrame.reorder_levels,../reference/api/pandas.DataFrame.reorder_levels +generated/pandas.DataFrame.replace,../reference/api/pandas.DataFrame.replace +generated/pandas.DataFrame.resample,../reference/api/pandas.DataFrame.resample +generated/pandas.DataFrame.reset_index,../reference/api/pandas.DataFrame.reset_index +generated/pandas.DataFrame.rfloordiv,../reference/api/pandas.DataFrame.rfloordiv +generated/pandas.DataFrame.rmod,../reference/api/pandas.DataFrame.rmod +generated/pandas.DataFrame.rmul,../reference/api/pandas.DataFrame.rmul +generated/pandas.DataFrame.rolling,../reference/api/pandas.DataFrame.rolling +generated/pandas.DataFrame.round,../reference/api/pandas.DataFrame.round +generated/pandas.DataFrame.rpow,../reference/api/pandas.DataFrame.rpow +generated/pandas.DataFrame.rsub,../reference/api/pandas.DataFrame.rsub +generated/pandas.DataFrame.rtruediv,../reference/api/pandas.DataFrame.rtruediv +generated/pandas.DataFrame.sample,../reference/api/pandas.DataFrame.sample +generated/pandas.DataFrame.select_dtypes,../reference/api/pandas.DataFrame.select_dtypes 
+generated/pandas.DataFrame.select,../reference/api/pandas.DataFrame.select +generated/pandas.DataFrame.sem,../reference/api/pandas.DataFrame.sem +generated/pandas.DataFrame.set_axis,../reference/api/pandas.DataFrame.set_axis +generated/pandas.DataFrame.set_index,../reference/api/pandas.DataFrame.set_index +generated/pandas.DataFrame.set_value,../reference/api/pandas.DataFrame.set_value +generated/pandas.DataFrame.shape,../reference/api/pandas.DataFrame.shape +generated/pandas.DataFrame.shift,../reference/api/pandas.DataFrame.shift +generated/pandas.DataFrame.size,../reference/api/pandas.DataFrame.size +generated/pandas.DataFrame.skew,../reference/api/pandas.DataFrame.skew +generated/pandas.DataFrame.slice_shift,../reference/api/pandas.DataFrame.slice_shift +generated/pandas.DataFrame.sort_index,../reference/api/pandas.DataFrame.sort_index +generated/pandas.DataFrame.sort_values,../reference/api/pandas.DataFrame.sort_values +generated/pandas.DataFrame.squeeze,../reference/api/pandas.DataFrame.squeeze +generated/pandas.DataFrame.stack,../reference/api/pandas.DataFrame.stack +generated/pandas.DataFrame.std,../reference/api/pandas.DataFrame.std +generated/pandas.DataFrame.style,../reference/api/pandas.DataFrame.style +generated/pandas.DataFrame.sub,../reference/api/pandas.DataFrame.sub +generated/pandas.DataFrame.subtract,../reference/api/pandas.DataFrame.subtract +generated/pandas.DataFrame.sum,../reference/api/pandas.DataFrame.sum +generated/pandas.DataFrame.swapaxes,../reference/api/pandas.DataFrame.swapaxes +generated/pandas.DataFrame.swaplevel,../reference/api/pandas.DataFrame.swaplevel +generated/pandas.DataFrame.tail,../reference/api/pandas.DataFrame.tail +generated/pandas.DataFrame.take,../reference/api/pandas.DataFrame.take +generated/pandas.DataFrame.T,../reference/api/pandas.DataFrame.T +generated/pandas.DataFrame.timetuple,../reference/api/pandas.DataFrame.timetuple +generated/pandas.DataFrame.to_clipboard,../reference/api/pandas.DataFrame.to_clipboard 
+generated/pandas.DataFrame.to_csv,../reference/api/pandas.DataFrame.to_csv +generated/pandas.DataFrame.to_dict,../reference/api/pandas.DataFrame.to_dict +generated/pandas.DataFrame.to_excel,../reference/api/pandas.DataFrame.to_excel +generated/pandas.DataFrame.to_feather,../reference/api/pandas.DataFrame.to_feather +generated/pandas.DataFrame.to_gbq,../reference/api/pandas.DataFrame.to_gbq +generated/pandas.DataFrame.to_hdf,../reference/api/pandas.DataFrame.to_hdf +generated/pandas.DataFrame.to,../reference/api/pandas.DataFrame.to +generated/pandas.DataFrame.to_json,../reference/api/pandas.DataFrame.to_json +generated/pandas.DataFrame.to_latex,../reference/api/pandas.DataFrame.to_latex +generated/pandas.DataFrame.to_numpy,../reference/api/pandas.DataFrame.to_numpy +generated/pandas.DataFrame.to_panel,../reference/api/pandas.DataFrame.to_panel +generated/pandas.DataFrame.to_parquet,../reference/api/pandas.DataFrame.to_parquet +generated/pandas.DataFrame.to_period,../reference/api/pandas.DataFrame.to_period +generated/pandas.DataFrame.to_pickle,../reference/api/pandas.DataFrame.to_pickle +generated/pandas.DataFrame.to_records,../reference/api/pandas.DataFrame.to_records +generated/pandas.DataFrame.to_sql,../reference/api/pandas.DataFrame.to_sql +generated/pandas.DataFrame.to_stata,../reference/api/pandas.DataFrame.to_stata +generated/pandas.DataFrame.to_string,../reference/api/pandas.DataFrame.to_string +generated/pandas.DataFrame.to_timestamp,../reference/api/pandas.DataFrame.to_timestamp +generated/pandas.DataFrame.to_xarray,../reference/api/pandas.DataFrame.to_xarray +generated/pandas.DataFrame.transform,../reference/api/pandas.DataFrame.transform +generated/pandas.DataFrame.transpose,../reference/api/pandas.DataFrame.transpose +generated/pandas.DataFrame.truediv,../reference/api/pandas.DataFrame.truediv +generated/pandas.DataFrame.truncate,../reference/api/pandas.DataFrame.truncate +generated/pandas.DataFrame.tshift,../reference/api/pandas.DataFrame.tshift 
+generated/pandas.DataFrame.tz_convert,../reference/api/pandas.DataFrame.tz_convert +generated/pandas.DataFrame.tz_localize,../reference/api/pandas.DataFrame.tz_localize +generated/pandas.DataFrame.unstack,../reference/api/pandas.DataFrame.unstack +generated/pandas.DataFrame.update,../reference/api/pandas.DataFrame.update +generated/pandas.DataFrame.values,../reference/api/pandas.DataFrame.values +generated/pandas.DataFrame.var,../reference/api/pandas.DataFrame.var +generated/pandas.DataFrame.where,../reference/api/pandas.DataFrame.where +generated/pandas.DataFrame.xs,../reference/api/pandas.DataFrame.xs +generated/pandas.date_range,../reference/api/pandas.date_range +generated/pandas.DatetimeIndex.ceil,../reference/api/pandas.DatetimeIndex.ceil +generated/pandas.DatetimeIndex.date,../reference/api/pandas.DatetimeIndex.date +generated/pandas.DatetimeIndex.day,../reference/api/pandas.DatetimeIndex.day +generated/pandas.DatetimeIndex.day_name,../reference/api/pandas.DatetimeIndex.day_name +generated/pandas.DatetimeIndex.dayofweek,../reference/api/pandas.DatetimeIndex.dayofweek +generated/pandas.DatetimeIndex.dayofyear,../reference/api/pandas.DatetimeIndex.dayofyear +generated/pandas.DatetimeIndex.floor,../reference/api/pandas.DatetimeIndex.floor +generated/pandas.DatetimeIndex.freq,../reference/api/pandas.DatetimeIndex.freq +generated/pandas.DatetimeIndex.freqstr,../reference/api/pandas.DatetimeIndex.freqstr +generated/pandas.DatetimeIndex.hour,../reference/api/pandas.DatetimeIndex.hour +generated/pandas.DatetimeIndex,../reference/api/pandas.DatetimeIndex +generated/pandas.DatetimeIndex.indexer_at_time,../reference/api/pandas.DatetimeIndex.indexer_at_time +generated/pandas.DatetimeIndex.indexer_between_time,../reference/api/pandas.DatetimeIndex.indexer_between_time +generated/pandas.DatetimeIndex.inferred_freq,../reference/api/pandas.DatetimeIndex.inferred_freq +generated/pandas.DatetimeIndex.is_leap_year,../reference/api/pandas.DatetimeIndex.is_leap_year 
+generated/pandas.DatetimeIndex.is_month_end,../reference/api/pandas.DatetimeIndex.is_month_end +generated/pandas.DatetimeIndex.is_month_start,../reference/api/pandas.DatetimeIndex.is_month_start +generated/pandas.DatetimeIndex.is_quarter_end,../reference/api/pandas.DatetimeIndex.is_quarter_end +generated/pandas.DatetimeIndex.is_quarter_start,../reference/api/pandas.DatetimeIndex.is_quarter_start +generated/pandas.DatetimeIndex.is_year_end,../reference/api/pandas.DatetimeIndex.is_year_end +generated/pandas.DatetimeIndex.is_year_start,../reference/api/pandas.DatetimeIndex.is_year_start +generated/pandas.DatetimeIndex.microsecond,../reference/api/pandas.DatetimeIndex.microsecond +generated/pandas.DatetimeIndex.minute,../reference/api/pandas.DatetimeIndex.minute +generated/pandas.DatetimeIndex.month,../reference/api/pandas.DatetimeIndex.month +generated/pandas.DatetimeIndex.month_name,../reference/api/pandas.DatetimeIndex.month_name +generated/pandas.DatetimeIndex.nanosecond,../reference/api/pandas.DatetimeIndex.nanosecond +generated/pandas.DatetimeIndex.normalize,../reference/api/pandas.DatetimeIndex.normalize +generated/pandas.DatetimeIndex.quarter,../reference/api/pandas.DatetimeIndex.quarter +generated/pandas.DatetimeIndex.round,../reference/api/pandas.DatetimeIndex.round +generated/pandas.DatetimeIndex.second,../reference/api/pandas.DatetimeIndex.second +generated/pandas.DatetimeIndex.snap,../reference/api/pandas.DatetimeIndex.snap +generated/pandas.DatetimeIndex.strftime,../reference/api/pandas.DatetimeIndex.strftime +generated/pandas.DatetimeIndex.time,../reference/api/pandas.DatetimeIndex.time +generated/pandas.DatetimeIndex.timetz,../reference/api/pandas.DatetimeIndex.timetz +generated/pandas.DatetimeIndex.to_frame,../reference/api/pandas.DatetimeIndex.to_frame +generated/pandas.DatetimeIndex.to_perioddelta,../reference/api/pandas.DatetimeIndex.to_perioddelta +generated/pandas.DatetimeIndex.to_period,../reference/api/pandas.DatetimeIndex.to_period 
+generated/pandas.DatetimeIndex.to_pydatetime,../reference/api/pandas.DatetimeIndex.to_pydatetime +generated/pandas.DatetimeIndex.to_series,../reference/api/pandas.DatetimeIndex.to_series +generated/pandas.DatetimeIndex.tz_convert,../reference/api/pandas.DatetimeIndex.tz_convert +generated/pandas.DatetimeIndex.tz,../reference/api/pandas.DatetimeIndex.tz +generated/pandas.DatetimeIndex.tz_localize,../reference/api/pandas.DatetimeIndex.tz_localize +generated/pandas.DatetimeIndex.weekday,../reference/api/pandas.DatetimeIndex.weekday +generated/pandas.DatetimeIndex.week,../reference/api/pandas.DatetimeIndex.week +generated/pandas.DatetimeIndex.weekofyear,../reference/api/pandas.DatetimeIndex.weekofyear +generated/pandas.DatetimeIndex.year,../reference/api/pandas.DatetimeIndex.year +generated/pandas.DatetimeTZDtype.base,../reference/api/pandas.DatetimeTZDtype.base +generated/pandas.DatetimeTZDtype.construct_array_type,../reference/api/pandas.DatetimeTZDtype.construct_array_type +generated/pandas.DatetimeTZDtype.construct_from_string,../reference/api/pandas.DatetimeTZDtype.construct_from_string +generated/pandas.DatetimeTZDtype,../reference/api/pandas.DatetimeTZDtype +generated/pandas.DatetimeTZDtype.isbuiltin,../reference/api/pandas.DatetimeTZDtype.isbuiltin +generated/pandas.DatetimeTZDtype.is_dtype,../reference/api/pandas.DatetimeTZDtype.is_dtype +generated/pandas.DatetimeTZDtype.isnative,../reference/api/pandas.DatetimeTZDtype.isnative +generated/pandas.DatetimeTZDtype.itemsize,../reference/api/pandas.DatetimeTZDtype.itemsize +generated/pandas.DatetimeTZDtype.kind,../reference/api/pandas.DatetimeTZDtype.kind +generated/pandas.DatetimeTZDtype.name,../reference/api/pandas.DatetimeTZDtype.name +generated/pandas.DatetimeTZDtype.names,../reference/api/pandas.DatetimeTZDtype.names +generated/pandas.DatetimeTZDtype.na_value,../reference/api/pandas.DatetimeTZDtype.na_value +generated/pandas.DatetimeTZDtype.num,../reference/api/pandas.DatetimeTZDtype.num 
+generated/pandas.DatetimeTZDtype.reset_cache,../reference/api/pandas.DatetimeTZDtype.reset_cache +generated/pandas.DatetimeTZDtype.shape,../reference/api/pandas.DatetimeTZDtype.shape +generated/pandas.DatetimeTZDtype.str,../reference/api/pandas.DatetimeTZDtype.str +generated/pandas.DatetimeTZDtype.subdtype,../reference/api/pandas.DatetimeTZDtype.subdtype +generated/pandas.DatetimeTZDtype.tz,../reference/api/pandas.DatetimeTZDtype.tz +generated/pandas.DatetimeTZDtype.unit,../reference/api/pandas.DatetimeTZDtype.unit +generated/pandas.describe_option,../reference/api/pandas.describe_option +generated/pandas.errors.DtypeWarning,../reference/api/pandas.errors.DtypeWarning +generated/pandas.errors.EmptyDataError,../reference/api/pandas.errors.EmptyDataError +generated/pandas.errors.OutOfBoundsDatetime,../reference/api/pandas.errors.OutOfBoundsDatetime +generated/pandas.errors.ParserError,../reference/api/pandas.errors.ParserError +generated/pandas.errors.ParserWarning,../reference/api/pandas.errors.ParserWarning +generated/pandas.errors.PerformanceWarning,../reference/api/pandas.errors.PerformanceWarning +generated/pandas.errors.UnsortedIndexError,../reference/api/pandas.errors.UnsortedIndexError +generated/pandas.errors.UnsupportedFunctionCall,../reference/api/pandas.errors.UnsupportedFunctionCall +generated/pandas.eval,../reference/api/pandas.eval +generated/pandas.ExcelFile.parse,../reference/api/pandas.ExcelFile.parse +generated/pandas.ExcelWriter,../reference/api/pandas.ExcelWriter +generated/pandas.factorize,../reference/api/pandas.factorize +generated/pandas.Float64Index,../reference/api/pandas.Float64Index +generated/pandas.get_dummies,../reference/api/pandas.get_dummies +generated/pandas.get_option,../reference/api/pandas.get_option +generated/pandas.Grouper,../reference/api/pandas.Grouper +generated/pandas.HDFStore.append,../reference/api/pandas.HDFStore.append +generated/pandas.HDFStore.get,../reference/api/pandas.HDFStore.get 
+generated/pandas.HDFStore.groups,../reference/api/pandas.HDFStore.groups +generated/pandas.HDFStore.info,../reference/api/pandas.HDFStore.info +generated/pandas.HDFStore.keys,../reference/api/pandas.HDFStore.keys +generated/pandas.HDFStore.put,../reference/api/pandas.HDFStore.put +generated/pandas.HDFStore.select,../reference/api/pandas.HDFStore.select +generated/pandas.HDFStore.walk,../reference/api/pandas.HDFStore.walk +generated/pandas.Index.all,../reference/api/pandas.Index.all +generated/pandas.Index.any,../reference/api/pandas.Index.any +generated/pandas.Index.append,../reference/api/pandas.Index.append +generated/pandas.Index.argmax,../reference/api/pandas.Index.argmax +generated/pandas.Index.argmin,../reference/api/pandas.Index.argmin +generated/pandas.Index.argsort,../reference/api/pandas.Index.argsort +generated/pandas.Index.array,../reference/api/pandas.Index.array +generated/pandas.Index.asi8,../reference/api/pandas.Index.asi8 +generated/pandas.Index.asof,../reference/api/pandas.Index.asof +generated/pandas.Index.asof_locs,../reference/api/pandas.Index.asof_locs +generated/pandas.Index.astype,../reference/api/pandas.Index.astype +generated/pandas.Index.copy,../reference/api/pandas.Index.copy +generated/pandas.Index.data,../reference/api/pandas.Index.data +generated/pandas.Index.delete,../reference/api/pandas.Index.delete +generated/pandas.Index.difference,../reference/api/pandas.Index.difference +generated/pandas.Index.drop_duplicates,../reference/api/pandas.Index.drop_duplicates +generated/pandas.Index.drop,../reference/api/pandas.Index.drop +generated/pandas.Index.droplevel,../reference/api/pandas.Index.droplevel +generated/pandas.Index.dropna,../reference/api/pandas.Index.dropna +generated/pandas.Index.dtype,../reference/api/pandas.Index.dtype +generated/pandas.Index.duplicated,../reference/api/pandas.Index.duplicated +generated/pandas.Index.empty,../reference/api/pandas.Index.empty +generated/pandas.Index.equals,../reference/api/pandas.Index.equals 
+generated/pandas.Index.factorize,../reference/api/pandas.Index.factorize +generated/pandas.Index.fillna,../reference/api/pandas.Index.fillna +generated/pandas.Index.format,../reference/api/pandas.Index.format +generated/pandas.Index.get_indexer_for,../reference/api/pandas.Index.get_indexer_for +generated/pandas.Index.get_indexer,../reference/api/pandas.Index.get_indexer +generated/pandas.Index.get_indexer_non_unique,../reference/api/pandas.Index.get_indexer_non_unique +generated/pandas.Index.get_level_values,../reference/api/pandas.Index.get_level_values +generated/pandas.Index.get_loc,../reference/api/pandas.Index.get_loc +generated/pandas.Index.get_slice_bound,../reference/api/pandas.Index.get_slice_bound +generated/pandas.Index.get_value,../reference/api/pandas.Index.get_value +generated/pandas.Index.groupby,../reference/api/pandas.Index.groupby +generated/pandas.Index.has_duplicates,../reference/api/pandas.Index.has_duplicates +generated/pandas.Index.hasnans,../reference/api/pandas.Index.hasnans +generated/pandas.Index.holds_integer,../reference/api/pandas.Index.holds_integer +generated/pandas.Index,../reference/api/pandas.Index +generated/pandas.Index.identical,../reference/api/pandas.Index.identical +generated/pandas.Index.inferred_type,../reference/api/pandas.Index.inferred_type +generated/pandas.Index.insert,../reference/api/pandas.Index.insert +generated/pandas.Index.intersection,../reference/api/pandas.Index.intersection +generated/pandas.Index.is_all_dates,../reference/api/pandas.Index.is_all_dates +generated/pandas.Index.is_boolean,../reference/api/pandas.Index.is_boolean +generated/pandas.Index.is_categorical,../reference/api/pandas.Index.is_categorical +generated/pandas.Index.is_floating,../reference/api/pandas.Index.is_floating +generated/pandas.Index.is_,../reference/api/pandas.Index.is_ +generated/pandas.Index.isin,../reference/api/pandas.Index.isin +generated/pandas.Index.is_integer,../reference/api/pandas.Index.is_integer 
+generated/pandas.Index.is_interval,../reference/api/pandas.Index.is_interval +generated/pandas.Index.is_lexsorted_for_tuple,../reference/api/pandas.Index.is_lexsorted_for_tuple +generated/pandas.Index.is_mixed,../reference/api/pandas.Index.is_mixed +generated/pandas.Index.is_monotonic_decreasing,../reference/api/pandas.Index.is_monotonic_decreasing +generated/pandas.Index.is_monotonic,../reference/api/pandas.Index.is_monotonic +generated/pandas.Index.is_monotonic_increasing,../reference/api/pandas.Index.is_monotonic_increasing +generated/pandas.Index.isna,../reference/api/pandas.Index.isna +generated/pandas.Index.isnull,../reference/api/pandas.Index.isnull +generated/pandas.Index.is_numeric,../reference/api/pandas.Index.is_numeric +generated/pandas.Index.is_object,../reference/api/pandas.Index.is_object +generated/pandas.Index.is_type_compatible,../reference/api/pandas.Index.is_type_compatible +generated/pandas.Index.is_unique,../reference/api/pandas.Index.is_unique +generated/pandas.Index.item,../reference/api/pandas.Index.item +generated/pandas.Index.join,../reference/api/pandas.Index.join +generated/pandas.Index.map,../reference/api/pandas.Index.map +generated/pandas.Index.max,../reference/api/pandas.Index.max +generated/pandas.Index.memory_usage,../reference/api/pandas.Index.memory_usage +generated/pandas.Index.min,../reference/api/pandas.Index.min +generated/pandas.Index.name,../reference/api/pandas.Index.name +generated/pandas.Index.names,../reference/api/pandas.Index.names +generated/pandas.Index.nbytes,../reference/api/pandas.Index.nbytes +generated/pandas.Index.ndim,../reference/api/pandas.Index.ndim +generated/pandas.Index.nlevels,../reference/api/pandas.Index.nlevels +generated/pandas.Index.notna,../reference/api/pandas.Index.notna +generated/pandas.Index.notnull,../reference/api/pandas.Index.notnull +generated/pandas.Index.nunique,../reference/api/pandas.Index.nunique +generated/pandas.Index.putmask,../reference/api/pandas.Index.putmask 
+generated/pandas.Index.ravel,../reference/api/pandas.Index.ravel +generated/pandas.Index.reindex,../reference/api/pandas.Index.reindex +generated/pandas.Index.rename,../reference/api/pandas.Index.rename +generated/pandas.Index.repeat,../reference/api/pandas.Index.repeat +generated/pandas.Index.searchsorted,../reference/api/pandas.Index.searchsorted +generated/pandas.Index.set_names,../reference/api/pandas.Index.set_names +generated/pandas.Index.set_value,../reference/api/pandas.Index.set_value +generated/pandas.Index.shape,../reference/api/pandas.Index.shape +generated/pandas.Index.shift,../reference/api/pandas.Index.shift +generated/pandas.Index.size,../reference/api/pandas.Index.size +generated/pandas.IndexSlice,../reference/api/pandas.IndexSlice +generated/pandas.Index.slice_indexer,../reference/api/pandas.Index.slice_indexer +generated/pandas.Index.slice_locs,../reference/api/pandas.Index.slice_locs +generated/pandas.Index.sort,../reference/api/pandas.Index.sort +generated/pandas.Index.sortlevel,../reference/api/pandas.Index.sortlevel +generated/pandas.Index.sort_values,../reference/api/pandas.Index.sort_values +generated/pandas.Index.str,../reference/api/pandas.Index.str +generated/pandas.Index.summary,../reference/api/pandas.Index.summary +generated/pandas.Index.symmetric_difference,../reference/api/pandas.Index.symmetric_difference +generated/pandas.Index.take,../reference/api/pandas.Index.take +generated/pandas.Index.T,../reference/api/pandas.Index.T +generated/pandas.Index.to_flat_index,../reference/api/pandas.Index.to_flat_index +generated/pandas.Index.to_frame,../reference/api/pandas.Index.to_frame +generated/pandas.Index.to_list,../reference/api/pandas.Index.to_list +generated/pandas.Index.tolist,../reference/api/pandas.Index.tolist +generated/pandas.Index.to_native_types,../reference/api/pandas.Index.to_native_types +generated/pandas.Index.to_numpy,../reference/api/pandas.Index.to_numpy 
+generated/pandas.Index.to_series,../reference/api/pandas.Index.to_series +generated/pandas.Index.transpose,../reference/api/pandas.Index.transpose +generated/pandas.Index.union,../reference/api/pandas.Index.union +generated/pandas.Index.unique,../reference/api/pandas.Index.unique +generated/pandas.Index.value_counts,../reference/api/pandas.Index.value_counts +generated/pandas.Index.values,../reference/api/pandas.Index.values +generated/pandas.Index.view,../reference/api/pandas.Index.view +generated/pandas.Index.where,../reference/api/pandas.Index.where +generated/pandas.infer_freq,../reference/api/pandas.infer_freq +generated/pandas.Interval.closed,../reference/api/pandas.Interval.closed +generated/pandas.Interval.closed_left,../reference/api/pandas.Interval.closed_left +generated/pandas.Interval.closed_right,../reference/api/pandas.Interval.closed_right +generated/pandas.Interval,../reference/api/pandas.Interval +generated/pandas.IntervalIndex.closed,../reference/api/pandas.IntervalIndex.closed +generated/pandas.IntervalIndex.contains,../reference/api/pandas.IntervalIndex.contains +generated/pandas.IntervalIndex.from_arrays,../reference/api/pandas.IntervalIndex.from_arrays +generated/pandas.IntervalIndex.from_breaks,../reference/api/pandas.IntervalIndex.from_breaks +generated/pandas.IntervalIndex.from_tuples,../reference/api/pandas.IntervalIndex.from_tuples +generated/pandas.IntervalIndex.get_indexer,../reference/api/pandas.IntervalIndex.get_indexer +generated/pandas.IntervalIndex.get_loc,../reference/api/pandas.IntervalIndex.get_loc +generated/pandas.IntervalIndex,../reference/api/pandas.IntervalIndex +generated/pandas.IntervalIndex.is_non_overlapping_monotonic,../reference/api/pandas.IntervalIndex.is_non_overlapping_monotonic +generated/pandas.IntervalIndex.is_overlapping,../reference/api/pandas.IntervalIndex.is_overlapping +generated/pandas.IntervalIndex.left,../reference/api/pandas.IntervalIndex.left 
+generated/pandas.IntervalIndex.length,../reference/api/pandas.IntervalIndex.length +generated/pandas.IntervalIndex.mid,../reference/api/pandas.IntervalIndex.mid +generated/pandas.IntervalIndex.overlaps,../reference/api/pandas.IntervalIndex.overlaps +generated/pandas.IntervalIndex.right,../reference/api/pandas.IntervalIndex.right +generated/pandas.IntervalIndex.set_closed,../reference/api/pandas.IntervalIndex.set_closed +generated/pandas.IntervalIndex.to_tuples,../reference/api/pandas.IntervalIndex.to_tuples +generated/pandas.IntervalIndex.values,../reference/api/pandas.IntervalIndex.values +generated/pandas.Interval.left,../reference/api/pandas.Interval.left +generated/pandas.Interval.length,../reference/api/pandas.Interval.length +generated/pandas.Interval.mid,../reference/api/pandas.Interval.mid +generated/pandas.Interval.open_left,../reference/api/pandas.Interval.open_left +generated/pandas.Interval.open_right,../reference/api/pandas.Interval.open_right +generated/pandas.Interval.overlaps,../reference/api/pandas.Interval.overlaps +generated/pandas.interval_range,../reference/api/pandas.interval_range +generated/pandas.Interval.right,../reference/api/pandas.Interval.right +generated/pandas.io.formats.style.Styler.apply,../reference/api/pandas.io.formats.style.Styler.apply +generated/pandas.io.formats.style.Styler.applymap,../reference/api/pandas.io.formats.style.Styler.applymap +generated/pandas.io.formats.style.Styler.background_gradient,../reference/api/pandas.io.formats.style.Styler.background_gradient +generated/pandas.io.formats.style.Styler.bar,../reference/api/pandas.io.formats.style.Styler.bar +generated/pandas.io.formats.style.Styler.clear,../reference/api/pandas.io.formats.style.Styler.clear +generated/pandas.io.formats.style.Styler.env,../reference/api/pandas.io.formats.style.Styler.env +generated/pandas.io.formats.style.Styler.export,../reference/api/pandas.io.formats.style.Styler.export 
+generated/pandas.io.formats.style.Styler.format,../reference/api/pandas.io.formats.style.Styler.format +generated/pandas.io.formats.style.Styler.from_custom_template,../reference/api/pandas.io.formats.style.Styler.from_custom_template +generated/pandas.io.formats.style.Styler.hide_columns,../reference/api/pandas.io.formats.style.Styler.hide_columns +generated/pandas.io.formats.style.Styler.hide_index,../reference/api/pandas.io.formats.style.Styler.hide_index +generated/pandas.io.formats.style.Styler.highlight_max,../reference/api/pandas.io.formats.style.Styler.highlight_max +generated/pandas.io.formats.style.Styler.highlight_min,../reference/api/pandas.io.formats.style.Styler.highlight_min +generated/pandas.io.formats.style.Styler.highlight_null,../reference/api/pandas.io.formats.style.Styler.highlight_null +generated/pandas.io.formats.style.Styler,../reference/api/pandas.io.formats.style.Styler +generated/pandas.io.formats.style.Styler.loader,../reference/api/pandas.io.formats.style.Styler.loader +generated/pandas.io.formats.style.Styler.pipe,../reference/api/pandas.io.formats.style.Styler.pipe +generated/pandas.io.formats.style.Styler.render,../reference/api/pandas.io.formats.style.Styler.render +generated/pandas.io.formats.style.Styler.set_caption,../reference/api/pandas.io.formats.style.Styler.set_caption +generated/pandas.io.formats.style.Styler.set_precision,../reference/api/pandas.io.formats.style.Styler.set_precision +generated/pandas.io.formats.style.Styler.set_properties,../reference/api/pandas.io.formats.style.Styler.set_properties +generated/pandas.io.formats.style.Styler.set_table_attributes,../reference/api/pandas.io.formats.style.Styler.set_table_attributes +generated/pandas.io.formats.style.Styler.set_table_styles,../reference/api/pandas.io.formats.style.Styler.set_table_styles +generated/pandas.io.formats.style.Styler.set_uuid,../reference/api/pandas.io.formats.style.Styler.set_uuid 
+generated/pandas.io.formats.style.Styler.template,../reference/api/pandas.io.formats.style.Styler.template +generated/pandas.io.formats.style.Styler.to_excel,../reference/api/pandas.io.formats.style.Styler.to_excel +generated/pandas.io.formats.style.Styler.use,../reference/api/pandas.io.formats.style.Styler.use +generated/pandas.io.formats.style.Styler.where,../reference/api/pandas.io.formats.style.Styler.where +generated/pandas.io.json.build_table_schema,../reference/api/pandas.io.json.build_table_schema +generated/pandas.io.json.json_normalize,../reference/api/pandas.json_normalize +generated/pandas.io.stata.StataReader.data_label,../reference/api/pandas.io.stata.StataReader.data_label +generated/pandas.io.stata.StataReader.value_labels,../reference/api/pandas.io.stata.StataReader.value_labels +generated/pandas.io.stata.StataReader.variable_labels,../reference/api/pandas.io.stata.StataReader.variable_labels +generated/pandas.io.stata.StataWriter.write_file,../reference/api/pandas.io.stata.StataWriter.write_file +generated/pandas.isna,../reference/api/pandas.isna +generated/pandas.isnull,../reference/api/pandas.isnull +generated/pandas.melt,../reference/api/pandas.melt +generated/pandas.merge_asof,../reference/api/pandas.merge_asof +generated/pandas.merge,../reference/api/pandas.merge +generated/pandas.merge_ordered,../reference/api/pandas.merge_ordered +generated/pandas.MultiIndex.codes,../reference/api/pandas.MultiIndex.codes +generated/pandas.MultiIndex.droplevel,../reference/api/pandas.MultiIndex.droplevel +generated/pandas.MultiIndex.from_arrays,../reference/api/pandas.MultiIndex.from_arrays +generated/pandas.MultiIndex.from_frame,../reference/api/pandas.MultiIndex.from_frame +generated/pandas.MultiIndex.from_product,../reference/api/pandas.MultiIndex.from_product +generated/pandas.MultiIndex.from_tuples,../reference/api/pandas.MultiIndex.from_tuples +generated/pandas.MultiIndex.get_indexer,../reference/api/pandas.MultiIndex.get_indexer 
+generated/pandas.MultiIndex.get_level_values,../reference/api/pandas.MultiIndex.get_level_values +generated/pandas.MultiIndex.get_loc,../reference/api/pandas.MultiIndex.get_loc +generated/pandas.MultiIndex.get_loc_level,../reference/api/pandas.MultiIndex.get_loc_level +generated/pandas.MultiIndex,../reference/api/pandas.MultiIndex +generated/pandas.MultiIndex.is_lexsorted,../reference/api/pandas.MultiIndex.is_lexsorted +generated/pandas.MultiIndex.levels,../reference/api/pandas.MultiIndex.levels +generated/pandas.MultiIndex.levshape,../reference/api/pandas.MultiIndex.levshape +generated/pandas.MultiIndex.names,../reference/api/pandas.MultiIndex.names +generated/pandas.MultiIndex.nlevels,../reference/api/pandas.MultiIndex.nlevels +generated/pandas.MultiIndex.remove_unused_levels,../reference/api/pandas.MultiIndex.remove_unused_levels +generated/pandas.MultiIndex.reorder_levels,../reference/api/pandas.MultiIndex.reorder_levels +generated/pandas.MultiIndex.set_codes,../reference/api/pandas.MultiIndex.set_codes +generated/pandas.MultiIndex.set_levels,../reference/api/pandas.MultiIndex.set_levels +generated/pandas.MultiIndex.sortlevel,../reference/api/pandas.MultiIndex.sortlevel +generated/pandas.MultiIndex.swaplevel,../reference/api/pandas.MultiIndex.swaplevel +generated/pandas.MultiIndex.to_flat_index,../reference/api/pandas.MultiIndex.to_flat_index +generated/pandas.MultiIndex.to_frame,../reference/api/pandas.MultiIndex.to_frame +generated/pandas.notna,../reference/api/pandas.notna +generated/pandas.notnull,../reference/api/pandas.notnull +generated/pandas.option_context,../reference/api/pandas.option_context +generated/pandas.Period.asfreq,../reference/api/pandas.Period.asfreq +generated/pandas.Period.day,../reference/api/pandas.Period.day +generated/pandas.Period.dayofweek,../reference/api/pandas.Period.dayofweek +generated/pandas.Period.dayofyear,../reference/api/pandas.Period.dayofyear 
+generated/pandas.Period.days_in_month,../reference/api/pandas.Period.days_in_month +generated/pandas.Period.daysinmonth,../reference/api/pandas.Period.daysinmonth +generated/pandas.Period.end_time,../reference/api/pandas.Period.end_time +generated/pandas.Period.freq,../reference/api/pandas.Period.freq +generated/pandas.Period.freqstr,../reference/api/pandas.Period.freqstr +generated/pandas.Period.hour,../reference/api/pandas.Period.hour +generated/pandas.Period,../reference/api/pandas.Period +generated/pandas.PeriodIndex.asfreq,../reference/api/pandas.PeriodIndex.asfreq +generated/pandas.PeriodIndex.day,../reference/api/pandas.PeriodIndex.day +generated/pandas.PeriodIndex.dayofweek,../reference/api/pandas.PeriodIndex.dayofweek +generated/pandas.PeriodIndex.dayofyear,../reference/api/pandas.PeriodIndex.dayofyear +generated/pandas.PeriodIndex.days_in_month,../reference/api/pandas.PeriodIndex.days_in_month +generated/pandas.PeriodIndex.daysinmonth,../reference/api/pandas.PeriodIndex.daysinmonth +generated/pandas.PeriodIndex.end_time,../reference/api/pandas.PeriodIndex.end_time +generated/pandas.PeriodIndex.freq,../reference/api/pandas.PeriodIndex.freq +generated/pandas.PeriodIndex.freqstr,../reference/api/pandas.PeriodIndex.freqstr +generated/pandas.PeriodIndex.hour,../reference/api/pandas.PeriodIndex.hour +generated/pandas.PeriodIndex,../reference/api/pandas.PeriodIndex +generated/pandas.PeriodIndex.is_leap_year,../reference/api/pandas.PeriodIndex.is_leap_year +generated/pandas.PeriodIndex.minute,../reference/api/pandas.PeriodIndex.minute +generated/pandas.PeriodIndex.month,../reference/api/pandas.PeriodIndex.month +generated/pandas.PeriodIndex.quarter,../reference/api/pandas.PeriodIndex.quarter +generated/pandas.PeriodIndex.qyear,../reference/api/pandas.PeriodIndex.qyear +generated/pandas.PeriodIndex.second,../reference/api/pandas.PeriodIndex.second +generated/pandas.PeriodIndex.start_time,../reference/api/pandas.PeriodIndex.start_time 
+generated/pandas.PeriodIndex.strftime,../reference/api/pandas.PeriodIndex.strftime +generated/pandas.PeriodIndex.to_timestamp,../reference/api/pandas.PeriodIndex.to_timestamp +generated/pandas.PeriodIndex.weekday,../reference/api/pandas.PeriodIndex.weekday +generated/pandas.PeriodIndex.week,../reference/api/pandas.PeriodIndex.week +generated/pandas.PeriodIndex.weekofyear,../reference/api/pandas.PeriodIndex.weekofyear +generated/pandas.PeriodIndex.year,../reference/api/pandas.PeriodIndex.year +generated/pandas.Period.is_leap_year,../reference/api/pandas.Period.is_leap_year +generated/pandas.Period.minute,../reference/api/pandas.Period.minute +generated/pandas.Period.month,../reference/api/pandas.Period.month +generated/pandas.Period.now,../reference/api/pandas.Period.now +generated/pandas.Period.ordinal,../reference/api/pandas.Period.ordinal +generated/pandas.Period.quarter,../reference/api/pandas.Period.quarter +generated/pandas.Period.qyear,../reference/api/pandas.Period.qyear +generated/pandas.period_range,../reference/api/pandas.period_range +generated/pandas.Period.second,../reference/api/pandas.Period.second +generated/pandas.Period.start_time,../reference/api/pandas.Period.start_time +generated/pandas.Period.strftime,../reference/api/pandas.Period.strftime +generated/pandas.Period.to_timestamp,../reference/api/pandas.Period.to_timestamp +generated/pandas.Period.weekday,../reference/api/pandas.Period.weekday +generated/pandas.Period.week,../reference/api/pandas.Period.week +generated/pandas.Period.weekofyear,../reference/api/pandas.Period.weekofyear +generated/pandas.Period.year,../reference/api/pandas.Period.year +generated/pandas.pivot,../reference/api/pandas.pivot +generated/pandas.pivot_table,../reference/api/pandas.pivot_table +generated/pandas.plotting.andrews_curves,../reference/api/pandas.plotting.andrews_curves +generated/pandas.plotting.bootstrap_plot,../reference/api/pandas.plotting.bootstrap_plot 
+generated/pandas.plotting.deregister_matplotlib_converters,../reference/api/pandas.plotting.deregister_matplotlib_converters +generated/pandas.plotting.lag_plot,../reference/api/pandas.plotting.lag_plot +generated/pandas.plotting.parallel_coordinates,../reference/api/pandas.plotting.parallel_coordinates +generated/pandas.plotting.radviz,../reference/api/pandas.plotting.radviz +generated/pandas.plotting.register_matplotlib_converters,../reference/api/pandas.plotting.register_matplotlib_converters +generated/pandas.plotting.scatter_matrix,../reference/api/pandas.plotting.scatter_matrix +generated/pandas.qcut,../reference/api/pandas.qcut +generated/pandas.RangeIndex.from_range,../reference/api/pandas.RangeIndex.from_range +generated/pandas.RangeIndex,../reference/api/pandas.RangeIndex +generated/pandas.read_clipboard,../reference/api/pandas.read_clipboard +generated/pandas.read_csv,../reference/api/pandas.read_csv +generated/pandas.read_excel,../reference/api/pandas.read_excel +generated/pandas.read_feather,../reference/api/pandas.read_feather +generated/pandas.read_fwf,../reference/api/pandas.read_fwf +generated/pandas.read_gbq,../reference/api/pandas.read_gbq +generated/pandas.read_hdf,../reference/api/pandas.read_hdf +generated/pandas.read,../reference/api/pandas.read +generated/pandas.read_json,../reference/api/pandas.read_json +generated/pandas.read_parquet,../reference/api/pandas.read_parquet +generated/pandas.read_pickle,../reference/api/pandas.read_pickle +generated/pandas.read_sas,../reference/api/pandas.read_sas +generated/pandas.read_sql,../reference/api/pandas.read_sql +generated/pandas.read_sql_query,../reference/api/pandas.read_sql_query +generated/pandas.read_sql_table,../reference/api/pandas.read_sql_table +generated/pandas.read_stata,../reference/api/pandas.read_stata +generated/pandas.read_table,../reference/api/pandas.read_table +generated/pandas.reset_option,../reference/api/pandas.reset_option 
+generated/pandas.Series.abs,../reference/api/pandas.Series.abs +generated/pandas.Series.add,../reference/api/pandas.Series.add +generated/pandas.Series.add_prefix,../reference/api/pandas.Series.add_prefix +generated/pandas.Series.add_suffix,../reference/api/pandas.Series.add_suffix +generated/pandas.Series.agg,../reference/api/pandas.Series.agg +generated/pandas.Series.aggregate,../reference/api/pandas.Series.aggregate +generated/pandas.Series.align,../reference/api/pandas.Series.align +generated/pandas.Series.all,../reference/api/pandas.Series.all +generated/pandas.Series.any,../reference/api/pandas.Series.any +generated/pandas.Series.append,../reference/api/pandas.Series.append +generated/pandas.Series.apply,../reference/api/pandas.Series.apply +generated/pandas.Series.argmax,../reference/api/pandas.Series.argmax +generated/pandas.Series.argmin,../reference/api/pandas.Series.argmin +generated/pandas.Series.argsort,../reference/api/pandas.Series.argsort +generated/pandas.Series.__array__,../reference/api/pandas.Series.__array__ +generated/pandas.Series.array,../reference/api/pandas.Series.array +generated/pandas.Series.as_blocks,../reference/api/pandas.Series.as_blocks +generated/pandas.Series.asfreq,../reference/api/pandas.Series.asfreq +generated/pandas.Series.as_matrix,../reference/api/pandas.Series.as_matrix +generated/pandas.Series.asobject,../reference/api/pandas.Series.asobject +generated/pandas.Series.asof,../reference/api/pandas.Series.asof +generated/pandas.Series.astype,../reference/api/pandas.Series.astype +generated/pandas.Series.at,../reference/api/pandas.Series.at +generated/pandas.Series.at_time,../reference/api/pandas.Series.at_time +generated/pandas.Series.autocorr,../reference/api/pandas.Series.autocorr +generated/pandas.Series.axes,../reference/api/pandas.Series.axes +generated/pandas.Series.between,../reference/api/pandas.Series.between +generated/pandas.Series.between_time,../reference/api/pandas.Series.between_time 
+generated/pandas.Series.bfill,../reference/api/pandas.Series.bfill +generated/pandas.Series.blocks,../reference/api/pandas.Series.blocks +generated/pandas.Series.bool,../reference/api/pandas.Series.bool +generated/pandas.Series.cat.add_categories,../reference/api/pandas.Series.cat.add_categories +generated/pandas.Series.cat.as_ordered,../reference/api/pandas.Series.cat.as_ordered +generated/pandas.Series.cat.as_unordered,../reference/api/pandas.Series.cat.as_unordered +generated/pandas.Series.cat.categories,../reference/api/pandas.Series.cat.categories +generated/pandas.Series.cat.codes,../reference/api/pandas.Series.cat.codes +generated/pandas.Series.cat,../reference/api/pandas.Series.cat +generated/pandas.Series.cat.ordered,../reference/api/pandas.Series.cat.ordered +generated/pandas.Series.cat.remove_categories,../reference/api/pandas.Series.cat.remove_categories +generated/pandas.Series.cat.remove_unused_categories,../reference/api/pandas.Series.cat.remove_unused_categories +generated/pandas.Series.cat.rename_categories,../reference/api/pandas.Series.cat.rename_categories +generated/pandas.Series.cat.reorder_categories,../reference/api/pandas.Series.cat.reorder_categories +generated/pandas.Series.cat.set_categories,../reference/api/pandas.Series.cat.set_categories +generated/pandas.Series.clip,../reference/api/pandas.Series.clip +generated/pandas.Series.clip_lower,../reference/api/pandas.Series.clip_lower +generated/pandas.Series.clip_upper,../reference/api/pandas.Series.clip_upper +generated/pandas.Series.combine_first,../reference/api/pandas.Series.combine_first +generated/pandas.Series.combine,../reference/api/pandas.Series.combine +generated/pandas.Series.compress,../reference/api/pandas.Series.compress +generated/pandas.Series.convert_objects,../reference/api/pandas.Series.convert_objects +generated/pandas.Series.copy,../reference/api/pandas.Series.copy +generated/pandas.Series.corr,../reference/api/pandas.Series.corr 
+generated/pandas.Series.count,../reference/api/pandas.Series.count +generated/pandas.Series.cov,../reference/api/pandas.Series.cov +generated/pandas.Series.cummax,../reference/api/pandas.Series.cummax +generated/pandas.Series.cummin,../reference/api/pandas.Series.cummin +generated/pandas.Series.cumprod,../reference/api/pandas.Series.cumprod +generated/pandas.Series.cumsum,../reference/api/pandas.Series.cumsum +generated/pandas.Series.data,../reference/api/pandas.Series.data +generated/pandas.Series.describe,../reference/api/pandas.Series.describe +generated/pandas.Series.diff,../reference/api/pandas.Series.diff +generated/pandas.Series.div,../reference/api/pandas.Series.div +generated/pandas.Series.divide,../reference/api/pandas.Series.divide +generated/pandas.Series.divmod,../reference/api/pandas.Series.divmod +generated/pandas.Series.dot,../reference/api/pandas.Series.dot +generated/pandas.Series.drop_duplicates,../reference/api/pandas.Series.drop_duplicates +generated/pandas.Series.drop,../reference/api/pandas.Series.drop +generated/pandas.Series.droplevel,../reference/api/pandas.Series.droplevel +generated/pandas.Series.dropna,../reference/api/pandas.Series.dropna +generated/pandas.Series.dt.ceil,../reference/api/pandas.Series.dt.ceil +generated/pandas.Series.dt.components,../reference/api/pandas.Series.dt.components +generated/pandas.Series.dt.date,../reference/api/pandas.Series.dt.date +generated/pandas.Series.dt.day,../reference/api/pandas.Series.dt.day +generated/pandas.Series.dt.day_name,../reference/api/pandas.Series.dt.day_name +generated/pandas.Series.dt.dayofweek,../reference/api/pandas.Series.dt.dayofweek +generated/pandas.Series.dt.dayofyear,../reference/api/pandas.Series.dt.dayofyear +generated/pandas.Series.dt.days,../reference/api/pandas.Series.dt.days +generated/pandas.Series.dt.days_in_month,../reference/api/pandas.Series.dt.days_in_month +generated/pandas.Series.dt.daysinmonth,../reference/api/pandas.Series.dt.daysinmonth 
+generated/pandas.Series.dt.end_time,../reference/api/pandas.Series.dt.end_time +generated/pandas.Series.dt.floor,../reference/api/pandas.Series.dt.floor +generated/pandas.Series.dt.freq,../reference/api/pandas.Series.dt.freq +generated/pandas.Series.dt.hour,../reference/api/pandas.Series.dt.hour +generated/pandas.Series.dt,../reference/api/pandas.Series.dt +generated/pandas.Series.dt.is_leap_year,../reference/api/pandas.Series.dt.is_leap_year +generated/pandas.Series.dt.is_month_end,../reference/api/pandas.Series.dt.is_month_end +generated/pandas.Series.dt.is_month_start,../reference/api/pandas.Series.dt.is_month_start +generated/pandas.Series.dt.is_quarter_end,../reference/api/pandas.Series.dt.is_quarter_end +generated/pandas.Series.dt.is_quarter_start,../reference/api/pandas.Series.dt.is_quarter_start +generated/pandas.Series.dt.is_year_end,../reference/api/pandas.Series.dt.is_year_end +generated/pandas.Series.dt.is_year_start,../reference/api/pandas.Series.dt.is_year_start +generated/pandas.Series.dt.microsecond,../reference/api/pandas.Series.dt.microsecond +generated/pandas.Series.dt.microseconds,../reference/api/pandas.Series.dt.microseconds +generated/pandas.Series.dt.minute,../reference/api/pandas.Series.dt.minute +generated/pandas.Series.dt.month,../reference/api/pandas.Series.dt.month +generated/pandas.Series.dt.month_name,../reference/api/pandas.Series.dt.month_name +generated/pandas.Series.dt.nanosecond,../reference/api/pandas.Series.dt.nanosecond +generated/pandas.Series.dt.nanoseconds,../reference/api/pandas.Series.dt.nanoseconds +generated/pandas.Series.dt.normalize,../reference/api/pandas.Series.dt.normalize +generated/pandas.Series.dt.quarter,../reference/api/pandas.Series.dt.quarter +generated/pandas.Series.dt.qyear,../reference/api/pandas.Series.dt.qyear +generated/pandas.Series.dt.round,../reference/api/pandas.Series.dt.round +generated/pandas.Series.dt.second,../reference/api/pandas.Series.dt.second 
+generated/pandas.Series.dt.seconds,../reference/api/pandas.Series.dt.seconds +generated/pandas.Series.dt.start_time,../reference/api/pandas.Series.dt.start_time +generated/pandas.Series.dt.strftime,../reference/api/pandas.Series.dt.strftime +generated/pandas.Series.dt.time,../reference/api/pandas.Series.dt.time +generated/pandas.Series.dt.timetz,../reference/api/pandas.Series.dt.timetz +generated/pandas.Series.dt.to_period,../reference/api/pandas.Series.dt.to_period +generated/pandas.Series.dt.to_pydatetime,../reference/api/pandas.Series.dt.to_pydatetime +generated/pandas.Series.dt.to_pytimedelta,../reference/api/pandas.Series.dt.to_pytimedelta +generated/pandas.Series.dt.total_seconds,../reference/api/pandas.Series.dt.total_seconds +generated/pandas.Series.dt.tz_convert,../reference/api/pandas.Series.dt.tz_convert +generated/pandas.Series.dt.tz,../reference/api/pandas.Series.dt.tz +generated/pandas.Series.dt.tz_localize,../reference/api/pandas.Series.dt.tz_localize +generated/pandas.Series.dt.weekday,../reference/api/pandas.Series.dt.weekday +generated/pandas.Series.dt.week,../reference/api/pandas.Series.dt.week +generated/pandas.Series.dt.weekofyear,../reference/api/pandas.Series.dt.weekofyear +generated/pandas.Series.dt.year,../reference/api/pandas.Series.dt.year +generated/pandas.Series.dtype,../reference/api/pandas.Series.dtype +generated/pandas.Series.dtypes,../reference/api/pandas.Series.dtypes +generated/pandas.Series.duplicated,../reference/api/pandas.Series.duplicated +generated/pandas.Series.empty,../reference/api/pandas.Series.empty +generated/pandas.Series.eq,../reference/api/pandas.Series.eq +generated/pandas.Series.equals,../reference/api/pandas.Series.equals +generated/pandas.Series.ewm,../reference/api/pandas.Series.ewm +generated/pandas.Series.expanding,../reference/api/pandas.Series.expanding +generated/pandas.Series.factorize,../reference/api/pandas.Series.factorize +generated/pandas.Series.ffill,../reference/api/pandas.Series.ffill 
+generated/pandas.Series.fillna,../reference/api/pandas.Series.fillna +generated/pandas.Series.filter,../reference/api/pandas.Series.filter +generated/pandas.Series.first,../reference/api/pandas.Series.first +generated/pandas.Series.first_valid_index,../reference/api/pandas.Series.first_valid_index +generated/pandas.Series.floordiv,../reference/api/pandas.Series.floordiv +generated/pandas.Series.from_array,../reference/api/pandas.Series.from_array +generated/pandas.Series.from_csv,../reference/api/pandas.Series.from_csv +generated/pandas.Series.ge,../reference/api/pandas.Series.ge +generated/pandas.Series.get,../reference/api/pandas.Series.get +generated/pandas.Series.get_value,../reference/api/pandas.Series.get_value +generated/pandas.Series.groupby,../reference/api/pandas.Series.groupby +generated/pandas.Series.gt,../reference/api/pandas.Series.gt +generated/pandas.Series.hasnans,../reference/api/pandas.Series.hasnans +generated/pandas.Series.head,../reference/api/pandas.Series.head +generated/pandas.Series.hist,../reference/api/pandas.Series.hist +generated/pandas.Series,../reference/api/pandas.Series +generated/pandas.Series.iat,../reference/api/pandas.Series.iat +generated/pandas.Series.idxmax,../reference/api/pandas.Series.idxmax +generated/pandas.Series.idxmin,../reference/api/pandas.Series.idxmin +generated/pandas.Series.iloc,../reference/api/pandas.Series.iloc +generated/pandas.Series.imag,../reference/api/pandas.Series.imag +generated/pandas.Series.index,../reference/api/pandas.Series.index +generated/pandas.Series.infer_objects,../reference/api/pandas.Series.infer_objects +generated/pandas.Series.interpolate,../reference/api/pandas.Series.interpolate +generated/pandas.Series.is_copy,../reference/api/pandas.Series.is_copy +generated/pandas.Series.isin,../reference/api/pandas.Series.isin +generated/pandas.Series.is_monotonic_decreasing,../reference/api/pandas.Series.is_monotonic_decreasing 
+generated/pandas.Series.is_monotonic,../reference/api/pandas.Series.is_monotonic +generated/pandas.Series.is_monotonic_increasing,../reference/api/pandas.Series.is_monotonic_increasing +generated/pandas.Series.isna,../reference/api/pandas.Series.isna +generated/pandas.Series.isnull,../reference/api/pandas.Series.isnull +generated/pandas.Series.is_unique,../reference/api/pandas.Series.is_unique +generated/pandas.Series.item,../reference/api/pandas.Series.item +generated/pandas.Series.items,../reference/api/pandas.Series.items +generated/pandas.Series.__iter__,../reference/api/pandas.Series.__iter__ +generated/pandas.Series.iteritems,../reference/api/pandas.Series.iteritems +generated/pandas.Series.ix,../reference/api/pandas.Series.ix +generated/pandas.Series.keys,../reference/api/pandas.Series.keys +generated/pandas.Series.kurt,../reference/api/pandas.Series.kurt +generated/pandas.Series.kurtosis,../reference/api/pandas.Series.kurtosis +generated/pandas.Series.last,../reference/api/pandas.Series.last +generated/pandas.Series.last_valid_index,../reference/api/pandas.Series.last_valid_index +generated/pandas.Series.le,../reference/api/pandas.Series.le +generated/pandas.Series.loc,../reference/api/pandas.Series.loc +generated/pandas.Series.lt,../reference/api/pandas.Series.lt +generated/pandas.Series.mad,../reference/api/pandas.Series.mad +generated/pandas.Series.map,../reference/api/pandas.Series.map +generated/pandas.Series.mask,../reference/api/pandas.Series.mask +generated/pandas.Series.max,../reference/api/pandas.Series.max +generated/pandas.Series.mean,../reference/api/pandas.Series.mean +generated/pandas.Series.median,../reference/api/pandas.Series.median +generated/pandas.Series.memory_usage,../reference/api/pandas.Series.memory_usage +generated/pandas.Series.min,../reference/api/pandas.Series.min +generated/pandas.Series.mode,../reference/api/pandas.Series.mode +generated/pandas.Series.mod,../reference/api/pandas.Series.mod 
+generated/pandas.Series.mul,../reference/api/pandas.Series.mul +generated/pandas.Series.multiply,../reference/api/pandas.Series.multiply +generated/pandas.Series.name,../reference/api/pandas.Series.name +generated/pandas.Series.nbytes,../reference/api/pandas.Series.nbytes +generated/pandas.Series.ndim,../reference/api/pandas.Series.ndim +generated/pandas.Series.ne,../reference/api/pandas.Series.ne +generated/pandas.Series.nlargest,../reference/api/pandas.Series.nlargest +generated/pandas.Series.nonzero,../reference/api/pandas.Series.nonzero +generated/pandas.Series.notna,../reference/api/pandas.Series.notna +generated/pandas.Series.notnull,../reference/api/pandas.Series.notnull +generated/pandas.Series.nsmallest,../reference/api/pandas.Series.nsmallest +generated/pandas.Series.nunique,../reference/api/pandas.Series.nunique +generated/pandas.Series.pct_change,../reference/api/pandas.Series.pct_change +generated/pandas.Series.pipe,../reference/api/pandas.Series.pipe +generated/pandas.Series.plot.area,../reference/api/pandas.Series.plot.area +generated/pandas.Series.plot.barh,../reference/api/pandas.Series.plot.barh +generated/pandas.Series.plot.bar,../reference/api/pandas.Series.plot.bar +generated/pandas.Series.plot.box,../reference/api/pandas.Series.plot.box +generated/pandas.Series.plot.density,../reference/api/pandas.Series.plot.density +generated/pandas.Series.plot.hist,../reference/api/pandas.Series.plot.hist +generated/pandas.Series.plot,../reference/api/pandas.Series.plot +generated/pandas.Series.plot.kde,../reference/api/pandas.Series.plot.kde +generated/pandas.Series.plot.line,../reference/api/pandas.Series.plot.line +generated/pandas.Series.plot.pie,../reference/api/pandas.Series.plot.pie +generated/pandas.Series.pop,../reference/api/pandas.Series.pop +generated/pandas.Series.pow,../reference/api/pandas.Series.pow +generated/pandas.Series.prod,../reference/api/pandas.Series.prod +generated/pandas.Series.product,../reference/api/pandas.Series.product 
+generated/pandas.Series.ptp,../reference/api/pandas.Series.ptp +generated/pandas.Series.quantile,../reference/api/pandas.Series.quantile +generated/pandas.Series.radd,../reference/api/pandas.Series.radd +generated/pandas.Series.rank,../reference/api/pandas.Series.rank +generated/pandas.Series.ravel,../reference/api/pandas.Series.ravel +generated/pandas.Series.rdiv,../reference/api/pandas.Series.rdiv +generated/pandas.Series.rdivmod,../reference/api/pandas.Series.rdivmod +generated/pandas.Series.real,../reference/api/pandas.Series.real +generated/pandas.Series.reindex_axis,../reference/api/pandas.Series.reindex_axis +generated/pandas.Series.reindex,../reference/api/pandas.Series.reindex +generated/pandas.Series.reindex_like,../reference/api/pandas.Series.reindex_like +generated/pandas.Series.rename_axis,../reference/api/pandas.Series.rename_axis +generated/pandas.Series.rename,../reference/api/pandas.Series.rename +generated/pandas.Series.reorder_levels,../reference/api/pandas.Series.reorder_levels +generated/pandas.Series.repeat,../reference/api/pandas.Series.repeat +generated/pandas.Series.replace,../reference/api/pandas.Series.replace +generated/pandas.Series.resample,../reference/api/pandas.Series.resample +generated/pandas.Series.reset_index,../reference/api/pandas.Series.reset_index +generated/pandas.Series.rfloordiv,../reference/api/pandas.Series.rfloordiv +generated/pandas.Series.rmod,../reference/api/pandas.Series.rmod +generated/pandas.Series.rmul,../reference/api/pandas.Series.rmul +generated/pandas.Series.rolling,../reference/api/pandas.Series.rolling +generated/pandas.Series.round,../reference/api/pandas.Series.round +generated/pandas.Series.rpow,../reference/api/pandas.Series.rpow +generated/pandas.Series.rsub,../reference/api/pandas.Series.rsub +generated/pandas.Series.rtruediv,../reference/api/pandas.Series.rtruediv +generated/pandas.Series.sample,../reference/api/pandas.Series.sample 
+generated/pandas.Series.searchsorted,../reference/api/pandas.Series.searchsorted +generated/pandas.Series.select,../reference/api/pandas.Series.select +generated/pandas.Series.sem,../reference/api/pandas.Series.sem +generated/pandas.Series.set_axis,../reference/api/pandas.Series.set_axis +generated/pandas.Series.set_value,../reference/api/pandas.Series.set_value +generated/pandas.Series.shape,../reference/api/pandas.Series.shape +generated/pandas.Series.shift,../reference/api/pandas.Series.shift +generated/pandas.Series.size,../reference/api/pandas.Series.size +generated/pandas.Series.skew,../reference/api/pandas.Series.skew +generated/pandas.Series.slice_shift,../reference/api/pandas.Series.slice_shift +generated/pandas.Series.sort_index,../reference/api/pandas.Series.sort_index +generated/pandas.Series.sort_values,../reference/api/pandas.Series.sort_values +generated/pandas.Series.sparse.density,../reference/api/pandas.Series.sparse.density +generated/pandas.Series.sparse.fill_value,../reference/api/pandas.Series.sparse.fill_value +generated/pandas.Series.sparse.from_coo,../reference/api/pandas.Series.sparse.from_coo +generated/pandas.Series.sparse.npoints,../reference/api/pandas.Series.sparse.npoints +generated/pandas.Series.sparse.sp_values,../reference/api/pandas.Series.sparse.sp_values +generated/pandas.Series.sparse.to_coo,../reference/api/pandas.Series.sparse.to_coo +generated/pandas.Series.squeeze,../reference/api/pandas.Series.squeeze +generated/pandas.Series.std,../reference/api/pandas.Series.std +generated/pandas.Series.str.capitalize,../reference/api/pandas.Series.str.capitalize +generated/pandas.Series.str.cat,../reference/api/pandas.Series.str.cat +generated/pandas.Series.str.center,../reference/api/pandas.Series.str.center +generated/pandas.Series.str.contains,../reference/api/pandas.Series.str.contains +generated/pandas.Series.str.count,../reference/api/pandas.Series.str.count 
+generated/pandas.Series.str.decode,../reference/api/pandas.Series.str.decode +generated/pandas.Series.str.encode,../reference/api/pandas.Series.str.encode +generated/pandas.Series.str.endswith,../reference/api/pandas.Series.str.endswith +generated/pandas.Series.str.extractall,../reference/api/pandas.Series.str.extractall +generated/pandas.Series.str.extract,../reference/api/pandas.Series.str.extract +generated/pandas.Series.str.findall,../reference/api/pandas.Series.str.findall +generated/pandas.Series.str.find,../reference/api/pandas.Series.str.find +generated/pandas.Series.str.get_dummies,../reference/api/pandas.Series.str.get_dummies +generated/pandas.Series.str.get,../reference/api/pandas.Series.str.get +generated/pandas.Series.str,../reference/api/pandas.Series.str +generated/pandas.Series.str.index,../reference/api/pandas.Series.str.index +generated/pandas.Series.str.isalnum,../reference/api/pandas.Series.str.isalnum +generated/pandas.Series.str.isalpha,../reference/api/pandas.Series.str.isalpha +generated/pandas.Series.str.isdecimal,../reference/api/pandas.Series.str.isdecimal +generated/pandas.Series.str.isdigit,../reference/api/pandas.Series.str.isdigit +generated/pandas.Series.str.islower,../reference/api/pandas.Series.str.islower +generated/pandas.Series.str.isnumeric,../reference/api/pandas.Series.str.isnumeric +generated/pandas.Series.str.isspace,../reference/api/pandas.Series.str.isspace +generated/pandas.Series.str.istitle,../reference/api/pandas.Series.str.istitle +generated/pandas.Series.str.isupper,../reference/api/pandas.Series.str.isupper +generated/pandas.Series.str.join,../reference/api/pandas.Series.str.join +generated/pandas.Series.str.len,../reference/api/pandas.Series.str.len +generated/pandas.Series.str.ljust,../reference/api/pandas.Series.str.ljust +generated/pandas.Series.str.lower,../reference/api/pandas.Series.str.lower +generated/pandas.Series.str.lstrip,../reference/api/pandas.Series.str.lstrip 
+generated/pandas.Series.str.match,../reference/api/pandas.Series.str.match +generated/pandas.Series.str.normalize,../reference/api/pandas.Series.str.normalize +generated/pandas.Series.str.pad,../reference/api/pandas.Series.str.pad +generated/pandas.Series.str.partition,../reference/api/pandas.Series.str.partition +generated/pandas.Series.str.repeat,../reference/api/pandas.Series.str.repeat +generated/pandas.Series.str.replace,../reference/api/pandas.Series.str.replace +generated/pandas.Series.str.rfind,../reference/api/pandas.Series.str.rfind +generated/pandas.Series.str.rindex,../reference/api/pandas.Series.str.rindex +generated/pandas.Series.str.rjust,../reference/api/pandas.Series.str.rjust +generated/pandas.Series.str.rpartition,../reference/api/pandas.Series.str.rpartition +generated/pandas.Series.str.rsplit,../reference/api/pandas.Series.str.rsplit +generated/pandas.Series.str.rstrip,../reference/api/pandas.Series.str.rstrip +generated/pandas.Series.str.slice,../reference/api/pandas.Series.str.slice +generated/pandas.Series.str.slice_replace,../reference/api/pandas.Series.str.slice_replace +generated/pandas.Series.str.split,../reference/api/pandas.Series.str.split +generated/pandas.Series.str.startswith,../reference/api/pandas.Series.str.startswith +generated/pandas.Series.str.strip,../reference/api/pandas.Series.str.strip +generated/pandas.Series.str.swapcase,../reference/api/pandas.Series.str.swapcase +generated/pandas.Series.str.title,../reference/api/pandas.Series.str.title +generated/pandas.Series.str.translate,../reference/api/pandas.Series.str.translate +generated/pandas.Series.str.upper,../reference/api/pandas.Series.str.upper +generated/pandas.Series.str.wrap,../reference/api/pandas.Series.str.wrap +generated/pandas.Series.str.zfill,../reference/api/pandas.Series.str.zfill +generated/pandas.Series.sub,../reference/api/pandas.Series.sub +generated/pandas.Series.subtract,../reference/api/pandas.Series.subtract 
+generated/pandas.Series.sum,../reference/api/pandas.Series.sum +generated/pandas.Series.swapaxes,../reference/api/pandas.Series.swapaxes +generated/pandas.Series.swaplevel,../reference/api/pandas.Series.swaplevel +generated/pandas.Series.tail,../reference/api/pandas.Series.tail +generated/pandas.Series.take,../reference/api/pandas.Series.take +generated/pandas.Series.T,../reference/api/pandas.Series.T +generated/pandas.Series.timetuple,../reference/api/pandas.Series.timetuple +generated/pandas.Series.to_clipboard,../reference/api/pandas.Series.to_clipboard +generated/pandas.Series.to_csv,../reference/api/pandas.Series.to_csv +generated/pandas.Series.to_dict,../reference/api/pandas.Series.to_dict +generated/pandas.Series.to_excel,../reference/api/pandas.Series.to_excel +generated/pandas.Series.to_frame,../reference/api/pandas.Series.to_frame +generated/pandas.Series.to_hdf,../reference/api/pandas.Series.to_hdf +generated/pandas.Series.to_json,../reference/api/pandas.Series.to_json +generated/pandas.Series.to_latex,../reference/api/pandas.Series.to_latex +generated/pandas.Series.to_list,../reference/api/pandas.Series.to_list +generated/pandas.Series.tolist,../reference/api/pandas.Series.tolist +generated/pandas.Series.to_numpy,../reference/api/pandas.Series.to_numpy +generated/pandas.Series.to_period,../reference/api/pandas.Series.to_period +generated/pandas.Series.to_pickle,../reference/api/pandas.Series.to_pickle +generated/pandas.Series.to_sql,../reference/api/pandas.Series.to_sql +generated/pandas.Series.to_string,../reference/api/pandas.Series.to_string +generated/pandas.Series.to_timestamp,../reference/api/pandas.Series.to_timestamp +generated/pandas.Series.to_xarray,../reference/api/pandas.Series.to_xarray +generated/pandas.Series.transform,../reference/api/pandas.Series.transform +generated/pandas.Series.transpose,../reference/api/pandas.Series.transpose +generated/pandas.Series.truediv,../reference/api/pandas.Series.truediv 
+generated/pandas.Series.truncate,../reference/api/pandas.Series.truncate +generated/pandas.Series.tshift,../reference/api/pandas.Series.tshift +generated/pandas.Series.tz_convert,../reference/api/pandas.Series.tz_convert +generated/pandas.Series.tz_localize,../reference/api/pandas.Series.tz_localize +generated/pandas.Series.unique,../reference/api/pandas.Series.unique +generated/pandas.Series.unstack,../reference/api/pandas.Series.unstack +generated/pandas.Series.update,../reference/api/pandas.Series.update +generated/pandas.Series.valid,../reference/api/pandas.Series.valid +generated/pandas.Series.value_counts,../reference/api/pandas.Series.value_counts +generated/pandas.Series.values,../reference/api/pandas.Series.values +generated/pandas.Series.var,../reference/api/pandas.Series.var +generated/pandas.Series.view,../reference/api/pandas.Series.view +generated/pandas.Series.where,../reference/api/pandas.Series.where +generated/pandas.Series.xs,../reference/api/pandas.Series.xs +generated/pandas.set_option,../reference/api/pandas.set_option +generated/pandas.SparseDataFrame.to_coo,../reference/api/pandas.SparseDataFrame.to_coo +generated/pandas.SparseSeries.from_coo,../reference/api/pandas.SparseSeries.from_coo +generated/pandas.SparseSeries.to_coo,../reference/api/pandas.SparseSeries.to_coo +generated/pandas.test,../reference/api/pandas.test +generated/pandas.testing.assert_frame_equal,../reference/api/pandas.testing.assert_frame_equal +generated/pandas.testing.assert_index_equal,../reference/api/pandas.testing.assert_index_equal +generated/pandas.testing.assert_series_equal,../reference/api/pandas.testing.assert_series_equal +generated/pandas.Timedelta.asm8,../reference/api/pandas.Timedelta.asm8 +generated/pandas.Timedelta.ceil,../reference/api/pandas.Timedelta.ceil +generated/pandas.Timedelta.components,../reference/api/pandas.Timedelta.components +generated/pandas.Timedelta.days,../reference/api/pandas.Timedelta.days 
+generated/pandas.Timedelta.delta,../reference/api/pandas.Timedelta.delta +generated/pandas.Timedelta.floor,../reference/api/pandas.Timedelta.floor +generated/pandas.Timedelta.freq,../reference/api/pandas.Timedelta.freq +generated/pandas.Timedelta,../reference/api/pandas.Timedelta +generated/pandas.TimedeltaIndex.ceil,../reference/api/pandas.TimedeltaIndex.ceil +generated/pandas.TimedeltaIndex.components,../reference/api/pandas.TimedeltaIndex.components +generated/pandas.TimedeltaIndex.days,../reference/api/pandas.TimedeltaIndex.days +generated/pandas.TimedeltaIndex.floor,../reference/api/pandas.TimedeltaIndex.floor +generated/pandas.TimedeltaIndex,../reference/api/pandas.TimedeltaIndex +generated/pandas.TimedeltaIndex.inferred_freq,../reference/api/pandas.TimedeltaIndex.inferred_freq +generated/pandas.TimedeltaIndex.microseconds,../reference/api/pandas.TimedeltaIndex.microseconds +generated/pandas.TimedeltaIndex.nanoseconds,../reference/api/pandas.TimedeltaIndex.nanoseconds +generated/pandas.TimedeltaIndex.round,../reference/api/pandas.TimedeltaIndex.round +generated/pandas.TimedeltaIndex.seconds,../reference/api/pandas.TimedeltaIndex.seconds +generated/pandas.TimedeltaIndex.to_frame,../reference/api/pandas.TimedeltaIndex.to_frame +generated/pandas.TimedeltaIndex.to_pytimedelta,../reference/api/pandas.TimedeltaIndex.to_pytimedelta +generated/pandas.TimedeltaIndex.to_series,../reference/api/pandas.TimedeltaIndex.to_series +generated/pandas.Timedelta.isoformat,../reference/api/pandas.Timedelta.isoformat +generated/pandas.Timedelta.is_populated,../reference/api/pandas.Timedelta.is_populated +generated/pandas.Timedelta.max,../reference/api/pandas.Timedelta.max +generated/pandas.Timedelta.microseconds,../reference/api/pandas.Timedelta.microseconds +generated/pandas.Timedelta.min,../reference/api/pandas.Timedelta.min +generated/pandas.Timedelta.nanoseconds,../reference/api/pandas.Timedelta.nanoseconds 
+generated/pandas.timedelta_range,../reference/api/pandas.timedelta_range +generated/pandas.Timedelta.resolution,../reference/api/pandas.Timedelta.resolution +generated/pandas.Timedelta.round,../reference/api/pandas.Timedelta.round +generated/pandas.Timedelta.seconds,../reference/api/pandas.Timedelta.seconds +generated/pandas.Timedelta.to_pytimedelta,../reference/api/pandas.Timedelta.to_pytimedelta +generated/pandas.Timedelta.total_seconds,../reference/api/pandas.Timedelta.total_seconds +generated/pandas.Timedelta.to_timedelta64,../reference/api/pandas.Timedelta.to_timedelta64 +generated/pandas.Timedelta.value,../reference/api/pandas.Timedelta.value +generated/pandas.Timedelta.view,../reference/api/pandas.Timedelta.view +generated/pandas.Timestamp.asm8,../reference/api/pandas.Timestamp.asm8 +generated/pandas.Timestamp.astimezone,../reference/api/pandas.Timestamp.astimezone +generated/pandas.Timestamp.ceil,../reference/api/pandas.Timestamp.ceil +generated/pandas.Timestamp.combine,../reference/api/pandas.Timestamp.combine +generated/pandas.Timestamp.ctime,../reference/api/pandas.Timestamp.ctime +generated/pandas.Timestamp.date,../reference/api/pandas.Timestamp.date +generated/pandas.Timestamp.day,../reference/api/pandas.Timestamp.day +generated/pandas.Timestamp.day_name,../reference/api/pandas.Timestamp.day_name +generated/pandas.Timestamp.dayofweek,../reference/api/pandas.Timestamp.dayofweek +generated/pandas.Timestamp.dayofyear,../reference/api/pandas.Timestamp.dayofyear +generated/pandas.Timestamp.days_in_month,../reference/api/pandas.Timestamp.days_in_month +generated/pandas.Timestamp.daysinmonth,../reference/api/pandas.Timestamp.daysinmonth +generated/pandas.Timestamp.dst,../reference/api/pandas.Timestamp.dst +generated/pandas.Timestamp.floor,../reference/api/pandas.Timestamp.floor +generated/pandas.Timestamp.fold,../reference/api/pandas.Timestamp.fold +generated/pandas.Timestamp.freq,../reference/api/pandas.Timestamp.freq 
+generated/pandas.Timestamp.freqstr,../reference/api/pandas.Timestamp.freqstr +generated/pandas.Timestamp.fromisoformat,../reference/api/pandas.Timestamp.fromisoformat +generated/pandas.Timestamp.fromordinal,../reference/api/pandas.Timestamp.fromordinal +generated/pandas.Timestamp.fromtimestamp,../reference/api/pandas.Timestamp.fromtimestamp +generated/pandas.Timestamp.hour,../reference/api/pandas.Timestamp.hour +generated/pandas.Timestamp,../reference/api/pandas.Timestamp +generated/pandas.Timestamp.is_leap_year,../reference/api/pandas.Timestamp.is_leap_year +generated/pandas.Timestamp.is_month_end,../reference/api/pandas.Timestamp.is_month_end +generated/pandas.Timestamp.is_month_start,../reference/api/pandas.Timestamp.is_month_start +generated/pandas.Timestamp.isocalendar,../reference/api/pandas.Timestamp.isocalendar +generated/pandas.Timestamp.isoformat,../reference/api/pandas.Timestamp.isoformat +generated/pandas.Timestamp.isoweekday,../reference/api/pandas.Timestamp.isoweekday +generated/pandas.Timestamp.is_quarter_end,../reference/api/pandas.Timestamp.is_quarter_end +generated/pandas.Timestamp.is_quarter_start,../reference/api/pandas.Timestamp.is_quarter_start +generated/pandas.Timestamp.is_year_end,../reference/api/pandas.Timestamp.is_year_end +generated/pandas.Timestamp.is_year_start,../reference/api/pandas.Timestamp.is_year_start +generated/pandas.Timestamp.max,../reference/api/pandas.Timestamp.max +generated/pandas.Timestamp.microsecond,../reference/api/pandas.Timestamp.microsecond +generated/pandas.Timestamp.min,../reference/api/pandas.Timestamp.min +generated/pandas.Timestamp.minute,../reference/api/pandas.Timestamp.minute +generated/pandas.Timestamp.month,../reference/api/pandas.Timestamp.month +generated/pandas.Timestamp.month_name,../reference/api/pandas.Timestamp.month_name +generated/pandas.Timestamp.nanosecond,../reference/api/pandas.Timestamp.nanosecond +generated/pandas.Timestamp.normalize,../reference/api/pandas.Timestamp.normalize 
+generated/pandas.Timestamp.now,../reference/api/pandas.Timestamp.now +generated/pandas.Timestamp.quarter,../reference/api/pandas.Timestamp.quarter +generated/pandas.Timestamp.replace,../reference/api/pandas.Timestamp.replace +generated/pandas.Timestamp.resolution,../reference/api/pandas.Timestamp.resolution +generated/pandas.Timestamp.round,../reference/api/pandas.Timestamp.round +generated/pandas.Timestamp.second,../reference/api/pandas.Timestamp.second +generated/pandas.Timestamp.strftime,../reference/api/pandas.Timestamp.strftime +generated/pandas.Timestamp.strptime,../reference/api/pandas.Timestamp.strptime +generated/pandas.Timestamp.time,../reference/api/pandas.Timestamp.time +generated/pandas.Timestamp.timestamp,../reference/api/pandas.Timestamp.timestamp +generated/pandas.Timestamp.timetuple,../reference/api/pandas.Timestamp.timetuple +generated/pandas.Timestamp.timetz,../reference/api/pandas.Timestamp.timetz +generated/pandas.Timestamp.to_datetime64,../reference/api/pandas.Timestamp.to_datetime64 +generated/pandas.Timestamp.today,../reference/api/pandas.Timestamp.today +generated/pandas.Timestamp.to_julian_date,../reference/api/pandas.Timestamp.to_julian_date +generated/pandas.Timestamp.toordinal,../reference/api/pandas.Timestamp.toordinal +generated/pandas.Timestamp.to_period,../reference/api/pandas.Timestamp.to_period +generated/pandas.Timestamp.to_pydatetime,../reference/api/pandas.Timestamp.to_pydatetime +generated/pandas.Timestamp.tz_convert,../reference/api/pandas.Timestamp.tz_convert +generated/pandas.Timestamp.tz,../reference/api/pandas.Timestamp.tz +generated/pandas.Timestamp.tzinfo,../reference/api/pandas.Timestamp.tzinfo +generated/pandas.Timestamp.tz_localize,../reference/api/pandas.Timestamp.tz_localize +generated/pandas.Timestamp.tzname,../reference/api/pandas.Timestamp.tzname +generated/pandas.Timestamp.utcfromtimestamp,../reference/api/pandas.Timestamp.utcfromtimestamp 
+generated/pandas.Timestamp.utcnow,../reference/api/pandas.Timestamp.utcnow +generated/pandas.Timestamp.utcoffset,../reference/api/pandas.Timestamp.utcoffset +generated/pandas.Timestamp.utctimetuple,../reference/api/pandas.Timestamp.utctimetuple +generated/pandas.Timestamp.value,../reference/api/pandas.Timestamp.value +generated/pandas.Timestamp.weekday,../reference/api/pandas.Timestamp.weekday +generated/pandas.Timestamp.weekday_name,../reference/api/pandas.Timestamp.weekday_name +generated/pandas.Timestamp.week,../reference/api/pandas.Timestamp.week +generated/pandas.Timestamp.weekofyear,../reference/api/pandas.Timestamp.weekofyear +generated/pandas.Timestamp.year,../reference/api/pandas.Timestamp.year +generated/pandas.to_datetime,../reference/api/pandas.to_datetime +generated/pandas.to_numeric,../reference/api/pandas.to_numeric +generated/pandas.to_timedelta,../reference/api/pandas.to_timedelta +generated/pandas.tseries.frequencies.to_offset,../reference/api/pandas.tseries.frequencies.to_offset +generated/pandas.unique,../reference/api/pandas.unique +generated/pandas.util.hash_array,../reference/api/pandas.util.hash_array +generated/pandas.util.hash_pandas_object,../reference/api/pandas.util.hash_pandas_object +generated/pandas.wide_to_long,../reference/api/pandas.wide_to_long + +# Cached searches +reference/api/pandas.DataFrame.from_csv,pandas.read_csv diff --git a/doc/source/_static/banklist.html b/doc/source/_static/banklist.html new file mode 100644 index 00000000..cb07c332 --- /dev/null +++ b/doc/source/_static/banklist.html @@ -0,0 +1,4885 @@ + + + + +FDIC: Failed Bank List + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip Header +
+
+
+ + +
+ + +

Federal Deposit
Insurance Corporation

+

Each depositor insured to at least $250,000 per insured bank

+
+ +
+
+ + + + + + +
+ +

Failed Bank List

+ +

The FDIC is often appointed as receiver for failed banks. This page contains useful information for the customers and vendors of these banks. This includes information on the acquiring bank (if applicable), how your accounts and loans are affected, and how vendors can file claims against the receivership. Failed Financial Institution Contact Search displays point of contact information related to failed banks.

+ +

This list includes banks which have failed since October 1, 2000. To search for banks that failed prior to those on this page, visit this link: Failures and Assistance Transactions

+ +

Failed Bank List - CSV file (Updated on Mondays. Also opens in Excel - Excel Help)

+ +

Due to the small screen size some information is no longer visible.
Full information available when viewed on a larger screen.

+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Bank NameCitySTCERTAcquiring InstitutionClosing DateUpdated Date
Banks of Wisconsin d/b/a Bank of KenoshaKenoshaWI35386North Shore Bank, FSBMay 31, 2013May 31, 2013
Central Arizona BankScottsdaleAZ34527Western State BankMay 14, 2013May 20, 2013
Sunrise BankValdostaGA58185Synovus BankMay 10, 2013May 21, 2013
Pisgah Community BankAshevilleNC58701Capital Bank, N.A.May 10, 2013May 14, 2013
Douglas County BankDouglasvilleGA21649Hamilton State BankApril 26, 2013May 16, 2013
Parkway BankLenoirNC57158CertusBank, National AssociationApril 26, 2013May 17, 2013
Chipola Community BankMariannaFL58034First Federal Bank of FloridaApril 19, 2013May 16, 2013
Heritage Bank of North FloridaOrange ParkFL26680FirstAtlantic BankApril 19, 2013May 16, 2013
First Federal BankLexingtonKY29594Your Community BankApril 19, 2013April 23, 2013
Gold Canyon BankGold CanyonAZ58066First Scottsdale Bank, National AssociationApril 5, 2013April 9, 2013
Frontier BankLaGrangeGA16431HeritageBank of the SouthMarch 8, 2013March 26, 2013
Covenant BankChicagoIL22476Liberty Bank and Trust CompanyFebruary 15, 2013March 4, 2013
1st Regents BankAndoverMN57157First Minnesota BankJanuary 18, 2013February 28, 2013
Westside Community BankUniversity PlaceWA33997Sunwest BankJanuary 11, 2013January 24, 2013
Community Bank of the OzarksSunrise BeachMO27331Bank of SullivanDecember 14, 2012January 24, 2013
Hometown Community BankBraseltonGA57928CertusBank, National AssociationNovember 16, 2012January 24, 2013
Citizens First National BankPrincetonIL3731Heartland Bank and Trust CompanyNovember 2, 2012January 24, 2013
Heritage Bank of FloridaLutzFL35009Centennial BankNovember 2, 2012January 24, 2013
NOVA BankBerwynPA27148No AcquirerOctober 26, 2012January 24, 2013
Excel BankSedaliaMO19189Simmons First National BankOctober 19, 2012January 24, 2013
First East Side Savings BankTamaracFL28144Stearns Bank N.A.October 19, 2012January 24, 2013
GulfSouth Private BankDestinFL58073SmartBankOctober 19, 2012January 24, 2013
First United BankCreteIL20685Old Plank Trail Community Bank, National AssociationSeptember 28, 2012November 15, 2012
Truman BankSt. LouisMO27316Simmons First National BankSeptember 14, 2012December 17, 2012
First Commercial BankBloomingtonMN35246Republic Bank & Trust CompanySeptember 7, 2012December 17, 2012
Waukegan Savings BankWaukeganIL28243First Midwest BankAugust 3, 2012October 11, 2012
Jasper Banking CompanyJasperGA16240Stearns Bank N.A.July 27, 2012December 17, 2012
Second Federal Savings and Loan Association of ChicagoChicagoIL27986Hinsdale Bank & Trust CompanyJuly 20, 2012January 14, 2013
Heartland BankLeawoodKS1361Metcalf BankJuly 20, 2012December 17, 2012
First Cherokee State BankWoodstockGA32711Community & Southern BankJuly 20, 2012October 31, 2012
Georgia Trust BankBufordGA57847Community & Southern BankJuly 20, 2012December 17, 2012
The Royal Palm Bank of FloridaNaplesFL57096First National Bank of the Gulf CoastJuly 20, 2012January 7, 2013
Glasgow Savings BankGlasgowMO1056Regional Missouri BankJuly 13, 2012October 11, 2012
Montgomery Bank & TrustAileyGA19498Ameris BankJuly 6, 2012October 31, 2012
The Farmers Bank of LynchburgLynchburgTN1690Clayton Bank and TrustJune 15, 2012October 31, 2012
Security Exchange BankMariettaGA35299Fidelity BankJune 15, 2012October 10, 2012
Putnam State BankPalatkaFL27405Harbor Community BankJune 15, 2012October 10, 2012
Waccamaw BankWhitevilleNC34515First Community BankJune 8, 2012November 8, 2012
Farmers' and Traders' State BankShabbonaIL9257First State BankJune 8, 2012October 10, 2012
Carolina Federal Savings BankCharlestonSC35372Bank of North CarolinaJune 8, 2012October 31, 2012
First Capital BankKingfisherOK416F & M BankJune 8, 2012October 10, 2012
Alabama Trust Bank, National AssociationSylacaugaAL35224Southern States BankMay 18, 2012May 20, 2013
Security Bank, National AssociationNorth LauderdaleFL23156Banesco USAMay 4, 2012October 31, 2012
Palm Desert National BankPalm DesertCA23632Pacific Premier BankApril 27, 2012May 17, 2013
Plantation Federal BankPawleys IslandSC32503First Federal BankApril 27, 2012May 17, 2013
Inter Savings Bank, fsb D/B/A InterBank, fsbMaple GroveMN31495Great Southern BankApril 27, 2012May 17, 2013
HarVest Bank of MarylandGaithersburgMD57766SonabankApril 27, 2012May 17, 2013
Bank of the Eastern ShoreCambridgeMD26759No AcquirerApril 27, 2012October 17, 2012
Fort Lee Federal Savings Bank, FSBFort LeeNJ35527Alma BankApril 20, 2012May 17, 2013
Fidelity BankDearbornMI33883The Huntington National BankMarch 30, 2012May 16, 2013
Premier BankWilmetteIL35419International Bank of ChicagoMarch 23, 2012October 17, 2012
Covenant Bank & TrustRock SpringGA58068Stearns Bank, N.A.March 23, 2012October 31, 2012
New City BankChicagoIL57597No AcquirerMarch 9, 2012October 29, 2012
Global Commerce BankDoravilleGA34046Metro City BankMarch 2, 2012October 31, 2012
Home Savings of AmericaLittle FallsMN29178No AcquirerFebruary 24, 2012December 17, 2012
Central Bank of GeorgiaEllavilleGA5687Ameris BankFebruary 24, 2012August 9, 2012
SCB BankShelbyvilleIN29761First Merchants Bank, National AssociationFebruary 10, 2012March 25, 2013
Charter National Bank and TrustHoffman EstatesIL23187Barrington Bank & Trust Company, National AssociationFebruary 10, 2012March 25, 2013
BankEastKnoxvilleTN19869U.S.Bank National AssociationJanuary 27, 2012March 8, 2013
Patriot Bank MinnesotaForest LakeMN34823First Resource BankJanuary 27, 2012September 12, 2012
Tennessee Commerce BankFranklinTN35296Republic Bank & Trust CompanyJanuary 27, 2012November 20, 2012
First Guaranty Bank and Trust Company of JacksonvilleJacksonvilleFL16579CenterState Bank of Florida, N.A.January 27, 2012September 12, 2012
American Eagle Savings BankBoothwynPA31581Capital Bank, N.A.January 20, 2012January 25, 2013
The First State BankStockbridgeGA19252Hamilton State BankJanuary 20, 2012January 25, 2013
Central Florida State BankBelleviewFL57186CenterState Bank of Florida, N.A.January 20, 2012January 25, 2013
Western National BankPhoenixAZ57917Washington FederalDecember 16, 2011August 13, 2012
Premier Community Bank of the Emerald CoastCrestviewFL58343Summit BankDecember 16, 2011September 12, 2012
Central Progressive BankLacombeLA19657First NBC BankNovember 18, 2011August 13, 2012
Polk County BankJohnstonIA14194Grinnell State BankNovember 18, 2011August 15, 2012
Community Bank of RockmartRockmartGA57860Century Bank of GeorgiaNovember 10, 2011August 13, 2012
SunFirst BankSaint GeorgeUT57087Cache Valley BankNovember 4, 2011November 16, 2012
Mid City Bank, Inc.OmahaNE19397Premier BankNovember 4, 2011August 15, 2012
All American BankDes PlainesIL57759International Bank of ChicagoOctober 28, 2011August 15, 2012
Community Banks of ColoradoGreenwood VillageCO21132Bank Midwest, N.A.October 21, 2011January 2, 2013
Community Capital BankJonesboroGA57036State Bank and Trust CompanyOctober 21, 2011November 8, 2012
Decatur First BankDecaturGA34392Fidelity BankOctober 21, 2011November 8, 2012
Old Harbor BankClearwaterFL575371st United BankOctober 21, 2011November 8, 2012
Country BankAledoIL35395Blackhawk Bank & TrustOctober 14, 2011August 15, 2012
First State BankCranfordNJ58046Northfield BankOctober 14, 2011November 8, 2012
Blue Ridge Savings Bank, Inc.AshevilleNC32347Bank of North CarolinaOctober 14, 2011November 8, 2012
Piedmont Community BankGrayGA57256State Bank and Trust CompanyOctober 14, 2011January 22, 2013
Sun Security BankEllingtonMO20115Great Southern BankOctober 7, 2011November 7, 2012
The RiverBankWyomingMN10216Central BankOctober 7, 2011November 7, 2012
First International BankPlanoTX33513American First National BankSeptember 30, 2011October 9, 2012
Citizens Bank of Northern CaliforniaNevada CityCA33983Tri Counties BankSeptember 23, 2011October 9, 2012
Bank of the CommonwealthNorfolkVA20408Southern Bank and Trust CompanySeptember 23, 2011October 9, 2012
The First National Bank of FloridaMiltonFL25155CharterBankSeptember 9, 2011September 6, 2012
CreekSide BankWoodstockGA58226Georgia Commerce BankSeptember 2, 2011September 6, 2012
Patriot Bank of GeorgiaCummingGA58273Georgia Commerce BankSeptember 2, 2011November 2, 2012
First Choice BankGenevaIL57212Inland Bank & TrustAugust 19, 2011August 15, 2012
First Southern National BankStatesboroGA57239Heritage Bank of the SouthAugust 19, 2011November 2, 2012
Lydian Private BankPalm BeachFL35356Sabadell United Bank, N.A.August 19, 2011November 2, 2012
Public Savings BankHuntingdon ValleyPA34130Capital Bank, N.A.August 18, 2011August 15, 2012
The First National Bank of OlatheOlatheKS4744Enterprise Bank & TrustAugust 12, 2011August 23, 2012
Bank of WhitmanColfaxWA22528Columbia State BankAugust 5, 2011August 16, 2012
Bank of ShorewoodShorewoodIL22637Heartland Bank and Trust CompanyAugust 5, 2011August 16, 2012
Integra Bank National AssociationEvansvilleIN4392Old National BankJuly 29, 2011August 16, 2012
BankMeridian, N.A.ColumbiaSC58222SCBT National AssociationJuly 29, 2011November 2, 2012
Virginia Business BankRichmondVA58283Xenith BankJuly 29, 2011October 9, 2012
Bank of ChoiceGreeleyCO2994Bank Midwest, N.A.July 22, 2011September 12, 2012
LandMark Bank of FloridaSarasotaFL35244American Momentum BankJuly 22, 2011November 2, 2012
Southshore Community BankApollo BeachFL58056American Momentum BankJuly 22, 2011November 2, 2012
Summit BankPrescottAZ57442The Foothills BankJuly 15, 2011August 16, 2012
First Peoples BankPort St. LucieFL34870Premier American Bank, N.A.July 15, 2011November 2, 2012
High Trust BankStockbridgeGA19554Ameris BankJuly 15, 2011November 2, 2012
One Georgia BankAtlantaGA58238Ameris BankJuly 15, 2011November 2, 2012
Signature BankWindsorCO57835Points West Community BankJuly 8, 2011October 26, 2012
Colorado Capital BankCastle RockCO34522First-Citizens Bank & Trust CompanyJuly 8, 2011January 15, 2013
First Chicago Bank & TrustChicagoIL27935Northbrook Bank & Trust CompanyJuly 8, 2011September 9, 2012
Mountain Heritage BankClaytonGA57593First American Bank and Trust CompanyJune 24, 2011November 2, 2012
First Commercial Bank of Tampa BayTampaFL27583Stonegate BankJune 17, 2011November 2, 2012
McIntosh State BankJacksonGA19237Hamilton State BankJune 17, 2011November 2, 2012
Atlantic Bank and TrustCharlestonSC58420First Citizens Bank and Trust Company, Inc.June 3, 2011October 31, 2012
First Heritage BankSnohomishWA23626Columbia State BankMay 27, 2011January 28, 2013
Summit BankBurlingtonWA513Columbia State BankMay 20, 2011January 22, 2013
First Georgia Banking CompanyFranklinGA57647CertusBank, National AssociationMay 20, 2011November 13, 2012
Atlantic Southern BankMaconGA57213CertusBank, National AssociationMay 20, 2011October 31, 2012
Coastal BankCocoa BeachFL34898Florida Community Bank, a division of Premier American Bank, N.A.May 6, 2011November 30, 2012
Community Central BankMount ClemensMI34234Talmer Bank & TrustApril 29, 2011August 16, 2012
The Park Avenue BankValdostaGA19797Bank of the OzarksApril 29, 2011November 30, 2012
First Choice Community BankDallasGA58539Bank of the OzarksApril 29, 2011January 22, 2013
Cortez Community BankBrooksvilleFL57625Florida Community Bank, a division of Premier American Bank, N.A.April 29, 2011November 30, 2012
First National Bank of Central FloridaWinter ParkFL26297Florida Community Bank, a division of Premier American Bank, N.A.April 29, 2011November 30, 2012
Heritage Banking GroupCarthageMS14273Trustmark National BankApril 15, 2011November 30, 2012
Rosemount National BankRosemountMN24099Central BankApril 15, 2011August 16, 2012
Superior BankBirminghamAL17750Superior Bank, National AssociationApril 15, 2011November 30, 2012
Nexity BankBirminghamAL19794AloStar Bank of CommerceApril 15, 2011September 4, 2012
New Horizons BankEast EllijayGA57705Citizens South BankApril 15, 2011August 16, 2012
Bartow County BankCartersvilleGA21495Hamilton State BankApril 15, 2011January 22, 2013
Nevada Commerce BankLas VegasNV35418City National BankApril 8, 2011September 9, 2012
Western Springs National Bank and TrustWestern SpringsIL10086Heartland Bank and Trust CompanyApril 8, 2011January 22, 2013
The Bank of CommerceWood DaleIL34292Advantage National Bank GroupMarch 25, 2011January 22, 2013
Legacy BankMilwaukeeWI34818Seaway Bank and Trust CompanyMarch 11, 2011September 12, 2012
First National Bank of DavisDavisOK4077The Pauls Valley National BankMarch 11, 2011August 20, 2012
Valley Community BankSt. CharlesIL34187First State BankFebruary 25, 2011September 12, 2012
San Luis Trust Bank, FSBSan Luis ObispoCA34783First California BankFebruary 18, 2011August 20, 2012
Charter Oak BankNapaCA57855Bank of MarinFebruary 18, 2011September 12, 2012
Citizens Bank of EffinghamSpringfieldGA34601Heritage Bank of the SouthFebruary 18, 2011November 2, 2012
Habersham BankClarkesvilleGA151SCBT National AssociationFebruary 18, 2011November 2, 2012
Canyon National BankPalm SpringsCA34692Pacific Premier BankFebruary 11, 2011September 12, 2012
Badger State BankCassvilleWI13272Royal BankFebruary 11, 2011September 12, 2012
Peoples State BankHamtramckMI14939First Michigan BankFebruary 11, 2011January 22, 2013
Sunshine State Community BankPort OrangeFL35478Premier American Bank, N.A.February 11, 2011November 2, 2012
Community First Bank ChicagoChicagoIL57948Northbrook Bank & Trust CompanyFebruary 4, 2011August 20, 2012
North Georgia BankWatkinsvilleGA35242BankSouthFebruary 4, 2011November 2, 2012
American Trust BankRoswellGA57432Renasant BankFebruary 4, 2011October 31, 2012
First Community BankTaosNM12261U.S. Bank, N.A.January 28, 2011September 12, 2012
FirsTier BankLouisvilleCO57646No AcquirerJanuary 28, 2011September 12, 2012
Evergreen State BankStoughtonWI5328McFarland State BankJanuary 28, 2011September 12, 2012
The First State BankCamargoOK2303Bank 7January 28, 2011September 12, 2012
United Western BankDenverCO31293First-Citizens Bank & Trust CompanyJanuary 21, 2011September 12, 2012
The Bank of AshevilleAshevilleNC34516First BankJanuary 21, 2011November 2, 2012
CommunitySouth Bank & TrustEasleySC57868CertusBank, National AssociationJanuary 21, 2011November 2, 2012
Enterprise Banking CompanyMcDonoughGA19758No AcquirerJanuary 21, 2011November 2, 2012
Oglethorpe BankBrunswickGA57440Bank of the OzarksJanuary 14, 2011November 2, 2012
Legacy BankScottsdaleAZ57820Enterprise Bank & TrustJanuary 7, 2011September 12, 2012
First Commercial Bank of FloridaOrlandoFL34965First Southern BankJanuary 7, 2011November 2, 2012
Community National BankLino LakesMN23306Farmers & Merchants Savings BankDecember 17, 2010August 20, 2012
First Southern BankBatesvilleAR58052Southern BankDecember 17, 2010August 20, 2012
United Americas Bank, N.A.AtlantaGA35065State Bank and Trust CompanyDecember 17, 2010November 2, 2012
Appalachian Community Bank, FSBMcCaysvilleGA58495Peoples Bank of East TennesseeDecember 17, 2010October 31, 2012
Chestatee State BankDawsonvilleGA34578Bank of the OzarksDecember 17, 2010November 2, 2012
The Bank of Miami,N.A.Coral GablesFL190401st United BankDecember 17, 2010November 2, 2012
Earthstar BankSouthamptonPA35561Polonia BankDecember 10, 2010August 20, 2012
Paramount BankFarmington HillsMI34673Level One BankDecember 10, 2010August 20, 2012
First Banking CenterBurlingtonWI5287First Michigan BankNovember 19, 2010August 20, 2012
Allegiance Bank of North AmericaBala CynwydPA35078VIST BankNovember 19, 2010August 20, 2012
Gulf State Community BankCarrabelleFL20340Centennial BankNovember 19, 2010November 2, 2012
Copper Star BankScottsdaleAZ35463Stearns Bank, N.A.November 12, 2010August 20, 2012
Darby Bank & Trust Co.VidaliaGA14580Ameris BankNovember 12, 2010January 15, 2013
Tifton Banking CompanyTiftonGA57831Ameris BankNovember 12, 2010November 2, 2012
First Vietnamese American Bank
In Vietnamese
WestminsterCA57885Grandpoint BankNovember 5, 2010September 12, 2012
Pierce Commercial BankTacomaWA34411Heritage BankNovember 5, 2010August 20, 2012
Western Commercial BankWoodland HillsCA58087First California BankNovember 5, 2010September 12, 2012
K BankRandallstownMD31263Manufacturers and Traders Trust Company (M&T Bank)November 5, 2010August 20, 2012
First Arizona Savings, A FSBScottsdaleAZ32582No AcquirerOctober 22, 2010August 20, 2012
Hillcrest BankOverland ParkKS22173Hillcrest Bank, N.A.October 22, 2010August 20, 2012
First Suburban National BankMaywoodIL16089Seaway Bank and Trust CompanyOctober 22, 2010August 20, 2012
The First National Bank of BarnesvilleBarnesvilleGA2119United BankOctober 22, 2010November 2, 2012
The Gordon BankGordonGA33904Morris BankOctober 22, 2010November 2, 2012
Progress Bank of FloridaTampaFL32251Bay Cities BankOctober 22, 2010November 2, 2012
First Bank of JacksonvilleJacksonvilleFL27573Ameris BankOctober 22, 2010November 2, 2012
Premier BankJefferson CityMO34016Providence BankOctober 15, 2010August 20, 2012
WestBridge Bank and Trust CompanyChesterfieldMO58205Midland States BankOctober 15, 2010August 20, 2012
Security Savings Bank, F.S.B.OlatheKS30898Simmons First National BankOctober 15, 2010August 20, 2012
Shoreline BankShorelineWA35250GBC International BankOctober 1, 2010August 20, 2012
Wakulla BankCrawfordvilleFL21777Centennial BankOctober 1, 2010November 2, 2012
North County BankArlingtonWA35053Whidbey Island BankSeptember 24, 2010August 20, 2012
Haven Trust Bank FloridaPonte Vedra BeachFL58308First Southern BankSeptember 24, 2010November 5, 2012
Maritime Savings BankWest AllisWI28612North Shore Bank, FSBSeptember 17, 2010August 20, 2012
Bramble Savings BankMilfordOH27808Foundation BankSeptember 17, 2010August 20, 2012
The Peoples BankWinderGA182Community & Southern BankSeptember 17, 2010November 5, 2012
First Commerce Community BankDouglasvilleGA57448Community & Southern BankSeptember 17, 2010January 15, 2013
Bank of EllijayEllijayGA58197Community & Southern BankSeptember 17, 2010January 15, 2013
ISN BankCherry HillNJ57107Customers BankSeptember 17, 2010August 22, 2012
Horizon BankBradentonFL35061Bank of the OzarksSeptember 10, 2010November 5, 2012
Sonoma Valley BankSonomaCA27259Westamerica BankAugust 20, 2010September 12, 2012
Los Padres BankSolvangCA32165Pacific Western BankAugust 20, 2010September 12, 2012
Butte Community BankChicoCA33219Rabobank, N.A.August 20, 2010September 12, 2012
Pacific State BankStocktonCA27090Rabobank, N.A.August 20, 2010September 12, 2012
ShoreBankChicagoIL15640Urban Partnership BankAugust 20, 2010May 16, 2013
Imperial Savings and Loan AssociationMartinsvilleVA31623River Community Bank, N.A.August 20, 2010August 24, 2012
Independent National BankOcalaFL27344CenterState Bank of Florida, N.A.August 20, 2010November 5, 2012
Community National Bank at BartowBartowFL25266CenterState Bank of Florida, N.A.August 20, 2010November 5, 2012
Palos Bank and Trust CompanyPalos HeightsIL17599First Midwest BankAugust 13, 2010August 22, 2012
Ravenswood BankChicagoIL34231Northbrook Bank & Trust CompanyAugust 6, 2010August 22, 2012
LibertyBankEugeneOR31964Home Federal BankJuly 30, 2010August 22, 2012
The Cowlitz BankLongviewWA22643Heritage BankJuly 30, 2010August 22, 2012
Coastal Community BankPanama City BeachFL9619Centennial BankJuly 30, 2010November 5, 2012
Bayside Savings BankPort Saint JoeFL57669Centennial BankJuly 30, 2010November 5, 2012
Northwest Bank & TrustAcworthGA57658State Bank and Trust CompanyJuly 30, 2010November 5, 2012
Home Valley BankCave JunctionOR23181South Valley Bank & TrustJuly 23, 2010September 12, 2012
SouthwestUSA BankLas VegasNV35434Plaza BankJuly 23, 2010August 22, 2012
Community Security BankNew PragueMN34486RoundbankJuly 23, 2010September 12, 2012
Thunder BankSylvan GroveKS10506The Bennington State BankJuly 23, 2010September 13, 2012
Williamsburg First National BankKingstreeSC17837First Citizens Bank and Trust Company, Inc.July 23, 2010November 5, 2012
Crescent Bank and Trust CompanyJasperGA27559Renasant BankJuly 23, 2010November 5, 2012
Sterling BankLantanaFL32536IBERIABANKJuly 23, 2010November 5, 2012
Mainstreet Savings Bank, FSBHastingsMI28136Commercial BankJuly 16, 2010September 13, 2012
Olde Cypress Community BankClewistonFL28864CenterState Bank of Florida, N.A.July 16, 2010November 5, 2012
Turnberry BankAventuraFL32280NAFH National BankJuly 16, 2010November 5, 2012
Metro Bank of Dade CountyMiamiFL25172NAFH National BankJuly 16, 2010November 5, 2012
First National Bank of the SouthSpartanburgSC35383NAFH National BankJuly 16, 2010November 5, 2012
Woodlands BankBlufftonSC32571Bank of the OzarksJuly 16, 2010November 5, 2012
Home National BankBlackwellOK11636RCB BankJuly 9, 2010December 10, 2012
USA BankPort ChesterNY58072New Century BankJuly 9, 2010September 14, 2012
Ideal Federal Savings BankBaltimoreMD32456No AcquirerJuly 9, 2010September 14, 2012
Bay National BankBaltimoreMD35462Bay Bank, FSBJuly 9, 2010January 15, 2013
High Desert State BankAlbuquerqueNM35279First American BankJune 25, 2010September 14, 2012
First National BankSavannahGA34152The Savannah Bank, N.A.June 25, 2010November 5, 2012
Peninsula BankEnglewoodFL26563Premier American Bank, N.A.June 25, 2010November 5, 2012
Nevada Security BankRenoNV57110Umpqua BankJune 18, 2010August 23, 2012
Washington First International BankSeattleWA32955East West BankJune 11, 2010September 14, 2012
TierOne BankLincolnNE29341Great Western BankJune 4, 2010September 14, 2012
Arcola Homestead Savings BankArcolaIL31813No AcquirerJune 4, 2010September 14, 2012
First National BankRosedaleMS15814The Jefferson BankJune 4, 2010November 5, 2012
Sun West BankLas VegasNV34785City National BankMay 28, 2010September 14, 2012
Granite Community Bank, NAGranite BayCA57315Tri Counties BankMay 28, 2010September 14, 2012
Bank of Florida - TampaTampaFL57814EverBankMay 28, 2010November 5, 2012
Bank of Florida - SouthwestNaplesFL35106EverBankMay 28, 2010November 5, 2012
Bank of Florida - SoutheastFort LauderdaleFL57360EverBankMay 28, 2010November 5, 2012
Pinehurst BankSaint PaulMN57735Coulee BankMay 21, 2010October 26, 2012
Midwest Bank and Trust CompanyElmwood ParkIL18117FirstMerit Bank, N.A.May 14, 2010August 23, 2012
Southwest Community BankSpringfieldMO34255Simmons First National BankMay 14, 2010August 23, 2012
New Liberty BankPlymouthMI35586Bank of Ann ArborMay 14, 2010August 23, 2012
Satilla Community BankSaint MarysGA35114Ameris BankMay 14, 2010November 5, 2012
1st Pacific Bank of CaliforniaSan DiegoCA35517City National BankMay 7, 2010December 13, 2012
Towne Bank of ArizonaMesaAZ57697Commerce Bank of ArizonaMay 7, 2010August 23, 2012
Access BankChamplinMN16476PrinsBankMay 7, 2010August 23, 2012
The Bank of BonifayBonifayFL14246First Federal Bank of FloridaMay 7, 2010November 5, 2012
Frontier BankEverettWA22710Union Bank, N.A.April 30, 2010January 15, 2013
BC National BanksButlerMO17792Community First BankApril 30, 2010August 23, 2012
Champion BankCreve CoeurMO58362BankLibertyApril 30, 2010August 23, 2012
CF BancorpPort HuronMI30005First Michigan BankApril 30, 2010January 15, 2013
Westernbank Puerto Rico
En Espanol
MayaguezPR31027Banco Popular de Puerto RicoApril 30, 2010November 5, 2012
R-G Premier Bank of Puerto Rico
En Espanol
Hato ReyPR32185Scotiabank de Puerto RicoApril 30, 2010November 5, 2012
Eurobank
En Espanol
San JuanPR27150Oriental Bank and TrustApril 30, 2010November 5, 2012
Wheatland BankNapervilleIL58429Wheaton Bank & TrustApril 23, 2010August 23, 2012
Peotone Bank and Trust CompanyPeotoneIL10888First Midwest BankApril 23, 2010August 23, 2012
Lincoln Park Savings BankChicagoIL30600Northbrook Bank & Trust CompanyApril 23, 2010August 23, 2012
New Century BankChicagoIL34821MB Financial Bank, N.A.April 23, 2010August 23, 2012
Citizens Bank and Trust Company of ChicagoChicagoIL34658Republic Bank of ChicagoApril 23, 2010August 23, 2012
Broadway BankChicagoIL22853MB Financial Bank, N.A.April 23, 2010August 23, 2012
Amcore Bank, National AssociationRockfordIL3735Harris N.A.April 23, 2010August 23, 2012
City BankLynnwoodWA21521Whidbey Island BankApril 16, 2010September 14, 2012
Tamalpais BankSan RafaelCA33493Union Bank, N.A.April 16, 2010August 23, 2012
Innovative BankOaklandCA23876Center BankApril 16, 2010August 23, 2012
Butler BankLowellMA26619People's United BankApril 16, 2010August 23, 2012
Riverside National Bank of FloridaFort PierceFL24067TD Bank, N.A.April 16, 2010November 5, 2012
AmericanFirst BankClermontFL57724TD Bank, N.A.April 16, 2010October 31, 2012
First Federal Bank of North FloridaPalatkaFL28886TD Bank, N.A.April 16, 2010January 15, 2013
Lakeside Community BankSterling HeightsMI34878No AcquirerApril 16, 2010August 23, 2012
Beach First National BankMyrtle BeachSC34242Bank of North CarolinaApril 9, 2010November 5, 2012
Desert Hills BankPhoenixAZ57060New York Community BankMarch 26, 2010August 23, 2012
Unity National BankCartersvilleGA34678Bank of the OzarksMarch 26, 2010September 14, 2012
Key West BankKey WestFL34684Centennial BankMarch 26, 2010August 23, 2012
McIntosh Commercial BankCarrolltonGA57399CharterBankMarch 26, 2010August 23, 2012
State Bank of AuroraAuroraMN8221Northern State BankMarch 19, 2010August 23, 2012
First Lowndes BankFort DepositAL24957First Citizens BankMarch 19, 2010August 23, 2012
Bank of HiawasseeHiawasseeGA10054Citizens South BankMarch 19, 2010August 23, 2012
Appalachian Community BankEllijayGA33989Community & Southern BankMarch 19, 2010October 31, 2012
Advanta Bank Corp.DraperUT33535No AcquirerMarch 19, 2010September 14, 2012
Century Security BankDuluthGA58104Bank of UpsonMarch 19, 2010August 23, 2012
American National BankParmaOH18806The National Bank and Trust CompanyMarch 19, 2010August 23, 2012
Statewide BankCovingtonLA29561Home BankMarch 12, 2010August 23, 2012
Old Southern BankOrlandoFL58182Centennial BankMarch 12, 2010August 23, 2012
The Park Avenue BankNew YorkNY27096Valley National BankMarch 12, 2010August 23, 2012
LibertyPointe BankNew YorkNY58071Valley National BankMarch 11, 2010August 23, 2012
Centennial BankOgdenUT34430No AcquirerMarch 5, 2010September 14, 2012
Waterfield BankGermantownMD34976No AcquirerMarch 5, 2010August 23, 2012
Bank of IllinoisNormalIL9268Heartland Bank and Trust CompanyMarch 5, 2010August 23, 2012
Sun American BankBoca RatonFL27126First-Citizens Bank & Trust CompanyMarch 5, 2010August 23, 2012
Rainier Pacific BankTacomaWA38129Umpqua BankFebruary 26, 2010August 23, 2012
Carson River Community BankCarson CityNV58352Heritage Bank of NevadaFebruary 26, 2010January 15, 2013
La Jolla Bank, FSBLa JollaCA32423OneWest Bank, FSBFebruary 19, 2010August 24, 2012
George Washington Savings BankOrland ParkIL29952FirstMerit Bank, N.A.February 19, 2010August 24, 2012
The La Coste National BankLa CosteTX3287Community National BankFebruary 19, 2010September 14, 2012
Marco Community BankMarco IslandFL57586Mutual of Omaha BankFebruary 19, 2010August 24, 2012
1st American State Bank of MinnesotaHancockMN15448Community Development Bank, FSBFebruary 5, 2010August 24, 2012
American Marine BankBainbridge IslandWA16730Columbia State BankJanuary 29, 2010August 24, 2012
First Regional BankLos AngelesCA23011First-Citizens Bank & Trust CompanyJanuary 29, 2010August 24, 2012
Community Bank and TrustCorneliaGA5702SCBT National AssociationJanuary 29, 2010January 15, 2013
Marshall Bank, N.A.HallockMN16133United Valley BankJanuary 29, 2010August 23, 2012
Florida Community BankImmokaleeFL5672Premier American Bank, N.A.January 29, 2010January 15, 2013
First National Bank of GeorgiaCarrolltonGA16480Community & Southern BankJanuary 29, 2010December 13, 2012
Columbia River BankThe DallesOR22469Columbia State BankJanuary 22, 2010September 14, 2012
Evergreen BankSeattleWA20501Umpqua BankJanuary 22, 2010January 15, 2013
Charter BankSanta FeNM32498Charter BankJanuary 22, 2010August 23, 2012
Bank of LeetonLeetonMO8265Sunflower Bank, N.A.January 22, 2010January 15, 2013
Premier American BankMiamiFL57147Premier American Bank, N.A.January 22, 2010December 13, 2012
Barnes Banking CompanyKaysvilleUT1252No AcquirerJanuary 15, 2010August 23, 2012
St. Stephen State BankSt. StephenMN17522First State Bank of St. JosephJanuary 15, 2010August 23, 2012
Town Community Bank & TrustAntiochIL34705First American BankJanuary 15, 2010August 23, 2012
Horizon BankBellinghamWA22977Washington Federal Savings and Loan AssociationJanuary 8, 2010August 23, 2012
First Federal Bank of California, F.S.B.Santa MonicaCA28536OneWest Bank, FSBDecember 18, 2009August 23, 2012
Imperial Capital BankLa JollaCA26348City National BankDecember 18, 2009September 5, 2012
Independent Bankers' BankSpringfieldIL26820The Independent BankersBank (TIB)December 18, 2009August 23, 2012
New South Federal Savings BankIrondaleAL32276Beal BankDecember 18, 2009August 23, 2012
Citizens State BankNew BaltimoreMI1006No AcquirerDecember 18, 2009November 5, 2012
Peoples First Community BankPanama CityFL32167Hancock BankDecember 18, 2009November 5, 2012
RockBridge Commercial BankAtlantaGA58315No AcquirerDecember 18, 2009November 5, 2012
SolutionsBankOverland ParkKS4731Arvest BankDecember 11, 2009August 23, 2012
Valley Capital Bank, N.A.MesaAZ58399Enterprise Bank & TrustDecember 11, 2009August 23, 2012
Republic Federal Bank, N.A.MiamiFL228461st United BankDecember 11, 2009November 5, 2012
Greater Atlantic BankRestonVA32583SonabankDecember 4, 2009November 5, 2012
Benchmark BankAuroraIL10440MB Financial Bank, N.A.December 4, 2009August 23, 2012
AmTrust BankClevelandOH29776New York Community BankDecember 4, 2009November 5, 2012
The Tattnall BankReidsvilleGA12080Heritage Bank of the SouthDecember 4, 2009November 5, 2012
First Security National BankNorcrossGA26290State Bank and Trust CompanyDecember 4, 2009November 5, 2012
The Buckhead Community BankAtlantaGA34663State Bank and Trust CompanyDecember 4, 2009November 5, 2012
Commerce Bank of Southwest FloridaFort MyersFL58016Central BankNovember 20, 2009November 5, 2012
Pacific Coast National BankSan ClementeCA57914Sunwest BankNovember 13, 2009August 22, 2012
Orion BankNaplesFL22427IBERIABANKNovember 13, 2009November 5, 2012
Century Bank, F.S.B.SarasotaFL32267IBERIABANKNovember 13, 2009August 22, 2012
United Commercial BankSan FranciscoCA32469East West BankNovember 6, 2009November 5, 2012
Gateway Bank of St. LouisSt. LouisMO19450Central Bank of Kansas CityNovember 6, 2009August 22, 2012
Prosperan BankOakdaleMN35074Alerus Financial, N.A.November 6, 2009August 22, 2012
Home Federal Savings BankDetroitMI30329Liberty Bank and Trust CompanyNovember 6, 2009August 22, 2012
United Security BankSpartaGA22286Ameris BankNovember 6, 2009January 15, 2013
North Houston BankHoustonTX18776U.S. Bank N.A.October 30, 2009August 22, 2012
Madisonville State BankMadisonvilleTX33782U.S. Bank N.A.October 30, 2009August 22, 2012
Citizens National BankTeagueTX25222U.S. Bank N.A.October 30, 2009August 22, 2012
Park National BankChicagoIL11677U.S. Bank N.A.October 30, 2009August 22, 2012
Pacific National BankSan FranciscoCA30006U.S. Bank N.A.October 30, 2009August 22, 2012
California National BankLos AngelesCA34659U.S. Bank N.A.October 30, 2009September 5, 2012
San Diego National BankSan DiegoCA23594U.S. Bank N.A.October 30, 2009August 22, 2012
Community Bank of LemontLemontIL35291U.S. Bank N.A.October 30, 2009January 15, 2013
Bank USA, N.A.PhoenixAZ32218U.S. Bank N.A.October 30, 2009August 22, 2012
First DuPage BankWestmontIL35038First Midwest BankOctober 23, 2009August 22, 2012
Riverview Community BankOtsegoMN57525Central BankOctober 23, 2009August 22, 2012
Bank of ElmwoodRacineWI18321Tri City National BankOctober 23, 2009August 22, 2012
Flagship National BankBradentonFL35044First Federal Bank of FloridaOctober 23, 2009August 22, 2012
Hillcrest Bank FloridaNaplesFL58336Stonegate BankOctober 23, 2009August 22, 2012
American United BankLawrencevilleGA57794Ameris BankOctober 23, 2009September 5, 2012
Partners BankNaplesFL57959Stonegate BankOctober 23, 2009January 15, 2013
San Joaquin BankBakersfieldCA23266Citizens Business BankOctober 16, 2009August 22, 2012
Southern Colorado National BankPuebloCO57263Legacy BankOctober 2, 2009September 5, 2012
Jennings State BankSpring GroveMN11416Central BankOctober 2, 2009August 21, 2012
Warren BankWarrenMI34824The Huntington National BankOctober 2, 2009August 21, 2012
Georgian BankAtlantaGA57151First Citizens Bank and Trust Company, Inc.September 25, 2009August 21, 2012
Irwin Union Bank, F.S.B.LouisvilleKY57068First Financial Bank, N.A.September 18, 2009September 5, 2012
Irwin Union Bank and Trust CompanyColumbusIN10100First Financial Bank, N.A.September 18, 2009August 21, 2012
Venture BankLaceyWA22868First-Citizens Bank & Trust CompanySeptember 11, 2009August 21, 2012
Brickwell Community BankWoodburyMN57736CorTrust Bank N.A.September 11, 2009January 15, 2013
Corus Bank, N.A.ChicagoIL13693MB Financial Bank, N.A.September 11, 2009August 21, 2012
First State BankFlagstaffAZ34875Sunwest BankSeptember 4, 2009January 15, 2013
Platinum Community BankRolling MeadowsIL35030No AcquirerSeptember 4, 2009August 21, 2012
Vantus BankSioux CityIN27732Great Southern BankSeptember 4, 2009August 21, 2012
InBankOak ForestIL20203MB Financial Bank, N.A.September 4, 2009August 21, 2012
First Bank of Kansas CityKansas CityMO25231Great American BankSeptember 4, 2009August 21, 2012
Affinity BankVenturaCA27197Pacific Western BankAugust 28, 2009August 21, 2012
Mainstreet BankForest LakeMN1909Central BankAugust 28, 2009August 21, 2012
Bradford BankBaltimoreMD28312Manufacturers and Traders Trust Company (M&T Bank)August 28, 2009January 15, 2013
Guaranty BankAustinTX32618BBVA CompassAugust 21, 2009August 21, 2012
CapitalSouth BankBirminghamAL22130IBERIABANKAugust 21, 2009January 15, 2013
First Coweta BankNewnanGA57702United BankAugust 21, 2009January 15, 2013
ebankAtlantaGA34682Stearns Bank, N.A.August 21, 2009August 21, 2012
Community Bank of NevadaLas VegasNV34043No AcquirerAugust 14, 2009August 21, 2012
Community Bank of ArizonaPhoenixAZ57645MidFirst BankAugust 14, 2009August 21, 2012
Union Bank, National AssociationGilbertAZ34485MidFirst BankAugust 14, 2009August 21, 2012
Colonial BankMontgomeryAL9609Branch Banking & Trust Company, (BB&T)August 14, 2009September 5, 2012
Dwelling House Savings and Loan AssociationPittsburghPA31559PNC Bank, N.A.August 14, 2009January 15, 2013
Community First BankPrinevilleOR23268Home Federal BankAugust 7, 2009January 15, 2013
Community National Bank of Sarasota CountyVeniceFL27183Stearns Bank, N.A.August 7, 2009August 20, 2012
First State BankSarasotaFL27364Stearns Bank, N.A.August 7, 2009August 20, 2012
Mutual BankHarveyIL18659United Central BankJuly 31, 2009August 20, 2012
First BankAmericanoElizabethNJ34270Crown BankJuly 31, 2009August 20, 2012
Peoples Community BankWest ChesterOH32288First Financial Bank, N.A.July 31, 2009August 20, 2012
Integrity BankJupiterFL57604Stonegate BankJuly 31, 2009August 20, 2012
First State Bank of AltusAltusOK9873Herring BankJuly 31, 2009August 20, 2012
Security Bank of Jones CountyGrayGA8486State Bank and Trust CompanyJuly 24, 2009August 20, 2012
Security Bank of Houston CountyPerryGA27048State Bank and Trust CompanyJuly 24, 2009August 20, 2012
Security Bank of Bibb CountyMaconGA27367State Bank and Trust CompanyJuly 24, 2009August 20, 2012
Security Bank of North MetroWoodstockGA57105State Bank and Trust CompanyJuly 24, 2009August 20, 2012
Security Bank of North FultonAlpharettaGA57430State Bank and Trust CompanyJuly 24, 2009August 20, 2012
Security Bank of Gwinnett CountySuwaneeGA57346State Bank and Trust CompanyJuly 24, 2009August 20, 2012
Waterford Village BankWilliamsvilleNY58065Evans Bank, N.A.July 24, 2009August 20, 2012
Temecula Valley BankTemeculaCA34341First-Citizens Bank & Trust CompanyJuly 17, 2009August 20, 2012
Vineyard BankRancho CucamongaCA23556California Bank & TrustJuly 17, 2009August 20, 2012
BankFirstSioux FallsSD34103Alerus Financial, N.A.July 17, 2009August 20, 2012
First Piedmont BankWinderGA34594First American Bank and Trust CompanyJuly 17, 2009January 15, 2013
Bank of WyomingThermopolisWY22754Central Bank & TrustJuly 10, 2009August 20, 2012
Founders BankWorthIL18390The PrivateBank and Trust CompanyJuly 2, 2009August 20, 2012
Millennium State Bank of TexasDallasTX57667State Bank of TexasJuly 2, 2009October 26, 2012
First National Bank of DanvilleDanvilleIL3644First Financial Bank, N.A.July 2, 2009August 20, 2012
Elizabeth State BankElizabethIL9262Galena State Bank and Trust CompanyJuly 2, 2009August 20, 2012
Rock River BankOregonIL15302The Harvard State BankJuly 2, 2009August 20, 2012
First State Bank of WinchesterWinchesterIL11710The First National Bank of BeardstownJuly 2, 2009August 20, 2012
John Warner BankClintonIL12093State Bank of LincolnJuly 2, 2009August 20, 2012
Mirae BankLos AngelesCA57332Wilshire State BankJune 26, 2009August 20, 2012
MetroPacific BankIrvineCA57893Sunwest BankJune 26, 2009August 20, 2012
Horizon BankPine CityMN9744Stearns Bank, N.A.June 26, 2009August 20, 2012
Neighborhood Community BankNewnanGA35285CharterBankJune 26, 2009August 20, 2012
Community Bank of West GeorgiaVilla RicaGA57436No AcquirerJune 26, 2009August 17, 2012
First National Bank of AnthonyAnthonyKS4614Bank of KansasJune 19, 2009August 17, 2012
Cooperative BankWilmingtonNC27837First BankJune 19, 2009August 17, 2012
Southern Community BankFayettevilleGA35251United Community BankJune 19, 2009August 17, 2012
Bank of LincolnwoodLincolnwoodIL17309Republic Bank of ChicagoJune 5, 2009August 17, 2012
Citizens National BankMacombIL5757Morton Community BankMay 22, 2009September 4, 2012
Strategic Capital BankChampaignIL35175Midland States BankMay 22, 2009September 4, 2012
BankUnited, FSBCoral GablesFL32247BankUnitedMay 21, 2009August 17, 2012
Westsound BankBremertonWA34843Kitsap BankMay 8, 2009September 4, 2012
America West BankLaytonUT35461Cache Valley BankMay 1, 2009August 17, 2012
Citizens Community BankRidgewoodNJ57563North Jersey Community BankMay 1, 2009September 4, 2012
Silverton Bank, NAAtlantaGA26535No AcquirerMay 1, 2009August 17, 2012
First Bank of IdahoKetchumID34396U.S. Bank, N.A.April 24, 2009August 17, 2012
First Bank of Beverly HillsCalabasasCA32069No AcquirerApril 24, 2009September 4, 2012
Michigan Heritage BankFarmington HillsMI34369Level One BankApril 24, 2009August 17, 2012
American Southern BankKennesawGA57943Bank of North GeorgiaApril 24, 2009August 17, 2012
Great Basin Bank of NevadaElkoNV33824Nevada State BankApril 17, 2009September 4, 2012
American Sterling BankSugar CreekMO8266Metcalf BankApril 17, 2009August 31, 2012
New Frontier BankGreeleyCO34881No AcquirerApril 10, 2009September 4, 2012
Cape Fear BankWilmingtonNC34639First Federal Savings and Loan AssociationApril 10, 2009August 17, 2012
Omni National BankAtlantaGA22238No AcquirerMarch 27, 2009August 17, 2012
TeamBank, NAPaolaKS4754Great Southern BankMarch 20, 2009August 17, 2012
Colorado National BankColorado SpringsCO18896Herring BankMarch 20, 2009August 17, 2012
FirstCity BankStockbridgeGA18243No AcquirerMarch 20, 2009August 17, 2012
Freedom Bank of GeorgiaCommerceGA57558Northeast Georgia BankMarch 6, 2009August 17, 2012
Security Savings BankHendersonNV34820Bank of NevadaFebruary 27, 2009September 7, 2012
Heritage Community BankGlenwoodIL20078MB Financial Bank, N.A.February 27, 2009August 17, 2012
Silver Falls BankSilvertonOR35399Citizens BankFebruary 20, 2009August 17, 2012
Pinnacle Bank of OregonBeavertonOR57342Washington Trust Bank of SpokaneFebruary 13, 2009August 17, 2012
Corn Belt Bank & Trust Co.PittsfieldIL16500The Carlinville National BankFebruary 13, 2009August 17, 2012
Riverside Bank of the Gulf CoastCape CoralFL34563TIB BankFebruary 13, 2009August 17, 2012
Sherman County BankLoup CityNE5431Heritage BankFebruary 13, 2009August 17, 2012
County BankMercedCA22574Westamerica BankFebruary 6, 2009September 4, 2012
Alliance BankCulver CityCA23124California Bank & TrustFebruary 6, 2009August 16, 2012
FirstBank Financial ServicesMcDonoughGA57017Regions BankFebruary 6, 2009August 16, 2012
Ocala National BankOcalaFL26538CenterState Bank of Florida, N.A.January 30, 2009September 4, 2012
Suburban FSBCroftonMD30763Bank of EssexJanuary 30, 2009August 16, 2012
MagnetBankSalt Lake CityUT58001No AcquirerJanuary 30, 2009August 16, 2012
1st Centennial BankRedlandsCA33025First California BankJanuary 23, 2009August 16, 2012
Bank of Clark CountyVancouverWA34959Umpqua BankJanuary 16, 2009August 16, 2012
National Bank of CommerceBerkeleyIL19733Republic Bank of ChicagoJanuary 16, 2009August 16, 2012
Sanderson State Bank
En Espanol
SandersonTX11568The Pecos County State BankDecember 12, 2008September 4, 2012
Haven Trust BankDuluthGA35379Branch Banking & Trust Company, (BB&T)December 12, 2008August 16, 2012
First Georgia Community BankJacksonGA34301United BankDecember 5, 2008August 16, 2012
PFF Bank & TrustPomonaCA28344U.S. Bank, N.A.November 21, 2008January 4, 2013
Downey Savings & LoanNewport BeachCA30968U.S. Bank, N.A.November 21, 2008January 4, 2013
Community BankLoganvilleGA16490Bank of EssexNovember 21, 2008September 4, 2012
Security Pacific BankLos AngelesCA23595Pacific Western BankNovember 7, 2008August 28, 2012
Franklin Bank, SSBHoustonTX26870Prosperity BankNovember 7, 2008August 16, 2012
Freedom BankBradentonFL57930Fifth Third BankOctober 31, 2008August 16, 2012
Alpha Bank & TrustAlpharettaGA58241Stearns Bank, N.A.October 24, 2008August 16, 2012
Meridian BankEldredIL13789National BankOctober 10, 2008May 31, 2012
Main Street BankNorthvilleMI57654Monroe Bank & TrustOctober 10, 2008August 16, 2012
Washington Mutual Bank
(Including its subsidiary Washington Mutual Bank FSB)
HendersonNV32633JP Morgan Chase BankSeptember 25, 2008August 16, 2012
AmeribankNorthforkWV6782The Citizens Savings Bank

Pioneer Community Bank, Inc.
September 19, 2008August 16, 2012
Silver State Bank
En Espanol
HendersonNV34194Nevada State BankSeptember 5, 2008August 16, 2012
Integrity BankAlpharettaGA35469Regions BankAugust 29, 2008August 16, 2012
Columbian Bank & TrustTopekaKS22728Citizens Bank & TrustAugust 22, 2008August 16, 2012
First Priority BankBradentonFL57523SunTrust BankAugust 1, 2008August 16, 2012
First Heritage Bank, NANewport BeachCA57961Mutual of Omaha BankJuly 25, 2008August 28, 2012
First National Bank of NevadaRenoNV27011Mutual of Omaha BankJuly 25, 2008August 28, 2012
IndyMac BankPasadenaCA29730OneWest Bank, FSBJuly 11, 2008August 28, 2012
First Integrity Bank, NAStaplesMN12736First International Bank and TrustMay 30, 2008August 28, 2012
ANB Financial, NABentonvilleAR33901Pulaski Bank and Trust CompanyMay 9, 2008August 28, 2012
Hume BankHumeMO1971Security BankMarch 7, 2008August 28, 2012
Douglass National BankKansas CityMO24660Liberty Bank and Trust CompanyJanuary 25, 2008October 26, 2012
Miami Valley BankLakeviewOH16848The Citizens Banking CompanyOctober 4, 2007August 28, 2012
NetBankAlpharettaGA32575ING DIRECTSeptember 28, 2007August 28, 2012
Metropolitan Savings BankPittsburghPA35353Allegheny Valley Bank of PittsburghFebruary 2, 2007October 27, 2010
Bank of EphraimEphraimUT1249Far West BankJune 25, 2004April 9, 2008
Reliance BankWhite PlainsNY26778Union State BankMarch 19, 2004April 9, 2008
Guaranty National Bank of TallahasseeTallahasseeFL26838Hancock Bank of FloridaMarch 12, 2004June 5, 2012
Dollar Savings BankNewarkNJ31330No AcquirerFebruary 14, 2004April 9, 2008
Pulaski Savings BankPhiladelphiaPA27203Earthstar BankNovember 14, 2003July 22, 2005
First National Bank of BlanchardvilleBlanchardvilleWI11639The Park BankMay 9, 2003June 5, 2012
Southern Pacific BankTorranceCA27094Beal BankFebruary 7, 2003October 20, 2008
Farmers Bank of CheneyvilleCheneyvilleLA16445Sabine State Bank & TrustDecember 17, 2002October 20, 2004
Bank of AlamoAlamoTN9961No AcquirerNovember 8, 2002March 18, 2005
AmTrade International Bank
En Espanol
AtlantaGA33784No AcquirerSeptember 30, 2002September 11, 2006
Universal Federal Savings BankChicagoIL29355Chicago Community BankJune 27, 2002April 9, 2008
Connecticut Bank of CommerceStamfordCT19183Hudson United BankJune 26, 2002February 14, 2012
New Century BankShelby TownshipMI34979No AcquirerMarch 28, 2002March 18, 2005
Net 1st National BankBoca RatonFL26652Bank Leumi USAMarch 1, 2002April 9, 2008
NextBank, NAPhoenixAZ22314No AcquirerFebruary 7, 2002August 27, 2010
Oakwood Deposit Bank Co.OakwoodOH8966The State Bank & Trust CompanyFebruary 1, 2002October 25, 2012
Bank of Sierra BlancaSierra BlancaTX22002The Security State Bank of PecosJanuary 18, 2002November 6, 2003
Hamilton Bank, NA
En Espanol
MiamiFL24382Israel Discount Bank of New YorkJanuary 11, 2002June 5, 2012
Sinclair National BankGravetteAR34248Delta Trust & BankSeptember 7, 2001February 10, 2004
Superior Bank, FSBHinsdaleIL32646Superior Federal, FSBJuly 27, 2001June 5, 2012
Malta National BankMaltaOH6629North Valley BankMay 3, 2001November 18, 2002
First Alliance Bank & Trust Co.ManchesterNH34264Southern New Hampshire Bank & TrustFebruary 2, 2001February 18, 2003
National State Bank of MetropolisMetropolisIL3815Banterra Bank of MarionDecember 14, 2000March 17, 2005
Bank of HonoluluHonoluluHI21029Bank of the OrientOctober 13, 2000March 17, 2005
+
+ +
+ + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/ci.png b/doc/source/_static/ci.png new file mode 100644 index 0000000000000000000000000000000000000000..3a4225e3ce1eb4b9043de0fc3c6c6ab6677a11d9 GIT binary patch literal 35295 zcmb@tRZw0{v;}zZAi>?;-5r8E!QI`0I|PCg+}(q_ySux)TX6Rw|DBnKsi|A@FfU!F zPM@!P_1e38cdy-F=yy5suQ1p!004ZIln_w_05B*30Ac$A`uTB1z-^^I)xrUn$0hK?2u&CCz<>*rp4bCQx1%#o<&Q|H zVyDZJVF<0NiD6SOR-*Llp1Mp6ElRPi0p3;o-}v-p%pb zdD4Rp^+i%lR?D7qO6C1C7WqXI0Y}~IPakjZ&H6VFq!-WZsFdAP@tHA?w~mTVD5%AklsXh8cWn^8@%q)KA{IC~(BNMxL5PsT!ivmH*$x=goHCyQLiuQhN+HYK1 z_k3=@Us)eZkWhKpm=ph!rKa74NzK=&(t-T}Te*6}_IP4#A6G5Rj-j_2h(zqT8Qlxb zvZg;diN`KIHf^|T7S2;}*q*70YcDW7^bEtnKI5o;vS%4r-?w_@NVlEh%6oG*mGT5$ z5&o+i)b;g#zr*v!=jM)Us-1cr5p_&6OTD_HDAg2eB)!8C4I26ZZ}z(H;mIM(;_Zro zS0%Yw#^BuA?)eVeFyS(mGT&l!M3I+S+W7qYdrV9s86Ju~Z4Gq#9UESvu_9tV3atsU%se zq7+^EE%QiO@qwmgc{xm;sWe^VjHOk1SBew! z?cVt?nqx)N%t&cj%gTvWSu<5uhU>C(NOk%u)Yr6!<9FtNta4V$-FTm_gP7TcO*Kl0l81xzNWoIfy9 z9HrKI96Jxs(#kTP7^OUr|IjPTc((wnbXUle8(NYy>84{$uEl0nPs5fIeN^kpIBp64 z5i`QpZw);?+lxkJtM=}h!i2TI^(@WPGR3k5Nm{rV zZtHN_ukKyf)-JEV5}v%@I<1!U4s3axP*so34A40r#9+tJ4X=F7N10%(NekX6Pxv0L zPZPaW#%7k=kVFKfNN+%wP=Miv?U~-!S=L(TuTesrin}J4Ik{}IKPGy4aBUN3Wf|m` zM<_R>`YQr5Ha>^hdi zcmLR5sOI8Rk6m2saMn_=*nwJ?WmU&#b z=y@P2{#Vvw&d~@nu!8;KAi&3khh($Lw)Q{+cG$w3cGa^|ouW-JSg%vI9Y;i-*8Gz$=+~wFVOiQDQI~C%3i3PudZJ%Gh&^~IzcQyTi$gHSc zn1#aGrPK~R?irqZQQm-SmY{IBVIN$;6kR7_vRi5v`geax3bnrzUi`qmvh!UB+_ zSjJn9H(e!tQ5fQ4tua`>diKPuTd~s2b1`F^@nq9g$8vrwYA-6)t>8K~M`wS8uAUS} z#l$$t89oaQGAl4IoYw;6@Y1YRNP-nMO7aBD*;&`OKP{y!qnAtkd1dWif5>8~K;KnM zZ`Gqn_^?op$(pg&(U!Kh$HSi1fvDvCMJk1J!Ux?~l@jT~eS}_wkZhD044;Qtg1Tmw zG>2}i76K))#Qf`~6Y@(J3@wMN>XnyL1p!3fUcu47XuQTz_7W#tlNmG-Qho|vYfNWR zwF}{-A3aSbyD{wcDqR&g0{ijV;~`~^M|{i zc%_mISawPNA+;}Q@7?fXL#)AfKg=Rp|5gyinyIaP12gTWlk+3;Y15yG>QBC}Df)A< zgih3!M7rkX1r>M=EydzsXrTPhK{Lz0F!Zy2xG497jsQ2a{1PE_|NC>~bjO#SPN1m;YQ9h3!h!{m- z(8HsFZQl@-xmOtcB2g6lBVnaEsisTMC8QFv8yV?q*y)Ckq~+ zb|v&JYRW_coZz^}1_S)}uQdqJW?jV?rw!wN{2g;PPd1nm(=Lj^CO`-YGC&vpom? 
zt!$C`hp?wJA|*vrBtK!)2$Y)$g4{`>iYXP;9{z2J{`{@|OlUaFniSo|>8z#5&X(Z* za+e_;C6U1z^FRc<WlOtA=jNkTE*5s&dIW=PTIZvA#7xzTaB-1&Oa~m^yKUW)|GBRfsg)y6+ z44M(&+55F-CPLf8exgsTO>YPDbykTK=nfI&N@2H`?|;gKUGLg|5ac7}7`D-0ph3ss zrK+M!-(;}#T3Ey*1^>!Ma`K}8%Dx9LpFbE8H1TyyNr}d3DUgx~&cjz32Jz=5b=lj@ zLNq6$7nvcPiemaN0bfRXupA`u0>8)gT5v!30R=5?TjF5U?wAn>V+<)oO=b`%sp-kN zYK^{{IpV^|_M(mVhR`;o$UGkC5Z>mCWfn%BAMXHhlN+Ad{bk{Cf zQws;0z2#rJIGq^)PT;t&B36Rew|W!)!Aw6{E-lO)wA_@eM%4p(S(l(^fE2$Q3G)b= zld=b2%yx@QA|SiaP}{A#cqGW+2Z`MmmS`&XP*!CehYBS@dnP^A;_bcf)1*{^&34$F zC{3N;OC-~Sis=<*paP-$zkpM>OOAr24V3V%Bz-Z&LIg>fAOorT?Y`W*mu$)c?U`)^ z1qnjrJ{1)nP>iFzc>T=kt2vepE3Ai!*;?MZXDnTqS{{Dy<8uYb^bwk57shTOy1)i*tcgwdr#aqr?05MM8 z+t2)%jMGl30~3BAUrFeQeHzE3;<$&13k0dk3p5YKKnl(&iyBr2Yr41)(E%z=uqsEc z?Wp^dn4*{ou6n|t5|lsd`(Guy9g$OCTXT}JWpch-4JQcbbkv5bknCWb z`1Tr**kP1v0lTuj+YAl#pZ(dY0jbG#&&7nubIlI;MI1u283LJiWRW0tk9@av2yfH$Q1K@ zNkGM?w*zg#WQhkSM@aH_7axF~Vpl{r${La+l0b=QuhxSo=tt?6$sh5W@nupB`LbJr z0uic({yW#eM}h$RU202M&%7q2ri*K~qizWGDtNRUYcbN8LW-)pDrE1CGQ-~Lgi_zh zR{4$=Y>}jZL)VWi`G-0x2Iv-)BjM~fC2GT>MQY3Nwfw>BWM8yP0dA2WZfpo{Jp+RM z#IsI3rW4NN;EYFqn06Iuq>8{o^8Kl~Yg-7>$(BRE{qeo5hqT#NB~bdhi{PGoYThqO zSN5E4JpfFVg(@Ifpl4u^>n>I|c)V2kLCgDNN|pT%$GC$uUVy}-oOkc8~H!C{PX z_W^TI6>=0uMXk%S%&Ke$Sr=zq)n|@IMK1PB&~{%=_tZpCr#oX9-B)t3r)b^sN!^k< zvRI*JOipMKiiXn^(9tT#IR$SCF%{U7b+xAkS~1n5YI1_QD;J5bqzAx5?YE0sQar$h zF0y%t(_2C8C2zkKv0!Gcq2+qjfgQ~VlS5l}f|Myipd*ib)ehx5Rfj#oZ$!j91yP1u z3UbFZ;b2fq>^c?>&d@L`5f7z6N@TL?LKSwAN)F<9rA&x06kfWBc;_^ylH%{u9>QRp zv6Z-*8J#0g0R5Q8;n-XABRXupt>NcZ|MATIJZ`3e8Pz5{GcMI-u0Bo465L^Vp^rFx z@6fHUzEC|Vg-(V2Y(_$A=)9$=uD-*9FME>?VmO9~p6Q#%N_9!xg_+@jH;1DoT3992 z9evD+^vGWkhnmrCy2pagMU6US$r!+VtvL83iPju5ltdukoKr?2RVvM&fXd4xSCz8IC6M1f+Ar6USA+#Oy=HqK8a%#yeZz|kOnG-xrE=z1%r|@>S$?r;j9sb z_mF%^U)TpOh=c0RwnL5h)fpSZWOPDsJ~D^=^c=!&1rE6CM|X@*Z=EXDj$kuR5z<9K zbvZQE{kA}aJnDvZQ{r>aK@6NEk)*R(`od(!f6z(bY)Bj`CV&i3jNVU;7J;>hh`f5q zfGD|zUQf!JuS*O~=!YObUCRexqk!7Wq4A 
z{&rFjD58|c46def^PtAh@GyMEL<=pa;1<&BlbyzrjGOJ|+8+GXdjcw=dvIoshuH54k?UQNPebJZ2T11qDe_(_prr#};MitD5elJ{uM3`yfkg)q z3(`b5f~$bgPqs>Q@~4Blh$enaMwASC-gRBUpW~)xXeJR(9~N-ltBllPafZCL1Lq)J5&`eYT}Z+Lynu0 zC(Ecku}pl589RB9#~2Aj5)sTpcd}a*LcWN93XymX(_|)}k|ov~&w?`|&KW+~$&fD8 zrDskQ8x5o!#iL?VxRjCe@nIa~R`jA(!22?*P&|9+zK(*|$Btiu zj?W{}A+9u}l4KzZfi0V^rNOxmv`S;z7uFgiE1DFoZW`rlY)FvrTjDK}!b?uDU`-)9SBE34uah%JmS!!iFyQ@*n;o55Wjxb zxd zW?tiZ`Ud_L10MX(G!l-4{`Hd4bgHqAdTKovpQ}iu4*2O@zPZF%AfvilfT^J4@vA1x zV>WpZqbI;AnyL{6NYd8-YJz>VU1Bba6rnokBI1Sr;gS0ki|((qD$0jllNo#rj!Je6 zjpGRsp~>VYN7S<#TqNiAE(QfA`UQQIRP2k}Ao*a^ZnQ^nU)oY9#+x-=Y z6**)FHyjoWT$a);Vr9O>Jj+-6^*NGAyW=WIzFs;tT?DpjBRGsx2}dFw_QgD+N#O;3 zJ}Ua4kZ(M=J~#Q-h%tXH7V{5IiNJ3ORy(=8%347X^vtoBWv# zP>Uw<_kAc#bH&*dh-Rzc-R=NGfQ%UC@XIBLzI6P~$G5CWd@=45l zK@rMMdnO}gT(DnIkDyKF&dmz(u)0ym!hGF6wF2@B%a)~-qdISbPbw+Yw<2?SUp(@s z>oVq`a=*IgDr+X~)xB%#my#g%=Q( z7A%F}ptGmdRGD;rcJw_0J+L^1<0JVuDkyA(=~{(*bLI!$Mv2;dILlxBr|8{`fjGiG zw2V79Q34i2jLw?eI?2fTi8c5cVAI^N zs{?HXtn-d5jp;`Y4Y3Gh79aVQ5vCN5dY`Aj$|z0^{KTaZ9PDO%ALru&5B1j=KCoeB ze8fCykJ`iU-%4;tdp5mURvK?kj`)6B?~F>E%ip}W!CvU?Dl}m;joFgB=-ppg$#kSN zzZ$@v8zFR#Kbv5D@S~kyG(R2|+xblP3&;n)c+$VSQ?JP@OcRLUqCz>WP^88;&|FCR zk`(p7A^dH><6sSiNd*8xKvLwJvfIkvb$4YHH4NYqhY%VZc?$fOuhe&NDsTuS*d|}i zdg`W~pjPv5qSO`2HfU%kGcc-;CFPn4Eh@0er|`K&gxR3fTSDK7d`TsczmWdgdm;HT z6d-DE;psEz7%)7R_Hr&0JL(w+Et#(fBJ_nQV9F;Gy#OEzKot5?O8l=RD*OGf45*_1 zS0eV5{44*Tm8sdYeMXy`A7l#A1C`#-zl;n|T`*198M_>Iw5(sGm#^~tYSEmDdF%fD z*hAP8uNS(Es8&V{gGPvMphF+;N zqf#*b?{2CpNo_eP^5zB%9~8$>?k4hYc(>n5bB}t2t=HjW;Bzu*)#MYc{s>cQH3~az=R~O^ua`~e%wEy(=Kg07 z9$WW%XG8as0;o>DS;4pydWUNq_d$a!llAqJK{NDVX=ml=HS44Y@1Zv|dm zM>;xVDJx3|J4l&2e^XSHs>;K!VA^4FKeI#P60k66fzj}K{`&sS9 zj?w0fazRw$)6pvMsJ_ajIT?XrM2{#`u8P;h>a%dRm^2quG5o@;1Dy|gnE$#bhSPU3 zLXfQekm!k`+ysS@<^C3I8k+ePjy}@o^nE|c2^wY3yU6;a&OOYa;hSm8028mD(6-Y1 zMx9D?%vFAxK1T&eCqOLPsUP5As*RB^7_^6>25Y@#CyO{xNDQI~>&?!> zpAXLR?+=kNdWe4p>iRPhKdoGUjZLzg-&rXIP}2Wsrvk3Syi9CaW}zg1GMN z?6LQ7E`@i5HF^ih+y{l;f|^=5?{2HzK%PU7wd8)Y}e_G4t%|~J2_<8Z=Ho;=ITHu3Ru}F#bPztE}!{j 
zbmURg6BV?2^dYW#G%20P=m;mf;L3i3NCp@Ug_&Dtl@&{Lf5-k83 zMtyLYwb|+A;?@FJRMESp0bHL-TAYj!Ka9|U0D(L`Ve`g%*t>XeIzK4`DH0`r*T%Wf zrz2jP*~hg8k+&}N36C4QEkv2sJR+wg`@9WZ?*~6UoIyxV;zYeY{FI-+x zpc%QUxZ*EAO^A&~6zs^l#sk3blYRx0K)$7vv~m{jVhhi#0Cd(ep+C9(of0zQE-kgq zrwPLzPvV*QiR>65MGg+(L+0j#JXSm(T`^E%eBS)Z-<8mugYQo*Pj?WFdT(oFA+YZ& zEF7IBQEy0p8@L!iuA>6iS#lkJ&`{^<-HZ+ii{N+_ZiZDvPkPL?J?!8vrcV9E7}(x7 zShc_=S}$(#VDrp3x>a>_*I9p_&3-;6sOcjhdhBP40wYmJ1+kb0Q)DjFUbZxpY9wh^8ra zJ~&Z*uFP!FC2D?#@G5N7u!qS*F|5VW6h5pY;w>bB0MVY$Zkf1U+iAOn6X`N-_I3p% zVN|U@xn*bVkQ9{d__mFi0(Xy^~ohDidw3M1X7ML^6goXwzZB=kh zt8$@EQfBJt&Umcc79g|295GW zTJ%o8h=-wQwU^4nCSjzZbA`L*Qs3(sf?)QS?mz{57UQ#J$z)k~6%m>fv-p>^h7+5wx>R96nup_fj{t{LgB2-V@3W*bLfLrS0#puzw+H>AZJi-5U}umca$GKuby z&mGCVle1H}IfKvag3Y6OO4CHVqy=3$xB@ zR#M*v2jZ2N{TN3AoW;md^Dua@cf{P^Nm2|)EE&;-n_MH(k|bk!u-*NSUYvH{e{ciY zR<;f^56cB8@?%Ev4Ov)pIKAomC2ygsqQH0U7uHW}Ic?{refIjFRBt7(Vcl44) z3UtjyZhY>QHU-RUi%T16!mmqUR$filVp~qyO8gEkyy!YSUR!X17N28NIAGuhi8T?8 z{7;$+v@8)6SW*W-KV?>Lj$G&2!`Y$URL>l3sZ-nz1?^86n##Sn=W~ zAXl*cx?Js9J5cHH2NXz-`uJI@KZ@X!ez>0{5bN_ULHa$pz0kYUrW)2j?@J!&R>l{d z4sv=uP(icRTm5Q4SB_4>Gu*+fpX|#F%Ej;Iq}n~fW46!5(Mb=$jJO|THS1hrOEZH1 z4i`%d?OT2De;Gx^y&3>~`>Wkj4A}7mLl{oKsD3W7D5gSX~oPiStj(8fBcGt)NX<^vk3Nd}-h7=Kkol z@{3p@U%dGRjmxeKKD#o?s&Nufg z*n}c^(~x_vO+V`KPs4vbF*GLFN5tMoTv_k3ob}gRz4x3IYqrt^rN1(1Gk(Du9sw=M@+Ga_^}cy-tTi zW82eL5MO@U=lAup#rOP+_t@!Jb`4$WB)2J$N2uN?elLZ3CoD`{M`p5{J_aPcsMFCL ze{Hw7BsPJ=2IA@+0S>yT(WI(ik@P07-PahKSy~)a#mJ2}b)c=3LY^#NGE+W_Z5<;w z<4YI>Lr}<5n;`%sLbM4Uw29}^pw6M~BpCijc&n#A$XLpSwS^art zeF2TW8LK9>KU^pc#xU$F)<(X)!Ddq-y)Xm8=**;Hq*?zc;Y=pD-(K@eL4FtFt{!M% z1!@w1;OtAC`r{?M$s@1P%j-JvcrBEx=xL|r;j=zBU6*Ql^9X$gpz}jX$H97~^+ zraK~l%iP{&Ha&~DORv=7)-Ks@{MJ|p*<1cty>9%Bw(w90Sm!Lp-3rwSqv{{?tq2t- z(jdNvEJk$7@oJP%K#&ar4W!`Ja8r7&#sV`k=XJjHEe-wJ~K26UbU2pSP!sQ{5C`rWGVr(0osANj`5V(P?|< zccijiz>});@myt>nqQsFE^qwmvAbw`AwXJO`eXGf$Cb1=uEEmVSyE(ida`l{V&B^9 z#dI_-(rg%{%f5fpN5<3M0v`1u<8eR8`l-A#ZiFJ-cA7MGzKJX^aw2j!OPiB6LKfpD z&;htH(37?N@f^uyx4`>jCh>2;vVSce*Y{{O${3}x0{SyEIQ_C_S$lsS23!}y%lOMz 
z1SShGwbqu~++Yfms|C;dkm{P;FjyD}hitpHLGA|PcJy|e_Kv$)#=gs@$#$T=?3s$&xverFRCWy2Rp zkGsT?cY9A`$RO4Q_d^u2l@#!}!Ev>mTW^xnA+2KO5Fh}l{%hco6MP(owj!&x$cMLj zraLWX)N8!%{;qnX`@WJ@@%}W@a0oGVL$E>F^_S<>W;3UoxYq@vp?xiEq79AxhV>#t z-w~%axKuB}Rn4VNt&>D1xjW?&zvd~?Nc2}?+{){_Fo987$b$>n^jO*^tNk~{+VMn+ z%nmZB>?T=k)Atp_o9#SL1z=C3aI(<}Ibzy&cRO#>3u;fj(TdZWy!8Es+GmH>=8k0HN&Fg+QBQeRLq5h=F=* z6f?11oNm3HqY0`WE!dyQPGf8_Q>U*( z0aJy(e-w+q5ZFmvTNu`|WL?D0sYFR5{A}JsNV1{!y{e>hcjd|~f7wAzDw8~pgwpEx ziQ-+ywk5(HKtHmatk!R(B6;f)llM^ZdKCYDTIoL?iz0rYVZei8lUVic`J7oGgA4d_-QcrBrvFU#@#jwX{b?%#}Pqj18Cl-$yZ9a{P&zSt#*R;)* zjUhp7#6c^}=aJuRO5!Re!k+)|>n$MXjkOuFZLVtSV4fW}g8-Vk$i->>D5tZ+CYVZ0u%9Br zN(VKl@8`<6BSpV7Q+~w#{1ofA)9}JxjWDp#H6oa%s={E?!l?K$)x-X^&Z2=+^*;%y zu!PvTJo=sXkYN4eQ>rjZO@FjHQ}cV>{UPuBPx)Xg|8V;Ngx1E8%h!nVBc7h>7OTm1`EhU9>M^8b0WPMIKuYc+_9mp{8 zyM7wSyQ+jTnuY4KwP4&T_u+6$Tz#&qUW9gV2XY%n_2m-!G=+#pt}<)qqTf6(xsm)Q ztUvA&Lqd*m->0)4_Nn-vl=4|gil;o})T6<$ve-Yz`JUjET7mp-RVxIh;SE{yEOG;X z{wW0Yd4~gCf@$pac@J6#hX$(Xx6rAtDO><&IQC&YR@?yC>W?g!AE=6fkugMHvsnH$ z6s9umlh}egR!SX0BRN({jku_?SxOD=`vWo&4$m70tW?2N_rj@EEOEDKIDCTPXB-Wx z@d^$Y<4h~X!x@if5(62|>U{>gS$rOm;BX2WQjbMU+!+8!?JipH%13^w3Os3J`FMG| z*7=s9Kykiwyt_U&1oXk~lVQF5B1YvoY92o+A8%9g=_0aDG)C3Ja4WmWP2pHKFXdAC1)8!EKa3 zMC2fAK}SolWcBV=)pB#X*PmP9+@;$shsK~jKU;kkf5NyS_LD(HkQeKJb)qD7O7$}r z7xBCxV9n~hHtSEnn{5H&>zWD!VgNze`{HyBtA&Wdgg;`UxDgL_>9e7fysD5_2b0pd zS-SBe<-?f&JXK|yLR`V)?!mjWVE#6>R$s9YGHM9TIlHJBbzZPA(qD)e(&9%RC)x40 zq-}+Yf@eT%w-6#FVFvY+I}YSZte;3%5QAR;TrpXPk#I?x)FZ*Fyu^!kjZX}~k@|D^ ziHQv4k@2A+x~rNL?uMZ7&N$ldTqOlb417m5TYFZ^)8tRbG*AiPr*t2#{}6q%&$<(x z;${$ENLyr3!W}42_TnNsFZ3M`zS2I>=99FI2Qk666rALGaS*7HI?Nz^n z^&+aO-2ND}7}r+@jPNuD#8)g&@1qQfkuieg6BRU3&n2sk9QkLX;o5^y zEUm2<51#XMp;-U$z(=BagYP4Z{^|J}vK6mpL=EQm!SOavaTB3U;hoTEyP1kc46$4l zcJmita0D2gGFholeV9rM7w4v}c|A1}K!BqB% zv5Fd$1qB^n`5#I)QNcD$v#Ec0|M%?6{|ZJ^2ukTP=kz;WEV^Wv2xx+oC~*w1Wq56( z(ErxLO%zHBH8r!qL~7w`cd|U#(*p`9Q-3d3zLO#=fd4e^r?#%Af^1=B0m91=`snrE zA9lsAs=wa*J)Cd+cXmWDGUaYN=QGQppHnTGA 
zp>1_IqYf2Oq^>6YAKIQ>6-wfPKk@OTY7`_XZ5^r=TNnbhhc2lNhRNE}^X*addl0>- z_1C|pEL7^p23G#${%w&VpIsnszvwbH3{|HE1GdhvdYw`QBPdG*~m8Zu_(S zNai?Fn14qm-tAe5%^PfU>qj&qO`gEGr=ElwZK>UbmAd!S7P8~(-^ll1Il8p?y z56tAWJO%RYaFI^^dFF=KUFSxt#KSqx8RarN{%7{yFG&ZFDvQVAC4a$}kkTx-&;!=JDHOj)X~s-;3!!R150I>X=ox&q~TLH*&(x z`)ZO6@?|SQff!xemkY4$yRYvXk|2PyJJiAj*-N18krX7rEUT9~lUUv^Y96GmCtv|P ztLgnRb%NKNF(9YTIvYSsl30?gq4Pmr;le8|o+`!J_paD!sgSeAc@93s#YRkY!@k6yj+q+|4&M}iKEe?lW zQ&W?t`YZl*rCI#@Dh}Jl4MKrN%3ZNv$P&bfKV&0rO;7mjO;k9vU=a|~ZZOUpSY}-7 zN+ToQhFyiaUT`R=53e8`07TS1Xlt8wbX$iP<=6>J_GAt}GX;V5u|2&ZQ%`WgRq!#7 z!`@p{*ptg5f}^2Wi)Uj1z{V03p9~RuQP#{$vGZ)Oo6kiFo`oB%fWhwr2Y`abv6TJN z#>}SUac7^yqy^Q0UsDv4;;SJA5ZN|;j4Nm|*SZvIJuGVzMA|BM66&@~bF;s$19zc* zf95+i)sR7aVK$Cy^+o0J-R@6I%!Z^0am+KnZzYWV?~UpjlXxxIsXc7Zc1FGZXp74@3_4}N(Y_zt+YT%K1AW~2dF1{-?*dRx!kRy3_YsXY2Dtx*qr z*P_;KiZE+F>Nswpkgi3oC*16a=lmY)5VgvUyKu)N$FRmdoP zTYqPYuKLeykdL0XI;%jcWl?91{1!EWtVmheNvaJW%Cr0NU}F37gH%c?r+ZiNYbyGU zUEZahz99(d$jfhD&gFVT3~B)fnY&{4RPNAPg;0IyZA(J2b>2`>7NaI$0*)UiS(RXe zA4hU}>TAAZuRnx@jKFhPck#i_{tjhHaO&5{jmlys!e{j}>4Sv-YPw zBd`S~0w?6dwQ*y57LxySyX@By(GpJvqg-naEHF2>*6DA_4%7rdAC{y$piLluqnaOc zNosK<>Na_W(r^*S>2_-f%lxj1Q?*^wq|maFm731tTK2YR9in+nM)$T93JUbUEe#p@ zW&SkK;s}tfa%-Y^miUz`Dl2K5-ehz}fX>{*$l_wwuszfjQf{^QsF8>(B@s7+yGASO zS~DM7)RyqHG%}xX_q?Pb^C^d}B}0E$;L>nh~EBS7S2}04)uDBP{fwb=s`$fGOAJh$cx9Ee#f2*@)&vb6i}I57dZB`Nr@%59 zqZ0+5Iua`@JxQOPlfT);-Pg08C}dqacfG25>>*H`Kb~pi=_&c!4{eE-=vSUrt>JzD zJmhY}#@(MwXItr@g}hY2NV4;Deyv3Hj=oY8?_WX-O>vf`8Vh^Kf-6*f-b zq-QFJ`23dp#@VpIoQoa=50;x~%wW1=UCG4d?D5a~Ath;Mn*J9#&m8 zt}BgwbK$e@3(vzqFdKTV!3lWghuWYM4|ifG2Xi_(w@!_wfp{AK!#c24^$nKmc6@ z25>23tmqR^YIwRUWR?}MBpVMY&V!Yjnw<_@k;?TeY>B@No z;R0PV-{5W+z)5Hhq=CIc)k)4fO__%$GGsuUJ{Sfl`$2E*##IafIB9X*dGaPucmC9u z)^14OJvW7&Yy4YW05{4^e~DLM;YMIf4q0+T>1wLoGY`w#xN}Pl;~Ihit6TXyRn`b3 zZi^IT!z%065*0tp$Os+~tl!t{nU?t_2>KT?rz0^eKH?M^pTiS7rumoFo93+ogo9`A zAqAYqTb6`2k+*}T&QP;jWjf!9rqg-saoM37&0NBo)Z-1cJUASS()Eo1zs35?5-z{J zIHzA?n&yhLL~jbi@3&+!l!|nT5criG##&cACR4ktpj{zU^x5y1(8=`bC^(H2NGAIvYveU 
z#}^D_Q(fg*FVZh;${*@QtkqNWD2t)L1K!k>+q}A_6d*32gUeJNYQRsGtVRSsbB2G8 z!O~h>2CjBRwB*n=-pgvLjcBhd1U8g$N^>7HaWcSsb@7^Pw2&q9XF@JzyQW4>P9h&) zYk%IWCb8U1Y*PNM{QRG8)t=#iH@58N!06T3{A)|NYVKH4jFOv^69TYx+|xEH$d?`He7?zjCtoiJ;RhS1n@}WLEa^(Vjl0 zGjVg-WK~jk{JO*hx;J}M=wt^~KtCu1W*i6ro-b5GG>bT*E0DZTyz})d7F}Rj@*U`FS+&r!B zpcDILDFYj<6wph{y%y%YF%D6)&DRM%#(JQnkRAy+iVGU89=G?ceZ^nd{@(hyE(2|S z0kd%!Vp$!neYo(35KzzPV0)kDa<-XMB|`#8^;&qX`xq?(g%z*Ka=r`Y937n~B-EVL zT>l<>5k={=o1mg>cTZc)gi(>gbJuxe39Ty;o9++S)Kp!Fw2fja%dn+&u1r?3$x@!6 z3X;zbWg{MD3M47z9hCvVRO7Kx>*3PI%@*tIbA*R{st@gJo#Ym=EfjXN1z?Q*H2yn` z#RJs!ZsKU1abdAPmYi;+Ir_3N1c!4BRPY9f=Dd;cQ_5~+Yu+-ncE~t=Jx5JVCU4L3 z&|2%)E!=zEv@o#hcklEX(irJ@Dr2}6OoGhLD_<2; z*qvYK3shJNV=nqCO$;3E|DBs$j}XaqVzqD$R&$_x$!U|bSS&ZvJU9AI9o)Liqm^cM zC6#|iodC~8f4r~}R}9&LDw5W$6y|j7H^BVH8yLw9#o%vNyslk_NqEYmURp*q09v7-RI$=hb5 z%st#g{`RSCC_BN12gjT)vL`(fAx`;E2DtLVr_QGC^6s4+{O1`SoGDIa>W#TsZM^C zB>oU`m7Ytd9$Vq%lvS<~>ivuN#asn9K?v|;eoBQZ{_c#A9ow0Ln7l!zcs4(Z3#VZw z_h`C^^OiN;MR;dP9VJ{XP^#I>H6W=#G8@~es@55g^;l9E-|Jvtg-Yg8|4x}8F*xc~p}xvFeaVia;Miau1Lgr9HmT`pv_ zl=HFK^`^S5hq^_VgpKJ86ttC|#MLP0^+vY47%c`hz!)2(P3_kg{|{szDV473Ak|$f-tlQb)49q2zwJNLJD*!5j>q^9_W<+CF$H*nHnNK+Lf5?s)=zn_-4ZP<4 zr~=t(p@WqihW(Po6ppPlDyF!Yp=df)+@_Wk?2SdXFw@3^bmaqO@ZP?Wd z@ffvPFaxqeW~$U& z&4E8N$?d@QrpG4L!xx!+^`Z3`8wyju@*l=aSkuzROBkCNd}(ytx)Cm=G5)4Vm49=C zVC)%zjr6#v+k6?Nprbru!qDhMc1qVH>H&ZD%t8!BjsL>8GdW5nfH(~j+I)qB#oyGS z2rW*>^f5&e=4MgyYI-JJRSkNW$S0E(h6|Ls$oWUO03QihiTE!A|62V+8xRl>{pIjo zmX12)is>mMbE!0O!m5tUYyU45@fdaP^_TYj3-ta?|57vm!qv)u$=$!a=Ku4bv=_E5 z4UXc;fd38v@A-@{p7sB==Kt9yus?FaAHjueXcxp_1kdVaE5K`L{5-$@ZHWExMuIlJ zM(3pwA?tT`y(!9zou?y%<9}NIRuUtoc{Fyx5sy$;_h%fYYl>NeCrVjD-IIsnP#HRL z$l{f$yk);qDpNtc+>(AkZ+xGpUD^BQp@=KiMS-{zx`X9_;4@0{>#1#5OWk<3-mHKxlPw3!auHC{U;OM&Ug>*JIjOcQ zBRC*q*O2A$-V-XVui78;CpO{kuEYi_#MC)yZ|oN(uP$IFq-lclt%g-PPHD|LE}fn$ zddyK^9gUbIBN@EVjPM0%q#4KTGaBAKCLcZU^YxH3}Rep&7(AzdV{aM(9;&6KPWlK_#GBvqc0$xN! zJKgJt_tY)XT8qZJ&b_43WPcHI=h}oa%HdV;)kkYHz25n=gio{LORnFPKLDnWd8S1D zwcc1D0T0Q0P`H8O{(E}9JI(TEvO! 
zF_uZ<^_%#$m;)52*PN#!=%_OoWanwCjWwO2n2m@}BxGYfK&8lxTTMRMd@MVA`cLQJaTFaL&%Gp*5pI_AF;YdGtNz5|<#qE3*z zMjK{U2Q_b#+0Hpu0Lb4ar;eg}WoRRr_usB7J~kvGpcCfR8Zm}v%jO=;VD+mI2)gaV zBlw@K53pT6H2xT@VSHyUA3U3fhM4t@3r?$0s?Ko640>*^JC%I}rhvH9Cfd;yT4R4; z*lACzHaZ*K9yQ>P{AEUXX~&8^c+;43c&2jO5s$lXtsB(8f6Qy=Z*H?GZ9c+At`*xj z_K{xX0|OeNg#UU(L_MnX3W(q7*rblS5vD<|F`&>~d5&2BCAUB)f{m&lJ1n94uInO#^YRm#fGSJo{AMvKMZ%pDxf;VCjkiNWTF z9OA@BU{ItM>CUiSr+S8M;xkIyA18i#x7P2lTtkXxbpVJdBgXA^)*V9|ET)l6ypUua zKkXX~#W`(J{exZO=j9}-(C&Zw-9q_Z0tEvwc=`Gd(n_oX#h!~yeSDi$dJ$?Adatee z(@`HyyZ>;_#W~``eB@LAVqM^T7Psfbvb1;JmA_K`Rn;F`40i^_1SB1oltcGFtIk?J zn|x%*D@iAx?dhCC9>`a@kZcD&;WPnnJnCoE=u$rdItl7Gy(b_IHbPXMj%) z+7a*pjddLCd-&|of?T)_|1}W5DfgxSR$kWQGi_X=Q9r@`lL&u+pYZ`h|3m|M?EC3e zML2_ry#%o#8r(ZdCeOw|zz4*>L}#H)+W5|T+k6MrF=HJPkpEAMqn+az3S&c;?5l4V z$%=Qv2yHTSXfTDWNwGPLPr{2)bKrp1y!~pwr6Blok~^Z((z7cL2^{BRDLMd1fi)0v za-B?aViE0UvT?}P)%1<%)*>4q#r^zjdTX@>A7R0NyW?&Y#DIZ=@-Fv+COhZZq`P7( zIa1iDHsg1Cez-*EN{|ZPU_uB<$n#BTw#c`@0czyYf1Gf5e5VeK(Ly+DG9sSgW%W#6 z3?fEq+(r9>$J-8M>MzyEAYr=T!mwtylvyM6u|kvcuck2{tl~#IT?1;5%LC z(8wC>Mtg&5^iO1e&bKAbr2Ztjm@M!n8XM0#PtT0R+*j#=!c<86yyCr|Yn(fm4wK%7$dbjgwxfJ7wKAl!Og<`Cd z$g!Svm3!AAwO+SLAq^QlPm@ZUrH?JKyiaFnR{F>h%~i{BWD? 
zzA<&Z*2(RUbjZF!BgPcXhp;uh|LhPx*ZxU+~&w?V1N4 zDF!o3q3I=y|apvXQrpqRcf)B(Q*787D1w8)uA%oi4q* zpRDfor-Rb^V1eNdujk%$PGM>7e^`6X6P*&m%}2m2k2uU{%AcnO2s$8qXtIFXRM7Rt?WWkomrTGyf{i_l6VR8 zPP0nsd1b12Ds%umXWfkTz+}0qimJr6hQ^PncL4O@4-&L6*75tL`eWLv+y4c1cU$@N z@V_DYpND;6p&Bg;55d+L;I*BS7-)~+`u;oLi+hg$X3b$kdOOYM7Fw)&M)~)EZ-!{h zz}Tz*mTFkgsuuZZ7l~S*;HYO?pT=Qz8z#@HR298V`*w5&quQ~y%@gIydODo^zG_>0 zC|WU1)V-Pyybg}Cn+S&7UenRLL^&{VF+FrP_qZW5VbD6V5z!e{>Ufyl?98u|K`pfm zU*1cPm?3NiT&9Yfhjr=yW)}hIot>;OIV{`&b)PupWyH-I$aqYx~Kc_MSsUkjMCs#sGVXhum=+YW8GIJa{G_ejTr+LCynp7}JdlH~SoRC>gNRm!tBTwMju^Pu z6fk4--;fEol}Z|F1{Q05M|s^({!k4opg<)fI>!`j_G~nzcKPqj^3f?sYRrnQ zN9T9^T#jnD88Vp%`3KBy#x}0}k~Vg!;Ra;fae_K6=OsJji!B-J@BIQieg5o!hAx+6 zk6-^gLL&L>c2HSDuGmNDJ^H>` z?2T18HAc9}qO&pO;bE#-a$hwA93X4io_52m;K#RY`<(?8tKg9Yht)N!>@L&AA}I_Q zfRBuKbt8KuTxT8F_#NOoo|~qC0Wts(FERPgoafNIdezo`GGXG|4Y8aWvS7Q8^~9f@ zJk!B<02+J*3jgwi*e`iIwqxjVpG!2%1%!pmDltkkza)}*TZcu^%G z>TN7jHI!S*aZu`x^6-wTv@6%0Ot=73Kol@ZG^2yW7|qhQgs8w;A?dQ}0o#eR`Y7Ty zw}4(eW5i<=c9ncy$6f4@iGdsv+!rVni%E#fd@$-CcvjHeYc21M0TM>IY)1E%6#mk8 za;A!YU!G}3OE12L?)gmQ@bgc~J1q=sfSKE`qT=ZHSj7$`3X@VS_=q)j6V=%~10_LT zbhb|`lVMYrV=QIjle?+4zW*6S{Ti*TO7qib$M~|gs2kllez`N3EK{P&{|C|{^QFXH z-S4KD#W>5*P@-gZGOZ%r?wNfmAyznnuhi-oOVVRGl5MWPWWQ}*WDk&~nz5mz{J(4g z;B5ZlH|HZE6T2SXdtHh!Jle$ZtkOi4z;{Do`2*jx5QM?`emHP_4Xy)8@emsiC;pG5 zL~;NfqEUb=JsHzKxTuI9XewP-V7e(=N8|1@i4EQ~a1Wd)$!<@83a=@11Z~TEXhBJ+&g-| z%E%sB8S+b-d4yLtYC-*x#Aad>R0e$o-vKB_xE&!?9dH{0OgtZ#EPnJ(^By+DTJT4M ztbmwtlx3@XLC7DAzdz0T?(aL8=<}NRh4dq`c;noJul9oZqmwqAZw2k5QyRGo2x=1-aJnXSAE2u2vxtc~Nibqy_nhi^bH-+@&(}=#9LF zd1T0a1kj9p^bWGHC~ttUBY%Z*6DJig?%k&02YgnW%hX>0SWu&?1k!GNH}z`B)1rHC zGH5V>swelPoi}`UMKxOV-3-n;wx&;Jy@-GLi1>_U$N!kSW`-g|Vkt-@uOb6P$&3*4 z>;a5OK2pRmpXodd^|I|q&N$yM&y1hnHt|9IZ360^ow_q0$Ykg+^maBR#>04=Iux=g zCf=R~rFm4HM^G+5srV>SsZwksn1g*1UAPsE`oQ_%x-IP|_bQwIfpwZv6%Zi_A@7C% zpSd=nAFNEdzFB3xlR`Y*zz$D3;SFZDs9*t-3Z*wiKoW;*GB11d&kmfg&)bUTf3}Tt zv+0;;W7jz`+%r@u(M3;xNL;Wcn 
znJwR*aGsE*JU5sQY9Mi(&DRpudH2O%m3{!DXNiO3C;T~YHVNvrZr}`n^aVNaDwT7C3dus9#wOwX1dPSR31H5%T@q%lGv=DnPzP3?h(z~i!9$L+_DZq9RcQT^%F zVtDR)XCW{K-|K#*e_vQYpui`8gm8UsImM0kvohOZ?(qiT0^HDOtyF%vW++KZ-UlnPE)QcY!vydv&MdI5&;|0KtbJQVPL2$H*Gk=f zM>*vQ);iOI$LKX|m?uc(!(5c~NdXPciuQ~i_L9x8*2F#x0};HN2`fRUAI}m1FbdTf zTgmiO(SzY`w~;i!>tV%2Cf$M$A*dVhw*)e_fbIYx(V+QFmMOS=&W4VxH7^%0Oi|I9 zZM%=@?|e+EJo(qAf0Obc&%+2hKeJzbhk9(nX~yoJQWT|Ig4dG44>n76@hCpx*vCt} zVNbtqA__hCI@C*(I8%?CRhyYx^-}zdy0dLFjg4kR>|&KlyeGncYSoZwza0Nqk_97^KxS35l~0q_I!+%%bcwkX@)T#2K62q0pIYl0rczoIJyarbbaVs`JB{?!`vMJFPC%%|?_P$3 zrC^Q)G^pENYDhGN=r}?@AxF;WCO6L1M{yDkOb(1p3l`Y4hU=I09z9xuCKFqB&#-F_ zgUxsA&@dwk@{z{lQBBG7qhY_C5g0zLB`mzj%e!lmF)dIHjke0)4NQ!Vg2U#^mcV1T z6%Nhe8dxkPE6D11>s-))=s7YoSbhUTBjSpV3x~ZO2r_9cH`!pM}r54rd~(F z+jV3d!+Z7XKJ`Q;RDVUl;h_K`wQbcy>3dr}RT`)*edZl71WpKRI!2L!@{ zb)tOwy<#;|_r`4rEwwM8M*TOi`wy!Bcj47 zMX4*Q*EiLCU%n!N%iAkQM3Uz^2&wBfCU_wqiO*ML5gas}ZvDA&@XK^oF!Ba-2&H-#t| z1cHlv+DDM@U;ai_;38AyA$PWQ}y*3H2*#r(9=J#mml&IU$&mHO{Cs-PZGTo9B6CZ-pWg*GIScgU$ZT= z6^RfDYRRJFZLk~)qa=ob35-Z(dsHZ0tNc~uLPN9~f^gfF{PoJ6t@`SwXdLPx0NE>_nTFS}QJ(}MpeYz+WdlsEF6@O5s0t-O1ClBW!N8|4DYBbHi zKGah{o#G_`(FOP@FEG^^1Fd}RC8l~g-SXcw;R96;@u=89 z+@E9Bf76kYBvdU`=m*Ux(q~A&+_k(4Da9nuTdfIS^{x9}cudNkQI+lK^{_*+jsSQvyRdOJLL zObf4-9T309`5n z7?M!@t;@fSuRrsd^A!Tu{ULAg)#WZyxd3lFoztqh;DG%01s3plef<;?f%!JMgO20u zEFZM!>y3D~fhOO1Ww1Ua5Fn9n)&{TJX^2+r&>-BJhRSKTx)}WQioRoZUV~>ldL$Wm z9xR4+;kM{IOD}03qgY}hy!xb^j{odUNarQ*9JuUpF>Df(s5QikPdxOjs)dXiEwPqU zGDr!-F0_qLgJCs6sZ6az1ypugW&wWjAH}Es1{}{i;NrWWkc-Xj{I?hNdOVpNnXgaY z8y9HHqg{~EgSTDlk8q%^p?nKz6(Wj8y$B3LN8S0=WoIeTWm&9_0NgCD^l-@Y z0ez+7jaEue1!i}!<9J3nA67L@lS=bt|2vf}1nnELdiK5?pihplUCC6eCqj3gm=;*= z%WX^K%7flM)^mB1hI15Wu?BCCODp}?CA0@q*<-T$p7TKz=NDa{+;+S_yUR-1rIX=B z{&{^0z<~nQwbQq0o%^ljIoz#FbRlllsn5PXMRjxVVV-Yk!>*HTO-k8hKwsR-11s)t zjSq)@c3g8^34u>0%Vt=MumMWaLN)>o1*#l26%j%RU@5u|J{A09t9beN{WG77Fl_4? 
zk-#lN8WZvY3=^ZhK~jWE0({gwF{!bRm~`s+&($vj!5rESt0@X{bn#zM(aF5)nVOdB zOQ-_UK(^QX>&ZO@q5qkHS*nrkei*9r$rI_)j@a%-g9)_jBhB^8Dbi?7LKu1i~2vx15Ty90+8MoM3Y@ z4X@nRPhwmaiTK3#T}&vi-RX3MI)LQ-78i)o6ixG#I9e^y+!kp(8GCn8kl99!+W^N;s`G7emnF}^~G&1Sr zbCI%Dra$=sxDmZ&C?;sp*~$)m55J!(27{GIA*bBh0z&KAJvZe!UnN6kg?jG@HiyWm z#KT5dZVT(#^#G{xWZrf%5<(@Gx^%jAzg{=jD770xATkgsJR}$fCcW#=tkIj-8D_yD zAf*`k*W|Kq2BCt1Kf~74x2Ro<8ufb3N|mT$Mf!OLA+cg9aUQ{of`RTo{oiF+H7CSr z2{2O(LZ*Jm{wn|F#{kRo`^8l#w2=@+^u*2Mdg%u&PFd`(O6+}XP~Gz& z;N2yO{EEPW+9adNt9=HGdAHHtYM75ObXwJ^CK{u1Fs@}mJRPui9j|D(HxN(O{vZ%Y zEvrMNh}P70t|7GWLbT@exN8Vq3-_?VSf<8z^`0%$SE*piGhrK(m$ z_451j5g>^E59&~!9j@tgiFUL|WSHsx&#paK1>l#DArHNnux<(={vfR&IQSKlL5%X} zbW@Le=Js#i$E|%{Ln{tncKI@iF%*3B#IjA?$8}ixr~H3wtrzzMP8|3ZWL7|>tm|`d zPPdZ|(*Dj$u-L3VSbNA}J~~O-K|t8X!_khvIOL4MSubL$1b}p0qVq< zZTB#JgeTT0+fM>FcgW>u*>|bzRt?oh9nU1A;h;QIgK@|ER37iqCrEEQF6*jhRtyz; zMf+C58PBVi8K#bld8KVZcwiUWZPXi=({8Nlz=P{gB?a^WLA=Mq^5r&VJ9*0%RRC^F>rx@y_xWwT{Pi z@Ay*g#|xtc62|tuBJ4B#P*3vCPX7G2Z)eB)GqBQp+R(-8yCGJ=%CXcBC!vBxf0X_w zGFfwPb&B@mRktC6SYWh_jTmLQMy|q9&MIY z4u{PCw}?!nj4)Ih7o_itPbbb9TOTQC44X41CMK4DnuDqrooJ<^5LFV3yJ7)5<2_cf z+x=I_{ol@p3fkvJhf|iqirNAlR(DJD&FIA5Po`m!Ld_O-dQkoA8#<44t2K``ZKE@8 z{0SVfHY;OHIqU+iNp6<%NfvzO1S|u#*y<58|+R~nSrqX z$+o%agq78UBEwLr;kq##JxpzrVQb?^eUrhkOz%TK8uX9ieadi1qMXF_EmczWcxaA0W0&!+d@)YzYMWprO(-q=`z&?CS7LGo5GWB#NU)xl5OeYG{-loZpIcCmFd z@rL>OHWCWUXVALG&D~D+003EtY6kzL;8J5TsC+koDhyCPXxTd=x*XQo2~ri*Xt8_B z_UE>5qy}?^bk;P!na%Tanpq+|zYYJ6^1xa!Ua7((??077J7!9a zDh&FKTBQY1OAdEvezPr_dqG|Dtta?b1KU$doDA|(%FUO}S||={ExG855E2T&w0|Hb z=SY)iy{l=ap?)+on3ql#`S$GKvK^LiiHcgTKoc_{8ZV!^R_>jW7)^hOAQ*;c+@*i02_3?;wl z=5LGR9zQ*_F_U}5A%=oV?6;8n3f@!-NVuFY@<0m$k2OfMkQkbyQdwE!uPU{8y<5b) zk(kT-YsI^y_9zpw+d+0$U+j&I_D=aI4ZOs3o!|uRX1dC6?VHk^$||)lPrvA@oQ&o5 z=#m;kO17F^!Vw}t=iuDkty{OBx2CE*9y^!{p)6W=(GI}(HvOW77%I^-NgSb=pTIMG zmwCm_cn$Qoi4aQ~?I4-+1x$lN(qpdDw_OH@s`%^S*sy(Lrq9KA599L*+GAUuwox zvN6%#3hDEEGR;4l;*wE83SVt@KbluvwZ(h9K1#&_0(a=)n_zUG^ya>{l)bo5H!0@c 
zTIuozOiy4i*HyHUz08)Q;x?QuHkfd;5tHc%)q9K*NCo}a9>kIW;qc-B0af@-@;az+&i}H{D*vP zdF92<0T{Eh_9yUcvBj~6Wd1?cDScaJxz6({@Ea##s+NJ-wk_$&V&WbuCOZ2)?Rl~`hoUA5o>B*{pJvZyy*Zth4wEUL7^6g7{ z<#pmdkPvh=`4IWp@{ibA2x6iosA%7wU_|wJ`IlkXqy4mS%LQUT;z^U1D5Mxw@Q9Hq zBpAdjV_5LmVy8ZbO}X^aFN*9~zrhLW83-+3nLlMPF6kf+HJ-6m`@v@%9>Yna5R>w} z;v*jIJAUglzIi@qtmxT@1FV>MmX^N?x&`mqTQ_=OP+G=69@&PU5n}T$MvpjNr*rZR za#C|tE>FsUY0y#WoMz$?s>db9TUMsznCZ1WISw>B;I9N9mmQjHd}nkoE7%jCMUTR1 z;o858oAqqE-9I*JxCew|E>|_`_`=|`NaXuQcSNTsu>3lF{$c7~kv@@ku$8t_R)vF? ze~Wc&-NM7%PK!of2Yc(IZy>ERPYa})7>)}5?O}Bu2KszI{b{Pa7KgLRYlpA?ri;_$j|& z|JUN^;U+q1+^FB|V$MCW1=se^L$lpDn=Q>%ae+s+ZMN^72MTPtZXL2xFYN`P<0mdX z_Kz1OCS16X%IcI{-5~ZfXF^tkr=pVGSG9%tS@6^&(<+%y6UM{O`@d(N6SUN=?WCGIp@bq))%a*H*kVoF}OGsQ$PS;~5w zNHbh|o6h)8_CEl(k6QdsnVW9mcydWUne*otAh{0@_GVSy>YJ~#-4^*Z680t&yTp5W zwBwQyzS*0ad;?kua~4KR)uxwW_hQ~b!m9#)+N8l#iN;_FbsdAzjcxnQFXlMRy#{vb zdBsDL&m1ik{D#|GFx%=3cB-Y!DHcP?`$l7f7rbqp%f;^b29+w5j`@cof%ohlb6u z2i{nLmy_`-XbiUY9-!YOy5#ngK?5!I%+IG_;sog%U`{7xbko3XE74k1bRTA_JzHbMD0&n@Ck5kh5Rw+o%+Mw7*GY zyncS)55PCi#iP5nyP{4Ei<7_b2#>k15L#U!X7MKzHQmC9T%_Xry zE-W>+-eKF5*8H)8q8C!@2Lic;wy8fkm#NaP)O2d<+H$#Z=lRz6^A*jHU&Cu}W!WNl ziabsW2SX@q*L;Co&euQJ@0}Z(+5dPV0@mWOd%mxkn7*$eX6$3&F<7@ipkJ-~igIEp zD3o%HZ%Z1?r^Y*8Q5wo^(iF5e0N-Hqk7MuCW$@(X)dK*q2FmXAp*E8(-mDEWfNr_q zl$6#+CRaLx>a|>o_dKl7p*u3YMXwo&AVBiohv0+iWQL4Wmn23UVaQsOz>xhhxlVmA z=q6s~tLx~8O*?*M7n*Uj4U^Rr9@h$fRQ;Fl7=X#&e-Ln>!dw5j1yuI@diD1m0oOE* zejlNElLUD{=nvrkx6~)dcxmi*PemM-3>|u%sF1-vaUXH080vE?cH@n!I{EkNQ6c{~ zQ_hi^ptHBtt5UDFE8GYev!LYUr9bOBx*t42*?;l73+%;m|UefQTa3`_K=yd@nA-&6KPL_!Kke!~ff~@jZoACI4a}8t9YC0|s`LLjgrJ=$jp&vizx} zq|x?Xh}!Q$_txi-^vU=!g;MG(ORshBqdi@vlz~~<&^_b`kC~3wfx)r~2SRhfHi*m} zyppOFfy0(WL;7%#IkeJdaK_8EKp?{*p=%q5%L`|rsegj=<<3I>uIF?5F8-ybV-km< z6%xwRnT|7|tKfAbj+?|A4+6&u`Pg$!o@RST9E(0K)i~Rmj9vWw%gTe0k{ac@)85^< zvZeh}vn=xJ`2IF8bG&B~G7NOp$jH%}Wq7*SsJDtCOm*9N0s}3R5#( zjc~x)F{{Np#oq~_5Hu3a{CZYsoWp5GBNmUj&Ly<_wy(~Lyd7(%>3edH50837$~EbC zv2RAI53kjU6N4a^aa@JgpahIcuZxBwp>3)p@G$Xgre)8(3Z`=5al6;EUkYfj%WIix 
z0zTLJWC>~RVUF+k;bee@CAz61mPS8QxRl#ZI04+W6U6rFR9q*mdZTK2lemw1^%BQd zMpvJ?WLq>?!>a3MwRJNKTq7mAtsQAK4Z3(LipPG+FM}hV!E>I!>(TwPF3p+qH4t&N z=+#Z2>-EEk`4RVfM{s#`Uc#5^CVtx&bB%u>;f3X+#m^?1{1(ym#WyVwx{kh#1BUM> z3oMzXJOwBoC*bwdkd{Ke4%+4I2dQz}P+(~3pd6AmTRe(;9Tt>6cr-exVETd$b+>wK zN!3Na3MO$c@a)z6;5R}w?XNs@jJH2_Iu3CZ*DI^s`aA^F`XnNkZ`iE96j*j| za=ZoCIM0{E^wmxXEpasvh?ic8o+h$rqK`R=LW(@Lp6_W<<-bj3>*V*8^2ruriYA`DkqOT1JE%&7 zk3MqemXs=~-Cj8B)=x*TH0K7E%%zVt#j@Qvs}`RY(LCLAbTo|*Md)J%UZ|0+vsX4m z9U@4MW>_D3YYGV&;c~_nCd|%XHcpIs?9SM>W@I-@1gI18?XjtH+GB@qXl80N%#0TZ zC8VgBZk@GnPG(J^Xzck{ph@5ERbltrU1rf9J%JWW1d9859Wy4F%Vih>EnO6_xML(_ zDi+fSucw@`#YGbly<8k5YqS#M?b}1p%AQYkNPs~8Ng;WsX;=Q#+j70aH^}9tOs1Q#$bRIgf8(^lpKUQWahD)=6uQ0FrbV*C^M)o6w28hWbhD68 zPi_GLUD81InRlBvV^AB`!R7wjm%^f_$YJLc5W45lT;5Bun=_Hb@`(y9Wh-}sSk(tE zWR(Nupy4G%%sbchlor$S85hYC`c{Xqv!v>h1~FU5a?$BB*CqE);no8@T?Zclmcp`b zG>oS+^Nyw}Z;3DU3)`Ptc;Csy!laR*MYrx=AFjSM-7e+j)BB%D<_Y)hHB z8_6v3)}xU0rpFPtORRHq4{v8wNH|qRgW3$EQh7z=?q$NLG`xkqW@Iw*wFOl+FOE#HP>u!t(nh-jNkLY2h@9Vvoi^( z$2oYu_-#r6|1HDO{?#P?vB{=7NIBKX_#t6s&SGWB4w+Tyu4j6*r%9tBxyPy9dSD(Y zF(}j*Cr+C+UQtVc4M}7EFcscd9Jk_xp#&ZVLFP@4(@i~(EfR#;x{baON64s5WEc!$ zv*2$mdSDFN+K2D<99k-Y?+QWOlMto-=a1Y*TVhF!jLFB*wT%H{;6;q;djT}Mp~4RK zWmP+ph7hGh1kb|2ui_;6LLFjSqHXKPF`7hcl6sSu}jaI zAE;mUkmaZ1m}|H-R&@ODllO#(z|akX1ehV4Pl+H)(p~Bj7NL`lLp{unsqqX;83`m* zWj5yWKNnW2A8X4&LcFXmGhh~>g_$#r@O`e8rL;kd4U*Se7?DwlR0)cf-RDjy$Akz( zNHgNw#eZxF#XyR*f@q*=xNhZ*i|GzoP`>)Dv`uzY96XP+ziLUs}65 z*o=^21!9SKlIuGg?$b252KfW|bm+%l#=ah?m;8}i-|opJ8??NlS0cry7aO)EVKfAR zW*;~lKcNm;PzSHBtiM@SX{?zXuHvB!3gtUGD+%E-Ng80GxTEmOn*>z6q0>G>SU(qC zh2mi6v?$P`u2Gh>Nq(ZlIsJmlfFZ!e2E4?*SHJCi>RWB)_(X{f%xMmzH+G&C;?~TL z5~Grwur>mP#8M+b>d{acdev~svg=0*7~8M$P+3q!{tk;vCtUoT8$~hn_t_(D%7nh9 zKF|oTr*HTl(h^de!`-HCI*ak*oz3ls_LF;3+o)*ci>K>l5udi#7b;*lEH>}MFR7)* zDV4-O^sFNpPlQj<%k7mstGck3)(BKMVyw4Zo9}cYIb?bYf29n`3wWL>|=zFF_{%iH^uVB0lC`b zflVJG>hIFcwz#H9&rhyVWGqu18;zA|=R-azmwHZ$7c32>PkmVlTWh6?*SN}hqm^-@ z<;r?``)E*CUX#gL`|~N9>k!V=4VIyK#f1EUbqM;05`!)^DGpzM32`2v(q^n 
zSAi^%QXcOj32RvGLObZp(8a#Ke}{^Mh;ho{tVN4Hc5}V;58tCRvv}VIZN=N7-B<}V z&abnbNBf5^gwhL-&=7lRp6YUtSVi$S2Y1VypnJU^u5NufxT{Rm?x9UenaZ2dhGv2e zND<0jZBw=_ipFE~ol*2s+u3>9 z@sbeL7{0i)Z;06Ta~Vv}XG6?Qn%0hQLg5xHsCKKcp9I75;U-SJJU(;q9*M-cZk^%U zq{+-w;iV)*Yq`Je&`$i&xtW?v#Q6B)esZA!j2O&Zu|nTor;@29nLf_Uwm3uguBt`3 zZIhE17xI$vt$i3@G&9A7O4UNKM(a*3#ut>RuPL3-}26Q6?sbD$Fyz ziRDqn6>mM2_xU(LGvobe_vR~a>g*OJx}o8#0%V9Y$atgjpDAl1-yo}zbVVT0hj(7B z4i`<`091I7zT@pin#gKe#E%hw`{aG_u!BtG)>rc>j) zXTb^uGpw5WpPOGOk5CF0<}8cWI|F+}7gflW@u?SLL_BHaz1kGZZ!oXmc8BUQS--HD zki^Tdt83c+S_&z<^HZx^s=!c{O58Y-s5IgD1T;hw0QVI**L)e^RMaHr@p+N2@HR=!i6bB0;Nj_K4p^7AmXYjb@ZCRb zKt@wLED`-vTspiiXC&m<-I39NR~4a>Rc6y6)#!qu+0@(4(n3yMgHl(EpaZ`O5EeoS>xYCl8n#@$YQXQcWu*TmBJ2CahAcu2c^_^yc|qXNIdmZCj5JzgYcOnORF(wZq<7 zvh|sjM-GCMmR;`i#F^O9(5kIk=5Jz};6+33*;3Z{l`-3Tx$Sk!+D4|gRCOQcH6;V3ThRe8KhC+RgLH}di&Hd8*_uhZl zIqmP79`F7|nsI`ge^u-%%31NyZ{?p)ozn3~uZ0K;-nOdP`d)isO?B&^JNLN0>)IU- zIr_fm+~27kGA!KNc;#Lnzc}Td-S*GhGWt(!Uy||kNcPVZJsx?XtsxKmN_T$z`Ep&| z`uzuX6)Pzhea@AhczcdUzhIr&UW;4SOFw-%e7*0Ob{24?XX5k4OjAA@|1Z{P?|A<0 zJ9pjTccE949-epDTAFx#ezeB4dPB>%x0RG$ZPv@a!J$=q-Tt?cpye07Xx%EshV=ET zeC=bdWpB-xvFgX&i#w-BU&%36Pdrny@Xv#mEiFN>jI+Nyy!z+S;pfxetWs^?_x}HH z$@_<9pFXrsd{geRb#gYgQ|`9|yP`V3UmgB4MM-yJ>FH~o9kwQ?@*j#+S5LKhTCla? 
zF~j>edwy=NTd$(@*HmKR!?3*@gKtLiZhuw$Z2qx+;CM)0!j-F$GapGTSu}0siwiC5 zY^QFTdo5PNEGYEPxfgR{WQy57E#1ApR^HFKFs*dyEDx1iN8Ms&3^Eex2;) z%6%?@iIHJfq1K`J_P39?I_F(WceY;_@?>|(jn|A{j%%L07|grx^Y5>!XOHe!b*o** z@Y%(F%V)qV(0DBv>n_<8x1_ZriC!AyJZ^Ig3*?L>W4 z`Pc5tU(ZZ7o7w@KQ3Xy#>Wh}Xo!_}b_IFg3PT#6;Cs#cR3q5N4GPpB@1vI{MI7skZ z31Wz*M$1*_P>7N!Xr-*6;O7S`&o-=d0WK&74mur7zx3_DfK`H4v{_>z8%X5X^B^6D z1TA0Cuncf{<=dGe*{(CZf@dADUwOdTeB&{3k9Lf_a=6a-}M5Ayk6X^`Vw`q zbN{=!vhrW=NAHY^aj*VmI&Fnq;90S0VoBc}<9&a!G=s;h=E~0A#LW(LK0^Tie((q( za7qWrql|o#ka_(?hT%$}K?xqpmo+xLx%xNw;*)#lkKXaSV!%722q=-^eKuoJD(7XE z li:before { + content:""; + height:2em; + width:2em; + display:block; + float:left; + margin-left:-2em; + background-position:center; + background-repeat:no-repeat; + background-color: #130654; + border-radius: 50%; + background-size:100%; + background-image:url('../question_mark_noback.svg'); + } + +ul.task-bullet > li { + border-left: 1px solid #130654; + padding-left:1em; +} + +ul.task-bullet > li > p:first-child { + font-size: 1.1rem; + padding-left: 0.75rem; +} + +/* Getting started index page */ + +.intro-card { + background:#FFF; + border-radius:0; + padding: 30px 10px 10px 10px; + margin: 10px 0px; +} + +.intro-card .card-text { + margin:20px 0px; + /*min-height: 150px; */ +} + +.intro-card .card-img-top { + margin: 10px; +} + +.install-block { + padding-bottom: 30px; +} + +.install-card .card-header { + border: none; + background-color:white; + color: #150458; + font-size: 1.1rem; + font-weight: bold; + padding: 1rem 1rem 0rem 1rem; +} + +.install-card .card-footer { + border: none; + background-color:white; +} + +.install-card pre { + margin: 0 1em 1em 1em; +} + +.custom-button { + background-color:#DCDCDC; + border: none; + color: #484848; + text-align: center; + text-decoration: none; + display: inline-block; + font-size: 0.9rem; + border-radius: 0.5rem; + max-width: 120px; + padding: 0.5rem 0rem; +} + +.custom-button a { + color: #484848; +} + +.custom-button p { + margin-top: 0; + 
margin-bottom: 0rem; + color: #484848; +} + +/* intro to tutorial collapsed cards */ + +.tutorial-accordion { + margin-top: 20px; + margin-bottom: 20px; +} + +.tutorial-card .card-header.card-link .btn { + margin-right: 12px; +} + +.tutorial-card .card-header.card-link .btn:after { + content: "-"; +} + +.tutorial-card .card-header.card-link.collapsed .btn:after { + content: "+"; +} + +.tutorial-card-header-1 { + justify-content: space-between; + align-items: center; +} + +.tutorial-card-header-2 { + justify-content: flex-start; + align-items: center; + font-size: 1.3rem; +} + +.tutorial-card .card-header { + cursor: pointer; + background-color: white; +} + +.tutorial-card .card-body { + background-color: #F0F0F0; +} + +.tutorial-card .badge { + background-color: #130654; + margin: 10px 10px 10px 10px; + padding: 5px; +} + +.tutorial-card .gs-badge-link p { + margin: 0px; +} + +.tutorial-card .gs-badge-link a { + color: white; + text-decoration: none; +} + +.tutorial-card .badge:hover { + background-color: grey; +} diff --git a/doc/source/_static/css/pandas.css b/doc/source/_static/css/pandas.css new file mode 100644 index 00000000..43cd6318 --- /dev/null +++ b/doc/source/_static/css/pandas.css @@ -0,0 +1,36 @@ +/* Getting started index page */ + +.intro-card { + background: #fff; + border-radius: 0; + padding: 30px 10px 10px 10px; + margin: 10px 0px; +} + +.intro-card .card-text { + margin: 20px 0px; + /*min-height: 150px; */ +} + +.custom-button { + background-color: #dcdcdc; + border: none; + color: #484848; + text-align: center; + text-decoration: none; + display: inline-block; + font-size: 0.9rem; + border-radius: 0.5rem; + max-width: 220px; + padding: 0.5rem 0rem; +} + +.custom-button a { + color: #484848; +} + +.custom-button p { + margin-top: 0; + margin-bottom: 0rem; + color: #484848; +} diff --git a/doc/source/_static/df_repr_truncated.png b/doc/source/_static/df_repr_truncated.png new file mode 100644 index 
0000000000000000000000000000000000000000..8f602703587613cea6ab354ad2eabbb103ba72f4 GIT binary patch literal 8040 zcmb`McT|(_vhRZ+U;_doNC)Yt7<#XvR{;$WiV&qpiS!zp3ZW}VZwdm^dxs#1L8M8C z5I`x?dxwzRcz%2Bd-py2?6c2WCx0Yw*5rLwX5MGM^OlDQa(izzu)#$Gxaov+j;ufxIY8fyTV-ZHZ%A_)+;tgB z@;2wv{kxZ5UjHT`7e(Xq?fr}R0g;477)B<_GVDFM>bZPPk<7jL`4~gE3$nf=fyQ&Wda$*(E$(Y||neqwB)4Nt%Lg+|N&NUEW5z+3KeQ>rB5^QR?w z#?-B=V+&$e4BUXqi*vl`{?q{pc#jX5Q@>6nf`2gY;aa9HCd5#C9l>|+6l5);AMm8S z;C?7d&q{JFZYa#n=b@1&<>gmjAGVjSzVj)2TpQop5}wFGnHz7fpKO^!UzRNzF2nQ6 zup!_sRonOc1bs0rQhC`FAYz z;30vT)t-rQS$en!I?&URz`Lye6vM=o5P%nhbF)PUgoq zvu)$5j=L=u&jVL4=ASnO9*rA5kBimHO6hN!kRzwvcyu~AlBEo$To<{i+VVOS`NPpx z0`Rl5V3mXG#6kAg+bY-(=^qr}SBfsJ0Dx4DtC$L|RT(@Uaj)pecCj{XtcTX#2#xS{ zFw~#{hRw5kNCj)`V+r^NG{8%_{x*Gg%rp-sLXbNXtZGr?!_LJWX?ZQX<0Eb0_0m?w zE5By=V7>UjOoe6!;b%OYA{RVEsOP=ZTzq_m9d`l_Z=R4JtDy#-M4tC6Qz98-*z4KB zv++W2Rth^}GNZjGKC`-!eh$}iq}2YV;yp`(b8}$8pBakep0^6NVCg_QW5I+iGH$FW zvn*A1*LD*@-ENvA27mTW12!vl7uaLg%anO%+Q)3bOnNz9egM7A&-QuOO){hSfrtc6 zQT*L5xQ4;P6DtYz{P6etUE9x+dT(1lZ*Mf;T4Va227?(b=O?uN7BE`vU0qBrsN;^; zn>hG%Z;qTRjF*ERu;Z$sLz`MZ?H+b>Guv5E1|AB8@Q_O zrjz^yH&1NuHLGwA--SlK`8IEypYJ)(H^CH9)M$`f9X46iiQhi1elase`zXDz*3><~ z7UvtoEJ?)+C}`%Q0k$IYNB}eI73owE=&hFWH2{(lrM#MZA<3qy6=09SRXr!W)YkSh zPwxvuQwqz#+>jh;>tz~XIg>5`VC?c?X$@3a;$Uq=p$chz{4wr!Qc}__*gqCQBd`lZ>aN8AN9NRQ zvb1Vl7xue>%kwF5-9v5__s?!^x3*L(2lHtB?vVUcSn6K1n|~BtvJ@*_6Yl=oL1)0P z=xK$Yb~KM9G(fS349Lj~699Z3?Am(Ty2I2{+y`vpq# zlYNMKdR?;P5NvFtCq~b~pFCXRrUBwq zD5f@zpJ(%-7{`8&7pEBbz7&$pcM@3c8cVY3wBHw{8S|+6;t!}q*g-zVSfIK&08)S`CLVRAI2-szZ9O|IW76r?0)rHc-NiP&@quen(BG6rjr5swKa>R z9L;qpoUU|OvhY0z&x(OamB#WP(X(1$zkG*6x`XnJ(biLv>0&p6-hE>=sVSy+zl zlkP2pRJ-?X#tPS2Tik>9C5fAeXml^_tOE~ zlNBS*DQ3fieC%yjd3%>o5^DqjD;*_Vm-tHD!ufW4#HQYzJQTnx-7o@?7W+(dNH4R8 z_l=~1D$w-4(W=${yIVT0_T%Q|fnl_4=-svO_FVr8d^hVta2?KQ7l(G^-~xLSA{+>@ z-G;+fS{bMg6bDzJU(wZOu3oKqcA2g}y0^a#Z+|U07f7dec)T2si(jgLcCHZ&-0V%G zg9N+KAt?#fTE%D8EIqcTZm; zD+@kh&Mk(DOf%IY=_of( zE5RA3>|xs;7wycYAmr 
zP7gG@s4_##0^)z3Z2P4K2Sd4M8#>KVzPq+8P6wi!a0+yu<+%Eg+ff^cM)``_H+g>& z4WClg^l=JN$!UU3Y{0z4!l}X#a!nMdxGjgoT|ggv&>)kM+s*1DO2S=B!txbX;1KMp zH;Y1?H7baeQ-Q?P?J@58H&#%_jeN$NTk&jkb7)i^UlklGq3%PaHrOP6ed z`;kxMi$R{6b_%r0A=Z}`sN<@Q5psk{JbrSDeb~sOa6RS#RS%SK5|in=d`Ga3q9CZT-^OAqYT7U>{xw}VCy&HUyv;7-UKdwph4}aT z$KXDr!`L&AlirnSAdoSHs*U1TQU%piY>y=fk~x5$?2xBnTZ#VmQB1D+34O7y#JSQQ z&T*EwbBHxFIDpgAGuVnD$T6_sFj*!Nru)v_wj@K=>|@g zz2l6M-5Wt=^7ssriVuOfoFox6<2iwfPm&0`#b0`2LFm-A7w$yW2=+%XTdVc{#9sHS`dJ!o~E zascG`Wyyk0O0PZqMn0_d*IxNAz25;ByL~;?cJWg%jt;9Ng51n)@T88P>&6zaDH6D+ri8m~1BMxYhxbMOH| zRIcZe1xJx8-#>Zjb&EfKqC3se;?^W;SA>_))mAlBxmCSS^)p_zWoIHH0XVb#DE_On z#3P%h&KySUX2`j?Y;;GfbBy*4<>kmZS$YqTE_fNAyTwvh3*n#9`M5jeNYx`o_|YF) zjvRXIa@s-e(cjafe6%L8b7LzyFiY93FPkd~m8tLQXx__wTHxeAnhYleD$~8W3h-^n z9VjY zO{-fgUhg`foHdwjHt34}*A0K1B$5>(y*tbNJANnw-nxrFvNZsEpMN{UCSCksSibhUHY0ZzK(1J+R6y!lloFVz`s4u5iuN#R3m3DE z=3-J}BSxMdeKhBLoo6%4algA5NO^D+7B6wbdeJJu{8Ed{JG6k;1JAEcYHO(v&CDWY zYL?Pa)s*X2%rQIVgP6_2f6BAoO&aqOV;qyy*A$b-rC3UE2g{V>1zgLc5TfUHv1ZkZ z904jaT*HJIT!{?hVg5;=w=nn^(E$TN7!c7uvzAjo>k{tl|70E=DN41BpX;1;ONyHK zBn$xBK>f<CaL$vi8A*pfoWPVfzRP`;R#0rwbD4{a4+moFO~MdpcQkzTXXd( z35UH&@zVDpVIXrw6dws})6YSS344rX;BPdK?M!R|)xmEXbq5NRV2#}-l9$7fmEKdj zAAK}KB)G{h!VS9hGtTUc>CrqCu#FTx?ru+wKKC=xF-!E9E6Gro*)RaSoou7%Q;UEt zc}Veo#sh8znhS9b+QXk~-io-g^cX?V&B?DY|Su zy9^POk(d|GsDcJmfta0vf9xs8eFKKRT|$g0O4mEaW3sB7v>8^{$!B(=1P>wcQFF?_m(mnn}>ByoP)@7{qK95 z4nF=nvw9X||HeOWjz(Lgpyn@|zjNIU=4G{U6fN~~XP{EjB|x4eg9=(EFtd-wTJa#w zz#TOfg<$$e5Ph4DH@@8Rm6!!kM(mPlodc~cF7U0ieG^ep9sBx$?U@VDg=d-8nX9}m zefk?{yGhlsXR=y529{@%9>o_SrtQ7e)>aO0Sb5m#$$wBwoDOebyA0%PoFV;IbNbQu z!h0rEu(z|Kp_^1GM z+&mH(Q}2<>AE%{Jbi2{KaDqSIwh0*d@kux&g?H4(x?VTjmIfFkI41jOE3R=}@Hhnd zqVIPA>C5}<;8)l^a}x$ctON$^Ttonj(f0Hw)!kY*_zQ@g$Lr=d)`|H>EC*w++kb}E z8ggS#rK*o5ahz|5Q!Qhy2i&}FZXZG&{OBcJKO|+P!+fYOeLsFFv>+5aIJNkjUp=4? 
zE%MP3v16tDg?ewGw1|(`|H{^kyVKLX+j$TNVP<;Q1|g_M@3xxJQLoimCd;^>Zmx7p2|pyr0X<_WPBAl{@?UUs!eGefjOWFD%=c zER9WE5Sy4thWER0K1_tHtpsx~NK8GNb03oTc<}`>AIDDe<4R0gLzcR!|6i&md4W)D zltE>>LkCJfWHNrW=(`;qvh$0^DVLkLt*aI#cm~`J6r^}qZ{NN>P64pM9$ZIK&Z7Pm z`%3?R?E5@VOy6EHNj`@yaxzr3(rX*YLXf8>_5{&g=Z{T$8HBBz+vY*c+8}-`yF4!{ z7NKEaJ6G=DL~K_Fe#OPE%!|Kvv5oN&lR}z$;fBZuDQdwT+j?_%>us9 z7iAzuG2A@_sft(K#&XNRCkTHhZQYM3&4&0{R;rei@FmYeWm zF!7q33in$nrjZ$t&Y}IMf$q*LU*6C%UnNxfUiY&F4opaF`($X_;x}J=4k4VTD-!;4 zda>&oK-$(<&g$SNW=TdJ+B=o|E@T~dz%Ik zYk1vb6$DQUnKDPDpl-o0)K6M3ZXLd1FmBuIog92lLoY^_2wxa1eHkDi6M=-(t@C06D#Z< zyd&tYB~SmEPj>7&#O0x-c@cL=)7#)^MyE~gL@4aRqv6IQt6ox{P$miwdg$*eT!=3H=jz?90rVgoysCJL(bMynu z)H!UZ%dSGMctTK+pZ`4$!QRWHN<(($yv!h08NhXDQJI27>Yj<#uJqWvqoq^ftP&90 z=uz`*Mr(q>!rTp2<9E3HIaG8)hvLn0w$r9xVrrpEVDRPjxx5ZbWBzXwIrGK=!_g&F z0#HCepa0o(+ST-wGzWwLhscPHttfNU>cwrdQBKaxOP#ikpI6L08!;Tx)=+WC#9IzZ zDr-8jP&|IDM9lX^@YP7&TnhV&T&y^J=zY*dk-a-}l}L&AO=A@|{ztpe!u zYh!0^xc^ed!uZ}Ts46jF_YFV)on(B9Eg-G8+{3xKYTI1|_w+5oI-LDBYkp}VTfHv1 z2g!!x+A=^z1~=!oEG_}0TDeI(uGEkK>YhP=ZZcC?3zM`6tpfm}^DlqEr+|!3`ge2} zUP}{jY!L=a3*ExmFH;(B@jZ{pbu-DdjC7?&y>idvLBfAicNouQ0n4D!>}y<|m}jDI zXE{kQZx|1l6OB!um|4>`k|4q--c2r_Wrp}m=kd> zl?HoM*cGMS4+hF$%>jS96eqoqsSs?ioyx*ox$?zmOp{ch=ZWj|lbDn3%Z#3j`^&Os`&BV@iM4A%V)W2%D08fx8Qe2LA%3bB+P_r=E z@}7@&y9d3L1^W;iIEmT%c9z5#42P37M1J`kQ3$i#l8%gIejp)1!_;2?ZQ}OEbLx$W^0ZG}kRHwH#@lGNuG2v83*A;?tO(-3|wZ5Uipc6hP!w+c?q{pRdly zLmc)F36{^`n-Ve(biRX zJGXg|HCKOcx*GJDpd7q0r(Gkm{BWMMSDudKO^DE{My*No?R=6s1zR+K?QCPIclAZYJoB~>60sCe+rga{9QbKJ1q4Sv8n zOT1G>1Yh2WCgI@!NDi`}oFNbl!hGF?b`9(K5{Ke5}t(1DcA#KuQ!DA)^eA|-g z`i}v2xe~Tj(7MT_DX;;sUk1N=t%%w=y&HrM2!`cb9UF*i+RgMjasJ}q$3Nyg>WrA5 z6xe0JMu1AUE@TS{0xzX$<;4h4;e*)eQ~km3;fiJx$70p;d#AIa=Gu=0kOiWDHN-Elk?63(HyJ3UZ@g^rH&t|plKP8Jk-EPje z`v|Ih{P^MH$Dp;fHSyJ+S(sm+j{f{2AR}{MgEBEUr>3WuWxDSnfAT*%vT3~-6@2>? 
z(CNTB9MVUiUT?P?IY8!xYE?(1U2j9EqHV)DlAr+ApqMEXF+40^;>@T~PV)CAXx$f_ zKxhVCraLs`?4YDVD@j8`gPNKe#?jGnjStg4@ zor#c;Fn(wo;(v8G7qrQTvWVb025wxH0saXN4W;7Z!U0#l+LZYA?VFKc6drT6_09G1 zBD>p(&bcqJhT_%4#8tt~)z+^>^||BK4sw!#_52h)r=O?w^VOy(;G{6Iv5j3>I61LR zO--5bHjb6ydqQ69orEc&QKq$BxsuNBOh4eb;i#sD`#M-Tc5^RRD#}7#9Zx|?sW2Fm zh=@pT#)>;XX|lUpBJJC^cSw|6jEqt3cLybk4eQYvNh-}w+j1}OYOr0DMxs<@Nttq6 z@fg_mD%bDGd?jJw=_!!GYft9$a6YiV)a3kzGPu95Z-U=>XZ-foy~IiTl$0`f^gHj^ zhHo&Ymb&_^z299+du3%jxb*Yq`k6XQOUtB9KCO=*H&s_P%5^NyJwD1=f} z@U0IL;kTH)>+;03f^Xlx-T$4;X#2ESV~znXx&20#M5mNVaw)6;U2QQz71a#RkY{Lj zG~+#gY#vdJG!tjUJYANz(T9f8(r9o`u&H8dcu!%lv4H_?UHeyL=JzWaV)7ePH=JB_ zEG)4Fg@uAg7(F$W6aqce^51#Tq@<*Z%FCtYITwK#T9unapYp6SS zv$N_ZW@bjuPml8;$e+%_L?M{4Ntjgq*`a^GeG_?(kH>6r-EwLQ8Dww80Qo6yd~y<6 zBAh@>tz>3!bhLlbzJnP)37kVzlk+Y*U;IHN4~gMn*XfB@nz82|FtT=x`fwE;`lwD; zS{gFM{jjV-66y^DgM_qnD81s_eJ^KcPDpqnvW?*@d>LwnP;*1*Zi3I#R@^Va+}Oxh z#l+36mOcpr?M-te_>!D^=hv8v=7lL?BhtI%G1i_+R!>PuscuK@rep79{7sq}!c?$? zo_~r>D-VH$<-MNT3yKK40V#<6D0SziLZ|7KSVcC-P6l)3x_GYpkU39qJ9EMluh8IN z3O+u9kdP3bv9QokqwZiNKIfh2_vO4zRV%*twj?6n1UWf5xo4<#rv($~)c9)e-$yM^ z;PeO@9EuV2$bJ6Iqvw4t1@c4Wp}71dT63BXjsJ1jmog6XMF+SZkCzNmTVEgPh={$| zU>_!(C$6skRa0G8*EZR0_}ig^UFmX*+h&H_N>wvFf%od}@HgrOi#XtmP<mIm z0h;+-05Ddj^G;@gW$VtA6KS&O^}I3O#|(wkf=3YbpIRFV_Un>Vw3L+%+_X-`%6StK z5-5G`-u@f5X$u24=Hld>+PRl2W;4C*!`H9Gv+vf$ijs2aPLrd*a9p09t`)YSAz zdHMVr9mT^ZPQxQlGk*0+JC`l~@V?&IS|Oa%r{Rjs%*13~hyI;q1!Unsu*ZucsXAB! z)YV&lkH0iK!1I3oWN<8thyZnNdUkfBHv*fiI+h)?RZA>M?zPQN6U%3j;oAU2A9LdsuBPm(6nXOm6xn-w{aMAZZqc zF&U#Gj-mUL7@w@p?z^~56(|J#v_4H&FUdcgEyEsD)6keo(f9rHx}K9D`iZ@1aQMQ^ zcCmh>{4i!+*Zr8Ow7rIt<`vSmD#LMR zqFg@Swm=3t1l-=HAk~bGpnUh#@44kxfQAN^nVFfaHgp|TPY{}2cz7g1E}v(Jbf%6w zEne>BgJtFAgH;)DWw_PU)F6;DbwSk&LSWkI%{pkRWVtf3ErE@9cn_ElK+)4!S*sHFQs&N?un@eY=%Zafu3`0XhvbwGkIFlcp zuqG~Scz1}=F}|@OBA!A1?zWg2FQ!;Q`~QaKT0aw=TD)L~U`(a&>@<9_*f-N;WsN`M zl2l4E?g*kM5V7M8{V_QX%!AoH36CX)3{nkPnErnvL-%Ob_FHHV4-Y(36g;L*6xP-? 
z$|wTV(=Z~`?A5HlwS< zG-?7!qFg*Y8z2$&24RSVx3&s_`w|SLup&T6_?;w6OC~Iob#$V$o*#B!YE7lduU9m$ zU$5VuZ>F*v!$a0FUbC`dc-@@z^~Vrb*)H--&&^SDa(=Wi2Rm^GXo%fk) zEx_CWZREAJkp=_=l>1NZO`~9AvUZ<6Q2Uz^IMKs} zb(yn-#aKgPHkO4^9RK21)UQ#aQMXBeqt;rzs5ao9B;0v~JJHOWjhLoJAP& zVy#FDjM;YAwYZ8{uU;9c;Q^D}-FcRNs(}aYT2xvpsj7+-85LCp$VS*PmIRyYfojX{ z;o+g}${Y=cRoz04Eo#4PfT%ib^Udi7dy_G9W34G!?H~~?#fqfEUU*AfS!s={t83%a z-C>o(x&$l&0=1|p`NYHo1lDaznXPVWVZn<1%Y+3N#8WCr0nRMiWKW|n`}yI!yF6Y6 zY=cQM0&{hY21>X}v81Kt8ysXfkO~1$p-7NTFJD}30!ePrgri86frgfriihX({GCN? z$&y^pJK)r_rJtzD$OT*w0b@wzccz8BSnqZ%2m=#rGXH%F`aj=-&(6-y=Ua(Nk=q4u z5fKrY7)%@lZv(z&L7x3sVUiFa@!wMdOK+RP%3c;R7)vTx>3XOR@z<-6QB*|dv0K`E zDOSiRD9mL3AZfX8CCDtz7EVZ;n!Y}1+9hN9R1FI@KVN5!|K-b<-h}?i{PzhdDPhgc z0^d(cfSo}+z;c?8N-8U3(&wX};)7FOec0ihZ*rys0gcC^6aKMK(Gb8OP2dOwyL%O+ z@IAyit69(;){F*h5XhjbC57AbZ)=%pJ;9iU=H`VBO{#=!P%#;je-}4D)_qh_F`6il zr*&4t?}u%1JC$~E;i|X!6WHc;^OE54IE&;1Y$U>y?+;$w9gqX{3O(k2o}mmM)S`Bp2yHCAd z!IE`#t()mt+A>r5ze^x#ePU*2Y#5m*;?P&-w;--g8aL5zTr84a;ot;=N*uviW`hNe z<;M@2lD{D+_#1l@d3dHiaZrXhQ*)1X0KP{h80*{AIRgTgmy^?V!i^eQnC+^z_IyV& z{)ZB0xKuw@w8b}UbAQ0ilJa}adD%=I<%ae9E6R$xum$L zNDS1Udi&KA%Z~)86~UQh2+zCb{_p#D=aV_ROhw=~4(sB~u`2468ImPRX0(#_T?AzF zC!yw&f0>M1)qP$BpD^F%)j)7DI5gDbJSH5bq36-ZpsBB;V{=u)14?WGd3F~S6|JZr!;MZ7AsXeCcuX;evK54M-b6|U+jt|H zEZzE6T*14Q&LPX{=#ad4*za1cvgG@p9nD~4CohiAq**D(q+er(<|04}kV~mF)tfhO zK9dImci%fXsb5S?3BK9mYxa4ef^53^xY7@N#|td`djt_ z&$tqLfP4$INk2(pIVqCQj}e8n22u1{qB&oM7=-6Tx{041uzRdmUTs-fdK4IoMDR1}uV245Jc?B#0Yz|gv+u|OLjW7i35@LO52BZrJ%#H?lgi!n7k+Lo{}2&}`;WnEd4`DqLDJo&Nnk3fzX zJJOP|ws^YE+94V>W(8h+0yH4U{3}l$UM)WUY^IUX%!XVUf$Amcex`qKL0_LN5zkIJq8X-cXs-S zFx@zB_d`rju=CmWV}(0xSfH27RCaHV_J0CYEArhh1TiHHh`){>%HXA>1F;)~vtqyO zVqN4+FcXS2_GpI1|Fc`b>OW|Q9l;ziJj9cA1&YXDncOzv|0!8|Fvmxb$KljpodXCo zYT=Hricy#Bv!FjUp|dGOJy$_L6s!h4?Jc*+PiB&xwKTx2`cUIUr}*DLI#Ybi6}C=A z`JZLLYv~~Fg}7>Or5%b((1q_JQm)xGVK3!<55U}STcO1LlHpAz!Q@9a}=1FdhXh-5dnXbbBD=9)+OoW2@FjPcz zm39hj2;w2_*A=u>b(54VNXNe6c-qtnZ#_A{NA}M!mqm_PHE|e?r32#fxx8}!?MMhc zALnw@tex8Ir 
zui1`c+*~cVoxWWwC(CGB16zRa`@caHV#A6jBS36b?&n6T>er*;O$R?qUI13qp_8*S z79pYK%;;!_V3k1^Gz|?6g`glYq|)QUOvi266`(muAQUE}?c6BJ%D$jUip&y_(EwZ% zxm|;)DlIQZ2mBB~9M=C2RET%O)4&SgRZ~+F0Em_Ihr#Otw{ff3`iWAE>54eATZU^- z=A7YwGS@GAvCh7e!wGAjatPiBv3inUirH1@mT-3IrU=;FklVfN2aXc<-@kt^)r!Yv z-XCz|y!Q5$4RIyJ{Tp#H#IEP3)92jQRuaIa2hzB$<3Z&M4u<98;xYi7_;$qJ7r?{Y zPKju+;g<@Nno95j>#S!>yTH!-?&)Z0jYiVA-H)mU#%E{YA%nxik}4{%UP@Gb;Xh{l z9PfQ3`tPaC9f#u=vXgSbHmMAAwaTOc!d_Th#3CTzTl*{_A#rxo@l5I;f5pejI`;Dj zY_AjGvED;H_jNcX-DcHN=SABF!-MH!Aj`a91he5($`h?daIOHprtC@AFY=qb^HN-FS}~XT@X+ng=;5hZ~fH*|5aW zES-NpQFaM7h{81`KNP1iPfXG0oHQFa{X70WKfV1r!~NwoySAb2n{%F2e|M~d_rexr z@KBDp1;nOYOibABtFPH{qlkM`+lSlf7_?R*#yQm1A0VjzaJY-X({g_+t~k(9e6~qR$l$Ynv!e7sqx@7!4G- zTmtA1s|`@UYdT+(-s)>fqSUDQ@r_~ zeSM#@pE`#t*x{-f85l5eabG``6c>vFEe8ZS#89KRw3NwasSyhVn~a`bW*-x~7-Ii- zXAloBhuAbuP#%Vd5Aigt;r-$bBO8AI`Gh8ceJ#_I&_wUlMN7*8^I=FSpkBwNSy;=9 z=wUSzTbdifCP=u|z8d5pQqJKa*(psCEzTYe_!HC#G)g`9UVTr}5aP+r|Oo0jk&BCIaEVH5=LE80xhH@WZ6ji23Kw zANP$2I!t2Xs50#at?8FTIelCx=5TnIVGSiYkYy|Yd%Lavl z_oOEWpp0G7z%f6VB)8T9#5M=KhgfsS$I-GO?9Ru#cbFX$RWs!^m83|zki`Utt(*yq z`1p8#-c|39>9*SpclWK9Q*h&P*FiM8XdKqg&uu=BnA3C8D_T=L4x7@r*vD| z%_w|+EbyFFJT|$ZAwzVyQI;ym;E+)3%l$qe`QBrTl}v2odhs{PWI9`}trE`12BY>9 z1p66d>?QQ9x!FOfWn9$q_l#j|=eiqM2|4^jjY}0b5tsP9ge!^=e*WQhnRhMgUbx6| zla5+-naM`V`c>Q@fkTJatntP=v%4#*WU*>)Z7mMaau|}4K?5RQ*2d2u_CP@3X8bLN zc)rS~+7_ceWG3zU;T<1>=p3<@mH_gh7zClcuzRuD$I%*TG|mUZn%dV5d|CCq$(XS) zP0TPZ3od6B?Xb1QP_CS}DVeX#Eb>|Lb#(a0APV@wQB(!~UbjjVlL5?MNk`o4{Ez0V zV6+?T;2?Wfo?Ba62AlmcigT_mEW*O10FMG`4&o1kfX;Sq&=|Kc)yC-t8UHdSma>HQ z3U6IJFBD@z6Gina5&||3g)^S}G#U0*B#nd8^C!%NEC#3`D3?|1?7|M1wB*iz@Hrc> zA(gnc%0=dA*RNVR;Dwi-!hos*#0uD|!_F`vXy7%qbD~yI z1GOP0CWa->?-Jp$s=K<=0P=V$*kct$S0%6Q6s9qvLq+N25N8AZ$>wX|*7sN0InwT= zR0ubF7ZiUZGTFyb&aI%>W02(m^KF4PDk)bfcA1X_0-l5J%STRaj8^K}Z=ImW+L%J67)PsYHdnrI zN}M4Xz41l2R4*Y_76ip_;P3X(@xn*IqN*ThknWzID!XL?ARhATIP^Q}l}w@Xb~=)5 zIUTCaEU#t-E)YB;%BG=w>Tk1n+QJ-nM5=AX>?)b-F(+7}_VxAUId-ON)eHMd9Tozk zFFJUtg)$n$IHo%((?sG*98rOg7sK%OSH3dN6J`6-#zm 
zbh80xVXH9Q`OHn%v_&2eyV|ZPYu!+odh}kXs;VkHJK7Azu<&ri^Dcc|UFQC`Bs0eq zq>!TG;;P6*APe^kJ&8wKlMm(!Li~)Gwl1Cntx6LGV_htiFf7*ZH`w2G^aRba=|s%M zib2*AWXQyU@C8&6!2iz9dN87P(MkCHL4VNkcUfL@>?l+q3X_n~lI>%5Qta&q!~w^( zm^URdjO7P;BRYCM8|10<-m9|8h=W&A;wTu<%evWN82%W+>N0wi$!b-ikwmX{ zNN^aRLDF(?AYgXpT=k=lL@;7SU}^cCx5K6bBr$R^@=XyZv-huOagM0_`pJY5-zpE^13RB=HHLjLEn)gkN1128Su;O*8 zIBLp&@8mW6S21`=czvVt$s6B5{#ZO}1;4N@R@Fj0tM;|AO+$MX1bl4I2w$XM>RZ~- zARVt72ZwhEcfTlkwRc1;-ed1YPalY}oAm6)$@60a3 zsueJg8^-IMW8_-}i$d0zJ+?2*Zs`|flYA$Bkpp5Z?0G9ZK8oIuwr2%2SU3R|D9B(P zZxC#V`#e=>%M0CUObF-~9j3%epwM8&L9eyV&WcO_fOx!*x1<4v0POn>`-^&+p>yXB zKWJ-5Ei7n#-=8Lot_tKl$8a9BE%B70B=102nAvw)T!MQ1iy~&L10~`tBHN2IM%3%T z$TAe7rl}eJ<;yEoRn>W+tfZvitu2!u1=W8DqN@UGUha`(VK7?PIXB?X!wu>SMR7yR zFO@m5H+A!l@hzqy2xW$-2ygM?A0E70Z&tiOYL-=0#A^uWK5odF+6Rr2my}7u=Kuj* zMYGBXZqY5B{fxh-U*ROVZDqR= z19#z|7{AmW<~HI7-h2r!@?!*rcL?WK&JjN4!Lw>mq6EBVALZfAbT>Z*5%hG&2kzDP z_SF8urs$P=K}P@wm3W>k`2o>@>+Q=D3i@FpE`o!;Mr99(x+a8_hmf6+`ov#uGui$k z=rXOTw6d~tSoOJCUlXUqbp;vVWwZmdjSWEGi=LU;b*2n77tF)K88qg*MOEyB>gweg zYLm&BGr}-l5g8KHX%G*`>orT!U%Ac!^#|admsIQB#CF<}oecpBUC&`fN9@0g-ObJb zn8(^qgJ`l)BH*O*-_dorwfP#0U?2ZpP}c2QSs02KEd5GYs)g;%%4a5IY;=J&N((Se z^GMFjL>Ei#*4EWM{;ADQpclso;?beudyIt0Jc89Q=eA{$B8ul$rNi3k>5RB<=%M2` ze_5JWj;X$&>jz%Y;~;Brj~PT0=)6RGul|hSmk$|-BQA%Ad%NUN{i=*6;zY6r@nOtH zsB|pavV67+TqHv7m>l0w5irOkjdIUSHHuaF+|ROmEZeVvhzG0^B9Y@SmoEYOT{N#E zrd;o>3xE88)hfdS*FfUahk4o!QK9fbysX=R2z+rQ$O}0Ka0l>K$L0}I!OsSNszJK!OHWT1pK*05udGzYNw%CS zFa&)G#l>F+shhuV!{AcJAR94Wky-YT$GgVE1u|Gbl*FbONQT?-B-wxEU9*Lfr^JYX zV>!P&taw|jswF=$@*n^GvYZekeAh4kf7+F$I>xaFR&`oo$6MRm>sNF72oPYsm!6#B zpI-)-eKY46Rm>8d%f7s~q+`f8|JIntn^43W*T*8{JTgJbY83fqw0iY|jufL1D8d7O z<(o**CVndA+^_f_Wbw`&=x)j%J_LYH4r7-gTY4AiIR;93I-LA>mR1pMx>A)J*kpZ} zUE<)ma}6ZUJ_6%dN~A$Bp#-07t5Gp$JeDbpez@l{doTVzU*-FD?7>2{(tomQZgVq{ zXB!!aB+%pRjl^ZFw49=)^XxXb>{2+#zz)WYa;1Dgun$lx{pynJui!-KDoQ|fwpN_b~Z)a0~Q7yKo4NJD3hnBtT8vPIh2#_ z(IjEyT|XahtNG=fOOdEMim;j}^wmuK%!9wb#~j-2UCwpQ{qT)4@v@6ZVjh;~CNr61 
z9={Qjx4!PXshX%`iBL*5~l?>(JP+#XTMP({`I_17&=F|FuvKSby*MXepl^^ zZKP#ZSJ{hB_@EFU?CyqDR1QTY8*G-$MZbd@?c?_-R)GQsBO2(&VW9Vh&yp zJ?Y;r0tPj+tE3&i`)I=tryjQ76Xq!3i^aUz!NP%dI##gjpzHCXoR0Eg<>uxF-AR9m z95FCkH90lahu{a=EgPGgeLhkoAf^=aRdPV00*(X5f%-|^mewz!2SHny2bvHLwJz@R z((OgQhEU9{z@9vX>6)e;-~0HW0?Lhy;Hcq0w8E49VNm(*S4WkM&b|1FCW_s!pW=0! zcKzKqGsMyqddw8maI_S8L6j|@(c9oW*)Sktv$ha7-Lw+<8l-KHGrt8$dI;F?bnxdAHqi{yyUR(g)XIoNtA zw(1Ad=zw=y{OWQ*E`GnhFY2=!jBVLoyE}SqJD%zka*;NaLd~B;Jh4T1J|}rhbeb63 zcO(Dd0C?Vt=SqZiZ!tSu! z)XJ@$l3bsH4;Jw&OqWx9yfOvf^sN?4D>|Anp&KC;3vPURI!ROpLSkZKLSkZ2XQ!CZ z)%3^mR@|UiH*EcqUCcn=aey?_0*U{02DYttfyGZMJYZHb{qlr!+WuS9w;sPIy zAhFf&Bu+ztcG(OUfO^_*3jGDBf0L6cLT8;YvhUybEW4rX;{la0<|P+{Mz?00XI+Wi zfPp(&Cctpc;fhcW?}W;cBPG<7aaN*rgOOh8>j+DQmJ|-g<4}ql%uBk&{Uwx(Uhgu6 zRsjeHv==CuN1P6ffcE$@5UHs-2paAJ=gJQ~l0gO7t3P4S5lK&@;vGebERS)!^n2w)M9VX&-tM{M!JDFzrTBOOUI={IHZuy$INYChoc>oKL+Cci zAr80l_L|;($xciiDzUV>yo%{F81g?MNM;dV|b1kYOF&` z0fuloUmBps1LzYm`6|WADk_0sLQd$R(rZe9jUcmg>O9W)>e%9R0}6tY3xkDY(AIbC zE{gu^Lx5~7GOW^@q~$02yAu+yr(E^r!-M9c(=M69ncKZuqD^lkGig~zg&(LFv_qKP zI=ZODjYrxE>FLo3WbP2qh^Bw@1`6V6AH!`uTj|qJn1_uqAUeFP(a;-qshvd)i-Z-B zxP8~<4Ubt{wi=}d@YkX++Uzu}@*wxQW9yX0@CI)hE<0<2O z`|lkac^EL~h;(&on1+btz~4CsH^c61IVKiqb+q9D1e+^^z&ymLm?Pn z?B1Y={_x@!*MYm`fz+^5bg=`M5u4{1pA40)T9g-x`uy50B~k)5dOwl!`1#@~>4?HX zwaZKPGq7M|PBFNINElWvQcz0b0M?|s-gP^04h1`=`>t`Ae5gF0nuiNA<~JK2OBR{9 z>+1egBQ4}=Ow-SfMVQzqF9t@ks(_5{dm~`ZCXJFA?dN@Da<-&K6Z-!ANa4fnRAJ2C z-uj2~>CGl3;>z=WS=omrMkCbI9#c_bKt|0*N$&Ju!CXMs$8!t~*s-u~HDsjB_c4Lf zO77m?CC$O2zLSW$DLGk;gn4KwNP`dmEzi=O{MD~7nBHWe5tee+-^@O$!rkvMQSHkD*6j(LAVSr!h9 ze~u+wHZeA0(7@J zzNs?QeL5*Bf5T#2kpYHoC-lC_hXSKbEcY4AcVnN&vB1PBSuH>y?PTbgf>b&?aU+%= zUrvL?qQy~sF@_H1;M`jar`z-?lgGXnWQWbN-%+F4vQC{of?$^unA~LUwXDB zzb|QQB#?i9Q(V(b#2Sv7Ah8piwJ>NLqcm=GEYagM>##y)`tfUgZF8f9J7%6$HrVFUkKirDtvZ*6n7GtYB!{sW$qrPc6cv43PL%xS$il z#E*#~9H^&5EvsCS750%|QTJ;IV#g-&JDquDHuk$cx$0fT^N+jjuQk^GP18%lBQ##E zbPnFPTeYC_o#~>iJXgVy3b%DrUhO*UCSzA#Mk+^NY-i70HnTrIJYeGS+>*a4@3bzI 
z?d|W^YAgU&rlO*fgktaG!9kYla6QgQ{AB`|e~5cIU}`{$W6B<_j6>KUkFjm*x_G09 zC*&6F-O9>SD6{!37k;`}aKf_~K?!$uvuWBw9?h{MJQ`NO%bnz|3is4^4z7U7Z>+s9X5YB3KuFsYXyve{*SU2pP9)L zDe=D$C=clisRN!4Kq?q|1l;mviBr&%)6>1u6nXvumkFYmczaz!5Xh$ns3liRyN1KM zie$dqw~UarCw^q64kwH9Z|hG?ivnow^+jYeWfF3?*${G}6rvDKUOV~iKJ?-gi!h**Ay}pHFUOk8=vd~T-^HG912PNSv958>`?tA!MM=B4EmSQ&D z{!{`aI#4O1^@sd7MO5U@*Ojblz14$Tey@l@R?$vEgeeS9GQw6(*Bqa{)GSW9I#a0K zA4hK~S;;PYd(;*!JIO_DJNI|cA^yWzJy~|K(hsmV;o%L);Q@uWx%FuFsye|ci}%B8 zhz`a!Y`$Ol3_BCiTE)dex4Jp-gC`i!QFlLBwN$PL5xEf3KkYXE(j?gYhg|3QbZ}b~R_<4Ibt9c(YGmard zO|K&Ower8x2tf+WIovt7`2f?d@kyzUT*2E}FHF>1C#+0Ay$&<(Q{oTzeW2z&+`XYz z5%Q-*27_s}X;gxE{|4sdl)n2feN-q?bK9unNFFG}$r4(<7khyUXlaTFb!}O1MeB3l ztr~x*cYd(ZIoLVYStq)KAH$ZG1&R2TD7lcAGtL*#GTY22BlCNxg~F9Ptka>+c&U9; zAy@9#iF$LS6eb+LBFxBs&HTtI3Iu<$-YiNF!?}AR$uSZ9gc+5*-uPWW&45aRnIXi zA;CK~C_=a*7h$%!%4YSk$Bk$8eEek%T9J~8&d#{ukgxX)tP9~~T)*G@ZFU@=m+*p0s7>>B!XRsbt`tBa1anxJ1I(Yn+ba{IwQZ%GyP1Y4$&f|}{B*F_XLp-qV;*2m_@48+VY4+g#)w=LwY{6O+ z^@BGenKmCBzs%UWg^!@8dnEAXzULqE{ec#zWUh6tL}#N7Be!RT>QNuu4E!kQ1#l~;za%a%%0kRfaF?l<>+q6PjJMr;&#R*}e11T5-UPijjl zOm_cTVT?EMD=AFbKO~DHC@qo!Rz(EqTt06+2*scS5~ERn0FyW7RyAyV_?vmmKM`S~ zKgBUHTJLo^%cIbHxtgqJxuW~Qn!Wi0Wh%a-lAj2*7uf~{K(pdy_4J_lAGSFlH?}YO z-}qVd1)OSU^M03QUrFg9)%aa?^jlf>L3+o<+MLj5bAx>lu526e$A>967^QX2;U2RK zioK(EDz^@>y#4T-oj1&T>mhYG4f^btux{JBL4kjp}22$1xrVqSt=x^**``}RE zHrZu?oGI8HMt%EOm$){B#SU{EYrJc)Ry=1yb^@r_9v%0!-(oaRP21`tUm)NYj&o{% zKkQq%^v)j~(@Xu45d*H|e5@Nzw9~Xr#|$g!ppoB?1Pu+5nn|NX8C>VIYE|)sg6-U# zQxbPd%CAV*Qw;i@iry%|QJxaTc%|Js|792z# z{j!HS<e?x?(hGc0bgL)7wWYksIW0a zs=q0ly8!W1k7osSU?BME2SUivpG07NMPQ#qNT9tDAACYM{K(h6!;}y%Q#_*wVB1Lu zYcYx4BXh0LR!SSg&1&|SNT4P+B`O&lp=sqFbgp=!=|gEKz)b%@t>u&s$ljRBa*jE1 zC@7d46g_V!;4?R-;l#y$Jdv<)78He}qj#EO2fK?aSkwK~7JxrKu$*<)`OCs zIKSb3I?Wy)3kzbEaVTv^+a2DBhVAT5r6^TQ3x5}oxtiFMh`PV{Z;(2!@i)3OIr`&# zV9;uV(a3N_Bm3+Vrrwk#=9K8YlUUruYwf}l$=}oKm8xs<@553RT+D)}yM4Y2Fl*|_ zs;QO{*7}2bX}RufcgVF?;gPjbJlWB;LoFhJzIA{T@9~BLMqW@wGhf{JJH?=eJW1RZ 
z{XoDokOc<*GH$t_-s8Dk;P^2%`7=l5cA@>9I~mYx-2La!gB5sn(ak=Z(gT8LEs}5z zr(4yKa$firE?Yw>!RUCHt+$aKYfIM1=CPHKQdzF*CaLmiZ6q=Z9jo$xZ#W?URu0m~Z=Z#2(ZBO$bjuj5c`es5B{=#8toP?ZxZc=#E7G zG#!~gmEktSUGJJZ*?4Bd1Up75j@Z>!gRA*DgoHW8b9|;JFRF0wAO{E>dt8XmZiKJ48PD#}W=*JffMY$vXA)2UUi{{1^!~dg}w<1-R zk2!Q5z)>qH8x`DGrTac^B|pY-y5Xidt{Ur+a^M(Mfr+ud^>CKr=-e5mL}-IQ4q&r= zIu7)%2<&Z0;-P?9@Oy_6_W6~Gw^cQlfgkyQH4iwPEhS=7^$MFniKJ4T_xup^lSe}W z@z_sIsT0VbXbNcXKyME^7?g^S5_sRGYf|Omxntra2Wh_t6q%13B7sCQY!jLh6t&4B0cF~giC~ms}fjV3h?PqR|{H? z`5R^A)T_;k(zQjg8fB6bWep#&{wdPis~E$U$6$)p|If+Y zy~`xE)yrN@uU845mEYD6=zDN#fwD0-+YW9Ul7)+qVELMkrUJL1;&xDwW-6hREOt5f4|ap+q6T6~HNi94Uw{hx^()}gpi4z6E4aPA&FA~jsBvUJ>2`|b zMIFxq!&+d6k%Cm7Ufk6q2+hB^0+&3o{W;mFwP#5*5&E%jB$#Ni`7H4d8In)&N4Jd( zd$^P&*6s5{^6M@HoUQ&oijSL22VlaHzFd%hZY9Q#Xf6^pST3K8ZWKAoBXyd5C&yOt zf6oF8=~SvbrCfAl*(~u_{t%BcMmt77B(Ep%f#viQH11uigI4)}j@SIYTVE?W5nGrz zyrKveqrHeV@$-?PV92oL4fn92>2Az^W71l2dq#u~<;Zpa8_!3EvYzb>=ULlHZ`j6D zW|=Pa=SOI9N(#5qs-LGrSVFA^91k%cgG2?&`sd8=pD8;(M*3wQJ(4|e@c(}O>ogGB zyYlH^#g(o_iJn2D9d;XCw$1 z!oZ-aV*dLv@WJspeWi5VkC8vF-uKS8wp>?djgOlK(xA8ATsk=?g<$0S!=R*~$A9kV zNb6VNZgfKQ9CEm#d7I$>mQKo2Lp=m>ad9;(4FXV@>J@6*iLK~9BXXThQ>~&?AcixG z-i3%jxnS=&#(VO{N;IRj?zLc$o?1KVdqv-D_Hr#Z+mYqRe5DQndi2x9h}1dXVN1i> z*bQU~TYP7At@8LKE_F8HrOHU*TmR-i-o9-JXqf-tuzbXpLYqYsOmT7GAp00BTBT&Y zU=tgud6v%p3F(%F7LZMkDtx`IHT zkJF;0gBi58zuO@hSO9-^4%Hw~vwDq0>CWcy3VLJ%Ba6uG>Tn3C&l^N(->|+wHLa$b z2KHjyj2)BC96kJnzFcn0qDIN&lorrqqyUextDAy}rJWtMmr*tiJw0>~>0n|A2MLx}>$@_^g9EDEQB;2gkjFHjPzDc2WLA2$jb`u-@E zhL8A{xl!ef<)k1QY(t4IZ>44^A&_?+FeXly{tI8@Zu;y;3MNnZb7~h@r4~&)7je7k z!yuZx+Uxb5t_QUS!_k<&**eoVYpZ4vM4y03C(pIQtZi&cL#*WrrzokYBycc4+PuwC zzSN#uX=*pR1b}a=eP0=6tZZMl~)s58wIE2&Y}TLV_O=Hwx){VSPDG;UV2k&RRhV zi|Ku1V`D!voRZ_4gCLFTzuzE|ik>CSzJGOjD`N>m@BalwS2KZrK=ilgRE6l-qpz%& zK&OJL*wY15yd%uBYr7=NKyj4r?cP1D&cXWm?}(_|p-9*XsjJxuty*GiQI<5aP>=J( zSTZv540YpoMl|Ko@#|A*oZUYBfr7ie6ul8Q_VkS88pss_*a)H3!-Gd>@lqu-21_6zj|CDafwnRXzsYFHwf9V8_5cGmUv@%IB zjrw1w_4w%I1nc62#NJ{cP9eo%K+WkabY())Elzniq{sF4eB(7qX`_F`Y5UT_d*zaY 
z<=>Z)5u0cE?9$3*HhrZ?Z7|YuZ$_XIzJ%=)5YoozP%9brvr^JTebwy(-Hh)u8>BEd< zZ82>^N;CMeo^R^dkbQF2WR{ZIpoli(f~@nPy9EC|p~1ObO?@Xo@wvRXc&nLe-JtRr z4?G70lvkk9K|?^%U>?0vczs&*e|2`&aZz>M9-kqGW<~*rhEZA?1qq1(BqgLCkWylh z&Y^n<1%U@iVWbpP5D<{=6p&Ivq(wmK?zo%heee6ZpZnjve1@9gaQ4}It-aRSd;Pvs zoaYgOAT(|akW3M>kmZqXGWfIQ?nbhXr-}SvL3)(b|H(Tvjgvt_K>uW@`ax~4yjx?# z1fR{l7vo7&g{_@21YfX{JS0gL^v&DATE5Ygu z=}bg-=y_U>C2#9LDT{Oa>5vMv!rQq2_3>zANZLJiW@vsy91Yu@`+?VJUJf*uC5ul! z=1Pfq!MC0CGf7vz^Ta)kRlIs!5p1meyk!Kb&eeOWjM0EVthdw?N=f}wsnbT(Xs$5v z)gu=vM2$GLa9?IHpVy=n^<5$BS{1Yc*JLoU)@dQtS9VAU-;|m>KmG9aoRq_}UI?rc zw7w2Y)g8eIM1SqSj;7I5oRT1T_hb#%`91YD>C-6zi<2A8sfsV%Y-;6l4*T9+gb)(b zzL0lr_{Ldzd_+oyJp1Apn{f1K%O*$g$&B%Oy?q#u9aPqFLDI^4enceU@37~V+|JquFuaHx(8uTHRDWOI-+kyO*zz#@Vhe02NJN_QLKyZjSVa2EAC@a- z5ke}PV8hDiBI??gbbhOwAWLN2Ut{5Nd@c_`QM;FG9@5!25(MVDS&&5-Cse78IdQQs z3i7X9i55~pF#L))@!|%{DLLg)Y&x$Bz_?a3fixCzG+m54t!=WWwuGa;V)x)c!ne zqiV8G&ra$Ck}Rn3>%6U%@OVIe@)K-AO$b>2Pvb(C^}i(Iu+D^cj$es)l* z?Yc}zaEaqm&PPHb8FvC-9`Onyjn}U&$8jnWq7hTqU$}}p<_&gMpvFHaB8TpZbl5)0 zT<4nT6kK_=KfM0H?t|kuR(9+Urz*p2ftD0_!qx7iH_thDVpX`vZZY15t9W1N+)Cj$ zuC(c9)My%zl>O_#yl*ll>Bx5-pFZ;?50ZHAZ|JX*8!~($(P(IZyPea#g@q5&;$iun zWs&u{=A0NB`P}X7uJlIoy2WQlhoku3E$a=p`m-Tp)7fU(v8dnv8@C!wYUq(+%yI$u zTbUY%q`%*ytX^5xc*JvMR?-(n$UQLo{i#?_uJ3DYZFjzCwqWGxC)Ujycbn_xWw>k& zh`-NF-hBt;)x)6#s*L!jX|2aAN0!6akMpX`pT_02`K%E#EQK@soBOdD-J8^>uZd>8 z_Uh*|KXMn{pGC{I}Cq%E#=E;a8gC{GgGZoKbP3!^Lw!w3O+6v!M|* zCiUw=5$=m9t^8M>ZAxp7&w1NcNDAV^V<)4W5ntw}y>ns(>iUzdn&{&}6^|NmHpXkU zFQ%5{;ysXIhYJt$;stosgd^w-6AD`L4LaWb^&pztKYo5HegGv()de#98{V^Wepjmz7d=1Sx*h#y7-sk5yvH{EHPiYZeA;cU5#nsJzC?)utaZT`vgT~$Q*xqm z7GiJenoAl3WM6KW#9d$mqX}JJwbutwi6>da4Fo=Z)FbFXh4l`*SZ%sTK~Jb&LR^<4 zy6Yin7O13JnGF#QZ*@Q_d+a1dG1=2Ansr>yWSR3h!t1QB-mEy%#SnA=^Vuf-<)KiI z-C~XP1@ejDMLtuhV0*T?Oc|=iL*6SZhURfKzWkiLrqB?Q@Yw-b!t;!!WYNle#-dJg z>=UdUU2=k{GSnItF`&|lK|b2Apa_S~fOd=ZAi8L!LAjO=Q=s^?$*}A`-E?{6&y!0_aBA z;5EBJ*AoNb-w(2aZZn(;6(CNrZFZYHX@q@pqk5a{94dnhsxbaE=}#AH$GwYlfA)-S zXw&p~=vW2?| znn}$W+G=AYzca$!T%2h-?T%K(W-o>SH)B#G 
z!$|Ii;Sw?xp&Wz{e2MjIH+Pmo<(i<=7mPj^S2AVJogMQEjnF&EFkw`=b`>Yi8b+Kz zsp9~d%OngAU}>2-C->R$nq5slq>2%Q*8MRo-LrsXh)D!7p;brqydZybkO5u^Livhp z7TmU+!Nlpb*NEoRef=f(8U!iP?7_*Ld(CC-V)SFbjg&?u8#UbFc=>FgP?$6|!B+pR zG}zHtQG0|oV@vTvD*l_q-v)^TaKEIySh_pyo;+z;pQ?jGLggWSeSLOzc4_VtAH4wm zLfOjd24ps4=9G((iRqDOIs|h6{(U<(-O>b$- zCut23kM21MO?hk0G`+v`hxzBbX2PIN33HOT-H?kMWVP_3$eK|JuG0}(TmtFUTheDj zDY59Y$dgm#Au$vb6kSy8TR+qjiPRFWI&-@}1Z9_og||CyFuV~MwCpQO?|arIN%H!j z#jGTx&YbxC6o#O@2V_$@7Tfc*Qx|9Lh=+v#my{=WR%>vHVta}aR7ulC!Bs0D9_MFS8%MIY%XV7TR|0O-=E)h2vkI;9UmpzbFjE7v$juebZWCj$ zpX0Hk!%sUKQ}|3o!XjL{M-V?MUhpjI=w{f=iA3{(f~T?@S=!b;;rZvyx5q*kxV2Q3 zNm28s}(?DaPc>RNyIp`M*~a4@{zl#hzeA^gHT;NzL?yiI4Mk4P#m)C=+`!qn$=P{Z z2VOyg8{jet$9wR8$-^Nd^aLBj)0xhlYlr0nJSDe3T~HcUvo8dqa0$XAg1H0Yr|E{9 zM&<;?4XnPE`NPg9V*{ab-XeEQvBcjo=hUmC0|bmzhlNos{?yQ}Y5gDN%Z$M99{eCd zlQP5*s6GS7lAmoa4h=hdZw}1jS@MVrWQl^;88$6WX(1$lh!xcK^+BnZh3XSFQWOU( zIs3cWGd&aW-ho#Nn^Apv1BE6PMD%__6^ricj-K-!8ZFM|6xbEKC9$UmJBs6sC*(DB zNrp3K>=dI7{d$sv?Cr^ZcLATp!A}PjHXCO^baYVKyy7W zZdE7wLuh)qZ$)~3SQlO27wJKLJ{hlwxiTP2B`Z#%QjI?`Aew0UP;Yw>$WZjFr@}h} zfzA&DxeU?T49T>6c+)P^EW9C)uB|Kd8(UNm8v}IX#B9~y-$H4#uI?VP%jGSBMhpHI z4v}boD^pd87z-4XS6h0=Y66= zNAu}I)T%!BI+ElPl1}ek2lL@oe`Pud_GBh3Fo+^8vW(y=diW`UYAyb5-f0iXbnFK> z3H&+Nu9yYnWL2tEdUPnA1f?_rx|Bg~@6qojD8u0_UMFW=eRg>B_jvc7ghKg(gQdrk z&D&HQLevYSLTl}=zniAzc;RN&H;te_uWw!)Nv8e5mikMCFAV9^p{%hvkVI$hs1~^- z#A%1NO0T*c+5HYD`LGjtQk2pfIMc*uUm~_C(PO{qfKrF&=__y)?lmcKLVCp}xgDzy zy<>|_`Z^g3r^!T|z|iX4K(jh66!s)S(>nrzesI~ZADc>u9~#2=`C&H&yFI;u3B3VR zGKU)4!h%gn7dNmJKJFN4ScrF@Q1WuFVlsF6y}n`9754}TL=ehu$M}mRvelGi z3p8o^n*JGo5nzJ+#eXeLlPj&y7Wgms2)16zIAw0j$MWM};BiUa$j^UbOT!NWzISk3 zeekgqF&wRGsOJRP@{!Rk`D7vhgTZEZj`D}ab*Gw}Apo~oFbq&-hXIY%Cu#hBRY64J z-p*e)xWROf9)Z3F<>v2;N=scQAy-R2aHx@0S>0m1Cs zv3*a)9>(6haiEgiT}G)8-9U-`?-=mnM_k*SJW$||7Mn>gG+z5%0%c-I-#)RW>7*ww zlNs|9jJ@*txf_^-c6P*q5Bme)ow_C_FVg=Cy=4?QfE{;wIecrU38}t*qQp zeI^-jC~O{dwD1M?*%2Uy40tv@hZk}wPCF21T>on4$kNO!vQc5Kw+=Su(p{Q%d5csaoTuO{Hvhj 
zKbqdCHTOrc^3Jhh$WBs-+)=l-Xl?Mwt58TtpRJ;i5mHIF9Sj3&(BY#u>p;i@xUu0K zkJC%ne7q7l=7v<*p8qIdCl$`51MU;HiAOYG}-m<9~FjGFGt*@#|_4fA0tkK_Kx$BqYGF>lepEY4>Ju9Fo=0-*Z z_5CT~{gG5m)e~!rnI6+qQ}|@7*vYKkg`a>F)#J^R(O5MX?o&^A z-FBH&!HLEy6DP+XE91My4hwX&Jd0a+|BV?PmLtAuM6%reG1&}6aOS`bn^Qw ze@!_{06ppBp7J&S6;CM?DFKMy0LX}X_Os}i{|{dghbHI4ToX?9);A;edL~FAvcIHa zQ_VytsHH%DjICn<=AovTaNmk@Lt@%uX?(X~WMxk{T@wJ7!O|<*!=t=xV`-=5 zk7w)3p6$rNsHHGZQ%o*t$ykg$yKteb206Q$PH(8YRO0>C+xTB*VyH|rQPJCOmCT+X zC7W_KGH0TM`81gC9voo$=tFgSh|9-|PI0}hPZ-MP{8zSobRIq|tdEmP7|0(;9FLu9TOkv>)N4?zX9H!siX;k@)a~8gT&vjb=RX3FyG;R&6(f|vgyJ;w2 zIAYrY@N)!EfGH$|NJ&crVl1I9ZTV0ChoENOYh{$T1h2&%d+$bt$y(8Ip2U2Te&U&F;q3m{-B)zLeC|KY@C= z1&F@59NGFKu5$H?`uX_{PES)qXq8meL5tou(}(jsQ!jh%n%^MY@2f?EVE2T%eY-Dl z|Gg7HGHar_VmiW3*6ZNl){IvE--k#>X%#2}?CvVIZkv98>ykn-RqIvy7r%NsnDG7m zrS~^x;E+qUH8AKF$*8uukeLk?5C~WUHWfTQJ+n(n;J}E%c3J7TqsZB1%UxWALUh)j z14x-NOA=nIz}+@`#Qu?ZMjpWoh;YfwYrx_G&qj~+wp<+l=08cwuA#KYyl9-WGC=< zEzVW*fC?YV^QJjXK8y3jKlfx|xo~F7&@UiUUw4pBzBz340z6(sp$yqr$s8xV*QO8gL#4jXdeSN+9 zM7t|lV-6xR9vD?{dYQm#8ZbP+_eK*z#7Pfq63i#}h2iKX!a$nxPW_ zIcKqTz&djC!$B?b(8vfe*xJ^mopZgDRCD%jJ=}v;Z+++J5^)2}ijs-kJ;er%lL0jNcn8Cf3fxG8KVOEJ%^g! 
zk1BT3o>t#qbf$AdLYYNbc=ZiIz)-j?dL4c~d1xP%=N5VzGv`}Q4NG@M9c?j~BOWG)rf;>Pv zy5Ih$50*`U6wnik{W+MU2t2RU_r1jaIQFr`edKy%EO=TQ{np2_2w@6h-f3A zT^$-6%q|Ls5+dCoC5?2Rd*1K+ z&;OtQoU_(h=Uv8&GtV>kbI0D-zOHLeq`IoyLrgME2n6y_L0(!D0zrxgzuD1I!N2U) zt#^XIpw2J_Ep+go54w2-_!+}d{*^NXf@^~Kjr3K#;57t715uEc)bhyKTk!TtJY5jK zo7((?B>$7{eGYQ|w{w?Zq6bz=h4$5jgdND=C0{SVar7QHJ2ZxC3(UKVCpaA%UHTOS zaillSXsO57YAx9Q!Fg+45X8T5^o1p-iGgMG#rv$km$wNUisPTdKMV06Y_5%={n%`L zYj{x1GSxJ-*)O_4UbD*Ve}7=XVDWBhW&wMIbz$-90^$g<9{*YxFpV zv&5o){?z=CoV*II?ob!T#AuWqRq1y^)=OfbCZ_H2&H#mBUZ#F<&d z`-~ljQaGfxmLDQvYs;SHxj|ZD*hG}=a~xSNMNcS!5#kZ8S=gOGCr9x;+wbc2?pz)2 zt5>hA?d@fim80i29o?kfPRKhEEKN6iK704>9UdhmN3z)E??h%+*3L?^KE@j~87RwGoRr@vlU+t_sNFElZ^u;Lxv?z=JkT_jwQB-1r;`US3{CL`6v%7*K#qnQF1tH#ejD8m{-7-(315jlG-jpfPFq9fm3x82X>+qhev?=Q$JDt68{I8)Yw8?1rV z`$a^cEJ#auvR#nps`iDc)9{=6RxZ2;IC}QVc5;aA|)hHs4)q#Nlob_Qpw`gv^IlH=< z%sB4ekt{Khjnn8;zZ$(C;Df$??O$n8%zuv+)SjN6ZrFGRW_;RrF~gaPoO}=q6H_)i zk-PVNZ%$z2G`=0Iz4Ek&!w`7?vtAEp=VzPqZ=D}Q1E(e@XZPwHmYxrjtjWbuJsw>2 zyS5u9A#_;PE~DrovjVehTx}UMUJf?naKrkBRf|TJi2LxKvmRmpg8PzPor9+4`|)GM z*d_5iyuF)rEmcgZ{VsABTCGO1)}y&wJ&sJDe>`f<=I%{QOr%_NP!TONx;})Osyy!! 
z@5+7u{=J8r+u7GCDP%OhPlVZN0uEGfY;1BT-f+9`8$5dSXt`vV`MuYEVj6nS65Z#OuW(1()z9L zceeY=pvg_e)_k$WTcE1Cy3%^=N#!O1gr1S{2^$+$O-;?#)>ehh2dX!s2M!JneINO3 z98b11cm)LLxViBI@~Mi7S_!rq2P*Kj(iuc5Cmvg$^l3*txi*qa)Ntw6lK zL(9wNqi;1eD~W2)J7%h_G&D7N_RSidqru^iZ6cnut6SJeaE^5ucX7K}nVp-{c<~~* zzFx3kj?~Nzqc8KkuD14xxHvUfi5{?FkA5erAy?3=_ohyx@+FK6U0wF|_Quw$`S>&& zzNZU}AwWM$17|6an~^`Kwx%Zc>sNG#meb_voMtfT>_*KV^A8KN1beMB7A{(RhjU8= z7It@S!wZsfWfgJCn#IABLmYqi<8uf8nw&Ja zBM5)b$0jGkoHvJQSy&X<2fu$OXmZ&Ar|zW;m^ssl0bNn2=mf6bjK%I|&*7C4fw}2v z-S9Vb4$Vt?F&4gWkl<76ZcnD3`(C*~NKRi5T%AY`&rS z>M?3>{Cv2QDoZe|*vEGm+@`y`TcT83WC5JC@a^$}wVTVoRGw`$WIG@tcA`=H1c2G& zK2koJEmd>Qd#PdAxaC)SCN}Z=E0& ztW?{1xIs`A%-(=~Nf29~qbmiv+0Zt>&hYEKUXgG5`_}7fVy7Prb*$KI>Xtr#rf33PKPljX~ieM;5|837Qum-`_<7~!CZb2?@EJ93TI9oifxmmP-{jG6`9J*6S zgY#wu*qgk3e351x6M9Iz&XZrlSkwfD_7lc?oNm83=9o zsi>$hKYdDUT=4m`MER6d`IPrFQCLL!+5Q3>|FZroCa>oEyX#vLQm2>gjPXZ`Dk?SU zjGqb%F(6+tqv9b+NlBp~@4!LGg|N5OpY6^B*Dd+YjU?dfsJwHcktzr)D=O+t=Fq<( z#-=q_E^j`5ci{_CZkb`zMxS*=Q_vlTIdhF(41-c~gzpRnZHULnXAfi~cCpG&pFYJW zCvP0612{qtwLk`uBYwc#R3zNhyqM4P>%<##b90F&34+b%zXuWpUH^PAOw(eOF)~{G zS{*_FL$4&Rz%bvMj(ztI^YiD=Gz<(fKA9OA(HR*OTQ0)kW&WClj}-HNZaY1D_ADVS zZQDiP#)j?c`dZB6ao|&7;aPjnX?4jdN#K%ky&Gc3k?C{N!Y!lhe~n5$x=D zadEfEKOAsGq=*ix+ksoX$ z`Jba-p!MPN3(V|n<{_Y1BE&%y%lVQ`V$dQxGZBHupxld#n0~)?mZ|;C*|4$Dn%3;g zv(x{1h1Q-w`2O;I`QV0Vi)WVK$L@JZb06)H0>$MIs(-*<5cd5s@{k2uz!(2%ObZRT zjUOc0DP=_X8ax-_*{1kCsj<(|gSXDkJyp@s*d@A^$l`ZLabTCwCSo0Iql!C`G=D)R z`Kgj(VIEjI10{OAH}1h47>`k0Ru(3kSE)o3U8yj&o`&bY`cVz8-GXc|F^BnB0M_sQ zXm>-aty>xaIFU&lnlSw?be6MmUiC4HQtU3e!p}t^MEEOp)7JD}5jG(Il)S1Qot};; z$nwJcyE)Paf*#0YP{?$R4YB2qkL!o6AoAf2CUbOESq!Qh8R5jp(oyxjbB?X5;sM(S zK3Vcgx7Ido!41O9#?~cvJ|Fz5+R|kEXTG4vVa~!Ccr*}d@g^oFR{e%!rTWdgexPvO zuNbEKU)B2_*9$%Fy!XWK^u@j6^Mrh(ipZ>-L`jRb!TeTNXLfgUDK9TC0fQkQA0Im& zt;+1q)?l%)uz0M55<8BFoiG0oSn8&9pEo(r^1VJY>3R3q<9LASsE61%=ksT1wbckJ zyy2*>u8xkD7D>O(UIy$=zq`NLbo~6<2KaBM$_&8>X%M&G%yeI^9(mpsh=LZMo*o6@ z6EY;3)9~HamNi#zh3k&CE;>OXq3HKaH}4mUvm4;KTkw 
zq8Yw?3CmePb7t=8rnQx;l*36%NwGTjx;$Qwh>YxzqxKDX%%+VPr4Q@Zva;Sx_eG(N zA9EiayB*C;A&|_>%!`}9Bbiz5U11Lq8-$lECN(~cy6@GeC-l=tBjUBu^lg)-Y|VbP z`3W~RSU1Ge%*=rvQr>K%ZWBuy_4NGwlexCT8AA0g+XP_e=V}%rwjM}wYajqIb8t|e zdPH;6)6+-S-~P;3QXR2T;jy!`%j=cxH}B9d@ARpEB+cWDs$4^ig1QHVyKW9JCK0)?+I?U)%-g_9fhPzR(HNTLuX&d zZ~m;8qNseo>EL;Wuq!4xzis>VL0jAdETZTO;$Nf3)b|&C;3R?%M9a}EjE|2GpcHY- z$;oNozN4cIkB7I57Y8>qKeBECD{q1m5>AbxbQgAyoDLyFYNyq0=roT$>h&PYXxW~oc#>*O_)Ft)@&x`9G%S!8!4ICJ)*T}vlLqmrD zXOb@%&uwj`dfPzZ)k3A1Pe2gaEr%b58Y3I5l)`BOim^MK(D(koP@!4NpLj!?IOEUElNL?TFWA%Fy*&7|jl%VM^tCiPZLHvCd&9Hj z+SpoJI}QOx36Be>iqGlbv4|h|B6E|);819Xa9Q2cqfMc{C;I!A!lI%zOWU2#(-|`e_zt zaPH1*maU$DdyI*k`j%nZ7dJ>HbE9n;84p}8-@j)0?1F{wqH+n+t2!6vDInSJjcx1p z={^Vx&Rap-ZDDS{yxb9t2;BdXhi9rRtZi){onQ6}v<3#F;m)>{my}>vTmE<}IE+Yl z0Oel(-OK~U9Hhc#ij&>2@ez0t21petY^f13|2vM& z_w(@HooODEJYa~&>m!i@Q5uIrFn}q`1DSz=0py4G3o%*T{2Mv+y(C6n+YG$Cyl`;F z`+xihRm|@MAO(@<;h^~T*sbh41fgq|ArG8fP*cJ8=NmvOk+HYu1b3v#5@%p|(9qD} z@u#q0bZiU>(mygH_vQ^NzwI;)6%(j`jr}eej67Bz*e*0aW=+BbNT)igys?q2$^D?$ z_k2N4X@lS}iz;>L^5KxkJ)z*eKxM|l9S6DyuEfO|4;PuJC~HyYaZu;^)11=crJc>l zOrqFT3uh)5>|r60*v$n@O#f?&KH{hx5R$XCEvcW@vb1E>Voj2oL1Yp{2=`eU@xOBh zNDwx^3BZ61Kt71n4l*PN#fWDW^gO0_+nWvj@Zk|CT2_bBp1EztMMhSpH+X{c10Jkz zAdv|`ag1Rh)hyA-@dCA|!S(l}J$%`n26vc_GFEIKjB#n}AiLysS5GTwq%gX%W*e`Z zCeSdtUz7|^=ruS8TSN;{Sb>}A);s1J&Wwh99 zo<m?FRY}jgC-97cAx+U%az$ZQrqqxv}JVqDW)JHTEVS z&ULb}Uj}d#q76+}sE-1IjgBG zPyxS63_FM90D;>sZRRLc{d08P0A(MNlCPl=(SwJtX+T_=zlsw$5t|IN1u+O!<4<1ZcnxTQJA1z7IVoC|Sg0~m3x+tg0LK8-OY`(8 z?mwjLU}8_w~C+89-g|@bFZ4pWBPxU96c#`(KS`BYuJ;%btpPpGA9Sn!jDq zaY>#);>(`Kq@$xXJV$Zi_<@QQcWCUGi1(vc1p1?w+y@X_+#t|UJooYFb~T#kBNaaw z@KAxYUtf3FuyzzLLA|`~5LxYTlt{U4n>_KjQBJD*hyd14a&g)Ait+i)xs1Mvt!N0E z07?|k*7kT5A`}M^2)G4NUq8H%qn^f0gSbt-tw?HskJH~_aY`e~Qt0DhA&&*>jwQdx zTefv}^73Jahh}h$0q}Y^cwLf%kr5TdmGxf=cRw`^Pt{^d9slrFyu=OmwIWT$ zOD#rG)kZM?NOA7z?MvR|&0h$%sCiUISFzD|1cG~1&W#;uqd}!f4|ns|bgbBY2_KvaR!LiJTSkW<-T>*+ z=j`&?bzL4GKO-lykkGBhx9wGK>z1KF=o3(=l1SfJhg&QyikB4w%;PW|E->~49vDUt 
zfhD*Z5f^vvPC~_ijyz;p4f1=#k%559&t>v?TU2qwo#|ox$0)T|CKdUwel@X5Ja9oC z+jjX{ULJ118E#Y_u2~o&z3Sr_uUZf+n-{3e7;IE7r>KZYKq%IfTv^T16}(`cIcv(E z>$MOVcE3{|yhovecml3X1$NFt2>B92*@Hl`P=_r%S!@x`)ad5L94D7f;N#B+JuVzj+e`hGf9$ zKHXMUhBGrWv&x^Qypbq~LA%!1q_TWKjdGvUS(#~eO9vMyxOytMS{0y5)0G%d`;2d}3lSsIf})YQF2_62N-fe^?$q3tkhN4cB_`LPSm-YZ_|GOGf$qvu5G= zTS))+@15&|DH=LDXzjJLl(gM#vbLN3-;wlNV`qheblVy63binUJK#E&ykxXVij-%3 zWHHPvEckPxAbVzc?+Z2G91qg*Hxj~{qX3e!9K8Mg`8G^{oVH{}dXFMe0sv4z-T@A$ zi6^*Y`P-FZ-WUxXos>^zdO8-M!@hiB3};FJcx{#=6cd6OicBtGFYV&u((k$u9TAc0 zVwl($TsG$oLeCfX#enT@{G5^6`WMsl=KcK71N%S77>Tt#%f0ZtHl_RkuxVsuq}#+x z02-fJ4L>4`sELv0Op(e}Uq0C$mse9GK2K512MWhqL3>NfjR>|WR~MJ)#wRQ0kBSyv zZ8wUI{2V?)J6vmXwcXTBLdz-)kwiziIdsPD84>xdL(k66E)B9jK!YDYeys4lc5~7P z818*yVgi`&?rv$oHEF$k8TnEh!2J9_Y%rgY`;jH~JqqHHIn1ukjt}R73r#r6tGcJk zLDZ#@&%1SPKB>v(UZjvpOzpAU6fN>(nR$rxactvF+CvkUmMfiAq?!E|C9k2TJZU*o zJ4leX4H;Qb^fYr1_o-!%*tM=+=b_Mwef)~@rzgga-BzN>62IQ?{rU4JXUsaoUQIA+Ud!0k#F__|uidZSc{U~_Su(!9j z)mf9`2iHyA_4CHiTa4lW(~|e;6Edh7bz8jn0N=<+Axhir?lH%gy5vEM!0z&U6ts3Q zJDRy#J1KB%fRNB9=9CMH7YhppGIH{xK@e6JUU)yK2@Jt$+&@~&2)H`0EIr4-l7Bx zj^;zxSPkWJDPyhCdk6;l>z5Wt_VA%|0vIyd{crlAA{~fSxNfigJ<+sOdWZR#UvxAS zGmh_*fPlbS!)BJ0tSle=nZi}9K$4+G4#LDvxL=ws`3c)0pL^tGg??E37A+T?>)ZH`T4n@K6Sn*coO1) zA+Ce=_ug~4#*cK`l);`X9dNWE?1mn=l)}FYU*N+;qsbpG zhbwjL+*z}IkPTcX3RpxaCjKwY65L%svZF1gqf2wtOG7#WJCqY0dU)^WzoLnEiA(Ss4Xk_ z(rRuuu`j3x&+m*NgkdT7w`6aRnHmi^wdE~mAGJ%`1FnP38T~Ab=}_y>4B}(yk1p$xPwLObB|H6y-4XO z%UFQ%(j+N48BpF0H_sCXLVo`I8JY1Bk4Lq+fPCXv2hQn7WNCAp%JZ20fE#@-JMXvT zJbx}zyyxGIm7_jt1at;_ky17tw_wa#{lyEE7+ER1Cm|P*`zd1+zuS8xscd0;#lXW` zJNPrptEXH%EXp2|EFdDS^~L4Y))IkO{&(X?Fvt@a9sd+uk@tN+j(P7v32c9%;|pBTfZMvjf)=9++2md9H&HVU}R zTtfb+W*fS)0v950-#2;0}rE zV*(z@<~p>St~ZD0amb8$cEQr9ZU=f5P07Is#4SPaUi72HyQnA)W81B|RE|l566*By6d~%zQV2asc9^a}+8oISXtZIlZ@$f69HFgb zW#Qy&My{@|;-A-)wUueHayu-F0-B`Wl`;g1JltTB8M}Fhb#rPw`yR~-C`X zW6TH92xHG(4CzvBIwmGGAU%|c0$2=81%RQLt}yAUk1LpXvuC74PXII@$?_?YQi2zS zS{{2fQxFj3DW107RFUgQM%+8?iJLq>6Fw|&n{|-CWemA)lY97^9`3;&n5xW3&%`9M 
zkPL_eLMGLE7tvB}Hf@<#1Liyh7~LOjdC49^a&mK#K$JleGQ^~%TS;jPx%41)Jo|PV z2G$;ck7<>>5k9$ya!>Zap-d6t0<~;eJ-v@zwV-NB(V+)~>A#qo2JPa|0rMbOZh!%& z9PlTALho=j>S?2kD(oh(&Jd;iIriNcTEL9$TuwcH=b!LE871pX9D;a|jkL>+Vs5X` z%@E`jUot>dhK7_t#;ydZGbNTONzsIhWcNGZQ~(a?YJJ-Ymq{(F8^C&s#WtrkS!p@Bu3%>+)X<0s^zOdCYasLd z-E{ri`rKtotMlS)XPMH@R485e;kfoXtjd&fn)g7f?dK+?u+P!2KV#w!9~2qDuWx## z)0vYC1CI&+3qQodh-X6{3E~ErGgvO(oE&(>N0_1G1#v?|%I21q!eUl(a%i9=cyi;k z01zrjRvt2Gw^tMo_YIJMfBn_gFycRwmUD?^$n9_s$o!%qg|C&y! zc34EJWyMwYYf)Rx84+=T8X^}K6~SJ-NR$%Vq=@EDnUnO1A&}oXTz5^sKelq5ki5qK zEl>dSSiXik-*+)_>yCjL2zn!ix5MmW6I}b)Oj(&GB!mEwR9+z)oy+N)%)8=I`BCUr zMBJ&x=WJF&HI0qqfDWg(lYs}hF^=K7_Y!w3m0>qqYDQr{*B}#9WqcUNfY)FU3o1gd zKaL?#hQ%eTG|OA}gXu4?vKf&EeILPQ>u$&?XsI?Tq3@HE!#;i_W7n0)vrw>JHTFgN zTkeNsK*sZMWNJg|qQk9Xo0pC+%?E5gpqEs<9%mFi>L!#|P;hD-0hw3KRTzehx=C>p zcM#ZiOEx-X!veH2{Uj{6b5_0VU}ZfjC7S&Fe6BQQV9OzlA{!ep@0rsF$s$E!t)RAZ z$4ctLgmyZE9%R}GTyP43WD-U;rXatPJ<)NKp7IagXsxEO{UKY?&jRoU-qImR-UH^p zB2;0>2m8qWJ5$L6xTT-WXl-xQDn9hwVMXIn|J6m-U`+yVVH7L7Wg2SZL_IN{!E{gv zK5Qw4Yj7jpBMxq35}liy8wiHmx><8OPxcwZabEd(5rv zsz;c})5orevMf~N+<;)#x`qsvvCNTT;#d7>)W?)9gPX9EE)=dHaQ&1EhTI=QAP?X$ zz(F7}tG|57w{bUFIE8_s)m%POrMTSWFoXI4IoU1tG%i5pbQq=<0?orYx&2q0KI|@(7RrQNY5V*>@j9LV~~rQ%2rDw$DSLgo|#E zZ%TDK#P9mGnbG0y3uwuIjv?~x$IbLhx z_9?&dUE>D9Y88OhfcyBWRitrVsk(Xhr?_18#3iIY+W2Q@iLkZ6`wWH85~`fjbtz*3 z60mvR73e2*W}1}oH#YQ#j)`Mc&AZ_T$n)&U)pKp3^&E3!!-bi_@sX*Hh!Rg$TRWKv z44tli1{=WBWrsGPLeBCJHID%fWCshMh#1$PX`_t>BMpk&_)z-4s}Td!zQ;;RN(m_` z0%g7fzdMA`?(Rs34w{h+L>=bvNMENl{n>vBU%|h_ijP-d%pvT*yS=Ui3>qZhf65Qs zf4sr=a@-gq1vTb70(tb6ANN5B=x|TMPk6%d8gKF*4>0ro$aLle>FUS|N=jV-l!`u% zqAb<6IoErVkOMpgUM+sz{KSa^&|VUAo-`fPCxPMqrwl`nE!t-Q0bt1ls|TV3mUXBE ztl0vF@wuX6^o5fk2yZx%5{!v4A0kl)6HL-=|KdgsKZ742jw&Y(F+FQNFQ+d#A zziZt;ke5ftoQA&t@Bvz*XQyMyMUtUy%*V^y4(F067)OGfc!`11EdKrbpl<1Fj3Wf? 
z`JzB|CW++{rtN$^{@B=9a+WA9{gWriz~6U=VMVJ7v07N_8>f2Ob3Mxfl(Q}K^N>gV1^-Gs= zptz)@`+E}G>kmG7s|$>&#@G2~BSkW#c-rfMhT?`bM_ecFYu4mXz0HkR$IaS_U>ni+&?mgRvcke z$SB>AlhAgvDAIHv)O$jjVGr)^H1t440j{2kcUn~EU zgLbdyzUp(*BE^E&$o(n@el9w_bYQ`QR`8{>n+f}jK1jgh;r<{IVVtTq6_i2=z4GFC zpuvrI9Mx{Gx4hJjPP-pNCh&_jd+6p<0zs)zwK_b5azU|kH=gr(F{LB0a$uFdbx&jRlE zNiLckkTyDAPZbQ(ieB;R!m9j(=|Ysz!PR^>f7{x9F7Oa^`OOs!Jv|#5_3q3V9)cH_ z1BHpk))~~&$A-n(g_@WkycEfQ@lx@CM1xLCB?UaM*av%+>}MxUW@k}i3mKRzxg2nT zp(A5Qa5We;W3)4lZgFu*FftMiKb7qQ9lXxsXS)Q9kyeq$99DnFrnHL42-sNF(Tv8G zRJN?81PcA;G8n)fe0~Ck&3tjNSXo|nbp=y$oj9$&C8nV5W&%}2iqnq*u9P;@r6yK4 z;gHifNK}fZ>tkBlAOdt!fkn3;JP!w$nEn~@OmTa1IZC=PfG>q|_$3<+TdW74`A!~zm_)xyooc$vUbU#RVX``5;I@pxc;1be)_+KX|6 zr?em_D5gUCA0M2xGm}J^)x_A`lf%Uipl` z`|$AKg{EPihZrgis_3L(u$5Xmw|6864GqECB8GC#JU#U44l5Rqpn^Ftglkx+QLV`2 z?uSx)4O{KQD(i}x^!DU=nV8=p@%S3Z7#cULo~gLhu+7Uh1Ub~+T8+HhdI~hw`yjdy zXei+HgfnqKc><`I(VbHSCnKLRQi{+xKp6!Q^uH4Y_y%A>Us8MZ<-OA7z9opbA(o^4 zTm_sv#5FF!Pz_8>{1L_$w95b94_GnKAaI^EbpwNFpok0ZTJe$rGil}9bw!8-0OfA` z^O3~HK4S$(s62~HxOXcUhlJrEb5@G(h6yd(@CW73Z}e@U({{)oxe#;jeKfjq1_h=eE-R?lC)d^ufZ5mg7n@@Q_uZT^CJvp^@IR%a)*I>qj+ z!$5!iR1ntx86S|WtVx}4uIVJw7+LxJpBhUA+QpUT0|e$47D5Yt$G;VTr{H^;a;`dj ztv3dE7-~z(h`YVS?@7Xp_rfpzpGI1+3lsfpMpnKZZh;5+mRd+lLxEe}m-5Tfm}_{l zdc!`_v+&9~YRZM%tsN9gk4qmAmw}ZxN4t3MJ2q<9*9lEf{cC7w$SWz4J7xd77+9DD zs#R<2^MW~K%C=9zmwP&a)mFBl%8=_bF*w`bQHG@4;?f^0z91{qyX}3%rF)3&vXIL# zUFO|BWU~7#ui6R{wusc^FXD{`-y)T;D)&4zo^vxRC!G`zba+3ges(+8LS_X>Sin#= z{b2?C2H=t2VRX;R@|^HNxc_agi=N(duUZByMw>U-fC->-!ow^)X_z;fU|wSZ|Bj+ z=3Eyol_S@Y0Z8$(m=PZdP+ysoe0tJFlTHv;{)42Yafxnoqr+(XK$3%lLsBEO=l+~v zC>$5b|9)PZgb3Ab3bW!dZjeIneH5W>+*uY382&O0-IoGL_ry@Pwx}fZ-P@$zR`@NA zJqxJaM{+s$79d5(j+YIiW!0>pN49~bnyO$1rP6%*GF_|m1R8=7`vONsS{mMqeNY<& zJt0J|?3*hKM4*s7vH;vmi#rgJaqV)GkQC&1$d|Xaw&lFf+m}_Ak{>l*@_lnj9i0@T z**81>IUd*!dG+~p<7tE%RO-HF{$?+mj6MN5geKiPmOvhF8F*xtbOQVxDYo`1?NT|z zEB4XaOO>pDO7Xl-yF$$oIFWSNm5QpQE;YREA_!TJ?9kss)kr`#{Srp!rAUJK%koX? 
zx{cyRJQEVqSnvQ`Px_Dn*&W0nxDOn1lZV@`){NnTy)DV-`q9D{2Z$OhFK&owXsJdQ z9Ksf-(OPnG8#|;;Zgs%NSdFw#qd5hxTRLQ;gFDYKMRXkG6@&e*H&9zrk8=Va(56o-~bf05Wz7KxNKCV zfBy^V@{5z4XD7L(b1hI>F8I|qM`m;U_`xSMDYEb|UgU8G{9rZoWGFs9zneqXg$GNS z;SQ;@_$rwq4N{2vw~m{+lz@?YQ??IF5R-92_`uSkF`WMM=}G#SY1KFJt#B=lS8Z*R z697S#3 zIlLvIOxrDq`L+5Yq*ATq?N9RS(ftiVD5P73Tp3S_Aoz4?haNuyJr9aM#2k1*djAIt z_=;+%$AMI}ay{2h+uM`u)+B9`DshlMU1;JK*>U^`Ur2Z1 zQEb1H*3*P#8H{cKIR0H5|kI@qpKfWr)egC_`0SA zxGu^`b;3l2bl=Oi&7mxlu&d#{eI1gDXNk=iMH+1JqLW23gh!F)IcOv(zo)wxTY~IK z$@Z$INa@~7EhqDmjdMpnOa*sDb-0oul*pbraxEwQ(s2;uYnke0ckO9(a+;qidsB#@6op8SIeV@SNt= z%pI5OPEN$d?jyy`-}KSJ;K2$V$G>S(WN0*~&@jBf^3wR}3C1|l5W+CJ(JpG(l3CclgQ`1CkXPBjgZ8G#T3r~Q#=eMwyIZLfU;M}2pdOW2&(Z6Ec(@!!cbCVY zMd~09#Q%B-;Xgzu!WK5|=3_wdl|R*=BUV1M!`4A-bKrYblqf7Fm}W$RVp5lUMS6HZ7H4G!hY@D&85(@NrA64uk;gd}Jh1`y z>Ds$WHHxEo)7`GeFR05tvcGWfE}GI9`oPt#iq#kW^>ut`I){R3k;UTY`q#lrYFOdy z=mnFE-BaN95a0p3>@N>+cCGEa*rBmyMxW^jn5@L4Tu4ruoozyM?D-YW*6a5z^$HZ- z;$@(fyRl85zK? znN#^Jv;MS4lVh#Mk z?3#FJC58giw=N+1JfmN!kL_i_BRFIZtdBrw=b})*M&Ql^UbiGOQf%gg%cu4Dy5za~ zP3(qRbKdM??`NB?$9l?h^796(isqKbyQH%yoP(WQgG@#7b?>;i9(`@0jYe{}H|A@KIZ4%c#+gq_a)k zf^PWaz8?t_S1m0J9+7UjlSL-}8e4E6YAZ{- z8k?UDF2mpz?pA%+Rk9^NPa^g}Kvr(GcT2wC0zV>~3};)=i>aqirK`rqFmGQ9opu=N zf;{m!~3LN9W0$Cu(qld=?Ys-A*za%0VvM(Yc3X3Gp(TQoOX?;(&&* zuF-3n0*Vu;ut6OFssZ~05wgDnWQR+w^sW2&PjXU*+b`GS+PR#m%6t~zu>}V&xeyxs z8GjQ-Pw{uP;E9kQ>qd`SkMb+%D#J=Dk?KoD;KU#T)5lnSK){wbAgfHXw&2by{ztxd zJ=zN2WM58@T4L~*Aa(`UGEcxDhgqvXa|17Oozc^tDJ$h*dYD9gJ05;77lT3j2QJGr zU%n^URV*h^NpuWV0CuBg^Q1syhO6HG_g1slanWQ`eoo_^PEuOXjb7D210tkXpYz#v zMia&nSELR7i)JCgMs4B&vQVLxaIUZ1Gl9j{F<-^{08ghETmts2Wr-nCc6ehipdsNZ zv7DvR+|tq^i@%N$5cpBKi0J%v-8rBl{?q)~@jB@P+jW+xCqaQ~W);Fl>aiS%c6j59 zC5q>Hn8r=VYlZn`2ZtlqV_|P^57@4cW{*(LMz#>>n3eh8=vYCrcmk+>aW+f2zzz(o zWaj5Jz+F!v;x;s>^C;EG|JKz%e3=+A2(9w#7=gvZKj0=9PzWX#md4AM4*;DFDjD0Q zR>OI14UNFxzs-OD{vF+L;88zr#6t>v0{?mS+}wE$sP=$wL0(xI3i(`6FlWcc&Q5XO z3c4ahpTdhP5ZxX8|LV;pIIGUS-L<;Id7=(1b$v^;7Y_{Gt#iTSEH>Ns3c34tGJ@_9 z+NdJLMgLo)uDRw_}qqqvJOX 
zow?!-Z#h840!(6%0N?|ld-`-FpXCT8R0Jy;5L?S&2taChDep50Q3vLc;0_wjaJ-UW zN5k8;(lP6925N-8RkFVWA)Vo7#4vYwfe?M``Pf~#)scjD1H z{aPk5wQp9W&mOvz87RGYLHK=euxARiY=A>1>AsN1!Nva`m=ft27<%S%J(G&t>R6K$ zGn7<0Q`Xi~jL*EH7nlFIPEAbA*pc5g@yyi!y*Fuwu?9w;hj0C$t}h4k2HfM*P449v zjfn24pfb$Ci3hzvlmkYxX3FHLhAR;q-7>?xF^POlp^`#Ec7xmdkD%GtZmw1`Uo#i5 zYQ@^6$1rJXYNB!674 z!DR{Pkyd6T1e825sesB;9he{ybv<#e*N5V^H6TFN`&{w>8WKXYl`Y!0TKM2R10#(I z&%eZx1bx1R=wH2AG;C~x=_{U~8Tq}`jQl@q4H!(xQf(>l3uxeFWvaXKc*@}8*53-z z6GUD-1O5asarKTXK~G_)%sORr*1VrUXA5ZO?TC8X8Ug(0qUZC@iH)0?Z1>a#k9jb< zlc+xl_%F9^4A)rml1T#_jm0sGB!)I&?KiNR6GYT$3u1#MtUKj{0QUt_g=i#n&Dd)4K`(enbq$S`3X-&% ztIqG{Jh@d>1Q1}0ApNjsSmrYc91EDCFdsX>gQzp_7I6iK7Eg(Zz5=Xofdca=FcAYM z4^qXAstqrh)7;_G(b%21vb4O)muW!1fWD$Kz0}N1j=T4diHYY*9R+7wJ1m>0O2@Yk zy9w2%TXa~HRws0fPvxe&AC>_D=G5!guU`_m>Jr5hrV~Xk6Ytz9Do#7AYP}hNI&86u z4W;cYJ+Xm6(D@MG3y=i7b|D_BV2k3z(2v;vb@KmDUHKSkh05r(<~%%PG1HB%#S6=$ z*>@)e&(pHP8-Z<59?!%CE4$q7% zS@Q}H4J2n}#U`=o2=9ty$yAppU0o6IeM$^a*VFss!hzXKLSp6@!OVs5B@uHPw8#0K z&<&*vI1Jr}moEYHn$y+EYYJf(bVxU8c%%a`VuNEtDNIrEfr_Ve1 zk@u#u>IooAb)vaJzvb4cSJORQJL7M>DoudJUhJGho3Ft&=c}sj+;bJ+;+ZTrj=Q?@ zQl#-SHKYP((*!J+=`o3eTHz7<#~R@I9D(j21CqI1a1&^){lmjyiuo{E+1{@{JLcB;pbs{t-Q=8;NgkpMlgT@>!@eJ)HlFk zFu$+hH2y_pbwnG8L*Kq>8SHU^j&9IBE7?%{P5|_Of2yt~R1?4D1y17~O@TQH!{4#9 z5)n(>y}(+C0{TWqxwguHk|$L>foRMIb{Np|JpJu8p`M-|qK6C!zToMic35=50m;kH z2gc$DfWriqOF0dVgnGkLZG2|E`OPGb1b?N)#=?SeeSRS4 z>njGbDWdTxD~pqg2`ns#GKh@;r0s531`z#?e2#kX?}N!>WHSVvklA#~y2BolcbkDs z7&$>PX<>yFeJxvQH^*PF4u)WgOg7R`dd<(zZ`u*`z}nhc!qoH$FaeB>js}2;TDc|# z#LiUZu;+GwK7AMEe0eQPj2@<~Kd}C`;AJfx#xI%uEsPLoG(a{0eSpoZ;vv%x4Eaq{ zn4z7yLo;jlhx|c^8}&uiys^A^J~<_osNv;x_4OWnDvo%vE2)Y5l=)M3_HNK}$mVCa z)Jh2=27tRn*%ZCb`@pFr34}c0F$u6^nFb$HS(2M8X**qqW6s9L7OYnq2D+3DIQxL? ze(RTr1?`#>1sYXCLc+?Zh|RZ`s4X=`%hID@x|u=G12`Or8}BT&^vuv>4^Lw104Kw? 
z%c^kzohuvF74fRBO{_lQ@58KTlb`vUTfLhBw4nGix%neK9Hh(@;O+$5Ywfu^@E-zU z5;$y74eA|{aK&%9ghfPp$_fCAL7dH{HunP~uwg*3SQ*Wg!r5a5^N=WOYWhj))Dm0^ z?wWBo6tFwy&)zhH#$XrY!d-1U<1cTJqet-GU>Ipf>$A3w4# zI%fNxy(;j3+4M8!=anC@%DQ*e8AUNQnd^;Ms8FnlxdW@=*}>A&=T3-r1j}ktz*vLz z1bS*!Qvz77$`?Vd4F*Yy3~szMiMTIK5&{x|O!(~^FCxMTXAb9*J6Ks2EbIV8(&S2I zwGbSx!}4ol0&IH_lV9HKr~$OfnG$83^3=HZsoxP+N)}m3%qQRnQ5`vm$x@mG&B4z4 z#Wx6yQ^dL>zX{qsL89X(1zXt!gaJ1NRsjK>jO<8d)e|V}W_X5B7JQcn*sdpDErzb@ z!1DvpVh%!{9?aD)ru^bVav&tQiL(y6+dwmj=+E(Sr^{mt#Eu4_XZ3{w=w|`WU@%3| zz>}FF?oS1RgS@J$TVa_oYtj>t^1$PR46$0Nnjwe;6x<^>5K?e5RX2Y&5_t#&jlasc*zAUA5hCX1WG0~$Ahvaeqy?Cdx`Yq9`;V9%m* z8KPGQmMa1Ls)#SyP-h1AAsd~0vBiW7v%NfZ=I#(%UtyA+`G9qn&kYSToEU;^iSqOa zllSx;lJ2Zg*xB8aL|m0kRm6AX{6hWz`>LG(ukY7ETy!rULht=@NgfBZ>Ovq2GOE&L IFnG}a0B-H1S^xk5 literal 0 HcmV?d00001 diff --git a/doc/source/_static/index_api.svg b/doc/source/_static/index_api.svg new file mode 100644 index 00000000..70bf0d35 --- /dev/null +++ b/doc/source/_static/index_api.svg @@ -0,0 +1,97 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/index_contribute.svg b/doc/source/_static/index_contribute.svg new file mode 100644 index 00000000..e86c3e9f --- /dev/null +++ b/doc/source/_static/index_contribute.svg @@ -0,0 +1,76 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + diff --git a/doc/source/_static/index_getting_started.svg b/doc/source/_static/index_getting_started.svg new file mode 100644 index 00000000..d00e4624 --- /dev/null +++ b/doc/source/_static/index_getting_started.svg @@ -0,0 +1,66 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/doc/source/_static/index_user_guide.svg b/doc/source/_static/index_user_guide.svg new file mode 100644 index 00000000..a567103a --- /dev/null +++ b/doc/source/_static/index_user_guide.svg @@ -0,0 +1,67 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/doc/source/_static/legacy_0.10.h5 b/doc/source/_static/legacy_0.10.h5 new file mode 100644 
index 0000000000000000000000000000000000000000..b1439ef16361abbc0756fbf7d344fd65d8a1a473 GIT binary patch literal 238321 zcmeEP1zZ(P*WW9Mh@u#X2_kkN4d&3&T_)1fDF_yJcOWK;ofw$lV1pvkCDIMjDWKo% z-Q9bENA}+5jqmsV-q%&`Y|MXW&Y3f3&YUy5yGKi1Q&3>60ETb;{1`7L#N4HRevw=K zB_8G}>X^E|n%rTK>25N;-jCQ}V0;*UZ8Ujah`j$q;&xa$dEU%KLjyxOsryqYc^Ql4 zBlf8KQ0M=o{~H|8(oiwNDUBe>WgejY%+16JO0ISX6UXa_1RIP^XYS#0$dCS+X`AUG zLBmF%KU^*;xn*WTKiYl{?kf0?D*mH}4I|~Xw_N|-`}WSSrl(?Rips<5(I5R$^;lwJ zXpYmy&X3ocAG6RfG1WFSVCKiW4wYY8QVJd6`IVR{1NAgKztK`N6;(YAQ)^8_6MYpk z>%QZevD$y#LUMe1*g z6V4v?4(rf?t(&KvtCY3BjoVrWU-q#x>U{ViX0y)OR~n^M2K*S8cuM|-@JwhkH84~s z`Z#gS&r76z{ZhEq6Z^_RoTudYup@mwiq6p+>GMq|FXxP;&o`R9ypTga`n&x7tCYTz zBKc=Twi{a?ai>QIV}xG9^$Miidf2!-;9SS^ch}PflK5F%PgC^q zqkWX0Sy5jM*R$w9+LW2U6kn*6jq6X~?48~5jD6uqb=|Ezyfl8y%f`dr#@E^}(2H43 zW8Ej2_yqj zd!qjYnK_%1w6nEp5oPbWOrnLGuh0v`NAQ&GC~*V$Egdi}8U9XxFnf8T+pnh%%h zOhc)x(`be1DBaq2dpn*c?NLd=(~0BK#b=<@^R|l@o~~FueZ@?a-l~tfiKjod8n@%A z_Q=vDvru~KO3+C>-LX_@>ui*!R>mjdsl)~wu{kL95o~eC(^u#BM&apQ6Rqo#DD5iB zX~9#ywcZ+YQF?}t?>L@ThMX>(D|qOb%@(b_89*zvzoFW&c(_Fto=%wF2vY}K)qPd> z5{x#MD2lgjfb+g@D_$i&L^%unH}$4 z0n(&$ecJCA0zRXbyOM1U@bpu^x8+zp*r&>$n}7NPc%{!X(rRfbs2On6r8A%w)@c+L z#qX{M(=Ke-F>hfB=!>sw^t|SoSgHmo>KLr?@X=J)mDMuRRB~6a@K$kJE90VPr6Rk` z&);&bgR8cgbCBjb8!aCeoHRnm`+GsZJPf21jSPIAz?(=zW?L^9+{byx@;89<8 zn2L#s%2HHkHa*K9>5+>@#w(Vlh8law9r=;PpxpTOHQ0sT>Rva|E_3baPE6#?{P=jq|= z=d;$%&(lZ1OIl#M9bsKd`Vm&Nq^F~!uY;ejfa+RbXAcKoUv-;60WTQ=J8Ns(wa#vS z&K}m*0-m-m4t9P5Ua|rPekNXW0@LliOnxC{G+YR3c zIVJWnthdf&_hW8#~{Ef>1$=8fynWd>eT=0{`{w({XQ6(>p% zEy8VFF>xx9pBT-k*8G$HuW+FE98!xUJa$q~4*aX*ic~yo_e7$c>tuQC@s?XVTZ(yu zKk+bvkN?T>uGq1^ThYCgl={E@wqjF6+=zrEvOyXAZ5=y^=^N(`GW)AM}q zqlx5+$-{eJ>3K{t4;wwB=lR}8{N#z4DZQ`sJZ6n|r2nMn)qmd+TfhBvUfO;Bf&5?p zmHti2kI;9&;xs=X`~;L=hw2}c51ZZJrq3i=F@|zEFb{k8H+Fk)=g+13`SxMN-NT6F zZ}(3UhVhpPwtbpW16C*bCo7nB!4+01#Y;EmgUaof`JZh02(?#4YDJhQLiahvS`UW( zgsE>#RD-}>IKEz1cF3nw9lX_WEj<>Auxq zTe-LGpn3JcYLb}zFR^qecxim|5Saofqat!6{Co^Js4Zb!|Dh5VI%^I2wxbRj?(@x^ 
z99IHd7mK-l-CPbV*6@{wDZK)h>;~4`&CCJ3?-Fm^^vMA>Z?8V?e4Gzb4xL(Gd9@9` zo*2SAcuE7<`ouduKDHj-UdhODvB`mLzB4|2tFMQ`mpiAO`_u|o6c@iR=P3fpm(o<` zWmiK5$*;CI9ExDfhr@e=Q{thT$R`(%qg^m|#t6-E8J(bB>K%AKq7EK7I`u`r+-6{M zE?Ib3OdO~iza%d%D-|5Oco(W{`3enkopgrRCBy3ZEoq6r8la`u#jYD6)sR2j#?rYW z3JU(}7r1j$88Ck59QAZS3fTTZ-)>*wFKF6mX*FOU7V}8B|F4-zMR42d#wU?&Mev3C z!hvqiHDJ)*(vF0PG@vud@_FT@LU7UWQSgbRF4$PR+(EP+$;Fw_l2q3itfQ; zVo%)^(d*0wr$2n%niW|Jz0V}i{;bso-_4!lxBpc%thng8eD%F1aO(bY5Ao-v@WL{Xz;}(QhNe%TlZma1fsVPeo2#XX)EXlPcU>PJ zO+OjOKqYe@KWR%D4HZ3U&v|}Uy6U!m8gAakn%?g6{*G45*1G%I2dXXechPrK^j7o? zG}6&m(DN`^=dGmVp9Oi$iz`)nY*o!jj$r8=I{ zOSISMYI&&WnYh}j>#ft+Jz`GniTzhVC*RKrYgTzdal0-VfOJSr*3z=|$%I?-KtQB}&ZFRnJqzX(8J>ER2umQL}*6UE?&jIQK z(?yea*8*J&y<&%N%^+CvRX=rrV(1(9Khh0qhazL&ZTD8G1IqmK8=XsIz}kjziPO2w zaOHacy1ApSfJfKml_eh)fUAyr23-eU!v{&0Pp1#gfWBKgVr`>4pxhi8&xp^7;N!wK zUq%clfp}*Z=e{ zZq`n5dR|6(G0`rRKcGh^`E&4J&jHH6yjqbU)t>CNz4bn4edxu!0lwx5mrHUTkEa{4 zjEA|7X$i_fIrXNy;R}|~G9c4w%x5H~3HYIvNjB59WzZRS8y8O>Yy6Q7{Io27S{6U; zj8-MFPRj|{pan#OvFl{HgkRLd)5Dr5+h!g9909(5D)_JEqSZ)ls65kYZqjIR5-I`h z$)G)SQ?w_G_T=$Bv^WaCu0ZbNITZ1ITTf3WP>DLgugt>_&^iemftEWF2lxfF>dD5> zOB#_UiRw#@8+|_yf%{1*K6#+$EUc-8Y3Tev?gysICEAC1OZHP7NQSWd+B^h{crYnu=zS_$+<>c4)&pZ` zALj+`!)J>zIzStKuKT(lwmdU0WB39q9E%s(gKXp^#uxwKjyimq7JeO1@eS{;L>2ja zV$eftxGxyT;GRG{59Q$d-I{_PsGec|*eA&LG#ep{ErV64(Fy{yh$` z)&Y_WO4blU#d;q8pSRnP5TlKOq@1}1T3bU;-CD}pKt*2zKaa}@B{%pn)t}hvkT;Ec9vwQinJ@DQ2W{*CBNd7%ujypW+!@tMNbB9O$ z`1g1P?(nEjv&Wn6^h;T5YU?q3BHnH|?DiTCbPYzS-9?c1ke9h{O{yN zhK2uo<;t>f-Sy?~%T>ltqxuLU-AdJ4j)ni7?j-UvVXU9u(=X4u&p$@L0t??a{S@E$ zP#f@L1}27Q2!8>KHMP3=uk!=;x*y6u7h=vU z*mG_IjN3`w{VCM*W9;X_D0_*-v+RF1zWcm{T`q+q{7$6ZFJ)~{yxIaU7dK%jyB7Bt z;mtJ|Gr|*eo_H?_3rCWQ%`VY;6AZsE>5_`gDH| z!_m#t#t$)}kNQYctjFtvFA<~YApJT_?-#Q&R82^?8R3=dK5ytzADQm;!TllBeUaZx z7GDB4n0)H87k^~B*GH@yj;s&qc4O8E?X$Vl2VZ2n*Mq;?{fPFJk?W%#vK;C`It-ch zK>OY6LHVmVJ>RN;YJ*{%{PtEB%xp;J|7`z@9O%6sU5I3Ym2ppQP_ARDpK0~H{I`#& zc)@5x0;)uo*L(iRIbN`uc>}t50lsvWsyDNr6Y&DJitiaTU}65AL1L-)r~Lfg>))e}vgmC@B!;r3C=5;$?#id)0%4Zu0B!lCf4^H? 
zAb2@(X(O_$$TvK&aS?XJ#eDL{z2gG0X2kVBzkiPdz2gGwNI2!s`{&~Vl>Oe*zuY}8 z@b~@uH1Pq}C_xa?-A~JoOZ1En{C)pEm+^r<{rg!IBwtxR0^V75a{QsPvp6&1d$LMGK`TM4y z;v08`wzK|Se)_aN{Pa`9d$uI5jPZ@7kcq=TzkfLJhXa2&@P`9`IPix9|3w^N z+lznv?Rc{?&}*CUc>_7$<-*@?#gv9#{_P(AcHD7ASy`lin{q=j9m|v(in(l+azl}h zceXv=qh5df?#x!Ad)Fx8ckk<+jYiJQX2aS@{{kofe1toZ|Brv3;KTpgZ(siTwlzID z@$d4_KXoL^$t26`J#XafpVwmEfX+WZ2Ic6Xr|`Fb;!EJsr!`2|e7E>y1bM%{{qQ^f zdO!4x0I^PAWg5P4968_7FY>yhUv=g$8E|8&LNegS^v+WOQ>J&G3Yaq2@#6>hI&$Vh zell*zT*ptw4bd`wv{)al-zVb-l%Jw+|9f};6!U#8-5l_}^)dPV11`Q}K$Y9~dVR|O ziZiyG|D9Qd|2_Y^7*iIlNP+yT|GkcReJTn%t08_-?`gt$hx=&oH3B_+*2q1BpTm9R zxGNgJE$Nb`U2~7G;m0u((I1W>-6PoV{82s1L%uxxfm9qzwJYT#@9u-@fkFPeMI?rk zU%kx(Wkj15Nklilx{(iYDH&Z-BX-$-_23}l%&9*7>RQZI4HW86$}I+Q+!A+hcI?*90m{`9}^59iXK-lspDOMiNw{%|h+>8L-k^>ugp+5Ygp z{kfFB7DV@;){V3M;QyFEo9z$(o&F%&m)Xp$pYH9)t_RyM-d&gc9{)SJVf(}Trk}!b z=g+3(y|-QR@A7B2^x@C$xI}4Lf`zye#5a~ABo6=l{^7tM4*cQ39}fKCz#k6$7jb}X z?{$A~0XOT;DSM}fpOuUC-@o<$N_AgX{2Tu-<<4OGe<^nc)9FFEGwA)lT==Cae@~Bf z={@|?zgc(vd$$ZCfX&w){Q2XD=GqVa`|p;=lIAMWTAbI3$7~}yEB}8{pZ!A$_Z;ZU z4;{R@=RIuEw{P){bf5jx-@c;mZbR=LttoPbQM8 z*J|=U|LTNV*RD){KY2CDA8zNT|M+`G1erV^O%g<{gXPDa;b`+B`#Yz2)9&3we(FBN z(VyS{6%O?7=gK7EYe@Z1@uB;7squ5J!h=RNV~aBwIaN7%vK$N#32{P}mi--`S1HlUCh>-mgC{9!44BL`$w9Em}_kqM6u zu(#gN2lRe#B%8lI>E$fvRDSN>W6Irk8~T%9x5KcQ5H=0J+ashNy>BY?cURfx2q{P3 z9Y4!H_q#oQ^5nW*zq`slN9o_hz3g-UNqcO4^`HKJPWpcJkA9byT0cbf4^|wyUq5D# z;ru<$iOl9?kR5}HA5-xXs=u)NUH(3z{gv@$2&}Dlzhk!xcXU&B=tQy~gpqgdeIHK! 
z)Y|Q$;k>sEplwgUT~*tB*w#<>{cF!QP_#Yc+UG+h@WqOxVaFZXK#a_aZSEt!!ew0x zEVk)IgAv7BR_CWxLGv7K-ltg|@Y2logZ^K(!e_%}3*-~qp-|V{;M}5Iz_X=3TIX#F z3~!QE7%FrZ%yZuDBxh0yZ+mQVu2(65ftU7)SA}K(&FSe616)(UM|shqk&?0Ct55aZeCv_H>|W#_UYH37$WI&g)YNyAx92lg$oUk4g&rl1loL*P4Bd8FRmll9^=h|UrmgJY&mWkl5VtcVg zNg{mR3WDBSSAc@>#i3>!ia>5hyhOOjdlFn(Hg11F3QM)sr{3z+W5MZ=N0f9oQF%BxpaW1Dit^>5Ax-z_oSh`}}iqf#J?3 z**91#d^jX%hv|b_IH=^!7v-PLz%j_!@1SWr$mUJDZB+9WR0kfNqw*^SI7qlC-`W!k zIs(^rm941)l2^(_^e&e}=@q92j#^#|w)~37s+R8p*K1}4je1cHhu$rWD!W++vu(F| zoa6Zh&sDt1A26^2P99&n?|VxwoG3W;OYNg7$Y_+`W%aV3^14g&g&nW;Q@$Vbi&2o* z0bkYLif}>iy7zB8`2OZ4!I&Y@%P;1iONXBVTrARdJc8vrhhC5?ZinJImxn0$R6()H zyyA6!*AE8>H^EQ3YM=Qg zRKpjGjY89&G{CzBvx{aby@rjm%6-Ehr9jc(56Y#VKZ7fiv#Z+D%m>=OHC?D$soac^O21ci@~Eo&6n)06JTglm1xM_HaMvM=*@%X2Z7{rbzSL# zuV8_@UeWz|EpSK8Q-{2?0vJ5pW^=)g0?4=i#X(2(oe;s(sUf@Nf51b7Ewl8p>)`t6PK;pKJ-~DfLroDIhYl}BXgA(oGxhtuiMWCobV`vhQ8kt6#Tw}mr{yneqJpF zDvZhMqw|X4p^d@gwbiO&amCIZ!@noM8Ot*zN)zirqiWqo-w6dEqHgEpct#z3>0MMS zZS(^nA>e#Rs`+8c^R+1oduRB4cwnC(*h+2mwgpo zej8*rMBaVia2cA{kJZ_u*9_)1deztEmBOLIN%hj(KEdR*iWh4oGof~>FW>ob(ZHbQ z(F60J4CTUdL!Hb1d9YRHd<=g;G8|A*ENVWW98TyD1%}9d2HTH`WB1=Tz|;w)7o_4! 
z;Y2Id^ViS42MJG1<)sZO!F}sX;=GZ$(C?ex26f>&C_76^xW1+Wu8#g5^*X8!td1J~ zJosG$7zxvWzg{66rlMG$|EwH@9Ltd#i~3pO(X&QJPGtiRhec|$+c0GV>DAg_f(oGA z%YrXE=3+7ZX6br~eD8$ok9DaBoT~$E=B-PIZ%u`Z&2y^#HF%X@-Fx>^leZqagdQ?` zG5aSR^JVaciGERV;+L;RAUhd+E$@G5#@jOJyLqh0I-L%%>D#3mO|fRsAO{mcT@>1zh^K}?Pnw0`!uX@zI+o{p~!nleM=}fbvb>-8SQo;|3T=yh*bkz zUvbj)MO_n|{oM7sz|>M0`XKtQ)SE)k*mm#C*}Ph4Fis$O?lbyhThVG1EIQo{bg+y2=w@Z_T%on{?S@RVMQ!-GRGWRq@F=h-3v_pR<|5Uqz- zqZ&L{94my)Q7R=Tw&uX7ev?&iNR^=Waa!xwm$Il0T;4nV#-oHX9hXN*1^jcU%gb@kO8uw z7EeG>4a|1j;UksZ3U4{>u>aDY4~Ov?Shp@H0@GAXSCnka25Ze9S@C`-1YV&rzYZ_^ z3?A&8TrNB&8uE_1qBcvv5!CHoKUpfS5a#;l2Grlcl#6YghE}@N0o55>7XR8(4SjvQ zOz-Wf2M1-R$`|LQ!`y*qW>37B2*S(;Xv{j&0{m+}jE*h44Q7?YeU-kYVE%wB7YxNP z<%-3#%QwbWfJq_+b*(q+Kw++GwM%Rf6xhE|TwCTje5#L?#cof6y!DCujh7Tc>9r># zcOR*M^FJ;6;lsERFFtg=n;O3DjaP!J*7GeTraL6j&*(VM# zV)jZm2#@ot0FgSfu_G*!fV%f~Q!x2EXi#`^I{91;G?MRk`l0rBSiLw-Ml7xg?mYbI zV%zaHC^L88vu`3{;FS0IEC?1jqq;u@o=npa)_GhH!P3TxORx<-x-k0OkS&Gd2@ zFl?_?-jo8E_&hgUG&c`OcdU@|h)9AH-uIu^bw3Ua96E2PeNi)1w~ARNR@??3Ze3lQ ztH4mMdA&!dwK5%E^_8`an|l#Hm6jU+zOxSOS=#S`fNwp_%u(p#o81P)M6HAm&if2j zm%m&1aprgEnfSn|(5n-e`-*Jy2j78tko>I1&@MRA^SLHw+6Xfr34HjK)&y}xrss;&|^6a@XeDP0EMz1;X|8~Xm!_^wm7 z0&*>&LEZGi54A>Exj6pznANRt&4^0rA1}*5lF-zV^Q|FRZ=-B5aAy{X9xf|>PwqZg zudG`+Y%8XG;oJt%_hALdesztnF8cx2pE%>%b-D%)IrQtvORzaQij5I?l=W9Mb2bb|@_!ccKLx zsgUAdaJmJu*K-YGt}FBNvGMTr^7N$~=zXm#Bd6yG^Y`Y-?EIYGQ-l`Fq3!Q{mj(AD z^fmuy&u4hasWrVHfn85xKEr7q!p{GnJ}>3YpTVYwyZIbDKez2l_=ot(3FlwuvD|&< zo|?C^+jj)nzI9Cbp?)~cS2?#YaX$un%Q{YB_wU>%Fy)RY$jBRSM42qB|RM-eI5LK1ytAiI(s+I&|>|t#!fF3$vwp} z<1EkN_78v>f2pS$(E1s)w5JDJmBEPC(VQTh-vBB!-Y;Ds)&_Qp-<>u0ODp68t%hM$ z9y2zbF|3DD>4x{JPB*}~!9F6dpVf1NR%b9KMDF~x<7zsvXlm1U5h(yO3nf;~Zu~%x zmf}00G+9uE!H9d(Ja5mFHn9K7?A)p@OgY&h{O#Bqzv$^E(5e_FJ-ooV>6PuDO(+5jQKSAWu0^( zoOSfY++f=b;AK(8SEwFMU-sq@@m);1E93SB%pQ{iZjNr*K6XI`P&YUby=hAcec1$B zm4QuOtMkKg;yZBZqo8)PR3)@i7v4~Xy!^E7O`s9o@6EEV6qja!_LY0x;tw;F6PBF3 zy)C$ao0=oK7sL;p`T0!-eE2wM$7<1EzHQz1w>B~--{x}WMjbT5IIm1@uA1jXc 
z0TtIXp?t7G;;K`H^koxhM7KY`#n4Wt5)kEa7pqyB4gJHjOJv6X;s%ZAHu1Xqd0Rw2 z7_a~=YkZA-S@xD=bw}rMgGO?0^y$>dZ>I{tg5*hW9o!SZ<6{fnFY5Y9kDl;cXYv%B z?)#l_k2Vkd3FXbdxgR;70Ie@9kKQ{hh8r}Z`$drRTnV#xVAMQu?Q>TXVa3wHms2H5 zxk0O8jNBya)Lfl`?Y5g8S--y?3~txmoNdwu z72eq0($s_WXaNs?p{a;&2LGP`DJeC1Qi#3=X zdb$ZT(rY7pOMhK9S`U9Nyd;p|(gt<%*H1p4z*X654D2~?{57#tO+fnWiRfn{17bSb zjh38H=fHEKY(%$E!v52`=Uc$Fn-Ao4UlziJiD`L1ORDMVzJ6Hx4zs<@Grko)I@$RSPS8~nx)xwvmp z0bCPX9|<&D=+T-Fm+4HyI`_uv- z*p$mki+#Rv>eQj<%eEp!`jxXkUAToPm{JwQRxG5XSxq7qI=69=V!9~f{ zNr}6k!{p)mZ@q02>FFlQM(sT&$~;UgAsYr+`dmEowidd9a|S=AbFjF!w%y*&q-zM^pY zniFUw=Zi$%WQ4jj!0JOuy2{6|z%WMA=Z!l#aE(ACx|2lS+(AKcXgO-R`&IFL=xgE- z8Sy-Yp65w>R8nxd8Sy0}ALndr0M&jUzs+nc02zyRJ&3Wn!VMbHEwg}cmrPX(7_78e z=I#3e_&6&0w$JinZqSHs-5-I%(u1ntE?0h;nxXkHCTCzlJBo$T_7bAz$ZmMA?XL->R_H;hRdp z;MphI!`D>8QMU&D7;>M3J}1yfuMNHE+4#Vy76zK#+8J=E4qTsS_dWi3EPacuSUr8k zOkA(Y_?7gZGmlR>`P;ITrM_9PD@bZ)o7F>Z(1`BTj!m*28{R|z5cAi2-133Y^^Z|& zPV>ajN-9w{ve!;5nYWfztRUdT|(T!pA8zRSBf2@U~v8PtvBajuMud} z-cfrJY+qEjz?Gv8Td#Rv2@C>5wFppjlH5PI=etECXQeu}*6fo9T8!o`opW+&03wMUjNnT6V$@lDTt z#r&ULaQ((Y`3YT(;LxXjrH1=BXhi~z=)V1O9egCrP;NauK*u({9{hT!oV_=WD>Rbx zyqXz1%-f5=bHn|i%U{)lRR>R9HnVS}=NeHqYHuY*^frEx6sRc#kI6-rfSjr->8_(3 zG}EaoK_?O246HV5iXddvgX@Ct48?;!!Q{2sJu07@veCRmd;S1;*rEc6 zt*{FXSyKoPjnA4I+t@~5_Ku}WTW2%rK496XwDlu6D&q-+HYdZCd(9_Go{gX{n?R%S zdwJ@^>m>$C5M(Q{bVDhZYRW zE|~FIGQ>S5lN&VB=LP9kO|)M%z{B+`NLyd6jysW21k@>*MkfHcFpF?{~Y_JiVgWGEn z*S?JXvZx7MSUBc}ooOMME-hUx#&|_fH-ScUUv_z=Zd{1Pq<*|!si|56vM;_!Y^=M( z4I1_DueBGVO0zzI#)w=KUbI%PTsz_9^!O9ppiz5&7c~*jSIULL4KbHC-f4vA((2Z{ zY2?sDeFR(FnRM%{db>k;X9?Ic>{$Mc_&1<*QTjg1$N}^tFo8yLUd2;%dWCfo7|b}F zV>jjpEGT%gGD@x$(2m~(8nyS!FCR4w?xw;`SyMJ$(5r%jPQ6|lsD^P9uW7^^f1@Ed)GaPqDb>*Ko>^u*i@R9IX4hCR*1eaXD9$vyzR9_zn(q zJeo5`stTl4jW6TT7|80qxQ~EpSJJIGvp}=-m+!U)k9$akR|okAAF%l zcNOKdFzKGIcC96HOfksnT)y$_h;-03h+oOPlR?V$u3_3xjJ z^FHmd?f{bDqP>gP2k6{ayO+MLhvGqg6IsF;*o`W-de8&;p4D6}e0dhCTji@xzxTeR2gL}o?ppjk^U9fQeNT(Q3zI=>y4ORyY9$AYBw_l?{ zqY4-JwN*&q_PKw)c%tj(dbs$G#K(6(zJlrR`vpz7-^>jf-NyFF5aEv&`S5fEcGz}P 
zCTyKM;nCD{bef4(Hlq9P`Q~fU!#{wahW4(Ln$5tNarn4^$3AY#M(rIq*RB>DQVcGX ztY?IKyaHP#Mfq**=y;CGX2nB?he*^_l*WO<1zo}8_tk^*5l>3}vl%pFC<~4Bn!XVb zk2{tEJBosz-P+O$EW&gKKW(7%fV0p@&JT*2<~w$lfHL*m(lfH<@MFOTk*lV3^D!10 z(QOzOaJIoR6O8-VdDWsd4`f?Uo@7{0HxeN<84tx6VvWYxP4nyFtwp@q(hZeBRN0_Y zK!^j~tazyXy;*bb`!<0OD}?lvPS(McV(*4lQ@W@Ft8CQXDj`p!r@2*uoR7_ei{l!> zz8TZK=Ga8hw>MEXlJgS*5h^JU-hqB_^ZdattD(w}lF{;RbUsZ~HWd$LoOiU$+L&Aj z!^8(*>GHwN5*dG=L`W{PIXw=?Ljj}>ZAJqXp@qS(lN8bZ7I&;em zLpkuAjE6E9eE!0G>p#_jyKOfD8bSn=g-S(!Ovs_r$f#@rjr6&l#<6`$-Zh~AbNL{( zxE82AVgBC3Tj}gI78;FX=Z3dVG%#YsJOlAQ{^3`khgJ3=Uq?Ef%0i>|j(TOeDKzsn zsIOhf(>S0E1|*z0o@d9QY$_g#mF?LrqIx6>I%imJ5kAlfM~LR`b`7VSL8G!+@lcm= ztF2#PE0}-VM1s$_9*&i7&A+{kF51FEqxMeSZ;|AxTLm3Pj1tSamj_}igRB|f=zLr( zG@@H(#^KbcsuD0sP+`^$DptOO){0HL3re4;O#6%XCGDC}nbm`JE}BPGwtt_&VueJ*3k5;{K_zhWhwy{^wb3c5cVLFg$`=V~bfZ5i zn-veO$=@y))%5~Of8SP!4Q>WQkK37+2ei<)7=cE9rn8|V-+Zqx0hU9c-Kp6Pp#93? zs}oF{=xa{KLm7-u2U0fa&TIg81I$+@C>4T+*>!h;c@>R>vfx;bpLNVrzz2`Z$Vrm(9C(s%1LwT zn&HDXnVMs3D#7gS)1^hj=w>x6G#bC9_iAm4(D(t&G?&1W-*TYD&tFF>rqZp3VxbY; zHK+T%@PnhESeJx#d`unW4SMYSup@(>ZZaN# zbAv{e*gI_F(Nz#Vo#(93G+d|=o)NgPZ0sUBPZ0tUWg|HcJKR5A$Ug^`y-nC+7r+}c z@YJ`eexh-ZwttiHPzL52{~^7#G!@)dFj}-|QZbxvl^t_L{4;&c$#^KnC_QQS#dAeI ztO=R1;E`@7Fqpsl++7zAy@VAH9pg87-JD2-KI_efXHjP1#7V1*=}T zy1uvwgzO3*`n0PCxJQgjv>hMM4I0fu!$;Jfce+~xC)^!=#ARdy7%um|3PVyu%QY$< ziVZP+$*3Ch5KK*ceS^oh4u(6Vy{J0Op_dS7G=7I%%{scxuNpWWiX6LO@qm~ydLIwX z*iEg_V#A!_!8h#?s@Ivi+E7KM?Pt^ULif*R6G=GtM_?0sHy=>I5}_4 zI=v3KJTE7B$P*`eG%FtZ*dv2)Wa(SzH;n&s(7|_bg}XxBTFo!?TqEP57{h33ou9qJ zYj{vsaJ(xYrd-$fC5lIv!`y%s53P7KP-t{{0jQbscxK4>N~mevIBJp$hgl7QM&q~j zo%p@Gz88ZlMpNaMq9=d!$dlJQUmV?<(1+S!M-KqqX$fP<6DL1g|tUtJ>( zGYbNZ^x9r)HDAW4Pw=bQ=e2>uGC||hhcxRbn!7HEo3~Df!&f9|ICQA1&@cF`JRlPBp3NGd%dR&o%bD~$#^KnI8$8HIjK4w zrk(NAJ-@60@E^RlZ`wsVp9c$#B&9*LgD(uOXy{diicwU zYo6~^{8l{`=xQP z*Qj_XgO@S!s8?YVYy*pL?XBqqhjoi9Z>Foz%Q=BYbf?K*cd(t<4!jF)Khv961%3Co zb!?31FfU=nLnoiuexoAiD;&Ii_w`$^OJLQq&Tu)uTyDxndTrCyM)^ms-{3;GS<_Va zdY>B4)dlXt&CvI|LB{tW4z>eo73e2?+SKrlgN04T zLoo(#G{5nu-K8)#N#l-E 
zCvW61hLZ763=^9$&dTpg32Znf2WlJ9`w9{w9p<}nu&~K^D8?9ct?b4Q$4dCr?aiqV z7m`3%x#^UETsr?b;u?WQ4FVt*PhG^#m) zMms}iYJHfKnE>y{hOC&KR|mHl4E7qYNcXG*3ytJlpLg<{XXBp0*Xx$=-*WaUFs%I9 zx$6{%`8O*b`uOw>rP{Md8$NKqsdzIJtSDZg7-3sRFXs|OJQRKFNYd+Ja_t*9C&#<^ zd~poi>}Xusu!w_|OQ6ww%=X+(N0G@bz-ejc_LA>tL`uJX=<#w6BQSwR^O8lGX}=a( z6oTWsr}C+fX@cRW^44?~a*!4Rjr6%$xc$M!C>9%L$?tBL*$fo>PaW}a4~O|T6%WPC zqHc-HPG|&5l}ATd6ug3?bj{{We1_4tHyIDbFbH%c6Zu==`O%dlijW^U_xZV$mvtPp z0~rrxU@3yG>lxQ8(VTzs8OfQI@Zf=%pk!gXr!-v>2(Vk4`pEMmu`JdnI{PFGj0e8+QjN&ZJwXiCT<|hoUtHHyVv5JV^yz*PKqfwif`C zfW|ZaOE}C0S@FRyBcTI>#0qm#qhqWvA_BTR3<`S@BS*^n?X7zLtP>!_M!O z-B|?`CrN_lDb@5`Bhbh`zrJNx{7K0hP-l}wpvU-1AZePnS9s2M`t~N!sJ-iE4%wP$ zk_alFCnxLADuI*Eg<0rZaD_&4zM>?|PItj)Fud^m9p%;suxS5~XwOxV^fhP2LqE3i zR}Bv>gwLj|21Vne;O6BCbyh+t^ff2rp%@mv=hbe{rL8a_Z1?1@v<|51BN{r{or6v# z(1`B6yiVJWjcfwNiv$9&qu*ip5_75CIJ&qXk`@Au=xz^Hd|n+_4jlG4ZP`1l1X``w zS5{`iA;Lw)LmB6C7G2=oSP3lVD>CM*wSaA>;m%E*v?41WD)spDvSlxGz~pgyYfUBy z!~{kh`xr0rou21pJQTw^RXZd#zrKgtee?zo8}ec4nz6pM^W z?-zPK9wY|cSDWEp3gh-TYJU!+vs+NvgS2m`8*Js6%QTJ5+i)4?lHX4b^Q5OvvkneKF#XQn4k1qqvD~AFw9d#RWw=Us?5fiG3zy1#FUgWCpbdt{VkI<}m=(q>D9`WLbX z%&me=Efdui%4NYP`hm@j5h?UMC(wv)vw^Qp9n>uYS=U_-Wh$kDNX^+(-X+mJU5bjN z;-QSiI&v?6J~|7H9L5GJrtWpw^C#^u$mePDTNpUVa zH{^qvN&0J6Jg2jI5SomKqGwx_B5Gx{vq3}jx={4;OE_oP)JaDVa*$L4jpk!l8%vM8 z&QAid4+o`&<`saS7w24;4|z||H3E(Lcg&+lsmEO(0{pmq2y#oHt4MnP81 z(R?gLp!Hng=u+4)|4R9qhDH#wRW!_?Kiz6agr?%5C`Ogwx${;g4D%JcI%0qyG(LAI zMEL{NKyI zT=;ynv||+Q>KInd(fB=Wn7c^ysdjKA$zJF7t@DgB<&OA8ebWyt79XRdQ?g1aTQtyJ#J16PF%mlog6 zhO}cS84qP(Q*t^UTGU(0ux^Wx#dxq-F|Rp?@|sKp30ve&kH z6#kIOsK2JhYhqhe_P87I<^U{4VdbR%o~=>GWP34w6d7 zLovp+8NBkPt7G6(TVun=Vuj$=%ZHbeuG9GyQO(JCC!E`e!R?Sg+YYl7}8}Oh5-d!@`plEOk9CbFSAtRYXi?QON7xFKZ9QyDE z4B0A^aCGN87`OSwCW(*N=p~gE4=p#Db}>n=9qjKao+`I05B6_tcV96jlN&UW^E#>5 zvUA)Uq0K10HD*&1fqua1O}<8SaUw)F6%S<$2@y0%I`IJrS!)hPb~+d{-~^A$nJ4r- zXT?K5?X}y!KCu|SXc^ii)E{|>ekCb+iMDWqMs|zDzF6%8AF7~n=!wT&1ubBXRkYv1 z5p*+2RC885bZ?+u>CAKK0Dd>xo357uO^2@vI`ZQtH)W&ods9MQf?r}IxT^WWTWnJy 
zcrP_idEKHmdNeB@nlrLC=ego95G~oif#S@F(&8Cc7y8RfPkBfyya<0*( zzPRH{BaA#3kgM7l3kOW9nVYuo5Ixlf*U;>TgT=1axw!=%CfcKq_;mLtNz;+RnCjSK-xJJf9 zF|6P4F|$0zHp3|s`R!s9^I^i%{kh|oR?*j-KqI;X)>NL*UiA(>6K=eERQVztZ7^)@ zg&QyEYtD*?3Tf{@ZolU}m@P9fd*VSAo#zPrc;pe#GBb8;qy7Inw z=dd5}dqH}*RSXB)fsBWuH}1%XA5QRm38rcvD+yc17h_h=`)cBLy7dmIY$_hgfblCI z#ecsBi{1oZVECqhSL*WemUHUqxkkl9u@UVpdFf*5Fy`3!le09k;F+V2mN5(He44DX zk$t{skn5V#V-@h0N0|(ErxNgeTX(JXDTnc!iia}ZHJvUiUt0{#@~anb_?!%mCSb2a ze{mSUS@F=H!@o4kpf_qAF+!1tT^~WD;kom=j z;42N0JdITa^wL7cLoqDYT(M9ssR6bPGyb@-b{AZ5obRBm);D^dlkrdncH~p$Sd$S= zV9X3b$N44Ya9e43)~yK~xJJc8vD_hrJpL*HT?6T?UX2nBaE8kdG5tRf5ON%xo{w#n;=SS~+rCLId zrsAR4z4=mFk2kf!AcMU7x`)2Q83Jd23StlFxkkl98AJI3olY)BtJhP#c=Ri>LE-gf z{Y~c4tuaCEO`wt8GV48m?w-N*&|fWk)25jZfX3~sLj)&quzJXND1+fKbj%YJyjT8a zZuErzav@NC7Fkh%z6e086^W>#3Wh1%oe=kYcQ%Z+&kR%lUJJ+QB?eh}aOj}~8rdxe z#BYWcZE1m{wKVLW%zq0E=WoCFf9>6AT#Z}UKk&UvnUk5!awtPIDZ{avG^bHWNku9t zB~6ksLuMlLkSVi_8M4Z(NOMZ_sChRh(zBfHoLA?z&#UM2|NNidE3d9~t?PTOweI`2 zlEJ~<`x^FF2jj-5TL*Jj??0Pus=U4nW*=}$CQ#5;yg=(8Gex4acm)8a-9hbO8 z_h%*ezV{LQzTf}chnCb%EkE8rhe-8yjFG)`pMJlp@Uc_-cXyJkWB=TTesKRD6}|O4 zjotdHM#Vpy8mZi@67v!9=JD%3l*hX~v{7a%d-{Lc?z}!@Z5ovu+G~U9```VH$v?T- z_xFQ_j!&CU@HH+gq;0(3+(1{<)J&Vv6!-gn|8pOzxYH%5uLDnG%Pc=>@xu)?cTPYA zpZy&AZ&%B&`%oV5aAJU&Oa3XkPBv`+*;D~Ft-Q5&iQa$8&HA0C7k|;esgABU?o#cp z*hGSQw%ZyV7xC8f>pqmnugg4jCLy|zhDLl;Jj;InT<_6Syxv|f{Qb=Tz7OR;?mvHV z?Y<)Fn#z6@J0XLV-H!5}RQ|i4Ut{LK?n8Mz_wdX$mX{mpyjdx0?h^rBkrY=hyY>2i z$<69ss36b#F}{{gmv2>>EF&N`#og|HnBGEuYyRs#l*c>MwR*$Dr~)b(t2B7v#dsQ@ z6zYD=Uc@^8`#zLs6{4r!*zFUk)HZ8+eDo6aTdkq5Rd(a|GynTOl>N(f@8#4}Kvi!_ z9+ypMqN_)*i;E2v@m2HdK9u!)NQTmCN%ns#`E)t=Mo>!{txw*3(YyM;ENOB}3e^#8SlR@asO5&u_bzZJTc$PizOKO0K zhxSxZ_tB5fq}^U7>h+sgOm~=F36tt8Vu${^4`shetSC<<{7xm+PMi99mu?f?v!J(( zjMDG^T@71nf8U4lznzU48K>Dq%8LJp?ntO1ZQEbgE$S`e&Ewa7D37OLcafiUF z{F3_oJ8cg?&`Rlo+jDyq5w$%xCau_&^k1UcxfZ=n&f)2>5)xY;U@$xV4Rt1OpU5>Q z{(iOmb06wHL~3rWTnzcVLp-J7csZ4hR+$&0nDYDFzwSeMyd6C}GsCh>S8nZF}U;fL?*>}koC&?oQZY?BG 
zL4VhM_LB;N#@8#xKIc;VfA2T=Vnz^v00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHaf zKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZaf&Y5~g+6T+vb{C= z{G%hC@`q(5k$rY2Hg6mwAm;5UL4LW7g#XJ#?AM>po}P{l-aMW#P>au3+i_&<6aRCh z-eZr1*`|7uGeXL_>3$jgeV{gSWsp3lPz z6{k0+&~tNoJX~oVOXrVmmDKDS$eD4P;$9l}6X;z&g)3Jh(Ikq?~e_iqrzY0k_+bc zC9}Tg(8w+iY-~=R=TsGDWPOy*?lmiOODk>D{V>FRS~VH{aJr^T-$D_AI()vivC+`$ zE|p~Jpoxc;ITsS=>;qk#@(MVoI^wb6xIccb)wX%5Wa{&T78qdwM;iGE4lfY8Q6WZ$?dq5r(Cl8GCqilOE63D!)+7aL|+W z{MgosVa5+{@HAFvY)G4JRU;yh_0h3&Xjn?#dve25>B{O;Su|^TcmjQ$AR>^h)kdEu z8#8)+p&|1&d>pm)8*To)DJ$BwNJJppk2~Ax(^#=)`t0naRCA3S!V7C}nxs-nxVF`z z&XY@i*2tUc)24WQ)GK;-{>Otim0hWmQsLmL&!PfZAI-I1N;fTQqz&iBsg86lChoK6 ze*JN&oO7zejI72~KPs6Lxtl7u%rX|F=hDl|+k$%*)rpuYTdRDlhr3?8wv!7wc9&nv z7ZKN-_ZDBaa&N21{famKsxd)5`h$MbO4?t4*L6&!&zIk(~D{1zP%0ZHWHkNl`xR4Rpl`?XLL`ekOlLeDleMzm1qsQ;uc%{ofAhG z$ojY^V)O%3-F&)yzFhyD@2TX-;A9!?@Jvpe)-kWYu^RdONTT~YhSYX#IB9Qg5*EtQ7^sIV1U+>^ixu(y=D{RSujpw;fIblZD$12|RciEbiw6E>V z@11>8sAkAKsiGBWoHYsqb@;p~CdcI5FNtZio4twN7#>ZDgG5p{d+v7V-nB8Dz-r`G zYHf2moR>&uK5XzfdZCsI?DG92!;&~N3IkamJqPz18(WZ1YGWkl41bhO9j`a<^h)K% z5eBkzHRPa}MdpZVI=|jnb>`&;^6*7VrPHN+PE}zbJ9FAQ4;o(F-9(4GuX7shdx9>w zcQW_3lYn!o$}5IUm-~6<@Q29v{d09KH9bDUGvjS3k<5HlyWu?dDJKkM&qu$N^)p7_ zDXc99qk**7QmJg3n|W2{W@Z^@#?5Wlx3U`fd^zodh z`SYIJ?mgQ(iQT$|Hhcbv;>;)vWczW>*YADyc5kMA{}@#0E6dXutb9mYDy3dTAbVfk z=O(Ec$a_U>#&2s(l=whr1xf5!Ggu&Es%))3daat^;vY_z551EvA*iOa#!Swcs#DAv zIC@yd82O)T)lBw@l=Z$6dSsqnl!j9&>Hnx;lX3rY5rJAfo(j**LDj#LZho^j=~zky z705~_ZVcvrA`3IJK8m{xtm*vrhI%bKljpoLg(fTV!uGAo5|NSBxFMaixtHhBOPd<^ z>)K_};6A>e-Oi?Q26|q1Tg__Z^Th(UNp5-EM#!HkP{m*AgH9Y#W`zAEY6co)380Dk39$c35s$BVj97M?y=SY~v235p{`25gD7rG`Jq+ z!i?%C?pKc#}!h zG$Fpv9OeG?oKqDBvi%rztXRqXRxGg#zol!KUP5~qZd%#>k~rt{Q5eW-EOS07e&I?z zIe4vIEnKyPtV$5Cdv~2LB9OfUoZ6^9_4tBjk`SNLxWvwv6u8FFZpY&|r+Tga=~Y%E 
zkGG=v?xTWY@z^1AR@^_eyn&v#iGM8dxQ=qE3Io}GOxNuBZsN>B8g7=?_m=aAs`ZZ2tAH8U=@TTfUBZOFtJ6a@v!eOcy$jP_KW*{k|#;WNWo=U$w{2 z`_+?8)^!VlFK3YPp~;iZUak_6k)1ho8t-%L$ZF!f@K&8pk2-pxRqczTv8buC{ivE{ z)_dLgR60OB?9iV5C3LsgT>I~rpK)e17*M7+>1Q9~x1Q7QWmG{=Y+JN8Ix>N#O!5i# zmN`kd`X~%!XO8jQh`FC+TWPpnp0e!RFc;Z^e z??l7n@Hwpm+%*a_vS&x11rlwB&l^e7YV{jpFG}c;e$wJvK~0=fJ$f9aw^~FXtI;T4 z(O$4TkB$p0eNvxONxltf^&~&ui3ntUe5V|oY~j~L>rX}R`<_%qH@)U5hsdW=u6Beh zQr|rJ=d{5iiN3F8uqS%(%aFgn?|Wda2%;tbeGHJTn`;MyVr* z$cI^X>vy1vbE?8XwjX20yna`=w4AQ#J$do6DXG+T)~)v;L*hjQvi&GHXk%l4tb~l{ zR`G4{cL9;PpxAfpe2xMz@dv1r;-GdDzf2q{-i-kMrg0Z_KE=fP;43rIW7&YbR^HEA8QOZWShP>52 zpO&naNj#I<^Ur&8XA}mq8pmr7%(-HCk?iyQR2OIXie7neCT>z4_h+CmkoEDSUyRhY zfL+vV;K0#Q$Fk^y<#LPa+PN2iFp#ZP@2fE<7az^18)n9w*r=63-CF&(_bSSuTx-?4 zcgrePBaiPL_hs^kh#I=n&Chsw=tp9j!Jm`q%KeQj3}nv^DXV7nS0?2&;a#@b^RNce zf98Ztm6nSlGP36I;G)T)iEraX1hN_xw&?cRKDC-I?|1Hw z)$=cOT-nvRIUBjZJK|1UIm;r3Io}hqj&U6ukFE^MD;9{`!h3{q*S3xUM363=EmB?gER|C+u(}z0k^piD`6n(W1dxB>afLCbgW_bci&`h(4kYB z&$=IY$%!KjWNX#+m~7?BUEip*_Hh}L2m#p^CoMLkphQF<>tofBC)4}SswH_f-#6}U z%%j6gpFHR@G?z2bc&*pmsXzDQXW1vd?zSEDnR9brmFj%5-${D(9)5Suw?|!bd* zhSq`K+llo+_w&6huLELZ1 z!a&x?%eAp)#+TQUH68vu$q2(l(mv z@AbIn_%Bo{!_r`vCil@H%*f81zRT^EV}jx+Q9M>=vG)$;C6A20cRiCcV`cd9qMv=_ zS?#cYGe<9j5y)z+$nV#1Fu#f%?RBH9&$D`BoO0{W=Ml-2 z>m(Ou{Q2w%ReMqKDV@#;te9yYf0ZWJNM@%!$`g^1eOqa(6lXYZ7SP1uW+UzQRg$h< ziSmJH?oZwSdpC8c7y=N000bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_ z009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz z00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_< z0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb z2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$## zAOHafKmY;|fB*y_009U<00Izz00bZa0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;| zfB*y_009U<00RFj0xoKcmRV^n_gU@F&M)@_|P=sW_zt( zqxLTKe;_lx|9%0-5Nn*wHI*C`6<4{kSrjS6Oi5YYO#809`E8_i2lch7H0b&`!7^qb12MPDoA4nfnf|+ljf#%3 zm!*mMGDkLxsB6`_i{I?06S{96#vhYI7Jk2EqT6zVMp~!;IQ*c6V-}7x=KGqs`7JQ9 
zVneK#eD}U$S|dGJXI2q$r-q!|E#Y^4{};-CkR@&HU(7Ltg;9X+>KQIdrdq5c51Oat ztqgia)^?k;Yn(w5@$t2CIBF42SKbg$aeLmtQOLAqPMXV1&AjziGmB+qo(HZktfKRe z$Y0rYE|vPlcr8{Nl0q9gqF#R6|B7Q4CSLBA>bh#~?iOqoS7skt7k9da>{;dFqBb#_ zm`~W=`><>Q4VbgC>U4WK#}H-#TGLktdIg#ovmti6kNK!oQBFq9TIq4XtCmcDZnU!f zP!m0$D!=J;)mM%}bOZh8t1A1f)b(UTbc=bS|17PO*qcVlPCd|7!$M$E^Y(55k=i!O zX^lb?#}Lz|&+ze3@iAYc#)b$882Of5Y#@geW;-^iHIf2_`) zGY{}GF`KTbqsN*$|9(G{%Q6kLVCCbldKqP8LC}u`gTZf!gM3+zO=bbdEF4xlu5wvu zK37|r&EnMC$h1!nUlNt0_j7ma6p{`_Tm4(J1@w6POQl(6r5r>mWM;1~K{+7%^*#eFs9JHKuSL+z5Xgl!P7`7Abr0!G@ zfyKMw6SsEI){v`9o}8?s{sp5`jz8ta{`(#=)iXCSSmvN)Y0gIY8d4wB&##JZu`|on z+F3(-j~%dA-zJm}IN`P6?CwJH??VKv)^bvwZth~O$Y$}et$6b3UF{@z!2o}Y_&lmL zB6COD>q7ccO@B9o^hT_Ui50Lc04})6J!&%^Vhv zy59Qc&RWXGR&0p2IeT?|A{*$6TaV7PH>6RO^=`H~ON+_lOKx`sAFDWq(4Xd}s%+%% z@8Q7;8RiijX<%PQO+9+gEE(2G0vEL0nL0L=o~-N8&K#1@F@&D`0v9cp!0FxwY>1aF zHgmU5`a(m7gie@Z`i^vVkN)wtwL*T+=xh!PH7y4(O+DXLW;$$vf40u(Rg!&= z$W$4ag^VvHzN;GEdW}e=A0m!gdWJM}3}Lj;#AemBWe$$ZnMK%Bx%DrGrP27sy+3;k zKGC}hi}Eab))OPgfZn>T>E&xef + + + + + + + + + + + + + diff --git a/doc/source/_static/logo_sas.svg b/doc/source/_static/logo_sas.svg new file mode 100644 index 00000000..d14fa105 --- /dev/null +++ b/doc/source/_static/logo_sas.svg @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/logo_sql.svg b/doc/source/_static/logo_sql.svg new file mode 100644 index 00000000..4a5b7d0b --- /dev/null +++ b/doc/source/_static/logo_sql.svg @@ -0,0 +1,73 @@ + + + + + + + + image/svg+xml + + + + + + + + + SQL + diff --git a/doc/source/_static/logo_stata.svg b/doc/source/_static/logo_stata.svg new file mode 100644 index 00000000..a6e3f1d2 --- /dev/null +++ b/doc/source/_static/logo_stata.svg @@ -0,0 +1,17 @@ + + + + + stata-logo-blue + + + + + + + + \ No newline at end of file diff --git a/doc/source/_static/new-excel-index.png b/doc/source/_static/new-excel-index.png new file mode 100644 index 0000000000000000000000000000000000000000..479237c3712d25a790f1e1f8e5635c62465bae1f GIT 
binary patch literal 11506 zcmc(lbySqy-|mS)7*Le%kQf9+x=TR1TN>$3=@=M`76b%_l%Am!L6DRdB&9p08-||Y z4Ej9J`}@7?ecyA|Ie(l#h*|4iv+w)f`}^6~_qukZmWJXTd}@3Q42(O6k!FfdEe z|99YFp?|*tY0sknV0!B)%3_rF({7+Y0PJPdWiT*4y(PGMhJ*eL^iVSL#=szKyZOQF zaxb>QzpT9PkhGu$#(xo>HcS5*{kDrO(tIV{T!?5loGTS(Wk9m1H`)eg6q~BM+@q`p0 zvyV@r&VvUrggQWE?lt^;LUJ3jkPL!a-UCz!3;|v+`WJlFB)PhrzkZZwiSsrT^#ROP zQH#11XyInm?698Uk^ zqq|7wfDXj`9N_xkE%^KT#s)X|Jo;a+c6WgcVoIMaV5dn)hMY{=^98ES1sr8xpZEFM z9EPchgC$biX}G0{w`S9V!BTuR0+{p{u#g6YT?85)B{7kLMU+%+>Uy7R>YJCJGJ%E&kA zmb+nLsaaW4=PM*sl8h7B7NPCe7HV%htBwp){8usi>ZQjU@YmMet2C#3$xiJ-YzPKt zNF6qoRY>5)+45W4WB+`LQ)a&w$2E&b2P4aAuef0I>-%GQ+T99cy%1B(O9z2X3yzs2sFgO2zcA2#1`?u2w0vZBd^&t;PV&nKMx|OKDZMW_E1`K<3KLF z!sQ29MOr=o&J}_+%l|{sM)SI2i|EA`c6Lpg>h8c3jm>#?NFyO$MAz`}*0R8C8W%x%4xv6=Td-uV@E$)A{b;Rjt%zy-8#Y-}{1&Ls}X z4N2wV?x5dwws8c{fzUvs^oaCJ(ezaG454PgiH$2L#zb=yHiWxi%MN<9`QhO`@O99? zE)Ku+1vm5;A~rT^nvrGyx;EZ~@>c$E4?Uv!*URUNfT&Hw=6Bj3A&S!1gU#;^O|L_s z3+Q#F`mdKYpZrR(K<&7LFPEaZrLM1zno)>~Z1f7tzP?)1ltvyWYD%3CXQPe}x){J` zi*LYJ%M9pc_~LXPe0>*u))utacv?S?Bt#JtN<&Z=Z{wph?*Uq@y|S|b_Y=@J z+Hf)zlC{hs`==5ng9Tcu+oL_W(8YqTA%CJrpyLWwbWL%$cDzJYqibwQzBcvdhT*^_ z1lE_!spuLT=s9xS9QSg|B4}IqdZwQvFP?_Cj=_d}8}Lh#ZS$-5uxpt_dhAsY84+RKcgQ7~N`NMuJ| zN%8Vx%fIQUZb1NfEVdHFdlc6z4nEJ$d49~3`RRq|#pt3i%~x(V#2!G7^NK@IMymUH z{5jas9DKlJ=HWfsr|Z!2YrOe{T$(ONA}2%ijqY5JXb_Bza{#(-*d(l_rf~ZDwYkZZKP}IKPO3%oG1YFTGE%(7 zVix3}F+tC5L1%S@N+m^*Fh>pP@l+H|v8Cj{I;Wrvm*uPD-qhMAnol#j2*8d~ev8fb zW4(Mo&BEi&>bhag8^_zHOZlkGg$fsvs*3JSGH5xfKOo&cmyJE^ejZHAhQ;3ao}GMJj(Sr|vPc-QB*|yPX0^uY#V;z^#tujAl{y zCW%CI+M9_3zXJ2Vu#xc2!e0}`ALrXN0(C(5{#c?K#WEG6S!X7jGtn5l|#zSv_a}i=STieYU*>)#wg~ly-lv<|%L}8ti9nQU|laCzA|}P4{5)wtl*C zZRI0kBwKyPL0N_y!Zd^0J?4b(V^?$b{83SJ(QBE)x?62-wd1$8KzPQs(UsyE2l3kt zDb}6%^NKz9SCdGCy6Ki(nYd%HqGM=zWC`W&5KKi_cgG4kTRhb$tn58FBO>phGpM#s z$SQ^3`xp?ZpV}XPFnpmsS2$>pxn_o_KRYBXf5ArfK15rf$UW=97TrMfaeYzGvKVI9 zglYl7;#u!G%`FtQ;Jz{+4VbQQHrJOv!ea(oqcc{3{Y=V1-2$M)Q1US`BgM6p=4ZF= z(wZXKH^w~jVoIW6I;k5^!TfXwM 
zA8(2>U(9OkzNRNKBJO?55hkN>XWjh7+WxlPz{by)v>W295mg#0mOoby`1nPeO(G_0 z+iNyX^VSFKbRE?F#YKwdJ5q|)MO;+nl_#UhiwBg=1F#G?3|EfwI!+3Sg!V(%EIy3` z6j6K)LjY}fhVWf^;ax(uWd$hdmtcS6?7EL>85E6cqc69k8u;*U^Eo~CtG=%^N`HHe zZC}jFO@LJqo=Onqoml`+@Kx% z(pnYXG>!1j{ExrdoHN_Da1T|Cb!zXMU`24|ePcpTb&8foHXGq26`eOx8f7}dp%4#L zqDGbHbVNK~I3LSLP1lmVBXwcsD_p&`(57FMBsSAqITNCs9!J~9g^6TBM&yT=l` z`=OZYRI*}-n-Brxcx7uCW6U=a>J42Gckcl+KiIy~dMYS0Rq~28&tHZRk5bs6O3L+_ z@Pumrhq*|SJSJD@S8W`gp6>;;s&fo!BHBjP(}pLe+OYimnUi_Q(f-NtegL#Qfu}lf5 zBtdN42VUsiJr{Uu2ta|}mxSEUCyQ)bCz(cXt~WaVHSKpU-y87xIb0o=O9w7cz$;H zZx?vAnK8#0(|D>#i{+DXjlxEG?aBg*U3w5Pq3QD&V>YF4>!J>El5!yFB#J6N&vZ!i zGA{i?NHy%gVIvlPW2EI&W~<*XrdDd2Y<5%wFP(Sqsu+vXMgR}g&Cb)RuQP`CehE{W zz^f?=03wH18E!sSfh4r+;pke{e9_N*_udTQZ*;nx?^lUFeSw%P_kUv3EiMhmxJ4Bk zF4}WGk=EiFO!tGNv_+6yt3HvouLGe+!VBwpNqFbAYfg3TVrB-gYCWUq*jvnkLu;HV zYN#_mEx`r!B-2~2IeQ+mQHpAOz#2wAeL1{pGybw$xC~ebCZ#?17BBt{j?Pf3|c&)0Gc~go1T{wp+^S4|~&gA0F`GQEE)e^RL^Eacg84qcD zdtEjJcAQ>~fpzI&tJpy0vNE#qitL?8nNOZEro+%PU!46-qpkpTp76f?FBLN^1xB3i z%Akhl`9KvufMy5cgEAEQ!gj>dIJ?Y6F=_Zo?F}|JzsUd;ma1r9XHsnwZ+?F@{LWb~ zr@N;EZ|iweBugcfDH^ao7^`Q;5A1sCx|xBx15?tE?p8=2FY{8l{&1B%>ssQGy6eJ2Ku2$iR|BWrzn9d6q!{>%Ia$&!rCBqZBvJ4HdYAMk9 z(fhMq(5dte=Q#bTGsMwitUhX|h_CCNN32j66*651C@etZ$M8!T8FcPiC@`&`+@{u` z7Sk7qHzrgBd~ZQ`H)PFp7;tcVGf#2WD$jPRt@e{u+nbbz9NAx=&4$bna(Z(cXkv6b zkgCa@AIJAZB+Eo|6c$o;$PM6qNm67j_@I&^I`>3xOm-;$bo|T!#>df+6LW7|IY6bE zbF}OiwNHZvD;nCXr(2w#DSi=r`|#k2CUv0bkBaxn%=|^bRWK1C?%EH+GTj=bT_69& zzy0aZMb)e6g{%SJ4()^tNy+Tr#B#{+B0LvLP_8Tqd?^AO=H#Qz;|sVXes{VXttGdS zq^HlZ2N--NiR0kchRm2NjL^LZfw`5%g<1LLnn_#FxiYzi3b}6VDFU0ylYai#NRa^v zwpjlG*9`vuH*g8d1d|+aF~H$->uZgf`=Gy>g_y*D?(72~Esw&=i!s%633a!_sbBBq z`2M6i%RH1Wv-F}d8&W&(6`7g)dakarNS*W(FtTNxh^?V;W(?^|d;& z{8mx&iucVMOB?N=Zx(xVEUD9g17;gH2gpat=opO~&^lzy+gBjT0JkL(u`b@Px zc8K%f(O;VEB>y%3mnrU+uqa2_j;}du8*5!TDmFM5pts8@6I^gAlo$bBxfy0^6Kd&I zlt*hM`-RDyUAZ9x`nNO*>PXYjWpN$2jhf=)%4a_xN(Ygb+)v|8_$*NalqCpW{`JF~ zq2g_;V~Rw))uT=@zW!BQYM$U zwiN&8x)B^W&k*Eg_jbNHBI-!`XDqJE<;HBu6I(r*d2^)4oq1s|L%Bdjp96wm6Atg3 
zRt068!Wr%a=Wp7PymUQq7W#LjslC`521+nh=KmR&da!Z(d;e`D>%)pIssW8qT9%Ab z{hqv(THO&G)ou>aF$?==`ANWwB_&qBw#fRed7r^b&j{;fAvb`App3EO+XBPrZ}PlL zJRK9jJhfDgXFB~(kipBd7XVE`88IMvuiWQM`w0Mnp+fkL0xb6hz{GHMMeU61KW(on zzEOF!qm$k*n9yH%HWFqE9FfP6@jVlJ^wyENcLGvU`D`!;SM%RcB1y8rqla(FB?<@*ywcWe zyTB_yi132q{dF7WzBcx^z(Cb;gJscp25*PPvf;0wC33C>zHwn$q5LJCLtQuIz8Nw< zyyfewvd!Ne)!3 z+7T#{pO~#p0KMX3Fh3f2w0Q9xYu5NbsWEa>_)jYbTiMbzHs?>0I_r7Cc<2<+qBK?LFtfZ z99Ct@s;mT_YM^|EP%x5)FwV*IA+R&H{Qy+FJWBjN=Yt8stGt^D9quji7VqVe{>?0k(x8_z0VK@o>*3})CHR*V+H9%_mvb} zNup&%d;*;5MU7S?ZN@k7>Lhs4}$|Fd}dF$toa=RK4N3qt!dciJ@}1DFmF zw3E2N)U(ce(}%*Op$pzxFVz_t4y32X+m|`rOHc|ey4@c(b+SUhDI~t(%J!My%T3BFK8PVj(-s-qNVZ5}c z70fFvwx~bSdXfWc*l7O13f*xiNJ)S`AB;Z}!_w==_Br^6W$N?1xeL949cVSFHFT7R zZy7(t%j3D1fj>#0y4LETPC~!s5-5m$5L(@qQd`n4x1~awp31!{JNk=UI-)4@gYQArAg2m%5*>ms-HIGbq!ISddat zfBM3Q2VKYon;E6V*7tWcw-ldkoIhwuLpupnIJcQ%2&Y1Wq9W3?I^INixrvw>FTi4z4=`}_p#elebVbdEh z-Q1;NOCj)3Xq#U93%b+%!t_Kxys zSNb#&$|ma?ES$0WCU0-N34p>_h4mGHqa{KJB%)p?1k;>jeE6=S5F+SYJ$U|u#IsIB zZw@RjN20s9bIi|}BxycsJ7kDf8@+j95D+MA$cl!c+pMi&a<>KltQmMbB2UP_owuRp2x4amkG1layRIcUA0)cME3ZEI+_-K}#+)^|J|Iw0~EzU>*D zWQqLpWbt3&rmIC4QRzm5^nhp@kIy!?PwR|+)_qh4Ke$%k8gl&gVr2Un*XjsAoo13$+s-fR#p_50^!5&dnX{aX7)FaBwHc|@G)U07 zb+~A=!4sVB6xITzy;1t+&#tacC1zWbJvB($>CgDU8>?>OH?UKjvPHuV_&4kv*CK(E z42gy!OqDB{ztv(~_}4E3z?MU0+k^LM&Ph~>t(c~6{F%W-@T-8zk{$r71oITMuG&-X zx}5mSSBuR1Hbx2-oL`cSQ{?X6hVQJUTgsZ2){vx+?ge~F$8uWE04ZHlrv}cWEMn@_ z<25o7hg$SB+0%wFU}bo5^epJG=K;_6b;?xd%Zw_`Uw7b^tJJoTVjtSdU5@pYXf49n zcy8E)?kl1bJWS_a$pFX#*C^IwJ=;~*C7V8boLD?mQb5l7&8Fx!Vm5NPV zs?>teW|qs@{yPf2;^3aW%m&Mv_uP+P{c+T~zBO=AB!A%GE8nv8G~$*UUw>^6uM=*- zs~)-iUQ?=^qdm4XmPhk*mnC|A_>;F% zkJoIRXd?ik>&1>LPe>gBU|#si*jzU!!Jv%FRr^B=pjVYI{>GlTR8MJ^kDVS%kuue9 zY47H`Nc`C*Rrp%Q`OwW_z*|Q^6 z9txdeB&T!i!M*PGJw@Eo$OLmc4i%KGm#sxCZ@BdcG95t|FzcErfsZgBy+Rvhi7Ea! 
z83Q^t`%hN?e_*j+sO2m!VGVQU1tXptV<>Sj;bRhXfPh7zt?*VEoe3x-htOO^$7Se9 z;lJ?d{!w-}iQFF(&KqhOI`6TxujmN#BYj_U4imnfwJKddi}*_O`gyQdCupK|^FDW~RxQ#XrK1F_oq0+p!P>2K^4Ot`iHyv6V446}-I4I+t9x zrDZ5boi3Ov;5c5MKq9YMdF`|lW@(d|8Q_0JKqEHHsax515>8vxeWDFR*qT(FH#dc;9rxm zc+S&p#{AJUvdozdZ(A&nq~wrOBBULJ!aRCo6Fnr+Op{*2~M zJKHL~+nWB4I>AK_!ohANGf2yj!fC*U_KoXCbl}@0tA6hY_ClzoCh~V^K#lEP%q5Gu zsQPGAsV)C-of>lt*G9j#heW?ekhqOoR&g^6JL*RrElidRcyC_F%vxd`K(Ia%HTtY9 zH0D5aG+AXN%f8g?Qcee;~6yu95WOsAde^q#PCjS%{Yno64 zBmAV2BFF2Y39F{LXYBVzXb#6vSw}q4*ny_+XVx#Ro_(w0-l()WYM>G|f&BeE{X3+B zi~Q*e4tJlYGr~uv^FJ$j8^4T={iCm|g%qrV{dKh0#5sOr&*7dL(|N637M{Zo`5QN` zU4fW$#3GM?J#>qBPi5AN$N9B!Q`)qA+E<0FnKB@#BGwo(F_TvwN?_f}o73;Jd-dud z4^;ZepxR{QwWbTCP4c5Fq&D(GO{)ei?@s=acY}xn@)_eM3^FN6I0oouuYZtDuS4}| z>1j`n+>jAX3+O&b>VqtPwdWCfTpzl4}IJv zl`|d3{3ka3nbJ$W!w07nyYgP5sTUY5(9QDa41Gq?86 z_Pvm5ujQxq|8e93KHKz6M!$fsQzpYCg^gWHt3B6EBCo(VaX6 znaSL4n&W&!T7yVAhj4L!3CtzzomU&{*w+ko%reNFMzrMK_mnBlx{_?{Y1}v4nbHZe z)9O2_qbiLV^%p^3`F1c*gnktq4%Zv3Dc?_o=3H1Mgx}REPG@u=TdXAex5N|`i|ren z5rr8{T9w^~qz3-u`lguysy6DBPYF6$M<{XML^4E}f~K?gJiYQmY^@%NhwC zLYr|pQr+}MSnrFmlg9Xt^IN^5xPlJ7nXoTPa`9b9I!L>>3%v>gvw8~6!j;vvz%gSe zMJLxQpVV>!r?v1Y=D*43(+qm@X{RlqZ1$$9J^0N&ZGsDX_g))0uG(^8i@y6QlGN>j zme+UIYcM6FQvBekLf?FN>nvM!0eFu^KlB@Cf@G&| zKo-p_kf6+E;=-IogfNuR?Tt_pE}k3S-_hli1c6*vmmtG`icRl4{chu%s~LQNR|krT zLXSywN2G42Ts0mzkcb|XywQ;4p;33E*l2&@mjWx5Pl2LocaWL6_}3T&lT|5jC4}VV z&9IEfyoEGwV_8@wKYf(j48`A3#|XVg>|WjqhZ{-2?~XET*GVoO4(55<1^O~nn`Os> zooRLxJftTK#eIuZHQmuHF6}Gt)A+12fp;9C;?9~}96bjEa_Ebw?JBp-6_$=Pl$6f? 
z=)Ns1Q}iHSH2^mv+GV4#=YYL(ZIV-<2xx_^)~AWonrL%}JRc0OZ@^63|Cs=K(0xzh z-uN4kBb4wHK56M&%Xj3R@<0D15l)X!E7iq6`#X=l7V}!HTA?O$fUhutTuzB~i97RK zAI^oA#u+E)>F~gbt3PoCrhptI)YOeWvCq7W3q7_@Pf28JQy=7l!*JHaX7wC$*Ubjt zgGnH~5Y6kWn#(_#Lo zfw2>8dR<QrS%0``lC1(U=MjYP0IAszt?!Rl-`}EUdo2t3i2gEbgg0vf3=dWbq zJ0|l3y}RsrPZI(kZ<+8CLY>1ZTUUXGDr^pUVz~@9^qR_s!a!T(_Zl>Db*K% zCpC|ay7Z*r53#59Y_C2MLWs-LHAbj!iUh!dVQxceW)2!8fE)ztNhr6Du*_7;3fK#*e;Jkr!96Jv`AT6AzAb_A#As zgDVYoKr=ZCI(AhM(MXz%#m6Ftp`VBO5^?)=!cujG+xc3wi zo5H#$xCGEd{P2f}#{b7eyz1Z%J=m?^LtU*&Ur!E%p4^<lJqoHOe6+U0CH+bmz2k*-K_oghc?N+*3c z6tgD&gy?k~actrjhrzeT60tCEP7L$z&a8NL_5EwqK(m$eWrcg|S@ShP&`k5yfTlmJ z!x54*nprU*gS-GYTQ-2EyYeJ;kdp%j?@#b9|{Hpkd6J@AWYnRB*C_vz1X?stdJ5le$m+-axzt zFFHs8{0+=1Nm2EkwsZ5K13|Wvi0|+I_yISNq7!%iCM(20Jr%#C%s%U0eos$l3NOZ) zs_4-tHsi&u)7+%BxkC85HhOM;)LoCn(fSllTrARDgs@Tzmuc&C!xI5QBx7fta2d#D9-_l)9~y!c)p0Ym5~U4{tX5?J3hX~3^Q368>C5KLZ4H{P?pzt JQZ8!=`CmRDiiQ9H literal 0 HcmV?d00001 diff --git a/doc/source/_static/old-excel-index.png b/doc/source/_static/old-excel-index.png new file mode 100644 index 0000000000000000000000000000000000000000..5281367a5ad9d6a16da7a6eba5696b9fa949b502 GIT binary patch literal 11570 zcmb_?bzD?mzb+t1N{1kb0y@&2GJtd=oeD@dN;AL+N_R<0*U(6VI)ro#A)p}L9Rm!_ z-T15bJ?GqW?>(RU2Qb6T+Iz3HzUx^}tkCDGa(D1(@X*lE?kLDhYoMW_!-3a}I2ge1 zRK-X-;19ZshMXi?`5^5&@CMWBnaVRXw5n+Q3sWrMJ+8C7z6%-}Vf)PwdY4m?IT{*U zmxA;&Ezg(RAE(Rd>SK2=S&%ee2?s^H(MVUkcM!`GE=heUpu6bU@+M4Nq?ov*=H{5V z7}%}*!L(DKn4le*TnA{7;xL&{VQlWz5dJhfhcO~qY0Aw-H33} zyu;<9T)1bzA9ZndGY0)VQLZl^euju(FHDbm1@7&v(}sActx~i#7rH$7AABhbA~L8B zT$8rz=Q|IzuGi_{|H&KLpJ4d)7397!Z^IGN|0wIFgFbQrsnglYJ{ijI}@bO z1;Yhrnq#EtDrfm%_bnO^=%CkW)UdG)eO(Ig4nkh`m>z-;X)3h;h-SFE^YW`tPwo_> zd!V6XX8WMH@yF~gUX5$&S&fKcW4NOne=gY$;`#3;Y0Xx{BIqyQCXI*hgZM&zQ8+gE z5C_A+uWLJvYG~3H8(@9E`ToH`N`u{ zf75LLO@|@h#&Ps_s4R*$*O*p}*Y>H^ugqAW*)gSnt6H@i#%CW7P?sPUtx793XI(qB z`_;h)fzKm?QRoO&rrBWZUE2fg3P!)V8_GZV*} zn+Mzg_qQaq{y3M445qFQnD zSP177yS_jo#Ll-y)x>>I50;Uu2=UXUcq>GP?*%X>zK6rXKo487=|z3d$iEu+?S4UJ 
zZN59aUKNT$_Bt<3WW+=PH+%`%?k@Ba`|~-*kvqav@S7#%vcI#w)#Y^aEJnbBKi{7M zdL&K-fPmk{F|c?L9nFQ+^nZM{ad0Qp>0tMC8*>J{%5!jhvE5Z6Fe{hn*Aj9hy?%Smv&Bb?-4ADp_n zSz(4vHz5|hUP$HaITmzH)sVmQZtvQoBR6sGGEsR zb6ma?Y@end2bv{u&h6z7#egHm#CLu%0%fp+8!^>?_fpAq47uY)M$im)o!|c&?T~49 z6il_(O&2i&)j*2EAVHu91i{?0*Q76wK0{_Oa~;*jd}NJ$O@3?>ZG(7=NS;~Vr&AO{ zLPo)M?dpHrem6s~HESx@@x1$3c2j$lhB_b_;6*BDa%&xLL|L3?73alO1;xj;wP z>Su_v1(%4cPmmAD#k;xC&~on?uv5Up`og9y!IRo|lkMzZy%nn0GM&U4J|cBj!H3s4 zoomK9B^e%wG?SL?Dud-p-ImHJ!~lqUtwgB1KNo!uh8@Ir9XqYbCHoYk$IF#c?q)KR zkWPrIhO5Sgt1?=BMDwcAf1131ndir@2s7~s9DzBZDBi$pD2JT8-Swmkhcm>}HLb;M zY1m=mZ1v4ozugQJ_HM@CH3D4puNnc0=g%7X3>o}gBUVK7UyP5YwmZMvIv3dc{drQ! zW(Mb8yf8*|MrB#32cppvl^(vC*;ng^07bt-d=qFI7q^10?o5w1uPOsqE-!=NiTEK# z^rv}UdroSt&geKQ#65|d3pa9?hIU{L7iZD4c{of^l{B70=Uzi05a)h}a}m8zMVb`% z{prV78y^8&S43@Y_vDtGPHMWV4bwD6_9)mpD$zx>KY0wHiNE4Y zy}5nsEYfI4GUyS|2dDhqqj99~dBP6N+pMyS+C#>?3IZdyCGC;b&fbY@1107Kx=ez| z7l6^Jn7J9}Qus`z%khQd67{quN(qebt3+e3)t(T4m;OO+an__c;dfK?-3;(aiO0>- zXRWA@f^YX5jZI{jl7B zpFXNj6;B#fa`@~QHoZuFj*(I?kB>vm(r4SDE}{K*uX`7OUNc_RXY}_bW%K&klBZF$ zh~sxrKE9{7=jg#ZjEZ<1BI=1I#pCXV5b8_+u8{ptzn8lwru3&rtM>o%q^=5u?G~gQ z4jhBg>e_Rsz8QbH*>m?Rz`H~M002e8lMOjrrI`lJVF*XL5MI@;UZOXFfDvto5^Yie zrgU&(cN%#$H|+w900+PuF8?Tja1S*r@jtEr@Qa_(Ky3h}3s8dN#Xr8PI{`*Moqp;! z7B>v;5LZWBb9|ijirV6)felfAD*zlclPWYi>Kh zvH|S0a!8|%49^JH7@N0q3Uhh0^6~v~YRLi_-EZFTx`ubBoA_wmCJ-%_Kz*hMetO-fznQVmDF*RVdQ%NqxPSUA@S)S)Xe`jBY^wPOpSBtw zqyAS1mDb=j7IW>T6m_7}SWV$~o)w>&Ncv&ya<9m0y5@z!9z0Fb4->()5X2*yp03t~ z(*^ns7YkBY|I{d;J5$jS99xdr0O>0ba|DMDYC%W~H>ozCFmvSSE3%H5pHgL9FWa9? 
zx*TE4xuf!`FyyO8eG|x0P3)m)C|{wBP?8^`3XhVQY1!+I?UqW?BuvZh zIJAZSCzX3ew=aeH^3=%QEk#7ZaMTi1AX=K3@e~E`HEKQ2QR{-a39?eorcY&WN?wPK z?Q}S#^-*92p}3}axjeUG=m%|8ES75kf-e|R*7`q3`wPyK3@q&8U2qU9Uot%$7W!g&$)Dsyj?BMC)SEn{Jd4=zXyI4mBUKQ!Mz z4lnj<~&_4y%D&5PsrZD(&nY2M=Ip3f5sm`_1k4)-*%8331VSyHN$#<+d+Zq8d zXpKlqjY~GEJXx}V`DkJSTt9pPi*>W`Z>P6C9=a~3%j+i_1DnWiz|QG9$4pGM5yG3a3s;*E@pn=qyj zD_(@?h8&Z!lL`k8hfJfYo29p@mW-|w9L5ySndZ#xl$B!pNc7%-3VPQ%Ld=rg@U0E_ zIh2*n^nNYHSTEBSw|}grCptrGqPyA!meSKE&{jIC^IoU^-8gW1zkr$lcKdS7#a(>1 z%;m6^jW;;XugLX0zvCCoPlsne&^y8gz$vl9qDD*-`ywY+AVILo^W5PPHB6l9T`U`A zrw$A6FTS>-IufdNx!?wde41LC&OH7*fhr9zbBtGY-}2M+<#OcMqYFPVTlp6I{9|M9 z4(=D^{9;#OBRk;)t{ny>TVseDO zdtZ5`r(#wh-)e2wwOv1o=KV12ncbvj_H&u!d3RNR_Xc_KvYI>wOLGB9>7yrZ-4)LiF9ULhRciA`21bS$Ly3n8R;BD3Tu$t&5=_y%x0R zZ12^wZ1`AR1;anO6{J(_y2$5iO^e-Og8@w7SRHRe-#+!dC)=X;0WQQ8%v|T|;wv+dd zSf6N~+!WY0ajb`@)@^0c4Xn}UV#0Ym_II*m$jLM#@TB^1>XPM{^0VcWe$4nvk1LNe zp>$6vVWbSAj1j~W@?Noin#KBke7&A2VU0%cl-Y95ew_q%wBS-=u#NR{zj!58ls{w) zYy(fN)c(ashOi9(4Qdmu?7#y1!XR~gV$vmR(bApkcOaZ+C;xbmV}oRiNtLsYE27PO zJ3Mrzq~axRKADCLtcFY?sFplynxNjrAdP`z#7T+Yjvv#!n#AFZ8EK1xA+bl}uH4DZ zHp7_9(%q?-fDN2zAd^P85N#qC`s=-5@y|!&hH4q2r*m?Zo%$PH?1xPI-R5kp!tb=t_-cU)?ui@_ zgkB*tueYqkA&ZfMerKbZS4$xA(?aOE_#XY|w;w}WMqO!FX?|nHEQ?zRNP(=Xh`=b zta?mqer6u?-rVFcd)6L2Fu?WgP#=%)X7C2XFc_nirR}}xUq5q>O_7NSD+ZTc@Q4^J zpC?ee&$^sP=7fzmRd%VEn=TIrC3aSsBXKSZ-c^P^LDr5tZ4J?H^u}N;>XjNC7gQIL z+(U;pLK5d#npax)o_`~cR8JEN_rF2qxo^Ps-HpcR zsrKp#j?QK4)92i(MV?Qry(Uk&i-`5#ejPSD-xds4Y!a803sTZWD*)8b1BM9aS0PuE z*{9F*_#@6EyK}Cd`^~$ggqiyXy|r%7Q}8?J*OB2fV=y~CYg)Z`jR)R`&&>T~IG)HL z`Nd7Ac6V=OU-UP=bsj(4%s}uP5c1jm#P~s_84=z4)?bc)E%P&XJfX^5oUi3_SjN;T z6&{I5*UHpvGqZF;K;@fogAi97o8=~w`$8|Ti={^LDRbNW7>BdP$4b;;$?jzmiiOBJ z4Xj*67690(ug@oDE62-`v=)lqqgXL<&;I7a4-gk}*LnD22lWwd%rb~3IHovu;vpN^FPvf{L>wm<*) z53b_5QaugOihRdROIG_rUk8~h^)ShuStZFWv$LQLh7Kb&$jOaKq%}IM#6)&Vs#P{HS?y;%zBIWWw+;LYPL4Hj7fupRqno)cu#@nv2uPIue z?_FQ^SJF=pD3WqAo$Z1vHb8gK`?KsesjIfK=oLT6XtzniNg5j<<6R@A6c5b86pM@` 
ztj1V(Gp-NlosgdkhQY5P2T#^MFQik>>zzgN^{}13=RY4}EKp+{Dy?ODE5wV(CR+Rq zN_b~|RQjEgi~#|=a>%jKfbnA;bpL^RxQC`RX;wnQCt}rbXu?~&rx%`=4%kK{O-BNr z<4y$e;Z21G`V}3?r{Bx{-((EW>yCqGuMp6c-Jk35f$LYe^)DQ01^WdI3IzdW}n%dN-vsZB5gdmmbu+o7>Da0(~5VFVV&LgyK+H`-c`r{HzWcRLv- zX==$o5x3__PR@628as1oRhy{}IWg6Fs8Mr%sKmT8V8B(miLJq*3?~{d^frcI(iP@K~yfsCgELZG*E! z!$irpPQUhjrbT?5B!-0z4BoWU7}x|60p0q4#oRn-`S2I?jDkg>Rp%vL)~l3M7yli= zvBfNB1QhgycoOmmg8D4}f2o-N5!Zd%?ts7INA7uYf|X_;vBNx*kUAT0`?aukR&Qv} z)P4|E7kN4tfIgTYS^q$}FD9wWL?y9Vf*%OfmpFrPl*i=I zvKJ(o-s+Uh98b;ayz=LG26c^~y#E1aKDdRKYyJHxbr$?lEu~3;t=G7$xjH<>_?584 z)mljY0lH2G4=Bp31V>rUxBPoP8+_)asNcyNZuEry$>7I9!g|`<3!Mu$dNyMF-S~@4 zJ0_fzhkvII0h-fx|Q)Q?}}!G0bmMV*;~R1*G^e);I?p7CI7( z5)oTYsUs`y0yN&lbsL({N0%iUx^zS138|F0FczV1w|xCK7Sm*DtBYMkr}t1}VogKf z{cmgiBWSI>tv5107h6VHnPZZPK6^~O&RR`6Q)aPHVgbpjg$;`p{hLixew0rTh_)` zP?n3_yk8RaSKL7LW%+hSF$b|1?@%9C5MiVG06_fmHy|?mvbBcZVUU@b2R6ceVD@#F za(J{JJs*S#Av(C?s`G!6LFmho5v#q9&U_5>QlV>2-U44rHXqrcL!VLRqZQ&+OT@CRO=6 z_RNAg>5A?c=G)Pcy>DaXzJ$19jyU?+0ouOa)meJILH( z0r;eEuC~P~|ASBbUr{;0js^mZ`SV@)^$w7jTpmw3eLY>-tbku_lvI4ZIOya2da{5- zh+hLinAqjvXcDKf81(8QUQNvFcPL?(NE`l-6da=mU9SgQ;m4oFB~XCHjWH1`S04)4(Whr$AT#v;9o76Di~L7-bh)?5 z$~zijj&v#_b8_XMEokcc8(!a=oKhNCW_DNS05qTdeL~%B?dP;1l^plBof*<=N}IjU zlk&r?Kt{IOM`;zN9ej>Io_xxIQ#>Z-_>5&rqCT!_xc=@1UFwh08e{h>4OqN%&qfnp|su0;jkX(iHsx5ZE4GW$u%&t zO7pCvL83#dl~`s)v?jt`2wm)~-S%zA6n$J>e%Kk`%b4QsuL0KfKBY0H(jteP%&m+p z{VV9bd!;7x?Wti>w{n$JbvZwY(9_dG@hl<`dgy%x+nx_f-XL{`x|Sb2yW-F)%`1^ zDrC0Z(`xPKJHRB_6e4H}&%!tU;$w{oZ+R=M0k@zf!opao4uE#%l|BI?I<{2-45pM5&W)J6%l zWR7#c;)Y~KM(5#}*F?X~R8zeDACa|_8mF>GwCDE#@B)gy%6;Aadx;?VdzQZ9exouB zYtE=<9(ND55*#JM1j2(0eipXp3hbYji4Xc&$q>+=WD=T_2 zT0q5jegDHa0aScko!St)?s2H-4Zobc{g^{Tp)$d!Q@8}DN{Bo1Jt1fs>-ABkt`q5~ zj&`YxGNY-^c;}-_B}JS>MHQ}^S}RQ_=Gu=22*Rh=OSLt2?7mZ{(lz@u%~l@B(q_2t`f{m|0f=Dkw=zCLS3}9C*<=7vljW061)&dR^XlxC*0s9T+I8-|X$Y`34k%k(8hl#rTr+axPGocEYoxc?K z842#_c^SS)Ae%W}eHO9NJzdA^;_s8szMiJGCh8QJ=!y*B+tWOK`9=0Bf*6bGg-+QA zzbw(QL}v=<@M*Xt$>FDft&fw%5uQ(J8|rqL#rlgD>jM*}$p{nvvNCB;{(?d67YD7< 
z^YuQPfq#ZI-mQF}6}S^j272wO(=r9#AvYK!louDajT4(TBrM6tPikl*)=zOBjE1QuSji(&yq|1C6P_-fB?tLl{Tz| z(1Jhjlg&FtG7kru&?w!CM*-?yDeh%&RjED)I0S)c2rqQYLMFVjO%l zkzqI6%;~BoiY*fQ@Af5rZjnzn8gs-Pz%5TsiUWK1OTaHaSG2Aa>2~h(=?A_PNoBg}jBL0^0fbI?JNPH19jrT0Lr= z&91xWS}HK;Y&{9O9q>^bZV^I_xW+2VahJ>IjJKs^mxWX2n&Ghv<3Y- z!mnq5k7v4KhKE?soeKk}X7vkNpxF$IR}jT$2<;eHCe*SFjoOs%n!vH?n}$&Nx=&I% zG^bV;Bd1eY=Wa}~NLcaS=0?U4*RpPU$!xRXG+8|e5lqNEXhWjukHa{^2R72XPxNAidXy>b_@-MC_>lA@m zCpNu3TqT|71?y()$+h_L%fW3 z;(RR4ppU_i?I{EYmg08=DZh)k&5L+;fY_3Vstxl9>F*_?Ge)2Fti}i^6(oS4hJ*#F zCvUjqj`bSQ=sH5nD_OKHI~Ygvt26aTKuYhiZ1`VTtc>k2W=mwlW54sHosl)9mdbiE zGttlGud2anA1Df$J);@c{X zwo@H@E-F;?Cc)t$sX8s|2|bJK9A^DSu`DvC&mVY)Si}97UrOH(^5?A9+k8`_QTDL1 zoRTvslgqi7S|sR|^%s~MOaqqaq77x$%@J!EFDTV=_a3zsWnP+WDm?k3xCws++sLR? znBmSuA8d=+nkTNK#E2>ziIXF|*^ekbnAy+7ekJ5qwgm(L?oDg`IBj{Q=q zSu0q}piS;5277dA2Vy?Br4za|oY{L|^u%>wx;mMf?Qbw-`X|c?F0wvJUICCW;BUjx z@30gDAJ#8>h(+)IzMLd~A!o`s^ftk?_k^%~V4;F0OK$l6F|doy8&1{QDwqWpQeysO#Dx7XAMY%r`)qug-BA z0|1ZZkD&O4J&r~#(e2H(Zz41ki$JJTZG#zLEn!69vz4f^R&vE;~v~h$BT7Jr%Dp z1kdoxWdc){hAE|;xPct3_j%6~?bs1LDsUczg-API*Azp-hM9$EW8=`+haK{q$ZM!EvFE`Z1;62%&2JraC-Ozb5Wy#my{*w!td%>`l zX$R9!PnHd}V2wC;6lW6%!q1&EUX*~2T7USV=n|vTKxrr$G9JW6A+~q_{6}$yMN8HWqO6* zS*j~5b8Fu|j8u43rKlbXgftru}JerXK2k5%6kT~28?3vV6w?}Ssjs>=PYw8EE z>1=B~8F1GuenGCkEk@c~m_J^O_K@xAcp6WBuSv>}{zmUBw_y`JW@*(gfy*lES`p0r zG--k5nHK_pSoY53yRHe%$O#=QG D4m)qq literal 0 HcmV?d00001 diff --git a/doc/source/_static/option_unicode01.png b/doc/source/_static/option_unicode01.png new file mode 100644 index 0000000000000000000000000000000000000000..d7168de126c5bf4526366c26ba0300f37c902a3e GIT binary patch literal 10736 zcmb8VWl&vB69x#uF76V7yF+llxO;H7iv)Lfw-DUj-Q6v?dvJGm-@M7UTeZ8jKep=B zkvcPTdggRL-Tm~0DaeTxn`i=WWoSEfi zzk{PI!;R&}**TFSZWsmW!vwP5up|ujZ6i|P#3?Bpio|&2HLaTz3vehy(Wz3gQRTCO ziOat6A>O1t#&zXzB^;QV)<6R?4m9h4?mPj`K@xjUTRSX_IcY4Kw5Fsj-ElGvcx!xg zxQ*(Hq5%+SVO~CLAc!&N79$EYAE#uK6J@%^XaQ(z^qnboTMMy^^$d<)O2O6@_lI3I 
zoTUo2xs3Qd&Hgg1))**sd0djSHs$cRV8?5`txGHlN=L8^_NSH{rXg3>bMF-^ENM=rsF*J`>Y zPdkHVEqA(45)+0phRWA;m^&3m8FL+{r?a*wK72g_v&p$K-|R|N-{kwoT@}Ihwt?95 z9n+`VX_8J4yBC|<=Gt6ac)=??mT2RVJ?o)7D9jS(Sk9;%0+XL}yXCFxsvzDww^z1xFtb+}o|$s3y5o)odSO1M>W|d)+gmkV2gfzj{vwUCzjlGlz`MEJ#hL0x z){jP%uK<$Zaj^NbS$boIAC(gTw)`Nj5Hb#>(Fa(dr^Aj&u8kzrJR2{xu)F;*?T4&~ zYi_#gO=-@`;+dCrH$F!glPXfcC zgw7YMIqVLXac4iXH=EowZ+PhMMc(_cCUb{H%tG$87>y?dgm6)(Iz2k%qo%TrPpVr; z9(r@~x@f{ehSg}`oSJWbK$#Q|S?!{CJJ3$O`cd{`+JWF%JmaJlQQR2spk-#gb9ii6 zwnYYntBcxJBe}dHqikuBJX<235h>&^M?Fc>;R0rd zuc0@H2-->0slV^ipk3;OEZpE~JSDu7;?mGik%*a@xw)CqNE}GNRqj$~wAnx|$yZ%B z8dr71x^zS2IB_j{hvEE&@QPRIU1psb3YP6))J#V#RTEyvQs87FqoX6UHMvFQO2+l9 zJWoC%^hw!O`N;|TZYW4fsX^U@9+3@3FwNooX96?=EME)^6$1D4Y z{d~6YpKy*#HpmW?=%oH>GVV^I{W$?D<8c|g4)I8F*(Uk11rxD@#u8v_cW@W8;z^Aj zKX85H>!j(Khu0k73)!QSo~V-GG#6200XA5`cbc8wv8e3TkL(7TM+&@N&~y!BFtW)_ zJKIMZ{H0pg*_W>oG0Uc1pO?8ebRopFaJsg{@N?5pjC@0iYGM@KeyuwBN23b|OQzC) z9>*k$QyfL2xa+Vr=sVRqO<(L)=t+wKh z&>|S0yuU?z>?<}^?pZziQe5_d*_qK*^=Rw%{@?=|E@!ggpY~{o1BJ&eWN;5AXe^xZJ z)OI1FL9XEJ;%H%&q~pbtL_phseGDM8)@-*S%atbMu6lm52nrUZ97{{_cp{8ya+58s z?%ebA!4;Lh&u-#$S=ZzOMh^Gjbt2SholP;X#J*gYfRK>-HuWkm~gD1Zduqaz< zaaJ?^4V&;vPaEfBA`oui)99R4tqjHRO52>r&$}9$Emz97lK^We#Qr2CybfLYJ6PP< zJi3Y}#*#Nu=F5b0)&<%vnDHDo_~UT<%FCZ2o2rWM1+QKGsdLapA_c(8<+QlFH9Fj= zeLl*o#PsM!I<*y-K(U~9{pw4}!N&&2#=I@VRq3#!yAcHjGLJy8tVK}lGu%4+dK#l@wY ztDGB_IlBYZPRIbj<->!IX)oehI}kb=C8{R44&xwdO1 za0&-0+66Rdsuy|8VK)KOvGGhs#qM$9Xb8%NYw}*ZRJ>%wbUDWWO=LU_5DFd5D^4J- zNI&%+-}a9{lc!Wp;5*2lz}L-YhvjydR@o)g>Z_~56qgW=J_Jk!QPK=KOKZo@y?w@C z5?)6(Gxk_G!WJkft4pm|PbMx_g7?2$_+mVBGHuiqytnxY2xESiFg|2dO!`%p*$KDK zX>ja$kEin6X>$2F2au)dTo7&r(KvN7`EEg|tO3A4Fp;_W^hxyd^VHEvotm4^?U7(O z)reCl4K{8pOWAtxZsg${+(6{UPdt$Mh)+Z@6;2Ly&0txxN=c5eK0P%Z5UckdYTuif znAk68a3l!BUR1gN8=c88IsM~=qxq0%uG7#%5V88+fVJ%5J)8GjrVOAXoHk*fNEOz%pNaD3g zEVn+uxhPxcHAnjFWeVJctV{V@GkhBZLk$z*?CxD9nDJn7cUT;Hes_6OMJL>w}%of@__YJoHfTq7pu6a!=OC(-3n4omp=E8w!4sgS<}QPqB)mUzi=ReV4@HSY%_6cACTz 
zM>E7JO%kOB1sk+vhQupoN|qxujWJxBnmOtK+P#zMpWTG1ak04>K7CHmV)U;wfR>91 zc1sUT$3$F`;#OGKKpFL(>6#i;1L4!9l`rH4Q>t-NxK$Lz;FR^#3oQ`m)V6&11}qfs zV~o@5jDa6Q62$({7-;-W?YlEVQf*c-D#D95Ki2j7b`lQucGY0D9G{_vCB#idoh^KV z^WerYc|!`0&q+jLbR5HiecSRO(%I1(=I4c*^3xd4@YK>6I&&h^Y*tRZ6a1VtTS0g*4O9F znf$u5vl->zF>IptDQs364~7V)szpMAjX`HigX{YP*k|QKA@i~i1$DEurtU4Z?>FO* zo$gZLJu|BrdAh7Fk?(x=t0zoQSR0*iH>=OJ@F7SU!=i_qPhUcNss$~z&{L(fQkpqe z1FU!IT&)*a4f{j71(=PPgCqC2Y0)TTD)7+_iTk+p@0RO6OWsh(z1j}vZtB`Zxy=XX z5G5_Z>rjpZ4Da-z#bj)4_%rNOZ=XYE73_X{%c-lYukDR$&s&P5)eXH`qt{ zki0ajP4B*pog21snbxOWUg+e08cFug3{1*D5O;@kEC#Y=kHp0|d1= z9A7V?tDWN{=M6jyO3FGhU*3?9YR&q(KS9+`*Md-VM zYyMAZ;J(5&R;=iYXJbbm%4j=UMCCn$Nu2l1Ywzg zXv~sVh{@dYT+24@#q_s@-^(XmqfIrawI()!utYT--#*>+apO4A$<9UX8~hr+t*?Kh z^gah`$P5f>evLK`Yw!qky)Q({=Mjy8c*qELXDVKFdI@ShjT`ta zs?zhrFEyfUctK*n8yf9CoVLe&*1&>d?RbDg@!kg1o{Bzf7y5R@n$UFoe#OV*QKLsn z4;H&FugBN2QLGWdi99HVF9UntK(_r|UF^s({w=eQqd`8qne$<@rz!72kH39R#X&?6 zFc$Eh&AzcZ6Svj*D{_+A5=mI4!Yof2oA|q=4fR3AxAKxaG*OA!R5P~3oKIb3ICKrnxO&7 zm9%{?G&j8W$53&qd$_&1xpA&zV`CF7TKR4J0+%V#9I=)%)4cm3@z3-Yv;kyl4B0UFZIMubhn95cWV zcgwD$HAZeCJG)0or2 z0>ztX66IufBJS_@xX+UrrE!&J7v~$8$bKrKn$eEF6G+}Sb84!<5L#(DToStAeEedxJ<6ASCW~f@UT}jD@fLEo}o*b5AU%AmzE;|YOU2USA83`_n5D;-QV8rRF)QfQZn%n< z)WXP=CrB@4U~rc}ng|JfY*Nm_(gy7@d+5#DkmD0>Je5_%z7V3tlNVJhym-mu(iR$< zVt{p)4{u9S3>EFR{fcETi5W7@h6~K`YU*#AB)%&mzp1)&8Imhc(e-O`w!NpRd==y` zp>Sp)rd>sWC+1Q14wq+R15!t?@nF5X0uf5jC67!jl~>M< zv$iyt>ZVT!*yQfg^-%mFsgxD$a5ad9czzclWW6Z{ivua{f+0jrC{g21&% z0tyXMl2C8oa`jKdB~lxKyAt?5<@i=izXcI1dI4MaoC2TR)*4i}%dm4U;9GF#wn*mj zG{^!{K8bPb=C|d6{6GUS<9WuRB`h5wTlh%~lm>4nMS>WrM0cGMA8z;~?{!a5x;8M1 zo${>Spnyzsl38SvGr-`eZ7hQ3M6udDQe{(2&J9!Tim63sLd51ad^J9?QJ6HpfL!E< zs=6*eHm;eVOjhT$cqmi3P(T_EbXtF3j#Hy~C+I8}t}!``f4rSH~0JzLbD~tS{I;1K$J8YUZY{lZi;;mE@JRk0j5+IccjL z+Znd-ETSR&g_H0U9D!fJQvC@+_H^pN_IlT^?+G*07@Axu6Jij%{&La}H3jVrp6k7v zpH`yM{H_38=mn{@2(qjQ4vimDDk;wV0iI8r2W1{h@hqjj|MBrLi3 zEl-s_WIiRksh}-}u*bDud=zQiNa7ZlaWJ%Fc@RkE@kp}g-Wa3(jqwh?+B=z$_Ovn& 
zA6%ho&3=o1hnYJ|^E)GuIZ`s-sz!(F8Fo`AKmfV1s*2Il-P|KO{QbJ(AXYldNrOX+ z@+h8`WC?j5H}|{W>l?gy4+UXybCtO7aYfmW`4+x9u?h3|6$*@2z7s?!_6!&K`A0s< zawA1Av|)VFvZ99g0w36KO~}1Q4Ac)^u+sNWZjcw_(>!Gg7Bel3<9(YOF@kVKw9{8t zQuwgc`$4GPfL9~bNAZ*kAG#5ByGSrGFw&_lK(R}vBd5g}3aRg(2KtcP$lxREd2%SZ zPH_4~nVd_o$3}@b;C~u#UPK5SOmu#%@>=hJ2sB>Om#Cd9%T=aX#5_w6qDHzHh16O8 zj(OQ6lI>u+QTh$6ZL`0pi@aT(`ro%S0L3O7!Rd^}(s_FHXWzhh4*Lb)dTkm0$l;6n zs78{_ym1?{*w{RbTvn=#_r*fu2f^58bHOrF-{`u!)-WG5B~q^eCCPb#Q<;mM!t{B5 z)R#qyed-@%^WU&c4Ir65{gKl*2T;)pkx3*`SriGI z$j&Tpa^BlSS(J)dI3@h)>hfIa|NRU7KLX493Q%^iGGfj}BqxseZNlF6{t{G$m(HwID2CAgb=HaQ-stv>fcn(n(SCwzCQ6bnyE1~A9`*P2 zI}(7`^Z?fL#Hb5O`${`EX!DxPY%l0SO4?4WU9B3&J>u$`u|lWkSG5R zBk}d1e{$fG{%YTZUWk-b@~=OgD5R`CC`U=efykS6<6vhYA`sK?nN`@3L=_K=xX zPu3Ff4oH&L`3tS`k6US9DE;|Fl?0s-qkDd4YG04|%o1c$Rect!dL41=-}1e1K>u_^ zB8dz`spVg}MilDOq(0;IN|(j2neW1A{UM8`rix)rOqFT0A0yL(K-gs_C|Zi}*bgcW zNudcN{imZXq?fDdl+1P@Pl=7s!(L4w%n@LedM567BwJ4|Hyrc0CvqSc4-XHw%x~qn ztN}x>t>dW`w|-}=llSSTN+hI-%8533Ow-S}pA1(MLHw%y4|5e2dY;q2aY@$7y%H%Y z1vHWeO|8AvWg8fghn(O~gb-xc{jVEvoA$LJ^qK zfsT5Q`bn9m-Nw-9v2^Y9mM&{Q^T*fvUMNyL2|*JA!nIKc-fz#oGPqiZ2)BfqdOD+y zwOt-0))##)KtgL!vy^P;*Y3XArJ-V8qjT<_!7$=Jvhl>oeYn`ZVh~K+vu<;>!9D%` zND|qrUsira7rXKEuO>#z^`;Cq(8e-HVdq3Gt68BL)XS=x|XlwxsI5LIcHtfssVib+!WX_i#n95cAqk=AyeCZIf8+ zyYCp>bEaYtA(uUc@n?uz?sD%g4!EFVUFDE&=LeX5D0`F_9^ZDdUS9;Y}nG~yXIX; zgvFc=p_;{)7Q=7FJ}>Xoq;3Os5FK)bX?fh)Z~pCvqy+m=NuOwwgcb@vuuBd6L%+9}^T8gm?^7;fSWVkFbjdh{4m~C<$2BY91w&|wCYFn;iZuB)c3tb8Xy0opC`DH zm(3jwzsACwmJG--=Tn_jRI~_&9HhY^gI3u47lw5L!9b-eqG)XW5~=UUe*E0Op5}yv zT$4T{Y^@)i+RsCXa#hGE@VZ-X#~ zkR~OqaoQVh=pViEgH1?~S<$LtTpA?c>FC#@U?>(moSs)gw{WaF{JOQ>1OC?(U&m2g zJ8g8(Y=$FH;+9K|KJjjEs>Fm34E2k!%}5L=wQ_^Nvi+akgA3NZR+KnxM9B%{S}>WC z6Co|ky*h@J>^9HPBUl7TW&Ho@Q6{F}fn{fwikCLQzu~n|U3EJzd}!C3*Z=vq0F>_i zwgBIL%fF6AV38!-s1kxrr1SDmG^-M7(_ zfwmg6%~#-_M1f1s6oAn3mn+Mi5*~QOtpM}CGIRrxz#E>tZOzMkATRKm3R;dgdkSo9 zI{MkFnVId}+&%e$)OjM-Y%sEVjB#OT&}CesPwI5oVTr)u&wjxY*zE7~*?6X8hCLNx zIWjYWwF8$6`vXgMw3CbxS$pOHLYaVnF3}E99CaEM2fPo{9&Rki*l55iwR>*gW}0^{ 
zQ`Sq)2>uB;P1{B|l0~0S`a^U5ai#HPD|B~+ko;nAEN+elYj#LVnnO*b; zNBqiSaal(5QC8kJGl?9e{UI%*TjNA{|7P_;{)y&>{?W+i{j`Q)yZ{JU=}OLKcb>qP zbc|SL_a38Sv_1KXcGmGO9z-ZyhSz7HeEqI|GM9f61y_fVC7@+};56o;h8V4unZz;` zPh2)iFrLaxQdS<5#+bolZ(1S|5#}d}+n1d7wl&J1<(cLH9ZUitZNU652#XdLxMM)b zdD?jfso| z0aUuH;Jos{sP;~;alblzkCy_-2=9O;#qzYWw~YUrVkO)_-Vy$SK0{B0a>2$f|Iy5A zQC~)!Ks_Yk$4HMs@VlD$3C{EL^~+e9F*RpnevY8;?qGMReJTO9{TOsRKkge!6Nfxe z0^ITU6UZ3?6GjO|6%G%1AP{K7p|$jG80rX4oG zM^51kxngXpjh-TVaeu;gdT1XP4N_#vf7at5>f3tP>VRq9-nilPzdaVe(P&EAymu9z zQ(jkua;l5+9`duh6?G>xp&*+W##384`Hknd1=M!dg?S0_?LQiKGaSqvbRT{LPe2f5 zia)R4t1rK4_V*e2OzK>xs#3(%=ODAk_%x|cvi&b2&xH7dTW6E3!|Uw7>D;p#%L^~8Er>YhijJ7N6y=+rx6X?LBBI8e^=I*2h-#5 z&5|C%A}p@R_sK{HbEL3?rXZZ}Z8_Em=!H<48ohgVdwzrTI#4#gsZ_eVyVz%{Ui{0m zH?ohu*Cl5xo^QH}Ac}*sNt+}6d?ud1I-?Kv>DZ0lGi!0uQG&g!Eei9lof^UwM`p$! zvftvY!gKlh|8G4y4RhCL@}Q5BWj-M%wh%6a z|J*S?c=!`Vih0VSX;St!ks#k%$7uqK8S~342kA2tiM3>mvh4+K?YX&Bz^wHB7(Ww@ zU3o?AX2!O5@A)^+eHBv|od<0X-sdx>to1eqor8@P_F<>h63uWwwM46vtox*EOrL9z z#x|I52rksQP1+to2tWN&-7}(zOU+uEVoK7lyrmr7vPz5RZ8o?q$bYFB8l>Zx%Yh^) zdw=_Tsb28QiTgjY{AC99|3#J?;xZsVL|-f~N4(+M)zd*9z1_~oy+oxth2Gd&&h^A^ z>PnMT27}@m#Fj6(JipOWetj+a!d!>|M`6PLT`g9`mXE#qqMcRnp&Q#8PgatTqBX7I zVzWf6ua}Y?qXN$m*Rvcz@y_{QGK}MQ5CV3Wj1?o|%*ZhTS5Xlokga><>q<)b`MX=S zh{QF`=x+o`&MZ|o_CzzEVlLRex&BYFg*Am{-eP8}TTCi%8-Gavw z$(t>uUW+`mU$nubJ?-C5EpuybE$J4<8=s^XE-(aqXCB^igj}+B7So+51(Qcsb%6vOmfMMdmF)CnHDTu;IXFkAO6z*7bLb~uLS9b>p-?pev9^A@oXcCC+1%) z{|{Q-jT2;E7ng;_tJZCpn+Q)?uIK$4tt_!yvfi!yf3jKEekjnO@*c!z$x-d6^S$oW zSk@#=rb7`T)GPDwu}0dZg~fJcO>zQv=NeH8Ulr`Rb>i#j-&<#2a^5Ss_aLuW>+Wj0 zHfCPLqLJ7p{&5vLz5+?97YxPnlwl}~i?Ar)I>g)K`t=+CI)ZL}$m%C{92%K_Y{I3YN@I4th20fGkz?hZi~cXwS}7bmz&aCb{^hhUH7zWFgd z^UZWu*QwK0r#nnZ0fd1{f(iozgCQ*?t^xxC`}_4-0vYc0j>^HY_1eHXs(?gcD#zdb zf`K9TlNJ|IbAvt3^cGfCfB7-tB&Os}$SEb#7ZhX}E}hCZ8Eyb?SOz+)z%`Wh zR900{m3#B?9AieDVa5v-J4}gSDkg$}fHzEKmu?%#awBlN*1mGQHuOW}>d^CfVH|jy zaXPY3J`T)SeE_~|(23Of$N*v5;bx7Rt0Hy>*(NILzhtRW(5g$3OvY~8d)}bwghVOduejb}PcT%!mZ7gJS1ezy2azo=6Y^a($g&L{VtdpKIN8R!y$LU=e-OzS%b+ 
zb#D<($M`1l}0~0*>9;xGa?+jr8{iNY+1^b zs$33|Z;UYt~3@t^v87ub}W^8+Noy9UW+cdtUR4gOWR{*&3vX@zcVY7Bu8Sgc|s}#Q>;A| zk09+-es1@ZeZI$5n%lZUw537)m&PH2smNS+8MekRS;wf{iqER_ zLzWha(6|=!BbcnBY}Ua{$oEOEEP_`(kkb)@&JwjH-n5cGJI17^HC7EA<}cK~6#FK$ zwXOi7B49Hx!^Q}%Wf4mKaP~8OmNAXrQO07nLEC+^i|gUjG*qq+>it4|emhuNTl)wt znS^!YXaD?+4Gv^-n16 zLXq_P78YCF)O&y$=MgEC;(Xv!h}#lz&t8>Nqjb@v%vYX8(qMe?ajH=AwozPsUA*Lj z`w(2fanJFkiQQh^)ra?jv`myy}fQ_b!BdoZHc zrV>gNp%lP(UHe;1O`1Vft@esP98%`ttSLC{P5NbCq6PUbu3oM=rPU3_{q){uUF#lD}XFO{zsC-RQpB_wwFd>H;#>!Ms^p3~1K;~FLXaP>P71!Eb6ad>EyCHCVzJJ8fXqzE|5FV~sk zm7*CV!4Zh1iU}@`(2x^E*PlJaUNY2+<6Rj>sqm)k;)saJ0^@gXKq$+XGoS zPr1R^fuD#V4ZfDHkCq7-9VlFbRfyCgCwLF_DC-zbT@@A1L>*P9B&SEMVQQ7J zb2+R7==%jvYn1dS!OXY#Ud|5OZMnpAvZ3Hg!Ea8nNxJ|TO?%&KSg08fCvWdPN#$|G zjPCSYb))M$wMXtmez)^#E-qZ@vNY44RrL0q(Q6j9RxqS@7yc%8t zge+TEwEQWqfBB+FbkdIoP=$yCf?kNe1J+wX*&@pvb==XvgAa#IQFlg^t#oUA50k3w z5t&RDOhhpoICFI`g$JcD_qLZOf_W#71+L_c$;3*_dnt~rMM$RQ?rd|weXiW;T)5}J zD7jjsLhmA^CER;Z;XLUXArkP@{!Uzb?%^$cuUe2d;Z8Rd6NxW!FxH@YF+T)f(B_En zXZH`$hT5n={tJt*p^V?Xv;d~90pemF#xq9e;6KAs;ThT2nH^n-U88|@W8>!FDu zG}Vb&>B3+2K%3649)Dm)W!1e=*aK>y5eD6*sCNI3Y$=lTe~VDe?ene1=Vn=O6{(X- z+w-VMG`7xr4+*q8mu4iKSm%44w%U8asc>;oM(99+6eND6nmjH-tofYaQtv4lEVI;n zjfIMOaqCi3L*>I=b2xXa{5(FGmLr^v{Uj zd&&)UiFvP@FsR@dYwv!$z@VtDx5pGGW*|1z*=1g*^P;z9;@6j3uc0g&Yr=*?N`Gou ztS@(dUh_8&-r7p4c1}T;o&~zyz}3@wt;HU^AX+2oz8TjFW#*GDSJ8_|m8z$YU+zoaoPmc47` z>qi>_f7g~QePzxM*0YhqC@In*i~WbVrkEeImYNn!uzLv>y3-w{*8)dm2FyLyi+6H? 
z1?*E$Agevl)UwPu7zx!1Mfvwd6T3ry z7~!y^Ri8w6#=Bg)O+8o*`g^K0t=<94v&YlwZ+~lE0ZLqN^>~vyVHXb#yb0eOg)X7t z?}R@WEj0;*cS6RGY~6~ZyLKwnxr0HcPOkuK+w@n`e#iC|UJyMZGI6 zzsLaPWZ5hm!`}ETTko5gbYD5Pdbbw+JdljpM%#0e*iE zLDSAtkvgyQ9E>%H{2FP; zn%j9`p*zYQp-Qatp4_d@954-!FB4tUzS-Z4M>$1stg~m*AM58}WauCQkm??T#n1)6W^q(f_KA@O#kWxpNEtdH$M*qgx|>bzJAA+`-?7w5(k>vH&PFM3a!1 zOFexrYOV$H&X#^QO=TfwY(#|xuS;3g&cTb<04_hE_EsuaSGoHrNB5>%onfQSW_xmA~DU61`{^CUQSqK{j zPIx0*SomdUNT^Gv8V4B$mJ$Y@6W&6IwGjp{f6xlQFK^vi8MJS^Mt+9ngSLSfD-47D zn2ub3bfinDjq!dgf_VRTpCK{OeStotG=}blZhV93@2Eez;dNYYD!}DM8v2IhsU8Id zKIKRg-Q<3}uLk^nbjJDF-q^7g5q9-#%-hMN42owr4A~J#tFGBgsE$!(x+reOY6dzTlbwHjXQ-4b?qGlON8K|> zs-DMax~Sot&(v+6Nb2txu#{&`@CVW0bUect?GRSjeeL&UTqT!b8(#G>&XN@y*#G&R zCN9kLKxYd_5XL)5qK&aB%^P)$IDJywIxlMdch-JUem-~H^c|FSQ3^4vJtW%lt^^$a zjsF=3u7zH)pcHJZC)lp$XcWT=mq1L*%-w{Enf+Rvmd((J4V6?1X9h}wLKL*zL~#CM zl%ox=b5Q7hGiWtdi%kLx`_~2xnm++ahc@Sbf50GX#rXLgF9(Q~?EZ7`MN3V)i*HmXa6#-@IVqD)_wSX+Un@PfH3t zS8bkeU3qYp(>KRvWJVxvfZ1QcZv>@$oFFAf2q&4eSv?(C_jF9G2~3(UPPgne_{93; z&Aiq9J}Fr*5Yq_F1{ZtJGWL=pdn zZ4wcserhXB$DW?MZD5BC*jT)u_!d7g0iRYLI>7tPGHxiTZ6XmBr*=l`{iQ00rnRF+qtdRXYV)q})zNalXsmk@*lj;6V~yWg)WTE#1OQ(*Y@ zV1Ln1$|kpRwKQQ!u`op8y74%P!gfhk+8Tb(m`nGMYtAV3{YT3+$+soH4ujOyHC>)= zFL8jNiIkb^+310RFJIFKiv>JSi>m}tA8PKNy?+j+)(O7I7qtj>PkWo^ifg3d`WAmT zsQU>R=&}y_(dl`+w%U4C^ZWPDR!zHXe=)#2-V86U*+v70my`U?-+ZP#cIzWT@f?RG zk473R0fozFz`bq7n#GS+ z*fR~46$-+wLZ zQCA(V;-lC0I&GOu)rk&QU1sKO%VQ^dcz6&NFf&7MHH_10K%MN}VOGb}z1RAh+lmjQ zAm$t(rjaR>oy2Z^b$~_aLo#`?Y3!-5;_@yR+F&aGd@>o##0v0is)mqj<7Wcw=b|Rq%GPmH?}qaRtkxB@ zGGsK~FMTr6NA#)vrayO?UMx=zoi^99cWctrbYh~~lpGu&e(50cL)aX-FNvUrQHowa6TFd0gVxiwZPfTv|9Q z=ab5-GS#T;fKak2+L{DD<`YmhKQ_}|?l^a9S`u_LyRc%q95e8thV1`wI;QCf4@FWB ztwoe3%kt`BOE$CrF7r2SdBf|NpZ8LIpjp5ec+hZy3})L5{G@HwCOj0d&Kw9d%l2Wz zIQNufQ0K{M>1^_jpBdIvzyGz*YFUzR6!b6Z~p?E9(W@hR!_S5!&#EVp=;Ws;aqzRd%O85CLufyNrhkt_6$xp29 z&ejSO6T9!ne>L=uoafrRo-DMifZh99_7)a{pRM{DID*Ix+3H;+vBZ6GWC8z=dzP?u 
zhqH7WEgPt@k&ck=ES%RsU76wS;F6B3p)!M--cu~GaN+DM05rGXsLs06UEYB8u^;m9SB`9&o2P&wlX8fsUi!I(%KH^-j`@Pp{0&hUlmaoa}3=dQ z29g}24)rgO7^^@Ste8JEYyhHOtPD=?wNm4!hlJtqvY)wRrJ5d?1h~7#%2prT0iK>_ zBQ|FJRNF=kcnj-eYky=4wmi={9>3$z{H@!JBZIWgVtNiK$M4SW{h~WZ0U>su2@=rr{$&ytap7AR zbgGxC-|I=8{7=5xiKjVt_e+xt^6KRpt92y1MZ#PnIc_8*>lSe2F zgF8%9xHRF~8kS@a0t~SG=l(M7tNS*F8Jt_YlRWzGZxS)=TL>-#oo74-%u*{p{INk2 z^%Rw)a1o>xyA~q+dQ`a1qf(3sE=p46q;hGF zOYNBoHGdh+1d9F=^?l4LEN_H>usq9G)vG@VjOP|de`a_&fqYg>GE;(we+9xq7TyU= zUo!t0mYdnuUdywuK|MCPIO@tOD|3Dd_uLlUz0}aVVH4fIO^YwOR-SQITdEW-5S0xlEKONee)qMMOlN*I4iTQ}z~nzV43SmW?z8 zCx1T+A9Ee4Y9pV-Q|u{M64;NKl zcE%r!q$iZEtyHb1jmkjAPU#7GA8aAim^5Kjy?5YSI6ADMICknK$_4>;!OFMf*h;@~ zmSV)DnqnJubp0^j&TQd=3lL2iI;xc7S|zcw=8&Pr0@BHeZu`cb^(AVSIO>%DSyh3L zFv(I&PR4CTY2~up6z5ss76{Cj8N!+9*B~r_)Ua!g%@#mDu*E-RW^vDdvq1WbASDVB z*18|Ue}ecK2mZ=>{RP>vOX_csVX$+Mf1JsSQzer~nE0>3{zLo{o|)5&>DyiD7OE_I zz{OPd?Yr?@N7W?qY?9cS0@u(p4RrhnHI~^92F2v4g7rTFSIZ8IIp~=GXls)N^CWV( zN2=?&9W$TPjD-5-)Kf@0r*>-(tCV5i!8dv6OJ>VMG%*FgaZa)Fn_ulmrFaCr->dYn zf6wL~qCHU*|MAg2>kr-`X2Sw-Ub}yiZ%ev+-nci9V8jY*PG~rz%@^IOMuTQ^HOqb+ z;rsAUDc38*A=cS7Y#Dg<3eQW2eJLoV(~2_yV!b|T$cC*-1j-eT!6e5qD9#7*yai2Xz7`M93W{bt+derZ7S#Co% z84hQkm?EdX)tz{!uV6iPn1uV&VAX8s>7g?KDl1s$=j83!{@)I(DXQK&htdnCj*d!5PNCC_BPm zfv{|VSt_?aki>-^c^;_b+@eZ(oQ;?gw{1$rRYk|63MxG>4Y>EQvl_%Ej$+gdx zjH9|QPMe4rMW2lW)c_*RG^F=qXuI3B%Y>M`Ibl!r%1<7M@C?fCEH)eaP3ZbHCk@Sk z8Lrr{1J*7QULI{tziJR?GYc(b{gR85u@nq|?cdAALf!h}lV%{EWk-nQ$k7?})g*ZF zL=3!-Evhq_Nnlx7RSfOFtK!ErUJk~30((Byf*gj-%#dqNx(lK_SIb>rwQbv=Cu2dJ zV3U7_z~Z-W5wYIWn9-rW(-KTKK0X8=Pp0QaWyepO7k!Vz4(6%hT26*K7~HG9Bj$C@ zhb`Zp__<;|o!*{zb>Wighq%j-u zu3?tEspMG;(AK&QT52g~1xN@(S^v$uk6h|^2V;dG+#Y_mh&J+AjV2h=w*6-bKJC88 zXqS>V$3>rQ<|R7i3KG^LP>w$`X)g(0rRsw4;-iLMAuC!uMkn!%R!`Hd)`A58y+B%#mEtC!&eKu_4LU*aVvl3A+WI)-py418xd ziUP=)PtGWMyZP}-c>ImbQFf~J*=8v&F{Vl=LeNBZ^sW=eB0asrwo)igOPbmfy{`c{ zT~x>DEcaGQTbYp#0Z(PezNaFnrH7bO}Dkh|wl==Zh$dPeC%`zSz9Y_3EFczNnFAx16sBX*>}cAjDLAVTj~ z1bkR{S=<4$Wh|20%6=!@ci2r`HnH<)69O)Pe=3lS)GQM@{vtml1x73>y>j`rg1(1m 
z=JtZ(_@)_s&ubA4|9U+U$|DBP`A=xp^Z!+nWPBg~ZTYnV{DQLpH9-uNZ~iM6qx5IK Y$DcqLuwP)21*J>r6i`yS8jUXnrDG+9MY@p& z>Ab*q?{nWj-uFE3nKSd7-)N=QIM00008)xb)x0068%w`&y;_U+p`?n3kS zgXQ{4RRK``lkRqh;)|M+ysj74c6uO`T)!6Ol*l=({E0yCeT1=l1m&fg{>AxZOx8EO zx)8mSrE<$}SGuL=8Rip%&rG(-x%E2AZLWeF?Y`MvY3tTJZ%lgDY=TQ!oTWf16jVu* z49JcTu1xW?ews2p@X{QlCgi+Iy!tc6bx{07GC@#~BOxLLX$yQv0>G97fa)o+hznF8 zda~mse*RMG8Vt!4i4!@FE$6@orxMc@sO0wTrVeuxwP2&kz&(iRw2Q3|)ChWAS$h#t z(Ym-Bic{zdH4JaQ^iQcRQ;v?#a?&xeT9^rc@Va-8pU0A=C&O>`#j>H zZ!<0)A2R7nLTvcLE<3j7!}~P5*AfaiBUAKA&8`1(wl&W-okZZ^oqmOBnOFrI&B=Pn zxpLGAbw)qyJem1v%-b1S+E=`|WnG>c_9R^O(JpgYz`?htbczmP0FyiGh;R zD;jDK_k&ms>0u)WSB>R$(8a{Eghb%1n zOa+VI9eW&qEwFA62~Xnlb(m5Rj{l=tqi-~1(L1N9`RU_F6yD<1hdME#qtGI+QJhdD zRqe(Q_`^2n)!MA5=U<||t&kW{PO9V8?x&@qjV@cPEq`KG*S#MXWSD&I8CRPl;q7=1 z@uu)7M)-mK8r})X`n-C3`@@^7rY_HQfz@ze+^6j8YxGU&%ciS>xkKdF*aCgcxUeg1 z#Xp|29}2jqea|kN1umJ38siYI9mJV2!c$kBS)!Sn=PKsaqEBIN1IMS|$j%-(NM0T( zSZ;YtE)W{gXw>BLT{*&TxX4F#17B3hu4%C`xAM95|z!2#^x zTA8%C%yuJ!h=Z=L4bG_LH`KvuSl#{;n^9$2}dI*H(RAKTZHT-FX z$8f@H+5o$C8=sBx)FN)Y(C0CqKk=)mXB%ApA}r125@QIK>Q(;lSyur7hLx@(w$Y zD2@FhIKFFq|DN#U&+IBMXEF;uvs!E;ma1T^vnW+>N(jDc2Gzyz0!vsi6N63#vMb`T z7puz%0ek%H5h3#WKp$H&W2R~R?N+4(aX!m?N$;2Dzt;*rHOL*Ycdw~zOHaqYHKp(@ z#TBTNGDuFSX!2WJOtkPv^hl|o&lfGBJ>ig7+v;TSDvvoKS?J@|nA9M=3)9PBva(>x zzPK;G4}cZR>Vk|w(#GcNC)8=<`lEg6AodY3qjAyl?MYr$8MerDR*mDsW} zYp-Wyjt$p`e%70QPa5IeI z@FYRQ`T^@+s4DZg1^apKFsoutt0PiFndRS(EDpaX|L_;ap6049DPAggF6BgNhjpk( zQ4zfA7CO3Ox=~1*pH_}M0vfmG=83(U>)tGFwQD}OFr(U+JNn^5_-i{d)iKN4>A|j?zK-j4sj^*! 
zp~im0uU#$koDgpH(^v1_A5?w7JK2#o{2r+s-qaSj#o#F2Ba@n2PJQ^Jf%bW6UwLD- zbyviGvR%Z<5)}HKDp^)-vPslRT4R!2r~FO-Bf7=6ofB;3wXH9fg3ZM)ktqF=1X8Du zNHrxmrWhfseHv1rP)#rpxeP9S*)vtxJG1AJ!{zx`wOeIk!~L|~Nw0wl(qXdQHv@mN z?+xC1Z&Iy%pwdWYCQM!Mnf3_zu$fM6Glbv1DdV!e6qx1^K-vtKEpq0b?#6=EomFC> ze}%~hNQ(T-Qm$wH{3iV7M?0z~7^#uZP@8X zLXLeWlrrPwm|tk!bnp1se9Yw=#e(Gia0QW(YQAcSE$oJ5DWi9oFvs2IF@_~U# zLLO0cuKeP8`-Oan>1+qa_H84t*YWxkd@TJkJc?G7mu*Nt{}r=d1;5)qrjwvwEUw@t z2&LAR`X1vwn8t-jf3GtiK^v-tn4}ts9kG=U>z~d%d`R0M`ga=HDtN(nhOi`<5^;nS|p>IKxFzoEMZf_@qYV*}l#PxX zF4||*TsG*51BZ?sfK4D7&weLT3mm1Ru&&>3j6)?{2}6&P>OB)^bX1d4c6;2X+S_Fy zO)66L{r^?cavR(WUX#I|C|}FD71J)VWZaJouvBO|DI+(OXetFwLC)sC@ah4t&Ef{| zgqMDDv;T1FCEqsX<@?}dQ|+mfj@E`3>B7qTj+yA$2-X_Gy4R)czs5aRQ%&dJe0XQ~ z)EI;)oY0)=_L$Uz#@Ih4*Kzzsz1v;kYW|~cROZc07M2Wbky zUCV#nRFpc=s)Z?@0{d!|c4bUS;7)U{#Dx}~F4kJP4DD%e&f5}8#58Ew>gvo;t_b_3 za7HGt%M(4;x$NS#mh10~j*T2G>~GIRD|he2D%e2CDvLcKb{KY{^_mVn6fgOr#!G45 z>zrT_gJd+jpGDkj>oRts@V3$Tk6IO@8$4He?(ih|)Pc_vpxOIBgB_Zvi9Gb5WZ~72 zgQMj-AC>n5e6Q`{Wa%3PCDd~WpcBENt++`9bIG~|BIWO|U zp$@S-yyq`^mJ?#2;APQDygT5}$Uyh=2GAG0Ptv+9lFBjI8OEq^IZ%P$8;3dAM#X&+ zk6>4NOt!qsa3R>LlR{q2Tkzv>+|YkPbScD|v9A&027l)Yr10XnfNu~5e&sCgZ66(- z#MSbY!xC6bMi>3-uXf?{(uAJ}6SKseR4>j^{BO;VtS!>{!9iwb)x|^kh+6|1#5wJf z&Lz_`3o%rH`x(L+$i15aAY}pog8(GX)Yw1EWI<1%t{YMzI|_7*z`HS{COyb?w<+)v zcFvUSta`oP_$|FH1e0!#QJpe3x?B)JB*gHas z%~&dDW0+%G(kdwhCP@81&upRrfph4*qXN;GtPW*drCZYgEGF}aAa>9G6k)AW)>`{F zj2UsCydfznqO_8)zmeZ8IUdWCy@KjemDK0aDmg3 zo)fbenHOB$xL;7XcR%M-SmtLNXt|mm6g5ie$T2^@QxNnJRL|+T3E}I26P`S#5^>*1 zTT+(jCLE~to%k9_|CfS5xhEv?1!tyMG`GPmne&EE=RZxW>zk!d2SU-iZJdxBnDg)8 zi=!knqgv7^g-K!ev*nbV$Nsm?Vt^#|^-(c0>Or>TJt!VfJ&PwbvWyS(`*bB_|N0sS zCroUFHHzl8)48mmCL(-vkxAKd6VTAXlcgAY$4syYne*GH-zfjN${msa+m2!ExqGH^&<__7=FsMG~-N^8lA6~(C`S9~8pkZnf{$1vtn|&aYr(_0~!F^7N zr3UePF6~}%_F1&~x_KO$Gu|r@z>jT6?E|Nk>n-z1MUGe&_y-h!868We%cT8w;tYxR*2bedu4a6~971p( zO+}Jhx&-~v5EJ;KnY#%yBZJioE$Ho4=?6)E3_wf@D<{{P;a7siHuyzKeey0jxM z@o^<;ukLI%mz=LRZf_Ej@AofvuGx6#ITqnt#b3AJv+H~?nCOn<7!;%o 
z4t+QoE}Ix3;T%GUn}U-A2!vIco*D_CNck&l7x8EIo`%9(tckXSLg@4*g74 zgJCLPa{ynQ%)9nUnIzttbogt168h!ax!LBwd}qd5fa?DGoG&rPiZXL-=_ndxaMPKj zu|nn6To|`sxJEC7(`d*Z@Yfp7B8Ov09rWr6-cdL0cYT5 zk>K2i8!ixjGQ;c4HHfQcXk?+a$N?~`ZlyyQQjgz7cCd%~dVtwqI*_Fu#@KW zYSBc8_x=E|CP;&(wUV!@U>|)iin)l8S#02@%TqUq_ zt+T8ISqKT6DuL&~m$Z0Q9WtVpl9mYnq-^SYC)7cpIl&_Il!UYHlC)2eekz`vp1$wt zX)IrC1pBc#=@cnstNKyPeGPS#N5SSC2=xCHR@#dkZ8gZKLcWkgKBkx1e7Xa`k5~M^ z0GRq_h-k3Xi};wA)uH$uWEW7xt-!HinCZQMC$stIrhVBbj)eu0h1uo!d^<0IAIa}Y zS^ycqL`_bXUgj^J3G~-+)@HsEx0?DU@?!{b9>LB+X^34#U!|19taV#PZ!?wyt6v{M zP(f-dw4IR~RCr$#=p%wwTBp7*iSQ=}|K?CyQRc21%*R9@Ub)9|3ZD@Rd$+8_4?#mxr$hPdpd Vjx5sX1O&K!)RZ-p$`#B*{s-&WG*SQn literal 0 HcmV?d00001 diff --git a/doc/source/_static/option_unicode04.png b/doc/source/_static/option_unicode04.png new file mode 100644 index 0000000000000000000000000000000000000000..1b839a44422b3e0d667600501b1af16bd863faed GIT binary patch literal 5481 zcmY*cWmr_v)*gmIN6OIo@{x~02YI)*NRZ@~L} z&%NizIcGm-t+n61-gm9NPw-o187vG^3;+OtB_}JX1^^)KAl_xrfDiZU=xxLU$zDxH z0#G_gju;To$w`W9I3w++y1mDLH`}5TgEm47lwfGR~}}n|@1gu4_eLRjpDR zGSPry)7S|~(`=a4e9l&_HLmkkx9Uu*Xn6Hjt*}Bbfcb|cCi9DopQJ><;HcL1UUxIb zTEoE(WeGftrV~_tmrYph51tYXeP-^`&GwXx!YtFY8nyJ_{pTxdEan?HagR*h$kXKFnVg1G!XpHU9SFV!bc`F>5K8E)mWE`|)2xP3ZH z6xh|3!MqmSbUNtGC)cFg!JH{jIV5U2@NF;uB!9s|uXj((|3|}-ZZ}^<{piwr`OKp2 zCDYd+54`~hVIO(xZYV`K6iqU$Wd*HXnm^I5Q#p=|JqJfeTkJ*c3yC%zOY}X{Be6iI zn}k&EDv~up1^TG)s2ZWNH)Nze}Yenc)!2>R~@q6#cwGu?RsI-Mww8QbBaVUe2|(WmUhWtE=J#Zb7^TjsBET>VHS(cII})<01gz4W>K?X?VJ z6)niMsC@V4F79F`(I=;S+)C#>Mm|wEjbLB~8bmiX8I#V=I+lH(rpg?O5n@p_Z*El+ za*>m;$2uPwVF^8gI(}dZ(T13;Pe7=Xncewb9jOE(@I{tG9;$9uAG(=*0ikoF~Ahd z{il43_3x-W340IOZr6niB+CsC(?G!{?D!=rM2FY&ST&Rk~`nSx7x)N9G^Jtlj35H#Cx9+v0IPF2q;JTJ&Tp`ZLu!Po25< z3JG>TKzU6SFBB2Gq`0UB$tPk1v)JpChVU5*`q=7!cyaZTFjLB<_PfBJ=l+a&l*Ml~ zKZklPW7jwE?kuOVCMIRgiNuweUNOPS8mV zc@+G&7){uh&^t(Di#^aSk z>QRlc=O?(akyL@o*&~S`O%8H6I1%JbrZARxp<4QPHRylw9pm&YF4bZ;plgn7{K@6o znDR;Z*2lRyvmpP{XT-tdEJN#3s@SI7+OIMkg-T8SEFge4B`sjKKQ_-7+@GGwC;>s9^KtGM%TTts1AI 
ztICD}&4|3U<|k9#P}sC^e+?o9rie&Oo2%F{?gtnan6aAiye59?qOdaI7wkv)X98oG zTEve~Z(}W3)EtGh2!K%{)*^6oPI@&mBCYv}TVR$?Ou%aUJd{TO7jKM}ARy=4 zIdAAMZMMD8cp{QaEf)u@BpBOv^)s*gCcnGCRUf2}X}B14p*@SdxTc1{P{4=sb=ocE@| zY)bE^gy0N~ox#1>8~7ul-}n#D*5Tm(cql|a^d7AYCDCzzeX&1Y>_7snn+2Acc5XZl zc1CGz^Q=9N!cC9O=-!~zxg5BR739rZOsrsG{B{X3RC-RdWcn^ONm1v0*HCD|B&&qZ z61{k8E;+Aa?1nptlfXh;I?^7hZSXVV+GRVLTln@byk%gVAa8_K0_o7Xz`rMkh^uCx zh;EoL#au~>(-c<+a_l2aq?D9(r`uJ|lWGb(kHB8ZWF1`UlK{P;6)Oimt@WZZMZ>1J zUgg?j=8XtkHiH^deD!**6YXtvsTE6^m=8Zc6gHdA#Jt#A#W8B{lm!{*@9w^oj3tNg ztgYYja|DTstV<1qc*MUf{L)@b;^ru_Vc7Dv&RDyT(A0&Lsv*g~JM;i6RB2lN-JlfH z;Nqy$!F2+XrURM_Gb1A-Gpj74@y4v4n~=-J>QJonXvAR=p3V*S)BEPt4lJSx?vh`B z_lvW$uN}H|V9t9pt#3TWgxNS>;iAvBa*4b5IrRVuwmJ^*CwS3SE20HYs*TR1Ho^y> zU}Kue!ZJRYttq}jdRsGosnidGe~c9ab4dkU2T{$fJ{V~oyJfTaS#b?c%dVZf$NC5) z3M7t5D6)A%-gjJANKc;yZ%Su!h%Tu9;30;yStbbe2K8(FL^Ch47T{n>^SN49gDKU2bV5?F8hI3 z{^eB4N3E5?s3%>|^`b=rQf*fQj@zDSYTFtbmS45x;_WG)yy2|;91=lu;$*mHB9ueH z3sTyMk@=~0vBmKo&0X-UGaXebL3zYX#B(C1vpVJCh@q{y_m9ma?OKoW!3TYl17Gmk zIHz%91ZMcN&472`j~wwtfcT?a(b8-}9T{ESvHN38^U|%94^6}$3 z=uTY>OAH`kWv1L)v#Y5i#k6-C4-HL zy;;6<%YD_)b_2BBJALs#J@z8&P>zrVR$)Dg<(7~o>RqUn_&GQVMqF|;Ap?8M4PoWo z8C+TNKi8Zm>XGz~Z7vYl_xzBVh8@!ruCxK8yg`p zIwhN{Sy-A1xyN(-Q&ZjNHIwwOPpXS~yowBlSG%Bn?rb>NxbFU(3_8R0&ObR!EL1aJ zO#I1h@E!&v_Rkev68Hd1{Y zZqm#gqn56bo8(Za3%KWVy7-Rr-N+$+*U+ilwMfKmF~0#>VMB`d?`EEz6ZJp_^!bxz zI~iImSL(FgD>ppatmX|x6Ui>qvLt+Pvb9dj0^MsvmQB3Aw4ifF(R48G<{}N}sAP+S z+|*)JReD6h*}Dx$t$rtu8L5I(*VBXwy2C=texKMl+%LC#$*GG8W=-t(6}DZ}TnpF2 zopg+-W=44y%tdFTrI&XQFy*X1#b3)FE2ZozdqBOO_>BS~ClHw6O%`agi~;fnJOcm^ zm<$?EI&Pabta{ndbIb0z9hEf(s(AXWa}Ze~l9dSmA8xn68NgN1eXg&$_4zK9c{ z0g$KxAVpAdr49H7Gs@8)eEd~uY*BS1wd4(Qm1e)P&%NW{-dBcv)F=;VggQXQ?X^=2 z`l2|-d_@hWi@aK4lHTM>dhRR^TN5iRr3L3JwNZNW@JG00KoI62g04WtoP!>8K{i!dZFgRu5eWfLM<< zaNq0Fx(+I0{?W&*z?bh~7VkOqs{0i(xnFc9sq@%)uIpqBFI)l@|exl5SX_ov%dXMC~x)y1BSepo(3{ zZ0<~YE;^BY=mYK9VAB2Qt|-p0x5w&RT{HS=s`c|I#2_Fd&CY3JyHA%G_|sp|X_Y1O zbJFLe!cTeapCytk?k$CF> z_&_(MK9r}7Vfa~I*V3`{2dw!&qAl~?!`{9taNK0A 
zZOQoGox8CUmH1J8&T$X8Zl#-e2y@S)dl@0F3|yZyG^@$(zm`kxd^PDj;S6Rq>M#9% z#h2|zpT_WX@sSg2n&A*#Eme!^#igYR;S$}7N)n#8&ySC659yaSu;d?gRf})lxiYt3 z?}Pl1e=$MEVS!Mlec~56Pe6zm@TUkGh1*GB03^ic4~l?58zN*B`$x~AXUjNCmrNod zc57VFv|fyVHnYM=2XLi7dCD<;aGKQ9-iV((vW+%wvd<9u=T&Oqb)OwTc4wp3q4Y2V`_%@M2k3$_Wly@JI@mmZL5JA?V5dX_KbTds-g54qH3MEE=v zzqC#6WFl>-Z-@D^_dRCD@H6jpz|u+P^btKA0t4Pu;_;uQY)= zm^gUNQw;e*i0~UagIh>_Y_f0m@4{E4j+;ifItgxLu6>!lk)<76tDllpD0NSOkeL?~ zBS!St^~+jaWBUxO6W;@{80(4Ar+_h!Wo;Gd?RW%2EnAFx-E(?-yam%EHLK*Wp=W-5 z&Z2dzJo`;qc*BE3dbOBO`mpbOdtrFhA{-rzr|kIRvCwBJk?!I&car3>c9?N&Jy$x~ ziHNH*EebR|1vjs+rxzKgjOZz`@#c|`9^|Cl(r|87vqwd1!$QZWq?f`>_LG7J2GQ>d zUofI)XF_}O2&qKETlB^{mO8~zenLZQH}_J9rpHJ;tV4|m@(*&NSTbeP9H;X)iiboD za{_Df*>z1(tNzocvJrjh{Dt+GM?rrf@G))YQ_}oM{}dxF3BA#`E`($Hu`cTn|0$Pw z1t(z6t36+%Yt!iLL5J8I0=(Yib^!q_JO=Mws2PmHqI7a3!zg|rgC1ma2)g{5#{InHkni%2=UPu6l`DVT zg2T(}z-k>~C8rh|^Q|1&@iFjzvbWW_)u8&$ho&=2AW-DxkF^)kc!P3{QV6SQ%|QHz z>hlQ-0)X8GV+E8nQzUvKxK5+-z=3jXYP4P08*m`d=C5c-GU-b?i&0Ye$bo$j z=wHGT9#&et35l1dq&OF+kjIFX@Aj~r%&g*;v{IAMzXlT3lCQX&MnU{B8sAfLIg*& z#4|Eby~H~LS51?Ygo=OQbp{J|rjH?+_#Vv3nbsMD8#_oSpV+K(_w4SSO!?vPP8G+y zQvpXEXXjp)`p;&uXEN5&uNcf_JCXcg5K(yrCt$;gIqzPdse6|r1QVO3FMFEd^}d!T*FB3vAsN_Y z#Fs&mRbQh;7eSepF@Sg*H`|{)_W2tc?MPV6-ahH|C2=kgD(Pf0(Yq${UTxAf*ZrGP z;?=U0e)Who{1Cl9A0V(sWDTxG0L*6M3seP!eaA-?NNtqFqy{cv6IR6gPMpv5Nh#tE zLI7E!v;o5u`doEYl|^s{H+(~n+s8-d-S=``V~+ID#|i~f35;#c+m;Y& zfe_OS$}bb^2+T!*^Cwh5=Qk^eR0!!zL=7nA4#aVU6K_!g2>%XK2Ke4i5~wd6fe;gf ziedkn2 z&vFV>doZiTG@O%Q;)9&(0wM@QqO-K|IA zNt6{UyGX%|2D$A8FY+rpT2^q{S>ZzwnxbEkXl-_rZ2h;7)9(&YaYFL5_Klcph!-*O zynf^;XU*l*XAez?PK50NuFS8%oY9^Ug*t!gNiiZ}k&)rVgbwtg=}IupBvvO5B{nC1 z9_GLZQS8FeJ+5e*$1;Prr?scP0jmQCg&%h#>GoN1q*9Hd?fN40R$3)B2rlt0Sua5^ z(JrB!5U2ajb*Znno>w~Z@eybT^ZWBhwEKB@Wsu0iJwU+o_x*8)!f)~weOc^9PNE{ahh%SqU5zFYm&_}I;pk|TQu9=+mqXMgLpB`WVdqo1_h0kMu(Pc&Dykgz(GY*kn_(6wd?MKMt>!l0xW0`8we`Y3q8#!BHU-R5?^R+`TG#{?(&?4{js5gsvdsmj-ecV!>$zjz4qyprTJh+lM^SSVWu8lob z;v3P#k}zqusVabgRFYBKUB_L5G*Y@nI+_Hk1jDkXvQKj+bC?#r7L*o!%PS4)4G7C) 
zO)`!gjwP2yw_lwe95I}390y#49phYhuV>EzH)WUj*Olk0hf^mZ=hwH(`=GJu?^Q{) z!{7M1+wI)bUi4oXUTt0t!O6h8JZA+s#_6q;*ER{C3EgGo8Q%SXIfZxssEbuawUv-J zpZ8gkI-U=?_WcOrF?^0+>3ePrHvAL(Hr}UizTbQ~vWex`nCPn5Y={P#i%29swo*82 zUWH-RVrfYMY3SIP>bUI9lIE&-hx|9pHsCi;XxTN|obPMVO{gc8Qq?J&qiZj0dC2&v zUR0hil^BYHS>rpT<}(-uZ@!-zwC^<7*aQ7h(>_Xm)F$C1jU#E5vX@wi;~BPFkGBzZ z#BZ@#0imtN4#oyeuT3XU7uNi&@wat#c6CP3PR|%T9UdQ6Nt+;NKu3coDgLB%tjbDt zg{c%mIAQY*m=7^|XB!gMvl;|jm0C?z#rH$>M}`({eR6w~OV~g%t*O?Qj&g`{@7=RT3TcE zF?-N4tiyMQGlwP6e$}N>kcK#5V6Ch}Gpu>PImF&$zS?H2cJfzVoJ)|)FBeh+aD$P@ ztDEEb+q_hD_xi5ZY@POp7urX1BrBvPu07|9Ho2UthKtLH7J^fP3(jd316f69EEkT$ zvrFPA(I$7@Ck6FVB~6XzT7PX~P8{c&>e;foo5Y98V>_#~28%n#rSlckbn@iGkppB_ z3RbiXDDq$A+RvX}FI(kb3b+8wfVqqb{_0nnW3z9>(+&C7H9;T}<8b2>({s~(BRezR z$B=t^R`W%3^j)+ubAA~fZ7cT!!BM|T`!A3EzqUer@Tj@*HG|u13xLLkV+OB7%H@vQ zjnmt+cI!n`Gj*m&b82PGW$eoptrPR+Q*JlPDcWN<^EWIjHO&i4rmLXE`rOke+5~k> zOP7^4n*gWX-sQ|@w_Bo<<v!I*YgTk}%jAd^- ztC$(-E({NBQ-&X}3rjz~>vQwoExi!*80DCxwo&U(H{pVQa#8r# z$Va_b+#Ghp7u)A*7ZzvRz0$Me>E_Z?#nm}im8UE{8vUbv$A`PEuM?s(5u*Hze0FZ( z4^O&Gj|eA{3(A}2rK?Zvyy+4d$sa3xOOlkp{N8g=!g)Hdg30QFWnL3KcU3L8JBf^& zMeqx)@t>Q?@Di(onP`9qejVpW_Ut|e+arz-oJZ*)ID!9eYPx?H@KtKG;tYK#Xv;ckeP~+_}qpeR2t28=2 ztPBPQo@T6|Vyhx8#ieIyPN$=9scS&zXm0hE;(~$kIC8!HG&iu-A#^l1vjB2A@)G@3 zg6r+~@5l5+gnt#WHRUBzk(MPCva~TEWTj)FV<6&#CnO}~vC;p*CHGb2Kh@v<@e&!? 
z+FEhZ(>pjg&^a*ES=t!VGjeir(lap8GcnP=m7oPWS=j10(pmtC|8C^}we!^gsApqr zWov9{LHN5}9bHR1TV5if-yQw;^Y=In9F70!$pZKvvEBrv|NVxZk&c1>ziq!&<@x=T zOV-%Yz)a<W&qZDJhF8HBbLH$3IuU^TXMy8;&N>xqH_At>v)S z!ottc$cV;t=UR!n^ndCl9L5Xg10mk2SI;@a%cf4K=q9Gv!@tJvi4vIBagO5p30cpU z#Jz=XKBBiFLL?)cT-x^!5d;ZEAVi5}IdR;21G^WF9M$#+f-3QY_)^Dw#r*!5m5wM< zD-!Pn;tA2TLOkRz0&ZcIZBMiJxq*;>&Y4y0yJ=+R=D*fO-BW^6FcG-EYMgl}(W{(+%OQ{Iy)-lVX1so%RyQhKSM|k->}fWs_DI zM>(jysr?#SK*x75SyxxhhvPPDUor{!cco1CcO+D6frhqr`_**jGgzqWLia`T?liRh zhg|WCHRaN{c#fK&!?s+SKUI4mm|6Bk#C&lsKa8)hscz0~rpc9t=HQUa-*K@|Z%B5t zw){G%f|W<@dC~u~;=hIWawfE@f$(yC4C~{=S0~T4Ge6y8{;@Ejs+2aj}|q#|I4c zKlA-G&!gaq+4&%=rMZE=h zz6i5`nhu=WCR{lf3#D<%`5A}8ue#dmo`p>@#kJMbys-*ft0pcTkFWnv^-7dLcB{Z& ze9_Y8c037U=ENhlb;m^aw$pe6(5~hW!Lwx1e5rJ)so=tszp@>CKcR?R;uV(e{jsuf z$i14NU?vttCIhDYVlEegazlhqZT)lZZB0)S19X*$#?~ZPw0#L}=!2H@NU$qs-E5EBlYwipp&803}QooBG9% za@#7$v8`MGYVXZS5`OYh@$t5@2V96THPfl3^_kbhk;?;ZtCYE1@FPlF&3l~P=oUB6 z@djqa4HJZmssVW73Ohmz#To%<#i1aMg@ZN)P9<&w`o=$9t}K);B23o;=K%45iZVJ! zLrJ*RX60kmTw_`cO)5XB!yxg!bybA8l6J_op6&a__P3Ll8r@i z{oGJ4lYB{)*o_1xrNaSvbnE}jIFi>Q-4$^ya8fv?GCe|q%hgt4{2M+6FjC)=z%or^ z`mB=)V6PoNeQ(H9H;QjI`JfWox5=DZ8nZ+fqM*!$6gX)E1g1Qldc3AAW;=ysS=2X^ z4ye$H|9jQH1re`gsG=wL*S2Ta8xQ+~!h#ifSy@>zvQM9^zxRkEOI%fH?kXUYhmaVi z@h&{p9&Q#DuYE2P`4*~ojXgRlZjrEmZDjxqpdcqFcNjTft)7~kgt>QfzoUL;DEc_a z0FU$y0sIe}*$nWc9)NbJ!aYG_Z_zrfZ*{(HIvCFqf@XnS;g$3HAS0%&5I_w&ZWtA> zuP2!uv$9$CAF5nj0CCPB}VJ}4J=gJ*BkStQ(khHhB`}0 z{@vN{o^?qhcf`Bby8~8_W?MnW)A_zgZMESs@Dd6Z;>FudVKJlP1fzg~G_KmjOgMi~D{Vdo|1dO@&8?9nWdg<1M0=^NR4eNz{8AW>j*A|Iz%rtdnrboJ?*9L<*~>+b|bZTB#}7wA71TcX?u zDa)xfo*zH&*G#RMjLr!j6&9J%SKLy6e|7jtrD4e?Wip9pWDP%lAVrc_AH0mMdP2P# zA`IKsRpotJQQ3o|JvUs273l&$YT{0&mj7#)zHIc@Us2o27Y?hC!HA>zr(e^BSZ@i(_#WJ+#YTM7I&?x( zzgNWR7=nQ$yzzEJ*h_6u&6vO{o!HH=*IVEdY{I6cj8+g!!UHNnhC{mpGJea6y~fSf zs+Ds={y8g#>E9;$?xWSj>iVcA*QFqk0PiWY4c@40dug_8%0Oc$*Q`Wsq$wt>s&?02 z+up6mIom#%$X;+zg`0;?AjmGwEp^ z${3iO@(-9QCg$~pYTV`lvik>IuN_)5B*9`4Y)$HvA35fOT)8Yma8CT9a!s%cN%)TN 
zb2{t`SZ^5*HSYz|^vi`?Vyzq7Y@Vga6xm2=C3NrZK)+`|Z$p_?wJ}g~QjP0x8@fBn&J9xZHt3~Bl58+jS~uU_r^2jm1OP$W3Q-4 z0&N^5;&!$3&FRpHWFz~p7QT{wR|(l%d*J#$LM<1+08|o8>H9;mIC7E%1?8N72i#%jeUV1)(ASeTMGmpLJr=4^a>Ig27Un3c@m;D0+47Kr75l|c6WwVIpmg0ueAoy3<7yU$*- zHSLwj%^bl4SR4;z@3%0nU}c`+!n(bT2lb{R96=$ie@72rly_z2Lb19b=9A)lTpvJN{Oc4&dKE}Z>Y_wGn-WE9qD zF+EHP#=uCkVlIsz`&~~O_Iup9tm5@_)bN2<`&L%5J#)CwBChIqAgx)hs&jD1N$vB!TXmJ{(-LYov!+2T#>dU?> zC*`u^`ebks!tv_cDHt}t#i8e z8h{&M-|Lb7h{smTi95hQ-0fYvRK<<&e8avs!v-?z+jNZXtsE&aSl?~G#~a19@@7>6 zF7e#-zhWdxZwLIX+pk0@8rr5JE&T>gG>x+~^Zo`!t&^Zb0NWs&j+3TCa5kbZxq*y* zl9mG%wQBB?cpsB;jSOQLZOWPnXpt+6{6qEf+W1%hTr24iT_ek7@I07o&Lh$xug z%lilmucWuA9c;b(jGUu)IeXSHD7BE00q8o-aEJMI$ls?Y!{?cVw#IWY&D>FD;OLqD zjBPtAkP0G2^>Z1jsQ$V1l%iDU!Oy47(fvOA#nlvM#qC9h$MI20%42g^j20B+;_*oG zpb6x5XD%VHqkVlB!OI5iaJw)H%OZsn}TL zJk1XQuC18j$w_8NDXM&aj(FM<01QZhgE<2#1gsRnJVty4S(Iv#c`)dURIaqi?Ps)U ztU))sOCi7aMn+~(M_(q8Gg`cAkcb(b=Lf_JJm&m0tnv6fpM&i^n`(!=+JDK~J{^A2xO?QbmV4Y`SWF8*2=hp0D=KGsU{4rY z(Iwyte_<6#`_U@3lN^M4zMfXTAy7zwx9^AS@i^B$%ty74oqh5QljFw-3qnH6(o=^E zmSVUN`YmlF7R1hCvPPkXlj4`iB4C+*luezUwn=Acf~sf0{Ls$UKp8MVc#k zsA}%h0zvz6Abe96*OZ{T>Ku`N%WO!H2)4GCGk%@ZWhcGjyC(RhCS7l4LpX)lI3M$z z`%SjLH~Qxv*V<_h&Xk?bH-p_Mmids`pKY}Qi&W7>1$zGhhUeW?x2^Nj=ouU_x{IXXn(nucTmHaGzy9x+NwdZxg=LOY&d?mt)5N@GClVNwp}Z= z+MDbiPq-)9yWTK$nQ8bS1f-&;EGEq{gB8lEq0|GOubL+kHu9}d!lZc=QgpuzEuXOO zi!9YMKID^A{l_!_gcTPobDIwbvxRx^-YNxyCeX#8YJz61fq4LZu|0q(uh>pY`%9FY z0FPtA7Kt*`=yjEeWkNu4HBZo~Xhdx2(hml@7A)?aS6Etm3+C0YhOZa1&&S+Z(Pr~( z4brHi#%jR3MeT4r?iJyX3)VM@kF-~sR`=kv1BP@W-{yTpE?G~Zc!-hyRkMV073E>O z)OP68Da!SgUH8w~gtz4~!oE}?Ob7M0XAb<@AwV^$W~lngHL*IgR3>yD&@5?n8iTxX zEzq5@bb|sop7*u*OeZW~C7!=VdB+}PA0w!LwT?Q6IJn>+u_Z9NDnDJKM`oXRO|n%Z zh7eoYzAH|7RTH(&2&&=Qe;>-ZjmqOZM?d`SivN)%+LGmQC+$9xk62pP_V1kmN*I_& zOud=|a8w+cMTxei3}fA_7*tp_SzI>=m{G(BlwH9M%=qfsVT82CgwX-1sFUQ&hK>x% zihXu72VEy8_l#<7?g#Tq0^>DckqP&k4Xy^~X29GmqmsC`v#K2EO#^;nLo=L{oVqt4 zw0S*_OEf84+ooK~*&8cGIxd4j1`My&~`hJ0xs@X~m;cx3V7OTYP9k 
zSKK6TF<0DVQ9N{&xum)QV7Xk@~da%8hjO2@C!)y!E*={Bdn+(H(Nx4#7l-w2NP3(}6Ck zGjSdUsI)R0rwx%Lzm4&qHhcxr-f^@VeNGi}n-kUUD^4S_C>U%iT_&gx%dH2^HKq4w zie*%|2{ZxXjydbUF)7*?Rny#T27cdKm-rt}iWu+d_KbJH(6Tz@;k#VM>dtmxw^ zavR$I_}s$zN_ZXwVLH;IoL%d1lNcpFNX*@2Z{C`RGP+f>U`~-!uI#X(1PMQb>adZE zvMF?kX1CjwlwDIGyu2U0tZ{8I zbhL=6mr<#yp%M?1C(+R#EgUd9r&js;zI=dXyCF_ZT>zQeCA~#vJG=zW!nMI7zn9Ql zgvyfgG$JH`u5?o=ywz2m#E-GKFinecYVLtQkeIJP4(ZytV`Ox+E)4glD=S`ZZd_qu z;UC|7z@xb4!y(C$f*lirqus(r?05^odgzqdB*yFva`ZKn4GakABKp`;BXBNMe27@j zaFE1M#QSd6ubMRIi{BKv4-E?Q7x12IZ&^_~G%cvDkEU!ZD%&lQw==*r9X(c<9FkBf;E7Mm=CVUEDuJ;W zQypOp87kIe;klgY;Wr5QOf7?UZZw^TAAM`6|Xz1nou8a+F% z(x+oZj~V-x^92^Ofd;S`SY$8qGmH+k(IUXebc(tsmFt}o;}6CBjgfOzS;Hd0XhdPJ zh?TqL_)}4Not{Y~6V`v7M}S)CoEx!1TAOE7A^Y0p`nr8uq6xjySuY)!v|F?1ULQlmyPow6`t&o3A?J1Wl`R$$g2Mg0gii*==(F8pFANeWQVN;tP$B zvdJbOA;ZJuj1O=B1lj&D+mRun6){^_N~4+PF?skSDvWJJts|x+)v1yX1*K8!uo@bA z;6%#SY}Si~Ix?FMcBTf&NvYu65qPC2HU&|^)llpvzX^t6_KJneyUcKZ1vEV+2d5pG zOSV}{+7e_&)+BPrjJ{w00^$@GP!=8Y^VgD{Yc}$@ONrS+Gn@%z&GI3IBv94J2N^rW;OSgb?lZCDW3IBd*rz(cF(;GE z$#xy?VVY@dG*v@u6Re&%Q#Qi=c}T9~th9U6^3_Wz7W>X|jwu?c$1OqX*IE1^(RVB| zdBcgZW@gGk3Ix5y;XVU&%MC-={Ha1ErTFx|-g=G+QsWn8Ug=pE#mr7F) zp5ZBprrMHk-yF5@hSTH_WUW{m669&C{ivEW(7$EQ-KDFA8@F|-H)3e>kt{7Hj1jNI zo?0n>O=ie#D`G4Sp}mCxmRS8F5970^z95~MXKGQc>MwlIodDB%@d z#{IQv0JX>Yf|S|HIHYFn_8W(jl7>W(WYB=$j>C_-Uw-!Oe0lfM%W4KA<-Wbi`&+{L z|Mg7*dY@66V9GcTJzgJI-`z?@ji{B@I;E9jRGTpNG}ek$;VcXd##PDN@kZoM9e2eY zPV+Uyd>nO2wZRXSaDKCo`k_XP1j@HC?X0Nm-Zc}aQHvnbG03$IT2<&|R< zyEE9(Jv;6)<7L>q8uS#<1H{tOaaD)qs6gnxJ2RQ>DXQIVcGY0VkETaUWZEOqYiS{T z+B-vcMYC||I2a@avd#D@CdTVj+xKUR8+l`r6@GUNeY9He{t}*-F8Ol9df#^(w9sOn zI`Va&tR+dIZ7H#Y&#m0QJM{Btqy5DHDOKf~F3ndMfyrO(n!O|n{tfz~aC!Q&nlX=D zIG?Xt@%xQVRT0ytGlyU1J{iAacKHJ3pC!+q3dRyQzur?(6Ph@qzSyo8HV`8Mc1$fq2YS|3-HM?Y1_7XYJbOl(fYk>PUe=78P00{uP@^%CWAAXoAmZ~QheKKK zFO2Sc7H;Y;gzyy_Fzz-UJSsD#s>n3Maaog!bIjHw+d#%DDrn9pyBbDP1diyO7mG68Z;pGz1}-$G4o2t%}13u zYm8o`2AurHLc~`ePKFFrK<;u@j6CDd;Q(%S9FG17@x117T2KcpwZt%3XA@JAtjZUz 
zbVx??WnRcl*!+%p1uty(Zr7x)QQo+cu4_XBJ^gAbv||om2gMdKOZEMD#0yoE`&?8b ztfLqhK|bb&m~QmD-CGzK2eDs%r7^?joSMp=s-y-fe5=|oD9|YZ5D7g<_CkE}k)zSw z^m-2t0oKk1>kU8cXk=bw$IWd9dKz9Ru~Z~9*uDvf?ske^>Me)WblVx1v9>$0IQ)~? z2yBvenEF7GJ|?FuQzcOyaykq;2qsvRlZ{28h^zYw8tK*^&BSwX2o-QfOws?Z>hG?Po8x!vu6_*Zj>fx9GE* zK2nwM0KfNga_76$8j0hak!eUE3ZtM-Y{tJ-U* z7sk5M`(eA~>0kJE^(JPK&u%RZH&0UtdY_^&MXv_PJGKM?3ZwL4^OB%IX-6&+hqq(8 z>V1g%m(BZs?+XN^A!-}k!j)xAW(?>rgCh)XH0*{i^qBAp-%v$?z#hsmQ}f&AbM)qo z*i8Ya9Qq+r(swEH)tlji5k~H|WjSe25Rt2RLWwI{kW-no6slBx-)41&fC7PHO-haT`@(q#+EJ=B71L`@WwwWkS2AFKV4y6M8qg zyUahmC&@9S3yZP%u-RE>k>rixPjHWRm3RlSseDOD4FBWaUPN-DO`1s~5FGE%>~ zDlag4JbDK8vsta{`*PFahQ-Jta&mIg zTU0_K40gfEAcx>cdd8_yhn|{u*Mzf+%~{SSo-!@kTJv6!snuL?sIvTe^H51zVmnH$ zI*jYc9$-VdnJS;_e3n#r@Qc6pth8&{63``5&1uz*3mj)VC2e8LZKwPe-l$9WbNMs% zt3dHtQiv9_Rl^lZ3nk|)?p~_1?_x^DLu8I%S@TTD- zfh#|(hk8})LpvNK1w}BH?6~nZaL4BU{NwSkRR@!+0F3hmI*1q&l5HF?>L9ElVDI$A zG7#qEp>FHrYYEuR+l-v9TN?;*=POF@>Bz-73ljQz^5^$mI4T zDwW_XQ;b&_Xo%4iRunsR@A{-5PZXQ~o@Za=y;?mQ0eveHL5$s8$)}4Xi)+dCMgQZm zbwsX`SRd0-EW8rru9 z@!_~oQurv~ud=CO@4BKfqq>{AZ-*wHQSXTt_PE|a%zk6*@U>Nd_i5}Q6WyVm)<+=4 zgg>C>p5eWRHFbuqyS7U!*($Q*UTI22AYVrY!;z529OK1gMqict!g7P%b^;nrYQ%2u z3tun$FsVDnlR_fxWPGBd)4(mNhW1980(+WJ7$cWbPD~YC+SRn!iw> z96!m?L>4~mm#|)8I&;R*$ELW^V{wRQ%XCn3r%G+B7F9mIkIfWN{Na|gk{>BLShCpd zz`WroFEDNeC=dbJ7`dbCVn+$0y>22P&C?=rsT#JX3Y#Hlo%PWwTPw(%m$rPml>LrP ztEw?LQ7wv*KHEUy@pS9E8yOv?{{r##lHB>9Tx0#C<6)^POXI=y>Emg8#LY2?n4TV4 z{EEj;SQxr>yMBI&bi>)iq^ME8hm(Mcf{K!I-990@n%CCKx2L{nyr}=ZK8G?aEcMNs zN;wmV@p6$46c;Gx3~rd_u6v&Sq_e13Dt^Ez`>H=zYVbGc8JLd60S2y{mPM~I>#G#q z$efk7vaBUH4zeCS`PVnWtyv2R^qgk{r|PPh9>nWWVZ0*`pxI%eu7op9fCDvU83Zt` zgr2V;fKZkrDF?<3E-Z;E_Dlg^9oR3EG+}Tb-ol$EndYzn@2rE|gyK9_NelfSXCdT* zF$ZHYg16|soiUAP>AaW&nWxs-g7d14wRiZmy^V0D1ta zMa`xvh)mwOAl2ZZ^SM|PUvt#p?W_29n1;Rk0GM!5NI*jXU;S*xXN$5g&6O2q9zYSl z16O;ls57)q<@lz`V|!OhF(x>agpmO1sPr_!U9rS}V?cX|`}_O;fdR%-`@$Ci)|%Gn z%YY54b7s+z6!!MegPSy>TIyUI3$(Hl+f)#;MfU*RE?0q7V;M6dB!8?Mi>j;#i9>K# 
zsaei|YB^}nPO)y6=L;zUKnecayK!b&C)%9V6U<+dy3>^6dz0Rq>f)cF0wvR};Jy(3 z=k((Z)U5@gjL*#$nT>H`ePcpTv`UMaq--Rub+3} z0_r4MAg7P7t4`v#5570PC5LpIP5(!SCNcu0wVb=Tb7}hVihD8!1&aYD#B*15QM9I6 zrKgz`>7Vye6%AZ0ZVRp?M(v(uZT^Srd7}Kp1$Rq3u0e9RC_8$CDpX1G&BRoawz3KY zsPcTA%CJK44)}7t!dMLBFVR7j5h-^KPCy;qMTe@DyyC&zAvpVYxB+)i>WxgYSP*n zDCF9G$g-8_wF4B5a+1fXv))tw*LeP3Spw$oe%jT#wT`qMF}0S}PdzHy%>B7$Z==1{*N_m5P#R!1|1&JCJ&33M-U}z)k~|~8$k>6 zfIs>kU#pN09ADKrYbNv^$Q=ENyFF3fk!H@$JgBA69$k+sMVSs3R8`HzwX9s`V59k^ zXpmqvg%3HWJue)tKcM{2?K%{QTJrDO4xqUm`_>hCk~9m4oK^J-%#azUaCK4e$tDhjc8X2<*$?JIA@#s zda#DG`7=()_z$*krSah{;?#d^d_gQLHb?0iSQvhruQW5W6VtnjLsU5j@*c@&P8}=2 z*%iK>TLTk^7#6pE$;@c|rx>k_z%Yh2n?^BM{r}L*WlP#2^!f1rM=!(oyVdy!+81v* zZYU!LFxR}|+j}Vo27bJfBe_D18S;jK;^{<OxBBP`D>abDR#Jam`a|J;@hr6ve-E^4WIZ zk+pX=)U+{By@rS)xq3?=X;3uESB)2{!U98sLglKt!uxP@_&ctTOV@^RMkel@z!2&@ zLL=%en7DVr6r=w*FL_2~;HQw0VmHw&wYf2&KfE?-?bX8_QBJ!K((svcVo3I}iqwtb zUfe^lP8jlGlLiqMS1meYqaO|3#d_p4s3?Ta}FDYwbvRy3 zd5KM7XBg$q8myVdxL#6Vr8yVAqv7zxfp_c9i`ZNkzkDUZZ@9FW^<`~;ZvKmG$qX~8ebcohhv0Kfe^`&!4xi;!%=Ongg{<5L zcva2;mUgNmao}cLadlLTb})DQ&&>PIY0TzmhLd3$DUQxCQf<6mNOi;|4{`a_xzUuV_b&up4!sBQ}D=X@8#d1*4DY9XK#o z^zl^VG2TaGV=XWZY9);Y^zCBkg>l1)z=dU$g`5i|s{zLf8a>v9DXl3fO9i9WyY8y1 zMmVARNTC1alA+%1nXW+zPGE{UtDX`?KP5i;KB<5xC%^bpGp~t7cGnhvzJ;|v6HJTK zsdhkAav@u5wI+M$A#q*CQUu>*)KO5a)`HThZJAO*euYxE#Q)D=-&j6*tyn z=+bhgjQ(Bz-hL8OPij@&+Pf05obD;@J~hT+jAp4J5#m2EumNeTQ}dM197gP@vBVbg zMixYXRMr)!Rlk|6i4!({1iB`Y}xC3;^y6%$x5}GkwGWQIxr;nI)C()t2Xeenm5X;#r zCw;s3FjU3w&FIRBco~W;aV}*2utO;B2zgq$o7Rg;x`tH zLW8Ah-mIHS@0H)>3zer%SihIa=>378?0^pamAR69*YIx2uY9AvHK60nNeIL}``(O_ zy`rF}BM{$$CCh0<$QMU*3=%j!z%{xZn)k6(Z9H0b*;5(A6rD50t8eZA!ml zD8n7c^^xH3KE{=-Se5kZibs}OwljQsV|=%n)wc*z3k^`sMqVqkpOj&)_14IRcGh<2 zTBun^7w>J-LQEIyer+J0>M8E1Vz~?|o*7A*`5=@|sw}1^dYwbc>zGk>NVS9WBaTbk zT*@O~-h2%y{~~y(%l2JAKl#$dR1%tQ@36B@`WFj-H0+K8#C@uj4HZoMgB9lTgo9`P zN_7rw0(rvba;a8H=JL;bcgzcbn9ywpZDbbYl*xefL~D@OtrAb@ZYy-|+WC>dhgODe zF+e56s?ZzQyY=H}?UvP;ZYrGRkel$ET{cejF*3Dc9!c{SaEA>5wWuL(*^RO6GW<;< 
zZNuM5NL<%q3{?MfFWswGh_I7ecpU?Ged~R)U5#u|vTkk2!A~}rzAnHwlE8=5?1g>j zx2vhOfwQgOIEB-Q!=b!`EOmxNV4~IqsCYvxDlai6+@(t>tq?vnPD57K2H^KB3%?3S zg!VC1&lr3Lk9Vq^6w~Cvccbv4<4xDlH_(T6NYTZEYu>ygW)(k{wIC4&|90Nbvo{eU}V^mi<0Y}|Ma+2Yo z@l%Hin)gXNm^0rq#giU6jsGNNw>K&!1R`)!Vo+QLml{W2GQambYk8Lz5P=Rm;kKY6 zcz)@!@Y*}LvNcCE>o9>Y)R3(`;3~d;-A^*3?;SH!wbshI#BYS>j_7dJgpnQ{aekZE z-lO=MB=acCNYX|I>0js-LB~;@2m_#L{>U<-)eN&`i1N9dOPyREGTpOD|IOc*rn( za&czJ>_Eg+&U{fm9D3EM(*I8N2;zy80Y3DL(^?a&Oq1V9pIpnc5ZsH%7(V#GUXtE3 zj_qE0c=``WlT{W(!=9!>p>$BqK~Q|}=cBA;8hpP6e%=f=xAWKW!XXtYzT3~KgL+&o zaTY@%p9|q4jiH-!xk6}KVAx6#hILHQp(O@5g3^7W|DCpRdyTVJXsRDNXJg<8UC18> z{q*-5%T&Bh?AP<_wy#`N1NTf$KZRX-CToudNwBvt!kS-db(so1#B_HY+k%g^%=>eX zsGSwxe6T5Y`+$^yFX&`~wCvq?3fEt+R9)lv+Hgf$jZ9*o=X$aqUm(GMgLc9`^rR8W zo9ApKwN0z*E?Fc0D>C#Y(4fEl&fi`= zjT6U44+hn1Nf;zU?`AmSG4J{QNZf(DL%#`V2+~1_pZ*cXN4hg^I zn;fXhwbL|6bOa%b5r^%^TVs3Jr|in z-p%4uggfq5Xvh;S3|?qfydC_vW9?_dpUdcQvDjaDDeTUqCU~7#BP4J=)FSrQ^VB2E z#r%(HNY9Mu{++>@;x+8i~5IRj$rR_-Sc~a&J65qGMVRb3zs;@}xGdaFkXxw-4eDDbXy#Zat@GD9h3v#Fl7LPy^$$#N# zF}l$2=V|iHzYgD;mt=kNwp-(}%B?Kmz{}7q|r8w*V6ku_5 zZ1G_1G}v#gSkV1_UA6M)ziper!p|6rNA^i`3QQ6m)}n|J$Eh)o2}Xcl{{=*Juzbtu ztY{=MQglJlWn#ttk4gF8A8(8+G$uC5k-20a)xRI!KhcMF%wIXb^)4cu|9?6Lm518O z!M>M!mWh%8@V_#}H-)0X+}vN=$JGmD|Cg=xFYSE~#;+V|7UJH>|2*jLJ4FNaJ|boc zi>mv-44+>=vcP_!mPV1d`21h3>Tr}64$RxS>KUm1k9_*$UwcWSy!{(yw^G}G^lE=& zY7Uv07VIJu+V<^mf`FCS%l17z}+LWRiEFLyRWV3k{bXwjxp*@px;Vlt=B4(jUO}7-^EjUl{c>q zxg}XO#orJ>vvQA#V+-Yjrt#*6!Bs{8=`v=k8jJ%)sYSXQ;S_5dD~&OlZNEpmGdMtbURiD2B6Y z;Qu!SflmCsdp@FeFsDfmIn*%qMSbiz+^?!Ca#4L^5O2e&8M@y=*4JGudY!Qii%HP; zRH?1?Q+ARb%L|Xzb@8R4hB7fI#cK|mp|5}j`Miw9_o%(>;dDKWz-9II+Su1!#s1~~ zLATr4V}%oeR|4xy^T$%v_-LHH>v>QFL?6}1poSsax@MQX>2VZH3b#|Ia?_xLGS>9~DVB9vs-La9FEr9*ZU!s)J zswmHSYvlSOPoK?$q9T6X&$MR~wHSkXm*O}>f^J}IC@p)F92amo(y{HHVzigcFkfv5 zL&7$}M2Cj=o^$1HL%-T+e~7zIHIRf1&YeK+zOCO`FsADxxy!)Zn{CT*KLhuHs4ie^ zqWXY4-_+F@24@!m$30Vss46Bi1gy_Xq+UN#$@yDsK=p;8GK1@VRWg^Tx|P0Cp?`V} ziGn{nPDRTz+?hUv^)kYIeYFaVdazTTk=+@=XJ%mibGkIscdD+ydqio^fbsLcKk<)H 
zu!M4o*n||`=SL+vNS*v@xt01V9jeNk&K(Tsh@Au2i?%2xyR4%gKbPf9oNM)~YcbIWk zFw5+y{3~CLhyAsjH_@&F>hax`M3uIXD5PUrw547Z#TjTjUZcu~0=n@lPvt~n61;>e z&q!%Yef_h4m{l+lYh=1!S(KMlwY6P^{sU@Yogv-op^mVG$Ec4@5jVvNC?Z^G% z>2lw_0_?WSs(mgbTW#g-)=V*w?s}$1)CbVmc+3SX+*z5;kP=JthZ%0w^IV}1e|qBi zxJl>V>zbLY|AD5QO?Wu9?`nB5#8D`YOsEE;7< z?bXUy8@>h$Ud9MVR=`d7S?_3<@}+9%>y_8LXFraNE*`xn-2gE$qI1L~GS3J5yW6gy zyDe1#dMcoXjnEa>r;~>_1+q=Q`)%P4l>60?@axNSMekFvzmi2>JbDJ+a1RX8qv25J zR~y|6^=*!gKk>e5v8H}!2op@}kn`PM;KKF$2A^kcGk%rOz1zx;0s`lnny7Rf`kwLZ zl>ApJZ8_4R;>69&n=wPcVT>Ivy$)1nj$Js)VA?f5{b)N}At)$Sz><>D&)4;bCC#kQ z4T0M=IBQfi*OOHKbwCKn1DiEXjV}&2Vh>NcJWZyJj%ygnBQ#XL!fd4W-K^$RbA%B7 zw%<7I=VSy-Du*LHhe-+V!Tq=>{#AiB0-hUIAP1FtYQom3p`NV;-*qfO7? z_Hu%LjhCC87^HWy7!M`D^-Tx{3^If9?-dz*%varUdwKznZDhunCoH^H%e^XguR!fx z!65mzjN737I<;AB5`tvv2%Om*#@4D!Eik>#Hr}0S>hR@I#7E;c z*)1~rIB4ik^6-2~xiqJ~Ou8oB$8r{12NLPlVHTk)U zxfq9kVWBe%w_8$lYMR8X^{hl!rEbNqjKq228w-j+E4!zGv)0GQU-7F^f@kPo2s;d~ zdN--;h0S9aoyUsCb}Od8g!ErZ_~@Gi-4S4++mBx>{-;Bs- zu48)hXLzyxz<~8=ww4MWWvGTRtiz+rEa2W>&UxQvRD@5$r+ry^P9_{#4lMz}*#8&( z*v1P00E3R&mkD#hzE^FIN+1~N2%|m0t!>zaEP5g0nkdWb8slSzK|-vqMaJ*j>jV5# zWd2m@wl#7qZanyuaPg9)-l+mX7p~R>Li?xYq=)$+#>?s)w7eb<@m-jPfsO3eT<@G7 z`KWhRw${;b>Z{NBT(HiG)Xv7gefw9&(1r2yr2C9HnkX)HPB{AWfk;Gss-u!HrpD0{ zVw1%|)`xjfnPNDBHnC2)jFIUYt5CEXxacx+@y4de<93S%h|w?v6novrvu{bL-ANET zu^Pp9?op1%=|Sx6i~;Wh55Q(U^3hJMEfmAUX$GeQR8kC=7EyssqrxeqxY*jgE0yMMluwO6bs&XI)3dXy0 z&ga4a&UyYvwPEj4{__$(0hMpN%_orY(kxL}5nep4;Hyp?j^wn48V!M|`ujY!d`qMAMUb2L#QK5jdzfXY8kFg9k=aF_M*sJTZgjld#Q zmLpk>Klv+UQLaSr-t5(9^&kS#V~1X#Nw2Y>@Vw+MHnq^@RAd-;vd!mw@~&*fvnFX* zQF2a(RF^5&JVJm2{UF2}DLcRF!0TX&FvEN$&~prS*PzxiX1GJ2vNs_t9`oGfG9T)$ z458DGW`gH%h>u>a;-NSUI^4xLHOwwQ`Z_l6A!~iD zF5;uWt~5aJu6SyRO2Vk`I%s|2nn4pqq*2&>_F5&_7FcHbY^EMNO^Y)zN(I;Ua8Q#W zsryeylyDU6G+3P_olC1u-1#xH9;1G=E)uu7*sKMm#YxMp?zx`Z5UD03INITaa0LkfcI&>ex*F$wf#c>85y@Mj+0Wxud-#hJ z9}|RYceli#Du^3>xe&G({owP9qDA;7ACd1V>tq~HEvY_owIjY}aKTt4mokK|SV%zi zF2j{kqnzj0PHLUpqiLUnX0M_)XMgo!OIfV1whx=p*w7+p#Ty%@XBHkz1rQF6cj{Ku 
zR4WihvTnHoBOlQ9(4i2rIf3bYFsWb=3WIjTH+kHZ>_)F*Tom(-V*qM*(5sGQXJUhv zY7ESC*+l9ba@vKbha=;e2$c0O#NoaCAR3Trb}pm5st29WdP)P3N>lCk9rv`jOEP$v z>D=L$>+||M_*2%1mBgbdV(olj}j?G-0g{3%qMz2LZNXw|AWDZUpkZ)(E2 zbrPi%n3P`?t-#}EkH&8scnPS(4r`-r+ z>pkvQEiHD}uX=#}PCv?C)Ga&l&DN4KwT5aR;Q6RyIG{%hnL#ya;6e8NsRr;95+Ne) zI?``P0Un`SE5ui02*1u&o#Tsdr#}_^kY|N&3cn*g5_Uxo@Jd z&_mhceSsjE{ZlR~qkJA)7%mV8f-Nuj=iI13+Lzfp`li#J8GyF7rNXVK@%o~`bdT;EA!Gv8^Ch#x4vb95x4eiy9SLR%;WYfw>sZZxaSzVGT= zp<~et-54uK$XVOR*~AjVqG4dDsGE>Q8`|bkV#Mx=C5*o}SXyaH=i%a7oJP)^y5SiP z;f>`Db<8$Vj0;hYe%n|l?OX|l!rCABZ3gbx>}Tv==)5lF&;rRe`A=C3k$SqOd#d;k zw}l@qdaheZi~n#1E6$dy7^^XBh|`p<;meZDwx_4@fQ36SE%M)`%RLsQ89TXy26@j` z0;@_pes_58HCLs%GI`zD!#k(q$1=IeGw)@^&jv3-$}QY~^Sqi=sTuTa1!#}tGF>sx zPfwsXDAX&`#d*dQU0!2yOXS~sZS-uR&zDl}5eG}n#QKLB|-t2kN$uu2}_3N^WL)cqfkm+K9qciaaQE-vD; zJq{QB^YjP9?^c={H`B+c3}E}aPlwK*c)a{OHu0*8q6d2SwS9R5b$tY)?X*p@7W6`33v1M!DN2`EQAq%O5B4`#*(zO4N>Y@wYc*<$r>NM#_evA5 zQByrTV9|c6U})%luPQMO0=VpF$aNg`y4PRA#9xA}AiH@VojIwB>;siU+J`S`vkuwY z<-aUkp55KIW^s@$FW8~SqMrr4dTW@SGogK-e&DN^#&r$xCoS8u!cAz~NwW-Nqf8F171C~i$8Z+}1h1>keg6SvP< z8u*!Pa%w6rIoXEM<9b#GKoV`9E*LRqR}KAD*PMySRK}4sD~xJrchZwO2MZ9%?zLLL z=%0zLVutGOwmdux$^N>RMK)hU;jj6jeLTpHW@v=f-}AdZXI-%B$(#&*jHm35OZ(k^ zT9%EU!{8?dZ#(vEJ0m`ct5$W{HPqc=MW%k^NX~r*&-|&!jq@#igHD0Hw?eX6f6j3x zDts7COoLxQkd($41%D)WoY9USHW~JXbm;*qtVeDY^ZYWZ{}@Hy1g6)$73%tOWPM75 zV`CEX@-fZ0?;A;#oL;!pDPFi*Jt4bW^Kc|StsjJFh7iE;!`XGRB+~t(iBZm~bIxxd zrjNrLh*`N^B+}7WLM&C}a_GEZuu`%PES>%*9Dl5xq&gZ9-Ck}&yFYP6dp_YRU9`s$ z`1!PLqF|H4!4wd~wC`5r{FUs|f*mm?JiHX5D7n`Ox)Y#;rw%fJ2I&IUi!RQlZtVo3 zI}h1VM9BX+MbCkHgAA#kq$=(m9X+%sh?!t(AB=|{7%V8ECME=oT}T`3<|B08|BN~H z{5k0@-3mEhEZ!5V!L0u`us8ney`t7RPwJn-9LENtYNc{W=3wj>{T^QjEpR>7Hx88belc?{7K-;J; zA;jSSfHLZ7LElbbr*e&FM(h56WOs^Hu!QD{+s|nFEBAQY%Ph$G;0KHoK2R8if)6&^ ze$bSDcWxeHtZ}o`ubC2x<-r~WeG1F%PnO3O4!Z34)H?58MeBmj@MSMeJ~cX1L9TyB zLxOvv^ickHvxK|evk!83-1k)}SzSq-(LH_~Sou)Q|0IJ?)*kNL=DDRCHdd96*%m!= zyOnWwe(FAKU&{2lA=k#|@vb3mAx2+61fB6XM8Uk$iJwOGY37F3G%IjA1oG>$OgoNs 
z=e4ufUW18IU5l*>B~CET)fd-uPR{(}chVqsVM<9MOKip`6O806cY^aJbH~Gu z-7m2(tJV-1<{|ZbqaKBk@i4Fk_k94}SI`$}UnMoUOh0VD3lduNvm6$I?HmJu|JnB{y*-Mhy-6(~G+KqQ)H*Jgdjc*{yEcO~!K& ztctMEom%5Iwrbr@zzs(ru{%|hKMx#@!V`=XwljhKh?weiD=xKU-9x=!=!5LQs2HIy zj75HwfvA$C+<*G@g%Q7G3U8rAECOU(zmge0=jp-%@rm?&QN&{}EPDA^2IPL3+Vsqx z-KZ44;}h~j7k4X#UyUxWWIVoi;_wM2Q%y>bh-};W_IlLVO>jl8-21H1{R)Ux3Q)YhsG-f`fbe)F72IU*~G?x*FJ+B(L7Mr(1 z9Nc-qmk_V5JPvG^JHHW}QuO<8y#E&lw7k3Z0tw&3{$CXm94pao$_8oovS_{Etc>ai zOAcvdk4mgbbZObnJXtZbE<{VRgCr_AzMj_2bHZ9^0%xMQyT~7)10t%L-oXnXBp!yd z*%h39dGMLmAf{$^=vo8y4Lr}ZyI<{8=rQT|43P>%78Lu|+4Xl(358Bvv1VSMZds6c zKn2^8|Cg>+5%1qJYm3PWIX+!-(>S8ITBT6 z9TIaiY1#eQ*cU&({8zEoKH;}x&&|_XZSC!oi8T z)l7#7<3szk=piY7-$l;HtHJn9xn7JXc5&UM&?-|(xlTvR-2CUEFg-lci0Y$=(s4Oq zh-v5G+=LJZ%)-KgB!K{qw(lVYS+voEA(dvEHGy86Gw{A48BbjcS@*eu-jImXAjE!- zDplCtde~wTb|l@jo8#d_c@qA+^Gf#Sf~hmbIy1R-!WTp}>EZbw{#b{C9IurjsoT=I zM}-9GCIy?#Z?&_&1Fsg=mul|hGFn;_jwc5+XvbM zJ)Z93n+iBB&77nNIB|#uO`az$n8LK=bprFW0$c?n8zX zMjh2y_YOKJx{nn1o@=xhW0Upd$Oqq4n|X!LOSU^uVxk59z zm<8o_jTiqkYt4vJ&ZKjB$Knd0=_TJ2g~Mk!d`BmKn;-pS>l&8IV|l^1su%d^A3=KJ z6t{Bf^0b+jt8sCll}Hf%IK|UFh1<=4d6m`hyAk^Ubv(6 z0!B7BRy$dUhqS^+O!CYN2_BfhR2EcB(c*R@EkOm85envUsU&MNQ-tB(a?K2^f<_oe z7aiS~jd7Z2?PoUFeG&I3(p|{S>e+e&15-gKt5a{ra%W7n;U{r7+#`=>j)q-KY>oyt zQbSOZ>7$f4@1mMrf0g%Y@Bq3(!EcLC4eU1=$t%tKON}HoG@_rLx0z<+O~}`EBdI9; zL&LeULasXSJ}%J{%FQf`T-m?1^t2}n>%n+DF)7tmwMv}LKh44i)~9f1+JN+dOG?D6 zPR$}dJ=mXdi3pGYZ`ZzAA!OKFZP}EglBr*`iqUKWi0VJn4PayK+X|=hi_C7M?*JBJ z5k8}(yYBUwVZ**W(=31yG&Vx`bLTMp-yhn`V`9&cEp3g~b-LSJU9W@R06=q6%_xcf zERn!0>YcOC$=F{y9zVVU+TEdIws^KlWk;MD}1|Zyl!CO z=R{=gKIV{IykDr)(AWWUKSlyrMMr+5ej4+8^>hK89-Ml<%Ol-qWN#@}064$AKI-oc zXZDK&6t;hSvS(CTGk2R>bImTK0(Y$G^&!|=<&F8AezLNCL;_7wv~Ult8TCm-33Ee> zmH@Bl>dg5pUj^3kJqXh~4^0VqnPpe}?=d*iC5Y}@YK2Q!^1khv?^m|k75jD=U6J|K zY~YD6CR^m&9Tw|)qfgZ2BkZ5{hXxEE0WqtZs^cmOtyZ_HbW1~zS2A*A>?m5U=76u0 
zhFTbaLWUF%{!?&aEI6eVis<(C!|A)Ud(jN2UXD_CL9lmyuJY(^iTxPK)d<3=R&s<&R+F&c||*^hllDicN%*?xq1!VHh|JFCD1x!lcozE(-(g>e=xnVzA9fW&-VAvl)Pqu?z=>^JYKAOegfsNX3k#L_L;ywWWGk^u`SEM$QR>J za<+JS1-%ct>;jjDi@p|oCmv&ccHkad6%yzRCb5}~>q{_}@uawr%wH@*A_ z9;e!@awzm`Gd}`%)dMKa7N{c=3CVhKUQEf|e|}*VYvE7H74Rm@WAbrgdj_DtaMVAI zRZKT^E-_)I-|6=v_Sh8}{pRe5%n%e&h(@1$JnL4ZevuQLkehm)TSe{g@WfRSjhz&T zg^XQrznq*+HkM??-@k?l)(5~Sxp@VH;*5kzi}}*W19oAyUh0Gn7U#jVI#QG1W^-_t z=*wANTr<<8b`kCMS>apxV9f7h`ss`PMHQDzX83%5uU90JZ1G^^!;$!D*VbECN2lOqsj;jb{MAp2O$Gk=eo8YcwfUrB>5dK} zmdQ^aY@P}6BwrsK8@62CsM;BRyY{i08UHcB{dV{A5@5vnKG2V9U+^$ZDsrg}4f7&8 z@*QW$(uG>|wPVT8Gb7m5PDGo$(9k=&5cleL_JRl6>+V}_%Om~IBwFP5t6G;wr+kSi z*W^Tgc=}&^M^dguUGkH9zzscbJ(7UPXY_-c&Q~%Z1!buu9 z2g7I-er!q)qg(&v1DwZte=zb(A6r>aD^KsbK-0x*zh6fBv@_Ln@b{)DOIdZLnxE?T z4PP8y@TuE&Mee48Qkz7fs*cFhW;({N2m1w4*ASdS^a1$ea}6EC*JgTitj8hXj$ZUL z0l*bOSt}0N$r|__PbWoFyj*t)>0E!6N7s1_Gd_*Oc?O}S8zWL)m!y|(fhpeUtM;DRSXWto$ zN5@H{U3;6hnI`hl(B-~2-csv2NzzR24=|S6FTa0ro7_bJ6lk^!#GHJTh-E86SD6e< zyttY&Qqz2&pwG#J5#<{G=Dhich?ntFJ&A%YKK4RDhjFoy#K&B>ZI@E$&>6o;m{Mlg zQXpM8A!4vnP+hsUB*B#N8=#Q1KlFoUy0SXLdsqLWQMGcB(iNmA0yUljL(x)30GpL- zNzJ?b5^=RT0E2thPb{$YEK-2>;J|V@BtGu~x7<_pJ)q-A=(Q4PX^fuqaJjjov~S;j z^+^u(+U3b8Iq@liee?wmYheLwQ(;|9#w)E)d+Xm)bR7F!eB{1hetqYI>mv0?-$1@q z7x-Mc11#<6@|(GPALD2vQKg2lmFiEFBEG)42*gW)Uw86z6MGP^AQCvUjHHsd&_WBVcNJe+XH5Npzv;4e>G+qazHHNP+%jQb0 zyLfzual>?q2c$uSP)>p4aC-Pa#mEk5u*A{ZjG@(bC4S(4>wdTr?FBo`P<6t*@Y|U zVTUNJu5PMjwiTfP+Nl4iWuEm8WE4ZbzV+43Bzy9ijT=u4nP>g?BAIQ55DCW@&QV?N zv#xuVOtSv__nGcn+ade6>rdFD-j05_dobK&J@<%F^y{g32!{SAFk7Dia^alIjhlY9 zVcKSlY&45>3~YRi9Y1nn7^V7!S#9HT2W?<%n`Nbw7bb;@CLuCLZf>}HZ5kSq;iQRi z`>OmVlnRlR*;7VdpBPw?xAG#V;Nxla?P|X0J6F-L(P+_p8kHXn54rc$bmO&;Q<*x+ z>}jzzDCaGQARBEjD6yoB{v1}8cG2d?HXC!?#t7Py`MSL)W{-S#1Q}!!68N&#>9_Vd zjU~91bD{~XU@|Rk-r*^aG?eGEy?Kod&`iQnak${ zWhX%3a-CoE(Pw9sVjZ2@-Q0=ye@I$fU`+}6k?YKHW{o2YHTLR31yP{k@cN@jy0B6k zl_@7iIPcU9$f4bLt>9~pP^8KYjg!*E_q=s6#>{NCKV}~6aBQHj?irlFO_zMuk367r}ZXLe43_kpM$bQLrF$zChd)EA0T 
zI)gnss~C2xnVVx{KW$4i%D&$9tHtxN$@4csZ6V*9wVS!l9`#FL+f$(Qz~@~T(u)R; z^hc`gGBkf1I>A>OTeu5f!7Q9Kv8sx<#dGAZSQ(3=Kj&?mGplHL+wlIpLw!0{2R|ff zXlj0#tJK9RD=P~y&sD_mSv*j$)Y-V{Am?uO9XYYFvv+RISXx^~+q?OzjFRu@iHlv8 zbE_|&yj{Pu_e=NNUSlUdc4yA5fTh=EbPqE5Q#^;)ACK>s_1FD^4n!UN10QjUVhtaN zi_p8&r?@uCR_s}BJ=z+&u}$<(q-~8*0z%gtO^d0ydaj?p`zIC!^Ya@hAxm9** zT(%5<>2u;SZO9oW;>weuD-fft|KW~gL@kmxcZj`HlsbyLK{J&H38mrl6@ibfbN;>T ze$H1CpeAx+4oq; z-d01?4Wv5NDIz!pu7MOjXv5a0i6a*eOXFsJbca}8PenkhU z?aPQ8;M0OZyWR6e({ExWPuif+d)J-*0TH+)eQc2oX1ByDkrmmVjQnifD!+iQ@RJ32qbX5@upzWz zKM$sE?!H~mp0de0XJeaKLID(l9J;DT2F(vrOhCJSJ9S^Lru6kV0MP(NmP)7B_>2wr zB@!LhpK+sc8}bodApp6oAN~_fu!EgjLP-j0Ac^~JryD8*iG+mCnu*+!L82h!#}ucR zDHY(3eiVA#6-4Gz24g+%ak>FS&7W(oSj~Tha=1&}*O;zhair|0cCcq+R{ved^?j}1 zN!8ehHXI+vcRv+9zoeuj!XNVc4dRKf+mpmk4pp1pB^Bp%TQlkuLr(F zD;^PlVB}-LYX0dW)9%&8|2?hTV zZ@U>kYzp+Kp|-9NPXis=PC|`0<3x6jXzXdOHz+Qz&VNu}7(Z&4I(kFd=i@v^p<1S= z4qmK(8ffJnMc`#U%Y}gWVD3+HTw-gF%7jsI$N>6j{Fm8|V@Sv1I#c-XOMRuIocI%C zm+?G`QfjfiXXP=FVw;Y3X+slvUeHcww zJ%Y&hj5K_CMo>ZwiHF60^+htB5svz6w#SG~chA99S4W-=`DENp?6#oZPO&?O=EpK7 zNgcP35yNB;wje>ZntQS6eKO7TXtKwD`)EWSv6>VV|PbFZSCwg z+EvNz3{>D+xN~G;XHtn_5$z!GZ0sfA$%|xBpX%i9v<~N{m6RyxL~LNpps3mp-18vx zn%`hq$t-9DODk+3>J|a}8k02tRp{`MRlwCGDn`(Wj8vzHEAx*Ul+%kkXa2!dTZo^} z@z8WEt2gM6DWhU#1ZSQl8&5QqlsU})6Xx}8)YOFUEy6gDOfedY%B{X{03nB*yx+OY zab4@d8SmI+g8uxDbq~1w()Z{S6&VNR{-ca#S0mjQBxLsK358pC0TS+s`7ytFbCkl_ zrsGDBcZ^8}?Im%7&&^@K>rd!{4Y6Gu-9*&t7$(G5uCP}oyna56)6BQ~ZX9iV(+z>**tqVDwgdfHJZG5E|uk&m>ku70b)-lOrlBYN4Wt7>KY zT8Q>lKg!0tz=nh2gBUHTjsV}K*vqw7z-4mw`x;?i`rAt9E0RB1=zTk@K}$3|1{=zl z5}GM~!YgYJ#u8M7SSKAVfYW*JpV?37`h~4z#^otEX5%Sm3`M5q;nzFUAitI~rW_{m zIMWf;m#K#?Rwq}|ro<^cc3Bh#E;W4>pzxWgK2kK#taSr2dY0fM%0i!CL%?nKx-Z_< z@%#i#`mKLl8y(42aXaIV!DYs#TytrhL3ZEVwXpF0CH$T49o|jrWD(xrgVPj9FiF{q z#k(QlX3aU&10Fi`M$rnTGVf^WiVs{Dd9$$)DfCF8D?tu)9P6GWb?YQeJiCWdPFkiA zI@i1vyz7q{d*~Jzv>l3(oe#b|9EAXS7>6mQkgHKjaWDH3&5_rqW zfoBd*2PDHUS8%#$+QJ@V#3v-@zUtZ9V|rx{R@4_4yBm8t=GjSLSX0s8^mg&by1qL> 
zQ23Hki~i}%>Qz1b6@{LJ)obzclPHM-jeb8>9;+m~2iqi+ALKB(wYN@rc$knY&s{Y;&+A+WhF?sBR+E4}TrtA(1;vp`8}(gg+> zhg^{W+Gg$rLL%V9m_cX)>0Dsgj)l;WE{>a{f&;ZHFA*8*=4jcdc8gR#a|mE@PfM*I zzIi;C|xrMx?}igwHaIV;)e!gk;#Q0#F0DgW1_E1-x#Hmm}E%)E{lfK-%bfWBTRP3~ynJVa`rU^~_@ z9nD0Nh>WMoQOUCM+UZm_T<%e_S&D4K8`?!mmDJR~a_sWbHQ`TX4&hu(mrwJiy^~d+ zu>>tyD2FAuB$VGcl?i`@@^M`hO}f z^TD!Rv9n?=Mvcl7zx;$sr+KMX^`6;hG%WH8FtGYghqs44lmLE6WCrgzfp6JUEFOUI zm|$5=&Vw>j1cb(b*1OnJ{#CQqS4jA36#5;0qgFu{3Qtu(BUMMWhug~xTl^6Zr(Y+F z`96wxT9Ks?5L$5s*CPP^LQ%8UrgYCC?N^FDhlfA8GU+;ez`Iw&l192Cv;y5TiP-O6 zhT3<3oEnr{ME^{Ee{ai+$W}Ip>Pv(U5FK(r4Fikz0M;h5K7ZpGF8Mg&=tUSBhZ|&T z@y$)Bk&?W*hL?g@aTheDQabF$=-k7AwN<12E9PvWr)jJakJ+CpL4*oEdg0U@qm89T z`GIGAz|<|XW}m-&@Vl`on-L-q^5xsu{K8YY6@DYyAF1>O7p(%NB5#u(nzS+9%pVZE zE_=*_&=y zGak?Soy2TBRSe|RD0-ej`5{kM1}aJlxe!?`dIpA#9@iTJ9{86ho5e^fi)P2Ih~(D3 zy3D8bcnUMG0JyN1!MO7_%d6h~_4g$~$sq0~Vf|z3f(w@`*}a4-;nP}jlx9eQp7v?f z(B@`dkLiadc9lAO&ZZx=L|loCX(6yWMgbhSkY5_+MLXLUvz>|4T(c>T6->=nQL`!a z3vtxcd`Od?QVBtk>iTMU#a|ej->q=Ccunc>ik_fr)iIY{qqISc$cFjIxa`V9iyXb6 zoXcx9>7k)B1fS@69r}uRF3iZ#%mj6fcr)3Vvg5x|RZN?&*x-IWngT7`qmMqGw+@00 zM#Fa8?YAjsx;XLftXV!$%AW+;t3~V7eeAf`Ut8N<_wL-K|aA=T9Mkjc!cbVX(HHO4eh^ef62e5Yb@HB)v9Q3|RYqe#S&7VGH*~HdS0cE4`TZr}>{$N8w2|ZRlNM}{ zyi25I>xO3j27R=)DQCVc9b#R=Rt&!XmW4mxm?{Or;(6xO@B$-cqB}-5tH@rWs+r+N z+c4X)w!xC2m%^NyPy6g24i7)ghS)8QO~>F%L0kS8#K|eDWBlph#~Xe zGTqv&j5lLGz$AArG7huH(H?eoL24b-0{_wMn!-w@aym6JL}v)^Cgm5<(7rT&K{e}k zexMU7OV_d&xDLP-VwBeBJ*P{N5t`c>FrOfxbtm}2KzkiXCZ9l#MtVm_$0sICzx;$7qpYYe<8{=YP3nT zhukP6Z{29pQ`=>gm%O(5G(&3WGU)WxA^ei*srT@nG-EUvALyXj)B&C(5U{Sf8a~p- zTG1n`b?7Vun3Fu4_zTbv`AlubQ3!=c8cXPbGt#P!S~n&vtvzgeZvX)oBn^3J_CAfa zAyfI=d;L+ii~0U7q0xTfd}7sqeg*OI?Zo7`ADfHZ?h@#a3|yyrJMWApy{_`t@##IG z`QI=#nhUX6nsKzjx4lA9zgp@9#qx6!Yj<*AkSuNUZoaJ=bN@YAin`A6c~A!`EA?Y* zrDF?z~hV_VS-<9M9SuDoCTRTqmyWqzDqnpKYqsl|HQ2woCBENmqmz|3Pd2#hoiCIMCf+yIbfaO!;)S1Z9VX3 zLW=E%W`xfwV$&tt+nSOAD z^VLuYxrJ-&&PtxKdpV=*;N69}v%UWP>WdAp;4Y8;2;1Y}?GB%q2hCe@@ZZ4+{TF1f zGq_yx^Nq>)e+6o!@G{L}bzUq%?1!KKCk$ZU66taT7U=&QM1%oND!|pvy@A!#s8G+} 
zM;_Rgo}L5={6W$Te}FcMT3;AShv~;q+6fR8T)9r#Ps#~$5f^5) z%&@bRQdeBEekXNCXI7c{zP|-1@m`n-k6>Ld^vwb-%K+7q-nZa3Qb4|sr0ouv=BD_% zaB)8+#gT&BZMtt|iC%rg?K=?}oZkE=**$U#7Eqhk{fxM2rZxGezo7huAt_$Z-@26` z$IHnF?&h6of<=`zVL3Uu?{A2GSHzs^!Q}qCDG0Ofw5^lgb-ssZBAP)+eHNd)vazTD z>X#MxR_P`BsdOL9zc_)e@yS(*szcl|nmU=31)wV%33&g~|N2r4A*C}>TU|BkPR~s1 zX4*cfjAbS$`5sAo%xorMVvCV+SM&XB%kbZHK!c$D4-~gko4l4AQm=vP8Yihj!&P1m|Yim=YdI6vbkbj7u!(f4M3Gc17B;h_BJ#I-a@9un9^}P z(L^el)A;*>LTH!G-YuQtGOUHjehSn! zuUO|(0jZ(VzPndm^7VMW&URgR_~E@2 z)2HC3JDDMmf)m{><;vkQr52(;L5_metaNG%7u4gVTJ-`s z!!Vzkuo+`LPSBkYF9&9Fx}WHTem@PFts5B`f9DaWY8Y$^oHy*pF}XSnzKm5?&pQXw zeh0N>l`9_qS-XHx&oT+^crSAXshZ3*Qn1v=-^c_%&HF#NuG)gJf4cfX)YSLJ_~aLQ zXu06=2@F}B2u-yoSj-`-#`41hln6Th)-JR+t*S>NnYsMXW`*33gIP4rm(upT%fa4) zo~qvkxwhJHsH(mWP*ou|;_P`l+I24kLwva#x0b^poSEPd$TPRlPe?bnPz0pW@wCem zZ!~t}iF;IGy^YA6ZB&3;CMkj*$xPt$Ww6_duD~$JbTUxA8D8;>{#S6h* ziWj%y5?qSACMi}NiUx`mr?|VjySuvv3x2{rckg@V>^=9(olHJ3NrudZt5Z!ReDhwLNC{I&M(RN15 z9DJFoXg}mV<_y6#4tBkf)1=KsF0jcuTvPkDEk+Shw zT95nOMbbj8WLXi~?VWT|C#oX7o40jt*ul*>%g5|Y!{ok9$NWXypvx$|GpU*7xf3=) z{5fSXPUVm&WGCzR>f6%P744oJ9mV!`7MLA71VX5|%rlQ$;3#5Bawi)?VEq}y6>$3# zW5&wfkk|Q=E-l#yfDP-z?^tW(DcPtj05F|$Zec}WS1Zgq1aP-kE#T_ckpd0Y{&BzH zc9HDDUuda4Q{1YYh*q%X$X55_Jj?693Y|XqaD$#zWmMsrhyp4zEg1iXNEbvP<3F6P zBG|Qembm;@a^wez$$+ARZQHX!yu-gXgjyI~W&&NR@)~tkBCqL12Tb1vE)la`=ZJM` zOnh8F;LlY3{w?%-h}$7^(8t5>#Tu;er^O3Pjz;aSP6n=9jwHAtE_*Hn7t!uk-p0w| zNI}|I9prGd3ab#KtI(4(QdAC|(>wFI-bdN)c zLkB_ptwl)}TN8F|2%I}f-^o9YdrL}MGLE^WPBW&okLcD0P6FN848pkG#Ji{+C9dG(_f;KIhv6FK@|HumlMNaXaCOYe{Ji&+%|_Vbz$N%zsnY}!`qwoG zf)e+iT>wi46-GV(%+eymU}@z(vka01Y*aC4yx4PzGC>YYZ3z_qBffe?Ta5I$kwnlb z`X&ztR%rv(|Gw+~a#%akZI8|s%&{>bLT9$B{p7P~23N(KKpbVR^91+=J&2E*& zEE2-IOLjb zkSDn+Ik7*XRf!Vc_W<^-go<~-er(5?*m_Uqx3~X+@K>k>pQxHt+UzqnTcNuPdL!dA zSIZ*MvbdO!xd{!w*dSlM{&1;w3}34ngUH`fLq8+tr5TDlVlDEamh#{37D8L%g((S_ z;Z<2%eq9<&+LF{v`Eqe~h?b0ZJM5OIxBBE;@g(qFw3Q0R9R~jbby>o(>6LUwg^M#+P+zdaQjD zP0jo_z@ZLP+xnr%@A2)A$KxBbkl{!Arv#bry=4k?ygauzzR_YL`-vK~n$Yh;(;``% 
zo|j=0c+&`e|M>G0WJ^OFs$M4Y%(_i!=enY>c{`jT8>4D>c~JVp^5>PqRJiV_n!j)nbk z3Z!&~c7^ru(7hDN3$95fpS$(i+j|nH-#w;0S{I?@2|@0pNeZMTqbNRbH!+F}mSV`f z6TU>aRcuv)@czcFcv=f@`c}WqIKYYbAZ*LF41Wc`XqL_)3S86J?9O11sF1`KO0h?( zrNzBNTAg$B6>1r{M5gHm*_4Dha6QrtQ9jjbZh6%X+0!=tdXpV`KBY0#^XGYmi;*1M z@BBS)yz$gap--$@`hXe6ii_CwEediGi1#7p51ww&7U zS_aUp2Nj~?1wDuMhjHhejj9MENQ0R!GMfR)kP)6s0R+fzl(dT_hBm-py!F?scL+u< z#kY$#UugV$k1RzddrNbDt|H}PwrM4XoIq^(w>1BZ#NEkF@&2GSirBCCPsWqy5){hr ziH?VvmDiizaPm{MtoVm%jAk~#f2oSb*hh|x1$G&VkpB=BL&)3`@3*XM|1>tp^&B_o>@?`X z($Y+|H6_ym!$QzRX16Ff3eico({I#o&&mL*!UNy-L(@W zJY%K_NkYpWDB8|2S@}lTGa63DOhs#njnsn zFaViken#>7p=;uYa=<-OcDF%lN?H#8$)W@~-CkGP!?@+H7P5vnqk|9R2Iny7=ppZ- z0Ryj8LFQW`K+Fw-$SKO5-4-Z3J^QxSCYy6VV_#k4yB5wvMu^)_Ua8*zCPp>%yna98 znu1r_xA5%iS`LycX8mDhhcR&g2-{aIb>uBO7i;JH(2ItkaQwS3yPf7Px;MrYQ$WvI zay)KH3xTJ749DdcJ1P#@(mnV`V(yJ^u{MCd>uG)5XOoouT(8WS`O1{Hiq~4gu%~K( zbTZ)GLob&{`g+PU&HaIXB>6{;>~puJY-cbcIrz;7Bpd@18TM_g>}jxqor>lI7% zmX@~1T1f1JD>~6$e`uvxhBQ|Mj1HV?xp&ynp|QN0dJBen!3dlf)V;yS>A9)yK;?H~Hb1wNN^eL#g8f%jTOX8lEDx z98(h^=G=vhY>lj~MPlZPC3@AjB^^o}*stF}h0LSH@xQ|LFGqcEG3Y5Aw>nJusA+sc z<$cL+kpSJ!%}>XLeA^}Vhl*KCf+ z4$&_|EZ{eZ@j4F2URqS>Y1fV>5YJ4Kw(%A{rBpIF3-AJa7FJmeYh_ioLjaTN_uw1PkgQW7 zPI|AA@e@DFSIKU0N&@cT0RFW8k_$p}B=T{GlWE6U@!XSBJ>Ey0;Ni0r^T#MF6yFiQ z2FbD1+^b#2t0r`Ne|S%xo4Y1Z64b`UGt%+NmNx-ynSt~g-Ec&T-ela!(o*^hb57U# zAUjb<%DWDB4Ll}~DweiZ2%}cb#;eFa`2g2Nmg<~ zK)Jbi5ZA;TE&N(-lugn1&mlqCJ7h`^xp+$XVPkzc5j>XQ$3v0PaYq^XV~!hb)XjE$ z;$hU~8y3X%K)hL+8XFQ*0b5l!L>qV|0@NiI+#w+NB}?G`g;~@bk6D0uVKrJO8ILfb zWlBxCHRHWwyX(R4--}j*d;)yw2NQf_B`$rLxI&<_-4E|HTRSa+Rm`*_Aa>+(aU7|- zISjSu8&sxNYrZ#xBDd|-74qg43ErW{zQJ&Z9Mz7hBSAp4d=E+1TLL`mSYkM$JLLB zpyh#y8C%nGFj%VaNgAl@lZjm!*lxtboN$=Omvg$+_#*6LYo4|QnGiQuuf=NZeLmn1 z0l(00ZF?nA%4!71T`K1IVlDdet5e-6A@N&B_`8h(jhg}5C1w_I>H>22ND_x)Qd?)Y zw$B*2X8#!#+}~2U=+^|_^OdZJjNkNLVJNv6569v<=~h?BxXm=KM(V6LoH75O%pR_w z?D&^F~UytIR!&cx!}gvO?fO`NYI5HE5f>A`4fesY1ZdV z`Upg|RST=+SuyU(^e37GDt&K<%<_jCn_VDX)YR$7EtJ=6*$PV;1_YdIyCJ4rH&w+V 
zie!`Q4yxSWWpoD0@nVKdz4HoUU}rZRv$LPTJ??NX-mIHxA-h^%hZ9~FgwX@oamX~} z=rG2E(T^AyV8}FAsI}0)aPwW2g4!ZQ0ob~V)*ZDBn40fo4Eh`uEWB|Be+S(RGzQcF zGmI4^H2GI;7L#gKUt<$E+MDMt;#@>LeA!(V*eKwwqj0h-UaW(yB$>PRgs!ZO2&_+o zZ4uPndn*R|^!Q@(k)zn|*2i3ysnB+=OpL9|l|-c%eO*o|XgJ!wlWRNG~Gw%q^7*E!SZT+FpuaG&y8!BI&IMR2^- zWEHUOA2DG`pAwUEBHHF)SB~<&qKS`tCZM@$1=sIc0=v3xoB8Am)$j{)%Thph80~AX z$eO@rcgqmr8yuy_!AroS^OLz0Bs;aO2IWQ(l2ZlV4*#y9p7(*o;;vdpt#Qkyy z#2s+tx%=U6(}g!{iGk8mQLwPidRd7*t;dK#$0FDC1S^&8u@0P(;pv>%i%eN=n=GY% zG(#Bp*sFW8EFSozJ^y?;K0k%Oi{ZQ+U-Yj8?QX!>c88`jzkdNR4>g`4RL!A3Sz=7T z+``Nxv*7op0LU2<0Mw>jkHJltJpO5grv*#nc z6Sz{w|JC&mf{C0gUMS$O6H@+nf^MhrOxJ5oYH@M?hgYB9PfGC2*bOBp*(pA~`)`j{ zv`)mnh1mZ$^MC%Eivx+YT-AgoqR3!;7FO=IFwSZ9N-?L&vuZ#5Iz$61GT=CM6GY4F z8~L|s2WE!?sY2?+W#(i?c?t=6vpO;aj7>0o<@))1`4EaaPJk?{P9_IG72Y#k@o^YZ6lkDb}zXi-U z;%I-&xEKFUE&~;{#;kt7U6R>f$jjv0qCN2C)Wo}!p^(;DHG&F2U^u1m?;!w(_IxHt z7N;?HV(;s6<`xCTQ^x&kfVjaqMfXI#V96L%>PGQui{^?wrTRVtj#1I#2n&tvsDFu1 zcwsB?tVb0;)$IIxRbb1(!8taZ68?Q=SqzYFXF_VlgalMbNf*4-*?OaXc?7!np`@6d zwMrG9de|N!#xF9UkpHZ@>x^yhZ-sKWBVvGej<9I%QD)%!ElD1__hb!arbpp=yVMy> z@iRon=CQC)E7pVUuiPQ0h@L0s`wE7>XGfodL6?Msxo3CkpwU+H#`tF3jp^~$LVu&L z5+4vb{<83McLJwP5p9lYKL8R;;bO8N?&1}xB6B`mYyT&7K-sge4cU>%pX^U1nRb&S zT1TM(;hLR)A3z{b20arwbnn)AFCkO5T4B+Z)ZQ?~&7Bs8pa9#5;6fJIXc>a4u@fHF zfA%hJ*px=Vll-zeiNF+Mm_VFj(Ha7*2K>%0o~?f^Tvijv>;tM}z`i*AIiijdjWTMX z=SUxMGX3hMu~eg@!4??WyIIf=5v7#@WkvJfDH3aBG1ISf+!|pSGv*7;q&axTJ|x~x z>37_l*EzN$c9K(6r1(3e!)tro`NQzY85~dew;d-abig~|!D0H8Lk$S>(^Vz_5(cBEr?%#imAyran=-Hc%iF=lu^6Uzv2S3M(v*$;f59 zP}j)b-WsPt!sy!e@Gg3nT;ry+j4AlaM4PBH~)B2&(DFi7( zj|g`=nGIJx@d5F4Eb;f}N&16};pAjQK&qePcO_3Z5cklY%!4PI`=Gwp)L!wH==KKJ zNcw^mJ4MVfb1Hi;>l^c~yiqCr0Rp&p>XIa#8yOGVHkz9ZIGzY(Y^LfF5vF`&uf;Q~ zBI|}`k$Gm5Ro<7T18d1-=t~ZzYwxMf#c10%2|L9qmOGi*JxKTyabO#DKScYHIR`TGeC1FpU#Gc2AY|KYTg_pwC3lMI$Mo2XO}-7C}~9}i=fLSJ+>1e`}Dt>2bf zCKcL!b1lp>RZ1Azgp;2aZTT!q1hCy5{-q^{6cB0kqG6ud!<@9LDBciK zrg*YX<*}Vp2yjA@vF~@xZ=4WB-rf9?I_m8lrRs6va?-oW4HQ8?^ckCt=qSfsx6>F$ 
z3w8d*P~Wz~4+@-&yy9c}(%RH83maAGhxl6mN@8(+OiKij?HRn4~)=n+AvaHjcv zv-eDznM+1f`4k_}$NDXkhz7M=HlRS&HnCbCbt|+|I5f}^#@_tXfmJi{Ho8z&%2~)J}Hbs8$ld8<D+(Yi8_-e-Bf*1(fR0Ys(#Db50Q*F98)o3Bz|;sO1!v#?m~rQDQ1pPJH>VoxE@^_pexbsk+0w21uUr(}Oua_59<6 z!I4eo?%Oe0lL2>6Q>zQ9TRrj)`NX*15!?;L@9(EZ62$IvUgC@Y=4N~PWhd|8=aV}mYuJHi(f2{Id` zBziOQ%o`lLU2M3kdrUI^AFB6T{uRaWOP7lp!W$6i!$@C8;BK(+)90o0A-t3a)khI! zaui`CX3!V6RmTD)l@EV%8E>1o!LN9Awbh)exQGW?C|>|EPPt!M2{J?hmB^K0s*O#e z1AN9p;SW=l%!%b;3S`fkJ7^0ioAT~t?s)h0LE@TXv(;H@)UCNpdxVW-b%00jE-T+; zM|nWl7joy7;=|$(1OAK3+uX;l1C^J2d0F4?D%{bcDSiJks4RrENx({6_C_v?+NuBU0-m)U^+meS^LEf`O*5Q-^$ zbq(q(b8Cri)D7XP$a^yH&)9y(b&&wA=zu5TnB`M$Ihiz)MTch3k(MyP=w*jQ+ z{=aC2uJ2Yw3^QvEqzA~$MQv2;= z5y3Pr0@Yv9V@)$CEHvjzYQ9sovTCshVLb^wS(cgsk2~!rXdF$f;Kl+>b4VpEKIDSw zh;!ds&R{ugKrx1V<3|_uaLw7f+0~ScfywK+CFlABGQpbwJ*yZYY$7@9bjnLmV(LC| zZmQIAhaYl(+@2~2&3sXAl1FQ+kAI6Z<;vrdRR3kWO~j?uktDNBwvAo zz3Tt;-VL44pd~l?N{&zbtL={Fk8xzF8T{JV$}3~c^><|-cM~GG~TNQO;49&&+d!<>8%dp6?M0HG(MRdXjKR zOgf+WT&fTt#ZdE!*Py2G0eL|2>|`FXNXJq1>aW{`bDEB1pwW`h(XW*@Ta!Yw{lHfB z7k`;hEhki+wu9wlk5}eVY(H+y~tAh9MzscKslwi{B1@5VTk}!@|l9_zP^BE5=|CVt!a?gwK^Oz>#^pFuSp9ZZ*5Hws?e^5SK{D!67C}Z6N5&F1remU>+Lgi$vBxwxTX>w zffE!sQdH!&dGhXT$ITTnh?q&HDJx8~zav4DLa=v8BEV#&WvIQnfK_&K!6B5@Wk5=1 zckOiC_&N;7Hg1?`BU;dA7=cwy^t8Opv=xSJ$mJY@VVYg=Lpe#_lw%%;|6ne9 z$|$06)ne8`){i=D`Q|+obkNa1xdqc_MRL(J;lDAb}(S3Hd=ETU>a&? 
z2ja8qo5s^0!mqVYlq^yF#$Vp=Y;VY~vT9W_}nLJJ{ zmjR7Q(Fx3o_@KhRreRp>^s2=YJM3)st zPu|%WQDcSCVTU+32I_&?o;%FFeEqfn&=E5daT7vLPOLj!xj+DN~@ASWDKpp$s zO&czY)#!Anbfx6;;GiD~5!t~nOEA_|7{Ot`qz;@NDY*TuyRf#(_IAZ~9U?8#wIf1f#%Q;aCp={QyCxediQJAfzuW;JMQCg(xdSH9#R&d8WQ`yNae1B572;6U z3bP%;bqiTnh?1(QkUlTKLdvstU)OHSKuZ-x5<>XlTQv1ul}o>Sb4_-BTBMbNPethA zzrskr|B2{abDAy@v^d4Cf$Fb^d` z66P6-87XgJ(OC;%ccgEE2c8MWM}C=tGPAOoH#_4XRC|~(I^g-nl2L%-(fbj1SZFe$ z-8z6Yy+g`cIO?5W=6>kE#Vl;XNWs@#tTTbJ)8H#}p!~J&2;tx{ zrtAvtcq+8j=`NgFZ0%&0&R}eub=V76#7>RB@9#5Y;ybq;Aax5F6~*M;7foG=^jj`V zLtaWU_vWO9S{@I@K^&gh^Hqs5LipZ}J3idGVvjLX@7UOC5q@DR6n-$oUphw;UzS62 z2$D{&AdK!jK!)7%By(EF>=uf&Y-iFG z^%;uy4ax52&;bnT9(k4C12vo9wHNsnrU?8vN}a`L7nY(Rd-3N+ykNYpKa!4ob*HNrU_r}0a>b$!RvxkyFjL^z_-j59;xqIs9U8Z zcQKp}E8;GZKmM)cP8V8kjooRq^%Zn){A^$M#!C}lXL<_q=XPH?&C2v;qxyO8IJAF* zW18nhs>k3$(VzHHc&?FxzWt%b;@l&9r4VuM`A0s@x9^(WW|kls-j>GwTo0NR)*8h# zV>F$XpLBX4RKF@#-iEDl_K)-&{d%aat^Kx8V+VEMD=e~1tq>S&BULm`V@^e?M(E%b zYg6ChjX3sO=Gz=6_J+@PzWLpL%g%^5%<%YVWV_B=qy^WR@^Ub%4=ecm#aC7SWb7g3dLd{NTN@y zx<3VQu2TI`z`$2vAWuab(@S74?rd`|;K23NUu#sGiAnjgqB(0~?~;j~wAimJsFIE^ zbG$U?>~`b*N8wL8#*D>}63WD>H0K)s&eyWL=uL4>=ajQy=L1dFf>xk5sU7W5jDeF= zU?vvKi1YBESbfip*VSImWxodPyJefu8~|1Ek^5DJ_CQ!~u2IAfMfcqr7PRIm*5UcG zrOoS&_sJ0Qv>zY`E@i!*48M>!6X;zn?bP2;Gv~$aWbUYg;PANyL>=`Uy>^{A`D{Q# z`oCaTqqy4oCMkOS!-9hMx3FyX%G_p=VwI{J#nh}D2A%rMo`o`!Z~S`B7S9bXuQd0} zXm=N-FTL(|O(%Z5GJV$3Z#p|1k)vWLP(m)SA?IrOb_S2GDzEwlgbR{js}C>Scj6=l zJr=(QRZ0;P7Q&nsY~;Vs%!@(EwDW!2&)GJjM{6f*uEz@SSKFWYv+uNLpO~tcAg{B` zGGKi5V|PJ>g0?sT;7PpSwIG(s-9lIfpe68)*k+l*)MO%B9*f7FNOxabalCvp>)VE( zUTcN&czM;aVQ?l^qTSA;c9ZLB-!itDN?))+^!|%z=LrhC zi%Is~H7krAZwSWlG1Yr2gzg1Y_I?QqX3lCTPa%|(CEa&3EzkI-Fg_7R-lHu~XUnMFdzn_T_?l zEO&t87HLrCqHC4-uk12gnU7WIxJb_P1fgfzaa^>Zl#7Nb zIAIyxZ9$gUb}1Z^v^hdg?7QRoffrDq&!${mO#Dh zJ3jyHOigRfAed8mF)$Z-Z9PuRe%b!!=H|SUY(cRV;NV>s&7Pz{TBsQ8VXZl!srY5r z;IKntZ^!aq8uzx68SeB@%JtKgcrjrjjVYUib*XhBRjl%c(-Y!-Q5mF!^davOC(b) zgHUdl!MAR2WH9sOB@l6p%vrYwGHdLIu|S0=+%G1lKL|GJqqtl$B;r3c$dAuehwKor 
z&Hh`AE$nxL9o$BP-Vi>N<;W5-FaMDZ9UeM0`*}na#}uUNiPNB?bE4UphIGB)D?KAyt?%1OyaJOSRpl5)l!1=j0a|4NelaiGGhh0$S^O$1AJ& zpEblJszoUR&})I$rS8|~Jpkf|ox478XYv*Mekc#vfV^69TfxltJ)8Q?5kMFudHXFA zaB5(Nuf0|j)$tmhPot3J{Rp=W?K?eL8-dAoD-x-kSBR~a$Y;GP`5xx$@nvJerB%!VO8%fIclmD8Q?j#h)mz@`O#e;G+a-# zQFv2c-P0i~^VhMVJ=YU8{AuYul#MaW>@V%(j<@gy{W17Wv!OvMjg}uGqo^3?7jL__&^BOHLs}g#{Q8-<6Nf34GRjh`kMCb_PR)$1 z=-*d)PHmHiIVq-K2SP4!NU58}n<9l3^?)}sF!TP5*fEfdq?>S#$whmfSZE()Z;y6Q z<>14^+L32L9Rdg2xrH3txqn};yK6n?NVsuFXJyJU+3K z{(S)b_MTZjmkh=!rQy827zYg452YZ_S)O#Co-zdErdnLaSnh*U$8!c(>=S1SZ)B>$ zM#9{Re}=zzZPmw*1s0Dm@*5$Vs!Wg2JoTgyr8uKj6pwA5g3Vby^WuFaIMt3k)HYiP z^c)pkRE;k+Qask8=}9wJH*6{lIM^`c(OnU~|8y0OBLhSU7M;$i`Pz<)>TMz%?XvBiv34u{j|>?QJee_-TE0|tfnE1yp6^>^XCv&#@j1@ORM|26KO|s-FJ6d zs7YKiV@mIBD9IS^k|${T^^{oyrD$azXX+lP7{i}z=w5T#=31zJ0^K@ck1Yp22^!K7 z=-pEXW^j@T7?gToJiuq6|DJk@$U&Q60nNYM&JAA*H{+mB7)$5yXA#{4b9+1DMK{Gh z*KzeW*M1IozUulNIlt#m*ORB0D!|<2+`*rxiI8ySaKGeX#R)yPqYSJ~Plt7A`A5c4 z!`o#eZ3TaPgeASdn|sHX=1PdoQd+DK)P3K&g)q{;|8E6853yFZHHnH9vhj*4 zcIqt<4Y<_aP67AFYnF=*kq&N&w?Y+33T~;{#@I{Ds{T>RY0=6fgo*J3LIS`XRn%Yq z?f{e-HaW#{$ie2K>xiZkCxowD%7>B~!g%|E%O~=^uKxL$iZ)(z$vCz-bxPo~7EQ(= zY%N#XEFj^=bITT%s}EwG8&V@Aa9^uGWV?%S9Xsx+9W?D10Vlg^9+*>-cr;uea72Pp z2*@9Qn|9WcEdY*scV+c2x!cdv9kyl;?!xt@`s}vYvb&W`cg)FjSj%0?2+(R7KJbrJ zQwv9StpfKjXK+vD*bfTAa$MSZgMZ>tSolg0N+kA8+Mgx zvC840|Knfda6tbCeEr-7R6jfxGk;Rw$2asJtJ}o;Gs^)gc7FuHw3$@-piVjfsNyic zs?yDsxoLjx7UqEpS-ELvrWS^0p8)F4rs3#!4JzG#95^)x{%M|Sbpw_>azl6lkB87^ zbj5?w;kPQA?@D>*_d{j>f`$CJG#Tef^50(lk&c>CjnVkb zTw&g@`&=#4QmSf`k!zuG@NqrCsU|{dm<0b|34Er62MNMaLbJb{gftiY8hNg5iJx!C zSyM`!{Gh`>Z2J+lD%+h-HBHUzKM^{6mIBt2ShF~i*pvyFIa5q7VP%We#1K=A;TIsQ zbJ9s_)1K}`ah})#NsQC8NMv`;gtDK9ZtrJB7t70>A~CtWbgKJ7vg{5?xsZ3VpTa_i z{F2o7^r~MIQoOf-_SMS8uV$Qfjv@(qi@kMeJr&13*J4e0ae}4!i(wG-l4jCn*sqE) zvqM`_4r4RIP<8to@I`l69H;f?n8U6L6@lCo-p@2{TN+S;U5tG|)_v_}=Y8ckVdfjx zy*&)U=wZvzJz}mGAmX0il+(~XoJPO3_~m;Nc3I%!Jwwq51{+?`z;`}P6`BLuU!eI* z4{;M*y|yM)!p0K&G8^`j)%R*6lLi>4${nj2n}qS(RdI4bsr0{yguUElONSQ(zQ5?K 
zZ;r-wDwI`ar|?ozd%ch*t;)xX{8}AhQhVlZPw2>L!IX(V<%}2$pj#AT> zl}jF+lC-1Taa5Yogdlo`nbO4bXxnL&iOIk zR#{MM00JEU!z+cI`2LO|(osZ?fE3UEd^16WpiV)9xk8!y9w{I%@6|lP% zNjD^ae2gKGm6bJez?qpdXMtvGVUFamss=?Ok^oV(azIQQv3Csu|t6R>REOjg7jnVp~5Ko*~!hWb?=!6lOQ6P{&BV+<`3S(Ow_LlrAjnBm^%xlN)XKzv;AQ-=7)HP+Vdp7rgJrybVdJ9Dr~FM9`; z@1M&i|Jrlt-ON_)Ld~8DD9pFBM4w4C-y};~BgTEjfZTqaT3pfrC$yA`#}Yjjsa@=q z&kxXKo(0dO2=BqsEwLH4zb&u1ndbh@2Hq}(T{Mj2q8pB9DGsB@<#;=0J@6*%CG79C z^*BmUrm$5(&blo1J84-ckhdqm^uktMWB^D&wt?_R%kb9uUBrp8;3aEMCj$gC4=rFE zO7_SxEu`xn75+gtV;-R03t9JT(Lm1t3y=)IT#1Q99`E7XEp+k#A6HuWCcik~XHrXt<{kyv zAdw>j$C&$~P#v18>)_Z_4LhXRq-AQ!?P+4D86~s*&qd31pm`xBAPbK`@@9cY%&LHg z`-u6)Z!(LDaX5LQ^(&PMX;~DS5>8$>heQ7)fWz#k%=Y$DInvaaAtG7aaU*z;9>PVw zRGF&L2fbucH&1-!Hx5ow73|`c7d8epSx^QciHVQqg0xTxLr`G|}+wPADq= z5&J&Htxqh1xIr2HZEs(8$cPxn+3I(!x%k7su?Az5=TEnhSTh<(xs;pJbOyoWrcN-d zDe@&v!im3-IyKvHN@!bx?+~~3F%L)fdSn&W6kxah?)kD$Eh}%YqbO1<6&3h|cze~u zR>Bn+$IxXVDSVfm-djB>xtvSl(>tq1;WDFDUvyW2=)Mof;w>u_^T<5~bS+8fXMG_L z7I)_``&q_?KT7mf@-59gXT7wP047_q0XfZEb9=GDwL@xfM$t@gjU3oL3}aOB0r}$& zM&!$6?FF3fv@bP=e#mU0U@x;h<^16s6KUK`383pTiMj1w7V9V^`_RPMEi z+82TU&)S#&BL%Q(ZJ_*lmDbII9`;CH27VH1W-KOC+frAqCM-`9*F(CLN*x+Ii}F~UY5$ex@cmD;$nLVM zL-y%xSg8lTL!8~n=T7;ZU9yP}=rF(L%`Tc=*HE&Urg-$)ke-87HhCKwrnI4%thAiu zRz^DkOx^rw9J|7V5!JHdwlaJjNoClE{QL7+x2#Teq+nHS55KtrMnhMBx60I_H0w`6 zbGy=EgHqw@Jx_S=exp_RPpc-g6!#GlV=-zx#K@{2B+8je)OsZXbj)+#Tql+A zAbn>#r_Ji3M_0D_H}_Osy)dri{f+c(iLsa)JlOXq&{vTwa3d)z4K2a}IqU4Pj$2-s zn(1H$%-V>eS7+mNH1CpIqzsGbHpMV%sfqI?3QUNRl3gFNZHXPdVD|V4Y`5gMPFg@Q zJbHDWd^t(hI3{rQweuPU(I1oBeP#_5vU{z2lgK;7ubY7I&d=wXZ%=O6)4UVB?kYT( zfukp{m!t&QEz8B$z0vB>VL?|Ff4!c2W;WM{n(7>LyCHU|b19VIp5qsDmagW#jU6Mi z5mbB<-@^ZghM2d$2;l#Qki(5?{nP&d{Xa+=bWpu>3*jo99F)#KMdBrN~OJIJ2B;Ab?axnFL@E8z3=qqO?{gQi1gsE22z^D zeqvOS=+NlM)+RN!8KWeyPpB^G`!HoNC#-B3wXZi;`EshgE2m}OkzvrlxqbQjm`Ql& zV8%ovC2$4l^^Zvr>ID``qr<#Ot1~LDLowfru9FIZ+ksH0!kQxSHgcm0Wi6he?k+Lm z@%#HZQ{=gjz7dQR|2sWDmw$xl3=_o_Id|Wlv{ZxBg;ZcATT1!TV=lxkcz6X=AV=r2 zwSSx&@0o?`KF9|RpNqaXDYVw?d%|TtmUW1h7>CjHJUYCVJx(VvU{KO 
z$eHyEbNp9=M}wPjVwvaOm}(o_lS~cShd0!bga6qD;AOCVxE1OQQ)eV)4KwHq{@pxUD(X1tI)EtMG%gY^xIq5PpIc61 zHPkkLb&1BaFLM&fqos&A_uu<2q1fedg&()fef0E*`hq;gsCammddr?Db|t!^zoj1J z?28M218l6=9DR3)&_TWocn0iHaz;teaJc@=F0Gsr)0Img=cMnlLSv!XY>RP`+SOUe{iGC_wDU;_FDNG zVu(J+RqrEc`o{21U}xeoa6|oESvsoLxyM`jq0r=q$?Myv4}+k>gIRX?##xuwCdXs$ z$&1dM--BX&+kuL^JN52b+^1>H6`6iMW)d}J7_O$W)4-be5#+@=16(OXza6| z?kF1i@u4d=iD-*a&PG2xR=n(4*Y^oSL|$`9#-AgG`0&Oc3UFslyrDBB@res$ZMb2p zbfEWM`Es88T6hCVlLtG3QRm5lr!IMkk?8fjh{4{wBI9hmI$2~k?$0=j*1+t9Yv>g` z$`RWn(ZXzv0pybYb$4_pt0mn*A1e)x2j_jg6BoILzLco((0_v)LFQ>VN!RUOziQ>O z@ljXoh<+F3Xq4uc`6thEaz0y{Y~1MjN=;~~owUPm$2MG%hAc2J@KHpGDb8~pB^wVt z9bQ&xnzLS0Jy0_RxB@4n`2=uX%~atZiKlmOFotb3Hc|{*WP@`o4KpEIxB7hy)ac%; zBL|*0Tc7*9vEx-Cui_cxbkg5hap1a}2Pa?E42CyZ=MuJ*9bzrVVhWy-moPG z(wz=}MDM+1W2-SgaDZD_=}^)i@^0( z@p&TBb>j!1r?0PKHTQFdZUx?oyB_P-{I>MFaNtd<+QmTa@gQ!1clIHD;XXfiQD3j^ z%gr6KZH)IZpcNa#4mQsdBA>4|-q8KnxWm6gf;KxXeX}_sQ~vSMs9eP%>jjhF&wZ7O zo}OHHcH7NY%l2*ST4Db3#LRO1xc;d2TWQz22vrU9#zwLJ_5Q&#$k$f4D81^`Hqb59 zJ)+0!K~S*$^5V>jH6;){s9=TFx`^e%s4JS@#}=7wc6z#maWnfwtAhkfBS<2I{kcIU z1Xf=VBf6}N=7<QdbZ&unl?3h$*Fx7kERE z${9->SMx4w`kQ5G7d2%jj)@fZ@4>(RrtzL5dVxtOo)>#u>ptP#(!Y1Qt#-e;_R0{* z;vEjqE$am>;VXs6(c`LyZ60PsHH&i;s(Z>r-$4HXceB_zMtr`+RCR1T9jon|pJUaz zNhh7Ai>`>QR2hD+)Z!k~i9eDsSQ8N;ipZBorrH`0=1oVY5>+2U8hXHk)3&FrraHOk zoI>r_{uh#WCayNE$96P6yKf`e^ah8iv%t(&`1d_nnj<<-lV>>IwQK;6V1#o!r#WO; zvFdi!oJX-Jho4+Gnp3!CGHV1IMvs0U$$IeTVY1AV-QD(3GZypx3@vpen~nYYzW#(9 z`SqJ>A8>pyda&sp87?mh_er_R^f1SnbQO19$PKbbSm1)381uc5v>eX;6KdMZ^;+T9 z5scL1RzuP5U`HV7I3L&w;?)t>PgXhJwxJe$o2_0cZkfyK+ZgA>2&3oPdaa?9l~QE= zOC@0vhCa%9iS@)0Ntq}p_x4--*d_D>iu&0J5({Udvfp0Lca+259E@N;j8{AYrAAD% zZjG1?eMy_H_U2yirQHauNvR*QJvGeG+8lv1@n@UyTSv|4RQJ4@)I>ko-3bk(`A2A6 zTf#AR?GjiHeyo*$@++a@9m_o~Zgi>uHj!17HdZy^%EyxpdmlW<9~9=ZiW(YHS}T~1 zSs@MC$H85%DAXBrOFQ(-ZKXyE-KK78G`t*M(yH>uPMS!RnQEY=LQ6Kr(`zjTA3Avv zN4tG(42gE9VfI>MVx6HmHkd5NeP+Vch$NvD#F+6tl8{^6eN`DeTh69%@*+l9GM$!` zw#ZtW6Ek;KUDiK-KJw(Q<1JrdO0#de@+J^v!O1_}OsJ# 
zm`5G5NJEdNs>U#%wmEoj7muEZXm!$wW2@vhz;ScGBYK|WV;!|ps>piQ&j?O>kNvB9 za}F!A<&-2m!h$=V`W@UaK(Azv&e~e>peZ()KMUdcnaY4!kEpV=; z;$n>|M}=8bJwJG?=_Ah(T^yH(YKiYtq3T8e5x2bx2r(BeCmVzFDH(D{aD!Q)OK~f! zqDJo`nHn?EN)DT0F$sFgw-biC@koXRo~D`D)4)2|>X!!hk>sCIz!8J@e4Qcw+7^Ai z=wzAJOGvq~jbg*$V!@lh)(cp9Wy?iT0E-bGJt`iqzsMsiym()pXLYCP$;Biuw!HsK z1(MZG)?&J^eF7wo6LCV#Zud%nv_(?{>Pmbihq2wk|>7$go7R0AVcMAm^@)_T$2` zCcO1Xtya>fWd1$1-h2HR9C+Lygz`K{b6kHrQJF7_#G(BpzIJ0J9sOqUd#w8mgkU}h z$@a69%_016;-JGjq{-H37OWahmN#b^A_X96dl=ezS9Bw9-Ur&M@a6eb4{Sk{bB0=J zp%l(&|5p>Lav^P_ioUicNIokiCkv~(VO|M1`Gg$Oub6k5j3&p|R;imD?f2ye zA%}#@?X>PWiaXyU|0ZhAm4c1xm{)tIB#0lvJoJRV2CAusQQ_egP@IxtOk2KT#JGeLs*q zgsWX{nuS*bpo3T+L!F6gnPxqro5V_sIOCJE7iAf-2_3A*K0Cqq*NqCcDByEXB`L|+ ziY7)r&J-9b%%Ha^fvD?PyqNZ|))pax&#{_<6wuGKz6dBaTtF~OP>e6FraV+eu?fgX zzITanF)N1UB_&&D8RHZLQF))E`$z+#HUZRDv)dvSAJ@`^w?${ucJ~kB8!??*S@Frm zsD!{kJbA8t4OG>b$o~*te2~`4&ld18g(rs3vc=_LPa5;Fzzdj+0|TnXSrqDM)u|J? zj+s{(`ettz1cvNuWh2KKb*~C>bubMtDFC8H&I!V&g*T8!tk*i-GdFeW#{9!|y`QZ;>gX!Fp_T#jeJ9?oCucxfLl ziY^YBxR2J^r-vY;YuU{Squ;?n59zEi8ir45&MHsMVvPLhKJ z`_|_O7&@nf;D;;qODYvVv7P-IZ`-PSqWg71T#XpHw=6e!BKfG_?~K=}5uwHUM=NEl zn0eN~jNCZTOHHr3F_n(PaarxU*SBeB>Lnx?Ni2c7gVqB4>sFqlq>Fe9Xe~y=l-Zb> zSm6iIb!Q)Kl=BdeqIFo+GvoLDAtjIVD*CaA@s^t^hq~l@QF&ECeR+)9?kbWeLW+85 zqg;wn4sC(VST&^zr?{p35*0}ggYK;^9;trCI<7M795HLBrqsvAg8Wh@GZa`}5-m`0 zJs9lIse7GlqalMbA6S?3JGuwwl;&dF8Z%<`SlY~hy>t}y^iccE zd5jcc?L<}$Z@Y>*L}PYRYbIb}4<9D^A)?FY=L!9(QT%zuo__y?RtefFm5}K=dIgV@ z9i8IMzpli8$qmYpu4I|d@Vtf!$!z)g8Pa-?tw3BviI?Z5*)p0Fy29g>*ULv(NHR@D z;1xfClDnaCQh8>X$iqB{9lZmLuaQ;*h8z)t(o;XV^$kI$E$&HWHts!9$f8Lj-| zjSv4taOlLj!9Q;*K2a zFqcu<&YFD*B1ymI>b{V?dWFW1)ZvJ)+SHMVWA!Olp^db|N10XfEqZ^NVH$6sQ$2U* zPTR^4fs|pAQx)H#F2wp_Yn_N*{lReI{CCBm{j}eU4`Y{?bGIbtApH0y#1>rP^Bw_a zW1Sh(H1)eWx`y-P9LSfZir0F6!8fp(9W~Y8A-wiHbV5X%*p{HNiIav(yxuj^HqX1h zJGUt!(4G5Sv2AAd<%#4IeP=D;kF|4q19pfy^;&x+PbWr${dvC7`BJ>}dwS*35Ba5B z)kEf|r}IT!Y01_K4aB$%`-67r+v6O@9Iowo=vJFD6V=jH9)0O}Cqxf4J_f-P>6cF; zsXotb{N@?QHfIi^eb6bfa+`5cQC!MAsRezG92c(Fl44?88MRVF*vr{u9p9q 
zsM=6?+~MuU?<>jN|H8!U}`FN?3*o8Y{4iuP#7;qrz_M#9Du z2un;ctAgh6lr>YOx~cH;^@~TL*-9nvQq^J;!RF%& zNJ&bM*YjD~>GiRdC5`MYpD!$G9yvnVZQEb7yAtBBtWKD(qD=-0EiGgRK1OJg#5s?p zuLlEo?_8d>^Yfe8Q}Mxamt-Dm4JqizH{s@?pf}#qm7%f2c@v5~hebS-1pMlf#}b80 zUv%s&&VVV+cJd(hM}Pyri_Ye1`u3@SQZaz7sOo8jBU@%jIyKH8t<4{5|lP8?5CNv771x zRe}2>Aeyfcm6CLUD2t>0+9_k_T+-nfzi{8dmy~kM#~8pKLoxolYh6aJ+}aVT(I7Cl zsFv$a9<2mRhtyt1l#P!KZOUy)J)S~~ZpCVovGV=e22jgOIFVsQ_{4JNK2$vSBO#`F z3}<4bW%K>}>Ly4BJHlpKrC$x7TWv<Rz zm&9|?0fW>L{%Pfjlh;?teiz;A>p!o?Dk?69W^7vTt2QjI6gkVcK2?mH6=EnqWF)YL zb6!{uSXS{GCDMb3C_cXUmW7N)Bg>SdRZKI>=`GObW?OQYg!1Y zW7dPo16B7P+Jxp}=CY>(=Ufe~gopd0-6O&%ydlP6dG?q7!mG=p2ioftj%~6uLaZmE z*-`|5!{ScBW8P9F^O$Zxm<|O&p}C3nsiQJOjDk@AIAqOwSz$u$?po(cBW45h^R%}? z0qCl27R&8c;Q>6{{KpfJYX)w~2Qkr^vn##fit0K6ql}mu%Z4ctnq;kc4Vkeo)l|+M ziY5+15W6s@Evg%>1H*KY-?^__g_PbebY&`bPH$Fk^xzl0vn?Y&?YHR_s@vDo!e2!` z08`zZd6&#(J8wLBT|X@sebPmco0r z%fsvZUVV|tT=C{u6SfQ+m(s(F7$J#mkMVZT-*j6K2SMSg7 zR$QXD!geN5vv)9X`}i7pU$5|I)l*#LtE|#(>n#roaGJ{5jff&!3+vd4XGwYqI9|k8 zwF46(Gn|QIhEb%tb!-3rOJ%IO3h)sAu?}mw?OTxF#yRL86xUf*lK_$d&7d{e;k}~g zYb09dFsMd9>~!U!K$?vIZT95URQkj*<(jciZgnc1eCP78(<1}LnMUdmjJ{@TUI?W1 zwh#EW)K--%8W_5R4c*_AYf{Yj6Lp_y35=d!>My7Ml@{;<%WYaGz59|36pT$dJ6OK` zUqQ+dQM2k&`?S7fL!dpQ8G%x?Y(4bUOo??3e~u|`On|q^2~4_Wo0IbEyjM`pu*#Zf z{Y`Hg`f|u1bvW|qlQTJ>iqx}t@F$_{yvvZL=|@f?Vv*P@F%aiTEpq0R-FS0x&zIB3 z)`b&|Ggwv*;_cDFkPFL0Kh`kW-|XkQ7knYIFP)Y*gU{_!YrMB$(Xwqf{{Hdj{oTjT zxgk&O`s31x1q+v~IrhIF_uOy12o83!QA_#Rc)_9_$Ig8c{jbU2jqiB_?cTZAs-^zg zZi(GdwV!>hIdPwFWwi{{LHN_pJA1^$v$`+8nu48Odv{JE1uF zF6u|}sXEfwa_K;lN%h#i#erQxJvP$ros*C4btC&R6Juc*wlc^pNOc>5nyQ|NJ9Mb5 zRgg7-(LJ@}KdS^VEBpGQI-T`qDb<*3hP}5FR#(F_jTwv-ji*er#rn%LhNc><8(eA* zX9nAuOk$d05So?sq{(D_slSQI{5KQsTXmOc$4q0mx<^C|!x?U>QFUNgS2ey-D(q8) zBV+-7@TlL0e41IYQ;8s<y!J}G!N?MIdkXGmC_V7!{SI4d$Jpq}g%HN8NqxIyCmXljBpVI6FTvhob zF&)}_7ZHxo9DE%vCc2RLv=2L9(yvIV7&05>&lm~Qi;rJ7`5|U}X;MbDy73WPzUYB0 zspSmo^v-|8_88`GZbXEq&IxZ3Xj#Lw9V4w#LAme@Yp$(v^(e9u8ENOE+S@UAkyXtI z^XZ>|-}N?}uPH+mOE4-%@B0x0mz~NB88(Ax+jdnuzjw8&g(6YrB+T`RC#y|B83MO@ 
z-?xLO+(|TawH#5o8`n2(tV60r(PYVg@~Fzq7ExV6z2#i0h&bvrncIdcnbN|HyW?Xf z-;rcE%uf<{|J148f{Ae;?*)7QQ$6f!3 zntLxLVOk=~#KNb4O@+E*=P8)pqt}>7+i}8U@ z^kQLUZ$c|L3*{7c^(uuE`2lX-JN(pz6hx(b=1$i+a{EsBhYLq>)fU;4(#YAhQo~F? zA$s^_es9}OwcdZ4-&$>bz*|6`NtQHx%!2$dQ_mX4W-6;!iwFk+){I*&ybkx{*6mq_5! z0UuhwgY*JTo13;|Wrsa_fRDe%J8Q!S#Z8GP^T4e5JYlt-^Xzy#tjA`SmLn|bPe@M~ z|9ZzdYScmcI4PqlL~kppt{?6pA0+pX&pK@ls}2*)#=U)f_fQ-fm39R5^lE9|JA%i-+A9f8LqAR5_`U1Nbu<$neW&NqY5x^EhE1n;INvdl&a84hpt*|^_|qbSU9OlaMI06Zq`FKK#3z={15!qDAA8v}2qMXwV%EU29B;#G?v zB5zt>Wi`%PenrZ>7*sKrBEz3jD((BnHp&vrR5qkaWhN|u}g7n z4h~!{myTvld2bk;@s7T&@p!DX-eFkI+Pd*)~$My2MN5 z_lJU>?#lY$mqW3et+K;J_7}F{ZMhKICL|NA~gXXtXweO&dMI3 zJNAk-9+KNs*NPJav(nySEm~7CS$e;O{%Cb_{WE!^Ezeh~r(0;6#|`;J`Kaf(Q~hv7 z?BnRyl~w!*Qm;Ui`q}eY_P?ywEeCN8845S2*Ddj*l;teoT+0hJMzZ)STCjFea^J4#AN3@$iwg`fi? zr))7f34(5E?*=Ve^;fI@1pb0A|8?+y>*(~k^zQbn6m8+i{cO>UvmZZ)75ssiD-3D!O@$syMh+1oWtlq4JHrHI0OfXT5 z=2NL0a{+bwz6V*m*H@WohipMjSs2G6nly|rcIP!TaA#*l;k`oT-P>wN^da>v8B`fJ zJXsGT$k3>c=3bVn<~gYene=M%{!;j} zENW2&LPW1eqh+#Wsc7=Y%ewHWsp$!euTDDMlO>+A?~;egM3p^8_&%28=!`zKcV2K+ zy_M1Pj19iSx9_1mC@ht0V)T5_NoAg#(voU}JTm>o z$d+eo)Sgokts(3%FUCFxNpZGgXjq50g;gxYnuT$J_19IBMJ2Ov1iLn(3X$uJZqlR4 zt)#@%2)cnH-J;qyrjXluU(N+<`A&c;{mT=&hPB8EyR5uPwE|g^w7FOIFzfB)Vnk|n zJ_hyH`hx8~aQWwCn*UvJHmNt8l>CR7>bT+rseaJhi!cHQ6iZ8J zc)}w4(L~_{^%j zl%-6l!rQp(JLnlURxWi|!@Oo+e(Ajpf~;xz4dFWdLo<)#^ThRT&g2g z1)pZVT&YCfSM_w?AdWex_BMa|Lq*EcFUj;W*j>+kqq1A*s9@|(Pc&WZ9n3aOGpVlA zOr|mqP1@ew;#Jjc6rxaykoMzc!E8g(vC=SA9xQ791J?T+W_$gBkKHXD#hu2O!tC^^ zH0A7W1N+`}m=x1{Y0Gn~5IN-Q$*S+~(k)1BLQHG%=d9|3%$5V-~5c-g=Fm zVML$Id@%XE0;Ed)BJ&>7oh&MEeWKcr{?m4-(0cm9@bnQ~hqumpJ@!=m>;IP#K^PDb zgnadDPE*&9XJ?H{h-=H}3xF#DL#!zTB{F&z7DGjGH_Efu!4R z+yEL+9sMU}_`BUkpxt-}qxRQ!3y!a4{G%^_A^uIF3va!82~uo5`VXW73>DDX*8k6R z=6Rsg9`i(Z9yi~JTd&Jbw&?ThKDGE4a_aW{X<%nCRC4ujgCU zf1bq3^+bdSxP^*t-Fq>pGCr7btK$0*6ndcnDf+!*xO=r!*g#D~8FxK$c{ij|2BBvR z!$Iy%#2*bewj?`9lOsx^{f^F@l46vF{fNwN%a$2CFLLTDQ0K4AY+iIiaoDay*|BBS 
znI4y0%cadjx>9LA`YUR<8c$BeiJ!@D`?^$M$s_Y`*0Guh3^Jc+SIXV+lV<4-9bHD$LbuU#dy=S#2 z6y@D9wKcdZbDpuJ7{%ET-p1G;?J+-(w~`KGu9?Kn>@~cKTO5E}5`xE|r$m$Cs`Fpv z3?vOaxmrGc&pQ6BYx&)_bA=)It>lBljmDMbtS$(ONkXnJ-R6P0#^CaehN`{MdyxUtP zA9GCP4&54NN4&6x)h?X`?m;NG88iMdXy3aupUTd8nWl@2RUK6#%_Al>3-0F6eLhQl zKPWYK^E_FYv;>Td?;O~6`zMEwCH{HsH#>C`xF1jvX1&e(?p@lba^M6Q{8^t@65a39 zvCsX>{unKz>$Qqn47CUK<2|0NK@In^KUNmpCah9}$gOomM`iuC!yqD`tQ>qLt^Qn# zpNdsV2NaTV)d-sGe==g}qE~%2qsjk_dcbE<-RAXgk2a|H4=5E`w{8?(qD3J2dZY*{ z;YBGU6r?k*nR5AmD!pLQ34;T@G}PoS5Ep3`vkvF8deuLc`O=F;NLSNyt(yL2+7UHV zpFEcp{$h?_{098j?RMIyb^r5Me|@+uXTkCFR^Y%>KZSe2%`Jc_@A|f@^yIHgblTw9 zxu>le&wu?I=yQpk*%62RYyX)9&`vLHdvJ1OknJ?dDHE@~;>A*Qfs%LHz41|8HYF z>;#ej4|0@g0pF{l|6K7i5e1Zi=`MAye=+(-QM^esd-O8vXI66AyFVQeG|I8mqWXf| z^-fUap8YyMlX^Evn$NPcV-lp3a9hQgOLFIz@YF)Ny$X_mS8 z+U_0yt-n}t=ji7BD}Mcc-I1uFKR5(vrZsZ*gEe}-3TnD#@~#%-n(b_@M(?LjC)*BH zbAu32lPm;;&)>Lyd6dal%1jPRv$pk@eC0F8o4nkFRvqiu{hZ-1Uruja?{GOj-by~R z$J5b7+C}LrS0iW1uW)a@B57So9qO#%E?BUQ;&Ae~+b@)6-&r)BZYhg7ZSEvK&`@7* zYQ28!zV3SQmVlmR%#p-_t5J`BY3;V*>#@@y{S_86E8nZ-QkJ>iSkHNVRKi&Q!Ry8= z45H3;s{5FrcpuYIzpTl%-_?W9y$RWWU|-kEUtr) zE?}1E6iPaji(Poud)}`&$Hc!Yeuw_{$Jt*upM00jXztWr7UFyDteuo)Cq7hD)Z*+% z!08GzGr_XRY`prG@&UmY5H zmLZ4`l~Oym|GI~%_0{E$>4x=KlVfF4j(G%G>K)P08mBy|{@}rnD+k(HlGI_CZ`kVb zUx?atW})Kbp0nx)W&o1LHjcnZm$Ka9CMfW2*8t~=7kiF6*N*=d$Vr<1Ui0)9@j3RV z`WJ%}5`MJ)9y@6d5oGSuLl)mp#+Fx;Ry?TQ{;8*~`a{^`2R#qQ1)kHttk24v8;U!7 z=9_vM^UV(ryd0%gzk?jDBYxP**UU}qe)fS;L_eGo%RJngeQAx_72cy7Bfc)dnPb)! 
z>-uX7J$OYnpf+B{z!45tPL{8JTLRRH&>GWNMx?xRIg$tV8aZ3tVt1~SzwoI}1x&p* z!1l8IAWdfizHr9=Cm#NGRqMKf?xHg}UfaCfgh`S?q4P=GU#51??psBNb9AOp)O|1& z87Dbxm!BPVCIdqU_YJ&pszk{pz@>ywF<%t$HcDlzP=A$BRTpIv-SS>|%PV;j^I_|c z(~Z|S;UcSDzXkGlnHUI$X$lTaNAblTH0@~hX#G!1uy9G#!sY0``4NlCBY=3x)2kEp z(APT=*V(qFd)k!}B{|eL>|siJ1(+rZ@nBr4|%NAB?D$iIn^7@u&D`Sm%Yx%q2~&el_52*vCY43=6_h`w(7x9&>xaX zu6$M@?N@baAipivn4NxRVKuI|ZXPHL9?|#Q8$X-pjvWdpuT4&&gQqbtaT`fVd94@F~$DC(M;u zyhjhps&=;;#qE9adF9-02=A$zURkOqMfaH`&FzI*S-PfS!`9zUY*}!(V8Wii3!i#m z+mo`eo|1y%j+!@R4tm@$$oR)HNe8gvlAU({$oaE`g(Nj2&c|4A%J5rfz8St?!eHh8 zD*?r66`T``w3pv9?JAAMQAzNKaF)Bf3G8-H3~urBh_A=n!Zzxw0uY4s|+bgpi6Kf0vX_X#K0EySlMA?5EZnB;M#v zsy7GYxK=#8nw@g2`j5$wiC)-IbPHWQq{g$qd zq38^R|29nIn&CA5AjPj9inW0hWIFMuU6puA<`aowXr*}Qr|lf^+ZL+|^wBmOY&%vA$=KS%9RzavEux6+|GfEx0{_DSGAWn>Q8SIYi~8bT;pvLfC|o;G2~QW&hPUlIOT3C zW8XMEZo1})`CdBpt#f_IKKR_DcwbzxsLP8|LEjC_V(iO;XQjTiiYQ99`Z*EVhGjy* z$F5I3FyCjaGADg}00ooo#y10suR-8!PmYDPFaiA z_C1e&Q@6_ONlyB93hW zXCOWCg3J39G39Xu>hvDG(A4gnTg4_Xf+E5vTq7*N#5j&Yvxh({<@L{!ibs%RlM4&OfO*h8jsCWDuM&Id zp?@h%JeGydIs^71OUeuM+Iw(A0Ufa|LFu)&?FPgRT@7=s?@Rh$oYp#m#YS5vws00d zi6?6qIlhk@!wR(NYiqrY`T3KeqC6sZ8|6O46;^6H})l{5tc!TCmuZ|5L=$ zoO**oGXx*+W&o<00oy0rr$I_gM_&%*KNx8{XeV&RVnZ}B<0XQpzv7yLQ;QG85?bG;t%th_9hlEp}yxA`2S@)AEwtB zapLIHXis8@9yp_>lOA|MPzaApmC)?O0^Q|Nw^u$GTJXNN$U_jM;Sm{A^oCgSZofF{ zmf7_-w5tJ@$tja$x~16P-qpxIF@Cip(i>k;;Q%oQ=MNXXl%+~Ksy`Ho&5PR`B)=H2 z1BgZOIB#OZ2Ny3d;pG63ARnF;ihVk)dy#KOfM;d8iL6>P{QU_f`w7s;XiE6S1~&mv zuLDeFqV?*{sKJr~n7T-E=_3{eD;Im`M?1dP;O1xOr?+GbpM)ORB_{5V<7U3OeC+D& z?ID#YLKq*JnF0d-$gaRZnpDYM?ATvGoOr|61h<4Gmvb`cu7DA0ejDdhx4@pB70b^T zl$!SX743(Yjm_3{Dn0o60)R=m`MrR9Z+xAe2wQGqRXMTw*lU|IBm*Y&ya)aMsaE+kFD5%QG#Gq+Dr#8b0?AO zD-3Vno^gb1qh}#`7R@`oJ^fE6{x6ocPx=Ek~K!+B3BuLrRWvp)1gh?~h zGNWmE*D-^$OPxnL;%zoMh+m?B6)S@9n_NTEoJN*cdir+TH(iu}u6IrRr()=l`!S&d zS)k7}De8#w+6S9^gbMA;yZp$Xo+Zrak$tS;Mm)wOmCH#!0`^*J<&IW)0B-LsVAph|>rL3ZnX+%JkEew2^;g1T_{>5v? 
ztB=~`**y5rnql_Wx`VDlTA=%+wPhjSc8YMegB;~Sjc)N%Bn1u<)XQ^G( zjdcg^6l|@~t@&*|UDjxt)02Z!LqH%m`@w=l)&EvCpxQmoj8hdy?jgXR>Qyccn; z>j6>G(H<$UAF(B-7Hfzu%I^;D`6+^Lw7w>^hbQbC590gB6{ndtXLdP&H%)YxjhGP> z1m_D}7q2b4w3$^FI(Tu9!C8WhL)MNxPZod0#$xgD3s!844(>}KL_;Z(4!3bL69ebP zx()&5@IyGFBM1=V&4qCQI39t8iVp4Web4+qBX?dnCcGdRr;h`m3btkBt&f|5`0^!x zdTC8@2Pw(bCeM>x5kip^W;GoFU@1Q^-F^9ktNEqcQV60*6ufV+uBP*|WCPy-6_BQE zH^$~Y$xO>X{)p+AGgEo|QHvvX(od8%msbSb|7;5bP?D3m4mPt#wXv%*VTklz@iFq3 zV6T5-k zz*hiiyc5RSvoIXLSsOz00y z-B-ja%WL-#J?W_-hEQPT0Qy-9y2y`N&9x8f@3HTRQRh!w7hHHMX|D`U0bY;3^=h+q zsQZ`Wr;n}4YcKlCr4U~&c(rU-QRU5ZAb;y4Pe^8)+R?6BpsU%%BeOa3Fj#~P|^3u5CE!=$8;c{ zg(kF{1^}lx-Ng6$B!oE2>vs!QYzKr;$GZME8zPrUkFQH?zRVA^%*X@~L#fuVC|7`i z(PE8$sijXl|6&5fBEsp8WXb*EI&*-eSoJ&AgoY6U&xN%UZXO~3G)E5QuBeg`W4}QLn1Wv&#qvtk+=J?t(lnv zD)%u>JqM$I(7~VgcbprBnE&V0a!|(sJwr!d+f?9kH8Bgtj(S$fU&*NmyC8_*&oyZr z+A~gE;1$DOMJ4iU9sFA8y|G2bnZ)jm>c|I+^tqK2JCR>{iO%$kt{nU6GWOsXXpbs` zKuCD}%ZazmOPVG_>de7R0Nl-WqX9JH*;6)>uv9IxByMnDRi2#k<80WDL`bPDqSB-OAM%opjlsDc60&l#`GnD_a=4n8=ycr76 zDA3Y#`jy6Q=fqSqCgZP@*+CbunS+^Ss88cz3AS<1FA%SVL9ndExG_WUvk`&-L!kH` z{agm)?;0S)xcpbVls!Ah=!GuCw$!w~(USWs11DU{~ z`igF^esIP5>C%Sqxw?2DN^jl^FKaydH`ITk!6*1$$nhBYpb?TvU|Ws7Zg}AjI9jM} zL0U1`QrIPnIFeVMzV4qjJ(jbynp@dadKdMei3s^iYg&W&6)xv5fxoAy`ReKnow~t$V!(dhU(-K5|GVh(hE)JCMGZ2zO|#PsJ?nh~d_wJ@6FCVded{NBI9#=WE`Q2zL54(v$W(ivTg*gKc8Mo`Fw&G7Rd$cWy032N@E?3JJKWWOl=>XZM@@NP( zG2-|9&Ada2DIFE>TA7XzbL-vs=dy@&&|&-YKZ}dc*)Jq9CW!xKb?M(t(dGe`dDl|9 zBYRN6Waijb02ISm)n564UD~Ifws^(u1;vJr0pXj2QZ_`=~=8P^K_+a zhEYb7C%eMPq-B4@TyXz^RlF6PUw~St$1$NES?73>pJxyN4D<=8I;3ZY_>8Y-1icjG zt>yUE*&@fD9-YxTGD2?Z_S6HjjPQce3UCe#|3oWhEJz-j;{=1qd#oDift=~A>p8>? zZ%gAI%(?zdF0s0JWh|hY{-Uf&mE>@qylCcU`7)5Y zM~5^0{XbfPCo@>F*q)l}r%7PnO2DPU+?hb)2t^BfGPk!D11^s z_i22I!#KTX{h^81{&kX9QI+wq)VI|_Pi^?me2sX`DDNI6`0QWoL3-uOywAG?k4hMXpjz zf}kIDD*k6`JA2O}N?%RGTq2y<3vin9hBC8(_W@IBWn*FGOpnbjP8#<2_g4gVt{MOq zWUC!2jM75?olq3gWhF2vXlbZV6qJJO;6%uD*%&fzugbzbhy$Z$Qe(INg3{)e&qQhJ z@wJ&7(qR-}Nz7V29Dp2ki-%hfzpmyf0O><5GXT@Bo!PZ&V0X><)>8CkUoS6?Y5L-? 
zNPI|oDbd9X6qZ&NR-IR4D(Vjx9JW9AQ;4sXsmG8>sf7QMaCu+rW#6u-up%I*6lUcI z$?{4Ow^1)|Y284~%us{-L*Tidw7*jtSxvPekPgorVu&}hftWsANA&a>k13Zkt$xjL zH!W!jm(&r<%3w*u0FLuY8%b*S!~uBH7~A?hGY~+e6%o(wMc)x)^P%!@6Kw(NT5C2-Y^?F{A6*;7T?Yd~+K>VM!9U&KyZmFD4!GGocokVX2Z*uS zMOjprIR}3RlF}yW@K5=B2IfXvBIItZ`Bj&N0j;m7z^EkvW5XbqfK&l6C@IrwOq=r& zRX=y`XB={%cM+v`reV&#KwaKl)&VfYoY+XOK8hJ~5LYvs^ooOt8>>f^$IzQ216 zHy?&H>Z6SKCl4qIKNG1w)6QEi|yBr zUyA#6?AZAg-j&YxPTxtbP0mfcyWljF=(&{@u3uA=OLDGm$&n9zzCa#cu^s5tG*~hF z*QUy@TS;F*ii^Y!S-zdA3GB)3IxGBbZH!;-=ewu>fQ$-##dg)*G5K@m>?L=B6-?Zx zmCxR$@_s=KB){-qFn#vFxNjG7?=AWkQ5zvXEj2bVHlpvV=Bmk6Hjt8^S4#c-M^+jFnY~C;813Nz}Kktdt4uHsh#jL=3VxL*J*mNQMd#NmPvQ_xPRF7 zcJ+$AS+#58uUbXstywSU7CHOd#!O(My>aPJ%$=C;yZZC~cHc_=8v2?=^V;?`_YR5m zZms=wcKMv$_e~A?;L{=t%o|h8ztl;FKGS=3*}Ha4{u;lOu2-RJfkWb#w_N{cwJyji z-2BepH_LN;*5|tZ?S93&e$861>ZQUL-dxFz-+RV29_BEH#wt#pPi^~e$E3b>&E5Db zV(zN%x0gLWyd`JfhWz-aJNHPwPmjNQY|2*gt)f@C6C$Ihe%mzpLhidw8@C`P<#?7K z$YE;AOFn(88#ovC^v#s5p_%=2ALbPwJN6~U{JQz8zwg%H-m&Rh-EEQ0IeQ=;Mwu`R z5qx19vEtJHHU9f^i*!0%}<{or;-yQ25OC)B9d_!+X>L z{Ed$6Y@yFrU&;@tO-f&M>dwZ(&8wup@Yb*Io_Y4)>OR?${U0P&pOZ%(J#09zfaMpj z+NwV3lKnRd!+~9lH)~$`wMhTExo+X~kHEtHM%?f7uTo|l{Jb@OGs`AKP1DKp?Dez# z*Z+RIw14_O!)rJD`p-u0eCM+3=e)O3za;OUm*}>>{jp3B#ba&>+ZtW%AODt2?l0W- z_-#z>bKs7)Uyf_i&5F;Yihk2ll9XD8)qKD)Bm$8!6-X5{%d zZG#mKz?1oYiWD!O5q$l2c~raf&yZ{%)BOAD&5cNAp4sfMYJuw2J0v%mJ- zE1m0q-S+dnUVm6B{i{ln0o){qqDCtgsnUq#FD`R^%-^VA-ss{lHWkS&a}t=g^0usb zd{W9|&YI(?=VcKUCL_x;=LbtTT6FGgVBtSpH0Rr$Nr-l$LxKR4daQuRBA((624{}T z+3_QdLMSLOsn;@ySf6EKGn~4IZ+4P&6w(}-Ch$;H|Lws?d+W^?-n(u1_HK_L!a;jE x1@3%#=9&-BbnuJ}Zyh5VK4h~R7mEM*<DTPmKLoe?J2dc)I$ztaD0e0s!|0WnusT literal 0 HcmV?d00001 diff --git a/doc/source/_static/print_df_old.png b/doc/source/_static/print_df_old.png new file mode 100644 index 0000000000000000000000000000000000000000..5f458722f1269a91cfe38c1e5d16d26697e23b22 GIT binary patch literal 89239 zcmce-WmsIx(y)z&1Og<%0t5-}5Zv9}-Q8V=!6CT2ySux)Yw*F{-Ss1TpXZ$H{r>;B z=EtlxZB^Y}Reg66C@m=j4~qc{1_lN%BFrxf1_mhr1_l9m|L*Ng5wfr?7#M852_K)d 
z2p=D=w5^q)iMat7m~dch9E>v30&4%MIM9cCPnOrp8`%qN%4~0nN1hjgAm$SiL{LEh zMcJCF^k*Ie0W^MObvO%|&oUGg-tdp_RODkED#6Ie*gEiA+)iCi-A=hyQ!gLKmRN36 z5?$I)!MI;i2|wj@%7abaCw58WI05a&DX^Hp-gCoCdr?;CRtiAWM?`>m8jrnbY~F&= zTUaGa^grD0zAD(J@PEbxBY|3n3JXXLJ44sEuZHdt2a|RV&|+efqZt+Yr1%N%1Ktw* z=wS5{$LNP@io`@D>;tbe7_c*aRI!AQV9r2FASxSrAYUH7dFSrg-FvC>!;zgT=J%)E z039H(kG}r9dCZxVP1GwY9S=ryuV~zbIVD|}%LTtT{cO_frQMTNZhqkI?1aF(MDi*Qf2?We!F|r|-|2rg^=uWzTWXWR_2|Ycl3c zC&;X?(WD9|O-&z!zm1(8NE-hxgG@OZ8ojqqczubVV+WOZGL_(66LGIP<(lIzqnL2D zEUsTYY6Ck=tZK5i!!9vr-Z}?R=cLJ;U7COzEdYAi#YK~ zO5>#t8YNTbsHrF~g44KR8+qJ5KGN-el@%0GIg`1@S%D+>_8X^TkA_HC>N~r^W0`A0Hh#SJM!;A*Duaf|ZhdBUZ5?4VO zZpvT$J9z*&3!kPm7&2@x_qjA}kAIIeRm}%7KhX@^y-)IR&mU{K(K5}Xajw8HJpD4A zCXBv9N9#Zp+kn;_JS4c&5Dxdod(K(E<#rDEfT8DYLc#(`7-_f00|>FFLU3Rt_FV*4a>_g zTPbrcyFP1pQfM-CPx8v*3Ji$+4A0kDpeOzV;VTglMs&zvAF{6KkC}w(gyDqdgzqEF zsKE+d7`n$5ZS!BvVI3(Q$#1~wz(HZh-3Yq<*32nnW5~Nd;rc4A6B~G!IG1depqD6@ zKAqsF{haGkTW>wD1aNZVXa#ZkafP@0dU&N1NPl>Mg5jm{;#20Emt^KOCR`%;_?als zmY**R=|@Te?~*VD{wxwT5)RU8v@iiy6o(0EpwJM0Y@}pVS=2k>^nnX~q$(JeFbX0o z{J99wAmYF&DRK~aZ|oPb!5s7KX;~_nG+EA}3H=d+1ln(j@U$qjp|th-D+6EjuML*< zl?HYO;)c@VT1nY>38mo+u;o!5IBLBYd5`nM1}C>Dw!OEfw(EwlqMM0sWwD8^a8C)x zBdH^}C_F^mBmA=Vr`V0n7l{@>FB&XHKqz)fdlg1xNtamjW(w)Z%V^qE>gH^i_QLks z1$`lmiSqg)M=D{`JVl!8BAOn{5VIVE6kAU+N4_eXCasdOQ501mn=_SRtF&EBrZFSN zpx}^i8hwDoP?Hq6C|he@>th-}BZx{uVXfYwDP6NR?d^Y1(G&9WK3G1`!$`v5 z#DEBO!-%IBq>rW7V34DCGE1BLHk)O_Vs>8RXr?g%6i~`(p8U0Ea3%RrP}fvfUZ?yR z;TV3C6P79YQ?gCcXADUri;9m)#HrrW|D+%-PBTNZOfz+b&&`{YFTJ>3sa?#Yt78Ip zNV}RRx>Jk~g)dJhOeb|+4?id}c#V%TD~S!6S-L-^UzSu3<;DoJ%$>vK2EB#Des!(@cy@;b*l zUJwwZ!^Y7JXnuBl{s=AjUXOgUh@)?1+1yuywTaLrVQ3_C+qG> zob*mQSnD19XZa41W}VC@mZd9nNXCgz_XIckrbF9z)VAWcJm!&W3v0Qvam%&O*wRlA zU=3h>eV>!Dn3UYZU(#;f#!(L`AM0GnH;^@`>qw|5U#enTuOATOxMH4ZYAH?B#vMV& zFizS|XHH9?{i;jjKy?wx!L_mujnL-(=3qyU`D)wo+Ns{$SeHPTUKc_GaD&mutDEEb z+uRg2_xi5ZES>g;7s^Ls1Z#vP);(ZFn{0Mf!^LHI3(hId1|oCE7<^C7 zV6kX{vWq-!!6n76W$k{zJLX&I$n!YRyA|w%Mb3t;5!7a%Z)ajOZtyy+R1VN;oZg;w 
zSTFiDQ)h-Sr&>l=#~rg>q>Y!$RvpL6;|8Lx(B<+9Rd z>+ihVx17=Jc8hnid>VcjzqGsLagTf43G!D#B*xk1=wqh>^18`B$UmFRSoO6t2%8h` zzGH`JO84b(p&!6@eQw^nrRIYkCmk2lGHxw!Gr6v^n0INoovBjdD~B#87J_|^c+`8v z%w{rrv45_1p$FRUm7X0>HBJn7Ot z!kvgMC~cOPu0FMMq=}{{eX00alBfvg`;nRSgQpV%n6xff#x>q^SJi^Mv*3hzI2Yd< z*SWbA2fiAZsXBN7{{$DJXZJDK9)4WF{HGqA6Ig9Cv;8}Nevw(l>Ple=VKZ4)hHLD7 zCYwGm*W>i0B=e-C2h~u5$4{O&=L?)}s?(cb4Lb{<1?a1X8gGvoEq#0##j)uTB``4X zR1~nBh4ovA3kJpxV14^)VPLO=3$QS^v||Nu;QdvC_3iub z*VK5pe-*JeZ})i-37{r6i|X%m2f zxiY_rg@L8rTOXXvbWH4jmH&T!`CpHJ)KvLjO-4G}KWqN+%YSOJQ~ws=4}t#H)?aVm z)Wr$QPW@libHXxNLPmmtaf6BQ^T-3hk5WH)BKKauwpX)Nv$=Fq-NKV+cz%N=NqZl_ zG_=7Z4p#m(fJkQAgFXqcPK_}M=w%B8c*=l7heK_8!H|6Af!>OEG;EA(HF7ene?4`O z8lAnmQFpOECOG|ms1TXjd_u$`g|Nngu;sT$W;xHdq5+ZogCY&HbmiKjr>HXgv)rA|!Wa zO+QLF2`6{t`wta$Wa3)OLkSB3acu}fUWLDIX5K+ZasH#s2C+2(6pm2WSFAvsLbw~g zA18d1`I{n0ng3LEKy*judwB$bbej!pMz$!_2N=B)m~6ydy6>|8oC|B~58p!8wzaQh ztW1S?KTRm>nNzf1bCPcSYudiyL4>08(8sO5>$GQvc^CSluJ5c+l3(vZ(M_1@pF-M0 zgrb|%`<=cOxGa#nx7HY7nr=W~;&NaWnG*g})!gr=eYp9c@%)q1fw&*67NvRk8s?hj zFm&0k&t=l<98#|{_o~bEVjcf60ik>unGj+8f=cO}hrbj=0iKX^~=WJ zIGAKZmRq?j$H)9bZ$G)+-cK_P6o@@G)lBXxuiiA3v&JL(3%I%UM+iUio1>d%-8gW5 zuJiLR(gM-sko_sr#Cs?N1>6YZ_P}{!-NT3{Xe_qo7K`m#c4t!ee|>Yot3_#yoPN7)P0 z3jKD0vy6-Chl^B6QbNm}>K${r<;oUW`}q5KgXq%q8tg!!0UjNS)yc&m$v-DG!{keW zl=i1$3wk#K4FZ<*RO2SVm821KoK!yJhhcyQo-MH&F?w_$fzq_@er)OxQmxYQ z#t^1UXKKN#@lB+?3~fz^&hIHhlU7k@~j6BnNcEe zYq2G61{ko*6RR=!8nYT??*=X&T4cvzy4a)!yc*X@HUab>usuL+zM@5 z9EUV*|7h>cLVgP5ZsFk}U?X;>Pt-sw9^0$~@kbT1ur83Na!B>et}vRm{y;kpybCit z%&8vEno-FQBOtrXJW?p*3sUN{NHKTND*9HQS{1MUrxv;2lR%umgN!^%*_zZZDMr9n zR}{)QpLbh0TpANW9ZZj6+n#)4Ug#$@E9nK|x#KUe4jxwl4!%J-?+NGP6DmvQrGO5O zuW1WMkjp8OTBihtQy&5isagLVyH~b9w+7n7RwBZanz= z=f&#}GYV%B(%E18&L2YbcM0l;uYfsKjeIewsj1bb+eIpcg@rzlFJKS~;9qCphiKj* z$o$Kij`GZ zg>9GphR+ulKrFM@o?Vl;JUT5ecnb^LlLwLZqGR;~1|@3ECxtts$*(0{{hg%*e=6~} zAK4&`IFJy~(OFOC0NykNF2+oC*3a6=@&pc1nuMy?+f1?S<1dgla@5~i%=K$H^;@8W zq@nREDe(X5qiL*gg!Xc_v81F#r_ENYWk;iakzT&%IHL<5($!3Ew+`Bv60=^r)qFyioBWtAZlS<2EhM 
z&avIh%uFdf9~YWAp2)vC@dCU*1125a^%}IaDIQw7aQE2O?uE${MXQ2`TMh$b@djht zcrN{5g8&l_5;t{|2cr45Cvve0W%{mDOIP1yWZ8KlE3FWV+V9`>Ar4Eo+HgE_%UU`$ zyt@*b8YJ$Xi7NNKlXJMY)V@{fSUHu}sF^iSm@XlnIN;A1H4zpyLu#k1T9T{8gu8X~ z&4pgoaVunKJ3(LmL9@%BvT{LRBPlVW#hV&j`P*Nv;UdvIp%jRUX1Xk1a~;Dtf0|1D z+m9wd0XQU}!!YHgkx6l7J!)cCNQHIN&GYRg9pZYEkJ>gY&y|gjPpQ9CwOXlhb|}dhxHEw-T1gI08O4l@Oan(-W*Z#L*o_b!7L}P`KpR90 z_dA^56n8Gc>GPVQVI1R?oPoXA6^M#og_3j$Z4Rr{YOtmDCvR4wP|9<6I5!0aZHkYoV{Vpiltp`NvPkXC2 zqVYR{45(&*FVXj(ttVI4%a)ricJ|aB zifHqYO`@(?R5ut?t6uDpC001IDaI`4>bTi?@6F4+{BAKlb-9|!ntO*6{u$Y(@TX`}IxZJI^f#vTJ1L+}eQg%KHT>&iR%m0<21b?@MS z40!@1_3V6%HSLp5c9YcvjfM670yj@21Ws99n4U6G))@I$~J4xpBk zhKoH{i71b0xGN}#K^ruAoc$QQm`L(hEP>&b)?L<{Y;Udlysno(+f10(YOSD*i+pHR zvP0#iOZ#qH!=Go#`(>?=EAs<0jfAuLL!_}wCE9N#z4xMl$h>w)sB!dtHLosBtB#Eh z*#^ooHc_nDx-BN%zUu?YXdDAPZ}j8$9a>8kNmbZcKtBUonGvy<#%@Wqn3D=V??`_!T@eGn>S+yM4%=j7fLX0$yw2OD(O>#B0@yLQtD^3@ zluGf*CvNwoD`P>S<~si(T2QaG zRbQ2%jeWWfL9D_2gj41l-#9w@{o#nz4@$k=bj%?}+)@dI+F$CV=`}R#cov3<yuu#I05clWlLBLl+rS3i7@Z~h zSS!|lL_VhKzuADjv{JoaX}xppw5JPr0KxAAZWkVHP9^AQjzhJ+=en|;JZ)z0d2CDE zb)T)wUvvur_A=Qb9&h*eqR7|;CdZ46|61lBKGV$2PJ7$EI`{X=82{?XoK#w@-E-cZPbf!Q*)O#l|2{XD&&)y4LHJ#C=CQ#OUf!3H?otOE{ za*Ox-bd-bv?jn7)`9{iXf01m#bAg*&oi0v@m3R4{uKHQcZDn_#*Z51M&o7;w;n;{$ zEsKf|o|4lUv}a>wU}IH%HfH#knw}VX`Qi!}b~yh<(7)Oy?Yzn)`$06;j&{5;!k z<@Cpc{-FV_l4ME%qLnks;}FE2IJN8DWBZH2jf}wfr|TmwsM3&y`Kt1-sUR_FeeXz| zzqX~{ckQy$!Puc#B^0Pa)?HClN+Y(LT+}6CqJx8o!;Q>bMgUt#Y>Ih`{*h38v2D$g zNI$KH_F0z_JtwdBK=Vz3Itm zs!}|@m~~mp@D#7b^6EHvDttH5Dsl61Lh@uO&@wT5H(-R{QoNk%a%QQ|#E3PWcGSRO z3@ip)*{sbVEFFtB{riU64zo=kIDG4JoYv@QU1gsWZ#B5G8snVn@HZ~Dp7BrnKTwC~ zsoRs1E%ff``oI6Z-vfV}rcV69*q}0wWPCafMCui;cPDEw3@<^j4NLZB6&^+dj8{w| zX5~-HEN<2))6jT}(^nP71-d)2dfkiKdS6$vRl~Hsl-~b@ zclHoFqH$1UyX}4IiXt-#W3~!mam6<&)m(KPdxe$WcvulhF;sSBhqN<(c7(g^staXw z$9f#T3+rVecI-Tv(b`1xxM#D> z-SvJ~o0WA2Nk5236xUYYxs!QWrZ)(&iT1ttj(Ex^c4u^0x?- zXveOrOs(RDimKTIPldwcLQ+s^#+t!EJI^qsbyh!C`HfyLW?QkEGlI3**jz%hSj#I)ic&B~Om 
z7BD=KVje{bfPI$8TF#+Ccf0Fx@Ug91$7e~nyhX0vOYyQWpBnUo*xeh(@;Q_x2;3+|{NmbhwS^96)~M#g{~g$MKd)I`yv68aEQOwZMZR-0 zcqtYRYzm6N8O4RdrlGer+s0$dkO@fb)jQSC9!?n&-HAD*bxAxtSYs)mO|Ow>U8H!` zx&NZrBU<+S7;&{M3*0^w^=zvj5ZADkad5>k^DlsgrU@ii)og%k^Lp&CZBnvqouSQU zgIIVj!NRk;qV}qFsx7yrMn!Y;G3XtxoHPoho=XmXQF$K0%8DZmysv(YZ~7gPXHY?f z6oK8Lr|UOaj7f~cU=?j4-W47a0G0;*P2W$I{}s&_5KUNdxAV|4$V3gSX7*{U38Ri*}5C`ZzfuR2L7lDIthXNR5uw7D;lG| zK+;%c;KojCFy1JKsgW+7=p>IpM+$bs8N)6+b|acUq^<-r%v=bpS*nUj{|6X*dzg;p zuDPz8B3&&iv(>Pqu7)Reu@o_LCvywU0T+XBl|!~Lt|ZF3N&yWQl|7LsD?5Mj==c49 z&Y=ea9s2@sk>3uQIUiUxkCAQI6lN`sUm7gvTbJ9$uKhj+U3~z-9j~4>bh~zh*7J}72NPS>U`(hj?s2JHw%srE!YU3lcU163 z_pOI@avpb&t<&;ftHRCVGux;6PtW=g7pMMDkDYU!nJ>3K)B>zm4vty%dWB#C`Q2^u z8r0QPs-iuNN$IMrjw!-HhE{X?4sZ7Ikd>;~r$2uPSSA@nwMjdCPop&`fi}TQCiZri z`jdo8By~KVwugsdvEeZ>p^02+*v7`jl8}<}sHh~WNQq>g=7>o^@46BY6K4mt)il@C z#DG#I&#vmnZKQj=LZ1T&jBXs&H1dWfZ8iE0B*_}8OxL-x_q!-C&GG|$q5;vC#Sq_G zOQRO-4D{kbheMM8f=U%$T0~wo5;u?g`M&0B3-AW`Z19Uz2!~0`V}2N@ampKSr@|G3qmdWyz$J_NPet;2zE^jLGs}tX*flGjK`lW8TKI);&WVa)s#)yQLJRWoI&j2@qujEe*O;F$vfH{r8K%S zY$Pvh=(;$4#YWOf4}e##S6NQ$V>3$4xz*~g!U08uuBLNA8r7O&U#)j{J>PCDTv$o- zR-VnHo@%QD9S1zdz2wf-`!*YN1`$bqF@5Jlb)bME8}W||+!xXzoc z;GZ0(XN;4l`lefomQB#GhKVmzu6y#kG z&^3qjCp*XEE5LOete2fggt{fPU+b0J-gIoqjTJSgzcV&zn~i}SCqPpL?0@;t5HW&? 
zxHu*PY45P7o^a{I&`zeLemj8O)G0dt-;LanNzh@YUn{9-pimz=+$Ppz`Ub*eS<3ZO zhw?r@Ph2;N$2L-ezFo<$W(Z3(l|hEju9R&yMX|)jhS)vlBDee|*JYf4cFh$rA7HXN!xn|8&x1O(s~Qh|1IbK9MyTd?odqBwIYhgb5K% zsVz|JBv~+DRYe7ejk&tPAD?rtHA=5?+THRh*%b`Uapn=f63Y2KT~Tc}oe=Q&Jbb2V z!dCNLe>d%#p32N9)Z=&xhQ?r%75Kbis!xyeUTIzS=qjxHDt-Cq(NklC$%`4ut$xCT zu6>_^5ocI&*uJ@AwZ$?0(*4Zkyhv@yPW*aXwN*`mFH(-{^H=jejC4*0$pPojU_6+uw)@xgAzVArdoo&aj$5E3>pEyn`7s+gh zb~XH>{jvV_pq$lnj+3nOYI#PHVpFoO=NFxg$*w-DM{ELX{zr0W<=mNpeE0s;$&W}) zHa|_ZU*ZJ-06P>~x~RxbkmJwZ7RI81-TtKUEdp^?IygO6mE$-{>Rv66vuG|(C#t8L z7V{9((RkGu`C626cY7wyQ7x0PRcpkqP`=ZAo4Pzf@TaqGI>6QAoJ1P#Y&3OBSg>lY zUt4(BVD*Nqpt`686@B}!DhjHa$7|A2tAia=GxuEj*^k83cztmJI z$JJNC9HSufS<}+dL-X1Mh2ZaFr2Nz^SPBKp^oLUaaxLKeJFH!22JX=WT2^51GqV;8bhEBnk4*jmX!&(wGto( z(%0q~&F#>-ln-pT#d4IT0mz)-9+541dT|KKRM0}E?>>GiSPX|pQ4`S$`tj@$dP1c- z?Qu_BlCzuQ+HAOife=6|_;&l$5AIuS18NS}Q;P5_cja(=sMGCz!TC`&F40hBp%pW! zXf6q|vvCU#_GxGFp~>65PMD~+8d(l5GKIw4KWW#fslZvLMro{BOFvf#WI>fjpI3?v z(MnJ8Dm2Q|$!LeAcGnD&=<#Q*P}J>hK7Kcs;EQ zHp_1p1lSyhnkmT2-bP$*UEhew6qUhcE)UPW{A+zw9*El z6WS`EiuaIDm!(C)BfUlpJMRUaYJ+Gr|7J*kMP?FV9lmIq0d`|7F!+KY9Rrd0HYbg{ z8{TkK)*Bte6!|_D4vm^~!{(bY@@g7rqlyaxRWc7hFCI>idw(TjRJ=JOO+`*+L#~)G zyO};qq+-j50xWZBPhC)DB`clu$xvrcXFgsmZ=YN^mM?L;nvd+quuDFtL(rm+jORA( zUG(E-XOZP7&|0VLV{BPonin~BqL0iBw*KYZWM6E{p2w5%m5GYCQk|=r8BehZtoa2H z(#o9PTJu$`(s}ujr0fd36ZG7K!vw9AIuGL`#?>uxC{{U+=4ti|CNI5M@{xn;cuim~ zh}ycpsdrv-bkSKQ*7Bg7-Hfs9w69qfnOTN5Boi68wm<9oR8Qbx|0~-WY{E_+;|1bQ zhie#1oTI;_aOavWm!*o8HWwFB><(`@1tbmyIRhlr;ZT8~4B8mvkbTW*0d=H}|A18;fp2)gb!i~mv$$eUg-TKho zK^)j72`cojOdUQv20aFl(DwG&PxM@~8e(kfXxqw{$zpbYTqefthc`KgD*5#?2C*Ozqw=#hl)yDQ1*A00<2o%dhth*^<3h*2cLrO;H_k-j z%t+rWZO^q)a|sXuVZdvy^-|gxWq4m)OBwQIk+4Kn7la#6X`fMWmD@$I!{I7B! 
zJvFY(B9jAN(W;sB14xC~GX*tw2z&CR*sH@I%8W48C@v%oQs3Gj8rjEMYCZj%L9vJ%sO)|9MAL`(b+h zhDDR3P$9l`gq-sYpwIjmMjW=Ucq0`l5Se^6!$+(NN-Q9@*jVv#iB%mqb6{HbPs?L6 z5>ndDcu16*2BwyQE84KN3r@fgKxVM&j>os2iH)gF*Rr#D9^-CBy@rYmh)S4?Wo^iCO-@y17nnq$B;US-@+G;ZAp$>JCmRu}#tP?voa(L54!U z4n1ZXBC(lwCU@X=Cri23rBq8Q%X=;JjT@f{a!@dph99_@qTZNOIc_}(5_Os0GD*z` zOsUXunw3Hg>o2_qhhk{DCkJ)L$NS!y%i*D&(-*ZirQv-W{oa({J`!X*A%sR-e%|b? zvpD7<$Mhot?+{mOq2|#I?RH z@Y*f%T-GtVEZK#GBi>iUx(=b))%N4tR{E-QD;@Gn#c7~1ZL#c^-#(nG9rJ0;B zdP}k3I5H`PeDwdWVCjEA=$O%hmb9F=Q7-v*H#T%*Vq?EEFfiBy*lV%%cVk1{E0`2^ zxBXg6u2PD(--Gr<237LxMf)7Kl?Jcs;8nvsO4F6=O=Zc#XOk?Zb?WV+$kBIGvS#?k z#b+p93Vl7GEnV!s%qEX|nXM#*-ZWBg)Mi482ilIH*zp`AsNKV;35=;7BKc0&$P?&#UT#&uTj&;$tW`q`?$o7A7LOo`HB+3_3kZo|$I&4@KSE_uaW4{{} zn;vKiYtwkeZOc^~r@DSY>%L=Ta)RBz!=}RR&7}g=qbMg#`2rjVdCAbfvKUug+xjES zZB)90tPd~ZtzS7AH_)V)c*|kl=jkJe1JL}Q_n=hLeSQHo{1DsHL))n}d7riXy^xUw z;7RssE?DduApnt%hRoXNKWMO8?}LtG!@LfSiFccIM~Iu%-4nS zl)M7oM*q?cNq5C|qI_4b;vF8Z?i)VXr{ql6t~6U8XrI1*uCV^k%-j&0ezLz2pX z;x+p4TjqkH20TsD+qTl!l~<(YsE=>N-j_a0wbke|XdM&NMIc<93}nRdTc@_%jvhNT zvzp#|{e0W%fQ32*bBE;T5m3x3r1fyAJWJII&qVPW z$4*7GeyXumB;$3|PL1trapjyC;gx~3C6G$wwTvKHd$va>@l^&juGw zR>QoU%Ah${6EUM^#9!Ykk3}-K@*#T;I5CX!Mu(dBLp?NMDf9?~#h`(=Lp_@qXxrhh zX=mTq_eFN7xhXud{=QxN4VA()A*@;%qKTBMU))}wZzn7Dgtm5e@B-8F%SuXe>Yw5g zn)%hGL_~f#1u^gPK?~&O<}RJ!+13RkghkvbqNt)IdVSnISkvy+d{gPJZH>?L_f|5t zzYtebT0?sPscSFhWJxDVq@oAK9O8-c!lGTSoNcS+t;}U_srgJoU>ZuJ)zZdK;2z6a z`5i!;R|2CyA&p_9CbuQUed_pUCvJ{m=O{S1+vkEsb=p$QU4I_x_iL|pQ_>`wA3_DL z`@lsEfun`t)ZCS-MzS5+QbnyNzbrR%wF`*$w=qnPqcG9;pEd%|avm;$du?4%09v${ z*m+aV90ep2GLMpAM*lMzVTu3d4lpNu2*Gj@n7_K40UgekcE3D7%19n^)Us_|14nTK ztPFA5bMpc`;Qj-kj3bqmZi*XvB-j+4O_7BGz7Ar|BFu$V8Pui zN{4Pzd_EJ(d3~5;Ax(`^lRYE`5#Y^%7=;^uD5jcjn0f*=sL5M7j4%!IZR;ktff|FU zwsn7)^Z#6>Jbz#=tv{xn3645+paN}+T8%GA61I-ex@auSuvmp;6pZ-<((te`D}=wp z$4-6-kz0iQy?XykF}?Emanr7AqeG(>R4Sh;L_*P#_mM*7XL>`*THcLq?s0;#OZJ|=ZE+lUpihar0g8z(s-f>6^m=WAI?TS30aGGKn z#JiWZ9sEa*-J1vj>7H#nv%^PThm-n}tgS}5N{wZ~3OkGrCTKhL-@UVF{Od%sAg7P7 
zt4`v!540P}ikGN1oBqW(aiI<*xl3zVc5~)Z_2U%oqzv*G{mruHuIeT!4>K%|GDsr7 z?;|M~xLDriUx|)6JWKz-11Z1(cS}94L2$Y#J33+kktNDC`;eLKWfc4%%5$|XL-0cu zDT9<}Y!V;(uYvwg-Y9`!hlxh%?9Lm#6YXy$an|J!thba-hsQz|zy#KK9kRB58%(c~ zX{_38awlor`gir|aL=|eM^*o7r{<+Xp=Cj8kVW?XQQdVB&PYZ8jA?p8Ync>L(i?bl zKfY0~A3i$H`B$#zPi}JvNi#EI6?&=3*&WB!R_sx$R)dyuAdo>;Ka+ek#A6ncP{7xrGT`7+$e#^saS{j@!E0=4n^v+WD znf%VYZ~$yAG2e`2Stz)P`e&vV`1gpf;ExZBnnqTY`jWsmgMHOx;S-DaW-yQ{q~jZ2gei5U@y_x~s5$X@1E8Ika*E`|}<6b$nAT7<`fq@SdcA=oPB!x$8iIDc`L(jtPYmn)XC) zvrEoLa}gVT9$pI5)l^(S#jm3vk`O{pj<(Ewv$Vrlaxo`6C!+oJov5RDH%mD}Aww7| zON`(@=35Ba>y3>Tf1hj5bf|^rC16QjaJYHP+!=MF#Bp@2l7bp0y z9y@<^iRs_6vynPn-R#-YuYT2o;qW%1!|4wdbLUn;L3fVPS++|it?b0XaCj`qs=P~~OlAin_5DcG3`(q-q^oW8>ytJQcjE!Zju z@N{Bdb=(M1uvc zjsHf5Hph6e+BYuoI)W-LHALRJT$QJ+VM``@?MXK^iPv~lDX2uXq|}m zm9lzfP>>`&J}Nn!nvXevq<&fN&W-C4&r~I8`8|%ep*&ZYpqs1TfSqXGL#i4$_Qtuv zN%J=wL{ldPZ`QGUV#ay!QyLq#?VblSt$IcRQ8b4!mAaR%EFag)d!`!=?tUaL#>#(B zhZwLiV(k7|Hh6YS*-wdcKT&Fhda`G<7U(2;rqP5po^sebb{c%{kz6FBDjPa-q@Se4 z;F#NVK7^t&Fh<=?sU<)4qmV zcIJGRzM4`uMc(XO&)b>Tmk;G9ni6=hH%mU&n-ehBCcBFAh}sKPMnA5b_va1L7RC}> z*X=`0vGmGEPW!vGDk}rcYiIlpJj$L#Kb9N&s*y}?W^p3uIh}Q-#_DBTp(p~0 z*II3GHg@8~&emWx&-#)ntU4udi$6o_n4-5Fbl5-f4}tIVMXqnry3*)>?x^D^mN`=M zyfDpwCrQf$+*t`|nKe*M(go^@TiY(IK}16BnwoQ=1!DJIyTNx~WQxn%Qmm1O=7?KAY$5PPx*5|TSzUI-S8B0oDZ#EF}< zoNn=iYwXX-GCD&YIP)^8-P+k(80l~sKIyM+gOEJ~)MX}UCg>2jlyuy7ym39Z&yxG} zA9w35${4~F?K3T;Fm|;UF_whw@^K?pJJ%V@6(Jir=tfvRQr&LGUB64k_ueRMoXv7Z z{ZH=hZ<}oYp8iZyCZ|{M?SBjmu_)zr^8m-XtVO|{>lJdy@L8Ps%8pH%Z+G~VSq@h6 zHwAV*k=!Kbl;$eJ#I)apa}A~_9ouvl%LAF9t-SE#fw*-K9jy(eckO$zFP{u+8ks3N zXsH!33WNKz5eesW=aPL!u0|#-fRix*~t%``{rQ_UpVnZJ}f=vV+c}Bt-#~mcyaFc#mJMM=zet` z`2oEQ<>tKqWV6c76t<*}%pId3BMDqoo=i}sWq4QY%;1=;WTbkQT(VC72EODB(fpd}z zOaOOgC;?MhcV)^{AJN7+RsjBb&e&e~Q#aD{|l` z50dAp7E@Loe9hI{j~w_l?+*mYura9tr3UTGd>=H^Igs@<(>)1os9YfpxYmCuq;E!t zlCubQ429z8b{>RRR?Ro*OdR=d8PW(k)18=jMlyzJlCm;}H0Q!V%DUZ~uz6JX1&8#Yl=hvg7) z=fxgH9Z*!AVa!($G5e3sAd?VoG6NArqyF$HM^IgJ9gY^Mhw!y$#w1)0X#u8RpFXN8 
zAO^E*GMl57m!g0b?B9FzSXf<Lif?R)rCZC{!AbIG3{_^~6{B@r|!hY=cXwZFObW86nUun7jgS|dS5n3~`&a?3DW*~yE$dE;Ne#_zSwdC$goUf(Ph>`aq+gq^;6lge z!TkTl6rUjA`8|~=c3?>mJk0xple&p|>8$@hTHZ3Mt*#5#E>7_lic1ScS}0yzi?uiu zcX#*T?ovu|3GVJ5+#$HT1-Iaq!}Gj)-t+GpW1Z|`UahZN}yq!DTo6+ZLm=1%ygcx|u3hSf z%!hRd#1WNoWh`;1GgJhpz>0*kfTZPuvyyN}m}S#fKBV_y}KVJDrWndd3M-crIa5ypWO#2bUq^0H)8wHU z2o&A@2)GuLTz!|u6=@ualf#5`(*sWPpEpjGhE#azSptxzw5gsl^?c#81K;wS_JGzj zM^VxW+c-eKkEt6?;6OtiK^wIB$#8ETRD1}tZ$vt&kiulcmOS`1BxJ?I2K>R2=cHPf zlIJ7yG5pk<+W+a*^UDbpqAK;dx~kKO7K^gJ3nfnKahOcHcca<*hhDC)iN}rnrT?Lh zq{mr#*0ByZ_p*4b$;9YXZc#_5~Ti+|0I1h~A#Aq%Gc9{jI! z$@&@f_$ywqOX<+NXNEd9Bk*yH_%#`&M|!}=I!7ZWM{Oj9;?g;y+F}X%n&>zi%OABs ziNzsRq@Rm;+4$H~@z&EU6g+vfC{1OCYbUOI$ZFsW{vUg@;fAIm+duM~Xd9v1>-iI6 zmqF!CWmF?s6TFF7{~XX8-Q2|&Sv{%`H)QR3gj>V`V!c?23Qc*MN4bv71hM~o^nPZDZpXZiyCY^a+30W0 zScTuLcgGmIS}4p>D&@R?_k7x*=L3%)#ndT!H(OP;5Q4$Z-pHe^qu5SJVLKp!XanP)>ux_G^+v>oZ7|dRIs6Oly{I;;>QjHDatv6V{0aR>N#^uRh=bwbEjK_+(Ww>*a03^nar)c3ISlVx*-xn z$qJDM)PlIgY2DTAe7Kk7_niE?(M)gZib5dDxLc>o+?sg){DWQMY8u#s60(JZ!77>P z`IOH|5$#>H0b)74CB*~=|J0mg=a4t|mbZs;*PI;`Cg1jp)z|?ZN{It_{%E~h+bnL1 zJF^f&cPhA!pSdp}(mcPF_&Jf^Kjj7+O8lkCO6KbQWUawhl#mpm?%Ify8>p-@e5T)jlaS@7`Pk0?W_>p^6pSDr2BqOmSOjs)8cEeu?JH2Gx(0KjL&{RMn=L}l`Nnhp_YEn_~wzg1A^d`$p zsW1`;d9Ynqz6$Jl@a>jipAc8t+}cG_P>^EZX|)#Y@)Z_w?e0F(=nYM4`g=cgrvRg_ zl89%SAGWHbQR|a^0gETg_kbA09j8?v83=z}K7=eHU!89R%KWY*;HTP6IYQ0(*AWXo zu0L*A-Tju1gg9v;+m3-3%1+0NZsJKIr@CMX`if_Bl=PfWY|Qo-XDNq)!M*s%)xXkX z14<+D_hyUPst-x9^Dlc~QE34&(0l`~USrR);_YGGK%_nYwKVs)%Sda62vK^F`kgFn z;d#cA8vtQQKZ`Bk81yB%4NSd)8_hEl@^3u8Nu4Qx8Z*~kwD>hSb6_8lmr2fR{(Hpm zyHB`h&wpreAE&aK| zoTy`Vu@w%F1z`R?Sc^K|*!kwEA_Id? 
z$S0+Yi`%vHQ>2IU#9x7Tc>@#^_aw$HffMxR*i!qsaIuP5ewL4{~jSElaV zeaFi7w3K%Y9hIwzdL6W>GUS5G3c+l=0zv%6yv^#BW<++8&D5+PO=lge=pb`+(9{0X zA8dA=hx7r`!<7d$;7Y>|87K1a5vbh|__=8G!*;GpC-Y}>?pg61nxuPB0NWG6WVu zo7)CPtO90h2k3`v5NuUc%nEv$6({(NQx{G=EasN28s;qewK(K4M8J$P)G5xvEte$VTkn6JS>pKG>!5aZ7bjeN!q<5q5dDzRR z+ua@~0cP=H;9KR>p48Kwjc5Sv`qMCDNW(RSuY8Rf(lfL0SL_FzkI9Xb&aj2VhV@}t zu%0aF>#f5fFWuJ4);QL?y2>+NC)`UCh0}4-e|y_d;)I@dpQ1*k{z{q?lA+ue4zEkH zR}gxub-0AuXttkDzAPe@gT(LIn}w7*I$dQEjCG9^Rl;_mu_AdbP&@Apw|l?ZudyY4 z$q`ky9@%eZ+ehZsHj7Tu8r{Z?`r^YQhnDQz{zZCWGCLxCS@#%RG+bqT#iOBBkfT{~ zlJ&l4tU`~MsX8Qw?aRsVoQz1rPhiJk#A+YE*P`Cm5edu9IoeEevZ`W;PR!R=y&!{b z`aSodt)%lStL3Q%GLErkI7f;SRPO^>FkUP`wqL7ue;r5o*I1B>zL%X?lyR^%g9_NG z-OKxK+W~T~UFS5$!sXyI9P7_5RpLx) z)_{nW+Sy~@yI$dPm5MCqO7)#yW}tETZRnMTL3;4A4!;G`%I@QIM?m`dqqIrH>g%cu zk<$2NKl}lCL-TIKPplQ%a-)Ns4-lxVs^~bqsd?5_Gxe7M-Cm!bk*PWY7EK}OOLOa1 zg*~L(Bt_(nHdB*RTOjb$^)zcK!O175==S`>JX=kxv^&cYFG?TL*3_d)$ z@wmYlWSl}G#Fji!NFl{(rCMDIPzNp4AH@IT*xo=BEqv}MBxbQ>tw^W?Mc`PcIf`Rd zF#N#P-v|6z-GvTC z40WqY4BOWn@Uw23R0*RbJpnA$J0r%>yf0#`=b;K@m{#pni~1EdJR%EiVzmoXvOD2LJ2gEgVQGwxK}AX*+LfUs%=ANG&VG-SP8%*3|vA(NkHP^ z$`31LRh4qY5v*IzwGsE&+StMjpZir$`Oz_W zk@$(&KJT3dS9G*`l@~g3#@`(vvo5Abw~Ln8 z!hl(1SnF36I`BLgzIC)e)KK;GpsH@DIriSCi~0(B!*ls3{sMK*V^nX7{&fr-K>TKz zF+h|30lw2%4%bk4joN7`6}M_Nt=`XaEhWlDTeri~G+pSer2@Xj&BXz@tgO`_d-J-N z?lbQzAT5bbxC`G8#?qp+AmC%?Ol}EnxuyNPAhAwS+ww(oWYkxw^FJktU(;()HBK0) zN|efh>IL(4hYvQATCTYdf9^9T>Q6x9>?FaL@-40OB4Wf=$UG@CaA$k;KbvQw2xhdW zPBL2s^G7uN*L3^*5cxT$VXS<7ttdP+{6@19F$Jgt#1=<|R^z13 z#$*tRR`T532s%4ai72noX3pu`+ zHo0<9q_(@gT|utvtq6EB+{D1c+<&(yL&)Wq5ovKw!r;DVkoHESLL+BqIPih9?HUq0 zmrFE<*%5izSe}kj?mih%`VdlMl1^OX-^nVO{M4^^aF+3PWaJJBAwiD>Y20d&X@)ee z<#1;faQenw8A%)k%nSCK8R4Dq5JFGt?s4<1^77}lm9t>80M}%%WFUH74&?xn%Orv4AG&3Tb~Lv_O1N{!M=0<0THNIpyEJ5WzKGD014 zzKHc-vapXZm2ZCMzSI_Du!+}_q7tIYyam+ITj zPa(CJMSOg#BQDW-<)7c;SW3{h@0{`p;=pvYa)$_6P3{78je8q=NZ6nSCTt(G7st!}{ z&9;=HX@2rtHCYv&DA{Q!AN)hLD@e+t`>*N^d(I8?=*GnP>cJoQU`z14J`B 
zco_#wovLTcKST(2kH%iH+b5B@r?4-PF{?e~tF7l{osh(}H_cI_u&LDBvwGJN+v1Ab zbD36Nqxbc{7V6h@qp`;h1LRCg?f5>;JkDHyHqHqBgXFkzw}N@Ld=6XRQ z4f>YL%%bdhyASjBb7cZCvD35!{f`r%FHRvZTwS7qE4GB*@PPx`cZV?{*)v|!5_78l zogoP%6xI_~JewyTCz4c!cI-S|;`g|@WSqnhFIGY+a694cRveePywvZ0@6)ZEt{ z3fvbElf7tT z(#LcW54M?SH1~`U`1tojM(o(a&P`>u@th6Q%R0Fx^+Ac`?=CoP(OvV8Qx8PSy3kEJ zw#L3q#&79a=>#8CB<9>tQTT3!1Yum+Vq(1>{YG;?dW+YOy!<_e%qNI5KIMNIKFe$N z!6k^IW*rz0jZg_|58!x9x~Og#>R^8jNC!7cTT3 zG=!sD(3e4N2pJO3VIRYrA{%xyn|HO*@p9FlGz7c7+^ZdaNv^$G{75x#hxZS6r+-1x z7rrN(X^t#LCYPz56;tK1fd#MO?_42(m^0^AyOEx>Z8E?aw(5`#yIR`7aK|sYIO%_L3zjHMx#EkYthYqT)$k+WbVRx- zv-rrcqvTJ~huwoB!qi{?ovF13W2fMM{{ZLr;~zOGB6kZmA_ZY+z%_xlr#LTONcIOI ziR!#8wK;JQ!&6m87wT@Qlz&VDTDs$7iI@BJyL6nIud&r&OO|KP?C+nN=j>N=al0m3 z&^GAleAiMDuyL>D2cQe=N=PpW1S^`YDZw?+wtz4go(Psw{7Jwd@7m;;eWi5SI|95Eo+b?!H zM9AA)pt`1JlsDzFCB+FSSuTjCsHGK|@I^D4s>PFMXn)-!@3zoo;3X#*cXNt-B7zi` zL#|op?$&dMUZl=40hv)hxl^R~YcYTQDpc`9c}%P%(Wc&KeyYuxq+2oe;L-j#(lXZeIp6i0RNo|Lm^3?>gwwC!PM|k@2lT2 zIUcK3CaTqjpL80j!JeAK=)+|}<t#@+qrMi9+H@nj-D zL(97~>$E(z$qKjLb~Uojl2iX1uryybXcvSM<4PJ%Kr7RopY5<+@rYm0AazF1YK6y} zCipteo+(4UvL8dp+M?)1;6C2-lAx!1m|l6^@08$g+GF&uaANp*JTxSU4Ba_;3&p{h z=R6qlnU=5&@7V?pfA2kng~O_DRCUXw4uy(2dNC0FTjtQ zN#uImFb>3?76;fqp)?`@)UF5IYBBMs+92;S4mxxN)qvCQ;!I_G0!(GQPwpFI>tB{P zY5dD%)pKYiBlm3e%pEyLa9I9{QPLMb0jdH;#R zhF1%tea6k`!ZgWkW&b<6H(YgqavV-_1gyh^&u8vSqjA?OjCMnA6P?w9A0zy;9}@+f zG09UdDVEtO6K(W2mwFBA%;4R9sAU`k#9gdRR_oLLIML&uCO}Ld^B&KqNH1-7g*75L z1}+-UUUrTltlChU{*P!4Iyy?@h>VQM*)J0(?5`E~Mnj^hY$NOSHFOgcqZL4#&(MXi zB}?7u^5@+3jTG9Ob?k?d(Uk9YtQA@|NLM} zeT=Sc=D&m0+&p*Mlg%})`^m^t%5;3JOCaNAvF-&MTSR3I|F#+&Jc0jnxL`}qnyIw; zIYK_;6X=!agM?_I&Nh5{>lc5`9e6s4S3j>1+l#3$u7hg7u#)&Kd|r&!D?+`4i)NA5 z--9Pb2#1sCCanBwXHrOn$|id1df(sXb-LWdh9DGed@1(zBzt>ul&_2w;)F)8L%ZmqYGnoU&; zmaP123V2VK3#Rq^TlSyVY!IY}Z?)Y$bc<*9Gh@8XDiAllD0Z3lpHp`0^`?GfdtOUT zC%dOT19OS-li}vC#g~7a;@F`!3dc^tVf*o;z&J;sv5_%Nl3>yWd zPuFSyi?v(tz4oT)@8d1 zM&_LbN&a#}i#h%Mm+)B zRl7oxwK<8%|I2Me6^k^8`3ZPtIpG4bvCV?93W%QG>8>JRcdL?Ur}5Vq2lQflo!~k1 
zC1j${QDmJYSPQVOoiy)myP}#Qp0&Bo(0`74Vmw%A-D-KQ^$Wn^`keRq(Q5U41yQye zY+JXQ%mp25hqC#wJjhCW@x-*W90+Inc5|tc6uxdTwZ6fU;;(F~#e$=N^E}k0Q+&Pl z3{BQOaX{^%^hKy~EO|VHGA_>5u7rR;WiQ1qlmhx)p$qLHz&kdS_K}*Wh1SH^PAP?uMpv4#D**q;<$#!ua+<}470CUqStD&o z2QtpQaEnV*1s=z%SR&y-hX;`qehCNexvp3H z<$?Q(bbMHUnbBM?@BaN#Z;5gpS!}8PAbtJ+9ezdiGA4`t&yW7sm44Py9oOV|@hVky z4$?p4n!zGTzxtYG8H)m~E7jWzJxe+O z#?%CWUlwpz&DGUNC>m-Gt#8=P0~*z=tnVjEV1zm<)EP|!)#0r;#Lhhsuui8QJ8&xl9{%<#asBb%TNkld=qr&gmL%M9RE79pq0`r<-(%({by}FMi&&S&)9xo zn+S>BtH|=@LSzO@1srel32&m7L!lN* zl}rl{C7aQdH3BMqa1$QAU|=j%GyqFW5QIVkY{3H=aQ0G$MNgj9}sP$obg`sd!k zIz1Ovqy=tED}9J0KZ}IdfJWC1F2xqvkM{b%*4j&YgS5@RwHVwQ>Ax=#_B#%I}ygqx{IIt|QbNB^;g}rCCv?2srOKeoFjmKowsq-k?*b z=uiu=Li=NzC%WRb%NJhg0uNW#g)yy|zd8B-??;LAw{)5L*!itt&6SK}y1!Ym&i>BC5PEkf z7RzF%v16Q=AFwkMD6Et%Hi6Ktjsjg!I==r6ytvcSQu5sW`qafL0FG#CLW-3w5T)3f ztY8^titi`l>-FF6-3zqFUYG_anioMpmzUA-ovb+S?9ixqGA~sR!59?{HE|~G`s75` zsdCu$veK>H?S^icGNNP{I{WZNIP#A+4A7bpJ@nEqj^JEAJ|UhN!Stu_-nbEyT@J-@ zw9z43rD8>BLQSP~Ysmo548h{QiI4u9VoQ=Pi*t7jPVGyRcXdq>3A&1`gkCK5tU1=b zW3(5 zvu*yq#?82OJ8V!^A%WZ(PLOgcdi@^?!#`LnZP6Hac6!9koh^S=6c?4uudHv#OL!j4}285%59 zibd>`c)j1@$wtOA&AhyXUAjX07$d0G0g7zuNf1B9mhZ+9ltA^j+CwRu+DG80z0O7v&M~o@-W&5XhJ%cGDFrjcWi{i zKCl-(8@owe(;{ApVOtd3Ya*skr!S-4@mW;E+29)Pmv;P-JgxiG?cs-_PHraNyRg5R zaqLbvc7G+WjM8aJUneP0vye2MT;fpaWv^eBhS_6ZFxx}qA@7X0P0e6vQ79w-0O%x3 zxjI7J``8nBQaFM|747e=q~rFO`ie>aDRR(aN#sKfgC73H@5his?Ikrejx778+C7q~ zzGJyW3SldKrZTY7mHMG&{F9W6h5T&dM0U^w<62*B20yk3-4t6C zE2|?|Ck`<+3zd3KK3v~p?q}G)l*;(wOob-Xh z_z|<6zTigPm;9)$`eJ2i;+qI2-^hv@uj-Bd6*71%zo-u{$vnPajIVLQWv=WI{2q#M zY1!Z{El18L+5d2szKXt8tCw$T{VI0TQc$e?VP89sVv0g4!(R44E8urWq4yD8S?)Q5Jg(JT|x*vD6R zxJVR#dXBOt$9emXwafGS`JL}Jbg+dYhy2rn)MX6c;inPwq=lpD1Vz6p6D|CAjI>`F zzv{OM5%Jg2zz*a>6-$yrvzXQ&bwQVi{O5ZgnjsONnr}TGcLKvSb8Beb3sLNEyw7>G z?~g)^1KkdFqOOX2cwV{KW18z^qO46Z6j*;{zZ`e<(YV+_fptsQuzeUdOiAs`&vlmH zA2xJKvj@Q4D`FCyEeJ753IH^uMY?f+%TYBq>H1vLJCRK#2srzKfbs-$g1U3SCpJlS z`)~qf2|03%3xna%3dbVYIYHqAI_L|i-rVdS^{+A!LJLKnpC7K%**rRXny1rG+A;d2 
z>3jGm_QuCTG{)XJF2Cv!?T6D`6Km|>K4o*a0x8c(ID{lM#y=Z9#WpW&*M`@Q3oUc5 zO)q>Xt~%7%WCB28B@mqvOPHb&@1)Darfczmlt|90u9^$j&u(L~Ue7Qk_M6FTj_c;? zDg%Xs1zLS@)DRzvay(*&$%lqE9@8`i<%%u{K8=tlgW=(TfZW}n` zL{cl-!bfE#k)BNL%nfjCIs^&?n7chun7!gzbH2oNB7xez*n0%Pxs5Xsjj8wuA@u9! zmiK#0k)fgS1zj`s*}zUr8;mUEkZ+SJpNJf;RwGnM`tP%kP#cw!SKLcFywA8Rw9QxV z@GG>dmbeydg1zu>1<`@aQ$?QD5ULH@dyk(^uyWaBS{)_tZx1dDD$e#K{np9>{z*vf zNU#zwGC3k+AYT({d6s*u{r<~zh6@ogMy%h-{B9@6V$bX4xDLI%9DL1lHSpwSAD{zu zUyg?>A*{h2wHiLYESmxr2#+5{4e@y}`J?9XiZwnaLt5?1765aRBylzf-ciytA&1+;b zPNxANk+Ngm_m2m{bGwtb6^!SzG|i`R*b^IA?-Cz3=vli?5iKtW z_m*RYn?Z-!AV5d7$vxqe=dIwt1%`~W!4oVG>zi&n_t_~mqu1L+)#Ag5-U2nL0B}Jk zq(%U!Cl(6eakcs*`#2~vZB&)N?L%Y_R;YMKdpIzUa8M|>D3X*&ayk9vI(2s<<#eBQ z^SL-k)$HC!D82bKr{#o4NaE}|@AS#ektET~OMmI|3aM3Tf?%eTme`Mfoy09A?)psG z92*(F2qy2-vM59PW{dT+{X!0=Ud}ZH+gm69P5vFVGIsm(UEb_tPvb|IiG~b+vQALi zw6jtScvGP8BxJ}SRb#OzF(WwYN{yrbTM#9Vh2;2LVmyDSL5esJ%D$h?FgeE7ceH9h zsX;;P1z;S{0QCK>1L(mL1(B0?@O~_7iL6cB!FE5=@ z37bAjODR?n=cf(cx#VU`7DMH}LieP3n?#;}@6br`=7%(s{Eq&8TE>1({*k^rW;HE} z(tQHHG+7Pw10%_&mQh-^e84#YbG*IgVZ`2rRVg}g;+=yQ^OdQCl~SaROjt5RZm5V~ zfJXpD%CMh)cC9+HWN_2I~yANjP~)vj%yZ{6Y3& zsfGp)~e1k@frI5r1JIat{AecW z7P8P2G6qZOY#olO87rz~P{PCR)|V8hb;}=zFGsY5T&;EHL4*rl&SA56l6RgXl>)E5 z`K;*m-&nO=Ik*3Q3UXA19Ze}UmdZet1~c0Wuj$MrKI_g$GtaadEWJ5KiI-E^`6Ou1 zw@s^OnWSpASgc1fM^vpzT@j`3jHre7a`Mi63vY8z>fV>y*o8p({FqOUmvs}|H2jeV z4E|=yYS2g)rFII9zbweVIkaTU#$}I511hldChaBx3D7e%cV1C%l%cc6ij`sxnoOYP zupGDf_wj0Wpiy0_IuKR0wnq9sJicr4-n#G=Ytaee6UtfiAEgt6rgsi% z(j#gWUq(ZJlpYUJ>3(2i3a&(5p4Np++}FwJI5g-vAx09Y^P%Y-+d5)*(=gO5?YRX6WrSiY;z9n86vT)zyC+5m z=orp*%Lgq_uh*ND7ZyK*v+QTe5swD4o7$g?Z8+nB1BM91OA(Gs)KZw& zTDvA_+^Fbklth^u$&e~E5NayH?~Z;m#!j?3fAq&*oYo_>JQ@ip4Vn&B zO~_qZWl|&_YgIqf^)o5s3YhpeSSI?Cr$_Ki%EJDhP$8wrUQbX zsovw{8ogrH*bF`ng;Z*w`P`m)QBp=}AFAty{6oJgj@zYHn{B|SBV8+Ae;)-GrD&<9 zF%sBsTyU)h$L!&fT5^UnhTC;VtHi|Rd26Y&{FC(dFc34rbm3C`j2X~%|MrY2V+|8_ zFTVilIrL=Kl{yszGbxYs>%Q-~B}ky6>wdl08f&w4qep@QGnndG@1G)g;15S%R7h>) z+G?PO)5QWB-!j|`Y8yyDtXy41HZGek=^_cfkQ-+AKXUO8&E~-PD1jWXcpLdZ 
z9i5;E6yBNgLiX#HLQ`n;DUBWX?!q#_?wvQs>5(5n!3KEa`1_uS5XkhD#&bof~VthDz zyqpPWn1m{@;;mESTwwdV=Vp_NEAhK#X2+8n5j~O*zPhI7r>kui=}m3p(_I~xDmazdsjHa$AS{aa+A6#-Hly$?0dFWa`L?@c!8< z=q?feRiZgKfbz@&05IQj|AG%e0(c5qx&)t z2HE@v&{d(Vv_=tYBKg6yH1)xCjmyK!l6WtE8i{ARF(M^3wdj0fDsaWkvbbb2DzT8H zKjm}{1Wa-HI+<))OcgJM1!xI4lYP%xi#ZjZ-|RrPbG-b;_1^r5FjK68Q#1Q-me`U) zJ*tgK_9Cb(F==!&j3h@Y)1D_YgFV0a{oUd{2}Mw-uWr=n0KUG)js1SHK-g<4YH+F? zMibIfWZ&>x-?h8jiHNl@T^`33-#+8TdU#k|AC42>9gPvsHWWU@w?{SqP}#JbXK> z+b0z$19IBk@RfDj`BAEGNY)ySlL+CDL((XSxPpHikdFH0i?Pb6nR};64&<#Pqx2-Ra=%Iu>_(VZAxak!LP} zZKw}#PK4Z0y3$ckzqxUCRZu-5cfkd=;0XAh2?t_-SdJR^w?o~?LA{~$B;CM8#ir!* z7teg`g8TJ)b+MvGliZhwtL|M01w$zLxD`bTPEZ=ps|stg>ei!bPN{4lV4pY^<@w7f zjTmbg_Pfsv5qEb07S4wElly#Da?hwAwDV7eqfI_u(sOh?ChY0(8#I z`J4Vms>1(5s+kL{TzQS(0FI81bdCxMa|Co1k>|v^4B=auQ&YIW4BJxWw81O*{cx2WVX9JC#711eTqnruRT|2|n< z7CUJR-54|`ov(e5Y$u>31q)=Xad>(MVH)%VOEUoO-s%VdckVOT83T&R)%sNCm9B=@ z)pdNtuZhZ_^+(^9Bw>UU+$6Q{KHz($2A|XiBvzi>Lu%HSsl04(mXe~rE+us~Hqp%)brcc=DscUgy9;!);wrBC}IQmY7DYB zfRM?{=yz)K&_BPJvk95+d^R@SX*1V?*eve3dI%XdFK zYlnx2;%1v_;&9@cH-rz}1lw=eE=P^d+({OHq1gcw898wRlbHwZX%Em>!g3v-h-w6O zc8+in?4=&jP^Yi^~Pp?81*MZ(W;iUM;w@Z!|t}K0n(mS^i{^Ojn4$) zEq0kzlZlP@!?Kt|p5pxkjIQVwM3K`q^5^dacTNAi1g_?*>u4MD+v6w~?$rE;?hrh<+jR1sv;JB0 z&)m$cxh|kDsD7(zzx&zcLBSt7#h?m*uhr`dry`@=&W}|ZydQ(@SHv`}>-A6G4Y~wW zRZ)9&FqP9O8yM*AnWbf9x3+3dkk<-~eseR@$h2PJ9$4K+E}vpkd{;Ms-ZS$gbBd;S zwx9m6NjJV;h5+zZVuCql$78%wg8BQ$dhdGI=xE`R0TKq+4Zan-d$H-2KE(@q9KsOW z3r+z!weR6D^bLZ1QKriaEmA7CKQk<%!D`pH#36z9^<0C}=z&)p?AY z!LwqH()B0IHNm<{`4mJwQ6DO)Apa((g24W9`}@2vNDE0e@Bk90`i$`x-K zg{mKmO?MkcFNlJyn0TN0>lOQwm)_gsE1Xi(+59e}}P`x0<@4KfiR3xK@_ZD*w?tkHc@5`ku)KVq* zJ^aVTs{lrZdC&T~`3O36TDt;!tOqZW!}MI3)Suo6&D~$2+^B9UX-Fh8W}&yKO>+`4 z%lF(IY8M1C=Uh%UiJ<2#%Cix)oiiP~9Q%wXLCnnWW7~Nu&2P?y6NW@2s^i#qm8moHkgd697D*ng^+WU!_!3m_yx6X>nf4u z6(}J>sIlKMZK?)$Sf}Mbhs3SFaHYDfkNa^FPPcl$3Vuhwn)y}5p+jQhlM7yaVS*+$B0;M{Z0QB(f$cK+S{ zqq|&&$m5uo`s3%(ISt(9>voMq79R`Q`EbjZjUQ|#rScS@i99}H`+xQaOYnwu=n^@q 
z6<@Nh_P|ulP1+Nug^l_tQ-WGLqrJVr!DrKl^!ZO47qh$54OV$d=a9y|T_ND*1MWFf z&_v$(p_pGmvtjxy@o3ro?&Gm0%;Lf8dF?*4$NVq-Rnff%Y&Gv}VFOied2WDv=nsQd zM<^n_)dlundL`}wzY`AhZvovq`-5Pb1GoJ>r8xRBnGv@I*QOkqfw6@rBBMJ>{dBpI z`j`{wVFDcyBnAPHXR!&kSVw%Fj~Uv*|K-zk3@)Gsdo_rx681gb_oD*Nz1d4E^`9cC z3uly#E12f}1xYbmrUGp4;`>@^0Wk9Qgj1g%j9w@%8||xc{Oul3kIgX_-B(K6F3KoKIlg^F~aT_Tlr-X2nO$wZXgXH)qE)CbDJ^ll;d^ltL}1%lr&X&b||HuxtcLa zy%yCbAe^2!`V(?Vge;byq>?m@+L&iUlh0lU{@9*h`)S{yrFR0GTV0v7jLtCn1Hz z!&8vPyjS@-MYtDup;0O#-=6liiCMB-k(%NH|Mn0RX~=Yy6oY>8ied1G{&?LOf~N^C z{`A31dMAk`*j8dsH3hKP6RG$fSN3C~feYv${H3o5`eI5sxbls-B=UZHr^I~IKr8@V z2Z&hj(JK0-N@OI1V6?9&#aB_CJpa;~BI^OZJ9ZY&9PWEXIYx6#KrgX5&tS{R{CqZd z8q2C2KXopmCnXEC(eoCFSBv02WrKd@JZvR^I>FzWC8$@YroAcRg9YgY#g4MDE`TNP zIn^^eNG5j#OFZ|f7G9joMF91Nm8LIW9jOGtf*ct}ifaRXeins>ey5f`Ez+OFT^# z1?oLPk%*p6b@rubtj;iYXx7Kz7v}><2I2;`!fY-{O@1i>Y!hL~y{H(=sqt~ zcR*J4^C$hpIl2=yR$O}0fvHZ28b4#%(~LMtx}Wq>f3df1p9Q~2nukvv8;dG1(Lhc# zg`7A{PX00M;XBiuF7|@Z|~nk6=+*hn5Dn6k!Q=)>q<+_rZ@I)Sz5}v)H8GrZ2ZAqPxOr2 z)19suaTzxAE(aHSNIhFsKG3Ls<3YV*% zU?o5l);^5~R=FU081Q4Yd{ZPE?E*fD*MhnI*KqmZa+}IuX_FK`X-~ol^{crl9ipPv zN@Y&UK3!5?>1Fp>?k{2baVB%bK&aJxI~Bj`6{g`G<8!g;jb*2x7x-hdG3ZNA6icFe zJA^i1`*!l2)A-_j`7s+8p0U=)`uW_X-Y%1)*ge*pZCmS5BEJ$2sPs21g#2rN&3j$9 z5%T08-3k7iEZ6gwD7P*VrBZ#GWleLp7!UZW)EXILJaSiuU-Viw0czxe);`h~2Jx-DBymjV!~^bBDWhBE z*W_g(+Pc0jGoD#gm)*WGq8+!9kLnYiNkSzSFc$VzqcjAANJGkuK8blP%ycRur=&lX z&27>6EZa|cPLJv{G`}f*lEBm4pm9bP>sEXQ=#ChbjoLjqC)w z{CEv`zEe92mo;gP0P!5(*danpGKVw3j;}@1-LJ}3zJC)r$8x{&y3YvP6hK*dy3Z(l zKCf?G2Mokn_`IP&4mIIRyW;LIy|Mm4bl*QQT1m7ChnMd+r_VgMOE=!k1Y`+$T`Nc!Q-Apaj%R~~g zM+7L!bm={O8A3JrGF?^O`~NB;{BPQrQT{rsht+4qt|}1E(OT=Ww^;96H=_kkgm^ws zo;aBq91Z^0T1`p$f0kAiv80=^3COeGRR5QB_NQAo?|fFcnQyK}5&7Oj{$k$o%CIl@ zHW)ixi`V_{clIBm`7e|7)y8oM%pTzG3`G6k55j-e-!{UJ6yJ8S*gZht|N6Uc5hLNp z_=NsXp6h@AY%Mq9qLZ>IVRWhS_}sw2aH;wWW|vma^L((DQPh-XTmc6wO;@Sm`HPa5 zy=FJ%l8s#vBXQXG!c*?2*;hvkX7ad%aDR|W}ex+#6}_6LR5KI6W#W2NR`TK%aOzs5mRp0&~J(CIU~95?)g zn2$6?OJ(9FJtfI@j6$8Te4Ov3i{5Hqje_+4B*7iu6LV2RU951s$8+?lFtU$s+#e_Z 
zI-`FD&$cWqy4L7=*Sq^IWCKZ%sJw?gc^)5*;YDh`r{BUG{{q8#^o9Ix)Lj|=kk zqg;M3DEt~E!`-se%c&M3J3F60`1s*Tj*D+eA*x9m_Bf|r6v)zA5+eC?_H%jrqHB;l z5udXH_XP9vfNQc50Ny$72~nDTAGrQ&LVvwVZWA%B&SN!_QIpbaeL@~HL}}c08PcaO zfVT!Uv*h(o)^ITy zev2;_l+wYM+t@2-gr%%)!LFK{9Rhs{nG!9>#`+6NO{C=ct?ZXr9G`to1p797@`?M+y{U6$lw*1|X`@Ro+2FwM5^-=Bdl^0J?q$K6NC zb28kTj2SRY%oiAgQ{!3-FG^(g=0{MJP%a{ECp$rP)|(w0(a`ayg;)$a2k3w9+ffm! z1-|?2z&%%UbQ{?5*bf|cECC!lv9)DAz7xB;CY+UT6(~RcYOY>5SJX@XCs;uOI4i*? zc0oX?8_XaiRCh}67wdtj6r%N{KVGfB>5#(tBXPsqLHCAS+~Xyt`h1d{{+nf=oo*X` z>Z(1o@u*W}V|wl6G8h1TgLl^jLm^yLgREzkhf5_%-0e=#qan*P72~w({oUt;MyV~z zu)%7XIQaukb_aT+fX!%^Fs!AJ_!%R?v+t<5k0$8!BGfFG4vpvgexE(_WkvO5KZIX& z1f+DXrxPC+#=aUy(m6`TDCO>Z!fb=yUGmWOmGEbEi4LOlXdy<}>;I*=Ua?K<#%dW>{(r`s9_v=YMb&J0(x8 zxY_@mLb|W*Xxb>hV}E|Y-2_1!4Ffh0_azHt)SZ(f zi54r%w+plRiO~S%yFCUqTeif!Vgs<5b_u|3`EF-o&ii+uw@!Tx(qI-@UibM}dp8Ew zgMJzz-9UvRYM`7lu%7%41kt>LeI`8;oEX(@D-_eilSEcI9CFE@rek)`Gy>dGsVBh5_f$;l8jQo| zxqKz3{pD|uu*vMdH z3C3%ooD(;A`?RMz^#IeQ-QD4H2>X(48EQev_IMB>B92k>PhHm^;Yw=B=b|4xZ(TfB z_$36sXW#8S9D4FTX~cCc`E8U4!(Sy$tWu8svDsmkL!{SbL4r{iKX`~kQ3YU}p_IP! 
z^#ipkgRm`(UJ{q+#e5*u{J30bShK8v0LHC!Ml387DjHI%AP5}^XI8APD}TTk^b3^9 z!=Uz%Gzk_X4lFu+I}IgZAyiSLi_BK0DH*a9jZSJ)zf=1S^v6<{EO#T&PMX) zKpNuKSA1bLO@q?&6^FW?{ly6v`{B>*#H1t0n zUgB}o8lQX*qDD2L61}px7+NDfks}=DxM$jIt_2_i_Nu84TWA?6i6ae^EM^BTXqa11 z<*MGwL2{KcLBC2o62PE0wP8F`zQKO&$oJ$k09}au0tXd$vvPgPRo^V6#7cy5tm2gU z97=TYOF1HCYSnA*D127K3h0+!z$7_ZPiB;woB0bKw9f9V?Y|w$+xp1k!)G{28`bpp z0)P2;79~kaT(EF{Wp*FC@UL`vP`Sz|Qd5a#Dtim1yrkRJ&yxV1t(mwyyocF?PX%g} zI|IlIYZztDCmw@WlSO_mGnTAB($l1oTFpKQ6g5=(KP|30^VO4FtH_~0$7*DO$LF`b zo@Os__Ky|d4(GFHDOz;X3la3t=MU0x%;EvjJx$v!{2HZ zo7h(#?0rH{cHDWL%GxZBj-zHBTakZ|+!buaf^0S8C)ZgD&aMW60zmxO<3dXLyBo*L zrTHw{G;qa~LH3uh7?i(FL=~67%oBU8DZDV`pBvlB`zQ^$z6Sdy!j>K}K@aWMlb9nf z4&OiJQDF^%;>f*~5<&)7A0tOe(G6C?SEKaPJdtNNA!AWMnlSF03kM?~&;HG7Tr=ma zw8#(`M#UfU>BfWU`y4Jdhk!p%F|43VMS(;Z7gw{Q_Q(*>es^$EQTm0LW;n6!9S0Sp#>scVr_o@4rnV6f{we=-H>SBR-%669EE~oRjK(p;v zdvOYwY`%@|uy5zQAROP}_9O(1IjZ#&$Bgq z9dQR{!vU*Tl>O!mndRU+}e>~}ob(ll2dZM)*EkISt7F^#bA4cy>Vc6GaYzk4tko=Kl? zwbl1dXv{~2WdOh|_8&pEJpnHhqkQ^ZAVcfnpK(Fiz{tTJ4>yxEg9xo1wPbfGyyfe) zV`bf5lOQVw7u(Dx^DAzpo#-glW2o=9w4npCPf-W!Tmu5cB_Y$0=kPzC&8hK$&<qYd2whoN_lq|U(SjQ_~V>b z`5?_`Ad>1d3^VP}_lMU2WSZBUQ)-y2NMG=A26dGyo4lW?j&_sw-QHhD+Ai#-+Jn7tkEY}T zFJLIpECg>)hm&1r`mLP_3u3{%(yYmmS;c&U`hg=>wl=KXZe_}mCsC`fPrK_*Tx;%M zueHy)&M4N!3@5#m>Cf%IwCxUlp8?qOq2K2z35%B=ZTrgfk0PBpsjG4n@q`@B+}t92 zry99f`Ctk+oLhmim=3+HCBrpw;O9u3@FKl$5f zW5!zjv>8g%S=f7dFu$J3A;hVgF)7@&waH_5X;!;eH#xOdkk^{bGkmW4AGhR*0B4QG zo%i9jJX9AS39Saa4B_Dx!idUhhHoL+$>D&4zfUD^6fqz3y3N_8$(`n96q8>X}YM0t~MDSDiO z?(u- zh?gP@`bU5j2lku=hl_ACYfHuFp{yYt)vFWLI+l00U%k#Qw}ZCbkN7$2#c%aD#R7OT zGJKIQ^~z>H`=~s8kZU!C^d3h=1-qj0AOrPFhbwJMc<=|*ZtK5T$7Dasxd_t$S@>S) zbsW8ZV4WE(^2Xum8R!CB*uXtjq85Hkp}^H1E(J+CeyH_k_T{#8pB;_sKc%=33E`uU zoilauiC?HlYXgeJE#EYP4Otg5@~gOEQ*l}VyK$jdKigRw2?4Iu$S9SjBt~7%>4DEm zy5@(97Yzz-U6nJvT=?>>Y~>zSq>MW>3}!!Y#|ie{g4ixZ7i$EWXe$Vdl(#MkoQ+W<42s+%`_Fgb`xTy) z^nu|^bI+H*Qc=?y1!==P_f_igD?)Ggrvm!QO2eoKKMoF4El5ti_3oF|_>R=`v|%|+ zVcN#}K3i?Jlk@Lg*#8jNh`hW=t+ZCEo!*!ci}&Wp5J3k9)8E`I8jnBFDA1DXQu&_A 
zXC#6AC7`kT8d)E!uyIK9#BzyFxBdR(nV|c}aAVn-dq?eK`6{h^R!;^2!t$ce4O8vS zeU(C=AU8U?X{Q(6y%OsUnNW?0J*M4)_sYkuDR&9ES*))CwbU&zES&DQ`z}^8Ge>6+ zA1nSaQA+M`7%P-TsG^?;3B=d8MPJ;#?xBIG1$L}3?Fx1RD#*4WHCUo_;d3`)c6tb8 zrZo|A6*+sB0>=7jK~a?~o;+?H6P^sx8PAcnBtGL$-8sg$5SgE>v?552w$>7$y_l4%V>c(Iup?8}kqcvWC&w+6;%A)GnT zvaonSFAu9Kzq~A|-5dMH`)b=A<;See<^Y(nmFZCx*B8yT3Ar`bW#jt}q@&T)6#MQr zYq^T05+;6Un!XOoBgGuMP58 znMmx;XLr#5yePp0(LNURe=J+%HA&YC$P3I=Kyew{SQPp@lFa@0cFs;2UpZNC;-NS& zy716r1Lm)QgD8G#czjNIy8Sb7KNm5v&C}ljCC%42`ST zGD}6CgS5yAwr24^>zT9rXb<>$syxdZu~%{7=wthTj$D#KYb7j%we~56I9MHHuc^w% z44g?`e#6pmB=u$P?2d-qc3Y`ol-2SMem^YmV0 z>1uF@ODMF3wjNJjcUi5Vm-Z{MA75{$MfFjJ<0kvl`h>P89b?P5f06D80Is}4lMd+> zQ8pclOid6Tf8yaO!noR74INr2-z$b{>jK$6l?KVBW_sQQ0l9AEa1=Plz1Z(Gjcl3J zK%0cWX-1v~PP@vCDysb+DDO?uNLvukf*nr}3-MYF?*c7Z2InreQPZ&w1|@d6&!HdF z`BFexcI>rT8K{=DAvX6WHc3h=kx{moEG@=8$*UdRckD`p>ithMv*TsoYkW{2BFKjv zdf3citZn7r5QLiW$EqEKdm0ZX(pBVd6+UU>UU$H6X9_Xl3Zdvv&GikSPqDs=)uFq} zDBOiqSaTYxZa8OET(>wMdEySAJh8}Bv1zSaP35g7CAWWmR4nZCTmm}+utvJBnx-Pp zV}|0tDF8-7w%1>@>)&7XV-gjQ2M#%9U{GxI?_j}PM>E%GalK!d`UAeKD&D6N<*x+` z>Fw=ky%?{tM>o?7#t23pTT(+|q0tF`(QT~4@(IgH`!*pFip{29S}nNd({)rM zf(RL)v7?flvCZ(-V2H-n+ydEmA-|~8l+jhI8Haq+ud!EpDbYiMlD&fUXll!Gr-^qI zDo$F~imKzUNhyrvNFb6}_+nmmPRk$Aj#5T-hC?{x z@Am74!>>?Cn;K+aRU*MYO&^&blw(=dvJ@pM-Iml@Ki8>jWIhmFnwj`ABvS}8|5A=^ zthu(WD{_slB({!R@`HV;77@m8e(_v|bfr84%4(MP#Uw>q_ila>p)Q3U{UWOGY_fvZ4E3{aI#uls8l;O`2%702{Ih^FI7DR`1~(Lvj(G9~rR@aNK<@ z0%SV7h6G^gqgO%?Lr}gGv&^H>-!1Z4lj1_XEu4-{pEi67Puh^;Pt5+yxBpiZQ2ofsj7G}nf2v*pP%*<%cPI@PX35Y{n%l#t&y|!Oq>L<|t zpqNn1*z)JV9-1Kw8c}mciu0;z*Dg|=FCEpmA`b?qRhRWA8{ImN><{XbVe+(U9~Hy| z_6-Ik4z#~4lzM~+RajBicO^YaP627|BQ{Oc9Gl;Pk?wGh(Pbydoti-gz@DcypD%BZ zmnzLP9S^L&Ih@{>cev0)WlGPO=I!l%92S$;&V>-=mp0LXGkj-b>HBI=qjn|}0q)Pi zi&}3vSw`SrsGB57Locjw4R{AYaeZZaN&dc4POo!aEpY+yxz*_rk<5CD#=U6awn=H+ z81oS>j*fT;_?_5MaJ&_kV^Temy;!d9Ep*hxyXmqM#w2sOHM2o4*z5^us_dNeC7y*N zN`o)!{EzLrE5`k^JN=LdChU5_fqFyD9|C_Y^S{*!H^L<9-*d4V0{9~R9Fc7ZIQQHbQNM4uUtBe|KxFEi_Ktl%$)4o 
zzRa{JOJzlur5GS(U=krY6d8i!_~&>^Y!`N_SLil@f_mZ8tsbW? zvp0=cyJclM$*nG%G;^vsH6g#3qg*7>N6(V&A}87L>IP#>V8oDhX}vE?^W-y?Z02JI zGR_5zN)ucl4OS9gP*3n2>b*hY zMd=s}38d{U(~C-GZN;ZAj~P6e^`=@q@2g8`UyslY9dt|_XXn~mpWxR*hi}mLqhpQN z99Rm81S1jSZ5UgB&IJG{M+5p&azx*)K0F8_Zuk|%D{V52HonLlWfI(OkH~0C7?%`s z5YFG;QS&xe+mUcz^<+;U3AF)9qwo6fw3va%T>Y} zGWZmb1ADW_PT!h0QD;O=h-2O?r9Q{QtpzS6l&=>B1x)M5?dtXqWA8uU3zyUS$)!HN za%C`_%x`NvhJpJn3TQ-r-9YsA19tg~Ts}fF6B`%dPOlsg7l|1-%U2nS^c7-}V9aQX z6`)P{_!i!Rz)t#IZ>mE~OcBcH$cNh{fy?dKR#n9=WZkx*I2#DsMaCQr@er2#g@6Fv?$3T(U{U4O zt^Dt{MpuGwJ$_0Xujo0tiSs*8pcsA|{CVLi>q${*L^!cX6x4fF@zyU{fJBE5=8jeuQ8-Z5Y47myYfeeI^ENDZ1gc zC=}#UW9AHk`4T0Dq1L`gIYCx%9nfOZ%gjQGcrB z7>25$)_@`#6?~3Cs;1u*c4^SfcIOc3?eM=H2I&nTIg5njNQcBi44{CGw1TaAkSS+R zt^A;W7kzO&jrh&1pWOFHPj6AT;E3*cI$)(d-4uk|_C)SRwf{^`!|s@!8?_1kc2E!- zq-*rNf59TPkN>#Vn}>x!KAXyQ9+{~1Yr)ygR5&F{&;XqYs2~HEz~r#-EvM1CXBmy9 z$-p&+C(OdA20esu@rhY0@_q|G%QE#1zA>v#j&s^(?qvY)3(VsQO--g|dBIY$BU-`k z3TiBj{^~=|7%FfK`sq3bn>=sR=SFBf*zpdB!d`vz^3kGIltI4ww(fYk7xsrw{IP`r zWO)6LXkw9j`F~4@nGhaRn-udECsq*nn+_;z{ZmfO@BwtTeBXWGa(aCu^=r*FWALlb zH({SoNpw~udgMal*Mdb3Xupe(6FRo>F3L8jg!=2ho9Y>4zv4t^i~gRxj`sVNtIJ+B zg+`H_#9O;dDxsT9b3U{DAM0podMruPtCtqg0PoJ&7;t^p^aW1X&SMjK-edtm=BPY_ zAm4&~!@BqE5ok@?nZWlGQcUZoW#G+`0$C({EdI_3{00JORU?B4p^@+!9dt!xDt>w~ zrkAD3U9L)}>MdQ~SeC&j3;fe43U;}9bP*%R&ZqXHV+0FL@1 z3(4JrlHcA5`ZU~=X59+QRDaW!i#N^#*X5EeE48|PRhh>*KzAtWj(&O?rG$2)n}^&2 z4kjR(ek~y`Xbc!EF(Q5SYaF+3hdIfFPnCDBcYtOSmtpnkCl}FL_L)2-D^kwegBAer zo}sF(l}KQ}(;l-TN5|oGK<5$R^6T<8^f2Of_7t3mI~>JpbFOF&pY^|s_^OAM-#{>P z^2{g8#Xa%o@i_N)4>D`Cpx*1mUk-689*#c~U%f)g%e6|fu#A2W;j`}4>|YDG^MPkg z3#B$yhg-Ymp&{s|n)O_$eUA zH~Mf2Zc2K>uvtC8F?*G5dJADi1Z=v0odA>{Dckz(xR+-82)pX4>z#XZD9wJgtT46b za?5q;sHN70IE7YhCjNQoOLq%LVDyeT9%Xi0%K4Yen!|lrdz@wY7lHPN?4EQ!`=6W( z>E9cHoc)|yo3`EVX7)xW*Z$Q9kUZk@XXRsR7%%@#-T$SV))Q6|>jbh2xHRyI`qhVF zq^Yp$P4G2#RYFriUIxf2cE$Y$jsKsm${#)l9ZcvkyCNI@*9fpB(}8)?P{5?drqn&@ zo-R0@$a8%AdlQ-T_+jsMXzb8f(ALhTR6vdP@pt466cPx4KOve8UAhFU@VY>(Ya%=) 
zU`>XK90AlF=(A9fpOl6}(@(S2XC7wIt-;r$B0hZ(dv1jyw6%Fxik9a`l+YNnr;#b_ zmd}9$KhJ-VUwLQ|7k{)|+eF!8ZDN_&3t9PUV7JTSmPJ0`;2C_Tej$NB!G%ez)A&~@ zR^>mPxK6}SK${6G_<<*ckYSkill7Tk`P_q30n_)j&^}0})!?|Bp zlalu`wWj=ry7+pbrxweEsoW!sK8AvT@U1UZAT`&DMilS;7KK@F6fE(!3DfE#mR-ptv^4-b`J_NXveb) zXUJUPF&t+O4Ngsz9v+Dp+d|~CWpe(7y(iFJ!mU69dE3@RE)$bVKjJ_vol^iloeGqZ z4<>2h|6tx5OQD9IZO-~3DrwLHvZFwXF4M?-KF<+^J9WaY`3+sEx5|oxWiqaQZi|3* z+?DHGp2-rqK;5(~^NW_Yhk3!>>7V+Toste`+G8bBj$88*?{Msr@%9U>eb^TbFc0*X zFv>;nY+-Ik{Z{uz$poNDanMC{z~Ox^9^?mJk)L88gUQ&aIHDSrdaL+G=Yd_vWqCe< zjvBj=5c>GmUQ*UHT;NS5AEtgv`>xYoG4aZw-77dk|M(R)U@@9;z=CdKn>psv4=7Qs z3R_z!EFt=BW!w%iWLbH#A*~i)8Z+Ty`7EG%#{u{O9n(Ki(gz?p4)82?*JSb(ogs3ut8*MI0$bLm0tQ8c$X=ZVAC9hV zocF7tr7EQhl;W{vRXnDAS>J0AH9Vr5Me+_+7}y$(gwKQMvYZQjGd^+$!tfe}jGu9B zrL_BidsVjGxhmdwwn^)&UFSaL^-FYOKXc*xRqa;lz7$HnJNgxsX!AXJT|?(5dzPDs zjJq^0|0YCAQS0}dcPni+USo&eoTr17zqHXJY9^qk&;h$fTCE)Scu9(Dozr!w175vB zY~2c_-m%Dn5VJ2~rL~yAqK!XDryuefD?}_)4dC-RV(oe6ylnDkk7$+J;T2PPFU3Dz zK_)dhpb5_cF!;Tt*f!tRiK|^t9HK~8vZMd&azc`}g8@T>-fAQeZ=DqHt7dm?7olfA zbl*6f41jqY6C$AGJgkEzMgpcM#Wyuh;?XeG>B>d5vj%`zbibSdZQ*G44TVwK&>L3p zV3u@Mi`1_zbUGWX^SS;B?MkR)R)%!Y;0(~6ITv$I@&rljc>YD$$kz(`BoOMIz=2y6!9imyGub`R9A{!noDHtbS8J)*>)JA- z?}u%WFOok2t{dJdM6dS!g7M@WNpDWHyWTe|6(h1poT$>SHpjEJa((DYh){)uXBt4& z8u15aML!sS+1V70(4vSNE*`P@>oo7sm?t@$?qid+;n+?JpQn4AJw%kUs?^uKe3UaX z!2JEAsW~-ZVmZM0y+|n!di?-!6ZHsqVaQv~6G2Q;xB0+I9vDz=NkKN8vlX`g7v?#{mS048%hl#IXF zd#qUugpR}9hXh}#m+CCJr(sh*xX(6-70t4zRFd+B*%(r;P^~f2h z_$41Tq|;!I1BcQDmkfS%u=ZaUC3)A`1s0B6NRF<)T4^3VkeH*so>yd^@I?i7$g%Y& z=%!n2P_s;~B81*+wqE`=pDJFg7RDw#m!UX;xnaCWQEhBfvXpNtdEkY$FV{dutak|H zV%#h9=MoxeeMclRLCZ3H_YEJQ8IA77Ipv{*DQTF`t^nKyj^zyg#Ksi4oCq?5jzNg2 z-_pTJ?$R;xDaHa&>nehfrJr)~$vcDoT#gJW6IlEOTQWEbmUzz$905TC&ZhHOw`&nw zo3fY)F}|FYI#zM6Zr>RnN{34O#7S&GqzA8qY)cFT^+51z$EUwZFw0;@J;AjiH;KH; z|EijU>~O@_${P$F7Pz0KSL}p(7H2-L_*!fXxdp zioizJ|DTM&$`<{qZd+>{2k-NTw=0dHpZ;!#^LScIF=c=jUuS}^R&HZ3TEbZv4JU#i1NB_I>tL+MM#~pXt6fXH8c-QvUX0>zlAr%Su 
zd;ZC0W$-1x!j$up03IF6`<0N_*n;oSfd6ejuwkCt{1Gff`)f|Gg-p&Ny%QKA$4KRR zC>zK^W%$483ef)R3WWdu9Q6N7S0K3dqdo3S+pKFT)#$*0WHdW7)#_`)YvUciG4-}= z-h3<8L{1sm27*-9w9)v;!Be}J_1R~n{J)mWd<}xoeX|FD2Fr?*C{*j6h1Se41#?q$ zYb}~>?F?fQeZ6PvS)lLAWh8vhayjruj9vKkcBSu(8u=iwQCl0R@hh_%pl4AM9MRrN z__{d^c{a|+GPl3{rbPL8EKj!HaP=wN(NWNo$Na;WC^?|o60%2;23U)dl|AB&(y`Xq zC7WEoSf(fHeRxr;hf~s{g*R-V(r761$q{QaA;(#>EM)hByLmY)=8HF@ZsqL`Zs*y& zUe};Sfy?MWpLq>}?QbhW4}xc6=X`Pk%hY$7rq#@tW?7Z99qX2U*;@3zc&z`nBD&X8 zSA-|vci6#V3=h7lh)GdAww+XgG8!-cyfcTya?U>ZtHBGiWMAjPyzh&w;DK3o(RUP+ zOUp~lRS<_8|h9EVFE94pO2sU6Q3 zQU}hoi3!ZHQ7#q|dz(HW4@69H=CIL#_FBn1S=?c;p>!M+wgeArp|8B&;i|Oh-Eu84 z-;`zmzt4N;JS*?i;emx$&?a@VBT2HysInY5T@9#W@2A^eu4RA2)A;3EMD#uCzI2a} zH^1Rz1H8(6h<7MLvRh(r*hf%Ri2}Eyv|T?l=Cjp{pK7z?c@$}ZhxVc%f?gIB9Eo2D z`g|D-nIcmw38ACk2;qD; zi!~%u$XoW#smH&jln6RhYy2~EL*jPXkG0OD=z4r@1I~7Q>tf?C%Wi@&Y<(^s(+8f6 zU9;td2kQb(m!jV93k_)Sj?9Lw$0Aj3j(|oQ4c`A-h}$SDTv02eFi^ASi~~8*%+}kkrWs&F(>w1OyA(6eMcK{4&By`shumr05?q zhXKV{3^tw4znCEWtc{A9fa<--zI;0IkCyMh#qYn-^tAh5dTsA#H{#>IP_yyTMLf5d zJ|qb^kfchT3GrY1w6IgIM}VGM(zl+qV4#rT$uI&;)Fr`EV%fyNek&X1{O$n>3DWh6 zSfRYlI{QngW_^ureGQi47wLmmo)AOH+3|p`h5S7ueJy0gb7L%&#r#Qz@!qOUWj#*> zNagE#RMLZPiPY6jot`)%qD>P6&VA*@k@Fd&dg0YyBDZ((qxs?}H#Vfj(Gh;$@C1S? 
zAky3Sv1o$HXeN+@sM_022U6e|w{!Qts~ev!@UX<9{P#4&b@#1YGHmTcLKnDkQbHE0 zEL+Mt78ejA=PICj3DB3C=(&rGQV3_6f#;tZv-jKIW;eeh;z!`|7Jv z$X}Uzn{;!6&cg9i@litXn6Kt6()}63=HRAyXqYZgQ@Cp7N}4F$mmuUk`-s;c#8DeB zrypxOjj>Q%p_t+wLHu@t#M-$TG+WETyS@0j_Xh%NHTB+eb&@T!qzmA0XXeZ2zb5W@ zJ8jutVSufL>BG_evE5|(BV={2i?&(nwEbwMO}JXS2T#^AJfOlEq0g!Z^-RWo7f>4T-+-7a4Q`>i{acy`5~fR$pO?V>pg8RMT-DbQ=MQb&r!j|SN)$`NrBi1RO}RqWW>+ei#a-n^x~7hIq_+7y~zMn zt#4(qZfv7b=Lh^|(DpAM2MEgQ0TKJ60^+#o?;{wu~%84nJ(_dy0zg750xPab87zYQVzQL-tofkz&4@mhizd<&K zJp`8j3C2D?YfCH{zqBdwdOyQawMYF|=p>(eq{qbgo)y?@khQ**Q!~nzD>LidMC0fW zOZ=<(zTvo7|JVU|7nqtxC>D7MDLWQeS^#yGsR8v&?63)gR zcKc=eTo+iEMRB0#vFe$(PXPEnpgvS*vS*f90Kiai^YV?lx56UM{$9+nZzKqVW%@Am zU{^f+&+4>Hff86|cAR1i#>Vt&wdZDSMK#m5XSZO6W(sLjm4gS8i%7Ky68geqUQpS-A}9WcfaGi&xfCbH8pH3T1-1Xp;U#NCwScbGNAAVWur@< z$e*peveOSc+_lup!b>kGn|wD9N15cE_8qn};z7}zk{YK__eQ)q!BKqVq z8SLq(%8r%#-xk1UeU975pg;f1opn;axD$E$#~5isgrhQJbnaF|eb4j1e^4T{X^4V( z-q8y!f0XfoHc_zJDH~jV_*z&>%5_p1=mI;S61s#B{exUCV6g9T-eK_8XtbaLwu_Kl zc}_5oZEr(jIJ9+2LF%|6i=XtnmlWq;_FGuGiSFcUbQV%Bou&|mwV(Hye{4pqK#43T zPzu_455+q!GTfp-8+#~LQd2EFhj#sk4Nub1?M}s^d|E?p(J<=`?s%Y?8ee4KyJ48O z+EHzY-U>$eKUjOqur{|XTDX)J+F~sQf)pw4?%Lu-i(9bb?rx>DP&CEe-Q6X)LvVL@ zCurcNd!M~e?|HuG`*nX3R`Rab=2~NpG3OwuW|q}MOYn*?#%D*PO)^gr|4UlSFDgqL zdy4@S?5?X45|4WjMFPw@{A_{3onQn{6jV$e9!+#8f9X+D1g3Sxkf=+S#n%Z-mq~rv zv9WNW3v}{1jCsc47@M}Ja&lF0FHGkdSb7WSZJZ*~%rW7(Vf1B-4^PHSUV1i3N9`n~UFoN{v0- z2#-TLbSCt^UCa|^60>P##D!}|W_EmJW2+x>nP*CCmEPU#jXH?$gGZ(Ibb_y7c>IU=k9s4u z#VsK9Ey=6YBcGPi2^Y7UhaDFlK-E$7w>6*7%YEfn{YN<4F7=hf>aEX&BW_^r%Z-!w zg!z`!0q>X8yo){@0xQl?sGIHz_xR4fUxGyd4)qW6qC!@0lT3g{-hiD)o=1-v7vITU zXAC8j&9eoXOxzzwY?0vNv?RET6v?wCk*D_@*aW3JJ=+r#f?miT{On!w5^OP|sRX%? 
z)^E`emo%k^jYS0b*ES_&YUYWS@mL$?n*3{v7d;L86c7YrkJFut<^fbp?)n?$3{q!> zw?QE|`{g|6;rQ;JOv8-|%>kUe^THr{_AvuLCyA!+zSx@lp7d)PIwfTHq;B_+?x;^7 zz0PnvtH~V`1x9R6Misj{M{c5gC6E=I58fIhvkn)eRq-<18-^LjnwiMjJiN=#y2s44 zijw%|j$vhMU!K3Fw+IxQw)%XyEiG)dFr6(vkEK(KuNClhlqaM4mp59GZob_ZbSBG~ zbFd`;5lOSkD{N4v&}LdYJG}XwNB%JP%nHXGdOQ>pLN1d6ZJEUE@d<%+F)A%f$a&zI zE)nH|@*c?`jH7oBc1PvDF}Zm_PpzQ2BgrfFeGa{eh=s%OSUGy__}O$`Cr~wEMoUJ| zz#0nyQ+)ZI{Csd^aDeyu=QPVhctn`QFw~I`P zGt#b!=VZ1DZV%Uk%1fOOAyYm%mrt~9j2bf_6V`;`Jkz}xAph=f~uR$`%=%x%ll zCKqUNWcB9)RG|hJp57##$JdUy92f1*qFbMXZ%7>w8}%+iZjUf;w~bnAnJ3mc@Dmz$ z1TxRXO?yokQ9qYS2;6y{uDNwgkDBG-Bh5_&S3R%Rq39rq-+R78mNQ66G48qC+oXr- z6NoZYBwEL>(#iWS=v9#Ispt~k+TbrCIbx>^yzLtSq+q8&ziL@ZGzj$dqdv;(5tLSm z1m4}nz+ZNP{?d85L)_xWKbiN_Tbz+CVC_!YJx`jydM%kQDM=M^;U+6EP-@ECR8%C&dq3%-8;HWzbOs|CcU!@qAz72GI! zjJ|u<`#`kv>yl`>4c%Elcvx`kO5N8Dgf?@;QbSP#ajkK*3lV%Z>f!C}6WicJr(>FX z_26H0{MvZR=!8#N9JR8KM8Syk@ZFLRZEBsVN4bMO4%uZYiy`BQf`L~6_mRt&oOh&M z^m=j2(!P1JBf^vgsjwVbxY65tndPFeY(}oKRI<(G8==xUUWG;Wmtdmn5o{#IWLcS2((;k zu%=obFJBJ#@1XQYbs`Q=d3W9T6|8mX(&jy7(`7v_PA#-?HGiA@?y9$$)t;y}+A8qy zhtEi!Ynz6g3cU~9{xFUi!7eCg<&DB9Mv~G-8Oc7yx$$}9VmQYrMsw)xwCioaFD?D5 z*@s!&3lQk>(`o4)l(-)$!OBQk@UfB|K>EVBg<-@Sio;FB1i zk9<1XmG(1H7b=u%Jse;OM1q?HPQ#SY{$t;K(6a%1_(rk*k3a;Cs<}4 z>K9psiS)XB-gPM(*xf?kjol<0?8gQ?Z1eH%3uantDSXEh$TFJHM8uWgqq=TO7oi8> zm!0Wp&j!nKm5{1}dq4E(fU6|~7E2`d0@88z#Vm)i>-vpN44LQ-pcO)_16&Kk@W5p( zv$fD?R5@xqoqCk{0&oWtE9O<#u-wmFkR%Ru1nsT~^juiJ*~J^j2&?H_7$4$*-p0Kt zR?M-kR5x}6$s&o$#E0L?YvZg6vn^?|M8BDjF@3aiK7h4AZqe~fD;6u~7)@E;Dpj5H zk;j5Xl(n}~SAnKZ^85Q#Iy#ZnW9hP;-{mCsh3>AbBNxN+{KH#>2%~#$9+}5r+r)>B zK&G{Hjc-TMjBj5pgF9|lvy$^N#nvBI%Csx?Bb{MnP(^cgG*LiEppSf_Zj+tWw@0
  • B!aiA=hVv&Hw2lLL&rj75#n$Vq+Ew@}@-k{wTKC4)sM|o3cDIF@ zo)&B73!+9dkj}5;xY3w)q`mfC0>!)?lrwHZ|6UfkX{tFl6s!%3C#J*rKv#p}CBukZ zna;)?oA~|pQBrc4`!PS`;;+r@!~Fmdgq@~pIQqb*?nS1@0NqEDXmac2NfHG7@8y$a zgnp`gl8l1_f1Iwl3Lu}QsVV-jYq8ZUWzD^9#c%ST@l;&mmdo{!x+?vkIcDS67_LT%J#g8jktS%OL%IPo{IicUiZ$ zW~|=-ywP1mK$GU+;K=A&uvz!K!~}kdO&p46+UB%FOBr<2=X^CLg}7z|$m{IZtllRl zcjYKn9ip{Xh(CAUm|DbiT#7WFXDOw7_9O+GqkBT~2WT!Gjq`f8|G~5*yW7P$+=yX{z8Vx~*n6_U% z{BEOTc(}OVvu@v2?QG41Ne_YJgdwA=~+AkpR~jo5S`k1B(V;V+#e6N2eybPn$G zKJ==R+juXa^w%Ok?IAShch~oZSQW#!4Jp+EK7`FYMt?)z&{~a0X0v*w!0GNaRG!w) z*!${bnnKg4b&xWGqhknOxcr+2F0{Rf1^Ik}2Qk#n37!#TbA#`^ssnNFNXs`FK zpBQiJv1wR(Rh?^@o+{C<=xQ&*JVtC5asc1J;8ZbK<>~Ex-n4xP-Bq*Hl}S%19oqVT zYbLR1Eq9Fp(IkiFZ_(u9WLitE$HjldZkxPL z)36XQLqWstRQX*CZGW-nW-`#3o)_e@42z|`nN}5TaLx)R3nb**_YpZ)XQ$VbXo$Rm zm)DyvIRzK!3Xm_e3&(GvHWYT`jr!Ky8D<0fXoeT1Xm@J(AD__N3Ai0LafL}ulA7+$t_`poZPnDw1yZtWKC|0T)0?2j~4)SL@vg~x?fu7Us zW2+3(4H3yeg-ZgU>-3yqP)q8A%r}C%IuK>+#$>Y%){y?`;gednH@_kSTy=o97B&JD zD{`VkSt5K7Q<7}ST|__#+3soi2q}5N)le5|9M#N*;p)>YA%zKZ6<`%yAEb=6vR!la z>9V^KLvou@%{8)~yZ;2~d=)J&&Zg1g0-4CoCZVFA5n{3Sc&|m%V)hJPu%Y4m6`HXd z&yBWxG-7abfu7TRjRbkYgyE&_BGFWj>) zz?DxGyZ7^?PdP71_hWWVh%>KjTb2H4GpZ*G|9H6XX8^^(XjdvA{?loBorHPI6X^Ac zw^zcbZJ3hEHQ;eFO_;}Opvy{(V_oi(?dh3PDKeX?%{%A8M}=mhT~HZ7!{hI4S$NPC z`-ncj??)1Ao{Wt0cDDdwo!db!Xf-Um&!~C&d|4Jh#yw_Ht;!9sajNVZJ7+4q-lc22 z?V-J@cZi)8PXrS4s}_FFhsLzuU|*3KXi?HoShiKNdD{B3sI_?BC!THLzDG$>P9Z6; zTW#<5eRi8esU?Wm-z|~8h==3Mg`bA%bi2RJ|GK&nXgG;gi#z*A%S5it3PEgNP=mGW zq|Ea&mo6d=85f-^Ry-Etu0#TZY^+kK&0gZ`{_30NbF$5{xY$*XKgka`++<8^s$adg z?412AzN5odjAI3%k0+b6GV7xLV#tldtLLXos61|z@HN~ue>@eC5v9xcd|!k{3zvfW z6=k1x8Po#`IHh3wM$`AS@p#)Bq2WkRJgE*2b#EI%34_eN;^tYgMqek;kfFN!R#Ef0 zpD2~pOT{~-?4RT+`$;mlmTj?XGN?))6Mp*y5JOzs1h@2_%x>ngNAMnbO9 zuXiu6AojP?0Ay0XMzwMgBOZ$qQL#-6c-Nb9gIGWqN7A=&SkQUf|4&d~P!zZ-H|aecPUXQ!svGin%yl0P|H$^egnFMeArH%0wLa+v?g(?fn; zS(w2<&eMoVITPffezz}w?DV44#b@>s-jbd~qDE+?xxL=QqcfSFmKM6J&Y<}pFZbeS zm5H|e!thd@0P_eE0?HMOHA5zQGW&_YCox&Yo8a3kvP3q?jBb_s_wKyA 
z$4BT|$*eM1p1)V}j>hA~x?=&sLsb4A|J_+{RR)sL|NaZO#Ub+kSN)<7iK{mrT%l^s zH>AD9nmYoawHd1SO#qUg=;a01;_P3C^)48c=7I&Zs76P9_}4f}>44yMIyTp+_J;=e z?Qqc10P%{9B>G<-<9>9_cS*dG+mYqJ#tl^4`LYDrRWSeAC-=MW^0_r_-_EL$maw6G z@!|_%R#@jzDmfH?yE;CfeDvdm#s;pmM-a))4d5FrNDx_ zm+uD4OV;Xj3rYh1l>Y+O5~c9PawU3Tu_R`xzu=z$c|Y$`5x@*AQ-zHb|1v`iG0+Zc zo9u%2uh&^9?s{gDVps(q7USWlrP~7bQ4C`&d1&T-bXBdSKm+Yh{ffivzn#zB$ijFN zVqZ|GlGNe0r$)vF@2(m>-pX9r`}Stqdg*Zd?mN-^KqK4=?-Iqob}o53scXQ z$KB5*9U3scsxisK%3Z#@*}AFT{De~P`)e8A4JtcNS>|7Z0=|&`}PdFfkbIA5P+acG?~QNncbVp-lX*J13%OOaB+S6R^XJ zaXa7pR)+vLfP-&aHqA*L6xz%)k7+S5X?`hYj-5@UwDqss$wsVm8qr zhFON?j0RAa|Ud5MQoqr@*_hZu;vfdUtG^9Ug3sI$uZd z@^djedq;0S(zl@J1w$#1Cn}x-xSS!xp*`SCH=&qNRT&<3{o?hRucPxX9+wQQ51E-De=d9=VYLE^2s~f zS2WEM-}=!0waJsh(z+PCudp%G8AOgUXEB4&^u^Kv zZu4b$T$YhcEY(q;tB^+=DJhsJUsGi26PF*5w9L4KY4eSZ^k_yHWI;RXXfr)_a5=|E zQ*87+n*HfX0j|0t=vMS@mdx}amOrO(%RY+0+^9SY&%P~IIDe680Yccy6MXvaoEW~! z%_Uc<3Jk-63u{>uM2MHCGiBV=icCA^kTQ=XiDb+&0vnL_QD)8YypQYvCN>t?F&ndbY z$sCZBHEVrd5oF8TB@DUIVrkt&vEsJahvOv|dEN?I#lnynqnpdcuF!NL=@oN$nnGN) zf-w1OnEqD zLw*3fc@`dm{Fo$f>^5KQqnpA?UUMdX|j9_!bXFctzj5;=StRltdB zjYIoM@>e&9Y1_%|6-1NVso??OTgh@rDKX$zF#{Wv&NS}gr@XzX-MOJA?=Iem;eF!j zDPNPzMjrxw1sX8>9*D+LVQ|kJin&eT=*_%$BL|+ogpEk)z!gDKq>_*n8~&b$QKrGE zO1GV+sEJ;*APv{mwc?c{*3F zX+00k+i5|%qV70rUJSO)v%a(RJm$sW?^rqWV(-n{eg37vF<{I=-lrm(i&4QgA^_kT==1^Qs)^W4il|^evA` z5z~8$lW3o*flpBZ#S|qWnyp_&2ED0n*W3$4LiKMpb(L*ID2{ZIa61eNSm|6c@UGeM z5$zos92Gd3R9~^zy~)QHVTU^Fpt8}8H_vyS8}(W!f}ADl+$Vk*&?*!#8t16PHxf3e zyPv(Q;Ac2R%MKI!yafggDUFb5ID10e$A2^xqncLMJ3P~0sZr8B36`{%)6IVSX5}av zh!?qYmGWHJ%h#PQ!Vt|av!o>xex0}%*Eo(8le*9+Cn zWCx$Cji>(jIuWQIPm z8DHQu!Ql%^sc+NOYcn`1)jP9%N5el64~aRc2wOEk+xtAiDw-W{pxDdXu4>pnEPwbZ{d#J< z7TSdoGWgYX^f`3gA{+zw?gT_h_30a`r^nKo=uCv zug#Kre4T5T3nzB?5_ioXjNQpjG?B|?UlZ%GK+hIOJHmFv6?Kn9II2uZuc`Y za?+Qeb7uXZDKP_Yht-xKU1!7RqH6PH#wU0wcnj+7GSM+b{=U6aN_MiL6JgY@l7gPG zLwvn!n0Qcq^F!*LAiAgs`&aO`vYF=|37gF;9xy>c&Z3Fe3gZ%zKbBT`ykT+9OR_l9pgGxU!o(9{S3+9{xSK@u%wRM22_F0fJzafmn$5 
z3dj+GgwK%#NIA1uSzAV;Ejw^uq>$i2gF7U+6P&_BfDqg%+}$-u;RJW5(BN9QOIMO} z&pr45`=NXEOOMf)hZ?n4E!k_XDc}6&B3Ma53Jd)eIvgAvmW;HxG8`OYEF2sHF)9kI zG9?E)QwwVoI5^FKIQJ*2`0~{*Bf}_~@zh%>G3%Joj>Ck0 z)LRwCO2HJO61Yl8s!F0t?IO?E$-mN36!^kN8Il!d_)?I$qTYC4f*-G;4PL{8f;xJ7 z_f8tlmy_6VuS;@zdzJ8+;XbUg8|zT&>6;N~=r16NAfm3o(MD2Vi%acl>LerP+_^to zRtbLDrr2m$&ntb@dVFMdYVjTuf)klWy|@UwKohg4gfn+Zmv+E}OY|0J!6n6O2qt(s zC@qPK;F~+!1rJGd#PhH-0x=VN=m3!R5Jg1EqI%)saCr|-e_SJxxqOY1CiePnr}QQ( zDcDdxT_lv7HL z4oc0$b)iUl*DHZaMFg=8Cj`}FkUup|PV`;5u(y+CM4F(y`Va|?f1vRuerdH|kuAK1 zR~j@(Xov!b>?w0&iYmGJtot?5&x~sS4e9mLy01e09CSaryq|c)P+Gh0XaDRFV;*wd zH-2&hx@*tB%fi}!*uHu~m_+t6m`l}2IRiYwqo``88<|c0IT-aY0=tqEVj+-(Uto4E z74+JwsZR^HJ>zf273pc+e zG_q+O!U{v#6v5d>)W;3cT_Qt3^3P~#i050MaS0TCI>`^1P?vw>P>(3H z!uk15h}}j z37d-LnusMr(=V;QKEEC4eDZ<#o31hL*<&P#Tv6EADOtDUF?5yGBQYWoeh|qNmov~E z`E~m(GoyfZn&{5D5BnDK3q#CG)9;-8c3qoiq{KCB9okPzL#9QpC>O>C&0WZX)kP$; z{HNdk$one&81rG@!1i%)qxBYIe4oBH7lqdy{7rzV`4K1yEuU^J-M;U6eNCZQ?PJw3 zcjWw?sO8CJ+krjCN&vL2)SlJT3hy!O%cV}#Q9dt(*_QlkYKAaB2Z2eR$@GI1G=~^v zo!Q6#W$Ty3A>>Sgryyx;bkxw*DNz?=Vf`3;(n7sv2aYNRIO{_w1k&RQDfJ@y=zSA0 zXov2*C>^SD@kogwgd(Cx0LRqu*!yJ>&Jcp!Hf1JMx&GaT5O^90?`J*}Vc&ogB48bP^Y_3*}hkZ6p%alR}ZCNO)uHxwkBl1o&_PVwI!% zbcp@_=$SD})T$VI0k$HS86@U#K;$s7iBaBd0(8lzN}+Ttv25>N<$fL)1!eBP_fUc_ zpi_Oufai?ljGz|K_{CCkQ4Ao7mGwiZx`6s+n<^XU;1k&nh6)mEXgMBg&rrJCiQ`;j=^RL&8JELt5nL$T$J71IPmyCEiHzOQ1;*ciVh3 z!%wGEr?;RFpqrvArSqc4qd%n|d0+QlDECgJ;ah%Z$O`w-#g(Dws|TbOlH~x>cAxyL zte^SHxmB6yS?_ZBMV$k7$*0~-q5GwgDU4En$NPvTh06V<>5Ik}=;Y*N$K?EE*(Cd< zRDo3?R3$?Bk%|L1_bZLK!AJDU&{;W8$-B2=GL+)kVr!x?;wo<)Q#a{{UNcjdvXzoB z2fGN{5FTL7f0(D{N8#7vXKZ9^6mxQR0ywFhy#amzA_5I=}zKy^4CL}Cj$d~y`sc&h(0kc4oUdm!1AnhLC zgW&e{6Urd|ARH-{9`5j&6gq`Z3Od8a!__I^6j|K`T}j<2-CW(vhRyGwhS~=IhLG=e zj*6#jN9HF;$N0w!tDserpHr(?8-R95pTI~@pV=V6fJWa;PkldY2ed7Hc)9IS9ga1F zVue+SYLuLfwU^SI{JX|c)`}JRJM!n5BAIJ%7vBnhk@%AO1qZzzRWZ2YTjE!$U|oE| zr@1eto`pRteda|T_*~^F#?xZlJp!p<#&1u)B?Jq{iOA5&AjzmoWA1nCD=$88DYl%c 
zpb}M<;!rrrTK|q;9owMVs2Va!U?FOuv4gNS3CgItl>jxL>InV~gMCXMcuiGCRf0pqNd>H_Oi@pXP3}$RyM*jtK)h@(b5L4QWU`fw9-&Zy$F zrnp+CZl-pm-e-+$7k6)TReEjvP;>99Q)anv)M8L;-aY-)h`=R*EJ0l(r8wKFb}u^F zPp1;{#8u*w=`s^Q;qmO^%&o%X3vl5i=a|Y}|FXl4&wa|hG^#{eEr;3;59$DQCk3Rh z301euw@9>1c)#)v@J_$uzQefBN8myTN0xhHi5!nKjJ$)Kim8ptgDpgSN7==YLt{)i zNnb-AN-@Z|FZssHP1?;2$yNMqmq?DdvHH4ZthVZ>Nx`{LU1BT=Xs~VA2XTF8ppk?T zPFHx3#gDk|`%u@=)-;g>+_ceTy0pZYEBZdR9ZwGaIobvz*5#!d@Y=1@h1y}zg$EQW z{ZzV7?mE&k7D$ej{v!HVW_Fq(U1`>^eJdC(YG{?=j5m z5e2*&NPd~*kes>MwRs)i7GJ5JFzrW_$ICxI2}!bjw^(AaWyRXm?Zo)V^Wd_15GEkw z8hy*A{vb<4_c!++a2*hnJ8=4s!mlYcub78Y~>umtdu6bf3pmz;fX z(5S563H}+ACzRW=ncuIdbE|d>L2=d|S{HKjDu*s2nm@Pf(ZCil4bVrI#G>lz1RJs`|3sHNJg zTuW7RU$aX8l^(W^{(aPK%)wT`^SHP}1BF(h(jVok(+VXarS5|62QHT6vS3s7;bez$F#{KhI{iCg^pMgdFWfa5xqSJK1(d&l>ibG zckt}nkbI0>pRR&l)nNopvDuq9v2P+AhEB{+H&3xpSCG{Lbias+cZe@`J9Rg75m1rr zGsIy=jYsCid(%#kB{P^&j?g}8tp@&trzSE{*uY^pFG92A-*r3RY8v47;eUeAp7H`dI@@N-Yy|<*NDYUhy+>zq=#u4-asL|um!+H= z`@%z)wMAtUmyaSM8JMN{!Mov%H z{Us%-POnFW>ER$fYMh@<_oRnApMb`vvn4az!JpFaCkh@o5zEt5z>y@AL>z=XZiOfB zwz-eb`;!Z!66~O1XY(@(a`D9)x z-%f4st>4WBe6b^0E`7lq|2aXxo8)e)s;srLa?MBUC6;1#4_nNvJ9?rr8mY9J!b{3; z6I~NR8D>V(#-8m=J<-EA@p@5|A=T;0sXd^2-||!+aYGqa>?#Gj{`Ol{TX5n$qpA&2)tiM(ij?#Sfi6eulK(O}butg;&L6P|I zi!={!@7zE)Kd^d+!s}qfje(@|8>|-^x@Cd-1C8Ew$1W2hr{)8ydzU;{G_p-y%;%7K z*I4MuqS#{c_IAtiq}GJm#ftf*Uyux7U5Y)z5eT_WOh4T7xedKDeT^jN3B3n{SCP~aqj=3(mlDJ|Vnn^z;B?xQ<+;|&R zI?$WU9S8MBY@{DB%Ct^upjqAQO$a7b8B+GNz{&Eu<_`^cGluCJWMBKgbgu*0p^nB<81+sWztm zNnyvABfYk`#Ib6MFXyA?oUZ3+UVM1^D_Jk{H2ZRjpb(u=R9Y{ zm?c>BSWuz^ZgsXa_CgM4UL$TI4%WJg>YU2`%Ki%Vdi@%U3am=JN)J1Oor<2Qq2mCR zmg>sWK-n0pg5(T{&Sf+p-Xk9HO)x-^2<+6XAM>pF8f-vuE1i)Z)#~7DnIVHq|Ay%c ze$jJDBA#d7LKIBO><-MkLVUdQ{DJhF^7}h)mfcpbBGB@mVLg+Iej7s=lMV$pKrbGK zV{Z$2$1NvhIXE~dvA>%TzQ!Nd;Pa=Ge@BDyvUKV-FMR2BH)ZJldg zaHZXMycVR0AFrO)tbdVlXVN`bIK?>`*Q|D87B z{XAK4!aY*}|K{5p4uLV=>YDFWJC6sCt2`L$Xud`k(t%0HzWy)ZkW;18_`a^Ey1|tc z!dGY@(|z#qr8q;7wz-2|!;|M@y-@Bfv`3%xhpf=Q zd=etP*^AcqRx*+nALqG9LPpT26#fh9ZyZt**$k0_c8Gm@KUav3>}sCgyaXapM)dt? 
z?kxHsTUjg}c>eV*C@V19zY0zfI za&B{Cvdwd1vuUvdj4q5H`jq>N+WGtIB2xXqVV(Xl(bcJp;W~b|0wdh|954|5%BNl= z*f3+%_CwZDVz2XVzgDgF1_MtgPbKgAg@uPetknGDT-;T(>uS1Cw;-jkVgX4nzKfjpdPw^7L>(Dk)UMz;2 z(|T<&CFt{-x4{fUYeX#P_GDNBWJ(2~;?-z474Tg(}TPLr^z4v=l>#Dxx*f}$0_bw!G;EFWJM@SoVbN=CcN_*{QYd#w_*KNo@QRp%0wCQu{_l`bZ z$*Q8Ox}r{hk=aF}psq{|jMzkvZf(g7$v4Zh`(ST;HTWGC`JFy%MPEOaMPkI*)2kzM zk-TVn{WRrs3Z-uD!mwb8V7VLPmFKkyl5ZPEd&pZ*#~}r}Gz%}~G@~~$U-t<$XU@gD zp5Cio%YmosH|vhi2l2J=31ao>U5mMj)eDK073Ycbqq66U&c&qkXEaz9Tjw+8j`GIj z*K}wAd25?=7@uUNiV>%oc^q9nU!-9_56>RHV`(oNI{Xe$AfW{`HKRxqFVzaowTODx zJ^X-Y@!Jt~6)O+}cg|DJQpRL0N$&la{4v%gMI)&2Q#Kp|X*FQq(0wHZd{6Tr;xMht zLvKdGx>F%1w7U)ID3@JE8mPiM{o?q7YhZtjwc}K=g@BnZ2B0iYWfh4;+M%3 zO|()JW|#@Q^?(TKa^!U6)&}2zjMB(_`qD!MBcx9dKo_Z5La?$x=mQ`AKC5K|985P3cuLGvE9 zs}v4DD}$Y)0dMOi{H~w7D5;pWm`5iwMVdrIRQ|{4weM?Ec%)RziyBcsBX1bN924vz zTPvQ!rOjeN_ord#v*@G**95GjBXqd}T54sg$Llpd)T(OU3LAHZ^482!i=DFc=aL#Z z<%B$WTpG2T^$j)E3$I#KTt+s8%1-YP0}hh`C=r1kcsDt>tA5v8D*lL_$mQbt8qL@D z(}rA^i)){f(1Q9nq4~o7-sm8H&U<@PQ|wdFX~>|)y?Z|LIPWraI^-@yRyHVsFJaHS z;Ni`+(MJBn$ma9S=O^z@`*&Fn3_z_w_26Qc;vS9L8M^73nP<~wW%A{xrA;LSWy0D9 zkIc7_%gf{C2%pEL$i*Ybo651Kokw#Y_lD9nM2c+$U&6)ND;7++GF;#D&tYLxMPXs* z)zVe$2yKg_2X+EFW?0N{8_COVZEMrq!#+Mn*Q`TpJ+Z!Ud3$hh@TnH6nogSXa{NYi zHY^{E?LL~YxY^jlN(yjrf^Ph1Bd%*o?PuLh`Y7r3-1CPu9XA-iEb-RDh z`1>1~8Uj*m;YTCd|5yy}*Q6-_o%Eka@c+C_Be~T)D)Ii(y)QX!p@f|4`*YQG2UtWr zvHfw3K{<$FlE0M79>drHgoN^^CmPy5q+YSkdhX|3iKzH+3Ll_+O=q^jF@Jxyva`C- zVj?e^TK2_=)e8iT49&`q4l8Z&5Z}us55=2HDCzgG%+24rp1>r6vq6og{7dz57Z6nP z5W(5#qNs($i{kKOd%7wm&*j&>?L%WRVsqy zd=5T4E6aGMOfPGM>W^tEGqy367seS0|9bS3eX%`}AMsk)dm*0F?Z~iUziPnve0S<( zGuf_xvQPy8>5rwSluzP{dP*sov)vN(DshLY7U&TYM#S5{1or4|&{5aaOpImJP$+BI zeATd%mz<+ktRBs*Q|AUb8<%D6`}Umiqm53JOX*ha^z{YA?y@<&>8Ou3{p&DNveI~) z?dBlpqSA7TVGit3pCaHoY|5(FSTk9wqp>?#NU7K4Qj3ii7c+Y^k*_##3vFeUO<<}0 zu{Tp*-!lqnDCpcsFkrdAIjDromyO ze?x1_OIMFYvm(Q4xW!kT`+Va$ zqvzUJX9J;<3V)5JuI+Kj?|NwM@iRp(jzO);kaPZ8cA>os7Od3+Dvk<2 z-l??Qoyd+Yb?1!3)NUhALqBSJ+gwo{OluAXp69#`K%W-z&?J5WP16{`rq_MRYnyVm 
zJ0&XzXL0Gd9woJYP`?~62ja-BpsJ$$wXZ9%$VzwHt}ong3jll8vi z&2#gZ%Y2%%$i1EI9R85u}7{s$0u*~Rt97w#FcXaE1uF>22_`uj1h;_OjGmcyuQ`h6k zTh(M4Q7t6>Tb0@waZiJZn{9GTem}yK8J6YO#B@-psySk%nZ+~+jaR>3?(liM-_lA| zirnn8|In`6MChv4*lasl5Vg%Vvrd)D)gyyJz=EGz%aeK5{)Dva)k!fPaU9)yip+f% zFv=`{4iC2s;*u1p*JSk`Oe?$36oC5GztIJ53JevryX#v#|K)G(5%_QO^+OVsIAZ$U zcy^cy6j&3@a%VnYh88Wg=W4|J>Xzt(8}pauABY89-1K|&*kWG0tAofJ#>?~sM)qp- z-3}YJ)_3yL>$)wWCq1M-Ou$8|-)2pAh5Rgyn(PXqfbsMf*13}wq5Ueu1XJYxYAugV zC*{`%Q=zOoHawdn>2|z3HjG7T#WeS~mjtPyKbB(@TC)AJgj^06=%b>d(inQcX6uD2 z`GSz2EQxb&fMVO`W62Q*ZC1hKF1!mD7KMg+&fUf2{gN%h->Ult8I{iew0WH2m#0}l za-x~)Qig>mBb(`42AWtBpSw?luz(^iEWqp_nd(9g%Vz6gF4Y6z0YYxiatqq@Vx`93P-JI_MIt`pr_cw=< zW7KH3(xMeMYcQu%Ia0Se0x&`paGmbYX9SA(yM(k||7`pS16K*TfBKL!6pzjPtd;NpEcOjkHuyDMB1vl3O~e!oSb*Ne zGtt9Lu1VKr&V&cm#WZTlN}#rfyHd-1qKYIo;8!S*`~=f_LbH)5n6UhOalZqJrUT14c>IDJfwr2z*zK0e&1Ek-vEzeik(5W4A&5iSg%kxz1~3MJxY8S_vFa^$sl z6qmO!EX|Z$>`pN*dU#J{5aRzvPLXo(2>r%yqp)D+h^hup{9+ftbq4EORsU1E*9f`< z=CGP(8ZpJ19~W>p=mgDH;*bcML(O*9kFmd`IHLg)SPhn{WoQS-s+6N^Tt@_EAGXqc zo-OLxG5%c42AP{yf7%|+{E1WuWL~TI&>217Z98-7N>2|9xLKTF{+T7qNbjvwSDH7J z2f439ynY)9hd%5i)<)pDe>)?6!>gm!G=6W`abNs6KVGbQi&)H9-mDx>EMR)NHIi;{ z_TzFV^%ajb-MmVj-Es>OPN}F_tM|Pi4- zPOwS0reDB!%0mfwxeqi;UXz_rKY%9s!>0oQDB`XOVcc3U1TxtlQU~ zu=mW72zjwotWsN2FFJ-6l)`1VBzXKl02`b%|juMj|kPl_0il0`9o>y#&b2I=!s z?b?$;>7t%-ka9A&6%7pg#ZyX!f4!(+)~=BbCjsmh_QS9lDWmzR_4kSlyoNUb4}O_& z{h36i`6`nT)iT|=YWLfV17#Ta7_ou|cwV2(waJ)C9E@iA6Mrs*H_Uw(XMKOB-30?v z5QxK@TWU28+Vsn6g`Dh|yEif(4pUP6etncg6W(x>AtJ zt%j(ZIx9(g2;#9xT0EcDu{&ql`vyZSoMUiGhpFhC$T1 zwG!P1<+BtwobE84t&iQ``uq@|?C<_7W8e+ha?V#s-5;!Nd}~%Y)m8=b%4r8ozmsMp zKVOvCztD8_Gffz_9XJ!*iHPh2E_huovT$n2XLY}zSME}YN29|VvNC;hzCOjD=*!W5 zN5d8p$6Lssywr*`@Xp#FEl`h!OzuQ;^VI-S2s^-C=CJ&HW-W0wyYeIn*4-f$D^F_t ze8nH9ad5x5@{w{e#moRt=|^&pmIZ=h0$LT|dLWK*i{tXviRZ6lL_)xH z=T86j`z8Hyke%La!$Q1Ski?JF>nRVrgQpYv+{7OVU0*0cJzI21_#AQ;yl)RGX%>Q^ zZv4%sF#V5bH~P_4&)4V7pE&|ctc*Lppk;wS@lQ^BM5j|>uo04Y?6ZRa9(CX?W6$f_ z8Reh}7TxbkH8UK!GMCq|H{QQz-kT~m^FUu?6f3W9h9NPN!-a-p8-*$UQ-43t>y3mo 
zonZ5Fhj&4TdAn`dTi9F6^IBs6dP0g2F*eTG#+^Lhn+d@toyw33iruB+gsI9& za^SABAO8ZUprEb`bjc=YW;arvcvq|PW4ET5IcMXzWs$R9y>05YTqCgQ`9wCYhMser z+2`b%f^6*aDWmO5uC|HQ27dhSwu_C&*aPw*M#{f)OpzmWSWJO7ZzFh=}a`xAR{j`L8tos~brFwUmDX=41LYV(SBfkWCvs1ABBV`SwxV8Zni05*gYgg z$ZLL!JB`ES8(_NK|B0736ZRkK-?tp;YX2;*#I9ej*{$;aq0)RT3!iue#v(eviV*$B zYdv@itOhNfC$mNoeak+NsfHvxSLHC$@-dCbGXAX}tVs`+k(10dyE~#^HlOXw#xq6r z#n8&Cs!rw-#EYX9Yn0dZeEa%Unu0d@H!jAN*7Y43?(({`*@MxS2d=5mmRpv^;yR-q z?Av>zt>IL9{);E1o@6(v8-7nnr0rX$OLb)ZQLxrwJf8v^4$&Pf=#Kl0U~$W4wgqAl zt%tC-L?S-H&aqeg!zq#BCLwsbpiH|9JMYHcmsP`%rf*Mnhoa^48oB4K!fAfcnvm;73eGS zzwpd+zMZHc#}E94RaRVK3*GdXnK`ryqyRkB&pbr6Y3?5)7uE!N^e}oay-d;iz zZV#JA=!M$14i}qb(*)huOY4{3{o05MY$L)W7_}S-^Aod8u6FcF8NOcreNmJ_T-LKn z%|Y(lL?#FGwFherTWQs{H-YOVO}j`2I`<%5=<9u2ZkT(zMU9GhLc&ezcqVBb1O5#_+~eRIv4> z3%4$~t%YTY$^4`tZh_><6G0w+c83gPi29>4tx+7m`emzalx=4Dv$6)`H`R_uEA8W1 z5*QKH)%GyegN)d3)e0>;O~_|0OFZ1{u{PYD3>=t!_lCN`GL7cyIg|U!t0@hGI2d}K zhrYbg%ZD8ZhxKfQru+GAr1zuwOj)X4d8c$Ll>esLVuD7|oA?E!+(5WcwJ_S;{YEca zy;PgS^U6LrJp5w)RDTpIrR|dWHh{|Ya6vgs9L+;M8>Jzbnz$bbv8k7Vkt#-0?Q=a1 zG*t?z{KG{^>9u7rMt_aJA1UC@RZRV{^c6RTvtQF;qb!UIG+7?yVw(tQ+!{)zy~%Th z>4#3*AczyfEALflzxtL?*t^-ws93WyYq3l;!*$$B!pUu<6Gxaytw=S>22wW%cYd>1 zo>1QUFz*fLwgrXMmxq!7w9CL3+8!y^b5(sXE|H$p=dSWsN+w`cvhkczT^g1GAVrcU@mWb09>#woI^`HY!sr zXda3QW8lt=$3#YDU}WWs)HZ0iz#%Ty`dQDVf%kPcN}=|0rBM&R)=#pm?Hn$}|n2J5UdELE&FO9jnMnRxTn>WRViLk@r!!F?2nL^=WO^T$$Nu-}Cv@aAkM?VKi~dM^#QOFzP|#|Hjf_8A$Y z@od38|4YlB^>WK%ypg1dyIDkSi3up`edY8H#)OqstIm!q8Z9Ve)C55&0gXoQhWLj7 zNQ6(F4|YOTJ=11*v-41Snpr0h+^k2v=71@F{D9ndJS)AAta2h&5g)u06`6=2cXGbu zw68s*<%MG^hJNg87PA{n3SU#a)O!6b?70duH;)m&FFo{j!JHJdF1 ztmw6%c2WC~<#s-$0a-1f!L0#*TOy>DOEmO&eXb#KzoASD%h$^FG{8V-5T{uR3}VMr0J>rS%om$7%r^rG#SF#An=+QB~RN7 zvbptYi8ZSLnFSip=^Yh{Z3liYn(^c_)nyxO14%v)M! 
zC!d$~q^Gm*u40}F%&Y=3WDR{=4De|U_MA#Nz4Fek< z?ET)K0bD0y2JnC}pb|aPJ;b-^(bK!!b!}sITi2o} zrRJ^3&zr&sT^Mf`-?r{;dtxRv@2w8P6)(F+C!YmfbcYZ?e3i?=Tw>OE3u?58^WQ%i zeTJ;rj#>$xdxT#sL?cMSvS|BZHR)+pGs1~E9p`v9@JoF&k_q1at7yC{>a)!ik;Ess3TIV!N!(#(|J;lf0a%sJ#NcO>`dr6i=a|bwV7>o$IMV)%oXCTZAY}-jLI1Ql;!^{ z?Xl5pW;4@gdp4dTE3aLraK`JnDJO8JYF02sn(ulr@BDsD#%pGbm_{+pn)YgfaOYeg zV)$7(-t7*~&Zii>@9rbdhE{k~GXhrgFeJ$J7ULPSL|hMKjL8 zZIT{G4BoMpSNVK9eger@VN?1AwNJOzqzDmZ|Jzuo3D!M#!?C%Uo+)Wb!` zLc%kF;LFQrX0%&P;UL4&rKN-H2%)M@m(+u$ksCe9nDB*n3_Gsu#Gi}n9Vw9$eXrNZ z$zfP(^Q~QidyJ#kNgpjsD&bV~r^dl^IG0UUPG^_qbMDslhCw%P0hPJOhfCoZ9O1ha zj`oiw&6^2t1Z0cc*PK8G1!v{E#5_25GQU-iFNXO?UR+Ii-*1;rHYO_%xfuu(_Yprh=Fi>2r98p}ouq#)4?8Q1ZXX^Yy%Ta*H zECDHf$Yy@um4%anxn0ouND=;?`|}syWwdP~O;{=MfGDJ;eNPNg037yJ!6;Q}oL7Bu zm91N}FxK0JhBjj|dI*U>ZN%vQ!7wnl@mZsdv;$(l0JM4H;pL&{@SY>Aa)k#* zo`=;%4sak#%Q(&cY>vSZ_s8?}Dn1K5lzF;_n>xS>|H;qW50bM=45(%W$LDAzT7pvXTk{sl6ZIpb7)S_2KKBc2As}-Dyb%F8E4CmVncV*WWeo# zXZJe^-m2Lv<==qC0)KAQ{7w;lU`6oz%LOIQe^BAD%(47e^UawO-+wmyAXnYkO%)I# zc+(s$zvElJ016l5Pl^7UNGDrE+Rhw&;}QO!`iE2l2Z)9Dx+(B%M4v2Y)RJt8FAKul z@K5zQqklTVDEIGcibn?U!2him;%JaNS>AomiF7B{R%;j~{csmuXv_RqpoBWvQR@)a z3%dzcXcB^J&Mg0{(*8S^BEWKZ4z>}Z+TWMe{w@q~=)!P0p*MQLZ`=6$12+*C=l$H5 z3b=on%-_wCtG{Z+|FhnOjW`=u^f3t4wUfWoa561~MFCeCbs>)5mr!74XO5A>kfB=j zX=543rUBF3Yvkx5`m*1e(0+YXXg&Bksv}`9^qOdtw7f#m$HSXT&c`wQ>mP&7K+oaN zz??);vHR;_zH`BQJALO8zQ7jrdl%G*qMuvHj*vh%y6?BXs2A;f@+@1TFE-Q@gfx6-;CDgP8OY$Ey>sI$HfkS@I{3%T#&Bq7lh%;^0l(8)L-5-s|m z_}Ayn3YC%w0x1Ho=(oZpz6BP1*l?tc44HbMI=bl0HtyT^$q~PPb~4RceycK7fboyF z5v9b9gg0?gt+Bo{PZm$*0LSb^cF;oO<Kq8~yC`g;{@7#DrzIGKZ>cJRZlEr5qK;h+yJ-nwPLhgY zoTp9}!boB@_o9H8EMai}1}c*Jphb9QQG##;5(vc^XGw@L6@_=ACSrvE)EY zJ91rwpo<6Bs-4!MD!W^6&eyfyRjpLzZ4YK-sy4n}KQ)|xvOKT~4$3v1c%@vYO$ZV` zz?rMCj+wO5lG1jvX31qtIAKx0u!dO8pT=10RLj3pXQC&D?%-Mpu>R4pKPuS9ivUlc zba>@m{<;Bog}$#2ttm$&wV5YlfpgCn99U)8d8d3BAFXaZgLAunmR9}j`6qpBrZ-GL zfN|w?z(F1H@-Grz7)P{L3Ut9P4kXJlg(W7#))M*ueiSiEpzH^ z`QVLZxS+6ifoj19w$NoDH0FbzeuZdij9pyG3xGmI`(n;*28&BAN`7Y}m`Z^6(5b5L 
z;#^=Is`c3Wc?ZJ=6d;RV1@TT696<_N>1`Vmu*RONq9N6ey4!=m&3mU9TS`dE)*3#+d-{8=T`shL7Y*gB= zma|?<09+4MH;TzW!=ir1A8JtU3L>=c!Mq=wevjCLLl7GM!9bYOq$vK^twPYzCRNkH zKFIOpFayVitq}Ybr^Ti9%>99cUbQ*nH3uN5eDN|cKQ&$GvH^>Rn+#m*L%Vanu&7)U z3bDE)!n5%nD!JxLob+Tavtxx4WX)E%u{4^Wedd;xyv`V_;bJp?V7&VKn#x9@I`cT^#Z8+0k6lySS70sZj)_%;y0< z9zw$LdBhBHSewGqu zo*%rQbXp$p%Gu$T-><^w^aOu+iS*n(+3N)$QV>oAGytdkKu0k1dJZx^r4c>V^ z`5~{Epgc;HI4xG!-9aY14oeRDb7=EbUr}8t&?YzLEU5=7lw@m=2Jo8JVkU~Wi#^dV zx8g3{nadj>eLVNuswEkb!P1#=eR1IEuYPVVQV7Tq;Mos-uB2X~M?PH}*WHg_4R5mE zK-UCKEA;@F>~Lo&j9Up7^A<7i-^KmG;D3dk7CcmtFNf<~3IYAJdr?t|hik}wx{q|$ zNW=bMrN<%`<*3Lk;9mfUrQF$>QZL7BIRUe#Aje`Vr#!*I)L*}9q_^dA~r3^Y(iRT<5b6fQKStkWefder;s3z0hOcgST-J%+WMjDCG$GGt(7 za!tx;yfk_3Fp24RZ?a%55F9=nyPGo>vV?aq7s^_CYX&x}9iHE%0uL+GPLEACi>c7- z(VMDI$6;v5c1~mM^W2+lbf~3mDC=JfGU92CW;QB}H^Qht8sfTQ8_K_s4IDxy1KYI} z=W0$s%g>q@~2}x5v{>@=cUre6qP(rxveI9L~&B6V0+pyD5=&Ufu@K~bbV;qy7{7!kBki*jO;Di7P& z-cY#JyxHRrpbuB5n}EpWWXm8bfQ2d@!0#?lu(f9((fs*AxJ#&`zl1!%6hUYOQF$VmqB(rsE6Zo#eER5g+m=Ehhw5^ z4PG|B_&jHAw_Zg%Bi-`KegBK#Fo5R@W34LP3r5dVSX@G0-=J3`N)T+gZ^Y4 zU|`3^Ylk4PUhop9a{1yU$S^h7hwF54t6&_6MngH663;0oGI0=?VVH{q~bdRhFgF^>eV z1PrlLt$NE|M#hsgZ$_T&LG zWwgwkv04iEYGC7?52*fNVP=J)mr&#YHbJ-Obq?Cq($hq(1P8viXh5TD(*aS2p3l2W z|HRuCgA^VhQ;^+1fvuPml0wr@LFeyilp@%1h9|R76f*Sb^AK*Bo>$9x+R~;>{p)4;i@%7ozjKfkG-5U#sp=9L4I;I$gL=EPjLpJjmFY$C&Ng zHIgA%YBleXd>rLYd`})NDu`le+xeX5q-<*~hHNU4j&58r1m*4KZOnz+^Dt6#Q~x^Z zkLiJ&OZ6emQ6Bu9-zrRr;~a;%6J7AaUkSaJF2qIUj=QjRe28Z67ceWsFi4fCQFMtz z*ZN_H^0lxlDsp0%$;wezyIKD#?p@qG$PCvH)_Z~{Wy*L|C|-_;V8%*KxY&;8XPKAI z#0}RAX7&0=KPnw7WDZCqplriine;(;ApD;k1vaN^-JD|4y=BpDsQ!2-qrx2_QFVLN zRy_dAD_7>Ue$hy`peJfWp4xOfKGM-etP^miyR zL=-EG%|x>07v2wM84m0O+a**Pcxj}=rv?;T>O9T<3U8RG8Gf>l76=)PP`xE?47-0! 
z890PHKA^vROzcklP{!ZwYGd`E$qf+yXJB8m#n`Y0UF(Sb_pJV#bM`GZe&t9hZ>B84 z9g%SO%axti!qHiz{kfqW!ioIVkq#}5yq{k&-vEhx5v>@RU_*8HPq0v|-?znF- z*{QR+c-{3)KtI?0wQQWx6VAAyiDG$1xY^LQA^O($+KgXS>6S+{akvMP`_rc@g8*GbeIJke!cDfu`hfe#>qfX+-A`@s zw}b_c|5(in=+~^L53j%lg2?0Z2Q#=UY`hH-UVT-w1Sm$#){bU`F6tqRBR8qMhLVmO zSdsM8!imB?rVhGfk!+6(U$M_w^;M55aZx9{qjEUt-|B5YjeK8u8pGs%PG$!@g9HV_ zb}6f;&haq)Vkb0>r(7s*5vLRP0|CQnp10!?8thpMevYH`SehqSt^!1%&ag`5sZ5Pi zd;4vh{3LK42HdJG`hILAF#})T8Bb(Vi2ZePKrG_{+nUsYzc%pao0q>Oh!u#H|CM6L z|1R2OqWK3V_-DKJLXezD#_zU&*Y!_6ZwA5cXd=QC{=G=g@0=mX%|`g?f9IV#5VQjL zp5mYCy?@P6Og|_PgAQNV&|n+POp!q&Legm8CQWVzcSZV=e*9g5Ha6IFhU~R`+!wT* zvq?F^^ST%gTGjw84*0WWRj=~ig~ZY71!)AfWy z>5_R9tuh^JCcmFt2MMP*Pq{wDox6PkmN}#BCH%(8BqB7{ z6pkdDf%~)#^U0DzwMH*JYv&PWwGv+qWkZ9xR=?WeSKtaeomPE7$W~m58*75CL#Gxp zE-MKm87mh4T$P5|yp2aT@nYC8Ten%S_4$42+eTun)s*rSmeU^AQnRNZY_(EpS$@ym z%|iUQNdG+OKr$3qMKLeCjhq^v6(JyU@;F*_xws!$hYc!EmYqoC zMoo?Rhf6i164j=YL^_~F1)b|jGcSkX7_}-Z+18`kla9nAVy#7TKq(Ryr4ni5K??J= z)Q|R?y0(ix{o=*4D1)NA7|*g+tE&Bifb9pmTG{}Hv4Gp!#B#MEoe>u-Xl#}XC5u5S zj`yDj$mAuXiEhIjTn@)J9`bftgiFTL=1KV(k z6Z}9TZMm<*SejoOJ|xQO1p^`45&2s8tW6MlBe3nHGr90QAin(IF~QkU zm=ebDfhl=>pZdH|HY0kX##HOZ@0K?Ys&EMQQq=GK3mTnrjsQIw>Va)AFn+6aV5A)Z zXpQabf47$ba5_=)a`{QZ0lG7<9^91r4n68Li-#sPn1r=AC%sARg0}UFlXtkAl0piZ zrj1n(f^-CJBAXFu@HXoJQ$9hjCK54h6e-B{V9_)1LnkSGeezdUQUVneic#-{`{;hS z{Z^KXMCXE6PKTD0&8P#|Y=FT1s!&Hknd^_|cw07;%rtk+ zZ^0mn$zZ|``2wNySAA<$|30W>ET$rZ4CU81t=QDkw{ZM5w_3_qM^YL7pP zhPUE$%(euxOpkURC=v((vf-FN4~Vns0B4SBogr!B7vj2?9>XDUg$(c9G-(g`zb+|& z&Xz{vDT{pO05$G32{Oi4&~Xwn%dBoI#mjfX8Sj~`Shv;?-j}b#{p05$mS2p8nEtww zwkHJ`hw^4?94)}x!g0hfw_uY+Lqh4ua07~AC9f5YW{hN!79oY7_b?*uW;!7*L>oHE zhUav6t!1r9cDv#A)7+83-soy)3yFMUDHCK$5%orDtPHapwf(6$_7&Dy{Kvw|SA<5< z@%;`CR4|VA6rtcW2X3`I=#?G=QD50%SXuTM)>Rx}T2FhcM~5Ve871^<3bviuRI*<2 z0w5RmpEH>Y8!?@Q??_Rfr>%UG?j0oXUfF|U2Fyqan*-o1=PaCZbBdV)79Ws}S3?`lh?3zF(u zIx_ChdSGt=z3QOz>llTu9G$Bpr&S!oMrT$z4Cgi!+L2|=3;%?4qFKyl+U#gB(1NdQ zTgR3~$H3(HR;}`rpy(6D%YA4ve^9gCT$9f6(X@Jv`IZ2Bd|5Nf=PQe212!e(87oSS6E*{08mSyX 
z=1|SinxXrIs;=oQpT&6tmmQ!bCx5qD_I=2(LWTMk$ktKbb4Rk_BEY(C$#eDvapZnG z3wP?U84GM%MSn2IT9PvMe1akaB-)Fcv|Pf=68zx^FdarT_fa0acB#e-`iYY&c!J@z zeeuD99*w1+F! zxXkZ=ONFhPU1k?!H zCq5Y{R>$;Lp)7f(Z2|+IAeA(?C(U0ho@Yf!SLHe%0P2n1G1t4kLM?*}{ll1bb^fLY z@9tFuf5)Ujs5ovPQ`bJlVQt9|=ykRoZ{({AL4RS4{hcxp9# z1n>~`hEBH%w_^~4`X|XBlLZVUR$f!S;EFu~aPy`sJNkUM?(K`o;d{;6!6AOU>)*QM zyWtjQX;G>So0p$wz9F+nwLj&RlStP1^Ay->K1us4o?f12uj-q|(n#BtdeCIi=s*}H zEwyq`g1@drNr;#e;%{a!dV3Q2&x)$CytH><<~YuftufiK@cQOR2Fw!F8bmRPd57}q z5u8e!YRA_0HPOy!Dp|u%j5-@?dmU ztJ!H+Xn|eYMD``qwUY5{%$?b4TWTE2|ITAER-bLyV|gv1M%Wvw08&P%TP==j4@$w6 zRyi1~f;5Xo$H|{bil{BmUa5Ukg`)S>fdcCB4?C=i*f&~{Dppvf*xhnLBKILn5Ki3z z^koirKxGn{$eqrW-7=+=9Ddxwh>hF>qO0ZY82Hf%}K1stD6%m z^aoqD=WFdNt;dvW>_>R$b*c+o+lnO0j@lsS{3;|{yB$rAf3`_+F=KFcJ7}|(Rjw1k zTYd;{L_D{TaIu$U^(2WBA6lsVe`B{A@>>1pXp6v^N#8!71B|;(#t&Xc)U((%9$h=c zlzE%y8_WbmmkV2en3l-Hm*?>$KARdJ4bb7}w2idW9$CrqPc=q z(u&o_A_@w(@ckG^_N7~>W54RJ$>Up;TyHcQH z!FWbJby+51haI%cQP|WwTPIw4ucMF^zVq_pYM}blFGj?JPx|LKEF!MFz6k&S)um9&8MMJzi{>J-3n zcT&|`Xj~D5JIL6|bL3ug3Q=E(;N*r*gf`(kyW*F+(3}C8)ET^v32$Pq56lq+Y}(NKq^Eh=X-WG+tSvG=C(qpw8CBiY%;Ds?PXR_ zgp9nL>yclUFyua^pBY?kwdRFi6J1y&aQ@PNHt?A5-dv3|6`uA&#RrAky6b8zvoKFM zJpI_GIfkqKYnZ`|NhO)cWi{Fi&wZ;ZatZ@eM8NHkL+e$=)r9TbvU{_1f(-}A#DjIW zVR~`X*DR4Kvw6V5c76}pYdydJ5OAPZnYiQA#OjN`(&V&Yo#!j$0EvkB`f%< zp`1{B5gZ0g&Nwh7-prge{Z*O)Vli!qP`}OOUw3By}8U3bab$O?+xdcy0_nFYo-!PORbfyKk>~TX0p|@wpqHS#JZwQ>tc$fa*mtk(5Efp!nWB1T}4~(hCNV!fZv)|dB2NV&&*|()ZJ39hA^>; z4aQzHnxH9MBCf60XE%HEYK}fTF+wkv!Q)oT14CsUZ?ta-7er(GQ6=H%0o2cT1*v}3 zGm?7-YY^WA<;%nXt5P0mIRnyhKa_*VXm(gK&hx^jA$(b4Q_E+w(c|N&sm0jCEFTrF zCjIlTt+NMtSs#q0R%eeB7#*$Y`2psQ<>Q7!5UOl`P0fdlI}1XTiP%;iR*baT=N5IZ zJdz;Tb?sbGkP7b}w@(iX40C;P-EX?Pm0mV%O1u-0rLDXI2<&ZE&oy2TWbvM@?BpZF z2(&I3YL21Q#tP>6yr({KW1#N_Dsxg&^7iGS{{JD}`Z4}+u zh_~kv;0R3PlnI;?;wvo%$6hFn$BoBxJ8Bkl7NnbbOCKs!gg%NevJqi+ik0P9~Gbil8PkK61jr!fdjzPYlq!+$aIK`bcbpQwRRg^<64pNL|7R zCkrikQxy8{iI2*q;&xI5JzcCud+576AZH&#qb?~j+7-BGP+i$M8Rk2h6LFOzU64oP 
zZErTq;IYs@VU#j2>+D*SwPHa*qDbVle;Jl^pixuF&35O1q0n0Pbbryu%8|P)=C!&1 z=4&IMGqKcA6R_WWjcqC0(XZE^}q74krfHK6{Ec_vlmeBgCSlH z6XNA)%W%TQejqC}JI#mRkdh-{L@4ki{Cc=85=_1Sr4_egz|r8}?R}B6Qx-SYsl&vG zC(O7th*LLm=$)~bRc2%H*4M6WX|6hZbGUoap(^{SccI>N$AR#E7zO0C>QksQ`6JBS zTu%UL_kpGsf7rr@GDWyq8Z!OD2w5dEm$?TV6LBZOe7C5g%}^uyFU64ke|D+kIhL%gCEIJmqstFbn8h z!d1;jWtHCc-@weu@#>^r5OLBL^o7dTrAXnIr#?Xki|O;n!yBJUC6D)*Kxu^Nr4mNm z%q{_zPMvG3T)3%^8_0V$pBf>3X4J2<3^E4CO)+aM42G%jM|rX(WY+2krB81cPk>F1 zG(%pTuaXn1q32P|5~L|@GIGcLvX;->j(t?5A(Ad%?(?^%waoJ9VB(^c#t9(4i8u4u zVbroqS~aum{P_7MMuN%4r?-VIxJOkg?fvH4f=s^A5hx_G;Ulq1?4o9lfcdGjb?gk4~wXij>Aze`rPt|VUp(v`l z6e@On1MC+m#282dr|l+7(i0h+&&ny3vH@e^siH>f3G%SU36q6`jczANEd8>T!M?m3 zGV^ujrKqodYJql!QVc`UN;VQZDw=jF9c*s>+nY)YAjBSv?oJt-Pxq>JG}SnI81^mLg+&!~WY(F}|ET`P@GuOXAwS-K70_>~@#j{u^cmju*&Li&zAjSnyDx9s7Ig@{p-VL@lW4R34M_v)Itj51 z_%B}Ef9qRFz|&+&@uSixxAUqA7y0nox>eqW&5~4>8V%K!t2p}|53$uu8`GXIQDAbh z?hlc8CN4NaNbEyfpD`=fO=>3fBB<^qgm=-UgS3b6r-9JSpU<5>$d-HhL~s`m*Wm+7dsQfm7>4suE^A z2CVC6+gYC<1EU5qW?SoD0FeH@FBNXP__ZTYS34xK2|#U358zH?dy1oHp)R60M57yg zS};KcyZuCB>U3WKq2MEun`i4C7$6MGJL%TfQZ)Wvm1)(nC^VBwJg#!# zpZS<8+t}3vRss30N51D$oJOCbVVR|V3$ze%AQ`6^zxot*5Y96rnC6l?R7=_^`e$V78~j)ipfyB zAf!3JuS6To?GMY6&Sf z4-gTY`rUg;jGPJ%uro>xbGuBN!tFp1e^fO7yeoqsyONM^Cp?|vH$3z)Q0xX^{(EB; zN;*Ha_YLzFev*rkVh!nN9%m(&9!@fQ%T-HY27O$V z!u0SeC*Q11g#-7TaL_Te)*$H2M#Z|D0FpwwI#pJ5ugOaY70amC5ZeJ>Vde*H4qOjX zsRoxKKcq;a^V~@rhku-IUFMZixA_2l`%pSGjexy03crfD>+xdA&otv*G5yVB(lci} zDmxu~WX+WOMu?2UIx;C81B`{et=LQLpQCXL#yL!>zyp(O>0#M12L(1O) z%3}@_XA7kX30XqV_l|fSB~}6>*J%xf`$ykS9tekw%S0X{x?ohIRf^SBp1c42!!MA= zF@zOO>o~XS2Eo zaCi#klQ@A}dUtJwlcO8dzKz4(WJo2jVDSC7zHO4BzOdRz4pvDjGlAWRDN)1LUEXZU zt604@@#5I48HXWNkh(9H_bn`P>bxF8x{kgZkJfXKI0)TG*EI#`_SE#7`nFGZK)j#g&v=Pz}&#(8ErY0L z_k>xk@kYMJ1w6iq^Cp`d>zm0jBmep|J|a!H`GLpn=)I3s~9kPbkbiCjW z7|U9wvegu^2)2&5Z^gw^K+rSCg1E z)?1=A0Uk|>Oj>=3i~0hW-U>D5VBzESy7fA(kZZ_CF( zE?vjJ-aEiBJ7{?lBJT9JOkG;?iOme-qZd^sV1`r}$0@ZV$c^fo)L6}M-Iy%-@KnNo 
zCQ>041*p&jT!b6U9sqd{&sUBzwnBUa2O%6`Ho(J9monf-3w?W6el$E?>u&4luoO)CvPF-q_AzygH=miXL}5n%~p0Z8>C%< zonify!)nunBs+e5ZJSBo>L`Qt#E)SD(+S!hvg+=F(GQ+dp`#qTBb5c+xJO3~Bc)GC zl``nH5LUw%%CgBe;uFP|T$rof#)`+NcwDedyb(ITI?Eb~0x6%TT5{zay|R#?;*8@G zt>193P{A6m)k49SHj#V&*Qp9mE^WJK`y8E)nDbm`dl#a7GLqzCuom}q6G{a5~`9kOGuFXX!DIm~+DDacw9ZZK}7&qi-U?*wl!}{hopQP3j@ALJ$!gj{FxxHg7 zM1t_P9Zw5F63yGKpglcM3%qhaSlCjlu(5{#mqm8>VtFHBT z{0wqt`^rGNk4z~{F9~s(qjWnc>?;EFR{R4hmVSPzNZaB*j2|j|@q|5Kw}Rs1+CFlj zAi@MGGz4`be-ewzQzF6ilg#194~IlnYZO7}I|iWJ0i7D(tq@j3{4HZCU1Xu#O|$0n zm1?ynl~n!Nj)uc|9|acMhjfLhAU9>pjq|awF@UOR^%Z2_4lZP^rDw!a=j?xo+`z zRY+~vjwCEis!6%h{^s#+(iL;x(p!Vg3qo+!EP^*5zvoqwSVLKwd4G%6u)Ur>Xvauz zb0xGRJ}Lt|gr9$MHT{~N7X}uzJwgeCAVhb$KQ$Pb@ACHddFl*>My^%4OqmnVJ}}B> z*vZm!hK=8PQ7$^B1Np30Ta(FKb1;eC3gysS&$L*xzDekM&a=7NR2Y~>dHua|&33%- ziCIOQ0hXw)?FW3|0!p*jrDy|emNu1Xdl%hiVt)0T)RN>khl4JpS+Ka zGl*}4OPC;x38E;&@d&MKkFHc~OdZ~#SRRT{z_7Sxi372>v8QRvxdaVAIi84g<{)R}0_fFEVjLw}YHvrb~s?+XGkc?E-L=KsoJ;qV zwqyXudZ34l$l1CuSd}t9>}O|h1puD?O*s2v8?z@-}PtC z2cP;e#<^~w9vYN(x4)r%XFGwNbkN9t=;3_ys(@CqJKtQi=^B>eaExrKA+Y)!^-lXv zTn<1n!yFH@Pa=7JIrL3+)(Gtz?|Sv#J_K?bYmp)&7obqMXIqjI&Z_g-K+eQpHq~|A z4;g)s&P#TLd8pVoh%T>k-m`3?%XS4gyGaD&9gUxKgq;ptc?69^=>xTlCo5^4&ozb8 zc`Cv_4}A0mZ@^C?+0FUB+K{g2hE2GQ-}-#QX=o5U+0FZt=39lx;`vd!q0()4iXBxv0w^`U2v zOL5I|Hp?{?1a*VbeqT6qhx<(33Bgx;Vf|!&FVQoxB-QQleU4c_9^c}nSvP95=P5b# zL@lr8PY;)WTVGY$3OUyOxOS5LVK>9%+g*9EXz><}GL!VkaZJfpm1qh%^*Zu|`Z}hU z9rnaJrAqseQxE%0hm$&NEI_Q?7C#sb2l5)+>Dar%Tsj@-J>6zgVlNo*2=#hLshC{N zJv2_44e07RR=IxiNsG+`h%Uj4?{$A~z41C&!!O>z)C`=$i|;74EGB}M>p?$wSB4Ky z;GS^>A}T1fB$#Pd;ei%z<(4cbI&MZ*emnIL0BjF8caJhA8`M`Ejm$$F@Q-g)uemL~ z=0+Fit|Z$3`LH3Cs_M$@bcJ7UDj`gij!{tG*tRS<_@id;~w>8y?9N6AgAG z?IbI5PVt!4UX$ftKb9QM;*I3!6dOh#x13biF5(kiO|lMjaVRboyOvL)R@7(g)s{c{ zoe6TjW1~lyI`BPt%bjbuc-(T#1mLXsyi+@*kpZzfM}vw zK7jYo9ljg%xC%2DXuX^KC}+D7C&h6ql;10U^$=ogGb9k={7mnfi?g3fkmWzP90B|M zY%)Q+o~_JwNsuZ#M=zA(GQBbQ80x{?1I#9w!0I9MBMm0=mKJ(%o!>$3UZM8^_3Y5R zCz<43wuimZyYl1U)Xz?CnH^6B`f-yHLTs1JOqs$ue(Z{5)98lv`@b};KL2_ZGwQ3n 
zH2>>_6h77$534t(8PFk{w!nixN|LWP`gme<##gqqvwGg;e3lOw`Z3=KXs(0y@5r{A z{ed5&p!;E-6+=40qUA}`>?x@~`w?m~=Lkhcrgb9wQ@>VhV562B_i41zgE#IwnCu2N zC`(q+I)8J7!%T)^gO~szWX#Z6iIrvrWI+rW4>Hesk}nN6u3j%3+%h(zcZ4Zj+CwTnN}b6R~8|4ddmuh)hU&`6LXMGifWf6Z{>PBiioHzd)*r> zCHYFi1?!0B0m-tAZhV;!NnTBVK1;acR?BZ&MI3J?o2xby-l9VBvAM$kRE=wQba=Mh z$~jeM&Y_(oYRswHr4Os7&UyNy&1ed-RnI>@fHgyYBtlF1vCvS6wRE-BG{@xBaY2{= zqfz_kjM+*8Ncr95!#W+O*^{i_#A;#U!I3xWjAbbp3atFy<;1t>R(1f@k4>jg*KNRB zR^WovqE1u`gs0tUz`Gm0Bw(Ska))}9ki<&nW6seY_{x=*o7>>Ni31~QarBK!idR$7 zs&6F8Jf35RK<@>uS=biE&#xNtzNc3i#%ww)OVlE=)6&bR-33g&oNqn)6oVpe`NM#t^Vmkll2BRL1eH0{m;ttENET4 zxwC%4vz`H=!zDlXfJXs|VVR@St^MQ@hc*cv29X27xshcUlo=8{Kl1l+bhnZ%VKRD` z#fxiquZ{R8%R)qVhBq7yocRPzz0VWeX7n6Ev01{ zK0z)k{Q(HfIs|r4_f(~)#f1-Fyvw}yQk2174*$yg+YNZsWhp)B-6J3pTsK}xY z^q4Wl65!hooDTEn8a7;hoU`ij2Xxohw2pg^CS9k#O`^_`i_dShR0he?rkTw`(I8tH zkQ*qF(Cp+IQa>6di_$22enUAtWV)*z3RFADk&BwCjL#6<9C`;-LUnk(Q=Z65gPhtL zc&JQoq_EAnw!I%-JSK$XE#2dW#nG0tysH^p(j2Fn7>ZN z@0NPVbH4L6>DuIRXs%_Q5%XeEJbIQqVd+ORT?FNzAEWqMuyW(C#+)2=^$?BT8^-<2 zzPrlyJR9J-8e}bVU%(BjbQ#{CG43QE&K6k`W`{_n6p9_KD8IhhmXai0Em5MBu__mR zkGFpnpj7Syk0#dshVx~ME&Jy3@WbUan} zRd(#PwcU@E+X|}Za{CBV2^&m8f+u>eSn6SkRElYL+$G-Lrsq!`IKhtwnoSkHN7M=$&IfOoYqh z1LgRz+HhEEGv8OwK{I-*tw7I%@OsBJhVG8Xjz#)IWu~o>1&*?Dck2xpm&H(aNl+Au z-(B4M0{_OoVb>Qv5pTJzjqlH-6~vT~)C!TGjG2dl`VB*SZy07BvmD**k4X4%-*yP} zEs~SjmJ`ptYY+D@>-HIX^F-9Ry_5sT=*$RLN2#(N2*^7pI?@lpou)dwwqb11J<{KA zNf5Ou;?UGg&{!coUXOtN+wcerOFJgo4=Q=QPooz}*XoEdV9sCW2;Qs7oZRm!+g$zOUKYQl+vp;W- zk=b%LWrCi!(tms`N7WTrBWWk}`MaNz1a@9Dj%ZMh>;|rHtG&I~n>b-diQ8b!?Z;_ZvZ`MQ+^|MXF++ zAB)F^K_F~CS1wrWuS|W>2HiJD-+lkyi}4I}n}qeuNdKTZxnmHw(LU&835p^wa9S@g zD`4*fP*E@l$@E+#y7?>iTREkkd}Zc0cRU?m855~h|E@+9C0} z-+~pUwNutV#+WTWH^L{;9rj)YBBHi#!XynX(a>L8UpJon0r&4gk0FDOw8QBHAmgZ# zb89r3+N3whr$IsXD}_F$hoF}Ljzfi#r`0-GT1HZudObR|)BL0Z<$5=Zoj{-}WZb>t z-*(}GXDnttTHfEHPEHoIj8MbVOTD|DyX5gMWi}sXtt-P!M~p@{KIpw+_j;sOfZF6I zUy*>!u+iT4e~l&39TxJ$iA;mPR&*pQ|14|(1O+>Cd^bp04@ql zLmFs}6Ui3a;pw{Q57JZ|IfT|W_HpK7p^#Hm0-*|#*mZFJTH+s@w}rR` 
zYsK;r+rs~8>5vPc51Rgu3osdwzve-%R&AUzpSiL%+e@)+m`zDd440C>THoALf#iU> zUJ1IXr%w2u?@G-YX3e5;pH@qPo!s5KzL}H{SKmsNaGt2(g@pwj zlGYE*#|t$^Dq$xoO0I?p?XOr;62#Werng*avT0hoAl}--XbZrPT()4?{%Y)p`oFF_ zY_zsDFPbmWdaHfYo*H=T;BBRk5b1qN%yjLGz*m@LSa`US5I=dkt1kbuBg%0;OOld0 zg)dh>kwcm?#aMVc_D!4Z&|TTi2W~(i$7iy6-%M#6Q-kS0o_)q`oCs2&dPZags~H4M zgnR8a-N<(aX3=OB^f)%2PI;2b*UWN0yTDvRZbb&$7!vSqxz!6_?}{9*)QK}=_@5F^ zUDsL_m0*0MwH+qVU=J0*Y_@bp!YnV&di2X^F2Ms-1$%gDv z-8E?0XY5x3xPMz9S1(2EzIw;5!q+0z(0Kd6mK|WlNEId%<5=`H;}MpKp}H5U@KZ&Q z#+QFTayo<{=Aq8s!nFWYOWj8xF#)>?yjna~mD|>ARegQWDbnPx!}V~#&hoymg*l~u z=7DrDgH5}LWsm#zN#HqovTxw3Sd>&w=a=G@nVg9}m-p7M=9-}b9$MgQ@$EmghBlfI z<0IoEirmxK*#S;G4zD%p9Q8!O@Y|{HjE5YHoYsDtA1*&q82X!L$qvoDH#w9j?$b2+ z6XgAN+=1|L*`7z(7etR$Uo0y9>Y>0ZV7>)c;#?31C#Tb|O_O@xoe8gK~>4Gm3}T?E6|PK3J4E|m-Jzosymp03e)*!Ahs#jcD&nv&4bB%Pn| zjr1ZocfOxB5u}C|AHq^lV4n>~h`tBT#&mVB2#7NuG!53=4Of}{+XT6xM;1I}>;sku zDtIVttXFLj_pf=Sheq2?EKhP@jXs|9Zf-zCUBaKd(bBuSfrLAc<8bkS426~YgVK!( zBY|LvR-V!}L(0eCw45?=@M{d?QIF2>H|E|Y_x7dull@zE#9bPg<<-cBm!LSPwNu@Wm z%VAD>iF}ywzH{^4B%RCh1Mg;GsGQ^QOB?>ADSmLG^IT$s) zByx*n`94|HU?TjQWua;~WF2L0VIkLc=LM7A1;JzEE%~NtmBLrtCRb{#%2Aydui8+` zLyK1q9}f%xB0oR&F4_p|MBdwkg$0AO--1;uNfdIN%ZEXkF7R<{V9W|(@D~_80{g~~ zxbTU|Kf~ML|L!4`QD{)i<|4hU6QH7C^l+9B5dz7P-)a(EIT|6%GEDalR79=q7NF39 z9dydC61odS22ib5G$YscL*~Be&9ytjZDtJxTG+pHDn8MpA37~QFdEge>?k_FaO#^~ zeF&ScMEzt-Et8M(8wzWH^wtQ>Nl_Kil4B)@>1+GYM_Bn-@D=UGw_{7`jpYo~RtG-O zYv$Kj_1?lkUCt+oj|p8^bjGitO&TSQaM(Q!ExsOqNM?Rphjp>0b-Y#+-aNm+LnBIM?ie#4#~B zUS*Duv}3S{uTgqOPD7zjQgk9pueEtWfuZ1sf?}!sqduZLRiuQZ&1Sl%{O8Lae$uR) z?55HmhPv>v;&}JI!s8WDtfXmmu(d{{k$5AnM{fJ8#GZ$HWNZ)f9Zs%wj0HFjU&Y4Z zhHpDZFM6#8w6>25@{HKQ1%bGTWq2^#eLrB%!dhm-(KMYP?z;Em$Hi1|H{MjSf#Sn0 zV*u1l=Thme|Cd=YZk^}IqY!I%&XV9hqe0XBADCy*cCx(FMCU4Js^}B5;6DC%Md)=O zryh~h6O|Bn)WgYnTKGdT22JU^QgnW1+0d1qnxm0E2nI^esHQ$C6)5`u0CWoIq@2i* zadf)MAjjklLo+2@nSya2>ks@k6NFege-FrzNoH+vwlvs$KEG&eW}NWzTla3^)5?tT zen<6U63`CEq}o--RRn$9cwu?U1sR@o8aXX!9b}*+&Jo>fmrE$Y|7X4a3vNR)hvB3BaE|;8$;;iy0hX=!Fsg%nq 
zzpGf4KEn5L$XRf3RcS~f0tUdBV5LyqKi((q5bbfs9_rneDKzgbNqB>M%|t5NQ(P`|gz1{2%GkiqeWXwmy!R z>k`DMS(Vi2p#Khz%8}jyTta=*80O~}OT8GkmLL_Ln6VG^TM=Q<=JdvxQ09ltoiAGs z^vu5Q-7-sMEqx+x?UX4^(^Kv1%mSe-*CTeX3d6Av$9JF4bmvR~5@_-AEx*_DiK`@Du` zJXy)YG0Kl+o-@37@3@st<6F1{4m!KXVYy3yYvq>) zqs0!DcCE9Mz`$xpV#CYOP8w3o0q?2r#k!ibt`gmv4OQ1!gbb;PSsQ|@f>@j)`kxw> z5g0<`yPqHkek~v@Q=mx?6=nr@tg}76lb-8}_vY{USVwD&=|Pv(e*g0}iKNP9Vi}3=@r+LL zl_7@L4hQ|A(5q#?wQ2W{6W~42i=};@{;Xltk9InNQ*&%op*dQMOZ)XWLDus1nQ3v+ z8)Rr&cR{hE^@*9BhJUu!ro&rhn8+?=8s5w!4IJs9m{T79S$aeTP!lvSX-GQJy8- z;o|{smdjuS-~)X90CyG9v<2c`7NIuWdI@v>yQnByT2?K92dyKaeMjuykBuQ3F8fNx|k^CtQhb)y3 zfE|G2kH6mQxv`^H1Pg#Qq0IkJdv6(5RoAr*%O<6}OX+TDX;45?xwUiC`0iiNpO;@8dmnqPHOCxtjB%dx9CPI)fYq$SN%#z4Xu#Sz&vQK0lAebCV*_?1|+uXwK5N*P5*z$pwly00o)g?lq|hjTI<< zakAsz*+#D6Lm9xGF)2Z%KNimc-+CT$^W>L#eSaI!qVX{6cr^@-F=H%0{yA>zo}fLdJ1a_2+Q(TBhl@ zYo_Ar!&n_)_l(t$&$4BhRf(Ob3_Eu2;5f6F=puSp5a{Ap$H`|D zxX-HkqHfHA*amL2P#4l#YMW)Hd$WEQF5mLJrQl*XTIxexOv>Q{Sx2E%7b6N1>>DDh zENAGRTF0S6``v_DOUpf>!g6+YeI$%?jbdip?rycpB0ai@n#%rLb9st%C%0i4{l-Xx z^&3Dna}=DX*6d?cOOK+pGC7}2MliqpRZKo;G0q{~%H6w*P_Niyg!AZSzpx|J-Zm{U z`e3$WItt~TU#A#)&v2H-&Oo!TZ!z1O97{sl-v*{P6a(RN&?~maW(72nJnu-kM!6W& zAuAk{%+?N#By^kn_Tq7BXW=~#o=2ouGP~&S@6S-UD(AkTbQzSaEUD1&!W{dT(I>hh9K2*6-M6d-D9nk2%|Q z`A;Kmr$1whd<7HfXn4@P_0j?2NM2AR4z&Yo&s!;+HcO@-yK+Hx2IPI7hh&BAOQrVd z-~rf9U!S;8r^`cwVI*$rb>DuntNqqb>3e;-7@0;H)r@Dm67l<+DaV>Mh7GS3+Eji! 
zT86hj9P=W&DZoev`nVu0ym8FStSocD^xbO+th-KEg{FzYD%pHPozN&icID-g7aG$p zvQy-CQJSv8V&LK-ilp;W?Y+u1^l37o%^TEXs#Q}K-1L3Q>x{t>1yx)h0z0w( z*k&b(0d9;n9nPkt=hfhA)}$&hN}phVd#N4D{>jz?AqRCAZmuEGAFLs&3Tm&9{g__> zJA+T`j4h`arjb=w7lI|`bnhOY_ESl*=VR8$I`8WTPG^c7=Fd*sg<=me!5f&mP;TGG zU{=b4DR99;fsoD-7qi0yun+kca>;SAB}}(-=2oda4OO94#!Ioum&p~Xx_5g z|75&h(@u*85I@+`_~QTh1>^#bc{5T2YVlZ~}3v-$W`;#u6R*CcRUI6ZY#H!6u^^A@ro~1 zes0+8HjLWA8!m3YzkJ?h@q^U-*CTAf%X++z3a@)EKXu3v54gGWL1z#@$3W2HmAEwP zpTHyPbAU&0C)zEe*Hisr`TJRd+<9^@R}ZDvi$gm+2YV)wLe%B+Jh^i)5@B0>GylMo zpSNMxfO&jiUd+a=-&j^}qB3AM61Mt(N1}4F*vk z(qEjrOpazHo(ReLD7-c=?5qoZ-a@Q9#cF~&<(Va4m9>ap#hOhfI6bGP+8O7$y&e1Z zQ%&MOBl_ljZ&%4Y14|PgIYFF~A3i|w>vd2lVp!@Jd~&KZ%mP*pA4zYQ;(YS%X~w{} zaJzjKIDx9d@Y|xt0d=7UEiGR(4g}ww|5|hJ5`UU7B9lMoDo?dUHSh zzbX-cutXs7Q_B>64Ucfj$!cV%2x&%)NW4#q=ggGYBP}|cnj%zI{~%!`-L$_@V(%~q zObP2jLgL9f2iR%|dCUC+))4kX<-&0sB+^rb*TGBsxTW+yymQ~*wY5#v7QbHproKKj zD5CM0OP@8(HP?L)&{Wp>c}qh_3Iej^1N zVYyNn-G-dGsN)Pu0zSXtGj`j$L1tJk@$)&{ac-{*Ib1^Okv~6aU!6 zv7a-2ze?s8F()# zjgqJX$c$}8FFT2JfyagrXL+^(FTBUPKjCQFzUH}on__R;yJ`*7E6;V#G|vT@X-t^4 z=x`^VmPAcBc>0KV(dl+yq^Ww+ryp8ui>5$M7mv{hY(%p3O!~LP&YEJM)Y&1N6pzl& zX5bX?Yy|ckPvu=34{WPWg`)RVsVt=-D$`_0RFOjU)$NTt4YUQ~}p(X<#RK!^d9$Yr9)&l@0$1}{g& z!mb!8T=kBqN*4(zC62OmIYlf4VLJDDt4W;F!|Zb#i`#--lYa* zYy0~!gnN3wgZCGfmAlEW5|cibx4LI{Be@3c!*U>!ChE(h~*~BZB7w z8g%aJoN=2X+{6?zb5{235O>?KST<@0qi$%3weJ zi4+%5A0&$aWj-o$vPgwWBk7D&7RMb@+}3Dsh6QyrS-Ff-+z?w@Zljv=sTG=PR;~KA z?|xNk`B;>&UUS$;#j*6o^wpGS#=~0ND~vcVZYF|(eBb)P&w9UvRFei)%~YAGEVsko z>F8+ZdwJ7TT3$Sx6O~V^zzlCXBcB#-z`}ZiM$zxWu95h)%K(Utgkh;Q{%+6orQ(|? 
zDB0K1-~z^zj%5x-0(4|)A}XMA!zVT$^oO<91&?s9%2y=wd&VwW8YJ?M@r|$P4Ilc9 zJl7M;f4#_Ria>dTZpisT6fK<*V47q`g%AYu7^-Ud1SuKLt*H|B8kmDXxc5^2LP{V0 z{|QoN9{l%^vhNO3zPp|pV|Z&V(P46dC^u>k5XbXY`)f^yF6&-PkJUTF3?zEC9pQpt z+!BCbHjMJzYNQWSzGq0-7oosg1yC)Osf3E+VM?dZ27VK1lDKMzeNy9wO~kNoA1^fg z{Wke+-jdE}m3AXti%pw_>yDXKqDH^z%q?R*gQ&1oI@m_*~S0t*N<4kE8mB*B znFwn)(0ItPH~W&1L-Lf#P4dOVoUfYs2+R4)txuYcdYFAcweZXL1iwf77QQ#);iAGz z+{owK-+v`}Q=zpoRqvW%zO?g1F-UCdd!xVSF-rXS0FEaoLH_8b)%&bLs`L$Bt!sAx zkG4Q6q{+UN>vlDzVUk#V?)xoEBGik&cTTvih86Ns5nxyI)O7kAV^XsrD+N2QifEi4 z!6*$vmu4SxUIyfvmy@rtmaBZRgws$_73 z4y-?h(|yxIHRijv%fRd8rbjdL>FmKeessl06#nc6hOMbeUD(=D;0%GhshMil-QtB$ zr#%%sy^_68!u*UhV&@AKY`ubUnWOwvf5Rd0SDihwi}Fnp*TrZK>CWz068E^p#U$S6 zP42$^uTGBav%F$z(M!uwzc(wVD{S48iKSkyYuFt?DEwEnk4XsQg3BY+iqV?B+rcRG zlZxihTf1)K;FBZr(<_D-*G@rN#0Vr`_6)iu$TJ%D=@}3zB^_n8wbCoufztaCfI{3D z{^ID>JEq*#1^Xw|#wO8&l;22-jft@UOq;O0grON zx`|}!q%~iy;@Uo=sC+1zOxvKr&MNe#CQP6bwk4RhK*+4*;+23a(awvH4}`TG9}{$s z&EBsDHLl?*wf!DdT$TL-T{qi{Fi{3qeluNjXG`bsL#gdI?}S8 zXW{Q~fu6|lR#c{DtjCResV&!J6e4)AuffkAE`JcInJE6qixSl3Sef!aLE4`GPe|J~ zzu$$j+uGmc0ychRHI6oeFc&VuThxYeXwWf+XOK_Hd9^8}R(vO+LTiI({Wo0waAu@R z%gjY(rSt}mN#?p=EZVa{-I}n){wJY_5~V+3|&%HeuB)cit)^0 zUAQyXz{nXZTc(FFyZuB)>q`bH2Q@Z>aGxrnD6l=5o{wt?eiz}yIO|n=Im)3!(MzLw z#ej%-)s36S_Uh6&t;aQ>*%1lQWVXy^2MJVJ-{$*s{sf~R4F3a+mKaDs+fOLmLVTTU zHLIZ2YDS{Cto2RjwwgtwDcHhPKINE$JV5|ZPAL>_{{o|mkit3=b(Tb=^RvROw+n_g z@1@Z^P_vrwOSpIRNMS6t?!p>hI_A!A;R3(~Z|N~+*?a04G&C^2w%EPL{|!C*HQcgE zkrQlkLHujkkyw@PKC)1qid1KvhD>^q!^4dBe3<#0S(^)!{c4g6Hhe_V|C4*1p5 z)11D55I0H(=f|Ng>gmjyag3X=5I z4_Z_(MN>%Pz$sIGbj1tgm$gHZRWw&ADywcGDCp>s%EI%L+DUcr!}sFa8S&$%LTPy@9gQN^s<~oa`*&%tc4Km zSdu`?tUx*7U2Z(t#XN8UEwHg$+|?`C0p4cYQowc_EL|k9w75Z4gZBn+XS9Lcx(90R zzBlMO(fHa=oBV2?D{p&TiKVwb`M=XzRK$B{Jv(Hn+7VtZLepQ4&ZC`H#PRivT(M78^Xs%0x5dBWxLgt6Q7S9Doao?O+|?a$b3c(8jB9d-8W#5$?@%B9B9$+Dms7r zk~EP}SV7+T5`%F=07clI?meI(!0o(cL@bsB>4o>!A6!vu>9+YY19AG8fc0X!3 zn{KqVHy(INqiVclZyQit7e*d9`Pmz$0xE}*a#Y&m@PXkCg-xEZip$6Pu+lq**5CB!CBYjF+b5;7IO-4#G 
zSW25W88@C^IH}t^3uFv+{?R_QE)%Xs(stK7^}#@L+wzi#6VQtrPLWd{SckBt2}rN^ zebSJY9@}YD%ZE z>r$-d4lw{q!wy!rmVz$@YSi;-rQcY=$k`AHtCtx zolvF`QDKS-}`>3#`!b&M!KsByPOIf*)xfv0UmFb&I)aJCBBf zq93F+T27Svx@-+eqe7N~3r0O7B8%NuRp!7dt@ZJiC~a|7AhYoI@8->|mr zIX-A(BasgfL|SQ&G6gSc(f$oZG52m&#-BU9YUU)nMD+!$a!~j^Sy;m#yA- zwnVSvtm-Bg`cA@;J~UoeP2N$3*m7NYtqVJ`Cs4ty-1Bd zp1>-mWuWMShoT27>ze$xts&bXCrh$7APjWG<-&}ky(gkg2|el=TG2^Q!f!Gy6IQakdqIN!INUQq zjnij&nVb2-hz$^&A<)sef3{f)-aoB;U!hqw(k2)aH(B2Bm)=%F0O_cE_~jyUx5W57 z>jieLUnBbfl4R=DeE|<{I0M$n(690w>4eeU@W#f`gH@M;XC?#uM;n7r0MX>d8rAv) zRJhA3E{U1Vjl-A%oZ!#A>y!mun+zn%#&)?~n)^lx zA+d;iY(#IixvbxMt>>2H(-z}&p{vn!%=qZZeAZ>D`!I5)&{>OF+fo_Y=s`Jr$pZ!s^qWwf54#EX8QJ(b z$f5WHo##03QK_1G8M^)`r2Cc=wKU42Z&(#P{J}_Gw}8V}{FA%C?In$lz+tAIBpY&94t| zxK1L6kj$Wmp}#q^+cuhRN)3D8;Y4rGzglGy2rKJQuQV3_x603O1aT?}dAfu%wsmwM zY`dc=_oh9r1cZ{_AxGRPKmz9mKq8=(%vnundaxs!b}2*fq{oWWb-XwnNcDEtLqw#L zm0Byu#QPL?>MHZv%Lv51Nw8aCr-kXg%R8u9DXUf)dJD9M{7uwy`{>KF?f4O%suTb# zvAonif_brat2%9}?U`RQ4>VN*JNwwAV}_&-q*tsCIY&Z9oRA=V)C6)b+Tv zS$e8Q#nI5JC%d&&N^ud|3QxS&FZD_otBP< zVTd`*Nc1&+Uz_Z&)*&FzTcLd3EFfu7ne}V1-Ha&sM5>u<6tSZ$LL|QlU>K2|BwPxU zn!HV(O~Sa9w++yI#rt-H7g9|G67#Z8aBl=`u`t4SHAAv$ES|!(5vpl2ucqr>wSU-X9YL!iI~S|$fXwh!DN4^t^F+o0)n8DiqE4Nggl8XYk%Y0s$=uopIQBF;v6rt zgo+0^Q_o(y(FlqDXp}+qP=_gLv5Id*Fha2%y)!isO#$F z%lkw?&%y^hSF}IgkdXo>)L=o>f3y`5H}P4XmnVSqV=f?NG}n=Kt3#p|6kT;4Kam>U zd~6OOuT=)JIBV~Q6nVl$oNn7M6(j9QK?%ZgU}#Z`z(eG%hz#zKo2<>ZX$S0KJ#urNx2i>p552Z`j344lD*D^TRA7S$p<*H;jN;`V| z@2+i+X2!1$)KnUPQ<-k?YPaopG~)=9{Wap6?)WJYgaM5guSOubgfe#HLr5O%&bV&QHq>64t~!kZ z$6gfM?hU~}q+M*}r;=0soD;tC`9YF3xq28#ioj(Vzc0V0y5 zmtU^qjeH`YU#&(3+Cy0m!@>sJ;|FCcXn`{6T8b#P-}n%_l)#x`E?p42!hi@Mtu^c% zq#NtV@!ic-Y#`0HN7x=fHwFrI?!Em$SA({)VQ&ws8?rNZvUyeE-RlG$fjgy$Fk15p>aQs8|>MxUHqfdttEFomx$=RpfogauNYXmp&6o&?lv%)A_qlZgjhG`6585`o_?~@&QsSw)U1LztngNl3|2G4JV_^xc{OD7yv!|Y?R2s=W}P+#kR12 zH{M_*9|KjJLBp7v!{Rb92;Q!LYs9o_P)~{-X!F=J0`?%GE1~^&?UBI(Tw>B3W6U2! 
zi}kZliTywNl=j!5QRlS$Xeiwe*r7#e6US>>DZ-qHO~*nbV@wZ}UFP?9N2oh(f{fY{ ze`Bf)ZyKOQ>}QEHUku40^*sn70j1QVUhVUiT7?&jWT&J@0eiLfk~8hbH{E*4xg>9& zYSp1KHyfN$91)&j&0>bPO-@cG?0f`vlYhsN=cV^3|-xPFjm7_ z3mwL?oW`rwL^qaSl^`s+s=y_c419eQAAEDK>9>Q4!f+7)blNIf*QTl{+unvHP|Gta zK=h}xJ-|CpzT#)qrU5aE(YilRW8Bx7mZ|uu%gf*1)T8MQV@d>D;R2L)7jQq;o{}E4 z-uAbS#T%qf2k{=-OiCg6#qH5I-jP??Z0~4do>1>4|5t~!}Lr1&pH~Q5z8R?Fz65Z_;mqEz*tWS zwUGV;aEQV{aY}2cRP5N?+mqDVh2LHNFo`$O{nGFhA|Cug*|8?Nx*4CVA4L#Y#7*7V0= zexe`X4&EwMXf5Wy7vHa9ft?rTa#5X+>li&sVi@iHcY*Ja?3ZQ19Haw3#Pi?bqijA> z9ljZ}lrBCdE4Nt%x`?^#{9Nl${$HO1KY+=W;-O>n|8u>cBR#(#14z0pykWJ$i>h8c zt9NlO7@tpohqSx|55T=ZnN&KW*3q6&`s?=*RcJ|Ns$Qw>k!k| z{G&?z`ENU*CetEi`LlQM%bMOG1Ry*kAhZ6_DX79rGa6wsv&n zbgguxKa;vjd|nA2RaeM+KK`Mr6aTZx!7;ZaMJ?KZLci?+<3%Txr3J_`)pB8`FiLqp ze~>6(B05C&cuSe5>-#Gm*P`hQs{5E7`ZFO=b^Wz9*B|%K@PIpO)PCH5rO!fPmgWaq zeW-L4(4t%16|(7S`kGlKi&HIBZ!^fd&~y;CxOcAe;Ypj7lcn$LQF(kN+7!hv?IkAj zD!FcDpdPu*$6;#Q_V6(b3mXMO>F!Kwnxd{wYNVEAmOu;|1e1JX1ZK}C3Te;1lG?mo z#DA`^op%WfxAX-d;?r&^vM`}zq|{zmMYkg;3Sd}yS%*w(MHcMheg93be7&y(j?H{e z`Gk;MMP;=Q8aS&n{w-c9)*LaRJ|LNr%;;BYAGI6cW(by|}M2Gt<*r z;0Sfr+uQiwo?WWv-AHf%dCIDYWRcaKB$u6vPDC9bF8v1K-8fY#`if z35Z0KZVy*ub4nS=52dk&%aq#hg9m}x{ORCH5Sk&_`U`$%8 zXvGnD`e)C1d;3xY>#*e9pJT)1Y4%g;oiP%Zf9fx^C~SFkFacj}lfCo}$0*Moj5mT) z@^)wzYaBKCOBMCQE8ZDb-&OZ7EK1{%hkMKHJoJje&JC!S_*J$yhp*n*C`!gVRbqMa z!N)fTOQ}4mTVN-#gB=4KtBsIuz3l3(rEI1j@Wc+CSYqfJL32Y1Xt7_J(V3GefMn^l zgu~C(Iv|d#w^JR(!+mUj0iSs=&2v%Ty1QQ>?ayf|yuZnU)sXXH25Z(vF1-1n>8vAv|b3)8=`^EWmv~-kBRG~T|1Q#e9FNu zX6#%MT@p8(Z7rP3Hyt)KpM|YPJJ(SyW(sQAX{8AU^2t^!s-woPd2w>>C7Lf0%JXOmAMP9lyMJ_4s(drj%Wp zIkym^Kprhjqc7W@&mU?0aT;|5ceuDFSlON_aL^uy^BEChZf6kT}!y zQLe)*%t{gX(fYbP4H^SPr^iI|vlB4^XYimwk=CLK+qv+AY| zu_~-%xba<}w~j*WLzya5_ScgAcz1SKp@r!X-r&HE3z3-ecG0{K(0VbVUsz#@-F5*1CuKy{pYhTs*S+@k-su8_~+D z{!Qz7CoJ1B(#@BSYwELFLWf4&L&jX+mhlu_-t^buFsv;f?P%S9z_etmzD2eBs4hs9 z!>q1~Rv$jvOR*;FWUt4+@4zMegf(xhpD0%f;%4=X^oqRDd}2rUi#}Y_s4ehzLS21q zlv}#;s%}@ImCsDE9@`7I&u>~jePI8YPec=eun;>}XnN3M`~f#ZlojUW!&fO4JEGY} 
zy-4h&uo|$_q1@`H?q_zVL&Z^OLV~97iu|V>oyXc=7)juxzn6SiHN8x0svyiFzRXN>O8&cEH6bPJcju$xLtEVI` z_Y{xoe6XNP3`#3zBya5Vl;OTC)BT|}_bDfsKQ*^D@hVw;WCV1+DG31SDEp9WW3d16 zN#4*jR`m^Ti$cLZQf|3o#+FUqtv&ZfAX5!HQOrvGmaZekpx`3#igRk2x>$oCcr-)m z(-LNKntmo4gJ-^>d{!&ct{4SbA|2UP#n#m}feM-YQK=P0fLl=13m?CH=feF+|CsG~ zL)A7SIg7CPM-?DSbbrsChnKlo!y%ddJqVXRRrChj9bc?Vt|tyd6w;+Ew}SNHJECKx zCsV4$#0S~ox3C%5X;tDFz5IF4jAA3N=~P>Xm21To@DIvVWd=UA_#TB(upT-@w` zxcCH`vk4w_K9n`*{{DVu6sgiaBGbED>zd(kGTrrWuw?^ciVo+s!&l9n!~X{d|Cq zyQ~CaZ`KI*Zqoz8^%YPFtkS89ou9;7wo`KTo4aB1llf=8C$rv>SE-EjMM+>>IY!FO z?_q`7TUuIC!Zu7T)KWDh;dU8#gO68jRE>0=&(rTfW~Q@I zsBL!SaXn$k7E~%;?KgWl4u-@`JFgxQ#4<}q^Q`3x;pmj)hhfb z%JJxdwA#9V-l$!JbqbWk2f%S1iPds`tLj!O)I0K%L7nP8d~J{KN8in~)#Ap`-GscfEYstp0-lVRwe{D#4$93rx{;fIARU zU4bC_bt+rjVI_-Bzmf65hO7W9OPF71f={N~cqU?uTHB0#qP3+$*P#HAYAGM23p%@! zQ*!OT(fG@o$_1vMfY-=#9{&=Tk1x)jDB0T)yxtImWe%ict0 z6ywpk+fIGb1?SmMTdvUE*|O*#w(Po)hRdj2vSEBK%>2DaO!+UyWGT~D%k{PheHz8vSl zU4_j;s@)X5D_VqwW}18~ljHNSw5nJ17Z=?dGOdpYcFVqMOH5;ybE3=8zAs!%`g(v1 zixeoM$VBh@*;(wH%FLh%w&?Xa=ZmoJ%uUslbEEHwDdY)HK1;VcV(mQ!DBylDaVV3V@xm!z zE;a{f@lGn@gN%8%`quw=SuMO}+micv_;i#zcr_tW)J>dr8NyXf#Wl~o`qUB4_Fco2 z<`6Yvex$R|N?yLTG5Hgn_fhd`LjucAy3(?=*lgK>SbFVFLEin(bj|FBQbPnI98_t8 zTbxPTD%A>1Fc420a*-JnYNvnh)-cNf)|9eP1YEr@&|MjV4TKf0HiPo)0>WZQQ~-dInSzWnD^fc4JJ z0`LvPeP4S1g@pd!4gMJR|C#8I@BJ@&(cW#xm#)48d$#F&z~56T1<7J@!?*tjJ4(KX literal 0 HcmV?d00001 diff --git a/doc/source/_static/reshaping_unstack_0.png b/doc/source/_static/reshaping_unstack_0.png new file mode 100644 index 0000000000000000000000000000000000000000..eceddf73eea9e5d3bd0efca118b8f9f75010ee4f GIT binary patch literal 58533 zcmeFZWmF#9(lv?)_u%fX!QCyv-JRg>!QDMru;6aN-Q9viumHi`{Wi%yd!P55d+*Qh z$2Z=L9vRQ`boW}ls%ur%oU;aD^0E>LusEOo2FZ#*cJ-(+FAH)H;pb$~qASehj$I(Uq9R!g}Xq+UzSEH>w=hw6_{SR4! 
z@r)pc8KL79RaK!Mcw_VxPF||ePS>nFUh1SsARqgdjTULp?}et~a`Jj#Hk}?%$m!l) z%Mm^fib0d%F55(7r#8S5qnM_r_^w~u*~-vDOq1Rk#GZV9rtrqcvOKKH71%*54;jVQ zhXR58Bz12JE&dC&4-fZec5T46 zq=DQO@oX%zY>l77Mt48as83D!T~)6PdM+B~8^@{*9M6LoPx{No`az*>)fnjY6`bqB z1{CR3zGcfaJ1xtMhXf)2Hl~Gf3*W?P(k^FXlppe)cJ-hvJi=Y>JjsvkHV8o;I2S^Xac zFV|d}wd%Zy5aTLel5Ez<{a#e9f2c+vpQ3abe02Fnp^0xD{i7~U2C>{<^=Udf`IlU! zI(QlX9F}G1+lP^!HwO6O+Q!J2FR`h_astLq>Dt{dksE}bDKROaqY$RZtikS(cwGx9pfiUw3n#B8?Ct0LXKs68{*K6YyoZ-Op^kWxo3%!Jbb7vdY=HS zj=+>L$Q(?RR7pfw=*W#(As1r--2^+rVx3k8mKqumtK*a9N2wy4|3a)I3 z89#O=kb+Lw1#rxDzflh&Xc6^+fHoooV?SOybo79P^>>_b489}lI3Gce)(PA|2m-vB zpjU~i&p=20-K*eLiJ`lNW{|;qq0R(_V~qWK+s}_x)b$Fm_MX27$1G=q%*2kH!Xu!#nwn?SkSj(xkj!BTlQ^>!0(hbOt$01 z!<7Xq^sDWTvtw?g)22Q~Jw`mnJ_bLggnSE$6oeN<97HR|DaI`ZBZl8+9d3r6MWsq@ zP8~!wOI1$gMU6&%K|P^RufU)GDA*KU*b}kNc6xoQ{|V zSk;jvkra_#k?xVn*bi7csk^D@DR`-|rhul1Mq5X@3#QYL z=MZP;XFoPlH%xxcZXj&)buAC^ObiT}jbe_d4b2ZU3^R70bY_jOb-t*AFlLjiGs=@q z5;HLll71v^R6G5;Zb>Xn{5D50XH$4ZSRha=Fe4BNwgFl$tSUSulq^ge9UCPdV-__E zwH(!pIQXp+3LHu)@&Tqq7;X5Q@Z>OoBta=EDF`WLN%+I=L&cT1?WGp;Rb)bn5-hUk zU$+|3YZIH4o0TJGFwKR`)%L*lZ1&bz!dY@yc8pgCzmKF17mU;oc_*MHU&PTScoNIe z+frqbtkHC26XmQI+DUKZlLUMX`YF_h*yl`cLSCcT_WfH;c1^NF(|HwaZg|}=aZ*SL zmWs|S`mD?BTQ>4KtU5~zy;<^Ek(q;;LYIic>t!#Sn>?rvD5+dUqxT7F(=r#4zfv%= zFr-SQErqoNm(^M{9}=QdWUI!>GZ>p$nvGW|RtP;TJRUBO_j~Te@0;$upe~^hp(qi= z5$+Jc;Tdtpaj9|0aaY*-vC^?Iv5?a3Qg2gc(niy1(vMk>-jSv(re?fLXTs#5OmEQ0 zrh88xtr4jXs}ZbKuMSF!R%)bbTP{|XTV}hYZ(U|RY{kDcymVZ5RBu#u@vXF0tA4(2 zqQPgAXdn4tazk=+_gMYlwnu8Mc+z}SW63@1!U)qPnJ8IR?MrE{W!*u1x}R3{@*7vN z8@ihuJ`zvV>r1yP&p?kK=Xqyj?z%VKZd~rO?&Wc1k}7%RwrD2~C+>uNS)2T|?Mv-q z?bF^k-a+13k8F={PlaIbz@j1Fzp;S)3^5M52blq{3C)hkkN-&8OOr=oOgcmTjX08I zl=e`Z)5}fL%?!d-RJd0#Pt;g-OFdCj*?xw1F;W{JLF^>VCdxom*V)5J%m}GBdceFt zsqZP$HL@d9Fc~>>GMy?jCE=EOh-vQ=3-=;rlM&bJJd2d8V5wx?Q+^Q|y>t$9r$Sc&g|$<;nLd zd44y)*E-!X4w2!mVXnNWO|GrtfZ@r0d^q43z0*8V`lYhmv~s*uw-TljyG7qKr&hJ8 zzCOB^;e}-1@UU-B<~aGD@j%a}Jh`mWO5bL%1+1c1H??Ws*?Bi@u`c?6X8r&-2xlZ6 
zE6pK2=U4BqyU(4Ut5uWd{BR36xR+*@(`=+y%FK5x8C&|CXkXZ$U49)!@kqITJ{Dii zXu-6zt?8-V9P`?FVC1&uerO(TCg5lKWY?ip_%JRK6>`L9!N);R%xmFZcG=jZR^6}{ z_A{Y?KfnD~;jo<6gUZ7)lnc@i?}DqtRmZ0tB;Ax&H{Oq2oJ?CxmktYFZio3NBgJYt zl^D7MZMt3w-2T`@UNR5$#)*#K_A?e*f|>KHwKnfJ+t6IlY$?1b%+@42^d4gG;_i)x zC^O~ilpK_frh})SSZ!DdO-aqvc53*iy#Tx1=bx2c&#$`g+#$hj1x4A0wAXr7YACB8 zs@Le^=pbt8KE*vG9PI?XO-ee}lkMOy@2}jLlP!xVZ+K#V;HyEYuSSs~NvPL_)ZY@m zdjLNzhXS#{1i^~sMU?w7-r)iZZ(V>7!YK?zg|7}d7T#qponO<=t`ecUsEq88Nne<5`5hE z*n-dAl+#4%G-jj|lrI_^!9PLCaZO~mk!Y?;U@W-z-L7_8M%X^Fxte=VTTHE%=9bD# zdKJ2Ab&O|sS9N~uU>*`3Nt|`+Dw}Uz{n}mgy=2AXJBHDK>BO<%0W?nqS9vptnl#5( zo-2aOO!63*3Gd35yo1$Q;8itwhdqOWUi&o*!FK&5{*+^7@NUEI*05L9*k|U$dDAW_ zGj+9w{Rfy`(T=b5S7r*=@XAc3wpfB0To!D0x8if@UAD8U z;q>CTWZRRdOs`~S45niy11|Hk)%}23iD|hKB|0Ue=_XcKR-;Ud2&qN0CtIYo|`a2S@dT#`^T= zs_~NJ>{XQ~5h(BAmZ;rGew>DM^z{DJyHrmt2u*SIAPvuD^~I9quw_S1WPF9_5!tLS zA=<|+b1g|tJI7Mz(C1Fq-zlX$Ywrd5S=$4=GoC+y&iauHrFNkZGvq88kZjCIl0`~H zi_?o=^ttta?q7-UNo7jqO#W7rY(aSAJ%zriY_;F=t>E$z-l1RsG=y#^Kxh%0>upo= zMH(U-p$)GC+spaXlK(}JS#;9dx0pN0TX=E;aq2ksEZKBLymddv&8t~4OGL*pGCjS$ z^Ml>|+ElY;@xs8jN7AnD5ir!WD}r@Ln!W4KT&4vtK8`3K+_2wL$h7p*Uo9`WCZ3$H zh^(aV?zXSZXiTeIuYbJp3z6d6l3D~FtS#|p61cf*y%5MJ1b%;|*6j9fx zD$BTWeXh}Kkv^(KTJoY*B7-6y#Tq3= z#kb-<`eMnJiT_Z4Oc0%8n9|XS=T{Ug5%y@flu1?JIw}wv*S3-77xB}|QJGS;m)-Ma ziE)5CpJT~S``TaTqM#%3v}lvHqW3PI%|6XU6IN4XNs9XicX_jFbLA!275il=d>H~Y z!U^sXn<`T#b1{oEhY=et3uAp%ZC>?Z^>CGHgYGx;DuimZYEN6ey{dt@v9lnh_S)); zV3`EVqV()#t($nh&z_(8!g+&uaoe0)brVoq@7nZ89wf7~;yN6BEwZJMsX6Ha(M#Tn zDA z@!euthJ}TN6tU3+8xMV=-?(FtddztYZB&czoy+Y|9PXW2^f99ytujx)w^jbfqFc?O zv&|4W^v|l9t-9CQk0yPi#j~t4Nv$g9W>FJOSusVcdI^pj^41pkdtcAVf9BgJ3(Qw) z6Kij`Q@E})GdC4i-rI5@ zuR)k0Vewc>bfmX}$=M;366hG@;Vcqjxiqo7w%|htKi6?jZNH)1V|jp)#tikh_LLaN zRF_HyUxf;%ehrQfr~#1+n#k!^!Y8jnvnYC7v>b-h%Z~FlE&W^nH``sqUGqIg)?HS3 zrX^NHCJkmjqif^mA;sa6F7Dy_n2dn7sGfj?_}UEGXf3}7o(VQx7NCg6@o5kY)6br? 
zG5C6#GUz;I#cXSj53`}ut5X`Um$xc|H2iLeVq|DIYS@gHisqAwl?sirtn!%(n_7Z` z+2V(p*za`HpC`G>mUC}reY<0aLdR03MJJWJM!)k8{|q!KoaMj;)uL=6yUzTGOmNaxMaFGgb`wAGW(}ny6(mitHv8wgTuw_*cz1_rn)s|3n&DSNy#ZW` zZxm7#YA2g3dY^~CsK)wDyyvNMV&e2<=QxpHh${eVHaZl`lCh9;%QR*#v^%P( z^32tLu{b+|DQwZ9!|Sn_?RPxfT^AFjG*(-lwM4Oud0h2ly5(eQd)9hB_e%XfY(XjC z?{r5XWU7){Lbk&QtK-R!v(8m@$FXOA)>b+qc;uEa#D`xq_5RA>YF2aeerjzMmqqJg zpy0*c7i1R{cOCqvAOPZx^pRQxu_U3mve9S!ao3sM;C>Oq_aY8ixeK7s_*V#eO`4%2 zNE2*_4-4vuc=@{P%iZGfhNudgeSO1hnDTBE`sV`+UMJ*FC~9~rP3nB=gHyv>8D&^y zSLnGwseK3%^6He}m|w8*9ql<0g=Ph|26o1`qm4l3ck&{Z*Z+{h-vZO!V?{haU5`Hjadc0>!hS!<;rdwHXQH zUe$a4{oUp6QZajS*ny5DUHXV}d7i6-_s&T_7Q-F0O2jO{7`2Oyn$*2Da-T1fzFU^e zKcq>Hm6oemvmdRi9^qfKz~LA2v=J8ZZFvTIKW<6=P_ptlX6?Qx5t=ZFP|LBkSoOx) zOs(?Hx<0#pH*z?|*nJ__j!93b$*hs2#Z&kEeA6BMGUbapNm81HBnd*E{ie+C&Cds~ z!Qo1XsC8m2u{l0a)ZlM9eDW;HP%HlZ3W=ffQbmT!0;3QQT@908c9k#T`ynqNN<-R$ zraM=1nF`%xu%imQ^l}`?Is-BGslL*kQGUR*rq-crgA0JkrA|PorH)`J#Y!h>p_Cvo zgHPsY;EN%zgv^5MXz~rpE{`p=`!;AW#a$ipbGoI7JSqP%Uv3zFsCwjq<{ec9jaDhI z!pP#$*RUz>lC}cNf@D9ps=lg~-Fo}1p_0^8_uBl#S#n(JWm8YS5aL26%+>>PR|zC; zMjBf=J&ukW(0xC5Awm%=5ziial1#CtxI)AD&Bjd$G(xhq6}7mZvG=rXEYr*pJL{ju z%UeZ4o-U$b7hnl7?=Tq&C#dpyG*l{-&$hn#kSnWr%WgmFf3TvLSm}|Wz7ki_{jUA5M`r#bz1 zH!GX=X)qx}tS5y6!``r|+^kP_rlyD&sTUEWYESNkkl#7hPUa#WBV=SklDU!(yo;VW z?~Jw!rzd{B{q^=-`eJyW@klSV!$UQ!)TMMl?O~p3?%O=-Tt&r)%8T-rGRz7AO}!WT zhvl1_v$YtXm(|#n(`C-;sg}K$k3Q~A<(uFnyI{W9D+{*_@E{e)zE}QHQL`mcQCGE+ zHOye0E0agIJX&T5^dQ^mYi^yJb8O>2K1O$pW19nsz90n$ARwR_=E~|$>L1>78`)Yj z7#Q0cnlQLo+X0dS2neqmH}KKg#L0lj&DzSwk=u=raJ~}zsaWgWyy1FvBvNG5@m@zVQ zad9y+u`sf*&;wV{JG$FA8Mx8gIFkMu?NnoxF^%3;4GM{jt{XUjcLR!}2ozZF+v#Ba0_U z5D-BSDN!M1H_*e3H*V-EH~n%%Gm|aI9w#c{zOc}+Me4|1uwT0g`IWWX| z`h#Hza)Xf&wFNlswIxrY5h2INKCRvKW1Dic3>}*$j0NBDPH?lj+N7@?-o&|1H0?kw zCc;EPNc#Tg!Alm~)pK7a3q~gRpGO)9BzXt)fB&wFlAV1Ci;Eiv0u%c`4|(}>p8pz# zZ2c`LtaN2nHOhZp9{V~0`2S7-_J6gB1es9&aT8xszs5$+u*k@nzEoDT&vY6!oT_Id zS62t~eWtZi{gK$@p4W%oso-zOK~af7Q5!{k35I#-+{FLoo2QI^LIF@x71~XgJ23*! 
zlC0X?&ii>gi9D`0iw(AVX{nf$!mro5()tly(Pkalc)9U&{N;BLV4%>=S`9XP#d8%p zTEm~|)4xvhJMSkd7s(6RthQ3KS`u&ud`&l2R$X-CiJ?D8%<@E zsLVf{E{wU(HvB;TC$11(!Pp^~?6dBK{@KyGsJ@fttF2AXm&Ys1Bf1N8%wWLthhbP0 zO_!=w_LelaHkgj#;D6qie*bl2was(;`QapaD1ox&a9}nHhs|OT9EmsE_T)#?^rdQZ z$%MnJKM?sPk_rl?I#?~T{_CZp`=L?F4nE#pTz!cXx+*TrJa>CG8~+jq7!R(dx|_vh zm<(DW=na16O@&U2w4`KcJ0>n}XOm4fF9T(_TBtH$v7DpZ`_WX(`*b#q zkyeEE_AR+>+tp`n%tA*_xmIA^Cbtd zC7yeSbKMQch*fAq80WT=tmn;qU=$HUz~_-N-#-M*N(cB7D}%{wiAs&fmBqzU2AY-m z`R!)RL5ZZgfaxffYg#qppW$O*fZ?&IT1a0FCP-;_cW%<;v==`?0KTk4l=vOm?^0uc z{}}q~7GO100)pes%oC?KQ!e+H=3UI?*rF?dFJ66PF&n2FOJlom3-5S&Zt3mqO*vZi zez3my;o*PcA15|PmBsB`V!wY~?R+q2s$;v99_9Xf=aH?mfttEs;o#%G+Eh>lk!5JCo0?ThjYDuTwP}M^a ze0#W5CsTFJ$jz;(HZX@VTdJ09F0)y*l zq*~tkz@SvKu4JSg??KfdJX$AZo?ffosfBRB+rGTj+Eg!^@}%u1bJ>g5HUr_LM@cDF z*4m;Jv!P#MQ5Nk;H;lD5tM`|`YjON~inE^Q5nKAw1M^gSR631i$>MPo$0}1eA9o3p zaxK!^+AYQ*sTuR;r;%bGK_6xfK_E?EFn(_Q1d$-KVr3t z?bgcY2s@np6ytK*jlI@vMtjz5b+a#4E{>l{;wM4B=?;Rs$GZqho=Do@(oUTUu_qid z1ENr&z@T(EvW^>lbuwipoTq<S*LiqFitnqw#71JJRIrXJ0c-Yni^_ z>uv)@mE;4fc&Yo)#$`wLJ79qdp!lZrrf~!&BVsWLvKWWzX~xvFdp!&f#*to)BG=z9 z+jfxO4{*nSwOaZnH6N1FlNkps<&6Gr7niuYCj?3EvbY?^Fftt3e7L2Ekyu@|Ox@te z`E6((!yxZco2P5Bc9VnuHFAPTFnrw0(}~YLK_+3U9B~?2?e<9W2oUrwRH?Nas}P06 zQYw>z?yJ=!lp}4&!Y$Y@*SA6ly(cI9_zm2W-5QRUHhMyw>s4~wISg{6HB>LQN6ehe z2qYr0h=DvfJAcx85lytUXt4|KspMg3GV+-XGXH7-UhZCB=g&G%V3??5ZVngXU!4Oi z6QtEv@6%b3{)cp8iZl6Bf{hp-H%x>8R|aO*Ub6kx^I_WWIU;X(Kux}htqndm8T3F^ zACrIlAs1!TV6)=nfETK1W{B!2pObigx0AJe)R&S7EnbtM+vbrPBVTL!VX@t-g{ZVx z<^5N%3S17GGRV13Kkza3%uIb*Hv*CO!jc{&6wH~q19PcZ?=5ss1+~WPpJ-a!`yB!M zDfsN*4cH&{EM=8CS`tQ2Y?L63Nc}F1KS}ude|M!@M0<^+5O|<~5+HPt5gJl_q_xsZP!g?-5^s!NX+D|XN+l*MnjQU7M+S?zM9Ddbi}3wX-Op|E${ zE>$tIEVNbNjE50&>l`0m^$uxkLS|Yn@<^6IN1=|Sv2mUbZr%0VG0sFNvL3WK*?Zyd zgah2sUJK6aKmGl`LtOw2tlP59T62p~jK(fNo zQ>w-!s%UGLKx|RBng?&($z}4IYg2FfkRHB0 zplD+*%?@0zZI^LQgpH<5|A2!|Mq$T2Ej3QX|W5B-Ob5gH#6g+yNB-aBx#HmjAHmqzuZ5DKH*nqL>Qb#YPJoD zH?>GuHnNI38VcYw?oTu4%}YuKit^D|me~ZKBvezpg^b?hyxy>&vut9HR`~USP+W`P 
z&%5NM6cpOyVy*vOH8_cYq$;D;mTA;3t@O8tSCr{>c%M(p378|H1cC`O#(gO;=m}mT z4Qj(cvDlZ6ZQkd|AC9v}AMzuztG8bMb=Z z;d))4AcgdVb(mK_`(Xlsb?%YeX2;L)nlF(h=@}z(p(PRVp^JcOXc;5Z}+<^Dv*?`GJCLebICCe9XoZf{rHcKax z?9cRC;j=AU>KyqMDC6Nr@5?rd)eoRgXzCvqI(#~?pTm*Wt%^6Iu|1`6oZC<&#Fz7; z)j2{Tgn;szt(0UB;n#5#7c8^wD%>?r9ZtQl-#q#Wp01P|CH~ly$lgMTk(b&0ToX5c zNbNn{pICcJ4*<)xb7R}r6uXWF7H!}eZ8`P@!LS@0snDpgq?VmpAy{C)x{|zpq|-x` z%-C;%TCL*CiruO9^~u^=1sxkh`}4geeLs^bfILi&e>Bw?523bEZU-Ro7tMPI2M33r z*{h{!p<(6N_9TQl>Milz{2--Ot3W^OfkCk{j_X^Jb<;*05k9aWSQwe2@hvx8R>2)` z7}$~cSP}OHMr&GqeiO&Kd{r#bV5PjU-gb?@RthL>M;$Z&Afh}(px&I>7*%-PQk|q= z(UwD1tBlmObABoB%c~Z9f2jcFe_PBZkSidGa|Zokg^U+&&o-~TWBZ*g@HU9q;D-$3 zr874xLQuOIfTCA?o|=LxZm|xic|7n!T?3nOPH;$8apMrDBH1c}iIr!S0_JC$gF>pu z;Dx{Fki%#8ope3MvJ{`sW5#3e@&}Hg z`3ldJxrqP<$#kjniQquhKyLw5Ore2f*=Df;-`d^9&Y0;OlU(hj16UW2$K^*Esh@>c z`?IXH?^BqFr}${c!udI|ft0O2U*@v+z0YWZvD*tFK>os1U;w~MSUS6;^&IwBI=!B2 z&CaD7Yt`lMq;Y7ar}R3_-x8@5NaB`S#eU<~SX5vqDf!bpyy7)KV^l%)BDQx<0PKUg zKCDGV!N8zUwM7!uvPgGEFf%mLYjsmNF*Bd3h9%A&ovk(=ivLAVHY%i-iPh?<>Q@T@ zQPFfGEtFCT)~VtBUvCIM=~syLYW4%zj$&4HZQwyBx(hAzeP#cWR%jiju3MLhxTcvA zo5hS|@>t_LM zy-qFSBBus5wU{e`bisa&;B?ua#*y)bC~pSZGb%dzGr!k;DBH}uR04#UDrf{BRe7e4 z<+k~Bfz*73PPv#=W>3I900z^qs@l(*b}|zH2%2g#ht@g)lqhh$+71ZI*+d($2DGYW z$Oo>2RIbi}3qT1t4s=jP= zFk5<&t+9w034WQ7=$taNHd(#U&rFS4)4c*YzNI}t~hS&D4jiAYlR z(v2hGbCW^S_HpvK9;3ISSyEMOeyPrYfEU<_qYvECEMq(u`oZbWR+!5?uJ%1jFl zFgdTav~l{?Y_o7Ws(b$tSzAmgQ(1Bn_^b5}k2+7SHJKCUfnd}f3EiwlinAhGfCTE=GQe~!yGKR24Bjt&6@zQrep55!VR4GuQwkHn zflgfe?oWOG^12eAMn~n>ucs*>-N-27jQ|7|x}_3D5|)lL?q4i8BS2cug+eBRzUk!; z1oFw$MjiB`Xb~lgy{5=VuXlO#E56jfnt?PXM4=O?N1_C@B#n(haOk?)n1`NfM*39~ zog!yO$m>Hhni({GF7W0$T5W4Vl^@>Q7zzDB(+7`D`>1_oob$B>I5rE8*Vh3#utf;)mQFF&%Euf)c7_ zCFf7q_Ak?*W`}(5c;w>#?ZZH}`_}qdt&OZS^nnL ziSz;T1?sSLv-dyGbant);Atc)_qC$`_d~D~H9H%AL2V%HKhOC3yfQh-0uq$JGxBRG z`{w`{BS9hTs>*1z|2zZw0t|M^=o9^4j6SH0uRn{v9zNxNop}ZZ+tYi-%lMsA|6U3J z<30mCXJ1>Vc~&hCGjAGr#!<^R5z|9vt4yJP;fi~dB0{{uUQ$Qt?f>1NGjYcP&K 
zbrj$+sS+sVuBG%2$+m%LRlsW${dbfEbQp3!fMAyiLB!tzXhrJhM^@9(Nh6(BH|D+p z=}K^bMHgb#22hAZjN8+_v-{Po+V=izX;{5T8^GO;6e|^?mSf`M(^s}#r)f9Z5l?xv zrm_R%c|p`or^Y`)R1kocO%C}^$3HX>3HiT}JWI-AXV-3WcHzOQtpN#5vq?Ultnd-8JVD`5{B?vY|VF&0gciYK& zyGK&&Lo<6sEG&v0l}F1Bh2y?Mc#x|*IU*s$aEz@)kg(`>OUsm5pT71PBs1thOt9Jf z;Pe70;QR%vdI)QWt-&0=%v|xPcmQ*fHBR_{69YoHbsN(7ZyxARzWYG|a<*}Z*;ZoH>734=U|S$tM=}DXXnaUl)V?WKQBOs za``;BA}X-mHxMujK=%N)hZD}WoGYWQY&{QuXZ~F@VpyeARiiY02ijc>nto2{`*(H# zvwnN>=H&S&T33tBvOgr8biLIg8Bk|%li7FqL%Lj*VQvn@%x@YZehfHUcb6UpDjUk1RnJPN z!vzH*-Eww7V<}e14Gs(plmx)~cJ%-OTA%?pPPZ%Jr&FP)#~?V&&j7Vbp`{MMdp$rx z+Tu(kdPKdbS_1G5!}{ILn$O59*S`bm0x+u$s?$5jtpIO2WstcZc%xEdEQQT!m#w4I zs>Eak&}9q~SjA_E_PhW+twCr|&F66$3y4X(&z+?JtIPs`psHb-!1b5AaGZj!a8dKN=OXR7+LIsA_l^^xCz@Gk7TVKQkMN zdm*iv>%z9jf6cW^ZbS^tP@HdiVIB%T?foG6pBK+1t3Im zHC_A|LIDNS&vz#{u9>Sk#T!O$|^rDmXoy`w*!12tk1DX&3j(n-k zk}?1a;kzNCL6#e%YR}WF28%rLO707|rn6g*(jfZ>XK*=+#|G{K+%P6I*Q4+0yq=GF zT=?Dd<+Pl*ikGMkv>Ws!;5l-Di752%fpB%uBm~dH%1hb~4ydfCkBz=3!MQnEU&SK! zcN|(n4gmFb_orr^#UAN>?+v^mh9z0l!+pWFz%61jy_V|4wBqRWH%Y-K3*kS^)1P1q z4FX1^JXO%j%TA7mHmVwwNkJvzhw84LXR4ISld&pc;O6G$?@6V!`Yv5M)6i88hInNy zg0cOhS~T4Dhi|k>x*R%cK@3ZY>vhx$#!5NS^`W8AEgo9+R@Ky?2A+VrbvT@)KYf%W z&pIM-ze}o9Z?_>_`<~pjnz}NbL9Kg|?q*BWB)5)QPS2lW7~EWGm8HdD8)3$1EQ4pB zPR(C7x$RZ4z?Mx^oBYgStBbpuF{|BH3CN|p-ic<5nk&b6UeqTQI@k<}^Qt}qojpwC z>sHdIi-{@UHu%&(z?ign13|Q4pkJX}XXZpV>_a+g=bWj_&TxX2=epmU!{IW_{s+w( z<1hyA%_t5lX{x2fo$26u8uU3}Xzehobm?&;CE_-h?N&)GfZ|NUP~qPZ{axDsP7t!t zow*@)Qf`rWtj68Bo8`o<=)rYvr&wLfFfrtkeLuDrR-urK^;Z>WYBAjxpw$&{*=kJbPRrwnjz z>|ln|Ppuw6(7)uNlf@(pNh@LB(uAV!+^3EX?~`$zz^#D?j(5wzP0*=tWNdgDd?=mO zim>Khp9;J|&lyz}LB+q^{CGM@LU+GAl?!&lcJFMu9|GF-hCoiulx)^d?Gu26Jow1x z6mfE9`@PJLrWji*Qv~RbQ1u5bm%KQ>VEFWZi!+qM_Kgi3rQk57N4#zW#L ziVjV?@@0}QxqH{VALxa?V-1|DLw^WGHrt4)2=+W5qP)`N3oOMs*e!FOqJ-bb9UrCf zKu!y{rUW#s!xG_-g$2YjGs94Z2=kJO#&y7s&;iK&;(1Ng3T=(b>0S@1zSAFd6 z{-6@_WPFO~9G-QdS#$H_Of3OGrCZVxYwMH`iF!RE=d@PJ&NtyaVdpfv-9R^}WLiIM zf*q4S)&XR*_$-7(Syqsp-1{nukG=lNlj5`GZ23$Fypix#Zf3_g-gJO?AApgl?wP8ts!j&Z 
z?RJ_yQW5v~<{A+DNA{)*uP{g(`#nW5)W}pEOw^IWPh6ziwA40hey{>aCG*VC`k&Ny z$^-fs7kyYh=o===GPnd2w=oL8u_KKo6iPIN#F9#K@e$CjouM?Kk?W3Ggp#2*L`jHB z2W8}ka`b@$ikljq*Kt9(J1q`FWr|W1xfiKsp8Ekyha@M4EpM7ij*bpB%$~^jOb8X* z42qGe)nRV57P>*y&E5gQi&SH()&0Whwm|cs2$CJ*b1wN+ySxuj@^DL`)a?Ik6G|lJ zimRpP?8Gn~LoJ0vy+i6wJ(#boL_^rPd$`=&tYL>BZ-={;8UApFnpzYVd(qf zU<2*4F(;EjWge$p`6oX~d(%N>SkSO?LHM2~+8pDVv&&HD2zAY?R_r; zUbpKiC=|jTWQ!K7MV8I)yoT14!uG^l<(V?dAZoN)xp8Oh%JF1vVhD4*Bv$jO8(y93 zUM{9I1~G^leK#Y6!jT?>yl?@oKT?lYTGU=Tv*U%iTE=R=i3)mEfBbi_2`VT^E>%tK zr>~UWwj7JC5{8(rDO1SmNJ3h`s-(-5t_fFI`|0__=cV=u$VKer7)K>EjGxEjOGHCH zxSv`Lo)^ziFC|jWnEz5TW>ERsQ^4!ZzU^L9oX5mo2O0E%yubQjK5cup(a)-K_`zH$ zQ$8KlPQ3a<7@bbV+~Q(lY%R5)p%RR*mBmy3hoGNnSFwF6LFr2~9-_D`AvBET2on{p zzG(rqbd|%)1t-NFUDk7>Wu}GuqT~-#&NHfXuMuxCRt#fD8iaf7d_Uz0!;6|RC$q9n zFNt^XdWbS>LD>N>eJNo28es7b%1Q?__fwo_KZe}VR2SXR*j9L(4bE9$vE={Mn7PJH zYg=L$H*-_5L~!QnuUP^*REkHKF?lIht5HMQBXheW$4chtD#eNW#Xf5pmpJ!hESkoN zsal&Ey_2$2HdyrdROcM^!Ck@Hq6Il4N;PZ>!Z`|zH~n0hXH3v)W`o%pxs&njUyRJ} z{pANjKdp%ss zLgt?bFDe@n(H};#BRf4k+%&jYI#N(6YyiufH_~Y6?^MO2*@&fvkMd4>GskW=j@RN% z97=o{$&D=is&KcvP}WN(Ab)dS?Ep98rXz z$|S@sgrJeZG+eRI3`LV_xl~;WK}wc!k@ja9eKDvO;)4}NKG4`=>SQ&MB-*oT!B@avHwctBG{ZJ;!+!Ir3i3@o zl2E^*nTAE%9%fF*YfoE=O6fiX_wl8lyipm4n9FQ0>d(gMWQ8|5Q@cou!PWKiNyK3@ z$15#Ya~y8A74VkyaYV3d_C{m*rOk9V0n)y~&(_qOlaE zM{2Xtl$?QPpkFUMrVsSrt98lFSN-f*-Z10c2mS5gr0-3RW?Y|c?MfybfsDZ7b??+x z?Iev99p%f6!H!h$hco|c6@r>Xz;qa^U^}sNU~drdkga?@H@bt2uX_6bJvafMGhcgg zBQY%tS^NeEC94;$`0?5P27yykQ~Vhs@83siIfh0>IhSV2Q4Wm%{frPV&z=_c_V$mi z%Ss>12v1)bwdc159dCRK&yv)1%s> zc>y0_`S}2_eSAX$N{;cDnS53l*cT6NUUF-}+>+nDdPo<9OeUq<0 z`9Zvaf>L9#f|JVSTK_(sO=-?beW#3jM3P7cKv>CgeIc+P!5G^v`i&>AQwTs;VbHa0e3j&t6-3s?^;nFBCK`G3NkOl6%L z0N=51LjpMGqviL!FnSV_?uu2QU%*NlVmZ_O7k0}zV79$$X{)sD*Zxultwy^XjRKaI zr5dw|iV*-p+I+tF(^LmUlTHqZ=nPJS2kU#~<>NNbLvHOBgEoeapIf#tGXrKXUEnU( z%4A6^Y>bjbd}!GrEGy5-$SFurF}gq_p9!Ea`X4dBcHHqpx5lsHbjuj1!?^>B#A}CS zdvF8tylOho*x3HK9)MKVchCFo-8&&{*mZKTP+DQfow4*n5zX?qh3`|p@XJPVrdm>9uv%!X*)olFTh})m$$k#>%9-v=1YdRx| 
zL1VaL`g^|g&{+Z{#LdOy!%bkF|9zL*a5B9aJ?hnFHylcFsl3>fMGvd_a2N7p@cDOd0S$vcb#b`+e03ckCo1P6fl5dzvF zd_*TH%Vd%nWLtXOf~k@6&>3~L0qhV-;uSdgPXNk;_Z!5G^Czr~ekJ` z7S_Rk8FNvMF(Hf9g>gGMm#P%K)%K?RUFU;C!B74?0g!Dh;GQFL0#934KQeuu-P`I0 zA5JfUBw^5Nl1aLNGg8dG_c)IM+CcHYsrTUwdq3Tir%?7%-17iz6ant0 zmVkkQfhn$W5*(uJtyWAeOD)P@T}MFliWdV3bN%wPOo)#DvQslAW|&Erdh$s1r3|Ku z83uw1V5dO5?DxEu#KO^0jBQsM?Q;)*G$|p=vRExJ6~IiN&?IsG1})mW-y?EHvwo` z9|W2uGYXCvl#60BtE<_k?C+67kR8=SKGp%jWf*~SgLd#cY=ETGJGBP|oUa&*>Kh-# zDAW&Jhz2p>9sq6;uVrR_uB{?=9*?F;T$ zgea8(?`8sk@*p{!s~(>Yuk2!%AUc1^7l{E56OY%{@<1rKI`ITSI+nyV7tU>=-rn%(;nwl87}yv1$Tazx zvsusxZ#8`r`6M~Z{`K72URiBbk8n#t8jZS@GasXowBq@EFph>)q$o!RK!a!|pC(W# zLp){yU`wfBp=2Ze9oHgH1*Vv)>X9Igsa+V5s2k8U-@PO)BinCi4NI=#T$_4YmOKWA~9F<9A~8Q*TN z-D2J^)O<=ss)nHN_|Ax+`YMa=bxi6_vc z;%Q7K_u&^I>sGP?iT-SU?RdHNmk8!PTqNxNy+p*R@)nM1wCA7S1?JX+euF^uBIwVM z41l~?uFB+N&wSHM+rY11qWGXUPPnR?z`EflF&IjJ|2RVpodplTnB8@*Gx9&k;hzt9 zlYrz?rhstU`+qZM;~$BoBMm<|9<)?khW2k=7*=e{;jzG_ar}tZmQ2sKIUi+ zb;Hs!9MpWcIO1XFian~-llrrt|9pT2kR>c!+W#5h7wW5%0jiQ7N=0ISF%i6=rqOMH zWRB)paxRW!8zm@aj?FMaGM$?XAvbN$KIfaH!v$I*lEZuWcx* zvRGtNxw|D%?Nka%7jbM>Jclh?$Gus$a&+OLUQ}T-NcBu(Q18@`rX-Y zvIbO8&XWJCpOB0GjU8da*P8aFl9^3uB`hq8&SAKz7XOqr4wx<*8{BUzJhdy0N2D*? z0sUt_dK&wtS{+;RgwwJI>55jdfT3~iVLx-kA`K)eP8609|3N=CZAp4)>~dNP{ygbH zblwSBaA;-l(3Z;n@MY9y&qnWV!h*5!vM2ldI=E_ zdg2iYh3kqsF;4*mc`NnTS|bzvTL^hk-jK7xG+w=bfvJxzfE4bikVd`=*Et=~5i!mOxe9%h8+-kOsWV+J-@$k&=v z=U=;8CS1#({fIK`*_`QrXZ1!`zT@VH;bu8J61gmX`T(=4ozWBz6&xp>lV1 zm~5dt+v??II*}K(E_((jefC{ctT};r4x09&t6u=NpDT_Iq;+&A-^p8adchD>+7h-^ zk2oq-If%DdCd%)Rk>S*t?zzqXq$>oJxy-o_`vP0l-I8{Ce zP(*EX{r_2Uv2$s1WM3MGI8J70{B%+w>F(a}bdc;OtDJN;NL6{z=6+vxD33YP8~e6$ zaHOL)fqh>oK}cv}?2&gO;L%=d>ytWOCguZG=3aQr`}C4>b0>>Q=%fYD_%8)a(k+D? 
zT0a;cRG-|<%LPT2Zs~MJiXOL`%BIfOd^qfT7!GbdS~b&ubmzCqNXY)cGrdW76^BDO7}`Pz@O|z=mr%)n&!-Z9fvOf zLCtFOqBBq8bg4iAf5}9y@j;6N9O9wE4krr}VkwoSfwU?I+zcqP7>;F$sd1|AI~^}+ z0^djotdk{34Hb?>oX%M!rV2pPZ6Da4sBVKCs7s6Q{Uo?|E(bF0ukwQf+S_M9sDnm-&D095;(CUIRKXbf-U-6RR3 zNTq57>3=7!&Gb8O5qLd>G!Bb#9VRF$C^3}QQ1VZ5_|W(P_2rAs zE)5l|Ve+I1Ar@gFpj{sdpULS+W84ioDq`CW5V=+VWZL7t_$lDK?FUr_W1@Tvi=#S- zI|2o=H+aGY1p_m-B)0!(H1xF(NEtT)>)CA`c{3T+GfL_ZDdMYik%<@@O^r zyh>V4uc5r4Hi7D0?^<^#IRA1|yL|XDl;?!Ay$p-7ViSemv`xtF`sLw9FyWQ>&J%D?2yrf3AEJ z`3elWgLswBZUKjo&C7z|SB(n+;lz=jmrsz@=phBjHA`S=zhulBe@+L5~r^Qgng3mUG6hx)xN8 zXB&em^{=W{5buePF-p0mE?f-dJnESI>aP{kHNzW<^=r7TT3#X33B#a^>+eT>I5{i5$o zzqb7dGpi@fx zb^KKg^bZJ8Kp8Z5UqiF{)XBvsq=cbR&no*XN&>$k><$;TljO(MrfOJ;T_fV`G5t+q zGQUSB8m2UG9P=U>={poh@b4Y5>pS^fReSZ(8rS4#`Q+jLH4sVz{`q`%4s){+Y;!$I z#e$8|=OsC}@F$!I-?3$D@AW_0E1*-#V5bX%!@x_B0Nwcm9(_s@{(}ZIArxZKb7zC3 z0S>^F_wFe@2aAgkbhfM`(=s4(7ht3nkMj6ZIVh6sT#9 zLpMn8d3*<%VkZ!Tf2o1#7E^Wvo$7mM`cMykC7vn|UuXKiMKG1_rhsoH&paoPqIJ4tI<;@%o{N+L%w*-nbEMiCe(k2-)V$O#=TV z8e&MiWcsP>4pj-gNhXZ^n^@B)=-2{Kyo2as8eh(UV;UxUffXaVx(yErd*~#9Egq8q zy5sGVLJ%d8(~BCt_Dh&ZYNs?sLxR;!B|sWP3$QI`9*@J9KLk>@OGtFJRCM|{eqKFF zgif0y^Y8xupOToBsGWbY+6f)p_nLutY6hAi!YM}rWK{m8y?Q}#o9YTC4D{puB;&ME z^@%SyTxx-b7}v!jgrk6}es`3<0Iu@KQ@nEc5;SoQw|R~G?8ex4N;{4ys>-RmqqOxN z0(Y6R&+tP`K;kuR;T-rc9`y!0fWqQ%lq~HQxFJF+=T*QTMC1n*GfN?MZ3mc98og%h z4*l(sbZb)1$cxunR0daFN;rV%WD@7YfjyK=UMAP&ggiSk%DNsGsUT_>ykKCtWw1Wx zI>tS52SeZ?8G?~{5IPi!l)w0D^YJrBk3`?7|6pU+o>kL2)oHh!)U0 zi`}rVf_jXohc(}uRile5zV63Gpl{Xnb(%~jQlTddx&*geEdGreba-8WaNHr-JE635a9tTf@>K53mL5d z8I`=)_Ie&DQk-MpmnOd3G7sVnGBu!kTz-Bso;tq(^YEd(8uJeVe;u>|1aM|mbY}dW z6V9okIzXs5z0PW+$+QJG8vJ2A-Ro&~1J*h&N(FKOp#z4NjvGqm$5R&I8PPz=2YoS7 z#Q%;zk@p)0j?sc(yqdoRsnLnmnus?bRgnpG4pKJPBW`lKh`7ETD;$Bjik75Ko;1|* z{a$4Fe7mjK{#8_l$~X$E!u(F_k#0_R(eT_yinz-2V`lG>3Xl@kyAX?q7qj9ACg3Q# zY0YJcwFb!LO1x=wP16t*T9CUhlOE?OTk*PPoCd3d{~rgIhQ>2}Go^$tSovvp47x)x zm03IkVTs>gXD!GgYq%*jTvNxr%BS};DsZddM0rW$G#aiRG>%A+2%*7rITt_3s;NNy 
z#~>qZ^eJNkHiWKZMONZmuakBZtQl@Fy(QInHS4>c2+$dOqm7sBkk-=k}(1Af{Dp;pzg53 z?E&Pm)xO8lQO+L6=YvwT?^r-p569%1XMH+{B$GdG~I zulVx=tsXQA9P+Tf$wb!VG`nqa?@(x_l}X9ombwo!K@$ zK^)tTwQ9XA>E+=dUO(d*-+q0T|M#fw;wwX3(4CeZ>}{G;vnLe!tu#ysi1u#`@miRM zlm3erz%y7-vai@)tr`{}h%V}ED7X;4{X>oa$;=}$z!t7$3r+^+hvssAq_9H#2dVxo zk-b6ysQp)3NV$K=*MFZjjq<+75hvLco4htA0)Fzbq5bdU&b;0@;_y1<6#x4dk-w^T zPAVtAd))u7UADaj6+0F{N5rqNj{xL{ND9p%keZcdc2F~I8s+e2V`OZLWz_#WUKW}U z5x8MU*kJg72uGcyM2)zPO}RF1C`7;6Uo9;TL42}Jy}LMFe_x-=?ewJ= z_w8dRSETT!6VS1Nvb_fjYDmMiA8IuN2@MA41F>jG-oDYx z|MHmsQBWYI01@f>IV|k&z54qzz*S-Y^$-6c7f(WfgZ=;c1$cOsO-tc&=(WDSMEdb? zNT`K!^K%m-n#NFX*D!zIR`d68qQsye#AFdtT-1R-a1s)ORLFmll`e2JHCTAs=W5I$ zTWQ^gsDE<)A}G=YMPw%0chIsI|Coj6W9=FiDHX~h5fv@8vESMv+8#B-^go*l3hc|b zFQH*Eewh(s76y9iEjA=IIx6Hk|DBw)FeK(<-13H>2Y zUp_#&gMtu}(OQRn=r$;BkK0U(2WNT=v5t+JDWr=gcgkt=!gL4yKTo_oga-_qBelE5F>2~A9P z`RpzZs(4SvBR`tYwr@V$UG1on%NBVnSjwuJEolpgE7F)=v!Y6CMzZk?#t=4!i*FSr zW%D1}jh)q_$d#!yd9zR~H+jl$J>BGDEx8D~fR~D`u6^EUzk(N~p3I%huoCPm`dGR7 zlP7RPE|t>}2Gce89juHKWFTnk@8X-JJ4vIaXEL#jY5 zlle`y%527z)#gF@hf7Y_jhQrkJDFX#EU9gggQIibe_iMI?!?=K2J`>M$wn$!@<4gw zy)hFz9-XTuO-(jJ)9`ItcB?p5U7XV;g2eG|#NO`wfI7I@R-LA(o=OE?b;sx7ZmDn9 z*l^ky@{;Z8UHNqbQelFE9B$NHx^fi35KQl>sqw~L6y~XeLQA@zez-(Z0{?3zS(?-a zjeuslG50OMdHB9N9v0C6IP{G}I*+Wr_3K@?c_oVke9czmAfe70szr)W5}GK!7@kNu z#kFqZnNxfEvQ81T3G_?HKC5=tk;`MU)H}zY-3Ym_IXnq+-=@(Q^aVeX8m?yyRTv$q zEgC-&++H+qjw_SdI%~jA5OPHHhnb%~(B*1F%0#y_@I>#074A0N*J4HTrE*8@BSzib zrB8AgP9B>RS+8<#vHsI;{Kt6!Ne4)|rD!&bxWq21fE30gPaa;}eJoR{W@;M)dU1!Y zeQX~M+a%=D50B1>ojs@|OZMip33N3@esKf&64B8#(S@S1)|7?wlU5uyp}dj;`GZk{ zk^r&sP3AZdrvu3KODue>R&54;2x(Ut{((So4UH5@batub#`o{33GY2iyNQ~Dp098B z#~fsh`;=KrHRRGdN3`$9+=p#(pNhJPz;@-i=EV}=wc~xipEi+ru_;cA7*6;l3N{N{ zVv4sOFGn{}@8$V7@u_h>u{&qX=Gs=6FIpXDA294m<&QVj(EMO%!!(^q1ktZf6l{%f zv?Ls9Ui@gcIY6EZpZa$-@ppiXgaHHX5J<{3qY37;?JJmq7t2KbDP!@67CtyseGPklao9P zT-kHmyz{zt#CqKM>RnR_lG*3jSN^pK5bZTCeJDO6h*xvDn%SO(u6J0vrOU1NekkUU zH8`;kLgxbk;9mP~@U0Va4RG$N>B-v(Od@S8NUJVoH#e}S3&+};=hUvx-W 
zym{HKd3h_4`c*6<$67pAd{E$T%{8!1M6q82QL~T_Q8*lv82vxOs zD5sP#0+=EyMMfqQYOR*&LpIy4{N%>$01FtN;|zP<+;f~`tfMR4`+WV8&plxxf=^ARpP}zCUn$Z#*ir^D@uBFnNF}!k?;-9_Ye*zLSRJZ! zzl~uj5kfE6SoOC-ElzV^C;7w0vkL1CM7Gww=q{o8Pg^r(*{I;@(IY5v6dnV_ChvB@ z7kRv-Dgq4RT2ZZH)mbtfRQBh1reL`(8#psM)k`&7a&X__%#H^h3WyZrbe)8rrl~6k zC+1UzHes~3AYPxNhG~+!GBfoWbmIu8=o2+rd0^J$n%)`_t8tkE zrYxWD(!12Y%dsCeZ0FN1!7WIHq{gXn6{WF}R4xqa$>0~ahcoj?8U9Kmw2cE^Eq6S# z=hs@ZEv|t@iS^ z9ap#5c)tl&PheZS7K7qC4ms~E?Y*$5kAfiPvfvh*WuB_-Ar{Z5oy&t77fCfi)GB=> zf68+@AKcYFv$17*&*Ut+Vb8%%vpYO)@}?-vy=DRSuF`OpITabQ&r$1R@FyLF6qR(9 z8ew#gH@eKmgPb1W&r~;?v@V%KxGv}4TkM`TT*yC`*BK1q9N%g_%*zGWzt4=hAADSe zzug=Urr!-EkEZmpqQ3Y7S7N?Zq^R!QWjrM`bNHi`dG{&mfwW44&YsFix_^?26y1+u zub`LP&EnXWkN-P5p^=z>x7(hycF`Y3B9alLr@8GITT7FrLu=(yL=jbAy(Y^|?+tJT z!37|HawEgmM!QNLOks(+61H+Pj@T%O6(HT9yGrRwi)9<=IW|nHUTM`5k9%|&s!YzF zIR7qJ@rYfTtiNn}*rJx7p$77+RSCmd zQ`bjc8Nui<_iNtUmyX0U3`piIQP5MO4}Sf$tdk;-Bfqt?-dnU_-5>3MXag~xrztGb3lFvBg*q^x0OMZ@cD z$j!NSQ2-=0JgsnT7NiIi6w1zgQC&-|nW?4UzF=rrdb3&m1*Y;^1EQ^QQhqRsLl8n5 z4R87CUaPD%^R#`)PF(Z~Yfdv%Ov4HH?CZvGg2dLthY{(UYS$#!!LQaBxxYht#x8|WF1g#paIW*{VrV##avV6 z#J1`#^T^(PG8Ta0-;#g(-0A4{`q3Jh>f8>$MQ%Dn;JN)b_@UL}_on7tP3@&2o?IPzyzV zu2$HLhMtlq*qF$3a$gn;R$P0FUpYLbsh1o{62YRBHeBh;lprMjKFUoTb<|B>%_Usp zDeQ8L^VxZi_k4jBdKkHA+P~#ydHYlR%-Z{qWUWyvapdtoX@|SOD>t7&>P9jV(v&f5 zJ=znj?9^@5vhbkhHlau1V>8+lyV8US^^Qa3dp~{iI)U|Yi6|rR3#}DWFCI$cXF0FZ zyeo&+B8pk;C*jI)``T1(jv7VSa8L&v)D-O*Mv2m5PS*0;3G6zxbym7(b7~TNY4I8l z^#tt1lXyW9|Gs?U$WN6wfh}!1`lp))ZV#0 zMBwX%hAvNikXUL94e06wD-oyH{=7|klJhyXH-|^8dN`I)IcXs)P^Yc5i@5an^b%`RW-s&7p$G@i89 z`;vb21yC*5@r7kG!c%%_>M#2z9Ek}GS7qT)W%ks8LtZl8^Nz4j`4E2~&&M9)Mx5M6 z#Xjf1B-=(AVMS|&{QT>=f`wU#*q6vwBXepp+qdP3&7y2RAL=WNlTuXKv`VmN$);tF zqUR}!CgVQ)D`h5r0^eRfi(TEY+%AKhBax_Ea?N*7w?mh4M^*n&Cy7jXa@ z{StkWrUSq@lm?LPj7yK}=0_=dgsKj(uBtTdj9W@jQV7#G@;|k9EkPv&y*4kF8ah9q zz=x-{1#Q{#v>4W@MsX~{LEcxQY~iFoh0Z!{JViGdyCGuZ`eRTanz!Lc&SNPbWK1;_$V=4r!wH)_jvFX+Ri}OJsiOwUHOnsj9NEr8EJ&w2rpG$7Gi9E 
zP2)fQ0S~ysD$LC%x68c2(>+<^R#_ncS0y+4tjpMXC}kU`Ppc|gFl|=aE1x5SS`SE5-;I51Zy^xl9@3)wGDwJya`eGC zh3|ZO*M|2n*-H=-sq%Fj%)R@utBE$w`^)6JpavVsscUj`mLwz!O6&UY^!YgOj?W^# zXWi>uuUC`C7O?At{U^QgxJRyTNBtH{Nv-sLGs zmcA<9b&ZSs=>*fY?tuw63d!=W;L>FsOsjgI^eKw}nwz?wk}=xv-My&Bg;af& zebh$j5(RNig4O-S`|ArBvfiA*)WA+$XMpo)l#g?87hRbwVj#-;!N7eGrCBe5XV>$| zrDCq~NXeKB7xBhpT4DF(rx5Pt4=Yam50+veyAv&bq*5x^G=$!*;5XP}j|p#&-ie2c zKl;e)2tLdvO=zwBD4S|-^Aq4a^Fh-j+J_PmS9_dix!Ph%on*>VdEXaFF5$ZkunSS< zA}ssO#D0~V;x;_Rs4K_IRlVHJ_Rjzb!??2*&scR2ll}C}ei*O~{ue#bw#fg_s#x08 zq(oOLSSA1j$#7ROug2Kv6GT0SaZeWKaD7?q|ILtrJQ)ufpuEIht^L(n^>0SF4KQ1& z_PXSMea}A{|Cf7}VqSI4|L5BOa!SJhPsYC8A@`S`{AP!IhyX^21SF{TzYY8^hm`UT zN|dj@MO#R9ve_H!>z($tOt&0TodmSMCIY#fc9v}O%}BHt**WT*hn~{xrkPo|`7`x|dEzi2k zgep9?^FLiExFi2JO&cjZJn-!Oo>gq5x<+Wpr_QI_ZJPJN8^<(PvT`dhpmt+bQfIyL zgockl2+2QyHvYZ>_{CH{RbqIomeK={s{c={lUJn)xq3g{TYo9P@niN82`gIJF2S12 zg_4JiO=6O6A3NzXs!OYt>ej3(+T!zo*vy{xWpf2KM6$!tR!m-U)Y<(^>Bi4tDZ8T! zI&!(DFA7DIvS~YGBJ_rRW*N602MKnYdWi}J9_jw^`9y_VQt<+fHKLaRLs=gSNwZD* z&Wtou{8L$}&Pl$+61vq&)I?&*wcIt2?IkG_Yp?1#UOGoH`n(nqsyPfVy;gyZ;)zXc zvX#e92>#^GxO-K!%H_lJJV(4us}VyHcN7(xP~x_XsQ#Yy4o^7IzrW^<}0x+@%k5to7SZ!*`q(m$XO@r*&PznlTdd z&#{T-sM)-Wj(1CoPwD64yq*_km~0LP>A`Co-?=JGZ#h*PbsF4R9a}c2og-RmEv`!K z)>f>j2_h(>oY(IZHCrr)wCQzsyf$OBdOo(r5veN>n%&O>f1obOoC8Lhq} z(<*n9ZH>MGm4CXFh5!$CXW})uB#E;5?6t=UBhGR{T-fjSh(kAj?dcTI)wRT2`p*r! 
zzc%@95-)FHlNJC2>|*oC4byHFV3zJ?-L!IzeVunLQo(oNt=$Z*=q{Qqm@}H8eQx6MG0*oqZo3ax z&qPiw{C0E)v2cg??!Mfrk*pwu3)LSjqmYDz<~Y%`y@LDU?N7zmEL{@*a~%LFHQ^)t z#OePB7sP#0t=KnSraK_7uQLp9vv(mph%^}1RDqfD;9sV6U$&3VbY?2=UW|VS+Y+@C z^r7D6NEXzH51m0Q^*1trK3WU|-PFh88Hy2 zd}c@u0sA%R426o|`Oz^R%($OwXs(&;dAcAYh0RlTI4#Jy#da5~-u>xI$?{|8N4Y{< z36C^<{jx95O{KhfYw`_pQk61kbtGlF$-U)X=WU$mrOTVAfr6gNlt`GA^!7fYwc8rC z+t@D-Zj;dV57*93bh37u#(fY5g!Gll-Y-W=hFsc6vhC*Sw-r2BaT#EsYO-7TGZV-$ z-(vs9CghJ7B6fx&^lX)OO&S~(A4}2~h-(#93MpZsi-qY-ee54rq|XbcJjB1N6d7f1 zsmcE6?(WO)bc=I34gW@^Se}CtBYf#ABmsB|=DXt1VXRidE#oif1UGFf^^6B?;bF+J z5MPMG#HmXqNS*qyv?Y28g?7~$O|h@G*hmY6Y0KXxNv9^ z92yp%+L;zF@toI;91FhPOX@T2^7@nBtRg?o95!f_GT~t8gAo6aeTqtH zsmM%^LcqqLrSkOYn%WJqrvHbz81XM^)G|IDr`Kg@k?}+z&UnUMUftlmI0w$TVxJ@* zZe2IqIX%^~i0rW!eL5Nbi?hHzeD7W&MvHYEwzo~F=%CWKSdH1{LF5?2IWH<=TsLK3 zdV|>1&iVQ3N-ePbNC{vzD$6D_!*2$%jR@noFSO^YZL@7Dge2lvMi$xX$E_nD$}Y+b z`Y>>74y4tm%1O(d!+Nt%Zs<4C*zK2|T5LaWa6UGoE5BSdPNiBShhz5iitnEq%`0dk zKVDwa&HDcUl(EtOK~EXv3wbdpF{6mv8*9w^mD~YziQXD1vH2PUgDVY2?Tpoyy+)ev z#GIIELbp}21XYR2DC>6GU2JD5DD1yfuvX_dEcsgxiF-Y;^SknV6uIM6l%YwNr(sfy z1}j=ES@t5J5F1M!DHRZ^FbY2IP^0aWR`mfp0tH=uzNvI)F=`UTEO}^LNxJ>#Zg-*; zb5M}Oa6cB?`4yvRV(q+z2KRmg`LofdXnfX6f8=7l_(QBwK12WF?_BYsgRccdsQ^ry zRB050x^yQf+;UxzqPUN9UsFn}0`Iaq2wk~4@W`F;r9I6fu3*BPLTcVIcG62aO1srn z8ATeW7|aWT&57Pq-g#kNE0#nrfpmH@!{LAhx=>we<9W6bzKkrq6@EoK@~z{?gEwxS zNa_cg2hBYuUjXH*gruHcs25Xrmx2`CsnkJ6c|sSG`N3!qHfEj*Ulavs3q#`)h_7oK z_Q9dYw#)GRJUz!bI?~PJ;BZr#_;zDD+FQyIHcA|IE^;N$d^Ihu^k+G5(6(?*H(TNl zP{-jOn73T#QU0LF-(G=}@%k{rs5rnIt{Pbf2Rpl;?zFJ0=cRnFHmEfvn{%z|h^$q^ zL_Wuchpg3zY)-1ZqUJw3lakFz->E?)$7jaCfAS)qf7reSrEY*%GIDY_@BN-)07b%O zw}r@1Un2ZCwV3903LE|<<&==UiCGO=cvCHbKC$W@JV?zU2Ue!>Qyqw5N|=sg9eV_bPx#9(v0AK32GI(Qu+;Tiz3~x3UwrlL~7Ye@)5ow;@Mh#Y&_Od z`T79uY*l%O!-(A%bYiuydq62*VZ`7oZ>zModo$Ua9nfA>`7NlyN1`~ZwZVF(<7g0{ zpkC3zhfg%F4#$Adnw?DB$`Fr3OZ@lj%g{jz;mFc0u_31qdbRjHNRDUq&nlb!2opfX z-jIadZ_e_N0iMThrR4;Lr< zWfOp4BG-9r({Lv~@EpwDN4%LzQ_T7G=P-#hZQf&_DAUA#O`Mqs;r@oMCH0!YQ>zXt 
zVtvgbZ>xyn_mFL$KpnLYTGfx%LAJO*w_{Vcv2vnzGx+-@v}TA^g&LgbA*-F1NYD% z=)F$)3!x}URCs>)vdZtlA63T1*ZNyEFQen|@5uWUPq}uX4e~m(;7I1UpJh0$j4{U^&X)sbn<`|PPX~3g|{hno{At)xx z^Jj^K61@iw!blF~VJgbN>V=zw)ykPF8%keVNJ8!J?s*EO1O-e*AxQ5-NDDgvX1eSt zH>}S64kkq-MYhHjz3N~dEA2B^Oe!iiV%Q~*bys#w0-`4m7I?^}$oP?zZ1KlQgY9m? z?v;q+X@Zq5p@p{%uS=1@WlMxrKVgrehYyGUu%8}O5Bxd*olLtBD(`|u(c~^`IKZD1qgF+7O4lUJTDIZvl1`@S1rxH@GmPS zk=R^4fJ@%!P>_5<_kr0QFq-oyJ`|o$IPRL(Z(%}Fhj4;q#`SM*<|27w#3^;mE5+hy z*#|RfJw-PtGZF1^diSZ=x;*H0kSZ4*-C`OOJ z+z@XZ42Kh{GZsBvZ#}-1{xEIoQzofi{DOAQLjiI#@3|<{4s0EV-}0KNwYV zCe7pp841d=RAMPu40y=6s778Z{{@(CQUPuTCAap8&*C4*Uyqd#a_Ikz z_4$D_n4M0mJe)xKOn>+st0R}DJW@+TGKd@cb;!JJc=)aI2-w>7Gd2B|=}e@L>F2pk zx}!$AyxF3F3iT=7_0r@wxhp+lJe2KOgSC&Chd-|umwMAUIIJ{anA1Any!g8-iVl2s zntFo6uU+mVi5lrM+OeEKE)n_#jUA1KW;jU;qIgO6idpz`Ay+v3Q2@TUd&y`KJ7=gf z!OjCy_6)raN6jOeK}3e6fbm^K7t~ zvo5>p{ye1Lr7rqlJ9-;=IW-UzKh|U}ZOXyU`PsxoN+8 znsY&6@x^Uxj3sq-EDo+%x@CwZrLD2@d4zmRxHIP_|iZ?vjmtdT4|Dt0%(xn?YKYEV72nx{yfn>)nSV1^pB{#92N8CzF=dOgo-V$n+nRaLN6R?vC;hv1g&)=BOp4uw)WSR8a@B{SQKClpvey zGpw6xw+C7H#6!y5>#8+I=nu8qS*stGUxM?FlDB}C!+cdyVlk9p&#P5Er8{G7-PN{W z2D38NT1uPywBjHGPc8rBHrQ&l)#u8-j9 z_UG^*x!`Y)1`p%s!MVX1fgK*k=CbCL8J;r}J!6x2dHu_|DY5wU=N9?ArI{WZiihia z%v#uBbhYv5Xox7|h!lZ7apKs4&9X%X$dMdk$}|Zeh_so3-zbMqGGoI^`^17CekEC+ zu7Q>^OS^otmSbH{y}xP8LtC#W(g*i6^~p`zyoj#Nbg`PJ8>a>D?b`h*k;LHtj z(lwl)@M+ z7qn+_Hk)$2qg^vKMKT#Hqk?hC)^Peja26FUGmKSY+8;r>uf8&|Tj_l5X9{TAMMp!6 zA*Aj~Y|rKt^R-o}a-%RSSkBIMwRI+I3s)5vU~}Q~mYp{#V^OmOKadA@_OHm&&=DA$ zE!KR3#7f0Nc=$x0(vUGt=Qa8uzJ(o7<+sdI7}sZer|gPdazyhZNpth(9F|K3<@V+C zxAY2)VE3bs+>PYv_DycoQV3@p~_@wYcAq!8xzh;_2$l4 z_bOny*>WkQ=dlC+j5{|4^hH-nE0%`NX6?~hk>;X6N^o>Xu`9BE)kB$!QTqndPUR2^ zvF_^mTdCZz-ew58o6z!Ct_d5Ae`6^~wpDWc`${37$#W!gYGaQ$0WH)ZhmHixS8Lt& zv8_X5oWs=Tum>|9IGH3x-hqJ(S5Is>@B3}@q0TAyDSH;(p4%te%mGANyQ;{G-H~64 zk)wq<>!%_r*+i&KD`Cf%XUSQo7`UC8NP(nm84iU0!?HZo|N~bi-MZuCKLD8IfWi6Z1*?8iE!SzX*Ce&2QFK}EW z>`{oSsiN9XfLa>cNTZ_^3rXXx0e)9K9nIi)zjj0A>K=K!+Za~db z%0~JT?g~3wX>W(K$ihr1`f|L4fTPK(*{+xG{7Q7b$@r9{&SlDs&}eWArVkCM{c_nj 
zz-2X=NGV@HEf9#QR<^c6MhTa?D47b??U_^@ePq ze`e9yp#h9Q<}Q1mT`Si1B{MYOe76;8u_mb+(FC6&^Vg$cnfZ9lss^M3OigRt9(q} zy0VclkaV2BL>rGTYvieZ8vZ-e>;%Q0ujYGh;ffquS_*it;_&NDeue@|&|F2d&-ZHy zzM|OJ+P2SIH~Y9cn3*=DP+|RJG-_Z^jc0V*|1vI+a46BLn~b_S>@)Oc+>#D8iLUbN ziAC0atdnieYOI&~H5PK2E3j}%%l<{bb5;}8Z=Q?FAM|E)>OugO`fo#eFM*-S?iCBE z;{KkFzjw2qb4KscZzM~3w zWt#f5q(dkatdPo%CUI>C@oi63k_vV75cQ}e_o25x7}I^Xy^qoxYCB4!uY!2=6ezHs z^AYmKzNf4-IB}H79V;N0QKYr%(Uo+c;TxP9BYDK@ zK-x_4Mf5tH2X9eklAkpW_LAujIDTT7=1xipfWLWD!;q|*M&)V18gL_5`BX1sISUEU!Z<76f zo=i5}6%(2N_6uZDA#V^v3bKVbvXkxp^i?mDgo<_GJ|`8nV=uC8dH95phtS5_5`1GT znF|ayRW{F~8<2z9D#=-^X&K1~?TVs`X6GZ3pb+;UxlNCs8TUGd+Mf(qryl4bQ6D4L zq;2qEP|i_qr-X9(^jCK_ic&hg#z<^dVQe0%T%`iH+8_`mg@r<|Q+bcCOb0@ZlWEF~ zyvuE+%OQ8Aq9pJi^GkPv9+&5oyVZ;v%1yq+{Z=fg4+8mB${&|ml z6}|n3zhVU87BKx9Gn96@C8C{anhWyd&g-Jnwd&v$!i-TNw_Qdws3%*#Cw3j=iNM$8 zE8IM)G!F9te=&((4O*0E8k;aQ$!6?bY0YlNFjBceuDIKD6zKG$C_K2IWV|R z&3#44koTbO$$8V(%kT@0=`~HxHM^iY%#&*blVjj$N=D*#D1PGUmi2X`3g%{$4$TZo*2WF&C-mclEZph?{IEwpB0?d#i zpEo?1pX6VQxk%e66-+bJ2 z@!HgU(%*?=Jjheh4)ZoB-L94}e$#ks<=bXQS2P*Po}b!mGW*KL;p26`ZmDGadz(*X zJ=OM=2pFu8jm#09YwrV3xAMBOq@|IR?ymD7#^?K<-&$v#KXkbs_kQl0xo75@Yi93{Qn8txaVnP% zXq0mhXYxFnNErlP^D1S=I2E`HPV+qJ@06b8Ea^f;aT>BED?3}k)%Sa90k1ov)H>{^ zJ{@#s^mys^(Ny$mWmQxzpri7MkG(sLL7s+eklZ~{$yl=*q?dObZa$7R;UtZd5l-*gMj!!dZ(I~*D}>kp7dwdsmYO*=SjToGDXuW z<(kUUnkFHhbc9!#v)vCO> zR?kkp-Zs_Q+g}sZZ}c*mx-h}gn4Joz-D=&$9UHT5u4eBs=rrUfeVM>C_~=?`w!&_b zoiQ43N&6-5$FyX!gn#zc9d!@Uu0b#5rv20<$CJpXk-L3QoIWKTa*2BxP!Ff_4%lNa z4w6UlMDqLGIJ4yYe3LCX-Hc=G_mzWsE;~ozTgj?XF5)xn1@7dhlESK9kgQn6>IFe` zymScGa?&vld~333^2GgWN9JZR29nqQg1t!PwGhETm22Jn9E)~iV1OX*O@`1?q|WqSQCVwyN9^i$aOz7F>2M+R^n6Uui=Dl2yZ5EQgAs&Tm z{qkkewhUOwQWeK#UMYjIjVi%PF{1@8O#oJzDf5VE?{%g~-?Fg(HUWW>mrH@ zec^p+uiko~_)UaEj%Irwoz@lmhKdHWd`AI&_Xi?`$lS$IWA{S!Cv<3-#P4gsn%xri z8{H~y*i5A#A~reA)7a^|vp$~zzktdj*gxf_B8{Xbdr5J;T$p2sQE&Qbwh9p;p4Nz0 z#XYfK(?;`zsVwatwak{}piEn8z*-2(Jw_lVdI#k5O2u!Rq5vF2$Io-?j^Mf%oMU(y zKtvu(x4b2@ImIcI3kZsyM}Uk zlS>|7swttNrsz{Pp{~Q+XrK`|~jN4-N*peUGt!?W1 
z^Dbo1#03kQ?6x*s%ro%b09OeTt&*$c{=%XU(K~db#E%|<$p_5wqyTOT*NhYDK$*G{ z2yZi*%2eL}0)o%UDzc1tq*{w+XgMWl_n=v;|A}LFYAZPL0lWgX>u$l%xAw~5XgIa& zXL11eW4e`3Pfb{X!{$I+J%vw5U*C%Hv&wxI#=9d(ubWv}<+6G8Y1cPX)Lv}0br>b| z{GGy%i)SirhTs4u+O5I&Nl45ro*Rw5ESObuhp4eP3;5S}^a2r5M>}C_wpPb{{eFsP zF#XX6Jhno9sl~BLn>xfJW{A9i2!4#`P?v~Ib7yrOWIGJwMT%Zdeuc{Rc_NCSb{qN9 zgWH?ecelDJ#@lw`)R}YhJEoWL7P_eqp=L&#_5x?lZ?Q@H9ZmZKV2Y-98rpeZXANv{< z{G;skl$dTK32RAM8q47@TymJ(edv9Z9%p|Bn2S($DLguCX?cfsG9PD{YLk~O zYy4jZD@A0i8=3XZfz@)dn#36Ml7m-b0xA%&2HbEBEa9s1#BZecNb7kZ$(1r+Nv)q z)KgB)e^YTAKkaB6)D~fQjU*cewq*UIo_uzT9_r$lcCm{)akN6enlMD?YZl%u#RZi?wmx zM-txy6;NnWA$-MCRDvw0Y4wSqNy-Bptg( z)(~X-+p+A-HiN=i-AH|z@1GBho0l`jQyoY5)Z?s$i8U}Eg?YOvw}ICBaYR~X@>qX# zOk8ZH`p|Bt>lN;N%u!>rxOz5_IV;f?@2gq;`Ic1eoT!BC(M9wFY2uhK*i=*{97c{w z&~HmFAgg*puVSd#4QppHQpIE(MJ63$t{%r|B@AJ1GwG>RMOnqv?%m?RwD8afYRrst zY2!}kr=-Svi!Vh|b+9_#I4T$NL*_|&gqj@e*q`e{2_-M!r_XNtx|s7`6AY`2WH^x^3v}}-&dX_=)F*ia zknr&B4kKE9#v1R_bj|aqTCRCy$ko^5oQ5))mq$7@YjKTVuAzm10^QC-ejx(R}HUq zvSe~Pjpo?P+gqYuHQ(+bSZnivB2aEJhMF4(WUN3VcRrd7F*0#xgN4}K3UZ|?j%#OL zz>~qakh96LaU|N(f>L!wgl6TXukqt@biMexN+E1x3kF6B`r{SUpX7)7rTN$ifu>mP zHIk^iII;y)vCq8{!7Xnz`FB;NU=9Vf?8Tc(%*pYkM}>~p#t1tB$L*$enV3$W&3{6U zW-Xd03J5~T%Mm*hXZpFH94_;L7uVHyCb?4&zjnc*;oyX`b)-(0KrjHrh+op?w+o2i z=lNQ*L1)^f>j5C;GXn{s=06|eXLCKiLAQ1lC%ukdkAWBPdTh#Ix2n#5dq~tMKiW_+ zBS|9C;rL8+75oE1O-*8j&GwJOB6n`iWvUB;fcxm)s}= zfTkZm_CDN+;!yXKKjp-b>zrY_M%DhBSlyH(I!oUdaSZuK-m(4%mLsp&u&3zgfOAF$ zL-tH~hg(5L?9fUr!gzp61_LT-S?{3EiIA(3 zb=p2&Ztnu3Mv}S3JO3x42oI7C;Cc)_+Yh4lh(njduo}cjEFt{Q zxQGb37SmDW%*BlASf;lE{=Kn2!~C7MI5@y<8WgnOzN3Vx-XN%F+0+pEZw4a2AY$Ck zH&g0|r+P&AolR=HAMZmgD}vdh_7xivVWjf+Df-A-&B8pd%5uf6dPH<{kUI<&+9jbD zuCdgUz^+SYh637s*&?Ts?Qsfzi3Rb>y^>)K|If63>1Xaql^rKsTcho>v3mPM6iuG; zcL{^^?Sm5|jr7qGqmI2+&%U|+Wi4#_l=W5ctS4t&n{Gh!Z_|v>N-*TghQBDZVRm{eP^MR zX*crb4D}Zq-JHfTKBRs|@VqCodp~PQEqAAEkx!?N_V>`UQ2kwGVyAL!Fte16$jBjh znLQYi2Use6j$fZ(bZ8-e$^C(H>p9WwhCZIg=ZeWy^uDr3+7vgcCAK4klN}~Dm{2-% zoWzRf7WT3$G(Fv*{B8kXeydzYgYw@HbpGBF6Z(ocMHYfG*fKmp7Qb@7Ytjmt?|LP` 
zt!aGPN<=FW9}qjcsPtDWvFV=s51zHxhrWIOMkWPL=us2XqYZOtH^(uxuYJ021k=z}{|(dc*7 z{?^fr2tDA1%;H2(v&o7QIGC2F>rRIku%(Q~k-#5)pb4#s1_6%%zKa?!}0-D-`>ptr#R? zdFGsZS$?I67d9}f9(uu_tpy<^1B3DwC@Dw`);F|R%}jZC`Kj*p=xIbM;+`D?)W;G9 z;I?OS-Z2`H!bb%>8#@Ltd6_rNJ;;+S{nM{(^+KNyR@|$&>Jw?OACP+P{&0$O;2QQlF42fv(WSkazm& zEivl~-tNq!Gj%E^JA(G;x1M37drvnz`?lY?W6^lf9Men}x5*Tv(30v31!!F-lF?rf z&4sBxkH(=~N@+8$QuVO+A+;q{kNA|j@nb22kgtDSY!5*us^#r7OjY%Eh&8vWuo$lc zj~Y_PIN7_7=+%lr>+Y8lhJyio^*?=~>g=?G)J7D6d;@MeYG10bc88?R5sSi{w%bqZ!jYV6k9ApaSLyKNe9t>Z>lcKOXv8c zXJs#X0<0Ru>2er=yy2&p=?DZ;`r38g zG5I7l>T@`H;wVS#YG3mQfMgk-uVSM=6&DjwJ|kT`r@`?)gfP9Sg;d$%R5Y>KDv)%*`>d!or1qV58NQ2HuAV}cuunFj3T|;D#0i zQ@Z|cT3cy9U7*zt1MIQgR4}kpL2XLgrmov&$P0sfuj3xfWk=%ra0^3vOnoUJb!B72 z-4-0QgajWH-_!f+wh|DN*X@&Bin`ab>mRC*r&Xz0L}v}^(SONBLI|t#RGW4+7~y}1 z+!VAuy8*y45sEbmjT?+ch}~gk9FRN%-LX(?^?FY?=7K)-SUayIPOrr4kLn8S$QQWN zjN6SNHvzx#Sqb=wK$1xPWGw_w@OsKvpdfPRc`0?P`O++6(-cb(z?Od3R$s@MUJeyE z5{;h3v&KY~#Hc<;m<}5$@fl8N>gEV~Z@w{trTe@?Z95|({B;6{uz*}U>s*blb5_zt z<*5ybQ3uufA?<>?s+5WEyRbHwDeDhd@An2>m9(k`7(1O6xR=r zvWMIh2ZPEYtx4>9A~38D=WW9`gvSx${_ElWL6z|7= zbtbl!BvDxu#gdG%ykdXUFWaqOWn8z}tJDzWIG}pp;Ju50Vyl&-+-7hsgm->PVbY{< z-I{bJu0PoJZYP5BbLb z1$=2Ib4wyrUaMTYTg|sLCD+a1UxcJ8m7o@ajK{GqUIc;_+wnea^!OxYb=s11F%l19 zqD@rcfyi27SCr^z1_M2$SC4!db_1YH0K%6zs?=cG0pHB#h_0PRJklz&hP2?H_$QLl zM+Q|n)f+pE5Zf66I|J5UBOnNTJ#Fe3+KFRp3p0Jcw#r7K(^%N4i7hTA5Ex*^YPY@N z))3Li*5-P^r*S3r488K*JXK=iO9n^-P1MZ2L2$L>ujoSRo}LyQLNq!wgD; z$6tpJj#NQ(4j6vr6em6d`;C$rFPsDTR8UA$wSza^Wq&RZTSDoecCFr*ngMaF?cX@o zuN$g_J1Y)z=c!D@p8E$<%w?l!G+e~{@<#I2wn4BDY@|T1N-O$uHVN5I03PIpbYr7! 
zz8kfZe-)43pe!xV!K~R(#b9JKYR3oc;cb=9dM2J2p9nZiBl_5<=R16{UXl&?cVsW8 z=|VTsJ||tFjf@;ARUlALUmNlO1G^P19gGfRRsgA}%B| zz2Q-lolsN>#_hDl5OeUXGh zeSPlKC>&nPO?*(c^FDBVbaE>&biQ}Q#1mxf&$$u)Rw-b8EA(rrnPi|i;N25OrYF_D zkdOxF3@vQM6_d>h_-;2TXKjBn>ay{j~+bJ)KKF1Vf$K6)Xy-`)T%8t2WNK_%~S zVJPt4_e1^95O6 zv~VhA=*P4}9)9yaM$!B6^Dql*V}H&GV0C%G`xf6WFpH(v|E?wgVj=+s{TkoR@9+Dx zB8IIIHUz{DXipj|b-QNe-nIl{&LL6_z23c-kuzB)=DV3-^(-dBU{LzjJuLPFzrea@ zt|b(5MUr7Q*f;e3v_Xrfui=Hc)4nWo+0XkSB(LiqXmeP@f)V?=gh%eZO<&&oilC+) zz*q6*?n)FImIj-DFXK70)r?r&XpSuH_d$E!CTOeIRD#!Uq#bs&CTxoyz2y;mG>~Gr zmpG;5(%vD56cYcKJHldUQ)aq9@^TNh_%Ei{z4;v#@J}DY?z=gme3Q=Vf5Bxug1XKB zDKb*}q{A)220zS+Wh1C*5!yU4la;jZ##_G@urt_96d=+f0WXUge`X4a@=lyw+mlj<7Y3vaCV1PPQ1QynKNDB>joFa z+13s=k4cu{hR~*1gp%1Z*+$B9=W`j4rw&b`%d{pYGT=_iIrfltQmJ6_HZzw8d=Ojj z5-)yxK%__an9a(PW@Wj1y&m=WPq+$zl31^_>SAClzuEg7!FDcZcf@Px%pKAjWChdYWN3^<(z)V$%?B=g5Rrz2k;*yeQW81dostg5&A*(@rPYT}{gVYP<*`NEKIJsDFJa zg?L)()9{L*r?gM4tNq(F^ANffkfXYl!hR^2VWoj;eQ7-!bFH{K97C+$Bpl>etCmUM zXZE7D+oNLO@9W5zK&t$!Tukw+4S%2gfdYL1rkzY!aed;n=721_&H+dW{ePY)If+F!KA z70iT{he_f<7pZ~rKY*GAlrlO5*D-lre{k2@3Wp=&?q@k4hni29fabJ`2VEi8e^b5y zMd74vCy_+vMhJW9|M^!XK{Va}sUy(UbX;q&K)QonE!9gFE6 zB7z7}T8dlXfmQXmE&#vsq%AG$Q`bij0I1mlr{7~~+FRt??GP)DNx72dK4ap|9bZlz zy}E+B<9ma@FQ*fBJ4>DQf{%qN*57-3c~C)$)pnGA=hn&VihW(>&uit|5daKY^EqYZ zJ1ka}ES<4cvsxQaoF&rJr%!ss?5JWdp^f)f56HtmJU|s=G&MalGWXA6&RtxBTf7SX zZuu3)Qk0r^4ULtkzwsjneQx@*Zk6bbK8$~TJ(Ts_R=EJjrL{{C!g?nhx;cB2mum1I zq9fL~SYTA~G{K8Di={&;2=M2^UmcB*GS` zGFw^o7B|`>Pt%@<42DDx%2P@&#h=KZJGxsKv^7vc5n>n8X@F5UZPr_iEp6{uqH(j%MtA9RwR& zWTsZqc~6ZvP%SbnXuAsw?6@al)eY7wii)evTmi?#v`ZBfcby85dM(5|++l?~ZkoQn zxq%9iW)bAbcV9~RQDbFVS7wVa$|7xo0MkO()=Nxu@#cAOWikYG27m3dXaZ~8~{m-K{=m5i&hlRn+3e-9*4IV2NM6<{K?hW_7 zR_-=my-cppn8ZIj$cX{}bW4>%EST7<*l_h?t)2i6v_HB-Aq&w-44@rl(5tw~ug9*4 zMU1hQh9atR8BOOCI3lKSfF68$7r|_ccP*AVR&ANA=S{>HhN?V37B5MSjBEDO&z?^c z=G;Qp^@sw;?n&pt1pVODm@$`o?kxoAwd5 zL*6=eW`H{d3*b|g=j~!A`%HeZxtKKNl;hq4S*eO$?Muy=6o@o!T#I$qw7a{^L$svvO4`M$vfYIU4KTkz 
z@DlfOXM&DFK)U>C#ktAZUccQz%AF-v_vO<_MR*kpIXOw99H~+*OXz~QC;~4X1wl0P z4NdpGunZUBHN95bfQJM;^={We_*H3>(OeqUfqPB7ZZ+T zRn{nbZ!<$TsFMH_cMK1-X@Ls{s%LTqYEnLnMvr34=h<=?JIS{-M~mvL*?(uCz+>Ff zfsYtsuK!t%vIxgp*a|dRdC{=f9e>Q*Xcep2PMxmT(WF=Yvg6E~^)#{clcIz-t@m}2 z9{T0MCV}*Naj31+Gg|R3?Upjt@`o_j2=P>q8AZKCtWIT0x{D-?E5?d5DTsP^ogVY( zvz?L{6!%^VUFHH}mGd>ecC+JDet;WL3w$ldwO!!pZ2zuLBd4QrOo7e0<3_F z3iG(bAr|TkwE3^n0PLR+R}Zipg9uK$j^mriA;GEsxbYiL2`4R|L=rXFpDo@}&^EI~ z5X*y&ex@TeFWE6>%H1Y(+cYTD=sI1HUJ+JJ9bc_EEhkG>X0eP(K_~nQUN`A1q$)jP ziV-^*leQ|R47`!e4(2gH5r(FRRvV`8Gv{e8Holo4xqGsKnCHLP35Oa;OXJ`s9iMVH z^EXnZrQ6U97^3EN`Ifi=rQ_{5P^Mw#VvqOXY7Zuxl}02Uw>B))V)Y5twT`FW$*I|Njv;C6!GYH*(i z(&qR_onVZXb!2&@Ohr{@u9}LLr0LHe(mhr z8osUOQ+wbtsy}cU0i>yh?R-P5ANBcAQ7VHOczSWrn}unu$4^nJ^h?#BoS}YtY{vOAk^8u^f3y4C z;Yzt@mu6-q9rNY+OSB!aM2F27ZA?Dsh-6p5%ZQAyDNf;2+cg8`d2Q2)@h4Y!piCy` z|3W;Bjkt&)5hgnJMAk&@hqeC(A42Y>8Y1HC(zZiMX2_ zS4L~qUb2oa>eN_2Yrsq6e%Z?q?csW{5fgA2;7dUM@pw-I~^ zz1(U1s$d8zCs>Z{*sSc3=+gI9I1gHw#2*heTV&T1VEJV~vpLcNz(aa+^1L(@|sB%R3kbpW(@_T_ZwG+NjaY zM;E-o{bFQUD0jhAU)?F-@xjpqL0|*vcCR*lGvs78xa1F;*%V5^as3OfFOgD4$Oo+9mmWaNml99N zMct@!v$0(F?Tq>O9j9vM@*IbdJ3~S6alI2I8bG7-SF2RaHj5E_)|aJw@KKLOq*XT# z<_*MZSRXq`3}H<%p%MW;yfb_WwgPilMwNCO8kTLFA$AR#UWN8u{q>^Ooj5_WXbK2S z&b)ODOHdM07KY_MDmQgH=C}q#NFUdhH*2jK>Ndt~=GEB%9Tx!EPCLD59f+^uq-VoyWi0zx4=KP!tY21_=df*Sr563>|=gpogXX z2l=0iNOq+V~#C#W{Dwe2Fs87s$6M*h4MyxbsC z9IUFxB%`K(bk9vjy?*G_e{okosjYw2@J!%Od<(SQ017~6v{*60Z6?#Gcq92B@!HZP zc-Ivsc8aAsd^kkduapy0Rj2rU;ZrFTXxP|cy~?^h5wHde0fH3uzJ`+A0wI4l4k-5o zLY}H=b#sJV<_c@kquXs)5h#AYCat5}8)G4V7E@=bXbt5Pqj5`4!c6ZuXn;PhNVI|S zcad4Rz_L9oUL#Jd495$1+<#-HI;WA#&|ho5|ClBxs4aBNvbWF}oYo*XVzp|#cG0i` zTVXcFZqlUF29yFBYnXT*&PAeOU!vuKud~F?zkpkR_S%VMA!dGDkmdQhu9IG6O`R@IWqMfsEWALt$Rf` zBq3ZP@nw0T-r=$tL!SC+=gMfI(I=oB1L`6SZry_2T94QLdj(=m%DZX9msjrVcvMNB z>*^$0RCN^UOw4vJR(K9!FSxaz-LfQjaY0iG-Fmy)E1?BeJ-qdTv~)X^*FROyjjXOhR8CL%&7J;%en<9`eA_?E%bn6#?-7`CA# zy?*!OB8nujbiO|@XcdbDY_<6!bDBB_*eJMmZwSPP?^LF3$6Hy1JW7+dAZffn 
z=R^&0MmHO4AlOJqwesQ(XN%HSSL=^3aSjgcTTJa0tt`VKX2#O`+h-j0Zfnd7c=E!( zz$1OV8Mok}po;V=;VA*a%&tqQEj+UC+%*pQ+famwOi9#UM$E;9%T z9b%j*QfsOkT8$iunyW7oHaIqFQh2WaL+Uu`+8s1ftZ{5`B$)}1XsHnZY~PrYs$Wgi zR*q$^%@Yx}t{i<>U>%2}DIqYn`U>{+j%lkCnLX)^qr!Q7nYPlv($@QmZ%yB;9g^7< z6UVhUrj6rct7lu5rmt)LDVhxc68InL8HNWq`h;-<01`*a> z8Hw1P$)>2q|79v>IYXSuTebvo%C`z!BH~N26?v;zYu=Q$wfH}Sly1gz)u^?! zVOt%Qm2OueYB<7{r{0OCA;`##n{{-LZZ#50NUis|B__%4ykj zcyZ)R!}YQ}F5x^;IH76@wE3<|iS|maXrrTK!1)j`jCaj0|z6SYL*k zQ3!T^Qx&xYk^!sR(lYs+BXD>M_(Eel3OXL8|^RxzE>fd0$47vy=VE?&BkQkx<^P+7` zfOY0=(~a>bCHdQ6e|!78#grswP;JeNml-G3HS|{btyZ$7-=sCH3&{YtsXA|~>K}p* zblrF?9k#8(u9*zzCtGN-M73gSRY}x>Intp&uiAR1ZuG+hZLzvb(2B8fA~D&&d4h+W zLByXC+ihP--++G6e>UvUo+=D9(M4~dBo3jQp`j_A+ITq{FR`?|{H~1H+q!5$Lt*;o z&mQ%IYo9b;cVeK%#4S(U;bij=LA$ed0W+j|t0hQ7`0u3@1^@rxx30SeCx0Vo2kNx& z1RT*_cAh2Doma1=3c!Vl4~kb;0$jEf9_A3hm2J1%ZTmEKqSR~TA$NtEe7X=&Li%_A zz9qF6xm)^Y2te!wfqb<5((gZgSxr1I$3_1)&lOO!TK88dbky53@m zqe~|(#%Zx&Tuk~Qqfk6Tb8$G;;$~;fu;L?V3mm9Xah2@f10_R*p5c=5Ij)uQQw;Zi z3-zmYNma=&S3wGv^rcwWvVwJ*iCy=c!`J&~nq0+EFS4D(LV@x~{w|-ET>y1$siEC| ztUv!Hbkr65p9rMEhFqlum0veYc>jke`$m;xSGA`pw}8F#VjAPllLC&*D^##a%QbUa zw6KZVBiAr9`529*ru2`M0g1?LiEMiMhXU*=}jikZ_Fvg^rX5xSC_F*IF9= zU|_=^qGekIg=>MC`gv7dbqBx`zd4Z+UIlMjO5&(`qX2A9TOpi`zuuy`oc`(9Yj@+m z)q!%y-t7QvpaNN>*)8Izj&Fvcx+c;5$1)@wCP!!l{=&dav7z(Y{lUuV_nLDM^9?_7 zSJ;{Ym=q5ywAJn=l);}o>CC@1gcPX7Xe8D6#Tc#MQ*(?gxiIdxk~0b7*#~TbxnBEu z@@sdG8dy*ns0kHf_N;JN34BVIJU~ennDGCbcAF#tKuI@Y>b{Os>ki%eitCJ!{Xxfi zqARs}hM{;ER(5t~8HARsxPZB}#=?0I5U5#8uH$F89OqdsXKr}x&5`BF^eoLB^Z$o!oKaRw^e83YR4{y=LrtnY~xgvKFtY$^(6NHu3l4C87anMBG5_t zy;jLpYU%{Qt=;PRbMB*p`fRa*l5ZM{-~@eLxQ< zCF6A+Fy=Yw%OBaiG&L)9%oU@=06F^e4|QvVTL_J$xhYE0HC$mc;Wf1M>%LnXyfN|)LyWAy!ppF2*52ah; zhGg5{ek>$i(M3?t@k^6AAAF9?>b6R8aeGHC8Tv#iKWqD(?ac`W65$-0tn742r6qM@ zGDKdVvOx61E$rOJxM<3v=~<*hh{9Cmg?NAhfY*}`@Eyb+=Z3YMLzs!6HFm)De0s?5T2Yj`qs>(LP1 zFkEq3Q!udGcLONeHui-nJy0PwgZ_Yc?_!DfP&uHr#AOct3@9|@o&qq%=#CEPuF&-n z@8R5!ukqyxo?gg85X1di`OAMRe~9q>xS{9f)8unB6>M;Z0`d$W8OHttI7^Ls$xJBU 
zuV$2zS%&!kfFW5_fE%T0iJTAw!p8zWf)6FVvZn1xAFeL|9NDAknTNNBCY$G?V|eMm z3K}lEj=!N5HhI}TUQ@vCh^qV0f9Wx_{LBbsxtQuu`o)13n!wyR156IylnKT^7kS8` z{Q8jJ1Q5r5zM?jUrj4Qn_85bS-6#xDqhR*^zijV>2ex;EvpP3|v54QckI54jf40EfGFHnZkmKI_+*QMSN7ZRW28{h7hzdXsMK)wA9Ecw4WfpPOV(js_#c+{?*m|J zH>0hY7h?R~9^~;UFrPu7Kw#W@tU~`kv;PdNR&jQ>WAy(zAz0r^{Q9b!sXK+9zp+oN zq&ORLv1c+rTI=4fe>f#FF8`mY4zJwecqNiN;d{nqSA>9;IxCD%7Pnh)`U*ch(;L$8J7@gR&?7rL5!)5hZf3N{Saw6^eDO7YK31PK) zMddqy5C91ADcr$6^tJ^d^pPw86GGFLFVEve(2=$cZq=B zr#+<23;UXXALXC5evf(rG>^2jR*nB7(7$Do4-m8zrC9g>EWYCZ_GtMxgwsQ%PXYDu zf4~`iQ4lo?;Ze!^v;|+i>+Rvab+f@?<)|$YWjIVgLP)hfl-&_u>EF zpLUUZDLED2{QrfDS!+I2?9AVk2>jn!`kw$BB;7__{@9S<-{y`1)3j;4)Nud%=y3}- zarH%#efV|%MQgw2+@22{_{^K*>Z3oR@%wB?0XUBR)}=Y-KLR52a4?(eW^4Z;V&v)p zO-QbG-KhTWJRwpa`aX>f4uk*b>hBYM)duhtPG%1!|1(eShs!2EH8cLdmyHiZ$cY}9 zVT{4vQ|sxnQAWG$+hg6)^UqTHD%BZJ_bf#Aw0Mu$Gpze*PiV&w_m`l$x=KKN{a$TwRAn-G z>;BlUrk4ofPIr*V?AKolL7+)$eUDXr8?G^XcSzhIa&FG&bRVr0(Ct;Y1eY2cS&^$9v#~KL%=&}6})ake}s45vt}~_DZpEddHSfg zFXv6F%bPrTc1@j>TFvrf=RJqLy={|XNjK6K`KV!iIZQpd%ZDjR?&{u_A`_yEJb{2J zq7iA*6tpUP`ve4MjEE+0YNdQwUAI7KgL}1h;l##q!!@C``R4LtTVBBMw>*e~BJ6q6 z{ml4ehilKdH}{ea2YcAqEhIFez^l+i^gn_=7b9Zl2!Y_pDQDqG*a(e<&UP8xYCL^*57?qDw-Q{7j!7&;lgm!5ENR{xZG*Ha-~Hed9; z#GOf*x4yxY%QKei#}zV&G^HHo>kI-v83X}^qvkhl7q+c+w6pn^?N(0q9Sqmq&~mwv zeg8{(qs=Bl=_i&#vvJ<%Z^ee4`vQwgj5)bgj4f%&S+u8EiiJlQ&60fcz)qxm>}^f= z!zlVj%T@awIw4;?Uo+Rvp7l~ZnaBOwRiKQ0?97C@2b2; zs;5bIa7p-H=YSYE82hj%yY#z7T0Xc9s89>9KwXqcJTYG}go@92Ezv$5Q>y9NkS+~U z#ju}952QG0Oyy5|?MRJwiuFlrn`-rSDipqf-jRIr1i||C%je3^lJUT`8MfNzdE8@K zv{mOr6`#}8gctcW7n2)c=i8!4!~_8d;9B6yeEjJ(e&SwnK-FHb-#E9Y*oM`aAWpQm zFez6irmB8Hk8$Gr+p(jW^I)0$IgL&3{<6t;o+_neui|1Z(7~y=<0>hvDJy{3tL&k9TIHlU)Y2eonW~t6kwFkya&OsW-Dc4K z>@0x4u~RFtX-=vQzTUYIYtUMXHCc2B7ZB7eOF&RR7pq*Gth|6D(}zcsXFg)OFYG!(Ck1++B*cn<=Og2@-ClhH^g!}E_nvwavp8+0!it@_@rnLnv5gn=MwMYtl(40O9z6(!q!JIQApX^ z{W(VzcQNwz!~UyMmSo?x=VP84V@uKBW;wnn`J}I1c&~+|I0p3XnEh-Wg(}bIaem`j z*4KONkj&{s%!M|9LR}p>@v)g{V@k@A+S6*Q8YO<$^C^8P?&Jnk>@=5ieF@Bd&mE@$ 
zblLRv=M4oshwN#@<>lO%FXX1*Lxxu^qKpnqKtn^5Lzh`SbQh%!J&S5Q1U zSysu1mDlN1^T+0O)Fu?qXeft!RzJIPVr`#$I2mcJfT z#ww~K6+fY7=0Q@uZDB9O6CXE0T1~2(RyKPoG{$eW6>cpB9lp7HRegS3iorWi(`1Jl z)|x4L9x3%^_(RF9VKKgk)_2MjZ^q1Dw*!uzpG(cRB;e0mrhk{#xWFw zu1C9elkqzD*`^VwSn-^j%b5E8dOShf)YkiPgSv9>^Ll&&!pikT7aiSay@UM7dXH0i z4NL_s;qO%ZmwiXn)^F1Cq#=;SIjss!;Ra=YW;-$5GFawbrj0_#Cnq|n)ZFB9%kmT) z)2u+kTRa<^ei4#B8N}WfIJ%J`5K{p<5Jc*tBf>N^~Ro( z$OWyht+`yNtWAgN8h?z;J+~Ea_qbgnQNO5o9c%oFdMP@i-zyqS+y-e>G&mGCwo5{kUwDXlZSQh`?*X^ zzaxMnTVib2Ya0?*DK>zP$cME(*e?`uwC{hLjHpiIzjw5J_r_&p+Rb2C`-ze(&Dqjm+fvRhe?+$|`v(R)vPd&zs(88#X|Zn2-B^GC1(v_$BkW z2bp>ygEbb-agoQ*7qExQ!LGpmKk=u+%F=P}F3@r2&V~WO#o>@;G43i&w3ONv@(9N2 za_l&N)qL@EL6|`&F|qB;*xQW$;6tikvHGU(sPSu^Zm|&7&5Eg32}-KW?0wX;Ykuik zZ6*nl+Qm$v&ArUGr8Tt9yjCvBB^~?W%Trz4*iv z>+YCpU7mjJIZu9n_xgrx-@vdXNV9p7g!J$YgMlB%Z5$CuV;ZHt{NFZpJz=-*_O zSd+?w?{2%P7}}|)-@TFdEGg~Z=g7Yw)iSMipPR)l{aX9YRiF9I^MP#$2BxdRz%bml z3?7Cj{N&MsM->2+%_KSwB53LY3+?}2BvdNPkql7+wq%fsR+abYxlm+3hqocn||MV=Dt1O=KOsx zzS!u)l_gUR&GzoqdouNF+Pe9A_Lu%2InOHmd z;Bo+%c)%%$BP4LaAI_@KJ(_y0FdET=g0lPkJ zEeM7<4dRZ5hTp6l8h8Kq?QlkRhr@xXOe|B@)#%<4f%;^KJG=2e^M{T-SqTpg6)^yT Mr>mdKI;Vst031mTG5`Po literal 0 HcmV?d00001 diff --git a/doc/source/_static/reshaping_unstack_1.png b/doc/source/_static/reshaping_unstack_1.png new file mode 100644 index 0000000000000000000000000000000000000000..ab0ae3796dcc174c6a173d64cef214b6d3f17ba2 GIT binary patch literal 57978 zcmeEu1y>x~wk{Te1b24`1b250!3pjfoZ#*fg1ZF|?(XjH9^BpCUXko`_I~HwKk&x8 z8J!;8Rb8uU*_@xuL6EGp7#s{X3a>s#K-J=m?}r9NEr~=*@Rgwj)^HWIJUBvO&ZGB8ajO zin0Q-?fl3LMBm;M=Xrrf>Ja9qdl3^lLfw1ZG{4-P)Vq%iajR=;KG`YT-%P=S;1>Pt z>yt&L0nuG!FiiBCoHzFqMi)?^U?YFrh?mVmw2mwGpG1P z_2q@euEk@V7leNX>iRnL8d}Jj1mufDnz+q7kOU85T0{br`XCIXA#qVCFt6;nF3`mU zTNGC-{X`l(S9KnU18{x;!pJ^Q5Ja}4i=I0OLWggW;&|?LR&TtFl%brin7h1GOAx<#>|WHLr$W6K7>~^O+4Ztv_jp1^&3r9G@Yp8; zMT)a%8HSZu3rmD#l$_wTdTnhb`2k{*euq=yj2cyiS~THGLd`S26AQ#OYwTA=Y!7}m z5L&drOJJeCAFb1&B$!arE`pi~gRWv*wuI$+5aCLGSzA5GwJIA1y}p8VoLhq=8P7Ir 
zoMipLIOQTnz`KQEYS73tdK$aS-pFT>)=dan!-H@tOy57{lFZkHi=}VTIt&*Ixy_HT z3$BG2tg%c82H~6DQXj{$IqMKgL(VPBX*waKdB_eZc_^`vC8E0$bY%w(4b!J zK?omN`VwooLgxLVWbsQW81WRTUH6Ma6}cLoMcA+ENJ;o&AEl?su=s76QWfwb-Wg1@ zZ)gvLoo{vVzH1mDUcN*m63Orx*d=Rpyo9U~xF&=rfDSB+G323_Y}xltVxr&ZVXB>`R)eTN!SaqtZPEy=I(p)U@EK}!6hnZ7fEJvraRU!rvn zwJl$Uwpt$+4W80A<|9$MgT8Z7H@&!so|MkC7Vkdw;@%Ot6HcJdPsa+Ai>7qitbVfal3 z22!FpJPcIG+O&X!0iRa1H9@{+lMQ196^Qxq$>L(K<6^Npj-JPT_>dKh*OGXM!u1OY zEH5O#CI$#~{Y#(U5{S;?o3?3V-tx^l3oMt508l4>-=cS{ZEz4??-|KC(jlh2S?NJ? z+F<6uF;=~YTnM3rRC;}z3E>R9xvf!AeWO>I*JXSh1n+`8;}?v8Abp$9pA-RV06#BC8-ale;xAM_ru80tz!xSXT9!-^ zR@2{-|0W&(3y8~`P@%~&wp|PuQ6$-r_p~weAF;E2CIk{Q4&_{BLG#`#B2%H*L)e3T z_HXbr6t2r32OGiFS8}>6q3;v2fLAz&eX6AaXe0D_!Oy~G}4+IleMA#~o(BuIp_gf;}Cg%t#CQ?@CGacRhk>5K7cf*klPu#VssbQj1t zAvsk!KQz!c2-(@&@z^O`vb*TIfV*hlf4Z-{yxF4w;u0t%4q$ed~kktZZd2=uf|#L!6-n!WdNUpY5$3pzAnkOB-2fdr<I%o+g_XnmU-ubqGGZUUav-`3czyDUl_w|0!B|Qt~2XI{`fnU83-# znV>rVqH?3!Lv&bzblC`53S9$ZgTWH{5`l}U%fsdIe&@aDef_;VQ6Lb zsv#;cssZXXDxe=w3iXw&ibaaDimVoNEQ%}!%y}0E7LKcrYV^x4stPOBYi6rQYdtpz z_Yn`q*2Fhg`k{_~8~S?i|5iNkr?xj3>&{PRyTE0hzP6K4XRv<=?MmW39PmPrq6 z4}XueN0vv}r(7^*u&_6u-kQFNgBW?U_a^0?8Wbx$FWw_b7u8R41ClAqDxwhLp$~_m z?CwtDPR0<9!h&7=KZOmHHdSKO6u(Y!&xdH>!HJv%S%&HgYuUT#i|8YCh4q^B#CAW0 zIEJ*Q^2Z~ljwQcOO^Cjw?5E#zW8|EtsMn`kS*~i{c(A+vd>nY~dU7I;B;Nn&F2Xd% zg$OPUJ?ffNlf0SUouQL5izfO56N~M+*SL4EY8SQ5%&EYYZ*JxIyuiFW$2mtfN2{&7 zEx}g*X6$A$-Z{tRZrR})N+4UV5eZ4 zVDw;~VAqeRAEgO;3Bq}J9XcL`&GL2j#wV(yy(!)f+6`h4a17cGcq-m1adTz67d$i1 z?ln!ejzFY1tC}dzYmjMtwn2AgJwEKU4cloLE&NehY*ac@s8tGGiq)v&no+4#UsDrS zN&7;)uXotJCwUxyPj{egSsY(fYOZ71*9cb9rIlE}Z*RYwG+!NdKs9@S)ZNC$*p9ywQs%eGHD2wDkEPTXHO*y%UvFKy=Boer~42J@9OO3}4? 
zo3-4dIeoDD-6bDt3}S4n_EY8>0~oT))i>@pno%53tjOKTjaS54wI3qxBJcJ4DN<#s z6>JoaCIcp)m@JtHj7W@?cPe-%-GOnr&ps=>o?Uj{xkG^6^bfTPY^iiFRaI0uRH@Ly z)`VBrdWw9AKHBj|i;X?jmTu)O?kQcHkuC}@u6<&C;Hf~WDMykZj;_&qqq8Y^_W*uc z3<+Y20fHI94KMR+q}2iDokb2F2)iKUdpwml!{6KWDXke%X=AW%xge-ng60H+b78YR zyYl)JN5E8s=I+^J*u!mx&%a!3U%)}FzWMC0;U^^AA-vRW*InO*L5hD!75gr7A|fZw zgJO~}naY@Cl;TBoEnpp#498G<3xVpY0NRvu-|1?nagfE0#nHrL(sX>eFso2<%stmp zy>%qLqpa;qD?`8VV9c~#d(mvua%M-xM8T5F1iF5&(daS%0TfpWM{xs)@<+Bzt}Fb@ zRI+gBQIFEbp9jm+z*NU9Ylcuq;3*YA=rP?oEH zm4eJ z#Z|Rkm!bN;^0}6-?Yb@KO8Knl2E(MS0<`m7Qkmei$b9C)0d_8j<)8QR!{#)aDPtn@ zlKb&j2Q=6}N}ROdn$VqjTE2QJxLkP09nLi2+*x$94K5a1u`C}qFu6G%F3XB_y1z)z zj09?uA*?q(5ghM*buqY@E1KO6`kMAMnfJ^DUYe@V9DzTD!-&P|lz;wlm*wngAUQuW z-s&VY)jgg8o&K0!m&4?2dEa+hY*MB`fm%U-vYrWsNxu?fJBk;_GvikNVS4vq^Ksta z&kBE~7@a20C!Wg#|8csaq_wCP5yWK>V{bCdv27u1Vf>V|{T0 zw|45@sFC|P%}A2q%CzK^-o#q3(iBf&9SLjIFQT_WeX4R52|4Wkx3EEzjXq>(WS){d z5xm43q=O_65+z!^(gH$A@`v&RpKIiGWwGRGKHDnn$P)-;wrNI7*{U4W)g*_NjT9WG zFMoa#g7gSz4BZXk#jZ_8P3}p&OLSF-P!m=0S9M)fnJ-ujTC`R7b1ZQVb#8Y(K_TH=JDtv>!D37&<( zlG}#m<$Qd>=fdAOEEWw7V<&zSSB5WA1^X*wI&~g*_4K@CbUUtA|H+fRlH# zQo1y55ct+$($zg2y0S(|fYxAxN6neTB>%;iLB)d`)?0GP#x9zx#RbQhlk+8^rR3e+ zmX#^h$^itGN#rp=+au$+=Tdu?`GVX z&zH{1lQ%}mMJXtjM?Ja-mt~Ui6S9+|?}~0kZa)mqrd<$V1hn$pd+3+jP#Vsk1@?t+ zr5$~cXq{4qHoHHVNhg=c8TH8rDn<@pN)U1~2S6IC{kas);+EI;xJd8=o99#{G*z30UkZUcKh z!t*4)A~;I86PzO! 
zCHho`d`5dVeHI)>x|*`epXG<;17%9JT2&@xaOEiFu2$N6WxbKZXZ{K;mE{)!lF?>) z$?1#gH&Hxsu5moyx&66un(dmjqLG{Knze}^#M9FvTW!2d(vVpkj0_|qp_mXPMYgauAfI@9`e~HOeZB7 z85v37>kP4QQAc|WTKg!6?T1l@)Ona4Zu=u~?u^5Z>8w8}a`kwaXMf4NRqH?72$VsM zQ%Y^px=w#I>>kRWW}1p^`h0F2I$EC=p0}(WZM!CGVT!kxc}}*TZ57WqTdF~%vDHHE zxYWQ+ON6;=t_OW#C1AnA>lFFMGZ6!BD!Z<{GjAVvA0Wy){f>w0<- zUxJBSJf7Tv66M07%Yt&Y!8mzAMSz-$TbjSo@aBs`$@O-#hMDqRT&2W(8!W!v2i-3y z8bN`IfW{x69ylh3ica>OQA{L@DuUYzy#HW*73b8d3h5rx1&k!Tzo)6QKv%N7P(0x3 zn_yyQK$LFZVeMu!3(Ss99H9$MaF5OxwYFCU|;P z1|I!ugXezvfr56b)vU8C+U=8|*B59JQGES)mvjBia_%w}Dw+y~7mWP6|P@CA;S zQi@5p>SMM(d9znLD(%>I&Q4o>3=SB)B?$E7RZG0T(!H8i+qfTJS;k>hf9TD5`RWC- z3yQM}zRnMjczqp&CVotDNRD*WDIc6w1}E5UqIX^J{pQYmNK`&0{O)7MsBjYBq2V)9%$AOI*493;v)U<`9Y$Q z{cD7~h`ZG3!>!w$A%s^OY;x#3zhQm5WN$6K!Eoo^BFsCn)IrS;wua1_cD zYVKShnInnLToyg(nd%wu5+mo9UY878Tx`{5*wVljbCDx|=C_&AV5KxBrrRs~#Jj({ z*ik6*l?fn=o?63LnwrK?-reJg$`TF%L9;#W-7qJs&1+x!H;=_f- zpDkFAR+WzME*fF+a=Drb@_05~{X8BwC4MQGdmb}&Toee5>IN%kSeY(+U~eRrd8A#R zT{90Jj?;Bq$h2V45U4Sz#;S8wKR@4eguRS=A&(IkrXh%fkfpyZ@_xJi;NCY-3K6hRHv(@s1 zla;F%A-CM%{847m zIA=j~j#*B;w^Lbn+0t&!*R=kE#6;)H?3igX9Lhx_SDrwkTzZVA12RW31Wr0CD;aIJ z)*H}$Z)X7lA#)+uP8#A=k^0D7y{L`44KWk~(v>CU$n}W(56z5|48c3AZX?A_LV-^g zp)hkW1Q>T1bOfXCv$<41mnfcXR(X;se)f>wdeo6Jrx9D~l%%{8RsLCu#hSycT)kae zUsd@FyG6labc?s-;t|~cIGG1BJiry@{^!G*_uYvhQeiFSrn{#Z9p;;*jju`2 zf&EM;xqJg2Fo~Q@Pu508@E3^}!9&VV&bew&TwlBaPP_}|g|#$yOZyI_1kvltCp2`rAT;!)*HD|yAKZDWRI#M4v%j&69PH^vJj=Kur*G{r4DS&`*J8=RZEe!2+37ss=Ep0iSc!+=hf)n`sdYF!w@b^#b%y@`Zq+|(&tZWPk zS!kJQ>4|w^2nh+fZ48V!<%LE6x*Yh%L;S_g&YF{s&e744){%+U%Ep+EfrEpCj-HW@ zk&y=Y1&yt-SM$xOic>>Ha9cZ_kSI4(+5M1$0soW|IfDgf#Z+wB0!-3rH1(eL4(q=loCg0!l( z0{?gG{18a#bs>ah=5eokt;4r%YcPJRD+t9%0TK0Vy_;z?1&<%`2aoGalVZ*%EIiIH z=%;N~(7;2AmS{B>JiKg#^L>n`)g-o~E>^FNGnvfUGp5$4lii;!JvqBLO=RsbA6E&( z<>>fj)ENNx=E8glt;HA@eExsQc^hZGpv(dOgn=jT#<;bL! 
z;Lv<7i0SP>PVLd{4CwcH0}FA$4(EEl8PpYqx5#dP;69cvQ1NA)xcR!mcrAf|UZ)uCCiZZ?}ODJk$*X|Y`5 z%>P^%2d|sL=h>1{_U>#OkqVVkURcX{3r*APWVty9gUN5vDe-q4BlLli)euEgVU_$V z5hr|9Rrhx^%N2qJ9K;5*xw@|#Awn(DzxT6~ zH?X-e(k~+a3V^GF5FU417|zd+^~c0BEaV?!`ihUW8tfz@42*#hceu$Y0w%@5Fg2Cg zWTHzWu*v1hl*8pRFIVc^<9DyU>5u5 zq9K@Svc!|rj1A~iitnk1>zdv!{+udQmeCdidpz6dZ7dr6J$?LONRnRbjCyw!_OFef zs)Mmv{)#x5Ee%wEDffr$pgCD;#IUpw6&6B#uUw)Lsa|U-B7D&x1yy&kGYY%xK3pbx zzTO?&p#F8eE7Ekjz&}5@T(3Kd)ncLV`RT!M1>IGQZkQgjq?!r zpN{mAwV9Vk4kpWxyl#^;qOlC2xAnF=VaGBnE$-QNdlPC?r2045l86iMudt;8Bskm1JEesz+R~k55;Dwn$5S_8lYtgATA$D<#9c%!{c@u z;|_F!rkz42O6_}pFhF>|D%WWS+`wk7kEt??7S2G_Ahr&G?SM7`ySCTCldUv=fq$NM@* z3)S0XP#wVJC+%eLh$k`Ur(T%PSNLC}hwsPIsLwPJsGsRP;LFsoGvI81{s|SX5K{y$ ze}#>3WZ90Poaa)@mMJAtqU+0Gx)%C_4XtXlph;Kx?2m>s3X4K>F^>J{d>(F33YhrR$7&VGhvZ@K1=4|G4~<2KrJ(>$>E zYhp1*1MaM6XG*oL6c&5^_BlQmC_>lVP30>jA3@Aw3}r7YcNQ&{>x(6;31OKp_K3u~ zPdA-(BJeSP?>y49AkeD`zmpSO-l&nLQP67sdxL&E zhFHbI<+RJY&LQG@e;q2ULNZ{r+?0Q|il-ZvE1d!}ll-QXAgZV>>kDR+_!lOrBxakV z1r`p6!-Q*de{x|~+wGw!I&I!8%BL{vr9$P>ef>*}Wj^JAuXT)Ba2uS+vzr64lC5Po z#2rp+!(Cb#m>Ac%OLbqf9`6rIk&~WKA8t-cZyc(Q55BSZ%n5>P@i^JF_pj<)^*gD@+rzFmIMhP}Opa$1E0|%A0-Lp$|AN$f_K=FP&J>9G@8uTGneq5+B zu}`29^e(2JI_Xd=Tfd8^(`G#g+*;j9^PJh>Po^It+h(nDc)b1Ue7PH`x|Aj;2@J8x zcgX_;oNXPaKL7`D4I*KTq66sy^fh9wK0vI-J^B#O;F5AONv1 z-oce8fb?5rUM|qJZN%TH&+#NKiNy?37c@h)Tu)>k$bRoI>D!umm%uNfHyHfLgtNIi zkVkDVH*>@zJC-b+id(8n!!}OUa&L8BW63uo0CHu```On0e&!G_HP3vM0rtiK#%iO{ zrk^tkNxdWR&rdhnm2W~(9&fQ!k+F&YlYF7G{=qQ)<$fnWbd zU4P@dF;(Gmhw<2aKjv*R0|k~QeWQ%6wxq;qDrTC`kLUEjHZW>scn0Ej(oWc0DuF1~ zpU9hiW~I$Wsew1`z;w~3xt#}DqOT1>g?4iemL+`F=*Tin87!;9NW4+;KlOc!073UB zZt!m+uOmS{E#K4F@z;~Yr!J;I!n=n_FYW~DZs%sHYax)wvqh1>LUbn;A7mR_$cKZ|8 z**C=Q*5`Z4Df~drd^pBj!7tC3lQQeOUtwy){4$BE-{zpcm*r1lvpS9|6P_%&4?Pko zLYG^gK#Wrvot_vR*WN_D1bFO z0a4UJR0Iwe&ic#Uq1okXv&|cv^MRT%n-kk_{{HH~9yw!wx-fY9U{i%Ha|G%9JB@r1 zQIQG`bm@m0Bo4RRGA!a?dR6mDBy6VZBIf;oTx+%k=H>Dc#D4$%nPM~4y`TKMt|~)v zG0ys09TQf+^p6TR*gF=;PkyIYWb1GMh`2VXB)>*5;#e>JFWO&b+EfqPF7_}duvNj^ 
zFx(E4gJ{^xJib0Z-l^RJv30Nn_Vmm3(c)|gHKTOP!!Z}l&}mTugZ==J^0!9$UZz~N zl#10pm!b{FP)1{xj(xW?J142KaKikGn;OUK?xNc0P^^Oh zrWL8gV2-w2a(othE))o#s7WdR^V{F3fQ0`mmUGb^>FdU}PWIA=R4zhz*I#5$NKNvg zinY$ea413U1O>oEUj*jfSL6%gVzHV`kT$RU{EClta{2@NdM%LPqj->`;6c4#pX3if z8nN>Wk~H{6qf(ac0q@7%p&MEskd6?n6X?~Gq|#YqKCg#+f62vr!R2sB|Dcln5$z)3 z_tN19ixdOE(YeZn#Oq30W%b_dk6v6t(ICdl%B&76wImx)7XVkSFbEnqOo_F7d4Ae< z7Jpl=*E}Rc1elT!0eB)j2}LwaI(ztFbscRv!-{uwJtCDYO&JNyoj&s2Y~}QzK!r8q9$T} zeK=1h8AsiNcq{!Ia3Tf)v6cjhl|bb6>oGF_ZXS!99guq3k9nN-CdgFlZL;q$q3Zfr z50XY#Q{91#-mIM?*Gu(1gi-lKwbF=%j%Xf!!KQchsuJb|Uwt9Nn z9ud#*!ei1C&RvHFC-BKo16$fBq`XH>6Y3gRYxP6s2ZyQv<-s87q`AJ#io zMU)tVd1#CAwavCTY`4V&;qi(@i10fr@8^xv60geeD+~wYY;KM02VGa4axypPxM0H`rcbe$LbgAP~wpA|s3u9xB;hFe}EIlm$)V)}F2 zV&-}qJNfShO33`h%A`{`lc2-G@wmvLc)D$` zkC#3qpqSN_=%-EolpsM=C?oKElxwitTUY7kF4b;r8Sx4RGwAg1D^kpla^|T6f28-i z>vo_4Xas1knV5yEd5T9Iba3stZfN zYdjy%#LBLmBmV$5Flc^_i9eXlExE1T_pQYMi9Z3j(3R<8O|cYN1GIx|zI-<0qWn7J zTmzLtfhvZ(?D~YEJK4nHe!!IR7Mk6>k~KyQeGTPT%=OBtq)+j~?-8`W-DJHug~2dX z7Z!G94?2Xx4k(yBtokCDf8?HDI`|%h9N$wcz4CRbPC1Aw*FHp$Owi(^b@J6Tr*QHD z>_i>z`h&Vvv*B{#OmcwAO1aN@PtpyJJN!}OIVl(G&Uw?PM8<7fMW>=cm!`ik+8G2U zdg1fL_-jwvW}yiUE>}EXrZb}3og-E$eKFqYCWOrklbD5&h=s-ENF^_j9elku_wxc4 zxCZ&Bgw;cvZ-(NKRThh^i7aMuZIXf38fHG}h&ZxnE|$1(+|daCn8RWDOkubM5y*x`_m(|f%%`FoCtvgTDXViS393_s#S!;avwZY z3AJ>^;(OzCFjL&1K6g@Sxzx}yBXS-1It!U-paI>Yg?rmNKyb?h1NEaK{&aP^*6D+& z07dARLM$K^@D9t(=o=yl6f}?feU(O?wLsu1aS_T|df%=IT;-KRZabFSdCVAs87BWV+uo;04hn>7`q~yfvEphxA8yjUs#H)@!$V04)3tQ82>2yXAHr zc8QJ@POwJ%FTQsJJ!ZT2#G!-mXF2};-gfeO=0>3p^glICubWq~(-VH~4}<=fx&%Ds zsVnf*9(?|||0NWX2*-?Pj~+D-bw)ooBw;8{~fgdU1|TjZ~os2Hn))8fYQ~s zJD$~<&D8pM9z67egGpmSk0eqEKMBN7wyY`CklQ)M0T6aZvbTQ*Gh-V zzAfO5ZD$;6IST(&| zrtks^r_m7R(N>k#M!6NhJ}!rybxZ$FQG`>m>Ujr&IqO+s;lE5T?k;wG=zC79 za%Iv23J^}e1>D?rGKP(Xy#R8vPq}}tx4@S)D{eOK&KRI8RDhT~u3IGf5O(rvdyMiv zd;;77n(0Di$EL$eT-|c0%Ip>xn}T;g7)odVfMF&>|sMKyxAmh!+@w%&)qy z*-mHcdT?2OtM6YKp8NCNPD3a%HN0K`0b)0CXMz z{vYnE%8*MX{Y(qyUB&0N0tECKD(P?pJWkSV$@m$fD8ve-WO`knSAPJR{8L^h#R{N` 
z2;}8t5p~r9TDQw1Ak@cdHM=-dGd^4&eE?XAqgxE2Fv?Sz8bI$MxNFVliP5MO$DTkM z2|RAw)uaf79^naiH5iRYia2yzDvXAyz#-q26$W#>>73Pam5L{sno1CABH(fuDheN) z2mDw`0F_@p`YL;ay)oDw%b=ZQ)C(JD6_iII6(j3N7o=_agdYi4o zd!1jE_6JJtDtWR*S?~KE01@2abhSMS5UU7hHPALkQq%$RP9geGvS0p-4XW!r)UpiY z@0AP?I>axNIiW=w_42Ub-y-j|HfN5-?WOlQ~k8f|r0&qK2A5 z>xbqCkKDmuMJ5p?EEJ7U;yRbti&n70cpjfr0HTRrw0i<@?$CmJJp1xxBt1YT;)?~8U6_Pd#i+pwv1jC+ zewVQh;4Rz-Lb|cy!W|J#q4`Y5s+RiCOoohF18#^tpJw%-4FPdFdECG3o7cf#6@yS@ z&!RCJN?;`;_Ptc|&xNEE@vX1AU7#f6lk>d!fUk+8R1oo!767e} z2EJxCg*X@v;N%_@oQftRx3ZE&m;;7X#Zrs=V##tC9+&yYPjgcX+>JJ5%a>0ds}i?- zA>k$r2jl731{DKTl9A~fG=hXHf2DxOPS50UdkI*|rAj29XU@ldpv&->0oKZ}4cW2X zd@q5gdE6SmGpq$*S=QQ_z*@f_#yErR?%{l;y{6MFE{#IYr!bLwT&;##ZBNa>CMvnS=Fpk%l}5u8sp3xi zt8iUrK=ZUE0 ze7=c9HgyFQpzpUu-*xPOtW}>bcj&_^ooeW}ltIqycRvAbL8MAMeMUMqXCYk`;0lA3 zp%Kgl^jFGL(=1Kn91l{&rd8-rD51vFfy@`?74o(+m6cS2>#*M}^A!x;?=pe%gdj{5tMU!y0>q({0i`W0pt<(fH6Tk!3cKPNV}^%fTQnMyF;1y zgV-`xK?{A$XR3rD^efeS(&Oj%()|4GxR2Q{OKqONZVgu=uIPH%^`!st?&4};2zWoR zPf^hhI#rc1pDWWn2ht(*_Q*Kl?K`G9$XfD=JhLW!xDPXWf?bfc1Y{mDvRbOS2YR73 z+|Io&>fdg^A(y6JhTbfoKt0H)Umu-@{M{3ALS$&2W)cYWnKPn`U0@<;#68yQMn+O- zb&v3CJ_v}80x@IH-V^b?I7pVy(P5I0BZ~GdUU!NnSs)6U4HaUEitT)(qg7yGrvJMI zp&2=}Fsw~`KyA9fciJv{o0-M&!L|q`&Q^JyO8N)8K$5M2Kg^tukbeVA6q@cZNk!rM zTTnD5+|B1P^blMQ`O=iG$GkBOP=XjLCwj(yy%9mP+qG9fpr)Z$Y-Bxe7%9FB zV2lilg<=;KzVD+B!o$VtDY{!a00n~#4#fC7CAB$16HZc8v=K*>M~xBc!axKvF$t%C zh=h>yfe`5X_WfN>><8tU#4kjmQ=G$uQp7_j5`INKbmC-zKI2(p2Y!sFbGj#jW=86Y zBFTuth=P4S2Zx5tK?eLYWinaZbz>5w)82XAu+U$AU^M}eK$U5Zm=uMmFBN-1G?V1; z;+sc7b)=ek8lNYx_$1LVhGhbPJEX1s4x19#42DGXNFM1|zv#eL8YSBoZ8(KR>;So7 zmsAAP6Ogm;3KMn^<8P@~nUGQgMn51^WVrv2Nmm#Q%8^F2uB}u<}w+X%{6@G&Z zD5S`~_K7N7fmhB@8JZ1t`6*ECF04+qiUG3CJWwrDolF7lsB1*_RX`HjvsUHCIN&2{ zBlIKW4=I+#Jz*FrCm`w2FYkUM_!UkUyxbq7W}p8{$y%gH+9`JoPCzpjQq)3)p`0XPmE0Y$Ani*{Rb+`)mWO>DOYd)vS6BAqVpIZS9K3WExqK zFWg9(BZUXz?p%2m{E2;0u(Kb1?+kOvyABjr>vh9#vfng+=u2W72VAeV_t=_v@bf>} z2>fvk{>TNVBw(genZ*FkV%<`mSmekn{c(?}9<8Get(?;#d*wdhn|-3-Gf`52T;S3O 
zR}~l!Q(l$N@}mYoSL{ovLeBl>RQVB7LEM!DA5rJ;ua5xZ&;A4Qfj?dAqx_2Tk|p?} z*?WfUT0Vw*Z^x~~YD#az=A|6Jc&Q-Xi}{Z#i@f){B<{!NACS@3O=hYs3G`<3IOX~s zhg@{h{mO=?l}N+XtI}zHWkq4kvn&>T{*a7(W7LSL04PAdOW2}8e8~eIW^AHp&*n9S zf{7L%Ly|2D5RH=0As1r^Ck?5Did1LW+uYO=GYF^_i=~2;vc3;DparSs!Iy=;OU;Nv zoR4vwb&((_B%r-C7Vz(dtPcR=UgXT3(XVb{>f^yo`xs%MuvRZnQr^Y}4Uw*Hs%P>~ zK;HX?OOZsRpWmC>LsK`1tdxOrCUQBVT&18*9MRZ54rBUb7o zV(#1XtwH%R9f6}1cqtqxv7Uxep!i^~Iw_-`;wL9mny71^+B>@ZTu3WQ?z!8L!e%)h zi?ACpFwpOV;$A~ye-nsT>Di9Z%i0WT7H}q3Hr9ykF!>ajEq~2z)Xed3A_7hX?uZ-A z-+SoKrNSm)pLS8QbK`ygxo}>Wk9Qvt;OKDtxI_I*QTw+91z2K$ZF_lXWBtzod;N^a z@oF*zGNlRri^=_q(#a)(bQ{K`YQg^_PyRs$#DTz>P3vuiOm=S|3P=DMUfALhca0>4 z%_`MnrrK@ zN~>q92RbWN&Z{o}@0V98Fcz1wi8_4T*92|b5H$gcJXH#KbQc{2BASQt8NFxt5-+*DM2=hT#Cx7DnlSLsl8R?91L(1R#5&$OaP5KLu3Kq>juL~ENXr>D z$q8e%)PS~$JrJWjmBXFjFdL&XeN=6^v`~UeF);W=IfuKb;TSN!%m=tk6~M-o^5w}E z0Sa}re>7L#nFCQ?{92W^1(5ZLn-6OyaR}JV;_)=vEx)K0u=FgTkx9q40OHR&==`sJ zN{Yw}A)UgZpUmJ`WAV}s?f_>w*CY|YVt1|yc6G+-cKL0IhpJ=X_y+|k;xwRlV`Y1C zp02dFPsX#DDb3efrRfZY0-P<;*SnUnC;(9rb4EIq%Tc;SyR~F=O{A-S+GsSjSO%!@ zv7(vj1xj>a|&{e@c`GYSU@hDUY z#ly*iU$xKunX=8!kRc87l~DZ>hm1I30iaZb>@ywqZXEmt*%^(7-UBWx@)mRqt!Bfn z>|_}{tRC_Ky75o~k&28iOE03D->6;+MTfDF85|C4Om8>=te{%TlYl^S8Q{5dSs;%x zeR3uz11fZo5fODz_Z+`j{&5aoBXk#H-OFmrYM}Y1YMB~wECp!5n2K?LwMuX6{K$Se zux%K<%No5h8PURqDWG1Fw-Ug7s}Nr+qsV~5Q$`@_(BGUK)vnAe#=2e48&?38k@e0O zIeBBBosX(#eMkqq9Z}4vm+pkdi&U9tG|IOt98Z=7?xEL$u1hrPGt)wM)U^RN!gwO! 
z(4bQ8^ZdL~G}ILI_rk_U;-A1b_Iv;b18Zen)F8bI!r9qE=)$-6VvF`7c-3__`h(#$ zW#Va52w@KadTR*OhWJE810^<`cnxQPZJ){6zKN4U2^!6Sfd7-A6w`Jh)sUJKqw0Bp~>(;-hlDhuc*lmOtx<#CxZ zp*jU>F?Y3a%&_nRK{B9p1MP#A)SF$Zxvi1ceauVgp+v=Ot5>p#tRttr@P@3DcJpS z(ph0BRdA>Z=YWW5U^E$!dvdh9Q07Zp+EO>7b z@KH%0+VPWsI;0sXU08H;eK2^$y1DPKDwV*!i92ohzv&S-Nd$?ygIuDX;ZAGg)lH~@ zM09`lVEkrZc~+Df8Rn;~>w(w2T1SXDJ-KG%_AJ3)EK!voQpmamJ+)|=7hcNSGeZg_ zR(p;-z|h-&P_)5!PeYDzum_g0i2}Yzr4-qmxhY((6tq|}vY&(f*G%C8DKfMQN;#8d zUkk}sw5dD`;JK@gcx?~hPpxD$8!^E+S&4%nmUl7pl116)3JraS^~B%#rGOILB}l)OG)nq-gd`tBAG7x+l4pbA8h=$x{wJEV81-3cwFdz zfSTXv#wirA3}?qK+W!ld*}qmqr_^tX{~MnC^`XrSqHgrRuR(lcqmULMfWzKCY_~IDw(yOr3;V-`x{SM zJ^;%F`m9A=*{WapBs4<*1s|c@QnA$fdY9v_V57FeY7#V8$o=`Sz9o!Pc|_A*je?Mn zAAkmAKu$!5l1kzMFq|ts_dmdGI8>2lqtZgP83myEKgZtz+I&AC<8J{lA3v0cneAky zRh#bsC^5*$Kd!U~>R2*%%m7Lfx!en&JktO`bh15|2LJ_{HL%HUgqp0D^HvO=GU1}A zga!}~M6Y0KOPAxWg4qUQE0ur1qv;{do;7$rv`ljTAIjbWsH(jS8yBQIq@=q+N;(wj z4(XKc?nWA<>(GsKcXxwyNq2YGx7GXJ`@Z)-^Z(|Xah#2E&faJJRzB-l>resiGI{j^ z`GjdC4LB%X&%ZHq>B#?ps?w#m<9H*0ntk9$@{ z4{s6H<~~A=HVsYqqgz6LPdaRM=eO8?*LS^kT|k%vxVQjs8)FYBP{*5xZXorr24nhU za%Jzp84gq!c|31b%o#so?cZGnyJ?I^L?Y-GFs8M^6ePJk%}+-1UfwomE&Z5FyK-}= zflA#M>m-BqZGn7rI$9tHCKr^;ud)_}LWXwVZ{RWhEhlw|RUh}i+6d89awNgH?4@rQ zwKR%#V^Q$_PM+&l06J^c;k2@LjT=R$ax$w4Nr6IsJdiF*0PTWYpu(y&pJ#4zorw{P zq1fBWLav7%OXjf2stK9g008b^EYe(;i*0aXvl@VZ{vFoYa=ucCM)7;b1DNBl z#Iq)eFCY<{&%IuMG)2{})7@Ys=`$1pwvIBE9b7D8!1i}Gxh(@b6AMP%1AmwNrZ}W|8ug7_4_3)x_lD-)+A~RbY!{^3<6}wre z21sAqbF4P;K4wDE4Z4wBhfJr<{0Ogj%7~PgIUC~#-MbuJFO#(Vn!=a(YWj%}6o`-7 zC>3>18+?1JRS~Qs%koLv#?&nb?sS1pc(CV>o`}%>iOU-hB;`gUtrHw(I)q9iD)>3a z+0YT252e{T2CcQ{N~KEYLPff^FX0EW%B-QP)tPgfTqk_A7|qq89tVzzlJE>SM`CZ3}%2SN{{lZ zoG24SQA0dnM&(` z4~O3gyF&q4YrdCpyHVHp4{3nGb6Jm9#+jyUDJxfr(N6<8)Nrlwd^tXLe$!kdpYK6W zt@6H&=QBa?Yjxy-O#HxHwMpe+inXVQ98WLK%7gE3%G4C!I$2P0zbuf`=9Nac~8^ZX2K5UJj$ctUQQ8 zwg#~`0S-UXhJ-e}5+V+~XtWvL(zr-$GcEFZt@~;;Sxa48E-==pDJwrw<)yXB3*X4| zBYbBv?2lS5=B~f8_A$1&{opa8({+-pZ$DPC?O+7$BC#1Ienpe$VW-qbH3VYGCu%_6 
z{(2oP;C=5vY2SL1aB~{;PqfW~1arP8LRcHOlUxR?fRx)a{?)W@2F5Ub5i(E=Mh^kG zEhreb1TZ^8R;0nJenR{jRw65t2lpoeMHRNjkMA38m^L>89c_2WV}9 znEt{&KzK4`f<&HA;j|0;;sf(l88hn$;M~t{X}h2Kx8pXs-`Oq^zyXxHQb^inbCh6| zvSVP+L`&`iWQ8SL7{)r<`r!~fTW$LjP6Ug@o>8lQM1do-}-!i;CuD~MXjg0 z=*~AE^QjM;SU44j4;lsS)K?AQ4Vs<~DmH~R^S5+P8S|(kQs#rV1 z+@h_1hpmaPA7NgNyRUb{s@9ms30!isIR7}q=|ILS9ZvONNZzp|SG_QbE5k&tF+*Cb z0Am6lt|UiK7`*jF-)teq-y8{d_!{fQToIw@O!@=e<0V}Dy<%wrd^PGIUC_G>JU6W>Y~*2m zsbAPZdpTNbo5$yN^veSo%TeyWpl0`!bBg|xbHRXKb;Ew7SrfDUY1;)VM`-A2e+%=$ zA82xPLo{cg05f{wAVFY?t>mDyY1>#y*Q5oC5&skB#~UKJHH+$nV%4f@O0+btV%#ox zUj%T3&RsyltVM#mL^hJZbki_1OaYK@B=1Doyt1P2PSJ%a_=dw`_`4nTYcy+*3CH2= zc))$l?$rkvzSB8hjBSJqf3|WZMuu(*KYED9&nj8StHv-ADY#Hf-+t@?AsViEq12Du z6Ii;C_T#h?fs?uaZUd{@xT@9!Bc4k8VUZt=-BV-WGhKeloSS@G_z7pGp9y9#26x_} zR$X5+{#tHb*$Qg0W;L9IqQDnoL6tAxC_frW%u<=aNlb}>`24TKv+(=a5>K7CY2&b_ zt1)DkmFBKOXRo-Q=Uc9|Jv3@;FscyvkyOS;;0f?_2!MiR;{&|hi4s5%Y~kDr`a8|+ zj1oxa^SGROeM=I2Is{}bVnFfPxmm)}2suVLLqD3xL@L^e1Ju+((lBU7#L>cT;U_>< zJ!STQG64s+Z5*JCLYy}`Oyj)@>b@6p=d64C$w~ihr(B6S(NF7eG6|)xv(7+f?rLEO z{C(9Z4a`P}OrhhxH&1lGS#nx1JD5X)NY{m0{81dtdfZ_IX2wMPg;+-u$1_$mji+JK zp`!E{wxGEK`buB%hUr{-;D%fz^H4rRk`IGKeUYL2jfH^sM}%vS?DRZ}2&-tX+-9(bC@<+Q4Xg~?Szcb?xldMdv`tjD2Lx764n|#I;DnS!dcT^;T0MTNSc&2C~ zBU*C#DYW|jyMK2Gx&-FSd7!&&K@r`pNySo&=yr^{3%wCOX#etRJ~m?HUKn#WQJRn4 zklf+)aDc_*ZPx%YQEvpvSQ=m5a@K<}eR(DOAfi?N8Zq18xA))dWR}mep?GEzkryO@ zQi)OZzddvcnn0dR@qnPAC|Tx*YZ(|MBf}s8WfIsdhy%!qaC96=TIJGWmhq2aI;jBp zV;KG5w%B*9B7EZx=nvM2`t(`;1wCL{tV?kEq0F3ey5U%S;mu+@oTL)j&8HQV#}86# z)*$Ecae6~Co790BY>VQ1SdeK^%nzW7!ANpKdFnw&000GT!j*H^iKv1b48h+&8tT%; z#qIZYBM?mFwR$4kVzC~6pPe610=0rLC3WX5K)=`n*wT0W=3@WZ1rV6nVboz$Qm9Id z0394dK5>BF_Uw@uw!$MEoi9ca>h-)N%}UBNc68tb2{KA;StoaIC`EJ7#gb{cG~_x^ zN|N+R>pn2W&&}3sv)3%UlZ+#_rZ3vKS{+q*nLYkC#C}xR0S|Peh8cmXL+)SokdF>5 z(a#D1iC%EqloZGlL$AN%nqlH%2?B@VCIt(VGy2sfgy0Mh_{8`NFkxxd58n#V@$LVb zfbnstu?Gfit$mf(!SGZG7+mQ*^hQyrIGY=U3Me zIZ6(e8GTF5hNaSlZG*+bQC?1_XSa6DkJSl4S3|pU74<(=$Sc_YX+U;hiTi#0s7TcD 
zt%{6EB_a4!xOhwu<_H+rI{E}NW7*l{kB~V>unBZD7yj=ZNFK+zb@0skfO4O?PWX8_ z)Fd-!{c%bjiF5&5r7}tOqkHlnXe|qqCKdvvG|K$Er*gamxv|feZ27 z@5nYyW?Cxn6xEXuEO{l%m-^>PjUX3&E(@QcFe8MQ>Ref;2fPAmUc`-Tdw2yG+^La| z`A{_AL*okM#F=+S$tK&93hGol>hqA&Ga(L!2t_+5O0Nh``*xtTYycl|{PO1XT#MkJ z%32_n;+0K6xfEG26VJO-<_}Z4L#RdpjL-eOT*g0$aCn@{ue(3Us#UeGbJea3d?CXA zNJmUlZqN^aYrd?Z4xTC(urg%VIes7B0Iru_RI?8}Nb~Zwy5s5+O1P-&*K&<&cKh8S z3#wI+1u*eBkI?yLobqxO1#l<*0rx-w3cC8qE1hHr+$ zPJXWi$pnkvrWt8`s#W(>gf=>LU5EC8!=N$#fJ|(rNgL!M@{lO==R9UQM|Lhz3ti@%5$n6sA>?}{ zGMM|-qHyLykyAI#fNjoOA4uZETVh{B8Se*;g^N|3f9wZ>*xQd?bpPwSQGo9Q4QqmE zD@X;mD)cFlVmWGh3WPOPfx|<1BXHW;WH6Gpufkrv`$~=OfndA-2uJmsw060<7PB(*)_rENz|SeKj(zLy!QjUGpd4*EqRb; zKNzK&4}NXZ7@1$XU1Hz(_|Pucjrh1UrG-X@fFD)gQYwk2Ko6MEk|E%Yvg;c?7^7n@ zrx3+!)@gNpPWstw%Q9CRg>&5*3{UfkGXN>h-xv|OtBR1{nGR$d=<$Hr=fMz#2glFX z9%|vo8YrTj{Sg-+__Kz-&uLh86PPL;LX)lcCcOIT)nxkG%38Vk_48uPu$M?z+Fuh0 zuV6o6fWy=USUYTUIw)%hV^E;7Ar;ksf_Tky{fl$8yiJjnwpVr}uW>&y0(?w62D)D}XaD;L5J1$WA43*$9Fr8u2 zb;Y)VqKfWcfuGkOl_j;F3g?uv_ck7=V{fP`+w2k7hqeQojB^r95?;ltDp9jBX z06^E3RNU+TW|I9@pvM3}-Yb$A+~WVAV+HnEOgQZX@ctK4`$xcS01aV}U7;uZ-xBgZ z`9*MEYS10~Tg-o5v|GXWnb4wX<1Af>j0u5*lNtWRK3@kdg!bfV?ehp1ztY|#dHC?} zM;Q0@)(#m`s2UrpfX(09J2Hwcbho8`64aevwkXqBaU9}7E}M6=Jt*L5cv2)%MmLl~ z(Hf~tg&{ljn3b!vn06FPU`utcTR(ttM@#-+m>lgL%x!AD``MvC9=?-!+LE%?kZAd& z9b3>TDST_^utsOjcA;CDb#Wp)!v9jPMQhv-l znY_0-n%Q)|`ja#`oj8qbAVzeS^ZtU`#Z|NJVMI(I2~BZM~> z!drni1cU3^&ygE$GE`P;nqtYE-xMv*L*6J@s2z=e6**r_^tU}+*|5DE#=xjdQaX?A zC;i4Xn=ogi{Y-SaI45MgF@ZRsO_8oY8Y4FIbRB^=?;shXNJF-D5E~Q5$)b`R7n;vd za7Gsc;0|Kk^l4G3J~5`Y(!EYXoB$vDUMF)6 zpEg-(WoI&81Y!t~#sA@6p2F=q`klfEDLt-)wr=RY2iHECdNTLN>~mU^Fl^7HT5%eH z6%hPH+>GFR;<9fbj*#z}J?|7K@)FcS}K6-DKFD=9$--KChz5# zb?)<{shucX&er{h-!h44yFKm(haF$VV8)*^c%E*#P3Y=yi$%R}&`>7mVdVDI3%6U} zd711)Td$T&xSx2wsEy`QASwf`49{H2;am~X$(6Y;*-kvlx`ppikZaMGhB84pQzPQ8Sovbc8{d3Ga>y}A-;O2|`oigT=qu-c@-K6Z z`qW;=lP~~B5%plCi6|IXci{1 z*nU{3P-nGIWBA}Qq{SPNNb-5Y)E;3!=F<6|RpIMMxg*2}5vUC=D(UalM&3{7K6%&P zb7&XvFjtyoI9*OXE1W;>vCNn2$FwRHcna;Ylm_y7=E`B<%-IIDlQtSIb2)h7ipAgO 
z_M!HrOFxSS4l60>)~3=tr+Z#QVvNFnoyA~OS#mNRauej<-I!HZj2Vq<8X%3E3j9~R zMRECMP5~J3GqPVwa&#+Vx@QjFm02_v^lbDxHjycHCN|QE91KTS3w+5ojdhWU>ieN^D6|P<^L&)!L{$?m@Nrb#; zt62c_xXp(5(ri%d6260@^ca`bo9W4@)DVQ3aGH|FYdb2fVaS*`Vo7h9BxzDz?|#1t zYQ0$>kT6tNVKK#?@!O`4Qb!??_&XW}r9srwt6rwu1YuvbEV6H2e-r4F=sk2HT}PEG zlxl+uMWnrh0Bt}v)cojcAYVEtAQMgzU?i`7-%x#@Bw$Q{jk=2pZyS@2Wq)6pt!Sbm zw%!|&Edp+}9k9Y~k_lTzc6j`)=^j6A;i8YxcpQ4oY3uMa{WHSJdWo42fu^D`RUvJY z7o{~XS!w<-N`2|XcleWh-5bkAE8J)zn^S^4^;t`tI@6OPmn=B=;z*qFtfw=b(+v7t@`DQU^FcVA*bCs_|MwH$j||+u5amw>X{z+|>{% zSbE4PjGM;+eM9+r2v5VE;^?!ZoNa52&^iV38{HYVr>on$s-0-r;8KPa`{(`v+ zTN#JXMiIrJHfhU(ck>tu%6kwz{q?4Ew^ICYJZYncKd!qBnJU}f0>m!S!Pm6PMPd5G zQ6C65(pzw^CC-#XR7Y<>eK>Si!<{-vD5GZB_uw*=2$?O|xPMjz{GEdm*}{H;x7iE2 z9OR!hnCjpz@}fA8Y_9F@I~6d^Q+ixbWu;iM6m@=sS8ORN$_(u5|l7{4W-$a2!@oi$KU>aWyR-X-Hv zfd`RW83rd1rRju5eP&S_IvCN8ORNAEYNUH9tB2D?tJr)I`&L&Zu|Q`63DP0vnq^!d z+A>WTC$5JXS!(=}?1_m6Z7>)&Zj+{ut>htRd67Z-kLi0!`cLB( z@{&2G)Hfn}JyEKKNE{mBP`a2O<^H@H2NR2CjFR5gJ2VOAWklaW`bo>8wg1wbKofd+ zUso$pTgM_cL&4Z$^UQn(IfNCfZxU_7xHStf02;ElCGLy&^wZH?$`xrou^C0Bped6d zyOg-Fsq!9JfzC$gRN3rmy5+ORRW?*g9aY7$uPgmp&Mfgk?ECDgD*2x?uhIB*c^W*= z0$P=FKajI%)H<})U13tq#h{t-M#;^HzV#cl;5RGr&uV3geaUE>n>@dP_3E$S?c3@y z<&86X?=wmeT9Y$lq4;s0tmv=wS0I+M-D2R;E`*o@+NqdyM;ebA+WLA9Z2tP@q|)Y2 zWA9~1UM!94-Th%1WZU4p-osWsngAv8;Y24C`;k_ge4fWmushQhBemi-#?t6Dn^%7j z!3bpM^t;$6POE5y#BxdIosi|sj3J}2xR}6awjcdrgixYSPOj8DgS@ItX3Yz*#CcN+ z>wH>|ew8L$?y6izBAVQ{~V4cH4 zCy<(Zy*1))KfqrVNgAG%JX8O;T+iL(qn z-Bqn6+gq_Qt9*$8nwl#eBF{XJ{23R{x#tqIT!DhYiK8NF`QnM86Nw?iuyHox$X+C8 z8TcW4Ra>!qAAYgqWo=VkyQ2x!yVjdUBzobtNhkh?4}~*U7D$hiHf=)(-&Uf~;>`9c zu^7{af}glAnVP!-@R!iBQ=K+f+EX!Pwa(+X?$5h*mukwpHI%;3e;PBW2=TeF16Cuw z)6>tOoH8C-S$-O|164SxbqtLUhPi;4rs|a(x8BA zrI8Jrr;9=zFwjadQp=q1#u)8msFzG%Di3MWC4^~Qm1uBzs8a5BaL5};lH=GmYZwJH z`!#(T8?18$u+vzgWV6~w)vv{EpHvFvPA)U6&JPC^@RiJhM5_(3YY7Sz%d*f4u^`P@ zCk-U>J5?t4S{T1^ITJqJ+{`Fk+=kA-9@g6+QaV&|MKn2(QQPkp1UM(}TXIimeI)-5 zk zs)hFF;Pl~`qMD{#(^*hyEORr{p_|)r?V8|RhQyrypAZP 
zFgszTN*Rh6oaG)Z1blc@xrH;+ebkc%^3sWOrY1<-3*TG#9|#tdj5>Lm_fSYhN(~6x7SkYkSwTYO;q$i9Nzbw))Xb zO`{XlTH1er;)(Gm#%$#lZ5Ch-x&;$PLPZzii>9?Gxrdm&HGRWpVqx{_0N>t3~N;F|b~M2df@$ z71xKHzKaJr7mrZJb!0XGt=)>Hug+|i|BZe?w%1~ySfAwhHVutBAlF{BGKSfLVk27R zAoHE>`YNxsla=P|4GGU=V?4V5fIOryY%feuJnb%PLgX{GQb8VOg2=TwD%rLVF}2K3 zXV~icPojr%aqh5&K||0^T9#YTC;8LGJ9Fxj+TYeEw2i`_{;RNNhp|>?X4kosoko~H zPi*lqf+t|;3X{y^sGDIag}zDCDyc%KeuZrkUP_OJ(<~){GyS; zxXmChy#AQbaC0K0u+_1&$Q5I?Rylp?Kz7e$z7o&oDph>-lVddTX)6WS;Q4$so%GQl zZ%*bP+~?H$RV}$2tupP?!r3fCN4!2hRBeotVj5&(k;Oom$QSiWWbFg$C9RWOTXqem zI`S?DbBXKAGX;!^n~zo!?+yA{LC;roV$sY6%b{-#?a&h=6a(M6G(G6tolQDzO@QYv$}=px3o-0a<$-?z*X`rbMmC6VX*e( zwpO+u2r`Op5;f6`=Mq%HS8_jCFc_)m_EPn*@uingbHDWu!@c!E5;|V0w~uTvU)(t$ zf@O3N^gKuAP4HxvJEZteKKu&S`6J+}Tt1F9-L&S_wh6?~K?FzdhGTa!%T2sFKZ|G+ zbE)AKQz^b!P2c+Vp3qb`q=l83F6yyDDHM5FGEdipon}d4d)oDjvrEU$p#S9dX>lj& z0X?FibFpr(tHegL;l)kv!Ks+3Q^2Rj{=^}&0R^!(;49zulvh{_@3-8%7cTm@rN@j$ zlQ*OtX8wSWr-I=0Nvs!LT2(_)La$aJZ*hX@78uiC9|sq8d#a-JlDc2Po2WBq??F2e zLNw8XYP3Wbqca&kS_v-14;xRW-f*)YL(Inyr#^fwa&uZz2-dDQvHnbQ^p-~-OBx>) zrUM9Va5JYpL8y}(J9-wYAUz*dld5fMy8eKD_-pGEjdWdP;hSk%xrxoSPD*PA(az0efY zhf#&&6A&#Te9IO$eh3AxYPnS{tJ{+rD^7_OqTOhEfqRkQAtFG9uSZ+0)n5x?k;fI> zHQUyJyrktJOd+z7A5d=o==>gMYXO|JxpN47r!pgH$aifzN2a=~TqQjb%F*}VnDk>Z zfGT>85xV`}UHIQ6INt{VRiw=G`Mn+be*oT>U)#Y~F8Y@L#@+wJ#FY6p9u#b%L-}tU zt-TGPncv?$R^0seEq-0Eq5};=k1ucMVE?g+KX~$~6}Tro1N+q|?EzPAv|bhe*@>#o zi5O7|tVSbEbZMAqlDtn@+>77E>p6`e2v6-=WeUl8lrOwW|G?2c-#@mO^=wr=utSz% zMG$_9yIbWDCDMQy)}*n0DD7W*@F@d@cIB$RM-vAhOJCePh@v&PS+>h;{`#NX=of1L z0()Cif9dsz2aKK|_<-6L>zbTv`aPbZ^pMHHM~Nu%$L1+&-hPXhQkVJEjy^~BkFDG? 
z_^7llvfa2k?sNmytZpMuy|pNsAp;#q>xAxXQoqh@b7t+z)@72;#rmwgTRc%CdYgfA z;6gdBdtN;8O8W7=XxdT}F?GmP(!DXirFNsf2iF6zf1Vgdo&fHw#Nhfllg)qk}XEbIXQc5AmBjn{Ea_9vI5m(BWMZaLHCYBG2VAU3|bIoaQpUeW6B2g&H4#!k!En}5~}cxXEs|45{PxY%k~ zP@Kdq2PLh+0J6h)7jj1c2VV7jp#}vJd5u&;UF_A6?Vd;Jo}}T2y5}Xt zGhdQ(wQZvFCTGWnV1-G4TAUH{k&nxHm{rJe&P}>+l!ARUU;zj5H+#wSBJJk*8{8K+ zY<~T~FV@7TEjIDSWOE_cjyY>~@xy}thf|2K$hq?#p-O#}0tL46=X9F7;hN-*m7${l zF;EmGaJf!997YF6>gNH|U>tUz?Q$c1(uP{SsP^o;)d{BH7}k;;hIs*P3H8ievOx_| zb0Db)8LSM&EGBHOZRvfuh3pQaC+B-o2XJv@TY4@`4L&#d_ho7sfm(OpFefgO3-0R& zap?_o0vEs|HdhAK=at~QrYN#D3nm9B`JX8&zVtc(%11~Fs9nu z8IfEh1UIitTs<}aMyoT)-`Opv{7^Dzu_m*2TAH*r|0d-hFL2K$*GY)Juy(K1!e(`+ zb9WZtF|o-sq2RF;cm|IId|(c#t;RIe*+w9<5;fZcmF72BeD zUmpXcl*kO0`cXc~rwhU>j?Ufr5~hpUyA8#CfM1>8TITrJ4!C1oT& zzxVgohrT{&b0It@k7>>>kVudw)Uz(=qFVR(wtHVF(KXG1|I`S^ux>NA`KHE79mL6lW5BE{ZT5^$->paaD*-Eb7!eeSW3S@y*Vb0}wg9{1@ab zEZyVEL#JDrLJ$eI)@dmN9U}^DNua2Z;Y-DoJA2lwXUq$hApMi$;6W)@B3^1tn}ko= z9pKSL*u#O;>O^OjDW16@R#H!u8vPAvC<#p~x$6Gn4dEkAXoR@A=FyrKWufh?Mumr4 z*~c-+$jlhjL?@D^%KYn*`YpXi>Cs77LN};3Ml72YviZW-d<=5l;BPWsK~f2T!O6Cx zfMcR+^lHrk+5CHv_TBZ*SWG+D`>!S1m`*ZAf6VIWnE`XS-v%7CMlo>5>0_wX1^AE) zB#fRP@k^S5bVaKs01(1H^t)DFre^~O{QTX*>LRc5>SnQ{1;c0q4@F&j=~n^8;zp@( zbqq)D8_%UAyg8%eH%o7SNpZ>TCi8@p7xn}2AithT00)vT>$S^AQKB!E;KOqgfmhk> zTuP--v-?t!*p6Cbo0A5^MYCT)RGSs;6cP!u)859ry>`ie!|rq+8y+ zebulmN$&Vw8i*$Q80k#?87&m)1f zbqvoFRFXAPiuxlz9R#q@ zdS1#CW+;YzM9Q(GdMppY6GfwbjkyKx&JtIar&^Zu2n^DpTKQxB!4Dq#(MyIP0=m`R zL*G7aSF9HgJ!Z?`9WtYcJ(#=4&D3()r(l`UyH}3a*&3~15nC?xD(h2Kr~QiXMHW$4pVZR z6J{;r@Umv@p&@c{Gq2?qz1|;c(h4qbA@J9O6G*vcuIa5ihKqs<27^w@%lu3>7o~ zQrV1KaPp=R#YcB3Tm75d20|JMnQ&#{K!>7ngjyt3k5^UV6FQwDUkHYUg4!HEf|@3l zg%Bf65fed0#hMAdDSHll zq1X1QzO<)(qZ9X+BK4r_0E;jv350&Dl@dewMO}-t+1f);NJyLdrBGs!sCFH(yw0*?;g@9k;6B8sjZmkuJl-ql;B&s z>R&L{K0Teg8{_OJ2f-}QOhiHEpBh8YJbCgu%VM*!q~)7jX`ke80c2SF$U`Rh8};%2 z}QG8>+MfN|M-$^Vbb!Cs%9OZCq4gCe|m`rh8r^d(5Cw8m4`iI#YRS z%nSj&O%9lOeDo<-Uvs`1ak4(`mqXW@ zZe)xDr3Gh{eT{3Mc!Nd^rps}<3yX*CHSs24gYDPx8Y_7_F|aM#({cPh)Wp#C7PkVG 
z^Sjx|*Uyh;>nd~e^B{?Fx=OrpuO4DvIr~72f|0De?=xfSs+NyggcB!u9htz_I*t_Vgdk@r4A2yAhR3A@r^##=5d?$>B>)?mi!FU&DNQjC$g6JOnjGj%m)| z?T0pDtw-6DLNZLJ%2h-)$@gbi_27>-WgVCsA=l5?c8pk{4qo?89#Ef*``ExNp#=_!>10em|ErQ2c(9cZj@%4doIhdBbwDaqdKXYW!IaZzI!u1NnF8=*QHt23d5F z{}o3-kk1XyVv@?tgm#8}5qVN?<2d;ZBh^>$x2?PNA>C|cPLqhnI9~musjmGr)5vW+|82mxyiM&XBIZN<6Jry4;S-0N#lTUx?)q$LJ+ld zp`-GND-HME*WYGr3;V8qxTgaH&RBVM8rf$H>~6@4w$-FeS;GFz6sei__36kxINwXu z&o3}O9?)M;7ni-PS?DkkADfvR72Q}(Wv5Y8S&+ZM;c?j_WX@Uz?TH&^xUwyu(Ucj+ zj>!%hkvj}zW_>-!Gu({ACU@p}R3tiFI*hgOl$rXNYlvYVc%J(EF+!-@Eh~Eb)^*>! zbMo1IL&V4QLd!tF;h=@c!yN%mQrXw>?uN#s)Sa>At#=o!JHq}fUR~$Lsq7ds zO8Fk+$h1G?`{MR1T;0Tn-j8^^y#;g#wCayPHOx47xa)h&5Z4xG;cLV9HN`o>EpF39 z`voo@DHAU6#}I$p%&898a)Wz0@{PPe4ERBte5i*u5BnP&M2N#z*91zhhqSN3V|q<1 z)eJRvQ=7&%UOey7^COyr7ucrd$ojrvI??0a>nscvF-^5$q@>(E%)`}tdA$j1 zj}7{%bVXcgg78L#Tj}5KS_tNpF_px-J{er?4faoNrvm6J@JCnhb<}qdAx(*r0a$aD zPprMc4CRgO@=pGJeGc7+E(bQxo%Gda^!Jo3aVk2Oe)$sa;hgEG7c)oo?pJROTR1S{ zXbhbuS$CM#zsy?_27O#Ld!hPl_K?ZNY(8!ISvE5hceL3J7G!c0<<3H`Y)uUfHYhsc zmrB}(UEVzgaqtNYE>_dK!(bi}^cr$lYjqWttvnX;XsPEgv}geQdVd*a+T&u(EHTnq zra&e)L%Gkts*M|lPoZEdD zMf3GigzfC)N%>J1)yzyT<-97VnI(yF+|AW`^zRQUQd2kzPd;~cl)o&;?2V4K$+wbG zF7;*svzJ2};htA=75lDbqwVbk^AGj<96lAKix`RXbwkKmVx-!)(^oXs{OoFF0*dws z%WAiLX`Z$uP}gqVuEc`{+u?EeKP+wRQXYt4&OQX!yAgkI8CVCt$T~betEo6O#x+oW zG*p$CNRpF5?B|FiEEaMD%kOMmfDdrIccMUqT)?WgFInYM+z_pR?84SHGc|=;)~d!B z^5pQt$JO#q?C=f-zk`@yVLoj;QKp)VlLS4IrQqGUYxCuRzn{Y1e0?{V%;h;xb3$>s z@RNGw{Ss6}JjU7SprdGGn(t<@_jqm+%TLp-V-uqEro-Ao-U+mcCfKc1Gc z{RRIb*%}^_m9d=IOn^`omD9KQ5VOggzTA~Ik~ppDQMY2d$an^0uIhxua99=I$%zv0 z*5Q59`|V3~!zCR<*?JFXBvOve#>CQ1$lH5Gwy$WaGN_wB<1A}5W0mD<9IfjD-I@M; z><;ip!b%-1%Utg~$j1wWT^36ils5>6+;E3n-}$@-d}t6?y7%@#&RZkjRO=P z#LG0P-VFryuAcR9=Y!+d)OUN_b0JMcM$hi>X#}MKEwAjV;Cym&^qX(a^08TLsd~TR z5I?8dD#{lbJ1a#4uEyIR26)5B0DU&9NClmjio=>F*nPYN5%6gXUT$zCr1^&ieQY0c z?PH%5W&5Lu7$JdnXgb7f)0T$fm1>NHGL0??mb?t#Cay09SEl9LzkNpr25{_ZCe-mU z;N&RSr}`bx+`$zew|SIL`7G&^+xyZ_I<5HI`r9kb!+5v@+Y2U*;`YQh?Gin&_P6J{ 
zg#JWBZx=Y=Q;j+57EtND19M53$cOtOG#txxfRiD`^3`A@tP&zC3mp|V+0XI`D^yMw zzSRuJ*_z-j8?Yqv@kgxWs!{b0twUI3 zVy_hYOYFixGHY7Bregm4k;>hDo?+eXMf+?x8MGh5s!Ks8EbzsvZld-^9F$|*B5v!vrMv7;4DPry6gzVF z3VUh)LI>!SGh8Nn6L~LO6blu{ZD|LRe*cH}^e-(>$}<%DQx&8@1RxRw^-=P{ajUD_ z(O?e!5#<{yKsAQsC(l0p5tP6HWJ+r4S?83|k6eYo==5Qh-zkbWxFH=N^Dx{%p+7R$ z>Du2CHh8z?{ejI)5og=y$6#y$j-(9#P=HWL$ z)96l>_~W25yvYH5ukKbdNd0|}c5?{x9+o(nAS2o>mUzkEM<@U$0VIqj10_R^fBZ}X zoFZm+`zTs=$Do`p=&xss|D_KT>T}*#5<#KD8khLweQ&RB1EaxA5Ys-_8|S_M>N70K zb`U4w{k(+eiDQ^3Lc~P<+Y?vqbVbIe`Cv*F=}n+(uI;TQ?TD7DeHB1wvdT?qn-Cn< z8vr8QFF#Zc1s3jQiCAKu!F8Q*#%-CB}^e}%g2=&aK+&@mxD8ZrG{ zbb8aYb0<8K$+m+-Dp`$a7SOwScayb$WB*~>g>8LQbH|^p7ucv$^@!t98WC|2Bp2Zk zO0Ip_L3#3R#%jlR3yy=6^UFe&O!h-pDt%paG0vZaG6My0PX6uIc9uTcvrouI;m?`o z&0a5jGagq~td^6I%40A7+-tLP16{Zqd2P9eZ79wV4_>MzbjY5P2o2h0QI*ZM1IeP| z&G2m$7C)SZ$qdwOzwo^GsKZVeOKCqUM^P+V64+eC=}Py+K?;{1WO|fi_K}$NZnxX# z6nuE4E~EpBrCYuM-w2GQVj9)~7Tt`rt+L^99a#trC%vCYRu67sEo9p6E6xcBTp{5pA4uuQMAJ{MhIVYsPcZSG z9eB4H7sJKVK3_f5%kucJsXEbHn0xnJc)#xRnQLlaumB2cN|nZJh7^rgCTZ-cTpswy zHD$?8EVJ~I8G3=WiA+bT^^J`xD!`(HzYORM@^;wZtfct9-)Q9#p(T(%lim5^fPVL8 zm&JHi6JL#=h2^#KXqw_RbSmn&q|yYvHkXHF9W1_UaFpSv`;{z^xDIvTo|p3~_VBYl z`2x>RnR%zJpV}%{dy9k2Ua^t_v*fiB>KTp>?QIFJ&&PrS`jP|Z>*GCG{WvPi!ID61 zVLu&A#`Tyq*kn6aIq1*O)Hz} z%e|b$4xKH6fa+8NUvFX2Dr7Y9m+cFq0{-ER(!sz~PYHNZjh9NnslS332^|NF7_3uI zt)=mg<`!Ecd8i<)9zM{*4PH(&v^Gz6^AKTUQ(PlrNmJo?WHYYo(;$E2h&y9I``huW z&j|rV>^|dVxU1{MmT{VTpfK819jnj-tYty_{x6Re9+*m{ya7f=#jM9dpr!%Hw^L<- zJtvF;MVNTJi<`GD49@z}dWY%z{zGQkV^x7g+eAe9z2`?%ts2J}t)ARENV%1z%;rlr z!XS#FMpI4Cnz+emkJ?M8vnDT$w4`fNOs`hlaVD`XWg>KKP)BZX2mP6?MdOunA^C~4 z7N^rk)%dUHtfusy$w6eVB{b_)G!y1>ZV$PmwOSjX+rFOMf2TUh)kk?<(ixMuZ)<3O7K96!Je zaeuYv@5Pu1T}JbnF8kx#G9lM52%EGQX;+VDdASrDQz~1FYB)GIXXE*8Yf~WHEp-&y z2CjVa*Vr0r1A$A4(^I@oI3C;xUdIc?8ijHP(j@hVIdm_r2ISG}8?ly^C0tiZwV%t1 z!ii{=?X!Q-aCb*k^-$t8%Cu&l}<&vyJ0CQrF4gMr!+|2ARQu|q6kQL zm-M2$rMtV~yOvn@{k-3J|LwhhY{r1W;Nm)So->Yl%yZ$&Q*_u}hH25f(i?w9PDgKy zS09b##D5hN73GP%r(>*%Z@wX_N3+^cp3}5FRq>S)zOLk%;Um9pXdjNU!NF!IW*JX- 
zh?PfhcSnm!7Rv-(Ag#AU4a9)BIb*F;_S&%`o0q>jcCgxxc*XF>g*o7?CaQwC87Egc z6(46&L>Hpbr-@{tpL{UuIwZ9T+PdB`4azrP=IdIc+sk~~+%>6Otx!*uzp$_q63@SI z?r=s`rF@J@(aII@DVooG@goTo*p98}%^Q`?1Un4gvQ2o%g%T&(V?=iJrs(sYaJ@@ZI(XBF($fqHr1Vi`RJb zD!G*Ar=Fxt%3CW6f%Pt!SQ2C88)ISA6Q2yFf8;{ z-;kgUt?Kdz%7-QmewjyE3+%ETOTMuPfb3AS#9LPXH3eyQ|Y^L zKzS*AM`7H%jv^sZ3ovO*!P@ECNh_sq}!mEaRO2JLW_iN-}P`DFeXY4 znm8|v-L9CjJ5$qLI=-s#zbQt=Ye++8y-Y4)@|xjoc&2WBx$T@iwnS^hd)qoM%9DVM6@e^Ur}>s&-e#feke~H701XbQPUzjE*_-^yLdtS^Z0#IW#~gi(#4wmUbL9Ah*ciqSv}m>Tj;zUf#b$-u8|Rk-PP@MM6%CAnV@haJLjzOh5qsA<7;&zU~*dQGdnd7i%S0ZT8+GBMALMi%8#igd46sH`f~ znN+G;$;M}VG%oa@MFJ`3!5ihc8VphlAng6FcIoh4wtDO;)D5{lKoFgmv+Lf=crdy- zg6GWO8b^(%ghd|_#5}UH^=u<3%G-mLKqB4v1UymVM7Z2?&E2B^nj*E-a;g*WRR36w zY6q#Rw)*udO8>L(<%UB6-lfihWuBfG+sPvmTykD~$WheXBHssPkQ-4uq=fdZg`eky zz8nv-EgXeDYs+5#$RA72$wb_Rm7yt-YG|-1!@?UoWWE)b$ zw?`i7gYh21l`)a0HGW{G+ClDIqtkW0fIC-OaVXI+Zv%(bZox>a@6zQX&^Bjk#rk~8 zwSgHSs>ftqJ9}a|cNW;_hKe-a|3u`CCnH1ksAY3g2}|;miZx;c^1TtYlB}@pXD#z3 zh@!h=dEV9kIZ83~sTK!-w-cdvtX{!paZU38UWYQ!Gy*CG6L*gCWPkb`x;++R>x^2r+>*2z2 za##77*Mh@H8vXh=9Q9_JWq|L_*lTcLH71&U3Zx7O#f zRD=3gkX9)oF|h;V^B%CD4!ypiDGHCtZ7z3v_%tA89B(YE8}>k}EX8Gv4LZt4v$Vn_ zf;?9^5+xJ6bCOa;F0A}{=%>%_H|Ih4tlwYsa;@2A)?mKO@FqZ7|3WD(Bh{-)XOS2i zs@t_<@z(l2?_+^|eqIlk4cx=YsXX~)i|$_uN!<=Uk=1Ww-?6MmlQt?-EZvO)UXmP{)u!^CO7M3eTTl$?nrds86E^z@^8VHLtaEpFq;HGgk;56M4 zd1{ytgQ%zPhC5`zvqvBqU81OAe02bpHOIqvcIzK`BDYV zmQFU!T~hV#hzAClC7YX^sx;$AV2Z331t9-F;H4kpcc1!$NDIEScXMX3VFL60i(m(8Z>E9|zpG4h&P}DG;0n6DIuxOMXAqV*sME_V-2& zKLg!g5Aq~|esMUta;1J^7e8BSQiIM+TpLpJe+|3=+JMu!!BiuSyz|gJz)`5z08Otp zO|w7H-Mj9v4Y7lLtKm%C*N2YJnHOfq)TH%$YJhYCdrH6wAMzZaMm)`Wa1eB%Yj@C; zb7Ve0>G}v0OFcGse`XOm84d867+2r)xfm|3BR>F3QcA8xQPHbOWM>nfN@VpX+yEQ|4)UgX1(>kb zCkpAGq5a&~gbZFWHx*JxCLsNQvDC6_b+Xrh{YMu!YZ(q!&ymm}Tsz#D=(y_4?D+xZd|<8#!5H#0DBdOr~5%LBzqi?2j#i! 
zzl>N?qPfj>V05;b4+?PEJ$%;K?=g!|)1Fm-{d=KROqh_x0v@nJt~O;Cj{+!v(HOj;#c}6dqQv2iPyc&Vp8>CPma%O~ z{^zK60^nB|YR~rHqXO1c%@0r!7N$mHKY1y?mf1TB*f&Cg_{kr-{I}+SU{3IHV*}tk zivQ>!3X~7#bnmIrDyfN*zb%Vkt!$}q+^L9x*q&61KlVxXR6n}FLxlaxiD)grL zZ(Vk!U>qYXtZqC?%z*lPTZ)K%n(H0LjsB&{{%;960%g5jO(Ir%Q=+GonTtB+HS&c< zatWoHdA#NDzu;NO5mdX+swr>Jw)xU)gez~2)?=mO&(Z24VPo&nPSYvB>8gw%LZvTS zs&wFSdV+7$$Td+!U_s9Py?O*hQ5^Vtxj`Y&`8TD~BseglZC|(Q`Cwq&4fmGbFTl2k zTdL|A&oK!HQiXXesK!wg;~^TRhegQ;xh%w#PgLx>@=R#Xc?!VC!C#WI68$=l0P|-C zI<8g)CoI;K+-<8&kd zL|{}}I`n&3P3O==CGf{ls_!W}!Pdpyts{fEmCyu>)!k&cM2)Cp9{z$?O#1D~<+jew z-+XjUG_9HpzVPLBK` z3v^DB9=5*9j35(fwt}7R zp{0Na&zhSWJ^{SA%{^M)JId(Sa6`lF2LQHKY(4yfA)XsHnsg7DpZ|LaN1pxeOo6}X z>+j!s&s|TH{+VmYolE#jv8oH~Z7wdR&zS_F0HYj!Xh5!;aQ2>|SX+%6`NIoxJ$~}F zhr^kpn)q6q4525fk+v8iLf?JbLd09TSkJuji3OU~Hhbcr9}Q%H%)e@Ej&RtWvB3at z$b`y=Sv_i*OZ1A2yR8J_Ip3^zxv!aIH2V~n$TAfMsa7>zI+`kQ6NXn{%~(Uqq)|uf;yzQSdVH6SoWqVGU||~6lZ!_ zF=oGZU+9j>BQ6@nD34;7!e$VOi8tqXK}Ti6vTl_0Go*MV7cySQj_cZEfwxwk$PHS@oZ^41Ts!9XYn} zy_r^7I-HC)ovh}rIVKC*bR^u2h(THSs9I#z=4E%1rU(Sq5-qV9AMZXzFKj4MjN56^ zYJkvFF1&7?jQwnm4yv`fn+{+)a2Kw-$q@y zG)s1xZj_}1Ek-!T%;Kv(5J{*hP>fp?VSG4w}7apQWkRzK-m64!2QCZ9~<#V4G_61evW2 z?A*yiOB0Zf5nKnC>E~GsFtCzTAB%8Zg&HT}sxH zl~j7!zYI>G_eTOy4kt5|5nM9OJDsMNn%=YTyd_yX;^QP9uml493*$Y+Y4=tU&vsMhr|zRLI0<$tUt2H;rI3DXX=aSz&iDPKN&bZ5iD|0 zAfw{H*`N_kok4Y#V5hxi13;Lj+8e~o9giBhf86tNrJC?-41%z=s3f#hPOHbQ;V$0O zvQv;`-LY$tsR8jIW-8+)-BsR+MC*ygxx+YB=H=`=nWnr5Rhn+W0GU=_6L{KZOM8pf zE764>fl{g8-Ka2Zbn9;0#3%T9GS2p?S@OLM{_%%hP|4nWYuRrsI)|Y9*1FR!UHbwLOVI@bCTuh)_`KkSgfa9D5C8dAenl6kbv&zOrdef+IDNB@*%N zThE>TVzCRVb&QkvFwdsg<8HSE7OwW~t_jnm_sQHf+}kXVnP(GZRGxfcPC1E2Ywr*jVlG~>9m4K;1!}{Iv$@K~yijZt zo5JuMW_OA(!ZG-d+!+UBjg>&6pMl%qCj;_T6DmYEfSX+7npzbdv!&}RfzDLic%Jny<&<^9H|`f#s(gI=et z^2=Lwre&>{fe&w(9G+Gp$Jpns=!QmwDR6-cD#pe164{kc59QBr4tqd-2Gx1r6SNB7 z_OvFleV6P6J@A9MTg5mUc_W1a5@$7mt()zM~bxY^G)HnpmP85 zqNOnhJZuTvUwaie1@>aG)VPd_^;FkiHVxVI@Mp6ha~=qMAnEhqq8fL#Um0u0sj9iP zGIGxh&>pzHPzD!YpqG6^J9L%Q<~v&3Go8(hu*NZo*coP4Eu4PUsFm!$?P0aQlS*%E 
zuTsm6x28oJtqGD7C&9_Jj`jopGuv0MTS83~heLh^ET&_$ela5L?RDpfQJ`|biq3XIJ{%O?2*a+pV?DV>XuV9ry zN__D0KTpUJws|X5%a8N5$ws%h=%Qq9j5k5l3HtM&A)NNK+W#A;-N$A3W$%+UJ27K0 zOfV%TQCz5yh;Um_(|||E-mDjvEYStBSFcxGT%q;MulV5soISGXZ6OH}>%R1S#&_Gp zydd{M?qkPXc0(r19g+nVLHFb>fa0zUzZ0cx==iA#Y+cIcZ+)c(8@&Ox2dC%}Cn)Qx zkKJT5iYHjH+``O$9j)i*tDQfH``+d?7ayv*$lEn(!1r88*p-^$Dq@3r&Eo<8)qBs+ ziH_^xyM7%@P0SYiON--04kzP_C=WD6wHm+TcY7vpm8?f-KF{Dw%M>W7$LC(nx#nKQ zBCqKI^D^I6qbB2)c9oecylD`~aDkR3yhxXp5bkj`fN8k7X1&4OlXwYh0QggCHHOa* z*cRn~dyAkJ=)^_`^`)pBeM~a1nGksi;__k#Oqg>9P(BnhU9Ups325Xr4b6QV9O=y4 zA~voXK~+Dn&uT|6@wsQjP&DO2R(&-B;cqs4`oGxl!aRP@Pv!o@hEGm1`$>jJ<_$@5 zK(_pzk3Kh#jssIPllP>b!Krgmk;SEIGi8cuKVdP&_782Rc$UN6Ni z)?c)+>PuRPX!*MS;5GdS^Gb1PCatL0ze+*)@ihP9$KTwQfTsa|{J#4eti{3HOPZ}`Bn-)r$MqW2;c*uC`yxXisec+;b=|D^$JBc-w+keyJ+yBq>cpm8QI=gCTEry*;CCwk@ZHp>JmI8zZ!bmNWbJv9CJxz1;9@n9zM+J-Un0U6gA!OCr7ty%hyFElIUD-h$# zXvihPtcKan0>K)22;DNjHM$gC`h|5dWj;$p)}P?_K8kBW0W@VDt;0Qyjc5(PS+Ob}qd*M4Ft#aAk4y7||K8Mzy96$LpyIY7Ycj46 zaMtPbJug(;XRQkl86OlDAVJ+S>RlSY_d?kRh&{6xSDnYJHT2*)iEB+z*6W(Hl4}xx z1pCE;TIW{@z|S@EZi7%|CcYT8{em0smj^i>f?#aqx&9^dH{9QPm0C)WnzBf zmwJ2vr0}J5HReCU>pJQF|8QOGWAF6vdGuu%Wm&o2;^ z?h9IrzB~X5cF}@7^lw$Q1Z+3!2wj{=;Y3AaIZQ}(wR~*i$VAgVrY&xkOTO8Bg(nP0 zERj!DxOCnXxoF(H^m|U;#&9lc3Z=3gzKQ_l|3o+EYIhFd$tNFglBVx1=6V<1I(kDB z-}+ZlhmeSc+;9qe?+D&G^*$SJZ;+yTDYqR8B=^7N0o>X7@yX2Yc#}WyXBfan8Icqz zj(wD~{@_+e5W3d)ot7c_ zbfx>Kp1K>$(5Q04$iin-(+BlnW(6ZeI)?EjohZ3TdOP)J^6S%CEts?OGn;TC=Ch7E z|0LFppn{^xwP-|tOnY`XDN0*_PVt#i^rPFJ$)MiMRVLVvnn0EO8hyan_Vg10ecXea zTB5VW9ul?vTV}~ef~JRGqC}?CEvOsolB%uAeJXG3acF7H{8vw>@}RO8H&oW&N>4P; z4`FLWIi`b1q3#4=E<=f+Vc}q|!l7%?zcwzfT*t`-U}Y<}h;*6GtfODB5d%ojuu0)F zTggX+BWw9f9-tYJ4;$iV>UK~A+a6`iW4u&z86WQ*?__N0I2D`MwmGFNWA4wt2{K%| zAmB0+qEUCucx|eFUDwa+X48Ef`QbzGGPG|u3SNudu_4Q9!^qrXioMq&Lqkm-Ib5D9 zgyrh~_53*_1ti9=>h~>tDaI45J0F8`o~bRn?O^8wQm|Kn!QUFD>v10OrC&ev0-_D}8m zE9Zu|0NqlCYvf5o4tq-0ZCsU<-e>3f^wsQGDaHJJWPO^n5gx#@^6|RE<*{96BPC<@ zcKEEAWkbHAQ&VN3&e@^D1bBl1xK-{aJl`<#pDRqjRgIblC3>d4lOA=eW_sv;K>j2= 
z@7Yj(NRjgqC^H~sVXzF3Zp12!7!U7_=w#Z~XJYyW4B>#_6NfvZ{^Fz%SJ>sFJD;_Y zus`&%HjnNlu?Rw{B}6UMU{F#lXIM0^17mU1OeGhD%`M|4*MnpN7X+kEIM{McJ)vPz z8LIl^!kd!UkiU228$EP!*d-U3v0;TEiC7{U^a*0(lyutqD|$i&Orh)DR6V7%uc|;= z1F!$NIS@AdlG7hAmNx6Jc@_7mcP1r!JJOt~1)<5D?svs5@L0U-7v)!n0loI!M|7xQ^Tc@D}l3k_(+PQ?4JG zbEDc{^c{qOOXHu>>xy4hV$k5B&)$+z7JY!yPiZcsnP$p&LCUdPs48~P!PZha-U_@l zm>b695E1AmC^_+*>3`IZ9k{$KuiY4dB8KuS;C*vj@154l9#@M26{VlZzNdyET`Jad0k%d4-0hlt`0a%P3zDlSE}`Viz=)Sfe3`n=RTG1I{2?te&;;P>b1^4J6EI!su7PDF4YY` zw|0vl%f#K<@MXN*Q#w`lGDO}Cb17|Fsw&p=;mp{|WkTD_VmjG^XKhL_js98j9Qhjf zB;Z@Nnc3vyVco7d=}!UW&(_0C>rVCh}!4?aS9FB{d0>00*;`)E?hm%(B)l!FrNFOk33 zNeZ-7%S>*LzU3hlzL*n`uswMOeb~3`7DLEZw81HKWy-5xEn^>99hn45A>8EiB9u$F z&-BeAB4A6W)JeKyS5RbcMOm>SrD z^+m#}b_Ncxp-eiCSPs^OLPf_!N9Z+P1mR<)l}*L^xAMt0Bw9YXTXQOhxg$j=iNcLi z0+sE;tqxFl#ZT50!-JJE-v5Xg-*)8`y?KGY-D0TbvmaPH;mnfVBtt?x(r*U|7C#~9 zsX08Ft>pr}_Gyr3*6B-NuVPq(pI`Zv%ArXD{V98H^A_6t7imDZ2_Owns>y_ffDGry z@^&`KrU%zf=f9P7fw@ffYD~GTD0fXhKNh?wI2;NS&+QLIfkR&YK?2kB$CrL_c6snH zaP4+v7^rF16Q5Dj_fb2j^MW`$qI#wc4rjZKfnW5cP)+E!z9e9LgC(li5%7EMH?meK zYs*_&16$jcsO(V0^2^2jpw!gfN9q&e*3wl;;#|l z0+~wf`?|fDV6vDWl*2WU!B4}6ZosCjeTlFZFDKiy9zLcv;;ECY$1PEoT(!xV@_0k{XL>hg>YAUc(}Fcf`D)oZcoZQWZbR5WH1>j?*6AsJ5c&{c5i9^2Pgj|l`Y_>FFkfPn_i4j7 z7778}pUeKs=YIgF1Oy2H4gELg;oeUNB>-v4*n~)e|2_ahhqv&*o$sG9NO}(tqIJu3 zqW<@RcQd6h%r6=J)zm9vNY0kZ5dME2e062&GERX;vKtHxY51hnAYR`-7L*>P@h-&J zoE^WjM+v3Q9%-PlRvfu-P`}}1FJJP-3;MF-!d^mex>Rfb-8J;zQAWBWP~kV?hLTux z&9CAWwuZpfcKobY#zk4@E63j)euYFW7r7U(@82s#sWojG>d`c4ROrH9o=LwaiMTI$ zfi#&2am0H$2~cE!I&FZi@7WV8i0OkdznbXIKaNVh-{t=4$SC$hasayE_YR4@(0?l9 z+#7EG0&RXK#J^x|9d6a!E%Xcuf1n3Gd7Nyk;gXxMa>F-w?e1I3{#N$#LYUp~oWiLt zqJZyBW1dPZg77J{`H015hC;1Fs`uD?-wIz4L5lk)W0UbCWak*$SJ_PV)U^x4r?PCKW6-D$08l-(f`ak7wXT8#Vf| z!=D7bF{A*bhp1C4_-s2V?X_x?^SS2{a;Bus+j_XsO^&ACTs#2By(iv_9=&rTUk?p5 zE_rW#F|=V&BfCCeQB-o>D8>%dW{-{GYF|b?UwcbYWU_>@7;&IoWxvaucX@0jaMw3} zc?47mKmmknCM%z*Q78!9znIVs;QgKWP2HbdF7~rakm`zU*#X}sm2^8qi|a+IRRJ_` z-jgf_8EUkW`U=X|FB2?QfaE;ec!s)1wP|5Km9cG3{wZDM5IFtdy2}rIV%5r6{LU@+ 
zhCcUv#5<)PhHYrrc4P08>DWj9f*S|qaw4t{MmrO>KP-GE^GnX=fAjEx>_&abj6tVp z-9@3q2c{~vcu)I21lwFS^~nx0F_dcp#NQ6jm@1Hw{E5bg^u3e#Agc#t#j8Aa&{JJA zEGF3&Cm*pG_yB-MPt}`Pbq<1o2us#JHhPox^7l|T6Ae+uTF<)~6Hyt=Y%Wh3r9u6J z`m%F6xQ=3YmB^-Jx34qh@Z@fSiss3mQqJH^*L-|-bP7)CYOO7p)5j1C>RTdR_(pCF$yzu%H!;!=R>{wtyVGV~sNVzU|Q=9rMU2QuXfv(ajkV1)6yU(v}Jyc@1El z2F{ai`)A+LMl=7Z%^;&zEQbBbmZ|IEB1;Xyl7N9>(6@gfqYq~d2YPO&@P)9Vlfh0iK*-9mrsC@USuk=a8 z)?8(tsDcMEadPtDS4A5mualZh#1eZkZshzDk!v1z%QR~&PJ804xlVs*I42U=j1qk( z)^z4?)+UW?Y$Y!jX1gs>3>fl%`Y0<>OzoYiu79xjQXIWB#!@T7BgXjT;AI^t?BwK8 zAE`}FGc+KD(>jCHkygtcWb#z76|p#MVg2*1ou{dUw*moy8XsTo^k$_gv$*dGq=0IB zu0n%gIqU5aV2yIzaTQk=6ubL99{1!K^47L8dE(hRcQd7wsXjW@CaVR zLHrTw_M?K#*UAY*zr8n1KuB zn=~~>Q-RIyo9QKS5o{9?0m~x5gr_0XT=r4&iUbuMLi|IKy<=9V{sAL({uequ4NoCU6 zmhV_8i+pg0i05rEn^ay%QfF?N@2wnwfVVE3!5WH@tgaw6;9O1^sneNL9a3Rg>WQnwJv=Ua)V4@W(n; zhcAKs;7EzkJIDQo9#TMxJyStPUq&NIk1`LKnv#EcvgzIg5O8&=opn5;x<=L4gMe;w za+)5XB3HBis;v8YtgMiWH1oKo!!qwHJbxQhJr%&l_kQJ7!~gxKdnaNFsKw6dbu!5L z-PyfY5FZ2h3Z>@gxBmJw%CA57YH7&-8i&D zT^~lhAvHr*#Y4ztqlo}hQD?oLjCg0ed5p%xJGu1H$&qDY@ZCH^Z2se7&Afjz#AKdE zCPTMUuX?q$!DRL2`k1WAgJ2tlIW$5%YOl#JQEwip7K&vkzNo#!#R_l|#na$~BZTl#X)*&a;P zAgBplZ|^l{(X|mXi&$9oL-+@f0V5EYTGmr<=XLi2%O=j&ZMM1WaRJ zg||kc369X^F*iq^F3y&!KrZuqk6HcT#}0(^)hig6la<@co!A<{@G5A=p8SD_R~T5T zoOgk1THwx4VAt+~JP8jWCmdSboea5fS;t8WJ6 zHF(2gLpV@LzyvS0JZl-X1@JthdcjhEz}KUPJTA}C7>v5yoLL(W&FV#g<2+Gu6nYqH z^u0!@Pz9+!Bt6>4NQa;$pF4Xk?4i%c3%2VmC7P-7oD`c{N^1yiYNGX{Qi*X4^{+#ni8o)TpIOO1k}xJz8S{}i0RsR% zc%g4y;g1QpVG)txPe}l)rUX%C@sqsnU*4K}=RVOys2y$j(toK(J&V2+-?2C<4pOulDgATyoWbuM< zSi4=gpQS7vN~YK3XW`GaHa~V3jsp^4d%+iHHa;*COiKp`)d-Qij+gKXtxarbt@A>_ zH67WCS;*TvmUgEe_!{NeQCrYi{oj>#)yDF*DaW0pwO-qr1?7YJ)a!v6vEN8|J7^|+ zPgVuab&sxIQ#4%MU>8>Wkyi6nRk_q%Y*VwyAIIH_`< z`+S4tDO7vS7BGFAUY!q*u{^LB5N#|6pv)>jZbORf;LIt`sVUFIo2$1>06>Fs4@j%eSy zNMB^H@`yw~lLvi*PFC$XM(e1~m$B)i2b`K!+WiX;P(wWUW%|qhc0}Gv$pm-`CME*3 zHV1EOeRFK8CFD+d7hyN&r>nqq2+Z(e=^633Uz`|U=2jWxL>t-WM9=fXF-UqJ1t&DH 
z@>a%d^t#?(wLqfNq8(SE2K^m27Zg_3Fs>6fWfO9zZ8ilvfD5Hv3wuxoN?ON!4|t6{Du`tA)_VO@sPj zAHf3uvE*tAKCANA0sVt6 z5I>ymd1E8QttblW{Jgg851i4X?hQaE)sAXhzh?CllOP0&hjXF=^jZHix%g6Gat_uc zv432|`^kyJZcA(Q^^9CeqDzmb8E+ZF#F8fdPEEUU~xSqUEv z7CQX6kqRJ=d0_gt(wki})L7-_=&nKofj9}&ZGC!5h5m~W+|H}ooZc+uh3`$p>knOF z;qqjFN#^G@t84#flI@s4770~(^$&yU$LyQHkd(N(Uun(1_74Er0ht-X|JnoGWB0F( zje*?5QUY({_@BkX7Xbu_RVm@eB|LxiP5+SQK<=Z&!!% z8(^m1t{T)}LtgY2LxJrJ_4eQj0jz!z1F(8}>N{puKeSM68~<> z8`=$!RySm(zuSj&1u}y`U@GBj{~2q#w+=9C$RGL;BBr^pQ{-=uvaG!yD!XV`x|C+Wpv=NYVRX2Wa{67y+nJx(a-a?2E z?!yK+{S54n+5h_h80R(W|IGLQ8<5{$_qNcU(v@e`Ycc7SUjDx4PT->()KrgGP@K>Q;v{jNPj+ zu{}Zcqc<*GFqxofv6HdRY9x-GWLTe))P{mItZsfVnFkzmgvd;SJGdH6_moA=Ho=w` zO|>z|Us|>h8!m2K)RmZidIg0^@ySrkwZ~<3tfsB2=dAVw-vprjj=V4!NFjSug1mob zt_~OCEaX*)+aDw$-$V#E!B~;3IomGx2vqiMM{sdhgH;i{^MtQrEAapDdaqADQKDzU(eVLrRBe>8(Tcjuf=YE&&{tb$}db-iD95zJL} z(yzlOAdp`N|h-focvNQ4Mvpq0MPvCLh@%?#{mG2r_Ar zx9|`f!7W6{6rr$QcYe1gDcWkY>8QzEfP4nO>6b>G`dP>18R864w~`taVrP z&8!j(E43o#wI}iW=ffvilYvB~Z=>lyP3bw`RMepy#u*Qoum<{Ow>y(TimFSkCJGB5%0E^@vH z2oX1a@RlG~q@WitkC8(KVUyrx9$I=0n3)=d4t}xNxRe~`klk*T+kPt7bD=$cuyq+V zX1BZ3pqi+EmFn)fyJpCDaguz*Pqr1AYC->!L@sOC{?eJ1_o$}O zt8S>K9M(z-V!D(geTdN4vBI{vuH(bbf%;gGHS;FKv#9ceRQLHXl0}%YL!~iviSQ7W zxYU|H$MkcmFW?VdZHg5u$1VA3J6xXvpGcgEeLmWZU6CJ-ZO|e)zLpXvJZaF-*>YWt zs(SH=gAsRT{^)`%K+<<_T~b6L&1>jv!D z`~u9Xv$OQKuZ$z-hL@vubl0Q&oI};eb&mYr9&q7VKXw>{ovXPCHtwV>Vjy2bgGMAD zV1c?+z8np>DLD{BBrjqB*;dOGS1|KAl}qZSzi5{ko3L*-T23s*D$O-y5H}e$urtsIT|A|@Unh{;Y)98LG!`W#~ z=`XK!4)F>P(B4RySBpp>ok;dR>YF3EvM}D+aAPN_7%JX~oe4C2%ZoM=w>ng^(Q>L7 zS!#4OXC-bWTivzB24#5aaKn3?!-S->5KVC=COW;gT$n|9d+2L!F2=*3uw z1$qDltD@`uwtOTW`2Oc5CMf+aG;G{SY27kE+@C-^s+O$o~nvfv9j8(G`now6xk&oODZMtYKXfpX`__YLeTs!EC%L##`Hce6f7o4 zF9tZM(yupErDe}ypg7UV0)pYxsD>QzhOgv4^cv`wf=I`<%!Q*%__qnZoI= z#Amc2*?m=(8P}xhP{5v2nSGx1h3lO|(7TeeRKb)I-xC=3ena7}=MH^_2j%>Ed1Yif zWo)zNzMmK?*T!_GV%FG!o*($MBM%g+oPE*Fw-!Z$#mtuVo-g6b3Wz~&WTfeMi(7tL z*8?Ay$jIqrs>8K|7Sy4b<>=uL8G9lUyE{ zwmB9cB4Rl{MjrV((dkrrR=gRSmKpH! 
zSzmFzDvhgC($wBB=4-x@%HLJaQ8!ZMvroJuq4r{DRAlb!s_1v@&Y9~lJm-(L?`?QH z=(G$G1xz(}sLRn!D^R1xY#=rw`KzP{UQpw!BLfO5m}cUueK(8`l~Uc!+w({)W9LCu zces(g3N`TzB*ooAlHUw79ghZwwxU~2zZzPZAF#KD_uzRM*4u3Khl=*4mc5`%a`rFC zO7>uSTxuLGz<6KQ+EeOV_wH<33N+p-ZJ!umUoIf^<*5f|$joKgfVP31q82o4r8!#!YIkR>SC2R4V*V zUrNAOHN=s7gjCBZUS3X6oGfB`>}LC+T~HjsU}-%_xUQ(Kn8(6m@-Wv!|Hgf)b#5g_ zu`%q0WtA;P&$jB8gE%@>+qg~*SE;8(>2Vz``C8hG&ht5Tiq^@k5@Ak@!h$SIqQj8e z*Y5>64w|FfsVO<@pYrc+vs!QiL^P{QNy+E@N%@YYK~Y14ecOWPK@fd2zysONvE$s) zFyIC)=utR(FC0~7=^%Jy!*SJi^nU5{%XFkja~nE(+ZrMAwbW!zcNRJDh}Y^_tc&>%JIVmUwt4sn(z0&ufT%I$;ztT4TnRHV>c134L+cAQPbDSun9*S9jP=OK(xg zyd0zL?2m#?yAewxjp`yYA+<(aqTlh^q^C_9XE4v9t(G_` zIBaC|_JitMK?V)uGR( zms<r_1A`fTQC9{ont+=mb+-~r^3PbY! zW*tMiC=+-a^w?Sw*EpKBYhC+dh&S5G*1yLgMOSs$q~@O>r=9cfRFbK8dD4bJq{G$~ zX`payEcH0IoL3&%ryEdnhxQiosR)o|hG>I(l>p;2g?E_$Y~@_)?>__2oDng#YCff8 zQ$nmSq`@CQ=1rk|r6ssAVnlv70cmIEG01i!+$kgW@Bj7o{sf1}L^3O&g#N3$AAQ=s zd(fQ8KNY0$^#f4{(^;XOYz*ekN~*IfSp hgZ}@4mG-%VxfyY~aHbYKegJ&E5R(%vc&_LB{{UCzYRdos literal 0 HcmV?d00001 diff --git a/doc/source/_static/schemas/01_table_dataframe.svg b/doc/source/_static/schemas/01_table_dataframe.svg new file mode 100644 index 00000000..9bd1c217 --- /dev/null +++ b/doc/source/_static/schemas/01_table_dataframe.svg @@ -0,0 +1,262 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + column + DataFrame + + + row + + + diff --git a/doc/source/_static/schemas/01_table_series.svg b/doc/source/_static/schemas/01_table_series.svg new file mode 100644 index 00000000..d52c882f --- /dev/null +++ b/doc/source/_static/schemas/01_table_series.svg @@ -0,0 +1,127 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + Series + + + diff --git a/doc/source/_static/schemas/01_table_spreadsheet.png b/doc/source/_static/schemas/01_table_spreadsheet.png new file mode 100644 index 0000000000000000000000000000000000000000..b3cf5a0245b9cc955106b95bb718c039fa1dac74 GIT binary patch literal 46286 
zcmXuK19T8{4*R+t_$Fw$1l`zQ6xl=bo7}-RE}C zoUX2V>Z$5*1vzm9SR7aY01zZ4M3evk+z0@`%%QQ2hGCQh#U4#t48nX{9Vv4dg2EDQh;0g@tuDsF3MSMw(myHd4xoP$-IHtKK-+uKZB<*q6lG;$8&}^sRBPJ zIZbw+7+vhG9s&!Mb~41L>S529dZfb|f(fXpJY3D!A6kXHACE3QLOs`T>d=d7W;hg{6J^Ty6$P-$fi8DQb z<}&r?T1K~}1B2~f3*lRB>|yeoOI2?9kXUrO=h~^Gf?AD9CigzyZV&9$`I@cp%te8? zvkiUh`qjiW`V<|G#e((L+l5PV3Vw zcmBmo8_(u1wJeyGKM=TaHsyvFmxSddBNx`?MOPP8NQi{9ranLWqd8oZ3BAlYnzq&T zje{aA<(5cLgh>VB3Dcu?s%vfd?rgq4CX&R+d;$8MgjlweX2$g@is_$fWx1C=j%G)& zMhDc>_FD6`Q+ynFZ#v{th5?aRyJuhaA;~Xh#f@C+Uyg`EMwxoe4;FkeM9{&Q+TGUQ ziZeWJt14ksZg6=F?@W^fR{l)$CQ7us(EYC7{efUG5`#}&6N}3(=Ll0)R<_=3jRqSG z^zJ*1#1dYfoRo`!|FuJJNV}G&kQ7_ICOHU{URvgH^0B$nVlNihDk=uXK!;j^6`mN5zaZAt=H%k z_5`9RPcjHI0vIlEthVe1LWC4i3V%uat+?dx9fjn)E9@Upy z!2wpJi_M0tl7i?of5hU_(dhNAm$$F=G3dk|VA}|AEe8IiEEiALxHY#$+8w#6eear@ z>&|$~x99QSFAM#S?3|GW?u3S`k-&(gZEe-<)YQI$_2PNby%b;p9}lyc=kvMEV0@{f z4|={u#)^QS9sZir>c@AOz;#Ws70Cav)eqM{=%qS=YN;X(Ug%=)KH=( z_oKw9AZVY$Ao5ybW$*#-xxDO`{S4DLk`3u}k%y$aIq=^_ZX+`OrDGIG z0A}PKjE4*X_{Aqh%C`1z95amq8lt*sZV@LBo@ZZ3{ z9rRy+zk&w#lgLzwS9T_(aE^+`z8Pnx$}lxLU(#>={lj-PIJT)ltO)$ar+nEl|X*V!u1Uj(ho083PvRJ!y!=#pN-<5 zBMBm0`U$GBS3mFNFn)gA%rpS-vAwn#Fh-Ia!@(oUlnYldx1nws&CrirDQ|c+<2kLe zaPmdJ;i06k8wpgzBd8~s?^KdYx9EpnK@`cf0VM|s-3wxs{A-vtt>H7 zyWFBN?aA(D<(T4II}dM+&)?d#cb-X|OoZRD0Ss`8N#J)*6w#Q8Nl8^&Epp}zV7ZNr zPVX=GWMpKGb%oFJg2~24iHaA=+Fy8=i+2?X}k>uzkL-$eQ&jzL@5R`c86PiP30~3dR8J)KfOxB#CcW@7&*hD4si;_Kgq;rN5gicDBMK>x^r6B?Qd_;uv4 z3ZR2JU67!SheQ=107~P6SR&`9n|mk#fDi?v6rH}+s(H>aC=e+t+M_54(En@?1v6v> z{PHF0^{GgnpS=Q@P1HSGadj9d%b1l@i$XJCz!Nj|6r?Pg2w?*90fFC%#h~r}frXV6 zmk48Wd|=_kT2q+3zxQS-@otfc%iDYGVwMs zIXQWuL|(7Qr%Qk+H=1mUA3-rLBUbqZe#K=j`||URXs6BPcyCOIl7wIU{FNY4h@_9| z7w)>ceViB5FKlFppT0mmDHw9~ZKjJZ#EtW~kZ%iDAbwsWr*l1yZ7_;uW8h8DwFTmH z^duDI+Wo)#;FIXNJxvu@#;(WO8lSKqtvGiDqee_E{pMd|eia~*OB&RRzGSS6F8iz% zwegzdEJC{{OTnl!d$O8RMBCcQDMktA%M^>4jGoKGuC2C3S0p^_-b z9+pOwP{g>e#hB?IESleS%CWX%JU_-}dlu~Lav04O;LdQzAh5!I=$Bo(3Vkadc(F6O zXl`G6658DHq@aJ2y*w;Foc(xBJFj3+<3T4Ul$ai`Cdz`}e4WllQ=yz|TCR$zcOfoU 
z_+{A{)|~uFwTKAzF8gP7)!5Y3cB@n4SgIPNPvePHE-o$>K0>(C={+-=5vZ|M>q;99 z(lj$BJCSg&Rfu_f52KEt(lAykv zjl*<{c>oH;^M)tm%?YLL+e>IUQga>8WO-0nFGAMw?gVpDF z{@zh3+edXXxi%C%?YPJ1&PQ~He0|e4TTHo|z1ikNzHXLQ>pITq@hF75P<(-EbULMj zK}+T2xPXn#8&KKJ8ML7{6;j-0Xgv;Wtjb4VSm&7S}z0qr^8VLP(l z8@B77YI}&@T#*$0=Z5nd=Z9%t#30?5*D6ocmcr9B%YNI0$OzM(d+hq#FLQ~^W0O6I zTM`Rz>wOU=G~A)V;!djb-ecBd?B&cxVrSi_We88Db)@?nJ1Cn4`5jK-F&BJyDYt}@ z0Q2s_mgPy%=Fc^jF0zUHz! ze%{X@LhsKoMXPnm=xC`NiVl>FuJb{3?leOtviXY3M_K?o$x!}j!&-~QDro=zvc z%uS}#YPMQ+xmhKVeNajnnT95CtXAp>yB=&(q&wtSpuFXE3CU4jd~xLffS)cNg_l>m zLy2z(jed%ztiKsP9Xn?VE4GigXChu51HLbJz9&vZP{3(5={>i{$fRtip1r>x!>-xA z#m*Lp$GYDhTt9p5Q{~L$u-od!67Z@(RU1yDHe5cgPye3IxlrmysW@-tlD(VrXw>+; z?-1uS%=K|c9kAV7Gx2B#L`*h*YgxazILsRbU@+ONli7`wX}R-$UCjr8dbJfB!>nXT zh}&gud*@FyGKt@$!8_kt5C@x23HK)#c?|JGtazEXFf0XV%?3g-ucH3EudE|Qxhz|C zXUK5Zn>i|8dZ)aw8-5`kb<-OB0`TyJLv}w$>8-DR|`?Z%uf7HAOr$^Xjo;b-9D4 z*o9K*iV3Z%KeydT%|F<$A0FgnWHQ-pw<0iT00DE3uC6Xz16s?L*qXD;T@gRHmQzZ>?B9#Hx{9WQl2{qE`Ln=odf56*t7Y9P>f zIs4m=Uq?s=nxvVK-_?p{Vg7buqy&-s!4WyW)p$0D`@9)H<4l%UkNYU@HYU1D7I!v( z+;%FOFK=bLn7N^|Qs9r((N5bOTiDb2by3BPDJpFgLkmnBD-$+u?w*x=wxiWsh;2mH2+k>z_Tv1F_i=BY~A zRRBo!FQ@_Yd-_MnO#lP86-#Izf4;zbwBJ2XWqJ@?4>sDLfEGgGDtCIi*i4_%92>+w!Rf#3nHqG3-pv2l<GP!P2;~UV4-1^0F$~MwWZGhozs6?868D z{L&lKYrO>Zun>W4Z|D6?gq9mS0a-2C%JwHJPm!93X-zvG1XF#|KS-;0Bee%Odu$Um zMS6X8rlN%$c zbtRB*YRuteGgxjPTBJ1C_dxUoKA$&xWb*SYcA{(&*B^Zpx6v>ZhD&DZgqOOO_=23b^OZ;!p!|J zpYGeS#6u~|HoM}sT=VP6lN=wnZX1PSW?8dy{1AHWTPE)j&$L|S^R?l+ zGJSy+@@HJJHX%Y66jL=-Tk*IJF1}j-O>+}qp0VqN*@6Vk$n#1Y2>pwqbIx_eHz7{i zLn5b(4ff1-@o#7}Em5ahO{STIF(D57`JW0oeosC=moPl99kWJ0aM<5rLjJVv^$yB5 zEBB#m8BA9J(zoQFKI^6$6o|lYx_k95mvSap;AW2au%>yvkxNs}5My-GP}xJ<001iP zGj|=+_0-$Rt2UdJ1G8o`ZyQG5JVxJV&9DHX`oe6>WqB6C{kjIWq6000Kh7IsUXHrk z$8h;-w!Dm4DUNO%qu!4lHD`s9>Ml?2mJGs(;5Si$XueyICxh}IbQDpR_74uPRj1si zW(NjOckf%yHlX8^wGAojw%fC!UvtlHHi=7iAc*InRMwsw z+_OxAPrqN8Pe$+QY1Xrqj1ZP^rLmy9ANmk7Gr^>-X}7@?RK)_CRaPIJ^(~KY7C1%W?Msr<9?myh(qS(FTLBzUs~mD$%$q( 
zrM#J*#%|(b`(<5t*AHs#YR-xLU#kpZ+kdThi{W(gI%kch(rWnU*{U%qIT=rO7Q{;= z4j)@SA$Smb=AL45tIs<-8@+7&ayHI8LW5gXn@(Vq_RkDC;AaF(?uU`mj$fLtP{%S0 z98)?kovzlPv1m{C_Wb$JANbf;oBkeU4pE_mCSp+_Jejwww2fWl_>gq8n11D?e$bRB z0Ef+sTeRB?z{NZ0zETWuGn+7#F9KUwgDkRYd)J&>IyhDq{0qSVB`~G`lOzRtqsuUc zDXp$*yH;nWogQY4pV?JZcN@)q0DmkIf52l06A&;V%x?SZy;rAIpi>t+GV++wyl^MNo&>&GW@#P&A{2Gf=27p%RIXc=r0b#@NwMVV{A;!w5^IIf> zEVVz(V7+M!?%~eZtK=Vxk@0fzDGu9`YzQ^)@UgYX!Vo~6U42p~RbpU{qu<0U zFPy4r3nHw=>i9O4z|BdYvQPWp6v+_)up=I)qb%;l^gG7i8drb)is+2pczp|cyO#u` zmeeLWE1)*|_*>)i(L}r=Jrp35(_AH6ZlM1nCI))(yUk5&1UG!4XhpzM-gR>wa-Z~w z%1{NT9RXAA zevJ1DEQ#lSc6ZR-(`JtelFq}boS%=ne{d~XYkQZ<4dVa9XwUo8`Kw9c6xqlpCt2hx3c;hhy$yeFF%D?>Bu#N8ylfOIbS_=LGJn+7@sCRvuUz2uVA)$47Sid?kowdBrL{PN( zMr0pv^&1Wy5Mmj*n4v6p3zrsCiA@O$(Yf$?(CzU|-<;vKf&(cZ-a>5r03g{P$iC(6+U5Y{YFvWRsa?1$ z@eFO|{Veqw8o=wmFch1-8(n`Tz;H(6j}M);4KYurO1(V_Tru@v0swS*%;pS8V7`R? za=#=|?#YG%0CdBT_HQKZN;R^;^SROsfM0864TqWhdZXPfOLpNyK*zZ6J=x$ZSO zyGvl^NiB#d1M@1^poa%>=o0?nV3;$$pCss3t0{xO&@oq2#T6-v^zXv(kw600>!sB!ftOM>2~_|kf&>h-RsNYxw9I6a@ocf0XS|=i zH@s7#Iug_xNE0d-4p&y;0jabm2^i;UmK^1dc_y^*Avf5@zhq2)IxwLZNT33~ZMQ48 z9?}nZ@OybTo^XaSJ8RP#EYX<(+?+-_> zeA*f&KFv(lRbSRE7}E{0R7Y+*`)7Qxen5xGh|-lyH2nO^Ucb!|@^108bjcJ_e?tr% zp+E`=K);j;k-#U{%PQy;ebcQ9bU@l_uhzok>Hc{danD`7jd}=$@My77Ao3gigwtXq z{2wMOZbbT!EDCG@xBvhRDF-&?i@Xn1I@~qAG}0d`NwbHXug_EL_FrS|lLCHGIhm?5 z7Bx&GG_}v6q4JREfX2&_kCL@`@{~zH5Pd=pfDKLApsdns$!5m+bRoRELyX|U&nj_a=2Um(T zG4;>-X@Nize}uQTptn*K*}OIr^W$~&2&aLulH)dM5{`xjBbdIUdjI%1HF2utWp^i> z<1#tKLnPa0y_5s^I+PDb-K8WI3_wIDA@eVR=+G2{X!!&GCF;f8iX$yirBHGIy90ra zXQEEkSO=;h4crm}HAShn!uWMptTZT%X#SD$=!ME5cpjh}bNPHjLWGB?rLI`^lL=*i z>@bC{$AH<&ji$L)$zbPiudqLLk1y|svZ^np91IOvaSph4B$k;LgA=%>dk?oSy$&ML zr`yLOT1KscvIs?P)6Dv}mY8n!o|L<+tRPHw3=LenN#j#uqEk(W; z&-lLALIL+Ey=TI2u!y#?hao|0OP9CDP|Y`PxSlOh;6#x&+g@b~x>maxZVyiBcmf~S zXW`<_RYI~TfXgqM%N6eCo=5ZZzH6)VTc7oql`ZRl)o4#EXQA(!q=Xv}xgr`H?hM-) zi&8Cb$7#xhg+M(Lg?!JoC0cK>^^9*!G%hSpTE8%P zb+Lh=$Kw9P+2zNCYuY$xh5%`?TG#OUuwC!*hW)lb3vGePVk>$4!at0F3>uv0@$!Jy 
zi49U@hJUttFU>xV0xdOL5D_Cq>dSkp>_0Z;`o_pewoNJuCL)E?BPzo)MfCHRiJSPQ z4Dm-aeeCcn9;j)(9fOCZUKJTvul-hk)4RYC+pHzInt#Sdn};bm4apxHpRED8QCc$E z%#&^YI^Ru3XYEsxW$-^oxBQGtFV*)E8Q~^9C zge$_|{zah~eqjOLI_|d`oQ!f3UWgN1XD)M1SBpYlL#o)`2kyIZCQsX(tdAp{2ijqo zqQJk*9r>SjJ+|?Ui7>B4~DM%4ie4G{%)muKKoEbRZhyJJbEtpSKx%9Qx7fD@frj zYh^UtriDZe%@Uh!&AR(oHtB-P%%cj`>h7jE{(da$%WeImTrOsKakG5FF0Dp8JZ2b_WCdRUz7`Y=4`j3p((%GoW4R#JiGcb2Tk5ZHy4);flt<9S zU$hy$F_&xzLEx}4EVV2$j71rb?4cV=hPd|MYjK@x`ot z(`rv7q_My#-yRLcY65BAL;kb0XJgqQBP_i4NagyD`SVa|ca8Me%@+Q`r!URu|6dF6 zB&0i{_lYecum15>Qby+P;UOE8wVO}Ut}_@Kj>bhpL;D%qGK*Wbfb#)|j=ZJEV;B0K z{N+2I0$1b_*TdzCf{%IB;}Ai3M|;B&?aQTk>86V-^@|&b@2hd%pU&czH69xn80hJC z`}}w(Kcj85SUNe4yuQBPuJ%O5!71l)yV)D-DOo_r#(ugV!eY>UTjz3R-Lg12=(uo< ztr`1%b9MFfVxXinzn!Lz1xi({C@U-XC`QT2$+60_Ff)H-C$6opG@w#)bH8pYyjNCM zs^~IMQs&|_;H)(ivKBUkJ@A|9VcM$g&`Re9AdFJ7d5iLpP@U`USuSr%p?P*jCV7 z&pD!bewDPd3tz~UUvo0j=9V%-LW(7MClGMiD3OZgbL0-;rugNI$&O9}f+4feK!bjQ z8%lcJ--8RCKf+^neLU}td}*S8`37l=$oXrW?ejx_ri2U$tXFe{o{i++cIHj=?I%b3 zEqYns2(AJ=E>HDmqj5sIsy~fd_2Mfg9iV`YfreIavs}bqbM~iWWq6!qb_LF`bgM3* zH#}&?*6|i@BpPqE%W{$(H9v&EM%SU%_3q|I742ZZ0_*v?O^3_1!dd57x!Wu0>SRhf zfQXWeRO6mOlo%QZ9T^>UL#RldTv^9>dSc=`H246crRAw_IckKIBWg{}jF){Machn3 z9X$RQ>2}E%?s^@m%yzvi>Mjy8c?h6-S(~3i_meGI8?m@8uJJBPahKxH>KegXhnD;P z%~FLW`%*Xe^$`&7HlS!swO$c7-_s8XvlCPbLJg+u;t;o?4EzewSSoU#47A{@EAa z_+_Smf4RMt;XU^9@}6t<;7z5)-H;Fwc{hJU>in_Z1Jm{3zMul)+C9<|Dsq#{qqV{| zgIwl$E~~^uK6}cDIh6Jipga5$KuO4WRVj?p1UomQz%XP-zv)})xVBhrs;AVwt#QSZmU)re5o7#N1bQDuCaO_nJy z3*_O(kGFI+9L5`_)&y~aBogJ$|#=N@fjD}n$2MTbK(@*M01Z`-zn_fL*mQqG4h}g5> zXnV@CE)**lMo2p>syP$V?cv2*`13~ulSF*3L;xvFckvv0H6UZ(0t`VAgR zbm`*{s_#0R?n?FZ>*E(c{aAv+Hg_Gx1oWZZ;~q&bG*na`$K@HJ-*H5ecob4nQgn1x zQWN9w2nZ`xy3gd-(Z7E)Nh(tB{|HBB9TNe<&(NLU51dIc?49h&o!QCAG^i6?&XDUo zZ9VA5{*>r;%g}%Id+XOOj3|%KryTE}lq!GNEU`#)cXuBi9v&y0Fsjeb&mSL`+NW#N zwlOgoHfcy=URYRYYEH4Z5hQD~wGAG@LpB}Zb_>$odbA8Y`BSP`aMIpnIzvFmS}5T& zUqkZY{Cy-mWJ>JgJ-M>Bb~bQ$c8wSSAwruMiroXZ7Ya?U%LnndM!hI|Hx 
z*T~*7OY?RXE|ASBndlF;T7_GC@+L+o8Q)^NZH|KawzG74%5J#1xlul{m(45R`pLL8 z_{VJUN}q?IMi7Tu>&w0#gy0G9Qng|^nYeqMb|C-=(>Iu;4i73_HV7z^s-;LHL&WT& zi{f*7l(rU{^|yvYL3P6HlG|*z-S_r+3qv6(n=?@P{W;Va57azaLVvB|nE++-mmAZa zi;=(LjU@5jS%3_!c$-Wg##0!#_hS=x1KrtCc}kC&?XVpraDV;I0!U1!lpOq|Ea>QQ z8am8VM6Y8r`g3FV+05%Ll;0E?MpUk(sQO?&MfTDSJ+ZBn*f#0D7c$9mZCt|XK`I%i zm(e(UYc6vgnOPYXnUIt`esi0&Ux|h(CP{kVYGWX2Y%JO)fQ*fP^|!A8Q=d46K?hB` z&>%c&EM7%bHEa;I;H7;+mPDkfjRaayR71sR@g7(08}`8TMFYN+v~(4+s+dazEdzt# z$p<=~Fi40p0;sTXV)+{V6baZrJ zegpsBvZS{ta}^>~RQCMp6szRvSEQRnW#r}cy1h69`_#KUn*99*zc&)YZOI%uMDuv` zczO(L!l9-ynq8lvGc`%47F+Gl50mE&7N=;byq5?sT#jSk${5$~rT_bWrE z-?>Dm9*ozlx9BaGGigx&U2dZ}9xjue?4Pa-6_F{E%l@kWWB+&|i_M;VZ;lU&n@zP^ ztlTeliZn|* zWA1m3#zEsdP4eEKg{r%1XWRE%!AUp+B;vb)bq9W`75xR;~Nk7 z5qVgzDX`m=hNtIK{+7@tt~5G5+KB0h-5s3Da0Nt3NVtMRAixEJYn4b*X$%z$BzJ=s zowG_xa_*THmWSD>>u;A%)sP|T>g%EWCS$)D?~|cqLS z9+pdo$n*;RRtCE*xPYBqZFY7xIjTx%-e~_|POAsc2xd~L@sx|@(CZ}i=rcPlIuH9_6p z))SC|&;x(vGSl%|Q9Px^)z}$(_2^T!eL?y^5Nf^NsHmD(hDPD$;f4%hf3{Z^5dlk| zQqdN|iW{VP9$0Uvb6V|UtRh``le<<@>0ZZFxA;m69ZV8W8Z3k&h%7lKDJUq&?Rq{r zIVqw@%EIEfa-jptIm_qpo#Pbr^WNJtZZI>c7}K&2`OwVNj~m3|s>zF?0DK2g0Gu8LZ0a7$|@h zH}~Wpb#JWLF{7|S1E`R94r7B&-GbCHqaP9y=D}}Q(UV^uCy+MeuUwe%P|fvf;@ZvT z@{KpLw8X^3V1s$M{-+3x@Cjo^MPy`ITw+&n4*L$z38ph#&PRn?4TFql!}qw*0YrY_ z-~xh&^jTm6{=3|+wpFfO@>5nn*x1PVt5D0k$uJ|6e4U-0dzyZ_dAmQqc6dT-A7Ia7 z0914|4fRd(-OU`&YX*sZ4%Y8ozB8gY5vb4*FfcH{Y@TrFaB1SB2WBXO95GG81^k3G zCn5kjmeGwTFMdx)B8%6(!eYCvK%q_ss*0pOWs{7T@-#PJ9h}I@rr1xbk4}i+YKCsQ zxihn|H4W8=4;Jb(V#bGumi&qodQhF6&8Ie<{loKhxl1FfpiimnPVntJ-pGhtW+h%| ztNUq9vvs+uN@{8<;5Yv#1@7cWGWF_fK#S{6c)ImTN(t0!cjtOXOETRIC@ipy_zy~$ z_%U4i(1?(rh$@o&h6YH!lU#21oSvTQDWt{{m1YyH^z{*QQc-c0I~oqM)zwuJI4Q~b zFY-g(k2T!=DamIhi>AceL3Y151zcS8WVn{^Ww~z>>QqqikTq9&e@kVt z#uSY!to~8|LPYd~JMnrjC~teaUT3K_?rwMRQM?P46A1@JT1TV2F_BafTr??@FRR5Q zy1J(3It+zWQYtb6I>O-fg?inIHQ2z)YUlXh@&3Px;z1r>p6F=VMKdL73H@-ho8bhe ze>MfLA>qame@Zmq9?8YpmcAEOpK@`v6I|#b?d}d}1|?x)fxH5b$6IN3w{C1TZ)D^R 
znbL9KlN+I}9zMPe$Wow*NzI@N*H!Cu7*K}fQwHRsi~c4B16qZcT<#8IgsSfOmYD7F zM(nj$S4EITKm}sd-RWL$PuHhu4Vq~F&a&Sum?K_4wz9#L*;yE&BzH*jh+u-dL|=Pn z+0gUiq>I#IYOXKJk7>oUwKE;+;6Stl2;s4#k&m9l>_g1;W zd3i9ZciisMdfn8ucx8_a4FT@&H?q6>=g(r3moRxkT54*i?`Jot%s^HhcI?>w{EW_y z>ff&+CT8ZvrKKXu`p!<7-#L|!PSO;gY?n`a`*%N*>DBs&&{ zNn6FR}kWdg{0{xMu&;fRB8>3-IpkI4Xrxe4w zCks9xCdT#5`E7};S+H3ljnY`tf}rAxw!$(mJ^0I&`7{Wl!G>BYUw{c5PW@Z@iZ1#M z8ynt+&BFJ_C5eA#db)brj8)H<3>uV~P(SeY@^v^AYO8!urR{5u`<)%ZguONrkN_&J zBh3>4qr{&LXKH$CI1==|B?1CSARj06*25LIaODi=^NW4I0{O0&H$IQ@RyqWRU}4x>%}+ z!}lF0>T?1+Ogj}toU*W0ND6e(&gc2d={)OI8$>$x6^X#F9I2TdJx1d&==8hy`i!Ud zr;JZ=gl#Izs@17UOm?4t&CVqZ>;<-Z^U8%iB@GP->{OJNQqt3Zc(8p1KyZ(amKGKc zuBSOMnif)tIEiKtJ8;UVPMsDrfCPY^mPaAL-r4385|x;yS4dhZ%_yFeLnh zD@i5e=ducthEYc_&=t@n!O-l2cb2Qx6-GRA+7D=xO3+0?G2~KXiiwE{sif*_Zqw7h zP{N1H?au)uelx`lR(nE+CpmgyA`3m@Qjl2*h=ASaM;Gne^+H_kYB5=$1K|QX28P3H zHyfP0|5_TU%XhQmg6mXhOjo$uru)Q6Soub*dz0h2%b=)FcWPt|d0QA?@g$$2zu=f$CM2qqQ7J|fiH zV&xTtMnJ;84RCu0IaUmoVWp_0*u|5+dv$*2cCL&WS{Qf_)fbkHing+{3PJidPhB~5 z9a^C?0IOK-_LyMKADT1Uo^7Ut#jk~qqJG4VV5XLqA5tZmM$#M$4nT%Q1@PKa(;-i3uw0Ik z;ieBpa{lq(EVF2&`4lP8J4P%iLoIM&vSx8h+R%gjnk`S-4}yV?{JEK!Kq&=KQqV5B zB|`@!CZ$9U#q;sz%V(`6D1LwgPS`vw)q!B^@PNzts?WoYv-sE7`kR~fkj`#QW=FCf z8OjNUB=roY3DFw^|I8^76o6G|QMP<&#B?%)VtNb=5ccM6Ad56Gw2dPjGh}u)o%B#t z1Ov3Sv_g$1qE75$6OUI`c9)!6v{tGWwmoucQlu#@)5oze7Wdq{kiy7w@dBGah?(1D z=u^gw>dU{hY11eCgXp$ABtY2TXmk7utTmbkzI~IXw5zmL1Bj|Oq4JZ5iJ?!h z-?UB&R%30g*qGR5KasU2)a#w9Xmv)4H`@08fO%f;HCL#{3QI{vfZ$MW41(|N?d>;s zF(04qf`S4M>n56D0rrVM+!_ZV%T#6gl^aUlqEz-)h2Eji_`D*ty|-xmMn>TbqdG!b z>J_$G9`{GOr&2lDyyZ}e>8m-Oj~Rn+=YLZ58pgLaeeJ~IcrD$dPvn~@A@>7wy^>Eg z|0;W{O*P~?FQ{2|HXexbRucped1!ZePzwYNG&{nKe~}%u;DI_&$f`~sX*#40MZ{mr z<&^Fv_V}43tl_O-O`Fu$I4KJ8?T1G;eM`B0qBOOr@pJCM61equCHkwmM}uaI&4`K# z;|2~HavDu~1!PX|Vxz ze7`Xj)#Y|lczm9#KjF#%QOZQtnGO@LXj@@iAvE#c(Q)lr6wjUmCtzfJNCM65bS04F z(2cos`anz9F(|-4l4whsj3T2HpE*53Q_lihe~PN4#w=Z>t%FS{+ 
zb=FpUg#|7ejmcokX_srS@h#5C-2Av=wPFAEAkFFgr76zEdVMDSHo-yk>2S&uEyWXN4p+{=QZq7WcqhmiE*R3)a;4`?=;3G%p!>AXh{GXDCt_-rZBB2jJwV-ds+!7$_1 z#7A`+-z2x0jfc6RV#(4x)O~FfcJK_@38hm5y-# zd9pnu!RU`K>u0Uy9Q|q=W@VNVM;Zj@`P9*rYX6&gQSbWgu{~F23Zpf-{a$}r(sUOy z&?{L%0Nv#< zeLRhQXOOQsN#aD4^R+9B`g2KmYsK}s-W(|UPU_`8}7$eu| zBRSE=_jCUSLM=6?xID>nR0otGHd1{-h9qTt*zD|<4@_F%y;X?IdkW}9gs3|^mwI0NQo}d<#SFSfudizN zn0NOuekTZ9C-Ji#o{t1fOz)xa2@-|-sNHqdNr81O3aW^JgOHR`p$G}2-}l!?14>QD zzC;h=9@&c(%NJrqv|DjIs1P36=^o_An4dR;bF1AY)P}_&@n{qQUlFKVT^auEO9z2y zv;24^CCw%?1n77P)ZwBcwHoa_%JI3SrGX)``#s5|VP4@ixy# zdH@0f;>wNZXdZ& zWwphqDnK#-10ALT(T;DFT%Yd4w|sO_E>@dnCMI||<#Wvn*BT`xnyxg|RN9YrKxmnz zr8yx6*PH%y-P5c52;^n`ejB^Qfc_R4E7l67c*ZGLmGL~z;>}{z{@+H&Cd{YBO!#H4 zWUf%*i($yGp+XvgTD7Yk{N)!SmPCXsEXv0XF|vz_ z-@|aSjTd*QL+f91&3UAOdg7Wvw)qD12oWH6pxVQ1-vMNrOC*#i(e-;F0VP2%xS4vK zg;ea$dgjl&V++IZZ@39SL_ISCS!-)+s)Mz0D>ibWwvySH<2b!MHQHfEYwJckTC7Dg zf_M{h_BYG*9aT?xW4bMOTrfbfoA&e}K(ClVtJC0kzUcOFW`!p$phVso9)6zB%Z}5I z$o=Y9o3>m$sY%|(Cae+jx0u}v@z`RYPw!i~ENTNo*0DX0l~=PJ4Qh1QeRD;{-}4QP zrSlO1^3tXm&8-!#zc{^~YV&@N-G#}G=yg3bG<&2b$ttQyYk_!kPGGiwen;QIp{ouq zxIcwK4_sg(nJ&p6C0>PVG)P9S?7i(kQW$xBZTU}LS_9%2eG)Ok1=x$tam-ydo`z)2 z!&neiXK&AxGSO69E1-CXYYnEDqOONEF@#od=J3GrEcZ9wJwG5Iz}390Y^Va_dp*QV zCFfAshSO0mpETA-yTcTht81*s_j;BqsR`e88l!+RA08g6^yX+*E4-V#Xz#pi(oxhZ}xi8KzMo#k^a$#@4lJ8Tw@ z$!2^lUfxbSyp@IBe9CgQ9MiO?x}DK1Z2)_Idbs0Tjc%Q+Qklh!1|l9uZMODDBhJHq z;$%Cy=SfyWr8ot-G<6bFYNdxTH99kSLV*sg6}vjrSL zlk=9>hR7NW!Tt5?3_^7}0XFBG!P|&&q*|+I>t?ri2fbk#Dla>`zre4hN{#Va{fFvm zNHVfvMa6L~@+|H{EKDwUkK$--{qsE)UES&XyG0R`u2b(t`K84h+NsIO@=d8H2a4L_ zBCd$H%F18f)vST`lglrtpaVUfZD{@_X&J6Nw~7#wzkcD#Y`Sc*m`+!^w}YZxR?B6g zy2rTNmD17X-X+m#y)Qtysnf2r>ge@ZXa0CTy5QVx#17W*@_7|cF^)zvn8@oR?f;Hi zAb=5(kqv>u$Yj%JcH|=qi(Y0n=nRpcmBcQ`MI<|~eDV}NS9L3$f`xp2@ zxhI+k5v23+dTHux3-tN!8K%`)*Pf}49AxOk(8ETIm~^_NNm8V!;e4TA{Qh!OPwdt@ zYcQup`7`wlv0;vf3c6eIn#Gaw=NKA(0bX=2~uR|SVuv3(2W#(v0+}aFMVurKL1;= z5mMx_Bk*3f%>A^OsJs+jR`jC|L!OMMyF8 zpf&XhL7u+f^d7{ur+-|mx7a3$m&j$6o3ox8OoI#$`~P1HkW}`_Y_meczEL+&o+_(3 
zZK0CP*H$M^Nm<5DIx|bl;Sf~aUFU+*<^E!G{q)oSUFUS|-`R7x5g@=+xl&vG-2~Bk z?eKZ4%hS1QSf!%B$$DK?){npWvX$_&w7SDuGej1Lzj{kOT_j8jRCDuM1>#?&$QPXL z7;{*}`APR~)pu?WgHM+w7Zy<1)>}>GC(IXe#X{6IJ>p4I)0Z*bzdv|bogpNhdNabG zU@2>mDJu6Aqs#WPeUFX!sv-f`XaCG4D2BZ4~qOaD_9`#zj1q!`MVKDxZD zet+Mti8gw5^yO!P0}X?>UawL1h(@2+gOsPsmCkY)cW>*4a3m;?pt1Eq)smLAyam})&ztKM_^+a?_*=^VQ#_XC#UrhWm(No4zd|v^ z!Dq1z+4=vnBHm{O5QhF&S;i7--f1}WjjO!xD+~sGO-04?(@7^b?>`=gPkqeEdhL_b z#d3c z?;O6z7j!r)8|$kqde7A=p9_+TPG6}Klfe^dsbcwkH-decxiQZgai~1U_b{Gz4y`de zV&j*C3c>y^t|)*a;CvV1g_J?m{DQ)iKoU?mz#)eaO6MhCr;fw*&O~U=Ih^6b$iqkhCF#Cn$h9m+I(^+`W$(w-%4`2<1LMC*__64|Pp+|%9R9af z$)s*WztWt(5+RP$kVU0j+y%2M=WgL{oBP-x??Fi7aXAr^oyKM{%F*MK^btJ7_S9)Q znyk zJdVua-I@sWZ^Vffl;z|gsi@g(CYx-cUMnxS%ws#HUXqQ`m}_7DN(%y{=;_tLsgv zF`>p=tk}JQc}R7u{li@rEEp1KdO55F?&|FCb#?Ue`PSyv%bVksoyoyM^!yIbV+_t_)$!8AiJwg- zKjh`@4f^v;O^@8Kd1YpwB?==-J1gVMK`R*e`s!6aqqExK7W=w6IXTPkdVEn#Ax}d? zgT?_5;NrrC&sHs4bN5{ir>aiD>$v6P)J~YJPnmE8?4C6}xNwG&6_ID|41YhHiYz{( zn+|2B2#3S_9-E<9cOKrx_Biy-xBfUhF;Ou~#LC*3+v`^cc_OptyGpAQan_;t3S8*y z$^FZ_mbwc5mjKzVU?4TCiWXi}k>5^@o$Y(Pa@iMvV8U6bg&IL$A)-P%VVFlEuDwyI zyn=^jV?m3CzOHU?i(OeOyN4w<|gD17dVGSP#pKW*!nakkw_&8`xxD zwD0iromJ*{cPt5difkXyu0r*556o7~7P^;t@iPAkg7(wZ@8O4J66nrao}N~-+psXP z2q^wqUqAaSjy@wxAni06mQpr85a7QKNw?SQmBZzClW{O8C=hbE8EXuW&kz4bwe2rq zztvQlxo)ppR#rNz-7vQ?#LCswn+F2>jEbO=vK|A$6N>Nt zC65~zjAhFN=D{n|rm;!NQ9~^Zt_${-S`w^@3MT9n;i}i^bUGbPlqKA6Or-XIcpv(} z3=R&85G%^^H>H_J5<+E9`~Cf+C`^_NQOTm%Y(AsB(TQ5!D7-9$z36F$OmwCsm3Yi1 z1`+usphM}=|036uM`ep(bT-%bqWi)+jWY7WG?vnoPUWuULo5IBsIq*a@A~borJPu_ zJETxW33WniJWC>NfYJH-MIU_;Ef;H)Yw>Qrz* zLK4cbBP>9mEXm(c$Unb1htHSm&pN8s)&ZQ{1FtmTCP~77#dl9r^hl_fEsuYl=tLQoP*!?F|s|?wFX>(b}%pX>d)A{342cA%3t%j@5|BR5UqL)wbEZ~Nx z)!ve9nA0?Ck{&SraV6mhlhfS_vR?T|XW=~Knu;v(lW?q@X6jDNsiKe*7xVow; zQ&klNs8@pCN)>-=<|wZ^5z9sKwu(zg(1|>I+OT;I7qwVv%mj`ZcBliguApG<-P+F1 z&fBQdYjJxy`_ZkU(pD^Ww7dmlx%=t5ti0Uq4>7Ci!|j32diT0JxaH#1oHqMTzrFW- zY*kcNbhP#ex-_vCMU6GxaYvK@hLt>~0_rd$EeQ$85K!Zt2}|yy=}5FHDYx(QIYK 
zVf!7AXu^WJBJhLS^AQ6^ty6{e{@RFRGYkou@s(`JH1#C@|N-Ef`KsTGeZtNb!~NRST$jsQ|nsj)n#F5$WF=8A`iI&>xakFi237sP(e!! z5{Av3ApJ9K)~wM=R9Hw)Z?~M!uR*t_^8Bhg&o>GtjZfI(rVl+aHfHe(j4vrGgU91) znG8PUf26Ih%CLp^bE8gBTMAz@B;)}V%NLqc4V3>9w4R)#5m2p#6|n9vSZjK!td)h=sVCV@8aGF60g5@?y6O_oO;n%TlO%d;dXe<}0{ zL?8hbukWTWvjY^O)sWF&1lU{}QUw5n)bL96UTLG?1(p4BcsxM0F!i(MQTcpkyHDfZ zr{l^K44#q=&wYfw3iIYlmF^N@9n=c%uwDOs+bQ~A&C?wDT;6JC z0TA77T)e!sH9dFd?RQmHYQDfE5#lwitbGnW{?GB@!#4y1>dDD{%vUZdQU8ojLfhK1 zY=&<-ST)7EAUjS$zKKur{d`;<(+O~YGN*%zgQS;V{6+1u?-R&_enW5o9nFS8?@YPY;2TKNugxS1X~ME znX-iUsv(z%nB9fbHerStU0t9Azpk`2wfp%M^3tXGc}oxO(&Qf^IFS6AQ&M_OG*1Fr zuJR|-nscE79!t+NGn(7I&KN_Cs0XMRe#xc3p$%=6llP>enDqNqMa9H0FhYL3q(X@( z9kri53hk1$yG!^ILPJBZt*uS-w_j=gJ~%m9>BS?&9f}Y}*YSSl&pc)hZ3O_JTOE0! zx!sy~unL(K<;5eAt*&L2c~`Hjst6XUs1%4Jml7nukHyD&M}@SNR)c=>lRE;3m;apI zI$iQMx^nyqY`*Ssi-?Yj`o6ol zXTsfaq)B`K<d+uLA;ZSnmX4Zu!O-XVJd#MfkQxaloWNvsmjuBXTJ3qY17uA zfvstza5P;fhV)f@#ZkfjLKJAR>1=SVI_r2YZm2FB9S@E{cPSe#C} z+9eG<-8+Npw>J#ck?C2E-XL!aS*bN2Sad)^aq;5Ae6bwp5v~G>F3tH+fjE5rDKdCx z^Vw@5$X$jv)(v`EaE`bG4^hhpfmqxnJXh$FGa5+sKc>ZAF+sBL+g>)D1ylhD% z4a(YlmMI!QTvJ2VP}C!{uZ4e!1O#QZYoZ#0)RC%>67{;sw9^09@+9XPIFZgo;N2{opxB3 zxk+&%{OH|UX+ zQdCxL!nM3>ZTYKgw|xPe?!rKKV`Y5)+c@-KuiX}zmuII2%F?I`%DDjGLYf{OX%g@q z9bw2hI6u1J2@@jH>wQ1I@}XB3hfnj&$tWx+qAxDdRaDj1&{9lKAFrz>w)`_$2`~8L z-PUST9=-HT%O(@JZTjiW-E(Q~r0DJc%=j*7re>C%uA;h(iEtFC{9v zZ21>+JJ{>y#?QMshI)cpF3z~SGX;qXe26znUTzLO`N(I|+MbDljqQ=57mh2d`B0Iz z6H-Z0@$){=fnc)MUj*ZrHvR2l1T3z|LQ?X!IHB|oTB>#GM5t(;@-4fj20S@(_Tiyi z`8gs4pNG8K4>Gc*h88}2@%`St(dHF|sFogAjV~8lzFnpQs|ij@G%llT+uFZ+uKt{@ zt_dr{P>tfS>3>cq4g{3t^(p;*l7gv)M*jY#=$n6j&zu%565D@G{8K9%CUCr(>S7(o#rRh>H|0h#2d~yf=7Mk!;|SJlF4~rA=tK%M^{U%^uE? 
zweO?Ezy(7Xjy!Quj;wlGn*zaxK*_jAZoEKzBPG1;)(n{kI>T!~2nii~} zl9CdT0+E%K{aAT__wjrQo3_5YyF*1qJ zAVh>zQW_2swZzoa%mSgQJesl~XT2t7l(hy1OR@LG)#ev1+C{~LethWE1k()CJFq&ovdC#862Lsv-`E!9eB2P8k^S0#B~lC zrmJS>-jFJ6?X%_pj?toueqUc%qBGqS3xD&Cvzd_9&HU-1Jj0>DVbwYa$Bj*&AjyB|q= zJQodf;Qof!>2yJqZN=AUxBG2%J@`8)00Ka<2ygkjLB^P2qtNZ1BMvXvG#?y!{{jwE zH&wYi#MyZc-K_D%sV?WV%s0Vj{{fw#qQ%o`aq;!{&97X`|Gs6PFU{p?PkF(yNvOei zWP22}QmAM}F)mLjn(nJm;d3AM@5M!w_qK59_r7;}_-hAN+C+&Qwq^JACAMu73+JEB zyh9YSIgFM@(X!Q!xI0|~{mOOH9`|DE(y1cEi4vgSrHe0(AVNv@>xaK>486&HTYbUu z{*uPD;Nt>b~qn=d>Lc~`;LY`xGEHZH{veCXY>Ha6$;Gm$aYVs18cluG6cNtK6 zaqL@PU>Daql|8p#vTsD)mA&n;`~rc^q0#X~YPDLO4JU61A!@jPHGy;*`3{D|HYM*= z?aB}o9QhtUn_ft-aeGE>%1Kd&{d;D1lIBA#tOz$HxDYd3f>c6E z%7`N^JA30zE|`P>5vpV;%>~1(hHDu;+-R-*Eq(gsW-qv^%IfXACTR%dqy$b$X}Ylp zadPcDr?0(gD@W#aFBFx4fP$ig3O~E0Kic5s1x2vokW`=F^z?XTRh5{7O-3Gm*caT< zOw}p#+5E|g3CC65&mzTav%y3JD2N4JSId%>u?5vqz~{Iae|oLWlQchk1u>L2EY#O&HT>Wx#U+X6 zzVh>!a0Y<)u899c3!O<WX_zyT51_x8P;1>)c}htp~3*7e?-kI4czQR33MD=F{J?9=km{cXEL>O>VOzTf;F zMHPwGSN7XIHMydss;JMLW{Z#)7n{K1;2IYz`@>~hM_-NrLdRqRAe7+tVC1V#Z&g*5 z6X)*4<{Geg-U1Nd3Ctc&b}|0r;*NqGRKTG(EHqi&*klDCwNDx5tbjXwb9mU!xD48A zvje7qCh04%ZQQWYD~%cz0U4NRU;v=o`Ped;e}7iha6CXDSv55{avsHUtyQIl_^qr2 zcl3{3C_xme31o^N6)+>6->>mYm=QQJT(q%clT-O%N^)vzb7LpLqk@LUGbrG*`0j%g zvhs)&<|Ox(FHjOTP{0oj_FFN|`KsG5Lt#-$v={Hpl@dvKZ~zKgh-?y)57K_O05mEh$H4rU!q2%v`F!4k zg9B-w4=xG{bxFy@QU1-7@^6b6;DPUwuCx};m~M7slW<<73dx>cNkb{TLj?6p13zMv zpAU1jRAjQ`bT>yF56biByjk?WUeRpWTD~M#VmjWAkCxy90Jfy16*tyqBcoI41ZNE6 z)*v>Yj2+I$KN}}Z06?VB-3C)&I%9HDGXK$GWMt$Y_=P=Ps(*rMw%Qo1eYwHq!BGxd zx{mjyNl08vP&1HiYFb(yAo$bax^V)O_;Ub2gO-W0lbp<#AHLgJx;Oqe|0gYKfMr}ue(G8lS);!e#36X))8 zcT~7`6QTkIaksJ5s56|DOMrB%rr{b11rEg&XE)+KH01Q|lge@WM8XH>vP3v&@~5ul zeH_sNmVu$J!#SIXhR)KYz?Q#3%~^;L$Ly6y+ep$Z zUIi&0?o9$39e)&Onm=vKg8?8gxqa*Z-&_DqiZ=5pho$YGLgUbK-g2{eP{U320f-6x zZJxay*<(~SrcBkAgDg9#d4PbH6V2~WNq=2ftTbBV^T}7`+f!82kE!g3dB$gE#D4hX zBqwK8NiANp3kgL)n7nt~;zhE1y*=t?(1R96zNl0~$tLc28Q1oluc>M@P$K@zoS`ow 
zu8L3Xlq}7XsuCR)U9wsd0B~4}LCz!5#)v;sQ-zMWi-L|?L2G)4tg&}8%Pu5r0tt(V zis;Zlu>98>gySeVC54oDgcITF8bB|Cw8gvOPf@OZUQ0(!BJZncrLUyFM4|c8$E+t% zPyBqbpN&H170{#6fb^{eH_>@_#xYgb2>Ixlohn9+F&X7ZaEGdE71@23K^y*)i%mxfweS|E&@7ybFRQn}V3N{H+5 z-BV0!{o>QiH9n2cg0-bS9QYoCV+zU`*Lu!b5g;S=z8bjoWpMp_q>2X*3(zJzmO@ zDut%P@}%2l5kvhD&zAY&6TEfd4MBwj4F*Z-IE|FoY}Wk;8u&7#d2xbsVYYP~aksKk z|0{42$h}uBbfK=MELPLox)!%Rx<3|8`RQg9iMEL#i(^9hNqP7JB2#k#oI+ceUbNP@ zh%8NrjxQi8xKpiyR=RF$0ZaSRh}fqitFL+ zkQL~<7toRO8^#U60f5o|J_+x?NWt_kWDU&QzAAG#3p?39ErMRyte`4UK{_AU4GWScv`&O&^63V99^Yh+Rk%hVsvSp9-5feW-#+R!p6hm7sJ$ z!4Q0n|8FMqU0LLfHL@~OC6n=c)mO_N@7QY=h93M9=Y9sR1h`?tGa^2~IUKDT&(c8r}@93vQiaR-58FJ9j4-r&U+F1rVP=3{>p|dn=a3R{er2eVK}6V3mu&pW8h~jp8X9RouCbA={(9FIp6lp!aGFR(Ew3WWa7M zM;$z7$IF}9rl0632n=X?!BZS z`SM_cP@Xh(lq56q1NU~sSZH3p^(TA(4soB~o{G8OzD51yo9x-9+e&mWy zO+|})T5_|VpP%QkTLhe*h=_}8Yh9hEt7!{W7cPHHpQ#D09<!Wu*Pjb5WrEPp-Xi z?s_-<6MHYodws%O|MP49x_4REqWtgpz0u{&REmCLU9$04zu1+QKfGf-kytqd zohD+RFh*cLU&s%3kZ0^S++NjeCd!S-#n4ih8C&w3j6T4vH8ofTUg7%>d8D(>Dmc$| zsfB^t(6jY_WaoltaN=mOKCTUz{)*-=PXuiY$ZLYgrQco&Ihvf_3F^~YBen79Rz_5j zLQ=vq*P@)Dlk|H09PV6IE>0paxTjtW(gObC`F-+C%GHI5KBqpwK}y{ZZOhaLAx4ID z{R+kj?%$om@>Qd`x7YQb0C%|@_#*jW^-``q<@dOAn=V7haps_O9325^I4`Lw{~mi< zK!5q}uei)8F7QdaTTRmc#f0Q!{)>R*pR!&C4O`vONxj@0^LiQ@kl+sMl61wV6oZZ7*sSV&(i zU4Eb~Qgs{m{hSY;u{gTiHK!G5&S)R9R=}}}Oy}ERx9kW)%+%mMZ;kb>Y7U1S?%(OU1 zUEeDbYg+52y;s8OC$#xful?HGQ;+qLGI~K)S4_JrUiq39-R-Qpc{eSLhh{*91{b#S z74XuV67DOM5OMNP`*WYBxIX%?15)+VO^yFH)hhkAMl^MTON9f*%nNC5Ct3T86x>qEth&T;oWn*Gss)$!;GAM$ge-1|xV=R6C3>z&%oqLu)wD;|wB z)&-MZajBM(=Rmm~hA~9t_j2Q_JboNx=if+FVqk=bh=8BhXFf&8n_tdrOCvo_&#Sdh zI^&lTk&>64b9zrc!?V1qJ6AC2Xe;64Z4L00wq=-%XoFQfs16eUR&~_MdJVaWNd&YgO;KgHv>`_OCJcdqqJ0BA zBnUzmWk_j52*l;^KQ}}G8XCHx)kt~$-@8z%+R9ZNAfzq7mP;cv0*&9S=SX1Zz97}} zLCf;uj8+~%%A4+ZMz@r7fAz4zg6Q^VJK}kD_TQfS7f$4M+^cpHbsFr~;2XOX1cF3cz> zPDI)7q3wX@N1R7LVL0SgtR+h?+Wn3Nj8re`YM-bhrqg~L=;!0zG%KK$CRVaw=WhZB ztw5|gFL4hhV1n2n$AEY;NhL1_}8Vbjc<`DN}s zhX>Uw53;rg@4W^ca6EKS2ylnfrAtCF&vkWsW>Bx`!z?#3sm 
zv1mBbMIsoJGF$q>K+nL2LSmqg)g8xY=x%`_I50e`7X+GsRfp^klpv+=r;{#^%ScTx z{ZChPXZw{T3HiZxAlkRZHOuxdY`U1@)>T%M_6&~NPK6P(4p*7edRaoM?&)zGcQwt` zq?E+OE)Yw18+E-M7SB&If+<+vcG3?j^|sh4nyBvWk)+cm<%@qb<-9g|4I}X z0l)rqw{SFqY?KBUR3FyY7h|uq1pO?nrfut!vxKwlO(#BMxXbW7s`#*=Y0 zqd;;Qsa3{?U2b+baEx0-lKA+|;=?n~wkJJtMy0Z|mWfKvcN}w-ZHW47$8M3*L3_ zq@A=3DQH}r13{Ynm3`8O+d8kEZuxgLyW-*OrKEyCSL{+D0cg6pY=f3j*Ba55PmR|@ z4gHn(p(F3zjxDHx$)7tAv0zcbf5YLhI0XC-AJbbSQiiHJ%S`IrR7Yy^ok?R!_}nJd z*qb}w($17K<=LkAAdZm#$zfi?Bs(BIO9An;Uq;8tp$`q3m)|e{6C(-bnQ)yA9$j_J zw;ReEOQLIMy@-ab?VAKRlm|(wK=C*o*`kao4oo-iBL!#|PJ69Nwr$b+mx{`OR&VFl zlqYVy7Qdc2kF9VDBuSI_14wcyWI%<#^P#&dpX7o6!0h!!#u0~1qE1$7PKsUx0_GW` z@mp>Aig_hU=b9?k0sor>w)ULW)|I3agut?Xmwnr}$d^@u39^#w-@PCsCieq}ygAxA z4P>uyY%5>mKFtmd5cP|z4OHLl^i5M!qG8(D6AA-7Zbj7}Dhro|-}QTS_(T4dYi;o_ zf^419nmS-^`sEcSY*_!l0R%y){yhZrI{^Y-g0#{EHMqRzdRVesehbe;De;D2?KFsv zqn&*>wBS&OPXPEKA;MmT8Pqb}4tI-3%GNCNlc#HL6xBK|xA9DIb@Qs9;820@_z5j7 zEnc1F@7dqrUy`56G(|_PABgJBb|z7bi-11WZ{}LU)j*zV$zC4wHD{7jhZ$t>W@wNl z23LDU^fk0t;6IO1pb_Mmf{vod{=cRbXy5&F|HcSN(w^JP#nEM495Ij1&i2~(pATkx z{kHKxA6^DdNg$=}7QaHmHtBoZULsUGWrEeYlz@Q;%3vhon6R4O_g-}A9~!b52NKKr z8#A@%o@9V72oYf2$Xz?5@3~pB@B{y+>zBtzi}X?GYaSaH5-|?SL72dD%m^I^X6DAE zoWZw_1*$M@Wo@nDIL!(Wiv`Amb7Ez54vjeT>RlZ_KGDbw%*MWL6|q?P+;B=P&-=BX z(Y-@u_n+CkaDm1{&FVjA^q=NeJUv)GUWrccbzkE9_Ik0jFFwBK|ESIzo8fyPJewi1 zpP0^+gAtPUivgE(9_fszsHhl=fE=IzHi{Fob;NBU>aV1ufEO-EMg|VhEhA4Nz{L!o zF3nt(Py6{>Mm4^~EMBAW0(spYj$`k+&9E5lYg^bcd+$w`36h*8v$i)7YTZ&!{f$Yc zGM3@#HW*xkr(!s6UjVXYYq%BA`DfTmF%wjyy6+DHA#(vr0}DuWq}-I%QU#Ql$23V zd*3ixAKVE3O50+94;LVQh%Lr@j-^}L2M<&}wKTM<5-klkov>Qlpn=8Awb zwX&1t^BtO(3Gj0;Aft%u)@y8I`Bk`jqaVV3Iat7})%>ORjvA`_MQLI|rw;??(ZYGhw>2+T zwwF#|wCyI4lU^P_vfpO?B&%lYmqIUS^%vB-P;_w?w-={PB9zU>S<06JeUjHkmLx(3 zw#vpL@fWR1S@zY85E#HU|LRqqQ3<_R`Gt(N#tb)-rq-Wa{hnL;?bF`UUeIH*9%=g9 zm-#_)?Os{c;TK91K1WsMYr4I!{uhLW0gD6b_CgqNmNGh7EiH@{MrwOH48Hb`Oz4z% zYZt~4Qv{XNQst2q>$LsJ>c3Wx`Bc;rX7~kmPM))^u1*)C9+|#FfTXTLDMMThLW!I< zuVO1ZPEAgHCP6?_*WHsjG-b6^F#%Jve+}l3l%YKbj-2(Vp66&9cV^P96HjVHP*9*y 
zVNB!kWlKKhDOB{@9eswEciLGx>Z&WD!`(8{ogG1X>yY#0EIn?5J5pr7hr5MI!}1GB zc@N9c?#*9c=8vH&Fu-d0;$n|`Dcl-nTFTvX3;X?USOSxt6#}ZFv+LqGG_=oFL9E8( zbV2%;t<7&iVa7Sy!#%n9ylE&n*|Lnd=cI4K>(Co<&HD_|v1D4Ys50@f)%M0Je6fBc zg@n&{k{=d$S!i!963Zi6nNZZlY8)NcvusG`;Bhty^cwQosJjmh*huN@v-eP^CMGWNGfJW5wvRI_5r z&&9@1r7*E66F&B;RQ=7{#X56riIIql2DNRrqc6uJvxn?+(!f~4F?-h*Syu}8c55MvJ@`A+BGd)6V_hF zCOKrhKXw393}+|aJRS9O%sgsJ$-0DAR?q@KA?0a$ZX_iE z@h#~q=wU=dVK{$T89O_X-zf&{md@stNNKE{CEX+?e38!dtBt?|)N0+W4H9Mc;&DUkG#jE~XEmw9_2v2G`3l|d|4;ucrF^=b} zE`8`j0hnszcE|NJrs+qS!^gAt9p+ zSyfp%{2cel4jey+OSI?^!Z#^l_*rZ!!uk{$<~b1P=n9I@30&t3>nlR{2gSwqk(q8}JoNJ$xM z!zX!&z5rlpslfq%QdAriH$1I3iY(LL_GE27oUSb8!v_tNlU(WZ;Yy;W1}K6G+WKcb z_}i#EDMDK6&2{JydAXIJ|4PstIwFr&Ewh8_Xc4>my$*7t$`WYB7;!&b3`jG@=j}L-`$|I&rliPsQFbYO2v|{e4_epBDLXwgQMWoCQ$o!kAbat+ z24r(ryQ{M1z~aP`(?<(WVDCaa3AXGhO6; zdlq8%O+5hA$Rh85_wL2{&!zyiqW4~IN~kWM>!JBURbAz`K_-5O0PIr35-OVJ2o?qD z)YSofr4|>~2(--8uat;Znw`!1Aegsc*W0r;nH&oM;Lwkj{z14=2EL2SRzCENd2m!7 za(RjxHsDpG@W@mqFMAEb6N4dIHM^ewHn}g)j&WHw6tJ3mp@Cu|reU?mEo5iTpjEQo zgr|~VdKJ6#B-KHp{vgLLRxqKntw2zYucxlw8TuxSGwk?HTn5F6T z?i|K5s0y2K@W0XhKo#=nS?WDi%3t#$O9n-E^=I!^gjAZFmFavz?DK_-0T^4oSkjDm zw#$1Ls5G2=c;VDEzyluqnJPn~owu@O$Mm}LLXJ34a+1(N@xj#H#E57eLGrd6tv9JF zoh#|JtkS_oe(QJneeU^jL6Z?#TumfDrFxLNq^GEEYhl++-{rcKT)=Yh%h7&79A;|T=Qm!r z+sS7yj=B3xGr^Tp(V*eZ%aNA}0I`UZ5!Ixx9EOOGKp(mPpk>Ru8)UEuyFm{4Bty(PN`Q<~kY z)oHhwr$2YN_5#!Vz@FZ1L;QN;ll|6bA6YP7)C{MhKDt!xRS-x72c`_q6;6X3Yw#&p z=Er@WNb)u$EJaBxFCBqy-EMhZZH6O{$mp6Xjt!w3d?OQBW(#l!#t|W`x}sD*7jm_!y~wwJ z{^U#SGfH?BA_U8W{@bcK(Oa?OBjbu7$dkM#r|o^``4&81yr-?DDcbH6QktqNY~Ex4 z{x4jauw9}ZX83N8nZGmjlwK=BhDi7PFUSd?O#A^m%i?%` zJ_xw=(5{LHN$k3FWbW*@fx-e#EWvcwb87kBSY`nWuJO+#7_bhII$s{xk#*9mim1s3(*!HqE*etweg-l|8 zhAQ~9b~~_o39kNCNqM;lslw<=O!=GsJ5r|;o<^^V(9eU<5M(m z_>(NX!vB$Fn~Ox9n}UhCsYKWKKKD~2NABqMt9npUYb@6z)1Bk6I{~Tqa0FUn%IJK6{q~9#QDp&MLsStLdQY*ZSC~ z_LSvje$3HM2d1R^cE^@3_2*q{jQ%3jF?$>4I~!w#CqK+23|tJ3h0)IyvHIM_-06I6q9w-WV0_w)a4Non$uR z&e*jw{TUe1vHC$oefp5j$}~?$=5AduB|Ck|6P6sFIwD&oYla(XW!Obf0Rn`*v}pi9 
z&CO$0WHQglTkerZZBE7S>hK=oFFg!Ay-ec$L&dKR@qrlm5nUeKt6rXzq+fL;_im7V zw#vIdQDRbt|ETy$!c}95?@OK2nEr!2@}~pM+keL_ry`Zi53^pVZ;zjMbVp6DbHYo( zKug(qaiyUkqt;(pkSyLvZxwSkxpezJJ#kh;L&x%}Z-zlBgl)v4zG9RTT{4y>rc^UhXM5h20t2$MjGQY2KSkQm@+CYsDoil4K!9yd8Pmu9f|$|&Hc*G7^F zj@wR`P6{`Xl}m5sgHGP9UP8~NMo-)L+N4pY+0aB=hc?92Q$!j%Gvt?KN! zGzA0eaaxcj`>_5x6=ZMxpSr#>EUqS4a}pvDJVAp68C-(9OM<&waCaSKNWuhncL^5U z-4Z-FgF6IwXK?o9`*!!`iD>6FDEk=vOmf#CTyjk-#8oN!O z-$y!5ollOb&LJdI0&EEoJlqdlb;O(-ll=vQpA$HV?AO-bX&5p%0J} zw+2_&bVA`HOvG+J0F`;}_5k-=Mft7)vZbK)up~C+JhE`cHDbK#?^nM2jgrOX zl$S2PHNohDvd||&Wp>g_G4Z-RE0q&~BUO))$TYM}pkL!eU(Lsf%vP%E4YMJ%U~|I+ zASL0Lv4}Akt_1@5Fm^NkXV?WS@t=K?LvK%KN2lt0Cd8>|=sgDSf<=;G9>#D8P-6^X zVEFt77#Pnx!?Psp6g^6hZmF?@B{(!kA&A`*^8g*VodPrY9}V{x@960BpS}E?PH>Ss z_p@KZbMzzX*tD`@zawY{B}^+nt+v!1-|PBWvNXOboD%aYW$#C^Rq8ck>i6C-sM>Ok z2-auKovShz^)(t4OK%7Pwyd>0J_{MSJb5UQH?NKJQk#NJPVX@EKM>+~;hn9`2>6s! zCYQo7L`11G3<9|dNA`&;Pqh_>%ap9F6F3Hh3j$eeNJ+G8XsTfydcUEl9bwiV zm^w+xLIU<5ywp57=Av(NO8(`0Lbr7gDv@yakzq)RJm+5+###3w=v3U5=tCP8xeAeJi-&a(O#RPqpdgQA&D-{Nrk?_$JKp4T9VZJ zO(y|Q1=+134`o$*-33o%JjdjfB%@uRn-$4Rl#=GFeZIo!bK+} zya?UhATvkvl-4*T^zWU3i98VV)pIi%q&t}mwFR|UqxJ4McedVJ5W;WLq}8MGzY0~p zn#Ra3wbKx6X+C#ESZc0b>h8wl#-}L;+C7itRBt3Gz(8ZB|MeLFoaE7|*C7b&449>*@3rcXa}^iAp5?}h)+tldl=D}+;CVo*JOXKBt` zbBaY-Xm4pO_jb}JnXr{%xCqT~Q(&#BzX)}N()LBjn8>i`-k0LkS!oLFc8`yss|Vb7 zpwC=35)%L5JP3E9e8X%F=^oA9!h-TSHdLsnpxgjbZC&jpaZK4X0DigppzchE#%uXy+xD(q8KY@E0`xxMMW8MiVci>+N7AS-uG zbdyTSB!StT7Aj|sep-Dv=R@)r6?S|FvhM1UcG4}Fm-zPA;N0nv6wW0!3UU=0+ z$XT5y@j^W4KVhx~YMQZ$oHs*uQEv4{jRRygrHoW>`Lrfh6R15W8jCa6 zOYL5`mx2}F8ViJ-9?p$Ld0az&BkJUJRJq^^pVO1Cc3sRc?dr$Xh3hBH5O)J*$LnRy zAwsbMmzg=6eNycBNz>1nj`iFwMfcB=bU$of+4_Q4`3B5DVro&HYa@&)Ja&eCYgbpj zY*?EqYqz(WgYfH`*K8snV)lPMre~^zWD6RUh zGp{dmFaVth2vz;-*Mlx!>Kx_vqdDeLqGKVOHU%xbE2c|5GMm1CdNd_D z9G^pI3kj^dSzsOnY4V9>r5p^~x)B+8T5||L$!<0*{lofW7{1`Zl;!)^Nzpo^G8#BuUU}{glc52>cwxb8kaywRtI80 zl?cNOe!c|l)A*1-ANc&^+i?ZrQ~TOV8fCAKd+bzRj8FA`p+ZIz9Fk$_?e6Y-w&sKLO#r=)#!{o)%v! 
zn{sg5l?Xixv?F0EER;DT6&Rf3@fmffSmSE1{HCM#Non`Awkf=_`&0~R?L`WKsc<@)^vm(6G&0PBZ@$X3Rj`)nmu+S0Y55GQ z`p=z}V(#qF01ly4Y9=PMq9k>agCPZqSZoQAsCK7j3XPr%b>>bS$!)c!q*F&kZN1P0 zrg|kID$cbnhCp+zc}ZE=yKv?*qxMq;(y3oykjVcJu>6dhJ@uxBos@Kxl$^&2DE||X z3Vp&{b3CTupsMa9^eS0VZ*sz0*7M2qa)p=7NGWnH5JcEh#|B3a745^0eq zAc=LG_52e)6(5`?SpI#;JmlWwC#HqQ`zM(oKcZ9za?{r+ zgr`%1E;h?od(pH#{P3~MDtq66UAXfdqR(EdAZvxdl91Gd3zxm%j*pAy_s<6El*C=e)+Tqwyb5FGJSCSZ- zl&4{!J$CcwGM?r3H^6XpAwuR6o?!7jnS{KlGsjqVXJMseKN&WUjcleF6iAG$MRF9^VXDBE z-aVXvAi*0tMAvn+o`WCBG8E&}0`0i_Y24y#@!B+B->+#bh+-Ev>XFwyoD|@jS-+XP z-*}@S(ba2^S{~KlFm+Q=^q^$wWDb6FjCx#4p1`qJDD*0rrru!WM;AyZ!F5xqul`qT z>v?JjgD7{Cp#d8#4A70Iy3b1ulNb*}f0@Qh!#-SZm zvS%F@QQ`@uO{auMwDGCin`}TlzEh<{J5@{na8u3bB;@-@|4JGQCfC(L(6uBkjsa=2 z)8-+f=kbnTVLdQ(U(@F0c;8<~;2Vwi84yM1-?R6m7wop4W8vaYzIAG5oO=-F=l?L@ zuH5)Ze1X_ffU!RTAwPYeAFw=}{NlCg)6A5D@>WYLV9#^hJh+9*^f4KeZ(w-;giA#g z{yx)v`*Wt!`j_$Y=C&(X?kcO3rMu&bEEforMP~mDZ0+4-Q#&ap>avulj#+W;%sB-l z8|S!~b@O-f4)sTdFC8tA4Jv4rvxqo+`MJV*qQ^x3>UU9L@!vz*hp(Ecq2FtMr@dAh zgk?&tFDD%-uwqk|QCe+~cHHBR99z=bniz?i$csGS6*Tu)j1JXy$o4 zOf?FVy#9?W_U1J{{wn2D(5MPsW)44%Hrf=S&|xNcm?E$IP4%t#@@oqXwy`Q2{)O`m z*blWQgo|USBQ)xNfY5eecK5y=#p+RGQsx6xM!SITc`{LPsspED7D_PccrbFXqJ)^< zk6rg$ZG2IXLEc*PuzGkVRo20VkpyEQr+$kMjN==l{r*PDZ^$iQ__#7Xw^ZZp05E43 zbjSSQxmbwpRR)OI+VCnm^_8v9Z(JBYoIZx!eE-irw%W1s|H;c}3&i#w6jRlu1v`z? z<5dkGXZyteE*GA_*ZY2lrGN55UK9rV7L`(z65#GUpUC52NL97|LC1>AnsBiX9&{?i z+}5zyDfizw8t3+hVrz(r)lVbjN!cGC4;CSM@>WPMkGB>|;rVfD^f0QpT3fe+c^spU zl>97493e4zj3!q5VZLv@+4~GAoU6mY+EuD*FuYv*J^kU^4^(dUi>dvk(B+Z4=<)Pw zfi1{z`4;cig2m)g1;6yHE#;m)7y2Zf_Q1f$9XwV4GK1Tmc=7Sl^~PGsc42mp{yI!w z&7&*B>aT(uN_1aq*P4=ce>iC~hAd9(=|Js=6RW6f*O>K*{0vx|8|C--AeD-Lz#%x) z^Al5-14#5GO-*x>!)oEwzY|XBMmpF;2?s?RZ+$fDYXV2RPXp`(YGEPdA&HOZOai?*&?lzte*dYvn-S#EW5APHNxJ*Wq#COLd476tJpP0<4x`pZ19vyFp;_?T! 
zm4fY>Z$hz!J z^NauSf%Eao%k0MAIuKN6GrON5J={A$Ec{nGG41p)xy)j3-+L++-kH9>x-2GO)V_O^KkP;PtrF|+aA&-lxxjZWI4A49n#w$ z#_NhXgKbu8FP0O_NMn9JPkr-}I4J2zFA+Jyv;GST z1WZC6p6)N1AS_WWc%>=T>>0wol;rX?HJsAsQu%RFBc*B-gMAb_b154>AsKn9ulMLqD%)m{H8 zr^}WCX}-Oa@XrVu8%TF1jul*(-QrF*oUdlK`%0EB#6BGjV#2ohv*Qb%o)|KAyW;&j zu#};qsmA8?%j)g@65{V=*`k$H@ybb@K8}5({VItVeu?zaxx&7pL>D$XGcAQ);Y3zj zhT*9y^D&J=n%2JJ@$N-N{Cf8=YgzMTtHLwy{>(U7)WU5& zS+IK8+bIj|GW8Ec2JrCI@-_jzcTIlx?hz64-e=Brx)JKV?nOs`gtv8Su1Xr)=ru>Q zj&2QJx$GA@#x)em1jT?x)k{8LdD_KvY3r zxSQaXz)&9!QhTkbIUO_E!1oQDPFgi1tmpP~yKJ`p**E4z`F@5I>D7VM=;!Sb$f!#~ z4p-YlC-OZ*juY9)>dV)f+-pBT2wWpf!*&Q{FA7m|5L}wCaHL+)QmEl@T)r0mJhX?2 z*Xg!0wE3kMtH@QS3P#9(YLxu{q<#!Gm^ysDAsYa>} zL{Dv59Za8r!Djfg_dF!lx$5_nBFzoTWU+s*F;O;O>f!vJP|jn|FytlD7@R$0rK^gR zd|hu6GEI260#O|l?&=2EKY;_>HbR-kRd?VS`LO7Grwre_&1QfmXWB38il5&poHq~p zh>gQk{2uCicU>Y4+9AwZN7Z1dk)1>#ojg41ys|(3>*W&RFilbFZ+p zio>goA3p|qrxgh*_aiD(dq)Cr3TpSdhX3T8J=E`Xbi$@jSS>V~S=w_CI3PjT#ea$p z>*=)Sw9ER?pQwR7JP^SAgO^&@$hTM>*R|LXPp`noQGJw6Aq%;auf#V`@^RtC5?3>d-{kOH~qbGeO(D`?D6i5#lClj&q_G zge2E3y7DEGs-A?uN9{U*|mxa4ZJCEX|xhBTBDyL6k>BTuyeCWz3 zq1&{tD|Ls^2`;T?Kke-WL)>&R^q`japY)5)J0>2j=B|d`)J5#$vssN?&hbMFCM^yc z#V>p`Y|}+ zyU=4n$pAa^3w^t>`ZtNd;>8(Y*m7}proU>W9=4EH&aHSXDiOB1Sai-|DWU+0aH1s8 zFJ*|l`!R9Be|#OiY~Qguz%kojEl?GM3V7XL;rME*kcPzyqdt4OPz}?0>EY@)!LvK( zGO_G3Qe~<2#|iKner9{e2;7SWc(y>uvcMl07{~(?nY-eh5Yuo#8EEI1qaFwb!%SG+ zJJ~)Uy~;xd?i+BZ7}Dd~CozffuR;QS)_K}l0X&JjR2|nMv?<6aA?B)k?wHC|CyRvf z3JBK3N|3Aw4=lqZ%Fm;I!141ge}w1l5^*(t${+!}FS9u4eCqcBH${wM4PJJE=XS)8 zJj`Tf|9ppHd!L(Sd2}SOhsxz78Ok17w5()VTt%Jc|JN1wjAQy@(|Q zE3#-IL*#vK%+%o4nG>Jrp%<+PIcVYqKo|bXw@o|{BTN%*c&74|#o%J|(cz-EX4#^G zha(X4-w#vJ%5N$Y@8zoBqOSvR7woO3Gn_e;{B&2S#Q6A z3nIc@di{;oT%3KEX&#P#KGWw40m3V-`6M#}@)}C@ng{o@FE8P(vwq#OVdzXKRX_~@ zVOCd_oqV?6rQ)^!wV*5NvAns}n{@@O*Qrks2*lZ0Xjm9yo!%ECt-9zsltd)~-^?hK z#>O+%BXmTC`Y(PP5J+i_&Uqwe$QWD9s@k|d5HsU~YP{e2Ldct+xgYpGR)XQj@hW&O z%rrMcAj8r6R#230h-=IyKZ5NU=<{2JFWw1o4{6OzZgv|DzwIDuvTJnjsXS+U>V-iG 
zLO`k=r#3p~`ZMmTLqkQk=LW60vylphDXe=pZ&3byz2q93;Ke%iub9&jKHz?(`xPP| zpl$wZCj_LFC8LC+ekHy-0s@)+nW$L%g$tC9UWgH~kFB&vsN z1Dybx3-?WeVuqah`6Ko#faJCU*v@cvje)U8Uzh|K$^8A1o`PhQZCA22)RX%9`gCSQ z{*DMBcHxz_@S*m<9ilDEDkb0Qi-^)O(yM3xPnkZmDLGC>$K|v)+Bd{1%I2w1yoTb; zGx4l1Bb^BY_I04tx&oAKrX3Ec z5Eyqba8657H9q+6fvbpC^lz_Z>uKpb56PP;PqN6qdiE5D4+K*COM~%E$nWOdN?5oZkeBfQuVRLPvgI7M0lWH~cOPe^unp z*vh1(QhrBC&rJQWw>>T14h*0oBLn(#Fz+jj&Z4=Ihx^SM-U#7o<$>RDvL@GB+`meq zNZkOanq<3z!mkXorM0y$nDdXoPTkSfth_014f~c9(VT5yvsKC^3j>(3J5ns7t^Uxi zR32NVI9~ka(tR_^>o|U(Znh!cz_QCq!r)yUJq&W!scIV!p|`1ApK`E(hs{;@Gr+~G zLC;+>nNA6v#37=CmaP&>Kzg<0f33z=j}F*ayL^ znn@p!K^yp*+eIP+XzK`wHm*^y5YTq-SY%OUss1?`Kpr52cRmM!q6~2nds;I8oGT6I zQsoJ(__qMcmdcFKDEQx#StIFVGeMwdqN^|dqZXAui{sp#YWomz0zD3F)<=S2FZUFf zmx_;BCr@GEwyz?jK0=xKJJ!C)INus&QIt)M1wcx#`yCHDfNaY^m%$i_i7r#>OMF;8 z(o=7ITv);X0+bYlJ6g@ihX@j@j_x~KJsOnFOU9SHqc>fRsza|;Ybet3GB4d~)r1<&qp2965;pc7l%vZM$aeFl>t7$sH6ixavswcAZkjvl+#p$Z$e(F(WO zowjbQFbq(ZkCAp^w8r9@O3b+cSE#uz59H?R>gJ^_zt45Mu-EY`Kc*02E{o6cUjQlz zkMdCu6#$xm1FkTSEHyOa3pC0J{rWGr6*)RP5Xx;+t=wRlkB8FrO*na6e?GST8vUF_ z!qDR}`d!KQ2`G+%vH0B}z4x1{LsgBwds&50C{k*1h{Kh~=rm{L;m)7yf_KttSGp(F zIlmnE8y*%kE{5XKg>+52Bzzq5&W`g|_sRDnzmcYy5E5-4t~>D^*av?zsPxwuJ}U2} zShyTV^8b42HRG&o^#oM(_Cp3H-MQE{!u1wVwRgdJdiS!mWejBt+c*sfiLUtKM}46h z4Mz@+6L_n7-E+Sff(V0lZz~7BtB1qlKm(eVgpX=sic2GT-O7Fn&Udj`6!9|JU(bv- zALF$=#cv8+s`&;wU_I{Q`g%3WCP3wCEq1oH3Y1=!EqDWIPiY>J_+K5U=MCH@4ciC~ zx?C(p?NzcokmM-_8%RhNd=yRujH!Ycb4ESdijGr^WlWCVLOUCp9#OxUUT^l5dYe z$GN{dpGg6TUhHu^>i;*SpOqQ!BKPK?6E+7vG0o+PG zTfa++A(I@^h<%kE6Y;!Ql1Ma~;;%}Cz8648RXY0ygZ14^W{LVM3H@ErMiGYO!DvA^ z-8^WiMEFhS3G;o?01E-L{kUcZ1PqqI*?Elh%l*+ZA5CrWs_9DQ?m+BjZL-uQKg@Z5 ze86o0sWOehi=@uBKM6&3VOsx2!mrnB{asV5y{B*ic3IAc=Ees$`+(&sc13o}Z|)`g z@^|$M5U6KD*eRXK@(r3Z>BDRv?*{`qwzH3`?x=pBuH>{W^io_0dl_g&#VlagXs0L) zj6}-0kMAN)=r~*?>3K}Z#C%_zJ1-KSxm|p>_(BRbD&&}>%272`6aD(2h2|zc`@3Wd z7c9KG4=}7Wgu4;3n_M(%exFl405tw-OZl(S>bRbl<2r(O2iKn~3%Z z7U>|grkTmc4{HuwKaA%VhrEM$e3tkiHu&j2_k--Ijb9E+d6sKQc`J$dSe99r(&VWK 
zov`^6yuAzVE5}L+H4jN{hfnEzm*oP>=YY%izFX}>4eHdp)*IJ`)j1d$Q;vEL{nW{9 z9ct#1-(y)aLhYuc5dE|Bv2!zcgF_v5T)FL@hwTXOLCc)4wv4~V#!e;*RU;xIa1*2< zkhl;T_|Gpr+1by98KgDXCeXVzDdJz_6|1yKN~EQN&8L+&RN}pV-PMtkXcN*BH$fCL zy^cEZN3?Kl8^^kH^M&nVQWD-GC=$8>Thz3J0lNcS%}~+sy5n>_(6uZhn;FpOGdlT9 zwQUn3tNwlo#Em$KDTQ;;kai12alCBdrKNW0bhl=s3QDsmBG}-~{&^B!(TB%Y1VG2U zu%J!jsnO1u0hdk(W(D=$Ty-J}$Xiil)xO|mgSm$fD}IHq#dd03(gkzcwGStS{e<79 z($8)a+ML89_Jv-=3G5X{O-!&IvAb;MILSsxMIP4;F}r`RTd%+bv@9F3| zKR3+|9=MwoMAw!$-(9%Dy)~B5@uDVqTR?WZPTW#)JIxj`dq4trdOT_^?EUqGNP$Mi z)0U3aS!acNChj^6-ynj`0k&ulcL|`DG>d82eriA`$%{9`S?sttpl1M5SBiR@W&cQb~xLkK7Wo;3^}^1b!@iA+(P{HoCVSS;c$M$HWuoB zP*;DSaI~G@iw(}y{!@EVHTm>bAmAIuT5kKYqiggwg|$3Q9^EZFgj5Vw+}PxwRi3iGk#b3RmguQ1M<)=c!D`wa?RR@)}5N&*WB&iHUi< za%#n8h%+shnL@8NXmxGvIFh$r6pdF1MvHV1?$C(+KHfpV%vJ>Z7<0;)z?fPjj@_|< zw#>%yk}rHGCNEi?4If2$j{~BvI@A;=fG@XceoN8#kh$mnc2k0xsQ#0g;^SwgagJCF z@8MTXOMYKk->MSGsw}rovyQS@z9IHSN=F>2fV+NB*GD(tSALV0Mjo4*a(8~~^M3m? z{2O?c%gLav-J&BW=_Tfo!P1V?_Gwya8%7GuvBc}#wvQ6VQOPmF8=IndN~+hM{`S$T zaLWj2ans6)nB{TE2^wl}Wrugd?e_4Qxo-l83OS76g7i}>`_)6>n7pgsxmE5*p>gF{ zhnuh0!K?h224Mr{Co|g8bY8*Qt_-CiNBb#gx(^5MOMBnvL*aLD6Ca{?MMRnMe{ z*P5M}ym$yIP>hH!_RVk!x*OQ+-`Jel-6q(1+F96*|E>!k+nQ6p3-c>wxwf(rvm>(0 z7j{5xcgm=w*a;!cXzkl+ta+Yj?6x!Qz7-w%`#zA9_TvEH93fj#k;QKhGXXov14vUdL#o(W*cKMz|Dlt$tg%{ zZef;e!0@w8?DfuzTfHLQEXi0CP;?;)cU_r!)d7a=%Ej*D}y$T7H*ewXd;U zD_Ri^jiJQrcZ`hc&VChJ^;tom1S9aw1R+0_Nuy`SzISLowfJ(zNNJX~n> zu!+?CBpzAX?xteIvnI*>mR`Uenh``@gwtKD8WQ zcfgd)O%H&AEmZc`BQ!jb_FWXPC?FYQVsG*9HQ9b~he>)M6S(JT?&UMuZKCtzgPIydTZ~*@ZKD;ZsMdDc zXW-hU+zW$~+@WKni(sZ<_n~3n2NmUEN5l~R9$&eAfj$Ch{0W|wx3+d;Qvy0Ct(e6^ zlPiC04HZnu4Q*F-^@MEu^2Pf2!4C`L(*u*BSBa4=-7_6cClNt?>xcJfxegS}0ys9T zLJzbuvNu`wrju@2>bEcGxIKEHS^R_vO$Hg-Z`Rf|9df(A%B$(r)qDBg490$0W11{n zz&F}5Np(LP%}t_z&A)atn@X+fQR>Fru*~RHVb+myyu}>L!~^@d)FK0Lf_DI}$|_Iv1V2e+tA?|0 zKWE10Y#u>Lo7@@5$yf8w6tHMBXmz|}t%Q}$Lt93IdKk4ak8un~R(-4KX((lLE@-PXvn<=YoObP0B z?LRXJU|hajrlAY{sD_<~5?t8xqhq!YC0*RfGs5iAnK`Cn$uV|qV&o7RsoI6XV6+4C 
zl;^$07G{&-3cmdI=e#yNc8b4{VPOo4m2IS|?54KB;4#@+61?(0@lJ5PSqloX5RT05 zZ~3H>SLczO#MN(oUX5r#f0AME4Yp-UJ#h(4o;?3xG?dCA2>p{3JN_xXiSM~Ig!!(- zpq1>7!!|tKP2wHa)%A5euYXP&nd)f{tv=MASyMfpBBmy2gVdsA`Z>>zrUdDjl~E{O zcXLUmb$4-;$x!&xv2N0kjA7s(Zn5|~V)a=Rrj5yHHcj0&hvUNBn8!+*bMF+SbO^N2 zus=yykEqgS<(pVxZhl^VE)_nS+Bj^gxS(89QE?~0DBB!cQ0P5kga}xj$Cewh_qcq9 zKEGI?Z50|{5PLJ8+x3IuVH7vdG*vOlwmck~SCY#p$RZnfR8t(aDPnbLP=jZtg?oMh za9EzB-Z=NaXA7$+L2sCP!+n^fMQCGA;D3UF6y!X literal 0 HcmV?d00001 diff --git a/doc/source/_static/schemas/02_io_readwrite.svg b/doc/source/_static/schemas/02_io_readwrite.svg new file mode 100644 index 00000000..a99a6d73 --- /dev/null +++ b/doc/source/_static/schemas/02_io_readwrite.svg @@ -0,0 +1,1401 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + read_* + to_* + + + + + + + + + + + + + + + + + + + + + + + CSV + + + + + + + + + + + + + XLS + + + + + + + + + + + + + + + + + + PARQUET + + + + + + + + HTML + + <> + + + + + HDF5 + + + + + + + + JSON + + {} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GBQ + + + + + + + SQL + + + + + + ... + + + + + + + + + + CSV + + + + + + + + + + + + + XLS + + + + + + + + + + + + + + + + + + PARQUET + + + + + + + + HTML + + <> + + + + + HDF5 + + + + + + + + JSON + + {} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + GBQ + + + + + + + SQL + + + + + + ... 
+ + + + + + + diff --git a/doc/source/_static/schemas/03_subset_columns.svg b/doc/source/_static/schemas/03_subset_columns.svg new file mode 100644 index 00000000..5495d3f6 --- /dev/null +++ b/doc/source/_static/schemas/03_subset_columns.svg @@ -0,0 +1,327 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/03_subset_columns_rows.svg b/doc/source/_static/schemas/03_subset_columns_rows.svg new file mode 100644 index 00000000..5ea9d609 --- /dev/null +++ b/doc/source/_static/schemas/03_subset_columns_rows.svg @@ -0,0 +1,272 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/03_subset_rows.svg b/doc/source/_static/schemas/03_subset_rows.svg new file mode 100644 index 00000000..41fe07d7 --- /dev/null +++ b/doc/source/_static/schemas/03_subset_rows.svg @@ -0,0 +1,316 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/04_plot_overview.svg b/doc/source/_static/schemas/04_plot_overview.svg new file mode 100644 index 00000000..44ae5b6a --- /dev/null +++ b/doc/source/_static/schemas/04_plot_overview.svg @@ -0,0 +1,6443 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + .plot.* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ... + + + diff --git a/doc/source/_static/schemas/05_newcolumn_1.svg b/doc/source/_static/schemas/05_newcolumn_1.svg new file mode 100644 index 00000000..c158aa93 --- /dev/null +++ b/doc/source/_static/schemas/05_newcolumn_1.svg @@ -0,0 +1,347 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/05_newcolumn_2.svg b/doc/source/_static/schemas/05_newcolumn_2.svg new file mode 100644 index 00000000..8bd5ad9a --- /dev/null +++ b/doc/source/_static/schemas/05_newcolumn_2.svg @@ -0,0 +1,347 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/05_newcolumn_3.svg b/doc/source/_static/schemas/05_newcolumn_3.svg new file mode 100644 index 00000000..45272d8c --- /dev/null +++ b/doc/source/_static/schemas/05_newcolumn_3.svg @@ -0,0 +1,352 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/06_aggregate.svg b/doc/source/_static/schemas/06_aggregate.svg new file mode 100644 index 00000000..14428fed --- /dev/null +++ b/doc/source/_static/schemas/06_aggregate.svg @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/06_groupby.svg b/doc/source/_static/schemas/06_groupby.svg new file mode 100644 index 00000000..ca4d32be --- /dev/null +++ b/doc/source/_static/schemas/06_groupby.svg @@ -0,0 +1,307 @@ + + + + + + + + + + + + + + + + + 
image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/06_groupby_agg_detail.svg b/doc/source/_static/schemas/06_groupby_agg_detail.svg new file mode 100644 index 00000000..23a78d3e --- /dev/null +++ b/doc/source/_static/schemas/06_groupby_agg_detail.svg @@ -0,0 +1,619 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/06_groupby_select_detail.svg b/doc/source/_static/schemas/06_groupby_select_detail.svg new file mode 100644 index 00000000..589c3add --- /dev/null +++ b/doc/source/_static/schemas/06_groupby_select_detail.svg @@ -0,0 +1,697 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/06_reduction.svg b/doc/source/_static/schemas/06_reduction.svg new file mode 100644 index 00000000..6ee808b9 --- /dev/null +++ b/doc/source/_static/schemas/06_reduction.svg @@ -0,0 +1,222 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/06_valuecounts.svg b/doc/source/_static/schemas/06_valuecounts.svg new file mode 100644 index 00000000..6d7439b4 --- /dev/null +++ b/doc/source/_static/schemas/06_valuecounts.svg @@ -0,0 +1,269 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + 3 + + 2 + + + + + + + + + + + + + + + diff --git 
a/doc/source/_static/schemas/07_melt.svg b/doc/source/_static/schemas/07_melt.svg new file mode 100644 index 00000000..c4551b48 --- /dev/null +++ b/doc/source/_static/schemas/07_melt.svg @@ -0,0 +1,315 @@ + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/07_pivot.svg b/doc/source/_static/schemas/07_pivot.svg new file mode 100644 index 00000000..14b61c5f --- /dev/null +++ b/doc/source/_static/schemas/07_pivot.svg @@ -0,0 +1,338 @@ + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/07_pivot_table.svg b/doc/source/_static/schemas/07_pivot_table.svg new file mode 100644 index 00000000..81ddb8b7 --- /dev/null +++ b/doc/source/_static/schemas/07_pivot_table.svg @@ -0,0 +1,455 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/08_concat_column.svg b/doc/source/_static/schemas/08_concat_column.svg new file mode 100644 index 00000000..8c3e92a3 --- /dev/null +++ b/doc/source/_static/schemas/08_concat_column.svg @@ -0,0 +1,465 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/08_concat_row.svg b/doc/source/_static/schemas/08_concat_row.svg new file mode 100644 index 00000000..116afc8f --- /dev/null +++ b/doc/source/_static/schemas/08_concat_row.svg @@ -0,0 +1,392 @@ + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/schemas/08_merge_left.svg b/doc/source/_static/schemas/08_merge_left.svg new file mode 100644 index 00000000..d06fcf23 --- /dev/null +++ b/doc/source/_static/schemas/08_merge_left.svg @@ -0,0 +1,608 @@ + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + key + + + + + + + + + + + + + + + + + + + + + + key + + + + + + + + + + + + + + + + + + + + + + + + + + + key + + + + + key + + + + + + + + + + + + + + + + + + + key + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/source/_static/stub b/doc/source/_static/stub new file mode 100644 index 00000000..e69de29b diff --git a/doc/source/_static/style-excel.png b/doc/source/_static/style-excel.png new file mode 100644 index 0000000000000000000000000000000000000000..f946949e8bcf9c88251a1a592543110eda2323b9 GIT binary patch literal 58167 zcmZ^~Wmucd)+h|cHBj7wwzz9?io3fz6nFQc#VPLYQlQ1%3GNgpxH|+rJbUl=y!*P& z_vOdjlY5rTT9a8elSm~6Ni<|aWGE;oG-)X@6(}g^%J=>?BK&*H)AmIa3JST}T2xd? 
zT2z!w$=Sie+SVKjN-8or1wli=N(1 z#8?^%ioDoiB^=^p3R4jVEqvG$H(_{ab7F>ws8N0+B)&FFkIB~!h;zU`$Zz9m-S0l< z8X@pG@Dm1Aj|vpk_3ZZ`;6(IAgcQ6zhVXDGf|gOgNhxyH^NSx;LFncAqaz^w$HAW- z+v>am5aA?b<5-umYls36_iRmuBltVJPO_ufJqJC$` zt-<$$aduijMc<3=IKGoS?hpDsh(|isdU7hAl!^IEF6go=Ig3iiGmP-#j%U6l8QUni zjYekkgzngLTCzXN7AZ!=g3f-v>Lzk=RC|M+=#DBUE+ozIIvNG^@I(UY}v+$xm3 zudWQMz|01uo2Vwxq_9x%7f>ZGsjL^UHP*k<8+-_xtFX+FyjCAL!mymbnGnbct~e6( z%`NqR3|waCd5heb-k-n~>-v?pa)25LcA{$WV8shNIW(1oRw~mOE8m483poL&NdJ*a z6U-Hzd^Li?l=?szk1#4sasUNW#EoKvLL5YN@xh-AUo4O_5N%t84I>z}4%!ylD-Y@f z{z;gC2Q@ZW$B{S-wtADH8L=ifGY@YSF0Ka(AHKF5_5rGd9I;mfI|6w`lrH{r5g`4m zM!didJVZE12^Jg8iCmB>m`ivi??V|PT}Z63+tlO~!2yLU$x#UKYseJ-4eldqp%I&0 zNU{;*65^lWY$FFAd;>I>Eo>g5iBON8V#iOusP;k3z1&-E7f{d0IbzV*NaB*^1*l3I zG_OCB!lPzWm|mVFvS&=QKmC$Q+UZ- zrR+3gi^#~1ibs9=EnT5^6g}rxE^S@W7}=QXT7RQ(=zVxRQ^>|cz(~MN5Gz+RrkpOz zCaQ+5o_+MyT(wELvP`qAkAp!zbFk>s6tdNjwLl}1Ho@|4eSUp<{ciof8NvZgD{i*5 zVgcTa;>`K95VNw*b|aPhNbIK=*~e755yoVfpFfg8sWypi^!M61m~A{Jf%&Pys#Eq9T`tF$T3Eb~!n(gZB9FaB7hUu-J|9P1uj-vEy88T%L- z3(sVD*H5*92TYUEGJW zd$g5t{ulfijJvI|cQ>X#o_)N&DY#{P_G`Vc4-0&F+A?%4s2Vw9vA{~*&d|Z-t$+8~ z2f-`VOXz<38uPL7skhUh)5kBjYEEy&aQO(~InCeaCFjxRsr2FPV<6fEfCk`+Y4x$0 zypBGCu#u}w@IbWA#nJczjFF9@EM-|{W~FAOf1s9Tqouqg+^qaCc#(;TjM*OXM@rAk z%FJzNhptKeNdrwXU2<1^O}Dy(*X8z>6WJzlAxoFOCFS>>8pmf|hE~Bx6i}S{NJg~r zpjy9No&M6(axG76mdS)xmAbt=!##0qIL zH?mdIS<=X|-(&`GmRIa;4L9}fW-(@MmT!%~j+~B8myn|{Mk07_UOWDJ zq`%k=8T%O{b^X`Awg0v{ezG%O`RkJG%)O89w|}WGnj@COa{lVR zavI(x88G?OX&Js83&YpB>vH9QjgR*e+jn_n-3yG6L@E=Iwv5wMSvpm4F=c;Z|9a-* zLwq-fD~)H4hjx0qwxTjdGr{cN<#5`0{0StBeolRccS=fgOKZoM=VWvr#B6?P?g7dM zWzu2OyQ|5O>=2&_?Rb`5*3E1M_vHrcygY113?-zLzl#Na#NM#_V{vCV(mHoq1lz^H$4F)z$iq>0BTYx93Q4~C zo)_JZOsHy<5-Qr{)L4~p?KcTCYBOS0Usl7|VcPjwwokbJS^2~LX7V6Au07wdyKgp$ zlS7>|DInxR{?uz0o@e?@alfs!|94_2wU>FVh1juWKk7c&Msuy!;K=!?)p7jn@A5I% z;^b;w>$LamwuNkym&bMOUKiKB)=lGTef#`^&DzR(Q^C1!m+0HTN=;Mky7798m&fi} zrw{o(a5b(~=pyji=*D=ak2`LLR99ry zKQ4SpwANqocVRe5&0!Su|ugITkK*#DC!r$Ekq_zfvet(!sa>+I#^ 
zS>#3J7I!Z4@CbSajegvvF-`?2nFm#83`4P^9rk6%8#Meh6EWHCUM zNrX+IL-CV9(};dGW#1NwU#${Hc89G?Pv?2fpW|Vlh3J^|Tbl?@jJ2zNfLcWF*4!E! zyXi?!uT__706-r|rQh#0P~1SrM?Pd3HC48ZZW`X=4XD7zq{}qSKtX+gwN}$|)smOv zHFdCKGB$HCF=z6$b9@hMp`iFZdEdKs=B~zMo_4nOF1(%s6#s(YeeeJCnwf&^Um&hF z0u);EN@SuA&gNvCOdL!s6oSZPWMur#W){3EViNyBfBz;xVdd)T$ji*^;o-sL!OrC1 zY{|^}<;xdl7B*%!HpX`dMi(!8S7T2`dl$-o6Zs!HV&*QU&eo2u)(-Y$|Ijryad2}L zprH7N(f|GYd!FW=*8j`N-sL}Hy$i_v&j>Rs6ASbI7n-ZJ#s3f5KO_G}`5<>dzbgA39@l<^Z$#@|1NqsFU)^t!mH?P zZT_yLe^epJ%Fq1&y!#J4Kl49&`Y*lxdr|)N_I*_Zk@=bb?+ObdKb0cwKtTyZNsEc7 zc|xCb0DRR4l6m}r(qzydg0D(NV53?$WvVXhdNit_k8|BQpRp@{KpR(XstA2-dPv%v>K%_mWb87$2#DDTn2gNj{0BYaUwjN5t!zY28PyGdwV5+|~(%xEzbO&*4{Y4Wr zNT3hwu2tdk^dirgXkly$BZGWAbdyfO-dR`MNZtfdSPrP^ieZU4F+#G01}p;KU`zVq z4_RDyCM67K*l)TU2~@mdvF5QaIY{W($wYZcg}@4`jUq zdafD-wGpUl#B)W({1dg0Od>#&r;}@|>(3kuBf?V$_o%xa!XuuFPMbU;0uzv5c?MzI ztA;HdW&8u<-ReZoBd!X4Kh|d`3+2rZQf)QZ0x@wjDt35!JcDZ(A{7UuG%=$ZCmTiYkuSC)akqI?zh2F?37iQAO*mf9 zLNJ5ITC`i*7WC7Y&Me}$5Xd1yzIF38{=!3pgv5Z%G`XxhxK7Dk>Xt1Q3&0_zL{>JG z?KnBWkuuK2ZHP4)4esw^Y5;ol%8O;hOSGN;)aFWAM2}TKOx zAQ_$(#MG~D>9@^TXTn8?Ai>MIq55C#?ou51Ly_|+&A*!uE?U$xOW+0o+_oVWM=iOE!5Rb`kJzCrF zgaF{JEpUL_jHCuC#m~jVlh3oxWx%kEX9m7w4y6}<-Cg7rKcb}e-piNEbD zcJ1+q`uP2vu$G%eZ>bWi2)3^teSzyWQAbdw-Q;*39;yY%L%E z2!FG38&X}L$o03|mz*r*Ssb6rId`oFkFEv6t9*@C=MXiwoM5tL*!@L@8kqx(6OVJ` z(-8_5LXhbDL;#+j5IDMU#`-W*fVcJ#XT%?LY+Z_dKmPj?P@#WF zF%mC?0oBZ6N)f$@Z2NA;ifje!MFDMQ>vE9v4MZ?t*5X|b3IS0`z1Vzxf8d)Gz&Sb| zmm-jIz-lRU&)Nm@Ih!8~*$ z2^+dX-c;0byp78Vg@Dy1xe=(_v4M}KN}mO3?JsPK1_8}gF~DofaIv)6?OLkuBjyBQ zvwUc^_G2Myok|2!nFz-y3%$FAENBp7M2L27qxj-P;Y8b@xt=l;Xi8z_>&^|P=cA8M z;6~1z>VD7tpPEqF2cI$6_gy>bd*$Jw44Ge!#<288HJ!$Cq-z&4%C|XtMXze#LY4xO zLDCJM#^{36Fq7nY2RDgz{oPC}o;J4Dg+FXC`Qq-y#%h`0L?hmJb-biI?4??2Oh<WLTF7TqiRm(blB@ekir{)z%AVdD9bJZJ|u z1fG(rNsEe8`FFe_j&?}&*j+cpx%m#Ow0|n z&~(o-5vaed2cm@*UpVh#E(y09X%l=j^ls1Hv2+^G{Eh{Wy4~HFj;c;*R_g<6|5z#Q z_?QIPEVgTdrq*^uBCtnC`k7ma&o2O#_F}w?x9Mtwb@H^#MdNo!?MP>iwPTQ*F!hhI zLoWeFAn4--F=c6N`MP{x$9rsfYgl#XvGQhlY@O@F2|!TvJ4V4c>AjDTru>;tNCwd+ 
zEke-QFPL&5Vi;EPa@sf4Qo8+44Nd|8mQax*!LYYHSW~n3)P!@^A6!M|kGtU0Jh)f= zc7g4WiEOK5wqvKk7+wEDzm}}3)qMYKYJrJelYbD-pAQz5qGpe~ zTAwhR-6quVSxsQ0yUduLUrOFk(s_8q3o763FF{w+6}zHZ&bv{v&YFlnj)o^lC4ZS$4LOsoHU4XzIK zN@7bwFBox9%Lqofpx!7CgW*KjoM(&^<<$6to$5=o^sefU`KbXEBa>z(XbRDINPi(Q zlK$JYQ(o}Wu_2jVzLBAg*+6sW3^>+_A8A)ab|0-Bqtwx^C6r#btpfH&BL5U{!|zSx z#x6^4&d$P62^|GJP`HVKCnsD2KtjZtBnlepy|n??L=U%TYZq;zr!cdxVb4^g$30Ay zvu^)HY-Ke`q}(DlK75TYhl+^5C~wFeb?MTp0A^xL>%P4L$L7LZYGMqSB6wkkE`<6n zS^53H#`l&YfKO}o7d^zyTT#mKM-aL}_rBWonvmL;Q}xdzHKO6XcCqYK{H4vR8KEVt zzvpMOw&lXfw|E9_Irywoaz`_=FCXleuIB7z61S$da37BLORkFHjvC+H8AkTDf;Dcz zU}$xDuTmPi=vT+6W9wf#)T?(TaS19FzTZA)YmKfnPhIm&umJsC^@yym*%E=x41JzQ zKP3In5Xf6cwOB;4EXUz#{3Ova$g|?lvS(b^DR$Xkq?FDpFQ>rX_%f|r=0_YY)oze4 zXvKa0f!GSKxCYCYOoC&NMl+1Fo3fhG+{?S(_(%Llmr{_=`S_T7>4z+H$M3lQ4l6=b zLhPRcfmU0avKI%G%mk5(J>bbrKKk-JC7~&=DKM{)_bM4Kx6+V8Io`0PdDY{9lJb7u zGM)!ZEZ^AhcFGjyFPQb$dE+tZkQvL|(5)%7ThHB<7cP#CY@}v3p}Zd3Zs;V#NXgqr zC$@C=KB4ZeHTi~e^dA%d3;6Zx%l1i=-0JC$ifq$O;cIaEn%ajjQxA z6<8-q{gmg9kjOwUf;{*zM2}AQ>+Q9^3N^AGxSt^fa2)?eqvwSqOnn!;qc&x@#=5TE zhj=9`fZ;@3o7N@B40HD*pTgpygepR4Bn>)UI_&$zzk+s}1M6r}mMLc_cW_PZh)X9F zttXmQO(zQi-kuZYNmIB2$k|)FKwLy|7EG6Vr5>I4SYFEB>(65q?5N2U#-;S0MWOR5sOwt;vjAV zyZ+2w*$YSl4ApxKe6<5F3K*jnRlyoUI&vB<%qwM$MRy|>nBe)DQen5!G&>6_<9R9e zhTv_y)XHob+6RdGYm*L21H9LwYfBH`e>L9=-w@+Khfagjvx6~&Xq4e-;mU;)TW$2a z6H=3!PLH5^`oA)G6VLRvrw*`?bW)PL_t8fltq*%?A zHIJB&BR~=bc=WkttPb8;vnRXir_D3Ux-j9f+m35`C~q&T4y<0#a?YNvCcLPpH}A0< z_UXfprQkyZUV%uDLY%ea=ssJ0ow9*qL#kzaQ8~(1>>+c{Bq%p{h~J}cV{9DC;e~Yb z(C`^}_BNu*l50x=c|yEF437|!93~d zF7=Xs*9VpLs4DP$6snnzgn9O`V1jRVvnB?byfU_|zoF)Pi%c|J3~%RF!h;r?p!qQ3 zcxk8)4VEl(j~Q3o_?x?H36TO2TT1aeXOSV4@{}$^cG)h!mh??2#Law>pEm2nA6$Gp zy~@0`j=uY=6^e?(T4+xelf$O=lXzQwGR4Uvn*C%kOCdhpVmK;NLxNe+iCm#4|7FOD zzf+x%E?Gj(A01bp8-2ajmx!>f>jS$Z5(BQEb#yr;!3P=N==(5I?JlQisR+ENP=u6B zV}{pWTAFm8>`_2z+$TcN<|Qc&^E8;L3;lJg)5j=r;gVux*6@m5sI0lH)wG4+F8o=o z38QaoFW?cndseY;(xVGDZngn63{(pWS~jI@vg`uIxyk4k0<%7V#_Qg8G;cB2u#W)p z4!z*G4qENB=$EbY^G=dI1rmY@QXFS^p~={_WmF~=E)RGxz$U} 
zH9v6>xRLhPd*q7&OqVzNWp}|@yq?r`{oPP#>bjYthMbOxwJ7|2*4ssDXJ+F6@wCmX z|JpF$2cZa%0@$%+{joBM@Ud{?3X7r#A`UB3p_VHM0Fw~Bji#w%h`BAIp2d2VX3mTA zk8IM@+9^MPMwJ#q%bmiZ6LTTcx5L(!FxvKEVzP03GlKx2kJHNl!XbB zjugsI_F`B&^YOf|qbSv~g@7($V(qo|+CK7L8St*SE-i>Q!IG=5L;vp{3e2!5e523! zYyrGG?Vgvn^?p%A64HbW0>B_G2Z-sjQC2A(`zz$L5UB9DM|xnt?U{;wM9SL{SybFV zWo?z@8x<-iI9Pv)rj9Gj(;Eu zZb#O?O6Kn#%NaltPASQJB;Z-Ia4;&;29PI?EL7^|F~gyD(m$I9LU~D~5m3bGR@jwQ z?SFE+T#KOMe@4j*u%tJvy7JanxD5K4dqn*v-U>9i40tEZc5Ds9D6}~4&7%u3N#1{? z=;`@KSuft=oEn&xYW!fMC`4@<#*qkKBJvK=BPSoP2~#PEf>S963AajTM`xYQpWh7Z zMBhXiNj)HZg?-A`gdd2T@+Gx1->%a%y9p zTTzR<=iHcCP{50{52#Yz*C)+-6csEPS1LWEve)_x1)98lcTEmq?yq06jUBFAIwKt8 zceo)vPr>Ftno;qmmD1I!F#TfYGO+w{wmN@CjNyw202R!QR6MnV^MG+p?kUN6!2JoU z(!%$s@6;2UZImCb-W~p6Z@r;N+{O1WKu@ByeCSKA`KPiW zdC))v{6FDCbjf1=(3g#WJc<3#57+AJydeU^f584V=}vusH;H%FGruQ7fR?`5CBS`OeT(;^miw^VW*Jw}F15gvVlM0#CYV?P zcKCzTGU1rfm|iD)E1&E{W3*flYh!8?c;#@p_H_3Y;_mJ4%eKiT&%X0>kQ_Ca5NdZi zx2Of3sZex+QmO}?a@vwVJ4JQXU_I8fb|ZE}6Ou0EuS40Y&|@H!YKlJ?mv1=XQBYpG z3_AA0Zd!C064HgF0}q3??~H15cTI+OTdGJl6ej)cm>njYW*flw?^VVweZbx0`>r;A zeh&Tfo=-kn3ZSY(v;mJE8nJ^>n3HhBF~d_6Ngb@FZU#Pr)3eq?1(rjp?vaD|eS!l8 zio52H_-Q<{;qSSgK-~7PhnQqV4%fdO1LqCTYOG-wTiV6fhI=vOrR>oI4pqJdg|laj zugjy8&SEn#*Msqw1cTe$8iLc)*7vwwT|1UuC*DTyMMYqr*7M*fH81b!`u2U(GXvgn zkeBH&1l9DCh(7n=pwZ_?E)7Jx^V3^PO5x`6i;%-FgE2Ag?f7tz|;UK z4vq1NK5~8jJ}e=@+<*A^gs(tpl_J?Dx$8#gOL=6iHi%XOuZPx5_)j^Q42Xv6!bWB5 ziLZlrxenb&!y0Ry7wxOCUw>z#;F14NsrcW(5yj9gZfo{Zp78zg5lUGXV;meZdJ`XF zaKu5Q1;$Gztl+)2e^LBbftebOs^M}PErCNYY4xzt4SjCg{pnHLu(D>5iMdHJcJ(wMk(9<?-JI`zLy3C^0XBav60$IIv?Yd4 z0S_gjHXM{Qo|{1yQ04^plX+Ho4O^BhPLIWohsCMA{lDAtT$Gye`(-*f9T$HdviLn> zKg);Hs($WG`&qeJ-oY3$Jh!PvVzmMQ7$$#EWg7nuhk`4-cOt8d2M}FTI>fdx%zMpy z?0sm&*1EcInfeZJ5v8+gO=B}hUdKBEdesxIRRNsq7+`czW2ia!dT9u2*z3UgXcyl) zwmnbE%;Z#^xW2iHI~7poXMMAVAMe&IUhN7g54tya(=K~FUD*{S9I+B zXK}a|mCYGy&7Pa$$)5ODPTN)*5kG9a)NXFUT)~HIGJM1+VKO5Kz-@->HUNR?LQ+5w%r-PRTykCT=XD-P9m|RWX!l#odKBP1q zvVi9shU|OG7xM~LP*y@&lm;DzrV)UV!2~ZgS2lm(jsJOrddqe=i^O=56C6Q+Ou3oZ 
z^~z7>6YNNsiZJX!xpf(LS+`fm(VOqeokc`=0jvrfAC|FL$5Lvq7H0bm>7ol;z@p+7 zKVlJU@lx&eM>;HaN0 zqsiOs_coH7)TszFo|_r+NeTvLMB!g0aHR3W?Ek&Ow4k9Y#O89~IQua+a$@OYp-|HEEdB9gOZ!BtX)#B4wgU=g$d$a>iqR}q5X{XJe(7>*z4}44$b`5 zYRtkzxXRmroWFb?em6)rT>PutRe;~{i|e`$BnIMo7*yB{7(*i`@!Y!bxe{NWQj2mj z+^i(JI5k88Pe3ycV=>tNk*QhbyDYJg8WGO)>!ExL8}CIN%918Gj%9Z(I4(iUf>&5* zhg)fGjXJHfYh%;<*=Pr{$A1FT68Mful3qYz&BzsfR}^=?c;1bJ1v{qL!31m zF`0VXJkK{Ws3$`&jlG?e1(16Z0IxbZUc4Mf*XSJMZza8lSF#}VeXTYO2(P$1du1-^ zCiso=9*Otcg|93zw+Ah@6p=nm(h^vK6vmzr-QtMc09K z$NvhmG)~g|xUZyy<~aU$X!E=VUH)TB7I_mAMR6vgy%`k8W=IJgVSK~ZT`0Tgt^C7> z)o@)f-JeBaFw)7-*{`PrUwJo7Xj!DA)L>SM6Gp#RWr@c>4Hr7D#1m$*Z6^5rj(?1^ zF<$$FTt=&YPFwH%!+0S+UFrMdSK4)$=3B)2dc5waa9~*S4~uLE5}`1Da(&y>R;7cc ztmTh|dfN??M{{m$!nR{sTJvbw;MUR_dU^5L%p|7CMGj5CzCr+^t9vkDW23mJxX-Bi zx%p*l^$@V%n^t&_;9@Z__#F*Z)I@8N9fgR9!7S(3&QP;ExjeDQQ>#Ab+3(6A7YUe= z2Y(8ZfcKM4N2YJWXYgiy{qgvp3j2e=CHaYnbebh2=Q5&;KL^dz;O({%W!c%+BL2&z zRY5Q2s++AffB7GO=oo7yoYNY+9z33Hj2{HlsY$AnEWR%bv9jL#0K7(FnW3Kgu~YVgWjm7||{+ zFBN-)oZ0kPZ8C1>tQ?5uIDsLPQ|9;k%$YE_e?Cy>=iil$W%@4L4o~^5Nzz*)ZjR|v zx5m6-#0DINC#YI>S4HPL%1AlraIiKDFuGyz-w}8Y`lBaW=$lj_eVGeACA50RhrJ6g zqObXGjGmi$Yeb4`$`n?FRA>jwORl8}5U_mSY`HLz${DOd;mt422L>C&oBdFwz(8QeuQ zCvw)?b6Pu5=WXV?_8tJfn4o?!5TlOngug7rq3=gNy?>bziaI;9hnPx6@3<<*HDF0N zxAQbl;Q1ABF#o7>K1fZcK3ZNO)R_rF?H`IqB#9~D(L5Ar$fJ|nfk8n3UMwyjA)=UE z{AU97H~Vid_XMMj;PG9!1^(fW#W59>51AnGKkJe#26}5uBNFt~m=u?LwRi_U3o83Z zi8?>hIu=5TZy+wyx!wm%t(vagb5qPdnTWh)bXu=qjM)AzX;;D*kpd46B*Qa#M72LmDL>oQCk;RiA>8WaJ{w=uE_ZEuF-)A$2q3K-cX=m;WaBD2N4aXXVn?7JqvL~%ZF731fYlRc@;QREoeNcdZ6~-(0^41Ye5FhyJ zMN5pbtv_sznVt%L%Mr^rzL4f$kbl3aNbchTr;!w5KjuN_fx(%%_12-5dQcJ0;>XO- zX~A2?_WU%CU#W0(-u897IGpK*sTpC^Mi-j2A+Hr*Ui=W|;!(s6-%`>CZzomlD_^US zRSnZ%(k9*JNGgC_H@oiG5LcA8Dy%|7NYI>Hlw7Aqz0x4Q=8Q6xGP2+Tmj?!X9hLXz zH;nHoXMAob_3B0g4&m2Vn#Y=tO+^jx83rK}2zT9e+bU9cq&>q|Zc7MUBKtWhEWV9& z@p_N&(RTr`g~z#r1QX;JUC%i|9>~6?HE>Q%^i)Yk@YETgjx-wHejY&Zd{9H);!Aqv3#%e)5BKZU;GiI+ zws0O(eZi^~E!D82pn+4+@A~e zEkPpfH#1v*B!NNV4WBo8J=pXTzKCk9nl}e!|8|A3)%t@mxHbKiz~wy9VXGWWy;C1Y 
zs^A;39{dCN9fEa1RlkF(l@tC_WPwNAqwukXI3ccA_lS7uqK}gWWsy*IXgr8dbBFBJ z=Yp0LnX$#??WPp|gF`BLi#Q8~q#tfVqZQm8EV1jx8&y?C(Ml(bb(204Q0!u?T`eZ8 z-1cXpvFELB{OdZCRF5FTjo|8?&qb;UXwjEc<#U-^pD_W!Hzd|u8GK0J6FEUDssOm= z`xZT}amTqR;CD-{?xmxQK((htg{dy#to;U;Z?% znMeO07Rq~a$OBtEq6k2EGFm9R>hbyDK zC_T@G+nZouO$X^}7v5caD^8~%z@hboqS>#ek7zUnSt}gD&eFgv8h_Yyp!3@`^k+w% z7zN`#DXkVyFPo62oQ+r(iaATuINzEU^EGM~?RLGH@SQL_>>Ht2C!P{CL?Z{`3NLdEX>{&YXAAOn6 zJn}Z+U-zB9r9imj#q0#HCGc~6;9whSLucdc163Qi1F7Gv@mYsnndB;~YNrhjp>IR{ zHndD(ct|oLJqP6^&@0G>{iR<uh&(t451FtZVj7Y3#M!XJ(i`hzQEem1)4w9dqM3=z^_ z%(U9I9|QkE(zq}9*io7p+iW&71g|*vd$o-dY+vzKxI-~C(q`p-HGH!rPIS2!u%5Xp zP-WT9+05WT(X&O#;6|&DUg)R7%M~a!2Wwrk(B&yLiT*F4W5Wl9Y}Jcb1%g_9q5SLf z5a1E&Qu|gu)-%!B&jzXqEND)Ji{F$SIY!=eI%K<@$z=X-#IR>ASQF9f2!-~A1QEzR zn#g25ulWMdP=CwmUht8R_vn+a-i_AcTpTyrr|w`=e6pzc_Q5aw%%i}6?92m7~B z-^s6qWh{q9eJtKgEuIA-%J^vIZGwsCaT46;-;2gXr~It*Mf#{P$erPeI&MkhTTkqz ze751FzA{YX6SnxE^P^dwJrfAODjF9}?-gv}IeM>uzQE>3EA*XbDR9KRS%tXqT<=L@ z9z_U2Y;WS*t1u)qgwSXK8_***P9>VY^cuc;>N!qW(f0l@VX3ykJD)0z#g^gI1#L^N zknyA!r{d~mOb6h@zFC6%&PA9M{9(J0w}3#r8h=+sBE$@@GC z>nWy5xd;d9x6MaaKwj>5-1dS&Yp2I5Sdq_3A8w!|&fOlVZ^8`NjL^HJh�j;PbFrZQ)irYxdv?t%TgT&_&t+8q+qVKc}h0H1Rr@x-d9lUPZ^YO~{b z&PP2s$<1;SkNrG(EJ%a%4)t}n5D!uY#MWeIXhha=@jJ+G>WDo_p0HO@C`nt8HXP&w zw*W6~#IhmbZ`c(zalaa7McJ6V)?v%(&%@`6xwp_;4Z~8H5=a#XlUYobJJJI9isaNl z;65g41VwE9A?nH``#v0_i7Fad?$5SGG}+rh3ojQPr0v0)*6}aNs8(g=JuaE(y^^_b zJBj35%0_*b%%s-Z%E>a%8;6FS;u@Hw=BCfTFC-LNqqz~3lSKV%f}}pna*Rgqd;_BkXf?D^n@9BIuZS{o zWj9AKs*;z&Bo6Z5!Gkzh2Xz~;^~P-_9wo8(5_Wn-6&xSbODt3u;vNvuL;TdqZv1cz zk2kbmAR#0v4pk36m6op;?^t}N_B|x|c_8VQFKX#A;_IX)V^`cLU4;bO_+FE~o~43+xsstQlCtVg z(>v&ip=oI~H$-J!+i?&*xi_5{J++wGI`6kA;v*Pkr2*nw4Zzqm1mX7l+k^)1+r+tU zXD+Y)=*FL|CZR76<4Sv7Bj+_Io+R2EIBV7XY@P$@a>w`LH>V5Mg#d)goCe=a*9xmz zv$j%;%wqf3_2Xz;muCxFkMd6&nN5_&8`{geKgz;QjyDSH zi6wEi+Dj*R2VBG(Zxi6`ZFN*yNd6pLWJ{QJ0*Vt+^RUE5#%X(ZlANFV`50fK8G?6$ z!m|x7W_*^gS0$?4Gtv10T7~7cBIPfuuuJS_h;6JhcH~n3T41Y z{a=xkOXuF*f@HotA@Y5UJMDjWh6U}{?zYdGeyUv*Nr=1B*!_&r4zH%M-^LKEn$2I$ 
zJIu$r4dqMVKUCnuGCiQls3{!&HXF68vKV5{O=kM*(wSJvx*_*6Yq;%Y*yvI*Wx&@* z`-yXrbCZRWMBfA8S#2L0k{@K|I=db|=)TqlkTCsJrf>q2bK_}6Es39prNsrYJ&F!z zMx!E5+I07Pciy*d(M%ev1uSP(MOp8i`Wy>ULbJuYR$U)=J4J_b zn%slo_v2?*OGTTonQDQS=Arj|TDf`4h8w+NYF}pe;p#%qcwo2hKX;xR;fr*Aw&V3y zR$vH7eH>*di3PH^Q$C4O=FY_C{hDz)y$K)XJnVJbw|CPOQ2VSWl%1a5}gDmlZ5KEkALVEQ?T<58CO3_tj%gsJp`_Yf?k8>@y0iB7JVxV zu;XQ2w6i5a11o$KVjvrvvXEF-Rin?3>`BnKi#O8YBR}Ls!&C<4f|364ss}y14i+L5 z?1;Z`7regECT&H1;quG3$qxHlctLQuS-%|L6Aa%zHM6QvN-JgIXbOxb7J~b1)Y()G zr}R6^d-+e#k)q?_^UC>RNnwp7n`@j8NiOR?9ShH z|NcZ*hmGY=`pc@m7i)jPK|C?Z6wI(@&0uM$;mvDK3wKtQ$4chdyK* zYwo`4AS@osNPZP_mjucNP*6CCUCXAJ`taJ;?D+aVFN+@p3h=ApSxlpNfNDR+ zEpAWgXtvKRdMi~%)yGgnQRWl7}Y8W3_)%G9sP>)`* zRl8Z@WjtPSr-CR!t^znj_3mmbw;}wp=@)6NnFcMs(jAHsR;I#@{Gv^*Kw)k*?^Tns zDMyH+A6ZU7i5kbtGIeW5jLFa-j;F?EK$NMzeo4Mb$R~0_Izd1@IYE^O9}&i6EO_E` za%8s7apmeMJnBclzTd@s#B)1E(Ag?ns|z0FN`zc~dD}M~0gP&reG0aXv`yGb%ljgz7FAX`Obr>RwuKeJ!}j8-5Z`{b@*G zKC1;mk79@86U&1k>R+Vtbu|F`8s?-@7mN_f2T)`Hr;PBPwOr>Dzg+$}GbzS0MEjk9 z+%X-}^-B%Cz4AxpE)v3luE=hqu1X-sm|xo^yQ|3SX! 
z$ruQWuaYQqa)W6UW&TT=3@~@8B_n$8o3i*m#bc(pF_!^YRNyymu7kGD1`}`5D+b~H zui8TWkq|bwhri<1Ow%Kn^RBhxL@))r1Qj#4WHC4dv|MU;1UN*D83M{ck;j*Y4jepkYJr3P*WEfb1f}UOCoF*xA_q_akBBCHZI~ zVe7>_wy9u5Vuf?i`uKPIDvEuGn82O7&`}{Q!i${U0(L=_iMba?^M0fRVVNwW%q*%Ze0KT)4fDCURCWL+Z+{(p(7DeCwA`;3 z*(95WZ`Al2&f!-UHqDfARF%>LB|;GcM4ebjY5`CJxpQAVxLVVP*P;6MbvBaH8%@QY z*8xrp9VjJf=Nl@<6ueqh51WXc)mb)Pq~Z=b-xkb!6F2s|i+=ncs@^gxj;P(%g`h#2 z;K3RQ?(XhE0wK6Va1GM9yIXK~cX#dJ+PJ&ByBxl=&%S%#|21mVT2)JGzR#SCF}P~A z6D5&RJWK8Ci!RgKD&W2I^M+ZUJ5&;bPnXs;^9rZ)r^2QlrIfOt-U-!KZ?M8#VHfic zN~D^)w4>?a+o(~Tx*ijbq_{QUMuBRej@?dtxsUl>J9m|P9zuqZp^krhx?l=#jzT}( zWJQPb7~$bIlda}s`QJY!FIY^0=>@VRY@QgS8Xmai!y2%da;qa-ll7K#F-?`V>8`Xr zS6u&h0YI-wM;#0bs|AUHFZb3HyAv6-Hui&H#G?=7VfY&;48ALv79v|VmJf2u2^$W~^+nBG|h@Kv>80awL5KPDFA(n-i2-o@DS znV+;u8mar5PW3%G)a~+Z3=9VWZtj~hUlj8VF2h79LQy>oU#1`aU=YIX+kU`TItF=x zWTVM}r6u>t@^Z#N0=B`ee6ZR%m$$Wv`I|(?Z_)*7+7s?4QKEhUVaYf8ehr3NHHa<8*N8N{NTy;W;qxu;&*clSSBtXN+-PyaLwrUoEc!qFJX^>ML^*>L$6ur2XyZ;_`3W z)oh}cDcR3;7hEd#(#W=$qK-czi)ooS*R44k2_w=?Ar&?4d-W(G=T`RBz*eRc(j2#L zuUOfu?J3g=8g{!4+{jtu-2CS6Sm*SZ#(njpN9{&x8p7vWwX^Ot{*O-Et6H%h8pa9K z|933RgPF-`lJ>uimCBVBN~&qg^q|%J?eF%pWK7a18d5`nuj3CBWZ=h|TY zVch`>st3At6528b$pxBjkFTEP@_|UcRteZ@@s=qOa$A{%h`rNg?F2`nHn?3h?C0Mv zY|>qnEFylkgG7!{{&Tlo57v%^h7iGo{~m!bJy#+$)WZ}+Z~oet&vCefFM+ACV*qtb zm#RU>@B@Op4SnS>O<%r5^Vw6q28Pt?$1Wl26fM_vl z%k!x`_70EB$79i+-!8QHnil#Xsg}%)j(bR}*`U#DD{Uh#!?h7*rTwajaLvkQ(6goT z(>Dp5pw8gfeJu8xzV$~>Y>;R(5MTG1N!tTIjS`Y* zS_bten>y3a)d6m-^zCI0P-C;r_WKyd(d&Fv;`kEVyYPMcbKAs)J$+U<73%2<#>ppGjy-LNZQLb=~m9DID3G6nPPXE*_J ze$za0S%yt*E~NySY4AU;V@{`@*!?+ShYp_QgMZe|Zx|eL-%>?Z(07@*OyK+2{lohS zthau)q4Sj;2X>t?ED3+`1Lhw`X6Zv*N0d4D@XS;%x z`1ZfE6SZ=6s?7Cy{WC!P@~k;QTZ^&IGTs3nLT6oX#WDM?Y7i}~L;WgKdA6^QSdaRp zlUm#O^Vv92lY|tA#l}&E@uL5xVlD}NK8%qtfTw!dZlpFlHsuTMDiK1T`BKk!$wV(R z@)uGkx2`}T8-eP7h@KWaQ!BJ<zPMD8zqY#-KQ)!tLTm4 zP|O1KEb+3sux2MSt+WtFz!k?SId(zkzArKoN$04ZraAH~q;AZKL@BtK`0z@n{m7T; zYQ5{8j}iqnV;Lhx6^vL7srWO_R%HztDKhwpaU(4G|LMD9(RqK6Wi;##EAZP-Ww?l~ 
z-$1<-=1Es1UYMa6UWr;)0b%+@!2hFgJ3wRApW^1UcG{Zvu`$G-&Y*9EUqKHJzbQ$#Nh*8cnPGZbJDB4Z{6VkUc@x%|cTs{q8?w78a!{!gLK1 zNVYj0YCeFW#-r_2<&@K*iu?+fC$+&x{S@37VMmT;7Lg3lxn_~KYUb~)HtH~1kZ|Uh zG)2viirOmVLz)$DIiU2)Rpz7C_;@6=vq7*?Sm84B4mU=Ot{Wj5+0a3)0FgJkSUTui zBJoLt*M?AOR5eq6UJh|N$+|{dPfoXF;8rVou2p}mi&-&N6l@|tBUMhQ=}PAQicaZ+ z^Ly+6WB9sTDn8(d*_Ks7M(6*gBYg0Mo9-Il0!zU|PzQk~gtV~HeBG`N|Jy>%)Q65ZB*nF|m7ftzU@sBU3WK)dW*JfHO z*Yblnf4$C-Bu_dtSdviN_3e+o1aDw{PqiyKYy^7tWiQ{lA<8(PO+zxCxwRBQ_qxGn zYnSQ#6M?v5SM2tZY9qQQnQ!FR&Z|~8} z!2tRxF4tZykcoYU1Dt$Vr|lUoxwtA-#| zBa|PJfs!jbZ^>MGK9z*c>+%zX6cw;AHM7~11l@M0vFH!uF>Cu9@hTiqphvIyh21UDyk8G*_!u(Bx7p36?l;-k#t>I^= zL`CAe){M)cVEsjB?5JI%`?U->DbSX~K5#XX2V!2upi~rWxO_06e>>L4qxf%9*LrHl zi77j-Ig%`f`-e!`^TXO*@WYL;oMXro|8B#4xloq&*nsUw)IN0XHcH+5uf_D=+h23d zT)*>t`(t;|6MY?|`^qcKPbP5Vm#p-q#Y(9_1?TVnt|knz`O2R-M~#}{YYzYBvBZ11 zhG9!uw4%rLNZNDqt>mYzSKSODYM&v7`dh9u~ZJNvBX?G|d#Bb~N z9(|%uT-1Qt%d4~2Jfo^yzdU@bi|hn9^zq67V7k4d7mwI9B6~%h-vVTM=EhKOzrgAD z&=cS2=&2QhouBX}6akJa9yxqO=c^l7W5%sN(oWrX4Qh4(WZ!ML2eJ8DX_KIJw9?0y zDPJlMkn)JOcT~Q18a5vb&*NO7be)ITfKNTDTkv%J0uB($;>+Awe;2xk_v^`!fl42I z#8Kku(x+3)?Cq6)Cum{h8aJHd;D!-%eu zc<^>5&TyOnzPCM<-~3Jnx2En4XP6k;lRlBc%V+$X$U4M7bV$S(Dg8LTA`|~~S%ZS? 
zesA9a67%d1oli?&EFX`aNbnPq3P#+&JZrL`Rb^#!N@_BE-`|k`vMxq8eFOsu=r;#U zHD{tyZZ+zs;N&lcb-{zxz0gq8i7&H++u?Wox3qR1eAummQqR#_2);1INb$JIeD`)S z{~7JhKd{k-i`lb`fzK`$1hWk7@%gq1OFGFR-kAvH0=uP=Fs$$7bKRudxmsqBps9W`Wwj z&s>S;g(Eh+kF+M^THu3le|t*tG~8FeozB<%1n{S`sm-xiLujeH<4f?sX)+I#vuOU;2mi$VMlAe6Sqyhv1gIp=goh?2SddNBFPGs`f+UfuE05;Gw!Hr zJ^($7;S%?EVlF`cW^}4L{LDILaaV5Y81ww(I`_;vx0v?AINkm<&sIf4~Me4k3&*bE=Z^qaR0IJBE?=6 zd;*4sZHj)6n~%NzeL0uqMiOOHBFU9=4&Cl^-v^<-4+Lp=_{r#h=W5SS_SB-F)98u8 z)+Zm9dEI>a22zq~`l7oY7hDZ*r+-TRY?b8Kbeu149*H*63z5?5PY5=9_ z$21Xc@joDTQw$=sIF6^2)qQ(AOb{vq7ievgykM8N7((ZJ6q?oMutj1n3CL`NdjG9i5utt-Y2AT|62dL#{im?BPFP8A>=p+3NY^ux4d(|KMTao(u6^tn-{hj%J^cf z!OKf_GERg$acP6*slNGh*c~6!8$KiL4PBn06;Hlm5lRPPiSWyE_xlU5UFi+}??oZ{ zz!8PK?YzRq4F-La5ygY<${)cfxXfp&8(&Ad{E8OnYz?ygzQFtPD&fCJzmsF(fvm;E zget~qUeFl_oPx&Byx)Oh?Y9EdTzm79w)?=8%GpSsD4uPT-W837_YTwV83rP)++4rN zTDbK(Jy7}YR)A(;%A5^>JJLu4TmAWH{jt9i)n-HXE-1IbRg4!DpI@O}pgb)Z^g0QF zYxUTG`a!}3$VivnaZVljFkQ?;j$Usb8dI=Dcw%>|RFi0ixr-Qfoz|TcO4Esh&3Z&; zRwKBbMYj3`qZ#mQtN4M1!grM}^XOG%CO=`l^*N_g`qxrWU45-(0L)y?RM@xbxi$dG85pJ_1eJX^`4ZnN6m?<0B zO#LGSrKHI4YaKL5Vbh-xgBX9B#1V3L7~`={v{CXz1DenRyv5Ch6)Zaw4*tz- z*6{=-=U%ygFfNB_bfo6}c8DtuROzm_Wa*A7K7VGtzL&lTx)Cy$Pa)<(Ve{oo--#&D z=w$_rJ3iboIwR+g;}JPZ4pRIUmDPz|ZAk<5t|X`N0KA|ldiz>(qO7rr!7G2(5M)i* z!B6fu!fRze9>|4YfHbrLxm(y55`<%!D-TekY4%(vO%0*_8YgDjA5jU~G=lM?>L~ZC zj&N4XFHobp&$+C*CJX?ZMXJ%@HZvUG=%%!+eE6{-^SFUB49FEC8OEQ`N&hYCy>;12 zDSsbzgeDmNAMk{cv-zOB1y~=xf^sN0?zVi}AtN~4wcsx!5p75t0Cs!4aFy2A^Jz;J znX`&JLLX-*{f^{z)e%b%}K1wPHO9ytPM$s4Z$~jpK zauY962c^8lYefbskelvw=930K=aYLg+M4y{)no#!OClMZz2G$sMYnbBbUI=QjaHQi1@byBWDjoHow)N2cnd=a;~jh+d+-Xm1x23c9T3f(R})6c zPAL0whd!>0X@WY?);Sa(;XR0NMOpT?`AX-%4D-9K2e{t2kGKp3*0HcfULR!4u;z0h z*p0I1rMLGxYrR{?dl$}yfnRkQVYUK|_sC2|{)OSSX6wkOcyjBw69T>4zXG0^uWj-8 zJB^9cd#_l(bf+YwN|?h^N50|JF$eNbdTVXeR80hpaZ3cIe=}QcfWHvwMTIGV_As^I z=IW%kJmxkm$LlL@J3m{wG)oNSHkMS|&CxB!q35SLg6Pe3z!vRW_d13$FuTD3j7><| 
z#lBem2F{|$WKUlMS9CuDA`Wz8P&2O2f`Lb}%bc6#hp6csqyIQw=1B%+WeXDzrMDb9XblqnoS{vTW>qDX7B8{X4IWQ!up8C`z@ak>GGhKj|1-V)Ur z_Jvg1oBRcSwjzb^jP%1}h|}t_*^1luER>nvwmVO=vTtK9_u@nHHj4_qou;x7C3lYU zbRpC!s?k8yWpgJLq;oG1dYMeztv%F_bWvc4JD@b_Isw~hOXjvqe+nm^R43AZodOxU zQ-O`1Zcdz?_Y68Sbdn;gldB9h(&$ht{RI zm{p{>8q2{AUun3ws=RNpc+=tS0cS_|!fBn-^}=!*)!{+qc7@I9*rME}E-1%KI6ao{ z~qe9V2S0}$G z<+gY6x+T9RG$pYQ?bik6L@)#VqUDjKYmOSz1osy3NY-T&qV` zj$2wD!@267*M&RGtXrHc$0$)-Ov1UfYyt&{oh6)R&ahnJ_#}RK<`q}b;JRK73CPAb z1r>629HLzwi-cV9FZCrcIH}I(wNw1kRo4kqc%;2J=|4&&a^{o)s)g5Tu0R&T&el+) zTlkKbb9Wxa+1Iup{@W*FS)mJCKe!2aLb`6*-FsHO&h^us7X<>$@23-O(@DpA9&VHM zr1)-*A{Q~o>*c?7m`0?`SgJQ-U+b}%Xfl!MVvZ;3g7uWxL=ERRo>)0xET~oL=smuD z=_>0<`MRaDQH5VYyQ-v+S;>aeKMtvy5+3eKw)uAi}(38>9m@^XIPq?5UD?` zb9SiafKL`H)n+=u$(@b+vvzsNdPRo<2>WPVfT1sA+F~fCptWeRueE@13pkT^{%%Ji2UR;sCL`*1^Vt znqF=TB&&Nd8UrQf~(iY^Cz7 z*D&>ng%X6{l5b#NS&NdzvJ0kWT+V|W!~wTJ`@o7dGgs67EqM0WG&)kzIiNY35e`WT}2qAhjvkk_cKONPZ>mZ_J$-T64I6JFZ*0&C=EfA;V|KK*gj^N2DVb;5+x`*_=llZ{O zPJ#Ybw{yhJ3Wz04Fvm?x=b9Z=&2vwrYrW>gc%LgPz-c_J2o%i1icbf*cFRFn#LZK3 z7>z6pH*=jKo+1OidCmKi?T?3+DB?qDII;hZoyxEb5#rEo1&5I1VN54yU$Sm%dZzcs zj2y;n85JWAe2$L%y4lk6U-;2~WVC-}(B|c4MVY{b|6unRQXZ_5ZmAvZuc_uohc+w! z>$*JXXksp@Y6ACq;fFhsYkU_K-`CG2jS)3{&?3SR_NWVq)ODZSx5kIWy3 zhi>V%qxxSNukpzO++ibC{;~32xdN$l^gM58_gms?`i`fOZx@W-x0tmBiYXm#yWO}q zK42204xwUWVM6Z3pEaZg-w&}4QtgcV68q<2KP$YMr53;y7E!ePZ&a6br+QJ4TO<6Q*D#&k)B!iZe)hBQ{=`!e9f^t^A(uVr^-j_)KlE z743z*5hthlv;+9HbQ*3m+z+{ zT#uNL0en`PE*W=jxjF;O#Gk0N zvhNO9K$b!{g}R({J9C1WZ$u?k+5Cvn{0GhK|}GTJNOGtsSs=-jUdDXu>T zx~qJ)*9rzv3%j~03~x)r#OP|M*AE0LeSZ}BKL1~33h`g}y^JkvHbN8DfK`|fmF6Fn zOBt^0k@H@ziQA5;bjS_-y~r(U(K|;Sc}u$RBQ^Y9f{XmhrZ&=%Jqb)J1#YUoW_$jJ z7{v%64w6#mJ%U;sB**Od9-PeWq%7>Mq??ANoyBhRD%R-lv_C&ewU42SBaZB0TiUy# zYin!YGBx~w#|MSnA!*+<}Eifl+8}V1rZtI z1Hd4H;4cm>`nL`kQh03q(SI-45^An+N?Nh{4qT@sP&}kSO{L%n|FN-eX6cqEOyX__ zsgwuTVI+42`faSgN6mjfi3L;V*1ca1Px2dfUt=6j8hB-g%ys28@MI{Z=>4KuvSXc! 
z@Ju^vDF3uAIPm;UsOtMnqZ6VYU;i#xu@W>krCR`eLmDQC#p^0&RNS^VCtFvZd)sh{ zshA!uU4E`q53VaEB6cb8{e`eJmD6}+3N*4DFQ57Rv3J=tKQ=0cpH?>qgxNBe;4Ff~ zHY1Pl={f~4;AZ1goD@r_`=M9P=8L(wj4Mt?d2;7YMk+oD;0Hi%X3yhcK#GwptQ(O( z6T?q4bm$R|m;Mrt4iNN<7;q;!o4wSOno!0gEf=mMlcjT5gS!Sse`nVQ^6iU3UX0{nt0*jPGWt1!YSw znAK+LWTI8ZsgBAl6}P$5`uz>r7!V|+MwIw(3WXkI55!;G8?@R~@Nr&k4aC?#yII7e zU#XnNvH=}TRNftocVyDAbSr$-#Nhu?eWFGaG54tV56|0cIE!WXG$D@99}}`SQE{1U zZLAfkg2b^`dGJBjJ z*gRy{%hjPaRBPLlj8j{*R-p+7SB;i~Ojpd4(J?>1x*~9BJAd&~J6`S8$i^Da=D9t& zKg-*{GK6nz1~|N=y|&fJdQ<9tSMv!RkDu5$T!-~?h_wOM2Ui~0PtTiQ z#;WD^B?rGfaeliIX_rflY`eKbcc31a+yTC`{@G-|m{dAIcwnH?{vj5Nvy{!|05g33 zEq}ag5H!JvZnB05cBt|>JF(mZt#wS+Jr->YdE-lFtC=E6|H@PnChC=8J+8eE8WKS0V?|5q!xMZEF*V!mZ^@lJb=Y?yXD9#z)IW z3o%LeF95+iJPMb16&9gG^V&f9qmU7SSpX4w5^~AGTzebJxafvC*4GN`=}WP>D0TYE z-#EUXx_l=*UixdW>P0O%6h1G_Xm+*xUc%{QAm>X@#&fgAMa4(RMjcaN84iPQneF_M zqZxmEun3cKQi2SI)Us(e*LfQXZ4$>7#gM`8fsN$V7b%acDzscdIYXbSCAxI115qJg=wCof;` zE5|;Z5uReb*t?HoKNkTj~IHE(ttKfE(Dd$o-j+}v7 z)&1a#ChR1FSO2UDP4v(c=WNoO_lGl34Z~T6-z`Lx-jSDH@{R!wjRvYo zw87Vg>pskipu#!;@A1@a-slF|ix#FYv-q*r3n zVSj#eG;O&0`;Opi%crTeC$#h*tElD*?NHL+#n@@fSglA4D_U@W&3hn7b~GvM!FjuK z8Y=R?=D=|lj0I7#U1zs8z~Iw`lOhjv)2w^JP0}HLd=qcfo_#a2P^An2AUwP#PYza+HawPrh7(=CcfaEfB|)+ z9l}*JJA4s&Dlzu0Ug-mD$->mbtEmTYGX^-h;n1A-O!yz4-SX>@Fl__@7rlG|d^AUs@^58m+tCu_~=Dc5P;kKy5RI9~x(^ z5k<991Ez@~6`pwGM|_w)!B*hZZij&(vjl^2^yF$QU#nY_y?c+kX1y)q(Dp|E z9}f)^Q2OWYocShwezZn@d$tJK3@9$4dblf=-CJ;TMJHO0nj7;$QN+*oBTPxnnR4z6 zsp$uIG?hWB<%#H1=tGTm5Ygz!XaHQeS`LlWO5|eoZ3btEc9bcta*4gI0yDq(2=c&Q ze0=a)0O&IIspFo6yx_ycKW6!5CkTaI?91{Z8%uqC$#Cum7WF@Po%zPU{)Wk0c{*6# zqLhmc=BwB={jZG^7r}2jc+p_*zkM^rx|X2712oZ3Go*-|ms63w=oX$Ld9skpjh=lf zL*a(qzRrjyArR*ala;u=#PYQPqnA9h^`^{1dhq9EKsOa&_TZIIHSOHn0-JtK9U5+T ztlC6yEKRgW>ps^*9gM2*A1~ub5oQUV!$zYqox#{}!NqQ~_ z6K!4vExDck!8YcTmEdSq_)}Sh0cUTj+j!$6>v+MKzp^C*Sx{#{%12lvBwY`b0^P2W zI_>R-<<`SRHSrz3axVH`3R98W51T#SZVYIs0Tx@5r z*~GHi#XgS0!4I+g*L_1S!Tl0`OAFka_uU#?d(@T;u1+m^moOzf0U?251dHKA_1Dpi 
zZle%?l1-Kr5g?EhhBX?mx2tqPL8Z+6o+PlP~x3bc*w$_HWh9-Jhl(X8V(pF$usanO+?CrrV zZenO5wf@O67H^d#%$`$UE^4lF?mtyTE+?jtn^G9*rws|)mi&Vq-rUGf6C#QsofiEK z{1l;vxNqiR*ozk;>O~J*#>?VJSIj>FuqNp%1p2PRI#LhZZbQ?I;wBoh=Rj8v9pdWm zb1Rf1M3u{A3VRd;jf2N79K=4~!$|9e@K$M_9+gRsWVU!IqJ81>HwT@F_n96$zxVV0 zi=`Drsr@;K861iO(R)hf_VXtw`QV$Ci=Up4W}(q+3^_dpU9>l_=s~$L+eKwsAOoxT zYf=K&3rh7)|0jM`Q%_3236^hT=#ZHia{RQ^U1=>z37rZf10~pR?%Ey=-fIKq{x(b$X{p^(*?-IIdD+~Cg`q_0U`uMch zSy={R&RY%@yaR?Fm_`-}#*V%`PP)g8r~LPGdist_N4v4gqNr7xy7uiGS4Zbj24g`g z12HQ;Zn1!-T8?gQeK}4-?!=?AMYRXQkc>kDPhx#}38wcGcY7ktsT#ZEwz5?u3p`7v zOqq_MUyAV+X|SoXobEZ3Z!&0*>OOaYYgJf}7}dZ+$k3#-Lv42q{P?;>i94q&Vtw7K z_vVt9kSaG!LVth5-#(VTV9FS7uEMjO1_N;7ht|Jgk2$1_Bw4UcnaE{>sm;cd$l}xHWdn;ANjY~>)Kn2sR-_z`qmM%H8einSJTe-)ek+yzQ6g)ge z06Hw}pJ*a%hP=E%=f>Kt-=W8!wrS3UGUWsZ^Nz1(vP$Act3{HrC+jignSaOsVkK@b z{JS;n1MaS2oz@AYdJz?|c5gN9)k0Twi6?{tI8pi=D=%E295!o)E07@Vh@h&Kdy*PV zzrtF@*K)?n!5Er*<1hJI%g5kgEpP zE;riy_G0RIm1my%R}`Kv_9Q#PgT65^cy`@_t>eQ12~n#2OY<^8RgSJy6r@Do_Lf>UyMie4eV^-{zLHTNsNw-NXwj^5s8{!_t zh86EnaV;#ZaR{bva{d$1c{NKnFf})!jb{b)+B>6Z&-t(05h4S2c0bK!644Pl?ERZx zN-=i=%J%!hiP?y8TT%iDbb|m9R!U}E{D&cdvhD|B3U8bEm6VZ zOu!@aUo?BXpLA%a4xC=e7DeF3+`o9rr4ek2FAtjUAlE~!XZ_0Ni*Ntc*8eoasN0vO zJiC3kPg#=^H6^J?DQMa?B?x3ctH6MIWR>3k2+}1Ye){hQe{>TjV4(>5heR%R^qo#X zePwO@-*H1aGCG}s9Z-1i3 z6!K!X;ks9SKqGA7{PUA%hxj}nl1EceThf=+4~&d&tcls{=egI#YTPh_a8y(Kz2M1f zhPY`)5?(}0TZfF9?}jiHQ?dfAI;cot)g0*#7<2E#B1CR5n)IoENhZMJI8)k`37Rk>xby5D{7~$^wpGBMwTwkhCt}WN zeUXdZ4psg-s)X~Yan-LO_)uL)qbA%0Z!iGOBRb=Ne-&Y+sE8r;{Be&~{4{djcL zNP;8o{F;dO;xi;YP?lv*((7T5%LL7-eyS73Y&zYw9u(Td0FTMeT5V16h54Qp!1?LN zvJb1r!H>%j!zb@4vu+JgS0Cu&XWS;PvgTF!Z?wUXwsP1J7dtopC_Yns z0V~eR~+LrUDy+%#7b+;p6Wqep~(5goec21 zKE)<=V^|fK+Fs}tRwlW`8ULF21)XH6)J-btIo9@EM*h|HA0;OCRD~;(QSX|w6uDVJ zif#|{sPE5nnZYP*;;EE%3nNR}#N(*BWowhT?@MrNbZ15FgLKcO6UMIVENNdHje}a4 z5@{B2Z`j24YEjK->8AxZHuNz#q1^w46D+u#I`^d^JmvrB_J0<{A|61nhAW7iU+N6R zFOOWP?Ma z=TD__e_8kr4212>U?X8WeE2y3QpLv9)O5a>A8CttS)@D>{;#aWKfi|apq@D!oOcq9 
z8qLhH=D{empMYIdGbyGgp8=z)c7;k#^oqa3?aktgH8VlsJPZfA{v_uqXbGd&?1riY z8?&zycUq4vW^G)lmTbNaLb)3!4ZTxr%U4gt^`?;0q1 z;?Z@p`jqi861WZ$juV-v=T+0ANkCL15o$=0wd-ob^l0lB4T?CPNHcU*KRwRRlA!j( z-?mf^P|&IF)NH}rdm9w6977^~vQBI;W2q+!b3awUEj$6u>wc z;V+rHnWW37aeE8B>W}d%tBFRdK-vB|#tDVwK};wsi3SvZv`fG_b1?@vCD3Q{>}$)E zy}xm4vUrz2W2S&}i+FWo4!Tvw+75aLEoQ&G&f|8bvE>)%QHT-~zJz#%t8EXIgdBxW z)X6-FbXfK-IfI=oFDgU##9q4FYDG-P>}(5hbX_>MbSbQIpd`~T{f~-wf>e~%ZSfhp zwkTWkcI;D+7e^0W4yAwpD|-4HeldSKRnT|eG(~WVV7`8=b%;a4o;3FoUp>9+Bpka| zjheOlKlDKB!0oJ`%%DYj*hqLtBqD@oDqLQ1;~yM)cjgjjc1UsKY=Zv_i;gH(}0kIIHOLRo)dv zi^h0@JKa{tK;R@FWcVa5_s(MrYkoqC8;!N)@5y6Y z_SmzDIFQIb)2WSzPa9p5X6ccIA1dH*M-TY5P?6f;K(qV$ zJ!b#05E2+E?xAP^EqX-btNK|nqTo8=lRWB92YG3M6Og3Ld>GA>ssK6=~7q z;C2zV>aX2sHC?wo)o@LEcfp%nt4n3)r;}6JePRjSVm*8$A9{IIM@67%3OBQYKDm`wG{yuSzJ!Ef>Qn3NvQ%9Zlh(VZ}jVTyf0ZoHC z@tMU0-yfVHEi6iJNHO#4>-oEQYMO+Sf3MWGXVh1po%&;VM6!E^jkePcsEoqb&@KNp ztg3BPx{(qXDkRJA1Li_R#H|&c1s^`5tLAL>6l~!{M;u|32Kcuv*po%(JKXERh~l}!ANcGa&%8h9v|#S3nqaN?soOmsUHoZor#zz8Q}o3;{)2{rX0Zw!);VIdB} z{`Zxo+q!jMaNVz8B z?Tt&I>C;>?G@_;-G^cO&GO8^t@AR7NU@F|dcU_)lM>@Z^X|JVMI}r|G{}8e!WpTTQ zwCStgoJ9i$X@On4NVN3r0cU$>HeTtKEk;BgmZa%v*36r2V-JVdOodwKJ=J-LL?Ofg zAGhEhRI}5WQkwm*6u^<~hx7s9sRLo|Erw%cR9Thq=q;z_LPU`-MSctVP)B!oWMGAW zaqP9s-bV1^_f?cYR}+WGO9qj;Ha!2UH05Tj;a!HFjbtV3Qu~`ls(>e>j_0|FpJeT0-+jxZ->omw-6M6I zpn8s}TAU;Pq+n#wG}CGge97ZuvJt&O*V6*N_qR_6fuY4Fzm5+g=fY~kG$AN1kBxiq zia}g-nPZ$F9)7O%bjd$fQ1rZOdsgc1Xr!*iP-#a;0!)aV1b;#)By#IpbsJk2Kf(VG4m06rA}F2t_^I+h z{)@dx^1?&u0i`f3OD8Ty9`+m&wuBUTp`}TzH2a&9&>CJZ)na$l;)f%Q%Un=gP0u|( ztSg#ii2R;oy<+@kiW*|TiHY`r?TRXqRS!xzLXv1Wa(4YqF;oG!e%j+o>-Vrc(AH*bT5lJBZjB7Bv?KG4rm_*sM3h}!_NR<| zxI|?9Gzk!{prq~8;+Je+7-cBOg1x?~1@OS-lz8mQMbhK@wSY1W7JF8qX_3PA;T?5W zQiN37JfmVq|ATJn7bdB@5$W#aK=QfxQp2m(@!!;JUFYRk~Z6sc`26bw((u=>V#rj@j+li$lqU?b_}VjJpaA=D$I<_x>ztw^Yuj#}bUI1L z9ox2T+v%7c+qP}nwry8z+qSjy{`>p)_nz#Nbx@$Zr-xUHrT7u-=*yODczrYA*r=!=uuh zgxZme@S{TV2;nD@B0Ka2B#_5JkEkEaXTdcKEI=a(Ce8YEWkT2UJ?0K9*Yeb3Q?jT8WFmn5Uq#~|9i*oOYh+GhAOk}DK%W9LW(IOpQa(BP 
zMl&c8?Xz{4U1sE`_02WMeOhpujW?FAA2U;5 zRXM0kO`5fpd4Yf_Jl_74;UnCq3~}^8N*>P^hdPW7XwI58!bVMe;dhVzI1FCX5Fh8r zv^&Z=mi_sAvvfI_X!@x2C{tEZGdl@-TFxC42E>gUoNnFquG1lAGt5EPbU(w6<6y;R zpM1A1xMjVA5q>HKay3!}5j^Ixd2ZTQ8<5H#Za1YZz+P1?%bYp>e&a_k3w1Op4fLh1 z|8|UceN|C7W?D7Ig_kp>v6&wo%L49!2BD9$e&qH0D0SmyL^E~z4ETzd>zba}cryPk ziQ3LvH2F05@mSln`YtI;@(wB@3Y`#l7yJqF_x{42$m-7w;kzSBR|pAASLf70Xpl5R zlP%H&``IIM=N0tD1OhF!qzf__?;p!BBVJZRb+|-Zy{7$W=62&$11YsGy3G~FyoHaK zgt6WT2Q*rT=*3Pns#>xzays9X;Nz1NE!)vO{~l1SJIZ?JFyAm-4c%r1M`*wx6iu9 za)XAIa3)E;RYHL+e}MV;dOCGTT$P*J{&KiNkM&|h5OqrytC+b@z^z`c0z`@^4h-_8 zE?8+fyT)r7H+hU8O>$?Z;wv70Y_D@t&ub+qK2Rb!rg?V~?yO4sESk6H@O(q|Duwk2 z{%C<&d4TH^Nb=<_+!S@iBw)SHs<}F3vn|R{v$4O=DKEOI?saGJ4U`FIbYZ#CK^~ERM@aMigX@yxzTf-f?h#&aih~$@ zranLyQ47=DNwG_Yx&)w}A%In}KqO!0LWCgeM!@NaR`utkK^XM#>_%(Y9tj2#<@pI> zCMa#Di*0qx*C|e~Y9Ebd0s`F&FQyoxwDG}kwIf0UwX{YYMD^;C`Yl&L4Qc-DX|1np zy-gG0VOonqb`$!6kk;@U>4w>Ld?s;XR)cY`1QRZ<{eRtPgPUo?SY_MpUJWqD|1frXXutbBfFElNC9gM&&R&}cW?Ra$T#A)%p@-H@6{j5qivO4;~RDv6UO z;SbpKW)WS~CMra%-=K+Ck656>Np;0~!~B}oI_Z}#D4eRKnxkbh2=9yqw<@%Ne_{m@ zuKlJeqI&^?)RUae%s&69f5UiMAUcyH9VU+f$k8MtB`?|?9-H$9%>ku;K_CiB>_9*s z0_In_yugyi^}Vm>C7f;5*;o)96}V+gFwr{(8QKPq^w^ZFP{M<2;Edr86}pSmhBUIA z_6c2g-8M)TtFT;wsosVEY=ww%e{g|^X&cOxvFEVXZJ|{`M{#U_6KY`x?jdV5(8^Lm z<5iM1cKLfwg8}mtP_fc9 zhtr#>cKRF#RjNVbo&zmwQ9?zO*?IUqb8xs^Vxy<-CMlEorh`YT=%9;FiWj(Z(`RdB ztvhf`g@ZU)wSfVXg^p4`1m;AW>Y#i(CcFGt>MP4@1nv#d(3AJO@Yhvb?T>&&{$i)oXbrv2Ssby)W#9W>>*>8dw=?jK9lYTgl1?mLW zGriH9xuRi9z~=RUzT>}Q?%VViVR9Z9cDt8i4~zow{#4Yo&$1{U)7vA?sGr0a6l#3- zldTELUmuoyzT(vU=P0Rf`Tf1&vwlOa+XE+i*^Xqg z8RTYa_#^z+b4>=@?Lx;y_h2GH^9hQQHCb8bf3pB4CF)nXI`nxH<}VW3pMeUkT4Q0m z-+>RbUYbYR)4^o8{a=-}q;J$%0V*9>BGJKjg{9dtj+~(=sPdcAV znFm|nMU~!c&faKux@^wDZgAY>z+{rgJTUycPk-A=Bkbq-DY(WrrUR#RuRVF`^ZxK zvguT2$hyhfSAG39d7Iu?!D5%%FUhY32a?s5y)^Wokev$SV-IrB5!LZ*XC7S5=juJ#sJ^M9D)2t9-G3HD{5GOq+9tW!FevZ+k#y#M?vmwiThhpqtlgk zI%|3|4HpWVv+yvCJ`YERy8_dCs`HoaN)`V< zX(@j=WZN3>cwB}DV=1U)+lPW1rL?;gRrFrW(CM@du}J!|5U!4H8MaO8tP$@3&6z$v 
zva51Jk%^mj_T(Lk*UP8W?Wgj{%N3hxIiRs-GC4*#*r{Rp7G#XkOcW&n4d-}tL zne==@be=|ryT}O(l2LcRu_(XbZa5ous&=IxvKr0@9^&lE( z4W?JqHWH+bM)5p3Q}V@t#g-rYOC=kwtSQa9)s-I!bPQC4Y(>7M4CZOa6S+q$pg+Eu)PUb_iZI|e=!+bd0*^4rJUXkeMhtj~? zSeKT`_ZX6l$!XKc{#$*osQz%VIUt4xr&z1lEpfHxt8O>Y-q`6ONIVxs}2EG&2;2fL#jFlhsj&~m%oeeLC_s) zR4nfEP-$W^=$9f5dI*Rr&cK6Q#+HBr(=$-gp)#+3?~*7;d3Diev3KM_KO9-(2RcTJ zJF9k5H&rs)Ko+k2*6Ede9q1NU<6kvv#X^gz1jU*~@5@4kTIf$jks0w#* zLs~i{n?owKfI`IcI=Pjhqf+cG*v^_8ufDxRiLM`NI3~GobYM$7(Y9gN{ZHHyYflXd zqN0?Y*T>v;SEUYxtL>a0vMRarg7=xl&IF!t+-^(AQh{A94L%&&UOqHx!4GqJkgtGs zkOucBtJT)_KsN#Pp5p-M_@Nf# zt-!@x@vRK==zO)F$qyfyn%$(vd~77Fhtki-=VippigKy9T!5Z~JZ--T7ck@JT}RyS z`6Yke-)28{O8UGK{i71^+ddd?J*z_F$QS=`U5V~@dzo@akjnnriQsi_~G?ohfaeJ_WY}@ z5i733Q>bW^SZGpZKoVxrwsi6$%bK?eA=q|6wLO=_oV_n^OQkSLF(`jclWDI|HXPsc zhdEN32UW8UPH6M0_HBw71~_c}o?wv8Wqwb}ooU;>&7}a7N}}qXP&6vDQJT#FJG}XC zbI6Hj0r}jW{a&Fj9+l#g4Ehq21kACLl^cAYEXP+B>IcWLQw0zwZ2jtj4jIq8fJc$? zjqgc0KN);@GWc7kI%=OpkjIoC8{{`t2dm!$QQqRZ-L!rRcGoPi23;t|wX;ClB#b(~ zI+AP6J`cxxffZAf3{M)@*b}G8)EEPZd;I+yEzd4axa2!lO!e{$je_Gx3IwJhYw{ zf06R5=xpoAXsu(1{-#%Q;EKaXr}t@BRBjMF5j$kqwu2;lmcJ9cFftUbg*Uc`qW@Q_ z^xP-?D9`cFL?{W#xu2wv`ext2(pW7!&QJY#xY?LWWun&|Qwr4J90i`+fNT$Z1@g9t zkL$oq#^_#q8qNtK7V;ggUy;V%dJ3zy9e+4{r#ER9>j-Ys%4AI zHu%y;`cFg9nZII>v12v8D|iqdBC zZM@l2c{YC>F!2rf13(Kwn{Xa-&rM`7>I?((6Jb%O0g`Vt1a1Yj3JgY4EHIQC8jJ21 zdFa~zKq3D@Nr3+0Bqq4jZB4W18vfSpxxuzOhy%QH8eQLr$=)?@pxf?@#h#rpID z_~s4!@{^$~oBq8Z@&w=9Wm zy4b$k>AnO~Ms1}(t-Fq$?X+Ci5Ow?nzbyiq+RQ*dNsVWZ|GP0L5qdZ3Qqd-f9N;3P zCLwtEm+1L4-0jtWx7rmr^bdF#H_Fnz45Pt zp7CgO^?{W5;SQy98V#vumVD}oTesv;(7$yZBdig9&%DHFnml9CSM@jbRMWtMYR9&d zbknpfbmK#q{(oU4As}pwA*t|RzVQGdnv25hAGL9HclbHg6Ks_!<7}1dY%~u4dbb9w z7@>I;I^2D1sSOi9wsXmyC&#s|->vafZea0L;mIOT(qys>z3FR_-P{Z5dh>QUZxD7% zs{Lf1eR4p}<8TEWm|MwSzIKlrCd+%=vyk0CWJlB15@~)j2_l=bH1Wb5zvTwwiG?*U zxIm)4X-`wL(`*G}@{FdeqNm=rVd1CECAi+L$(C<_0An5QGq3OZo_jT2F#8;L?o0Z`a%`R2Z3^<61}HCU?2fHy0GA8b%AnW0B3g z+HzN)4q&<@Be-=N-5HL>XoP~bRseQOU}^R+`rH?%noi`~8lc 
z;fOBU6G?kXbpExc&@H7Zd;Zm}woCFGy*~QJ0%@AsD`O|q*&ftfdPK`c97HlA3PJ<| z#8Skwp7+p78w9hnNV<^B^%jR0;h@4diwS@MbVwQz~KJkh{LdSy<5nyy6ObiXSkTY zkG3kJP;1oIxvMhy2A0AT*24pPON!3?8LgB&%;hh?Wq3j z8$Y+FLJ)gp&mahV7EA+yrnjW}E11O@VhQGPc=Y3zjEswx-<|Yp1xmTWNm`lEK+G$Y z;Rn(TgWj4w6^X@L_bpYXoMdBl_-g_t<1>}{k?K6EJ+rVt+}8FF{6U+8o^VUSy4@tLhZTvt(%VY;D}~_lE|0=T zfsN)@85jJYYxqH8NU(ebz2@>hV7hB)clpR*2KmwuRhx&6%uJuaV1m<%c)Q|nuZ9yv z>zDNEVLwNFjS@+cXP0>{);r)G32<5rubSzX=TfOK`bDB?|G8AY@GwOUdao$?@&Ox0 z7q#_C^Kd2}^V}MnbWUQ6zUr;Uj)Zjlw~mT~L+=Ef36Wm-j%NDs>=Ded6W9yEfrY=e z$)6>dHMyh#cAs(W>V956U=xwxT1iFQbs zK8pk?il46%>)bvot=2*+ z+3Wub8&c3%PCL~ZDTf+8_>fUm&UvILeYMCb+BR&hUA!u-0Iq;h4O%m5ai|0<;i^Mu7PbX#mA3ZyT?xFGfDF#P|DM= zk7jvlH7~rm+&rImnX$ zFlgvIr~(gLC^W`4o_M&&8OP400I3*Uxq6Szsf70HsT0gGW5xYe@RONuLiqD0Tc>Cx zd0ocaGit2}F*%->JUA{PJPVPrHdl3j%P?>v8g=dgltyWlwfw48!5?IpKWj5(qssW)*P9!!Xk>4T#O3LcJHei(*4Oe89 zh$p+0w6RQ^kwQS`aN9@1#j{N4d+$4TuSWxAF-0el*&%29@aI74m%oFY@%tP#7H70H zD&&h01;0{kbf=m~eU22Rw&tgg^(zlz)TVJKH^vWeWkCK^F=B#4Db177T`PZ5bMh1* z>vYE}Q(qRqG;0bN(mLn30BhC0l876P4MlE)%zz%zpxI9IFW5(X3W&cqYw%&F+v@0D z08P|}<7yW5+q+l0S_Wo!!UL0)(Q2p@D^w%c(D+z&AO=&Wg?xfdRcdi71FPV|cd8f< z_J?XOl`pZ|?!j{y$z83pEqxRON63P&Txwiy!%D2zrrTlS=4!qGd^w6Df4AJ| zh6e{Jj4L%~n%p2e97!L|Bt<&UwLWpt>JVj&1Mm4g&egN>z`B+cjvZoJ6V+XhG}Kpj zfKIJ)sJoiMR`yA1PF6_T1j6@*5!!r@AT#)Cn3V9!601i-l?4tiHd|jsZ(S;~*>GEecl6N+aYbq}BB~7q%uwb{MSG;PvN-b9aD$1`_p_a- zic^!pC;N$TGf`hAL#@@wE*SxrDYig-F6oQFT1_3kG(2V^xbzj=NYm^t=CoWN4fI^k ziVYt-XD3z>8D&MJCW9px6giAEeUvg+-1qU)=7yfE-j48=><<`jp4M<5rC2V)^Y5$i z4H=>f6FP;mIJ}8Y1F$oEZ{E|Rs2kFQ$PY@MSHcbMV_$+x>#Aamr~3Dq<*3+h93ix`)6hxcCq|to{xT}a{Talt|Etg z3qxJ_W#dn;l~|HClMRh|waEA!1joarZxTO!AB z{-m>=-13Qo2Z_IX@LSL2-sHTez@h7yO zk36sT146v-%gRbp{&%OiI%}{9`m+stq-1ng9?XUtZ{po$^3f!P{UjyG!^X@P)$g>c zVH)1g${h;0ftXMk?w<{6bt0vb5p8pViJ0g%MjA=u$%LvTKYM$?dt1Bj#wN&gMADaZ zr0c(j-14gpse_Q0gLd!;$YGQW4t096Kv5c!zB0_6?Vr^CakEyxeYY-crMMq<)V>&Y zCppBA@Z5IXe+i0K`w7fg(I6rEgx$m)IlXs)H=yVSgl;Z`mUy#j|3a}xpYyHMc}WZF z2|JT-*9U#bVuUp4z%^+1ZbfH`(0_YY^#P+LFY*n)%+tFLZ{mVBe4{)|^sYFUQ!tTL 
z>o9;k1TIfGF4VemRWO={mK^sV7S8y5aL2@hLw48*Y3BTDIJrxnYh>M$F}Lpa?Q%JV zS9?-3M$-K|Xf45c{Dl1*;Y5iLR*#*ra_(Dc^+(x^_wo36OU8`@r}BDFXC3s7=0)k= zYLu?%d{D0a3~b&9h7v(c0;ihAs+bO@Bza)cRq5 z1w#Wwc4=t#?f{<77g8{*o7|V-aWr;GH*y23-Ifi8Ny>dHuIL4aH$J?{-ar1PItDo} z{|+2oHGWG`fKQUfEt7@Ko&-*WDjDq31$-%)Tmdo0gZ|$TOn4#iU#+&T4j)i|b4rDw z1){(((}lu7$xE>28nrZu!fYHmZ%t%CNQ6npoEzk`TxbTKO@rxVzNT!2HuJ4;DP5Fzr-Z}*RV9YY?5D9t;`MeV<@7?k_T6_ z82F&pk*Ce-{ob_4;FeJSY2!gbZv&Eeu=^ppd;fJZEHUnLNaF1~evl=!iYqV+llO<$6`W5Qqqh4#!l~ZpDRIUs@yC?NqM~gaa z9N&o3SwU73`~fONko~aDL!j@Gk~iDa|FbNChrKKWywpq*b&P^iR$tz&JwJv<2k1bp zv_(YM6MEF+U$t4C^M z7se7i?0NP&senCV`S@-``J>`m*ts&hp$jM<(y5f88H0vXSvU|}prfc-MA@EKs6{|W zUh?gE9G3K59d-CxE^tLh2HR;@W-x=Y^((^iBR{)w?L0ug@L+2Fc|l|ui|_hIjT zxlVAGjLM4JKmRCN*W}mjrm2$!b zTm1g4S;?ZHKI~r&?CY?ZCg+BR>pW!mI-saAEQhZQRw=RYiIGtA?ko-wr#H&kt{+CN z`dE|w>3sv@yC!zHbVkrTVyf<4i;Se1LyY*9IWB&N6vWCRWf2N?ZP22{Q^WHqA(@QO z|5Ah^*zvwCb`qmABv{4BaDGj@e*tdd(R(W=w;UD7+3~W*2`l}){}MeYVOw-B)L`fb zxTk!A0{0j#xGC{o;7%%L*|O>SM)2k?Zl!|ne51yVFT6XyKDA!~D%A<5!*942I5<~S zzpl1+`F@#Sg$R5ft_h&ov0`c-p)IFmCJR&%)e!MrsTJ3~X9v&N9v`G6hiy%I~% z!5d1^)s%g^kY8Vsc(yY0QCd8?bI zy7t@rFXR+}g%$~-Sn#7Z>EnESk{yaO3`Bk9`Z7UTOuKM!i6h%JKx?@5R}kvIP7es| zekvmlC-i|K1xE1X?}u~u?8gV9Qr*du2A5B&R09fv%7JI3pwfAZRoGex7z_zJ_GH!pyn%zd-3z#AK%INR&^_E5<`COtj6>_&eLVbyKjZoWCB$mG>eYKYiJ zT9>(KHUN8nBI<5tJ8s%>QvVw208$3_e_`c&v#kc-@BH>14YY%Q4Qb$r1V%0Gfjy;# zdH#w=e@SrPd|?{g1~EpzG%3slfdHi1V;s<=T353|@U_A9d+9|n(TzHt>mn^V%7QS> zuLg061K1B70E`hcuQT(~f5#&FFR+P>GNf8*(qy8f1XClRuIztD^Kn?tcophy(tR1L2qTFBnx{<*$X5HOT^!Dk0(-8ruLHYkw{pjawE0JP+6?2-MYU_ zN_O=3TVS$1SvfM=O=XA?y$rz2ryks1+gl)k)&LJ8NN%O%;QOF)XI2VMbJMcOBM|TN8#z~Wudd`O1P~P5<%mU~HAp7>({NqPmZ^iI6i_3^=c-*k{(tvA_iMFaTr zd=*-ZAL4Fgo9O<7_4LlHPi0}nc#X2sd41hq@46U2PRw#)Q`1y3N=n3o1IX1>bPY{Y zx&^%}dN|vSY{z1lDX`vFV@5K@;KnOtM`YwY{8i9)d!N}V?p~wR|V0P)(eax**OubxjyIq6caz}FdYrd9G@z!?kpM|mhaZU{rX>P(|WaaC2dNvP;M`q{HPLH`l{Yx&Q<{^7HaYk3h+q?OB(d zPOTkJe#DX|KZ@wId`qQhQ>5*WCKJ2;I--x0qd}NzchkWxli|K>s^Pqhsj1VU-h%6s 
z=OUYL#oXh0f?)VDj9tY(G`K^wW#%Mk0-}p&51J)f(Y($GT`fOHJ0HE$Y%g9CKzR14 z*fpqcq?>`1E1cwUX>ag1;^s0uz*UU^>!5V$OzA~@%0)=3XTjiK{&7CMqt;#lx0Q_x zM)8t&HG&yFF1?y8;T6nHQ`*$*L zkivNz_p|n2KT?$Dh)<^7kH5;SX9w-oVh-m{86r9klaDZu-s2C^(=1zWw#yW+8 zaw6cBs{#By9Wv96D9{vbZCzd=pzZE(I-N%p1O&-LMG)R)c5#y0(Sg_4S3I1my^$*%B!vRq}*Qv z>TN$U3K|ZF@_p>5poh4;wzaC#Ces)NU*C;)0#){`N%h=8X@s$<+f9!ilTUUQjOX^` zs?WkjyEQY!8K_p9|9Hx8P@N4nuw9S8XKBd%7mVL>+4qZifFI}5@-3B1uo zDB0y0ZQtaTx*HA*vW~el^S;C`&a29n%m>_8NZecQ$<9C* z%L!cPJ@=`OBlAOLTPDKPm9lILzP=%cxi)!|5VGq-SY7V2+;mzfwkvZ_gv;#7M@pjW>Pg$ zVr7WU=z%mw#$n`~1L?YK<1E}|50GDZ%a=K#$+&Ib)xDzJ4!w^(ufe7=P=%)%Ew{_4 zL|fjo)euAijVXBn`W;QtclT7A^#ZfQDP4!R>%ddM=*=-9!o+Tt=DX?=%Ya7mH@B*Eia1fyja2czy1IsHxQ{k1km#EQD|Nl3)cAr3I%2=1?LN2 zUZLEG5Pu3KdN&%XBS!xmwb9J5+sRZ?`y*XWqa_pE%qzrN#g&e;+_j|@>Zl`Q0POi9 zQ$cUh>2IWE!(Q3O6)e-ai%&!=r$(pWT^ix<*l=Sj#f)zsewbM?`{T=q{JUu)FjUVl zXu`AnIoLXUtK&Uy`LVBhpz3069!Gf&*nKMOso)+T>cai0lEi!p5}?3=sT%p0flzbkJ`P z#Stu)EbfMv&0fi}C|@61*M>LCU+g6;g)V=hd@+TthA@9k8p*3D zQkRvP3k!CZ)%(#0N`c!9b3ifGah#r8825t*YgU)^Wy!@(b(Qn{k{`xYPCBbF-WHFX zD%5Yikj|6(YtZ^M+lEPLko`wEqlu$XY2+|FwvaRLvitWNVcTPVVsNj--tV8VL|CPL zjc2+70vGs+SNAM4j@%LM7s){3(IbK|{g$)_@$2r>?%7CdaMnS zIoQ2x-@f`b$lw`=CYqUOL9_^IWFh;Kleah#oe8H{3oC?gW62XmhYl97Pd?W?u-(c? 
z|MC!M^U4nq>dGb791mSxWW99`cV(^@-rFJ$>ahs{AL4KI&@p2z0?KM>_8?j@!E`wJjY@aLV5pwdlSt zWN8J$sn4K+KkeTo#RQEU$4Lm{1zyCDCl@(o$e7A;HnC|+d4DL%bPK2N!Dh`Y1DF6p zRV;t-&jd^7y|0zLZ?Ey9`Jp1_Kt%uEtxr#{Fc%wcJTUGOHHhTp` z_(GuZV15=4I)I`jKZU0J#YoA!m6XZv6iOi*Y z%TKifH)^4kzB&#Fr_#HqX08IW%U;1R3i^^YDA!|sL-~cg^81JQKM`0S{#4}&+szSk zF1Do(ZZPbP5RO+&35gsb^gYF*1!qlTpfgenYNS9HbAhi3x%RaGoZ#U=&{fq#dF4?f z*W2n@ju?Zjc6bvYga$X;bCg7vA{1pbTE9j9Gzt~h4plb;W(`+7?(kcR3Z+^0u1Sfq zYDNODhxCvT&*$7{PsL2tVUynIcZvJ7VxZiThFJz#UJ$x_p zuVHK;Q5{AEW(iO z_M;?=MJyz_cI2G@463mAV0fKonQQy8@h>;B5yD%g427sp;w{usRd6G>3NBPgo-qg^ajJpj#dkd708F(#*ob#>P_fDy26{%2UOp_@Z*(9@2*ynVF412XoNErpRdF|M|Pw=4i8` zpt5&jW=lc3*;*Pke}kpUu9|sHggV5y(h2zPRH4Mv^LB=_fU& ztMJLPCN3NRlJ6?NuhsY|28Do8^-boj9hRGuqE{hHokuE6#GLM=A&)t+Pc_z_V_hBA zwi{o4G>hp&x7ifqdgC34)^=h(Oo%v2Af1MjN7Z_OCXHzJd;9bEp{gxh!u(uK!QQgf z{83EU@YJMkq2^xhp!a=lc$FEqp}K1Y-P`rrKV!JB2#sv#fLRjX74OhJyen|;&`o5X z7@?4BZk7>id=k(!O5D2Bq~p?6-zE($l#zNW7hm{XQRn%`4PrLCXypnksh*3hT#pc+ z({;o~Q}HB)T9x+dhS6%r$L>Z1zSgXZ@d7Z7OeKW$WY-VFfWhB zY(v_o!sx|A49YwjvQW^H!{mkh^bDkcdz!X5HRVx^`OZ!iw`}^S!+KR_WIU94H73(& zS2Az>dWOD+C(!ri@jDGi1c-twOROj`*m>p`pm=Q`s_`N(VMmzGqd5R!81<9U=AL~@ z&{oRhdiuIssb)uf-Msgg>P-%lcbIA%KkiTdL}NK8u&8EtG{SQFn~#ggQHAEnc^R0Lc?=k66hJ@{d``m9 z`6?f4ybS~+Qf2^=Nmem-;Ehy~1aPhuIurRvQo@udtin+$F^{kO<`gBduGqrGC!hyU zNpRJ?f>>zQY|b52$OWDzE-oku)*Wj^<*-3lNpialUUa%Lpl273(1@B`!6Pw9JI*dbDxI|ZAGaV;ryw8SbB9yG^ZPuRX z01&p&tL%AvX=YBpC_Tf3$rG1KhkNFu1zUCcR9p4d!hH0he`z%{Y`We^6^Srs)SMOx zi?*vrMA(Zw!={4ulD?|1xlq{39k+NTIpmV0y^$`Q#oUbG^q-KXizVNCq-w z^^0f+H6EFNvPR%0^Ydty(G{0Ng*WANUIo`Yw&~Y|miZ4?;`X(2YJDFN18;u={QH5O z$%3P+&Q@YF2LNDS9p4YeJEHc+J&bU)wG9poi@w&XGIxNX=w|jqlZ%Jv90BTnJXx13 zSK0?)bvnDY2)F?MxtUz9->KlzMJaI7u#&@Xs!5?l!H&w`;WR`oNJz7-R9|zD(e4?{{pZ2fLo1;6T=HL8g^&8lo>oG^suJ}95dA5hevT~ztAVCjD>=G|DUIiosI&0 zpWaBeyl-dT=3nG-4>H+;Csp5F(*UM8!!HMQx@k(L}wSeZ|KAkXluIWQlvx_?njki~9?r9WVx6|afiFL)c7LJau z7SVv~uwy7BH#`A`9)_R{GdO`%bs0Gss1w{`I4LwcnEQ`t2D?m>Lk9b*pU+QwVrJ{C z@)(~1CV;-e1kNpL&VKCUrT>Uwe&RA;KOfjl1fqfpv8V&F*qQtf#G0c_onv}@0L%t* 
z#S&XgwFMVH&2%toKlFoLZraZZN(&o#qn3j#R#Gm!t=vZp{Z1AirowsAntn8T@`O?J4H|Q}$}@Dr~coU0*9* z-#u3Oo@1Mg9(_Hr09zR)BFkRjoSVrV8++5E(Xhsw$CJ`@B1VAS?auwMp|O`R_mrK; zb??+86k`~uNw`^r|NGaG6}lJm1oVUHuU9@c=M;p$k~>ANd0`ha7vH%!IyQ-CpaPdM zinE7>hodE4D7QTg9H*5$0j_wLis(ypbvoWS^{=FS$_SZ6RS)-PiZSpTMmnxFrw1NW zi6w1wqjkMW+q!bRZjuDmb=JrCIy#Xo?@)+;41)~aFSqiBsU)v~J+mS>LWWn*6N=Eu zL}m7ZPO100XeeK;Fhgs=;t(ooqzVs~LErOTl3*6A{s7EAnplMpoPMau6aVdw4LglR z-TWP#oAb9lD{d*8_0qngvfrk%w1#Bvv@J-Ij0*T&kcogo63N0<+_ApD76KX@4nXny zbYr&>XF2sX{dq@s1;4!s;7aE**1S|2+(SZ!BO|$;J2z9It8n2#~wq_Z$1 z!{asl?Muh)7W_+!)k9l4_95fipVf)*j-U;`=DPKl=jT+Hh>l;kbF~2Q923%@?2Ks- zyYUYgdr&QxNpj`6TSr7e?+o_ezaDso+NwfzA&#W;j&xx+P$#h)oPmNefqp$nqe!%H zQ7ue7(V(kK`auSzn~+p#_$GX6>{q+Rn6MP@ffv{%!!$CY>aoL@Yk%M6NyvyJP^>*9 z``eIICikXP+Nx0Rhi-X{(_pZuG)GvXd$%TL-w$kD?uY@#_1B{Brl5VTvb@RyvM?4L0$ORSorpejBywSJS?TS1oL96XR4Oriojjzx2 z?5enQtcCEFtomB&*+x|WvwTbmC*`w>G{j3u?0!x{^W4!)(+Omz*-FGa&-$<$@0D|( zJ)mZ+Fg8)}3SLWD*vCy&reN!btG9<3$=XUZ3Z8)&SoJ1J&bQ|_0=%f83McwTDPxP~H{#H=k zzI#S8=a(a7>^*A^_Sw-<($B0QsHNF?iIMCXS`MwZM^irR-Y#WL0jWa9NzQL)dY)HB zW#4;#W|9QfKxdX{v4A4^AXB8#6ydq0Bf{Orv}cQrN!o*yL5uFqV2^ho6}L9kTKjD- zwu@Q99IvPTiV(|QOPM)S$x|(zJ_HWQ`+CR|!pTx3yaQhO+Dzv?ERMhvyeG2$b{2R} zNDXc8<@;I4b&#}9O5tocoi-~ge=1(oQ`xemSIV(lLc16B45hnLhF8gESn!%u#${Od?`s6OtnCPL{` zs_x1%hf-oexw_xY0Ax05h3ioxk%lz+Cg?xu()N>8*kb5StsVy(z~JDLH9ROLuDkeK z83Is6`+a9}Hlu@_L=4Yj%;{&gwgOmBBVB+t2%qX}-DMmIyFvT^Tth=&LVJDo37;n7 zo{3~})|$c+KeD3o&ukCcbKF|=X}q-k5t6)FRC5AHn&f4JD<>eMbJA3SdKVz0qSmfm zx~|cLIB1kH8a~rXXQ*oG|wdA(nGx?Ar+&6OKzqk#ShCeGP9-dW%wBVsb;?8W#TTp zJS6#T8jfFjyzWPk5q712t^6Qa`4Z%Vw&I=r0`+1u{|c*&<@w43l>Duboo_edkO%V0 z<}mLvbV2ekq909~1D!O)FL97(QyUsshfXQoOw*wdgH{M6QFE`#3_JT)yhrs%v2ES0V%@WE%Ka+d zw|aXU_OaGJD?Y~lmBO>?6)&v6Rjl3o?+rwGqV!Fi)!3QI+OW4$QF<~JBH3M4wz z@G){+_wnFHgL=&NuZH8S|EqUo}1=vwtkR2=)EC6)bcv^9XIn{Qz7~Y{V(?= zt-h9hk3PgK+ulnIk`-=aJNCGqtHz-p?0dZepJiazDDv=%bwKE+sONVme=piXXw&av zPQ6Uq&-uy2=>2HPi;1L+{h~Pbi+EpZxa~!`ZxpY1RSYG%#F0t`O4(n}dz1cs?rJ9h 
z90$r3HdrF+>_zH4rX1;gu)dn@BH7%e4($})FOD>|Scf{?2Q1`FP0y-85WH576sP?*M;{tk z#`-dn>UNDxp8*m6w;yAcRe`0vi?QbfG5!|bj@dPTh)LI}2<*Tzql|I>$h{}$QieWv zk`obC;zi%b-W2bBTy-zroRR#icbz;kg!;eoH5C^>>`C*v%)j?lDi9y2-<#i5GapS% z`qp&Dr@W5NnSU`&y$3fj*#Rw9{R2IrGn_X*b_S z({I0y@~wWIBKwtl<{cX9(9YLhln;dVD`l`yN#svPP6rjv(E`;jS%J={>`#5=#0Pw6Qq z&7jmTnK8G~%=K5%l<9M6>b*aZ=Mje9{e#Tx-{%%F_VQ-a)UV;j;KX5T4NzXGKc*as zk5tDD-*Ouz3U8z&SKs`vbmWqMq#>(cjG~Zx=qIg|bb@o8qOSMfr}y7~n?CHp5=u`S zy|;HS{o(Ks&A*6yw1=L1=2!H=M<3AtetHWr3%>jXUio*^WO}CZlNl!X&w^!(WsMuw z+(S=1{Unur&w9F?H;-maa9fU->IU}jrWan`O&$C9Q|r#>=`SzBXk1fEf3-+2j%CjY zER9nue(yn{9Ky@dTL^!^58eTy`|&b#&-04rU*KKjx2W!Cdys!O(RzgO_DZaa3QoZ? z-3FpxqiheBWPbgEDsu{!_7?o93jfQ~pxc3csk_wmpDzWi2e8EVD!z+B{Oj;xu?1yO=MOe1n)A_)u{nH`0w$zd z{}an?7;6XKLKxr1#92Y4Xz&3n&8>b0L%WX#R$op1Pyd=y?H^ET`!h6fH<(fMWwkH0 ze+TuS3++nyKC<{s>?Q5SzQb-B>PcY=`ZGHG=+9)ip_UzV`1+fX4~MDF!pkW7^$7ZP z2>qJc$;$3g@b+<QCR=@`OHGW_N{rX8LJ1qZhsNiD@!SZn1b52{hO_&wZa^N@;vm*gLHy;mGaRqBb+|AZZ82QQ^NVq z3P;!1;^;abUpIa~Y&sy91AqUkAG+c`s%w~geQce54Q)SZu=c%Cd}pPOtAhy; z5y0C6&b+M`s&o<@*Z)~}K~V4d-g&tro8Km0GO_wk`&j#zN5b(;5;&cu=wT`5+s1cw z%W+)11S3HIz%pN)$anxP6<27puPt$)6OEsLf!xMiQmTp>5k1qF|wY;OtU5oF$ z_&X~;){aNLtKFNdBksB^`}iXVBo|!-t!ANys10 zT`%S6@2ujbPut5jd=LIm@14%KVOE`e<-a(p@2oPG2Sd*7Me?Ws02UERL_t*Kadh;* zKDw@tr|W*xzFf`=kd>Bdcb{&+GNaj_e)3@E;jrHlW~TBG)Q?_y5cs+*vBvn8s_-6h zB$lwMLRzU>!n6CfY@6)cz&`KX?i@Xr{d$!++|VXz2Fa24{*qOwyU_pWx`eeyA@q;q z_}V)Y@{JoFq&|G+pEvyk(n$B|=rMJsIoYuG3R)bENel7q@Zea7y!<`Z^-UiAQJ~(B z@!BvD&w0{CC#IRq$uz7~OvhWCTC6g-gtzKGEJch+v*Pox@|G5CRj1EDx`$|BeW?1_ zS=q)+DG1KpxNUFHAI^O=npS#RsP_SAa4;%O+Nag(x?kV<#oK%gkOw=DPvF?e11iKf z79zGJ@Z{&M7V^H#KJbzKf8Z6RG3(+^VZb3BK&gL0?Q z2`9K$jLmXpS+5e)^d{1uJZSNWyvt8hUBOWA$GW9J`u1H~k!fkhs%#TR#);{cBK+d@ zjRd3z)~CxK6MyW`>i%%pQ=AyVd}Cu=7ktU^(w#u!q)>%X{|h2s11P+8(M$!qg)hAnb?5kW^B`2qQ-3V_WU#=ZXf z9=hhzbi{dn$muW}`y~S}lp5r*p8>cdde?tz090)lOavBQur=HC>aIj+=O&VeOj%YoDZ< zCoQCNfh~^DAVN1z9Pon{m(cS2@Eta+>YTj{AIpwYed)*y!~V|DzE3FUq|a0CsF3M> zFc0$~<(#<;w$My!K@j0V(B3#;NNMANK0^KcLDbuaEU|D<`eblIXnzV-mieeVI} 
zop}4a8&kpcB4a;O+pyG zrkN)%pt+hTKEvMM{vI6w#QDsHi)on)rhI@pkakF>Ek)YVe{hvipQrk0ShiW+xJcr7 z)5-H`irwa*zGFTBw!Iaw?XY*3bIR$Iw+MEb;DRCB{h{7vVGNAa-hCu6FmjiAWl-iA zw?CQ6N*r<$gYm*-{yaKwxksh|@6sOKD`jQg(sOCP!8_(pN9#KT5u-T^m(mieU*iY& zQ#Wj2TB0KF2jW5}FPLx1V34eMZeN>Q(k7PJH!ZFw17Ph3dt%}R4SE06e=6(imZkLp z+Xq^3;!|L5Npy-z!~71|AgH7Y#z!W{as6JiEB|G$VRFb1YJQi6$}9QJJVzfelB#*H z?g~;qOPpS$;V~Nndj+fnx@1AiOide1JO#;$bt@(xB&@xLwl@`43N`|hNjI$hhHVNWgkkLm2YcL* z_s_o}jzB@BvkNF~1VUl4KUuZ`+iKS+rWKWmx`tY@#O~RyrpS7LC!$rdTyaUU zDsS+pw^^mtwRMZ$?rzbIAJdR7(Jac#szkJ_S0uU-mJhK~(Z=aWtE{LIF$sSj6ICTF z59N4`bX+!k5ZUy(We0HpZ5~E@hwvNLZ4T-?fW;(=*$(6VV$kB-P-iC~H28k@NO+BY z*ufj_RWyfs6>quEeWBi&Cfjx_dsRWW_gb*-w(@jCV>sN&K9sqv^orWDGEp0ei3DIi z8YKrY*{g}@-mIs@(gRTlvQ(;9(2lbt;V?jUN z)F1>&6Pl!jsyURPHc>tZHiRU~AGTT>)L;itBZJ@%KtZHNfm9|9GHObkNR=HT*^00m z#Z_Pa3R8tsb*Qohc7Ow%7ApdUv_8xk-{I}=&Ft*W?%l2L%*8aYBi+r;*PEF) zZ|Cj2ncZJWLpXN{$U})6Nm&ixL$9oi05xym>(Q37>cfXUxu1Rbn5$C8_%L+}4pko5 zX~bLCVnSJg2P{SHym1*%(P^=sE}k;R`dLBsTUb7-l+%wl^J?t{V+>*=afJSm{P3zZ zSz)?0v4BQ8OzfZj7NIO-y{VW!I!#rdep9NrOn!eWKX_Fc{aRn~v2P<)mWPdIRu;7f zGm+Bb<%&qrh=WIAlSlkSEhjCRT`H?Gqt~imBjdR2Z{|s_#5f{vJ6r0JdY;rKudH$z zbNR8?3Qaz=rnrsW`7U!DO(moiQ_3pVnX1(EF`2OTz64zc6p=D#V_rh0+W>h#8+a!PifL(TZ%X(g zpLVj)JcFU?m`Y!vgLWPMC#7lmwSJRVk^wNgbN}8EkFuRys#$2>HM6rrW8mV4Kv6gNc zS8a9~JL6xcL|d-zdzVIlihQYjCB@o8Ql?C}ra1aHQoWK!w(q9+P;vw^LcKF0FJsmK zs^e1iCK@SyhQ@aK{>&3(zk)rN*dx>yxx# zQ5kK6PGL8eFw4jw&qgV(XvP!N(9lR5%U_}&-+q$bSha|zuWPd=GF2e5_Z?afWm&hY zLSQ^`{MpZcPYV_-pylg%+G6LZPe!Kwzm_rg&G+mRHi{{a*p>>Os1-aWe&)e@(YM`U4u@ww*?2Ra5+1+5C)9 z??^6K__74i-1DixB%|P?5@RfF&#y5uD_PO%80w<6%P~GR9kgU7KM@tvbFX1dv}YCF zIj@w;OMNtb(Kf`-!c*tJ=`cSK=`Q{sI-`fh^$>kr`5p`c?X*zPv}Z^sEA|`+sRI_4 z3DAaT6&Hssc<)NV+Ov63?pg7^mE5}$X$~`>G6{LJCC$#sKMqu;=KS-MBV3-tp z4Rt-4X`5y|+n$w~I}=2BMmYBgAi#q{=6V|;yVx&C?zF{>X7DvwcgS9B zI4t&|8hG=-bm0b;`&zk&q$1(R&K)so#lEB6EC+j#Nh4XCR!m$Ts%M&Vk3ywg49k73 zVBJBCbsC3#k&ZfPns4q;w$F_Zc^PxFDP#4F=~N-iyKZpq(tP-WWFMBXOujn4^ zsps;-0Xt9(;BbmT7LeU1l-UVpmk?F}vl*`U8l4l`yP)_X)b&>juALI+=ql>+O;4I9 
zQmSN7Dt7w0S_Iu7!ZSg8PXr0>MpLvQhrip4A3j|v9O!F=q1iO!ZX&MA!>OI~~ywLyAG=RDZ7o|_%1vGcC zo__gwlJA_9m8?i2awY0~(N`%PpKsiPwLgcY?dZ1=32Q&XAF)l)g|lbr%ozvdJ<^n! z(5yp*6-^a4)qb?EiRR5Kr|mm;!hA8GD&NB_N_DjCS^2)iRJZ3wst?-`X`$vHSSWk> zCM>RzA;7%D-;Jo{i}T<6f`u=E=yAHT;}gKGN8tZf-1vdT4!A zT@C_4+FCZ7jsqjUrw4g~5g$F;M5Wa`sR!`q5x}3nzY7x>+)TiqPsU|G=9lQomQ6H@ zHeVh5isGHSDYhnrHvfwJXtNufyKn9@Vv-#DT_Yt>cC6`5p=b(zcE2lRyBpNI@1FZa zcN2f!OII&^OXHCc#g8B=_(M4%Plg#8%49Dq7pSS?VcLdiv8s6)1x|)26zZW+A8uT@ zp-?Avh6ZS8XplOaR?{Nf%eD1v-i3+q)ha03O8Vpeqj=YfnYmcMrX&~?a^u>@k0jN#`GMv4BBJg^h28`i^ zN)Qjt=)e4x)yiX-SOQ)T$L-KfVc+NLjL_mR|Mj(NJV&43(%|D&pYz54c>CMpz$D$dRCZ?8+r zGWc4j`U2=T`yw)L(CZpn5f3sCam6c|nxN*ol z)~w=7Y8nh>$y3MLRhshoJuQA$2h*BHx5Q{L;m=iHbLB&JV~(wh2KzgB(V7p8xMh9J zJa*i!t4B;T9n60`_Lmx^Y1tLB-398MX~sL`s})L?hD%-tynOQHXs&!tB?WO{y>Jpr_LA`>LId+qwyrQLICsir&!U~f&w zS1l(cMxzv?A}UVFn4nRdii`)+rRfwL*XU<77J~(~hze6^I`TS#qAmDhMiK5or*Uzz ze8_6d)z}1pRGhBODm?|xojYKTdzv&?$aWW~cR_QMkGcSXx6o#Dqe=0>!Dm}A2U)R| z(*g;!HHObm6crbzj2mZD7mm%TwNjCU7+5jlDfmCV|h%W4xtb|_Un zZ{EjMCyr~OtaL@96qiy$Dow>Z%BKYNe5$OGsmfIz_C4ouz~g|&0gnS72Xf_rkKdaT(DkfH@EbRM zRxEsNXfX9k35m3;d?Ju{r5Ms)metec^|(b((LH_o3@v_sNv`d@A@kofdsY*SULVk5 zJ?C-2{!7lU#*%%g!#wm0`XTnIHAf|`B`si>8iZScN)q{U8}U4 z(dUgrKRtFccHP!zeFN2&aFYc*KF9(0_~7wDzx8!_xJ4-$U4d{MJLT(zEsLL6OeM{N zTwc&HErQOn&ajQ97w|aXalqq%#{rK6E*!A!S*d(VaVekU`I;4u6$hoH6p+oAi!|?hrz7cXyYj(MB41{QueKo^$s( z`@Z|$cz29fqefSES5SAv0;A-XQ272Ek z2mp`(WW+_(y)w_%Err#z2mo7m6viNhuh5v(-{EzsX_%Zq3o2MuzB}crcg@u&hm{rX zljT^-$z(J%U(ANlMTEKPwLDyUh>b&j2#ZcNXCJ1xB=~M*`kwoPKHq&3M~D7`8)l44 zg*3B>3g}}&?tAffdi?XYWl7tu|62ls|#io%beUm)Id=Jkgc`D!-B9-Kzc${LJ{((Sq4>V04?XCeOL#t z+ZgfjZy!R`?|gd3>s zqogm(gDg@}dejgA!qW}sd^YC;0zyi)nqpP=bV9pkIk9}cp=O323sxFBL1PV@I!tj6 z7u>u3H8Yba$|)By;6Ayo+Q*=otM8XooA>_6nm5}`hJ;+ypO5G4U4xRaomQiM{LI!F zgKa4DZEnX;uuVazP=#__yNQ}G4{A^AE7fU`@^Glls54Req>!`2Gn^j)howou-+fA) z@H5$9;QeW+RN#wZL$~*(pwyH5t+ACXt&vPm!)#)kS`WKsxsLcTi|=cr|3@xIr;l}EHi1kc4$BvND(v_21Pe9eO#ETYDl@jKjIRXuAN>0jC?lngKyx!a;c 
zxQiB1?^>e!dk4qfHDYfX{QAPs#;d4s*%PxhU|`BGL*`Df;Xgv7cH8*`=j(vBk*LN9 zj`&R-Ae8NR+JAy$$#FQ5akLmrk-6F4clIK#*>$HmMX`Jv%Uk@2{_Y_4xahs8E%(7Cm zV)8m58wwBY1pT@v{nrFbKctR{tF<=!eU_$Dg2Cz@+e_8Zt$OH{ELg?VHK~rvpXHv5 z0UdY+f`0qWSs4}{j5h-QbR;z|CJAO7F6-($6V;IApn=mwS0p<3RQI#VXH)stJZc3a zA^xQEYXQ4mR6X$ts8t|4uKHOHF~W11>Iqhks_06)Ek;}=Xk%bhIVIxZWyBYS=59P zG`9OOGFW1^tA@|vN=AXC7v~&TvqsI~iSGfz?q|a}VautdhQGz3K3zA<;NJCQyJ2SE6UO2R=Ci$`Ev&9!P3TU=e4fb5$%jw#$}hNw+jg zQ5#05=Z@<5#c6jUnbn{XLsYsYjyjl<71nqo=Q;5vOW%GDW7%e<*Yw&>9V3G4cJd~D zY;yc^owI-Adl0&6F*wUJU4_XIN5gSqlfE{KeMFzZ`mcZ(;bSjQ?}l!W5gl*5nS@qv zi;97zG(bbXI)6&-#yz8|v2e~7FnOJi&5g}{J2Em2KF~D_iLa07YwX&Q`cxXgGx^o5 z=M|)O4N*zL5T#3=b+AAxoXbwU0B93eGXfn&MWmNa?jMZ;3l}yC$+T%b1BvDpa7aVu z7{e<&U$SXEa493Rb1u#Aat`fC0_MMRbSPyC_-D%H#Kuh2JiydEZ+>i^wM0EOu~;x4 z8aw9tyKN%xde}YcKWwACyi>}&nP3N57RW&XNKWt49(Nn3C%UX+9N5su z&|Zuw!^jU}J!=bfd82dKf9HPY*b3JEN|#ll*dYCKeE`wieA(?>wNa?QX}vqN!I5(h zLGO3_Q+KBiK-c`Eug_Z;OeG3$?TPmsrDXxcFwlN{Y)Oly8i6M*bXYN6t6s!1(V$>J z65<_~gd;7SH=Q6RUShPt(l4@F29Z zEnji^R?Dz($Cw~!-JDSc(`5Q;EdF7FxX>LuNw(5@v*MF9Y~XsC`^)TrOQY;F&A zjSiIeGHXzAwMRP74Ah%%_puibvmI@$7?$+K#;O+}=lgFrWMy%=2D!bnZ)nMbr@hj+vLH;l6>Jq@|w>+! 
z;zCN8^JWx1c9VrF{dZpna-%|_d*y_T<8IcC)G?q%&~qlE&Rvj38b*(2(bfaA5|ban zCvlmZJ;dEfy*B$Gx;sP9QnLdf+I(gtGAQQ#jr31b03g8nc5BmUJ*~eJdQX8Z@Wl7QmG0gOK{L?>QM zdC}Jo$8n)0ICg%9hCMWwSCBe1K@Aq9-~hT6p@4&?%Hl~M{H+uV7as=R{l5G7><=C; zUXSOjyDy6u)(9A$W-*Q_U3sB)n%k@FLT<|Tb>MWg8eWsA)irvn>EP74*3(_JxOr|0 zkqskPxTpJHUyiFv6B|RQGse2s!fQ~<0!MVRE<7m)eL}h%Y#kQf_m0RsA{RGn;==;~ z*4no$-zH_+?4SU&$DRai3*%vTWsSTYLrzoLErQr5jAw;U`Frx!HgE*w)2M}JYhA%~ z)YoD^wEN55J}u}8053K?{Tj5-W(osh7T_kBFF+o!xej1r@PaKm(c5C7wO z$b;xtY_95%^^d939pNae55xNx4Izz*+=qkv=Y;GJcaMAmPn|eQGxLPu?skb?fbVPr zo6h2QDqK~*L!=B!N)_S3xg*chLh_YtI&6=FQZmLa z(aLm=_KszAXEZzo;@r_z7)}F+L=5Qbm~d}bh584u(XCCvsnTLnGJm98%)>0>@laLr zs!$s5{#}P$dEj$^np0VPf}hXA&qLHfkmGUHx7Op90K=K+V2q?cLL{B-^)yueS!MmC z0mS>K_`)122d*(t)E<-}p;E?G9qxT(8-lE4RT6-g`=1YaAuWCXMzh@4Eb4-L^* zwm7nv|iHCnTT$$}le;eQy(2G)&NvO&sf2>}4a= z5k#t5c;$pHM_~~vstMU*{I$Y}k@a2Cblhl$pK5BLJwP>Q&qxKwGx%ni-{KyT?&QUm zE#>W^Uyq~33aYF;&kBw_(VxZ|VgWZO?msw61Yt$aiT#rp7TC0Z|6WxzTVL)?5|`Lg zCw$hKq(=xi2z`eMv{sU}Ic?y-fU_^^2*2PM>2Ah8({8m@j1g=vAMw^l%^Tn~8~ zbM5-Z{{&J+=pTG`Jl@HAfoYom!wkRF=5_g_^h&?SNUMq$H(u)ne6iEt0VRY4PfGuM z`RYO3{ldn2wv^2`?btPW#^ZFW5OE*(F|m8C#p|+nXrx_pLiWjje#hwCGO-(CI_oky zdRc@Q=*?v4FBriHX7pI&vSranr!y9^3nFYOA{U-4phfYxI;rg+_@Xw8jPTYhh=3w& zcV2xI@cH*I${#*wPi%4P;Jmd?9xHh;MicpYxLETSmREE3aRa+u*$shEO2b{w#LSw# zIZecCkA&NX-DaKOjkWlLyGAOk(ec&>>$<`IyM1fV7Dgja zx=xnY8%%!p&m}nqreXh2%PbLhFBC12$hb;&)o%hMHpC=LT#Yk6s8`p7JZyZh~NQ1^UEp6K*-^|;lL1avl*dehI&St`Ezr(+k156r8$X^MliNoTdx z`Bw0wYyj3CjhuUr{Rhq^)(7GxNb-_*Y#-KtYSrP-DIl1kzzUx=%4!q2dmmPBc9E+h z((yo%hD*SQtGBpe4-fmexSeiiOm3Pdv3NrXMk#0~v%mnF;#%!JAB~8#TpQvmWm?cT zf7*0Ox;fm?sLLW#U@4e~B+ez$(xs13kkFQ?d0~Bm(k2d0xW}p)JmPMBJWf{m{od3Z zKyp7n@29M(<=_KrJ=fpbTxC@ORgtL?7-qk%?8=2!&f&{Wc43bVS6Rp*me2dJ@nZkl zK@1jr8)V>IY1`%bBgm0#oR0f=5kE_6S@(~fRasbhlQm;sUwb4FAY9;;g{7)`bYpX z5+*Rn2;?b@UTyF8{a%<~RZ~+^wX#+~@iQ`v&53TA?KY0f9p8*&$+dTG8<0ZJoIYt|L*QOxnH7Cb2m)gcaWut#35l*}xlHFS|ilV_> z?oe_@oR#}knvnP5E6Lr0+aU~y`WfW8$=XbXpNP0wwvK}?dVD+R)8Rg}#BPznv@STi 
zsj3y4QakBg=ot*Ma&PoJTlWjpDy&>SQr=KSh6kXFmL3obqc^R#K)&78WgBDwlf%?+ zkmt8dotttm#Q+pVRq)Dj<>=o506){SVWBK&wE~xZ5(Wc8Bqme8 za@@?J15~G{VF0h3G=t}q;(-hGu|fy0)SuM#DAdOyI}A9U^FD= zbImI$<(OlNZxI2xT+__2jx7USCV-)VUFPoBGmJCh8@9dSr2Rq9%;k?b-Az*CWgp0X z5L8A2th{%*dVHY(%32|T{PQ&r6E2vH9~r;XI2=4B=9}lk%#Sl#W9t|g5iXg}582J7 zuNMHk(gHPt%ge$t;m8UL%PJ79DL;+pxi+u7N7ry1<;l&fA?;S6aHRHb&MlNqw||X6 z^%d5%QBF(*Gt)%+{P^>p1|ChpGBwYAkW?^WF3QOWiNNum1T%TL{fO}yH>`TJ6w;(|Rn>RPz}UZ=_40Z7t=(~=}@ z6>&q8+RUx-G&HILyZii!!7^@>ddiqM&uKHg8}fLG5LY%h$(^WzZ&!s!e8pzP9619|ie42Y#zhNfLcz98tXaQF ziuKI6pij|Hm&s`#$jX!=VtIIJhZ%9MynF_Ik`#L~bT zOX#}LvbC^c@ulo(>{(jwW7g@pGv=AitRp8;1Xg-44G;ULh`k{jlI|p!ei;ybqn)o> zMrMudT9B}loNGh9i(H$1M{XYOipQ);aZio68B?Xa97ClJMc1>d>cm9)>k|0BLJCP1 ztoZOyR;pL{^P#j!kDI&Bz1yH7b{|NXy>wOCqBAvJcARjvfW9dbaZ)*{;&^SMy5zUD zPqEkKpOK@wg6i1E-oUMZ5U~XnFUveGxViio(sUbfUS0FybMC?_)pg<#D@@EBixJ^kLYR zScRPlV(XFt9GWLL3J_xCM(%_<%|{nTK?Jvg2bliy(J{#FcdI6g#4kH2n?1hYQ30i? z%d@2Y)D#&=Bs2l;Zu7&@u$yG3?&pOD7(UcAU77tBQ8P3WJ+?UGzgb@I5uxB020I7> zQSD~s;Qnfl^$(3*ND5+F^4iZ*P9{FOPZfhWEp3y0YMb7M5$9ZGV*M3glcM6Nb?&l-B(q?k+wyV^QHH+++HF%r)nY zqE(Q@AFw*#@*Ys)MxjeMxUXkRDktYfOP{Ib;DuT7wpNm)Tm2#CvWA8uqpO}Dl(}18 zs2C0A(a-YF#+1^Q`|SYJLavcK&;V5sn@xOx$`&UmweqPF7i9ZQ0#?z%V7?z!ZT@m& zI;8LB+hYux!P-rSZ?oO9@2grxddj1xkfSS;pn&>7?3QTo zU_aT=qQ?8I;7MZAxsy)i%@~fSXfwRwC`&*@N}*9%zh7&NJ*Kq?k}}&nq=4P7#C`|8u5k($Sek^JKidxIYLt%>{*Jh z^}M88v)bVroYjZZn0rxI@QokM=UTD=q?dz!MQhtim8@39eBpZdDc|_$+p716P;e__ z^u376(6#}y&7CMMFsIa~!Enw6xna#vhSH)9jBH+a@Q(=q9t(!>KuIt*-`E17I-6J+Q=RLaZMbraOmNXynt zPyTCt6s*f9l(l)MN_-h5cXQIcupV}P|M`i#$JG>B1|ZCa@3{f0ty&akXC%DtrB?3- zZH|;shAtN09nU-N<*P*)2oMi=GuLUt0+i324<$Zfa0z@TwSR7*S&#|d%{<*eC(3Ls#RkPr9! 
z+|Ng6HIO?{SNb1Vz|Y=a{X_}82mGJwT@QbRw=JmgW20C--89pBjEwt3z{H;)3IQaC zAk()K_-@O6>ZhqQ0j<)PmFvgPx2U&vkFI^>fFJwLuu@@yO@*-JhxA7B2!T<6YS$ z0g-iQ%Zn(}iQ}F(wzsTcJok9jZ-nobt!h#fbHxk|Cl-|RL&1TNtZ$nrfy2^cRoEoE z9Ml>40UOX1rQ#2@MFl*LL>E2Td}a=pzB((9Mi-5>WG~uGnf>I%vX!rQBWi1JZwfaF z)xrwfBuCK{Xzcu}oz_=qvem~LI|fSH_E$9o!Y5$Ja_v?>2Vr^5y;zdrcHFs7_5>x*gO27u==cr|SeTdD_-gv1CuC(9R!l2`g_ z%(#KDV>0|-YK#Lf{0Y#AIZszsu@b{ye$dKoIuExFhx=Hs5KG(Or#CWcKl_v+94X#0 z{d}|lEPOJK05O;r;0d}c8bE{~6r}kqHt_>Y!D+|#z}5j~zGX}9v`0^95u5&|k6@U1 z-Z0uJ(A1pa}XWmZ0XeE0*x>YL@ck3i7}Mz^H}@j9u9vj zK>XV97xmwAQHwa%#n(AJWvC4)^ZLtH+eF3?4RXKThPrgJ!JeGUQ`O5m-SgkuY_Q) zM1m)m9G*~QMNlLXFYv=(a_8QZ7uoNzt^{U+_*fFRHJ6Lo3Yib#MYi`%R|#FQxMQQq z9EYXT`8xA|PWbw4ea+aTCYXGu22TEl0 z4IyDc#I1Kq+3Qe1#@ppBwTSWiUyUd#b+$0-2X=rmk5U|FQ$cItumbcNere(w+fAFZ zKZhRsO5BL%qkiIK)m!cW6);R{tPs+a7Wf5o3hwh+-_%%nj6XBfe5B{v(B3!4KPuMc zeR$kAU!`(TVGw#A*+T&glJPLdGuaf=rLy8LNdy!zQs0sQ^E^l~u_Q%E6{dN6BlZez z(0SwoQLaUC-QV4;N;VmppFf`tKSy*s9%P@R0I=fxM{SvfBc9+U4-9vgP?IkO9N&Rl zJf7kV=lUZsN{DfNsH>s71?+!8LZn<|0m;*3LG6dOTeNQIaut6=tKD~I;a!}rUV z4vnsjbs#++Qog@c=n6r9#L;>l0Nq?W0t^kEx;@RP$tY)>75(K$C5WRp+*B7RZy28f zu$A=6BlIhL?qP;fZ`31o>u)U^bDO@HCI}~RWYk&#d$fMA>#K-A=R4t4k7>_+wW2gM zDMfbjq@h;9$RaV0?J1%kCU3V&TkQ5Yq0lxD>enzmPLP!$i6C_il$&+A0y_g~NOSCs zl+z|rz-Kcx?27Q+gc7C#1@V;5py>EX{jH63zXQ#6>fqOfrW1na04L_Q1NQ#6CRXdA z9C(d=ysW1!s`G_GDiNROIL)y_S|I$m94cO2LtR8!hrO)5_QJHUTZEvZm+{}d0Pre9 zzYgK`GSouPYP0TO9Y)A9vqX(htH!ecU)|bO34t+Zf3J397yy9IXkJ{ir0s^%zN;(y z2>X@OBI#RusqBg{&@Bfa9Z#p!Iqc6Sr2B_kB^zFUDa=^PbXB8%S2{K3Yugjx#2(nt z6>9Ss{){vFF8Z`bMiaOu(?-U@L$hv1ktCn5tX{bB(ITuTwVp2yDK``B-~sZ?*U)Cp z+Ins<1{rMjtZQ7?{m$Ty2^@nt+@5?rnM+p`f65H0Bh2o{3ekGzZD3Mg(i7Rm`_o92I%;!ebPWV2GF z;@`!shQApg`~|$)?f%UHkzM=O_Fr2g*fRzH?wuB&{R1#66A9?yW;HNGZIZ<_H)WH! 
z5cKsf*c@tdcI-^^qtRzF!HxA#W*EWUj5a={?N7 zSG1Xix7V|Pg6GLrR0X;XFi2;@$;Wdi%du;EUql)ehDRHKgR!@sCSa2wOAHD5cXJ!d({VlDabo1Ol6F6Keu_58*#f+>54`pll+oT5JWU7}v zOadKRq&8omO4#(LQbg@@q}d?o2pV7QJ^e}zyz|6iQ2(e$8@(7Q1DA6ZO?@R)`l!u*3T z<}EvR3*n^Ln;;#!Q5O1YK#uwxO?P!C-b4DqqR($!3=+rD0sd>+w?~~i%ecYRFAf2I`5?Ve&dABbwFl064Pj&u zdQqnF6folh*&sxjiK4e6gg-@9DYk@7}P&e<3Y@>uV0@_NyC ziwZlr2FIQw>c@yFEA}e?L1tD8u)=_i^~}*^`Q#7i1l>`w;Vkc%dFF&7WtF4dsK z+c8E)WOX`nrj6lg528hlOC=rFGrgN82$TQ&5Ujk4quiOrYh!iQIkCxZx-V_KmG%B3 zJJ{4x?6@@`29Edpc_l_HxGD&3^8I&2`xg3H7RV>T&EBDl-N(}>LE5a@ngC4hocaMd zFT4^Tk!u65(HD<*UlsXwmLuVf*o?RPBJhQcCYAlTmwe7zf$ISI5~Axk1(}FjA+CAM zUmjW=PaF;h!Ceq$Zcq-*bVnw?{x<9p!Fp4=w(CiN@F4V5(0OdL-L+@QN zU&73XWt>M2UZH%)?^;)F*OLmfX-&=7$q#j?ps3pZ+1yTzE7(b7T% zmPVk9+BAMcnjF;Jx$9(=XMICx%!kL%pV#X$0s?RNZ#V36B?O%h&mcKum`t!Sim=pIXTy@_S7}GhK>fccU4lG__k^?r1;rw@H@? zNPMgx@hVol_9RSom|;4t>6f|nNi^p1Kcw7@PYJNJh=0}PHd`A`1QW+M3s|@7f47*6 z@T;FkO$=kew98hsRZ<5Nc_G=4hg}@x3Y2lU~ zM|AgSKQiMr4|S9ZD8jRx5l~ovCKK71|JGDch!E99RVoHIi94NZS81h0Kb5+tMn>`d zZ#j;IyEOA}oi@t}z;U4(EV5WGih~=}Hkh%1tUfr|{u%IXcbMZZrB&s{u39ja!QX)K z<;NM4KLB=}&Tbq~frzTaq-s;sRlDoy1{G8G4GX9h^4a04W<7)07GxBa;nIjRlYL^a zMx+;kSN9VU_S#52;zbW@ry9+of*flTD($}}lC5tL7WsD+*}6!}=JF(I9#d*iTmO+z zY?S{;LV>&Y#;6fWI2*W|dvkswx=*p-V-Tk@fU=Y|U4;2Xh`GMgrPIq+9BT8aU(k^m zO+5Hx5oYUCEN%pbe$^R)LTPVR_O7&Mi)s3IueBuZ!~_sHUnFvND-)uGBD zS;~Q`flHdDRyfYT_g02~rv$*9U%|7$kkCk?koJ4IjA{-9WtS6)D>gVkRaC2c8m`gLD26LaEApHEz{vhSVDHhPT=HY}q4ms7E?J58#GG(# zlncE`Ty>hSd1hYuxMbL`_;N}*874&38Wa%@J>OT9wK%N+!KC@?Lm*ST5g#TDscdkZ z{>VwEnFI*jRu$)_9~-XkjGAgyCP`MNqk#I!kLP{~ov`21rm1>2v+>H2uoKIOl%%yy zMCuOOIhViXtY_wi<*dSn$sbY9(2n4_pk|l0r;n-K1j3PTl|IF2O{FWV;Ne!^j!irGim@;Bdui29avAc@h&z#ekE+m!))vzX!KiC0k%DO?|fi-2axWs4L%ycQsv~B z4Z5h5?bE=?r-DM4xVxg0plkx*MR#P!D>Fg^NUUdU&fA8Gv!P_Fy8`(P!NKNY7LVXI zTpw>Lg=1T(0-80n6tqxfeUM^7%PjVXJ;4=k!Gb$tR z)(z0HK4SG-e2_s=oA-PBDKCiAg-c!2i53^EVB8Yeue%?8!R)sF<1>sACq|9xMlkm8%RvVZrY9eNvqJk(l%P=#71{z0$Oggi&ZS zQ`Qml*P`HQYdW7^Ki3}HD3K;oV?*Sj*)J4JEe{Lkwtoo9=QG%%~c2vHgcV(YpyDCrb{zGbZU|DFs 
zm)I0|xM4FetS0lQw%CE+vM7-rZ=ehFReAkIf-C`$x_<`tDT03EKJ|2QOVuUyI}-|k z{M8jZIrSz>Ev4N!7($ztWt{Q8m7Q3+tG=)H`jSOoN#Knoj_7zxZ*Ki?lQ3_m8fp-= zt5GW`;Dbn(E`5E;j_PlG#8oq2MY!#!GPX5nPL`?7_9-Pr^kCgX}vzcsIlytWp8Kb7kM)Nke}vrE+HKADc`N$w>u8oWCL3;3(yWMlqYEK}NU z?H`^-9ysMg8W;9HI>tekR2OAZyX=x`!_z9k3x3SCH{5QrFPevWPAD0r3JFD{wgLDH z6s+@_e1z_eUEM(W<#xjQx|Eox{c5VF@XpS&X;f*~LU5_$^{1Bd5>k$fb<9KxIE{i&gB(1%MnH4%akEw z{7?A`^VDs?|L;Hw{q*Ph%n&hCs!#Wy&*Z&KD&xY>pNslZ_^Z<<42KxVDVx@5UHmh4p}0fAk7%ME>R|VsqY6Bc z?77p?0X=i2KiYrdJj{Ob)^%B}`0oLX^rc$ImnCkQ87J|e&WiW8QZgU0eHE0$QL|a| zMV7Uvvo&*BhfBnz(dUh$VtT(gDD%9blU_N#ZGhEQ5o~a^Mko^tv}+u96<%p>rW?uex4T&)L zZmuX2O1LmHHlUt-0b9`Nm;P)4B2zZob>(MpYNkfh>72|>eJ;Ty+cI?T3#OMqX{MmH zEKdJquqeD2RBq|Q1WOc6c{zp1&UM_?c{XlX#E<_o_@)Xu4stm%SyJ!w(z!07)$O8- zn!mgPakSc8f#83i*&^1SOOd+;+M5f0>WTQKA6135obB-X3+2fSz= z*U4EXx4bOW&_WYI<_UKVC1HxS^|x_<-Z1~%@10yD$!GybHHt8-2`aY})9B51QD z+t0*^5CA|!gnCpsu2>3#v22V{Qh@*chA&iY0as#>hi7nd9ACKrw~&(lZ%P254A%pF zy;Zb4%t9AETf@5bM08FZkbh)oSX5ewO%m+9`0+z|C;&jApgS(mg59@kE~L$6R_yo> zMnPA_$R%Fi_tnEFAKcA<9zGf_N>lAwGQDbzgy2F9 zJpw*9a|V=UZcfKVh8Qm#3U0Fr31EhL(e>VTPNVk zRVchF=keV7z+X#oM^l}Rf{u-W)JpzXt`;X6U&Rjb;76+Wkc?(O5^`HTkB?Oo#RLWw z2Q(A{{0;--5AQ8uQQNOhR@P#HZh7zCI*XCQ`3iY!(em+e4x+ZgX6}~OfyEoX(ctut z5E~kslpY`FT@P?aAp-6U9N=(TE7hYk!Usr+UXSx(vLPMNnN}Z)s#v zE{ksUqLy0q*4OG6GViZ-RPkpf3+SFywI#XJ_NH%azI$11jh1~Ug8n;lO2bY0`1FB= zj`k$a(g>?opO7lOG@Y=gg*u2yu11QY|AE+T7Pbq|Ff>2gUsTi31Tz@J$GH<5SH1^- z;EQ+<^PV(lgyR0ms-4KHgYYI!F0!?|5s3uyjKHZzfixmC&}l9%xIBaP%VBet+kIPx zl=ZC!=B)>E1+6l)ynr<^vP0(0c?Cs(F5V_J_!X)%w0!1C@AdMx%)_HW6L`+A5|UL; zdN^kc#_+3T{yKjWzKtyDCX3f&-*nehGMcO_@%wl9N6xR^*?Hd)#wq8C_#Aw>0kdU; zPo`?BCeLvSoG`WP^T|0>bwFo!;1lr=jBI9S?^=mN0Z?pPj5-@mA0a6Zttu3u)M+6XBb{N~ZSA>lEKF1NN;)~? 
zKk_}jkGqJoP&u93H{*g^l$VOaw=yNomdw%Z`~w@+hiJK(k(YtrEVjp8_W4;DFo3Tn zbFMSsK^=QK6_eFn^G$;-t9&RxuJL~7g}fO;fHh1XCNu^#Q7H7^lyxxQ_FuF6{;NLr z?`+J!n(oJ5cSFk%!+*%S@vT@v5}r1L*RN-gxg;Q(Lvti1DfqY?o|JQxw8F8d{xtc* z#g8lQq%ss;l?}-EsxQCp20klnkQ)Gl?1V+(bt;V;U6LvC$sQ{qo>a2%q(G*E7XIoe z5#_BO(*qV?q_@c+F6{<$BX=5=1-%tLWW^dks za!aDo$q1IB*S(lE?9_SRP!r#OCDKbh68Zj_=gGqbZ29}N+Jd*3ByHZvb9(`|hoU7! z<0#gr0vYSQ{kX7vs?6$PfvBdvH?JFJ*r)XAjPf9KQMx%ncC;QVv0E8iUvc-D`6Mwu z8?umieEY56*L6wv0*=UPZ%t=-)Z0f$(etc$lUM-F-@_h{(+gsiVz0sg0OYgT&3<3T z(PnpK`+&!7!7@3Hzx^P~5z9cmJ(Y?8Cy2`TF&p=EH_tcl_<}OdwzvU@Z4|!Z2R-T; z(e$HtPKbnHR5){BX~zIH*X6Bs%G~qrAfXH<3O4;-BD?!1OFN$W-|e?ON-kcaqKnQ8 z6GB9AJ&3*@?l8c9%@&pL4{cROt_7`fzd_v;nOd))jfSbM9<^wuoC zHw^P4{-oj;u$`;p=>o&}=OT`(=!fk;F-R036#Tb4^e<4@nzyd({~HvRH~#+)g^5A_ zB?|jkbu)`6_4P&T8jGV)CkrQ`-W_r4W#p}>5HHmhEl@qYqj`Tx)?%Dx+WM8Mj|;dKWgh6#05 z(n@Q4IktXSK6XJRYDMeA6y7OoiZ>X5{%H*-KnYOvapF+x=&5M3Du$g`B!^gQNWql2 ze$BzTd}JSX@KbtI2y>O;Pzo?EzYSk0T29KMey2X_gH@3r_p-VDWE+lXD6u!n)8lyh zdsnOQF{|qabved)N@J|yOOT#QKg?eoJ4qE7Q{)$e`8U{geX(gZ1I}K{TZ2xM5>J;g8u++{t^lkbcVj=1|^F5DKP6^$-`9e;F4dbBKtQoYV9-@M3|S&QvVXY zF+u+yV5b;EwXoP00i={!6T6D>i*SOgYht7c-w{Sd!9K7y{ymn4)Rgg>zV?I9Ui*h? 
zIs$z`O%darXL<&sU| z{{r$!n4N^#D^;C-rmlL@gLCZHf`@l-n3@(4`9w3xs3t!9CPEsz9;<0znXDcW`x7cX z5PD2wt(IU&%!W9~uurSn)E6Yo#~f)UTeOc<2}0MB30O5fvme=Ra_|#NjYOP~J%x5;F;BqXZpb>a^WACQ#d?`j)ld>ul&W-1v$YRMx%fk%fXm{QpZ%N z2QTK&$cN6)iy8v&emHshBCCBL@N|WdnB)Sh&J%hwG?$;(p+(ij!mhBNQo&XDaTyRC zoo?2{8=g3N2)ndtsbA|Xtln}=WxvS#;nyAH_#^5(&_*Eqy+oA3RHlQp1fNBaj)WE` zh$$I5`JhE6O^N$0coHto5{$BcQ0N+n&Xh!^hdWgv)KCr8niqd*WNB<4{-6U-j|&%W zRp#GvTeQ@v{L{9Eu~p{|$4#9JvOO|rQGB6#)S#q!)jvEiVvy{8$eC@cXoLJ}HqLp) zoB5|}UBrHF9V(Qc)UFtOuY&`5cLON^V-%7=c02*k>E0B~Q5LI)<7A359+H|Ea=-j` zjZx>Ywh?_sB8@kj!;TYjUd=Q-v4ZO?Ka}^Q(Z6Y*mnT!ku9ZhL!?EjY@8+m|7Ci~m z@ATbr4~lYJ>`WK)ke)Ga1e2I}7u5MZxs`<%Nz1n9VcY&dbCfhu{_b+b`*$zEHoUA6 z5I;AM)k(=qCwMm{aU}tV`gtIZ!~8K*4{v-@u)6k!wlcuerLnslh~E=5;J%=W{Bv=b zc34(!bjZ=d#okat@p15GZKQZrN;xa#g;k(K{oS>btkqsq`S0YkpqH~>#R7$9RC(Z5 z+Hm`KkFRNZ8G^2_XK-Kc^laqRwq>cxfSkxBXAd9Lre1nm)K#AISk5xuq@0HbLAk1L zYo_~EQ!5*gCf5ZddQq)uk)oQzzS6?d@N9pLUoP&cM>diFEub{e9vYF0HenHEj$MQb zKGy2PjXt^yW&C6dlS8bq_};!H_{CWUV==|!omr1i-oc8b@(b$$Bl*>Eay7sMlHFZ~ zq=YE5ec1qFfH(uEZR4QCjKUucRyiYJzg+C@l6lvB4ly^@BTV`#2>E@@-PDAWr8+=e zxR}k~`k&Y|N(sun1qyNpXBUp(%f|^(c4Z=;$IyGSW!Bv|lv(KDI;kh}(|`mkB-xle z7$f&~JKC7PWD9g`!OYTkH@DHvM!7Q80v>B%2SBk}+{0jT;h#t13>(BR9dhS6R8{{i z?A+-&(|i~9tQPSP{>>4%Fs7q~{?7{D|C)cB)#dHSPV5m77@YnN4ZvS;=<%{#G;m7% zH`u9jy^RoQboO8@OfAL<4al{#<965u?pf4G>W05fj519i*bRQAE7M_U1Dk0baCQ#& z(LcgYKRZkpc{fCv4Nmf(F70tgH0L(r>RxHgmfT1xUi?EcYy)bwsoKPmd`{a~rCPyL z0RU(u1*q)$!pA?Hw1AFBJRs}6E)0+0PESP4cq%8 zt*r6;O+{HO86Aq0k*(!+Kp2u}1wQkSPMCdi?_Jqgk;p*N_X(;JuNNx`$e?Uou=#dI z%indtE{=}a?jG_NHs3_{yZzsL1jm1&HUG?%)N)*94tl|*bK#K|+MQfH6ZK?1s&s49 za%-X%SCECwF@#rkCyi9={T{ts<}Yu+7L^`u1UXg{d(Lemhh8<3QqlP^8Np8fKh!h0emHd$c0l;rqx86y}LllCNwy+ zk((EXCv{%|+?nRtmv%CjK21+i2_f(pjN-@|P@uiB95Aay|J48!FiFsdSr`By19$k7 z8(28~R!W&4y?F$ls*gxRiaf4N0<>G2(r(1UJz~WQZ zm-82Yw0oY(n}h?4J@ek^|DaNfGowbo(>hmQNvp4Ig9H|~k|G~4ecmEc6$a$l4xt(@>z-!tpedW71T@tg|!v90x?%{@$2F!Y04@}qOh!642su*NU&;@zj9cwOJCEzIt?YWj%F#|>3{tJjOdH_8>%iPE#5N?-x6s4p`Z51R4)NJEk}0(&wv-A!pU$o6dMLCbpa)2B 
zH3OPN37-2l2U>!2$cTiH-J8>zB;h=mRS?pkv}KN`bW0PdzEO!yU6JvW>dNOG$IOy8 zK0DX5>`@ScmH+H zI&0RPv-duGzdrB%{_G>-)mx30D{*z|iz#fBPgqGvWx+FM=G>{YTB>Ms&u_S@5P-(3 z^czp{pLo`OoLO>j6@V4ZJnn_gIh94$wV+ID5#WYPg<39zEf|$Z$G~gGL_x6GSwihx z&-TLzIHaS{$kx{+8|w=RE{i`c))0e?+Xf4`rG1=i_D34^+1Rl3c52*71v%S^5Bg zR4yTn{L@v;1OxBK-l_!v9*-e)wY+FhTW?9zaeY|1) z?ekU`0#D>DbH(P2PVu=JTvi@NjSK3~1NH=^*sYsA<0Q;! z*v{95A({W}R$BLnmZ^F7ce@g>zxp*pl--|)lj$M@$Z3~uDm;!TV^fU1yv}cLL+{>W z_N+inx?_5Ckq2Sv`&>g2&<-Ec7|pj$xoREP;s`ilpDx;wMjMxm>tzRr0)Z_Z>1AWV zd){e$;w3*>lX>~kX&4JiqxtrmAZqIpySg|=?gG0{O7W42zhk2Zk7L<~G00v_zJA7E z-v1$p3kOJ>mmiqV#Jf54NhtrzUMO`A>Tw#n-ALDcZhSgx@8(*J()w*5^t^$3u!O(d z+)|`e;eB?omXxh#d3NpH7U?9ako#8F_IKPHz08!a+ZBx6Pg?jYp-{Yy~p`3ufjx zG2nO4>V$Q4cCVQkODvLf)v!0 z{e_ALkebFYg5HdlEn6zo-T5JJPU)T_;Lh%U$5Gyw?Zy6&NjQ!sMu%^ zHON+B_Zrpsvh9Yh`$=q^bO#EnHWaQ>F$YBpkX@Y6XNNutG!HOvv2t6!?G{VUVB;wBZ8b|9O*{c!hb@ktN16(TJaQy5JZ->aLm?0G>We`(`fzwiy#<{wdsPGt_=HT%47uXHxOgIu|SH z{_Jc^;dNs!nea9*OwFc*&InKjajror|T`2B0@1KA_z zufC+R+9N}GF&K5v;ddI0gWxnHHAL-`;3W$UQ^l1Bv^kx1yE3T`#D4BFs!`$bDo~T? 
z-%!c~8aJ3H78%D>eQL8=+p2?woy@2c7v(nWVQlngs?$hKGocK0qWhf53IxR(MwYHK zHQWW;l&x~|+QS&M_k1g3w?0eJD1|-nyz=dn!@*`7)rkWY4?wi&$8150Dooc%-R$Z@ z$-sV&&Ya87pD!PG>OBWwHZp;$sI^ZmT6nf0UR7msgUvzPtQ9;LGd45?3n^oEwc$BW z#FM-6I_nDrOQIzasokFxIak5<2pb5zsWE5Qzp=!-;J$FNHSLGc)6GC!J)@nIkV}`2 zWB@@`SD65iZENMRO#rU*Bv>%^A=Z(A%WMO6TG#onddCoSb+I1E|EaEz?UJ3#-cKGQ zcGkMg_c9s+dF-4AIuF$`WbTw{(-_hDQ+s6Vqp?*Vv-pWa0PsnJ2 z=~G#>c<+N5xH10P7Aw*h( zl;VspN?Iqz%XZ_vb+;+B>FX`tc z)a%OUEj9CM0nXy1Se?sd`TSkVv!*u4{z1tj+J>_aMc0?v^ z`&yTg`v~h|otg2;KH$iXgW@yf9juh&;RQsb(LJSr5c&rh?kxZR0mROq-`SAjGJKcmR{jsmK!U4KW3HNS>5nIY8AJPt zIANRT7?2}c%jVz2GUBQa?>@jee6|y4quVsB0U1ljf zDOv}QLQ*A3b}?Mh6Zo07Sk9MsJEZ_V3(3zLHS*qbuRZME?(Oktv~g~ldSK#4KJ7Lh zqbnA*MJIT5Pk4V6PI8wtnmyP@?p(yPoMTyQ_OCY1scO|s zDqdGj{jB9;B-C}5ce&{X-(zg_eVU}Ul7vky zjIXU-e9s{Ndlp|`+>eabI34yPET$38rJvK;C%Fmo)nTf!MB=$rvXLuk^Iv`GG-PCRKiKS&LZ8P`3^*s%L(( zHbZk_*nGz_KW@>^`kq`M5qI?&95bHJ5Xd74Srt#fgOgX%#F2SbUlTGrPfM>}i-rk$3vHd!HzHhSIgS zO{+A`HaUMEeQ9NoN^4(Uk5p2f=C&}M&;iF(kNFf1uia97qmQ;hYB?Osvy3^Zxy<#Jo2QEZorzB!r4`PY=I7??!`f zaHQe)>r3ury=&A?Yz$Xaa55`3MehJe}F=d|? 
zcF|^Lj%<9jRnqKbf>^_< zE=59LxJ;Oirdnk`^Q!c?C)54w9QQ1I3aO>VQ0o$#RvBjO=Qy6?Dq#&3uHNK#B-Hs2 z6zx4&b~9C+Y8)RIpYgX6uQ!m=(fQmfTA;c!@xYIXvrQ~IFkyMn>XpxWy+p`DhF1Sh zyrx@u;qr+nS5}}wM!K#(JGsR^J%EPy=zZH|Lw-YR@tcR=ypX(lT#k-28c(_P&%ajY zZ?np!9!Zj#tSjo_B$spr2zC9Wyn%_2qa0o@O%Y8LFMoBot(&WtnjJ}ee79RXRR-P4 z@IhA1%G920YVlVuW<9arhZ3fiVK*)p^4VYW7x+07?Iv)hcvOYG^+&Y2;!uo9FD4KR zwsZdM=h}+&8ktV}urm-I(C>CH_;yy`pM)Z5NpDMJBY0L#t*QzPcd%pPsaU|a)l$H+ zR;1B9~Hcol$1u^VOWF(4R6h|EN^4 zs?ceRCvZJPBEiFB>iMEl-sis0FHPOwKgSap%H=2{l+2^9q(vS!6E#KO7FOihC~UBM!k4E%AjLm+XGg zL-3eP$)b$eELv3SuJBR)B&dLGmFE=6Se4ZnDA97x;C}fd1?KgMaF*E#{7u|td!ACB zX+H(-Z)PfypV%AOhvkc&V;!8EeOOum?ELiN=@eoL?Z!LJu6&5SRGjT9RAXzIi8u}i zE&iUuOrAc>37+^_$L{cfAW_9IS9T%AWh|p)K7AiR=ZPih%m%_)r&80pHMwYfs(TbB zjwjO4KS>K+Fbg=QN|YTb9YG7gwjL?o*JD|lDDh@;dh z=yMhNm6D9Wo;(g?a1-L`mlLy;)IQ7KC;~vJfB}ga$S0^?0Z+%b)V>_Wad}w(0P9P; z9ZJnc{#6_67oV{2y=8#xUP9x!BUqyNMlVyPg4`va2qN|)78{Tz0J>|_ z8|72WV;hZRtnxbPXV1C>Z~IugsQ>_0N54)z!jf4ecM|egl(Z-y?xp301;{c>VUO9W znuVeTPD!5uo1bkh-l6%7n1DdjZ9ccb3P0P}XKIZGCeGpNCp2A7p! 
zocYfww{kWb?PI@lL!O|C`9U`Vo#gUQ7t;#WTr4MQ8pd(b>$Zc5wG+Mp9Xbszt)lq+ zdt0>4XYGQC!I3WZoEB4p^tK!X*FGAOjxym9{o!(bt7(mORchk3e!C_9*nBdtv%wjh zhw-x7eGGa+3-3aRzt?8A@91b!%N9x-Y^gk&Q3xWIG2~44wC!N)iJGY!bt|EWB$io1 z^3);oobyil+N?!)$8mFaME$F`6y@d{R>>b%Mu=SYzvkta!5wRKxmz`>BL)+TTk@@g z?v@j8eS(Se%*4D7mjs!dhg!BzJIaG<*cfH)J>IJ{abikM7G{C?m-NwpF#+r2K=Olp z+(HT&uadn9xhK&T^6+#F|K|HZ8wIp8W?TE`FcCI&DY1-=58-&zLb5jZVbpQK!xUa7 zrK)vKK_5U~t%C0+A-^T54DPu5Ogl8DjY0z5AVDu52e5SS~fZ>=j zQgvo7;O89aj)_IYzYr}Jaahwz&oU{`za5|)p2wi^jB zv9;a7;QaH1x2?9sU4c+)=P3`{p|k89>|P+9Pii9K_b5(p&Zhqp-JOsy*}{O1zI(}A z!}EAt#=Xn8rtu8bx?$RptYzKXtoahA<>G+{wVJ=UYR9uP^-a}ufdOI)8Yig4r{$8T zi*D-*&TCmSoLB#n#-IE<@Si)(zZn@%h5V4aue?R)7aO11kL>nL6j`(lEBJ_4{?bfX zCC@2f4VP`rU)LZ1J#vhUGV@dmbFox1q-bNSIOLX3n+mNAvFLb+=Me5@UH_@=H^ctu zcZN+uT)t5$t@+7{(ZlMKs05CI-k7rnEZCP1RXya|7w%t39&z9%?Q6kP!6w|XJTESy z-Jjw61zZgQ!cbdCs?-Ge@Q0k?j2;@Q}RXni)T`%25F zKLO8UxtOKxzzjU946dH#m}jizxk{1FIlDbj`crMMV$XxbOH^AyjBT#le&W@4j1}I8 zyI<&ay-3(qa;I0B8{&aImbwet+nk+LjmSOr*UOg6xB9+!eCV(xA0w1uFeNz=c$LJ2 z_NqH*3f`x_!X}yenoglq^>INO;T2b|{LN&{d?$BK0sk!bB8|mT*EP}jqSX(FL?$9( z{(`l#Y83AJ{=5}hBbttx*7b3lLWiMNg7(kb*bs)VLE>_gTT7C(1C}Fx%Q~}=x9ihT z&SmM~*wys{HFd?V>E)gp$$5$HMC{RbUv%+&HaQM6LpGHodpuh>+{86n&cJ+Eh-(Bbei44NIR*R3T<~geJ=j6C^~g6O~?KupVU}`-)Z;6-Oi|d zfS$YU!?HI7Zo#hDsWnm~hhO#3W8O6m=%+fD;S zs9!d+A+e-7vh+9SuGF~fN^`Rt<>?a#MJ+ZdhBFYxQhnoJ<~RTViO-*J1jyn=$XqxQ zIRc#2waY)^gR)R3CLQA`1`F=KtytpGQ}z>lzTpddRQk_yWGDN#nt(9C3T6GZverg7 zP;LFfpu@GK_G(%>Z0xCs(-kn&6)M;*lPRy+o1ixB27z1hdJMP_TB}HFF@aVaiZIBIu^) zG!yFFsGv}8{4&r7_l`8jgP?u z2CF6AIXo~4#ovYp6J|JX0O;(juP26?VmHbw)0m>m>t_d=4Hqr0?oMm|M7+>i;o1i1X5t;j2YfW{zx?0jeFWq|i*RZDI8*VaPSfJ=GVSH!%gLS^tPKC;gY2nG z_y}5(PodaTcYO7OeQ=QVs{^rHki=;6`RjogP!n!*Pm6b9gN$_V5=IWsDPhxWP1ep; zBI!R8dJ!$_|D~qSH^!1rC2ww$yU@xpZ5i5fZ)%;Iiaumro)1D53>J#RU(Ki@f|0jR zG32un!O{^AuEQ`ti;cl5*Uopd7XFN`6&qCQ>N}a>N=?<&8w3KS&kQT7T32RolBSs? 
z`KA|dUHdOt|MdC)ko7Yg^*=@D`$~GbFt^5|H{REkUxr}ZEG(OXt4I_yk1UJN;@!yQcr@?5JJe4aCKnrUMxhHFjQxYGb)43?p+Ob3(VEtXQa;N^5>HV!;PQ%r=X zjuCD`)KZM<&r%Zg+ES_^l8$LJ4f9H5w8_VwG)gjE2i=S)Wfk1HgPwcP77TdYv6;V% z5xo$7@qR*-p;NnT^_TCn}*X0Upz$IiB$X9x;u4O)ae5}lUCUGJpj2t6z)_j}3 zQg!` zeIqJajm53!RX(ds_?LQ*kit}PLhOf>VCu(Au z#mWC!EMUn(B8VF8dqrRi$gy2J>rB6|GLrL$1WqEDbscAlq52eN`@kyn6R?=hc4r<5 zZ=?4&Axv%9Q`c)^$l`AoI{5~`#0C)48^nOE%s;~ExzRumlCq!T@d1Kd@N z_*F0Y=5i|H1F?*=Tw8@&-b*B93aR+XpHNfNeWrl6?_@SzN@OUulY3IR%NfAP|6 z0rV<@#g9C57yi4AjQLczu`P%$j=8o>_|*PN+GgJ+^_EVQWjTq=#EagFmRP;JX>w`b zRbT_Tc6Re@sQc||XYADFX_A)F<%6{*1A<^;ANJPE-#QCj8#@GF+IISq1S+EWo*jKT zIV9p@rbBZ*kzu(nIOOWYU}KemYfCaD)p*iowbo{yXsP!F(OZ7C&NQJrp@1cCbV#sZ zxKu;LjFKpah=!ooVGt?#`XnjFu5tAe^Nbuq5(HgHnJ@3o(_4!NKX;i(m#lC6FCE)0 zPikhV&7eeW#zJMro-^A}Ji3Aq;BJ1AtwGBYVaGu@C(tb&0|-oOIXxl!#BWx{XxT=; zh)NGeB^QU^%|$E)DA-}jeF90&3wl<^Jpcr@9tV~#ORPoAcUC5DgSXk2$L5e1(r#8Od(m&C)ybJE zRWBh?VHKPY0TaGN=}la}J=t*l9t|GQjp&htW?R6JT?Lbp9!`Y(#q=4nmocP1!{LtA- z2%&*#q7S`#TCA_*;G**VSWV#qo#gtKzdG}`oQcbzfdN&=_f#Pi)L~y^hFE4>GC%!Q zSa46=uPU49@yvP*bkM#l_ANuHHGB9-e|K24<-DM`RTO!R8kle!q-(&%g~u@*)70O9;fZx8-ZbW!mjp5%;Ktlufk)7X`DJ5r;UvJ1OKkum!R+F@8mAJm*y}e zB4WPIytNI?{OlE%Nqu|ua!~hvgBf5&e0lx$+lPS*|Cr&Sm5fw%f0M?mq7spf2WYr_ zQh9lUuND(MD2(hmpsv>Of8A9x89W`;{3W+h|4hqeWj|b4w7DjVKLnz8@+^)Nv1Zmj5@3<^$M( zH*ThA{pTgniPZpvTlP3n4&&{_FPkZaZ&#GjW*9T5ThwAw2kk}3rMF7z%T$*G5RLN( z=^i=Ya(InzKt@G6P2#|#Ys&y|;6NVYrAbV-SeY?U1i^D8XkkWGSs^@g5t9vks4wQM zS@3)1=7YbBG0~V-D{WQH%PLaDklunQ{(-dwqWO5=%}u;HQ+KvMmIj1~Sr#AFCwFx@ z#iG9lNEW~pN4xCLkzCAmFQ$wZCG4wd0$~?@PwyUitV&HZS@(IeRvx^q`LfDyLA+tB z67cJg|B}9r^~R>dwhGGx7m)MPe@~a8s0<6`8z5jdx!`LnmhCSB_^7+XWFlr?8wqWp zNxi6Xp($VQA(7ecEDC#GIrIVRdwW>Z7NGoI(%(bQSUY53iSzG*zB?;%Uc_YyX>ShoD|daUp;l6shGG-ACZ5kq zda<_w2xVhBXn4JoH{c+c-g~9_Mo!CsJqI2B zh50oOu{!q=@8xPrfS_k@r8JUGrWJ5=?-HGg)F{Lo879cJV?F(j3SFXAc}x1MuymX; ztx}d$QuT*)?&p-8lL5OYW??Ln&jj{%8p^2Wi`r4VV(iZgy1cF*kzoTyql&JIp-Q+j zhX+4i3!k;5R18KJR1R$uyJ-UZv@s`sh%0V**taJ{Fc=WAowlS^`et?DUVYOGN;kgm zv+1+wu;@v|)xtoB%s6TCNOt8COdnCSgt 
zpxM*hlZld?88GT*n_p(dL*rDHMK8mA&+J>{%X0I#t{diKa9o`HbLc&Vo2$o5!}Idk z2ca?ckkiK#x~IBFg(;!|3OM(hTxko`=}9t}`se-*(IU4D(t?kKzRNzMA~slLV;%!L z9)RVzzSkBe81L47e<>?+esi`{t4mA)rm!igbdE#V+HvsPl*HZE)^L=|Al06Ak2biS zf?764qe{Z;uPlr^vyRWL(w+p5N-F4%7dMNrOFS2O&RC3{gFX@wdCt?MLy*Jc4x=fa z?&Mnb;$(Yqyrjn75j}Q#4w+6|LQ>BcT6Mnf$tiPV7s!Yn`l;GlXFt1%1)Vik=6(#A zQyoX?8ZMts*bP@pJl)h6pM~Vt-f1bI{WWDzyiVsbw`9yf>r@tEP(PGn!|mffP$u6s z4V(yOwIILa!}E8N?7cfMMGoU86FuOt-`t9fBQvas5o%D1#-qJ>bneg zuIel50o{4*bT~vZ(JmrHyhF=*CamiWV2VdXo=f4wLxZaR?wf5d1S4bt`rWz^v$(c^ zo#gTwJ-wO$2N23FtB1Kp}~Jv}AmCanD|PqxRp7v3c=kRUho@Xl%TExr-jXKi&vf=?)`;~|D2XGqr4|DkAO zC;LS#h=1$HGe=aQiaI}i3g{)c_zy?+hpiN5BNuv-^IEoz92Dm}N-xWw7l9#12y%Y<_Qi{WM&p{|80B<;dL`!l9eA6!J~JY!FH3gueA*s?1hX)tFO<=bz-*c z5XRk+mqR{7n#)6};3?;Zbo2+VZmk>{mPjAF>#t>&iUbS)dQj;dxn2gjtYI9ZgVZPH zt<^%%LyqTM{of2L6VwaU1pmUS^522h^MGZVgn)d#i6Qjo907`QsPkx@z@WyZ7qdZ+E@xS-<_f9V#y?j)qKt3;+PoB!2!-1OQ;8005Ze_pomx zA}BZIZw;J-u!Qpa_wQFW=0A89?HPkg{v~$(oB6_TV$(1&BzT+a2q0_k*&03CA}~DT4)d!~QS=DNANCLm}oV zd=F8>D(1#T9CnU!Wt6a=akL?`{@cqC2-lC@bnrHI9w%2{%3OeJDOw}ReT{NmhsUj?=^+V(&8w(T|lkx6jY`x}47ne1LE z`0cuO9P-n#_BIhLlbYk63`sYPG`C^P9G`D?mP#d5L4_aphQvx13`N*#O}< z*mC?ehKTOsrYnve_)=U4=Ed9nu@qQhmiw{A+$KqjpuX84s@tCe?}?MYU;fAaO; z%-i27>jT{d`UyqvL{gMb~bS&u# z82p{A=T@iRWV74xa_eX}j}avl@%l0sTjt-9bfxiOy9(c4;j~$h{;%U zW2@K&U*_t8_h%Sz2*$s2(iCz}9JnVyfpGw7tPG!laEvS{dB6wu!s#JyI}@#nz&wXO zRvXadBJbHwPWe3)Pr&slNVegT@9vDkC~&1qQ{Se^r7En#V$t)E)jU94ZNbN!j#*%4 zZn=49Y46jY+Qi17t@Ie(%pGN|3(~b0?&*Ax9`APSY99DJ!@VQ|F3IER*Y)~>EPpH) zo6`jgb!)@6W4J8m_SXcugv>OzOH`2c!xMP?US7yyzca2W(^U?vFuk5j(&KEJ) zNJ2fc{2bwjXw}ctNdB+#SNLF;-U3(vwN&54b@eOkmjQiwi?2F=ht5Ju9FHAM;3Hc9 zf&eLa%*JqEec4F7p2)vp@mW-dC%Zn(rEz0I7SU?x5TbgLy*EhoHFPbd0RS`Dr@Q{8 z!3$EqL#)YVShdQ$BVN?+seh`UT|4b&o?r|KF`gv_x9>Vi)+Tgl-}&}PnY7BbcY>#H{(9<026mN>;k-)+MlPIpT-`(s8H{ z0xw9ed9Q4Bl>JSNDPOguIY|II9i6;Commj(QLk>jHJ-`bUSin&8;6F847!6a@v(U+!kNIY|(EB zV8~Spqw?{F*`j~g+S7iB|C}x0EXe4?%KEp{QJwHJFG|0Wt=vHV3YRBhTsryicVqya zbaHqmx}I+{{_MhK~31V}5 
zPB-=O=eVZF-&N(GXdr9Sol_6RZbY2!@#7w-!{Gekh3U!3IoNu(QK%xN8~X?rk6S9! z_t6UfPrkWSEpEB(Wp;pwHYZjrK>KnOTn9{ifOzPyD%!b=QwtN-^&Dxbgd7K)00`42 z48p+%GC+<*1bs);AcCqsa$_T+Rfmfl)_3f;TEPjycPysrx1Sje_V)p8K=ljqnfpeH zp?m=%#q~t{Ym@GH8>k8+)T;ib zv-3Njnu2?=0YwqAqXma8gCZiv9!?g<>iBWZhG#HY-uEoZnoeZ?S9(M1?#h)FM2V~i zlX!bbfm;2M-VFjVWN=IowDKxc1X~huL)cJ z7u9*>yX@m*l~+(HQ)UnXU`UGM+&=pHf$8HY8K^H4A7JYYL)X2w4`6cVH;(S*)D^Dm zffYO!1H?GPaocC3s^#if$813i0k%-g(ayn10j3g1-JS$;T|_SvJ*e9+<84AiG8nv> zQuE?+vd?ev2FxJD5LxVw2>t&=pkx;(ddcY z2&VZcdY51TXi4g&jBFWQU^`$@Nno@x@jCA?4z76cZpWTHWgigBs+!{*`cmv5eX=TJ zO?4_AtC)+NGnLiWI^2Ph8_+V~eYt)C^ zJ8q#l&I?XFBQ_cF9%ga2aaxhwJA`?RxGQDLn+1SM01JZw@tbT3dw4uZfyt0+u8`z^ zi|LYLA*zPsBswWr#;PzMPBiVw>hc!tlY4GGWhd+xuS^d{W=7@$PX=~Q3P$Ej4bRLi zL%GsX-TZTP!WuKB*(q@%z*m9wRvLqu*5Wd5+LHv2GABdSto|e+(8+NDoZ6EGE7P^C zb``1lR7s`rRYiQUr+=XcuMYoOyN(lG1>if_`ESqw{VBUcFNx(6c8#%%u^;U<>{%Vt z0PZFHn}#T$+OT=|zIuZi!IjL3s@1u-W!bbs*NZ>3lB*dpa{TguX~OpX(ThEt$TPe8 zB%1p@Q#sb}v(MR^Wr_sG2U8@VD#KTn@?{KSmoe^;N)?8%8Md?s6HvGDljxvu*O{Xy zhFX)ysTyGe(Xr=guYsI+)zj_MULD09eLE-dje}aj{Uqw~)211V-kWw{8bkq{LeB*r~3HmRJ`D)I>}b;;+>{Fr{J0k4V7DG@lLH z5JVgD=ajNTabt>xrag4K$Q|IGkLIo&G-w_m_X2|c@KA|e=r;R!0e{8t_dy9!oqHuBHi0~ogEtlVQ>L^O)gKQI3_)M>8k=eoiBaD9sJBI{_{{DlWj zC|nkbfJZ79aR)_8xh04^u6KD4CPZRXKL154!*nxm#y~N!T=83#BS!uwhJWm3lh3&s z$qdrl4~~X8l#t;tZcwnV&;ox)3vTUwb#V#IayOlB*Y=`gIG;`_??t%}#)}HmHp{!8 zT2b73u1p`PD9;%|dYdY>NP18jG|{s%I5Hy|j3GnlLPts}g@;7~h&PsQM6-w7J`@Qo{#V+@aAVq`LfSuB?{yLDGk1*7FJ42Ta>&xgX4;tghZ*4W ztAwLs^iO|Fmj6*g^H}l~N{r0f_e1`dfZvsCs zdF|@98=dr~-V1;JQCX^T$qdYqFXJ&gvPhkQ4)PWo5Civ1Uh>D+#6l5)iq$cC_mj1wb|62BU z>Xj0(ZBZ&%qlAd=GOL{>Q+=Yt+c82&$C;0-h5*3$t$CK4vq}HOsSk7 zpa5}zLgE|h*|GNnUar0teJ1$-QI+BT)z0*2 z!${uIV(<_*!+Jo1R*jaQ^Lp!3=34fj%#PGU>?B)0!!kd^j-dOxDMUcVJY0uZ#03Fo z_w-MZz>8}+^5svjghH_aCEI^;deBCexY%mMdPvn%k$t4{iMkmIPhWyJt{NNA( zTZd=HX=II;aqLO^;ZXNU%U0<2gi_N!ii?W^dfJbu?gHBt9ONo23GhA zn-(mOC3;MtoGjnvLxqZ!m=hj!{ib^V+@ip`%j$A{PEYEms&ymw-ADzGlXJk`p#?ed zH}y65x&ytR1EuqYh_vK~F;@Ur 
zXu6r{d!J>p_@baeqMy+L{SR|%&nYED0v3L~D|((-`#`dN%-B)AJfI9_7rZ)Yv^(o2 zlr>Sj*9x749}ac2X+pqX4}aGt> zTsr{(IfG9IKYMrnFY5QHxK%!d^*JuUz*=hx5#ya?t21vvHo^oq<&FT&0spP-YA)^%(YOxB0V2{p|0Py~lyV za7#WHG?XYQm10k<`ALM0oJ4ZKPpM0D7wxHTfZl~E3gDB=rCle%_qTNsBY~kHq9c)5 zkVyegG{DKq`APauze|yr&6(jtPVaSez_!k5qBP|;;JZ%$&8grjqv?T}Pe5Bp(ydzB z5zZXxd(nR0ofvyS_wdEho-o`Rr3gSvBa(dukPUXlSLr~RK4!%j6S&J&*5hYs9=%)R z->ct&lOOsEqsbrx;wK?sSt$du_Vy(A5j$W!+_V*qjPV0ZUsh76|Iy|LE6xEi&??Aa zm+HW7p!U&*3*@5-si)P$mF)RV-q*0)h?l;dEd!jYSJI8k-ndZvp>W}QJ~+J%OY>Zl z=~4#Ym+e6ZDHu`btM#9N)=r(dbC_*2Q-E)~^3`9dTsC-sd^#L=OjZc&oK&JdScRI4 zk1I?XFkd|n>#KP>x$;iyop2_=8GldZe}LZFhh-dx)tb)BhqtWoK zewLpzq`$=C-TWhav_)Z?aMe7g7VRIHLz&|Lv6(8v>!-=J^{F)Nt_4MRyNQp(qwmNF+U*P<8 z5IT(kMT+O7+4;Q7W^AXRXrN>8&tpArv4_A^0`l2wa5+RKEjLPF3qW>xIWL2mHZQ6@ zu8>22uX;RyFA&b?pSiRkg;;sBV63;7(W)ajo2#Tfz5}FDctns1c5ZiXb>mkNbA4(= z;b5~it4blf(;F=eVQ*taS@+QCwkAt-g4g}+<&!;ye^90~{hdi$q}_XDxNykrD%K;u`kUXk<#<2l4fR2IE9G@GMo z-O$o0VRXsPKSl^TF}eWDQ~TUMSSU-TKmu-R+9REQh=CQ_F%(;IZ)rAKMf&|XF1ay?Vw zD)guQe)eKR+-2*>XZYlt8eI@&cp}GZkMI&Y_<7;Hd1F7EpaSKw+IcRF>nYg%jqnck zA#l!mTjUicK(l@Mawe!kq;N=S`25aC*sKfqtdx|VQ%ETouJm^vF(x2(JLhdBfh>iF zd&?ZQo*08oa!&9#=4;C zAHdX)351Q;TERuX`>rz^2}rwYKz>or4>49*JRv?sdz*jm|0eViKRDg83x!vS-`TPJ zrlu-WP9q7x%$<)XRVXwu**rDeJk7@N5TL|vQn-faQk?8nFr5NY6^|% zy~b3ukTS84-)nG4DDu&7{!P9ZLb@~|z=xZjZTC!}HR4uq);CWr{!ytY6}t#uIhH$E zzv=Bf(5lIkC)Rn_*C=Dvuw`sakNqbH-$^cHzti=!FV=XojwcUEx z*lRZLxM#yVAScg*M@Qtd3-%}Pp1Twa0KUKDh}l13xxS|MP%xP1_u?ClISG}&1H|~` z%RHttc{l{Md^_B@L3cuZJ5oK7G=o8rkZ|99{YvkqsryZbuWwK@nz_?yC5u5oO*z2D zS;vJ_iNr9%L~nn_X4n5HI*#vISU!ozJ%V{#OoT)5TM4$P$|ePbyP=~F#xTN+fVTm^ zgHEEQEwG5kg85!zrgfppC_B&SVA*OD3@Q^;&rN3kvfMg1Zxq3--L?l)X5B6i@TIma zr@)#BqbW1JJF_IO!*?oZ4jVz1bD5gCJ=w7sNfv!>IqtGB7)o|qDY%z}p|-Dj;j1nP zZ{aYE_{8<>({`%!h>0yqe=*B(+#x7}4$zt!Pq}?G_QLq3O!&4HV`9x51c@)8+n5*h z8_diLG_dF~g#~aE&I++y2Ie%mc*Oax?@%K+UDFiIQhy)(b99>kBJiGW&vU4lg~OvYhK|)cC-=`Dz?|XWmRT&EJg9 z@m%oGp1CDa0b1)Rc3!xvHVt>q!kUsNficXPu%b>L6jDpeU864^<7$EZ`~uLtsWVZ9 
z1P+LW>(%U<2x8^LX&WykH$BbMY?vQ|r&~dyL6hK-Jy${X*Zr-KeL)w(YPyEh6(-r( zxRL9$Rth#sflK`Z*!$|?Q^d2;7&!M%a1=JB6_`i`qE-0y>Hbs#y?20fs>--Z^7@#e z-O;!d5*R4-C>WrvdxGES>E(!&{lSQj^@z&_lz@m7dLQj=;%*uMZJzW*_sv`w^QYqn zMG~~r2;~l;Wz4&nnQi#R2+Y%t-`KkNq8 zKDeAV72fYVowR*KEzO16yn0}aSW4W)4L|xy0A4Xc9#k6OI#nGqwXS-#( zNWsjri05%IY5eJIzavJS}vVy^%2vY zk`mVo?h}X{RkpJhFxp3bMZ0ypQ{v6vpK?pUKF!(I%kLlC*V@67@Z< z;?osv8J0No;9Z>1K1y@a96M~+njaDTU}%rn!f$=(qT(nk zE(2r-1?$SzBS*eoN)di9n#mzDS_#ApP8BY@F8N*ByDgdLZTq8Q6HnKJbkL4S&9*j< z(y}NZNnGc{GC0Pq12u7*OT#<_gY}bp%LCX@@z<&Ogri3$y)wcB@Jr@l3<~^P&;orh zSWV8tNmaa#aMF*yF`amHhR~s&=7H`7Uw+fITxV3srin1FFF%WS&6=oUStqokGHin& z4fXt#V~6IYOQt$MY9y&NE)xJsrj08ZkD66<_UO)&0fQp}SNtGB&Tf2k)?CT{DM6k~ zepM>TmfV$a1T+mI@~wO%I>O7qI)lD*hj%yy+$A)_Hj)xvI+~(4S0|H?etw2j6U&Ye zZ+mltof;!7I4Y}mkB+?8Y78>3C=catSn zYDD>QjG=ucm?a+??dRw)2BrsRek9^&kkf#zPxYzfAu^-$GYBP{G)7Q$xrUubyBarw zO8ShEBzvTEV2kO_pn&_OJ%{Ryq-m*^ez$gM)PMH^VBsHS*Jmrk_}a`kDPs7udSx8@ zOqtw7Q1Dh2CaAc?jAV-O1^VlHi5U4DwKRS#{g^G=@fY04ur#l_SlsJtV?r z%WaX$9nt=6Y_u{R!aEF*M%#t-T7l2RmA3;!4)VmQr;P41zC^9T#{y%W^@fCCq$ejJ zH#z%f(QOcf0}{gcGD-t@|2>Nps|B!)b4fGJTn&?d zkOpAdf`LCfzxH>2xa@kvdVVXpYdg$LPbTC^kw%5}Zdn|f=UM3H9s~h26<8 z*hy0m)0vug^TlT%9sh>d}eGz+O$%_ zTreu??rZ)FO1IrcezcmCIWN65m)%NWvG^sDo!aj;S^`)=CEMl5?YHl^F6@$l;wM$9 zGxKU@3b=^hK^;wjKRuN);DMqdCjLBfU-;F=6WV>;Holc+uXk(@d{L9OYp6K%({Q~%S*zY5e!GY;% z+;Eba0TeyX0bc!h>G;uAe}_kYmF`-j87@2xT$L@v_N{d|0z$B=`bQ%*=UL~9??^%q z7ayF7&sLlsW=nJ%Y6xnNW&cu(*Ci3+Y`wG_aD%)14vY+@8ZAzISzTuOZ0Fv|C*md` zB)>|nRc#;gK6()!+&gXD*bsWOFf6)s<%_Rei4P&EZHs;>M*~zco$VS zr&7~>RGUDeztai?J4Qx2mGOUR{(X@5CzV+4g(Wu<6M)^{Zv+!b+q&N$c1A;fu}6N~X(AzeFL?}?KAG7v*} zNIdt&(>{0hWGH0pz_kW-`UWut6I0{Tjomhhei+QG=O5@?U2;m$0? 
zBuBV35$E)zc4Wy63~|wz#btJRDGkTKI?vMN^VayiHhJ0kw2{)3H7y0b`i--oGej=I zi`jni7pPB3tj^`(e364K(J#ew3pe1tcm9djYro!@@!N#sRKYk4Gsh zCA=%xd-t${XVG=JpsjY`OZrGKmrNf9Cn%NQVkyvs*3;kjklR%~EKhEAuKvEj>q2MVQh!(81E5NI$}KMDad%0q-vlgpLlxOitGnzFC?I^y5A)k zKLK1DTCI8NW?$7YEa1MR&6hPibId4i(iW)L91X`g9BXIHj;SnG?+3+tljCtZF<0S- zHp>!K;7pG3>p*g|-MhGX71jMi6E2S|Hpz;!Z zwBtlr*kc}WJ&nQvOjybe!O@ft^PT@G6HKaFd1U#O{b zId0M6bA29m%ba_$M*{0VkPdcos)I+gzCX zVIRwrM6Rj1p}wBVLyQuy{icl$XSR1FdE#vVXMb3)(qsQpBmzdh8;k9(=Ou#Fd1v#c zbVB|*{l~;w@#D@CI&ZuYxH_u7*QtspKg{u#h@g7br$kRNNZbnYhM+= zfSD`Ke%lutjuNtNe<|XDLLtuPn?wS*GdH|q;D_> znomh8e~F)jwIs|qWcgiDMG$xy_V3_`%XHG8^)pt-kh|wz(l9TwrW}TC0iVqKN1w92 z#DvRqs_0$HE+x#)g{{!lErPIMPh&~LJ=cfIpR6}TEmqGT+Mq>(d~e2@;$b9PVF8<1 zs1&gKIByZ-t|xD2ywx%ZFY?EmI;bf=X5e{X>NU)v-9KRjC2qFnJ)z0>c_ zFZVm*i3+Dg`3EkyVM1-l>y5Nsr0891?zPvAn@p1tEkykERpi*)qqaqE+TCdNAFQyf zEsLk>efcF&jX+>2ktPK}3)4&8D68(Db4GcExt^_;iqL~UT&29B9J9sZf)OH(%c4hy zsbDHb)lpt)wyF&bDu2!<+fUC>CS#!p`cL*6S68C{v(NJA2hpql7tmvo2h>%6zW$07 zds3tLx(i)W^uNKRsJMclhwA8$^CqL&uYV1qu)t~CFIh+LWz%sRZTi#C2O>VP8o%6Z zoHp6-X7PL#h5#Q%pzi<)q|xh$43d!=EI#YUw?ER_ z?@@Q;d?q+m(b)JEx`d(~37yS&yAzT5zvtkbEHN`%-}A*XNj74wbD- zRxM?K*US=5G86Q?^y%3AP@My9^^=-rr;LKFhY>VrAcC1cDYI*BM9u2yd!m!IketgK zzme7h+n>xo_m7xu#vc3Oj?w68`Y_Uo_$6kFteit76JP)u#N1!Wwatt_P# zUP7jUDoCF66cS$)6%jgEhATXr(=XKht1A zQhhXl0uKo38PH_eEKd#_4ykRc3mh|;P1vHPrQ=%@m0PHbtD-T)J>9Q_R)&%r?pY(VGtqpG)&W@xD7qzRIwC4Cuo z4Y*Mgn4ork5ySK5xZqlWovDJCsnp&$hC|e)r55qiH(v?eDr64XqpiaOoO7FQNF|N; zgAWg9u&T<(U{S5kCKNzz9~BKTmn`m0zpjz9{^lUqbRb-U%r3d5>U_#~?gtJ#wnK8k zfHyz?v}C-Rf`=m^;AWF7KGgdX`x27n$a#O*`SRzTuR;>5;+t+s-RusiI}hWI+HM|n zSgQvB26T0|xGI_Io7M@x#KiK}t=wv7+~3}XP+e{U-T8Faiq=k6a6m5HhY4uWZKTrQ z7^Ie-``1K!y`w(D0g_ydwX)#vffXGYF^L~N$bUr?eOFl18CGn(J75strs1;}a-?XJXA^S4ep4H4mIZLa-^wz?KD}iXB&FhSso|ew=B-eN`A=Ds zqLY=%A3SRJQ`SjPVlk0FJsW0zCGg$Lo>gK!+En^a4j}~ z%&j>WeDKP%9X%)@<7K~p@w<|vJe5`W?&PMO8L9_aGEv!a)tK<-`V-hDw3C=^hBY9Q zOj3VV)#f(0p8P?09ZgLF5~}T4j_yw8ajPitd>^m55r5dVWqqb#mMh4fD zI8`z#k8XEIc_rqZ 
zvA^sZ%pY2wOFz8h0~VuRG>P%ekmDi0(N|hi!ZM6bMo^-R}hSHMhTT*MhH<<*fcjP}* zXd_FaLr|XLrplBxnS=wJsj>8PnDD}PBCop>^iR)l~PX(oSji7znTqEeh+LpNFI;W{%KUcS3V)n6F{8G)x)oXc+N4m6l0bB+PIWcJ7LBr-?psrUznN8AXj z8;aRGPsaA)TX|)SyY^b^b56l1sWJreMazc$Gt&zO8)k>*7RQICzC|Tv!wCsL zxxObl7~L*L>Bu%j-rhP!@_Bm>CA1g2rswLGzLFAM*55YZztJ)&5uk4PARD}9Ob9J{wHtAG5-CWH>fKHIAUubA z>k#l@#%-w2?Ko-;RAv2?c6m#0d9rR`m;ZQj)4SWP&Wy~kIz4JU;+#~Yx_%m$ zKJ3CH4``=L_(*A3<)sYvd20K5zXeBjsA{oIR@kr?tPa>(v+};wiqmCfHAX%xYtd+t zhOs5;2AG&A+R6 zK5eCFNtN~bhjx{vT+sg~xXXPRsw%F$hH(|)^$LvS%N2AzJ>;#qL_$(XOM}yTl&%PJ zYNFA#H&M$P_H@=irO{}k;kQzG|Hm(yDnpn6}05`IR z^&Q!0Sj5COuba9VOaUOqep{b>-+00|mP$Xgnpj`=5}s;c+rqY%CB=Nf{69xt<{(E) z&o=b?Qy$Nw*{WrAv?Zq^y?Dd^5&l3f!7B9$PA2AQhA!6sz`TkZr=yF`ckZ3C!3%yf zT~;T%t$N_sSQ4vIQgSM-NW92u4nT8HbDEsK;JR$vX8{>M%8{_X1hvZqz8AXk`r%YC zrDOdT%m)*}iFYS-lF3bJR}DIY34|i`4CXG-GLeb}n^G=`&2Zt8pvsyW>jsl|Y9GAV z6zjG_b;atLVSRI(o4;i%h~Bq1UwBn*v`X!)3N8g#o?IbdlwKsz`91jZyX-;5BW zMRA<(IXzEr=Kd7}qszcdg_ViN>72*hIK)4SzemT2+55Vu+9Wl%O}Jn+(`B6o*)GCJ z(`p*t%)9Jryn);^uA3z{5Z%WCm(s$&xh)qOtE`S)jUTIDGwvAU4zD?}0fvz4!gDMN zi;hgkSj5*rLRiYXKviU=7B1C{xK-!VICxmZ~`-Ygy*)J$!{b-BQ0 z&vrRqF%OC8VT(JBeZujrhC_Bh&4frsIZ@QQht| zkSMku>+dK@yM|<}qu%Dp(%)c3or-tUC8SLAtUjQ{LtdMHsx`q5F53z1I02>Q)Q)zs ze?`XnDHQ4&+xVe}ha#La;gbCZq3x*`w}f)|doi{4H>)BFp`mXaEkfsZR)4YY2ve^a_b4W0=mEP^yc9wHGX4&}}>*PY39dK*$+%gPhmJbKDsF_A8t8s)O=39P8@ z>dTr5cRq1mxqNG>HXpq0yBRBAeark-78o9VSBpI!^7fp+xaVqo@;!azL z({|@Jkf6&>gvz_wZT>`Klz~r4UpDA}1B`{2am%*~%&D#>dVtO*~ht>@8o~ki|y6%?MgRx zw^)gcKes_5aO}di+JLwuPuIyzKr^Iyo#NFYYb zRY8w(hw2VG?KMcPE4nIjD!)(aO^G1+#u6HuvkoNd2a9|@8`xOp8^EtxtAPc$%lL42 z`Ls{{1=yIFoa+Zibgj8~+p%`|yf0zNY$U(ed)}#BcRD#)bFak1SogNiG-2=3>T$t` z4Az{Z%{Q?uaM~Ss_qo}2gvqD`3}WHOuWPZkdcoyX_zzC@8plET*ea*7ew}N)W-;DW zZ7*BCu;054-or1dr8Z4ja~Xm_^rX@jo0b+YW8AQ>`{)M0gBO z4mwJ32%WDR6>W%n@9tA^Q-QZs-)G!bYk(`AG%MWYHR1k~W#=~csNZf~0soq+Jx2Hn zl~J^Hx^Q#O4h|12x1P5M12!~nb-D^|E@3g?MA8Ok;yQ`Gy;Pr5UOV3E1VH$j8`APB zs^73&5K8pW+`;jLBIM4>&R#;3-|6YEpeJ_ote# zKVaM9L^;}W*%2OfFiCi0w6l#1+E*MJ@sy&(gcdycggz+BJ^E*2&bqmSC;?=;| 
z!%)ctw5v+)%?gs{Lcg5hP>{7G{sJG7EA=JgwMs55^0CA_!BF@O!GTJLvuHQ|NBKgj zcjFe0#_Slow8xs9-DGx_JQ>SWc=@1-cS?I+_U#QoZDi)nIoV`xt?}tUlrs#O6tWpd z1)qnpiGcpBG-A}`sYt%PxXaucHa!NEwljR@Hd@vB5Ntzimh8oB1^I45aDXKY;iOji zr~bh_G85^)wub>1JZry+D$N62`wxc`kfnz57Hq~@xjEqp#CslnS04B`R`YzzJ@-}`b&KgC_s7A~ zH{mr|3jW;T36s5$y1Ah_t&ZUh{HzRb7G&{cf~3qIegQLAVh5-1>7QvBthF{1?9k`^ zW%;0NkKa>D3-t36X&rv`XYIB9ASJ;p+a;zzH2DLQ3p)Y~Ih z&Yh_GT;7q){BaVy6)^8VgFtolN9RmBhGo`QRl_9toQw0u-mDjG%uFqNqumanjH+fN zk-#3_!Rz&9V2PD-WqhucGpW;@N zX`aLWDP8%@cQK7j*iClPJ#=xw%-e=+UQIe45g?3}Gx=jEY4YOV{}+I|T%{>7C~TqT zK6hI{;Ma=WIGROmBq&)n7;Ga3hIkA{?14DZ?#%0`TX(SF4JIA$8fCBuJum1JgvxF= zF*lp!#w$@~pATs9`L>hut0~*vIl!#)I%& z!`j4-RsX@4>b@V)9eS07+S#RR*>aQ&u1QEN@Hj_3apGFoo zM~Jw4Ij^CeXfX$q)5>Y!mxbpaFsQVNr<}HWrC+Hn@8S`Uw(s7vgzxnhf?(L(` zStO>jnJe43+pqT1gh@W0tp(HtdG>kIy5Xjw52BwEs;8vl6CRm*wx#Q8Jsre z7-dw!YF4e6TPn4CEBBcY(fI+K^#90R0=Ih#>vfIFxk;an5p3LaMoNSszn0up zkJXclufoI1GrHiefut|@JR|nMk0+Ya5-;a@vHL;DnT5FFzo5!Tq&virtzB6T@T>G? zY3AE@KiT+L*Zb3nRRM~r`Xi__=8MlKCrAa(D*-;tWW|i3wJk4BXi-OLF1~+L7 zaee@k?IG87RXocZ-K%}LwYqfzRz+bsys@Byn$gtb1MiwWyOc2fPB^!x!~yA|-FTZ7 z?vw*_vlNaOV-;7S$0(W~H9&Fp%6>RDs}&h-w(i)sZB7MC&~%8I^W?clr~G4=s6Liu z_ME;=p;n99%AkwouL=-cw%O*A$L10$&6_9c0n*N6lPR2_`YP!iKL}U##A0Bc(lZX1_z0!j;h@9S?Yoy1(~1rdi(=%|$gF73XKhI9 z&0Au$?c3mTrAyMIM&D^ET=Iq~N%)KziMLHmy5YtMq6On=cS}$CMTfTq3w=};-UHs< zRScfHQ!CmMQH?&-WwjXlbGwA$`~$;^c-sgr3CGbGE4ikymwdcSD&`>H@5+>-x4C4~ zZS^F1a|8evkwmXzr_%At3!?o~Wx>x2O&lz9#!c-PO zYvmW=>#)j5HlMmy%{*Nw6QnJ->&tPQxGp`U=``%5w<#mNvcmJ*J{QBeXMEbs97WA; zRxF}aB~OdE?%00m`c3SO!S~{sq~T%4=yy~!I%{`YYrf_C{{-ssx;)1T)Bps2E7AHH zfA4?avWJ39TZvgaR=?iku;v_3&4@0#aPG#f{X|oHRUrJQ1zH$4b87fzoFbICJZj9* zwZ@hJFJH;EW!6>orx9CRuVutuab;Rx7>jwioM+7q_X7YCp5F4-&q>h;gLbv)U3kt8 zQ;2%AwijDuq!(R0f1r#NU9ymG9VQv0*M>S7sI~F&QL_6_nK9I$mfFEk;`9lpB}KP| zUzl|abB7rzWv+FNK3|auwNau(iteuK*(RrE8}75bZ_N60ijP>;Jt?~5*)<)1jaO49 zrUL3uJkBK&?6ju9aFHWz6_Bmwg{UKFjZ3D!423p8ATHo*{ABn|>HCoP27_}rI3DN6 zN6XbNY8so3k&aSrKlJh6j`b3!y@^E9IC^qhJvc2 
z2ihOOJ^7e&Q7zOI_lc4uOvlD?h8@>-9huI?tL==}s<%Q{PA3*oZ7fFx`L&o-nqAM| z**m$bWSF~_i$)2Rg7?s5HrWIZ#qjTCn4x(#MQZl!m3QGA*Q*l`gaCljbby-;!&)-VP!aZfg0E(a6 zov+Ty5I~m_G!nYM!Z&^1!ax5%(g1*nCUw(nd!C>{sJWDg#E;oV6au@i zF{r|IAX##_+%p>?G5b55Cfat1KGlNM5UZ~bL}Zl(L&su;%@AIEtG-YGVODK?P0-t% zB9(~dyaG*tt$bPMuAS_|<*yIm+>1vg6$lVg>xX=ONaR<$Bvw;s-h&b_@<|1?ST(OQ>tdf2N5o=HrL_SpgM>kk+vIvK3zm$9Mhc{6ph zU})@7m}7YDnqM0!>r)-L%V^xvs-A%Pr=E zSpkR5yzOF?&fA>*DClqb*x&B5t_L8ESglqSIusvGC)&z#N+<)XGS_K7?CV88DsDCOyL3MWyR??h3lb#rje-Ki$}k_JBM zz71`8^6OXu%RKm(Ptuny8sVWy6SYc(8{dK$d3PO`Pd$^D;<$RQcI#lt=BU+XC%0eVYi z)XjFQh7g?Et zPY*f$>tLC=yvGDNREnv)k}3^9eZ_q6v5*vSo2F_qg|SB6y78i;S-*y2A(eboj^ry8 zS-<<*5))DMb}e_qZw=L7Z#2*YP#^f5W#o;HImZ#p?Njn1mc8TY?kv{BV&-B!IYsg$-BYZU*jhJIio{CaQ)kOON(}SH>0U z{=NRj@(Rzdnj=`~P(7$WZpYTBuD|I{pXe&Qyy~XoW=sw8>q|uopA<~ea~%Oev$N}C zS+du=)R{;PbsiqnDW`l-p-<7b5v{14C%_~@S4$gWd)A8Q`?JG^0F1ma>BMEBKLXxfZDRR^mLP{#^P!=k{4q9Ty1L7 z4gclu>tVc@rq&ttCc9ZCO0oQX6h1d0tD(ez->`8;`YN6=3}Uvhxd?xBDSeL@sI~AH=+su^fMn2WAYPI$sy0rmMoZ3Ev6); zqg8su{_$yvj%5cLo2-xHRI|*`osPt@0NDiKD-XNAlKV=x3f+%Up?-HQju8{Lr1Esx z8hSfcfdLT!mA!XlXW7+tK7*+R%1lqySF}Bl`&+RE5RXw4k~k(RVtGD3T7keZfP`&v zJYDcvt1`@RN)0c-WTjwsx8ZprJSYJtIV-i!Ifka zV4-K>`+C|pDo!^Wqf*pD?b!{_)$k-s79y#PxF)M<@T6uB14 z;8QzetM=-^U@5TpRoBPfZPx7chYp)bhjHsu_aZ0yoy`Z^dfMOap5_Cu;_o_0+rKnm za2btD0un1HQrSvJhY=<}g?AEJgi#mhEHdN!FUGXAhn~ci(fFp0j%Xg_;b?2?IGt|x zwLDCf%))5W15Q4;YjXRWP(w78PyybJ{RmS540AuG{3IefFL*LxS>o~Op#+Yf>=0SKGu zOQKX+1_x~;AYaP2)fha457-Uk)IC(4Ps_$JV896>v%x=+1N_bMzeZzsw%6<=QD=vbndQoAV$Ip=Bv5a*Hwp^x18KPv}5}6bg$(CFZ*|G;0XMGMS z6pgFcM#FlvegWxv=WX5%TCeXFF3OgmCnQ|F+`T!C-*e03Y0rxRV&{;swm5?e^0)2U zPywpJMnHhL9ySE&a1y1c$&R!q%$>3tS1Myj4p0sktb1L-?`zLP`I^W3L0d=pokjGvHOi@pC-*P9# zdu4b46xlG=Oeu|~%ieO-?@>K|6xX+#=>l;dYr#&Y=kWpgV(|qEItyBgV&+|Ql;o}! 
zRNvx9O-@o{{R3`6Ke7@V&jUfck*F#r%&X~<1<-v`T@U?UfZiJ4%)H&79R zZ@)iYto`jEa&wy9D7-GLUS0VMVXeKPt#=;}Uu>>CCg=ot%L{2_3*}T65)DV9izPpq z4^NSfqb)MxBrvMqzXu7yHzWaHeQt{ty{NrhS<=9)SgKecLFt2^F6SP9Qxn=ZZs(;v zzc1{~^82bHgm83XnwC!wJGlxPSur;@?YbBM5F$o~`KfZNERP18=|wZnM$j|kqpiV? z0Sh{;QS6jX`=QPs@*^nml})D8SK1g@Xo|$2Kjw^$aA(7sWDQ^w@8FFE zh^2^(NsE71FvI{zwoob=!{jx6PIuMpNiQi575-c9ngC*k0^sm4w zOLD=UNyeK|s5HI#Ipdq;-qPN7Wn!R;o_b5>Hk;@U1Dv)slwt;9Ji_-nxuXglSU;1Q zc|7?J`SMiTas+_?Yse5=SWAE)Q0ors+k6cP|GM|p?S9#y0vplujSTReNdP)t6!lJ0b*1NixI18 zucz}Wv+l3%3zqJ~_jZ<QX`U3D1kY=Yk~4n7V{It*o+!uVGnW zs_M?1WscQhn`FyO64=l3VC^<9+*y1!K7B{9K;ogU^x>b)FCfVKI^VI&5QHieds%OH zne0pnD9!9>5iR_v!{UO-4LhnE6`(k!k>EVD5*Bk42w|u=_|?2aPu4HSiTc@*D=^CC zLL5h}mWNPxbQE{_SMPnx`6t{cE$4UfA6gc&SABJ9ChyMtez1Vtu({%ptB@l{8~_6o zFF!vyZ$-=gPHct5`B&Rhjhpr3-Nj8fDM^_~&%4VSPL*}reRznF5tr4$iU`7AHzzx~ z6sQ|od9Lm^YiT@A3kUQo5d`@O`y|PFE*;F=?|Y>}kwS2xa-+i$_Z?RX7%krv;wBBh z$fbmsZ0UcJ5@e}!1Axn7LCQqsk6O|GxmPR2_Tz?}Nwz`>W*`Ni{hqEY$z6;TiCK7r z3lA5dY44I|yZwEJv^Tt1 zZ~Nb{WIQ$eH~<$7Ss^)fWhxbvq9XofrBu7@Rl8bZysGm$`!@EBBS{hEMJxIbU3)8W zSx+E5YO^@djuqRk#8VKUscyRc>vyu(wW*$Zq1gWUMzpq#AzPpP*|(4pSv6UtQ$w)Q zh{pcV1euhe=$PC7{Mp5J%McJksg4{rePj$N6{TJ!ND>==^L}4gkc8MD|Lgoi7YSe=HoFND?^pB?o(GF zQxU-AXT$Cwl4M^6ZOI%=UC{q{;k=1e!D_G%U;?YZPf9nAvo{&KljIn6wQ>I7!Zod; zb>SUc1)opRU&GqVBO=Z`69JE7?PC!U<^IwT0L5E(^V9!M>@+LNNvz{(yj~LN?F|}_F zy^$Yr?Yau4D;XoIz}I2y1q>a|Mg_eyx~8oVG@FNC0UG9&h9iJDyOPtGf1q zVv5m(D$519kYvBIis*qQ%VoBTw@30op2JqM0>9M-RPG{3-_-T|fSiU?3NfJF;lbC_ znPVimyo=c+l-W`eHEKrh&hhniuffw=g>AI6tNDS3L?teJ{I{0`+JmC-^f>~VXFS>& zpwmhU??)|Y>nrlyQ`OvL)!)gMuG*>`SLM=+pB06&x0<1`N!mdgb7SA<|9Y~LAX{<} zm9Xv_7kqi}bV=PWL?z5!TTR%o5s}1Jda@5n9HAAeVYErSYz)=8GY0@TDp1}`&b%}! 
zN#SzW1}j9`#H!r#dwtq(4%hYuY@5Er$ZV={n+hm7?SQM`Et$tb0OD`YU?zp$J4GoO z@6$}7T4yhD=dkqdgV)!K!@NG8HE3c>p4X)@p$EHSPvfC!KfkBU~adhKQh zi#5yluUzoE{6Z>-@LNa+cXyISbSLtJBpg*C0onMs>0T#Z;u-x~a_S1`>_(@_m4zn= z;8PWF!TyfIWGT22p0(RC-65!*wNSo@U5i1SvDCh{4xUQC52d=q)H?Pa>4J$$sLSxh zki7}ApR~0k?g(m=_zB^3Zu@^2+_Vf zPj^x^J?B>an%4{Xa_GqdbISEx#5{pbfYkNU4r)5mTGp3E>pr-rUXFHPHuCZ~?T)rIcl;dpw>ge(svcm} z4dmQ(U~=sP7jje0$J*LO^keBd-2dVuid18|GsWwHFl~W84y1=fP6@MNu!5hv1`_y_ znD%t|zB^*}Zi55^%20BDp*b&+Mh$xl;%;-lOH(8|vpK-niNUHFxhZQp9Q4Fq0vdk7 zLxPzteDc9t>Cp#2Kj4FDvp|9g=+R%{`0sanD)GTDd`ad)cAf_x9?(;7UR^fu&*6n~ z0Xm%j9R0-_|E!etpEu2C_0|9U^I5WK|Ia^a@32&y4pxD*9y*7~B5qcLO@KT^!$adW zWH^;Sa*d-5_4F7DTR95RE!URvlJA@9o&a#u=pB*zTF>%^gktC`x8msiWU(!f**FOE zd|Ar$>bGo<4`minV{eF22UF$)vN1;a7ItE*p$#raWy9K1hP;+nWWh~W4sWfnYP*)1 z#Z|b1oneA@dO&#*rmAE|2ZFXDcb9nO+u{lQjQs8VMJr$A3ukEkXIcQD8#VPMX|&~} z71v!?7xT94M%~>$e-|rRu(nCLg9Q4?+^53B`)sq^5r@{yysaX`)v|(_2;jR@@4{2l z;VN-xSkx(RbE*P9#}%Pf5CGrO^_t1z7J^152TsN4eI@z5)=2rLB8h(MrlMi{t}h@l z?pg86)3?G?;&kWIq}V{3J-C;*&V-GFQ$wA(sV3ZwAkvYnj~24t{YNK{=c25o5(OLA zV5$P+#9!U|uZ@2KoQAV5a1$kjnmrjv8>3~EIZt`)pZ7WkZh3LA657B1f*1#{$$}h^ zF%#R{JWj=}?6rtxBptMBTsp^Tl)71vI=r4XyYt!n{7pQP?AQlfX5Uork1}v<9iBX< zvsean@rvI+Vq{+T*heR584-v|7OVX*4mLD9n)G`EKI3GaQIv*wJx=Zp<K|2+OlZI@yx4q zwW*^mY@_ZDGSn`j9M{kDHCXy_4zvB=C%eWN>$16h4t;ukDv0|%pC{n?zE`yHfqGQE z>_HDr)_=t_W{g2&S6?1*gOMQcE4hNo^KNGLi#_;#n$Zc+bz8L-`6n(OTCNY-vTyPH%ig8c zAF1$#@gKtO1ft1KqabQln>+ms-UQL1ecB^Fq|7g4&D-bxYl9MzFF2YCpsTfdBj2Ba zi#cT^^zZ;ujhc-ny~66h(7|Z6%DQVek;do%08|u{YQu?uMR}dY+Z>$yNXEBANVoOF z%7-69MFf|3%U$nsC9^7|$4FY7nawTx@{NR}hT6F&=YC$(&ZDMIR=3BaFRsp~D(5^h zt?Ysy-{F?{#W4>JFrQC3n%j8#xR86wV64;L4JDR2&=gd?o9(Z(@?%e9ok8klPM1Y{f+OfQfIYckrV<@?VyflUGoF0-C}AJe~u}{?sXAFrN;_I zE8Vu^!%tLSBsU`Kfa=Om@zn%)ASd%3g!&1f1k{L0&;c@On;-OBpZhvWRAtVuYUqSY z+b;IK*?($NLp)gRqFybd2#jBiS|-tAK3!_c^$wPg>&9JSTaSlCpBClfR0zM7IC4w2S9Pd}K^ zO$$O3MHtWN^@{M#8Y9*)tUq`qiM$^?Y0@G2cSTciOpZW zuhx7q(a}chpNx93cYj|{N1T5;4?+xGu?6qOZo{Xg5M#xwX3aK{t=gIXLSL)h=pO$E zB|si{{>6S3v~$2NlY0jMlSmj`yN-3fGX1K1)aZUfn 
z3t%jwAb=pMO0hWK<}+Qz-b-B?!rzvi)q)@jSK^HK9PDDUKEgC~Blz^)e2PV$-pN~x zo-40Nfp~gzS+t=?f{BHKlvKOq@D^TR{O-@$T45AxB?2h?XBZ$l3>bA^?u!{**~!ER z*WrF7CJYSDRNK)d7XK)sn;k&8uwJb6<_;{D_asy;75FtEDWT$}G8of^GDi>Z8LOdF z)S+(oYFw>66bidsi;^QvD-IxKL;>$=ME%Aq8j?Tn)?0Zf-k?c4uah1cx^>z^l1`rRB~#ic0F^-=@&Kkg+s=LifE)!su-9uYyUr?Kd{_^4}- z`PG$5RPjE#H}L8~_FWXAJE(Z_uRR-KBX3dEoQF><5$^T(yEJ%IKbr5aUr3;mN)FTA zc|=TC+sPvbR~z;$5lJ?NZI9cjdf>Z~P=#xJX}Qmt;H>BEo*%@p6bkpCuyNN>Qtt&+Q+u7$L2rmhalQ$_%ds{ZoP(J0(|Uz z$0QW$9sy8QwBd6e^E@BL`E#xx70x0IF!F7g3{ZfEZEsJ1{VqMrFh(vh2RMJykiSCK zB>;ebt*lLvn}k;{#%!#AWOJc45!cSI+=xCzv4f}m!vwWQlNui5Yu$@`dN?s)3$(?z z_gi=Dv{-xx+m?fjWg(VZ7Tm6+XgYisfg_`?TM)zLT_?JmrSwyQP85Qea6EMRk-!a< z?auL5R0%xG18TRzYc<7@C>tK0%#`1LVoJuKH(K`mL zM#z|LtPkxSl~Kf@&eKW0#$)C^GrAeJy^A>7+s}>j9NjjPtqM&_W9}O~6A*Abe>-+FuPUWjFI|RchIHGBZ*)#g$Zx;N=B7xDB>y7Fqoc2NLyks;}fb;j)*< ztZUt+r$vxxmLH7G`I3mw@!c#$Hp(Q-rR5=w2<;R4tMvd~q|8ZEXJ!S|HY(=d?}LiT zIV*A}23eF(`G>+V-B}u2r)Tw&15OO}(4dsIg>+l&wqq3E83GnjGQEHX?{7zYLx?ul zFIA(8?MRivdhF(tFBVGsZReN^RFTcUtPMQqS+q(4 zba&gQua`c5LyNz)OkyLvc2KEZJr$j#?+0b#cNNxvXBeEfMKR&2! 
zUvcS#ZTrr8VhGsOS~elDmV3UCIZ)=Xj6AcOL~_0Z|La+{Q4;1D zirZPngY=f(V9m_UCEr zB*;a66YcJ{65u2}N6L(|=g;bjNY1REksyn?4^3@~1f{krnBk2(o_jQsf@E33 zP68zg8t#pr6_kOG!)4j`AO2%1I(}#FRplEm*gBVxQ~Q#s8f5O`XLdOv6PEqrAbK?90h zwCcC<l--?fx1ldGqYwq>@pYfaoi0Wvy^RXr z>&&N1^K8RzvTU4OHgVOY(8$%!gkBnnJKqihE;9wZNjBD=El)dF?aEuJQvXTTypD&P z751};Ty{N^ok<1{znnG`C&4y(}-WeO4n+-;*g@TilSX znwRrT!5Z$9^D6oh*~DuTw@S-m*#Ke;bq z|70qdG6Wcv;~Ym5IuFT<*w@p*um1a^$MYajFM_TxJTnmi-k(%#Z6*jtg2hO!C&oUA>86Y^*kIg}I6 zRE~TL$u~KOD=~+}9|x3;I7unWkjV-nv-ZzbALKr7zk2l2Qle6zG^&H=987Kbgdz7k zOfnE7K~^OvHrk&Y+ZFi3KjVg6R}q~}wr*ocdhk|R@JJZ~Ahw2pqKIYwM?x3tmWiVu zAqES0+-*`T4*2PT&$Hl2?PqhQFZ>v6YS$yHwZUa8Afs+N4#O*1Ppld1?!S}VjL<5_ zf?4BibUqQ;dZldD+8fgJ8|~0Q?yi`^-$r)~R`Jyct`y@A@o-I2HK;H#v6UPexbPkr z&j{1%QCt=)qt{MO7GPsDO2{xGE?r9*A;|%hlS2l&$6d2hrm|r-GfpIx8=9$xHGdW% zDSbRNB-AL3LgJq!VLm&Nan;|egb5PVHba%Aivd0pq6CoG{d*s4}>5@H!q=QY# zj5hV-fJ;+AhVnEU01$>NErpUh{62ak)oRd1SR0#Km>42rZQmXHZ&EjBCR|6wL+e*K z>ho`y+We~GNR&}8LI|#T_(Ws=+I|>0$X+zAyFk!;kAkKUL0;<4_pO22DDPm$)H+b}^Pi?yN5eLWUU z8jfnc{OWS)p9k^HkEs2xxv2em@V~?^0d(_1v8;;ume5mZa4^c%R%=#k^@k~g9B+)5 zw`5d@Q|vc@FJS)F&zID~e%fSlkQVi{FhF-0NdUOTmcpNV5eyH=hJntt2}MgIgaE)j zapENTBQ03IHjZm3_`A;#{OX_fetK9_O>6&daKxHjn!h|*g+C;nD&0+2B?|LR&5Gz# z_hh>c^sVy*{SIBH`C)>B;`6-Me|m@-IkwicKa%owvxQU7AVjP( zNp*ZohCOA~=q|o6K*Ji@T3nK@1cU3w87qhwQnZUPE0w)`j1(H%i8nvd9_an$@0EsA*>NXEJzxr?kjaqtcu>=mJ{i4&^n*D@4N?{2)TK z*O8f>omzT7N^XiVuQP0D>Tj^Aw8BoWZ=}vHlW#IXU$s6H99jOt0G(_`NMf;(Z7s+| z{4Y%Ba;pEcxa!-4T!Qtn+br7HkOw-#xBIiy^{%*<7{5+;ps05pDxKDS9W|9yNH4ayQsD)O`07gXh*WqL-LQXd)G=YH9qf6l-g0m3V-z-0TjevdP%E6S zxRjk#i&afBN8HAJm>Yz`ER`5CS!)g`MAP#Ahc~fz1IyuZf{X0&F=tYX9B}f$5sA)=_(4pCdrHGRwly0CvxFE>%|Ogm z*9b!b@S48Ves%Sl&*paPNSATo>>}FT>e1J^+Mju^&Wr>3)hS~^1V)`nUx(OxY3B9$ z4-32n;)lcaw(sLyvP2s$i6m=g&5|ax5Y1ouYmTn(MOQD(YV@&#0G7c?hh^}t)7#-7%*IJ;+-BFg zS`f?5jIyCWN#;5SsA<<}fs^(>tm^~XkBdq@sk{q4GZxF;Zd_Uqui7!<`Y=%;`oncI z2$_Jz`<3wbLit^4uK0rCtp@8VqjA>DeQ-?*od}J(4f2cQ|CTlOn?6kbO5e#%zS`2! 
z?8ue)HEPe!&Q^<1CZ_m8qYVX6orQpmC=ICEfR~$`36hWcBx!`h#)L z*qXiWc$TJ{tRRGBNnf;JYX7@Bv6#ueskmVCj$Zlxj){)TTskeHM|AccTS2&V9|zC1 z>4L3>&z7biqh<&Ptb1kSOSAdjSr>^!L4JRim%YDTHs9o5Ol)K$D($fstuH z?2k?i@#Onz<-|GbMRP-YNG9RMA&Hb_({k zKuFSe_M$);U!m@IJeShyWQ+a-xUY$qXVQbJP@H{Bm_aZ4}nvdelo> z*UMt7Al^r5c$Raw5&OgaZmyoVEAuW@x7KSdwTOybSQQ!4aP~XiY2z>1+=TfTS>RZ} zL*VY-Myj==NZMHlGDFCkeMuSU_?^K4pixAV;ovOD*%!=&swdZQ@pN@Pxy#FdEpF64 zp1U0JdORJVOH?v0w1g4;K8Kx%TDYpc67_W;)A{|nGLs%4JoDLZ1>Cvt!3;7ebz8w_ zmij*4$)3UZd|trDvLnu zMR6C3ENSGsReouH@sha&KdP@4$N9{DORtU&=e!d#-z5B5gI)T-Vmwv9&FTfXM673Q zOle_e%c;oE%wR7NY77SqG5q!VoS@?_8Xuy}3k3T^2TYs1mXlzbDtPV80L$UsJLs%) z#r$n=$683+>8R!Flnp50lwB@gp@ze0qt{@AahTs~toXP3opxvI4fnrYwWIaG*>hm(tm0pHADfqn437z@!~oLm z=l2Z0rjIucJ%H~~;;#Cbb51)?ofqhyJqN+-upG}+(;efUY)!`} zbmGDIW?byJD~D^%IGaQ2DiMRsmy>rLSB)D(j5Nn9kwknhL~g2pXot(Gi44B`_p}Mf z&q$+Hw8oG}E>k6|^Vm2Y{GDEh6CFu;Jc`)P2A2~Vs|vx1;Q+KeP!&Uwv%%9xhZBgo z%Jn=aJP~D~O8XT$XDE;(P*b8{>o} z$f5+wRXgWT66x=wgZLi+f^S1i`V}Q`zciz?(DI!@`@eoZB%lpI|yVgYjD!> zERFEK`j8EE@+*#8iA&DPaCfrr@u@#Vp%uKd8)P&8$&D8l|xk<--)tUKhG_T&S+bgB_D|6E?(*TI|8&H*QmdN)bE71{Lwy zv!2!P(}!e#p1}%*hI+WJ-1CW_MoYI0zo2z`k%ARE?htQe9DgLS&UD*0{g<&O2z3Rd zgYZX#f0HmFm}V8f|1AKE$tpWYBB|eun(_{O&wQ0_+!f`4wjA$|O4q`i?gd%@!{Y*; zYKon}hR`waE)0v9dy3nnV+944L6C767v`5J$K|R!ZRdo*Q(jOZ5Pj69bwTGh=?JPT z_w0TI{Yuo7h;^FJgLm=Wh+{{Gxn=V;^MW;-FW$cZ-p8kFYUbA)!#rE-5wnk^%PH@X z0(G^=@mEbVv`en)mN+OaVNGFSmc=1Q($7wu#|6|wPa2QR3Q%lk6~F)aNn;;WSO9>$ z)oqUA%@kiiVhbJlR2`4rYgJ#{dejR_a_(PbM6*o+Ywt7LG^-(i*n5E`Ot>oXVHC00 zNQ$NN+YeR%Kx|#H1qzVOZmacd4oO9~12x9bB~ci#Q_t6>gI~n>E^l^#RtheW_S=n+ zIH9tt>}owL*8iU`k0_4@R}K#!8v)#jpl}XO&Wy>mmWJd)jucprk|lnA8}G5BZ9 zL6D+gUf{-s>s$D7d_;mEr-waK_qBp}nTpmJ2M%J%ZSkUx>7|jKa_O>4kf?@Pg3I(@ zlymHZY}2&{M(vU!NV26eRO@(k!4|o*`dfasLBK-=n}R zLux$)rhD9Q;lhh$>j3E6%Gz<=V-N_}X+ol0n;OOo0OSQD|AFeRX61kcsBt1jng182 z<)8!z&S==z8eb1#?1Mgk%Vxp?+vm^hutq zFjE7%-3cbKAyldNz}DHwph{5(g-^b|NofmZk>HU{{-BWr0B+?ol3Z|~ncdv? 
zq(oG&29X_lXKt+)Tj*i{W&=SwK!yw5Q5IYX8Y;WP<>$kIxa0GRVr)Nn2=gCY65ZC$ z_Gy&@E}_JqHr82j+wMTMB!ntRTYEOMehX^>0RQx*?E}*>iS25(fB5mM9$99Y+LVU7 zPj8}4f7*Jb55C<_3g{kK5WLs z#l`%Z^y&92!OmkV0#0(P3Gbo-hy8!i?AI=`F1TM%lx$cTErk)0^2l4Gw{S&Md2imS za{8Y{dIv-PLESKrfSa@b;^hJ6(7!IR|Id&(lj)L_u8cQZ4n-iA+zV4%CgZvObd_J0 zx8YrLXQR7nVDv~}5hG71gh)>vwEfCzkZMb=H?3-Wz&9Y!*mbdjbfDh6b+>V02 z(947fo(`MyV@aKCsFh6`n`~~iOi>V#f*!K4IJZ^9=CwN^obsPJ?dS42h6akkY`y!~ z)NxT4^<|u6cgG*>5RR2Kd_Sp}dCnC(v-jpp($pRZ^r*ZBYY3qpx%c@-0y=QG{La-UY^*bhR?mIO_ko$Jp8 zZ0Am78oC&Nmu&9aIFt#%n^5ABR(Cs|!Hxh2G`tov`fK6>r~~2{EAV=)|Jz0%QvBd! zeApgMEN0y@Lj2&1`jCQ{jw^hdH6VS`2yu@o6tiLD(^1}QvVEK)_V)6;>uo`_p$>$* z4pX!Obxo$6Nj5PqD9~Pt2t2Q{w;+D;^vZc$cWN~p>dS!tNmCUtclxU0?UFBc;9}MD z##V}4tg4BppLd|TC=WJwxmC;sg0+37sT=f2{hut};@NRv735iCUxYEyTZdx((KI_c zPG`bNG+YtM!A1$fCEzH!x2DNwGrTf3FH`(kS<@F*;2+Lx#=7!l$e~V5SGpu`q15@O z2ceg;Q5mr(0d)j&rH1pOjoPJ`=l37%Enk(Dru&R%ZTU!qcq=o0bnyUapKVT2+}H1s z-7tUkeEw@-8m5L4%L|2e!>vKz4qKLMd6klYhfa7)w}DV;>dUlmEn}hmW`{_qU)wV+Yu$)QpT_RX zhv1Isrp^}Ibq;8`J6z?@{;{zn5=b5;QUKg<*X?vCWTxzNlJZJR2ZpBS#m#P_-+XJfqd-{ShS;)via9I7c{));Hf@RIggy9T@_^zzZy#u~(G{-TpH@_|Gi+{i zQrCnn9r-qH{!yc=2hZ}hQof<`_VNRvldW$jtpkw^Bh*)C!sgAU%6>9n+pdOFOlZ{# za?0(Y4E5n4dAOyFf!31*_^EY)w^;@pp7U?sT&`BWNalUC@=!Cw9=JL4@yv4U&iF%` z7lNwp7o$hNe(mC5<@(bvHCzJYZT{1!IVdbwM1sJpUkztqviU|P>T}eJZEH=l*LCpK zPXOt#INHPRwRl@ydQkIHs+rQrPcM`TuWl|q&i=wX*wvU=5f6fZ)DJiyay+2;`vTX2 zRz;>l?$6EnvaA%;r{xV*O#1+q_ZBXH)}0e0hkqP15svStXtyL8CxHy%KaffQ!t5t) ze;I1D2xJotS(&ig;p$4CT*au1YfpSN5|x0XOTE{27DxddBM1$ezD3_71xAauUDU~o zd-G04Vfy@hzsfvskc1=4!#1#5b|j;V{Si-EF>tE!sTq}{=QkZ$!60Jqc08LA3znF zNGZlc`YsP%d3fYexD+cx)pNOd2X&jO|bNGG9J0*YM+{dGuwrclLk1t1xxGyL}zEYg*ch# zhF@_Vmqlsv_xy)1Hw~NDbbG61&wswgQbvKO;n1ZfjW>v^23Z0i-y-GmHgpzzUVqEF z5#tTZ@e3M$E9|Hw6bAr&&rP~q41SV(Ce{~?XcL2_=7o8?U&@zDds{%+I4hw)T7qko z@+XHV$?Jibvj<|6Z2o~FRk43@plnVGU#Wi_ru;fA`~UF*sC+m>&At3+%$Ao<@N=l| z`Srx7$u)~*lyUyM`&g1b3moJ11=Dii-4R7gPc~w`20mv`-jQoI{7MtHENjC(#iAUW!9L=_w9#jTwLz(KVAK8}>Fbg#q*Y+ZUuS)IVP_ zwRls<2B 
zy1@OuX53Ml)MDe{4$AISCNv27dHN+UXqM1&!Wrq{V&+%dm^&W*eskaV>wg)a9s(kQ zIu>pJaA-DL*9*e?>LqHz;186_Zl(-y%r1M&0u7OAM;S3gs=grAL)ogk#5`ztKr`)gr@lES>k*d{BGTTo! zu8!khdCMMDtIkLY_`yQQO=%MtF0E&c{~TU5tJLN7Hh)S85kgDvPF*Qfa%ILNGysW3e~B^v-#zFD}UiLE8ZBYScZI9G38d2U!TJ1 znY){d(s@ZY0zNhV6=itInv1O>IY1SLkPa5v5IO7-fuRY?>BjYD8qycDrVvThy54L& zEZ;@HA+JPgq2wr z#Imq$W85o3j845Suj($D&$#CopRBg}O9pQEtyBJfx!eP6Z7t=sIk#UA-I?-#w{EoD z|H>%u{{Y${T=EPDY4u}BWPs}>(-er%4aIU&EDG%vp<70lS;5Qer@~rLR9T^u>D+BSnQ*kUcM( z5cp2C>3KDC56Vuwt0+DSmO?pJI2pxK=QELvxA$0M;eBol#jx1}-i-4vwocC1gFd9tr}Y$45;FgFv^OhO194OmXYE z8zB*iyORj4Fbba`59eYsUCg$DEs1qCO!emV?K;g}{?xxit&=Mi{AFLSsLcc?;AC=8 z_xEq{zu@9--gxXRZ#PgniYIl$FLKtJF;S%QuQki;0+G58@Nfuj`wq?(Wz@b;rQ8V> z%*BHKF#HQ#?mc`umS_{6oxWFQPS`v}Z5ZVN zs~{Id@OY?#H;yh({VtP-FTSRmhgZjG!Bd9tHJNh>Ro^*mkZWhOjq1S}0SGUg^X|Gr z)it{9{m%rP+~4w(MRJ z?nH^f`YX(?sK5T}+5fdy7<{!AGqzP=A~`lI zmj?UgR(a&t4w67_G@{P2i?LaCG@)S+G$zioD#HI*(OS2+X@~-$qqr4_w)FGCsS`7InDGyI zW)Xq^Ar>JGp_w3B+F zCl$L#=Y`$>L1XAqNWoyj*H%U-i^JXB)4!&k!%%9W{&#A2wVNIq)}y}%1rPAe0^Q2j zKVLb~TneFjIveM_hYJPYZ({YpfE}G8r!ewDeVOh|jLc(}(sw21kOKbwFBT4l4=4l; z494Bp+;&QWqw|-HzcN0mHP#ZtN=M=AM8aLHiHaHPsB?dkxGai)lvZ7Zh9UAS2)P{& zp}*|@tw;_TEmw0$tu^@gDYLvOo9n)2bop{z7$-3K)aiNd;)(EdU+PT}N$J&3Jsdk9 z_p<{hEY?f|y)Aun!gwK3C+@D3{7nJvRT^j9bzXyzj8uX0q@U*M=nTPsold<;|p3iV#1p@JJ1#-F4 z0Vhtikr&^+_W#xPmQithTl#+!0)*fW!9oOrCj^Ja3BlbhxVzI>f;&NiyL)hVcXxMd z92$pzc|X}g3U|q)Y-QHb-^wxnt@b{dF45#=`7lgi)jrG6Q+m6CC?T!pbymR< zJ%G?~w;GI*2DD#@CV_$mum8lA3CmHZ;wD8YxCic}k!S;fbLI&B8xOS2N zf!YUVx8>Z$%H&d$t|)2YXL=sRuLCIyf=-~O+_!SQWz!2Pm1UQI$B-fOZ{dL0;9#O# zqSi@WiYUirhlT*MoDaU5DgYy?2Za}8{Wln*PXe`k&b20rs>75P-t1gq^<`%bmiL;S zxy85XL;MtuQfwLjT&IlcHhj5(Ncp7)?xso82anf`>g-5Z10(wY7=FdX>ZAj_LE0c2T1!H$?PzzlJDszZp zeDmi}lbni&xPtCiGQU(QF|~CFjggm2j=s(SDxh?i3OtRinA)!23n?206lMtpTgK4Q zoP`oJ9Mgl(osnujA^WFt3=ba6h3ChfFlDRwr0p2vaaUf%vnnzWD0;;%`!%-Wg2z66 za8v2=2uoD(TRc3l2J1uh`Dy0HeBWgvyjR9?^-kJh(j3T!uX?_=rv=~*wU&=U1V68!IB^r`Cc>k+%6Y?WiK 
z?FAL-sY`QxeW8FA1dN^l-fqU`L|UX8%D;uupZ!I4``!7UFBwkz#Z34;PGZ){cnxu9;0x5#PpeCgaMAwvxm9r%umZo*lhOOz2kT?+}7d(f<^4cZ4SP+^gT~ z?N*vj5kHo;&epFqDKcpuSCI9a>FDnTq4>qBKQD$4QhSDWus~kM^V{ofIzlt7-&zyt ze!<2+V`ahxtgE}wm1P<7sl6@f?p6u}fxgmoC+t?Nk56s@QTnV^LFvCm>3I6pGX%iy zzrwZB?h4pWbmN?CLDtsLF-@*z=gr&CWSOiBRkkGff5q(RW^uz^BQ26Mnb0L1^6F}D zZf&XbAV9R436{CBp86FV@zOZ1^f(^KQD}?=#QV-=O1X9H`FC&i^%3_Q%cisnsyb1>@n`SfVCkK4Iv+ zXj%p@upCeK7UYn2_02gl>r7sBXBYN}x3B`7`ot6aqjd1M@#98D_SfoVry;g-E%+Bd zzdncB!jy|q&9JvpkUBPW%>q2C=MdA*kCe%ehCW4AJmKOthtt4mnvuqDzEG(HXv8H( zl92E0y{vtp`CqCI&Xt*`JDV3ycT_dM9j5KbtdqPAG*_Komo390(KY=1}(KzRKX{+ts z)G}Nni)RF~2!ClO5GHo7XKgCL1NsJ&hTvAAmY6~eUaU;7!TT8GG#XmUH%X%M6~I{& z9Cz9S>UU(?m0SZR0l~$A_}O&RJEQYYRZc5#lA0^e;s{&bo! ztHK}Cy0T_A>cv572c`!_)k+q{^Yzy12(q$pcyoRfOhQ;P`gyN)5PO|lc(Eg5FrzMJ zDpN+Ze{NX9R@ja}v}iim6u0 z79!&7%0^o53El`Cvjk=2&Bv)OEj%S3k!!wf6m8_W>U_qA;s6@m-E01^dAdO5lMvMY zqshV^#MkBgt@Zn4!8Ve{D<-zts)dvR>5QJ=og2#8(5Jzv07MXHHIG?Iqp?f3FX>Sx z4FmUSr%w<1`M%70ymqeb@>cgv^*diG9!NQ6s>)i2RM4GFRhgY@<-&3Riq#`|MUOjH zz3KXETKBaPF8W}8c{XiY)95B*YWYrD&O9&ws!o0B)-YsP z@+5d_?b|_!n!cDBmYm=Ae+8`9e8#d(@u7RwHu=uSST#(;ma{ZH+gF{W8TJ+e76?e# zTW$5;qd&M|*}T8qx=x4rF$;VhF`%O+=*}vb@JPP=9{7sFXW0x|Ur7JyM0k!Gj!C{0y7=W%}0Qb9>0uxlj%9Ol3m(Ha~jZ@H}*T_}^K z+D=@$aV+Y&RRE3!DB{$(UJME*?d7V>6=6QzdVl_c)Pu4!WjQ$i$#Q)*JaStAH zJjhP_yoV}RE}AxBR}Sc%p)`J|i%z=lFAw5yjTQ*wx>rlT@77y)PnS1EY|qdW&GzwY zO$t*32NygGdG4T9{T_DJ$&D_@M=^()v>vHkZryX(9t59y`2=7e;D$O6gAXa#j=Fok zKvUBnXRyPiAKe(v`_-@=&kRGuF`gGej?C%I2!=Y1qRMPG76fD+Ex+M7WYLf<01Hnos>Sql=9(X6wmBX?A>&Y|x1h zvAbg{-nZRTre<8?m}hslPxfg&NwaEVx-i@{YNtd!|G9U>z~cTsOjT2WH}3n@ef`a6 znR8?#aRP4=HKWpEKO{2?K095Z7+p@&rl{b_Y)g{fID0ysmS>ds7R8b<(~SFz-iXZX zm-7pl0>}PRC5kAoW-s|Z11~SUFftn7eZ(&t)!T240IcRm3ijZON z*uY=l*-fRG7L-^#d)q*^|C$;S)LL4)>4iQaAhMs`WYAS;#KwmK$qdW&d>NK52~&HB zSO4j<*E;7Gn!Z6=i8n5rm`nnk4h&K~@a#3bvU&vLH3^ycLuEx#%oUMk$7-(B+%9y` zq@cl=`(pmjx)@Q^8#t0VJO|!=eg&#Ib9qnraaVE1=o2%Yk%+<=5u(gZV@d(Xg0P<0a>!i|NP>dmFviNxJvFTg+)UY{0l8@O{VkqH26Cn 
zR>hO!6xf_R%eiV3*Jk^rPv`FEHu%&-6cSeOrtcdVlFz88>Vh1-GfssEG1N7oXIb+?R7nmci1a8t};b{aZr`+MJ)& zll~I>Q`Am)W7`T}l3bQx?}Rm+=e<1w+#`PBx|jw_&y|>@k`G7Ss9i4bnNE1#Iw+&Dk&NdHSJEgN-LrK+-&NZ)Lay_=&u27j3pVnGkcS}2WMw+p5XIi zX6FmG_1t;oRDZl~iQ;*|OFN&$fqoYy`g&s;N#~5$8{3IL#ujJm(dmx`WQOK>?gjynG*qoQTA$Cz>0L9AHF+LA*;lOv;x zok1k}tkqz&8R?*u(^be)IzJxrd}%WqO!-uGWilJD=LE=2YMr(fRbODlyUP^2Y{pqv_h=U1EDSaMyqTcf9> z^kouJ1QcrlL@Qh6p4`ZDX1<9nGk9so;nG>RqOHXY49;pYaua=sU2LZZVkjy2jd{@r zB1i(esff73vW?{@uhnJt8g}iAc8Rc+#J=dDrDlBKcYx|r8l$2&M<+zNQak@fRAC6j zNqNtz@3AA0raao0l@vm};J*89l^mw#{2R|&tU_M3Xto3AqILAxnIt#{w(CHE#Pe7v z!wa8wXWsJjXOult9zlc$uYTb0+_j?gguF`M%rpmx-ILs<xfvH<=Zf~eZvhhiPAg{Yj|W5Sd|%%EL9hEzGKG! zJ?m9;f}$KF9hW3toe7;%`&`|28jq=`#6gaea%x?VJOOIYasAK5vv{ACv`)h1%6jevRmsDchXVVSP!qlC=GSCs*QdJ-L%kBvX?pHYV9%%f4ZFq>Ou0Rz6=iw z4(Dm1ZT*Sp+Dw(*;GUvhTgq$iQPbo@2{XVjcFN@v*j+&aK0c;;h7MM|k0MuAWHrSj z;Ih^s0a6gNYdzyjmwQ-cPW}hF*@Z7iAcYVlM3S$?raX--`j)@VYQV9nXI)|NK_`WB zFwbpMA$Sm}dMb;uR%O!5-yQA>UA|OWJ8P%3y44bQqb2_MKt&HWN0_DqWsMXJ-cGQa zGZ8&A_bo#s7EAERa)X|~i$iT(|Gk>?5Fh*IsL4db2KZ^Js>2A|!#h$p;`=q!M%$+( zfxTBu+j*n%S65noVkusq%_~M1D$l#IEyk(!3aW2C&mr>;1X|UUx9H0QVMfzm=dga6 z`Yh}DWSr<=0W9>@WsPJNXgwGq*C7`Qz$7&b-PHf!5{H3KA>Pe;&j${*{w0Obl6~tW zJyw^MGUh5bvMTeBe9;$UeJ_Dy*HNwr-FxQ zzggQ|DDGmV{V$oO-}zuBG(YTiiI7h>ABwucmPwx!I}6Gl;+GU&PzX)9 zrLA1Ci6_`~(sNg=>p4;3I0ytBMSpeS!30^ONCQ=(^-2VR`(|vNR~eD5o&(47Aw-uO z+{ZkI^*}B7p=;~ulv&$m5^&&1$~B?Bd{m{u84~LfFi}E7dB;911m|3CM2#6ZID0-3 zIuxawah7}nn^j|v_TveMY#4ZdQ6z5&BB+%Hz+OL_g!9py5ql=dkE&&($=CUAp80J z(!Aevu)R1@dhu%FGY(+ktj+eJbcx@14$%;}Y{VS&2~<#g(Bc&!rOI>Typd zp(Bjve8|R>mfGeRbvA4~th!IAmo2}X z-OhIG$Uw~7zP|Wsw-(&C4@XS_@Q*YEzrrsbwgO4S1p|lvOyz-pMkPcV{gIxThIMWD zrV5_SB9Y0~KtS^#mYIhK^Y>9*MH!U*%cLT~PcDl9;}l^Q7Pp`7JqE2<0(r$@IB}=q z;TUEq!7?&ftg^&K-V?q!BDUi{iusIeuNSsBpJfyRYx#ra<1(Br!;HIU&;Ty<|bEXkZL7@mv>|SZYb^u-@9&7*pRXDJy|uac&#hxYjIk$ zYU}MfBORg_-`|R%?|!7W&(hM;5Rg7Br?NmjmXcAnzZ!jf=ONALWH&}luZUMM{S+~D zv~mwA5c4?a+GDn*G5KChLqT&3(*pKbsI@90h0pwfb=*#@Z}C#na8ipwBoUgI`b{G? 
zD&a-JK?UKfUlnf)c2VK;+_d^|s(0gw&mxsCrkN9*%N1gEKvP|KOur}!(i^i7QRC|0 z_qaifIg52qA#r=ikjTKSliHCmB*dks&0Qr7UqueBwa4#1qm>d!p79^gWbVfTzc2$N z$uYT%_VJxiabnZ4Rt#QE#x!h7X4Gic%Y9BfW^@w5F3oFIv7xoAQ3w+*+S@COxeL-z z@;FFmR2E4#lCCFQmK3~wr7oagA5Uf$l&c-Ea7cOop32$Xp>Ghy8y$K#s{q%+;lmlO zF*Rs(c@%*%gM*zp@*Kmhn$ddpB7(s75aUJq72kr|`+veqDyt_c1qaEvU#K`>mhq#9 zw7=u!@#q_LDi`X^o19nM%2x|1emD;9pL6ggzN+sFQeN&yHv}2m(YzgQw1h;Ce`?I> zxih3=yuQEwwPv3$cobRCbzz4wp<1SlPRHC({jHzPNo@u6o&v%&N_HU#^{&k7l~;G` zca*%lx7Lb!#yU@3KeKjmG3gV)>y4CEa-+Yd{oX?CWht}y&#Uz!WUBTV*hFOMTjaSY zk`Atk-6p8(7qzqGVBTS$Ot?p`iTw{Ct<(6r@3LC{9xxWARS@@gp}xO+0fcvcnsFkQ z+?jUIVhQ?%B)oE4?@oEFQm#p+j^2>t1>XG=N4g|!vZ{iXoX0pYhN*hVgpJAo5nt+Z zxxzQeXmZjxS~+%HHmjP?quj%>!CC{Ns%@ZpZG{721g;h(`qgM~-*Eq5V;Ith)zm_1 zn?UwanYA?MgR{HkJGfY|w0dz8^|l&IX~9R4+Pb17qp6JGob`@E_Ty#ISCb-1bn?AZ zqL0CE1>q3MY1T#iQv@cvhu9D@>&l85p^x-41qfCp660`pq4D$tj)_4G!)3B;sgBTm zT>_v)KvRisgtQ(EA8C}39QMWep>~6e-)=5tVe-PAJCBilDnK*_BQpb=cbefOy+qeGw>-i z_5;i9LmZce`+>vVUH-XGV}0BGDKRmQd*UcE#10?#+D?(dxVDj!vbYE`+EcHACDN7l z9M^i5#3dyrvij}!RSRiJ-`r>Zk}WLvD))gP;x6=-n&FgALfp|9uOB)4oMZ@`f=;>m zY{ytcvcx2C8r5CgjodwAlKkMnz@#ghLw*E8E<({MjMAK}k1f9|tD~L=SYc1b;%yV) zSDZGIrhBph+p(6aa|C_fg{D2H(gi9q?fcB%e$ukhX`GIqaF`L1w8e%*n{c^R;OSyJ zkw>tJnOf{9Mrn8T<)pep4|(iAo?t-e8!HrBsfVNt%5*WLN8trqHf-pW<}+tBlh9AK zeQ$|COGbC0AxT+L{7qgLcNF6G!`-E>2`TMj&DFzjtv7eQcl2wbuPGXp7+q_%mbqFF zj;wC&kGfYzv|DhC@KKV$9Ps>u6rPclYhhNe|g%QFr7bzKOEGCYJbBwYR=wK_b z3RjrrABgvu*4F+V1q53+0-5WUw&h0s>H6S{gn-889BC@YIN!kCuAOB=>}e2sKLn}v zy7#8ElpaOp^gJKn-Rs@+hG!%uX*&?P`h}f3-@9AE9j66xV_j{S*LDvFrofXLpDeB- z@#jLw9k$fJqF6`vhW{7EE|B9b{1?rW$W!0{E-wg0n!9YRDcehKDbC6Mdw8&K%11uU z!nF!mXQKzC7dN%Zsk<~_j_FXaflLiBNi5CIIZeeca*p|9Ympjz-#8S62p%pHq!&`6 zmyZ)HlNB3$GTz@d6K+pcgMKVUC$Kj-{we@_*ZDf?774;byr*9v2$cIGx;Rs_W{+7q z-AQ>SR(0Kr0FC=5-Eh<%Wt?%m+5~kr42>q$$^Wto{_^1^IX*wYu+ap=aAE&*r2RYp zz5;orG2N_=Bf}Hg;|T=R?|J2`zX@6y7V34uE;oKIABCA=z+v0B*XZ{P-_quKJ{)=p zv=7ShhR4XMDcYHdI6sKp@Si6he?b9`Ez8m%j4F`2bC^b=H1)xBEiF4JsqSp`bkXau 
zAMpTJAyaf7O4LP5{|H7YWt~rzQmc5_PJcF?{ha^il4IKkGxsn&yDx?Nm7ODoJeAO$ zC!3j&=Nf*B>SR8`i0glKz0p)>*c4VNUDaa);gRU zljgt0iuNBlSF;xNRK50w`aF=i@JgI4#n5vHA-^izZ>YO1U8ovWx|OD+;0A$m(Xg%( zsk%|M^-CEZ{JK{0Qqk1njC+h9*nlq-{J-<;^lkV*D4LB6R#js`>a_5~`&`utyA7Wh z=_*IZtu}rhI2{XATVj0|QnFC=6k2T(jYTPeM<#ueoNU)(wd*)4IxrVg5H2a|qUGQX z-D}*2U5=o&naj!yJ7>AL6$7v<%W3n z{3VqfY^qhIeb>ZbmN$cE@#_A;Wpa6uzFeIEc<4;n3v8yCScoJu?!2wg5XL!|J%| zdbi^hhX@x35torURuQ;S^k zxqjbseHlYCs-w31wW-mWs#E`%KIk76&aK^me-Jkp7-uQdMJXf8b7$Q#C@#o$tsDQ3 zI8}Ps}LL@s!oKR9_1$U0lrey9+bWwp@__;I(jIb3q#>378FJDk%w`4iy(bLFiBpCK=I7LniWIn219 za)B?@N%aE?N{}Zabux0N*J_LZ4x3bXa*lnXTJvVT%LG8QtMV(UVaHp^Oyl2}bL4N# zIr*YOS`PszsdzPE;PeoxhA^k<{6}0Q@qHp;>Ks!1JabeC=!y@`)lmj6x^0Q-wD;lj zP_a7==zceL`IS@x*>Pf7nCNB6EnR4BzbPf5BA@>Sb^(>b}9X5tXtOcM3qP(xlWT>%1o zXpP?5{aK#k1IuL()J0~&HaV1muex#<7U8x7ySK=-P?verEs0Mv1ulxd z9PL8}Tik{Cn#?hj+_Di@&(PYOwcIrQ@$;?alss^bpXUtT{*3!LaG)J}yF#4^w%PfSv>Y~Xq5`>?SxzR5Ut#YThEMy%OH-!? z0TPLbc?oDYqbk;?`yKPtWUipba-Rj^tkN3687dW_iO%7&4vT&@biLaMyRlP)MmVJc zP*uG0XX>bYLCFRlU%7O3n4{^FNPfpglNdw>t{eReQ_8gQg{nIj#4&UChu+X3haf== zJqm^LVTHU2y_@W0Z%MGZ;u2kfsG{Ar@jw`CBRB1W zxJc}DyG`kY1k^FiX_v1MdA@wkj_*0UBGllKsreopG+Tx#+@KJ3=b^WF4p&#+2wMv>Rc(0u9fyj3?=kfL})tZ+sa;wp|j_6J-5fb-aUs?+dW2UAFzA^s| zTWFUUJJ$@!q{v~oPMKVV*#1^iXnclHt_S_qN49@L$)No_3)m}L*hmUq}Y2*6mBTo?zPWjl~=}x z0#-w%E7<#_qZMvT^;C%N_e5V#knx4RHtyN=;i$g6tjh12P;^ipAqFaYgxZvwvsv%r zw;4+a1MimIg3~g33yr~(QOOr(&}iAEs@<0G!1Y_MMG-{}=XD3N6faU7sL70`a~S^x zX=5zuNRjNQO`Usw&N~BXou;8i+saB@@J+bKWWtv*_H5aL7;ch zg>UYm8oqaqWwX+|ja?_mlS2M4qZI($Mj;_GxIA<}{?Hy{5DkFOk7nTQ&~j$6Xp9Oos_L`5GZyJ9Vx0|4Zsa!|+lkIbBjv{){H@qByZ? 
z=k`u_Ix7N^@rjCHJfVM=R*}9KP;tHIjXhlxOUzqbKB(1r|EIk78Ibq3VhAywz#_U< z$07%LT`}$VxGcL5+5>g%5>MBPz-3!!bT$+`ki~ZSbMRf<0WeErRU~BB;_bxWNx&>w z9z*66QENL_^PY$QAY#T_@|nZlxqH8$co#+k0@bje>H0lhPqkR=ndLt)+0JBG^|(%E z>zmt6J^Pz@8KRtQ=x%ufH6Fbn$~5^E&x#8X)`4ACh>#2!&xl!D)puSW-nhlXwv>@)l-K))lqELfAWUYZDJQ#2 zZMLq59^bp{2mSSq%-Nb)G;)ZLCu}-GQ*bbC-9#!TtT7Yimi!5;d0H3;ja*|VO%2cP3j499Z6VR^oW#VgWvq_*wUQv+GQM#_^Y&a>O_x_&zb}qM& za&uO2*w042Y&+^A1{dw_zq*~awDXY6k2KQ91Lei~H(nr9sBOdNx_ed#G21foK+f!A zDqEFeF!9_WrGL@@_wEzza-nPT5-%wBzqq@5GxBWNPvf*SN5XSTppfRr5*%0NN>$7h zB>xjd?!Cxcq=&B(-q?fS&@6{7$}4^{d%&23J`T0ya#I}w9eYxno@%TxDK50f(iP4J z+NUk0jD}a3`a%#}i`}%45m8;IE>%&{2IjC7?fu{scuF^s)h`CoT>5BT&0a}zs}>A= z^x9v4Ni={tC8H$=k0`Mesn-lMwd-ODIHN`QL4W+*n81fW4`D@gsL=d`D zI17T+Kt4Lw}Y>SSJ@B&0y0@M&yYYS||-lG2Ws2r;ro+SK~g@AFXnEt@~6o0SU}5P-1}*0(?VZRn}N*-=#d}VzYGmNO`cPh zS-B}&l#2jKg44ot@|;{sp?e~zL0)^DOLJ}E)?8L6BN|aA95{BVH21?%a6}-Oj;95d zAWiLIK>ukj^MlwRTL7j2X(gv9mV!_z5%|$5v)JiLZS_O_%9kN86A)W)f&HRWCm7Ld zeph?;TkA&|yz0qlg^1=hHgW`b<}43QWP1_8PzFyDajA64_+=~W8N(jv%fW<^c?LTqCh zzu_=AZs8Jwog%&)_tS$T#YoIt6wODI>Ro| zFeH4g_-wTASv)r{4!qHO$dZR;G6?pWVnV{BSQelqq~M;y3^<9?wV#&mA}WHahCg=;I7_)pqeTwJDiQ@=eo(w(Z^pbRBCKZ?os)mQTv&;O z_YIonMNX(`{2^duV`9*}0Y!dN!I=ig0H}9_ccs4gEjwt0WfIi{vv~F$#BLtOE$!Fz z{+rPfBcdLX9t$Wry+OXbLWn7YmHk<*d9aJKOIVBm(*naEux0Blq^2HZq$@sr2e+R1 z{L;Yp#=ZiGMorcmFRH70LR)s;E}Yg=+swLtQ^=#9M25dHESsKLR5r z;t-|sc7@x!5>)ceE8aJ8eMx&W&xr)uIevRw<3sfwSYBkuk9-J4Mh|3B5AQa*!)y@K z*!J9d)DeB6+^(^kIqvLPLY?ET zqw+FY4SmTYEGy~TlS}9gJGs;e9J&i4Ar5`3!4iKTlr}cuF5A0NqnwiXpIxLKKvN{d z2FC@v8|s$Afsp%x=!ew0S7RW)o@t|@7y|m(li4+%g7tPBX>(`Xnv3onQFqiApu7as zq#HZkY$(PE^ETglaaPMCoa_!Bk02N9#{?8Rrg`Xk)!XLbN$YC5D)d^$P(>)wsBI2g2)2#7fd%s9e@3BEd1(^?t)v(lE(u_`bvwL@qev3|7f8e_DuO3 z?a%KUjaPp!2vi5VZc_|j$QPL_1$aKR5oLOBm_5b62Hxa{D|^*~skjNp$7-{oM$ref zdDh8X=u+Ysc=akd70t%}RMq9=4$f-W9F2C%Y0hAy!^+&`m-xt2U1x_}B4&mur|U>2 zC1^EfX_325+UXUAQ1Bl9g@_?Nf+WYvrBapV)9*uLE4^+`V#OYEG8VWOG&qjZPAsxi zKA090&CQ9}1@BWAP-v_etCjQ+g`0um{V)1UM_851G+y6IpE8qp!^mdqnNBj7T`eA^ 
z6qo00HtR0WIud!vcG*uhB>jJ>gSi^dEe&SgqACn^=!rLxq5|%hd3^H0*<_9PZt{9< zzNJ00d%%M(4c{FEBWQ*@%m_)1Wux6xC|pw{kh!lU8kTMHASKvVv*#tagjXw4CAe?j z-*@PX<7FJ32+xZ1fY_{*wbjD&xu&I$^s9O|C*sbiJ_Ek8cJcI=wS-y?mmq~4e zdD6bIU|aIgxVX20_Qf3CDiFMnHT-D0pnZV$l*IE?D#ufLJxGw6!mQ?LlB?`QBtNEN zO;w!2Gr6FI11Ot^-Kb-Q;!P6@TiCIS1_)%_Klk}C_;UhEsXi}bf5X#^p*G5!Yewy5z84hlb7&5T z>taS|`A6m=VjDBAdN-QTsg89C`5bJ7-+GlWAJ^R6j?hOPPJ&vO%@V+T8{T?Ia=WQt z9@))4kftf094C$}6yJU_4G-U`g)4})=Ms>gGoAg>sUmOqu96ecA>AVfM*A$|oJ$WtuuOS)Li z#j`$;BxvCn{k!MC$M6*dt+%8#z|^&t^uc7TuVEb>l=9?#-! zPE2-P^KHWvr+m%YwrTOxs7$onbJn9Kd!MI#Q_kNlaE!c7J_{DD?055$GRg+@36-rzI{Zf1l1rA%*%$zPvXbNKbTekW0SHU+}P5|E)X^(Hp+gGLD5PLpXHy1XzwU=gaU|HHe z>~Zl0wq}&zSHhW|Z&p}nyjw64knoF>d5R_IEX&HGgdUY&QA9aZTYW(Y4zyc4oVs&% z4^dA1zQLp3sN<rDaBX z;aG`T(0*fY0T5_^WC#))2yf_2sWe$EY{5R0&(w+wd1YAe5=7IDgU;(3gH_!%BFu6( z`z%$YpO95*VBhKjQ3RT|}kmQ;+!ReL_sN3^u^Jh2{kk;RSK;*P4qpg(a*giE6U z)(8Ftb8L`+7YGyd`uT^KK8HRL)h&2JR&OuX7UZ>fgXTX&K0MrXDMlq}wgf#$udIe+ z3iOL_%N3ilZVKwPbrfRwUU8oHV_OpZY+goJW3rU?=7@m1xgrZ@=s)Y1o1j%JzKaK% zpEixWzJ#LWO5?*&l2{hrA7AeUbG#zIo~^mavfPA%?e=~NvZ*NN>}RJ2(;c13YJ2qE zFg~E~dDwArs_ZzmFb+FBpd)`*2ZZtxsgOZ}M*-u(aIunUvQ-`W8@AveotCzo#*5#T zFt)b*-7DM49o{f{Oz_E61o@rM%21h;Q_Qw9CJQ}_agn9*IlOm{=!{SIt1V*4FN zRnI_&-^G8`Hgeo<6+~=`U|1cYt0kJQ5^#@6WDlS@N=?`($#^<`9i8ckO2O6FT4A+v z_iKwTQOv2Nq4 zpyA5tXsip_Lhs1qsg<^=I{B_!Z_*EC!BsOE{J>{9V-Rq+ZR08?rlw@$e$HDV&Vh>w zA3kLlhrsJmc4|AHSvf%5a$VMHdqjib&CuOeg;b()EPL+15<)H@f0ja^zXUG3Ss2CU z@j5~Z=BaUmO%SYTNeqlgH~fAKPEA$!Xm&nXK$+U#CP&}XvejAXnrx38kBw>nX-=;$ zZB#Xgvr}>Kf#0J0Cq2Oy-PC1KaWLCG*3YE#qFOJ9n~{SqH7V~wJnLi-h0kb1rrL}C zk$U7@J`-=!qluD>v4>1Ez}uIXlkxkT6)`DD<`WsA_3J7zKGAdvy=1g9jhJqel~Dz^ zL-m#AB@k$G$bXn8PKkbAtHZmBmCue#sXg~Po9fGGD=aYtQD;yQIgtvZauR4?H~NvB zBx9KqzvMV4RYJ+t+7C+EQeP3CarwOphPp4d7mWn|h*blm=LZUF{(1lG4%T3l zB>TXB>CHhl11F;IQmz6czf;xW#k2$<4omweB|M;qH` z+j*yr$S;nI;e}=@ui^}V6R4QyU454dda!M+SMV74r_Lju4pY|G47opb{asDt7`_%| z{!umrLw34mklAD$mXy6wQC!{wA_SpRV@7dv;K8M_5+HoT=%Aa=M6 zfekJ4l~7~nlpc=OpDI;OJfGiil0U*C<=XOvfxE0`_KW_A>;nBr1Z{w{3BWL5G6u}W 
h`G6zizrUP+f^*VuTSt(*o&f(diJ}%@BQan3yZmL_gvR`o#i->^O!JA4J9IcI(!HOLZqS$*MdOMCcyVg zTx{@)Ryl1W1ajX{1um=Ym9qQuwaL?$%~A)O7Pe#Tk7p_BM+1JV%|zHKz!?&ujp<85 z?dj3KYL{w%j;6UPSt;&M(l))>_u8L#rlPhaC52j;%xjvcW*{lvPio=k1dG~_yY-Fb zJ|5bSQ4NvPQhnI>Q02?8@q5>}qup1ZIrhC5j+z5F!(m7y@>fOP8s9r4l9Nh6GYpAj zZIxkz!Qvno2}qk>1a9Mu!z`HE3GTB9n6y#w z^_9{{c>taM`2PL7?rz1#qoGIB&2JkhLhr(0S`2;qc>#AY-%7BtVVy4eNi)6q#S$MM zuk8K+27@WYuF(}}d=2bae)rQsNks*3W8hQZ?p%%eN@rMJadGe++=I(&f#gBiG~*X9 z2wE2T6}SYutnasPxqUYbfw6ILWYyI@-xH$fGb>LoFGqmi!uFF#;^N|#T&eC4PKJkS?%i^#MksO7> zmqdqy`JirBa)#Snxz)Nlp8&F4rlS1(Wqu*t4}C!&KT_58YqD0i%c(tntROERIRBC} zZpKdW>C+G3H!|sle%l?TU-M*7 z3yI{5u-M5UJ61H!AupxWL4)Ybi&7}^wyZ)CZYy7@q0m^&X*ss%gS~<66-6@D4itwb# zqlhC@Jaf?&genkVa|g4Y;x4k6BEL%Ix6(J*9SA=UQ(k`XJ+Yo`6L)bEXB%O8nm(}C zW@tYCzCgEM6AOwM7SPBKEn!>cOjpW%B9!aaeB|WIQ>!B@Ly_`yxd`|=l#p;mB z$;%8u*4-`Yt@`DB8st{j&jQ0YNsnW_f*rV5(TnPymD=;snRmUYiHX+pbga~cFLQDD zFuYQS|6Ic2jO&c%rZzfPVuLL46NJK^Q!nh>=s&hlyb6bWEO`|2gIId+QmYf6XKU5jk!2&kTg3y z{c>)DocC-o^(z{LdHx~8?DTngSROS^9xGk~WX+2GyP={U>kcNx8V!H*U1*mGHaCLb z&^s7U(U4d%n-;hY47}o)A2d|r0E4{?jN^=UPSu6QkzpjPj5SpuqJ~ADfBqNg4tU%nTYxAUnnt?1dZF}&Gn`8#sDqIb@+GD&-%Ju8uj&p zCvkR7B|oZgEQLU6N6P$w#o-k(*$F(lnd#{e1qFq?l9IiO;&0z5=tb>nmz=eT5qs!J zWM&GhVY5*wHTwrVokGg<^YaJJE0{vU!e|f?5s~QzPkzh6&um>8oP?j7U`F%$Vd@`< z``J%R);S<&`wub7Te^e$W(>9?kt;D4Q<_*TXOLJ*9-TWJ95@Z0`yq;1vPMQsWMpLZ zzq6)xSi})YUO(1)F`vAv(g=)403B+d9QMN^A4* zy9ugYuH@*8Vi~Wh^uHwEk$d?<03`PRAt8GmwBaltotN28@%=h}^UJNC;LGCdEPl(a z(EeDAEL{{fQw+!No==bNVJ&JuiZvZ$;gv=93i9j9$;oL?aNz{DwYriAXnb9*!;>x6 zIhx1J&^m7H*QlV{&Qyrd)YC)eKm7_57AAlB@+IV5XXg-A|44>>#;7$P*DjY(oIAQv zGhd=G4isvL-G?qBDam-WF-UOb*Bwb~SiQNiq3X^V_ud%uWM;ni{`Py*2~Iw31*dQs zSU7Rx;hiDMG>_#(NJ*!ipc*_J6{?wm;xEmiJ>z^OuvywhL{ID6wEmW{VwOfBmAPp0 zoHf`W(jp%&^3ynKVUVR4JZOV`?%^*W)o1|;iXp;&$!s%|D zEAXTl(wOegU%@oBRuwf|(8oU9%*0VZu1<3OryX~$El5BP4W}a1uFnh3(ke62R_%3~ zL#(&++~#3I?imYEYy|lkeHPmSLGHA{3JZVB5D!NiIvK%0PHVA(l=~tNe}&rL(MD2P zZ2~q&F=HHsyA;-v_HJ;i>k9Fz*4scoChbwNF`2LLW4;SE4!2jSWY8{h-wUqZFE(Od 
z77hGE#o{xDezJHzl-#hRF26{AQzWH{!@N(;mieHIszCh;=9K;e!!{FphQ0W=VFCKP zsb)W{lVh{0BodN2g#Nzbt0wxI7!7$vvPB&@h2L2p$A>sNF{86T6p*EuBb+S9Ol)#t z^20*g%Z|@^3Td6a*ET~M`&JVk((gDY?!T*v{I>FbFv{1`uyfmC|GiZvjx=Uo?VEd0 zERYqj5O}TAZWtGzU}9e#Ds?-@(^A`a9@&nJ4%#UW8bPJ6e^cDpS`A|TtnU*?YpIi> zmi`on=8lPvssQtq7xday@ah*=yG_SlZciWDIg~`g;nZg7-!0@6t!)4LIWkd<{`hq+ z%3$9Lg3gZD5gt`2x#qhQJ&n-c?Ed&iaoT_yduE2=I1Ig6d{wckpZcRm3JLm+M`tvs zl8sNz2s)s0%$}>GL#xeIU)jxi*fM{CMSn`MkNUp5v7G-vb5$-xB{VX7WE_8pd32^Y z`Q;c(Q?hOL98aoOvA~&RzLutZ=R8#BcY$5oPx3Lgx4RLHo=l0AJO&c`T8C|(v)L{bhv}zw^cc3IUQ8AhT>ZN$+%oQ>qTOoO z6lXzNFD6h#!{XDiwVgQ$Hk6AVLxlK@#pHT<2u;Lq#_H+SV`eGbJlC4lBXgoWJDZL+ zI*xlO{$WUp=rk>787%0LqJFIHsQF>-5>Gd07#-|_)ke_U3EzeH&Af1ittUS;mh6zu zCZ*OLj_*JVebYsgQ|E5+-II4qD$%gg7!#aS`;xZg7@`0I4{7nw0@SI+!54?hv)pQ364-t16^GAI<4rM$z$|gn7lm*iTf@gKB zy?C^V=ezTfrL0>;%nJD64IOpviLjjOkZxDuB|TzaLn*KUW1qRG<1{qq&hs7}Zm3EG z?*z@%JQMSvAhb1*IbnISoxAAK_OFV^Y>+9UilsSmF-ww)*1QCM_2kCXh+5ZAk20q_ zsB8$o539DPEbF)-TfHxB4u3y;j_|zxCia((vLcQ=;HKy!CR*vwY2zP9chGCC?74h= zp=zZAohdAqw1q6N{Bp41`vJJNnN*?neD?;_A|eDRAqFBN=ua9*KDfro@5#d{`0O&e z^Ij1+=cdH)7=Ob&I|4@^TK<& z)bPwY#8bn_IVM0iP;f__+Iche5xhK|?Pg`(*7rA6)9FGx$D&x`jj&CQ9TI>VVdz`i26c2FIdgL zH=x$^>xRUhQ{FY1#4}2*$39D|U2BS(Qd&OT_PvDC{KdR9Lh%yaL}Ulnbf6KM-adEfe* zfa@HvOT({*G#@Qx3Ys@@4kKg^UvVh@Re`GpNG$3;efo5CWaLhNdBTvLXjXo{3aWkk zUoG8Ao;J6I#b3L&s9M|!YszRDN|tM%8eb@9zVlsxvtMEUFBVD zgUL%BOVJ^Xo^t}MuO1cF=-sonvum6mGbh6kxsPpQO*pZ{$EG>6PH$eHeEqeHXXVqd z#`1A$qFDOom7~d{0t2t*{-?Em*Mj$t%><%xmttEOQ4XW$nj|X&ALZi1X6uS@SE4&q zsKe;bwJ=Y5=Gk^av?#h{ol38?Gp{G`Jv?P?5622?{5R!c`wvwMj-4_)7i_5q^N<#r z=2$V^b-LIGJUF^~fBk=Ofn+17X&%Ik{)4eZ>8UNwO~r5swdJP4h)&aMJ?`dw50;K> z`P{>?4hqN*#`3IhLs4VmVpP$4fic8$f>Qm+hVGoz0<=DX)0O%R$dca%F}{J4WTGmh zWI%QMSuq?Q+bI(T z6+P0%iBiQSq}wL?P;%s}3(K_bl6gi7D&`xB6`640rqjOZQiXg;+WT^O>TmHxhX=w zH8j9T!<`oz3(t5JmL%93Ano_;=Z?QhTd(Am!QLu4!p$_D`yDg7l!P{_WxR4XF3yzh z9X68x>Cy|Yb9Dqo@yqp*6x?ee*!@{^EX5M{>&tJ#k`1Q+ZcMuMae^0^w(pio^AW=z{Iku3kRJ*RE9Btmi+lyM5O9`x$55+KBj2yYjAZrO4A08fG@YfE2>aS;J*F%TtW^Er>q*|T)xFRC1;aRKevfRF 
zUYgS{EV#ZdRd8!wjrKrs4z|P(*Z6`dBK^5D9?B1%?LNj3s4r#WMxz;@dY8gvB#i~J zYCu|v>8?y>+NkK;X+3xC^;Z&0SoJc$R5&&Co{k=SL((jJT7eB zkLt=K^-rESwjltbTGjz{MXi8CpsSzVG@#I%gg_K$Nhy$>4)sMXIajB&oXDDAaW5>zI z*PSjK_&za_+G~Ex&8#Py@!(BUlGIyqEiJ8Vaf^XEXJ!18FV1Z?BX_2JmTd%GG-W#* zp|8$*`q=L=B}T{Dh*f-WXTf>yuJLz-S)&xmn4qD)WfgP+GoA~0i+3p78j|Jlt@`qC z2=pUT!VOBt6}vLa0-OpFiwm8I#R*Bk=<(>5z-498!o$Ng4GkH+=2s6*rpruwhlhK@ zDS5KRQ(($JkIg6W&rWS!57zz;M1*M8rd~2${XnD^D<_Xf7{7guPg2A$zp9W+ebtxy zPkVhzGVAT`BLbhlGNj)7xfNA=&SM*o2Ub%;!w)LYFLFCwDo#Za7{ z7K4U|FYeiz-rXCc*>{&5FU{1An=DF;gQRC&Ib3RC&tiuA8N7V-TL?~ewyYNxq&7Xivb)!I!OciPgJ}M3nF;GjibMC`n~yTK4b?f6Emte7E`x6*oS*Gs4cAfF zROx?oB`4D6rW}5Z>$4b?J8ARUA>bF^+z^LK`nH?+0|w&hT@S#cnhd2p1Z3+Yc8wI| z?5uv?qzy6?w?*1Z%+%J_w%1-2beC13+nW0}N_|^Yk%K~JH_AM-`Y$?MB%%~CVtU9w zPVEg1c4!?ut|EWV&R7xW4ydccj>D~k>em9Z1!yDwo+ndT-xi|A-iSFcr-lq1mE_w% z)Q6O1l#+3*LMfX3(usSvB_HVQx22ko@!4Ka1O;I}A9{RWfQC>)jOCY`_?Ki-z`_nc zCgpxM+;XP&L(cGYBhmhYwyNv!C@O4o82}jyFt_ZGKezNcD`412mQmE@jw6~ zmHgq_=VN6NCsv1|?E{QQg()=2v8;V^c<@mDR59aOzZAVuO6qt+*qVY#+F+D=7BQ{j<0X+?3y$S;TbI~`qQK`NIpafd z1tMy(B92K|w2DOC*&K(bO3uN&6Gm}IfpVZHYDpIJI;{O`G^OP;8*J?Bj4yqYM;KLn zYtZ=uMM;nY+s5v0+WjX(((;>{?U$Cdwv23a?9>yky|Ul}8Ob_PamQM$TOosHtiFz) zZ+vb|B)|V=il2Fu=ux@dF|K9FlZ4D}mwA>H15m-{OJa%p4+%az&UH9#n!l42mm0Kb zwFw)~3tT5dF9f+im^T5OA;3-!`(pL#CO@N8dZQY$zHWB z53$XDXSg-U9Gv0HJcvIE$XNr3$V^WkW=T}>_l#*`0rzyc#oEr8Po+V$vW?F(R zv%lj~lk(SZ_oj%Sq1=?xm{$jZTHn5%3y6**jg7`1I__UHGb$BVGS;jex5btJNEi98 zP;jZae)F(hcJER%A@VdMr2A4c@WXE@Z3KmB<%~(?3@caU8}>2g0|4yAgm?wIOrAtD z^RQ6xNaH~J_$~Yl)zk=Xj-+qNsUom4IWzO~@t#N?Q$#A7v+iW!J{J(es)<&|!;FM1 z9q*I;tF1Zm)PTDQvS7H+RonkF)Oj_=3Xwq|pdgt$T^~;=6+PId1uaX>>yfEEg^%T@ zr_o<-C9-s6WQjr3p2s2Qan8=l+Pdt7d9ye@jSLE!PR5m@`YpBE&EK5YD$ktIws&ZF zg$H`Iy7#;jg{94OerFxb2kVK(V6s9;BLF%5>1oVrn?}@T`YpbejE=L7-yi0OPi*68njt2mYYZ>mj8E zX0NI|x?zWspvu%1ayGnKx$CQ=VdsM-S&;LS1}!12qghJM{k%2sZx8TGEmGelmiDjL z7~uDfv#gApr&jV^zcl9k_*f&c4)%Ixo^e`h0XMxe)M}K*_VfvEAiyUSUw)_TZhbH( zzDM(k?vReNHr-#`>%asQ6$>k?4zqF@QcMHy-74iLG5gZFdYu9}do=gaOPJ%rwHoLn 
zdX5ts7-n)fYy@o6v8?nPYhTw&$bPi_v0Q*=IW5UpYpNW6Pejw{=g_)0D9e1~#3wsb zB%eNgilc6;^9hWYFlZiCE~xV{M?Bd06P2*zt~Iz`7<9zqPL$Fw_CRIhkEG9;fKHK~ zzliOR^nwEXLfukDd;4Mw1v13n_m3pv74bIZ@kBIre`0S3U=f&9=?nVZ_{_Uc8J5lM z#ui0^X`4+Dp?20N5=`m7)zp|zl^8y7T7pd2M31q;VEF87>MFukLqUuCN)8Tu&1WkF z7B63#f`x+r&6H7VK)#*iLxG`>HA!R$27+_w^w?NHbTn~xPR{bloMWg?;riRWiD2BN z^2h65Rb}{v#D82M(5{+v-U-jex6}y<38a{4ca@^yN=i5dg@uPpfh3CcQZQI%9_f8e z9&YY~dDmeyh|$~QXTa3vGZimW-W*8y;%dsC3nv8GB3mF!Ee>?ZYlEN^B&S!RqnsRi zqY*#&x<^HTkB>jHut1y^V{8AVYD-^P4ADq1D`#(S-=)UiL~(Ze!m$Ypk|gOZ3Z4Xe`YGU?WvN5{IM1`nyz53k^5%=n z*Zs@68lngKJKdxDVpNe8FtLX|t|x%vtxWg704}IhADx#OfPA5tv;08|2Y`9qa#MVW zYiQ0&G9C3;l(+>gZ7v+X>>``KH(y8I;IWHee+0&_g0IH1%cv=d9O%rtPeYR^8FL{f zV|ZR%?Y!1(>ivpvidWQ_+ew1v0$vA8>5n+r*n*qVt;u*Zpnc+?gEX3U!M`4OBO&yV zaohSKm{BKg1;{*BRmI2L{37XHuO2OJH!VO&cc0T`5;5`oUzlEXN3zyDl5#7IWs{f% z7S6@eW9bW08wxoOr9O&HNr?n-BdfTWH?9gnQSY8jDL+nFYpIDpMu_6inOUR{HNHrM z?Uc4K+5RY!1wePeQn;X?K)_|KSNVq$g8ezilGxN~%;!BP7%V@?F5~l{B?uk>zU<~} zp)acK{JYG;OkPnG)8xV_V&tkdt%ko4At(fGCO9dmsm;NVo92;+9jonKm{yR7w5Gv& z=h*-&P1V(R<+fnEj1uD(rf1KdDXXh@icA?`fEi8SVHhW0lV@wREvu9|K3z$cR{$RP*_BJjis7BQuzXkdLl_VQ!|vh>Uxk0hpgVHT<-8aKMHbyk1WL z2tCHy84g!H2prWRRDIl98_60CO@AMNy&cc>RN&=ziiyd|<>_*B8fjk%;J>2_3(-MA zLCRCga8{mTe9qqGaMg*X_knTWzke^Y9Hc`<*ix%MCqIAapk>EI{?oh9nE}D1jr)-E z)$Pr7lC+t@Ixt$%Q-7=irh7i$1Ai|adt!sTB?mndj`=P+2%L8>)JA3C2Y&$`} zhJ-T8J}K!FF6CE27_BpwH7s0z$U_=TX|W+*E{f6y3H>vX%2F(j$t*C~E9acg)7^k~ zQ{c_g36>sWCj0g0Spl*1!Q2Ynw1Lsp+yFz8+wbIP1v}KVXD2zYJNw>CV`<4@Q6xeUxi?3f&8|8J!q2?@dT^9Eop2wEX7AQ7Q>CVtgw`^e^CVDsz9%FCi|?!I~- zESA5M{vjdQMb9g-PZzu$M^N2WX0O=H`F%Q~mY|??ox;}+a+>+tpmpdyZsw|NDBp$O zg_w+&E6#sNqdcVJQ!JeP^OhX$IpZOvg8uC5c-yj5t3ypzb~dK##z5Q2?wp|W3LLnw z>Gj2N?M_L9T!sP)l6giy~|8zWe0UVEi{~k4qY%^H`v&gUGeJ&1T$8t;6pOQoL zD9$etg_Ge$Y7U@ZdE~-oo58354L3MAI7!@{8;orcIy%_tM|3tiI#ktmGw2Y#G82E4 z3B&Ol?b)SpGtn>fZEb9(<%rrXgq)pENkQJGqo-fzlaJLx5udFyUIBv1gVr`SaSenD zLUd71;;Al)84tJ<@M&pjCFjHjsz^Z;U||EAHWrg64>&KG$$zE4|K|%_A!+1C2nzFb zvRIgQmD|$jy%;3Owt^kf2iuU 
zj>cD_@dvCDq3*TWfM}XiPs>f~44%!}*k=j#D!fv>{QfuIA)r;_}nv&mKs4bq?ca&?wR zT}T(6O$ed}H5}m8hSjs;*`PW86sG_UX|2#Bl|2_EMIaa&Bn}klw#~+vs=D!cod0w4 z|C%H~gAJQ{LRZF@>jraw>~`!4?@jIm_y#B zsqcr{V$-mB+gmj-t4+B#pL`I$SS~QwGJ4@~?7+-4tBDOcn<+9d_>{ous^BL0pNqri z2~=!NbEFMeB`QveXWQ*H`ByRs2=5kEABZfbGSJVYYiW2busy#E*sD994<|5yE$|!e z2hzPVt+%Rj%%pouZqB3bKa>N8KXew!XM&8ps7U{3CIaP>3uWe%ScgZPrpfE?%uD&W zv;Y0IWHS8F*qh{pB@X?P45>! zf{7**r_-9YLDCl1t+Qr@%{=nW$%5NupuW{Lpw3>K`{IT8>8Q01|`)*ePfdF{bzeFoa+DFo{KQSjdTdtXjd6axWd zfXOqJsq&O!n7V@S$loU?M{I0t@Xh5uNsC5gDXO@0qMj1#W`-mC>*vJqKUwut&G6Sz zYas3eLb%v}C>UJsgz!yv%w|Wqahoa$Fg)i`t6Rah_-Z`2 z%|yTi0D)Sd31kU3OP{Rs+&2Pm18PuKPL4u>JcviGkZi+dP;POr9tjFU^Yc@A#y2-N z?@CJ>f0u`X%lP~9|1LrNcN71Qw*P-|VdWjCCz>;A!bs6Ed)>;TFnKn!q55Esws0Ah zox)8y=;q3%5li6XRR$7 z!$G(*?;Db6diacCnGae8h@}6o()$1RLh%3JfApaPz_FYaBh}s(h{Hdv17tmqlSSKh zJ=Fd~?|JXPt4qBA^8osrKY#v|y*ak|Kd4JR{{BKlGD|aEU!E!frR9I9OQW>;S#jk` ztI=d!T!i3=_=5PPjPb2>twSMC0r6B~%Sx0yR3&Q^=s8gw^CkxAK;HtT${<@f3Onn}7Dz9f&FiBW#m`1+wC zD32XUCPHK{O78&z8EpP8KR*e84FGV5wwcJ7nzDROP8PKNaW8}e)OXfCbfC)R{BnrS zh)SlgHyJt2%}o1Er%V6{l{EfQUEAEmsV+8ZqGO7|AtEBut9Rp4XMf77_TlFzVXHES zpW+}E4g9W;X~f;E6L$fUo^E_C{KfC)^(XH5CjbQlL~Z`9s%km<(VonkH*ZYlu1^u`ySs_Zr)SozN7Q)+qWfu93~ksJusR3n&un+`7<4)HI7YfjT<4# z#>U3U#pOS~)rmnOLF(lQ@`K%U8Jf(dAufN@-?)^Q-^~Dz#L2@G2yTh-yE12GWraY% zUJpUH++iUkTR3TUxY8x0OZS>M6l1^nlJD;uNpRSjnwq53)i#sdV3oi(U)_(HH*<~Y9H@_)$nn=)2p;3+G{-0xc8!(FzxNh z^F({4Xa@hD)vyMBTC1k>jpW+Py@taNX;+xczGur=bAy(`xsQ{nJVCEo+V}_mBcEY- z50f_`FgH*^rwxa8gpvT#!(=R5MM#G;4G8*HnlJY{fGVxbd9}NBm5Yl@ufhV4iqBwr zoj|#fQ~`Q>)+2q8#e9t`w%~QQ+J25|+Ol$Hq{h)0H20FA&q*v-J^u|lU?5TJK-t6x z^q&4b$)7(jxrvus^pUBjicqk$2JgsJoQOyxTFV~@eXLI33|(d6R(YHA}dG*qARJ%I<5i8anAI-AY9 z8sOn)o}jBzjG_w$-eLxzu#h%8KiDY9;gHm+5}p&qWh9 z?&i<_VtY+KqyRa{&QcGWsx=ZbOJ zcrt72wGzf*QKZF0NlAHjxp*55xTAXpwWQ$BgL9uGQG2=tuT>&eHnt3~5w%3VsD>~B zRW-G8R2)`UuPk`)cj+3x4F&@)R0=9>xj_x}fSUcF_ijwDp-t!ROeGG8f&(v!S`bVJ zeH>{fzo*N=Mo4cCLZwYWs5!s>1c6jL{0s+_q?ZKKJ;QnmRAMP<*b@h_X#!3``SIgm 
z#jLP7+r0h8BVsaga#ZdAyT8Z`gsZK0V4;xPNXBau3TfH(Mca)Q&c$Xr`9 zwSQ{%DF? zax}5zlO+{!n19e`b0=KP;C`$!JIFyDAD&3zfdLJRd zVkYhSx0M%&UxcAy6iAE`fT}p_;`jAOv3;nhEO=I!K52sh>1;~a9ePZGk)X^TBk>2< z_vR=K6+JW(UIGry=a#=`s|Z{-hq}O|yp}03=o0 zN>>Di@ZVI=mbO3Z{mZA9&Hz6O1DQAgHmo21{&j0S?4~c9u{#5)x!i6>h>%g7 zLPbU8>`y;G8surNIvNB78zhyyAu%yAD0htf;7HcgIJX;iJrfI-G$|!m(ezM1w_K z-JG`c0EQ{{9*ZOsJvEf$M$q-kUe`%^#EROU}+YXYbX1>$lh1slVD=i|&)% z2LgdaVP~O^AP`R&Fy;yg0KdlO(D2}|sJdHP2a zH$b5MAu#AkXGG!LXi}x`lH26s%7+u^tL^8moy)mAY`U*?;{~Eh*><|#^T~eePTtOH z)w2>B!O3N>-?`R|UlFMp$w^Il?fPB(MV6|h`c?j|1#4B8L%$P-M6C`TIHiAC9hD68 zKOmuU>!e=j!=ixgsmOr!L0-3n)#HO)@CCEEESdp@^!*M z0DMJT+S|_3cqIHo{*a!ij7`V+Qz?$ubM8IPkyrCQtY$4KbDBfHA+QOQ8kc#PxM~8Jo$3DG_uuMU5&4Js(6L6D?^J8YNg&Q&B zMb`>)`fN4-%Su2I6O$Y7<7jNf9`hBC9y9hi&!sZ>ViqR8IBV%<8GwkP!npamk!X5? z8JhwDq996K%n17yho*(UUaxkx%*b(rculHZ|`NYnxh`;@^qA9;1tO=`Pow>jn zaA42CN%dJIGczhQyY<_b0GyigFfj(wmZ>mSOAYm5(Hb*eQ6J%pt#Sw0iWTRTJ}eD+ z^_E|1I5wW+gkTrYDJ7Phfi%u@j_)-R;K`0jrBk;jPWkItmaZl^3O zc7qSNrNHERyMv5PjJ-i3;QQ=iMXGwg&`ht~{1$G`t}3MZxYMXnmg>@tf40CrQ-YxV z8Dqac4CaV24-4G0OtcQ@jfVZDfMAWTbbl+45mE!QFKU3O}^5cUNE&bW3(ZfT|d-Ve$$Qgp<0w3 zRHz#`ieR7w3CmxDVt#aJ)Xi3R4c7!om*5iDccRqr)Mi);fb8|_IpmP@d6aJ-l(vqp zj6{lit@l4zWlEFU8)!@AR-Z?AAb3dvJUdc%U)#O55(pM;F*7=xJBK%}p10r6s;jGm zSs%9AOML?PY1i1!H&Nd|kKubwCgsJHEJoU6O|{WPnE(TJ@Jz%HHZ zjiIp(g}WWB&$CA+BuAv4YW`uy(z(X@st&H=A?zI%BT5jIl|A(;hwT}UDTVwKNP4Oe zW=bva?o2FA8ietiKvCkw2z-Pa-dGJ3g@?8EIZE$PP7hG-V|8CQVP@2EWs^(<;25~ZNKn$ z3Z^%YLLhwP)(kmpQIUC~=L{2kdX)3z{DZLAbwbBO3gCDt?-fDLyIgw z`7w;Xxv}w3=kmPyL|qe=V~1zQ-|(xSqhkWAD=W#5%;P|rLtWz{2_N*8b@og^BI67< zZ!%oB3-Z8=dW=!*8h((|NRGm;G064tAF6%g@6Q7^OvQh=9+khPpCX82D_>Dp_5^u| zjhIwY5*a0BCp`s-@~c0NR1a0-qW?5`Z)p0G&sC}_1Y^}~&j^hz**D)qNdQOK8E#84 z@JM$R=H*yjfo*dL=B^1|Rr_PBA<>q(M8s)O)ot@0rnQ-*Nz|T!m#bN()^-@MV`)yU z4wmKcsNU)Ik-|zg%Zo@5&l!oi+Q_Q<}hYbbkTH0Si2;7^Gc!2 zaR6m7b09O-IF-(df8Kuo2(KEL3ZPUX*7XR6dppnRed6U__w2XOlClH}y&FvTps< zldpb6k(qU;Q(;cyz!C=nQm&+}w9|jnBX!1P8=Tn}RN&i2()!S`WTI=Xh9RF1y%ep?&xHu9CZUzGuGf(R_?A{Q($> 
z*mcm(mQGcZPY9-0_13gF<-|_aS&!ouMOG zEO$Nci_nP3#tnbPG<3I*ttO0j21C>ZDRxo#uxs++g5lF-eUi{dpuZw-J0WMl`wFpjJsYWNK{qciiVmU0Eb=E|6O(3H%h5rW?Yl%z?XPn6~+)@u#}lB z2>WAA%6+TzKOO$iwYR_%7Z*h5T!^)%Z;J75=V0l)P5Z&yf~$uZ#mq*ZHZgyHf6y=? zLIv2X`NTtc9)54=)*r~-$tKZrx;mFO>#7M`&z}di(T!+}4}vS&;EG9$+Xl;sphuVp zOB&1pj>*nlafr8Af3C5~{Wv~-`!@gb>=(XpqNk3AKHP$kiFQKi_L-Jhh?s#q^cGUa z#rxx&*d?d|6c^P6VP~PD{JLE~DNR|=S>L;p@i7^lFY@f$)q|%uBPD#23ZG8trj6-$ z);`=|f9%qmo-EEti2Pf~taOMkT1cN=oIWOE{-cNT6kH`qkQ)Zg{*_gElo2JclAwjK zJRi-{m7oUeE^9pM;!oorF$||`^9`{0Y1CkYK!f`h^^RBow5`~84z4ykb=+W#1_`T? zTLmEG7EmM{Z~q4*?85!>;)Th?rQk{ieTrZVX>HN`3hsb_es1cRZ;;wM?y++tG zo@D^JEma8He47CpyY^>^Xs5P=U#2Tj?SB~|LL^^z9G1?^=isqH3Hi&9@Gb5^5K`+} z_Jx+()Z$WKAUB=m)A7Zn26N_N>3G$sdoAw$&ecGA9?9Gp?M#`Bk-qvZ?A(L5p)ubs zy?V8lWpEcrF2@Lb_LkP|14FVIskB&9s`P2r@+w_qRN#x=qv_pt-`E7)U|mQ zDxZPI1sOe24eaX!;4gQatROKnG{g6TsnmdAUnHG|UABW6b6_bz7{b)ADNR&0$L9c?7sSeoo)t{3E&DerAuDw!4D)w6lIvx_L?g#jrEEuk*X$rA}IS?z2x8AoAmMEZnsl|D7|e44!T-e zl>xnq$g!pCCVI04jVjDj-WCu%d>~9P({Capp(`FM$sY$<2qWtTOce2b@64}yk@On< zhu2k8%dM~@m=_&=Co&yt&m@T9&$!9NnzU}3{#vpe?VGqkDyihOTzAZG{{`$j0TXPA zvZjgiPM&EN2|qxPCp<|)Q8t>IPKNSZ9Nz_`>_sAkeHA*pXI{Vv)BuU}f_gZ0lb+qa zo6RBGt-pvHk-1heS%i(|i54Bf2Y>9L;0i;e4A~cz5{50ih(J+1Ye1;?SFXp&E(tzy|evC?0!&V z0cgEd|Bx1$5aFdwRsY!u9o)qyu%7JGdSgR4H|6<`s6c>puMGpG{=aLU{+#k>>H!KH zI4)+Q=fG8#%8D82?|IlHznxYJmw_f_poa{UT(aN3s6|*e3oz8ZO&4q!X=p?X`HGjo zR-}KEz^WZ1!A>w&wdiPP)(St_2ODz`g5#S9>CAZ@gly|TNN#pC@xsyIb1+BJ`Lq=& zhBuw_c%~_LLz>|ev{wx#8=52Rlo8+Y5={^y>|3oGpI{AF-NIsJKt`b>?X$D9WW1&> zC^7->=3oB-~G)xaC=%W1hMXB%>av z+(iqc)+i&Mb|i&kd01mHtrb~DFLDmMHmv`>=T2m&A)={-K#r#TYl zQ66rao$o?5>iF-(D#b_w6(BDS-fqKnQG{Ei-LpUAvzqQ6g0Oh!WeKdjCTbWz?Q5_B ztlwrM`PnWv?QrN%dLXu=^Hg-3`l?7!qF z?Mi%LIJK=~$bG@l%D6kDgOTwXds&&DdHqtjKGdJH#)wvU50?E6SxEpXy-uEkTL_Bd|C z(Y|o>x)1>ZS+Ua@^mV2(wHRBJjiDwkK47BFy>kJFRKc#Dg;IDb_!T=Tanu7OXYk?} z$G&va!=E<5BuMje_1OjB2!Vy~WN|*6V4^03^yO5F7tfX%MEtXk8BM-~FAVgkokB`O zYoe6+6?tgzqKOf!OqO6`Chg13Hl!{{sfQBKs^^)dgsHITh>0_dNm`2B7aq6td>VQN 
z8ua?{jPy4Zat(P>g<0M7g*57zLKmP3#~)VS(pXSrm?GtrnL}UOkE&8bv@t6u=yL#9 zfL3*4sZY8S*ta-eb3i1)LKrJ%O)5OdRwhvraZq_a9lb3JrjPa$ZiVve2+*X!NqaWT z7FIZ(&X#>vNHjuj;SPCvkWj)m30kpkKI0=}07>WaDDvC8Pi#r|ar^p;*$TlCuNukG zF8F|{#?+0MgPhaaeu!k6S6~2Wo0mwa-!?_|))KTWY*?Z{Fw2`}4TvOL8O51~&4UZ0 zgLAAs%fQ3srO_=8nRti#Rf;24=IOv{`6fqhLwWIfeqz?HdMvR@$(Fn&7Y3@1RooVd zLx~Q~L6D@vGE-|(m~;K>0&yl zFfIFNL+kOT5DYI+1K7H4!X9DiX=op9KO_zJ6w_y#f82_-NXnym@rQi*1*}`zO1p=p zgW4$pD$J|tCKgIyxZi{z>W&K2TA%F%UgRU=sr$09ZOAxeoHCQ2+}mfWG8c{P)^0iW z>{?ObNfwgXLUtkx2UaxPBL{M3285>@AKu4$WuOOI^}I`4xNsDytjF7Jwj_kV=V|oC zD{`JTtb82@T?p&GwjXG<4gIj2(hKsPAXzo{@;0D`-sb$VZPhG;T?^x^mw}+oRcjW z$LG8^Aiy|*E#B31^lFdPoh=`4r3Lu~I?@5!tbr0P)fxTxn>YxUhUTJ_1Vi_r(GU9L z4h4Et1r#_-x6~tPU<9uNNc@5|GUV>|NycZ&MzcOUw3qUZJ=&J*qq*SWpBTc}0wroe zSV4zA2t(NP2X#r1g+{E&Y(-AQHxs#4FU}UUNfm;xHQBmeW-RKJhsEH{=F?Fj9I<9G z(RmFYMeaCu?RIUe-1Bv v documentation". +# html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = "../../web/pandas/static/img/pandas.svg" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +html_css_files = [ + "css/getting_started.css", + "css/pandas.css", +] + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = "../../web/pandas/static/img/favicon.ico" + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. 
+# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. + +# Add redirect for previously existing API pages +# each item is like `(from_old, to_new)` +# To redirect a class and all its methods, see below +# https://github.com/pandas-dev/pandas/issues/16186 + +moved_api_pages = [ + ("pandas.core.common.isnull", "pandas.isna"), + ("pandas.core.common.notnull", "pandas.notna"), + ("pandas.core.reshape.get_dummies", "pandas.get_dummies"), + ("pandas.tools.merge.concat", "pandas.concat"), + ("pandas.tools.merge.merge", "pandas.merge"), + ("pandas.tools.pivot.pivot_table", "pandas.pivot_table"), + ("pandas.tseries.tools.to_datetime", "pandas.to_datetime"), + ("pandas.io.clipboard.read_clipboard", "pandas.read_clipboard"), + ("pandas.io.excel.ExcelFile.parse", "pandas.ExcelFile.parse"), + ("pandas.io.excel.read_excel", "pandas.read_excel"), + ("pandas.io.gbq.read_gbq", "pandas.read_gbq"), + ("pandas.io.html.read_html", "pandas.read_html"), + ("pandas.io.json.read_json", "pandas.read_json"), + ("pandas.io.parsers.read_csv", "pandas.read_csv"), + ("pandas.io.parsers.read_fwf", "pandas.read_fwf"), + ("pandas.io.parsers.read_table", "pandas.read_table"), + ("pandas.io.pickle.read_pickle", "pandas.read_pickle"), + ("pandas.io.pytables.HDFStore.append", "pandas.HDFStore.append"), + ("pandas.io.pytables.HDFStore.get", "pandas.HDFStore.get"), + ("pandas.io.pytables.HDFStore.put", "pandas.HDFStore.put"), + ("pandas.io.pytables.HDFStore.select", "pandas.HDFStore.select"), + ("pandas.io.pytables.read_hdf", "pandas.read_hdf"), + ("pandas.io.sql.read_sql", "pandas.read_sql"), + ("pandas.io.sql.read_frame", "pandas.read_frame"), + ("pandas.io.sql.write_frame", "pandas.write_frame"), + ("pandas.io.stata.read_stata", "pandas.read_stata"), +] + +# Again, tuples of (from_old, to_new) +moved_classes = [ + 
("pandas.tseries.resample.Resampler", "pandas.core.resample.Resampler"), + ("pandas.formats.style.Styler", "pandas.io.formats.style.Styler"), +] + +for old, new in moved_classes: + # the class itself... + moved_api_pages.append((old, new)) + + mod, classname = new.rsplit(".", 1) + klass = getattr(importlib.import_module(mod), classname) + methods = [ + x for x in dir(klass) if not x.startswith("_") or x in ("__iter__", "__array__") + ] + + for method in methods: + # ... and each of its public methods + moved_api_pages.append((f"{old}.{method}", f"{new}.{method}",)) + +if pattern is None: + html_additional_pages = { + "generated/" + page[0]: "api_redirect.html" for page in moved_api_pages + } + + +header = f"""\ +.. currentmodule:: pandas + +.. ipython:: python + :suppress: + + import numpy as np + import pandas as pd + + np.random.seed(123456) + np.set_printoptions(precision=4, suppress=True) + pd.options.display.max_rows = 15 + + import os + os.chdir(r'{os.path.dirname(os.path.dirname(__file__))}') +""" + + +html_context = { + "redirects": {old: new for old, new in moved_api_pages}, + "header": header, +} + +# If false, no module index is generated. +html_use_modindex = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = '' + +# Output file base name for HTML help builder. 
+htmlhelp_basename = "pandas" + +# -- Options for nbsphinx ------------------------------------------------ + +nbsphinx_allow_errors = True + +# -- Options for LaTeX output -------------------------------------------- + +latex_elements = {} + +# The paper size ('letter' or 'a4'). +# latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +# latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples (source start +# file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ( + "index", + "pandas.tex", + "pandas: powerful Python data analysis toolkit", + "Wes McKinney and the Pandas Development Team", + "manual", + ) +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +# latex_preamble = '' + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. 
+# latex_use_modindex = True + + +if pattern is None: + intersphinx_mapping = { + "dateutil": ("https://dateutil.readthedocs.io/en/latest/", None), + "matplotlib": ("https://matplotlib.org/", None), + "numpy": ("https://numpy.org/doc/stable/", None), + "pandas-gbq": ("https://pandas-gbq.readthedocs.io/en/latest/", None), + "py": ("https://pylib.readthedocs.io/en/latest/", None), + "python": ("https://docs.python.org/3/", None), + "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None), + "statsmodels": ("https://www.statsmodels.org/devel/", None), + } + +# extlinks alias +extlinks = { + "issue": ("https://github.com/pandas-dev/pandas/issues/%s", "GH"), + "wiki": ("https://github.com/pandas-dev/pandas/wiki/%s", "wiki "), +} + + +ipython_warning_is_error = False +ipython_exec_lines = [ + "import numpy as np", + "import pandas as pd", + # This ensures correct rendering on system with console encoding != utf8 + # (windows). It forces pandas to encode its output reprs using utf8 + # wherever the docs are built. The docs' target is the browser, not + # the console, so this is fine. + 'pd.options.display.encoding="utf8"', +] + + +# Add custom Documenter to handle attributes/methods of an AccessorProperty +# eg pandas.Series.str and pandas.Series.dt (see GH9322) + +import sphinx # noqa: E402 isort:skip +from sphinx.util import rpartition # noqa: E402 isort:skip +from sphinx.ext.autodoc import ( # noqa: E402 isort:skip + AttributeDocumenter, + Documenter, + MethodDocumenter, +) +from sphinx.ext.autosummary import Autosummary # noqa: E402 isort:skip + + +class AccessorDocumenter(MethodDocumenter): + """ + Specialized Documenter subclass for accessors. 
+ """ + + objtype = "accessor" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + def format_signature(self): + # this method gives an error/warning for the accessors, therefore + # overriding it (accessor has no arguments) + return "" + + +class AccessorLevelDocumenter(Documenter): + """ + Specialized Documenter subclass for objects on accessor level (methods, + attributes). + """ + + # This is the simple straightforward version + # modname is None, base the last elements (eg 'hour') + # and path the part before (eg 'Series.dt') + # def resolve_name(self, modname, parents, path, base): + # modname = 'pandas' + # mod_cls = path.rstrip('.') + # mod_cls = mod_cls.split('.') + # + # return modname, mod_cls + [base] + def resolve_name(self, modname, parents, path, base): + if modname is None: + if path: + mod_cls = path.rstrip(".") + else: + mod_cls = None + # if documenting a class-level object without path, + # there must be a current class, either from a parent + # auto directive ... + mod_cls = self.env.temp_data.get("autodoc:class") + # ... or from a class directive + if mod_cls is None: + mod_cls = self.env.temp_data.get("py:class") + # ... if still None, there's no way to know + if mod_cls is None: + return None, [] + # HACK: this is added in comparison to ClassLevelDocumenter + # mod_cls still exists of class.accessor, so an extra + # rpartition is needed + modname, accessor = rpartition(mod_cls, ".") + modname, cls = rpartition(modname, ".") + parents = [cls, accessor] + # if the module name is still missing, get it like above + if not modname: + modname = self.env.temp_data.get("autodoc:module") + if not modname: + if sphinx.__version__ > "1.3": + modname = self.env.ref_context.get("py:module") + else: + modname = self.env.temp_data.get("py:module") + # ... 
else, it stays None, which means invalid + return modname, parents + [base] + + +class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter): + objtype = "accessorattribute" + directivetype = "attribute" + + # lower than AttributeDocumenter so this is not chosen for normal + # attributes + priority = 0.6 + + +class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter): + objtype = "accessormethod" + directivetype = "method" + + # lower than MethodDocumenter so this is not chosen for normal methods + priority = 0.6 + + +class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter): + """ + This documenter lets us removes .__call__ from the method signature for + callable accessors like Series.plot + """ + + objtype = "accessorcallable" + directivetype = "method" + + # lower than MethodDocumenter; otherwise the doc build prints warnings + priority = 0.5 + + def format_name(self): + return MethodDocumenter.format_name(self).rstrip(".__call__") + + +class PandasAutosummary(Autosummary): + """ + This alternative autosummary class lets us override the table summary for + Series.plot and DataFrame.plot in the API docs. + """ + + def _replace_pandas_items(self, display_name, sig, summary, real_name): + # this a hack: ideally we should extract the signature from the + # .__call__ method instead of hard coding this + if display_name == "DataFrame.plot": + sig = "([x, y, kind, ax, ....])" + summary = "DataFrame plotting accessor and method" + elif display_name == "Series.plot": + sig = "([kind, ax, figsize, ....])" + summary = "Series plotting accessor and method" + return (display_name, sig, summary, real_name) + + @staticmethod + def _is_deprecated(real_name): + try: + obj, parent, modname = _import_by_name(real_name) + except ImportError: + return False + doc = NumpyDocString(obj.__doc__ or "") + summary = "".join(doc["Summary"] + doc["Extended Summary"]) + return ".. 
deprecated::" in summary + + def _add_deprecation_prefixes(self, items): + for item in items: + display_name, sig, summary, real_name = item + if self._is_deprecated(real_name): + summary = f"(DEPRECATED) {summary}" + yield display_name, sig, summary, real_name + + def get_items(self, names): + items = Autosummary.get_items(self, names) + items = [self._replace_pandas_items(*item) for item in items] + items = list(self._add_deprecation_prefixes(items)) + return items + + +# based on numpy doc/source/conf.py +def linkcode_resolve(domain, info): + """ + Determine the URL corresponding to Python object + """ + if domain != "py": + return None + + modname = info["module"] + fullname = info["fullname"] + + submod = sys.modules.get(modname) + if submod is None: + return None + + obj = submod + for part in fullname.split("."): + try: + obj = getattr(obj, part) + except AttributeError: + return None + + try: + fn = inspect.getsourcefile(inspect.unwrap(obj)) + except TypeError: + fn = None + if not fn: + return None + + try: + source, lineno = inspect.getsourcelines(obj) + except OSError: + lineno = None + + if lineno: + linespec = f"#L{lineno}-L{lineno + len(source) - 1}" + else: + linespec = "" + + fn = os.path.relpath(fn, start=os.path.dirname(pandas.__file__)) + + if "+" in pandas.__version__: + return f"http://github.com/pandas-dev/pandas/blob/master/pandas/{fn}{linespec}" + else: + return ( + f"http://github.com/pandas-dev/pandas/blob/" + f"v{pandas.__version__}/pandas/{fn}{linespec}" + ) + + +# remove the docstring of the flags attribute (inherited from numpy ndarray) +# because these give doc build errors (see GH issue 5331) +def remove_flags_docstring(app, what, name, obj, options, lines): + if what == "attribute" and name.endswith(".flags"): + del lines[:] + + +def process_class_docstrings(app, what, name, obj, options, lines): + """ + For those classes for which we use :: + + :template: autosummary/class_without_autosummary.rst + + the documented 
attributes/methods have to be listed in the class + docstring. However, if one of those lists is empty, we use 'None', + which then generates warnings in sphinx / ugly html output. + This "autodoc-process-docstring" event connector removes that part + from the processed docstring. + + """ + if what == "class": + joined = "\n".join(lines) + + templates = [ + """.. rubric:: Attributes + +.. autosummary:: + :toctree: + + None +""", + """.. rubric:: Methods + +.. autosummary:: + :toctree: + + None +""", + ] + + for template in templates: + if template in joined: + joined = joined.replace(template, "") + lines[:] = joined.split("\n") + + +suppress_warnings = [ + # We "overwrite" autosummary with our PandasAutosummary, but + # still want the regular autosummary setup to run. So we just + # suppress this warning. + "app.add_directive" +] +if pattern: + # When building a single document we don't want to warn because references + # to other documents are unknown, as it's expected + suppress_warnings.append("ref.ref") + + +def rstjinja(app, docname, source): + """ + Render our pages as a jinja template for fancy templating goodness. 
+ """ + # http://ericholscher.com/blog/2016/jul/25/integrating-jinja-rst-sphinx/ + # Make sure we're outputting HTML + if app.builder.format != "html": + return + src = source[0] + rendered = app.builder.templates.render_string(src, app.config.html_context) + source[0] = rendered + + +def setup(app): + app.connect("source-read", rstjinja) + app.connect("autodoc-process-docstring", remove_flags_docstring) + app.connect("autodoc-process-docstring", process_class_docstrings) + app.add_autodocumenter(AccessorDocumenter) + app.add_autodocumenter(AccessorAttributeDocumenter) + app.add_autodocumenter(AccessorMethodDocumenter) + app.add_autodocumenter(AccessorCallableDocumenter) + app.add_directive("autosummary", PandasAutosummary) diff --git a/doc/source/development/code_style.rst b/doc/source/development/code_style.rst new file mode 100644 index 00000000..2fc2f1fb --- /dev/null +++ b/doc/source/development/code_style.rst @@ -0,0 +1,129 @@ +.. _code_style: + +{{ header }} + +======================= +pandas code style guide +======================= + +.. contents:: Table of contents: + :local: + +Patterns +======== + +foo.__class__ +------------- + +*pandas* uses 'type(foo)' instead of 'foo.__class__' as it makes the code more +readable. + +For example: + +**Good:** + +.. code-block:: python + + foo = "bar" + type(foo) + +**Bad:** + +.. code-block:: python + + foo = "bar" + foo.__class__ + + +String formatting +================= + +Concatenated strings +-------------------- + +f-strings +~~~~~~~~~ + +*pandas* uses f-string formatting instead of '%' and '.format()' string formatters. + +The convention for using f-strings on a string that is concatenated over several lines +is to prefix only the lines containing values that need to be interpreted. + +For example: + +**Good:** + +..
code-block:: python + + foo = "old_function" + bar = "new_function" + + my_warning_message = ( + f"Warning, {foo} is deprecated, " + "please use the new and way better " + f"{bar}" + ) + +**Bad:** + +.. code-block:: python + + foo = "old_function" + bar = "new_function" + + my_warning_message = ( + f"Warning, {foo} is deprecated, " + f"please use the new and way better " + f"{bar}" + ) + +White spaces +~~~~~~~~~~~~ + +Put the white space only at the end of the previous line, so that +there is no whitespace at the beginning of the concatenated string. + +For example: + +**Good:** + +.. code-block:: python + + example_string = ( + "Some long concatenated string, " + "with good placement of the " + "whitespaces" + ) + +**Bad:** + +.. code-block:: python + + example_string = ( + "Some long concatenated string," + " with bad placement of the" + " whitespaces" + ) + +Representation function (aka 'repr()') +-------------------------------------- + +*pandas* uses 'repr()' instead of '%r' and '!r'. + +The use of 'repr()' will only happen when the value is not an obvious string. + +For example: + +**Good:** + +.. code-block:: python + + value = str + f"Unknown received value, got: {repr(value)}" + +**Good:** + +.. code-block:: python + + value = str + f"Unknown received type, got: '{type(value).__name__}'" diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst new file mode 100644 index 00000000..c7a984c6 --- /dev/null +++ b/doc/source/development/contributing.rst @@ -0,0 +1,1506 @@ +.. _contributing: + +{{ header }} + +********************** +Contributing to pandas +********************** + +.. contents:: Table of contents: + :local: + +Where to start? +=============== + +All contributions, bug reports, bug fixes, documentation improvements, +enhancements, and ideas are welcome. + +If you are brand new to pandas or open-source development, we recommend going +through the `GitHub "issues" tab `_ +to find issues that interest you.
There are a number of issues listed under `Docs +`_ +and `good first issue +`_ +where you could start out. Once you've found an interesting issue, you can +return here to get your development environment setup. + +When you start working on an issue, it's a good idea to assign the issue to yourself, +so nobody else duplicates the work on it. GitHub restricts assigning issues to maintainers +of the project only. In most projects, and until recently in pandas, contributors added a +comment letting others know they are working on an issue. While this is ok, you need to +check each issue individually, and it's not possible to find the unassigned ones. + +For this reason, we implemented a workaround consisting of adding a comment with the exact +text `take`. When you do it, a GitHub action will automatically assign you the issue +(this will take seconds, and may require refreshing the page to see it). +By doing this, it's possible to filter the list of issues and find only the unassigned ones. + +So, a good way to find an issue to start contributing to pandas is to check the list of +`unassigned good first issues `_ +and assign yourself one you like by writing a comment with the exact text `take`. + +If for whatever reason you are not able to continue working on the issue, please try to +unassign it, so other people know it's available again. You can check the list of +assigned issues, since people may not be working on them anymore. If you want to work on one +that is assigned, feel free to kindly ask the current assignee if you can take it +(please allow at least a week of inactivity before considering work on the issue discontinued). + +Feel free to ask questions on the `mailing list +`_ or on `Gitter`_. + +.. _contributing.bug_reports: + +Bug reports and enhancement requests +==================================== + +Bug reports are an important part of making *pandas* more stable.
Having a complete bug report +will allow others to reproduce the bug and provide insight into fixing. See +`this stackoverflow article `_ and +`this blogpost `_ +for tips on writing a good bug report. + +Trying the bug-producing code out on the *master* branch is often a worthwhile exercise +to confirm the bug still exists. It is also worth searching existing bug reports and pull requests +to see if the issue has already been reported and/or fixed. + +Bug reports must: + +#. Include a short, self-contained Python snippet reproducing the problem. + You can format the code nicely by using `GitHub Flavored Markdown + `_:: + + ```python + >>> from pandas import DataFrame + >>> df = DataFrame(...) + ... + ``` + +#. Include the full version string of *pandas* and its dependencies. You can use the built-in function:: + + >>> import pandas as pd + >>> pd.show_versions() + +#. Explain why the current behavior is wrong/not desired and what you expect instead. + +The issue will then show up to the *pandas* community and be open to comments/ideas from others. + +.. _contributing.github: + +Working with the code +===================== + +Now that you have an issue you want to fix, enhancement to add, or documentation to improve, +you need to learn how to work with GitHub and the *pandas* code base. + +.. _contributing.version_control: + +Version control, Git, and GitHub +-------------------------------- + +To the new user, working with Git is one of the more daunting aspects of contributing to *pandas*. +It can very quickly become overwhelming, but sticking to the guidelines below will help keep the process +straightforward and mostly trouble free. As always, if you are having difficulties please +feel free to ask for help. + +The code is hosted on `GitHub `_. To +contribute you will need to sign up for a `free GitHub account +`_. We use `Git `_ for +version control to allow many people to work together on the project. 
+ +Some great resources for learning Git: + +* the `GitHub help pages `_. +* the `NumPy's documentation `_. +* Matthew Brett's `Pydagogue `_. + +Getting started with Git +------------------------ + +`GitHub has instructions `__ for installing git, +setting up your SSH key, and configuring git. All these steps need to be completed before +you can work seamlessly between your local repository and GitHub. + +.. _contributing.forking: + +Forking +------- + +You will need your own fork to work on the code. Go to the `pandas project +page `_ and hit the ``Fork`` button. You will +want to clone your fork to your machine:: + + git clone https://github.com/your-user-name/pandas.git pandas-yourname + cd pandas-yourname + git remote add upstream https://github.com/pandas-dev/pandas.git + +This creates the directory `pandas-yourname` and connects your repository to +the upstream (main project) *pandas* repository. + +.. _contributing.dev_env: + +Creating a development environment +---------------------------------- + +To test out code changes, you'll need to build pandas from source, which +requires a C compiler and Python environment. If you're making documentation +changes, you can skip to :ref:`contributing.documentation` but you won't be able +to build the documentation locally before pushing your changes. + +.. _contributing.dev_c: + +Installing a C compiler +~~~~~~~~~~~~~~~~~~~~~~~ + +Pandas uses C extensions (mostly written using Cython) to speed up certain +operations. To install pandas from source, you need to compile these C +extensions, which means you need a C compiler. This process depends on which +platform you're using. + +**Windows** + +You will need `Build Tools for Visual Studio 2017 +`_. + +.. warning:: + You DO NOT need to install Visual Studio 2019. + You only need "Build Tools for Visual Studio 2019" found by + scrolling down to "All downloads" -> "Tools for Visual Studio 2019". 
+ +**Mac OS** + +Information about compiler installation can be found here: +https://devguide.python.org/setup/#macos + +**Unix** + +Some Linux distributions will come with a pre-installed C compiler. To find out +which compilers (and versions) are installed on your system:: + + # for Debian/Ubuntu: + dpkg --list | grep compiler + # for Red Hat/RHEL/CentOS/Fedora: + yum list installed | grep -i --color compiler + +`GCC (GNU Compiler Collection) `_, is a widely used +compiler, which supports C and a number of other languages. If GCC is listed +as an installed compiler nothing more is required. If no C compiler is +installed (or you wish to install a newer version) you can install a compiler +(GCC in the example code below) with:: + + # for recent Debian/Ubuntu: + sudo apt install build-essential + # for Red Hat/RHEL/CentOS/Fedora + yum groupinstall "Development Tools" + +For other Linux distributions, consult your favourite search engine for +compiler installation instructions. + +Let us know if you have any difficulties by opening an issue or reaching out on +`Gitter`_. + +.. _contributing.dev_python: + +Creating a Python environment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Now that you have a C compiler, create an isolated pandas development +environment: + +* Install either `Anaconda `_ or `miniconda + `_ +* Make sure your conda is up to date (``conda update conda``) +* Make sure that you have :ref:`cloned the repository ` +* ``cd`` to the *pandas* source directory + +We'll now kick off a three-step process: + +1. Install the build dependencies +2. Build and install pandas +3. Install the optional dependencies + +.. code-block:: none + + # Create and activate the build environment + conda env create -f environment.yml + conda activate pandas-dev + + # or with older versions of Anaconda: + source activate pandas-dev + + # Build and install pandas + python setup.py build_ext --inplace -j 4 + python -m pip install -e .
--no-build-isolation --no-use-pep517 + +At this point you should be able to import pandas from your locally built version:: + + $ python # start an interpreter + >>> import pandas + >>> print(pandas.__version__) + 0.22.0.dev0+29.g4ad6d4d74 + +This will create the new environment, and not touch any of your existing environments, +nor any existing Python installation. + +To view your environments:: + + conda info -e + +To return to your root environment:: + + conda deactivate + +See the full conda docs `here `__. + +.. _contributing.pip: + +Creating a Python environment (pip) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If you aren't using conda for your development environment, follow these instructions. +You'll need to have at least Python 3.6.1 installed on your system. + +**Unix**/**Mac OS** + +.. code-block:: bash + + # Create a virtual environment + # Use an ENV_DIR of your choice. We'll use ~/virtualenvs/pandas-dev + # Any parent directories should already exist + python3 -m venv ~/virtualenvs/pandas-dev + + # Activate the virtualenv + . ~/virtualenvs/pandas-dev/bin/activate + + # Install the build dependencies + python -m pip install -r requirements-dev.txt + + # Build and install pandas + python setup.py build_ext --inplace -j 0 + python -m pip install -e . --no-build-isolation --no-use-pep517 + +**Windows** + +Below is a brief overview on how to set-up a virtual environment with Powershell +under Windows. For details please refer to the +`official virtualenv user guide `__ + +Use an ENV_DIR of your choice. We'll use ~\virtualenvs\pandas-dev where +'~' is the folder pointed to by either $env:USERPROFILE (Powershell) or +%USERPROFILE% (cmd.exe) environment variable. Any parent directories +should already exist. + +.. code-block:: powershell + + # Create a virtual environment + python -m venv $env:USERPROFILE\virtualenvs\pandas-dev + + # Activate the virtualenv. 
Use activate.bat for cmd.exe + ~\virtualenvs\pandas-dev\Scripts\Activate.ps1 + + # Install the build dependencies + python -m pip install -r requirements-dev.txt + + # Build and install pandas + python setup.py build_ext --inplace -j 0 + python -m pip install -e . --no-build-isolation --no-use-pep517 + +Creating a branch +----------------- + +You want your master branch to reflect only production-ready code, so create a +feature branch for making your changes. For example:: + + git branch shiny-new-feature + git checkout shiny-new-feature + +The above can be simplified to:: + + git checkout -b shiny-new-feature + +This changes your working directory to the shiny-new-feature branch. Keep any +changes in this branch specific to one bug or feature so it is clear +what the branch brings to *pandas*. You can have many shiny-new-features +and switch in between them using the git checkout command. + +When creating this branch, make sure your master branch is up to date with +the latest upstream master version. To update your local master branch, you +can do:: + + git checkout master + git pull upstream master --ff-only + +When you want to update the feature branch with changes in master after +you created the branch, check the section on +:ref:`updating a PR `. + +.. _contributing.documentation: + +Contributing to the documentation +================================= + +Contributing to the documentation benefits everyone who uses *pandas*. +We encourage you to help us improve the documentation, and +you don't have to be an expert on *pandas* to do so! In fact, +there are sections of the docs that are worse off after being written by +experts. If something in the docs doesn't make sense to you, updating the +relevant section after you figure it out is a great way to ensure it will help +the next person. + +.. 
contents:: Documentation: + :local: + + +About the *pandas* documentation +-------------------------------- + +The documentation is written in **reStructuredText**, which is almost like writing +in plain English, and built using `Sphinx `__. The +Sphinx Documentation has an excellent `introduction to reST +`__. Review the Sphinx docs to perform more +complex changes to the documentation as well. + +Some other important things to know about the docs: + +* The *pandas* documentation consists of two parts: the docstrings in the code + itself and the docs in this folder ``doc/``. + + The docstrings provide a clear explanation of the usage of the individual + functions, while the documentation in this folder consists of tutorial-like + overviews per topic together with some other information (what's new, + installation, etc). + +* The docstrings follow a pandas convention, based on the **Numpy Docstring + Standard**. Follow the :ref:`pandas docstring guide ` for detailed + instructions on how to write a correct docstring. + + .. toctree:: + :maxdepth: 2 + + contributing_docstring.rst + +* The tutorials make heavy use of the `ipython directive + `_ sphinx extension. + This directive lets you put code in the documentation which will be run + during the doc build. For example:: + + .. ipython:: python + + x = 2 + x**3 + + will be rendered as:: + + In [1]: x = 2 + + In [2]: x**3 + Out[2]: 8 + + Almost all code examples in the docs are run (and the output saved) during the + doc build. This approach means that code examples will always be up to date, + but it does make the doc building a bit more complex. + +* Our API documentation in ``doc/source/api.rst`` houses the auto-generated + documentation from the docstrings. For classes, there are a few subtleties + around controlling which methods and attributes have pages auto-generated. + + We have two autosummary templates for classes. + + 1. ``_templates/autosummary/class.rst``. 
Use this when you want to + automatically generate a page for every public method and attribute on the + class. The ``Attributes`` and ``Methods`` sections will be automatically + added to the class' rendered documentation by numpydoc. See ``DataFrame`` + for an example. + + 2. ``_templates/autosummary/class_without_autosummary``. Use this when you + want to pick a subset of methods / attributes to auto-generate pages for. + When using this template, you should include an ``Attributes`` and + ``Methods`` section in the class docstring. See ``CategoricalIndex`` for an + example. + + Every method should be included in a ``toctree`` in ``api.rst``, else Sphinx + will emit a warning. + +.. note:: + + The ``.rst`` files are used to automatically generate Markdown and HTML versions + of the docs. For this reason, please do not edit ``CONTRIBUTING.md`` directly, + but instead make any changes to ``doc/source/development/contributing.rst``. Then, to + generate ``CONTRIBUTING.md``, use `pandoc `_ + with the following command:: + + pandoc doc/source/development/contributing.rst -t markdown_github > CONTRIBUTING.md + +The utility script ``scripts/validate_docstrings.py`` can be used to get a csv +summary of the API documentation. And also validate common errors in the docstring +of a specific class, function or method. The summary also compares the list of +methods documented in ``doc/source/api.rst`` (which is used to generate +the `API Reference `_ page) +and the actual public methods. +This will identify methods documented in ``doc/source/api.rst`` that are not actually +class methods, and existing methods that are not documented in ``doc/source/api.rst``. + + +Updating a *pandas* docstring +----------------------------- + +When improving a single function or method's docstring, it is not necessarily +needed to build the full documentation (see next section). 
+However, there is a script that checks a docstring (for example for the ``DataFrame.mean`` method):: + + python scripts/validate_docstrings.py pandas.DataFrame.mean + +This script will indicate some formatting errors if present, and will also +run and test the examples included in the docstring. +Check the :ref:`pandas docstring guide ` for a detailed guide +on how to format the docstring. + +The examples in the docstring ('doctests') must be valid Python code, +that in a deterministic way returns the presented output, and that can be +copied and run by users. This can be checked with the script above, and is +also tested on Travis. A failing doctest will be a blocker for merging a PR. +Check the :ref:`examples ` section in the docstring guide +for some tips and tricks to get the doctests passing. + +When doing a PR with a docstring update, it is good to post the +output of the validation script in a comment on GitHub. + + +How to build the *pandas* documentation +--------------------------------------- + +Requirements +~~~~~~~~~~~~ + +First, you need to have a development environment to be able to build pandas +(see the docs on :ref:`creating a development environment above `). + +Building the documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +So how do you build the docs? Navigate to your local +``doc/`` directory in the console and run:: + + python make.py html + +Then you can find the HTML output in the folder ``doc/build/html/``. + +The first time you build the docs, it will take quite a while because it has to run +all the code examples and build all the generated docstring pages. In subsequent +invocations, sphinx will try to only build the pages that have been modified. + +If you want to do a full clean build, do:: + + python make.py clean + python make.py html + +You can tell ``make.py`` to compile only a single section of the docs, greatly +reducing the turn-around time for checking your changes.
+ +:: + + # omit autosummary and API section + python make.py clean + python make.py --no-api + + # compile the docs with only a single section, relative to the "source" folder. + # For example, compiling only this guide (doc/source/development/contributing.rst) + python make.py clean + python make.py --single development/contributing.rst + + # compile the reference docs for a single function + python make.py clean + python make.py --single pandas.DataFrame.join + +For comparison, a full documentation build may take 15 minutes, but a single +section may take 15 seconds. Subsequent builds, which only process portions +you have changed, will be faster. + +You can also specify to use multiple cores to speed up the documentation build:: + + python make.py html --num-jobs 4 + +Open the following file in a web browser to see the full documentation you +just built:: + + doc/build/html/index.html + +And you'll have the satisfaction of seeing your new and improved documentation! + +.. _contributing.dev_docs: + +Building master branch documentation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When pull requests are merged into the *pandas* ``master`` branch, the main parts of +the documentation are also built by Travis-CI. These docs are then hosted `here +`__, see also +the :ref:`Continuous Integration ` section. + +.. _contributing.code: + +Contributing to the code base +============================= + +.. contents:: Code Base: + :local: + +Code standards +-------------- + +Writing good code is not just about what you write. It is also about *how* you +write it. During :ref:`Continuous Integration ` testing, several +tools will be run to check your code for stylistic errors. +Generating any warnings will cause the test to fail. +Thus, good style is a requirement for submitting code to *pandas*. 
+ +There is a tool in pandas to help contributors verify their changes before +contributing them to the project:: + + ./ci/code_checks.sh + +The script verifies the linting of code files, it looks for common mistake patterns +(like missing spaces around sphinx directives that make the documentation not +being rendered properly) and it also validates the doctests. It is possible to +run the checks independently by using the parameters ``lint``, ``patterns`` and +``doctests`` (e.g. ``./ci/code_checks.sh lint``). + +In addition, because a lot of people use our library, it is important that we +do not make sudden changes to the code that could have the potential to break +a lot of user code as a result, that is, we need it to be as *backwards compatible* +as possible to avoid mass breakages. + +Additional standards are outlined on the `pandas code style guide `_ + +Optional dependencies +--------------------- + +Optional dependencies (e.g. matplotlib) should be imported with the private helper +``pandas.compat._optional.import_optional_dependency``. This ensures a +consistent error message when the dependency is not met. + +All methods using an optional dependency should include a test asserting that an +``ImportError`` is raised when the optional dependency is not found. This test +should be skipped if the library is present. + +All optional dependencies should be documented in +:ref:`install.optional_dependencies` and the minimum required version should be +set in the ``pandas.compat._optional.VERSIONS`` dict. + +C (cpplint) +~~~~~~~~~~~ + +*pandas* uses the `Google `_ +standard. Google provides an open source style checker called ``cpplint``, but we +use a fork of it that can be found `here `__. 
+Here are *some* of the more common ``cpplint`` issues: + +* we restrict line-length to 80 characters to promote readability +* every header file must include a header guard to avoid name collisions if re-included + +:ref:`Continuous Integration ` will run the +`cpplint `_ tool +and report any stylistic errors in your code. Therefore, it is helpful before +submitting code to run the check yourself:: + + cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir modified-c-file + +You can also run this command on an entire directory if necessary:: + + cpplint --extensions=c,h --headers=h --filter=-readability/casting,-runtime/int,-build/include_subdir --recursive modified-c-directory + +To make your commits compliant with this standard, you can install the +`ClangFormat `_ tool, which can be +downloaded `here `__. To configure, in your home directory, +run the following command:: + + clang-format -style=google -dump-config > .clang-format + +Then modify the file to ensure that any indentation width parameters are at least four. +Once configured, you can run the tool as follows:: + + clang-format modified-c-file + +This will output what your file will look like if the changes are made, and to apply +them, run the following command:: + + clang-format -i modified-c-file + +To run the tool on an entire directory, you can run the following analogous commands:: + + clang-format modified-c-directory/*.c modified-c-directory/*.h + clang-format -i modified-c-directory/*.c modified-c-directory/*.h + +Do note that this tool is best-effort, meaning that it will try to correct as +many errors as possible, but it may not correct *all* of them. Thus, it is +recommended that you run ``cpplint`` to double check and make any other style +fixes manually. + +Python (PEP8 / black) +~~~~~~~~~~~~~~~~~~~~~ + +*pandas* follows the `PEP8 `_ standard +and uses `Black `_ and +`Flake8 `_ to ensure a consistent code +format throughout the project.
+ +:ref:`Continuous Integration ` will run those tools and +report any stylistic errors in your code. Therefore, it is helpful before +submitting code to run the check yourself:: + + black pandas + git diff upstream/master -u -- "*.py" | flake8 --diff + +to auto-format your code. Additionally, many editors have plugins that will +apply ``black`` as you edit files. + +You should use a ``black`` version >= 19.10b0 as previous versions are not compatible +with the pandas codebase. + +Optionally, you may wish to set up `pre-commit hooks `_ +to automatically run ``black`` and ``flake8`` when you make a git commit. This +can be done by installing ``pre-commit``:: + + pip install pre-commit + +and then running:: + + pre-commit install + +from the root of the pandas repository. Now ``black`` and ``flake8`` will be run +each time you commit changes. You can skip these checks with +``git commit --no-verify``. + +One caveat about ``git diff upstream/master -u -- "*.py" | flake8 --diff``: this +command will catch any stylistic errors in your changes specifically, but +beware it may not catch all of them. For example, if you delete the only +usage of an imported function, it is stylistically incorrect to import an +unused function. However, style-checking the diff will not catch this because +the actual import is not part of the diff.
Thus, for completeness, you should +run this command, though it will take longer:: + + git diff upstream/master --name-only -- "*.py" | xargs -r flake8 + +Note that on OSX, the ``-r`` flag is not available, so you have to omit it and +run this slightly modified command:: + + git diff upstream/master --name-only -- "*.py" | xargs flake8 + +Windows does not support the ``xargs`` command (unless installed for example +via the `MinGW `__ toolchain), but one can imitate the +behaviour as follows:: + + for /f %i in ('git diff upstream/master --name-only -- "*.py"') do flake8 %i + +This will get all the files being changed by the PR (and ending with ``.py``), +and run ``flake8`` on them, one after the other. + +.. _contributing.import-formatting: + +Import formatting +~~~~~~~~~~~~~~~~~ +*pandas* uses `isort `__ to standardise import +formatting across the codebase. + +A guide to import layout as per pep8 can be found `here `__. + +A summary of our current import sections ( in order ): + +* Future +* Python Standard Library +* Third Party +* ``pandas._libs``, ``pandas.compat``, ``pandas.util._*``, ``pandas.errors`` (largely not dependent on ``pandas.core``) +* ``pandas.core.dtypes`` (largely not dependent on the rest of ``pandas.core``) +* Rest of ``pandas.core.*`` +* Non-core ``pandas.io``, ``pandas.plotting``, ``pandas.tseries`` +* Local application/library specific imports + +Imports are alphabetically sorted within these sections. + + +As part of :ref:`Continuous Integration ` checks we run:: + + isort --recursive --check-only pandas + +to check that imports are correctly formatted as per the `setup.cfg`. + +If you see output like the below in :ref:`Continuous Integration ` checks: + +.. 
code-block:: shell + + Check import format using isort + ERROR: /home/travis/build/pandas-dev/pandas/pandas/io/pytables.py Imports are incorrectly sorted + Check import format using isort DONE + The command "ci/code_checks.sh" exited with 1 + +You should run:: + + isort pandas/io/pytables.py + +to automatically format imports correctly. This will modify your local copy of the files. + +The `--recursive` flag can be passed to sort all files in a directory. + +You can then verify the changes look ok, then git :ref:`commit ` and :ref:`push `. + +Backwards compatibility +~~~~~~~~~~~~~~~~~~~~~~~ + +Please try to maintain backward compatibility. *pandas* has lots of users with lots of +existing code, so don't break it if at all possible. If you think breakage is required, +clearly state why as part of the pull request. Also, be careful when changing method +signatures and add deprecation warnings where needed. Also, add the deprecated sphinx +directive to the deprecated functions or methods. + +If a function with the same arguments as the one being deprecated exist, you can use +the ``pandas.util._decorators.deprecate``: + +.. code-block:: python + + from pandas.util._decorators import deprecate + + deprecate('old_func', 'new_func', '0.21.0') + +Otherwise, you need to do it manually: + +.. code-block:: python + + import warnings + + + def old_func(): + """Summary of the function. + + .. deprecated:: 0.21.0 + Use new_func instead. + """ + warnings.warn('Use new_func instead.', FutureWarning, stacklevel=2) + new_func() + + + def new_func(): + pass + +You'll also need to + +1. Write a new test that asserts a warning is issued when calling with the deprecated argument +2. Update all of pandas existing tests and code to use the new argument + +See :ref:`contributing.warnings` for more. + +.. _contributing.type_hints: + +Type Hints +---------- + +*pandas* strongly encourages the use of :pep:`484` style type hints. 
New development should contain type hints and pull requests to annotate existing code are accepted as well! + +Style Guidelines +~~~~~~~~~~~~~~~~ + +Type imports should follow the ``from typing import ...`` convention. So rather than + +.. code-block:: python + + import typing + + primes: typing.List[int] = [] + +You should write + +.. code-block:: python + + from typing import List, Optional, Union + + primes: List[int] = [] + +``Optional`` should be used where applicable, so instead of + +.. code-block:: python + + maybe_primes: List[Union[int, None]] = [] + +You should write + +.. code-block:: python + + maybe_primes: List[Optional[int]] = [] + +In some cases in the code base classes may define class variables that shadow builtins. This causes an issue as described in `Mypy 1775 `_. The defensive solution here is to create an unambiguous alias of the builtin and use that within your annotation. For example, if you come across a definition like + +.. code-block:: python + + class SomeClass1: + str = None + +The appropriate way to annotate this would be as follows + +.. code-block:: python + + str_type = str + + class SomeClass2: + str: str_type = None + +In some cases you may be tempted to use ``cast`` from the typing module when you know better than the analyzer. This occurs particularly when using custom inference functions. For example + +.. code-block:: python + + from typing import cast + + from pandas.core.dtypes.common import is_number + + def cannot_infer_bad(obj: Union[str, int, float]): + + if is_number(obj): + ... + else: # Reasonably only str objects would reach this but... + obj = cast(str, obj) # Mypy complains without this! + return obj.upper() + +The limitation here is that while a human can reasonably understand that ``is_number`` would catch the ``int`` and ``float`` types mypy cannot make that same inference just yet (see `mypy #5206 `_). While the above works, the use of ``cast`` is **strongly discouraged**.
Where applicable a refactor of the code to appease static analysis is preferable + +.. code-block:: python + + def cannot_infer_good(obj: Union[str, int, float]): + + if isinstance(obj, str): + return obj.upper() + else: + ... + +With custom types and inference this is not always possible so exceptions are made, but every effort should be exhausted to avoid ``cast`` before going down such paths. + +Pandas-specific Types +~~~~~~~~~~~~~~~~~~~~~ + +Commonly used types specific to *pandas* will appear in `pandas._typing `_ and you should use these where applicable. This module is private for now but ultimately this should be exposed to third party libraries who want to implement type checking against pandas. + +For example, quite a few functions in *pandas* accept a ``dtype`` argument. This can be expressed as a string like ``"object"``, a ``numpy.dtype`` like ``np.int64`` or even a pandas ``ExtensionDtype`` like ``pd.CategoricalDtype``. Rather than burden the user with having to constantly annotate all of those options, this can simply be imported and reused from the pandas._typing module + +.. code-block:: python + + from pandas._typing import Dtype + + def as_type(dtype: Dtype) -> ...: + ... + +This module will ultimately house types for repeatedly used concepts like "path-like", "array-like", "numeric", etc... and can also hold aliases for commonly appearing parameters like `axis`. Development of this module is active so be sure to refer to the source for the most up to date list of available types. + +Validating Type Hints +~~~~~~~~~~~~~~~~~~~~~ + +*pandas* uses `mypy `_ to statically analyze the code base and type hints. After making any change you can ensure your type hints are correct by running + +.. code-block:: shell + + mypy pandas + +.. 
_contributing.ci: + +Testing with continuous integration +----------------------------------- + +The *pandas* test suite will run automatically on `Travis-CI `__ and +`Azure Pipelines `__ +continuous integration services, once your pull request is submitted. +However, if you wish to run the test suite on a branch prior to submitting the pull request, +then the continuous integration services need to be hooked to your GitHub repository. Instructions are here +for `Travis-CI `__ and +`Azure Pipelines `__. + +A pull-request will be considered for merging when you have an all 'green' build. If any tests are failing, +then you will get a red 'X', where you can click through to see the individual failed tests. +This is an example of a green build. + +.. image:: ../_static/ci.png + +.. note:: + + Each time you push to *your* fork, a *new* run of the tests will be triggered on the CI. + You can enable the auto-cancel feature, which removes any non-currently-running tests for that same pull-request, for + `Travis-CI here `__. + +.. _contributing.tdd: + + +Test-driven development/code writing +------------------------------------ + +*pandas* is serious about testing and strongly encourages contributors to embrace +`test-driven development (TDD) `_. +This development process "relies on the repetition of a very short development cycle: +first the developer writes an (initially failing) automated test case that defines a desired +improvement or new function, then produces the minimum amount of code to pass that test." +So, before actually writing any code, you should write your tests. Often the test can be +taken from the original GitHub issue. However, it is always worth considering additional +use cases and writing corresponding tests. + +Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, +it is worth getting in the habit of writing tests ahead of time so this is never an issue. 
+ +Like many packages, *pandas* uses `pytest +`_ and the convenient +extensions in `numpy.testing +`_. + +.. note:: + + The earliest supported pytest version is 5.0.1. + +Writing tests +~~~~~~~~~~~~~ + +All tests should go into the ``tests`` subdirectory of the specific package. +This folder contains many current examples of tests, and we suggest looking to these for +inspiration. If your test requires working with files or +network connectivity, there is more information on the `testing page +`_ of the wiki. + +The ``pandas._testing`` module has many special ``assert`` functions that +make it easier to make statements about whether Series or DataFrame objects are +equivalent. The easiest way to verify that your code is correct is to +explicitly construct the result you expect, then compare the actual result to +the expected correct result:: + + def test_pivot(self): + data = { + 'index' : ['A', 'B', 'C', 'C', 'B', 'A'], + 'columns' : ['One', 'One', 'One', 'Two', 'Two', 'Two'], + 'values' : [1., 2., 3., 3., 2., 1.] + } + + frame = DataFrame(data) + pivoted = frame.pivot(index='index', columns='columns', values='values') + + expected = DataFrame({ + 'One' : {'A' : 1., 'B' : 2., 'C' : 3.}, + 'Two' : {'A' : 1., 'B' : 2., 'C' : 3.} + }) + + assert_frame_equal(pivoted, expected) + +Please remember to add the Github Issue Number as a comment to a new test. +E.g. "# brief comment, see GH#28907" + +Transitioning to ``pytest`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +*pandas* existing test structure is *mostly* class-based, meaning that you will typically find tests wrapped in a class. + +.. code-block:: python + + class TestReallyCoolFeature: + pass + +Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer testing +framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this: + +.. 
code-block:: python + + def test_really_cool_feature(): + pass + +Using ``pytest`` +~~~~~~~~~~~~~~~~ + +Here is an example of a self-contained set of tests that illustrate multiple features that we like to use. + +* functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters +* ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``. +* using ``parametrize``: allow testing of multiple cases +* to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used +* ``fixture``, code for object construction, on a per-test basis +* using bare ``assert`` for scalars and truth-testing +* ``tm.assert_series_equal`` (and its counter part ``tm.assert_frame_equal``), for pandas object comparisons. +* the typical pattern of constructing an ``expected`` and comparing versus the ``result`` + +We would name this file ``test_cool_feature.py`` and put in an appropriate place in the ``pandas/tests/`` structure. + +.. code-block:: python + + import pytest + import numpy as np + import pandas as pd + + + @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + def test_dtypes(dtype): + assert str(np.dtype(dtype)) == dtype + + + @pytest.mark.parametrize( + 'dtype', ['float32', pytest.param('int16', marks=pytest.mark.skip), + pytest.param('int32', marks=pytest.mark.xfail( + reason='to show how it works'))]) + def test_mark(dtype): + assert str(np.dtype(dtype)) == 'float32' + + + @pytest.fixture + def series(): + return pd.Series([1, 2, 3]) + + + @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + def dtype(request): + return request.param + + + def test_series(series, dtype): + result = series.astype(dtype) + assert result.dtype == dtype + + expected = pd.Series([1, 2, 3], dtype=dtype) + tm.assert_series_equal(result, expected) + + +A test run of this yields + +.. 
code-block:: shell + + ((pandas) bash-3.2$ pytest test_cool_feature.py -v + =========================== test session starts =========================== + platform darwin -- Python 3.6.2, pytest-3.6.0, py-1.4.31, pluggy-0.4.0 + collected 11 items + + tester.py::test_dtypes[int8] PASSED + tester.py::test_dtypes[int16] PASSED + tester.py::test_dtypes[int32] PASSED + tester.py::test_dtypes[int64] PASSED + tester.py::test_mark[float32] PASSED + tester.py::test_mark[int16] SKIPPED + tester.py::test_mark[int32] xfail + tester.py::test_series[int8] PASSED + tester.py::test_series[int16] PASSED + tester.py::test_series[int32] PASSED + tester.py::test_series[int64] PASSED + +Tests that we have ``parametrized`` are now accessible via the test name, for example we could run these with ``-k int8`` to sub-select *only* those tests which match ``int8``. + + +.. code-block:: shell + + ((pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8 + =========================== test session starts =========================== + platform darwin -- Python 3.6.2, pytest-3.6.0, py-1.4.31, pluggy-0.4.0 + collected 11 items + + test_cool_feature.py::test_dtypes[int8] PASSED + test_cool_feature.py::test_series[int8] PASSED + + +.. _using-hypothesis: + +Using ``hypothesis`` +~~~~~~~~~~~~~~~~~~~~ + +Hypothesis is a library for property-based testing. Instead of explicitly +parametrizing a test, you can describe *all* valid inputs and let Hypothesis +try to find a failing input. Even better, no matter how many random examples +it tries, Hypothesis always reports a single minimal counterexample to your +assertions - often an example that you would never have thought to test. + +See `Getting Started with Hypothesis `_ +for more of an introduction, then `refer to the Hypothesis documentation +for details `_. + +.. 
code-block:: python + + import json + from hypothesis import given, strategies as st + + any_json_value = st.deferred(lambda: st.one_of( + st.none(), st.booleans(), st.floats(allow_nan=False), st.text(), + st.lists(any_json_value), st.dictionaries(st.text(), any_json_value) + )) + + + @given(value=any_json_value) + def test_json_roundtrip(value): + result = json.loads(json.dumps(value)) + assert value == result + +This test shows off several useful features of Hypothesis, as well as +demonstrating a good use-case: checking properties that should hold over +a large or complicated domain of inputs. + +To keep the Pandas test suite running quickly, parametrized tests are +preferred if the inputs or logic are simple, with Hypothesis tests reserved +for cases with complex logic or where there are too many combinations of +options or subtle interactions to test (or think of!) all of them. + +.. _contributing.warnings: + +Testing warnings +~~~~~~~~~~~~~~~~ + +By default, one of pandas' CI workers will fail if any unhandled warnings are emitted. + +If your change involves checking that a warning is actually emitted, use +``tm.assert_produces_warning(ExpectedWarning)``. + + +.. code-block:: python + + import pandas._testing as tm + + + df = pd.DataFrame() + with tm.assert_produces_warning(FutureWarning): + df.some_operation() + +We prefer this to the ``pytest.warns`` context manager because ours checks that the warning's +stacklevel is set correctly. The stacklevel is what ensures that the *user's* file name and line number +are printed in the warning, rather than something internal to pandas. It represents the number of +function calls from user code (e.g. ``df.some_operation()``) to the function that actually emits +the warning. Our linter will fail the build if you use ``pytest.warns`` in a test.
+ +If you have a test that would emit a warning, but you aren't actually testing the +warning itself (say because it's going to be removed in the future, or because we're +matching a 3rd-party library's behavior), then use ``pytest.mark.filterwarnings`` to +ignore the warning. + +.. code-block:: python + + @pytest.mark.filterwarnings("ignore:msg:category") + def test_thing(self): + ... + +If the test generates a warning of class ``category`` whose message starts +with ``msg``, the warning will be ignored and the test will pass. + +If you need finer-grained control, you can use Python's usual +`warnings module `__ +to control whether a warning is ignored / raised at different places within +a single test. + +.. code-block:: python + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + # Or use warnings.filterwarnings(...) + +Alternatively, consider breaking up the unit test. + + +Running the test suite +---------------------- + +The tests can then be run directly inside your Git clone (without having to +install *pandas*) by typing:: + + pytest pandas + +The test suite is exhaustive and takes around 20 minutes to run. Often it is +worth running only a subset of tests first around your changes before running the +entire suite. + +The easiest way to do this is with:: + + pytest pandas/path/to/test.py -k regex_matching_test_name + +Or with one of the following constructs:: + + pytest pandas/tests/[test-module].py + pytest pandas/tests/[test-module].py::[TestClass] + pytest pandas/tests/[test-module].py::[TestClass]::[test_method] + +Using `pytest-xdist `_, one can +speed up local testing on multicore machines. To use this feature, you will +need to install `pytest-xdist` via:: + + pip install pytest-xdist + +Two scripts are provided to assist with this. These scripts distribute +testing across 4 threads.
+ +On Unix variants, one can type:: + + test_fast.sh + +On Windows, one can type:: + + test_fast.bat + +This can significantly reduce the time it takes to locally run tests before +submitting a pull request. + +For more, see the `pytest `_ documentation. + +Furthermore one can run + +.. code-block:: python + + pd.test() + +with an imported pandas to run tests similarly. + +Running the performance test suite +---------------------------------- + +Performance matters and it is worth considering whether your code has introduced +performance regressions. *pandas* is in the process of migrating to +`asv benchmarks `__ +to enable easy monitoring of the performance of critical *pandas* operations. +These benchmarks are all found in the ``pandas/asv_bench`` directory. asv +supports both python2 and python3. + +To use all features of asv, you will need either ``conda`` or +``virtualenv``. For more details please check the `asv installation +webpage `_. + +To install asv:: + + pip install git+https://github.com/spacetelescope/asv + +If you need to run a benchmark, change your directory to ``asv_bench/`` and run:: + + asv continuous -f 1.1 upstream/master HEAD + +You can replace ``HEAD`` with the name of the branch you are working on, +and report benchmarks that changed by more than 10%. +The command uses ``conda`` by default for creating the benchmark +environments. If you want to use virtualenv instead, write:: + + asv continuous -f 1.1 -E virtualenv upstream/master HEAD + +The ``-E virtualenv`` option should be added to all ``asv`` commands +that run benchmarks. The default value is defined in ``asv.conf.json``. + +Running the full test suite can take up to one hour and use up to 3GB of RAM. +Usually it is sufficient to paste only a subset of the results into the pull +request to show that the committed changes do not cause unexpected performance +regressions. You can run specific benchmarks using the ``-b`` flag, which +takes a regular expression. 
For example, this will only run tests from a +``pandas/asv_bench/benchmarks/groupby.py`` file:: + + asv continuous -f 1.1 upstream/master HEAD -b ^groupby + +If you want to only run a specific group of tests from a file, you can do it +using ``.`` as a separator. For example:: + + asv continuous -f 1.1 upstream/master HEAD -b groupby.GroupByMethods + +will only run the ``GroupByMethods`` benchmark defined in ``groupby.py``. + +You can also run the benchmark suite using the version of ``pandas`` +already installed in your current Python environment. This can be +useful if you do not have virtualenv or conda, or are using the +``setup.py develop`` approach discussed above; for the in-place build +you need to set ``PYTHONPATH``, e.g. +``PYTHONPATH="$PWD/.." asv [remaining arguments]``. +You can run benchmarks using an existing Python +environment by:: + + asv run -e -E existing + +or, to use a specific Python interpreter,:: + + asv run -e -E existing:python3.6 + +This will display stderr from the benchmarks, and use your local +``python`` that comes from your ``$PATH``. + +Information on how to write a benchmark and how to use asv can be found in the +`asv documentation `_. + +Documenting your code +--------------------- + +Changes should be reflected in the release notes located in ``doc/source/whatsnew/vx.y.z.rst``. +This file contains an ongoing change log for each release. Add an entry to this file to +document your fix, enhancement or (unavoidable) breaking change. Make sure to include the +GitHub issue number when adding your entry (using ``:issue:`1234``` where ``1234`` is the +issue/pull request number). + +If your code is an enhancement, it is most likely necessary to add usage +examples to the existing documentation. This can be done following the section +regarding documentation :ref:`above `. +Further, to let users know when this feature was added, the ``versionadded`` +directive is used. The sphinx syntax for that is: + +.. code-block:: rst + + .. 
versionadded:: 0.21.0 + +This will put the text *New in version 0.21.0* wherever you put the sphinx +directive. This should also be put in the docstring when adding a new function +or method (`example `__) +or a new keyword argument (`example `__). + +Contributing your changes to *pandas* +===================================== + +.. _contributing.commit-code: + +Committing your code +-------------------- + +Keep style fixes to a separate commit to make your pull request more readable. + +Once you've made changes, you can see them by typing:: + + git status + +If you have created a new file, it is not being tracked by git. Add it by typing:: + + git add path/to/file-to-be-added.py + +Doing 'git status' again should give something like:: + + # On branch shiny-new-feature + # + # modified: /relative/path/to/file-you-added.py + # + +Finally, commit your changes to your local repository with an explanatory message. *Pandas* +uses a convention for commit message prefixes and layout. Here are +some common prefixes along with general guidelines for when to use them: + +* ENH: Enhancement, new functionality +* BUG: Bug fix +* DOC: Additions/updates to documentation +* TST: Additions/updates to tests +* BLD: Updates to the build process/scripts +* PERF: Performance improvement +* TYP: Type annotations +* CLN: Code cleanup + +The following defines how a commit message should be structured. Please reference the +relevant GitHub issues in your commit message using GH1234 or #1234. Either style +is fine, but the former is generally preferred: + +* a subject line with `< 80` chars. +* One blank line. +* Optionally, a commit message body. + +Now you can commit your changes in your local repository:: + + git commit -m + +.. 
_contributing.push-code: + +Pushing your changes +-------------------- + +When you want your changes to appear publicly on your GitHub page, push your +forked feature branch's commits:: + + git push origin shiny-new-feature + +Here ``origin`` is the default name given to your remote repository on GitHub. +You can see the remote repositories:: + + git remote -v + +If you added the upstream repository as described above you will see something +like:: + + origin git@github.com:yourname/pandas.git (fetch) + origin git@github.com:yourname/pandas.git (push) + upstream git://github.com/pandas-dev/pandas.git (fetch) + upstream git://github.com/pandas-dev/pandas.git (push) + +Now your code is on GitHub, but it is not yet a part of the *pandas* project. For that to +happen, a pull request needs to be submitted on GitHub. + +Review your code +---------------- + +When you're ready to ask for a code review, file a pull request. Before you do, once +again make sure that you have followed all the guidelines outlined in this document +regarding code style, tests, performance tests, and documentation. You should also +double check your branch changes against the branch it was based on: + +#. Navigate to your repository on GitHub -- https://github.com/your-user-name/pandas +#. Click on ``Branches`` +#. Click on the ``Compare`` button for your feature branch +#. Select the ``base`` and ``compare`` branches, if necessary. This will be ``master`` and + ``shiny-new-feature``, respectively. + +Finally, make the pull request +------------------------------ + +If everything looks good, you are ready to make a pull request. A pull request is how +code from a local repository becomes available to the GitHub community and can be looked +at and eventually merged into the master version. This pull request and its associated +changes will eventually be committed to the master branch and available in the next +release. To submit a pull request: + +#. Navigate to your repository on GitHub +#. 
Click on the ``Pull Request`` button +#. You can then click on ``Commits`` and ``Files Changed`` to make sure everything looks + okay one last time +#. Write a description of your changes in the ``Preview Discussion`` tab +#. Click ``Send Pull Request``. + +This request then goes to the repository maintainers, and they will review +the code. + +.. _contributing.update-pr: + +Updating your pull request +-------------------------- + +Based on the review you get on your pull request, you will probably need to make +some changes to the code. In that case, you can make them in your branch, +add a new commit to that branch, push it to GitHub, and the pull request will be +automatically updated. Pushing them to GitHub again is done by:: + + git push origin shiny-new-feature + +This will automatically update your pull request with the latest code and restart the +:ref:`Continuous Integration ` tests. + +Another reason you might need to update your pull request is to solve conflicts +with changes that have been merged into the master branch since you opened your +pull request. + +To do this, you need to "merge upstream master" in your branch:: + + git checkout shiny-new-feature + git fetch upstream + git merge upstream/master + +If there are no conflicts (or they could be fixed automatically), a file with a +default commit message will open, and you can simply save and quit this file. + +If there are merge conflicts, you need to solve those conflicts. See for +example at https://help.github.com/articles/resolving-a-merge-conflict-using-the-command-line/ +for an explanation on how to do this. +Once the conflicts are merged and the files where the conflicts were solved are +added, you can run ``git commit`` to save those fixes. + +If you have uncommitted changes at the moment you want to update the branch with +master, you will need to ``stash`` them prior to updating (see the +`stash docs `__). 
+This will effectively store your changes and they can be reapplied after updating. + +After the feature branch has been updated locally, you can now update your pull +request by pushing to the branch on GitHub:: + + git push origin shiny-new-feature + +Delete your merged branch (optional) +------------------------------------ + +Once your feature branch is accepted into upstream, you'll probably want to get rid of +the branch. First, merge upstream master into your branch so git knows it is safe to +delete your branch:: + + git fetch upstream + git checkout master + git merge upstream/master + +Then you can do:: + + git branch -d shiny-new-feature + +Make sure you use a lower-case ``-d``, or else git won't warn you if your feature +branch has not actually been merged. + +The branch will still exist on GitHub, so to delete it there do:: + + git push origin --delete shiny-new-feature + +.. _Gitter: https://gitter.im/pydata/pandas diff --git a/doc/source/development/contributing_docstring.rst b/doc/source/development/contributing_docstring.rst new file mode 100644 index 00000000..cb32f0e1 --- /dev/null +++ b/doc/source/development/contributing_docstring.rst @@ -0,0 +1,1006 @@ +.. _docstring: + +{{ header }} + +====================== +pandas docstring guide +====================== + +About docstrings and standards +------------------------------ + +A Python docstring is a string used to document a Python module, class, +function or method, so programmers can understand what it does without having +to read the details of the implementation. + +Also, it is a common practice to generate online (html) documentation +automatically from docstrings. `Sphinx `_ serves +this purpose. + +The next example gives an idea of what a docstring looks like: + +.. code-block:: python + + def add(num1, num2): + """ + Add up two integer numbers.
+ + This function simply wraps the `+` operator, and does not + do anything interesting, except for illustrating what is + the docstring of a very simple function. + + Parameters + ---------- + num1 : int + First number to add. + num2 : int + Second number to add. + + Returns + ------- + int + The sum of `num1` and `num2`. + + See Also + -------- + subtract : Subtract one integer from another. + + Examples + -------- + >>> add(2, 2) + 4 + >>> add(25, 0) + 25 + >>> add(10, -10) + 0 + """ + return num1 + num2 + +Some standards exist about docstrings, so they are easier to read, and they can +be exported to other formats such as html or pdf. + +The first conventions every Python docstring should follow are defined in +`PEP-257 `_. + +As PEP-257 is quite open, some other standards exist on top of it. In the +case of pandas, the numpy docstring convention is followed. The convention is +explained in this document: + +* `numpydoc docstring guide `_ + (which is based on the original `Guide to NumPy/SciPy documentation + `_) + +numpydoc is a Sphinx extension to support the numpy docstring convention. + +The standard uses reStructuredText (reST). reStructuredText is a markup +language that allows encoding styles in plain text files. Documentation +about reStructuredText can be found in: + +* `Sphinx reStructuredText primer `_ +* `Quick reStructuredText reference `_ +* `Full reStructuredText specification `_ + +Pandas has some helpers for sharing docstrings between related classes, see +:ref:`docstring.sharing`. + +The rest of this document will summarize all the above guides, and will +provide additional conventions specific to the pandas project. + +.. _docstring.tutorial: + +Writing a docstring +------------------- + +.. _docstring.general: + +General rules +~~~~~~~~~~~~~ + +Docstrings must be defined with three double-quotes. No blank lines should be +left before or after the docstring. The text starts in the next line after the +opening quotes.
The closing quotes have their own line +(meaning that they are not at the end of the last sentence). + +On rare occasions reST styles like bold text or italics will be used in +docstrings, but it is common to have inline code, which is presented between +backticks. The following are considered inline code: + +* The name of a parameter +* Python code, a module, function, built-in, type, literal... (e.g. ``os``, + ``list``, ``numpy.abs``, ``datetime.date``, ``True``) +* A pandas class (in the form ``:class:`pandas.Series```) +* A pandas method (in the form ``:meth:`pandas.Series.sum```) +* A pandas function (in the form ``:func:`pandas.to_datetime```) + +.. note:: + To display only the last component of the linked class, method or + function, prefix it with ``~``. For example, ``:class:`~pandas.Series``` + will link to ``pandas.Series`` but only display the last part, ``Series`` + as the link text. See `Sphinx cross-referencing syntax + `_ + for details. + +**Good:** + +.. code-block:: python + + def add_values(arr): + """ + Add the values in `arr`. + + This is equivalent to Python `sum` or :meth:`pandas.Series.sum`. + + Some sections are omitted here for simplicity. + """ + return sum(arr) + +**Bad:** + +.. code-block:: python + + def func(): + + """Some function. + + With several mistakes in the docstring. + + It has a blank line after the signature `def func():`. + + The text 'Some function' should go in the line after the + opening quotes of the docstring, not in the same line. + + There is a blank line between the docstring and the first line + of code `foo = 1`. + + The closing quotes should be in the next line, not in this one.""" + + foo = 1 + bar = 2 + return foo + bar + +.. _docstring.short_summary: + +Section 1: Short summary +~~~~~~~~~~~~~~~~~~~~~~~~ + +The short summary is a single sentence that expresses what the function does in +a concise way. + +The short summary must start with a capital letter, end with a dot, and fit in +a single line.
It needs to express what the object does without providing +details. For functions and methods, the short summary must start with an +infinitive verb. + +**Good:** + +.. code-block:: python + + def astype(dtype): + """ + Cast Series type. + + This section will provide further details. + """ + pass + +**Bad:** + +.. code-block:: python + + def astype(dtype): + """ + Casts Series type. + + Verb in third-person of the present simple, should be infinitive. + """ + pass + +.. code-block:: python + + def astype(dtype): + """ + Method to cast Series type. + + Does not start with verb. + """ + pass + +.. code-block:: python + + def astype(dtype): + """ + Cast Series type + + Missing dot at the end. + """ + pass + +.. code-block:: python + + def astype(dtype): + """ + Cast Series type from its current type to the new type defined in + the parameter dtype. + + Summary is too verbose and doesn't fit in a single line. + """ + pass + +.. _docstring.extended_summary: + +Section 2: Extended summary +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The extended summary provides details on what the function does. It should not +go into the details of the parameters, or discuss implementation notes, which +go in other sections. + +A blank line is left between the short summary and the extended summary. And +every paragraph in the extended summary is finished by a dot. + +The extended summary should provide details on why the function is useful and +their use cases, if it is not too generic. + +.. code-block:: python + + def unstack(): + """ + Pivot a row index to columns. + + When using a MultiIndex, a level can be pivoted so each value in + the index becomes a column. This is especially useful when a subindex + is repeated for the main index, and data is easier to visualize as a + pivot table. + + The index level will be automatically removed from the index when added + as columns. + """ + pass + +.. 
_docstring.parameters: + +Section 3: Parameters +~~~~~~~~~~~~~~~~~~~~~ + +The details of the parameters will be added in this section. This section has +the title "Parameters", followed by a line with a hyphen under each letter of +the word "Parameters". A blank line is left before the section title, but not +after, and not between the line with the word "Parameters" and the one with +the hyphens. + +After the title, each parameter in the signature must be documented, including +`*args` and `**kwargs`, but not `self`. + +The parameters are defined by their name, followed by a space, a colon, another +space, and the type (or types). Note that the space between the name and the +colon is important. Types are not defined for `*args` and `**kwargs`, but must +be defined for all other parameters. After the parameter definition, it is +required to have a line with the parameter description, which is indented, and +can have multiple lines. The description must start with a capital letter, and +finish with a dot. + +For keyword arguments with a default value, the default will be listed after a +comma at the end of the type. The exact form of the type in this case will be +"int, default 0". In some cases it may be useful to explain what the default +argument means, which can be added after a comma "int, default -1, meaning all +cpus". + +In cases where the default value is `None`, meaning that the value will not be +used, instead of "str, default None", it is preferred to write "str, optional". +When `None` is a value being used, we will keep the form "str, default None". +For example, in `df.to_csv(compression=None)`, `None` is not a value being used, +but means that compression is optional, and no compression is being used if not +provided. In this case we will use `str, optional`. Only in cases like +`func(value=None)`, where `None` is being used in the same way as `0` or `foo` +would be used, will we specify "str, int or None, default None". + +**Good:** + +..
code-block:: python + + class Series: + def plot(self, kind, color='blue', **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Parameters + ---------- + kind : str + Kind of matplotlib plot. + color : str, default 'blue' + Color name or rgb code. + **kwargs + These parameters will be passed to the matplotlib plotting + function. + """ + pass + +**Bad:** + +.. code-block:: python + + class Series: + def plot(self, kind, **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Note the blank line between the parameters title and the first + parameter. Also, note that after the name of the parameter `kind` + and before the colon, a space is missing. + + Also, note that the parameter descriptions do not start with a + capital letter, and do not finish with a dot. + + Finally, the `**kwargs` parameter is missing. + + Parameters + ---------- + + kind: str + kind of matplotlib plot + """ + pass + +.. _docstring.parameter_types: + +Parameter types +^^^^^^^^^^^^^^^ + +When specifying the parameter types, Python built-in data types can be used +directly (the Python type is preferred to the more verbose string, integer, +boolean, etc): + +* int +* float +* str +* bool + +For complex types, define the subtypes. For `dict` and `tuple`, as more than +one type is present, we use the brackets to help read the type (curly brackets +for `dict` and normal brackets for `tuple`): + +* list of int +* dict of {str : int} +* tuple of (str, int, int) +* tuple of (str,) +* set of str + +In case where there are just a set of values allowed, list them in curly +brackets and separated by commas (followed by a space). If the values are +ordinal and they have an order, list them in this order. 
Otherwise, list +the default value first, if there is one: + +* {0, 10, 25} +* {'simple', 'advanced'} +* {'low', 'medium', 'high'} +* {'cat', 'dog', 'bird'} + +If the type is defined in a Python module, the module must be specified: + +* datetime.date +* datetime.datetime +* decimal.Decimal + +If the type is in a package, the module must also be specified: + +* numpy.ndarray +* scipy.sparse.coo_matrix + +If the type is a pandas type, also specify pandas except for Series and +DataFrame: + +* Series +* DataFrame +* pandas.Index +* pandas.Categorical +* pandas.arrays.SparseArray + +If the exact type is not relevant, but must be compatible with a numpy +array, array-like can be specified. If any type that can be iterated is +accepted, iterable can be used: + +* array-like +* iterable + +If more than one type is accepted, separate them by commas, except the +last two types, which need to be separated by the word 'or': + +* int or float +* float, decimal.Decimal or None +* str or list of str + +If ``None`` is one of the accepted values, it always needs to be the last in +the list. + +For axis, the convention is to use something like: + +* axis : {0 or 'index', 1 or 'columns', None}, default None + +.. _docstring.returns: + +Section 4: Returns or Yields +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the method returns a value, it will be documented in this section. The same applies +if the method yields its output. + +The title of the section will be defined in the same way as "Parameters", +with the names "Returns" or "Yields" followed by a line with as many hyphens +as the letters in the preceding word. + +The documentation of the return is also similar to the parameters. But in this +case, no name will be provided, unless the method returns or yields more than +one value (a tuple of values). + +The types for "Returns" and "Yields" are the same as the ones for the +"Parameters". Also, the description must finish with a dot. + +For example, with a single value: + +..
code-block:: python + + def sample(): + """ + Generate and return a random number. + + The value is sampled from a continuous uniform distribution between + 0 and 1. + + Returns + ------- + float + Random number generated. + """ + return np.random.random() + +With more than one value: + +.. code-block:: python + + import string + + def random_letters(): + """ + Generate and return a sequence of random letters. + + The length of the returned string is also random, and is also + returned. + + Returns + ------- + length : int + Length of the returned string. + letters : str + String of random letters. + """ + length = np.random.randint(1, 10) + letters = ''.join(np.random.choice(string.ascii_lowercase) + for i in range(length)) + return length, letters + +If the method yields its value: + +.. code-block:: python + + def sample_values(): + """ + Generate an infinite sequence of random numbers. + + The values are sampled from a continuous uniform distribution between + 0 and 1. + + Yields + ------ + float + Random number generated. + """ + while True: + yield np.random.random() + +.. _docstring.see_also: + +Section 5: See Also +~~~~~~~~~~~~~~~~~~~ + +This section is used to let users know about pandas functionality +related to the one being documented. In rare cases, if no related methods +or functions can be found at all, this section can be skipped. + +An obvious example would be the `head()` and `tail()` methods. As `tail()` does +the equivalent as `head()` but at the end of the `Series` or `DataFrame` +instead of at the beginning, it is good to let the users know about it. 
+ +To give an intuition on what can be considered related, here there are some +examples: + +* ``loc`` and ``iloc``, as they do the same, but in one case providing indices + and in the other positions +* ``max`` and ``min``, as they do the opposite +* ``iterrows``, ``itertuples`` and ``items``, as it is easy that a user + looking for the method to iterate over columns ends up in the method to + iterate over rows, and vice-versa +* ``fillna`` and ``dropna``, as both methods are used to handle missing values +* ``read_csv`` and ``to_csv``, as they are complementary +* ``merge`` and ``join``, as one is a generalization of the other +* ``astype`` and ``pandas.to_datetime``, as users may be reading the + documentation of ``astype`` to know how to cast as a date, and the way to do + it is with ``pandas.to_datetime`` +* ``where`` is related to ``numpy.where``, as its functionality is based on it + +When deciding what is related, you should mainly use your common sense and +think about what can be useful for the users reading the documentation, +especially the less experienced ones. + +When relating to other libraries (mainly ``numpy``), use the name of the module +first (not an alias like ``np``). If the function is in a module which is not +the main one, like ``scipy.sparse``, list the full module (e.g. +``scipy.sparse.coo_matrix``). + +This section, as the previous, also has a header, "See Also" (note the capital +S and A). Also followed by the line with hyphens, and preceded by a blank line. + +After the header, we will add a line for each related method or function, +followed by a space, a colon, another space, and a short description that +illustrated what this method or function does, why is it relevant in this +context, and what are the key differences between the documented function and +the one referencing. The description must also finish with a dot. + +Note that in "Returns" and "Yields", the description is located in the +following line than the type. 
But in this section it is located in the same +line, with a colon in between. If the description does not fit in the same +line, it can continue in the next ones, but it has to be indented in them. + +For example: + +.. code-block:: python + + class Series: + def head(self): + """ + Return the first 5 elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Returns + ------- + Series + Subset of the original series with the 5 first values. + + See Also + -------- + Series.tail : Return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n. + """ + return self.iloc[:5] + +.. _docstring.notes: + +Section 6: Notes +~~~~~~~~~~~~~~~~ + +This is an optional section used for notes about the implementation of the +algorithm. Or to document technical aspects of the function behavior. + +Feel free to skip it, unless you are familiar with the implementation of the +algorithm, or you discover some counter-intuitive behavior while writing the +examples for the function. + +This section follows the same format as the extended summary section. + +.. _docstring.examples: + +Section 7: Examples +~~~~~~~~~~~~~~~~~~~ + +This is one of the most important sections of a docstring, even if it is +placed in the last position. As often, people understand concepts better +with examples, than with accurate explanations. + +Examples in docstrings, besides illustrating the usage of the function or +method, must be valid Python code, that in a deterministic way returns the +presented output, and that can be copied and run by users. + +They are presented as a session in the Python terminal. `>>>` is used to +present code. `...` is used for code continuing from the previous line. +Output is presented immediately after the last line of code generating the +output (no blank lines in between). 
Comments describing the examples can +be added with blank lines before and after them. + +The way to present examples is as follows: + +1. Import required libraries (except ``numpy`` and ``pandas``) + +2. Create the data required for the example + +3. Show a very basic example that gives an idea of the most common use case + +4. Add examples with explanations that illustrate how the parameters can be + used for extended functionality + +A simple example could be: + +.. code-block:: python + + class Series: + + def head(self, n=5): + """ + Return the first elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Parameters + ---------- + n : int + Number of values to return. + + Returns + ------- + Series + Subset of the original series with the n first values. + + See Also + -------- + tail : Return the last n elements of the Series. + + Examples + -------- + >>> s = pd.Series(['Ant', 'Bear', 'Cow', 'Dog', 'Falcon', + ... 'Lion', 'Monkey', 'Rabbit', 'Zebra']) + >>> s.head() + 0 Ant + 1 Bear + 2 Cow + 3 Dog + 4 Falcon + dtype: object + + With the `n` parameter, we can change the number of returned rows: + + >>> s.head(n=3) + 0 Ant + 1 Bear + 2 Cow + dtype: object + """ + return self.iloc[:n] + +The examples should be as concise as possible. In cases where the complexity of +the function requires long examples, it is recommended to use blocks with headers +in bold. Use double star ``**`` to make a text bold, like in ``**this example**``. + +.. _docstring.example_conventions: + +Conventions for the examples +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Code in examples is assumed to always start with these two lines which are not +shown: + +.. code-block:: python + + import numpy as np + import pandas as pd + +Any other module used in the examples must be explicitly imported, one per line (as +recommended in :pep:`8#imports`) +and avoiding aliases.
Avoid excessive imports, but if needed, imports from +the standard library go first, followed by third-party libraries (like +matplotlib). + +When illustrating examples with a single ``Series`` use the name ``s``, and if +illustrating with a single ``DataFrame`` use the name ``df``. For indices, +``idx`` is the preferred name. If a set of homogeneous ``Series`` or +``DataFrame`` is used, name them ``s1``, ``s2``, ``s3``... or ``df1``, +``df2``, ``df3``... If the data is not homogeneous, and more than one structure +is needed, name them with something meaningful, for example ``df_main`` and +``df_to_join``. + +Data used in the example should be as compact as possible. The number of rows +is recommended to be around 4, but make it a number that makes sense for the +specific example. For example in the ``head`` method, it requires to be higher +than 5, to show the example with the default values. If doing the ``mean``, we +could use something like ``[1, 2, 3]``, so it is easy to see that the value +returned is the mean. + +For more complex examples (grouping for example), avoid using data without +interpretation, like a matrix of random numbers with columns A, B, C, D... +And instead use a meaningful example, which makes it easier to understand the +concept. Unless required by the example, use names of animals, to keep examples +consistent. And numerical properties of them. + +When calling the method, keywords arguments ``head(n=3)`` are preferred to +positional arguments ``head(3)``. + +**Good:** + +.. code-block:: python + + class Series: + + def mean(self): + """ + Compute the mean of the input. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.mean() + 2 + """ + pass + + + def fillna(self, value): + """ + Replace missing values by `value`. + + Examples + -------- + >>> s = pd.Series([1, np.nan, 3]) + >>> s.fillna(0) + [1, 0, 3] + """ + pass + + def groupby_mean(self): + """ + Group by index and return mean. 
+ + Examples + -------- + >>> s = pd.Series([380., 370., 24., 26], + ... name='max_speed', + ... index=['falcon', 'falcon', 'parrot', 'parrot']) + >>> s.groupby_mean() + index + falcon 375.0 + parrot 25.0 + Name: max_speed, dtype: float64 + """ + pass + + def contains(self, pattern, case_sensitive=True, na=numpy.nan): + """ + Return whether each value contains `pattern`. + + In this case, we are illustrating how to use sections, even + if the example is simple enough and does not require them. + + Examples + -------- + >>> s = pd.Series('Antelope', 'Lion', 'Zebra', np.nan) + >>> s.contains(pattern='a') + 0 False + 1 False + 2 True + 3 NaN + dtype: bool + + **Case sensitivity** + + With `case_sensitive` set to `False` we can match `a` with both + `a` and `A`: + + >>> s.contains(pattern='a', case_sensitive=False) + 0 True + 1 False + 2 True + 3 NaN + dtype: bool + + **Missing values** + + We can fill missing values in the output using the `na` parameter: + + >>> s.contains(pattern='a', na=False) + 0 False + 1 False + 2 True + 3 False + dtype: bool + """ + pass + +**Bad:** + +.. code-block:: python + + def method(foo=None, bar=None): + """ + A sample DataFrame method. + + Do not import numpy and pandas. + + Try to use meaningful data, when it makes the example easier + to understand. + + Try to avoid positional arguments like in `df.method(1)`. They + can be all right if previously defined with a meaningful name, + like in `present_value(interest_rate)`, but avoid them otherwise. + + When presenting the behavior with different parameters, do not place + all the calls one next to the other. Instead, add a short sentence + explaining what the example shows. + + Examples + -------- + >>> import numpy as np + >>> import pandas as pd + >>> df = pd.DataFrame(np.random.randn(3, 3), + ... columns=('a', 'b', 'c')) + >>> df.method(1) + 21 + >>> df.method(bar=14) + 123 + """ + pass + + +.. 
_docstring.doctest_tips: + +Tips for getting your examples pass the doctests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Getting the examples to pass the doctests in the validation script can sometimes +be tricky. Here are some attention points: + +* Import all needed libraries (except for pandas and numpy, those are already + imported as ``import pandas as pd`` and ``import numpy as np``) and define + all variables you use in the example. + +* Try to avoid using random data. However random data might be OK in some + cases, like if the function you are documenting deals with probability + distributions, or if the amount of data needed to make the function result + meaningful is too much, such that creating it manually is very cumbersome. + In those cases, always use a fixed random seed to make the generated examples + predictable. Example:: + + >>> np.random.seed(42) + >>> df = pd.DataFrame({'normal': np.random.normal(100, 5, 20)}) + +* If you have a code snippet that wraps multiple lines, you need to use '...' + on the continued lines: :: + + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], index=['a', 'b'], + ... columns=['A', 'B', 'C']) + +* If you want to show a case where an exception is raised, you can do:: + + >>> pd.to_datetime(["712-01-01"]) + Traceback (most recent call last): + OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 712-01-01 00:00:00 + + It is essential to include the "Traceback (most recent call last):", but for + the actual error only the error name is sufficient. + +* If there is a small part of the result that can vary (e.g. a hash in an object + representation), you can use ``...`` to represent this part. + + If you want to show that ``s.plot()`` returns a matplotlib AxesSubplot object, + this will fail the doctest :: + + >>> s.plot() + <matplotlib.axes._subplots.AxesSubplot at 0x7efd0c0b0690> + + However, you can do (notice the comment that needs to be added) :: + + >>> s.plot() # doctest: +ELLIPSIS + <matplotlib.axes._subplots.AxesSubplot at ...> + + +.. 
_docstring.example_plots: + +Plots in examples +^^^^^^^^^^^^^^^^^ + +There are some methods in pandas returning plots. To render the plots generated +by the examples in the documentation, the ``.. plot::`` directive exists. + +To use it, place the next code after the "Examples" header as shown below. The +plot will be generated automatically when building the documentation. + +.. code-block:: python + + class Series: + def plot(self): + """ + Generate a plot with the `Series` data. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> s = pd.Series([1, 2, 3]) + >>> s.plot() + """ + pass + +.. _docstring.sharing: + +Sharing docstrings +------------------ + +Pandas has a system for sharing docstrings, with slight variations, between +classes. This helps us keep docstrings consistent, while keeping things clear +for the user reading. It comes at the cost of some complexity when writing. + +Each shared docstring will have a base template with variables, like +``%(klass)s``. The variables are filled in later on using the ``Substitution`` +decorator. Finally, docstrings can be appended to with the ``Appender`` +decorator. + +In this example, we'll create a parent docstring normally (this is like +``pandas.core.generic.NDFrame``). Then we'll have two children (like +``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll +substitute the children's class names in this docstring. + +.. code-block:: python + + class Parent: + def my_function(self): + """Apply my function to %(klass)s.""" + ... + + + class ChildA(Parent): + @Substitution(klass="ChildA") + @Appender(Parent.my_function.__doc__) + def my_function(self): + ... + + + class ChildB(Parent): + @Substitution(klass="ChildB") + @Appender(Parent.my_function.__doc__) + def my_function(self): + ... + +The resulting docstrings are + +.. code-block:: python + + >>> print(Parent.my_function.__doc__) + Apply my function to %(klass)s.
+ >>> print(ChildA.my_function.__doc__) + Apply my function to ChildA. + >>> print(ChildB.my_function.__doc__) + Apply my function to ChildB. + +Notice two things: + +1. We "append" the parent docstring to the children docstrings, which are + initially empty. +2. Python decorators are applied inside out. So the order is Appender then + Substitution, even though Substitution comes first in the file. + +Our files will often contain a module-level ``_shared_doc_kwargs`` with some +common substitution values (things like ``klass``, ``axes``, etc). + +You can substitute and append in one shot with something like + +.. code-block:: python + + @Appender(template % _shared_doc_kwargs) + def my_function(self): + ... + +where ``template`` may come from a module-level ``_shared_docs`` dictionary +mapping function names to docstrings. Wherever possible, we prefer using +``Appender`` and ``Substitution``, since the docstring-writing process is +slightly closer to normal. + +See ``pandas.core.generic.NDFrame.fillna`` for an example template, and +``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna`` +for the filled versions. diff --git a/doc/source/development/developer.rst b/doc/source/development/developer.rst new file mode 100644 index 00000000..33646e5d --- /dev/null +++ b/doc/source/development/developer.rst @@ -0,0 +1,185 @@ +.. _developer: + +{{ header }} + +.. currentmodule:: pandas + +********* +Developer +********* + +This section will focus on downstream applications of pandas. + +.. _apache.parquet: + +Storing pandas DataFrame objects in Apache Parquet format +--------------------------------------------------------- + +The `Apache Parquet <https://github.com/apache/parquet-format>`__ format +provides key-value metadata at the file and column level, stored in the footer +of the Parquet file: + +.. code-block:: shell + + 5: optional list<KeyValue> key_value_metadata + +where ``KeyValue`` is + +.. 
code-block:: shell + + struct KeyValue { + 1: required string key + 2: optional string value + } + +So that a ``pandas.DataFrame`` can be faithfully reconstructed, we store a +``pandas`` metadata key in the ``FileMetaData`` with the value stored as: + +.. code-block:: text + + {'index_columns': [<descr0>, <descr1>, ...], + 'column_indexes': [<ci0>, <ci1>, ..., <ciN>], + 'columns': [<c0>, <c1>, ...], + 'pandas_version': $VERSION, + 'creator': { + 'library': $LIBRARY, + 'version': $LIBRARY_VERSION + }} + +The "descriptor" values ``<descr0>`` in the ``'index_columns'`` field are +strings (referring to a column) or dictionaries with values as described below. + +The ``<c0>``/``<ci0>`` and so forth are dictionaries containing the metadata +for each column, *including the index columns*. This has JSON form: + +.. code-block:: text + + {'name': column_name, + 'field_name': parquet_column_name, + 'pandas_type': pandas_type, + 'numpy_type': numpy_type, + 'metadata': metadata} + +See below for the detailed specification for these. + +Index Metadata Descriptors +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``RangeIndex`` can be stored as metadata only, not requiring serialization. The +descriptor format for these is as follows: + +.. code-block:: python + + index = pd.RangeIndex(0, 10, 2) + {'kind': 'range', + 'name': index.name, + 'start': index.start, + 'stop': index.stop, + 'step': index.step} + +Other index types must be serialized as data columns along with the other +DataFrame columns. The metadata for these is a string indicating the name of +the field in the data columns, for example ``'__index_level_0__'``. + +If an index has a non-None ``name`` attribute, and there is no other column +with a name matching that value, then the ``index.name`` value can be used as +the descriptor. Otherwise (for unnamed indexes and ones with names colliding +with other column names) a disambiguating name with pattern matching +``__index_level_\d+__`` should be used.
In cases of named indexes as data +columns, the ``name`` attribute is always stored in the column descriptors as +above. + +Column Metadata +~~~~~~~~~~~~~~~ + +``pandas_type`` is the logical type of the column, and is one of: + +* Boolean: ``'bool'`` +* Integers: ``'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64'`` +* Floats: ``'float16', 'float32', 'float64'`` +* Date and Time Types: ``'datetime', 'datetimetz'``, ``'timedelta'`` +* String: ``'unicode', 'bytes'`` +* Categorical: ``'categorical'`` +* Other Python objects: ``'object'`` + +The ``numpy_type`` is the physical storage type of the column, which is the +result of ``str(dtype)`` for the underlying NumPy array that holds the data. So +for ``datetimetz`` this is ``datetime64[ns]`` and for categorical, it may be +any of the supported integer categorical types. + +The ``metadata`` field is ``None`` except for: + +* ``datetimetz``: ``{'timezone': zone, 'unit': 'ns'}``, e.g. ``{'timezone': + 'America/New_York', 'unit': 'ns'}``. The ``'unit'`` is optional, and if + omitted it is assumed to be nanoseconds. +* ``categorical``: ``{'num_categories': K, 'ordered': is_ordered, 'type': $TYPE}`` + + * Here ``'type'`` is optional, and can be a nested pandas type specification + here (but not categorical) + +* ``unicode``: ``{'encoding': encoding}`` + + * The encoding is optional, and if not present is UTF-8 + +* ``object``: ``{'encoding': encoding}``. Objects can be serialized and stored + in ``BYTE_ARRAY`` Parquet columns. The encoding can be one of: + + * ``'pickle'`` + * ``'bson'`` + * ``'json'`` + +* ``timedelta``: ``{'unit': 'ns'}``. The ``'unit'`` is optional, and if omitted + it is assumed to be nanoseconds. This metadata is optional altogether. + +For types other than these, the ``'metadata'`` key can be +omitted. Implementations can assume ``None`` if the key is not present. + +As an example of fully-formed metadata: + +.. 
code-block:: text + + {'index_columns': ['__index_level_0__'], + 'column_indexes': [ + {'name': None, + 'field_name': 'None', + 'pandas_type': 'unicode', + 'numpy_type': 'object', + 'metadata': {'encoding': 'UTF-8'}} + ], + 'columns': [ + {'name': 'c0', + 'field_name': 'c0', + 'pandas_type': 'int8', + 'numpy_type': 'int8', + 'metadata': None}, + {'name': 'c1', + 'field_name': 'c1', + 'pandas_type': 'bytes', + 'numpy_type': 'object', + 'metadata': None}, + {'name': 'c2', + 'field_name': 'c2', + 'pandas_type': 'categorical', + 'numpy_type': 'int16', + 'metadata': {'num_categories': 1000, 'ordered': False}}, + {'name': 'c3', + 'field_name': 'c3', + 'pandas_type': 'datetimetz', + 'numpy_type': 'datetime64[ns]', + 'metadata': {'timezone': 'America/Los_Angeles'}}, + {'name': 'c4', + 'field_name': 'c4', + 'pandas_type': 'object', + 'numpy_type': 'object', + 'metadata': {'encoding': 'pickle'}}, + {'name': None, + 'field_name': '__index_level_0__', + 'pandas_type': 'int64', + 'numpy_type': 'int64', + 'metadata': None} + ], + 'pandas_version': '0.20.0', + 'creator': { + 'library': 'pyarrow', + 'version': '0.13.0' + }} diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst new file mode 100644 index 00000000..89d43e8a --- /dev/null +++ b/doc/source/development/extending.rst @@ -0,0 +1,504 @@ +.. _extending: + +{{ header }} + +**************** +Extending pandas +**************** + +While pandas provides a rich set of methods, containers, and data types, your +needs may not be fully satisfied. Pandas offers a few options for extending +pandas. + +.. _extending.register-accessors: + +Registering custom accessors +---------------------------- + +Libraries can use the decorators +:func:`pandas.api.extensions.register_dataframe_accessor`, +:func:`pandas.api.extensions.register_series_accessor`, and +:func:`pandas.api.extensions.register_index_accessor`, to add additional +"namespaces" to pandas objects. 
All of these follow a similar convention: you +decorate a class, providing the name of attribute to add. The class's +``__init__`` method gets the object being decorated. For example: + +.. code-block:: python + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor: + def __init__(self, pandas_obj): + self._validate(pandas_obj) + self._obj = pandas_obj + + @staticmethod + def _validate(obj): + # verify there is a column latitude and a column longitude + if 'latitude' not in obj.columns or 'longitude' not in obj.columns: + raise AttributeError("Must have 'latitude' and 'longitude'.") + + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Now users can access your methods using the ``geo`` namespace: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map + +This can be a convenient way to extend pandas objects without subclassing them. +If you write a custom accessor, make a pull request adding it to our +:ref:`ecosystem` page. + +We highly recommend validating the data in your accessor's `__init__`. +In our ``GeoAccessor``, we validate that the data contains the expected columns, +raising an ``AttributeError`` when the validation fails. +For a ``Series`` accessor, you should validate the ``dtype`` if the accessor +applies only to certain dtypes. + + +.. _extending.extension-types: + +Extension types +--------------- + +.. versionadded:: 0.23.0 + +.. warning:: + + The :class:`pandas.api.extensions.ExtensionDtype` and :class:`pandas.api.extensions.ExtensionArray` APIs are new and + experimental. They may change between versions without warning. 
+ +Pandas defines an interface for implementing data types and arrays that *extend* +NumPy's type system. Pandas itself uses the extension system for some types +that aren't built into NumPy (categorical, period, interval, datetime with +timezone). + +Libraries can define a custom array and data type. When pandas encounters these +objects, they will be handled properly (i.e. not converted to an ndarray of +objects). Many methods like :func:`pandas.isna` will dispatch to the extension +type's implementation. + +If you're building a library that implements the interface, please publicize it +on :ref:`ecosystem.extensions`. + +The interface consists of two classes. + +:class:`~pandas.api.extensions.ExtensionDtype` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A :class:`pandas.api.extensions.ExtensionDtype` is similar to a ``numpy.dtype`` object. It describes the +data type. Implementors are responsible for a few unique items like the name. + +One particularly important item is the ``type`` property. This should be the +class that is the scalar type for your data. For example, if you were writing an +extension array for IP Address data, this might be ``ipaddress.IPv4Address``. + +See the `extension dtype source`_ for interface definition. + +.. versionadded:: 0.24.0 + +:class:`pandas.api.extension.ExtensionDtype` can be registered to pandas to allow creation via a string dtype name. +This allows one to instantiate ``Series`` and ``.astype()`` with a registered string name, for +example ``'category'`` is a registered string accessor for the ``CategoricalDtype``. + +See the `extension dtype dtypes`_ for more on how to register dtypes. + +:class:`~pandas.api.extensions.ExtensionArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This class provides all the array-like functionality. ExtensionArrays are +limited to 1 dimension. An ExtensionArray is linked to an ExtensionDtype via the +``dtype`` attribute. 
+ +Pandas makes no restrictions on how an extension array is created via its +``__new__`` or ``__init__``, and puts no restrictions on how you store your +data. We do require that your array be convertible to a NumPy array, even if +this is relatively expensive (as it is for ``Categorical``). + +They may be backed by none, one, or many NumPy arrays. For example, +``pandas.Categorical`` is an extension array backed by two arrays, +one for codes and one for categories. An array of IPv6 addresses may +be backed by a NumPy structured array with two fields, one for the +lower 64 bits and one for the upper 64 bits. Or they may be backed +by some other storage type, like Python lists. + +See the `extension array source`_ for the interface definition. The docstrings +and comments contain guidance for properly implementing the interface. + +.. _extending.extension.operator: + +:class:`~pandas.api.extensions.ExtensionArray` Operator Support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 0.24.0 + +By default, there are no operators defined for the class :class:`~pandas.api.extensions.ExtensionArray`. +There are two approaches for providing operator support for your ExtensionArray: + +1. Define each of the operators on your ``ExtensionArray`` subclass. +2. Use an operator implementation from pandas that depends on operators that are already defined + on the underlying elements (scalars) of the ExtensionArray. + +.. note:: + + Regardless of the approach, you may want to set ``__array_priority__`` + if you want your implementation to be called when involved in binary operations + with NumPy arrays. + +For the first approach, you define selected operators, e.g., ``__add__``, ``__le__``, etc. that +you want your ``ExtensionArray`` subclass to support. + +The second approach assumes that the underlying elements (i.e., scalar type) of the ``ExtensionArray`` +have the individual operators already defined. 
In other words, if your ``ExtensionArray`` +named ``MyExtensionArray`` is implemented so that each element is an instance +of the class ``MyExtensionElement``, then if the operators are defined +for ``MyExtensionElement``, the second approach will automatically +define the operators for ``MyExtensionArray``. + +A mixin class, :class:`~pandas.api.extensions.ExtensionScalarOpsMixin`, supports this second +approach. If developing an ``ExtensionArray`` subclass, for example ``MyExtensionArray``, +you can simply include ``ExtensionScalarOpsMixin`` as a parent class of ``MyExtensionArray``, +and then call the methods :meth:`~MyExtensionArray._add_arithmetic_ops` and/or +:meth:`~MyExtensionArray._add_comparison_ops` to hook the operators into +your ``MyExtensionArray`` class, as follows: + +.. code-block:: python + + from pandas.api.extensions import ExtensionArray, ExtensionScalarOpsMixin + + class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin): + pass + + + MyExtensionArray._add_arithmetic_ops() + MyExtensionArray._add_comparison_ops() + + +.. note:: + + Since ``pandas`` automatically calls the underlying operator on each + element one-by-one, this might not be as performant as implementing your own + version of the associated operators directly on the ``ExtensionArray``. + +For arithmetic operations, this implementation will try to reconstruct a new +``ExtensionArray`` with the result of the element-wise operation. Whether +or not that succeeds depends on whether the operation returns a result +that's valid for the ``ExtensionArray``. If an ``ExtensionArray`` cannot +be reconstructed, an ndarray containing the scalars is returned instead. + +For ease of implementation and consistency with operations between pandas +and NumPy ndarrays, we recommend *not* handling Series and Indexes in your binary ops. +Instead, you should detect these cases and return ``NotImplemented``. +When pandas encounters an operation like ``op(Series, ExtensionArray)``, pandas +will + +1. 
unbox the array from the ``Series`` (``Series.array``) +2. call ``result = op(values, ExtensionArray)`` +3. re-box the result in a ``Series`` + +.. _extending.extension.ufunc: + +NumPy Universal Functions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` implements ``__array_ufunc__``. As part of the implementation, +pandas unboxes the ``ExtensionArray`` from the :class:`Series`, applies the ufunc, +and re-boxes it if necessary. + +If applicable, we highly recommend that you implement ``__array_ufunc__`` in your +extension array to avoid coercion to an ndarray. See +`the numpy documentation <https://docs.scipy.org/doc/numpy/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html>`__ +for an example. + +As part of your implementation, we require that you defer to pandas when a pandas +container (:class:`Series`, :class:`DataFrame`, :class:`Index`) is detected in ``inputs``. +If any of those is present, you should return ``NotImplemented``. Pandas will take care of +unboxing the array from the container and re-calling the ufunc with the unwrapped input. + +.. _extending.extension.testing: + +Testing extension arrays +^^^^^^^^^^^^^^^^^^^^^^^^ + +We provide a test suite for ensuring that your extension arrays satisfy the expected +behavior. To use the test suite, you must provide several pytest fixtures and inherit +from the base test class. The required fixtures are found in +https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/conftest.py. + +To use a test, subclass it: + +.. code-block:: python + + from pandas.tests.extension import base + + + class TestConstructors(base.BaseConstructorsTests): + pass + + +See https://github.com/pandas-dev/pandas/blob/master/pandas/tests/extension/base/__init__.py +for a list of all the tests available. + +.. 
_extending.extension.arrow: + +Compatibility with Apache Arrow +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An ``ExtensionArray`` can support conversion to / from ``pyarrow`` arrays +(and thus support for example serialization to the Parquet file format) +by implementing two methods: ``ExtensionArray.__arrow_array__`` and +``ExtensionDtype.__from_arrow__``. + +The ``ExtensionArray.__arrow_array__`` ensures that ``pyarrow`` knows how +to convert the specific extension array into a ``pyarrow.Array`` (also when +included as a column in a pandas DataFrame): + +.. code-block:: python + + class MyExtensionArray(ExtensionArray): + ... + + def __arrow_array__(self, type=None): + # convert the underlying array values to a pyarrow Array + import pyarrow + return pyarrow.array(..., type=type) + +The ``ExtensionDtype.__from_arrow__`` method then controls the conversion +back from pyarrow to a pandas ExtensionArray. This method receives a pyarrow +``Array`` or ``ChunkedArray`` as its only argument and is expected to return the +appropriate pandas ``ExtensionArray`` for this dtype and the passed values: + +.. code-block:: none + + class ExtensionDtype: + ... + + def __from_arrow__(self, array: pyarrow.Array/ChunkedArray) -> ExtensionArray: + ... + +See more in the `Arrow documentation `__. + +Those methods have been implemented for the nullable integer and string extension +dtypes included in pandas, and ensure roundtrip to pyarrow and the Parquet file format. + +.. _extension dtype dtypes: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/dtypes.py +.. _extension dtype source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/dtypes/base.py +.. _extension array source: https://github.com/pandas-dev/pandas/blob/master/pandas/core/arrays/base.py + +.. _extending.subclassing-pandas: + +Subclassing pandas data structures +---------------------------------- + +.. warning:: There are some easier alternatives before considering subclassing ``pandas`` data structures.
+ + 1. Extensible method chains with :ref:`pipe ` + + 2. Use *composition*. See `here `_. + + 3. Extending by :ref:`registering an accessor ` + + 4. Extending by :ref:`extension type ` + +This section describes how to subclass ``pandas`` data structures to meet more specific needs. There are two points that need attention: + +1. Override constructor properties. +2. Define original properties + +.. note:: + + You can find a nice example in `geopandas `_ project. + +Override constructor properties +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Each data structure has several *constructor properties* for returning a new +data structure as the result of an operation. By overriding these properties, +you can retain subclasses through ``pandas`` data manipulations. + +There are 3 constructor properties to be defined: + +* ``_constructor``: Used when a manipulation result has the same dimensions as the original. +* ``_constructor_sliced``: Used when a manipulation result has one lower dimension than the original, such as ``DataFrame`` single columns slicing. +* ``_constructor_expanddim``: Used when a manipulation result has one higher dimension than the original, such as ``Series.to_frame()``. + +The following table shows how ``pandas`` data structures define constructor properties by default. + +=========================== ======================= ============= +Property Attributes ``Series`` ``DataFrame`` +=========================== ======================= ============= +``_constructor`` ``Series`` ``DataFrame`` +``_constructor_sliced`` ``NotImplementedError`` ``Series`` +``_constructor_expanddim`` ``DataFrame`` ``NotImplementedError`` +=========================== ======================= ============= + +The example below shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame`` overriding constructor properties. + +..
code-block:: python + + class SubclassedSeries(pd.Series): + + @property + def _constructor(self): + return SubclassedSeries + + @property + def _constructor_expanddim(self): + return SubclassedDataFrame + + + class SubclassedDataFrame(pd.DataFrame): + + @property + def _constructor(self): + return SubclassedDataFrame + + @property + def _constructor_sliced(self): + return SubclassedSeries + +.. code-block:: python + + >>> s = SubclassedSeries([1, 2, 3]) + >>> type(s) + + + >>> to_framed = s.to_frame() + >>> type(to_framed) + + + >>> df = SubclassedDataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> type(df) + + + >>> sliced1 = df[['A', 'B']] + >>> sliced1 + A B + 0 1 4 + 1 2 5 + 2 3 6 + + >>> type(sliced1) + + + >>> sliced2 = df['A'] + >>> sliced2 + 0 1 + 1 2 + 2 3 + Name: A, dtype: int64 + + >>> type(sliced2) + + +Define original properties +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To let original data structures have additional properties, you should let ``pandas`` know what properties are added. ``pandas`` maps unknown properties to data names overriding ``__getattribute__``. Defining original properties can be done in one of 2 ways: + +1. Define ``_internal_names`` and ``_internal_names_set`` for temporary properties which WILL NOT be passed to manipulation results. +2. Define ``_metadata`` for normal properties which will be passed to manipulation results. + +Below is an example to define two original properties, "internal_cache" as a temporary property and "added_property" as a normal property + +.. code-block:: python + + class SubclassedDataFrame2(pd.DataFrame): + + # temporary properties + _internal_names = pd.DataFrame._internal_names + ['internal_cache'] + _internal_names_set = set(_internal_names) + + # normal properties + _metadata = ['added_property'] + + @property + def _constructor(self): + return SubclassedDataFrame2 + +.. 
code-block:: python + + >>> df = SubclassedDataFrame2({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]}) + >>> df + A B C + 0 1 4 7 + 1 2 5 8 + 2 3 6 9 + + >>> df.internal_cache = 'cached' + >>> df.added_property = 'property' + + >>> df.internal_cache + cached + >>> df.added_property + property + + # properties defined in _internal_names are reset after manipulation + >>> df[['A', 'B']].internal_cache + AttributeError: 'SubclassedDataFrame2' object has no attribute 'internal_cache' + + # properties defined in _metadata are retained + >>> df[['A', 'B']].added_property + property + +.. _extending.plotting-backends: + +Plotting backends +----------------- + +Starting in 0.25 pandas can be extended with third-party plotting backends. The +main idea is letting users select a plotting backend different than the provided +one based on Matplotlib. For example: + +.. code-block:: python + + >>> pd.set_option('plotting.backend', 'backend.module') + >>> pd.Series([1, 2, 3]).plot() + +This would be more or less equivalent to: + +.. code-block:: python + + >>> import backend.module + >>> backend.module.plot(pd.Series([1, 2, 3])) + +The backend module can then use other visualization tools (Bokeh, Altair,...) +to generate the plots. + +Libraries implementing the plotting backend should use `entry points `__ +to make their backend discoverable to pandas. The key is ``"pandas_plotting_backends"``. For example, pandas +registers the default "matplotlib" backend as follows. + +.. code-block:: python + + # in setup.py + setup( # noqa: F821 + ..., + entry_points={ + "pandas_plotting_backends": [ + "matplotlib = pandas:plotting._matplotlib", + ], + }, + ) + + +More information on how to implement a third-party plotting backend can be found at +https://github.com/pandas-dev/pandas/blob/master/pandas/plotting/__init__.py#L1.
diff --git a/doc/source/development/index.rst b/doc/source/development/index.rst new file mode 100644 index 00000000..f8a6bb6d --- /dev/null +++ b/doc/source/development/index.rst @@ -0,0 +1,23 @@ +{{ header }} + +.. _development: + +=========== +Development +=========== + +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + + contributing + code_style + maintaining + internals + extending + developer + policies + roadmap + meeting diff --git a/doc/source/development/internals.rst b/doc/source/development/internals.rst new file mode 100644 index 00000000..748caae2 --- /dev/null +++ b/doc/source/development/internals.rst @@ -0,0 +1,108 @@ +.. _internals: + +{{ header }} + +********* +Internals +********* + +This section will provide a look into some of pandas internals. It's primarily +intended for developers of pandas itself. + +Indexing +-------- + +In pandas there are a few objects implemented which can serve as valid +containers for the axis labels: + +* ``Index``: the generic "ordered set" object, an ndarray of object dtype + assuming nothing about its contents. The labels must be hashable (and + likely immutable) and unique. Populates a dict of label to location in + Cython to do ``O(1)`` lookups. +* ``Int64Index``: a version of ``Index`` highly optimized for 64-bit integer + data, such as time stamps +* ``Float64Index``: a version of ``Index`` highly optimized for 64-bit float data +* ``MultiIndex``: the standard hierarchical index object +* ``DatetimeIndex``: An Index object with ``Timestamp`` boxed elements (impl are the int64 values) +* ``TimedeltaIndex``: An Index object with ``Timedelta`` boxed elements (impl are the int64 values) +* ``PeriodIndex``: An Index object with Period elements + +There are functions that make the creation of a regular index easy: + +* ``date_range``: fixed frequency date range generated from a time rule or + DateOffset.
An ndarray of Python datetime objects +* ``period_range``: fixed frequency date range generated from a time rule or + DateOffset. An ndarray of ``Period`` objects, representing timespans + +The motivation for having an ``Index`` class in the first place was to enable +different implementations of indexing. This means that it's possible for you, +the user, to implement a custom ``Index`` subclass that may be better suited to +a particular application than the ones provided in pandas. + +From an internal implementation point of view, the relevant methods that an +``Index`` must define are one or more of the following (depending on how +incompatible the new object internals are with the ``Index`` functions): + +* ``get_loc``: returns an "indexer" (an integer, or in some cases a + slice object) for a label +* ``slice_locs``: returns the "range" to slice between two labels +* ``get_indexer``: Computes the indexing vector for reindexing / data + alignment purposes. See the source / docstrings for more on this +* ``get_indexer_non_unique``: Computes the indexing vector for reindexing / data + alignment purposes when the index is non-unique. See the source / docstrings + for more on this +* ``reindex``: Does any pre-conversion of the input index then calls + ``get_indexer`` +* ``union``, ``intersection``: computes the union or intersection of two + Index objects +* ``insert``: Inserts a new label into an Index, yielding a new object +* ``delete``: Delete a label, yielding a new object +* ``drop``: Deletes a set of labels +* ``take``: Analogous to ndarray.take + +MultiIndex +~~~~~~~~~~ + +Internally, the ``MultiIndex`` consists of a few things: the **levels**, the +integer **codes** (until version 0.24 named *labels*), and the level **names**: + +.. 
ipython:: python + + index = pd.MultiIndex.from_product([range(3), ['one', 'two']], + names=['first', 'second']) + index + index.levels + index.codes + index.names + +You can probably guess that the codes determine which unique element is +identified with that location at each layer of the index. It's important to +note that sortedness is determined **solely** from the integer codes and does +not check (or care) whether the levels themselves are sorted. Fortunately, the +constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but +if you compute the levels and codes yourself, please be careful. + +Values +~~~~~~ + +Pandas extends NumPy's type system with custom types, like ``Categorical`` or +datetimes with a timezone, so we have multiple notions of "values". For 1-D +containers (``Index`` classes and ``Series``) we have the following convention: + +* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally, + ``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``, + this returns the codes, not the array of objects. +* ``cls._values`` is the "best possible" array. This could be an + ``ndarray``, ``ExtensionArray``, or an ``Index`` subclass (note: we're in the + process of removing the index subclasses here so that it's always an + ``ndarray`` or ``ExtensionArray``). + +So, for example, ``Series[category]._values`` is a ``Categorical``, while +``Series[category]._ndarray_values`` is the underlying codes. + +.. _ref-subclassing-pandas: + +Subclassing pandas data structures +---------------------------------- + +This section has been moved to :ref:`extending.subclassing-pandas`. diff --git a/doc/source/development/maintaining.rst b/doc/source/development/maintaining.rst new file mode 100644 index 00000000..0d1088cc --- /dev/null +++ b/doc/source/development/maintaining.rst @@ -0,0 +1,193 @@ +.. _maintaining: + +****************** +Pandas Maintenance +****************** + +This guide is for pandas' maintainers.
It may also be interesting to contributors +looking to understand the pandas development process and what steps are necessary +to become a maintainer. + +The main contributing guide is available at :ref:`contributing`. + +Roles +----- + +Pandas uses two levels of permissions: **triage** and **core** team members. + +Triage members can label and close issues and pull requests. + +Core team members can label and close issues and pull requests, and can merge +pull requests. + +GitHub publishes the full `list of permissions`_. + +Tasks +----- + +Pandas is largely a volunteer project, so these tasks shouldn't be read as +"expectations" of triage and maintainers. Rather, they're general descriptions +of what it means to be a maintainer. + +* Triage newly filed issues (see :ref:`maintaining.triage`) +* Review newly opened pull requests +* Respond to updates on existing issues and pull requests +* Drive discussion and decisions on stalled issues and pull requests +* Provide experience / wisdom on API design questions to ensure consistency and maintainability +* Project organization (run / attend developer meetings, represent pandas) + +http://matthewrocklin.com/blog/2019/05/18/maintainer may be interesting background +reading. + +.. _maintaining.triage: + +Issue Triage +------------ + + +Here's a typical workflow for triaging a newly opened issue. + +1. **Thank the reporter for opening an issue** + + The issue tracker is many people's first interaction with the pandas project itself, + beyond just using the library. As such, we want it to be a welcoming, pleasant + experience. + +2. **Is the necessary information provided?** + + Ideally reporters would fill out the issue template, but many don't. + If crucial information (like the version of pandas they used) is missing, + feel free to ask for that and label the issue with "Needs info". The + report should follow the guidelines in :ref:`contributing.bug_reports`.
+ You may want to link to that if they didn't follow the template. + + Make sure that the title accurately reflects the issue. Edit it yourself + if it's not clear. + +3. **Is this a duplicate issue?** + + We have many open issues. If a new issue is clearly a duplicate, label the + new issue as "Duplicate", assign the milestone "No Action", and close the issue + with a link to the original issue. Make sure to still thank the reporter, and + encourage them to chime in on the original issue, and perhaps try to fix it. + + If the new issue provides relevant information, such as a better or slightly + different example, add it to the original issue as a comment or an edit to + the original post. + +4. **Is the issue minimal and reproducible**? + + For bug reports, we ask that the reporter provide a minimal reproducible + example. See http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports + for a good explanation. If the example is not reproducible, or if it's + *clearly* not minimal, feel free to ask the reporter if they can provide + an example or simplify the provided one. Do acknowledge that writing + minimal reproducible examples is hard work. If the reporter is struggling, + you can try to write one yourself and we'll edit the original post to include it. + + If a reproducible example can't be provided, add the "Needs info" label. + + If a reproducible example is provided, but you see a simplification, + edit the original post with your simpler reproducible example. + +5. **Is this a clearly defined feature request?** + + Generally, pandas prefers to discuss and design new features in issues, before + a pull request is made. Encourage the submitter to include a proposed API + for the new feature. Having them write a full docstring is a good way to + pin down specifics. + + We'll need a discussion from several pandas maintainers before deciding whether + the proposal is in scope for pandas. + +6.
**Is this a usage question?** + + We prefer that usage questions are asked on StackOverflow with the pandas + tag. https://stackoverflow.com/questions/tagged/pandas + + If it's easy to answer, feel free to link to the relevant documentation section, + let them know that in the future this kind of question should be on + StackOverflow, and close the issue. + +7. **What labels and milestones should I add?** + + Apply the relevant labels. This is a bit of an art, and comes with experience. + Look at similar issues to get a feel for how things are labeled. + + If the issue is clearly defined and the fix seems relatively straightforward, + label the issue as "Good first issue". + + Typically, new issues will be assigned the "Contributions welcome" milestone, + unless it's known that this issue should be addressed in a specific release (say + because it's a large regression). + +.. _maintaining.closing: + +Closing Issues +-------------- + +Be delicate here: many people interpret closing an issue as us saying that the +conversation is over. It's typically best to give the reporter some time to +respond or self-close their issue if it's determined that the behavior is not a bug, +or the feature is out of scope. Sometimes reporters just go away though, and +we'll close the issue after the conversation has died. + +Reviewing Pull Requests +----------------------- + +Anybody can review a pull request: regular contributors, triagers, or core-team +members. Here are some guidelines to check. + +* Tests should be in a sensible location. +* New public APIs should be included somewhere in ``doc/source/reference/``. +* New / changed API should use the ``versionadded`` or ``versionchanged`` directives in the docstring. +* User-facing changes should have a whatsnew in the appropriate file. +* Regression tests should reference the original GitHub issue number like ``# GH-1234``. + +Cleaning up old Issues +---------------------- + +Every open issue in pandas has a cost.
Open issues make finding duplicates harder, +and can make it harder to know what needs to be done in pandas. That said, closing +issues isn't a goal on its own. Our goal is to make pandas the best it can be, +and that's best done by ensuring that the quality of our open issues is high. + +Occasionally, bugs are fixed but the issue isn't linked to in the Pull Request. +In these cases, comment that "This has been fixed, but could use a test." and +label the issue as "Good First Issue" and "Needs Test". + +If an older issue doesn't follow our issue template, edit the original post to +include a minimal example, the actual output, and the expected output. Uniformity +in issue reports is valuable. + +If an older issue lacks a reproducible example, label it as "Needs Info" and +ask them to provide one (or write one yourself if possible). If one isn't +provided reasonably soon, close it according to the policies in :ref:`maintaining.closing`. + +Cleaning up old Pull Requests +----------------------------- + +Occasionally, contributors are unable to finish off a pull request. +If some time has passed (two weeks, say) since the last review requesting changes, +gently ask if they're still interested in working on this. If another two weeks or +so passes with no response, thank them for their work and close the pull request. +Comment on the original issue that "There's a stalled PR at #1234 that may be +helpful.", and perhaps label the issue as "Good first issue" if the PR was relatively +close to being accepted. + +Additionally, core-team members can push to contributors' branches. This can be +helpful for pushing an important PR across the line, or for fixing a small +merge conflict. + +Becoming a pandas maintainer +---------------------------- + +The full process is outlined in our `governance documents`_. In summary, +we're happy to give triage permissions to anyone who shows interest by +being helpful on the issue tracker.
+ +The current list of core-team members is at +https://github.com/pandas-dev/pandas-governance/blob/master/people.md + +.. _governance documents: https://github.com/pandas-dev/pandas-governance +.. _list of permissions: https://help.github.com/en/github/setting-up-and-managing-organizations-and-teams/repository-permission-levels-for-an-organization \ No newline at end of file diff --git a/doc/source/development/meeting.rst b/doc/source/development/meeting.rst new file mode 100644 index 00000000..1d194086 --- /dev/null +++ b/doc/source/development/meeting.rst @@ -0,0 +1,32 @@ +.. _meeting: + +================== +Developer Meetings +================== + +We hold regular developer meetings on the second Wednesday +of each month at 18:00 UTC. These meetings and their minutes are open to +the public. All are welcome to join. + +Minutes +------- + +The minutes of past meetings are available in `this Google Document `__. + +Calendar +-------- + +This calendar shows all the developer meetings. + +.. raw:: html + + + +You can subscribe to this calendar with the following links: + +* `iCal `__ +* `Google calendar `__ + +Additionally, we'll sometimes have one-off meetings on specific topics. +These will be published on the same calendar. + diff --git a/doc/source/development/policies.rst b/doc/source/development/policies.rst new file mode 100644 index 00000000..22494873 --- /dev/null +++ b/doc/source/development/policies.rst @@ -0,0 +1,57 @@ +.. _develop.policies: + +******** +Policies +******** + +.. _policies.version: + +Version Policy +~~~~~~~~~~~~~~ + +.. versionchanged:: 1.0.0 + +Pandas uses a loose variant of semantic versioning (`SemVer`_) to govern +deprecations, API compatibility, and version numbering. + +A pandas release number is made up of ``MAJOR.MINOR.PATCH``. + +API breaking changes should only occur in **major** releases. 
These changes +will be documented, with clear guidance on what is changing, why it's changing, +and how to migrate existing code to the new behavior. + +Whenever possible, a deprecation path will be provided rather than an outright +breaking change. + +Pandas will introduce deprecations in **minor** releases. These deprecations +will preserve the existing behavior while emitting a warning that provides +guidance on: + +* How to achieve similar behavior if an alternative is available +* The pandas version in which the deprecation will be enforced. + +We will not introduce new deprecations in patch releases. + +Deprecations will only be enforced in **major** releases. For example, if a +behavior is deprecated in pandas 1.2.0, it will continue to work, with a +warning, for all releases in the 1.x series. The behavior will change and the +deprecation removed in the next major release (2.0.0). + +.. note:: + + Pandas will sometimes make *behavior changing* bug fixes, as part of + minor or patch releases. Whether or not a change is a bug fix or an + API-breaking change is a judgement call. We'll do our best, and we + invite you to participate in development discussion on the issue + tracker or mailing list. + +These policies do not apply to features marked as **experimental** in the documentation. +Pandas may change the behavior of experimental features at any time. + +Python Support +~~~~~~~~~~~~~~ + +Pandas will only drop support for specific Python versions (e.g. 3.6.x, 3.7.x) in +pandas **major** releases. + +.. _SemVer: https://semver.org diff --git a/doc/source/development/roadmap.rst b/doc/source/development/roadmap.rst new file mode 100644 index 00000000..00598830 --- /dev/null +++ b/doc/source/development/roadmap.rst @@ -0,0 +1,193 @@ +.. _roadmap: + +======= +Roadmap +======= + +This page provides an overview of the major themes in pandas' development. Each of +these items requires a relatively large amount of effort to implement.
These may +be achieved more quickly with dedicated funding or interest from contributors. + +An item being on the roadmap does not mean that it will *necessarily* happen, even +with unlimited funding. During the implementation period we may discover issues +preventing the adoption of the feature. + +Additionally, an item *not* being on the roadmap does not exclude it from inclusion +in pandas. The roadmap is intended for larger, fundamental changes to the project that +are likely to take months or years of developer time. Smaller-scoped items will continue +to be tracked on our `issue tracker `__. + +See :ref:`roadmap.evolution` for proposing changes to this document. + +Extensibility +------------- + +Pandas :ref:`extending.extension-types` allow for extending NumPy types with custom +data types and array storage. Pandas uses extension types internally, and provides +an interface for 3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy array. +These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the library, +making their behavior more consistent with the handling of NumPy arrays. We'll do this +by cleaning up pandas' internals and adding new methods to the extension array interface. + +String data type +---------------- + +Currently, pandas stores text data in an ``object`` -dtype NumPy array. +The current implementation has two primary drawbacks: First, ``object`` -dtype +is not specific to strings: any Python object can be stored in an ``object`` -dtype +array, not just strings. Second: this is not efficient. The NumPy memory model +isn't especially well-suited to variable width text data. + +To solve the first issue, we propose a new extension type for string data. This +will initially be opt-in, with users explicitly requesting ``dtype="string"``. 
+The array backing this string dtype may initially be the current implementation: +an ``object`` -dtype NumPy array of Python strings. + +To solve the second issue (performance), we'll explore alternative in-memory +array libraries (for example, Apache Arrow). As part of the work, we may +need to implement certain operations expected by pandas users (for example +the algorithm used in ``Series.str.upper``). That work may be done outside of +pandas. + +Apache Arrow interoperability +----------------------------- + +`Apache Arrow `__ is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned with +typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and data types +within pandas. This will let us take advantage of its I/O capabilities and +provide for better interoperability with other languages and libraries +using Arrow. + +Block manager rewrite +--------------------- + +We'd like to replace pandas' current internal data structures (a collection of +1 or 2-D arrays) with a simpler collection of 1-D arrays. + +Pandas' internal data model is quite complex. A DataFrame is made up of +one or more 2-dimensional "blocks", with one or more blocks per dtype. This +collection of 2-D arrays is managed by the BlockManager. + +The primary benefit of the BlockManager is improved performance on certain +operations (construction from a 2D array, binary operations, reductions across the columns), +especially for wide DataFrames. However, the BlockManager substantially increases the +complexity and maintenance burden of pandas. + +By replacing the BlockManager we hope to achieve + +* Substantially simpler code +* Easier extensibility with new logical types +* Better user control over memory use and layout +* Improved micro-performance +* Option to provide a C / Cython API to pandas' internals + +See `these design documents `__ +for more.
+ +Decoupling of indexing and internals +------------------------------------ + +The code for getting and setting values in pandas' data structures needs refactoring. +In particular, we must clearly separate code that converts keys (e.g., the argument +to ``DataFrame.loc``) to positions from code that uses these positions to get +or set values. This is related to the proposed BlockManager rewrite. Currently, the +BlockManager sometimes uses label-based, rather than position-based, indexing. +We propose that it should only work with positional indexing, and the translation of keys +to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will require care +and attention. More details are discussed at +https://github.com/pandas-dev/pandas/wiki/(Tentative)-rules-for-restructuring-indexing-code + +Numba-accelerated operations +---------------------------- + +`Numba `__ is a JIT compiler for Python code. We'd like to provide +ways for users to apply their own Numba-jitted functions where pandas accepts user-defined functions +(for example, :meth:`Series.apply`, :meth:`DataFrame.apply`, :meth:`DataFrame.applymap`, +and in groupby and window contexts). This will improve the performance of +user-defined-functions in these operations by staying within compiled code. + + +Documentation improvements +-------------------------- + +We'd like to improve the content, structure, and presentation of the pandas documentation. +Some specific goals include + +* Overhaul the HTML theme with a modern, responsive design (:issue:`15556`) +* Improve the "Getting Started" documentation, designing and writing learning paths + for users with different backgrounds (e.g. brand new to programming, familiar with + other languages like R, already familiar with Python). +* Improve the overall organization of the documentation and specific subsections + of the documentation to make navigation and finding content easier.
+ +Package docstring validation +---------------------------- + +To improve the quality and consistency of pandas docstrings, we've developed +tooling to check docstrings in a variety of ways. +https://github.com/pandas-dev/pandas/blob/master/scripts/validate_docstrings.py +contains the checks. + +Like many other projects, pandas uses the +`numpydoc `__ style for writing +docstrings. With the collaboration of the numpydoc maintainers, we'd like to +move the checks to a package other than pandas so that other projects can easily +use them as well. + +Performance monitoring +---------------------- + +Pandas uses `airspeed velocity `__ to +monitor for performance regressions. ASV itself is a fabulous tool, but requires +some additional work to be integrated into an open source project's workflow. + +The `asv-runner `__ organization, currently made up +of pandas maintainers, provides tools built on top of ASV. We have a physical +machine for running a number of projects' benchmarks, and tools managing the +benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +* Be more stable. Currently, they're maintained on the nights and weekends when + a maintainer has free time. +* Tune the system for benchmarks to improve stability, following + https://pyperf.readthedocs.io/en/latest/system.html +* Build a GitHub bot to request ASV runs *before* a PR is merged. Currently, the + benchmarks are only run nightly. + +.. _roadmap.evolution: + +Roadmap Evolution +----------------- + +Pandas continues to evolve. The direction is primarily determined by community +interest. Everyone is welcome to review existing items on the roadmap and +to propose a new item. + +Each item on the roadmap should be a short summary of a larger design proposal. +The proposal should include + +1. Short summary of the changes, which would be appropriate for inclusion in + the roadmap if accepted. +2. Motivation for the changes. +3.
An explanation of why the change is in scope for pandas. +4. Detailed design: Preferably with example-usage (even if not implemented yet) + and API documentation +5. API Change: Any API changes that may result from the proposal. + +That proposal may then be submitted as a GitHub issue, where the pandas maintainers +can review and comment on the design. The `pandas mailing list `__ +should be notified of the proposal. + +When there's agreement that an implementation +would be welcome, the roadmap should be updated to include the summary and a +link to the discussion issue. diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst new file mode 100644 index 00000000..b1de406b --- /dev/null +++ b/doc/source/ecosystem.rst @@ -0,0 +1,395 @@ +:orphan: + +.. _ecosystem: + +{{ header }} + +**************** +Pandas ecosystem +**************** + +Increasingly, packages are being built on top of pandas to address specific needs +in data preparation, analysis and visualization. +This is encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for developers to +build powerful and more focused data tools. +The creation of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a non-exhaustive list of projects that build on pandas in order to provide +tools in the PyData space. For a list of projects that depend on pandas, +see the +`libraries.io usage page for pandas `_ +or `search pypi for pandas `_. + +We'd like to make it easier for users to find these projects. If you know of other +substantial projects that you feel should be on this list, please let us know. + +..
_ecosystem.data_cleaning_and_validation: + +Data cleaning and validation +---------------------------- + +`pyjanitor `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pyjanitor provides a clean API for cleaning data, using method chaining. + +`Engarde `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Engarde is a lightweight library used to explicitly state assumptions about your datasets +and check that they're *actually* true. + +.. _ecosystem.stats: + +Statistics and machine learning +------------------------------- + +`Statsmodels `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Statsmodels is the prominent Python "statistics and econometrics library" and it has +a long-standing special relationship with pandas. Statsmodels provides powerful statistics, +econometrics, analysis and modeling functionality that is out of pandas' scope. +Statsmodels leverages pandas objects as the underlying data container for computation. + +`sklearn-pandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use pandas DataFrames in your `scikit-learn `__ +ML pipeline. + +`Featuretools `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community. + +.. _ecosystem.visualization: + +Visualization +------------- + +`Altair `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. 
This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with Pandas DataFrames. + + +`Bokeh `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Bokeh is a Python interactive visualization library for large datasets that natively uses +the latest web technologies. Its goal is to provide elegant, concise construction of novel +graphics in the style of Protovis/D3, while delivering high-performance interactivity over +large data to thin clients. + +`Pandas-Bokeh `__ provides a high level API +for Bokeh that can be loaded as a native Pandas plotting backend via + +.. code:: python + + pd.set_option("plotting.backend", "pandas_bokeh") + +It is very similar to the matplotlib plotting backend, but provides interactive +web-based charts and maps. + + +`seaborn `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Seaborn is a Python visualization library based on +`matplotlib `__. It provides a high-level, dataset-oriented +interface for creating attractive statistical graphics. The plotting functions +in seaborn understand pandas objects and leverage pandas grouping operations +internally to support concise specification of complex visualizations. Seaborn +also goes beyond matplotlib and pandas with the option to perform statistical +estimation while plotting, aggregating across observations and visualizing the +fit of statistical models to emphasize patterns in a dataset. + +`plotnine `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Hadley Wickham's `ggplot2 `__ is a foundational exploratory visualization package for the R language. +Based on `"The Grammar of Graphics" `__ it +provides a powerful, declarative and extremely general way to generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is `has2k1/plotnine `__. 
+ +`IPython Vega `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`IPython Vega `__ leverages `Vega +`__ to create plots within Jupyter Notebook. + +`Plotly `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Plotly’s `__ `Python API `__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. + +`QtPandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Spun off from the main pandas library, the `qtpandas `__ +library enables DataFrame visualization and manipulation in PyQt4 and PySide applications. + + +.. _ecosystem.ide: + +IDE +------ + +`IPython `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with Pandas methods and also +attributes like DataFrame columns. + +`Jupyter Notebook / Jupyter Lab `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Jupyter Notebook is a web application for creating Jupyter notebooks. +A Jupyter notebook is a JSON document containing an ordered list +of input/output cells which can contain code, text, mathematics, plots +and rich media. +Jupyter notebooks can be converted to a number of open standard output formats +(HTML, HTML presentation slides, LaTeX, PDF, ReStructuredText, Markdown, +Python) through 'Download As' in the web interface and ``jupyter convert`` +in a shell. 
+ +Pandas DataFrames implement ``_repr_html_`` and ``_repr_latex_`` methods +which are utilized by Jupyter Notebook for displaying +(abbreviated) HTML or LaTeX tables. LaTeX output is properly escaped. +(Note: HTML tables may or may not be +compatible with non-HTML Jupyter output formats.) + +See :ref:`Options and Settings ` and +:ref:`Available Options ` +for pandas ``display.`` settings. + +`quantopian/qgrid `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +qgrid is "an interactive grid for sorting and filtering +DataFrames in IPython Notebook" built with SlickGrid. + +`Spyder `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Spyder is a cross-platform PyQt-based IDE combining the editing, analysis, +debugging and profiling functionality of a software development tool with the +data exploration, interactive execution, deep inspection and rich visualization +capabilities of a scientific environment like MATLAB or RStudio. + +Its `Variable Explorer `__ +allows users to view, manipulate and edit pandas ``Index``, ``Series``, +and ``DataFrame`` objects like a "spreadsheet", including copying and modifying +values, sorting, displaying a "heatmap", converting data types and more. +Pandas objects can also be renamed, duplicated, new columns added, +copied/pasted to/from the clipboard (as TSV), and saved/loaded to/from a file. +Spyder can also import data from a variety of plain text and binary files +or the clipboard into a new pandas DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's `Editor `__ and +`IPython Console `__, +and Spyder's `Help pane `__ can retrieve +and render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. + + +..
_ecosystem.api: + +API +--- + +`pandas-datareader `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``pandas-datareader`` is a remote data access library for pandas (PyPI:``pandas-datareader``). +It is based on functionality that was located in ``pandas.io.data`` and ``pandas.io.wb`` but was +split off in v0.19. +See more in the `pandas-datareader docs `_: + +The following data feeds are available: + + * Google Finance + * Tiingo + * Morningstar + * IEX + * Robinhood + * Enigma + * Quandl + * FRED + * Fama/French + * World Bank + * OECD + * Eurostat + * TSP Fund Data + * Nasdaq Trader Symbol Definitions + * Stooq Index Data + * MOEX Data + +`quandl/Python `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Quandl API for Python wraps the Quandl REST API to return +Pandas DataFrames with timeseries indexes. + +`pydatastream `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +PyDatastream is a Python interface to the +`Refinitiv Datastream (DWS) `__ +REST API to return indexed Pandas DataFrames with financial data. +This package requires valid credentials for this API (non free). + +`pandaSDMX `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +pandaSDMX is a library to retrieve and acquire statistical data +and metadata disseminated in +`SDMX `_ 2.1, an ISO-standard +widely used by institutions such as statistics offices, central banks, +and international organisations. pandaSDMX can expose datasets and related +structural metadata including data flows, code-lists, +and data structure definitions as pandas Series +or MultiIndexed DataFrames. + +`fredapi `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +fredapi is a Python interface to the `Federal Reserve Economic Data (FRED) `__ +provided by the Federal Reserve Bank of St. Louis. It works with both the FRED database and ALFRED database that +contains point-in-time data (i.e. historic data revisions). 
fredapi provides a wrapper in Python to the FRED +HTTP API, and also provides several convenient methods for parsing and analyzing point-in-time data from ALFRED. +fredapi makes use of pandas and returns data in a Series or DataFrame. This module requires a FRED API key that +you can obtain for free on the FRED website. + + +.. _ecosystem.domain: + +Domain specific +--------------- + +`Geopandas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Geopandas extends pandas data objects to include geographic information which support +geometric operations. If your work entails maps and geographical coordinates, and +you love pandas, you should take a close look at Geopandas. + +`xarray `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +xarray brings the labeled data power of pandas to the physical sciences by +providing N-dimensional variants of the core pandas data structures. It aims to +provide a pandas-like and pandas-compatible toolkit for analytics on multi- +dimensional arrays, rather than the tabular data for which pandas excels. + + +.. _ecosystem.out-of-core: + +Out-of-core +------------- + +`Blaze `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Blaze provides a standard API for doing computations with various +in-memory and on-disk backends: NumPy, Pandas, SQLAlchemy, MongoDB, PyTables, +PySpark. + +`Dask `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dask is a flexible parallel computing library for analytics. Dask +provides a familiar ``DataFrame`` interface for out-of-core, parallel and distributed computing. + +`Dask-ML `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Dask-ML enables parallel and distributed machine learning using Dask alongside existing machine learning libraries like Scikit-Learn, XGBoost, and TensorFlow. + +`Koalas `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Koalas provides a familiar pandas DataFrame interface on top of Apache Spark. 
It enables users to leverage multi-cores on one machine or a cluster of machines to speed up or scale their DataFrame code. + +`Odo `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Odo provides a uniform API for moving data between different formats. It uses +pandas' own ``read_csv`` for CSV IO and leverages many existing packages such as +PyTables, h5py, and pymongo to move data between non-pandas formats. Its graph +based approach is also extensible by end users for custom formats that may be +too specific for the core of odo. + +`Pandarallel `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pandarallel provides a simple way to parallelize your pandas operations on all your CPUs by changing only one line of code. +It also displays progress bars. + +.. code:: python + + from pandarallel import pandarallel + + pandarallel.initialize(progress_bar=True) + + # df.apply(func) + df.parallel_apply(func) + +`Ray `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pandas on Ray is an early stage DataFrame library that wraps Pandas and transparently distributes the data and computation. The user does not need to know how many cores their system has, nor do they need to specify how to distribute the data. In fact, users can continue using their previous Pandas notebooks while experiencing a considerable speedup from Pandas on Ray, even on a single machine. Only a modification of the import statement is needed, as we demonstrate below. Once you’ve changed your import statement, you’re ready to use Pandas on Ray just like you would Pandas. + +.. code:: python + + # import pandas as pd + import ray.dataframe as pd + + +`Vaex `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Increasingly, packages are being built on top of pandas to address specific needs in data preparation, analysis and visualization. Vaex is a python library for Out-of-Core DataFrames (similar to Pandas), to visualize and explore big tabular datasets.
It can calculate statistics such as mean, sum, count, standard deviation etc, on an N-dimensional grid up to a billion (10\ :sup:`9`) objects/rows per second. Visualization is done using histograms, density plots and 3d volume rendering, allowing interactive exploration of big data. Vaex uses memory mapping, zero memory copy policy and lazy computations for best performance (no memory wasted). + + * vaex.from_pandas + * vaex.to_pandas_df + +.. _ecosystem.extensions: + +Extension data types +-------------------- + +Pandas provides an interface for defining +:ref:`extension types ` to extend NumPy's type +system. The following libraries implement that interface to provide types not +found in NumPy or pandas, which work well with pandas' data containers. + +`cyberpandas`_ +~~~~~~~~~~~~~~ + +Cyberpandas provides an extension type for storing arrays of IP Addresses. These +arrays can be stored inside pandas' Series and DataFrame. + +.. _ecosystem.accessors: + +Accessors +--------- + +A directory of projects providing +:ref:`extension accessors `. This is for users to +discover new accessors and for library authors to coordinate on the namespace. + +============== ========== ========================= +Library Accessor Classes +============== ========== ========================= +`cyberpandas`_ ``ip`` ``Series`` +`pdvega`_ ``vgplot`` ``Series``, ``DataFrame`` +============== ========== ========================= + +.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest +.. _pdvega: https://altair-viz.github.io/pdvega/ diff --git a/doc/source/getting_started/10min.rst b/doc/source/getting_started/10min.rst new file mode 100644 index 00000000..3055a221 --- /dev/null +++ b/doc/source/getting_started/10min.rst @@ -0,0 +1,813 @@ +.. _10min: + +{{ header }} + +******************** +10 minutes to pandas +******************** + +This is a short introduction to pandas, geared mainly for new users. +You can see more complex recipes in the :ref:`Cookbook`. 
+ +Customarily, we import as follows: + +.. ipython:: python + + import numpy as np + import pandas as pd + +Object creation +--------------- + +See the :ref:`Data Structure Intro section `. + +Creating a :class:`Series` by passing a list of values, letting pandas create +a default integer index: + +.. ipython:: python + + s = pd.Series([1, 3, 5, np.nan, 6, 8]) + s + +Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index +and labeled columns: + +.. ipython:: python + + dates = pd.date_range('20130101', periods=6) + dates + df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) + df + +Creating a ``DataFrame`` by passing a dict of objects that can be converted to series-like. + +.. ipython:: python + + df2 = pd.DataFrame({'A': 1., + 'B': pd.Timestamp('20130102'), + 'C': pd.Series(1, index=list(range(4)), dtype='float32'), + 'D': np.array([3] * 4, dtype='int32'), + 'E': pd.Categorical(["test", "train", "test", "train"]), + 'F': 'foo'}) + df2 + +The columns of the resulting ``DataFrame`` have different +:ref:`dtypes `. + +.. ipython:: python + + df2.dtypes + +If you're using IPython, tab completion for column names (as well as public +attributes) is automatically enabled. Here's a subset of the attributes that +will be completed: + +.. ipython:: + + @verbatim + In [1]: df2. # noqa: E225, E999 + df2.A df2.bool + df2.abs df2.boxplot + df2.add df2.C + df2.add_prefix df2.clip + df2.add_suffix df2.clip_lower + df2.align df2.clip_upper + df2.all df2.columns + df2.any df2.combine + df2.append df2.combine_first + df2.apply df2.consolidate + df2.applymap + df2.D + +As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically +tab completed. ``E`` is there as well; the rest of the attributes have been +truncated for brevity. + +Viewing data +------------ + +See the :ref:`Basics section `. + +Here is how to view the top and bottom rows of the frame: + +.. 
ipython:: python + + df.head() + df.tail(3) + +Display the index, columns: + +.. ipython:: python + + df.index + df.columns + +:meth:`DataFrame.to_numpy` gives a NumPy representation of the underlying data. +Note that this can be an expensive operation when your :class:`DataFrame` has +columns with different data types, which comes down to a fundamental difference +between pandas and NumPy: **NumPy arrays have one dtype for the entire array, +while pandas DataFrames have one dtype per column**. When you call +:meth:`DataFrame.to_numpy`, pandas will find the NumPy dtype that can hold *all* +of the dtypes in the DataFrame. This may end up being ``object``, which requires +casting every value to a Python object. + +For ``df``, our :class:`DataFrame` of all floating-point values, +:meth:`DataFrame.to_numpy` is fast and doesn't require copying data. + +.. ipython:: python + + df.to_numpy() + +For ``df2``, the :class:`DataFrame` with multiple dtypes, +:meth:`DataFrame.to_numpy` is relatively expensive. + +.. ipython:: python + + df2.to_numpy() + +.. note:: + + :meth:`DataFrame.to_numpy` does *not* include the index or column + labels in the output. + +:func:`~DataFrame.describe` shows a quick statistic summary of your data: + +.. ipython:: python + + df.describe() + +Transposing your data: + +.. ipython:: python + + df.T + +Sorting by an axis: + +.. ipython:: python + + df.sort_index(axis=1, ascending=False) + +Sorting by values: + +.. ipython:: python + + df.sort_values(by='B') + +Selection +--------- + +.. note:: + + While standard Python / Numpy expressions for selecting and setting are + intuitive and come in handy for interactive work, for production code, we + recommend the optimized pandas data access methods, ``.at``, ``.iat``, + ``.loc`` and ``.iloc``. + +See the indexing documentation :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing `. 
+ +Getting +~~~~~~~ + +Selecting a single column, which yields a ``Series``, +equivalent to ``df.A``: + +.. ipython:: python + + df['A'] + +Selecting via ``[]``, which slices the rows. + +.. ipython:: python + + df[0:3] + df['20130102':'20130104'] + +Selection by label +~~~~~~~~~~~~~~~~~~ + +See more in :ref:`Selection by Label `. + +For getting a cross section using a label: + +.. ipython:: python + + df.loc[dates[0]] + +Selecting on a multi-axis by label: + +.. ipython:: python + + df.loc[:, ['A', 'B']] + +Showing label slicing, both endpoints are *included*: + +.. ipython:: python + + df.loc['20130102':'20130104', ['A', 'B']] + +Reduction in the dimensions of the returned object: + +.. ipython:: python + + df.loc['20130102', ['A', 'B']] + +For getting a scalar value: + +.. ipython:: python + + df.loc[dates[0], 'A'] + +For getting fast access to a scalar (equivalent to the prior method): + +.. ipython:: python + + df.at[dates[0], 'A'] + +Selection by position +~~~~~~~~~~~~~~~~~~~~~ + +See more in :ref:`Selection by Position `. + +Select via the position of the passed integers: + +.. ipython:: python + + df.iloc[3] + +By integer slices, acting similar to numpy/python: + +.. ipython:: python + + df.iloc[3:5, 0:2] + +By lists of integer position locations, similar to the numpy/python style: + +.. ipython:: python + + df.iloc[[1, 2, 4], [0, 2]] + +For slicing rows explicitly: + +.. ipython:: python + + df.iloc[1:3, :] + +For slicing columns explicitly: + +.. ipython:: python + + df.iloc[:, 1:3] + +For getting a value explicitly: + +.. ipython:: python + + df.iloc[1, 1] + +For getting fast access to a scalar (equivalent to the prior method): + +.. ipython:: python + + df.iat[1, 1] + +Boolean indexing +~~~~~~~~~~~~~~~~ + +Using a single column's values to select data. + +.. ipython:: python + + df[df['A'] > 0] + +Selecting values from a DataFrame where a boolean condition is met. + +.. 
ipython:: python + + df[df > 0] + +Using the :func:`~Series.isin` method for filtering: + +.. ipython:: python + + df2 = df.copy() + df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three'] + df2 + df2[df2['E'].isin(['two', 'four'])] + +Setting +~~~~~~~ + +Setting a new column automatically aligns the data +by the indexes. + +.. ipython:: python + + s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6)) + s1 + df['F'] = s1 + +Setting values by label: + +.. ipython:: python + + df.at[dates[0], 'A'] = 0 + +Setting values by position: + +.. ipython:: python + + df.iat[0, 1] = 0 + +Setting by assigning with a NumPy array: + +.. ipython:: python + + df.loc[:, 'D'] = np.array([5] * len(df)) + +The result of the prior setting operations. + +.. ipython:: python + + df + +A ``where`` operation with setting. + +.. ipython:: python + + df2 = df.copy() + df2[df2 > 0] = -df2 + df2 + + +Missing data +------------ + +pandas primarily uses the value ``np.nan`` to represent missing data. It is by +default not included in computations. See the :ref:`Missing Data section +`. + +Reindexing allows you to change/add/delete the index on a specified axis. This +returns a copy of the data. + +.. ipython:: python + + df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E']) + df1.loc[dates[0]:dates[1], 'E'] = 1 + df1 + +To drop any rows that have missing data. + +.. ipython:: python + + df1.dropna(how='any') + +Filling missing data. + +.. ipython:: python + + df1.fillna(value=5) + +To get the boolean mask where values are ``nan``. + +.. ipython:: python + + pd.isna(df1) + + +Operations +---------- + +See the :ref:`Basic section on Binary Ops `. + +Stats +~~~~~ + +Operations in general *exclude* missing data. + +Performing a descriptive statistic: + +.. ipython:: python + + df.mean() + +Same operation on the other axis: + +.. ipython:: python + + df.mean(1) + +Operating with objects that have different dimensionality and need alignment. 
+In addition, pandas automatically broadcasts along the specified dimension. + +.. ipython:: python + + s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) + s + df.sub(s, axis='index') + + +Apply +~~~~~ + +Applying functions to the data: + +.. ipython:: python + + df.apply(np.cumsum) + df.apply(lambda x: x.max() - x.min()) + +Histogramming +~~~~~~~~~~~~~ + +See more at :ref:`Histogramming and Discretization `. + +.. ipython:: python + + s = pd.Series(np.random.randint(0, 7, size=10)) + s + s.value_counts() + +String Methods +~~~~~~~~~~~~~~ + +Series is equipped with a set of string processing methods in the `str` +attribute that make it easy to operate on each element of the array, as in the +code snippet below. Note that pattern-matching in `str` generally uses `regular +expressions `__ by default (and in +some cases always uses them). See more at :ref:`Vectorized String Methods +`. + +.. ipython:: python + + s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat']) + s.str.lower() + +Merge +----- + +Concat +~~~~~~ + +pandas provides various facilities for easily combining together Series and +DataFrame objects with various kinds of set logic for the indexes +and relational algebra functionality in the case of join / merge-type +operations. + +See the :ref:`Merging section `. + +Concatenating pandas objects together with :func:`concat`: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4)) + df + + # break it into pieces + pieces = [df[:3], df[3:7], df[7:]] + + pd.concat(pieces) + +.. note:: + Adding a column to a ``DataFrame`` is relatively fast. However, adding + a row requires a copy, and may be expensive. We recommend passing a + pre-built list of records to the ``DataFrame`` constructor instead + of building a ``DataFrame`` by iteratively appending records to it. + See :ref:`Appending to dataframe ` for more. + +Join +~~~~ + +SQL style merges. See the :ref:`Database style joining ` section. + +.. 
ipython:: python + + left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]}) + right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]}) + left + right + pd.merge(left, right, on='key') + +Another example that can be given is: + +.. ipython:: python + + left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]}) + right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]}) + left + right + pd.merge(left, right, on='key') + +Grouping +-------- + +By "group by" we are referring to a process involving one or more of the +following steps: + + - **Splitting** the data into groups based on some criteria + - **Applying** a function to each group independently + - **Combining** the results into a data structure + +See the :ref:`Grouping section `. + +.. ipython:: python + + df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + df + +Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting +groups. + +.. ipython:: python + + df.groupby('A').sum() + +Grouping by multiple columns forms a hierarchical index, and again we can +apply the ``sum`` function. + +.. ipython:: python + + df.groupby(['A', 'B']).sum() + +Reshaping +--------- + +See the sections on :ref:`Hierarchical Indexing ` and +:ref:`Reshaping `. + +Stack +~~~~~ + +.. ipython:: python + + tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', + 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', + 'one', 'two', 'one', 'two']])) + index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second']) + df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B']) + df2 = df[:4] + df2 + +The :meth:`~DataFrame.stack` method "compresses" a level in the DataFrame's +columns. + +.. 
ipython:: python + + stacked = df2.stack() + stacked + +With a "stacked" DataFrame or Series (having a ``MultiIndex`` as the +``index``), the inverse operation of :meth:`~DataFrame.stack` is +:meth:`~DataFrame.unstack`, which by default unstacks the **last level**: + +.. ipython:: python + + stacked.unstack() + stacked.unstack(1) + stacked.unstack(0) + +Pivot tables +~~~~~~~~~~~~ +See the section on :ref:`Pivot Tables `. + +.. ipython:: python + + df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3, + 'B': ['A', 'B', 'C'] * 4, + 'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2, + 'D': np.random.randn(12), + 'E': np.random.randn(12)}) + df + +We can produce pivot tables from this data very easily: + +.. ipython:: python + + pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C']) + + +Time series +----------- + +pandas has simple, powerful, and efficient functionality for performing +resampling operations during frequency conversion (e.g., converting secondly +data into 5-minutely data). This is extremely common in, but not limited to, +financial applications. See the :ref:`Time Series section `. + +.. ipython:: python + + rng = pd.date_range('1/1/2012', periods=100, freq='S') + ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) + ts.resample('5Min').sum() + +Time zone representation: + +.. ipython:: python + + rng = pd.date_range('3/6/2012 00:00', periods=5, freq='D') + ts = pd.Series(np.random.randn(len(rng)), rng) + ts + ts_utc = ts.tz_localize('UTC') + ts_utc + +Converting to another time zone: + +.. ipython:: python + + ts_utc.tz_convert('US/Eastern') + +Converting between time span representations: + +.. ipython:: python + + rng = pd.date_range('1/1/2012', periods=5, freq='M') + ts = pd.Series(np.random.randn(len(rng)), index=rng) + ts + ps = ts.to_period() + ps + ps.to_timestamp() + +Converting between period and timestamp enables some convenient arithmetic +functions to be used. 
In the following example, we convert a quarterly +frequency with year ending in November to 9am of the end of the month following +the quarter end: + +.. ipython:: python + + prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV') + ts = pd.Series(np.random.randn(len(prng)), prng) + ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9 + ts.head() + +Categoricals +------------ + +pandas can include categorical data in a ``DataFrame``. For full docs, see the +:ref:`categorical introduction ` and the :ref:`API documentation `. + +.. ipython:: python + + df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) + +Convert the raw grades to a categorical data type. + +.. ipython:: python + + df["grade"] = df["raw_grade"].astype("category") + df["grade"] + +Rename the categories to more meaningful names (assigning to +``Series.cat.categories`` is inplace!). + +.. ipython:: python + + df["grade"].cat.categories = ["very good", "good", "very bad"] + +Reorder the categories and simultaneously add the missing categories (methods under ``Series +.cat`` return a new ``Series`` by default). + +.. ipython:: python + + df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", + "good", "very good"]) + df["grade"] + +Sorting is per order in the categories, not lexical order. + +.. ipython:: python + + df.sort_values(by="grade") + +Grouping by a categorical column also shows empty categories. + +.. ipython:: python + + df.groupby("grade").size() + + +Plotting +-------- + +See the :ref:`Plotting ` docs. + +We use the standard convention for referencing the matplotlib API: + +.. ipython:: python + + import matplotlib.pyplot as plt + plt.close('all') + +.. 
ipython:: python + + ts = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) + ts = ts.cumsum() + + @savefig series_plot_basic.png + ts.plot() + +On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all +of the columns with labels: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, + columns=['A', 'B', 'C', 'D']) + df = df.cumsum() + + plt.figure() + df.plot() + @savefig frame_plot_basic.png + plt.legend(loc='best') + +Getting data in/out +------------------- + +CSV +~~~ + +:ref:`Writing to a csv file. ` + +.. ipython:: python + + df.to_csv('foo.csv') + +:ref:`Reading from a csv file. ` + +.. ipython:: python + + pd.read_csv('foo.csv') + +.. ipython:: python + :suppress: + + import os + os.remove('foo.csv') + +HDF5 +~~~~ + +Reading and writing to :ref:`HDFStores `. + +Writing to a HDF5 Store. + +.. ipython:: python + + df.to_hdf('foo.h5', 'df') + +Reading from a HDF5 Store. + +.. ipython:: python + + pd.read_hdf('foo.h5', 'df') + +.. ipython:: python + :suppress: + + os.remove('foo.h5') + +Excel +~~~~~ + +Reading and writing to :ref:`MS Excel `. + +Writing to an excel file. + +.. ipython:: python + + df.to_excel('foo.xlsx', sheet_name='Sheet1') + +Reading from an excel file. + +.. ipython:: python + + pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA']) + +.. ipython:: python + :suppress: + + os.remove('foo.xlsx') + +Gotchas +------- + +If you are attempting to perform an operation you might see an exception like: + +.. code-block:: python + + >>> if pd.Series([False, True, False]): + ... print("I was true") + Traceback + ... + ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). + +See :ref:`Comparisons` for an explanation and what to do. + +See :ref:`Gotchas` as well. 
diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst new file mode 100644 index 00000000..4fef5efb --- /dev/null +++ b/doc/source/getting_started/basics.rst @@ -0,0 +1,2364 @@ +.. _basics: + +{{ header }} + +============================== + Essential basic functionality +============================== + +Here we discuss a lot of the essential functionality common to the pandas data +structures. Here's how to create some of the objects used in the examples from +the previous section: + +.. ipython:: python + + index = pd.date_range('1/1/2000', periods=8) + s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) + df = pd.DataFrame(np.random.randn(8, 3), index=index, + columns=['A', 'B', 'C']) + +.. _basics.head_tail: + +Head and tail +------------- + +To view a small sample of a Series or DataFrame object, use the +:meth:`~DataFrame.head` and :meth:`~DataFrame.tail` methods. The default number +of elements to display is five, but you may pass a custom number. + +.. ipython:: python + + long_series = pd.Series(np.random.randn(1000)) + long_series.head() + long_series.tail(3) + +.. _basics.attrs: + +Attributes and underlying data +------------------------------ + +pandas objects have a number of attributes enabling you to access the metadata + +* **shape**: gives the axis dimensions of the object, consistent with ndarray +* Axis labels + * **Series**: *index* (only axis) + * **DataFrame**: *index* (rows) and *columns* + +Note, **these attributes can be safely assigned to**! + +.. ipython:: python + + df[:2] + df.columns = [x.lower() for x in df.columns] + df + +Pandas objects (:class:`Index`, :class:`Series`, :class:`DataFrame`) can be +thought of as containers for arrays, which hold the actual data and do the +actual computation. For many types, the underlying array is a +:class:`numpy.ndarray`. 
However, pandas and 3rd party libraries may *extend* +NumPy's type system to add support for custom arrays +(see :ref:`basics.dtypes`). + +To get the actual data inside a :class:`Index` or :class:`Series`, use +the ``.array`` property + +.. ipython:: python + + s.array + s.index.array + +:attr:`~Series.array` will always be an :class:`~pandas.api.extensions.ExtensionArray`. +The exact details of what an :class:`~pandas.api.extensions.ExtensionArray` is and why pandas uses them is a bit +beyond the scope of this introduction. See :ref:`basics.dtypes` for more. + +If you know you need a NumPy array, use :meth:`~Series.to_numpy` +or :meth:`numpy.asarray`. + +.. ipython:: python + + s.to_numpy() + np.asarray(s) + +When the Series or Index is backed by +an :class:`~pandas.api.extensions.ExtensionArray`, :meth:`~Series.to_numpy` +may involve copying data and coercing values. See :ref:`basics.dtypes` for more. + +:meth:`~Series.to_numpy` gives some control over the ``dtype`` of the +resulting :class:`numpy.ndarray`. For example, consider datetimes with timezones. +NumPy doesn't have a dtype to represent timezone-aware datetimes, so there +are two possibly useful representations: + +1. An object-dtype :class:`numpy.ndarray` with :class:`Timestamp` objects, each + with the correct ``tz`` +2. A ``datetime64[ns]`` -dtype :class:`numpy.ndarray`, where the values have + been converted to UTC and the timezone discarded + +Timezones may be preserved with ``dtype=object`` + +.. ipython:: python + + ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + ser.to_numpy(dtype=object) + +Or thrown away with ``dtype='datetime64[ns]'`` + +.. ipython:: python + + ser.to_numpy(dtype="datetime64[ns]") + +Getting the "raw data" inside a :class:`DataFrame` is possibly a bit more +complex. When your ``DataFrame`` only has a single data type for all the +columns, :meth:`DataFrame.to_numpy` will return the underlying data: + +.. 
ipython:: python + + df.to_numpy() + +If a DataFrame contains homogeneously-typed data, the ndarray can +actually be modified in-place, and the changes will be reflected in the data +structure. For heterogeneous data (e.g. some of the DataFrame's columns are not +all the same dtype), this will not be the case. The values attribute itself, +unlike the axis labels, cannot be assigned to. + +.. note:: + + When working with heterogeneous data, the dtype of the resulting ndarray + will be chosen to accommodate all of the data involved. For example, if + strings are involved, the result will be of object dtype. If there are only + floats and integers, the resulting array will be of float dtype. + +In the past, pandas recommended :attr:`Series.values` or :attr:`DataFrame.values` +for extracting the data from a Series or DataFrame. You'll still find references +to these in old code bases and online. Going forward, we recommend avoiding +``.values`` and using ``.array`` or ``.to_numpy()``. ``.values`` has the following +drawbacks: + +1. When your Series contains an :ref:`extension type `, it's + unclear whether :attr:`Series.values` returns a NumPy array or the extension array. + :attr:`Series.array` will always return an :class:`~pandas.api.extensions.ExtensionArray`, and will never + copy data. :meth:`Series.to_numpy` will always return a NumPy array, + potentially at the cost of copying / coercing values. +2. When your DataFrame contains a mixture of data types, :attr:`DataFrame.values` may + involve copying data and coercing values to a common dtype, a relatively expensive + operation. :meth:`DataFrame.to_numpy`, being a method, makes it clearer that the + returned NumPy array may not be a view on the same data in the DataFrame. + +.. _basics.accelerate: + +Accelerated operations +---------------------- + +pandas has support for accelerating certain types of binary numerical and boolean operations using +the ``numexpr`` library and the ``bottleneck`` libraries. 
+ +These libraries are especially useful when dealing with large data sets, and provide large +speedups. ``numexpr`` uses smart chunking, caching, and multiple cores. ``bottleneck`` is +a set of specialized cython routines that are especially fast when dealing with arrays that have +``nans``. + +Here is a sample (using 100 column x 100,000 row ``DataFrames``): + +.. csv-table:: + :header: "Operation", "0.11.0 (ms)", "Prior Version (ms)", "Ratio to Prior" + :widths: 25, 25, 25, 25 + :delim: ; + + ``df1 > df2``; 13.32; 125.35; 0.1063 + ``df1 * df2``; 21.71; 36.63; 0.5928 + ``df1 + df2``; 22.04; 36.50; 0.6039 + +You are highly encouraged to install both libraries. See the section +:ref:`Recommended Dependencies ` for more installation info. + +Both are enabled by default; you can control this by setting the options: + +.. code-block:: python + + pd.set_option('compute.use_bottleneck', False) + pd.set_option('compute.use_numexpr', False) + +.. _basics.binop: + +Flexible binary operations +-------------------------- + +With binary operations between pandas data structures, there are two key points +of interest: + +* Broadcasting behavior between higher- (e.g. DataFrame) and + lower-dimensional (e.g. Series) objects. +* Missing data in computations. + +We will demonstrate how to manage these issues independently, though they can +be handled simultaneously. + +Matching / broadcasting behavior +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +DataFrame has the methods :meth:`~DataFrame.add`, :meth:`~DataFrame.sub`, +:meth:`~DataFrame.mul`, :meth:`~DataFrame.div` and related functions +:meth:`~DataFrame.radd`, :meth:`~DataFrame.rsub`, ... +for carrying out binary operations. For broadcasting behavior, +Series input is of primary interest. Using these functions, you can choose to +either match on the *index* or *columns* via the **axis** keyword: + +..
ipython:: python + + df = pd.DataFrame({ + 'one': pd.Series(np.random.randn(3), index=['a', 'b', 'c']), + 'two': pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']), + 'three': pd.Series(np.random.randn(3), index=['b', 'c', 'd'])}) + df + row = df.iloc[1] + column = df['two'] + + df.sub(row, axis='columns') + df.sub(row, axis=1) + + df.sub(column, axis='index') + df.sub(column, axis=0) + +.. ipython:: python + :suppress: + + df_orig = df + +Furthermore, you can align a level of a MultiIndexed DataFrame with a Series. + +.. ipython:: python + + dfmi = df.copy() + dfmi.index = pd.MultiIndex.from_tuples([(1, 'a'), (1, 'b'), + (1, 'c'), (2, 'a')], + names=['first', 'second']) + dfmi.sub(column, axis=0, level='second') + +Series and Index also support the :func:`divmod` builtin. This function performs +floor division and the modulo operation at the same time, returning a two-tuple +of the same type as the left hand side. For example: + +.. ipython:: python + + s = pd.Series(np.arange(10)) + s + div, rem = divmod(s, 3) + div + rem + + idx = pd.Index(np.arange(10)) + idx + div, rem = divmod(idx, 3) + div + rem + +We can also do elementwise :func:`divmod`: + +.. ipython:: python + + div, rem = divmod(s, [2, 2, 3, 3, 4, 4, 5, 5, 6, 6]) + div + rem + +Missing data / operations with fill values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In Series and DataFrame, the arithmetic functions have the option of inputting +a *fill_value*, namely a value to substitute when at most one of the values at +a location is missing. For example, when adding two DataFrame objects, you may +wish to treat NaN as 0 unless both DataFrames are missing that value, in which +case the result will be NaN (you can later replace NaN with some other value +using ``fillna`` if you wish). + +.. ipython:: python + :suppress: + + df2 = df.copy() + df2['three']['a'] = 1. + +.. ipython:: python + + df + df2 + df + df2 + df.add(df2, fill_value=0) + +..
_basics.compare: + +Flexible comparisons +~~~~~~~~~~~~~~~~~~~~ + +Series and DataFrame have the binary comparison methods ``eq``, ``ne``, ``lt``, ``gt``, +``le``, and ``ge`` whose behavior is analogous to the binary +arithmetic operations described above: + +.. ipython:: python + + df.gt(df2) + df2.ne(df) + +These operations produce a pandas object of the same type as the left-hand-side +input that is of dtype ``bool``. These ``boolean`` objects can be used in +indexing operations, see the section on :ref:`Boolean indexing`. + +.. _basics.reductions: + +Boolean reductions +~~~~~~~~~~~~~~~~~~ + +You can apply the reductions: :attr:`~DataFrame.empty`, :meth:`~DataFrame.any`, +:meth:`~DataFrame.all`, and :meth:`~DataFrame.bool` to provide a +way to summarize a boolean result. + +.. ipython:: python + + (df > 0).all() + (df > 0).any() + +You can reduce to a final boolean value. + +.. ipython:: python + + (df > 0).any().any() + +You can test if a pandas object is empty, via the :attr:`~DataFrame.empty` property. + +.. ipython:: python + + df.empty + pd.DataFrame(columns=list('ABC')).empty + +To evaluate single-element pandas objects in a boolean context, use the method +:meth:`~DataFrame.bool`: + +.. ipython:: python + + pd.Series([True]).bool() + pd.Series([False]).bool() + pd.DataFrame([[True]]).bool() + pd.DataFrame([[False]]).bool() + +.. warning:: + + You might be tempted to do the following: + + .. code-block:: python + + >>> if df: + ... pass + + Or + + .. code-block:: python + + >>> df and df2 + + These will both raise errors, as you are trying to compare multiple values.:: + + ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). + +See :ref:`gotchas` for a more detailed discussion. + +.. _basics.equals: + +Comparing if objects are equivalent +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Often you may find that there is more than one way to compute the same +result. As a simple example, consider ``df + df`` and ``df * 2``. 
To test +that these two computations produce the same result, given the tools +shown above, you might imagine using ``(df + df == df * 2).all()``. But in +fact, this expression is False: + +.. ipython:: python + + df + df == df * 2 + (df + df == df * 2).all() + +Notice that the boolean DataFrame ``df + df == df * 2`` contains some False values! +This is because NaNs do not compare as equals: + +.. ipython:: python + + np.nan == np.nan + +So, NDFrames (such as Series and DataFrames) +have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in +corresponding locations treated as equal. + +.. ipython:: python + + (df + df).equals(df * 2) + +Note that the Series or DataFrame index needs to be in the same order for +equality to be True: + +.. ipython:: python + + df1 = pd.DataFrame({'col': ['foo', 0, np.nan]}) + df2 = pd.DataFrame({'col': [np.nan, 0, 'foo']}, index=[2, 1, 0]) + df1.equals(df2) + df1.equals(df2.sort_index()) + +Comparing array-like objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can conveniently perform element-wise comparisons when comparing a pandas +data structure with a scalar value: + +.. ipython:: python + + pd.Series(['foo', 'bar', 'baz']) == 'foo' + pd.Index(['foo', 'bar', 'baz']) == 'foo' + +Pandas also handles element-wise comparisons between different array-like +objects of the same length: + +.. ipython:: python + + pd.Series(['foo', 'bar', 'baz']) == pd.Index(['foo', 'bar', 'qux']) + pd.Series(['foo', 'bar', 'baz']) == np.array(['foo', 'bar', 'qux']) + +Trying to compare ``Index`` or ``Series`` objects of different lengths will +raise a ValueError: + +.. code-block:: ipython + + In [55]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo', 'bar']) + ValueError: Series lengths must match to compare + + In [56]: pd.Series(['foo', 'bar', 'baz']) == pd.Series(['foo']) + ValueError: Series lengths must match to compare + +Note that this is different from the NumPy behavior where a comparison can +be broadcast: + +.. 
ipython:: python + + np.array([1, 2, 3]) == np.array([2]) + +or it can return False if broadcasting can not be done: + +.. ipython:: python + :okwarning: + + np.array([1, 2, 3]) == np.array([1, 2]) + +Combining overlapping data sets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A problem occasionally arising is the combination of two similar data sets +where values in one are preferred over the other. An example would be two data +series representing a particular economic indicator where one is considered to +be of "higher quality". However, the lower quality series might extend further +back in history or have more complete data coverage. As such, we would like to +combine two DataFrame objects where missing values in one DataFrame are +conditionally filled with like-labeled values from the other DataFrame. The +function implementing this operation is :meth:`~DataFrame.combine_first`, +which we illustrate: + +.. ipython:: python + + df1 = pd.DataFrame({'A': [1., np.nan, 3., 5., np.nan], + 'B': [np.nan, 2., 3., np.nan, 6.]}) + df2 = pd.DataFrame({'A': [5., 2., 4., np.nan, 3., 7.], + 'B': [np.nan, np.nan, 3., 4., 6., 8.]}) + df1 + df2 + df1.combine_first(df2) + +General DataFrame combine +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.combine_first` method above calls the more general +:meth:`DataFrame.combine`. This method takes another DataFrame +and a combiner function, aligns the input DataFrame and then passes the combiner +function pairs of Series (i.e., columns whose names are the same). + +So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above: + +.. ipython:: python + + def combiner(x, y): + return np.where(pd.isna(x), y, x) + df1.combine(df2, combiner) + +.. _basics.stats: + +Descriptive statistics +---------------------- + +There exists a large number of methods for computing descriptive statistics and +other related operations on :ref:`Series `, :ref:`DataFrame +`. 
Most of these +are aggregations (hence producing a lower-dimensional result) like +:meth:`~DataFrame.sum`, :meth:`~DataFrame.mean`, and :meth:`~DataFrame.quantile`, +but some of them, like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`, +produce an object of the same size. Generally speaking, these methods take an +**axis** argument, just like *ndarray.{sum, std, ...}*, but the axis can be +specified by name or integer: + +* **Series**: no axis argument needed +* **DataFrame**: "index" (axis=0, default), "columns" (axis=1) + +For example: + +.. ipython:: python + + df + df.mean(0) + df.mean(1) + +All such methods have a ``skipna`` option signaling whether to exclude missing +data (``True`` by default): + +.. ipython:: python + + df.sum(0, skipna=False) + df.sum(axis=1, skipna=True) + +Combined with the broadcasting / arithmetic behavior, one can describe various +statistical procedures, like standardization (rendering data zero mean and +standard deviation 1), very concisely: + +.. ipython:: python + + ts_stand = (df - df.mean()) / df.std() + ts_stand.std() + xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0) + xs_stand.std(1) + +Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` +preserve the location of ``NaN`` values. This is somewhat different from +:meth:`~DataFrame.expanding` and :meth:`~DataFrame.rolling`. +For more details please see :ref:`this note `. + +.. ipython:: python + + df.cumsum() + +Here is a quick reference summary table of common functions. Each also takes an +optional ``level`` parameter which applies only if the object has a +:ref:`hierarchical index`. + +.. 
csv-table:: + :header: "Function", "Description" + :widths: 20, 80 + + ``count``, Number of non-NA observations + ``sum``, Sum of values + ``mean``, Mean of values + ``mad``, Mean absolute deviation + ``median``, Arithmetic median of values + ``min``, Minimum + ``max``, Maximum + ``mode``, Mode + ``abs``, Absolute Value + ``prod``, Product of values + ``std``, Bessel-corrected sample standard deviation + ``var``, Unbiased variance + ``sem``, Standard error of the mean + ``skew``, Sample skewness (3rd moment) + ``kurt``, Sample kurtosis (4th moment) + ``quantile``, Sample quantile (value at %) + ``cumsum``, Cumulative sum + ``cumprod``, Cumulative product + ``cummax``, Cumulative maximum + ``cummin``, Cumulative minimum + +Note that by chance some NumPy methods, like ``mean``, ``std``, and ``sum``, +will exclude NAs on Series input by default: + +.. ipython:: python + + np.mean(df['one']) + np.mean(df['one'].to_numpy()) + +:meth:`Series.nunique` will return the number of unique non-NA values in a +Series: + +.. ipython:: python + + series = pd.Series(np.random.randn(500)) + series[20:500] = np.nan + series[10:20] = 5 + series.nunique() + +.. _basics.describe: + +Summarizing data: describe +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There is a convenient :meth:`~DataFrame.describe` function which computes a variety of summary +statistics about a Series or the columns of a DataFrame (excluding NAs of +course): + +.. ipython:: python + + series = pd.Series(np.random.randn(1000)) + series[::2] = np.nan + series.describe() + frame = pd.DataFrame(np.random.randn(1000, 5), + columns=['a', 'b', 'c', 'd', 'e']) + frame.iloc[::2] = np.nan + frame.describe() + +You can select specific percentiles to include in the output: + +.. ipython:: python + + series.describe(percentiles=[.05, .25, .75, .95]) + +By default, the median is always included. 
+ +For a non-numerical Series object, :meth:`~Series.describe` will give a simple +summary of the number of unique values and most frequently occurring values: + +.. ipython:: python + + s = pd.Series(['a', 'a', 'b', 'b', 'a', 'a', np.nan, 'c', 'd', 'a']) + s.describe() + +Note that on a mixed-type DataFrame object, :meth:`~DataFrame.describe` will +restrict the summary to include only numerical columns or, if none are, only +categorical columns: + +.. ipython:: python + + frame = pd.DataFrame({'a': ['Yes', 'Yes', 'No', 'No'], 'b': range(4)}) + frame.describe() + +This behavior can be controlled by providing a list of types as ``include``/``exclude`` +arguments. The special value ``all`` can also be used: + +.. ipython:: python + + frame.describe(include=['object']) + frame.describe(include=['number']) + frame.describe(include='all') + +That feature relies on :ref:`select_dtypes `. Refer to +there for details about accepted inputs. + +.. _basics.idxmin: + +Index of min/max values +~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.idxmin` and :meth:`~DataFrame.idxmax` functions on Series +and DataFrame compute the index labels with the minimum and maximum +corresponding values: + +.. ipython:: python + + s1 = pd.Series(np.random.randn(5)) + s1 + s1.idxmin(), s1.idxmax() + + df1 = pd.DataFrame(np.random.randn(5, 3), columns=['A', 'B', 'C']) + df1 + df1.idxmin(axis=0) + df1.idxmax(axis=1) + +When there are multiple rows (or columns) matching the minimum or maximum +value, :meth:`~DataFrame.idxmin` and :meth:`~DataFrame.idxmax` return the first +matching index: + +.. ipython:: python + + df3 = pd.DataFrame([2, 1, 1, 3, np.nan], columns=['A'], index=list('edcba')) + df3 + df3['A'].idxmin() + +.. note:: + + ``idxmin`` and ``idxmax`` are called ``argmin`` and ``argmax`` in NumPy. + +.. 
_basics.discretization: + +Value counts (histogramming) / mode +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~Series.value_counts` Series method and top-level function compute a histogram +of a 1D array of values. It can also be used as a function on regular arrays: + +.. ipython:: python + + data = np.random.randint(0, 7, size=50) + data + s = pd.Series(data) + s.value_counts() + pd.value_counts(data) + +Similarly, you can get the most frequently occurring value(s) (the mode) of the values in a Series or DataFrame: + +.. ipython:: python + + s5 = pd.Series([1, 1, 3, 3, 3, 5, 5, 7, 7, 7]) + s5.mode() + df5 = pd.DataFrame({"A": np.random.randint(0, 7, size=50), + "B": np.random.randint(-10, 15, size=50)}) + df5.mode() + + +Discretization and quantiling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Continuous values can be discretized using the :func:`cut` (bins based on values) +and :func:`qcut` (bins based on sample quantiles) functions: + +.. ipython:: python + + arr = np.random.randn(20) + factor = pd.cut(arr, 4) + factor + + factor = pd.cut(arr, [-5, -1, 0, 1, 5]) + factor + +:func:`qcut` computes sample quantiles. For example, we could slice up some +normally distributed data into equal-size quartiles like so: + +.. ipython:: python + + arr = np.random.randn(30) + factor = pd.qcut(arr, [0, .25, .5, .75, 1]) + factor + pd.value_counts(factor) + +We can also pass infinite values to define the bins: + +.. ipython:: python + + arr = np.random.randn(20) + factor = pd.cut(arr, [-np.inf, 0, np.inf]) + factor + +.. _basics.apply: + +Function application +-------------------- + +To apply your own or another library's functions to pandas objects, +you should be aware of the four methods below. The appropriate +method to use depends on whether your function expects to operate +on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise. + +1. `Tablewise Function Application`_: :meth:`~DataFrame.pipe` +2.
`Row or Column-wise Function Application`_: :meth:`~DataFrame.apply` +3. `Aggregation API`_: :meth:`~DataFrame.agg` and :meth:`~DataFrame.transform` +4. `Applying Elementwise Functions`_: :meth:`~DataFrame.applymap` + +.. _basics.pipe: + +Tablewise function application +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``DataFrames`` and ``Series`` can be passed into functions. +However, if the function needs to be called in a chain, consider using the :meth:`~DataFrame.pipe` method. + +First some setup: + +.. ipython:: python + + def extract_city_name(df): + """ + Chicago, IL -> Chicago for city_name column + """ + df['city_name'] = df['city_and_code'].str.split(",").str.get(0) + return df + + def add_country_name(df, country_name=None): + """ + Chicago -> Chicago-US for city_name column + """ + col = 'city_name' + df['city_and_country'] = df[col] + country_name + return df + + df_p = pd.DataFrame({'city_and_code': ['Chicago, IL']}) + + +``extract_city_name`` and ``add_country_name`` are functions taking and returning ``DataFrames``. + +Now compare the following: + +.. ipython:: python + + add_country_name(extract_city_name(df_p), country_name='US') + +Is equivalent to: + +.. ipython:: python + + (df_p.pipe(extract_city_name) + .pipe(add_country_name, country_name="US")) + +Pandas encourages the second style, which is known as method chaining. +``pipe`` makes it easy to use your own or another library's functions +in method chains, alongside pandas' methods. + +In the example above, the functions ``extract_city_name`` and ``add_country_name`` each expected a ``DataFrame`` as the first positional argument. +What if the function you wish to apply takes its data as, say, the second argument? +In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``. +``.pipe`` will route the ``DataFrame`` to the argument specified in the tuple. + +For example, we can fit a regression using statsmodels. 
Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.ols, 'data')`` to ``pipe``: + +.. ipython:: python + :okwarning: + + import statsmodels.formula.api as sm + + bb = pd.read_csv('data/baseball.csv', index_col='id') + + (bb.query('h > 0') + .assign(ln_h=lambda df: np.log(df.h)) + .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)') + .fit() + .summary() + ) + +The pipe method is inspired by unix pipes and more recently dplyr_ and magrittr_, which +have introduced the popular ``(%>%)`` (read pipe) operator for R_. +The implementation of ``pipe`` here is quite clean and feels right at home in python. +We encourage you to view the source code of :meth:`~DataFrame.pipe`. + +.. _dplyr: https://github.com/hadley/dplyr +.. _magrittr: https://github.com/smbache/magrittr +.. _R: https://www.r-project.org + + +Row or column-wise function application +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Arbitrary functions can be applied along the axes of a DataFrame +using the :meth:`~DataFrame.apply` method, which, like the descriptive +statistics methods, takes an optional ``axis`` argument: + +.. ipython:: python + + df.apply(np.mean) + df.apply(np.mean, axis=1) + df.apply(lambda x: x.max() - x.min()) + df.apply(np.cumsum) + df.apply(np.exp) + +The :meth:`~DataFrame.apply` method will also dispatch on a string method name. + +.. ipython:: python + + df.apply('mean') + df.apply('mean', axis=1) + +The return type of the function passed to :meth:`~DataFrame.apply` affects the +type of the final output from ``DataFrame.apply`` for the default behaviour: + +* If the applied function returns a ``Series``, the final output is a ``DataFrame``. + The columns match the index of the ``Series`` returned by the applied function. +* If the applied function returns any other type, the final output is a ``Series``. 
+ +This default behaviour can be overridden using the ``result_type``, which +accepts three options: ``reduce``, ``broadcast``, and ``expand``. +These will determine how list-likes return values expand (or not) to a ``DataFrame``. + +:meth:`~DataFrame.apply` combined with some cleverness can be used to answer many questions +about a data set. For example, suppose we wanted to extract the date where the +maximum value for each column occurred: + +.. ipython:: python + + tsdf = pd.DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=1000)) + tsdf.apply(lambda x: x.idxmax()) + +You may also pass additional arguments and keyword arguments to the :meth:`~DataFrame.apply` +method. For instance, consider the following function you would like to apply: + +.. code-block:: python + + def subtract_and_divide(x, sub, divide=1): + return (x - sub) / divide + +You may then apply this function as follows: + +.. code-block:: python + + df.apply(subtract_and_divide, args=(5,), divide=3) + +Another useful feature is the ability to pass Series methods to carry out some +Series operation on each column or row: + +.. ipython:: python + :suppress: + + tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + tsdf.iloc[3:7] = np.nan + +.. ipython:: python + + tsdf + tsdf.apply(pd.Series.interpolate) + + +Finally, :meth:`~DataFrame.apply` takes an argument ``raw`` which is False by default, which +converts each row or column into a Series before applying the function. When +set to True, the passed function will instead receive an ndarray object, which +has positive performance implications if you do not need the indexing +functionality. + +.. _basics.aggregate: + +Aggregation API +~~~~~~~~~~~~~~~ + +The aggregation API allows one to express possibly multiple aggregation operations in a single concise way. 
+This API is similar across pandas objects, see :ref:`groupby API `, the +:ref:`window functions API `, and the :ref:`resample API `. +The entry point for aggregation is :meth:`DataFrame.aggregate`, or the alias +:meth:`DataFrame.agg`. + +We will use a starting frame similar to the one above: + +.. ipython:: python + + tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + tsdf.iloc[3:7] = np.nan + tsdf + +Using a single function is equivalent to :meth:`~DataFrame.apply`. You can also +pass named methods as strings. These will return a ``Series`` of the aggregated +output: + +.. ipython:: python + + tsdf.agg(np.sum) + + tsdf.agg('sum') + + # these are equivalent to a ``.sum()`` because we are aggregating + # on a single function + tsdf.sum() + +A single aggregation on a ``Series`` will return a scalar value: + +.. ipython:: python + + tsdf['A'].agg('sum') + + +Aggregating with multiple functions ++++++++++++++++++++++++++++++++++++ + +You can pass multiple aggregation arguments as a list. +The results of each of the passed functions will be a row in the resulting ``DataFrame``. +These are naturally named from the aggregation function. + +.. ipython:: python + + tsdf.agg(['sum']) + +Multiple functions yield multiple rows: + +.. ipython:: python + + tsdf.agg(['sum', 'mean']) + +On a ``Series``, multiple functions return a ``Series``, indexed by the function names: + +.. ipython:: python + + tsdf['A'].agg(['sum', 'mean']) + +Passing a ``lambda`` function will yield a ``<lambda>`` named row: + +.. ipython:: python + + tsdf['A'].agg(['sum', lambda x: x.mean()]) + +Passing a named function will yield that name for the row: + +..
ipython:: python + + def mymean(x): + return x.mean() + + tsdf['A'].agg(['sum', mymean]) + +Aggregating with a dict ++++++++++++++++++++++++ + +Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg`` +allows you to customize which functions are applied to which columns. Note that the results +are not in any particular order, you can use an ``OrderedDict`` instead to guarantee ordering. + +.. ipython:: python + + tsdf.agg({'A': 'mean', 'B': 'sum'}) + +Passing a list-like will generate a ``DataFrame`` output. You will get a matrix-like output +of all of the aggregators. The output will consist of all unique functions. Those that are +not noted for a particular column will be ``NaN``: + +.. ipython:: python + + tsdf.agg({'A': ['mean', 'min'], 'B': 'sum'}) + +.. _basics.aggregation.mixed_string: + +Mixed dtypes +++++++++++++ + +When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid +aggregations. This is similar to how groupby ``.agg`` works. + +.. ipython:: python + + mdf = pd.DataFrame({'A': [1, 2, 3], + 'B': [1., 2., 3.], + 'C': ['foo', 'bar', 'baz'], + 'D': pd.date_range('20130101', periods=3)}) + mdf.dtypes + +.. ipython:: python + + mdf.agg(['min', 'sum']) + +.. _basics.aggregation.custom_describe: + +Custom describe ++++++++++++++++ + +With ``.agg()`` is it possible to easily create a custom describe function, similar +to the built in :ref:`describe function `. + +.. ipython:: python + + from functools import partial + + q_25 = partial(pd.Series.quantile, q=0.25) + q_25.__name__ = '25%' + q_75 = partial(pd.Series.quantile, q=0.75) + q_75.__name__ = '75%' + + tsdf.agg(['count', 'mean', 'std', 'min', q_25, 'median', q_75, 'max']) + +.. _basics.transform: + +Transform API +~~~~~~~~~~~~~ + +The :meth:`~DataFrame.transform` method returns an object that is indexed the same (same size) +as the original. This API allows you to provide *multiple* operations at the same +time rather than one-by-one. 
Its API is quite similar to the ``.agg`` API. + +We create a frame similar to the one used in the above sections. + +.. ipython:: python + + tsdf = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + tsdf.iloc[3:7] = np.nan + tsdf + +Transform the entire frame. ``.transform()`` allows input functions as: a NumPy function, a string +function name or a user defined function. + +.. ipython:: python + :okwarning: + + tsdf.transform(np.abs) + tsdf.transform('abs') + tsdf.transform(lambda x: x.abs()) + +Here :meth:`~DataFrame.transform` received a single function; this is equivalent to a ufunc application. + +.. ipython:: python + + np.abs(tsdf) + +Passing a single function to ``.transform()`` with a ``Series`` will yield a single ``Series`` in return. + +.. ipython:: python + + tsdf['A'].transform(np.abs) + + +Transform with multiple functions ++++++++++++++++++++++++++++++++++ + +Passing multiple functions will yield a column MultiIndexed DataFrame. +The first level will be the original frame column names; the second level +will be the names of the transforming functions. + +.. ipython:: python + + tsdf.transform([np.abs, lambda x: x + 1]) + +Passing multiple functions to a Series will yield a DataFrame. The +resulting column names will be the transforming functions. + +.. ipython:: python + + tsdf['A'].transform([np.abs, lambda x: x + 1]) + + +Transforming with a dict +++++++++++++++++++++++++ + + +Passing a dict of functions will allow selective transforming per column. + +.. ipython:: python + + tsdf.transform({'A': np.abs, 'B': lambda x: x + 1}) + +Passing a dict of lists will generate a MultiIndexed DataFrame with these +selective transforms. + +.. ipython:: python + :okwarning: + + tsdf.transform({'A': np.abs, 'B': [lambda x: x + 1, 'sqrt']}) + +.. 
_basics.elementwise: + +Applying elementwise functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Since not all functions can be vectorized (accept NumPy arrays and return +another array or value), the methods :meth:`~DataFrame.applymap` on DataFrame +and analogously :meth:`~Series.map` on Series accept any Python function taking +a single value and returning a single value. For example: + +.. ipython:: python + :suppress: + + df4 = df_orig.copy() + +.. ipython:: python + + df4 + + def f(x): + return len(str(x)) + + df4['one'].map(f) + df4.applymap(f) + +:meth:`Series.map` has an additional feature; it can be used to easily +"link" or "map" values defined by a secondary series. This is closely related +to :ref:`merging/joining functionality `: + +.. ipython:: python + + s = pd.Series(['six', 'seven', 'six', 'seven', 'six'], + index=['a', 'b', 'c', 'd', 'e']) + t = pd.Series({'six': 6., 'seven': 7.}) + s + s.map(t) + + +.. _basics.reindexing: + +Reindexing and altering labels +------------------------------ + +:meth:`~Series.reindex` is the fundamental data alignment method in pandas. +It is used to implement nearly all other features relying on label-alignment +functionality. To *reindex* means to conform the data to match a given set of +labels along a particular axis. This accomplishes several things: + +* Reorders the existing data to match a new set of labels +* Inserts missing value (NA) markers in label locations where no data for + that label existed +* If specified, **fill** data for missing labels using logic (highly relevant + to working with time series data) + +Here is a simple example: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) + s + s.reindex(['e', 'b', 'f', 'd']) + +Here, the ``f`` label was not contained in the Series and hence appears as +``NaN`` in the result. + +With a DataFrame, you can simultaneously reindex the index and columns: + +.. 
ipython:: python + + df + df.reindex(index=['c', 'f', 'b'], columns=['three', 'two', 'one']) + +You may also use ``reindex`` with an ``axis`` keyword: + +.. ipython:: python + + df.reindex(['c', 'f', 'b'], axis='index') + +Note that the ``Index`` objects containing the actual axis labels can be +**shared** between objects. So if we have a Series and a DataFrame, the +following can be done: + +.. ipython:: python + + rs = s.reindex(df.index) + rs + rs.index is df.index + +This means that the reindexed Series's index is the same Python object as the +DataFrame's index. + +.. versionadded:: 0.21.0 + +:meth:`DataFrame.reindex` also supports an "axis-style" calling convention, +where you specify a single ``labels`` argument and the ``axis`` it applies to. + +.. ipython:: python + + df.reindex(['c', 'f', 'b'], axis='index') + df.reindex(['three', 'two', 'one'], axis='columns') + +.. seealso:: + + :ref:`MultiIndex / Advanced Indexing ` is an even more concise way of + doing reindexing. + +.. note:: + + When writing performance-sensitive code, there is a good reason to spend + some time becoming a reindexing ninja: **many operations are faster on + pre-aligned data**. Adding two unaligned DataFrames internally triggers a + reindexing step. For exploratory analysis you will hardly notice the + difference (because ``reindex`` has been heavily optimized), but when CPU + cycles matter sprinkling a few explicit ``reindex`` calls here and there can + have an impact. + +.. _basics.reindex_like: + +Reindexing to align with another object +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may wish to take an object and reindex its axes to be labeled the same as +another object. While the syntax for this is straightforward albeit verbose, it +is a common enough operation that the :meth:`~DataFrame.reindex_like` method is +available to make this simpler: + +.. ipython:: python + :suppress: + + df2 = df.reindex(['a', 'b', 'c'], columns=['one', 'two']) + df3 = df2 - df2.mean() + + +.. 
ipython:: python + + df2 + df3 + df.reindex_like(df2) + +.. _basics.align: + +Aligning objects with each other with ``align`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~Series.align` method is the fastest way to simultaneously align two objects. It +supports a ``join`` argument (related to :ref:`joining and merging `): + + - ``join='outer'``: take the union of the indexes (default) + - ``join='left'``: use the calling object's index + - ``join='right'``: use the passed object's index + - ``join='inner'``: intersect the indexes + +It returns a tuple with both of the reindexed Series: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) + s1 = s[:4] + s2 = s[1:] + s1.align(s2) + s1.align(s2, join='inner') + s1.align(s2, join='left') + +.. _basics.df_join: + +For DataFrames, the join method will be applied to both the index and the +columns by default: + +.. ipython:: python + + df.align(df2, join='inner') + +You can also pass an ``axis`` option to only align on the specified axis: + +.. ipython:: python + + df.align(df2, join='inner', axis=0) + +.. _basics.align.frame.series: + +If you pass a Series to :meth:`DataFrame.align`, you can choose to align both +objects either on the DataFrame's index or columns using the ``axis`` argument: + +.. ipython:: python + + df.align(df2.iloc[0], axis=1) + +.. _basics.reindex_fill: + +Filling while reindexing +~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~Series.reindex` takes an optional parameter ``method`` which is a +filling method chosen from the following table: + +.. csv-table:: + :header: "Method", "Action" + :widths: 30, 50 + + pad / ffill, Fill values forward + bfill / backfill, Fill values backward + nearest, Fill from the nearest index value + +We illustrate these fill methods on a simple Series: + +.. 
ipython:: python + + rng = pd.date_range('1/3/2000', periods=8) + ts = pd.Series(np.random.randn(8), index=rng) + ts2 = ts[[0, 3, 6]] + ts + ts2 + + ts2.reindex(ts.index) + ts2.reindex(ts.index, method='ffill') + ts2.reindex(ts.index, method='bfill') + ts2.reindex(ts.index, method='nearest') + +These methods require that the indexes are **ordered** increasing or +decreasing. + +Note that the same result could have been achieved using +:ref:`fillna ` (except for ``method='nearest'``) or +:ref:`interpolate `: + +.. ipython:: python + + ts2.reindex(ts.index).fillna(method='ffill') + +:meth:`~Series.reindex` will raise a ValueError if the index is not monotonically +increasing or decreasing. :meth:`~Series.fillna` and :meth:`~Series.interpolate` +will not perform any checks on the order of the index. + +.. _basics.limits_on_reindex_fill: + +Limits on filling while reindexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``limit`` and ``tolerance`` arguments provide additional control over +filling while reindexing. Limit specifies the maximum count of consecutive +matches: + +.. ipython:: python + + ts2.reindex(ts.index, method='ffill', limit=1) + +In contrast, tolerance specifies the maximum distance between the index and +indexer values: + +.. ipython:: python + + ts2.reindex(ts.index, method='ffill', tolerance='1 day') + +Notice that when used on a ``DatetimeIndex``, ``TimedeltaIndex`` or +``PeriodIndex``, ``tolerance`` will coerced into a ``Timedelta`` if possible. +This allows you to specify tolerance with appropriate strings. + +.. _basics.drop: + +Dropping labels from an axis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A method closely related to ``reindex`` is the :meth:`~DataFrame.drop` function. +It removes a set of labels from an axis: + +.. ipython:: python + + df + df.drop(['a', 'd'], axis=0) + df.drop(['one'], axis=1) + +Note that the following also works, but is a bit less obvious / clean: + +.. ipython:: python + + df.reindex(df.index.difference(['a', 'd'])) + +.. 
_basics.rename: + +Renaming / mapping labels +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.rename` method allows you to relabel an axis based on some +mapping (a dict or Series) or an arbitrary function. + +.. ipython:: python + + s + s.rename(str.upper) + +If you pass a function, it must return a value when called with any of the +labels (and must produce a set of unique values). A dict or +Series can also be used: + +.. ipython:: python + + df.rename(columns={'one': 'foo', 'two': 'bar'}, + index={'a': 'apple', 'b': 'banana', 'd': 'durian'}) + +If the mapping doesn't include a column/index label, it isn't renamed. Note that +extra labels in the mapping don't throw an error. + +.. versionadded:: 0.21.0 + +:meth:`DataFrame.rename` also supports an "axis-style" calling convention, where +you specify a single ``mapper`` and the ``axis`` to apply that mapping to. + +.. ipython:: python + + df.rename({'one': 'foo', 'two': 'bar'}, axis='columns') + df.rename({'a': 'apple', 'b': 'banana', 'd': 'durian'}, axis='index') + + +The :meth:`~DataFrame.rename` method also provides an ``inplace`` named +parameter that is by default ``False`` and copies the underlying data. Pass +``inplace=True`` to rename the data in place. + +Finally, :meth:`~Series.rename` also accepts a scalar or list-like +for altering the ``Series.name`` attribute. + +.. ipython:: python + + s.rename("scalar-name") + +.. _basics.rename_axis: + +.. versionadded:: 0.24.0 + +The methods :meth:`~DataFrame.rename_axis` and :meth:`~Series.rename_axis` +allow specific names of a `MultiIndex` to be changed (as opposed to the +labels). + +.. ipython:: python + + df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], + 'y': [10, 20, 30, 40, 50, 60]}, + index=pd.MultiIndex.from_product([['a', 'b', 'c'], [1, 2]], + names=['let', 'num'])) + df + df.rename_axis(index={'let': 'abc'}) + df.rename_axis(index=str.upper) + +.. 
_basics.iteration: + +Iteration +--------- + +The behavior of basic iteration over pandas objects depends on the type. +When iterating over a Series, it is regarded as array-like, and basic iteration +produces the values. DataFrames follow the dict-like convention of iterating +over the "keys" of the objects. + +In short, basic iteration (``for i in object``) produces: + +* **Series**: values +* **DataFrame**: column labels + +Thus, for example, iterating over a DataFrame gives you the column names: + +.. ipython:: python + + df = pd.DataFrame({'col1': np.random.randn(3), + 'col2': np.random.randn(3)}, index=['a', 'b', 'c']) + + for col in df: + print(col) + + +Pandas objects also have the dict-like :meth:`~DataFrame.items` method to +iterate over the (key, value) pairs. + +To iterate over the rows of a DataFrame, you can use the following methods: + +* :meth:`~DataFrame.iterrows`: Iterate over the rows of a DataFrame as (index, Series) pairs. + This converts the rows to Series objects, which can change the dtypes and has some + performance implications. +* :meth:`~DataFrame.itertuples`: Iterate over the rows of a DataFrame + as namedtuples of the values. This is a lot faster than + :meth:`~DataFrame.iterrows`, and is in most cases preferable to use + to iterate over the values of a DataFrame. + +.. warning:: + + Iterating through pandas objects is generally **slow**. In many cases, + iterating manually over the rows is not needed and can be avoided with + one of the following approaches: + + * Look for a *vectorized* solution: many operations can be performed using + built-in methods or NumPy functions, (boolean) indexing, ... + + * When you have a function that cannot work on the full DataFrame/Series + at once, it is better to use :meth:`~DataFrame.apply` instead of iterating + over the values. See the docs on :ref:`function application `. 
+ + * If you need to do iterative manipulations on the values but performance is + important, consider writing the inner loop with cython or numba. + See the :ref:`enhancing performance ` section for some + examples of this approach. + +.. warning:: + + You should **never modify** something you are iterating over. + This is not guaranteed to work in all cases. Depending on the + data types, the iterator returns a copy and not a view, and writing + to it will have no effect! + + For example, in the following case setting the value has no effect: + + .. ipython:: python + + df = pd.DataFrame({'a': [1, 2, 3], 'b': ['a', 'b', 'c']}) + + for index, row in df.iterrows(): + row['a'] = 10 + + df + +items +~~~~~ + +Consistent with the dict-like interface, :meth:`~DataFrame.items` iterates +through key-value pairs: + +* **Series**: (index, scalar value) pairs +* **DataFrame**: (column, Series) pairs + +For example: + +.. ipython:: python + + for label, ser in df.items(): + print(label) + print(ser) + +.. _basics.iterrows: + +iterrows +~~~~~~~~ + +:meth:`~DataFrame.iterrows` allows you to iterate through the rows of a +DataFrame as Series objects. It returns an iterator yielding each +index value along with a Series containing the data in each row: + +.. ipython:: python + + for row_index, row in df.iterrows(): + print(row_index, row, sep='\n') + +.. note:: + + Because :meth:`~DataFrame.iterrows` returns a Series for each row, + it does **not** preserve dtypes across the rows (dtypes are + preserved across columns for DataFrames). For example, + + .. ipython:: python + + df_orig = pd.DataFrame([[1, 1.5]], columns=['int', 'float']) + df_orig.dtypes + row = next(df_orig.iterrows())[1] + row + + All values in ``row``, returned as a Series, are now upcasted + to floats, also the original integer value in column `x`: + + .. 
ipython:: python + + row['int'].dtype + df_orig['int'].dtype + + To preserve dtypes while iterating over the rows, it is better + to use :meth:`~DataFrame.itertuples` which returns namedtuples of the values + and which is generally much faster than :meth:`~DataFrame.iterrows`. + +For instance, a contrived way to transpose the DataFrame would be: + +.. ipython:: python + + df2 = pd.DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) + print(df2) + print(df2.T) + + df2_t = pd.DataFrame({idx: values for idx, values in df2.iterrows()}) + print(df2_t) + +itertuples +~~~~~~~~~~ + +The :meth:`~DataFrame.itertuples` method will return an iterator +yielding a namedtuple for each row in the DataFrame. The first element +of the tuple will be the row's corresponding index value, while the +remaining values are the row values. + +For instance: + +.. ipython:: python + + for row in df.itertuples(): + print(row) + +This method does not convert the row to a Series object; it merely +returns the values inside a namedtuple. Therefore, +:meth:`~DataFrame.itertuples` preserves the data type of the values +and is generally faster as :meth:`~DataFrame.iterrows`. + +.. note:: + + The column names will be renamed to positional names if they are + invalid Python identifiers, repeated, or start with an underscore. + With a large number of columns (>255), regular tuples are returned. + +.. _basics.dt_accessors: + +.dt accessor +------------ + +``Series`` has an accessor to succinctly return datetime like properties for the +*values* of the Series, if it is a datetime/period like Series. +This will return a Series, indexed like the existing Series. + +.. ipython:: python + + # datetime + s = pd.Series(pd.date_range('20130101 09:10:12', periods=4)) + s + s.dt.hour + s.dt.second + s.dt.day + +This enables nice expressions like this: + +.. ipython:: python + + s[s.dt.day == 2] + +You can easily produces tz aware transformations: + +.. 
ipython:: python + + stz = s.dt.tz_localize('US/Eastern') + stz + stz.dt.tz + +You can also chain these types of operations: + +.. ipython:: python + + s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') + +You can also format datetime values as strings with :meth:`Series.dt.strftime` which +supports the same format as the standard :meth:`~datetime.datetime.strftime`. + +.. ipython:: python + + # DatetimeIndex + s = pd.Series(pd.date_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + +.. ipython:: python + + # PeriodIndex + s = pd.Series(pd.period_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + +The ``.dt`` accessor works for period and timedelta dtypes. + +.. ipython:: python + + # period + s = pd.Series(pd.period_range('20130101', periods=4, freq='D')) + s + s.dt.year + s.dt.day + +.. ipython:: python + + # timedelta + s = pd.Series(pd.timedelta_range('1 day 00:00:05', periods=4, freq='s')) + s + s.dt.days + s.dt.seconds + s.dt.components + +.. note:: + + ``Series.dt`` will raise a ``TypeError`` if you access with a non-datetime-like values. + +Vectorized string methods +------------------------- + +Series is equipped with a set of string processing methods that make it easy to +operate on each element of the array. Perhaps most importantly, these methods +exclude missing/NA values automatically. These are accessed via the Series's +``str`` attribute and generally have names matching the equivalent (scalar) +built-in string methods. For example: + + .. ipython:: python + + s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'], + dtype="string") + s.str.lower() + +Powerful pattern-matching methods are provided as well, but note that +pattern-matching generally uses `regular expressions +`__ by default (and in some cases +always uses them). + +.. note:: + + Prior to pandas 1.0, string methods were only available on ``object`` -dtype + ``Series``. 
Pandas 1.0 added the :class:`StringDtype` which is dedicated + to strings. See :ref:`text.types` for more. + +Please see :ref:`Vectorized String Methods ` for a complete +description. + +.. _basics.sorting: + +Sorting +------- + +Pandas supports three kinds of sorting: sorting by index labels, +sorting by column values, and sorting by a combination of both. + +.. _basics.sort_index: + +By index +~~~~~~~~ + +The :meth:`Series.sort_index` and :meth:`DataFrame.sort_index` methods are +used to sort a pandas object by its index levels. + +.. ipython:: python + + df = pd.DataFrame({ + 'one': pd.Series(np.random.randn(3), index=['a', 'b', 'c']), + 'two': pd.Series(np.random.randn(4), index=['a', 'b', 'c', 'd']), + 'three': pd.Series(np.random.randn(3), index=['b', 'c', 'd'])}) + + unsorted_df = df.reindex(index=['a', 'd', 'c', 'b'], + columns=['three', 'two', 'one']) + unsorted_df + + # DataFrame + unsorted_df.sort_index() + unsorted_df.sort_index(ascending=False) + unsorted_df.sort_index(axis=1) + + # Series + unsorted_df['three'].sort_index() + +.. _basics.sort_values: + +By values +~~~~~~~~~ + +The :meth:`Series.sort_values` method is used to sort a `Series` by its values. The +:meth:`DataFrame.sort_values` method is used to sort a `DataFrame` by its column or row values. +The optional ``by`` parameter to :meth:`DataFrame.sort_values` may used to specify one or more columns +to use to determine the sorted order. + +.. ipython:: python + + df1 = pd.DataFrame({'one': [2, 1, 1, 1], + 'two': [1, 3, 2, 4], + 'three': [5, 4, 3, 2]}) + df1.sort_values(by='two') + +The ``by`` parameter can take a list of column names, e.g.: + +.. ipython:: python + + df1[['one', 'two', 'three']].sort_values(by=['one', 'two']) + +These methods have special treatment of NA values via the ``na_position`` +argument: + +.. ipython:: python + + s[2] = np.nan + s.sort_values() + s.sort_values(na_position='first') + +.. 
_basics.sort_indexes_and_values: + +By indexes and values +~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.23.0 + +Strings passed as the ``by`` parameter to :meth:`DataFrame.sort_values` may +refer to either columns or index level names. + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), + ('b', 2), ('b', 1), ('b', 1)]) + idx.names = ['first', 'second'] + + # Build DataFrame + df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, + index=idx) + df_multi + +Sort by 'second' (index) and 'A' (column) + +.. ipython:: python + + df_multi.sort_values(by=['second', 'A']) + +.. note:: + + If a string matches both a column name and an index level name then a + warning is issued and the column takes precedence. This will result in an + ambiguity error in a future version. + +.. _basics.searchsorted: + +searchsorted +~~~~~~~~~~~~ + +Series has the :meth:`~Series.searchsorted` method, which works similarly to +:meth:`numpy.ndarray.searchsorted`. + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser.searchsorted([0, 3]) + ser.searchsorted([0, 4]) + ser.searchsorted([1, 3], side='right') + ser.searchsorted([1, 3], side='left') + ser = pd.Series([3, 1, 2]) + ser.searchsorted([0, 3], sorter=np.argsort(ser)) + +.. _basics.nsorted: + +smallest / largest values +~~~~~~~~~~~~~~~~~~~~~~~~~ + +``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the +smallest or largest :math:`n` values. For a large ``Series`` this can be much +faster than sorting the entire Series and calling ``head(n)`` on the result. + +.. ipython:: python + + s = pd.Series(np.random.permutation(10)) + s + s.sort_values() + s.nsmallest(3) + s.nlargest(3) + +``DataFrame`` also has the ``nlargest`` and ``nsmallest`` methods. + +.. 
ipython:: python + + df = pd.DataFrame({'a': [-2, -1, 1, 10, 8, 11, -1], + 'b': list('abdceff'), + 'c': [1.0, 2.0, 4.0, 3.2, np.nan, 3.0, 4.0]}) + df.nlargest(3, 'a') + df.nlargest(5, ['a', 'c']) + df.nsmallest(3, 'a') + df.nsmallest(5, ['a', 'c']) + + +.. _basics.multiindex_sorting: + +Sorting by a MultiIndex column +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You must be explicit about sorting when the column is a MultiIndex, and fully specify +all levels to ``by``. + +.. ipython:: python + + df1.columns = pd.MultiIndex.from_tuples([('a', 'one'), + ('a', 'two'), + ('b', 'three')]) + df1.sort_values(by=('a', 'two')) + + +Copying +------- + +The :meth:`~DataFrame.copy` method on pandas objects copies the underlying data (though not +the axis indexes, since they are immutable) and returns a new object. Note that +**it is seldom necessary to copy objects**. For example, there are only a +handful of ways to alter a DataFrame *in-place*: + +* Inserting, deleting, or modifying a column. +* Assigning to the ``index`` or ``columns`` attributes. +* For homogeneous data, directly modifying the values via the ``values`` + attribute or advanced indexing. + +To be clear, no pandas method has the side effect of modifying your data; +almost every method returns a new object, leaving the original object +untouched. If the data is modified, it is because you did so explicitly. + +.. _basics.dtypes: + +dtypes +------ + +For the most part, pandas uses NumPy arrays and dtypes for Series or individual +columns of a DataFrame. NumPy provides support for ``float``, +``int``, ``bool``, ``timedelta64[ns]`` and ``datetime64[ns]`` (note that NumPy +does not support timezone-aware datetimes). + +Pandas and third-party libraries *extend* NumPy's type system in a few places. +This section describes the extensions pandas has made internally. +See :ref:`extending.extension-types` for how to write your own extension that +works with pandas. 
See :ref:`ecosystem.extensions` for a list of third-party +libraries that have implemented an extension. + +The following table lists all of pandas extension types. For methods requiring ``dtype`` +arguments, strings can be specified as indicated. See the respective +documentation sections for more on each type. + ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| Kind of Data | Data Type | Scalar | Array | String Aliases | Documentation | ++===================+===========================+====================+===============================+=========================================+===============================+ +| tz-aware datetime | :class:`DatetimeTZDtype` | :class:`Timestamp` | :class:`arrays.DatetimeArray` | ``'datetime64[ns, ]'`` | :ref:`timeseries.timezone` | ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| Categorical | :class:`CategoricalDtype` | (none) | :class:`Categorical` | ``'category'`` | :ref:`categorical` | ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| period | :class:`PeriodDtype` | :class:`Period` | :class:`arrays.PeriodArray` | ``'period[]'``, | :ref:`timeseries.periods` | +| (time spans) | | | | ``'Period[]'`` | | ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| sparse | :class:`SparseDtype` | (none) | :class:`arrays.SparseArray` | ``'Sparse'``, ``'Sparse[int]'``, | :ref:`sparse` | +| | | | | ``'Sparse[float]'`` | | 
++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| intervals | :class:`IntervalDtype` | :class:`Interval` | :class:`arrays.IntervalArray` | ``'interval'``, ``'Interval'``, | :ref:`advanced.intervalindex` | +| | | | | ``'Interval[]'``, | | +| | | | | ``'Interval[datetime64[ns, ]]'``, | | +| | | | | ``'Interval[timedelta64[]]'`` | | ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| nullable integer + :class:`Int64Dtype`, ... | (none) | :class:`arrays.IntegerArray` | ``'Int8'``, ``'Int16'``, ``'Int32'``, | :ref:`integer_na` | +| | | | | ``'Int64'``, ``'UInt8'``, ``'UInt16'``, | | +| | | | | ``'UInt32'``, ``'UInt64'`` | | ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| Strings | :class:`StringDtype` | :class:`str` | :class:`arrays.StringArray` | ``'string'`` | :ref:`text` | ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ +| Boolean (with NA) | :class:`BooleanDtype` | :class:`bool` | :class:`arrays.BooleanArray` | ``'boolean'`` | :ref:`api.arrays.bool` | ++-------------------+---------------------------+--------------------+-------------------------------+-----------------------------------------+-------------------------------+ + +Pandas has two ways to store strings. + +1. ``object`` dtype, which can hold any Python object, including strings. +2. :class:`StringDtype`, which is dedicated to strings. + +Generally, we recommend using :class:`StringDtype`. See :ref:`text.types` fore more. 
+ +Finally, arbitrary objects may be stored using the ``object`` dtype, but should +be avoided to the extent possible (for performance and interoperability with +other libraries and methods. See :ref:`basics.object_conversion`). + +A convenient :attr:`~DataFrame.dtypes` attribute for DataFrame returns a Series +with the data type of each column. + +.. ipython:: python + + dft = pd.DataFrame({'A': np.random.rand(3), + 'B': 1, + 'C': 'foo', + 'D': pd.Timestamp('20010102'), + 'E': pd.Series([1.0] * 3).astype('float32'), + 'F': False, + 'G': pd.Series([1] * 3, dtype='int8')}) + dft + dft.dtypes + +On a ``Series`` object, use the :attr:`~Series.dtype` attribute. + +.. ipython:: python + + dft['A'].dtype + +If a pandas object contains data with multiple dtypes *in a single column*, the +dtype of the column will be chosen to accommodate all of the data types +(``object`` is the most general). + +.. ipython:: python + + # these ints are coerced to floats + pd.Series([1, 2, 3, 4, 5, 6.]) + + # string data forces an ``object`` dtype + pd.Series([1, 2, 3, 6., 'foo']) + +The number of columns of each type in a ``DataFrame`` can be found by calling +``DataFrame.dtypes.value_counts()``. + +.. ipython:: python + + dft.dtypes.value_counts() + +Numeric dtypes will propagate and can coexist in DataFrames. +If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, +or a passed ``Series``), then it will be preserved in DataFrame operations. Furthermore, +different numeric dtypes will **NOT** be combined. The following example will give you a taste. + +.. 
ipython:: python + + df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32') + df1 + df1.dtypes + df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'), + 'B': pd.Series(np.random.randn(8)), + 'C': pd.Series(np.array(np.random.randn(8), + dtype='uint8'))}) + df2 + df2.dtypes + +defaults +~~~~~~~~ + +By default integer types are ``int64`` and float types are ``float64``, +*regardless* of platform (32-bit or 64-bit). +The following will all result in ``int64`` dtypes. + +.. ipython:: python + + pd.DataFrame([1, 2], columns=['a']).dtypes + pd.DataFrame({'a': [1, 2]}).dtypes + pd.DataFrame({'a': 1}, index=list(range(2))).dtypes + +Note that Numpy will choose *platform-dependent* types when creating arrays. +The following **WILL** result in ``int32`` on 32-bit platform. + +.. ipython:: python + + frame = pd.DataFrame(np.array([1, 2])) + + +upcasting +~~~~~~~~~ + +Types can potentially be *upcasted* when combined with other types, meaning they are promoted +from the current type (e.g. ``int`` to ``float``). + +.. ipython:: python + + df3 = df1.reindex_like(df2).fillna(value=0.0) + df2 + df3 + df3.dtypes + +:meth:`DataFrame.to_numpy` will return the *lower-common-denominator* of the dtypes, meaning +the dtype that can accommodate **ALL** of the types in the resulting homogeneous dtyped NumPy array. This can +force some *upcasting*. + +.. ipython:: python + + df3.to_numpy().dtype + +astype +~~~~~~ + +.. _basics.cast: + +You can use the :meth:`~DataFrame.astype` method to explicitly convert dtypes from one to another. These will by default return a copy, +even if the dtype was unchanged (pass ``copy=False`` to change this behavior). In addition, they will raise an +exception if the astype operation is invalid. + +Upcasting is always according to the **numpy** rules. If two different dtypes are involved in an operation, +then the more *general* one will be used as the result of the operation. + +.. 
ipython:: python + + df3 + df3.dtypes + + # conversion of dtypes + df3.astype('float32').dtypes + + +Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`. + +.. ipython:: python + + dft = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}) + dft[['a', 'b']] = dft[['a', 'b']].astype(np.uint8) + dft + dft.dtypes + +Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype`. + +.. ipython:: python + + dft1 = pd.DataFrame({'a': [1, 0, 1], 'b': [4, 5, 6], 'c': [7, 8, 9]}) + dft1 = dft1.astype({'a': np.bool, 'c': np.float64}) + dft1 + dft1.dtypes + +.. note:: + + When trying to convert a subset of columns to a specified type using :meth:`~DataFrame.astype` and :meth:`~DataFrame.loc`, upcasting occurs. + + :meth:`~DataFrame.loc` tries to fit in what we are assigning to the current dtypes, while ``[]`` will overwrite them taking the dtype from the right hand side. Therefore the following piece of code produces the unintended result. + + .. ipython:: python + + dft = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [7, 8, 9]}) + dft.loc[:, ['a', 'b']].astype(np.uint8).dtypes + dft.loc[:, ['a', 'b']] = dft.loc[:, ['a', 'b']].astype(np.uint8) + dft.dtypes + +.. _basics.object_conversion: + +object conversion +~~~~~~~~~~~~~~~~~ + +pandas offers various functions to try to force conversion of types from the ``object`` dtype to other types. +In cases where the data is already of the correct type, but stored in an ``object`` array, the +:meth:`DataFrame.infer_objects` and :meth:`Series.infer_objects` methods can be used to soft convert +to the correct type. + + .. ipython:: python + + import datetime + df = pd.DataFrame([[1, 2], + ['a', 'b'], + [datetime.datetime(2016, 3, 2), + datetime.datetime(2016, 3, 2)]]) + df = df.T + df + df.dtypes + +Because the data was transposed the original inference stored all columns as object, which +``infer_objects`` will correct. + + .. 
ipython:: python + + df.infer_objects().dtypes + +The following functions are available for one dimensional object arrays or scalars to perform +hard conversion of objects to a specified type: + +* :meth:`~pandas.to_numeric` (conversion to numeric dtypes) + + .. ipython:: python + + m = ['1.1', 2, 3] + pd.to_numeric(m) + +* :meth:`~pandas.to_datetime` (conversion to datetime objects) + + .. ipython:: python + + import datetime + m = ['2016-07-09', datetime.datetime(2016, 3, 2)] + pd.to_datetime(m) + +* :meth:`~pandas.to_timedelta` (conversion to timedelta objects) + + .. ipython:: python + + m = ['5us', pd.Timedelta('1day')] + pd.to_timedelta(m) + +To force a conversion, we can pass in an ``errors`` argument, which specifies how pandas should deal with elements +that cannot be converted to desired dtype or object. By default, ``errors='raise'``, meaning that any errors encountered +will be raised during the conversion process. However, if ``errors='coerce'``, these errors will be ignored and pandas +will convert problematic elements to ``pd.NaT`` (for datetime and timedelta) or ``np.nan`` (for numeric). This might be +useful if you are reading in data which is mostly of the desired dtype (e.g. numeric, datetime), but occasionally has +non-conforming elements intermixed that you want to represent as missing: + +.. ipython:: python + + import datetime + m = ['apple', datetime.datetime(2016, 3, 2)] + pd.to_datetime(m, errors='coerce') + + m = ['apple', 2, 3] + pd.to_numeric(m, errors='coerce') + + m = ['apple', pd.Timedelta('1day')] + pd.to_timedelta(m, errors='coerce') + +The ``errors`` parameter has a third option of ``errors='ignore'``, which will simply return the passed in data if it +encounters any errors with the conversion to a desired data type: + +.. 
ipython:: python + + import datetime + m = ['apple', datetime.datetime(2016, 3, 2)] + pd.to_datetime(m, errors='ignore') + + m = ['apple', 2, 3] + pd.to_numeric(m, errors='ignore') + + m = ['apple', pd.Timedelta('1day')] + pd.to_timedelta(m, errors='ignore') + +In addition to object conversion, :meth:`~pandas.to_numeric` provides another argument ``downcast``, which gives the +option of downcasting the newly (or already) numeric data to a smaller dtype, which can conserve memory: + +.. ipython:: python + + m = ['1', 2, 3] + pd.to_numeric(m, downcast='integer') # smallest signed int dtype + pd.to_numeric(m, downcast='signed') # same as 'integer' + pd.to_numeric(m, downcast='unsigned') # smallest unsigned int dtype + pd.to_numeric(m, downcast='float') # smallest float dtype + +As these methods apply only to one-dimensional arrays, lists or scalars, they cannot be used directly on multi-dimensional objects such +as DataFrames. However, with :meth:`~pandas.DataFrame.apply`, we can "apply" the function over each column efficiently: + +.. ipython:: python + + import datetime + df = pd.DataFrame([ + ['2016-07-09', datetime.datetime(2016, 3, 2)]] * 2, dtype='O') + df + df.apply(pd.to_datetime) + + df = pd.DataFrame([['1.1', 2, 3]] * 2, dtype='O') + df + df.apply(pd.to_numeric) + + df = pd.DataFrame([['5us', pd.Timedelta('1day')]] * 2, dtype='O') + df + df.apply(pd.to_timedelta) + +gotchas +~~~~~~~ + +Performing selection operations on ``integer`` type data can easily upcast the data to ``floating``. +The dtype of the input data will be preserved in cases where ``nans`` are not introduced. +See also :ref:`Support for integer NA <gotchas.intna>`. + +.. ipython:: python + + dfi = df3.astype('int32') + dfi['E'] = 1 + dfi + dfi.dtypes + + casted = dfi[dfi > 0] + casted + casted.dtypes + +While float dtypes are unchanged. + +.. 
ipython:: python + + dfa = df3.copy() + dfa['A'] = dfa['A'].astype('float32') + dfa.dtypes + + casted = dfa[df2 > 0] + casted + casted.dtypes + +Selecting columns based on ``dtype`` +------------------------------------ + +.. _basics.selectdtypes: + +The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns +based on their ``dtype``. + +First, let's create a :class:`DataFrame` with a slew of different +dtypes: + +.. ipython:: python + + df = pd.DataFrame({'string': list('abc'), + 'int64': list(range(1, 4)), + 'uint8': np.arange(3, 6).astype('u1'), + 'float64': np.arange(4.0, 7.0), + 'bool1': [True, False, True], + 'bool2': [False, True, False], + 'dates': pd.date_range('now', periods=3), + 'category': pd.Series(list("ABC")).astype('category')}) + df['tdeltas'] = df.dates.diff() + df['uint64'] = np.arange(3, 6).astype('u8') + df['other_dates'] = pd.date_range('20130101', periods=3) + df['tz_aware_dates'] = pd.date_range('20130101', periods=3, tz='US/Eastern') + df + +And the dtypes: + +.. ipython:: python + + df.dtypes + +:meth:`~DataFrame.select_dtypes` has two parameters ``include`` and ``exclude`` that allow you to +say "give me the columns *with* these dtypes" (``include``) and/or "give me the +columns *without* these dtypes" (``exclude``). + +For example, to select ``bool`` columns: + +.. ipython:: python + + df.select_dtypes(include=[bool]) + +You can also pass the name of a dtype in the `NumPy dtype hierarchy +<https://numpy.org/doc/stable/reference/arrays.scalars.html>`__: + +.. ipython:: python + + df.select_dtypes(include=['bool']) + +:meth:`~pandas.DataFrame.select_dtypes` also works with generic dtypes. + +For example, to select all numeric and boolean columns while excluding unsigned +integers: + +.. ipython:: python + + df.select_dtypes(include=['number', 'bool'], exclude=['unsignedinteger']) + +To select string columns you must use the ``object`` dtype: + +.. 
ipython:: python + + df.select_dtypes(include=['object']) + +To see all the child dtypes of a generic ``dtype`` like ``numpy.number`` you +can define a function that returns a tree of child dtypes: + +.. ipython:: python + + def subdtypes(dtype): + subs = dtype.__subclasses__() + if not subs: + return dtype + return [dtype, [subdtypes(dt) for dt in subs]] + +All NumPy dtypes are subclasses of ``numpy.generic``: + +.. ipython:: python + + subdtypes(np.generic) + +.. note:: + + Pandas also defines the types ``category``, and ``datetime64[ns, tz]``, which are not integrated into the normal + NumPy hierarchy and won't show up with the above function. diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst new file mode 100644 index 00000000..f67f46fc --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -0,0 +1,553 @@ +.. _compare_with_r: + +{{ header }} + +Comparison with R / R libraries +******************************* + +Since ``pandas`` aims to provide a lot of the data manipulation and analysis +functionality that people use `R `__ for, this page +was started to provide a more detailed look at the `R language +`__ and its many third +party libraries as they relate to ``pandas``. In comparisons with R and CRAN +libraries, we care about the following things: + +* **Functionality / flexibility**: what can/cannot be done with each tool +* **Performance**: how fast are operations. Hard numbers/benchmarks are + preferable +* **Ease-of-use**: Is one tool easier/harder to use (you may have to be + the judge of this, given side-by-side code comparisons) + +This page is also here to offer a bit of a translation guide for users of these +R packages. + +For transfer of ``DataFrame`` objects from ``pandas`` to R, one option is to +use HDF5 files, see :ref:`io.external_compatibility` for an +example. 
+ + +Quick reference +--------------- + +We'll start off with a quick reference guide pairing some common R +operations using `dplyr +<https://cran.r-project.org/package=dplyr>`__ with +pandas equivalents. + + +Querying, filtering, sampling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +=========================================== =========================================== +R pandas +=========================================== =========================================== +``dim(df)`` ``df.shape`` +``head(df)`` ``df.head()`` +``slice(df, 1:10)`` ``df.iloc[:10]`` +``filter(df, col1 == 1, col2 == 1)`` ``df.query('col1 == 1 & col2 == 1')`` +``df[df$col1 == 1 & df$col2 == 1,]`` ``df[(df.col1 == 1) & (df.col2 == 1)]`` +``select(df, col1, col2)`` ``df[['col1', 'col2']]`` +``select(df, col1:col3)`` ``df.loc[:, 'col1':'col3']`` +``select(df, -(col1:col3))`` ``df.drop(cols_to_drop, axis=1)`` but see [#select_range]_ +``distinct(select(df, col1))`` ``df[['col1']].drop_duplicates()`` +``distinct(select(df, col1, col2))`` ``df[['col1', 'col2']].drop_duplicates()`` +``sample_n(df, 10)`` ``df.sample(n=10)`` +``sample_frac(df, 0.01)`` ``df.sample(frac=0.01)`` +=========================================== =========================================== + +.. [#select_range] R's shorthand for a subrange of columns + (``select(df, col1:col3)``) can be approached + cleanly in pandas, if you have the list of columns, + for example ``df[cols[1:3]]`` or + ``df.drop(cols[1:3], axis=1)``, but doing this by column + name is a bit messy.
+ + +Sorting +~~~~~~~ + +=========================================== =========================================== +R pandas +=========================================== =========================================== +``arrange(df, col1, col2)`` ``df.sort_values(['col1', 'col2'])`` +``arrange(df, desc(col1))`` ``df.sort_values('col1', ascending=False)`` +=========================================== =========================================== + +Transforming +~~~~~~~~~~~~ + +=========================================== =========================================== +R pandas +=========================================== =========================================== +``select(df, col_one = col1)`` ``df.rename(columns={'col1': 'col_one'})['col_one']`` +``rename(df, col_one = col1)`` ``df.rename(columns={'col1': 'col_one'})`` +``mutate(df, c=a-b)`` ``df.assign(c=df['a']-df['b'])`` +=========================================== =========================================== + + +Grouping and summarizing +~~~~~~~~~~~~~~~~~~~~~~~~ + +============================================== =========================================== +R pandas +============================================== =========================================== +``summary(df)`` ``df.describe()`` +``gdf <- group_by(df, col1)`` ``gdf = df.groupby('col1')`` +``summarise(gdf, avg=mean(col1, na.rm=TRUE))`` ``df.groupby('col1').agg({'col1': 'mean'})`` +``summarise(gdf, total=sum(col1))`` ``df.groupby('col1').sum()`` +============================================== =========================================== + + +Base R +------ + +Slicing with R's |c|_ +~~~~~~~~~~~~~~~~~~~~~ + +R makes it easy to access ``data.frame`` columns by name + +.. code-block:: r + + df <- data.frame(a=rnorm(5), b=rnorm(5), c=rnorm(5), d=rnorm(5), e=rnorm(5)) + df[, c("a", "c", "e")] + +or by integer location + +.. 
code-block:: r + + df <- data.frame(matrix(rnorm(1000), ncol=100)) + df[, c(1:10, 25:30, 40, 50:100)] + +Selecting multiple columns by name in ``pandas`` is straightforward + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 3), columns=list('abc')) + df[['a', 'c']] + df.loc[:, ['a', 'c']] + +Selecting multiple noncontiguous columns by integer location can be achieved +with a combination of the ``iloc`` indexer attribute and ``numpy.r_``. + +.. ipython:: python + + named = list('abcdefg') + n = 30 + columns = named + np.arange(len(named), n).tolist() + df = pd.DataFrame(np.random.randn(n, n), columns=columns) + + df.iloc[:, np.r_[:10, 24:30]] + +|aggregate|_ +~~~~~~~~~~~~ + +In R you may want to split data into subsets and compute the mean for each. +Using a data.frame called ``df`` and splitting it into groups ``by1`` and +``by2``: + +.. code-block:: r + + df <- data.frame( + v1 = c(1,3,5,7,8,3,5,NA,4,5,7,9), + v2 = c(11,33,55,77,88,33,55,NA,44,55,77,99), + by1 = c("red", "blue", 1, 2, NA, "big", 1, 2, "red", 1, NA, 12), + by2 = c("wet", "dry", 99, 95, NA, "damp", 95, 99, "red", 99, NA, NA)) + aggregate(x=df[, c("v1", "v2")], by=list(mydf2$by1, mydf2$by2), FUN = mean) + +The :meth:`~pandas.DataFrame.groupby` method is similar to base R ``aggregate`` +function. + +.. ipython:: python + + df = pd.DataFrame( + {'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan, + np.nan]}) + + g = df.groupby(['by1', 'by2']) + g[['v1', 'v2']].mean() + +For more details and examples see :ref:`the groupby documentation +`. + +|match|_ +~~~~~~~~~~~~ + +A common way to select data in R is using ``%in%`` which is defined using the +function ``match``. The operator ``%in%`` is used to return a logical vector +indicating if there is a match or not: + +.. 
code-block:: r + + s <- 0:4 + s %in% c(2,4) + +The :meth:`~pandas.DataFrame.isin` method is similar to R ``%in%`` operator: + +.. ipython:: python + + s = pd.Series(np.arange(5), dtype=np.float32) + s.isin([2, 4]) + +The ``match`` function returns a vector of the positions of matches +of its first argument in its second: + +.. code-block:: r + + s <- 0:4 + match(s, c(2,4)) + +For more details and examples see :ref:`the reshaping documentation +`. + +|tapply|_ +~~~~~~~~~ + +``tapply`` is similar to ``aggregate``, but data can be in a ragged array, +since the subclass sizes are possibly irregular. Using a data.frame called +``baseball``, and retrieving information based on the array ``team``: + +.. code-block:: r + + baseball <- + data.frame(team = gl(5, 5, + labels = paste("Team", LETTERS[1:5])), + player = sample(letters, 25), + batting.average = runif(25, .200, .400)) + + tapply(baseball$batting.average, baseball.example$team, + max) + +In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this: + +.. ipython:: python + + import random + import string + + baseball = pd.DataFrame( + {'team': ["team %d" % (x + 1) for x in range(5)] * 5, + 'player': random.sample(list(string.ascii_lowercase), 25), + 'batting avg': np.random.uniform(.200, .400, 25)}) + + baseball.pivot_table(values='batting avg', columns='team', aggfunc=np.max) + +For more details and examples see :ref:`the reshaping documentation +`. + +|subset|_ +~~~~~~~~~~ + +The :meth:`~pandas.DataFrame.query` method is similar to the base R ``subset`` +function. In R you might want to get the rows of a ``data.frame`` where one +column's values are less than another column's values: + +.. code-block:: r + + df <- data.frame(a=rnorm(10), b=rnorm(10)) + subset(df, a <= b) + df[df$a <= df$b,] # note the comma + +In ``pandas``, there are a few ways to perform subsetting. 
You can use +:meth:`~pandas.DataFrame.query` or pass an expression as if it were an +index/slice as well as standard boolean indexing: + +.. ipython:: python + + df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) + df.query('a <= b') + df[df['a'] <= df['b']] + df.loc[df['a'] <= df['b']] + +For more details and examples see :ref:`the query documentation +`. + + +|with|_ +~~~~~~~~ + +An expression using a data.frame called ``df`` in R with the columns ``a`` and +``b`` would be evaluated using ``with`` like so: + +.. code-block:: r + + df <- data.frame(a=rnorm(10), b=rnorm(10)) + with(df, a + b) + df$a + df$b # same as the previous expression + +In ``pandas`` the equivalent expression, using the +:meth:`~pandas.DataFrame.eval` method, would be: + +.. ipython:: python + + df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) + df.eval('a + b') + df['a'] + df['b'] # same as the previous expression + +In certain cases :meth:`~pandas.DataFrame.eval` will be much faster than +evaluation in pure Python. For more details and examples see :ref:`the eval +documentation `. + +plyr +---- + +``plyr`` is an R library for the split-apply-combine strategy for data +analysis. The functions revolve around three data structures in R, ``a`` +for ``arrays``, ``l`` for ``lists``, and ``d`` for ``data.frame``. The +table below shows how these data structures could be mapped in Python. + ++------------+-------------------------------+ +| R | Python | ++============+===============================+ +| array | list | ++------------+-------------------------------+ +| lists | dictionary or list of objects | ++------------+-------------------------------+ +| data.frame | dataframe | ++------------+-------------------------------+ + +|ddply|_ +~~~~~~~~ + +An expression using a data.frame called ``df`` in R where you want to +summarize ``x`` by ``month``: + +.. 
code-block:: r + + require(plyr) + df <- data.frame( + x = runif(120, 1, 168), + y = runif(120, 7, 334), + z = runif(120, 1.7, 20.7), + month = rep(c(5,6,7,8),30), + week = sample(1:4, 120, TRUE) + ) + + ddply(df, .(month, week), summarize, + mean = round(mean(x), 2), + sd = round(sd(x), 2)) + +In ``pandas`` the equivalent expression, using the +:meth:`~pandas.DataFrame.groupby` method, would be: + +.. ipython:: python + + df = pd.DataFrame({'x': np.random.uniform(1., 168., 120), + 'y': np.random.uniform(7., 334., 120), + 'z': np.random.uniform(1.7, 20.7, 120), + 'month': [5, 6, 7, 8] * 30, + 'week': np.random.randint(1, 4, 120)}) + + grouped = df.groupby(['month', 'week']) + grouped['x'].agg([np.mean, np.std]) + + +For more details and examples see :ref:`the groupby documentation +<groupby.aggregate>`. + +reshape / reshape2 +------------------ + +|meltarray|_ +~~~~~~~~~~~~~ + +An expression using a 3 dimensional array called ``a`` in R where you want to +melt it into a data.frame: + +.. code-block:: r + + a <- array(c(1:23, NA), c(2,3,4)) + data.frame(melt(a)) + +In Python, since ``a`` is a NumPy array, you can simply use a list comprehension. + +.. ipython:: python + + a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4) + pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)]) + +|meltlist|_ +~~~~~~~~~~~~ + +An expression using a list called ``a`` in R where you want to melt it +into a data.frame: + +.. code-block:: r + + a <- as.list(c(1:4, NA)) + data.frame(melt(a)) + +In Python, this list would be a list of tuples, so the +:class:`~pandas.DataFrame` constructor would convert it to a dataframe as required. + +.. ipython:: python + + a = list(enumerate(list(range(1, 5)) + [np.NAN])) + pd.DataFrame(a) + +For more details and examples see :ref:`the Intro to Data Structures +documentation <dsintro>`. + +|meltdf|_ +~~~~~~~~~~~~~~~~ + +An expression using a data.frame called ``cheese`` in R where you want to +reshape the data.frame: + +.. 
code-block:: r + + cheese <- data.frame( + first = c('John', 'Mary'), + last = c('Doe', 'Bo'), + height = c(5.5, 6.0), + weight = c(130, 150) + ) + melt(cheese, id=c("first", "last")) + +In Python, the :meth:`~pandas.melt` method is the R equivalent: + +.. ipython:: python + + cheese = pd.DataFrame({'first': ['John', 'Mary'], + 'last': ['Doe', 'Bo'], + 'height': [5.5, 6.0], + 'weight': [130, 150]}) + + pd.melt(cheese, id_vars=['first', 'last']) + cheese.set_index(['first', 'last']).stack() # alternative way + +For more details and examples see :ref:`the reshaping documentation +`. + +|cast|_ +~~~~~~~ + +In R ``acast`` is an expression using a data.frame called ``df`` in R to cast +into a higher dimensional array: + +.. code-block:: r + + df <- data.frame( + x = runif(12, 1, 168), + y = runif(12, 7, 334), + z = runif(12, 1.7, 20.7), + month = rep(c(5,6,7),4), + week = rep(c(1,2), 6) + ) + + mdf <- melt(df, id=c("month", "week")) + acast(mdf, week ~ month ~ variable, mean) + +In Python the best way is to make use of :meth:`~pandas.pivot_table`: + +.. ipython:: python + + df = pd.DataFrame({'x': np.random.uniform(1., 168., 12), + 'y': np.random.uniform(7., 334., 12), + 'z': np.random.uniform(1.7, 20.7, 12), + 'month': [5, 6, 7] * 4, + 'week': [1, 2] * 6}) + + mdf = pd.melt(df, id_vars=['month', 'week']) + pd.pivot_table(mdf, values='value', index=['variable', 'week'], + columns=['month'], aggfunc=np.mean) + +Similarly for ``dcast`` which uses a data.frame called ``df`` in R to +aggregate information based on ``Animal`` and ``FeedType``: + +.. code-block:: r + + df <- data.frame( + Animal = c('Animal1', 'Animal2', 'Animal3', 'Animal2', 'Animal1', + 'Animal2', 'Animal3'), + FeedType = c('A', 'B', 'A', 'A', 'B', 'B', 'A'), + Amount = c(10, 7, 4, 2, 5, 6, 2) + ) + + dcast(df, Animal ~ FeedType, sum, fill=NaN) + # Alternative method using base R + with(df, tapply(Amount, list(Animal, FeedType), sum)) + +Python can approach this in two different ways. 
Firstly, similar to above +using :meth:`~pandas.pivot_table`: + +.. ipython:: python + + df = pd.DataFrame({ + 'Animal': ['Animal1', 'Animal2', 'Animal3', 'Animal2', 'Animal1', + 'Animal2', 'Animal3'], + 'FeedType': ['A', 'B', 'A', 'A', 'B', 'B', 'A'], + 'Amount': [10, 7, 4, 2, 5, 6, 2], + }) + + df.pivot_table(values='Amount', index='Animal', columns='FeedType', + aggfunc='sum') + +The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: + +.. ipython:: python + + df.groupby(['Animal', 'FeedType'])['Amount'].sum() + +For more details and examples see :ref:`the reshaping documentation +<reshaping.pivot>` or :ref:`the groupby documentation <groupby.split>`. + +|factor|_ +~~~~~~~~~ + +pandas has a data type for categorical data. + +.. code-block:: r + + cut(c(1,2,3,4,5,6), 3) + factor(c(1,2,3,2,2,3)) + +In pandas this is accomplished with ``pd.cut`` and ``astype("category")``: + +.. ipython:: python + + pd.cut(pd.Series([1, 2, 3, 4, 5, 6]), 3) + pd.Series([1, 2, 3, 2, 2, 3]).astype("category") + +For more details and examples see :ref:`categorical introduction <categorical>` and the +:ref:`API documentation <api.arrays.categorical>`. There is also documentation regarding the +:ref:`differences to R's factor <categorical.rfactor>`. + + +.. |c| replace:: ``c`` +.. _c: http://stat.ethz.ch/R-manual/R-patched/library/base/html/c.html + +.. |aggregate| replace:: ``aggregate`` +.. _aggregate: http://finzi.psych.upenn.edu/R/library/stats/html/aggregate.html + +.. |match| replace:: ``match`` / ``%in%`` +.. _match: http://finzi.psych.upenn.edu/R/library/base/html/match.html + +.. |tapply| replace:: ``tapply`` +.. _tapply: http://finzi.psych.upenn.edu/R/library/base/html/tapply.html + +.. |with| replace:: ``with`` +.. _with: http://finzi.psych.upenn.edu/R/library/base/html/with.html + +.. |subset| replace:: ``subset`` +.. _subset: http://finzi.psych.upenn.edu/R/library/base/html/subset.html + +.. |ddply| replace:: ``ddply`` +.. _ddply: http://www.inside-r.org/packages/cran/plyr/docs/ddply + +.. |meltarray| replace:: ``melt.array`` +.. 
_meltarray: http://www.inside-r.org/packages/cran/reshape2/docs/melt.array + +.. |meltlist| replace:: ``melt.list`` +.. _meltlist: http://www.inside-r.org/packages/cran/reshape2/docs/melt.list + +.. |meltdf| replace:: ``melt.data.frame`` +.. _meltdf: http://www.inside-r.org/packages/cran/reshape2/docs/melt.data.frame + +.. |cast| replace:: ``cast`` +.. _cast: http://www.inside-r.org/packages/cran/reshape2/docs/cast + +.. |factor| replace:: ``factor`` +.. _factor: https://stat.ethz.ch/R-manual/R-devel/library/base/html/factor.html diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst new file mode 100644 index 00000000..a29c9ec1 --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -0,0 +1,755 @@ +.. _compare_with_sas: + +{{ header }} + +Comparison with SAS +******************** +For potential users coming from `SAS <https://en.wikipedia.org/wiki/SAS_(software)>`__ +this page is meant to demonstrate how different SAS operations would be +performed in pandas. + +If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` +to familiarize yourself with the library. + +As is customary, we import pandas and NumPy as follows: + +.. ipython:: python + + import pandas as pd + import numpy as np + + +.. note:: + + Throughout this tutorial, the pandas ``DataFrame`` will be displayed by calling + ``df.head()``, which displays the first N (default 5) rows of the ``DataFrame``. + This is often used in interactive work (e.g. `Jupyter notebook + <https://jupyter.org/>`_ or terminal) - the equivalent in SAS would be: + + .. code-block:: sas + + proc print data=df(obs=5); + run; + +Data structures +--------------- + +General terminology translation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
csv-table:: + :header: "pandas", "SAS" + :widths: 20, 20 + + ``DataFrame``, data set + column, variable + row, observation + groupby, BY-group + ``NaN``, ``.`` + + +``DataFrame`` / ``Series`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A ``DataFrame`` in pandas is analogous to a SAS data set - a two-dimensional +data source with labeled columns that can be of different types. As will be +shown in this document, almost any operation that can be applied to a data set +using SAS's ``DATA`` step, can also be accomplished in pandas. + +A ``Series`` is the data structure that represents one column of a +``DataFrame``. SAS doesn't have a separate data structure for a single column, +but in general, working with a ``Series`` is analogous to referencing a column +in the ``DATA`` step. + +``Index`` +~~~~~~~~~ + +Every ``DataFrame`` and ``Series`` has an ``Index`` - which are labels on the +*rows* of the data. SAS does not have an exactly analogous concept. A data set's +rows are essentially unlabeled, other than an implicit integer index that can be +accessed during the ``DATA`` step (``_N_``). + +In pandas, if no index is specified, an integer index is also used by default +(first row = 0, second row = 1, and so on). While using a labeled ``Index`` or +``MultiIndex`` can enable sophisticated analyses and is ultimately an important +part of pandas to understand, for this comparison we will essentially ignore the +``Index`` and just treat the ``DataFrame`` as a collection of columns. Please +see the :ref:`indexing documentation` for much more on how to use an +``Index`` effectively. + + +Data input / output +------------------- + +Constructing a DataFrame from values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A SAS data set can be built from specified values by +placing the data after a ``datalines`` statement and +specifying the column names. + +.. 
code-block:: sas + + data df; + input x y; + datalines; + 1 2 + 3 4 + 5 6 + ; + run; + +A pandas ``DataFrame`` can be constructed in many different ways, +but for a small number of values, it is often convenient to specify it as +a Python dictionary, where the keys are the column names +and the values are the data. + +.. ipython:: python + + df = pd.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + df + + +Reading external data +~~~~~~~~~~~~~~~~~~~~~ + +Like SAS, pandas provides utilities for reading in data from +many formats. The ``tips`` dataset, found within the pandas +tests (`csv `_) +will be used in many of the following examples. + +SAS provides ``PROC IMPORT`` to read csv data into a data set. + +.. code-block:: sas + + proc import datafile='tips.csv' dbms=csv out=tips replace; + getnames=yes; + run; + +The pandas method is :func:`read_csv`, which works similarly. + +.. ipython:: python + + url = ('https://raw.github.com/pandas-dev/' + 'pandas/master/pandas/tests/io/data/csv/tips.csv') + tips = pd.read_csv(url) + tips.head() + + +Like ``PROC IMPORT``, ``read_csv`` can take a number of parameters to specify +how the data should be parsed. For example, if the data was instead tab delimited, +and did not have column names, the pandas command would be: + +.. code-block:: python + + tips = pd.read_csv('tips.csv', sep='\t', header=None) + + # alternatively, read_table is an alias to read_csv with tab delimiter + tips = pd.read_table('tips.csv', header=None) + +In addition to text/csv, pandas supports a variety of other data formats +such as Excel, HDF5, and SQL databases. These are all read via a ``pd.read_*`` +function. See the :ref:`IO documentation` for more details. + +Exporting data +~~~~~~~~~~~~~~ + +The inverse of ``PROC IMPORT`` in SAS is ``PROC EXPORT`` + +.. 
code-block:: sas + + proc export data=tips outfile='tips2.csv' dbms=csv; + run; + +Similarly in pandas, the opposite of ``read_csv`` is :meth:`~DataFrame.to_csv`, +and other data formats follow a similar api. + +.. code-block:: python + + tips.to_csv('tips2.csv') + + +Data operations +--------------- + +Operations on columns +~~~~~~~~~~~~~~~~~~~~~ + +In the ``DATA`` step, arbitrary math expressions can +be used on new or existing columns. + +.. code-block:: sas + + data tips; + set tips; + total_bill = total_bill - 2; + new_bill = total_bill / 2; + run; + +pandas provides similar vectorized operations by +specifying the individual ``Series`` in the ``DataFrame``. +New columns can be assigned in the same way. + +.. ipython:: python + + tips['total_bill'] = tips['total_bill'] - 2 + tips['new_bill'] = tips['total_bill'] / 2.0 + tips.head() + +.. ipython:: python + :suppress: + + tips = tips.drop('new_bill', axis=1) + +Filtering +~~~~~~~~~ + +Filtering in SAS is done with an ``if`` or ``where`` statement, on one +or more columns. + +.. code-block:: sas + + data tips; + set tips; + if total_bill > 10; + run; + + data tips; + set tips; + where total_bill > 10; + /* equivalent in this case - where happens before the + DATA step begins and can also be used in PROC statements */ + run; + +DataFrames can be filtered in multiple ways; the most intuitive of which is using +:ref:`boolean indexing ` + +.. ipython:: python + + tips[tips['total_bill'] > 10].head() + +If/then logic +~~~~~~~~~~~~~ + +In SAS, if/then logic can be used to create new columns. + +.. code-block:: sas + + data tips; + set tips; + format bucket $4.; + + if total_bill < 10 then bucket = 'low'; + else bucket = 'high'; + run; + +The same operation in pandas can be accomplished using +the ``where`` method from ``numpy``. + +.. ipython:: python + + tips['bucket'] = np.where(tips['total_bill'] < 10, 'low', 'high') + tips.head() + +.. 
ipython:: python + :suppress: + + tips = tips.drop('bucket', axis=1) + +Date functionality +~~~~~~~~~~~~~~~~~~ + +SAS provides a variety of functions to do operations on +date/datetime columns. + +.. code-block:: sas + + data tips; + set tips; + format date1 date2 date1_next mmddyy10.; + date1 = mdy(1, 15, 2013); + date2 = mdy(2, 15, 2015); + date1_year = year(date1); + date2_month = month(date2); + * shift date to beginning of next interval; + date1_next = intnx('MONTH', date1, 1); + * count intervals between dates; + months_between = intck('MONTH', date1, date2); + run; + +The equivalent pandas operations are shown below. In addition to these +functions, pandas supports other Time Series features +not available in Base SAS (such as resampling and custom offsets) - +see the :ref:`timeseries documentation` for more details. + +.. ipython:: python + + tips['date1'] = pd.Timestamp('2013-01-15') + tips['date2'] = pd.Timestamp('2015-02-15') + tips['date1_year'] = tips['date1'].dt.year + tips['date2_month'] = tips['date2'].dt.month + tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin() + tips['months_between'] = ( + tips['date2'].dt.to_period('M') - tips['date1'].dt.to_period('M')) + + tips[['date1', 'date2', 'date1_year', 'date2_month', + 'date1_next', 'months_between']].head() + +.. ipython:: python + :suppress: + + tips = tips.drop(['date1', 'date2', 'date1_year', + 'date2_month', 'date1_next', 'months_between'], axis=1) + +Selection of columns +~~~~~~~~~~~~~~~~~~~~ + +SAS provides keywords in the ``DATA`` step to select, +drop, and rename columns. + +.. code-block:: sas + + data tips; + set tips; + keep sex total_bill tip; + run; + + data tips; + set tips; + drop sex; + run; + + data tips; + set tips; + rename total_bill=total_bill_2; + run; + +The same operations are expressed in pandas below. + +.. 
ipython:: python + + # keep + tips[['sex', 'total_bill', 'tip']].head() + + # drop + tips.drop('sex', axis=1).head() + + # rename + tips.rename(columns={'total_bill': 'total_bill_2'}).head() + + +Sorting by values +~~~~~~~~~~~~~~~~~ + +Sorting in SAS is accomplished via ``PROC SORT`` + +.. code-block:: sas + + proc sort data=tips; + by sex total_bill; + run; + +pandas objects have a :meth:`~DataFrame.sort_values` method, which +takes a list of columns to sort by. + +.. ipython:: python + + tips = tips.sort_values(['sex', 'total_bill']) + tips.head() + + +String processing +----------------- + +Length +~~~~~~ + +SAS determines the length of a character string with the +`LENGTHN `__ +and `LENGTHC `__ +functions. ``LENGTHN`` excludes trailing blanks and ``LENGTHC`` includes trailing blanks. + +.. code-block:: sas + + data _null_; + set tips; + put(LENGTHN(time)); + put(LENGTHC(time)); + run; + +Python determines the length of a character string with the ``len`` function. +``len`` includes trailing blanks. Use ``len`` and ``rstrip`` to exclude +trailing blanks. + +.. ipython:: python + + tips['time'].str.len().head() + tips['time'].str.rstrip().str.len().head() + + +Find +~~~~ + +SAS determines the position of a character in a string with the +`FINDW `__ function. +``FINDW`` takes the string defined by the first argument and searches for the first position of the substring +you supply as the second argument. + +.. code-block:: sas + + data _null_; + set tips; + put(FINDW(sex,'ale')); + run; + +Python determines the position of a character in a string with the +``find`` function. ``find`` searches for the first position of the +substring. If the substring is found, the function returns its +position. Keep in mind that Python indexes are zero-based and +the function will return -1 if it fails to find the substring. + +.. 
ipython:: python + + tips['sex'].str.find("ale").head() + + +Substring +~~~~~~~~~ + +SAS extracts a substring from a string based on its position with the +`SUBSTR `__ function. + +.. code-block:: sas + + data _null_; + set tips; + put(substr(sex,1,1)); + run; + +With pandas you can use ``[]`` notation to extract a substring +from a string by position locations. Keep in mind that Python +indexes are zero-based. + +.. ipython:: python + + tips['sex'].str[0:1].head() + + +Scan +~~~~ + +The SAS `SCAN `__ +function returns the nth word from a string. The first argument is the string you want to parse and the +second argument specifies which word you want to extract. + +.. code-block:: sas + + data firstlast; + input String $60.; + First_Name = scan(string, 1); + Last_Name = scan(string, -1); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +In pandas, the simplest way to extract words is to split each string +on spaces and select the resulting piece by position. There are much more powerful +approaches, but this just shows a simple approach. + +.. ipython:: python + + firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) + firstlast['First_Name'] = firstlast['String'].str.split(" ", expand=True)[0] + firstlast['Last_Name'] = firstlast['String'].str.rsplit(" ", expand=True)[1] + firstlast + + +Upcase, lowcase, and propcase +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The SAS `UPCASE `__ +`LOWCASE `__ and +`PROPCASE `__ +functions change the case of the argument. + +.. code-block:: sas + + data firstlast; + input String $60.; + string_up = UPCASE(string); + string_low = LOWCASE(string); + string_prop = PROPCASE(string); + datalines2; + John Smith; + Jane Cook; + ;;; + run; + +The equivalent Python functions are ``upper``, ``lower``, and ``title``. + +.. 
ipython:: python + + firstlast = pd.DataFrame({'String': ['John Smith', 'Jane Cook']}) + firstlast['string_up'] = firstlast['String'].str.upper() + firstlast['string_low'] = firstlast['String'].str.lower() + firstlast['string_prop'] = firstlast['String'].str.title() + firstlast + +Merging +------- + +The following tables will be used in the merge examples + +.. ipython:: python + + df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], + 'value': np.random.randn(4)}) + df1 + df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], + 'value': np.random.randn(4)}) + df2 + +In SAS, data must be explicitly sorted before merging. Different +types of joins are accomplished using the ``in=`` dummy +variables to track whether a match was found in one or both +input frames. + +.. code-block:: sas + + proc sort data=df1; + by key; + run; + + proc sort data=df2; + by key; + run; + + data left_join inner_join right_join outer_join; + merge df1(in=a) df2(in=b); + + if a and b then output inner_join; + if a then output left_join; + if b then output right_join; + if a or b then output outer_join; + run; + +pandas DataFrames have a :meth:`~DataFrame.merge` method, which provides +similar functionality. Note that the data does not have +to be sorted ahead of time, and different join +types are accomplished via the ``how`` keyword. + +.. ipython:: python + + inner_join = df1.merge(df2, on=['key'], how='inner') + inner_join + + left_join = df1.merge(df2, on=['key'], how='left') + left_join + + right_join = df1.merge(df2, on=['key'], how='right') + right_join + + outer_join = df1.merge(df2, on=['key'], how='outer') + outer_join + + +Missing data +------------ + +Like SAS, pandas has a representation for missing data - which is the +special float value ``NaN`` (not a number). Many of the semantics +are the same, for example missing data propagates through numeric +operations, and is ignored by default for aggregations. + +.. 
ipython:: python + + outer_join + outer_join['value_x'] + outer_join['value_y'] + outer_join['value_x'].sum() + +One difference is that missing data cannot be compared to its sentinel value. +For example, in SAS you could do this to filter missing values. + +.. code-block:: sas + + data outer_join_nulls; + set outer_join; + if value_x = .; + run; + + data outer_join_no_nulls; + set outer_join; + if value_x ^= .; + run; + +Which doesn't work in pandas. Instead, the ``pd.isna`` or ``pd.notna`` functions +should be used for comparisons. + +.. ipython:: python + + outer_join[pd.isna(outer_join['value_x'])] + outer_join[pd.notna(outer_join['value_x'])] + +pandas also provides a variety of methods to work with missing data - some of +which would be challenging to express in SAS. For example, there are methods to +drop all rows with any missing values, replacing missing values with a specified +value, like the mean, or forward filling from previous rows. See the +:ref:`missing data documentation` for more. + +.. ipython:: python + + outer_join.dropna() + outer_join.fillna(method='ffill') + outer_join['value_x'].fillna(outer_join['value_x'].mean()) + + +GroupBy +------- + +Aggregation +~~~~~~~~~~~ + +SAS's PROC SUMMARY can be used to group by one or +more key variables and compute aggregations on +numeric columns. + +.. code-block:: sas + + proc summary data=tips nway; + class sex smoker; + var total_bill tip; + output out=tips_summed sum=; + run; + +pandas provides a flexible ``groupby`` mechanism that +allows similar aggregations. See the :ref:`groupby documentation` +for more details and examples. + +.. ipython:: python + + tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() + tips_summed.head() + + +Transformation +~~~~~~~~~~~~~~ + +In SAS, if the group aggregations need to be used with +the original frame, it must be merged back together. For +example, to subtract the mean for each observation by smoker group. + +.. 
code-block:: sas + + proc summary data=tips missing nway; + class smoker; + var total_bill; + output out=smoker_means mean(total_bill)=group_bill; + run; + + proc sort data=tips; + by smoker; + run; + + data tips; + merge tips(in=a) smoker_means(in=b); + by smoker; + adj_total_bill = total_bill - group_bill; + if a and b; + run; + + +pandas ``groupby`` provides a ``transform`` mechanism that allows +these types of operations to be succinctly expressed in one +operation. + +.. ipython:: python + + gb = tips.groupby('smoker')['total_bill'] + tips['adj_total_bill'] = tips['total_bill'] - gb.transform('mean') + tips.head() + + +By group processing +~~~~~~~~~~~~~~~~~~~ + +In addition to aggregation, pandas ``groupby`` can be used to +replicate most other by group processing from SAS. For example, +this ``DATA`` step reads the data by sex/smoker group and filters to +the first entry for each. + +.. code-block:: sas + + proc sort data=tips; + by sex smoker; + run; + + data tips_first; + set tips; + by sex smoker; + if FIRST.sex or FIRST.smoker then output; + run; + +In pandas this would be written as: + +.. ipython:: python + + tips.groupby(['sex', 'smoker']).first() + + +Other Considerations +-------------------- + +Disk vs memory +~~~~~~~~~~~~~~ + +pandas operates exclusively in memory, whereas a SAS data set exists on disk. +This means that the size of data able to be loaded in pandas is limited by your +machine's memory, but also that the operations on that data may be faster. + +If out of core processing is needed, one possibility is the +`dask.dataframe `_ +library (currently in development) which +provides a subset of pandas functionality for an on-disk ``DataFrame``. + +Data interop +~~~~~~~~~~~~ + +pandas provides a :func:`read_sas` method that can read SAS data saved in +the XPORT or SAS7BDAT binary format. + +.. 
code-block:: sas + + libname xportout xport 'transport-file.xpt'; + data xportout.tips; + set tips(rename=(total_bill=tbill)); + * xport variable names limited to 6 characters; + run; + +.. code-block:: python + + df = pd.read_sas('transport-file.xpt') + df = pd.read_sas('binary-file.sas7bdat') + +You can also specify the file format directly. By default, pandas will try +to infer the file format based on its extension. + +.. code-block:: python + + df = pd.read_sas('transport-file.xpt', format='xport') + df = pd.read_sas('binary-file.sas7bdat', format='sas7bdat') + +XPORT is a relatively limited format and the parsing of it is not as +optimized as some of the other pandas readers. An alternative way +to interop data between SAS and pandas is to serialize to csv. + +.. code-block:: ipython + + # version 0.17, 10M rows + + In [8]: %time df = pd.read_sas('big.xpt') + Wall time: 14.6 s + + In [9]: %time df = pd.read_csv('big.csv') + Wall time: 4.86 s diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst new file mode 100644 index 00000000..2efd3b18 --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -0,0 +1,493 @@ +.. _compare_with_sql: + +{{ header }} + +Comparison with SQL +******************** +Since many potential pandas users have some familiarity with +`SQL `_, this page is meant to provide some examples of how +various SQL operations would be performed using pandas. + +If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` +to familiarize yourself with the library. + +As is customary, we import pandas and NumPy as follows: + +.. ipython:: python + + import pandas as pd + import numpy as np + +Most of the examples will utilize the ``tips`` dataset found within pandas tests. We'll read +the data into a DataFrame called `tips` and assume we have a database table of the same name and +structure. + +.. 
ipython:: python + + url = ('https://raw.github.com/pandas-dev' + '/pandas/master/pandas/tests/io/data/csv/tips.csv') + tips = pd.read_csv(url) + tips.head() + +SELECT +------ +In SQL, selection is done using a comma-separated list of columns you'd like to select (or a ``*`` +to select all columns): + +.. code-block:: sql + + SELECT total_bill, tip, smoker, time + FROM tips + LIMIT 5; + +With pandas, column selection is done by passing a list of column names to your DataFrame: + +.. ipython:: python + + tips[['total_bill', 'tip', 'smoker', 'time']].head(5) + +Calling the DataFrame without the list of column names would display all columns (akin to SQL's +``*``). + +In SQL, you can add a calculated column: + +.. code-block:: sql + + SELECT *, tip/total_bill as tip_rate + FROM tips + LIMIT 5; + +With pandas, you can use the :meth:`DataFrame.assign` method of a DataFrame to append a new column: + +.. ipython:: python + + tips.assign(tip_rate=tips['tip'] / tips['total_bill']).head(5) + +WHERE +----- +Filtering in SQL is done via a WHERE clause. + +.. code-block:: sql + + SELECT * + FROM tips + WHERE time = 'Dinner' + LIMIT 5; + +DataFrames can be filtered in multiple ways; the most intuitive of which is using +`boolean indexing `_. + +.. ipython:: python + + tips[tips['time'] == 'Dinner'].head(5) + +The above statement is simply passing a ``Series`` of True/False objects to the DataFrame, +returning all rows with True. + +.. ipython:: python + + is_dinner = tips['time'] == 'Dinner' + is_dinner.value_counts() + tips[is_dinner].head(5) + +Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame using | (OR) and & +(AND). + +.. code-block:: sql + + -- tips of more than $5.00 at Dinner meals + SELECT * + FROM tips + WHERE time = 'Dinner' AND tip > 5.00; + +.. ipython:: python + + # tips of more than $5.00 at Dinner meals + tips[(tips['time'] == 'Dinner') & (tips['tip'] > 5.00)] + +.. 
code-block:: sql + + -- tips by parties of at least 5 diners OR bill total was more than $45 + SELECT * + FROM tips + WHERE size >= 5 OR total_bill > 45; + +.. ipython:: python + + # tips by parties of at least 5 diners OR bill total was more than $45 + tips[(tips['size'] >= 5) | (tips['total_bill'] > 45)] + +NULL checking is done using the :meth:`~pandas.Series.notna` and :meth:`~pandas.Series.isna` +methods. + +.. ipython:: python + + frame = pd.DataFrame({'col1': ['A', 'B', np.NaN, 'C', 'D'], + 'col2': ['F', np.NaN, 'G', 'H', 'I']}) + frame + +Assume we have a table of the same structure as our DataFrame above. We can see only the records +where ``col2`` IS NULL with the following query: + +.. code-block:: sql + + SELECT * + FROM frame + WHERE col2 IS NULL; + +.. ipython:: python + + frame[frame['col2'].isna()] + +Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notna`. + +.. code-block:: sql + + SELECT * + FROM frame + WHERE col1 IS NOT NULL; + +.. ipython:: python + + frame[frame['col1'].notna()] + + +GROUP BY +-------- +In pandas, SQL's GROUP BY operations are performed using the similarly named +:meth:`~pandas.DataFrame.groupby` method. :meth:`~pandas.DataFrame.groupby` typically refers to a +process where we'd like to split a dataset into groups, apply some function (typically aggregation) +, and then combine the groups together. + +A common SQL operation would be getting the count of records in each group throughout a dataset. +For instance, a query getting us the number of tips left by sex: + +.. code-block:: sql + + SELECT sex, count(*) + FROM tips + GROUP BY sex; + /* + Female 87 + Male 157 + */ + + +The pandas equivalent would be: + +.. ipython:: python + + tips.groupby('sex').size() + +Notice that in the pandas code we used :meth:`~pandas.core.groupby.DataFrameGroupBy.size` and not +:meth:`~pandas.core.groupby.DataFrameGroupBy.count`. 
This is because +:meth:`~pandas.core.groupby.DataFrameGroupBy.count` applies the function to each column, returning +the number of ``not null`` records within each. + +.. ipython:: python + + tips.groupby('sex').count() + +Alternatively, we could have applied the :meth:`~pandas.core.groupby.DataFrameGroupBy.count` method +to an individual column: + +.. ipython:: python + + tips.groupby('sex')['total_bill'].count() + +Multiple functions can also be applied at once. For instance, say we'd like to see how tip amount +differs by day of the week - :meth:`~pandas.core.groupby.DataFrameGroupBy.agg` allows you to pass a dictionary +to your grouped DataFrame, indicating which functions to apply to specific columns. + +.. code-block:: sql + + SELECT day, AVG(tip), COUNT(*) + FROM tips + GROUP BY day; + /* + Fri 2.734737 19 + Sat 2.993103 87 + Sun 3.255132 76 + Thur 2.771452 62 + */ + +.. ipython:: python + + tips.groupby('day').agg({'tip': np.mean, 'day': np.size}) + +Grouping by more than one column is done by passing a list of columns to the +:meth:`~pandas.DataFrame.groupby` method. + +.. code-block:: sql + + SELECT smoker, day, COUNT(*), AVG(tip) + FROM tips + GROUP BY smoker, day; + /* + smoker day + No Fri 4 2.812500 + Sat 45 3.102889 + Sun 57 3.167895 + Thur 45 2.673778 + Yes Fri 15 2.714000 + Sat 42 2.875476 + Sun 19 3.516842 + Thur 17 3.030000 + */ + +.. ipython:: python + + tips.groupby(['smoker', 'day']).agg({'tip': [np.size, np.mean]}) + +.. _compare_with_sql.join: + +JOIN +---- +JOINs can be performed with :meth:`~pandas.DataFrame.join` or :meth:`~pandas.merge`. By default, +:meth:`~pandas.DataFrame.join` will join the DataFrames on their indices. Each method has +parameters allowing you to specify the type of join to perform (LEFT, RIGHT, INNER, FULL) or the +columns to join on (column names or indices). + +.. 
ipython:: python + + df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], + 'value': np.random.randn(4)}) + df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], + 'value': np.random.randn(4)}) + +Assume we have two database tables of the same name and structure as our DataFrames. + +Now let's go over the various types of JOINs. + +INNER JOIN +~~~~~~~~~~ +.. code-block:: sql + + SELECT * + FROM df1 + INNER JOIN df2 + ON df1.key = df2.key; + +.. ipython:: python + + # merge performs an INNER JOIN by default + pd.merge(df1, df2, on='key') + +:meth:`~pandas.merge` also offers parameters for cases when you'd like to join one DataFrame's +column with another DataFrame's index. + +.. ipython:: python + + indexed_df2 = df2.set_index('key') + pd.merge(df1, indexed_df2, left_on='key', right_index=True) + +LEFT OUTER JOIN +~~~~~~~~~~~~~~~ +.. code-block:: sql + + -- show all records from df1 + SELECT * + FROM df1 + LEFT OUTER JOIN df2 + ON df1.key = df2.key; + +.. ipython:: python + + # show all records from df1 + pd.merge(df1, df2, on='key', how='left') + +RIGHT JOIN +~~~~~~~~~~ +.. code-block:: sql + + -- show all records from df2 + SELECT * + FROM df1 + RIGHT OUTER JOIN df2 + ON df1.key = df2.key; + +.. ipython:: python + + # show all records from df2 + pd.merge(df1, df2, on='key', how='right') + +FULL JOIN +~~~~~~~~~ +pandas also allows for FULL JOINs, which display both sides of the dataset, whether or not the +joined columns find a match. As of writing, FULL JOINs are not supported in all RDBMS (MySQL). + +.. code-block:: sql + + -- show all records from both tables + SELECT * + FROM df1 + FULL OUTER JOIN df2 + ON df1.key = df2.key; + +.. ipython:: python + + # show all records from both frames + pd.merge(df1, df2, on='key', how='outer') + + +UNION +----- +UNION ALL can be performed using :meth:`~pandas.concat`. + +.. 
ipython:: python + + df1 = pd.DataFrame({'city': ['Chicago', 'San Francisco', 'New York City'], + 'rank': range(1, 4)}) + df2 = pd.DataFrame({'city': ['Chicago', 'Boston', 'Los Angeles'], + 'rank': [1, 4, 5]}) + +.. code-block:: sql + + SELECT city, rank + FROM df1 + UNION ALL + SELECT city, rank + FROM df2; + /* + city rank + Chicago 1 + San Francisco 2 + New York City 3 + Chicago 1 + Boston 4 + Los Angeles 5 + */ + +.. ipython:: python + + pd.concat([df1, df2]) + +SQL's UNION is similar to UNION ALL, however UNION will remove duplicate rows. + +.. code-block:: sql + + SELECT city, rank + FROM df1 + UNION + SELECT city, rank + FROM df2; + -- notice that there is only one Chicago record this time + /* + city rank + Chicago 1 + San Francisco 2 + New York City 3 + Boston 4 + Los Angeles 5 + */ + +In pandas, you can use :meth:`~pandas.concat` in conjunction with +:meth:`~pandas.DataFrame.drop_duplicates`. + +.. ipython:: python + + pd.concat([df1, df2]).drop_duplicates() + +Pandas equivalents for some SQL analytic and aggregate functions +---------------------------------------------------------------- + +Top N rows with offset +~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sql + + -- MySQL + SELECT * FROM tips + ORDER BY tip DESC + LIMIT 10 OFFSET 5; + +.. ipython:: python + + tips.nlargest(10 + 5, columns='tip').tail(10) + +Top N rows per group +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: sql + + -- Oracle's ROW_NUMBER() analytic function + SELECT * FROM ( + SELECT + t.*, + ROW_NUMBER() OVER(PARTITION BY day ORDER BY total_bill DESC) AS rn + FROM tips t + ) + WHERE rn < 3 + ORDER BY day, rn; + + +.. ipython:: python + + (tips.assign(rn=tips.sort_values(['total_bill'], ascending=False) + .groupby(['day']) + .cumcount() + 1) + .query('rn < 3') + .sort_values(['day', 'rn'])) + +the same using `rank(method='first')` function + +.. 
ipython:: python + + (tips.assign(rnk=tips.groupby(['day'])['total_bill'] + .rank(method='first', ascending=False)) + .query('rnk < 3') + .sort_values(['day', 'rnk'])) + +.. code-block:: sql + + -- Oracle's RANK() analytic function + SELECT * FROM ( + SELECT + t.*, + RANK() OVER(PARTITION BY sex ORDER BY tip) AS rnk + FROM tips t + WHERE tip < 2 + ) + WHERE rnk < 3 + ORDER BY sex, rnk; + +Let's find tips with (rank < 3) per gender group for (tips < 2). +Notice that when using ``rank(method='min')`` function +`rnk_min` remains the same for the same `tip` +(as Oracle's RANK() function) + +.. ipython:: python + + (tips[tips['tip'] < 2] + .assign(rnk_min=tips.groupby(['sex'])['tip'] + .rank(method='min')) + .query('rnk_min < 3') + .sort_values(['sex', 'rnk_min'])) + + +UPDATE +------ + +.. code-block:: sql + + UPDATE tips + SET tip = tip*2 + WHERE tip < 2; + +.. ipython:: python + + tips.loc[tips['tip'] < 2, 'tip'] *= 2 + +DELETE +------ + +.. code-block:: sql + + DELETE FROM tips + WHERE tip > 9; + +In pandas we select the rows that should remain, instead of deleting them + +.. ipython:: python + + tips = tips.loc[tips['tip'] <= 9] diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst new file mode 100644 index 00000000..31cb76ee --- /dev/null +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -0,0 +1,678 @@ +.. _compare_with_stata: + +{{ header }} + +Comparison with Stata +********************* +For potential users coming from `Stata `__ +this page is meant to demonstrate how different Stata operations would be +performed in pandas. + +If you're new to pandas, you might want to first read through :ref:`10 Minutes to pandas<10min>` +to familiarize yourself with the library. + +As is customary, we import pandas and NumPy as follows. This means that we can refer to the +libraries as ``pd`` and ``np``, respectively, for the rest of the document. + +.. 
ipython:: python + + import pandas as pd + import numpy as np + + +.. note:: + + Throughout this tutorial, the pandas ``DataFrame`` will be displayed by calling + ``df.head()``, which displays the first N (default 5) rows of the ``DataFrame``. + This is often used in interactive work (e.g. `Jupyter notebook + `_ or terminal) -- the equivalent in Stata would be: + + .. code-block:: stata + + list in 1/5 + +Data structures +--------------- + +General terminology translation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. csv-table:: + :header: "pandas", "Stata" + :widths: 20, 20 + + ``DataFrame``, data set + column, variable + row, observation + groupby, bysort + ``NaN``, ``.`` + + +``DataFrame`` / ``Series`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A ``DataFrame`` in pandas is analogous to a Stata data set -- a two-dimensional +data source with labeled columns that can be of different types. As will be +shown in this document, almost any operation that can be applied to a data set +in Stata can also be accomplished in pandas. + +A ``Series`` is the data structure that represents one column of a +``DataFrame``. Stata doesn't have a separate data structure for a single column, +but in general, working with a ``Series`` is analogous to referencing a column +of a data set in Stata. + +``Index`` +~~~~~~~~~ + +Every ``DataFrame`` and ``Series`` has an ``Index`` -- labels on the +*rows* of the data. Stata does not have an exactly analogous concept. In Stata, a data set's +rows are essentially unlabeled, other than an implicit integer index that can be +accessed with ``_n``. + +In pandas, if no index is specified, an integer index is also used by default +(first row = 0, second row = 1, and so on). While using a labeled ``Index`` or +``MultiIndex`` can enable sophisticated analyses and is ultimately an important +part of pandas to understand, for this comparison we will essentially ignore the +``Index`` and just treat the ``DataFrame`` as a collection of columns. 
Please +see the :ref:`indexing documentation` for much more on how to use an +``Index`` effectively. + + +Data input / output +------------------- + +Constructing a DataFrame from values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A Stata data set can be built from specified values by +placing the data after an ``input`` statement and +specifying the column names. + +.. code-block:: stata + + input x y + 1 2 + 3 4 + 5 6 + end + +A pandas ``DataFrame`` can be constructed in many different ways, +but for a small number of values, it is often convenient to specify it as +a Python dictionary, where the keys are the column names +and the values are the data. + +.. ipython:: python + + df = pd.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) + df + + +Reading external data +~~~~~~~~~~~~~~~~~~~~~ + +Like Stata, pandas provides utilities for reading in data from +many formats. The ``tips`` data set, found within the pandas +tests (`csv `_) +will be used in many of the following examples. + +Stata provides ``import delimited`` to read csv data into a data set in memory. +If the ``tips.csv`` file is in the current working directory, we can import it as follows. + +.. code-block:: stata + + import delimited tips.csv + +The pandas method is :func:`read_csv`, which works similarly. Additionally, it will automatically download +the data set if presented with a url. + +.. ipython:: python + + url = ('https://raw.github.com/pandas-dev' + '/pandas/master/pandas/tests/io/data/csv/tips.csv') + tips = pd.read_csv(url) + tips.head() + +Like ``import delimited``, :func:`read_csv` can take a number of parameters to specify +how the data should be parsed. For example, if the data were instead tab delimited, +did not have column names, and existed in the current working directory, +the pandas command would be: + +.. 
code-block:: python + + tips = pd.read_csv('tips.csv', sep='\t', header=None) + + # alternatively, read_table is an alias to read_csv with tab delimiter + tips = pd.read_table('tips.csv', header=None) + +Pandas can also read Stata data sets in ``.dta`` format with the :func:`read_stata` function. + +.. code-block:: python + + df = pd.read_stata('data.dta') + +In addition to text/csv and Stata files, pandas supports a variety of other data formats +such as Excel, SAS, HDF5, Parquet, and SQL databases. These are all read via a ``pd.read_*`` +function. See the :ref:`IO documentation` for more details. + + +Exporting data +~~~~~~~~~~~~~~ + +The inverse of ``import delimited`` in Stata is ``export delimited`` + +.. code-block:: stata + + export delimited tips2.csv + +Similarly in pandas, the opposite of ``read_csv`` is :meth:`DataFrame.to_csv`. + +.. code-block:: python + + tips.to_csv('tips2.csv') + +Pandas can also export to Stata file format with the :meth:`DataFrame.to_stata` method. + +.. code-block:: python + + tips.to_stata('tips2.dta') + + +Data operations +--------------- + +Operations on columns +~~~~~~~~~~~~~~~~~~~~~ + +In Stata, arbitrary math expressions can be used with the ``generate`` and +``replace`` commands on new or existing columns. The ``drop`` command drops +the column from the data set. + +.. code-block:: stata + + replace total_bill = total_bill - 2 + generate new_bill = total_bill / 2 + drop new_bill + +pandas provides similar vectorized operations by +specifying the individual ``Series`` in the ``DataFrame``. +New columns can be assigned in the same way. The :meth:`DataFrame.drop` method +drops a column from the ``DataFrame``. + +.. ipython:: python + + tips['total_bill'] = tips['total_bill'] - 2 + tips['new_bill'] = tips['total_bill'] / 2 + tips.head() + + tips = tips.drop('new_bill', axis=1) + +Filtering +~~~~~~~~~ + +Filtering in Stata is done with an ``if`` clause on one or more columns. + +.. 
code-block:: stata + + list if total_bill > 10 + +DataFrames can be filtered in multiple ways; the most intuitive of which is using +:ref:`boolean indexing `. + +.. ipython:: python + + tips[tips['total_bill'] > 10].head() + +If/then logic +~~~~~~~~~~~~~ + +In Stata, an ``if`` clause can also be used to create new columns. + +.. code-block:: stata + + generate bucket = "low" if total_bill < 10 + replace bucket = "high" if total_bill >= 10 + +The same operation in pandas can be accomplished using +the ``where`` method from ``numpy``. + +.. ipython:: python + + tips['bucket'] = np.where(tips['total_bill'] < 10, 'low', 'high') + tips.head() + +.. ipython:: python + :suppress: + + tips = tips.drop('bucket', axis=1) + +Date functionality +~~~~~~~~~~~~~~~~~~ + +Stata provides a variety of functions to do operations on +date/datetime columns. + +.. code-block:: stata + + generate date1 = mdy(1, 15, 2013) + generate date2 = date("Feb152015", "MDY") + + generate date1_year = year(date1) + generate date2_month = month(date2) + + * shift date to beginning of next month + generate date1_next = mdy(month(date1) + 1, 1, year(date1)) if month(date1) != 12 + replace date1_next = mdy(1, 1, year(date1) + 1) if month(date1) == 12 + generate months_between = mofd(date2) - mofd(date1) + + list date1 date2 date1_year date2_month date1_next months_between + +The equivalent pandas operations are shown below. In addition to these +functions, pandas supports other Time Series features +not available in Stata (such as time zone handling and custom offsets) -- +see the :ref:`timeseries documentation` for more details. + +.. 
ipython:: python + + tips['date1'] = pd.Timestamp('2013-01-15') + tips['date2'] = pd.Timestamp('2015-02-15') + tips['date1_year'] = tips['date1'].dt.year + tips['date2_month'] = tips['date2'].dt.month + tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin() + tips['months_between'] = (tips['date2'].dt.to_period('M') + - tips['date1'].dt.to_period('M')) + + tips[['date1', 'date2', 'date1_year', 'date2_month', 'date1_next', + 'months_between']].head() + +.. ipython:: python + :suppress: + + tips = tips.drop(['date1', 'date2', 'date1_year', 'date2_month', + 'date1_next', 'months_between'], axis=1) + +Selection of columns +~~~~~~~~~~~~~~~~~~~~ + +Stata provides keywords to select, drop, and rename columns. + +.. code-block:: stata + + keep sex total_bill tip + + drop sex + + rename total_bill total_bill_2 + +The same operations are expressed in pandas below. Note that in contrast to Stata, these +operations do not happen in place. To make these changes persist, assign the operation back +to a variable. + +.. ipython:: python + + # keep + tips[['sex', 'total_bill', 'tip']].head() + + # drop + tips.drop('sex', axis=1).head() + + # rename + tips.rename(columns={'total_bill': 'total_bill_2'}).head() + + +Sorting by values +~~~~~~~~~~~~~~~~~ + +Sorting in Stata is accomplished via ``sort`` + +.. code-block:: stata + + sort sex total_bill + +pandas objects have a :meth:`DataFrame.sort_values` method, which +takes a list of columns to sort by. + +.. ipython:: python + + tips = tips.sort_values(['sex', 'total_bill']) + tips.head() + + +String processing +----------------- + +Finding length of string +~~~~~~~~~~~~~~~~~~~~~~~~ + +Stata determines the length of a character string with the :func:`strlen` and +:func:`ustrlen` functions for ASCII and Unicode strings, respectively. + +.. code-block:: stata + + generate strlen_time = strlen(time) + generate ustrlen_time = ustrlen(time) + +Python determines the length of a character string with the ``len`` function. 
+In Python 3, all strings are Unicode strings. ``len`` includes trailing blanks. +Use ``len`` and ``rstrip`` to exclude trailing blanks. + +.. ipython:: python + + tips['time'].str.len().head() + tips['time'].str.rstrip().str.len().head() + + +Finding position of substring +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Stata determines the position of a substring in a string with the :func:`strpos` function. +This takes the string defined by the first argument and searches for the +first position of the substring you supply as the second argument. + +.. code-block:: stata + + generate str_position = strpos(sex, "ale") + +Python determines the position of a substring in a string with the +:func:`find` function. ``find`` searches for the first position of the +substring. If the substring is found, the function returns its +position. Keep in mind that Python indexes are zero-based and +the function will return -1 if it fails to find the substring. + +.. ipython:: python + + tips['sex'].str.find("ale").head() + + +Extracting substring by position +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Stata extracts a substring from a string based on its position with the :func:`substr` function. + +.. code-block:: stata + + generate short_sex = substr(sex, 1, 1) + +With pandas you can use ``[]`` notation to extract a substring +from a string by position locations. Keep in mind that Python +indexes are zero-based. + +.. ipython:: python + + tips['sex'].str[0:1].head() + + +Extracting nth word +~~~~~~~~~~~~~~~~~~~ + +The Stata :func:`word` function returns the nth word from a string. +The first argument is the string you want to parse and the +second argument specifies which word you want to extract. + +.. code-block:: stata + + clear + input str20 string + "John Smith" + "Jane Cook" + end + + generate first_name = word(string, 1) + generate last_name = word(string, -1) + +In pandas, a word can be extracted by splitting the string on +whitespace and selecting the resulting piece by position. 
There are much more powerful +approaches, but this just shows a simple approach. + +.. ipython:: python + + firstlast = pd.DataFrame({'string': ['John Smith', 'Jane Cook']}) + firstlast['First_Name'] = firstlast['string'].str.split(" ", expand=True)[0] + firstlast['Last_Name'] = firstlast['string'].str.rsplit(" ", expand=True)[1] + firstlast + + +Changing case +~~~~~~~~~~~~~ + +The Stata :func:`strupper`, :func:`strlower`, :func:`strproper`, +:func:`ustrupper`, :func:`ustrlower`, and :func:`ustrtitle` functions +change the case of ASCII and Unicode strings, respectively. + +.. code-block:: stata + + clear + input str20 string + "John Smith" + "Jane Cook" + end + + generate upper = strupper(string) + generate lower = strlower(string) + generate title = strproper(string) + list + +The equivalent Python functions are ``upper``, ``lower``, and ``title``. + +.. ipython:: python + + firstlast = pd.DataFrame({'string': ['John Smith', 'Jane Cook']}) + firstlast['upper'] = firstlast['string'].str.upper() + firstlast['lower'] = firstlast['string'].str.lower() + firstlast['title'] = firstlast['string'].str.title() + firstlast + +Merging +------- + +The following tables will be used in the merge examples: + +.. ipython:: python + + df1 = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], + 'value': np.random.randn(4)}) + df1 + df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], + 'value': np.random.randn(4)}) + df2 + +In Stata, to perform a merge, one data set must be in memory +and the other must be referenced as a file name on disk. In +contrast, Python must have both ``DataFrames`` already in memory. + +By default, Stata performs an outer join, where all observations +from both data sets are left in memory after the merge. One can +keep only observations from the initial data set, the merged data set, +or the intersection of the two by using the values created in the +``_merge`` variable. + +.. 
code-block:: stata + + * First create df2 and save to disk + clear + input str1 key + B + D + D + E + end + generate value = rnormal() + save df2.dta + + * Now create df1 in memory + clear + input str1 key + A + B + C + D + end + generate value = rnormal() + + preserve + + * Left join + merge 1:n key using df2.dta + keep if _merge == 1 + + * Right join + restore, preserve + merge 1:n key using df2.dta + keep if _merge == 2 + + * Inner join + restore, preserve + merge 1:n key using df2.dta + keep if _merge == 3 + + * Outer join + restore + merge 1:n key using df2.dta + +pandas DataFrames have a :meth:`DataFrame.merge` method, which provides +similar functionality. Note that different join +types are accomplished via the ``how`` keyword. + +.. ipython:: python + + inner_join = df1.merge(df2, on=['key'], how='inner') + inner_join + + left_join = df1.merge(df2, on=['key'], how='left') + left_join + + right_join = df1.merge(df2, on=['key'], how='right') + right_join + + outer_join = df1.merge(df2, on=['key'], how='outer') + outer_join + + +Missing data +------------ + +Like Stata, pandas has a representation for missing data -- the +special float value ``NaN`` (not a number). Many of the semantics +are the same; for example missing data propagates through numeric +operations, and is ignored by default for aggregations. + +.. ipython:: python + + outer_join + outer_join['value_x'] + outer_join['value_y'] + outer_join['value_x'].sum() + +One difference is that missing data cannot be compared to its sentinel value. +For example, in Stata you could do this to filter missing values. + +.. code-block:: stata + + * Keep missing values + list if value_x == . + * Keep non-missing values + list if value_x != . + +This doesn't work in pandas. Instead, the :func:`pd.isna` or :func:`pd.notna` functions +should be used for comparisons. + +.. 
ipython:: python + + outer_join[pd.isna(outer_join['value_x'])] + outer_join[pd.notna(outer_join['value_x'])] + +Pandas also provides a variety of methods to work with missing data -- some of +which would be challenging to express in Stata. For example, there are methods to +drop all rows with any missing values, replace missing values with a specified +value (like the mean), or forward fill from previous rows. See the +:ref:`missing data documentation <missing_data>` for more. + +.. ipython:: python + + # Drop rows with any missing value + outer_join.dropna() + + # Fill forwards + outer_join.fillna(method='ffill') + + # Impute missing values with the mean + outer_join['value_x'].fillna(outer_join['value_x'].mean()) + + +GroupBy +------- + +Aggregation +~~~~~~~~~~~ + +Stata's ``collapse`` can be used to group by one or +more key variables and compute aggregations on +numeric columns. + +.. code-block:: stata + + collapse (sum) total_bill tip, by(sex smoker) + +pandas provides a flexible ``groupby`` mechanism that +allows similar aggregations. See the :ref:`groupby documentation <groupby>` +for more details and examples. + +.. ipython:: python + + tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() + tips_summed.head() + + +Transformation +~~~~~~~~~~~~~~ + +In Stata, if the group aggregations need to be used with the +original data set, one would usually use ``bysort`` with :func:`egen`. +For example, the following subtracts the group mean from each observation, by smoker group: + +.. code-block:: stata + + bysort smoker: egen group_bill = mean(total_bill) + generate adj_total_bill = total_bill - group_bill + + +pandas ``groupby`` provides a ``transform`` mechanism that allows +these types of operations to be succinctly expressed in one +operation. + +.. 
ipython:: python + + gb = tips.groupby('smoker')['total_bill'] + tips['adj_total_bill'] = tips['total_bill'] - gb.transform('mean') + tips.head() + + +By group processing +~~~~~~~~~~~~~~~~~~~ + +In addition to aggregation, pandas ``groupby`` can be used to +replicate most other ``bysort`` processing from Stata. For example, +the following example lists the first observation in the current +sort order by sex/smoker group. + +.. code-block:: stata + + bysort sex smoker: list if _n == 1 + +In pandas this would be written as: + +.. ipython:: python + + tips.groupby(['sex', 'smoker']).first() + + +Other considerations +-------------------- + +Disk vs memory +~~~~~~~~~~~~~~ + +Pandas and Stata both operate exclusively in memory. This means that the size of +data able to be loaded in pandas is limited by your machine's memory. +If out of core processing is needed, one possibility is the +`dask.dataframe `_ +library, which provides a subset of pandas functionality for an +on-disk ``DataFrame``. diff --git a/doc/source/getting_started/comparison/index.rst b/doc/source/getting_started/comparison/index.rst new file mode 100644 index 00000000..998706ce --- /dev/null +++ b/doc/source/getting_started/comparison/index.rst @@ -0,0 +1,15 @@ +{{ header }} + +.. _comparison: + +=========================== +Comparison with other tools +=========================== + +.. toctree:: + :maxdepth: 2 + + comparison_with_r + comparison_with_sql + comparison_with_sas + comparison_with_stata diff --git a/doc/source/getting_started/dsintro.rst b/doc/source/getting_started/dsintro.rst new file mode 100644 index 00000000..93e60ff9 --- /dev/null +++ b/doc/source/getting_started/dsintro.rst @@ -0,0 +1,840 @@ +.. _dsintro: + +{{ header }} + +************************ +Intro to data structures +************************ + +We'll start with a quick, non-comprehensive overview of the fundamental data +structures in pandas to get you started. 
The fundamental behavior of data +types, indexing, and axis labeling / alignment applies across all of the +objects. To get started, import NumPy and load pandas into your namespace: + +.. ipython:: python + + import numpy as np + import pandas as pd + +Here is a basic tenet to keep in mind: **data alignment is intrinsic**. The link +between labels and data will not be broken unless done so explicitly by you. + +We'll give a brief intro to the data structures, then consider all of the broad +categories of functionality and methods in separate sections. + +.. _basics.series: + +Series +------ + +:class:`Series` is a one-dimensional labeled array capable of holding any data +type (integers, strings, floating point numbers, Python objects, etc.). The axis +labels are collectively referred to as the **index**. The basic method to create a Series is to call: + +:: + + >>> s = pd.Series(data, index=index) + +Here, ``data`` can be many different things: + +* a Python dict +* an ndarray +* a scalar value (like 5) + +The passed **index** is a list of axis labels. Thus, this separates into a few +cases depending on what **data** is: + +**From ndarray** + +If ``data`` is an ndarray, **index** must be the same length as **data**. If no +index is passed, one will be created having values ``[0, ..., len(data) - 1]``. + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) + s + s.index + + pd.Series(np.random.randn(5)) + +.. note:: + + pandas supports non-unique index values. If an operation + that does not support duplicate index values is attempted, an exception + will be raised at that time. The reason for this laziness is almost entirely performance-based + (there are many instances in computations, like parts of GroupBy, where the index + is not used). + +**From dict** + +Series can be instantiated from dicts: + +.. ipython:: python + + d = {'b': 1, 'a': 0, 'c': 2} + pd.Series(d) + +.. 
note:: + + When the data is a dict, and an index is not passed, the ``Series`` index + will be ordered by the dict's insertion order, if you're using Python + version >= 3.6 and Pandas version >= 0.23. + + If you're using Python < 3.6 or Pandas < 0.23, and an index is not passed, + the ``Series`` index will be the lexically ordered list of dict keys. + +In the example above, if you were on a Python version lower than 3.6 or a +Pandas version lower than 0.23, the ``Series`` would be ordered by the lexical +order of the dict keys (i.e. ``['a', 'b', 'c']`` rather than ``['b', 'a', 'c']``). + +If an index is passed, the values in data corresponding to the labels in the +index will be pulled out. + +.. ipython:: python + + d = {'a': 0., 'b': 1., 'c': 2.} + pd.Series(d) + pd.Series(d, index=['b', 'c', 'd', 'a']) + +.. note:: + + NaN (not a number) is the standard missing data marker used in pandas. + +**From scalar value** + +If ``data`` is a scalar value, an index must be +provided. The value will be repeated to match the length of **index**. + +.. ipython:: python + + pd.Series(5., index=['a', 'b', 'c', 'd', 'e']) + +Series is ndarray-like +~~~~~~~~~~~~~~~~~~~~~~ + +``Series`` acts very similarly to a ``ndarray``, and is a valid argument to most NumPy functions. +However, operations such as slicing will also slice the index. + +.. ipython:: python + + s[0] + s[:3] + s[s > s.median()] + s[[4, 3, 1]] + np.exp(s) + +.. note:: + + We will address array-based indexing like ``s[[4, 3, 1]]`` + in :ref:`section `. + +Like a NumPy array, a pandas Series has a :attr:`~Series.dtype`. + +.. ipython:: python + + s.dtype + +This is often a NumPy dtype. However, pandas and 3rd-party libraries +extend NumPy's type system in a few places, in which case the dtype would +be a :class:`~pandas.api.extensions.ExtensionDtype`. Some examples within +pandas are :ref:`categorical` and :ref:`integer_na`. See :ref:`basics.dtypes` +for more. 
+ +If you need the actual array backing a ``Series``, use :attr:`Series.array`. + +.. ipython:: python + + s.array + +Accessing the array can be useful when you need to do some operation without the +index (to disable :ref:`automatic alignment `, for example). + +:attr:`Series.array` will always be an :class:`~pandas.api.extensions.ExtensionArray`. +Briefly, an ExtensionArray is a thin wrapper around one or more *concrete* arrays like a +:class:`numpy.ndarray`. Pandas knows how to take an ``ExtensionArray`` and +store it in a ``Series`` or a column of a ``DataFrame``. +See :ref:`basics.dtypes` for more. + +While Series is ndarray-like, if you need an *actual* ndarray, then use +:meth:`Series.to_numpy`. + +.. ipython:: python + + s.to_numpy() + +Even if the Series is backed by a :class:`~pandas.api.extensions.ExtensionArray`, +:meth:`Series.to_numpy` will return a NumPy ndarray. + +Series is dict-like +~~~~~~~~~~~~~~~~~~~ + +A Series is like a fixed-size dict in that you can get and set values by index +label: + +.. ipython:: python + + s['a'] + s['e'] = 12. + s + 'e' in s + 'f' in s + +If a label is not contained, an exception is raised: + +.. code-block:: python + + >>> s['f'] + KeyError: 'f' + +Using the ``get`` method, a missing label will return None or specified default: + +.. ipython:: python + + s.get('f') + + s.get('f', np.nan) + +See also the :ref:`section on attribute access`. + +Vectorized operations and label alignment with Series +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When working with raw NumPy arrays, looping through value-by-value is usually +not necessary. The same is true when working with Series in pandas. +Series can also be passed into most NumPy methods expecting an ndarray. + +.. ipython:: python + + s + s + s * 2 + np.exp(s) + +A key difference between Series and ndarray is that operations between Series +automatically align the data based on label. 
Thus, you can write computations +without giving consideration to whether the Series involved have the same +labels. + +.. ipython:: python + + s[1:] + s[:-1] + +The result of an operation between unaligned Series will have the **union** of +the indexes involved. If a label is not found in one Series or the other, the +result will be marked as missing ``NaN``. Being able to write code without doing +any explicit data alignment grants immense freedom and flexibility in +interactive data analysis and research. The integrated data alignment features +of the pandas data structures set pandas apart from the majority of related +tools for working with labeled data. + +.. note:: + + In general, we chose to make the default result of operations between + differently indexed objects yield the **union** of the indexes in order to + avoid loss of information. Having an index label, though the data is + missing, is typically important information as part of a computation. You + of course have the option of dropping labels with missing data via the + **dropna** function. + +Name attribute +~~~~~~~~~~~~~~ + +.. _dsintro.name_attribute: + +Series can also have a ``name`` attribute: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), name='something') + s + s.name + +The Series ``name`` will be assigned automatically in many cases, in particular +when taking 1D slices of DataFrame as you will see below. + +You can rename a Series with the :meth:`pandas.Series.rename` method. + +.. ipython:: python + + s2 = s.rename("different") + s2.name + +Note that ``s`` and ``s2`` refer to different objects. + +.. _basics.dataframe: + +DataFrame +--------- + +**DataFrame** is a 2-dimensional labeled data structure with columns of +potentially different types. You can think of it like a spreadsheet or SQL +table, or a dict of Series objects. It is generally the most commonly used +pandas object. 
Like Series, DataFrame accepts many different kinds of input: + +* Dict of 1D ndarrays, lists, dicts, or Series +* 2-D numpy.ndarray +* `Structured or record + `__ ndarray +* A ``Series`` +* Another ``DataFrame`` + +Along with the data, you can optionally pass **index** (row labels) and +**columns** (column labels) arguments. If you pass an index and / or columns, +you are guaranteeing the index and / or columns of the resulting +DataFrame. Thus, a dict of Series plus a specific index will discard all data +not matching up to the passed index. + +If axis labels are not passed, they will be constructed from the input data +based on common sense rules. + +.. note:: + + When the data is a dict, and ``columns`` is not specified, the ``DataFrame`` + columns will be ordered by the dict's insertion order, if you are using + Python version >= 3.6 and Pandas >= 0.23. + + If you are using Python < 3.6 or Pandas < 0.23, and ``columns`` is not + specified, the ``DataFrame`` columns will be the lexically ordered list of dict + keys. + +From dict of Series or dicts +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The resulting **index** will be the **union** of the indexes of the various +Series. If there are any nested dicts, these will first be converted to +Series. If no columns are passed, the columns will be the ordered list of dict +keys. + +.. ipython:: python + + d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} + df = pd.DataFrame(d) + df + + pd.DataFrame(d, index=['d', 'b', 'a']) + pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three']) + +The row and column labels can be accessed respectively by accessing the +**index** and **columns** attributes: + +.. note:: + + When a particular set of columns is passed along with a dict of data, the + passed columns override the keys in the dict. + +.. 
ipython:: python + + df.index + df.columns + +From dict of ndarrays / lists +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ndarrays must all be the same length. If an index is passed, it must +clearly also be the same length as the arrays. If no index is passed, the +result will be ``range(n)``, where ``n`` is the array length. + +.. ipython:: python + + d = {'one': [1., 2., 3., 4.], + 'two': [4., 3., 2., 1.]} + pd.DataFrame(d) + pd.DataFrame(d, index=['a', 'b', 'c', 'd']) + +From structured or record array +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This case is handled identically to a dict of arrays. + +.. ipython:: python + + data = np.zeros((2, ), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')]) + data[:] = [(1, 2., 'Hello'), (2, 3., "World")] + + pd.DataFrame(data) + pd.DataFrame(data, index=['first', 'second']) + pd.DataFrame(data, columns=['C', 'A', 'B']) + +.. note:: + + DataFrame is not intended to work exactly like a 2-dimensional NumPy + ndarray. + +.. _basics.dataframe.from_list_of_dicts: + +From a list of dicts +~~~~~~~~~~~~~~~~~~~~ + +.. ipython:: python + + data2 = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}] + pd.DataFrame(data2) + pd.DataFrame(data2, index=['first', 'second']) + pd.DataFrame(data2, columns=['a', 'b']) + +.. _basics.dataframe.from_dict_of_tuples: + +From a dict of tuples +~~~~~~~~~~~~~~~~~~~~~ + +You can automatically create a MultiIndexed frame by passing a tuples +dictionary. + +.. ipython:: python + + pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2}, + ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4}, + ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6}, + ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8}, + ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}}) + +.. _basics.dataframe.from_series: + +From a Series +~~~~~~~~~~~~~ + +The result will be a DataFrame with the same index as the input Series, and +with one column whose name is the original name of the Series (only if no other +column name provided). 
+ +**Missing data** + +Much more will be said on this topic in the :ref:`Missing data ` +section. To construct a DataFrame with missing data, we use ``np.nan`` to +represent missing values. Alternatively, you may pass a ``numpy.MaskedArray`` +as the data argument to the DataFrame constructor, and its masked entries will +be considered missing. + +Alternate constructors +~~~~~~~~~~~~~~~~~~~~~~ + +.. _basics.dataframe.from_dict: + +**DataFrame.from_dict** + +``DataFrame.from_dict`` takes a dict of dicts or a dict of array-like sequences +and returns a DataFrame. It operates like the ``DataFrame`` constructor except +for the ``orient`` parameter which is ``'columns'`` by default, but which can be +set to ``'index'`` in order to use the dict keys as row labels. + + +.. ipython:: python + + pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])])) + +If you pass ``orient='index'``, the keys will be the row labels. In this +case, you can also pass the desired column names: + +.. ipython:: python + + pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]), + orient='index', columns=['one', 'two', 'three']) + +.. _basics.dataframe.from_records: + +**DataFrame.from_records** + +``DataFrame.from_records`` takes a list of tuples or an ndarray with structured +dtype. It works analogously to the normal ``DataFrame`` constructor, except that +the resulting DataFrame index may be a specific field of the structured +dtype. For example: + +.. ipython:: python + + data + pd.DataFrame.from_records(data, index='C') + +.. _basics.dataframe.sel_add_del: + +Column selection, addition, deletion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can treat a DataFrame semantically like a dict of like-indexed Series +objects. Getting, setting, and deleting columns works with the same syntax as +the analogous dict operations: + +.. 
ipython:: python + + df['one'] + df['three'] = df['one'] * df['two'] + df['flag'] = df['one'] > 2 + df + +Columns can be deleted or popped like with a dict: + +.. ipython:: python + + del df['two'] + three = df.pop('three') + df + +When inserting a scalar value, it will naturally be propagated to fill the +column: + +.. ipython:: python + + df['foo'] = 'bar' + df + +When inserting a Series that does not have the same index as the DataFrame, it +will be conformed to the DataFrame's index: + +.. ipython:: python + + df['one_trunc'] = df['one'][:2] + df + +You can insert raw ndarrays but their length must match the length of the +DataFrame's index. + +By default, columns get inserted at the end. The ``insert`` function is +available to insert at a particular location in the columns: + +.. ipython:: python + + df.insert(1, 'bar', df['one']) + df + +.. _dsintro.chained_assignment: + +Assigning new columns in method chains +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Inspired by `dplyr's +`__ +``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` +method that allows you to easily create new columns that are potentially +derived from existing columns. + +.. ipython:: python + + iris = pd.read_csv('data/iris.data') + iris.head() + (iris.assign(sepal_ratio=iris['SepalWidth'] / iris['SepalLength']) + .head()) + +In the example above, we inserted a precomputed value. We can also pass in +a function of one argument to be evaluated on the DataFrame being assigned to. + +.. ipython:: python + + iris.assign(sepal_ratio=lambda x: (x['SepalWidth'] / x['SepalLength'])).head() + +``assign`` **always** returns a copy of the data, leaving the original +DataFrame untouched. + +Passing a callable, as opposed to an actual value to be inserted, is +useful when you don't have a reference to the DataFrame at hand. This is +common when using ``assign`` in a chain of operations. 
For example, +we can limit the DataFrame to just those observations with a Sepal Length +greater than 5, calculate the ratio, and plot: + +.. ipython:: python + + @savefig basics_assign.png + (iris.query('SepalLength > 5') + .assign(SepalRatio=lambda x: x.SepalWidth / x.SepalLength, + PetalRatio=lambda x: x.PetalWidth / x.PetalLength) + .plot(kind='scatter', x='SepalRatio', y='PetalRatio')) + +Since a function is passed in, the function is computed on the DataFrame +being assigned to. Importantly, this is the DataFrame that's been filtered +to those rows with sepal length greater than 5. The filtering happens first, +and then the ratio calculations. This is an example where we didn't +have a reference to the *filtered* DataFrame available. + +The function signature for ``assign`` is simply ``**kwargs``. The keys +are the column names for the new fields, and the values are either a value +to be inserted (for example, a ``Series`` or NumPy array), or a function +of one argument to be called on the ``DataFrame``. A *copy* of the original +DataFrame is returned, with the new values inserted. + +.. versionchanged:: 0.23.0 + +Starting with Python 3.6 the order of ``**kwargs`` is preserved. This allows +for *dependent* assignment, where an expression later in ``**kwargs`` can refer +to a column created earlier in the same :meth:`~DataFrame.assign`. + +.. ipython:: python + + dfa = pd.DataFrame({"A": [1, 2, 3], + "B": [4, 5, 6]}) + dfa.assign(C=lambda x: x['A'] + x['B'], + D=lambda x: x['A'] + x['C']) + +In the second expression, ``x['C']`` will refer to the newly created column, +that's equal to ``dfa['A'] + dfa['B']``. + + +Indexing / selection +~~~~~~~~~~~~~~~~~~~~ +The basics of indexing are as follows: + +.. 
csv-table:: + :header: "Operation", "Syntax", "Result" + :widths: 30, 20, 10 + + Select column, ``df[col]``, Series + Select row by label, ``df.loc[label]``, Series + Select row by integer location, ``df.iloc[loc]``, Series + Slice rows, ``df[5:10]``, DataFrame + Select rows by boolean vector, ``df[bool_vec]``, DataFrame + +Row selection, for example, returns a Series whose index is the columns of the +DataFrame: + +.. ipython:: python + + df.loc['b'] + df.iloc[2] + +For a more exhaustive treatment of sophisticated label-based indexing and +slicing, see the :ref:`section on indexing <indexing>`. We will address the +fundamentals of reindexing / conforming to new sets of labels in the +:ref:`section on reindexing <basics.reindexing>`. + +.. _dsintro.alignment: + +Data alignment and arithmetic +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Operations between DataFrame objects automatically align on **both the +columns and the index (row labels)**. Again, the resulting object will have the +union of the column and row labels. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D']) + df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A', 'B', 'C']) + df + df2 + +When doing an operation between DataFrame and Series, the default behavior is +to align the Series **index** on the DataFrame **columns**, thus `broadcasting +<https://numpy.org/doc/stable/user/basics.broadcasting.html>`__ +row-wise. For example: + +.. ipython:: python + + df - df.iloc[0] + +In the special case of working with time series data, if the DataFrame index +contains dates, the broadcasting will be column-wise: + +.. ipython:: python + :okwarning: + + index = pd.date_range('1/1/2000', periods=8) + df = pd.DataFrame(np.random.randn(8, 3), index=index, columns=list('ABC')) + df + type(df['A']) + df - df['A'] + +.. warning:: + + .. code-block:: python + + df - df['A'] + + is now deprecated and will be removed in a future release. The preferred way + to replicate this behavior is + + .. 
code-block:: python + + df.sub(df['A'], axis=0) + +For explicit control over the matching and broadcasting behavior, see the +section on :ref:`flexible binary operations `. + +Operations with scalars are just as you would expect: + +.. ipython:: python + + df * 5 + 2 + 1 / df + df ** 4 + +.. _dsintro.boolean: + +Boolean operators work as well: + +.. ipython:: python + + df1 = pd.DataFrame({'a': [1, 0, 1], 'b': [0, 1, 1]}, dtype=bool) + df2 = pd.DataFrame({'a': [0, 1, 1], 'b': [1, 1, 0]}, dtype=bool) + df1 & df2 + df1 | df2 + df1 ^ df2 + -df1 + +Transposing +~~~~~~~~~~~ + +To transpose, access the ``T`` attribute (also the ``transpose`` function), +similar to an ndarray: + +.. ipython:: python + + # only show the first 5 rows + df[:5].T + +.. _dsintro.numpy_interop: + +DataFrame interoperability with NumPy functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Elementwise NumPy ufuncs (log, exp, sqrt, ...) and various other NumPy functions +can be used with no issues on Series and DataFrame, assuming the data within +are numeric: + +.. ipython:: python + + np.exp(df) + np.asarray(df) + +DataFrame is not intended to be a drop-in replacement for ndarray as its +indexing semantics and data model are quite different in places from an n-dimensional +array. + +:class:`Series` implements ``__array_ufunc__``, which allows it to work with NumPy's +`universal functions `_. + +The ufunc is applied to the underlying array in a Series. + +.. ipython:: python + + ser = pd.Series([1, 2, 3, 4]) + np.exp(ser) + +.. versionchanged:: 0.25.0 + + When multiple ``Series`` are passed to a ufunc, they are aligned before + performing the operation. + +Like other parts of the library, pandas will automatically align labeled inputs +as part of a ufunc with multiple inputs. For example, using :meth:`numpy.remainder` +on two :class:`Series` with differently ordered labels will align before the operation. + +.. 
ipython:: python + + ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + ser2 = pd.Series([1, 3, 5], index=['b', 'a', 'c']) + ser1 + ser2 + np.remainder(ser1, ser2) + +As usual, the union of the two indices is taken, and non-overlapping values are filled +with missing values. + +.. ipython:: python + + ser3 = pd.Series([2, 4, 6], index=['b', 'c', 'd']) + ser3 + np.remainder(ser1, ser3) + +When a binary ufunc is applied to a :class:`Series` and :class:`Index`, the Series +implementation takes precedence and a Series is returned. + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + idx = pd.Index([4, 5, 6]) + + np.maximum(ser, idx) + +NumPy ufuncs are safe to apply to :class:`Series` backed by non-ndarray arrays, +for example :class:`arrays.SparseArray` (see :ref:`sparse.calculation`). If possible, +the ufunc is applied without converting the underlying data to an ndarray. + +Console display +~~~~~~~~~~~~~~~ + +Very large DataFrames will be truncated to display them in the console. +You can also get a summary using :meth:`~pandas.DataFrame.info`. +(Here I am reading a CSV version of the **baseball** dataset from the **plyr** +R package): + +.. ipython:: python + :suppress: + + # force a summary to be printed + pd.set_option('display.max_rows', 5) + +.. ipython:: python + + baseball = pd.read_csv('data/baseball.csv') + print(baseball) + baseball.info() + +.. ipython:: python + :suppress: + :okwarning: + + # restore GlobalPrintConfig + pd.reset_option(r'^display\.') + +However, using ``to_string`` will return a string representation of the +DataFrame in tabular form, though it won't always fit the console width: + +.. ipython:: python + + print(baseball.iloc[-20:, :12].to_string()) + +Wide DataFrames will be printed across multiple rows by +default: + +.. ipython:: python + + pd.DataFrame(np.random.randn(3, 12)) + +You can change how much to print on a single row by setting the ``display.width`` +option: + +.. 
ipython:: python + + pd.set_option('display.width', 40) # default is 80 + + pd.DataFrame(np.random.randn(3, 12)) + +You can adjust the max width of the individual columns by setting ``display.max_colwidth`` + +.. ipython:: python + + datafile = {'filename': ['filename_01', 'filename_02'], + 'path': ["media/user_name/storage/folder_01/filename_01", + "media/user_name/storage/folder_02/filename_02"]} + + pd.set_option('display.max_colwidth', 30) + pd.DataFrame(datafile) + + pd.set_option('display.max_colwidth', 100) + pd.DataFrame(datafile) + +.. ipython:: python + :suppress: + + pd.reset_option('display.width') + pd.reset_option('display.max_colwidth') + +You can also disable this feature via the ``expand_frame_repr`` option. +This will print the table in one block. + +DataFrame column attribute access and IPython completion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a DataFrame column label is a valid Python variable name, the column can be +accessed like an attribute: + +.. ipython:: python + + df = pd.DataFrame({'foo1': np.random.randn(5), + 'foo2': np.random.randn(5)}) + df + df.foo1 + +The columns are also connected to the `IPython `__ +completion mechanism so they can be tab-completed: + +.. code-block:: ipython + + In [5]: df.fo # noqa: E225, E999 + df.foo1 df.foo2 diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst new file mode 100644 index 00000000..a2f8f79f --- /dev/null +++ b/doc/source/getting_started/index.rst @@ -0,0 +1,672 @@ +{{ header }} + +.. _getting_started: + +=============== +Getting started +=============== + +Installation +------------ + +Before you can use pandas, you’ll need to get it installed. + +.. raw:: html + +
    +
    +
    +
    +
    + Working with conda? +
    +
    +

    + +Pandas is part of the `Anaconda `__ distribution and can be +installed with Anaconda or Miniconda: + +.. raw:: html + +

    +
    + +
    +
    +
    +
    +
    + Prefer pip? +
    +
    +

    + +Pandas can be installed via pip from `PyPI `__. + +.. raw:: html + +

    +
    + +
    +
    +
    +
    +
    + In-depth instructions? +
    +
    +

    Installing a specific version? + Installing from source? + Check the advanced installation page.

    + +.. container:: custom-button + + :ref:`Learn more ` + +.. raw:: html + +
    +
    +
    +
    +
    + +.. _gentle_intro: + +Intro to pandas +--------------- + +.. raw:: html + +
    +
    + +
    + +
    +
    + +When working with tabular data, such as data stored in spreadsheets or databases, Pandas is the right tool for you. Pandas will help you +to explore, clean and process your data. In Pandas, a data table is called a :class:`DataFrame`. + +.. image:: ../_static/schemas/01_table_dataframe.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_01_tableoriented>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Pandas supports the integration with many file formats or data sources out of the box (csv, excel, sql, json, parquet,…). Importing data from each of these +data sources is provided by functions with the prefix ``read_*``. Similarly, the ``to_*`` methods are used to store data. + +.. image:: ../_static/schemas/02_io_readwrite.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_02_read_write>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Selecting or filtering specific rows and/or columns? Filtering the data on a condition? Methods for slicing, selecting, and extracting the +data you need are available in Pandas. + +.. image:: ../_static/schemas/03_subset_columns_rows.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_03_subset>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Pandas provides plotting your data out of the box, using the power of Matplotlib. You can pick the plot type (scatter, bar, boxplot,...) +corresponding to your data. + +.. image:: ../_static/schemas/04_plot_overview.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_04_plotting>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +There is no need to loop over all rows of your data table to do calculations. Data manipulations on a column work elementwise. +Adding a column to a :class:`DataFrame` based on existing data in other columns is straightforward. + +.. image:: ../_static/schemas/05_newcolumn_2.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_05_columns>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Basic statistics (mean, median, min, max, counts...) are easily calculable. These or custom aggregations can be applied on the entire +data set, a sliding window of the data or grouped by categories. The latter is also known as the split-apply-combine approach. + +.. image:: ../_static/schemas/06_groupby.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_06_stats>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Change the structure of your data table in multiple ways. You can :func:`~pandas.melt` your data table from wide to long/tidy form or :func:`~pandas.pivot` +from long to wide format. With aggregations built-in, a pivot table is created with a single command. + +.. image:: ../_static/schemas/07_melt.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_07_reshape>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Multiple tables can be concatenated both column wise and row wise, and database-like join/merge operations are provided to combine multiple tables of data. + +.. image:: ../_static/schemas/08_concat_row.svg + :align: center + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_08_combine>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Pandas has great support for time series and has an extensive set of tools for working with dates, times, and time-indexed data. + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_09_timeseries>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    + +
    +
    + +Data sets do not only contain numerical data. Pandas provides a wide range of functions to clean textual data and extract useful information from it. + +.. raw:: html + +
    + + +:ref:`To introduction tutorial <10min_tut_10_text>` + +.. raw:: html + + + + +:ref:`To user guide ` + +.. raw:: html + + +
    +
    +
    +
    + +
    +
    + + +.. _comingfrom: + +Coming from... +-------------- + +Currently working with other software for data manipulation in a tabular format? You're probably familiar with typical +data operations and know *what* to do with your tabular data, but lack the syntax to execute these operations. Get to know +the pandas syntax by looking for equivalents from the software you already know: + +.. raw:: html + +
    +
    +
    +
    + R project logo +
    +

    The R programming language provides the data.frame data structure and multiple packages, + such as tidyverse use and extend data.frames for convenient data handling + functionalities similar to pandas.

    + +.. container:: custom-button + + :ref:`Learn more ` + +.. raw:: html + +
    +
    +
    +
    +
    + SQL logo +
    +

    Already familiar with SELECT, GROUP BY, JOIN,...? + Most of these SQL manipulations do have equivalents in pandas.

    + +.. container:: custom-button + + :ref:`Learn more ` + +.. raw:: html + +
    +
    +
    +
    +
    + STATA logo +
    +

    The data set included in the + STATA statistical software suite corresponds + to the pandas DataFrame. Many of the operations known from STATA have an equivalent + in pandas.

    + +.. container:: custom-button + + :ref:`Learn more ` + +.. raw:: html + +
    +
    +
    +
    +
    + SAS logo +
    +

    The SAS statistical software suite + also provides the data set corresponding to the pandas DataFrame. + Also vectorized operations, filtering, string processing operations,... from SAS have similar + functions in pandas.

    + +.. container:: custom-button + + :ref:`Learn more ` + +.. raw:: html + +
    +
    +
    +
    +
    + +Community tutorials +------------------- + +The community produces a wide variety of tutorials available online. Some of the +material is enlisted in the community contributed :ref:`tutorials`. + + +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + :hidden: + + install + overview + 10min + intro_tutorials/index + basics + dsintro + comparison/index + tutorials diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst new file mode 100644 index 00000000..8f5900a2 --- /dev/null +++ b/doc/source/getting_started/install.rst @@ -0,0 +1,308 @@ +.. _install: + +{{ header }} + +============ +Installation +============ + +The easiest way to install pandas is to install it +as part of the `Anaconda `__ distribution, a +cross platform distribution for data analysis and scientific computing. +This is the recommended installation method for most users. + +Instructions for installing from source, +`PyPI `__, `ActivePython `__, various Linux distributions, or a +`development version `__ are also provided. + +Python version support +---------------------- + +Officially Python 3.6.1 and above, 3.7, and 3.8. + +Installing pandas +----------------- + +.. _install.anaconda: + +Installing with Anaconda +~~~~~~~~~~~~~~~~~~~~~~~~ + +Installing pandas and the rest of the `NumPy `__ and +`SciPy `__ stack can be a little +difficult for inexperienced users. + +The simplest way to install not only pandas, but Python and the most popular +packages that make up the `SciPy `__ stack +(`IPython `__, `NumPy `__, +`Matplotlib `__, ...) is with +`Anaconda `__, a cross-platform +(Linux, Mac OS X, Windows) Python distribution for data analytics and +scientific computing. + +After running the installer, the user will have access to pandas and the +rest of the `SciPy `__ stack without needing to install +anything else, and without needing to wait for any software to be compiled. 
+ +Installation instructions for `Anaconda `__ +`can be found here `__. + +A full list of the packages available as part of the +`Anaconda `__ distribution +`can be found here `__. + +Another advantage to installing Anaconda is that you don't need +admin rights to install it. Anaconda can install in the user's home directory, +which makes it trivial to delete Anaconda if you decide (just delete +that folder). + +.. _install.miniconda: + +Installing with Miniconda +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The previous section outlined how to get pandas installed as part of the +`Anaconda `__ distribution. +However this approach means you will install well over one hundred packages +and involves downloading the installer which is a few hundred megabytes in size. + +If you want to have more control on which packages, or have a limited internet +bandwidth, then installing pandas with +`Miniconda `__ may be a better solution. + +`Conda `__ is the package manager that the +`Anaconda `__ distribution is built upon. +It is a package manager that is both cross-platform and language agnostic +(it can play a similar role to a pip and virtualenv combination). + +`Miniconda `__ allows you to create a +minimal self contained Python installation, and then use the +`Conda `__ command to install additional packages. + +First you will need `Conda `__ to be installed and +downloading and running the `Miniconda +`__ +will do this for you. The installer +`can be found here `__ + +The next step is to create a new conda environment. A conda environment is like a +virtualenv that allows you to specify a specific version of Python and set of libraries. +Run the following commands from a terminal window:: + + conda create -n name_of_my_env python + +This will create a minimal environment with only Python installed in it. 
+To put your self inside this environment run:: + + source activate name_of_my_env + +On Windows the command is:: + + activate name_of_my_env + +The final step required is to install pandas. This can be done with the +following command:: + + conda install pandas + +To install a specific pandas version:: + + conda install pandas=0.20.3 + +To install other packages, IPython for example:: + + conda install ipython + +To install the full `Anaconda `__ +distribution:: + + conda install anaconda + +If you need packages that are available to pip but not conda, then +install pip, and then use pip to install those packages:: + + conda install pip + pip install django + +Installing from PyPI +~~~~~~~~~~~~~~~~~~~~ + +pandas can be installed via pip from +`PyPI `__. + +:: + + pip install pandas + +Installing with ActivePython +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Installation instructions for +`ActivePython `__ can be found +`here `__. Versions +2.7, 3.5 and 3.6 include pandas. + +Installing using your Linux distribution's package manager. +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The commands in this table will install pandas for Python 3 from your distribution. +To install pandas for Python 2, you may need to use the ``python-pandas`` package. + +.. 
csv-table:: + :header: "Distribution", "Status", "Download / Repository Link", "Install method" + :widths: 10, 10, 20, 50 + + + Debian, stable, `official Debian repository `__ , ``sudo apt-get install python3-pandas`` + Debian & Ubuntu, unstable (latest packages), `NeuroDebian `__ , ``sudo apt-get install python3-pandas`` + Ubuntu, stable, `official Ubuntu repository `__ , ``sudo apt-get install python3-pandas`` + OpenSuse, stable, `OpenSuse Repository `__ , ``zypper in python3-pandas`` + Fedora, stable, `official Fedora repository `__ , ``dnf install python3-pandas`` + Centos/RHEL, stable, `EPEL repository `__ , ``yum install python3-pandas`` + +**However**, the packages in the linux package managers are often a few versions behind, so +to get the newest version of pandas, it's recommended to install using the ``pip`` or ``conda`` +methods described above. + + +Installing from source +~~~~~~~~~~~~~~~~~~~~~~ + +See the :ref:`contributing guide ` for complete instructions on building from the git source tree. Further, see :ref:`creating a development environment ` if you wish to create a *pandas* development environment. + +Running the test suite +---------------------- + +pandas is equipped with an exhaustive set of unit tests, covering about 97% of +the code base as of this writing. To run it on your machine to verify that +everything is working (and that you have all of the dependencies, soft and hard, +installed), make sure you have `pytest +`__ >= 5.0.1 and `Hypothesis +`__ >= 3.58, then run: + +:: + + >>> pd.test() + running: pytest --skip-slow --skip-network C:\Users\TP\Anaconda3\envs\py36\lib\site-packages\pandas + ============================= test session starts ============================= + platform win32 -- Python 3.6.2, pytest-3.6.0, py-1.4.34, pluggy-0.4.0 + rootdir: C:\Users\TP\Documents\Python\pandasdev\pandas, inifile: setup.cfg + collected 12145 items / 3 skipped + + ..................................................................S...... 
+ ........S................................................................ + ......................................................................... + + ==================== 12130 passed, 12 skipped in 368.339 seconds ===================== + +.. _install.dependencies: + +Dependencies +------------ + +================================================================ ========================== +Package Minimum supported version +================================================================ ========================== +`setuptools `__ 24.2.0 +`NumPy `__ 1.13.3 +`python-dateutil `__ 2.6.1 +`pytz `__ 2017.2 +================================================================ ========================== + +.. _install.recommended_dependencies: + +Recommended dependencies +~~~~~~~~~~~~~~~~~~~~~~~~ + +* `numexpr `__: for accelerating certain numerical operations. + ``numexpr`` uses multiple cores as well as smart chunking and caching to achieve large speedups. + If installed, must be Version 2.6.2 or higher. + +* `bottleneck `__: for accelerating certain types of ``nan`` + evaluations. ``bottleneck`` uses specialized cython routines to achieve large speedups. If installed, + must be Version 1.2.1 or higher. + +.. note:: + + You are highly encouraged to install these libraries, as they provide speed improvements, especially + when working with large data sets. + + +.. _install.optional_dependencies: + +Optional dependencies +~~~~~~~~~~~~~~~~~~~~~ + +Pandas has many optional dependencies that are only used for specific methods. +For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while +:meth:`DataFrame.to_markdown` requires the ``tabulate`` package. If the +optional dependency is not installed, pandas will raise an ``ImportError`` when +the method requiring that dependency is called. 
+ +========================= ================== ============================================================= +Dependency Minimum Version Notes +========================= ================== ============================================================= +BeautifulSoup4 4.6.0 HTML parser for read_html (see :ref:`note `) +Jinja2 Conditional formatting with DataFrame.style +PyQt4 Clipboard I/O +PyQt5 Clipboard I/O +PyTables 3.4.2 HDF5-based reading / writing +SQLAlchemy 1.1.4 SQL support for databases other than sqlite +SciPy 0.19.0 Miscellaneous statistical functions +XLsxWriter 0.9.8 Excel writing +blosc Compression for HDF5 +fastparquet 0.3.2 Parquet reading / writing +gcsfs 0.2.2 Google Cloud Storage access +html5lib HTML parser for read_html (see :ref:`note `) +lxml 3.8.0 HTML parser for read_html (see :ref:`note `) +matplotlib 2.2.2 Visualization +numba 0.46.0 Alternative execution engine for rolling operations +openpyxl 2.5.7 Reading / writing for xlsx files +pandas-gbq 0.8.0 Google Big Query access +psycopg2 PostgreSQL engine for sqlalchemy +pyarrow 0.12.0 Parquet, ORC (requires 0.13.0), and feather reading / writing +pymysql 0.7.11 MySQL engine for sqlalchemy +pyreadstat SPSS files (.sav) reading +pytables 3.4.2 HDF5 reading / writing +pyxlsb 1.0.6 Reading for xlsb files +qtpy Clipboard I/O +s3fs 0.3.0 Amazon S3 access +tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_) +xarray 0.8.2 pandas-like API for N-dimensional data +xclip Clipboard I/O on linux +xlrd 1.1.0 Excel reading +xlwt 1.2.0 Excel writing +xsel Clipboard I/O on linux +zlib Compression for HDF5 +========================= ================== ============================================================= + +.. _optional_html: + +Optional dependencies for parsing HTML +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +One of the following combinations of libraries is needed to use the +top-level :func:`~pandas.read_html` function: + +.. 
versionchanged:: 0.23.0 + +* `BeautifulSoup4`_ and `html5lib`_ +* `BeautifulSoup4`_ and `lxml`_ +* `BeautifulSoup4`_ and `html5lib`_ and `lxml`_ +* Only `lxml`_, although see :ref:`HTML Table Parsing ` + for reasons as to why you should probably **not** take this approach. + +.. warning:: + + * if you install `BeautifulSoup4`_ you must install either + `lxml`_ or `html5lib`_ or both. + :func:`~pandas.read_html` will **not** work with *only* + `BeautifulSoup4`_ installed. + * You are highly encouraged to read :ref:`HTML Table Parsing gotchas `. + It explains issues surrounding the installation and + usage of the above three libraries. + +.. _html5lib: https://github.com/html5lib/html5lib-python +.. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup +.. _lxml: http://lxml.de +.. _tabulate: https://github.com/astanin/python-tabulate diff --git a/doc/source/getting_started/intro_tutorials/01_table_oriented.rst b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst new file mode 100644 index 00000000..02e59b3c --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/01_table_oriented.rst @@ -0,0 +1,218 @@ +.. _10min_tut_01_tableoriented: + +{{ header }} + +What kind of data does pandas handle? +===================================== + +.. raw:: html + +
      +
    • + +I want to start using pandas + +.. ipython:: python + + import pandas as pd + +To load the pandas package and start working with it, import the +package. The community agreed alias for pandas is ``pd``, so loading +pandas as ``pd`` is assumed standard practice for all of the pandas +documentation. + +.. raw:: html + +
    • +
    + +Pandas data table representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/01_table_dataframe.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to store passenger data of the Titanic. For a number of passengers, I know the name (characters), age (integers) and sex (male/female) data. + +.. ipython:: python + + df = pd.DataFrame({ + "Name": ["Braund, Mr. Owen Harris", + "Allen, Mr. William Henry", + "Bonnell, Miss. Elizabeth"], + "Age": [22, 35, 58], + "Sex": ["male", "male", "female"]} + ) + df + +To manually store data in a table, create a ``DataFrame``. When using a Python dictionary of lists, the dictionary keys will be used as column headers and +the values in each list as rows of the ``DataFrame``. + +.. raw:: html + +
    • +
    + +A :class:`DataFrame` is a 2-dimensional data structure that can store data of +different types (including characters, integers, floating point values, +categorical data and more) in columns. It is similar to a spreadsheet, a +SQL table or the ``data.frame`` in R. + +- The table has 3 columns, each of them with a column label. The column + labels are respectively ``Name``, ``Age`` and ``Sex``. +- The column ``Name`` consists of textual data with each value a + string, the column ``Age`` are numbers and the column ``Sex`` is + textual data. + +In spreadsheet software, the table representation of our data would look +very similar: + +.. image:: ../../_static/schemas/01_table_spreadsheet.png + :align: center + +Each column in a ``DataFrame`` is a ``Series`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/01_table_series.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m just interested in working with the data in the column ``Age`` + +.. ipython:: python + + df["Age"] + +When selecting a single column of a pandas :class:`DataFrame`, the result is +a pandas :class:`Series`. To select the column, use the column label in +between square brackets ``[]``. + +.. raw:: html + +
    • +
    + +.. note:: + If you are familiar with Python + :ref:`dictionaries `, the selection of a + single column is very similar to selection of dictionary values based on + the key. + +You can create a ``Series`` from scratch as well: + +.. ipython:: python + + ages = pd.Series([22, 35, 58], name="Age") + ages + +A pandas ``Series`` has no column labels, as it is just a single column +of a ``DataFrame``. A Series does have row labels. + +Do something with a DataFrame or Series +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
    • + +I want to know the maximum Age of the passengers + +We can do this on the ``DataFrame`` by selecting the ``Age`` column and +applying ``max()``: + +.. ipython:: python + + df["Age"].max() + +Or to the ``Series``: + +.. ipython:: python + + ages.max() + +.. raw:: html + +
    • +
    + +As illustrated by the ``max()`` method, you can *do* things with a +``DataFrame`` or ``Series``. pandas provides a lot of functionalities, +each of them a *method* you can apply to a ``DataFrame`` or ``Series``. +As methods are functions, do not forget to use parentheses ``()``. + +.. raw:: html + +
      +
    • + +I’m interested in some basic statistics of the numerical data of my data table + +.. ipython:: python + + df.describe() + +The :func:`~DataFrame.describe` method provides a quick overview of the numerical data in +a ``DataFrame``. As the ``Name`` and ``Sex`` columns are textual data, +these are by default not taken into account by the :func:`~DataFrame.describe` method. + +.. raw:: html + +
    • +
    + +Many pandas operations return a ``DataFrame`` or a ``Series``. The +:func:`~DataFrame.describe` method is an example of a pandas operation returning a +pandas ``Series`` or a pandas ``DataFrame``. + +.. raw:: html + +
    + To user guide + +Check more options on ``describe`` in the user guide section about :ref:`aggregations with describe ` + +.. raw:: html + +
    + +.. note:: + This is just a starting point. Similar to spreadsheet + software, pandas represents data as a table with columns and rows. Apart + from the representation, also the data manipulations and calculations + you would do in spreadsheet software are supported by pandas. Continue + reading the next tutorials to get started! + +.. raw:: html + +
    +

    REMEMBER

    + +- Import the package, aka ``import pandas as pd`` +- A table of data is stored as a pandas ``DataFrame`` +- Each column in a ``DataFrame`` is a ``Series`` +- You can do things by applying a method to a ``DataFrame`` or ``Series`` + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A more extended explanation of ``DataFrame`` and ``Series`` is provided in the :ref:`introduction to data structures `. + +.. raw:: html + +
    \ No newline at end of file diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst new file mode 100644 index 00000000..797bdbcf --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst @@ -0,0 +1,232 @@ +.. _10min_tut_02_read_write: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +This tutorial uses the titanic data set, stored as CSV. The data +consists of the following data columns: + +- PassengerId: Id of every passenger. +- Survived: This feature has value 0 and 1. 0 for not survived and 1 + for survived. +- Pclass: There are 3 classes: Class 1, Class 2 and Class 3. +- Name: Name of passenger. +- Sex: Gender of passenger. +- Age: Age of passenger. +- SibSp: Indication that passenger has siblings and spouse. +- Parch: Whether a passenger is alone or has family. +- Ticket: Ticket number of passenger. +- Fare: Indicating the fare. +- Cabin: The cabin of passenger. +- Embarked: The embarked category. + +.. raw:: html + +

      + To raw data +
      +
      +
    • +
    +
    + +How do I read and write tabular data? +===================================== + +.. image:: ../../_static/schemas/02_io_readwrite.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to analyse the titanic passenger data, available as a CSV file. + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + +pandas provides the :func:`read_csv` function to read data stored as a csv +file into a pandas ``DataFrame``. pandas supports many different file +formats or data sources out of the box (csv, excel, sql, json, parquet, +…), each of them with the prefix ``read_*``. + +.. raw:: html + +
    • +
    + +Make sure to always have a check on the data after reading in the +data. When displaying a ``DataFrame``, the first and last 5 rows will be +shown by default: + +.. ipython:: python + + titanic + +.. raw:: html + +
      +
    • + +I want to see the first 8 rows of a pandas DataFrame. + +.. ipython:: python + + titanic.head(8) + +To see the first N rows of a ``DataFrame``, use the :meth:`~DataFrame.head` method with +the required number of rows (in this case 8) as argument. + +.. raw:: html + +
    • +
    + +.. note:: + + Interested in the last N rows instead? pandas also provides a + :meth:`~DataFrame.tail` method. For example, ``titanic.tail(10)`` will return the last + 10 rows of the DataFrame. + +A check on how pandas interpreted each of the column data types can be +done by requesting the pandas ``dtypes`` attribute: + +.. ipython:: python + + titanic.dtypes + +For each of the columns, the used data type is listed. The data types +in this ``DataFrame`` are integers (``int64``), floats (``float64``) and +strings (``object``). + +.. note:: + When asking for the ``dtypes``, no brackets are used! + ``dtypes`` is an attribute of a ``DataFrame`` and ``Series``. Attributes + of ``DataFrame`` or ``Series`` do not need brackets. Attributes + represent a characteristic of a ``DataFrame``/``Series``, whereas a + method (which requires brackets) *does* something with the + ``DataFrame``/``Series`` as introduced in the :ref:`first tutorial <10min_tut_01_tableoriented>`. + +.. raw:: html + +
      +
    • + +My colleague requested the titanic data as a spreadsheet. + +.. ipython:: python + + titanic.to_excel('titanic.xlsx', sheet_name='passengers', index=False) + +Whereas ``read_*`` functions are used to read data to pandas, the +``to_*`` methods are used to store data. The :meth:`~DataFrame.to_excel` method stores +the data as an excel file. In the example here, the ``sheet_name`` is +named *passengers* instead of the default *Sheet1*. By setting +``index=False`` the row index labels are not saved in the spreadsheet. + +.. raw:: html + +
    • +
    + +The equivalent read function :func:`~pandas.read_excel` will reload the data to a +``DataFrame``: + +.. ipython:: python + + titanic = pd.read_excel('titanic.xlsx', sheet_name='passengers') + +.. ipython:: python + + titanic.head() + +.. ipython:: python + :suppress: + + import os + os.remove('titanic.xlsx') + +.. raw:: html + +
      +
    • + +I’m interested in a technical summary of a ``DataFrame`` + +.. ipython:: python + + titanic.info() + + +The method :meth:`~DataFrame.info` provides technical information about a +``DataFrame``, so let’s explain the output in more detail: + +- It is indeed a :class:`DataFrame`. +- There are 891 entries, i.e. 891 rows. +- Each row has a row label (aka the ``index``) with values ranging from + 0 to 890. +- The table has 12 columns. Most columns have a value for each of the + rows (all 891 values are ``non-null``). Some columns do have missing + values and fewer than 891 ``non-null`` values. +- The columns ``Name``, ``Sex``, ``Cabin`` and ``Embarked`` consist of + textual data (strings, aka ``object``). The other columns are + numerical data with some of them whole numbers (aka ``integer``) and + others are real numbers (aka ``float``). +- The kind of data (characters, integers,…) in the different columns + are summarized by listing the ``dtypes``. +- The approximate amount of RAM used to hold the DataFrame is provided + as well. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Getting data in to pandas from many different file formats or data + sources is supported by ``read_*`` functions. +- Exporting data out of pandas is provided by different + ``to_*``\ methods. +- The ``head``/``tail``/``info`` methods and the ``dtypes`` attribute + are convenient for a first check. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +For a complete overview of the input and output possibilities from and to pandas, see the user guide section about :ref:`reader and writer functions `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst new file mode 100644 index 00000000..7a434790 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst @@ -0,0 +1,405 @@ +.. _10min_tut_03_subset: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +This tutorial uses the titanic data set, stored as CSV. The data +consists of the following data columns: + +- PassengerId: Id of every passenger. +- Survived: This feature has values 0 and 1. 0 for not survived and 1 + for survived. +- Pclass: There are 3 classes: Class 1, Class 2 and Class 3. +- Name: Name of passenger. +- Sex: Gender of passenger. +- Age: Age of passenger. +- SibSp: Indication that passenger has siblings and spouse. +- Parch: Whether a passenger is alone or has family. +- Ticket: Ticket number of passenger. +- Fare: Indicating the fare. +- Cabin: The cabin of passenger. +- Embarked: The embarked category. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
    • +
    +
    + +How do I select a subset of a ``DataFrame``? +============================================ + +How do I select specific columns from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_columns.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m interested in the age of the titanic passengers. + +.. ipython:: python + + ages = titanic["Age"] + ages.head() + +To select a single column, use square brackets ``[]`` with the column +name of the column of interest. + +.. raw:: html + +
    • +
    + +Each column in a :class:`DataFrame` is a :class:`Series`. As a single column is +selected, the returned object is a pandas :class:`Series`. We can verify this +by checking the type of the output: + +.. ipython:: python + + type(titanic["Age"]) + +And have a look at the ``shape`` of the output: + +.. ipython:: python + + titanic["Age"].shape + +:attr:`DataFrame.shape` is an attribute (remember :ref:`tutorial on reading and writing <10min_tut_02_read_write>`, do not use parentheses for attributes) of a +pandas ``Series`` and ``DataFrame`` containing the number of rows and +columns: *(nrows, ncolumns)*. A pandas Series is 1-dimensional and only +the number of rows is returned. + +.. raw:: html + +
      +
    • + +I’m interested in the age and sex of the titanic passengers. + +.. ipython:: python + + age_sex = titanic[["Age", "Sex"]] + age_sex.head() + +To select multiple columns, use a list of column names within the +selection brackets ``[]``. + +.. raw:: html + +
    • +
    + +.. note:: + The inner square brackets define a + :ref:`Python list ` with column names, whereas + the outer brackets are used to select the data from a pandas + ``DataFrame`` as seen in the previous example. + +The returned data type is a pandas DataFrame: + +.. ipython:: python + + type(titanic[["Age", "Sex"]]) + +.. ipython:: python + + titanic[["Age", "Sex"]].shape + +The selection returned a ``DataFrame`` with 891 rows and 2 columns. Remember, a +``DataFrame`` is 2-dimensional with both a row and column dimension. + +.. raw:: html + +
    + To user guide + +For basic information on indexing, see the user guide section on :ref:`indexing and selecting data `. + +.. raw:: html + +
    + +How do I filter specific rows from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_rows.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m interested in the passengers older than 35 years. + +.. ipython:: python + + above_35 = titanic[titanic["Age"] > 35] + above_35.head() + +To select rows based on a conditional expression, use a condition inside +the selection brackets ``[]``. + +.. raw:: html + +
    • +
    + +The condition inside the selection +brackets ``titanic["Age"] > 35`` checks for which rows the ``Age`` +column has a value larger than 35: + +.. ipython:: python + + titanic["Age"] > 35 + +The output of the conditional expression (``>``, but also ``==``, +``!=``, ``<``, ``<=``,… would work) is actually a pandas ``Series`` of +boolean values (either ``True`` or ``False``) with the same number of +rows as the original ``DataFrame``. Such a ``Series`` of boolean values +can be used to filter the ``DataFrame`` by putting it in between the +selection brackets ``[]``. Only rows for which the value is ``True`` +will be selected. + +We know from before that the original titanic ``DataFrame`` consists of +891 rows. Let’s have a look at the number of rows which satisfy the +condition by checking the ``shape`` attribute of the resulting +``DataFrame`` ``above_35``: + +.. ipython:: python + + above_35.shape + +.. raw:: html + +
      +
    • + +I’m interested in the titanic passengers from cabin class 2 and 3. + +.. ipython:: python + + class_23 = titanic[titanic["Pclass"].isin([2, 3])] + class_23.head() + +Similar to the conditional expression, the :func:`~Series.isin` conditional function +returns a ``True`` for each row the values are in the provided list. To +filter the rows based on such a function, use the conditional function +inside the selection brackets ``[]``. In this case, the condition inside +the selection brackets ``titanic["Pclass"].isin([2, 3])`` checks for +which rows the ``Pclass`` column is either 2 or 3. + +.. raw:: html + +
    • +
    + +The above is equivalent to filtering by rows for which the class is +either 2 or 3 and combining the two statements with an ``|`` (or) +operator: + +.. ipython:: python + + class_23 = titanic[(titanic["Pclass"] == 2) | (titanic["Pclass"] == 3)] + class_23.head() + +.. note:: + When combining multiple conditional statements, each condition + must be surrounded by parentheses ``()``. Moreover, you can not use + ``or``/``and`` but need to use the ``or`` operator ``|`` and the ``and`` + operator ``&``. + +.. raw:: html + +
    + To user guide + +See the dedicated section in the user guide about :ref:`boolean indexing ` or about the :ref:`isin function `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want to work with passenger data for which the age is known. + +.. ipython:: python + + age_no_na = titanic[titanic["Age"].notna()] + age_no_na.head() + +The :meth:`~Series.notna` conditional function returns a ``True`` for each row where the +value is not a ``Null`` value. As such, this can be combined with the +selection brackets ``[]`` to filter the data table. + +.. raw:: html + +
    • +
    + +You might wonder what actually changed, as the first 5 lines are still +the same values. One way to verify is to check if the shape has changed: + +.. ipython:: python + + age_no_na.shape + +.. raw:: html + +
    + To user guide + +For more dedicated functions on missing values, see the user guide section about :ref:`handling missing data `. + +.. raw:: html + +
    + +How do I select specific rows and columns from a ``DataFrame``? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/03_subset_columns_rows.svg + :align: center + +.. raw:: html + +
      +
    • + +I’m interested in the names of the passengers older than 35 years. + +.. ipython:: python + + adult_names = titanic.loc[titanic["Age"] > 35, "Name"] + adult_names.head() + +In this case, a subset of both rows and columns is made in one go and +just using selection brackets ``[]`` is not sufficient anymore. The +``loc``/``iloc`` operators are required in front of the selection +brackets ``[]``. When using ``loc``/``iloc``, the part before the comma +is the rows you want, and the part after the comma is the columns you +want to select. + +.. raw:: html + +
    • +
    + +When using the column names, row labels or a condition expression, use +the ``loc`` operator in front of the selection brackets ``[]``. For both +the part before and after the comma, you can use a single label, a list +of labels, a slice of labels, a conditional expression or a colon. Using +a colon specifies you want to select all rows or columns. + +.. raw:: html + +
      +
    • + +I’m interested in rows 10 till 25 and columns 3 to 5. + +.. ipython:: python + + titanic.iloc[9:25, 2:5] + +Again, a subset of both rows and columns is made in one go and just +using selection brackets ``[]`` is not sufficient anymore. When +specifically interested in certain rows and/or columns based on their +position in the table, use the ``iloc`` operator in front of the +selection brackets ``[]``. + +.. raw:: html + +
    • +
    + +When selecting specific rows and/or columns with ``loc`` or ``iloc``, +new values can be assigned to the selected data. For example, to assign +the name ``anonymous`` to the first 3 elements of the fourth column: + +.. ipython:: python + + titanic.iloc[0:3, 3] = "anonymous" + titanic.head() + +.. raw:: html + +
    + To user guide + +See the user guide section on :ref:`different choices for indexing ` to get more insight in the usage of ``loc`` and ``iloc``. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- When selecting subsets of data, square brackets ``[]`` are used. +- Inside these brackets, you can use a single column/row label, a list + of column/row labels, a slice of labels, a conditional expression or + a colon. +- Select specific rows and/or columns using ``loc`` when using the row + and column names +- Select specific rows and/or columns using ``iloc`` when using the + positions in the table +- You can assign new values to a selection based on ``loc``/``iloc``. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview about indexing is provided in the user guide pages on :ref:`indexing and selecting data `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/04_plotting.rst b/doc/source/getting_started/intro_tutorials/04_plotting.rst new file mode 100644 index 00000000..f3d99ee5 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/04_plotting.rst @@ -0,0 +1,252 @@ +.. _10min_tut_04_plotting: + +{{ header }} + +.. ipython:: python + + import pandas as pd + import matplotlib.pyplot as plt + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +For this tutorial, air quality data about :math:`NO_2` is used, made +available by `openaq `__ and using the +`py-openaq `__ package. +The ``air_quality_no2.csv`` data set provides :math:`NO_2` values for +the measurement stations *FR04014*, *BETR801* and *London Westminster* +in respectively Paris, Antwerp and London. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_no2.csv", + index_col=0, parse_dates=True) + air_quality.head() + +.. note:: + The usage of the ``index_col`` and ``parse_dates`` parameters of the ``read_csv`` function to define the first (0th) column as + index of the resulting ``DataFrame`` and convert the dates in the column to :class:`Timestamp` objects, respectively. + +.. raw:: html + +
    • +
    +
    + +How to create plots in pandas? +------------------------------ + +.. image:: ../../_static/schemas/04_plot_overview.svg + :align: center + +.. raw:: html + +
      +
    • + +I want a quick visual check of the data. + +.. ipython:: python + + @savefig 04_airqual_quick.png + air_quality.plot() + +With a ``DataFrame``, pandas creates by default one line plot for each of +the columns with numeric data. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +I want to plot only the columns of the data table with the data from Paris. + +.. ipython:: python + + @savefig 04_airqual_paris.png + air_quality["station_paris"].plot() + +To plot a specific column, use the selection method of the +:ref:`subset data tutorial <10min_tut_03_subset>` in combination with the :meth:`~DataFrame.plot` +method. Hence, the :meth:`~DataFrame.plot` method works on both ``Series`` and +``DataFrame``. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +I want to visually compare the :math:`NO_2` values measured in London versus Paris. + +.. ipython:: python + + @savefig 04_airqual_scatter.png + air_quality.plot.scatter(x="station_london", + y="station_paris", + alpha=0.5) + +.. raw:: html + +
    • +
    + +Apart from the default ``line`` plot when using the ``plot`` function, a +number of alternatives are available to plot data. Let’s use some +standard Python to get an overview of the available plot methods: + +.. ipython:: python + + [method_name for method_name in dir(air_quality.plot) + if not method_name.startswith("_")] + +.. note:: + In many development environments as well as ipython and + jupyter notebook, use the TAB button to get an overview of the available + methods, for example ``air_quality.plot.`` + TAB. + +One of the options is :meth:`DataFrame.plot.box`, which refers to a +`boxplot `__. The ``box`` +method is applicable on the air quality example data: + +.. ipython:: python + + @savefig 04_airqual_boxplot.png + air_quality.plot.box() + +.. raw:: html + +
    + To user guide + +For an introduction to plots other than the default line plot, see the user guide section about :ref:`supported plot styles `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want each of the columns in a separate subplot. + +.. ipython:: python + + @savefig 04_airqual_area_subplot.png + axs = air_quality.plot.area(figsize=(12, 4), subplots=True) + +Separate subplots for each of the data columns are supported by the ``subplots`` argument +of the ``plot`` functions. The builtin options available in each of the pandas plot +functions are worth having a look at. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +Some more formatting options are explained in the user guide section on :ref:`plot formatting `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want to further customize, extend or save the resulting plot. + +.. ipython:: python + + fig, axs = plt.subplots(figsize=(12, 4)); + air_quality.plot.area(ax=axs); + @savefig 04_airqual_customized.png + axs.set_ylabel("NO$_2$ concentration"); + fig.savefig("no2_concentrations.png") + +.. ipython:: python + :suppress: + + import os + os.remove('no2_concentrations.png') + +.. raw:: html + +
    • +
    + +Each of the plot objects created by pandas are a +`matplotlib `__ object. As Matplotlib provides +plenty of options to customize plots, making the link between pandas and +Matplotlib explicit enables all the power of matplotlib to the plot. +This strategy is applied in the previous example: + +:: + + fig, axs = plt.subplots(figsize=(12, 4)) # Create an empty matplotlib Figure and Axes + air_quality.plot.area(ax=axs) # Use pandas to put the area plot on the prepared Figure/Axes + axs.set_ylabel("NO$_2$ concentration") # Do any matplotlib customization you like + fig.savefig("no2_concentrations.png") # Save the Figure/Axes using the existing matplotlib method. + +.. raw:: html + +
    +

    REMEMBER

    + +- The ``.plot.*`` methods are applicable on both Series and DataFrames +- By default, each of the columns is plotted as a different element + (line, boxplot,…) +- Any plot created by pandas is a Matplotlib object. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview of plotting in pandas is provided in the :ref:`visualization pages `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst new file mode 100644 index 00000000..d4f6a8d6 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst @@ -0,0 +1,186 @@ +.. _10min_tut_05_columns: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +For this tutorial, air quality data about :math:`NO_2` is used, made +available by `openaq `__ and using the +`py-openaq `__ package. +The ``air_quality_no2.csv`` data set provides :math:`NO_2` values for +the measurement stations *FR04014*, *BETR801* and *London Westminster* +in respectively Paris, Antwerp and London. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_no2.csv", + index_col=0, parse_dates=True) + air_quality.head() + +.. raw:: html + +
    • +
    +
    + +How to create new columns derived from existing columns? +-------------------------------------------------------- + +.. image:: ../../_static/schemas/05_newcolumn_1.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to express the :math:`NO_2` concentration of the station in London in mg/m\ :math:`^3` + +(*If we assume temperature of 25 degrees Celsius and pressure of 1013 +hPa, the conversion factor is 1.882*) + +.. ipython:: python + + air_quality["london_mg_per_cubic"] = air_quality["station_london"] * 1.882 + air_quality.head() + +To create a new column, use the ``[]`` brackets with the new column name +at the left side of the assignment. + +.. raw:: html + +
    • +
    + +.. note:: + The calculation of the values is done **element-wise**. This + means all values in the given column are multiplied by the value 1.882 + at once. You do not need to use a loop to iterate each of the rows! + +.. image:: ../../_static/schemas/05_newcolumn_2.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to check the ratio of the values in Paris versus Antwerp and save the result in a new column + +.. ipython:: python + + air_quality["ratio_paris_antwerp"] = \ + air_quality["station_paris"] / air_quality["station_antwerp"] + air_quality.head() + +The calculation is again element-wise, so the ``/`` is applied *for the +values in each row*. + +.. raw:: html + +
    • +
    + +Also other mathematical operators (+, -, \*, /) or +logical operators (<, >, ==,…) work element-wise. The latter was already +used in the :ref:`subset data tutorial <10min_tut_03_subset>` to filter +rows of a table using a conditional expression. + +.. raw:: html + +
      +
    • + +I want to rename the data columns to the corresponding station identifiers used by openAQ + +.. ipython:: python + + air_quality_renamed = air_quality.rename( + columns={"station_antwerp": "BETR801", + "station_paris": "FR04014", + "station_london": "London Westminster"}) + +.. ipython:: python + + air_quality_renamed.head() + +The :meth:`~DataFrame.rename` function can be used for both row labels and column +labels. Provide a dictionary with the keys the current names and the +values the new names to update the corresponding names. + +.. raw:: html + +
    • +
    + +The mapping should not be restricted to fixed names only, but can be a +mapping function as well. For example, converting the column names to +lowercase letters can be done using a function as well: + +.. ipython:: python + + air_quality_renamed = air_quality_renamed.rename(columns=str.lower) + air_quality_renamed.head() + +.. raw:: html + +
    + To user guide + +Details about column or row label renaming are provided in the user guide section on :ref:`renaming labels <basics.rename>`. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Create a new column by assigning the output to the DataFrame with a + new column name in between the ``[]``. +- Operations are element-wise, no need to loop over rows. +- Use ``rename`` with a dictionary or function to rename row labels or + column names. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +The user guide contains a separate section on :ref:`column addition and deletion `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst new file mode 100644 index 00000000..7a94c905 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -0,0 +1,310 @@ +.. _10min_tut_06_stats: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +This tutorial uses the titanic data set, stored as CSV. The data +consists of the following data columns: + +- PassengerId: Id of every passenger. +- Survived: This feature has values 0 and 1. 0 for not survived and 1 + for survived. +- Pclass: There are 3 classes: Class 1, Class 2 and Class 3. +- Name: Name of passenger. +- Sex: Gender of passenger. +- Age: Age of passenger. +- SibSp: Indication that passenger has siblings and spouse. +- Parch: Whether a passenger is alone or has family. +- Ticket: Ticket number of passenger. +- Fare: Indicating the fare. +- Cabin: The cabin of passenger. +- Embarked: The embarked category. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
    • +
    +
    + +How to calculate summary statistics? +------------------------------------ + +Aggregating statistics +~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_aggregate.svg + :align: center + +.. raw:: html + +
      +
    • + +What is the average age of the titanic passengers? + +.. ipython:: python + + titanic["Age"].mean() + +.. raw:: html + +
    • +
    + +Different statistics are available and can be applied to columns with +numerical data. Operations in general exclude missing data and operate +across rows by default. + +.. image:: ../../_static/schemas/06_reduction.svg + :align: center + +.. raw:: html + +
      +
    • + +What is the median age and ticket fare price of the titanic passengers? + +.. ipython:: python + + titanic[["Age", "Fare"]].median() + +The statistic applied to multiple columns of a ``DataFrame`` (the selection of two columns +returns a ``DataFrame``, see the :ref:`subset data tutorial <10min_tut_03_subset>`) is calculated for each numeric column. + +.. raw:: html + +
    • +
    + +The aggregating statistic can be calculated for multiple columns at the +same time. Remember the ``describe`` function from the :ref:`first tutorial <10min_tut_01_tableoriented>`? + +.. ipython:: python + + titanic[["Age", "Fare"]].describe() + +Instead of the predefined statistics, specific combinations of +aggregating statistics for given columns can be defined using the +:meth:`DataFrame.agg` method: + +.. ipython:: python + + titanic.agg({'Age': ['min', 'max', 'median', 'skew'], + 'Fare': ['min', 'max', 'median', 'mean']}) + +.. raw:: html + +
    + To user guide + +Details about descriptive statistics are provided in the user guide section on :ref:`descriptive statistics `. + +.. raw:: html + +
    + + +Aggregating statistics grouped by category +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_groupby.svg + :align: center + +.. raw:: html + +
      +
    • + +What is the average age for male versus female titanic passengers? + +.. ipython:: python + + titanic[["Sex", "Age"]].groupby("Sex").mean() + +As our interest is the average age for each gender, a subselection on +these two columns is made first: ``titanic[["Sex", "Age"]]``. Next, the +:meth:`~DataFrame.groupby` method is applied on the ``Sex`` column to make a group per +category. The average age *for each gender* is calculated and +returned. + +.. raw:: html + +
    • +
    + +Calculating a given statistic (e.g. ``mean`` age) *for each category in +a column* (e.g. male/female in the ``Sex`` column) is a common pattern. +The ``groupby`` method is used to support this type of operations. More +generally, this fits in the broader ``split-apply-combine`` pattern: + +- **Split** the data into groups +- **Apply** a function to each group independently +- **Combine** the results into a data structure + +The apply and combine steps are typically done together in pandas. + +In the previous example, we explicitly selected the 2 columns first. If +not, the ``mean`` method is applied to each column containing numerical +data: + +.. ipython:: python + + titanic.groupby("Sex").mean() + +It does not make much sense to get the average value of the ``Pclass``. +If we are only interested in the average age for each gender, the +selection of columns (rectangular brackets ``[]`` as usual) is supported +on the grouped data as well: + +.. ipython:: python + + titanic.groupby("Sex")["Age"].mean() + +.. image:: ../../_static/schemas/06_groupby_select_detail.svg + :align: center + +.. note:: + The ``Pclass`` column contains numerical data but actually + represents 3 categories (or factors) with respectively the labels ‘1’, + ‘2’ and ‘3’. Calculating statistics on these does not make much sense. + Therefore, pandas provides a ``Categorical`` data type to handle this + type of data. More information is provided in the user guide + :ref:`categorical` section. + +.. raw:: html + +
      +
    • + +What is the mean ticket fare price for each of the sex and cabin class combinations? + +.. ipython:: python + + titanic.groupby(["Sex", "Pclass"])["Fare"].mean() + +Grouping can be done by multiple columns at the same time. Provide the +column names as a list to the :meth:`~DataFrame.groupby` method. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +A full description on the split-apply-combine approach is provided in the user guide section on :ref:`groupby operations `. + +.. raw:: html + +
    + +Count number of records by category +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/06_valuecounts.svg + :align: center + +.. raw:: html + +
      +
    • + +What is the number of passengers in each of the cabin classes? + +.. ipython:: python + + titanic["Pclass"].value_counts() + +The :meth:`~Series.value_counts` method counts the number of records for each +category in a column. + +.. raw:: html + +
    • +
    + +The function is a shortcut, as it is actually a groupby operation in combination with counting of the number of records +within each group: + +.. ipython:: python + + titanic.groupby("Pclass")["Pclass"].count() + +.. note:: + Both ``size`` and ``count`` can be used in combination with + ``groupby``. Whereas ``size`` includes ``NaN`` values and just provides + the number of rows (size of the table), ``count`` excludes the missing + values. In the ``value_counts`` method, use the ``dropna`` argument to + include or exclude the ``NaN`` values. + +.. raw:: html + +
    + To user guide + +The user guide has a dedicated section on ``value_counts`` , see page on :ref:`discretization `. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Aggregation statistics can be calculated on entire columns or rows +- ``groupby`` provides the power of the *split-apply-combine* pattern +- ``value_counts`` is a convenient shortcut to count the number of + entries in each category of a variable + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full description on the split-apply-combine approach is provided in the user guide pages about :ref:`groupby operations `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst new file mode 100644 index 00000000..b28a9012 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/07_reshape_table_layout.rst @@ -0,0 +1,404 @@ +.. _10min_tut_07_reshape: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +This tutorial uses the titanic data set, stored as CSV. The data +consists of the following data columns: + +- PassengerId: Id of every passenger. +- Survived: This feature has values 0 and 1. 0 for not survived and 1 + for survived. +- Pclass: There are 3 classes: Class 1, Class 2 and Class 3. +- Name: Name of passenger. +- Sex: Gender of passenger. +- Age: Age of passenger. +- SibSp: Indication that passenger has siblings and spouse. +- Parch: Whether a passenger is alone or has family. +- Ticket: Ticket number of passenger. +- Fare: Indicating the fare. +- Cabin: The cabin of passenger. +- Embarked: The embarked category. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
    • +
    • + +
      +
      +

      + +This tutorial uses air quality data about :math:`NO_2` and Particulate matter less than 2.5 +micrometers, made available by +`openaq `__ and using the +`py-openaq `__ package. +The ``air_quality_long.csv`` data set provides :math:`NO_2` and +:math:`PM_{2.5}` values for the measurement stations *FR04014*, *BETR801* +and *London Westminster* in respectively Paris, Antwerp and London. + +The air-quality data set has the following columns: + +- city: city where the sensor is used, either Paris, Antwerp or London +- country: country where the sensor is used, either FR, BE or GB +- location: the id of the sensor, either *FR04014*, *BETR801* or + *London Westminster* +- parameter: the parameter measured by the sensor, either :math:`NO_2` + or Particulate matter +- value: the measured value +- unit: the unit of the measured parameter, in this case ‘µg/m³’ + +and the index of the ``DataFrame`` is ``datetime``, the datetime of the +measurement. + +.. note:: + The air-quality data is provided in a so-called *long format* + data representation with each observation on a separate row and each + variable a separate column of the data table. The long/narrow format is + also known as the `tidy data + format `__. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_long.csv", + index_col="date.utc", parse_dates=True) + air_quality.head() + +.. raw:: html + +
    • +
    +
    + +How to reshape the layout of tables? +------------------------------------ + +Sort table rows +~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
    • + +I want to sort the titanic data according to the age of the passengers. + +.. ipython:: python + + titanic.sort_values(by="Age").head() + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +I want to sort the titanic data according to the cabin class and age in descending order. + +.. ipython:: python + + titanic.sort_values(by=['Pclass', 'Age'], ascending=False).head() + +With :meth:`DataFrame.sort_values`, the rows in the table are sorted according to the +defined column(s). The index will follow the row order. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +More details about sorting of tables are provided in the user guide section on :ref:`sorting data <basics.sorting>`. + +.. raw:: html + +
    + +Long to wide table format +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Let’s use a small subset of the air quality data set. We focus on +:math:`NO_2` data and only use the first two measurements of each +location (i.e. the head of each group). The subset of data will be +called ``no2_subset`` + +.. ipython:: python + + # filter for no2 data only + no2 = air_quality[air_quality["parameter"] == "no2"] + +.. ipython:: python + + # use 2 measurements (head) for each location (groupby) + no2_subset = no2.sort_index().groupby(["location"]).head(2) + no2_subset + +.. image:: ../../_static/schemas/07_pivot.svg + :align: center + +.. raw:: html + +
      +
    • + +I want the values for the three stations as separate columns next to each other + +.. ipython:: python + + no2_subset.pivot(columns="location", values="value") + +The :meth:`~pandas.pivot` function is purely reshaping of the data: a single value +for each index/column combination is required. + +.. raw:: html + +
    • +
    + +As pandas supports plotting of multiple columns (see :ref:`plotting tutorial <10min_tut_04_plotting>`) out of the box, the conversion from +*long* to *wide* table format enables the plotting of the different time +series at the same time: + +.. ipython:: python + + no2.head() + +.. ipython:: python + + @savefig 7_reshape_columns.png + no2.pivot(columns="location", values="value").plot() + +.. note:: + When the ``index`` parameter is not defined, the existing + index (row labels) is used. + +.. raw:: html + +
    + To user guide + +For more information about :meth:`~DataFrame.pivot`, see the user guide section on :ref:`pivoting DataFrame objects `. + +.. raw:: html + +
    + +Pivot table +~~~~~~~~~~~ + +.. image:: ../../_static/schemas/07_pivot_table.svg + :align: center + +.. raw:: html + +
      +
    • + +I want the mean concentrations for :math:`NO_2` and :math:`PM_{2.5}` in each of the stations in table form + +.. ipython:: python + + air_quality.pivot_table(values="value", index="location", + columns="parameter", aggfunc="mean") + +In the case of :meth:`~DataFrame.pivot`, the data is only rearranged. When multiple +values need to be aggregated (in this specific case, the values on +different time steps) :meth:`~DataFrame.pivot_table` can be used, providing an +aggregation function (e.g. mean) on how to combine these values. + +.. raw:: html + +
    • +
    + +Pivot table is a well known concept in spreadsheet software. When +interested in summary columns for each variable separately as well, set +the ``margins`` parameter to ``True``: + +.. ipython:: python + + air_quality.pivot_table(values="value", index="location", + columns="parameter", aggfunc="mean", + margins=True) + +.. raw:: html + +
    + To user guide + +For more information about :meth:`~DataFrame.pivot_table`, see the user guide section on :ref:`pivot tables `. + +.. raw:: html + +
    + +.. note:: + In case you are wondering, :meth:`~DataFrame.pivot_table` is indeed directly linked + to :meth:`~DataFrame.groupby`. The same result can be derived by grouping on both + ``parameter`` and ``location``: + + :: + + air_quality.groupby(["parameter", "location"]).mean() + +.. raw:: html + +
    + To user guide + +Have a look at :meth:`~DataFrame.groupby` in combination with :meth:`~DataFrame.unstack` at the user guide section on :ref:`combining stats and groupby `. + +.. raw:: html + +
    + +Wide to long format +~~~~~~~~~~~~~~~~~~~ + +Starting again from the wide format table created in the previous +section: + +.. ipython:: python + + no2_pivoted = no2.pivot(columns="location", values="value").reset_index() + no2_pivoted.head() + +.. image:: ../../_static/schemas/07_melt.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to collect all air quality :math:`NO_2` measurements in a single column (long format) + +.. ipython:: python + + no_2 = no2_pivoted.melt(id_vars="date.utc") + no_2.head() + +The :func:`pandas.melt` method on a ``DataFrame`` converts the data table from wide +format to long format. The column headers become the variable names in a +newly created column. + +.. raw:: html + +
    • +
    + +The solution is the short version on how to apply :func:`pandas.melt`. The method +will *melt* all columns NOT mentioned in ``id_vars`` together into two +columns: a column with the column header names and a column with the +values themselves. The latter column gets by default the name ``value``. + +The :func:`pandas.melt` method can be defined in more detail: + +.. ipython:: python + + no_2 = no2_pivoted.melt(id_vars="date.utc", + value_vars=["BETR801", + "FR04014", + "London Westminster"], + value_name="NO_2", + var_name="id_location") + no_2.head() + +The result is the same, but defined in more detail: + +- ``value_vars`` defines explicitly which columns to *melt* together +- ``value_name`` provides a custom column name for the values column + instead of the default column name ``value`` +- ``var_name`` provides a custom column name for the column collecting + the column header names. Otherwise it takes the index name or a + default ``variable`` + +Hence, the arguments ``value_name`` and ``var_name`` are just +user-defined names for the two generated columns. The columns to melt +are defined by ``id_vars`` and ``value_vars``. + +.. raw:: html + +
    + To user guide + +Conversion from wide to long format with :func:`pandas.melt` is explained in the user guide section on :ref:`reshaping by melt `. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Sorting by one or more columns is supported by ``sort_values`` +- The ``pivot`` function is purely restructuring of the data, + ``pivot_table`` supports aggregations +- The reverse of ``pivot`` (long to wide format) is ``melt`` (wide to + long format) + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview is available in the user guide on the pages about :ref:`reshaping and pivoting `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst new file mode 100644 index 00000000..f317e7a1 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/08_combine_dataframes.rst @@ -0,0 +1,326 @@ +.. _10min_tut_08_combine: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +For this tutorial, air quality data about :math:`NO_2` is used, made available by +`openaq `__ and downloaded using the +`py-openaq `__ package. + +The ``air_quality_no2_long.csv`` data set provides :math:`NO_2` +values for the measurement stations *FR04014*, *BETR801* and *London +Westminster* in respectively Paris, Antwerp and London. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality_no2 = pd.read_csv("data/air_quality_no2_long.csv", + parse_dates=True) + air_quality_no2 = air_quality_no2[["date.utc", "location", + "parameter", "value"]] + air_quality_no2.head() + +.. raw:: html + +
    • +
    • + +
      +
      +

      + +For this tutorial, air quality data about Particulate +matter less than 2.5 micrometers is used, made available by +`openaq `__ and downloaded using the +`py-openaq `__ package. + +The ``air_quality_pm25_long.csv`` data set provides :math:`PM_{2.5}` +values for the measurement stations *FR04014*, *BETR801* and *London +Westminster* in respectively Paris, Antwerp and London. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality_pm25 = pd.read_csv("data/air_quality_pm25_long.csv", + parse_dates=True) + air_quality_pm25 = air_quality_pm25[["date.utc", "location", + "parameter", "value"]] + air_quality_pm25.head() + +.. raw:: html + +
    • +
    +
    + + +How to combine data from multiple tables? +----------------------------------------- + +Concatenating objects +~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/08_concat_row.svg + :align: center + +.. raw:: html + +
      +
    • + +I want to combine the measurements of :math:`NO_2` and :math:`PM_{2.5}`, two tables with a similar structure, in a single table + +.. ipython:: python + + air_quality = pd.concat([air_quality_pm25, air_quality_no2], axis=0) + air_quality.head() + +The :func:`~pandas.concat` function performs concatenation operations of multiple +tables along one of the axes (row-wise or column-wise). + +.. raw:: html + +
    • +
    + +By default concatenation is along axis 0, so the resulting table combines the rows +of the input tables. Let’s check the shape of the original and the +concatenated tables to verify the operation: + +.. ipython:: python + + print('Shape of the `air_quality_pm25` table: ', air_quality_pm25.shape) + print('Shape of the `air_quality_no2` table: ', air_quality_no2.shape) + print('Shape of the resulting `air_quality` table: ', air_quality.shape) + +Hence, the resulting table has 3178 = 1110 + 2068 rows. + +.. note:: + The **axis** argument recurs in a number of pandas + methods that can be applied **along an axis**. A ``DataFrame`` has two + corresponding axes: the first running vertically downwards across rows + (axis 0), and the second running horizontally across columns (axis 1). + Most operations like concatenation or summary statistics are by default + across rows (axis 0), but can be applied across columns as well. + +Sorting the table on the datetime information illustrates also the +combination of both tables, with the ``parameter`` column defining the +origin of the table (either ``no2`` from table ``air_quality_no2`` or +``pm25`` from table ``air_quality_pm25``): + +.. ipython:: python + + air_quality = air_quality.sort_values("date.utc") + air_quality.head() + +In this specific example, the ``parameter`` column provided by the data +ensures that each of the original tables can be identified. This is not +always the case. The ``concat`` function provides a convenient solution +with the ``keys`` argument, adding an additional (hierarchical) row +index. For example: + +.. ipython:: python + + air_quality_ = pd.concat([air_quality_pm25, air_quality_no2], + keys=["PM25", "NO2"]) + +.. ipython:: python + + air_quality_.head() + +.. note:: + The existence of multiple row/column indices at the same time + has not been mentioned within these tutorials. 
*Hierarchical indexing* + or *MultiIndex* is an advanced and powerful pandas feature to analyze + higher dimensional data. + + Multi-indexing is out of scope for this pandas introduction. For the + moment, remember that the function ``reset_index`` can be used to + convert any level of an index to a column, e.g. + ``air_quality.reset_index(level=0)`` + + .. raw:: html + +
    + To user guide + + Feel free to dive into the world of multi-indexing at the user guide section on :ref:`advanced indexing `. + + .. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +More options on table concatenation (row and column +wise) and how ``concat`` can be used to define the logic (union or +intersection) of the indexes on the other axes is provided at the section on +:ref:`object concatenation `. + +.. raw:: html + +
    + +Join tables using a common identifier +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. image:: ../../_static/schemas/08_merge_left.svg + :align: center + +.. raw:: html + +
      +
    • + +Add the station coordinates, provided by the stations metadata table, to the corresponding rows in the measurements table. + +.. warning:: + The air quality measurement station coordinates are stored in a data + file ``air_quality_stations.csv``, downloaded using the + `py-openaq `__ package. + +.. ipython:: python + + stations_coord = pd.read_csv("data/air_quality_stations.csv") + stations_coord.head() + +.. note:: + The stations used in this example (FR04014, BETR801 and London + Westminster) are just three entries listed in the metadata table. We + only want to add the coordinates of these three to the measurements + table, each on the corresponding rows of the ``air_quality`` table. + +.. ipython:: python + + air_quality.head() + +.. ipython:: python + + air_quality = pd.merge(air_quality, stations_coord, + how='left', on='location') + air_quality.head() + +Using the :meth:`~pandas.merge` function, for each of the rows in the +``air_quality`` table, the corresponding coordinates are added from the +``stations_coord`` table. Both tables have the column +``location`` in common which is used as a key to combine the +information. By choosing the ``left`` join, only the locations available +in the ``air_quality`` (left) table, i.e. FR04014, BETR801 and London +Westminster, end up in the resulting table. The ``merge`` function +supports multiple join options similar to database-style operations. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +Add the parameter full description and name, provided by the parameters metadata table, to the measurements table + +.. warning:: + The air quality parameters metadata are stored in a data file + ``air_quality_parameters.csv``, downloaded using the + `py-openaq `__ package. + +.. ipython:: python + + air_quality_parameters = pd.read_csv("data/air_quality_parameters.csv") + air_quality_parameters.head() + +.. ipython:: python + + air_quality = pd.merge(air_quality, air_quality_parameters, + how='left', left_on='parameter', right_on='id') + air_quality.head() + +Compared to the previous example, there is no common column name. +However, the ``parameter`` column in the ``air_quality`` table and the +``id`` column in the ``air_quality_parameters`` table both provide the +measured variable in a common format. The ``left_on`` and ``right_on`` +arguments are used here (instead of just ``on``) to make the link +between the two tables. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +pandas supports also inner, outer, and right joins. +More information on join/merge of tables is provided in the user guide section on +:ref:`database style merging of tables `. Or have a look at the +:ref:`comparison with SQL` page. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Multiple tables can be concatenated both column-wise and row-wise using + the ``concat`` function. +- For database-like merging/joining of tables, use the ``merge`` + function. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +See the user guide for a full description of the various :ref:`facilities to combine data tables `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/09_timeseries.rst b/doc/source/getting_started/intro_tutorials/09_timeseries.rst new file mode 100644 index 00000000..d5b4b316 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/09_timeseries.rst @@ -0,0 +1,389 @@ +.. _10min_tut_09_timeseries: + +{{ header }} + +.. ipython:: python + + import pandas as pd + import matplotlib.pyplot as plt + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +For this tutorial, air quality data about :math:`NO_2` and Particulate +matter less than 2.5 micrometers is used, made available by +`openaq `__ and downloaded using the +`py-openaq `__ package. +The ``air_quality_no2_long.csv`` data set provides :math:`NO_2` values +for the measurement stations *FR04014*, *BETR801* and *London +Westminster* in respectively Paris, Antwerp and London. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + air_quality = pd.read_csv("data/air_quality_no2_long.csv") + air_quality = air_quality.rename(columns={"date.utc": "datetime"}) + air_quality.head() + +.. ipython:: python + + air_quality.city.unique() + +.. raw:: html + +
    • +
    +
    + +How to handle time series data with ease? +----------------------------------------- + +Using pandas datetime properties +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
    • + +I want to work with the dates in the column ``datetime`` as datetime objects instead of plain text + +.. ipython:: python + + air_quality["datetime"] = pd.to_datetime(air_quality["datetime"]) + air_quality["datetime"] + +Initially, the values in ``datetime`` are character strings and do not +provide any datetime operations (e.g. extract the year, day of the +week,…). By applying the ``to_datetime`` function, pandas interprets the +strings and converts these to datetime (i.e. ``datetime64[ns, UTC]``) +objects. In pandas we call these datetime objects, similar to +``datetime.datetime`` from the standard library, a :class:`pandas.Timestamp`. + +.. raw:: html + +
    • +
    + +.. note:: + As many data sets do contain datetime information in one of + the columns, pandas input functions like :func:`pandas.read_csv` and :func:`pandas.read_json` + can do the transformation to dates when reading the data using the + ``parse_dates`` parameter with a list of the columns to read as + Timestamp: + + :: + + pd.read_csv("../data/air_quality_no2_long.csv", parse_dates=["datetime"]) + +Why are these :class:`pandas.Timestamp` objects useful? Let’s illustrate the added +value with some example cases. + + What is the start and end date of the time series data set we are working + with? + +.. ipython:: python + + air_quality["datetime"].min(), air_quality["datetime"].max() + +Using :class:`pandas.Timestamp` for datetimes enables us to calculate with date +information and make them comparable. Hence, we can use this to get the +length of our time series: + +.. ipython:: python + + air_quality["datetime"].max() - air_quality["datetime"].min() + +The result is a :class:`pandas.Timedelta` object, similar to ``datetime.timedelta`` +from the standard Python library and defining a time duration. + +.. raw:: html + +
    + To user guide + +The different time concepts supported by pandas are explained in the user guide section on :ref:`time related concepts `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +I want to add a new column to the ``DataFrame`` containing only the month of the measurement + +.. ipython:: python + + air_quality["month"] = air_quality["datetime"].dt.month + air_quality.head() + +By using ``Timestamp`` objects for dates, a lot of time-related +properties are provided by pandas. For example the ``month``, but also +``year``, ``weekofyear``, ``quarter``,… All of these properties are +accessible by the ``dt`` accessor. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +An overview of the existing date properties is given in the +:ref:`time and date components overview table `. More details about the ``dt`` accessor +to return datetime like properties is explained in a dedicated section on the :ref:`dt accessor `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +What is the average :math:`NO_2` concentration for each day of the week for each of the measurement locations? + +.. ipython:: python + + air_quality.groupby( + [air_quality["datetime"].dt.weekday, "location"])["value"].mean() + +Remember the split-apply-combine pattern provided by ``groupby`` from the +:ref:`tutorial on statistics calculation <10min_tut_06_stats>`? +Here, we want to calculate a given statistic (e.g. mean :math:`NO_2`) +**for each weekday** and **for each measurement location**. To group on +weekdays, we use the datetime property ``weekday`` (with Monday=0 and +Sunday=6) of pandas ``Timestamp``, which is also accessible by the +``dt`` accessor. The grouping on both locations and weekdays can be done +to split the calculation of the mean on each of these combinations. + +.. danger:: + As we are working with a very short time series in these + examples, the analysis does not provide a long-term representative + result! + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +Plot the typical :math:`NO_2` pattern during the day of our time series of all stations together. In other words, what is the average value for each hour of the day? + +.. ipython:: python + + fig, axs = plt.subplots(figsize=(12, 4)) + air_quality.groupby( + air_quality["datetime"].dt.hour)["value"].mean().plot(kind='bar', + rot=0, + ax=axs) + plt.xlabel("Hour of the day"); # custom x label using matplotlib + @savefig 09_bar_chart.png + plt.ylabel("$NO_2 (µg/m^3)$"); + +Similar to the previous case, we want to calculate a given statistic +(e.g. mean :math:`NO_2`) **for each hour of the day** and we can use the +split-apply-combine approach again. For this case, we use the datetime property ``hour`` +of pandas ``Timestamp``, which is also accessible by the ``dt`` accessor. + +.. raw:: html + +
    • +
    + +Datetime as index +~~~~~~~~~~~~~~~~~ + +In the :ref:`tutorial on reshaping <10min_tut_07_reshape>`, +:meth:`~pandas.pivot` was introduced to reshape the data table with each of the +measurements locations as a separate column: + +.. ipython:: python + + no_2 = air_quality.pivot(index="datetime", columns="location", values="value") + no_2.head() + +.. note:: + By pivoting the data, the datetime information became the + index of the table. In general, setting a column as an index can be + achieved by the ``set_index`` function. + +Working with a datetime index (i.e. ``DatetimeIndex``) provides powerful +functionalities. For example, we do not need the ``dt`` accessor to get +the time series properties, but have these properties available on the +index directly: + +.. ipython:: python + + no_2.index.year, no_2.index.weekday + +Some other advantages are the convenient subsetting of time period or +the adapted time scale on plots. Let’s apply this on our data. + +.. raw:: html + +
      +
    • + +Create a plot of the :math:`NO_2` values in the different stations from the 20th of May till the end of 21st of May + +.. ipython:: python + :okwarning: + + @savefig 09_time_section.png + no_2["2019-05-20":"2019-05-21"].plot(); + +By providing a **string that parses to a datetime**, a specific subset of the data can be selected on a ``DatetimeIndex``. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +More information on the ``DatetimeIndex`` and the slicing by using strings is provided in the section on :ref:`time series indexing `. + +.. raw:: html + +
    + +Resample a time series to another frequency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. raw:: html + +
      +
    • + +Aggregate the current hourly time series values to the monthly maximum value in each of the stations. + +.. ipython:: python + + monthly_max = no_2.resample("M").max() + monthly_max + +A very powerful method on time series data with a datetime index, is the +ability to :meth:`~Series.resample` time series to another frequency (e.g., +converting secondly data into 5-minutely data). + +.. raw:: html + +
    • +
    + +The :meth:`~Series.resample` method is similar to a groupby operation: + +- it provides a time-based grouping, by using a string (e.g. ``M``, + ``5H``,…) that defines the target frequency +- it requires an aggregation function such as ``mean``, ``max``,… + +.. raw:: html + +
    + To user guide + +An overview of the aliases used to define time series frequencies is given in the :ref:`offset aliases overview table `. + +.. raw:: html + +
    + +When defined, the frequency of the time series is provided by the +``freq`` attribute: + +.. ipython:: python + + monthly_max.index.freq + +.. raw:: html + +
      +
    • + +Make a plot of the daily mean :math:`NO_2` value in each of the stations. + +.. ipython:: python + :okwarning: + + @savefig 09_resample_mean.png + no_2.resample("D").mean().plot(style="-o", figsize=(10, 5)); + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +More details on the power of time series ``resampling`` are provided in the user guide section on :ref:`resampling `. + +.. raw:: html + +
    + +.. raw:: html + +
    +

    REMEMBER

    + +- Valid date strings can be converted to datetime objects using the + ``to_datetime`` function or as part of read functions. +- Datetime objects in pandas support calculations, logical operations + and convenient date-related properties using the ``dt`` accessor. +- A ``DatetimeIndex`` contains these date-related properties and + supports convenient slicing. +- ``Resample`` is a powerful method to change the frequency of a time + series. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview on time series is given in the pages on :ref:`time series and date functionality `. + +.. raw:: html + +
    \ No newline at end of file diff --git a/doc/source/getting_started/intro_tutorials/10_text_data.rst b/doc/source/getting_started/intro_tutorials/10_text_data.rst new file mode 100644 index 00000000..3ff64875 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/10_text_data.rst @@ -0,0 +1,278 @@ +.. _10min_tut_10_text: + +{{ header }} + +.. ipython:: python + + import pandas as pd + +.. raw:: html + +
    +
    +
    + Data used for this tutorial: +
    +
    +
      +
    • + +
      +
      +

      + +This tutorial uses the titanic data set, stored as CSV. The data +consists of the following data columns: + +- PassengerId: Id of every passenger. +- Survived: This feature has value 0 and 1. 0 for not survived and 1 + for survived. +- Pclass: There are 3 classes: Class 1, Class 2 and Class 3. +- Name: Name of passenger. +- Sex: Gender of passenger. +- Age: Age of passenger. +- SibSp: Indication that passenger has siblings and spouse. +- Parch: Whether a passenger is alone or has family. +- Ticket: Ticket number of passenger. +- Fare: Indicating the fare. +- Cabin: The cabin of passenger. +- Embarked: The embarked category. + +.. raw:: html + +

      + To raw data +
      +
      + +.. ipython:: python + + titanic = pd.read_csv("data/titanic.csv") + titanic.head() + +.. raw:: html + +
    • +
    +
    + +How to manipulate textual data? +------------------------------- + +.. raw:: html + +
      +
    • + +Make all name characters lowercase + +.. ipython:: python + + titanic["Name"].str.lower() + +To make each of the strings in the ``Name`` column lowercase, select the ``Name`` column +(see :ref:`tutorial on selection of data <10min_tut_03_subset>`), add the ``str`` accessor and +apply the ``lower`` method. As such, each of the strings is converted element wise. + +.. raw:: html + +
    • +
    + +Similar to datetime objects in the :ref:`time series tutorial <10min_tut_09_timeseries>` +having a ``dt`` accessor, a number of +specialized string methods are available when using the ``str`` +accessor. These methods have in general matching names with the +equivalent built-in string methods for single elements, but are applied +element-wise (remember :ref:`element wise calculations <10min_tut_05_columns>`?) +on each of the values of the columns. + +.. raw:: html + +
      +
    • + +Create a new column ``Surname`` that contains the surname of the Passengers by extracting the part before the comma. + +.. ipython:: python + + titanic["Name"].str.split(",") + +Using the :meth:`Series.str.split` method, each of the values is returned as a list of +2 elements. The first element is the part before the comma and the +second element the part after the comma. + +.. ipython:: python + + titanic["Surname"] = titanic["Name"].str.split(",").str.get(0) + titanic["Surname"] + +As we are only interested in the first part representing the surname +(element 0), we can again use the ``str`` accessor and apply :meth:`Series.str.get` to +extract the relevant part. Indeed, these string functions can be +concatenated to combine multiple functions at once! + +.. raw:: html + +
    • +
    + +.. raw:: html + +
    + To user guide + +More information on extracting parts of strings is available in the user guide section on :ref:`splitting and replacing strings `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +Extract the passenger data about the Countess on board of the Titanic. + +.. ipython:: python + + titanic["Name"].str.contains("Countess") + +.. ipython:: python + + titanic[titanic["Name"].str.contains("Countess")] + +(*Interested in her story? See*\ `Wikipedia `__\ *!*) + +The string method :meth:`Series.str.contains` checks for each of the values in the +column ``Name`` if the string contains the word ``Countess`` and returns +for each of the values ``True`` (``Countess`` is part of the name) or +``False`` (``Countess`` is not part of the name). This output can be used +to subselect the data using conditional (boolean) indexing introduced in +the :ref:`subsetting of data tutorial <10min_tut_03_subset>`. As there was +only 1 Countess on the Titanic, we get one row as a result. + +.. raw:: html + +
    • +
    + +.. note:: + More powerful extractions on strings are supported, as the + :meth:`Series.str.contains` and :meth:`Series.str.extract` methods accept `regular + expressions `__, but are out of + scope of this tutorial. + +.. raw:: html + +
    + To user guide + +More information on extracting parts of strings is available in the user guide section on :ref:`string matching and extracting `. + +.. raw:: html + +
    + +.. raw:: html + +
      +
    • + +Which passenger of the titanic has the longest name? + +.. ipython:: python + + titanic["Name"].str.len() + +To get the longest name we first have to get the lengths of each of the +names in the ``Name`` column. By using pandas string methods, the +:meth:`Series.str.len` function is applied to each of the names individually +(element-wise). + +.. ipython:: python + + titanic["Name"].str.len().idxmax() + +Next, we need to get the corresponding location, preferably the index +label, in the table for which the name length is the largest. The +:meth:`~Series.idxmax` method does exactly that. It is not a string method and is +applied to integers, so no ``str`` is used. + +.. ipython:: python + + titanic.loc[titanic["Name"].str.len().idxmax(), "Name"] + +Based on the index name of the row (``307``) and the column (``Name``), +we can do a selection using the ``loc`` operator, introduced in the +:ref:`tutorial on subsetting <10min_tut_03_subset>`. + +.. raw:: html + +
    • +
    + +.. raw:: html + +
      +
    • + +In the ‘Sex’ columns, replace values of ‘male’ by ‘M’ and all ‘female’ values by ‘F’ + +.. ipython:: python + + titanic["Sex_short"] = titanic["Sex"].replace({"male": "M", + "female": "F"}) + titanic["Sex_short"] + +Whereas :meth:`~Series.replace` is not a string method, it provides a convenient way +to use mappings or vocabularies to translate certain values. It requires +a ``dictionary`` to define the mapping ``{from : to}``. + +.. raw:: html + +
    • +
    + +.. warning:: + There is also a :meth:`~Series.str.replace` method available to replace a + specific set of characters. However, when having a mapping of multiple + values, this would become: + + :: + + titanic["Sex_short"] = titanic["Sex"].str.replace("female", "F") + titanic["Sex_short"] = titanic["Sex_short"].str.replace("male", "M") + + This would become cumbersome and easily lead to mistakes. Just think (or + try out yourself) what would happen if those two statements are applied + in the opposite order… + +.. raw:: html + +
    +

    REMEMBER

    + +- String methods are available using the ``str`` accessor. +- String methods work element wise and can be used for conditional + indexing. +- The ``replace`` method is a convenient method to convert values + according to a given dictionary. + +.. raw:: html + +
    + +.. raw:: html + +
    + To user guide + +A full overview is provided in the user guide pages on :ref:`working with text data `. + +.. raw:: html + +
    diff --git a/doc/source/getting_started/intro_tutorials/index.rst b/doc/source/getting_started/intro_tutorials/index.rst new file mode 100644 index 00000000..28e76108 --- /dev/null +++ b/doc/source/getting_started/intro_tutorials/index.rst @@ -0,0 +1,22 @@ +{{ header }} + +.. _10times1minute: + +========================= +Getting started tutorials +========================= + +.. toctree:: + :maxdepth: 1 + + 01_table_oriented + 02_read_write + 03_subset_data + 04_plotting + 05_add_columns + 06_calculate_statistics + 07_reshape_table_layout + 08_combine_dataframes + 09_timeseries + 10_text_data + diff --git a/doc/source/getting_started/overview.rst b/doc/source/getting_started/overview.rst new file mode 100644 index 00000000..d8a40c54 --- /dev/null +++ b/doc/source/getting_started/overview.rst @@ -0,0 +1,177 @@ +.. _overview: + +{{ header }} + +**************** +Package overview +**************** + +**pandas** is a `Python `__ package providing fast, +flexible, and expressive data structures designed to make working with +"relational" or "labeled" data both easy and intuitive. It aims to be the +fundamental high-level building block for doing practical, **real world** data +analysis in Python. Additionally, it has the broader goal of becoming **the +most powerful and flexible open source data analysis / manipulation tool +available in any language**. It is already well on its way toward this goal. + +pandas is well suited for many different kinds of data: + + - Tabular data with heterogeneously-typed columns, as in an SQL table or + Excel spreadsheet + - Ordered and unordered (not necessarily fixed-frequency) time series data. + - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and + column labels + - Any other form of observational / statistical data sets. 
The data actually + need not be labeled at all to be placed into a pandas data structure + +The two primary data structures of pandas, :class:`Series` (1-dimensional) +and :class:`DataFrame` (2-dimensional), handle the vast majority of typical use +cases in finance, statistics, social science, and many areas of +engineering. For R users, :class:`DataFrame` provides everything that R's +``data.frame`` provides and much more. pandas is built on top of `NumPy +`__ and is intended to integrate well within a scientific +computing environment with many other 3rd party libraries. + +Here are just a few of the things that pandas does well: + + - Easy handling of **missing data** (represented as NaN) in floating point as + well as non-floating point data + - Size mutability: columns can be **inserted and deleted** from DataFrame and + higher dimensional objects + - Automatic and explicit **data alignment**: objects can be explicitly + aligned to a set of labels, or the user can simply ignore the labels and + let `Series`, `DataFrame`, etc. automatically align the data for you in + computations + - Powerful, flexible **group by** functionality to perform + split-apply-combine operations on data sets, for both aggregating and + transforming data + - Make it **easy to convert** ragged, differently-indexed data in other + Python and NumPy data structures into DataFrame objects + - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets + - Intuitive **merging** and **joining** data sets + - Flexible **reshaping** and pivoting of data sets + - **Hierarchical** labeling of axes (possible to have multiple labels per + tick) + - Robust IO tools for loading data from **flat files** (CSV and delimited), + Excel files, databases, and saving / loading data from the ultrafast **HDF5 + format** + - **Time series**-specific functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. 
+ +Many of these principles are here to address the shortcomings frequently +experienced using other languages / scientific research environments. For data +scientists, working with data is typically divided into multiple stages: +munging and cleaning data, analyzing / modeling it, then organizing the results +of the analysis into a form suitable for plotting or tabular display. pandas +is the ideal tool for all of these tasks. + +Some other notes + + - pandas is **fast**. Many of the low-level algorithmic bits have been + extensively tweaked in `Cython `__ code. However, as with + anything else, generalization usually sacrifices performance. So if you focus + on one feature for your application you may be able to create a faster + specialized tool. + + - pandas is a dependency of `statsmodels + `__, making it an important part of the + statistical computing ecosystem in Python. + + - pandas has been used extensively in production in financial applications. + +Data structures +--------------- + +.. csv-table:: + :header: "Dimensions", "Name", "Description" + :widths: 15, 20, 50 + + 1, "Series", "1D labeled homogeneously-typed array" + 2, "DataFrame", "General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed columns" + +Why more than one data structure? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The best way to think about the pandas data structures is as flexible +containers for lower dimensional data. For example, DataFrame is a container +for Series, and Series is a container for scalars. We would like to be +able to insert and remove objects from these containers in a dictionary-like +fashion. + +Also, we would like sensible default behaviors for the common API functions +which take into account the typical orientation of time series and +cross-sectional data sets. 
When using ndarrays to store 2- and 3-dimensional +data, a burden is placed on the user to consider the orientation of the data +set when writing functions; axes are considered more or less equivalent (except +when C- or Fortran-contiguousness matters for performance). In pandas, the axes +are intended to lend more semantic meaning to the data; i.e., for a particular +data set there is likely to be a "right" way to orient the data. The goal, +then, is to reduce the amount of mental effort required to code up data +transformations in downstream functions. + +For example, with tabular data (DataFrame) it is more semantically helpful to +think of the **index** (the rows) and the **columns** rather than axis 0 and +axis 1. Iterating through the columns of the DataFrame thus results in more +readable code: + +:: + + for col in df.columns: + series = df[col] + # do something with series + +Mutability and copying of data +------------------------------ + +All pandas data structures are value-mutable (the values they contain can be +altered) but not always size-mutable. The length of a Series cannot be +changed, but, for example, columns can be inserted into a DataFrame. However, +the vast majority of methods produce new objects and leave the input data +untouched. In general we like to **favor immutability** where sensible. + +Getting support +--------------- + +The first stop for pandas issues and ideas is the `Github Issue Tracker +`__. If you have a general question, +pandas community experts can answer through `Stack Overflow +`__. + +Community +--------- + +pandas is actively supported today by a community of like-minded individuals around +the world who contribute their valuable time and energy to help make open source +pandas possible. Thanks to `all of our contributors `__. + +If you're interested in contributing, please visit the :ref:`contributing guide `. + +pandas is a `NumFOCUS `__ sponsored project. 
+This will help ensure the success of development of pandas as a world-class open-source +project, and makes it possible to `donate `__ to the project. + +Project governance +------------------ + +The governance process that the pandas project has used informally since its inception in 2008 is formalized in `Project Governance documents `__. +The documents clarify how decisions are made and how the various elements of our community interact, including the relationship between open source collaborative development and work that may be funded by for-profit or non-profit entities. + +Wes McKinney is the Benevolent Dictator for Life (BDFL). + +Development team +----------------- + +The list of the Core Team members and more detailed information can be found on the `people’s page `__ of the governance repo. + + +Institutional partners +---------------------- + +The information about current institutional partners can be found on the `pandas website page `__. + +License +------- + +.. literalinclude:: ../../../LICENSE + diff --git a/doc/source/getting_started/tutorials.rst b/doc/source/getting_started/tutorials.rst new file mode 100644 index 00000000..1ed0e8f6 --- /dev/null +++ b/doc/source/getting_started/tutorials.rst @@ -0,0 +1,109 @@ +.. _tutorials: + +{{ header }} + +********* +Tutorials +********* + +This is a guide to many pandas tutorials, geared mainly for new users. + +Internal guides +=============== + +pandas' own :ref:`10 Minutes to pandas<10min>`. + +More complex recipes are in the :ref:`Cookbook`. + +A handy pandas `cheat sheet `_. + +Community guides +================ + +pandas Cookbook by Julia Evans +------------------------------ + +The goal of this 2015 cookbook (by `Julia Evans `_) is to +give you some concrete examples for getting started with pandas. These +are examples with real-world data, and all the bugs and weirdness that +entails. +For the table of contents, see the `pandas-cookbook GitHub +repository `_. 
+ +Learn Pandas by Hernan Rojas +---------------------------- + +A set of lessons for new pandas users: https://bitbucket.org/hrojas/learn-pandas + +Practical data analysis with Python +----------------------------------- + +This `guide `_ is an introduction to the data analysis process using the Python data ecosystem and an interesting open dataset. +There are four sections covering selected topics such as `munging data `__, +`aggregating data `_, `visualizing data `_ +and `time series `_. + +.. _tutorial-exercises-new-users: + +Exercises for new users +----------------------- +Practice your skills with real data sets and exercises. +For more resources, please visit the main `repository `__. + + +.. _tutorial-modern: + +Modern pandas +------------- + +Tutorial series written in 2016 by +`Tom Augspurger `_. +The source may be found in the GitHub repository +`TomAugspurger/effective-pandas `_. + +* `Modern Pandas `_ +* `Method Chaining `_ +* `Indexes `_ +* `Performance `_ +* `Tidy Data `_ +* `Visualization `_ +* `Timeseries `_ + +Excel charts with pandas, vincent and xlsxwriter +------------------------------------------------ + +* `Using Pandas and XlsxWriter to create Excel charts `_ + +Video tutorials +--------------- + +* `Pandas From The Ground Up `_ + (2015) (2:24) + `GitHub repo `__ +* `Introduction Into Pandas `_ + (2016) (1:28) + `GitHub repo `__ +* `Pandas: .head() to .tail() `_ + (2016) (1:26) + `GitHub repo `__ +* `Data analysis in Python with pandas `_ + (2016-2018) + `GitHub repo `__ and + `Jupyter Notebook `__ +* `Best practices with pandas `_ + (2018) + `GitHub repo `__ and + `Jupyter Notebook `__ + + +Various tutorials +----------------- + +* `Wes McKinney's (pandas BDFL) blog `_ +* `Statistical analysis made easy in Python with SciPy and pandas DataFrames, by Randal Olson `_ +* `Statistical Data Analysis in Python, tutorial videos, by Christopher Fonnesbeck from SciPy 2013 `_ +* `Financial analysis in Python, by Thomas Wiecki `_ +* `Intro to pandas data 
structures, by Greg Reda `_ +* `Pandas and Python: Top 10, by Manish Amde `_ +* `Pandas DataFrames Tutorial, by Karlijn Willems `_ +* `A concise tutorial with real life examples `_ diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template new file mode 100644 index 00000000..4aba8f70 --- /dev/null +++ b/doc/source/index.rst.template @@ -0,0 +1,131 @@ +:notoc: + +.. pandas documentation master file, created by + +.. module:: pandas + +******************** +pandas documentation +******************** + +**Date**: |today| **Version**: |version| + +**Download documentation**: `PDF Version `__ | `Zipped HTML `__ + +**Useful links**: +`Binary Installers `__ | +`Source Repository `__ | +`Issues & Ideas `__ | +`Q&A Support `__ | +`Mailing List `__ + +:mod:`pandas` is an open source, BSD-licensed library providing high-performance, +easy-to-use data structures and data analysis tools for the `Python `__ +programming language. + +.. raw:: html + +
    +
    +
    +
    + getting started with pandas action icon +
    +
    Getting started
    +

    New to pandas? Check out the getting started guides. They + contain an introduction to pandas' main concepts and links to additional tutorials.

    + +.. container:: custom-button + + :ref:`To the getting started guides` + +.. raw:: html + +
    +
    +
    +
    +
    + pandas user guide action icon +
    +
    User guide
    +

    The user guide provides in-depth information on the + key concepts of pandas with useful background information and explanation.

    + +.. container:: custom-button + + :ref:`To the user guide` + +.. raw:: html + +
    +
    +
    +
    +
    + api of pandas action icon +
    +
    API reference
    +

    The reference guide contains a detailed description of + the pandas API. The reference describes how the methods work and which parameters can + be used. It assumes that you have an understanding of the key concepts.

    + +.. container:: custom-button + + :ref:`To the reference guide` + +.. raw:: html + +
    +
    +
    +
    +
    + contribute to pandas action icon +
    +
    Developer guide
    +

    Saw a typo in the documentation? Want to improve + existing functionalities? The contributing guidelines will guide + you through the process of improving pandas.

    + +.. container:: custom-button + + :ref:`To the development guide` + +.. raw:: html + +
    +
    +
    +
    +
    + + +{% if single_doc and single_doc.endswith('.rst') -%} +.. toctree:: + :maxdepth: 3 + :titlesonly: + + {{ single_doc[:-4] }} +{% elif single_doc %} +.. autosummary:: + :toctree: reference/api/ + + {{ single_doc }} +{% else -%} +.. toctree:: + :maxdepth: 3 + :hidden: + :titlesonly: +{% endif %} +{% if not single_doc %} + getting_started/index + user_guide/index + {% endif -%} + {% if include_api -%} + reference/index + {% endif -%} + {% if not single_doc -%} + development/index + whatsnew/index +{% endif %} diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst new file mode 100644 index 00000000..c71350ec --- /dev/null +++ b/doc/source/reference/arrays.rst @@ -0,0 +1,522 @@ +{{ header }} + +.. _api.arrays: + +============= +Pandas arrays +============= + +.. currentmodule:: pandas + +For most data types, pandas uses NumPy arrays as the concrete +objects contained within a :class:`Index`, :class:`Series`, or +:class:`DataFrame`. + +For some data types, pandas extends NumPy's type system. String aliases for these types +can be found at :ref:`basics.dtypes`. + +=================== ========================= ================== ============================= +Kind of Data Pandas Data Type Scalar Array +=================== ========================= ================== ============================= +TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` +Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` +Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` +Intervals :class:`IntervalDtype` :class:`Interval` :ref:`api.arrays.interval` +Nullable Integer :class:`Int64Dtype`, ... 
(none) :ref:`api.arrays.integer_na` +Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical` +Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` +Strings :class:`StringDtype` :class:`str` :ref:`api.arrays.string` +Boolean (with NA) :class:`BooleanDtype` :class:`bool` :ref:`api.arrays.bool` +=================== ========================= ================== ============================= + +Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). +The top-level :meth:`array` method can be used to create a new array, which may be +stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFrame`. + +.. autosummary:: + :toctree: api/ + + array + +.. _api.arrays.datetime: + +Datetime data +------------- + +NumPy cannot natively represent timezone-aware datetimes. Pandas supports this +with the :class:`arrays.DatetimeArray` extension array, which can hold timezone-naive +or timezone-aware values. + +:class:`Timestamp`, a subclass of :class:`datetime.datetime`, is pandas' +scalar type for timezone-naive or timezone-aware datetime data. + +.. autosummary:: + :toctree: api/ + + Timestamp + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Timestamp.asm8 + Timestamp.day + Timestamp.dayofweek + Timestamp.dayofyear + Timestamp.days_in_month + Timestamp.daysinmonth + Timestamp.fold + Timestamp.hour + Timestamp.is_leap_year + Timestamp.is_month_end + Timestamp.is_month_start + Timestamp.is_quarter_end + Timestamp.is_quarter_start + Timestamp.is_year_end + Timestamp.is_year_start + Timestamp.max + Timestamp.microsecond + Timestamp.min + Timestamp.minute + Timestamp.month + Timestamp.nanosecond + Timestamp.quarter + Timestamp.resolution + Timestamp.second + Timestamp.tz + Timestamp.tzinfo + Timestamp.value + Timestamp.week + Timestamp.weekofyear + Timestamp.year + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Timestamp.astimezone + Timestamp.ceil + Timestamp.combine + Timestamp.ctime + Timestamp.date + Timestamp.day_name + Timestamp.dst + Timestamp.floor + Timestamp.freq + Timestamp.freqstr + Timestamp.fromordinal + Timestamp.fromtimestamp + Timestamp.isocalendar + Timestamp.isoformat + Timestamp.isoweekday + Timestamp.month_name + Timestamp.normalize + Timestamp.now + Timestamp.replace + Timestamp.round + Timestamp.strftime + Timestamp.strptime + Timestamp.time + Timestamp.timestamp + Timestamp.timetuple + Timestamp.timetz + Timestamp.to_datetime64 + Timestamp.to_numpy + Timestamp.to_julian_date + Timestamp.to_period + Timestamp.to_pydatetime + Timestamp.today + Timestamp.toordinal + Timestamp.tz_convert + Timestamp.tz_localize + Timestamp.tzname + Timestamp.utcfromtimestamp + Timestamp.utcnow + Timestamp.utcoffset + Timestamp.utctimetuple + Timestamp.weekday + +A collection of timestamps may be stored in a :class:`arrays.DatetimeArray`. +For timezone-aware data, the ``.dtype`` of a ``DatetimeArray`` is a +:class:`DatetimeTZDtype`. For timezone-naive data, ``np.dtype("datetime64[ns]")`` +is used. + +If the data are tz-aware, then every value in the array must have the same timezone. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.DatetimeArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + DatetimeTZDtype + +.. _api.arrays.timedelta: + +Timedelta data +-------------- + +NumPy can natively represent timedeltas. Pandas provides :class:`Timedelta` +for symmetry with :class:`Timestamp`. + +.. autosummary:: + :toctree: api/ + + Timedelta + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Timedelta.asm8 + Timedelta.components + Timedelta.days + Timedelta.delta + Timedelta.freq + Timedelta.is_populated + Timedelta.max + Timedelta.microseconds + Timedelta.min + Timedelta.nanoseconds + Timedelta.resolution + Timedelta.seconds + Timedelta.value + Timedelta.view + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Timedelta.ceil + Timedelta.floor + Timedelta.isoformat + Timedelta.round + Timedelta.to_pytimedelta + Timedelta.to_timedelta64 + Timedelta.to_numpy + Timedelta.total_seconds + +A collection of timedeltas may be stored in a :class:`arrays.TimedeltaArray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.TimedeltaArray + +.. _api.arrays.period: + +Timespan data +------------- + +Pandas represents spans of time as :class:`Period` objects. + +Period +------ +.. autosummary:: + :toctree: api/ + + Period + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Period.day + Period.dayofweek + Period.dayofyear + Period.days_in_month + Period.daysinmonth + Period.end_time + Period.freq + Period.freqstr + Period.hour + Period.is_leap_year + Period.minute + Period.month + Period.ordinal + Period.quarter + Period.qyear + Period.second + Period.start_time + Period.week + Period.weekday + Period.weekofyear + Period.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Period.asfreq + Period.now + Period.strftime + Period.to_timestamp + +A collection of periods may be stored in a :class:`arrays.PeriodArray`. +Every period in a ``PeriodArray`` must have the same ``freq``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.PeriodArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + PeriodDtype + +.. _api.arrays.interval: + +Interval data +------------- + +Arbitrary intervals can be represented as :class:`Interval` objects. + +.. 
autosummary:: + :toctree: api/ + + Interval + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Interval.closed + Interval.closed_left + Interval.closed_right + Interval.is_empty + Interval.left + Interval.length + Interval.mid + Interval.open_left + Interval.open_right + Interval.overlaps + Interval.right + +A collection of intervals may be stored in an :class:`arrays.IntervalArray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.IntervalArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + IntervalDtype + + +.. Those attributes and methods are included in the API because the docstrings +.. of IntervalIndex and IntervalArray are shared. Including it here to make +.. sure a docstring page is built for them to avoid warnings + +.. + .. autosummary:: + :toctree: api/ + + arrays.IntervalArray.left + arrays.IntervalArray.right + arrays.IntervalArray.closed + arrays.IntervalArray.mid + arrays.IntervalArray.length + arrays.IntervalArray.is_empty + arrays.IntervalArray.is_non_overlapping_monotonic + arrays.IntervalArray.from_arrays + arrays.IntervalArray.from_tuples + arrays.IntervalArray.from_breaks + arrays.IntervalArray.contains + arrays.IntervalArray.overlaps + arrays.IntervalArray.set_closed + arrays.IntervalArray.to_tuples + + +.. _api.arrays.integer_na: + +Nullable integer +---------------- + +:class:`numpy.ndarray` cannot natively represent integer-data with missing values. +Pandas provides this through :class:`arrays.IntegerArray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.IntegerArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + Int8Dtype + Int16Dtype + Int32Dtype + Int64Dtype + UInt8Dtype + UInt16Dtype + UInt32Dtype + UInt64Dtype + +.. 
_api.arrays.categorical: + +Categorical data +---------------- + +Pandas defines a custom data type for representing data that can take only a +limited, fixed set of values. The dtype of a ``Categorical`` can be described by +a :class:`pandas.api.types.CategoricalDtype`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CategoricalDtype + +.. autosummary:: + :toctree: api/ + + CategoricalDtype.categories + CategoricalDtype.ordered + +Categorical data can be stored in a :class:`pandas.Categorical`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + Categorical + +The alternative :meth:`Categorical.from_codes` constructor can be used when you +have the categories and integer codes already: + +.. autosummary:: + :toctree: api/ + + Categorical.from_codes + +The dtype information is available on the ``Categorical``: + +.. autosummary:: + :toctree: api/ + + Categorical.dtype + Categorical.categories + Categorical.ordered + Categorical.codes + +``np.asarray(categorical)`` works by implementing the array interface. Be aware that this converts +the Categorical back to a NumPy array, so categories and order information are not preserved! + +.. autosummary:: + :toctree: api/ + + Categorical.__array__ + +A ``Categorical`` can be stored in a ``Series`` or ``DataFrame``. +To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or +``Series(..., dtype=dtype)`` where ``dtype`` is either + +* the string ``'category'`` +* an instance of :class:`~pandas.api.types.CategoricalDtype`. + +If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical +data. See :ref:`api.series.cat` for more. + +.. _api.arrays.sparse: + +Sparse data +----------- + +Data where a single value is repeated many times (e.g. ``0`` or ``NaN``) may +be stored efficiently as a :class:`arrays.SparseArray`. + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.SparseArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + SparseDtype + +The ``Series.sparse`` accessor may be used to access sparse-specific attributes +and methods if the :class:`Series` contains sparse values. See +:ref:`api.series.sparse` for more. + + +.. _api.arrays.string: + +Text data +--------- + +When working with text data, where each valid element is a string or missing, +we recommend using :class:`StringDtype` (with the alias ``"string"``). + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.StringArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + StringDtype + +The ``Series.str`` accessor is available for ``Series`` backed by a :class:`arrays.StringArray`. +See :ref:`api.series.str` for more. + + +.. _api.arrays.bool: + +Boolean data with missing values +-------------------------------- + +The boolean dtype (with the alias ``"boolean"``) provides support for storing +boolean data (True, False values) with missing values, which is not possible +with a bool :class:`numpy.ndarray`. + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + arrays.BooleanArray + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + BooleanDtype + + +.. Dtype attributes which are manually listed in their docstrings: including +.. it here to make sure a docstring page is built for them + +.. + .. autosummary:: + :toctree: api/ + + DatetimeTZDtype.unit + DatetimeTZDtype.tz + PeriodDtype.freq + IntervalDtype.subtype diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst new file mode 100644 index 00000000..78fdfbfd --- /dev/null +++ b/doc/source/reference/extensions.rst @@ -0,0 +1,74 @@ +{{ header }} + +.. 
_api.extensions: + +========== +Extensions +========== +.. currentmodule:: pandas + +These are primarily intended for library authors looking to extend pandas +objects. + +.. autosummary:: + :toctree: api/ + + api.extensions.register_extension_dtype + api.extensions.register_dataframe_accessor + api.extensions.register_series_accessor + api.extensions.register_index_accessor + api.extensions.ExtensionDtype + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + api.extensions.ExtensionArray + arrays.PandasArray + +.. We need this autosummary so that methods and attributes are generated. +.. Separate block, since they aren't classes. + + .. autosummary:: + :toctree: api/ + + api.extensions.ExtensionArray._concat_same_type + api.extensions.ExtensionArray._formatter + api.extensions.ExtensionArray._from_factorized + api.extensions.ExtensionArray._from_sequence + api.extensions.ExtensionArray._from_sequence_of_strings + api.extensions.ExtensionArray._ndarray_values + api.extensions.ExtensionArray._reduce + api.extensions.ExtensionArray._values_for_argsort + api.extensions.ExtensionArray._values_for_factorize + api.extensions.ExtensionArray.argsort + api.extensions.ExtensionArray.astype + api.extensions.ExtensionArray.copy + api.extensions.ExtensionArray.view + api.extensions.ExtensionArray.dropna + api.extensions.ExtensionArray.factorize + api.extensions.ExtensionArray.fillna + api.extensions.ExtensionArray.isna + api.extensions.ExtensionArray.ravel + api.extensions.ExtensionArray.repeat + api.extensions.ExtensionArray.searchsorted + api.extensions.ExtensionArray.shift + api.extensions.ExtensionArray.take + api.extensions.ExtensionArray.unique + api.extensions.ExtensionArray.dtype + api.extensions.ExtensionArray.nbytes + api.extensions.ExtensionArray.ndim + api.extensions.ExtensionArray.shape + +Additionally, we have some utility methods for ensuring your object +behaves correctly. + +.. 
autosummary:: + :toctree: api/ + + api.indexers.check_array_indexer + + +The sentinel ``pandas.api.extensions.no_default`` is used as the default +value in some methods. Use an ``is`` comparison to check if the user +provides a non-default value. diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst new file mode 100644 index 00000000..dd2af6e2 --- /dev/null +++ b/doc/source/reference/frame.rst @@ -0,0 +1,368 @@ +{{ header }} + +.. _api.dataframe: + +========= +DataFrame +========= +.. currentmodule:: pandas + +Constructor +~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame + +Attributes and underlying data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +**Axes** + +.. autosummary:: + :toctree: api/ + + DataFrame.index + DataFrame.columns + +.. autosummary:: + :toctree: api/ + + DataFrame.dtypes + DataFrame.select_dtypes + DataFrame.values + DataFrame.axes + DataFrame.ndim + DataFrame.size + DataFrame.shape + DataFrame.memory_usage + DataFrame.empty + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.astype + DataFrame.convert_dtypes + DataFrame.infer_objects + DataFrame.copy + DataFrame.isna + DataFrame.notna + DataFrame.bool + +Indexing, iteration +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.head + DataFrame.at + DataFrame.iat + DataFrame.loc + DataFrame.iloc + DataFrame.insert + DataFrame.__iter__ + DataFrame.items + DataFrame.iteritems + DataFrame.keys + DataFrame.iterrows + DataFrame.itertuples + DataFrame.lookup + DataFrame.pop + DataFrame.tail + DataFrame.xs + DataFrame.get + DataFrame.isin + DataFrame.where + DataFrame.mask + DataFrame.query + +For more information on ``.at``, ``.iat``, ``.loc``, and +``.iloc``, see the :ref:`indexing documentation `. + +Binary operator functions +~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.add + DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow + DataFrame.dot + DataFrame.radd + DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq + DataFrame.combine + DataFrame.combine_first + +Function application, GroupBy & window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.apply + DataFrame.applymap + DataFrame.pipe + DataFrame.agg + DataFrame.aggregate + DataFrame.transform + DataFrame.groupby + DataFrame.rolling + DataFrame.expanding + DataFrame.ewm + +.. _api.dataframe.stats: + +Computations / descriptive stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.abs + DataFrame.all + DataFrame.any + DataFrame.clip + DataFrame.corr + DataFrame.corrwith + DataFrame.count + DataFrame.cov + DataFrame.cummax + DataFrame.cummin + DataFrame.cumprod + DataFrame.cumsum + DataFrame.describe + DataFrame.diff + DataFrame.eval + DataFrame.kurt + DataFrame.kurtosis + DataFrame.mad + DataFrame.max + DataFrame.mean + DataFrame.median + DataFrame.min + DataFrame.mode + DataFrame.pct_change + DataFrame.prod + DataFrame.product + DataFrame.quantile + DataFrame.rank + DataFrame.round + DataFrame.sem + DataFrame.skew + DataFrame.sum + DataFrame.std + DataFrame.var + DataFrame.nunique + +Reindexing / selection / label manipulation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.add_prefix + DataFrame.add_suffix + DataFrame.align + DataFrame.at_time + DataFrame.between_time + DataFrame.drop + DataFrame.drop_duplicates + DataFrame.duplicated + DataFrame.equals + DataFrame.filter + DataFrame.first + DataFrame.head + DataFrame.idxmax + DataFrame.idxmin + DataFrame.last + DataFrame.reindex + DataFrame.reindex_like + DataFrame.rename + DataFrame.rename_axis + DataFrame.reset_index + DataFrame.sample + DataFrame.set_axis + DataFrame.set_index + DataFrame.tail + DataFrame.take + DataFrame.truncate + +.. _api.dataframe.missing: + +Missing data handling +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.dropna + DataFrame.fillna + DataFrame.replace + DataFrame.interpolate + +Reshaping, sorting, transposing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.droplevel + DataFrame.pivot + DataFrame.pivot_table + DataFrame.reorder_levels + DataFrame.sort_values + DataFrame.sort_index + DataFrame.nlargest + DataFrame.nsmallest + DataFrame.swaplevel + DataFrame.stack + DataFrame.unstack + DataFrame.swapaxes + DataFrame.melt + DataFrame.explode + DataFrame.squeeze + DataFrame.to_xarray + DataFrame.T + DataFrame.transpose + +Combining / joining / merging +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.append + DataFrame.assign + DataFrame.join + DataFrame.merge + DataFrame.update + +Time series-related +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.asfreq + DataFrame.asof + DataFrame.shift + DataFrame.slice_shift + DataFrame.tshift + DataFrame.first_valid_index + DataFrame.last_valid_index + DataFrame.resample + DataFrame.to_period + DataFrame.to_timestamp + DataFrame.tz_convert + DataFrame.tz_localize + +.. _api.frame.metadata: + +Metadata +~~~~~~~~ + +:attr:`DataFrame.attrs` is a dictionary for storing global metadata for this DataFrame. + +.. 
warning:: ``DataFrame.attrs`` is considered experimental and may change without warning. + +.. autosummary:: + :toctree: api/ + + DataFrame.attrs + + +.. _api.dataframe.plotting: + +Plotting +~~~~~~~~ +``DataFrame.plot`` is both a callable method and a namespace attribute for +specific plotting methods of the form ``DataFrame.plot.<kind>``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_callable.rst + + DataFrame.plot + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + DataFrame.plot.area + DataFrame.plot.bar + DataFrame.plot.barh + DataFrame.plot.box + DataFrame.plot.density + DataFrame.plot.hexbin + DataFrame.plot.hist + DataFrame.plot.kde + DataFrame.plot.line + DataFrame.plot.pie + DataFrame.plot.scatter + +.. autosummary:: + :toctree: api/ + + DataFrame.boxplot + DataFrame.hist + + +.. _api.frame.sparse: + +Sparse accessor +~~~~~~~~~~~~~~~ + +Sparse-dtype specific methods and attributes are provided under the +``DataFrame.sparse`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + DataFrame.sparse.density + +.. autosummary:: + :toctree: api/ + + DataFrame.sparse.from_spmatrix + DataFrame.sparse.to_coo + DataFrame.sparse.to_dense + + +Serialization / IO / conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.from_dict + DataFrame.from_records + DataFrame.info + DataFrame.to_parquet + DataFrame.to_pickle + DataFrame.to_csv + DataFrame.to_hdf + DataFrame.to_sql + DataFrame.to_dict + DataFrame.to_excel + DataFrame.to_json + DataFrame.to_html + DataFrame.to_feather + DataFrame.to_latex + DataFrame.to_stata + DataFrame.to_gbq + DataFrame.to_records + DataFrame.to_string + DataFrame.to_clipboard + DataFrame.to_markdown + DataFrame.style diff --git a/doc/source/reference/general_functions.rst b/doc/source/reference/general_functions.rst new file mode 100644 index 00000000..b5832cb8 --- /dev/null +++ b/doc/source/reference/general_functions.rst @@ -0,0 +1,87 @@ +{{ header }} + +.. _api.general_functions: + +================= +General functions +================= +.. currentmodule:: pandas + +Data manipulations +~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + melt + pivot + pivot_table + crosstab + cut + qcut + merge + merge_ordered + merge_asof + concat + get_dummies + factorize + unique + wide_to_long + +Top-level missing data +~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + isna + isnull + notna + notnull + +Top-level conversions +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + to_numeric + +Top-level dealing with datetimelike +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + to_datetime + to_timedelta + date_range + bdate_range + period_range + timedelta_range + infer_freq + +Top-level dealing with intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + interval_range + +Top-level evaluation +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + eval + +Hashing +~~~~~~~ +.. autosummary:: + :toctree: api/ + + util.hash_array + util.hash_pandas_object + +Testing +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + test diff --git a/doc/source/reference/general_utility_functions.rst b/doc/source/reference/general_utility_functions.rst new file mode 100644 index 00000000..0d9e0b0f --- /dev/null +++ b/doc/source/reference/general_utility_functions.rst @@ -0,0 +1,110 @@ +{{ header }} + +.. _api.general_utility_functions: + +========================= +General utility functions +========================= +.. currentmodule:: pandas + +Working with options +-------------------- +.. autosummary:: + :toctree: api/ + + describe_option + reset_option + get_option + set_option + option_context + +.. _api.general.testing: + +Testing functions +----------------- +.. autosummary:: + :toctree: api/ + + testing.assert_frame_equal + testing.assert_series_equal + testing.assert_index_equal + testing.assert_extension_array_equal + +Exceptions and warnings +----------------------- +.. autosummary:: + :toctree: api/ + + errors.DtypeWarning + errors.EmptyDataError + errors.OutOfBoundsDatetime + errors.ParserError + errors.ParserWarning + errors.PerformanceWarning + errors.UnsortedIndexError + errors.UnsupportedFunctionCall + +Data types related functionality +-------------------------------- +.. autosummary:: + :toctree: api/ + + api.types.union_categoricals + api.types.infer_dtype + api.types.pandas_dtype + +Dtype introspection +~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + api.types.is_bool_dtype + api.types.is_categorical_dtype + api.types.is_complex_dtype + api.types.is_datetime64_any_dtype + api.types.is_datetime64_dtype + api.types.is_datetime64_ns_dtype + api.types.is_datetime64tz_dtype + api.types.is_extension_type + api.types.is_extension_array_dtype + api.types.is_float_dtype + api.types.is_int64_dtype + api.types.is_integer_dtype + api.types.is_interval_dtype + api.types.is_numeric_dtype + api.types.is_object_dtype + api.types.is_period_dtype + api.types.is_signed_integer_dtype + api.types.is_string_dtype + api.types.is_timedelta64_dtype + api.types.is_timedelta64_ns_dtype + api.types.is_unsigned_integer_dtype + api.types.is_sparse + +Iterable introspection +~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + api.types.is_dict_like + api.types.is_file_like + api.types.is_list_like + api.types.is_named_tuple + api.types.is_iterator + +Scalar introspection +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + api.types.is_bool + api.types.is_categorical + api.types.is_complex + api.types.is_float + api.types.is_hashable + api.types.is_integer + api.types.is_interval + api.types.is_number + api.types.is_re + api.types.is_re_compilable + api.types.is_scalar diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst new file mode 100644 index 00000000..921eb737 --- /dev/null +++ b/doc/source/reference/groupby.rst @@ -0,0 +1,137 @@ +{{ header }} + +.. _api.groupby: + +======= +GroupBy +======= +.. currentmodule:: pandas.core.groupby + +GroupBy objects are returned by groupby calls: :func:`pandas.DataFrame.groupby`, :func:`pandas.Series.groupby`, etc. + +Indexing, iteration +------------------- +.. autosummary:: + :toctree: api/ + + GroupBy.__iter__ + GroupBy.groups + GroupBy.indices + GroupBy.get_group + +.. currentmodule:: pandas + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + Grouper + +.. 
currentmodule:: pandas.core.groupby + +Function application +-------------------- +.. autosummary:: + :toctree: api/ + + GroupBy.apply + GroupBy.agg + GroupBy.aggregate + GroupBy.transform + GroupBy.pipe + +Computations / descriptive stats +-------------------------------- +.. autosummary:: + :toctree: api/ + + GroupBy.all + GroupBy.any + GroupBy.bfill + GroupBy.count + GroupBy.cumcount + GroupBy.cummax + GroupBy.cummin + GroupBy.cumprod + GroupBy.cumsum + GroupBy.ffill + GroupBy.first + GroupBy.head + GroupBy.last + GroupBy.max + GroupBy.mean + GroupBy.median + GroupBy.min + GroupBy.ngroup + GroupBy.nth + GroupBy.ohlc + GroupBy.prod + GroupBy.rank + GroupBy.pct_change + GroupBy.size + GroupBy.sem + GroupBy.std + GroupBy.sum + GroupBy.var + GroupBy.tail + +The following methods are available in both ``SeriesGroupBy`` and +``DataFrameGroupBy`` objects, but may differ slightly, usually in that +the ``DataFrameGroupBy`` version permits the specification of an +axis argument, and often an argument indicating whether to restrict +application to columns of a specific data type. + +.. autosummary:: + :toctree: api/ + + DataFrameGroupBy.all + DataFrameGroupBy.any + DataFrameGroupBy.bfill + DataFrameGroupBy.corr + DataFrameGroupBy.count + DataFrameGroupBy.cov + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumprod + DataFrameGroupBy.cumsum + DataFrameGroupBy.describe + DataFrameGroupBy.diff + DataFrameGroupBy.ffill + DataFrameGroupBy.fillna + DataFrameGroupBy.filter + DataFrameGroupBy.hist + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.mad + DataFrameGroupBy.nunique + DataFrameGroupBy.pct_change + DataFrameGroupBy.plot + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.resample + DataFrameGroupBy.shift + DataFrameGroupBy.size + DataFrameGroupBy.skew + DataFrameGroupBy.take + DataFrameGroupBy.tshift + +The following methods are available only for ``SeriesGroupBy`` objects. + +.. 
autosummary:: + :toctree: api/ + + SeriesGroupBy.nlargest + SeriesGroupBy.nsmallest + SeriesGroupBy.nunique + SeriesGroupBy.unique + SeriesGroupBy.value_counts + SeriesGroupBy.is_monotonic_increasing + SeriesGroupBy.is_monotonic_decreasing + +The following methods are available only for ``DataFrameGroupBy`` objects. + +.. autosummary:: + :toctree: api/ + + DataFrameGroupBy.corrwith + DataFrameGroupBy.boxplot diff --git a/doc/source/reference/index.rst b/doc/source/reference/index.rst new file mode 100644 index 00000000..9d5649c3 --- /dev/null +++ b/doc/source/reference/index.rst @@ -0,0 +1,72 @@ +{{ header }} + +.. _api: + +============= +API reference +============= + +This page gives an overview of all public pandas objects, functions and +methods. All classes and functions exposed in ``pandas.*`` namespace are public. + +Some subpackages are public which include ``pandas.errors``, +``pandas.plotting``, and ``pandas.testing``. Public functions in +``pandas.io`` and ``pandas.tseries`` submodules are mentioned in +the documentation. ``pandas.api.types`` subpackage holds some +public functions related to data types in pandas. + +.. warning:: + + The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed. + +.. If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + + io + general_functions + series + frame + arrays + panel + indexing + offset_frequency + window + groupby + resampling + style + plotting + general_utility_functions + extensions + +.. This is to prevent warnings in the doc build. We don't want to encourage +.. these methods. + +.. + .. 
toctree:: + + api/pandas.DataFrame.blocks + api/pandas.DataFrame.as_matrix + api/pandas.Index.asi8 + api/pandas.Index.data + api/pandas.Index.flags + api/pandas.Index.holds_integer + api/pandas.Index.is_type_compatible + api/pandas.Index.nlevels + api/pandas.Index.sort + api/pandas.Series.asobject + api/pandas.Series.blocks + api/pandas.Series.from_array + api/pandas.Series.imag + api/pandas.Series.real + + +.. Can't convince sphinx to generate toctree for this class attribute. +.. So we do it manually to avoid a warning + +.. + .. toctree:: + + api/pandas.api.extensions.ExtensionDtype.na_value diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst new file mode 100644 index 00000000..ab6ea5ae --- /dev/null +++ b/doc/source/reference/indexing.rst @@ -0,0 +1,490 @@ +{{ header }} + +.. _api.indexing: + +============= +Index objects +============= + +Index +----- +.. currentmodule:: pandas + +**Many of these methods or variants thereof are available on the objects +that contain an index (Series/DataFrame) and those should most likely be +used before calling these methods directly.** + +.. autosummary:: + :toctree: api/ + + Index + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.values + Index.is_monotonic + Index.is_monotonic_increasing + Index.is_monotonic_decreasing + Index.is_unique + Index.has_duplicates + Index.hasnans + Index.dtype + Index.inferred_type + Index.is_all_dates + Index.shape + Index.name + Index.names + Index.nbytes + Index.ndim + Index.size + Index.empty + Index.T + Index.memory_usage + +Modifying and computations +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Index.all + Index.any + Index.argmin + Index.argmax + Index.copy + Index.delete + Index.drop + Index.drop_duplicates + Index.duplicated + Index.equals + Index.factorize + Index.identical + Index.insert + Index.is_ + Index.is_boolean + Index.is_categorical + Index.is_floating + Index.is_integer + Index.is_interval + Index.is_mixed + Index.is_numeric + Index.is_object + Index.min + Index.max + Index.reindex + Index.rename + Index.repeat + Index.where + Index.take + Index.putmask + Index.unique + Index.nunique + Index.value_counts + +Compatibility with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.set_names + Index.droplevel + +Missing values +~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.fillna + Index.dropna + Index.isna + Index.notna + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.astype + Index.item + Index.map + Index.ravel + Index.to_list + Index.to_native_types + Index.to_series + Index.to_frame + Index.view + +Sorting +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.argsort + Index.searchsorted + Index.sort_values + +Time-specific operations +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.shift + +Combining / joining / set operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.append + Index.join + Index.intersection + Index.union + Index.difference + Index.symmetric_difference + +Selecting +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.asof + Index.asof_locs + Index.get_indexer + Index.get_indexer_for + Index.get_indexer_non_unique + Index.get_level_values + Index.get_loc + Index.get_slice_bound + Index.get_value + Index.isin + Index.slice_indexer + Index.slice_locs + +.. _api.numericindex: + +Numeric Index +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + RangeIndex + Int64Index + UInt64Index + Float64Index + +.. 
We need this autosummary so that the methods are generated. +.. Separate block, since they aren't classes. + +.. autosummary:: + :toctree: api/ + + RangeIndex.start + RangeIndex.stop + RangeIndex.step + RangeIndex.from_range + +.. _api.categoricalindex: + +CategoricalIndex +---------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CategoricalIndex + +Categorical components +~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CategoricalIndex.codes + CategoricalIndex.categories + CategoricalIndex.ordered + CategoricalIndex.rename_categories + CategoricalIndex.reorder_categories + CategoricalIndex.add_categories + CategoricalIndex.remove_categories + CategoricalIndex.remove_unused_categories + CategoricalIndex.set_categories + CategoricalIndex.as_ordered + CategoricalIndex.as_unordered + +Modifying and computations +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CategoricalIndex.map + CategoricalIndex.equals + +.. _api.intervalindex: + +IntervalIndex +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + IntervalIndex + +IntervalIndex components +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + IntervalIndex.from_arrays + IntervalIndex.from_tuples + IntervalIndex.from_breaks + IntervalIndex.left + IntervalIndex.right + IntervalIndex.mid + IntervalIndex.closed + IntervalIndex.length + IntervalIndex.values + IntervalIndex.is_empty + IntervalIndex.is_non_overlapping_monotonic + IntervalIndex.is_overlapping + IntervalIndex.get_loc + IntervalIndex.get_indexer + IntervalIndex.set_closed + IntervalIndex.contains + IntervalIndex.overlaps + IntervalIndex.to_tuples + +.. _api.multiindex: + +MultiIndex +---------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + MultiIndex + +.. autosummary:: + :toctree: api/ + + IndexSlice + +MultiIndex constructors +~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + MultiIndex.from_arrays + MultiIndex.from_tuples + MultiIndex.from_product + MultiIndex.from_frame + +MultiIndex properties +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.names + MultiIndex.levels + MultiIndex.codes + MultiIndex.nlevels + MultiIndex.levshape + +MultiIndex components +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.set_levels + MultiIndex.set_codes + MultiIndex.to_flat_index + MultiIndex.to_frame + MultiIndex.is_lexsorted + MultiIndex.sortlevel + MultiIndex.droplevel + MultiIndex.swaplevel + MultiIndex.reorder_levels + MultiIndex.remove_unused_levels + +MultiIndex selecting +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.get_loc + MultiIndex.get_locs + MultiIndex.get_loc_level + MultiIndex.get_indexer + MultiIndex.get_level_values + +.. _api.datetimeindex: + +DatetimeIndex +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + DatetimeIndex + +Time/Date components +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.year + DatetimeIndex.month + DatetimeIndex.day + DatetimeIndex.hour + DatetimeIndex.minute + DatetimeIndex.second + DatetimeIndex.microsecond + DatetimeIndex.nanosecond + DatetimeIndex.date + DatetimeIndex.time + DatetimeIndex.timetz + DatetimeIndex.dayofyear + DatetimeIndex.weekofyear + DatetimeIndex.week + DatetimeIndex.dayofweek + DatetimeIndex.weekday + DatetimeIndex.quarter + DatetimeIndex.tz + DatetimeIndex.freq + DatetimeIndex.freqstr + DatetimeIndex.is_month_start + DatetimeIndex.is_month_end + DatetimeIndex.is_quarter_start + DatetimeIndex.is_quarter_end + DatetimeIndex.is_year_start + DatetimeIndex.is_year_end + DatetimeIndex.is_leap_year + DatetimeIndex.inferred_freq + +Selecting +~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DatetimeIndex.indexer_at_time + DatetimeIndex.indexer_between_time + + +Time-specific operations +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.normalize + DatetimeIndex.strftime + DatetimeIndex.snap + DatetimeIndex.tz_convert + DatetimeIndex.tz_localize + DatetimeIndex.round + DatetimeIndex.floor + DatetimeIndex.ceil + DatetimeIndex.month_name + DatetimeIndex.day_name + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.to_period + DatetimeIndex.to_perioddelta + DatetimeIndex.to_pydatetime + DatetimeIndex.to_series + DatetimeIndex.to_frame + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.mean + +TimedeltaIndex +-------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + TimedeltaIndex + +Components +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.days + TimedeltaIndex.seconds + TimedeltaIndex.microseconds + TimedeltaIndex.nanoseconds + TimedeltaIndex.components + TimedeltaIndex.inferred_freq + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.to_pytimedelta + TimedeltaIndex.to_series + TimedeltaIndex.round + TimedeltaIndex.floor + TimedeltaIndex.ceil + TimedeltaIndex.to_frame + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.mean + +.. currentmodule:: pandas + +PeriodIndex +----------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + PeriodIndex + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + PeriodIndex.day + PeriodIndex.dayofweek + PeriodIndex.dayofyear + PeriodIndex.days_in_month + PeriodIndex.daysinmonth + PeriodIndex.end_time + PeriodIndex.freq + PeriodIndex.freqstr + PeriodIndex.hour + PeriodIndex.is_leap_year + PeriodIndex.minute + PeriodIndex.month + PeriodIndex.quarter + PeriodIndex.qyear + PeriodIndex.second + PeriodIndex.start_time + PeriodIndex.week + PeriodIndex.weekday + PeriodIndex.weekofyear + PeriodIndex.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + PeriodIndex.asfreq + PeriodIndex.strftime + PeriodIndex.to_timestamp diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst new file mode 100644 index 00000000..0037d4a4 --- /dev/null +++ b/doc/source/reference/io.rst @@ -0,0 +1,152 @@ +{{ header }} + +.. _api.io: + +============ +Input/output +============ +.. currentmodule:: pandas + +Pickling +~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_pickle + +Flat file +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_table + read_csv + read_fwf + +Clipboard +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_clipboard + +Excel +~~~~~ +.. autosummary:: + :toctree: api/ + + read_excel + ExcelFile.parse + +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + ExcelWriter + +JSON +~~~~ +.. autosummary:: + :toctree: api/ + + read_json + json_normalize + +.. currentmodule:: pandas.io.json + +.. autosummary:: + :toctree: api/ + + build_table_schema + +.. currentmodule:: pandas + +HTML +~~~~ +.. autosummary:: + :toctree: api/ + + read_html + +HDFStore: PyTables (HDF5) +~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_hdf + HDFStore.put + HDFStore.append + HDFStore.get + HDFStore.select + HDFStore.info + HDFStore.keys + HDFStore.groups + HDFStore.walk + +Feather +~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_feather + +Parquet +~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_parquet + +ORC +~~~ +.. 
autosummary:: + :toctree: api/ + + read_orc + +SAS +~~~ +.. autosummary:: + :toctree: api/ + + read_sas + +SPSS +~~~~ +.. autosummary:: + :toctree: api/ + + read_spss + +SQL +~~~ +.. autosummary:: + :toctree: api/ + + read_sql_table + read_sql_query + read_sql + +Google BigQuery +~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + read_gbq + +STATA +~~~~~ +.. autosummary:: + :toctree: api/ + + read_stata + +.. currentmodule:: pandas.io.stata + +.. autosummary:: + :toctree: api/ + + StataReader.data_label + StataReader.value_labels + StataReader.variable_labels + StataWriter.write_file diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst new file mode 100644 index 00000000..17544cb7 --- /dev/null +++ b/doc/source/reference/offset_frequency.rst @@ -0,0 +1,1522 @@ +{{ header }} + +.. _api.dateoffsets: + +============ +Date offsets +============ +.. currentmodule:: pandas.tseries.offsets + +DateOffset +---------- +.. autosummary:: + :toctree: api/ + + DateOffset + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DateOffset.freqstr + DateOffset.kwds + DateOffset.name + DateOffset.nanos + DateOffset.normalize + DateOffset.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + DateOffset.apply + DateOffset.copy + DateOffset.isAnchored + DateOffset.onOffset + DateOffset.is_anchored + DateOffset.is_on_offset + DateOffset.__call__ + +BusinessDay +----------- +.. autosummary:: + :toctree: api/ + + BusinessDay + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessDay.freqstr + BusinessDay.kwds + BusinessDay.name + BusinessDay.nanos + BusinessDay.normalize + BusinessDay.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessDay.apply + BusinessDay.apply_index + BusinessDay.copy + BusinessDay.isAnchored + BusinessDay.onOffset + BusinessDay.is_anchored + BusinessDay.is_on_offset + BusinessDay.__call__ + +BusinessHour +------------ +.. 
autosummary:: + :toctree: api/ + + BusinessHour + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessHour.freqstr + BusinessHour.kwds + BusinessHour.name + BusinessHour.nanos + BusinessHour.normalize + BusinessHour.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessHour.apply + BusinessHour.copy + BusinessHour.isAnchored + BusinessHour.onOffset + BusinessHour.is_anchored + BusinessHour.is_on_offset + BusinessHour.__call__ + +CustomBusinessDay +----------------- +.. autosummary:: + :toctree: api/ + + CustomBusinessDay + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessDay.freqstr + CustomBusinessDay.kwds + CustomBusinessDay.name + CustomBusinessDay.nanos + CustomBusinessDay.normalize + CustomBusinessDay.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessDay.apply + CustomBusinessDay.copy + CustomBusinessDay.isAnchored + CustomBusinessDay.onOffset + CustomBusinessDay.is_anchored + CustomBusinessDay.is_on_offset + CustomBusinessDay.__call__ + +CustomBusinessHour +------------------ +.. autosummary:: + :toctree: api/ + + CustomBusinessHour + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessHour.freqstr + CustomBusinessHour.kwds + CustomBusinessHour.name + CustomBusinessHour.nanos + CustomBusinessHour.normalize + CustomBusinessHour.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessHour.apply + CustomBusinessHour.copy + CustomBusinessHour.isAnchored + CustomBusinessHour.onOffset + CustomBusinessHour.is_anchored + CustomBusinessHour.is_on_offset + CustomBusinessHour.__call__ + +MonthOffset +----------- +.. autosummary:: + :toctree: api/ + + MonthOffset + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthOffset.freqstr + MonthOffset.kwds + MonthOffset.name + MonthOffset.nanos + MonthOffset.normalize + MonthOffset.rule_code + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + MonthOffset.apply + MonthOffset.apply_index + MonthOffset.copy + MonthOffset.isAnchored + MonthOffset.onOffset + MonthOffset.is_anchored + MonthOffset.is_on_offset + MonthOffset.__call__ + +MonthEnd +-------- +.. autosummary:: + :toctree: api/ + + MonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthEnd.freqstr + MonthEnd.kwds + MonthEnd.name + MonthEnd.nanos + MonthEnd.normalize + MonthEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthEnd.apply + MonthEnd.apply_index + MonthEnd.copy + MonthEnd.isAnchored + MonthEnd.onOffset + MonthEnd.is_anchored + MonthEnd.is_on_offset + MonthEnd.__call__ + +MonthBegin +---------- +.. autosummary:: + :toctree: api/ + + MonthBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthBegin.freqstr + MonthBegin.kwds + MonthBegin.name + MonthBegin.nanos + MonthBegin.normalize + MonthBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + MonthBegin.apply + MonthBegin.apply_index + MonthBegin.copy + MonthBegin.isAnchored + MonthBegin.onOffset + MonthBegin.is_anchored + MonthBegin.is_on_offset + MonthBegin.__call__ + +BusinessMonthEnd +---------------- +.. autosummary:: + :toctree: api/ + + BusinessMonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessMonthEnd.freqstr + BusinessMonthEnd.kwds + BusinessMonthEnd.name + BusinessMonthEnd.nanos + BusinessMonthEnd.normalize + BusinessMonthEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessMonthEnd.apply + BusinessMonthEnd.apply_index + BusinessMonthEnd.copy + BusinessMonthEnd.isAnchored + BusinessMonthEnd.onOffset + BusinessMonthEnd.is_anchored + BusinessMonthEnd.is_on_offset + BusinessMonthEnd.__call__ + +BusinessMonthBegin +------------------ +.. autosummary:: + :toctree: api/ + + BusinessMonthBegin + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + BusinessMonthBegin.freqstr + BusinessMonthBegin.kwds + BusinessMonthBegin.name + BusinessMonthBegin.nanos + BusinessMonthBegin.normalize + BusinessMonthBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BusinessMonthBegin.apply + BusinessMonthBegin.apply_index + BusinessMonthBegin.copy + BusinessMonthBegin.isAnchored + BusinessMonthBegin.onOffset + BusinessMonthBegin.is_anchored + BusinessMonthBegin.is_on_offset + BusinessMonthBegin.__call__ + +CustomBusinessMonthEnd +---------------------- +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthEnd.freqstr + CustomBusinessMonthEnd.kwds + CustomBusinessMonthEnd.m_offset + CustomBusinessMonthEnd.name + CustomBusinessMonthEnd.nanos + CustomBusinessMonthEnd.normalize + CustomBusinessMonthEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthEnd.apply + CustomBusinessMonthEnd.copy + CustomBusinessMonthEnd.isAnchored + CustomBusinessMonthEnd.onOffset + CustomBusinessMonthEnd.is_anchored + CustomBusinessMonthEnd.is_on_offset + CustomBusinessMonthEnd.__call__ + +CustomBusinessMonthBegin +------------------------ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthBegin.freqstr + CustomBusinessMonthBegin.kwds + CustomBusinessMonthBegin.m_offset + CustomBusinessMonthBegin.name + CustomBusinessMonthBegin.nanos + CustomBusinessMonthBegin.normalize + CustomBusinessMonthBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CustomBusinessMonthBegin.apply + CustomBusinessMonthBegin.copy + CustomBusinessMonthBegin.isAnchored + CustomBusinessMonthBegin.onOffset + CustomBusinessMonthBegin.is_anchored + CustomBusinessMonthBegin.is_on_offset + CustomBusinessMonthBegin.__call__ + +SemiMonthOffset +--------------- +.. 
autosummary:: + :toctree: api/ + + SemiMonthOffset + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthOffset.freqstr + SemiMonthOffset.kwds + SemiMonthOffset.name + SemiMonthOffset.nanos + SemiMonthOffset.normalize + SemiMonthOffset.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthOffset.apply + SemiMonthOffset.apply_index + SemiMonthOffset.copy + SemiMonthOffset.isAnchored + SemiMonthOffset.onOffset + SemiMonthOffset.is_anchored + SemiMonthOffset.is_on_offset + SemiMonthOffset.__call__ + +SemiMonthEnd +------------ +.. autosummary:: + :toctree: api/ + + SemiMonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthEnd.freqstr + SemiMonthEnd.kwds + SemiMonthEnd.name + SemiMonthEnd.nanos + SemiMonthEnd.normalize + SemiMonthEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthEnd.apply + SemiMonthEnd.apply_index + SemiMonthEnd.copy + SemiMonthEnd.isAnchored + SemiMonthEnd.onOffset + SemiMonthEnd.is_anchored + SemiMonthEnd.is_on_offset + SemiMonthEnd.__call__ + +SemiMonthBegin +-------------- +.. autosummary:: + :toctree: api/ + + SemiMonthBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthBegin.freqstr + SemiMonthBegin.kwds + SemiMonthBegin.name + SemiMonthBegin.nanos + SemiMonthBegin.normalize + SemiMonthBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + SemiMonthBegin.apply + SemiMonthBegin.apply_index + SemiMonthBegin.copy + SemiMonthBegin.isAnchored + SemiMonthBegin.onOffset + SemiMonthBegin.is_anchored + SemiMonthBegin.is_on_offset + SemiMonthBegin.__call__ + +Week +---- +.. autosummary:: + :toctree: api/ + + Week + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Week.freqstr + Week.kwds + Week.name + Week.nanos + Week.normalize + Week.rule_code + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Week.apply + Week.apply_index + Week.copy + Week.isAnchored + Week.onOffset + Week.is_anchored + Week.is_on_offset + Week.__call__ + +WeekOfMonth +----------- +.. autosummary:: + :toctree: api/ + + WeekOfMonth + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + WeekOfMonth.freqstr + WeekOfMonth.kwds + WeekOfMonth.name + WeekOfMonth.nanos + WeekOfMonth.normalize + WeekOfMonth.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + WeekOfMonth.apply + WeekOfMonth.copy + WeekOfMonth.isAnchored + WeekOfMonth.onOffset + WeekOfMonth.is_anchored + WeekOfMonth.is_on_offset + WeekOfMonth.__call__ + +LastWeekOfMonth +--------------- +.. autosummary:: + :toctree: api/ + + LastWeekOfMonth + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + LastWeekOfMonth.freqstr + LastWeekOfMonth.kwds + LastWeekOfMonth.name + LastWeekOfMonth.nanos + LastWeekOfMonth.normalize + LastWeekOfMonth.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + LastWeekOfMonth.apply + LastWeekOfMonth.copy + LastWeekOfMonth.isAnchored + LastWeekOfMonth.onOffset + LastWeekOfMonth.is_anchored + LastWeekOfMonth.is_on_offset + LastWeekOfMonth.__call__ + +QuarterOffset +------------- +.. autosummary:: + :toctree: api/ + + QuarterOffset + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterOffset.freqstr + QuarterOffset.kwds + QuarterOffset.name + QuarterOffset.nanos + QuarterOffset.normalize + QuarterOffset.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterOffset.apply + QuarterOffset.apply_index + QuarterOffset.copy + QuarterOffset.isAnchored + QuarterOffset.onOffset + QuarterOffset.is_anchored + QuarterOffset.is_on_offset + QuarterOffset.__call__ + +BQuarterEnd +----------- +.. autosummary:: + :toctree: api/ + + BQuarterEnd + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + BQuarterEnd.freqstr + BQuarterEnd.kwds + BQuarterEnd.name + BQuarterEnd.nanos + BQuarterEnd.normalize + BQuarterEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BQuarterEnd.apply + BQuarterEnd.apply_index + BQuarterEnd.copy + BQuarterEnd.isAnchored + BQuarterEnd.onOffset + BQuarterEnd.is_anchored + BQuarterEnd.is_on_offset + BQuarterEnd.__call__ + +BQuarterBegin +------------- +.. autosummary:: + :toctree: api/ + + BQuarterBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BQuarterBegin.freqstr + BQuarterBegin.kwds + BQuarterBegin.name + BQuarterBegin.nanos + BQuarterBegin.normalize + BQuarterBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BQuarterBegin.apply + BQuarterBegin.apply_index + BQuarterBegin.copy + BQuarterBegin.isAnchored + BQuarterBegin.onOffset + BQuarterBegin.is_anchored + BQuarterBegin.is_on_offset + BQuarterBegin.__call__ + +QuarterEnd +---------- +.. autosummary:: + :toctree: api/ + + QuarterEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterEnd.freqstr + QuarterEnd.kwds + QuarterEnd.name + QuarterEnd.nanos + QuarterEnd.normalize + QuarterEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterEnd.apply + QuarterEnd.apply_index + QuarterEnd.copy + QuarterEnd.isAnchored + QuarterEnd.onOffset + QuarterEnd.is_anchored + QuarterEnd.is_on_offset + QuarterEnd.__call__ + +QuarterBegin +------------ +.. autosummary:: + :toctree: api/ + + QuarterBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + QuarterBegin.freqstr + QuarterBegin.kwds + QuarterBegin.name + QuarterBegin.nanos + QuarterBegin.normalize + QuarterBegin.rule_code + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + QuarterBegin.apply + QuarterBegin.apply_index + QuarterBegin.copy + QuarterBegin.isAnchored + QuarterBegin.onOffset + QuarterBegin.is_anchored + QuarterBegin.is_on_offset + QuarterBegin.__call__ + +YearOffset +---------- +.. autosummary:: + :toctree: api/ + + YearOffset + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearOffset.freqstr + YearOffset.kwds + YearOffset.name + YearOffset.nanos + YearOffset.normalize + YearOffset.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearOffset.apply + YearOffset.apply_index + YearOffset.copy + YearOffset.isAnchored + YearOffset.onOffset + YearOffset.is_anchored + YearOffset.is_on_offset + YearOffset.__call__ + +BYearEnd +-------- +.. autosummary:: + :toctree: api/ + + BYearEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BYearEnd.freqstr + BYearEnd.kwds + BYearEnd.name + BYearEnd.nanos + BYearEnd.normalize + BYearEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BYearEnd.apply + BYearEnd.apply_index + BYearEnd.copy + BYearEnd.isAnchored + BYearEnd.onOffset + BYearEnd.is_anchored + BYearEnd.is_on_offset + BYearEnd.__call__ + +BYearBegin +---------- +.. autosummary:: + :toctree: api/ + + BYearBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BYearBegin.freqstr + BYearBegin.kwds + BYearBegin.name + BYearBegin.nanos + BYearBegin.normalize + BYearBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BYearBegin.apply + BYearBegin.apply_index + BYearBegin.copy + BYearBegin.isAnchored + BYearBegin.onOffset + BYearBegin.is_anchored + BYearBegin.is_on_offset + BYearBegin.__call__ + +YearEnd +------- +.. autosummary:: + :toctree: api/ + + YearEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearEnd.freqstr + YearEnd.kwds + YearEnd.name + YearEnd.nanos + YearEnd.normalize + YearEnd.rule_code + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + YearEnd.apply + YearEnd.apply_index + YearEnd.copy + YearEnd.isAnchored + YearEnd.onOffset + YearEnd.is_anchored + YearEnd.is_on_offset + YearEnd.__call__ + +YearBegin +--------- +.. autosummary:: + :toctree: api/ + + YearBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearBegin.freqstr + YearBegin.kwds + YearBegin.name + YearBegin.nanos + YearBegin.normalize + YearBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + YearBegin.apply + YearBegin.apply_index + YearBegin.copy + YearBegin.isAnchored + YearBegin.onOffset + YearBegin.is_anchored + YearBegin.is_on_offset + YearBegin.__call__ + +FY5253 +------ +.. autosummary:: + :toctree: api/ + + FY5253 + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + FY5253.freqstr + FY5253.kwds + FY5253.name + FY5253.nanos + FY5253.normalize + FY5253.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + FY5253.apply + FY5253.copy + FY5253.get_rule_code_suffix + FY5253.get_year_end + FY5253.isAnchored + FY5253.onOffset + FY5253.is_anchored + FY5253.is_on_offset + FY5253.__call__ + +FY5253Quarter +------------- +.. autosummary:: + :toctree: api/ + + FY5253Quarter + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + FY5253Quarter.freqstr + FY5253Quarter.kwds + FY5253Quarter.name + FY5253Quarter.nanos + FY5253Quarter.normalize + FY5253Quarter.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + FY5253Quarter.apply + FY5253Quarter.copy + FY5253Quarter.get_weeks + FY5253Quarter.isAnchored + FY5253Quarter.onOffset + FY5253Quarter.is_anchored + FY5253Quarter.is_on_offset + FY5253Quarter.year_has_extra_week + FY5253Quarter.__call__ + +Easter +------ +.. autosummary:: + :toctree: api/ + + Easter + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Easter.freqstr + Easter.kwds + Easter.name + Easter.nanos + Easter.normalize + Easter.rule_code + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Easter.apply + Easter.copy + Easter.isAnchored + Easter.onOffset + Easter.is_anchored + Easter.is_on_offset + Easter.__call__ + +Tick +---- +.. autosummary:: + :toctree: api/ + + Tick + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Tick.delta + Tick.freqstr + Tick.kwds + Tick.name + Tick.nanos + Tick.normalize + Tick.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Tick.copy + Tick.isAnchored + Tick.onOffset + Tick.is_anchored + Tick.is_on_offset + Tick.__call__ + +Day +--- +.. autosummary:: + :toctree: api/ + + Day + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Day.delta + Day.freqstr + Day.kwds + Day.name + Day.nanos + Day.normalize + Day.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Day.copy + Day.isAnchored + Day.onOffset + Day.is_anchored + Day.is_on_offset + Day.__call__ + +Hour +---- +.. autosummary:: + :toctree: api/ + + Hour + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Hour.delta + Hour.freqstr + Hour.kwds + Hour.name + Hour.nanos + Hour.normalize + Hour.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Hour.copy + Hour.isAnchored + Hour.onOffset + Hour.is_anchored + Hour.is_on_offset + Hour.__call__ + +Minute +------ +.. autosummary:: + :toctree: api/ + + Minute + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Minute.delta + Minute.freqstr + Minute.kwds + Minute.name + Minute.nanos + Minute.normalize + Minute.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Minute.copy + Minute.isAnchored + Minute.onOffset + Minute.is_anchored + Minute.is_on_offset + Minute.__call__ + +Second +------ +.. autosummary:: + :toctree: api/ + + Second + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Second.delta + Second.freqstr + Second.kwds + Second.name + Second.nanos + Second.normalize + Second.rule_code + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Second.copy + Second.isAnchored + Second.onOffset + Second.is_anchored + Second.is_on_offset + Second.__call__ + +Milli +----- +.. autosummary:: + :toctree: api/ + + Milli + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Milli.delta + Milli.freqstr + Milli.kwds + Milli.name + Milli.nanos + Milli.normalize + Milli.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Milli.copy + Milli.isAnchored + Milli.onOffset + Milli.is_anchored + Milli.is_on_offset + Milli.__call__ + +Micro +----- +.. autosummary:: + :toctree: api/ + + Micro + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Micro.delta + Micro.freqstr + Micro.kwds + Micro.name + Micro.nanos + Micro.normalize + Micro.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Micro.copy + Micro.isAnchored + Micro.onOffset + Micro.is_anchored + Micro.is_on_offset + Micro.__call__ + +Nano +---- +.. autosummary:: + :toctree: api/ + + Nano + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Nano.delta + Nano.freqstr + Nano.kwds + Nano.name + Nano.nanos + Nano.normalize + Nano.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Nano.copy + Nano.isAnchored + Nano.onOffset + Nano.is_anchored + Nano.is_on_offset + Nano.__call__ + +BDay +---- +.. autosummary:: + :toctree: api/ + + BDay + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BDay.base + BDay.freqstr + BDay.kwds + BDay.name + BDay.nanos + BDay.normalize + BDay.offset + BDay.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BDay.apply + BDay.apply_index + BDay.copy + BDay.isAnchored + BDay.onOffset + BDay.is_anchored + BDay.is_on_offset + BDay.rollback + BDay.rollforward + BDay.__call__ + +BMonthEnd +--------- +.. autosummary:: + :toctree: api/ + + BMonthEnd + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + BMonthEnd.base + BMonthEnd.freqstr + BMonthEnd.kwds + BMonthEnd.name + BMonthEnd.nanos + BMonthEnd.normalize + BMonthEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BMonthEnd.apply + BMonthEnd.apply_index + BMonthEnd.copy + BMonthEnd.isAnchored + BMonthEnd.onOffset + BMonthEnd.is_anchored + BMonthEnd.is_on_offset + BMonthEnd.rollback + BMonthEnd.rollforward + BMonthEnd.__call__ + +BMonthBegin +----------- +.. autosummary:: + :toctree: api/ + + BMonthBegin + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + BMonthBegin.base + BMonthBegin.freqstr + BMonthBegin.kwds + BMonthBegin.name + BMonthBegin.nanos + BMonthBegin.normalize + BMonthBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + BMonthBegin.apply + BMonthBegin.apply_index + BMonthBegin.copy + BMonthBegin.isAnchored + BMonthBegin.onOffset + BMonthBegin.is_anchored + BMonthBegin.is_on_offset + BMonthBegin.rollback + BMonthBegin.rollforward + BMonthBegin.__call__ + +CBMonthEnd +---------- +.. autosummary:: + :toctree: api/ + + CBMonthEnd + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CBMonthEnd.base + CBMonthEnd.cbday_roll + CBMonthEnd.freqstr + CBMonthEnd.kwds + CBMonthEnd.m_offset + CBMonthEnd.month_roll + CBMonthEnd.name + CBMonthEnd.nanos + CBMonthEnd.normalize + CBMonthEnd.offset + CBMonthEnd.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CBMonthEnd.apply + CBMonthEnd.apply_index + CBMonthEnd.copy + CBMonthEnd.isAnchored + CBMonthEnd.onOffset + CBMonthEnd.is_anchored + CBMonthEnd.is_on_offset + CBMonthEnd.rollback + CBMonthEnd.rollforward + CBMonthEnd.__call__ + +CBMonthBegin +------------ +.. autosummary:: + :toctree: api/ + + CBMonthBegin + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + CBMonthBegin.base + CBMonthBegin.cbday_roll + CBMonthBegin.freqstr + CBMonthBegin.kwds + CBMonthBegin.m_offset + CBMonthBegin.month_roll + CBMonthBegin.name + CBMonthBegin.nanos + CBMonthBegin.normalize + CBMonthBegin.offset + CBMonthBegin.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CBMonthBegin.apply + CBMonthBegin.apply_index + CBMonthBegin.copy + CBMonthBegin.isAnchored + CBMonthBegin.onOffset + CBMonthBegin.is_anchored + CBMonthBegin.is_on_offset + CBMonthBegin.rollback + CBMonthBegin.rollforward + CBMonthBegin.__call__ + +CDay +---- +.. autosummary:: + :toctree: api/ + + CDay + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CDay.base + CDay.freqstr + CDay.kwds + CDay.name + CDay.nanos + CDay.normalize + CDay.offset + CDay.rule_code + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + CDay.apply + CDay.apply_index + CDay.copy + CDay.isAnchored + CDay.onOffset + CDay.is_anchored + CDay.is_on_offset + CDay.rollback + CDay.rollforward + CDay.__call__ + +.. _api.frequencies: + +=========== +Frequencies +=========== +.. currentmodule:: pandas.tseries.frequencies + +.. _api.offsets: + +.. autosummary:: + :toctree: api/ + + to_offset diff --git a/doc/source/reference/panel.rst b/doc/source/reference/panel.rst new file mode 100644 index 00000000..94bfe87f --- /dev/null +++ b/doc/source/reference/panel.rst @@ -0,0 +1,10 @@ +{{ header }} + +.. _api.panel: + +===== +Panel +===== +.. currentmodule:: pandas + +`Panel` was removed in 0.25.0. For prior documentation, see the `0.24 documentation `_ diff --git a/doc/source/reference/plotting.rst b/doc/source/reference/plotting.rst new file mode 100644 index 00000000..95657dfa --- /dev/null +++ b/doc/source/reference/plotting.rst @@ -0,0 +1,26 @@ +{{ header }} + +.. _api.plotting: + +======== +Plotting +======== +.. currentmodule:: pandas.plotting + +The following functions are contained in the `pandas.plotting` module. + +.. 
autosummary:: + :toctree: api/ + + andrews_curves + autocorrelation_plot + bootstrap_plot + boxplot + deregister_matplotlib_converters + lag_plot + parallel_coordinates + plot_params + radviz + register_matplotlib_converters + scatter_matrix + table diff --git a/doc/source/reference/resampling.rst b/doc/source/reference/resampling.rst new file mode 100644 index 00000000..57263139 --- /dev/null +++ b/doc/source/reference/resampling.rst @@ -0,0 +1,66 @@ +{{ header }} + +.. _api.resampling: + +========== +Resampling +========== +.. currentmodule:: pandas.core.resample + +Resampler objects are returned by resample calls: :func:`pandas.DataFrame.resample`, :func:`pandas.Series.resample`. + +Indexing, iteration +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.__iter__ + Resampler.groups + Resampler.indices + Resampler.get_group + +Function application +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.apply + Resampler.aggregate + Resampler.transform + Resampler.pipe + +Upsampling +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.ffill + Resampler.backfill + Resampler.bfill + Resampler.pad + Resampler.nearest + Resampler.fillna + Resampler.asfreq + Resampler.interpolate + +Computations / descriptive stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Resampler.count + Resampler.nunique + Resampler.first + Resampler.last + Resampler.max + Resampler.mean + Resampler.median + Resampler.min + Resampler.ohlc + Resampler.prod + Resampler.size + Resampler.sem + Resampler.std + Resampler.sum + Resampler.var + Resampler.quantile diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst new file mode 100644 index 00000000..1a69fa07 --- /dev/null +++ b/doc/source/reference/series.rst @@ -0,0 +1,584 @@ +{{ header }} + +.. _api.series: + +====== +Series +====== +.. currentmodule:: pandas + +Constructor +----------- +.. 
autosummary:: + :toctree: api/ + + Series + +Attributes +---------- +**Axes** + +.. autosummary:: + :toctree: api/ + + Series.index + +.. autosummary:: + :toctree: api/ + + Series.array + Series.values + Series.dtype + Series.shape + Series.nbytes + Series.ndim + Series.size + Series.T + Series.memory_usage + Series.hasnans + Series.empty + Series.dtypes + Series.name + +Conversion +---------- +.. autosummary:: + :toctree: api/ + + Series.astype + Series.convert_dtypes + Series.infer_objects + Series.copy + Series.bool + Series.to_numpy + Series.to_period + Series.to_timestamp + Series.to_list + Series.__array__ + +Indexing, iteration +------------------- +.. autosummary:: + :toctree: api/ + + Series.get + Series.at + Series.iat + Series.loc + Series.iloc + Series.__iter__ + Series.items + Series.iteritems + Series.keys + Series.pop + Series.item + Series.xs + +For more information on ``.at``, ``.iat``, ``.loc``, and +``.iloc``, see the :ref:`indexing documentation <indexing>`. + +Binary operator functions +------------------------- +.. autosummary:: + :toctree: api/ + + Series.add + Series.sub + Series.mul + Series.div + Series.truediv + Series.floordiv + Series.mod + Series.pow + Series.radd + Series.rsub + Series.rmul + Series.rdiv + Series.rtruediv + Series.rfloordiv + Series.rmod + Series.rpow + Series.combine + Series.combine_first + Series.round + Series.lt + Series.gt + Series.le + Series.ge + Series.ne + Series.eq + Series.product + Series.dot + +Function application, groupby & window +-------------------------------------- +.. autosummary:: + :toctree: api/ + + Series.apply + Series.agg + Series.aggregate + Series.transform + Series.map + Series.groupby + Series.rolling + Series.expanding + Series.ewm + Series.pipe + +.. _api.series.stats: + +Computations / descriptive stats +-------------------------------- +.. 
autosummary:: + :toctree: api/ + + Series.abs + Series.all + Series.any + Series.autocorr + Series.between + Series.clip + Series.corr + Series.count + Series.cov + Series.cummax + Series.cummin + Series.cumprod + Series.cumsum + Series.describe + Series.diff + Series.factorize + Series.kurt + Series.mad + Series.max + Series.mean + Series.median + Series.min + Series.mode + Series.nlargest + Series.nsmallest + Series.pct_change + Series.prod + Series.quantile + Series.rank + Series.sem + Series.skew + Series.std + Series.sum + Series.var + Series.kurtosis + Series.unique + Series.nunique + Series.is_unique + Series.is_monotonic + Series.is_monotonic_increasing + Series.is_monotonic_decreasing + Series.value_counts + +Reindexing / selection / label manipulation +------------------------------------------- +.. autosummary:: + :toctree: api/ + + Series.align + Series.drop + Series.droplevel + Series.drop_duplicates + Series.duplicated + Series.equals + Series.first + Series.head + Series.idxmax + Series.idxmin + Series.isin + Series.last + Series.reindex + Series.reindex_like + Series.rename + Series.rename_axis + Series.reset_index + Series.sample + Series.set_axis + Series.take + Series.tail + Series.truncate + Series.where + Series.mask + Series.add_prefix + Series.add_suffix + Series.filter + +Missing data handling +--------------------- +.. autosummary:: + :toctree: api/ + + Series.isna + Series.notna + Series.dropna + Series.fillna + Series.interpolate + +Reshaping, sorting +------------------ +.. autosummary:: + :toctree: api/ + + Series.argsort + Series.argmin + Series.argmax + Series.reorder_levels + Series.sort_values + Series.sort_index + Series.swaplevel + Series.unstack + Series.explode + Series.searchsorted + Series.ravel + Series.repeat + Series.squeeze + Series.view + +Combining / joining / merging +----------------------------- +.. 
autosummary:: + :toctree: api/ + + Series.append + Series.replace + Series.update + +Time series-related +------------------- +.. autosummary:: + :toctree: api/ + + Series.asfreq + Series.asof + Series.shift + Series.first_valid_index + Series.last_valid_index + Series.resample + Series.tz_convert + Series.tz_localize + Series.at_time + Series.between_time + Series.tshift + Series.slice_shift + +Accessors +--------- + +Pandas provides dtype-specific methods under various accessors. +These are separate namespaces within :class:`Series` that only apply +to specific data types. + +=========================== ================================= +Data Type Accessor +=========================== ================================= +Datetime, Timedelta, Period :ref:`dt <api.series.dt>` +String :ref:`str <api.series.str>` +Categorical :ref:`cat <api.series.cat>` +Sparse :ref:`sparse <api.series.sparse>` +=========================== ================================= + +.. _api.series.dt: + +Datetimelike properties +~~~~~~~~~~~~~~~~~~~~~~~ + +``Series.dt`` can be used to access the values of the series as +datetimelike and return several properties. +These can be accessed like ``Series.dt.<property>``. + +Datetime properties +^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.date + Series.dt.time + Series.dt.timetz + Series.dt.year + Series.dt.month + Series.dt.day + Series.dt.hour + Series.dt.minute + Series.dt.second + Series.dt.microsecond + Series.dt.nanosecond + Series.dt.week + Series.dt.weekofyear + Series.dt.dayofweek + Series.dt.weekday + Series.dt.dayofyear + Series.dt.quarter + Series.dt.is_month_start + Series.dt.is_month_end + Series.dt.is_quarter_start + Series.dt.is_quarter_end + Series.dt.is_year_start + Series.dt.is_year_end + Series.dt.is_leap_year + Series.dt.daysinmonth + Series.dt.days_in_month + Series.dt.tz + Series.dt.freq + +Datetime methods +^^^^^^^^^^^^^^^^ + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.dt.to_period + Series.dt.to_pydatetime + Series.dt.tz_localize + Series.dt.tz_convert + Series.dt.normalize + Series.dt.strftime + Series.dt.round + Series.dt.floor + Series.dt.ceil + Series.dt.month_name + Series.dt.day_name + +Period properties +^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.qyear + Series.dt.start_time + Series.dt.end_time + +Timedelta properties +^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.days + Series.dt.seconds + Series.dt.microseconds + Series.dt.nanoseconds + Series.dt.components + +Timedelta methods +^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.dt.to_pytimedelta + Series.dt.total_seconds + + +.. _api.series.str: + +String handling +~~~~~~~~~~~~~~~ + +``Series.str`` can be used to access the values of the series as +strings and apply several methods to it. These can be accessed like +``Series.str.<function/property>``. + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.str.capitalize + Series.str.casefold + Series.str.cat + Series.str.center + Series.str.contains + Series.str.count + Series.str.decode + Series.str.encode + Series.str.endswith + Series.str.extract + Series.str.extractall + Series.str.find + Series.str.findall + Series.str.get + Series.str.index + Series.str.join + Series.str.len + Series.str.ljust + Series.str.lower + Series.str.lstrip + Series.str.match + Series.str.normalize + Series.str.pad + Series.str.partition + Series.str.repeat + Series.str.replace + Series.str.rfind + Series.str.rindex + Series.str.rjust + Series.str.rpartition + Series.str.rstrip + Series.str.slice + Series.str.slice_replace + Series.str.split + Series.str.rsplit + Series.str.startswith + Series.str.strip + Series.str.swapcase + Series.str.title + Series.str.translate + Series.str.upper + Series.str.wrap + Series.str.zfill + Series.str.isalnum + Series.str.isalpha + Series.str.isdigit + Series.str.isspace + Series.str.islower + Series.str.isupper + Series.str.istitle + Series.str.isnumeric + Series.str.isdecimal + Series.str.get_dummies + +.. + The following is needed to ensure the generated pages are created with the + correct template (otherwise they would be created in the Series/Index class page) + +.. + .. autosummary:: + :toctree: api/ + :template: autosummary/accessor.rst + + Series.str + Series.cat + Series.dt + Series.sparse + DataFrame.sparse + Index.str + +.. _api.series.cat: + +Categorical accessor +~~~~~~~~~~~~~~~~~~~~ + +Categorical-dtype specific methods and attributes are available under +the ``Series.cat`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.cat.categories + Series.cat.ordered + Series.cat.codes + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.cat.rename_categories + Series.cat.reorder_categories + Series.cat.add_categories + Series.cat.remove_categories + Series.cat.remove_unused_categories + Series.cat.set_categories + Series.cat.as_ordered + Series.cat.as_unordered + + +.. _api.series.sparse: + +Sparse accessor +~~~~~~~~~~~~~~~ + +Sparse-dtype specific methods and attributes are provided under the +``Series.sparse`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.sparse.npoints + Series.sparse.density + Series.sparse.fill_value + Series.sparse.sp_values + +.. autosummary:: + :toctree: api/ + + Series.sparse.from_coo + Series.sparse.to_coo + + +.. _api.series.metadata: + +Metadata +~~~~~~~~ + +:attr:`Series.attrs` is a dictionary for storing global metadata for this Series. + +.. warning:: ``Series.attrs`` is considered experimental and may change without warning. + +.. autosummary:: + :toctree: api/ + + Series.attrs + + +Plotting +-------- +``Series.plot`` is both a callable method and a namespace attribute for +specific plotting methods of the form ``Series.plot.<kind>``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_callable.rst + + Series.plot + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.plot.area + Series.plot.bar + Series.plot.barh + Series.plot.box + Series.plot.density + Series.plot.hist + Series.plot.kde + Series.plot.line + Series.plot.pie + +.. autosummary:: + :toctree: api/ + + Series.hist + +Serialization / IO / conversion +------------------------------- +.. 
autosummary:: + :toctree: api/ + + Series.to_pickle + Series.to_csv + Series.to_dict + Series.to_excel + Series.to_frame + Series.to_xarray + Series.to_hdf + Series.to_sql + Series.to_json + Series.to_string + Series.to_clipboard + Series.to_latex + Series.to_markdown diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst new file mode 100644 index 00000000..24a47336 --- /dev/null +++ b/doc/source/reference/style.rst @@ -0,0 +1,67 @@ +{{ header }} + +.. _api.style: + +===== +Style +===== +.. currentmodule:: pandas.io.formats.style + +``Styler`` objects are returned by :attr:`pandas.DataFrame.style`. + +Styler constructor +------------------ +.. autosummary:: + :toctree: api/ + + Styler + Styler.from_custom_template + +Styler properties +----------------- +.. autosummary:: + :toctree: api/ + + Styler.env + Styler.template + Styler.loader + +Style application +----------------- +.. autosummary:: + :toctree: api/ + + Styler.apply + Styler.applymap + Styler.where + Styler.format + Styler.set_precision + Styler.set_table_styles + Styler.set_table_attributes + Styler.set_caption + Styler.set_properties + Styler.set_uuid + Styler.set_na_rep + Styler.clear + Styler.pipe + +Builtin styles +-------------- +.. autosummary:: + :toctree: api/ + + Styler.highlight_max + Styler.highlight_min + Styler.highlight_null + Styler.background_gradient + Styler.bar + +Style export and import +----------------------- +.. autosummary:: + :toctree: api/ + + Styler.render + Styler.export + Styler.use + Styler.to_excel diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst new file mode 100644 index 00000000..3db1aa12 --- /dev/null +++ b/doc/source/reference/window.rst @@ -0,0 +1,87 @@ +{{ header }} + +.. _api.window: + +====== +Window +====== + +Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. 
+Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. +EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. + +Standard moving window functions +-------------------------------- +.. currentmodule:: pandas.core.window.rolling + +.. autosummary:: + :toctree: api/ + + Rolling.count + Rolling.sum + Rolling.mean + Rolling.median + Rolling.var + Rolling.std + Rolling.min + Rolling.max + Rolling.corr + Rolling.cov + Rolling.skew + Rolling.kurt + Rolling.apply + Rolling.aggregate + Rolling.quantile + Window.mean + Window.sum + Window.var + Window.std + +.. _api.functions_expanding: + +Standard expanding window functions +----------------------------------- +.. currentmodule:: pandas.core.window.expanding + +.. autosummary:: + :toctree: api/ + + Expanding.count + Expanding.sum + Expanding.mean + Expanding.median + Expanding.var + Expanding.std + Expanding.min + Expanding.max + Expanding.corr + Expanding.cov + Expanding.skew + Expanding.kurt + Expanding.apply + Expanding.aggregate + Expanding.quantile + +Exponentially-weighted moving window functions +---------------------------------------------- +.. currentmodule:: pandas.core.window.ewm + +.. autosummary:: + :toctree: api/ + + EWM.mean + EWM.std + EWM.var + EWM.corr + EWM.cov + +Window Indexer +-------------- +.. currentmodule:: pandas + +Base class for defining custom window boundaries. + +.. 
autosummary:: + :toctree: api/ + + api.indexers.BaseIndexer diff --git a/doc/source/styled.xlsx b/doc/source/styled.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1233ff2b8692bad1cfcd54f59d26eb315cfa728c GIT binary patch literal 5682 zcmZ`-1z3}9_aEI#3Ze)|3o^QEv>+i}0uqkcMt6@=I!2e2sC0wWMu&ia0@58SjWqI~ z@B95fU%B3IyYA=Nu4})ubD!s&^E>xXQxyxF5&!_;13p(uKBi|H#zLaMQD6c9MCiMj zvo*v8$o=bDnxGNcE<@barjm9}tB>liko4Mw8eU*Jv0loFCo)o0`ul{pKoS{&O~q2K$v?!mX##}swl>kxCc&A6ncxA^jty9ve{7U(Bm4#& zy}?TKB1JDN2TM(72S*p+Ge<{mPbfq!MvbVQhgAMbWpftBu>rD>sZc^C9>>8T41HmP zgr}#k;q>)h&Ch7uEc<(xDqRJ`bkr{&B*X%`%4pPXr>hT&EZGz=N(W{h+yYwVkR>7o zV(^d;Qg4~KpNdb~yA6qY+IMP?>N}(nif*pi@v@(!7$3x#)Jc$>*I!h?=O0~S{^_s7 z3&@upw4a>l2SfHxe=QxHt$#T@nD9ikgNL-IEn=mmg_SNZ_gkc;1|b})t3Ga~&O9+} zrC_#RxYRg&182xpQ*ve-w)C>98d|}(? z8>b#x!*{ z-xA=>>@Ex9G>4NZrnC~-+*&fZq!9NA%u2MZQnjNAbAGdKG8fb_J+?g1414Bxe^qeJ ztWQM@$E12-W#a5v^>me<&PCZ9PsVKzS>@J*;xj+z%S8_=_m)bZ^Vn!09E#Ppop(Jt zX(pLWq__Y;CL20+(b4gQ09|dYq1Jz2d4I)c%gESeN`%sXr~IQQ_y|hWEfiE&L*ZiU zK3$U*vZYQRN@frZ@v@2!320LiA{|I3*-B7*pCN(GwRt|IptQar;&rgiZLtyd0a=$v?ZDD{_B|t`%7O4cKLpPEM@RW%RQP-(;$m3_ zL+-vuom)9k{Zp<2P}+s%Jx`Ikfze3EyVwg)nzGxo4k2v?AcC5F+mBY0M^k0-uir&$ zYm6QH&40_V$s~+6))=$*m>r9G*C7W`bxglu3&^671Om+>nMPHKr?C!!Iw;jEmETHI|6^C;J zAly0fSkt#0R2Q+(o-k?>H$Kc!t4WKW-W&8o3C`FxQ-1j)rKzug-#g1sY{(uFU|Wn) zwAGX`oOt1~v654iIH4vx)gIqW)^i6hAo&r+#LR$?PJ4 z%x5a`v9{GrMXs!a8~n5=#qv9D;WncZ-am0pGX)9n#^0gC1mCGvWDK!w+PzoK9Ftz! 
zPR1!KWpP`G*17gpP@U7;5OX8)?h#M*(HgP~!xF(nDf8a%gYj-d_|_xK2L_8RX=t$KSB3MY3rYnH)%Fs??XgxiZx;7HK`ZolEeoyJ4S+|hbr zCOntF>B!n@@&~VuubhBDoGQo<&EqO62>c=<+-j>T*Idl zX6rSBMEr8~&Z=MQ5UsVWA}WWK9~{;14G@ciB@*cBJYLZWo*zARUtucb(N2+Tpp|XK zg1*n15fjNyjc+6z(~5KGINj{>q+JsIg5Btsst;YC92q%ZM?I;?-;qeB>v^%C$GDvv zmO9)a7w+X_mI_Yl))b(GLQ0sq6zk0!Cib`}7p%-5zQPMGe}cKMrEAV>+>ON&bkt%OE;`*ykF63Cs{+KKK#?L zv^R&CO#K!Sxw0jK`n5{l!ENodPZiM_kH)X6c3d7{-tA|(#=)zb+f~#a%KKUv&(0& zL~-TEnOBKqWaG2vFP?*^ux5w+|SH*!!7l}d>Q zC~yUbshf_nkW)rf5aTdXJ1_6-B-(jIO`LHBK|^%-tUgos%e_QN@;l2->aZM?O|cS% zPA};UHqo}SnyYqr!?GT-T}_*QlB}VYR6NK}Tew#p?vml%(5_H&WiWp4ly6?taawE+ z%XE!`ORm5l(3vy5MMbf!Oy`u~DHFCz-S=o-cg@ zu4R<*p@3N|rA6uM%F^yBUSh96g9GB+=OXjr3hA``pCd~|qX`{ApJi61r$3R{9FYjz zjmI(NO{){iUr{HNnEAOwQqkTSv$z%6hVdu8>adM%$zTHj3O4`%n!gFe!@=3!#m3s& z)dl$1?JpK_OEGg;79vKrsl*+3-Tx8$U=&d6gF51Q)v)M33@a8wu{aI6Pj5}fsyG|U zzGaV_;^Rn=jz0sXNmXC;E5Ww&=b6i-)Qu%IlqkOPXd2VuAHz;thuFphbqgV=;eX>fuDne#?nV zWnh5Byt?i_J8Wqz^|j6EybomQdE#>mY5#22tZXQ2*TA}2HSi|Ng(r54Gy`_vcPUW@ zD-y_(=t(((^80T|`NtLoe*c0xcZX0Pw4SFP_VgY@F^Pp^=zLUy-Do9!EeC^?=Be#W zTdHqru)SnW3ABHCZh~3P(z-%^W@M43np!R2MV8rt)S~h5%x&cieL~N&D=Ql^LdpD` z77G)u1jdY`2r{!{F1osTa-v}N;YUWgt)sHa^a3d{E;)XL7$ZAoVV`I|JeZH-sZ}07 z>-{AoFj-`%z3Vo`2Lw3huF*HmT(ZXzw2Fy%(Nb|jk7cZsw!_j5!Nej}=8PhxIH%T+ zaDso}%g-^|spVAgDd@3UvM7IIt!8hxdCcM5la^0>o)2;o!PJxD2**H)#iUE*FBxE$ zBt#s@_j?Xe)%4p|G7scOG^#-I0`IlpV%;JbD1pQ$4H^)T*ADtpatk(M7rnJIcF7HQUkY8ldzPG3Z16-B-t0nuln;?~D$ zrZibbHELA#Q-e;45lGGR53e=|qk*~%!m+PJK_zU&Ds{WQqn~`@?gqM>dfB^@mN-)h zk&A$c3-_6rGIUW71((Gn1?9&VrOm<=6buk3av}Tv%tK5)Zj@@wLIXmJF&)8%d5emc zz&UoQuH#Y&yVppZN>0DmZNP_t^Oc9*%OumavO>kLlER*rseDcPuc;zxWEE8_-pD=I zwDL(*-Svk>FLL*%g{E!=H)5u|I!eY`b5k<)>w7>sOpNf(z|lKEi5P~k)Q`?#6d?Ak z1m0;JfixBL!WZog+MoN8HwrvZDG+^W0*+wni4HjiTQ(N=9{Fp|y)i#gP#u(5(1B?}4yCl-}iy zC*u48?Ob>y8$4<&bC4W^a%2vRrBj#Xq^)WeNk{NMice9t;Jm%${`GWU;eqE4zO|Yj zwMU#Q>a2RpY@+Ge{$#Xey(Fp6=6l}#3rK*N;a=AEMb%57rT5{*3_Vb&C#7;aha39& zbgkj!=AT_>KM8Bu1>J3`(CgP33_Z5G*gSK#w$gTWwuOORe$A~{Tfqftx_kTF(b;iB=Bo5U%<4Xi-A@ENp 
zpalEeh_AQk&{vKFwXd+xwaIT(uoY&Y1W1(E^D?fkUj1px#DPP-;a{ITRy26&1f#tUkflh3lR>ePbV zhT7VCz8_Qb{v5F^iF4lC4Z~&75XVK9&gCz=NO;!DKR@^STG7>A#i72vkwM|jq_E1I z(Q{z*v^M*B;`@X%JUvqyW2dI^?2kD8+I{v&SI+ctNtA26F*Xljv`R&{&DtGfzPhx| zVO_=Cu0{hJZqQ0|2pq|BmOceBr(j9hUm*AYY|qB)&Nt57>03O*PCjv9u@spx$Ww)K zx-hOow&b?>P(R%496@#jmfG%I-6P&*8?fi~4k*$0vjRV5D=OC#FxvY#pvdE|%i~{U zo?WF7Ueg^xz2Z_-C?9C75;Fs5r-SeGI}97N+2SQ!O$7y1&ihLE>@(IX9W+D2_94vr z2+H6%hC{BAh<8gtv$K)6FE`7Ky$0#n_ckgp%eVObKa_e7;u2_-Ey#(;vYj`H!}l&} ze>7`Z9a>#2B++~V13k}#RssNjCXCDKlEVr*Q9>~QfSZ3O%r7praB#5ymC(^~APp== z($FiyMZ8{EJ}c~f%SPeL5mwrrYMzDyBqXrzr!QW5w4^ufT1$$9>7*$;9a~=h34zqw zlc7ScU@gO88ix51cs!jdhs>g&UvV=tb|Scwym`uXHlJ`IZw9JA&iHmZlb zFg)dAeL@$2H*`WU#xC9`>P7)c8TTp`Ex4j&vCrzJ9xV#2G|6B+)NES$^3X5>A4BQ% zNnZ=)R{LC3RVxu?-4J(EUp}R+Bcz~Z)3~h`0s3}7U3krET+q``IWMOB%Z&7)hSp@_ z@o-5?k3PJ+Jn9FYd)uD@xk9qyxBLnSx{7H34#?lS%0I*MAB_b>7|cToXuIzvqYvH8 zeN$xGTz4t+@$-xHknhP~cQU-6fO7GG1kXA?$RtQqB+k&Ny{n`o_ABC`iuf`Ic%wO~ zDt0?^O5?$@ci+zqyNW6s>uA#OMXH$u#wx2m;(b0UZXgp`b3t*uHijG?{j!nPi^n|B zkv^h}q>&|QZ*5(*AP?+%!?_Bo-q&1N6~T)vqJDv77_U)f(n%rsL&k?eg?j zCe-SvX>{-qUvg@f+YBxsF`!KV{Z#A#24>hDL9?oskxQQ$n4HA}3!7<{mj~aEP02YA zj-j~et|%oVN)i`;&4|ZXAni0hdC7y{1R?mli$_KKX3t;U`?{8k(E6 z>wU|xwjxtXG4M{*^OU?Pc^`SPS&C$Q|K8kg>-X^?7W*>!BlMVKH_l8BwanoooPz=BFYx7yWrs*)AgJjel znmE-5)ipR#k4TWPW=YlEk0h*&kNF8BzAMJCCF{WxzES|a@}ozv1=hg7_Dy=DLCF!})Im zC-{FE_}5YWx|Qo>``Zc(O>6%~z1J;VXTskWDA31-Ul#t$i`Svod*W}XJX(E3L;u+d yu7j`lkKbTjv=oR2|Br5R-Olxr{B1{t{9g-HQxykI`TzhS`W=XVR++yF8SsDP;o9B+ literal 0 HcmV?d00001 diff --git a/doc/source/themes/nature_with_gtoc/layout.html b/doc/source/themes/nature_with_gtoc/layout.html new file mode 100644 index 00000000..6e7d8ece --- /dev/null +++ b/doc/source/themes/nature_with_gtoc/layout.html @@ -0,0 +1,108 @@ +{# + +Subset of agogo theme +agogo/layout.html + +Sphinx layout template for the agogo theme, originally written +by Andi Albrecht. + +:copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. +:license: BSD, see LICENSE for details. 
+#} +{% extends "basic/layout.html" %} + +{%- block content %} +
    +
    +
    +
    + {%- block sidebar1 %} + {%- block sidebartoc %} +

    {{ _('Table Of Contents') }}

    + {{ toctree(includehidden=True) }} + {%- endblock %} + {%- block sidebarsearch %} +

    {{ _('Search') }}

    + + +

    + {{ _('Enter search terms or a module, class or function name.') }} +

    + +
    + {%- endblock %} + {# possible location for sidebar #} {% endblock %} + + + {%- block document %} +
    + {%- if render_sidebar %} +
    + {%- endif %} +
    + {% block body %} {% endblock %} +
    + {%- if render_sidebar %} +
    + {%- endif %} +
    + {%- endblock %} + + {%- block sidebar2 %} + + {% endblock %} +
    +
    +
    +
    +{%- endblock %} + +{%- block footer %} + +Scroll To Top + + + + + + + +{% endblock %} diff --git a/doc/source/themes/nature_with_gtoc/static/nature.css_t b/doc/source/themes/nature_with_gtoc/static/nature.css_t new file mode 100644 index 00000000..4571d97e --- /dev/null +++ b/doc/source/themes/nature_with_gtoc/static/nature.css_t @@ -0,0 +1,356 @@ +/* + * nature.css_t + * ~~~~~~~~~~~~ + * + * Sphinx stylesheet -- nature theme. + * + * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: Arial, sans-serif; + font-size: 100%; + background-color: #111; + color: #555; + margin: 0; + padding: 0; +} + + +div.documentwrapper { + width: 100%; +} + +div.bodywrapper { +/* ugly hack, probably not attractive with other font size for re*/ + margin: 0 0 0 {{ theme_sidebarwidth|toint}}px; + min-width: 540px; + max-width: 800px; +} + + +hr { + border: 1px solid #B1B4B6; +} + +div.document { + background-color: #eee; +} + +div.body { + background-color: #ffffff; + color: #3E4349; + padding: 0 30px 30px 30px; + font-size: 0.9em; +} + +div.footer { + color: #555; + width: 100%; + padding: 13px 0; + text-align: center; + font-size: 75%; +} + +div.footer a { + color: #444; + text-decoration: underline; +} + +div.related { + background-color: #6BA81E; + line-height: 32px; + color: #fff; + text-shadow: 0px 1px 0 #444; + font-size: 0.9em; +} + +div.related a { + color: #E2F3CC; +} + +div.sphinxsidebar { + font-size: 0.75em; + line-height: 1.5em; + width: {{ theme_sidebarwidth|toint }}px; + margin: 0 ; + float: left; + + background-color: #eee; +} +/* +div.sphinxsidebarwrapper{ + padding: 20px 0; +} +*/ +div.sphinxsidebar h3, +div.sphinxsidebar h4 { + font-family: Arial, sans-serif; + color: #222; + font-size: 1.2em; + font-weight: normal; + margin: 20px 0 0 0; + padding: 5px 10px; + 
background-color: #ddd; + text-shadow: 1px 1px 0 white +} + +div.sphinxsidebar h4{ + font-size: 1.1em; +} + +div.sphinxsidebar h3 a { + color: #444; +} + + +div.sphinxsidebar p { + color: #888; +/* padding: 5px 20px;*/ +} + +div.sphinxsidebar p.searchtip { + color: #888; + padding: 5px 20px; +} + + +div.sphinxsidebar p.topless { +} + +div.sphinxsidebar ul { + margin: 10px 20px; + padding: 0; + color: #000; +} + +div.sphinxsidebar a { + color: #444; +} + +div.sphinxsidebar input { + border: 1px solid #ccc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar input[type=text]{ + margin-left: 20px; +} + +/* -- body styles ----------------------------------------------------------- */ + +a { + color: #005B81; + text-decoration: none; +} + +a:hover { + color: #E32E00; + text-decoration: underline; +} + +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + font-family: Arial, sans-serif; + background-color: #BED4EB; + font-weight: normal; + color: #212224; + margin: 30px 0px 10px 0px; + padding: 5px 0 5px 10px; + text-shadow: 0px 1px 0 white +} + +div.body h1 { border-top: 20px solid white; margin-top: 0; font-size: 200%; } +div.body h2 { font-size: 150%; background-color: #C8D5E3; } +div.body h3 { font-size: 120%; background-color: #D8DEE3; } +div.body h4 { font-size: 110%; background-color: #D8DEE3; } +div.body h5 { font-size: 100%; background-color: #D8DEE3; } +div.body h6 { font-size: 100%; background-color: #D8DEE3; } + +p.rubric { + border-bottom: 1px solid rgb(201, 201, 201); +} + +a.headerlink { + color: #c60f0f; + font-size: 0.8em; + padding: 0 4px 0 4px; + text-decoration: none; +} + +a.headerlink:hover { + background-color: #c60f0f; + color: white; +} + +div.body p, div.body dd, div.body li { + line-height: 1.5em; +} + +div.admonition p.admonition-title + p, div.deprecated p { + display: inline; +} + +div.deprecated { + margin-bottom: 10px; + margin-top: 10px; + padding: 7px; + background-color: #ffe4e4; + 
border: 1px solid #f66; +} + +div.highlight{ + background-color: white; +} + +div.note { + background-color: #eee; + border: 1px solid #ccc; +} + +div.seealso { + background-color: #ffc; + border: 1px solid #ff6; +} + +div.topic { + background-color: #eee; +} + +div.warning { + background-color: #ffe4e4; + border: 1px solid #f66; +} + +p.admonition-title { + display: inline; +} + +p.admonition-title:after { + content: ":"; +} + +pre { + padding: 10px; + background-color: rgb(250,250,250); + color: #222; + line-height: 1.2em; + border: 1px solid rgb(201,201,201); + font-size: 1.1em; + margin: 1.5em 0 1.5em 0; + -webkit-box-shadow: 1px 1px 1px #d8d8d8; + -moz-box-shadow: 1px 1px 1px #d8d8d8; +} + +tt { + background-color: #ecf0f3; + color: #222; + /* padding: 1px 2px; */ + font-size: 1.1em; + font-family: monospace; +} + +.viewcode-back { + font-family: Arial, sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} + + +/** + * Styling for field lists + */ + + /* grey highlighting of 'parameter' and 'returns' field */ +table.field-list { + border-collapse: separate; + border-spacing: 10px; + margin-left: 1px; + /* border-left: 5px solid rgb(238, 238, 238) !important; */ +} + +table.field-list th.field-name { + /* display: inline-block; */ + padding: 1px 8px 1px 5px; + white-space: nowrap; + background-color: rgb(238, 238, 238); +} + +/* italic font for parameter types */ +table.field-list td.field-body > p { + font-style: italic; +} + +table.field-list td.field-body > p > strong { + font-style: normal; +} + +/* reduced space around parameter description */ +td.field-body blockquote { + border-left: none; + margin: 0em 0em 0.3em; + padding-left: 30px; +} + +// Adapted from the new Jupyter notebook style +// https://github.com/jupyter/notebook/blob/c8841b68c4c0739bbee1291e0214771f24194079/notebook/static/notebook/less/renderedhtml.less#L59 +table { + margin-left: auto; + margin-right: 
auto; + border: none; + border-collapse: collapse; + border-spacing: 0; + color: @rendered_html_border_color; + table-layout: fixed; +} +thead { + border-bottom: 1px solid @rendered_html_border_color; + vertical-align: bottom; +} +tr, th, td { + vertical-align: middle; + padding: 0.5em 0.5em; + line-height: normal; + white-space: normal; + max-width: none; + border: none; +} +th { + font-weight: bold; +} +th.col_heading { + text-align: right; +} +tbody tr:nth-child(odd) { + background: #f5f5f5; +} + +table td.data, table th.row_heading table th.col_heading { + font-family: monospace; + text-align: right; +} + + +/** + * See also + */ + +div.seealso dd { + margin-top: 0; + margin-bottom: 0; +} diff --git a/doc/source/themes/nature_with_gtoc/theme.conf b/doc/source/themes/nature_with_gtoc/theme.conf new file mode 100644 index 00000000..290a07bd --- /dev/null +++ b/doc/source/themes/nature_with_gtoc/theme.conf @@ -0,0 +1,7 @@ +[theme] +inherit = basic +stylesheet = nature.css +pygments_style = tango + +[options] +sidebarwidth = 270 diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst new file mode 100644 index 00000000..d6f5c0c7 --- /dev/null +++ b/doc/source/user_guide/advanced.rst @@ -0,0 +1,1226 @@ +.. _advanced: + +{{ header }} + +****************************** +MultiIndex / advanced indexing +****************************** + +This section covers :ref:`indexing with a MultiIndex <advanced.hierarchical>` +and :ref:`other advanced indexing features <indexing.index_types>`. + +See the :ref:`Indexing and Selecting Data <indexing>` for general indexing documentation. + +.. warning:: + + Whether a copy or a reference is returned for a setting operation may + depend on the context. This is sometimes called ``chained assignment`` and + should be avoided. See :ref:`Returning a View versus Copy + <indexing.view_versus_copy>`. + +See the :ref:`cookbook` for some advanced strategies. + +..
_advanced.hierarchical: + +Hierarchical indexing (MultiIndex) +---------------------------------- + +Hierarchical / Multi-level indexing is very exciting as it opens the door to some +quite sophisticated data analysis and manipulation, especially for working with +higher dimensional data. In essence, it enables you to store and manipulate +data with an arbitrary number of dimensions in lower dimensional data +structures like ``Series`` (1d) and ``DataFrame`` (2d). + +In this section, we will show what exactly we mean by "hierarchical" indexing +and how it integrates with all of the pandas indexing functionality +described above and in prior sections. Later, when discussing :ref:`group by +<groupby>` and :ref:`pivoting and reshaping data <reshaping>`, we'll show +non-trivial applications to illustrate how it aids in structuring data for +analysis. + +See the :ref:`cookbook` for some advanced strategies. + +.. versionchanged:: 0.24.0 + + :attr:`MultiIndex.labels` has been renamed to :attr:`MultiIndex.codes` + and :attr:`MultiIndex.set_labels` to :attr:`MultiIndex.set_codes`. + +Creating a MultiIndex (hierarchical index) object +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :class:`MultiIndex` object is the hierarchical analogue of the standard +:class:`Index` object which typically stores the axis labels in pandas objects. You +can think of ``MultiIndex`` as an array of tuples where each tuple is unique. A +``MultiIndex`` can be created from a list of arrays (using +:meth:`MultiIndex.from_arrays`), an array of tuples (using +:meth:`MultiIndex.from_tuples`), a crossed set of iterables (using +:meth:`MultiIndex.from_product`), or a :class:`DataFrame` (using +:meth:`MultiIndex.from_frame`). The ``Index`` constructor will attempt to return +a ``MultiIndex`` when it is passed a list of tuples. The following examples +demonstrate different ways to initialize MultiIndexes. + + +..
ipython:: python + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + tuples = list(zip(*arrays)) + tuples + + index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second']) + index + + s = pd.Series(np.random.randn(8), index=index) + s + +When you want every pairing of the elements in two iterables, it can be easier +to use the :meth:`MultiIndex.from_product` method: + +.. ipython:: python + + iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']] + pd.MultiIndex.from_product(iterables, names=['first', 'second']) + +You can also construct a ``MultiIndex`` from a ``DataFrame`` directly, using +the method :meth:`MultiIndex.from_frame`. This is a complementary method to +:meth:`MultiIndex.to_frame`. + +.. versionadded:: 0.24.0 + +.. ipython:: python + + df = pd.DataFrame([['bar', 'one'], ['bar', 'two'], + ['foo', 'one'], ['foo', 'two']], + columns=['first', 'second']) + pd.MultiIndex.from_frame(df) + +As a convenience, you can pass a list of arrays directly into ``Series`` or +``DataFrame`` to construct a ``MultiIndex`` automatically: + +.. ipython:: python + + arrays = [np.array(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']), + np.array(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'])] + s = pd.Series(np.random.randn(8), index=arrays) + s + df = pd.DataFrame(np.random.randn(8, 4), index=arrays) + df + +All of the ``MultiIndex`` constructors accept a ``names`` argument which stores +string names for the levels themselves. If no names are provided, ``None`` will +be assigned: + +.. ipython:: python + + df.index.names + +This index can back any axis of a pandas object, and the number of **levels** +of the index is up to you: + +.. 
ipython:: python + + df = pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index) + df + pd.DataFrame(np.random.randn(6, 6), index=index[:6], columns=index[:6]) + +We've "sparsified" the higher levels of the indexes to make the console output a +bit easier on the eyes. Note that how the index is displayed can be controlled using the +``multi_sparse`` option in ``pandas.set_option()``: + +.. ipython:: python + + with pd.option_context('display.multi_sparse', False): + df + +It's worth keeping in mind that there's nothing preventing you from using +tuples as atomic labels on an axis: + +.. ipython:: python + + pd.Series(np.random.randn(8), index=tuples) + +The reason that the ``MultiIndex`` matters is that it can allow you to do +grouping, selection, and reshaping operations as we will describe below and in +subsequent areas of the documentation. As you will see in later sections, you +can find yourself working with hierarchically-indexed data without creating a +``MultiIndex`` explicitly yourself. However, when loading data from a file, you +may wish to generate your own ``MultiIndex`` when preparing the data set. + +.. _advanced.get_level_values: + +Reconstructing the level labels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The method :meth:`~MultiIndex.get_level_values` will return a vector of the labels for each +location at a particular level: + +.. ipython:: python + + index.get_level_values(0) + index.get_level_values('second') + +Basic indexing on axis with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the important features of hierarchical indexing is that you can select +data by a "partial" label identifying a subgroup in the data. **Partial** +selection "drops" levels of the hierarchical index in the result in a +completely analogous way to selecting a column in a regular DataFrame: + +..
ipython:: python + + df['bar'] + df['bar', 'one'] + df['bar']['one'] + s['qux'] + +See :ref:`Cross-section with hierarchical index <advanced.xs>` for how to select +on a deeper level. + +.. _advanced.shown_levels: + +Defined levels +~~~~~~~~~~~~~~ + +The :class:`MultiIndex` keeps all the defined levels of an index, even +if they are not actually used. When slicing an index, you may notice this. +For example: + +.. ipython:: python + + df.columns.levels # original MultiIndex + + df[['foo','qux']].columns.levels # sliced + +This is done to avoid a recomputation of the levels in order to make slicing +highly performant. If you want to see only the used levels, you can use the +:meth:`~MultiIndex.get_level_values` method. + +.. ipython:: python + + df[['foo', 'qux']].columns.to_numpy() + + # for a specific level + df[['foo', 'qux']].columns.get_level_values(0) + +To reconstruct the ``MultiIndex`` with only the used levels, the +:meth:`~MultiIndex.remove_unused_levels` method may be used. + +.. ipython:: python + + new_mi = df[['foo', 'qux']].columns.remove_unused_levels() + new_mi.levels + +Data alignment and using ``reindex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Operations between differently-indexed objects having ``MultiIndex`` on the +axes will work as you expect; data alignment will work the same as an Index of +tuples: + +.. ipython:: python + + s + s[:-2] + s + s[::2] + +The :meth:`~DataFrame.reindex` method of ``Series``/``DataFrames`` can be +called with another ``MultiIndex``, or even a list or array of tuples: + +.. ipython:: python + + s.reindex(index[:3]) + s.reindex([('foo', 'two'), ('bar', 'one'), ('qux', 'one'), ('baz', 'one')]) + +.. _advanced.advanced_hierarchical: + +Advanced indexing with hierarchical index +----------------------------------------- + +Syntactically integrating ``MultiIndex`` in advanced indexing with ``.loc`` is a +bit challenging, but we've made every effort to do so. In general, MultiIndex +keys take the form of tuples.
For example, the following works as you would expect: + +.. ipython:: python + + df = df.T + df + df.loc[('bar', 'two')] + +Note that ``df.loc['bar', 'two']`` would also work in this example, but this shorthand +notation can lead to ambiguity in general. + +If you also want to index a specific column with ``.loc``, you must use a tuple +like this: + +.. ipython:: python + + df.loc[('bar', 'two'), 'A'] + +You don't have to specify all levels of the ``MultiIndex`` by passing only the +first elements of the tuple. For example, you can use "partial" indexing to +get all elements with ``bar`` in the first level as follows: + +.. ipython:: python + + df.loc['bar'] + +This is a shortcut for the slightly more verbose notation ``df.loc[('bar',),]`` (equivalent +to ``df.loc['bar',]`` in this example). + +"Partial" slicing also works quite nicely. + +.. ipython:: python + + df.loc['baz':'foo'] + +You can slice with a 'range' of values, by providing a slice of tuples. + +.. ipython:: python + + df.loc[('baz', 'two'):('qux', 'one')] + df.loc[('baz', 'two'):'foo'] + +Passing a list of labels or tuples works similarly to reindexing: + +.. ipython:: python + + df.loc[[('bar', 'two'), ('qux', 'one')]] + +.. note:: + + It is important to note that tuples and lists are not treated identically + in pandas when it comes to indexing. Whereas a tuple is interpreted as one + multi-level key, a list is used to specify several keys. Or in other words, + tuples go horizontally (traversing levels), lists go vertically (scanning levels). + +Importantly, a list of tuples indexes several complete ``MultiIndex`` keys, +whereas a tuple of lists refers to several values within a level: + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4, 5, 6], + index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]])) + s.loc[[("A", "c"), ("B", "d")]] # list of tuples + s.loc[(["A", "B"], ["c", "d"])] # tuple of lists + + +..
_advanced.mi_slicers: + +Using slicers +~~~~~~~~~~~~~ + +You can slice a ``MultiIndex`` by providing multiple indexers. + +You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label <indexing.label>`, +including slices, lists of labels, labels, and boolean indexers. + +You can use ``slice(None)`` to select all the contents of *that* level. You do not need to specify all the +*deeper* levels, they will be implied as ``slice(None)``. + +As usual, **both sides** of the slicers are included as this is label indexing. + +.. warning:: + + You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and + for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted + as indexing *both* axes, rather than into say the ``MultiIndex`` for the rows. + + You should do this: + + .. code-block:: python + + df.loc[(slice('A1', 'A3'), ...), :] # noqa: E999 + + You should **not** do this: + + .. code-block:: python + + df.loc[(slice('A1', 'A3'), ...)] # noqa: E999 + +.. ipython:: python + + def mklbl(prefix, n): + return ["%s%s" % (prefix, i) for i in range(n)] + + miindex = pd.MultiIndex.from_product([mklbl('A', 4), + mklbl('B', 2), + mklbl('C', 4), + mklbl('D', 2)]) + micolumns = pd.MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + dfmi = pd.DataFrame(np.arange(len(miindex) * len(micolumns)) + .reshape((len(miindex), len(micolumns))), + index=miindex, + columns=micolumns).sort_index().sort_index(axis=1) + dfmi + +Basic MultiIndex slicing using slices, lists, and labels. + +.. ipython:: python + + dfmi.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + + +You can use :class:`pandas.IndexSlice` to facilitate a more natural syntax +using ``:``, rather than using ``slice(None)``. + +..
ipython:: python + + idx = pd.IndexSlice + dfmi.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] + +It is possible to perform quite complicated selections using this method on multiple +axes at the same time. + +.. ipython:: python + + dfmi.loc['A1', (slice(None), 'foo')] + dfmi.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] + +Using a boolean indexer you can provide selection related to the *values*. + +.. ipython:: python + + mask = dfmi[('a', 'foo')] > 200 + dfmi.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']] + +You can also specify the ``axis`` argument to ``.loc`` to interpret the passed +slicers on a single axis. + +.. ipython:: python + + dfmi.loc(axis=0)[:, :, ['C1', 'C3']] + +Furthermore, you can *set* the values using the following methods. + +.. ipython:: python + + df2 = dfmi.copy() + df2.loc(axis=0)[:, :, ['C1', 'C3']] = -10 + df2 + +You can use a right-hand-side of an alignable object as well. + +.. ipython:: python + + df2 = dfmi.copy() + df2.loc[idx[:, :, ['C1', 'C3']], :] = df2 * 1000 + df2 + +.. _advanced.xs: + +Cross-section +~~~~~~~~~~~~~ + +The :meth:`~DataFrame.xs` method of ``DataFrame`` additionally takes a level argument to make +selecting data at a particular level of a ``MultiIndex`` easier. + +.. ipython:: python + + df + df.xs('one', level='second') + +.. ipython:: python + + # using the slicers + df.loc[(slice(None), 'one'), :] + +You can also select on the columns with ``xs``, by +providing the axis argument. + +.. ipython:: python + + df = df.T + df.xs('one', level='second', axis=1) + +.. ipython:: python + + # using the slicers + df.loc[:, (slice(None), 'one')] + +``xs`` also allows selection with multiple keys. + +.. ipython:: python + + df.xs(('one', 'bar'), level=('second', 'first'), axis=1) + +.. ipython:: python + + # using the slicers + df.loc[:, ('bar', 'one')] + +You can pass ``drop_level=False`` to ``xs`` to retain +the level that was selected. + +.. 
ipython:: python + + df.xs('one', level='second', axis=1, drop_level=False) + +Compare the above with the result using ``drop_level=True`` (the default value). + +.. ipython:: python + + df.xs('one', level='second', axis=1, drop_level=True) + +.. ipython:: python + :suppress: + + df = df.T + +.. _advanced.advanced_reindex: + +Advanced reindexing and alignment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the parameter ``level`` in the :meth:`~DataFrame.reindex` and +:meth:`~DataFrame.align` methods of pandas objects is useful to broadcast +values across a level. For instance: + +.. ipython:: python + + midx = pd.MultiIndex(levels=[['zero', 'one'], ['x', 'y']], + codes=[[1, 1, 0, 0], [1, 0, 1, 0]]) + df = pd.DataFrame(np.random.randn(4, 2), index=midx) + df + df2 = df.mean(level=0) + df2 + df2.reindex(df.index, level=0) + + # aligning + df_aligned, df2_aligned = df.align(df2, level=0) + df_aligned + df2_aligned + + +Swapping levels with ``swaplevel`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~MultiIndex.swaplevel` method can switch the order of two levels: + +.. ipython:: python + + df[:5] + df[:5].swaplevel(0, 1, axis=0) + +.. _advanced.reorderlevels: + +Reordering levels with ``reorder_levels`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~MultiIndex.reorder_levels` method generalizes the ``swaplevel`` +method, allowing you to permute the hierarchical index levels in one step: + +.. ipython:: python + + df[:5].reorder_levels([1, 0], axis=0) + +.. _advanced.index_names: + +Renaming names of an ``Index`` or ``MultiIndex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.rename` method is used to rename the labels of a +``MultiIndex``, and is typically used to rename the columns of a ``DataFrame``. +The ``columns`` argument of ``rename`` allows a dictionary to be specified +that includes only the columns you wish to rename. + +.. 
ipython:: python + + df.rename(columns={0: "col0", 1: "col1"}) + +This method can also be used to rename specific labels of the main index +of the ``DataFrame``. + +.. ipython:: python + + df.rename(index={"one": "two", "y": "z"}) + +The :meth:`~DataFrame.rename_axis` method is used to rename the name of a +``Index`` or ``MultiIndex``. In particular, the names of the levels of a +``MultiIndex`` can be specified, which is useful if ``reset_index()`` is later +used to move the values from the ``MultiIndex`` to a column. + +.. ipython:: python + + df.rename_axis(index=['abc', 'def']) + +Note that the columns of a ``DataFrame`` are an index, so that using +``rename_axis`` with the ``columns`` argument will change the name of that +index. + +.. ipython:: python + + df.rename_axis(columns="Cols").columns + +Both ``rename`` and ``rename_axis`` support specifying a dictionary, +``Series`` or a mapping function to map labels/names to new values. + +When working with an ``Index`` object directly, rather than via a ``DataFrame``, +:meth:`Index.set_names` can be used to change the names. + +.. ipython:: python + + mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y']) + mi.names + + mi2 = mi.rename("new name", level=0) + mi2 + + +You cannot set the names of the MultiIndex via a level. + +.. ipython:: python + :okexcept: + + mi.levels[0].name = "name via level" + +Use :meth:`Index.set_names` instead. + +Sorting a ``MultiIndex`` +------------------------ + +For :class:`MultiIndex`-ed objects to be indexed and sliced effectively, +they need to be sorted. As with any index, you can use :meth:`~DataFrame.sort_index`. + +.. ipython:: python + + import random + random.shuffle(tuples) + s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples)) + s + s.sort_index() + s.sort_index(level=0) + s.sort_index(level=1) + +.. _advanced.sortlevel_byname: + +You may also pass a level name to ``sort_index`` if the ``MultiIndex`` levels +are named. + +.. 
ipython:: python + + s.index.set_names(['L1', 'L2'], inplace=True) + s.sort_index(level='L1') + s.sort_index(level='L2') + +On higher dimensional objects, you can sort any of the other axes by level if +they have a ``MultiIndex``: + +.. ipython:: python + + df.T.sort_index(level=1, axis=1) + +Indexing will work even if the data are not sorted, but will be rather +inefficient (and show a ``PerformanceWarning``). It will also +return a copy of the data rather than a view: + +.. ipython:: python + + dfm = pd.DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}) + dfm = dfm.set_index(['jim', 'joe']) + dfm + +.. code-block:: ipython + + In [4]: dfm.loc[(1, 'z')] + PerformanceWarning: indexing past lexsort depth may impact performance. + + Out[4]: + jolie + jim joe + 1 z 0.64094 + +.. _advanced.unsorted: + +Furthermore, if you try to index something that is not fully lexsorted, this can raise: + +.. code-block:: ipython + + In [5]: dfm.loc[(0, 'y'):(1, 'z')] + UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)' + +The :meth:`~MultiIndex.is_lexsorted` method on a ``MultiIndex`` shows if the +index is sorted, and the ``lexsort_depth`` property returns the sort depth: + +.. ipython:: python + + dfm.index.is_lexsorted() + dfm.index.lexsort_depth + +.. ipython:: python + + dfm = dfm.sort_index() + dfm + dfm.index.is_lexsorted() + dfm.index.lexsort_depth + +And now selection works as expected. + +.. ipython:: python + + dfm.loc[(0, 'y'):(1, 'z')] + +Take methods +------------ + +.. _advanced.take: + +Similar to NumPy ndarrays, pandas ``Index``, ``Series``, and ``DataFrame`` also provides +the :meth:`~DataFrame.take` method that retrieves elements along a given axis at the given +indices. The given indices must be either a list or an ndarray of integer +index positions. ``take`` will also accept negative integers as relative positions to the end of the object. + +.. 
ipython:: python + + index = pd.Index(np.random.randint(0, 1000, 10)) + index + + positions = [0, 9, 3] + + index[positions] + index.take(positions) + + ser = pd.Series(np.random.randn(10)) + + ser.iloc[positions] + ser.take(positions) + +For DataFrames, the given indices should be a 1d list or ndarray that specifies +row or column positions. + +.. ipython:: python + + frm = pd.DataFrame(np.random.randn(5, 3)) + + frm.take([1, 4, 3]) + + frm.take([0, 2], axis=1) + +It is important to note that the ``take`` method on pandas objects is not +intended to work on boolean indices and may return unexpected results. + +.. ipython:: python + + arr = np.random.randn(10) + arr.take([False, False, True, True]) + arr[[0, 1]] + + ser = pd.Series(np.random.randn(10)) + ser.take([False, False, True, True]) + ser.iloc[[0, 1]] + +Finally, as a small note on performance, because the ``take`` method handles +a narrower range of inputs, it can offer performance that is a good deal +faster than fancy indexing. + +.. ipython:: python + + arr = np.random.randn(10000, 5) + indexer = np.arange(10000) + random.shuffle(indexer) + + %timeit arr[indexer] + %timeit arr.take(indexer, axis=0) + +.. ipython:: python + + ser = pd.Series(arr[:, 0]) + %timeit ser.iloc[indexer] + %timeit ser.take(indexer) + +.. _indexing.index_types: + +Index types +----------- + +We have discussed ``MultiIndex`` in the previous sections pretty extensively. +Documentation about ``DatetimeIndex`` and ``PeriodIndex`` is shown :ref:`here <timeseries.overview>`, +and documentation about ``TimedeltaIndex`` is found :ref:`here <timedeltas.index>`. + +In the following sub-sections we will highlight some other index types. + +.. _indexing.categoricalindex: + +CategoricalIndex +~~~~~~~~~~~~~~~~ + +:class:`CategoricalIndex` is a type of index that is useful for supporting +indexing with duplicates. This is a container around a :class:`Categorical` +and allows efficient indexing and storage of an index with a large number of duplicated elements. + +..
ipython:: python + + from pandas.api.types import CategoricalDtype + df = pd.DataFrame({'A': np.arange(6), + 'B': list('aabbca')}) + df['B'] = df['B'].astype(CategoricalDtype(list('cab'))) + df + df.dtypes + df['B'].cat.categories + +Setting the index will create a ``CategoricalIndex``. + +.. ipython:: python + + df2 = df.set_index('B') + df2.index + +Indexing with ``__getitem__/.iloc/.loc`` works similarly to an ``Index`` with duplicates. +The indexers **must** be in the category or the operation will raise a ``KeyError``. + +.. ipython:: python + + df2.loc['a'] + +The ``CategoricalIndex`` is **preserved** after indexing: + +.. ipython:: python + + df2.loc['a'].index + +Sorting the index will sort by the order of the categories (recall that we +created the index with ``CategoricalDtype(list('cab'))``, so the sorted +order is ``cab``). + +.. ipython:: python + + df2.sort_index() + +Groupby operations on the index will preserve the index nature as well. + +.. ipython:: python + + df2.groupby(level=0).sum() + df2.groupby(level=0).sum().index + +Reindexing operations will return a resulting index based on the type of the passed +indexer. Passing a list will return a plain-old ``Index``; indexing with +a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories +of the **passed** ``Categorical`` dtype. This allows one to arbitrarily index these even with +values **not** in the categories, similarly to how you can reindex **any** pandas index. + +.. ipython:: python + + df3 = pd.DataFrame({'A': np.arange(3), + 'B': pd.Series(list('abc')).astype('category')}) + df3 = df3.set_index('B') + df3 + +.. ipython:: python + + df3.reindex(['a', 'e']) + df3.reindex(['a', 'e']).index + df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe'))) + df3.reindex(pd.Categorical(['a', 'e'], categories=list('abe'))).index + +.. 
warning:: + + Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories + or a ``TypeError`` will be raised. + + .. ipython:: python + + df4 = pd.DataFrame({'A': np.arange(2), + 'B': list('ba')}) + df4['B'] = df4['B'].astype(CategoricalDtype(list('ab'))) + df4 = df4.set_index('B') + df4.index + + df5 = pd.DataFrame({'A': np.arange(2), + 'B': list('bc')}) + df5['B'] = df5['B'].astype(CategoricalDtype(list('bc'))) + df5 = df5.set_index('B') + df5.index + + .. code-block:: ipython + + In [1]: pd.concat([df4, df5]) + TypeError: categories must match existing categories when appending + +.. _indexing.rangeindex: + +Int64Index and RangeIndex +~~~~~~~~~~~~~~~~~~~~~~~~~ + +:class:`Int64Index` is a fundamental basic index in pandas. This is an immutable array +implementing an ordered, sliceable set. + +:class:`RangeIndex` is a sub-class of ``Int64Index`` that provides the default index for all ``NDFrame`` objects. +``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to Python `range types `__. + +.. _indexing.float64index: + +Float64Index +~~~~~~~~~~~~ + +By default a :class:`Float64Index` will be automatically created when passing floating, or mixed-integer-floating values in index creation. +This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the +same. + +.. ipython:: python + + indexf = pd.Index([1.5, 2, 3, 4.5, 5]) + indexf + sf = pd.Series(range(5), index=indexf) + sf + +Scalar selection for ``[],.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``). + +.. ipython:: python + + sf[3] + sf[3.0] + sf.loc[3] + sf.loc[3.0] + +The only positional indexing is via ``iloc``. + +.. ipython:: python + + sf.iloc[3] + +A scalar index that is not found will raise a ``KeyError``. 
+Slicing is primarily on the values of the index when using ``[],ix,loc``, and +**always** positional when using ``iloc``. The exception is when the slice is +boolean, in which case it will always be positional. + +.. ipython:: python + + sf[2:4] + sf.loc[2:4] + sf.iloc[2:4] + +In float indexes, slicing using floats is allowed. + +.. ipython:: python + + sf[2.1:4.6] + sf.loc[2.1:4.6] + +In non-float indexes, slicing using floats will raise a ``TypeError``. + +.. code-block:: ipython + + In [1]: pd.Series(range(5))[3.5] + TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index) + + In [1]: pd.Series(range(5))[3.5:4.5] + TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index) + +Here is a typical use-case for using this type of indexing. Imagine that you have a somewhat +irregular timedelta-like indexing scheme, but the data is recorded as floats. This could, for +example, be millisecond offsets. + +.. ipython:: python + + dfir = pd.concat([pd.DataFrame(np.random.randn(5, 2), + index=np.arange(5) * 250.0, + columns=list('AB')), + pd.DataFrame(np.random.randn(6, 2), + index=np.arange(4, 10) * 250.1, + columns=list('AB'))]) + dfir + +Selection operations then will always work on a value basis, for all selection operators. + +.. ipython:: python + + dfir[0:1000.4] + dfir.loc[0:1001, 'A'] + dfir.loc[1000.4] + +You could retrieve the first 1 second (1000 ms) of data as such: + +.. ipython:: python + + dfir[0:1000] + +If you need integer based selection, you should use ``iloc``: + +.. ipython:: python + + dfir.iloc[0:5] + +.. _advanced.intervalindex: + +IntervalIndex +~~~~~~~~~~~~~ + +:class:`IntervalIndex` together with its own dtype, :class:`~pandas.api.types.IntervalDtype` +as well as the :class:`Interval` scalar type, allow first-class support in pandas +for interval notation. 
+ +The ``IntervalIndex`` allows some unique indexing and is also used as a +return type for the categories in :func:`cut` and :func:`qcut`. + +Indexing with an ``IntervalIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +An ``IntervalIndex`` can be used in ``Series`` and in ``DataFrame`` as the index. + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3, 4]}, + index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])) + df + +Label based indexing via ``.loc`` along the edges of an interval works as you would expect, +selecting that particular interval. + +.. ipython:: python + + df.loc[2] + df.loc[[2, 3]] + +If you select a label *contained* within an interval, this will also select the interval. + +.. ipython:: python + + df.loc[2.5] + df.loc[[2.5, 3.5]] + +Selecting using an ``Interval`` will only return exact matches (starting from pandas 0.25.0). + +.. ipython:: python + + df.loc[pd.Interval(1, 2)] + +Trying to select an ``Interval`` that is not exactly contained in the ``IntervalIndex`` will raise a ``KeyError``. + +.. code-block:: python + + In [7]: df.loc[pd.Interval(0.5, 2.5)] + --------------------------------------------------------------------------- + KeyError: Interval(0.5, 2.5, closed='right') + +Selecting all ``Intervals`` that overlap a given ``Interval`` can be performed using the +:meth:`~IntervalIndex.overlaps` method to create a boolean indexer. + +.. ipython:: python + + idxr = df.index.overlaps(pd.Interval(0.5, 2.5)) + idxr + df[idxr] + +Binning data with ``cut`` and ``qcut`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`cut` and :func:`qcut` both return a ``Categorical`` object, and the bins they +create are stored as an ``IntervalIndex`` in its ``.categories`` attribute. + +.. ipython:: python + + c = pd.cut(range(4), bins=2) + c + c.categories + +:func:`cut` also accepts an ``IntervalIndex`` for its ``bins`` argument, which enables +a useful pandas idiom. 
First, we call :func:`cut` with some data and ``bins`` set to a +fixed number, to generate the bins. Then, we pass the values of ``.categories`` as the +``bins`` argument in subsequent calls to :func:`cut`, supplying new data which will be +binned into the same bins. + +.. ipython:: python + + pd.cut([0, 3, 5, 1], bins=c.categories) + +Any value which falls outside all bins will be assigned a ``NaN`` value. + +Generating ranges of intervals +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If we need intervals on a regular frequency, we can use the :func:`interval_range` function +to create an ``IntervalIndex`` using various combinations of ``start``, ``end``, and ``periods``. +The default frequency for ``interval_range`` is 1 for numeric intervals, and a calendar day for +datetime-like intervals: + +.. ipython:: python + + pd.interval_range(start=0, end=5) + + pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=4) + + pd.interval_range(end=pd.Timedelta('3 days'), periods=3) + +The ``freq`` parameter can be used to specify non-default frequencies, and can utilize a variety +of :ref:`frequency aliases ` with datetime-like intervals: + +.. ipython:: python + + pd.interval_range(start=0, periods=5, freq=1.5) + + pd.interval_range(start=pd.Timestamp('2017-01-01'), periods=4, freq='W') + + pd.interval_range(start=pd.Timedelta('0 days'), periods=3, freq='9H') + +Additionally, the ``closed`` parameter can be used to specify which side(s) the intervals +are closed on. Intervals are closed on the right side by default. + +.. ipython:: python + + pd.interval_range(start=0, end=4, closed='both') + + pd.interval_range(start=0, end=4, closed='neither') + +.. versionadded:: 0.23.0 + +Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced +intervals from ``start`` to ``end`` inclusively, with ``periods`` number of elements +in the resulting ``IntervalIndex``: + +.. 
ipython:: python + + pd.interval_range(start=0, end=6, periods=4) + + pd.interval_range(pd.Timestamp('2018-01-01'), + pd.Timestamp('2018-02-28'), periods=3) + +Miscellaneous indexing FAQ +-------------------------- + +Integer indexing +~~~~~~~~~~~~~~~~ + +Label-based indexing with integer axis labels is a thorny topic. It has been +discussed heavily on mailing lists and among various members of the scientific +Python community. In pandas, our general viewpoint is that labels matter more +than integer locations. Therefore, with an integer axis index *only* +label-based indexing is possible with the standard tools like ``.loc``. The +following code will generate exceptions: + +.. ipython:: python + :okexcept: + + s = pd.Series(range(5)) + s[-1] + df = pd.DataFrame(np.random.randn(5, 4)) + df + df.loc[-2:] + +This deliberate decision was made to prevent ambiguities and subtle bugs (many +users reported finding bugs when the API change was made to stop "falling back" +on position-based indexing). + +Non-monotonic indexes require exact matches +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If the index of a ``Series`` or ``DataFrame`` is monotonically increasing or decreasing, then the bounds +of a label-based slice can be outside the range of the index, much like slice indexing a +normal Python ``list``. Monotonicity of an index can be tested with the :meth:`~Index.is_monotonic_increasing` and +:meth:`~Index.is_monotonic_decreasing` attributes. + +.. ipython:: python + + df = pd.DataFrame(index=[2, 3, 3, 4, 5], columns=['data'], data=list(range(5))) + df.index.is_monotonic_increasing + + # no rows 0 or 1, but still returns rows 2, 3 (both of them), and 4: + df.loc[0:4, :] + + # slice bounds are outside the index, so an empty DataFrame is returned + df.loc[13:15, :] + +On the other hand, if the index is not monotonic, then both slice bounds must be +*unique* members of the index. + +.. 
ipython:: python + + df = pd.DataFrame(index=[2, 3, 1, 4, 3, 5], + columns=['data'], data=list(range(6))) + df.index.is_monotonic_increasing + + # OK because 2 and 4 are in the index + df.loc[2:4, :] + +.. code-block:: ipython + + # 0 is not in the index + In [9]: df.loc[0:4, :] + KeyError: 0 + + # 3 is not a unique label + In [11]: df.loc[2:3, :] + KeyError: 'Cannot get right slice bound for non-unique label: 3' + +``Index.is_monotonic_increasing`` and ``Index.is_monotonic_decreasing`` only check that +an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with +the :meth:`~Index.is_unique` attribute. + +.. ipython:: python + + weakly_monotonic = pd.Index(['a', 'b', 'c', 'c']) + weakly_monotonic + weakly_monotonic.is_monotonic_increasing + weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique + +.. _advanced.endpoints_are_inclusive: + +Endpoints are inclusive +~~~~~~~~~~~~~~~~~~~~~~~ + +Compared with standard Python sequence slicing in which the slice endpoint is +not inclusive, label-based slicing in pandas **is inclusive**. The primary +reason for this is that it is often not possible to easily determine the +"successor" or next element after a particular label in an index. For example, +consider the following ``Series``: + +.. ipython:: python + + s = pd.Series(np.random.randn(6), index=list('abcdef')) + s + +Suppose we wished to slice from ``c`` to ``e``, using integers this would be +accomplished as such: + +.. ipython:: python + + s[2:5] + +However, if you only had ``c`` and ``e``, determining the next element in the +index can be somewhat complicated. For example, the following does not work: + +:: + + s.loc['c':'e' + 1] + +A very common use case is to limit a time series to start and end at two +specific dates. To enable this, we made the design choice to make label-based +slicing include both endpoints: + +.. 
ipython:: python + + s.loc['c':'e'] + +This is most definitely a "practicality beats purity" sort of thing, but it is +something to watch out for if you expect label-based slicing to behave exactly +in the way that standard Python integer slicing works. + + +Indexing potentially changes underlying Series dtype +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Different indexing operations can potentially change the dtype of a ``Series``. + +.. ipython:: python + + series1 = pd.Series([1, 2, 3]) + series1.dtype + res = series1.reindex([0, 4]) + res.dtype + res + +.. ipython:: python + + series2 = pd.Series([True]) + series2.dtype + res = series2.reindex_like(series1) + res.dtype + res + +This is because the (re)indexing operations above silently insert ``NaNs`` and the ``dtype`` +changes accordingly. This can cause some issues when using ``numpy`` ``ufuncs`` +such as ``numpy.logical_and``. + +See `this old issue `__ for a more +detailed discussion. diff --git a/doc/source/user_guide/boolean.rst b/doc/source/user_guide/boolean.rst new file mode 100644 index 00000000..95f19314 --- /dev/null +++ b/doc/source/user_guide/boolean.rst @@ -0,0 +1,102 @@ +.. currentmodule:: pandas + +.. ipython:: python + :suppress: + + import pandas as pd + import numpy as np + +.. _boolean: + +************************** +Nullable Boolean Data Type +************************** + +.. versionadded:: 1.0.0 + + +.. _boolean.indexing: + +Indexing with NA values +----------------------- + +pandas allows indexing with ``NA`` values in a boolean array, which are treated as ``False``. + +.. versionchanged:: 1.0.2 + +.. ipython:: python + :okexcept: + + s = pd.Series([1, 2, 3]) + mask = pd.array([True, False, pd.NA], dtype="boolean") + s[mask] + +If you would prefer to keep the ``NA`` values you can manually fill them with ``fillna(True)``. + +.. ipython:: python + + s[mask.fillna(True)] + +.. 
_boolean.kleene: + +Kleene Logical Operations +------------------------- + +:class:`arrays.BooleanArray` implements `Kleene Logic`_ (sometimes called three-valued logic) for +logical operations like ``&`` (and), ``|`` (or) and ``^`` (exclusive-or). + +This table demonstrates the results for every combination. These operations are symmetrical, +so flipping the left- and right-hand side makes no difference in the result. + +================= ========= +Expression Result +================= ========= +``True & True`` ``True`` +``True & False`` ``False`` +``True & NA`` ``NA`` +``False & False`` ``False`` +``False & NA`` ``False`` +``NA & NA`` ``NA`` +``True | True`` ``True`` +``True | False`` ``True`` +``True | NA`` ``True`` +``False | False`` ``False`` +``False | NA`` ``NA`` +``NA | NA`` ``NA`` +``True ^ True`` ``False`` +``True ^ False`` ``True`` +``True ^ NA`` ``NA`` +``False ^ False`` ``False`` +``False ^ NA`` ``NA`` +``NA ^ NA`` ``NA`` +================= ========= + +When an ``NA`` is present in an operation, the output value is ``NA`` only if +the result cannot be determined solely based on the other input. For example, +``True | NA`` is ``True``, because both ``True | True`` and ``True | False`` +are ``True``. In that case, we don't actually need to consider the value +of the ``NA``. + +On the other hand, ``True & NA`` is ``NA``. The result depends on whether +the ``NA`` really is ``True`` or ``False``, since ``True & True`` is ``True``, +but ``True & False`` is ``False``, so we can't determine the output. + + +This differs from how ``np.nan`` behaves in logical operations. Pandas treats +``np.nan`` as *always false in the output*. + +In ``or`` + +.. ipython:: python + + pd.Series([True, False, np.nan], dtype="object") | True + pd.Series([True, False, np.nan], dtype="boolean") | True + +In ``and`` + +.. ipython:: python + + pd.Series([True, False, np.nan], dtype="object") & True + pd.Series([True, False, np.nan], dtype="boolean") & True + +.. 
_Kleene Logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst new file mode 100644 index 00000000..a55326db --- /dev/null +++ b/doc/source/user_guide/categorical.rst @@ -0,0 +1,1179 @@ +.. _categorical: + +{{ header }} + +**************** +Categorical data +**************** + +This is an introduction to pandas categorical data type, including a short comparison +with R's ``factor``. + +`Categoricals` are a pandas data type corresponding to categorical variables in +statistics. A categorical variable takes on a limited, and usually fixed, +number of possible values (`categories`; `levels` in R). Examples are gender, +social class, blood type, country affiliation, observation time or rating via +Likert scales. + +In contrast to statistical categorical variables, categorical data might have an order (e.g. +'strongly agree' vs 'agree' or 'first observation' vs. 'second observation'), but numerical +operations (additions, divisions, ...) are not possible. + +All values of categorical data are either in `categories` or `np.nan`. Order is defined by +the order of `categories`, not lexical order of the values. Internally, the data structure +consists of a `categories` array and an integer array of `codes` which point to the real value in +the `categories` array. + +The categorical data type is useful in the following cases: + +* A string variable consisting of only a few different values. Converting such a string + variable to a categorical variable will save some memory, see :ref:`here `. +* The lexical order of a variable is not the same as the logical order ("one", "two", "three"). + By converting to a categorical and specifying an order on the categories, sorting and + min/max will use the logical order instead of the lexical order, see :ref:`here `. 
+* As a signal to other Python libraries that this column should be treated as a categorical + variable (e.g. to use suitable statistical methods or plot types). + +See also the :ref:`API docs on categoricals`. + +.. _categorical.objectcreation: + +Object creation +--------------- + +Series creation +~~~~~~~~~~~~~~~ + +Categorical ``Series`` or columns in a ``DataFrame`` can be created in several ways: + +By specifying ``dtype="category"`` when constructing a ``Series``: + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"], dtype="category") + s + +By converting an existing ``Series`` or column to a ``category`` dtype: + +.. ipython:: python + + df = pd.DataFrame({"A": ["a", "b", "c", "a"]}) + df["B"] = df["A"].astype('category') + df + +By using special functions, such as :func:`~pandas.cut`, which groups data into +discrete bins. See the :ref:`example on tiling ` in the docs. + +.. ipython:: python + + df = pd.DataFrame({'value': np.random.randint(0, 100, 20)}) + labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] + + df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels) + df.head(10) + +By passing a :class:`pandas.Categorical` object to a ``Series`` or assigning it to a ``DataFrame``. + +.. ipython:: python + + raw_cat = pd.Categorical(["a", "b", "c", "a"], categories=["b", "c", "d"], + ordered=False) + s = pd.Series(raw_cat) + s + df = pd.DataFrame({"A": ["a", "b", "c", "a"]}) + df["B"] = raw_cat + df + +Categorical data has a specific ``category`` :ref:`dtype `: + +.. ipython:: python + + df.dtypes + +DataFrame creation +~~~~~~~~~~~~~~~~~~ + +Similar to the previous section where a single column was converted to categorical, all columns in a +``DataFrame`` can be batch converted to categorical either during or after construction. + +This can be done during construction by specifying ``dtype="category"`` in the ``DataFrame`` constructor: + +.. 
ipython:: python + + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}, dtype="category") + df.dtypes + +Note that the categories present in each column differ; the conversion is done column by column, so +only labels present in a given column are categories: + +.. ipython:: python + + df['A'] + df['B'] + + +.. versionadded:: 0.23.0 + +Analogously, all columns in an existing ``DataFrame`` can be batch converted using :meth:`DataFrame.astype`: + +.. ipython:: python + + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + df_cat = df.astype('category') + df_cat.dtypes + +This conversion is likewise done column by column: + +.. ipython:: python + + df_cat['A'] + df_cat['B'] + + +Controlling behavior +~~~~~~~~~~~~~~~~~~~~ + +In the examples above where we passed ``dtype='category'``, we used the default +behavior: + +1. Categories are inferred from the data. +2. Categories are unordered. + +To control those behaviors, instead of passing ``'category'``, use an instance +of :class:`~pandas.api.types.CategoricalDtype`. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + s = pd.Series(["a", "b", "c", "a"]) + cat_type = CategoricalDtype(categories=["b", "c", "d"], + ordered=True) + s_cat = s.astype(cat_type) + s_cat + +Similarly, a ``CategoricalDtype`` can be used with a ``DataFrame`` to ensure that categories +are consistent among all columns. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + cat_type = CategoricalDtype(categories=list('abcd'), + ordered=True) + df_cat = df.astype(cat_type) + df_cat['A'] + df_cat['B'] + +.. note:: + + To perform table-wise conversion, where all labels in the entire ``DataFrame`` are used as + categories for each column, the ``categories`` parameter can be determined programmatically by + ``categories = pd.unique(df.to_numpy().ravel())``. 
+ +If you already have ``codes`` and ``categories``, you can use the +:func:`~pandas.Categorical.from_codes` constructor to save the factorize step +during normal constructor mode: + +.. ipython:: python + + splitter = np.random.choice([0, 1], 5, p=[0.5, 0.5]) + s = pd.Series(pd.Categorical.from_codes(splitter, + categories=["train", "test"])) + + +Regaining original data +~~~~~~~~~~~~~~~~~~~~~~~ + +To get back to the original ``Series`` or NumPy array, use +``Series.astype(original_dtype)`` or ``np.asarray(categorical)``: + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"]) + s + s2 = s.astype('category') + s2 + s2.astype(str) + np.asarray(s2) + +.. note:: + + In contrast to R's `factor` function, categorical data is not converting input values to + strings; categories will end up the same data type as the original values. + +.. note:: + + In contrast to R's `factor` function, there is currently no way to assign/change labels at + creation time. Use `categories` to change the categories after creation time. + +.. _categorical.categoricaldtype: + +CategoricalDtype +---------------- + +.. versionchanged:: 0.21.0 + +A categorical's type is fully described by + +1. ``categories``: a sequence of unique values and no missing values +2. ``ordered``: a boolean + +This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`. +The ``categories`` argument is optional, which implies that the actual categories +should be inferred from whatever is present in the data when the +:class:`pandas.Categorical` is created. The categories are assumed to be unordered +by default. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + CategoricalDtype(['a', 'b', 'c']) + CategoricalDtype(['a', 'b', 'c'], ordered=True) + CategoricalDtype() + +A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas +expects a `dtype`. For example :func:`pandas.read_csv`, +:func:`pandas.DataFrame.astype`, or in the ``Series`` constructor. 
+ +.. note:: + + As a convenience, you can use the string ``'category'`` in place of a + :class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of + the categories being unordered, and equal to the set of values present in the + array. In other words, ``dtype='category'`` is equivalent to + ``dtype=CategoricalDtype()``. + +Equality semantics +~~~~~~~~~~~~~~~~~~ + +Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal +whenever they have the same categories and order. When comparing two +unordered categoricals, the order of the ``categories`` is not considered. + +.. ipython:: python + + c1 = CategoricalDtype(['a', 'b', 'c'], ordered=False) + + # Equal, since order is not considered when ordered=False + c1 == CategoricalDtype(['b', 'c', 'a'], ordered=False) + + # Unequal, since the second CategoricalDtype is ordered + c1 == CategoricalDtype(['a', 'b', 'c'], ordered=True) + +All instances of ``CategoricalDtype`` compare equal to the string ``'category'``. + +.. ipython:: python + + c1 == 'category' + +.. warning:: + + Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``, + and since all instances of ``CategoricalDtype`` compare equal to ``'category'``, + all instances of ``CategoricalDtype`` compare equal to a + ``CategoricalDtype(None, False)``, regardless of ``categories`` or + ``ordered``. + +Description +----------- + +Using :meth:`~DataFrame.describe` on categorical data will produce similar +output to a ``Series`` or ``DataFrame`` of type ``string``. + +.. ipython:: python + + cat = pd.Categorical(["a", "c", "c", np.nan], categories=["b", "a", "c"]) + df = pd.DataFrame({"cat": cat, "s": ["a", "c", "c", np.nan]}) + df.describe() + df["cat"].describe() + +.. _categorical.cat: + +Working with categories +----------------------- + +Categorical data has a `categories` and an `ordered` property, which list their +possible values and whether the ordering matters or not.
These properties are +exposed as ``s.cat.categories`` and ``s.cat.ordered``. If you don't manually +specify categories and ordering, they are inferred from the passed arguments. + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"], dtype="category") + s.cat.categories + s.cat.ordered + +It's also possible to pass in the categories in a specific order: + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "c", "a"], + categories=["c", "b", "a"])) + s.cat.categories + s.cat.ordered + +.. note:: + + New categorical data are **not** automatically ordered. You must explicitly + pass ``ordered=True`` to indicate an ordered ``Categorical``. + + +.. note:: + + The result of :meth:`~Series.unique` is not always the same as ``Series.cat.categories``, + because ``Series.unique()`` has a couple of guarantees, namely that it returns categories + in the order of appearance, and it only includes values that are actually present. + + .. ipython:: python + + s = pd.Series(list('babc')).astype(CategoricalDtype(list('abcd'))) + s + + # categories + s.cat.categories + + # uniques + s.unique() + +Renaming categories +~~~~~~~~~~~~~~~~~~~ + +Renaming categories is done by assigning new values to the +``Series.cat.categories`` property or by using the +:meth:`~pandas.Categorical.rename_categories` method: + + +.. ipython:: python + + s = pd.Series(["a", "b", "c", "a"], dtype="category") + s + s.cat.categories = ["Group %s" % g for g in s.cat.categories] + s + s = s.cat.rename_categories([1, 2, 3]) + s + # You can also pass a dict-like object to map the renaming + s = s.cat.rename_categories({1: 'x', 2: 'y', 3: 'z'}) + s + +.. note:: + + In contrast to R's `factor`, categorical data can have categories of other types than string. + +.. note:: + + Be aware that assigning new categories is an inplace operation, while most other operations + under ``Series.cat`` per default return a new ``Series`` of dtype `category`. 
+ +Categories must be unique or a `ValueError` is raised: + +.. ipython:: python + + try: + s.cat.categories = [1, 1, 1] + except ValueError as e: + print("ValueError:", str(e)) + +Categories must also not be ``NaN`` or a `ValueError` is raised: + +.. ipython:: python + + try: + s.cat.categories = [1, 2, np.nan] + except ValueError as e: + print("ValueError:", str(e)) + +Appending new categories +~~~~~~~~~~~~~~~~~~~~~~~~ + +Appending categories can be done by using the +:meth:`~pandas.Categorical.add_categories` method: + +.. ipython:: python + + s = s.cat.add_categories([4]) + s.cat.categories + s + +Removing categories +~~~~~~~~~~~~~~~~~~~ + +Removing categories can be done by using the +:meth:`~pandas.Categorical.remove_categories` method. Values which are removed +are replaced by ``np.nan``: + +.. ipython:: python + + s = s.cat.remove_categories([4]) + s + +Removing unused categories +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Removing unused categories can also be done: + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "a"], + categories=["a", "b", "c", "d"])) + s + s.cat.remove_unused_categories() + +Setting categories +~~~~~~~~~~~~~~~~~~ + +If you want to remove and add new categories in one step (which has some +speed advantage), or simply set the categories to a predefined scale, +use :meth:`~pandas.Categorical.set_categories`. + + +.. ipython:: python + + s = pd.Series(["one", "two", "four", "-"], dtype="category") + s + s = s.cat.set_categories(["one", "two", "three", "four"]) + s + +.. note:: + Be aware that :func:`Categorical.set_categories` cannot know whether some category is omitted + intentionally or because it is misspelled or (under Python3) due to a type difference (e.g., + NumPy S1 dtype and Python strings). This can result in surprising behaviour! + +Sorting and order +----------------- + +.. 
_categorical.sort: + +If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a +meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a ``TypeError``. + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "c", "a"], ordered=False)) + s.sort_values(inplace=True) + s = pd.Series(["a", "b", "c", "a"]).astype( + CategoricalDtype(ordered=True) + ) + s.sort_values(inplace=True) + s + s.min(), s.max() + +You can set categorical data to be ordered by using ``as_ordered()`` or unordered by using ``as_unordered()``. These will by +default return a *new* object. + +.. ipython:: python + + s.cat.as_ordered() + s.cat.as_unordered() + +Sorting will use the order defined by categories, not any lexical order present on the data type. +This is even true for strings and numeric data: + +.. ipython:: python + + s = pd.Series([1, 2, 3, 1], dtype="category") + s = s.cat.set_categories([2, 3, 1], ordered=True) + s + s.sort_values(inplace=True) + s + s.min(), s.max() + + +Reordering +~~~~~~~~~~ + +Reordering the categories is possible via the :meth:`Categorical.reorder_categories` and +the :meth:`Categorical.set_categories` methods. For :meth:`Categorical.reorder_categories`, all +old categories must be included in the new categories and no new categories are allowed. This will +necessarily make the sort order the same as the categories order. + +.. ipython:: python + + s = pd.Series([1, 2, 3, 1], dtype="category") + s = s.cat.reorder_categories([2, 3, 1], ordered=True) + s + s.sort_values(inplace=True) + s + s.min(), s.max() + +.. note:: + + Note the difference between assigning new categories and reordering the categories: the first + renames categories and therefore the individual values in the ``Series``, but if the first + position was sorted last, the renamed value will still be sorted last. 
Reordering means that the + way values are sorted is different afterwards, but not that individual values in the + ``Series`` are changed. + +.. note:: + + If the ``Categorical`` is not ordered, :meth:`Series.min` and :meth:`Series.max` will raise + ``TypeError``. Numeric operations like ``+``, ``-``, ``*``, ``/`` and operations based on them + (e.g. :meth:`Series.median`, which would need to compute the mean between two values if the length + of an array is even) do not work and raise a ``TypeError``. + +Multi column sorting +~~~~~~~~~~~~~~~~~~~~ + +A categorical dtyped column will participate in a multi-column sort in a similar manner to other columns. +The ordering of the categorical is determined by the ``categories`` of that column. + +.. ipython:: python + + dfs = pd.DataFrame({'A': pd.Categorical(list('bbeebbaa'), + categories=['e', 'a', 'b'], + ordered=True), + 'B': [1, 2, 1, 2, 2, 1, 2, 1]}) + dfs.sort_values(by=['A', 'B']) + +Reordering the ``categories`` changes a future sort. + +.. ipython:: python + + dfs['A'] = dfs['A'].cat.reorder_categories(['a', 'b', 'e']) + dfs.sort_values(by=['A', 'B']) + +Comparisons +----------- + +Comparing categorical data with other objects is possible in three cases: + +* Comparing equality (``==`` and ``!=``) to a list-like object (list, Series, array, + ...) of the same length as the categorical data. +* All comparisons (``==``, ``!=``, ``>``, ``>=``, ``<``, and ``<=``) of categorical data to + another categorical Series, when ``ordered==True`` and the `categories` are the same. +* All comparisons of a categorical data to a scalar. + +All other comparisons, especially "non-equality" comparisons of two categoricals with different +categories or a categorical with any list-like object, will raise a ``TypeError``. + +.. 
note:: + + Any "non-equality" comparisons of categorical data with a ``Series``, ``np.array``, ``list`` or + categorical data with different categories or ordering will raise a ``TypeError`` because custom + categories ordering could be interpreted in two ways: one with taking into account the + ordering and one without. + +.. ipython:: python + + cat = pd.Series([1, 2, 3]).astype( + CategoricalDtype([3, 2, 1], ordered=True) + ) + cat_base = pd.Series([2, 2, 2]).astype( + CategoricalDtype([3, 2, 1], ordered=True) + ) + cat_base2 = pd.Series([2, 2, 2]).astype( + CategoricalDtype(ordered=True) + ) + + cat + cat_base + cat_base2 + +Comparing to a categorical with the same categories and ordering or to a scalar works: + +.. ipython:: python + + cat > cat_base + cat > 2 + +Equality comparisons work with any list-like object of same length and scalars: + +.. ipython:: python + + cat == cat_base + cat == np.array([1, 2, 3]) + cat == 2 + +This doesn't work because the categories are not the same: + +.. ipython:: python + + try: + cat > cat_base2 + except TypeError as e: + print("TypeError:", str(e)) + +If you want to do a "non-equality" comparison of a categorical series with a list-like object +which is not categorical data, you need to be explicit and convert the categorical data back to +the original values: + +.. ipython:: python + + base = np.array([1, 2, 3]) + + try: + cat > base + except TypeError as e: + print("TypeError:", str(e)) + + np.asarray(cat) > base + +When you compare two unordered categoricals with the same categories, the order is not considered: + +.. 
ipython:: python + + c1 = pd.Categorical(['a', 'b'], categories=['a', 'b'], ordered=False) + c2 = pd.Categorical(['a', 'b'], categories=['b', 'a'], ordered=False) + c1 == c2 + +Operations +---------- + +Apart from :meth:`Series.min`, :meth:`Series.max` and :meth:`Series.mode`, the +following operations are possible with categorical data: + +``Series`` methods like :meth:`Series.value_counts` will use all categories, +even if some categories are not present in the data: + +.. ipython:: python + + s = pd.Series(pd.Categorical(["a", "b", "c", "c"], + categories=["c", "a", "b", "d"])) + s.value_counts() + +Groupby will also show "unused" categories: + +.. ipython:: python + + cats = pd.Categorical(["a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"]) + df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]}) + df.groupby("cats").mean() + + cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) + df2 = pd.DataFrame({"cats": cats2, + "B": ["c", "d", "c", "d"], + "values": [1, 2, 3, 4]}) + df2.groupby(["cats", "B"]).mean() + + +Pivot tables: + +.. ipython:: python + + raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) + df = pd.DataFrame({"A": raw_cat, + "B": ["c", "d", "c", "d"], + "values": [1, 2, 3, 4]}) + pd.pivot_table(df, values='values', index=['A', 'B']) + +Data munging +------------ + +The optimized pandas data access methods ``.loc``, ``.iloc``, ``.at``, and ``.iat``, +work as normal. The only difference is the return type (for getting) and +that only values already in `categories` can be assigned. + +Getting +~~~~~~~ + +If the slicing operation returns either a ``DataFrame`` or a column of type +``Series``, the ``category`` dtype is preserved. + +.. 
ipython:: python + + idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + cats = pd.Series(["a", "b", "b", "b", "c", "c", "c"], + dtype="category", index=idx) + values = [1, 2, 2, 2, 3, 4, 5] + df = pd.DataFrame({"cats": cats, "values": values}, index=idx) + df.iloc[2:4, :] + df.iloc[2:4, :].dtypes + df.loc["h":"j", "cats"] + df[df["cats"] == "b"] + +An example where the category type is not preserved is if you take one single +row: the resulting ``Series`` is of dtype ``object``: + +.. ipython:: python + + # get the complete "h" row as a Series + df.loc["h", :] + +Returning a single item from categorical data will also return the value, not a categorical +of length "1". + +.. ipython:: python + + df.iat[0, 0] + df["cats"].cat.categories = ["x", "y", "z"] + df.at["h", "cats"] # returns a string + +.. note:: + This is in contrast to R's `factor` function, where ``factor(c(1,2,3))[1]`` + returns a single value `factor`. + +To get a single value ``Series`` of type ``category``, you pass in a list with +a single value: + +.. ipython:: python + + df.loc[["h"], "cats"] + +String and datetime accessors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The accessors ``.dt`` and ``.str`` will work if the ``s.cat.categories`` are of +an appropriate type: + + +.. ipython:: python + + str_s = pd.Series(list('aabb')) + str_cat = str_s.astype('category') + str_cat + str_cat.str.contains("a") + + date_s = pd.Series(pd.date_range('1/1/2015', periods=5)) + date_cat = date_s.astype('category') + date_cat + date_cat.dt.day + +.. note:: + + The returned ``Series`` (or ``DataFrame``) is of the same type as if you used the + ``.str.`` / ``.dt.`` on a ``Series`` of that type (and not of + type ``category``!). + +That means, that the returned values from methods and properties on the accessors of a +``Series`` and the returned values from methods and properties on the accessors of this +``Series`` transformed to one of type `category` will be equal: + +..
ipython:: python + + ret_s = str_s.str.contains("a") + ret_cat = str_cat.str.contains("a") + ret_s.dtype == ret_cat.dtype + ret_s == ret_cat + +.. note:: + + The work is done on the ``categories`` and then a new ``Series`` is constructed. This has + some performance implication if you have a ``Series`` of type string, where lots of elements + are repeated (i.e. the number of unique elements in the ``Series`` is a lot smaller than the + length of the ``Series``). In this case it can be faster to convert the original ``Series`` + to one of type ``category`` and use ``.str.`` or ``.dt.`` on that. + +Setting +~~~~~~~ + +Setting values in a categorical column (or ``Series``) works as long as the +value is included in the `categories`: + +.. ipython:: python + + idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + cats = pd.Categorical(["a", "a", "a", "a", "a", "a", "a"], + categories=["a", "b"]) + values = [1, 1, 1, 1, 1, 1, 1] + df = pd.DataFrame({"cats": cats, "values": values}, index=idx) + + df.iloc[2:4, :] = [["b", 2], ["b", 2]] + df + try: + df.iloc[2:4, :] = [["c", 3], ["c", 3]] + except ValueError as e: + print("ValueError:", str(e)) + +Setting values by assigning categorical data will also check that the `categories` match: + +.. ipython:: python + + df.loc["j":"k", "cats"] = pd.Categorical(["a", "a"], categories=["a", "b"]) + df + try: + df.loc["j":"k", "cats"] = pd.Categorical(["b", "b"], + categories=["a", "b", "c"]) + except ValueError as e: + print("ValueError:", str(e)) + +Assigning a ``Categorical`` to parts of a column of other types will use the values: + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 1, 1, 1, 1], "b": ["a", "a", "a", "a", "a"]}) + df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df + df.dtypes + +.. _categorical.merge: +.. 
_categorical.concat: + +Merging / Concatenation +~~~~~~~~~~~~~~~~~~~~~~~ + +By default, combining ``Series`` or ``DataFrames`` which contain the same +categories results in ``category`` dtype, otherwise results will depend on the +dtype of the underlying categories. Merges that result in non-categorical +dtypes will likely have higher memory usage. Use ``.astype`` or +``union_categoricals`` to ensure ``category`` results. + +.. ipython:: python + + from pandas.api.types import union_categoricals + + # same categories + s1 = pd.Series(['a', 'b'], dtype='category') + s2 = pd.Series(['a', 'b', 'a'], dtype='category') + pd.concat([s1, s2]) + + # different categories + s3 = pd.Series(['b', 'c'], dtype='category') + pd.concat([s1, s3]) + + # Output dtype is inferred based on categories values + int_cats = pd.Series([1, 2], dtype="category") + float_cats = pd.Series([3.0, 4.0], dtype="category") + pd.concat([int_cats, float_cats]) + + pd.concat([s1, s3]).astype('category') + union_categoricals([s1.array, s3.array]) + +The following table summarizes the results of merging ``Categoricals``: + ++-------------------+------------------------+----------------------+-----------------------------+ +| arg1 | arg2 | identical | result | ++===================+========================+======================+=============================+ +| category | category | True | category | ++-------------------+------------------------+----------------------+-----------------------------+ +| category (object) | category (object) | False | object (dtype is inferred) | ++-------------------+------------------------+----------------------+-----------------------------+ +| category (int) | category (float) | False | float (dtype is inferred) | ++-------------------+------------------------+----------------------+-----------------------------+ + +See also the section on :ref:`merge dtypes` for notes about +preserving merge dtypes and performance. + +.. 
_categorical.union: + +Unioning +~~~~~~~~ + +If you want to combine categoricals that do not necessarily have the same +categories, the :func:`~pandas.api.types.union_categoricals` function will +combine a list-like of categoricals. The new categories will be the union of +the categories being combined. + +.. ipython:: python + + from pandas.api.types import union_categoricals + a = pd.Categorical(["b", "c"]) + b = pd.Categorical(["a", "b"]) + union_categoricals([a, b]) + +By default, the resulting categories will be ordered as +they appear in the data. If you want the categories to +be lexsorted, use ``sort_categories=True`` argument. + +.. ipython:: python + + union_categoricals([a, b], sort_categories=True) + +``union_categoricals`` also works with the "easy" case of combining two +categoricals of the same categories and order information +(e.g. what you could also ``append`` for). + +.. ipython:: python + + a = pd.Categorical(["a", "b"], ordered=True) + b = pd.Categorical(["a", "b", "a"], ordered=True) + union_categoricals([a, b]) + +The below raises ``TypeError`` because the categories are ordered and not identical. + +.. code-block:: ipython + + In [1]: a = pd.Categorical(["a", "b"], ordered=True) + In [2]: b = pd.Categorical(["a", "b", "c"], ordered=True) + In [3]: union_categoricals([a, b]) + Out[3]: + TypeError: to union ordered Categoricals, all categories must be the same + +Ordered categoricals with different categories or orderings can be combined by +using the ``ignore_order=True`` argument. + +.. ipython:: python + + a = pd.Categorical(["a", "b", "c"], ordered=True) + b = pd.Categorical(["c", "b", "a"], ordered=True) + union_categoricals([a, b], ignore_order=True) + +:func:`~pandas.api.types.union_categoricals` also works with a +``CategoricalIndex``, or ``Series`` containing categorical data, but note that +the resulting array will always be a plain ``Categorical``: + +..
ipython:: python + + a = pd.Series(["b", "c"], dtype='category') + b = pd.Series(["a", "b"], dtype='category') + union_categoricals([a, b]) + +.. note:: + + ``union_categoricals`` may recode the integer codes for categories + when combining categoricals. This is likely what you want, + but if you are relying on the exact numbering of the categories, be + aware. + + .. ipython:: python + + c1 = pd.Categorical(["b", "c"]) + c2 = pd.Categorical(["a", "b"]) + + c1 + # "b" is coded to 0 + c1.codes + + c2 + # "b" is coded to 1 + c2.codes + + c = union_categoricals([c1, c2]) + c + # "b" is coded to 0 throughout, same as c1, different from c2 + c.codes + + +Getting data in/out +------------------- + +You can write data that contains ``category`` dtypes to a ``HDFStore``. +See :ref:`here <io.hdf5-categorical>` for an example and caveats. + +It is also possible to write data to and read data from *Stata* format files. +See :ref:`here <io.stata-categorical>` for an example and caveats. + +Writing to a CSV file will convert the data, effectively removing any information about the +categorical (categories and ordering). So if you read back the CSV file you have to convert the +relevant columns back to `category` and assign the right categories and categories ordering. + +.. ipython:: python + + import io + s = pd.Series(pd.Categorical(['a', 'b', 'b', 'a', 'a', 'd'])) + # rename the categories + s.cat.categories = ["very good", "good", "bad"] + # reorder the categories and add missing categories + s = s.cat.set_categories(["very bad", "bad", "medium", "good", "very good"]) + df = pd.DataFrame({"cats": s, "vals": [1, 2, 3, 4, 5, 6]}) + csv = io.StringIO() + df.to_csv(csv) + df2 = pd.read_csv(io.StringIO(csv.getvalue())) + df2.dtypes + df2["cats"] + # Redo the category + df2["cats"] = df2["cats"].astype("category") + df2["cats"].cat.set_categories(["very bad", "bad", "medium", + "good", "very good"], + inplace=True) + df2.dtypes + df2["cats"] + +The same holds for writing to a SQL database with ``to_sql``.
+ +Missing data +------------ + +pandas primarily uses the value `np.nan` to represent missing data. It is by +default not included in computations. See the :ref:`Missing Data section +`. + +Missing values should **not** be included in the Categorical's ``categories``, +only in the ``values``. +Instead, it is understood that NaN is different, and is always a possibility. +When working with the Categorical's ``codes``, missing values will always have +a code of ``-1``. + +.. ipython:: python + + s = pd.Series(["a", "b", np.nan, "a"], dtype="category") + # only two categories + s + s.cat.codes + + +Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series.fillna`, +:meth:`~Series.dropna`, all work normally: + +.. ipython:: python + + s = pd.Series(["a", "b", np.nan], dtype="category") + s + pd.isna(s) + s.fillna("a") + +Differences to R's `factor` +--------------------------- + +The following differences to R's factor functions can be observed: + +* R's `levels` are named `categories`. +* R's `levels` are always of type string, while `categories` in pandas can be of any dtype. +* It's not possible to specify labels at creation time. Use ``s.cat.rename_categories(new_labels)`` + afterwards. +* In contrast to R's `factor` function, using categorical data as the sole input to create a + new categorical series will *not* remove unused categories but create a new categorical series + which is equal to the passed in one! +* R allows for missing values to be included in its `levels` (pandas' `categories`). Pandas + does not allow `NaN` categories, but missing values can still be in the `values`. + + +Gotchas +------- + +.. _categorical.rfactor: + +Memory usage +~~~~~~~~~~~~ + +.. _categorical.memory: + +The memory usage of a ``Categorical`` is proportional to the number of categories plus the length of the data. In contrast, +an ``object`` dtype is a constant times the length of the data. + +.. 
ipython:: python + + s = pd.Series(['foo', 'bar'] * 1000) + + # object dtype + s.nbytes + + # category dtype + s.astype('category').nbytes + +.. note:: + + If the number of categories approaches the length of the data, the ``Categorical`` will use nearly the same or + more memory than an equivalent ``object`` dtype representation. + + .. ipython:: python + + s = pd.Series(['foo%04d' % i for i in range(2000)]) + + # object dtype + s.nbytes + + # category dtype + s.astype('category').nbytes + + +`Categorical` is not a `numpy` array +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, categorical data and the underlying ``Categorical`` is implemented as a Python +object and not as a low-level NumPy array dtype. This leads to some problems. + +NumPy itself doesn't know about the new `dtype`: + +.. ipython:: python + + try: + np.dtype("category") + except TypeError as e: + print("TypeError:", str(e)) + + dtype = pd.Categorical(["a"]).dtype + try: + np.dtype(dtype) + except TypeError as e: + print("TypeError:", str(e)) + +Dtype comparisons work: + +.. ipython:: python + + dtype == np.str_ + np.str_ == dtype + +To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``: + +.. ipython:: python + + hasattr(pd.Series(['a'], dtype='category'), 'cat') + hasattr(pd.Series(['a']), 'cat') + +Using NumPy functions on a ``Series`` of type ``category`` should not work as `Categoricals` +are not numeric data (even in the case that ``.categories`` is numeric). + +.. ipython:: python + + s = pd.Series(pd.Categorical([1, 2, 3, 4])) + try: + np.sum(s) + # same with np.log(s),... + except TypeError as e: + print("TypeError:", str(e)) + +.. note:: + If such a function works, please file a bug at https://github.com/pandas-dev/pandas! 
+ +dtype in apply +~~~~~~~~~~~~~~ + +Pandas currently does not preserve the dtype in apply functions: If you apply along rows you get +a `Series` of ``object`` `dtype` (same as getting a row -> getting one element will return a +basic type) and applying along columns will also convert to object. ``NaN`` values are unaffected. +You can use ``fillna`` to handle missing values before applying a function. + +.. ipython:: python + + df = pd.DataFrame({"a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"], + "cats": pd.Categorical([1, 2, 3, 2])}) + df.apply(lambda row: type(row["cats"]), axis=1) + df.apply(lambda col: col.dtype, axis=0) + +Categorical index +~~~~~~~~~~~~~~~~~ + +``CategoricalIndex`` is a type of index that is useful for supporting +indexing with duplicates. This is a container around a ``Categorical`` +and allows efficient indexing and storage of an index with a large number of duplicated elements. +See the :ref:`advanced indexing docs ` for a more detailed +explanation. + +Setting the index will create a ``CategoricalIndex``: + +.. ipython:: python + + cats = pd.Categorical([1, 2, 3, 4], categories=[4, 2, 3, 1]) + strings = ["a", "b", "c", "d"] + values = [4, 2, 3, 1] + df = pd.DataFrame({"strings": strings, "values": values}, index=cats) + df.index + # This now sorts by the categories order + df.sort_index() + +Side effects +~~~~~~~~~~~~ + +Constructing a ``Series`` from a ``Categorical`` will not copy the input +``Categorical``. This means that changes to the ``Series`` will in most cases +change the original ``Categorical``: + +.. ipython:: python + + cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + s = pd.Series(cat, name="cat") + cat + s.iloc[0:2] = 10 + cat + df = pd.DataFrame(s) + df["cat"].cat.categories = [1, 2, 3, 4, 5] + cat + +Use ``copy=True`` to prevent such a behaviour or simply don't reuse ``Categoricals``: + +.. 
ipython:: python + + cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + s = pd.Series(cat, name="cat", copy=True) + cat + s.iloc[0:2] = 10 + cat + +.. note:: + + This also happens in some cases when you supply a NumPy array instead of a ``Categorical``: + using an int array (e.g. ``np.array([1,2,3,4])``) will exhibit the same behavior, while using + a string array (e.g. ``np.array(["a","b","c","a"])``) will not. diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst new file mode 100644 index 00000000..a2150c20 --- /dev/null +++ b/doc/source/user_guide/computation.rst @@ -0,0 +1,1067 @@ +.. _computation: + +{{ header }} + +Computational tools +=================== + + +Statistical functions +--------------------- + +.. _computation.pct_change: + +Percent change +~~~~~~~~~~~~~~ + +``Series`` and ``DataFrame`` have a method +:meth:`~DataFrame.pct_change` to compute the percent change over a given number +of periods (using ``fill_method`` to fill NA/null values *before* computing +the percent change). + +.. ipython:: python + + ser = pd.Series(np.random.randn(8)) + + ser.pct_change() + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4)) + + df.pct_change(periods=3) + +.. _computation.covariance: + +Covariance +~~~~~~~~~~ + +:meth:`Series.cov` can be used to compute covariance between series +(excluding missing values). + +.. ipython:: python + + s1 = pd.Series(np.random.randn(1000)) + s2 = pd.Series(np.random.randn(1000)) + s1.cov(s2) + +Analogously, :meth:`DataFrame.cov` to compute pairwise covariances among the +series in the DataFrame, also excluding NA/null values. + +.. _computation.covariance.caveats: + +.. note:: + + Assuming the missing data are missing at random this results in an estimate + for the covariance matrix which is unbiased. However, for many applications + this estimate may not be acceptable because the estimated covariance matrix + is not guaranteed to be positive semi-definite. 
This could lead to + estimated correlations having absolute values which are greater than one, + and/or a non-invertible covariance matrix. See `Estimation of covariance + matrices `_ + for more details. + +.. ipython:: python + + frame = pd.DataFrame(np.random.randn(1000, 5), + columns=['a', 'b', 'c', 'd', 'e']) + frame.cov() + +``DataFrame.cov`` also supports an optional ``min_periods`` keyword that +specifies the required minimum number of observations for each column pair +in order to have a valid result. + +.. ipython:: python + + frame = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c']) + frame.loc[frame.index[:5], 'a'] = np.nan + frame.loc[frame.index[5:10], 'b'] = np.nan + + frame.cov() + + frame.cov(min_periods=12) + + +.. _computation.correlation: + +Correlation +~~~~~~~~~~~ + +Correlation may be computed using the :meth:`~DataFrame.corr` method. +Using the ``method`` parameter, several methods for computing correlations are +provided: + +.. csv-table:: + :header: "Method name", "Description" + :widths: 20, 80 + + ``pearson (default)``, Standard correlation coefficient + ``kendall``, Kendall Tau correlation coefficient + ``spearman``, Spearman rank correlation coefficient + +.. \rho = \cov(x, y) / \sigma_x \sigma_y + +All of these are currently computed using pairwise complete observations. +Wikipedia has articles covering the above correlation coefficients: + +* `Pearson correlation coefficient `_ +* `Kendall rank correlation coefficient `_ +* `Spearman's rank correlation coefficient `_ + +.. note:: + + Please see the :ref:`caveats ` associated + with this method of calculating correlation matrices in the + :ref:`covariance section `. + +.. 
ipython:: python + + frame = pd.DataFrame(np.random.randn(1000, 5), + columns=['a', 'b', 'c', 'd', 'e']) + frame.iloc[::2] = np.nan + + # Series with Series + frame['a'].corr(frame['b']) + frame['a'].corr(frame['b'], method='spearman') + + # Pairwise correlation of DataFrame columns + frame.corr() + +Note that non-numeric columns will be automatically excluded from the +correlation calculation. + +Like ``cov``, ``corr`` also supports the optional ``min_periods`` keyword: + +.. ipython:: python + + frame = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c']) + frame.loc[frame.index[:5], 'a'] = np.nan + frame.loc[frame.index[5:10], 'b'] = np.nan + + frame.corr() + + frame.corr(min_periods=12) + + +.. versionadded:: 0.24.0 + +The ``method`` argument can also be a callable for a generic correlation +calculation. In this case, it should be a single function +that produces a single value from two ndarray inputs. Suppose we wanted to +compute the correlation based on histogram intersection: + +.. ipython:: python + + # histogram intersection + def histogram_intersection(a, b): + return np.minimum(np.true_divide(a, a.sum()), + np.true_divide(b, b.sum())).sum() + + frame.corr(method=histogram_intersection) + +A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to +compute the correlation between like-labeled Series contained in different +DataFrame objects. + +.. ipython:: python + + index = ['a', 'b', 'c', 'd', 'e'] + columns = ['one', 'two', 'three', 'four'] + df1 = pd.DataFrame(np.random.randn(5, 4), index=index, columns=columns) + df2 = pd.DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns) + df1.corrwith(df2) + df2.corrwith(df1, axis=1) + +.. _computation.ranking: + +Data ranking +~~~~~~~~~~~~ + +The :meth:`~Series.rank` method produces a data ranking with ties being +assigned the mean of the ranks (by default) for the group: + +.. 
ipython:: python + + s = pd.Series(np.random.randn(5), index=list('abcde')) + s['d'] = s['b'] # so there's a tie + s.rank() + +:meth:`~DataFrame.rank` is also a DataFrame method and can rank either the rows +(``axis=0``) or the columns (``axis=1``). ``NaN`` values are excluded from the +ranking. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 6)) + df[4] = df[2][:5] # some ties + df + df.rank(1) + +``rank`` optionally takes a parameter ``ascending`` which by default is true; +when false, data is reverse-ranked, with larger values assigned a smaller rank. + +``rank`` supports different tie-breaking methods, specified with the ``method`` +parameter: + + - ``average`` : average rank of tied group + - ``min`` : lowest rank in the group + - ``max`` : highest rank in the group + - ``first`` : ranks assigned in the order they appear in the array + +.. _stats.moments: + +Window Functions +---------------- + +.. currentmodule:: pandas.core.window + +For working with data, a number of window functions are provided for +computing common *window* or *rolling* statistics. Among these are count, sum, +mean, median, correlation, variance, covariance, standard deviation, skewness, +and kurtosis. + +The ``rolling()`` and ``expanding()`` +functions can be used directly from DataFrameGroupBy objects, +see the :ref:`groupby docs `. + + +.. note:: + + The API for window statistics is quite similar to the way one works with ``GroupBy`` objects, see the documentation :ref:`here `. + +We work with ``rolling``, ``expanding`` and ``exponentially weighted`` data through the corresponding +objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expanding` and :class:`~pandas.core.window.EWM`. + +.. ipython:: python + + s = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) + s = s.cumsum() + s + +These are created from methods on ``Series`` and ``DataFrame``. + +.. 
ipython:: python + + r = s.rolling(window=60) + r + +These objects provide tab-completion of the available methods and properties. + +.. code-block:: ipython + + In [14]: r. # noqa: E225, E999 + r.agg r.apply r.count r.exclusions r.max r.median r.name r.skew r.sum + r.aggregate r.corr r.cov r.kurt r.mean r.min r.quantile r.std r.var + +Generally these methods all have the same interface. They all +accept the following arguments: + +- ``window``: size of moving window +- ``min_periods``: threshold of non-null data points to require (otherwise + result is NA) +- ``center``: boolean, whether to set the labels at the center (default is False) + +We can then call methods on these ``rolling`` objects. These return like-indexed objects: + +.. ipython:: python + + r.mean() + +.. ipython:: python + + s.plot(style='k--') + + @savefig rolling_mean_ex.png + r.mean().plot(style='k') + +.. ipython:: python + :suppress: + + plt.close('all') + +They can also be applied to DataFrame objects. This is really just syntactic +sugar for applying the moving window operator to all of the DataFrame's columns: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 4), + index=pd.date_range('1/1/2000', periods=1000), + columns=['A', 'B', 'C', 'D']) + df = df.cumsum() + + @savefig rolling_mean_frame.png + df.rolling(window=60).sum().plot(subplots=True) + +.. _stats.summary: + +Method summary +~~~~~~~~~~~~~~ + +We provide a number of common statistical functions: + +.. currentmodule:: pandas.core.window + +..
csv-table:: + :header: "Method", "Description" + :widths: 20, 80 + + :meth:`~Rolling.count`, Number of non-null observations + :meth:`~Rolling.sum`, Sum of values + :meth:`~Rolling.mean`, Mean of values + :meth:`~Rolling.median`, Arithmetic median of values + :meth:`~Rolling.min`, Minimum + :meth:`~Rolling.max`, Maximum + :meth:`~Rolling.std`, Bessel-corrected sample standard deviation + :meth:`~Rolling.var`, Unbiased variance + :meth:`~Rolling.skew`, Sample skewness (3rd moment) + :meth:`~Rolling.kurt`, Sample kurtosis (4th moment) + :meth:`~Rolling.quantile`, Sample quantile (value at %) + :meth:`~Rolling.apply`, Generic apply + :meth:`~Rolling.cov`, Unbiased covariance (binary) + :meth:`~Rolling.corr`, Correlation (binary) + +.. _stats.rolling_apply: + +Rolling Apply +~~~~~~~~~~~~~ + +The :meth:`~Rolling.apply` function takes an extra ``func`` argument and performs +generic rolling computations. The ``func`` argument should be a single function +that produces a single value from an ndarray input. Suppose we wanted to +compute the mean absolute deviation on a rolling basis: + +.. ipython:: python + + def mad(x): + return np.fabs(x - x.mean()).mean() + + @savefig rolling_apply_ex.png + s.rolling(window=60).apply(mad, raw=True).plot(style='k') + +.. versionadded:: 1.0 + +Additionally, :meth:`~Rolling.apply` can leverage `Numba `__ +if installed as an optional dependency. The apply aggregation can be executed using Numba by specifying +``engine='numba'`` and ``engine_kwargs`` arguments (``raw`` must also be set to ``True``). +Numba will be applied in potentially two routines: + +1. If ``func`` is a standard Python function, the engine will `JIT `__ +the passed function. ``func`` can also be a JITed function in which case the engine will not JIT the function again. +2. The engine will JIT the for loop where the apply function is applied to each window. 
+ +The ``engine_kwargs`` argument is a dictionary of keyword arguments that will be passed into the +`numba.jit decorator `__. +These keyword arguments will be applied to *both* the passed function (if a standard Python function) +and the apply for loop over each window. Currently only ``nogil``, ``nopython``, and ``parallel`` are supported, +and their default values are set to ``False``, ``True`` and ``False`` respectively. + +.. note:: + + In terms of performance, **the first time a function is run using the Numba engine will be slow** + as Numba will have some function compilation overhead. However, ``rolling`` objects will cache + the function and subsequent calls will be fast. In general, the Numba engine is performant with + a larger amount of data points (e.g. 1+ million). + +.. code-block:: ipython + + In [1]: data = pd.Series(range(1_000_000)) + + In [2]: roll = data.rolling(10) + + In [3]: def f(x): + ...: return np.sum(x) + 5 + # Run the first time, compilation time will affect performance + In [4]: %timeit -r 1 -n 1 roll.apply(f, engine='numba', raw=True) # noqa: E225 + 1.23 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each) + # Function is cached and performance will improve + In [5]: %timeit roll.apply(f, engine='numba', raw=True) + 188 ms ± 1.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [6]: %timeit roll.apply(f, engine='cython', raw=True) + 3.92 s ± 59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + +.. _stats.rolling_window: + +Rolling windows +~~~~~~~~~~~~~~~ + +Passing ``win_type`` to ``.rolling`` generates a generic rolling window computation that is weighted according to the ``win_type``. +The following methods are available: + +.. csv-table:: + :header: "Method", "Description" + :widths: 20, 80 + + :meth:`~Window.sum`, Sum of values + :meth:`~Window.mean`, Mean of values + +The weights used in the window are specified by the ``win_type`` keyword.
+The list of recognized types are the `scipy.signal window functions +`__: + +* ``boxcar`` +* ``triang`` +* ``blackman`` +* ``hamming`` +* ``bartlett`` +* ``parzen`` +* ``bohman`` +* ``blackmanharris`` +* ``nuttall`` +* ``barthann`` +* ``kaiser`` (needs beta) +* ``gaussian`` (needs std) +* ``general_gaussian`` (needs power, width) +* ``slepian`` (needs width) +* ``exponential`` (needs tau). + +.. ipython:: python + + ser = pd.Series(np.random.randn(10), + index=pd.date_range('1/1/2000', periods=10)) + + ser.rolling(window=5, win_type='triang').mean() + +Note that the ``boxcar`` window is equivalent to :meth:`~Rolling.mean`. + +.. ipython:: python + + ser.rolling(window=5, win_type='boxcar').mean() + ser.rolling(window=5).mean() + +For some windowing functions, additional parameters must be specified: + +.. ipython:: python + + ser.rolling(window=5, win_type='gaussian').mean(std=0.1) + +.. _stats.moments.normalization: + +.. note:: + + For ``.sum()`` with a ``win_type``, there is no normalization done to the + weights for the window. Passing custom weights of ``[1, 1, 1]`` will yield a different + result than passing weights of ``[2, 2, 2]``, for example. When passing a + ``win_type`` instead of explicitly specifying the weights, the weights are + already normalized so that the largest weight is 1. + + In contrast, the nature of the ``.mean()`` calculation is + such that the weights are normalized with respect to each other. Weights + of ``[1, 1, 1]`` and ``[2, 2, 2]`` yield the same result. + +.. _stats.moments.ts: + +Time-aware rolling +~~~~~~~~~~~~~~~~~~ + +It is possible to pass an offset (or convertible) to a ``.rolling()`` method and have it produce +variable sized windows based on the passed time window. For each time point, this includes all preceding values occurring +within the indicated time delta. + +This can be particularly useful for a non-regular time frequency index. + +.. 
ipython:: python + + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.date_range('20130101 09:00:00', + periods=5, + freq='s')) + dft + +This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. + +.. ipython:: python + + dft.rolling(2).sum() + dft.rolling(2, min_periods=1).sum() + +Specifying an offset allows a more intuitive specification of the rolling frequency. + +.. ipython:: python + + dft.rolling('2s').sum() + +Using a non-regular, but still monotonic index, rolling with an integer window does not impart any special calculation. + + +.. ipython:: python + + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.Index([pd.Timestamp('20130101 09:00:00'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:05'), + pd.Timestamp('20130101 09:00:06')], + name='foo')) + dft + dft.rolling(2).sum() + + +Using the time-specification generates variable windows for this sparse data. + +.. ipython:: python + + dft.rolling('2s').sum() + +Furthermore, we now allow an optional ``on`` parameter to specify a column (rather than the +default of the index) in a DataFrame. + +.. ipython:: python + + dft = dft.reset_index() + dft + dft.rolling('2s', on='foo').sum() + +.. _stats.custom_rolling_window: + +Custom window rolling +~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.0 + +In addition to accepting an integer or offset as a ``window`` argument, ``rolling`` also accepts +a ``BaseIndexer`` subclass that allows a user to define a custom method for calculating window bounds. +The ``BaseIndexer`` subclass will need to define a ``get_window_bounds`` method that returns +a tuple of two arrays, the first being the starting indices of the windows and second being the +ending indices of the windows. 
Additionally, ``num_values``, ``min_periods``, ``center`` and ``closed`` +will automatically be passed to ``get_window_bounds`` and the defined method must +always accept these arguments. + +For example, if we have the following ``DataFrame``: + +.. ipython:: python + + use_expanding = [True, False, True, False, True] + use_expanding + df = pd.DataFrame({'values': range(5)}) + df + +and we want to use an expanding window where ``use_expanding`` is ``True`` otherwise a window of size +1, we can create the following ``BaseIndexer``: + +.. code-block:: ipython + + In [2]: from pandas.api.indexers import BaseIndexer + ...: + ...: class CustomIndexer(BaseIndexer): + ...: + ...: def get_window_bounds(self, num_values, min_periods, center, closed): + ...: start = np.empty(num_values, dtype=np.int64) + ...: end = np.empty(num_values, dtype=np.int64) + ...: for i in range(num_values): + ...: if self.use_expanding[i]: + ...: start[i] = 0 + ...: end[i] = i + 1 + ...: else: + ...: start[i] = i + ...: end[i] = i + self.window_size + ...: return start, end + ...: + + In [3]: indexer = CustomIndexer(window_size=1, use_expanding=use_expanding) + + In [4]: df.rolling(indexer).sum() + Out[4]: + values + 0 0.0 + 1 1.0 + 2 3.0 + 3 3.0 + 4 10.0 + + +.. _stats.rolling_window.endpoints: + +Rolling window endpoints +~~~~~~~~~~~~~~~~~~~~~~~~ + +The inclusion of the interval endpoints in rolling window calculations can be specified with the ``closed`` +parameter: + +.. csv-table:: + :header: "``closed``", "Description", "Default for" + :widths: 20, 30, 30 + + ``right``, close right endpoint, time-based windows + ``left``, close left endpoint, + ``both``, close both endpoints, fixed windows + ``neither``, open endpoints, + +For example, having the right endpoint open is useful in many problems that require that there is no contamination +from present information back to past information.
This allows the rolling window to compute statistics +"up to that point in time", but not including that point in time. + +.. ipython:: python + + df = pd.DataFrame({'x': 1}, + index=[pd.Timestamp('20130101 09:00:01'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:04'), + pd.Timestamp('20130101 09:00:06')]) + + df["right"] = df.rolling('2s', closed='right').x.sum() # default + df["both"] = df.rolling('2s', closed='both').x.sum() + df["left"] = df.rolling('2s', closed='left').x.sum() + df["neither"] = df.rolling('2s', closed='neither').x.sum() + + df + +Currently, this feature is only implemented for time-based windows. +For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. + +.. _stats.moments.ts-versus-resampling: + +Time-aware rolling vs. resampling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using ``.rolling()`` with a time-based index is quite similar to :ref:`resampling `. They +both operate and perform reductive operations on time-indexed pandas objects. + +When using ``.rolling()`` with an offset. The offset is a time-delta. Take a backwards-in-time looking window, and +aggregate all of the values in that window (including the end-point, but not the start-point). This is the new value +at that point in the result. These are variable sized windows in time-space for each point of the input. You will get +a same sized result as the input. + +When using ``.resample()`` with an offset. Construct a new index that is the frequency of the offset. For each frequency +bin, aggregate points from the input within a backwards-in-time looking window that fall in that bin. The result of this +aggregation is the output for that frequency point. The windows are fixed size in the frequency space. Your result +will have the shape of a regular frequency between the min and the max of the original input object. 
+ +To summarize, ``.rolling()`` is a time-based window operation, while ``.resample()`` is a frequency-based window operation. + +Centering windows +~~~~~~~~~~~~~~~~~ + +By default the labels are set to the right edge of the window, but a +``center`` keyword is available so the labels can be set at the center. + +.. ipython:: python + + ser.rolling(window=5).mean() + ser.rolling(window=5, center=True).mean() + +.. _stats.moments.binary: + +Binary window functions +~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~Rolling.cov` and :meth:`~Rolling.corr` can compute moving window statistics about +two ``Series`` or any combination of ``DataFrame/Series`` or +``DataFrame/DataFrame``. Here is the behavior in each case: + +* two ``Series``: compute the statistic for the pairing. +* ``DataFrame/Series``: compute the statistics for each column of the DataFrame + with the passed Series, thus returning a DataFrame. +* ``DataFrame/DataFrame``: by default compute the statistic for matching column + names, returning a DataFrame. If the keyword argument ``pairwise=True`` is + passed then computes the statistic for each pair of columns, returning a + ``MultiIndexed DataFrame`` whose ``index`` are the dates in question (see :ref:`the next section + `). + +For example: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 4), + index=pd.date_range('1/1/2000', periods=1000), + columns=['A', 'B', 'C', 'D']) + df = df.cumsum() + + df2 = df[:20] + df2.rolling(window=5).corr(df2['B']) + +.. _stats.moments.corr_pairwise: + +Computing rolling pairwise covariances and correlations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In financial data analysis and other fields it's common to compute covariance +and correlation matrices for a collection of time series. Often one is also +interested in moving-window covariance and correlation matrices. 
This can be +done by passing the ``pairwise`` keyword argument, which in the case of +``DataFrame`` inputs will yield a MultiIndexed ``DataFrame`` whose ``index`` are the dates in +question. In the case of a single DataFrame argument the ``pairwise`` argument +can even be omitted: + +.. note:: + + Missing values are ignored and each entry is computed using the pairwise + complete observations. Please see the :ref:`covariance section + ` for :ref:`caveats + ` associated with this method of + calculating covariance and correlation matrices. + +.. ipython:: python + + covs = (df[['B', 'C', 'D']].rolling(window=50) + .cov(df[['A', 'B', 'C']], pairwise=True)) + covs.loc['2002-09-22':] + +.. ipython:: python + + correls = df.rolling(window=50).corr() + correls.loc['2002-09-22':] + +You can efficiently retrieve the time series of correlations between two +columns by reshaping and indexing: + +.. ipython:: python + :suppress: + + plt.close('all') + +.. ipython:: python + + @savefig rolling_corr_pairwise_ex.png + correls.unstack(1)[('A', 'C')].plot() + +.. _stats.aggregate: + +Aggregation +----------- + +Once the ``Rolling``, ``Expanding`` or ``EWM`` objects have been created, several methods are available to +perform multiple computations on the data. These operations are similar to the :ref:`aggregating API `, +:ref:`groupby API `, and :ref:`resample API `. + + +.. ipython:: python + + dfa = pd.DataFrame(np.random.randn(1000, 3), + index=pd.date_range('1/1/2000', periods=1000), + columns=['A', 'B', 'C']) + r = dfa.rolling(window=60, min_periods=1) + r + +We can aggregate by passing a function to the entire DataFrame, or select a +Series (or multiple Series) via standard ``__getitem__``. + +.. ipython:: python + + r.aggregate(np.sum) + + r['A'].aggregate(np.sum) + + r[['A', 'B']].aggregate(np.sum) + +As you can see, the result of the aggregation will have the selected columns, or all +columns if none are selected. + +.. 
_stats.aggregate.multifunc: + +Applying multiple functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With windowed ``Series`` you can also pass a list of functions to do +aggregation with, outputting a DataFrame: + +.. ipython:: python + + r['A'].agg([np.sum, np.mean, np.std]) + +On a windowed DataFrame, you can pass a list of functions to apply to each +column, which produces an aggregated result with a hierarchical index: + +.. ipython:: python + + r.agg([np.sum, np.mean]) + +Passing a dict of functions has different behavior by default, see the next +section. + +Applying different functions to DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By passing a dict to ``aggregate`` you can apply a different aggregation to the +columns of a ``DataFrame``: + +.. ipython:: python + + r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)}) + +The function names can also be strings. In order for a string to be valid it +must be implemented on the windowed object + +.. ipython:: python + + r.agg({'A': 'sum', 'B': 'std'}) + +Furthermore you can pass a nested dict to indicate different aggregations on different columns. + +.. ipython:: python + + r.agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) + + +.. _stats.moments.expanding: + +Expanding windows +----------------- + +A common alternative to rolling statistics is to use an *expanding* window, +which yields the value of the statistic with all the data available up to that +point in time. + +These follow a similar interface to ``.rolling``, with the ``.expanding`` method +returning an :class:`~pandas.core.window.Expanding` object. + +As these calculations are a special case of rolling statistics, +they are implemented in pandas such that the following two calls are equivalent: + +.. ipython:: python + + df.rolling(window=len(df), min_periods=1).mean()[:5] + + df.expanding(min_periods=1).mean()[:5] + +These have a similar set of methods to ``.rolling`` methods. + +Method summary +~~~~~~~~~~~~~~ + +.. 
currentmodule:: pandas.core.window + +.. csv-table:: + :header: "Function", "Description" + :widths: 20, 80 + + :meth:`~Expanding.count`, Number of non-null observations + :meth:`~Expanding.sum`, Sum of values + :meth:`~Expanding.mean`, Mean of values + :meth:`~Expanding.median`, Arithmetic median of values + :meth:`~Expanding.min`, Minimum + :meth:`~Expanding.max`, Maximum + :meth:`~Expanding.std`, Unbiased standard deviation + :meth:`~Expanding.var`, Unbiased variance + :meth:`~Expanding.skew`, Unbiased skewness (3rd moment) + :meth:`~Expanding.kurt`, Unbiased kurtosis (4th moment) + :meth:`~Expanding.quantile`, Sample quantile (value at %) + :meth:`~Expanding.apply`, Generic apply + :meth:`~Expanding.cov`, Unbiased covariance (binary) + :meth:`~Expanding.corr`, Correlation (binary) + +.. currentmodule:: pandas + +Aside from not having a ``window`` parameter, these functions have the same +interfaces as their ``.rolling`` counterparts. Like above, the parameters they +all accept are: + +* ``min_periods``: threshold of non-null data points to require. Defaults to + minimum needed to compute statistic. No ``NaNs`` will be output once + ``min_periods`` non-null data points have been seen. +* ``center``: boolean, whether to set the labels at the center (default is False). + +.. _stats.moments.expanding.note: +.. note:: + + The output of the ``.rolling`` and ``.expanding`` methods do not return a + ``NaN`` if there are at least ``min_periods`` non-null values in the current + window. For example: + + .. ipython:: python + + sn = pd.Series([1, 2, np.nan, 3, np.nan, 4]) + sn + sn.rolling(2).max() + sn.rolling(2, min_periods=1).max() + + In case of expanding functions, this differs from :meth:`~DataFrame.cumsum`, + :meth:`~DataFrame.cumprod`, :meth:`~DataFrame.cummax`, + and :meth:`~DataFrame.cummin`, which return ``NaN`` in the output wherever + a ``NaN`` is encountered in the input. 
In order to match the output of ``cumsum`` + with ``expanding``, use :meth:`~DataFrame.fillna`: + + .. ipython:: python + + sn.expanding().sum() + sn.cumsum() + sn.cumsum().fillna(method='ffill') + + +An expanding window statistic will be more stable (and less responsive) than +its rolling window counterpart as the increasing window size decreases the +relative impact of an individual data point. As an example, here is the +:meth:`~core.window.Expanding.mean` output for the previous time series dataset: + +.. ipython:: python + :suppress: + + plt.close('all') + +.. ipython:: python + + s.plot(style='k--') + + @savefig expanding_mean_frame.png + s.expanding().mean().plot(style='k') + + +.. _stats.moments.exponentially_weighted: + +Exponentially weighted windows +------------------------------ + +.. currentmodule:: pandas.core.window + +A related set of functions are exponentially weighted versions of several of +the above statistics. A similar interface to ``.rolling`` and ``.expanding`` is accessed +through the ``.ewm`` method to receive an :class:`~EWM` object. +A number of expanding EW (exponentially weighted) +methods are provided: + + +.. csv-table:: + :header: "Function", "Description" + :widths: 20, 80 + + :meth:`~EWM.mean`, EW moving average + :meth:`~EWM.var`, EW moving variance + :meth:`~EWM.std`, EW moving standard deviation + :meth:`~EWM.corr`, EW moving correlation + :meth:`~EWM.cov`, EW moving covariance + +In general, a weighted moving average is calculated as + +.. math:: + + y_t = \frac{\sum_{i=0}^t w_i x_{t-i}}{\sum_{i=0}^t w_i}, + +where :math:`x_t` is the input, :math:`y_t` is the result and the :math:`w_i` +are the weights. + +The EW functions support two variants of exponential weights. +The default, ``adjust=True``, uses the weights :math:`w_i = (1 - \alpha)^i` +which gives + +.. math:: + + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ... + + (1 - \alpha)^t x_{0}}{1 + (1 - \alpha) + (1 - \alpha)^2 + ... 
+ + (1 - \alpha)^t} + +When ``adjust=False`` is specified, moving averages are calculated as + +.. math:: + + y_0 &= x_0 \\ + y_t &= (1 - \alpha) y_{t-1} + \alpha x_t, + +which is equivalent to using weights + +.. math:: + + w_i = \begin{cases} + \alpha (1 - \alpha)^i & \text{if } i < t \\ + (1 - \alpha)^i & \text{if } i = t. + \end{cases} + +.. note:: + + These equations are sometimes written in terms of :math:`\alpha' = 1 - \alpha`, e.g. + + .. math:: + + y_t = \alpha' y_{t-1} + (1 - \alpha') x_t. + +The difference between the above two variants arises because we are +dealing with series which have finite history. Consider a series of infinite +history, with ``adjust=True``: + +.. math:: + + y_t = \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...} + {1 + (1 - \alpha) + (1 - \alpha)^2 + ...} + +Noting that the denominator is a geometric series with initial term equal to 1 +and a ratio of :math:`1 - \alpha` we have + +.. math:: + + y_t &= \frac{x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...} + {\frac{1}{1 - (1 - \alpha)}}\\ + &= [x_t + (1 - \alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...] \alpha \\ + &= \alpha x_t + [(1-\alpha)x_{t-1} + (1 - \alpha)^2 x_{t-2} + ...]\alpha \\ + &= \alpha x_t + (1 - \alpha)[x_{t-1} + (1 - \alpha) x_{t-2} + ...]\alpha\\ + &= \alpha x_t + (1 - \alpha) y_{t-1} + +which is the same expression as ``adjust=False`` above and therefore +shows the equivalence of the two variants for infinite series. +When ``adjust=False``, we have :math:`y_0 = x_0` and +:math:`y_t = \alpha x_t + (1 - \alpha) y_{t-1}`. +Therefore, there is an assumption that :math:`x_0` is not an ordinary value +but rather an exponentially weighted moment of the infinite series up to that +point. + +One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass +:math:`\alpha` directly, it's often easier to think about either the +**span**, **center of mass (com)** or **half-life** of an EW moment: + +.. 
math:: + + \alpha = + \begin{cases} + \frac{2}{s + 1}, & \text{for span}\ s \geq 1\\ + \frac{1}{1 + c}, & \text{for center of mass}\ c \geq 0\\ + 1 - \exp\left(\frac{\log 0.5}{h}\right), & \text{for half-life}\ h > 0 + \end{cases} + +One must specify precisely one of **span**, **center of mass**, **half-life** +and **alpha** to the EW functions: + +* **Span** corresponds to what is commonly called an "N-day EW moving average". +* **Center of mass** has a more physical interpretation and can be thought of + in terms of span: :math:`c = (s - 1) / 2`. +* **Half-life** is the period of time for the exponential weight to reduce to + one half. +* **Alpha** specifies the smoothing factor directly. + +Here is an example for a univariate time series: + +.. ipython:: python + + s.plot(style='k--') + + @savefig ewma_ex.png + s.ewm(span=20).mean().plot(style='k') + +EWM has a ``min_periods`` argument, which has the same +meaning it does for all the ``.expanding`` and ``.rolling`` methods: +no output values will be set until at least ``min_periods`` non-null values +are encountered in the (expanding) window. + +EWM also has an ``ignore_na`` argument, which determines how +intermediate null values affect the calculation of the weights. +When ``ignore_na=False`` (the default), weights are calculated based on absolute +positions, so that intermediate null values affect the result. +When ``ignore_na=True``, +weights are calculated by ignoring intermediate null values. +For example, assuming ``adjust=True``, if ``ignore_na=False``, the weighted +average of ``3, NaN, 5`` would be calculated as + +.. math:: + + \frac{(1-\alpha)^2 \cdot 3 + 1 \cdot 5}{(1-\alpha)^2 + 1}. + +Whereas if ``ignore_na=True``, the weighted average would be calculated as + +.. math:: + + \frac{(1-\alpha) \cdot 3 + 1 \cdot 5}{(1-\alpha) + 1}. + +The :meth:`~EWM.var`, :meth:`~EWM.std`, and :meth:`~EWM.cov` functions have a ``bias`` argument, +specifying whether the result should contain biased or unbiased statistics.
+For example, if ``bias=True``, ``ewmvar(x)`` is calculated as +``ewmvar(x) = ewma(x**2) - ewma(x)**2``; +whereas if ``bias=False`` (the default), the biased variance statistics +are scaled by debiasing factors + +.. math:: + + \frac{\left(\sum_{i=0}^t w_i\right)^2}{\left(\sum_{i=0}^t w_i\right)^2 - \sum_{i=0}^t w_i^2}. + +(For :math:`w_i = 1`, this reduces to the usual :math:`N / (N - 1)` factor, +with :math:`N = t + 1`.) +See `Weighted Sample Variance `__ +on Wikipedia for further details. diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst new file mode 100644 index 00000000..2df5b9d8 --- /dev/null +++ b/doc/source/user_guide/enhancingperf.rst @@ -0,0 +1,828 @@ +.. _enhancingperf: + +{{ header }} + +********************* +Enhancing performance +********************* + +In this part of the tutorial, we will investigate how to speed up certain +functions operating on pandas ``DataFrames`` using three different techniques: +Cython, Numba and :func:`pandas.eval`. We will see a speed improvement of ~200 +when we use Cython and Numba on a test function operating row-wise on the +``DataFrame``. Using :func:`pandas.eval` we will speed up a sum by an order of +~2. + +.. _enhancingperf.cython: + +Cython (writing C extensions for pandas) +---------------------------------------- + +For many use cases writing pandas in pure Python and NumPy is sufficient. In some +computationally heavy applications however, it can be possible to achieve sizable +speed-ups by offloading work to `cython `__. + +This tutorial assumes you have refactored as much as possible in Python, for example +by trying to remove for-loops and making use of NumPy vectorization. It's always worth +optimising in Python first. + +This tutorial walks through a "typical" process of cythonizing a slow computation. +We use an `example from the Cython documentation `__ +but in the context of pandas. 
Our final cythonized solution is around 100 times +faster than the pure Python solution. + +.. _enhancingperf.pure: + +Pure Python +~~~~~~~~~~~ + +We have a ``DataFrame`` to which we want to apply a function row-wise. + +.. ipython:: python + + df = pd.DataFrame({'a': np.random.randn(1000), + 'b': np.random.randn(1000), + 'N': np.random.randint(100, 1000, (1000)), + 'x': 'x'}) + df + +Here's the function in pure Python: + +.. ipython:: python + + def f(x): + return x * (x - 1) + + def integrate_f(a, b, N): + s = 0 + dx = (b - a) / N + for i in range(N): + s += f(a + i * dx) + return s * dx + +We achieve our result by using ``apply`` (row-wise): + +.. code-block:: ipython + + In [7]: %timeit df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1) + 10 loops, best of 3: 174 ms per loop + +But clearly this isn't fast enough for us. Let's take a look and see where the +time is spent during this operation (limited to the most time consuming +four calls) using the `prun ipython magic function `__: + +.. ipython:: python + + %prun -l 4 df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1) # noqa E999 + +By far the majority of time is spent inside either ``integrate_f`` or ``f``, +hence we'll concentrate our efforts cythonizing these two functions. + +.. note:: + + In Python 2 replacing the ``range`` with its generator counterpart (``xrange``) + would mean the ``range`` line would vanish. In Python 3 ``range`` is already a generator. + +.. _enhancingperf.plain: + +Plain Cython +~~~~~~~~~~~~ + +First we're going to need to import the Cython magic function to ipython: + +.. ipython:: python + :okwarning: + + %load_ext Cython + + +Now, let's simply copy our functions over to Cython as is (the suffix +is here to distinguish between function versions): + +..
ipython:: + + In [2]: %%cython + ...: def f_plain(x): + ...: return x * (x - 1) + ...: def integrate_f_plain(a, b, N): + ...: s = 0 + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_plain(a + i * dx) + ...: return s * dx + ...: + +.. note:: + + If you're having trouble pasting the above into your ipython, you may need + to be using bleeding edge ipython for paste to play well with cell magics. + + +.. code-block:: ipython + + In [4]: %timeit df.apply(lambda x: integrate_f_plain(x['a'], x['b'], x['N']), axis=1) + 10 loops, best of 3: 85.5 ms per loop + +Already this has shaved a third off, not too bad for a simple copy and paste. + +.. _enhancingperf.type: + +Adding type +~~~~~~~~~~~ + +We get another huge improvement simply by providing type information: + +.. ipython:: + + In [3]: %%cython + ...: cdef double f_typed(double x) except? -2: + ...: return x * (x - 1) + ...: cpdef double integrate_f_typed(double a, double b, int N): + ...: cdef int i + ...: cdef double s, dx + ...: s = 0 + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_typed(a + i * dx) + ...: return s * dx + ...: + +.. code-block:: ipython + + In [4]: %timeit df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1) + 10 loops, best of 3: 20.3 ms per loop + +Now, we're talking! It's now over ten times faster than the original python +implementation, and we haven't *really* modified the code. Let's have another +look at what's eating up time: + +.. ipython:: python + + %prun -l 4 df.apply(lambda x: integrate_f_typed(x['a'], x['b'], x['N']), axis=1) + +.. _enhancingperf.ndarray: + +Using ndarray +~~~~~~~~~~~~~ + +It's calling series... a lot! It's creating a Series from each row, and get-ting from both +the index and the series (three times for each row). Function calls are expensive +in Python, so maybe we could minimize these by cythonizing the apply part. + +.. 
note:: + + We are now passing ndarrays into the Cython function, fortunately Cython plays + very nicely with NumPy. + +.. ipython:: + + In [4]: %%cython + ...: cimport numpy as np + ...: import numpy as np + ...: cdef double f_typed(double x) except? -2: + ...: return x * (x - 1) + ...: cpdef double integrate_f_typed(double a, double b, int N): + ...: cdef int i + ...: cdef double s, dx + ...: s = 0 + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_typed(a + i * dx) + ...: return s * dx + ...: cpdef np.ndarray[double] apply_integrate_f(np.ndarray col_a, np.ndarray col_b, + ...: np.ndarray col_N): + ...: assert (col_a.dtype == np.float + ...: and col_b.dtype == np.float and col_N.dtype == np.int) + ...: cdef Py_ssize_t i, n = len(col_N) + ...: assert (len(col_a) == len(col_b) == n) + ...: cdef np.ndarray[double] res = np.empty(n) + ...: for i in range(len(col_a)): + ...: res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i]) + ...: return res + ...: + + +The implementation is simple, it creates an array of zeros and loops over +the rows, applying our ``integrate_f_typed``, and putting this in the zeros array. + + +.. warning:: + + You can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter + to a Cython function. Instead pass the actual ``ndarray`` using the + :meth:`Series.to_numpy`. The reason is that the Cython + definition is specific to an ndarray and not the passed ``Series``. + + So, do not do this: + + .. code-block:: python + + apply_integrate_f(df['a'], df['b'], df['N']) + + But rather, use :meth:`Series.to_numpy` to get the underlying ``ndarray``: + + .. code-block:: python + + apply_integrate_f(df['a'].to_numpy(), + df['b'].to_numpy(), + df['N'].to_numpy()) + +.. note:: + + Loops like this would be *extremely* slow in Python, but in Cython looping + over NumPy arrays is *fast*. + +.. 
code-block:: ipython + + In [4]: %timeit apply_integrate_f(df['a'].to_numpy(), + df['b'].to_numpy(), + df['N'].to_numpy()) + 1000 loops, best of 3: 1.25 ms per loop + +We've gotten another big improvement. Let's check again where the time is spent: + +.. ipython:: python + + %%prun -l 4 apply_integrate_f(df['a'].to_numpy(), + df['b'].to_numpy(), + df['N'].to_numpy()) + +As one might expect, the majority of the time is now spent in ``apply_integrate_f``, +so if we wanted to make any more efficiency gains, we must continue to concentrate our +efforts here. + +.. _enhancingperf.boundswrap: + +More advanced techniques +~~~~~~~~~~~~~~~~~~~~~~~~ + +There is still hope for improvement. Here's an example of using some more +advanced Cython techniques: + +.. ipython:: + + In [5]: %%cython + ...: cimport cython + ...: cimport numpy as np + ...: import numpy as np + ...: cdef double f_typed(double x) except? -2: + ...: return x * (x - 1) + ...: cpdef double integrate_f_typed(double a, double b, int N): + ...: cdef int i + ...: cdef double s, dx + ...: s = 0 + ...: dx = (b - a) / N + ...: for i in range(N): + ...: s += f_typed(a + i * dx) + ...: return s * dx + ...: @cython.boundscheck(False) + ...: @cython.wraparound(False) + ...: cpdef np.ndarray[double] apply_integrate_f_wrap(np.ndarray[double] col_a, + ...: np.ndarray[double] col_b, + ...: np.ndarray[int] col_N): + ...: cdef int i, n = len(col_N) + ...: assert len(col_a) == len(col_b) == n + ...: cdef np.ndarray[double] res = np.empty(n) + ...: for i in range(n): + ...: res[i] = integrate_f_typed(col_a[i], col_b[i], col_N[i]) + ...: return res + ...: + +.. code-block:: ipython + + In [4]: %timeit apply_integrate_f_wrap(df['a'].to_numpy(), + df['b'].to_numpy(), + df['N'].to_numpy()) + 1000 loops, best of 3: 987 us per loop + +Even faster, with the caveat that a bug in our Cython code (an off-by-one error, +for example) might cause a segfault because memory access isn't checked.
+For more about ``boundscheck`` and ``wraparound``, see the Cython docs on +`compiler directives `__. + +.. _enhancingperf.numba: + +Using Numba +----------- + +A recent alternative to statically compiling Cython code, is to use a *dynamic jit-compiler*, Numba. + +Numba gives you the power to speed up your applications with high performance functions written directly in Python. With a few annotations, array-oriented and math-heavy Python code can be just-in-time compiled to native machine instructions, similar in performance to C, C++ and Fortran, without having to switch languages or Python interpreters. + +Numba works by generating optimized machine code using the LLVM compiler infrastructure at import time, runtime, or statically (using the included pycc tool). Numba supports compilation of Python to run on either CPU or GPU hardware, and is designed to integrate with the Python scientific software stack. + +.. note:: + + You will need to install Numba. This is easy with ``conda``, by using: ``conda install numba``, see :ref:`installing using miniconda`. + +.. note:: + + As of Numba version 0.20, pandas objects cannot be passed directly to Numba-compiled functions. Instead, one must pass the NumPy array underlying the pandas object to the Numba-compiled function as demonstrated below. + +Jit +~~~ + +We demonstrate how to use Numba to just-in-time compile our code. We simply +take the plain Python code from above and annotate with the ``@jit`` decorator. + +.. 
code-block:: python + + import numba + + + @numba.jit + def f_plain(x): + return x * (x - 1) + + + @numba.jit + def integrate_f_numba(a, b, N): + s = 0 + dx = (b - a) / N + for i in range(N): + s += f_plain(a + i * dx) + return s * dx + + + @numba.jit + def apply_integrate_f_numba(col_a, col_b, col_N): + n = len(col_N) + result = np.empty(n, dtype='float64') + assert len(col_a) == len(col_b) == n + for i in range(n): + result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i]) + return result + + + def compute_numba(df): + result = apply_integrate_f_numba(df['a'].to_numpy(), + df['b'].to_numpy(), + df['N'].to_numpy()) + return pd.Series(result, index=df.index, name='result') + +Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a +nicer interface by passing/returning pandas objects. + +.. code-block:: ipython + + In [4]: %timeit compute_numba(df) + 1000 loops, best of 3: 798 us per loop + +In this example, using Numba was faster than Cython. + +Vectorize +~~~~~~~~~ + +Numba can also be used to write vectorized functions that do not require the user to explicitly +loop over the observations of a vector; a vectorized function will be applied to each row automatically. +Consider the following toy example of doubling each observation: + +.. code-block:: python + + import numba + + + def double_every_value_nonumba(x): + return x * 2 + + + @numba.vectorize + def double_every_value_withnumba(x): # noqa E501 + return x * 2 + +.. 
code-block:: ipython + + # Custom function without numba + In [5]: %timeit df['col1_doubled'] = df['a'].apply(double_every_value_nonumba) # noqa E501 + 1000 loops, best of 3: 797 us per loop + + # Standard implementation (faster than a custom function) + In [6]: %timeit df['col1_doubled'] = df['a'] * 2 + 1000 loops, best of 3: 233 us per loop + + # Custom function with numba + In [7]: %timeit df['col1_doubled'] = double_every_value_withnumba(df['a'].to_numpy()) + 1000 loops, best of 3: 145 us per loop + +Caveats +~~~~~~~ + +.. note:: + + Numba will execute on any function, but can only accelerate certain classes of functions. + +Numba is best at accelerating functions that apply numerical functions to NumPy +arrays. When passed a function that only uses operations it knows how to +accelerate, it will execute in ``nopython`` mode. + +If Numba is passed a function that includes something it doesn't know how to +work with -- a category that currently includes sets, lists, dictionaries, or +string functions -- it will revert to ``object mode``. In ``object mode``, +Numba will execute but your code will not speed up significantly. If you would +prefer that Numba throw an error if it cannot compile a function in a way that +speeds up your code, pass Numba the argument +``nopython=True`` (e.g. ``@numba.jit(nopython=True)``). For more on +troubleshooting Numba modes, see the `Numba troubleshooting page +`__. + +Read more in the `Numba docs `__. + +.. _enhancingperf.eval: + +Expression evaluation via :func:`~pandas.eval` +----------------------------------------------- + +The top-level function :func:`pandas.eval` implements expression evaluation of +:class:`~pandas.Series` and :class:`~pandas.DataFrame` objects. + +.. note:: + + To benefit from using :func:`~pandas.eval` you need to + install ``numexpr``. See the :ref:`recommended dependencies section + ` for more details.
+ +The point of using :func:`~pandas.eval` for expression evaluation rather than +plain Python is two-fold: 1) large :class:`~pandas.DataFrame` objects are +evaluated more efficiently and 2) large arithmetic and boolean expressions are +evaluated all at once by the underlying engine (by default ``numexpr`` is used +for evaluation). + +.. note:: + + You should not use :func:`~pandas.eval` for simple + expressions or for expressions involving small DataFrames. In fact, + :func:`~pandas.eval` is many orders of magnitude slower for + smaller expressions/objects than plain ol' Python. A good rule of thumb is + to only use :func:`~pandas.eval` when you have a + :class:`~pandas.core.frame.DataFrame` with more than 10,000 rows. + + +:func:`~pandas.eval` supports all arithmetic expressions supported by the +engine in addition to some extensions available only in pandas. + +.. note:: + + The larger the frame and the larger the expression the more speedup you will + see from using :func:`~pandas.eval`. + +Supported syntax +~~~~~~~~~~~~~~~~ + +These operations are supported by :func:`pandas.eval`: + +* Arithmetic operations except for the left shift (``<<``) and right shift + (``>>``) operators, e.g., ``df + 2 * pi / s ** 4 % 42 - the_golden_ratio`` +* Comparison operations, including chained comparisons, e.g., ``2 < df < df2`` +* Boolean operations, e.g., ``df < df2 and df3 < df4 or not df_bool`` +* ``list`` and ``tuple`` literals, e.g., ``[1, 2]`` or ``(1, 2)`` +* Attribute access, e.g., ``df.a`` +* Subscript expressions, e.g., ``df[0]`` +* Simple variable evaluation, e.g., ``pd.eval('df')`` (this is not very useful) +* Math functions: `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, + `sqrt`, `sinh`, `cosh`, `tanh`, `arcsin`, `arccos`, `arctan`, `arccosh`, + `arcsinh`, `arctanh`, `abs`, `arctan2` and `log10`. + +This Python syntax is **not** allowed: + +* Expressions + + * Function calls other than math functions. 
+ * ``is``/``is not`` operations + * ``if`` expressions + * ``lambda`` expressions + * ``list``/``set``/``dict`` comprehensions + * Literal ``dict`` and ``set`` expressions + * ``yield`` expressions + * Generator expressions + * Boolean expressions consisting of only scalar values + +* Statements + + * Neither `simple `__ + nor `compound `__ + statements are allowed. This includes things like ``for``, ``while``, and + ``if``. + + + +:func:`~pandas.eval` examples +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:func:`pandas.eval` works well with expressions containing large arrays. + +First let's create a few decent-sized arrays to play with: + +.. ipython:: python + + nrows, ncols = 20000, 100 + df1, df2, df3, df4 = [pd.DataFrame(np.random.randn(nrows, ncols)) for _ in range(4)] + + +Now let's compare adding them together using plain ol' Python versus +:func:`~pandas.eval`: + +.. ipython:: python + + %timeit df1 + df2 + df3 + df4 + +.. ipython:: python + + %timeit pd.eval('df1 + df2 + df3 + df4') + + +Now let's do the same thing but with comparisons: + +.. ipython:: python + + %timeit (df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0) + +.. ipython:: python + + %timeit pd.eval('(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)') + + +:func:`~pandas.eval` also works with unaligned pandas objects: + +.. ipython:: python + + s = pd.Series(np.random.randn(50)) + %timeit df1 + df2 + df3 + df4 + s + +.. ipython:: python + + %timeit pd.eval('df1 + df2 + df3 + df4 + s') + +.. note:: + + Operations such as + + .. code-block:: python + + 1 and 2 # would parse to 1 & 2, but should evaluate to 2 + 3 or 4 # would parse to 3 | 4, but should evaluate to 3 + ~1 # this is okay, but slower when using eval + + should be performed in Python. An exception will be raised if you try to + perform any boolean/bitwise operations with scalar operands that are not + of type ``bool`` or ``np.bool_``. Again, you should perform these kinds of + operations in plain Python. 
+ +The ``DataFrame.eval`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to the top level :func:`pandas.eval` function you can also +evaluate an expression in the "context" of a :class:`~pandas.DataFrame`. + +.. ipython:: python + :suppress: + + try: + del a + except NameError: + pass + + try: + del b + except NameError: + pass + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b']) + df.eval('a + b') + +Any expression that is a valid :func:`pandas.eval` expression is also a valid +:meth:`DataFrame.eval` expression, with the added benefit that you don't have to +prefix the name of the :class:`~pandas.DataFrame` to the column(s) you're +interested in evaluating. + +In addition, you can perform assignment of columns within an expression. +This allows for *formulaic evaluation*. The assignment target can be a +new column name or an existing column name, and it must be a valid Python +identifier. + +The ``inplace`` keyword determines whether this assignment will be performed +on the original ``DataFrame`` or return a copy with the new column. + +.. warning:: + + For backwards compatibility, ``inplace`` defaults to ``True`` if not + specified. This will change in a future version of pandas - if your + code depends on an inplace assignment you should update to explicitly + set ``inplace=True``. + +.. ipython:: python + + df = pd.DataFrame(dict(a=range(5), b=range(5, 10))) + df.eval('c = a + b', inplace=True) + df.eval('d = a + b + c', inplace=True) + df.eval('a = 1', inplace=True) + df + +When ``inplace`` is set to ``False``, a copy of the ``DataFrame`` with the +new or modified columns is returned and the original frame is unchanged. + +.. ipython:: python + + df + df.eval('e = a - c', inplace=False) + df + +As a convenience, multiple assignments can be performed by using a +multi-line string. + +.. 
ipython:: python + + df.eval(""" + c = a + b + d = a + b + c + a = 1""", inplace=False) + +The equivalent in standard Python would be + +.. ipython:: python + + df = pd.DataFrame(dict(a=range(5), b=range(5, 10))) + df['c'] = df['a'] + df['b'] + df['d'] = df['a'] + df['b'] + df['c'] + df['a'] = 1 + df + +The ``query`` method has an ``inplace`` keyword which determines +whether the query modifies the original frame. + +.. ipython:: python + + df = pd.DataFrame(dict(a=range(5), b=range(5, 10))) + df.query('a > 2') + df.query('a > 2', inplace=True) + df + +.. warning:: + + Unlike with ``eval``, the default value for ``inplace`` for ``query`` + is ``False``. This is consistent with prior versions of pandas. + +Local variables +~~~~~~~~~~~~~~~ + +You must *explicitly reference* any local variable that you want to use in an +expression by placing the ``@`` character in front of the name. For example, + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 2), columns=list('ab')) + newcol = np.random.randn(len(df)) + df.eval('b + @newcol') + df.query('b < @newcol') + +If you don't prefix the local variable with ``@``, pandas will raise an +exception telling you the variable is undefined. + +When using :meth:`DataFrame.eval` and :meth:`DataFrame.query`, this allows you +to have a local variable and a :class:`~pandas.DataFrame` column with the same +name in an expression. + + +.. ipython:: python + + a = np.random.randn() + df.query('@a < a') + df.loc[a < df['a']] # same as the previous expression + +With :func:`pandas.eval` you cannot use the ``@`` prefix *at all*, because it +isn't defined in that context. ``pandas`` will let you know this if you try to +use ``@`` in a top-level call to :func:`pandas.eval`. For example, + +.. ipython:: python + :okexcept: + + a, b = 1, 2 + pd.eval('@a + b') + +In this case, you should simply refer to the variables like you would in +standard Python. + +.. 
ipython:: python + + pd.eval('a + b') + + +:func:`pandas.eval` parsers +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are two different parsers and two different engines you can use as +the backend. + +The default ``'pandas'`` parser allows a more intuitive syntax for expressing +query-like operations (comparisons, conjunctions and disjunctions). In +particular, the precedence of the ``&`` and ``|`` operators is made equal to +the precedence of the corresponding boolean operations ``and`` and ``or``. + +For example, the above conjunction can be written without parentheses. +Alternatively, you can use the ``'python'`` parser to enforce strict Python +semantics. + +.. ipython:: python + + expr = '(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)' + x = pd.eval(expr, parser='python') + expr_no_parens = 'df1 > 0 & df2 > 0 & df3 > 0 & df4 > 0' + y = pd.eval(expr_no_parens, parser='pandas') + np.all(x == y) + + +The same expression can be "anded" together with the word :keyword:`and` as +well: + +.. ipython:: python + + expr = '(df1 > 0) & (df2 > 0) & (df3 > 0) & (df4 > 0)' + x = pd.eval(expr, parser='python') + expr_with_ands = 'df1 > 0 and df2 > 0 and df3 > 0 and df4 > 0' + y = pd.eval(expr_with_ands, parser='pandas') + np.all(x == y) + + +The ``and`` and ``or`` operators here have the same precedence that they would +in vanilla Python. + + +:func:`pandas.eval` backends +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There's also the option to make :func:`~pandas.eval` operate identically to plain +ol' Python. + +.. note:: + + Using the ``'python'`` engine is generally *not* useful, except for testing + other evaluation engines against it. You will achieve **no** performance + benefits using :func:`~pandas.eval` with ``engine='python'`` and in fact may + incur a performance hit. + +You can see this by using :func:`pandas.eval` with the ``'python'`` engine. It +is a bit slower (not by much) than evaluating the same expression in Python + +.. ipython:: python + + %timeit df1 + df2 + df3 + df4 + +.. 
ipython:: python + + %timeit pd.eval('df1 + df2 + df3 + df4', engine='python') + + +:func:`pandas.eval` performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:func:`~pandas.eval` is intended to speed up certain kinds of operations. In +particular, those operations involving complex expressions with large +:class:`~pandas.DataFrame`/:class:`~pandas.Series` objects should see a +significant performance benefit. Here is a plot showing the running time of +:func:`pandas.eval` as a function of the size of the frame involved in the +computation. The two lines are two different engines. + + +.. image:: ../_static/eval-perf.png + + +.. note:: + + Operations with smallish objects (around 15k-20k rows) are faster using + plain Python: + + .. image:: ../_static/eval-perf-small.png + + +This plot was created using a ``DataFrame`` with 3 columns each containing +floating point values generated using ``numpy.random.randn()``. + +Technical minutia regarding expression evaluation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Expressions that would result in an object dtype or involve datetime operations +(because of ``NaT``) must be evaluated in Python space. The main reason for +this behavior is to maintain backwards compatibility with versions of NumPy < +1.7. In those versions of NumPy a call to ``ndarray.astype(str)`` will +truncate any strings that are more than 60 characters in length. Second, we +can't pass ``object`` arrays to ``numexpr`` thus string comparisons must be +evaluated in Python space. + +The upshot is that this *only* applies to object-dtype expressions. So, if +you have an expression--for example + +.. ipython:: python + + df = pd.DataFrame({'strings': np.repeat(list('cba'), 3), + 'nums': np.repeat(range(3), 3)}) + df + df.query('strings == "a" and nums == 1') + +the numeric part of the comparison (``nums == 1``) will be evaluated by +``numexpr``. 
+ +In general, :meth:`DataFrame.query`/:func:`pandas.eval` will +evaluate the subexpressions that *can* be evaluated by ``numexpr`` and those +that must be evaluated in Python space transparently to the user. This is done +by inferring the result type of an expression from its arguments and operators. diff --git a/doc/source/user_guide/gotchas.rst b/doc/source/user_guide/gotchas.rst new file mode 100644 index 00000000..f9a72b87 --- /dev/null +++ b/doc/source/user_guide/gotchas.rst @@ -0,0 +1,343 @@ +.. _gotchas: + +{{ header }} + +******************************** +Frequently Asked Questions (FAQ) +******************************** + +.. _df-memory-usage: + +DataFrame memory usage +---------------------- +The memory usage of a ``DataFrame`` (including the index) is shown when calling +the :meth:`~DataFrame.info`. A configuration option, ``display.memory_usage`` +(see :ref:`the list of options `), specifies if the +``DataFrame``'s memory usage will be displayed when invoking the ``df.info()`` +method. + +For example, the memory usage of the ``DataFrame`` below is shown +when calling :meth:`~DataFrame.info`: + +.. ipython:: python + + dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]', + 'complex128', 'object', 'bool'] + n = 5000 + data = {t: np.random.randint(100, size=n).astype(t) for t in dtypes} + df = pd.DataFrame(data) + df['categorical'] = df['object'].astype('category') + + df.info() + +The ``+`` symbol indicates that the true memory usage could be higher, because +pandas does not count the memory used by values in columns with +``dtype=object``. + +Passing ``memory_usage='deep'`` will enable a more accurate memory usage report, +accounting for the full usage of the contained objects. This is optional +as it can be expensive to do this deeper introspection. + +.. 
ipython:: python + + df.info(memory_usage='deep') + +By default the display option is set to ``True`` but can be explicitly +overridden by passing the ``memory_usage`` argument when invoking ``df.info()``. + +The memory usage of each column can be found by calling the +:meth:`~DataFrame.memory_usage` method. This returns a ``Series`` with an index +represented by column names and memory usage of each column shown in bytes. For +the ``DataFrame`` above, the memory usage of each column and the total memory +usage can be found with the ``memory_usage`` method: + +.. ipython:: python + + df.memory_usage() + + # total memory usage of dataframe + df.memory_usage().sum() + +By default the memory usage of the ``DataFrame``'s index is shown in the +returned ``Series``, the memory usage of the index can be suppressed by passing +the ``index=False`` argument: + +.. ipython:: python + + df.memory_usage(index=False) + +The memory usage displayed by the :meth:`~DataFrame.info` method utilizes the +:meth:`~DataFrame.memory_usage` method to determine the memory usage of a +``DataFrame`` while also formatting the output in human-readable units (base-2 +representation; i.e. 1KB = 1024 bytes). + +See also :ref:`Categorical Memory Usage `. + +.. _gotchas.truth: + +Using if/truth statements with pandas +------------------------------------- + +pandas follows the NumPy convention of raising an error when you try to convert +something to a ``bool``. This happens in an ``if``-statement or when using the +boolean operations: ``and``, ``or``, and ``not``. It is not clear what the result +of the following code should be: + +.. code-block:: python + + >>> if pd.Series([False, True, False]): + ... pass + +Should it be ``True`` because it's not zero-length, or ``False`` because there +are ``False`` values? It is unclear, so instead, pandas raises a ``ValueError``: + +.. code-block:: python + + >>> if pd.Series([False, True, False]): + ... print("I was true") + Traceback + ... 
+ ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). + +You need to explicitly choose what you want to do with the ``DataFrame``, e.g. +use :meth:`~DataFrame.any`, :meth:`~DataFrame.all` or :meth:`~DataFrame.empty`. +Alternatively, you might want to compare if the pandas object is ``None``: + +.. code-block:: python + + >>> if pd.Series([False, True, False]) is not None: + ... print("I was not None") + I was not None + + +Below is how to check if any of the values are ``True``: + +.. code-block:: python + + >>> if pd.Series([False, True, False]).any(): + ... print("I am any") + I am any + +To evaluate single-element pandas objects in a boolean context, use the method +:meth:`~DataFrame.bool`: + +.. ipython:: python + + pd.Series([True]).bool() + pd.Series([False]).bool() + pd.DataFrame([[True]]).bool() + pd.DataFrame([[False]]).bool() + +Bitwise boolean +~~~~~~~~~~~~~~~ + +Bitwise boolean operators like ``==`` and ``!=`` return a boolean ``Series``, +which is almost always what you want anyways. + +.. code-block:: python + + >>> s = pd.Series(range(5)) + >>> s == 4 + 0 False + 1 False + 2 False + 3 False + 4 True + dtype: bool + +See :ref:`boolean comparisons` for more examples. + +Using the ``in`` operator +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the Python ``in`` operator on a ``Series`` tests for membership in the +index, not membership among the values. + +.. ipython:: python + + s = pd.Series(range(5), index=list('abcde')) + 2 in s + 'b' in s + +If this behavior is surprising, keep in mind that using ``in`` on a Python +dictionary tests keys, not values, and ``Series`` are dict-like. +To test for membership in the values, use the method :meth:`~pandas.Series.isin`: + +.. ipython:: python + + s.isin([2]) + s.isin([2]).any() + +For ``DataFrames``, likewise, ``in`` applies to the column axis, +testing for membership in the list of column names. 
+ +``NaN``, Integer ``NA`` values and ``NA`` type promotions +--------------------------------------------------------- + +Choice of ``NA`` representation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For lack of ``NA`` (missing) support from the ground up in NumPy and Python in +general, we were given the difficult choice between either: + +* A *masked array* solution: an array of data and an array of boolean values + indicating whether a value is there or is missing. +* Using a special sentinel value, bit pattern, or set of sentinel values to + denote ``NA`` across the dtypes. + +For many reasons we chose the latter. After years of production use it has +proven, at least in my opinion, to be the best decision given the state of +affairs in NumPy and Python in general. The special value ``NaN`` +(Not-A-Number) is used everywhere as the ``NA`` value, and there are API +functions ``isna`` and ``notna`` which can be used across the dtypes to +detect NA values. + +However, it comes with a couple of trade-offs which I most certainly have +not ignored. + +.. _gotchas.intna: + +Support for integer ``NA`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In the absence of high performance ``NA`` support being built into NumPy from +the ground up, the primary casualty is the ability to represent NAs in integer +arrays. For example: + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4, 5], index=list('abcde')) + s + s.dtype + + s2 = s.reindex(['a', 'b', 'c', 'f', 'u']) + s2 + s2.dtype + +This trade-off is made largely for memory and performance reasons, and also so +that the resulting ``Series`` continues to be "numeric". + +If you need to represent integers with possibly missing values, use one of +the nullable-integer extension dtypes provided by pandas + +* :class:`Int8Dtype` +* :class:`Int16Dtype` +* :class:`Int32Dtype` +* :class:`Int64Dtype` + +.. 
ipython:: python + + s_int = pd.Series([1, 2, 3, 4, 5], index=list('abcde'), + dtype=pd.Int64Dtype()) + s_int + s_int.dtype + + s2_int = s_int.reindex(['a', 'b', 'c', 'f', 'u']) + s2_int + s2_int.dtype + +See :ref:`integer_na` for more. + +``NA`` type promotions +~~~~~~~~~~~~~~~~~~~~~~ + +When introducing NAs into an existing ``Series`` or ``DataFrame`` via +:meth:`~Series.reindex` or some other means, boolean and integer types will be +promoted to a different dtype in order to store the NAs. The promotions are +summarized in this table: + +.. csv-table:: + :header: "Typeclass","Promotion dtype for storing NAs" + :widths: 40,60 + + ``floating``, no change + ``object``, no change + ``integer``, cast to ``float64`` + ``boolean``, cast to ``object`` + +While this may seem like a heavy trade-off, I have found very few cases where +this is an issue in practice i.e. storing values greater than 2**53. Some +explanation for the motivation is in the next section. + +Why not make NumPy like R? +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Many people have suggested that NumPy should simply emulate the ``NA`` support +present in the more domain-specific statistical programming language `R +`__. Part of the reason is the NumPy type hierarchy: + +.. csv-table:: + :header: "Typeclass","Dtypes" + :widths: 30,70 + :delim: | + + ``numpy.floating`` | ``float16, float32, float64, float128`` + ``numpy.integer`` | ``int8, int16, int32, int64`` + ``numpy.unsignedinteger`` | ``uint8, uint16, uint32, uint64`` + ``numpy.object_`` | ``object_`` + ``numpy.bool_`` | ``bool_`` + ``numpy.character`` | ``string_, unicode_`` + +The R language, by contrast, only has a handful of built-in data types: +``integer``, ``numeric`` (floating-point), ``character``, and +``boolean``. ``NA`` types are implemented by reserving special bit patterns for +each type to be used as the missing value. 
While doing this with the full NumPy +type hierarchy would be possible, it would be a more substantial trade-off +(especially for the 8- and 16-bit data types) and implementation undertaking. + +An alternate approach is that of using masked arrays. A masked array is an +array of data with an associated boolean *mask* denoting whether each value +should be considered ``NA`` or not. I am personally not in love with this +approach as I feel that overall it places a fairly heavy burden on the user and +the library implementer. Additionally, it exacts a fairly high performance cost +when working with numerical data compared with the simple approach of using +``NaN``. Thus, I have chosen the Pythonic "practicality beats purity" approach +and traded integer ``NA`` capability for a much simpler approach of using a +special value in float and object arrays to denote ``NA``, and promoting +integer arrays to floating when NAs must be introduced. + + +Differences with NumPy +---------------------- +For ``Series`` and ``DataFrame`` objects, :meth:`~DataFrame.var` normalizes by +``N-1`` to produce unbiased estimates of the sample variance, while NumPy's +``var`` normalizes by N, which measures the variance of the sample. Note that +:meth:`~DataFrame.cov` normalizes by ``N-1`` in both pandas and NumPy. + + +Thread-safety +------------- + +As of pandas 0.11, pandas is not 100% thread safe. The known issues relate to +the :meth:`~DataFrame.copy` method. If you are doing a lot of copying of +``DataFrame`` objects shared among threads, we recommend holding locks inside +the threads where the data copying occurs. + +See `this link `__ +for more information. + + +Byte-Ordering issues +-------------------- +Occasionally you may have to deal with data that were created on a machine with +a different byte order than the one on which you are running Python. A common +symptom of this issue is an error like:: + + Traceback + ... 
+ ValueError: Big-endian buffer not supported on little-endian compiler + +To deal +with this issue you should convert the underlying NumPy array to the native +system byte order *before* passing it to ``Series`` or ``DataFrame`` +constructors using something similar to the following: + +.. ipython:: python + + x = np.array(list(range(10)), '>i4') # big endian + newx = x.byteswap().newbyteorder() # force native byteorder + s = pd.Series(newx) + +See `the NumPy documentation on byte order +`__ for more +details. diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst new file mode 100644 index 00000000..8cd22907 --- /dev/null +++ b/doc/source/user_guide/groupby.rst @@ -0,0 +1,1462 @@ +.. _groupby: + +{{ header }} + +***************************** +Group By: split-apply-combine +***************************** + +By "group by" we are referring to a process involving one or more of the following +steps: + +* **Splitting** the data into groups based on some criteria. +* **Applying** a function to each group independently. +* **Combining** the results into a data structure. + +Out of these, the split step is the most straightforward. In fact, in many +situations we may wish to split the data set into groups and do something with +those groups. In the apply step, we might wish to do one of the +following: + +* **Aggregation**: compute a summary statistic (or statistics) for each + group. Some examples: + + * Compute group sums or means. + * Compute group sizes / counts. + +* **Transformation**: perform some group-specific computations and return a + like-indexed object. Some examples: + + * Standardize data (zscore) within a group. + * Filling NAs within groups with a value derived from each group. + +* **Filtration**: discard some groups, according to a group-wise computation + that evaluates True or False. Some examples: + + * Discard data that belongs to groups with only a few members. + * Filter out data based on the group sum or mean. 
+ +* Some combination of the above: GroupBy will examine the results of the apply + step and try to return a sensibly combined result if it doesn't fit into + either of the above two categories. + +Since the set of object instance methods on pandas data structures are generally +rich and expressive, we often simply want to invoke, say, a DataFrame function +on each group. The name GroupBy should be quite familiar to those who have used +a SQL-based tool (or ``itertools``), in which you can write code like: + +.. code-block:: sql + + SELECT Column1, Column2, mean(Column3), sum(Column4) + FROM SomeTable + GROUP BY Column1, Column2 + +We aim to make operations like this natural and easy to express using +pandas. We'll address each area of GroupBy functionality then provide some +non-trivial examples / use cases. + +See the :ref:`cookbook` for some advanced strategies. + +.. _groupby.split: + +Splitting an object into groups +------------------------------- + +pandas objects can be split on any of their axes. The abstract definition of +grouping is to provide a mapping of labels to group names. To create a GroupBy +object (more on what the GroupBy object is later), you may do the following: + +.. ipython:: python + + df = pd.DataFrame([('bird', 'Falconiformes', 389.0), + ('bird', 'Psittaciformes', 24.0), + ('mammal', 'Carnivora', 80.2), + ('mammal', 'Primates', np.nan), + ('mammal', 'Carnivora', 58)], + index=['falcon', 'parrot', 'lion', 'monkey', 'leopard'], + columns=('class', 'order', 'max_speed')) + df + + # default is axis=0 + grouped = df.groupby('class') + grouped = df.groupby('order', axis='columns') + grouped = df.groupby(['class', 'order']) + +The mapping can be specified many different ways: + +* A Python function, to be called on each of the axis labels. +* A list or NumPy array of the same length as the selected axis. +* A dict or ``Series``, providing a ``label -> group name`` mapping. 
+* For ``DataFrame`` objects, a string indicating a column to be used to group. + Of course ``df.groupby('A')`` is just syntactic sugar for + ``df.groupby(df['A'])``, but it makes life simpler. +* For ``DataFrame`` objects, a string indicating an index level to be used to + group. +* A list of any of the above things. + +Collectively we refer to the grouping objects as the **keys**. For example, +consider the following ``DataFrame``: + +.. note:: + + A string passed to ``groupby`` may refer to either a column or an index level. + If a string matches both a column name and an index level name, a + ``ValueError`` will be raised. + +.. ipython:: python + + df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + df + +On a DataFrame, we obtain a GroupBy object by calling :meth:`~DataFrame.groupby`. +We could naturally group by either the ``A`` or ``B`` columns, or both: + +.. ipython:: python + + grouped = df.groupby('A') + grouped = df.groupby(['A', 'B']) + +.. versionadded:: 0.24 + +If we also have a MultiIndex on columns ``A`` and ``B``, we can group by all +but the specified columns + +.. ipython:: python + + df2 = df.set_index(['A', 'B']) + grouped = df2.groupby(level=df2.index.names.difference(['B'])) + grouped.sum() + +These will split the DataFrame on its index (rows). We could also split by the +columns: + +.. ipython:: + + In [4]: def get_letter_type(letter): + ...: if letter.lower() in 'aeiou': + ...: return 'vowel' + ...: else: + ...: return 'consonant' + ...: + + In [5]: grouped = df.groupby(get_letter_type, axis=1) + +pandas :class:`~pandas.Index` objects support duplicate values. 
If a +non-unique index is used as the group key in a groupby operation, all values +for the same index value will be considered to be in one group and thus the +output of aggregation functions will only contain unique index values: + +.. ipython:: python + + lst = [1, 2, 3, 1, 2, 3] + + s = pd.Series([1, 2, 3, 10, 20, 30], lst) + + grouped = s.groupby(level=0) + + grouped.first() + + grouped.last() + + grouped.sum() + +Note that **no splitting occurs** until it's needed. Creating the GroupBy object +only verifies that you've passed a valid mapping. + +.. note:: + + Many kinds of complicated data manipulations can be expressed in terms of + GroupBy operations (though can't be guaranteed to be the most + efficient). You can get quite creative with the label mapping functions. + +.. _groupby.sorting: + +GroupBy sorting +~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default the group keys are sorted during the ``groupby`` operation. You may however pass ``sort=False`` for potential speedups: + +.. ipython:: python + + df2 = pd.DataFrame({'X': ['B', 'B', 'A', 'A'], 'Y': [1, 2, 3, 4]}) + df2.groupby(['X']).sum() + df2.groupby(['X'], sort=False).sum() + + +Note that ``groupby`` will preserve the order in which *observations* are sorted *within* each group. +For example, the groups created by ``groupby()`` below are in the order they appeared in the original ``DataFrame``: + +.. ipython:: python + + df3 = pd.DataFrame({'X': ['A', 'B', 'A', 'B'], 'Y': [1, 4, 3, 2]}) + df3.groupby(['X']).get_group('A') + + df3.groupby(['X']).get_group('B') + + + +.. _groupby.attributes: + +GroupBy object attributes +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``groups`` attribute is a dict whose keys are the computed unique groups +and corresponding values being the axis labels belonging to each group. In the +above example we have: + +.. 
ipython:: python + + df.groupby('A').groups + df.groupby(get_letter_type, axis=1).groups + +Calling the standard Python ``len`` function on the GroupBy object just returns +the length of the ``groups`` dict, so it is largely just a convenience: + +.. ipython:: python + + grouped = df.groupby(['A', 'B']) + grouped.groups + len(grouped) + + +.. _groupby.tabcompletion: + +``GroupBy`` will tab complete column names (and other attributes): + +.. ipython:: python + :suppress: + + n = 10 + weight = np.random.normal(166, 20, size=n) + height = np.random.normal(60, 10, size=n) + time = pd.date_range('1/1/2000', periods=n) + gender = np.random.choice(['male', 'female'], size=n) + df = pd.DataFrame({'height': height, 'weight': weight, + 'gender': gender}, index=time) + +.. ipython:: python + + df + gb = df.groupby('gender') + + +.. ipython:: + + @verbatim + In [1]: gb. # noqa: E225, E999 + gb.agg gb.boxplot gb.cummin gb.describe gb.filter gb.get_group gb.height gb.last gb.median gb.ngroups gb.plot gb.rank gb.std gb.transform + gb.aggregate gb.count gb.cumprod gb.dtype gb.first gb.groups gb.hist gb.max gb.min gb.nth gb.prod gb.resample gb.sum gb.var + gb.apply gb.cummax gb.cumsum gb.fillna gb.gender gb.head gb.indices gb.mean gb.name gb.ohlc gb.quantile gb.size gb.tail gb.weight + +.. _groupby.multiindex: + +GroupBy with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~ + +With :ref:`hierarchically-indexed data `, it's quite +natural to group by one of the levels of the hierarchy. + +Let's create a Series with a two-level ``MultiIndex``. + +.. ipython:: python + + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + s = pd.Series(np.random.randn(8), index=index) + s + +We can then group by one of the levels in ``s``. + +.. 
ipython:: python + + grouped = s.groupby(level=0) + grouped.sum() + +If the MultiIndex has names specified, these can be passed instead of the level +number: + +.. ipython:: python + + s.groupby(level='second').sum() + +The aggregation functions such as ``sum`` will take the level parameter +directly. Additionally, the resulting index will be named according to the +chosen level: + +.. ipython:: python + + s.sum(level='second') + +Grouping with multiple levels is supported. + +.. ipython:: python + :suppress: + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['doo', 'doo', 'bee', 'bee', 'bop', 'bop', 'bop', 'bop'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + tuples = list(zip(*arrays)) + index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second', 'third']) + s = pd.Series(np.random.randn(8), index=index) + +.. ipython:: python + + s + s.groupby(level=['first', 'second']).sum() + +Index level names may be supplied as keys. + +.. ipython:: python + + s.groupby(['first', 'second']).sum() + +More on the ``sum`` function and aggregation later. + +Grouping DataFrame with Index levels and columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +A DataFrame may be grouped by a combination of columns and index levels by +specifying the column names as strings and the index levels as ``pd.Grouper`` +objects. + +.. ipython:: python + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + + index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + + df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 3, 3], + 'B': np.arange(8)}, + index=index) + + df + +The following example groups ``df`` by the ``second`` index level and +the ``A`` column. + +.. ipython:: python + + df.groupby([pd.Grouper(level=1), 'A']).sum() + +Index levels may also be specified by name. + +.. 
ipython:: python + + df.groupby([pd.Grouper(level='second'), 'A']).sum() + +Index level names may be specified as keys directly to ``groupby``. + +.. ipython:: python + + df.groupby(['second', 'A']).sum() + +DataFrame column selection in GroupBy +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once you have created the GroupBy object from a DataFrame, you might want to do +something different for each of the columns. Thus, using ``[]`` similar to +getting a column from a DataFrame, you can do: + +.. ipython:: python + :suppress: + + df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + +.. ipython:: python + + grouped = df.groupby(['A']) + grouped_C = grouped['C'] + grouped_D = grouped['D'] + +This is mainly syntactic sugar for the alternative and much more verbose: + +.. ipython:: python + + df['C'].groupby(df['A']) + +Additionally this method avoids recomputing the internal grouping information +derived from the passed key. + +.. _groupby.iterating-label: + +Iterating through groups +------------------------ + +With the GroupBy object in hand, iterating through the grouped data is very +natural and functions similarly to :py:func:`itertools.groupby`: + +.. ipython:: + + In [4]: grouped = df.groupby('A') + + In [5]: for name, group in grouped: + ...: print(name) + ...: print(group) + ...: + +In the case of grouping by multiple keys, the group name will be a tuple: + +.. ipython:: + + In [5]: for name, group in df.groupby(['A', 'B']): + ...: print(name) + ...: print(group) + ...: + +See :ref:`timeseries.iterating-label`. + +Selecting a group +----------------- + +A single group can be selected using +:meth:`~pandas.core.groupby.DataFrameGroupBy.get_group`: + +.. ipython:: python + + grouped.get_group('bar') + +Or for an object grouped on multiple columns: + +.. 
ipython:: python + + df.groupby(['A', 'B']).get_group(('bar', 'one')) + +.. _groupby.aggregate: + +Aggregation +----------- + +Once the GroupBy object has been created, several methods are available to +perform a computation on the grouped data. These operations are similar to the +:ref:`aggregating API `, :ref:`window functions API `, +and :ref:`resample API `. + +An obvious one is aggregation via the +:meth:`~pandas.core.groupby.DataFrameGroupBy.aggregate` or equivalently +:meth:`~pandas.core.groupby.DataFrameGroupBy.agg` method: + +.. ipython:: python + + grouped = df.groupby('A') + grouped.aggregate(np.sum) + + grouped = df.groupby(['A', 'B']) + grouped.aggregate(np.sum) + +As you can see, the result of the aggregation will have the group names as the +new index along the grouped axis. In the case of multiple keys, the result is a +:ref:`MultiIndex ` by default, though this can be +changed by using the ``as_index`` option: + +.. ipython:: python + + grouped = df.groupby(['A', 'B'], as_index=False) + grouped.aggregate(np.sum) + + df.groupby('A', as_index=False).sum() + +Note that you could use the ``reset_index`` DataFrame function to achieve the +same result as the column names are stored in the resulting ``MultiIndex``: + +.. ipython:: python + + df.groupby(['A', 'B']).sum().reset_index() + +Another simple aggregation example is to compute the size of each group. +This is included in GroupBy as the ``size`` method. It returns a Series whose +index are the group names and whose values are the sizes of each group. + +.. ipython:: python + + grouped.size() + +.. ipython:: python + + grouped.describe() + +.. note:: + + Aggregation functions **will not** return the groups that you are aggregating over + if they are named *columns*, when ``as_index=True``, the default. The grouped columns will + be the **indices** of the returned object. + + Passing ``as_index=False`` **will** return the groups that you are aggregating over, if they are + named *columns*. 
+ +Aggregating functions are the ones that reduce the dimension of the returned objects. +Some common aggregating functions are tabulated below: + +.. csv-table:: + :header: "Function", "Description" + :widths: 20, 80 + :delim: ; + + :meth:`~pd.core.groupby.DataFrameGroupBy.mean`;Compute mean of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.sum`;Compute sum of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.size`;Compute group sizes + :meth:`~pd.core.groupby.DataFrameGroupBy.count`;Compute count of group + :meth:`~pd.core.groupby.DataFrameGroupBy.std`;Standard deviation of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.var`;Compute variance of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.sem`;Standard error of the mean of groups + :meth:`~pd.core.groupby.DataFrameGroupBy.describe`;Generates descriptive statistics + :meth:`~pd.core.groupby.DataFrameGroupBy.first`;Compute first of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.last`;Compute last of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.nth`;Take nth value, or a subset if n is a list + :meth:`~pd.core.groupby.DataFrameGroupBy.min`;Compute min of group values + :meth:`~pd.core.groupby.DataFrameGroupBy.max`;Compute max of group values + + +The aggregating functions above will exclude NA values. Any function which +reduces a :class:`Series` to a scalar value is an aggregation function and will work, +a trivial example is ``df.groupby('A').agg(lambda ser: 1)``. Note that +:meth:`~pd.core.groupby.DataFrameGroupBy.nth` can act as a reducer *or* a +filter, see :ref:`here `. + +.. _groupby.aggregate.multifunc: + +Applying multiple functions at once +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +With grouped ``Series`` you can also pass a list or dict of functions to do +aggregation with, outputting a DataFrame: + +.. 
ipython:: python + + grouped = df.groupby('A') + grouped['C'].agg([np.sum, np.mean, np.std]) + +On a grouped ``DataFrame``, you can pass a list of functions to apply to each +column, which produces an aggregated result with a hierarchical index: + +.. ipython:: python + + grouped.agg([np.sum, np.mean, np.std]) + + +The resulting aggregations are named for the functions themselves. If you +need to rename, then you can add in a chained operation for a ``Series`` like this: + +.. ipython:: python + + (grouped['C'].agg([np.sum, np.mean, np.std]) + .rename(columns={'sum': 'foo', + 'mean': 'bar', + 'std': 'baz'})) + +For a grouped ``DataFrame``, you can rename in a similar manner: + +.. ipython:: python + + (grouped.agg([np.sum, np.mean, np.std]) + .rename(columns={'sum': 'foo', + 'mean': 'bar', + 'std': 'baz'})) + +.. note:: + + In general, the output column names should be unique. You can't apply + the same function (or two functions with the same name) to the same + column. + + .. ipython:: python + :okexcept: + + grouped['C'].agg(['sum', 'sum']) + + + Pandas *does* allow you to provide multiple lambdas. In this case, pandas + will mangle the name of the (nameless) lambda functions, appending ``_`` + to each subsequent lambda. + + .. ipython:: python + + grouped['C'].agg([lambda x: x.max() - x.min(), + lambda x: x.median() - x.mean()]) + + + +.. _groupby.aggregate.named: + +Named aggregation +~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.25.0 + +To support column-specific aggregation *with control over the output column names*, pandas +accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", where + +- The keywords are the *output* column names +- The values are tuples whose first element is the column to select + and the second element is the aggregation to apply to that column. Pandas + provides the ``pandas.NamedAgg`` namedtuple with the fields ``['column', 'aggfunc']`` + to make it clearer what the arguments are. 
As usual, the aggregation can + be a callable or a string alias. + +.. ipython:: python + + animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'], + 'height': [9.1, 6.0, 9.5, 34.0], + 'weight': [7.9, 7.5, 9.9, 198.0]}) + animals + + animals.groupby("kind").agg( + min_height=pd.NamedAgg(column='height', aggfunc='min'), + max_height=pd.NamedAgg(column='height', aggfunc='max'), + average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean), + ) + + +``pandas.NamedAgg`` is just a ``namedtuple``. Plain tuples are allowed as well. + +.. ipython:: python + + animals.groupby("kind").agg( + min_height=('height', 'min'), + max_height=('height', 'max'), + average_weight=('weight', np.mean), + ) + + +If your desired output column names are not valid python keywords, construct a dictionary +and unpack the keyword arguments + +.. ipython:: python + + animals.groupby("kind").agg(**{ + 'total weight': pd.NamedAgg(column='weight', aggfunc=sum), + }) + +Additional keyword arguments are not passed through to the aggregation functions. Only pairs +of ``(column, aggfunc)`` should be passed as ``**kwargs``. If your aggregation functions +requires additional arguments, partially apply them with :meth:`functools.partial`. + +.. note:: + + For Python 3.5 and earlier, the order of ``**kwargs`` in a functions was not + preserved. This means that the output column ordering would not be + consistent. To ensure consistent ordering, the keys (and so output columns) + will always be sorted for Python 3.5. + +Named aggregation is also valid for Series groupby aggregations. In this case there's +no column selection, so the values are just the functions. + +.. ipython:: python + + animals.groupby("kind").height.agg( + min_height='min', + max_height='max', + ) + +Applying different functions to DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By passing a dict to ``aggregate`` you can apply a different aggregation to the +columns of a DataFrame: + +.. 
ipython:: python + + grouped.agg({'C': np.sum, + 'D': lambda x: np.std(x, ddof=1)}) + +The function names can also be strings. In order for a string to be valid it +must be either implemented on GroupBy or available via :ref:`dispatching +`: + +.. ipython:: python + + grouped.agg({'C': 'sum', 'D': 'std'}) + +.. _groupby.aggregate.cython: + +Cython-optimized aggregation functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some common aggregations, currently only ``sum``, ``mean``, ``std``, and ``sem``, have +optimized Cython implementations: + +.. ipython:: python + + df.groupby('A').sum() + df.groupby(['A', 'B']).mean() + +Of course ``sum`` and ``mean`` are implemented on pandas objects, so the above +code would work even without the special versions via dispatching (see below). + +.. _groupby.transform: + +Transformation +-------------- + +The ``transform`` method returns an object that is indexed the same (same size) +as the one being grouped. The transform function must: + +* Return a result that is either the same size as the group chunk or + broadcastable to the size of the group chunk (e.g., a scalar, + ``grouped.transform(lambda x: x.iloc[-1])``). +* Operate column-by-column on the group chunk. The transform is applied to + the first group chunk using chunk.apply. +* Not perform in-place operations on the group chunk. Group chunks should + be treated as immutable, and changes to a group chunk may produce unexpected + results. For example, when using ``fillna``, ``inplace`` must be ``False`` + (``grouped.transform(lambda x: x.fillna(inplace=False))``). +* (Optionally) operates on the entire group chunk. If this is supported, a + fast path is used starting from the *second* chunk. + +For example, suppose we wished to standardize the data within each group: + +.. 
ipython:: python + + index = pd.date_range('10/1/1999', periods=1100) + ts = pd.Series(np.random.normal(0.5, 2, 1100), index) + ts = ts.rolling(window=100, min_periods=100).mean().dropna() + + ts.head() + ts.tail() + + transformed = (ts.groupby(lambda x: x.year) + .transform(lambda x: (x - x.mean()) / x.std())) + +We would expect the result to now have mean 0 and standard deviation 1 within +each group, which we can easily check: + +.. ipython:: python + + # Original Data + grouped = ts.groupby(lambda x: x.year) + grouped.mean() + grouped.std() + + # Transformed Data + grouped_trans = transformed.groupby(lambda x: x.year) + grouped_trans.mean() + grouped_trans.std() + +We can also visually compare the original and transformed data sets. + +.. ipython:: python + + compare = pd.DataFrame({'Original': ts, 'Transformed': transformed}) + + @savefig groupby_transform_plot.png + compare.plot() + +Transformation functions that have lower dimension outputs are broadcast to +match the shape of the input array. + +.. ipython:: python + + ts.groupby(lambda x: x.year).transform(lambda x: x.max() - x.min()) + +Alternatively, the built-in methods could be used to produce the same outputs. + +.. ipython:: python + + max = ts.groupby(lambda x: x.year).transform('max') + min = ts.groupby(lambda x: x.year).transform('min') + + max - min + +Another common data transform is to replace missing data with the group mean. + +.. ipython:: python + :suppress: + + cols = ['A', 'B', 'C'] + values = np.random.randn(1000, 3) + values[np.random.randint(0, 1000, 100), 0] = np.nan + values[np.random.randint(0, 1000, 50), 1] = np.nan + values[np.random.randint(0, 1000, 200), 2] = np.nan + data_df = pd.DataFrame(values, columns=cols) + +.. 
ipython:: python + + data_df + + countries = np.array(['US', 'UK', 'GR', 'JP']) + key = countries[np.random.randint(0, 4, 1000)] + + grouped = data_df.groupby(key) + + # Non-NA count in each group + grouped.count() + + transformed = grouped.transform(lambda x: x.fillna(x.mean())) + +We can verify that the group means have not changed in the transformed data +and that the transformed data contains no NAs. + +.. ipython:: python + + grouped_trans = transformed.groupby(key) + + grouped.mean() # original group means + grouped_trans.mean() # transformation did not change group means + + grouped.count() # original has some missing data points + grouped_trans.count() # counts after transformation + grouped_trans.size() # Verify non-NA count equals group size + +.. note:: + + Some functions will automatically transform the input when applied to a + GroupBy object, but returning an object of the same shape as the original. + Passing ``as_index=False`` will not affect these transformation methods. + + For example: ``fillna, ffill, bfill, shift.``. + + .. ipython:: python + + grouped.ffill() + + +.. _groupby.transform.window_resample: + +Window and resample operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is possible to use ``resample()``, ``expanding()`` and +``rolling()`` as methods on groupbys. + +The example below will apply the ``rolling()`` method on the samples of +the column B based on the groups of column A. + +.. ipython:: python + + df_re = pd.DataFrame({'A': [1] * 10 + [5] * 10, + 'B': np.arange(20)}) + df_re + + df_re.groupby('A').rolling(4).B.mean() + + +The ``expanding()`` method will accumulate a given operation +(``sum()`` in the example) for all the members of each particular +group. + +.. ipython:: python + + df_re.groupby('A').expanding().sum() + + +Suppose you want to use the ``resample()`` method to get a daily +frequency in each group of your dataframe and wish to complete the +missing values with the ``ffill()`` method. + +.. 
ipython:: python + + df_re = pd.DataFrame({'date': pd.date_range(start='2016-01-01', periods=4, + freq='W'), + 'group': [1, 1, 2, 2], + 'val': [5, 6, 7, 8]}).set_index('date') + df_re + + df_re.groupby('group').resample('1D').ffill() + +.. _groupby.filter: + +Filtration +---------- + +The ``filter`` method returns a subset of the original object. Suppose we +want to take only elements that belong to groups with a group sum greater +than 2. + +.. ipython:: python + + sf = pd.Series([1, 1, 2, 3, 3, 3]) + sf.groupby(sf).filter(lambda x: x.sum() > 2) + +The argument of ``filter`` must be a function that, applied to the group as a +whole, returns ``True`` or ``False``. + +Another useful operation is filtering out elements that belong to groups +with only a couple members. + +.. ipython:: python + + dff = pd.DataFrame({'A': np.arange(8), 'B': list('aabbbbcc')}) + dff.groupby('B').filter(lambda x: len(x) > 2) + +Alternatively, instead of dropping the offending groups, we can return a +like-indexed objects where the groups that do not pass the filter are filled +with NaNs. + +.. ipython:: python + + dff.groupby('B').filter(lambda x: len(x) > 2, dropna=False) + +For DataFrames with multiple columns, filters should explicitly specify a column as the filter criterion. + +.. ipython:: python + + dff['C'] = np.arange(8) + dff.groupby('B').filter(lambda x: len(x['C']) > 2) + +.. note:: + + Some functions when applied to a groupby object will act as a **filter** on the input, returning + a reduced shape of the original (and potentially eliminating groups), but with the index unchanged. + Passing ``as_index=False`` will not affect these transformation methods. + + For example: ``head, tail``. + + .. ipython:: python + + dff.groupby('B').head(2) + + +.. _groupby.dispatch: + +Dispatching to instance methods +------------------------------- + +When doing an aggregation or transformation, you might just want to call an +instance method on each data group. 
This is pretty easy to do by passing lambda +functions: + +.. ipython:: python + + grouped = df.groupby('A') + grouped.agg(lambda x: x.std()) + +But, it's rather verbose and can be untidy if you need to pass additional +arguments. Using a bit of metaprogramming cleverness, GroupBy now has the +ability to "dispatch" method calls to the groups: + +.. ipython:: python + + grouped.std() + +What is actually happening here is that a function wrapper is being +generated. When invoked, it takes any passed arguments and invokes the function +with any arguments on each group (in the above example, the ``std`` +function). The results are then combined together much in the style of ``agg`` +and ``transform`` (it actually uses ``apply`` to infer the gluing, documented +next). This enables some operations to be carried out rather succinctly: + +.. ipython:: python + + tsdf = pd.DataFrame(np.random.randn(1000, 3), + index=pd.date_range('1/1/2000', periods=1000), + columns=['A', 'B', 'C']) + tsdf.iloc[::2] = np.nan + grouped = tsdf.groupby(lambda x: x.year) + grouped.fillna(method='pad') + +In this example, we chopped the collection of time series into yearly chunks +then independently called :ref:`fillna ` on the +groups. + +The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: + +.. ipython:: python + + s = pd.Series([9, 8, 7, 5, 19, 1, 4.2, 3.3]) + g = pd.Series(list('abababab')) + gb = s.groupby(g) + gb.nlargest(3) + gb.nsmallest(3) + +.. _groupby.apply: + +Flexible ``apply`` +------------------ + +Some operations on the grouped data might not fit into either the aggregate or +transform categories. Or, you may simply want GroupBy to infer how to combine +the results. For these, use the ``apply`` function, which can be substituted +for both ``aggregate`` and ``transform`` in many standard use cases. However, +``apply`` can handle some exceptional use cases, for example: + +.. 
ipython:: python + + df + grouped = df.groupby('A') + + # could also just call .describe() + grouped['C'].apply(lambda x: x.describe()) + +The dimension of the returned result can also change: + +.. ipython:: + + In [8]: grouped = df.groupby('A')['C'] + + In [10]: def f(group): + ....: return pd.DataFrame({'original': group, + ....: 'demeaned': group - group.mean()}) + ....: + + In [11]: grouped.apply(f) + +``apply`` on a Series can operate on a returned value from the applied function, +that is itself a series, and possibly upcast the result to a DataFrame: + +.. ipython:: python + + def f(x): + return pd.Series([x, x ** 2], index=['x', 'x^2']) + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) + + +.. note:: + + ``apply`` can act as a reducer, transformer, *or* filter function, depending on exactly what is passed to it. + So depending on the path taken, and exactly what you are grouping. Thus the grouped columns(s) may be included in + the output as well as set the indices. + + +Other useful features +--------------------- + +Automatic exclusion of "nuisance" columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Again consider the example DataFrame we've been looking at: + +.. ipython:: python + + df + +Suppose we wish to compute the standard deviation grouped by the ``A`` +column. There is a slight problem, namely that we don't care about the data in +column ``B``. We refer to this as a "nuisance" column. If the passed +aggregation function can't be applied to some columns, the troublesome columns +will be (silently) dropped. Thus, this does not pose any problems: + +.. ipython:: python + + df.groupby('A').std() + +Note that ``df.groupby('A').colname.std().`` is more efficient than +``df.groupby('A').std().colname``, so if the result of an aggregation function +is only interesting over one column (here ``colname``), it may be filtered +*before* applying the aggregation function. + +.. 
note:: + Any object column, also if it contains numerical values such as ``Decimal`` + objects, is considered as a "nuisance" columns. They are excluded from + aggregate functions automatically in groupby. + + If you do wish to include decimal or object columns in an aggregation with + other non-nuisance data types, you must do so explicitly. + +.. ipython:: python + + from decimal import Decimal + df_dec = pd.DataFrame( + {'id': [1, 2, 1, 2], + 'int_column': [1, 2, 3, 4], + 'dec_column': [Decimal('0.50'), Decimal('0.15'), + Decimal('0.25'), Decimal('0.40')] + } + ) + + # Decimal columns can be sum'd explicitly by themselves... + df_dec.groupby(['id'])[['dec_column']].sum() + + # ...but cannot be combined with standard data types or they will be excluded + df_dec.groupby(['id'])[['int_column', 'dec_column']].sum() + + # Use .agg function to aggregate over standard and "nuisance" data types + # at the same time + df_dec.groupby(['id']).agg({'int_column': 'sum', 'dec_column': 'sum'}) + +.. _groupby.observed: + +Handling of (un)observed Categorical values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When using a ``Categorical`` grouper (as a single grouper, or as part of multiple groupers), the ``observed`` keyword +controls whether to return a cartesian product of all possible groupers values (``observed=False``) or only those +that are observed groupers (``observed=True``). + +Show all values: + +.. ipython:: python + + pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], + categories=['a', 'b']), + observed=False).count() + +Show only the observed values: + +.. ipython:: python + + pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], + categories=['a', 'b']), + observed=True).count() + +The returned dtype of the grouped will *always* include *all* of the categories that were grouped. + +.. ipython:: python + + s = pd.Series([1, 1, 1]).groupby(pd.Categorical(['a', 'a', 'a'], + categories=['a', 'b']), + observed=False).count() + s.index.dtype + +.. 
_groupby.missing: + +NA and NaT group handling +~~~~~~~~~~~~~~~~~~~~~~~~~ + +If there are any NaN or NaT values in the grouping key, these will be +automatically excluded. In other words, there will never be an "NA group" or +"NaT group". This was not the case in older versions of pandas, but users were +generally discarding the NA group anyway (and supporting it was an +implementation headache). + +Grouping with ordered factors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Categorical variables represented as instance of pandas's ``Categorical`` class +can be used as group keys. If so, the order of the levels will be preserved: + +.. ipython:: python + + data = pd.Series(np.random.randn(100)) + + factor = pd.qcut(data, [0, .25, .5, .75, 1.]) + + data.groupby(factor).mean() + +.. _groupby.specify: + +Grouping with a grouper specification +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may need to specify a bit more data to properly group. You can +use the ``pd.Grouper`` to provide this local control. + +.. ipython:: python + + import datetime + + df = pd.DataFrame({'Branch': 'A A A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), + 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], + 'Date': [ + datetime.datetime(2013, 1, 1, 13, 0), + datetime.datetime(2013, 1, 1, 13, 5), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 12, 2, 12, 0), + datetime.datetime(2013, 12, 2, 14, 0)] + }) + + df + +Groupby a specific column with the desired frequency. This is like resampling. + +.. ipython:: python + + df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer']).sum() + +You have an ambiguous specification in that you have a named index and a column +that could be potential groupers. + +.. 
ipython:: python + + df = df.set_index('Date') + df['Date'] = df.index + pd.offsets.MonthEnd(2) + df.groupby([pd.Grouper(freq='6M', key='Date'), 'Buyer']).sum() + + df.groupby([pd.Grouper(freq='6M', level='Date'), 'Buyer']).sum() + + +Taking the first rows of each group +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Just like for a DataFrame or Series you can call head and tail on a groupby: + +.. ipython:: python + + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + df + + g = df.groupby('A') + g.head(1) + + g.tail(1) + +This shows the first or last n rows from each group. + +.. _groupby.nth: + +Taking the nth row of each group +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To select from a DataFrame or Series the nth item, use +:meth:`~pd.core.groupby.DataFrameGroupBy.nth`. This is a reduction method, and +will return a single row (or no row) per group if you pass an int for n: + +.. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + + g.nth(0) + g.nth(-1) + g.nth(1) + +If you want to select the nth not-null item, use the ``dropna`` kwarg. For a DataFrame this should be either ``'any'`` or ``'all'`` just like you would pass to dropna: + +.. ipython:: python + + # nth(0) is the same as g.first() + g.nth(0, dropna='any') + g.first() + + # nth(-1) is the same as g.last() + g.nth(-1, dropna='any') # NaNs denote group exhausted when using dropna + g.last() + + g.B.nth(0, dropna='all') + +As with other methods, passing ``as_index=False``, will achieve a filtration, which returns the grouped row. + +.. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A', as_index=False) + + g.nth(0) + g.nth(-1) + +You can also select multiple rows from each group by specifying multiple nth values as a list of ints. + +.. 
ipython:: python + + business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B') + df = pd.DataFrame(1, index=business_dates, columns=['a', 'b']) + # get the first, 4th, and last date index for each month + df.groupby([df.index.year, df.index.month]).nth([0, 3, -1]) + +Enumerate group items +~~~~~~~~~~~~~~~~~~~~~ + +To see the order in which each row appears within its group, use the +``cumcount`` method: + +.. ipython:: python + + dfg = pd.DataFrame(list('aaabba'), columns=['A']) + dfg + + dfg.groupby('A').cumcount() + + dfg.groupby('A').cumcount(ascending=False) + +.. _groupby.ngroup: + +Enumerate groups +~~~~~~~~~~~~~~~~ + +To see the ordering of the groups (as opposed to the order of rows +within a group given by ``cumcount``) you can use +:meth:`~pandas.core.groupby.DataFrameGroupBy.ngroup`. + + + +Note that the numbers given to the groups match the order in which the +groups would be seen when iterating over the groupby object, not the +order they are first observed. + +.. ipython:: python + + dfg = pd.DataFrame(list('aaabba'), columns=['A']) + dfg + + dfg.groupby('A').ngroup() + + dfg.groupby('A').ngroup(ascending=False) + +Plotting +~~~~~~~~ + +Groupby also works with some plotting methods. For example, suppose we +suspect that some features in a DataFrame may differ by group, in this case, +the values in column 1 where the group is "B" are 3 higher on average. + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.randn(50, 2)) + df['g'] = np.random.choice(['A', 'B'], size=50) + df.loc[df['g'] == 'B', 1] += 3 + +We can easily visualize this with a boxplot: + +.. ipython:: python + :okwarning: + + @savefig groupby_boxplot.png + df.groupby('g').boxplot() + +The result of calling ``boxplot`` is a dictionary whose keys are the values +of our grouping column ``g`` ("A" and "B"). The values of the resulting dictionary +can be controlled by the ``return_type`` keyword of ``boxplot``. 
+See the :ref:`visualization documentation` for more. + +.. warning:: + + For historical reasons, ``df.groupby("g").boxplot()`` is not equivalent + to ``df.boxplot(by="g")``. See :ref:`here` for + an explanation. + +.. _groupby.pipe: + +Piping function calls +~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.21.0 + +Similar to the functionality provided by ``DataFrame`` and ``Series``, functions +that take ``GroupBy`` objects can be chained together using a ``pipe`` method to +allow for a cleaner, more readable syntax. To read about ``.pipe`` in general terms, +see :ref:`here `. + +Combining ``.groupby`` and ``.pipe`` is often useful when you need to reuse +GroupBy objects. + +As an example, imagine having a DataFrame with columns for stores, products, +revenue and quantity sold. We'd like to do a groupwise calculation of *prices* +(i.e. revenue/quantity) per store and per product. We could do this in a +multi-step operation, but expressing it in terms of piping can make the +code more readable. First we set the data: + +.. ipython:: python + + n = 1000 + df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n), + 'Product': np.random.choice(['Product_1', + 'Product_2'], n), + 'Revenue': (np.random.random(n) * 50 + 10).round(2), + 'Quantity': np.random.randint(1, 10, size=n)}) + df.head(2) + +Now, to find prices per store/product, we can simply do: + +.. ipython:: python + + (df.groupby(['Store', 'Product']) + .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum()) + .unstack().round(2)) + +Piping can also be expressive when you want to deliver a grouped object to some +arbitrary function, for example: + +.. ipython:: python + + def mean(groupby): + return groupby.mean() + + df.groupby(['Store', 'Product']).pipe(mean) + +where ``mean`` takes a GroupBy object and finds the mean of the Revenue and Quantity +columns respectively for each Store-Product combination. 
The ``mean`` function can +be any function that takes in a GroupBy object; the ``.pipe`` will pass the GroupBy +object as a parameter into the function you specify. + +Examples +-------- + +Regrouping by factor +~~~~~~~~~~~~~~~~~~~~ + +Regroup columns of a DataFrame according to their sum, and sum the aggregated ones. + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 0, 0], 'b': [0, 1, 0], + 'c': [1, 0, 0], 'd': [2, 3, 4]}) + df + df.groupby(df.sum(), axis=1).sum() + +.. _groupby.multicolumn_factorization: + +Multi-column factorization +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By using :meth:`~pandas.core.groupby.DataFrameGroupBy.ngroup`, we can extract +information about the groups in a way similar to :func:`factorize` (as described +further in the :ref:`reshaping API <reshaping.factorize>`) but which applies +naturally to multiple columns of mixed type and different +sources. This can be useful as an intermediate categorical-like step +in processing, when the relationships between the group rows are more +important than their content, or as input to an algorithm which only +accepts the integer encoding. (For more information about support in +pandas for full categorical data, see the :ref:`Categorical +introduction <categorical>` and the +:ref:`API documentation <api.arrays.categorical>`.) + +.. ipython:: python + + dfg = pd.DataFrame({"A": [1, 1, 2, 3, 2], "B": list("aaaba")}) + + dfg + + dfg.groupby(["A", "B"]).ngroup() + + dfg.groupby(["A", [0, 0, 0, 1, 1]]).ngroup() + +Groupby by indexer to 'resample' data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples. + +In order for resample to work on indices that are non-datetimelike, the following procedure can be utilized. + +In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation. + +..
note:: The below example shows how we can downsample by consolidation of samples into fewer samples. Here by using **df.index // 5**, we are aggregating the samples in bins. By applying **std()** function, we aggregate the information contained in many samples into a small subset of values which is their standard deviation thereby reducing the number of samples. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2)) + df + df.index // 5 + df.groupby(df.index // 5).std() + +Returning a Series to propagate names +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Group DataFrame columns, compute a set of metrics and return a named Series. +The Series name is used as the name for the column index. This is especially +useful in conjunction with reshaping operations such as stacking in which the +column index name will be used as the name of the inserted column: + +.. ipython:: python + + df = pd.DataFrame({'a': [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], + 'b': [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1], + 'c': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0], + 'd': [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1]}) + + def compute_metrics(x): + result = {'b_sum': x['b'].sum(), 'c_mean': x['c'].mean()} + return pd.Series(result, name='metrics') + + result = df.groupby('a').apply(compute_metrics) + + result + + result.stack() diff --git a/doc/source/user_guide/index.rst b/doc/source/user_guide/index.rst new file mode 100644 index 00000000..30b1c0b4 --- /dev/null +++ b/doc/source/user_guide/index.rst @@ -0,0 +1,45 @@ +{{ header }} + +.. _user_guide: + +========== +User Guide +========== + +The User Guide covers all of pandas by topic area. Each of the subsections +introduces a topic (such as "working with missing data"), and discusses how +pandas approaches the problem, with many examples throughout. + +Users brand-new to pandas should start with :ref:`10min`. + +Further information on any specific method can be obtained in the +:ref:`api`. + +.. 
If you update this toctree, also update the manual toctree in the + main index.rst.template + +.. toctree:: + :maxdepth: 2 + + io + indexing + advanced + merging + reshaping + text + missing_data + categorical + integer_na + boolean + visualization + computation + groupby + timeseries + timedeltas + style + options + enhancingperf + scale + sparse + gotchas + cookbook diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst new file mode 100644 index 00000000..2bd3ff62 --- /dev/null +++ b/doc/source/user_guide/indexing.rst @@ -0,0 +1,1902 @@ +.. _indexing: + +{{ header }} + +*************************** +Indexing and selecting data +*************************** + +The axis labeling information in pandas objects serves many purposes: + +* Identifies data (i.e. provides *metadata*) using known indicators, + important for analysis, visualization, and interactive console display. +* Enables automatic and explicit data alignment. +* Allows intuitive getting and setting of subsets of the data set. + +In this section, we will focus on the final point: namely, how to slice, dice, +and generally get and set subsets of pandas objects. The primary focus will be +on Series and DataFrame as they have received more development attention in +this area. + +.. note:: + + The Python and NumPy indexing operators ``[]`` and attribute operator ``.`` + provide quick and easy access to pandas data structures across a wide range + of use cases. This makes interactive work intuitive, as there's little new + to learn if you already know how to deal with Python dictionaries and NumPy + arrays. However, since the type of the data to be accessed isn't known in + advance, directly using standard operators has some optimization limits. For + production code, we recommend that you take advantage of the optimized + pandas data access methods exposed in this chapter. + +..
warning:: + + Whether a copy or a reference is returned for a setting operation, may + depend on the context. This is sometimes called ``chained assignment`` and + should be avoided. See :ref:`Returning a View versus Copy + `. + +See the :ref:`MultiIndex / Advanced Indexing ` for ``MultiIndex`` and more advanced indexing documentation. + +See the :ref:`cookbook` for some advanced strategies. + +.. _indexing.choice: + +Different choices for indexing +------------------------------ + +Object selection has had a number of user-requested additions in order to +support more explicit location based indexing. Pandas now supports three types +of multi-axis indexing. + +* ``.loc`` is primarily label based, but may also be used with a boolean array. ``.loc`` will raise ``KeyError`` when the items are not found. Allowed inputs are: + + * A single label, e.g. ``5`` or ``'a'`` (Note that ``5`` is interpreted as a + *label* of the index. This use is **not** an integer position along the + index.). + * A list or array of labels ``['a', 'b', 'c']``. + * A slice object with labels ``'a':'f'`` (Note that contrary to usual python + slices, **both** the start and the stop are included, when present in the + index! See :ref:`Slicing with labels ` + and :ref:`Endpoints are inclusive `.) + * A boolean array (any ``NA`` values will be treated as ``False``). + * A ``callable`` function with one argument (the calling Series or DataFrame) and + that returns valid output for indexing (one of the above). + + See more at :ref:`Selection by Label `. + +* ``.iloc`` is primarily integer position based (from ``0`` to + ``length-1`` of the axis), but may also be used with a boolean + array. ``.iloc`` will raise ``IndexError`` if a requested + indexer is out-of-bounds, except *slice* indexers which allow + out-of-bounds indexing. (this conforms with Python/NumPy *slice* + semantics). Allowed inputs are: + + * An integer e.g. ``5``. + * A list or array of integers ``[4, 3, 0]``. 
+ * A slice object with ints ``1:7``. + * A boolean array (any ``NA`` values will be treated as ``False``). + * A ``callable`` function with one argument (the calling Series or DataFrame) and + that returns valid output for indexing (one of the above). + + See more at :ref:`Selection by Position `, + :ref:`Advanced Indexing ` and :ref:`Advanced + Hierarchical `. + +* ``.loc``, ``.iloc``, and also ``[]`` indexing can accept a ``callable`` as indexer. See more at :ref:`Selection By Callable `. + +Getting values from an object with multi-axes selection uses the following +notation (using ``.loc`` as an example, but the following applies to ``.iloc`` as +well). Any of the axes accessors may be the null slice ``:``. Axes left out of +the specification are assumed to be ``:``, e.g. ``p.loc['a']`` is equivalent to +``p.loc['a', :, :]``. + +.. csv-table:: + :header: "Object Type", "Indexers" + :widths: 30, 50 + :delim: ; + + Series; ``s.loc[indexer]`` + DataFrame; ``df.loc[row_indexer,column_indexer]`` + +.. _indexing.basics: + +Basics +------ + +As mentioned when introducing the data structures in the :ref:`last section +`, the primary function of indexing with ``[]`` (a.k.a. ``__getitem__`` +for those familiar with implementing class behavior in Python) is selecting out +lower-dimensional slices. The following table shows return type values when +indexing pandas objects with ``[]``: + +.. csv-table:: + :header: "Object Type", "Selection", "Return Value Type" + :widths: 30, 30, 60 + :delim: ; + + Series; ``series[label]``; scalar value + DataFrame; ``frame[colname]``; ``Series`` corresponding to colname + +Here we construct a simple time series data set to use for illustrating the +indexing functionality: + +.. ipython:: python + + dates = pd.date_range('1/1/2000', periods=8) + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) + df + +.. note:: + + None of the indexing functionality is time series specific unless + specifically stated. 
+ +Thus, as per above, we have the most basic indexing using ``[]``: + +.. ipython:: python + + s = df['A'] + s[dates[5]] + +You can pass a list of columns to ``[]`` to select columns in that order. +If a column is not contained in the DataFrame, an exception will be +raised. Multiple columns can also be set in this manner: + +.. ipython:: python + + df + df[['B', 'A']] = df[['A', 'B']] + df + +You may find this useful for applying a transform (in-place) to a subset of the +columns. + +.. warning:: + + pandas aligns all AXES when setting ``Series`` and ``DataFrame`` from ``.loc``, and ``.iloc``. + + This will **not** modify ``df`` because the column alignment is before value assignment. + + .. ipython:: python + + df[['A', 'B']] + df.loc[:, ['B', 'A']] = df[['A', 'B']] + df[['A', 'B']] + + The correct way to swap column values is by using raw values: + + .. ipython:: python + + df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy() + df[['A', 'B']] + + +Attribute access +---------------- + +.. _indexing.columns.multiple: + +.. _indexing.df_cols: + +.. _indexing.attribute_access: + +You may access an index on a ``Series`` or column on a ``DataFrame`` directly +as an attribute: + +.. ipython:: python + + sa = pd.Series([1, 2, 3], index=list('abc')) + dfa = df.copy() + +.. ipython:: python + + sa.b + dfa.A + +.. ipython:: python + + sa.a = 5 + sa + dfa.A = list(range(len(dfa.index))) # ok if A already exists + dfa + dfa['A'] = list(range(len(dfa.index))) # use this form to create a new column + dfa + +.. warning:: + + - You can use this access only if the index element is a valid Python identifier, e.g. ``s.1`` is not allowed. + See `here for an explanation of valid identifiers + `__. + + - The attribute will not be available if it conflicts with an existing method name, e.g. ``s.min`` is not allowed, but ``s['min']`` is possible. 
+ + - Similarly, the attribute will not be available if it conflicts with any of the following list: ``index``, + ``major_axis``, ``minor_axis``, ``items``. + + - In any of these cases, standard indexing will still work, e.g. ``s['1']``, ``s['min']``, and ``s['index']`` will + access the corresponding element or column. + +If you are using the IPython environment, you may also use tab-completion to +see these accessible attributes. + +You can also assign a ``dict`` to a row of a ``DataFrame``: + +.. ipython:: python + + x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) + x.iloc[1] = {'x': 9, 'y': 99} + x + +You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; +if you try to use attribute access to create a new column, it creates a new attribute rather than a +new column. In 0.21.0 and later, this will raise a ``UserWarning``: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In [2]: df.two = [4, 5, 6] + UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute_access + In [3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Slicing ranges +-------------- + +The most robust and consistent way of slicing ranges along arbitrary axes is +described in the :ref:`Selection by Position ` section +detailing the ``.iloc`` method. For now, we explain the semantics of slicing using the ``[]`` operator. + +With Series, the syntax works exactly as with an ndarray, returning a slice of +the values and the corresponding labels: + +.. ipython:: python + + s[:5] + s[::2] + s[::-1] + +Note that setting works as well: + +.. ipython:: python + + s2 = s.copy() + s2[:5] = 0 + s2 + +With DataFrame, slicing inside of ``[]`` **slices the rows**. This is provided +largely as a convenience since it is such a common operation. + +.. ipython:: python + + df[:3] + df[::-1] + +.. 
_indexing.label: + +Selection by label +------------------ + +.. warning:: + + Whether a copy or a reference is returned for a setting operation, may depend on the context. + This is sometimes called ``chained assignment`` and should be avoided. + See :ref:`Returning a View versus Copy `. + +.. warning:: + + ``.loc`` is strict when you present slicers that are not compatible (or convertible) with the index type. For example + using integers in a ``DatetimeIndex``. These will raise a ``TypeError``. + + .. ipython:: python + + dfl = pd.DataFrame(np.random.randn(5, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=5)) + dfl + + .. code-block:: ipython + + In [4]: dfl.loc[2:3] + TypeError: cannot do slice indexing on with these indexers [2] of + + String likes in slicing *can* be convertible to the type of the index and lead to natural slicing. + + .. ipython:: python + + dfl.loc['20130102':'20130104'] + +.. warning:: + + Starting in 0.21.0, pandas will show a ``FutureWarning`` if indexing with a list with missing labels. In the future + this will raise a ``KeyError``. See :ref:`list-like Using loc with missing keys in a list is Deprecated `. + +pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol. +Every label asked for must be in the index, or a ``KeyError`` will be raised. +When slicing, both the start bound **AND** the stop bound are *included*, if present in the index. +Integers are valid labels, but they refer to the label **and not the position**. + +The ``.loc`` attribute is the primary access method. The following are valid inputs: + +* A single label, e.g. ``5`` or ``'a'`` (Note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index.). +* A list or array of labels ``['a', 'b', 'c']``. 
+* A slice object with labels ``'a':'f'`` (Note that contrary to usual python + slices, **both** the start and the stop are included, when present in the + index! See :ref:`Slicing with labels <indexing.slicing_with_labels>`.) +* A boolean array. +* A ``callable``, see :ref:`Selection By Callable <indexing.callable>`. + +.. ipython:: python + + s1 = pd.Series(np.random.randn(6), index=list('abcdef')) + s1 + s1.loc['c':] + s1.loc['b'] + +Note that setting works as well: + +.. ipython:: python + + s1.loc['c':] = 0 + s1 + +With a DataFrame: + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list('abcdef'), + columns=list('ABCD')) + df1 + df1.loc[['a', 'b', 'd'], :] + +Accessing via label slices: + +.. ipython:: python + + df1.loc['d':, 'A':'C'] + +For getting a cross section using a label (equivalent to ``df.xs('a')``): + +.. ipython:: python + + df1.loc['a'] + +For getting values with a boolean array: + +.. ipython:: python + + df1.loc['a'] > 0 + df1.loc[:, df1.loc['a'] > 0] + +NA values in a boolean array propagate as ``False``: + +.. versionchanged:: 1.0.2 + + mask = pd.array([True, False, True, False, pd.NA, False], dtype="boolean") + mask + df1[mask] + +For getting a value explicitly: + +.. ipython:: python + + # this is also equivalent to ``df1.at['a','A']`` + df1.loc['a', 'A'] + +.. _indexing.slicing_with_labels: + +Slicing with labels +~~~~~~~~~~~~~~~~~~~ + +When using ``.loc`` with slices, if both the start and the stop labels are +present in the index, then elements *located* between the two (including them) +are returned: + +.. ipython:: python + + s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4]) + s.loc[3:5] + +If at least one of the two is absent, but the index is sorted, and can be +compared against start and stop labels, then slicing will still work as +expected, by selecting labels which *rank* between the two: + +..
ipython:: python + + s.sort_index() + s.sort_index().loc[1:6] + +However, if at least one of the two is absent *and* the index is not sorted, an +error will be raised (since doing otherwise would be computationally expensive, +as well as potentially ambiguous for mixed type indexes). For instance, in the +above example, ``s.loc[1:6]`` would raise ``KeyError``. + +For the rationale behind this behavior, see +:ref:`Endpoints are inclusive `. + +.. _indexing.integer: + +Selection by position +--------------------- + +.. warning:: + + Whether a copy or a reference is returned for a setting operation, may depend on the context. + This is sometimes called ``chained assignment`` and should be avoided. + See :ref:`Returning a View versus Copy `. + +Pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely Python and NumPy slicing. These are ``0-based`` indexing. When slicing, the start bound is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise an ``IndexError``. + +The ``.iloc`` attribute is the primary access method. The following are valid inputs: + +* An integer e.g. ``5``. +* A list or array of integers ``[4, 3, 0]``. +* A slice object with ints ``1:7``. +* A boolean array. +* A ``callable``, see :ref:`Selection By Callable `. + +.. ipython:: python + + s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2))) + s1 + s1.iloc[:3] + s1.iloc[3] + +Note that setting works as well: + +.. ipython:: python + + s1.iloc[:3] = 0 + s1 + +With a DataFrame: + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list(range(0, 12, 2)), + columns=list(range(0, 8, 2))) + df1 + +Select via integer slicing: + +.. ipython:: python + + df1.iloc[:3] + df1.iloc[1:5, 2:4] + +Select via integer list: + +.. ipython:: python + + df1.iloc[[1, 3, 5], [1, 3]] + +.. ipython:: python + + df1.iloc[1:3, :] + +.. ipython:: python + + df1.iloc[:, 1:3] + +.. 
ipython:: python + + # this is also equivalent to ``df1.iat[1,1]`` + df1.iloc[1, 1] + +For getting a cross section using an integer position (equiv to ``df.xs(1)``): + +.. ipython:: python + + df1.iloc[1] + +Out of range slice indexes are handled gracefully just as in Python/Numpy. + +.. ipython:: python + + # these are allowed in python/numpy. + x = list('abcdef') + x + x[4:10] + x[8:10] + s = pd.Series(x) + s + s.iloc[4:10] + s.iloc[8:10] + +Note that using slices that go out of bounds can result in +an empty axis (e.g. an empty DataFrame being returned). + +.. ipython:: python + + dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB')) + dfl + dfl.iloc[:, 2:3] + dfl.iloc[:, 1:3] + dfl.iloc[4:6] + +A single indexer that is out of bounds will raise an ``IndexError``. +A list of indexers where any element is out of bounds will raise an +``IndexError``. + +.. code-block:: python + + >>> dfl.iloc[[4, 5, 6]] + IndexError: positional indexers are out-of-bounds + + >>> dfl.iloc[:, 4] + IndexError: single positional indexer is out-of-bounds + +.. _indexing.callable: + +Selection by callable +--------------------- + +``.loc``, ``.iloc``, and also ``[]`` indexing can accept a ``callable`` as indexer. +The ``callable`` must be a function with one argument (the calling Series or DataFrame) that returns valid output for indexing. + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list('abcdef'), + columns=list('ABCD')) + df1 + + df1.loc[lambda df: df['A'] > 0, :] + df1.loc[:, lambda df: ['A', 'B']] + + df1.iloc[:, lambda df: [0, 1]] + + df1[lambda df: df.columns[0]] + + +You can use callable indexing in ``Series``. + +.. ipython:: python + + df1['A'].loc[lambda s: s > 0] + +Using these methods / indexers, you can chain data selection operations +without using a temporary variable. + +.. ipython:: python + + bb = pd.read_csv('data/baseball.csv', index_col='id') + (bb.groupby(['year', 'team']).sum() + .loc[lambda df: df['r'] > 100]) + +.. 
_indexing.deprecate_ix: + +IX indexer is deprecated +------------------------ + +.. warning:: + + Starting in 0.20.0, the ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` + and ``.loc`` indexers. + +``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide +to index *positionally* OR via *labels* depending on the data type of the index. This has caused quite a +bit of user confusion over the years. + +The recommended methods of indexing are: + +* ``.loc`` if you want to *label* index. +* ``.iloc`` if you want to *positionally* index. + +.. ipython:: python + + dfd = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6]}, + index=list('abc')) + + dfd + +Previous behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column. + +.. code-block:: ipython + + In [3]: dfd.ix[[0, 2], 'A'] + Out[3]: + a 1 + c 3 + Name: A, dtype: int64 + +Using ``.loc``. Here we will select the appropriate indexes from the index, then use *label* indexing. + +.. ipython:: python + + dfd.loc[dfd.index[[0, 2]], 'A'] + +This can also be expressed using ``.iloc``, by explicitly getting locations on the indexers, and using +*positional* indexing to select things. + +.. ipython:: python + + dfd.iloc[[0, 2], dfd.columns.get_loc('A')] + +For getting *multiple* indexers, using ``.get_indexer``: + +.. ipython:: python + + dfd.iloc[[0, 2], dfd.columns.get_indexer(['A', 'B'])] + + +.. _deprecate_loc_reindex_listlike: +.. _indexing.deprecate_loc_reindex_listlike: + +Indexing with list with missing labels is deprecated +---------------------------------------------------- + +.. warning:: + + Starting in 0.21.0, using ``.loc`` or ``[]`` with a list with one or more missing labels, is deprecated, in favor of ``.reindex``. + +In prior versions, using ``.loc[list-of-labels]`` would work as long as *at least 1* of the keys was found (otherwise it +would raise a ``KeyError``). 
This behavior is deprecated and will show a warning message pointing to this section. The +recommended alternative is to use ``.reindex()``. + +For example. + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Selection with all keys found is unchanged. + +.. ipython:: python + + s.loc[[1, 2]] + +Previous behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Current behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Passing list-likes to .loc with any non-matching elements will raise + KeyError in the future, you can use .reindex() as an alternative. + + See the documentation here: + https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike + + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Reindexing +~~~~~~~~~~ + +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()``. See also the section on :ref:`reindexing `. + +.. ipython:: python + + s.reindex([1, 2, 3]) + +Alternatively, if you want to select only *valid* keys, the following is idiomatic and efficient; it is guaranteed to preserve the dtype of the selection. + +.. ipython:: python + + labels = [1, 2, 3] + s.loc[s.index.intersection(labels)] + +Having a duplicated index will raise for a ``.reindex()``: + +.. ipython:: python + + s = pd.Series(np.arange(4), index=['a', 'a', 'b', 'c']) + labels = ['c', 'd'] + +.. code-block:: ipython + + In [17]: s.reindex(labels) + ValueError: cannot reindex from a duplicate axis + +Generally, you can intersect the desired labels with the current +axis, and then reindex. + +.. ipython:: python + + s.loc[s.index.intersection(labels)].reindex(labels) + +However, this would *still* raise if your resulting index is duplicated. + +.. code-block:: ipython + + In [41]: labels = ['a', 'd'] + + In [42]: s.loc[s.index.intersection(labels)].reindex(labels) + ValueError: cannot reindex from a duplicate axis + + +.. 
_indexing.basics.partial_setting: + +Selecting random samples +------------------------ + +A random selection of rows or columns from a Series or DataFrame can be obtained with the :meth:`~DataFrame.sample` method. The method will sample rows by default, and accepts a specific number of rows/columns to return, or a fraction of rows. + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4, 5]) + + # When no arguments are passed, returns 1 row. + s.sample() + + # One may specify either a number of rows: + s.sample(n=3) + + # Or a fraction of the rows: + s.sample(frac=0.5) + +By default, ``sample`` will return each row at most once, but one can also sample with replacement +using the ``replace`` option: + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4, 5]) + + # Without replacement (default): + s.sample(n=6, replace=False) + + # With replacement: + s.sample(n=6, replace=True) + + +By default, each row has an equal probability of being selected, but if you want rows +to have different probabilities, you can pass the ``sample`` function sampling weights as +``weights``. These weights can be a list, a NumPy array, or a Series, but they must be of the same length as the object you are sampling. Missing values will be treated as a weight of zero, and inf values are not allowed. If weights do not sum to 1, they will be re-normalized by dividing all weights by the sum of the weights. For example: + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4, 5]) + example_weights = [0, 0, 0.2, 0.2, 0.2, 0.4] + s.sample(n=3, weights=example_weights) + + # Weights will be re-normalized automatically + example_weights2 = [0.5, 0, 0, 0, 0, 0] + s.sample(n=1, weights=example_weights2) + +When applied to a DataFrame, you can use a column of the DataFrame as sampling weights +(provided you are sampling rows and not columns) by simply passing the name of the column +as a string. + +..
ipython:: python + + df2 = pd.DataFrame({'col1': [9, 8, 7, 6], + 'weight_column': [0.5, 0.4, 0.1, 0]}) + df2.sample(n=3, weights='weight_column') + +``sample`` also allows users to sample columns instead of rows using the ``axis`` argument. + +.. ipython:: python + + df3 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [2, 3, 4]}) + df3.sample(n=1, axis=1) + +Finally, one can also set a seed for ``sample``'s random number generator using the ``random_state`` argument, which will accept either an integer (as a seed) or a NumPy RandomState object. + +.. ipython:: python + + df4 = pd.DataFrame({'col1': [1, 2, 3], 'col2': [2, 3, 4]}) + + # With a given seed, the sample will always draw the same rows. + df4.sample(n=2, random_state=2) + df4.sample(n=2, random_state=2) + + + +Setting with enlargement +------------------------ + +The ``.loc/[]`` operations can perform enlargement when setting a non-existent key for that axis. + +In the ``Series`` case this is effectively an appending operation. + +.. ipython:: python + + se = pd.Series([1, 2, 3]) + se + se[5] = 5. + se + +A ``DataFrame`` can be enlarged on either axis via ``.loc``. + +.. ipython:: python + + dfi = pd.DataFrame(np.arange(6).reshape(3, 2), + columns=['A', 'B']) + dfi + dfi.loc[:, 'C'] = dfi.loc[:, 'A'] + dfi + +This is like an ``append`` operation on the ``DataFrame``. + +.. ipython:: python + + dfi.loc[3] = 5 + dfi + +.. _indexing.basics.get_value: + +Fast scalar value getting and setting +------------------------------------- + +Since indexing with ``[]`` must handle a lot of cases (single-label access, +slicing, boolean indexing, etc.), it has a bit of overhead in order to figure +out what you're asking for. If you only want to access a scalar value, the +fastest way is to use the ``at`` and ``iat`` methods, which are implemented on +all of the data structures. 
+ +Similarly to ``loc``, ``at`` provides **label** based scalar lookups, while ``iat`` provides **integer** based lookups analogously to ``iloc``. + +.. ipython:: python + + s.iat[5] + df.at[dates[5], 'A'] + df.iat[3, 0] + +You can also set using these same indexers. + +.. ipython:: python + + df.at[dates[5], 'E'] = 7 + df.iat[3, 0] = 7 + +``at`` may enlarge the object in-place as above if the indexer is missing. + +.. ipython:: python + + df.at[dates[-1] + pd.Timedelta('1 day'), 0] = 7 + df + +Boolean indexing +---------------- + +.. _indexing.boolean: + +Another common operation is the use of boolean vectors to filter the data. +The operators are: ``|`` for ``or``, ``&`` for ``and``, and ``~`` for ``not``. +These **must** be grouped by using parentheses, since by default Python will +evaluate an expression such as ``df['A'] > 2 & df['B'] < 3`` as +``df['A'] > (2 & df['B']) < 3``, while the desired evaluation order is +``(df['A'] > 2) & (df['B'] < 3)``. + +Using a boolean vector to index a Series works exactly as in a NumPy ndarray: + +.. ipython:: python + + s = pd.Series(range(-3, 4)) + s + s[s > 0] + s[(s < -1) | (s > 0.5)] + s[~(s < 0)] + +You may select rows from a DataFrame using a boolean vector the same length as +the DataFrame's index (for example, something derived from one of the columns +of the DataFrame): + +.. ipython:: python + + df[df['A'] > 0] + +List comprehensions and the ``map`` method of Series can also be used to produce +more complex criteria: + +..
ipython:: python + + df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'], + 'b': ['x', 'y', 'y', 'x', 'y', 'x', 'x'], + 'c': np.random.randn(7)}) + + # only want 'two' or 'three' + criterion = df2['a'].map(lambda x: x.startswith('t')) + + df2[criterion] + + # equivalent but slower + df2[[x.startswith('t') for x in df2['a']]] + + # Multiple criteria + df2[criterion & (df2['b'] == 'x')] + +With the choice methods :ref:`Selection by Label `, :ref:`Selection by Position `, +and :ref:`Advanced Indexing ` you may select along more than one axis using boolean vectors combined with other indexing expressions. + +.. ipython:: python + + df2.loc[criterion & (df2['b'] == 'x'), 'b':'c'] + +.. _indexing.basics.indexing_isin: + +Indexing with isin +------------------ + +Consider the :meth:`~Series.isin` method of ``Series``, which returns a boolean +vector that is true wherever the ``Series`` elements exist in the passed list. +This allows you to select rows where one or more columns have values you want: + +.. ipython:: python + + s = pd.Series(np.arange(5), index=np.arange(5)[::-1], dtype='int64') + s + s.isin([2, 4, 6]) + s[s.isin([2, 4, 6])] + +The same method is available for ``Index`` objects and is useful for the cases +when you don't know which of the sought labels are in fact present: + +.. ipython:: python + + s[s.index.isin([2, 4, 6])] + + # compare it to the following + s.reindex([2, 4, 6]) + +In addition to that, ``MultiIndex`` allows selecting a separate level to use +in the membership check: + +.. ipython:: python + + s_mi = pd.Series(np.arange(6), + index=pd.MultiIndex.from_product([[0, 1], ['a', 'b', 'c']])) + s_mi + s_mi.iloc[s_mi.index.isin([(1, 'a'), (2, 'b'), (0, 'c')])] + s_mi.iloc[s_mi.index.isin(['a', 'c', 'e'], level=1)] + +DataFrame also has an :meth:`~DataFrame.isin` method. When calling ``isin``, pass a set of +values as either an array or dict. 
If values is an array, ``isin`` returns +a DataFrame of booleans that is the same shape as the original DataFrame, with True +wherever the element is in the sequence of values. + +.. ipython:: python + + df = pd.DataFrame({'vals': [1, 2, 3, 4], 'ids': ['a', 'b', 'f', 'n'], + 'ids2': ['a', 'n', 'c', 'n']}) + + values = ['a', 'b', 1, 3] + + df.isin(values) + +Oftentimes you'll want to match certain values with certain columns. +Just make values a ``dict`` where the key is the column, and the value is +a list of items you want to check for. + +.. ipython:: python + + values = {'ids': ['a', 'b'], 'vals': [1, 3]} + + df.isin(values) + +Combine DataFrame's ``isin`` with the ``any()`` and ``all()`` methods to +quickly select subsets of your data that meet a given criteria. +To select a row where each column meets its own criterion: + +.. ipython:: python + + values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]} + + row_mask = df.isin(values).all(1) + + df[row_mask] + +.. _indexing.where_mask: + +The :meth:`~pandas.DataFrame.where` Method and Masking +------------------------------------------------------ + +Selecting values from a Series with a boolean vector generally returns a +subset of the data. To guarantee that selection output has the same shape as +the original data, you can use the ``where`` method in ``Series`` and ``DataFrame``. + +To return only the selected rows: + +.. ipython:: python + + s[s > 0] + +To return a Series of the same shape as the original: + +.. ipython:: python + + s.where(s > 0) + +Selecting values from a DataFrame with a boolean criterion now also preserves +input data shape. ``where`` is used under the hood as the implementation. +The code below is equivalent to ``df.where(df < 0)``. + +.. ipython:: python + :suppress: + + dates = pd.date_range('1/1/2000', periods=8) + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) + +.. 
ipython:: python + + df[df < 0] + +In addition, ``where`` takes an optional ``other`` argument for replacement of +values where the condition is False, in the returned copy. + +.. ipython:: python + + df.where(df < 0, -df) + +You may wish to set values based on some boolean criteria. +This can be done intuitively like so: + +.. ipython:: python + + s2 = s.copy() + s2[s2 < 0] = 0 + s2 + + df2 = df.copy() + df2[df2 < 0] = 0 + df2 + +By default, ``where`` returns a modified copy of the data. There is an +optional parameter ``inplace`` so that the original data can be modified +without creating a copy: + +.. ipython:: python + + df_orig = df.copy() + df_orig.where(df > 0, -df, inplace=True) + df_orig + +.. note:: + + The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. + Roughly ``df1.where(m, df2)`` is equivalent to ``np.where(m, df1, df2)``. + + .. ipython:: python + + df.where(df < 0, -df) == np.where(df < 0, df, -df) + +**Alignment** + +Furthermore, ``where`` aligns the input boolean condition (ndarray or DataFrame), +such that partial selection with setting is possible. This is analogous to +partial setting via ``.loc`` (but on the contents rather than the axis labels). + +.. ipython:: python + + df2 = df.copy() + df2[df2[1:4] > 0] = 3 + df2 + +``where`` can also accept ``axis`` and ``level`` parameters to align the input when +performing the ``where``. + +.. ipython:: python + + df2 = df.copy() + df2.where(df2 > 0, df2['A'], axis='index') + +This is equivalent to (but faster than) the following. + +.. ipython:: python + + df2 = df.copy() + df.apply(lambda x, y: x.where(x > 0, y), y=df['A']) + +``where`` can accept a callable as the condition and ``other`` arguments. The function must +take one argument (the calling Series or DataFrame) and return valid output +for use as the condition or ``other`` argument. + +.. 
ipython:: python + + df3 = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6], + 'C': [7, 8, 9]}) + df3.where(lambda x: x > 4, lambda x: x + 10) + +Mask +~~~~ + +:meth:`~pandas.DataFrame.mask` is the inverse boolean operation of ``where``. + +.. ipython:: python + + s.mask(s >= 0) + df.mask(df >= 0) + +.. _indexing.query: + +The :meth:`~pandas.DataFrame.query` Method +------------------------------------------ + +:class:`~pandas.DataFrame` objects have a :meth:`~pandas.DataFrame.query` +method that allows selection using an expression. + +You can get the value of the frame where column ``b`` has values +between the values of columns ``a`` and ``c``. For example: + +.. ipython:: python + + n = 10 + df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc')) + df + + # pure python + df[(df['a'] < df['b']) & (df['b'] < df['c'])] + + # query + df.query('(a < b) & (b < c)') + +Do the same thing but fall back on a named index if there is no column +with the name ``a``. + +.. ipython:: python + + df = pd.DataFrame(np.random.randint(n / 2, size=(n, 2)), columns=list('bc')) + df.index.name = 'a' + df + df.query('a < b and b < c') + +If instead you don't want to or cannot name your index, you can use the name +``index`` in your query expression: + +.. ipython:: python + + df = pd.DataFrame(np.random.randint(n, size=(n, 2)), columns=list('bc')) + df + df.query('index < b < c') + +.. note:: + + If the name of your index overlaps with a column name, the column name is + given precedence. For example, + + .. ipython:: python + + df = pd.DataFrame({'a': np.random.randint(5, size=5)}) + df.index.name = 'a' + df.query('a > 2') # uses the column 'a', not the index + + You can still use the index in a query expression by using the special + identifier 'index': + + .. 
ipython:: python + + df.query('index > 2') + + If for some reason you have a column named ``index``, then you can refer to + the index as ``ilevel_0`` as well, but at this point you should consider + renaming your columns to something less ambiguous. + + +:class:`~pandas.MultiIndex` :meth:`~pandas.DataFrame.query` Syntax +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also use the levels of a ``DataFrame`` with a +:class:`~pandas.MultiIndex` as if they were columns in the frame: + +.. ipython:: python + + n = 10 + colors = np.random.choice(['red', 'green'], size=n) + foods = np.random.choice(['eggs', 'ham'], size=n) + colors + foods + + index = pd.MultiIndex.from_arrays([colors, foods], names=['color', 'food']) + df = pd.DataFrame(np.random.randn(n, 2), index=index) + df + df.query('color == "red"') + +If the levels of the ``MultiIndex`` are unnamed, you can refer to them using +special names: + +.. ipython:: python + + df.index.names = [None, None] + df + df.query('ilevel_0 == "red"') + + +The convention is ``ilevel_0``, which means "index level 0" for the 0th level +of the ``index``. + + +:meth:`~pandas.DataFrame.query` Use Cases +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A use case for :meth:`~pandas.DataFrame.query` is when you have a collection of +:class:`~pandas.DataFrame` objects that have a subset of column names (or index +levels/names) in common. You can pass the same query to both frames *without* +having to specify which frame you're interested in querying + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc')) + df + df2 = pd.DataFrame(np.random.rand(n + 2, 3), columns=df.columns) + df2 + expr = '0.0 <= a <= c <= 0.5' + map(lambda frame: frame.query(expr), [df, df2]) + +:meth:`~pandas.DataFrame.query` Python versus pandas Syntax Comparison +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Full numpy-like syntax: + +.. 
ipython:: python + + df = pd.DataFrame(np.random.randint(n, size=(n, 3)), columns=list('abc')) + df + df.query('(a < b) & (b < c)') + df[(df['a'] < df['b']) & (df['b'] < df['c'])] + +Slightly nicer by removing the parentheses (by making comparison +operators bind tighter than ``&`` and ``|``). + +.. ipython:: python + + df.query('a < b & b < c') + +Use English instead of symbols: + +.. ipython:: python + + df.query('a < b and b < c') + +Pretty close to how you might write it on paper: + +.. ipython:: python + + df.query('a < b < c') + +The ``in`` and ``not in`` operators +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~pandas.DataFrame.query` also supports special use of Python's ``in`` and +``not in`` comparison operators, providing a succinct syntax for calling the +``isin`` method of a ``Series`` or ``DataFrame``. + +.. ipython:: python + + # get all rows where columns "a" and "b" have overlapping values + df = pd.DataFrame({'a': list('aabbccddeeff'), 'b': list('aaaabbbbcccc'), + 'c': np.random.randint(5, size=12), + 'd': np.random.randint(9, size=12)}) + df + df.query('a in b') + + # How you'd do it in pure Python + df[df['a'].isin(df['b'])] + + df.query('a not in b') + + # pure Python + df[~df['a'].isin(df['b'])] + + +You can combine this with other expressions for very succinct queries: + + +.. ipython:: python + + # rows where cols a and b have overlapping values + # and col c's values are less than col d's + df.query('a in b and c < d') + + # pure Python + df[df['a'].isin(df['b']) & (df['c'] < df['d'])] + + +.. note:: + + Note that ``in`` and ``not in`` are evaluated in Python, since ``numexpr`` + has no equivalent of this operation. However, **only the** ``in``/``not in`` + **expression itself** is evaluated in vanilla Python. For example, in the + expression + + .. code-block:: python + + df.query('a in b + c + d') + + ``(b + c + d)`` is evaluated by ``numexpr`` and *then* the ``in`` + operation is evaluated in plain Python. 
In general, any operations that can + be evaluated using ``numexpr`` will be. + +Special use of the ``==`` operator with ``list`` objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Comparing a ``list`` of values to a column using ``==``/``!=`` works similarly +to ``in``/``not in``. + +.. ipython:: python + + df.query('b == ["a", "b", "c"]') + + # pure Python + df[df['b'].isin(["a", "b", "c"])] + + df.query('c == [1, 2]') + + df.query('c != [1, 2]') + + # using in/not in + df.query('[1, 2] in c') + + df.query('[1, 2] not in c') + + # pure Python + df[df['c'].isin([1, 2])] + + +Boolean operators +~~~~~~~~~~~~~~~~~ + +You can negate boolean expressions with the word ``not`` or the ``~`` operator. + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(n, 3), columns=list('abc')) + df['bools'] = np.random.rand(len(df)) > 0.5 + df.query('~bools') + df.query('not bools') + df.query('not bools') == df[~df['bools']] + +Of course, expressions can be arbitrarily complex too: + +.. ipython:: python + + # short query syntax + shorter = df.query('a < b < c and (not bools) or bools > 2') + + # equivalent in pure Python + longer = df[(df['a'] < df['b']) + & (df['b'] < df['c']) + & (~df['bools']) + | (df['bools'] > 2)] + + shorter + longer + + shorter == longer + + +Performance of :meth:`~pandas.DataFrame.query` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``DataFrame.query()`` using ``numexpr`` is slightly faster than Python for +large frames. + +.. image:: ../_static/query-perf.png + +.. note:: + + You will only see the performance benefits of using the ``numexpr`` engine + with ``DataFrame.query()`` if your frame has more than approximately 200,000 + rows. + + .. image:: ../_static/query-perf-small.png + +This plot was created using a ``DataFrame`` with 3 columns each containing +floating point values generated using ``numpy.random.randn()``. + +.. 
ipython:: python + :suppress: + + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) + df2 = df.copy() + + +Duplicate data +-------------- + +.. _indexing.duplicate: + +If you want to identify and remove duplicate rows in a DataFrame, there are +two methods that will help: ``duplicated`` and ``drop_duplicates``. Each +takes as an argument the columns to use to identify duplicated rows. + +* ``duplicated`` returns a boolean vector whose length is the number of rows, and which indicates whether a row is duplicated. +* ``drop_duplicates`` removes duplicate rows. + +By default, the first observed row of a duplicate set is considered unique, but +each method has a ``keep`` parameter to specify targets to be kept. + +* ``keep='first'`` (default): mark / drop duplicates except for the first occurrence. +* ``keep='last'``: mark / drop duplicates except for the last occurrence. +* ``keep=False``: mark / drop all duplicates. + +.. ipython:: python + + df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'two', 'two', 'three', 'four'], + 'b': ['x', 'y', 'x', 'y', 'x', 'x', 'x'], + 'c': np.random.randn(7)}) + df2 + df2.duplicated('a') + df2.duplicated('a', keep='last') + df2.duplicated('a', keep=False) + df2.drop_duplicates('a') + df2.drop_duplicates('a', keep='last') + df2.drop_duplicates('a', keep=False) + +Also, you can pass a list of columns to identify duplications. + +.. ipython:: python + + df2.duplicated(['a', 'b']) + df2.drop_duplicates(['a', 'b']) + +To drop duplicates by index value, use ``Index.duplicated`` then perform slicing. +The same set of options are available for the ``keep`` parameter. + +.. ipython:: python + + df3 = pd.DataFrame({'a': np.arange(6), + 'b': np.random.randn(6)}, + index=['a', 'a', 'b', 'c', 'b', 'a']) + df3 + df3.index.duplicated() + df3[~df3.index.duplicated()] + df3[~df3.index.duplicated(keep='last')] + df3[~df3.index.duplicated(keep=False)] + +.. 
_indexing.dictionarylike: + +Dictionary-like :meth:`~pandas.DataFrame.get` method +---------------------------------------------------- + +Each of Series or DataFrame have a ``get`` method which can return a +default value. + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s.get('a') # equivalent to s['a'] + s.get('x', default=-1) + +The :meth:`~pandas.DataFrame.lookup` method +------------------------------------------- + +Sometimes you want to extract a set of values given a sequence of row labels +and column labels, and the ``lookup`` method allows for this and returns a +NumPy array. For instance: + +.. ipython:: python + + dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D']) + dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D']) + +.. _indexing.class: + +Index objects +------------- + +The pandas :class:`~pandas.Index` class and its subclasses can be viewed as +implementing an *ordered multiset*. Duplicates are allowed. However, if you try +to convert an :class:`~pandas.Index` object with duplicate entries into a +``set``, an exception will be raised. + +:class:`~pandas.Index` also provides the infrastructure necessary for +lookups, data alignment, and reindexing. The easiest way to create an +:class:`~pandas.Index` directly is to pass a ``list`` or other sequence to +:class:`~pandas.Index`: + +.. ipython:: python + + index = pd.Index(['e', 'd', 'a', 'b']) + index + 'd' in index + +You can also pass a ``name`` to be stored in the index: + + +.. ipython:: python + + index = pd.Index(['e', 'd', 'a', 'b'], name='something') + index.name + +The name, if set, will be shown in the console display: + +.. ipython:: python + + index = pd.Index(list(range(5)), name='rows') + columns = pd.Index(['A', 'B', 'C'], name='cols') + df = pd.DataFrame(np.random.randn(5, 3), index=index, columns=columns) + df + df['A'] + +.. 
_indexing.set_metadata: + +Setting metadata +~~~~~~~~~~~~~~~~ + +Indexes are "mostly immutable", but it is possible to set and change their +metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and +``codes``). + +You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_codes`` +to set these attributes directly. They default to returning a copy; however, +you can specify ``inplace=True`` to have the data change in place. + +See :ref:`Advanced Indexing ` for usage of MultiIndexes. + +.. ipython:: python + + ind = pd.Index([1, 2, 3]) + ind.rename("apple") + ind + ind.set_names(["apple"], inplace=True) + ind.name = "bob" + ind + +``set_names``, ``set_levels``, and ``set_codes`` also take an optional +``level`` argument + +.. ipython:: python + + index = pd.MultiIndex.from_product([range(3), ['one', 'two']], names=['first', 'second']) + index + index.levels[1] + index.set_levels(["a", "b"], level=1) + +.. _indexing.set_ops: + +Set operations on Index objects +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The two main operations are ``union (|)`` and ``intersection (&)``. +These can be directly called as instance methods or used via overloaded +operators. Difference is provided via the ``.difference()`` method. + +.. ipython:: python + + a = pd.Index(['c', 'b', 'a']) + b = pd.Index(['c', 'e', 'd']) + a | b + a & b + a.difference(b) + +Also available is the ``symmetric_difference (^)`` operation, which returns elements +that appear in either ``idx1`` or ``idx2``, but not in both. This is +equivalent to the Index created by ``idx1.difference(idx2).union(idx2.difference(idx1))``, +with duplicates dropped. + +.. ipython:: python + + idx1 = pd.Index([1, 2, 3, 4]) + idx2 = pd.Index([2, 3, 4, 5]) + idx1.symmetric_difference(idx2) + idx1 ^ idx2 + +.. note:: + + The resulting index from a set operation will be sorted in ascending order. + +When performing :meth:`Index.union` between indexes with different dtypes, the indexes +must be cast to a common dtype. 
Typically, though not always, this is object dtype. The +exception is when performing a union between integer and float data. In this case, the +integer values are converted to float + +.. ipython:: python + + idx1 = pd.Index([0, 1, 2]) + idx2 = pd.Index([0.5, 1.5]) + idx1 | idx2 + +.. _indexing.missing: + +Missing values +~~~~~~~~~~~~~~ + +.. important:: + + Even though ``Index`` can hold missing values (``NaN``), it should be avoided + if you do not want any unexpected results. For example, some operations + exclude missing values implicitly. + +``Index.fillna`` fills missing values with specified scalar value. + +.. ipython:: python + + idx1 = pd.Index([1, np.nan, 3, 4]) + idx1 + idx1.fillna(2) + + idx2 = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), + pd.NaT, + pd.Timestamp('2011-01-03')]) + idx2 + idx2.fillna(pd.Timestamp('2011-01-02')) + +Set / reset index +----------------- + +Occasionally you will load or create a data set into a DataFrame and want to +add an index after you've already done so. There are a couple of different +ways. + +.. _indexing.set_index: + +Set an index +~~~~~~~~~~~~ + +DataFrame has a :meth:`~DataFrame.set_index` method which takes a column name +(for a regular ``Index``) or a list of column names (for a ``MultiIndex``). +To create a new, re-indexed DataFrame: + +.. ipython:: python + :suppress: + + data = pd.DataFrame({'a': ['bar', 'bar', 'foo', 'foo'], + 'b': ['one', 'two', 'one', 'two'], + 'c': ['z', 'y', 'x', 'w'], + 'd': [1., 2., 3, 4]}) + +.. ipython:: python + + data + indexed1 = data.set_index('c') + indexed1 + indexed2 = data.set_index(['a', 'b']) + indexed2 + +The ``append`` keyword option allow you to keep the existing index and append +the given columns to a MultiIndex: + +.. 
ipython:: python + + frame = data.set_index('c', drop=False) + frame = frame.set_index(['a', 'b'], append=True) + frame + +Other options in ``set_index`` allow you not drop the index columns or to add +the index in-place (without creating a new object): + +.. ipython:: python + + data.set_index('c', drop=False) + data.set_index(['a', 'b'], inplace=True) + data + +Reset the index +~~~~~~~~~~~~~~~ + +As a convenience, there is a new function on DataFrame called +:meth:`~DataFrame.reset_index` which transfers the index values into the +DataFrame's columns and sets a simple integer index. +This is the inverse operation of :meth:`~DataFrame.set_index`. + + +.. ipython:: python + + data + data.reset_index() + +The output is more similar to a SQL table or a record array. The names for the +columns derived from the index are the ones stored in the ``names`` attribute. + +You can use the ``level`` keyword to remove only a portion of the index: + +.. ipython:: python + + frame + frame.reset_index(level=1) + + +``reset_index`` takes an optional parameter ``drop`` which if true simply +discards the index, instead of putting index values in the DataFrame's columns. + +Adding an ad hoc index +~~~~~~~~~~~~~~~~~~~~~~ + +If you create an index yourself, you can just assign it to the ``index`` field: + +.. code-block:: python + + data.index = index + +.. _indexing.view_versus_copy: + +Returning a view versus a copy +------------------------------ + +When setting values in a pandas object, care must be taken to avoid what is called +``chained indexing``. Here is an example. + +.. ipython:: python + + dfmi = pd.DataFrame([list('abcd'), + list('efgh'), + list('ijkl'), + list('mnop')], + columns=pd.MultiIndex.from_product([['one', 'two'], + ['first', 'second']])) + dfmi + +Compare these two access methods: + +.. ipython:: python + + dfmi['one']['second'] + +.. ipython:: python + + dfmi.loc[:, ('one', 'second')] + +These both yield the same results, so which should you use? 
It is instructive to understand the order +of operations on these and why method 2 (``.loc``) is much preferred over method 1 (chained ``[]``). + +``dfmi['one']`` selects the first level of the columns and returns a DataFrame that is singly-indexed. +Then another Python operation ``dfmi_with_one['second']`` selects the series indexed by ``'second'``. +This is indicated by the variable ``dfmi_with_one`` because pandas sees these operations as separate events. +e.g. separate calls to ``__getitem__``, so it has to treat them as linear operations, they happen one after another. + +Contrast this to ``df.loc[:,('one','second')]`` which passes a nested tuple of ``(slice(None),('one','second'))`` to a single call to +``__getitem__``. This allows pandas to deal with this as a single entity. Furthermore this order of operations *can* be significantly +faster, and allows one to index *both* axes if so desired. + +Why does assignment fail when using chained indexing? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The problem in the previous section is just a performance issue. What's up with +the ``SettingWithCopy`` warning? We don't **usually** throw warnings around when +you do something that might cost a few extra milliseconds! + +But it turns out that assigning to the product of chained indexing has +inherently unpredictable results. To see this, think about how the Python +interpreter executes this code: + +.. ipython:: python + :suppress: + + value = None + +.. code-block:: python + + dfmi.loc[:, ('one', 'second')] = value + # becomes + dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) + +But this code is handled differently: + +.. code-block:: python + + dfmi['one']['second'] = value + # becomes + dfmi.__getitem__('one').__setitem__('second', value) + +See that ``__getitem__`` in there? 
Outside of simple cases, it's very hard to +predict whether it will return a view or a copy (it depends on the memory layout +of the array, about which pandas makes no guarantees), and therefore whether +the ``__setitem__`` will modify ``dfmi`` or a temporary object that gets thrown +out immediately afterward. **That's** what ``SettingWithCopy`` is warning you +about! + +.. note:: You may be wondering whether we should be concerned about the ``loc`` + property in the first example. But ``dfmi.loc`` is guaranteed to be ``dfmi`` + itself with modified indexing behavior, so ``dfmi.loc.__getitem__`` / + ``dfmi.loc.__setitem__`` operate on ``dfmi`` directly. Of course, + ``dfmi.loc.__getitem__(idx)`` may be a view or a copy of ``dfmi``. + +Sometimes a ``SettingWithCopy`` warning will arise at times when there's no +obvious chained indexing going on. **These** are the bugs that +``SettingWithCopy`` is designed to catch! Pandas is probably trying to warn you +that you've done this: + +.. code-block:: python + + def do_something(df): + foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! + # ... many lines here ... + # We don't know whether this will modify df or not! + foo['quux'] = value + return foo + +Yikes! + +.. _indexing.evaluation_order: + +Evaluation order matters +~~~~~~~~~~~~~~~~~~~~~~~~ + +When you use chained indexing, the order and type of the indexing operation +partially determine whether the result is a slice into the original object, or +a copy of the slice. + +Pandas has the ``SettingWithCopyWarning`` because assigning to a copy of a +slice is frequently not intentional, but a mistake caused by chained indexing +returning a copy where a slice was expected. + +If you would like pandas to be more or less trusting about assignment to a +chained indexing expression, you can set the :ref:`option ` +``mode.chained_assignment`` to one of these values: + +* ``'warn'``, the default, means a ``SettingWithCopyWarning`` is printed. 
+* ``'raise'`` means pandas will raise a ``SettingWithCopyException`` + you have to deal with. +* ``None`` will suppress the warnings entirely. + +.. ipython:: python + :okwarning: + + dfb = pd.DataFrame({'a': ['one', 'one', 'two', + 'three', 'two', 'one', 'six'], + 'c': np.arange(7)}) + + # This will show the SettingWithCopyWarning + # but the frame values will be set + dfb['c'][dfb['a'].str.startswith('o')] = 42 + +This however is operating on a copy and will not work. + +:: + + >>> pd.set_option('mode.chained_assignment','warn') + >>> dfb[dfb['a'].str.startswith('o')]['c'] = 42 + Traceback (most recent call last) + ... + SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_index,col_indexer] = value instead + +A chained assignment can also crop up in setting in a mixed dtype frame. + +.. note:: + + These setting rules apply to all of ``.loc/.iloc``. + +This is the correct access method: + +.. ipython:: python + + dfc = pd.DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + dfc.loc[0, 'A'] = 11 + dfc + +This *can* work at times, but it is not guaranteed to, and therefore should be avoided: + +.. ipython:: python + :okwarning: + + dfc = dfc.copy() + dfc['A'][0] = 111 + dfc + +This will **not** work at all, and so should be avoided: + +:: + + >>> pd.set_option('mode.chained_assignment','raise') + >>> dfc.loc[0]['A'] = 1111 + Traceback (most recent call last) + ... + SettingWithCopyException: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_index,col_indexer] = value instead + +.. warning:: + + The chained assignment warnings / exceptions are aiming to inform the user of a possibly invalid + assignment. There may be false positives; situations where a chained assignment is inadvertently + reported. 
diff --git a/doc/source/user_guide/integer_na.rst b/doc/source/user_guide/integer_na.rst new file mode 100644 index 00000000..a45d7a4f --- /dev/null +++ b/doc/source/user_guide/integer_na.rst @@ -0,0 +1,153 @@ +.. currentmodule:: pandas + +{{ header }} + +.. _integer_na: + +************************** +Nullable integer data type +************************** + +.. versionadded:: 0.24.0 + +.. note:: + + IntegerArray is currently experimental. Its API or implementation may + change without warning. + +.. versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as the missing value rather + than :attr:`numpy.nan`. + +In :ref:`missing_data`, we saw that pandas primarily uses ``NaN`` to represent +missing data. Because ``NaN`` is a float, this forces an array of integers with +any missing values to become floating point. In some cases, this may not matter +much. But if your integer column is, say, an identifier, casting to float can +be problematic. Some integers cannot even be represented as floating point +numbers. + +Construction +------------ + +Pandas can represent integer data with possibly missing values using +:class:`arrays.IntegerArray`. This is an :ref:`extension type <extending.extension-types>` +implemented within pandas. + +.. ipython:: python + + arr = pd.array([1, 2, None], dtype=pd.Int64Dtype()) + arr + +Or the string alias ``"Int64"`` (note the capital ``"I"``, to differentiate from +NumPy's ``'int64'`` dtype): + +.. ipython:: python + + pd.array([1, 2, np.nan], dtype="Int64") + +All NA-like values are replaced with :attr:`pandas.NA`. + +.. ipython:: python + + pd.array([1, 2, np.nan, None, pd.NA], dtype="Int64") + +This array can be stored in a :class:`DataFrame` or :class:`Series` like any +NumPy array. + +.. ipython:: python + + pd.Series(arr) + +You can also pass the list-like object to the :class:`Series` constructor +with the dtype. + +.. warning:: + + Currently :meth:`pandas.array` and :meth:`pandas.Series` use different + rules for dtype inference. 
:meth:`pandas.array` will infer a nullable- + integer dtype + + .. ipython:: python + + pd.array([1, None]) + pd.array([1, 2]) + + For backwards-compatibility, :class:`Series` infers these as either + integer or float dtype + + .. ipython:: python + + pd.Series([1, None]) + pd.Series([1, 2]) + + We recommend explicitly providing the dtype to avoid confusion. + + .. ipython:: python + + pd.array([1, None], dtype="Int64") + pd.Series([1, None], dtype="Int64") + + In the future, we may provide an option for :class:`Series` to infer a + nullable-integer dtype. + +Operations +---------- + +Operations involving an integer array will behave similarly to NumPy arrays. +Missing values will be propagated, and the data will be coerced to another +dtype if needed. + +.. ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + + # arithmetic + s + 1 + + # comparison + s == 1 + + # indexing + s.iloc[1:3] + + # operate with other dtypes + s + s.iloc[1:3].astype('Int8') + + # coerce when needed + s + 0.01 + +These dtypes can operate as part of a ``DataFrame``. + +.. ipython:: python + + df = pd.DataFrame({'A': s, 'B': [1, 1, 3], 'C': list('aab')}) + df + df.dtypes + + +These dtypes can be merged, reshaped, and cast. + +.. ipython:: python + + pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes + df['A'].astype(float) + +Reduction and groupby operations such as 'sum' work as well. + +.. ipython:: python + + df.sum() + df.groupby('B').A.sum() + +Scalar NA Value +--------------- + +:class:`arrays.IntegerArray` uses :attr:`pandas.NA` as its scalar +missing value. Slicing a single element that's missing will return +:attr:`pandas.NA`: + +.. ipython:: python + + a = pd.array([1, None], dtype="Int64") + a[1] diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst new file mode 100644 index 00000000..a8d84469 --- /dev/null +++ b/doc/source/user_guide/io.rst @@ -0,0 +1,5737 @@ +.. _io: + +.. 
currentmodule:: pandas + + +=============================== +IO tools (text, CSV, HDF5, ...) +=============================== + +The pandas I/O API is a set of top level ``reader`` functions accessed like +:func:`pandas.read_csv` that generally return a pandas object. The corresponding +``writer`` functions are object methods that are accessed like +:meth:`DataFrame.to_csv`. Below is a table containing available ``readers`` and +``writers``. + +.. csv-table:: + :header: "Format Type", "Data Description", "Reader", "Writer" + :widths: 30, 100, 60, 60 + :delim: ; + + text;`CSV `__;:ref:`read_csv`;:ref:`to_csv` + text;Fixed-Width Text File;:ref:`read_fwf` + text;`JSON `__;:ref:`read_json`;:ref:`to_json` + text;`HTML `__;:ref:`read_html`;:ref:`to_html` + text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` + ;`MS Excel `__;:ref:`read_excel`;:ref:`to_excel` + binary;`OpenDocument `__;:ref:`read_excel`; + binary;`HDF5 Format `__;:ref:`read_hdf`;:ref:`to_hdf` + binary;`Feather Format `__;:ref:`read_feather`;:ref:`to_feather` + binary;`Parquet Format `__;:ref:`read_parquet`;:ref:`to_parquet` + binary;`ORC Format `__;:ref:`read_orc`; + binary;`Msgpack `__;:ref:`read_msgpack`;:ref:`to_msgpack` + binary;`Stata `__;:ref:`read_stata`;:ref:`to_stata` + binary;`SAS `__;:ref:`read_sas`; + binary;`SPSS `__;:ref:`read_spss`; + binary;`Python Pickle Format `__;:ref:`read_pickle`;:ref:`to_pickle` + SQL;`SQL `__;:ref:`read_sql`;:ref:`to_sql` + SQL;`Google BigQuery `__;:ref:`read_gbq`;:ref:`to_gbq` + +:ref:`Here ` is an informal performance comparison for some of these IO methods. + +.. note:: + For examples that use the ``StringIO`` class, make sure you import it + according to your Python version, i.e. ``from StringIO import StringIO`` for + Python 2 and ``from io import StringIO`` for Python 3. + +.. _io.read_csv_table: + +CSV & text files +---------------- + +The workhorse function for reading text files (a.k.a. flat files) is +:func:`read_csv`. 
See the :ref:`cookbook` for some advanced strategies. + +Parsing options +''''''''''''''' + +:func:`read_csv` accepts the following common arguments: + +Basic ++++++ + +filepath_or_buffer : various + Either a path to a file (a :class:`python:str`, :class:`python:pathlib.Path`, + or :class:`py:py._path.local.LocalPath`), URL (including http, ftp, and S3 + locations), or any object with a ``read()`` method (such as an open file or + :class:`~python:io.StringIO`). +sep : str, defaults to ``','`` for :func:`read_csv`, ``\t`` for :func:`read_table` + Delimiter to use. If sep is ``None``, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will be + used and automatically detect the separator by Python's builtin sniffer tool, + :class:`python:csv.Sniffer`. In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``. +delimiter : str, default ``None`` + Alternative argument name for sep. +delim_whitespace : boolean, default False + Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) + will be used as the delimiter. Equivalent to setting ``sep='\s+'``. + If this option is set to ``True``, nothing should be passed in for the + ``delimiter`` parameter. + +Column and index locations and names +++++++++++++++++++++++++++++++++++++ + +header : int or list of ints, default ``'infer'`` + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names are + passed the behavior is identical to ``header=0`` and column names + are inferred from the first line of the file, if column names are + passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to replace + existing names. 
+ + The header can be a list of ints that specify row locations + for a MultiIndex on the columns e.g. ``[0,1,3]``. Intervening rows + that are not specified will be skipped (e.g. 2 in this example is + skipped). Note that this parameter ignores commented lines and empty + lines if ``skip_blank_lines=True``, so header=0 denotes the first + line of data rather than the first line of the file. +names : array-like, default ``None`` + List of column names to use. If file contains no header row, then you should + explicitly pass ``header=None``. Duplicates in this list are not allowed. +index_col : int, str, sequence of int / str, or False, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int / str is given, a + MultiIndex is used. + + Note: ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. +usecols : list-like or callable, default ``None`` + Return a subset of the columns. If list-like, all elements must either + be positional (i.e. integer indices into the document columns) or strings + that correspond to column names provided either by the user in `names` or + inferred from the document header row(s). For example, a valid list-like + `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. To + instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` for + ``['bar', 'foo']`` order. + + If callable, the callable function will be evaluated against the column names, + returning names where the callable function evaluates to True: + + .. 
ipython:: python + + import pandas as pd + from io import StringIO + data = ('col1,col2,col3\n' + 'a,b,1\n' + 'a,b,2\n' + 'c,d,3') + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), usecols=lambda x: x.upper() in ['COL1', 'COL3']) + + Using this parameter results in much faster parsing time and lower memory usage. +squeeze : boolean, default ``False`` + If the parsed data only contains one column then return a ``Series``. +prefix : str, default ``None`` + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... +mangle_dupe_cols : boolean, default ``True`` + Duplicate columns will be specified as 'X', 'X.1'...'X.N', rather than 'X'...'X'. + Passing in ``False`` will cause data to be overwritten if there are duplicate + names in the columns. + +General parsing configuration ++++++++++++++++++++++++++++++ + +dtype : Type name or dict of column -> type, default ``None`` + Data type for data or columns. E.g. ``{'a': np.float64, 'b': np.int32}`` + (unsupported with ``engine='python'``). Use `str` or `object` together + with suitable ``na_values`` settings to preserve and + not interpret dtype. +engine : {``'c'``, ``'python'``} + Parser engine to use. The C engine is faster while the Python engine is + currently more feature-complete. +converters : dict, default ``None`` + Dict of functions for converting values in certain columns. Keys can either be + integers or column labels. +true_values : list, default ``None`` + Values to consider as ``True``. +false_values : list, default ``None`` + Values to consider as ``False``. +skipinitialspace : boolean, default ``False`` + Skip spaces after delimiter. +skiprows : list-like or integer, default ``None`` + Line numbers to skip (0-indexed) or number of lines to skip (int) at the start + of the file. + + If callable, the callable function will be evaluated against the row + indices, returning True if the row should be skipped and False otherwise: + + .. 
ipython:: python + + data = ('col1,col2,col3\n' + 'a,b,1\n' + 'a,b,2\n' + 'c,d,3') + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), skiprows=lambda x: x % 2 != 0) + +skipfooter : int, default ``0`` + Number of lines at bottom of file to skip (unsupported with engine='c'). + +nrows : int, default ``None`` + Number of rows of file to read. Useful for reading pieces of large files. +low_memory : boolean, default ``True`` + Internally process the file in chunks, resulting in lower memory use + while parsing, but possibly mixed type inference. To ensure no mixed + types either set ``False``, or specify the type with the ``dtype`` parameter. + Note that the entire file is read into a single ``DataFrame`` regardless, + use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. + (Only valid with C parser) +memory_map : boolean, default False + If a filepath is provided for ``filepath_or_buffer``, map the file object + directly onto memory and access the data directly from there. Using this + option can improve performance because there is no longer any I/O overhead. + +NA and missing data handling +++++++++++++++++++++++++++++ + +na_values : scalar, str, list-like, or dict, default ``None`` + Additional strings to recognize as NA/NaN. If dict passed, specific per-column + NA values. See :ref:`na values const ` below + for a list of the values interpreted as NaN by default. + +keep_default_na : boolean, default ``True`` + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is ``True``, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is ``True``, and `na_values` are not specified, only + the default NaN values are used for parsing. 
+ * If `keep_default_na` is ``False``, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is ``False``, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as ``False``, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : boolean, default ``True`` + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing ``na_filter=False`` can improve the performance + of reading a large file. +verbose : boolean, default ``False`` + Indicate number of NA values placed in non-numeric columns. +skip_blank_lines : boolean, default ``True`` + If ``True``, skip over blank lines rather than interpreting as NaN values. + +Datetime handling ++++++++++++++++++ + +parse_dates : boolean or list of ints or names or list of lists or dict, default ``False``. + * If ``True`` -> try parsing the index. + * If ``[1, 2, 3]`` -> try parsing columns 1, 2, 3 each as a separate date + column. + * If ``[[1, 3]]`` -> combine columns 1 and 3 and parse as a single date + column. + * If ``{'foo': [1, 3]}`` -> parse columns 1, 3 as date and call result 'foo'. + A fast-path exists for iso8601-formatted dates. +infer_datetime_format : boolean, default ``False`` + If ``True`` and parse_dates is enabled for a column, attempt to infer the + datetime format to speed up the processing. +keep_date_col : boolean, default ``False`` + If ``True`` and parse_dates specifies combining multiple columns then keep the + original columns. +date_parser : function, default ``None`` + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. 
pandas will try to call date_parser in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays (as + defined by parse_dates) as arguments; 2) concatenate (row-wise) the string + values from the columns defined by parse_dates into a single array and pass + that; and 3) call date_parser once for each row using one or more strings + (corresponding to the columns defined by parse_dates) as arguments. +dayfirst : boolean, default ``False`` + DD/MM format dates, international and European format. +cache_dates : boolean, default True + If True, use a cache of unique, converted dates to apply the datetime + conversion. May produce significant speed-up when parsing duplicate + date strings, especially ones with timezone offsets. + + .. versionadded:: 0.25.0 + +Iteration ++++++++++ + +iterator : boolean, default ``False`` + Return `TextFileReader` object for iteration or getting chunks with + ``get_chunk()``. +chunksize : int, default ``None`` + Return `TextFileReader` object for iteration. See :ref:`iterating and chunking + ` below. + +Quoting, compression, and file format ++++++++++++++++++++++++++++++++++++++ + +compression : {``'infer'``, ``'gzip'``, ``'bz2'``, ``'zip'``, ``'xz'``, ``None``}, default ``'infer'`` + For on-the-fly decompression of on-disk data. If 'infer', then use gzip, + bz2, zip, or xz if filepath_or_buffer is a string ending in '.gz', '.bz2', + '.zip', or '.xz', respectively, and no decompression otherwise. If using 'zip', + the ZIP file must contain only one data file to be read in. + Set to ``None`` for no decompression. + + .. versionchanged:: 0.24.0 'infer' option added and set to default. +thousands : str, default ``None`` + Thousands separator. +decimal : str, default ``'.'`` + Character to recognize as decimal point. E.g. use ``','`` for European data. +float_precision : string, default None + Specifies which converter the C engine should use for floating-point values. 
+ The options are ``None`` for the ordinary converter, ``high`` for the + high-precision converter, and ``round_trip`` for the round-trip converter. +lineterminator : str (length 1), default ``None`` + Character to break file into lines. Only valid with C parser. +quotechar : str (length 1) + The character used to denote the start and end of a quoted item. Quoted items + can include the delimiter and it will be ignored. +quoting : int or ``csv.QUOTE_*`` instance, default ``0`` + Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of + ``QUOTE_MINIMAL`` (0), ``QUOTE_ALL`` (1), ``QUOTE_NONNUMERIC`` (2) or + ``QUOTE_NONE`` (3). +doublequote : boolean, default ``True`` + When ``quotechar`` is specified and ``quoting`` is not ``QUOTE_NONE``, + indicate whether or not to interpret two consecutive ``quotechar`` elements + **inside** a field as a single ``quotechar`` element. +escapechar : str (length 1), default ``None`` + One-character string used to escape delimiter when quoting is ``QUOTE_NONE``. +comment : str, default ``None`` + Indicates remainder of line should not be parsed. If found at the beginning of + a line, the line will be ignored altogether. This parameter must be a single + character. Like empty lines (as long as ``skip_blank_lines=True``), fully + commented lines are ignored by the parameter `header` but not by `skiprows`. + For example, if ``comment='#'``, parsing '#empty\\na,b,c\\n1,2,3' with + `header=0` will result in 'a,b,c' being treated as the header. +encoding : str, default ``None`` + Encoding to use for UTF when reading/writing (e.g. ``'utf-8'``). `List of + Python standard encodings + `_. +dialect : str or :class:`python:csv.Dialect` instance, default ``None`` + If provided, this parameter will override values (default or not) for the + following parameters: `delimiter`, `doublequote`, `escapechar`, + `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + override values, a ParserWarning will be issued. 
See :class:`python:csv.Dialect` + documentation for more details. + +Error handling +++++++++++++++ + +error_bad_lines : boolean, default ``True`` + Lines with too many fields (e.g. a csv line with too many commas) will by + default cause an exception to be raised, and no ``DataFrame`` will be + returned. If ``False``, then these "bad lines" will be dropped from the + ``DataFrame`` that is returned. See :ref:`bad lines ` + below. +warn_bad_lines : boolean, default ``True`` + If error_bad_lines is ``False``, and warn_bad_lines is ``True``, a warning for + each "bad line" will be output. + +.. _io.dtypes: + +Specifying column data types +'''''''''''''''''''''''''''' + +You can indicate the data type for the whole ``DataFrame`` or individual +columns: + +.. ipython:: python + + import numpy as np + data = ('a,b,c,d\n' + '1,2,3,4\n' + '5,6,7,8\n' + '9,10,11') + print(data) + + df = pd.read_csv(StringIO(data), dtype=object) + df + df['a'][0] + df = pd.read_csv(StringIO(data), + dtype={'b': object, 'c': np.float64, 'd': 'Int64'}) + df.dtypes + +Fortunately, pandas offers more than one way to ensure that your column(s) +contain only one ``dtype``. If you're unfamiliar with these concepts, you can +see :ref:`here` to learn more about dtypes, and +:ref:`here` to learn more about ``object`` conversion in +pandas. + + +For instance, you can use the ``converters`` argument +of :func:`~pandas.read_csv`: + +.. ipython:: python + + data = ("col_1\n" + "1\n" + "2\n" + "'A'\n" + "4.22") + df = pd.read_csv(StringIO(data), converters={'col_1': str}) + df + df['col_1'].apply(type).value_counts() + +Or you can use the :func:`~pandas.to_numeric` function to coerce the +dtypes after reading in the data, + +.. ipython:: python + + df2 = pd.read_csv(StringIO(data)) + df2['col_1'] = pd.to_numeric(df2['col_1'], errors='coerce') + df2 + df2['col_1'].apply(type).value_counts() + +which will convert all valid parsing to floats, leaving the invalid parsing +as ``NaN``. 
+ +Ultimately, how you deal with reading in columns containing mixed dtypes +depends on your specific needs. In the case above, if you wanted to ``NaN`` out +the data anomalies, then :func:`~pandas.to_numeric` is probably your best option. +However, if you wanted for all the data to be coerced, no matter the type, then +using the ``converters`` argument of :func:`~pandas.read_csv` would certainly be +worth trying. + +.. note:: + In some cases, reading in abnormal data with columns containing mixed dtypes + will result in an inconsistent dataset. If you rely on pandas to infer the + dtypes of your columns, the parsing engine will go and infer the dtypes for + different chunks of the data, rather than the whole dataset at once. Consequently, + you can end up with column(s) with mixed dtypes. For example, + + .. ipython:: python + :okwarning: + + col_1 = list(range(500000)) + ['a', 'b'] + list(range(500000)) + df = pd.DataFrame({'col_1': col_1}) + df.to_csv('foo.csv') + mixed_df = pd.read_csv('foo.csv') + mixed_df['col_1'].apply(type).value_counts() + mixed_df['col_1'].dtype + + will result with `mixed_df` containing an ``int`` dtype for certain chunks + of the column, and ``str`` for others due to the mixed dtypes from the + data that was read in. It is important to note that the overall column will be + marked with a ``dtype`` of ``object``, which is used for columns with mixed dtypes. + +.. ipython:: python + :suppress: + + import os + os.remove('foo.csv') + +.. _io.categorical: + +Specifying categorical dtype +'''''''''''''''''''''''''''' + +``Categorical`` columns can be parsed directly by specifying ``dtype='category'`` or +``dtype=CategoricalDtype(categories, ordered)``. + +.. 
ipython:: python + + data = ('col1,col2,col3\n' + 'a,b,1\n' + 'a,b,2\n' + 'c,d,3') + + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data)).dtypes + pd.read_csv(StringIO(data), dtype='category').dtypes + +Individual columns can be parsed as a ``Categorical`` using a dict +specification: + +.. ipython:: python + + pd.read_csv(StringIO(data), dtype={'col1': 'category'}).dtypes + +.. versionadded:: 0.21.0 + +Specifying ``dtype='category'`` will result in an unordered ``Categorical`` +whose ``categories`` are the unique values observed in the data. For more +control on the categories and order, create a +:class:`~pandas.api.types.CategoricalDtype` ahead of time, and pass that for +that column's ``dtype``. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + dtype = CategoricalDtype(['d', 'c', 'b', 'a'], ordered=True) + pd.read_csv(StringIO(data), dtype={'col1': dtype}).dtypes + +When using ``dtype=CategoricalDtype``, "unexpected" values outside of +``dtype.categories`` are treated as missing values. + +.. ipython:: python + + dtype = CategoricalDtype(['a', 'b', 'd']) # No 'c' + pd.read_csv(StringIO(data), dtype={'col1': dtype}).col1 + +This matches the behavior of :meth:`Categorical.set_categories`. + +.. note:: + + With ``dtype='category'``, the resulting categories will always be parsed + as strings (object dtype). If the categories are numeric they can be + converted using the :func:`to_numeric` function, or as appropriate, another + converter such as :func:`to_datetime`. + + When ``dtype`` is a ``CategoricalDtype`` with homogeneous ``categories`` ( + all numeric, all datetimes, etc.), the conversion is done automatically. + + .. ipython:: python + + df = pd.read_csv(StringIO(data), dtype='category') + df.dtypes + df['col3'] + df['col3'].cat.categories = pd.to_numeric(df['col3'].cat.categories) + df['col3'] + + +Naming and using columns +'''''''''''''''''''''''' + +.. 
_io.headers: + +Handling column names ++++++++++++++++++++++ + +A file may or may not have a header row. pandas assumes the first row should be +used as the column names: + +.. ipython:: python + + data = ('a,b,c\n' + '1,2,3\n' + '4,5,6\n' + '7,8,9') + print(data) + pd.read_csv(StringIO(data)) + +By specifying the ``names`` argument in conjunction with ``header`` you can +indicate other names to use and whether or not to throw away the header row (if +any): + +.. ipython:: python + + print(data) + pd.read_csv(StringIO(data), names=['foo', 'bar', 'baz'], header=0) + pd.read_csv(StringIO(data), names=['foo', 'bar', 'baz'], header=None) + +If the header is in a row other than the first, pass the row number to +``header``. This will skip the preceding rows: + +.. ipython:: python + + data = ('skip this skip it\n' + 'a,b,c\n' + '1,2,3\n' + '4,5,6\n' + '7,8,9') + pd.read_csv(StringIO(data), header=1) + +.. note:: + + Default behavior is to infer the column names: if no names are + passed the behavior is identical to ``header=0`` and column names + are inferred from the first non-blank line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. + +.. _io.dupe_names: + +Duplicate names parsing +''''''''''''''''''''''' + +If the file or header contains duplicate names, pandas will by default +distinguish between them so as to prevent overwriting data: + +.. ipython:: python + + data = ('a,b,a\n' + '0,1,2\n' + '3,4,5') + pd.read_csv(StringIO(data)) + +There is no more duplicate data because ``mangle_dupe_cols=True`` by default, +which modifies a series of duplicate columns 'X', ..., 'X' to become +'X', 'X.1', ..., 'X.N'. If ``mangle_dupe_cols=False``, duplicate data can +arise: + +.. 
code-block:: ipython + + In [2]: data = 'a,b,a\n0,1,2\n3,4,5' + In [3]: pd.read_csv(StringIO(data), mangle_dupe_cols=False) + Out[3]: + a b a + 0 2 1 2 + 1 5 4 5 + +To prevent users from encountering this problem with duplicate data, a ``ValueError`` +exception is raised if ``mangle_dupe_cols != True``: + +.. code-block:: ipython + + In [2]: data = 'a,b,a\n0,1,2\n3,4,5' + In [3]: pd.read_csv(StringIO(data), mangle_dupe_cols=False) + ... + ValueError: Setting mangle_dupe_cols=False is not supported yet + +.. _io.usecols: + +Filtering columns (``usecols``) ++++++++++++++++++++++++++++++++ + +The ``usecols`` argument allows you to select any subset of the columns in a +file, either using the column names, position numbers or a callable: + +.. ipython:: python + + data = 'a,b,c,d\n1,2,3,foo\n4,5,6,bar\n7,8,9,baz' + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), usecols=['b', 'd']) + pd.read_csv(StringIO(data), usecols=[0, 2, 3]) + pd.read_csv(StringIO(data), usecols=lambda x: x.upper() in ['A', 'C']) + +The ``usecols`` argument can also be used to specify which columns not to +use in the final result: + +.. ipython:: python + + pd.read_csv(StringIO(data), usecols=lambda x: x not in ['a', 'c']) + +In this case, the callable is specifying that we exclude the "a" and "c" +columns from the output. + +Comments and empty lines +'''''''''''''''''''''''' + +.. _io.skiplines: + +Ignoring line comments and empty lines +++++++++++++++++++++++++++++++++++++++ + +If the ``comment`` parameter is specified, then completely commented lines will +be ignored. By default, completely blank lines will be ignored as well. + +.. ipython:: python + + data = ('\n' + 'a,b,c\n' + ' \n' + '# commented line\n' + '1,2,3\n' + '\n' + '4,5,6') + print(data) + pd.read_csv(StringIO(data), comment='#') + +If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines: + +.. 
ipython:: python + + data = ('a,b,c\n' + '\n' + '1,2,3\n' + '\n' + '\n' + '4,5,6') + pd.read_csv(StringIO(data), skip_blank_lines=False) + +.. warning:: + + The presence of ignored lines might create ambiguities involving line numbers; + the parameter ``header`` uses row numbers (ignoring commented/empty + lines), while ``skiprows`` uses line numbers (including commented/empty lines): + + .. ipython:: python + + data = ('#comment\n' + 'a,b,c\n' + 'A,B,C\n' + '1,2,3') + pd.read_csv(StringIO(data), comment='#', header=1) + data = ('A,B,C\n' + '#comment\n' + 'a,b,c\n' + '1,2,3') + pd.read_csv(StringIO(data), comment='#', skiprows=2) + + If both ``header`` and ``skiprows`` are specified, ``header`` will be + relative to the end of ``skiprows``. For example: + +.. ipython:: python + + data = ('# empty\n' + '# second empty line\n' + '# third emptyline\n' + 'X,Y,Z\n' + '1,2,3\n' + 'A,B,C\n' + '1,2.,4.\n' + '5.,NaN,10.0\n') + print(data) + pd.read_csv(StringIO(data), comment='#', skiprows=4, header=1) + +.. _io.comments: + +Comments +++++++++ + +Sometimes comments or meta data may be included in a file: + +.. ipython:: python + :suppress: + + data = ("ID,level,category\n" + "Patient1,123000,x # really unpleasant\n" + "Patient2,23000,y # wouldn't take his medicine\n" + "Patient3,1234018,z # awesome") + + with open('tmp.csv', 'w') as fh: + fh.write(data) + +.. ipython:: python + + print(open('tmp.csv').read()) + +By default, the parser includes the comments in the output: + +.. ipython:: python + + df = pd.read_csv('tmp.csv') + df + +We can suppress the comments using the ``comment`` keyword: + +.. ipython:: python + + df = pd.read_csv('tmp.csv', comment='#') + df + +.. ipython:: python + :suppress: + + os.remove('tmp.csv') + +.. _io.unicode: + +Dealing with Unicode data +''''''''''''''''''''''''' + +The ``encoding`` argument should be used for encoded unicode data, which will +result in byte strings being decoded to unicode in the result: + +.. 
ipython:: python + + from io import BytesIO + data = (b'word,length\n' + b'Tr\xc3\xa4umen,7\n' + b'Gr\xc3\xbc\xc3\x9fe,5') + data = data.decode('utf8').encode('latin-1') + df = pd.read_csv(BytesIO(data), encoding='latin-1') + df + df['word'][1] + +Some formats which encode all characters as multiple bytes, like UTF-16, won't +parse correctly at all without specifying the encoding. `Full list of Python +standard encodings +`_. + +.. _io.index_col: + +Index columns and trailing delimiters +''''''''''''''''''''''''''''''''''''' + +If a file has one more column of data than the number of column names, the +first column will be used as the ``DataFrame``'s row names: + +.. ipython:: python + + data = ('a,b,c\n' + '4,apple,bat,5.7\n' + '8,orange,cow,10') + pd.read_csv(StringIO(data)) + +.. ipython:: python + + data = ('index,a,b,c\n' + '4,apple,bat,5.7\n' + '8,orange,cow,10') + pd.read_csv(StringIO(data), index_col=0) + +Ordinarily, you can achieve this behavior using the ``index_col`` option. + +There are some exception cases when a file has been prepared with delimiters at +the end of each data line, confusing the parser. To explicitly disable the +index column inference and discard the last column, pass ``index_col=False``: + +.. ipython:: python + + data = ('a,b,c\n' + '4,apple,bat,\n' + '8,orange,cow,') + print(data) + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), index_col=False) + +If a subset of data is being parsed using the ``usecols`` option, the +``index_col`` specification is based on that subset, not the original data. + +.. ipython:: python + + data = ('a,b,c\n' + '4,apple,bat,\n' + '8,orange,cow,') + print(data) + pd.read_csv(StringIO(data), usecols=['b', 'c']) + pd.read_csv(StringIO(data), usecols=['b', 'c'], index_col=0) + +.. 
_io.parse_dates: + +Date Handling +''''''''''''' + +Specifying date columns ++++++++++++++++++++++++ + +To better facilitate working with datetime data, :func:`read_csv` +uses the keyword arguments ``parse_dates`` and ``date_parser`` +to allow users to specify a variety of columns and date/time formats to turn the +input text data into ``datetime`` objects. + +The simplest case is to just pass in ``parse_dates=True``: + +.. ipython:: python + :suppress: + + f = open('foo.csv', 'w') + f.write('date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5') + f.close() + +.. ipython:: python + + # Use a column as an index, and parse it as dates. + df = pd.read_csv('foo.csv', index_col=0, parse_dates=True) + df + + # These are Python datetime objects + df.index + +It is often the case that we may want to store date and time data separately, +or store various date fields separately. the ``parse_dates`` keyword can be +used to specify a combination of columns to parse the dates and/or times from. + +You can specify a list of column lists to ``parse_dates``, the resulting date +columns will be prepended to the output (so as to not affect the existing column +order) and the new column names will be the concatenation of the component +column names: + +.. ipython:: python + :suppress: + + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + + with open('tmp.csv', 'w') as fh: + fh.write(data) + +.. ipython:: python + + print(open('tmp.csv').read()) + df = pd.read_csv('tmp.csv', header=None, parse_dates=[[1, 2], [1, 3]]) + df + +By default the parser removes the component date columns, but you can choose +to retain them via the ``keep_date_col`` keyword: + +.. 
ipython:: python + + df = pd.read_csv('tmp.csv', header=None, parse_dates=[[1, 2], [1, 3]], + keep_date_col=True) + df + +Note that if you wish to combine multiple columns into a single date column, a +nested list must be used. In other words, ``parse_dates=[1, 2]`` indicates that +the second and third columns should each be parsed as separate date columns +while ``parse_dates=[[1, 2]]`` means the two columns should be parsed into a +single column. + +You can also use a dict to specify custom column names: + +.. ipython:: python + + date_spec = {'nominal': [1, 2], 'actual': [1, 3]} + df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec) + df + +It is important to remember that if multiple text columns are to be parsed into +a single date column, then a new column is prepended to the data. The `index_col` +specification is based on this new set of columns rather than the original +data columns: + + +.. ipython:: python + + date_spec = {'nominal': [1, 2], 'actual': [1, 3]} + df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec, + index_col=0) # index is the nominal column + df + +.. note:: + If a column or index contains an unparsable date, the entire column or + index will be returned unaltered as an object data type. For non-standard + datetime parsing, use :func:`to_datetime` after ``pd.read_csv``. + + +.. note:: + read_csv has a fast_path for parsing datetime strings in ISO 8601 format, + e.g. "2000-01-01T00:01:02+00:00" and similar variations. If you can arrange + for your data to store datetimes in this format, load times will be + significantly faster; ~20x has been observed. + + +.. note:: + + When passing a dict as the `parse_dates` argument, the order of + the columns prepended is not guaranteed, because `dict` objects do not impose + an ordering on their keys. On Python 2.7+ you may use `collections.OrderedDict` + instead of a regular `dict` if this matters to you. 
Because of this, when using a + dict for 'parse_dates' in conjunction with the `index_col` argument, it's best to + specify `index_col` as a column label rather than as an index on the resulting frame. + + +Date parsing functions +++++++++++++++++++++++ + +Finally, the parser allows you to specify a custom ``date_parser`` function to +take full advantage of the flexibility of the date parsing API: + +.. ipython:: python + + df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec, + date_parser=pd.io.date_converters.parse_date_time) + df + +Pandas will try to call the ``date_parser`` function in three different ways. If +an exception is raised, the next one is tried: + +1. ``date_parser`` is first called with one or more arrays as arguments, + as defined using `parse_dates` (e.g., ``date_parser(['2013', '2013'], ['1', '2'])``). + +2. If #1 fails, ``date_parser`` is called with all the columns + concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``). + +3. If #2 fails, ``date_parser`` is called once for every row with one or more + string arguments from the columns indicated with `parse_dates` + (e.g., ``date_parser('2013', '1')`` for the first row, ``date_parser('2013', '2')`` + for the second, etc.). + +Note that performance-wise, you should try these methods of parsing dates in order: + +1. Try to infer the format using ``infer_datetime_format=True`` (see section below). + +2. If you know the format, use ``pd.to_datetime()``: + ``date_parser=lambda x: pd.to_datetime(x, format=...)``. + +3. If you have a really non-standard format, use a custom ``date_parser`` function. + For optimal performance, this should be vectorized, i.e., it should accept arrays + as arguments. + +You can explore the date parsing functionality in +`date_converters.py `__ +and add your own. We would love to turn this module into a community-supported +set of date/time parsers. 
To get you started, ``date_converters.py`` contains +functions to parse dual date and time columns, year/month/day columns, +and year/month/day/hour/minute/second columns. It also contains a +``generic_parser`` function so you can curry it with a function that deals with +a single date rather than the entire array. + +.. ipython:: python + :suppress: + + os.remove('tmp.csv') + + +.. _io.csv.mixed_timezones: + +Parsing a CSV with mixed timezones +++++++++++++++++++++++++++++++++++ + +Pandas cannot natively represent a column or index with mixed timezones. If your CSV +file contains columns with a mixture of timezones, the default result will be +an object-dtype column with strings, even with ``parse_dates``. + + +.. ipython:: python + + content = """\ + a + 2000-01-01T00:00:00+05:00 + 2000-01-01T00:00:00+06:00""" + df = pd.read_csv(StringIO(content), parse_dates=['a']) + df['a'] + +To parse the mixed-timezone values as a datetime column, pass a partially-applied +:func:`to_datetime` with ``utc=True`` as the ``date_parser``. + +.. ipython:: python + + df = pd.read_csv(StringIO(content), parse_dates=['a'], + date_parser=lambda col: pd.to_datetime(col, utc=True)) + df['a'] + + +.. _io.dayfirst: + + +Inferring datetime format ++++++++++++++++++++++++++ + +If you have ``parse_dates`` enabled for some or all of your columns, and your +datetime strings are all formatted the same way, you may get a large speed +up by setting ``infer_datetime_format=True``. If set, pandas will attempt +to guess the format of your datetime strings, and then use a faster means +of parsing the strings. 5-10x parsing speeds have been observed. pandas +will fallback to the usual parsing if either the format cannot be guessed +or the format that was guessed cannot properly parse the entire column +of strings. So in general, ``infer_datetime_format`` should not have any +negative consequences if enabled. 
+ +Here are some examples of datetime strings that can be guessed (All +representing December 30th, 2011 at 00:00:00): + +* "20111230" +* "2011/12/30" +* "20111230 00:00:00" +* "12/30/2011 00:00:00" +* "30/Dec/2011 00:00:00" +* "30/December/2011 00:00:00" + +Note that ``infer_datetime_format`` is sensitive to ``dayfirst``. With +``dayfirst=True``, it will guess "01/12/2011" to be December 1st. With +``dayfirst=False`` (default) it will guess "01/12/2011" to be January 12th. + +.. ipython:: python + + # Try to infer the format for the index column + df = pd.read_csv('foo.csv', index_col=0, parse_dates=True, + infer_datetime_format=True) + df + +.. ipython:: python + :suppress: + + os.remove('foo.csv') + +International date formats +++++++++++++++++++++++++++ + +While US date formats tend to be MM/DD/YYYY, many international formats use +DD/MM/YYYY instead. For convenience, a ``dayfirst`` keyword is provided: + +.. ipython:: python + :suppress: + + data = ("date,value,cat\n" + "1/6/2000,5,a\n" + "2/6/2000,10,b\n" + "3/6/2000,15,c") + with open('tmp.csv', 'w') as fh: + fh.write(data) + +.. ipython:: python + + print(open('tmp.csv').read()) + + pd.read_csv('tmp.csv', parse_dates=[0]) + pd.read_csv('tmp.csv', dayfirst=True, parse_dates=[0]) + +.. _io.float_precision: + +Specifying method for floating-point conversion +''''''''''''''''''''''''''''''''''''''''''''''' + +The parameter ``float_precision`` can be specified in order to use +a specific floating-point converter during parsing with the C engine. +The options are the ordinary converter, the high-precision converter, and +the round-trip converter (which is guaranteed to round-trip values after +writing to a file). For example: + +.. 
ipython:: python + + val = '0.3066101993807095471566981359501369297504425048828125' + data = 'a,b,c\n1,2,{0}'.format(val) + abs(pd.read_csv(StringIO(data), engine='c', + float_precision=None)['c'][0] - float(val)) + abs(pd.read_csv(StringIO(data), engine='c', + float_precision='high')['c'][0] - float(val)) + abs(pd.read_csv(StringIO(data), engine='c', + float_precision='round_trip')['c'][0] - float(val)) + + +.. _io.thousands: + +Thousand separators +''''''''''''''''''' + +For large numbers that have been written with a thousands separator, you can +set the ``thousands`` keyword to a string of length 1 so that integers will be parsed +correctly: + +.. ipython:: python + :suppress: + + data = ("ID|level|category\n" + "Patient1|123,000|x\n" + "Patient2|23,000|y\n" + "Patient3|1,234,018|z") + + with open('tmp.csv', 'w') as fh: + fh.write(data) + +By default, numbers with a thousands separator will be parsed as strings: + +.. ipython:: python + + print(open('tmp.csv').read()) + df = pd.read_csv('tmp.csv', sep='|') + df + + df.level.dtype + +The ``thousands`` keyword allows integers to be parsed correctly: + +.. ipython:: python + + print(open('tmp.csv').read()) + df = pd.read_csv('tmp.csv', sep='|', thousands=',') + df + + df.level.dtype + +.. ipython:: python + :suppress: + + os.remove('tmp.csv') + +.. _io.na_values: + +NA values +''''''''' + +To control which values are parsed as missing values (which are signified by +``NaN``), specify a string in ``na_values``. If you specify a list of strings, +then all values in it are considered to be missing values. If you specify a +number (a ``float``, like ``5.0`` or an ``integer`` like ``5``), the +corresponding equivalent values will also imply a missing value (in this case +effectively ``[5.0, 5]`` are recognized as ``NaN``). + +To completely override the default values that are recognized as missing, specify ``keep_default_na=False``. + +.. 
_io.navaluesconst: + +The default ``NaN`` recognized values are ``['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A', '#N/A', 'N/A', +'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null', 'NaN', '-NaN', 'nan', '-nan', '']``. + +Let us consider some examples: + +.. code-block:: python + + pd.read_csv('path_to_file.csv', na_values=[5]) + +In the example above ``5`` and ``5.0`` will be recognized as ``NaN``, in +addition to the defaults. A string will first be interpreted as a numerical +``5``, then as a ``NaN``. + +.. code-block:: python + + pd.read_csv('path_to_file.csv', keep_default_na=False, na_values=[""]) + +Above, only an empty field will be recognized as ``NaN``. + +.. code-block:: python + + pd.read_csv('path_to_file.csv', keep_default_na=False, na_values=["NA", "0"]) + +Above, both ``NA`` and ``0`` as strings are ``NaN``. + +.. code-block:: python + + pd.read_csv('path_to_file.csv', na_values=["Nope"]) + +The default values, in addition to the string ``"Nope"`` are recognized as +``NaN``. + +.. _io.infinity: + +Infinity +'''''''' + +``inf`` like values will be parsed as ``np.inf`` (positive infinity), and ``-inf`` as ``-np.inf`` (negative infinity). +These will ignore the case of the value, meaning ``Inf``, will also be parsed as ``np.inf``. + + +Returning Series +'''''''''''''''' + +Using the ``squeeze`` keyword, the parser will return output with a single column +as a ``Series``: + +.. ipython:: python + :suppress: + + data = ("level\n" + "Patient1,123000\n" + "Patient2,23000\n" + "Patient3,1234018") + + with open('tmp.csv', 'w') as fh: + fh.write(data) + +.. ipython:: python + + print(open('tmp.csv').read()) + + output = pd.read_csv('tmp.csv', squeeze=True) + output + + type(output) + +.. ipython:: python + :suppress: + + os.remove('tmp.csv') + +.. _io.boolean: + +Boolean values +'''''''''''''' + +The common values ``True``, ``False``, ``TRUE``, and ``FALSE`` are all +recognized as boolean. Occasionally you might want to recognize other values +as being boolean.
To do this, use the ``true_values`` and ``false_values`` +options as follows: + +.. ipython:: python + + data = ('a,b,c\n' + '1,Yes,2\n' + '3,No,4') + print(data) + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data), true_values=['Yes'], false_values=['No']) + +.. _io.bad_lines: + +Handling "bad" lines +'''''''''''''''''''' + +Some files may have malformed lines with too few fields or too many. Lines with +too few fields will have NA values filled in the trailing fields. Lines with +too many fields will raise an error by default: + +.. ipython:: python + :okexcept: + + data = ('a,b,c\n' + '1,2,3\n' + '4,5,6,7\n' + '8,9,10') + pd.read_csv(StringIO(data)) + +You can elect to skip bad lines: + +.. code-block:: ipython + + In [29]: pd.read_csv(StringIO(data), error_bad_lines=False) + Skipping line 3: expected 3 fields, saw 4 + + Out[29]: + a b c + 0 1 2 3 + 1 8 9 10 + +You can also use the ``usecols`` parameter to eliminate extraneous column +data that appear in some lines but not others: + +.. code-block:: ipython + + In [30]: pd.read_csv(StringIO(data), usecols=[0, 1, 2]) + + Out[30]: + a b c + 0 1 2 3 + 1 4 5 6 + 2 8 9 10 + +.. _io.dialect: + +Dialect +''''''' + +The ``dialect`` keyword gives greater flexibility in specifying the file format. +By default it uses the Excel dialect but you can specify either the dialect name +or a :class:`python:csv.Dialect` instance. + +.. ipython:: python + :suppress: + + data = ('label1,label2,label3\n' + 'index1,"a,c,e\n' + 'index2,b,d,f') + +Suppose you had data with unenclosed quotes: + +.. ipython:: python + + print(data) + +By default, ``read_csv`` uses the Excel dialect and treats the double quote as +the quote character, which causes it to fail when it finds a newline before it +finds the closing double quote. + +We can get around this using ``dialect``: + +.. 
ipython:: python + :okwarning: + + import csv + dia = csv.excel() + dia.quoting = csv.QUOTE_NONE + pd.read_csv(StringIO(data), dialect=dia) + +All of the dialect options can be specified separately by keyword arguments: + +.. ipython:: python + + data = 'a,b,c~1,2,3~4,5,6' + pd.read_csv(StringIO(data), lineterminator='~') + +Another common dialect option is ``skipinitialspace``, to skip any whitespace +after a delimiter: + +.. ipython:: python + + data = 'a, b, c\n1, 2, 3\n4, 5, 6' + print(data) + pd.read_csv(StringIO(data), skipinitialspace=True) + +The parsers make every attempt to "do the right thing" and not be fragile. Type +inference is a pretty big deal. If a column can be coerced to integer dtype +without altering the contents, the parser will do so. Any non-numeric +columns will come through as object dtype as with the rest of pandas objects. + +.. _io.quoting: + +Quoting and Escape Characters +''''''''''''''''''''''''''''' + +Quotes (and other escape characters) in embedded fields can be handled in any +number of ways. One way is to use backslashes; to properly parse this data, you +should pass the ``escapechar`` option: + +.. ipython:: python + + data = 'a,b\n"hello, \\"Bob\\", nice to see you",5' + print(data) + pd.read_csv(StringIO(data), escapechar='\\') + +.. _io.fwf_reader: +.. _io.fwf: + +Files with fixed width columns +'''''''''''''''''''''''''''''' + +While :func:`read_csv` reads delimited data, the :func:`read_fwf` function works +with data files that have known and fixed column widths. The function parameters +to ``read_fwf`` are largely the same as `read_csv` with two extra parameters, and +a different usage of the ``delimiter`` parameter: + +* ``colspecs``: A list of pairs (tuples) giving the extents of the + fixed-width fields of each line as half-open intervals (i.e., [from, to[ ). + String value 'infer' can be used to instruct the parser to try detecting + the column specifications from the first 100 rows of the data. 
Default + behavior, if not specified, is to infer. +* ``widths``: A list of field widths which can be used instead of 'colspecs' + if the intervals are contiguous. +* ``delimiter``: Characters to consider as filler characters in the fixed-width file. + Can be used to specify the filler character of the fields + if it is not spaces (e.g., '~'). + +.. ipython:: python + :suppress: + + f = open('bar.csv', 'w') + data1 = ("id8141 360.242940 149.910199 11950.7\n" + "id1594 444.953632 166.985655 11788.4\n" + "id1849 364.136849 183.628767 11806.2\n" + "id1230 413.836124 184.375703 11916.8\n" + "id1948 502.953953 173.237159 12468.3") + f.write(data1) + f.close() + +Consider a typical fixed-width data file: + +.. ipython:: python + + print(open('bar.csv').read()) + +In order to parse this file into a ``DataFrame``, we simply need to supply the +column specifications to the `read_fwf` function along with the file name: + +.. ipython:: python + + # Column specifications are a list of half-intervals + colspecs = [(0, 6), (8, 20), (21, 33), (34, 43)] + df = pd.read_fwf('bar.csv', colspecs=colspecs, header=None, index_col=0) + df + +Note how the parser automatically picks column names X.<column number> when +``header=None`` argument is specified. Alternatively, you can supply just the +column widths for contiguous columns: + +.. ipython:: python + + # Widths are a list of integers + widths = [6, 14, 13, 10] + df = pd.read_fwf('bar.csv', widths=widths, header=None) + df + +The parser will take care of extra white spaces around the columns +so it's ok to have extra separation between the columns in the file. + +By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the +first 100 rows of the file. It can do it only in cases when the columns are +aligned and correctly separated by the provided ``delimiter`` (default delimiter +is whitespace). + +..
ipython:: python + + df = pd.read_fwf('bar.csv', header=None, index_col=0) + df + +``read_fwf`` supports the ``dtype`` parameter for specifying the types of +parsed columns to be different from the inferred type. + +.. ipython:: python + + pd.read_fwf('bar.csv', header=None, index_col=0).dtypes + pd.read_fwf('bar.csv', header=None, dtype={2: 'object'}).dtypes + +.. ipython:: python + :suppress: + + os.remove('bar.csv') + + +Indexes +''''''' + +Files with an "implicit" index column ++++++++++++++++++++++++++++++++++++++ + +.. ipython:: python + :suppress: + + f = open('foo.csv', 'w') + f.write('A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5') + f.close() + +Consider a file with one less entry in the header than the number of data +column: + +.. ipython:: python + + print(open('foo.csv').read()) + +In this special case, ``read_csv`` assumes that the first column is to be used +as the index of the ``DataFrame``: + +.. ipython:: python + + pd.read_csv('foo.csv') + +Note that the dates weren't automatically parsed. In that case you would need +to do as before: + +.. ipython:: python + + df = pd.read_csv('foo.csv', parse_dates=True) + df.index + +.. ipython:: python + :suppress: + + os.remove('foo.csv') + + +Reading an index with a ``MultiIndex`` +++++++++++++++++++++++++++++++++++++++ + +.. _io.csv_multiindex: + +Suppose you have data indexed by two columns: + +.. ipython:: python + + print(open('data/mindex_ex.csv').read()) + +The ``index_col`` argument to ``read_csv`` can take a list of +column numbers to turn multiple columns into a ``MultiIndex`` for the index of the +returned object: + +.. ipython:: python + + df = pd.read_csv("data/mindex_ex.csv", index_col=[0, 1]) + df + df.loc[1978] + +.. _io.multi_index_columns: + +Reading columns with a ``MultiIndex`` ++++++++++++++++++++++++++++++++++++++ + +By specifying list of row locations for the ``header`` argument, you +can read in a ``MultiIndex`` for the columns. 
Specifying non-consecutive +rows will skip the intervening rows. + +.. ipython:: python + + from pandas._testing import makeCustomDataframe as mkdf + df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv('mi.csv') + print(open('mi.csv').read()) + pd.read_csv('mi.csv', header=[0, 1, 2, 3], index_col=[0, 1]) + +``read_csv`` is also able to interpret a more common format +of multi-columns indices. + +.. ipython:: python + :suppress: + + data = ",a,a,a,b,c,c\n,q,r,s,t,u,v\none,1,2,3,4,5,6\ntwo,7,8,9,10,11,12" + fh = open('mi2.csv', 'w') + fh.write(data) + fh.close() + +.. ipython:: python + + print(open('mi2.csv').read()) + pd.read_csv('mi2.csv', header=[0, 1], index_col=0) + +Note: If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it +with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will be *lost*. + +.. ipython:: python + :suppress: + + os.remove('mi.csv') + os.remove('mi2.csv') + +.. _io.sniff: + +Automatically "sniffing" the delimiter +'''''''''''''''''''''''''''''''''''''' + +``read_csv`` is capable of inferring delimited (not necessarily +comma-separated) files, as pandas uses the :class:`python:csv.Sniffer` +class of the csv module. For this, you have to specify ``sep=None``. + +.. ipython:: python + :suppress: + + df = pd.DataFrame(np.random.randn(10, 4)) + df.to_csv('tmp.sv', sep='|') + df.to_csv('tmp2.sv', sep=':') + +.. ipython:: python + + print(open('tmp2.sv').read()) + pd.read_csv('tmp2.sv', sep=None, engine='python') + +.. _io.multiple_files: + +Reading multiple files to create a single DataFrame +''''''''''''''''''''''''''''''''''''''''''''''''''' + +It's best to use :func:`~pandas.concat` to combine multiple files. +See the :ref:`cookbook` for an example. + +..
_io.chunking: + +Iterating through files chunk by chunk +'''''''''''''''''''''''''''''''''''''' + +Suppose you wish to iterate through a (potentially very large) file lazily +rather than reading the entire file into memory, such as the following: + + +.. ipython:: python + + print(open('tmp.sv').read()) + table = pd.read_csv('tmp.sv', sep='|') + table + + +By specifying a ``chunksize`` to ``read_csv``, the return +value will be an iterable object of type ``TextFileReader``: + +.. ipython:: python + + reader = pd.read_csv('tmp.sv', sep='|', chunksize=4) + reader + + for chunk in reader: + print(chunk) + + +Specifying ``iterator=True`` will also return the ``TextFileReader`` object: + +.. ipython:: python + + reader = pd.read_csv('tmp.sv', sep='|', iterator=True) + reader.get_chunk(5) + +.. ipython:: python + :suppress: + + os.remove('tmp.sv') + os.remove('tmp2.sv') + +Specifying the parser engine +'''''''''''''''''''''''''''' + +Under the hood pandas uses a fast and efficient parser implemented in C as well +as a Python implementation which is currently more feature-complete. Where +possible pandas uses the C parser (specified as ``engine='c'``), but may fall +back to Python if C-unsupported options are specified. Currently, C-unsupported +options include: + +* ``sep`` other than a single character (e.g. regex separators) +* ``skipfooter`` +* ``sep=None`` with ``delim_whitespace=False`` + +Specifying any of the above options will produce a ``ParserWarning`` unless the +python engine is selected explicitly using ``engine='python'``. + +Reading remote files +'''''''''''''''''''' + +You can pass in a URL to a CSV file: + +.. code-block:: python + + df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item', + sep='\t') + +S3 URLs are handled as well but require installing the `S3Fs +`_ library: + +.. 
code-block:: python + + df = pd.read_csv('s3://pandas-test/tips.csv') + +If your S3 bucket requires credentials you will need to set them as environment +variables or in the ``~/.aws/credentials`` config file, refer to the `S3Fs +documentation on credentials +`_. + + + +Writing out data +'''''''''''''''' + +.. _io.store_in_csv: + +Writing to CSV format ++++++++++++++++++++++ + +The ``Series`` and ``DataFrame`` objects have an instance method ``to_csv`` which +allows storing the contents of the object as a comma-separated-values file. The +function takes a number of arguments. Only the first is required. + +* ``path_or_buf``: A string path to the file to write or a file object. If a file object it must be opened with `newline=''` +* ``sep`` : Field delimiter for the output file (default ",") +* ``na_rep``: A string representation of a missing value (default '') +* ``float_format``: Format string for floating point numbers +* ``columns``: Columns to write (default None) +* ``header``: Whether to write out the column names (default True) +* ``index``: whether to write row (index) names (default True) +* ``index_label``: Column label(s) for index column(s) if desired. If None + (default), and `header` and `index` are True, then the index names are + used. (A sequence should be given if the ``DataFrame`` uses MultiIndex). +* ``mode`` : Python write mode, default 'w' +* ``encoding``: a string representing the encoding to use if the contents are + non-ASCII, for Python versions prior to 3 +* ``line_terminator``: Character sequence denoting line end (default `os.linesep`) +* ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). 
Note that if you have set a `float_format` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric +* ``quotechar``: Character used to quote fields (default '"') +* ``doublequote``: Control quoting of ``quotechar`` in fields (default True) +* ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when + appropriate (default None) +* ``chunksize``: Number of rows to write at a time +* ``date_format``: Format string for datetime objects + +Writing a formatted string +++++++++++++++++++++++++++ + +.. _io.formatting: + +The ``DataFrame`` object has an instance method ``to_string`` which allows control +over the string representation of the object. All arguments are optional: + +* ``buf`` default None, for example a StringIO object +* ``columns`` default None, which columns to write +* ``col_space`` default None, minimum width of each column. +* ``na_rep`` default ``NaN``, representation of NA value +* ``formatters`` default None, a dictionary (by column) of functions each of + which takes a single argument and returns a formatted string +* ``float_format`` default None, a function which takes a single (float) + argument and returns a formatted string; to be applied to floats in the + ``DataFrame``. +* ``sparsify`` default True, set to False for a ``DataFrame`` with a hierarchical + index to print every MultiIndex key at each row. +* ``index_names`` default True, will print the names of the indices +* ``index`` default True, will print the index (ie, row labels) +* ``header`` default True, will print the column labels +* ``justify`` default ``left``, will print column headers left- or + right-justified + +The ``Series`` object also has a ``to_string`` method, but with only the ``buf``, +``na_rep``, ``float_format`` arguments. There is also a ``length`` argument +which, if set to ``True``, will additionally output the length of the Series. + +.. _io.json: + +JSON +---- + +Read and write ``JSON`` format files and strings. 
+ +.. _io.json_writer: + +Writing JSON +'''''''''''' + +A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json`` +with optional parameters: + +* ``path_or_buf`` : the pathname or buffer to write the output + This can be ``None`` in which case a JSON string is returned +* ``orient`` : + + ``Series``: + * default is ``index`` + * allowed values are {``split``, ``records``, ``index``} + + ``DataFrame``: + * default is ``columns`` + * allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} + + The format of the JSON string + + .. csv-table:: + :widths: 20, 150 + :delim: ; + + ``split``; dict like {index -> [index], columns -> [columns], data -> [values]} + ``records``; list like [{column -> value}, ... , {column -> value}] + ``index``; dict like {index -> {column -> value}} + ``columns``; dict like {column -> {index -> value}} + ``values``; just the values array + +* ``date_format`` : string, type of date conversion, 'epoch' for timestamp, 'iso' for ISO8601. +* ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10. +* ``force_ascii`` : force encoded string to be ASCII, default True. +* ``date_unit`` : The time unit to encode to, governs timestamp and ISO8601 precision. One of 's', 'ms', 'us' or 'ns' for seconds, milliseconds, microseconds and nanoseconds respectively. Default 'ms'. +* ``default_handler`` : The handler to call if an object cannot otherwise be converted to a suitable format for JSON. Takes a single argument, which is the object to convert, and returns a serializable object. +* ``lines`` : If ``records`` orient, then will write each record per line as json. + +Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datetime`` objects will be converted based on the ``date_format`` and ``date_unit`` parameters. + +.. 
ipython:: python + + dfj = pd.DataFrame(np.random.randn(5, 2), columns=list('AB')) + json = dfj.to_json() + json + +Orient options +++++++++++++++ + +There are a number of different options for the format of the resulting JSON +file / string. Consider the following ``DataFrame`` and ``Series``: + +.. ipython:: python + + dfjo = pd.DataFrame(dict(A=range(1, 4), B=range(4, 7), C=range(7, 10)), + columns=list('ABC'), index=list('xyz')) + dfjo + sjo = pd.Series(dict(x=15, y=16, z=17), name='D') + sjo + +**Column oriented** (the default for ``DataFrame``) serializes the data as +nested JSON objects with column labels acting as the primary index: + +.. ipython:: python + + dfjo.to_json(orient="columns") + # Not available for Series + +**Index oriented** (the default for ``Series``) similar to column oriented +but the index labels are now primary: + +.. ipython:: python + + dfjo.to_json(orient="index") + sjo.to_json(orient="index") + +**Record oriented** serializes the data to a JSON array of column -> value records, +index labels are not included. This is useful for passing ``DataFrame`` data to plotting +libraries, for example the JavaScript library ``d3.js``: + +.. ipython:: python + + dfjo.to_json(orient="records") + sjo.to_json(orient="records") + +**Value oriented** is a bare-bones option which serializes to nested JSON arrays of +values only, column and index labels are not included: + +.. ipython:: python + + dfjo.to_json(orient="values") + # Not available for Series + +**Split oriented** serializes to a JSON object containing separate entries for +values, index and columns. Name is also included for ``Series``: + +.. ipython:: python + + dfjo.to_json(orient="split") + sjo.to_json(orient="split") + +**Table oriented** serializes to the JSON `Table Schema`_, allowing for the +preservation of metadata including but not limited to dtypes and index names. + +.. 
note:: + + Any orient option that encodes to a JSON object will not preserve the ordering of + index and column labels during round-trip serialization. If you wish to preserve + label ordering use the `split` option as it uses ordered containers. + +Date handling ++++++++++++++ + +Writing in ISO date format: + +.. ipython:: python + + dfd = pd.DataFrame(np.random.randn(5, 2), columns=list('AB')) + dfd['date'] = pd.Timestamp('20130101') + dfd = dfd.sort_index(1, ascending=False) + json = dfd.to_json(date_format='iso') + json + +Writing in ISO date format, with microseconds: + +.. ipython:: python + + json = dfd.to_json(date_format='iso', date_unit='us') + json + +Epoch timestamps, in seconds: + +.. ipython:: python + + json = dfd.to_json(date_format='epoch', date_unit='s') + json + +Writing to a file, with a date index and a date column: + +.. ipython:: python + + dfj2 = dfj.copy() + dfj2['date'] = pd.Timestamp('20130101') + dfj2['ints'] = list(range(5)) + dfj2['bools'] = True + dfj2.index = pd.date_range('20130101', periods=5) + dfj2.to_json('test.json') + + with open('test.json') as fh: + print(fh.read()) + +Fallback behavior ++++++++++++++++++ + +If the JSON serializer cannot handle the container contents directly it will +fall back in the following manner: + +* if the dtype is unsupported (e.g. ``np.complex``) then the ``default_handler``, if provided, will be called + for each value, otherwise an exception is raised. + +* if an object is unsupported it will attempt the following: + + + * check if the object has defined a ``toDict`` method and call it. + A ``toDict`` method should return a ``dict`` which will then be JSON serialized. + + * invoke the ``default_handler`` if one was provided. + + * convert the object to a ``dict`` by traversing its contents. However this will often fail + with an ``OverflowError`` or give unexpected results. + +In general the best approach for unsupported objects or dtypes is to provide a ``default_handler``. +For example: + +.. 
code-block:: python + + >>> DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json() # raises + RuntimeError: Unhandled numpy dtype 15 + +can be dealt with by specifying a simple ``default_handler``: + +.. ipython:: python + + pd.DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json(default_handler=str) + +.. _io.json_reader: + +Reading JSON +'''''''''''' + +Reading a JSON string into a pandas object can take a number of parameters. +The parser will try to parse a ``DataFrame`` if ``typ`` is not supplied or +is ``None``. To explicitly force ``Series`` parsing, pass ``typ='series'``. + +* ``filepath_or_buffer`` : a **VALID** JSON string or file handle / StringIO. The string could be + a URL. Valid URL schemes include http, ftp, S3, and file. For file URLs, a host + is expected. For instance, a local file could be + file ://localhost/path/to/table.json +* ``typ`` : type of object to recover (series or frame), default 'frame' +* ``orient`` : + + Series : + * default is ``index`` + * allowed values are {``split``, ``records``, ``index``} + + DataFrame + * default is ``columns`` + * allowed values are {``split``, ``records``, ``index``, ``columns``, ``values``, ``table``} + + The format of the JSON string + + .. csv-table:: + :widths: 20, 150 + :delim: ; + + ``split``; dict like {index -> [index], columns -> [columns], data -> [values]} + ``records``; list like [{column -> value}, ... , {column -> value}] + ``index``; dict like {index -> {column -> value}} + ``columns``; dict like {column -> {index -> value}} + ``values``; just the values array + ``table``; adhering to the JSON `Table Schema`_ + + +* ``dtype`` : if True, infer dtypes, if a dict of column to dtype, then use those, if ``False``, then don't infer dtypes at all, default is True, apply only to the data.
+* ``convert_axes`` : boolean, try to convert the axes to the proper dtypes, default is ``True`` +* ``convert_dates`` : a list of columns to parse for dates; If ``True``, then try to parse date-like columns, default is ``True``. +* ``keep_default_dates`` : boolean, default ``True``. If parsing dates, then parse the default date-like columns. +* ``numpy`` : direct decoding to NumPy arrays. default is ``False``; + Supports numeric data only, although labels may be non-numeric. Also note that the JSON ordering **MUST** be the same for each term if ``numpy=True``. +* ``precise_float`` : boolean, default ``False``. Set to enable usage of higher precision (strtod) function when decoding string to double values. Default (``False``) is to use fast but less precise builtin functionality. +* ``date_unit`` : string, the timestamp unit to detect if converting dates. Default + None. By default the timestamp precision will be detected, if this is not desired + then pass one of 's', 'ms', 'us' or 'ns' to force timestamp precision to + seconds, milliseconds, microseconds or nanoseconds respectively. +* ``lines`` : reads file as one json object per line. +* ``encoding`` : The encoding to use to decode py3 bytes. +* ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration. + +The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable. + +If a non-default ``orient`` was used when encoding to JSON be sure to pass the same +option here so that decoding produces sensible results, see `Orient Options`_ for an +overview. + +Data conversion ++++++++++++++++ + +The default of ``convert_axes=True``, ``dtype=True``, and ``convert_dates=True`` +will try to parse the axes, and all of the data into appropriate types, +including dates. If you need to override specific dtypes, pass a dict to +``dtype``. 
``convert_axes`` should only be set to ``False`` if you need to +preserve string-like numbers (e.g. '1', '2') in an axes. + +.. note:: + + Large integer values may be converted to dates if ``convert_dates=True`` and the data and / or column labels appear 'date-like'. The exact threshold depends on the ``date_unit`` specified. 'date-like' means that the column label meets one of the following criteria: + + * it ends with ``'_at'`` + * it ends with ``'_time'`` + * it begins with ``'timestamp'`` + * it is ``'modified'`` + * it is ``'date'`` + +.. warning:: + + When reading JSON data, automatic coercing into dtypes has some quirks: + + * an index can be reconstructed in a different order from serialization, that is, the returned order is not guaranteed to be the same as before serialization + * a column that was ``float`` data will be converted to ``integer`` if it can be done safely, e.g. a column of ``1.`` + * bool columns will be converted to ``integer`` on reconstruction + + Thus there are times where you may want to specify specific dtypes via the ``dtype`` keyword argument. + +Reading from a JSON string: + +.. ipython:: python + + pd.read_json(json) + +Reading from a file: + +.. ipython:: python + + pd.read_json('test.json') + +Don't convert any data (but still convert axes and dates): + +.. ipython:: python + + pd.read_json('test.json', dtype=object).dtypes + +Specify dtypes for conversion: + +.. ipython:: python + + pd.read_json('test.json', dtype={'A': 'float32', 'bools': 'int8'}).dtypes + +Preserve string indices: + +.. ipython:: python + + si = pd.DataFrame(np.zeros((4, 4)), columns=list(range(4)), + index=[str(i) for i in range(4)]) + si + si.index + si.columns + json = si.to_json() + + sij = pd.read_json(json, convert_axes=False) + sij + sij.index + sij.columns + +Dates written in nanoseconds need to be read back in nanoseconds: + +.. 
ipython:: python + + json = dfj2.to_json(date_unit='ns') + + # Try to parse timestamps as milliseconds -> Won't Work + dfju = pd.read_json(json, date_unit='ms') + dfju + + # Let pandas detect the correct precision + dfju = pd.read_json(json) + dfju + + # Or specify that all timestamps are in nanoseconds + dfju = pd.read_json(json, date_unit='ns') + dfju + +The Numpy parameter ++++++++++++++++++++ + +.. note:: + This param has been deprecated as of version 1.0.0 and will raise a ``FutureWarning``. + + This supports numeric data only. Index and columns labels may be non-numeric, e.g. strings, dates etc. + +If ``numpy=True`` is passed to ``read_json`` an attempt will be made to sniff +an appropriate dtype during deserialization and to subsequently decode directly +to NumPy arrays, bypassing the need for intermediate Python objects. + +This can provide speedups if you are deserialising a large amount of numeric +data: + +.. ipython:: python + + randfloats = np.random.uniform(-100, 1000, 10000) + randfloats.shape = (1000, 10) + dffloats = pd.DataFrame(randfloats, columns=list('ABCDEFGHIJ')) + + jsonfloats = dffloats.to_json() + +.. ipython:: python + + %timeit pd.read_json(jsonfloats) + +.. ipython:: python + :okwarning: + + %timeit pd.read_json(jsonfloats, numpy=True) + +The speedup is less noticeable for smaller datasets: + +.. ipython:: python + + jsonfloats = dffloats.head(100).to_json() + +.. ipython:: python + + %timeit pd.read_json(jsonfloats) + +.. ipython:: python + :okwarning: + + %timeit pd.read_json(jsonfloats, numpy=True) + +.. warning:: + + Direct NumPy decoding makes a number of assumptions and may fail or produce + unexpected output if these assumptions are not satisfied: + + - data is numeric. + + - data is uniform. The dtype is sniffed from the first value decoded. + A ``ValueError`` may be raised, or incorrect output may be produced + if this condition is not satisfied. + + - labels are ordered. 
Labels are only read from the first container, it is assumed + that each subsequent row / column has been encoded in the same order. This should be satisfied if the + data was encoded using ``to_json`` but may not be the case if the JSON + is from another source. + +.. ipython:: python + :suppress: + + os.remove('test.json') + +.. _io.json_normalize: + +Normalization +''''''''''''' + +pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data +into a flat table. + +.. ipython:: python + + data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, + {'name': {'given': 'Mose', 'family': 'Regner'}}, + {'id': 2, 'name': 'Faye Raker'}] + pd.json_normalize(data) + +.. ipython:: python + + data = [{'state': 'Florida', + 'shortname': 'FL', + 'info': {'governor': 'Rick Scott'}, + 'county': [{'name': 'Dade', 'population': 12345}, + {'name': 'Broward', 'population': 40000}, + {'name': 'Palm Beach', 'population': 60000}]}, + {'state': 'Ohio', + 'shortname': 'OH', + 'info': {'governor': 'John Kasich'}, + 'county': [{'name': 'Summit', 'population': 1234}, + {'name': 'Cuyahoga', 'population': 1337}]}] + + pd.json_normalize(data, 'county', ['state', 'shortname', ['info', 'governor']]) + +The max_level parameter provides more control over which level to end normalization. +With max_level=1 the following snippet normalizes until 1st nesting level of the provided dict. + +.. ipython:: python + + data = [{'CreatedBy': {'Name': 'User001'}, + 'Lookup': {'TextField': 'Some text', + 'UserField': {'Id': 'ID001', + 'Name': 'Name001'}}, + 'Image': {'a': 'b'} + }] + pd.json_normalize(data, max_level=1) + +.. _io.jsonl: + +Line delimited json +''''''''''''''''''' + +pandas is able to read and write line-delimited json files that are common in data processing pipelines +using Hadoop or Spark. + +.. versionadded:: 0.21.0 + +For line-delimited json files, pandas can also return an iterator which reads in ``chunksize`` lines at a time. 
This can be useful for large files or to read from a stream. + +.. ipython:: python + + jsonl = ''' + {"a": 1, "b": 2} + {"a": 3, "b": 4} + ''' + df = pd.read_json(jsonl, lines=True) + df + df.to_json(orient='records', lines=True) + + # reader is an iterator that returns `chunksize` lines each iteration + reader = pd.read_json(StringIO(jsonl), lines=True, chunksize=1) + reader + for chunk in reader: + print(chunk) + +.. _io.table_schema: + +Table schema +'''''''''''' + +`Table Schema`_ is a spec for describing tabular datasets as a JSON +object. The JSON includes information on the field names, types, and +other attributes. You can use the orient ``table`` to build +a JSON string with two fields, ``schema`` and ``data``. + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': ['a', 'b', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=3)}, + index=pd.Index(range(3), name='idx')) + df + df.to_json(orient='table', date_format="iso") + +The ``schema`` field contains the ``fields`` key, which itself contains +a list of column name to type pairs, including the ``Index`` or ``MultiIndex`` +(see below for a list of types). +The ``schema`` field also contains a ``primaryKey`` field if the (Multi)index +is unique. + +The second field, ``data``, contains the serialized data with the ``records`` +orient. +The index is included, and any datetimes are ISO 8601 formatted, as required +by the Table Schema spec. + +The full list of types supported are described in the Table Schema +spec. This table shows the mapping from pandas types: + +=============== ================= +Pandas type Table Schema type +=============== ================= +int64 integer +float64 number +bool boolean +datetime64[ns] datetime +timedelta64[ns] duration +categorical any +object str +=============== ================= + +A few notes on the generated table schema: + +* The ``schema`` object contains a ``pandas_version`` field. 
This contains + the version of pandas' dialect of the schema, and will be incremented + with each revision. +* All dates are converted to UTC when serializing, even timezone-naive values, + which are treated as UTC with an offset of 0. + + .. ipython:: python + + from pandas.io.json import build_table_schema + s = pd.Series(pd.date_range('2016', periods=4)) + build_table_schema(s) + +* datetimes with a timezone (before serializing), include an additional field + ``tz`` with the time zone name (e.g. ``'US/Central'``). + + .. ipython:: python + + s_tz = pd.Series(pd.date_range('2016', periods=12, + tz='US/Central')) + build_table_schema(s_tz) + +* Periods are converted to timestamps before serialization, and so have the + same behavior of being converted to UTC. In addition, periods will contain + an additional field ``freq`` with the period's frequency, e.g. ``'A-DEC'``. + + .. ipython:: python + + s_per = pd.Series(1, index=pd.period_range('2016', freq='A-DEC', + periods=4)) + build_table_schema(s_per) + +* Categoricals use the ``any`` type and an ``enum`` constraint listing + the set of possible values. Additionally, an ``ordered`` field is included: + + .. ipython:: python + + s_cat = pd.Series(pd.Categorical(['a', 'b', 'a'])) + build_table_schema(s_cat) + +* A ``primaryKey`` field, containing an array of labels, is included + *if the index is unique*: + + .. ipython:: python + + s_dupe = pd.Series([1, 2], index=[1, 1]) + build_table_schema(s_dupe) + +* The ``primaryKey`` behavior is the same with MultiIndexes, but in this + case the ``primaryKey`` is an array: + + .. ipython:: python + + s_multi = pd.Series(1, index=pd.MultiIndex.from_product([('a', 'b'), + (0, 1)])) + build_table_schema(s_multi) + +* The default naming roughly follows these rules: + + * For series, the ``object.name`` is used.
If that's ``None``, then the + name is ``values`` + * For ``DataFrames``, the stringified version of the column name is used + * For ``Index`` (not ``MultiIndex``), ``index.name`` is used, with a + fallback to ``index`` if that is None. + * For ``MultiIndex``, ``mi.names`` is used. If any level has no name, + then ``level_<i>`` is used. + + +.. versionadded:: 0.23.0 + +``read_json`` also accepts ``orient='table'`` as an argument. This allows for +the preservation of metadata such as dtypes and index names in a +round-trippable manner. + + .. ipython:: python + + df = pd.DataFrame({'foo': [1, 2, 3, 4], + 'bar': ['a', 'b', 'c', 'd'], + 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + 'qux': pd.Categorical(['a', 'b', 'c', 'c']) + }, index=pd.Index(range(4), name='idx')) + df + df.dtypes + + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +Please note that the literal string 'index' as the name of an :class:`Index` +is not round-trippable, nor are any names beginning with ``'level_'`` within a +:class:`MultiIndex`. These are used by default in :func:`DataFrame.to_json` to +indicate missing values and the subsequent read cannot distinguish the intent. + +.. ipython:: python + :okwarning: + + df.index.name = 'index' + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + print(new_df.index.name) + +.. ipython:: python + :suppress: + + os.remove('test.json') + +.. _Table Schema: https://specs.frictionlessdata.io/json-table-schema/ + +HTML +---- + +.. _io.read_html: + +Reading HTML content +'''''''''''''''''''''' + +.. warning:: + + We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas <io.html.gotchas>` + below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. + +The top-level :func:`~pandas.io.html.read_html` function can accept an HTML +string/file/URL and will parse HTML tables into a list of pandas ``DataFrames``.
+Let's look at a few examples. + +.. note:: + + ``read_html`` returns a ``list`` of ``DataFrame`` objects, even if there is + only a single table contained in the HTML content. + +Read a URL with no options: + +.. ipython:: python + + url = 'https://www.fdic.gov/bank/individual/failed/banklist.html' + dfs = pd.read_html(url) + dfs + +.. note:: + + The data from the above URL changes every Monday so the resulting data above + and the data below may be slightly different. + +Read in the content of the file from the above URL and pass it to ``read_html`` +as a string: + +.. ipython:: python + :suppress: + + file_path = os.path.abspath(os.path.join('source', '_static', 'banklist.html')) + +.. ipython:: python + + with open(file_path, 'r') as f: + dfs = pd.read_html(f.read()) + dfs + +You can even pass in an instance of ``StringIO`` if you so desire: + +.. ipython:: python + + with open(file_path, 'r') as f: + sio = StringIO(f.read()) + + dfs = pd.read_html(sio) + dfs + +.. note:: + + The following examples are not run by the IPython evaluator due to the fact + that having so many network-accessing functions slows down the documentation + build. If you spot an error or an example that doesn't run, please do not + hesitate to report it over on `pandas GitHub issues page + <https://www.github.com/pandas-dev/pandas/issues>`__. + + +Read a URL and match a table that contains specific text: + +.. code-block:: python + + match = 'Metcalf Bank' + df_list = pd.read_html(url, match=match) + +Specify a header row (by default ``<th>`` or ``<td>`` elements located within a +``<thead>`` are used to form the column index, if multiple rows are contained within +``<thead>`` then a MultiIndex is created); if specified, the header row is taken +from the data minus the parsed header elements (``<th>`` elements). + +.. code-block:: python + + dfs = pd.read_html(url, header=0) + +Specify an index column: + +.. code-block:: python + + dfs = pd.read_html(url, index_col=0) + +Specify a number of rows to skip: + +..
code-block:: python + + dfs = pd.read_html(url, skiprows=0) + +Specify a number of rows to skip using a list (``xrange`` (Python 2 only) works +as well): + +.. code-block:: python + + dfs = pd.read_html(url, skiprows=range(2)) + +Specify an HTML attribute: + +.. code-block:: python + + dfs1 = pd.read_html(url, attrs={'id': 'table'}) + dfs2 = pd.read_html(url, attrs={'class': 'sortable'}) + print(np.array_equal(dfs1[0], dfs2[0])) # Should be True + +Specify values that should be converted to NaN: + +.. code-block:: python + + dfs = pd.read_html(url, na_values=['No Acquirer']) + +Specify whether to keep the default set of NaN values: + +.. code-block:: python + + dfs = pd.read_html(url, keep_default_na=False) + +Specify converters for columns. This is useful for numerical text data that has +leading zeros. By default columns that are numerical are cast to numeric +types and the leading zeros are lost. To avoid this, we can convert these +columns to strings. + +.. code-block:: python + + url_mcc = 'https://en.wikipedia.org/wiki/Mobile_country_code' + dfs = pd.read_html(url_mcc, match='Telekom Albania', header=0, + converters={'MNC': str}) + +Use some combination of the above: + +.. code-block:: python + + dfs = pd.read_html(url, match='Metcalf Bank', index_col=0) + +Read in pandas ``to_html`` output (with some loss of floating point precision): + +.. code-block:: python + + df = pd.DataFrame(np.random.randn(2, 2)) + s = df.to_html(float_format='{0:.40g}'.format) + dfin = pd.read_html(s, index_col=0) + +The ``lxml`` backend will raise an error on a failed parse if that is the only +parser you provide. If you only have a single parser you can provide just a +string, but it is considered good practice to pass a list with one string if, +for example, the function expects a sequence of strings. You may use: + +.. code-block:: python + + dfs = pd.read_html(url, 'Metcalf Bank', index_col=0, flavor=['lxml']) + +Or you could pass ``flavor='lxml'`` without a list: + +.. 
code-block:: python + + dfs = pd.read_html(url, 'Metcalf Bank', index_col=0, flavor='lxml') + +However, if you have bs4 and html5lib installed and pass ``None`` or ``['lxml', +'bs4']`` then the parse will most likely succeed. Note that *as soon as a parse +succeeds, the function will return*. + +.. code-block:: python + + dfs = pd.read_html(url, 'Metcalf Bank', index_col=0, flavor=['lxml', 'bs4']) + + +.. _io.html: + +Writing to HTML files +'''''''''''''''''''''' + +``DataFrame`` objects have an instance method ``to_html`` which renders the +contents of the ``DataFrame`` as an HTML table. The function arguments are as +in the method ``to_string`` described above. + +.. note:: + + Not all of the possible options for ``DataFrame.to_html`` are shown here for + brevity's sake. See :func:`~pandas.core.frame.DataFrame.to_html` for the + full set of options. + +.. ipython:: python + :suppress: + + def write_html(df, filename, *args, **kwargs): + static = os.path.abspath(os.path.join('source', '_static')) + with open(os.path.join(static, filename + '.html'), 'w') as f: + df.to_html(f, *args, **kwargs) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(2, 2)) + df + print(df.to_html()) # raw html + +.. ipython:: python + :suppress: + + write_html(df, 'basic') + +HTML: + +.. raw:: html + :file: ../_static/basic.html + +The ``columns`` argument will limit the columns shown: + +.. ipython:: python + + print(df.to_html(columns=[0])) + +.. ipython:: python + :suppress: + + write_html(df, 'columns', columns=[0]) + +HTML: + +.. raw:: html + :file: ../_static/columns.html + +``float_format`` takes a Python callable to control the precision of floating +point values: + +.. ipython:: python + + print(df.to_html(float_format='{0:.10f}'.format)) + +.. ipython:: python + :suppress: + + write_html(df, 'float_format', float_format='{0:.10f}'.format) + +HTML: + +.. 
raw:: html + :file: ../_static/float_format.html + +``bold_rows`` will make the row labels bold by default, but you can turn that +off: + +.. ipython:: python + + print(df.to_html(bold_rows=False)) + +.. ipython:: python + :suppress: + + write_html(df, 'nobold', bold_rows=False) + +.. raw:: html + :file: ../_static/nobold.html + +The ``classes`` argument provides the ability to give the resulting HTML +table CSS classes. Note that these classes are *appended* to the existing +``'dataframe'`` class. + +.. ipython:: python + + print(df.to_html(classes=['awesome_table_class', 'even_more_awesome_class'])) + +The ``render_links`` argument provides the ability to add hyperlinks to cells +that contain URLs. + +.. versionadded:: 0.24 + +.. ipython:: python + + url_df = pd.DataFrame({ + 'name': ['Python', 'Pandas'], + 'url': ['https://www.python.org/', 'https://pandas.pydata.org']}) + print(url_df.to_html(render_links=True)) + +.. ipython:: python + :suppress: + + write_html(url_df, 'render_links', render_links=True) + +HTML: + +.. raw:: html + :file: ../_static/render_links.html + +Finally, the ``escape`` argument allows you to control whether the +"<", ">" and "&" characters are escaped in the resulting HTML (by default it is +``True``). So to get the HTML without escaped characters, pass ``escape=False``. + +.. ipython:: python + + df = pd.DataFrame({'a': list('&<>'), 'b': np.random.randn(3)}) + + +.. ipython:: python + :suppress: + + write_html(df, 'escape') + write_html(df, 'noescape', escape=False) + +Escaped: + +.. ipython:: python + + print(df.to_html()) + +.. raw:: html + :file: ../_static/escape.html + +Not escaped: + +.. ipython:: python + + print(df.to_html(escape=False)) + +.. raw:: html + :file: ../_static/noescape.html + +.. note:: + + Some browsers may not show a difference in the rendering of the previous two + HTML tables. + + +..
_io.html.gotchas: + +HTML Table Parsing Gotchas +'''''''''''''''''''''''''' + +There are some versioning issues surrounding the libraries that are used to +parse HTML tables in the top-level pandas io function ``read_html``. + +**Issues with** |lxml|_ + +* Benefits + + * |lxml|_ is very fast. + + * |lxml|_ requires Cython to install correctly. + +* Drawbacks + + * |lxml|_ does *not* make any guarantees about the results of its parse + *unless* it is given |svm|_. + + * In light of the above, we have chosen to allow you, the user, to use the + |lxml|_ backend, but **this backend will use** |html5lib|_ if |lxml|_ + fails to parse + + * It is therefore *highly recommended* that you install both + |BeautifulSoup4|_ and |html5lib|_, so that you will still get a valid + result (provided everything else is valid) even if |lxml|_ fails. + +**Issues with** |BeautifulSoup4|_ **using** |lxml|_ **as a backend** + +* The above issues hold here as well since |BeautifulSoup4|_ is essentially + just a wrapper around a parser backend. + +**Issues with** |BeautifulSoup4|_ **using** |html5lib|_ **as a backend** + +* Benefits + + * |html5lib|_ is far more lenient than |lxml|_ and consequently deals + with *real-life markup* in a much saner way rather than just, e.g., + dropping an element without notifying you. + + * |html5lib|_ *generates valid HTML5 markup from invalid markup + automatically*. This is extremely important for parsing HTML tables, + since it guarantees a valid document. However, that does NOT mean that + it is "correct", since the process of fixing markup does not have a + single definition. + + * |html5lib|_ is pure Python and requires no additional build steps beyond + its own installation. + +* Drawbacks + + * The biggest drawback to using |html5lib|_ is that it is slow as + molasses. However consider the fact that many tables on the web are not + big enough for the parsing algorithm runtime to matter. 
It is more + likely that the bottleneck will be in the process of reading the raw + text from the URL over the web, i.e., IO (input-output). For very large + tables, this might not be true. + + +.. |svm| replace:: **strictly valid markup** +.. _svm: https://validator.w3.org/docs/help.html#validation_basics + +.. |html5lib| replace:: **html5lib** +.. _html5lib: https://github.com/html5lib/html5lib-python + +.. |BeautifulSoup4| replace:: **BeautifulSoup4** +.. _BeautifulSoup4: https://www.crummy.com/software/BeautifulSoup + +.. |lxml| replace:: **lxml** +.. _lxml: https://lxml.de + + + + +.. _io.excel: + +Excel files +----------- + +The :func:`~pandas.read_excel` method can read Excel 2003 (``.xls``) +files using the ``xlrd`` Python module. Excel 2007+ (``.xlsx``) files +can be read using either ``xlrd`` or ``openpyxl``. Binary Excel (``.xlsb``) +files can be read using ``pyxlsb``. +The :meth:`~DataFrame.to_excel` instance method is used for +saving a ``DataFrame`` to Excel. Generally the semantics are +similar to working with :ref:`csv` data. +See the :ref:`cookbook` for some advanced strategies. + +.. _io.excel_reader: + +Reading Excel files +''''''''''''''''''' + +In the most basic use-case, ``read_excel`` takes a path to an Excel +file, and the ``sheet_name`` indicating which sheet to parse. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel('path_to_file.xls', sheet_name='Sheet1') + + +.. _io.excel.excelfile_class: + +``ExcelFile`` class ++++++++++++++++++++ + +To facilitate working with multiple sheets from the same file, the ``ExcelFile`` +class can be used to wrap the file and can be passed into ``read_excel`` +There will be a performance benefit for reading multiple sheets as the file is +read into memory only once. + +.. code-block:: python + + xlsx = pd.ExcelFile('path_to_file.xls') + df = pd.read_excel(xlsx, 'Sheet1') + +The ``ExcelFile`` class can also be used as a context manager. + +.. 
code-block:: python + + with pd.ExcelFile('path_to_file.xls') as xls: + df1 = pd.read_excel(xls, 'Sheet1') + df2 = pd.read_excel(xls, 'Sheet2') + +The ``sheet_names`` property will generate +a list of the sheet names in the file. + +The primary use-case for an ``ExcelFile`` is parsing multiple sheets with +different parameters: + +.. code-block:: python + + data = {} + # For when Sheet1's format differs from Sheet2 + with pd.ExcelFile('path_to_file.xls') as xls: + data['Sheet1'] = pd.read_excel(xls, 'Sheet1', index_col=None, + na_values=['NA']) + data['Sheet2'] = pd.read_excel(xls, 'Sheet2', index_col=1) + +Note that if the same parsing parameters are used for all sheets, a list +of sheet names can simply be passed to ``read_excel`` with no loss in performance. + +.. code-block:: python + + # using the ExcelFile class + data = {} + with pd.ExcelFile('path_to_file.xls') as xls: + data['Sheet1'] = pd.read_excel(xls, 'Sheet1', index_col=None, + na_values=['NA']) + data['Sheet2'] = pd.read_excel(xls, 'Sheet2', index_col=None, + na_values=['NA']) + + # equivalent using the read_excel function + data = pd.read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], + index_col=None, na_values=['NA']) + +``ExcelFile`` can also be called with a ``xlrd.book.Book`` object +as a parameter. This allows the user to control how the excel file is read. +For example, sheets can be loaded on demand by calling ``xlrd.open_workbook()`` +with ``on_demand=True``. + +.. code-block:: python + + import xlrd + xlrd_book = xlrd.open_workbook('path_to_file.xls', on_demand=True) + with pd.ExcelFile(xlrd_book) as xls: + df1 = pd.read_excel(xls, 'Sheet1') + df2 = pd.read_excel(xls, 'Sheet2') + +.. _io.excel.specifying_sheets: + +Specifying sheets ++++++++++++++++++ + +.. note :: The second argument is ``sheet_name``, not to be confused with ``ExcelFile.sheet_names``. + +.. note :: An ExcelFile's attribute ``sheet_names`` provides access to a list of sheets. 
+* The argument ``sheet_name`` allows specifying the sheet or sheets to read. +* The default value for ``sheet_name`` is 0, indicating to read the first sheet. +* Pass a string to refer to the name of a particular sheet in the workbook. +* Pass an integer to refer to the index of a sheet. Indices follow Python + convention, beginning at 0. +* Pass a list of either strings or integers, to return a dictionary of specified sheets. +* Pass a ``None`` to return a dictionary of all available sheets. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA']) + +Using the sheet index: + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel('path_to_file.xls', 0, index_col=None, na_values=['NA']) + +Using all default values: + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel('path_to_file.xls') + +Using None to get all sheets: + +.. code-block:: python + + # Returns a dictionary of DataFrames + pd.read_excel('path_to_file.xls', sheet_name=None) + +Using a list to get multiple sheets: + +.. code-block:: python + + # Returns the 1st and 4th sheet, as a dictionary of DataFrames. + pd.read_excel('path_to_file.xls', sheet_name=['Sheet1', 3]) + +``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either +a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. +Sheets can be specified by sheet index or sheet name, using an integer or string, +respectively. + +.. _io.excel.reading_multiindex: + +Reading a ``MultiIndex`` +++++++++++++++++++++++++ + +``read_excel`` can read a ``MultiIndex`` index, by passing a list of columns to ``index_col`` +and a ``MultiIndex`` column by passing a list of rows to ``header``. If either the ``index`` +or ``columns`` have serialized level names those will be read in as well by specifying +the rows/columns that make up the levels. + +For example, to read in a ``MultiIndex`` index without names: + +..
ipython:: python + + df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, + index=pd.MultiIndex.from_product([['a', 'b'], ['c', 'd']])) + df.to_excel('path_to_file.xlsx') + df = pd.read_excel('path_to_file.xlsx', index_col=[0, 1]) + df + +If the index has level names, they will be parsed as well, using the same +parameters. + +.. ipython:: python + + df.index = df.index.set_names(['lvl1', 'lvl2']) + df.to_excel('path_to_file.xlsx') + df = pd.read_excel('path_to_file.xlsx', index_col=[0, 1]) + df + + +If the source file has both ``MultiIndex`` index and columns, lists specifying each +should be passed to ``index_col`` and ``header``: + +.. ipython:: python + + df.columns = pd.MultiIndex.from_product([['a'], ['b', 'd']], + names=['c1', 'c2']) + df.to_excel('path_to_file.xlsx') + df = pd.read_excel('path_to_file.xlsx', index_col=[0, 1], header=[0, 1]) + df + +.. ipython:: python + :suppress: + + os.remove('path_to_file.xlsx') + + +Parsing specific columns +++++++++++++++++++++++++ + +It is often the case that users will insert columns to do temporary computations +in Excel and you may not want to read in those columns. ``read_excel`` takes +a ``usecols`` keyword to allow you to specify a subset of columns to parse. + +.. deprecated:: 0.24.0 + +Passing in an integer for ``usecols`` has been deprecated. Please pass in a list +of ints from 0 to ``usecols`` inclusive instead. + +If ``usecols`` is an integer, then it is assumed to indicate the last column +to be parsed. + +.. code-block:: python + + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=2) + +You can also specify a comma-delimited set of Excel columns and ranges as a string: + +.. code-block:: python + + pd.read_excel('path_to_file.xls', 'Sheet1', usecols='A,C:E') + +If ``usecols`` is a list of integers, then it is assumed to be the file column +indices to be parsed. + +..
code-block:: python + + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=[0, 2, 3]) + +Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + +.. versionadded:: 0.24 + +If ``usecols`` is a list of strings, it is assumed that each string corresponds +to a column name provided either by the user in ``names`` or inferred from the +document header row(s). Those strings define which columns will be parsed: + +.. code-block:: python + + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=['foo', 'bar']) + +Element order is ignored, so ``usecols=['baz', 'joe']`` is the same as ``['joe', 'baz']``. + +.. versionadded:: 0.24 + +If ``usecols`` is callable, the callable function will be evaluated against +the column names, returning names where the callable function evaluates to ``True``. + +.. code-block:: python + + pd.read_excel('path_to_file.xls', 'Sheet1', usecols=lambda x: x.isalpha()) + +Parsing dates ++++++++++++++ + +Datetime-like values are normally automatically converted to the appropriate +dtype when reading the excel file. But if you have a column of strings that +*look* like dates (but are not actually formatted as dates in excel), you can +use the ``parse_dates`` keyword to parse those strings to datetimes: + +.. code-block:: python + + pd.read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings']) + + +Cell converters ++++++++++++++++ + +It is possible to transform the contents of Excel cells via the ``converters`` +option. For instance, to convert a column to boolean: + +.. code-block:: python + + pd.read_excel('path_to_file.xls', 'Sheet1', converters={'MyBools': bool}) + +This option handles missing values and treats exceptions in the converters +as missing data. Transformations are applied cell by cell rather than to the +column as a whole, so the array dtype is not guaranteed. For instance, a +column of integers with missing values cannot be transformed to an array +with integer dtype, because NaN is strictly a float.
You can manually mask +missing data to recover integer dtype: + +.. code-block:: python + + def cfun(x): + return int(x) if x else -1 + + + pd.read_excel('path_to_file.xls', 'Sheet1', converters={'MyInts': cfun}) + +Dtype specifications +++++++++++++++++++++ + +As an alternative to converters, the type for an entire column can +be specified using the `dtype` keyword, which takes a dictionary +mapping column names to types. To interpret data with +no type inference, use the type ``str`` or ``object``. + +.. code-block:: python + + pd.read_excel('path_to_file.xls', dtype={'MyInts': 'int64', 'MyText': str}) + +.. _io.excel_writer: + +Writing Excel files +''''''''''''''''''' + +Writing Excel files to disk ++++++++++++++++++++++++++++ + +To write a ``DataFrame`` object to a sheet of an Excel file, you can use the +``to_excel`` instance method. The arguments are largely the same as ``to_csv`` +described above, the first argument being the name of the excel file, and the +optional second argument the name of the sheet to which the ``DataFrame`` should be +written. For example: + +.. code-block:: python + + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') + +Files with a ``.xls`` extension will be written using ``xlwt`` and those with a +``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or +``openpyxl``. + +The ``DataFrame`` will be written in a way that tries to mimic the REPL output. +The ``index_label`` will be placed in the second +row instead of the first. You can place it in the first row by setting the +``merge_cells`` option in ``to_excel()`` to ``False``: + +.. code-block:: python + + df.to_excel('path_to_file.xlsx', index_label='label', merge_cells=False) + +In order to write separate ``DataFrames`` to separate sheets in a single Excel file, +one can pass an :class:`~pandas.io.excel.ExcelWriter`. + +.. 
code-block:: python + + with pd.ExcelWriter('path_to_file.xlsx') as writer: + df1.to_excel(writer, sheet_name='Sheet1') + df2.to_excel(writer, sheet_name='Sheet2') + +.. note:: + + Wringing a little more performance out of ``read_excel`` + Internally, Excel stores all numeric data as floats. Because this can + produce unexpected behavior when reading in data, pandas defaults to trying + to convert floats to integers if it doesn't lose information (``1.0 --> + 1``). You can pass ``convert_float=False`` to disable this behavior, which + may give a slight performance improvement. + +.. _io.excel_writing_buffer: + +Writing Excel files to memory ++++++++++++++++++++++++++++++ + +Pandas supports writing Excel files to buffer-like objects such as ``StringIO`` or +``BytesIO`` using :class:`~pandas.io.excel.ExcelWriter`. + +.. code-block:: python + + # Safe import for either Python 2.x or 3.x + try: + from io import BytesIO + except ImportError: + from cStringIO import StringIO as BytesIO + + bio = BytesIO() + + # By setting the 'engine' in the ExcelWriter constructor. + writer = pd.ExcelWriter(bio, engine='xlsxwriter') + df.to_excel(writer, sheet_name='Sheet1') + + # Save the workbook + writer.save() + + # Seek to the beginning and read to copy the workbook to a variable in memory + bio.seek(0) + workbook = bio.read() + +.. note:: + + ``engine`` is optional but recommended. Setting the engine determines + the version of workbook produced. Setting ``engine='xlwt'`` will produce an + Excel 2003-format workbook (xls). Using either ``'openpyxl'`` or + ``'xlsxwriter'`` will produce an Excel 2007-format workbook (xlsx). If + omitted, an Excel 2007-formatted workbook is produced. + + +.. _io.excel.writers: + +Excel writer engines +'''''''''''''''''''' + +Pandas chooses an Excel writer via two methods: + +1. the ``engine`` keyword argument +2.
the filename extension (via the default specified in config options) + +By default, pandas uses the `XlsxWriter`_ for ``.xlsx``, `openpyxl`_ +for ``.xlsm``, and `xlwt`_ for ``.xls`` files. If you have multiple +engines installed, you can set the default engine through :ref:`setting the +config options ` ``io.excel.xlsx.writer`` and +``io.excel.xls.writer``. pandas will fall back on `openpyxl`_ for ``.xlsx`` +files if `Xlsxwriter`_ is not available. + +.. _XlsxWriter: https://xlsxwriter.readthedocs.io +.. _openpyxl: https://openpyxl.readthedocs.io/ +.. _xlwt: http://www.python-excel.org + +To specify which writer you want to use, you can pass an engine keyword +argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: + +* ``openpyxl``: version 2.4 or higher is required +* ``xlsxwriter`` +* ``xlwt`` + +.. code-block:: python + + # By setting the 'engine' in the DataFrame 'to_excel()' methods. + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1', engine='xlsxwriter') + + # By setting the 'engine' in the ExcelWriter constructor. + writer = pd.ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') + + # Or via pandas configuration. + from pandas import options # noqa: E402 + options.io.excel.xlsx.writer = 'xlsxwriter' + + df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') + +.. _io.excel.style: + +Style and formatting +'''''''''''''''''''' + +The look and feel of Excel worksheets created from pandas can be modified using the following parameters on the ``DataFrame``'s ``to_excel`` method. + +* ``float_format`` : Format string for floating point numbers (default ``None``). +* ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). + +Using the `Xlsxwriter`_ engine provides many options for controlling the +format of an Excel worksheet created with the ``to_excel`` method. 
Excellent examples can be found in the +`Xlsxwriter`_ documentation here: https://xlsxwriter.readthedocs.io/working_with_pandas.html + +.. _io.ods: + +OpenDocument Spreadsheets +------------------------- + +.. versionadded:: 0.25 + +The :func:`~pandas.read_excel` method can also read OpenDocument spreadsheets +using the ``odfpy`` module. The semantics and features for reading +OpenDocument spreadsheets match what can be done for `Excel files`_ using +``engine='odf'``. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel('path_to_file.ods', engine='odf') + +.. note:: + + Currently pandas only supports *reading* OpenDocument spreadsheets. Writing + is not implemented. + +.. _io.xlsb: + +Binary Excel (.xlsb) files +-------------------------- + +.. versionadded:: 1.0.0 + +The :func:`~pandas.read_excel` method can also read binary Excel files +using the ``pyxlsb`` module. The semantics and features for reading +binary Excel files mostly match what can be done for `Excel files`_ using +``engine='pyxlsb'``. ``pyxlsb`` does not recognize datetime types +in files and will return floats instead. + +.. code-block:: python + + # Returns a DataFrame + pd.read_excel('path_to_file.xlsb', engine='pyxlsb') + +.. note:: + + Currently pandas only supports *reading* binary Excel files. Writing + is not implemented. + + +.. _io.clipboard: + +Clipboard +--------- + +A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method, +which takes the contents of the clipboard buffer and passes them to the +``read_csv`` method. For instance, you can copy the following text to the +clipboard (CTRL-C on many operating systems): + +.. code-block:: console + + A B C + x 1 4 p + y 2 5 q + z 3 6 r + +And then import the data directly to a ``DataFrame`` by calling: + +.. 
code-block:: python + + >>> clipdf = pd.read_clipboard() + >>> clipdf + A B C + x 1 4 p + y 2 5 q + z 3 6 r + +The ``to_clipboard`` method can be used to write the contents of a ``DataFrame`` to +the clipboard. Following which you can paste the clipboard contents into other +applications (CTRL-V on many operating systems). Here we illustrate writing a +``DataFrame`` into clipboard and reading it back. + +.. code-block:: python + + >>> df = pd.DataFrame({'A': [1, 2, 3], + ... 'B': [4, 5, 6], + ... 'C': ['p', 'q', 'r']}, + ... index=['x', 'y', 'z']) + >>> df + A B C + x 1 4 p + y 2 5 q + z 3 6 r + >>> df.to_clipboard() + >>> pd.read_clipboard() + A B C + x 1 4 p + y 2 5 q + z 3 6 r + +We can see that we got the same content back, which we had earlier written to the clipboard. + +.. note:: + + You may need to install xclip or xsel (with PyQt5, PyQt4 or qtpy) on Linux to use these methods. + +.. _io.pickle: + +Pickling +-------- + +All pandas objects are equipped with ``to_pickle`` methods which use Python's +``cPickle`` module to save data structures to disk using the pickle format. + +.. ipython:: python + + df + df.to_pickle('foo.pkl') + +The ``read_pickle`` function in the ``pandas`` namespace can be used to load +any pickled pandas object (or any other pickled object) from file: + + +.. ipython:: python + + pd.read_pickle('foo.pkl') + +.. ipython:: python + :suppress: + + os.remove('foo.pkl') + +.. warning:: + + Loading pickled data received from untrusted sources can be unsafe. + + See: https://docs.python.org/3/library/pickle.html + +.. warning:: + + :func:`read_pickle` is only guaranteed backwards compatible back to pandas version 0.20.3 + +.. _io.pickle.compression: + +Compressed pickle files +''''''''''''''''''''''' + +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read +and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. 
+The ``zip`` file format only supports reading and must contain only one data file +to be read. + +The compression type can be an explicit parameter or be inferred from the file extension. +If 'infer', then use ``gzip``, ``bz2``, ``zip``, or ``xz`` if filename ends in ``'.gz'``, ``'.bz2'``, ``'.zip'``, or +``'.xz'``, respectively. + +.. ipython:: python + + df = pd.DataFrame({ + 'A': np.random.randn(1000), + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + df + +Using an explicit compression type: + +.. ipython:: python + + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt + +Inferring compression type from the extension: + +.. ipython:: python + + df.to_pickle("data.pkl.xz", compression="infer") + rt = pd.read_pickle("data.pkl.xz", compression="infer") + rt + +The default is to 'infer': + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt + + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt + +.. ipython:: python + :suppress: + + os.remove("data.pkl.compress") + os.remove("data.pkl.xz") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + +.. _io.msgpack: + +msgpack +------- + +pandas support for ``msgpack`` has been removed in version 1.0.0. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. + +Example pyarrow usage: + +.. code-block:: python + + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame({'A': [1, 2, 3]}) + >>> context = pa.default_serialization_context() + >>> df_bytestring = context.serialize(df).to_buffer().to_pybytes() + +For documentation on pyarrow, see `here `__. + +.. _io.hdf5: + +HDF5 (PyTables) +--------------- + +``HDFStore`` is a dict-like object which reads and writes pandas using +the high performance HDF5 format using the excellent `PyTables +`__ library. See the :ref:`cookbook ` +for some advanced strategies + +.. 
warning:: + + pandas requires ``PyTables`` >= 3.0.0. + There is a indexing bug in ``PyTables`` < 3.2 which may appear when querying stores using an index. + If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. + Stores created previously will need to be rewritten using the updated version. + +.. ipython:: python + :suppress: + :okexcept: + + os.remove('store.h5') + +.. ipython:: python + + store = pd.HDFStore('store.h5') + print(store) + +Objects can be written to the file just like adding key-value pairs to a +dict: + +.. ipython:: python + + index = pd.date_range('1/1/2000', periods=8) + s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) + df = pd.DataFrame(np.random.randn(8, 3), index=index, + columns=['A', 'B', 'C']) + + # store.put('s', s) is an equivalent method + store['s'] = s + + store['df'] = df + + store + +In a current or later Python session, you can retrieve stored objects: + +.. ipython:: python + + # store.get('df') is an equivalent method + store['df'] + + # dotted (attribute) access provides get as well + store.df + +Deletion of the object specified by the key: + +.. ipython:: python + + # store.remove('df') is an equivalent method + del store['df'] + + store + +Closing a Store and using a context manager: + +.. ipython:: python + + store.close() + store + store.is_open + + # Working with, and automatically closing the store using a context manager + with pd.HDFStore('store.h5') as store: + store.keys() + +.. ipython:: python + :suppress: + + store.close() + os.remove('store.h5') + + + +Read/write API +'''''''''''''' + +``HDFStore`` supports a top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing, +similar to how ``read_csv`` and ``to_csv`` work. + +.. ipython:: python + + df_tl = pd.DataFrame({'A': list(range(5)), 'B': list(range(5))}) + df_tl.to_hdf('store_tl.h5', 'table', append=True) + pd.read_hdf('store_tl.h5', 'table', where=['index>2']) + +.. 
ipython:: python + :suppress: + :okexcept: + + os.remove('store_tl.h5') + + +HDFStore will by default not drop rows that are all missing. This behavior can be changed by setting ``dropna=True``. + + +.. ipython:: python + + df_with_missing = pd.DataFrame({'col1': [0, np.nan, 2], + 'col2': [1, np.nan, np.nan]}) + df_with_missing + + df_with_missing.to_hdf('file.h5', 'df_with_missing', + format='table', mode='w') + + pd.read_hdf('file.h5', 'df_with_missing') + + df_with_missing.to_hdf('file.h5', 'df_with_missing', + format='table', mode='w', dropna=True) + pd.read_hdf('file.h5', 'df_with_missing') + + +.. ipython:: python + :suppress: + + os.remove('file.h5') + + +.. _io.hdf5-fixed: + +Fixed format +'''''''''''' + +The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called +the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply +remove them and rewrite). Nor are they **queryable**; they must be +retrieved in their entirety. They also do not support dataframes with non-unique column names. +The ``fixed`` format stores offer very fast writing and slightly faster reading than ``table`` stores. +This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'``. + +.. warning:: + + A ``fixed`` format will raise a ``TypeError`` if you try to retrieve using a ``where``: + + .. code-block:: python + + >>> pd.DataFrame(np.random.randn(10, 2)).to_hdf('test_fixed.h5', 'df') + >>> pd.read_hdf('test_fixed.h5', 'df', where='index>5') + TypeError: cannot pass a where specification when reading a fixed format. + this store must be selected in its entirety + + +.. _io.hdf5-table: + +Table format +'''''''''''' + +``HDFStore`` supports another ``PyTables`` format on disk, the ``table`` +format. Conceptually a ``table`` is shaped very much like a DataFrame, +with rows and columns. 
A ``table`` may be appended to in the same or +other sessions. In addition, delete and query type operations are +supported. This format is specified by ``format='table'`` or ``format='t'`` +to ``append`` or ``put`` or ``to_hdf``. + +This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to +enable ``put/append/to_hdf`` to by default store in the ``table`` format. + +.. ipython:: python + :suppress: + :okexcept: + + os.remove('store.h5') + +.. ipython:: python + + store = pd.HDFStore('store.h5') + df1 = df[0:4] + df2 = df[4:] + + # append data (creates a table automatically) + store.append('df', df1) + store.append('df', df2) + store + + # select the entire object + store.select('df') + + # the type of stored data + store.root.df._v_attrs.pandas_type + +.. note:: + + You can also create a ``table`` by passing ``format='table'`` or ``format='t'`` to a ``put`` operation. + +.. _io.hdf5-keys: + +Hierarchical keys +''''''''''''''''' + +Keys to a store can be specified as a string. These can be in a +hierarchical path-name like format (e.g. ``foo/bar/bah``), which will +generate a hierarchy of sub-stores (or ``Groups`` in PyTables +parlance). Keys can be specified without the leading '/' and are **always** +absolute (e.g. 'foo' refers to '/foo'). Removal operations can remove +everything in the sub-store and **below**, so be *careful*. + +.. ipython:: python + + store.put('foo/bar/bah', df) + store.append('food/orange', df) + store.append('food/apple', df) + store + + # a list of keys are returned + store.keys() + + # remove all nodes under this level + store.remove('food') + store + + +You can walk through the group hierarchy using the ``walk`` method which +will yield a tuple for each group key along with the relative keys of its contents. + +.. versionadded:: 0.24.0 + + +.. 
ipython:: python + + for (path, subgroups, subkeys) in store.walk(): + for subgroup in subgroups: + print('GROUP: {}/{}'.format(path, subgroup)) + for subkey in subkeys: + key = '/'.join([path, subkey]) + print('KEY: {}'.format(key)) + print(store.get(key)) + + + +.. warning:: + + Hierarchical keys cannot be retrieved as dotted (attribute) access as described above for items stored under the root node. + + .. code-block:: ipython + + In [8]: store.foo.bar.bah + AttributeError: 'HDFStore' object has no attribute 'foo' + + # you can directly access the actual PyTables node but using the root node + In [9]: store.root.foo.bar.bah + Out[9]: + /foo/bar/bah (Group) '' + children := ['block0_items' (Array), 'block0_values' (Array), 'axis0' (Array), 'axis1' (Array)] + + Instead, use explicit string based keys: + + .. ipython:: python + + store['foo/bar/bah'] + + +.. _io.hdf5-types: + +Storing types +''''''''''''' + +Storing mixed types in a table +++++++++++++++++++++++++++++++ + +Storing mixed-dtype data is supported. Strings are stored as a +fixed-width using the maximum size of the appended column. Subsequent attempts +at appending longer strings will raise a ``ValueError``. + +Passing ``min_itemsize={`values`: size}`` as a parameter to append +will set a larger minimum for the string columns. Storing ``floats, +strings, ints, bools, datetime64`` are currently supported. For string +columns, passing ``nan_rep = 'nan'`` to append will change the default +nan representation on disk (which converts to/from `np.nan`), this +defaults to `nan`. + +.. 
ipython:: python + + df_mixed = pd.DataFrame({'A': np.random.randn(8), + 'B': np.random.randn(8), + 'C': np.array(np.random.randn(8), dtype='float32'), + 'string': 'string', + 'int': 1, + 'bool': True, + 'datetime64': pd.Timestamp('20010102')}, + index=list(range(8))) + df_mixed.loc[df_mixed.index[3:5], + ['A', 'B', 'string', 'datetime64']] = np.nan + + store.append('df_mixed', df_mixed, min_itemsize={'values': 50}) + df_mixed1 = store.select('df_mixed') + df_mixed1 + df_mixed1.dtypes.value_counts() + + # we have provided a minimum string column size + store.root.df_mixed.table + +Storing MultiIndex DataFrames ++++++++++++++++++++++++++++++ + +Storing MultiIndex ``DataFrames`` as tables is very similar to +storing/selecting from homogeneous index ``DataFrames``. + +.. ipython:: python + + index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['foo', 'bar']) + df_mi = pd.DataFrame(np.random.randn(10, 3), index=index, + columns=['A', 'B', 'C']) + df_mi + + store.append('df_mi', df_mi) + store.select('df_mi') + + # the levels are automatically included as data columns + store.select('df_mi', 'foo=bar') + +.. note:: + The ``index`` keyword is reserved and cannot be use as a level name. + +.. _io.hdf5-query: + +Querying +'''''''' + +Querying a table +++++++++++++++++ + +``select`` and ``delete`` operations have an optional criterion that can +be specified to select/delete only a subset of the data. This allows one +to have a very large on-disk table and retrieve only a portion of the +data. + +A query is specified using the ``Term`` class under the hood, as a boolean expression. + +* ``index`` and ``columns`` are supported indexers of ``DataFrames``. +* if ``data_columns`` are specified, these can be used as additional indexers. +* level name in a MultiIndex, with default name ``level_0``, ``level_1``, … if not provided. 
+ +Valid comparison operators are: + +``=, ==, !=, >, >=, <, <=`` + +Valid boolean expressions are combined with: + +* ``|`` : or +* ``&`` : and +* ``(`` and ``)`` : for grouping + +These rules are similar to how boolean expressions are used in pandas for indexing. + +.. note:: + + - ``=`` will be automatically expanded to the comparison operator ``==`` + - ``~`` is the not operator, but can only be used in very limited + circumstances + - If a list/tuple of expressions is passed they will be combined via ``&`` + +The following are valid expressions: + +* ``'index >= date'`` +* ``"columns = ['A', 'D']"`` +* ``"columns in ['A', 'D']"`` +* ``'columns = A'`` +* ``'columns == A'`` +* ``"~(columns = ['A', 'B'])"`` +* ``'index > df.index[3] & string = "bar"'`` +* ``'(index > df.index[3] & index <= df.index[6]) | string = "bar"'`` +* ``"ts >= Timestamp('2012-02-01')"`` +* ``"major_axis>=20130101"`` + +The ``indexers`` are on the left-hand side of the sub-expression: + +``columns``, ``major_axis``, ``ts`` + +The right-hand side of the sub-expression (after a comparison operator) can be: + +* functions that will be evaluated, e.g. ``Timestamp('2012-02-01')`` +* strings, e.g. ``"bar"`` +* date-like, e.g. ``20130101``, or ``"20130101"`` +* lists, e.g. ``"['A', 'B']"`` +* variables that are defined in the local names space, e.g. ``date`` + +.. note:: + + Passing a string to a query by interpolating it into the query + expression is not recommended. Simply assign the string of interest to a + variable and use that variable in an expression. For example, do this + + .. code-block:: python + + string = "HolyMoly'" + store.select('df', 'index == string') + + instead of this + + .. code-block:: ipython + + string = "HolyMoly'" + store.select('df', 'index == %s' % string) + + The latter will **not** work and will raise a ``SyntaxError``.Note that + there's a single quote followed by a double quote in the ``string`` + variable. 
+ + If you *must* interpolate, use the ``'%r'`` format specifier + + .. code-block:: python + + store.select('df', 'index == %r' % string) + + which will quote ``string``. + + +Here are some examples: + +.. ipython:: python + + dfq = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'), + index=pd.date_range('20130101', periods=10)) + store.append('dfq', dfq, format='table', data_columns=True) + +Use boolean expressions, with in-line function evaluation. + +.. ipython:: python + + store.select('dfq', "index>pd.Timestamp('20130104') & columns=['A', 'B']") + +Use inline column reference. + +.. ipython:: python + + store.select('dfq', where="A>0 or C>0") + +The ``columns`` keyword can be supplied to select a list of columns to be +returned, this is equivalent to passing a +``'columns=list_of_columns_to_filter'``: + +.. ipython:: python + + store.select('df', "columns=['A', 'B']") + +``start`` and ``stop`` parameters can be specified to limit the total search +space. These are in terms of the total number of rows in a table. + +.. note:: + + ``select`` will raise a ``ValueError`` if the query expression has an unknown + variable reference. Usually this means that you are trying to select on a column + that is **not** a data_column. + + ``select`` will raise a ``SyntaxError`` if the query expression is not valid. + + +.. _io.hdf5-timedelta: + +Query timedelta64[ns] ++++++++++++++++++++++ + +You can store and query using the ``timedelta64[ns]`` type. Terms can be +specified in the format: ``()``, where float may be signed (and fractional), and unit can be +``D,s,ms,us,ns`` for the timedelta. Here's an example: + +.. ipython:: python + + from datetime import timedelta + dftd = pd.DataFrame({'A': pd.Timestamp('20130101'), + 'B': [pd.Timestamp('20130101') + timedelta(days=i, + seconds=10) + for i in range(10)]}) + dftd['C'] = dftd['A'] - dftd['B'] + dftd + store.append('dftd', dftd, data_columns=True) + store.select('dftd', "C<'-3.5D'") + +.. 
_io.query_multi: + +Query MultiIndex +++++++++++++++++ + +Selecting from a ``MultiIndex`` can be achieved by using the name of the level. + +.. ipython:: python + + df_mi.index.names + store.select('df_mi', "foo=baz and bar=two") + +If the ``MultiIndex`` levels names are ``None``, the levels are automatically made available via +the ``level_n`` keyword with ``n`` the level of the ``MultiIndex`` you want to select from. + +.. ipython:: python + + index = pd.MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + ) + df_mi_2 = pd.DataFrame(np.random.randn(10, 3), + index=index, columns=["A", "B", "C"]) + df_mi_2 + + store.append("df_mi_2", df_mi_2) + + # the levels are automatically included as data columns with keyword level_n + store.select("df_mi_2", "level_0=foo and level_1=two") + + +Indexing +++++++++ + +You can create/modify an index for a table with ``create_table_index`` +after data is already in the table (after and ``append/put`` +operation). Creating a table index is **highly** encouraged. This will +speed your queries a great deal when you use a ``select`` with the +indexed dimension as the ``where``. + +.. note:: + + Indexes are automagically created on the indexables + and any data columns you specify. This behavior can be turned off by passing + ``index=False`` to ``append``. + +.. ipython:: python + + # we have automagically already created an index (in the first section) + i = store.root.df.table.cols.index.index + i.optlevel, i.kind + + # change an index by passing new parameters + store.create_table_index('df', optlevel=9, kind='full') + i = store.root.df.table.cols.index.index + i.optlevel, i.kind + +Oftentimes when appending large amounts of data to a store, it is useful to turn off index creation for each append, then recreate at the end. + +.. 
ipython:: python + + df_1 = pd.DataFrame(np.random.randn(10, 2), columns=list('AB')) + df_2 = pd.DataFrame(np.random.randn(10, 2), columns=list('AB')) + + st = pd.HDFStore('appends.h5', mode='w') + st.append('df', df_1, data_columns=['B'], index=False) + st.append('df', df_2, data_columns=['B'], index=False) + st.get_storer('df').table + +Then create the index when finished appending. + +.. ipython:: python + + st.create_table_index('df', columns=['B'], optlevel=9, kind='full') + st.get_storer('df').table + + st.close() + +.. ipython:: python + :suppress: + :okexcept: + + os.remove('appends.h5') + +See `here `__ for how to create a completely-sorted-index (CSI) on an existing store. + +.. _io.hdf5-query-data-columns: + +Query via data columns +++++++++++++++++++++++ + +You can designate (and index) certain columns that you want to be able +to perform queries (other than the `indexable` columns, which you can +always query). For instance say you want to perform this common +operation, on-disk, and return just the frame that matches this +query. You can specify ``data_columns = True`` to force all columns to +be ``data_columns``. + +.. 
ipython:: python + + df_dc = df.copy() + df_dc['string'] = 'foo' + df_dc.loc[df_dc.index[4:6], 'string'] = np.nan + df_dc.loc[df_dc.index[7:9], 'string'] = 'bar' + df_dc['string2'] = 'cool' + df_dc.loc[df_dc.index[1:3], ['B', 'C']] = 1.0 + df_dc + + # on-disk operations + store.append('df_dc', df_dc, data_columns=['B', 'C', 'string', 'string2']) + store.select('df_dc', where='B > 0') + + # getting creative + store.select('df_dc', 'B > 0 & C > 0 & string == foo') + + # this is in-memory version of this type of selection + df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == 'foo')] + + # we have automagically created this index and the B/C/string/string2 + # columns are stored separately as ``PyTables`` columns + store.root.df_dc.table + +There is some performance degradation by making lots of columns into +`data columns`, so it is up to the user to designate these. In addition, +you cannot change data columns (nor indexables) after the first +append/put operation (Of course you can simply read in the data and +create a new table!). + +Iterator +++++++++ + +You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk`` +to ``select`` and ``select_as_multiple`` to return an iterator on the results. +The default is 50,000 rows returned in a chunk. + +.. ipython:: python + + for df in store.select('df', chunksize=3): + print(df) + +.. note:: + + You can also use the iterator with ``read_hdf`` which will open, then + automatically close the store when finished iterating. + + .. code-block:: python + + for df in pd.read_hdf('store.h5', 'df', chunksize=3): + print(df) + +Note, that the chunksize keyword applies to the **source** rows. So if you +are doing a query, then the chunksize will subdivide the total rows in the table +and the query applied, returning an iterator on potentially unequal sized chunks. + +Here is a recipe for generating a query and using it to create equal sized return +chunks. + +.. 
ipython:: python + + dfeq = pd.DataFrame({'number': np.arange(1, 11)}) + dfeq + + store.append('dfeq', dfeq, data_columns=['number']) + + def chunks(l, n): + return [l[i:i + n] for i in range(0, len(l), n)] + + evens = [2, 4, 6, 8, 10] + coordinates = store.select_as_coordinates('dfeq', 'number=evens') + for c in chunks(coordinates, 2): + print(store.select('dfeq', where=c)) + +Advanced queries +++++++++++++++++ + +Select a single column +^^^^^^^^^^^^^^^^^^^^^^ + +To retrieve a single indexable or data column, use the +method ``select_column``. This will, for example, enable you to get the index +very quickly. These return a ``Series`` of the result, indexed by the row number. +These do not currently accept the ``where`` selector. + +.. ipython:: python + + store.select_column('df_dc', 'index') + store.select_column('df_dc', 'string') + +.. _io.hdf5-selecting_coordinates: + +Selecting coordinates +^^^^^^^^^^^^^^^^^^^^^ + +Sometimes you want to get the coordinates (a.k.a the index locations) of your query. This returns an +``Int64Index`` of the resulting locations. These coordinates can also be passed to subsequent +``where`` operations. + +.. ipython:: python + + df_coord = pd.DataFrame(np.random.randn(1000, 2), + index=pd.date_range('20000101', periods=1000)) + store.append('df_coord', df_coord) + c = store.select_as_coordinates('df_coord', 'index > 20020101') + c + store.select('df_coord', where=c) + +.. _io.hdf5-where_mask: + +Selecting using a where mask +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Sometime your query can involve creating a list of rows to select. Usually this ``mask`` would +be a resulting ``index`` from an indexing operation. This example selects the months of +a datetimeindex which are 5. + +.. 
ipython:: python + + df_mask = pd.DataFrame(np.random.randn(1000, 2), + index=pd.date_range('20000101', periods=1000)) + store.append('df_mask', df_mask) + c = store.select_column('df_mask', 'index') + where = c[pd.DatetimeIndex(c).month == 5].index + store.select('df_mask', where=where) + +Storer object +^^^^^^^^^^^^^ + +If you want to inspect the stored object, retrieve via +``get_storer``. You could use this programmatically to say get the number +of rows in an object. + +.. ipython:: python + + store.get_storer('df_dc').nrows + + +Multiple table queries +++++++++++++++++++++++ + +The methods ``append_to_multiple`` and +``select_as_multiple`` can perform appending/selecting from +multiple tables at once. The idea is to have one table (call it the +selector table) that you index most/all of the columns, and perform your +queries. The other table(s) are data tables with an index matching the +selector table's index. You can then perform a very fast query +on the selector table, yet get lots of data back. This method is similar to +having a very wide table, but enables more efficient queries. + +The ``append_to_multiple`` method splits a given single DataFrame +into multiple tables according to ``d``, a dictionary that maps the +table names to a list of 'columns' you want in that table. If `None` +is used in place of a list, that table will have the remaining +unspecified columns of the given DataFrame. The argument ``selector`` +defines which table is the selector table (which you can make queries from). +The argument ``dropna`` will drop rows from the input ``DataFrame`` to ensure +tables are synchronized. This means that if a row for one of the tables +being written to is entirely ``np.NaN``, that row will be dropped from all tables. + +If ``dropna`` is False, **THE USER IS RESPONSIBLE FOR SYNCHRONIZING THE TABLES**. 
Remember that entirely ``np.NaN`` rows are not written to the HDFStore, so if
warning:: + + Please note that HDF5 **DOES NOT RECLAIM SPACE** in the h5 files + automatically. Thus, repeatedly deleting (or removing nodes) and adding + again, **WILL TEND TO INCREASE THE FILE SIZE**. + + To *repack and clean* the file, use :ref:`ptrepack `. + +.. _io.hdf5-notes: + +Notes & caveats +''''''''''''''' + + +Compression ++++++++++++ + +``PyTables`` allows the stored data to be compressed. This applies to +all kinds of stores, not just tables. Two parameters are used to +control compression: ``complevel`` and ``complib``. + +``complevel`` specifies if and how hard data is to be compressed. + ``complevel=0`` and ``complevel=None`` disables + compression and ``0`_: The default compression library. A classic in terms of compression, achieves good compression rates but is somewhat slow. + - `lzo `_: Fast compression and decompression. + - `bzip2 `_: Good compression rates. + - `blosc `_: Fast compression and decompression. + + Support for alternative blosc compressors: + + - `blosc:blosclz `_ This is the + default compressor for ``blosc`` + - `blosc:lz4 + `_: + A compact, very popular and fast compressor. + - `blosc:lz4hc + `_: + A tweaked version of LZ4, produces better + compression ratios at the expense of speed. + - `blosc:snappy `_: + A popular compressor used in many places. + - `blosc:zlib `_: A classic; + somewhat slower than the previous ones, but + achieving better compression ratios. + - `blosc:zstd `_: An + extremely well balanced codec; it provides the best + compression ratios among the others above, and at + reasonably fast speed. + + If ``complib`` is defined as something other than the + listed libraries a ``ValueError`` exception is issued. + +.. note:: + + If the library specified with the ``complib`` option is missing on your platform, + compression defaults to ``zlib`` without further ado. + +Enable compression for all objects within the file: + +.. 
code-block:: python + + store_compressed = pd.HDFStore('store_compressed.h5', complevel=9, + complib='blosc:blosclz') + +Or on-the-fly compression (this only applies to tables) in stores where compression is not enabled: + +.. code-block:: python + + store.append('df', df, complib='zlib', complevel=5) + +.. _io.hdf5-ptrepack: + +ptrepack +++++++++ + +``PyTables`` offers better write performance when tables are compressed after +they are written, as opposed to turning on compression at the very +beginning. You can use the supplied ``PyTables`` utility +``ptrepack``. In addition, ``ptrepack`` can change compression levels +after the fact. + +.. code-block:: console + + ptrepack --chunkshape=auto --propindexes --complevel=9 --complib=blosc in.h5 out.h5 + +Furthermore ``ptrepack in.h5 out.h5`` will *repack* the file to allow +you to reuse previously deleted space. Alternatively, one can simply +remove the file and write again, or use the ``copy`` method. + +.. _io.hdf5-caveats: + +Caveats ++++++++ + +.. warning:: + + ``HDFStore`` is **not-threadsafe for writing**. The underlying + ``PyTables`` only supports concurrent reads (via threading or + processes). If you need reading and writing *at the same time*, you + need to serialize these operations in a single thread in a single + process. You will corrupt your data otherwise. See the (:issue:`2397`) for more information. + +* If you use locks to manage write access between multiple processes, you + may want to use :py:func:`~os.fsync` before releasing write locks. For + convenience you can use ``store.flush(fsync=True)`` to do this for you. +* Once a ``table`` is created columns (DataFrame) + are fixed; only exactly the same columns can be appended +* Be aware that timezones (e.g., ``pytz.timezone('US/Eastern')``) + are not necessarily equal across timezone versions. 
So if data is + localized to a specific timezone in the HDFStore using one version + of a timezone library and that data is updated with another version, the data + will be converted to UTC since these timezones are not considered + equal. Either use the same version of timezone library or use ``tz_convert`` with + the updated timezone definition. + +.. warning:: + + ``PyTables`` will show a ``NaturalNameWarning`` if a column name + cannot be used as an attribute selector. + *Natural* identifiers contain only letters, numbers, and underscores, + and may not begin with a number. + Other identifiers cannot be used in a ``where`` clause + and are generally a bad idea. + +.. _io.hdf5-data_types: + +DataTypes +''''''''' + +``HDFStore`` will map an object dtype to the ``PyTables`` underlying +dtype. This means the following types are known to work: + +====================================================== ========================= +Type Represents missing values +====================================================== ========================= +floating : ``float64, float32, float16`` ``np.nan`` +integer : ``int64, int32, int8, uint64,uint32, uint8`` +boolean +``datetime64[ns]`` ``NaT`` +``timedelta64[ns]`` ``NaT`` +categorical : see the section below +object : ``strings`` ``np.nan`` +====================================================== ========================= + +``unicode`` columns are not supported, and **WILL FAIL**. + +.. _io.hdf5-categorical: + +Categorical data +++++++++++++++++ + +You can write data that contains ``category`` dtypes to a ``HDFStore``. +Queries work the same as if it was an object array. However, the ``category`` dtyped data is +stored in a more efficient manner. + +.. 
ipython:: python + + dfcat = pd.DataFrame({'A': pd.Series(list('aabbcdba')).astype('category'), + 'B': np.random.randn(8)}) + dfcat + dfcat.dtypes + cstore = pd.HDFStore('cats.h5', mode='w') + cstore.append('dfcat', dfcat, format='table', data_columns=['A']) + result = cstore.select('dfcat', where="A in ['b', 'c']") + result + result.dtypes + +.. ipython:: python + :suppress: + :okexcept: + + cstore.close() + os.remove('cats.h5') + + +String columns +++++++++++++++ + +**min_itemsize** + +The underlying implementation of ``HDFStore`` uses a fixed column width (itemsize) for string columns. +A string column itemsize is calculated as the maximum of the +length of data (for that column) that is passed to the ``HDFStore``, **in the first append**. Subsequent appends, +may introduce a string for a column **larger** than the column can hold, an Exception will be raised (otherwise you +could have a silent truncation of these columns, leading to loss of information). In the future we may relax this and +allow a user-specified truncation to occur. + +Pass ``min_itemsize`` on the first table creation to a-priori specify the minimum length of a particular string column. +``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to +allow all *indexables* or *data_columns* to have this min_itemsize. + +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. + +.. note:: + + If you are not passing any ``data_columns``, then the ``min_itemsize`` will be the maximum of the length of any string passed + +.. 
ipython:: python + + dfs = pd.DataFrame({'A': 'foo', 'B': 'bar'}, index=list(range(5))) + dfs + + # A and B have a size of 30 + store.append('dfs', dfs, min_itemsize=30) + store.get_storer('dfs').table + + # A is created as a data_column with a size of 30 + # B's size is calculated + store.append('dfs2', dfs, min_itemsize={'A': 30}) + store.get_storer('dfs2').table + +**nan_rep** + +String columns will serialize a ``np.nan`` (a missing value) with the ``nan_rep`` string representation. This defaults to the string value ``nan``. +You could inadvertently turn an actual string value ``nan`` into a missing value. + +.. ipython:: python + + dfss = pd.DataFrame({'A': ['foo', 'bar', 'nan']}) + dfss + + store.append('dfss', dfss) + store.select('dfss') + + # here you need to specify a different nan rep + store.append('dfss2', dfss, nan_rep='_nan_') + store.select('dfss2') + +.. _io.external_compatibility: + +External compatibility +'''''''''''''''''''''' + +``HDFStore`` writes ``table`` format objects in specific formats suitable for +producing loss-less round trips to pandas objects. For external +compatibility, ``HDFStore`` can read native ``PyTables`` format +tables. + +It is possible to write an ``HDFStore`` object that can easily be imported into ``R`` using the +``rhdf5`` library (`Package website`_). Create a table format store like this: + +.. _package website: https://www.bioconductor.org/packages/release/bioc/html/rhdf5.html + +.. ipython:: python + + df_for_r = pd.DataFrame({"first": np.random.rand(100), + "second": np.random.rand(100), + "class": np.random.randint(0, 2, (100, ))}, + index=range(100)) + df_for_r.head() + + store_export = pd.HDFStore('export.h5') + store_export.append('df_for_r', df_for_r, data_columns=df_dc.columns) + store_export + +.. ipython:: python + :suppress: + + store_export.close() + os.remove('export.h5') + +In R this file can be read into a ``data.frame`` object using the ``rhdf5`` +library. 
The following example function reads the corresponding column names +and data values from the values and assembles them into a ``data.frame``: + +.. code-block:: R + + # Load values and column names for all datasets from corresponding nodes and + # insert them into one data.frame object. + + library(rhdf5) + + loadhdf5data <- function(h5File) { + + listing <- h5ls(h5File) + # Find all data nodes, values are stored in *_values and corresponding column + # titles in *_items + data_nodes <- grep("_values", listing$name) + name_nodes <- grep("_items", listing$name) + data_paths = paste(listing$group[data_nodes], listing$name[data_nodes], sep = "/") + name_paths = paste(listing$group[name_nodes], listing$name[name_nodes], sep = "/") + columns = list() + for (idx in seq(data_paths)) { + # NOTE: matrices returned by h5read have to be transposed to obtain + # required Fortran order! + data <- data.frame(t(h5read(h5File, data_paths[idx]))) + names <- t(h5read(h5File, name_paths[idx])) + entry <- data.frame(data) + colnames(entry) <- names + columns <- append(columns, entry) + } + + data <- data.frame(columns) + + return(data) + } + +Now you can import the ``DataFrame`` into R: + +.. code-block:: R + + > data = loadhdf5data("transfer.hdf5") + > head(data) + first second class + 1 0.4170220047 0.3266449 0 + 2 0.7203244934 0.5270581 0 + 3 0.0001143748 0.8859421 1 + 4 0.3023325726 0.3572698 1 + 5 0.1467558908 0.9085352 1 + 6 0.0923385948 0.6233601 1 + +.. note:: + The R function lists the entire HDF5 file's contents and assembles the + ``data.frame`` object from all matching nodes, so use this only as a + starting point if you have stored multiple ``DataFrame`` objects to a + single HDF5 file. + + +Performance +''''''''''' + +* ``tables`` format come with a writing performance penalty as compared to + ``fixed`` stores. The benefit is the ability to append/delete and + query (potentially very large amounts of data). 
Write times are + generally longer as compared with regular stores. Query times can + be quite fast, especially on an indexed axis. +* You can pass ``chunksize=`` to ``append``, specifying the + write chunksize (default is 50000). This will significantly lower + your memory usage on writing. +* You can pass ``expectedrows=`` to the first ``append``, + to set the TOTAL number of rows that ``PyTables`` will expect. + This will optimize read/write performance. +* Duplicate rows can be written to tables, but are filtered out in + selection (with the last items being selected; thus a table is + unique on major, minor pairs) +* A ``PerformanceWarning`` will be raised if you are attempting to + store types that will be pickled by PyTables (rather than stored as + endemic types). See + `Here `__ + for more information and some solutions. + + +.. ipython:: python + :suppress: + + store.close() + os.remove('store.h5') + + +.. _io.feather: + +Feather +------- + +Feather provides binary columnar serialization for data frames. It is designed to make reading and writing data +frames efficient, and to make sharing data across data analysis languages easy. + +Feather is designed to faithfully serialize and de-serialize DataFrames, supporting all of the pandas +dtypes, including extension dtypes such as categorical and datetime with tz. + +Several caveats. + +* This is a newer library, and the format, though stable, is not guaranteed to be backward compatible + to the earlier versions. +* The format will NOT write an ``Index``, or ``MultiIndex`` for the + ``DataFrame`` and will raise an error if a non-default one is provided. You + can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to + ignore it. +* Duplicate column names and non-string columns names are not supported +* Non supported types include ``Period`` and actual Python object types. These will raise a helpful error message + on an attempt at serialization. + +See the `Full Documentation `__. + +.. 
ipython:: python + :suppress: + + import warnings + # This can be removed once building with pyarrow >=0.15.0 + warnings.filterwarnings("ignore", "The Sparse", FutureWarning) + + +.. ipython:: python + + df = pd.DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', periods=3, tz='US/Eastern'), + 'i': pd.date_range('20130101', periods=3, freq='ns')}) + + df + df.dtypes + +Write to a feather file. + +.. ipython:: python + + df.to_feather('example.feather') + +Read from a feather file. + +.. ipython:: python + + result = pd.read_feather('example.feather') + result + + # we preserve dtypes + result.dtypes + +.. ipython:: python + :suppress: + + os.remove('example.feather') + + +.. _io.parquet: + +Parquet +------- + +.. versionadded:: 0.21.0 + +`Apache Parquet `__ provides a partitioned binary columnar serialization for data frames. It is designed to +make reading and writing data frames efficient, and to make sharing data across data analysis +languages easy. Parquet can use a variety of compression techniques to shrink the file size as much as possible +while still maintaining good read performance. + +Parquet is designed to faithfully serialize and de-serialize ``DataFrame`` s, supporting all of the pandas +dtypes, including extension dtypes such as datetime with tz. + +Several caveats. + +* Duplicate column names and non-string columns names are not supported. +* The ``pyarrow`` engine always writes the index to the output, but ``fastparquet`` only writes non-default + indexes. This extra column can cause problems for non-Pandas consumers that are not expecting it. You can + force including or omitting indexes with the ``index`` argument, regardless of the underlying engine. +* Index level names, if specified, must be strings. 
+* In the ``pyarrow`` engine, categorical dtypes for non-string types can be serialized to parquet, but will de-serialize as their primitive dtype. +* The ``pyarrow`` engine preserves the ``ordered`` flag of categorical dtypes with string types. ``fastparquet`` does not preserve the ``ordered`` flag. +* Non supported types include ``Interval`` and actual Python object types. These will raise a helpful error message + on an attempt at serialization. ``Period`` type is supported with pyarrow >= 0.16.0. +* The ``pyarrow`` engine preserves extension data types such as the nullable integer and string data + type (requiring pyarrow >= 0.16.0, and requiring the extension type to implement the needed protocols, + see the :ref:`extension types documentation `). + +You can specify an ``engine`` to direct the serialization. This can be one of ``pyarrow``, or ``fastparquet``, or ``auto``. +If the engine is NOT specified, then the ``pd.options.io.parquet.engine`` option is checked; if this is also ``auto``, +then ``pyarrow`` is tried, and falling back to ``fastparquet``. + +See the documentation for `pyarrow `__ and `fastparquet `__. + +.. note:: + + These engines are very similar and should read/write nearly identical parquet format files. + Currently ``pyarrow`` does not support timedelta data, ``fastparquet>=0.1.4`` supports timezone aware datetimes. + These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). + +.. ipython:: python + + df = pd.DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.date_range('20130101', periods=3), + 'g': pd.date_range('20130101', periods=3, tz='US/Eastern'), + 'h': pd.Categorical(list('abc')), + 'i': pd.Categorical(list('abc'), ordered=True)}) + + df + df.dtypes + +Write to a parquet file. + +.. 
ipython:: python + :okwarning: + + df.to_parquet('example_pa.parquet', engine='pyarrow') + df.to_parquet('example_fp.parquet', engine='fastparquet') + +Read from a parquet file. + +.. ipython:: python + + result = pd.read_parquet('example_fp.parquet', engine='fastparquet') + result = pd.read_parquet('example_pa.parquet', engine='pyarrow') + + result.dtypes + +Read only certain columns of a parquet file. + +.. ipython:: python + + result = pd.read_parquet('example_fp.parquet', + engine='fastparquet', columns=['a', 'b']) + result = pd.read_parquet('example_pa.parquet', + engine='pyarrow', columns=['a', 'b']) + result.dtypes + + +.. ipython:: python + :suppress: + + os.remove('example_pa.parquet') + os.remove('example_fp.parquet') + + +Handling indexes +'''''''''''''''' + +Serializing a ``DataFrame`` to parquet may include the implicit index as one or +more columns in the output file. Thus, this code: + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + df.to_parquet('test.parquet', engine='pyarrow') + +creates a parquet file with *three* columns if you use ``pyarrow`` for serialization: +``a``, ``b``, and ``__index_level_0__``. If you're using ``fastparquet``, the +index `may or may not `_ +be written to the file. + +This unexpected extra column causes some databases like Amazon Redshift to reject +the file, because that column doesn't exist in the target table. + +If you want to omit a dataframe's indexes when writing, pass ``index=False`` to +:func:`~pandas.DataFrame.to_parquet`: + +.. ipython:: python + + df.to_parquet('test.parquet', index=False) + +This creates a parquet file with just the two expected columns, ``a`` and ``b``. +If your ``DataFrame`` has a custom index, you won't get it back when you load +this file into a ``DataFrame``. + +Passing ``index=True`` will *always* write the index, even if that's not the +underlying engine's default behavior. + +.. 
ipython:: python + :suppress: + + os.remove('test.parquet') + + +Partitioning Parquet files +'''''''''''''''''''''''''' + +.. versionadded:: 0.24.0 + +Parquet supports partitioning of data based on the values of one or more columns. + +.. ipython:: python + + df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]}) + df.to_parquet(path='test', engine='pyarrow', + partition_cols=['a'], compression=None) + +The `path` specifies the parent directory to which data will be saved. +The `partition_cols` are the column names by which the dataset will be partitioned. +Columns are partitioned in the order they are given. The partition splits are +determined by the unique values in the partition columns. +The above example creates a partitioned dataset that may look like: + +.. code-block:: text + + test + ├── a=0 + │ ├── 0bac803e32dc42ae83fddfd029cbdebc.parquet + │ └── ... + └── a=1 + ├── e6ab24a4f45147b49b54a662f0c412a3.parquet + └── ... + +.. ipython:: python + :suppress: + + from shutil import rmtree + try: + rmtree('test') + except OSError: + pass + +.. _io.orc: + +ORC +--- + +.. versionadded:: 1.0.0 + +Similar to the :ref:`parquet ` format, the `ORC Format `__ is a binary columnar serialization +for data frames. It is designed to make reading data frames efficient. Pandas provides *only* a reader for the +ORC format, :func:`~pandas.read_orc`. This requires the `pyarrow `__ library. + +.. _io.sql: + +SQL queries +----------- + +The :mod:`pandas.io.sql` module provides a collection of query wrappers to both +facilitate data retrieval and to reduce dependency on DB-specific API. Database abstraction +is provided by SQLAlchemy if installed. In addition you will need a driver library for +your database. Examples of such drivers are `psycopg2 `__ +for PostgreSQL or `pymysql `__ for MySQL. +For `SQLite `__ this is +included in Python's standard library by default. +You can find an overview of supported drivers for each SQL dialect in the +`SQLAlchemy docs `__. 
+ +If SQLAlchemy is not installed, a fallback is only provided for sqlite (and +for mysql for backwards compatibility, but this is deprecated and will be +removed in a future version). +This mode requires a Python database adapter which respect the `Python +DB-API `__. + +See also some :ref:`cookbook examples ` for some advanced strategies. + +The key functions are: + +.. autosummary:: + + read_sql_table + read_sql_query + read_sql + DataFrame.to_sql + +.. note:: + + The function :func:`~pandas.read_sql` is a convenience wrapper around + :func:`~pandas.read_sql_table` and :func:`~pandas.read_sql_query` (and for + backward compatibility) and will delegate to specific function depending on + the provided input (database table name or sql query). + Table names do not need to be quoted if they have special characters. + +In the following example, we use the `SQlite `__ SQL database +engine. You can use a temporary SQLite database where data are stored in +"memory". + +To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine +object from database URI. You only need to create the engine once per database you are +connecting to. +For more information on :func:`create_engine` and the URI formatting, see the examples +below and the SQLAlchemy `documentation `__ + +.. ipython:: python + + from sqlalchemy import create_engine + # Create your engine. + engine = create_engine('sqlite:///:memory:') + +If you want to manage your own connections you can pass one of those instead: + +.. code-block:: python + + with engine.connect() as conn, conn.begin(): + data = pd.read_sql_table('data', conn) + +Writing DataFrames +'''''''''''''''''' + +Assuming the following data is in a ``DataFrame`` ``data``, we can insert it into +the database using :func:`~pandas.DataFrame.to_sql`. 
+ ++-----+------------+-------+-------+-------+ +| id | Date | Col_1 | Col_2 | Col_3 | ++=====+============+=======+=======+=======+ +| 26 | 2010-10-18 | X | 27.5 | True | ++-----+------------+-------+-------+-------+ +| 42 | 2010-10-19 | Y | -12.5 | False | ++-----+------------+-------+-------+-------+ +| 63 | 2010-10-20 | Z | 5.73 | True | ++-----+------------+-------+-------+-------+ + + +.. ipython:: python + :suppress: + + import datetime + c = ['id', 'Date', 'Col_1', 'Col_2', 'Col_3'] + d = [(26, datetime.datetime(2010, 10, 18), 'X', 27.5, True), + (42, datetime.datetime(2010, 10, 19), 'Y', -12.5, False), + (63, datetime.datetime(2010, 10, 20), 'Z', 5.73, True)] + + data = pd.DataFrame(d, columns=c) + +.. ipython:: python + + data + data.to_sql('data', engine) + +With some databases, writing large DataFrames can result in errors due to +packet size limitations being exceeded. This can be avoided by setting the +``chunksize`` parameter when calling ``to_sql``. For example, the following +writes ``data`` to the database in batches of 1000 rows at a time: + +.. ipython:: python + + data.to_sql('data_chunked', engine, chunksize=1000) + +SQL data types +++++++++++++++ + +:func:`~pandas.DataFrame.to_sql` will try to map your data to an appropriate +SQL data type based on the dtype of the data. When you have columns of dtype +``object``, pandas will try to infer the data type. + +You can always override the default type by specifying the desired SQL type of +any of the columns by using the ``dtype`` argument. This argument needs a +dictionary mapping column names to SQLAlchemy types (or strings for the sqlite3 +fallback mode). +For example, specifying to use the sqlalchemy ``String`` type instead of the +default ``Text`` type for string columns: + +.. ipython:: python + + from sqlalchemy.types import String + data.to_sql('data_dtype', engine, dtype={'Col_1': String}) + +.. 
note:: + + Due to the limited support for timedelta's in the different database + flavors, columns with type ``timedelta64`` will be written as integer + values as nanoseconds to the database and a warning will be raised. + +.. note:: + + Columns of ``category`` dtype will be converted to the dense representation + as you would get with ``np.asarray(categorical)`` (e.g. for string categories + this gives an array of strings). + Because of this, reading the database table back in does **not** generate + a categorical. + +.. _io.sql_datetime_data: + +Datetime data types +''''''''''''''''''' + +Using SQLAlchemy, :func:`~pandas.DataFrame.to_sql` is capable of writing +datetime data that is timezone naive or timezone aware. However, the resulting +data stored in the database ultimately depends on the supported data type +for datetime data of the database system being used. + +The following table lists supported data types for datetime data for some +common databases. Other database dialects may have different data types for +datetime data. + +=========== ============================================= =================== +Database SQL Datetime Types Timezone Support +=========== ============================================= =================== +SQLite ``TEXT`` No +MySQL ``TIMESTAMP`` or ``DATETIME`` No +PostgreSQL ``TIMESTAMP`` or ``TIMESTAMP WITH TIME ZONE`` Yes +=========== ============================================= =================== + +When writing timezone aware data to databases that do not support timezones, +the data will be written as timezone naive timestamps that are in local time +with respect to the timezone. + +:func:`~pandas.read_sql_table` is also capable of reading datetime data that is +timezone aware or naive. When reading ``TIMESTAMP WITH TIME ZONE`` types, pandas +will convert the data to UTC. + +.. _io.sql.method: + +Insertion method +++++++++++++++++ + +.. versionadded:: 0.24.0 + +The parameter ``method`` controls the SQL insertion clause used. 
+Possible values are: + +- ``None``: Uses standard SQL ``INSERT`` clause (one per row). +- ``'multi'``: Pass multiple values in a single ``INSERT`` clause. + It uses a *special* SQL syntax not supported by all backends. + This usually provides better performance for analytic databases + like *Presto* and *Redshift*, but has worse performance for + traditional SQL backends if the table contains many columns. + For more information check the SQLAlchemy `documentation + `__. +- callable with signature ``(pd_table, conn, keys, data_iter)``: + This can be used to implement a more performant insertion method based on + specific backend dialect features. + +Example of a callable using PostgreSQL `COPY clause +`__:: + + # Alternative to_sql() *method* for DBs that support COPY FROM + import csv + from io import StringIO + + def psql_insert_copy(table, conn, keys, data_iter): + """ + Execute SQL statement inserting data + + Parameters + ---------- + table : pandas.io.sql.SQLTable + conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection + keys : list of str + Column names + data_iter : Iterable that iterates the values to be inserted + """ + # gets a DBAPI connection that can provide a cursor + dbapi_conn = conn.connection + with dbapi_conn.cursor() as cur: + s_buf = StringIO() + writer = csv.writer(s_buf) + writer.writerows(data_iter) + s_buf.seek(0) + + columns = ', '.join('"{}"'.format(k) for k in keys) + if table.schema: + table_name = '{}.{}'.format(table.schema, table.name) + else: + table_name = table.name + + sql = 'COPY {} ({}) FROM STDIN WITH CSV'.format( + table_name, columns) + cur.copy_expert(sql=sql, file=s_buf) + +Reading tables +'''''''''''''' + +:func:`~pandas.read_sql_table` will read a database table given the +table name and optionally a subset of columns to read. + +.. note:: + + In order to use :func:`~pandas.read_sql_table`, you **must** have the + SQLAlchemy optional dependency installed. + +.. 
ipython:: python + + pd.read_sql_table('data', engine) + +.. note:: + + Note that pandas infers column dtypes from query outputs, and not by looking + up data types in the physical database schema. For example, assume ``userid`` + is an integer column in a table. Then, intuitively, ``select userid ...`` will + return integer-valued series, while ``select cast(userid as text) ...`` will + return object-valued (str) series. Accordingly, if the query output is empty, + then all resulting columns will be returned as object-valued (since they are + most general). If you foresee that your query will sometimes generate an empty + result, you may want to explicitly typecast afterwards to ensure dtype + integrity. + +You can also specify the name of the column as the ``DataFrame`` index, +and specify a subset of columns to be read. + +.. ipython:: python + + pd.read_sql_table('data', engine, index_col='id') + pd.read_sql_table('data', engine, columns=['Col_1', 'Col_2']) + +And you can explicitly force columns to be parsed as dates: + +.. ipython:: python + + pd.read_sql_table('data', engine, parse_dates=['Date']) + +If needed you can explicitly specify a format string, or a dict of arguments +to pass to :func:`pandas.to_datetime`: + +.. code-block:: python + + pd.read_sql_table('data', engine, parse_dates={'Date': '%Y-%m-%d'}) + pd.read_sql_table('data', engine, + parse_dates={'Date': {'format': '%Y-%m-%d %H:%M:%S'}}) + + +You can check if a table exists using :func:`~pandas.io.sql.has_table` + +Schema support +'''''''''''''' + +Reading from and writing to different schema's is supported through the ``schema`` +keyword in the :func:`~pandas.read_sql_table` and :func:`~pandas.DataFrame.to_sql` +functions. Note however that this depends on the database flavor (sqlite does not +have schema's). For example: + +.. 
code-block:: python + + df.to_sql('table', engine, schema='other_schema') + pd.read_sql_table('table', engine, schema='other_schema') + +Querying +'''''''' + +You can query using raw SQL in the :func:`~pandas.read_sql_query` function. +In this case you must use the SQL variant appropriate for your database. +When using SQLAlchemy, you can also pass SQLAlchemy Expression language constructs, +which are database-agnostic. + +.. ipython:: python + + pd.read_sql_query('SELECT * FROM data', engine) + +Of course, you can specify a more "complex" query. + +.. ipython:: python + + pd.read_sql_query("SELECT id, Col_1, Col_2 FROM data WHERE id = 42;", engine) + +The :func:`~pandas.read_sql_query` function supports a ``chunksize`` argument. +Specifying this will return an iterator through chunks of the query result: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(20, 3), columns=list('abc')) + df.to_sql('data_chunks', engine, index=False) + +.. ipython:: python + + for chunk in pd.read_sql_query("SELECT * FROM data_chunks", + engine, chunksize=5): + print(chunk) + +You can also run a plain query without creating a ``DataFrame`` with +:func:`~pandas.io.sql.execute`. This is useful for queries that don't return values, +such as INSERT. This is functionally equivalent to calling ``execute`` on the +SQLAlchemy engine or db connection object. Again, you must use the SQL syntax +variant appropriate for your database. + +.. code-block:: python + + from pandas.io import sql + sql.execute('SELECT * FROM table_name', engine) + sql.execute('INSERT INTO table_name VALUES(?, ?, ?)', engine, + params=[('id', 1, 12.2, True)]) + + +Engine connection examples +'''''''''''''''''''''''''' + +To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine +object from database URI. You only need to create the engine once per database you are +connecting to. + +.. 
code-block:: python + + from sqlalchemy import create_engine + + engine = create_engine('postgresql://scott:tiger@localhost:5432/mydatabase') + + engine = create_engine('mysql+mysqldb://scott:tiger@localhost/foo') + + engine = create_engine('oracle://scott:tiger@127.0.0.1:1521/sidname') + + engine = create_engine('mssql+pyodbc://mydsn') + + # sqlite:/// + # where is relative: + engine = create_engine('sqlite:///foo.db') + + # or absolute, starting with a slash: + engine = create_engine('sqlite:////absolute/path/to/foo.db') + +For more information see the examples the SQLAlchemy `documentation `__ + + +Advanced SQLAlchemy queries +''''''''''''''''''''''''''' + +You can use SQLAlchemy constructs to describe your query. + +Use :func:`sqlalchemy.text` to specify query parameters in a backend-neutral way + +.. ipython:: python + + import sqlalchemy as sa + pd.read_sql(sa.text('SELECT * FROM data where Col_1=:col1'), + engine, params={'col1': 'X'}) + +If you have an SQLAlchemy description of your database you can express where conditions using SQLAlchemy expressions + +.. ipython:: python + + metadata = sa.MetaData() + data_table = sa.Table('data', metadata, + sa.Column('index', sa.Integer), + sa.Column('Date', sa.DateTime), + sa.Column('Col_1', sa.String), + sa.Column('Col_2', sa.Float), + sa.Column('Col_3', sa.Boolean), + ) + + pd.read_sql(sa.select([data_table]).where(data_table.c.Col_3 is True), engine) + +You can combine SQLAlchemy expressions with parameters passed to :func:`read_sql` using :func:`sqlalchemy.bindparam` + +.. ipython:: python + + import datetime as dt + expr = sa.select([data_table]).where(data_table.c.Date > sa.bindparam('date')) + pd.read_sql(expr, engine, params={'date': dt.datetime(2010, 10, 18)}) + + +Sqlite fallback +''''''''''''''' + +The use of sqlite is supported without using SQLAlchemy. +This mode requires a Python database adapter which respect the `Python +DB-API `__. + +You can create connections like so: + +.. 
code-block:: python + + import sqlite3 + con = sqlite3.connect(':memory:') + +And then issue the following queries: + +.. code-block:: python + + data.to_sql('data', con) + pd.read_sql_query("SELECT * FROM data", con) + + +.. _io.bigquery: + +Google BigQuery +--------------- + +.. warning:: + + Starting in 0.20.0, pandas has split off Google BigQuery support into the + separate package ``pandas-gbq``. You can ``pip install pandas-gbq`` to get it. + +The ``pandas-gbq`` package provides functionality to read/write from Google BigQuery. + +pandas integrates with this external package. If ``pandas-gbq`` is installed, you can +use the pandas methods ``pd.read_gbq`` and ``DataFrame.to_gbq``, which will call the +respective functions from ``pandas-gbq``. + +Full documentation can be found `here <https://pandas-gbq.readthedocs.io/>`__. + +.. _io.stata: + +Stata format +------------ + +.. _io.stata_writer: + +Writing to stata format +''''''''''''''''''''''' + +The method :func:`~pandas.core.frame.DataFrame.to_stata` will write a DataFrame +into a .dta file. The format version of this file is always 115 (Stata 12). + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2), columns=list('AB')) + df.to_stata('stata.dta') + +*Stata* data files have limited data type support; only strings with +244 or fewer characters, ``int8``, ``int16``, ``int32``, ``float32`` +and ``float64`` can be stored in ``.dta`` files. Additionally, +*Stata* reserves certain values to represent missing data. Exporting a +non-missing value that is outside of the permitted range in Stata for +a particular data type will retype the variable to the next larger +size. For example, ``int8`` values are restricted to lie between -127 +and 100 in Stata, and so variables with values above 100 will trigger +a conversion to ``int16``. ``nan`` values in floating point data +types are stored as the basic missing data type (``.`` in *Stata*). + +.. note:: + + It is not possible to export missing data values for integer data types.
+ + +The *Stata* writer gracefully handles other data types including ``int64``, +``bool``, ``uint8``, ``uint16``, ``uint32`` by casting to +the smallest supported type that can represent the data. For example, data +with a type of ``uint8`` will be cast to ``int8`` if all values are less than +100 (the upper bound for non-missing ``int8`` data in *Stata*), or, if values are +outside of this range, the variable is cast to ``int16``. + + +.. warning:: + + Conversion from ``int64`` to ``float64`` may result in a loss of precision + if ``int64`` values are larger than 2**53. + +.. warning:: + + :class:`~pandas.io.stata.StataWriter` and + :func:`~pandas.core.frame.DataFrame.to_stata` only support fixed width + strings containing up to 244 characters, a limitation imposed by the version + 115 dta file format. Attempting to write *Stata* dta files with strings + longer than 244 characters raises a ``ValueError``. + +.. _io.stata_reader: + +Reading from Stata format +''''''''''''''''''''''''' + +The top-level function ``read_stata`` will read a dta file and return +either a ``DataFrame`` or a :class:`~pandas.io.stata.StataReader` that can +be used to read the file incrementally. + +.. ipython:: python + + pd.read_stata('stata.dta') + +Specifying a ``chunksize`` yields a +:class:`~pandas.io.stata.StataReader` instance that can be used to +read ``chunksize`` lines from the file at a time. The ``StataReader`` +object can be used as an iterator. + +.. ipython:: python + + reader = pd.read_stata('stata.dta', chunksize=3) + for df in reader: + print(df.shape) + +For more fine-grained control, use ``iterator=True`` and specify +``chunksize`` with each call to +:func:`~pandas.io.stata.StataReader.read`. + +.. ipython:: python + + reader = pd.read_stata('stata.dta', iterator=True) + chunk1 = reader.read(5) + chunk2 = reader.read(5) + +Currently the ``index`` is retrieved as a column. 
+ +The parameter ``convert_categoricals`` indicates whether value labels should be +read and used to create a ``Categorical`` variable from them. Value labels can +also be retrieved by the function ``value_labels``, which requires :func:`~pandas.io.stata.StataReader.read` +to be called before use. + +The parameter ``convert_missing`` indicates whether missing value +representations in Stata should be preserved. If ``False`` (the default), +missing values are represented as ``np.nan``. If ``True``, missing values are +represented using ``StataMissingValue`` objects, and columns containing missing +values will have ``object`` data type. + +.. note:: + + :func:`~pandas.read_stata` and + :class:`~pandas.io.stata.StataReader` support .dta formats 113-115 + (Stata 10-12), 117 (Stata 13), and 118 (Stata 14). + +.. note:: + + Setting ``preserve_dtypes=False`` will upcast to the standard pandas data types: + ``int64`` for all integer types and ``float64`` for floating point data. By default, + the Stata data types are preserved when importing. + +.. ipython:: python + :suppress: + + os.remove('stata.dta') + +.. _io.stata-categorical: + +Categorical data +++++++++++++++++ + +``Categorical`` data can be exported to *Stata* data files as value labeled data. +The exported data consists of the underlying category codes as integer data values +and the categories as value labels. *Stata* does not have an explicit equivalent +to a ``Categorical`` and information about *whether* the variable is ordered +is lost when exporting. + +.. warning:: + + *Stata* only supports string value labels, and so ``str`` is called on the + categories when exporting data. Exporting ``Categorical`` variables with + non-string categories produces a warning, and can result in a loss of + information if the ``str`` representations of the categories are not unique.
+ +Labeled data can similarly be imported from *Stata* data files as ``Categorical`` +variables using the keyword argument ``convert_categoricals`` (``True`` by default). +The keyword argument ``order_categoricals`` (``True`` by default) determines +whether imported ``Categorical`` variables are ordered. + +.. note:: + + When importing categorical data, the values of the variables in the *Stata* + data file are not preserved since ``Categorical`` variables always + use integer data types between ``-1`` and ``n-1`` where ``n`` is the number + of categories. If the original values in the *Stata* data file are required, + these can be imported by setting ``convert_categoricals=False``, which will + import original data (but not the variable labels). The original values can + be matched to the imported categorical data since there is a simple mapping + between the original *Stata* data values and the category codes of imported + Categorical variables: missing values are assigned code ``-1``, and the + smallest original value is assigned ``0``, the second smallest is assigned + ``1`` and so on until the largest original value is assigned the code ``n-1``. + +.. note:: + + *Stata* supports partially labeled series. These series have value labels for + some but not all data values. Importing a partially labeled series will produce + a ``Categorical`` with string categories for the values that are labeled and + numeric categories for values with no label. + +.. _io.sas: + +.. _io.sas_reader: + +SAS formats +----------- + +The top-level function :func:`read_sas` can read (but not write) SAS +`xport` (.XPT) and (since *v0.18.0*) `SAS7BDAT` (.sas7bdat) format files. + +SAS files only contain two value types: ASCII text and floating point +values (usually 8 bytes but sometimes truncated). For xport files, +there is no automatic type conversion to integers, dates, or +categoricals. 
For SAS7BDAT files, the format codes may allow date +variables to be automatically converted to dates. By default the +whole file is read and returned as a ``DataFrame``. + +Specify a ``chunksize`` or use ``iterator=True`` to obtain reader +objects (``XportReader`` or ``SAS7BDATReader``) for incrementally +reading the file. The reader objects also have attributes that +contain additional information about the file and its variables. + +Read a SAS7BDAT file: + +.. code-block:: python + + df = pd.read_sas('sas_data.sas7bdat') + +Obtain an iterator and read an XPORT file 100,000 lines at a time: + +.. code-block:: python + + def do_something(chunk): + pass + + rdr = pd.read_sas('sas_xport.xpt', chunksize=100000) + for chunk in rdr: + do_something(chunk) + +The specification_ for the xport file format is available from the SAS +web site. + +.. _specification: https://support.sas.com/techsup/technote/ts140.pdf + +No official documentation is available for the SAS7BDAT format. + +.. _io.spss: + +.. _io.spss_reader: + +SPSS formats +------------ + +.. versionadded:: 0.25.0 + +The top-level function :func:`read_spss` can read (but not write) SPSS +`sav` (.sav) and `zsav` (.zsav) format files. + +SPSS files contain column names. By default the +whole file is read, categorical columns are converted into ``pd.Categorical``, +and a ``DataFrame`` with all columns is returned. + +Specify the ``usecols`` parameter to obtain a subset of columns. Specify ``convert_categoricals=False`` +to avoid converting categorical columns into ``pd.Categorical``. + +Read an SPSS file: + +.. code-block:: python + + df = pd.read_spss('spss_data.sav') + +Extract a subset of columns contained in ``usecols`` from an SPSS file and +avoid converting categorical columns into ``pd.Categorical``: + +.. code-block:: python + + df = pd.read_spss('spss_data.sav', usecols=['foo', 'bar'], + convert_categoricals=False) + +More information about the `sav` and `zsav` file format is available here_. + +..
_here: https://www.ibm.com/support/knowledgecenter/en/SSLVMB_22.0.0/com.ibm.spss.statistics.help/spss/base/savedatatypes.htm + +.. _io.other: + +Other file formats +------------------ + +pandas itself only supports IO with a limited set of file formats that map +cleanly to its tabular data model. For reading and writing other file formats +into and from pandas, we recommend these packages from the broader community. + +netCDF +'''''' + +xarray_ provides data structures inspired by the pandas ``DataFrame`` for working +with multi-dimensional datasets, with a focus on the netCDF file format and +easy conversion to and from pandas. + +.. _xarray: https://xarray.pydata.org/ + +.. _io.perf: + +Performance considerations +-------------------------- + +This is an informal comparison of various IO methods, using pandas +0.24.2. Timings are machine dependent and small differences should be +ignored. + +.. code-block:: ipython + + In [1]: sz = 1000000 + In [2]: df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz}) + + In [3]: df.info() + <class 'pandas.core.frame.DataFrame'> + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 2 columns): + A 1000000 non-null float64 + B 1000000 non-null int64 + dtypes: float64(1), int64(1) + memory usage: 15.3 MB + +Given the next test set: + +..
code-block:: python + + + + import numpy as np + + import os + + sz = 1000000 + df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz}) + + sz = 1000000 + np.random.seed(42) + df = pd.DataFrame({'A': np.random.randn(sz), 'B': [1] * sz}) + + def test_sql_write(df): + if os.path.exists('test.sql'): + os.remove('test.sql') + sql_db = sqlite3.connect('test.sql') + df.to_sql(name='test_table', con=sql_db) + sql_db.close() + + def test_sql_read(): + sql_db = sqlite3.connect('test.sql') + pd.read_sql_query("select * from test_table", sql_db) + sql_db.close() + + def test_hdf_fixed_write(df): + df.to_hdf('test_fixed.hdf', 'test', mode='w') + + def test_hdf_fixed_read(): + pd.read_hdf('test_fixed.hdf', 'test') + + def test_hdf_fixed_write_compress(df): + df.to_hdf('test_fixed_compress.hdf', 'test', mode='w', complib='blosc') + + def test_hdf_fixed_read_compress(): + pd.read_hdf('test_fixed_compress.hdf', 'test') + + def test_hdf_table_write(df): + df.to_hdf('test_table.hdf', 'test', mode='w', format='table') + + def test_hdf_table_read(): + pd.read_hdf('test_table.hdf', 'test') + + def test_hdf_table_write_compress(df): + df.to_hdf('test_table_compress.hdf', 'test', mode='w', + complib='blosc', format='table') + + def test_hdf_table_read_compress(): + pd.read_hdf('test_table_compress.hdf', 'test') + + def test_csv_write(df): + df.to_csv('test.csv', mode='w') + + def test_csv_read(): + pd.read_csv('test.csv', index_col=0) + + def test_feather_write(df): + df.to_feather('test.feather') + + def test_feather_read(): + pd.read_feather('test.feather') + + def test_pickle_write(df): + df.to_pickle('test.pkl') + + def test_pickle_read(): + pd.read_pickle('test.pkl') + + def test_pickle_write_compress(df): + df.to_pickle('test.pkl.compress', compression='xz') + + def test_pickle_read_compress(): + pd.read_pickle('test.pkl.compress', compression='xz') + + def test_parquet_write(df): + df.to_parquet('test.parquet') + + def test_parquet_read(): + pd.read_parquet('test.parquet') + 
+When writing, the top-three functions in terms of speed are ``test_feather_write``, ``test_hdf_fixed_write`` and ``test_hdf_fixed_write_compress``. + +.. code-block:: ipython + + In [4]: %timeit test_sql_write(df) + 3.29 s ± 43.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [5]: %timeit test_hdf_fixed_write(df) + 19.4 ms ± 560 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [6]: %timeit test_hdf_fixed_write_compress(df) + 19.6 ms ± 308 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [7]: %timeit test_hdf_table_write(df) + 449 ms ± 5.61 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [8]: %timeit test_hdf_table_write_compress(df) + 448 ms ± 11.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [9]: %timeit test_csv_write(df) + 3.66 s ± 26.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [10]: %timeit test_feather_write(df) + 9.75 ms ± 117 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [11]: %timeit test_pickle_write(df) + 30.1 ms ± 229 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [12]: %timeit test_pickle_write_compress(df) + 4.29 s ± 15.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [13]: %timeit test_parquet_write(df) + 67.6 ms ± 706 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + +When reading, the top three are ``test_feather_read``, ``test_pickle_read`` and +``test_hdf_fixed_read``. + + +.. code-block:: ipython + + In [14]: %timeit test_sql_read() + 1.77 s ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [15]: %timeit test_hdf_fixed_read() + 19.4 ms ± 436 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [16]: %timeit test_hdf_fixed_read_compress() + 19.5 ms ± 222 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [17]: %timeit test_hdf_table_read() + 38.6 ms ± 857 µs per loop (mean ± std. dev. 
of 7 runs, 10 loops each) + + In [18]: %timeit test_hdf_table_read_compress() + 38.8 ms ± 1.49 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) + + In [19]: %timeit test_csv_read() + 452 ms ± 9.04 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [20]: %timeit test_feather_read() + 12.4 ms ± 99.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [21]: %timeit test_pickle_read() + 18.4 ms ± 191 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) + + In [22]: %timeit test_pickle_read_compress() + 915 ms ± 7.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) + + In [23]: %timeit test_parquet_read() + 24.4 ms ± 146 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) + + +For this test case ``test.pkl.compress``, ``test.parquet`` and ``test.feather`` took the least space on disk. +Space on disk (in bytes) + +.. code-block:: none + + 29519500 Oct 10 06:45 test.csv + 16000248 Oct 10 06:45 test.feather + 8281983 Oct 10 06:49 test.parquet + 16000857 Oct 10 06:47 test.pkl + 7552144 Oct 10 06:48 test.pkl.compress + 34816000 Oct 10 06:42 test.sql + 24009288 Oct 10 06:43 test_fixed.hdf + 24009288 Oct 10 06:43 test_fixed_compress.hdf + 24458940 Oct 10 06:44 test_table.hdf + 24458940 Oct 10 06:44 test_table_compress.hdf diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst new file mode 100644 index 00000000..8fdcd8d2 --- /dev/null +++ b/doc/source/user_guide/merging.rst @@ -0,0 +1,1462 @@ +.. _merging: + +{{ header }} + +.. ipython:: python + :suppress: + + from matplotlib import pyplot as plt + import pandas.util._doctools as doctools + p = doctools.TablePlotter() + + +**************************** +Merge, join, and concatenate +**************************** + +pandas provides various facilities for easily combining together Series or +DataFrame with various kinds of set logic for the indexes +and relational algebra functionality in the case of join / merge-type +operations. + +.. 
_merging.concat: + +Concatenating objects +--------------------- + +The :func:`~pandas.concat` function (in the main pandas namespace) does all of +the heavy lifting of performing concatenation operations along an axis while +performing optional set logic (union or intersection) of the indexes (if any) on +the other axes. Note that I say "if any" because there is only a single possible +axis of concatenation for Series. + +Before diving into all of the details of ``concat`` and what it can do, here is +a simple example: + +.. ipython:: python + + df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}, + index=[0, 1, 2, 3]) + + df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'], + 'B': ['B4', 'B5', 'B6', 'B7'], + 'C': ['C4', 'C5', 'C6', 'C7'], + 'D': ['D4', 'D5', 'D6', 'D7']}, + index=[4, 5, 6, 7]) + + df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'], + 'B': ['B8', 'B9', 'B10', 'B11'], + 'C': ['C8', 'C9', 'C10', 'C11'], + 'D': ['D8', 'D9', 'D10', 'D11']}, + index=[8, 9, 10, 11]) + + frames = [df1, df2, df3] + result = pd.concat(frames) + +.. ipython:: python + :suppress: + + @savefig merging_concat_basic.png + p.plot(frames, result, + labels=['df1', 'df2', 'df3'], vertical=True); + plt.close('all'); + +Like its sibling function on ndarrays, ``numpy.concatenate``, ``pandas.concat`` +takes a list or dict of homogeneously-typed objects and concatenates them with +some configurable handling of "what to do with the other axes": + +:: + + pd.concat(objs, axis=0, join='outer', ignore_index=False, keys=None, + levels=None, names=None, verify_integrity=False, copy=True) + +* ``objs`` : a sequence or mapping of Series or DataFrame objects. If a + dict is passed, the sorted keys will be used as the `keys` argument, unless + it is passed, in which case the values will be selected (see below). 
Any None + objects will be dropped silently unless they are all None in which case a + ValueError will be raised. +* ``axis`` : {0, 1, ...}, default 0. The axis to concatenate along. +* ``join`` : {'inner', 'outer'}, default 'outer'. How to handle indexes on + other axis(es). Outer for union and inner for intersection. +* ``ignore_index`` : boolean, default False. If True, do not use the index + values on the concatenation axis. The resulting axis will be labeled 0, ..., + n - 1. This is useful if you are concatenating objects where the + concatenation axis does not have meaningful indexing information. Note + the index values on the other axes are still respected in the join. +* ``keys`` : sequence, default None. Construct hierarchical index using the + passed keys as the outermost level. If multiple levels passed, should + contain tuples. +* ``levels`` : list of sequences, default None. Specific levels (unique values) + to use for constructing a MultiIndex. Otherwise they will be inferred from the + keys. +* ``names`` : list, default None. Names for the levels in the resulting + hierarchical index. +* ``verify_integrity`` : boolean, default False. Check whether the new + concatenated axis contains duplicates. This can be very expensive relative + to the actual data concatenation. +* ``copy`` : boolean, default True. If False, do not copy data unnecessarily. + +Without a little bit of context many of these arguments don't make much sense. +Let's revisit the above example. Suppose we wanted to associate specific keys +with each of the pieces of the chopped up DataFrame. We can do this using the +``keys`` argument: + +.. ipython:: python + + result = pd.concat(frames, keys=['x', 'y', 'z']) + +.. 
ipython:: python + :suppress: + + @savefig merging_concat_keys.png + p.plot(frames, result, + labels=['df1', 'df2', 'df3'], vertical=True) + plt.close('all'); + +As you can see (if you've read the rest of the documentation), the resulting +object's index has a :ref:`hierarchical index <advanced.hierarchical>`. This +means that we can now select out each chunk by key: + +.. ipython:: python + + result.loc['y'] + +It's not a stretch to see how this can be very useful. More detail on this +functionality below. + +.. note:: + It is worth noting that :func:`~pandas.concat` (and therefore + :meth:`~DataFrame.append`) makes a full copy of the data, and that constantly + reusing this function can create a significant performance hit. If you need + to use the operation over several datasets, use a list comprehension. + +:: + + frames = [ process_your_file(f) for f in files ] + result = pd.concat(frames) + + +Set logic on the other axes +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When gluing together multiple DataFrames, you have a choice of how to handle +the other axes (other than the one being concatenated). This can be done in +the following two ways: + +* Take the union of them all, ``join='outer'``. This is the default + option as it results in zero information loss. +* Take the intersection, ``join='inner'``. + +Here is an example of each of these methods. First, the default ``join='outer'`` +behavior: + +.. ipython:: python + + df4 = pd.DataFrame({'B': ['B2', 'B3', 'B6', 'B7'], + 'D': ['D2', 'D3', 'D6', 'D7'], + 'F': ['F2', 'F3', 'F6', 'F7']}, + index=[2, 3, 6, 7]) + result = pd.concat([df1, df4], axis=1, sort=False) + + +.. ipython:: python + :suppress: + + @savefig merging_concat_axis1.png + p.plot([df1, df4], result, + labels=['df1', 'df4'], vertical=False); + plt.close('all'); + +.. warning:: + + .. versionchanged:: 0.23.0 + + The default behavior with ``join='outer'`` is to sort the other axis + (columns in this case). In a future version of pandas, the default will + be to not sort.
We specified ``sort=False`` to opt in to the new + behavior now. + +Here is the same thing with ``join='inner'``: + +.. ipython:: python + + result = pd.concat([df1, df4], axis=1, join='inner') + +.. ipython:: python + :suppress: + + @savefig merging_concat_axis1_inner.png + p.plot([df1, df4], result, + labels=['df1', 'df4'], vertical=False); + plt.close('all'); + +Lastly, suppose we just wanted to reuse the *exact index* from the original +DataFrame: + +.. ipython:: python + + result = pd.concat([df1, df4], axis=1).reindex(df1.index) + +Similarly, we could index before the concatenation: + +.. ipython:: python + + pd.concat([df1, df4.reindex(df1.index)], axis=1) + +.. ipython:: python + :suppress: + + @savefig merging_concat_axis1_join_axes.png + p.plot([df1, df4], result, + labels=['df1', 'df4'], vertical=False); + plt.close('all'); + +.. _merging.concatenation: + +Concatenating using ``append`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A useful shortcut to :func:`~pandas.concat` are the :meth:`~DataFrame.append` +instance methods on ``Series`` and ``DataFrame``. These methods actually predated +``concat``. They concatenate along ``axis=0``, namely the index: + +.. ipython:: python + + result = df1.append(df2) + +.. ipython:: python + :suppress: + + @savefig merging_append1.png + p.plot([df1, df2], result, + labels=['df1', 'df2'], vertical=True); + plt.close('all'); + +In the case of ``DataFrame``, the indexes must be disjoint but the columns do not +need to be: + +.. ipython:: python + + result = df1.append(df4, sort=False) + +.. ipython:: python + :suppress: + + @savefig merging_append2.png + p.plot([df1, df4], result, + labels=['df1', 'df4'], vertical=True); + plt.close('all'); + +``append`` may take multiple objects to concatenate: + +.. ipython:: python + + result = df1.append([df2, df3]) + +.. ipython:: python + :suppress: + + @savefig merging_append3.png + p.plot([df1, df2, df3], result, + labels=['df1', 'df2', 'df3'], vertical=True); + plt.close('all'); + +.. 
note:: + + Unlike the :py:meth:`~list.append` method, which appends to the original list + and returns ``None``, :meth:`~DataFrame.append` here **does not** modify + ``df1`` and returns its copy with ``df2`` appended. + +.. _merging.ignore_index: + +Ignoring indexes on the concatenation axis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +For ``DataFrame`` objects which don't have a meaningful index, you may wish +to append them and ignore the fact that they may have overlapping indexes. To +do this, use the ``ignore_index`` argument: + +.. ipython:: python + + result = pd.concat([df1, df4], ignore_index=True, sort=False) + +.. ipython:: python + :suppress: + + @savefig merging_concat_ignore_index.png + p.plot([df1, df4], result, + labels=['df1', 'df4'], vertical=True); + plt.close('all'); + +This is also a valid argument to :meth:`DataFrame.append`: + +.. ipython:: python + + result = df1.append(df4, ignore_index=True, sort=False) + +.. ipython:: python + :suppress: + + @savefig merging_append_ignore_index.png + p.plot([df1, df4], result, + labels=['df1', 'df4'], vertical=True); + plt.close('all'); + +.. _merging.mixed_ndims: + +Concatenating with mixed ndims +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can concatenate a mix of ``Series`` and ``DataFrame`` objects. The +``Series`` will be transformed to ``DataFrame`` with the column name as +the name of the ``Series``. + +.. ipython:: python + + s1 = pd.Series(['X0', 'X1', 'X2', 'X3'], name='X') + result = pd.concat([df1, s1], axis=1) + +.. ipython:: python + :suppress: + + @savefig merging_concat_mixed_ndim.png + p.plot([df1, s1], result, + labels=['df1', 's1'], vertical=False); + plt.close('all'); + +.. note:: + + Since we're concatenating a ``Series`` to a ``DataFrame``, we could have + achieved the same result with :meth:`DataFrame.assign`. To concatenate an + arbitrary number of pandas objects (``DataFrame`` or ``Series``), use + ``concat``. + +If unnamed ``Series`` are passed they will be numbered consecutively. 
+ +.. ipython:: python + + s2 = pd.Series(['_0', '_1', '_2', '_3']) + result = pd.concat([df1, s2, s2, s2], axis=1) + +.. ipython:: python + :suppress: + + @savefig merging_concat_unnamed_series.png + p.plot([df1, s2], result, + labels=['df1', 's2'], vertical=False); + plt.close('all'); + +Passing ``ignore_index=True`` will drop all name references. + +.. ipython:: python + + result = pd.concat([df1, s1], axis=1, ignore_index=True) + +.. ipython:: python + :suppress: + + @savefig merging_concat_series_ignore_index.png + p.plot([df1, s1], result, + labels=['df1', 's1'], vertical=False); + plt.close('all'); + +More concatenating with group keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A fairly common use of the ``keys`` argument is to override the column names +when creating a new ``DataFrame`` based on existing ``Series``. +Notice how the default behaviour consists in letting the resulting ``DataFrame`` +inherit the parent ``Series``' name, when these existed. + +.. ipython:: python + + s3 = pd.Series([0, 1, 2, 3], name='foo') + s4 = pd.Series([0, 1, 2, 3]) + s5 = pd.Series([0, 1, 4, 5]) + + pd.concat([s3, s4, s5], axis=1) + +Through the ``keys`` argument we can override the existing column names. + +.. ipython:: python + + pd.concat([s3, s4, s5], axis=1, keys=['red', 'blue', 'yellow']) + +Let's consider a variation of the very first example presented: + +.. ipython:: python + + result = pd.concat(frames, keys=['x', 'y', 'z']) + +.. ipython:: python + :suppress: + + @savefig merging_concat_group_keys2.png + p.plot(frames, result, + labels=['df1', 'df2', 'df3'], vertical=True); + plt.close('all'); + +You can also pass a dict to ``concat`` in which case the dict keys will be used +for the ``keys`` argument (unless other keys are specified): + +.. ipython:: python + + pieces = {'x': df1, 'y': df2, 'z': df3} + result = pd.concat(pieces) + +..
ipython:: python + :suppress: + + @savefig merging_concat_dict.png + p.plot([df1, df2, df3], result, + labels=['df1', 'df2', 'df3'], vertical=True); + plt.close('all'); + +.. ipython:: python + + result = pd.concat(pieces, keys=['z', 'y']) + +.. ipython:: python + :suppress: + + @savefig merging_concat_dict_keys.png + p.plot([df1, df2, df3], result, + labels=['df1', 'df2', 'df3'], vertical=True); + plt.close('all'); + +The MultiIndex created has levels that are constructed from the passed keys and +the index of the ``DataFrame`` pieces: + +.. ipython:: python + + result.index.levels + +If you wish to specify other levels (as will occasionally be the case), you can +do so using the ``levels`` argument: + +.. ipython:: python + + result = pd.concat(pieces, keys=['x', 'y', 'z'], + levels=[['z', 'y', 'x', 'w']], + names=['group_key']) + +.. ipython:: python + :suppress: + + @savefig merging_concat_dict_keys_names.png + p.plot([df1, df2, df3], result, + labels=['df1', 'df2', 'df3'], vertical=True); + plt.close('all'); + +.. ipython:: python + + result.index.levels + +This is fairly esoteric, but it is actually necessary for implementing things +like GroupBy where the order of a categorical variable is meaningful. + +.. _merging.append.row: + +Appending rows to a DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +While not especially efficient (since a new object must be created), you can +append a single row to a ``DataFrame`` by passing a ``Series`` or dict to +``append``, which returns a new ``DataFrame`` as above. + +.. ipython:: python + + s2 = pd.Series(['X0', 'X1', 'X2', 'X3'], index=['A', 'B', 'C', 'D']) + result = df1.append(s2, ignore_index=True) + +.. ipython:: python + :suppress: + + @savefig merging_append_series_as_row.png + p.plot([df1, s2], result, + labels=['df1', 's2'], vertical=True); + plt.close('all'); + +You should use ``ignore_index`` with this method to instruct DataFrame to +discard its index. 
If you wish to preserve the index, you should construct an +appropriately-indexed DataFrame and append or concatenate those objects. + +You can also pass a list of dicts or Series: + +.. ipython:: python + + dicts = [{'A': 1, 'B': 2, 'C': 3, 'X': 4}, + {'A': 5, 'B': 6, 'C': 7, 'Y': 8}] + result = df1.append(dicts, ignore_index=True, sort=False) + +.. ipython:: python + :suppress: + + @savefig merging_append_dits.png + p.plot([df1, pd.DataFrame(dicts)], result, + labels=['df1', 'dicts'], vertical=True); + plt.close('all'); + +.. _merging.join: + +Database-style DataFrame or named Series joining/merging +-------------------------------------------------------- + +pandas has full-featured, **high performance** in-memory join operations +idiomatically very similar to relational databases like SQL. These methods +perform significantly better (in some cases well over an order of magnitude +better) than other open source implementations (like ``base::merge.data.frame`` +in R). The reason for this is careful algorithmic design and the internal layout +of the data in ``DataFrame``. + +See the :ref:`cookbook<cookbook.merge>` for some advanced strategies. + +Users who are familiar with SQL but new to pandas might be interested in a +:ref:`comparison with SQL<compare_with_sql.join>`. + +pandas provides a single function, :func:`~pandas.merge`, as the entry point for +all standard database join operations between ``DataFrame`` or named ``Series`` objects: + +:: + + pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None, + left_index=False, right_index=False, sort=True, + suffixes=('_x', '_y'), copy=True, indicator=False, + validate=None) + +* ``left``: A DataFrame or named Series object. +* ``right``: Another DataFrame or named Series object. +* ``on``: Column or index level names to join on. Must be found in both the left + and right DataFrame and/or Series objects.
If not passed and ``left_index`` and + ``right_index`` are ``False``, the intersection of the columns in the + DataFrames and/or Series will be inferred to be the join keys. +* ``left_on``: Columns or index levels from the left DataFrame or Series to use as + keys. Can either be column names, index level names, or arrays with length + equal to the length of the DataFrame or Series. +* ``right_on``: Columns or index levels from the right DataFrame or Series to use as + keys. Can either be column names, index level names, or arrays with length + equal to the length of the DataFrame or Series. +* ``left_index``: If ``True``, use the index (row labels) from the left + DataFrame or Series as its join key(s). In the case of a DataFrame or Series with a MultiIndex + (hierarchical), the number of levels must match the number of join keys + from the right DataFrame or Series. +* ``right_index``: Same usage as ``left_index`` for the right DataFrame or Series +* ``how``: One of ``'left'``, ``'right'``, ``'outer'``, ``'inner'``. Defaults + to ``inner``. See below for more detailed description of each method. +* ``sort``: Sort the result DataFrame by the join keys in lexicographical + order. Defaults to ``True``, setting to ``False`` will improve performance + substantially in many cases. +* ``suffixes``: A tuple of string suffixes to apply to overlapping + columns. Defaults to ``('_x', '_y')``. +* ``copy``: Always copy data (default ``True``) from the passed DataFrame or named Series + objects, even when reindexing is not necessary. Cannot be avoided in many + cases but may improve performance / memory usage. The cases where copying + can be avoided are somewhat pathological but this option is provided + nonetheless. +* ``indicator``: Add a column to the output DataFrame called ``_merge`` + with information on the source of each row. 
``_merge`` is Categorical-type + and takes on a value of ``left_only`` for observations whose merge key + only appears in ``'left'`` DataFrame or Series, ``right_only`` for observations whose + merge key only appears in ``'right'`` DataFrame or Series, and ``both`` if the + observation's merge key is found in both. + +* ``validate`` : string, default None. + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": checks if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": checks if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": checks if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + + .. versionadded:: 0.21.0 + +.. note:: + + Support for specifying index levels as the ``on``, ``left_on``, and + ``right_on`` parameters was added in version 0.23.0. + Support for merging named ``Series`` objects was added in version 0.24.0. + +The return type will be the same as ``left``. If ``left`` is a ``DataFrame`` or named ``Series`` +and ``right`` is a subclass of ``DataFrame``, the return type will still be ``DataFrame``. + +``merge`` is a function in the pandas namespace, and it is also available as a +``DataFrame`` instance method :meth:`~DataFrame.merge`, with the calling +``DataFrame`` being implicitly considered the left object in the join. + +The related :meth:`~DataFrame.join` method, uses ``merge`` internally for the +index-on-index (by default) and column(s)-on-index join. If you are joining on +index only, you may wish to use ``DataFrame.join`` to save yourself some typing. + +Brief primer on merge methods (relational algebra) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Experienced users of relational databases like SQL will be familiar with the +terminology used to describe join operations between two SQL-table like +structures (``DataFrame`` objects). 
There are several cases to consider which +are very important to understand: + +* **one-to-one** joins: for example when joining two ``DataFrame`` objects on + their indexes (which must contain unique values). +* **many-to-one** joins: for example when joining an index (unique) to one or + more columns in a different ``DataFrame``. +* **many-to-many** joins: joining columns on columns. + +.. note:: + + When joining columns on columns (potentially a many-to-many join), any + indexes on the passed ``DataFrame`` objects **will be discarded**. + + +It is worth spending some time understanding the result of the **many-to-many** +join case. In SQL / standard relational algebra, if a key combination appears +more than once in both tables, the resulting table will have the **Cartesian +product** of the associated data. Here is a very basic example with one unique +key combination: + +.. ipython:: python + + left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], + 'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3']}) + + right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'], + 'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}) + result = pd.merge(left, right, on='key') + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +Here is a more complicated example with multiple join keys. Only the keys +appearing in ``left`` and ``right`` are present (the intersection), since +``how='inner'`` by default. + +.. ipython:: python + + left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'], + 'key2': ['K0', 'K1', 'K0', 'K1'], + 'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3']}) + + right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'], + 'key2': ['K0', 'K0', 'K0', 'K0'], + 'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}) + + result = pd.merge(left, right, on=['key1', 'key2']) + +.. 
ipython:: python + :suppress: + + @savefig merging_merge_on_key_multiple.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +The ``how`` argument to ``merge`` specifies how to determine which keys are to +be included in the resulting table. If a key combination **does not appear** in +either the left or right tables, the values in the joined table will be +``NA``. Here is a summary of the ``how`` options and their SQL equivalent names: + +.. csv-table:: + :header: "Merge method", "SQL Join Name", "Description" + :widths: 20, 20, 60 + + ``left``, ``LEFT OUTER JOIN``, Use keys from left frame only + ``right``, ``RIGHT OUTER JOIN``, Use keys from right frame only + ``outer``, ``FULL OUTER JOIN``, Use union of keys from both frames + ``inner``, ``INNER JOIN``, Use intersection of keys from both frames + +.. ipython:: python + + result = pd.merge(left, right, how='left', on=['key1', 'key2']) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_left.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. ipython:: python + + result = pd.merge(left, right, how='right', on=['key1', 'key2']) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_right.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + +.. ipython:: python + + result = pd.merge(left, right, how='outer', on=['key1', 'key2']) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_outer.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. ipython:: python + + result = pd.merge(left, right, how='inner', on=['key1', 'key2']) + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_inner.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +Here is another example with duplicate join keys in DataFrames: + +.. 
ipython:: python + + left = pd.DataFrame({'A': [1, 2], 'B': [2, 2]}) + + right = pd.DataFrame({'A': [4, 5, 6], 'B': [2, 2, 2]}) + + result = pd.merge(left, right, on='B', how='outer') + +.. ipython:: python + :suppress: + + @savefig merging_merge_on_key_dup.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + + +.. warning:: + + Joining / merging on duplicate keys can cause a returned frame that is the multiplication of the row dimensions, which may result in memory overflow. It is the user's responsibility to manage duplicate values in keys before joining large DataFrames. + +.. _merging.validation: + +Checking for duplicate keys +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.21.0 + +Users can use the ``validate`` argument to automatically check whether there +are unexpected duplicates in their merge keys. Key uniqueness is checked before +merge operations and so should protect against memory overflows. Checking key +uniqueness is also a good way to ensure user data structures are as expected. + +In the following example, there are duplicate values of ``B`` in the right +``DataFrame``. As this is not a one-to-one merge -- as specified in the +``validate`` argument -- an exception will be raised. + + +.. ipython:: python + + left = pd.DataFrame({'A' : [1,2], 'B' : [1, 2]}) + right = pd.DataFrame({'A' : [4,5,6], 'B': [2, 2, 2]}) + +.. code-block:: ipython + + In [53]: result = pd.merge(left, right, on='B', how='outer', validate="one_to_one") + ... + MergeError: Merge keys are not unique in right dataset; not a one-to-one merge + +If the user is aware of the duplicates in the right ``DataFrame`` but wants to +ensure there are no duplicates in the left DataFrame, one can use the +``validate='one_to_many'`` argument instead, which will not raise an exception. + +.. ipython:: python + + pd.merge(left, right, on='B', how='outer', validate="one_to_many") + + +.. 
_merging.indicator: + +The merge indicator +~~~~~~~~~~~~~~~~~~~ + +:func:`~pandas.merge` accepts the argument ``indicator``. If ``True``, a +Categorical-type column called ``_merge`` will be added to the output object +that takes on values: + + =================================== ================ + Observation Origin ``_merge`` value + =================================== ================ + Merge key only in ``'left'`` frame ``left_only`` + Merge key only in ``'right'`` frame ``right_only`` + Merge key in both frames ``both`` + =================================== ================ + +.. ipython:: python + + df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']}) + df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]}) + pd.merge(df1, df2, on='col1', how='outer', indicator=True) + +The ``indicator`` argument will also accept string arguments, in which case the indicator function will use the value of the passed string as the name for the indicator column. + +.. ipython:: python + + pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column') + + +.. _merging.dtypes: + +Merge dtypes +~~~~~~~~~~~~ + +Merging will preserve the dtype of the join keys. + +.. ipython:: python + + left = pd.DataFrame({'key': [1], 'v1': [10]}) + left + right = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]}) + right + +We are able to preserve the join keys: + +.. ipython:: python + + pd.merge(left, right, how='outer') + pd.merge(left, right, how='outer').dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast. + +.. ipython:: python + + pd.merge(left, right, how='outer', on='key') + pd.merge(left, right, how='outer', on='key').dtypes + +Merging will preserve ``category`` dtypes of the mergands. See also the section on :ref:`categoricals <categorical.merge>`. + +The left frame. + +.. 
ipython:: python + + from pandas.api.types import CategoricalDtype + + X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) + X = X.astype(CategoricalDtype(categories=['foo', 'bar'])) + + left = pd.DataFrame({'X': X, + 'Y': np.random.choice(['one', 'two', 'three'], + size=(10,))}) + left + left.dtypes + +The right frame. + +.. ipython:: python + + right = pd.DataFrame({'X': pd.Series(['foo', 'bar'], + dtype=CategoricalDtype(['foo', 'bar'])), + 'Z': [1, 2]}) + right + right.dtypes + +The merged result: + +.. ipython:: python + + result = pd.merge(left, right, how='outer') + result + result.dtypes + +.. note:: + + The category dtypes must be *exactly* the same, meaning the same categories and the ordered attribute. + Otherwise the result will coerce to the categories' dtype. + +.. note:: + + Merging on ``category`` dtypes that are the same can be quite performant compared to ``object`` dtype merging. + +.. _merging.join.index: + +Joining on index +~~~~~~~~~~~~~~~~ + +:meth:`DataFrame.join` is a convenient method for combining the columns of two +potentially differently-indexed ``DataFrames`` into a single result +``DataFrame``. Here is a very basic example: + +.. ipython:: python + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2']}, + index=['K0', 'K1', 'K2']) + + right = pd.DataFrame({'C': ['C0', 'C2', 'C3'], + 'D': ['D0', 'D2', 'D3']}, + index=['K0', 'K2', 'K3']) + + result = left.join(right) + +.. ipython:: python + :suppress: + + @savefig merging_join.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. ipython:: python + + result = left.join(right, how='outer') + +.. ipython:: python + :suppress: + + @savefig merging_join_outer.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +The same as above, but with ``how='inner'``. + +.. ipython:: python + + result = left.join(right, how='inner') + +.. 
ipython:: python + :suppress: + + @savefig merging_join_inner.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +The data alignment here is on the indexes (row labels). This same behavior can +be achieved using ``merge`` plus additional arguments instructing it to use the +indexes: + +.. ipython:: python + + result = pd.merge(left, right, left_index=True, right_index=True, how='outer') + +.. ipython:: python + :suppress: + + @savefig merging_merge_index_outer.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. ipython:: python + + result = pd.merge(left, right, left_index=True, right_index=True, how='inner'); + +.. ipython:: python + :suppress: + + @savefig merging_merge_index_inner.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +Joining key columns on an index +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~DataFrame.join` takes an optional ``on`` argument which may be a column +or multiple column names, which specifies that the passed ``DataFrame`` is to be +aligned on that column in the ``DataFrame``. These two function calls are +completely equivalent: + +:: + + left.join(right, on=key_or_keys) + pd.merge(left, right, left_on=key_or_keys, right_index=True, + how='left', sort=False) + +Obviously you can choose whichever form you find more convenient. For +many-to-one joins (where one of the ``DataFrame``'s is already indexed by the +join key), using ``join`` may be more convenient. Here is a simple example: + +.. ipython:: python + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key': ['K0', 'K1', 'K0', 'K1']}) + + right = pd.DataFrame({'C': ['C0', 'C1'], + 'D': ['D0', 'D1']}, + index=['K0', 'K1']) + + result = left.join(right, on='key') + +.. 
ipython:: python + :suppress: + + @savefig merging_join_key_columns.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. ipython:: python + + result = pd.merge(left, right, left_on='key', right_index=True, + how='left', sort=False); + +.. ipython:: python + :suppress: + + @savefig merging_merge_key_columns.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. _merging.multikey_join: + +To join on multiple keys, the passed DataFrame must have a ``MultiIndex``: + +.. ipython:: python + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key1': ['K0', 'K0', 'K1', 'K2'], + 'key2': ['K0', 'K1', 'K0', 'K1']}) + + index = pd.MultiIndex.from_tuples([('K0', 'K0'), ('K1', 'K0'), + ('K2', 'K0'), ('K2', 'K1')]) + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}, + index=index) + +Now this can be joined by passing the two key column names: + +.. ipython:: python + + result = left.join(right, on=['key1', 'key2']) + +.. ipython:: python + :suppress: + + @savefig merging_join_multikeys.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. _merging.df_inner_join: + +The default for ``DataFrame.join`` is to perform a left join (essentially a +"VLOOKUP" operation, for Excel users), which uses only the keys found in the +calling DataFrame. Other join types, for example inner join, can be just as +easily performed: + +.. ipython:: python + + result = left.join(right, on=['key1', 'key2'], how='inner') + +.. ipython:: python + :suppress: + + @savefig merging_join_multikeys_inner.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +As you can see, this drops any rows where there was no match. + +.. 
_merging.join_on_mi: + +Joining a single Index to a MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can join a singly-indexed ``DataFrame`` with a level of a MultiIndexed ``DataFrame``. +The level will match on the name of the index of the singly-indexed frame against +a level name of the MultiIndexed frame. + +.. ipython:: python + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2']}, + index=pd.Index(['K0', 'K1', 'K2'], name='key')) + + index = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'), + ('K2', 'Y2'), ('K2', 'Y3')], + names=['key', 'Y']) + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}, + index=index) + + result = left.join(right, how='inner') + +.. ipython:: python + :suppress: + + @savefig merging_join_multiindex_inner.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +This is equivalent but less verbose and more memory efficient / faster than this. + +.. ipython:: python + + result = pd.merge(left.reset_index(), right.reset_index(), + on=['key'], how='inner').set_index(['key','Y']) + +.. ipython:: python + :suppress: + + @savefig merging_merge_multiindex_alternative.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. _merging.join_with_two_multi_indexes: + +Joining with two MultiIndexes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is supported in a limited way, provided that the index for the right +argument is completely used in the join, and is a subset of the indices in +the left argument, as in this example: + +.. 
ipython:: python + + leftindex = pd.MultiIndex.from_product([list('abc'), list('xy'), [1, 2]], + names=['abc', 'xy', 'num']) + left = pd.DataFrame({'v1': range(12)}, index=leftindex) + left + + rightindex = pd.MultiIndex.from_product([list('abc'), list('xy')], + names=['abc', 'xy']) + right = pd.DataFrame({'v2': [100 * i for i in range(1, 7)]}, index=rightindex) + right + + left.join(right, on=['abc', 'xy'], how='inner') + +If that condition is not satisfied, a join with two multi-indexes can be +done using the following code. + +.. ipython:: python + + leftindex = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), + ('K1', 'X2')], + names=['key', 'X']) + left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2']}, + index=leftindex) + + rightindex = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'), + ('K2', 'Y2'), ('K2', 'Y3')], + names=['key', 'Y']) + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}, + index=rightindex) + + result = pd.merge(left.reset_index(), right.reset_index(), + on=['key'], how='inner').set_index(['key', 'X', 'Y']) + +.. ipython:: python + :suppress: + + @savefig merging_merge_two_multiindex.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. _merging.merge_on_columns_and_levels: + +Merging on a combination of columns and index levels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 0.23 + +Strings passed as the ``on``, ``left_on``, and ``right_on`` parameters +may refer to either column names or index level names. This enables merging +``DataFrame`` instances on a combination of index levels and columns without +resetting indexes. + +.. 
ipython:: python + + left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key2': ['K0', 'K1', 'K0', 'K1']}, + index=left_index) + + right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3'], + 'key2': ['K0', 'K0', 'K0', 'K1']}, + index=right_index) + + result = left.merge(right, on=['key1', 'key2']) + +.. ipython:: python + :suppress: + + @savefig merge_on_index_and_column.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. note:: + + When DataFrames are merged on a string that matches an index level in both + frames, the index level is preserved as an index level in the resulting + DataFrame. + +.. note:: + When DataFrames are merged using only some of the levels of a ``MultiIndex``, + the extra levels will be dropped from the resulting merge. In order to + preserve those levels, use ``reset_index`` on those level names to move + those levels to columns prior to doing the merge. + +.. note:: + + If a string matches both a column name and an index level name, then a + warning is issued and the column takes precedence. This will result in an + ambiguity error in a future version. + +Overlapping value columns +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The merge ``suffixes`` argument takes a tuple or list of strings to append to +overlapping column names in the input ``DataFrame``\ s to disambiguate the result +columns: + +.. ipython:: python + + left = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'v': [1, 2, 3]}) + right = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'v': [4, 5, 6]}) + + result = pd.merge(left, right, on='k') + +.. ipython:: python + :suppress: + + @savefig merging_merge_overlapped.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. 
ipython:: python + + result = pd.merge(left, right, on='k', suffixes=['_l', '_r']) + +.. ipython:: python + :suppress: + + @savefig merging_merge_overlapped_suffix.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +:meth:`DataFrame.join` has ``lsuffix`` and ``rsuffix`` arguments which behave +similarly. + +.. ipython:: python + + left = left.set_index('k') + right = right.set_index('k') + result = left.join(right, lsuffix='_l', rsuffix='_r') + +.. ipython:: python + :suppress: + + @savefig merging_merge_overlapped_multi_suffix.png + p.plot([left, right], result, + labels=['left', 'right'], vertical=False); + plt.close('all'); + +.. _merging.multiple_join: + +Joining multiple DataFrames +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A list or tuple of ``DataFrames`` can also be passed to :meth:`~DataFrame.join` +to join them together on their indexes. + +.. ipython:: python + + right2 = pd.DataFrame({'v': [7, 8, 9]}, index=['K1', 'K1', 'K2']) + result = left.join([right, right2]) + +.. ipython:: python + :suppress: + + @savefig merging_join_multi_df.png + p.plot([left, right, right2], result, + labels=['left', 'right', 'right2'], vertical=False); + plt.close('all'); + +.. _merging.combine_first.update: + +Merging together values within Series or DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Another fairly common situation is to have two like-indexed (or similarly +indexed) ``Series`` or ``DataFrame`` objects and wanting to "patch" values in +one object from values for matching indices in the other. Here is an example: + +.. ipython:: python + + df1 = pd.DataFrame([[np.nan, 3., 5.], [-4.6, np.nan, np.nan], + [np.nan, 7., np.nan]]) + df2 = pd.DataFrame([[-42.6, np.nan, -8.2], [-5., 1.6, 4]], + index=[1, 2]) + +For this, use the :meth:`~DataFrame.combine_first` method: + +.. ipython:: python + + result = df1.combine_first(df2) + +.. 
ipython:: python + :suppress: + + @savefig merging_combine_first.png + p.plot([df1, df2], result, + labels=['df1', 'df2'], vertical=False); + plt.close('all'); + +Note that this method only takes values from the right ``DataFrame`` if they are +missing in the left ``DataFrame``. A related method, :meth:`~DataFrame.update`, +alters non-NA values in place: + +.. ipython:: python + :suppress: + + df1_copy = df1.copy() + +.. ipython:: python + + df1.update(df2) + +.. ipython:: python + :suppress: + + @savefig merging_update.png + p.plot([df1_copy, df2], df1, + labels=['df1', 'df2'], vertical=False); + plt.close('all'); + +.. _merging.time_series: + +Timeseries friendly merging +--------------------------- + +.. _merging.merge_ordered: + +Merging ordered data +~~~~~~~~~~~~~~~~~~~~ + +A :func:`merge_ordered` function allows combining time series and other +ordered data. In particular it has an optional ``fill_method`` keyword to +fill/interpolate missing data: + +.. ipython:: python + + left = pd.DataFrame({'k': ['K0', 'K1', 'K1', 'K2'], + 'lv': [1, 2, 3, 4], + 's': ['a', 'b', 'c', 'd']}) + + right = pd.DataFrame({'k': ['K1', 'K2', 'K4'], + 'rv': [1, 2, 3]}) + + pd.merge_ordered(left, right, fill_method='ffill', left_by='s') + +.. _merging.merge_asof: + +Merging asof +~~~~~~~~~~~~ + +A :func:`merge_asof` is similar to an ordered left-join except that we match on +nearest key rather than equal keys. For each row in the ``left`` ``DataFrame``, +we select the last row in the ``right`` ``DataFrame`` whose ``on`` key is less +than or equal to the left's key. Both DataFrames must be sorted by the key. + +Optionally an asof merge can perform a group-wise merge. This matches the +``by`` key equally, in addition to the nearest match on the ``on`` key. + +For example, we might have ``trades`` and ``quotes`` and we want to ``asof`` +merge them. + +.. 
ipython:: python + + trades = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.038', + '20160525 13:30:00.048', + '20160525 13:30:00.048', + '20160525 13:30:00.048']), + 'ticker': ['MSFT', 'MSFT', + 'GOOG', 'GOOG', 'AAPL'], + 'price': [51.95, 51.95, + 720.77, 720.92, 98.00], + 'quantity': [75, 155, + 100, 100, 100]}, + columns=['time', 'ticker', 'price', 'quantity']) + + quotes = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.023', + '20160525 13:30:00.030', + '20160525 13:30:00.041', + '20160525 13:30:00.048', + '20160525 13:30:00.049', + '20160525 13:30:00.072', + '20160525 13:30:00.075']), + 'ticker': ['GOOG', 'MSFT', 'MSFT', + 'MSFT', 'GOOG', 'AAPL', 'GOOG', + 'MSFT'], + 'bid': [720.50, 51.95, 51.97, 51.99, + 720.50, 97.99, 720.50, 52.01], + 'ask': [720.93, 51.96, 51.98, 52.00, + 720.93, 98.01, 720.88, 52.03]}, + columns=['time', 'ticker', 'bid', 'ask']) + +.. ipython:: python + + trades + quotes + +By default we are taking the asof of the quotes. + +.. ipython:: python + + pd.merge_asof(trades, quotes, + on='time', + by='ticker') + +We only asof within ``2ms`` between the quote time and the trade time. + +.. ipython:: python + + pd.merge_asof(trades, quotes, + on='time', + by='ticker', + tolerance=pd.Timedelta('2ms')) + +We only asof within ``10ms`` between the quote time and the trade time and we +exclude exact matches on time. Note that though we exclude the exact matches +(of the quotes), prior quotes **do** propagate to that point in time. + +.. ipython:: python + + pd.merge_asof(trades, quotes, + on='time', + by='ticker', + tolerance=pd.Timedelta('10ms'), + allow_exact_matches=False) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst new file mode 100644 index 00000000..85f063f1 --- /dev/null +++ b/doc/source/user_guide/missing_data.rst @@ -0,0 +1,971 @@ +.. 
_missing_data: + +{{ header }} + +************************* +Working with missing data +************************* + +In this section, we will discuss missing (also referred to as NA) values in +pandas. + +.. note:: + + The choice of using ``NaN`` internally to denote missing data was largely + for simplicity and performance reasons. + Starting from pandas 1.0, some optional data types start experimenting + with a native ``NA`` scalar using a mask-based approach. See + :ref:`here <missing_data.NA>` for more. + +See the :ref:`cookbook<cookbook.missing_data>` for some advanced strategies. + +Values considered "missing" +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As data comes in many shapes and forms, pandas aims to be flexible with regard +to handling missing data. While ``NaN`` is the default missing value marker for +reasons of computational speed and convenience, we need to be able to easily +detect this value with data of different types: floating point, integer, +boolean, and general object. In many cases, however, the Python ``None`` will +arise and we wish to also consider that "missing" or "not available" or "NA". + +.. note:: + + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. + +.. _missing.isna: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 3), index=['a', 'c', 'e', 'f', 'h'], + columns=['one', 'two', 'three']) + df['four'] = 'bar' + df['five'] = df['one'] > 0 + df + df2 = df.reindex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']) + df2 + +To make detecting missing values easier (and across different array dtypes), +pandas provides the :func:`isna` and +:func:`notna` functions, which are also methods on +Series and DataFrame objects: + +.. ipython:: python + + df2['one'] + pd.isna(df2['one']) + df2['four'].notna() + df2.isna() + +.. warning:: + + One has to be mindful that in Python (and NumPy), the ``nan's`` don't compare equal, but ``None's`` **do**. 
+ Note that pandas/NumPy uses the fact that ``np.nan != np.nan``, and treats ``None`` like ``np.nan``. + + .. ipython:: python + + None == None # noqa: E711 + np.nan == np.nan + + So as compared to above, a scalar equality comparison versus a ``None/np.nan`` doesn't provide useful information. + + .. ipython:: python + + df2['one'] == np.nan + +Integer dtypes and missing data +------------------------------- + +Because ``NaN`` is a float, a column of integers with even one missing value +is cast to floating-point dtype (see :ref:`gotchas.intna` for more). Pandas +provides a nullable integer array, which can be used by explicitly requesting +the dtype: + +.. ipython:: python + + pd.Series([1, 2, np.nan, 4], dtype=pd.Int64Dtype()) + +Alternatively, the string alias ``dtype='Int64'`` (note the capital ``"I"``) can be +used. + +See :ref:`integer_na` for more. + +Datetimes +--------- + +For datetime64[ns] types, ``NaT`` represents missing values. This is a pseudo-native +sentinel value that can be represented by NumPy in a singular dtype (datetime64[ns]). +pandas objects provide compatibility between ``NaT`` and ``NaN``. + +.. ipython:: python + + df2 = df.copy() + df2['timestamp'] = pd.Timestamp('20120101') + df2 + df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan + df2 + df2.dtypes.value_counts() + +.. _missing.inserting: + +Inserting missing data +~~~~~~~~~~~~~~~~~~~~~~ + +You can insert missing values by simply assigning to containers. The +actual missing value used will be chosen based on the dtype. + +For example, numeric containers will always use ``NaN`` regardless of +the missing value type chosen: + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s.loc[0] = None + s + +Likewise, datetime containers will always use ``NaT``. + +For object containers, pandas will use the value given: + +.. ipython:: python + + s = pd.Series(["a", "b", "c"]) + s.loc[0] = None + s.loc[1] = np.nan + s + +.. 
_missing_data.calculations: + +Calculations with missing data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Missing values propagate naturally through arithmetic operations between pandas +objects. + +.. ipython:: python + :suppress: + + df = df2.loc[:, ['one', 'two', 'three']] + a = df2.loc[df2.index[:5], ['one', 'two']].fillna(method='pad') + b = df2.loc[df2.index[:5], ['one', 'two', 'three']] + +.. ipython:: python + + a + b + a + b + +The descriptive statistics and computational methods discussed in the +:ref:`data structure overview ` (and listed :ref:`here +` and :ref:`here `) are all written to +account for missing data. For example: + +* When summing data, NA (missing) values will be treated as zero. +* If the data are all NA, the result will be 0. +* Cumulative methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` ignore NA values by default, but preserve them in the resulting arrays. To override this behaviour and include NA values, use ``skipna=False``. + +.. ipython:: python + + df + df['one'].sum() + df.mean(1) + df.cumsum() + df.cumsum(skipna=False) + + +.. _missing_data.numeric_sum: + +Sum/prod of empties/nans +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + This behavior is now standard as of v0.22.0 and is consistent with the default in ``numpy``; previously sum/prod of all-NA or empty Series/DataFrames would return NaN. + See :ref:`v0.22.0 whatsnew ` for more. + +The sum of an empty or all-NA Series or column of a DataFrame is 0. + +.. ipython:: python + + pd.Series([np.nan]).sum() + + pd.Series([], dtype="float64").sum() + +The product of an empty or all-NA Series or column of a DataFrame is 1. + +.. ipython:: python + + pd.Series([np.nan]).prod() + + pd.Series([], dtype="float64").prod() + + +NA values in GroupBy +~~~~~~~~~~~~~~~~~~~~ + +NA groups in GroupBy are automatically excluded. This behavior is consistent +with R, for example: + +.. 
ipython:: python + + df + df.groupby('one').mean() + +See the groupby section :ref:`here ` for more information. + +Cleaning / filling missing data +-------------------------------- + +pandas objects are equipped with various data manipulation methods for dealing +with missing data. + +.. _missing_data.fillna: + +Filling missing values: fillna +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~DataFrame.fillna` can "fill in" NA values with non-NA data in a couple +of ways, which we illustrate: + +**Replace NA with a scalar value** + +.. ipython:: python + + df2 + df2.fillna(0) + df2['one'].fillna('missing') + +**Fill gaps forward or backward** + +Using the same filling arguments as :ref:`reindexing `, we +can propagate non-NA values forward or backward: + +.. ipython:: python + + df + df.fillna(method='pad') + +.. _missing_data.fillna.limit: + +**Limit the amount of filling** + +If we only want consecutive gaps filled up to a certain number of data points, +we can use the `limit` keyword: + +.. ipython:: python + :suppress: + + df.iloc[2:4, :] = np.nan + +.. ipython:: python + + df + df.fillna(method='pad', limit=1) + +To remind you, these are the available filling methods: + +.. csv-table:: + :header: "Method", "Action" + :widths: 30, 50 + + pad / ffill, Fill values forward + bfill / backfill, Fill values backward + +With time series data, using pad/ffill is extremely common so that the "last +known value" is available at every time point. + +:meth:`~DataFrame.ffill` is equivalent to ``fillna(method='ffill')`` +and :meth:`~DataFrame.bfill` is equivalent to ``fillna(method='bfill')`` + +.. _missing_data.PandasObject: + +Filling with a PandasObject +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series +must match the columns of the frame you wish to fill. The +use case of this is to fill a DataFrame with the mean of that column. + +.. 
ipython:: python + + dff = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC')) + dff.iloc[3:5, 0] = np.nan + dff.iloc[4:6, 1] = np.nan + dff.iloc[5:8, 2] = np.nan + dff + + dff.fillna(dff.mean()) + dff.fillna(dff.mean()['B':'C']) + +Same result as above, but is aligning the 'fill' value which is +a Series in this case. + +.. ipython:: python + + dff.where(pd.notna(dff), dff.mean(), axis='columns') + + +.. _missing_data.dropna: + +Dropping axis labels with missing data: dropna +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You may wish to simply exclude labels from a data set which refer to missing +data. To do this, use :meth:`~DataFrame.dropna`: + +.. ipython:: python + :suppress: + + df['two'] = df['two'].fillna(0) + df['three'] = df['three'].fillna(0) + +.. ipython:: python + + df + df.dropna(axis=0) + df.dropna(axis=1) + df['one'].dropna() + +An equivalent :meth:`~Series.dropna` is available for Series. +DataFrame.dropna has considerably more options than Series.dropna, which can be +examined :ref:`in the API `. + +.. _missing_data.interpolate: + +Interpolation +~~~~~~~~~~~~~ + +.. versionadded:: 0.23.0 + + The ``limit_area`` keyword argument was added. + +Both Series and DataFrame objects have :meth:`~DataFrame.interpolate` +that, by default, performs linear interpolation at missing data points. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + idx = pd.date_range('1/1/2000', periods=100, freq='BM') + ts = pd.Series(np.random.randn(100), index=idx) + ts[1:5] = np.nan + ts[20:30] = np.nan + ts[60:80] = np.nan + ts = ts.cumsum() + +.. ipython:: python + + ts + ts.count() + @savefig series_before_interpolate.png + ts.plot() + +.. ipython:: python + + ts.interpolate() + ts.interpolate().count() + + @savefig series_interpolate.png + ts.interpolate().plot() + +Index aware interpolation is available via the ``method`` keyword: + +.. ipython:: python + :suppress: + + ts2 = ts[[0, 1, 30, 60, 99]] + +.. 
ipython:: python + + ts2 + ts2.interpolate() + ts2.interpolate(method='time') + +For a floating-point index, use ``method='values'``: + +.. ipython:: python + :suppress: + + idx = [0., 1., 10.] + ser = pd.Series([0., np.nan, 10.], idx) + +.. ipython:: python + + ser + ser.interpolate() + ser.interpolate(method='values') + +You can also interpolate with a DataFrame: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2.1, np.nan, 4.7, 5.6, 6.8], + 'B': [.25, np.nan, np.nan, 4, 12.2, 14.4]}) + df + df.interpolate() + +The ``method`` argument gives access to fancier interpolation methods. +If you have scipy_ installed, you can pass the name of a 1-d interpolation routine to ``method``. +You'll want to consult the full scipy interpolation documentation_ and reference guide_ for details. +The appropriate interpolation method will depend on the type of data you are working with. + +* If you are dealing with a time series that is growing at an increasing rate, + ``method='quadratic'`` may be appropriate. +* If you have values approximating a cumulative distribution function, + then ``method='pchip'`` should work well. +* To fill missing values with the goal of smooth plotting, consider ``method='akima'``. + +.. warning:: + + These methods require ``scipy``. + +.. ipython:: python + + df.interpolate(method='barycentric') + + df.interpolate(method='pchip') + + df.interpolate(method='akima') + +When interpolating via a polynomial or spline approximation, you must also specify +the degree or order of the approximation: + +.. ipython:: python + + df.interpolate(method='spline', order=2) + + df.interpolate(method='polynomial', order=2) + +Compare several methods: + +..
ipython:: python + + np.random.seed(2) + + ser = pd.Series(np.arange(1, 10.1, .25) ** 2 + np.random.randn(37)) + missing = np.array([4, 13, 14, 15, 16, 17, 18, 20, 29]) + ser[missing] = np.nan + methods = ['linear', 'quadratic', 'cubic'] + + df = pd.DataFrame({m: ser.interpolate(method=m) for m in methods}) + @savefig compare_interpolations.png + df.plot() + +Another use case is interpolation at *new* values. +Suppose you have 100 observations from some distribution. And let's suppose +that you're particularly interested in what's happening around the middle. +You can mix pandas' ``reindex`` and ``interpolate`` methods to interpolate +at the new values. + +.. ipython:: python + + ser = pd.Series(np.sort(np.random.uniform(size=100))) + + # interpolate at new_index + new_index = ser.index | pd.Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]) + interp_s = ser.reindex(new_index).interpolate(method='pchip') + interp_s[49:51] + +.. _scipy: http://www.scipy.org +.. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation +.. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html + +.. _missing_data.interp_limits: + +Interpolation limits +-------------------- + +Like other pandas fill methods, :meth:`~DataFrame.interpolate` accepts a ``limit`` keyword +argument. Use this argument to limit the number of consecutive ``NaN`` values +filled since the last valid observation: + +.. ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, + np.nan, 13, np.nan, np.nan]) + ser + + # fill all consecutive values in a forward direction + ser.interpolate() + + # fill one consecutive value in a forward direction + ser.interpolate(limit=1) + +By default, ``NaN`` values are filled in a ``forward`` direction. Use the +``limit_direction`` parameter to fill ``backward`` or from ``both`` directions. + +..
ipython:: python + + # fill one consecutive value backwards + ser.interpolate(limit=1, limit_direction='backward') + + # fill one consecutive value in both directions + ser.interpolate(limit=1, limit_direction='both') + + # fill all consecutive values in both directions + ser.interpolate(limit_direction='both') + +By default, ``NaN`` values are filled whether they are inside (surrounded by) +existing valid values, or outside existing valid values. Introduced in v0.23, +the ``limit_area`` parameter restricts filling to either inside or outside values. + +.. ipython:: python + + # fill one consecutive inside value in both directions + ser.interpolate(limit_direction='both', limit_area='inside', limit=1) + + # fill all consecutive outside values backward + ser.interpolate(limit_direction='backward', limit_area='outside') + + # fill all consecutive outside values in both directions + ser.interpolate(limit_direction='both', limit_area='outside') + +.. _missing_data.replace: + +Replacing generic values +~~~~~~~~~~~~~~~~~~~~~~~~ +Oftentimes we want to replace arbitrary values with other values. + +:meth:`~Series.replace` in Series and :meth:`~DataFrame.replace` in DataFrame provide an efficient yet +flexible way to perform such replacements. + +For a Series, you can replace a single value or a list of values by another +value: + +.. ipython:: python + + ser = pd.Series([0., 1., 2., 3., 4.]) + + ser.replace(0, 5) + +You can replace a list of values by a list of other values: + +.. ipython:: python + + ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) + +You can also specify a mapping dict: + +.. ipython:: python + + ser.replace({0: 10, 1: 100}) + +For a DataFrame, you can specify individual values by column: + +.. ipython:: python + + df = pd.DataFrame({'a': [0, 1, 2, 3, 4], 'b': [5, 6, 7, 8, 9]}) + + df.replace({'a': 0, 'b': 5}, 100) + +Instead of replacing with specified values, you can treat all given values as +missing and interpolate over them: + +..
ipython:: python + + ser.replace([1, 2, 3], method='pad') + +.. _missing_data.replace_expression: + +String/regular expression replacement +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + Python strings prefixed with the ``r`` character such as ``r'hello world'`` + are so-called "raw" strings. They have different semantics regarding + backslashes than strings without this prefix. Backslashes in raw strings + will be interpreted as an escaped backslash, e.g., ``r'\' == '\\'``. You + should `read about them + `__ + if this is unclear. + +Replace the '.' with ``NaN`` (str -> str): + +.. ipython:: python + + d = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']} + df = pd.DataFrame(d) + df.replace('.', np.nan) + +Now do it with a regular expression that removes surrounding whitespace +(regex -> regex): + +.. ipython:: python + + df.replace(r'\s*\.\s*', np.nan, regex=True) + +Replace a few different values (list -> list): + +.. ipython:: python + + df.replace(['a', '.'], ['b', np.nan]) + +list of regex -> list of regex: + +.. ipython:: python + + df.replace([r'\.', r'(a)'], ['dot', r'\1stuff'], regex=True) + +Only search in column ``'b'`` (dict -> dict): + +.. ipython:: python + + df.replace({'b': '.'}, {'b': np.nan}) + +Same as the previous example, but use a regular expression for +searching instead (dict of regex -> dict): + +.. ipython:: python + + df.replace({'b': r'\s*\.\s*'}, {'b': np.nan}, regex=True) + +You can pass nested dictionaries of regular expressions that use ``regex=True``: + +.. ipython:: python + + df.replace({'b': {'b': r''}}, regex=True) + +Alternatively, you can pass the nested dictionary like so: + +.. ipython:: python + + df.replace(regex={'b': {r'\s*\.\s*': np.nan}}) + +You can also use the group of a regular expression match when replacing (dict +of regex -> dict of regex), this works for lists as well. + +.. 
ipython:: python + + df.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, regex=True) + +You can pass a list of regular expressions, of which those that match +will be replaced with a scalar (list of regex -> regex). + +.. ipython:: python + + df.replace([r'\s*\.\s*', r'a|b'], np.nan, regex=True) + +All of the regular expression examples can also be passed with the +``to_replace`` argument as the ``regex`` argument. In this case the ``value`` +argument must be passed explicitly by name or ``regex`` must be a nested +dictionary. The previous example, in this case, would then be: + +.. ipython:: python + + df.replace(regex=[r'\s*\.\s*', r'a|b'], value=np.nan) + +This can be convenient if you do not want to pass ``regex=True`` every time you +want to use a regular expression. + +.. note:: + + Anywhere in the above ``replace`` examples that you see a regular expression + a compiled regular expression is valid as well. + +Numeric replacement +~~~~~~~~~~~~~~~~~~~ + +:meth:`~DataFrame.replace` is similar to :meth:`~DataFrame.fillna`. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2)) + df[np.random.rand(df.shape[0]) > 0.5] = 1.5 + df.replace(1.5, np.nan) + +Replacing more than one value is possible by passing a list. + +.. ipython:: python + + df00 = df.iloc[0, 0] + df.replace([1.5, df00], [np.nan, 'a']) + df[1].dtype + +You can also operate on the DataFrame in place: + +.. ipython:: python + + df.replace(1.5, np.nan, inplace=True) + +.. warning:: + + When replacing multiple ``bool`` or ``datetime64`` objects, the first + argument to ``replace`` (``to_replace``) must match the type of the value + being replaced. For example, + + .. code-block:: python + + >>> s = pd.Series([True, False, True]) + >>> s.replace({'a string': 'new value', True: False}) # raises + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + will raise a ``TypeError`` because one of the ``dict`` keys is not of the + correct type for replacement. 
+ + However, when replacing a *single* object such as, + + .. ipython:: python + + s = pd.Series([True, False, True]) + s.replace('a string', 'another string') + + the original ``NDFrame`` object will be returned untouched. We're working on + unifying this API, but for backwards compatibility reasons we cannot break + the latter behavior. See :issue:`6354` for more details. + +Missing data casting rules and indexing +--------------------------------------- + +While pandas supports storing arrays of integer and boolean type, these types +are not capable of storing missing data. Until we can switch to using a native +NA type in NumPy, we've established some "casting rules". When a reindexing +operation introduces missing data, the Series will be cast according to the +rules introduced in the table below. + +.. csv-table:: + :header: "data type", "Cast to" + :widths: 40, 40 + + integer, float + boolean, object + float, no cast + object, no cast + +For example: + +.. ipython:: python + + s = pd.Series(np.random.randn(5), index=[0, 2, 4, 6, 7]) + s > 0 + (s > 0).dtype + crit = (s > 0).reindex(list(range(8))) + crit + crit.dtype + +Ordinarily NumPy will complain if you try to use an object array (even if it +contains boolean values) instead of a boolean array to get or set values from +an ndarray (e.g. selecting values based on some criteria). If a boolean vector +contains NAs, an exception will be generated: + +.. ipython:: python + :okexcept: + + reindexed = s.reindex(list(range(8))).fillna(0) + reindexed[crit] + +However, these can be filled in using :meth:`~DataFrame.fillna` and it will work fine: + +.. ipython:: python + + reindexed[crit.fillna(False)] + reindexed[crit.fillna(True)] + +Pandas provides a nullable integer dtype, but you must explicitly request it +when creating the series or column. Notice that we use a capital "I" in +the ``dtype="Int64"``. + +.. 
ipython:: python + + s = pd.Series([0, 1, np.nan, 3, 4], dtype="Int64") + s + +See :ref:`integer_na` for more. + + +.. _missing_data.NA: + +Experimental ``NA`` scalar to denote missing values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + Experimental: the behaviour of ``pd.NA`` can still change without warning. + +.. versionadded:: 1.0.0 + +Starting from pandas 1.0, an experimental ``pd.NA`` value (singleton) is +available to represent scalar missing values. At this moment, it is used in +the nullable :doc:`integer <integer_na>`, boolean and +:ref:`dedicated string <text.types>` data types as the missing value indicator. + +The goal of ``pd.NA`` is to provide a "missing" indicator that can be used +consistently across data types (instead of ``np.nan``, ``None`` or ``pd.NaT`` +depending on the data type). + +For example, when having missing values in a Series with the nullable integer +dtype, it will use ``pd.NA``: + +.. ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + s + s[2] + s[2] is pd.NA + +Currently, pandas does not yet use those data types by default (when creating +a DataFrame or Series, or when reading in data), so you need to specify +the dtype explicitly. An easy way to convert to those dtypes is explained +:ref:`here <missing_data.NA.conversion>`. + +Propagation in arithmetic and comparison operations +--------------------------------------------------- + +In general, missing values *propagate* in operations involving ``pd.NA``. When +one of the operands is unknown, the outcome of the operation is also unknown. + +For example, ``pd.NA`` propagates in arithmetic operations, similarly to +``np.nan``: + +.. ipython:: python + + pd.NA + 1 + "a" * pd.NA + +There are a few special cases when the result is known, even when one of the +operands is ``NA``. + +.. ipython:: python + + pd.NA ** 0 + 1 ** pd.NA + +In equality and comparison operations, ``pd.NA`` also propagates.
This deviates +from the behaviour of ``np.nan``, where comparisons with ``np.nan`` always +return ``False``. + +.. ipython:: python + + pd.NA == 1 + pd.NA == pd.NA + pd.NA < 2.5 + +To check if a value is equal to ``pd.NA``, the :func:`isna` function can be +used: + +.. ipython:: python + + pd.isna(pd.NA) + +An exception to this basic propagation rule are *reductions* (such as the +mean or the minimum), where pandas defaults to skipping missing values. See +:ref:`above <missing_data.calculations>` for more. + +Logical operations +------------------ + +For logical operations, ``pd.NA`` follows the rules of the +`three-valued logic <https://en.wikipedia.org/wiki/Three-valued_logic>`__ (or +*Kleene logic*, similarly to R, SQL and Julia). This logic means to only +propagate missing values when it is logically required. + +For example, for the logical "or" operation (``|``), if one of the operands +is ``True``, we already know the result will be ``True``, regardless of the +other value (so regardless of whether the missing value would be ``True`` or ``False``). +In this case, ``pd.NA`` does not propagate: + +.. ipython:: python + + True | False + True | pd.NA + pd.NA | True + +On the other hand, if one of the operands is ``False``, the result depends +on the value of the other operand. Therefore, in this case ``pd.NA`` +propagates: + +.. ipython:: python + + False | True + False | False + False | pd.NA + +The behaviour of the logical "and" operation (``&``) can be derived using +similar logic (where now ``pd.NA`` will not propagate if one of the operands +is already ``False``): + +.. ipython:: python + + False & True + False & False + False & pd.NA + +.. ipython:: python + + True & True + True & False + True & pd.NA + + +``NA`` in a boolean context +--------------------------- + +Since the actual value of an NA is unknown, it is ambiguous to convert NA +to a boolean value. The following raises an error: + +..
ipython:: python + :okexcept: + + bool(pd.NA) + +This also means that ``pd.NA`` cannot be used in a context where it is +evaluated to a boolean, such as ``if condition: ...`` where ``condition`` can +potentially be ``pd.NA``. In such cases, :func:`isna` can be used to check +for ``pd.NA`` or ``condition`` being ``pd.NA`` can be avoided, for example by +filling missing values beforehand. + +A similar situation occurs when using Series or DataFrame objects in ``if`` +statements, see :ref:`gotchas.truth`. + +NumPy ufuncs +------------ + +:attr:`pandas.NA` implements NumPy's ``__array_ufunc__`` protocol. Most ufuncs +work with ``NA``, and generally return ``NA``: + +.. ipython:: python + + np.log(pd.NA) + np.add(pd.NA, 1) + +.. warning:: + + Currently, ufuncs involving an ndarray and ``NA`` will return an + object-dtype filled with NA values. + + .. ipython:: python + + a = np.array([1, 2, 3]) + np.greater(a, pd.NA) + + The return type here may change to return a different array type + in the future. + +See :ref:`dsintro.numpy_interop` for more on ufuncs. + +.. _missing_data.NA.conversion: + +Conversion +---------- + +If you have a DataFrame or Series using traditional types that have missing data +represented using ``np.nan``, there are convenience methods +:meth:`~Series.convert_dtypes` in Series and :meth:`~DataFrame.convert_dtypes` +in DataFrame that can convert data to use the newer dtypes for integers, strings and +booleans listed :ref:`here `. This is especially helpful after reading +in data sets when letting the readers such as :meth:`read_csv` and :meth:`read_excel` +infer default dtypes. + +In this example, while the dtypes of all columns are changed, we show the results for +the first 10 columns. + +.. ipython:: python + + bb = pd.read_csv('data/baseball.csv', index_col='id') + bb[bb.columns[:10]].dtypes + +.. 
ipython:: python + + bbn = bb.convert_dtypes() + bbn[bbn.columns[:10]].dtypes diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst new file mode 100644 index 00000000..5817efb3 --- /dev/null +++ b/doc/source/user_guide/options.rst @@ -0,0 +1,578 @@ +.. _options: + +{{ header }} + +******************** +Options and settings +******************** + +Overview +-------- +pandas has an options system that lets you customize some aspects of its behaviour, +display-related options being those the user is most likely to adjust. + +Options have a full "dotted-style", case-insensitive name (e.g. ``display.max_rows``). +You can get/set options directly as attributes of the top-level ``options`` attribute: + +.. ipython:: python + + import pandas as pd + pd.options.display.max_rows + pd.options.display.max_rows = 999 + pd.options.display.max_rows + +The API is composed of 5 relevant functions, available directly from the ``pandas`` +namespace: + +* :func:`~pandas.get_option` / :func:`~pandas.set_option` - get/set the value of a single option. +* :func:`~pandas.reset_option` - reset one or more options to their default value. +* :func:`~pandas.describe_option` - print the descriptions of one or more options. +* :func:`~pandas.option_context` - execute a codeblock with a set of options + that revert to prior settings after execution. + +**Note:** Developers can check out `pandas/core/config.py `_ for more information. + +All of the functions above accept a regexp pattern (``re.search`` style) as an argument, +and so passing in a substring will work - as long as it is unambiguous: + +.. ipython:: python + + pd.get_option("display.max_rows") + pd.set_option("display.max_rows", 101) + pd.get_option("display.max_rows") + pd.set_option("max_r", 102) + pd.get_option("display.max_rows") + + +The following will **not work** because it matches multiple option names, e.g. +``display.max_colwidth``, ``display.max_rows``, ``display.max_columns``: + +.. 
ipython:: python + :okexcept: + + try: + pd.get_option("column") + except KeyError as e: + print(e) + + +**Note:** Using this form of shorthand may cause your code to break if new options with similar names are added in future versions. + + +You can get a list of available options and their descriptions with ``describe_option``. When called +with no argument ``describe_option`` will print out the descriptions for all available options. + +.. ipython:: python + :suppress: + :okwarning: + + pd.reset_option("all") + +Getting and setting options +--------------------------- + +As described above, :func:`~pandas.get_option` and :func:`~pandas.set_option` +are available from the pandas namespace. To change an option, call +``set_option('option regex', new_value)``. + +.. ipython:: python + + pd.get_option('mode.sim_interactive') + pd.set_option('mode.sim_interactive', True) + pd.get_option('mode.sim_interactive') + +**Note:** The option 'mode.sim_interactive' is mostly used for debugging purposes. + +All options also have a default value, and you can use ``reset_option`` to do just that: + +.. ipython:: python + :suppress: + + pd.reset_option("display.max_rows") + +.. ipython:: python + + pd.get_option("display.max_rows") + pd.set_option("display.max_rows", 999) + pd.get_option("display.max_rows") + pd.reset_option("display.max_rows") + pd.get_option("display.max_rows") + + +It's also possible to reset multiple options at once (using a regex): + +.. ipython:: python + :okwarning: + + pd.reset_option("^display") + + +``option_context`` context manager has been exposed through +the top-level API, allowing you to execute code with given option values. Option values +are restored automatically when you exit the `with` block: + +.. 
ipython:: python + + with pd.option_context("display.max_rows", 10, "display.max_columns", 5): + print(pd.get_option("display.max_rows")) + print(pd.get_option("display.max_columns")) + print(pd.get_option("display.max_rows")) + print(pd.get_option("display.max_columns")) + + +Setting startup options in Python/IPython environment +----------------------------------------------------- + +Using startup scripts for the Python/IPython environment to import pandas and set options makes working with pandas more efficient. To do this, create a .py or .ipy script in the startup directory of the desired profile. An example where the startup folder is in a default ipython profile can be found at: + +.. code-block:: none + + $IPYTHONDIR/profile_default/startup + +More information can be found in the `ipython documentation +`__. An example startup script for pandas is displayed below: + +.. code-block:: python + + import pandas as pd + pd.set_option('display.max_rows', 999) + pd.set_option('precision', 5) + +.. _options.frequently_used: + +Frequently Used Options +----------------------- +The following is a walk-through of the more frequently used display options. + +``display.max_rows`` and ``display.max_columns`` set the maximum number +of rows and columns displayed when a frame is pretty-printed. Truncated +lines are replaced by an ellipsis. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(7, 2)) + pd.set_option('max_rows', 7) + df + pd.set_option('max_rows', 5) + df + pd.reset_option('max_rows') + +Once the ``display.max_rows`` is exceeded, the ``display.min_rows`` option +determines how many rows are shown in the truncated repr. + +..
ipython:: python + + pd.set_option('max_rows', 8) + pd.set_option('min_rows', 4) + # below max_rows -> all rows shown + df = pd.DataFrame(np.random.randn(7, 2)) + df + # above max_rows -> only min_rows (4) rows shown + df = pd.DataFrame(np.random.randn(9, 2)) + df + pd.reset_option('max_rows') + pd.reset_option('min_rows') + +``display.expand_frame_repr`` allows for the representation of +dataframes to stretch across pages, wrapped over the full column vs row-wise. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 10)) + pd.set_option('expand_frame_repr', True) + df + pd.set_option('expand_frame_repr', False) + df + pd.reset_option('expand_frame_repr') + +``display.large_repr`` lets you select whether to display dataframes that exceed +``max_columns`` or ``max_rows`` as a truncated frame, or as a summary. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 10)) + pd.set_option('max_rows', 5) + pd.set_option('large_repr', 'truncate') + df + pd.set_option('large_repr', 'info') + df + pd.reset_option('large_repr') + pd.reset_option('max_rows') + +``display.max_colwidth`` sets the maximum width of columns. Cells +of this length or longer will be truncated with an ellipsis. + +.. ipython:: python + + df = pd.DataFrame(np.array([['foo', 'bar', 'bim', 'uncomfortably long string'], + ['horse', 'cow', 'banana', 'apple']])) + pd.set_option('max_colwidth', 40) + df + pd.set_option('max_colwidth', 6) + df + pd.reset_option('max_colwidth') + +``display.max_info_columns`` sets a threshold for when by-column info +will be given. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 10)) + pd.set_option('max_info_columns', 11) + df.info() + pd.set_option('max_info_columns', 5) + df.info() + pd.reset_option('max_info_columns') + +``display.max_info_rows``: ``df.info()`` will usually show null-counts for each column. +For large frames this can be quite slow. 
``max_info_rows`` and ``max_info_columns`` +limit this null check only to frames with smaller dimensions than specified. Note that you +can specify the option ``df.info(null_counts=True)`` to override this when showing a particular frame. + +.. ipython:: python + + df = pd.DataFrame(np.random.choice([0, 1, np.nan], size=(10, 10))) + df + pd.set_option('max_info_rows', 11) + df.info() + pd.set_option('max_info_rows', 5) + df.info() + pd.reset_option('max_info_rows') + +``display.precision`` sets the output display precision in terms of decimal places. +This is only a suggestion. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 5)) + pd.set_option('precision', 7) + df + pd.set_option('precision', 4) + df + +``display.chop_threshold`` sets at what level pandas rounds to zero when +it displays a Series or DataFrame. This setting does not change the +precision at which the number is stored. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(6, 6)) + pd.set_option('chop_threshold', 0) + df + pd.set_option('chop_threshold', .5) + df + pd.reset_option('chop_threshold') + +``display.colheader_justify`` controls the justification of the headers. +The options are 'right' and 'left'. + +.. ipython:: python + + df = pd.DataFrame(np.array([np.random.randn(6), + np.random.randint(1, 9, 6) * .1, + np.zeros(6)]).T, + columns=['A', 'B', 'C'], dtype='float') + pd.set_option('colheader_justify', 'right') + df + pd.set_option('colheader_justify', 'left') + df + pd.reset_option('colheader_justify') + + + +.. _options.available: + +Available options +----------------- + +======================================= ============ ================================== +Option Default Function +======================================= ============ ================================== +display.chop_threshold None If set to a float value, all float + values smaller than the given + threshold will be displayed as + exactly 0 by repr and friends.
+display.colheader_justify right Controls the justification of + column headers. used by DataFrameFormatter. +display.column_space 12 No description available. +display.date_dayfirst False When True, prints and parses dates + with the day first, eg 20/01/2005 +display.date_yearfirst False When True, prints and parses dates + with the year first, eg 2005/01/20 +display.encoding UTF-8 Defaults to the detected encoding + of the console. Specifies the encoding + to be used for strings returned by + to_string, these are generally strings + meant to be displayed on the console. +display.expand_frame_repr True Whether to print out the full DataFrame + repr for wide DataFrames across + multiple lines, `max_columns` is + still respected, but the output will + wrap-around across multiple "pages" + if its width exceeds `display.width`. +display.float_format None The callable should accept a floating + point number and return a string with + the desired format of the number. + This is used in some places like + SeriesFormatter. + See core.format.EngFormatter for an example. +display.large_repr truncate For DataFrames exceeding max_rows/max_cols, + the repr (and HTML repr) can show + a truncated table (the default), + or switch to the view from df.info() + (the behaviour in earlier versions of pandas). + allowable settings, ['truncate', 'info'] +display.latex.repr False Whether to produce a latex DataFrame + representation for jupyter frontends + that support it. +display.latex.escape True Escapes special characters in DataFrames, when + using the to_latex method. +display.latex.longtable False Specifies if the to_latex method of a DataFrame + uses the longtable format. +display.latex.multicolumn True Combines columns when using a MultiIndex +display.latex.multicolumn_format 'l' Alignment of multicolumn labels +display.latex.multirow False Combines rows when using a MultiIndex. + Centered instead of top-aligned, + separated by clines. 
+display.max_columns 0 or 20 max_rows and max_columns are used + in __repr__() methods to decide if + to_string() or info() is used to + render an object to a string. In + case Python/IPython is running in + a terminal this is set to 0 by default and + pandas will correctly auto-detect + the width of the terminal and switch to + a smaller format in case all columns + would not fit vertically. The IPython + notebook, IPython qtconsole, or IDLE + do not run in a terminal and hence + it is not possible to do correct + auto-detection, in which case the default + is set to 20. 'None' value means unlimited. +display.max_colwidth 50 The maximum width in characters of + a column in the repr of a pandas + data structure. When the column overflows, + a "..." placeholder is embedded in + the output. 'None' value means unlimited. +display.max_info_columns 100 max_info_columns is used in DataFrame.info + method to decide if per column information + will be printed. +display.max_info_rows 1690785 df.info() will usually show null-counts + for each column. For large frames + this can be quite slow. max_info_rows + and max_info_cols limit this null + check only to frames with smaller + dimensions then specified. +display.max_rows 60 This sets the maximum number of rows + pandas should output when printing + out various output. For example, + this value determines whether the + repr() for a dataframe prints out + fully or just a truncated or summary repr. + 'None' value means unlimited. +display.min_rows 10 The numbers of rows to show in a truncated + repr (when `max_rows` is exceeded). Ignored + when `max_rows` is set to None or 0. When set + to None, follows the value of `max_rows`. +display.max_seq_items 100 when pretty-printing a long sequence, + no more then `max_seq_items` will + be printed. If items are omitted, + they will be denoted by the addition + of "..." to the resulting string. + If set to None, the number of items + to be printed is unlimited. 
+display.memory_usage True This specifies if the memory usage of + a DataFrame should be displayed when the + df.info() method is invoked. +display.multi_sparse True "Sparsify" MultiIndex display (don't + display repeated elements in outer + levels within groups) +display.notebook_repr_html True When True, IPython notebook will + use html representation for + pandas objects (if it is available). +display.pprint_nest_depth 3 Controls the number of nested levels + to process when pretty-printing +display.precision 6 Floating point output precision in + terms of number of places after the + decimal, for regular formatting as well + as scientific notation. Similar to + numpy's ``precision`` print option +display.show_dimensions truncate Whether to print out dimensions + at the end of DataFrame repr. + If 'truncate' is specified, only + print out the dimensions if the + frame is truncated (e.g. not display + all rows and/or columns) +display.width 80 Width of the display in characters. + In case python/IPython is running in + a terminal this can be set to None + and pandas will correctly auto-detect + the width. Note that the IPython notebook, + IPython qtconsole, or IDLE do not run in a + terminal and hence it is not possible + to correctly detect the width. +display.html.table_schema False Whether to publish a Table Schema + representation for frontends that + support it. +display.html.border 1 A ``border=value`` attribute is + inserted in the ```` tag + for the DataFrame HTML repr. +display.html.use_mathjax True When True, Jupyter notebook will process + table contents using MathJax, rendering + mathematical expressions enclosed by the + dollar symbol. +io.excel.xls.writer xlwt The default Excel writer engine for + 'xls' files. +io.excel.xlsm.writer openpyxl The default Excel writer engine for + 'xlsm' files. Available options: + 'openpyxl' (the default). +io.excel.xlsx.writer openpyxl The default Excel writer engine for + 'xlsx' files. 
+io.hdf.default_format None default format writing format, if + None, then put will default to + 'fixed' and append will default to + 'table' +io.hdf.dropna_table True drop ALL nan rows when appending + to a table +io.parquet.engine None The engine to use as a default for + parquet reading and writing. If None + then try 'pyarrow' and 'fastparquet' +mode.chained_assignment warn Controls ``SettingWithCopyWarning``: + 'raise', 'warn', or None. Raise an + exception, warn, or no action if + trying to use :ref:`chained assignment `. +mode.sim_interactive False Whether to simulate interactive mode + for purposes of testing. +mode.use_inf_as_na False True means treat None, NaN, -INF, + INF as NA (old way), False means + None and NaN are null, but INF, -INF + are not NA (new way). +compute.use_bottleneck True Use the bottleneck library to accelerate + computation if it is installed. +compute.use_numexpr True Use the numexpr library to accelerate + computation if it is installed. +plotting.backend matplotlib Change the plotting backend to a different + backend than the current matplotlib one. + Backends can be implemented as third-party + libraries implementing the pandas plotting + API. They can use other plotting libraries + like Bokeh, Altair, etc. +plotting.matplotlib.register_converters True Register custom converters with + matplotlib. Set to False to de-register. +======================================= ============ ================================== + + +.. _basics.console_output: + +Number formatting +------------------ + +pandas also allows you to set how numbers are displayed in the console. +This option is not set through the ``set_options`` API. + +Use the ``set_eng_float_format`` function +to alter the floating-point formatting of pandas objects to produce a particular +format. + +For instance: + +.. 
ipython:: python + + import numpy as np + + pd.set_eng_float_format(accuracy=3, use_eng_prefix=True) + s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) + s / 1.e3 + s / 1.e6 + +.. ipython:: python + :suppress: + :okwarning: + + pd.reset_option("^display") + +To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`. + +.. _options.east_asian_width: + +Unicode formatting +------------------ + +.. warning:: + + Enabling this option will affect the performance for printing of DataFrame and Series (about 2 times slower). + Use only when it is actually required. + +Some East Asian countries use Unicode characters whose width corresponds to two Latin characters. +If a DataFrame or Series contains these characters, the default output mode may not align them properly. + +.. note:: Screen captures are attached for each output to show the actual results. + +.. ipython:: python + + df = pd.DataFrame({'国籍': ['UK', '日本'], '名前': ['Alice', 'しのぶ']}) + df + +.. image:: ../_static/option_unicode01.png + +Enabling ``display.unicode.east_asian_width`` allows pandas to check each character's "East Asian Width" property. +These characters can be aligned properly by setting this option to ``True``. However, this will result in longer render +times than the standard ``len`` function. + +.. ipython:: python + + pd.set_option('display.unicode.east_asian_width', True) + df + +.. image:: ../_static/option_unicode02.png + +In addition, Unicode characters whose width is "Ambiguous" can either be 1 or 2 characters wide depending on the +terminal setting or encoding. The option ``display.unicode.ambiguous_as_wide`` can be used to handle the ambiguity. + +By default, an "Ambiguous" character's width, such as "¡" (inverted exclamation) in the example below, is taken to be 1. + +.. ipython:: python + + df = pd.DataFrame({'a': ['xxx', '¡¡'], 'b': ['yyy', '¡¡']}) + df + +.. 
image:: ../_static/option_unicode03.png + +Enabling ``display.unicode.ambiguous_as_wide`` makes pandas interpret these characters' widths to be 2. +(Note that this option will only be effective when ``display.unicode.east_asian_width`` is enabled.) + +However, setting this option incorrectly for your terminal will cause these characters to be aligned incorrectly: + +.. ipython:: python + + pd.set_option('display.unicode.ambiguous_as_wide', True) + df + +.. image:: ../_static/option_unicode04.png + +.. ipython:: python + :suppress: + + pd.set_option('display.unicode.east_asian_width', False) + pd.set_option('display.unicode.ambiguous_as_wide', False) + +.. _options.table_schema: + +Table schema display +-------------------- + +``DataFrame`` and ``Series`` can publish a Table Schema representation. +This is disabled by default, but can be enabled globally with the +``display.html.table_schema`` option: + +.. ipython:: python + + pd.set_option('display.html.table_schema', True) + +Only the first ``'display.max_rows'`` rows are serialized and published. + + +.. ipython:: python + :suppress: + + pd.reset_option('display.html.table_schema') diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst new file mode 100644 index 00000000..bbec9a77 --- /dev/null +++ b/doc/source/user_guide/reshaping.rst @@ -0,0 +1,849 @@ +.. _reshaping: + +{{ header }} + +************************** +Reshaping and pivot tables +************************** + +.. _reshaping.reshaping: + +Reshaping by pivoting DataFrame objects +--------------------------------------- + +.. image:: ../_static/reshaping_pivot.png + +..
ipython:: python + :suppress: + + import pandas._testing as tm + tm.N = 3 + + def unpivot(frame): + N, K = frame.shape + data = {'value': frame.to_numpy().ravel('F'), + 'variable': np.asarray(frame.columns).repeat(N), + 'date': np.tile(np.asarray(frame.index), K)} + columns = ['date', 'variable', 'value'] + return pd.DataFrame(data, columns=columns) + + df = unpivot(tm.makeTimeDataFrame()) + +Data is often stored in so-called "stacked" or "record" format: + +.. ipython:: python + + df + + +For the curious here is how the above ``DataFrame`` was created: + +.. code-block:: python + + import pandas._testing as tm + + tm.N = 3 + + + def unpivot(frame): + N, K = frame.shape + data = {'value': frame.to_numpy().ravel('F'), + 'variable': np.asarray(frame.columns).repeat(N), + 'date': np.tile(np.asarray(frame.index), K)} + return pd.DataFrame(data, columns=['date', 'variable', 'value']) + + + df = unpivot(tm.makeTimeDataFrame()) + +To select out everything for variable ``A`` we could do: + +.. ipython:: python + + df[df['variable'] == 'A'] + +But suppose we wish to do time series operations with the variables. A better +representation would be where the ``columns`` are the unique variables and an +``index`` of dates identifies individual observations. To reshape the data into +this form, we use the :meth:`DataFrame.pivot` method (also implemented as a +top level function :func:`~pandas.pivot`): + +.. ipython:: python + + df.pivot(index='date', columns='variable', values='value') + +If the ``values`` argument is omitted, and the input ``DataFrame`` has more than +one column of values which are not used as column or index inputs to ``pivot``, +then the resulting "pivoted" ``DataFrame`` will have :ref:`hierarchical columns +` whose topmost level indicates the respective value +column: + +.. ipython:: python + + df['value2'] = df['value'] * 2 + pivoted = df.pivot(index='date', columns='variable') + pivoted + +You can then select subsets from the pivoted ``DataFrame``: + +.. 
ipython:: python + + pivoted['value2'] + +Note that this returns a view on the underlying data in the case where the data +are homogeneously-typed. + +.. note:: + :func:`~pandas.pivot` will error with a ``ValueError: Index contains duplicate + entries, cannot reshape`` if the index/column pair is not unique. In this + case, consider using :func:`~pandas.pivot_table` which is a generalization + of pivot that can handle duplicate values for one index/column pair. + +.. _reshaping.stacking: + +Reshaping by stacking and unstacking +------------------------------------ + +.. image:: ../_static/reshaping_stack.png + +Closely related to the :meth:`~DataFrame.pivot` method are the +:meth:`~DataFrame.stack` and :meth:`~DataFrame.unstack` methods available on +``Series`` and ``DataFrame``. These methods are designed to work together with +``MultiIndex`` objects (see the section on :ref:`hierarchical indexing +<advanced.hierarchical>`). Here is essentially what these methods do: + +* ``stack``: "pivot" a level of the (possibly hierarchical) column labels, + returning a ``DataFrame`` with an index with a new inner-most level of row + labels. +* ``unstack``: (inverse operation of ``stack``) "pivot" a level of the + (possibly hierarchical) row index to the column axis, producing a reshaped + ``DataFrame`` with a new inner-most level of column labels. + +.. image:: ../_static/reshaping_unstack.png + +The clearest way to explain is by example. Let's take a prior example data set +from the hierarchical indexing section: + +..
ipython:: python + + tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', + 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', + 'one', 'two', 'one', 'two']])) + index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second']) + df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B']) + df2 = df[:4] + df2 + +The ``stack`` function "compresses" a level in the ``DataFrame``'s columns to +produce either: + +* A ``Series``, in the case of a simple column Index. +* A ``DataFrame``, in the case of a ``MultiIndex`` in the columns. + +If the columns have a ``MultiIndex``, you can choose which level to stack. The +stacked level becomes the new lowest level in a ``MultiIndex`` on the columns: + +.. ipython:: python + + stacked = df2.stack() + stacked + +With a "stacked" ``DataFrame`` or ``Series`` (having a ``MultiIndex`` as the +``index``), the inverse operation of ``stack`` is ``unstack``, which by default +unstacks the **last level**: + +.. ipython:: python + + stacked.unstack() + stacked.unstack(1) + stacked.unstack(0) + +.. _reshaping.unstack_by_name: + +.. image:: ../_static/reshaping_unstack_1.png + +If the indexes have names, you can use the level names instead of specifying +the level numbers: + +.. ipython:: python + + stacked.unstack('second') + + +.. image:: ../_static/reshaping_unstack_0.png + +Notice that the ``stack`` and ``unstack`` methods implicitly sort the index +levels involved. Hence a call to ``stack`` and then ``unstack``, or vice versa, +will result in a **sorted** copy of the original ``DataFrame`` or ``Series``: + +.. ipython:: python + + index = pd.MultiIndex.from_product([[2, 1], ['a', 'b']]) + df = pd.DataFrame(np.random.randn(4), index=index, columns=['A']) + df + all(df.unstack().stack() == df.sort_index()) + +The above code will raise a ``TypeError`` if the call to ``sort_index`` is +removed. + +.. 
_reshaping.stack_multiple: + +Multiple levels +~~~~~~~~~~~~~~~ + +You may also stack or unstack more than one level at a time by passing a list +of levels, in which case the end result is as if each level in the list were +processed individually. + +.. ipython:: python + + columns = pd.MultiIndex.from_tuples([ + ('A', 'cat', 'long'), ('B', 'cat', 'long'), + ('A', 'dog', 'short'), ('B', 'dog', 'short')], + names=['exp', 'animal', 'hair_length'] + ) + df = pd.DataFrame(np.random.randn(4, 4), columns=columns) + df + + df.stack(level=['animal', 'hair_length']) + +The list of levels can contain either level names or level numbers (but +not a mixture of the two). + +.. ipython:: python + + # df.stack(level=['animal', 'hair_length']) + # from above is equivalent to: + df.stack(level=[1, 2]) + +Missing data +~~~~~~~~~~~~ + +These functions are intelligent about handling missing data and do not expect +each subgroup within the hierarchical index to have the same set of labels. +They also can handle the index being unsorted (but you can make it sorted by +calling ``sort_index``, of course). Here is a more complex example: + +.. ipython:: python + + columns = pd.MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), + ('B', 'cat'), ('A', 'dog')], + names=['exp', 'animal']) + index = pd.MultiIndex.from_product([('bar', 'baz', 'foo', 'qux'), + ('one', 'two')], + names=['first', 'second']) + df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=columns) + df2 = df.iloc[[0, 1, 2, 4, 5, 7]] + df2 + +As mentioned above, ``stack`` can be called with a ``level`` argument to select +which level in the columns to stack: + +.. ipython:: python + + df2.stack('exp') + df2.stack('animal') + +Unstacking can result in missing values if subgroups do not have the same +set of labels. By default, missing values will be replaced with the default +fill value for that data type, ``NaN`` for float, ``NaT`` for datetimelike, +etc. 
For integer types, by default data will be converted to float and missing +values will be set to ``NaN``. + +.. ipython:: python + + df3 = df.iloc[[0, 1, 4, 7], [1, 2]] + df3 + df3.unstack() + +Alternatively, ``unstack`` takes an optional ``fill_value`` argument, for specifying +the value of missing data. + +.. ipython:: python + + df3.unstack(fill_value=-1e9) + +With a MultiIndex +~~~~~~~~~~~~~~~~~ + +Unstacking when the columns are a ``MultiIndex`` is also careful about doing +the right thing: + +.. ipython:: python + + df[:3].unstack(0) + df2.unstack(1) + +.. _reshaping.melt: + +Reshaping by Melt +----------------- + +.. image:: ../_static/reshaping_melt.png + +The top-level :func:`~pandas.melt` function and the corresponding :meth:`DataFrame.melt` +are useful to massage a ``DataFrame`` into a format where one or more columns +are *identifier variables*, while all other columns, considered *measured +variables*, are "unpivoted" to the row axis, leaving just two non-identifier +columns, "variable" and "value". The names of those columns can be customized +by supplying the ``var_name`` and ``value_name`` parameters. + +For instance, + +.. ipython:: python + + cheese = pd.DataFrame({'first': ['John', 'Mary'], + 'last': ['Doe', 'Bo'], + 'height': [5.5, 6.0], + 'weight': [130, 150]}) + cheese + cheese.melt(id_vars=['first', 'last']) + cheese.melt(id_vars=['first', 'last'], var_name='quantity') + +Another way to transform is to use the :func:`~pandas.wide_to_long` panel data +convenience function. It is less flexible than :func:`~pandas.melt`, but more +user-friendly. + +.. ipython:: python + + dft = pd.DataFrame({"A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: .7}, + "B1980": {0: 3.2, 1: 1.3, 2: .1}, + "X": dict(zip(range(3), np.random.randn(3))) + }) + dft["id"] = dft.index + dft + pd.wide_to_long(dft, ["A", "B"], i="id", j="year") + +..
_reshaping.combine_with_groupby: + +Combining with stats and GroupBy +-------------------------------- + +It should be no shock that combining ``pivot`` / ``stack`` / ``unstack`` with +GroupBy and the basic Series and DataFrame statistical functions can produce +some very expressive and fast data manipulations. + +.. ipython:: python + + df + df.stack().mean(1).unstack() + + # same result, another way + df.groupby(level=1, axis=1).mean() + + df.stack().groupby(level=1).mean() + + df.mean().unstack(0) + + +Pivot tables +------------ + +.. _reshaping.pivot: + + + +While :meth:`~DataFrame.pivot` provides general purpose pivoting with various +data types (strings, numerics, etc.), pandas also provides :func:`~pandas.pivot_table` +for pivoting with aggregation of numeric data. + +The function :func:`~pandas.pivot_table` can be used to create spreadsheet-style +pivot tables. See the :ref:`cookbook` for some advanced +strategies. + +It takes a number of arguments: + +* ``data``: a DataFrame object. +* ``values``: a column or a list of columns to aggregate. +* ``index``: a column, Grouper, array which has the same length as data, or list of them. + Keys to group by on the pivot table index. If an array is passed, it is used in the same manner as column values. +* ``columns``: a column, Grouper, array which has the same length as data, or list of them. + Keys to group by on the pivot table column. If an array is passed, it is used in the same manner as column values. +* ``aggfunc``: function to use for aggregation, defaulting to ``numpy.mean``. + +Consider a data set like this: + +..
ipython:: python + + import datetime + df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 6, + 'B': ['A', 'B', 'C'] * 8, + 'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 4, + 'D': np.random.randn(24), + 'E': np.random.randn(24), + 'F': [datetime.datetime(2013, i, 1) for i in range(1, 13)] + + [datetime.datetime(2013, i, 15) for i in range(1, 13)]}) + df + +We can produce pivot tables from this data very easily: + +.. ipython:: python + + pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C']) + pd.pivot_table(df, values='D', index=['B'], columns=['A', 'C'], aggfunc=np.sum) + pd.pivot_table(df, values=['D', 'E'], index=['B'], columns=['A', 'C'], + aggfunc=np.sum) + +The result object is a ``DataFrame`` having potentially hierarchical indexes on the +rows and columns. If the ``values`` column name is not given, the pivot table +will include all of the data that can be aggregated in an additional level of +hierarchy in the columns: + +.. ipython:: python + + pd.pivot_table(df, index=['A', 'B'], columns=['C']) + +Also, you can use ``Grouper`` for ``index`` and ``columns`` keywords. For detail of ``Grouper``, see :ref:`Grouping with a Grouper specification `. + +.. ipython:: python + + pd.pivot_table(df, values='D', index=pd.Grouper(freq='M', key='F'), + columns='C') + +You can render a nice output of the table omitting the missing values by +calling ``to_string`` if you wish: + +.. ipython:: python + + table = pd.pivot_table(df, index=['A', 'B'], columns=['C']) + print(table.to_string(na_rep='')) + +Note that ``pivot_table`` is also available as an instance method on DataFrame, + i.e. :meth:`DataFrame.pivot_table`. + +.. _reshaping.pivot.margins: + +Adding margins +~~~~~~~~~~~~~~ + +If you pass ``margins=True`` to ``pivot_table``, special ``All`` columns and +rows will be added with partial group aggregates across the categories on the +rows and columns: + +.. 
ipython:: python + + df.pivot_table(index=['A', 'B'], columns='C', margins=True, aggfunc=np.std) + +.. _reshaping.crosstabulations: + +Cross tabulations +----------------- + +Use :func:`~pandas.crosstab` to compute a cross-tabulation of two (or more) +factors. By default ``crosstab`` computes a frequency table of the factors +unless an array of values and an aggregation function are passed. + +It takes a number of arguments + +* ``index``: array-like, values to group by in the rows. +* ``columns``: array-like, values to group by in the columns. +* ``values``: array-like, optional, array of values to aggregate according to + the factors. +* ``aggfunc``: function, optional, If no values array is passed, computes a + frequency table. +* ``rownames``: sequence, default ``None``, must match number of row arrays passed. +* ``colnames``: sequence, default ``None``, if passed, must match number of column + arrays passed. +* ``margins``: boolean, default ``False``, Add row/column margins (subtotals) +* ``normalize``: boolean, {'all', 'index', 'columns'}, or {0,1}, default ``False``. + Normalize by dividing all values by the sum of values. + + +Any ``Series`` passed will have their name attributes used unless row or column +names for the cross-tabulation are specified + +For example: + +.. ipython:: python + + foo, bar, dull, shiny, one, two = 'foo', 'bar', 'dull', 'shiny', 'one', 'two' + a = np.array([foo, foo, bar, bar, foo, foo], dtype=object) + b = np.array([one, one, two, one, two, one], dtype=object) + c = np.array([dull, dull, shiny, dull, dull, shiny], dtype=object) + pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) + + +If ``crosstab`` receives only two Series, it will provide a frequency table. + +.. 
ipython:: python + + df = pd.DataFrame({'A': [1, 2, 2, 2, 2], 'B': [3, 3, 4, 4, 4], + 'C': [1, 1, np.nan, 1, 1]}) + df + + pd.crosstab(df['A'], df['B']) + +Any input passed containing ``Categorical`` data will have **all** of its +categories included in the cross-tabulation, even if the actual data does +not contain any instances of a particular category. + +.. ipython:: python + + foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) + bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) + pd.crosstab(foo, bar) + +Normalization +~~~~~~~~~~~~~ + +Frequency tables can also be normalized to show percentages rather than counts +using the ``normalize`` argument: + +.. ipython:: python + + pd.crosstab(df['A'], df['B'], normalize=True) + +``normalize`` can also normalize values within each row or within each column: + +.. ipython:: python + + pd.crosstab(df['A'], df['B'], normalize='columns') + +``crosstab`` can also be passed a third ``Series`` and an aggregation function +(``aggfunc``) that will be applied to the values of the third ``Series`` within +each group defined by the first two ``Series``: + +.. ipython:: python + + pd.crosstab(df['A'], df['B'], values=df['C'], aggfunc=np.sum) + +Adding margins +~~~~~~~~~~~~~~ + +Finally, one can also add margins or normalize this output. + +.. ipython:: python + + pd.crosstab(df['A'], df['B'], values=df['C'], aggfunc=np.sum, normalize=True, + margins=True) + +.. _reshaping.tile: +.. _reshaping.tile.cut: + +Tiling +------ + +The :func:`~pandas.cut` function computes groupings for the values of the input +array and is often used to transform continuous variables to discrete or +categorical variables: + +.. ipython:: python + + ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) + + pd.cut(ages, bins=3) + +If the ``bins`` keyword is an integer, then equal-width bins are formed. +Alternatively we can specify custom bin-edges: + +.. 
ipython:: python + + c = pd.cut(ages, bins=[0, 18, 35, 70]) + c + +If the ``bins`` keyword is an ``IntervalIndex``, then these will be +used to bin the passed data.:: + + pd.cut([25, 20, 50], bins=c.categories) + + +.. _reshaping.dummies: + +Computing indicator / dummy variables +------------------------------------- + +To convert a categorical variable into a "dummy" or "indicator" ``DataFrame``, +for example a column in a ``DataFrame`` (a ``Series``) which has ``k`` distinct +values, can derive a ``DataFrame`` containing ``k`` columns of 1s and 0s using +:func:`~pandas.get_dummies`: + +.. ipython:: python + + df = pd.DataFrame({'key': list('bbacab'), 'data1': range(6)}) + + pd.get_dummies(df['key']) + +Sometimes it's useful to prefix the column names, for example when merging the result +with the original ``DataFrame``: + +.. ipython:: python + + dummies = pd.get_dummies(df['key'], prefix='key') + dummies + + df[['data1']].join(dummies) + +This function is often used along with discretization functions like ``cut``: + +.. ipython:: python + + values = np.random.randn(10) + values + + bins = [0, 0.2, 0.4, 0.6, 0.8, 1] + + pd.get_dummies(pd.cut(values, bins)) + +See also :func:`Series.str.get_dummies `. + +:func:`get_dummies` also accepts a ``DataFrame``. By default all categorical +variables (categorical in the statistical sense, those with `object` or +`categorical` dtype) are encoded as dummy variables. + + +.. ipython:: python + + df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['c', 'c', 'b'], + 'C': [1, 2, 3]}) + pd.get_dummies(df) + +All non-object columns are included untouched in the output. You can control +the columns that are encoded with the ``columns`` keyword. + +.. ipython:: python + + pd.get_dummies(df, columns=['A']) + +Notice that the ``B`` column is still included in the output, it just hasn't +been encoded. You can drop ``B`` before calling ``get_dummies`` if you don't +want to include it in the output. 
+ +As with the ``Series`` version, you can pass values for the ``prefix`` and +``prefix_sep``. By default the column name is used as the prefix, and '_' as +the prefix separator. You can specify ``prefix`` and ``prefix_sep`` in 3 ways: + +* string: Use the same value for ``prefix`` or ``prefix_sep`` for each column + to be encoded. +* list: Must be the same length as the number of columns being encoded. +* dict: Mapping column name to prefix. + +.. ipython:: python + + simple = pd.get_dummies(df, prefix='new_prefix') + simple + from_list = pd.get_dummies(df, prefix=['from_A', 'from_B']) + from_list + from_dict = pd.get_dummies(df, prefix={'B': 'from_B', 'A': 'from_A'}) + from_dict + +Sometimes it will be useful to only keep k-1 levels of a categorical +variable to avoid collinearity when feeding the result to statistical models. +You can switch to this mode by turning on ``drop_first``. + +.. ipython:: python + + s = pd.Series(list('abcaa')) + + pd.get_dummies(s) + + pd.get_dummies(s, drop_first=True) + +When a column contains only one level, it will be omitted in the result. + +.. ipython:: python + + df = pd.DataFrame({'A': list('aaaaa'), 'B': list('ababc')}) + + pd.get_dummies(df) + + pd.get_dummies(df, drop_first=True) + +By default new columns will have ``np.uint8`` dtype. +To choose another dtype, use the ``dtype`` argument: + +.. ipython:: python + + df = pd.DataFrame({'A': list('abc'), 'B': [1.1, 2.2, 3.3]}) + + pd.get_dummies(df, dtype=bool).dtypes + +.. versionadded:: 0.23.0 + + +.. _reshaping.factorize: + +Factorizing values +------------------ + +To encode 1-d values as an enumerated type use :func:`~pandas.factorize`: + +.. ipython:: python + + x = pd.Series(['A', 'A', np.nan, 'B', 3.14, np.inf]) + x + labels, uniques = pd.factorize(x) + labels + uniques + +Note that ``factorize`` is similar to ``numpy.unique``, but differs in its +handling of NaN: + +..
note:: + The following ``numpy.unique`` will fail under Python 3 with a ``TypeError`` + because of an ordering bug. See also + `here `__. + +.. code-block:: ipython + + In [1]: x = pd.Series(['A', 'A', np.nan, 'B', 3.14, np.inf]) + In [2]: pd.factorize(x, sort=True) + Out[2]: + (array([ 2, 2, -1, 3, 0, 1]), + Index([3.14, inf, 'A', 'B'], dtype='object')) + + In [3]: np.unique(x, return_inverse=True)[::-1] + Out[3]: (array([3, 3, 0, 4, 1, 2]), array([nan, 3.14, inf, 'A', 'B'], dtype=object)) + +.. note:: + If you just want to handle one column as a categorical variable (like R's factor), + you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or + ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`, + see the :ref:`Categorical introduction ` and the + :ref:`API documentation `. + +Examples +-------- + +In this section, we will review frequently asked questions and examples. The +column names and relevant column values are named to correspond with how this +DataFrame will be pivoted in the answers below. + +.. ipython:: python + + np.random.seed([3, 1415]) + n = 20 + + cols = np.array(['key', 'row', 'item', 'col']) + df = cols + pd.DataFrame((np.random.randint(5, size=(n, 4)) + // [2, 1, 2, 1]).astype(str)) + df.columns = cols + df = df.join(pd.DataFrame(np.random.rand(n, 2).round(2)).add_prefix('val')) + + df + +Pivoting with single aggregations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Suppose we wanted to pivot ``df`` such that the ``col`` values are columns, +``row`` values are the index, and the mean of ``val0`` are the values? In +particular, the resulting DataFrame should look like: + +.. code-block:: text + + col col0 col1 col2 col3 col4 + row + row0 0.77 0.605 NaN 0.860 0.65 + row2 0.13 NaN 0.395 0.500 0.25 + row3 NaN 0.310 NaN 0.545 NaN + row4 NaN 0.100 0.395 0.760 0.24 + +This solution uses :func:`~pandas.pivot_table`. Also note that +``aggfunc='mean'`` is the default. It is included here to be explicit. + +.. 
ipython:: python + + df.pivot_table( + values='val0', index='row', columns='col', aggfunc='mean') + +Note that we can also replace the missing values by using the ``fill_value`` +parameter. + +.. ipython:: python + + df.pivot_table( + values='val0', index='row', columns='col', aggfunc='mean', fill_value=0) + +Also note that we can pass in other aggregation functions as well. For example, +we can also pass in ``sum``. + +.. ipython:: python + + df.pivot_table( + values='val0', index='row', columns='col', aggfunc='sum', fill_value=0) + +Another aggregation we can do is calculate the frequency in which the columns +and rows occur together a.k.a. "cross tabulation". To do this, we can pass +``size`` to the ``aggfunc`` parameter. + +.. ipython:: python + + df.pivot_table(index='row', columns='col', fill_value=0, aggfunc='size') + +Pivoting with multiple aggregations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We can also perform multiple aggregations. For example, to perform both a +``sum`` and ``mean``, we can pass in a list to the ``aggfunc`` argument. + +.. ipython:: python + + df.pivot_table( + values='val0', index='row', columns='col', aggfunc=['mean', 'sum']) + +Note to aggregate over multiple value columns, we can pass in a list to the +``values`` parameter. + +.. ipython:: python + + df.pivot_table( + values=['val0', 'val1'], index='row', columns='col', aggfunc=['mean']) + +Note to subdivide over multiple columns we can pass in a list to the +``columns`` parameter. + +.. ipython:: python + + df.pivot_table( + values=['val0'], index='row', columns=['item', 'col'], aggfunc=['mean']) + +.. _reshaping.explode: + +Exploding a list-like column +---------------------------- + +.. versionadded:: 0.25.0 + +Sometimes the values in a column are list-like. + +.. 
ipython:: python + + keys = ['panda1', 'panda2', 'panda3'] + values = [['eats', 'shoots'], ['shoots', 'leaves'], ['eats', 'leaves']] + df = pd.DataFrame({'keys': keys, 'values': values}) + df + +We can 'explode' the ``values`` column, transforming each list-like to a separate row, by using :meth:`~Series.explode`. This will replicate the index values from the original row: + +.. ipython:: python + + df['values'].explode() + +You can also explode the column in the ``DataFrame``. + +.. ipython:: python + + df.explode('values') + +:meth:`Series.explode` will replace empty lists with ``np.nan`` and preserve scalar entries. The dtype of the resulting ``Series`` is always ``object``. + +.. ipython:: python + + s = pd.Series([[1, 2, 3], 'foo', [], ['a', 'b']]) + s + s.explode() + +Here is a typical use case. You have comma-separated strings in a column and want to expand this. + +.. ipython:: python + + df = pd.DataFrame([{'var1': 'a,b,c', 'var2': 1}, + {'var1': 'd,e,f', 'var2': 2}]) + df + +Creating a long form DataFrame is now straightforward using explode and chained operations: + +.. ipython:: python + + df.assign(var1=df.var1.str.split(',')).explode('var1') diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst new file mode 100644 index 00000000..61fa24bb --- /dev/null +++ b/doc/source/user_guide/scale.rst @@ -0,0 +1,375 @@ +.. _scale: + +************************* +Scaling to large datasets +************************* + +Pandas provides data structures for in-memory analytics, which makes using pandas +to analyze datasets that are larger than memory somewhat tricky. Even datasets +that are a sizable fraction of memory become unwieldy, as some pandas operations need +to make intermediate copies. + +This document provides a few recommendations for scaling your analysis to larger datasets. +It's a complement to :ref:`enhancingperf`, which focuses on speeding up analysis +for datasets that fit in memory. 
+ +But first, it's worth considering *not using pandas*. Pandas isn't the right +tool for all situations. If you're working with very large datasets and a tool +like PostgreSQL fits your needs, then you should probably be using that. +Assuming you want or need the expressiveness and power of pandas, let's carry on. + +.. ipython:: python + + import pandas as pd + import numpy as np + +.. ipython:: python + :suppress: + + from pandas._testing import _make_timeseries + + # Make a random in-memory dataset + ts = _make_timeseries(freq="30S", seed=0) + ts.to_csv("timeseries.csv") + ts.to_parquet("timeseries.parquet") + + +Load less data +-------------- + +.. ipython:: python + :suppress: + + # make a similar dataset with many columns + timeseries = [ + _make_timeseries(freq="1T", seed=i).rename(columns=lambda x: f"{x}_{i}") + for i in range(10) + ] + ts_wide = pd.concat(timeseries, axis=1) + ts_wide.to_parquet("timeseries_wide.parquet") + +Suppose our raw dataset on disk has many columns:: + + id_0 name_0 x_0 y_0 id_1 name_1 x_1 ... name_8 x_8 y_8 id_9 name_9 x_9 y_9 + timestamp ... + 2000-01-01 00:00:00 1015 Michael -0.399453 0.095427 994 Frank -0.176842 ... Dan -0.315310 0.713892 1025 Victor -0.135779 0.346801 + 2000-01-01 00:01:00 969 Patricia 0.650773 -0.874275 1003 Laura 0.459153 ... Ursula 0.913244 -0.630308 1047 Wendy -0.886285 0.035852 + 2000-01-01 00:02:00 1016 Victor -0.721465 -0.584710 1046 Michael 0.524994 ... Ray -0.656593 0.692568 1064 Yvonne 0.070426 0.432047 + 2000-01-01 00:03:00 939 Alice -0.746004 -0.908008 996 Ingrid -0.414523 ... Jerry -0.958994 0.608210 978 Wendy 0.855949 -0.648988 + 2000-01-01 00:04:00 1017 Dan 0.919451 -0.803504 1048 Jerry -0.569235 ... Frank -0.577022 -0.409088 994 Bob -0.270132 0.335176 + ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... + 2000-12-30 23:56:00 999 Tim 0.162578 0.512817 973 Kevin -0.403352 ... 
Tim -0.380415 0.008097 1041 Charlie 0.191477 -0.599519 + 2000-12-30 23:57:00 970 Laura -0.433586 -0.600289 958 Oliver -0.966577 ... Zelda 0.971274 0.402032 1038 Ursula 0.574016 -0.930992 + 2000-12-30 23:58:00 1065 Edith 0.232211 -0.454540 971 Tim 0.158484 ... Alice -0.222079 -0.919274 1022 Dan 0.031345 -0.657755 + 2000-12-30 23:59:00 1019 Ingrid 0.322208 -0.615974 981 Hannah 0.607517 ... Sarah -0.424440 -0.117274 990 George -0.375530 0.563312 + 2000-12-31 00:00:00 937 Ursula -0.906523 0.943178 1018 Alice -0.564513 ... Jerry 0.236837 0.807650 985 Oliver 0.777642 0.783392 + + [525601 rows x 40 columns] + + +To load the columns we want, we have two options. +Option 1 loads in all the data and then filters to what we need. + +.. ipython:: python + + columns = ['id_0', 'name_0', 'x_0', 'y_0'] + + pd.read_parquet("timeseries_wide.parquet")[columns] + +Option 2 only loads the columns we request. + +.. ipython:: python + + pd.read_parquet("timeseries_wide.parquet", columns=columns) + +If we were to measure the memory usage of the two calls, we'd see that specifying +``columns`` uses about 1/10th the memory in this case. + +With :func:`pandas.read_csv`, you can specify ``usecols`` to limit the columns +read into memory. Not all file formats that can be read by pandas provide an option +to read a subset of columns. + +Use efficient datatypes +----------------------- + +The default pandas data types are not the most memory efficient. This is +especially true for text data columns with relatively few unique values (commonly +referred to as "low-cardinality" data). By using more efficient data types, you +can store larger datasets in memory. + +.. ipython:: python + + ts = pd.read_parquet("timeseries.parquet") + ts + +Now, let's inspect the data types and memory usage to see where we should focus our +attention. + +.. ipython:: python + + ts.dtypes + +.. 
ipython:: python + + ts.memory_usage(deep=True) # memory usage in bytes + + +The ``name`` column is taking up much more memory than any other. It has just a +few unique values, so it's a good candidate for converting to a +:class:`Categorical`. With a Categorical, we store each unique name once and use +space-efficient integers to know which specific name is used in each row. + + +.. ipython:: python + + ts2 = ts.copy() + ts2['name'] = ts2['name'].astype('category') + ts2.memory_usage(deep=True) + +We can go a bit further and downcast the numeric columns to their smallest types +using :func:`pandas.to_numeric`. + +.. ipython:: python + + ts2['id'] = pd.to_numeric(ts2['id'], downcast='unsigned') + ts2[['x', 'y']] = ts2[['x', 'y']].apply(pd.to_numeric, downcast='float') + ts2.dtypes + +.. ipython:: python + + ts2.memory_usage(deep=True) + +.. ipython:: python + + reduction = (ts2.memory_usage(deep=True).sum() + / ts.memory_usage(deep=True).sum()) + print(f"{reduction:0.2f}") + +In all, we've reduced the in-memory footprint of this dataset to 1/5 of its +original size. + +See :ref:`categorical` for more on ``Categorical`` and :ref:`basics.dtypes` +for an overview of all of pandas' dtypes. + +Use chunking +------------ + +Some workloads can be achieved with chunking: splitting a large problem like "convert this +directory of CSVs to parquet" into a bunch of small problems ("convert this individual CSV +file into a Parquet file. Now repeat that for each file in this directory."). As long as each chunk +fits in memory, you can work with datasets that are much larger than memory. + +.. note:: + + Chunking works well when the operation you're performing requires zero or minimal + coordination between chunks. For more complicated workflows, you're better off + :ref:`using another library <scale.other_libraries>`. + +Suppose we have an even larger "logical dataset" on disk that's a directory of parquet +files. Each file in the directory represents a different year of the entire dataset. + +.. 
ipython:: python + :suppress: + + import pathlib + + N = 12 + starts = [f'20{i:>02d}-01-01' for i in range(N)] + ends = [f'20{i:>02d}-12-13' for i in range(N)] + + pathlib.Path("data/timeseries").mkdir(exist_ok=True) + + for i, (start, end) in enumerate(zip(starts, ends)): + ts = _make_timeseries(start=start, end=end, freq='1T', seed=i) + ts.to_parquet(f"data/timeseries/ts-{i:0>2d}.parquet") + + +:: + + data + └── timeseries + ├── ts-00.parquet + ├── ts-01.parquet + ├── ts-02.parquet + ├── ts-03.parquet + ├── ts-04.parquet + ├── ts-05.parquet + ├── ts-06.parquet + ├── ts-07.parquet + ├── ts-08.parquet + ├── ts-09.parquet + ├── ts-10.parquet + └── ts-11.parquet + +Now we'll implement an out-of-core ``value_counts``. The peak memory usage of this +workflow is the single largest chunk, plus a small series storing the unique value +counts up to this point. As long as each individual file fits in memory, this will +work for arbitrary-sized datasets. + +.. ipython:: python + + %%time + files = pathlib.Path("data/timeseries/").glob("ts*.parquet") + counts = pd.Series(dtype=int) + for path in files: + # Only one dataframe is in memory at a time... + df = pd.read_parquet(path) + # ... plus a small Series `counts`, which is updated. + counts = counts.add(df['name'].value_counts(), fill_value=0) + counts.astype(int) + +Some readers, like :func:`pandas.read_csv`, offer parameters to control the +``chunksize`` when reading a single file. + +Manually chunking is an OK option for workflows that don't +require overly sophisticated operations. Some operations, like ``groupby``, are +much harder to do chunkwise. In these cases, you may be better off switching to a +different library that implements these out-of-core algorithms for you. + +.. _scale.other_libraries: + +Use other libraries +------------------- + +Pandas is just one library offering a DataFrame API. Because of its popularity, +pandas' API has become something of a standard that other libraries implement. 
+The pandas documentation maintains a list of libraries implementing a DataFrame API +in :ref:`our ecosystem page <ecosystem.out-of-core>`. + +For example, `Dask`_, a parallel computing library, has `dask.dataframe`_, a +pandas-like API for working with larger than memory datasets in parallel. Dask +can use multiple threads or processes on a single machine, or a cluster of +machines to process data in parallel. + + +We'll import ``dask.dataframe`` and notice that the API feels similar to pandas. +We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in. + +.. ipython:: python + :okwarning: + + import dask.dataframe as dd + + ddf = dd.read_parquet("data/timeseries/ts*.parquet", engine="pyarrow") + ddf + +Inspecting the ``ddf`` object, we see a few things + +* There are familiar attributes like ``.columns`` and ``.dtypes`` +* There are familiar methods like ``.groupby``, ``.sum``, etc. +* There are new attributes like ``.npartitions`` and ``.divisions`` + +The partitions and divisions are how Dask parallelizes computation. A **Dask** +DataFrame is made up of many **Pandas** DataFrames. A single method call on a +Dask DataFrame ends up making many pandas method calls, and Dask knows how to +coordinate everything to get the result. + +.. ipython:: python + + ddf.columns + ddf.dtypes + ddf.npartitions + +One major difference: the ``dask.dataframe`` API is *lazy*. If you look at the +repr above, you'll notice that the values aren't actually printed out; just the +column names and dtypes. That's because Dask hasn't actually read the data yet. +Rather than executing immediately, operations build up a **task graph**. + +.. ipython:: python + + ddf + ddf['name'] + ddf['name'].value_counts() + +Each of these calls is instant because the result isn't being computed yet. +We're just building up a list of computations to do when someone needs the +result. 
Dask knows that the return type of a ``pandas.Series.value_counts`` +is a pandas Series with a certain dtype and a certain name. So the Dask version +returns a Dask Series with the same dtype and the same name. + +To get the actual result you can call ``.compute()``. + +.. ipython:: python + + %time ddf['name'].value_counts().compute() + +At that point, you get back the same thing you'd get with pandas, in this case +a concrete pandas Series with the count of each ``name``. + +Calling ``.compute`` causes the full task graph to be executed. This includes +reading the data, selecting the columns, and doing the ``value_counts``. The +execution is done *in parallel* where possible, and Dask tries to keep the +overall memory footprint small. You can work with datasets that are much larger +than memory, as long as each partition (a regular pandas DataFrame) fits in memory. + +By default, ``dask.dataframe`` operations use a threadpool to do operations in +parallel. We can also connect to a cluster to distribute the work on many +machines. In this case we'll connect to a local "cluster" made up of several +processes on this single machine. + +.. code-block:: python + + >>> from dask.distributed import Client, LocalCluster + + >>> cluster = LocalCluster() + >>> client = Client(cluster) + >>> client + + +Once this ``client`` is created, all of Dask's computation will take place on +the cluster (which is just processes in this case). + +Dask implements the most used parts of the pandas API. For example, we can do +a familiar groupby aggregation. + +.. ipython:: python + + %time ddf.groupby('name')[['x', 'y']].mean().compute().head() + +The grouping and aggregation is done out-of-core and in parallel. + +When Dask knows the ``divisions`` of a dataset, certain optimizations are +possible. When reading parquet datasets written by dask, the divisions will be +known automatically. In this case, since we created the parquet files manually, +we need to supply the divisions manually. 
+ +.. ipython:: python + + N = 12 + starts = [f'20{i:>02d}-01-01' for i in range(N)] + ends = [f'20{i:>02d}-12-13' for i in range(N)] + + divisions = tuple(pd.to_datetime(starts)) + (pd.Timestamp(ends[-1]),) + ddf.divisions = divisions + ddf + +Now we can do things like fast random access with ``.loc``. + +.. ipython:: python + + ddf.loc['2002-01-01 12:01':'2002-01-01 12:05'].compute() + +Dask knows to just look in the 3rd partition for selecting values in `2002`. It +doesn't need to look at any other data. + +Many workflows involve a large amount of data and processing it in a way that +reduces the size to something that fits in memory. In this case, we'll resample +to daily frequency and take the mean. Once we've taken the mean, we know the +results will fit in memory, so we can safely call ``compute`` without running +out of memory. At that point it's just a regular pandas object. + +.. ipython:: python + :okwarning: + + @savefig dask_resample.png + ddf[['x', 'y']].resample("1D").mean().cumsum().compute().plot() + +These Dask examples have all been done using multiple processes on a single +machine. Dask can be `deployed on a cluster +<https://docs.dask.org/en/latest/setup.html>`_ to scale up to even larger +datasets. + +You can see more Dask examples at https://examples.dask.org. + +.. _Dask: https://dask.org +.. _dask.dataframe: https://docs.dask.org/en/latest/dataframe.html diff --git a/doc/source/user_guide/sparse.rst b/doc/source/user_guide/sparse.rst new file mode 100644 index 00000000..8588fac4 --- /dev/null +++ b/doc/source/user_guide/sparse.rst @@ -0,0 +1,367 @@ +.. _sparse: + +{{ header }} + +********************** +Sparse data structures +********************** + +Pandas provides data structures for efficiently storing sparse data. +These are not necessarily sparse in the typical "mostly 0" sense. Rather, you can view these +objects as being "compressed" where any data matching a specific value (``NaN`` / missing value, though any value +can be chosen, including 0) is omitted. 
The compressed values are not actually stored in the array. + +.. ipython:: python + + arr = np.random.randn(10) + arr[2:-2] = np.nan + ts = pd.Series(pd.arrays.SparseArray(arr)) + ts + +Notice the dtype, ``Sparse[float64, nan]``. The ``nan`` means that elements in the +array that are ``nan`` aren't actually stored, only the non-``nan`` elements are. +Those non-``nan`` elements have a ``float64`` dtype. + +The sparse objects exist for memory efficiency reasons. Suppose you had a +large, mostly NA ``DataFrame``: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10000, 4)) + df.iloc[:9998] = np.nan + sdf = df.astype(pd.SparseDtype("float", np.nan)) + sdf.head() + sdf.dtypes + sdf.sparse.density + +As you can see, the density (% of values that have not been "compressed") is +extremely low. This sparse object takes up much less memory on disk (pickled) +and in the Python interpreter. + +.. ipython:: python + + 'dense : {:0.2f} bytes'.format(df.memory_usage().sum() / 1e3) + 'sparse: {:0.2f} bytes'.format(sdf.memory_usage().sum() / 1e3) + +Functionally, their behavior should be nearly +identical to their dense counterparts. + +.. _sparse.array: + +SparseArray +----------- + +:class:`arrays.SparseArray` is a :class:`~pandas.api.extensions.ExtensionArray` +for storing an array of sparse values (see :ref:`basics.dtypes` for more +on extension arrays). It is a 1-dimensional ndarray-like object storing +only values distinct from the ``fill_value``: + +.. ipython:: python + + arr = np.random.randn(10) + arr[2:5] = np.nan + arr[7:8] = np.nan + sparr = pd.arrays.SparseArray(arr) + sparr + +A sparse array can be converted to a regular (dense) ndarray with :meth:`numpy.asarray` + +.. ipython:: python + + np.asarray(sparr) + + +.. _sparse.dtype: + +SparseDtype +----------- + +The :attr:`SparseArray.dtype` property stores two pieces of information + +1. The dtype of the non-sparse values +2. The scalar fill value + + +.. 
ipython:: python + + sparr.dtype + + +A :class:`SparseDtype` may be constructed by passing each of these + +.. ipython:: python + + pd.SparseDtype(np.dtype('datetime64[ns]')) + +The default fill value for a given NumPy dtype is the "missing" value for that dtype, +though it may be overridden. + +.. ipython:: python + + pd.SparseDtype(np.dtype('datetime64[ns]'), + fill_value=pd.Timestamp('2017-01-01')) + +Finally, the string alias ``'Sparse[dtype]'`` may be used to specify a sparse dtype +in many places + +.. ipython:: python + + pd.array([1, 0, 0, 2], dtype='Sparse[int]') + +.. _sparse.accessor: + +Sparse accessor +--------------- + +.. versionadded:: 0.24.0 + +Pandas provides a ``.sparse`` accessor, similar to ``.str`` for string data, ``.cat`` +for categorical data, and ``.dt`` for datetime-like data. This namespace provides +attributes and methods that are specific to sparse data. + +.. ipython:: python + + s = pd.Series([0, 0, 1, 2], dtype="Sparse[int]") + s.sparse.density + s.sparse.fill_value + +This accessor is available only on data with ``SparseDtype``, and on the :class:`Series` +class itself for creating a Series with sparse data from a scipy COO matrix. + + +.. versionadded:: 0.25.0 + +A ``.sparse`` accessor has been added for :class:`DataFrame` as well. +See :ref:`api.frame.sparse` for more. + +.. _sparse.calculation: + +Sparse calculation +------------------ + +You can apply NumPy `ufuncs <https://numpy.org/doc/stable/reference/ufuncs.html>`_ +to ``SparseArray`` and get a ``SparseArray`` as a result. + +.. ipython:: python + + arr = pd.arrays.SparseArray([1., np.nan, np.nan, -2., np.nan]) + np.abs(arr) + + +The *ufunc* is also applied to ``fill_value``. This is needed to get +the correct dense result. + +.. ipython:: python + + arr = pd.arrays.SparseArray([1., -1, -1, -2., -1], fill_value=-1) + np.abs(arr) + np.abs(arr).to_dense() + +.. _sparse.migration: + +Migrating +--------- + +.. note:: + + ``SparseSeries`` and ``SparseDataFrame`` were removed in pandas 1.0.0. 
This migration + guide is present to aid in migrating from previous versions. + +In older versions of pandas, the ``SparseSeries`` and ``SparseDataFrame`` classes (documented below) +were the preferred way to work with sparse data. With the advent of extension arrays, these subclasses +are no longer needed. Their purpose is better served by using a regular Series or DataFrame with +sparse values instead. + +.. note:: + + There's no performance or memory penalty to using a Series or DataFrame with sparse values, + rather than a SparseSeries or SparseDataFrame. + +This section provides some guidance on migrating your code to the new style. As a reminder, +you can use the python warnings module to control warnings. But we recommend modifying +your code, rather than ignoring the warning. + +**Construction** + +From an array-like, use the regular :class:`Series` or +:class:`DataFrame` constructors with :class:`SparseArray` values. + +.. code-block:: python + + # Previous way + >>> pd.SparseDataFrame({"A": [0, 1]}) + +.. ipython:: python + + # New way + pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])}) + +From a SciPy sparse matrix, use :meth:`DataFrame.sparse.from_spmatrix`, + +.. code-block:: python + + # Previous way + >>> from scipy import sparse + >>> mat = sparse.eye(3) + >>> df = pd.SparseDataFrame(mat, columns=['A', 'B', 'C']) + +.. ipython:: python + + # New way + from scipy import sparse + mat = sparse.eye(3) + df = pd.DataFrame.sparse.from_spmatrix(mat, columns=['A', 'B', 'C']) + df.dtypes + +**Conversion** + +From sparse to dense, use the ``.sparse`` accessors + +.. ipython:: python + + df.sparse.to_dense() + df.sparse.to_coo() + +From dense to sparse, use :meth:`DataFrame.astype` with a :class:`SparseDtype`. + +.. ipython:: python + + dense = pd.DataFrame({"A": [1, 0, 0, 1]}) + dtype = pd.SparseDtype(int, fill_value=0) + dense.astype(dtype) + +**Sparse Properties** + +Sparse-specific properties, like ``density``, are available on the ``.sparse`` accessor. 
+ +.. ipython:: python + + df.sparse.density + +**General differences** + +In a ``SparseDataFrame``, *all* columns were sparse. A :class:`DataFrame` can have a mixture of +sparse and dense columns. As a consequence, assigning new columns to a ``DataFrame`` with sparse +values will not automatically convert the input to be sparse. + +.. code-block:: python + + # Previous Way + >>> df = pd.SparseDataFrame({"A": [0, 1]}) + >>> df['B'] = [0, 0] # implicitly becomes Sparse + >>> df['B'].dtype + Sparse[int64, nan] + +Instead, you'll need to ensure that the values being assigned are sparse + +.. ipython:: python + + df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1])}) + df['B'] = [0, 0] # remains dense + df['B'].dtype + df['B'] = pd.arrays.SparseArray([0, 0]) + df['B'].dtype + +The ``SparseDataFrame.default_kind`` and ``SparseDataFrame.default_fill_value`` attributes +have no replacement. + +.. _sparse.scipysparse: + +Interaction with scipy.sparse +----------------------------- + +Use :meth:`DataFrame.sparse.from_spmatrix` to create a ``DataFrame`` with sparse values from a sparse matrix. + +.. versionadded:: 0.25.0 + +.. ipython:: python + + from scipy.sparse import csr_matrix + + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + + sp_arr = csr_matrix(arr) + sp_arr + + sdf = pd.DataFrame.sparse.from_spmatrix(sp_arr) + sdf.head() + sdf.dtypes + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate <scipy.sparse>` format will be converted, copying data as needed. +To convert back to a sparse SciPy matrix in COO format, you can use the :meth:`DataFrame.sparse.to_coo` method: + +.. ipython:: python + + sdf.sparse.to_coo() + +:meth:`Series.sparse.to_coo` is implemented for transforming a ``Series`` with sparse values indexed by a :class:`MultiIndex` to a :class:`scipy.sparse.coo_matrix`. + +The method requires a ``MultiIndex`` with two or more levels. + +.. 
ipython:: python + + s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + s + ss = s.astype('Sparse') + ss + +In the example below, we transform the ``Series`` to a sparse representation of a 2-d array by specifying that the first and second ``MultiIndex`` levels define labels for the rows and the third and fourth levels define labels for the columns. We also specify that the column and row labels should be sorted in the final sparse representation. + +.. ipython:: python + + A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=True) + + A + A.todense() + rows + columns + +Specifying different row and column labels (and not sorting them) yields a different sparse matrix: + +.. ipython:: python + + A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B', 'C'], + column_levels=['D'], + sort_labels=False) + + A + A.todense() + rows + columns + +A convenience method :meth:`Series.sparse.from_coo` is implemented for creating a ``Series`` with sparse values from a ``scipy.sparse.coo_matrix``. + +.. ipython:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + A + A.todense() + +The default behaviour (with ``dense_index=False``) simply returns a ``Series`` containing +only the non-null entries. + +.. ipython:: python + + ss = pd.Series.sparse.from_coo(A) + ss + +Specifying ``dense_index=True`` will result in an index that is the Cartesian product of the +row and columns coordinates of the matrix. Note that this will consume a significant amount of memory +(relative to ``dense_index=False``) if the sparse matrix is large (and sparse) enough. + +.. 
ipython:: python + + ss_dense = pd.Series.sparse.from_coo(A, dense_index=True) + ss_dense diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb new file mode 100644 index 00000000..02550eab --- /dev/null +++ b/doc/source/user_guide/style.ipynb @@ -0,0 +1,1244 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Styling\n", + "\n", + "This document is written as a Jupyter Notebook, and can be viewed or downloaded [here](http://nbviewer.ipython.org/github/pandas-dev/pandas/blob/master/doc/source/user_guide/style.ipynb).\n", + "\n", + "You can apply **conditional formatting**, the visual styling of a DataFrame\n", + "depending on the data within, by using the ``DataFrame.style`` property.\n", + "This is a property that returns a ``Styler`` object, which has\n", + "useful methods for formatting and displaying DataFrames.\n", + "\n", + "The styling is accomplished using CSS.\n", + "You write \"style functions\" that take scalars, `DataFrame`s or `Series`, and return *like-indexed* DataFrames or Series with CSS `\"attribute: value\"` pairs for the values.\n", + "These functions can be incrementally passed to the `Styler` which collects the styles before rendering." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building styles\n", + "\n", + "Pass your style functions into one of the following methods:\n", + "\n", + "- ``Styler.applymap``: elementwise\n", + "- ``Styler.apply``: column-/row-/table-wise\n", + "\n", + "Both of those methods take a function (and some other keyword arguments) and apply your function to the DataFrame in a certain way.\n", + "`Styler.applymap` works through the DataFrame elementwise.\n", + "`Styler.apply` passes each column or row of your DataFrame into your function one-at-a-time or the entire table at once, depending on the `axis` keyword argument.\n", + "For columnwise use `axis=0`, rowwise use `axis=1`, and for the entire table at once use `axis=None`.\n", + "\n", + "For `Styler.applymap` your function should take a scalar and return a single string with the CSS attribute-value pair.\n", + "\n", + "For `Styler.apply` your function should take a Series or DataFrame (depending on the axis parameter), and return a Series or DataFrame with an identical shape where each value is a string with a CSS attribute-value pair.\n", + "\n", + "Let's see some examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbsphinx": "hidden" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot\n", + "# We have this here to trigger matplotlib's font cache stuff.\n", + "# This cell is hidden from the output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "np.random.seed(24)\n", + "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n", + "df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n", + " axis=1)\n", + "df.iloc[3, 3] = np.nan\n", + "df.iloc[0, 2] = np.nan" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's a boring example of rendering a DataFrame, without any (visible) styles:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*Note*: The `DataFrame.style` attribute is a property that returns a `Styler` object. `Styler` has a `_repr_html_` method defined on it so they are rendered automatically. If you want the actual HTML back for further processing or for writing to file call the `.render()` method which returns a string.\n", + "\n", + "The above output looks very similar to the standard DataFrame HTML representation. But we've done some work behind the scenes to attach CSS classes to each cell. We can view these by calling the `.render` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style.highlight_null().render().split('\\n')[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `row0_col2` is the identifier for that particular cell. 
We've also prepended each row/column identifier with a UUID unique to each DataFrame so that the style from one doesn't collide with the styling from another within the same notebook or page (you can set the `uuid` if you'd like to tie together the styling of two DataFrames).\n", + "\n", + "When writing style functions, you take care of producing the CSS attribute / value pairs you want. Pandas matches those up with the CSS classes that identify each cell." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's write a simple style function that will color negative numbers red and positive numbers black." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def color_negative_red(val):\n", + " \"\"\"\n", + " Takes a scalar and returns a string with\n", + " the css property `'color: red'` for negative\n", + " values, black otherwise.\n", + " \"\"\"\n", + " color = 'red' if val < 0 else 'black'\n", + " return 'color: %s' % color" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, the cell's style depends only on its own value.\n", + "That means we should use the `Styler.applymap` method which works elementwise." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s = df.style.applymap(color_negative_red)\n", + "s" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. 
We want you to be able to reuse your existing knowledge of how to interact with DataFrames.\n", + "\n", + "Notice also that our function returned a string containing the CSS attribute and value, separated by a colon just like in a `'.format(css))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/doc/source/user_guide/templates/myhtml.tpl b/doc/source/user_guide/templates/myhtml.tpl new file mode 100644 index 00000000..1170fd3d --- /dev/null +++ b/doc/source/user_guide/templates/myhtml.tpl @@ -0,0 +1,5 @@ +{% extends "html.tpl" %} +{% block table %} +

    {{ table_title|default("My Table") }}

    +{{ super() }} +{% endblock table %} diff --git a/doc/source/user_guide/templates/template_structure.html b/doc/source/user_guide/templates/template_structure.html new file mode 100644 index 00000000..0778d8e2 --- /dev/null +++ b/doc/source/user_guide/templates/template_structure.html @@ -0,0 +1,57 @@ + + + +
    before_style
    +
    style +
    <style type="text/css">
    +
    table_styles
    +
    before_cellstyle
    +
    cellstyle
    +
    </style>
    +
    + +
    before_table
    + +
    table +
    <table ...>
    +
    caption
    + +
    thead +
    before_head_rows
    +
    head_tr (loop over headers)
    +
    after_head_rows
    +
    + +
    tbody +
    before_rows
    +
    tr (loop over data rows)
    +
    after_rows
    +
    +
    </table>
    +
    + +
    after_table
    diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst new file mode 100644 index 00000000..2e4d0fec --- /dev/null +++ b/doc/source/user_guide/text.rst @@ -0,0 +1,747 @@ +.. _text: + +{{ header }} + +====================== +Working with text data +====================== + +.. _text.types: + +Text Data Types +--------------- + +.. versionadded:: 1.0.0 + +There are two ways to store text data in pandas: + +1. ``object`` -dtype NumPy array. +2. :class:`StringDtype` extension type. + +We recommend using :class:`StringDtype` to store text data. + +Prior to pandas 1.0, ``object`` dtype was the only option. This was unfortunate +for many reasons: + +1. You can accidentally store a *mixture* of strings and non-strings in an + ``object`` dtype array. It's better to have a dedicated dtype. +2. ``object`` dtype breaks dtype-specific operations like :meth:`DataFrame.select_dtypes`. + There isn't a clear way to select *just* text while excluding non-text + but still object-dtype columns. +3. When reading code, the contents of an ``object`` dtype array is less clear + than ``'string'``. + +Currently, the performance of ``object`` dtype arrays of strings and +:class:`arrays.StringArray` are about the same. We expect future enhancements +to significantly increase the performance and lower the memory overhead of +:class:`~arrays.StringArray`. + +.. warning:: + + ``StringArray`` is currently considered experimental. The implementation + and parts of the API may change without warning. + +For backwards-compatibility, ``object`` dtype remains the default type we +infer a list of strings to + +.. ipython:: python + + pd.Series(['a', 'b', 'c']) + +To explicitly request ``string`` dtype, specify the ``dtype`` + +.. ipython:: python + + pd.Series(['a', 'b', 'c'], dtype="string") + pd.Series(['a', 'b', 'c'], dtype=pd.StringDtype()) + +Or ``astype`` after the ``Series`` or ``DataFrame`` is created + +.. 
ipython:: python + + s = pd.Series(['a', 'b', 'c']) + s + s.astype("string") + +.. _text.differences: + +Behavior differences +^^^^^^^^^^^^^^^^^^^^ + +These are places where the behavior of ``StringDtype`` objects differ from +``object`` dtype + +l. For ``StringDtype``, :ref:`string accessor methods` + that return **numeric** output will always return a nullable integer dtype, + rather than either int or float dtype, depending on the presence of NA values. + Methods returning **boolean** output will return a nullable boolean dtype. + + .. ipython:: python + + s = pd.Series(["a", None, "b"], dtype="string") + s + s.str.count("a") + s.dropna().str.count("a") + + Both outputs are ``Int64`` dtype. Compare that with object-dtype + + .. ipython:: python + + s2 = pd.Series(["a", None, "b"], dtype="object") + s2.str.count("a") + s2.dropna().str.count("a") + + When NA values are present, the output dtype is float64. Similarly for + methods returning boolean values. + + .. ipython:: python + + s.str.isdigit() + s.str.match("a") + +2. Some string methods, like :meth:`Series.str.decode` are not available + on ``StringArray`` because ``StringArray`` only holds strings, not + bytes. +3. In comparison operations, :class:`arrays.StringArray` and ``Series`` backed + by a ``StringArray`` will return an object with :class:`BooleanDtype`, + rather than a ``bool`` dtype object. Missing values in a ``StringArray`` + will propagate in comparison operations, rather than always comparing + unequal like :attr:`numpy.nan`. + +Everything else that follows in the rest of this document applies equally to +``string`` and ``object`` dtype. + +.. _text.string_methods: + +String Methods +-------------- + +Series and Index are equipped with a set of string processing methods +that make it easy to operate on each element of the array. Perhaps most +importantly, these methods exclude missing/NA values automatically. 
These are +accessed via the ``str`` attribute and generally have names matching +the equivalent (scalar) built-in string methods: + +.. ipython:: python + + s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'], + dtype="string") + s.str.lower() + s.str.upper() + s.str.len() + +.. ipython:: python + + idx = pd.Index([' jack', 'jill ', ' jesse ', 'frank']) + idx.str.strip() + idx.str.lstrip() + idx.str.rstrip() + +The string methods on Index are especially useful for cleaning up or +transforming DataFrame columns. For instance, you may have columns with +leading or trailing whitespace: + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(3, 2), + columns=[' Column A ', ' Column B '], index=range(3)) + df + +Since ``df.columns`` is an Index object, we can use the ``.str`` accessor + +.. ipython:: python + + df.columns.str.strip() + df.columns.str.lower() + +These string methods can then be used to clean up the columns as needed. +Here we are removing leading and trailing whitespaces, lower casing all names, +and replacing any remaining whitespaces with underscores: + +.. ipython:: python + + df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_') + df + +.. note:: + + If you have a ``Series`` where lots of elements are repeated + (i.e. the number of unique elements in the ``Series`` is a lot smaller than the length of the + ``Series``), it can be faster to convert the original ``Series`` to one of type + ``category`` and then use ``.str.`` or ``.dt.`` on that. + The performance difference comes from the fact that, for ``Series`` of type ``category``, the + string operations are done on the ``.categories`` and not on each element of the + ``Series``. + + Please note that a ``Series`` of type ``category`` with string ``.categories`` has + some limitations in comparison to ``Series`` of type string (e.g. you can't add strings to + each other: ``s + " " + s`` won't work if ``s`` is a ``Series`` of type ``category``). 
Also, + ``.str`` methods which operate on elements of type ``list`` are not available on such a + ``Series``. + +.. _text.warn_types: + +.. warning:: + + Before v.0.25.0, the ``.str``-accessor did only the most rudimentary type checks. Starting with + v.0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously. + + Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few + exceptions, other uses are not supported, and may be disabled at a later point. + +.. _text.split: + +Splitting and replacing strings +------------------------------- + +Methods like ``split`` return a Series of lists: + +.. ipython:: python + + s2 = pd.Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'], dtype="string") + s2.str.split('_') + +Elements in the split lists can be accessed using ``get`` or ``[]`` notation: + +.. ipython:: python + + s2.str.split('_').str.get(1) + s2.str.split('_').str[1] + +It is easy to expand this to return a DataFrame using ``expand``. + +.. ipython:: python + + s2.str.split('_', expand=True) + +When the original ``Series`` has :class:`StringDtype`, the output columns will all +be :class:`StringDtype` as well. + +It is also possible to limit the number of splits: + +.. ipython:: python + + s2.str.split('_', expand=True, n=1) + +``rsplit`` is similar to ``split`` except it works in the reverse direction, +i.e., from the end of the string to the beginning of the string: + +.. ipython:: python + + s2.str.rsplit('_', expand=True, n=1) + +``replace`` by default replaces `regular expressions +<https://docs.python.org/3/library/re.html>`__: + +.. ipython:: python + + s3 = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', + '', np.nan, 'CABA', 'dog', 'cat'], + dtype="string") + s3 + s3.str.replace('^.a|dog', 'XX-XX ', case=False) + +Some caution must be taken to keep regular expressions in mind! For example, the +following code will cause trouble because of the regular expression meaning of +`$`: + +..
ipython:: python + + # Consider the following badly formatted financial data + dollars = pd.Series(['12', '-$10', '$10,000'], dtype="string") + + # This does what you'd naively expect: + dollars.str.replace('$', '') + + # But this doesn't: + dollars.str.replace('-$', '-') + + # We need to escape the special character (for >1 len patterns) + dollars.str.replace(r'-\$', '-') + +.. versionadded:: 0.23.0 + +If you do want literal replacement of a string (equivalent to +:meth:`str.replace`), you can set the optional ``regex`` parameter to +``False``, rather than escaping each character. In this case both ``pat`` +and ``repl`` must be strings: + +.. ipython:: python + + # These lines are equivalent + dollars.str.replace(r'-\$', '-') + dollars.str.replace('-$', '-', regex=False) + +The ``replace`` method can also take a callable as replacement. It is called +on every ``pat`` using :func:`re.sub`. The callable should expect one +positional argument (a regex object) and return a string. + +.. ipython:: python + + # Reverse every lowercase alphabetic word + pat = r'[a-z]+' + + def repl(m): + return m.group(0)[::-1] + + pd.Series(['foo 123', 'bar baz', np.nan], + dtype="string").str.replace(pat, repl) + + # Using regex groups + pat = r"(?P<one>\w+) (?P<two>\w+) (?P<three>\w+)" + + def repl(m): + return m.group('two').swapcase() + + pd.Series(['Foo Bar Baz', np.nan], + dtype="string").str.replace(pat, repl) + +The ``replace`` method also accepts a compiled regular expression object +from :func:`re.compile` as a pattern. All flags should be included in the +compiled regular expression object. + +.. ipython:: python + + import re + regex_pat = re.compile(r'^.a|dog', flags=re.IGNORECASE) + s3.str.replace(regex_pat, 'XX-XX ') + +Including a ``flags`` argument when calling ``replace`` with a compiled +regular expression object will raise a ``ValueError``. + +..
ipython:: + + @verbatim + In [1]: s3.str.replace(regex_pat, 'XX-XX ', flags=re.IGNORECASE) + --------------------------------------------------------------------------- + ValueError: case and flags cannot be set when pat is a compiled regex + +.. _text.concatenate: + +Concatenation +------------- + +There are several ways to concatenate a ``Series`` or ``Index``, either with itself or others, all based on :meth:`~Series.str.cat`, +resp. ``Index.str.cat``. + +Concatenating a single Series into a string +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The content of a ``Series`` (or ``Index``) can be concatenated: + +.. ipython:: python + + s = pd.Series(['a', 'b', 'c', 'd'], dtype="string") + s.str.cat(sep=',') + +If not specified, the keyword ``sep`` for the separator defaults to the empty string, ``sep=''``: + +.. ipython:: python + + s.str.cat() + +By default, missing values are ignored. Using ``na_rep``, they can be given a representation: + +.. ipython:: python + + t = pd.Series(['a', 'b', np.nan, 'd'], dtype="string") + t.str.cat(sep=',') + t.str.cat(sep=',', na_rep='-') + +Concatenating a Series and something list-like into a Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The first argument to :meth:`~Series.str.cat` can be a list-like object, provided that it matches the length of the calling ``Series`` (or ``Index``). + +.. ipython:: python + + s.str.cat(['A', 'B', 'C', 'D']) + +Missing values on either side will result in missing values in the result as well, *unless* ``na_rep`` is specified: + +.. ipython:: python + + s.str.cat(t) + s.str.cat(t, na_rep='-') + +Concatenating a Series and something array-like into a Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 0.23.0 + +The parameter ``others`` can also be two-dimensional. In this case, the number of rows must match the lengths of the calling ``Series`` (or ``Index``). + +..
ipython:: python + + d = pd.concat([t, s], axis=1) + s + d + s.str.cat(d, na_rep='-') + +Concatenating a Series and an indexed object into a Series, with alignment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 0.23.0 + +For concatenation with a ``Series`` or ``DataFrame``, it is possible to align the indexes before concatenation by setting +the ``join``-keyword. + +.. ipython:: python + :okwarning: + + u = pd.Series(['b', 'd', 'a', 'c'], index=[1, 3, 0, 2], + dtype="string") + s + u + s.str.cat(u) + s.str.cat(u, join='left') + +.. warning:: + + If the ``join`` keyword is not passed, the method :meth:`~Series.str.cat` will currently fall back to the behavior before version 0.23.0 (i.e. no alignment), + but a ``FutureWarning`` will be raised if any of the involved indexes differ, since this default will change to ``join='left'`` in a future version. + +The usual options are available for ``join`` (one of ``'left', 'outer', 'inner', 'right'``). +In particular, alignment also means that the different lengths do not need to coincide anymore. + +.. ipython:: python + + v = pd.Series(['z', 'a', 'b', 'd', 'e'], index=[-1, 0, 1, 3, 4], + dtype="string") + s + v + s.str.cat(v, join='left', na_rep='-') + s.str.cat(v, join='outer', na_rep='-') + +The same alignment can be used when ``others`` is a ``DataFrame``: + +.. ipython:: python + + f = d.loc[[3, 2, 1, 0], :] + s + f + s.str.cat(f, join='left', na_rep='-') + +Concatenating a Series and many objects into a Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Several array-like items (specifically: ``Series``, ``Index``, and 1-dimensional variants of ``np.ndarray``) +can be combined in a list-like container (including iterators, ``dict``-views, etc.). + +.. ipython:: python + + s + u + s.str.cat([u, u.to_numpy()], join='left') + +All elements without an index (e.g. 
``np.ndarray``) within the passed list-like must match in length to the calling ``Series`` (or ``Index``), +but ``Series`` and ``Index`` may have arbitrary length (as long as alignment is not disabled with ``join=None``): + +.. ipython:: python + + v + s.str.cat([v, u, u.to_numpy()], join='outer', na_rep='-') + +If using ``join='right'`` on a list-like of ``others`` that contains different indexes, +the union of these indexes will be used as the basis for the final concatenation: + +.. ipython:: python + + u.loc[[3]] + v.loc[[-1, 0]] + s.str.cat([u.loc[[3]], v.loc[[-1, 0]]], join='right', na_rep='-') + +Indexing with ``.str`` +---------------------- + +.. _text.indexing: + +You can use ``[]`` notation to directly index by position locations. If you index past the end +of the string, the result will be a ``NaN``. + + +.. ipython:: python + + s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, + 'CABA', 'dog', 'cat'], + dtype="string") + + s.str[0] + s.str[1] + +Extracting substrings +--------------------- + +.. _text.extract: + +Extract first match in each subject (extract) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + + Before version 0.23, argument ``expand`` of the ``extract`` method defaulted to + ``False``. When ``expand=False``, ``extract`` returns a ``Series``, ``Index``, or + ``DataFrame``, depending on the subject and regular expression + pattern. When ``expand=True``, it always returns a ``DataFrame``, + which is more consistent and less confusing from the perspective of a user. + ``expand=True`` has been the default since version 0.23.0. + +The ``extract`` method accepts a `regular expression +<https://docs.python.org/3/library/re.html>`__ with at least one +capture group. + +Extracting a regular expression with more than one group returns a +DataFrame with one column per group. + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3'], + dtype="string").str.extract(r'([ab])(\d)', expand=False) + +Elements that do not match return a row filled with ``NaN``.
Thus, a +Series of messy strings can be "converted" into a like-indexed Series +or DataFrame of cleaned-up or more useful strings, without +necessitating ``get()`` to access tuples or ``re.match`` objects. The +dtype of the result is always object, even if no match is found and +the result only contains ``NaN``. + +Named groups like + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3'], + dtype="string").str.extract(r'(?P<letter>[ab])(?P<digit>\d)', + expand=False) + +and optional groups like + +.. ipython:: python + + pd.Series(['a1', 'b2', '3'], + dtype="string").str.extract(r'([ab])?(\d)', expand=False) + +can also be used. Note that any capture group names in the regular +expression will be used for column names; otherwise capture group +numbers will be used. + +Extracting a regular expression with one group returns a ``DataFrame`` +with one column if ``expand=True``. + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3'], + dtype="string").str.extract(r'[ab](\d)', expand=True) + +It returns a Series if ``expand=False``. + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3'], + dtype="string").str.extract(r'[ab](\d)', expand=False) + +Calling on an ``Index`` with a regex with exactly one capture group +returns a ``DataFrame`` with one column if ``expand=True``. + +.. ipython:: python + + s = pd.Series(["a1", "b2", "c3"], ["A11", "B22", "C33"], + dtype="string") + s + s.index.str.extract("(?P<letter>[a-zA-Z])", expand=True) + +It returns an ``Index`` if ``expand=False``. + +.. ipython:: python + + s.index.str.extract("(?P<letter>[a-zA-Z])", expand=False) + +Calling on an ``Index`` with a regex with more than one capture group +returns a ``DataFrame`` if ``expand=True``. + +.. ipython:: python + + s.index.str.extract("(?P<letter>[a-zA-Z])([0-9]+)", expand=True) + +It raises ``ValueError`` if ``expand=False``. + +..
code-block:: python + + >>> s.index.str.extract("(?P<letter>[a-zA-Z])([0-9]+)", expand=False) + ValueError: only one regex group is supported with Index + +The table below summarizes the behavior of ``extract(expand=False)`` +(input subject in first column, number of groups in regex in +first row) + ++--------+---------+------------+ +| | 1 group | >1 group | ++--------+---------+------------+ +| Index | Index | ValueError | ++--------+---------+------------+ +| Series | Series | DataFrame | ++--------+---------+------------+ + +Extract all matches in each subject (extractall) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. _text.extractall: + +Unlike ``extract`` (which returns only the first match), + +.. ipython:: python + + s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"], + dtype="string") + s + two_groups = '(?P<letter>[a-z])(?P<digit>[0-9])' + s.str.extract(two_groups, expand=True) + +the ``extractall`` method returns every match. The result of +``extractall`` is always a ``DataFrame`` with a ``MultiIndex`` on its +rows. The last level of the ``MultiIndex`` is named ``match`` and +indicates the order in the subject. + +.. ipython:: python + + s.str.extractall(two_groups) + +When each subject string in the Series has exactly one match, + +.. ipython:: python + + s = pd.Series(['a3', 'b3', 'c2'], dtype="string") + s + +then ``extractall(pat).xs(0, level='match')`` gives the same result as +``extract(pat)``. + +.. ipython:: python + + extract_result = s.str.extract(two_groups, expand=True) + extract_result + extractall_result = s.str.extractall(two_groups) + extractall_result + extractall_result.xs(0, level="match") + +``Index`` also supports ``.str.extractall``. It returns a ``DataFrame`` which has the +same result as a ``Series.str.extractall`` with a default index (starts from 0). + +..
ipython:: python + + pd.Index(["a1a2", "b1", "c1"]).str.extractall(two_groups) + + pd.Series(["a1a2", "b1", "c1"], dtype="string").str.extractall(two_groups) + + +Testing for Strings that match or contain a pattern +--------------------------------------------------- + +You can check whether elements contain a pattern: + +.. ipython:: python + + pattern = r'[0-9][a-z]' + pd.Series(['1', '2', '3a', '3b', '03c'], + dtype="string").str.contains(pattern) + +Or whether elements match a pattern: + +.. ipython:: python + + pd.Series(['1', '2', '3a', '3b', '03c'], + dtype="string").str.match(pattern) + +The distinction between ``match`` and ``contains`` is strictness: ``match`` +relies on strict ``re.match``, while ``contains`` relies on ``re.search``. + +Methods like ``match``, ``contains``, ``startswith``, and ``endswith`` take +an extra ``na`` argument so missing values can be considered True or False: + +.. ipython:: python + + s4 = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'], + dtype="string") + s4.str.contains('A', na=False) + +.. _text.indicator: + +Creating indicator variables +---------------------------- + +You can extract dummy variables from string columns. +For example if they are separated by a ``'|'``: + +.. ipython:: python + + s = pd.Series(['a', 'a|b', np.nan, 'a|c'], dtype="string") + s.str.get_dummies(sep='|') + +String ``Index`` also supports ``get_dummies`` which returns a ``MultiIndex``. + +.. ipython:: python + + idx = pd.Index(['a', 'a|b', np.nan, 'a|c']) + idx.str.get_dummies(sep='|') + +See also :func:`~pandas.get_dummies`. + +Method summary +-------------- + +.. _text.summary: + +.. 
csv-table:: + :header: "Method", "Description" + :widths: 20, 80 + :delim: ; + + :meth:`~Series.str.cat`;Concatenate strings + :meth:`~Series.str.split`;Split strings on delimiter + :meth:`~Series.str.rsplit`;Split strings on delimiter working from the end of the string + :meth:`~Series.str.get`;Index into each element (retrieve i-th element) + :meth:`~Series.str.join`;Join strings in each element of the Series with passed separator + :meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables + :meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex + :meth:`~Series.str.replace`;Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence + :meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``) + :meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings" + :meth:`~Series.str.center`;Equivalent to ``str.center`` + :meth:`~Series.str.ljust`;Equivalent to ``str.ljust`` + :meth:`~Series.str.rjust`;Equivalent to ``str.rjust`` + :meth:`~Series.str.zfill`;Equivalent to ``str.zfill`` + :meth:`~Series.str.wrap`;Split long strings into lines with length less than a given width + :meth:`~Series.str.slice`;Slice each string in the Series + :meth:`~Series.str.slice_replace`;Replace slice in each string with passed value + :meth:`~Series.str.count`;Count occurrences of pattern + :meth:`~Series.str.startswith`;Equivalent to ``str.startswith(pat)`` for each element + :meth:`~Series.str.endswith`;Equivalent to ``str.endswith(pat)`` for each element + :meth:`~Series.str.findall`;Compute list of all occurrences of pattern/regex for each string + :meth:`~Series.str.match`;"Call ``re.match`` on each element, returning matched groups as list" + :meth:`~Series.str.extract`;"Call ``re.search`` on each element, returning DataFrame with one row for each element and one column for each regex capture 
group" + :meth:`~Series.str.extractall`;"Call ``re.findall`` on each element, returning DataFrame with one row for each match and one column for each regex capture group" + :meth:`~Series.str.len`;Compute string lengths + :meth:`~Series.str.strip`;Equivalent to ``str.strip`` + :meth:`~Series.str.rstrip`;Equivalent to ``str.rstrip`` + :meth:`~Series.str.lstrip`;Equivalent to ``str.lstrip`` + :meth:`~Series.str.partition`;Equivalent to ``str.partition`` + :meth:`~Series.str.rpartition`;Equivalent to ``str.rpartition`` + :meth:`~Series.str.lower`;Equivalent to ``str.lower`` + :meth:`~Series.str.casefold`;Equivalent to ``str.casefold`` + :meth:`~Series.str.upper`;Equivalent to ``str.upper`` + :meth:`~Series.str.find`;Equivalent to ``str.find`` + :meth:`~Series.str.rfind`;Equivalent to ``str.rfind`` + :meth:`~Series.str.index`;Equivalent to ``str.index`` + :meth:`~Series.str.rindex`;Equivalent to ``str.rindex`` + :meth:`~Series.str.capitalize`;Equivalent to ``str.capitalize`` + :meth:`~Series.str.swapcase`;Equivalent to ``str.swapcase`` + :meth:`~Series.str.normalize`;Return Unicode normal form. Equivalent to ``unicodedata.normalize`` + :meth:`~Series.str.translate`;Equivalent to ``str.translate`` + :meth:`~Series.str.isalnum`;Equivalent to ``str.isalnum`` + :meth:`~Series.str.isalpha`;Equivalent to ``str.isalpha`` + :meth:`~Series.str.isdigit`;Equivalent to ``str.isdigit`` + :meth:`~Series.str.isspace`;Equivalent to ``str.isspace`` + :meth:`~Series.str.islower`;Equivalent to ``str.islower`` + :meth:`~Series.str.isupper`;Equivalent to ``str.isupper`` + :meth:`~Series.str.istitle`;Equivalent to ``str.istitle`` + :meth:`~Series.str.isnumeric`;Equivalent to ``str.isnumeric`` + :meth:`~Series.str.isdecimal`;Equivalent to ``str.isdecimal`` diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst new file mode 100644 index 00000000..3439a0a4 --- /dev/null +++ b/doc/source/user_guide/timedeltas.rst @@ -0,0 +1,481 @@ +.. 
_timedeltas: + +{{ header }} + +.. _timedeltas.timedeltas: + +*********** +Time deltas +*********** + +Timedeltas are differences in times, expressed in different units, e.g. days, hours, minutes, +seconds. They can be both positive and negative. + +``Timedelta`` is a subclass of ``datetime.timedelta``, and behaves in a similar manner, +but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, +parsing, and attributes. + +Parsing +------- + +You can construct a ``Timedelta`` scalar through various arguments: + +.. ipython:: python + + import datetime + + # strings + pd.Timedelta('1 days') + pd.Timedelta('1 days 00:00:00') + pd.Timedelta('1 days 2 hours') + pd.Timedelta('-1 days 2 min 3us') + + # like datetime.timedelta + # note: these MUST be specified as keyword arguments + pd.Timedelta(days=1, seconds=1) + + # integers with a unit + pd.Timedelta(1, unit='d') + + # from a datetime.timedelta/np.timedelta64 + pd.Timedelta(datetime.timedelta(days=1, seconds=1)) + pd.Timedelta(np.timedelta64(1, 'ms')) + + # negative Timedeltas have this string repr + # to be more consistent with datetime.timedelta conventions + pd.Timedelta('-1us') + + # a NaT + pd.Timedelta('nan') + pd.Timedelta('nat') + + # ISO 8601 Duration strings + pd.Timedelta('P0DT0H1M0S') + pd.Timedelta('P0DT0H0M0.000000123S') + +.. versionadded:: 0.23.0 + + Added constructor for `ISO 8601 Duration`_ strings + +:ref:`DateOffsets<timeseries.offsets>` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction. + +.. ipython:: python + + pd.Timedelta(pd.offsets.Second(2)) + +Further, operations among the scalars yield another scalar ``Timedelta``. + +..
ipython:: python + + pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) +\ + pd.Timedelta('00:00:00.000123') + +to_timedelta +~~~~~~~~~~~~ + +Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, +or Series from a recognized timedelta format / value into a ``Timedelta`` type. +It will construct Series if the input is a Series, a scalar if the input is +scalar-like, otherwise it will output a ``TimedeltaIndex``. + +You can parse a single string to a Timedelta: + +.. ipython:: python + + pd.to_timedelta('1 days 06:05:01.00003') + pd.to_timedelta('15.5us') + +or a list/array of strings: + +.. ipython:: python + + pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan']) + +The ``unit`` keyword argument specifies the unit of the Timedelta: + +.. ipython:: python + + pd.to_timedelta(np.arange(5), unit='s') + pd.to_timedelta(np.arange(5), unit='d') + +.. _timedeltas.limitations: + +Timedelta limitations +~~~~~~~~~~~~~~~~~~~~~ + +Pandas represents ``Timedeltas`` in nanosecond resolution using +64 bit integers. As such, the 64 bit integer limits determine +the ``Timedelta`` limits. + +.. ipython:: python + + pd.Timedelta.min + pd.Timedelta.max + +.. _timedeltas.operations: + +Operations +---------- + +You can operate on Series/DataFrames and construct ``timedelta64[ns]`` Series through +subtraction operations on ``datetime64[ns]`` Series, or ``Timestamps``. + +.. ipython:: python + + s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D')) + td = pd.Series([pd.Timedelta(days=i) for i in range(3)]) + df = pd.DataFrame({'A': s, 'B': td}) + df + df['C'] = df['A'] + df['B'] + df + df.dtypes + + s - s.max() + s - datetime.datetime(2011, 1, 1, 3, 5) + s + datetime.timedelta(minutes=5) + s + pd.offsets.Minute(5) + s + pd.offsets.Minute(5) + pd.offsets.Milli(5) + +Operations with scalars from a ``timedelta64[ns]`` series: + +.. ipython:: python + + y = s - s[0] + y + +Series of timedeltas with ``NaT`` values are supported: + +.. 
ipython:: python + + y = s - s.shift() + y + +Elements can be set to ``NaT`` using ``np.nan`` analogously to datetimes: + +.. ipython:: python + + y[1] = np.nan + y + +Operands can also appear in a reversed order (a singular object operated with a Series): + +.. ipython:: python + + s.max() - s + datetime.datetime(2011, 1, 1, 3, 5) - s + datetime.timedelta(minutes=5) + s + +``min, max`` and the corresponding ``idxmin, idxmax`` operations are supported on frames: + +.. ipython:: python + + A = s - pd.Timestamp('20120101') - pd.Timedelta('00:05:05') + B = s - pd.Series(pd.date_range('2012-1-2', periods=3, freq='D')) + + df = pd.DataFrame({'A': A, 'B': B}) + df + + df.min() + df.min(axis=1) + + df.idxmin() + df.idxmax() + +``min, max, idxmin, idxmax`` operations are supported on Series as well. A scalar result will be a ``Timedelta``. + +.. ipython:: python + + df.min().max() + df.min(axis=1).min() + + df.min().idxmax() + df.min(axis=1).idxmin() + +You can fillna on timedeltas, passing a timedelta to get a particular value. + +.. ipython:: python + + y.fillna(pd.Timedelta(0)) + y.fillna(pd.Timedelta(10, unit='s')) + y.fillna(pd.Timedelta('-1 days, 00:00:05')) + +You can also negate, multiply and use ``abs`` on ``Timedeltas``: + +.. ipython:: python + + td1 = pd.Timedelta('-1 days 2 hours 3 seconds') + td1 + -1 * td1 + - td1 + abs(td1) + +.. _timedeltas.timedeltas_reductions: + +Reductions +---------- + +Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` objects. As usual +``NaT`` are skipped during evaluation. + +.. ipython:: python + + y2 = pd.Series(pd.to_timedelta(['-1 days +00:00:05', 'nat', + '-1 days +00:00:05', '1 days'])) + y2 + y2.mean() + y2.median() + y2.quantile(.1) + y2.sum() + +.. 
_timedeltas.timedeltas_convert: + +Frequency conversion +-------------------- + +Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, +or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. +Note that division by the NumPy scalar is true division, while astyping is equivalent to floor division. + +.. ipython:: python + + december = pd.Series(pd.date_range('20121201', periods=4)) + january = pd.Series(pd.date_range('20130101', periods=4)) + td = january - december + + td[2] += datetime.timedelta(minutes=5, seconds=3) + td[3] = np.nan + td + + # to days + td / np.timedelta64(1, 'D') + td.astype('timedelta64[D]') + + # to seconds + td / np.timedelta64(1, 's') + td.astype('timedelta64[s]') + + # to months (these are constant months) + td / np.timedelta64(1, 'M') + +Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series +yields another ``timedelta64[ns]`` dtype Series. + +.. ipython:: python + + td * -1 + td * pd.Series([1, 2, 3, 4]) + +Rounded division (floor-division) of a ``timedelta64[ns]`` Series by a scalar +``Timedelta`` gives a series of integers. + +.. ipython:: python + + td // pd.Timedelta(days=3, hours=4) + pd.Timedelta(days=3, hours=4) // td + +.. _timedeltas.mod_divmod: + +The mod (%) and divmod operations are defined for ``Timedelta`` when operating with another timedelta-like or with a numeric argument. + +..
ipython:: python + + pd.Timedelta(hours=37) % datetime.timedelta(hours=2) + + # divmod against a timedelta-like returns a pair (int, Timedelta) + divmod(datetime.timedelta(hours=2), pd.Timedelta(minutes=11)) + + # divmod against a numeric returns a pair (Timedelta, Timedelta) + divmod(pd.Timedelta(hours=25), 86400000000000) + +Attributes +---------- + +You can access various components of the ``Timedelta`` or ``TimedeltaIndex`` directly using the attributes ``days,seconds,microseconds,nanoseconds``. These are identical to the values returned by ``datetime.timedelta``, in that, for example, the ``.seconds`` attribute represents the number of seconds >= 0 and < 1 day. These are signed according to whether the ``Timedelta`` is signed. + +These operations can also be directly accessed via the ``.dt`` property of the ``Series`` as well. + +.. note:: + + Note that the attributes are NOT the displayed values of the ``Timedelta``. Use ``.components`` to retrieve the displayed values. + +For a ``Series``: + +.. ipython:: python + + td.dt.days + td.dt.seconds + +You can access the value of the fields for a scalar ``Timedelta`` directly. + +.. ipython:: python + + tds = pd.Timedelta('31 days 5 min 3 sec') + tds.days + tds.seconds + (-tds).seconds + +You can use the ``.components`` property to access a reduced form of the timedelta. This returns a ``DataFrame`` indexed +similarly to the ``Series``. These are the *displayed* values of the ``Timedelta``. + +.. ipython:: python + + td.dt.components + td.dt.components.seconds + +.. _timedeltas.isoformat: + +You can convert a ``Timedelta`` to an `ISO 8601 Duration`_ string with the +``.isoformat`` method + +.. ipython:: python + + pd.Timedelta(days=6, minutes=50, seconds=3, + milliseconds=10, microseconds=10, + nanoseconds=12).isoformat() + +.. _ISO 8601 Duration: https://en.wikipedia.org/wiki/ISO_8601#Durations + +.. 
_timedeltas.index: + +TimedeltaIndex +-------------- + +To generate an index with time delta, you can use either the :class:`TimedeltaIndex` or +the :func:`timedelta_range` constructor. + +Using ``TimedeltaIndex`` you can pass string-like, ``Timedelta``, ``timedelta``, +or ``np.timedelta64`` objects. Passing ``np.nan/pd.NaT/nat`` will represent missing values. + +.. ipython:: python + + pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', np.timedelta64(2, 'D'), + datetime.timedelta(days=2, seconds=2)]) + +The string 'infer' can be passed in order to set the frequency of the index as the +inferred frequency upon creation: + +.. ipython:: python + + pd.TimedeltaIndex(['0 days', '10 days', '20 days'], freq='infer') + +Generating ranges of time deltas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to :func:`date_range`, you can construct regular ranges of a ``TimedeltaIndex`` +using :func:`timedelta_range`. The default frequency for ``timedelta_range`` is +calendar day: + +.. ipython:: python + + pd.timedelta_range(start='1 days', periods=5) + +Various combinations of ``start``, ``end``, and ``periods`` can be used with +``timedelta_range``: + +.. ipython:: python + + pd.timedelta_range(start='1 days', end='5 days') + + pd.timedelta_range(end='10 days', periods=4) + +The ``freq`` parameter can be passed a variety of :ref:`frequency aliases <timeseries.offset_aliases>`: + +.. ipython:: python + + pd.timedelta_range(start='1 days', end='2 days', freq='30T') + + pd.timedelta_range(start='1 days', periods=5, freq='2D5H') + + +.. versionadded:: 0.23.0 + +Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced +timedeltas from ``start`` to ``end`` inclusively, with ``periods`` number of elements +in the resulting ``TimedeltaIndex``: + +.. 
ipython:: python + + pd.timedelta_range('0 days', '4 days', periods=5) + + pd.timedelta_range('0 days', '4 days', periods=10) + +Using the TimedeltaIndex +~~~~~~~~~~~~~~~~~~~~~~~~ + +Similarly to the other datetime-like indices, ``DatetimeIndex`` and ``PeriodIndex``, you can use +``TimedeltaIndex`` as the index of pandas objects. + +.. ipython:: python + + s = pd.Series(np.arange(100), + index=pd.timedelta_range('1 days', periods=100, freq='h')) + s + +Selections work similarly, with coercion on string-likes and slices: + +.. ipython:: python + + s['1 day':'2 day'] + s['1 day 01:00:00'] + s[pd.Timedelta('1 day 1h')] + +Furthermore you can use partial string selection and the range will be inferred: + +.. ipython:: python + + s['1 day':'1 day 5 hours'] + +Operations +~~~~~~~~~~ + +Finally, the combination of ``TimedeltaIndex`` with ``DatetimeIndex`` allows certain combination operations that are NaT preserving: + +.. ipython:: python + + tdi = pd.TimedeltaIndex(['1 days', pd.NaT, '2 days']) + tdi.to_list() + dti = pd.date_range('20130101', periods=3) + dti.to_list() + (dti + tdi).to_list() + (dti - tdi).to_list() + +Conversions +~~~~~~~~~~~ + +Similarly to frequency conversion on a ``Series`` above, you can convert these indices to yield another Index. + +.. ipython:: python + + tdi / np.timedelta64(1, 's') + tdi.astype('timedelta64[s]') + +Scalar type ops work as well. These can potentially return a *different* type of index. + +.. ipython:: python + + # adding a timedelta and a date -> datelike + tdi + pd.Timestamp('20130101') + + # subtraction of a date and a timedelta -> datelike + # note that trying to subtract a date from a Timedelta will raise an exception + (pd.Timestamp('20130101') - tdi).to_list() + + # timedelta + timedelta -> timedelta + tdi + pd.Timedelta('10 days') + + # division can result in a Timedelta if the divisor is an integer + tdi / 2 + + # or a Float64Index if the divisor is a Timedelta + tdi / tdi[0] + +.. 
_timedeltas.resampling: + +Resampling +---------- + +Similar to :ref:`timeseries resampling `, we can resample with a ``TimedeltaIndex``. + +.. ipython:: python + + s.resample('D').mean() diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst new file mode 100644 index 00000000..08b2ae0a --- /dev/null +++ b/doc/source/user_guide/timeseries.rst @@ -0,0 +1,2436 @@ +.. _timeseries: + +{{ header }} + +******************************** +Time series / date functionality +******************************** + +pandas contains extensive capabilities and features for working with time series data for all domains. +Using the NumPy ``datetime64`` and ``timedelta64`` dtypes, pandas has consolidated a large number of +features from other Python libraries like ``scikits.timeseries`` as well as created +a tremendous amount of new functionality for manipulating time series data. + +For example, pandas supports: + +Parsing time series information from various sources and formats + +.. ipython:: python + + import datetime + + dti = pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01'), + datetime.datetime(2018, 1, 1)]) + dti + +Generate sequences of fixed-frequency dates and time spans + +.. ipython:: python + + dti = pd.date_range('2018-01-01', periods=3, freq='H') + dti + +Manipulating and converting date times with timezone information + +.. ipython:: python + + dti = dti.tz_localize('UTC') + dti + dti.tz_convert('US/Pacific') + +Resampling or converting a time series to a particular frequency + +.. ipython:: python + + idx = pd.date_range('2018-01-01', periods=5, freq='H') + ts = pd.Series(range(len(idx)), index=idx) + ts + ts.resample('2H').mean() + +Performing date and time arithmetic with absolute or relative time increments + +.. 
ipython:: python + + friday = pd.Timestamp('2018-01-05') + friday.day_name() + # Add 1 day + saturday = friday + pd.Timedelta('1 day') + saturday.day_name() + # Add 1 business day (Friday --> Monday) + monday = friday + pd.offsets.BDay() + monday.day_name() + +pandas provides a relatively compact and self-contained set of tools for +performing the above tasks and more. + + +.. _timeseries.overview: + +Overview +-------- + +pandas captures 4 general time related concepts: + +#. Date times: A specific date and time with timezone support. Similar to ``datetime.datetime`` from the standard library. +#. Time deltas: An absolute time duration. Similar to ``datetime.timedelta`` from the standard library. +#. Time spans: A span of time defined by a point in time and its associated frequency. +#. Date offsets: A relative time duration that respects calendar arithmetic. Similar to ``dateutil.relativedelta.relativedelta`` from the ``dateutil`` package. + +===================== ================= =================== ============================================ ======================================== +Concept Scalar Class Array Class pandas Data Type Primary Creation Method +===================== ================= =================== ============================================ ======================================== +Date times ``Timestamp`` ``DatetimeIndex`` ``datetime64[ns]`` or ``datetime64[ns, tz]`` ``to_datetime`` or ``date_range`` +Time deltas ``Timedelta`` ``TimedeltaIndex`` ``timedelta64[ns]`` ``to_timedelta`` or ``timedelta_range`` +Time spans ``Period`` ``PeriodIndex`` ``period[freq]`` ``Period`` or ``period_range`` +Date offsets ``DateOffset`` ``None`` ``None`` ``DateOffset`` +===================== ================= =================== ============================================ ======================================== + +For time series data, it's conventional to represent the time component in the index of a :class:`Series` or :class:`DataFrame` +so 
manipulations can be performed with respect to the time element. + +.. ipython:: python + + pd.Series(range(3), index=pd.date_range('2000', freq='D', periods=3)) + +However, :class:`Series` and :class:`DataFrame` can directly also support the time component as data itself. + +.. ipython:: python + + pd.Series(pd.date_range('2000', freq='D', periods=3)) + +:class:`Series` and :class:`DataFrame` have extended data type support and functionality for ``datetime``, ``timedelta`` +and ``Period`` data when passed into those constructors. ``DateOffset`` +data however will be stored as ``object`` data. + +.. ipython:: python + + pd.Series(pd.period_range('1/1/2011', freq='M', periods=3)) + pd.Series([pd.DateOffset(1), pd.DateOffset(2)]) + pd.Series(pd.date_range('1/1/2011', freq='M', periods=3)) + +Lastly, pandas represents null date times, time deltas, and time spans as ``NaT`` which +is useful for representing missing or null date like values and behaves similar +as ``np.nan`` does for float data. + +.. ipython:: python + + pd.Timestamp(pd.NaT) + pd.Timedelta(pd.NaT) + pd.Period(pd.NaT) + # Equality acts as np.nan would + pd.NaT == pd.NaT + +.. _timeseries.representation: + +Timestamps vs. Time Spans +------------------------- + +Timestamped data is the most basic type of time series data that associates +values with points in time. For pandas objects it means using the points in +time. + +.. ipython:: python + + pd.Timestamp(datetime.datetime(2012, 5, 1)) + pd.Timestamp('2012-05-01') + pd.Timestamp(2012, 5, 1) + +However, in many cases it is more natural to associate things like change +variables with a time span instead. The span represented by ``Period`` can be +specified explicitly, or inferred from datetime string format. + +For example: + +.. ipython:: python + + pd.Period('2011-01') + + pd.Period('2012-05', freq='D') + +:class:`Timestamp` and :class:`Period` can serve as an index. 
Lists of +``Timestamp`` and ``Period`` are automatically coerced to :class:`DatetimeIndex` +and :class:`PeriodIndex` respectively. + +.. ipython:: python + + dates = [pd.Timestamp('2012-05-01'), + pd.Timestamp('2012-05-02'), + pd.Timestamp('2012-05-03')] + ts = pd.Series(np.random.randn(3), dates) + + type(ts.index) + ts.index + + ts + + periods = [pd.Period('2012-01'), pd.Period('2012-02'), pd.Period('2012-03')] + + ts = pd.Series(np.random.randn(3), periods) + + type(ts.index) + ts.index + + ts + +pandas allows you to capture both representations and +convert between them. Under the hood, pandas represents timestamps using +instances of ``Timestamp`` and sequences of timestamps using instances of +``DatetimeIndex``. For regular time spans, pandas uses ``Period`` objects for +scalar values and ``PeriodIndex`` for sequences of spans. Better support for +irregular intervals with arbitrary start and end points is forthcoming in +future releases. + + +.. _timeseries.converting: + +Converting to timestamps +------------------------ + +To convert a :class:`Series` or list-like object of date-like objects e.g. strings, +epochs, or a mixture, you can use the ``to_datetime`` function. When passed +a ``Series``, this returns a ``Series`` (with the same index), while a list-like +is converted to a ``DatetimeIndex``: + +.. ipython:: python + + pd.to_datetime(pd.Series(['Jul 31, 2009', '2010-01-10', None])) + + pd.to_datetime(['2005/11/23', '2010.12.31']) + +If you use dates which start with the day first (i.e. European style), +you can pass the ``dayfirst`` flag: + +.. ipython:: python + + pd.to_datetime(['04-01-2012 10:00'], dayfirst=True) + + pd.to_datetime(['14-01-2012', '01-14-2012'], dayfirst=True) + +.. warning:: + + You see in the above example that ``dayfirst`` isn't strict, so if a date + can't be parsed with the day being first it will be parsed as if + ``dayfirst`` were False. + +If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``. 
+``Timestamp`` can also accept string input, but it doesn't accept string parsing +options like ``dayfirst`` or ``format``, so use ``to_datetime`` if these are required. + +.. ipython:: python + + pd.to_datetime('2010/11/12') + + pd.Timestamp('2010/11/12') + +You can also use the ``DatetimeIndex`` constructor directly: + +.. ipython:: python + + pd.DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05']) + +The string 'infer' can be passed in order to set the frequency of the index as the +inferred frequency upon creation: + +.. ipython:: python + + pd.DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], freq='infer') + +Providing a format argument +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to the required datetime string, a ``format`` argument can be passed to ensure specific parsing. +This could also potentially speed up the conversion considerably. + +.. ipython:: python + + pd.to_datetime('2010/11/12', format='%Y/%m/%d') + + pd.to_datetime('12-11-2010 00:00', format='%d-%m-%Y %H:%M') + +For more information on the choices available when specifying the ``format`` +option, see the Python `datetime documentation`_. + +.. _datetime documentation: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior + +Assembling datetime from multiple DataFrame columns +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can also pass a ``DataFrame`` of integer or string columns to assemble into a ``Series`` of ``Timestamps``. + +.. ipython:: python + + df = pd.DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [2, 3]}) + pd.to_datetime(df) + + +You can pass only the columns that you need to assemble. + +.. 
ipython:: python + + pd.to_datetime(df[['year', 'month', 'day']]) + +``pd.to_datetime`` looks for standard designations of the datetime component in the column names, including: + +* required: ``year``, ``month``, ``day`` +* optional: ``hour``, ``minute``, ``second``, ``millisecond``, ``microsecond``, ``nanosecond`` + +Invalid data +~~~~~~~~~~~~ + +The default behavior, ``errors='raise'``, is to raise when unparseable: + +.. code-block:: ipython + + In [2]: pd.to_datetime(['2009/07/31', 'asd'], errors='raise') + ValueError: Unknown string format + +Pass ``errors='ignore'`` to return the original input when unparseable: + +.. ipython:: python + + pd.to_datetime(['2009/07/31', 'asd'], errors='ignore') + +Pass ``errors='coerce'`` to convert unparseable data to ``NaT`` (not a time): + +.. ipython:: python + + pd.to_datetime(['2009/07/31', 'asd'], errors='coerce') + + +.. _timeseries.converting.epoch: + +Epoch timestamps +~~~~~~~~~~~~~~~~ + +pandas supports converting integer or float epoch times to ``Timestamp`` and +``DatetimeIndex``. The default unit is nanoseconds, since that is how ``Timestamp`` +objects are stored internally. However, epochs are often stored in another ``unit`` +which can be specified. These are computed from the starting point specified by the +``origin`` parameter. + +.. ipython:: python + + pd.to_datetime([1349720105, 1349806505, 1349892905, + 1349979305, 1350065705], unit='s') + + pd.to_datetime([1349720105100, 1349720105200, 1349720105300, + 1349720105400, 1349720105500], unit='ms') + +Constructing a :class:`Timestamp` or :class:`DatetimeIndex` with an epoch timestamp +with the ``tz`` argument specified will currently localize the epoch timestamps to UTC +first then convert the result to the specified time zone. However, this behavior +is :ref:`deprecated `, and if you have +epochs in wall time in another timezone, it is recommended to read the epochs +as timezone-naive timestamps and then localize to the appropriate timezone: + +.. 
ipython:: python + + pd.Timestamp(1262347200000000000).tz_localize('US/Pacific') + pd.DatetimeIndex([1262347200000000000]).tz_localize('US/Pacific') + +.. note:: + + Epoch times will be rounded to the nearest nanosecond. + +.. warning:: + + Conversion of float epoch times can lead to inaccurate and unexpected results. + :ref:`Python floats <python:tut-fp-issues>` have about 15 digits precision in + decimal. Rounding during conversion from float to high precision ``Timestamp`` is + unavoidable. The only way to achieve exact precision is to use a fixed-width + type (e.g. an int64). + + .. ipython:: python + + pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s') + pd.to_datetime(1490195805433502912, unit='ns') + +.. seealso:: + + :ref:`timeseries.origin` + +.. _timeseries.converting.epoch_inverse: + +From timestamps to epoch +~~~~~~~~~~~~~~~~~~~~~~~~ + +To invert the operation from above, namely, to convert from a ``Timestamp`` to a 'unix' epoch: + +.. ipython:: python + + stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D') + stamps + +We subtract the epoch (midnight at January 1, 1970 UTC) and then floor divide by the +"unit" (1 second). + +.. ipython:: python + + (stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s') + +.. _timeseries.origin: + +Using the ``origin`` Parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Using the ``origin`` parameter, one can specify an alternative starting point for creation +of a ``DatetimeIndex``. For example, to use 1960-01-01 as the starting date: + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. +Commonly called 'unix epoch' or POSIX time. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') + +.. 
_timeseries.daterange: + +Generating ranges of timestamps +------------------------------- + +To generate an index with timestamps, you can use either the ``DatetimeIndex`` or +``Index`` constructor and pass in a list of datetime objects: + +.. ipython:: python + + dates = [datetime.datetime(2012, 5, 1), + datetime.datetime(2012, 5, 2), + datetime.datetime(2012, 5, 3)] + + # Note the frequency information + index = pd.DatetimeIndex(dates) + index + + # Automatically converted to DatetimeIndex + index = pd.Index(dates) + index + +In practice this becomes very cumbersome because we often need a very long +index with a large number of timestamps. If we need timestamps on a regular +frequency, we can use the :func:`date_range` and :func:`bdate_range` functions +to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a +**calendar day** while the default for ``bdate_range`` is a **business day**: + +.. ipython:: python + + start = datetime.datetime(2011, 1, 1) + end = datetime.datetime(2012, 1, 1) + + index = pd.date_range(start, end) + index + + index = pd.bdate_range(start, end) + index + +Convenience functions like ``date_range`` and ``bdate_range`` can utilize a +variety of :ref:`frequency aliases `: + +.. ipython:: python + + pd.date_range(start, periods=1000, freq='M') + + pd.bdate_range(start, periods=250, freq='BQS') + +``date_range`` and ``bdate_range`` make it easy to generate a range of dates +using various combinations of parameters like ``start``, ``end``, ``periods``, +and ``freq``. The start and end dates are strictly inclusive, so dates outside +of those specified will not be generated: + +.. ipython:: python + + pd.date_range(start, end, freq='BM') + + pd.date_range(start, end, freq='W') + + pd.bdate_range(end=end, periods=20) + + pd.bdate_range(start=start, periods=20) + +.. 
versionadded:: 0.23.0 + +Specifying ``start``, ``end``, and ``periods`` will generate a range of evenly spaced +dates from ``start`` to ``end`` inclusively, with ``periods`` number of elements in the +resulting ``DatetimeIndex``: + +.. ipython:: python + + pd.date_range('2018-01-01', '2018-01-05', periods=5) + + pd.date_range('2018-01-01', '2018-01-05', periods=10) + +.. _timeseries.custom-freq-ranges: + +Custom frequency ranges +~~~~~~~~~~~~~~~~~~~~~~~ + +``bdate_range`` can also generate a range of custom frequency dates by using +the ``weekmask`` and ``holidays`` parameters. These parameters will only be +used if a custom frequency string is passed. + +.. ipython:: python + + weekmask = 'Mon Wed Fri' + + holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)] + + pd.bdate_range(start, end, freq='C', weekmask=weekmask, holidays=holidays) + + pd.bdate_range(start, end, freq='CBMS', weekmask=weekmask) + +.. seealso:: + + :ref:`timeseries.custombusinessdays` + +.. _timeseries.timestamp-limits: + +Timestamp limitations +--------------------- + +Since pandas represents timestamps in nanosecond resolution, the time span that +can be represented using a 64-bit integer is limited to approximately 584 years: + +.. ipython:: python + + pd.Timestamp.min + pd.Timestamp.max + +.. seealso:: + + :ref:`timeseries.oob` + +.. _timeseries.datetimeindex: + +Indexing +-------- + +One of the main uses for ``DatetimeIndex`` is as an index for pandas objects. +The ``DatetimeIndex`` class contains many time series related optimizations: + +* A large range of dates for various offsets are pre-computed and cached + under the hood in order to make generating subsequent date ranges very fast + (just have to grab a slice). +* Fast shifting using the ``shift`` and ``tshift`` method on pandas objects. +* Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is + very fast (important for fast data alignment). 
+* Quick access to date fields via properties such as ``year``, ``month``, etc. +* Regularization functions like ``snap`` and very fast ``asof`` logic. + +``DatetimeIndex`` objects have all the basic functionality of regular ``Index`` +objects, and a smorgasbord of advanced time series specific methods for easy +frequency processing. + +.. seealso:: + :ref:`Reindexing methods ` + +.. note:: + + While pandas does not force you to have a sorted date index, some of these + methods may have unexpected or incorrect behavior if the dates are unsorted. + +``DatetimeIndex`` can be used like a regular index and offers all of its +intelligent functionality like selection, slicing, etc. + +.. ipython:: python + + rng = pd.date_range(start, end, freq='BM') + ts = pd.Series(np.random.randn(len(rng)), index=rng) + ts.index + ts[:5].index + ts[::2].index + +.. _timeseries.partialindexing: + +Partial string indexing +~~~~~~~~~~~~~~~~~~~~~~~ + +Dates and strings that parse to timestamps can be passed as indexing parameters: + +.. ipython:: python + + ts['1/31/2011'] + + ts[datetime.datetime(2011, 12, 25):] + + ts['10/31/2011':'12/31/2011'] + +To provide convenience for accessing longer time series, you can also pass in +the year or year and month as strings: + +.. ipython:: python + + ts['2011'] + + ts['2011-6'] + +This type of slicing will work on a ``DataFrame`` with a ``DatetimeIndex`` as well. Since the +partial string selection is a form of label slicing, the endpoints **will be** included. This +would include matching times on an included date: + +.. ipython:: python + + dft = pd.DataFrame(np.random.randn(100000, 1), columns=['A'], + index=pd.date_range('20130101', periods=100000, freq='T')) + dft + dft['2013'] + +This starts on the very first time in the month, and includes the last date and +time for the month: + +.. ipython:: python + + dft['2013-1':'2013-2'] + +This specifies a stop time **that includes all of the times on the last day**: + +.. 
ipython:: python + + dft['2013-1':'2013-2-28'] + +This specifies an **exact** stop time (and is not the same as the above): + +.. ipython:: python + + dft['2013-1':'2013-2-28 00:00:00'] + +We are stopping on the included end-point as it is part of the index: + +.. ipython:: python + + dft['2013-1-15':'2013-1-15 12:30:00'] + +``DatetimeIndex`` partial string indexing also works on a ``DataFrame`` with a ``MultiIndex``: + +.. ipython:: python + + dft2 = pd.DataFrame(np.random.randn(20, 1), + columns=['A'], + index=pd.MultiIndex.from_product( + [pd.date_range('20130101', periods=10, freq='12H'), + ['a', 'b']])) + dft2 + dft2.loc['2013-01-05'] + idx = pd.IndexSlice + dft2 = dft2.swaplevel(0, 1).sort_index() + dft2.loc[idx[:, '2013-01-05'], :] + +.. versionadded:: 0.25.0 + +Slicing with string indexing also honors UTC offset. + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + df + df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] + +.. _timeseries.slice_vs_exact_match: + +Slice vs. exact match +~~~~~~~~~~~~~~~~~~~~~ + +.. versionchanged:: 0.20.0 + +The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. + +Consider a ``Series`` object with a minute resolution index: + +.. ipython:: python + + series_minute = pd.Series([1, 2, 3], + pd.DatetimeIndex(['2011-12-31 23:59:00', + '2012-01-01 00:00:00', + '2012-01-01 00:02:00'])) + series_minute.index.resolution + +A timestamp string less accurate than a minute gives a ``Series`` object. + +.. ipython:: python + + series_minute['2011-12-31 23'] + +A timestamp string with minute resolution (or more accurate), gives a scalar instead, i.e. it is not casted to a slice. + +.. 
ipython:: python + + series_minute['2011-12-31 23:59'] + series_minute['2011-12-31 23:59:00'] + +If index resolution is second, then the minute-accurate timestamp gives a +``Series``. + +.. ipython:: python + + series_second = pd.Series([1, 2, 3], + pd.DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01'])) + series_second.index.resolution + series_second['2011-12-31 23:59'] + +If the timestamp string is treated as a slice, it can be used to index ``DataFrame`` with ``[]`` as well. + +.. ipython:: python + + dft_minute = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, + index=series_minute.index) + dft_minute['2011-12-31 23'] + + +.. warning:: + + However, if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics <indexing.basics>`. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2011-12-31 23:59'`` has the same resolution as the index and there is no column with such a name. + + To *always* have unambiguous selection, whether the row is treated as a slice or a single selection, use ``.loc``. + + .. ipython:: python + + dft_minute.loc['2011-12-31 23:59'] + +Note also that ``DatetimeIndex`` resolution cannot be less precise than day. + +.. ipython:: python + + series_monthly = pd.Series([1, 2, 3], + pd.DatetimeIndex(['2011-12', '2012-01', '2012-02'])) + series_monthly.index.resolution + series_monthly['2011-12'] # returns Series + + +Exact indexing +~~~~~~~~~~~~~~ + +As discussed in the previous section, indexing a ``DatetimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. 
+ +These ``Timestamp`` and ``datetime`` objects have exact ``hours, minutes,`` and ``seconds``, even though they were not explicitly specified (they are ``0``). + +.. ipython:: python + + dft[datetime.datetime(2013, 1, 1):datetime.datetime(2013, 2, 28)] + +With no defaults. + +.. ipython:: python + + dft[datetime.datetime(2013, 1, 1, 10, 12, 0): + datetime.datetime(2013, 2, 28, 10, 12, 0)] + + +Truncating & fancy indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A :meth:`~DataFrame.truncate` convenience function is provided that is similar +to slicing. Note that ``truncate`` assumes a 0 value for any unspecified date +component in a ``DatetimeIndex`` in contrast to slicing which returns any +partially matching dates: + +.. ipython:: python + + rng2 = pd.date_range('2011-01-01', '2012-01-01', freq='W') + ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2) + + ts2.truncate(before='2011-11', after='2011-12') + ts2['2011-11':'2011-12'] + +Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency +regularity will result in a ``DatetimeIndex``, although frequency is lost: + +.. ipython:: python + + ts2[[0, 2, 6]].index + +.. _timeseries.components: + +Time/date components +-------------------- + +There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DatetimeIndex``. + +.. 
csv-table:: + :header: "Property", "Description" + :widths: 15, 65 + + year, "The year of the datetime" + month,"The month of the datetime" + day,"The days of the datetime" + hour,"The hour of the datetime" + minute,"The minutes of the datetime" + second,"The seconds of the datetime" + microsecond,"The microseconds of the datetime" + nanosecond,"The nanoseconds of the datetime" + date,"Returns datetime.date (does not contain timezone information)" + time,"Returns datetime.time (does not contain timezone information)" + timetz,"Returns datetime.time as local time with timezone information" + dayofyear,"The ordinal day of year" + weekofyear,"The week ordinal of the year" + week,"The week ordinal of the year" + dayofweek,"The number of the day of the week with Monday=0, Sunday=6" + weekday,"The number of the day of the week with Monday=0, Sunday=6" + quarter,"Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc." + days_in_month,"The number of days in the month of the datetime" + is_month_start,"Logical indicating if first day of month (defined by frequency)" + is_month_end,"Logical indicating if last day of month (defined by frequency)" + is_quarter_start,"Logical indicating if first day of quarter (defined by frequency)" + is_quarter_end,"Logical indicating if last day of quarter (defined by frequency)" + is_year_start,"Logical indicating if first day of year (defined by frequency)" + is_year_end,"Logical indicating if last day of year (defined by frequency)" + is_leap_year,"Logical indicating if the date belongs to a leap year" + +Furthermore, if you have a ``Series`` with datetimelike values, then you can +access these properties via the ``.dt`` accessor, as detailed in the section +on :ref:`.dt accessors`. + +.. _timeseries.offsets: + +DateOffset objects +------------------ + +In the preceding examples, frequency strings (e.g. 
``'D'``) were used to specify +a frequency that defined: + +* how the date times in :class:`DatetimeIndex` were spaced when using :meth:`date_range` +* the frequency of a :class:`Period` or :class:`PeriodIndex` + +These frequency strings map to a :class:`DateOffset` object and its subclasses. A :class:`DateOffset` +is similar to a :class:`Timedelta` that represents a duration of time but follows specific calendar duration rules. +For example, a :class:`Timedelta` day will always increment ``datetimes`` by 24 hours, while a :class:`DateOffset` day +will increment ``datetimes`` to the same time the next day whether a day represents 23, 24 or 25 hours due to daylight +savings time. However, all :class:`DateOffset` subclasses that are an hour or smaller +(``Hour``, ``Minute``, ``Second``, ``Milli``, ``Micro``, ``Nano``) behave like +:class:`Timedelta` and respect absolute time. + +The basic :class:`DateOffset` acts similar to ``dateutil.relativedelta`` (`relativedelta documentation`_) +that shifts a date time by the corresponding calendar duration specified. The +arithmetic operator (``+``) or the ``apply`` method can be used to perform the shift. + +.. ipython:: python + + # This particular day contains a day light savings time transition + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + # Respects absolute time + ts + pd.Timedelta(days=1) + # Respects calendar time + ts + pd.DateOffset(days=1) + friday = pd.Timestamp('2018-01-05') + friday.day_name() + # Add 2 business days (Friday --> Tuesday) + two_business_days = 2 * pd.offsets.BDay() + two_business_days.apply(friday) + friday + two_business_days + (friday + two_business_days).day_name() + +Most ``DateOffsets`` have associated frequencies strings, or offset aliases, that can be passed +into ``freq`` keyword arguments. The available date offsets and associated frequency strings can be found below: + +.. 
csv-table:: + :header: "Date Offset", "Frequency String", "Description" + :widths: 15, 15, 65 + + :class:`~pandas.tseries.offsets.DateOffset`, None, "Generic offset class, defaults to 1 calendar day" + :class:`~pandas.tseries.offsets.BDay` or :class:`~pandas.tseries.offsets.BusinessDay`, ``'B'``,"business day (weekday)" + :class:`~pandas.tseries.offsets.CDay` or :class:`~pandas.tseries.offsets.CustomBusinessDay`, ``'C'``, "custom business day" + :class:`~pandas.tseries.offsets.Week`, ``'W'``, "one week, optionally anchored on a day of the week" + :class:`~pandas.tseries.offsets.WeekOfMonth`, ``'WOM'``, "the x-th day of the y-th week of each month" + :class:`~pandas.tseries.offsets.LastWeekOfMonth`, ``'LWOM'``, "the x-th day of the last week of each month" + :class:`~pandas.tseries.offsets.MonthEnd`, ``'M'``, "calendar month end" + :class:`~pandas.tseries.offsets.MonthBegin`, ``'MS'``, "calendar month begin" + :class:`~pandas.tseries.offsets.BMonthEnd` or :class:`~pandas.tseries.offsets.BusinessMonthEnd`, ``'BM'``, "business month end" + :class:`~pandas.tseries.offsets.BMonthBegin` or :class:`~pandas.tseries.offsets.BusinessMonthBegin`, ``'BMS'``, "business month begin" + :class:`~pandas.tseries.offsets.CBMonthEnd` or :class:`~pandas.tseries.offsets.CustomBusinessMonthEnd`, ``'CBM'``, "custom business month end" + :class:`~pandas.tseries.offsets.CBMonthBegin` or :class:`~pandas.tseries.offsets.CustomBusinessMonthBegin`, ``'CBMS'``, "custom business month begin" + :class:`~pandas.tseries.offsets.SemiMonthEnd`, ``'SM'``, "15th (or other day_of_month) and calendar month end" + :class:`~pandas.tseries.offsets.SemiMonthBegin`, ``'SMS'``, "15th (or other day_of_month) and calendar month begin" + :class:`~pandas.tseries.offsets.QuarterEnd`, ``'Q'``, "calendar quarter end" + :class:`~pandas.tseries.offsets.QuarterBegin`, ``'QS'``, "calendar quarter begin" + :class:`~pandas.tseries.offsets.BQuarterEnd`, ``'BQ``, "business quarter end" + 
:class:`~pandas.tseries.offsets.BQuarterBegin`, ``'BQS'``, "business quarter begin" + :class:`~pandas.tseries.offsets.FY5253Quarter`, ``'REQ'``, "retail (aka 52-53 week) quarter" + :class:`~pandas.tseries.offsets.YearEnd`, ``'A'``, "calendar year end" + :class:`~pandas.tseries.offsets.YearBegin`, ``'AS'`` or ``'BYS'``,"calendar year begin" + :class:`~pandas.tseries.offsets.BYearEnd`, ``'BA'``, "business year end" + :class:`~pandas.tseries.offsets.BYearBegin`, ``'BAS'``, "business year begin" + :class:`~pandas.tseries.offsets.FY5253`, ``'RE'``, "retail (aka 52-53 week) year" + :class:`~pandas.tseries.offsets.Easter`, None, "Easter holiday" + :class:`~pandas.tseries.offsets.BusinessHour`, ``'BH'``, "business hour" + :class:`~pandas.tseries.offsets.CustomBusinessHour`, ``'CBH'``, "custom business hour" + :class:`~pandas.tseries.offsets.Day`, ``'D'``, "one absolute day" + :class:`~pandas.tseries.offsets.Hour`, ``'H'``, "one hour" + :class:`~pandas.tseries.offsets.Minute`, ``'T'`` or ``'min'``,"one minute" + :class:`~pandas.tseries.offsets.Second`, ``'S'``, "one second" + :class:`~pandas.tseries.offsets.Milli`, ``'L'`` or ``'ms'``, "one millisecond" + :class:`~pandas.tseries.offsets.Micro`, ``'U'`` or ``'us'``, "one microsecond" + :class:`~pandas.tseries.offsets.Nano`, ``'N'``, "one nanosecond" + +``DateOffsets`` additionally have :meth:`rollforward` and :meth:`rollback` +methods for moving a date forward or backward respectively to a valid offset +date relative to the offset. For example, business offsets will roll dates +that land on the weekends (Saturday and Sunday) forward to Monday since +business offsets operate on the weekdays. + +.. 
ipython:: python + + ts = pd.Timestamp('2018-01-06 00:00:00') + ts.day_name() + # BusinessHour's valid offset dates are Monday through Friday + offset = pd.offsets.BusinessHour(start='09:00') + # Bring the date to the closest offset date (Monday) + offset.rollforward(ts) + # Date is brought to the closest offset date first and then the hour is added + ts + offset + +These operations preserve time (hour, minute, etc) information by default. +To reset time to midnight, use :meth:`normalize` before or after applying +the operation (depending on whether you want the time information included +in the operation). + +.. ipython:: python + + ts = pd.Timestamp('2014-01-01 09:00') + day = pd.offsets.Day() + day.apply(ts) + day.apply(ts).normalize() + + ts = pd.Timestamp('2014-01-01 22:00') + hour = pd.offsets.Hour() + hour.apply(ts) + hour.apply(ts).normalize() + hour.apply(pd.Timestamp("2014-01-01 23:30")).normalize() + +.. _relativedelta documentation: https://dateutil.readthedocs.io/en/stable/relativedelta.html + + +Parametric offsets +~~~~~~~~~~~~~~~~~~ + +Some of the offsets can be "parameterized" when created to result in different +behaviors. For example, the ``Week`` offset for generating weekly data accepts a +``weekday`` parameter which results in the generated dates always lying on a +particular day of the week: + +.. ipython:: python + + d = datetime.datetime(2008, 8, 18, 9, 0) + d + d + pd.offsets.Week() + d + pd.offsets.Week(weekday=4) + (d + pd.offsets.Week(weekday=4)).weekday() + + d - pd.offsets.Week() + +The ``normalize`` option will be effective for addition and subtraction. + +.. ipython:: python + + d + pd.offsets.Week(normalize=True) + d - pd.offsets.Week(normalize=True) + + +Another example is parameterizing ``YearEnd`` with the specific ending month: + +.. ipython:: python + + d + pd.offsets.YearEnd() + d + pd.offsets.YearEnd(month=6) + + +.. 
_timeseries.offsetseries: + +Using offsets with ``Series`` / ``DatetimeIndex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Offsets can be used with either a ``Series`` or ``DatetimeIndex`` to +apply the offset to each element. + +.. ipython:: python + + rng = pd.date_range('2012-01-01', '2012-01-03') + s = pd.Series(rng) + rng + rng + pd.DateOffset(months=2) + s + pd.DateOffset(months=2) + s - pd.DateOffset(months=2) + +If the offset class maps directly to a ``Timedelta`` (``Day``, ``Hour``, +``Minute``, ``Second``, ``Micro``, ``Milli``, ``Nano``) it can be +used exactly like a ``Timedelta`` - see the +:ref:`Timedelta section` for more examples. + +.. ipython:: python + + s - pd.offsets.Day(2) + td = s - pd.Series(pd.date_range('2011-12-29', '2011-12-31')) + td + td + pd.offsets.Minute(15) + +Note that some offsets (such as ``BQuarterEnd``) do not have a +vectorized implementation. They can still be used but may +calculate significantly slower and will show a ``PerformanceWarning`` + +.. ipython:: python + :okwarning: + + rng + pd.offsets.BQuarterEnd() + + +.. _timeseries.custombusinessdays: + +Custom business days +~~~~~~~~~~~~~~~~~~~~ + +The ``CDay`` or ``CustomBusinessDay`` class provides a parametric +``BusinessDay`` class which can be used to create customized business day +calendars which account for local holidays and local weekend conventions. + +As an interesting example, let's look at Egypt where a Friday-Saturday weekend is observed. + +.. ipython:: python + + weekmask_egypt = 'Sun Mon Tue Wed Thu' + + # They also observe International Workers' Day so let's + # add that for a couple of years + + holidays = ['2012-05-01', + datetime.datetime(2013, 5, 1), + np.datetime64('2014-05-01')] + bday_egypt = pd.offsets.CustomBusinessDay(holidays=holidays, + weekmask=weekmask_egypt) + dt = datetime.datetime(2013, 4, 30) + dt + 2 * bday_egypt + +Let's map to the weekday names: + +.. 
ipython:: python + + dts = pd.date_range(dt, periods=5, freq=bday_egypt) + + pd.Series(dts.weekday, dts).map( + pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split())) + +Holiday calendars can be used to provide the list of holidays. See the +:ref:`holiday calendar` section for more information. + +.. ipython:: python + + from pandas.tseries.holiday import USFederalHolidayCalendar + + bday_us = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar()) + + # Friday before MLK Day + dt = datetime.datetime(2014, 1, 17) + + # Tuesday after MLK Day (Monday is skipped because it's a holiday) + dt + bday_us + +Monthly offsets that respect a certain holiday calendar can be defined +in the usual way. + +.. ipython:: python + + bmth_us = pd.offsets.CustomBusinessMonthBegin( + calendar=USFederalHolidayCalendar()) + + # Skip new years + dt = datetime.datetime(2013, 12, 17) + dt + bmth_us + + # Define date index with custom offset + pd.date_range(start='20100101', end='20120101', freq=bmth_us) + +.. note:: + + The frequency string 'C' is used to indicate that a CustomBusinessDay + DateOffset is used, it is important to note that since CustomBusinessDay is + a parameterised type, instances of CustomBusinessDay may differ and this is + not detectable from the 'C' frequency string. The user therefore needs to + ensure that the 'C' frequency string is used consistently within the user's + application. + +.. _timeseries.businesshour: + +Business hour +~~~~~~~~~~~~~ + +The ``BusinessHour`` class provides a business hour representation on ``BusinessDay``, +allowing to use specific start and end times. + +By default, ``BusinessHour`` uses 9:00 - 17:00 as business hours. +Adding ``BusinessHour`` will increment ``Timestamp`` by hourly frequency. +If target ``Timestamp`` is out of business hours, move to the next business hour +then increment it. If the result exceeds the business hours end, the remaining +hours are added to the next business day. + +.. 
ipython:: python + + bh = pd.offsets.BusinessHour() + bh + + # 2014-08-01 is Friday + pd.Timestamp('2014-08-01 10:00').weekday() + pd.Timestamp('2014-08-01 10:00') + bh + + # Below example is the same as: pd.Timestamp('2014-08-01 09:00') + bh + pd.Timestamp('2014-08-01 08:00') + bh + + # If the results is on the end time, move to the next business day + pd.Timestamp('2014-08-01 16:00') + bh + + # Remainings are added to the next day + pd.Timestamp('2014-08-01 16:30') + bh + + # Adding 2 business hours + pd.Timestamp('2014-08-01 10:00') + pd.offsets.BusinessHour(2) + + # Subtracting 3 business hours + pd.Timestamp('2014-08-01 10:00') + pd.offsets.BusinessHour(-3) + +You can also specify ``start`` and ``end`` time by keywords. The argument must +be a ``str`` with an ``hour:minute`` representation or a ``datetime.time`` +instance. Specifying seconds, microseconds and nanoseconds as business hour +results in ``ValueError``. + +.. ipython:: python + + bh = pd.offsets.BusinessHour(start='11:00', end=datetime.time(20, 0)) + bh + + pd.Timestamp('2014-08-01 13:00') + bh + pd.Timestamp('2014-08-01 09:00') + bh + pd.Timestamp('2014-08-01 18:00') + bh + +Passing ``start`` time later than ``end`` represents midnight business hour. +In this case, business hour exceeds midnight and overlap to the next day. +Valid business hours are distinguished by whether it started from valid ``BusinessDay``. + +.. ipython:: python + + bh = pd.offsets.BusinessHour(start='17:00', end='09:00') + bh + + pd.Timestamp('2014-08-01 17:00') + bh + pd.Timestamp('2014-08-01 23:00') + bh + + # Although 2014-08-02 is Saturday, + # it is valid because it starts from 08-01 (Friday). + pd.Timestamp('2014-08-02 04:00') + bh + + # Although 2014-08-04 is Monday, + # it is out of business hours because it starts from 08-03 (Sunday). 
+ pd.Timestamp('2014-08-04 04:00') + bh + +Applying ``BusinessHour.rollforward`` and ``rollback`` to out of business hours results in +the next business hour start or previous day's end. Different from other offsets, ``BusinessHour.rollforward`` +may output different results from ``apply`` by definition. + +This is because one day's business hour end is equal to next day's business hour start. For example, +under the default business hours (9:00 - 17:00), there is no gap (0 minutes) between ``2014-08-01 17:00`` and +``2014-08-04 09:00``. + +.. ipython:: python + + # This adjusts a Timestamp to business hour edge + pd.offsets.BusinessHour().rollback(pd.Timestamp('2014-08-02 15:00')) + pd.offsets.BusinessHour().rollforward(pd.Timestamp('2014-08-02 15:00')) + + # It is the same as BusinessHour().apply(pd.Timestamp('2014-08-01 17:00')). + # And it is the same as BusinessHour().apply(pd.Timestamp('2014-08-04 09:00')) + pd.offsets.BusinessHour().apply(pd.Timestamp('2014-08-02 15:00')) + + # BusinessDay results (for reference) + pd.offsets.BusinessHour().rollforward(pd.Timestamp('2014-08-02')) + + # It is the same as BusinessDay().apply(pd.Timestamp('2014-08-01')) + # The result is the same as rollworward because BusinessDay never overlap. + pd.offsets.BusinessHour().apply(pd.Timestamp('2014-08-02')) + +``BusinessHour`` regards Saturday and Sunday as holidays. To use arbitrary +holidays, you can use ``CustomBusinessHour`` offset, as explained in the +following subsection. + +.. _timeseries.custombusinesshour: + +Custom business hour +~~~~~~~~~~~~~~~~~~~~ + +The ``CustomBusinessHour`` is a mixture of ``BusinessHour`` and ``CustomBusinessDay`` which +allows you to specify arbitrary holidays. ``CustomBusinessHour`` works as the same +as ``BusinessHour`` except that it skips specified custom holidays. + +.. 
ipython:: python + + from pandas.tseries.holiday import USFederalHolidayCalendar + bhour_us = pd.offsets.CustomBusinessHour(calendar=USFederalHolidayCalendar()) + # Friday before MLK Day + dt = datetime.datetime(2014, 1, 17, 15) + + dt + bhour_us + + # Tuesday after MLK Day (Monday is skipped because it's a holiday) + dt + bhour_us * 2 + +You can use keyword arguments supported by either ``BusinessHour`` and ``CustomBusinessDay``. + +.. ipython:: python + + bhour_mon = pd.offsets.CustomBusinessHour(start='10:00', + weekmask='Tue Wed Thu Fri') + + # Monday is skipped because it's a holiday, business hour starts from 10:00 + dt + bhour_mon * 2 + +.. _timeseries.offset_aliases: + +Offset aliases +~~~~~~~~~~~~~~ + +A number of string aliases are given to useful common time series +frequencies. We will refer to these aliases as *offset aliases*. + +.. csv-table:: + :header: "Alias", "Description" + :widths: 15, 100 + + "B", "business day frequency" + "C", "custom business day frequency" + "D", "calendar day frequency" + "W", "weekly frequency" + "M", "month end frequency" + "SM", "semi-month end frequency (15th and end of month)" + "BM", "business month end frequency" + "CBM", "custom business month end frequency" + "MS", "month start frequency" + "SMS", "semi-month start frequency (1st and 15th)" + "BMS", "business month start frequency" + "CBMS", "custom business month start frequency" + "Q", "quarter end frequency" + "BQ", "business quarter end frequency" + "QS", "quarter start frequency" + "BQS", "business quarter start frequency" + "A, Y", "year end frequency" + "BA, BY", "business year end frequency" + "AS, YS", "year start frequency" + "BAS, BYS", "business year start frequency" + "BH", "business hour frequency" + "H", "hourly frequency" + "T, min", "minutely frequency" + "S", "secondly frequency" + "L, ms", "milliseconds" + "U, us", "microseconds" + "N", "nanoseconds" + +Combining aliases +~~~~~~~~~~~~~~~~~ + +As we have seen previously, the alias and the offset 
instance are fungible in +most functions: + +.. ipython:: python + + pd.date_range(start, periods=5, freq='B') + + pd.date_range(start, periods=5, freq=pd.offsets.BDay()) + +You can combine together day and intraday offsets: + +.. ipython:: python + + pd.date_range(start, periods=10, freq='2h20min') + + pd.date_range(start, periods=10, freq='1D10U') + +Anchored offsets +~~~~~~~~~~~~~~~~ + +For some frequencies you can specify an anchoring suffix: + +.. csv-table:: + :header: "Alias", "Description" + :widths: 15, 100 + + "W\-SUN", "weekly frequency (Sundays). Same as 'W'" + "W\-MON", "weekly frequency (Mondays)" + "W\-TUE", "weekly frequency (Tuesdays)" + "W\-WED", "weekly frequency (Wednesdays)" + "W\-THU", "weekly frequency (Thursdays)" + "W\-FRI", "weekly frequency (Fridays)" + "W\-SAT", "weekly frequency (Saturdays)" + "(B)Q(S)\-DEC", "quarterly frequency, year ends in December. Same as 'Q'" + "(B)Q(S)\-JAN", "quarterly frequency, year ends in January" + "(B)Q(S)\-FEB", "quarterly frequency, year ends in February" + "(B)Q(S)\-MAR", "quarterly frequency, year ends in March" + "(B)Q(S)\-APR", "quarterly frequency, year ends in April" + "(B)Q(S)\-MAY", "quarterly frequency, year ends in May" + "(B)Q(S)\-JUN", "quarterly frequency, year ends in June" + "(B)Q(S)\-JUL", "quarterly frequency, year ends in July" + "(B)Q(S)\-AUG", "quarterly frequency, year ends in August" + "(B)Q(S)\-SEP", "quarterly frequency, year ends in September" + "(B)Q(S)\-OCT", "quarterly frequency, year ends in October" + "(B)Q(S)\-NOV", "quarterly frequency, year ends in November" + "(B)A(S)\-DEC", "annual frequency, anchored end of December. 
Same as 'A'" + "(B)A(S)\-JAN", "annual frequency, anchored end of January" + "(B)A(S)\-FEB", "annual frequency, anchored end of February" + "(B)A(S)\-MAR", "annual frequency, anchored end of March" + "(B)A(S)\-APR", "annual frequency, anchored end of April" + "(B)A(S)\-MAY", "annual frequency, anchored end of May" + "(B)A(S)\-JUN", "annual frequency, anchored end of June" + "(B)A(S)\-JUL", "annual frequency, anchored end of July" + "(B)A(S)\-AUG", "annual frequency, anchored end of August" + "(B)A(S)\-SEP", "annual frequency, anchored end of September" + "(B)A(S)\-OCT", "annual frequency, anchored end of October" + "(B)A(S)\-NOV", "annual frequency, anchored end of November" + +These can be used as arguments to ``date_range``, ``bdate_range``, constructors +for ``DatetimeIndex``, as well as various other timeseries-related functions +in pandas. + +Anchored offset semantics +~~~~~~~~~~~~~~~~~~~~~~~~~ + +For those offsets that are anchored to the start or end of specific +frequency (``MonthEnd``, ``MonthBegin``, ``WeekEnd``, etc), the following +rules apply to rolling forward and backwards. + +When ``n`` is not 0, if the given date is not on an anchor point, it snapped to the next(previous) +anchor point, and moved ``|n|-1`` additional steps forwards or backwards. + +.. ipython:: python + + pd.Timestamp('2014-01-02') + pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-02') + pd.offsets.MonthEnd(n=1) + + pd.Timestamp('2014-01-02') - pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-02') - pd.offsets.MonthEnd(n=1) + + pd.Timestamp('2014-01-02') + pd.offsets.MonthBegin(n=4) + pd.Timestamp('2014-01-02') - pd.offsets.MonthBegin(n=4) + +If the given date *is* on an anchor point, it is moved ``|n|`` points forwards +or backwards. + +.. 
ipython:: python + + pd.Timestamp('2014-01-01') + pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-31') + pd.offsets.MonthEnd(n=1) + + pd.Timestamp('2014-01-01') - pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-31') - pd.offsets.MonthEnd(n=1) + + pd.Timestamp('2014-01-01') + pd.offsets.MonthBegin(n=4) + pd.Timestamp('2014-01-31') - pd.offsets.MonthBegin(n=4) + +For the case when ``n=0``, the date is not moved if on an anchor point, otherwise +it is rolled forward to the next anchor point. + +.. ipython:: python + + pd.Timestamp('2014-01-02') + pd.offsets.MonthBegin(n=0) + pd.Timestamp('2014-01-02') + pd.offsets.MonthEnd(n=0) + + pd.Timestamp('2014-01-01') + pd.offsets.MonthBegin(n=0) + pd.Timestamp('2014-01-31') + pd.offsets.MonthEnd(n=0) + +.. _timeseries.holiday: + +Holidays / holiday calendars +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Holidays and calendars provide a simple way to define holiday rules to be used +with ``CustomBusinessDay`` or in other analysis that requires a predefined +set of holidays. The ``AbstractHolidayCalendar`` class provides all the necessary +methods to return a list of holidays and only ``rules`` need to be defined +in a specific holiday calendar class. Furthermore, the ``start_date`` and ``end_date`` +class attributes determine over what date range holidays are generated. These +should be overwritten on the ``AbstractHolidayCalendar`` class to have the range +apply to all calendar subclasses. ``USFederalHolidayCalendar`` is the +only calendar that exists and primarily serves as an example for developing +other calendars. + +For holidays that occur on fixed dates (e.g., US Memorial Day or July 4th) an +observance rule determines when that holiday is observed if it falls on a weekend +or some other non-observed day. Defined observance rules are: + +.. 
csv-table:: + :header: "Rule", "Description" + :widths: 15, 70 + + "nearest_workday", "move Saturday to Friday and Sunday to Monday" + "sunday_to_monday", "move Sunday to following Monday" + "next_monday_or_tuesday", "move Saturday to Monday and Sunday/Monday to Tuesday" + "previous_friday", move Saturday and Sunday to previous Friday" + "next_monday", "move Saturday and Sunday to following Monday" + +An example of how holidays and holiday calendars are defined: + +.. ipython:: python + + from pandas.tseries.holiday import Holiday, USMemorialDay,\ + AbstractHolidayCalendar, nearest_workday, MO + class ExampleCalendar(AbstractHolidayCalendar): + rules = [ + USMemorialDay, + Holiday('July 4th', month=7, day=4, observance=nearest_workday), + Holiday('Columbus Day', month=10, day=1, + offset=pd.DateOffset(weekday=MO(2)))] + + cal = ExampleCalendar() + cal.holidays(datetime.datetime(2012, 1, 1), datetime.datetime(2012, 12, 31)) + +:hint: + **weekday=MO(2)** is same as **2 * Week(weekday=2)** + +Using this calendar, creating an index or doing offset arithmetic skips weekends +and holidays (i.e., Memorial Day/July 4th). For example, the below defines +a custom business day offset using the ``ExampleCalendar``. Like any other offset, +it can be used to create a ``DatetimeIndex`` or added to ``datetime`` +or ``Timestamp`` objects. + +.. ipython:: python + + pd.date_range(start='7/1/2012', end='7/10/2012', + freq=pd.offsets.CDay(calendar=cal)).to_pydatetime() + offset = pd.offsets.CustomBusinessDay(calendar=cal) + datetime.datetime(2012, 5, 25) + offset + datetime.datetime(2012, 7, 3) + offset + datetime.datetime(2012, 7, 3) + 2 * offset + datetime.datetime(2012, 7, 6) + offset + +Ranges are defined by the ``start_date`` and ``end_date`` class attributes +of ``AbstractHolidayCalendar``. The defaults are shown below. + +.. 
ipython:: python + + AbstractHolidayCalendar.start_date + AbstractHolidayCalendar.end_date + +These dates can be overwritten by setting the attributes as +datetime/Timestamp/string. + +.. ipython:: python + + AbstractHolidayCalendar.start_date = datetime.datetime(2012, 1, 1) + AbstractHolidayCalendar.end_date = datetime.datetime(2012, 12, 31) + cal.holidays() + +Every calendar class is accessible by name using the ``get_calendar`` function +which returns a holiday class instance. Any imported calendar class will +automatically be available by this function. Also, ``HolidayCalendarFactory`` +provides an easy interface to create calendars that are combinations of calendars +or calendars with additional rules. + +.. ipython:: python + + from pandas.tseries.holiday import get_calendar, HolidayCalendarFactory,\ + USLaborDay + cal = get_calendar('ExampleCalendar') + cal.rules + new_cal = HolidayCalendarFactory('NewExampleCalendar', cal, USLaborDay) + new_cal.rules + +.. _timeseries.advanced_datetime: + +Time Series-Related Instance Methods +------------------------------------ + +Shifting / lagging +~~~~~~~~~~~~~~~~~~ + +One may want to *shift* or *lag* the values in a time series back and forward in +time. The method for this is :meth:`~Series.shift`, which is available on all of +the pandas objects. + +.. ipython:: python + + ts = pd.Series(range(len(rng)), index=rng) + ts = ts[:5] + ts.shift(1) + +The ``shift`` method accepts an ``freq`` argument which can accept a +``DateOffset`` class or other ``timedelta``-like object or also an +:ref:`offset alias `: + +.. ipython:: python + + ts.shift(5, freq=pd.offsets.BDay()) + ts.shift(5, freq='BM') + +Rather than changing the alignment of the data and the index, ``DataFrame`` and +``Series`` objects also have a :meth:`~Series.tshift` convenience method that +changes all the dates in the index by a specified number of offsets: + +.. 
ipython:: python + + ts.tshift(5, freq='D') + +Note that with ``tshift``, the leading entry is no longer NaN because the data +is not being realigned. + +Frequency conversion +~~~~~~~~~~~~~~~~~~~~ + +The primary function for changing frequencies is the :meth:`~Series.asfreq` +method. For a ``DatetimeIndex``, this is basically just a thin, but convenient +wrapper around :meth:`~Series.reindex` which generates a ``date_range`` and +calls ``reindex``. + +.. ipython:: python + + dr = pd.date_range('1/1/2010', periods=3, freq=3 * pd.offsets.BDay()) + ts = pd.Series(np.random.randn(3), index=dr) + ts + ts.asfreq(pd.offsets.BDay()) + +``asfreq`` provides a further convenience so you can specify an interpolation +method for any gaps that may appear after the frequency conversion. + +.. ipython:: python + + ts.asfreq(pd.offsets.BDay(), method='pad') + +Filling forward / backward +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Related to ``asfreq`` and ``reindex`` is :meth:`~Series.fillna`, which is +documented in the :ref:`missing data section `. + +Converting to Python datetimes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``DatetimeIndex`` can be converted to an array of Python native +:py:class:`datetime.datetime` objects using the ``to_pydatetime`` method. + +.. _timeseries.resampling: + +Resampling +---------- + +Pandas has a simple, powerful, and efficient functionality for performing +resampling operations during frequency conversion (e.g., converting secondly +data into 5-minutely data). This is extremely common in, but not limited to, +financial applications. + +:meth:`~Series.resample` is a time-based groupby, followed by a reduction method +on each of its groups. See some :ref:`cookbook examples ` for +some advanced strategies. + +The ``resample()`` method can be used directly from ``DataFrameGroupBy`` objects, +see the :ref:`groupby docs `. + +.. note:: + + ``.resample()`` is similar to using a :meth:`~Series.rolling` operation with + a time-based offset, see a discussion :ref:`here `. 
+ +Basics +~~~~~~ + +.. ipython:: python + + rng = pd.date_range('1/1/2012', periods=100, freq='S') + + ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng) + + ts.resample('5Min').sum() + +The ``resample`` function is very flexible and allows you to specify many +different parameters to control the frequency conversion and resampling +operation. + +Any function available via :ref:`dispatching ` is available as +a method of the returned object, including ``sum``, ``mean``, ``std``, ``sem``, +``max``, ``min``, ``median``, ``first``, ``last``, ``ohlc``: + +.. ipython:: python + + ts.resample('5Min').mean() + + ts.resample('5Min').ohlc() + + ts.resample('5Min').max() + + +For downsampling, ``closed`` can be set to 'left' or 'right' to specify which +end of the interval is closed: + +.. ipython:: python + + ts.resample('5Min', closed='right').mean() + + ts.resample('5Min', closed='left').mean() + +Parameters like ``label`` and ``loffset`` are used to manipulate the resulting +labels. ``label`` specifies whether the result is labeled with the beginning or +the end of the interval. ``loffset`` performs a time adjustment on the output +labels. + +.. ipython:: python + + ts.resample('5Min').mean() # by default label='left' + + ts.resample('5Min', label='left').mean() + + ts.resample('5Min', label='left', loffset='1s').mean() + +.. warning:: + + The default values for ``label`` and ``closed`` is '**left**' for all + frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' + which all have a default of 'right'. + + This might unintendedly lead to looking ahead, where the value for a later + time is pulled back to a previous time as in the following example with + the :class:`~pandas.tseries.offsets.BusinessDay` frequency: + + .. 
ipython:: python + + s = pd.date_range('2000-01-01', '2000-01-05').to_series() + s.iloc[2] = pd.NaT + s.dt.day_name() + + # default: label='left', closed='left' + s.resample('B').last().dt.day_name() + + Notice how the value for Sunday got pulled back to the previous Friday. + To get the behavior where the value for Sunday is pushed to Monday, use + instead + + .. ipython:: python + + s.resample('B', label='right', closed='right').last().dt.day_name() + +The ``axis`` parameter can be set to 0 or 1 and allows you to resample the +specified axis for a ``DataFrame``. + +``kind`` can be set to 'timestamp' or 'period' to convert the resulting index +to/from timestamp and time span representations. By default ``resample`` +retains the input representation. + +``convention`` can be set to 'start' or 'end' when resampling period data +(detail below). It specifies how low frequency periods are converted to higher +frequency periods. + + +Upsampling +~~~~~~~~~~ + +For upsampling, you can specify a way to upsample and the ``limit`` parameter to interpolate over the gaps that are created: + +.. ipython:: python + + # from secondly to every 250 milliseconds + + ts[:2].resample('250L').asfreq() + + ts[:2].resample('250L').ffill() + + ts[:2].resample('250L').ffill(limit=2) + +Sparse resampling +~~~~~~~~~~~~~~~~~ + +Sparse timeseries are the ones where you have a lot fewer points relative +to the amount of time you are looking to resample. Naively upsampling a sparse +series can potentially generate lots of intermediate values. When you don't want +to use a method to fill these values, e.g. ``fill_method`` is ``None``, then +intermediate values will be filled with ``NaN``. + +Since ``resample`` is a time-based groupby, the following is a method to efficiently +resample only the groups that are not all ``NaN``. + +.. 
ipython:: python + + rng = pd.date_range('2014-1-1', periods=100, freq='D') + pd.Timedelta('1s') + ts = pd.Series(range(100), index=rng) + +If we want to resample to the full range of the series: + +.. ipython:: python + + ts.resample('3T').sum() + +We can instead only resample those groups where we have points as follows: + +.. ipython:: python + + from functools import partial + from pandas.tseries.frequencies import to_offset + + def round(t, freq): + # round a Timestamp to a specified freq + freq = to_offset(freq) + return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value) + + ts.groupby(partial(round, freq='3T')).sum() + +.. _timeseries.aggregate: + +Aggregation +~~~~~~~~~~~ + +Similar to the :ref:`aggregating API `, :ref:`groupby API `, and the :ref:`window functions API `, +a ``Resampler`` can be selectively resampled. + +Resampling a ``DataFrame``, the default will be to act on all columns with the same function. + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 3), + index=pd.date_range('1/1/2012', freq='S', periods=1000), + columns=['A', 'B', 'C']) + r = df.resample('3T') + r.mean() + +We can select a specific column or columns using standard getitem. + +.. ipython:: python + + r['A'].mean() + + r[['A', 'B']].mean() + +You can pass a list or dict of functions to do aggregation with, outputting a ``DataFrame``: + +.. ipython:: python + + r['A'].agg([np.sum, np.mean, np.std]) + +On a resampled ``DataFrame``, you can pass a list of functions to apply to each +column, which produces an aggregated result with a hierarchical index: + +.. ipython:: python + + r.agg([np.sum, np.mean]) + +By passing a dict to ``aggregate`` you can apply a different aggregation to the +columns of a ``DataFrame``: + +.. ipython:: python + :okexcept: + + r.agg({'A': np.sum, + 'B': lambda x: np.std(x, ddof=1)}) + +The function names can also be strings. In order for a string to be valid it +must be implemented on the resampled object: + +.. 
ipython:: python + + r.agg({'A': 'sum', 'B': 'std'}) + +Furthermore, you can also specify multiple aggregation functions for each column separately. + +.. ipython:: python + + r.agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) + + +If a ``DataFrame`` does not have a datetimelike index, but instead you want +to resample based on datetimelike column in the frame, it can passed to the +``on`` keyword. + +.. ipython:: python + + df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), + 'a': np.arange(5)}, + index=pd.MultiIndex.from_arrays([ + [1, 2, 3, 4, 5], + pd.date_range('2015-01-01', freq='W', periods=5)], + names=['v', 'd'])) + df + df.resample('M', on='date').sum() + +Similarly, if you instead want to resample by a datetimelike +level of ``MultiIndex``, its name or location can be passed to the +``level`` keyword. + +.. ipython:: python + + df.resample('M', level='d').sum() + +.. _timeseries.iterating-label: + +Iterating through groups +~~~~~~~~~~~~~~~~~~~~~~~~ + +With the ``Resampler`` object in hand, iterating through the grouped data is very +natural and functions similarly to :py:func:`itertools.groupby`: + +.. ipython:: python + + small = pd.Series( + range(6), + index=pd.to_datetime(['2017-01-01T00:00:00', + '2017-01-01T00:30:00', + '2017-01-01T00:31:00', + '2017-01-01T01:00:00', + '2017-01-01T03:00:00', + '2017-01-01T03:05:00']) + ) + resampled = small.resample('H') + + for name, group in resampled: + print("Group: ", name) + print("-" * 27) + print(group, end="\n\n") + +See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more. + + +.. _timeseries.periods: + +Time span representation +------------------------ + +Regular intervals of time are represented by ``Period`` objects in pandas while +sequences of ``Period`` objects are collected in a ``PeriodIndex``, which can +be created with the convenience function ``period_range``. 
+ +Period +~~~~~~ + +A ``Period`` represents a span of time (e.g., a day, a month, a quarter, etc). +You can specify the span via ``freq`` keyword using a frequency alias like below. +Because ``freq`` represents a span of ``Period``, it cannot be negative like "-3D". + +.. ipython:: python + + pd.Period('2012', freq='A-DEC') + + pd.Period('2012-1-1', freq='D') + + pd.Period('2012-1-1 19:00', freq='H') + + pd.Period('2012-1-1 19:00', freq='5H') + +Adding and subtracting integers from periods shifts the period by its own +frequency. Arithmetic is not allowed between ``Period`` with different ``freq`` (span). + +.. ipython:: python + + p = pd.Period('2012', freq='A-DEC') + p + 1 + p - 3 + p = pd.Period('2012-01', freq='2M') + p + 2 + p - 1 + @okexcept + p == pd.Period('2012-01', freq='3M') + + +If ``Period`` freq is daily or higher (``D``, ``H``, ``T``, ``S``, ``L``, ``U``, ``N``), ``offsets`` and ``timedelta``-like can be added if the result can have the same freq. Otherwise, ``ValueError`` will be raised. + +.. ipython:: python + + p = pd.Period('2014-07-01 09:00', freq='H') + p + pd.offsets.Hour(2) + p + datetime.timedelta(minutes=120) + p + np.timedelta64(7200, 's') + +.. code-block:: ipython + + In [1]: p + pd.offsets.Minute(5) + Traceback + ... + ValueError: Input has different freq from Period(freq=H) + +If ``Period`` has other frequencies, only the same ``offsets`` can be added. Otherwise, ``ValueError`` will be raised. + +.. ipython:: python + + p = pd.Period('2014-07', freq='M') + p + pd.offsets.MonthEnd(3) + +.. code-block:: ipython + + In [1]: p + pd.offsets.MonthBegin(3) + Traceback + ... + ValueError: Input has different freq from Period(freq=M) + +Taking the difference of ``Period`` instances with the same frequency will +return the number of frequency units between them: + +.. 
ipython:: python + + pd.Period('2012', freq='A-DEC') - pd.Period('2002', freq='A-DEC') + +PeriodIndex and period_range +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Regular sequences of ``Period`` objects can be collected in a ``PeriodIndex``, +which can be constructed using the ``period_range`` convenience function: + +.. ipython:: python + + prng = pd.period_range('1/1/2011', '1/1/2012', freq='M') + prng + +The ``PeriodIndex`` constructor can also be used directly: + +.. ipython:: python + + pd.PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') + +Passing a multiplied frequency outputs a sequence of ``Period`` which +has a multiplied span. + +.. ipython:: python + + pd.period_range(start='2014-01', freq='3M', periods=4) + +If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor +endpoints for a ``PeriodIndex`` with frequency matching that of the +``PeriodIndex`` constructor. + +.. ipython:: python + + pd.period_range(start=pd.Period('2017Q1', freq='Q'), + end=pd.Period('2017Q2', freq='Q'), freq='M') + +Just like ``DatetimeIndex``, a ``PeriodIndex`` can also be used to index pandas +objects: + +.. ipython:: python + + ps = pd.Series(np.random.randn(len(prng)), prng) + ps + +``PeriodIndex`` supports addition and subtraction with the same rule as ``Period``. + +.. ipython:: python + + idx = pd.period_range('2014-07-01 09:00', periods=5, freq='H') + idx + idx + pd.offsets.Hour(2) + + idx = pd.period_range('2014-07', periods=5, freq='M') + idx + idx + pd.offsets.MonthEnd(3) + +``PeriodIndex`` has its own dtype named ``period``, refer to :ref:`Period Dtypes <timeseries.period_dtype>`. + +.. _timeseries.period_dtype: + +Period dtypes +~~~~~~~~~~~~~ + +``PeriodIndex`` has a custom ``period`` dtype. This is a pandas extension +dtype similar to the :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``). + +The ``period`` dtype holds the ``freq`` attribute and is represented with +``period[freq]`` like ``period[D]`` or ``period[M]``, using :ref:`frequency strings <timeseries.offset_aliases>`. + +.. 
ipython:: python + + pi = pd.period_range('2016-01-01', periods=3, freq='M') + pi + pi.dtype + +The ``period`` dtype can be used in ``.astype(...)``. It allows one to change the +``freq`` of a ``PeriodIndex`` like ``.asfreq()`` and convert a +``DatetimeIndex`` to ``PeriodIndex`` like ``to_period()``: + +.. ipython:: python + + # change monthly freq to daily freq + pi.astype('period[D]') + + # convert to DatetimeIndex + pi.astype('datetime64[ns]') + + # convert to PeriodIndex + dti = pd.date_range('2011-01-01', freq='M', periods=3) + dti + dti.astype('period[M]') + + +PeriodIndex partial string indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can pass in dates and strings to ``Series`` and ``DataFrame`` with ``PeriodIndex``, in the same manner as ``DatetimeIndex``. For details, refer to :ref:`DatetimeIndex Partial String Indexing `. + +.. ipython:: python + + ps['2011-01'] + + ps[datetime.datetime(2011, 12, 25):] + + ps['10/31/2011':'12/31/2011'] + +Passing a string representing a lower frequency than ``PeriodIndex`` returns partial sliced data. + +.. ipython:: python + + ps['2011'] + + dfp = pd.DataFrame(np.random.randn(600, 1), + columns=['A'], + index=pd.period_range('2013-01-01 9:00', + periods=600, + freq='T')) + dfp + dfp['2013-01-01 10H'] + +As with ``DatetimeIndex``, the endpoints will be included in the result. The example below slices data starting from 10:00 to 11:59. + +.. ipython:: python + + dfp['2013-01-01 10H':'2013-01-01 11H'] + +Frequency conversion and resampling with PeriodIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The frequency of ``Period`` and ``PeriodIndex`` can be converted via the ``asfreq`` +method. Let's start with the fiscal year 2011, ending in December: + +.. ipython:: python + + p = pd.Period('2011', freq='A-DEC') + p + +We can convert it to a monthly frequency. Using the ``how`` parameter, we can +specify whether to return the starting or ending month: + +.. 
ipython:: python + + p.asfreq('M', how='start') + + p.asfreq('M', how='end') + +The shorthands 's' and 'e' are provided for convenience: + +.. ipython:: python + + p.asfreq('M', 's') + p.asfreq('M', 'e') + +Converting to a "super-period" (e.g., annual frequency is a super-period of +quarterly frequency) automatically returns the super-period that includes the +input period: + +.. ipython:: python + + p = pd.Period('2011-12', freq='M') + + p.asfreq('A-NOV') + +Note that since we converted to an annual frequency that ends the year in +November, the monthly period of December 2011 is actually in the 2012 A-NOV +period. + +.. _timeseries.quarterly: + +Period conversions with anchored frequencies are particularly useful for +working with various quarterly data common to economics, business, and other +fields. Many organizations define quarters relative to the month in which their +fiscal year starts and ends. Thus, the first quarter of 2011 could start in 2010 or +a few months into 2011. Via anchored frequencies, pandas works for all quarterly +frequencies ``Q-JAN`` through ``Q-DEC``. + +``Q-DEC`` defines regular calendar quarters: + +.. ipython:: python + + p = pd.Period('2012Q1', freq='Q-DEC') + + p.asfreq('D', 's') + + p.asfreq('D', 'e') + +``Q-MAR`` defines fiscal year end in March: + +.. ipython:: python + + p = pd.Period('2011Q4', freq='Q-MAR') + + p.asfreq('D', 's') + + p.asfreq('D', 'e') + +.. _timeseries.interchange: + +Converting between representations +---------------------------------- + +Timestamped data can be converted to PeriodIndex-ed data using ``to_period`` +and vice-versa using ``to_timestamp``: + +.. ipython:: python + + rng = pd.date_range('1/1/2012', periods=5, freq='M') + + ts = pd.Series(np.random.randn(len(rng)), index=rng) + + ts + + ps = ts.to_period() + + ps + + ps.to_timestamp() + +Remember that 's' and 'e' can be used to return the timestamps at the start or +end of the period: + +.. 
ipython:: python + + ps.to_timestamp('D', how='s') + +Converting between period and timestamp enables some convenient arithmetic +functions to be used. In the following example, we convert a quarterly +frequency with year ending in November to 9am of the end of the month following +the quarter end: + +.. ipython:: python + + prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV') + + ts = pd.Series(np.random.randn(len(prng)), prng) + + ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9 + + ts.head() + +.. _timeseries.oob: + +Representing out-of-bounds spans +-------------------------------- + +If you have data that is outside of the ``Timestamp`` bounds, see :ref:`Timestamp limitations <timeseries.timestamp-limits>`, +then you can use a ``PeriodIndex`` and/or ``Series`` of ``Periods`` to do computations. + +.. ipython:: python + + span = pd.period_range('1215-01-01', '1381-01-01', freq='D') + span + +To convert from an ``int64`` based YYYYMMDD representation: + +.. ipython:: python + + s = pd.Series([20121231, 20141130, 99991231]) + s + + def conv(x): + return pd.Period(year=x // 10000, month=x // 100 % 100, + day=x % 100, freq='D') + + s.apply(conv) + s.apply(conv)[2] + +These can easily be converted to a ``PeriodIndex``: + +.. ipython:: python + + span = pd.PeriodIndex(s.apply(conv)) + span + +.. _timeseries.timezone: + +Time zone handling +------------------ + +pandas provides rich support for working with timestamps in different time +zones using the ``pytz`` and ``dateutil`` libraries or :class:`datetime.timezone` +objects from the standard library. + + +Working with time zones +~~~~~~~~~~~~~~~~~~~~~~~ + +By default, pandas objects are time zone unaware: + +.. 
ipython:: python + + rng = pd.date_range('3/6/2012 00:00', periods=15, freq='D') + rng.tz is None + +To localize these dates to a time zone (assign a particular time zone to a naive date), +you can use the ``tz_localize`` method or the ``tz`` keyword argument in +:func:`date_range`, :class:`Timestamp`, or :class:`DatetimeIndex`. +You can either pass ``pytz`` or ``dateutil`` time zone objects or Olson time zone database strings. +Olson time zone strings will return ``pytz`` time zone objects by default. +To return ``dateutil`` time zone objects, prepend ``dateutil/`` to the string. + +* In ``pytz`` you can find a list of common (and less common) time zones using + ``from pytz import common_timezones, all_timezones``. +* ``dateutil`` uses the OS time zones so there isn't a fixed list available. For + common zones, the names are the same as ``pytz``. + +.. ipython:: python + + import dateutil + + # pytz + rng_pytz = pd.date_range('3/6/2012 00:00', periods=3, freq='D', + tz='Europe/London') + rng_pytz.tz + + # dateutil + rng_dateutil = pd.date_range('3/6/2012 00:00', periods=3, freq='D') + rng_dateutil = rng_dateutil.tz_localize('dateutil/Europe/London') + rng_dateutil.tz + + # dateutil - utc special case + rng_utc = pd.date_range('3/6/2012 00:00', periods=3, freq='D', + tz=dateutil.tz.tzutc()) + rng_utc.tz + +.. versionadded:: 0.25.0 + +.. ipython:: python + + # datetime.timezone + rng_utc = pd.date_range('3/6/2012 00:00', periods=3, freq='D', + tz=datetime.timezone.utc) + rng_utc.tz + +Note that the ``UTC`` time zone is a special case in ``dateutil`` and should be constructed explicitly +as an instance of ``dateutil.tz.tzutc``. You can also construct other time +zone objects explicitly first. + +.. 
ipython:: python + + import pytz + + # pytz + tz_pytz = pytz.timezone('Europe/London') + rng_pytz = pd.date_range('3/6/2012 00:00', periods=3, freq='D') + rng_pytz = rng_pytz.tz_localize(tz_pytz) + rng_pytz.tz == tz_pytz + + # dateutil + tz_dateutil = dateutil.tz.gettz('Europe/London') + rng_dateutil = pd.date_range('3/6/2012 00:00', periods=3, freq='D', + tz=tz_dateutil) + rng_dateutil.tz == tz_dateutil + +To convert a time zone aware pandas object from one time zone to another, +you can use the ``tz_convert`` method. + +.. ipython:: python + + rng_pytz.tz_convert('US/Eastern') + +.. note:: + + When using ``pytz`` time zones, :class:`DatetimeIndex` will construct a different + time zone object than a :class:`Timestamp` for the same time zone input. A :class:`DatetimeIndex` + can hold a collection of :class:`Timestamp` objects that may have different UTC offsets and cannot be + succinctly represented by one ``pytz`` time zone instance while one :class:`Timestamp` + represents one point in time with a specific UTC offset. + + .. ipython:: python + + dti = pd.date_range('2019-01-01', periods=3, freq='D', tz='US/Pacific') + dti.tz + ts = pd.Timestamp('2019-01-01', tz='US/Pacific') + ts.tz + +.. warning:: + + Be wary of conversions between libraries. For some time zones, ``pytz`` and ``dateutil`` have different + definitions of the zone. This is more of a problem for unusual time zones than for + 'standard' zones like ``US/Eastern``. + +.. warning:: + + Be aware that a time zone definition across versions of time zone libraries may not + be considered equal. This may cause problems when working with stored data that + is localized using one version and operated on with a different version. + See :ref:`here<io.hdf5-notes>` for how to handle such a situation. + +.. warning:: + + For ``pytz`` time zones, it is incorrect to pass a time zone object directly into + the ``datetime.datetime`` constructor + (e.g., ``datetime.datetime(2011, 1, 1, tzinfo=pytz.timezone('US/Eastern'))``). 
+ Instead, the datetime needs to be localized using the ``localize`` method + on the ``pytz`` time zone object. + +Under the hood, all timestamps are stored in UTC. Values from a time zone aware +:class:`DatetimeIndex` or :class:`Timestamp` will have their fields (day, hour, minute, etc.) +localized to the time zone. However, timestamps with the same UTC value are +still considered to be equal even if they are in different time zones: + +.. ipython:: python + + rng_eastern = rng_utc.tz_convert('US/Eastern') + rng_berlin = rng_utc.tz_convert('Europe/Berlin') + + rng_eastern[2] + rng_berlin[2] + rng_eastern[2] == rng_berlin[2] + +Operations between :class:`Series` in different time zones will yield UTC +:class:`Series`, aligning the data on the UTC timestamps: + +.. ipython:: python + + ts_utc = pd.Series(range(3), pd.date_range('20130101', periods=3, tz='UTC')) + eastern = ts_utc.tz_convert('US/Eastern') + berlin = ts_utc.tz_convert('Europe/Berlin') + result = eastern + berlin + result + result.index + +To remove time zone information, use ``tz_localize(None)`` or ``tz_convert(None)``. +``tz_localize(None)`` will remove the time zone yielding the local time representation. +``tz_convert(None)`` will remove the time zone after converting to UTC time. + +.. ipython:: python + + didx = pd.date_range(start='2014-08-01 09:00', freq='H', + periods=3, tz='US/Eastern') + didx + didx.tz_localize(None) + didx.tz_convert(None) + + # tz_convert(None) is identical to tz_convert('UTC').tz_localize(None) + didx.tz_convert('UTC').tz_localize(None) + +.. _timeseries.timezone_ambiguous: + +Ambiguous times when localizing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``tz_localize`` may not be able to determine the UTC offset of a timestamp +because daylight savings time (DST) in a local time zone causes some times to occur +twice within one day ("clocks fall back"). 
The following options are available: + +* ``'raise'``: Raises a ``pytz.AmbiguousTimeError`` (the default behavior) +* ``'infer'``: Attempt to determine the correct offset based on the monotonicity of the timestamps +* ``'NaT'``: Replaces ambiguous times with ``NaT`` +* ``bool``: ``True`` represents a DST time, ``False`` represents non-DST time. An array-like of ``bool`` values is supported for a sequence of times. + +.. ipython:: python + + rng_hourly = pd.DatetimeIndex(['11/06/2011 00:00', '11/06/2011 01:00', + '11/06/2011 01:00', '11/06/2011 02:00']) + +This will fail as there are ambiguous times (``'11/06/2011 01:00'``) + +.. code-block:: ipython + + In [2]: rng_hourly.tz_localize('US/Eastern') + AmbiguousTimeError: Cannot infer dst time from Timestamp('2011-11-06 01:00:00'), try using the 'ambiguous' argument + +Handle these ambiguous times by specifying the following. + +.. ipython:: python + + rng_hourly.tz_localize('US/Eastern', ambiguous='infer') + rng_hourly.tz_localize('US/Eastern', ambiguous='NaT') + rng_hourly.tz_localize('US/Eastern', ambiguous=[True, True, False, False]) + +.. _timeseries.timezone_nonexistent: + +Nonexistent times when localizing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A DST transition may also shift the local time ahead by 1 hour creating nonexistent +local times ("clocks spring forward"). The behavior of localizing a timeseries with nonexistent times +can be controlled by the ``nonexistent`` argument. The following options are available: + +* ``'raise'``: Raises a ``pytz.NonExistentTimeError`` (the default behavior) +* ``'NaT'``: Replaces nonexistent times with ``NaT`` +* ``'shift_forward'``: Shifts nonexistent times forward to the closest real time +* ``'shift_backward'``: Shifts nonexistent times backward to the closest real time +* timedelta object: Shifts nonexistent times by the timedelta duration + +.. 
ipython:: python + + dti = pd.date_range(start='2015-03-29 02:30:00', periods=3, freq='H') + # 2:30 is a nonexistent time + +Localization of nonexistent times will raise an error by default. + +.. code-block:: ipython + + In [2]: dti.tz_localize('Europe/Warsaw') + NonExistentTimeError: 2015-03-29 02:30:00 + +Transform nonexistent times to ``NaT`` or shift the times. + +.. ipython:: python + + dti + dti.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + dti.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + dti.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta(1, unit='H')) + dti.tz_localize('Europe/Warsaw', nonexistent='NaT') + + +.. _timeseries.timezone_series: + +Time zone series operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A :class:`Series` with time zone **naive** values is +represented with a dtype of ``datetime64[ns]``. + +.. ipython:: python + + s_naive = pd.Series(pd.date_range('20130101', periods=3)) + s_naive + +A :class:`Series` with time zone **aware** values is +represented with a dtype of ``datetime64[ns, tz]`` where ``tz`` is the time zone. + +.. ipython:: python + + s_aware = pd.Series(pd.date_range('20130101', periods=3, tz='US/Eastern')) + s_aware + +The time zone information of both of these :class:`Series` +can be manipulated via the ``.dt`` accessor, see :ref:`the dt accessor section <basics.dt_accessors>`. + +For example, to localize and convert a naive stamp to time zone aware. + +.. ipython:: python + + s_naive.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') + +Time zone information can also be manipulated using the ``astype`` method. +This method can localize and convert time zone naive timestamps or +convert time zone aware timestamps. + +.. ipython:: python + + # localize and convert a naive time zone + s_naive.astype('datetime64[ns, US/Eastern]') + + # make an aware tz naive + s_aware.astype('datetime64[ns]') + + # convert to a new time zone + s_aware.astype('datetime64[ns, CET]') + +.. 
note:: + + Using :meth:`Series.to_numpy` on a ``Series``, returns a NumPy array of the data. + NumPy does not currently support time zones (even though it is *printing* in the local time zone!), + therefore an object array of Timestamps is returned for time zone aware data: + + .. ipython:: python + + s_naive.to_numpy() + s_aware.to_numpy() + + By converting to an object array of Timestamps, it preserves the time zone + information. For example, when converting back to a Series: + + .. ipython:: python + + pd.Series(s_aware.to_numpy()) + + However, if you want an actual NumPy ``datetime64[ns]`` array (with the values + converted to UTC) instead of an array of objects, you can specify the + ``dtype`` argument: + + .. ipython:: python + + s_aware.to_numpy(dtype='datetime64[ns]') diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst new file mode 100644 index 00000000..4fde053a --- /dev/null +++ b/doc/source/user_guide/visualization.rst @@ -0,0 +1,1643 @@ +.. _visualization: + +{{ header }} + +************* +Visualization +************* + +We use the standard convention for referencing the matplotlib API: + +.. ipython:: python + + import matplotlib.pyplot as plt + plt.close('all') + +We provide the basics in pandas to easily create decent looking plots. +See the :ref:`ecosystem ` section for visualization +libraries that go beyond the basics documented here. + +.. note:: + + All calls to ``np.random`` are seeded with 123456. + +.. _visualization.basic: + +Basic plotting: ``plot`` +------------------------ + +We will demonstrate the basics, see the :ref:`cookbook` for +some advanced strategies. + +The ``plot`` method on Series and DataFrame is just a simple wrapper around +:meth:`plt.plot() `: + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. 
ipython:: python + + ts = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) + ts = ts.cumsum() + + @savefig series_plot_basic.png + ts.plot() + +If the index consists of dates, it calls :meth:`gcf().autofmt_xdate() ` +to try to format the x-axis nicely as per above. + +On DataFrame, :meth:`~DataFrame.plot` is a convenience to plot all of the columns with labels: + +.. ipython:: python + :suppress: + + plt.close('all') + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 4), + index=ts.index, columns=list('ABCD')) + df = df.cumsum() + + plt.figure(); + @savefig frame_plot_basic.png + df.plot(); + +You can plot one column versus another using the `x` and `y` keywords in +:meth:`~DataFrame.plot`: + +.. ipython:: python + :suppress: + + plt.close('all') + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + df3 = pd.DataFrame(np.random.randn(1000, 2), columns=['B', 'C']).cumsum() + df3['A'] = pd.Series(list(range(len(df)))) + + @savefig df_plot_xy.png + df3.plot(x='A', y='B') + +.. note:: + + For more formatting and styling options, see + :ref:`formatting ` below. + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.other: + +Other plots +----------- + +Plotting methods allow for a handful of plot styles other than the +default line plot. These methods can be provided as the ``kind`` +keyword argument to :meth:`~DataFrame.plot`, and include: + +* :ref:`'bar' ` or :ref:`'barh' ` for bar plots +* :ref:`'hist' ` for histogram +* :ref:`'box' ` for boxplot +* :ref:`'kde' ` or :ref:`'density' ` for density plots +* :ref:`'area' ` for area plots +* :ref:`'scatter' ` for scatter plots +* :ref:`'hexbin' ` for hexagonal bin plots +* :ref:`'pie' ` for pie plots + +For example, a bar plot can be created the following way: + +.. 
ipython:: python + + plt.figure(); + + @savefig bar_plot_ex.png + df.iloc[5].plot(kind='bar'); + +You can also create these other plots using the methods ``DataFrame.plot.`` instead of providing the ``kind`` keyword argument. This makes it easier to discover plot methods and the specific arguments they use: + +.. ipython:: + :verbatim: + + In [14]: df = pd.DataFrame() + + In [15]: df.plot. # noqa: E225, E999 + df.plot.area df.plot.barh df.plot.density df.plot.hist df.plot.line df.plot.scatter + df.plot.bar df.plot.box df.plot.hexbin df.plot.kde df.plot.pie + +In addition to these ``kind`` s, there are the :ref:`DataFrame.hist() `, +and :ref:`DataFrame.boxplot() ` methods, which use a separate interface. + +Finally, there are several :ref:`plotting functions ` in ``pandas.plotting`` +that take a :class:`Series` or :class:`DataFrame` as an argument. These +include: + +* :ref:`Scatter Matrix ` +* :ref:`Andrews Curves ` +* :ref:`Parallel Coordinates ` +* :ref:`Lag Plot ` +* :ref:`Autocorrelation Plot ` +* :ref:`Bootstrap Plot ` +* :ref:`RadViz ` + +Plots may also be adorned with :ref:`errorbars ` +or :ref:`tables `. + +.. _visualization.barplot: + +Bar plots +~~~~~~~~~ + +For labeled, non-time series data, you may wish to produce a bar plot: + +.. ipython:: python + + plt.figure(); + + @savefig bar_plot_ex.png + df.iloc[5].plot.bar() + plt.axhline(0, color='k'); + +Calling a DataFrame's :meth:`plot.bar() ` method produces a multiple +bar plot: + +.. ipython:: python + :suppress: + + plt.close('all') + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + df2 = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd']) + + @savefig bar_plot_multi_ex.png + df2.plot.bar(); + +To produce a stacked bar plot, pass ``stacked=True``: + +.. ipython:: python + :suppress: + + plt.close('all') + plt.figure() + +.. ipython:: python + + @savefig bar_plot_stacked_ex.png + df2.plot.bar(stacked=True); + +To get horizontal bar plots, use the ``barh`` method: + +.. 
ipython:: python + :suppress: + + plt.close('all') + plt.figure() + +.. ipython:: python + + @savefig barh_plot_stacked_ex.png + df2.plot.barh(stacked=True); + +.. _visualization.hist: + +Histograms +~~~~~~~~~~ + +Histograms can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. + +.. ipython:: python + + df4 = pd.DataFrame({'a': np.random.randn(1000) + 1, 'b': np.random.randn(1000), + 'c': np.random.randn(1000) - 1}, columns=['a', 'b', 'c']) + + plt.figure(); + + @savefig hist_new.png + df4.plot.hist(alpha=0.5) + + +.. ipython:: python + :suppress: + + plt.close('all') + +A histogram can be stacked using ``stacked=True``. Bin size can be changed +using the ``bins`` keyword. + +.. ipython:: python + + plt.figure(); + + @savefig hist_new_stacked.png + df4.plot.hist(stacked=True, bins=20) + +.. ipython:: python + :suppress: + + plt.close('all') + +You can pass other keywords supported by matplotlib ``hist``. For example, +horizontal and cumulative histograms can be drawn by +``orientation='horizontal'`` and ``cumulative=True``. + +.. ipython:: python + + plt.figure(); + + @savefig hist_new_kwargs.png + df4['a'].plot.hist(orientation='horizontal', cumulative=True) + +.. ipython:: python + :suppress: + + plt.close('all') + +See the :meth:`hist ` method and the +`matplotlib hist documentation `__ for more. + + +The existing interface ``DataFrame.hist`` to plot histogram still can be used. + +.. ipython:: python + + plt.figure(); + + @savefig hist_plot_ex.png + df['A'].diff().hist() + +.. ipython:: python + :suppress: + + plt.close('all') + +:meth:`DataFrame.hist` plots the histograms of the columns on multiple +subplots: + +.. ipython:: python + + plt.figure() + + @savefig frame_hist_ex.png + df.diff().hist(color='k', alpha=0.5, bins=50) + + +The ``by`` keyword can be specified to plot grouped histograms: + +.. ipython:: python + :suppress: + + plt.close('all') + plt.figure() + np.random.seed(123456) + +.. 
ipython:: python + + data = pd.Series(np.random.randn(1000)) + + @savefig grouped_hist.png + data.hist(by=np.random.randint(0, 4, 1000), figsize=(6, 4)) + + +.. _visualization.box: + +Box plots +~~~~~~~~~ + +Boxplot can be drawn calling :meth:`Series.plot.box` and :meth:`DataFrame.plot.box`, +or :meth:`DataFrame.boxplot` to visualize the distribution of values within each column. + +For instance, here is a boxplot representing five trials of 10 observations of +a uniform random variable on [0,1). + +.. ipython:: python + :suppress: + + plt.close('all') + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E']) + + @savefig box_plot_new.png + df.plot.box() + +Boxplot can be colorized by passing ``color`` keyword. You can pass a ``dict`` +whose keys are ``boxes``, ``whiskers``, ``medians`` and ``caps``. +If some keys are missing in the ``dict``, default colors are used +for the corresponding artists. Also, boxplot has ``sym`` keyword to specify fliers style. + +When you pass other type of arguments via ``color`` keyword, it will be directly +passed to matplotlib for all the ``boxes``, ``whiskers``, ``medians`` and ``caps`` +colorization. + +The colors are applied to every boxes to be drawn. If you want +more complicated colorization, you can get each drawn artists by passing +:ref:`return_type `. + +.. ipython:: python + + color = {'boxes': 'DarkGreen', 'whiskers': 'DarkOrange', + 'medians': 'DarkBlue', 'caps': 'Gray'} + + @savefig box_new_colorize.png + df.plot.box(color=color, sym='r+') + +.. ipython:: python + :suppress: + + plt.close('all') + +Also, you can pass other keywords supported by matplotlib ``boxplot``. +For example, horizontal and custom-positioned boxplot can be drawn by +``vert=False`` and ``positions`` keywords. + +.. 
ipython:: python + + @savefig box_new_kwargs.png + df.plot.box(vert=False, positions=[1, 4, 5, 6, 8]) + + +See the :meth:`boxplot <matplotlib.axes.Axes.boxplot>` method and the +`matplotlib boxplot documentation `__ for more. + + +The existing interface ``DataFrame.boxplot`` to plot boxplot still can be used. + +.. ipython:: python + :suppress: + + plt.close('all') + np.random.seed(123456) + +.. ipython:: python + :okwarning: + + df = pd.DataFrame(np.random.rand(10, 5)) + plt.figure(); + + @savefig box_plot_ex.png + bp = df.boxplot() + +You can create a stratified boxplot using the ``by`` keyword argument to create +groupings. For instance, + +.. ipython:: python + :suppress: + + plt.close('all') + np.random.seed(123456) + +.. ipython:: python + :okwarning: + + df = pd.DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2']) + df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B']) + + plt.figure(); + + @savefig box_plot_ex2.png + bp = df.boxplot(by='X') + +You can also pass a subset of columns to plot, as well as group by multiple +columns: + +.. ipython:: python + :suppress: + + plt.close('all') + np.random.seed(123456) + +.. ipython:: python + :okwarning: + + df = pd.DataFrame(np.random.rand(10, 3), columns=['Col1', 'Col2', 'Col3']) + df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B']) + df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B']) + + plt.figure(); + + @savefig box_plot_ex3.png + bp = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.box.return: + +In ``boxplot``, the return type can be controlled by the ``return_type`` keyword. The valid choices are ``{"axes", "dict", "both", None}``. 
+Faceting, created by ``DataFrame.boxplot`` with the ``by`` +keyword, will affect the output type as well: + +================ ======= ========================== +``return_type=`` Faceted Output type +---------------- ------- -------------------------- + +``None`` No axes +``None`` Yes 2-D ndarray of axes +``'axes'`` No axes +``'axes'`` Yes Series of axes +``'dict'`` No dict of artists +``'dict'`` Yes Series of dicts of artists +``'both'`` No namedtuple +``'both'`` Yes Series of namedtuples +================ ======= ========================== + +``Groupby.boxplot`` always returns a ``Series`` of ``return_type``. + +.. ipython:: python + :okwarning: + + np.random.seed(1234) + df_box = pd.DataFrame(np.random.randn(50, 2)) + df_box['g'] = np.random.choice(['A', 'B'], size=50) + df_box.loc[df_box['g'] == 'B', 1] += 3 + + @savefig boxplot_groupby.png + bp = df_box.boxplot(by='g') + +.. ipython:: python + :suppress: + + plt.close('all') + +The subplots above are split by the numeric columns first, then the value of +the ``g`` column. Below the subplots are first split by the value of ``g``, +then by the numeric columns. + +.. ipython:: python + :okwarning: + + @savefig groupby_boxplot_vis.png + bp = df_box.groupby('g').boxplot() + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.area_plot: + +Area plot +~~~~~~~~~ + +You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. +Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. + +When input data contains `NaN`, it will be automatically filled by 0. If you want to drop or fill by different values, use :func:`dataframe.dropna` or :func:`dataframe.fillna` before calling `plot`. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.figure() + +.. 
ipython:: python + + df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd']) + + @savefig area_plot_stacked.png + df.plot.area(); + +To produce an unstacked plot, pass ``stacked=False``. Alpha value is set to 0.5 unless otherwise specified: + +.. ipython:: python + :suppress: + + plt.close('all') + plt.figure() + +.. ipython:: python + + @savefig area_plot_unstacked.png + df.plot.area(stacked=False); + +.. _visualization.scatter: + +Scatter plot +~~~~~~~~~~~~ + +Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. +Scatter plot requires numeric columns for the x and y axes. +These can be specified by the ``x`` and ``y`` keywords. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.close('all') + plt.figure() + +.. ipython:: python + + df = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd']) + + @savefig scatter_plot.png + df.plot.scatter(x='a', y='b'); + +To plot multiple column groups in a single axes, repeat ``plot`` method specifying target ``ax``. +It is recommended to specify ``color`` and ``label`` keywords to distinguish each group. + +.. ipython:: python + + ax = df.plot.scatter(x='a', y='b', color='DarkBlue', label='Group 1'); + @savefig scatter_plot_repeated.png + df.plot.scatter(x='c', y='d', color='DarkGreen', label='Group 2', ax=ax); + +.. ipython:: python + :suppress: + + plt.close('all') + +The keyword ``c`` may be given as the name of a column to provide colors for +each point: + +.. ipython:: python + + @savefig scatter_plot_colored.png + df.plot.scatter(x='a', y='b', c='c', s=50); + + +.. ipython:: python + :suppress: + + plt.close('all') + +You can pass other keywords supported by matplotlib +:meth:`scatter <matplotlib.axes.Axes.scatter>`. The example below shows a +bubble chart using a column of the ``DataFrame`` as the bubble size. + +.. ipython:: python + + @savefig scatter_plot_bubble.png + df.plot.scatter(x='a', y='b', s=df['c'] * 200); + +.. 
ipython:: python + :suppress: + + plt.close('all') + +See the :meth:`scatter ` method and the +`matplotlib scatter documentation `__ for more. + +.. _visualization.hexbin: + +Hexagonal bin plot +~~~~~~~~~~~~~~~~~~ + +You can create hexagonal bin plots with :meth:`DataFrame.plot.hexbin`. +Hexbin plots can be a useful alternative to scatter plots if your data are +too dense to plot each point individually. + +.. ipython:: python + :suppress: + + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 2), columns=['a', 'b']) + df['b'] = df['b'] + np.arange(1000) + + @savefig hexbin_plot.png + df.plot.hexbin(x='a', y='b', gridsize=25) + + +A useful keyword argument is ``gridsize``; it controls the number of hexagons +in the x-direction, and defaults to 100. A larger ``gridsize`` means more, smaller +bins. + +By default, a histogram of the counts around each ``(x, y)`` point is computed. +You can specify alternative aggregations by passing values to the ``C`` and +``reduce_C_function`` arguments. ``C`` specifies the value at each ``(x, y)`` point +and ``reduce_C_function`` is a function of one argument that reduces all the +values in a bin to a single number (e.g. ``mean``, ``max``, ``sum``, ``std``). In this +example the positions are given by columns ``a`` and ``b``, while the value is +given by column ``z``. The bins are aggregated with NumPy's ``max`` function. + +.. ipython:: python + :suppress: + + plt.close('all') + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 2), columns=['a', 'b']) + df['b'] = df['b'] = df['b'] + np.arange(1000) + df['z'] = np.random.uniform(0, 3, 1000) + + @savefig hexbin_plot_agg.png + df.plot.hexbin(x='a', y='b', C='z', reduce_C_function=np.max, gridsize=25) + +.. ipython:: python + :suppress: + + plt.close('all') + +See the :meth:`hexbin ` method and the +`matplotlib hexbin documentation `__ for more. + +.. 
_visualization.pie: + +Pie plot +~~~~~~~~ + +You can create a pie plot with :meth:`DataFrame.plot.pie` or :meth:`Series.plot.pie`. +If your data includes any ``NaN``, they will be automatically filled with 0. +A ``ValueError`` will be raised if there are any negative values in your data. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.figure() + +.. ipython:: python + + series = pd.Series(3 * np.random.rand(4), + index=['a', 'b', 'c', 'd'], name='series') + + @savefig series_pie_plot.png + series.plot.pie(figsize=(6, 6)) + +.. ipython:: python + :suppress: + + plt.close('all') + +For pie plots it's best to use square figures, i.e. a figure aspect ratio 1. +You can create the figure with equal width and height, or force the aspect ratio +to be equal after plotting by calling ``ax.set_aspect('equal')`` on the returned +``axes`` object. + +Note that a pie plot with a :class:`DataFrame` requires that you either specify a +target column by the ``y`` argument or ``subplots=True``. When ``y`` is +specified, a pie plot of the selected column will be drawn. If ``subplots=True`` is +specified, pie plots for each column are drawn as subplots. A legend will be +drawn in each pie plot by default; specify ``legend=False`` to hide it. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + plt.figure() + +.. ipython:: python + + df = pd.DataFrame(3 * np.random.rand(4, 2), + index=['a', 'b', 'c', 'd'], columns=['x', 'y']) + + @savefig df_pie_plot.png + df.plot.pie(subplots=True, figsize=(8, 4)) + +.. ipython:: python + :suppress: + + plt.close('all') + +You can use the ``labels`` and ``colors`` keywords to specify the labels and colors of each wedge. + +.. warning:: + + Most pandas plots use the ``label`` and ``color`` arguments (note the lack of "s" on those). + To be consistent with :func:`matplotlib.pyplot.pie` you must use ``labels`` and ``colors``. + +If you want to hide wedge labels, specify ``labels=None``.
+If ``fontsize`` is specified, the value will be applied to wedge labels. +Also, other keywords supported by :func:`matplotlib.pyplot.pie` can be used. + + +.. ipython:: python + :suppress: + + plt.figure() + +.. ipython:: python + + @savefig series_pie_plot_options.png + series.plot.pie(labels=['AA', 'BB', 'CC', 'DD'], colors=['r', 'g', 'b', 'c'], + autopct='%.2f', fontsize=20, figsize=(6, 6)) + +If you pass values whose sum total is less than 1.0, matplotlib draws a semicircle. + +.. ipython:: python + :suppress: + + plt.close('all') + plt.figure() + +.. ipython:: python + + series = pd.Series([0.1] * 4, index=['a', 'b', 'c', 'd'], name='series2') + + @savefig series_pie_plot_semi.png + series.plot.pie(figsize=(6, 6)) + +See the `matplotlib pie documentation `__ for more. + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.missing_data: + +Plotting with missing data +-------------------------- + +Pandas tries to be pragmatic about plotting ``DataFrames`` or ``Series`` +that contain missing data. Missing values are dropped, left out, or filled +depending on the plot type. 
+ ++----------------+--------------------------------------+ +| Plot Type | NaN Handling | ++================+======================================+ +| Line | Leave gaps at NaNs | ++----------------+--------------------------------------+ +| Line (stacked) | Fill 0's | ++----------------+--------------------------------------+ +| Bar | Fill 0's | ++----------------+--------------------------------------+ +| Scatter | Drop NaNs | ++----------------+--------------------------------------+ +| Histogram | Drop NaNs (column-wise) | ++----------------+--------------------------------------+ +| Box | Drop NaNs (column-wise) | ++----------------+--------------------------------------+ +| Area | Fill 0's | ++----------------+--------------------------------------+ +| KDE | Drop NaNs (column-wise) | ++----------------+--------------------------------------+ +| Hexbin | Drop NaNs | ++----------------+--------------------------------------+ +| Pie | Fill 0's | ++----------------+--------------------------------------+ + +If any of these defaults are not what you want, or if you want to be +explicit about how missing values are handled, consider using +:meth:`~pandas.DataFrame.fillna` or :meth:`~pandas.DataFrame.dropna` +before plotting. + +.. _visualization.tools: + +Plotting Tools +-------------- + +These functions can be imported from ``pandas.plotting`` +and take a :class:`Series` or :class:`DataFrame` as an argument. + +.. _visualization.scatter_matrix: + +Scatter matrix plot +~~~~~~~~~~~~~~~~~~~ + +You can create a scatter plot matrix using the +``scatter_matrix`` method in ``pandas.plotting``: + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import scatter_matrix + df = pd.DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd']) + + @savefig scatter_matrix_kde.png + scatter_matrix(df, alpha=0.2, figsize=(6, 6), diagonal='kde') + +.. ipython:: python + :suppress: + + plt.close('all') + +.. 
_visualization.kde: + +Density plot +~~~~~~~~~~~~ + +You can create density plots using the :meth:`Series.plot.kde` and :meth:`DataFrame.plot.kde` methods. + +.. ipython:: python + :suppress: + + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + ser = pd.Series(np.random.randn(1000)) + + @savefig kde_plot.png + ser.plot.kde() + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.andrews_curves: + +Andrews curves +~~~~~~~~~~~~~~ + +Andrews curves allow one to plot multivariate data as a large number +of curves that are created using the attributes of samples as coefficients +for Fourier series, see the `Wikipedia entry `__ +for more information. By coloring these curves differently for each class +it is possible to visualize data clustering. Curves belonging to samples +of the same class will usually be closer together and form larger structures. + +**Note**: The "Iris" dataset is available `here `__. + +.. ipython:: python + + from pandas.plotting import andrews_curves + + data = pd.read_csv('data/iris.data') + + plt.figure() + + @savefig andrews_curves.png + andrews_curves(data, 'Name') + +.. _visualization.parallel_coordinates: + +Parallel coordinates +~~~~~~~~~~~~~~~~~~~~ + +Parallel coordinates is a plotting technique for plotting multivariate data, +see the `Wikipedia entry `__ +for an introduction. +Parallel coordinates allows one to see clusters in data and to estimate other statistics visually. +Using parallel coordinates points are represented as connected line segments. +Each vertical line represents one attribute. One set of connected line segments +represents one data point. Points that tend to cluster will appear closer together. + +.. ipython:: python + + from pandas.plotting import parallel_coordinates + + data = pd.read_csv('data/iris.data') + + plt.figure() + + @savefig parallel_coordinates.png + parallel_coordinates(data, 'Name') + +.. ipython:: python + :suppress: + + plt.close('all') + +.. 
_visualization.lag: + +Lag plot +~~~~~~~~ + +Lag plots are used to check if a data set or time series is random. Random +data should not exhibit any structure in the lag plot. Non-random structure +implies that the underlying data are not random. The ``lag`` argument may +be passed, and when ``lag=1`` the plot is essentially ``data[:-1]`` vs. +``data[1:]``. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import lag_plot + + plt.figure() + + spacing = np.linspace(-99 * np.pi, 99 * np.pi, num=1000) + data = pd.Series(0.1 * np.random.rand(1000) + 0.9 * np.sin(spacing)) + + @savefig lag_plot.png + lag_plot(data) + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.autocorrelation: + +Autocorrelation plot +~~~~~~~~~~~~~~~~~~~~ + +Autocorrelation plots are often used for checking randomness in time series. +This is done by computing autocorrelations for data values at varying time lags. +If time series is random, such autocorrelations should be near zero for any and +all time-lag separations. If time series is non-random then one or more of the +autocorrelations will be significantly non-zero. The horizontal lines displayed +in the plot correspond to 95% and 99% confidence bands. The dashed line is 99% +confidence band. See the +`Wikipedia entry `__ for more about +autocorrelation plots. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import autocorrelation_plot + + plt.figure() + + spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000) + data = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing)) + + @savefig autocorrelation_plot.png + autocorrelation_plot(data) + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.bootstrap: + +Bootstrap plot +~~~~~~~~~~~~~~ + +Bootstrap plots are used to visually assess the uncertainty of a statistic, such +as mean, median, midrange, etc. 
A random subset of a specified size is selected +from a data set, the statistic in question is computed for this subset and the +process is repeated a specified number of times. Resulting plots and histograms +are what constitutes the bootstrap plot. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + from pandas.plotting import bootstrap_plot + + data = pd.Series(np.random.rand(1000)) + + @savefig bootstrap_plot.png + bootstrap_plot(data, size=50, samples=500, color='grey') + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.radviz: + +RadViz +~~~~~~ + +RadViz is a way of visualizing multi-variate data. It is based on a simple +spring tension minimization algorithm. Basically you set up a bunch of points in +a plane. In our case they are equally spaced on a unit circle. Each point +represents a single attribute. You then pretend that each sample in the data set +is attached to each of these points by a spring, the stiffness of which is +proportional to the numerical value of that attribute (they are normalized to +unit interval). The point in the plane, where our sample settles to (where the +forces acting on our sample are at an equilibrium) is where a dot representing +our sample will be drawn. Depending on which class that sample belongs it will +be colored differently. +See the R package `Radviz `__ +for more information. + +**Note**: The "Iris" dataset is available `here `__. + +.. ipython:: python + + from pandas.plotting import radviz + + data = pd.read_csv('data/iris.data') + + plt.figure() + + @savefig radviz.png + radviz(data, 'Name') + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.formatting: + +Plot Formatting +--------------- + +Setting the plot style +~~~~~~~~~~~~~~~~~~~~~~ + +From version 1.5 and up, matplotlib offers a range of pre-configured plotting styles. Setting the +style can be used to easily give plots the general look that you want. 
+Setting the style is as easy as calling ``matplotlib.style.use(my_plot_style)`` before +creating your plot. For example you could write ``matplotlib.style.use('ggplot')`` for ggplot-style +plots. + +You can see the various available style names at ``matplotlib.style.available`` and it's very +easy to try them out. + +General plot style arguments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Most plotting methods have a set of keyword arguments that control the +layout and formatting of the returned plot: + +.. ipython:: python + + plt.figure(); + @savefig series_plot_basic2.png + ts.plot(style='k--', label='Series'); + +.. ipython:: python + :suppress: + + plt.close('all') + +For each kind of plot (e.g. `line`, `bar`, `scatter`) any additional keyword +arguments are passed along to the corresponding matplotlib function +(:meth:`ax.plot() <matplotlib.axes.Axes.plot>`, +:meth:`ax.bar() <matplotlib.axes.Axes.bar>`, +:meth:`ax.scatter() <matplotlib.axes.Axes.scatter>`). These can be used +to control additional styling, beyond what pandas provides. + +Controlling the legend +~~~~~~~~~~~~~~~~~~~~~~ + +You may set the ``legend`` argument to ``False`` to hide the legend, which is +shown by default. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 4), + index=ts.index, columns=list('ABCD')) + df = df.cumsum() + + @savefig frame_plot_basic_noleg.png + df.plot(legend=False) + +.. ipython:: python + :suppress: + + plt.close('all') + +Scales +~~~~~~ + +You may pass ``logy`` to get a log-scale Y axis. + +.. ipython:: python + :suppress: + + plt.figure() + np.random.seed(123456) + +.. ipython:: python + + ts = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) + ts = np.exp(ts.cumsum()) + + @savefig series_plot_logy.png + ts.plot(logy=True) + +.. ipython:: python + :suppress: + + plt.close('all') + +See also the ``logx`` and ``loglog`` keyword arguments.
+ +Plotting on a secondary y-axis +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To plot data on a secondary y-axis, use the ``secondary_y`` keyword: + +.. ipython:: python + :suppress: + + plt.figure() + +.. ipython:: python + + df['A'].plot() + + @savefig series_plot_secondary_y.png + df['B'].plot(secondary_y=True, style='g') + +.. ipython:: python + :suppress: + + plt.close('all') + +To plot only some columns of a ``DataFrame`` on the secondary y-axis, give their names to the ``secondary_y`` +keyword: + +.. ipython:: python + + plt.figure() + ax = df.plot(secondary_y=['A', 'B']) + ax.set_ylabel('CD scale') + @savefig frame_plot_secondary_y.png + ax.right_ax.set_ylabel('AB scale') + +.. ipython:: python + :suppress: + + plt.close('all') + +Note that the columns plotted on the secondary y-axis are automatically marked +with "(right)" in the legend. To turn off the automatic marking, use the +``mark_right=False`` keyword: + +.. ipython:: python + + plt.figure() + + @savefig frame_plot_secondary_y_no_right.png + df.plot(secondary_y=['A', 'B'], mark_right=False) + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _plotting.formatters: + +Custom formatters for timeseries plots +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. versionchanged:: 1.0.0 + +Pandas provides custom formatters for timeseries plots. These change the +formatting of the axis labels for dates and times. By default, +the custom formatters are applied only to plots created by pandas with +:meth:`DataFrame.plot` or :meth:`Series.plot`. To have them apply to all +plots, including those made by matplotlib, set the option +``pd.options.plotting.matplotlib.register_converters = True`` or use +:meth:`pandas.plotting.register_matplotlib_converters`. + +Suppressing tick resolution adjustment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +pandas includes automatic tick resolution adjustment for regular frequency +time-series data.
For limited cases where pandas cannot infer the frequency +information (e.g., in an externally created ``twinx``), you can choose to +suppress this behavior for alignment purposes. + +Here is the default behavior, notice how the x-axis tick labeling is performed: + +.. ipython:: python + + plt.figure() + + @savefig ser_plot_suppress.png + df['A'].plot() + +.. ipython:: python + :suppress: + + plt.close('all') + +Using the ``x_compat`` parameter, you can suppress this behavior: + +.. ipython:: python + + plt.figure() + + @savefig ser_plot_suppress_parm.png + df['A'].plot(x_compat=True) + +.. ipython:: python + :suppress: + + plt.close('all') + +If you have more than one plot that needs to be suppressed, the ``use`` method +in ``pandas.plotting.plot_params`` can be used in a `with statement`: + +.. ipython:: python + + plt.figure() + + @savefig ser_plot_suppress_context.png + with pd.plotting.plot_params.use('x_compat', True): + df['A'].plot(color='r') + df['B'].plot(color='g') + df['C'].plot(color='b') + +.. ipython:: python + :suppress: + + plt.close('all') + +Automatic date tick adjustment +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``TimedeltaIndex`` now uses the native matplotlib +tick locator methods, it is useful to call the automatic +date tick adjustment from matplotlib for figures whose ticklabels overlap. + +See the :meth:`autofmt_xdate ` method and the +`matplotlib documentation `__ for more. + +Subplots +~~~~~~~~ + +Each ``Series`` in a ``DataFrame`` can be plotted on a different axis +with the ``subplots`` keyword: + +.. ipython:: python + + @savefig frame_plot_subplots.png + df.plot(subplots=True, figsize=(6, 6)); + +.. ipython:: python + :suppress: + + plt.close('all') + +Using layout and targeting multiple axes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The layout of subplots can be specified by the ``layout`` keyword. It can accept +``(rows, columns)``. The ``layout`` keyword can be used in +``hist`` and ``boxplot`` also. 
If the input is invalid, a ``ValueError`` will be raised. + +The number of axes which can be contained by rows x columns specified by ``layout`` must be +at least the number of required subplots. If layout can contain more axes than required, +blank axes are not drawn. Similar to a NumPy array's ``reshape`` method, you +can use ``-1`` for one dimension to automatically calculate the number of rows +or columns needed, given the other. + +.. ipython:: python + + @savefig frame_plot_subplots_layout.png + df.plot(subplots=True, layout=(2, 3), figsize=(6, 6), sharex=False); + +.. ipython:: python + :suppress: + + plt.close('all') + +The above example is identical to using: + +.. ipython:: python + + df.plot(subplots=True, layout=(2, -1), figsize=(6, 6), sharex=False); + +.. ipython:: python + :suppress: + + plt.close('all') + +The required number of columns (3) is inferred from the number of series to plot +and the given number of rows (2). + +You can pass multiple axes created beforehand as list-like via the ``ax`` keyword. +This allows more complicated layouts. +The number of passed axes must match the number of subplots being drawn. + +When multiple axes are passed via the ``ax`` keyword, the ``layout``, ``sharex`` and ``sharey`` keywords +don't affect the output. You should explicitly pass ``sharex=False`` and ``sharey=False``, +otherwise you will see a warning. + +.. ipython:: python + + fig, axes = plt.subplots(4, 4, figsize=(6, 6)) + plt.subplots_adjust(wspace=0.5, hspace=0.5) + target1 = [axes[0][0], axes[1][1], axes[2][2], axes[3][3]] + target2 = [axes[3][0], axes[2][1], axes[1][2], axes[0][3]] + + df.plot(subplots=True, ax=target1, legend=False, sharex=False, sharey=False); + @savefig frame_plot_subplots_multi_ax.png + (-df).plot(subplots=True, ax=target2, legend=False, + sharex=False, sharey=False); + +.. ipython:: python + :suppress: + + plt.close('all') + +Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a particular axis: + +..
ipython:: python + :suppress: + + np.random.seed(123456) + ts = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) + ts = ts.cumsum() + + df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, + columns=list('ABCD')) + df = df.cumsum() + +.. ipython:: python + :suppress: + + plt.close('all') + +.. ipython:: python + + fig, axes = plt.subplots(nrows=2, ncols=2) + df['A'].plot(ax=axes[0, 0]); + axes[0, 0].set_title('A'); + df['B'].plot(ax=axes[0, 1]); + axes[0, 1].set_title('B'); + df['C'].plot(ax=axes[1, 0]); + axes[1, 0].set_title('C'); + df['D'].plot(ax=axes[1, 1]); + @savefig series_plot_multi.png + axes[1, 1].set_title('D'); + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.errorbars: + +Plotting with error bars +~~~~~~~~~~~~~~~~~~~~~~~~ + +Plotting with error bars is supported in :meth:`DataFrame.plot` and :meth:`Series.plot`. + +Horizontal and vertical error bars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats: + +* As a :class:`DataFrame` or ``dict`` of errors with column names matching the ``columns`` attribute of the plotting :class:`DataFrame` or matching the ``name`` attribute of the :class:`Series`. +* As a ``str`` indicating which of the columns of plotting :class:`DataFrame` contain the error values. +* As raw values (``list``, ``tuple``, or ``np.ndarray``). Must be the same length as the plotting :class:`DataFrame`/:class:`Series`. + +Asymmetrical error bars are also supported, however raw error values must be provided in this case. For a ``M`` length :class:`Series`, a ``Mx2`` array should be provided indicating lower and upper (or left and right) errors. For a ``MxN`` :class:`DataFrame`, asymmetrical errors should be in a ``Mx2xN`` array. + +Here is an example of one way to easily plot group means with standard deviations from the raw data. + +.. 
ipython:: python + + # Generate the data + ix3 = pd.MultiIndex.from_arrays([ + ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'], + ['foo', 'foo', 'bar', 'bar', 'foo', 'foo', 'bar', 'bar']], + names=['letter', 'word']) + + df3 = pd.DataFrame({'data1': [3, 2, 4, 3, 2, 4, 3, 2], + 'data2': [6, 5, 7, 5, 4, 5, 6, 5]}, index=ix3) + + # Group by index labels and take the means and standard deviations + # for each group + gp3 = df3.groupby(level=('letter', 'word')) + means = gp3.mean() + errors = gp3.std() + means + errors + + # Plot + fig, ax = plt.subplots() + @savefig errorbar_example.png + means.plot.bar(yerr=errors, ax=ax, capsize=4) + +.. ipython:: python + :suppress: + + plt.close('all') + +.. _visualization.table: + +Plotting tables +~~~~~~~~~~~~~~~ + +Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + fig, ax = plt.subplots(1, 1) + df = pd.DataFrame(np.random.rand(5, 3), columns=['a', 'b', 'c']) + ax.get_xaxis().set_visible(False) # Hide Ticks + + @savefig line_plot_table_true.png + df.plot(table=True, ax=ax) + +.. ipython:: python + :suppress: + + plt.close('all') + +Also, you can pass a different :class:`DataFrame` or :class:`Series` to the +``table`` keyword. The data will be drawn as displayed in print method +(not transposed automatically). If required, it should be transposed manually +as seen in the example below. + +.. ipython:: python + + fig, ax = plt.subplots(1, 1) + ax.get_xaxis().set_visible(False) # Hide Ticks + @savefig line_plot_table_data.png + df.plot(table=np.round(df.T, 2), ax=ax) + +.. 
ipython:: python + :suppress: + + plt.close('all') + +There also exists a helper function ``pandas.plotting.table``, which creates a +table from :class:`DataFrame` or :class:`Series`, and adds it to an +``matplotlib.Axes`` instance. This function can accept keywords which the +matplotlib `table `__ has. + +.. ipython:: python + + from pandas.plotting import table + fig, ax = plt.subplots(1, 1) + + table(ax, np.round(df.describe(), 2), + loc='upper right', colWidths=[0.2, 0.2, 0.2]) + + @savefig line_plot_table_describe.png + df.plot(ax=ax, ylim=(0, 2), legend=None) + +.. ipython:: python + :suppress: + + plt.close('all') + +**Note**: You can get table instances on the axes using ``axes.tables`` property for further decorations. See the `matplotlib table documentation `__ for more. + +.. _visualization.colormaps: + +Colormaps +~~~~~~~~~ + +A potential issue when plotting a large number of columns is that it can be +difficult to distinguish some series due to repetition in the default colors. To +remedy this, ``DataFrame`` plotting supports the use of the ``colormap`` argument, +which accepts either a Matplotlib `colormap `__ +or a string that is a name of a colormap registered with Matplotlib. A +visualization of the default matplotlib colormaps is available `here +`__. + +As matplotlib does not directly support colormaps for line-based plots, the +colors are selected based on an even spacing determined by the number of columns +in the ``DataFrame``. There is no consideration made for background color, so some +colormaps will produce lines that are not easily visible. + +To use the cubehelix colormap, we can pass ``colormap='cubehelix'``. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(1000, 10), index=ts.index) + df = df.cumsum() + + plt.figure() + + @savefig cubehelix.png + df.plot(colormap='cubehelix') + +.. 
ipython:: python + :suppress: + + plt.close('all') + +Alternatively, we can pass the colormap itself: + +.. ipython:: python + + from matplotlib import cm + + plt.figure() + + @savefig cubehelix_cm.png + df.plot(colormap=cm.cubehelix) + +.. ipython:: python + :suppress: + + plt.close('all') + +Colormaps can also be used with other plot types, like bar charts: + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +.. ipython:: python + + dd = pd.DataFrame(np.random.randn(10, 10)).applymap(abs) + dd = dd.cumsum() + + plt.figure() + + @savefig greens.png + dd.plot.bar(colormap='Greens') + +.. ipython:: python + :suppress: + + plt.close('all') + +Parallel coordinates charts: + +.. ipython:: python + + plt.figure() + + @savefig parallel_gist_rainbow.png + parallel_coordinates(data, 'Name', colormap='gist_rainbow') + +.. ipython:: python + :suppress: + + plt.close('all') + +Andrews curves charts: + +.. ipython:: python + + plt.figure() + + @savefig andrews_curve_winter.png + andrews_curves(data, 'Name', colormap='winter') + +.. ipython:: python + :suppress: + + plt.close('all') + +Plotting directly with matplotlib +--------------------------------- + +In some situations it may still be preferable or necessary to prepare plots +directly with matplotlib, for instance when a certain type of plot or +customization is not (yet) supported by pandas. ``Series`` and ``DataFrame`` +objects behave like arrays and can therefore be passed directly to +matplotlib functions without explicit casts. + +pandas also automatically registers formatters and locators that recognize date +indices, thereby extending date and time support to practically all plot types +available in matplotlib. Although this formatting does not provide the same +level of refinement you would get when plotting via pandas, it can be faster +when plotting a large number of points. + +.. ipython:: python + :suppress: + + np.random.seed(123456) + +..
ipython:: python + + price = pd.Series(np.random.randn(150).cumsum(), + index=pd.date_range('2000-1-1', periods=150, freq='B')) + ma = price.rolling(20).mean() + mstd = price.rolling(20).std() + + plt.figure() + + plt.plot(price.index, price, 'k') + plt.plot(ma.index, ma, 'b') + @savefig bollinger.png + plt.fill_between(mstd.index, ma - 2 * mstd, ma + 2 * mstd, + color='b', alpha=0.2) + +.. ipython:: python + :suppress: + + plt.close('all') diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst new file mode 100644 index 00000000..dec0807b --- /dev/null +++ b/doc/source/whatsnew/index.rst @@ -0,0 +1,229 @@ +.. _release: + +{{ header }} + +************* +Release Notes +************* + +This is the list of changes to pandas between each release. For full details, +see the commit logs at http://github.com/pandas-dev/pandas. For install and +upgrade instructions, see :ref:`install`. + +Version 1.0 +----------- + +.. toctree:: + :maxdepth: 2 + + v1.0.5 + v1.0.4 + v1.0.3 + v1.0.2 + v1.0.1 + v1.0.0 + +Version 0.25 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.25.3 + v0.25.2 + v0.25.1 + v0.25.0 + +Version 0.24 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.24.2 + v0.24.1 + v0.24.0 + +Version 0.23 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.23.4 + v0.23.3 + v0.23.2 + v0.23.1 + v0.23.0 + +Version 0.22 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.22.0 + +Version 0.21 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.21.1 + v0.21.0 + +Version 0.20 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.20.3 + v0.20.2 + v0.20.0 + +Version 0.19 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.19.2 + v0.19.1 + v0.19.0 + +Version 0.18 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.18.1 + v0.18.0 + +Version 0.17 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.17.1 + v0.17.0 + +Version 0.16 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.16.2 + v0.16.1 + v0.16.0 + +Version 0.15 +------------ + +.. 
toctree:: + :maxdepth: 2 + + v0.15.2 + v0.15.1 + v0.15.0 + +Version 0.14 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.14.1 + v0.14.0 + +Version 0.13 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.13.1 + v0.13.0 + +Version 0.12 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.12.0 + +Version 0.11 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.11.0 + +Version 0.10 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.10.1 + v0.10.0 + +Version 0.9 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.9.1 + v0.9.0 + +Version 0.8 +------------ + +.. toctree:: + :maxdepth: 2 + + v0.8.1 + v0.8.0 + +Version 0.7 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.7.3 + v0.7.2 + v0.7.1 + v0.7.0 + +Version 0.6 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.6.1 + v0.6.0 + +Version 0.5 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.5.0 + +Version 0.4 +----------- + +.. toctree:: + :maxdepth: 2 + + v0.4.x diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst new file mode 100644 index 00000000..2e044236 --- /dev/null +++ b/doc/source/whatsnew/v0.10.0.rst @@ -0,0 +1,531 @@ +.. _whatsnew_0100: + +v0.10.0 (December 17, 2012) +--------------------------- + +{{ header }} + + +This is a major release from 0.9.1 and includes many new features and +enhancements along with a large number of bug fixes. There are also a number of +important API changes that long-time pandas users should pay close attention +to. + +File parsing new features +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The delimited file parsing engine (the guts of ``read_csv`` and ``read_table``) +has been rewritten from the ground up and now uses a fraction the amount of +memory while parsing, while being 40% or more faster in most use cases (in some +cases much faster). + +There are also many new features: + +- Much-improved Unicode handling via the ``encoding`` option. 
+- Column filtering (``usecols``) +- Dtype specification (``dtype`` argument) +- Ability to specify strings to be recognized as True/False +- Ability to yield NumPy record arrays (``as_recarray``) +- High performance ``delim_whitespace`` option +- Decimal format (e.g. European format) specification +- Easier CSV dialect options: ``escapechar``, ``lineterminator``, + ``quotechar``, etc. +- More robust handling of many exceptional kinds of files observed in the wild + +API changes +~~~~~~~~~~~ + +**Deprecated DataFrame BINOP TimeSeries special case behavior** + +The default behavior of binary operations between a DataFrame and a Series has +always been to align on the DataFrame's columns and broadcast down the rows, +**except** in the special case that the DataFrame contains time series. Since +there are now methods for each binary operator enabling you to specify how you +want to broadcast, we are phasing out this special case (Zen of Python: +*Special cases aren't special enough to break the rules*). Here's what I'm +talking about: + +.. ipython:: python + :okwarning: + + import pandas as pd + df = pd.DataFrame(np.random.randn(6, 4), + index=pd.date_range('1/1/2000', periods=6)) + df + # deprecated now + df - df[0] + # Change your code to + df.sub(df[0], axis=0) # align on axis 0 (rows) + +You will get a deprecation warning in the 0.10.x series, and the deprecated +functionality will be removed in 0.11 or later. + +**Altered resample default behavior** + +The default time series ``resample`` binning behavior of daily ``D`` and +*higher* frequencies has been changed to ``closed='left', label='left'``. Lower +frequencies are unaffected. The prior defaults were causing a great deal of +confusion for users, especially resampling data to daily frequency (which +labeled the aggregated group with the end of the interval: the next day). + +..
code-block:: ipython + + In [1]: dates = pd.date_range('1/1/2000', '1/5/2000', freq='4h') + + In [2]: series = pd.Series(np.arange(len(dates)), index=dates) + + In [3]: series + Out[3]: + 2000-01-01 00:00:00 0 + 2000-01-01 04:00:00 1 + 2000-01-01 08:00:00 2 + 2000-01-01 12:00:00 3 + 2000-01-01 16:00:00 4 + 2000-01-01 20:00:00 5 + 2000-01-02 00:00:00 6 + 2000-01-02 04:00:00 7 + 2000-01-02 08:00:00 8 + 2000-01-02 12:00:00 9 + 2000-01-02 16:00:00 10 + 2000-01-02 20:00:00 11 + 2000-01-03 00:00:00 12 + 2000-01-03 04:00:00 13 + 2000-01-03 08:00:00 14 + 2000-01-03 12:00:00 15 + 2000-01-03 16:00:00 16 + 2000-01-03 20:00:00 17 + 2000-01-04 00:00:00 18 + 2000-01-04 04:00:00 19 + 2000-01-04 08:00:00 20 + 2000-01-04 12:00:00 21 + 2000-01-04 16:00:00 22 + 2000-01-04 20:00:00 23 + 2000-01-05 00:00:00 24 + Freq: 4H, dtype: int64 + + In [4]: series.resample('D', how='sum') + Out[4]: + 2000-01-01 15 + 2000-01-02 51 + 2000-01-03 87 + 2000-01-04 123 + 2000-01-05 24 + Freq: D, dtype: int64 + + In [5]: # old behavior + In [6]: series.resample('D', how='sum', closed='right', label='right') + Out[6]: + 2000-01-01 0 + 2000-01-02 21 + 2000-01-03 57 + 2000-01-04 93 + 2000-01-05 129 + Freq: D, dtype: int64 + +- Infinity and negative infinity are no longer treated as NA by ``isnull`` and + ``notnull``. That they ever were was a relic of early pandas. This behavior + can be re-enabled globally by the ``mode.use_inf_as_null`` option: + +.. 
code-block:: ipython + + In [6]: s = pd.Series([1.5, np.inf, 3.4, -np.inf]) + + In [7]: pd.isnull(s) + Out[7]: + 0 False + 1 False + 2 False + 3 False + Length: 4, dtype: bool + + In [8]: s.fillna(0) + Out[8]: + 0 1.500000 + 1 inf + 2 3.400000 + 3 -inf + Length: 4, dtype: float64 + + In [9]: pd.set_option('use_inf_as_null', True) + + In [10]: pd.isnull(s) + Out[10]: + 0 False + 1 True + 2 False + 3 True + Length: 4, dtype: bool + + In [11]: s.fillna(0) + Out[11]: + 0 1.5 + 1 0.0 + 2 3.4 + 3 0.0 + Length: 4, dtype: float64 + + In [12]: pd.reset_option('use_inf_as_null') + +- Methods with the ``inplace`` option now all return ``None`` instead of the + calling object. E.g. code written like ``df = df.fillna(0, inplace=True)`` + may stop working. To fix, simply delete the unnecessary variable assignment. + +- ``pandas.merge`` no longer sorts the group keys (``sort=False``) by + default. This was done for performance reasons: the group-key sorting is + often one of the more expensive parts of the computation and is often + unnecessary. + +- The default column names for a file with no header have been changed to the + integers ``0`` through ``N - 1``. This is to create consistency with the + DataFrame constructor with no columns specified. The v0.9.0 behavior (names + ``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``: + +.. ipython:: python + + import io + + data = ('a,b,c\n' + '1,Yes,2\n' + '3,No,4') + print(data) + pd.read_csv(io.StringIO(data), header=None) + pd.read_csv(io.StringIO(data), header=None, prefix='X') + +- Values like ``'Yes'`` and ``'No'`` are not interpreted as boolean by default, + though this can be controlled by new ``true_values`` and ``false_values`` + arguments: + +.. 
ipython:: python + + print(data) + pd.read_csv(io.StringIO(data)) + pd.read_csv(io.StringIO(data), true_values=['Yes'], false_values=['No']) + +- The file parsers will not recognize non-string values arising from a + converter function as NA if passed in the ``na_values`` argument. It's better + to do post-processing using the ``replace`` function instead. + +- Calling ``fillna`` on Series or DataFrame with no arguments is no longer + valid code. You must either specify a fill value or an interpolation method: + +.. ipython:: python + + s = pd.Series([np.nan, 1., 2., np.nan, 4]) + s + s.fillna(0) + s.fillna(method='pad') + +Convenience methods ``ffill`` and ``bfill`` have been added: + +.. ipython:: python + + s.ffill() + + +- ``Series.apply`` will now operate on a returned value from the applied + function, that is itself a series, and possibly upcast the result to a + DataFrame + + .. ipython:: python + + def f(x): + return pd.Series([x, x**2], index=['x', 'x^2']) + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) + +- New API functions for working with pandas options (:issue:`2097`): + + - ``get_option`` / ``set_option`` - get/set the value of an option. Partial + names are accepted. - ``reset_option`` - reset one or more options to + their default value. Partial names are accepted. - ``describe_option`` - + print a description of one or more options. When called with no + arguments. print all registered options. + + Note: ``set_printoptions``/ ``reset_printoptions`` are now deprecated (but + functioning), the print options now live under "display.XYZ". For example: + + .. ipython:: python + + pd.get_option("display.max_rows") + +- to_string() methods now always return unicode strings (:issue:`2224`). + +New features +~~~~~~~~~~~~ + +Wide DataFrame printing +~~~~~~~~~~~~~~~~~~~~~~~ + +Instead of printing the summary information, pandas now splits the string +representation across multiple rows by default: + +.. 
ipython:: python + + wide_frame = pd.DataFrame(np.random.randn(5, 16)) + + wide_frame + +The old behavior of printing out summary information can be achieved via the +'expand_frame_repr' print option: + +.. ipython:: python + + pd.set_option('expand_frame_repr', False) + + wide_frame + +.. ipython:: python + :suppress: + + pd.reset_option('expand_frame_repr') + +The width of each line can be changed via 'line_width' (80 by default): + +.. code-block:: python + + pd.set_option('line_width', 40) + + wide_frame + + +Updated PyTables support +~~~~~~~~~~~~~~~~~~~~~~~~ + +:ref:`Docs ` for PyTables ``Table`` format & several enhancements to the api. Here is a taste of what to expect. + +.. code-block:: ipython + + In [41]: store = pd.HDFStore('store.h5') + + In [42]: df = pd.DataFrame(np.random.randn(8, 3), + ....: index=pd.date_range('1/1/2000', periods=8), + ....: columns=['A', 'B', 'C']) + + In [43]: df + Out[43]: + A B C + 2000-01-01 -2.036047 0.000830 -0.955697 + 2000-01-02 -0.898872 -0.725411 0.059904 + 2000-01-03 -0.449644 1.082900 -1.221265 + 2000-01-04 0.361078 1.330704 0.855932 + 2000-01-05 -1.216718 1.488887 0.018993 + 2000-01-06 -0.877046 0.045976 0.437274 + 2000-01-07 -0.567182 -0.888657 -0.556383 + 2000-01-08 0.655457 1.117949 -2.782376 + + [8 rows x 3 columns] + + # appending data frames + In [44]: df1 = df[0:4] + + In [45]: df2 = df[4:] + + In [46]: store.append('df', df1) + + In [47]: store.append('df', df2) + + In [48]: store + Out[48]: + + File path: store.h5 + /df frame_table (typ->appendable,nrows->8,ncols->3,indexers->[index]) + + # selecting the entire store + In [49]: store.select('df') + Out[49]: + A B C + 2000-01-01 -2.036047 0.000830 -0.955697 + 2000-01-02 -0.898872 -0.725411 0.059904 + 2000-01-03 -0.449644 1.082900 -1.221265 + 2000-01-04 0.361078 1.330704 0.855932 + 2000-01-05 -1.216718 1.488887 0.018993 + 2000-01-06 -0.877046 0.045976 0.437274 + 2000-01-07 -0.567182 -0.888657 -0.556383 + 2000-01-08 0.655457 1.117949 -2.782376 + + [8 rows x 3 
columns] + +.. code-block:: ipython + + In [50]: wp = pd.Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], + ....: major_axis=pd.date_range('1/1/2000', periods=5), + ....: minor_axis=['A', 'B', 'C', 'D']) + + In [51]: wp + Out[51]: + + Dimensions: 2 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + + # storing a panel + In [52]: store.append('wp', wp) + + # selecting via A QUERY + In [53]: store.select('wp', [pd.Term('major_axis>20000102'), + ....: pd.Term('minor_axis', '=', ['A', 'B'])]) + ....: + Out[53]: + + Dimensions: 2 (items) x 3 (major_axis) x 2 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to B + + # removing data from tables + In [54]: store.remove('wp', pd.Term('major_axis>20000103')) + Out[54]: 8 + + In [55]: store.select('wp') + Out[55]: + + Dimensions: 2 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-03 00:00:00 + Minor_axis axis: A to D + + # deleting a store + In [56]: del store['df'] + + In [57]: store + Out[57]: + + File path: store.h5 + /wp wide_table (typ->appendable,nrows->12,ncols->2,indexers->[major_axis,minor_axis]) + + +**Enhancements** + +- added ability to hierarchical keys + + .. 
code-block:: ipython + + In [58]: store.put('foo/bar/bah', df) + + In [59]: store.append('food/orange', df) + + In [60]: store.append('food/apple', df) + + In [61]: store + Out[61]: + + File path: store.h5 + /foo/bar/bah frame (shape->[8,3]) + /food/apple frame_table (typ->appendable,nrows->8,ncols->3,indexers->[index]) + /food/orange frame_table (typ->appendable,nrows->8,ncols->3,indexers->[index]) + /wp wide_table (typ->appendable,nrows->12,ncols->2,indexers->[major_axis,minor_axis]) + + # remove all nodes under this level + In [62]: store.remove('food') + + In [63]: store + Out[63]: + + File path: store.h5 + /foo/bar/bah frame (shape->[8,3]) + /wp wide_table (typ->appendable,nrows->12,ncols->2,indexers->[major_axis,minor_axis]) + +- added mixed-dtype support! + + .. code-block:: ipython + + In [64]: df['string'] = 'string' + + In [65]: df['int'] = 1 + + In [66]: store.append('df', df) + + In [67]: df1 = store.select('df') + + In [68]: df1 + Out[68]: + A B C string int + 2000-01-01 -2.036047 0.000830 -0.955697 string 1 + 2000-01-02 -0.898872 -0.725411 0.059904 string 1 + 2000-01-03 -0.449644 1.082900 -1.221265 string 1 + 2000-01-04 0.361078 1.330704 0.855932 string 1 + 2000-01-05 -1.216718 1.488887 0.018993 string 1 + 2000-01-06 -0.877046 0.045976 0.437274 string 1 + 2000-01-07 -0.567182 -0.888657 -0.556383 string 1 + 2000-01-08 0.655457 1.117949 -2.782376 string 1 + + [8 rows x 5 columns] + + In [69]: df1.get_dtype_counts() + Out[69]: + float64 3 + int64 1 + object 1 + dtype: int64 + +- performance improvements on table writing +- support for arbitrarily indexed dimensions +- ``SparseSeries`` now has a ``density`` property (:issue:`2384`) +- enable ``Series.str.strip/lstrip/rstrip`` methods to take an input argument + to strip arbitrary characters (:issue:`2411`) +- implement ``value_vars`` in ``melt`` to limit values to certain columns + and add ``melt`` to pandas namespace (:issue:`2412`) + +**Bug Fixes** + +- added ``Term`` method of specifying where 
conditions (:issue:`1996`). +- ``del store['df']`` now calls ``store.remove('df')`` for store deletion +- deleting of consecutive rows is much faster than before +- ``min_itemsize`` parameter can be specified in table creation to force a + minimum size for indexing columns (the previous implementation would set the + column size based on the first append) +- indexing support via ``create_table_index`` (requires PyTables >= 2.3) + (:issue:`698`). +- appending on a store would fail if the table was not first created via ``put`` +- fixed issue with missing attributes after loading a pickled dataframe (:issue:`2431`) +- minor change to select and remove: require a table ONLY if where is also + provided (and not None) + +**Compatibility** + +0.10 of ``HDFStore`` is backwards compatible for reading tables created in a prior version of pandas; +however, query terms using the prior (undocumented) methodology are unsupported. You must read in the entire +file and write it out using the new format to take advantage of the updates. + +N dimensional Panels (experimental) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Adding experimental support for Panel4D and factory functions to create n-dimensional named panels. +Here is a taste of what to expect. + +.. code-block:: ipython + + In [58]: p4d = Panel4D(np.random.randn(2, 2, 5, 4), + ....: labels=['Label1','Label2'], + ....: items=['Item1', 'Item2'], + ....: major_axis=date_range('1/1/2000', periods=5), + ....: minor_axis=['A', 'B', 'C', 'D']) + ....: + + In [59]: p4d + Out[59]: + + Dimensions: 2 (labels) x 2 (items) x 5 (major_axis) x 4 (minor_axis) + Labels axis: Label1 to Label2 + Items axis: Item1 to Item2 + Major_axis axis: 2000-01-01 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + + + + + +See the :ref:`full release notes +<release>` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.10.0.contributors: + +Contributors +~~~~~~~~~~~~ + +..
contributors:: v0.9.0..v0.10.0 diff --git a/doc/source/whatsnew/v0.10.1.rst b/doc/source/whatsnew/v0.10.1.rst new file mode 100644 index 00000000..c4251f70 --- /dev/null +++ b/doc/source/whatsnew/v0.10.1.rst @@ -0,0 +1,262 @@ +.. _whatsnew_0101: + +v0.10.1 (January 22, 2013) +--------------------------- + +{{ header }} + + +This is a minor release from 0.10.0 and includes new features, enhancements, +and bug fixes. In particular, there is substantial new HDFStore functionality +contributed by Jeff Reback. + +An undesired API breakage with functions taking the ``inplace`` option has been +reverted and deprecation warnings added. + +API changes +~~~~~~~~~~~ + +- Functions taking an ``inplace`` option return the calling object as before. A + deprecation message has been added. +- Groupby aggregations Max/Min no longer exclude non-numeric data (:issue:`2700`) +- Resampling an empty DataFrame now returns an empty DataFrame instead of + raising an exception (:issue:`2640`) +- The file reader will now raise an exception when NA values are found in an + explicitly specified integer column instead of converting the column to float + (:issue:`2631`) +- DatetimeIndex.unique now returns a DatetimeIndex with the same name and + timezone instead of an array (:issue:`2563`) + +New features +~~~~~~~~~~~~ + +- MySQL support for database (contribution from Dan Allan) + +HDFStore +~~~~~~~~ + +You may need to upgrade your existing data files. Please visit the +**compatibility** section in the main docs. + + +.. ipython:: python + :suppress: + :okexcept: + + import os + + os.remove('store.h5') + +You can designate (and index) certain columns that you want to be able to +perform queries on a table, by passing a list to ``data_columns`` + +..
ipython:: python + + store = pd.HDFStore('store.h5') + df = pd.DataFrame(np.random.randn(8, 3), + index=pd.date_range('1/1/2000', periods=8), + columns=['A', 'B', 'C']) + df['string'] = 'foo' + df.loc[df.index[4:6], 'string'] = np.nan + df.loc[df.index[7:9], 'string'] = 'bar' + df['string2'] = 'cool' + df + + # on-disk operations + store.append('df', df, data_columns=['B', 'C', 'string', 'string2']) + store.select('df', "B>0 and string=='foo'") + + # this is in-memory version of this type of selection + df[(df.B > 0) & (df.string == 'foo')] + +Retrieving unique values in an indexable or data column. + +.. code-block:: python + + # note that this is deprecated as of 0.14.0 + # can be replicated by: store.select_column('df','index').unique() + store.unique('df', 'index') + store.unique('df', 'string') + +You can now store ``datetime64`` in data columns + +.. ipython:: python + + df_mixed = df.copy() + df_mixed['datetime64'] = pd.Timestamp('20010102') + df_mixed.loc[df_mixed.index[3:4], ['A', 'B']] = np.nan + + store.append('df_mixed', df_mixed) + df_mixed1 = store.select('df_mixed') + df_mixed1 + df_mixed1.dtypes.value_counts() + +You can pass ``columns`` keyword to select to filter a list of the return +columns, this is equivalent to passing a +``Term('columns',list_of_columns_to_filter)`` + +.. ipython:: python + + store.select('df', columns=['A', 'B']) + +``HDFStore`` now serializes MultiIndex dataframes when appending tables. + +.. 
code-block:: ipython + + In [19]: index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ....: ['one', 'two', 'three']], + ....: labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + ....: [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + ....: names=['foo', 'bar']) + ....: + + In [20]: df = pd.DataFrame(np.random.randn(10, 3), index=index, + ....: columns=['A', 'B', 'C']) + ....: + + In [21]: df + Out[21]: + A B C + foo bar + foo one -0.116619 0.295575 -1.047704 + two 1.640556 1.905836 2.772115 + three 0.088787 -1.144197 -0.633372 + bar one 0.925372 -0.006438 -0.820408 + two -0.600874 -1.039266 0.824758 + baz two -0.824095 -0.337730 -0.927764 + three -0.840123 0.248505 -0.109250 + qux one 0.431977 -0.460710 0.336505 + two -3.207595 -1.535854 0.409769 + three -0.673145 -0.741113 -0.110891 + + In [22]: store.append('mi', df) + + In [23]: store.select('mi') + Out[23]: + A B C + foo bar + foo one -0.116619 0.295575 -1.047704 + two 1.640556 1.905836 2.772115 + three 0.088787 -1.144197 -0.633372 + bar one 0.925372 -0.006438 -0.820408 + two -0.600874 -1.039266 0.824758 + baz two -0.824095 -0.337730 -0.927764 + three -0.840123 0.248505 -0.109250 + qux one 0.431977 -0.460710 0.336505 + two -3.207595 -1.535854 0.409769 + three -0.673145 -0.741113 -0.110891 + + # the levels are automatically included as data columns + In [24]: store.select('mi', "foo='bar'") + Out[24]: + A B C + foo bar + bar one 0.925372 -0.006438 -0.820408 + two -0.600874 -1.039266 0.824758 + +Multi-table creation via ``append_to_multiple`` and selection via +``select_as_multiple`` can create/select from multiple tables and return a +combined result, by using ``where`` on a selector table. + +.. 
ipython:: python + + df_mt = pd.DataFrame(np.random.randn(8, 6), + index=pd.date_range('1/1/2000', periods=8), + columns=['A', 'B', 'C', 'D', 'E', 'F']) + df_mt['foo'] = 'bar' + + # you can also create the tables individually + store.append_to_multiple({'df1_mt': ['A', 'B'], 'df2_mt': None}, + df_mt, selector='df1_mt') + store + + # individual tables were created + store.select('df1_mt') + store.select('df2_mt') + + # as a multiple + store.select_as_multiple(['df1_mt', 'df2_mt'], where=['A>0', 'B>0'], + selector='df1_mt') + +.. ipython:: python + :suppress: + + store.close() + os.remove('store.h5') + +**Enhancements** + +- ``HDFStore`` now can read native PyTables table format tables + +- You can pass ``nan_rep = 'my_nan_rep'`` to append, to change the default nan + representation on disk (which converts to/from ``np.nan``); this defaults to + ``nan``. + +- You can pass ``index`` to ``append``. This defaults to ``True``. This will + automagically create indices on the *indexables* and *data columns* of the + table + +- You can pass ``chunksize=an integer`` to ``append``, to change the writing + chunksize (default is 50000). This will significantly lower your memory usage + on writing. + +- You can pass ``expectedrows=an integer`` to the first ``append``, to set the + TOTAL number of expected rows that ``PyTables`` will expect. This will + optimize read/write performance. + +- ``Select`` now supports passing ``start`` and ``stop`` to provide selection + space limiting in selection.
+ +- Greatly improved ISO8601 (e.g., yyyy-mm-dd) date parsing for file parsers (:issue:`2698`) +- Allow ``DataFrame.merge`` to handle combinatorial sizes too large for 64-bit + integer (:issue:`2690`) +- Series now has unary negation (-series) and inversion (~series) operators (:issue:`2686`) +- DataFrame.plot now includes a ``logx`` parameter to change the x-axis to log scale (:issue:`2327`) +- Series arithmetic operators can now handle constant and ndarray input (:issue:`2574`) +- ExcelFile now takes a ``kind`` argument to specify the file type (:issue:`2613`) +- A faster implementation for Series.str methods (:issue:`2602`) + +**Bug Fixes** + +- ``HDFStore`` tables can now store ``float32`` types correctly (cannot be + mixed with ``float64`` however) +- Fixed Google Analytics prefix when specifying request segment (:issue:`2713`). +- Function to reset Google Analytics token store so users can recover from + improperly setup client secrets (:issue:`2687`). +- Fixed groupby bug resulting in segfault when passing in MultiIndex (:issue:`2706`) +- Fixed bug where passing a Series with datetime64 values into `to_datetime` + results in bogus output values (:issue:`2699`) +- Fixed bug in ``pattern in HDFStore`` expressions when pattern is not a valid + regex (:issue:`2694`) +- Fixed performance issues while aggregating boolean data (:issue:`2692`) +- When given a boolean mask key and a Series of new values, Series __setitem__ + will now align the incoming values with the original Series (:issue:`2686`) +- Fixed MemoryError caused by performing counting sort on sorting MultiIndex + levels with a very large number of combinatorial values (:issue:`2684`) +- Fixed bug that causes plotting to fail when the index is a DatetimeIndex with + a fixed-offset timezone (:issue:`2683`) +- Corrected business day subtraction logic when the offset is more than 5 bdays + and the starting date is on a weekend (:issue:`2680`) +- Fixed C file parser behavior when the file has more columns 
than data + (:issue:`2668`) +- Fixed file reader bug that misaligned columns with data in the presence of an + implicit column and a specified ``usecols`` value +- DataFrames with numerical or datetime indices are now sorted prior to + plotting (:issue:`2609`) +- Fixed DataFrame.from_records error when passed columns, index, but empty + records (:issue:`2633`) +- Several bugs fixed for Series operations when dtype is datetime64 (:issue:`2689`, + :issue:`2629`, :issue:`2626`) + + +See the :ref:`full release notes +<release>` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.10.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.10.0..v0.10.1 diff --git a/doc/source/whatsnew/v0.11.0.rst b/doc/source/whatsnew/v0.11.0.rst new file mode 100644 index 00000000..148ee349 --- /dev/null +++ b/doc/source/whatsnew/v0.11.0.rst @@ -0,0 +1,462 @@ +.. _whatsnew_0110: + +v0.11.0 (April 22, 2013) +------------------------ + +{{ header }} + + +This is a major release from 0.10.1 and includes many new features and +enhancements along with a large number of bug fixes. The methods of Selecting +Data have had quite a number of additions, and Dtype support is now full-fledged. +There are also a number of important API changes that long-time pandas users should +pay close attention to. + +There is a new section in the documentation, :ref:`10 Minutes to Pandas <10min>`, +primarily geared to new users. + +There is a new section in the documentation, :ref:`Cookbook <cookbook>`, a collection +of useful recipes in pandas (to which we welcome contributions!). + +There are several libraries that are now :ref:`Recommended Dependencies <install.recommended_dependencies>` + +Selection choices +~~~~~~~~~~~~~~~~~ + +Starting in 0.11.0, object selection has had a number of user-requested additions in +order to support more explicit location based indexing. Pandas now supports +three types of multi-axis indexing.
+ +- ``.loc`` is strictly label based, will raise ``KeyError`` when the items are not found, allowed inputs are: + + - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is interpreted as a *label* of the index. This use is **not** an integer position along the index) + - A list or array of labels ``['a', 'b', 'c']`` + - A slice object with labels ``'a':'f'``, (note that contrary to usual python slices, **both** the start and the stop are included!) + - A boolean array + + See more at :ref:`Selection by Label ` + +- ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of the axis), will raise ``IndexError`` when the requested indices are out of bounds. Allowed inputs are: + + - An integer e.g. ``5`` + - A list or array of integers ``[4, 3, 0]`` + - A slice object with ints ``1:7`` + - A boolean array + + See more at :ref:`Selection by Position ` + +- ``.ix`` supports mixed integer and label based access. It is primarily label based, but will fallback to integer positional access. ``.ix`` is the most general and will support + any of the inputs to ``.loc`` and ``.iloc``, as well as support for floating point label schemes. ``.ix`` is especially useful when dealing with mixed positional and label + based hierarchical indexes. + + As using integer slices with ``.ix`` have different behavior depending on whether the slice + is interpreted as position based or label based, it's usually better to be + explicit and use ``.iloc`` or ``.loc``. + + See more at :ref:`Advanced Indexing ` and :ref:`Advanced Hierarchical `. + + +Selection deprecations +~~~~~~~~~~~~~~~~~~~~~~ + +Starting in version 0.11.0, these methods *may* be deprecated in future versions. + +- ``irow`` +- ``icol`` +- ``iget_value`` + +See the section :ref:`Selection by Position ` for substitutes. + +Dtypes +~~~~~~ + +Numeric dtypes will propagate and can coexist in DataFrames. 
If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``, or a passed ``Series``), then it will be preserved in DataFrame operations. Furthermore, different numeric dtypes will **NOT** be combined. The following example will give you a taste. + +.. ipython:: python + + df1 = pd.DataFrame(np.random.randn(8, 1), columns=['A'], dtype='float32') + df1 + df1.dtypes + df2 = pd.DataFrame({'A': pd.Series(np.random.randn(8), dtype='float16'), + 'B': pd.Series(np.random.randn(8)), + 'C': pd.Series(range(8), dtype='uint8')}) + df2 + df2.dtypes + + # here you get some upcasting + df3 = df1.reindex_like(df2).fillna(value=0.0) + df2 + df3 + df3.dtypes + +Dtype conversion +~~~~~~~~~~~~~~~~ + +This is lowest-common-denominator upcasting, meaning you get the dtype which can accommodate all of the types. + +.. ipython:: python + + df3.values.dtype + +Conversion + +.. ipython:: python + + df3.astype('float32').dtypes + +Mixed conversion + +.. code-block:: ipython + + In [12]: df3['D'] = '1.' + + In [13]: df3['E'] = '1' + + In [14]: df3.convert_objects(convert_numeric=True).dtypes + Out[14]: + A float32 + B float64 + C float64 + D float64 + E int64 + dtype: object + + # same, but specific dtype conversion + In [15]: df3['D'] = df3['D'].astype('float16') + + In [16]: df3['E'] = df3['E'].astype('int32') + + In [17]: df3.dtypes + Out[17]: + A float32 + B float64 + C float64 + D float16 + E int32 + dtype: object + +Forcing date coercion (and setting ``NaT`` when not datelike) + +..
code-block:: ipython + + In [18]: import datetime + + In [19]: s = pd.Series([datetime.datetime(2001, 1, 1, 0, 0), 'foo', 1.0, 1, + ....: pd.Timestamp('20010104'), '20010105'], dtype='O') + ....: + + In [20]: s.convert_objects(convert_dates='coerce') + Out[20]: + 0 2001-01-01 + 1 NaT + 2 NaT + 3 NaT + 4 2001-01-04 + 5 2001-01-05 + dtype: datetime64[ns] + +Dtype gotchas +~~~~~~~~~~~~~ + +**Platform gotchas** + +Starting in 0.11.0, construction of DataFrame/Series will use default dtypes of ``int64`` and ``float64``, +*regardless of platform*. This is not an apparent change from earlier versions of pandas. If you specify +dtypes, they *WILL* be respected, however (:issue:`2837`) + +The following will all result in ``int64`` dtypes + +.. code-block:: ipython + + In [21]: pd.DataFrame([1, 2], columns=['a']).dtypes + Out[21]: + a int64 + dtype: object + + In [22]: pd.DataFrame({'a': [1, 2]}).dtypes + Out[22]: + a int64 + dtype: object + + In [23]: pd.DataFrame({'a': 1}, index=range(2)).dtypes + Out[23]: + a int64 + dtype: object + +Keep in mind that ``DataFrame(np.array([1,2]))`` **WILL** result in ``int32`` on 32-bit platforms! + + +**Upcasting gotchas** + +Performing indexing operations on integer type data can easily upcast the data. +The dtype of the input data will be preserved in cases where ``nans`` are not introduced. + +.. 
code-block:: ipython + + In [24]: dfi = df3.astype('int32') + + In [25]: dfi['D'] = dfi['D'].astype('int64') + + In [26]: dfi + Out[26]: + A B C D E + 0 0 0 0 1 1 + 1 -2 0 1 1 1 + 2 -2 0 2 1 1 + 3 0 -1 3 1 1 + 4 1 0 4 1 1 + 5 0 0 5 1 1 + 6 0 -1 6 1 1 + 7 0 0 7 1 1 + + In [27]: dfi.dtypes + Out[27]: + A int32 + B int32 + C int32 + D int64 + E int32 + dtype: object + + In [28]: casted = dfi[dfi > 0] + + In [29]: casted + Out[29]: + A B C D E + 0 NaN NaN NaN 1 1 + 1 NaN NaN 1.0 1 1 + 2 NaN NaN 2.0 1 1 + 3 NaN NaN 3.0 1 1 + 4 1.0 NaN 4.0 1 1 + 5 NaN NaN 5.0 1 1 + 6 NaN NaN 6.0 1 1 + 7 NaN NaN 7.0 1 1 + + In [30]: casted.dtypes + Out[30]: + A float64 + B float64 + C float64 + D int64 + E int32 + dtype: object + +While float dtypes are unchanged. + +.. code-block:: ipython + + In [31]: df4 = df3.copy() + + In [32]: df4['A'] = df4['A'].astype('float32') + + In [33]: df4.dtypes + Out[33]: + A float32 + B float64 + C float64 + D float16 + E int32 + dtype: object + + In [34]: casted = df4[df4 > 0] + + In [35]: casted + Out[35]: + A B C D E + 0 NaN NaN NaN 1.0 1 + 1 NaN 0.567020 1.0 1.0 1 + 2 NaN 0.276232 2.0 1.0 1 + 3 NaN NaN 3.0 1.0 1 + 4 1.933792 NaN 4.0 1.0 1 + 5 NaN 0.113648 5.0 1.0 1 + 6 NaN NaN 6.0 1.0 1 + 7 NaN 0.524988 7.0 1.0 1 + + In [36]: casted.dtypes + Out[36]: + A float32 + B float64 + C float64 + D float16 + E int32 + dtype: object + +Datetimes conversion +~~~~~~~~~~~~~~~~~~~~ + +Datetime64[ns] columns in a DataFrame (or a Series) allow the use of ``np.nan`` to indicate a nan value, +in addition to the traditional ``NaT``, or not-a-time. This allows convenient nan setting in a generic way. +Furthermore ``datetime64[ns]`` columns are created by default, when passed datetimelike objects (*this change was introduced in 0.10.1*) +(:issue:`2809`, :issue:`2810`) + +.. 
ipython:: python + + df = pd.DataFrame(np.random.randn(6, 2), pd.date_range('20010102', periods=6), + columns=['A', ' B']) + df['timestamp'] = pd.Timestamp('20010103') + df + + # datetime64[ns] out of the box + df.dtypes.value_counts() + + # use the traditional nan, which is mapped to NaT internally + df.loc[df.index[2:4], ['A', 'timestamp']] = np.nan + df + +Astype conversion on ``datetime64[ns]`` to ``object``, implicitly converts ``NaT`` to ``np.nan`` + +.. ipython:: python + + s = pd.Series([datetime.datetime(2001, 1, 2, 0, 0) for i in range(3)]) + s.dtype + s[1] = np.nan + s + s.dtype + s = s.astype('O') + s + s.dtype + + +API changes +~~~~~~~~~~~ + + - Added to_series() method to indices, to facilitate the creation of indexers + (:issue:`3275`) + + - ``HDFStore`` + + - added the method ``select_column`` to select a single column from a table as a Series. + - deprecated the ``unique`` method, can be replicated by ``select_column(key,column).unique()`` + - ``min_itemsize`` parameter to ``append`` will now automatically create data_columns for passed keys + +Enhancements +~~~~~~~~~~~~ + + - Improved performance of df.to_csv() by up to 10x in some cases. (:issue:`3059`) + + - Numexpr is now a :ref:`Recommended Dependencies `, to accelerate certain + types of numerical and boolean operations + + - Bottleneck is now a :ref:`Recommended Dependencies `, to accelerate certain + types of ``nan`` operations + + - ``HDFStore`` + + - support ``read_hdf/to_hdf`` API similar to ``read_csv/to_csv`` + + .. ipython:: python + + df = pd.DataFrame({'A': range(5), 'B': range(5)}) + df.to_hdf('store.h5', 'table', append=True) + pd.read_hdf('store.h5', 'table', where=['index > 2']) + + .. ipython:: python + :suppress: + :okexcept: + + import os + + os.remove('store.h5') + + - provide dotted attribute access to ``get`` from stores, e.g. 
``store.df == store['df']`` + + - new keywords ``iterator=boolean``, and ``chunksize=number_in_a_chunk`` are + provided to support iteration on ``select`` and ``select_as_multiple`` (:issue:`3076`) + + - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (:issue:`2437`) + + - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (:issue:`3070`) + + .. ipython:: python + + idx = pd.date_range("2001-10-1", periods=5, freq='M') + ts = pd.Series(np.random.rand(len(idx)), index=idx) + ts['2001'] + + df = pd.DataFrame({'A': ts}) + df['2001'] + + - ``Squeeze`` to possibly remove length 1 dimensions from an object. + + .. code-block:: python + + >>> p = pd.Panel(np.random.randn(3, 4, 4), items=['ItemA', 'ItemB', 'ItemC'], + ... major_axis=pd.date_range('20010102', periods=4), + ... minor_axis=['A', 'B', 'C', 'D']) + >>> p + + Dimensions: 3 (items) x 4 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2001-01-02 00:00:00 to 2001-01-05 00:00:00 + Minor_axis axis: A to D + + >>> p.reindex(items=['ItemA']).squeeze() + A B C D + 2001-01-02 0.926089 -2.026458 0.501277 -0.204683 + 2001-01-03 -0.076524 1.081161 1.141361 0.479243 + 2001-01-04 0.641817 -0.185352 1.824568 0.809152 + 2001-01-05 0.575237 0.669934 1.398014 -0.399338 + + >>> p.reindex(items=['ItemA'], minor=['B']).squeeze() + 2001-01-02 -2.026458 + 2001-01-03 1.081161 + 2001-01-04 -0.185352 + 2001-01-05 0.669934 + Freq: D, Name: B, dtype: float64 + + - In ``pd.io.data.Options``, + + + Fix bug when trying to fetch data for the current month when already + past expiry. + + Now using lxml to scrape html instead of BeautifulSoup (lxml was faster). + + New instance variables for calls and puts are automatically created + when a method that creates them is called. This works for current month + where the instance variables are simply ``calls`` and ``puts``. 
Also + works for future expiry months and saves the instance variable as + ``callsMMYY`` or ``putsMMYY``, where ``MMYY`` are, respectively, the + month and year of the option's expiry. + + ``Options.get_near_stock_price`` now allows the user to specify the + month for which to get relevant options data. + + ``Options.get_forward_data`` now has optional kwargs ``near`` and + ``above_below``. This allows the user to specify if they would like to + only return forward looking data for options near the current stock + price. This just obtains the data from Options.get_near_stock_price + instead of Options.get_xxx_data() (:issue:`2758`). + + - Cursor coordinate information is now displayed in time-series plots. + + - added option `display.max_seq_items` to control the number of + elements printed per sequence when pretty-printing it. (:issue:`2979`) + + - added option `display.chop_threshold` to control display of small numerical + values. (:issue:`2739`) + + - added option `display.max_info_rows` to prevent verbose_info from being + calculated for frames above 1M rows (configurable). (:issue:`2807`, :issue:`2918`) + + - value_counts() now accepts a "normalize" argument, for normalized + histograms. (:issue:`2710`). + + - DataFrame.from_records now accepts not only dicts but any instance of + the collections.Mapping ABC. + + - added option `display.mpl_style` providing a sleeker visual style + for plots. Based on https://gist.github.com/huyng/816622 (:issue:`3075`). + + - Treat boolean values as integers (values 1 and 0) for numeric + operations. (:issue:`2641`) + + - to_html() now accepts an optional "escape" argument to control reserved + HTML character escaping (enabled by default) and escapes ``&``, in addition + to ``<`` and ``>``. (:issue:`2919`) + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.11.0.contributors: + +Contributors +~~~~~~~~~~~~ + +..
contributors:: v0.10.1..v0.11.0 diff --git a/doc/source/whatsnew/v0.12.0.rst b/doc/source/whatsnew/v0.12.0.rst new file mode 100644 index 00000000..823e177f --- /dev/null +++ b/doc/source/whatsnew/v0.12.0.rst @@ -0,0 +1,518 @@ +.. _whatsnew_0120: + +v0.12.0 (July 24, 2013) +------------------------ + +{{ header }} + + +This is a major release from 0.11.0 and includes several new features and +enhancements along with a large number of bug fixes. + +Highlights include a consistent I/O API naming scheme, routines to read html, +write MultiIndexes to csv files, read & write STATA data files, read & write JSON format +files, Python 3 support for ``HDFStore``, filtering of groupby expressions via ``filter``, and a +revamped ``replace`` routine that accepts regular expressions. + +API changes +~~~~~~~~~~~ + + - The I/O API is now much more consistent with a set of top level ``reader`` functions + accessed like ``pd.read_csv()`` that generally return a ``pandas`` object. + + * ``read_csv`` + * ``read_excel`` + * ``read_hdf`` + * ``read_sql`` + * ``read_json`` + * ``read_html`` + * ``read_stata`` + * ``read_clipboard`` + + The corresponding ``writer`` functions are object methods that are accessed like ``df.to_csv()`` + + * ``to_csv`` + * ``to_excel`` + * ``to_hdf`` + * ``to_sql`` + * ``to_json`` + * ``to_html`` + * ``to_stata`` + * ``to_clipboard`` + + + - Fix modulo and integer division on Series,DataFrames to act similarly to ``float`` dtypes to return + ``np.nan`` or ``np.inf`` as appropriate (:issue:`3590`). This corrects a numpy bug that treats ``integer`` + and ``float`` dtypes differently. + + .. ipython:: python + + p = pd.DataFrame({'first': [4, 5, 8], 'second': [0, 0, 3]}) + p % 0 + p % p + p / p + p / 0 + + - Add ``squeeze`` keyword to ``groupby`` to allow reduction from + DataFrame -> Series if groups are unique. This is a Regression from 0.10.1. + We are reverting back to the prior behavior.
This means groupby will return the + same shaped objects whether the groups are unique or not. Revert this issue (:issue:`2893`) + with (:issue:`3596`). + + .. ipython:: python + + df2 = pd.DataFrame([{"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 1, "val2": 27}, + {"val1": 1, "val2": 12}]) + + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + # squeezing the result frame to a series (because we have unique groups) + df2.groupby("val1", squeeze=True).apply(func) + + # no squeezing (the default, and behavior in 0.10.1) + df2.groupby("val1").apply(func) + + - Raise on ``iloc`` when boolean indexing with a label based indexer mask + e.g. a boolean Series, even with integer labels, will raise. Since ``iloc`` + is purely positional based, the labels on the Series are not alignable (:issue:`3631`) + + This case is rarely used, and there are plenty of alternatives. This preserves the + ``iloc`` API to be *purely* positional based. + + .. ipython:: python + + df = pd.DataFrame(range(5), index=list('ABCDE'), columns=['a']) + mask = (df.a % 2 == 0) + mask + + # this is what you should use + df.loc[mask] + + # this will work as well + df.iloc[mask.values] + + ``df.iloc[mask]`` will raise a ``ValueError`` + + - The ``raise_on_error`` argument to plotting functions is removed. Instead, + plotting functions raise a ``TypeError`` when the ``dtype`` of the object + is ``object`` to remind you to avoid ``object`` arrays whenever possible + and thus you should cast to an appropriate numeric dtype if you need to + plot something. + + - Add ``colormap`` keyword to DataFrame plotting methods. Accepts either a + matplotlib colormap object (ie, matplotlib.cm.jet) or a string name of such + an object (ie, 'jet'). The colormap is sampled to select the color for each + column. Please see :ref:`visualization.colormaps` for more information. + (:issue:`3860`) + + - ``DataFrame.interpolate()`` is now deprecated. 
Please use + ``DataFrame.fillna()`` and ``DataFrame.replace()`` instead. (:issue:`3582`, + :issue:`3675`, :issue:`3676`) + + - the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are + deprecated + + - ``DataFrame.replace`` 's ``infer_types`` parameter is removed and now + performs conversion by default. (:issue:`3907`) + + - Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column + to be inserted if ``True``, default is ``False`` (same as prior to 0.12) (:issue:`3679`) + - Implement ``__nonzero__`` for ``NDFrame`` objects (:issue:`3691`, :issue:`3696`) + + - IO api + + - added top-level function ``read_excel`` to replace the following, + The original API is deprecated and will be removed in a future version + + .. code-block:: python + + from pandas.io.parsers import ExcelFile + xls = ExcelFile('path_to_file.xls') + xls.parse('Sheet1', index_col=None, na_values=['NA']) + + With + + .. code-block:: python + + import pandas as pd + pd.read_excel('path_to_file.xls', 'Sheet1', index_col=None, na_values=['NA']) + + - added top-level function ``read_sql`` that is equivalent to the following + + .. code-block:: python + + from pandas.io.sql import read_frame + read_frame(...) + + - ``DataFrame.to_html`` and ``DataFrame.to_latex`` now accept a path for + their first argument (:issue:`3702`) + + - Do not allow astypes on ``datetime64[ns]`` except to ``object``, and + ``timedelta64[ns]`` to ``object/int`` (:issue:`3425`) + + - The behavior of ``datetime64`` dtypes has changed with respect to certain + so-called reduction operations (:issue:`3726`). 
The following operations now + raise a ``TypeError`` when performed on a ``Series`` and return an *empty* + ``Series`` when performed on a ``DataFrame`` similar to performing these + operations on, for example, a ``DataFrame`` of ``slice`` objects: + + - sum, prod, mean, std, var, skew, kurt, corr, and cov + + - ``read_html`` now defaults to ``None`` when reading, and falls back on + ``bs4`` + ``html5lib`` when lxml fails to parse. a list of parsers to try + until success is also valid + + - The internal ``pandas`` class hierarchy has changed (slightly). The + previous ``PandasObject`` now is called ``PandasContainer`` and a new + ``PandasObject`` has become the base class for ``PandasContainer`` as well + as ``Index``, ``Categorical``, ``GroupBy``, ``SparseList``, and + ``SparseArray`` (+ their base classes). Currently, ``PandasObject`` + provides string methods (from ``StringMixin``). (:issue:`4090`, :issue:`4092`) + + - New ``StringMixin`` that, given a ``__unicode__`` method, gets python 2 and + python 3 compatible string methods (``__str__``, ``__bytes__``, and + ``__repr__``). Plus string safety throughout. Now employed in many places + throughout the pandas library. (:issue:`4090`, :issue:`4092`) + +I/O enhancements +~~~~~~~~~~~~~~~~ + + - ``pd.read_html()`` can now parse HTML strings, files or urls and return + DataFrames, courtesy of @cpcloud. (:issue:`3477`, :issue:`3605`, :issue:`3606`, :issue:`3616`). + It works with a *single* parser backend: BeautifulSoup4 + html5lib :ref:`See the docs` + + You can use ``pd.read_html()`` to read the output from ``DataFrame.to_html()`` like so + + .. ipython:: python + :okwarning: + + df = pd.DataFrame({'a': range(3), 'b': list('abc')}) + print(df) + html = df.to_html() + alist = pd.read_html(html, index_col=0) + print(df == alist[0]) + + Note that ``alist`` here is a Python ``list`` so ``pd.read_html()`` and + ``DataFrame.to_html()`` are not inverses. 
+ + - ``pd.read_html()`` no longer performs hard conversion of date strings + (:issue:`3656`). + + .. warning:: + + You may have to install an older version of BeautifulSoup4, + :ref:`See the installation docs` + + - Added module for reading and writing Stata files: ``pandas.io.stata`` (:issue:`1512`) + accessible via ``read_stata`` top-level function for reading, + and ``to_stata`` DataFrame method for writing, :ref:`See the docs` + + - Added module for reading and writing json format files: ``pandas.io.json`` + accessible via ``read_json`` top-level function for reading, + and ``to_json`` DataFrame method for writing, :ref:`See the docs` + various issues (:issue:`1226`, :issue:`3804`, :issue:`3876`, :issue:`3867`, :issue:`1305`) + + - ``MultiIndex`` column support for reading and writing csv format files + + - The ``header`` option in ``read_csv`` now accepts a + list of the rows from which to read the index. + + - The option, ``tupleize_cols`` can now be specified in both ``to_csv`` and + ``read_csv``, to provide compatibility for the pre 0.12 behavior of + writing and reading ``MultiIndex`` columns via a list of tuples. The default in + 0.12 is to write lists of tuples and *not* interpret list of tuples as a + ``MultiIndex`` column. + + Note: The default behavior in 0.12 remains unchanged from prior versions, but starting with 0.13, + the default *to* write and read ``MultiIndex`` columns will be in the new + format. (:issue:`3571`, :issue:`1651`, :issue:`3141`) + + - If an ``index_col`` is not specified (e.g. you don't have an index, or wrote it + with ``df.to_csv(..., index=False)``), then any ``names`` on the columns index will + be *lost*. + + .. ipython:: python + + from pandas._testing import makeCustomDataframe as mkdf + df = mkdf(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv('mi.csv') + print(open('mi.csv').read()) + pd.read_csv('mi.csv', header=[0, 1, 2, 3], index_col=[0, 1]) + + ..
ipython:: python + :suppress: + + import os + os.remove('mi.csv') + + - Support for ``HDFStore`` (via ``PyTables 3.0.0``) on Python3 + + - Iterator support via ``read_hdf`` that automatically opens and closes the + store when iteration is finished. This is only for *tables* + + .. code-block:: ipython + + In [25]: path = 'store_iterator.h5' + + In [26]: pd.DataFrame(np.random.randn(10, 2)).to_hdf(path, 'df', table=True) + + In [27]: for df in pd.read_hdf(path, 'df', chunksize=3): + ....: print(df) + ....: + 0 1 + 0 0.713216 -0.778461 + 1 -0.661062 0.862877 + 2 0.344342 0.149565 + 0 1 + 3 -0.626968 -0.875772 + 4 -0.930687 -0.218983 + 5 0.949965 -0.442354 + 0 1 + 6 -0.402985 1.111358 + 7 -0.241527 -0.670477 + 8 0.049355 0.632633 + 0 1 + 9 -1.502767 -1.225492 + + + + - ``read_csv`` will now throw a more informative error message when a file + contains no columns, e.g., all newline characters + +Other enhancements +~~~~~~~~~~~~~~~~~~ + + - ``DataFrame.replace()`` now allows regular expressions on contained + ``Series`` with object dtype. See the examples section in the regular docs + :ref:`Replacing via String Expression ` + + For example you can do + + .. ipython:: python + + df = pd.DataFrame({'a': list('ab..'), 'b': [1, 2, 3, 4]}) + df.replace(regex=r'\s*\.\s*', value=np.nan) + + to replace all occurrences of the string ``'.'`` with zero or more + instances of surrounding white space with ``NaN``. + + Regular string replacement still works as expected. For example, you can do + + .. ipython:: python + + df.replace('.', np.nan) + + to replace all occurrences of the string ``'.'`` with ``NaN``. + + - ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name`` + to specify custom column names of the returned DataFrame. + + - ``pd.set_option()`` now allows N option, value pairs (:issue:`3667`). + + Let's say that we had an option ``'a.b'`` and another option ``'b.c'``. + We can set them at the same time: + + .. 
code-block:: ipython + + In [31]: pd.get_option('a.b') + Out[31]: 2 + + In [32]: pd.get_option('b.c') + Out[32]: 3 + + In [33]: pd.set_option('a.b', 1, 'b.c', 4) + + In [34]: pd.get_option('a.b') + Out[34]: 1 + + In [35]: pd.get_option('b.c') + Out[35]: 4 + + - The ``filter`` method for group objects returns a subset of the original + object. Suppose we want to take only elements that belong to groups with a + group sum greater than 2. + + .. ipython:: python + + sf = pd.Series([1, 1, 2, 3, 3, 3]) + sf.groupby(sf).filter(lambda x: x.sum() > 2) + + The argument of ``filter`` must be a function that, applied to the group as a + whole, returns ``True`` or ``False``. + + Another useful operation is filtering out elements that belong to groups + with only a couple members. + + .. ipython:: python + + dff = pd.DataFrame({'A': np.arange(8), 'B': list('aabbbbcc')}) + dff.groupby('B').filter(lambda x: len(x) > 2) + + Alternatively, instead of dropping the offending groups, we can return a + like-indexed object where the groups that do not pass the filter are + filled with NaNs. + + .. ipython:: python + + dff.groupby('B').filter(lambda x: len(x) > 2, dropna=False) + + - Series and DataFrame hist methods now take a ``figsize`` argument (:issue:`3834`) + + - DatetimeIndexes no longer try to convert mixed-integer indexes during join + operations (:issue:`3877`) + + - Timestamp.min and Timestamp.max now represent valid Timestamp instances instead + of the default datetime.min and datetime.max (respectively), thanks @SleepingPills + + - ``read_html`` now raises when no tables are found and BeautifulSoup==4.2.0 + is detected (:issue:`4214`) + + +Experimental features +~~~~~~~~~~~~~~~~~~~~~ + + - Added experimental ``CustomBusinessDay`` class to support ``DateOffsets`` + with custom holiday calendars and custom weekmasks. (:issue:`2301`) + + .. note:: + + This uses the ``numpy.busdaycalendar`` API introduced in Numpy 1.7 and + therefore requires Numpy 1.7.0 or newer. + + ..
ipython:: python + + from pandas.tseries.offsets import CustomBusinessDay + from datetime import datetime + # As an interesting example, let's look at Egypt where + # a Friday-Saturday weekend is observed. + weekmask_egypt = 'Sun Mon Tue Wed Thu' + # They also observe International Workers' Day so let's + # add that for a couple of years + holidays = ['2012-05-01', datetime(2013, 5, 1), np.datetime64('2014-05-01')] + bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt) + dt = datetime(2013, 4, 30) + print(dt + 2 * bday_egypt) + dts = pd.date_range(dt, periods=5, freq=bday_egypt) + print(pd.Series(dts.weekday, dts).map(pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split()))) + +Bug fixes +~~~~~~~~~ + + - Plotting functions now raise a ``TypeError`` before trying to plot anything + if the associated objects have a dtype of ``object`` (:issue:`1818`, + :issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to + numeric arrays if possible so that you can still plot, for example, an + object array with floats. This happens before any drawing takes place which + eliminates any spurious plots from showing up. + + - ``fillna`` methods now raise a ``TypeError`` if the ``value`` parameter is + a list or tuple. + + - ``Series.str`` now supports iteration (:issue:`3638`). You can iterate over the + individual elements of each string in the ``Series``. Each iteration + yields a ``Series`` with either a single character at each index of the + original ``Series`` or ``NaN``. For example, + + .. ipython:: python + :okwarning: + + strs = 'go', 'bow', 'joe', 'slow' + ds = pd.Series(strs) + + for s in ds.str: + print(s) + + s + s.dropna().values.item() == 'w' + + The last element yielded by the iterator will be a ``Series`` containing + the last element of the longest string in the ``Series`` with all other + elements being ``NaN``.
Here, since ``'slow'`` is the longest string + and there are no other strings with the same length, ``'w'`` is the only + non-null string in the yielded ``Series``. + + - ``HDFStore`` + + - will retain index attributes (freq,tz,name) on recreation (:issue:`3499`) + - will warn with an ``AttributeConflictWarning`` if you are attempting to append + an index with a different frequency than the existing, or attempting + to append an index with a different name than the existing + - support datelike columns with a timezone as data_columns (:issue:`2852`) + + - Non-unique index support clarified (:issue:`3468`). + + - Fix assigning a new index to a duplicate index in a DataFrame would fail (:issue:`3468`) + - Fix construction of a DataFrame with a duplicate index + - ref_locs support to allow duplicative indices across dtypes, + allows iget support to always find the index (even across dtypes) (:issue:`2194`) + - applymap on a DataFrame with a non-unique index now works + (removed warning) (:issue:`2786`), and fix (:issue:`3230`) + - Fix to_csv to handle non-unique columns (:issue:`3495`) + - Duplicate indexes with getitem will return items in the correct order (:issue:`3455`, :issue:`3457`) + and handle missing elements like unique indices (:issue:`3561`) + - Duplicate indexes with an empty DataFrame.from_records will return a correct frame (:issue:`3562`) + - Concat to produce non-unique columns when duplicates are across dtypes is fixed (:issue:`3602`) + - Allow insert/delete to non-unique columns (:issue:`3679`) + - Non-unique indexing with a slice via ``loc`` and friends fixed (:issue:`3659`) + - Allow insert/delete to non-unique columns (:issue:`3679`) + - Extend ``reindex`` to correctly deal with non-unique indices (:issue:`3679`) + - ``DataFrame.itertuples()`` now works with frames with duplicate column + names (:issue:`3873`) + - Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to + ``reindex`` for location-based taking + -
Allow non-unique indexing in series via ``.ix/.loc`` and ``__getitem__`` (:issue:`4246`) + - Fixed non-unique indexing memory allocation issue with ``.ix/.loc`` (:issue:`4280`) + + - ``DataFrame.from_records`` did not accept empty recarrays (:issue:`3682`) + - ``read_html`` now correctly skips tests (:issue:`3741`) + - Fixed a bug where ``DataFrame.replace`` with a compiled regular expression + in the ``to_replace`` argument wasn't working (:issue:`3907`) + - Improved ``network`` test decorator to catch ``IOError`` (and therefore + ``URLError`` as well). Added ``with_connectivity_check`` decorator to allow + explicitly checking a website as a proxy for seeing if there is network + connectivity. Plus, new ``optional_args`` decorator factory for decorators. + (:issue:`3910`, :issue:`3914`) + - Fixed testing issue where too many sockets were open thus leading to a + connection reset issue (:issue:`3982`, :issue:`3985`, :issue:`4028`, + :issue:`4054`) + - Fixed failing tests in test_yahoo, test_google where symbols were not + retrieved but were being accessed (:issue:`3982`, :issue:`3985`, + :issue:`4028`, :issue:`4054`) + - ``Series.hist`` will now take the figure from the current environment if + one is not passed + - Fixed bug where a 1xN DataFrame would barf on a 1xN mask (:issue:`4071`) + - Fixed running of ``tox`` under python3 where the pickle import was getting + rewritten in an incompatible way (:issue:`4062`, :issue:`4063`) + - Fixed bug where sharex and sharey were not being passed to grouped_hist + (:issue:`4089`) + - Fixed bug in ``DataFrame.replace`` where a nested dict wasn't being + iterated over when regex=False (:issue:`4115`) + - Fixed bug in the parsing of microseconds when using the ``format`` + argument in ``to_datetime`` (:issue:`4152`) + - Fixed bug in ``PandasAutoDateLocator`` where ``invert_xaxis`` incorrectly + triggered ``MilliSecondLocator`` (:issue:`3990`) + - Fixed bug in plotting that wasn't raising on invalid colormap for + matplotlib
1.1.1 (:issue:`4215`) + - Fixed the legend displaying in ``DataFrame.plot(kind='kde')`` (:issue:`4216`) + - Fixed bug where Index slices weren't carrying the name attribute + (:issue:`4226`) + - Fixed bug in initializing ``DatetimeIndex`` with an array of strings + in a certain time zone (:issue:`4229`) + - Fixed bug where html5lib wasn't being properly skipped (:issue:`4265`) + - Fixed bug where get_data_famafrench wasn't using the correct file edges + (:issue:`4281`) + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.12.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.11.0..v0.12.0 diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst new file mode 100644 index 00000000..43c6083f --- /dev/null +++ b/doc/source/whatsnew/v0.13.0.rst @@ -0,0 +1,1335 @@ +.. _whatsnew_0130: + +v0.13.0 (January 3, 2014) +--------------------------- + +{{ header }} + + + +This is a major release from 0.12.0 and includes a number of API changes, several new features and +enhancements along with a large number of bug fixes. + +Highlights include: + +- support for a new index type ``Float64Index``, and other Indexing enhancements +- ``HDFStore`` has a new string based syntax for query specification +- support for new methods of interpolation +- updated ``timedelta`` operations +- a new string manipulation method ``extract`` +- Nanosecond support for Offsets +- ``isin`` for DataFrames + +Several experimental features are added, including: + +- new ``eval/query`` methods for expression evaluation +- support for ``msgpack`` serialization +- an i/o interface to Google's ``BigQuery`` + +There are several new or updated docs sections including: + +- :ref:`Comparison with SQL`, which should be useful for those familiar with SQL but still learning pandas. +- :ref:`Comparison with R`, idiom translations from R to pandas.
+- :ref:`Enhancing Performance`, ways to enhance pandas performance with ``eval/query``. + +.. warning:: + + In 0.13.0 ``Series`` has internally been refactored to no longer sub-class ``ndarray`` + but instead subclass ``NDFrame``, similar to the rest of the pandas containers. This should be + a transparent change with only very limited API implications. See :ref:`Internal Refactoring` + +API changes +~~~~~~~~~~~ + +- ``read_excel`` now supports an integer in its ``sheetname`` argument giving + the index of the sheet to read in (:issue:`4301`). +- Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf", + "iNf", etc.) as infinity. (:issue:`4220`, :issue:`4219`), affecting + ``read_table``, ``read_csv``, etc. +- ``pandas`` now is Python 2/3 compatible without the need for 2to3 thanks to + @jtratner. As a result, pandas now uses iterators more extensively. This + also led to the introduction of substantive parts of the Benjamin + Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, + :issue:`4372`) +- ``pandas.util.compat`` and ``pandas.util.py3compat`` have been merged into + ``pandas.compat``. ``pandas.compat`` now includes many functions allowing + 2/3 compatibility. It contains both list and iterator versions of range, + filter, map and zip, plus other necessary elements for Python 3 + compatibility. ``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce + lists instead of iterators, for compatibility with ``numpy``, subscripting + and ``pandas`` constructors.(:issue:`4384`, :issue:`4375`, :issue:`4372`) +- ``Series.get`` with negative indexers now returns the same as ``[]`` (:issue:`4390`) +- Changes to how ``Index`` and ``MultiIndex`` handle metadata (``levels``, + ``labels``, and ``names``) (:issue:`4039`): + + .. 
code-block:: python + + # previously, you would have set levels or labels directly + >>> pd.index.levels = [[1, 2, 3, 4], [1, 2, 4, 4]] + + # now, you use the set_levels or set_labels methods + >>> index = pd.index.set_levels([[1, 2, 3, 4], [1, 2, 4, 4]]) + + # similarly, for names, you can rename the object + # but setting names is not deprecated + >>> index = pd.index.set_names(["bob", "cranberry"]) + + # and all methods take an inplace kwarg - but return None + >>> pd.index.set_names(["bob", "cranberry"], inplace=True) + +- **All** division with ``NDFrame`` objects is now *truedivision*, regardless + of the future import. This means that operating on pandas objects will by default + use *floating point* division, and return a floating point dtype. + You can use ``//`` and ``floordiv`` to do integer division. + + Integer division + + .. code-block:: ipython + + In [3]: arr = np.array([1, 2, 3, 4]) + + In [4]: arr2 = np.array([5, 3, 2, 1]) + + In [5]: arr / arr2 + Out[5]: array([0, 0, 1, 4]) + + In [6]: pd.Series(arr) // pd.Series(arr2) + Out[6]: + 0 0 + 1 0 + 2 1 + 3 4 + dtype: int64 + + True Division + + .. code-block:: ipython + + In [7]: pd.Series(arr) / pd.Series(arr2) # no future import required + Out[7]: + 0 0.200000 + 1 0.666667 + 2 1.500000 + 3 4.000000 + dtype: float64 + +- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`) +- ``__nonzero__`` for all NDFrame objects, will now raise a ``ValueError``, this reverts back to (:issue:`1073`, :issue:`4633`) + behavior. See :ref:`gotchas` for a more detailed discussion. + + This prevents doing boolean comparison on *entire* pandas objects, which is inherently ambiguous. These all will raise a ``ValueError``. + + .. code-block:: python + + >>> df = pd.DataFrame({'A': np.random.randn(10), + ... 'B': np.random.randn(10), + ... 'C': pd.date_range('20130101', periods=10) + ... }) + ... + >>> if df: + ... pass + ... + Traceback (most recent call last): + ... 
+ ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, + a.bool(), a.item(), a.any() or a.all(). + + >>> df1 = df + >>> df2 = df + >>> df1 and df2 + Traceback (most recent call last): + ... + ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, + a.bool(), a.item(), a.any() or a.all(). + + >>> d = [1, 2, 3] + >>> s1 = pd.Series(d) + >>> s2 = pd.Series(d) + >>> s1 and s2 + Traceback (most recent call last): + ... + ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, + a.bool(), a.item(), a.any() or a.all(). + + Added the ``.bool()`` method to ``NDFrame`` objects to facilitate evaluating of single-element boolean Series: + + .. ipython:: python + + pd.Series([True]).bool() + pd.Series([False]).bool() + pd.DataFrame([[True]]).bool() + pd.DataFrame([[False]]).bool() + +- All non-Index NDFrames (``Series``, ``DataFrame``, ``Panel``, ``Panel4D``, + ``SparsePanel``, etc.), now support the entire set of arithmetic operators + and arithmetic flex methods (add, sub, mul, etc.). ``SparsePanel`` does not + support ``pow`` or ``mod`` with non-scalars. (:issue:`3765`) +- ``Series`` and ``DataFrame`` now have a ``mode()`` method to calculate the + statistical mode(s) by axis/Series. (:issue:`5367`) + +- Chained assignment will now by default warn if the user is assigning to a copy. This can be changed + with the option ``mode.chained_assignment``, allowed options are ``raise/warn/None``. See :ref:`the docs`. + + .. ipython:: python + + dfc = pd.DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + pd.set_option('chained_assignment', 'warn') + + The following warning / exception will show if this is attempted. + + .. ipython:: python + :okwarning: + + dfc.loc[0]['A'] = 1111 + + :: + + Traceback (most recent call last) + ... + SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_index,col_indexer] = value instead + + Here is the correct method of assignment. + + .. 
ipython:: python + + dfc.loc[0, 'A'] = 11 + dfc + +- ``Panel.reindex`` has the following call signature ``Panel.reindex(items=None, major_axis=None, minor_axis=None, **kwargs)`` + to conform with other ``NDFrame`` objects. See :ref:`Internal Refactoring` for more information. + +- ``Series.argmin`` and ``Series.argmax`` are now aliased to ``Series.idxmin`` and ``Series.idxmax``. These return the *index* of the + min or max element respectively. Prior to 0.13.0 these would return the position of the min / max element. (:issue:`6214`) + +Prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These were announced changes in 0.12 or prior that are taking effect as of 0.13.0 + +- Remove deprecated ``Factor`` (:issue:`3650`) +- Remove deprecated ``set_printoptions/reset_printoptions`` (:issue:`3046`) +- Remove deprecated ``_verbose_info`` (:issue:`3215`) +- Remove deprecated ``read_clipboard/to_clipboard/ExcelFile/ExcelWriter`` from ``pandas.io.parsers`` (:issue:`3717`) + These are available as functions in the main pandas namespace (e.g. ``pd.read_clipboard``) +- default for ``tupleize_cols`` is now ``False`` for both ``to_csv`` and ``read_csv``. Fair warning in 0.12 (:issue:`3604`) +- default for `display.max_seq_len` is now 100 rather than `None`. This activates + truncated display ("...") of long sequences in various places. (:issue:`3391`) + +Deprecations +~~~~~~~~~~~~ + +Deprecated in 0.13.0 + +- deprecated ``iterkv``, which will be removed in a future release (this was + an alias of iteritems used to bypass ``2to3``'s changes). + (:issue:`4384`, :issue:`4375`, :issue:`4372`) +- deprecated the string method ``match``, whose role is now performed more + idiomatically by ``extract``. In a future release, the default behavior + of ``match`` will change to become analogous to ``contains``, which returns + a boolean indexer. (Their + distinction is strictness: ``match`` relies on ``re.match`` while + ``contains`` relies on ``re.search``.)
In this release, the deprecated + behavior is the default, but the new behavior is available through the + keyword argument ``as_indexer=True``. + +Indexing API changes +~~~~~~~~~~~~~~~~~~~~ + +Prior to 0.13, it was impossible to use a label indexer (``.loc/.ix``) to set a value that +was not contained in the index of a particular axis. (:issue:`2578`). See :ref:`the docs` + +In the ``Series`` case this is effectively an appending operation + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + s[5] = 5. + s + +.. ipython:: python + + dfi = pd.DataFrame(np.arange(6).reshape(3, 2), + columns=['A', 'B']) + dfi + +This would previously ``KeyError`` + +.. ipython:: python + + dfi.loc[:, 'C'] = dfi.loc[:, 'A'] + dfi + +This is like an ``append`` operation. + +.. ipython:: python + + dfi.loc[3] = 5 + dfi + +A Panel setting operation on an arbitrary axis aligns the input to the Panel + +.. code-block:: ipython + + In [20]: p = pd.Panel(np.arange(16).reshape(2, 4, 2), + ....: items=['Item1', 'Item2'], + ....: major_axis=pd.date_range('2001/1/12', periods=4), + ....: minor_axis=['A', 'B'], dtype='float64') + ....: + + In [21]: p + Out[21]: + + Dimensions: 2 (items) x 4 (major_axis) x 2 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2001-01-12 00:00:00 to 2001-01-15 00:00:00 + Minor_axis axis: A to B + + In [22]: p.loc[:, :, 'C'] = pd.Series([30, 32], index=p.items) + + In [23]: p + Out[23]: + + Dimensions: 2 (items) x 4 (major_axis) x 3 (minor_axis) + Items axis: Item1 to Item2 + Major_axis axis: 2001-01-12 00:00:00 to 2001-01-15 00:00:00 + Minor_axis axis: A to C + + In [24]: p.loc[:, :, 'C'] + Out[24]: + Item1 Item2 + 2001-01-12 30.0 32.0 + 2001-01-13 30.0 32.0 + 2001-01-14 30.0 32.0 + 2001-01-15 30.0 32.0 + +Float64Index API change +~~~~~~~~~~~~~~~~~~~~~~~ + +- Added a new index type, ``Float64Index``. This will be automatically created when passing floating values in index creation. 
+ This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the + same. See :ref:`the docs`, (:issue:`263`) + + Construction is by default for floating type values. + + .. ipython:: python + + index = pd.Index([1.5, 2, 3, 4.5, 5]) + index + s = pd.Series(range(5), index=index) + s + + Scalar selection for ``[],.ix,.loc`` will always be label based. An integer will match an equal float index (e.g. ``3`` is equivalent to ``3.0``) + + .. ipython:: python + + s[3] + s.loc[3] + + The only positional indexing is via ``iloc`` + + .. ipython:: python + + s.iloc[3] + + A scalar index that is not found will raise ``KeyError`` + + Slicing is ALWAYS on the values of the index, for ``[],ix,loc`` and ALWAYS positional with ``iloc`` + + .. ipython:: python + + s[2:4] + s.loc[2:4] + s.iloc[2:4] + + In float indexes, slicing using floats are allowed + + .. ipython:: python + + s[2.1:4.6] + s.loc[2.1:4.6] + +- Indexing on other index types are preserved (and positional fallback for ``[],ix``), with the exception, that floating point slicing + on indexes on non ``Float64Index`` will now raise a ``TypeError``. + + .. code-block:: ipython + + In [1]: pd.Series(range(5))[3.5] + TypeError: the label [3.5] is not a proper indexer for this index type (Int64Index) + + In [1]: pd.Series(range(5))[3.5:4.5] + TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index) + + Using a scalar float indexer will be deprecated in a future version, but is allowed for now. + + .. code-block:: ipython + + In [3]: pd.Series(range(5))[3.0] + Out[3]: 3 + +HDFStore API changes +~~~~~~~~~~~~~~~~~~~~ + +- Query Format Changes. A much more string-like query format is now supported. See :ref:`the docs`. + + .. 
ipython:: python + + path = 'test.h5' + dfq = pd.DataFrame(np.random.randn(10, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=10)) + dfq.to_hdf(path, 'dfq', format='table', data_columns=True) + + Use boolean expressions, with in-line function evaluation. + + .. ipython:: python + + pd.read_hdf(path, 'dfq', + where="index>Timestamp('20130104') & columns=['A', 'B']") + + Use an inline column reference + + .. ipython:: python + + pd.read_hdf(path, 'dfq', + where="A>0 or C>0") + + .. ipython:: python + :suppress: + + import os + os.remove(path) + +- the ``format`` keyword now replaces the ``table`` keyword; allowed values are ``fixed(f)`` or ``table(t)`` + the same defaults as prior < 0.13.0 remain, e.g. ``put`` implies ``fixed`` format and ``append`` implies + ``table`` format. This default format can be set as an option by setting ``io.hdf.default_format``. + + .. ipython:: python + + path = 'test.h5' + df = pd.DataFrame(np.random.randn(10, 2)) + df.to_hdf(path, 'df_table', format='table') + df.to_hdf(path, 'df_table2', append=True) + df.to_hdf(path, 'df_fixed') + with pd.HDFStore(path) as store: + print(store) + + .. ipython:: python + :suppress: + + import os + os.remove(path) + +- Significant table writing performance improvements +- handle a passed ``Series`` in table format (:issue:`4330`) +- can now serialize a ``timedelta64[ns]`` dtype in a table (:issue:`3577`), See :ref:`the docs`. +- added an ``is_open`` property to indicate if the underlying file handle is_open; + a closed store will now report 'CLOSED' when viewing the store (rather than raising an error) + (:issue:`4409`) +- a close of a ``HDFStore`` now will close that instance of the ``HDFStore`` + but will only close the actual file if the ref count (by ``PyTables``) w.r.t. all of the open handles + are 0. Essentially you have a local instance of ``HDFStore`` referenced by a variable. Once you + close it, it will report closed. 
Other references (to the same file) will continue to operate + until they themselves are closed. Performing an action on a closed file will raise + ``ClosedFileError`` + + .. ipython:: python + + path = 'test.h5' + df = pd.DataFrame(np.random.randn(10, 2)) + store1 = pd.HDFStore(path) + store2 = pd.HDFStore(path) + store1.append('df', df) + store2.append('df2', df) + + store1 + store2 + store1.close() + store2 + store2.close() + store2 + + .. ipython:: python + :suppress: + + import os + os.remove(path) + +- removed the ``_quiet`` attribute, replace by a ``DuplicateWarning`` if retrieving + duplicate rows from a table (:issue:`4367`) +- removed the ``warn`` argument from ``open``. Instead a ``PossibleDataLossError`` exception will + be raised if you try to use ``mode='w'`` with an OPEN file handle (:issue:`4367`) +- allow a passed locations array or mask as a ``where`` condition (:issue:`4467`). + See :ref:`the docs` for an example. +- add the keyword ``dropna=True`` to ``append`` to change whether ALL nan rows are not written + to the store (default is ``True``, ALL nan rows are NOT written), also settable + via the option ``io.hdf.dropna_table`` (:issue:`4625`) +- pass through store creation arguments; can be used to support in-memory stores + +DataFrame repr changes +~~~~~~~~~~~~~~~~~~~~~~ + +The HTML and plain text representations of :class:`DataFrame` now show +a truncated view of the table once it exceeds a certain size, rather +than switching to the short info view (:issue:`4886`, :issue:`5550`). +This makes the representation more consistent as small DataFrames get +larger. + +.. image:: ../_static/df_repr_truncated.png + :alt: Truncated HTML representation of a DataFrame + +To get the info view, call :meth:`DataFrame.info`. If you prefer the +info view as the repr for large DataFrames, you can set this by running +``set_option('display.large_repr', 'info')``. 
+ +Enhancements +~~~~~~~~~~~~ + +- ``df.to_clipboard()`` learned a new ``excel`` keyword that lets you + paste df data directly into Excel (enabled by default). (:issue:`5070`). +- ``read_html`` now raises a ``URLError`` instead of catching and raising a + ``ValueError`` (:issue:`4303`, :issue:`4305`) +- Added a test for ``read_clipboard()`` and ``to_clipboard()`` (:issue:`4282`) +- Clipboard functionality now works with PySide (:issue:`4282`) +- Added a more informative error message when plot arguments contain + overlapping color and style arguments (:issue:`4402`) +- ``to_dict`` now takes ``records`` as a possible out type. Returns an array + of column-keyed dictionaries. (:issue:`4936`) + +- ``NaN`` handling in ``get_dummies`` (:issue:`4446`) with ``dummy_na`` + + .. ipython:: python + + # previously, nan was erroneously counted as 2 here + # now it is not counted at all + pd.get_dummies([1, 2, np.nan]) + + # unless requested + pd.get_dummies([1, 2, np.nan], dummy_na=True) + + +- ``timedelta64[ns]`` operations. See :ref:`the docs`. + + .. warning:: + + Most of these operations require ``numpy >= 1.7`` + + Using the new top-level ``to_timedelta``, you can convert a scalar or array from the standard + timedelta format (produced by ``to_csv``) into a timedelta type (``np.timedelta64`` in ``nanoseconds``). + + .. ipython:: python + + pd.to_timedelta('1 days 06:05:01.00003') + pd.to_timedelta('15.5us') + pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan']) + pd.to_timedelta(np.arange(5), unit='s') + pd.to_timedelta(np.arange(5), unit='d') + + A Series of dtype ``timedelta64[ns]`` can now be divided by another + ``timedelta64[ns]`` object, or astyped to yield a ``float64`` dtyped Series. This + is frequency conversion. See :ref:`the docs` for details. + + ..
ipython:: python + + import datetime + td = pd.Series(pd.date_range('20130101', periods=4)) - pd.Series( + pd.date_range('20121201', periods=4)) + td[2] += np.timedelta64(datetime.timedelta(minutes=5, seconds=3)) + td[3] = np.nan + td + + # to days + td / np.timedelta64(1, 'D') + td.astype('timedelta64[D]') + + # to seconds + td / np.timedelta64(1, 's') + td.astype('timedelta64[s]') + + Dividing or multiplying a ``timedelta64[ns]`` Series by an integer or integer Series + + .. ipython:: python + + td * -1 + td * pd.Series([1, 2, 3, 4]) + + Absolute ``DateOffset`` objects can act equivalently to ``timedeltas`` + + .. ipython:: python + + from pandas import offsets + td + offsets.Minute(5) + offsets.Milli(5) + + Fillna is now supported for timedeltas + + .. ipython:: python + + td.fillna(pd.Timedelta(0)) + td.fillna(datetime.timedelta(days=1, seconds=5)) + + You can do numeric reduction operations on timedeltas. + + .. ipython:: python + + td.mean() + td.quantile(.1) + +- ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and + ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set + the bandwidth, and to gkde.evaluate() to specify the indices at which it + is evaluated, respectively. See scipy docs. (:issue:`4298`) + +- DataFrame constructor now accepts a numpy masked record array (:issue:`3478`) + +- The new vectorized string method ``extract`` return regular expression + matches more conveniently. + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', 'c3']).str.extract('[ab](\\d)') + + Elements that do not match return ``NaN``. Extracting a regular expression + with more than one group returns a DataFrame with one column per group. + + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', 'c3']).str.extract('([ab])(\\d)') + + Elements that do not match return a row of ``NaN``. 
+ Thus, a Series of messy strings can be *converted* into a + like-indexed Series or DataFrame of cleaned-up or more useful strings, + without necessitating ``get()`` to access tuples or ``re.match`` objects. + + Named groups like + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', 'c3']).str.extract( + '(?P<letter>[ab])(?P<digit>\\d)') + + and optional groups can also be used. + + .. ipython:: python + :okwarning: + + pd.Series(['a1', 'b2', '3']).str.extract( + '(?P<letter>[ab])?(?P<digit>\\d)') + +- ``read_stata`` now accepts Stata 13 format (:issue:`4291`) + +- ``read_fwf`` now infers the column specifications from the first 100 rows of + the file if the data has correctly separated and properly aligned columns + using the delimiter provided to the function (:issue:`4488`). + +- support for nanosecond times as an offset + + .. warning:: + + These operations require ``numpy >= 1.7`` + + Period conversions in the range of seconds and below were reworked and extended + up to nanoseconds. Periods in the nanosecond range are now available. + + .. ipython:: python + + pd.date_range('2013-01-01', periods=5, freq='5N') + + or with frequency as offset + + .. ipython:: python + + pd.date_range('2013-01-01', periods=5, freq=pd.offsets.Nano(5)) + + Timestamps can be modified in the nanosecond range + + .. ipython:: python + + t = pd.Timestamp('20130101 09:01:02') + t + pd.tseries.offsets.Nano(123) + +- A new method, ``isin`` for DataFrames, which plays nicely with boolean indexing. The argument to ``isin``, what we're comparing the DataFrame to, can be a DataFrame, Series, dict, or array of values. See :ref:`the docs` for more. + + To get the rows where any of the conditions are met: + + ..
ipython:: python + + dfi = pd.DataFrame({'A': [1, 2, 3, 4], 'B': ['a', 'b', 'f', 'n']}) + dfi + other = pd.DataFrame({'A': [1, 3, 3, 7], 'B': ['e', 'f', 'f', 'e']}) + mask = dfi.isin(other) + mask + dfi[mask.any(1)] + +- ``Series`` now supports a ``to_frame`` method to convert it to a single-column DataFrame (:issue:`5164`) + +- All R datasets listed here http://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html can now be loaded into Pandas objects + + .. code-block:: python + + # note that pandas.rpy was deprecated in v0.16.0 + import pandas.rpy.common as com + com.load_data('Titanic') + +- ``tz_localize`` can infer a fall daylight savings transition based on the structure + of the unlocalized data (:issue:`4230`), see :ref:`the docs` + +- ``DatetimeIndex`` is now in the API documentation, see :ref:`the docs` + +- :meth:`~pandas.io.json.json_normalize` is a new method to allow you to create a flat table + from semi-structured JSON data. See :ref:`the docs` (:issue:`1067`) + +- Added PySide support for the qtpandas DataFrameModel and DataFrameWidget. + +- Python csv parser now supports usecols (:issue:`4335`) + +- Frequencies gained several new offsets: + + * ``LastWeekOfMonth`` (:issue:`4637`) + * ``FY5253``, and ``FY5253Quarter`` (:issue:`4511`) + + +- DataFrame has a new ``interpolate`` method, similar to Series (:issue:`4434`, :issue:`1892`) + + .. ipython:: python + + df = pd.DataFrame({'A': [1, 2.1, np.nan, 4.7, 5.6, 6.8], + 'B': [.25, np.nan, np.nan, 4, 12.2, 14.4]}) + df.interpolate() + + Additionally, the ``method`` argument to ``interpolate`` has been expanded + to include ``'nearest', 'zero', 'slinear', 'quadratic', 'cubic', + 'barycentric', 'krogh', 'piecewise_polynomial', 'pchip', 'polynomial', 'spline'`` + The new methods require scipy_. Consult the Scipy reference guide_ and documentation_ for more information + about when the various methods are appropriate. See :ref:`the docs`. 
+ + Interpolate now also accepts a ``limit`` keyword argument. + This works similar to ``fillna``'s limit: + + .. ipython:: python + + ser = pd.Series([1, 3, np.nan, np.nan, np.nan, 11]) + ser.interpolate(limit=2) + +- Added ``wide_to_long`` panel data convenience function. See :ref:`the docs`. + + .. ipython:: python + + np.random.seed(123) + df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, + "A1980" : {0 : "d", 1 : "e", 2 : "f"}, + "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, + "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, + "X" : dict(zip(range(3), np.random.randn(3))) + }) + df["id"] = df.index + df + pd.wide_to_long(df, ["A", "B"], i="id", j="year") + +.. _scipy: http://www.scipy.org +.. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation +.. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html + +- ``to_csv`` now takes a ``date_format`` keyword argument that specifies how + output datetime objects should be formatted. Datetimes encountered in the + index, columns, and values will all have this formatting applied. (:issue:`4313`) +- ``DataFrame.plot`` will scatter plot x versus y by passing ``kind='scatter'`` (:issue:`2215`) +- Added support for Google Analytics v3 API segment IDs that also supports v2 IDs. (:issue:`5271`) + +.. _whatsnew_0130.experimental: + +Experimental +~~~~~~~~~~~~ + +- The new :func:`~pandas.eval` function implements expression evaluation using + ``numexpr`` behind the scenes. This results in large speedups for + complicated expressions involving large DataFrames/Series. For example, + + .. ipython:: python + + nrows, ncols = 20000, 100 + df1, df2, df3, df4 = [pd.DataFrame(np.random.randn(nrows, ncols)) + for _ in range(4)] + + .. ipython:: python + + # eval with NumExpr backend + %timeit pd.eval('df1 + df2 + df3 + df4') + + .. 
ipython:: python + + # pure Python evaluation + %timeit df1 + df2 + df3 + df4 + + For more details, see :ref:`the docs` + +- Similar to ``pandas.eval``, :class:`~pandas.DataFrame` has a new + ``DataFrame.eval`` method that evaluates an expression in the context of + the ``DataFrame``. For example, + + .. ipython:: python + :suppress: + + try: + del a # noqa: F821 + except NameError: + pass + + try: + del b # noqa: F821 + except NameError: + pass + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 2), columns=['a', 'b']) + df.eval('a + b') + +- :meth:`~pandas.DataFrame.query` method has been added that allows + you to select elements of a ``DataFrame`` using a natural query syntax + nearly identical to Python syntax. For example, + + .. ipython:: python + :suppress: + + try: + del a # noqa: F821 + except NameError: + pass + + try: + del b # noqa: F821 + except NameError: + pass + + try: + del c # noqa: F821 + except NameError: + pass + + .. ipython:: python + + n = 20 + df = pd.DataFrame(np.random.randint(n, size=(n, 3)), columns=['a', 'b', 'c']) + df.query('a < b < c') + + selects all the rows of ``df`` where ``a < b < c`` evaluates to ``True``. + For more details see :ref:`the docs`. + +- ``pd.read_msgpack()`` and ``pd.to_msgpack()`` are now a supported method of serialization + of arbitrary pandas (and python objects) in a lightweight portable binary format. See :ref:`the docs` + + .. warning:: + + Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. + + .. code-block:: python + + df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) + df.to_msgpack('foo.msg') + pd.read_msgpack('foo.msg') + + s = pd.Series(np.random.rand(5), index=pd.date_range('20130101', periods=5)) + pd.to_msgpack('foo.msg', df, s) + pd.read_msgpack('foo.msg') + + You can pass ``iterator=True`` to iterate over the unpacked results + + ..
code-block:: python + + for o in pd.read_msgpack('foo.msg', iterator=True): + print(o) + + .. ipython:: python + :suppress: + :okexcept: + + os.remove('foo.msg') + +- ``pandas.io.gbq`` provides a simple way to extract from, and load data into, + Google's BigQuery Data Sets by way of pandas DataFrames. BigQuery is a high + performance SQL-like database service, useful for performing ad-hoc queries + against extremely large datasets. :ref:`See the docs ` + + .. code-block:: python + + from pandas.io import gbq + + # A query to select the average monthly temperatures in the + # in the year 2000 across the USA. The dataset, + # publicata:samples.gsod, is available on all BigQuery accounts, + # and is based on NOAA gsod data. + + query = """SELECT station_number as STATION, + month as MONTH, AVG(mean_temp) as MEAN_TEMP + FROM publicdata:samples.gsod + WHERE YEAR = 2000 + GROUP BY STATION, MONTH + ORDER BY STATION, MONTH ASC""" + + # Fetch the result set for this query + + # Your Google BigQuery Project ID + # To find this, see your dashboard: + # https://console.developers.google.com/iam-admin/projects?authuser=0 + projectid = 'xxxxxxxxx' + df = gbq.read_gbq(query, project_id=projectid) + + # Use pandas to process and reshape the dataset + + df2 = df.pivot(index='STATION', columns='MONTH', values='MEAN_TEMP') + df3 = pd.concat([df2.min(), df2.mean(), df2.max()], + axis=1, keys=["Min Tem", "Mean Temp", "Max Temp"]) + + The resulting DataFrame is:: + + > df3 + Min Tem Mean Temp Max Temp + MONTH + 1 -53.336667 39.827892 89.770968 + 2 -49.837500 43.685219 93.437932 + 3 -77.926087 48.708355 96.099998 + 4 -82.892858 55.070087 97.317240 + 5 -92.378261 61.428117 102.042856 + 6 -77.703334 65.858888 102.900000 + 7 -87.821428 68.169663 106.510714 + 8 -89.431999 68.614215 105.500000 + 9 -86.611112 63.436935 107.142856 + 10 -78.209677 56.880838 92.103333 + 11 -50.125000 48.861228 94.996428 + 12 -50.332258 42.286879 94.396774 + + .. 
warning:: + + To use this module, you will need a BigQuery account. See + for details. + + As of 10/10/13, there is a bug in Google's API preventing result sets + from being larger than 100,000 rows. A patch is scheduled for the week of + 10/14/13. + +.. _whatsnew_0130.refactoring: + +Internal refactoring +~~~~~~~~~~~~~~~~~~~~ + +In 0.13.0 there is a major refactor primarily to subclass ``Series`` from +``NDFrame``, which is the base class currently for ``DataFrame`` and ``Panel``, +to unify methods and behaviors. Series formerly subclassed directly from +``ndarray``. (:issue:`4080`, :issue:`3862`, :issue:`816`) + +.. warning:: + + There are several potential incompatibilities from < 0.13.0 + + - Using certain numpy functions would previously return a ``Series`` if passed a ``Series`` + as an argument. This seems only to affect ``np.ones_like``, ``np.empty_like``, + ``np.diff`` and ``np.where``. These now return ``ndarrays``. + + .. ipython:: python + + s = pd.Series([1, 2, 3, 4]) + + Numpy Usage + + .. ipython:: python + + np.ones_like(s) + np.diff(s) + np.where(s > 1, s, np.nan) + + Pandonic Usage + + .. ipython:: python + + pd.Series(1, index=s.index) + s.diff() + s.where(s > 1) + + - Passing a ``Series`` directly to a cython function expecting an ``ndarray`` type will no + longer work directly; you must pass ``Series.values``. See :ref:`Enhancing Performance` + + - ``Series(0.5)`` would previously return the scalar ``0.5``, instead this will return a 1-element ``Series`` + + - This change breaks ``rpy2<=2.3.8``. An issue has been opened against rpy2 and a workaround + is detailed in :issue:`5698`. Thanks @JanSchulz. + +- Pickle compatibility is preserved for pickles created prior to 0.13. These must be unpickled with ``pd.read_pickle``, see :ref:`Pickling`.
+ +- Refactor of series.py/frame.py/panel.py to move common code to generic.py + + - added ``_setup_axes`` to create generic NDFrame structures + - moved methods + + - ``from_axes,_wrap_array,axes,ix,loc,iloc,shape,empty,swapaxes,transpose,pop`` + - ``__iter__,keys,__contains__,__len__,__neg__,__invert__`` + - ``convert_objects,as_blocks,as_matrix,values`` + - ``__getstate__,__setstate__`` (compat remains in frame/panel) + - ``__getattr__,__setattr__`` + - ``_indexed_same,reindex_like,align,where,mask`` + - ``fillna,replace`` (``Series`` replace is now consistent with ``DataFrame``) + - ``filter`` (also added axis argument to selectively filter on a different axis) + - ``reindex,reindex_axis,take`` + - ``truncate`` (moved to become part of ``NDFrame``) + +- These are API changes which make ``Panel`` more consistent with ``DataFrame`` + + - ``swapaxes`` on a ``Panel`` with the same axes specified now returns a copy + - support attribute access for setting + - filter supports the same API as the original ``DataFrame`` filter + +- Reindex called with no arguments will now return a copy of the input object + +- ``TimeSeries`` is now an alias for ``Series``. The property ``is_time_series`` + can be used to distinguish (if desired) + +- Refactor of Sparse objects to use BlockManager + + - Created a new block type in internals, ``SparseBlock``, which can hold multi-dtypes + and is non-consolidatable. ``SparseSeries`` and ``SparseDataFrame`` now inherit + more methods from their hierarchy (Series/DataFrame), and no longer inherit + from ``SparseArray`` (which instead is the object of the ``SparseBlock``) + - Sparse suite now supports integration with non-sparse data.
Non-float sparse + data is supportable (partially implemented) + - Operations on sparse structures within DataFrames should preserve sparseness, + merging type operations will convert to dense (and back to sparse), so might + be somewhat inefficient + - enable setitem on ``SparseSeries`` for boolean/integer/slices + - ``SparsePanels`` implementation is unchanged (e.g. not using BlockManager, needs work) + +- added ``ftypes`` method to Series/DataFrame, similar to ``dtypes``, but indicates + if the underlying is sparse/dense (as well as the dtype) +- All ``NDFrame`` objects can now use ``__finalize__()`` to specify various + values to propagate to new objects from an existing one (e.g. ``name`` in ``Series`` will + follow more automatically now) +- Internal type checking is now done via a suite of generated classes, allowing ``isinstance(value, klass)`` + without having to directly import the klass, courtesy of @jtratner +- Bug in Series update where the parent frame is not updating its cache based on + changes (:issue:`4080`) or types (:issue:`3217`), fillna (:issue:`3386`) +- Indexing with dtype conversions fixed (:issue:`4463`, :issue:`4204`) +- Refactor ``Series.reindex`` to core/generic.py (:issue:`4604`, :issue:`4618`), allow ``method=`` in reindexing + on a Series to work +- ``Series.copy`` no longer accepts the ``order`` parameter and is now consistent with ``NDFrame`` copy +- Refactor ``rename`` methods to core/generic.py; fixes ``Series.rename`` for (:issue:`4605`), and adds ``rename`` + with the same signature for ``Panel`` +- Refactor ``clip`` methods to core/generic.py (:issue:`4798`) +- Refactor of ``_get_numeric_data/_get_bool_data`` to core/generic.py, allowing Series/Panel functionality +- ``Series`` (for index) / ``Panel`` (for items) now allow attribute access to its elements (:issue:`1903`) + + .. ipython:: python + + s = pd.Series([1, 2, 3], index=list('abc')) + s.b + s.a = 5 + s + +.. 
_release.bug_fixes-0.13.0: + +Bug fixes +~~~~~~~~~ + +- ``HDFStore`` + + - raising an invalid ``TypeError`` rather than ``ValueError`` when + appending with a different block ordering (:issue:`4096`) + - ``read_hdf`` was not respecting a passed ``mode`` (:issue:`4504`) + - appending a 0-len table will work correctly (:issue:`4273`) + - ``to_hdf`` was raising when passing both arguments ``append`` and + ``table`` (:issue:`4584`) + - reading from a store with duplicate columns across dtypes would raise + (:issue:`4767`) + - Fixed a bug where ``ValueError`` wasn't correctly raised when column + names weren't strings (:issue:`4956`) + - A zero length series written in Fixed format not deserializing properly. + (:issue:`4708`) + - Fixed decoding perf issue on py3 (:issue:`5441`) + - Validate levels in a MultiIndex before storing (:issue:`5527`) + - Correctly handle ``data_columns`` with a Panel (:issue:`5717`) +- Fixed bug in tslib.tz_convert(vals, tz1, tz2): it could raise IndexError + exception while trying to access trans[pos + 1] (:issue:`4496`) +- The ``by`` argument now works correctly with the ``layout`` argument + (:issue:`4102`, :issue:`4014`) in ``*.hist`` plotting methods +- Fixed bug in ``PeriodIndex.map`` where using ``str`` would return the str + representation of the index (:issue:`4136`) +- Fixed test failure ``test_time_series_plot_color_with_empty_kwargs`` when + using custom matplotlib default colors (:issue:`4345`) +- Fix running of stata IO tests.
Now uses temporary files to write + (:issue:`4353`) +- Fixed an issue where ``DataFrame.sum`` was slower than ``DataFrame.mean`` + for integer valued frames (:issue:`4365`) +- ``read_html`` tests now work with Python 2.6 (:issue:`4351`) +- Fixed bug where ``network`` testing was throwing ``NameError`` because a + local variable was undefined (:issue:`4381`) +- In ``to_json``, raise if a passed ``orient`` would cause loss of data + because of a duplicate index (:issue:`4359`) +- In ``to_json``, fix date handling so milliseconds are the default timestamp + as the docstring says (:issue:`4362`). +- ``as_index`` is no longer ignored when doing groupby apply (:issue:`4648`, + :issue:`3417`) +- JSON NaT handling fixed, NaTs are now serialized to `null` (:issue:`4498`) +- Fixed JSON handling of escapable characters in JSON object keys + (:issue:`4593`) +- Fixed passing ``keep_default_na=False`` when ``na_values=None`` + (:issue:`4318`) +- Fixed bug with ``values`` raising an error on a DataFrame with duplicate + columns and mixed dtypes, surfaced in (:issue:`4377`) +- Fixed bug with duplicate columns and type conversion in ``read_json`` when + ``orient='split'`` (:issue:`4377`) +- Fixed JSON bug where locales with decimal separators other than '.' threw + exceptions when encoding / decoding certain values. 
(:issue:`4918`) +- Fix ``.iat`` indexing with a ``PeriodIndex`` (:issue:`4390`) +- Fixed an issue where ``PeriodIndex`` joining with self was returning a new + instance rather than the same instance (:issue:`4379`); also adds a test + for this for the other index types +- Fixed a bug with all the dtypes being converted to object when using the + CSV cparser with the usecols parameter (:issue:`3192`) +- Fix an issue in merging blocks where the resulting DataFrame had partially + set _ref_locs (:issue:`4403`) +- Fixed an issue where hist subplots were being overwritten when they were + called using the top level matplotlib API (:issue:`4408`) +- Fixed a bug where calling ``Series.astype(str)`` would truncate the string + (:issue:`4405`, :issue:`4437`) +- Fixed a py3 compat issue where bytes were being repr'd as tuples + (:issue:`4455`) +- Fixed Panel attribute naming conflict if item is named 'a' + (:issue:`3440`) +- Fixed an issue where duplicate indexes were raising when plotting + (:issue:`4486`) +- Fixed an issue where cumsum and cumprod didn't work with bool dtypes + (:issue:`4170`, :issue:`4440`) +- Fixed Panel slicing issued in ``xs`` that was returning an incorrect dimmed + object (:issue:`4016`) +- Fix resampling bug where custom reduce function not used if only one group + (:issue:`3849`, :issue:`4494`) +- Fixed Panel assignment with a transposed frame (:issue:`3830`) +- Raise on set indexing with a Panel and a Panel as a value which needs + alignment (:issue:`3777`) +- frozenset objects now raise in the ``Series`` constructor (:issue:`4482`, + :issue:`4480`) +- Fixed issue with sorting a duplicate MultiIndex that has multiple dtypes + (:issue:`4516`) +- Fixed bug in ``DataFrame.set_values`` which was causing name attributes to + be lost when expanding the index. 
(:issue:`3742`, :issue:`4039`) +- Fixed issue where individual ``names``, ``levels`` and ``labels`` could be + set on ``MultiIndex`` without validation (:issue:`3714`, :issue:`4039`) +- Fixed (:issue:`3334`) in pivot_table. Margins did not compute if values is + the index. +- Fix bug in having a rhs of ``np.timedelta64`` or ``np.offsets.DateOffset`` + when operating with datetimes (:issue:`4532`) +- Fix arithmetic with series/datetimeindex and ``np.timedelta64`` not working + the same (:issue:`4134`) and buggy timedelta in NumPy 1.6 (:issue:`4135`) +- Fix bug in ``pd.read_clipboard`` on windows with PY3 (:issue:`4561`); not + decoding properly +- ``tslib.get_period_field()`` and ``tslib.get_period_field_arr()`` now raise + if code argument out of range (:issue:`4519`, :issue:`4520`) +- Fix boolean indexing on an empty series loses index names (:issue:`4235`), + infer_dtype works with empty arrays. +- Fix reindexing with multiple axes; if an axes match was not replacing the + current axes, leading to a possible lazy frequency inference issue + (:issue:`3317`) +- Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly + (causing the original stack trace to be truncated). 
+- Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`) +- Fix assignment with iloc/loc involving a dtype change in an existing column + (:issue:`4312`, :issue:`5702`) have internal setitem_with_indexer in core/indexing + to use Block.setitem +- Fixed bug where thousands separator was not handled correctly for floating + point numbers in csv_import (:issue:`4322`) +- Fix an issue with CacheableOffset not properly being used by many + DateOffset; this prevented the DateOffset from being cached (:issue:`4609`) +- Fix boolean comparison with a DataFrame on the lhs, and a list/tuple on the + rhs (:issue:`4576`) +- Fix error/dtype conversion with setitem of ``None`` on ``Series/DataFrame`` + (:issue:`4667`) +- Fix decoding based on a passed in non-default encoding in ``pd.read_stata`` + (:issue:`4626`) +- Fix ``DataFrame.from_records`` with a plain-vanilla ``ndarray``. + (:issue:`4727`) +- Fix some inconsistencies with ``Index.rename`` and ``MultiIndex.rename``, + etc. (:issue:`4718`, :issue:`4628`) +- Bug in using ``iloc/loc`` with a cross-sectional and duplicate indices + (:issue:`4726`) +- Bug with using ``QUOTE_NONE`` with ``to_csv`` causing ``Exception``. + (:issue:`4328`) +- Bug with Series indexing not raising an error when the right-hand-side has + an incorrect length (:issue:`2702`) +- Bug in MultiIndexing with a partial string selection as one part of a + MultiIndex (:issue:`4758`) +- Bug with reindexing on the index with a non-unique index will now raise + ``ValueError`` (:issue:`4746`) +- Bug in setting with ``loc/ix`` a single indexer with a MultiIndex axis and + a NumPy array, related to (:issue:`3777`) +- Bug in concatenation with duplicate columns across dtypes not merging with + axis=0 (:issue:`4771`, :issue:`4975`) +- Bug in ``iloc`` with a slice index failing (:issue:`4771`) +- Incorrect error message with no colspecs or width in ``read_fwf``.
+ (:issue:`4774`) +- Fix bugs in indexing in a Series with a duplicate index (:issue:`4548`, + :issue:`4550`) +- Fixed bug with reading compressed files with ``read_fwf`` in Python 3. + (:issue:`3963`) +- Fixed an issue with a duplicate index and assignment with a dtype change + (:issue:`4686`) +- Fixed bug with reading compressed files in as ``bytes`` rather than ``str`` + in Python 3. Simplifies bytes-producing file-handling in Python 3 + (:issue:`3963`, :issue:`4785`). +- Fixed an issue related to ticklocs/ticklabels with log scale bar plots + across different versions of matplotlib (:issue:`4789`) +- Suppressed DeprecationWarning associated with internal calls issued by + repr() (:issue:`4391`) +- Fixed an issue with a duplicate index and duplicate selector with ``.loc`` + (:issue:`4825`) +- Fixed an issue with ``DataFrame.sort_index`` where, when sorting by a + single column and passing a list for ``ascending``, the argument for + ``ascending`` was being interpreted as ``True`` (:issue:`4839`, + :issue:`4846`) +- Fixed ``Panel.tshift`` not working. Added `freq` support to ``Panel.shift`` + (:issue:`4853`) +- Fix an issue in TextFileReader w/ Python engine (i.e. PythonParser) + with thousands != "," (:issue:`4596`) +- Bug in getitem with a duplicate index when using where (:issue:`4879`) +- Fix Type inference code coerces float column into datetime (:issue:`4601`) +- Fixed ``_ensure_numeric`` does not check for complex numbers + (:issue:`4902`) +- Fixed a bug in ``Series.hist`` where two figures were being created when + the ``by`` argument was passed (:issue:`4112`, :issue:`4113`). 
+- Fixed a bug in ``convert_objects`` for > 2 ndims (:issue:`4937`) +- Fixed a bug in DataFrame/Panel cache insertion and subsequent indexing + (:issue:`4939`, :issue:`5424`) +- Fixed string methods for ``FrozenNDArray`` and ``FrozenList`` + (:issue:`4929`) +- Fixed a bug with setting invalid or out-of-range values in indexing + enlargement scenarios (:issue:`4940`) +- Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr +- Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep + separate metadata. (:issue:`4202`, :issue:`4830`) +- Fixed skiprows option in Python parser for read_csv (:issue:`4382`) +- Fixed bug preventing ``cut`` from working with ``np.inf`` levels without + explicitly passing labels (:issue:`3415`) +- Fixed wrong check for overlapping in ``DatetimeIndex.union`` + (:issue:`4564`) +- Fixed conflict between thousands separator and date parser in csv_parser + (:issue:`4678`) +- Fix appending when dtypes are not the same (error showing mixing + float/np.datetime64) (:issue:`4993`) +- Fix repr for DateOffset. No longer show duplicate entries in kwds. + Removed unused offset fields. (:issue:`4638`) +- Fixed wrong index name during read_csv if using usecols. Applies to c + parser only. (:issue:`4201`) +- ``Timestamp`` objects can now appear in the left hand side of a comparison + operation with a ``Series`` or ``DataFrame`` object (:issue:`4982`). +- Fix a bug when indexing with ``np.nan`` via ``iloc/loc`` (:issue:`5016`) +- Fixed a bug where low memory c parser could create different types in + different chunks of the same file. Now coerces to numerical type or raises + warning. (:issue:`3866`) +- Fix a bug where reshaping a ``Series`` to its own shape raised + ``TypeError`` (:issue:`4554`) and other reshaping issues. 
+- Bug in setting with ``ix/loc`` and a mixed int/string index (:issue:`4544`) +- Make sure series-series boolean comparisons are label based (:issue:`4947`) +- Bug in multi-level indexing with a Timestamp partial indexer + (:issue:`4294`) +- Tests/fix for MultiIndex construction of an all-nan frame (:issue:`4078`) +- Fixed a bug where :func:`~pandas.read_html` wasn't correctly inferring + values of tables with commas (:issue:`5029`) +- Fixed a bug where :func:`~pandas.read_html` wasn't providing a stable + ordering of returned tables (:issue:`4770`, :issue:`5029`). +- Fixed a bug where :func:`~pandas.read_html` was incorrectly parsing when + passed ``index_col=0`` (:issue:`5066`). +- Fixed a bug where :func:`~pandas.read_html` was incorrectly inferring the + type of headers (:issue:`5048`). +- Fixed a bug where ``DatetimeIndex`` joins with ``PeriodIndex`` caused a + stack overflow (:issue:`3899`). +- Fixed a bug where ``groupby`` objects didn't allow plots (:issue:`5102`). +- Fixed a bug where ``groupby`` objects weren't tab-completing column names + (:issue:`5102`). +- Fixed a bug where ``groupby.plot()`` and friends were duplicating figures + multiple times (:issue:`5102`). +- Provide automatic conversion of ``object`` dtypes on fillna, related + (:issue:`5103`) +- Fixed a bug where default options were being overwritten in the option + parser cleaning (:issue:`5121`). +- Treat a list/ndarray identically for ``iloc`` indexing with list-like + (:issue:`5006`) +- Fix ``MultiIndex.get_level_values()`` with missing values (:issue:`5074`) +- Fix bound checking for Timestamp() with datetime64 input (:issue:`4065`) +- Fix a bug where ``TestReadHtml`` wasn't calling the correct ``read_html()`` + function (:issue:`5150`). +- Fix a bug with ``NDFrame.replace()`` which made replacement appear as + though it was (incorrectly) using regular expressions (:issue:`5143`). 
+- Fix better error message for to_datetime (:issue:`4928`) +- Made sure different locales are tested on travis-ci (:issue:`4918`). Also + adds a couple of utilities for getting locales and setting locales with a + context manager. +- Fixed segfault on ``isnull(MultiIndex)`` (now raises an error instead) + (:issue:`5123`, :issue:`5125`) +- Allow duplicate indices when performing operations that align + (:issue:`5185`, :issue:`5639`) +- Compound dtypes in a constructor raise ``NotImplementedError`` + (:issue:`5191`) +- Bug in comparing duplicate frames (:issue:`4421`) related +- Bug in describe on duplicate frames +- Bug in ``to_datetime`` with a format and ``coerce=True`` not raising + (:issue:`5195`) +- Bug in ``loc`` setting with multiple indexers and a rhs of a Series that + needs broadcasting (:issue:`5206`) +- Fixed bug where inplace setting of levels or labels on ``MultiIndex`` would + not clear cached ``values`` property and therefore return wrong ``values``. + (:issue:`5215`) +- Fixed bug where filtering a grouped DataFrame or Series did not maintain + the original ordering (:issue:`4621`). +- Fixed ``Period`` with a business date freq to always roll-forward if on a + non-business date. (:issue:`5203`) +- Fixed bug in Excel writers where frames with duplicate column names weren't + written correctly. (:issue:`5235`) +- Fixed issue with ``drop`` and a non-unique index on Series (:issue:`5248`) +- Fixed segfault in C parser caused by passing more names than columns in + the file. 
(:issue:`5156`) +- Fix ``Series.isin`` with date/time-like dtypes (:issue:`5021`) +- C and Python Parser can now handle the more common MultiIndex column + format which doesn't have a row for index names (:issue:`4702`) +- Bug when trying to use an out-of-bounds date as an object dtype + (:issue:`5312`) +- Bug when trying to display an embedded PandasObject (:issue:`5324`) +- Allows operating of Timestamps to return a datetime if the result is out-of-bounds + related (:issue:`5312`) +- Fix return value/type signature of ``initObjToJSON()`` to be compatible + with numpy's ``import_array()`` (:issue:`5334`, :issue:`5326`) +- Bug when renaming then set_index on a DataFrame (:issue:`5344`) +- Test suite no longer leaves around temporary files when testing graphics. (:issue:`5347`) + (thanks for catching this @yarikoptic!) +- Fixed html tests on win32. (:issue:`4580`) +- Make sure that ``head/tail`` are ``iloc`` based, (:issue:`5370`) +- Fixed bug for ``PeriodIndex`` string representation if there are 1 or 2 + elements. (:issue:`5372`) +- The GroupBy methods ``transform`` and ``filter`` can be used on Series + and DataFrames that have repeated (non-unique) indices. (:issue:`4620`) +- Fix empty series not printing name in repr (:issue:`4651`) +- Make tests create temp files in temp directory by default. 
(:issue:`5419`) +- ``pd.to_timedelta`` of a scalar returns a scalar (:issue:`5410`) +- ``pd.to_timedelta`` accepts ``NaN`` and ``NaT``, returning ``NaT`` instead of raising (:issue:`5437`) +- performance improvements in ``isnull`` on larger size pandas objects +- Fixed various setitem with 1d ndarray that does not have a matching + length to the indexer (:issue:`5508`) +- Bug in getitem with a MultiIndex and ``iloc`` (:issue:`5528`) +- Bug in delitem on a Series (:issue:`5542`) +- Bug fix in apply when using custom function and objects are not mutated (:issue:`5545`) +- Bug in selecting from a non-unique index with ``loc`` (:issue:`5553`) +- Bug in groupby returning non-consistent types when user function returns a ``None``, (:issue:`5592`) +- Work around regression in numpy 1.7.0 which erroneously raises IndexError from ``ndarray.item`` (:issue:`5666`) +- Bug in repeated indexing of object with resultant non-unique index (:issue:`5678`) +- Bug in fillna with Series and a passed series/dict (:issue:`5703`) +- Bug in groupby transform with a datetime-like grouper (:issue:`5712`) +- Bug in MultiIndex selection in PY3 when using certain keys (:issue:`5725`) +- Row-wise concat of differing dtypes failing in certain cases (:issue:`5754`) + +.. _whatsnew_0.13.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.12.0..v0.13.0 diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst new file mode 100644 index 00000000..4f9ab761 --- /dev/null +++ b/doc/source/whatsnew/v0.13.1.rst @@ -0,0 +1,477 @@ +.. _whatsnew_0131: + +v0.13.1 (February 3, 2014) +-------------------------- + +{{ header }} + + + +This is a minor release from 0.13.0 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. 
+ +Highlights include: + +- Added ``infer_datetime_format`` keyword to ``read_csv/to_datetime`` to allow speedups for homogeneously formatted datetimes. +- Will intelligently limit display precision for datetime/timedelta formats. +- Enhanced Panel :meth:`~pandas.Panel.apply` method. +- Suggested tutorials in new :ref:`Tutorials` section. +- Our pandas ecosystem is growing. We now feature related projects in a new :ref:`Pandas Ecosystem` section. +- Much work has been taking place on improving the docs, and a new :ref:`Contributing` section has been added. +- Even though it may only be of interest to devs, we <3 our new CI status page: `ScatterCI `__. + +.. warning:: + + 0.13.1 fixes a bug that was caused by a combination of having numpy < 1.8, and doing + chained assignment on a string-like array. Please review :ref:`the docs`, + chained indexing can have unexpected results and should generally be avoided. + + This would previously segfault: + + .. ipython:: python + + df = pd.DataFrame({'A': np.array(['foo', 'bar', 'bah', 'foo', 'bar'])}) + df['A'].iloc[0] = np.nan + df + + The recommended way to do this type of assignment is: + + .. ipython:: python + + df = pd.DataFrame({'A': np.array(['foo', 'bar', 'bah', 'foo', 'bar'])}) + df.loc[0, 'A'] = np.nan + df + +Output formatting enhancements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- df.info() view now displays dtype info per column (:issue:`5682`) + +- df.info() now honors the option ``max_info_rows``, to disable null counts for large frames (:issue:`5974`) + + .. ipython:: python + + max_info_rows = pd.get_option('max_info_rows') + + df = pd.DataFrame({'A': np.random.randn(10), + 'B': np.random.randn(10), + 'C': pd.date_range('20130101', periods=10) + }) + df.iloc[3:6, [0, 2]] = np.nan + + .. ipython:: python + + # set to not display the null counts + pd.set_option('max_info_rows', 0) + df.info() + + ..
ipython:: python + + # this is the default (same as in 0.13.0) + pd.set_option('max_info_rows', max_info_rows) + df.info() + +- Add ``show_dimensions`` display option for the new DataFrame repr to control whether the dimensions print. + + .. ipython:: python + + df = pd.DataFrame([[1, 2], [3, 4]]) + pd.set_option('show_dimensions', False) + df + + pd.set_option('show_dimensions', True) + df + +- The ``ArrayFormatter`` for ``datetime`` and ``timedelta64`` now intelligently + limit precision based on the values in the array (:issue:`3401`) + + Previously output might look like: + + .. code-block:: text + + age today diff + 0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00 + 1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00 + + Now the output looks like: + + .. ipython:: python + + df = pd.DataFrame([pd.Timestamp('20010101'), + pd.Timestamp('20040601')], columns=['age']) + df['today'] = pd.Timestamp('20130419') + df['diff'] = df['today'] - df['age'] + df + +API changes +~~~~~~~~~~~ + +- Add ``-NaN`` and ``-nan`` to the default set of NA values (:issue:`5952`). + See :ref:`NA Values `. + +- Added ``Series.str.get_dummies`` vectorized string method (:issue:`6021`), to extract + dummy/indicator variables for separated string columns: + + .. ipython:: python + + s = pd.Series(['a', 'a|b', np.nan, 'a|c']) + s.str.get_dummies(sep='|') + +- Added the ``NDFrame.equals()`` method to compare if two NDFrames + have equal axes, dtypes, and values. Added the + ``array_equivalent`` function to compare if two ndarrays are + equal. NaNs in identical locations are treated as + equal. (:issue:`5283`) See also :ref:`the docs` for a motivating example. + + ..
code-block:: python + + df = pd.DataFrame({'col': ['foo', 0, np.nan]}) + df2 = pd.DataFrame({'col': [np.nan, 0, 'foo']}, index=[2, 1, 0]) + df.equals(df2) + df.equals(df2.sort_index()) + +- ``DataFrame.apply`` will use the ``reduce`` argument to determine whether a + ``Series`` or a ``DataFrame`` should be returned when the ``DataFrame`` is + empty (:issue:`6007`). + + Previously, calling ``DataFrame.apply`` on an empty ``DataFrame`` would return + either a ``DataFrame`` if there were no columns, or the function being + applied would be called with an empty ``Series`` to guess whether a + ``Series`` or ``DataFrame`` should be returned: + + .. code-block:: ipython + + In [32]: def applied_func(col): + ....: print("Apply function being called with: ", col) + ....: return col.sum() + ....: + + In [33]: empty = DataFrame(columns=['a', 'b']) + + In [34]: empty.apply(applied_func) + Apply function being called with: Series([], Length: 0, dtype: float64) + Out[34]: + a NaN + b NaN + Length: 2, dtype: float64 + + Now, when ``apply`` is called on an empty ``DataFrame``: if the ``reduce`` + argument is ``True`` a ``Series`` will be returned, if it is ``False`` a + ``DataFrame`` will be returned, and if it is ``None`` (the default) the + function being applied will be called with an empty series to try and guess + the return type. + + ..
code-block:: ipython + + In [35]: empty.apply(applied_func, reduce=True) + Out[35]: + a NaN + b NaN + Length: 2, dtype: float64 + + In [36]: empty.apply(applied_func, reduce=False) + Out[36]: + Empty DataFrame + Columns: [a, b] + Index: [] + + [0 rows x 2 columns] + + +Prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are no announced changes in 0.13 or prior that are taking effect as of 0.13.1 + +Deprecations +~~~~~~~~~~~~ + +There are no deprecations of prior behavior in 0.13.1 + +Enhancements +~~~~~~~~~~~~ + +- ``pd.read_csv`` and ``pd.to_datetime`` learned a new ``infer_datetime_format`` keyword which greatly + improves parsing perf in many cases. Thanks to @lexual for suggesting and @danbirken + for rapidly implementing. (:issue:`5490`, :issue:`6021`) + + If ``parse_dates`` is enabled and this flag is set, pandas will attempt to + infer the format of the datetime strings in the columns, and if it can + be inferred, switch to a faster method of parsing them. In some cases + this can increase the parsing speed by ~5-10x. + + .. code-block:: python + + # Try to infer the format for the index column + df = pd.read_csv('foo.csv', index_col=0, parse_dates=True, + infer_datetime_format=True) + +- ``date_format`` and ``datetime_format`` keywords can now be specified when writing to ``excel`` + files (:issue:`4133`) + +- ``MultiIndex.from_product`` convenience function for creating a MultiIndex from + the cartesian product of a set of iterables (:issue:`6055`): + + .. ipython:: python + + shades = ['light', 'dark'] + colors = ['red', 'green', 'blue'] + + pd.MultiIndex.from_product([shades, colors], names=['shade', 'color']) + +- Panel :meth:`~pandas.Panel.apply` will work on non-ufuncs. See :ref:`the docs`. + + .. 
code-block:: ipython + + In [28]: import pandas._testing as tm + + In [29]: panel = tm.makePanel(5) + + In [30]: panel + Out[30]: + + Dimensions: 3 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: A to D + + In [31]: panel['ItemA'] + Out[31]: + A B C D + 2000-01-03 -0.673690 0.577046 -1.344312 -1.469388 + 2000-01-04 0.113648 -1.715002 0.844885 0.357021 + 2000-01-05 -1.478427 -1.039268 1.075770 -0.674600 + 2000-01-06 0.524988 -0.370647 -0.109050 -1.776904 + 2000-01-07 0.404705 -1.157892 1.643563 -0.968914 + + [5 rows x 4 columns] + + Specifying an ``apply`` that operates on a Series (to return a single element) + + .. code-block:: ipython + + In [32]: panel.apply(lambda x: x.dtype, axis='items') + Out[32]: + A B C D + 2000-01-03 float64 float64 float64 float64 + 2000-01-04 float64 float64 float64 float64 + 2000-01-05 float64 float64 float64 float64 + 2000-01-06 float64 float64 float64 float64 + 2000-01-07 float64 float64 float64 float64 + + [5 rows x 4 columns] + + A similar reduction type operation + + .. code-block:: ipython + + In [33]: panel.apply(lambda x: x.sum(), axis='major_axis') + Out[33]: + ItemA ItemB ItemC + A -1.108775 -1.090118 -2.984435 + B -3.705764 0.409204 1.866240 + C 2.110856 2.960500 -0.974967 + D -4.532785 0.303202 -3.685193 + + [4 rows x 3 columns] + + This is equivalent to + + .. code-block:: ipython + + In [34]: panel.sum('major_axis') + Out[34]: + ItemA ItemB ItemC + A -1.108775 -1.090118 -2.984435 + B -3.705764 0.409204 1.866240 + C 2.110856 2.960500 -0.974967 + D -4.532785 0.303202 -3.685193 + + [4 rows x 3 columns] + + A transformation operation that returns a Panel, but is computing + the z-score across the major_axis + + .. 
code-block:: ipython + + In [35]: result = panel.apply(lambda x: (x - x.mean()) / x.std(), + ....: axis='major_axis') + ....: + + In [36]: result + Out[36]: + + Dimensions: 3 (items) x 5 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: A to D + + In [37]: result['ItemA'] # noqa E999 + Out[37]: + A B C D + 2000-01-03 -0.535778 1.500802 -1.506416 -0.681456 + 2000-01-04 0.397628 -1.108752 0.360481 1.529895 + 2000-01-05 -1.489811 -0.339412 0.557374 0.280845 + 2000-01-06 0.885279 0.421830 -0.453013 -1.053785 + 2000-01-07 0.742682 -0.474468 1.041575 -0.075499 + + [5 rows x 4 columns] + +- Panel :meth:`~pandas.Panel.apply` operating on cross-sectional slabs. (:issue:`1148`) + + .. code-block:: ipython + + In [38]: def f(x): + ....: return ((x.T - x.mean(1)) / x.std(1)).T + ....: + + In [39]: result = panel.apply(f, axis=['items', 'major_axis']) + + In [40]: result + Out[40]: + + Dimensions: 4 (items) x 5 (major_axis) x 3 (minor_axis) + Items axis: A to D + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: ItemA to ItemC + + In [41]: result.loc[:, :, 'ItemA'] + Out[41]: + A B C D + 2000-01-03 0.012922 -0.030874 -0.629546 -0.757034 + 2000-01-04 0.392053 -1.071665 0.163228 0.548188 + 2000-01-05 -1.093650 -0.640898 0.385734 -1.154310 + 2000-01-06 1.005446 -1.154593 -0.595615 -0.809185 + 2000-01-07 0.783051 -0.198053 0.919339 -1.052721 + + [5 rows x 4 columns] + + This is equivalent to the following + + .. 
code-block:: ipython + + In [42]: result = pd.Panel({ax: f(panel.loc[:, :, ax]) for ax in panel.minor_axis}) + + In [43]: result + Out[43]: + + Dimensions: 4 (items) x 5 (major_axis) x 3 (minor_axis) + Items axis: A to D + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-07 00:00:00 + Minor_axis axis: ItemA to ItemC + + In [44]: result.loc[:, :, 'ItemA'] + Out[44]: + A B C D + 2000-01-03 0.012922 -0.030874 -0.629546 -0.757034 + 2000-01-04 0.392053 -1.071665 0.163228 0.548188 + 2000-01-05 -1.093650 -0.640898 0.385734 -1.154310 + 2000-01-06 1.005446 -1.154593 -0.595615 -0.809185 + 2000-01-07 0.783051 -0.198053 0.919339 -1.052721 + + [5 rows x 4 columns] + +Performance +~~~~~~~~~~~ + +Performance improvements for 0.13.1 + +- Series datetime/timedelta binary operations (:issue:`5801`) +- DataFrame ``count/dropna`` for ``axis=1`` +- Series.str.contains now has a `regex=False` keyword which can be faster for plain (non-regex) string patterns. (:issue:`5879`) +- Series.str.extract (:issue:`5944`) +- ``dtypes/ftypes`` methods (:issue:`5968`) +- indexing with object dtypes (:issue:`5968`) +- ``DataFrame.apply`` (:issue:`6013`) +- Regression in JSON IO (:issue:`5765`) +- Index construction from Series (:issue:`6150`) + +Experimental +~~~~~~~~~~~~ + +There are no experimental changes in 0.13.1 + +.. _release.bug_fixes-0.13.1: + +Bug fixes +~~~~~~~~~ + +- Bug in ``io.wb.get_countries`` not including all countries (:issue:`6008`) +- Bug in Series replace with timestamp dict (:issue:`5797`) +- read_csv/read_table now respects the `prefix` kwarg (:issue:`5732`). 
+- Bug in selection with missing values via ``.ix`` from a duplicate indexed DataFrame failing (:issue:`5835`) +- Fix issue of boolean comparison on empty DataFrames (:issue:`5808`) +- Bug in isnull handling ``NaT`` in an object array (:issue:`5443`) +- Bug in ``to_datetime`` when passed a ``np.nan`` or integer datelike and a format string (:issue:`5863`) +- Bug in groupby dtype conversion with datetimelike (:issue:`5869`) +- Regression in handling of empty Series as indexers to Series (:issue:`5877`) +- Bug in internal caching, related to (:issue:`5727`) +- Testing bug in reading JSON/msgpack from a non-filepath on windows under py3 (:issue:`5874`) +- Bug when assigning to .ix[tuple(...)] (:issue:`5896`) +- Bug in fully reindexing a Panel (:issue:`5905`) +- Bug in idxmin/max with object dtypes (:issue:`5914`) +- Bug in ``BusinessDay`` when adding n days to a date not on offset when n>5 and n%5==0 (:issue:`5890`) +- Bug in assigning to chained series with a series via ix (:issue:`5928`) +- Bug in creating an empty DataFrame, copying, then assigning (:issue:`5932`) +- Bug in DataFrame.tail with empty frame (:issue:`5846`) +- Bug in propagating metadata on ``resample`` (:issue:`5862`) +- Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`) +- Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`) +- ``pd.match`` not returning passed sentinel +- ``Panel.to_frame()`` no longer fails when ``major_axis`` is a + ``MultiIndex`` (:issue:`5402`). 
+- Bug in ``pd.read_msgpack`` with inferring a ``DateTimeIndex`` frequency + incorrectly (:issue:`5947`) +- Fixed ``to_datetime`` for array with both Tz-aware datetimes and ``NaT``'s (:issue:`5961`) +- Bug in rolling skew/kurtosis when passed a Series with bad data (:issue:`5749`) +- Bug in scipy ``interpolate`` methods with a datetime index (:issue:`5975`) +- Bug in NaT comparison if a mixed datetime/np.datetime64 with NaT were passed (:issue:`5968`) +- Fixed bug with ``pd.concat`` losing dtype information if all inputs are empty (:issue:`5742`) +- Recent changes in IPython cause warnings to be emitted when using previous versions + of pandas in QTConsole, now fixed. If you're using an older version and + need to suppress the warnings, see (:issue:`5922`). +- Bug in merging ``timedelta`` dtypes (:issue:`5695`) +- Bug in plotting.scatter_matrix function. Wrong alignment among diagonal + and off-diagonal plots, see (:issue:`5497`). +- Regression in Series with a MultiIndex via ix (:issue:`6018`) +- Bug in Series.xs with a MultiIndex (:issue:`6018`) +- Bug in Series construction of mixed type with datelike and an integer (which should result in + object type and not automatic conversion) (:issue:`6028`) +- Possible segfault when chained indexing with an object array under NumPy 1.7.1 (:issue:`6026`, :issue:`6056`) +- Bug in setting using fancy indexing a single element with a non-scalar (e.g. a list), + (:issue:`6043`) +- ``to_sql`` did not respect ``if_exists`` (:issue:`4110` :issue:`4304`) +- Regression in ``.get(None)`` indexing from 0.12 (:issue:`5652`) +- Subtle ``iloc`` indexing bug, surfaced in (:issue:`6059`) +- Bug with insert of strings into DatetimeIndex (:issue:`5818`) +- Fixed unicode bug in to_html/HTML repr (:issue:`6098`) +- Fixed missing arg validation in get_options_data (:issue:`6105`) +- Bug in assignment with duplicate columns in a frame where the locations + are a slice (e.g. 
next to each other) (:issue:`6120`) +- Bug in propagating _ref_locs during construction of a DataFrame with dups + index/columns (:issue:`6121`) +- Bug in ``DataFrame.apply`` when using mixed datelike reductions (:issue:`6125`) +- Bug in ``DataFrame.append`` when appending a row with different columns (:issue:`6129`) +- Bug in DataFrame construction with recarray and non-ns datetime dtype (:issue:`6140`) +- Bug in ``.loc`` setitem indexing with a dataframe on rhs, multiple item setting, and + a datetimelike (:issue:`6152`) +- Fixed a bug in ``query``/``eval`` during lexicographic string comparisons (:issue:`6155`). +- Fixed a bug in ``query`` where the index of a single-element ``Series`` was + being thrown away (:issue:`6148`). +- Bug in ``HDFStore`` on appending a dataframe with MultiIndexed columns to + an existing table (:issue:`6167`) +- Consistency with dtypes in setting an empty DataFrame (:issue:`6171`) +- Bug in selecting on a MultiIndex ``HDFStore`` even in the presence of under + specified column spec (:issue:`6169`) +- Bug in ``nanops.var`` with ``ddof=1`` and 1 elements would sometimes return ``inf`` + rather than ``nan`` on some platforms (:issue:`6136`) +- Bug in Series and DataFrame bar plots ignoring the ``use_index`` keyword (:issue:`6209`) +- Bug in groupby with mixed str/int under python3 fixed; ``argsort`` was failing (:issue:`6212`) + +.. _whatsnew_0.13.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.13.0..v0.13.1 diff --git a/doc/source/whatsnew/v0.14.0.rst b/doc/source/whatsnew/v0.14.0.rst new file mode 100644 index 00000000..25a75492 --- /dev/null +++ b/doc/source/whatsnew/v0.14.0.rst @@ -0,0 +1,1087 @@ +.. _whatsnew_0140: + +v0.14.0 (May 31 , 2014) +----------------------- + +{{ header }} + + +This is a major release from 0.13.1 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. 
We recommend that all +users upgrade to this version. + +- Highlights include: + + - Officially support Python 3.4 + - SQL interfaces updated to use ``sqlalchemy``, See :ref:`Here`. + - Display interface changes, See :ref:`Here` + - MultiIndexing Using Slicers, See :ref:`Here`. + - Ability to join a singly-indexed DataFrame with a MultiIndexed DataFrame, see :ref:`Here ` + - More consistency in groupby results and more flexible groupby specifications, See :ref:`Here` + - Holiday calendars are now supported in ``CustomBusinessDay``, see :ref:`Here ` + - Several improvements in plotting functions, including: hexbin, area and pie plots, see :ref:`Here`. + - Performance doc section on I/O operations, See :ref:`Here ` + +- :ref:`Other Enhancements ` + +- :ref:`API Changes ` + +- :ref:`Text Parsing API Changes ` + +- :ref:`Groupby API Changes ` + +- :ref:`Performance Improvements ` + +- :ref:`Prior Deprecations ` + +- :ref:`Deprecations ` + +- :ref:`Known Issues ` + +- :ref:`Bug Fixes ` + +.. warning:: + + In 0.14.0 all ``NDFrame`` based containers have undergone significant internal refactoring. Before that each block of + homogeneous data had its own labels and extra care was necessary to keep those in sync with the parent container's labels. + This should not have any visible user/API behavior changes (:issue:`6745`) + +.. _whatsnew_0140.api: + +API changes +~~~~~~~~~~~ + +- ``read_excel`` uses 0 as the default sheet (:issue:`6573`) +- ``iloc`` will now accept out-of-bounds indexers for slices, e.g. a value that exceeds the length of the object being + indexed. These will be excluded. This will make pandas conform more with python/numpy indexing of out-of-bounds + values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise + ``IndexError`` (:issue:`6296`, :issue:`6299`). This could result in an empty axis (e.g. an empty DataFrame being returned) + + .. 
ipython:: python + + dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB')) + dfl + dfl.iloc[:, 2:3] + dfl.iloc[:, 1:3] + dfl.iloc[4:6] + + These are out-of-bounds selections + + .. code-block:: python + + >>> dfl.iloc[[4, 5, 6]] + IndexError: positional indexers are out-of-bounds + + >>> dfl.iloc[:, 4] + IndexError: single positional indexer is out-of-bounds + +- Slicing with negative start, stop & step values handles corner cases better (:issue:`6531`): + + - ``df.iloc[:-len(df)]`` is now empty + - ``df.iloc[len(df)::-1]`` now enumerates all elements in reverse + +- The :meth:`DataFrame.interpolate` keyword ``downcast`` default has been changed from ``infer`` to + ``None``. This is to preserve the original dtype unless explicitly requested otherwise (:issue:`6290`). +- When converting a dataframe to HTML it used to return `Empty DataFrame`. This special case has + been removed; instead, a header with the column names is returned (:issue:`6062`). +- ``Series`` and ``Index`` now internally share more common operations, e.g. ``factorize(),nunique(),value_counts()`` are + now supported on ``Index`` types as well. The ``Series.weekday`` property is removed + from Series for API consistency. Using a ``DatetimeIndex/PeriodIndex`` method on a Series will now raise a ``TypeError``. + (:issue:`4551`, :issue:`4056`, :issue:`5519`, :issue:`6380`, :issue:`7206`). + +- Add ``is_month_start``, ``is_month_end``, ``is_quarter_start``, ``is_quarter_end``, ``is_year_start``, ``is_year_end`` accessors for ``DatetimeIndex`` / ``Timestamp`` which return a boolean array of whether the timestamp(s) are at the start/end of the month/quarter/year defined by the frequency of the ``DatetimeIndex`` / ``Timestamp`` (:issue:`4565`, :issue:`6998`) + +- Local variable usage has changed in + :func:`pandas.eval`/:meth:`DataFrame.eval`/:meth:`DataFrame.query` + (:issue:`5987`).
For the :class:`~pandas.DataFrame` methods, two things have + changed + + - Column names are now given precedence over locals + - Local variables must be referred to explicitly. This means that even if + you have a local variable that is *not* a column you must still refer to + it with the ``'@'`` prefix. + - You can have an expression like ``df.query('@a < a')`` with no complaints + from ``pandas`` about ambiguity of the name ``a``. + - The top-level :func:`pandas.eval` function does not allow you to use the + ``'@'`` prefix and provides you with an error message telling you so. + - ``NameResolutionError`` was removed because it isn't necessary anymore. + +- Define and document the order of column vs index names in query/eval (:issue:`6676`) +- ``concat`` will now concatenate mixed Series and DataFrames using the Series name + or numbering columns as needed (:issue:`2385`). See :ref:`the docs ` +- Slicing and advanced/boolean indexing operations on ``Index`` classes as well + as :meth:`Index.delete` and :meth:`Index.drop` methods will no longer change the type of the + resulting index (:issue:`6440`, :issue:`7040`) + + .. ipython:: python + + i = pd.Index([1, 2, 3, 'a', 'b', 'c']) + i[[0, 1, 2]] + i.drop(['a', 'b', 'c']) + + Previously, the above operation would return ``Int64Index``. If you'd like + to do this manually, use :meth:`Index.astype` + + .. ipython:: python + + i[[0, 1, 2]].astype(np.int_) + +- ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example, + the old behavior returned an Index in this case (:issue:`6459`): + + .. ipython:: python + :suppress: + + np.random.seed(1234) + from itertools import product + tuples = list(product(('a', 'b'), ('c', 'd'))) + mi = pd.MultiIndex.from_tuples(tuples) + df_multi = pd.DataFrame(np.random.randn(4, 2), index=mi) + tuple_ind = pd.Index(tuples, tupleize_cols=False) + df_multi.index + + ..
ipython:: python + + # Old behavior, casted MultiIndex to an Index + tuple_ind + df_multi.set_index(tuple_ind) + + # New behavior + mi + df_multi.set_index(mi) + + This also applies when passing multiple indices to ``set_index``: + + .. ipython:: python + + @suppress + df_multi.index = tuple_ind + + # Old output, 2-level MultiIndex of tuples + df_multi.set_index([df_multi.index, df_multi.index]) + + @suppress + df_multi.index = mi + + # New output, 4-level MultiIndex + df_multi.set_index([df_multi.index, df_multi.index]) + +- ``pairwise`` keyword was added to the statistical moment functions + ``rolling_cov``, ``rolling_corr``, ``ewmcov``, ``ewmcorr``, + ``expanding_cov``, ``expanding_corr`` to allow the calculation of moving + window covariance and correlation matrices (:issue:`4950`). See + :ref:`Computing rolling pairwise covariances and correlations + ` in the docs. + + .. code-block:: ipython + + In [1]: df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD')) + + In [4]: covs = pd.rolling_cov(df[['A', 'B', 'C']], + ....: df[['B', 'C', 'D']], + ....: 5, + ....: pairwise=True) + + + In [5]: covs[df.index[-1]] + Out[5]: + B C D + A 0.035310 0.326593 -0.505430 + B 0.137748 -0.006888 -0.005383 + C -0.006888 0.861040 0.020762 + +- ``Series.iteritems()`` is now lazy (returns an iterator rather than a list). This was the documented behavior prior to 0.14. (:issue:`6760`) + +- Added ``nunique`` and ``value_counts`` functions to ``Index`` for counting unique elements. (:issue:`6734`) +- ``stack`` and ``unstack`` now raise a ``ValueError`` when the ``level`` keyword refers + to a non-unique item in the ``Index`` (previously raised a ``KeyError``). 
(:issue:`6738`) +- drop unused order argument from ``Series.sort``; args now are in the same order as ``Series.order``; + add ``na_position`` arg to conform to ``Series.order`` (:issue:`6847`) +- default sorting algorithm for ``Series.order`` is now ``quicksort``, to conform with ``Series.sort`` + (and numpy defaults) +- add ``inplace`` keyword to ``Series.order/sort`` to make them inverses (:issue:`6859`) +- ``DataFrame.sort`` now places NaNs at the beginning or end of the sort according to the ``na_position`` parameter. (:issue:`3917`) +- accept ``TextFileReader`` in ``concat``, which was affecting a common user idiom (:issue:`6583`), this was a regression + from 0.13.1 +- Added ``factorize`` functions to ``Index`` and ``Series`` to get indexer and unique values (:issue:`7090`) +- ``describe`` on a DataFrame with a mix of Timestamp and string like objects returns a different Index (:issue:`7088`). + Previously the index was unintentionally sorted. +- Arithmetic operations with **only** ``bool`` dtypes now give a warning indicating + that they are evaluated in Python space for ``+``, ``-``, + and ``*`` operations and raise for all others (:issue:`7011`, :issue:`6762`, + :issue:`7015`, :issue:`7210`) + + .. 
code-block:: python + + >>> x = pd.Series(np.random.rand(10) > 0.5) + >>> y = True + >>> x + y # warning generated: should do x | y instead + UserWarning: evaluating in Python space because the '+' operator is not + supported by numexpr for the bool dtype, use '|' instead + >>> x / y # this raises because it doesn't make sense + NotImplementedError: operator '/' not implemented for bool dtypes + +- In ``HDFStore``, ``select_as_multiple`` will always raise a ``KeyError``, when a key or the selector is not found (:issue:`6177`) +- ``df['col'] = value`` and ``df.loc[:,'col'] = value`` are now completely equivalent; + previously the ``.loc`` would not necessarily coerce the dtype of the resultant series (:issue:`6149`) +- ``dtypes`` and ``ftypes`` now return a series with ``dtype=object`` on empty containers (:issue:`5740`) +- ``df.to_csv`` will now return a string of the CSV data if neither a target path nor a buffer is provided + (:issue:`6061`) +- ``pd.infer_freq()`` will now raise a ``TypeError`` if given an invalid ``Series/Index`` + type (:issue:`6407`, :issue:`6463`) +- A tuple passed to ``DataFame.sort_index`` will be interpreted as the levels of + the index, rather than requiring a list of tuple (:issue:`4370`) +- all offset operations now return ``Timestamp`` types (rather than datetime), Business/Week frequencies were incorrect (:issue:`4069`) +- ``to_excel`` now converts ``np.inf`` into a string representation, + customizable by the ``inf_rep`` keyword argument (Excel has no native inf + representation) (:issue:`6782`) +- Replace ``pandas.compat.scipy.scoreatpercentile`` with ``numpy.percentile`` (:issue:`6810`) +- ``.quantile`` on a ``datetime[ns]`` series now returns ``Timestamp`` instead + of ``np.datetime64`` objects (:issue:`6810`) +- change ``AssertionError`` to ``TypeError`` for invalid types passed to ``concat`` (:issue:`6583`) +- Raise a ``TypeError`` when ``DataFrame`` is passed an iterator as the + ``data`` argument (:issue:`5357`) + + +.. 
_whatsnew_0140.display: + +Display changes +~~~~~~~~~~~~~~~ + +- The default way of printing large DataFrames has changed. DataFrames + exceeding ``max_rows`` and/or ``max_columns`` are now displayed in a + centrally truncated view, consistent with the printing of a + :class:`pandas.Series` (:issue:`5603`). + + In previous versions, a DataFrame was truncated once the dimension + constraints were reached and an ellipse (...) signaled that part of + the data was cut off. + + .. image:: ../_static/trunc_before.png + :alt: The previous look of truncate. + + In the current version, large DataFrames are centrally truncated, + showing a preview of head and tail in both dimensions. + + .. image:: ../_static/trunc_after.png + :alt: The new look. + +- allow option ``'truncate'`` for ``display.show_dimensions`` to only show the dimensions if the + frame is truncated (:issue:`6547`). + + The default for ``display.show_dimensions`` will now be ``truncate``. This is consistent with + how Series display length. + + .. ipython:: python + + dfd = pd.DataFrame(np.arange(25).reshape(-1, 5), + index=[0, 1, 2, 3, 4], + columns=[0, 1, 2, 3, 4]) + + # show dimensions since this is truncated + with pd.option_context('display.max_rows', 2, 'display.max_columns', 2, + 'display.show_dimensions', 'truncate'): + print(dfd) + + # will not show dimensions since it is not truncated + with pd.option_context('display.max_rows', 10, 'display.max_columns', 40, + 'display.show_dimensions', 'truncate'): + print(dfd) + +- Regression in the display of a MultiIndexed Series with ``display.max_rows`` is less than the + length of the series (:issue:`7101`) +- Fixed a bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the + `large_repr` set to 'info' (:issue:`7105`) +- The `verbose` keyword in ``DataFrame.info()``, which controls whether to shorten the ``info`` + representation, is now ``None`` by default. 
This will follow the global setting in + ``display.max_info_columns``. The global setting can be overridden with ``verbose=True`` or + ``verbose=False``. +- Fixed a bug with the `info` repr not honoring the `display.max_info_columns` setting (:issue:`6939`) +- Offset/freq info now in Timestamp __repr__ (:issue:`4553`) + +.. _whatsnew_0140.parsing: + +Text parsing API changes +~~~~~~~~~~~~~~~~~~~~~~~~ + +:func:`read_csv`/:func:`read_table` will now be noisier w.r.t invalid options rather than falling back to the ``PythonParser``. + +- Raise ``ValueError`` when ``sep`` specified with + ``delim_whitespace=True`` in :func:`read_csv`/:func:`read_table` + (:issue:`6607`) +- Raise ``ValueError`` when ``engine='c'`` specified with unsupported + options in :func:`read_csv`/:func:`read_table` (:issue:`6607`) +- Raise ``ValueError`` when fallback to python parser causes options to be + ignored (:issue:`6607`) +- Produce :class:`~pandas.io.parsers.ParserWarning` on fallback to python + parser when no options are ignored (:issue:`6607`) +- Translate ``sep='\s+'`` to ``delim_whitespace=True`` in + :func:`read_csv`/:func:`read_table` if no other C-unsupported options + specified (:issue:`6607`) + +.. _whatsnew_0140.groupby: + +Groupby API changes +~~~~~~~~~~~~~~~~~~~ + +More consistent behavior for some groupby methods: + +- groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation: + + .. ipython:: python + + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + g.head(1) # filters DataFrame + + g.apply(lambda x: x.head(1)) # used to simply fall-through + +- groupby head and tail respect column selection: + + .. ipython:: python + + g[['B']].head(1) + +- groupby ``nth`` now reduces by default; filtering can be achieved by passing ``as_index=False``. With an optional ``dropna`` argument to ignore + NaN. See :ref:`the docs `. + + Reducing + + .. 
ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) + g = df.groupby('A') + g.nth(0) + + # this is equivalent to g.first() + g.nth(0, dropna='any') + + # this is equivalent to g.last() + g.nth(-1, dropna='any') + + Filtering + + .. ipython:: python + + gf = df.groupby('A', as_index=False) + gf.nth(0) + gf.nth(0, dropna='any') + +- groupby will now not return the grouped column for non-cython functions (:issue:`5610`, :issue:`5614`, :issue:`6732`), + as it's already the index + + .. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B']) + g = df.groupby('A') + g.count() + g.describe() + +- passing ``as_index=False`` will leave the grouped column in-place (this is not a change in 0.14.0) + + .. ipython:: python + + df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6], [5, 8]], columns=['A', 'B']) + g = df.groupby('A', as_index=False) + g.count() + g.describe() + +- Allow specification of a more complex groupby via ``pd.Grouper``, such as grouping + by a Time and a string field simultaneously. See :ref:`the docs `. (:issue:`3794`) + +- Better propagation/preservation of Series names when performing groupby + operations: + + - ``SeriesGroupBy.agg`` will ensure that the name attribute of the original + series is propagated to the result (:issue:`6265`). + - If the function provided to ``GroupBy.apply`` returns a named series, the + name of the series will be kept as the name of the column index of the + DataFrame returned by ``GroupBy.apply`` (:issue:`6124`). This facilitates + ``DataFrame.stack`` operations where the name of the column index is used as + the name of the inserted column containing the pivoted data. + + +.. _whatsnew_0140.sql: + +SQL +~~~ + +The SQL reading and writing functions now support more database flavors +through SQLAlchemy (:issue:`2717`, :issue:`4163`, :issue:`5950`, :issue:`6292`).
+All databases supported by SQLAlchemy can be used, such +as PostgreSQL, MySQL, Oracle, Microsoft SQL server (see documentation of +SQLAlchemy on `included dialects +`_). + +The functionality of providing DBAPI connection objects will only be supported +for sqlite3 in the future. The ``'mysql'`` flavor is deprecated. + +The new functions :func:`~pandas.read_sql_query` and :func:`~pandas.read_sql_table` +are introduced. The function :func:`~pandas.read_sql` is kept as a convenience +wrapper around the other two and will delegate to specific function depending on +the provided input (database table name or sql query). + +In practice, you have to provide a SQLAlchemy ``engine`` to the sql functions. +To connect with SQLAlchemy you use the :func:`create_engine` function to create an engine +object from database URI. You only need to create the engine once per database you are +connecting to. For an in-memory sqlite database: + +.. ipython:: python + + from sqlalchemy import create_engine + # Create your connection. + engine = create_engine('sqlite:///:memory:') + +This ``engine`` can then be used to write or read data to/from this database: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}) + df.to_sql('db_table', engine, index=False) + +You can read data from a database by specifying the table name: + +.. ipython:: python + + pd.read_sql_table('db_table', engine) + +or by specifying a sql query: + +.. ipython:: python + + pd.read_sql_query('SELECT * FROM db_table', engine) + +Some other enhancements to the sql functions include: + +- support for writing the index. This can be controlled with the ``index`` + keyword (default is True). +- specify the column label to use when writing the index with ``index_label``. +- specify string columns to parse as datetimes with the ``parse_dates`` + keyword in :func:`~pandas.read_sql_query` and :func:`~pandas.read_sql_table`. + +.. 
warning:: + + Some of the existing functions or function aliases have been deprecated + and will be removed in future versions. This includes: ``tquery``, ``uquery``, + ``read_frame``, ``frame_query``, ``write_frame``. + +.. warning:: + + The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. + MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). + + +.. _whatsnew_0140.slicers: + +MultiIndexing using slicers +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In 0.14.0 we added a new way to slice MultiIndexed objects. +You can slice a MultiIndex by providing multiple indexers. + +You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `, +including slices, lists of labels, labels, and boolean indexers. + +You can use ``slice(None)`` to select all the contents of *that* level. You do not need to specify all the +*deeper* levels, they will be implied as ``slice(None)``. + +As usual, **both sides** of the slicers are included as this is label indexing. + +See :ref:`the docs` +See also issues (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :issue:`5641`, :issue:`7106`) + +.. warning:: + + You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and + for the **columns**. There are some ambiguous cases where the passed indexer could be mis-interpreted + as indexing *both* axes, rather than into, say, the MultiIndex for the rows. + + You should do this: + + .. code-block:: python + + >>> df.loc[(slice('A1', 'A3'), ...), :] # noqa: E901 + + rather than this: + + .. code-block:: python + + >>> df.loc[(slice('A1', 'A3'), ...)] # noqa: E901 + +.. warning:: + + You will need to make sure that the selection axes are fully lexsorted! + +..
ipython:: python + + def mklbl(prefix, n): + return ["%s%s" % (prefix, i) for i in range(n)] + + index = pd.MultiIndex.from_product([mklbl('A', 4), + mklbl('B', 2), + mklbl('C', 4), + mklbl('D', 2)]) + columns = pd.MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], + names=['lvl0', 'lvl1']) + df = pd.DataFrame(np.arange(len(index) * len(columns)).reshape((len(index), + len(columns))), + index=index, + columns=columns).sort_index().sort_index(axis=1) + df + +Basic MultiIndex slicing using slices, lists, and labels. + +.. ipython:: python + + df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + +You can use a ``pd.IndexSlice`` to shortcut the creation of these slices + +.. ipython:: python + + idx = pd.IndexSlice + df.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] + +It is possible to perform quite complicated selections using this method on multiple +axes at the same time. + +.. ipython:: python + + df.loc['A1', (slice(None), 'foo')] + df.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']] + +Using a boolean indexer you can provide selection related to the *values*. + +.. ipython:: python + + mask = df[('a', 'foo')] > 200 + df.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']] + +You can also specify the ``axis`` argument to ``.loc`` to interpret the passed +slicers on a single axis. + +.. ipython:: python + + df.loc(axis=0)[:, :, ['C1', 'C3']] + +Furthermore you can *set* the values using these methods + +.. ipython:: python + + df2 = df.copy() + df2.loc(axis=0)[:, :, ['C1', 'C3']] = -10 + df2 + +You can use a right-hand-side of an alignable object as well. + +.. ipython:: python + + df2 = df.copy() + df2.loc[idx[:, :, ['C1', 'C3']], :] = df2 * 1000 + df2 + +.. _whatsnew_0140.plotting: + +Plotting +~~~~~~~~ + +- Hexagonal bin plots from ``DataFrame.plot`` with ``kind='hexbin'`` (:issue:`5478`), See :ref:`the docs`. 
+- ``DataFrame.plot`` and ``Series.plot`` now supports area plot with specifying ``kind='area'`` (:issue:`6656`), See :ref:`the docs` +- Pie plots from ``Series.plot`` and ``DataFrame.plot`` with ``kind='pie'`` (:issue:`6976`), See :ref:`the docs`. +- Plotting with Error Bars is now supported in the ``.plot`` method of ``DataFrame`` and ``Series`` objects (:issue:`3796`, :issue:`6834`), See :ref:`the docs`. +- ``DataFrame.plot`` and ``Series.plot`` now support a ``table`` keyword for plotting ``matplotlib.Table``, See :ref:`the docs`. The ``table`` keyword can receive the following values. + + - ``False``: Do nothing (default). + - ``True``: Draw a table using the ``DataFrame`` or ``Series`` called ``plot`` method. Data will be transposed to meet matplotlib's default layout. + - ``DataFrame`` or ``Series``: Draw matplotlib.table using the passed data. The data will be drawn as displayed in print method (not transposed automatically). + Also, helper function ``pandas.tools.plotting.table`` is added to create a table from ``DataFrame`` and ``Series``, and add it to an ``matplotlib.Axes``. + +- ``plot(legend='reverse')`` will now reverse the order of legend labels for + most plot kinds. (:issue:`6014`) +- Line plot and area plot can be stacked by ``stacked=True`` (:issue:`6656`) + +- Following keywords are now acceptable for :meth:`DataFrame.plot` with ``kind='bar'`` and ``kind='barh'``: + + - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. (:issue:`6604`) + - `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passes `align='edge'` to matplotlib and adjust the location to `center` by itself, and it results `align` keyword is not applied as expected. (:issue:`4525`) + - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). 
(:issue:`6604`) + + Because the default `align` value has changed, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to place bar plots on the same coordinates as line plots. However, bar plots may differ unexpectedly when you manually adjust the bar location or drawing area, such as using `set_xlim`, `set_ylim`, etc. In these cases, please modify your script to match the new coordinates. + +- The :func:`parallel_coordinates` function now takes argument ``color`` + instead of ``colors``. A ``FutureWarning`` is raised to alert that + the old ``colors`` argument will not be supported in a future release. (:issue:`6956`) + +- The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take + positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is + raised if the old ``data`` argument is used by name. (:issue:`6956`) + +- :meth:`DataFrame.boxplot` now supports ``layout`` keyword (:issue:`6769`) +- :meth:`DataFrame.boxplot` has a new keyword argument, `return_type`. It accepts ``'dict'``, + ``'axes'``, or ``'both'``, in which case a namedtuple with the matplotlib + axes and a dict of matplotlib Lines is returned. + + +.. _whatsnew_0140.prior_deprecations: + +Prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +There are prior version deprecations that are taking effect as of 0.14.0.
+ +- Remove :class:`DateRange` in favor of :class:`DatetimeIndex` (:issue:`6816`) +- Remove ``column`` keyword from ``DataFrame.sort`` (:issue:`4370`) +- Remove ``precision`` keyword from :func:`set_eng_float_format` (:issue:`395`) +- Remove ``force_unicode`` keyword from :meth:`DataFrame.to_string`, + :meth:`DataFrame.to_latex`, and :meth:`DataFrame.to_html`; these functions + encode in unicode by default (:issue:`2224`, :issue:`2225`) +- Remove ``nanRep`` keyword from :meth:`DataFrame.to_csv` and + :meth:`DataFrame.to_string` (:issue:`275`) +- Remove ``unique`` keyword from :meth:`HDFStore.select_column` (:issue:`3256`) +- Remove ``inferTimeRule`` keyword from :func:`Timestamp.offset` (:issue:`391`) +- Remove ``name`` keyword from :func:`get_data_yahoo` and + :func:`get_data_google` ( `commit b921d1a `__ ) +- Remove ``offset`` keyword from :class:`DatetimeIndex` constructor + ( `commit 3136390 `__ ) +- Remove ``time_rule`` from several rolling-moment statistical functions, such + as :func:`rolling_sum` (:issue:`1042`) +- Removed neg ``-`` boolean operations on numpy arrays in favor of inv ``~``, as this is going to + be deprecated in numpy 1.9 (:issue:`6960`) + +.. _whatsnew_0140.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- The :func:`pivot_table`/:meth:`DataFrame.pivot_table` and :func:`crosstab` functions + now take arguments ``index`` and ``columns`` instead of ``rows`` and ``cols``. A + ``FutureWarning`` is raised to alert that the old ``rows`` and ``cols`` arguments + will not be supported in a future release (:issue:`5505`) + +- The :meth:`DataFrame.drop_duplicates` and :meth:`DataFrame.duplicated` methods + now take argument ``subset`` instead of ``cols`` to better align with + :meth:`DataFrame.dropna`.
A ``FutureWarning`` is raised to alert that the old + ``cols`` arguments will not be supported in a future release (:issue:`6680`) + +- The :meth:`DataFrame.to_csv` and :meth:`DataFrame.to_excel` functions + now takes argument ``columns`` instead of ``cols``. A + ``FutureWarning`` is raised to alert that the old ``cols`` arguments + will not be supported in a future release (:issue:`6645`) + +- Indexers will warn ``FutureWarning`` when used with a scalar indexer and + a non-floating point Index (:issue:`4892`, :issue:`6960`) + + .. code-block:: ipython + + # non-floating point indexes can only be indexed by integers / labels + In [1]: pd.Series(1, np.arange(5))[3.0] + pandas/core/index.py:469: FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[1]: 1 + + In [2]: pd.Series(1, np.arange(5)).iloc[3.0] + pandas/core/index.py:469: FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[2]: 1 + + In [3]: pd.Series(1, np.arange(5)).iloc[3.0:4] + pandas/core/index.py:527: FutureWarning: slice indexers when using iloc should be integers and not floating point + Out[3]: + 3 1 + dtype: int64 + + # these are Float64Indexes, so integer or floating point is acceptable + In [4]: pd.Series(1, np.arange(5.))[3] + Out[4]: 1 + + In [5]: pd.Series(1, np.arange(5.))[3.0] + Out[6]: 1 + +- Numpy 1.9 compat w.r.t. deprecation warnings (:issue:`6960`) + +- :meth:`Panel.shift` now has a function signature that matches :meth:`DataFrame.shift`. + The old positional argument ``lags`` has been changed to a keyword argument + ``periods`` with a default value of 1. A ``FutureWarning`` is raised if the + old argument ``lags`` is used by name. (:issue:`6910`) +- The ``order`` keyword argument of :func:`factorize` will be removed. (:issue:`6926`). + +- Remove the ``copy`` keyword from :meth:`DataFrame.xs`, :meth:`Panel.major_xs`, :meth:`Panel.minor_xs`. 
A view will be + returned if possible, otherwise a copy will be made. Previously the user could think that ``copy=False`` would + ALWAYS return a view. (:issue:`6894`) + +- The :func:`parallel_coordinates` function now takes argument ``color`` + instead of ``colors``. A ``FutureWarning`` is raised to alert that + the old ``colors`` argument will not be supported in a future release. (:issue:`6956`) + +- The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take + positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is + raised if the old ``data`` argument is used by name. (:issue:`6956`) + +- The support for the 'mysql' flavor when using DBAPI connection objects has been deprecated. + MySQL will be further supported with SQLAlchemy engines (:issue:`6900`). + +- The following ``io.sql`` functions have been deprecated: ``tquery``, ``uquery``, ``read_frame``, ``frame_query``, ``write_frame``. + +- The `percentile_width` keyword argument in :meth:`~DataFrame.describe` has been deprecated. + Use the `percentiles` keyword instead, which takes a list of percentiles to display. The + default output is unchanged. + +- The default return type of :func:`boxplot` will change from a dict to a matplotlib Axes + in a future release. You can use the future behavior now by passing ``return_type='axes'`` + to boxplot. + +.. _whatsnew_0140.knownissues: + +Known issues +~~~~~~~~~~~~ + +- OpenPyXL 2.0.0 breaks backwards compatibility (:issue:`7169`) + + +.. _whatsnew_0140.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- DataFrame and Series will create a MultiIndex object if passed a tuples dict, See :ref:`the docs` (:issue:`3323`) + + .. 
ipython:: python + + pd.Series({('a', 'b'): 1, ('a', 'a'): 0, + ('a', 'c'): 2, ('b', 'a'): 3, ('b', 'b'): 4}) + pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2}, + ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4}, + ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6}, + ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8}, + ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}}) + +- Added the ``sym_diff`` method to ``Index`` (:issue:`5543`) +- ``DataFrame.to_latex`` now takes a longtable keyword, which if True will return a table in a longtable environment. (:issue:`6617`) +- Add option to turn off escaping in ``DataFrame.to_latex`` (:issue:`6472`) +- ``pd.read_clipboard`` will, if the keyword ``sep`` is unspecified, try to detect data copied from a spreadsheet + and parse accordingly. (:issue:`6223`) +- Joining a singly-indexed DataFrame with a MultiIndexed DataFrame (:issue:`3662`) + + See :ref:`the docs`. Joining MultiIndex DataFrames on both the left and right is not yet supported ATM. + + .. ipython:: python + + household = pd.DataFrame({'household_id': [1, 2, 3], + 'male': [0, 1, 0], + 'wealth': [196087.3, 316478.7, 294750] + }, + columns=['household_id', 'male', 'wealth'] + ).set_index('household_id') + household + portfolio = pd.DataFrame({'household_id': [1, 2, 2, 3, 3, 3, 4], + 'asset_id': ["nl0000301109", + "nl0000289783", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + np.nan], + 'name': ["ABN Amro", + "Robeco", + "Royal Dutch Shell", + "Royal Dutch Shell", + "AAB Eastern Europe Equity Fund", + "Postbank BioTech Fonds", + np.nan], + 'share': [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0] + }, + columns=['household_id', 'asset_id', 'name', 'share'] + ).set_index(['household_id', 'asset_id']) + portfolio + + household.join(portfolio, how='inner') + +- ``quotechar``, ``doublequote``, and ``escapechar`` can now be specified when + using ``DataFrame.to_csv`` (:issue:`5414`, :issue:`4528`) +- Partially sort by only the specified levels of a MultiIndex with the + 
``sort_remaining`` boolean kwarg. (:issue:`3984`) +- Added ``to_julian_date`` to ``TimeStamp`` and ``DatetimeIndex``. The Julian + Date is used primarily in astronomy and represents the number of days from + noon, January 1, 4713 BC. Because nanoseconds are used to define the time + in pandas the actual range of dates that you can use is 1678 AD to 2262 AD. (:issue:`4041`) +- ``DataFrame.to_stata`` will now check data for compatibility with Stata data types + and will upcast when needed. When it is not possible to losslessly upcast, a warning + is issued (:issue:`6327`) +- ``DataFrame.to_stata`` and ``StataWriter`` will accept keyword arguments time_stamp + and data_label which allow the time stamp and dataset label to be set when creating a + file. (:issue:`6545`) +- ``pandas.io.gbq`` now handles reading unicode strings properly. (:issue:`5940`) +- :ref:`Holidays Calendars` are now available and can be used with the ``CustomBusinessDay`` offset (:issue:`6719`) +- ``Float64Index`` is now backed by a ``float64`` dtype ndarray instead of an + ``object`` dtype array (:issue:`6471`). +- Implemented ``Panel.pct_change`` (:issue:`6904`) +- Added ``how`` option to rolling-moment functions to dictate how to handle resampling; :func:`rolling_max` defaults to max, + :func:`rolling_min` defaults to min, and all others default to mean (:issue:`6297`) +- ``CustomBusinessMonthBegin`` and ``CustomBusinessMonthEnd`` are now available (:issue:`6866`) +- :meth:`Series.quantile` and :meth:`DataFrame.quantile` now accept an array of + quantiles. +- :meth:`~DataFrame.describe` now accepts an array of percentiles to include in the summary statistics (:issue:`4196`) +- ``pivot_table`` can now accept ``Grouper`` by ``index`` and ``columns`` keywords (:issue:`6913`) + + .. 
ipython:: python + + import datetime + df = pd.DataFrame({ + 'Branch': 'A A A A A B'.split(), + 'Buyer': 'Carl Mark Carl Carl Joe Joe'.split(), + 'Quantity': [1, 3, 5, 1, 8, 1], + 'Date': [datetime.datetime(2013, 11, 1, 13, 0), + datetime.datetime(2013, 9, 1, 13, 5), + datetime.datetime(2013, 10, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0), + datetime.datetime(2013, 11, 1, 20, 0), + datetime.datetime(2013, 10, 2, 10, 0)], + 'PayDay': [datetime.datetime(2013, 10, 4, 0, 0), + datetime.datetime(2013, 10, 15, 13, 5), + datetime.datetime(2013, 9, 5, 20, 0), + datetime.datetime(2013, 11, 2, 10, 0), + datetime.datetime(2013, 10, 7, 20, 0), + datetime.datetime(2013, 9, 5, 10, 0)]}) + df + + df.pivot_table(values='Quantity', + index=pd.Grouper(freq='M', key='Date'), + columns=pd.Grouper(freq='M', key='PayDay'), + aggfunc=np.sum) + +- Arrays of strings can be wrapped to a specified width (``str.wrap``) (:issue:`6999`) +- Add :meth:`~Series.nsmallest` and :meth:`Series.nlargest` methods to Series, See :ref:`the docs ` (:issue:`3960`) + +- `PeriodIndex` fully supports partial string indexing like `DatetimeIndex` (:issue:`7043`) + + .. ipython:: python + + prng = pd.period_range('2013-01-01 09:00', periods=100, freq='H') + ps = pd.Series(np.random.randn(len(prng)), index=prng) + ps + ps['2013-01-02'] + +- ``read_excel`` can now read milliseconds in Excel dates and times with xlrd >= 0.9.3. 
(:issue:`5945`) +- ``pd.stats.moments.rolling_var`` now uses Welford's method for increased numerical stability (:issue:`6817`) +- pd.expanding_apply and pd.rolling_apply now take args and kwargs that are passed on to + the func (:issue:`6289`) +- ``DataFrame.rank()`` now has a percentage rank option (:issue:`5971`) +- ``Series.rank()`` now has a percentage rank option (:issue:`5971`) +- ``Series.rank()`` and ``DataFrame.rank()`` now accept ``method='dense'`` for ranks without gaps (:issue:`6514`) +- Support passing ``encoding`` with xlwt (:issue:`3710`) +- Refactor Block classes removing `Block.items` attributes to avoid duplication + in item handling (:issue:`6745`, :issue:`6988`). +- Testing statements updated to use specialized asserts (:issue:`6175`) + + + +.. _whatsnew_0140.performance: + +Performance +~~~~~~~~~~~ + +- Performance improvement when converting ``DatetimeIndex`` to floating ordinals + using ``DatetimeConverter`` (:issue:`6636`) +- Performance improvement for ``DataFrame.shift`` (:issue:`5609`) +- Performance improvement in indexing into a MultiIndexed Series (:issue:`5567`) +- Performance improvements in single-dtyped indexing (:issue:`6484`) +- Improve performance of DataFrame construction with certain offsets, by removing faulty caching + (e.g. MonthEnd,BusinessMonthEnd), (:issue:`6479`) +- Improve performance of ``CustomBusinessDay`` (:issue:`6584`) +- improve performance of slice indexing on Series with string keys (:issue:`6341`, :issue:`6372`) +- Performance improvement for ``DataFrame.from_records`` when reading a + specified number of rows from an iterable (:issue:`6700`) +- Performance improvements in timedelta conversions for integer dtypes (:issue:`6754`) +- Improved performance of compatible pickles (:issue:`6899`) +- Improve performance in certain reindexing operations by optimizing ``take_2d`` (:issue:`6749`) +- ``GroupBy.count()`` is now implemented in Cython and is much faster for large + numbers of groups (:issue:`7016`). 
+ +Experimental +~~~~~~~~~~~~ + +There are no experimental changes in 0.14.0 + + +.. _whatsnew_0140.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +- Bug in Series ValueError when index doesn't match data (:issue:`6532`) +- Prevent segfault due to MultiIndex not being supported in HDFStore table + format (:issue:`1848`) +- Bug in ``pd.DataFrame.sort_index`` where mergesort wasn't stable when ``ascending=False`` (:issue:`6399`) +- Bug in ``pd.tseries.frequencies.to_offset`` when argument has leading zeros (:issue:`6391`) +- Bug in version string gen. for dev versions with shallow clones / install from tarball (:issue:`6127`) +- Inconsistent tz parsing ``Timestamp`` / ``to_datetime`` for current year (:issue:`5958`) +- Indexing bugs with reordered indexes (:issue:`6252`, :issue:`6254`) +- Bug in ``.xs`` with a Series multiindex (:issue:`6258`, :issue:`5684`) +- Bug in conversion of string types to a DatetimeIndex with a specified frequency (:issue:`6273`, :issue:`6274`) +- Bug in ``eval`` where type-promotion failed for large expressions (:issue:`6205`) +- Bug in interpolate with ``inplace=True`` (:issue:`6281`) +- ``HDFStore.remove`` now handles start and stop (:issue:`6177`) +- ``HDFStore.select_as_multiple`` handles start and stop the same way as ``select`` (:issue:`6177`) +- ``HDFStore.select_as_coordinates`` and ``select_column`` work with a ``where`` clause that results in filters (:issue:`6177`) +- Regression in join of non_unique_indexes (:issue:`6329`) +- Issue with groupby ``agg`` with a single function and a mixed-type frame (:issue:`6337`) +- Bug in ``DataFrame.replace()`` when passing a non- ``bool`` + ``to_replace`` argument (:issue:`6332`) +- Raise when trying to align on different levels of a MultiIndex assignment (:issue:`3738`) +- Bug in setting complex dtypes via boolean indexing (:issue:`6345`) +- Bug in TimeGrouper/resample when presented with a non-monotonic DatetimeIndex that would return invalid results.
(:issue:`4161`) +- Bug in index name propagation in TimeGrouper/resample (:issue:`4161`) +- TimeGrouper has a more compatible API to the rest of the groupers (e.g. ``groups`` was missing) (:issue:`3881`) +- Bug in multiple grouping with a TimeGrouper depending on target column order (:issue:`6764`) +- Bug in ``pd.eval`` when parsing strings with possible tokens like ``'&'`` + (:issue:`6351`) +- Bug correctly handle placements of ``-inf`` in Panels when dividing by integer 0 (:issue:`6178`) +- ``DataFrame.shift`` with ``axis=1`` was raising (:issue:`6371`) +- Disabled clipboard tests until release time (run locally with ``nosetests -A disabled``) (:issue:`6048`). +- Bug in ``DataFrame.replace()`` when passing a nested ``dict`` that contained + keys not in the values to be replaced (:issue:`6342`) +- ``str.match`` ignored the na flag (:issue:`6609`). +- Bug in take with duplicate columns that were not consolidated (:issue:`6240`) +- Bug in interpolate changing dtypes (:issue:`6290`) +- Bug in ``Series.get``, was using a buggy access method (:issue:`6383`) +- Bug in hdfstore queries of the form ``where=[('date', '>=', datetime(2013,1,1)), ('date', '<=', datetime(2014,1,1))]`` (:issue:`6313`) +- Bug in ``DataFrame.dropna`` with duplicate indices (:issue:`6355`) +- Regression in chained getitem indexing with embedded list-like from 0.12 (:issue:`6394`) +- ``Float64Index`` with nans not comparing correctly (:issue:`6401`) +- ``eval``/``query`` expressions with strings containing the ``@`` character + will now work (:issue:`6366`). +- Bug in ``Series.reindex`` when specifying a ``method`` with some nan values was inconsistent (noted on a resample) (:issue:`6418`) +- Bug in :meth:`DataFrame.replace` where nested dicts were erroneously + depending on the order of dictionary keys and values (:issue:`5338`). 
+- Performance issue in concatenating with empty objects (:issue:`3259`) +- Clarify sorting of ``sym_diff`` on ``Index`` objects with ``NaN`` values (:issue:`6444`) +- Regression in ``MultiIndex.from_product`` with a ``DatetimeIndex`` as input (:issue:`6439`) +- Bug in ``str.extract`` when passed a non-default index (:issue:`6348`) +- Bug in ``str.split`` when passed ``pat=None`` and ``n=1`` (:issue:`6466`) +- Bug in ``io.data.DataReader`` when passed ``"F-F_Momentum_Factor"`` and ``data_source="famafrench"`` (:issue:`6460`) +- Bug in ``sum`` of a ``timedelta64[ns]`` series (:issue:`6462`) +- Bug in ``resample`` with a timezone and certain offsets (:issue:`6397`) +- Bug in ``iat/iloc`` with duplicate indices on a Series (:issue:`6493`) +- Bug in ``read_html`` where nan's were incorrectly being used to indicate + missing values in text. Should use the empty string for consistency with the + rest of pandas (:issue:`5129`). +- Bug in ``read_html`` tests where redirected invalid URLs would make one test + fail (:issue:`6445`). +- Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`) +- Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`) +- Regression from 0.13 in the treatment of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`) +- ``.names`` attribute of MultiIndexes passed to ``set_index`` are now preserved (:issue:`6459`). 
+- Bug in setitem with a duplicate index and an alignable rhs (:issue:`6541`) +- Bug in setitem with ``.loc`` on mixed integer Indexes (:issue:`6546`) +- Bug in ``pd.read_stata`` which would use the wrong data types and missing values (:issue:`6327`) +- Bug in ``DataFrame.to_stata`` that led to data loss in certain cases, and could be exported using the + wrong data types and missing values (:issue:`6335`) +- ``StataWriter`` replaces missing values in string columns by empty string (:issue:`6802`) +- Inconsistent types in ``Timestamp`` addition/subtraction (:issue:`6543`) +- Bug in preserving frequency across Timestamp addition/subtraction (:issue:`4547`) +- Bug in empty list lookup caused ``IndexError`` exceptions (:issue:`6536`, :issue:`6551`) +- ``Series.quantile`` raising on an ``object`` dtype (:issue:`6555`) +- Bug in ``.xs`` with a ``nan`` in level when dropped (:issue:`6574`) +- Bug in fillna with ``method='bfill/ffill'`` and ``datetime64[ns]`` dtype (:issue:`6587`) +- Bug in sql writing with mixed dtypes possibly leading to data loss (:issue:`6509`) +- Bug in ``Series.pop`` (:issue:`6600`) +- Bug in ``iloc`` indexing when positional indexer matched ``Int64Index`` of the corresponding axis and no reordering happened (:issue:`6612`) +- Bug in ``fillna`` with ``limit`` and ``value`` specified +- Bug in ``DataFrame.to_stata`` when columns have non-string names (:issue:`4558`) +- Bug in compat with ``np.compress``, surfaced in (:issue:`6658`) +- Bug in binary operations with a rhs of a Series not aligning (:issue:`6681`) +- Bug in ``DataFrame.to_stata`` which incorrectly handles nan values and ignores ``with_index`` keyword argument (:issue:`6685`) +- Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`) +- Bug in consistency of groupby aggregation when passing a custom function (:issue:`6715`) +- Bug in resample when ``how=None`` resample freq is the same as the axis frequency (:issue:`5955`) +- Bug in downcasting inference
with empty arrays (:issue:`6733`) +- Bug in ``obj.blocks`` on sparse containers dropping all but the last item of the same dtype (:issue:`6748`) +- Bug in unpickling ``NaT (NaTType)`` (:issue:`4606`) +- Bug in ``DataFrame.replace()`` where regex meta characters were being treated + as regex even when ``regex=False`` (:issue:`6777`). +- Bug in timedelta ops on 32-bit platforms (:issue:`6808`) +- Bug in setting a tz-aware index directly via ``.index`` (:issue:`6785`) +- Bug in expressions.py where numexpr would try to evaluate arithmetic ops + (:issue:`6762`). +- Bug in Makefile where it didn't remove Cython generated C files with ``make + clean`` (:issue:`6768`) +- Bug with numpy < 1.7.2 when reading long strings from ``HDFStore`` (:issue:`6166`) +- Bug in ``DataFrame._reduce`` where non bool-like (0/1) integers were being + converted into bools. (:issue:`6806`) +- Regression from 0.13 with ``fillna`` and a Series on datetime-like (:issue:`6344`) +- Bug in adding ``np.timedelta64`` to ``DatetimeIndex`` with timezone outputs incorrect results (:issue:`6818`) +- Bug in ``DataFrame.replace()`` where changing a dtype through replacement + would only replace the first occurrence of a value (:issue:`6689`) +- Better error message when passing a frequency of 'MS' in ``Period`` construction (:issue:`5332`) +- Bug in ``Series.__unicode__`` when ``max_rows=None`` and the Series has more than 1000 rows.
(:issue:`6863`) +- Bug in ``groupby.get_group`` where a datelike wasn't always accepted (:issue:`5267`) +- Bug in ``groupBy.get_group`` created by ``TimeGrouper`` raises ``AttributeError`` (:issue:`6914`) +- Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` converting ``NaT`` incorrectly (:issue:`5546`) +- Bug in arithmetic operations affecting ``NaT`` (:issue:`6873`) +- Bug in ``Series.str.extract`` where the resulting ``Series`` from a single + group match wasn't renamed to the group name +- Bug in ``DataFrame.to_csv`` where setting ``index=False`` ignored the + ``header`` kwarg (:issue:`6186`) +- Bug in ``DataFrame.plot`` and ``Series.plot``, where the legend behave inconsistently when plotting to the same axes repeatedly (:issue:`6678`) +- Internal tests for patching ``__finalize__`` / bug in merge not finalizing (:issue:`6923`, :issue:`6927`) +- accept ``TextFileReader`` in ``concat``, which was affecting a common user idiom (:issue:`6583`) +- Bug in C parser with leading white space (:issue:`3374`) +- Bug in C parser with ``delim_whitespace=True`` and ``\r``-delimited lines +- Bug in python parser with explicit MultiIndex in row following column header (:issue:`6893`) +- Bug in ``Series.rank`` and ``DataFrame.rank`` that caused small floats (<1e-13) to all receive the same rank (:issue:`6886`) +- Bug in ``DataFrame.apply`` with functions that used ``*args`` or ``**kwargs`` and returned + an empty result (:issue:`6952`) +- Bug in sum/mean on 32-bit platforms on overflows (:issue:`6915`) +- Moved ``Panel.shift`` to ``NDFrame.slice_shift`` and fixed to respect multiple dtypes. 
(:issue:`6959`) +- Bug in enabling ``subplots=True`` in ``DataFrame.plot`` only has single column raises ``TypeError``, and ``Series.plot`` raises ``AttributeError`` (:issue:`6951`) +- Bug in ``DataFrame.plot`` draws unnecessary axes when enabling ``subplots`` and ``kind=scatter`` (:issue:`6951`) +- Bug in ``read_csv`` from a filesystem with non-utf-8 encoding (:issue:`6807`) +- Bug in ``iloc`` when setting / aligning (:issue:`6766`) +- Bug causing UnicodeEncodeError when get_dummies called with unicode values and a prefix (:issue:`6885`) +- Bug in timeseries-with-frequency plot cursor display (:issue:`5453`) +- Bug surfaced in ``groupby.plot`` when using a ``Float64Index`` (:issue:`7025`) +- Stopped tests from failing if options data isn't able to be downloaded from Yahoo (:issue:`7034`) +- Bug in ``parallel_coordinates`` and ``radviz`` where reordering of class column + caused possible color/class mismatch (:issue:`6956`) +- Bug in ``radviz`` and ``andrews_curves`` where multiple values of 'color' + were being passed to plotting method (:issue:`6956`) +- Bug in ``Float64Index.isin()`` where containing ``nan`` s would make indices + claim that they contained all the things (:issue:`7066`). +- Bug in ``DataFrame.boxplot`` where it failed to use the axis passed as the ``ax`` argument (:issue:`3578`) +- Bug in the ``XlsxWriter`` and ``XlwtWriter`` implementations that resulted in datetime columns being formatted without the time (:issue:`7075`) + were being passed to plotting method +- :func:`read_fwf` treats ``None`` in ``colspec`` like regular python slices. 
It now reads from the beginning + or until the end of the line when ``colspec`` contains a ``None`` (previously raised a ``TypeError``) +- Bug in cache coherence with chained indexing and slicing; add ``_is_view`` property to ``NDFrame`` to correctly predict + views; mark ``is_copy`` on ``xs`` only if it's an actual copy (and not a view) (:issue:`7084`) +- Bug in DatetimeIndex creation from string ndarray with ``dayfirst=True`` (:issue:`5917`) +- Bug in ``MultiIndex.from_arrays`` created from ``DatetimeIndex`` doesn't preserve ``freq`` and ``tz`` (:issue:`7090`) +- Bug in ``unstack`` raises ``ValueError`` when ``MultiIndex`` contains ``PeriodIndex`` (:issue:`4342`) +- Bug in ``boxplot`` and ``hist`` draws unnecessary axes (:issue:`6769`) +- Regression in ``groupby.nth()`` for out-of-bounds indexers (:issue:`6621`) +- Bug in ``quantile`` with datetime values (:issue:`6965`) +- Bug in ``DataFrame.set_index``, ``reindex`` and ``pivot`` don't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`3950`, :issue:`5878`, :issue:`6631`) +- Bug in ``MultiIndex.get_level_values`` doesn't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`7092`) +- Bug in ``Groupby`` doesn't preserve ``tz`` (:issue:`3950`) +- Bug in ``PeriodIndex`` partial string slicing (:issue:`6716`) +- Bug in the HTML repr of a truncated Series or DataFrame not showing the class name with the ``large_repr`` set to 'info' + (:issue:`7105`) +- Bug in ``DatetimeIndex`` specifying ``freq`` raises ``ValueError`` when passed value is too short (:issue:`7098`) +- Fixed a bug with the ``info`` repr not honoring the ``display.max_info_columns`` setting (:issue:`6939`) +- Bug in ``PeriodIndex`` string slicing with out of bounds values (:issue:`5407`) +- Fixed a memory error in the hashtable implementation/factorizer on resizing of large tables (:issue:`7157`) +- Bug in ``isnull`` when applied to 0-dimensional object arrays (:issue:`7176`) +- Bug in ``query``/``eval`` where global constants were
not looked up correctly + (:issue:`7178`) +- Bug in recognizing out-of-bounds positional list indexers with ``iloc`` and a multi-axis tuple indexer (:issue:`7189`) +- Bug in setitem with a single value, MultiIndex and integer indices (:issue:`7190`, :issue:`7218`) +- Bug in expressions evaluation with reversed ops, showing in series-dataframe ops (:issue:`7198`, :issue:`7192`) +- Bug in multi-axis indexing with > 2 ndim and a MultiIndex (:issue:`7199`) +- Fix a bug where invalid eval/query operations would blow the stack (:issue:`5198`) + + +.. _whatsnew_0.14.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.13.1..v0.14.0 \ No newline at end of file diff --git a/doc/source/whatsnew/v0.14.1.rst b/doc/source/whatsnew/v0.14.1.rst new file mode 100644 index 00000000..26018c57 --- /dev/null +++ b/doc/source/whatsnew/v0.14.1.rst @@ -0,0 +1,282 @@ +.. _whatsnew_0141: + +v0.14.1 (July 11, 2014) +----------------------- + +{{ header }} + + +This is a minor release from 0.14.0 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +- Highlights include: + + - New methods :meth:`~pandas.DataFrame.select_dtypes` to select columns + based on the dtype and :meth:`~pandas.Series.sem` to calculate the + standard error of the mean. + - Support for dateutil timezones (see :ref:`docs `). + - Support for ignoring full line comments in the :func:`~pandas.read_csv` + text parser. + - New documentation section on :ref:`Options and Settings `. + - Lots of bug fixes. + +- :ref:`Enhancements ` +- :ref:`API Changes ` +- :ref:`Performance Improvements ` +- :ref:`Experimental Changes ` +- :ref:`Bug Fixes ` + +.. _whatsnew_0141.api: + +API changes +~~~~~~~~~~~ + +- Openpyxl now raises a ValueError on construction of the openpyxl writer + instead of warning on pandas import (:issue:`7284`). 
+ +- For ``StringMethods.extract``, when no match is found, the result - only + containing ``NaN`` values - now also has ``dtype=object`` instead of + ``float`` (:issue:`7242`) + +- ``Period`` objects no longer raise a ``TypeError`` when compared using ``==`` + with another object that *isn't* a ``Period``. Instead, + when comparing a ``Period`` with another object using ``==``, if the other + object isn't a ``Period``, ``False`` is returned. (:issue:`7376`) + +- Previously, the behaviour on resetting the time or not in + ``offsets.apply``, ``rollforward`` and ``rollback`` operations differed + between offsets. With the support of the ``normalize`` keyword for all offsets (see + below) with a default value of False (preserve time), the behaviour changed for certain + offsets (BusinessMonthBegin, MonthEnd, BusinessMonthEnd, CustomBusinessMonthEnd, + BusinessYearBegin, LastWeekOfMonth, FY5253Quarter, LastWeekOfMonth, Easter): + + .. code-block:: ipython + + In [6]: from pandas.tseries import offsets + + In [7]: d = pd.Timestamp('2014-01-01 09:00') + + # old behaviour < 0.14.1 + In [8]: d + offsets.MonthEnd() + Out[8]: pd.Timestamp('2014-01-31 00:00:00') + + Starting from 0.14.1 all offsets preserve time by default. The old + behaviour can be obtained with ``normalize=True`` + + .. ipython:: python + :suppress: + + import pandas.tseries.offsets as offsets + d = pd.Timestamp('2014-01-01 09:00') + + .. ipython:: python + + # new behaviour + d + offsets.MonthEnd() + d + offsets.MonthEnd(normalize=True) + + Note that for the other offsets the default behaviour did not change. + +- Add back ``#N/A N/A`` as a default NA value in text parsing, (regression from 0.12) (:issue:`5521`) +- Raise a ``TypeError`` on inplace-setting with a ``.where`` and a non ``np.nan`` value as this is inconsistent + with a set-item expression like ``df[mask] = None`` (:issue:`7656`) + + +..
_whatsnew_0141.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- Add ``dropna`` argument to ``value_counts`` and ``nunique`` (:issue:`5569`). +- Add :meth:`~pandas.DataFrame.select_dtypes` method to allow selection of + columns based on dtype (:issue:`7316`). See :ref:`the docs `. +- All ``offsets`` supports the ``normalize`` keyword to specify whether + ``offsets.apply``, ``rollforward`` and ``rollback`` resets the time (hour, + minute, etc) or not (default ``False``, preserves time) (:issue:`7156`): + + .. code-block:: python + + import pandas.tseries.offsets as offsets + + day = offsets.Day() + day.apply(pd.Timestamp('2014-01-01 09:00')) + + day = offsets.Day(normalize=True) + day.apply(pd.Timestamp('2014-01-01 09:00')) + +- ``PeriodIndex`` is represented as the same format as ``DatetimeIndex`` (:issue:`7601`) +- ``StringMethods`` now work on empty Series (:issue:`7242`) +- The file parsers ``read_csv`` and ``read_table`` now ignore line comments provided by + the parameter `comment`, which accepts only a single character for the C reader. + In particular, they allow for comments before file data begins (:issue:`2685`) +- Add ``NotImplementedError`` for simultaneous use of ``chunksize`` and ``nrows`` + for read_csv() (:issue:`6774`). +- Tests for basic reading of public S3 buckets now exist (:issue:`7281`). +- ``read_html`` now sports an ``encoding`` argument that is passed to the + underlying parser library. You can use this to read non-ascii encoded web + pages (:issue:`7323`). +- ``read_excel`` now supports reading from URLs in the same way + that ``read_csv`` does. (:issue:`6809`) +- Support for dateutil timezones, which can now be used in the same way as + pytz timezones across pandas. (:issue:`4688`) + + .. ipython:: python + + rng = pd.date_range('3/6/2012 00:00', periods=10, freq='D', + tz='dateutil/Europe/London') + rng.tz + + See :ref:`the docs `. 
+ +- Implemented ``sem`` (standard error of the mean) operation for ``Series``, + ``DataFrame``, ``Panel``, and ``Groupby`` (:issue:`6897`) +- Add ``nlargest`` and ``nsmallest`` to the ``Series`` ``groupby`` whitelist, + which means you can now use these methods on a ``SeriesGroupBy`` object + (:issue:`7053`). +- All offsets ``apply``, ``rollforward`` and ``rollback`` can now handle ``np.datetime64``, which previously resulted in ``ApplyTypeError`` (:issue:`7452`) +- ``Period`` and ``PeriodIndex`` can contain ``NaT`` in their values (:issue:`7485`) +- Support pickling ``Series``, ``DataFrame`` and ``Panel`` objects with + non-unique labels along *item* axis (``index``, ``columns`` and ``items`` + respectively) (:issue:`7370`). +- Improved inference of datetime/timedelta with mixed null objects. Regression from 0.13.1 in interpretation of an object Index + with all null elements (:issue:`7431`) + +.. _whatsnew_0141.performance: + +Performance +~~~~~~~~~~~ +- Improvements in dtype inference for numeric operations, yielding performance gains for dtypes: ``int64``, ``timedelta64``, ``datetime64`` (:issue:`7223`) +- Improvements in Series.transform for significant performance gains (:issue:`6496`) +- Improvements in DataFrame.transform with ufuncs and built-in grouper functions for significant performance gains (:issue:`7383`) +- Regression in groupby aggregation of datetime64 dtypes (:issue:`7555`) +- Improvements in ``MultiIndex.from_product`` for large iterables (:issue:`7627`) + + +.. _whatsnew_0141.experimental: + +Experimental +~~~~~~~~~~~~ + +- ``pandas.io.data.Options`` has a new method, ``get_all_data``, and now consistently returns a + MultiIndexed ``DataFrame`` (:issue:`5602`) +- ``io.gbq.read_gbq`` and ``io.gbq.to_gbq`` were refactored to remove the + dependency on the Google ``bq.py`` command line client.
This submodule + now uses ``httplib2`` and the Google ``apiclient`` and ``oauth2client`` API client + libraries which should be more stable and, therefore, reliable than + ``bq.py``. See :ref:`the docs `. (:issue:`6937`). + + +.. _whatsnew_0141.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in ``DataFrame.where`` with a symmetric shaped frame and a passed other of a DataFrame (:issue:`7506`) +- Bug in Panel indexing with a MultiIndex axis (:issue:`7516`) +- Regression in datetimelike slice indexing with a duplicated index and non-exact end-points (:issue:`7523`) +- Bug in setitem with list-of-lists and single vs mixed types (:issue:`7551`) +- Bug in time ops with non-aligned Series (:issue:`7500`) +- Bug in timedelta inference when assigning an incomplete Series (:issue:`7592`) +- Bug in groupby ``.nth`` with a Series and integer-like column name (:issue:`7559`) +- Bug in ``Series.get`` with a boolean accessor (:issue:`7407`) +- Bug in ``value_counts`` where ``NaT`` did not qualify as missing (``NaN``) (:issue:`7423`) +- Bug in ``to_timedelta`` that accepted invalid units and misinterpreted 'm/h' (:issue:`7611`, :issue:`6423`) +- Bug in line plot doesn't set correct ``xlim`` if ``secondary_y=True`` (:issue:`7459`) +- Bug in grouped ``hist`` and ``scatter`` plots use old ``figsize`` default (:issue:`7394`) +- Bug in plotting subplots with ``DataFrame.plot``, ``hist`` clears passed ``ax`` even if the number of subplots is one (:issue:`7391`). +- Bug in plotting subplots with ``DataFrame.boxplot`` with ``by`` kw raises ``ValueError`` if the number of subplots exceeds 1 (:issue:`7391`).
+- Bug in subplots displays ``ticklabels`` and ``labels`` in different rule (:issue:`5897`) +- Bug in ``Panel.apply`` with a MultiIndex as an axis (:issue:`7469`) +- Bug in ``DatetimeIndex.insert`` doesn't preserve ``name`` and ``tz`` (:issue:`7299`) +- Bug in ``DatetimeIndex.asobject`` doesn't preserve ``name`` (:issue:`7299`) +- Bug in MultiIndex slicing with datetimelike ranges (strings and Timestamps), (:issue:`7429`) +- Bug in ``Index.min`` and ``max`` doesn't handle ``nan`` and ``NaT`` properly (:issue:`7261`) +- Bug in ``PeriodIndex.min/max`` results in ``int`` (:issue:`7609`) +- Bug in ``resample`` where ``fill_method`` was ignored if you passed ``how`` (:issue:`2073`) +- Bug in ``TimeGrouper`` doesn't exclude column specified by ``key`` (:issue:`7227`) +- Bug in ``DataFrame`` and ``Series`` bar and barh plot raises ``TypeError`` when ``bottom`` + and ``left`` keyword is specified (:issue:`7226`) +- Bug in ``DataFrame.hist`` raises ``TypeError`` when it contains non numeric column (:issue:`7277`) +- Bug in ``Index.delete`` does not preserve ``name`` and ``freq`` attributes (:issue:`7302`) +- Bug in ``DataFrame.query()``/``eval`` where local string variables with the @ + sign were being treated as temporaries attempting to be deleted + (:issue:`7300`). +- Bug in ``Float64Index`` which didn't allow duplicates (:issue:`7149`). +- Bug in ``DataFrame.replace()`` where truthy values were being replaced + (:issue:`7140`). +- Bug in ``StringMethods.extract()`` where a single match group Series + would use the matcher's name instead of the group name (:issue:`7313`). +- Bug in ``isnull()`` when ``mode.use_inf_as_null == True`` where isnull + wouldn't test ``True`` when it encountered an ``inf``/``-inf`` + (:issue:`7315`). 
+- Bug in ``inferred_freq`` results in None for eastern hemisphere timezones (:issue:`7310`) +- Bug in ``Easter`` returns incorrect date when offset is negative (:issue:`7195`) +- Bug in broadcasting with ``.div``, integer dtypes and divide-by-zero (:issue:`7325`) +- Bug in ``CustomBusinessDay.apply`` raises ``NameError`` when ``np.datetime64`` object is passed (:issue:`7196`) +- Bug in ``MultiIndex.append``, ``concat`` and ``pivot_table`` don't preserve timezone (:issue:`6606`) +- Bug in ``.loc`` with a list of indexers on a single-multi index level (that is not nested) (:issue:`7349`) +- Bug in ``Series.map`` when mapping a dict with tuple keys of different lengths (:issue:`7333`) +- All ``StringMethods`` now work on empty Series (:issue:`7242`) +- Fix delegation of ``read_sql`` to ``read_sql_query`` when query does not contain 'select' (:issue:`7324`). +- Bug where a string column name assignment to a ``DataFrame`` with a + ``Float64Index`` raised a ``TypeError`` during a call to ``np.isnan`` + (:issue:`7366`). +- Bug where ``NDFrame.replace()`` didn't correctly replace objects with + ``Period`` values (:issue:`7379`). +- Bug in ``.ix`` getitem should always return a Series (:issue:`7150`) +- Bug in MultiIndex slicing with incomplete indexers (:issue:`7399`) +- Bug in MultiIndex slicing with a step in a sliced level (:issue:`7400`) +- Bug where negative indexers in ``DatetimeIndex`` were not correctly sliced + (:issue:`7408`) +- Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`, + :issue:`7409`). +- Bug where bool objects were converted to ``nan`` in ``convert_objects`` + (:issue:`7416`).
+- Bug in ``quantile`` ignoring the axis keyword argument (:issue:`7306`) +- Bug where ``nanops._maybe_null_out`` doesn't work with complex numbers + (:issue:`7353`) +- Bug in several ``nanops`` functions when ``axis==0`` for + 1-dimensional ``nan`` arrays (:issue:`7354`) +- Bug where ``nanops.nanmedian`` doesn't work when ``axis==None`` + (:issue:`7352`) +- Bug where ``nanops._has_infs`` doesn't work with many dtypes + (:issue:`7357`) +- Bug in ``StataReader.data`` where reading a 0-observation dta failed (:issue:`7369`) +- Bug in ``StataReader`` when reading Stata 13 (117) files containing fixed width strings (:issue:`7360`) +- Bug in ``StataWriter`` where encoding was ignored (:issue:`7286`) +- Bug in ``DatetimeIndex`` comparison doesn't handle ``NaT`` properly (:issue:`7529`) +- Bug in passing input with ``tzinfo`` to some offsets ``apply``, ``rollforward`` or ``rollback`` resets ``tzinfo`` or raises ``ValueError`` (:issue:`7465`) +- Bug in ``DatetimeIndex.to_period``, ``PeriodIndex.asobject``, ``PeriodIndex.to_timestamp`` doesn't preserve ``name`` (:issue:`7485`) +- Bug in ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` handle ``NaT`` incorrectly (:issue:`7228`) +- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may return normal ``datetime`` (:issue:`7502`) +- Bug in ``resample`` raises ``ValueError`` when target contains ``NaT`` (:issue:`7227`) +- Bug in ``Timestamp.tz_localize`` resets ``nanosecond`` info (:issue:`7534`) +- Bug in ``DatetimeIndex.asobject`` raises ``ValueError`` when it contains ``NaT`` (:issue:`7539`) +- Bug in ``Timestamp.__new__`` doesn't preserve nanosecond properly (:issue:`7610`) +- Bug in ``Index.astype(float)`` where it would return an ``object`` dtype + ``Index`` (:issue:`7464`). 
+- Bug in ``DataFrame.reset_index`` loses ``tz`` (:issue:`3950`) +- Bug in ``DatetimeIndex.freqstr`` raises ``AttributeError`` when ``freq`` is ``None`` (:issue:`7606`) +- Bug in ``GroupBy.size`` created by ``TimeGrouper`` raises ``AttributeError`` (:issue:`7453`) +- Bug in single column bar plot is misaligned (:issue:`7498`). +- Bug in area plot with tz-aware time series raises ``ValueError`` (:issue:`7471`) +- Bug in non-monotonic ``Index.union`` may preserve ``name`` incorrectly (:issue:`7458`) +- Bug in ``DatetimeIndex.intersection`` doesn't preserve timezone (:issue:`4690`) +- Bug in ``rolling_var`` where a window larger than the array would raise an error (:issue:`7297`) +- Bug with last plotted timeseries dictating ``xlim`` (:issue:`2960`) +- Bug with ``secondary_y`` axis not being considered for timeseries ``xlim`` (:issue:`3490`) +- Bug in ``Float64Index`` assignment with a non scalar indexer (:issue:`7586`) +- Bug in ``pandas.core.strings.str_contains`` does not properly match in a case insensitive fashion when ``regex=False`` and ``case=False`` (:issue:`7505`) +- Bug in ``expanding_cov``, ``expanding_corr``, ``rolling_cov``, and ``rolling_corr`` for two arguments with mismatched index (:issue:`7512`) +- Bug in ``to_sql`` taking the boolean column as text column (:issue:`7678`) +- Bug in grouped ``hist`` doesn't handle ``rot`` kw and ``sharex`` kw properly (:issue:`7234`) +- Bug in ``.loc`` performing fallback integer indexing with ``object`` dtype indices (:issue:`7496`) +- Bug (regression) in ``PeriodIndex`` constructor when passed ``Series`` objects (:issue:`7701`). + + +.. _whatsnew_0.14.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.14.0..v0.14.1 diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst new file mode 100644 index 00000000..95e354e4 --- /dev/null +++ b/doc/source/whatsnew/v0.15.0.rst @@ -0,0 +1,1242 @@ +..
_whatsnew_0150: + +v0.15.0 (October 18, 2014) +-------------------------- + +{{ header }} + + +This is a major release from 0.14.1 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +.. warning:: + + pandas >= 0.15.0 will no longer support compatibility with NumPy versions < + 1.7.0. If you want to use the latest versions of pandas, please upgrade to + NumPy >= 1.7.0 (:issue:`7711`) + +- Highlights include: + + - The ``Categorical`` type was integrated as a first-class pandas type, see :ref:`here ` + - New scalar type ``Timedelta``, and a new index type ``TimedeltaIndex``, see :ref:`here ` + - New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties ` + - New DataFrame default display for ``df.info()`` to include memory usage, see :ref:`Memory Usage ` + - ``read_csv`` will now by default ignore blank lines when parsing, see :ref:`here ` + - API change in using Indexes in set operations, see :ref:`here ` + - Enhancements in the handling of timezones, see :ref:`here ` + - A lot of improvements to the rolling and expanding moment functions, see :ref:`here ` + - Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring ` + - dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`) + - Split indexing documentation into :ref:`Indexing and Selecting Data ` and :ref:`MultiIndex / Advanced Indexing ` + - Split out string methods documentation into :ref:`Working with Text Data ` + +- Check the :ref:`API Changes ` and :ref:`deprecations ` before updating + +- :ref:`Other Enhancements ` + +- :ref:`Performance Improvements ` + +- :ref:`Bug Fixes ` + +.. 
warning:: + + In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray`` + but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This change allows very easy sub-classing and creation of new index types. This should be + a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `) + +.. warning:: + + The refactoring in :class:`~pandas.Categorical` changed the two argument constructor from + "codes/labels and levels" to "values and levels (now called 'categories')". This can lead to subtle bugs. If you use + :class:`~pandas.Categorical` directly, please audit your code before updating to this pandas + version and change it to use the :meth:`~pandas.Categorical.from_codes` constructor. See more on ``Categorical`` :ref:`here ` + + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0150.cat: + +Categoricals in Series/DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`~pandas.Categorical` can now be included in `Series` and `DataFrames` and gained new +methods to manipulate. Thanks to Jan Schulz for much of this API/implementation. (:issue:`3943`, :issue:`5313`, :issue:`5314`, +:issue:`7444`, :issue:`7839`, :issue:`7848`, :issue:`7864`, :issue:`7914`, :issue:`7768`, :issue:`8006`, :issue:`3678`, +:issue:`8075`, :issue:`8076`, :issue:`8143`, :issue:`8453`, :issue:`8518`). + +For full docs, see the :ref:`categorical introduction ` and the +:ref:`API documentation `. + +.. 
ipython:: python + :okwarning: + + df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) + + df["grade"] = df["raw_grade"].astype("category") + df["grade"] + + # Rename the categories + df["grade"].cat.categories = ["very good", "good", "very bad"] + + # Reorder the categories and simultaneously add the missing categories + df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", + "medium", "good", "very good"]) + df["grade"] + df.sort_values("grade") + df.groupby("grade").size() + +- ``pandas.core.group_agg`` and ``pandas.core.factor_agg`` were removed. As an alternative, construct + a dataframe and use ``df.groupby().agg()``. + +- Supplying "codes/labels and levels" to the :class:`~pandas.Categorical` constructor is not + supported anymore. Supplying two arguments to the constructor is now interpreted as + "values and levels (now called 'categories')". Please change your code to use the :meth:`~pandas.Categorical.from_codes` + constructor. + +- The ``Categorical.labels`` attribute was renamed to ``Categorical.codes`` and is read + only. If you want to manipulate codes, please use one of the + :ref:`API methods on Categoricals `. + +- The ``Categorical.levels`` attribute is renamed to ``Categorical.categories``. + + +.. _whatsnew_0150.timedeltaindex: + +TimedeltaIndex/Scalar +^^^^^^^^^^^^^^^^^^^^^ + +We introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner, +but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes. +This type is very similar to how ``Timestamp`` works for ``datetimes``. It is a nice-API box for the type. See the :ref:`docs `. +(:issue:`3009`, :issue:`4533`, :issue:`8209`, :issue:`8187`, :issue:`8190`, :issue:`7869`, :issue:`7661`, :issue:`8345`, :issue:`8471`) + +.. 
warning:: + + ``Timedelta`` scalars (and ``TimedeltaIndex``) component fields are *not the same* as the component fields on a ``datetime.timedelta`` object. For example, ``.seconds`` on a ``datetime.timedelta`` object returns the total number of seconds combined between ``hours``, ``minutes`` and ``seconds``. In contrast, the pandas ``Timedelta`` breaks out hours, minutes, microseconds and nanoseconds separately. + + .. code-block:: ipython + + # Timedelta accessor + In [9]: tds = pd.Timedelta('31 days 5 min 3 sec') + + In [10]: tds.minutes + Out[10]: 5L + + In [11]: tds.seconds + Out[11]: 3L + + # datetime.timedelta accessor + # this is 5 minutes * 60 + 3 seconds + In [12]: tds.to_pytimedelta().seconds + Out[12]: 303 + + **Note**: this is no longer true starting from v0.16.0, where full + compatibility with ``datetime.timedelta`` is introduced. See the + :ref:`0.16.0 whatsnew entry ` + +.. warning:: + + Prior to 0.15.0 ``pd.to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input. + It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input. + + The arguments to ``pd.to_timedelta`` are now ``(arg,unit='ns',box=True,coerce=False)``, previously were ``(arg,box=True,unit='ns')`` as these are more logical. + +Construct a scalar + +.. ipython:: python + + pd.Timedelta('1 days 06:05:01.00003') + pd.Timedelta('15.5us') + pd.Timedelta('1 hour 15.5us') + + # negative Timedeltas have this string repr + # to be more consistent with datetime.timedelta conventions + pd.Timedelta('-1us') + + # a NaT + pd.Timedelta('nan') + +Access fields for a ``Timedelta`` + +.. ipython:: python + + td = pd.Timedelta('1 hour 3m 15.5us') + td.seconds + td.microseconds + td.nanoseconds + +Construct a ``TimedeltaIndex`` + +.. ipython:: python + :suppress: + + import datetime + +.. 
ipython:: python + + pd.TimedeltaIndex(['1 days', '1 days, 00:00:05', + np.timedelta64(2, 'D'), + datetime.timedelta(days=2, seconds=2)]) + +Constructing a ``TimedeltaIndex`` with a regular range + +.. ipython:: python + + pd.timedelta_range('1 days', periods=5, freq='D') + pd.timedelta_range(start='1 days', end='2 days', freq='30T') + +You can now use a ``TimedeltaIndex`` as the index of a pandas object + +.. ipython:: python + + s = pd.Series(np.arange(5), + index=pd.timedelta_range('1 days', periods=5, freq='s')) + s + +You can select with partial string selections + +.. ipython:: python + + s['1 day 00:00:02'] + s['1 day':'1 day 00:00:02'] + +Finally, the combination of ``TimedeltaIndex`` with ``DatetimeIndex`` allow certain combination operations that are ``NaT`` preserving: + +.. ipython:: python + + tdi = pd.TimedeltaIndex(['1 days', pd.NaT, '2 days']) + tdi.tolist() + dti = pd.date_range('20130101', periods=3) + dti.tolist() + + (dti + tdi).tolist() + (dti - tdi).tolist() + +- iteration of a ``Series`` e.g. ``list(Series(...))`` of ``timedelta64[ns]`` would prior to v0.15.0 return ``np.timedelta64`` for each element. These will now be wrapped in ``Timedelta``. + + +.. _whatsnew_0150.memory: + +Memory usage +^^^^^^^^^^^^ + +Implemented methods to find memory usage of a DataFrame. See the :ref:`FAQ ` for more. (:issue:`6852`). + +A new display option ``display.memory_usage`` (see :ref:`options`) sets the default behavior of the ``memory_usage`` argument in the ``df.info()`` method. By default ``display.memory_usage`` is ``True``. + +.. 
ipython:: python + + dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]', + 'complex128', 'object', 'bool'] + n = 5000 + data = {t: np.random.randint(100, size=n).astype(t) for t in dtypes} + df = pd.DataFrame(data) + df['categorical'] = df['object'].astype('category') + + df.info() + +Additionally :meth:`~pandas.DataFrame.memory_usage` is an available method for a dataframe object which returns the memory usage of each column. + +.. ipython:: python + + df.memory_usage(index=True) + + +.. _whatsnew_0150.dt: + +.dt accessor +^^^^^^^^^^^^ + +``Series`` has gained an accessor to succinctly return datetime like properties for the *values* of the Series, if its a datetime/period like Series. (:issue:`7207`) +This will return a Series, indexed like the existing Series. See the :ref:`docs ` + +.. ipython:: python + + # datetime + s = pd.Series(pd.date_range('20130101 09:10:12', periods=4)) + s + s.dt.hour + s.dt.second + s.dt.day + s.dt.freq + +This enables nice expressions like this: + +.. ipython:: python + + s[s.dt.day == 2] + +You can easily produce tz aware transformations: + +.. ipython:: python + + stz = s.dt.tz_localize('US/Eastern') + stz + stz.dt.tz + +You can also chain these types of operations: + +.. ipython:: python + + s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') + +The ``.dt`` accessor works for period and timedelta dtypes. + +.. ipython:: python + + # period + s = pd.Series(pd.period_range('20130101', periods=4, freq='D')) + s + s.dt.year + s.dt.day + +.. ipython:: python + + # timedelta + s = pd.Series(pd.timedelta_range('1 day 00:00:05', periods=4, freq='s')) + s + s.dt.days + s.dt.seconds + s.dt.components + + +.. _whatsnew_0150.tz: + +Timezone handling improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``tz_localize(None)`` for tz-aware ``Timestamp`` and ``DatetimeIndex`` now removes timezone holding local time, + previously this resulted in ``Exception`` or ``TypeError`` (:issue:`7812`) + + .. 
ipython:: python + + ts = pd.Timestamp('2014-08-01 09:00', tz='US/Eastern') + ts + ts.tz_localize(None) + + didx = pd.date_range(start='2014-08-01 09:00', freq='H', + periods=10, tz='US/Eastern') + didx + didx.tz_localize(None) + +- ``tz_localize`` now accepts the ``ambiguous`` keyword which allows for passing an array of bools + indicating whether the date belongs in DST or not, 'NaT' for setting transition times to NaT, + 'infer' for inferring DST/non-DST, and 'raise' (default) for an ``AmbiguousTimeError`` to be raised. See :ref:`the docs` for more details (:issue:`7943`) + +- ``DataFrame.tz_localize`` and ``DataFrame.tz_convert`` now accepts an optional ``level`` argument + for localizing a specific level of a MultiIndex (:issue:`7846`) + +- ``Timestamp.tz_localize`` and ``Timestamp.tz_convert`` now raise ``TypeError`` in error cases, rather than ``Exception`` (:issue:`8025`) + +- a timeseries/index localized to UTC when inserted into a Series/DataFrame will preserve the UTC timezone (rather than being a naive ``datetime64[ns]``) as ``object`` dtype (:issue:`8411`) + +- ``Timestamp.__repr__`` displays ``dateutil.tz.tzoffset`` info (:issue:`7907`) + + +.. _whatsnew_0150.roll: + +Rolling/expanding moments improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :func:`rolling_min`, :func:`rolling_max`, :func:`rolling_cov`, and :func:`rolling_corr` + now return objects with all ``NaN`` when ``len(arg) < min_periods <= window`` rather + than raising. (This makes all rolling functions consistent in this behavior). (:issue:`7766`) + + Prior to 0.15.0 + + .. ipython:: python + + s = pd.Series([10, 11, 12, 13]) + + .. code-block:: ipython + + In [15]: pd.rolling_min(s, window=10, min_periods=5) + ValueError: min_periods (5) must be <= window (4) + + New behavior + + .. 
code-block:: ipython + + In [4]: pd.rolling_min(s, window=10, min_periods=5) + Out[4]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + dtype: float64 + +- :func:`rolling_max`, :func:`rolling_min`, :func:`rolling_sum`, :func:`rolling_mean`, :func:`rolling_median`, + :func:`rolling_std`, :func:`rolling_var`, :func:`rolling_skew`, :func:`rolling_kurt`, :func:`rolling_quantile`, + :func:`rolling_cov`, :func:`rolling_corr`, :func:`rolling_corr_pairwise`, + :func:`rolling_window`, and :func:`rolling_apply` with ``center=True`` previously would return a result of the same + structure as the input ``arg`` with ``NaN`` in the final ``(window-1)/2`` entries. + + Now the final ``(window-1)/2`` entries of the result are calculated as if the input ``arg`` were followed + by ``(window-1)/2`` ``NaN`` values (or with shrinking windows, in the case of :func:`rolling_apply`). + (:issue:`7925`, :issue:`8269`) + + Prior behavior (note final value is ``NaN``): + + .. code-block:: ipython + + In [7]: pd.rolling_sum(Series(range(4)), window=3, min_periods=0, center=True) + Out[7]: + 0 1 + 1 3 + 2 6 + 3 NaN + dtype: float64 + + New behavior (note final value is ``5 = sum([2, 3, NaN])``): + + .. code-block:: ipython + + In [7]: pd.rolling_sum(pd.Series(range(4)), window=3, + ....: min_periods=0, center=True) + Out[7]: + 0 1 + 1 3 + 2 6 + 3 5 + dtype: float64 + +- :func:`rolling_window` now normalizes the weights properly in rolling mean mode (`mean=True`) so that + the calculated weighted means (e.g. 'triang', 'gaussian') are distributed about the same means as those + calculated without weighting (i.e. 'boxcar'). See :ref:`the note on normalization ` for further details. (:issue:`7618`) + + .. ipython:: python + + s = pd.Series([10.5, 8.8, 11.4, 9.7, 9.3]) + + Behavior prior to 0.15.0: + + .. code-block:: ipython + + In [39]: pd.rolling_window(s, window=3, win_type='triang', center=True) + Out[39]: + 0 NaN + 1 6.583333 + 2 6.883333 + 3 6.683333 + 4 NaN + dtype: float64 + + New behavior + + .. 
code-block:: ipython + + In [10]: pd.rolling_window(s, window=3, win_type='triang', center=True) + Out[10]: + 0 NaN + 1 9.875 + 2 10.325 + 3 10.025 + 4 NaN + dtype: float64 + +- Removed ``center`` argument from all :func:`expanding_ ` functions (see :ref:`list `), + as the results produced when ``center=True`` did not make much sense. (:issue:`7925`) + +- Added optional ``ddof`` argument to :func:`expanding_cov` and :func:`rolling_cov`. + The default value of ``1`` is backwards-compatible. (:issue:`8279`) + +- Documented the ``ddof`` argument to :func:`expanding_var`, :func:`expanding_std`, + :func:`rolling_var`, and :func:`rolling_std`. These functions' support of a + ``ddof`` argument (with a default value of ``1``) was previously undocumented. (:issue:`8064`) + +- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now interpret ``min_periods`` in the same manner that the :func:`rolling_*()` and :func:`expanding_*()` functions do: + a given result entry will be ``NaN`` if the (expanding, in this case) window does not contain + at least ``min_periods`` values. The previous behavior was to set to ``NaN`` the ``min_periods`` entries + starting with the first non- ``NaN`` value. (:issue:`7977`) + + Prior behavior (note values start at index ``2``, which is ``min_periods`` after index ``0`` + (the index of the first non-empty value)): + + .. ipython:: python + + s = pd.Series([1, None, None, None, 2, 3]) + + .. code-block:: ipython + + In [51]: ewma(s, com=3., min_periods=2) + Out[51]: + 0 NaN + 1 NaN + 2 1.000000 + 3 1.000000 + 4 1.571429 + 5 2.189189 + dtype: float64 + + New behavior (note values start at index ``4``, the location of the 2nd (since ``min_periods=2``) non-empty value): + + .. 
code-block:: ipython + + In [2]: pd.ewma(s, com=3., min_periods=2) + Out[2]: + 0 NaN + 1 NaN + 2 NaN + 3 NaN + 4 1.759644 + 5 2.383784 + dtype: float64 + +- :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now have an optional ``adjust`` argument, just like :func:`ewma` does, + affecting how the weights are calculated. + The default value of ``adjust`` is ``True``, which is backwards-compatible. + See :ref:`Exponentially weighted moment functions ` for details. (:issue:`7911`) + +- :func:`ewma`, :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, :func:`ewmcov`, and :func:`ewmcorr` + now have an optional ``ignore_na`` argument. + When ``ignore_na=False`` (the default), missing values are taken into account in the weights calculation. + When ``ignore_na=True`` (which reproduces the pre-0.15.0 behavior), missing values are ignored in the weights calculation. + (:issue:`7543`) + + .. code-block:: ipython + + In [7]: pd.ewma(pd.Series([None, 1., 8.]), com=2.) + Out[7]: + 0 NaN + 1 1.0 + 2 5.2 + dtype: float64 + + In [8]: pd.ewma(pd.Series([1., None, 8.]), com=2., + ....: ignore_na=True) # pre-0.15.0 behavior + Out[8]: + 0 1.0 + 1 1.0 + 2 5.2 + dtype: float64 + + In [9]: pd.ewma(pd.Series([1., None, 8.]), com=2., + ....: ignore_na=False) # new default + Out[9]: + 0 1.000000 + 1 1.000000 + 2 5.846154 + dtype: float64 + + .. warning:: + + By default (``ignore_na=False``) the :func:`ewm*()` functions' weights calculation + in the presence of missing values is different than in pre-0.15.0 versions. + To reproduce the pre-0.15.0 calculation of weights in the presence of missing values + one must specify explicitly ``ignore_na=True``. 
+ +- Bug in :func:`expanding_cov`, :func:`expanding_corr`, :func:`rolling_cov`, :func:`rolling_corr`, :func:`ewmcov`, and :func:`ewmcorr` + returning results with columns sorted by name and producing an error for non-unique columns; + now handles non-unique columns and returns columns in original order + (except for the case of two DataFrames with ``pairwise=False``, where behavior is unchanged) (:issue:`7542`) +- Bug in :func:`rolling_count` and :func:`expanding_*()` functions unnecessarily producing error message for zero-length data (:issue:`8056`) +- Bug in :func:`rolling_apply` and :func:`expanding_apply` interpreting ``min_periods=0`` as ``min_periods=1`` (:issue:`8080`) +- Bug in :func:`expanding_std` and :func:`expanding_var` for a single value producing a confusing error message (:issue:`7900`) +- Bug in :func:`rolling_std` and :func:`rolling_var` for a single value producing ``0`` rather than ``NaN`` (:issue:`7900`) + +- Bug in :func:`ewmstd`, :func:`ewmvol`, :func:`ewmvar`, and :func:`ewmcov` + calculation of de-biasing factors when ``bias=False`` (the default). + Previously an incorrect constant factor was used, based on ``adjust=True``, ``ignore_na=True``, + and an infinite number of observations. + Now a different factor is used for each entry, based on the actual weights + (analogous to the usual ``N/(N-1)`` factor). + In particular, for a single point a value of ``NaN`` is returned when ``bias=False``, + whereas previously a value of (approximately) ``0`` was returned. + + For example, consider the following pre-0.15.0 results for ``ewmvar(..., bias=False)``, + and the corresponding debiasing factors: + + .. ipython:: python + + s = pd.Series([1., 2., 0., 4.]) + + .. 
code-block:: ipython + + In [89]: ewmvar(s, com=2., bias=False) + Out[89]: + 0 -2.775558e-16 + 1 3.000000e-01 + 2 9.556787e-01 + 3 3.585799e+00 + dtype: float64 + + In [90]: ewmvar(s, com=2., bias=False) / ewmvar(s, com=2., bias=True) + Out[90]: + 0 1.25 + 1 1.25 + 2 1.25 + 3 1.25 + dtype: float64 + + Note that entry ``0`` is approximately 0, and the debiasing factors are a constant 1.25. + By comparison, the following 0.15.0 results have a ``NaN`` for entry ``0``, + and the debiasing factors are decreasing (towards 1.25): + + .. code-block:: ipython + + In [14]: pd.ewmvar(s, com=2., bias=False) + Out[14]: + 0 NaN + 1 0.500000 + 2 1.210526 + 3 4.089069 + dtype: float64 + + In [15]: pd.ewmvar(s, com=2., bias=False) / pd.ewmvar(s, com=2., bias=True) + Out[15]: + 0 NaN + 1 2.083333 + 2 1.583333 + 3 1.425439 + dtype: float64 + + See :ref:`Exponentially weighted moment functions ` for details. (:issue:`7912`) + + +.. _whatsnew_0150.sql: + +Improvements in the sql io module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Added support for a ``chunksize`` parameter to ``to_sql`` function. This allows DataFrame to be written in chunks and avoid packet-size overflow errors (:issue:`8062`). +- Added support for a ``chunksize`` parameter to ``read_sql`` function. Specifying this argument will return an iterator through chunks of the query result (:issue:`2908`). +- Added support for writing ``datetime.date`` and ``datetime.time`` object columns with ``to_sql`` (:issue:`6932`). +- Added support for specifying a ``schema`` to read from/write to with ``read_sql_table`` and ``to_sql`` (:issue:`7441`, :issue:`7952`). + For example: + + .. code-block:: python + + df.to_sql('table', engine, schema='other_schema') # noqa F821 + pd.read_sql_table('table', engine, schema='other_schema') # noqa F821 + +- Added support for writing ``NaN`` values with ``to_sql`` (:issue:`2754`). +- Added support for writing datetime64 columns with ``to_sql`` for all database flavors (:issue:`7103`). + + +.. 
_whatsnew_0150.api: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0150.api_breaking: + +Breaking changes +^^^^^^^^^^^^^^^^ + +API changes related to ``Categorical`` (see :ref:`here ` +for more details): + +- The ``Categorical`` constructor with two arguments changed from + "codes/labels and levels" to "values and levels (now called 'categories')". + This can lead to subtle bugs. If you use :class:`~pandas.Categorical` directly, + please audit your code by changing it to use the :meth:`~pandas.Categorical.from_codes` + constructor. + + An old function call like (prior to 0.15.0): + + .. code-block:: python + + pd.Categorical([0,1,0,2,1], levels=['a', 'b', 'c']) + + will have to be adapted to the following to keep the same behaviour: + + .. code-block:: ipython + + In [2]: pd.Categorical.from_codes([0,1,0,2,1], categories=['a', 'b', 'c']) + Out[2]: + [a, b, a, c, b] + Categories (3, object): [a, b, c] + +API changes related to the introduction of the ``Timedelta`` scalar (see +:ref:`above ` for more details): + +- Prior to 0.15.0 :func:`to_timedelta` would return a ``Series`` for list-like/Series input, + and a ``np.timedelta64`` for scalar input. It will now return a ``TimedeltaIndex`` for + list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input. + +For API changes related to the rolling and expanding functions, see detailed overview :ref:`above `. + +Other notable API changes: + +- Consistency when indexing with ``.loc`` and a list-like indexer when no values are found. + + .. ipython:: python + + df = pd.DataFrame([['a'], ['b']], index=[1, 2]) + df + + In prior versions there was a difference in these two constructs: + + - ``df.loc[[3]]`` would return a frame reindexed by 3 (with all ``np.nan`` values) + - ``df.loc[[3],:]`` would raise ``KeyError``. + + Both will now raise a ``KeyError``. 
The rule is that *at least 1* indexer must be found when using a list-like and ``.loc`` (:issue:`7999`) + + Furthermore in prior versions these were also different: + + - ``df.loc[[1,3]]`` would return a frame reindexed by [1,3] + - ``df.loc[[1,3],:]`` would raise ``KeyError``. + + Both will now return a frame reindex by [1,3]. E.g. + + .. code-block:: ipython + + In [3]: df.loc[[1, 3]] + Out[3]: + 0 + 1 a + 3 NaN + + In [4]: df.loc[[1, 3], :] + Out[4]: + 0 + 1 a + 3 NaN + + This can also be seen in multi-axis indexing with a ``Panel``. + + .. code-block:: python + + >>> p = pd.Panel(np.arange(2 * 3 * 4).reshape(2, 3, 4), + ... items=['ItemA', 'ItemB'], + ... major_axis=[1, 2, 3], + ... minor_axis=['A', 'B', 'C', 'D']) + >>> p + + Dimensions: 2 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemB + Major_axis axis: 1 to 3 + Minor_axis axis: A to D + + + The following would raise ``KeyError`` prior to 0.15.0: + + .. code-block:: ipython + + In [5]: + Out[5]: + ItemA ItemD + 1 3 NaN + 2 7 NaN + 3 11 NaN + + Furthermore, ``.loc`` will raise If no values are found in a MultiIndex with a list-like indexer: + + .. ipython:: python + :okexcept: + + s = pd.Series(np.arange(3, dtype='int64'), + index=pd.MultiIndex.from_product([['A'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + ).sort_index() + s + try: + s.loc[['D']] + except KeyError as e: + print("KeyError: " + str(e)) + +- Assigning values to ``None`` now considers the dtype when choosing an 'empty' value (:issue:`7941`). + + Previously, assigning to ``None`` in numeric containers changed the + dtype to object (or errored, depending on the call). It now uses + ``NaN``: + + .. ipython:: python + + s = pd.Series([1, 2, 3]) + s.loc[0] = None + s + + ``NaT`` is now used similarly for datetime containers. + + For object containers, we now preserve ``None`` values (previously these + were converted to ``NaN`` values). + + .. 
ipython:: python + + s = pd.Series(["a", "b", "c"]) + s.loc[0] = None + s + + To insert a ``NaN``, you must explicitly use ``np.nan``. See the :ref:`docs `. + +- In prior versions, updating a pandas object inplace would not reflect in other python references to this object. (:issue:`8511`, :issue:`5104`) + + .. ipython:: python + + s = pd.Series([1, 2, 3]) + s2 = s + s += 1.5 + + Behavior prior to v0.15.0 + + .. code-block:: ipython + + + # the original object + In [5]: s + Out[5]: + 0 2.5 + 1 3.5 + 2 4.5 + dtype: float64 + + + # a reference to the original object + In [7]: s2 + Out[7]: + 0 1 + 1 2 + 2 3 + dtype: int64 + + This is now the correct behavior + + .. ipython:: python + + # the original object + s + + # a reference to the original object + s2 + +.. _whatsnew_0150.blanklines: + +- Made both the C-based and Python engines for `read_csv` and `read_table` ignore empty lines in input as well as + white space-filled lines, as long as ``sep`` is not white space. This is an API change + that can be controlled by the keyword parameter ``skip_blank_lines``. See :ref:`the docs ` (:issue:`4466`) + +- A timeseries/index localized to UTC when inserted into a Series/DataFrame will preserve the UTC timezone + and inserted as ``object`` dtype rather than being converted to a naive ``datetime64[ns]`` (:issue:`8411`). + +- Bug in passing a ``DatetimeIndex`` with a timezone that was not being retained in DataFrame construction from a dict (:issue:`7822`) + + In prior versions this would drop the timezone, now it retains the timezone, + but gives a column of ``object`` dtype: + + .. ipython:: python + + i = pd.date_range('1/1/2011', periods=3, freq='10s', tz='US/Eastern') + i + df = pd.DataFrame({'a': i}) + df + df.dtypes + + Previously this would have yielded a column of ``datetime64`` dtype, but without timezone info. 
+ + The behaviour of assigning a column to an existing dataframe as `df['a'] = i` + remains unchanged (this already returned an ``object`` column with a timezone). + +- When passing multiple levels to :meth:`~pandas.DataFrame.stack()`, it will now raise a ``ValueError`` when the + levels aren't all level names or all level numbers (:issue:`7660`). See + :ref:`Reshaping by stacking and unstacking `. + +- Raise a ``ValueError`` in ``df.to_hdf`` with 'fixed' format, if ``df`` has non-unique columns as the resulting file will be broken (:issue:`7761`) + +- ``SettingWithCopy`` raise/warnings (according to the option ``mode.chained_assignment``) will now be issued when setting a value on a sliced mixed-dtype DataFrame using chained-assignment. (:issue:`7845`, :issue:`7950`) + + .. code-block:: python + + In [1]: df = pd.DataFrame(np.arange(0, 9), columns=['count']) + + In [2]: df['group'] = 'b' + + In [3]: df.iloc[0:5]['group'] = 'a' + /usr/local/bin/ipython:1: SettingWithCopyWarning: + A value is trying to be set on a copy of a slice from a DataFrame. + Try using .loc[row_indexer,col_indexer] = value instead + + See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy + +- ``merge``, ``DataFrame.merge``, and ``ordered_merge`` now return the same type + as the ``left`` argument (:issue:`7737`). + +- Previously an enlargement with a mixed-dtype frame would act unlike ``.append`` which will preserve dtypes (related :issue:`2578`, :issue:`8176`): + + .. ipython:: python + + df = pd.DataFrame([[True, 1], [False, 2]], + columns=["female", "fitness"]) + df + df.dtypes + + # dtypes are now preserved + df.loc[2] = df.loc[1] + df + df.dtypes + +- ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of ``DataFrame.to_csv()`` (:issue:`8215`). + +- ``read_hdf`` now raises ``IOError`` when a file that doesn't exist is passed in. 
Previously, a new, empty file was created, and a ``KeyError`` raised (:issue:`7715`). + +- ``DataFrame.info()`` now ends its output with a newline character (:issue:`8114`) +- Concatenating no objects will now raise a ``ValueError`` rather than a bare ``Exception``. +- Merge errors will now be sub-classes of ``ValueError`` rather than raw ``Exception`` (:issue:`8501`) +- ``DataFrame.plot`` and ``Series.plot`` keywords now have consistent orders (:issue:`8037`) + + +.. _whatsnew_0150.refactoring: + +Internal refactoring +^^^^^^^^^^^^^^^^^^^^ + +In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray`` +but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This +change allows very easy sub-classing and creation of new index types. This should be +a transparent change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`, :issue:`8024`, :issue:`8367`, :issue:`7997`, :issue:`8522`): + +- you may need to unpickle pandas version < 0.15.0 pickles using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs ` +- when plotting with a ``PeriodIndex``, the matplotlib internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex`` (this is similar to how a ``DatetimeIndex`` passes arrays of ``datetimes`` now) +- MultiIndexes will now raise similarly to other pandas objects w.r.t. truth testing, see :ref:`here ` (:issue:`7897`). +- When plotting a DatetimeIndex directly with matplotlib's `plot` function, + the axis labels will no longer be formatted as dates but as integers (the + internal representation of a ``datetime64``). **UPDATE** This is fixed + in 0.15.1, see :ref:`here `. + +.. _whatsnew_0150.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The ``Categorical`` attributes ``labels`` and ``levels`` are + deprecated and renamed to ``codes`` and ``categories``. 
+- The ``outtype`` argument to ``pd.DataFrame.to_dict`` has been deprecated in favor of ``orient``. (:issue:`7840`) +- The ``convert_dummies`` method has been deprecated in favor of + ``get_dummies`` (:issue:`8140`) +- The ``infer_dst`` argument in ``tz_localize`` will be deprecated in favor of + ``ambiguous`` to allow for more flexibility in dealing with DST transitions. + Replace ``infer_dst=True`` with ``ambiguous='infer'`` for the same behavior (:issue:`7943`). + See :ref:`the docs` for more details. +- The top-level ``pd.value_range`` has been deprecated and can be replaced by ``.describe()`` (:issue:`8481`) + +.. _whatsnew_0150.index_set_ops: + +- The ``Index`` set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``. Further the method name ``Index.diff()`` is deprecated and can be replaced by ``Index.difference()`` (:issue:`8226`) + + .. code-block:: python + + # + + pd.Index(['a', 'b', 'c']) + pd.Index(['b', 'c', 'd']) + + # should be replaced by + pd.Index(['a', 'b', 'c']).union(pd.Index(['b', 'c', 'd'])) + + .. code-block:: python + + # - + pd.Index(['a', 'b', 'c']) - pd.Index(['b', 'c', 'd']) + + # should be replaced by + pd.Index(['a', 'b', 'c']).difference(pd.Index(['b', 'c', 'd'])) + +- The ``infer_types`` argument to :func:`~pandas.read_html` now has no + effect and is deprecated (:issue:`7762`, :issue:`7032`). + + +.. _whatsnew_0150.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Remove ``DataFrame.delevel`` method in favor of ``DataFrame.reset_index`` + + + +.. 
_whatsnew_0150.enhancements: + +Enhancements +~~~~~~~~~~~~ + +Enhancements in the importing/exporting of Stata files: + +- Added support for bool, uint8, uint16 and uint32 data types in ``to_stata`` (:issue:`7097`, :issue:`7365`) +- Added conversion option when importing Stata files (:issue:`8527`) +- ``DataFrame.to_stata`` and ``StataWriter`` check string length for + compatibility with limitations imposed in dta files where fixed-width + strings must contain 244 or fewer characters. Attempting to write Stata + dta files with strings longer than 244 characters raises a ``ValueError``. (:issue:`7858`) +- ``read_stata`` and ``StataReader`` can import missing data information into a + ``DataFrame`` by setting the argument ``convert_missing`` to ``True``. When + using this option, missing values are returned as ``StataMissingValue`` + objects and columns containing missing values have ``object`` data type. (:issue:`8045`) + +Enhancements in the plotting functions: + +- Added ``layout`` keyword to ``DataFrame.plot``. You can pass a tuple of ``(rows, columns)``, one of which can be ``-1`` to automatically infer (:issue:`6667`, :issue:`8071`). +- Allow passing multiple axes to ``DataFrame.plot``, ``hist`` and ``boxplot`` (:issue:`5353`, :issue:`6970`, :issue:`7069`) +- Added support for ``c``, ``colormap`` and ``colorbar`` arguments for ``DataFrame.plot`` with ``kind='scatter'`` (:issue:`7780`) +- Histogram from ``DataFrame.plot`` with ``kind='hist'`` (:issue:`7809`), See :ref:`the docs`. +- Boxplot from ``DataFrame.plot`` with ``kind='box'`` (:issue:`7998`), See :ref:`the docs`. + +Other: + +- ``read_csv`` now has a keyword parameter ``float_precision`` which specifies which floating-point converter the C engine should use during parsing, see :ref:`here ` (:issue:`8002`, :issue:`8044`) + +- Added ``searchsorted`` method to ``Series`` objects (:issue:`7447`) + +- :func:`describe` on mixed-types DataFrames is more flexible. 
Type-based column filtering is now possible via the ``include``/``exclude`` arguments. + See the :ref:`docs ` (:issue:`8164`). + + .. ipython:: python + + df = pd.DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, + 'catB': ['a', 'b', 'c', 'd'] * 6, + 'numC': np.arange(24), + 'numD': np.arange(24.) + .5}) + df.describe(include=["object"]) + df.describe(include=["number", "object"], exclude=["float"]) + + Requesting all columns is possible with the shorthand 'all' + + .. ipython:: python + + df.describe(include='all') + + Without those arguments, ``describe`` will behave as before, including only numerical columns or, if none are, only categorical columns. See also the :ref:`docs ` + +- Added ``split`` as an option to the ``orient`` argument in ``pd.DataFrame.to_dict``. (:issue:`7840`) + +- The ``get_dummies`` method can now be used on DataFrames. By default only + categorical columns are encoded as 0's and 1's, while other columns are + left untouched. + + .. ipython:: python + + df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['c', 'c', 'b'], + 'C': [1, 2, 3]}) + pd.get_dummies(df) + +- ``PeriodIndex`` supports ``resolution`` as the same as ``DatetimeIndex`` (:issue:`7708`) +- ``pandas.tseries.holiday`` has added support for additional holidays and ways to observe holidays (:issue:`7070`) +- ``pandas.tseries.holiday.Holiday`` now supports a list of offsets in Python3 (:issue:`7070`) +- ``pandas.tseries.holiday.Holiday`` now supports a days_of_week parameter (:issue:`7070`) +- ``GroupBy.nth()`` now supports selecting multiple nth values (:issue:`7910`) + + .. 
ipython:: python + + business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B') + df = pd.DataFrame(1, index=business_dates, columns=['a', 'b']) + # get the first, 4th, and last date index for each month + df.groupby([df.index.year, df.index.month]).nth([0, 3, -1]) + +- ``Period`` and ``PeriodIndex`` support addition/subtraction with ``timedelta``-likes (:issue:`7966`) + + If ``Period`` freq is ``D``, ``H``, ``T``, ``S``, ``L``, ``U``, ``N``, ``Timedelta``-like can be added if the result can have the same freq. Otherwise, only the same ``offsets`` can be added. + + .. ipython:: python + + idx = pd.period_range('2014-07-01 09:00', periods=5, freq='H') + idx + idx + pd.offsets.Hour(2) + idx + pd.Timedelta('120m') + + idx = pd.period_range('2014-07', periods=5, freq='M') + idx + idx + pd.offsets.MonthEnd(3) + +- Added experimental compatibility with ``openpyxl`` for versions >= 2.0. The ``DataFrame.to_excel`` + method ``engine`` keyword now recognizes ``openpyxl1`` and ``openpyxl2`` + which will explicitly require openpyxl v1 and v2 respectively, failing if + the requested version is not available. The ``openpyxl`` engine is now a + meta-engine that automatically uses whichever version of openpyxl is + installed. (:issue:`7177`) + +- ``DataFrame.fillna`` can now accept a ``DataFrame`` as a fill value (:issue:`8377`) + +- Passing multiple levels to :meth:`~pandas.DataFrame.stack()` will now work when multiple level + numbers are passed (:issue:`7660`). See + :ref:`Reshaping by stacking and unstacking `. + +- :func:`set_names`, :func:`set_labels`, and :func:`set_levels` methods now take an optional ``level`` keyword argument to allow modification of specific level(s) of a MultiIndex. Additionally :func:`set_names` now accepts a scalar string value when operating on an ``Index`` or on a specific level of a ``MultiIndex`` (:issue:`7792`) + + ..
ipython:: python + + idx = pd.MultiIndex.from_product([['a'], range(3), list("pqr")], + names=['foo', 'bar', 'baz']) + idx.set_names('qux', level=0) + idx.set_names(['qux', 'corge'], level=[0, 1]) + idx.set_levels(['a', 'b', 'c'], level='bar') + idx.set_levels([['a', 'b', 'c'], [1, 2, 3]], level=[1, 2]) + +- ``Index.isin`` now supports a ``level`` argument to specify which index level + to use for membership tests (:issue:`7892`, :issue:`7890`) + + .. code-block:: ipython + + In [1]: idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b', 'c']]) + + In [2]: idx.values + Out[2]: array([(0, 'a'), (0, 'b'), (0, 'c'), (1, 'a'), (1, 'b'), (1, 'c')], dtype=object) + + In [3]: idx.isin(['a', 'c', 'e'], level=1) + Out[3]: array([ True, False, True, True, False, True], dtype=bool) + +- ``Index`` now supports ``duplicated`` and ``drop_duplicates``. (:issue:`4060`) + + .. ipython:: python + + idx = pd.Index([1, 2, 3, 4, 1, 2]) + idx + idx.duplicated() + idx.drop_duplicates() + +- add ``copy=True`` argument to ``pd.concat`` to enable pass through of complete blocks (:issue:`8252`) + +- Added support for numpy 1.8+ data types (``bool_``, ``int_``, ``float_``, ``string_``) for conversion to R dataframe (:issue:`8400`) + + + +.. 
_whatsnew_0150.performance: + +Performance +~~~~~~~~~~~ + +- Performance improvements in ``DatetimeIndex.__iter__`` to allow faster iteration (:issue:`7683`) +- Performance improvements in ``Period`` creation (and ``PeriodIndex`` setitem) (:issue:`5155`) +- Improvements in Series.transform for significant performance gains (revised) (:issue:`6496`) +- Performance improvements in ``StataReader`` when reading large files (:issue:`8040`, :issue:`8073`) +- Performance improvements in ``StataWriter`` when writing large files (:issue:`8079`) +- Performance and memory usage improvements in multi-key ``groupby`` (:issue:`8128`) +- Performance improvements in groupby ``.agg`` and ``.apply`` where builtins max/min were not mapped to numpy/cythonized versions (:issue:`7722`) +- Performance improvement in writing to sql (``to_sql``) of up to 50% (:issue:`8208`). +- Performance benchmarking of groupby for large value of ngroups (:issue:`6787`) +- Performance improvement in ``CustomBusinessDay``, ``CustomBusinessMonth`` (:issue:`8236`) +- Performance improvement for ``MultiIndex.values`` for multi-level indexes containing datetimes (:issue:`8543`) + + + +.. _whatsnew_0150.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in pivot_table, when using margins and a dict aggfunc (:issue:`8349`) +- Bug in ``read_csv`` where ``squeeze=True`` would return a view (:issue:`8217`) +- Bug in checking of table name in ``read_sql`` in certain cases (:issue:`7826`). 
+- Bug in ``DataFrame.groupby`` where ``Grouper`` does not recognize level when frequency is specified (:issue:`7885`) +- Bug in multiindexes dtypes getting mixed up when DataFrame is saved to SQL table (:issue:`8021`) +- Bug in ``Series`` 0-division with a float and integer operand dtypes (:issue:`7785`) +- Bug in ``Series.astype("unicode")`` not calling ``unicode`` on the values correctly (:issue:`7758`) +- Bug in ``DataFrame.as_matrix()`` with mixed ``datetime64[ns]`` and ``timedelta64[ns]`` dtypes (:issue:`7778`) +- Bug in ``HDFStore.select_column()`` not preserving UTC timezone info when selecting a ``DatetimeIndex`` (:issue:`7777`) +- Bug in ``to_datetime`` when ``format='%Y%m%d'`` and ``coerce=True`` are specified, where previously an object array was returned (rather than + a coerced time-series with ``NaT``), (:issue:`7930`) +- Bug in ``DatetimeIndex`` and ``PeriodIndex`` in-place addition and subtraction cause different result from normal one (:issue:`6527`) +- Bug in adding and subtracting ``PeriodIndex`` with ``PeriodIndex`` raise ``TypeError`` (:issue:`7741`) +- Bug in ``combine_first`` with ``PeriodIndex`` data raises ``TypeError`` (:issue:`3367`) +- Bug in MultiIndex slicing with missing indexers (:issue:`7866`) +- Bug in MultiIndex slicing with various edge cases (:issue:`8132`) +- Regression in MultiIndex indexing with a non-scalar type object (:issue:`7914`) +- Bug in ``Timestamp`` comparisons with ``==`` and ``int64`` dtype (:issue:`8058`) +- Bug in pickles contains ``DateOffset`` may raise ``AttributeError`` when ``normalize`` attribute is referred internally (:issue:`7748`) +- Bug in ``Panel`` when using ``major_xs`` and ``copy=False`` is passed (deprecation warning fails because of missing ``warnings``) (:issue:`8152`). 
+- Bug in pickle deserialization that failed for pre-0.14.1 containers with dup items trying to avoid ambiguity + when matching block and manager items, when there's only one block there's no ambiguity (:issue:`7794`) +- Bug in putting a ``PeriodIndex`` into a ``Series`` would convert to ``int64`` dtype, rather than ``object`` of ``Periods`` (:issue:`7932`) +- Bug in ``HDFStore`` iteration when passing a where (:issue:`8014`) +- Bug in ``DataFrameGroupby.transform`` when transforming with a passed non-sorted key (:issue:`8046`, :issue:`8430`) +- Bug in repeated timeseries line and area plot may result in ``ValueError`` or incorrect kind (:issue:`7733`) +- Bug in inference in a ``MultiIndex`` with ``datetime.date`` inputs (:issue:`7888`) +- Bug in ``get`` where an ``IndexError`` would not cause the default value to be returned (:issue:`7725`) +- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may reset nanosecond (:issue:`7697`) +- Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may raise ``AttributeError`` if ``Timestamp`` has ``dateutil`` tzinfo (:issue:`7697`) +- Bug in sorting a MultiIndex frame with a ``Float64Index`` (:issue:`8017`) +- Bug in inconsistent panel setitem with a rhs of a ``DataFrame`` for alignment (:issue:`7763`) +- Bug in ``is_superperiod`` and ``is_subperiod`` cannot handle higher frequencies than ``S`` (:issue:`7760`, :issue:`7772`, :issue:`7803`) +- Bug in 32-bit platforms with ``Series.shift`` (:issue:`8129`) +- Bug in ``PeriodIndex.unique`` returns int64 ``np.ndarray`` (:issue:`7540`) +- Bug in ``groupby.apply`` with a non-affecting mutation in the function (:issue:`8467`) +- Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises ``ValueError`` (:issue:`7746`, :issue:`7793`) +- Bug in ``DataFrame.plot`` with ``subplots=True`` may draw unnecessary minor xticks and yticks (:issue:`7801`) +- Bug in ``StataReader`` which did not read variable labels in 117 
files due to difference between Stata documentation and implementation (:issue:`7816`) +- Bug in ``StataReader`` where strings were always converted to 244 characters-fixed width irrespective of underlying string size (:issue:`7858`) +- Bug in ``DataFrame.plot`` and ``Series.plot`` may ignore ``rot`` and ``fontsize`` keywords (:issue:`7844`) +- Bug in ``DatetimeIndex.value_counts`` doesn't preserve tz (:issue:`7735`) +- Bug in ``PeriodIndex.value_counts`` results in ``Int64Index`` (:issue:`7735`) +- Bug in ``DataFrame.join`` when doing left join on index and there are multiple matches (:issue:`5391`) +- Bug in ``GroupBy.transform()`` where int groups with a transform that + didn't preserve the index were incorrectly truncated (:issue:`7972`). +- Bug in ``groupby`` where callable objects without name attributes would take the wrong path, + and produce a ``DataFrame`` instead of a ``Series`` (:issue:`7929`) +- Bug in ``groupby`` error message when a DataFrame grouping column is duplicated (:issue:`7511`) +- Bug in ``read_html`` where the ``infer_types`` argument forced coercion of + date-likes incorrectly (:issue:`7762`, :issue:`7032`). +- Bug in ``Series.str.cat`` with an index which was filtered as to not include the first item (:issue:`7857`) +- Bug in ``Timestamp`` cannot parse ``nanosecond`` from string (:issue:`7878`) +- Bug in ``Timestamp`` with string offset and ``tz`` results incorrect (:issue:`7833`) +- Bug in ``tslib.tz_convert`` and ``tslib.tz_convert_single`` may return different results (:issue:`7798`) +- Bug in ``DatetimeIndex.intersection`` of non-overlapping timestamps with tz raises ``IndexError`` (:issue:`7880`) +- Bug in alignment with TimeOps and non-unique indexes (:issue:`8363`) +- Bug in ``GroupBy.filter()`` where fast path vs. slow path made the filter + return a non scalar value that appeared valid but wasn't (:issue:`7870`). 
+- Bug in ``date_range()``/``DatetimeIndex()`` when the timezone was inferred from input dates yet incorrect + times were returned when crossing DST boundaries (:issue:`7835`, :issue:`7901`). +- Bug in ``to_excel()`` where a negative sign was being prepended to positive infinity and was absent for negative infinity (:issue:`7949`) +- Bug in area plot draws legend with incorrect ``alpha`` when ``stacked=True`` (:issue:`8027`) +- ``Period`` and ``PeriodIndex`` addition/subtraction with ``np.timedelta64`` results in incorrect internal representations (:issue:`7740`) +- Bug in ``Holiday`` with no offset or observance (:issue:`7987`) +- Bug in ``DataFrame.to_latex`` formatting when columns or index is a ``MultiIndex`` (:issue:`7982`). +- Bug in ``DateOffset`` around Daylight Savings Time produces unexpected results (:issue:`5175`). +- Bug in ``DataFrame.shift`` where empty columns would throw ``ZeroDivisionError`` on numpy 1.7 (:issue:`8019`) +- Bug in installation where ``html_encoding/*.html`` wasn't installed and + therefore some tests were not running correctly (:issue:`7927`). +- Bug in ``read_html`` where ``bytes`` objects were not tested for in + ``_read`` (:issue:`7927`). +- Bug in ``DataFrame.stack()`` when one of the column levels was a datelike (:issue:`8039`) +- Bug in broadcasting numpy scalars with ``DataFrame`` (:issue:`8116`) +- Bug in ``pivot_table`` performed with nameless ``index`` and ``columns`` raises ``KeyError`` (:issue:`8103`) +- Bug in ``DataFrame.plot(kind='scatter')`` draws points and errorbars with different colors when the color is specified by ``c`` keyword (:issue:`8081`) +- Bug in ``Float64Index`` where ``iat`` and ``at`` were not testing and were + failing (:issue:`8092`). +- Bug in ``DataFrame.boxplot()`` where y-limits were not set correctly when + producing multiple axes (:issue:`7528`, :issue:`5517`). 
+- Bug in ``read_csv`` where line comments were not handled correctly given + a custom line terminator or ``delim_whitespace=True`` (:issue:`8122`). +- Bug in ``read_html`` where empty tables caused a ``StopIteration`` (:issue:`7575`) +- Bug in casting when setting a column in a same-dtype block (:issue:`7704`) +- Bug in accessing groups from a ``GroupBy`` when the original grouper + was a tuple (:issue:`8121`). +- Bug in ``.at`` that would accept integer indexers on a non-integer index and do fallback (:issue:`7814`) +- Bug with kde plot and NaNs (:issue:`8182`) +- Bug in ``GroupBy.count`` with float32 data type where nan values were not excluded (:issue:`8169`). +- Bug with stacked barplots and NaNs (:issue:`8175`). +- Bug in resample with non evenly divisible offsets (e.g. '7s') (:issue:`8371`) +- Bug in interpolation methods with the ``limit`` keyword when no values needed interpolating (:issue:`7173`). +- Bug where ``col_space`` was ignored in ``DataFrame.to_string()`` when ``header=False`` (:issue:`8230`). +- Bug with ``DatetimeIndex.asof`` incorrectly matching partial strings and returning the wrong date (:issue:`8245`). +- Bug in plotting methods modifying the global matplotlib rcParams (:issue:`8242`). +- Bug in ``DataFrame.__setitem__`` that caused errors when setting a dataframe column to a sparse array (:issue:`8131`) +- Bug where ``DataFrame.boxplot()`` failed when entire column was empty (:issue:`8181`). +- Bug with messed variables in ``radviz`` visualization (:issue:`8199`). +- Bug in interpolation methods with the ``limit`` keyword when no values needed interpolating (:issue:`7173`). +- Bug where ``col_space`` was ignored in ``DataFrame.to_string()`` when ``header=False`` (:issue:`8230`). +- Bug in ``to_clipboard`` that would clip long column data (:issue:`8305`) +- Bug in ``DataFrame`` terminal display: Setting max_column/max_rows to zero did not trigger auto-resizing of dfs to fit terminal width/height (:issue:`7180`).
+- Bug in OLS where running with "cluster" and "nw_lags" parameters did not work correctly, but also did not throw an error + (:issue:`5884`). +- Bug in ``DataFrame.dropna`` that interpreted non-existent columns in the subset argument as the 'last column' (:issue:`8303`) +- Bug in ``Index.intersection`` on non-monotonic non-unique indexes (:issue:`8362`). +- Bug in masked series assignment where mismatching types would break alignment (:issue:`8387`) +- Bug in ``NDFrame.equals`` gives false negatives with dtype=object (:issue:`8437`) +- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`) +- Bug in ``NDFrame.loc`` indexing when row/column names were lost when target was a list/ndarray (:issue:`6552`) +- Regression in ``NDFrame.loc`` indexing when rows/columns were converted to Float64Index if target was an empty list/ndarray (:issue:`7774`) +- Bug in ``Series`` that allows it to be indexed by a ``DataFrame`` which has unexpected results. Such indexing is no longer permitted (:issue:`8444`) +- Bug in item assignment of a ``DataFrame`` with MultiIndex columns where right-hand-side columns were not aligned (:issue:`7655`) +- Suppress FutureWarning generated by NumPy when comparing object arrays containing NaN for equality (:issue:`7065`) +- Bug in ``DataFrame.eval()`` where the dtype of the ``not`` operator (``~``) + was not correctly inferred as ``bool``. + + +.. _whatsnew_0.15.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.14.1..v0.15.0 diff --git a/doc/source/whatsnew/v0.15.1.rst b/doc/source/whatsnew/v0.15.1.rst new file mode 100644 index 00000000..2e036267 --- /dev/null +++ b/doc/source/whatsnew/v0.15.1.rst @@ -0,0 +1,318 @@ +.. 
_whatsnew_0151: + +v0.15.1 (November 9, 2014) +-------------------------- + +{{ header }} + + +This is a minor bug-fix release from 0.15.0 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +- :ref:`Enhancements ` +- :ref:`API Changes ` +- :ref:`Bug Fixes ` + +.. _whatsnew_0151.api: + +API changes +~~~~~~~~~~~ + +- ``s.dt.hour`` and other ``.dt`` accessors will now return ``np.nan`` for missing values (rather than previously -1), (:issue:`8689`) + + .. ipython:: python + + s = pd.Series(pd.date_range('20130101', periods=5, freq='D')) + s.iloc[2] = np.nan + s + + previous behavior: + + .. code-block:: ipython + + In [6]: s.dt.hour + Out[6]: + 0 0 + 1 0 + 2 -1 + 3 0 + 4 0 + dtype: int64 + + current behavior: + + .. ipython:: python + + s.dt.hour + +- ``groupby`` with ``as_index=False`` will not add erroneous extra columns to + result (:issue:`8582`): + + .. ipython:: python + + np.random.seed(2718281) + df = pd.DataFrame(np.random.randint(0, 100, (10, 2)), + columns=['jim', 'joe']) + df.head() + + ts = pd.Series(5 * np.random.randint(0, 3, 10)) + + previous behavior: + + .. code-block:: ipython + + In [4]: df.groupby(ts, as_index=False).max() + Out[4]: + NaN jim joe + 0 0 72 83 + 1 5 77 84 + 2 10 96 65 + + current behavior: + + .. ipython:: python + + df.groupby(ts, as_index=False).max() + +- ``groupby`` will not erroneously exclude columns if the column name conflicts + with the grouper name (:issue:`8112`): + + .. ipython:: python + + df = pd.DataFrame({'jim': range(5), 'joe': range(5, 10)}) + df + gr = df.groupby(df['jim'] < 2) + + previous behavior (excludes 1st column from output): + + .. code-block:: ipython + + In [4]: gr.apply(sum) + Out[4]: + joe + jim + False 24 + True 11 + + current behavior: + + .. 
ipython:: python + + gr.apply(sum) + +- Support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is + not found in the index (:issue:`7860`): + + .. ipython:: python + + s = pd.Series(['a', 'b', 'c', 'd'], [4, 3, 2, 1]) + s + + previous behavior: + + .. code-block:: ipython + + In [8]: s.loc[3.5:1.5] + KeyError: 3.5 + + current behavior: + + .. ipython:: python + + s.loc[3.5:1.5] + +- ``io.data.Options`` has been fixed for a change in the format of the Yahoo Options page (:issue:`8612`), (:issue:`8741`) + + .. note:: + + As a result of a change in Yahoo's option page layout, when an expiry date is given, + ``Options`` methods now return data for a single expiry date. Previously, methods returned all + data for the selected month. + + The ``month`` and ``year`` parameters have been undeprecated and can be used to get all + options data for a given month. + + If an expiry date that is not valid is given, data for the next expiry after the given + date is returned. + + Option data frames are now saved on the instance as ``callsYYMMDD`` or ``putsYYMMDD``. Previously + they were saved as ``callsMMYY`` and ``putsMMYY``. The next expiry is saved as ``calls`` and ``puts``. + + New features: + + - The expiry parameter can now be a single date or a list-like object containing dates. + + - A new property ``expiry_dates`` was added, which returns all available expiry dates. + + Current behavior: + + .. 
code-block:: ipython + + In [17]: from pandas.io.data import Options + + In [18]: aapl = Options('aapl', 'yahoo') + + In [19]: aapl.get_call_data().iloc[0:5, 0:1] + Out[19]: + Last + Strike Expiry Type Symbol + 80 2014-11-14 call AAPL141114C00080000 29.05 + 84 2014-11-14 call AAPL141114C00084000 24.80 + 85 2014-11-14 call AAPL141114C00085000 24.05 + 86 2014-11-14 call AAPL141114C00086000 22.76 + 87 2014-11-14 call AAPL141114C00087000 21.74 + + In [20]: aapl.expiry_dates + Out[20]: + [datetime.date(2014, 11, 14), + datetime.date(2014, 11, 22), + datetime.date(2014, 11, 28), + datetime.date(2014, 12, 5), + datetime.date(2014, 12, 12), + datetime.date(2014, 12, 20), + datetime.date(2015, 1, 17), + datetime.date(2015, 2, 20), + datetime.date(2015, 4, 17), + datetime.date(2015, 7, 17), + datetime.date(2016, 1, 15), + datetime.date(2017, 1, 20)] + + In [21]: aapl.get_near_stock_price(expiry=aapl.expiry_dates[0:3]).iloc[0:5, 0:1] + Out[21]: + Last + Strike Expiry Type Symbol + 109 2014-11-22 call AAPL141122C00109000 1.48 + 2014-11-28 call AAPL141128C00109000 1.79 + 110 2014-11-14 call AAPL141114C00110000 0.55 + 2014-11-22 call AAPL141122C00110000 1.02 + 2014-11-28 call AAPL141128C00110000 1.32 + +.. _whatsnew_0151.datetime64_plotting: + +- pandas now also registers the ``datetime64`` dtype in matplotlib's units registry + to plot such values as datetimes. This is activated once pandas is imported. In + previous versions, plotting an array of ``datetime64`` values will have resulted + in plotted integer values. To keep the previous behaviour, you can do + ``del matplotlib.units.registry[np.datetime64]`` (:issue:`8614`). + + +.. _whatsnew_0151.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- ``concat`` permits a wider variety of iterables of pandas objects to be + passed as the first parameter (:issue:`8645`): + + .. ipython:: python + + from collections import deque + df1 = pd.DataFrame([1, 2, 3]) + df2 = pd.DataFrame([4, 5, 6]) + + previous behavior: + + .. 
code-block:: ipython + + In [7]: pd.concat(deque((df1, df2))) + TypeError: first argument must be a list-like of pandas objects, you passed an object of type "deque" + + current behavior: + + .. ipython:: python + + pd.concat(deque((df1, df2))) + +- Represent ``MultiIndex`` labels with a dtype that utilizes memory based on the level size. In prior versions, the memory usage was a constant 8 bytes per element in each level. In addition, in prior versions, the *reported* memory usage was incorrect as it didn't show the usage for the memory occupied by the underlying data array. (:issue:`8456`) + + .. ipython:: python + + dfi = pd.DataFrame(1, index=pd.MultiIndex.from_product([['a'], + range(1000)]), columns=['A']) + + previous behavior: + + .. code-block:: ipython + + # this was underreported in prior versions + In [1]: dfi.memory_usage(index=True) + Out[1]: + Index 8000 # took about 24008 bytes in < 0.15.1 + A 8000 + dtype: int64 + + + current behavior: + + .. ipython:: python + + dfi.memory_usage(index=True) + +- Added Index properties ``is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`8680`). + +- Added option to select columns when importing Stata files (:issue:`7935`) + +- Qualify memory usage in ``DataFrame.info()`` by adding ``+`` if it is a lower bound (:issue:`8578`) + +- Raise errors in certain aggregation cases where an argument such as ``numeric_only`` is not handled (:issue:`8592`). + +- Added support for 3-character ISO and non-standard country codes in :func:`io.wb.download()` (:issue:`8482`) + +- World Bank data requests now will warn/raise based + on an ``errors`` argument, as well as a list of hard-coded country codes and + the World Bank's JSON response. In prior versions, the error messages + didn't look at the World Bank's JSON response. Problem-inducing inputs were + simply dropped prior to the request. The issue was that many good countries + were cropped in the hard-coded approach.
All countries will work now, but + some bad countries will raise exceptions because some edge cases break the + entire response. (:issue:`8482`) + +- Added option to ``Series.str.split()`` to return a ``DataFrame`` rather than a ``Series`` (:issue:`8428`) + +- Added option to ``df.info(null_counts=None|True|False)`` to override the default display options and force showing of the null-counts (:issue:`8701`) + + +.. _whatsnew_0151.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in unpickling of a ``CustomBusinessDay`` object (:issue:`8591`) +- Bug in coercing ``Categorical`` to a records array, e.g. ``df.to_records()`` (:issue:`8626`) +- Bug in ``Categorical`` not created properly with ``Series.to_frame()`` (:issue:`8626`) +- Bug in coercing in astype of a ``Categorical`` of a passed ``pd.Categorical`` (this now raises ``TypeError`` correctly), (:issue:`8626`) +- Bug in ``cut``/``qcut`` when using ``Series`` and ``retbins=True`` (:issue:`8589`) +- Bug in writing Categorical columns to an SQL database with ``to_sql`` (:issue:`8624`). +- Bug in comparing ``Categorical`` of datetime raising when being compared to a scalar datetime (:issue:`8687`) +- Bug in selecting from a ``Categorical`` with ``.iloc`` (:issue:`8623`) +- Bug in groupby-transform with a Categorical (:issue:`8623`) +- Bug in duplicated/drop_duplicates with a Categorical (:issue:`8623`) +- Bug in ``Categorical`` reflected comparison operator raising if the first argument was a numpy array scalar (e.g. 
np.int64) (:issue:`8658`) +- Bug in Panel indexing with a list-like (:issue:`8710`) +- Compat issue in ``DataFrame.dtypes`` when ``options.mode.use_inf_as_null`` is True (:issue:`8722`) +- Bug in ``read_csv``, ``dialect`` parameter would not take a string (:issue:`8703`) +- Bug in slicing a MultiIndex level with an empty-list (:issue:`8737`) +- Bug in numeric index operations of add/sub with Float/Index Index with numpy arrays (:issue:`8608`) +- Bug in setitem with empty indexer and unwanted coercion of dtypes (:issue:`8669`) +- Bug in ix/loc block splitting on setitem (manifests with integer-like dtypes, e.g. datetime64) (:issue:`8607`) +- Bug when doing label based indexing with integers not found in the index for + non-unique but monotonic indexes (:issue:`8680`). +- Bug when indexing a Float64Index with ``np.nan`` on numpy 1.7 (:issue:`8980`). +- Fix ``shape`` attribute for ``MultiIndex`` (:issue:`8609`) +- Bug in ``GroupBy`` where a name conflict between the grouper and columns + would break ``groupby`` operations (:issue:`7115`, :issue:`8112`) +- Fixed a bug where plotting a column ``y`` and specifying a label would mutate the index name of the original DataFrame (:issue:`8494`) +- Fix regression in plotting of a DatetimeIndex directly with matplotlib (:issue:`8614`). +- Bug in ``date_range`` where partially-specified dates would incorporate the current date (:issue:`6961`) +- Bug where setting by indexer to a scalar value with a mixed-dtype ``Panel4d`` was failing (:issue:`8702`) +- Bug where ``DataReader``'s would fail if one of the symbols passed was invalid. Now returns data for valid symbols and np.nan for invalid (:issue:`8494`) +- Bug in ``get_quote_yahoo`` that wouldn't allow non-float return values (:issue:`5229`). + + +.. _whatsnew_0.15.1.contributors: + +Contributors +~~~~~~~~~~~~ + +..
contributors:: v0.15.0..v0.15.1 diff --git a/doc/source/whatsnew/v0.15.2.rst b/doc/source/whatsnew/v0.15.2.rst new file mode 100644 index 00000000..292351c7 --- /dev/null +++ b/doc/source/whatsnew/v0.15.2.rst @@ -0,0 +1,257 @@ +.. _whatsnew_0152: + +v0.15.2 (December 12, 2014) +--------------------------- + +{{ header }} + + +This is a minor release from 0.15.1 and includes a large number of bug fixes +along with several new features, enhancements, and performance improvements. +A small number of API changes were necessary to fix existing bugs. +We recommend that all users upgrade to this version. + +- :ref:`Enhancements ` +- :ref:`API Changes ` +- :ref:`Performance Improvements ` +- :ref:`Bug Fixes ` + +.. _whatsnew_0152.api: + +API changes +~~~~~~~~~~~ + +- Indexing in ``MultiIndex`` beyond lex-sort depth is now supported, though + a lexically sorted index will have a better performance. (:issue:`2646`) + + .. ipython:: python + :okwarning: + + df = pd.DataFrame({'jim':[0, 0, 1, 1], + 'joe':['x', 'x', 'z', 'y'], + 'jolie':np.random.rand(4)}).set_index(['jim', 'joe']) + df + df.index.lexsort_depth + + # in prior versions this would raise a KeyError + # will now show a PerformanceWarning + df.loc[(1, 'z')] + + # lexically sorting + df2 = df.sort_index() + df2 + df2.index.lexsort_depth + df2.loc[(1,'z')] + +- Bug in unique of Series with ``category`` dtype, which returned all categories regardless + whether they were "used" or not (see :issue:`8559` for the discussion). + Previous behaviour was to return all categories: + + .. code-block:: ipython + + In [3]: cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c']) + + In [4]: cat + Out[4]: + [a, b, a] + Categories (3, object): [a < b < c] + + In [5]: cat.unique() + Out[5]: array(['a', 'b', 'c'], dtype=object) + + Now, only the categories that do effectively occur in the array are returned: + + .. 
ipython:: python + + cat = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b', 'c']) + cat.unique() + +- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters. ``Series.all``, ``Series.any``, ``Index.all``, and ``Index.any`` no longer support the ``out`` and ``keepdims`` parameters, which existed for compatibility with ndarray. Various index types no longer support the ``all`` and ``any`` aggregation functions and will now raise ``TypeError``. (:issue:`8302`). + +- Allow equality comparisons of Series with a categorical dtype and object dtype; previously these would raise ``TypeError`` (:issue:`8938`) + +- Bug in ``NDFrame``: conflicting attribute/column names now behave consistently between getting and setting. Previously, when both a column and attribute named ``y`` existed, ``data.y`` would return the attribute, while ``data.y = z`` would update the column (:issue:`8994`) + + .. ipython:: python + + data = pd.DataFrame({'x': [1, 2, 3]}) + data.y = 2 + data['y'] = [2, 4, 6] + data + + # this assignment was inconsistent + data.y = 5 + + Old behavior: + + .. code-block:: ipython + + In [6]: data.y + Out[6]: 2 + + In [7]: data['y'].values + Out[7]: array([5, 5, 5]) + + New behavior: + + .. ipython:: python + + data.y + data['y'].values + +- ``Timestamp('now')`` is now equivalent to ``Timestamp.now()`` in that it returns the local time rather than UTC. Also, ``Timestamp('today')`` is now equivalent to ``Timestamp.today()`` and both have ``tz`` as a possible argument. (:issue:`9000`) + +- Fix negative step support for label-based slices (:issue:`8753`) + + Old behavior: + + .. code-block:: ipython + + In [1]: s = pd.Series(np.arange(3), ['a', 'b', 'c']) + Out[1]: + a 0 + b 1 + c 2 + dtype: int64 + + In [2]: s.loc['c':'a':-1] + Out[2]: + c 2 + dtype: int64 + + New behavior: + + .. ipython:: python + + s = pd.Series(np.arange(3), ['a', 'b', 'c']) + s.loc['c':'a':-1] + + +.. 
_whatsnew_0152.enhancements: + +Enhancements +~~~~~~~~~~~~ + +``Categorical`` enhancements: + +- Added ability to export Categorical data to Stata (:issue:`8633`). See :ref:`here ` for limitations of categorical variables exported to Stata data files. +- Added flag ``order_categoricals`` to ``StataReader`` and ``read_stata`` to select whether to order imported categorical data (:issue:`8836`). See :ref:`here ` for more information on importing categorical variables from Stata data files. +- Added ability to export/import Categorical data to/from HDF5 (:issue:`7621`). Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here ` for an example and caveats w.r.t. prior versions of pandas. +- Added support for ``searchsorted()`` on ``Categorical`` class (:issue:`8420`). + +Other enhancements: + +- Added the ability to specify the SQL type of columns when writing a DataFrame + to a database (:issue:`8778`). + For example, specifying to use the sqlalchemy ``String`` type instead of the + default ``Text`` type for string columns: + + .. code-block:: python + + from sqlalchemy.types import String + data.to_sql('data_dtype', engine, dtype={'Col_1': String}) # noqa F821 + +- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters (:issue:`8302`): + + .. ipython:: python + + s = pd.Series([False, True, False], index=[0, 0, 1]) + s.any(level=0) + +- ``Panel`` now supports the ``all`` and ``any`` aggregation functions. (:issue:`8302`): + + .. code-block:: python + + >>> p = pd.Panel(np.random.rand(2, 5, 4) > 0.1) + >>> p.all() + 0 1 2 3 + 0 True True True True + 1 True False True True + 2 True True True True + 3 False True False True + 4 True True True True + +- Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on ``Timestamp`` class (:issue:`5351`). +- Added Google Analytics (``pandas.io.ga``) basic documentation (:issue:`8835`). See `here `__.
+- ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`). +- ``Timedelta`` now supports arithmetic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). +- Added ``Timedelta.to_timedelta64()`` method to the public API (:issue:`8884`). +- Added ``gbq.generate_bq_schema()`` function to the gbq module (:issue:`8325`). +- ``Series`` now works with map objects the same way as generators (:issue:`8909`). +- Added context manager to ``HDFStore`` for automatic closing (:issue:`8791`). +- ``to_datetime`` gains an ``exact`` keyword to allow for a format to not require an exact match for a provided format string (if it is ``False``). ``exact`` defaults to ``True`` (meaning that exact matching is still the default) (:issue:`8904`) +- Added ``axvlines`` boolean option to parallel_coordinates plot function, determines whether vertical lines will be printed, default is True +- Added ability to read table footers to read_html (:issue:`8552`) +- ``to_sql`` now infers data types of non-NA values for columns that contain NA values and have dtype ``object`` (:issue:`8778`). + + +.. _whatsnew_0152.performance: + +Performance +~~~~~~~~~~~ + +- Reduce memory usage when skiprows is an integer in read_csv (:issue:`8681`) +- Performance boost for ``to_datetime`` conversions with a passed ``format=``, and the ``exact=False`` (:issue:`8904`) + + +.. _whatsnew_0152.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in concat of Series with ``category`` dtype which were coercing to ``object``. 
(:issue:`8641`) +- Bug in Timestamp-Timestamp not returning a Timedelta type and datelike-datelike ops with timezones (:issue:`8865`) +- Made consistent a timezone mismatch exception (either tz operated with None or incompatible timezone), will now return ``TypeError`` rather than ``ValueError`` (a couple of edge cases only), (:issue:`8865`) +- Bug in using a ``pd.Grouper(key=...)`` with no level/axis or level only (:issue:`8795`, :issue:`8866`) +- Report a ``TypeError`` when invalid/no parameters are passed in a groupby (:issue:`8015`) +- Bug in packaging pandas with ``py2app/cx_Freeze`` (:issue:`8602`, :issue:`8831`) +- Bug in ``groupby`` signatures that didn't include \*args or \*\*kwargs (:issue:`8733`). +- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo and when it receives no data from Yahoo (:issue:`8761`), (:issue:`8783`). +- Unclear error message in csv parsing when passing dtype and names and the parsed data is a different data type (:issue:`8833`) +- Bug in slicing a MultiIndex with an empty list and at least one boolean indexer (:issue:`8781`) +- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo (:issue:`8761`). +- ``Timedelta`` kwargs may now be numpy ints and floats (:issue:`8757`). +- Fixed several outstanding bugs for ``Timedelta`` arithmetic and comparisons (:issue:`8813`, :issue:`5963`, :issue:`5436`). 
+- ``sql_schema`` now generates dialect appropriate ``CREATE TABLE`` statements (:issue:`8697`) +- ``slice`` string method now takes step into account (:issue:`8754`) +- Bug in ``BlockManager`` where setting values with different type would break block integrity (:issue:`8850`) +- Bug in ``DatetimeIndex`` when using ``time`` object as key (:issue:`8667`) +- Bug in ``merge`` where ``how='left'`` and ``sort=False`` would not preserve left frame order (:issue:`7331`) +- Bug in ``MultiIndex.reindex`` where reindexing at level would not reorder labels (:issue:`4088`) +- Bug in certain operations with dateutil timezones, manifesting with dateutil 2.3 (:issue:`8639`) +- Regression in DatetimeIndex iteration with a Fixed/Local offset timezone (:issue:`8890`) +- Bug in ``to_datetime`` when parsing a nanoseconds using the ``%f`` format (:issue:`8989`) +- ``io.data.Options`` now raises ``RemoteDataError`` when no expiry dates are available from Yahoo and when it receives no data from Yahoo (:issue:`8761`), (:issue:`8783`). +- Fix: The font size was only set on x axis if vertical or the y axis if horizontal. (:issue:`8765`) +- Fixed division by 0 when reading big csv files in python 3 (:issue:`8621`) +- Bug in outputting a MultiIndex with ``to_html,index=False`` which would add an extra column (:issue:`8452`) +- Imported categorical variables from Stata files retain the ordinal information in the underlying data (:issue:`8836`). +- Defined ``.size`` attribute across ``NDFrame`` objects to provide compat with numpy >= 1.9.1; buggy with ``np.array_split`` (:issue:`8846`) +- Skip testing of histogram plots for matplotlib <= 1.2 (:issue:`8648`). +- Bug where ``get_data_google`` returned object dtypes (:issue:`3995`) +- Bug in ``DataFrame.stack(..., dropna=False)`` when the DataFrame's ``columns`` is a ``MultiIndex`` + whose ``labels`` do not reference all its ``levels``. 
(:issue:`8844`) +- Bug in that Option context applied on ``__enter__`` (:issue:`8514`) +- Bug in resample that causes a ValueError when resampling across multiple days + and the last offset is not calculated from the start of the range (:issue:`8683`) +- Bug where ``DataFrame.plot(kind='scatter')`` fails when checking if an np.array is in the DataFrame (:issue:`8852`) +- Bug in ``pd.infer_freq/DataFrame.inferred_freq`` that prevented proper sub-daily frequency inference when the index contained DST days (:issue:`8772`). +- Bug where index name was still used when plotting a series with ``use_index=False`` (:issue:`8558`). +- Bugs when trying to stack multiple columns, when some (or all) of the level names are numbers (:issue:`8584`). +- Bug in ``MultiIndex`` where ``__contains__`` returns wrong result if index is not lexically sorted or unique (:issue:`7724`) +- BUG CSV: fix problem with trailing white space in skipped rows, (:issue:`8679`), (:issue:`8661`), (:issue:`8983`) +- Regression in ``Timestamp`` does not parse 'Z' zone designator for UTC (:issue:`8771`) +- Bug in ``StataWriter`` that writes strings with 244 characters irrespective of actual size (:issue:`8969`) +- Fixed ValueError raised by cummin/cummax when datetime64 Series contains NaT. (:issue:`8965`) +- Bug in DataReader returns object dtype if there are missing values (:issue:`8980`) +- Bug in plotting if sharex was enabled and index was a timeseries, would show labels on multiple axes (:issue:`3964`). +- Bug where passing a unit to the TimedeltaIndex constructor applied the conversion to nanoseconds twice (:issue:`9011`). +- Bug in plotting of a period-like array (:issue:`9012`) + + +.. _whatsnew_0.15.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.15.1..v0.15.2 diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst new file mode 100644 index 00000000..855d0b86 --- /dev/null +++ b/doc/source/whatsnew/v0.16.0.rst @@ -0,0 +1,690 @@ +.. 
_whatsnew_0160: + +v0.16.0 (March 22, 2015) +------------------------ + +{{ header }} + + +This is a major release from 0.15.2 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- ``DataFrame.assign`` method, see :ref:`here ` +- ``Series.to_coo/from_coo`` methods to interact with ``scipy.sparse``, see :ref:`here ` +- Backwards incompatible change to ``Timedelta`` to conform the ``.seconds`` attribute with ``datetime.timedelta``, see :ref:`here ` +- Changes to the ``.loc`` slicing API to conform with the behavior of ``.ix`` see :ref:`here ` +- Changes to the default for ordering in the ``Categorical`` constructor, see :ref:`here ` +- Enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here ` +- The ``pandas.tools.rplot``, ``pandas.sandbox.qtpandas`` and ``pandas.rpy`` + modules are deprecated. We refer users to external packages like + `seaborn `_, + `pandas-qt `_ and + `rpy2 `_ for similar or equivalent + functionality, see :ref:`here ` + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.16.0 + :local: + :backlinks: none + + +.. _whatsnew_0160.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0160.enhancements.assign: + +DataFrame assign +^^^^^^^^^^^^^^^^ + +Inspired by `dplyr's +`__ ``mutate`` verb, DataFrame has a new +:meth:`~pandas.DataFrame.assign` method. +The function signature for ``assign`` is simply ``**kwargs``. The keys +are the column names for the new fields, and the values are either a value +to be inserted (for example, a ``Series`` or NumPy array), or a function +of one argument to be called on the ``DataFrame``. The new values are inserted, +and the entire DataFrame (with all original and new columns) is returned. + +.. 
ipython:: python + + iris = pd.read_csv('data/iris.data') + iris.head() + + iris.assign(sepal_ratio=iris['SepalWidth'] / iris['SepalLength']).head() + +Above was an example of inserting a precomputed value. We can also pass in +a function to be evaluated. + +.. ipython:: python + + iris.assign(sepal_ratio=lambda x: (x['SepalWidth'] + / x['SepalLength'])).head() + +The power of ``assign`` comes when used in chains of operations. For example, +we can limit the DataFrame to just those with a Sepal Length greater than 5, +calculate the ratio, and plot + +.. ipython:: python + + iris = pd.read_csv('data/iris.data') + (iris.query('SepalLength > 5') + .assign(SepalRatio=lambda x: x.SepalWidth / x.SepalLength, + PetalRatio=lambda x: x.PetalWidth / x.PetalLength) + .plot(kind='scatter', x='SepalRatio', y='PetalRatio')) + +.. image:: ../_static/whatsnew_assign.png + :scale: 50 % + +See the :ref:`documentation ` for more. (:issue:`9229`) + + +.. _whatsnew_0160.enhancements.sparse: + +Interaction with scipy.sparse +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:issue:`8048`) for converting to and from ``scipy.sparse.coo_matrix`` instances (see :ref:`here `). For example, given a SparseSeries with MultiIndex we can convert to a `scipy.sparse.coo_matrix` by specifying the row and column labels as index levels: + +.. code-block:: python + + s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + + s + + # SparseSeries + ss = s.to_sparse() + ss + + A, rows, columns = ss.to_coo(row_levels=['A', 'B'], + column_levels=['C', 'D'], + sort_labels=False) + + A + A.todense() + rows + columns + +The from_coo method is a convenience method for creating a ``SparseSeries`` +from a ``scipy.sparse.coo_matrix``: + +.. 
code-block:: python + + from scipy import sparse + A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + A + A.todense() + + ss = pd.SparseSeries.from_coo(A) + ss + +.. _whatsnew_0160.enhancements.string: + +String methods enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Following new methods are accessible via ``.str`` accessor to apply the function to each values. This is intended to make it more consistent with standard methods on strings. (:issue:`9282`, :issue:`9352`, :issue:`9386`, :issue:`9387`, :issue:`9439`) + + ============= ============= ============= =============== =============== + .. .. Methods .. .. + ============= ============= ============= =============== =============== + ``isalnum()`` ``isalpha()`` ``isdigit()`` ``isdigit()`` ``isspace()`` + ``islower()`` ``isupper()`` ``istitle()`` ``isnumeric()`` ``isdecimal()`` + ``find()`` ``rfind()`` ``ljust()`` ``rjust()`` ``zfill()`` + ============= ============= ============= =============== =============== + + .. ipython:: python + + s = pd.Series(['abcd', '3456', 'EFGH']) + s.str.isalpha() + s.str.find('ab') + +- :meth:`Series.str.pad` and :meth:`Series.str.center` now accept ``fillchar`` option to specify filling character (:issue:`9352`) + + .. ipython:: python + + s = pd.Series(['12', '300', '25']) + s.str.pad(5, fillchar='_') + +- Added :meth:`Series.str.slice_replace`, which previously raised ``NotImplementedError`` (:issue:`8888`) + + .. ipython:: python + + s = pd.Series(['ABCD', 'EFGH', 'IJK']) + s.str.slice_replace(1, 3, 'X') + # replaced with empty char + s.str.slice_replace(0, 1) + +.. _whatsnew_0160.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Reindex now supports ``method='nearest'`` for frames or series with a monotonic increasing or decreasing index (:issue:`9258`): + + .. 
ipython:: python + + df = pd.DataFrame({'x': range(5)}) + df.reindex([0.2, 1.8, 3.5], method='nearest') + + This method is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods. + +- The ``read_excel()`` function's :ref:`sheetname ` argument now accepts a list and ``None``, to get multiple or all sheets respectively. If more than one sheet is specified, a dictionary is returned. (:issue:`9450`) + + .. code-block:: python + + # Returns the 1st and 4th sheet, as a dictionary of DataFrames. + pd.read_excel('path_to_file.xls', sheetname=['Sheet1', 3]) + + +- Allow Stata files to be read incrementally with an iterator; support for long strings in Stata files. See the docs :ref:`here` (:issue:`9493`). +- Paths beginning with ~ will now be expanded to begin with the user's home directory (:issue:`9066`) +- Added time interval selection in ``get_data_yahoo`` (:issue:`9071`) +- Added ``Timestamp.to_datetime64()`` to complement ``Timedelta.to_timedelta64()`` (:issue:`9255`) +- ``tseries.frequencies.to_offset()`` now accepts ``Timedelta`` as input (:issue:`9064`) +- Lag parameter was added to the autocorrelation method of ``Series``, defaults to lag-1 autocorrelation (:issue:`9192`) +- ``Timedelta`` will now accept ``nanoseconds`` keyword in constructor (:issue:`9273`) +- SQL code now safely escapes table and column names (:issue:`8986`) +- Added auto-complete for ``Series.str.``, ``Series.dt.`` and ``Series.cat.`` (:issue:`9322`) +- ``Index.get_indexer`` now supports ``method='pad'`` and ``method='backfill'`` for any target array, not just monotonic targets. These methods also work for monotonic decreasing as well as monotonic increasing indexes (:issue:`9258`). +- ``Index.asof`` now works on all index types (:issue:`9258`). +- A ``verbose`` argument has been added to ``io.read_excel()``, defaults to False. Set to True to print sheet names as they are parsed. 
(:issue:`9450`) +- Added ``days_in_month`` (compatibility alias ``daysinmonth``) property to ``Timestamp``, ``DatetimeIndex``, ``Period``, ``PeriodIndex``, and ``Series.dt`` (:issue:`9572`) +- Added ``decimal`` option in ``to_csv`` to provide formatting for non-'.' decimal separators (:issue:`781`) +- Added ``normalize`` option for ``Timestamp`` to normalized to midnight (:issue:`8794`) +- Added example for ``DataFrame`` import to R using HDF5 file and ``rhdf5`` + library. See the :ref:`documentation ` for more + (:issue:`9636`). + +.. _whatsnew_0160.api: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0160.api_breaking: + +.. _whatsnew_0160.api_breaking.timedelta: + +Changes in Timedelta +^^^^^^^^^^^^^^^^^^^^ + +In v0.15.0 a new scalar type ``Timedelta`` was introduced, that is a +sub-class of ``datetime.timedelta``. Mentioned :ref:`here ` was a notice of an API change w.r.t. the ``.seconds`` accessor. The intent was to provide a user-friendly set of accessors that give the 'natural' value for that unit, e.g. if you had a ``Timedelta('1 day, 10:11:12')``, then ``.seconds`` would return 12. However, this is at odds with the definition of ``datetime.timedelta``, which defines ``.seconds`` as ``10 * 3600 + 11 * 60 + 12 == 36672``. + +So in v0.16.0, we are restoring the API to match that of ``datetime.timedelta``. Further, the component values are still available through the ``.components`` accessor. This affects the ``.seconds`` and ``.microseconds`` accessors, and removes the ``.hours``, ``.minutes``, ``.milliseconds`` accessors. These changes affect ``TimedeltaIndex`` and the Series ``.dt`` accessor as well. (:issue:`9185`, :issue:`9139`) + +Previous behavior + +.. code-block:: ipython + + In [2]: t = pd.Timedelta('1 day, 10:11:12.100123') + + In [3]: t.days + Out[3]: 1 + + In [4]: t.seconds + Out[4]: 12 + + In [5]: t.microseconds + Out[5]: 123 + +New behavior + +.. 
ipython:: python + + t = pd.Timedelta('1 day, 10:11:12.100123') + t.days + t.seconds + t.microseconds + +Using ``.components`` allows the full component access + +.. ipython:: python + + t.components + t.components.seconds + +.. _whatsnew_0160.api_breaking.indexing: + +Indexing changes +^^^^^^^^^^^^^^^^ + +The behavior of a small sub-set of edge cases for using ``.loc`` have changed (:issue:`8613`). Furthermore we have improved the content of the error messages that are raised: + +- Slicing with ``.loc`` where the start and/or stop bound is not found in the index is now allowed; this previously would raise a ``KeyError``. This makes the behavior the same as ``.ix`` in this case. This change is only for slicing, not when indexing with a single label. + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=5)) + df + s = pd.Series(range(5), [-2, -1, 1, 2, 3]) + s + + Previous behavior + + .. code-block:: ipython + + In [4]: df.loc['2013-01-02':'2013-01-10'] + KeyError: 'stop bound [2013-01-10] is not in the [index]' + + In [6]: s.loc[-10:3] + KeyError: 'start bound [-10] is not the [index]' + + New behavior + + .. ipython:: python + + df.loc['2013-01-02':'2013-01-10'] + s.loc[-10:3] + +- Allow slicing with float-like values on an integer index for ``.ix``. Previously this was only enabled for ``.loc``: + + Previous behavior + + .. code-block:: ipython + + In [8]: s.ix[-1.0:2] + TypeError: the slice start value [-1.0] is not a proper indexer for this index type (Int64Index) + + New behavior + + .. code-block:: python + + In [2]: s.ix[-1.0:2] + Out[2]: + -1 1 + 1 2 + 2 3 + dtype: int64 + +- Provide a useful exception for indexing with an invalid type for that index when using ``.loc``. For example trying to use ``.loc`` on an index of type ``DatetimeIndex`` or ``PeriodIndex`` or ``TimedeltaIndex``, with an integer (or a float). + + Previous behavior + + .. 
code-block:: python + + In [4]: df.loc[2:3] + KeyError: 'start bound [2] is not the [index]' + + New behavior + + .. code-block:: ipython + + In [4]: df.loc[2:3] + TypeError: Cannot do slice indexing on with keys + + +.. _whatsnew_0160.api_breaking.categorical: + +Categorical changes +^^^^^^^^^^^^^^^^^^^ + +In prior versions, ``Categoricals`` that had an unspecified ordering (meaning no ``ordered`` keyword was passed) were defaulted as ``ordered`` Categoricals. Going forward, the ``ordered`` keyword in the ``Categorical`` constructor will default to ``False``. Ordering must now be explicit. + +Furthermore, previously you *could* change the ``ordered`` attribute of a Categorical by just setting the attribute, e.g. ``cat.ordered=True``; This is now deprecated and you should use ``cat.as_ordered()`` or ``cat.as_unordered()``. These will by default return a **new** object and not modify the existing object. (:issue:`9347`, :issue:`9190`) + +Previous behavior + +.. code-block:: ipython + + In [3]: s = pd.Series([0, 1, 2], dtype='category') + + In [4]: s + Out[4]: + 0 0 + 1 1 + 2 2 + dtype: category + Categories (3, int64): [0 < 1 < 2] + + In [5]: s.cat.ordered + Out[5]: True + + In [6]: s.cat.ordered = False + + In [7]: s + Out[7]: + 0 0 + 1 1 + 2 2 + dtype: category + Categories (3, int64): [0, 1, 2] + +New behavior + +.. ipython:: python + + s = pd.Series([0, 1, 2], dtype='category') + s + s.cat.ordered + s = s.cat.as_ordered() + s + s.cat.ordered + + # you can set in the constructor of the Categorical + s = pd.Series(pd.Categorical([0, 1, 2], ordered=True)) + s + s.cat.ordered + +For ease of creation of series of categorical data, we have added the ability to pass keywords when calling ``.astype()``. These are passed directly to the constructor. + +.. 
code-block:: python + + In [54]: s = pd.Series(["a", "b", "c", "a"]).astype('category', ordered=True) + + In [55]: s + Out[55]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (3, object): [a < b < c] + + In [56]: s = (pd.Series(["a", "b", "c", "a"]) + ....: .astype('category', categories=list('abcdef'), ordered=False)) + + In [57]: s + Out[57]: + 0 a + 1 b + 2 c + 3 a + dtype: category + Categories (6, object): [a, b, c, d, e, f] + + +.. _whatsnew_0160.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``Index.duplicated`` now returns ``np.array(dtype=bool)`` rather than ``Index(dtype=object)`` containing ``bool`` values. (:issue:`8875`) +- ``DataFrame.to_json`` now returns accurate type serialisation for each column for frames of mixed dtype (:issue:`9037`) + + Previously data was coerced to a common dtype before serialisation, which for + example resulted in integers being serialised to floats: + + .. code-block:: ipython + + In [2]: pd.DataFrame({'i': [1,2], 'f': [3.0, 4.2]}).to_json() + Out[2]: '{"f":{"0":3.0,"1":4.2},"i":{"0":1.0,"1":2.0}}' + + Now each column is serialised using its correct dtype: + + .. code-block:: ipython + + In [2]: pd.DataFrame({'i': [1,2], 'f': [3.0, 4.2]}).to_json() + Out[2]: '{"f":{"0":3.0,"1":4.2},"i":{"0":1,"1":2}}' + +- ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex.summary`` now output the same format. (:issue:`9116`) +- ``TimedeltaIndex.freqstr`` now output the same string format as ``DatetimeIndex``. (:issue:`9116`) + +- Bar and horizontal bar plots no longer add a dashed line along the info axis. The prior style can be achieved with matplotlib's ``axhline`` or ``axvline`` methods (:issue:`9088`). + +- ``Series`` accessors ``.dt``, ``.cat`` and ``.str`` now raise ``AttributeError`` instead of ``TypeError`` if the series does not contain the appropriate type of data (:issue:`9617`). 
This follows Python's built-in exception hierarchy more closely and ensures that tests like ``hasattr(s, 'cat')`` are consistent on both Python 2 and 3. + +- ``Series`` now supports bitwise operation for integral types (:issue:`9016`). Previously even if the input dtypes were integral, the output dtype was coerced to ``bool``. + + Previous behavior + + .. code-block:: ipython + + In [2]: pd.Series([0, 1, 2, 3], list('abcd')) | pd.Series([4, 4, 4, 4], list('abcd')) + Out[2]: + a True + b True + c True + d True + dtype: bool + + New behavior. If the input dtypes are integral, the output dtype is also integral and the output + values are the result of the bitwise operation. + + .. code-block:: ipython + + In [2]: pd.Series([0, 1, 2, 3], list('abcd')) | pd.Series([4, 4, 4, 4], list('abcd')) + Out[2]: + a 4 + b 5 + c 6 + d 7 + dtype: int64 + + +- During division involving a ``Series`` or ``DataFrame``, ``0/0`` and ``0//0`` now give ``np.nan`` instead of ``np.inf``. (:issue:`9144`, :issue:`8445`) + + Previous behavior + + .. code-block:: ipython + + In [2]: p = pd.Series([0, 1]) + + In [3]: p / 0 + Out[3]: + 0 inf + 1 inf + dtype: float64 + + In [4]: p // 0 + Out[4]: + 0 inf + 1 inf + dtype: float64 + + + + New behavior + + .. ipython:: python + + p = pd.Series([0, 1]) + p / 0 + p // 0 + +- ``Series.values_counts`` and ``Series.describe`` for categorical data will now put ``NaN`` entries at the end. (:issue:`9443`) +- ``Series.describe`` for categorical data will now give counts and frequencies of 0, not ``NaN``, for unused categories (:issue:`9443`) + +- Due to a bug fix, looking up a partial string label with ``DatetimeIndex.asof`` now includes values that match the string, even if they are after the start of the partial string label (:issue:`9258`). + + Old behavior: + + .. code-block:: ipython + + In [4]: pd.to_datetime(['2000-01-31', '2000-02-28']).asof('2000-02') + Out[4]: Timestamp('2000-01-31 00:00:00') + + Fixed behavior: + + .. 
ipython:: python + + pd.to_datetime(['2000-01-31', '2000-02-28']).asof('2000-02') + + To reproduce the old behavior, simply add more precision to the label (e.g., use ``2000-02-01`` instead of ``2000-02``). + + +.. _whatsnew_0160.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The ``rplot`` trellis plotting interface is deprecated and will be removed + in a future version. We refer to external packages like + `seaborn `_ for similar + but more refined functionality (:issue:`3445`). + The documentation includes some examples how to convert your existing code + from ``rplot`` to seaborn `here `__. + +- The ``pandas.sandbox.qtpandas`` interface is deprecated and will be removed in a future version. + We refer users to the external package `pandas-qt `_. (:issue:`9615`) + +- The ``pandas.rpy`` interface is deprecated and will be removed in a future version. + Similar functionality can be accessed through the `rpy2 `_ project (:issue:`9602`) + +- Adding ``DatetimeIndex/PeriodIndex`` to another ``DatetimeIndex/PeriodIndex`` is being deprecated as a set-operation. This will be changed to a ``TypeError`` in a future version. ``.union()`` should be used for the union set operation. (:issue:`9094`) +- Subtracting ``DatetimeIndex/PeriodIndex`` from another ``DatetimeIndex/PeriodIndex`` is being deprecated as a set-operation. This will be changed to an actual numeric subtraction yielding a ``TimeDeltaIndex`` in a future version. ``.difference()`` should be used for the differencing set operation. (:issue:`9094`) + + +.. 
_whatsnew_0160.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``DataFrame.pivot_table`` and ``crosstab``'s ``rows`` and ``cols`` keyword arguments were removed in favor + of ``index`` and ``columns`` (:issue:`6581`) +- ``DataFrame.to_excel`` and ``DataFrame.to_csv`` ``cols`` keyword argument was removed in favor of ``columns`` (:issue:`6581`) +- Removed ``convert_dummies`` in favor of ``get_dummies`` (:issue:`6581`) +- Removed ``value_range`` in favor of ``describe`` (:issue:`6581`) + +.. _whatsnew_0160.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Fixed a performance regression for ``.loc`` indexing with an array or list-like (:issue:`9126`). +- ``DataFrame.to_json`` 30x performance improvement for mixed dtype frames. (:issue:`9037`) +- Performance improvements in ``MultiIndex.duplicated`` by working with labels instead of values (:issue:`9125`) +- Improved the speed of ``nunique`` by calling ``unique`` instead of ``value_counts`` (:issue:`9129`, :issue:`7771`) +- Performance improvement of up to 10x in ``DataFrame.count`` and ``DataFrame.dropna`` by taking advantage of homogeneous/heterogeneous dtypes appropriately (:issue:`9136`) +- Performance improvement of up to 20x in ``DataFrame.count`` when using a ``MultiIndex`` and the ``level`` keyword argument (:issue:`9163`) +- Performance and memory usage improvements in ``merge`` when key space exceeds ``int64`` bounds (:issue:`9151`) +- Performance improvements in multi-key ``groupby`` (:issue:`9429`) +- Performance improvements in ``MultiIndex.sortlevel`` (:issue:`9445`) +- Performance and memory usage improvements in ``DataFrame.duplicated`` (:issue:`9398`) +- Cythonized ``Period`` (:issue:`9440`) +- Decreased memory usage on ``to_hdf`` (:issue:`9648`) + +.. 
_whatsnew_0160.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Changed ``.to_html`` to remove leading/trailing spaces in table body (:issue:`4987`) +- Fixed issue using ``read_csv`` on s3 with Python 3 (:issue:`9452`) +- Fixed compatibility issue in ``DatetimeIndex`` affecting architectures where ``numpy.int_`` defaults to ``numpy.int32`` (:issue:`8943`) +- Bug in Panel indexing with an object-like (:issue:`9140`) +- Bug in the returned ``Series.dt.components`` index was reset to the default index (:issue:`9247`) +- Bug in ``Categorical.__getitem__/__setitem__`` with listlike input getting incorrect results from indexer coercion (:issue:`9469`) +- Bug in partial setting with a DatetimeIndex (:issue:`9478`) +- Bug in groupby for integer and datetime64 columns when applying an aggregator that caused the value to be + changed when the number was sufficiently large (:issue:`9311`, :issue:`6620`) +- Fixed bug in ``to_sql`` when mapping a ``Timestamp`` object column (datetime + column with timezone info) to the appropriate sqlalchemy type (:issue:`9085`). +- Fixed bug in ``to_sql`` ``dtype`` argument not accepting an instantiated + SQLAlchemy type (:issue:`9083`). +- Bug in ``.loc`` partial setting with a ``np.datetime64`` (:issue:`9516`) +- Incorrect dtypes inferred on datetimelike looking ``Series`` & on ``.xs`` slices (:issue:`9477`) +- Items in ``Categorical.unique()`` (and ``s.unique()`` if ``s`` is of dtype ``category``) now appear in the order in which they are originally found, not in sorted order (:issue:`9331`). This is now consistent with the behavior for other dtypes in pandas. +- Fixed bug on big endian platforms which produced incorrect results in ``StataReader`` (:issue:`8688`). 
+- Bug in ``MultiIndex.has_duplicates`` when having many levels causes an indexer overflow (:issue:`9075`, :issue:`5873`) +- Bug in ``pivot`` and ``unstack`` where ``nan`` values would break index alignment (:issue:`4862`, :issue:`7401`, :issue:`7403`, :issue:`7405`, :issue:`7466`, :issue:`9497`) +- Bug in left ``join`` on MultiIndex with ``sort=True`` or null values (:issue:`9210`). +- Bug in ``MultiIndex`` where inserting new keys would fail (:issue:`9250`). +- Bug in ``groupby`` when key space exceeds ``int64`` bounds (:issue:`9096`). +- Bug in ``unstack`` with ``TimedeltaIndex`` or ``DatetimeIndex`` and nulls (:issue:`9491`). +- Bug in ``rank`` where comparing floats with tolerance will cause inconsistent behaviour (:issue:`8365`). +- Fixed character encoding bug in ``read_stata`` and ``StataReader`` when loading data from a URL (:issue:`9231`). +- Bug in adding ``offsets.Nano`` to other offsets raises ``TypeError`` (:issue:`9284`) +- Bug in ``DatetimeIndex`` iteration, related to (:issue:`8890`), fixed in (:issue:`9100`) +- Bugs in ``resample`` around DST transitions. This required fixing offset classes so they behave correctly on DST transitions. (:issue:`5172`, :issue:`8744`, :issue:`8653`, :issue:`9173`, :issue:`9468`). +- Bug in binary operator method (eg ``.mul()``) alignment with integer levels (:issue:`9463`). +- Bug in boxplot, scatter and hexbin plot may show an unnecessary warning (:issue:`8877`) +- Bug in subplot with ``layout`` kw may show unnecessary warning (:issue:`9464`) +- Bug in using grouper functions that need passed through arguments (e.g. axis), when using wrapped function (e.g. ``fillna``), (:issue:`9221`) +- ``DataFrame`` now properly supports simultaneous ``copy`` and ``dtype`` arguments in constructor (:issue:`9099`) +- Bug in ``read_csv`` when using skiprows on a file with CR line endings with the c engine. 
(:issue:`9079`) +- ``isnull`` now detects ``NaT`` in ``PeriodIndex`` (:issue:`9129`) +- Bug in groupby ``.nth()`` with a multiple column groupby (:issue:`8979`) +- Bug in ``DataFrame.where`` and ``Series.where`` coerce numerics to string incorrectly (:issue:`9280`) +- Bug in ``DataFrame.where`` and ``Series.where`` raise ``ValueError`` when string list-like is passed. (:issue:`9280`) +- Accessing ``Series.str`` methods on non-string values now raises ``TypeError`` instead of producing incorrect results (:issue:`9184`) +- Bug in ``DatetimeIndex.__contains__`` when index has duplicates and is not monotonic increasing (:issue:`9512`) +- Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`) +- Fixed issue in the ``xlsxwriter`` engine where it added a default 'General' format to cells if no other format was applied. This prevented other row or column formatting being applied. (:issue:`9167`) +- Fixes issue with ``index_col=False`` when ``usecols`` is also specified in ``read_csv``. (:issue:`9082`) +- Bug where ``wide_to_long`` would modify the input stub names list (:issue:`9204`) +- Bug in ``to_sql`` not storing float64 values using double precision. (:issue:`9009`) +- ``SparseSeries`` and ``SparsePanel`` now accept zero argument constructors (same as their non-sparse counterparts) (:issue:`9272`). +- Regression in merging ``Categorical`` and ``object`` dtypes (:issue:`9426`) +- Bug in ``read_csv`` with buffer overflows with certain malformed input files (:issue:`9205`) +- Bug in groupby MultiIndex with missing pair (:issue:`9049`, :issue:`9344`) +- Fixed bug in ``Series.groupby`` where grouping on ``MultiIndex`` levels would ignore the sort argument (:issue:`9444`) +- Fix bug in ``DataFrame.groupby`` where ``sort=False`` is ignored in the case of Categorical columns. 
(:issue:`8868`) +- Fixed bug with reading CSV files from Amazon S3 on python 3 raising a TypeError (:issue:`9452`) +- Bug in the Google BigQuery reader where the 'jobComplete' key may be present but False in the query results (:issue:`8728`) +- Bug in ``Series.values_counts`` with excluding ``NaN`` for categorical type ``Series`` with ``dropna=True`` (:issue:`9443`) +- Fixed missing numeric_only option for ``DataFrame.std/var/sem`` (:issue:`9201`) +- Support constructing ``Panel`` or ``Panel4D`` with scalar data (:issue:`8285`) +- ``Series`` text representation disconnected from `max_rows`/`max_columns` (:issue:`7508`). + +\ + +- ``Series`` number formatting inconsistent when truncated (:issue:`8532`). + + Previous behavior + + .. code-block:: python + + In [2]: pd.options.display.max_rows = 10 + In [3]: s = pd.Series([1,1,1,1,1,1,1,1,1,1,0.9999,1,1]*10) + In [4]: s + Out[4]: + 0 1 + 1 1 + 2 1 + ... + 127 0.9999 + 128 1.0000 + 129 1.0000 + Length: 130, dtype: float64 + + New behavior + + .. code-block:: python + + 0 1.0000 + 1 1.0000 + 2 1.0000 + 3 1.0000 + 4 1.0000 + ... + 125 1.0000 + 126 1.0000 + 127 0.9999 + 128 1.0000 + 129 1.0000 + dtype: float64 + +- A Spurious ``SettingWithCopy`` Warning was generated when setting a new item in a frame in some cases (:issue:`8730`) + + The following would previously report a ``SettingWithCopy`` Warning. + + .. ipython:: python + + df1 = pd.DataFrame({'x': pd.Series(['a', 'b', 'c']), + 'y': pd.Series(['d', 'e', 'f'])}) + df2 = df1[['x']] + df2['y'] = ['g', 'h', 'i'] + + +.. _whatsnew_0.16.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.15.2..v0.16.0 diff --git a/doc/source/whatsnew/v0.16.1.rst b/doc/source/whatsnew/v0.16.1.rst new file mode 100644 index 00000000..502c1287 --- /dev/null +++ b/doc/source/whatsnew/v0.16.1.rst @@ -0,0 +1,484 @@ +.. 
_whatsnew_0161: + +v0.16.1 (May 11, 2015) +---------------------- + +{{ header }} + + +This is a minor bug-fix release from 0.16.0 and includes a large number of +bug fixes along with several new features, enhancements, and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- Support for a ``CategoricalIndex``, a category based index, see :ref:`here <whatsnew_0161.enhancements.categoricalindex>` +- New section on how-to-contribute to *pandas*, see :ref:`here ` +- Revised "Merge, join, and concatenate" documentation, including graphical examples to make it easier to understand each operation, see :ref:`here ` +- New method ``sample`` for drawing random samples from Series, DataFrames and Panels. See :ref:`here <whatsnew_0161.enhancements.sample>` +- The default ``Index`` printing has changed to a more uniform format, see :ref:`here <whatsnew_0161.index_repr>` +- ``BusinessHour`` datetime-offset is now supported, see :ref:`here ` + +- Further enhancement to the ``.str`` accessor to make string operations easier, see :ref:`here <whatsnew_0161.enhancements.string>` + +.. contents:: What's new in v0.16.1 + :local: + :backlinks: none + +.. _whatsnew_0161.enhancements: + +.. warning:: + + In pandas 0.17.0, the sub-package ``pandas.io.data`` will be removed in favor of a separately installable package (:issue:`8961`). + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_0161.enhancements.categoricalindex: + +CategoricalIndex +^^^^^^^^^^^^^^^^ + +We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting +indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0) +and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1, +setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to a regular object-based ``Index``. + +..
code-block:: ipython + + In [1]: df = pd.DataFrame({'A': np.arange(6), + ...: 'B': pd.Series(list('aabbca')) + ...: .astype('category', categories=list('cab')) + ...: }) + ...: + + In [2]: df + Out[2]: + A B + 0 0 a + 1 1 a + 2 2 b + 3 3 b + 4 4 c + 5 5 a + + In [3]: df.dtypes + Out[3]: + A int64 + B category + dtype: object + + In [4]: df.B.cat.categories + Out[4]: Index(['c', 'a', 'b'], dtype='object') + + +setting the index, will create create a ``CategoricalIndex`` + +.. code-block:: ipython + + In [5]: df2 = df.set_index('B') + + In [6]: df2.index + Out[6]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') + +indexing with ``__getitem__/.iloc/.loc/.ix`` works similarly to an Index with duplicates. +The indexers MUST be in the category or the operation will raise. + +.. code-block:: ipython + + In [7]: df2.loc['a'] + Out[7]: + A + B + a 0 + a 1 + a 5 + +and preserves the ``CategoricalIndex`` + +.. code-block:: ipython + + In [8]: df2.loc['a'].index + Out[8]: CategoricalIndex(['a', 'a', 'a'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') + + +sorting will order by the order of the categories + +.. code-block:: ipython + + In [9]: df2.sort_index() + Out[9]: + A + B + c 4 + a 0 + a 1 + a 5 + b 2 + b 3 + +groupby operations on the index will preserve the index nature as well + +.. code-block:: ipython + + In [10]: df2.groupby(level=0).sum() + Out[10]: + A + B + c 4 + a 6 + b 5 + + In [11]: df2.groupby(level=0).sum().index + Out[11]: CategoricalIndex(['c', 'a', 'b'], categories=['c', 'a', 'b'], ordered=False, name='B', dtype='category') + + +reindexing operations, will return a resulting index based on the type of the passed +indexer, meaning that passing a list will return a plain-old-``Index``; indexing with +a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories +of the PASSED ``Categorical`` dtype. 
This allows one to arbitrarily index these even with +values NOT in the categories, similarly to how you can reindex ANY pandas index. + +.. code-block:: ipython + + In [12]: df2.reindex(['a', 'e']) + Out[12]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [13]: df2.reindex(['a', 'e']).index + Out[13]: pd.Index(['a', 'a', 'a', 'e'], dtype='object', name='B') + + In [14]: df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))) + Out[14]: + A + B + a 0.0 + a 1.0 + a 5.0 + e NaN + + In [15]: df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index + Out[15]: pd.CategoricalIndex(['a', 'a', 'a', 'e'], + categories=['a', 'b', 'c', 'd', 'e'], + ordered=False, name='B', + dtype='category') + +See the :ref:`documentation ` for more. (:issue:`7629`, :issue:`10038`, :issue:`10039`) + +.. _whatsnew_0161.enhancements.sample: + +Sample +^^^^^^ + +Series, DataFrames, and Panels now have a new method: :meth:`~pandas.DataFrame.sample`. +The method accepts a specific number of rows or columns to return, or a fraction of the +total number or rows or columns. It also has options for sampling with or without replacement, +for passing in a column for weights for non-uniform sampling, and for setting seed values to +facilitate replication. (:issue:`2419`) + +.. ipython:: python + + example_series = pd.Series([0, 1, 2, 3, 4, 5]) + + # When no arguments are passed, returns 1 + example_series.sample() + + # One may specify either a number of rows: + example_series.sample(n=3) + + # Or a fraction of the rows: + example_series.sample(frac=0.5) + + # weights are accepted. + example_weights = [0, 0, 0.2, 0.2, 0.2, 0.4] + example_series.sample(n=3, weights=example_weights) + + # weights will also be normalized if they do not sum to one, + # and missing values will be treated as zeros. 
+ example_weights2 = [0.5, 0, 0, 0, None, np.nan] + example_series.sample(n=1, weights=example_weights2) + + +When applied to a DataFrame, one may pass the name of a column to specify sampling weights +when sampling from rows. + +.. ipython:: python + + df = pd.DataFrame({'col1': [9, 8, 7, 6], + 'weight_column': [0.5, 0.4, 0.1, 0]}) + df.sample(n=3, weights='weight_column') + + +.. _whatsnew_0161.enhancements.string: + +String methods enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:ref:`Continuing from v0.16.0 `, the following +enhancements make string operations easier and more consistent with standard python string operations. + + +- Added ``StringMethods`` (``.str`` accessor) to ``Index`` (:issue:`9068`) + + The ``.str`` accessor is now available for both ``Series`` and ``Index``. + + .. ipython:: python + + idx = pd.Index([' jack', 'jill ', ' jesse ', 'frank']) + idx.str.strip() + + One special case for the `.str` accessor on ``Index`` is that if a string method returns ``bool``, the ``.str`` accessor + will return a ``np.array`` instead of a boolean ``Index`` (:issue:`8875`). This enables the following expression + to work naturally: + + .. ipython:: python + + idx = pd.Index(['a1', 'a2', 'b1', 'b2']) + s = pd.Series(range(4), index=idx) + s + idx.str.startswith('a') + s[s.index.str.startswith('a')] + +- The following new methods are accessible via ``.str`` accessor to apply the function to each values. (:issue:`9766`, :issue:`9773`, :issue:`10031`, :issue:`10045`, :issue:`10052`) + + ================ =============== =============== =============== ================ + .. .. Methods .. .. + ================ =============== =============== =============== ================ + ``capitalize()`` ``swapcase()`` ``normalize()`` ``partition()`` ``rpartition()`` + ``index()`` ``rindex()`` ``translate()`` + ================ =============== =============== =============== ================ + +- ``split`` now takes ``expand`` keyword to specify whether to expand dimensionality. 
``return_type`` is deprecated. (:issue:`9847`) + + .. ipython:: python + + s = pd.Series(['a,b', 'a,c', 'b,c']) + + # return Series + s.str.split(',') + + # return DataFrame + s.str.split(',', expand=True) + + idx = pd.Index(['a,b', 'a,c', 'b,c']) + + # return Index + idx.str.split(',') + + # return MultiIndex + idx.str.split(',', expand=True) + + +- Improved ``extract`` and ``get_dummies`` methods for ``Index.str`` (:issue:`9980`) + + +.. _whatsnew_0161.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``BusinessHour`` offset is now supported, which represents business hours starting from 09:00 - 17:00 on ``BusinessDay`` by default. See :ref:`Here ` for details. (:issue:`7905`) + + .. ipython:: python + + pd.Timestamp('2014-08-01 09:00') + pd.tseries.offsets.BusinessHour() + pd.Timestamp('2014-08-01 07:00') + pd.tseries.offsets.BusinessHour() + pd.Timestamp('2014-08-01 16:30') + pd.tseries.offsets.BusinessHour() + +- ``DataFrame.diff`` now takes an ``axis`` parameter that determines the direction of differencing (:issue:`9727`) + +- Allow ``clip``, ``clip_lower``, and ``clip_upper`` to accept array-like arguments as thresholds (This is a regression from 0.11.0). These methods now have an ``axis`` parameter which determines how the Series or DataFrame will be aligned with the threshold(s). (:issue:`6966`) + +- ``DataFrame.mask()`` and ``Series.mask()`` now support same keywords as ``where`` (:issue:`8801`) + +- ``drop`` function can now accept ``errors`` keyword to suppress ``ValueError`` raised when any of label does not exist in the target data. (:issue:`6736`) + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C']) + df.drop(['A', 'X'], axis=1, errors='ignore') + +- Add support for separating years and quarters using dashes, for + example 2014-Q1. 
(:issue:`9688`) + +- Allow conversion of values with dtype ``datetime64`` or ``timedelta64`` to strings using ``astype(str)`` (:issue:`9757`) +- ``get_dummies`` function now accepts ``sparse`` keyword. If set to ``True``, the return ``DataFrame`` is sparse, e.g. ``SparseDataFrame``. (:issue:`8823`) +- ``Period`` now accepts ``datetime64`` as value input. (:issue:`9054`) + +- Allow timedelta string conversion when leading zero is missing from time definition, ie `0:00:00` vs `00:00:00`. (:issue:`9570`) +- Allow ``Panel.shift`` with ``axis='items'`` (:issue:`9890`) + +- Trying to write an excel file now raises ``NotImplementedError`` if the ``DataFrame`` has a ``MultiIndex`` instead of writing a broken Excel file. (:issue:`9794`) +- Allow ``Categorical.add_categories`` to accept ``Series`` or ``np.array``. (:issue:`9927`) + +- Add/delete ``str/dt/cat`` accessors dynamically from ``__dir__``. (:issue:`9910`) +- Add ``normalize`` as a ``dt`` accessor method. (:issue:`10047`) + +- ``DataFrame`` and ``Series`` now have ``_constructor_expanddim`` property as overridable constructor for one higher dimensionality data. This should be used only when it is really needed, see :ref:`here ` + +- ``pd.lib.infer_dtype`` now returns ``'bytes'`` in Python 3 where appropriate. (:issue:`10032`) + + +.. _whatsnew_0161.api: + +API changes +~~~~~~~~~~~ + +- When passing in an ax to ``df.plot( ..., ax=ax)``, the `sharex` kwarg will now default to `False`. + The result is that the visibility of xlabels and xticklabels will not anymore be changed. You + have to do that by yourself for the right axes in your figure or set ``sharex=True`` explicitly + (but this changes the visible for all axes in the figure, not only the one which is passed in!). + If pandas creates the subplots itself (e.g. no passed in `ax` kwarg), then the + default is still ``sharex=True`` and the visibility changes are applied. + +- :meth:`~pandas.DataFrame.assign` now inserts new columns in alphabetical order. 
Previously + the order was arbitrary. (:issue:`9777`) + +- By default, ``read_csv`` and ``read_table`` will now try to infer the compression type based on the file extension. Set ``compression=None`` to restore the previous behavior (no decompression). (:issue:`9770`) + +.. _whatsnew_0161.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- ``Series.str.split``'s ``return_type`` keyword was deprecated in favor of ``expand`` (:issue:`9847`) + + +.. _whatsnew_0161.index_repr: + +Index representation +~~~~~~~~~~~~~~~~~~~~ + +The string representation of ``Index`` and its sub-classes has now been unified. These will show a single-line display if there are few values; a wrapped multi-line display for a lot of values (but less than ``display.max_seq_items``); and, if there are lots of items (> ``display.max_seq_items``), a truncated display (the head and tail of the data). The formatting for ``MultiIndex`` is unchanged (a multi-line wrapped display). The display width responds to the option ``display.max_seq_items``, which defaults to 100. (:issue:`6482`) + +Previous behavior + +..
code-block:: ipython + + In [2]: pd.Index(range(4), name='foo') + Out[2]: Int64Index([0, 1, 2, 3], dtype='int64') + + In [3]: pd.Index(range(104), name='foo') + Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64') + + In [4]: pd.date_range('20130101', periods=4, name='foo', tz='US/Eastern') + Out[4]: + + [2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00] + Length: 4, Freq: D, Timezone: US/Eastern + + In [5]: pd.date_range('20130101', periods=104, name='foo', tz='US/Eastern') + Out[5]: + + [2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00] + Length: 104, Freq: D, Timezone: US/Eastern + +New behavior + +.. ipython:: python + + pd.set_option('display.width', 80) + pd.Index(range(4), name='foo') + pd.Index(range(30), name='foo') + pd.Index(range(104), name='foo') + pd.CategoricalIndex(['a', 'bb', 'ccc', 'dddd'], + ordered=True, name='foobar') + pd.CategoricalIndex(['a', 'bb', 'ccc', 'dddd'] * 10, + ordered=True, name='foobar') + pd.CategoricalIndex(['a', 'bb', 'ccc', 'dddd'] * 100, + ordered=True, name='foobar') + pd.date_range('20130101', periods=4, name='foo', tz='US/Eastern') + pd.date_range('20130101', periods=25, freq='D') + pd.date_range('20130101', periods=104, name='foo', tz='US/Eastern') + + +.. _whatsnew_0161.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved csv write performance with mixed dtypes, including datetimes by up to 5x (:issue:`9940`) +- Improved csv write performance generally by 2x (:issue:`9940`) +- Improved the performance of ``pd.lib.max_len_string_array`` by 5-7x (:issue:`10024`) + + +.. 
_whatsnew_0161.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug where labels did not appear properly in the legend of ``DataFrame.plot()``, passing ``label=`` arguments works, and Series indices are no longer mutated. (:issue:`9542`) +- Bug in json serialization causing a segfault when a frame had zero length. (:issue:`9805`) +- Bug in ``read_csv`` where missing trailing delimiters would cause segfault. (:issue:`5664`) +- Bug in retaining index name on appending (:issue:`9862`) +- Bug in ``scatter_matrix`` draws unexpected axis ticklabels (:issue:`5662`) +- Fixed bug in ``StataWriter`` resulting in changes to input ``DataFrame`` upon save (:issue:`9795`). +- Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`) +- Bug in ``equals`` causing false negatives when block order differed (:issue:`9330`) +- Bug in grouping with multiple ``pd.Grouper`` where one is non-time based (:issue:`10063`) +- Bug in ``read_sql_table`` error when reading postgres table with timezone (:issue:`7139`) +- Bug in ``DataFrame`` slicing may not retain metadata (:issue:`9776`) +- Bug where ``TimedeltaIndex`` was not properly serialized in fixed ``HDFStore`` (:issue:`9635`) +- Bug with ``TimedeltaIndex`` constructor ignoring ``name`` when given another ``TimedeltaIndex`` as data (:issue:`10025`). +- Bug in ``DataFrameFormatter._get_formatted_index`` with not applying ``max_colwidth`` to the ``DataFrame`` index (:issue:`7856`) +- Bug in ``.loc`` with a read-only ndarray data source (:issue:`10043`) +- Bug in ``groupby.apply()`` that would raise if a passed user defined function returned only ``None`` (for all input). (:issue:`9685`) +- Always use temporary files in pytables tests (:issue:`9992`) +- Bug in plotting continuously using ``secondary_y`` may not show legend properly.
(:issue:`9610`, :issue:`9779`) +- Bug in ``DataFrame.plot(kind="hist")`` results in ``TypeError`` when ``DataFrame`` contains non-numeric columns (:issue:`9853`) +- Bug where repeated plotting of ``DataFrame`` with a ``DatetimeIndex`` may raise ``TypeError`` (:issue:`9852`) +- Bug in ``setup.py`` that would allow an incompat cython version to build (:issue:`9827`) +- Bug in plotting ``secondary_y`` incorrectly attaches ``right_ax`` property to secondary axes specifying itself recursively. (:issue:`9861`) +- Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`) +- Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`) +- Bug in ``FloatArrayFormatter`` where decision boundary for displaying "small" floats in decimal format is off by one order of magnitude for a given display.precision (:issue:`9764`) +- Fixed bug where ``DataFrame.plot()`` raised an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings (:issue:`9671`) +- Not showing a ``DeprecationWarning`` on combining list-likes with an ``Index`` (:issue:`10083`) +- Bug in ``read_csv`` and ``read_table`` when using ``skip_rows`` parameter if blank lines are present. (:issue:`9832`) +- Bug in ``read_csv()`` interprets ``index_col=True`` as ``1`` (:issue:`9798`) +- Bug in index equality comparisons using ``==`` failing on Index/MultiIndex type incompatibility (:issue:`9785`) +- Bug in which ``SparseDataFrame`` could not take `nan` as a column name (:issue:`8822`) +- Bug in ``to_msgpack`` and ``read_msgpack`` zlib and blosc compression support (:issue:`9783`) +- Bug ``GroupBy.size`` doesn't attach index name properly if grouped by ``TimeGrouper`` (:issue:`9925`) +- Bug causing an exception in slice assignments because ``length_of_indexer`` returns wrong results (:issue:`9995`) +- Bug in csv parser causing lines with initial white space plus one non-space character to be skipped. 
(:issue:`9710`) +- Bug in C csv parser causing spurious NaNs when data started with newline followed by white space. (:issue:`10022`) +- Bug causing elements with a null group to spill into the final group when grouping by a ``Categorical`` (:issue:`9603`) +- Bug where .iloc and .loc behavior is not consistent on empty dataframes (:issue:`9964`) +- Bug in invalid attribute access on a ``TimedeltaIndex`` incorrectly raised ``ValueError`` instead of ``AttributeError`` (:issue:`9680`) +- Bug in unequal comparisons between categorical data and a scalar, which was not in the categories (e.g. ``Series(Categorical(list("abc"), ordered=True)) > "d"``. This returned ``False`` for all elements, but now raises a ``TypeError``. Equality comparisons also now return ``False`` for ``==`` and ``True`` for ``!=``. (:issue:`9848`) +- Bug in DataFrame ``__setitem__`` when right hand side is a dictionary (:issue:`9874`) +- Bug in ``where`` when dtype is ``datetime64/timedelta64``, but dtype of other is not (:issue:`9804`) +- Bug in ``MultiIndex.sortlevel()`` results in unicode level name breaks (:issue:`9856`) +- Bug in which ``groupby.transform`` incorrectly enforced output dtypes to match input dtypes. (:issue:`9807`) +- Bug in ``DataFrame`` constructor when ``columns`` parameter is set, and ``data`` is an empty list (:issue:`9939`) +- Bug in bar plot with ``log=True`` raises ``TypeError`` if all values are less than 1 (:issue:`9905`) +- Bug in horizontal bar plot ignores ``log=True`` (:issue:`9905`) +- Bug in PyTables queries that did not return proper results using the index (:issue:`8265`, :issue:`9676`) +- Bug where dividing a dataframe containing values of type ``Decimal`` by another ``Decimal`` would raise. (:issue:`9787`) +- Bug where using DataFrames asfreq would remove the name of the index. 
(:issue:`9885`) +- Bug causing extra index point when resampling BM/BQ (:issue:`9756`) +- Changed caching in ``AbstractHolidayCalendar`` to be at the instance level rather than at the class level as the latter can result in unexpected behaviour. (:issue:`9552`) +- Fixed latex output for MultiIndexed dataframes (:issue:`9778`) +- Bug causing an exception when setting an empty range using ``DataFrame.loc`` (:issue:`9596`) +- Bug in hiding ticklabels with subplots and shared axes when adding a new plot to an existing grid of axes (:issue:`9158`) +- Bug in ``transform`` and ``filter`` when grouping on a categorical variable (:issue:`9921`) +- Bug in ``transform`` when groups are equal in number and dtype to the input index (:issue:`9700`) +- Google BigQuery connector now imports dependencies on a per-method basis. (:issue:`9713`) +- Updated BigQuery connector to no longer use deprecated ``oauth2client.tools.run()`` (:issue:`8327`) +- Bug in subclassed ``DataFrame``. It may not return the correct class, when slicing or subsetting it. (:issue:`9632`) +- Bug in ``.median()`` where non-float null values are not handled correctly (:issue:`10040`) +- Bug in ``Series.fillna()`` where it raises if a numerically convertible string is given (:issue:`10092`) + + +.. _whatsnew_0.16.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.0..v0.16.1 diff --git a/doc/source/whatsnew/v0.16.2.rst b/doc/source/whatsnew/v0.16.2.rst new file mode 100644 index 00000000..543f9c6b --- /dev/null +++ b/doc/source/whatsnew/v0.16.2.rst @@ -0,0 +1,178 @@ +.. _whatsnew_0162: + +v0.16.2 (June 12, 2015) +----------------------- + +{{ header }} + + +This is a minor bug-fix release from 0.16.1 and includes a large number of +bug fixes along with some new features (:meth:`~DataFrame.pipe` method), enhancements, and performance improvements. + +We recommend that all users upgrade to this version.
+ +Highlights include: + +- A new ``pipe`` method, see :ref:`here ` +- Documentation on how to use numba_ with *pandas*, see :ref:`here ` + + +.. contents:: What's new in v0.16.2 + :local: + :backlinks: none + +.. _numba: http://numba.pydata.org + +.. _whatsnew_0162.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0162.enhancements.pipe: + +Pipe +^^^^ + +We've introduced a new method :meth:`DataFrame.pipe`. As suggested by the name, ``pipe`` +should be used to pipe data through a chain of function calls. +The goal is to avoid confusing nested function calls like + +.. code-block:: python + + # df is a DataFrame + # f, g, and h are functions that take and return DataFrames + f(g(h(df), arg1=1), arg2=2, arg3=3) # noqa F821 + +The logic flows from inside out, and function names are separated from their keyword arguments. +This can be rewritten as + +.. code-block:: python + + (df.pipe(h) # noqa F821 + .pipe(g, arg1=1) # noqa F821 + .pipe(f, arg2=2, arg3=3) # noqa F821 + ) + +Now both the code and the logic flow from top to bottom. Keyword arguments are next to +their functions. Overall the code is much more readable. + +In the example above, the functions ``f``, ``g``, and ``h`` each expected the DataFrame as the first positional argument. +When the function you wish to apply takes its data anywhere other than the first argument, pass a tuple +of ``(function, keyword)`` indicating where the DataFrame should flow. For example: + +.. ipython:: python + + import statsmodels.formula.api as sm + + bb = pd.read_csv('data/baseball.csv', index_col='id') + + # sm.ols takes (formula, data) + (bb.query('h > 0') + .assign(ln_h=lambda df: np.log(df.h)) + .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)') + .fit() + .summary() + ) + +The pipe method is inspired by unix pipes, which stream text through +processes. More recently dplyr_ and magrittr_ have introduced the +popular ``(%>%)`` pipe operator for R_. + +See the :ref:`documentation ` for more. 
(:issue:`10129`) + +.. _dplyr: https://github.com/hadley/dplyr +.. _magrittr: https://github.com/smbache/magrittr +.. _R: http://www.r-project.org + +.. _whatsnew_0162.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Added ``rsplit`` to Index/Series StringMethods (:issue:`10303`) + +- Removed the hard-coded size limits on the ``DataFrame`` HTML representation + in the IPython notebook, leaving this to IPython itself (only for IPython + v3.0 or greater). This eliminates the duplicate scroll bars that appeared in + the notebook with large frames (:issue:`10231`). + + Note that the notebook has a ``toggle output scrolling`` feature to limit the + display of very large frames (by clicking left of the output). + You can also configure the way DataFrames are displayed using the pandas + options, see :ref:`here `. + +- ``axis`` parameter of ``DataFrame.quantile`` now also accepts ``index`` and ``column``. (:issue:`9543`) + +.. _whatsnew_0162.api: + +API changes +~~~~~~~~~~~ + +- ``Holiday`` now raises ``NotImplementedError`` if both ``offset`` and ``observance`` are used in the constructor instead of returning an incorrect result (:issue:`10217`). + + +.. _whatsnew_0162.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved ``Series.resample`` performance with ``dtype=datetime64[ns]`` (:issue:`7754`) +- Increased performance of ``str.split`` when ``expand=True`` (:issue:`10081`) + +..
_whatsnew_0162.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in ``Series.hist`` raises an error when a one row ``Series`` was given (:issue:`10214`) +- Bug where ``HDFStore.select`` modifies the passed columns list (:issue:`7212`) +- Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`) +- Bug in ``to_json`` with certain orients and a ``CategoricalIndex`` would segfault (:issue:`10317`) +- Bug where some of the nan functions do not have consistent return dtypes (:issue:`10251`) +- Bug in ``DataFrame.quantile`` on checking that a valid axis was passed (:issue:`9543`) +- Bug in ``groupby.apply`` aggregation for ``Categorical`` not preserving categories (:issue:`10138`) +- Bug in ``to_csv`` where ``date_format`` is ignored if the ``datetime`` is fractional (:issue:`10209`) +- Bug in ``DataFrame.to_json`` with mixed data types (:issue:`10289`) +- Bug in cache updating when consolidating (:issue:`10264`) +- Bug in ``mean()`` where integer dtypes can overflow (:issue:`10172`) +- Bug where ``Panel.from_dict`` does not set dtype when specified (:issue:`10058`) +- Bug in ``Index.union`` raises ``AttributeError`` when passing array-likes. (:issue:`10149`) +- Bug in ``Timestamp``'s ``microsecond``, ``quarter``, ``dayofyear``, ``week`` and ``daysinmonth`` properties return ``np.int`` type, not built-in ``int``. (:issue:`10050`) +- Bug in ``NaT`` raises ``AttributeError`` when accessing the ``daysinmonth``, ``dayofweek`` properties. (:issue:`10096`) +- Bug in Index repr when using the ``max_seq_items=None`` setting (:issue:`10182`). +- Bug in getting timezone data with ``dateutil`` on various platforms (:issue:`9059`, :issue:`8639`, :issue:`9663`, :issue:`10121`) +- Bug in displaying datetimes with mixed frequencies; display 'ms' datetimes to the proper precision.
(:issue:`10170`) +- Bug in ``setitem`` where type promotion is applied to the entire block (:issue:`10280`) +- Bug in ``Series`` arithmetic methods may incorrectly hold names (:issue:`10068`) +- Bug in ``GroupBy.get_group`` when grouping on multiple keys, one of which is categorical. (:issue:`10132`) +- Bug in ``DatetimeIndex`` and ``TimedeltaIndex`` names are lost after timedelta arithmetics ( :issue:`9926`) +- Bug in ``DataFrame`` construction from nested ``dict`` with ``datetime64`` (:issue:`10160`) +- Bug in ``Series`` construction from ``dict`` with ``datetime64`` keys (:issue:`9456`) +- Bug in ``Series.plot(label="LABEL")`` not correctly setting the label (:issue:`10119`) +- Bug in ``plot`` not defaulting to matplotlib ``axes.grid`` setting (:issue:`9792`) +- Bug causing strings containing an exponent, but no decimal to be parsed as ``int`` instead of ``float`` in ``engine='python'`` for the ``read_csv`` parser (:issue:`9565`) +- Bug in ``Series.align`` resets ``name`` when ``fill_value`` is specified (:issue:`10067`) +- Bug in ``read_csv`` causing index name not to be set on an empty DataFrame (:issue:`10184`) +- Bug in ``SparseSeries.abs`` resets ``name`` (:issue:`10241`) +- Bug in ``TimedeltaIndex`` slicing may reset freq (:issue:`10292`) +- Bug in ``GroupBy.get_group`` raises ``ValueError`` when group key contains ``NaT`` (:issue:`6992`) +- Bug in ``SparseSeries`` constructor ignores input data name (:issue:`10258`) +- Bug in ``Categorical.remove_categories`` causing a ``ValueError`` when removing the ``NaN`` category if underlying dtype is floating-point (:issue:`10156`) +- Bug where infer_freq infers time rule (WOM-5XXX) unsupported by to_offset (:issue:`9425`) +- Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`) +- Bug to handle masking empty ``DataFrame`` (:issue:`10126`). 
+- Bug where MySQL interface could not handle numeric table/column names (:issue:`10255`) +- Bug in ``read_csv`` with a ``date_parser`` that returned a ``datetime64`` array of other time resolution than ``[ns]`` (:issue:`10245`) +- Bug in ``Panel.apply`` when the result has ndim=0 (:issue:`10332`) +- Bug in ``read_hdf`` where ``auto_close`` could not be passed (:issue:`9327`). +- Bug in ``read_hdf`` where open stores could not be used (:issue:`10330`). +- Bug in adding empty ``DataFrames``, now results in a ``DataFrame`` that ``.equals`` an empty ``DataFrame`` (:issue:`10181`). +- Bug in ``to_hdf`` and ``HDFStore`` which did not check that complib choices were valid (:issue:`4582`, :issue:`8874`). + + +.. _whatsnew_0.16.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.1..v0.16.2 diff --git a/doc/source/whatsnew/v0.17.0.rst b/doc/source/whatsnew/v0.17.0.rst new file mode 100644 index 00000000..67abad65 --- /dev/null +++ b/doc/source/whatsnew/v0.17.0.rst @@ -0,0 +1,1177 @@ +.. _whatsnew_0170: + +v0.17.0 (October 9, 2015) +------------------------- + +{{ header }} + + +This is a major release from 0.16.2 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +.. warning:: + + pandas >= 0.17.0 will no longer support compatibility with Python version 3.2 (:issue:`9118`) + +.. warning:: + + The ``pandas.io.data`` package is deprecated and will be replaced by the + `pandas-datareader package `_. + This will allow the data modules to be independently updated to your pandas + installation. The API for ``pandas-datareader v0.1.1`` is exactly the same + as in ``pandas v0.17.0`` (:issue:`8961`, :issue:`10861`). + + After installing pandas-datareader, you can easily change your imports: + + .. code-block:: python + + from pandas.io import data, wb + + becomes + + .. 
code-block:: python + + from pandas_datareader import data, wb + +Highlights include: + +- Release the Global Interpreter Lock (GIL) on some cython operations, see :ref:`here ` +- Plotting methods are now available as attributes of the ``.plot`` accessor, see :ref:`here ` +- The sorting API has been revamped to remove some long-time inconsistencies, see :ref:`here ` +- Support for a ``datetime64[ns]`` with timezones as a first-class dtype, see :ref:`here ` +- The default for ``to_datetime`` will now be to ``raise`` when presented with unparseable formats, + previously this would return the original input. Also, date parse + functions now return consistent results. See :ref:`here ` +- The default for ``dropna`` in ``HDFStore`` has changed to ``False``, to store by default all rows even + if they are all ``NaN``, see :ref:`here ` +- Datetime accessor (``dt``) now supports ``Series.dt.strftime`` to generate formatted strings for datetime-likes, and ``Series.dt.total_seconds`` to generate each duration of the timedelta in seconds. See :ref:`here ` +- ``Period`` and ``PeriodIndex`` can handle multiplied freq like ``3D``, which corresponding to 3 days span. See :ref:`here ` +- Development installed versions of pandas will now have ``PEP440`` compliant version strings (:issue:`9518`) +- Development support for benchmarking with the `Air Speed Velocity library `_ (:issue:`8361`) +- Support for reading SAS xport files, see :ref:`here ` +- Documentation comparing SAS to *pandas*, see :ref:`here ` +- Removal of the automatic TimeSeries broadcasting, deprecated since 0.8.0, see :ref:`here ` +- Display format with plain text can optionally align with Unicode East Asian Width, see :ref:`here ` +- Compatibility with Python 3.5 (:issue:`11097`) +- Compatibility with matplotlib 1.5.0 (:issue:`11111`) + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.17.0 + :local: + :backlinks: none + +.. 
_whatsnew_0170.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0170.tz: + +Datetime with TZ +^^^^^^^^^^^^^^^^ + +We are adding an implementation that natively supports datetime with timezones. A ``Series`` or a ``DataFrame`` column previously +*could* be assigned a datetime with timezones, and would work as an ``object`` dtype. This had performance issues with a large +number rows. See the :ref:`docs ` for more details. (:issue:`8260`, :issue:`10763`, :issue:`11034`). + +The new implementation allows for having a single-timezone across all rows, with operations in a performant manner. + +.. ipython:: python + + df = pd.DataFrame({'A': pd.date_range('20130101', periods=3), + 'B': pd.date_range('20130101', periods=3, tz='US/Eastern'), + 'C': pd.date_range('20130101', periods=3, tz='CET')}) + df + df.dtypes + +.. ipython:: python + + df.B + df.B.dt.tz_localize(None) + +This uses a new-dtype representation as well, that is very similar in look-and-feel to its numpy cousin ``datetime64[ns]`` + +.. ipython:: python + + df['B'].dtype + type(df['B'].dtype) + +.. note:: + + There is a slightly different string repr for the underlying ``DatetimeIndex`` as a result of the dtype changes, but + functionally these are the same. + + Previous behavior: + + .. code-block:: ipython + + In [1]: pd.date_range('20130101', periods=3, tz='US/Eastern') + Out[1]: DatetimeIndex(['2013-01-01 00:00:00-05:00', '2013-01-02 00:00:00-05:00', + '2013-01-03 00:00:00-05:00'], + dtype='datetime64[ns]', freq='D', tz='US/Eastern') + + In [2]: pd.date_range('20130101', periods=3, tz='US/Eastern').dtype + Out[2]: dtype('` by supplying the ``kind`` keyword arguments. Unfortunately, many of these kinds of plots use different required and optional keyword arguments, which makes it difficult to discover what any given plot kind uses out of the dozens of possible arguments. 
+ +To alleviate this issue, we have added a new, optional plotting interface, which exposes each kind of plot as a method of the ``.plot`` attribute. Instead of writing ``series.plot(kind=, ...)``, you can now also use ``series.plot.(...)``: + +.. ipython:: + :verbatim: + + In [13]: df = pd.DataFrame(np.random.rand(10, 2), columns=['a', 'b']) + + In [14]: df.plot.bar() + +.. image:: ../_static/whatsnew_plot_submethods.png + +As a result of this change, these methods are now all discoverable via tab-completion: + +.. ipython:: + :verbatim: + + In [15]: df.plot. # noqa: E225, E999 + df.plot.area df.plot.barh df.plot.density df.plot.hist df.plot.line df.plot.scatter + df.plot.bar df.plot.box df.plot.hexbin df.plot.kde df.plot.pie + +Each method signature only includes relevant arguments. Currently, these are limited to required arguments, but in the future these will include optional arguments, as well. For an overview, see the new :ref:`api.dataframe.plotting` API documentation. + +.. _whatsnew_0170.strftime: + +Additional methods for ``dt`` accessor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +strftime +"""""""" + +We are now supporting a ``Series.dt.strftime`` method for datetime-likes to generate a formatted string (:issue:`10110`). Examples: + +.. ipython:: python + + # DatetimeIndex + s = pd.Series(pd.date_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + +.. ipython:: python + + # PeriodIndex + s = pd.Series(pd.period_range('20130101', periods=4)) + s + s.dt.strftime('%Y/%m/%d') + +The string format is as the python standard library and details can be found `here `_ + +total_seconds +""""""""""""" + +``pd.Series`` of type ``timedelta64`` has new method ``.dt.total_seconds()`` returning the duration of the timedelta in seconds (:issue:`10817`) + +.. ipython:: python + + # TimedeltaIndex + s = pd.Series(pd.timedelta_range('1 minutes', periods=4)) + s + s.dt.total_seconds() + +.. 
_whatsnew_0170.periodfreq: + +Period frequency enhancement +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Period``, ``PeriodIndex`` and ``period_range`` can now accept multiplied freq. Also, ``Period.freq`` and ``PeriodIndex.freq`` are now stored as a ``DateOffset`` instance like ``DatetimeIndex``, and not as ``str`` (:issue:`7811`) + +A multiplied freq represents a span of corresponding length. The example below creates a period of 3 days. Addition and subtraction will shift the period by its span. + +.. ipython:: python + + p = pd.Period('2015-08-01', freq='3D') + p + p + 1 + p - 2 + p.to_timestamp() + p.to_timestamp(how='E') + +You can use the multiplied freq in ``PeriodIndex`` and ``period_range``. + +.. ipython:: python + + idx = pd.period_range('2015-08-01', periods=4, freq='2D') + idx + idx + 1 + +.. _whatsnew_0170.enhancements.sas_xport: + +Support for SAS XPORT files +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~pandas.io.read_sas` provides support for reading *SAS XPORT* format files. (:issue:`4052`). + +.. code-block:: python + + df = pd.read_sas('sas_xport.xpt') + +It is also possible to obtain an iterator and read an XPORT file +incrementally. + +.. code-block:: python + + for df in pd.read_sas('sas_xport.xpt', chunksize=10000): + do_something(df) + +See the :ref:`docs ` for more details. + +.. _whatsnew_0170.matheval: + +Support for math functions in .eval() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~pandas.eval` now supports calling math functions (:issue:`4893`) + +.. code-block:: python + + df = pd.DataFrame({'a': np.random.randn(10)}) + df.eval("b = sin(a)") + +The support math functions are `sin`, `cos`, `exp`, `log`, `expm1`, `log1p`, +`sqrt`, `sinh`, `cosh`, `tanh`, `arcsin`, `arccos`, `arctan`, `arccosh`, +`arcsinh`, `arctanh`, `abs` and `arctan2`. + +These functions map to the intrinsics for the ``NumExpr`` engine. For the Python +engine, they are mapped to ``NumPy`` calls. 
+ +Changes to Excel with ``MultiIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In version 0.16.2 a ``DataFrame`` with ``MultiIndex`` columns could not be written to Excel via ``to_excel``. +That functionality has been added (:issue:`10564`), along with updating ``read_excel`` so that the data can +be read back with, no loss of information, by specifying which columns/rows make up the ``MultiIndex`` +in the ``header`` and ``index_col`` parameters (:issue:`4679`) + +See the :ref:`documentation ` for more details. + +.. ipython:: python + + df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=pd.MultiIndex.from_product( + [['foo', 'bar'], ['a', 'b']], names=['col1', 'col2']), + index=pd.MultiIndex.from_product([['j'], ['l', 'k']], + names=['i1', 'i2'])) + + df + df.to_excel('test.xlsx') + + df = pd.read_excel('test.xlsx', header=[0, 1], index_col=[0, 1]) + df + +.. ipython:: python + :suppress: + + import os + os.remove('test.xlsx') + +Previously, it was necessary to specify the ``has_index_names`` argument in ``read_excel``, +if the serialized data had index names. For version 0.17.0 the output format of ``to_excel`` +has been changed to make this keyword unnecessary - the change is shown below. + +**Old** + +.. image:: ../_static/old-excel-index.png + +**New** + +.. image:: ../_static/new-excel-index.png + +.. warning:: + + Excel files saved in version 0.16.2 or prior that had index names will still able to be read in, + but the ``has_index_names`` argument must specified to ``True``. + +.. _whatsnew_0170.gbq: + +Google BigQuery enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Added ability to automatically create a table/dataset using the :func:`pandas.io.gbq.to_gbq` function if the destination table/dataset does not exist. (:issue:`8325`, :issue:`11121`). +- Added ability to replace an existing table and schema when calling the :func:`pandas.io.gbq.to_gbq` function via the ``if_exists`` argument. See the `docs `__ for more details (:issue:`8325`). 
+- ``InvalidColumnOrder`` and ``InvalidPageToken`` in the gbq module will raise ``ValueError`` instead of ``IOError``. +- The ``generate_bq_schema()`` function is now deprecated and will be removed in a future version (:issue:`11121`) +- The gbq module will now support Python 3 (:issue:`11094`). + +.. _whatsnew_0170.east_asian_width: + +Display alignment with Unicode East Asian width +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + + Enabling this option will affect the performance for printing of ``DataFrame`` and ``Series`` (about 2 times slower). + Use only when it is actually required. + +Some East Asian countries use Unicode characters its width is corresponding to 2 alphabets. If a ``DataFrame`` or ``Series`` contains these characters, the default output cannot be aligned properly. The following options are added to enable precise handling for these characters. + +- ``display.unicode.east_asian_width``: Whether to use the Unicode East Asian Width to calculate the display text width. (:issue:`2612`) +- ``display.unicode.ambiguous_as_wide``: Whether to handle Unicode characters belong to Ambiguous as Wide. (:issue:`11102`) + +.. ipython:: python + + df = pd.DataFrame({u'国籍': ['UK', u'日本'], u'名前': ['Alice', u'しのぶ']}) + df; + +.. image:: ../_static/option_unicode01.png + +.. ipython:: python + + pd.set_option('display.unicode.east_asian_width', True) + df; + +.. image:: ../_static/option_unicode02.png + +For further details, see :ref:`here ` + +.. ipython:: python + :suppress: + + pd.set_option('display.unicode.east_asian_width', False) + +.. _whatsnew_0170.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Support for ``openpyxl`` >= 2.2. 
The API for style support is now stable (:issue:`10125`) +- ``merge`` now accepts the argument ``indicator`` which adds a Categorical-type column (by default called ``_merge``) to the output object that takes on the values (:issue:`8790`) + + =================================== ================ + Observation Origin ``_merge`` value + =================================== ================ + Merge key only in ``'left'`` frame ``left_only`` + Merge key only in ``'right'`` frame ``right_only`` + Merge key in both frames ``both`` + =================================== ================ + + .. ipython:: python + + df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b']}) + df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]}) + pd.merge(df1, df2, on='col1', how='outer', indicator=True) + + For more, see the :ref:`updated docs ` + +- ``pd.to_numeric`` is a new function to coerce strings to numbers (possibly with coercion) (:issue:`11133`) + +- ``pd.merge`` will now allow duplicate column names if they are not merged upon (:issue:`10639`). + +- ``pd.pivot`` will now allow passing index as ``None`` (:issue:`3962`). + +- ``pd.concat`` will now use existing Series names if provided (:issue:`10698`). + + .. ipython:: python + + foo = pd.Series([1, 2], name='foo') + bar = pd.Series([1, 2]) + baz = pd.Series([4, 5]) + + Previous behavior: + + .. code-block:: ipython + + In [1]: pd.concat([foo, bar, baz], 1) + Out[1]: + 0 1 2 + 0 1 1 4 + 1 2 2 5 + + New behavior: + + .. ipython:: python + + pd.concat([foo, bar, baz], 1) + +- ``DataFrame`` has gained the ``nlargest`` and ``nsmallest`` methods (:issue:`10393`) + +- Add a ``limit_direction`` keyword argument that works with ``limit`` to enable ``interpolate`` to fill ``NaN`` values forward, backward, or both (:issue:`9218`, :issue:`10420`, :issue:`11115`) + + .. 
ipython:: python + + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13]) + ser.interpolate(limit=1, limit_direction='both') + +- Added a ``DataFrame.round`` method to round the values to a variable number of decimal places (:issue:`10568`). + + .. ipython:: python + + df = pd.DataFrame(np.random.random([3, 3]), + columns=['A', 'B', 'C'], + index=['first', 'second', 'third']) + df + df.round(2) + df.round({'A': 0, 'C': 2}) + +- ``drop_duplicates`` and ``duplicated`` now accept a ``keep`` keyword to target first, last, and all duplicates. The ``take_last`` keyword is deprecated, see :ref:`here ` (:issue:`6511`, :issue:`8505`) + + .. ipython:: python + + s = pd.Series(['A', 'B', 'C', 'A', 'B', 'D']) + s.drop_duplicates() + s.drop_duplicates(keep='last') + s.drop_duplicates(keep=False) + +- Reindex now has a ``tolerance`` argument that allows for finer control of :ref:`basics.limits_on_reindex_fill` (:issue:`10411`): + + .. ipython:: python + + df = pd.DataFrame({'x': range(5), + 't': pd.date_range('2000-01-01', periods=5)}) + df.reindex([0.1, 1.9, 3.5], + method='nearest', + tolerance=0.2) + + When used on a ``DatetimeIndex``, ``TimedeltaIndex`` or ``PeriodIndex``, ``tolerance`` will coerced into a ``Timedelta`` if possible. This allows you to specify tolerance with a string: + + .. ipython:: python + + df = df.set_index('t') + df.reindex(pd.to_datetime(['1999-12-31']), + method='nearest', + tolerance='1 day') + + ``tolerance`` is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods. + +- Added functionality to use the ``base`` argument when resampling a ``TimeDeltaIndex`` (:issue:`10530`) + +- ``DatetimeIndex`` can be instantiated using strings contains ``NaT`` (:issue:`7599`) + +- ``to_datetime`` can now accept the ``yearfirst`` keyword (:issue:`7599`) + +- ``pandas.tseries.offsets`` larger than the ``Day`` offset can now be used with a ``Series`` for addition/subtraction (:issue:`10699`). 
See the :ref:`docs ` for more details. + +- ``pd.Timedelta.total_seconds()`` now returns Timedelta duration to ns precision (previously microsecond precision) (:issue:`10939`) + +- ``PeriodIndex`` now supports arithmetic with ``np.ndarray`` (:issue:`10638`) + +- Support pickling of ``Period`` objects (:issue:`10439`) + +- ``.as_blocks`` will now take a ``copy`` optional argument to return a copy of the data, default is to copy (no change in behavior from prior versions), (:issue:`9607`) + +- ``regex`` argument to ``DataFrame.filter`` now handles numeric column names instead of raising ``ValueError`` (:issue:`10384`). + +- Enable reading gzip compressed files via URL, either by explicitly setting the compression parameter or by inferring from the presence of the HTTP Content-Encoding header in the response (:issue:`8685`) + +- Enable writing Excel files in :ref:`memory ` using StringIO/BytesIO (:issue:`7074`) + +- Enable serialization of lists and dicts to strings in ``ExcelWriter`` (:issue:`8188`) + +- SQL io functions now accept a SQLAlchemy connectable. (:issue:`7877`) + +- ``pd.read_sql`` and ``to_sql`` can accept database URI as ``con`` parameter (:issue:`10214`) + +- ``read_sql_table`` will now allow reading from views (:issue:`10750`). + +- Enable writing complex values to ``HDFStores`` when using the ``table`` format (:issue:`10447`) + +- Enable ``pd.read_hdf`` to be used without specifying a key when the HDF file contains a single dataset (:issue:`10443`) + +- ``pd.read_stata`` will now read Stata 118 type files. (:issue:`9882`) + +- ``msgpack`` submodule has been updated to 0.4.6 with backward compatibility (:issue:`10581`) + +- ``DataFrame.to_dict`` now accepts ``orient='index'`` keyword argument (:issue:`10844`). + +- ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`). + +- Allow passing `kwargs` to the interpolation methods (:issue:`10378`). 
- Improved error message when concatenating an empty iterable of ``DataFrame`` objects (:issue:`9157`)
+- The existing method ``.sort_index()`` will gain the ``level`` keyword to enable level sorting. + +We now have two distinct and non-overlapping methods of sorting. A ``*`` marks items that +will show a ``FutureWarning``. + +To sort by the **values**: + +================================== ==================================== +Previous Replacement +================================== ==================================== +\* ``Series.order()`` ``Series.sort_values()`` +\* ``Series.sort()`` ``Series.sort_values(inplace=True)`` +\* ``DataFrame.sort(columns=...)`` ``DataFrame.sort_values(by=...)`` +================================== ==================================== + +To sort by the **index**: + +================================== ==================================== +Previous Replacement +================================== ==================================== +``Series.sort_index()`` ``Series.sort_index()`` +``Series.sortlevel(level=...)`` ``Series.sort_index(level=...``) +``DataFrame.sort_index()`` ``DataFrame.sort_index()`` +``DataFrame.sortlevel(level=...)`` ``DataFrame.sort_index(level=...)`` +\* ``DataFrame.sort()`` ``DataFrame.sort_index()`` +================================== ==================================== + +We have also deprecated and changed similar methods in two Series-like classes, ``Index`` and ``Categorical``. + +================================== ==================================== +Previous Replacement +================================== ==================================== +\* ``Index.order()`` ``Index.sort_values()`` +\* ``Categorical.order()`` ``Categorical.sort_values()`` +================================== ==================================== + +.. _whatsnew_0170.api_breaking.to_datetime: + +Changes to to_datetime and to_timedelta +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Error handling +"""""""""""""" + +The default for ``pd.to_datetime`` error handling has changed to ``errors='raise'``. +In prior versions it was ``errors='ignore'``. 
Furthermore, the ``coerce`` argument +has been deprecated in favor of ``errors='coerce'``. This means that invalid parsing +will raise rather that return the original input as in previous versions. (:issue:`10636`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.to_datetime(['2009-07-31', 'asd']) + Out[2]: array(['2009-07-31', 'asd'], dtype=object) + +New behavior: + +.. code-block:: ipython + + In [3]: pd.to_datetime(['2009-07-31', 'asd']) + ValueError: Unknown string format + +Of course you can coerce this as well. + +.. ipython:: python + + pd.to_datetime(['2009-07-31', 'asd'], errors='coerce') + +To keep the previous behavior, you can use ``errors='ignore'``: + +.. ipython:: python + + pd.to_datetime(['2009-07-31', 'asd'], errors='ignore') + +Furthermore, ``pd.to_timedelta`` has gained a similar API, of ``errors='raise'|'ignore'|'coerce'``, and the ``coerce`` keyword +has been deprecated in favor of ``errors='coerce'``. + +Consistent parsing +"""""""""""""""""" + +The string parsing of ``to_datetime``, ``Timestamp`` and ``DatetimeIndex`` has +been made consistent. (:issue:`7599`) + +Prior to v0.17.0, ``Timestamp`` and ``to_datetime`` may parse year-only datetime-string incorrectly using today's date, otherwise ``DatetimeIndex`` +uses the beginning of the year. ``Timestamp`` and ``to_datetime`` may raise ``ValueError`` in some types of datetime-string which ``DatetimeIndex`` +can parse, such as a quarterly string. + +Previous behavior: + +.. code-block:: ipython + + In [1]: pd.Timestamp('2012Q2') + Traceback + ... + ValueError: Unable to parse 2012Q2 + + # Results in today's date. + In [2]: pd.Timestamp('2014') + Out [2]: 2014-08-12 00:00:00 + +v0.17.0 can parse them as below. It works on ``DatetimeIndex`` also. + +New behavior: + +.. ipython:: python + + pd.Timestamp('2012Q2') + pd.Timestamp('2014') + pd.DatetimeIndex(['2012Q2', '2014']) + +.. 
note:: + + If you want to perform calculations based on today's date, use ``Timestamp.now()`` and ``pandas.tseries.offsets``. + + .. ipython:: python + + import pandas.tseries.offsets as offsets + pd.Timestamp.now() + pd.Timestamp.now() + offsets.DateOffset(years=1) + +Changes to Index comparisons +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Operator equal on ``Index`` should behavior similarly to ``Series`` (:issue:`9947`, :issue:`10637`) + +Starting in v0.17.0, comparing ``Index`` objects of different lengths will raise +a ``ValueError``. This is to be consistent with the behavior of ``Series``. + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5]) + Out[2]: array([ True, False, False], dtype=bool) + + In [3]: pd.Index([1, 2, 3]) == pd.Index([2]) + Out[3]: array([False, True, False], dtype=bool) + + In [4]: pd.Index([1, 2, 3]) == pd.Index([1, 2]) + Out[4]: False + +New behavior: + +.. code-block:: ipython + + In [8]: pd.Index([1, 2, 3]) == pd.Index([1, 4, 5]) + Out[8]: array([ True, False, False], dtype=bool) + + In [9]: pd.Index([1, 2, 3]) == pd.Index([2]) + ValueError: Lengths must match to compare + + In [10]: pd.Index([1, 2, 3]) == pd.Index([1, 2]) + ValueError: Lengths must match to compare + +Note that this is different from the ``numpy`` behavior where a comparison can +be broadcast: + +.. ipython:: python + + np.array([1, 2, 3]) == np.array([1]) + +or it can return False if broadcasting can not be done: + +.. ipython:: python + :okwarning: + + np.array([1, 2, 3]) == np.array([1, 2]) + +Changes to boolean comparisons vs. None +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Boolean comparisons of a ``Series`` vs ``None`` will now be equivalent to comparing with ``np.nan``, rather than raise ``TypeError``. (:issue:`1079`). + +.. ipython:: python + + s = pd.Series(range(3)) + s.iloc[1] = None + s + +Previous behavior: + +.. 
code-block:: ipython + + In [5]: s == None + TypeError: Could not compare type with Series + +New behavior: + +.. ipython:: python + + s == None + +Usually you simply want to know which values are null. + +.. ipython:: python + + s.isnull() + +.. warning:: + + You generally will want to use ``isnull/notnull`` for these types of comparisons, as ``isnull/notnull`` tells you which elements are null. One has to be + mindful that ``nan's`` don't compare equal, but ``None's`` do. Note that Pandas/numpy uses the fact that ``np.nan != np.nan``, and treats ``None`` like ``np.nan``. + + .. ipython:: python + + None == None + np.nan == np.nan + +.. _whatsnew_0170.api_breaking.hdf_dropna: + +HDFStore dropna behavior +^^^^^^^^^^^^^^^^^^^^^^^^ + +The default behavior for HDFStore write functions with ``format='table'`` is now to keep rows that are all missing. Previously, the behavior was to drop rows that were all missing save the index. The previous behavior can be replicated using the ``dropna=True`` option. (:issue:`9382`) + +Previous behavior: + +.. ipython:: python + + df_with_missing = pd.DataFrame({'col1': [0, np.nan, 2], + 'col2': [1, np.nan, np.nan]}) + + df_with_missing + + +.. code-block:: ipython + + In [27]: + df_with_missing.to_hdf('file.h5', + 'df_with_missing', + format='table', + mode='w') + + In [28]: pd.read_hdf('file.h5', 'df_with_missing') + + Out [28]: + col1 col2 + 0 0 1 + 2 2 NaN + + +New behavior: + +.. ipython:: python + + df_with_missing.to_hdf('file.h5', + 'df_with_missing', + format='table', + mode='w') + + pd.read_hdf('file.h5', 'df_with_missing') + +.. ipython:: python + :suppress: + + import os + os.remove('file.h5') + +See the :ref:`docs ` for more details. + +.. _whatsnew_0170.api_breaking.display_precision: + +Changes to ``display.precision`` option +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``display.precision`` option has been clarified to refer to decimal places (:issue:`10451`). 
+ +Earlier versions of pandas would format floating point numbers to have one less decimal place than the value in +``display.precision``. + +.. code-block:: ipython + + In [1]: pd.set_option('display.precision', 2) + + In [2]: pd.DataFrame({'x': [123.456789]}) + Out[2]: + x + 0 123.5 + +If interpreting precision as "significant figures" this did work for scientific notation but that same interpretation +did not work for values with standard formatting. It was also out of step with how numpy handles formatting. + +Going forward the value of ``display.precision`` will directly control the number of places after the decimal, for +regular formatting as well as scientific notation, similar to how numpy's ``precision`` print option works. + +.. ipython:: python + + pd.set_option('display.precision', 2) + pd.DataFrame({'x': [123.456789]}) + +To preserve output behavior with prior versions the default value of ``display.precision`` has been reduced to ``6`` +from ``7``. + +.. ipython:: python + :suppress: + + pd.set_option('display.precision', 6) + +.. _whatsnew_0170.api_breaking.categorical_unique: + +Changes to ``Categorical.unique`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Categorical.unique`` now returns new ``Categoricals`` with ``categories`` and ``codes`` that are unique, rather than returning ``np.array`` (:issue:`10508`) + +- unordered category: values and categories are sorted by appearance order. +- ordered category: values are sorted by appearance order, categories keep existing order. + +.. 
ipython:: python + + cat = pd.Categorical(['C', 'A', 'B', 'C'], + categories=['A', 'B', 'C'], + ordered=True) + cat + cat.unique() + + cat = pd.Categorical(['C', 'A', 'B', 'C'], + categories=['A', 'B', 'C']) + cat + cat.unique() + +Changes to ``bool`` passed as ``header`` in parsers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In earlier versions of pandas, if a bool was passed the ``header`` argument of +``read_csv``, ``read_excel``, or ``read_html`` it was implicitly converted to +an integer, resulting in ``header=0`` for ``False`` and ``header=1`` for ``True`` +(:issue:`6113`) + +A ``bool`` input to ``header`` will now raise a ``TypeError`` + +.. code-block:: ipython + + In [29]: df = pd.read_csv('data.csv', header=False) + TypeError: Passing a bool to header is invalid. Use header=None for no header or + header=int or list-like of ints to specify the row(s) making up the column names + + +.. _whatsnew_0170.api_breaking.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- Line and kde plot with ``subplots=True`` now uses default colors, not all black. Specify ``color='k'`` to draw all lines in black (:issue:`9894`) +- Calling the ``.value_counts()`` method on a Series with a ``categorical`` dtype now returns a Series with a ``CategoricalIndex`` (:issue:`10704`) +- The metadata properties of subclasses of pandas objects will now be serialized (:issue:`10553`). +- ``groupby`` using ``Categorical`` follows the same rule as ``Categorical.unique`` described above (:issue:`10508`) +- When constructing ``DataFrame`` with an array of ``complex64`` dtype previously meant the corresponding column + was automatically promoted to the ``complex128`` dtype. 
Pandas will now preserve the itemsize of the input for complex data (:issue:`10952`) +- some numeric reduction operators would return ``ValueError``, rather than ``TypeError`` on object types that includes strings and numbers (:issue:`11131`) +- Passing currently unsupported ``chunksize`` argument to ``read_excel`` or ``ExcelFile.parse`` will now raise ``NotImplementedError`` (:issue:`8011`) +- Allow an ``ExcelFile`` object to be passed into ``read_excel`` (:issue:`11198`) +- ``DatetimeIndex.union`` does not infer ``freq`` if ``self`` and the input have ``None`` as ``freq`` (:issue:`11086`) +- ``NaT``'s methods now either raise ``ValueError``, or return ``np.nan`` or ``NaT`` (:issue:`9513`) + + =============================== =============================================================== + Behavior Methods + =============================== =============================================================== + return ``np.nan`` ``weekday``, ``isoweekday`` + return ``NaT`` ``date``, ``now``, ``replace``, ``to_datetime``, ``today`` + return ``np.datetime64('NaT')`` ``to_datetime64`` (unchanged) + raise ``ValueError`` All other public methods (names not beginning with underscores) + =============================== =============================================================== + +.. _whatsnew_0170.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- For ``Series`` the following indexing functions are deprecated (:issue:`10177`). + + ===================== ================================= + Deprecated Function Replacement + ===================== ================================= + ``.irow(i)`` ``.iloc[i]`` or ``.iat[i]`` + ``.iget(i)`` ``.iloc[i]`` or ``.iat[i]`` + ``.iget_value(i)`` ``.iloc[i]`` or ``.iat[i]`` + ===================== ================================= + +- For ``DataFrame`` the following indexing functions are deprecated (:issue:`10177`). 
+ + ===================== ================================= + Deprecated Function Replacement + ===================== ================================= + ``.irow(i)`` ``.iloc[i]`` + ``.iget_value(i, j)`` ``.iloc[i, j]`` or ``.iat[i, j]`` + ``.icol(j)`` ``.iloc[:, j]`` + ===================== ================================= + +.. note:: These indexing function have been deprecated in the documentation since 0.11.0. + +- ``Categorical.name`` was deprecated to make ``Categorical`` more ``numpy.ndarray`` like. Use ``Series(cat, name="whatever")`` instead (:issue:`10482`). +- Setting missing values (NaN) in a ``Categorical``'s ``categories`` will issue a warning (:issue:`10748`). You can still have missing values in the ``values``. +- ``drop_duplicates`` and ``duplicated``'s ``take_last`` keyword was deprecated in favor of ``keep``. (:issue:`6511`, :issue:`8505`) +- ``Series.nsmallest`` and ``nlargest``'s ``take_last`` keyword was deprecated in favor of ``keep``. (:issue:`10792`) +- ``DataFrame.combineAdd`` and ``DataFrame.combineMult`` are deprecated. They + can easily be replaced by using the ``add`` and ``mul`` methods: + ``DataFrame.add(other, fill_value=0)`` and ``DataFrame.mul(other, fill_value=1.)`` + (:issue:`10735`). +- ``TimeSeries`` deprecated in favor of ``Series`` (note that this has been an alias since 0.13.0), (:issue:`10890`) +- ``SparsePanel`` deprecated and will be removed in a future version (:issue:`11157`). 
+- ``Series.is_time_series`` deprecated in favor of ``Series.index.is_all_dates`` (:issue:`11135`) +- Legacy offsets (like ``'A@JAN'``) are deprecated (note that this has been alias since 0.8.0) (:issue:`10878`) +- ``WidePanel`` deprecated in favor of ``Panel``, ``LongPanel`` in favor of ``DataFrame`` (note these have been aliases since < 0.11.0), (:issue:`10892`) +- ``DataFrame.convert_objects`` has been deprecated in favor of type-specific functions ``pd.to_datetime``, ``pd.to_timestamp`` and ``pd.to_numeric`` (new in 0.17.0) (:issue:`11133`). + +.. _whatsnew_0170.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Removal of ``na_last`` parameters from ``Series.order()`` and ``Series.sort()``, in favor of ``na_position``. (:issue:`5231`) +- Remove of ``percentile_width`` from ``.describe()``, in favor of ``percentiles``. (:issue:`7088`) +- Removal of ``colSpace`` parameter from ``DataFrame.to_string()``, in favor of ``col_space``, circa 0.8.0 version. +- Removal of automatic time-series broadcasting (:issue:`2304`) + + .. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.randn(5, 2), + columns=list('AB'), + index=pd.date_range('2013-01-01', periods=5)) + df + + Previously + + .. code-block:: ipython + + In [3]: df + df.A + FutureWarning: TimeSeries broadcasting along DataFrame index by default is deprecated. + Please use DataFrame. to explicitly broadcast arithmetic operations along the index + + Out[3]: + A B + 2013-01-01 0.942870 -0.719541 + 2013-01-02 2.865414 1.120055 + 2013-01-03 -1.441177 0.166574 + 2013-01-04 1.719177 0.223065 + 2013-01-05 0.031393 -2.226989 + + Current + + .. 
ipython:: python + + df.add(df.A, axis='index') + + +- Remove ``table`` keyword in ``HDFStore.put/append``, in favor of using ``format=`` (:issue:`4645`) +- Remove ``kind`` in ``read_excel/ExcelFile`` as it is unused (:issue:`4712`) +- Remove ``infer_type`` keyword from ``pd.read_html`` as it is unused (:issue:`4770`, :issue:`7032`) +- Remove ``offset`` and ``timeRule`` keywords from ``Series.tshift/shift``, in favor of ``freq`` (:issue:`4853`, :issue:`4864`) +- Remove ``pd.load/pd.save`` aliases in favor of ``pd.to_pickle/pd.read_pickle`` (:issue:`3787`) + +.. _whatsnew_0170.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Development support for benchmarking with the `Air Speed Velocity library <https://github.com/spacetelescope/asv/>`_ (:issue:`8361`) +- Added vbench benchmarks for alternative ExcelWriter engines and reading Excel files (:issue:`7171`) +- Performance improvements in ``Categorical.value_counts`` (:issue:`10804`) +- Performance improvements in ``SeriesGroupBy.nunique`` and ``SeriesGroupBy.value_counts`` and ``SeriesGroupBy.transform`` (:issue:`10820`, :issue:`11077`) +- Performance improvements in ``DataFrame.drop_duplicates`` with integer dtypes (:issue:`10917`) +- Performance improvements in ``DataFrame.duplicated`` with wide frames. 
(:issue:`10161`, :issue:`11180`) +- 4x improvement in ``timedelta`` string parsing (:issue:`6755`, :issue:`10426`) +- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`) +- Significantly improved performance of indexing ``MultiIndex`` with slicers (:issue:`10287`) +- 8x improvement in ``iloc`` using list-like input (:issue:`10791`) +- Improved performance of ``Series.isin`` for datetimelike/integer Series (:issue:`10287`) +- 20x improvement in ``concat`` of Categoricals when categories are identical (:issue:`10587`) +- Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`) +- 2x improvement of ``Series.value_counts`` for float dtype (:issue:`10821`) +- Enable ``infer_datetime_format`` in ``to_datetime`` when date components do not have 0 padding (:issue:`11142`) +- Regression from 0.16.1 in constructing ``DataFrame`` from nested dictionary (:issue:`11084`) +- Performance improvements in addition/subtraction operations for ``DateOffset`` with ``Series`` or ``DatetimeIndex`` (:issue:`10744`, :issue:`11205`) + +.. _whatsnew_0170.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in incorrect computation of ``.mean()`` on ``timedelta64[ns]`` because of overflow (:issue:`9442`) +- Bug in ``.isin`` on older numpies (:issue:`11232`) +- Bug in ``DataFrame.to_html(index=False)`` renders unnecessary ``name`` row (:issue:`10344`) +- Bug in ``DataFrame.to_latex()`` the ``column_format`` argument could not be passed (:issue:`9402`) +- Bug in ``DatetimeIndex`` when localizing with ``NaT`` (:issue:`10477`) +- Bug in ``Series.dt`` ops in preserving meta-data (:issue:`10477`) +- Bug in preserving ``NaT`` when passed in an otherwise invalid ``to_datetime`` construction (:issue:`10477`) +- Bug in ``DataFrame.apply`` when function returns categorical series. 
(:issue:`9573`) +- Bug in ``to_datetime`` with invalid dates and formats supplied (:issue:`10154`) +- Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`) +- Bug in ``Series.quantile`` dropping name (:issue:`10881`) +- Bug in ``pd.Series`` when setting a value on an empty ``Series`` whose index has a frequency. (:issue:`10193`) +- Bug in ``pd.Series.interpolate`` with invalid ``order`` keyword values. (:issue:`10633`) +- Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`) +- Bug in ``Index`` construction with a mixed list of tuples (:issue:`10697`) +- Bug in ``DataFrame.reset_index`` when index contains ``NaT``. (:issue:`10388`) +- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`) +- Bug in ``BinGrouper.group_info`` where returned values are not compatible with base class (:issue:`10914`) +- Bug in clearing the cache on ``DataFrame.pop`` and a subsequent inplace op (:issue:`10912`) +- Bug in indexing with a mixed-integer ``Index`` causing an ``ImportError`` (:issue:`10610`) +- Bug in ``Series.count`` when index has nulls (:issue:`10946`) +- Bug in pickling of a non-regular freq ``DatetimeIndex`` (:issue:`11002`) +- Bug causing ``DataFrame.where`` to not respect the ``axis`` parameter when the frame has a symmetric shape. (:issue:`9736`) +- Bug in ``Table.select_column`` where name is not preserved (:issue:`10392`) +- Bug in ``offsets.generate_range`` where ``start`` and ``end`` have finer precision than ``offset`` (:issue:`9907`) +- Bug in ``pd.rolling_*`` where ``Series.name`` would be lost in the output (:issue:`10565`) +- Bug in ``stack`` when index or columns are not unique. 
(:issue:`10417`) +- Bug in setting a ``Panel`` when an axis has a MultiIndex (:issue:`10360`) +- Bug in ``USFederalHolidayCalendar`` where ``USMemorialDay`` and ``USMartinLutherKingJr`` were incorrect (:issue:`10278` and :issue:`9760` ) +- Bug in ``.sample()`` where returned object, if set, gives unnecessary ``SettingWithCopyWarning`` (:issue:`10738`) +- Bug in ``.sample()`` where weights passed as ``Series`` were not aligned along axis before being treated positionally, potentially causing problems if weight indices were not aligned with sampled object. (:issue:`10738`) + +- Regression fixed in (:issue:`9311`, :issue:`6620`, :issue:`9345`), where groupby with a datetime-like converting to float with certain aggregators (:issue:`10979`) + +- Bug in ``DataFrame.interpolate`` with ``axis=1`` and ``inplace=True`` (:issue:`10395`) +- Bug in ``io.sql.get_schema`` when specifying multiple columns as primary + key (:issue:`10385`). + +- Bug in ``groupby(sort=False)`` with datetime-like ``Categorical`` raises ``ValueError`` (:issue:`10505`) +- Bug in ``groupby(axis=1)`` with ``filter()`` throws ``IndexError`` (:issue:`11041`) +- Bug in ``test_categorical`` on big-endian builds (:issue:`10425`) +- Bug in ``Series.shift`` and ``DataFrame.shift`` not supporting categorical data (:issue:`9416`) +- Bug in ``Series.map`` using categorical ``Series`` raises ``AttributeError`` (:issue:`10324`) +- Bug in ``MultiIndex.get_level_values`` including ``Categorical`` raises ``AttributeError`` (:issue:`10460`) +- Bug in ``pd.get_dummies`` with ``sparse=True`` not returning ``SparseDataFrame`` (:issue:`10531`) +- Bug in ``Index`` subtypes (such as ``PeriodIndex``) not returning their own type for ``.drop`` and ``.insert`` methods (:issue:`10620`) +- Bug in ``algos.outer_join_indexer`` when ``right`` array is empty (:issue:`10618`) + +- Bug in ``filter`` (regression from 0.16.0) and ``transform`` when grouping on multiple keys, one of which is datetime-like (:issue:`10114`) + + +- Bug in 
``to_datetime`` and ``to_timedelta`` causing ``Index`` name to be lost (:issue:`10875`) +- Bug in ``len(DataFrame.groupby)`` causing ``IndexError`` when there's a column containing only NaNs (:issue:`11016`) + +- Bug that caused segfault when resampling an empty Series (:issue:`10228`) +- Bug in ``DatetimeIndex`` and ``PeriodIndex.value_counts`` resets name from its result, but retains in result's ``Index``. (:issue:`10150`) +- Bug in ``pd.eval`` using ``numexpr`` engine coerces 1 element numpy array to scalar (:issue:`10546`) +- Bug in ``pd.concat`` with ``axis=0`` when column is of dtype ``category`` (:issue:`10177`) +- Bug in ``read_msgpack`` where input type is not always checked (:issue:`10369`, :issue:`10630`) +- Bug in ``pd.read_csv`` with kwargs ``index_col=False``, ``index_col=['a', 'b']`` or ``dtype`` + (:issue:`10413`, :issue:`10467`, :issue:`10577`) +- Bug in ``Series.from_csv`` with ``header`` kwarg not setting the ``Series.name`` or the ``Series.index.name`` (:issue:`10483`) +- Bug in ``groupby.var`` which caused variance to be inaccurate for small float values (:issue:`10448`) +- Bug in ``Series.plot(kind='hist')`` Y Label not informative (:issue:`10485`) +- Bug in ``read_csv`` when using a converter which generates a ``uint8`` type (:issue:`9266`) + +- Bug causes memory leak in time-series line and area plot (:issue:`9003`) + +- Bug when setting a ``Panel`` sliced along the major or minor axes when the right-hand side is a ``DataFrame`` (:issue:`11014`) +- Bug that returns ``None`` and does not raise ``NotImplementedError`` when operator functions (e.g. 
``.add``) of ``Panel`` are not implemented (:issue:`7692`) + +- Bug in line and kde plot cannot accept multiple colors when ``subplots=True`` (:issue:`9894`) +- Bug in ``DataFrame.plot`` raises ``ValueError`` when color name is specified by multiple characters (:issue:`10387`) + +- Bug in left and right ``align`` of ``Series`` with ``MultiIndex`` may be inverted (:issue:`10665`) +- Bug in left and right ``join`` with ``MultiIndex`` may be inverted (:issue:`10741`) + +- Bug in ``read_stata`` when reading a file with a different order set in ``columns`` (:issue:`10757`) +- Bug in ``Categorical`` may not be represented properly when category contains ``tz`` or ``Period`` (:issue:`10713`) +- Bug in ``Categorical.__iter__`` may not return the correct ``datetime`` and ``Period`` (:issue:`10713`) +- Bug in indexing with a ``PeriodIndex`` on an object with a ``PeriodIndex`` (:issue:`4125`) +- Bug in ``read_csv`` with ``engine='c'``: EOF preceded by a comment, blank line, etc. was not handled correctly (:issue:`10728`, :issue:`10548`) + +- Reading "famafrench" data via ``DataReader`` results in HTTP 404 error because the website URL has changed (:issue:`10591`). 
+- Bug in ``read_msgpack`` where DataFrame to decode has duplicate column names (:issue:`9618`) +- Bug in ``io.common.get_filepath_or_buffer`` which caused reading of valid S3 files to fail if the bucket also contained keys for which the user does not have read permission (:issue:`10604`) +- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`) +- Bug in ``Index.take`` may add unnecessary ``freq`` attribute (:issue:`10791`) +- Bug in ``merge`` with empty ``DataFrame`` may raise ``IndexError`` (:issue:`10824`) +- Bug in ``to_latex`` where unexpected keyword argument for some documented arguments (:issue:`10888`) +- Bug in indexing of large ``DataFrame`` where ``IndexError`` is uncaught (:issue:`10645` and :issue:`10692`) +- Bug in ``read_csv`` when using the ``nrows`` or ``chunksize`` parameters if file contains only a header line (:issue:`9535`) +- Bug in serialization of ``category`` types in HDF5 in presence of alternate encodings. 
(:issue:`10366`) +- Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`) +- Bug in ``pd.DataFrame.diff`` when DataFrame is not consolidated (:issue:`10907`) +- Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead of the original dtype (:issue:`9431`) +- Bug in ``Timedelta`` raising error when slicing from 0s (:issue:`10583`) +- Bug in ``DatetimeIndex.take`` and ``TimedeltaIndex.take`` may not raise ``IndexError`` against invalid index (:issue:`10295`) +- Bug in ``Series([np.nan]).astype('M8[ms]')``, which now returns ``Series([pd.NaT])`` (:issue:`10747`) +- Bug in ``PeriodIndex.order`` reset freq (:issue:`10295`) +- Bug in ``date_range`` when ``freq`` divides ``end`` as nanos (:issue:`10885`) +- Bug in ``iloc`` allowing memory outside bounds of a Series to be accessed with negative integers (:issue:`10779`) +- Bug in ``read_msgpack`` where encoding is not respected (:issue:`10581`) +- Bug preventing access to the first index when using ``iloc`` with a list containing the appropriate negative integer (:issue:`10547`, :issue:`10779`) +- Bug in ``TimedeltaIndex`` formatter causing error while trying to save ``DataFrame`` with ``TimedeltaIndex`` using ``to_csv`` (:issue:`10833`) +- Bug in ``DataFrame.where`` when handling Series slicing (:issue:`10218`, :issue:`9558`) +- Bug where ``pd.read_gbq`` throws ``ValueError`` when BigQuery returns zero rows (:issue:`10273`) +- Bug in ``to_json`` which was causing segmentation fault when serializing 0-rank ndarray (:issue:`9576`) +- Bug in plotting functions may raise ``IndexError`` when plotted on ``GridSpec`` (:issue:`10819`) +- Bug in plot result may show unnecessary minor ticklabels (:issue:`10657`) +- Bug in ``groupby`` incorrect computation for aggregation on ``DataFrame`` with ``NaT`` (e.g. ``first``, ``last``, ``min``). 
(:issue:`10590`, :issue:`11010`) +- Bug when constructing ``DataFrame`` where passing a dictionary with only scalar values and specifying columns did not raise an error (:issue:`10856`) +- Bug in ``.var()`` causing roundoff errors for highly similar values (:issue:`10242`) +- Bug in ``DataFrame.plot(subplots=True)`` with duplicated columns outputs incorrect result (:issue:`10962`) +- Bug in ``Index`` arithmetic may result in incorrect class (:issue:`10638`) +- Bug in ``date_range`` results in empty if freq is negative annually, quarterly and monthly (:issue:`11018`) +- Bug in ``DatetimeIndex`` cannot infer negative freq (:issue:`11018`) +- Remove use of some deprecated numpy comparison operations, mainly in tests. (:issue:`10569`) +- Bug in ``Index`` dtype may not be applied properly (:issue:`11017`) +- Bug in ``io.gbq`` when testing for minimum google api client version (:issue:`10652`) +- Bug in ``DataFrame`` construction from nested ``dict`` with ``timedelta`` keys (:issue:`11129`) +- Bug in ``.fillna`` may raise ``TypeError`` when data contains datetime dtype (:issue:`7095`, :issue:`11153`) +- Bug in ``.groupby`` when number of keys to group by is same as length of index (:issue:`11185`) +- Bug in ``convert_objects`` where converted values might not be returned if all null and ``coerce`` (:issue:`9589`) +- Bug in ``convert_objects`` where ``copy`` keyword was not respected (:issue:`9589`) + + +.. _whatsnew_0.17.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.16.2..v0.17.0 diff --git a/doc/source/whatsnew/v0.17.1.rst b/doc/source/whatsnew/v0.17.1.rst new file mode 100644 index 00000000..55080240 --- /dev/null +++ b/doc/source/whatsnew/v0.17.1.rst @@ -0,0 +1,215 @@ +.. _whatsnew_0171: + +v0.17.1 (November 21, 2015) +--------------------------- + +{{ header }} + + +.. note:: + + We are proud to announce that *pandas* has become a sponsored project of the (`NumFOCUS organization`_). 
This will help ensure the success of development of *pandas* as a world-class open-source project. + +.. _numfocus organization: http://www.numfocus.org/blog/numfocus-announces-new-fiscally-sponsored-project-pandas + +This is a minor bug-fix release from 0.17.0 and includes a large number of +bug fixes along with several new features, enhancements, and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- Support for Conditional HTML Formatting, see :ref:`here <whatsnew_0171.style>` +- Releasing the GIL on the csv reader & other ops, see :ref:`here <whatsnew_0171.performance>` +- Fixed regression in ``DataFrame.drop_duplicates`` from 0.16.2, causing incorrect results on integer values (:issue:`11376`) + +.. contents:: What's new in v0.17.1 + :local: + :backlinks: none + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0171.style: + +Conditional HTML formatting +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. warning:: + This is a new feature and is under active development. + We'll be adding features and possibly making breaking changes in future + releases. Feedback is welcome_. + +.. _welcome: https://github.com/pandas-dev/pandas/issues/11610 + +We've added *experimental* support for conditional HTML formatting: +the visual styling of a DataFrame based on the data. +The styling is accomplished with HTML and CSS. +Access the styler class with the :attr:`pandas.DataFrame.style` attribute, +an instance of :class:`~pandas.core.style.Styler` with your data attached. + +Here's a quick example: + + .. ipython:: python + + np.random.seed(123) + df = pd.DataFrame(np.random.randn(10, 5), columns=list('abcde')) + html = df.style.background_gradient(cmap='viridis', low=.5) + +We can render the HTML to get the following table. + +.. raw:: html + :file: whatsnew_0171_html_table.html + +:class:`~pandas.core.style.Styler` interacts nicely with the Jupyter Notebook. +See the :ref:`documentation ` for more. + +.. 
_whatsnew_0171.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- ``DatetimeIndex`` now supports conversion to strings with ``astype(str)`` (:issue:`10442`) +- Support for ``compression`` (gzip/bz2) in :meth:`pandas.DataFrame.to_csv` (:issue:`7615`) +- ``pd.read_*`` functions can now also accept :class:`python:pathlib.Path`, or :class:`py:py._path.local.LocalPath` + objects for the ``filepath_or_buffer`` argument. (:issue:`11033`) + - The ``DataFrame`` and ``Series`` functions ``.to_csv()``, ``.to_html()`` and ``.to_latex()`` can now handle paths beginning with tildes (e.g. ``~/Documents/``) (:issue:`11438`) +- ``DataFrame`` now uses the fields of a ``namedtuple`` as columns, if columns are not supplied (:issue:`11181`) +- ``DataFrame.itertuples()`` now returns ``namedtuple`` objects, when possible. (:issue:`11269`, :issue:`11625`) +- Added ``axvlines_kwds`` to parallel coordinates plot (:issue:`10709`) +- Option to ``.info()`` and ``.memory_usage()`` to provide for deep introspection of memory consumption. Note that this can be expensive to compute and therefore is an optional parameter. (:issue:`11595`) + + .. ipython:: python + + df = pd.DataFrame({'A': ['foo'] * 1000}) # noqa: F821 + df['B'] = df['A'].astype('category') + + # shows the '+' as we have object dtypes + df.info() + + # we have an accurate memory assessment (but can be expensive to compute this) + df.info(memory_usage='deep') + +- ``Index`` now has a ``fillna`` method (:issue:`10089`) + + .. ipython:: python + + pd.Index([1, np.nan, 3]).fillna(2) + +- Series of type ``category`` now make ``.str.<...>`` and ``.dt.<...>`` accessor methods / properties available, if the categories are of that type. (:issue:`10661`) + + .. 
ipython:: python + + s = pd.Series(list('aabb')).astype('category') + s + s.str.contains("a") + + date = pd.Series(pd.date_range('1/1/2015', periods=5)).astype('category') + date + date.dt.day + +- ``pivot_table`` now has a ``margins_name`` argument so you can use something other than the default of 'All' (:issue:`3335`) +- Implement export of ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`11411`) +- Pretty printing sets (e.g. in DataFrame cells) now uses set literal syntax (``{x, y}``) instead of + Legacy Python syntax (``set([x, y])``) (:issue:`11215`) +- Improve the error message in :func:`pandas.io.gbq.to_gbq` when a streaming insert fails (:issue:`11285`) + and when the DataFrame does not match the schema of the destination table (:issue:`11359`) + +.. _whatsnew_0171.api: + +API changes +~~~~~~~~~~~ + +- raise ``NotImplementedError`` in ``Index.shift`` for non-supported index types (:issue:`8038`) +- ``min`` and ``max`` reductions on ``datetime64`` and ``timedelta64`` dtyped series now + result in ``NaT`` and not ``nan`` (:issue:`11245`). +- Indexing with a null key will raise a ``TypeError``, instead of a ``ValueError`` (:issue:`11356`) +- ``Series.ptp`` will now ignore missing values by default (:issue:`11163`) + +.. _whatsnew_0171.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The ``pandas.io.ga`` module which implements ``google-analytics`` support is deprecated and will be removed in a future version (:issue:`11308`) +- Deprecate the ``engine`` keyword in ``.to_csv()``, which will be removed in a future version (:issue:`11274`) + +.. _whatsnew_0171.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Checking monotonic-ness before sorting on an index (:issue:`11080`) +- ``Series.dropna`` performance improvement when its dtype can't contain ``NaN`` (:issue:`11159`) +- Release the GIL on most datetime field operations (e.g. 
``DatetimeIndex.year``, ``Series.dt.year``), normalization, and conversion to and from ``Period``, ``DatetimeIndex.to_period`` and ``PeriodIndex.to_timestamp`` (:issue:`11263`) +- Release the GIL on some rolling algos: ``rolling_median``, ``rolling_mean``, ``rolling_max``, ``rolling_min``, ``rolling_var``, ``rolling_kurt``, ``rolling_skew`` (:issue:`11450`) +- Release the GIL when reading and parsing text files in ``read_csv``, ``read_table`` (:issue:`11272`) +- Improved performance of ``rolling_median`` (:issue:`11450`) +- Improved performance of ``to_excel`` (:issue:`11352`) +- Performance bug in repr of ``Categorical`` categories, which was rendering the strings before chopping them for display (:issue:`11305`) +- Performance improvement in ``Categorical.remove_unused_categories``, (:issue:`11643`). +- Improved performance of ``Series`` constructor with no data and ``DatetimeIndex`` (:issue:`11433`) +- Improved performance of ``shift``, ``cumprod``, and ``cumsum`` with groupby (:issue:`4095`) + +.. 
_whatsnew_0171.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- ``SparseArray.__iter__()`` now does not cause ``PendingDeprecationWarning`` in Python 3.5 (:issue:`11622`) +- Regression from 0.16.2 for output formatting of long floats/nan, restored in (:issue:`11302`) +- ``Series.sort_index()`` now correctly handles the ``inplace`` option (:issue:`11402`) +- Incorrectly distributed .c file in the build on ``PyPi`` when reading a csv of floats and passing ``na_values=`` would show an exception (:issue:`11374`) +- Bug in ``.to_latex()`` output broken when the index has a name (:issue:`10660`) +- Bug in ``HDFStore.append`` with strings whose encoded length exceeded the max unencoded length (:issue:`11234`) +- Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`) +- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`) +- Bug in using ``DataFrame.ix`` with a MultiIndex indexer (:issue:`11372`) +- Bug in ``date_range`` with ambiguous endpoints (:issue:`11626`) +- Prevent adding new attributes to the accessors ``.str``, ``.dt`` and ``.cat``. Retrieving such + a value was not possible, so error out on setting it. 
(:issue:`10673`) +- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`) +- Bug in output formatting when using an index of ambiguous times (:issue:`11619`) +- Bug in comparisons of Series vs list-likes (:issue:`11339`) +- Bug in ``DataFrame.replace`` with a ``datetime64[ns, tz]`` and a non-compat to_replace (:issue:`11326`, :issue:`11153`) +- Bug in ``isnull`` where ``numpy.datetime64('NaT')`` in a ``numpy.array`` was not determined to be null (:issue:`11206`) +- Bug in list-like indexing with a mixed-integer Index (:issue:`11320`) +- Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`) +- Bug in ``DataFrame.plot`` cannot use hex string colors (:issue:`10299`) +- Regression in ``DataFrame.drop_duplicates`` from 0.16.2, causing incorrect results on integer values (:issue:`11376`) +- Bug in ``pd.eval`` where unary ops in a list error (:issue:`11235`) +- Bug in ``squeeze()`` with zero length arrays (:issue:`11230`, :issue:`8999`) +- Bug in ``describe()`` dropping column names for hierarchical indexes (:issue:`11517`) +- Bug in ``DataFrame.pct_change()`` not propagating ``axis`` keyword on ``.fillna`` method (:issue:`11150`) +- Bug in ``.to_csv()`` when a mix of integer and string column names are passed as the ``columns`` parameter (:issue:`11637`) +- Bug in indexing with a ``range`` (:issue:`11652`) +- Bug in inference of numpy scalars and preserving dtype when setting columns (:issue:`11638`) +- Bug in ``to_sql`` using unicode column names giving ``UnicodeEncodeError`` (:issue:`11431`). +- Fix regression in setting of ``xticks`` in ``plot`` (:issue:`11529`). +- Bug in ``holiday.dates`` where observance rules could not be applied to holiday and doc enhancement (:issue:`11477`, :issue:`11533`) +- Fix plotting issues when having plain ``Axes`` instances instead of ``SubplotAxes`` (:issue:`11520`, :issue:`11556`). 
+- Bug in ``DataFrame.to_latex()`` produces an extra rule when ``header=False`` (:issue:`7124`) +- Bug in ``df.groupby(...).apply(func)`` when a func returns a ``Series`` containing a new datetimelike column (:issue:`11324`) +- Bug in ``pandas.json`` when file to load is big (:issue:`11344`) +- Bugs in ``to_excel`` with duplicate columns (:issue:`11007`, :issue:`10982`, :issue:`10970`) +- Fixed a bug that prevented the construction of an empty series of dtype ``datetime64[ns, tz]`` (:issue:`11245`). +- Bug in ``read_excel`` with MultiIndex containing integers (:issue:`11317`) +- Bug in ``to_excel`` with openpyxl 2.2+ and merging (:issue:`11408`) +- Bug in ``DataFrame.to_dict()`` produces a ``np.datetime64`` object instead of ``Timestamp`` when only datetime is present in data (:issue:`11327`) +- Bug in ``DataFrame.corr()`` raises exception when computes Kendall correlation for DataFrames with boolean and not boolean columns (:issue:`11560`) +- Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`) +- Bug in ``DataFrame.to_csv`` in passing through arguments for formatting ``MultiIndexes``, including ``date_format`` (:issue:`7791`) +- Bug in ``DataFrame.join()`` with ``how='right'`` producing a ``TypeError`` (:issue:`11519`) +- Bug in ``Series.quantile`` with empty list results has ``Index`` with ``object`` dtype (:issue:`11588`) +- Bug in ``pd.merge`` results in empty ``Int64Index`` rather than ``Index(dtype=object)`` when the merge result is empty (:issue:`11588`) +- Bug in ``Categorical.remove_unused_categories`` when having ``NaN`` values (:issue:`11599`) +- Bug in ``DataFrame.to_sparse()`` loses column names for MultiIndexes (:issue:`11600`) +- Bug in ``DataFrame.round()`` with non-unique column index producing a Fatal Python error (:issue:`11611`) +- Bug in ``DataFrame.round()`` with ``decimals`` being a non-unique indexed Series producing extra columns (:issue:`11618`) + + +.. 
_whatsnew_0.17.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.17.0..v0.17.1 diff --git a/doc/source/whatsnew/v0.18.0.rst b/doc/source/whatsnew/v0.18.0.rst new file mode 100644 index 00000000..d3f96d41 --- /dev/null +++ b/doc/source/whatsnew/v0.18.0.rst @@ -0,0 +1,1304 @@ +.. _whatsnew_0180: + +v0.18.0 (March 13, 2016) +------------------------ + +{{ header }} + + +This is a major release from 0.17.1 and includes a small number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +.. warning:: + + pandas >= 0.18.0 no longer supports compatibility with Python version 2.6 + and 3.3 (:issue:`7718`, :issue:`11273`) + +.. warning:: + + ``numexpr`` version 2.4.4 will now show a warning and not be used as a computation back-end for pandas because of some buggy behavior. This does not affect other versions (>= 2.1 and >= 2.4.6). (:issue:`12489`) + +Highlights include: + +- Moving and expanding window functions are now methods on Series and DataFrame, + similar to ``.groupby``, see :ref:`here `. +- Adding support for a ``RangeIndex`` as a specialized form of the ``Int64Index`` + for memory savings, see :ref:`here `. +- API breaking change to the ``.resample`` method to make it more ``.groupby`` + like, see :ref:`here `. +- Removal of support for positional indexing with floats, which was deprecated + since 0.14.0. This will now raise a ``TypeError``, see :ref:`here `. +- The ``.to_xarray()`` function has been added for compatibility with the + `xarray package `__, see :ref:`here `. +- The ``read_sas`` function has been enhanced to read ``sas7bdat`` files, see :ref:`here `. +- Addition of the :ref:`.str.extractall() method `, + and API changes to the :ref:`.str.extract() method ` + and :ref:`.str.cat() method `. +- ``pd.test()`` top-level nose test runner is available (:issue:`4327`). 
+ +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. contents:: What's new in v0.18.0 + :local: + :backlinks: none + +.. _whatsnew_0180.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0180.enhancements.moments: + +Window functions are now methods +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Window functions have been refactored to be methods on ``Series/DataFrame`` objects, rather than top-level functions, which are now deprecated. This allows these window-type functions, to have a similar API to that of ``.groupby``. See the full documentation :ref:`here ` (:issue:`11603`, :issue:`12373`) + + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame({'A': range(10), 'B': np.random.randn(10)}) + df + +Previous behavior: + +.. code-block:: ipython + + In [8]: pd.rolling_mean(df, window=3) + FutureWarning: pd.rolling_mean is deprecated for DataFrame and will be removed in a future version, replace with + DataFrame.rolling(window=3,center=False).mean() + Out[8]: + A B + 0 NaN NaN + 1 NaN NaN + 2 1 0.237722 + 3 2 -0.023640 + 4 3 0.133155 + 5 4 -0.048693 + 6 5 0.342054 + 7 6 0.370076 + 8 7 0.079587 + 9 8 -0.954504 + +New behavior: + +.. ipython:: python + + r = df.rolling(window=3) + +These show a descriptive repr + +.. ipython:: python + + r +with tab-completion of available methods and properties. + +.. code-block:: ipython + + In [9]: r. # noqa E225, E999 + r.A r.agg r.apply r.count r.exclusions r.max r.median r.name r.skew r.sum + r.B r.aggregate r.corr r.cov r.kurt r.mean r.min r.quantile r.std r.var + +The methods operate on the ``Rolling`` object itself + +.. ipython:: python + + r.mean() + +They provide getitem accessors + +.. ipython:: python + + r['A'].mean() + +And multiple aggregations + +.. ipython:: python + + r.agg({'A': ['mean', 'std'], + 'B': ['mean', 'std']}) + +.. 
_whatsnew_0180.enhancements.rename: + +Changes to rename +^^^^^^^^^^^^^^^^^ + +``Series.rename`` and ``NDFrame.rename_axis`` can now take a scalar or list-like +argument for altering the Series or axis *name*, in addition to their old behaviors of altering labels. (:issue:`9494`, :issue:`11965`) + +.. ipython:: python + + s = pd.Series(np.random.randn(5)) + s.rename('newname') + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(5, 2)) + (df.rename_axis("indexname") + .rename_axis("columns_name", axis="columns")) + +The new functionality works well in method chains. Previously these methods only accepted functions or dicts mapping a *label* to a new label. +This continues to work as before for function or dict-like values. + + +.. _whatsnew_0180.enhancements.rangeindex: + +Range index +^^^^^^^^^^^ + +A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory saving alternative for common use cases. This has a similar implementation to the python ``range`` object (``xrange`` in python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed. + +This will now be the default constructed index for ``NDFrame`` objects, rather than an ``Int64Index`` as previously. (:issue:`939`, :issue:`12070`, :issue:`12071`, :issue:`12109`, :issue:`12888`) + +Previous behavior: + +.. code-block:: ipython + + In [3]: s = pd.Series(range(1000)) + + In [4]: s.index + Out[4]: + Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + ... + 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], dtype='int64', length=1000) + + In [6]: s.index.nbytes + Out[6]: 8000 + + +New behavior: + +.. ipython:: python + + s = pd.Series(range(1000)) + s.index + s.index.nbytes + +.. 
_whatsnew_0180.enhancements.extract: + +Changes to str.extract +^^^^^^^^^^^^^^^^^^^^^^ + +The :ref:`.str.extract ` method takes a regular +expression with capture groups, finds the first match in each subject +string, and returns the contents of the capture groups +(:issue:`11386`). + +In v0.18.0, the ``expand`` argument was added to +``extract``. + +- ``expand=False``: it returns a ``Series``, ``Index``, or ``DataFrame``, depending on the subject and regular expression pattern (same behavior as pre-0.18.0). +- ``expand=True``: it always returns a ``DataFrame``, which is more consistent and less confusing from the perspective of a user. + +Currently the default is ``expand=None`` which gives a ``FutureWarning`` and uses ``expand=False``. To avoid this warning, please explicitly specify ``expand``. + +.. code-block:: ipython + + In [1]: pd.Series(['a1', 'b2', 'c3']).str.extract(r'[ab](\d)', expand=None) + FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) + but in a future version of pandas this will be changed to expand=True (return DataFrame) + + Out[1]: + 0 1 + 1 2 + 2 NaN + dtype: object + +Extracting a regular expression with one group returns a Series if +``expand=False``. + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3']).str.extract(r'[ab](\d)', expand=False) + +It returns a ``DataFrame`` with one column if ``expand=True``. + +.. ipython:: python + + pd.Series(['a1', 'b2', 'c3']).str.extract(r'[ab](\d)', expand=True) + +Calling on an ``Index`` with a regex with exactly one capture group +returns an ``Index`` if ``expand=False``. + +.. ipython:: python + + s = pd.Series(["a1", "b2", "c3"], ["A11", "B22", "C33"]) + s.index + s.index.str.extract("(?P[a-zA-Z])", expand=False) + +It returns a ``DataFrame`` with one column if ``expand=True``. + +.. 
ipython:: python + + s.index.str.extract("(?P[a-zA-Z])", expand=True) + +Calling on an ``Index`` with a regex with more than one capture group +raises ``ValueError`` if ``expand=False``. + +.. code-block:: python + + >>> s.index.str.extract("(?P[a-zA-Z])([0-9]+)", expand=False) + ValueError: only one regex group is supported with Index + +It returns a ``DataFrame`` if ``expand=True``. + +.. ipython:: python + + s.index.str.extract("(?P[a-zA-Z])([0-9]+)", expand=True) + +In summary, ``extract(expand=True)`` always returns a ``DataFrame`` +with a row for every subject string, and a column for every capture +group. + +.. _whatsnew_0180.enhancements.extractall: + +Addition of str.extractall +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :ref:`.str.extractall ` method was added +(:issue:`11386`). Unlike ``extract``, which returns only the first +match. + +.. ipython:: python + + s = pd.Series(["a1a2", "b1", "c1"], ["A", "B", "C"]) + s + s.str.extract(r"(?P[ab])(?P\d)", expand=False) + +The ``extractall`` method returns all matches. + +.. ipython:: python + + s.str.extractall(r"(?P[ab])(?P\d)") + +.. _whatsnew_0180.enhancements.strcat: + +Changes to str.cat +^^^^^^^^^^^^^^^^^^ + +The method ``.str.cat()`` concatenates the members of a ``Series``. Before, if ``NaN`` values were present in the Series, calling ``.str.cat()`` on it would return ``NaN``, unlike the rest of the ``Series.str.*`` API. This behavior has been amended to ignore ``NaN`` values by default. (:issue:`11435`). + +A new, friendlier ``ValueError`` is added to protect against the mistake of supplying the ``sep`` as an arg, rather than as a kwarg. (:issue:`11334`). + +.. ipython:: python + + pd.Series(['a', 'b', np.nan, 'c']).str.cat(sep=' ') + pd.Series(['a', 'b', np.nan, 'c']).str.cat(sep=' ', na_rep='?') + +.. code-block:: ipython + + In [2]: pd.Series(['a', 'b', np.nan, 'c']).str.cat(' ') + ValueError: Did you mean to supply a `sep` keyword? + + +.. 
_whatsnew_0180.enhancements.rounding: + +Datetimelike rounding +^^^^^^^^^^^^^^^^^^^^^ + +``DatetimeIndex``, ``Timestamp``, ``TimedeltaIndex``, ``Timedelta`` have gained the ``.round()``, ``.floor()`` and ``.ceil()`` method for datetimelike rounding, flooring and ceiling. (:issue:`4314`, :issue:`11963`) + +Naive datetimes + +.. ipython:: python + + dr = pd.date_range('20130101 09:12:56.1234', periods=3) + dr + dr.round('s') + + # Timestamp scalar + dr[0] + dr[0].round('10s') + +Tz-aware are rounded, floored and ceiled in local times + +.. ipython:: python + + dr = dr.tz_localize('US/Eastern') + dr + dr.round('s') + +Timedeltas + +.. ipython:: python + + t = pd.timedelta_range('1 days 2 hr 13 min 45 us', periods=3, freq='d') + t + t.round('10min') + + # Timedelta scalar + t[0] + t[0].round('2h') + + +In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available through the ``.dt`` accessor of ``Series``. + +.. ipython:: python + + s = pd.Series(dr) + s + s.dt.round('D') + +Formatting of integers in FloatIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Integers in ``FloatIndex``, e.g. 1., are now formatted with a decimal point and a ``0`` digit, e.g. ``1.0`` (:issue:`11713`) +This change not only affects the display to the console, but also the output of IO methods like ``.to_csv`` or ``.to_html``. + +Previous behavior: + +.. code-block:: ipython + + In [2]: s = pd.Series([1, 2, 3], index=np.arange(3.)) + + In [3]: s + Out[3]: + 0 1 + 1 2 + 2 3 + dtype: int64 + + In [4]: s.index + Out[4]: Float64Index([0.0, 1.0, 2.0], dtype='float64') + + In [5]: print(s.to_csv(path=None)) + 0,1 + 1,2 + 2,3 + + +New behavior: + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=np.arange(3.)) + s + s.index + print(s.to_csv(path_or_buf=None, header=False)) + +Changes to dtype assignment behaviors +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When a DataFrame's slice is updated with a new slice of the same dtype, the dtype of the DataFrame will now remain the same. 
(:issue:`10503`) + +Previous behavior: + +.. code-block:: ipython + + In [5]: df = pd.DataFrame({'a': [0, 1, 1], + 'b': pd.Series([100, 200, 300], dtype='uint32')}) + + In [7]: df.dtypes + Out[7]: + a int64 + b uint32 + dtype: object + + In [8]: ix = df['a'] == 1 + + In [9]: df.loc[ix, 'b'] = df.loc[ix, 'b'] + + In [11]: df.dtypes + Out[11]: + a int64 + b int64 + dtype: object + +New behavior: + +.. ipython:: python + + df = pd.DataFrame({'a': [0, 1, 1], + 'b': pd.Series([100, 200, 300], dtype='uint32')}) + df.dtypes + ix = df['a'] == 1 + df.loc[ix, 'b'] = df.loc[ix, 'b'] + df.dtypes + +When a DataFrame's integer slice is partially updated with a new slice of floats that could potentially be down-casted to integer without losing precision, the dtype of the slice will be set to float instead of integer. + +Previous behavior: + +.. code-block:: ipython + + In [4]: df = pd.DataFrame(np.array(range(1,10)).reshape(3,3), + columns=list('abc'), + index=[[4,4,8], [8,10,12]]) + + In [5]: df + Out[5]: + a b c + 4 8 1 2 3 + 10 4 5 6 + 8 12 7 8 9 + + In [7]: df.ix[4, 'c'] = np.array([0., 1.]) + + In [8]: df + Out[8]: + a b c + 4 8 1 2 0 + 10 4 5 1 + 8 12 7 8 9 + +New behavior: + +.. ipython:: python + + df = pd.DataFrame(np.array(range(1,10)).reshape(3,3), + columns=list('abc'), + index=[[4,4,8], [8,10,12]]) + df + df.loc[4, 'c'] = np.array([0., 1.]) + df + +.. _whatsnew_0180.enhancements.xarray: + +to_xarray +^^^^^^^^^ + +In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. In order to provide for continuity, +all ``NDFrame`` objects have gained the ``.to_xarray()`` method in order to convert to ``xarray`` objects, which has +a pandas-like interface for > 2 ndim. (:issue:`11972`) + +See the `xarray full-documentation here `__. + +.. 
code-block:: ipython + + In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4)) + + In [2]: p.to_xarray() + Out[2]: + + array([[[ 0, 1, 2, 3], + [ 4, 5, 6, 7], + [ 8, 9, 10, 11]], + + [[12, 13, 14, 15], + [16, 17, 18, 19], + [20, 21, 22, 23]]]) + Coordinates: + * items (items) int64 0 1 + * major_axis (major_axis) int64 0 1 2 + * minor_axis (minor_axis) int64 0 1 2 3 + +Latex representation +^^^^^^^^^^^^^^^^^^^^ + +``DataFrame`` has gained a ``._repr_latex_()`` method in order to allow for conversion to latex in a ipython/jupyter notebook using nbconvert. (:issue:`11778`) + +Note that this must be activated by setting the option ``pd.display.latex.repr=True`` (:issue:`12182`) + +For example, if you have a jupyter notebook you plan to convert to latex using nbconvert, place the statement ``pd.display.latex.repr=True`` in the first cell to have the contained DataFrame output also stored as latex. + +The options ``display.latex.escape`` and ``display.latex.longtable`` have also been added to the configuration and are used automatically by the ``to_latex`` +method. See the :ref:`available options docs ` for more info. + +.. _whatsnew_0180.enhancements.sas: + +``pd.read_sas()`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +``read_sas`` has gained the ability to read SAS7BDAT files, including compressed files. The files can be read in entirety, or incrementally. For full details see :ref:`here `. (:issue:`4052`) + +.. 
_whatsnew_0180.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Handle truncated floats in SAS xport files (:issue:`11713`) +- Added option to hide index in ``Series.to_string`` (:issue:`11729`) +- ``read_excel`` now supports s3 urls of the format ``s3://bucketname/filename`` (:issue:`11447`) +- add support for ``AWS_S3_HOST`` env variable when reading from s3 (:issue:`12198`) +- A simple version of ``Panel.round()`` is now implemented (:issue:`11763`) +- For Python 3.x, ``round(DataFrame)``, ``round(Series)``, ``round(Panel)`` will work (:issue:`11763`) +- ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the + values it contains (:issue:`11597`) +- ``Series`` gained an ``is_unique`` attribute (:issue:`11946`) +- ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`). +- Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`) +- ``DataFrame.select_dtypes`` now allows the ``np.float16`` type code (:issue:`11990`) +- ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`) +- Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`, :issue:`12572`). For further details see `here `__ +- ``HDFStore`` is now iterable: ``for k in store`` is equivalent to ``for k in store.keys()`` (:issue:`12221`). +- Add missing methods/fields to ``.dt`` for ``Period`` (:issue:`8848`) +- The entire code base has been ``PEP``-ified (:issue:`12096`) + +.. _whatsnew_0180.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- the leading white spaces have been removed from the output of ``.to_string(index=False)`` method (:issue:`11833`) +- the ``out`` parameter has been removed from the ``Series.round()`` method. 
(:issue:`11763`) +- ``DataFrame.round()`` leaves non-numeric columns unchanged in its return, rather than raises. (:issue:`11885`) +- ``DataFrame.head(0)`` and ``DataFrame.tail(0)`` return empty frames, rather than ``self``. (:issue:`11937`) +- ``Series.head(0)`` and ``Series.tail(0)`` return empty series, rather than ``self``. (:issue:`11937`) +- ``to_msgpack`` and ``read_msgpack`` encoding now defaults to ``'utf-8'``. (:issue:`12170`) +- the order of keyword arguments to text file parsing functions (``.read_csv()``, ``.read_table()``, ``.read_fwf()``) changed to group related arguments. (:issue:`11555`) +- ``NaTType.isoformat`` now returns the string ``'NaT`` to allow the result to + be passed to the constructor of ``Timestamp``. (:issue:`12300`) + +NaT and Timedelta operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``NaT`` and ``Timedelta`` have expanded arithmetic operations, which are extended to ``Series`` +arithmetic where applicable. Operations defined for ``datetime64[ns]`` or ``timedelta64[ns]`` +are now also defined for ``NaT`` (:issue:`11564`). + +``NaT`` now supports arithmetic operations with integers and floats. + +.. ipython:: python + + pd.NaT * 1 + pd.NaT * 1.5 + pd.NaT / 2 + pd.NaT * np.nan + +``NaT`` defines more arithmetic operations with ``datetime64[ns]`` and ``timedelta64[ns]``. + +.. ipython:: python + + pd.NaT / pd.NaT + pd.Timedelta('1s') / pd.NaT + +``NaT`` may represent either a ``datetime64[ns]`` null or a ``timedelta64[ns]`` null. +Given the ambiguity, it is treated as a ``timedelta64[ns]``, which allows more operations +to succeed. + +.. ipython:: python + + pd.NaT + pd.NaT + + # same as + pd.Timedelta('1s') + pd.Timedelta('1s') + +as opposed to + +.. 
code-block:: ipython + + In [3]: pd.Timestamp('19900315') + pd.Timestamp('19900315') + TypeError: unsupported operand type(s) for +: 'Timestamp' and 'Timestamp' + +However, when wrapped in a ``Series`` whose ``dtype`` is ``datetime64[ns]`` or ``timedelta64[ns]``, +the ``dtype`` information is respected. + +.. code-block:: ipython + + In [1]: pd.Series([pd.NaT], dtype='=0.18 / any Python | + +----------------------+------------------------+ + | 0.17 / Python 3 | >=0.18 / any Python | + +----------------------+------------------------+ + | 0.18 | >= 0.18 | + +----------------------+------------------------+ + + + 0.18.0 is backward-compatible for reading files packed by older versions, except for files packed with 0.17 in Python 2, in which case only they can only be unpacked in Python 2. + +Signature change for .rank +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Series.rank`` and ``DataFrame.rank`` now have the same signature (:issue:`11759`) + +Previous signature + +.. code-block:: ipython + + In [3]: pd.Series([0,1]).rank(method='average', na_option='keep', + ascending=True, pct=False) + Out[3]: + 0 1 + 1 2 + dtype: float64 + + In [4]: pd.DataFrame([0,1]).rank(axis=0, numeric_only=None, + method='average', na_option='keep', + ascending=True, pct=False) + Out[4]: + 0 + 0 1 + 1 2 + +New signature + +.. ipython:: python + + pd.Series([0,1]).rank(axis=0, method='average', numeric_only=None, + na_option='keep', ascending=True, pct=False) + pd.DataFrame([0,1]).rank(axis=0, method='average', numeric_only=None, + na_option='keep', ascending=True, pct=False) + + +Bug in QuarterBegin with n=0 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, the behavior of the QuarterBegin offset was inconsistent +depending on the date when the ``n`` parameter was 0. (:issue:`11406`) + +The general semantics of anchored offsets for ``n=0`` is to not move the date +when it is an anchor point (e.g., a quarter start date), and otherwise roll +forward to the next anchor point. + +.. 
ipython:: python + + d = pd.Timestamp('2014-02-01') + d + d + pd.offsets.QuarterBegin(n=0, startingMonth=2) + d + pd.offsets.QuarterBegin(n=0, startingMonth=1) + +For the ``QuarterBegin`` offset in previous versions, the date would be rolled +*backwards* if date was in the same month as the quarter start date. + +.. code-block:: ipython + + In [3]: d = pd.Timestamp('2014-02-15') + + In [4]: d + pd.offsets.QuarterBegin(n=0, startingMonth=2) + Out[4]: Timestamp('2014-02-01 00:00:00') + +This behavior has been corrected in version 0.18.0, which is consistent with +other anchored offsets like ``MonthBegin`` and ``YearBegin``. + +.. ipython:: python + + d = pd.Timestamp('2014-02-15') + d + pd.offsets.QuarterBegin(n=0, startingMonth=2) + +.. _whatsnew_0180.breaking.resample: + +Resample API +^^^^^^^^^^^^ + +Like the change in the window functions API :ref:`above `, ``.resample(...)`` is changing to have a more groupby-like API. (:issue:`11732`, :issue:`12702`, :issue:`12202`, :issue:`12332`, :issue:`12334`, :issue:`12348`, :issue:`12448`). + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.rand(10,4), + columns=list('ABCD'), + index=pd.date_range('2010-01-01 09:00:00', + periods=10, freq='s')) + df + + +**Previous API**: + +You would write a resampling operation that immediately evaluates. If a ``how`` parameter was not provided, it +would default to ``how='mean'``. + +.. code-block:: ipython + + In [6]: df.resample('2s') + Out[6]: + A B C D + 2010-01-01 09:00:00 0.485748 0.447351 0.357096 0.793615 + 2010-01-01 09:00:02 0.820801 0.794317 0.364034 0.531096 + 2010-01-01 09:00:04 0.433985 0.314582 0.424104 0.625733 + 2010-01-01 09:00:06 0.624988 0.609738 0.633165 0.612452 + 2010-01-01 09:00:08 0.510470 0.534317 0.573201 0.806949 + +You could also specify a ``how`` directly + +.. 
code-block:: ipython + + In [7]: df.resample('2s', how='sum') + Out[7]: + A B C D + 2010-01-01 09:00:00 0.971495 0.894701 0.714192 1.587231 + 2010-01-01 09:00:02 1.641602 1.588635 0.728068 1.062191 + 2010-01-01 09:00:04 0.867969 0.629165 0.848208 1.251465 + 2010-01-01 09:00:06 1.249976 1.219477 1.266330 1.224904 + 2010-01-01 09:00:08 1.020940 1.068634 1.146402 1.613897 + +**New API**: + +Now, you can write ``.resample(..)`` as a 2-stage operation like ``.groupby(...)``, which +yields a ``Resampler``. + +.. ipython:: python + :okwarning: + + r = df.resample('2s') + r + +Downsampling +"""""""""""" + +You can then use this object to perform operations. +These are downsampling operations (going from a higher frequency to a lower one). + +.. ipython:: python + + r.mean() + +.. ipython:: python + + r.sum() + +Furthermore, resample now supports ``getitem`` operations to perform the resample on specific columns. + +.. ipython:: python + + r[['A','C']].mean() + +and ``.aggregate`` type operations. + +.. ipython:: python + + r.agg({'A' : 'mean', 'B' : 'sum'}) + +These accessors can of course, be combined + +.. ipython:: python + + r[['A','B']].agg(['mean','sum']) + +Upsampling +"""""""""" + +.. currentmodule:: pandas.tseries.resample + +Upsampling operations take you from a lower frequency to a higher frequency. These are now +performed with the ``Resampler`` objects with :meth:`~Resampler.backfill`, +:meth:`~Resampler.ffill`, :meth:`~Resampler.fillna` and :meth:`~Resampler.asfreq` methods. + +.. ipython:: python + + s = pd.Series(np.arange(5, dtype='int64'), + index=pd.date_range('2010-01-01', periods=5, freq='Q')) + s + +Previously + +.. code-block:: ipython + + In [6]: s.resample('M', fill_method='ffill') + Out[6]: + 2010-03-31 0 + 2010-04-30 0 + 2010-05-31 0 + 2010-06-30 1 + 2010-07-31 1 + 2010-08-31 1 + 2010-09-30 2 + 2010-10-31 2 + 2010-11-30 2 + 2010-12-31 3 + 2011-01-31 3 + 2011-02-28 3 + 2011-03-31 4 + Freq: M, dtype: int64 + +New API + +.. 
ipython:: python + + s.resample('M').ffill() + +.. note:: + + In the new API, you can either downsample OR upsample. The prior implementation would allow you to pass an aggregator function (like ``mean``) even though you were upsampling, providing a bit of confusion. + +Previous API will work but with deprecations +"""""""""""""""""""""""""""""""""""""""""""" + +.. warning:: + + This new API for resample includes some internal changes for the prior-to-0.18.0 API, to work with a deprecation warning in most cases, as the resample operation returns a deferred object. We can intercept operations and just do what the (pre 0.18.0) API did (with a warning). Here is a typical use case: + + .. code-block:: ipython + + In [4]: r = df.resample('2s') + + In [6]: r*10 + pandas/tseries/resample.py:80: FutureWarning: .resample() is now a deferred operation + use .resample(...).mean() instead of .resample(...) + + Out[6]: + A B C D + 2010-01-01 09:00:00 4.857476 4.473507 3.570960 7.936154 + 2010-01-01 09:00:02 8.208011 7.943173 3.640340 5.310957 + 2010-01-01 09:00:04 4.339846 3.145823 4.241039 6.257326 + 2010-01-01 09:00:06 6.249881 6.097384 6.331650 6.124518 + 2010-01-01 09:00:08 5.104699 5.343172 5.732009 8.069486 + + However, getting and assignment operations directly on a ``Resampler`` will raise a ``ValueError``: + + .. code-block:: ipython + + In [7]: r.iloc[0] = 5 + ValueError: .resample() is now a deferred operation + use .resample(...).mean() instead of .resample(...) + + There is a situation where the new API can not perform all the operations when using original code. + This code is intending to resample every 2s, take the ``mean`` AND then take the ``min`` of those results. + + .. code-block:: ipython + + In [4]: df.resample('2s').min() + Out[4]: + A 0.433985 + B 0.314582 + C 0.357096 + D 0.531096 + dtype: float64 + + The new API will: + + .. 
ipython:: python + + df.resample('2s').min() + + The good news is the return dimensions will differ between the new API and the old API, so this should loudly raise + an exception. + + To replicate the original operation + + .. ipython:: python + + df.resample('2s').mean().min() + +Changes to eval +^^^^^^^^^^^^^^^ + +In prior versions, new columns assignments in an ``eval`` expression resulted +in an inplace change to the ``DataFrame``. (:issue:`9297`, :issue:`8664`, :issue:`10486`) + +.. ipython:: python + + df = pd.DataFrame({'a': np.linspace(0, 10, 5), 'b': range(5)}) + df + +.. ipython:: python + :suppress: + + df.eval('c = a + b', inplace=True) + +.. code-block:: ipython + + In [12]: df.eval('c = a + b') + FutureWarning: eval expressions containing an assignment currentlydefault to operating inplace. + This will change in a future version of pandas, use inplace=True to avoid this warning. + + In [13]: df + Out[13]: + a b c + 0 0.0 0 0.0 + 1 2.5 1 3.5 + 2 5.0 2 7.0 + 3 7.5 3 10.5 + 4 10.0 4 14.0 + +In version 0.18.0, a new ``inplace`` keyword was added to choose whether the +assignment should be done inplace or return a copy. + +.. ipython:: python + + df + df.eval('d = c - b', inplace=False) + df + df.eval('d = c - b', inplace=True) + df + +.. warning:: + + For backwards compatibility, ``inplace`` defaults to ``True`` if not specified. + This will change in a future version of pandas. If your code depends on an + inplace assignment you should update to explicitly set ``inplace=True`` + +The ``inplace`` keyword parameter was also added the ``query`` method. + +.. ipython:: python + + df.query('a > 5') + df.query('a > 5', inplace=True) + df + +.. warning:: + + Note that the default value for ``inplace`` in a ``query`` + is ``False``, which is consistent with prior versions. + +``eval`` has also been updated to allow multi-line expressions for multiple +assignments. These expressions will be evaluated one at a time in order. 
Only +assignments are valid for multi-line expressions. + +.. ipython:: python + + df + df.eval(""" + e = d + a + f = e - 22 + g = f / 2.0""", inplace=True) + df + + +.. _whatsnew_0180.api: + +Other API changes +^^^^^^^^^^^^^^^^^ +- ``DataFrame.between_time`` and ``Series.between_time`` now only parse a fixed set of time strings. Parsing of date strings is no longer supported and raises a ``ValueError``. (:issue:`11818`) + + .. ipython:: python + + s = pd.Series(range(10), pd.date_range('2015-01-01', freq='H', periods=10)) + s.between_time("7:00am", "9:00am") + + This will now raise. + + .. code-block:: ipython + + In [2]: s.between_time('20150101 07:00:00','20150101 09:00:00') + ValueError: Cannot convert arg ['20150101 07:00:00'] to a time. + +- ``.memory_usage()`` now includes values in the index, as does memory_usage in ``.info()`` (:issue:`11597`) +- ``DataFrame.to_latex()`` now supports non-ascii encodings (eg ``utf-8``) in Python 2 with the parameter ``encoding`` (:issue:`7061`) +- ``pandas.merge()`` and ``DataFrame.merge()`` will show a specific error message when trying to merge with an object that is not of type ``DataFrame`` or a subclass (:issue:`12081`) +- ``DataFrame.unstack`` and ``Series.unstack`` now take ``fill_value`` keyword to allow direct replacement of missing values when an unstack results in missing values in the resulting ``DataFrame``. As an added benefit, specifying ``fill_value`` will preserve the data type of the original stacked data. (:issue:`9746`) +- As part of the new API for :ref:`window functions ` and :ref:`resampling `, aggregation functions have been clarified, raising more informative error messages on invalid aggregations. (:issue:`9052`). A full set of examples are presented in :ref:`groupby `. 
+- Statistical functions for ``NDFrame`` objects (like ``sum(), mean(), min()``) will now raise if non-numpy-compatible arguments are passed in for ``**kwargs`` (:issue:`12301`) +- ``.to_latex`` and ``.to_html`` gain a ``decimal`` parameter like ``.to_csv``; the default is ``'.'`` (:issue:`12031`) +- More helpful error message when constructing a ``DataFrame`` with empty data but with indices (:issue:`8020`) +- ``.describe()`` will now properly handle bool dtype as a categorical (:issue:`6625`) +- More helpful error message with an invalid ``.transform`` with user defined input (:issue:`10165`) +- Exponentially weighted functions now allow specifying alpha directly (:issue:`10789`) and raise ``ValueError`` if parameters violate ``0 < alpha <= 1`` (:issue:`12492`) + +.. _whatsnew_0180.deprecations: + +Deprecations +^^^^^^^^^^^^ + +.. _whatsnew_0180.window_deprecations: + +- The functions ``pd.rolling_*``, ``pd.expanding_*``, and ``pd.ewm*`` are deprecated and replaced by the corresponding method call. Note that + the new suggested syntax includes all of the arguments (even if default) (:issue:`11603`) + + .. code-block:: ipython + + In [1]: s = pd.Series(range(3)) + + In [2]: pd.rolling_mean(s,window=2,min_periods=1) + FutureWarning: pd.rolling_mean is deprecated for Series and + will be removed in a future version, replace with + Series.rolling(min_periods=1,window=2,center=False).mean() + Out[2]: + 0 0.0 + 1 0.5 + 2 1.5 + dtype: float64 + + In [3]: pd.rolling_cov(s, s, window=2) + FutureWarning: pd.rolling_cov is deprecated for Series and + will be removed in a future version, replace with + Series.rolling(window=2).cov(other=) + Out[3]: + 0 NaN + 1 0.5 + 2 0.5 + dtype: float64 + +- The ``freq`` and ``how`` arguments to the ``.rolling``, ``.expanding``, and ``.ewm`` (new) functions are deprecated, and will be removed in a future version. You can simply resample the input prior to creating a window function. (:issue:`11603`). 
+ + For example, instead of ``s.rolling(window=5,freq='D').max()`` to get the max value on a rolling 5 Day window, one could use ``s.resample('D').mean().rolling(window=5).max()``, which first resamples the data to daily data, then provides a rolling 5 day window. + +- ``pd.tseries.frequencies.get_offset_name`` function is deprecated. Use offset's ``.freqstr`` property as alternative (:issue:`11192`) +- ``pandas.stats.fama_macbeth`` routines are deprecated and will be removed in a future version (:issue:`6077`) +- ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var`` routines are deprecated and will be removed in a future version (:issue:`6077`) +- show a ``FutureWarning`` rather than a ``DeprecationWarning`` on using long-time deprecated syntax in ``HDFStore.select``, where the ``where`` clause is not a string-like (:issue:`12027`) + +- The ``pandas.options.display.mpl_style`` configuration has been deprecated + and will be removed in a future version of pandas. This functionality + is better handled by matplotlib's `style sheets`_ (:issue:`11783`). + + +.. _style sheets: http://matplotlib.org/users/style_sheets.html + +.. _whatsnew_0180.float_indexers: + +Removal of deprecated float indexers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In :issue:`4892` indexing with floating point numbers on a non-``Float64Index`` was deprecated (in version 0.14.0). +In 0.18.0, this deprecation warning is removed and these will now raise a ``TypeError``. (:issue:`12165`, :issue:`12333`) + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=[4, 5, 6]) + s + s2 = pd.Series([1, 2, 3], index=list('abc')) + s2 + +Previous behavior: + +.. 
code-block:: ipython + + # this is label indexing + In [2]: s[5.0] + FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[2]: 2 + + # this is positional indexing + In [3]: s.iloc[1.0] + FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[3]: 2 + + # this is label indexing + In [4]: s.loc[5.0] + FutureWarning: scalar indexers for index type Int64Index should be integers and not floating point + Out[4]: 2 + + # .ix would coerce 1.0 to the positional 1, and index + In [5]: s2.ix[1.0] = 10 + FutureWarning: scalar indexers for index type Index should be integers and not floating point + + In [6]: s2 + Out[6]: + a 1 + b 10 + c 3 + dtype: int64 + +New behavior: + +For iloc, getting & setting via a float scalar will always raise. + +.. code-block:: ipython + + In [3]: s.iloc[2.0] + TypeError: cannot do label indexing on with these indexers [2.0] of + +Other indexers will coerce to a like integer for both getting and setting. The ``FutureWarning`` has been dropped for ``.loc``, ``.ix`` and ``[]``. + +.. ipython:: python + + s[5.0] + s.loc[5.0] + +and setting + +.. ipython:: python + + s_copy = s.copy() + s_copy[5.0] = 10 + s_copy + s_copy = s.copy() + s_copy.loc[5.0] = 10 + s_copy + +Positional setting with ``.ix`` and a float indexer will ADD this value to the index, rather than previously setting the value by position. + +.. code-block:: ipython + + In [3]: s2.ix[1.0] = 10 + In [4]: s2 + Out[4]: + a 1 + b 2 + c 3 + 1.0 10 + dtype: int64 + +Slicing will also coerce integer-like floats to integers for a non-``Float64Index``. + +.. ipython:: python + + s.loc[5.0:6] + +Note that for floats that are NOT coercible to ints, the label based bounds will be excluded + +.. ipython:: python + + s.loc[5.1:6] + +Float indexing on a ``Float64Index`` is unchanged. + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=np.arange(3.)) + s[1.0] + s[1.0:2.5] + +.. 
_whatsnew_0180.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`) +- Removal of ``DataMatrix`` module. This was not imported into the pandas namespace in any event (:issue:`12111`) +- Removal of ``cols`` keyword in favor of ``subset`` in ``DataFrame.duplicated()`` and ``DataFrame.drop_duplicates()`` (:issue:`6680`) +- Removal of the ``read_frame`` and ``frame_query`` (both aliases for ``pd.read_sql``) + and ``write_frame`` (alias of ``to_sql``) functions in the ``pd.io.sql`` namespace, + deprecated since 0.14.0 (:issue:`6292`). +- Removal of the ``order`` keyword from ``.factorize()`` (:issue:`6930`) + +.. _whatsnew_0180.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of ``andrews_curves`` (:issue:`11534`) +- Improved huge ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex``'s ops performance including ``NaT`` (:issue:`10277`) +- Improved performance of ``pandas.concat`` (:issue:`11958`) +- Improved performance of ``StataReader`` (:issue:`11591`) +- Improved performance in construction of ``Categoricals`` with ``Series`` of datetimes containing ``NaT`` (:issue:`12077`) + + +- Improved performance of ISO 8601 date parsing for dates without separators (:issue:`11899`), leading zeros (:issue:`11871`) and with white space preceding the time zone (:issue:`9714`) + + + + +.. _whatsnew_0180.bug_fixes: + +Bug Fixes +~~~~~~~~~ + +- Bug in ``GroupBy.size`` when data-frame is empty. 
(:issue:`11699`) +- Bug in ``Period.end_time`` when a multiple of time period is requested (:issue:`11738`) +- Regression in ``.clip`` with tz-aware datetimes (:issue:`11838`) +- Bug in ``date_range`` when the boundaries fell on the frequency (:issue:`11804`, :issue:`12409`) +- Bug in consistency of passing nested dicts to ``.groupby(...).agg(...)`` (:issue:`9052`) +- Accept unicode in ``Timedelta`` constructor (:issue:`11995`) +- Bug in value label reading for ``StataReader`` when reading incrementally (:issue:`12014`) +- Bug in vectorized ``DateOffset`` when ``n`` parameter is ``0`` (:issue:`11370`) +- Compat for numpy 1.11 w.r.t. ``NaT`` comparison changes (:issue:`12049`) +- Bug in ``read_csv`` when reading from a ``StringIO`` in threads (:issue:`11790`) +- Bug in not treating ``NaT`` as a missing value in datetimelikes when factorizing & with ``Categoricals`` (:issue:`12077`) +- Bug in getitem when the values of a ``Series`` were tz-aware (:issue:`12089`) +- Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`) +- Bug in ``pd.concat`` while concatenating tz-aware NaT series. 
(:issue:`11693`, :issue:`11755`, :issue:`12217`) +- Bug in ``pd.read_stata`` with version <= 108 files (:issue:`12232`) +- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`) +- Bug in resampling with ``.nunique`` and a sparse index (:issue:`12352`) +- Removed some compiler warnings (:issue:`12471`) +- Work around compat issues with ``boto`` in python 3.5 (:issue:`11915`) +- Bug in ``NaT`` subtraction from ``Timestamp`` or ``DatetimeIndex`` with timezones (:issue:`11718`) +- Bug in subtraction of ``Series`` of a single tz-aware ``Timestamp`` (:issue:`12290`) +- Use compat iterators in PY2 to support ``.next()`` (:issue:`12299`) +- Bug in ``Timedelta.round`` with negative values (:issue:`11690`) +- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`) +- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`) +- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`) +- Bug in ``Series.apply`` and ``Series.map`` where ``timedelta64`` was not boxed (:issue:`11349`) +- Bug in ``DataFrame.set_index()`` with tz-aware ``Series`` (:issue:`12358`) + + + +- Bug in subclasses of ``DataFrame`` where ``AttributeError`` did not propagate (:issue:`11808`) +- Bug in groupby on tz-aware data where selection was not returning ``Timestamp`` (:issue:`11616`) +- Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`) +- Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`) + +- Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns.
(:issue:`11880`) +- Bug in ``.resample`` on categorical data with ``TimedeltaIndex`` (:issue:`12169`) + + +- Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`) +- Bug in ``Index`` creation from ``Timestamp`` with mixed tz coerces to UTC (:issue:`11488`) +- Bug in ``to_numeric`` where it does not raise if input is more than one dimension (:issue:`11776`) +- Bug in parsing timezone offset strings with non-zero minutes (:issue:`11708`) +- Bug in ``df.plot`` using incorrect colors for bar plots under matplotlib 1.5+ (:issue:`11614`) +- Bug in the ``groupby`` ``plot`` method when using keyword arguments (:issue:`11805`). +- Bug in ``DataFrame.duplicated`` and ``drop_duplicates`` causing spurious matches when setting ``keep=False`` (:issue:`11864`) +- Bug in ``.loc`` result with duplicated key may have ``Index`` with incorrect dtype (:issue:`11497`) +- Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`) +- Bug in ``DataFrame.style`` with spurious zeros (:issue:`12134`) +- Bug in ``DataFrame.style`` with integer columns not starting at 0 (:issue:`12125`) +- Bug in ``.style.bar`` may not be rendered properly in specific browsers (:issue:`11678`) +- Bug in rich comparison of ``Timedelta`` with a ``numpy.array`` of ``Timedelta`` that caused an infinite recursion (:issue:`11835`) +- Bug in ``DataFrame.round`` dropping column index name (:issue:`11986`) +- Bug in ``df.replace`` while replacing value in mixed dtype ``DataFrame`` (:issue:`11698`) +- Bug in ``Index`` prevents copying name of passed ``Index``, when a new name is not provided (:issue:`11193`) +- Bug in ``read_excel`` failing to read any non-empty sheets when empty sheets exist and ``sheetname=None`` (:issue:`11711`) +- Bug in ``read_excel`` failing to raise ``NotImplemented`` error when keywords ``parse_dates`` and ``date_parser`` are provided (:issue:`11544`) +- Bug in ``read_sql`` with ``pymysql`` connections failing to
return chunked data (:issue:`11522`) +- Bug in ``.to_csv`` ignoring formatting parameters ``decimal``, ``na_rep``, ``float_format`` for float indexes (:issue:`11553`) +- Bug in ``Int64Index`` and ``Float64Index`` preventing the use of the modulo operator (:issue:`9244`) +- Bug in ``MultiIndex.drop`` for not lexsorted MultiIndexes (:issue:`12078`) + +- Bug in ``DataFrame`` when masking an empty ``DataFrame`` (:issue:`11859`) + + +- Bug in ``.plot`` potentially modifying the ``colors`` input when the number of columns didn't match the number of series provided (:issue:`12039`). +- Bug in ``Series.plot`` failing when index has a ``CustomBusinessDay`` frequency (:issue:`7222`). +- Bug in ``.to_sql`` for ``datetime.time`` values with sqlite fallback (:issue:`8341`) +- Bug in ``read_excel`` failing to read data with one column when ``squeeze=True`` (:issue:`12157`) +- Bug in ``read_excel`` failing to read one empty column (:issue:`12292`, :issue:`9002`) +- Bug in ``.groupby`` where a ``KeyError`` was not raised for a wrong column if there was only one row in the dataframe (:issue:`11741`) +- Bug in ``.read_csv`` with dtype specified on empty data producing an error (:issue:`12048`) +- Bug in ``.read_csv`` where strings like ``'2E'`` are treated as valid floats (:issue:`12237`) +- Bug in building *pandas* with debugging symbols (:issue:`12123`) + + +- Removed ``millisecond`` property of ``DatetimeIndex``. This would always raise a ``ValueError`` (:issue:`12019`). +- Bug in ``Series`` constructor with read-only data (:issue:`11502`) +- Removed ``pandas._testing.choice()``. Should use ``np.random.choice()``, instead. (:issue:`12386`) +- Bug in ``.loc`` setitem indexer preventing the use of a TZ-aware DatetimeIndex (:issue:`12050`) +- Bug in ``.style`` indexes and MultiIndexes not appearing (:issue:`11655`) +- Bug in ``to_msgpack`` and ``from_msgpack`` which did not correctly serialize or deserialize ``NaT`` (:issue:`12307`). 
+- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`) +- Bug in ``Timestamp`` constructor where microsecond resolution was lost if HHMMSS were not separated with ':' (:issue:`10041`) +- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`) + +- Bug in ``crosstab`` where arguments with non-overlapping indexes would return a ``KeyError`` (:issue:`10291`) + +- Bug in ``DataFrame.apply`` in which reduction was not being prevented for cases in which ``dtype`` was not a numpy dtype (:issue:`12244`) +- Bug when initializing categorical series with a scalar value. (:issue:`12336`) +- Bug when specifying a UTC ``DatetimeIndex`` by setting ``utc=True`` in ``.to_datetime`` (:issue:`11934`) +- Bug when increasing the buffer size of CSV reader in ``read_csv`` (:issue:`12494`) +- Bug when setting columns of a ``DataFrame`` with duplicate column names (:issue:`12344`) + + +.. _whatsnew_0.18.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.17.1..v0.18.0 diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst new file mode 100644 index 00000000..f786ce51 --- /dev/null +++ b/doc/source/whatsnew/v0.18.1.rst @@ -0,0 +1,713 @@ +.. _whatsnew_0181: + +v0.18.1 (May 3, 2016) +--------------------- + +{{ header }} + + +This is a minor bug-fix release from 0.18.0 and includes a large number of +bug fixes along with several new features, enhancements, and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- ``.groupby(...)`` has been enhanced to provide convenient syntax when working with ``.rolling(..)``, ``.expanding(..)`` and ``.resample(..)`` per group, see :ref:`here <whatsnew_0181.deferred_ops>` +- ``pd.to_datetime()`` has gained the ability to assemble dates from a ``DataFrame``, see :ref:`here <whatsnew_0181.enhancements.assembling>` +- Method chaining improvements, see :ref:`here <whatsnew_0181.enhancements.method_chain>`. +- Custom business hour offset, see :ref:`here <whatsnew_0181.enhancements.custombusinesshour>`.
+- Many bug fixes in the handling of ``sparse``, see :ref:`here ` +- Expanded the :ref:`Tutorials section ` with a feature on modern pandas, courtesy of `@TomAugsburger `__. (:issue:`13045`). + + +.. contents:: What's new in v0.18.1 + :local: + :backlinks: none + +.. _whatsnew_0181.new_features: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0181.enhancements.custombusinesshour: + +Custom business hour +^^^^^^^^^^^^^^^^^^^^ + +The ``CustomBusinessHour`` is a mixture of ``BusinessHour`` and ``CustomBusinessDay`` which +allows you to specify arbitrary holidays. For details, +see :ref:`Custom Business Hour ` (:issue:`11514`) + +.. ipython:: python + + from pandas.tseries.offsets import CustomBusinessHour + from pandas.tseries.holiday import USFederalHolidayCalendar + bhour_us = CustomBusinessHour(calendar=USFederalHolidayCalendar()) + +Friday before MLK Day + +.. ipython:: python + + import datetime + dt = datetime.datetime(2014, 1, 17, 15) + + dt + bhour_us + +Tuesday after MLK Day (Monday is skipped because it's a holiday) + +.. ipython:: python + + dt + bhour_us * 2 + +.. _whatsnew_0181.deferred_ops: + +``.groupby(..)`` syntax with window and resample operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``.groupby(...)`` has been enhanced to provide convenient syntax when working with ``.rolling(..)``, ``.expanding(..)`` and ``.resample(..)`` per group, see (:issue:`12486`, :issue:`12738`). + +You can now use ``.rolling(..)`` and ``.expanding(..)`` as methods on groupbys. These return another deferred object (similar to what ``.rolling()`` and ``.expanding()`` do on ungrouped pandas objects). You can then operate on these ``RollingGroupby`` objects in a similar manner. + +Previously you would have to do this to get a rolling window mean per-group: + +.. ipython:: python + + df = pd.DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8, + 'B': np.arange(40)}) + df + +.. 
ipython:: python + + df.groupby('A').apply(lambda x: x.rolling(4).B.mean()) + +Now you can do: + +.. ipython:: python + + df.groupby('A').rolling(4).B.mean() + +For ``.resample(..)`` type of operations, previously you would have to: + +.. ipython:: python + + df = pd.DataFrame({'date': pd.date_range(start='2016-01-01', + periods=4, + freq='W'), + 'group': [1, 1, 2, 2], + 'val': [5, 6, 7, 8]}).set_index('date') + + df + +.. ipython:: python + + df.groupby('group').apply(lambda x: x.resample('1D').ffill()) + +Now you can do: + +.. ipython:: python + + df.groupby('group').resample('1D').ffill() + +.. _whatsnew_0181.enhancements.method_chain: + +Method chaining improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following methods / indexers now accept a ``callable``. It is intended to make +these more useful in method chains, see the :ref:`documentation `. +(:issue:`11485`, :issue:`12533`) + +- ``.where()`` and ``.mask()`` +- ``.loc[]``, ``iloc[]`` and ``.ix[]`` +- ``[]`` indexing + +``.where()`` and ``.mask()`` +"""""""""""""""""""""""""""" + +These can accept a callable for the condition and ``other`` +arguments. + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6], + 'C': [7, 8, 9]}) + df.where(lambda x: x > 4, lambda x: x + 10) + +``.loc[]``, ``.iloc[]``, ``.ix[]`` +"""""""""""""""""""""""""""""""""" + +These can accept a callable, and a tuple of callable as a slicer. The callable +can return a valid boolean indexer or anything which is valid for these indexer's input. + +.. ipython:: python + + # callable returns bool indexer + df.loc[lambda x: x.A >= 2, lambda x: x.sum() > 10] + + # callable returns list of labels + df.loc[lambda x: [1, 2], lambda x: ['A', 'B']] + +``[]`` indexing +""""""""""""""" + +Finally, you can use a callable in ``[]`` indexing of Series, DataFrame and Panel. +The callable must return a valid input for ``[]`` indexing depending on its +class and index type. + +.. 
ipython:: python + + df[lambda x: 'A'] + +Using these methods / indexers, you can chain data selection operations +without using a temporary variable. + +.. ipython:: python + + bb = pd.read_csv('data/baseball.csv', index_col='id') + (bb.groupby(['year', 'team']) + .sum() + .loc[lambda df: df.r > 100]) + +.. _whatsnew_0181.partial_string_indexing: + +Partial string indexing on ``DatetimeIndex`` when part of a ``MultiIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Partial string indexing now matches on ``DatetimeIndex`` when part of a ``MultiIndex`` (:issue:`10331`) + +.. ipython:: python + + dft2 = pd.DataFrame( + np.random.randn(20, 1), + columns=['A'], + index=pd.MultiIndex.from_product([pd.date_range('20130101', + periods=10, + freq='12H'), + ['a', 'b']])) + dft2 + dft2.loc['2013-01-05'] + +On other levels + +.. ipython:: python + + idx = pd.IndexSlice + dft2 = dft2.swaplevel(0, 1).sort_index() + dft2 + dft2.loc[idx[:, '2013-01-05'], :] + +.. _whatsnew_0181.enhancements.assembling: + +Assembling datetimes +^^^^^^^^^^^^^^^^^^^^ + +``pd.to_datetime()`` has gained the ability to assemble datetimes from a passed in ``DataFrame`` or a dict. (:issue:`8158`). + +.. ipython:: python + + df = pd.DataFrame({'year': [2015, 2016], + 'month': [2, 3], + 'day': [4, 5], + 'hour': [2, 3]}) + df + +Assembling using the passed frame. + +.. ipython:: python + + pd.to_datetime(df) + +You can pass only the columns that you need to assemble. + +.. ipython:: python + + pd.to_datetime(df[['year', 'month', 'day']]) + +..
_whatsnew_0181.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``pd.read_csv()`` now supports ``delim_whitespace=True`` for the Python engine (:issue:`12958`) +- ``pd.read_csv()`` now supports opening ZIP files that contain a single CSV, via extension inference or explicit ``compression='zip'`` (:issue:`12175`) +- ``pd.read_csv()`` now supports opening files using xz compression, via extension inference or when explicit ``compression='xz'`` is specified; ``xz`` compression is also supported by ``DataFrame.to_csv`` in the same way (:issue:`11852`) +- ``pd.read_msgpack()`` now always gives writeable ndarrays even when compression is used (:issue:`12359`). +- ``pd.read_msgpack()`` now supports serializing and de-serializing categoricals with msgpack (:issue:`12573`) +- ``.to_json()`` now supports ``NDFrames`` that contain categorical and sparse data (:issue:`10778`) +- ``interpolate()`` now supports ``method='akima'`` (:issue:`7588`). +- ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`) +- Added ``.weekday_name`` property as a component to ``DatetimeIndex`` and the ``.dt`` accessor. (:issue:`11128`) + +- ``Index.take`` now handles ``allow_fill`` and ``fill_value`` consistently (:issue:`12631`) + + .. ipython:: python + + idx = pd.Index([1., 2., 3., 4.], dtype='float') + + # default, allow_fill=True, fill_value=None + idx.take([2, -1]) + idx.take([2, -1], fill_value=True) + +- ``Index`` now supports ``.str.get_dummies()`` which returns ``MultiIndex``, see :ref:`Creating Indicator Variables ` (:issue:`10008`, :issue:`10103`) + + .. ipython:: python + + idx = pd.Index(['a|b', 'a|c', 'b|c']) + idx.str.get_dummies('|') + + +- ``pd.crosstab()`` has gained a ``normalize`` argument for normalizing frequency tables (:issue:`12569`). Examples in the updated docs :ref:`here `.
+- ``.resample(..).interpolate()`` is now supported (:issue:`12925`) +- ``.isin()`` now accepts passed ``sets`` (:issue:`12988`) + +.. _whatsnew_0181.sparse: + +Sparse changes +~~~~~~~~~~~~~~ + +These changes conform sparse handling to return the correct types and work to make a smoother experience with indexing. + +``SparseArray.take`` now returns a scalar for scalar input, ``SparseArray`` for others. Furthermore, it handles a negative indexer with the same rule as ``Index`` (:issue:`10560`, :issue:`12796`) + +.. code-block:: python + + s = pd.SparseArray([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) + s.take(0) + s.take([1, 2, 3]) + +- Bug in ``SparseSeries[]`` indexing with ``Ellipsis`` raises ``KeyError`` (:issue:`9467`) +- Bug in ``SparseArray[]`` where indexing with tuples is not handled properly (:issue:`12966`) +- Bug in ``SparseSeries.loc[]`` with list-like input raises ``TypeError`` (:issue:`10560`) +- Bug in ``SparseSeries.iloc[]`` with scalar input may raise ``IndexError`` (:issue:`10560`) +- Bug in ``SparseSeries.loc[]``, ``.iloc[]`` with ``slice`` returns ``SparseArray``, rather than ``SparseSeries`` (:issue:`10560`) +- Bug in ``SparseDataFrame.loc[]``, ``.iloc[]`` may result in dense ``Series``, rather than ``SparseSeries`` (:issue:`12787`) +- Bug in ``SparseArray`` addition ignores ``fill_value`` of right hand side (:issue:`12910`) +- Bug in ``SparseArray`` mod raises ``AttributeError`` (:issue:`12910`) +- Bug in ``SparseArray`` pow calculates ``1 ** np.nan`` as ``np.nan`` which must be 1 (:issue:`12910`) +- Bug in ``SparseArray`` comparison may output incorrect result or raise ``ValueError`` (:issue:`12971`) +- Bug in ``SparseSeries.__repr__`` raises ``TypeError`` when it is longer than ``max_rows`` (:issue:`10560`) +- Bug in ``SparseSeries.shape`` ignores ``fill_value`` (:issue:`10452`) +- Bug in ``SparseSeries`` and ``SparseArray`` may have different ``dtype`` from its dense values (:issue:`12908`) +- Bug in ``SparseSeries.reindex`` incorrectly
handle ``fill_value`` (:issue:`12797`) +- Bug in ``SparseArray.to_frame()`` results in ``DataFrame``, rather than ``SparseDataFrame`` (:issue:`9850`) +- Bug in ``SparseSeries.value_counts()`` does not count ``fill_value`` (:issue:`6749`) +- Bug in ``SparseArray.to_dense()`` does not preserve ``dtype`` (:issue:`10648`) +- Bug in ``SparseArray.to_dense()`` incorrectly handle ``fill_value`` (:issue:`12797`) +- Bug in ``pd.concat()`` of ``SparseSeries`` results in dense (:issue:`10536`) +- Bug in ``pd.concat()`` of ``SparseDataFrame`` incorrectly handle ``fill_value`` (:issue:`9765`) +- Bug in ``pd.concat()`` of ``SparseDataFrame`` may raise ``AttributeError`` (:issue:`12174`) +- Bug in ``SparseArray.shift()`` may raise ``NameError`` or ``TypeError`` (:issue:`12908`) + +.. _whatsnew_0181.api: + +API changes +~~~~~~~~~~~ + +.. _whatsnew_0181.api.groubynth: + +``.groupby(..).nth()`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The index in ``.groupby(..).nth()`` output is now more consistent when the ``as_index`` argument is passed (:issue:`11039`): + +.. ipython:: python + + df = pd.DataFrame({'A': ['a', 'b', 'a'], + 'B': [1, 2, 3]}) + df + +Previous behavior: + +.. code-block:: ipython + + In [3]: df.groupby('A', as_index=True)['B'].nth(0) + Out[3]: + 0 1 + 1 2 + Name: B, dtype: int64 + + In [4]: df.groupby('A', as_index=False)['B'].nth(0) + Out[4]: + 0 1 + 1 2 + Name: B, dtype: int64 + +New behavior: + +.. ipython:: python + + df.groupby('A', as_index=True)['B'].nth(0) + df.groupby('A', as_index=False)['B'].nth(0) + +Furthermore, previously, a ``.groupby`` would always sort, regardless if ``sort=False`` was passed with ``.nth()``. + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.randn(100, 2), columns=['a', 'b']) + df['c'] = np.random.randint(0, 4, 100) + +Previous behavior: + +.. 
code-block:: ipython + + In [4]: df.groupby('c', sort=True).nth(1) + Out[4]: + a b + c + 0 -0.334077 0.002118 + 1 0.036142 -2.074978 + 2 -0.720589 0.887163 + 3 0.859588 -0.636524 + + In [5]: df.groupby('c', sort=False).nth(1) + Out[5]: + a b + c + 0 -0.334077 0.002118 + 1 0.036142 -2.074978 + 2 -0.720589 0.887163 + 3 0.859588 -0.636524 + +New behavior: + +.. ipython:: python + + df.groupby('c', sort=True).nth(1) + df.groupby('c', sort=False).nth(1) + + +.. _whatsnew_0181.numpy_compatibility: + +numpy function compatibility +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Compatibility between pandas array-like methods (e.g. ``sum`` and ``take``) and their ``numpy`` +counterparts has been greatly increased by augmenting the signatures of the ``pandas`` methods so +as to accept arguments that can be passed in from ``numpy``, even if they are not necessarily +used in the ``pandas`` implementation (:issue:`12644`, :issue:`12638`, :issue:`12687`) + +- ``.searchsorted()`` for ``Index`` and ``TimedeltaIndex`` now accept a ``sorter`` argument to maintain compatibility with numpy's ``searchsorted`` function (:issue:`12238`) +- Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`) + +An example of this signature augmentation is illustrated below: + +.. code-block:: python + + sp = pd.SparseDataFrame([1, 2, 3]) + sp + +Previous behaviour: + +.. code-block:: ipython + + In [2]: np.cumsum(sp, axis=0) + ... + TypeError: cumsum() takes at most 2 arguments (4 given) + +New behaviour: + +.. code-block:: python + + np.cumsum(sp, axis=0) + +.. _whatsnew_0181.apply_resample: + +Using ``.apply`` on groupby resampling +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Using ``apply`` on resampling groupby operations (using a ``pd.TimeGrouper``) now has the same output types as similar ``apply`` calls on other groupby operations. (:issue:`11742`). + +.. 
ipython:: python + + df = pd.DataFrame({'date': pd.to_datetime(['10/10/2000', '11/10/2000']), + 'value': [10, 13]}) + df + +Previous behavior: + +.. code-block:: ipython + + In [1]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x.value.sum()) + Out[1]: + ... + TypeError: cannot concatenate a non-NDFrame object + + # Output is a Series + In [2]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x[['value']].sum()) + Out[2]: + date + 2000-10-31 value 10 + 2000-11-30 value 13 + dtype: int64 + +New behavior: + +.. code-block:: ipython + + # Output is a Series + In [55]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x.value.sum()) + Out[55]: + date + 2000-10-31 10 + 2000-11-30 13 + Freq: M, dtype: int64 + + # Output is a DataFrame + In [56]: df.groupby(pd.TimeGrouper(key='date', + ...: freq='M')).apply(lambda x: x[['value']].sum()) + Out[56]: + value + date + 2000-10-31 10 + 2000-11-30 13 + +.. _whatsnew_0181.read_csv_exceptions: + +Changes in ``read_csv`` exceptions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +In order to standardize the ``read_csv`` API for both the ``c`` and ``python`` engines, both will now raise an +``EmptyDataError``, a subclass of ``ValueError``, in response to empty columns or header (:issue:`12493`, :issue:`12506`) + +Previous behaviour: + +.. code-block:: ipython + + In [1]: import io + + In [2]: df = pd.read_csv(io.StringIO(''), engine='c') + ... + ValueError: No columns to parse from file + + In [3]: df = pd.read_csv(io.StringIO(''), engine='python') + ... + StopIteration + +New behaviour: + +.. code-block:: ipython + + In [1]: df = pd.read_csv(io.StringIO(''), engine='c') + ... + pandas.io.common.EmptyDataError: No columns to parse from file + + In [2]: df = pd.read_csv(io.StringIO(''), engine='python') + ... 
+ pandas.io.common.EmptyDataError: No columns to parse from file + +In addition to this error change, several others have been made as well: + +- ``CParserError`` now sub-classes ``ValueError`` instead of just an ``Exception`` (:issue:`12551`) +- A ``CParserError`` is now raised instead of a generic ``Exception`` in ``read_csv`` when the ``c`` engine cannot parse a column (:issue:`12506`) +- A ``ValueError`` is now raised instead of a generic ``Exception`` in ``read_csv`` when the ``c`` engine encounters a ``NaN`` value in an integer column (:issue:`12506`) +- A ``ValueError`` is now raised instead of a generic ``Exception`` in ``read_csv`` when ``true_values`` is specified, and the ``c`` engine encounters an element in a column containing unencodable bytes (:issue:`12506`) +- ``pandas.parser.OverflowError`` exception has been removed and has been replaced with Python's built-in ``OverflowError`` exception (:issue:`12506`) +- ``pd.read_csv()`` no longer allows a combination of strings and integers for the ``usecols`` parameter (:issue:`12678`) + + +.. _whatsnew_0181.api.to_datetime: + +``to_datetime`` error changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Bugs in ``pd.to_datetime()`` when passing a ``unit`` with convertible entries and ``errors='coerce'`` or non-convertible with ``errors='ignore'``. Furthermore, an ``OutOfBoundsDatetime`` exception will be raised when an out-of-range value is encountered for that unit when ``errors='raise'``. (:issue:`11758`, :issue:`13052`, :issue:`13059`) + +Previous behaviour: + +.. code-block:: ipython + + In [27]: pd.to_datetime(1420043460, unit='s', errors='coerce') + Out[27]: NaT + + In [28]: pd.to_datetime(11111111, unit='D', errors='ignore') + OverflowError: Python int too large to convert to C long + + In [29]: pd.to_datetime(11111111, unit='D', errors='raise') + OverflowError: Python int too large to convert to C long + +New behaviour: + +..
code-block:: ipython + + In [2]: pd.to_datetime(1420043460, unit='s', errors='coerce') + Out[2]: Timestamp('2014-12-31 16:31:00') + + In [3]: pd.to_datetime(11111111, unit='D', errors='ignore') + Out[3]: 11111111 + + In [4]: pd.to_datetime(11111111, unit='D', errors='raise') + OutOfBoundsDatetime: cannot convert input with unit 'D' + +.. _whatsnew_0181.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``.swaplevel()`` for ``Series``, ``DataFrame``, ``Panel``, and ``MultiIndex`` now features defaults for its first two parameters ``i`` and ``j`` that swap the two innermost levels of the index. (:issue:`12934`) +- ``.searchsorted()`` for ``Index`` and ``TimedeltaIndex`` now accept a ``sorter`` argument to maintain compatibility with numpy's ``searchsorted`` function (:issue:`12238`) +- ``Period`` and ``PeriodIndex`` now raises ``IncompatibleFrequency`` error which inherits ``ValueError`` rather than raw ``ValueError`` (:issue:`12615`) +- ``Series.apply`` for category dtype now applies the passed function to each of the ``.categories`` (and not the ``.codes``), and returns a ``category`` dtype if possible (:issue:`12473`) +- ``read_csv`` will now raise a ``TypeError`` if ``parse_dates`` is neither a boolean, list, or dictionary (matches the doc-string) (:issue:`5636`) +- The default for ``.query()/.eval()`` is now ``engine=None``, which will use ``numexpr`` if it's installed; otherwise it will fallback to the ``python`` engine. This mimics the pre-0.18.1 behavior if ``numexpr`` is installed (and which, previously, if numexpr was not installed, ``.query()/.eval()`` would raise). 
(:issue:`12749`) +- ``pd.show_versions()`` now includes ``pandas_datareader`` version (:issue:`12740`) +- Provide a proper ``__name__`` and ``__qualname__`` attributes for generic functions (:issue:`12021`) +- ``pd.concat(ignore_index=True)`` now uses ``RangeIndex`` as default (:issue:`12695`) +- ``pd.merge()`` and ``DataFrame.join()`` will show a ``UserWarning`` when merging/joining a single- with a multi-leveled dataframe (:issue:`9455`, :issue:`12219`) +- Compat with ``scipy`` > 0.17 for deprecated ``piecewise_polynomial`` interpolation method; support for the replacement ``from_derivatives`` method (:issue:`12887`) + +.. _whatsnew_0181.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- The method name ``Index.sym_diff()`` is deprecated and can be replaced by ``Index.symmetric_difference()`` (:issue:`12591`) +- The method name ``Categorical.sort()`` is deprecated in favor of ``Categorical.sort_values()`` (:issue:`12882`) + + + + + + + + +.. _whatsnew_0181.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved speed of SAS reader (:issue:`12656`, :issue:`12961`) +- Performance improvements in ``.groupby(..).cumcount()`` (:issue:`11039`) +- Improved memory usage in ``pd.read_csv()`` when using ``skiprows=an_integer`` (:issue:`13005`) +- Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`) +- Improved performance of ``Period`` construction and time series plotting (:issue:`12903`, :issue:`11831`). +- Improved performance of ``.str.encode()`` and ``.str.decode()`` methods (:issue:`13008`) +- Improved performance of ``to_numeric`` if input is numeric dtype (:issue:`12777`) +- Improved performance of sparse arithmetic with ``IntIndex`` (:issue:`13036`) + + + + + + + + +.. 
_whatsnew_0181.bug_fixes: + +Bug fixes +~~~~~~~~~ +- ``usecols`` parameter in ``pd.read_csv`` is now respected even when the lines of a CSV file are not even (:issue:`12203`) +- Bug in ``groupby.transform(..)`` when ``axis=1`` is specified with a non-monotonic ordered index (:issue:`12713`) +- Bug in ``Period`` and ``PeriodIndex`` creation raises ``KeyError`` if ``freq="Minute"`` is specified. Note that "Minute" freq is deprecated in v0.17.0, and recommended to use ``freq="T"`` instead (:issue:`11854`) +- Bug in ``.resample(...).count()`` with a ``PeriodIndex`` always raising a ``TypeError`` (:issue:`12774`) +- Bug in ``.resample(...)`` with a ``PeriodIndex`` casting to a ``DatetimeIndex`` when empty (:issue:`12868`) +- Bug in ``.resample(...)`` with a ``PeriodIndex`` when resampling to an existing frequency (:issue:`12770`) +- Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`) +- Bug in ``Series`` construction with ``Categorical`` and ``dtype='category'`` is specified (:issue:`12574`) +- Bugs in concatenation with a coercible dtype was too aggressive, resulting in different dtypes in output formatting when an object was longer than ``display.max_rows`` (:issue:`12411`, :issue:`12045`, :issue:`11594`, :issue:`10571`, :issue:`12211`) +- Bug in ``float_format`` option with option not being validated as a callable. 
(:issue:`12706`) +- Bug in ``GroupBy.filter`` when ``dropna=False`` and no groups fulfilled the criteria (:issue:`12768`) +- Bug in ``__name__`` of ``.cum*`` functions (:issue:`12021`) +- Bug in ``.astype()`` of a ``Float64Index/Int64Index`` to an ``Int64Index`` (:issue:`12881`) +- Bug in round tripping an integer based index in ``.to_json()/.read_json()`` when ``orient='index'`` (the default) (:issue:`12866`) +- Bug in plotting ``Categorical`` dtypes cause error when attempting stacked bar plot (:issue:`13019`) +- Compat with >= ``numpy`` 1.11 for ``NaT`` comparisons (:issue:`12969`) +- Bug in ``.drop()`` with a non-unique ``MultiIndex``. (:issue:`12701`) +- Bug in ``.concat`` of datetime tz-aware and naive DataFrames (:issue:`12467`) +- Bug in correctly raising a ``ValueError`` in ``.resample(..).fillna(..)`` when passing a non-string (:issue:`12952`) +- Bug fixes in various encoding and header processing issues in ``pd.read_sas()`` (:issue:`12659`, :issue:`12654`, :issue:`12647`, :issue:`12809`) +- Bug in ``pd.crosstab()`` where it would silently ignore ``aggfunc`` if ``values=None`` (:issue:`12569`). +- Potential segfault in ``DataFrame.to_json`` when serialising ``datetime.time`` (:issue:`11473`). +- Potential segfault in ``DataFrame.to_json`` when attempting to serialise 0d array (:issue:`11299`). +- Segfault in ``to_json`` when attempting to serialise a ``DataFrame`` or ``Series`` with non-ndarray values; now supports serialization of ``category``, ``sparse``, and ``datetime64[ns, tz]`` dtypes (:issue:`10778`). +- Bug in ``DataFrame.to_json`` with unsupported dtype not passed to default handler (:issue:`12554`).
+- Bug in ``.align`` not returning the sub-class (:issue:`12983`) +- Bug in aligning a ``Series`` with a ``DataFrame`` (:issue:`13037`) +- Bug in ``ABCPanel`` in which ``Panel4D`` was not being considered as a valid instance of this generic type (:issue:`12810`) + + +- Bug in consistency of ``.name`` on ``.groupby(..).apply(..)`` cases (:issue:`12363`) + +- Bug in ``Timestamp.__repr__`` that caused ``pprint`` to fail in nested structures (:issue:`12622`) +- Bug in ``Timedelta.min`` and ``Timedelta.max``, the properties now report the true minimum/maximum ``timedeltas`` as recognized by pandas. See the :ref:`documentation `. (:issue:`12727`) +- Bug in ``.quantile()`` with interpolation may coerce to ``float`` unexpectedly (:issue:`12772`) +- Bug in ``.quantile()`` with empty ``Series`` may return scalar rather than empty ``Series`` (:issue:`12772`) + + +- Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`) +- Bug in resampling when using a ``TimedeltaIndex`` and ``.asfreq()``, would previously not include the final fencepost (:issue:`12926`) + +- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`) +- Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`) + + + +- Bug in ``pd.read_csv()`` with the ``c`` engine when specifying ``skiprows`` with newlines in quoted items (:issue:`10911`, :issue:`12775`) +- Bug in ``DataFrame`` timezone lost when assigning tz-aware datetime ``Series`` with alignment (:issue:`12981`) + + + + +- Bug in ``.value_counts()`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`) +- Bug in ``Series.value_counts()`` loses name if its dtype is ``category`` (:issue:`12835`) +- Bug in ``Series.value_counts()`` loses timezone info (:issue:`12835`) +- Bug in ``Series.value_counts(normalize=True)`` with ``Categorical`` raises 
``UnboundLocalError`` (:issue:`12835`) +- Bug in ``Panel.fillna()`` ignoring ``inplace=True`` (:issue:`12633`) +- Bug in ``pd.read_csv()`` when specifying ``names``, ``usecols``, and ``parse_dates`` simultaneously with the ``c`` engine (:issue:`9755`) +- Bug in ``pd.read_csv()`` when specifying ``delim_whitespace=True`` and ``lineterminator`` simultaneously with the ``c`` engine (:issue:`12912`) +- Bug in ``Series.rename``, ``DataFrame.rename`` and ``DataFrame.rename_axis`` not treating ``Series`` as mappings to relabel (:issue:`12623`). +- Clean in ``.rolling.min`` and ``.rolling.max`` to enhance dtype handling (:issue:`12373`) +- Bug in ``groupby`` where complex types are coerced to float (:issue:`12902`) +- Bug in ``Series.map`` raises ``TypeError`` if its dtype is ``category`` or tz-aware ``datetime`` (:issue:`12473`) + +- Bugs on 32bit platforms for some test comparisons (:issue:`12972`) +- Bug in index coercion when falling back from ``RangeIndex`` construction (:issue:`12893`) +- Better error message in window functions when invalid argument (e.g. a float window) is passed (:issue:`12669`) + +- Bug in slicing subclassed ``DataFrame`` defined to return subclassed ``Series`` may return normal ``Series`` (:issue:`11559`) + + +- Bug in ``.str`` accessor methods may raise ``ValueError`` if input has ``name`` and the result is ``DataFrame`` or ``MultiIndex`` (:issue:`12617`) +- Bug in ``DataFrame.last_valid_index()`` and ``DataFrame.first_valid_index()`` on empty frames (:issue:`12800`) + + +- Bug in ``CategoricalIndex.get_loc`` returns different result from regular ``Index`` (:issue:`12531`) +- Bug in ``PeriodIndex.resample`` where name not propagated (:issue:`12769`) + +- Bug in ``date_range`` ``closed`` keyword and timezones (:issue:`12684`). 
+ +- Bug in ``pd.concat`` raises ``AttributeError`` when input data contains tz-aware datetime and timedelta (:issue:`12620`) +- Bug in ``pd.concat`` did not handle empty ``Series`` properly (:issue:`11082`) + +- Bug in ``.plot.bar`` alignment when ``width`` is specified with ``int`` (:issue:`12979`) + + +- Bug in ``fill_value`` is ignored if the argument to a binary operator is a constant (:issue:`12723`) + +- Bug in ``pd.read_html()`` when using bs4 flavor and parsing table with a header and only one column (:issue:`9178`) + +- Bug in ``.pivot_table`` when ``margins=True`` and ``dropna=True`` where nulls still contributed to margin count (:issue:`12577`) +- Bug in ``.pivot_table`` when ``dropna=False`` where table index/column names disappear (:issue:`12133`) +- Bug in ``pd.crosstab()`` when ``margins=True`` and ``dropna=False`` which raised (:issue:`12642`) + +- Bug in ``Series.name`` when ``name`` attribute can be a hashable type (:issue:`12610`) + +- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`) +- Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`) +- ``pd.read_excel()`` now accepts column names associated with keyword argument ``names`` (:issue:`12870`) +- Bug in ``pd.to_numeric()`` with ``Index`` returns ``np.ndarray``, rather than ``Index`` (:issue:`12777`) +- Bug in ``pd.to_numeric()`` with datetime-like may raise ``TypeError`` (:issue:`12777`) +- Bug in ``pd.to_numeric()`` with scalar raises ``ValueError`` (:issue:`12777`) + + +.. _whatsnew_0.18.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.18.0..v0.18.1 diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst new file mode 100644 index 00000000..6eb509a2 --- /dev/null +++ b/doc/source/whatsnew/v0.19.0.rst @@ -0,0 +1,1583 @@ +.. 
_whatsnew_0190: + +v0.19.0 (October 2, 2016) +------------------------- + +{{ header }} + +This is a major release from 0.18.1 and includes a number of API changes, several new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- :func:`merge_asof` for asof-style time-series joining, see :ref:`here ` +- ``.rolling()`` is now time-series aware, see :ref:`here ` +- :func:`read_csv` now supports parsing ``Categorical`` data, see :ref:`here ` +- A function :func:`union_categoricals` has been added for combining categoricals, see :ref:`here ` +- ``PeriodIndex`` now has its own ``period`` dtype, and changed to be more consistent with other ``Index`` classes. See :ref:`here ` +- Sparse data structures gained enhanced support of ``int`` and ``bool`` dtypes, see :ref:`here ` +- Comparison operations with ``Series`` no longer ignores the index, see :ref:`here ` for an overview of the API changes. +- Introduction of a pandas development API for utility functions, see :ref:`here `. +- Deprecation of ``Panel4D`` and ``PanelND``. We recommend to represent these types of n-dimensional data with the `xarray package `__. +- Removal of the previously deprecated modules ``pandas.io.data``, ``pandas.io.wb``, ``pandas.tools.rplot``. + +.. warning:: + + pandas >= 0.19.0 will no longer silence numpy ufunc warnings upon import, see :ref:`here `. + +.. contents:: What's new in v0.19.0 + :local: + :backlinks: none + +.. _whatsnew_0190.new_features: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0190.enhancements.asof_merge: + +``merge_asof`` for asof-style time-series joining +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A long-time requested feature has been added through the :func:`merge_asof` function, to +support asof style joining of time-series (:issue:`1870`, :issue:`13695`, :issue:`13709`, :issue:`13902`). Full documentation is +:ref:`here `. 
+ +The :func:`merge_asof` performs an asof merge, which is similar to a left-join +except that we match on nearest key rather than equal keys. + +.. ipython:: python + + left = pd.DataFrame({'a': [1, 5, 10], + 'left_val': ['a', 'b', 'c']}) + right = pd.DataFrame({'a': [1, 2, 3, 6, 7], + 'right_val': [1, 2, 3, 6, 7]}) + + left + right + +We typically want to match exactly when possible, and use the most +recent value otherwise. + +.. ipython:: python + + pd.merge_asof(left, right, on='a') + +We can also match rows ONLY with prior data, and not an exact match. + +.. ipython:: python + + pd.merge_asof(left, right, on='a', allow_exact_matches=False) + + +In a typical time-series example, we have ``trades`` and ``quotes`` and we want to ``asof-join`` them. +This also illustrates using the ``by`` parameter to group data before merging. + +.. ipython:: python + + trades = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.038', + '20160525 13:30:00.048', + '20160525 13:30:00.048', + '20160525 13:30:00.048']), + 'ticker': ['MSFT', 'MSFT', + 'GOOG', 'GOOG', 'AAPL'], + 'price': [51.95, 51.95, + 720.77, 720.92, 98.00], + 'quantity': [75, 155, + 100, 100, 100]}, + columns=['time', 'ticker', 'price', 'quantity']) + + quotes = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.023', + '20160525 13:30:00.030', + '20160525 13:30:00.041', + '20160525 13:30:00.048', + '20160525 13:30:00.049', + '20160525 13:30:00.072', + '20160525 13:30:00.075']), + 'ticker': ['GOOG', 'MSFT', 'MSFT', 'MSFT', + 'GOOG', 'AAPL', 'GOOG', 'MSFT'], + 'bid': [720.50, 51.95, 51.97, 51.99, + 720.50, 97.99, 720.50, 52.01], + 'ask': [720.93, 51.96, 51.98, 52.00, + 720.93, 98.01, 720.88, 52.03]}, + columns=['time', 'ticker', 'bid', 'ask']) + +.. ipython:: python + + trades + quotes + +An asof merge joins on the ``on``, typically a datetimelike field, which is ordered, and +in this case we are using a grouper in the ``by`` field. 
This is like a left-outer join, except +that forward filling happens automatically taking the most recent non-NaN value. + +.. ipython:: python + + pd.merge_asof(trades, quotes, + on='time', + by='ticker') + +This returns a merged DataFrame with the entries in the same order as the original left +passed DataFrame (``trades`` in this case), with the fields of the ``quotes`` merged. + +.. _whatsnew_0190.enhancements.rolling_ts: + +``.rolling()`` is now time-series aware +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``.rolling()`` objects are now time-series aware and can accept a time-series offset (or convertible) for the ``window`` argument (:issue:`13327`, :issue:`12995`). +See the full documentation :ref:`here `. + +.. ipython:: python + + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.date_range('20130101 09:00:00', + periods=5, freq='s')) + dft + +This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. + +.. ipython:: python + + dft.rolling(2).sum() + dft.rolling(2, min_periods=1).sum() + +Specifying an offset allows a more intuitive specification of the rolling frequency. + +.. ipython:: python + + dft.rolling('2s').sum() + +Using a non-regular, but still monotonic index, rolling with an integer window does not impart any special calculation. + +.. ipython:: python + + + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.Index([pd.Timestamp('20130101 09:00:00'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:05'), + pd.Timestamp('20130101 09:00:06')], + name='foo')) + + dft + dft.rolling(2).sum() + +Using the time-specification generates variable windows for this sparse data. + +.. ipython:: python + + dft.rolling('2s').sum() + +Furthermore, we now allow an optional ``on`` parameter to specify a column (rather than the +default of the index) in a DataFrame. + +.. 
ipython:: python + + dft = dft.reset_index() + dft + dft.rolling('2s', on='foo').sum() + +.. _whatsnew_0190.enhancements.read_csv_dupe_col_names_support: + +``read_csv`` has improved support for duplicate column names +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. ipython:: python + :suppress: + + from io import StringIO + +:ref:`Duplicate column names ` are now supported in :func:`read_csv` whether +they are in the file or passed in as the ``names`` parameter (:issue:`7160`, :issue:`9424`) + +.. ipython:: python + + data = '0,1,2\n3,4,5' + names = ['a', 'b', 'a'] + +**Previous behavior**: + +.. code-block:: ipython + + In [2]: pd.read_csv(StringIO(data), names=names) + Out[2]: + a b a + 0 2 1 2 + 1 5 4 5 + +The first ``a`` column contained the same data as the second ``a`` column, when it should have +contained the values ``[0, 3]``. + +**New behavior**: + +.. ipython:: python + :okexcept: + + pd.read_csv(StringIO(data), names=names) + + +.. _whatsnew_0190.enhancements.read_csv_categorical: + +``read_csv`` supports parsing ``Categorical`` directly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`read_csv` function now supports parsing a ``Categorical`` column when +specified as a dtype (:issue:`10153`). Depending on the structure of the data, +this can result in a faster parse time and lower memory usage compared to +converting to ``Categorical`` after parsing. See the io :ref:`docs here `. + +.. ipython:: python + + data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3' + + pd.read_csv(StringIO(data)) + pd.read_csv(StringIO(data)).dtypes + pd.read_csv(StringIO(data), dtype='category').dtypes + +Individual columns can be parsed as a ``Categorical`` using a dict specification + +.. ipython:: python + + pd.read_csv(StringIO(data), dtype={'col1': 'category'}).dtypes + +.. note:: + + The resulting categories will always be parsed as strings (object dtype). 
+ If the categories are numeric they can be converted using the + :func:`to_numeric` function, or as appropriate, another converter + such as :func:`to_datetime`. + + .. ipython:: python + + df = pd.read_csv(StringIO(data), dtype='category') + df.dtypes + df['col3'] + df['col3'].cat.categories = pd.to_numeric(df['col3'].cat.categories) + df['col3'] + +.. _whatsnew_0190.enhancements.union_categoricals: + +Categorical concatenation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- A function :func:`union_categoricals` has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`, :issue:`13763`, :issue:`13846`, :issue:`14173`) + + .. ipython:: python + + from pandas.api.types import union_categoricals + a = pd.Categorical(["b", "c"]) + b = pd.Categorical(["a", "b"]) + union_categoricals([a, b]) + +- ``concat`` and ``append`` now can concat ``category`` dtypes with different ``categories`` as ``object`` dtype (:issue:`13524`) + + .. ipython:: python + + s1 = pd.Series(['a', 'b'], dtype='category') + s2 = pd.Series(['b', 'c'], dtype='category') + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pd.concat([s1, s2]) + ValueError: incompatible categories in categorical concat + +**New behavior**: + +.. ipython:: python + + pd.concat([s1, s2]) + +.. _whatsnew_0190.enhancements.semi_month_offsets: + +Semi-month offsets +^^^^^^^^^^^^^^^^^^ + +Pandas has gained new frequency offsets, ``SemiMonthEnd`` ('SM') and ``SemiMonthBegin`` ('SMS'). +These provide date offsets anchored (by default) to the 15th and end of month, and 15th and 1st of month respectively. +(:issue:`1543`) + +.. ipython:: python + + from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin + +**SemiMonthEnd**: + +.. ipython:: python + + pd.Timestamp('2016-01-01') + SemiMonthEnd() + + pd.date_range('2015-01-01', freq='SM', periods=4) + +**SemiMonthBegin**: + +.. 
ipython:: python + + pd.Timestamp('2016-01-01') + SemiMonthBegin() + + pd.date_range('2015-01-01', freq='SMS', periods=4) + +Using the anchoring suffix, you can also specify the day of month to use instead of the 15th. + +.. ipython:: python + + pd.date_range('2015-01-01', freq='SMS-16', periods=4) + + pd.date_range('2015-01-01', freq='SM-14', periods=4) + +.. _whatsnew_0190.enhancements.index: + +New Index methods +^^^^^^^^^^^^^^^^^ + +The following methods and options are added to ``Index``, to be more consistent with the ``Series`` and ``DataFrame`` API. + +``Index`` now supports the ``.where()`` function for same shape indexing (:issue:`13170`) + +.. ipython:: python + + idx = pd.Index(['a', 'b', 'c']) + idx.where([True, False, True]) + + +``Index`` now supports ``.dropna()`` to exclude missing values (:issue:`6194`) + +.. ipython:: python + + idx = pd.Index([1, 2, np.nan, 4]) + idx.dropna() + +For ``MultiIndex``, values are dropped if any level is missing by default. Specifying +``how='all'`` only drops values where all levels are missing. + +.. ipython:: python + + midx = pd.MultiIndex.from_arrays([[1, 2, np.nan, 4], + [1, 2, np.nan, np.nan]]) + midx + midx.dropna() + midx.dropna(how='all') + +``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see the :ref:`docs here ` (:issue:`10008`, :issue:`13156`) + +.. ipython:: python + + idx = pd.Index(["a1a2", "b1", "c1"]) + idx.str.extractall(r"[ab](?P\d)") + +``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) + +.. _whatsnew_0190.gbq: + +Google BigQuery Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- The :func:`read_gbq` method has gained the ``dialect`` argument to allow users to specify whether to use BigQuery's legacy SQL or BigQuery's standard SQL. See the `docs `__ for more details (:issue:`13615`). 
+- The :func:`~DataFrame.to_gbq` method now allows the DataFrame column order to differ from the destination table schema (:issue:`11359`). + +.. _whatsnew_0190.errstate: + +Fine-grained numpy errstate +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previous versions of pandas would permanently silence numpy's ufunc error handling when ``pandas`` was imported. Pandas did this in order to silence the warnings that would arise from using numpy ufuncs on missing data, which are usually represented as ``NaN`` s. Unfortunately, this silenced legitimate warnings arising in non-pandas code in the application. Starting with 0.19.0, pandas will use the ``numpy.errstate`` context manager to silence these warnings in a more fine-grained manner, only around where these operations are actually used in the pandas code base. (:issue:`13109`, :issue:`13145`) + +After upgrading pandas, you may see *new* ``RuntimeWarnings`` being issued from your code. These are likely legitimate, and the underlying cause likely existed in the code when using previous versions of pandas that simply silenced the warning. Use `numpy.errstate `__ around the source of the ``RuntimeWarning`` to control how these conditions are handled. + +.. _whatsnew_0190.get_dummies_dtypes: + +``get_dummies`` now returns integer dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``pd.get_dummies`` function now returns dummy-encoded columns as small integers, rather than floats (:issue:`8725`). This should provide an improved memory footprint. + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pd.get_dummies(['a', 'b', 'a', 'c']).dtypes + + Out[1]: + a float64 + b float64 + c float64 + dtype: object + +**New behavior**: + +.. ipython:: python + + pd.get_dummies(['a', 'b', 'a', 'c']).dtypes + + +.. 
_whatsnew_0190.enhancements.to_numeric_downcast: + +Downcast values to smallest possible dtype in ``to_numeric`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``pd.to_numeric()`` now accepts a ``downcast`` parameter, which will downcast the data if possible to smallest specified numerical dtype (:issue:`13352`) + +.. ipython:: python + + s = ['1', 2, 3] + pd.to_numeric(s, downcast='unsigned') + pd.to_numeric(s, downcast='integer') + +.. _whatsnew_0190.dev_api: + +pandas development API +^^^^^^^^^^^^^^^^^^^^^^ + +As part of making pandas API more uniform and accessible in the future, we have created a standard +sub-package of pandas, ``pandas.api`` to hold public API's. We are starting by exposing type +introspection functions in ``pandas.api.types``. More sub-packages and officially sanctioned API's +will be published in future versions of pandas (:issue:`13147`, :issue:`13634`) + +The following are now part of this API: + +.. ipython:: python + + import pprint + from pandas.api import types + funcs = [f for f in dir(types) if not f.startswith('_')] + pprint.pprint(funcs) + +.. note:: + + Calling these functions from the internal module ``pandas.core.common`` will now show a ``DeprecationWarning`` (:issue:`13990`) + + +.. _whatsnew_0190.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``Timestamp`` can now accept positional and keyword parameters similar to :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`) + + .. ipython:: python + + pd.Timestamp(2012, 1, 1) + + pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) + +- The ``.resample()`` function now accepts a ``on=`` or ``level=`` parameter for resampling on a datetimelike column or ``MultiIndex`` level (:issue:`13500`) + + .. 
ipython:: python + + df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), + 'a': np.arange(5)}, + index=pd.MultiIndex.from_arrays([[1, 2, 3, 4, 5], + pd.date_range('2015-01-01', + freq='W', + periods=5) + ], names=['v', 'd'])) + df + df.resample('M', on='date').sum() + df.resample('M', level='d').sum() + +- The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). +- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raise a ``NonExistentTimeError`` (:issue:`13057`) +- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`) +- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the + ``decimal`` (:issue:`12933`), ``na_filter`` (:issue:`13321`) and the ``memory_map`` option (:issue:`13381`). 
+- Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`) +- The ``pd.read_html()`` has gained support for the ``na_values``, ``converters``, ``keep_default_na`` options (:issue:`13461`) +- ``Categorical.astype()`` now accepts an optional boolean argument ``copy``, effective when dtype is categorical (:issue:`13209`) +- ``DataFrame`` has gained the ``.asof()`` method to return the last non-NaN values according to the selected subset (:issue:`13358`) +- The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`) +- ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) +- ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`) +- ``DataFrame.to_sql()`` now allows a single value as the SQL type for all columns (:issue:`11886`). +- ``Series.append`` now supports the ``ignore_index`` option (:issue:`13677`) +- ``.to_stata()`` and ``StataWriter`` can now write variable labels to Stata dta files using a dictionary to map column names to labels (:issue:`13535`, :issue:`13536`) +- ``.to_stata()`` and ``StataWriter`` will automatically convert ``datetime64[ns]`` columns to Stata format ``%tc``, rather than raising a ``ValueError`` (:issue:`12259`) +- ``read_stata()`` and ``StataReader`` raise with a more explicit error message when reading Stata files with repeated value labels when ``convert_categoricals=True`` (:issue:`13923`) +- ``DataFrame.style`` will now render sparsified MultiIndexes (:issue:`11655`) +- ``DataFrame.style`` will now show column level names (e.g. ``DataFrame.columns.names``) (:issue:`13775`) +- ``DataFrame`` has gained support to re-order the columns based on the values + in a row using ``df.sort_values(by='...', axis=1)`` (:issue:`10806`) + + .. 
ipython:: python + + df = pd.DataFrame({'A': [2, 7], 'B': [3, 5], 'C': [4, 8]}, + index=['row1', 'row2']) + df + df.sort_values(by='row2', axis=1) + +- Added documentation to :ref:`I/O` regarding the perils of reading in columns with mixed dtypes and how to handle it (:issue:`13746`) +- :meth:`~DataFrame.to_html` now has a ``border`` argument to control the value in the opening ``
    `` tag. The default is the value of the ``html.border`` option, which defaults to 1. This also affects the notebook HTML repr, but since Jupyter's CSS includes a border-width attribute, the visual effect is the same. (:issue:`11563`). +- Raise ``ImportError`` in the sql functions when ``sqlalchemy`` is not installed and a connection string is used (:issue:`11920`). +- Compatibility with matplotlib 2.0. Older versions of pandas should also work with matplotlib 2.0 (:issue:`13333`) +- ``Timestamp``, ``Period``, ``DatetimeIndex``, ``PeriodIndex`` and ``.dt`` accessor have gained a ``.is_leap_year`` property to check whether the date belongs to a leap year. (:issue:`13727`) +- ``astype()`` will now accept a dict of column name to data types mapping as the ``dtype`` argument. (:issue:`12086`) +- The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) +- :func:`read_excel` now supports the true_values and false_values keyword arguments (:issue:`13347`) +- ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) +- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`). +- ``pd.Timedelta(None)`` is now accepted and will return ``NaT``, mirroring ``pd.Timestamp`` (:issue:`13687`) +- ``pd.read_stata()`` can now handle some format 111 files, which are produced by SAS when generating Stata dta files (:issue:`11526`) +- ``Series`` and ``Index`` now support ``divmod`` which will return a tuple of + series or indices. This behaves like a standard binary operator with regards + to broadcasting rules (:issue:`14208`). + + +.. 
_whatsnew_0190.api: + +API changes +~~~~~~~~~~~ + +``Series.tolist()`` will now return Python types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Series.tolist()`` will now return Python types in the output, mimicking NumPy ``.tolist()`` behavior (:issue:`10904`) + + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + +**Previous behavior**: + +.. code-block:: ipython + + In [7]: type(s.tolist()[0]) + Out[7]: + + +**New behavior**: + +.. ipython:: python + + type(s.tolist()[0]) + +.. _whatsnew_0190.api.series_ops: + +``Series`` operators for different indexes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following ``Series`` operators have been changed to make all operators consistent, +including ``DataFrame`` (:issue:`1134`, :issue:`4581`, :issue:`13538`) + +- ``Series`` comparison operators now raise ``ValueError`` when ``index`` are different. +- ``Series`` logical operators align both ``index`` of left and right hand side. + +.. warning:: + Until 0.18.1, comparing ``Series`` with the same length, would succeed even if + the ``.index`` are different (the result ignores ``.index``). As of 0.19.0, this will raise ``ValueError`` to be more strict. This section also describes how to keep previous behavior or align different indexes, using the flexible comparison methods like ``.eq``. + + +As a result, ``Series`` and ``DataFrame`` operators behave as below: + +Arithmetic operators +"""""""""""""""""""" + +Arithmetic operators align both ``index`` (no changes). + +.. ipython:: python + + s1 = pd.Series([1, 2, 3], index=list('ABC')) + s2 = pd.Series([2, 2, 2], index=list('ABD')) + s1 + s2 + + df1 = pd.DataFrame([1, 2, 3], index=list('ABC')) + df2 = pd.DataFrame([2, 2, 2], index=list('ABD')) + df1 + df2 + +Comparison operators +"""""""""""""""""""" + +Comparison operators raise ``ValueError`` when ``.index`` are different. + +**Previous behavior** (``Series``): + +``Series`` compared values ignoring the ``.index`` as long as both had the same length: + +.. 
code-block:: ipython + + In [1]: s1 == s2 + Out[1]: + A False + B True + C False + dtype: bool + +**New behavior** (``Series``): + +.. code-block:: ipython + + In [2]: s1 == s2 + Out[2]: + ValueError: Can only compare identically-labeled Series objects + +.. note:: + + To achieve the same result as previous versions (compare values based on locations ignoring ``.index``), compare both ``.values``. + + .. ipython:: python + + s1.values == s2.values + + If you want to compare ``Series`` aligning its ``.index``, see flexible comparison methods section below: + + .. ipython:: python + + s1.eq(s2) + +**Current behavior** (``DataFrame``, no change): + +.. code-block:: ipython + + In [3]: df1 == df2 + Out[3]: + ValueError: Can only compare identically-labeled DataFrame objects + +Logical operators +""""""""""""""""" + +Logical operators align both ``.index`` of left and right hand side. + +**Previous behavior** (``Series``), only left hand side ``index`` was kept: + +.. code-block:: ipython + + In [4]: s1 = pd.Series([True, False, True], index=list('ABC')) + In [5]: s2 = pd.Series([True, True, True], index=list('ABD')) + In [6]: s1 & s2 + Out[6]: + A True + B False + C False + dtype: bool + +**New behavior** (``Series``): + +.. ipython:: python + + s1 = pd.Series([True, False, True], index=list('ABC')) + s2 = pd.Series([True, True, True], index=list('ABD')) + s1 & s2 + +.. note:: + ``Series`` logical operators fill a ``NaN`` result with ``False``. + +.. note:: + To achieve the same result as previous versions (compare values based on only left hand side index), you can use ``reindex_like``: + + .. ipython:: python + + s1 & s2.reindex_like(s1) + +**Current behavior** (``DataFrame``, no change): + +.. 
ipython:: python + + df1 = pd.DataFrame([True, False, True], index=list('ABC')) + df2 = pd.DataFrame([True, True, True], index=list('ABD')) + df1 & df2 + +Flexible comparison methods +""""""""""""""""""""""""""" + +``Series`` flexible comparison methods like ``eq``, ``ne``, ``le``, ``lt``, ``ge`` and ``gt`` now align both ``index``. Use these operators if you want to compare two ``Series`` +which have different ``index``. + +.. ipython:: python + + s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s2 = pd.Series([2, 2, 2], index=['b', 'c', 'd']) + s1.eq(s2) + s1.ge(s2) + +Previously, this worked the same as comparison operators (see above). + +.. _whatsnew_0190.api.promote: + +``Series`` type promotion on assignment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``Series`` will now correctly promote its dtype for assignment with incompatible values to the current dtype (:issue:`13234`) + + +.. ipython:: python + :okwarning: + + s = pd.Series() + +**Previous behavior**: + +.. code-block:: ipython + + In [2]: s["a"] = pd.Timestamp("2016-01-01") + + In [3]: s["b"] = 3.0 + TypeError: invalid type promotion + +**New behavior**: + +.. ipython:: python + + s["a"] = pd.Timestamp("2016-01-01") + s["b"] = 3.0 + s + s.dtype + +.. _whatsnew_0190.api.to_datetime_coerce: + +``.to_datetime()`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously if ``.to_datetime()`` encountered mixed integers/floats and strings, but no datetimes with ``errors='coerce'`` it would convert all to ``NaT``. + +**Previous behavior**: + +.. code-block:: ipython + + In [2]: pd.to_datetime([1, 'foo'], errors='coerce') + Out[2]: DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns]', freq=None) + +**Current behavior**: + +This will now convert integers/floats with the default unit of ``ns``. + +.. 
ipython:: python + + pd.to_datetime([1, 'foo'], errors='coerce') + +Bug fixes related to ``.to_datetime()``: + +- Bug in ``pd.to_datetime()`` when passing integers or floats, and no ``unit`` and ``errors='coerce'`` (:issue:`13180`). +- Bug in ``pd.to_datetime()`` when passing invalid data types (e.g. bool); will now respect the ``errors`` keyword (:issue:`13176`) +- Bug in ``pd.to_datetime()`` which overflowed on ``int8``, and ``int16`` dtypes (:issue:`13451`) +- Bug in ``pd.to_datetime()`` raise ``AttributeError`` with ``NaN`` and the other string is not valid when ``errors='ignore'`` (:issue:`12424`) +- Bug in ``pd.to_datetime()`` did not cast floats correctly when ``unit`` was specified, resulting in truncated datetime (:issue:`13834`) + +.. _whatsnew_0190.api.merging: + +Merging changes +^^^^^^^^^^^^^^^ + +Merging will now preserve the dtype of the join keys (:issue:`8596`) + +.. ipython:: python + + df1 = pd.DataFrame({'key': [1], 'v1': [10]}) + df1 + df2 = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]}) + df2 + +**Previous behavior**: + +.. code-block:: ipython + + In [5]: pd.merge(df1, df2, how='outer') + Out[5]: + key v1 + 0 1.0 10.0 + 1 1.0 20.0 + 2 2.0 30.0 + + In [6]: pd.merge(df1, df2, how='outer').dtypes + Out[6]: + key float64 + v1 float64 + dtype: object + +**New behavior**: + +We are able to preserve the join keys + +.. ipython:: python + + pd.merge(df1, df2, how='outer') + pd.merge(df1, df2, how='outer').dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast, which is unchanged from previous. + +.. ipython:: python + + pd.merge(df1, df2, how='outer', on='key') + pd.merge(df1, df2, how='outer', on='key').dtypes + +.. _whatsnew_0190.api.describe: + +``.describe()`` changes +^^^^^^^^^^^^^^^^^^^^^^^ + +Percentile identifiers in the index of a ``.describe()`` output will now be rounded to the least precision that keeps them distinct (:issue:`13104`) + +.. 
ipython:: python + + s = pd.Series([0, 1, 2, 3, 4]) + df = pd.DataFrame([0, 1, 2, 3, 4]) + +**Previous behavior**: + +The percentiles were rounded to at most one decimal place, which could raise ``ValueError`` for a data frame if the percentiles were duplicated. + +.. code-block:: ipython + + In [3]: s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + Out[3]: + count 5.000000 + mean 2.000000 + std 1.581139 + min 0.000000 + 0.0% 0.000400 + 0.1% 0.002000 + 0.1% 0.004000 + 50% 2.000000 + 99.9% 3.996000 + 100.0% 3.998000 + 100.0% 3.999600 + max 4.000000 + dtype: float64 + + In [4]: df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + Out[4]: + ... + ValueError: cannot reindex from a duplicate axis + +**New behavior**: + +.. ipython:: python + + s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + +Furthermore: + +- Passing duplicated ``percentiles`` will now raise a ``ValueError``. +- Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) + +.. _whatsnew_0190.api.period: + +``Period`` changes +^^^^^^^^^^^^^^^^^^ + +``PeriodIndex`` now has ``period`` dtype +"""""""""""""""""""""""""""""""""""""""" + +``PeriodIndex`` now has its own ``period`` dtype. The ``period`` dtype is a +pandas extension dtype like ``category`` or the :ref:`timezone aware dtype ` (``datetime64[ns, tz]``) (:issue:`13941`). +As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype: + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pi = pd.PeriodIndex(['2016-08-01'], freq='D') + + In [2]: pi + Out[2]: PeriodIndex(['2016-08-01'], dtype='int64', freq='D') + + In [3]: pd.api.types.is_integer_dtype(pi) + Out[3]: True + + In [4]: pi.dtype + Out[4]: dtype('int64') + +**New behavior**: + +.. 
ipython:: python + + pi = pd.PeriodIndex(['2016-08-01'], freq='D') + pi + pd.api.types.is_integer_dtype(pi) + pd.api.types.is_period_dtype(pi) + pi.dtype + type(pi.dtype) + +.. _whatsnew_0190.api.periodnat: + +``Period('NaT')`` now returns ``pd.NaT`` +"""""""""""""""""""""""""""""""""""""""" + +Previously, ``Period`` has its own ``Period('NaT')`` representation different from ``pd.NaT``. Now ``Period('NaT')`` has been changed to return ``pd.NaT``. (:issue:`12759`, :issue:`13582`) + +**Previous behavior**: + +.. code-block:: ipython + + In [5]: pd.Period('NaT', freq='D') + Out[5]: Period('NaT', 'D') + +**New behavior**: + +These result in ``pd.NaT`` without providing ``freq`` option. + +.. ipython:: python + + pd.Period('NaT') + pd.Period(None) + + +To be compatible with ``Period`` addition and subtraction, ``pd.NaT`` now supports addition and subtraction with ``int``. Previously it raised ``ValueError``. + +**Previous behavior**: + +.. code-block:: ipython + + In [5]: pd.NaT + 1 + ... + ValueError: Cannot add integral value to Timestamp without freq. + +**New behavior**: + +.. ipython:: python + + pd.NaT + 1 + pd.NaT - 1 + +``PeriodIndex.values`` now returns array of ``Period`` object +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +``.values`` is changed to return an array of ``Period`` objects, rather than an array +of integers (:issue:`13988`). + +**Previous behavior**: + +.. code-block:: ipython + + In [6]: pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') + In [7]: pi.values + Out[7]: array([492, 493]) + +**New behavior**: + +.. ipython:: python + + pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') + pi.values + + +.. _whatsnew_0190.api.setops: + +Index ``+`` / ``-`` no longer used for set operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Addition and subtraction of the base Index type and of DatetimeIndex +(not the numeric index types) +previously performed set operations (set union and difference). 
This +behavior was already deprecated since 0.15.0 (in favor of using the specific +``.union()`` and ``.difference()`` methods), and is now disabled. When +possible, ``+`` and ``-`` are now used for element-wise operations, for +example for concatenating strings or subtracting datetimes +(:issue:`8227`, :issue:`14127`). + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pd.Index(['a', 'b']) + pd.Index(['a', 'c']) + FutureWarning: using '+' to provide set union with Indexes is deprecated, use '|' or .union() + Out[1]: Index(['a', 'b', 'c'], dtype='object') + +**New behavior**: the same operation will now perform element-wise addition: + +.. ipython:: python + + pd.Index(['a', 'b']) + pd.Index(['a', 'c']) + +Note that numeric Index objects already performed element-wise operations. +For example, the behavior of adding two integer Indexes is unchanged. +The base ``Index`` is now made consistent with this behavior. + +.. ipython:: python + + pd.Index([1, 2, 3]) + pd.Index([2, 3, 4]) + +Further, because of this change, it is now possible to subtract two +DatetimeIndex objects resulting in a TimedeltaIndex: + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: (pd.DatetimeIndex(['2016-01-01', '2016-01-02']) + ...: - pd.DatetimeIndex(['2016-01-02', '2016-01-03'])) + FutureWarning: using '-' to provide set differences with datetimelike Indexes is deprecated, use .difference() + Out[1]: DatetimeIndex(['2016-01-01'], dtype='datetime64[ns]', freq=None) + +**New behavior**: + +.. ipython:: python + + (pd.DatetimeIndex(['2016-01-01', '2016-01-02']) + - pd.DatetimeIndex(['2016-01-02', '2016-01-03'])) + + +.. _whatsnew_0190.api.difference: + +``Index.difference`` and ``.symmetric_difference`` changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Index.difference`` and ``Index.symmetric_difference`` will now, more consistently, treat ``NaN`` values as any other values. (:issue:`13514`) + +.. 
ipython:: python + + idx1 = pd.Index([1, 2, 3, np.nan]) + idx2 = pd.Index([0, 1, np.nan]) + +**Previous behavior**: + +.. code-block:: ipython + + In [3]: idx1.difference(idx2) + Out[3]: Float64Index([nan, 2.0, 3.0], dtype='float64') + + In [4]: idx1.symmetric_difference(idx2) + Out[4]: Float64Index([0.0, nan, 2.0, 3.0], dtype='float64') + +**New behavior**: + +.. ipython:: python + + idx1.difference(idx2) + idx1.symmetric_difference(idx2) + +.. _whatsnew_0190.api.unique_index: + +``Index.unique`` consistently returns ``Index`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Index.unique()`` now returns unique values as an +``Index`` of the appropriate ``dtype``. (:issue:`13395`). +Previously, most ``Index`` classes returned ``np.ndarray``, and ``DatetimeIndex``, +``TimedeltaIndex`` and ``PeriodIndex`` returned ``Index`` to keep metadata like timezone. + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: pd.Index([1, 2, 3]).unique() + Out[1]: array([1, 2, 3]) + + In [2]: pd.DatetimeIndex(['2011-01-01', '2011-01-02', + ...: '2011-01-03'], tz='Asia/Tokyo').unique() + Out[2]: + DatetimeIndex(['2011-01-01 00:00:00+09:00', '2011-01-02 00:00:00+09:00', + '2011-01-03 00:00:00+09:00'], + dtype='datetime64[ns, Asia/Tokyo]', freq=None) + +**New behavior**: + +.. ipython:: python + + pd.Index([1, 2, 3]).unique() + pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + tz='Asia/Tokyo').unique() + +.. _whatsnew_0190.api.multiindex: + +``MultiIndex`` constructors, ``groupby`` and ``set_index`` preserve categorical dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``MultiIndex.from_arrays`` and ``MultiIndex.from_product`` will now preserve categorical dtype +in ``MultiIndex`` levels (:issue:`13743`, :issue:`13854`). + +.. ipython:: python + + cat = pd.Categorical(['a', 'b'], categories=list("bac")) + lvl1 = ['foo', 'bar'] + midx = pd.MultiIndex.from_arrays([cat, lvl1]) + midx + +**Previous behavior**: + +.. 
code-block:: ipython + + In [4]: midx.levels[0] + Out[4]: Index(['b', 'a', 'c'], dtype='object') + + In [5]: midx.get_level_values(0) + Out[5]: Index(['a', 'b'], dtype='object') + +**New behavior**: the single level is now a ``CategoricalIndex``: + +.. ipython:: python + + midx.levels[0] + midx.get_level_values(0) + +An analogous change has been made to ``MultiIndex.from_product``. +As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes in indexes. + +.. ipython:: python + + df = pd.DataFrame({'A': [0, 1], 'B': [10, 11], 'C': cat}) + df_grouped = df.groupby(by=['A', 'C']).first() + df_set_idx = df.set_index(['A', 'C']) + +**Previous behavior**: + +.. code-block:: ipython + + In [11]: df_grouped.index.levels[1] + Out[11]: Index(['b', 'a', 'c'], dtype='object', name='C') + In [12]: df_grouped.reset_index().dtypes + Out[12]: + A int64 + C object + B float64 + dtype: object + + In [13]: df_set_idx.index.levels[1] + Out[13]: Index(['b', 'a', 'c'], dtype='object', name='C') + In [14]: df_set_idx.reset_index().dtypes + Out[14]: + A int64 + C object + B int64 + dtype: object + +**New behavior**: + +.. ipython:: python + + df_grouped.index.levels[1] + df_grouped.reset_index().dtypes + + df_set_idx.index.levels[1] + df_set_idx.reset_index().dtypes + +.. _whatsnew_0190.api.autogenerated_chunksize_index: + +``read_csv`` will progressively enumerate chunks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :func:`read_csv` is called with ``chunksize=n`` and without specifying an index, +each chunk used to have an independently generated index from ``0`` to ``n-1``. +They are now given instead a progressive index, starting from ``0`` for the first chunk, +from ``n`` for the second, and so on, so that, when concatenated, they are identical to +the result of calling :func:`read_csv` without the ``chunksize=`` argument +(:issue:`12185`). + +.. ipython:: python + + data = 'A,B\n0,1\n2,3\n4,5\n6,7' + +**Previous behavior**: + +.. 
code-block:: ipython + + In [2]: pd.concat(pd.read_csv(StringIO(data), chunksize=2)) + Out[2]: + A B + 0 0 1 + 1 2 3 + 0 4 5 + 1 6 7 + +**New behavior**: + +.. ipython:: python + + pd.concat(pd.read_csv(StringIO(data), chunksize=2)) + +.. _whatsnew_0190.sparse: + +Sparse Changes +^^^^^^^^^^^^^^ + +These changes allow pandas to handle sparse data with more dtypes, and for work to make a smoother experience with data handling. + +``int64`` and ``bool`` support enhancements +""""""""""""""""""""""""""""""""""""""""""" + +Sparse data structures now gained enhanced support of ``int64`` and ``bool`` ``dtype`` (:issue:`667`, :issue:`13849`). + +Previously, sparse data were ``float64`` dtype by default, even if all inputs were of ``int`` or ``bool`` dtype. You had to specify ``dtype`` explicitly to create sparse data with ``int64`` dtype. Also, ``fill_value`` had to be specified explicitly because the default was ``np.nan`` which doesn't appear in ``int64`` or ``bool`` data. + +.. code-block:: ipython + + In [1]: pd.SparseArray([1, 2, 0, 0]) + Out[1]: + [1.0, 2.0, 0.0, 0.0] + Fill: nan + IntIndex + Indices: array([0, 1, 2, 3], dtype=int32) + + # specifying int64 dtype, but all values are stored in sp_values because + # fill_value default is np.nan + In [2]: pd.SparseArray([1, 2, 0, 0], dtype=np.int64) + Out[2]: + [1, 2, 0, 0] + Fill: nan + IntIndex + Indices: array([0, 1, 2, 3], dtype=int32) + + In [3]: pd.SparseArray([1, 2, 0, 0], dtype=np.int64, fill_value=0) + Out[3]: + [1, 2, 0, 0] + Fill: 0 + IntIndex + Indices: array([0, 1], dtype=int32) + +As of v0.19.0, sparse data keeps the input dtype, and uses more appropriate ``fill_value`` defaults (``0`` for ``int64`` dtype, ``False`` for ``bool`` dtype). + +.. ipython:: python + :okwarning: + + pd.SparseArray([1, 2, 0, 0], dtype=np.int64) + pd.SparseArray([True, False, False, False]) + +See the :ref:`docs ` for more details. 
+ +Operators now preserve dtypes +""""""""""""""""""""""""""""" + +- Sparse data structure now can preserve ``dtype`` after arithmetic ops (:issue:`13848`) + +.. code-block:: python + + s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) + s.dtype + + s + 1 + +- Sparse data structure now support ``astype`` to convert internal ``dtype`` (:issue:`13900`) + +.. code-block:: python + + s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0) + s + s.astype(np.int64) + +``astype`` fails if data contains values which cannot be converted to specified ``dtype``. +Note that the limitation is applied to ``fill_value`` which default is ``np.nan``. + +.. code-block:: ipython + + In [7]: pd.SparseSeries([1., np.nan, 2., np.nan], fill_value=np.nan).astype(np.int64) + Out[7]: + ValueError: unable to coerce current fill_value nan to int64 dtype + +Other sparse fixes +"""""""""""""""""" + +- Subclassed ``SparseDataFrame`` and ``SparseSeries`` now preserve class types when slicing or transposing. (:issue:`13787`) +- ``SparseArray`` with ``bool`` dtype now supports logical (bool) operators (:issue:`14000`) +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) +- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) +- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) +- Bug in ``SparseDataFrame`` doesn't respect passed ``SparseArray`` or ``SparseSeries`` 's dtype and ``fill_value`` (:issue:`13866`) +- Bug in ``SparseArray`` and ``SparseSeries`` don't apply ufunc to ``fill_value`` (:issue:`13853`) +- Bug in ``SparseSeries.abs`` incorrectly keeps negative ``fill_value`` (:issue:`13853`) +- Bug in single row slicing on multi-type ``SparseDataFrame`` s, types were previously forced to float (:issue:`13917`) 
+- Bug in ``SparseSeries`` slicing changes integer dtype to float (:issue:`8292`) +- Bug in ``SparseDataFrame`` comparison ops may raise ``TypeError`` (:issue:`13001`) +- Bug in ``SparseDataFrame.isnull`` raises ``ValueError`` (:issue:`8276`) +- Bug in ``SparseSeries`` representation with ``bool`` dtype may raise ``IndexError`` (:issue:`13110`) +- Bug in ``SparseSeries`` and ``SparseDataFrame`` of ``bool`` or ``int64`` dtype may display its values like ``float64`` dtype (:issue:`13110`) +- Bug in sparse indexing using ``SparseArray`` with ``bool`` dtype may return incorrect result (:issue:`13985`) +- Bug in ``SparseArray`` created from ``SparseSeries`` may lose ``dtype`` (:issue:`13999`) +- Bug in ``SparseSeries`` comparison with dense returns normal ``Series`` rather than ``SparseSeries`` (:issue:`13999`) + + +.. _whatsnew_0190.indexer_dtype: + +Indexer dtype changes +^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + This change only affects 64 bit python running on Windows, and only affects relatively advanced + indexing operations + +Methods such as ``Index.get_indexer`` that return an indexer array, coerce that array to a "platform int", so that it can be +directly used in 3rd party library operations like ``numpy.take``. Previously, a platform int was defined as ``np.int_`` +which corresponds to a C integer, but the correct type, and what is being used now, is ``np.intp``, which corresponds +to the C integer size that can hold a pointer (:issue:`3033`, :issue:`13972`). + +These types are the same on many platforms, but for 64 bit python on Windows, +``np.int_`` is 32 bits, and ``np.intp`` is 64 bits. Changing this behavior improves performance for many +operations on that platform. + +**Previous behavior**: + +.. code-block:: ipython + + In [1]: i = pd.Index(['a', 'b', 'c']) + + In [2]: i.get_indexer(['b', 'b', 'c']).dtype + Out[2]: dtype('int32') + +**New behavior**: + +.. 
code-block:: ipython + + In [1]: i = pd.Index(['a', 'b', 'c']) + + In [2]: i.get_indexer(['b', 'b', 'c']).dtype + Out[2]: dtype('int64') + + +.. _whatsnew_0190.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``Timestamp.to_pydatetime`` will issue a ``UserWarning`` when ``warn=True``, and the instance has a non-zero number of nanoseconds; previously this would print a message to stdout (:issue:`14101`). +- ``Series.unique()`` with datetime and timezone now returns an array of ``Timestamp`` with timezone (:issue:`13565`). +- ``Panel.to_sparse()`` will raise a ``NotImplementedError`` exception when called (:issue:`13778`). +- ``Index.reshape()`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`). +- ``.filter()`` enforces mutual exclusion of the keyword arguments (:issue:`12399`). +- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64 (:issue:`12388`). +- An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`). +- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior (:issue:`13299`). +- Calls to ``.sample()`` will respect the random seed set via ``numpy.random.seed(n)`` (:issue:`13161`) +- ``Styler.apply`` is now more strict about the outputs your function must return. For ``axis=0`` or ``axis=1``, the output shape must be identical. For ``axis=None``, the output must be a DataFrame with identical columns and index labels (:issue:`13222`). 
+- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`) +- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`) +- Passing ``Period`` with multiple frequencies to normal ``Index`` now returns ``Index`` with ``object`` dtype (:issue:`13664`) +- ``PeriodIndex.fillna`` with ``Period`` has different freq now coerces to ``object`` dtype (:issue:`13664`) +- Faceted boxplots from ``DataFrame.boxplot(by=col)`` now return a ``Series`` when ``return_type`` is not None. Previously these returned an ``OrderedDict``. Note that when ``return_type=None``, the default, these still return a 2-D NumPy array (:issue:`12216`, :issue:`7096`). +- ``pd.read_hdf`` will now raise a ``ValueError`` instead of ``KeyError``, if a mode other than ``r``, ``r+`` and ``a`` is supplied. (:issue:`13623`) +- ``pd.read_csv()``, ``pd.read_table()``, and ``pd.read_hdf()`` raise the builtin ``FileNotFoundError`` exception for Python 3.x when called on a nonexistent file; this is back-ported as ``IOError`` in Python 2.x (:issue:`14086`) +- More informative exceptions are passed through the csv parser. The exception type would now be the original exception type instead of ``CParserError`` (:issue:`13652`). +- ``pd.read_csv()`` in the C engine will now issue a ``ParserWarning`` or raise a ``ValueError`` when ``sep`` encoded is more than one character long (:issue:`14065`) +- ``DataFrame.values`` will now return ``float64`` with a ``DataFrame`` of mixed ``int64`` and ``uint64`` dtypes, conforming to ``np.find_common_type`` (:issue:`10364`, :issue:`13917`) +- ``.groupby.groups`` will now return a dictionary of ``Index`` objects, rather than a dictionary of ``np.ndarray`` or ``lists`` (:issue:`14293`) + +.. 
_whatsnew_0190.deprecations: + +Deprecations +~~~~~~~~~~~~ +- ``Series.reshape`` and ``Categorical.reshape`` have been deprecated and will be removed in a subsequent release (:issue:`12882`, :issue:`12882`) +- ``PeriodIndex.to_datetime`` has been deprecated in favor of ``PeriodIndex.to_timestamp`` (:issue:`8254`) +- ``Timestamp.to_datetime`` has been deprecated in favor of ``Timestamp.to_pydatetime`` (:issue:`8254`) +- ``Index.to_datetime`` and ``DatetimeIndex.to_datetime`` have been deprecated in favor of ``pd.to_datetime`` (:issue:`8254`) +- ``pandas.core.datetools`` module has been deprecated and will be removed in a subsequent release (:issue:`14094`) +- ``SparseList`` has been deprecated and will be removed in a future version (:issue:`13784`) +- ``DataFrame.to_html()`` and ``DataFrame.to_latex()`` have dropped the ``colSpace`` parameter in favor of ``col_space`` (:issue:`13857`) +- ``DataFrame.to_sql()`` has deprecated the ``flavor`` parameter, as it is superfluous when SQLAlchemy is not installed (:issue:`13611`) +- Deprecated ``read_csv`` keywords: + + - ``compact_ints`` and ``use_unsigned`` have been deprecated and will be removed in a future version (:issue:`13320`) + - ``buffer_lines`` has been deprecated and will be removed in a future version (:issue:`13360`) + - ``as_recarray`` has been deprecated and will be removed in a future version (:issue:`13373`) + - ``skip_footer`` has been deprecated in favor of ``skipfooter`` and will be removed in a future version (:issue:`13349`) + +- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) +- ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`) +- ``pd.tseries.util.pivot_annual`` is deprecated. 
Use ``pivot_table`` as alternative, an example is :ref:`here ` (:issue:`736`) +- ``pd.tseries.util.isleapyear`` has been deprecated and will be removed in a subsequent release. Datetime-likes now have a ``.is_leap_year`` property (:issue:`13727`) +- ``Panel4D`` and ``PanelND`` constructors are deprecated and will be removed in a future version. The recommended way to represent these types of n-dimensional data are with the `xarray package `__. Pandas provides a :meth:`~Panel4D.to_xarray` method to automate this conversion (:issue:`13564`). +- ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. Use ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead (:issue:`13874`) +- ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq`` (:issue:`13874`) +- ``Categorical.from_array`` has been deprecated and will be removed in a future version (:issue:`13854`) + +.. _whatsnew_0190.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- The ``SparsePanel`` class has been removed (:issue:`13778`) +- The ``pd.sandbox`` module has been removed in favor of the external library ``pandas-qt`` (:issue:`13670`) +- The ``pandas.io.data`` and ``pandas.io.wb`` modules are removed in favor of + the `pandas-datareader package `__ (:issue:`13724`). 
+- The ``pandas.tools.rplot`` module has been removed in favor of + the `seaborn package `__ (:issue:`13855`) +- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, as was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`) +- ``DataFrame.to_dict()`` has dropped the ``outtype`` parameter in favor of ``orient`` (:issue:`13627`, :issue:`8486`) +- ``pd.Categorical`` has dropped setting of the ``ordered`` attribute directly in favor of the ``set_ordered`` method (:issue:`13671`) +- ``pd.Categorical`` has dropped the ``levels`` attribute in favor of ``categories`` (:issue:`8376`) +- ``DataFrame.to_sql()`` has dropped the ``mysql`` option for the ``flavor`` parameter (:issue:`13611`) +- ``Panel.shift()`` has dropped the ``lags`` parameter in favor of ``periods`` (:issue:`14041`) +- ``pd.Index`` has dropped the ``diff`` method in favor of ``difference`` (:issue:`13669`) +- ``pd.DataFrame`` has dropped the ``to_wide`` method in favor of ``to_panel`` (:issue:`14039`) +- ``Series.to_csv`` has dropped the ``nanRep`` parameter in favor of ``na_rep`` (:issue:`13804`) +- ``Series.xs``, ``DataFrame.xs``, ``Panel.xs``, ``Panel.major_xs``, and ``Panel.minor_xs`` have dropped the ``copy`` parameter (:issue:`13781`) +- ``str.split`` has dropped the ``return_type`` parameter in favor of ``expand`` (:issue:`13701`) +- Removal of the legacy time rules (offset aliases), deprecated since 0.17.0 (this has been alias since 0.8.0) (:issue:`13590`, :issue:`13868`). Now legacy time rules raises ``ValueError``. For the list of currently supported offsets, see :ref:`here `. +- The default value for the ``return_type`` parameter for ``DataFrame.plot.box`` and ``DataFrame.boxplot`` changed from ``None`` to ``"axes"``. These methods will now return a matplotlib axes by default instead of a dictionary of artists. See :ref:`here ` (:issue:`6581`). +- The ``tquery`` and ``uquery`` functions in the ``pandas.io.sql`` module are removed (:issue:`5950`). + + +.. 
_whatsnew_0190.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`) +- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks is large, though recommended to use ``IntIndex`` in such cases (:issue:`13082`) +- Improved performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`) +- Improved performance of float64 hash table operations, fixing some very slow indexing and groupby operations in python 3 (:issue:`13166`, :issue:`13334`) +- Improved performance of ``DataFrameGroupBy.transform`` (:issue:`12737`) +- Improved performance of ``Index`` and ``Series`` ``.duplicated`` (:issue:`10235`) +- Improved performance of ``Index.difference`` (:issue:`12044`) +- Improved performance of ``RangeIndex.is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`13749`) +- Improved performance of datetime string parsing in ``DatetimeIndex`` (:issue:`13692`) +- Improved performance of hashing ``Period`` (:issue:`12817`) +- Improved performance of ``factorize`` of datetime with timezone (:issue:`13750`) +- Improved performance by lazily creating indexing hashtables on larger Indexes (:issue:`14266`) +- Improved performance of ``groupby.groups`` (:issue:`14293`) +- Avoided unnecessary materializing of a MultiIndex when introspecting for memory usage (:issue:`14308`) + +.. _whatsnew_0190.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Bug in ``groupby().shift()``, which could cause a segfault or corruption in rare circumstances when grouping by columns with missing values (:issue:`13813`) +- Bug in ``groupby().cumsum()`` calculating ``cumprod`` when ``axis=1``. 
(:issue:`13994`) +- Bug in ``pd.to_timedelta()`` in which the ``errors`` parameter was not being respected (:issue:`13613`) +- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) +- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) +- Bug in area plot draws legend incorrectly if subplot is enabled or legend is moved after plot (matplotlib 1.5.0 is required to draw area plot legend properly) (:issue:`9161`, :issue:`13544`) +- Bug in ``DataFrame`` assignment with an object-dtyped ``Index`` where the resultant column is mutable to the original object. (:issue:`13522`) +- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) +- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`) +- Bug in ``Categorical.from_codes()`` where an unhelpful error was raised when an invalid ``ordered`` parameter was passed in (:issue:`14058`) +- Bug in ``Series`` construction from a tuple of integers on windows not returning default dtype (int64) (:issue:`13646`) +- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow was not being caught (:issue:`14068`) +- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`) +- Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`) +- Bug in calling ``.memory_usage()`` on object which doesn't implement (:issue:`12924`) +- Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()`` ); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`) +- Bug in ``SeriesGroupBy.transform`` with datetime values and missing groups (:issue:`13191`) +- Bug where empty ``Series`` were incorrectly coerced in datetime-like 
numeric operations (:issue:`13844`) +- Bug in ``Categorical`` constructor when passed a ``Categorical`` containing datetimes with timezones (:issue:`14190`) +- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`) +- Bug in ``Series.str.extractall()`` with single group and quantifier (:issue:`13382`) +- Bug in ``DatetimeIndex`` and ``Period`` subtraction raises ``ValueError`` or ``AttributeError`` rather than ``TypeError`` (:issue:`13078`) +- Bug in ``Index`` and ``Series`` created with ``NaN`` and ``NaT`` mixed data may not have ``datetime64`` dtype (:issue:`13324`) +- Bug in ``Index`` and ``Series`` may ignore ``np.datetime64('nat')`` and ``np.timedelta64('nat')`` to infer dtype (:issue:`13324`) +- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`) +- Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` appropriately when empty (:issue:`13212`) +- Bug in ``groupby(..).apply(..)`` when the passed function returns scalar values per group (:issue:`13468`). +- Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`) +- Bug in ``.tz_convert`` on a tz-aware ``DatetimeIndex`` that relied on index being sorted for correct results (:issue:`13306`) +- Bug in ``.tz_localize`` with ``dateutil.tz.tzlocal`` may return incorrect result (:issue:`13583`) +- Bug in ``DatetimeTZDtype`` dtype with ``dateutil.tz.tzlocal`` cannot be regarded as valid dtype (:issue:`13583`) +- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. 
(:issue:`13231`) +- Bug in ``.rolling()`` that allowed a negative integer window in construction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`) +- Bug in ``Series`` indexing with tuple-valued data and a numeric index (:issue:`13509`) +- Bug in printing ``pd.DataFrame`` where unusual elements with the ``object`` dtype were causing segfaults (:issue:`13717`) +- Bug in ranking ``Series`` which could result in segfaults (:issue:`13445`) +- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) +- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) +- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) +- Bug in ``pd.concat`` and ``.append`` may coerce ``datetime64`` and ``timedelta`` to ``object`` dtype containing python built-in ``datetime`` or ``timedelta`` rather than ``Timestamp`` or ``Timedelta`` (:issue:`13626`) +- Bug in ``PeriodIndex.append`` may raise ``AttributeError`` when the result is ``object`` dtype (:issue:`13221`) +- Bug in ``CategoricalIndex.append`` may accept normal ``list`` (:issue:`13626`) +- Bug in ``pd.concat`` and ``.append`` with the same timezone get reset to UTC (:issue:`7795`) +- Bug in ``Series`` and ``DataFrame`` ``.append`` raises ``AmbiguousTimeError`` if data contains datetime near DST boundary (:issue:`13626`) +- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`) +- Bug in ``DataFrame.describe()`` raising ``ValueError`` with only boolean columns (:issue:`13898`) +- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) +- Bug in ``.str.replace`` does not raise ``TypeError`` for invalid replacement (:issue:`13438`) +- Bug in ``MultiIndex.from_arrays`` which didn't check for input array 
lengths matching (:issue:`13599`) +- Bug in ``cartesian_product`` and ``MultiIndex.from_product`` which may raise with empty input arrays (:issue:`12258`) +- Bug in ``pd.read_csv()`` which may cause a segfault or corruption when iterating in large chunks over a stream/file under rare circumstances (:issue:`13703`) +- Bug in ``pd.read_csv()`` which caused errors to be raised when a dictionary containing scalars is passed in for ``na_values`` (:issue:`12224`) +- Bug in ``pd.read_csv()`` which caused BOM files to be incorrectly parsed by not ignoring the BOM (:issue:`4793`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` which raised errors when a numpy array was passed in for ``usecols`` (:issue:`12546`) +- Bug in ``pd.read_csv()`` where the index columns were being incorrectly parsed when parsed as dates with a ``thousands`` parameter (:issue:`14066`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which ``NaN`` values weren't being detected after data was converted to numeric values (:issue:`13314`) +- Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a ``tempfile.TemporaryFile`` on Windows with Python 3 (:issue:`13398`) +- Bug in ``pd.read_csv()`` that prevents ``usecols`` kwarg from accepting single-byte unicode strings (:issue:`13219`) +- Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`) +- Bug in ``pd.read_csv()`` in the C engine where the NULL character was not being parsed as NULL (:issue:`14012`) +- Bug in ``pd.read_csv()`` with ``engine='c'`` in which NULL ``quotechar`` was not accepted even though 
``quoting`` was specified as ``None`` (:issue:`13411`) +- Bug in ``pd.read_csv()`` with ``engine='c'`` in which fields were not properly cast to float when quoting was specified as non-numeric (:issue:`13411`) +- Bug in ``pd.read_csv()`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`) +- Bug in ``pd.read_csv()``, where aliases for utf-xx (e.g. UTF-xx, UTF_xx, utf_xx) raised UnicodeDecodeError (:issue:`13549`) +- Bug in ``pd.read_csv``, ``pd.read_table``, ``pd.read_fwf``, ``pd.read_stata`` and ``pd.read_sas`` where files were opened by parsers but not closed if both ``chunksize`` and ``iterator`` were ``None``. (:issue:`13940`) +- Bug in ``StataReader``, ``StataWriter``, ``XportReader`` and ``SAS7BDATReader`` where a file was not properly closed when an error was raised. (:issue:`13940`) +- Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`) +- Bug in ``pd.Series.str.zfill``, ``center``, ``ljust``, ``rjust``, and ``pad`` when passing non-integers, did not raise ``TypeError`` (:issue:`13598`) +- Bug in checking for any null objects in a ``TimedeltaIndex``, which always returned ``True`` (:issue:`13603`) +- Bug in ``Series`` arithmetic raises ``TypeError`` if it contains datetime-like as ``object`` dtype (:issue:`13043`) +- Bug in which ``Series.isnull()`` and ``Series.notnull()`` ignore ``Period('NaT')`` (:issue:`13737`) +- Bug in which ``Series.fillna()`` and ``Series.dropna()`` don't affect ``Period('NaT')`` (:issue:`13737`) +- Bug in ``.fillna(value=np.nan)`` incorrectly raises ``KeyError`` on a ``category`` dtyped ``Series`` (:issue:`14021`) +- Bug in extension dtype creation where the created types were not is/identical (:issue:`13285`) +- Bug in ``.resample(..)`` where incorrect warnings were triggered by IPython introspection (:issue:`13618`) +- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`) +- Bug in ``Series`` comparison may output incorrect result if rhs 
contains ``NaT`` (:issue:`9005`) +- Bug in ``Series`` and ``Index`` comparison may output incorrect result if it contains ``NaT`` with ``object`` dtype (:issue:`13592`) +- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`) +- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`) +- Bug in ``pd.set_eng_float_format()`` that would prevent NaN and Inf from formatting (:issue:`11981`) +- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`) +- Clean some compile time warnings in datetime parsing (:issue:`13607`) +- Bug in ``factorize`` raises ``AmbiguousTimeError`` if data contains datetime near DST boundary (:issue:`13750`) +- Bug in ``.set_index`` raises ``AmbiguousTimeError`` if new index contains DST boundary and multi levels (:issue:`12920`) +- Bug in ``.shift`` raises ``AmbiguousTimeError`` if data contains datetime near DST boundary (:issue:`13926`) +- Bug in ``pd.read_hdf()`` returns incorrect result when a ``DataFrame`` with a ``categorical`` column and a query which doesn't match any values (:issue:`13792`) +- Bug in ``.iloc`` when indexing with a non lexsorted MultiIndex (:issue:`13797`) +- Bug in ``.loc`` when indexing with date strings in a reverse sorted ``DatetimeIndex`` (:issue:`14316`) +- Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`) +- Bug in ``.combine_first`` may return incorrect ``dtype`` (:issue:`7630`, :issue:`10567`) +- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`) +- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`) +- Bug in ``.to_html``, ``.to_latex`` and ``.to_string`` silently ignore custom datetime formatter passed through the ``formatters`` key word (:issue:`10690`) +- Bug in 
``DataFrame.iterrows()``, not yielding a ``Series`` subclass if defined (:issue:`13977`) +- Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`) +- Bug in invalid ``Timedelta`` arithmetic and comparison may raise ``ValueError`` rather than ``TypeError`` (:issue:`13624`) +- Bug in invalid datetime parsing in ``to_datetime`` and ``DatetimeIndex`` may raise ``TypeError`` rather than ``ValueError`` (:issue:`11169`, :issue:`11287`) +- Bug in ``Index`` created with tz-aware ``Timestamp`` and mismatched ``tz`` option incorrectly coerces timezone (:issue:`13692`) +- Bug in ``DatetimeIndex`` with nanosecond frequency does not include timestamp specified with ``end`` (:issue:`13672`) +- Bug in ``Series`` when setting a slice with a ``np.timedelta64`` (:issue:`14155`) +- Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`) +- Bug in ``Index`` may ignore specified ``datetime64`` or ``timedelta64`` passed as ``dtype`` (:issue:`13981`) +- Bug in ``RangeIndex`` where it could be created with no arguments rather than raising ``TypeError`` (:issue:`13793`) +- Bug in ``.value_counts()`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`) +- Bug in ``DatetimeIndex`` may raise ``OutOfBoundsDatetime`` if input ``np.datetime64`` has other unit than ``ns`` (:issue:`9114`) +- Bug in ``Series`` creation with ``np.datetime64`` which has other unit than ``ns`` as ``object`` dtype results in incorrect values (:issue:`13876`) +- Bug in ``resample`` with timedelta data where data was cast to float (:issue:`13119`). 
+- Bug in ``pd.isnull()`` and ``pd.notnull()`` raise ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`) +- Bug in ``pd.merge()`` may raise ``TypeError`` if input datetime-like has other unit than ``ns`` (:issue:`13389`) +- Bug in ``HDFStore``/``read_hdf()`` discarded ``DatetimeIndex.name`` if ``tz`` was set (:issue:`13884`) +- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) +- Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`) +- Bug in ``df.groupby(...)[...]`` where getitem with ``Int64Index`` raised an error (:issue:`13731`) +- Bug in the CSS classes assigned to ``DataFrame.style`` for index names. Previously they were assigned ``"col_heading level<n> col<c>"`` where ``n`` was the number of levels + 1. Now they are assigned ``"index_name level<n>"``, where ``n`` is the correct level for that MultiIndex. +- Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient (:issue:`13454`) +- Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`) +- Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`) +- Bug in subtract tz-aware ``datetime.datetime`` from tz-aware ``datetime64`` series (:issue:`14088`) +- Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`) +- Bug in invalid frequency offset string like "D1", "-2-3H" may not raise ``ValueError`` (:issue:`13930`) +- Bug in ``concat`` and ``groupby`` for hierarchical frames with ``RangeIndex`` levels (:issue:`13542`). 
+- Bug in ``Series.str.contains()`` for Series containing only ``NaN`` values of ``object`` dtype (:issue:`14171`) +- Bug in ``agg()`` function on groupby dataframe changes dtype of ``datetime64[ns]`` column to ``float64`` (:issue:`12821`) +- Bug in using NumPy ufunc with ``PeriodIndex`` to add or subtract integer raise ``IncompatibleFrequency``. Note that using standard operator like ``+`` or ``-`` is recommended, because standard operators use more efficient path (:issue:`13980`) +- Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`) +- Bug in ``Series`` flexible arithmetic methods (like ``.add()``) raises ``ValueError`` when ``axis=None`` (:issue:`13894`) +- Bug in ``DataFrame.to_csv()`` with ``MultiIndex`` columns in which a stray empty line was added (:issue:`6618`) +- Bug in ``DatetimeIndex``, ``TimedeltaIndex`` and ``PeriodIndex.equals()`` may return ``True`` when input isn't ``Index`` but contains the same values (:issue:`13107`) +- Bug in assignment against datetime with timezone may not work if it contains datetime near DST boundary (:issue:`14146`) +- Bug in ``pd.eval()`` and ``HDFStore`` query truncating long float literals with python 2 (:issue:`14241`) +- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`) +- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`) +- Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment. 
+- Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`) +- Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`) +- ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) +- Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`) +- Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`) + + +.. _whatsnew_0.19.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.18.1..v0.19.0 diff --git a/doc/source/whatsnew/v0.19.1.rst b/doc/source/whatsnew/v0.19.1.rst new file mode 100644 index 00000000..a89d1461 --- /dev/null +++ b/doc/source/whatsnew/v0.19.1.rst @@ -0,0 +1,77 @@ +.. _whatsnew_0191: + +v0.19.1 (November 3, 2016) +-------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release from 0.19.0 and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.19.1 + :local: + :backlinks: none + + +.. _whatsnew_0191.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Fixed performance regression in factorization of ``Period`` data (:issue:`14338`) +- Fixed performance regression in ``Series.asof(where)`` when ``where`` is a scalar (:issue:`14461`) +- Improved performance in ``DataFrame.asof(where)`` when ``where`` is a scalar (:issue:`14461`) +- Improved performance in ``.to_json()`` when ``lines=True`` (:issue:`14408`) +- Improved performance in certain types of `loc` indexing with a MultiIndex (:issue:`14551`). + + +.. 
_whatsnew_0191.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Source installs from PyPI will now again work without ``cython`` installed, as in previous versions (:issue:`14204`) +- Compat with Cython 0.25 for building (:issue:`14496`) +- Fixed regression where user-provided file handles were closed in ``read_csv`` (c engine) (:issue:`14418`). +- Fixed regression in ``DataFrame.quantile`` when missing values were present in some columns (:issue:`14357`). +- Fixed regression in ``Index.difference`` where the ``freq`` of a ``DatetimeIndex`` was incorrectly set (:issue:`14323`) +- Added back ``pandas.core.common.array_equivalent`` with a deprecation warning (:issue:`14555`). +- Bug in ``pd.read_csv`` for the C engine in which quotation marks were improperly parsed in skipped rows (:issue:`14459`) +- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`) +- Fixed regression in ``Index.append`` when categorical indices were appended (:issue:`14545`). +- Fixed regression in ``pd.DataFrame`` where constructor fails when given dict with ``None`` value (:issue:`14381`) +- Fixed regression in ``DatetimeIndex._maybe_cast_slice_bound`` when index is empty (:issue:`14354`). +- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) +- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`) +- Bug in string indexing against data with ``object`` ``Index`` may raise ``AttributeError`` (:issue:`14424`) +- Correctly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`) +- Bug in ``RangeIndex.intersection`` when result is an empty set (:issue:`14364`). +- Bug in groupby-transform broadcasting that could cause incorrect dtype coercion (:issue:`14457`) +- Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`). 
+- Bug in ``DataFrame.insert`` where multiple calls with duplicate columns can fail (:issue:`14291`) +- ``pd.merge()`` will raise ``ValueError`` with non-boolean parameters in passed boolean type arguments (:issue:`14434`) +- Bug in ``Timestamp`` where dates very near the minimum (1677-09) could underflow on creation (:issue:`14415`) +- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`) +- Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`) +- Bug in ``pd.concat`` with dataframes heterogeneous in length and tuple ``keys`` (:issue:`14438`) +- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`) +- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`) +- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`) +- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`) +- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` + is not scalar and ``values`` is not specified (:issue:`14380`) + + +.. _whatsnew_0.19.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.0..v0.19.1 diff --git a/doc/source/whatsnew/v0.19.2.rst b/doc/source/whatsnew/v0.19.2.rst new file mode 100644 index 00000000..023bc780 --- /dev/null +++ b/doc/source/whatsnew/v0.19.2.rst @@ -0,0 +1,98 @@ +.. _whatsnew_0192: + +v0.19.2 (December 24, 2016) +--------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.19.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. 
+ +Highlights include: + +- Compatibility with Python 3.6 +- Added a `Pandas Cheat Sheet `__. (:issue:`13202`). + + +.. contents:: What's new in v0.19.2 + :local: + :backlinks: none + + +.. _whatsnew_0192.enhancements: + +Enhancements +~~~~~~~~~~~~ + +The ``pd.merge_asof()``, added in 0.19.0, gained some improvements: + +- ``pd.merge_asof()`` gained ``left_index``/``right_index`` and ``left_by``/``right_by`` arguments (:issue:`14253`) +- ``pd.merge_asof()`` can take multiple columns in ``by`` parameter and has specialized dtypes for better performance (:issue:`13936`) + + +.. _whatsnew_0192.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Performance regression with ``PeriodIndex`` (:issue:`14822`) +- Performance regression in indexing with getitem (:issue:`14930`) +- Improved performance of ``.replace()`` (:issue:`12745`) +- Improved performance ``Series`` creation with a datetime index and dictionary data (:issue:`14894`) + + +.. _whatsnew_0192.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Compat with python 3.6 for pickling of some offsets (:issue:`14685`) +- Compat with python 3.6 for some indexing exception types (:issue:`14684`, :issue:`14689`) +- Compat with python 3.6 for deprecation warnings in the test suite (:issue:`14681`) +- Compat with python 3.6 for Timestamp pickles (:issue:`14689`) +- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`) +- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`) +- Bug in ``pd.read_csv`` in which aliasing was being done for ``na_values`` when passed in as a dictionary (:issue:`14203`) +- Bug in ``pd.read_csv`` in which column indices for a dict-like ``na_values`` were not being respected (:issue:`14203`) +- Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`) +- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when 
multi-char delimiters were not being respected with quotes (:issue:`14582`) +- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally. +- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`) +- Bug in ``.fillna()`` in which timezone aware datetime64 values were incorrectly rounded (:issue:`14872`) +- Bug in ``.groupby(..., sort=True)`` of a non-lexsorted MultiIndex when grouping with multiple levels (:issue:`14776`) +- Bug in ``pd.cut`` with negative values and a single bin (:issue:`14652`) +- Bug in ``pd.to_numeric`` where a 0 was not unsigned on a ``downcast='unsigned'`` argument (:issue:`14401`) +- Bug in plotting regular and irregular timeseries using shared axes + (``sharex=True`` or ``ax.twinx()``) (:issue:`13341`, :issue:`14322`). +- Bug in not propagating exceptions in parsing invalid datetimes, noted in python 3.6 (:issue:`14561`) +- Bug in resampling a ``DatetimeIndex`` in local TZ, covering a DST change, which would raise ``AmbiguousTimeError`` (:issue:`14682`) +- Bug in indexing that transformed ``RecursionError`` into ``KeyError`` or ``IndexingError`` (:issue:`14554`) +- Bug in ``HDFStore`` when writing a ``MultiIndex`` when using ``data_columns=True`` (:issue:`14435`) +- Bug in ``HDFStore.append()`` when writing a ``Series`` and passing a ``min_itemsize`` argument containing a value for the ``index`` (:issue:`11412`) +- Bug when writing to a ``HDFStore`` in ``table`` format with a ``min_itemsize`` value for the ``index`` and without asking to append (:issue:`10381`) +- Bug in ``Series.groupby.nunique()`` raising an ``IndexError`` for an empty ``Series`` (:issue:`12553`) +- Bug in ``DataFrame.nlargest`` and ``DataFrame.nsmallest`` when the index had duplicate values (:issue:`13412`) +- Bug in clipboard functions on 
linux with python2 with unicode and separators (:issue:`13747`) +- Bug in clipboard functions on Windows 10 and python 3 (:issue:`14362`, :issue:`12807`) +- Bug in ``.to_clipboard()`` and Excel compat (:issue:`12529`) +- Bug in ``DataFrame.combine_first()`` for integer columns (:issue:`14687`). +- Bug in ``pd.read_csv()`` in which the ``dtype`` parameter was not being respected for empty data (:issue:`14712`) +- Bug in ``pd.read_csv()`` in which the ``nrows`` parameter was not being respected for large input when using the C engine for parsing (:issue:`7626`) +- Bug in ``pd.merge_asof()`` could not handle timezone-aware DatetimeIndex when a tolerance was specified (:issue:`14844`) +- Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) +- Bug in ``.plot(kind='kde')`` which did not drop missing values to generate the KDE Plot, instead generating an empty plot. (:issue:`14821`) +- Bug in ``unstack()`` if called with a list of column(s) as an argument, regardless of the dtypes of all columns, they get coerced to ``object`` (:issue:`11847`) + + +.. _whatsnew_0.19.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.1..v0.19.2 diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst new file mode 100644 index 00000000..ceb1c7f2 --- /dev/null +++ b/doc/source/whatsnew/v0.20.0.rst @@ -0,0 +1,1780 @@ +.. _whatsnew_0200: + +v0.20.1 (May 5, 2017) +--------------------- + +{{ header }} + +This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. 
+ +Highlights include: + +- New ``.agg()`` API for Series/DataFrame similar to the groupby-rolling-resample API's, see :ref:`here ` +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. +- The ``.ix`` indexer has been deprecated, see :ref:`here ` +- ``Panel`` has been deprecated, see :ref:`here ` +- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here ` +- Improved user API when grouping by index levels in ``.groupby()``, see :ref:`here ` +- Improved support for ``UInt64`` dtypes, see :ref:`here ` +- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec and that gives the possibility for a more interactive repr in the Jupyter Notebook, see :ref:`here ` +- Experimental support for exporting styled DataFrames (``DataFrame.style``) to Excel, see :ref:`here ` +- Window binary corr/cov operations now return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here ` +- Support for S3 handling now uses ``s3fs``, see :ref:`here ` +- Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` + +.. warning:: + + Pandas has changed the internal structure and layout of the code base. + This can affect imports that are not from the top-level ``pandas.*`` namespace, please see the changes :ref:`here `. + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. note:: + + This is a combined release for 0.20.0 and 0.20.1. + Version 0.20.1 contains one additional change for backwards-compatibility with downstream projects using pandas' ``utils`` routines. (:issue:`16250`) + +.. contents:: What's new in v0.20.0 + :local: + :backlinks: none + +.. _whatsnew_0200.enhancements: + +New features +~~~~~~~~~~~~ + +.. 
_whatsnew_0200.enhancements.agg: + +``agg`` API for DataFrame/Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Series & DataFrame have been enhanced to support the aggregation API. This is a familiar API +from groupby, window operations, and resampling. This allows aggregation operations in a concise way +by using :meth:`~DataFrame.agg` and :meth:`~DataFrame.transform`. The full documentation +is :ref:`here ` (:issue:`1623`). + +Here is a sample + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], + index=pd.date_range('1/1/2000', periods=10)) + df.iloc[3:7] = np.nan + df + +One can operate using string function names, callables, lists, or dictionaries of these. + +Using a single function is equivalent to ``.apply``. + +.. ipython:: python + + df.agg('sum') + +Multiple aggregations with a list of functions. + +.. ipython:: python + + df.agg(['sum', 'min']) + +Using a dict provides the ability to apply specific aggregations per column. +You will get a matrix-like output of all of the aggregators. The output has one column +per unique function. Those functions applied to a particular column will be ``NaN``: + +.. ipython:: python + + df.agg({'A': ['sum', 'min'], 'B': ['min', 'max']}) + +The API also supports a ``.transform()`` function for broadcasting results. + +.. ipython:: python + :okwarning: + + df.transform(['abs', lambda x: x - x.min()]) + +When presented with mixed dtypes that cannot be aggregated, ``.agg()`` will only take the valid +aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [1., 2., 3.], + 'C': ['foo', 'bar', 'baz'], + 'D': pd.date_range('20130101', periods=3)}) + df.dtypes + +.. ipython:: python + + df.agg(['min', 'sum']) + +.. 
_whatsnew_0200.enhancements.dataio_dtype: + +``dtype`` keyword for data IO +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``'python'`` engine for :func:`read_csv`, as well as the :func:`read_fwf` function for parsing +fixed-width text files and :func:`read_excel` for parsing Excel files, now accept the ``dtype`` keyword argument for specifying the types of specific columns (:issue:`14295`). See the :ref:`io docs ` for more information. + +.. ipython:: python + :suppress: + + from io import StringIO + +.. ipython:: python + + data = "a b\n1 2\n3 4" + pd.read_fwf(StringIO(data)).dtypes + pd.read_fwf(StringIO(data), dtype={'a': 'float64', 'b': 'object'}).dtypes + +.. _whatsnew_0120.enhancements.datetime_origin: + +``.to_datetime()`` has gained an ``origin`` parameter +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`to_datetime` has gained a new parameter, ``origin``, to define a reference date +from where to compute the resulting timestamps when parsing numerical values with a specific ``unit`` specified. (:issue:`11276`, :issue:`11745`) + +For example, with 1960-01-01 as the starting date: + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01')) + +The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``, which is +commonly called 'unix epoch' or POSIX time. This was the previous default, so this is a backward compatible change. + +.. ipython:: python + + pd.to_datetime([1, 2, 3], unit='D') + + +.. _whatsnew_0200.enhancements.groupby_access: + +Groupby enhancements +^^^^^^^^^^^^^^^^^^^^ + +Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now reference either column names or index level names. Previously, only column names could be referenced. This allows to easily group by a column and index level at the same time. (:issue:`5677`) + +.. 
ipython:: python + + arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + + index = pd.MultiIndex.from_arrays(arrays, names=['first', 'second']) + + df = pd.DataFrame({'A': [1, 1, 1, 1, 2, 2, 3, 3], + 'B': np.arange(8)}, + index=index) + df + + df.groupby(['second', 'A']).sum() + + +.. _whatsnew_0200.enhancements.compressed_urls: + +Better support for compressed URLs in ``read_csv`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The compression code was refactored (:issue:`12688`). As a result, reading +dataframes from URLs in :func:`read_csv` or :func:`read_table` now supports +additional compression methods: ``xz``, ``bz2``, and ``zip`` (:issue:`14570`). +Previously, only ``gzip`` compression was supported. By default, compression of +URLs and paths are now inferred using their file extensions. Additionally, +support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). + +.. ipython:: python + + url = ('https://github.com/{repo}/raw/{branch}/{path}' + .format(repo='pandas-dev/pandas', + branch='master', + path='pandas/tests/io/parser/data/salaries.csv.bz2')) + # default, infer compression + df = pd.read_csv(url, sep='\t', compression='infer') + # explicitly specify compression + df = pd.read_csv(url, sep='\t', compression='bz2') + df.head(2) + +.. _whatsnew_0200.enhancements.pickle_compression: + +Pickle file I/O now supports compression +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` +can now read from and write to compressed pickle files. Compression methods +can be an explicit parameter or be inferred from the file extension. +See :ref:`the docs here. ` + +.. ipython:: python + + df = pd.DataFrame({'A': np.random.randn(1000), + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) + +Using an explicit compression type + +.. 
ipython:: python + + df.to_pickle("data.pkl.compress", compression="gzip") + rt = pd.read_pickle("data.pkl.compress", compression="gzip") + rt.head() + +The default is to infer the compression type from the extension (``compression='infer'``): + +.. ipython:: python + + df.to_pickle("data.pkl.gz") + rt = pd.read_pickle("data.pkl.gz") + rt.head() + df["A"].to_pickle("s1.pkl.bz2") + rt = pd.read_pickle("s1.pkl.bz2") + rt.head() + +.. ipython:: python + :suppress: + + import os + os.remove("data.pkl.compress") + os.remove("data.pkl.gz") + os.remove("s1.pkl.bz2") + +.. _whatsnew_0200.enhancements.uint64_support: + +UInt64 support improved +^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas has significantly improved support for operations involving unsigned, +or purely non-negative, integers. Previously, handling these integers would +result in improper rounding or data-type casting, leading to incorrect results. +Notably, a new numerical index, ``UInt64Index``, has been created (:issue:`14937`) + +.. ipython:: python + + idx = pd.UInt64Index([1, 2, 3]) + df = pd.DataFrame({'A': ['a', 'b', 'c']}, index=idx) + df.index + +- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`) +- Bug in ``Series.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14721`) +- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`) +- Bug in ``pd.read_csv()`` in which unsigned 64-bit integer elements were being improperly converted to the wrong data types (:issue:`14983`) +- Bug in ``pd.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14915`) +- Bug in ``pd.value_counts()`` in which unsigned 64-bit integers were being erroneously truncated in the output (:issue:`14934`) + +.. 
_whatsnew_0200.enhancements.groupy_categorical: + +GroupBy on categoricals +^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueError`` when grouping on a categorical series with some categories not appearing in the data. (:issue:`13179`) + +.. ipython:: python + + chromosomes = np.r_[np.arange(1, 23).astype(str), ['X', 'Y']] + df = pd.DataFrame({ + 'A': np.random.randint(100), + 'B': np.random.randint(100), + 'C': np.random.randint(100), + 'chromosomes': pd.Categorical(np.random.choice(chromosomes, 100), + categories=chromosomes, + ordered=True)}) + df + +**Previous behavior**: + +.. code-block:: ipython + + In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + --------------------------------------------------------------------------- + ValueError: items in new_categories are not the same as in old categories + +**New behavior**: + +.. ipython:: python + + df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + +.. _whatsnew_0200.enhancements.table_schema: + +Table schema output +^^^^^^^^^^^^^^^^^^^ + +The new orient ``'table'`` for :meth:`DataFrame.to_json` +will generate a `Table Schema`_ compatible string representation of +the data. + +.. ipython:: python + + df = pd.DataFrame( + {'A': [1, 2, 3], + 'B': ['a', 'b', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=3)}, + index=pd.Index(range(3), name='idx')) + df + df.to_json(orient='table') + + +See :ref:`IO: Table Schema for more information `. + +Additionally, the repr for ``DataFrame`` and ``Series`` can now publish +this JSON Table schema representation of the Series or DataFrame if you are +using IPython (or another frontend like `nteract`_ using the Jupyter messaging +protocol). +This gives frontends like the Jupyter notebook and `nteract`_ +more flexibility in how they display pandas objects, since they have +more information about the data. 
+You must enable this by setting the ``display.html.table_schema`` option to ``True``. + +.. _Table Schema: http://specs.frictionlessdata.io/json-table-schema/ +.. _nteract: http://nteract.io/ + +.. _whatsnew_0200.enhancements.scipy_sparse: + +SciPy sparse matrix from/to SparseDataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas now supports creating sparse dataframes directly from ``scipy.sparse.spmatrix`` instances. +See the :ref:`documentation ` for more information. (:issue:`4343`) + +All sparse formats are supported, but matrices that are not in :mod:`COOrdinate ` format will be converted, copying data as needed. + +.. code-block:: python + + from scipy.sparse import csr_matrix + arr = np.random.random(size=(1000, 5)) + arr[arr < .9] = 0 + sp_arr = csr_matrix(arr) + sp_arr + sdf = pd.SparseDataFrame(sp_arr) + sdf + +To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you can use: + +.. code-block:: python + + sdf.to_coo() + +.. _whatsnew_0200.enhancements.style_excel: + +Excel output for styled DataFrames +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Experimental support has been added to export ``DataFrame.style`` formats to Excel using the ``openpyxl`` engine. (:issue:`15530`) + +For example, after running the following, ``styled.xlsx`` renders as below: + +.. ipython:: python + :okwarning: + + np.random.seed(24) + df = pd.DataFrame({'A': np.linspace(1, 10, 10)}) + df = pd.concat([df, pd.DataFrame(np.random.RandomState(24).randn(10, 4), + columns=list('BCDE'))], + axis=1) + df.iloc[0, 2] = np.nan + df + styled = (df.style + .applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black') + .highlight_max()) + styled.to_excel('styled.xlsx', engine='openpyxl') + +.. image:: ../_static/style-excel.png + +.. ipython:: python + :suppress: + + import os + os.remove('styled.xlsx') + +See the :ref:`Style documentation ` for more detail. + +.. 
_whatsnew_0200.enhancements.intervalindex: + +IntervalIndex +^^^^^^^^^^^^^ + +pandas has gained an ``IntervalIndex`` with its own dtype, ``interval`` as well as the ``Interval`` scalar type. These allow first-class support for interval +notation, specifically as a return type for the categories in :func:`cut` and :func:`qcut`. The ``IntervalIndex`` allows some unique indexing, see the +:ref:`docs `. (:issue:`7640`, :issue:`8625`) + +.. warning:: + + These indexing behaviors of the IntervalIndex are provisional and may change in a future version of pandas. Feedback on usage is welcome. + + +Previous behavior: + +The returned categories were strings, representing Intervals + +.. code-block:: ipython + + In [1]: c = pd.cut(range(4), bins=2) + + In [2]: c + Out[2]: + [(-0.003, 1.5], (-0.003, 1.5], (1.5, 3], (1.5, 3]] + Categories (2, object): [(-0.003, 1.5] < (1.5, 3]] + + In [3]: c.categories + Out[3]: Index(['(-0.003, 1.5]', '(1.5, 3]'], dtype='object') + +New behavior: + +.. ipython:: python + + c = pd.cut(range(4), bins=2) + c + c.categories + +Furthermore, this allows one to bin *other* data with these same bins, with ``NaN`` representing a missing +value similar to other dtypes. + +.. ipython:: python + + pd.cut([0, 3, 5, 1], bins=c.categories) + +An ``IntervalIndex`` can also be used in ``Series`` and ``DataFrame`` as the index. + +.. ipython:: python + + df = pd.DataFrame({'A': range(4), + 'B': pd.cut([0, 3, 1, 1], bins=c.categories) + }).set_index('B') + df + +Selecting via a specific interval: + +.. ipython:: python + + df.loc[pd.Interval(1.5, 3.0)] + +Selecting via a scalar value that is contained *in* the intervals. + +.. ipython:: python + + df.loc[0] + +.. _whatsnew_0200.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- ``DataFrame.rolling()`` now accepts the parameter ``closed='right'|'left'|'both'|'neither'`` to choose the rolling window-endpoint closedness. 
See the :ref:`documentation ` (:issue:`13965`) +- Integration with the ``feather-format``, including a new top-level ``pd.read_feather()`` and ``DataFrame.to_feather()`` method, see :ref:`here `. +- ``Series.str.replace()`` now accepts a callable, as replacement, which is passed to ``re.sub`` (:issue:`15055`) +- ``Series.str.replace()`` now accepts a compiled regular expression as a pattern (:issue:`15446`) +- ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) +- ``DataFrame`` and ``DataFrame.groupby()`` have gained a ``nunique()`` method to count the distinct values over an axis (:issue:`14336`, :issue:`15197`). +- ``DataFrame`` has gained a ``melt()`` method, equivalent to ``pd.melt()``, for unpivoting from a wide to long format (:issue:`12640`). +- ``pd.read_excel()`` now preserves sheet order when using ``sheetname=None`` (:issue:`9930`) +- Multiple offset aliases with decimal points are now supported (e.g. ``0.5min`` is parsed as ``30s``) (:issue:`8419`) +- ``.isnull()`` and ``.notnull()`` have been added to ``Index`` object to make them more consistent with the ``Series`` API (:issue:`15300`) +- New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an + unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack + of sorting or an incorrect key. 
See :ref:`here ` +- ``MultiIndex`` has gained a ``.to_frame()`` method to convert to a ``DataFrame`` (:issue:`12397`) +- ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`, :issue:`14798`) +- ``pd.qcut`` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`7751`) +- ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`) +- The ``usecols`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`14154`) +- The ``skiprows`` argument in ``pd.read_csv()`` now accepts a callable function as a value (:issue:`10882`) +- The ``nrows`` and ``chunksize`` arguments in ``pd.read_csv()`` are supported if both are passed (:issue:`6774`, :issue:`15755`) +- ``DataFrame.plot`` now prints a title above each subplot if ``subplots=True`` and ``title`` is a list of strings (:issue:`14753`) +- ``DataFrame.plot`` can pass the matplotlib 2.0 default color cycle as a single string as color parameter, see `here `__. (:issue:`15516`) +- ``Series.interpolate()`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`) +- Addition of a ``level`` keyword to ``DataFrame/Series.rename`` to rename + labels in the specified level of a MultiIndex (:issue:`4160`). +- ``DataFrame.reset_index()`` will now interpret a tuple ``index.name`` as a key spanning across levels of ``columns``, if this is a ``MultiIndex`` (:issue:`16164`) +- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. 
See the :ref:`Timedelta docs ` (:issue:`15136`) +- ``.select_dtypes()`` now allows the string ``datetimetz`` to generically select datetimes with tz (:issue:`14910`) +- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements +- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`) +- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`). +- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`). +- :func:`pandas.util.hash_pandas_object` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) +- ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) +- ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) +- ``pd.read_html()`` will parse multiple header rows, creating a MultiIndex header. (:issue:`13434`). +- HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) +- :class:`pandas.io.formats.style.Styler` template now has blocks for easier extension, see the :ref:`example notebook ` (:issue:`15649`) +- :meth:`Styler.render() ` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) +- Compatibility with Jupyter notebook 5.0; MultiIndex column labels are left-aligned and MultiIndex row-labels are top-aligned (:issue:`15379`) +- ``TimedeltaIndex`` now has a custom date-tick formatter specifically designed for nanosecond level precision (:issue:`8711`) +- ``pd.api.types.union_categoricals`` gained the ``ignore_ordered`` argument to allow ignoring the ordered attribute of unioned categoricals (:issue:`13410`). See the :ref:`categorical union docs ` for more information. 
+- ``DataFrame.to_latex()`` and ``DataFrame.to_string()`` now allow optional header aliases. (:issue:`15536`) +- Re-enable the ``parse_dates`` keyword of ``pd.read_excel()`` to parse string columns as dates (:issue:`14326`) +- Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`) +- Enabled floor division for ``Timedelta`` and ``TimedeltaIndex`` (:issue:`15828`) +- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`) +- ``pandas.io.json.json_normalize()`` with an empty ``list`` will return an empty ``DataFrame`` (:issue:`15534`) +- ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts ``str`` to separate joined fields; the default is ".", which is backward compatible. (:issue:`14883`) +- :meth:`MultiIndex.remove_unused_levels` has been added to facilitate :ref:`removing unused levels `. (:issue:`15694`) +- ``pd.read_csv()`` will now raise a ``ParserError`` error whenever any parsing error occurs (:issue:`15913`, :issue:`15925`) +- ``pd.read_csv()`` now supports the ``error_bad_lines`` and ``warn_bad_lines`` arguments for the Python parser (:issue:`15925`) +- The ``display.show_dimensions`` option can now also be used to specify + whether the length of a ``Series`` should be shown in its repr (:issue:`7117`). +- ``parallel_coordinates()`` has gained a ``sort_labels`` keyword argument that sorts class labels and the colors assigned to them (:issue:`15908`) +- Options added to allow one to turn on/off using ``bottleneck`` and ``numexpr``, see :ref:`here ` (:issue:`16157`) +- ``DataFrame.style.bar()`` now accepts two more options to further customize the bar chart. Bar alignment is set with ``align='left'|'mid'|'zero'``, the default is "left", which is backward compatible; You can now pass a list of ``color=[color_negative, color_positive]``. (:issue:`14757`) + +.. 
_ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations + + +.. _whatsnew_0200.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew.api_breaking.io_compat: + +Possible incompatibility for HDF5 formats created with pandas < 0.13.0 +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``pd.TimeSeries`` was deprecated officially in 0.17.0, though it has already been an alias since 0.13.0. It has +been dropped in favor of ``pd.Series`` (:issue:`15098`). + +This *may* cause HDF5 files that were created in prior versions to become unreadable if ``pd.TimeSeries`` +was used. This is most likely to be for pandas < 0.13.0. If you find yourself in this situation, +you can use a recent prior version of pandas to read in your HDF5 files, +then write them out again after applying the procedure below. + +.. code-block:: ipython + + In [2]: s = pd.TimeSeries([1, 2, 3], index=pd.date_range('20130101', periods=3)) + + In [3]: s + Out[3]: + 2013-01-01 1 + 2013-01-02 2 + 2013-01-03 3 + Freq: D, dtype: int64 + + In [4]: type(s) + Out[4]: pandas.core.series.TimeSeries + + In [5]: s = pd.Series(s) + + In [6]: s + Out[6]: + 2013-01-01 1 + 2013-01-02 2 + 2013-01-03 3 + Freq: D, dtype: int64 + + In [7]: type(s) + Out[7]: pandas.core.series.Series + + +.. _whatsnew_0200.api_breaking.index_map: + +Map on Index types now return other Index types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`) + +.. ipython:: python + + idx = pd.Index([1, 2]) + idx + mi = pd.MultiIndex.from_tuples([(1, 2), (2, 4)]) + mi + +Previous behavior: + +.. 
code-block:: ipython + + In [5]: idx.map(lambda x: x * 2) + Out[5]: array([2, 4]) + + In [6]: idx.map(lambda x: (x, x * 2)) + Out[6]: array([(1, 2), (2, 4)], dtype=object) + + In [7]: mi.map(lambda x: x) + Out[7]: array([(1, 2), (2, 4)], dtype=object) + + In [8]: mi.map(lambda x: x[0]) + Out[8]: array([1, 2]) + +New behavior: + +.. ipython:: python + + idx.map(lambda x: x * 2) + idx.map(lambda x: (x, x * 2)) + + mi.map(lambda x: x) + + mi.map(lambda x: x[0]) + + +``map`` on a ``Series`` with ``datetime64`` values may return ``int64`` dtypes rather than ``int32`` + +.. ipython:: python + + s = pd.Series(pd.date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H') + .tz_localize('Asia/Tokyo')) + s + +Previous behavior: + +.. code-block:: ipython + + In [9]: s.map(lambda x: x.hour) + Out[9]: + 0 0 + 1 1 + 2 2 + dtype: int32 + +New behavior: + +.. ipython:: python + + s.map(lambda x: x.hour) + + +.. _whatsnew_0200.api_breaking.index_dt_field: + +Accessing datetime fields of Index now return Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The datetime-related attributes (see :ref:`here ` +for an overview) of ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` previously +returned numpy arrays. They will now return a new ``Index`` object, except +in the case of a boolean field, where the result will still be a boolean ndarray. (:issue:`15022`) + +Previous behaviour: + +.. code-block:: ipython + + In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H') + + In [2]: idx.hour + Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32) + +New behavior: + +.. ipython:: python + + idx = pd.date_range("2015-01-01", periods=5, freq='10H') + idx.hour + +This has the advantage that specific ``Index`` methods are still available on the +result. On the other hand, this might have backward incompatibilities: e.g. +compared to numpy arrays, ``Index`` objects are not mutable. 
To get the original +ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``. + +.. _whatsnew_0200.api_breaking.unique: + +pd.unique will now be consistent with extension types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In prior versions, using :meth:`Series.unique` and :func:`pandas.unique` on ``Categorical`` and tz-aware +data-types would yield different return types. These are now made consistent. (:issue:`15903`) + +- Datetime tz-aware + + Previous behaviour: + + .. code-block:: ipython + + # Series + In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) + + In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')])) + Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + # Index + In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) + + In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]) + Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + + New behavior: + + .. 
ipython:: python + + # Series, returns an array of Timestamp tz-aware + pd.Series([pd.Timestamp(r'20160101', tz=r'US/Eastern'), + pd.Timestamp(r'20160101', tz=r'US/Eastern')]).unique() + pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + + # Index, returns a DatetimeIndex + pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + +- Categoricals + + Previous behaviour: + + .. code-block:: ipython + + In [1]: pd.Series(list('baabc'), dtype='category').unique() + Out[1]: + [b, a, c] + Categories (3, object): [b, a, c] + + In [2]: pd.unique(pd.Series(list('baabc'), dtype='category')) + Out[2]: array(['b', 'a', 'c'], dtype=object) + + New behavior: + + .. ipython:: python + + # returns a Categorical + pd.Series(list('baabc'), dtype='category').unique() + pd.unique(pd.Series(list('baabc'), dtype='category')) + +.. _whatsnew_0200.api_breaking.s3: + +S3 file handling +^^^^^^^^^^^^^^^^ + +pandas now uses `s3fs `_ for handling S3 connections. This shouldn't break +any code. However, since ``s3fs`` is not a required dependency, you will need to install it separately, like ``boto`` +in prior versions of pandas. (:issue:`11915`). + +.. _whatsnew_0200.api_breaking.partial_string_indexing: + +Partial string indexing changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:ref:`DatetimeIndex Partial String Indexing ` now works as an exact match, provided that string resolution coincides with index resolution, including a case when both are seconds (:issue:`14826`). See :ref:`Slice vs. Exact Match ` for details. + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2, 3]}, pd.DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01'])) +Previous behavior: + +.. 
code-block:: ipython + + In [4]: df['2011-12-31 23:59:59'] + Out[4]: + a + 2011-12-31 23:59:59 1 + + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: + 2011-12-31 23:59:59 1 + Name: a, dtype: int64 + + +New behavior: + +.. code-block:: ipython + + In [4]: df['2011-12-31 23:59:59'] + KeyError: '2011-12-31 23:59:59' + + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: 1 + +.. _whatsnew_0200.api_breaking.concat_dtypes: + +Concat of different float dtypes will not automatically upcast +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, ``concat`` of multiple objects with different ``float`` dtypes would automatically upcast results to a dtype of ``float64``. +Now the smallest acceptable dtype will be used (:issue:`13247`) + +.. ipython:: python + + df1 = pd.DataFrame(np.array([1.0], dtype=np.float32, ndmin=2)) + df1.dtypes + + df2 = pd.DataFrame(np.array([np.nan], dtype=np.float32, ndmin=2)) + df2.dtypes + +Previous behavior: + +.. code-block:: ipython + + In [7]: pd.concat([df1, df2]).dtypes + Out[7]: + 0 float64 + dtype: object + +New behavior: + +.. ipython:: python + + pd.concat([df1, df2]).dtypes + +.. _whatsnew_0200.api_breaking.gbq: + +Pandas Google BigQuery support has moved +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +pandas has split off Google BigQuery support into a separate package ``pandas-gbq``. You can ``conda install pandas-gbq -c conda-forge`` or +``pip install pandas-gbq`` to get it. The functionality of :func:`read_gbq` and :meth:`DataFrame.to_gbq` remain the same with the +currently released version of ``pandas-gbq=0.1.4``. Documentation is now hosted `here `__ (:issue:`15347`) + +.. _whatsnew_0200.api_breaking.memory_usage: + +Memory usage for Index is more accurate +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, showing ``.memory_usage()`` on a pandas structure that has an index, would only include actual index values and not include structures that facilitated fast indexing. 
This will generally be different for ``Index`` and ``MultiIndex`` and less-so for other index types. (:issue:`15237`) + +Previous behavior: + +.. code-block:: ipython + + In [8]: index = pd.Index(['foo', 'bar', 'baz']) + + In [9]: index.memory_usage(deep=True) + Out[9]: 180 + + In [10]: index.get_loc('foo') + Out[10]: 0 + + In [11]: index.memory_usage(deep=True) + Out[11]: 180 + +New behavior: + +.. code-block:: ipython + + In [8]: index = pd.Index(['foo', 'bar', 'baz']) + + In [9]: index.memory_usage(deep=True) + Out[9]: 180 + + In [10]: index.get_loc('foo') + Out[10]: 0 + + In [11]: index.memory_usage(deep=True) + Out[11]: 260 + +.. _whatsnew_0200.api_breaking.sort_index: + +DataFrame.sort_index changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In certain cases, calling ``.sort_index()`` on a MultiIndexed DataFrame would return the *same* DataFrame without seeming to sort. +This would happen with a ``lexsorted``, but non-monotonic levels. (:issue:`15622`, :issue:`15687`, :issue:`14015`, :issue:`13431`, :issue:`15797`) + +This is *unchanged* from prior versions, but shown for illustration purposes: + +.. ipython:: python + + df = pd.DataFrame(np.arange(6), columns=['value'], + index=pd.MultiIndex.from_product([list('BA'), range(3)])) + df + +.. ipython:: python + + df.index.is_lexsorted() + df.index.is_monotonic + +Sorting works as expected + +.. ipython:: python + + df.sort_index() + +.. ipython:: python + + df.sort_index().index.is_lexsorted() + df.sort_index().index.is_monotonic + +However, this example, which has a non-monotonic 2nd level, +doesn't behave as desired. + +.. ipython:: python + + df = pd.DataFrame({'value': [1, 2, 3, 4]}, + index=pd.MultiIndex([['a', 'b'], ['bb', 'aa']], + [[0, 0, 1, 1], [0, 1, 0, 1]])) + df + +Previous behavior: + +.. 
code-block:: python + + In [11]: df.sort_index() + Out[11]: + value + a bb 1 + aa 2 + b bb 3 + aa 4 + + In [14]: df.sort_index().index.is_lexsorted() + Out[14]: True + + In [15]: df.sort_index().index.is_monotonic + Out[15]: False + +New behavior: + +.. ipython:: python + + df.sort_index() + df.sort_index().index.is_lexsorted() + df.sort_index().index.is_monotonic + + +.. _whatsnew_0200.api_breaking.groupby_describe: + +Groupby describe formatting +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The output formatting of ``groupby.describe()`` now labels the ``describe()`` metrics in the columns instead of the index. +This format is consistent with ``groupby.agg()`` when applying multiple functions at once. (:issue:`4792`) + +Previous behavior: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + In [2]: df.groupby('A').describe() + Out[2]: + B + A + 1 count 2.000000 + mean 1.500000 + std 0.707107 + min 1.000000 + 25% 1.250000 + 50% 1.500000 + 75% 1.750000 + max 2.000000 + 2 count 2.000000 + mean 3.500000 + std 0.707107 + min 3.000000 + 25% 3.250000 + 50% 3.500000 + 75% 3.750000 + max 4.000000 + + In [3]: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + Out[3]: + B + mean std amin amax + A + 1 1.5 0.707107 1 2 + 2 3.5 0.707107 3 4 + +New behavior: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) + + df.groupby('A').describe() + + df.groupby('A').agg([np.mean, np.std, np.min, np.max]) + +.. _whatsnew_0200.api_breaking.rolling_pairwise: + +Window binary corr/cov operations return a MultiIndex DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A binary window operation, like ``.corr()`` or ``.cov()``, when operating on a ``.rolling(..)``, ``.expanding(..)``, or ``.ewm(..)`` object, +will now return a 2-level ``MultiIndexed DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, +see :ref:`here `. 
These are equivalent in function, +but a MultiIndexed ``DataFrame`` enjoys more support in pandas. +See the section on :ref:`Windowed Binary Operations ` for more information. (:issue:`15677`) + +.. ipython:: python + + np.random.seed(1234) + df = pd.DataFrame(np.random.rand(100, 2), + columns=pd.Index(['A', 'B'], name='bar'), + index=pd.date_range('20160101', + periods=100, freq='D', name='foo')) + df.tail() + +Previous behavior: + +.. code-block:: ipython + + In [2]: df.rolling(12).corr() + Out[2]: + + Dimensions: 100 (items) x 2 (major_axis) x 2 (minor_axis) + Items axis: 2016-01-01 00:00:00 to 2016-04-09 00:00:00 + Major_axis axis: A to B + Minor_axis axis: A to B + +New behavior: + +.. ipython:: python + + res = df.rolling(12).corr() + res.tail() + +Retrieving a correlation matrix for a cross-section + +.. ipython:: python + + df.rolling(12).corr().loc['2016-04-07'] + +.. _whatsnew_0200.api_breaking.hdfstore_where: + +HDFStore where string comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions most types could be compared to string column in a ``HDFStore`` +usually resulting in an invalid comparison, returning an empty result frame. These comparisons will now raise a +``TypeError`` (:issue:`15492`) + +.. ipython:: python + + df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) + df.to_hdf('store.h5', 'key', format='table', data_columns=True) + df.dtypes + +Previous behavior: + +.. code-block:: ipython + + In [4]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + File "", line 1 + (unparsed_date > 1970-01-01 00:00:01.388552400) + ^ + SyntaxError: invalid token + +New behavior: + +.. code-block:: ipython + + In [18]: ts = pd.Timestamp('2014-01-01') + + In [19]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + TypeError: Cannot compare 2014-01-01 00:00:00 of + type to string column + +.. ipython:: python + :suppress: + + import os + os.remove('store.h5') + +.. 
_whatsnew_0200.api_breaking.index_order: + +Index.intersection and inner join now preserve the order of the left Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`Index.intersection` now preserves the order of the calling ``Index`` (left) +instead of the other ``Index`` (right) (:issue:`15582`). This affects inner +joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` method. + +- ``Index.intersection`` + + .. ipython:: python + + left = pd.Index([2, 1, 0]) + left + right = pd.Index([1, 2, 3]) + right + + Previous behavior: + + .. code-block:: ipython + + In [4]: left.intersection(right) + Out[4]: Int64Index([1, 2], dtype='int64') + + New behavior: + + .. ipython:: python + + left.intersection(right) + +- ``DataFrame.join`` and ``pd.merge`` + + .. ipython:: python + + left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + left + right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) + right + + Previous behavior: + + .. code-block:: ipython + + In [4]: left.join(right, how='inner') + Out[4]: + a b + 1 10 100 + 2 20 200 + + New behavior: + + .. ipython:: python + + left.join(right, how='inner') + +.. _whatsnew_0200.api_breaking.pivot_table: + +Pivot table always returns a DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The documentation for :meth:`pivot_table` states that a ``DataFrame`` is *always* returned. Here a bug +is fixed that allowed this to return a ``Series`` under certain circumstances. (:issue:`4386`) + +.. ipython:: python + + df = pd.DataFrame({'col1': [3, 4, 5], + 'col2': ['C', 'D', 'E'], + 'col3': [1, 3, 9]}) + df + +Previous behavior: + +.. code-block:: ipython + + In [2]: df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + Out[2]: + col3 col2 + 1 C 3 + 3 D 4 + 9 E 5 + Name: col1, dtype: int64 + +New behavior: + +.. ipython:: python + + df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) + +.. 
_whatsnew_0200.api: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``numexpr`` version is now required to be >= 2.4.6 and it will not be used at all if this requisite is not fulfilled (:issue:`15213`). +- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv()`` and will be removed in the future (:issue:`12665`) +- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) +- ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) +- ``Series.map()`` now respects default values of dictionary subclasses with a ``__missing__`` method, such as ``collections.Counter`` (:issue:`15999`) +- ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) +- ``interpolate()`` and ``fillna()`` will raise a ``ValueError`` if the ``limit`` keyword argument is not greater than 0. (:issue:`9217`) +- ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`) +- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`) +- ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) +- ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype`` +- ``DataFrame.asof()`` will return a null filled ``Series`` instead of the scalar ``NaN`` if a match is not found (:issue:`15118`) +- Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`) +- ``Series.sort_values()`` accepts a one element list of bool for consistency with the behavior of ``DataFrame.sort_values()`` (:issue:`15604`) +- ``.merge()`` and ``.join()`` on ``category`` dtype 
columns will now preserve the category dtype when possible (:issue:`10409`) +- ``SparseDataFrame.default_fill_value`` will be 0, previously was ``nan`` in the return from ``pd.get_dummies(..., sparse=True)`` (:issue:`15594`) +- The default behaviour of ``Series.str.match`` has changed from extracting + groups to matching the pattern. The extracting behaviour was deprecated + since pandas version 0.13.0 and can be done with the ``Series.str.extract`` + method (:issue:`5224`). As a consequence, the ``as_indexer`` keyword is + ignored (no longer needed to specify the new behaviour) and is deprecated. +- ``NaT`` will now correctly report ``False`` for datetimelike boolean operations such as ``is_month_start`` (:issue:`15781`) +- ``NaT`` will now correctly return ``np.nan`` for ``Timedelta`` and ``Period`` accessors such as ``days`` and ``quarter`` (:issue:`15782`) +- ``NaT`` will now return ``NaT`` for ``tz_localize`` and ``tz_convert`` + methods (:issue:`15830`) +- ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``PandasError``, if called with scalar inputs and not axes (:issue:`15541`) +- ``DataFrame`` and ``Panel`` constructors with invalid input will now raise ``ValueError`` rather than ``pandas.core.common.PandasError``, if called with scalar inputs and not axes; The exception ``PandasError`` is removed as well. (:issue:`15541`) +- The exception ``pandas.core.common.AmbiguousIndexError`` is removed as it is not referenced (:issue:`15541`) + + +.. _whatsnew_0200.privacy: + +Reorganization of the library: privacy changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0200.privacy.extensions: + +Modules privacy has changed +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some formerly public python/c/c++/cython extension modules have been moved and/or renamed. These are all removed from the public API. 
+Furthermore, the ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are now considered to be PRIVATE. +If indicated, a deprecation warning will be issued if you reference these modules. (:issue:`12588`) + +.. csv-table:: + :header: "Previous Location", "New Location", "Deprecated" + :widths: 30, 30, 4 + + "pandas.lib", "pandas._libs.lib", "X" + "pandas.tslib", "pandas._libs.tslib", "X" + "pandas.computation", "pandas.core.computation", "X" + "pandas.msgpack", "pandas.io.msgpack", "" + "pandas.index", "pandas._libs.index", "" + "pandas.algos", "pandas._libs.algos", "" + "pandas.hashtable", "pandas._libs.hashtable", "" + "pandas.indexes", "pandas.core.indexes", "" + "pandas.json", "pandas._libs.json / pandas.io.json", "X" + "pandas.parser", "pandas._libs.parsers", "X" + "pandas.formats", "pandas.io.formats", "" + "pandas.sparse", "pandas.core.sparse", "" + "pandas.tools", "pandas.core.reshape", "X" + "pandas.types", "pandas.core.dtypes", "X" + "pandas.io.sas.saslib", "pandas.io.sas._sas", "" + "pandas._join", "pandas._libs.join", "" + "pandas._hash", "pandas._libs.hashing", "" + "pandas._period", "pandas._libs.period", "" + "pandas._sparse", "pandas._libs.sparse", "" + "pandas._testing", "pandas._libs.testing", "" + "pandas._window", "pandas._libs.window", "" + + +Some new subpackages are created with public functionality that is not directly +exposed in the top-level namespace: ``pandas.errors``, ``pandas.plotting`` and +``pandas.testing`` (more details below). Together with ``pandas.api.types`` and +certain functions in the ``pandas.io`` and ``pandas.tseries`` submodules, +these are now the public subpackages. 
+ +Further changes: + +- The function :func:`~pandas.api.types.union_categoricals` is now importable from ``pandas.api.types``, formerly from ``pandas.types.concat`` (:issue:`15998`) +- The type import ``pandas.tslib.NaTType`` is deprecated and can be replaced by using ``type(pandas.NaT)`` (:issue:`16146`) +- The public functions in ``pandas.tools.hashing`` are deprecated from that location, but are now importable from ``pandas.util`` (:issue:`16223`) +- The modules in ``pandas.util``: ``decorators``, ``print_versions``, ``doctools``, ``validators``, ``depr_module`` are now private. Only the functions exposed in ``pandas.util`` itself are public (:issue:`16223`) + +.. _whatsnew_0200.privacy.errors: + +``pandas.errors`` +^^^^^^^^^^^^^^^^^ + +We are adding a standard public module for all pandas exceptions & warnings ``pandas.errors``. (:issue:`14800`). Previously +these exceptions & warnings could be imported from ``pandas.core.common`` or ``pandas.io.common``. These exceptions and warnings +will be removed from the ``*.common`` locations in a future release. (:issue:`15541`) + +The following are now part of this API: + +.. code-block:: python + + ['DtypeWarning', + 'EmptyDataError', + 'OutOfBoundsDatetime', + 'ParserError', + 'ParserWarning', + 'PerformanceWarning', + 'UnsortedIndexError', + 'UnsupportedFunctionCall'] + + +.. _whatsnew_0200.privacy.testing: + +``pandas.testing`` +^^^^^^^^^^^^^^^^^^ + +We are adding a standard module that exposes the public testing functions in ``pandas.testing`` (:issue:`9895`). Those functions can be used when writing tests for functionality using pandas objects. + +The following testing functions are now part of this API: + +- :func:`testing.assert_frame_equal` +- :func:`testing.assert_series_equal` +- :func:`testing.assert_index_equal` + + +.. 
_whatsnew_0200.privacy.plotting: + +``pandas.plotting`` +^^^^^^^^^^^^^^^^^^^ + +A new public ``pandas.plotting`` module has been added that holds plotting functionality that was previously in either ``pandas.tools.plotting`` or in the top-level namespace. See the :ref:`deprecations sections ` for more details. + +.. _whatsnew_0200.privacy.development: + +Other Development Changes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`) +- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) +- Switched the test framework to use `pytest `__ (:issue:`13097`) +- Reorganization of tests directory layout (:issue:`14854`, :issue:`15707`). + + +.. _whatsnew_0200.deprecations: + +Deprecations +~~~~~~~~~~~~ + +.. _whatsnew_0200.api_breaking.deprecate_ix: + +Deprecate ``.ix`` +^^^^^^^^^^^^^^^^^ + +The ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc`` and ``.loc`` indexers. ``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide to index *positionally* OR via *labels*, depending on the data type of the index. This has caused quite a bit of user confusion over the years. The full indexing documentation is :ref:`here `. (:issue:`14218`) + +The recommended methods of indexing are: + +- ``.loc`` if you want to *label* index +- ``.iloc`` if you want to *positionally* index. + +Using ``.ix`` will now show a ``DeprecationWarning`` with a link to some examples of how to convert code :ref:`here `. + + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6]}, + index=list('abc')) + + df + +Previous behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column. + +.. code-block:: ipython + + In [3]: df.ix[[0, 2], 'A'] + Out[3]: + a 1 + c 3 + Name: A, dtype: int64 + +Using ``.loc``. Here we will select the appropriate indexes from the index, then use *label* indexing. 
+ +.. ipython:: python + + df.loc[df.index[[0, 2]], 'A'] + +Using ``.iloc``. Here we will get the location of the 'A' column, then use *positional* indexing to select things. + +.. ipython:: python + + df.iloc[[0, 2], df.columns.get_loc('A')] + + +.. _whatsnew_0200.api_breaking.deprecate_panel: + +Deprecate Panel +^^^^^^^^^^^^^^^ + +``Panel`` is deprecated and will be removed in a future version. The recommended way to represent 3-D data is +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` method or with the `xarray package `__. Pandas +provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`). + +.. code-block:: ipython + + In [133]: import pandas._testing as tm + + In [134]: p = tm.makePanel() + + In [135]: p + Out[135]: + + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + +Convert to a MultiIndex DataFrame + +.. code-block:: ipython + + In [136]: p.to_frame() + Out[136]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.628776 -1.409432 0.209395 + B 0.988138 -1.347533 -0.896581 + C -0.938153 1.272395 -0.161137 + D -0.223019 -0.591863 -1.051539 + 2000-01-04 A 0.186494 1.422986 -0.592886 + B -0.072608 0.363565 1.104352 + C -1.239072 -1.449567 0.889157 + D 2.123692 -0.414505 -0.319561 + 2000-01-05 A 0.952478 -2.147855 -1.473116 + B -0.550603 -0.014752 -0.431550 + C 0.139683 -1.195524 0.288377 + D 0.122273 -1.425795 -0.619993 + + [12 rows x 3 columns] + +Convert to an xarray DataArray + +.. 
code-block:: ipython + + In [137]: p.to_xarray() + Out[137]: + + array([[[ 0.628776, 0.988138, -0.938153, -0.223019], + [ 0.186494, -0.072608, -1.239072, 2.123692], + [ 0.952478, -0.550603, 0.139683, 0.122273]], + + [[-1.409432, -1.347533, 1.272395, -0.591863], + [ 1.422986, 0.363565, -1.449567, -0.414505], + [-2.147855, -0.014752, -1.195524, -1.425795]], + + [[ 0.209395, -0.896581, -0.161137, -1.051539], + [-0.592886, 1.104352, 0.889157, -0.319561], + [-1.473116, -0.43155 , 0.288377, -0.619993]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' + +.. _whatsnew_0200.api_breaking.deprecate_group_agg_dict: + +Deprecate groupby.agg() with a dictionary when renaming +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``.groupby(..).agg(..)``, ``.rolling(..).agg(..)``, and ``.resample(..).agg(..)`` syntax can accept a variety of inputs, including scalars, +lists, and a dict of column names to scalars or lists. This provides a useful syntax for constructing multiple +(potentially different) aggregations. + +However, ``.agg(..)`` can *also* accept a dict that allows 'renaming' of the result columns. This is a complicated and confusing syntax, as well as not consistent +between ``Series`` and ``DataFrame``. We are deprecating this 'renaming' functionality. + +- We are deprecating passing a dict to a grouped/rolled/resampled ``Series``. This allowed + one to ``rename`` the resulting aggregation, but this had a completely different + meaning than passing a dictionary to a grouped ``DataFrame``, which accepts column-to-aggregations. +- We are deprecating passing a dict-of-dicts to a grouped/rolled/resampled ``DataFrame`` in a similar manner. + +This is an illustrative example: + +.. 
ipython:: python + + df = pd.DataFrame({'A': [1, 1, 1, 2, 2], + 'B': range(5), + 'C': range(5)}) + df + +Here is a typical useful syntax for computing different aggregations for different columns. This +is a natural, and useful syntax. We aggregate from the dict-to-list by taking the specified +columns and applying the list of functions. This returns a ``MultiIndex`` for the columns (this is *not* deprecated). + +.. ipython:: python + + df.groupby('A').agg({'B': 'sum', 'C': 'min'}) + +Here's an example of the first deprecation, passing a dict to a grouped ``Series``. This +is a combination aggregation & renaming: + +.. code-block:: ipython + + In [6]: df.groupby('A').B.agg({'foo': 'count'}) + FutureWarning: using a dict on a Series for aggregation + is deprecated and will be removed in a future version + + Out[6]: + foo + A + 1 3 + 2 2 + +You can accomplish the same operation, more idiomatically by: + +.. ipython:: python + + df.groupby('A').B.agg(['count']).rename(columns={'count': 'foo'}) + + +Here's an example of the second deprecation, passing a dict-of-dict to a grouped ``DataFrame``: + +.. code-block:: python + + In [23]: (df.groupby('A') + ...: .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) + ...: ) + FutureWarning: using a dict with renaming is deprecated and + will be removed in a future version + + Out[23]: + B C + foo bar + A + 1 3 0 + 2 7 3 + + +You can accomplish nearly the same by: + +.. ipython:: python + + (df.groupby('A') + .agg({'B': 'sum', 'C': 'min'}) + .rename(columns={'B': 'foo', 'C': 'bar'}) + ) + + + +.. _whatsnew_0200.privacy.deprecate_plotting: + +Deprecate .plotting +^^^^^^^^^^^^^^^^^^^ + +The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available +from ``pandas.plotting`` (:issue:`12548`). + +Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated. 
+Users can import these from ``pandas.plotting`` as well. + +Previous script: + +.. code-block:: python + + pd.tools.plotting.scatter_matrix(df) + pd.scatter_matrix(df) + +Should be changed to: + +.. code-block:: python + + pd.plotting.scatter_matrix(df) + + + +.. _whatsnew_0200.deprecations.other: + +Other deprecations +^^^^^^^^^^^^^^^^^^ + +- ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`) +- ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`) +- ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`) +- The ``Series`` constructor and ``.astype`` method have deprecated accepting timestamp dtypes without a frequency (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15524`) +- ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`) +- ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`) +- ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`) +- ``DataFrame.astype()`` has deprecated the ``raise_on_error`` parameter in favor of ``errors`` (:issue:`14878`) +- ``Series.sortlevel`` and ``DataFrame.sortlevel`` have been deprecated in favor of ``Series.sort_index`` and ``DataFrame.sort_index`` (:issue:`15099`) +- Importing ``concat`` from ``pandas.tools.merge`` has been deprecated in favor of imports from the ``pandas`` namespace. This should only affect explicit imports (:issue:`15358`) +- ``Series/DataFrame/Panel.consolidate()`` has been deprecated as a public method. (:issue:`15483`) +- The ``as_indexer`` keyword of ``Series.str.match()`` has been deprecated (ignored keyword) (:issue:`15257`). 
+- The following top-level pandas functions have been deprecated and will be removed in a future version (:issue:`13790`, :issue:`15940`) + + * ``pd.pnow()``, replaced by ``Period.now()`` + * ``pd.Term``, is removed, as it is not applicable to user code. Instead use in-line string expressions in the where clause when searching in HDFStore + * ``pd.Expr``, is removed, as it is not applicable to user code. + * ``pd.match()``, is removed. + * ``pd.groupby()``, replaced by using the ``.groupby()`` method directly on a ``Series/DataFrame`` + * ``pd.get_store()``, replaced by a direct call to ``pd.HDFStore(...)`` +- ``is_any_int_dtype``, ``is_floating_dtype``, and ``is_sequence`` are deprecated from ``pandas.api.types`` (:issue:`16042`) + +.. _whatsnew_0200.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- The ``pandas.rpy`` module is removed. Similar functionality can be accessed + through the `rpy2 `__ project. + See the `R interfacing docs `__ for more details. +- The ``pandas.io.ga`` module with a ``google-analytics`` interface is removed (:issue:`11308`). + Similar functionality can be found in the `Google2Pandas `__ package. +- ``pd.to_datetime`` and ``pd.to_timedelta`` have dropped the ``coerce`` parameter in favor of ``errors`` (:issue:`13602`) +- ``pandas.stats.fama_macbeth``, ``pandas.stats.ols``, ``pandas.stats.plm`` and ``pandas.stats.var``, as well as the top-level ``pandas.fama_macbeth`` and ``pandas.ols`` routines are removed. Similar functionality can be found in the `statsmodels `__ package. (:issue:`11898`) +- The ``TimeSeries`` and ``SparseTimeSeries`` classes, aliases of ``Series`` + and ``SparseSeries``, are removed (:issue:`10890`, :issue:`15098`). 
+- ``Series.is_time_series`` is dropped in favor of ``Series.index.is_all_dates`` (:issue:`15098`) +- The deprecated ``irow``, ``icol``, ``iget`` and ``iget_value`` methods are removed + in favor of ``iloc`` and ``iat`` as explained :ref:`here ` (:issue:`10711`). +- The deprecated ``DataFrame.iterkv()`` has been removed in favor of ``DataFrame.iteritems()`` (:issue:`10711`) +- The ``Categorical`` constructor has dropped the ``name`` parameter (:issue:`10632`) +- ``Categorical`` has dropped support for ``NaN`` categories (:issue:`10748`) +- The ``take_last`` parameter has been dropped from ``duplicated()``, ``drop_duplicates()``, ``nlargest()``, and ``nsmallest()`` methods (:issue:`10236`, :issue:`10792`, :issue:`10920`) +- ``Series``, ``Index``, and ``DataFrame`` have dropped the ``sort`` and ``order`` methods (:issue:`10726`) +- Where clauses in ``pytables`` are only accepted as strings and expressions types and not other data-types (:issue:`12027`) +- ``DataFrame`` has dropped the ``combineAdd`` and ``combineMult`` methods in favor of ``add`` and ``mul`` respectively (:issue:`10735`) + +.. _whatsnew_0200.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of ``pd.wide_to_long()`` (:issue:`14779`) +- Improved performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`, :issue:`16057`) +- Improved performance of timeseries plotting with an irregular DatetimeIndex + (or with ``compat_x=True``) (:issue:`15073`). +- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`) +- Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`) +- When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. 
(:issue:`14947`) +- Improved performance of ``.rank()`` for categorical data (:issue:`15498`) +- Improved performance when using ``.unstack()`` (:issue:`15503`) +- Improved performance of merge/join on ``category`` columns (:issue:`10409`) +- Improved performance of ``drop_duplicates()`` on ``bool`` columns (:issue:`12963`) +- Improve performance of ``pd.core.groupby.GroupBy.apply`` when the applied + function used the ``.name`` attribute of the group DataFrame (:issue:`15062`). +- Improved performance of ``iloc`` indexing with a list or array (:issue:`15504`). +- Improved performance of ``Series.sort_index()`` with a monotonic index (:issue:`15694`) +- Improved performance in ``pd.read_csv()`` on some platforms with buffered reads (:issue:`16039`) + +.. _whatsnew_0200.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + +- Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`) +- Bug in ``Timestamp.replace`` with compat for passing long integers (:issue:`15030`) +- Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`, :issue:`6538`) +- Bug in ``Timestamp`` incorrectly localizing timezones during construction (:issue:`11481`, :issue:`15777`) +- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) +- Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`) +- Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`) +- Bug in ``DatetimeIndex.round()`` and ``Timestamp.round()`` floating point accuracy when rounding by milliseconds or less (:issue:`14440`, :issue:`15578`) +- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. 
Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`) +- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type decimal.Decimal. (:issue:`14827`) +- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) +- Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`) +- Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`) +- Bug in ``.replace()`` which may result in incorrect dtypes. (:issue:`12747`, :issue:`15765`) +- Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) +- Bug in ``Series.replace`` which replaced a numeric by string (:issue:`15743`) +- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`) +- Bug in ``Series`` construction with a datetimetz (:issue:`14928`) +- Bug in ``Series.dt.round()`` inconsistent behaviour on ``NaT`` 's with different arguments (:issue:`14940`) +- Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) +- Incorrect dtyped ``Series`` was returned by comparison methods (e.g., ``lt``, ``gt``, ...) against a constant for an empty ``DataFrame`` (:issue:`15077`) +- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. 
(:issue:`14956`) +- Bug in ``DataFrame.fillna()`` where the argument ``downcast`` was ignored when fillna value was of type ``dict`` (:issue:`15277`) +- Bug in ``.asfreq()``, where frequency was not set for empty ``Series`` (:issue:`14320`) +- Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`) +- Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`) +- Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`) +- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`) +- Bug in ``Index.to_series()`` where the index was not copied (and so mutating later would change the original), (:issue:`15949`) +- Bug in indexing with partial string indexing with a len-1 DataFrame (:issue:`16071`) +- Bug in ``Series`` construction where passing invalid dtype didn't raise an error. 
(:issue:`15520`) + +Indexing +^^^^^^^^ + +- Bug in ``Index`` power operations with reversed operands (:issue:`14973`) +- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) +- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`) +- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`) +- Bug in ``Series.asof`` which raised if the series contained all ``np.nan`` (:issue:`15713`) +- Bug in ``.at`` when selecting from a tz-aware column (:issue:`15822`) +- Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`) +- Bug in ``Series.where()`` where TZ-aware data was converted to float representation (:issue:`15701`) +- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) +- Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`) +- Bug in ``Categorical.searchsorted()`` where alphabetical instead of the provided categorical order was used (:issue:`14522`) +- Bug in ``Series.iloc`` where a ``Categorical`` object for list-like indexes input was returned, where a ``Series`` was expected. 
(:issue:`14580`) +- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`) +- Bug in ``.reset_index()`` when an all ``NaN`` level of a ``MultiIndex`` would fail (:issue:`6322`) +- Bug in ``.reset_index()`` when raising error for index name already present in ``MultiIndex`` columns (:issue:`16120`) +- Bug in creating a ``MultiIndex`` with tuples and not passing a list of names; this will now raise ``ValueError`` (:issue:`15110`) +- Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`) +- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`) +- Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is present in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`) +- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`) +- Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`) +- Bug in indexing with a scalar and a ``CategoricalIndex`` (:issue:`16123`) + +I/O +^^^ + +- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly cast (:issue:`14941`, :issue:`15005`) +- Bug in ``pd.read_fwf()`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`) +- Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`) +- Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`) +- Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`) +- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly 
with ``parse_dates`` (:issue:`14792`) +- Bug in ``pd.read_csv()`` with ``parse_dates`` when multi-line headers are specified (:issue:`15376`) +- Bug in ``pd.read_csv()`` with ``float_precision='round_trip'`` which caused a segfault when a text entry is parsed (:issue:`15140`) +- Bug in ``pd.read_csv()`` when an index was specified and no values were specified as null values (:issue:`15835`) +- Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`) +- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`) +- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`) +- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`) +- Bug in ``pd.to_csv()`` in which there was numeric overflow when a timestamp index was being written (:issue:`15982`) +- Bug in ``pd.util.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. 
(:issue:`15143`) +- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`) +- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`) +- Bug in ``.to_json()`` for the C engine where rollover was not correctly handled for the case where frac is odd and diff is exactly 0.5 (:issue:`15716`, :issue:`15864`) +- Bug in ``pd.read_json()`` for Python 2 where ``lines=True`` and contents contain non-ascii unicode characters (:issue:`15132`) +- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) +- Bug in ``pd.read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` when deserializing a ``CategoricalIndex`` (:issue:`15487`) +- Bug in ``DataFrame.to_records()`` when converting a ``DatetimeIndex`` with a timezone (:issue:`13937`) +- Bug in ``DataFrame.to_records()`` which failed with unicode characters in column names (:issue:`11879`) +- Bug in ``.to_sql()`` when writing a DataFrame with numeric index names (:issue:`15404`). +- Bug in ``DataFrame.to_html()`` with ``index=False`` and ``max_rows`` raising an ``IndexError`` (:issue:`14998`) +- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) +- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`) +- Bug in ``StataReader`` and ``StataWriter`` which allows invalid encodings (:issue:`15723`) +- Bug in the ``Series`` repr not showing the length when the output was truncated (:issue:`15962`). 
+ +Plotting +^^^^^^^^ + +- Bug in ``DataFrame.hist`` where ``plt.tight_layout`` caused an ``AttributeError`` (use ``matplotlib >= 2.0.1``) (:issue:`9351`) +- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) +- Bug in the date and time converters pandas registers with matplotlib not handling multiple dimensions (:issue:`16026`) +- Bug in ``pd.scatter_matrix()`` could accept either ``color`` or ``c``, but not both (:issue:`14855`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`) +- Properly set ``__name__`` and ``__qualname__`` for ``Groupby.*`` functions (:issue:`14620`) +- Bug in ``GroupBy.get_group()`` failing with a categorical grouper (:issue:`15155`) +- Bug in ``.groupby(...).rolling(...)`` when ``on`` is specified and using a ``DatetimeIndex`` (:issue:`15130`, :issue:`13966`) +- Bug in groupby operations with ``timedelta64`` when passing ``numeric_only=False`` (:issue:`5724`) +- Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`) +- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`) +- Bug in ``DataFrame.groupby().describe()`` when grouping on ``Index`` containing tuples (:issue:`14848`) +- Bug in ``groupby().nunique()`` with a datetimelike-grouper where bins counts were incorrect (:issue:`13453`) +- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) +- Bug in ``.rolling/expanding()`` functions where ``count()`` was not counting ``np.Inf``, nor handling ``object`` dtypes (:issue:`12541`) +- Bug in ``.rolling()`` where ``pd.Timedelta`` 
or ``datetime.timedelta`` was not accepted as a ``window`` argument (:issue:`15440`) +- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`) +- Bug in ``DataFrame.resample().median()`` if duplicate column names are present (:issue:`14233`) + +Sparse +^^^^^^ + +- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`) +- Bug in repr-formatting a ``SparseDataFrame`` after a value was set on (a copy of) one of its series (:issue:`15488`) +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) +- Bug in sparse array indexing in which indices were not being validated (:issue:`15863`) + +Reshaping +^^^^^^^^^ + +- Bug in ``pd.merge_asof()`` where ``left_index`` or ``right_index`` caused a failure when multiple ``by`` was specified (:issue:`15676`) +- Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) +- Bug in ``DataFrame.pivot_table()`` where ``dropna=True`` would not drop all-NaN columns when the columns was a ``category`` dtype (:issue:`15193`) +- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) +- Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`) +- Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) +- Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`) +- Bug in ``DataFrame.nsmallest`` and ``DataFrame.nlargest`` where identical values resulted in duplicated rows (:issue:`15297`) +- Bug in :func:`pandas.pivot_table` incorrectly raising ``UnicodeError`` when passing unicode input for ``margins`` keyword (:issue:`13292`) + +Numeric +^^^^^^^ + +- Bug in ``.rank()`` 
which incorrectly ranks ordered categories (:issue:`15420`) +- Bug in ``.corr()`` and ``.cov()`` where the column and index were the same object (:issue:`14617`) +- Bug in ``.mode()`` where ``mode`` was not returned if there was only a single value (:issue:`15714`) +- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`) +- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`) +- Bug in ``pandas.tools.utils.cartesian_product()`` where large input could cause overflow on Windows (:issue:`15265`) +- Bug in ``.eval()`` which caused multi-line evals to fail with local variables not on the first line (:issue:`15342`) + +Other +^^^^^ + +- Compat with SciPy 0.19.0 for testing on ``.interpolate()`` (:issue:`15662`) +- Compat for 32-bit platforms for ``.qcut/cut``; bins will now be ``int64`` dtype (:issue:`14866`) +- Bug in interactions with ``Qt`` when a ``QtApplication`` already exists (:issue:`14372`) +- Use of ``np.finfo()`` during ``import pandas`` has been removed to mitigate a deadlock on Python GIL misuse (:issue:`14641`) + + +.. _whatsnew_0.20.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.19.2..v0.20.0 diff --git a/doc/source/whatsnew/v0.20.2.rst b/doc/source/whatsnew/v0.20.2.rst new file mode 100644 index 00000000..232d1d28 --- /dev/null +++ b/doc/source/whatsnew/v0.20.2.rst @@ -0,0 +1,143 @@ +.. _whatsnew_0202: + +v0.20.2 (June 4, 2017) +---------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.20.2 + :local: + :backlinks: none + + +.. 
_whatsnew_0202.enhancements: + +Enhancements +~~~~~~~~~~~~ + +- Unblocked access to additional compression types supported in pytables: 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd' (:issue:`14478`) +- ``Series`` provides a ``to_latex`` method (:issue:`16180`) + +- A new groupby method :meth:`~pandas.core.groupby.GroupBy.ngroup`, + parallel to the existing :meth:`~pandas.core.groupby.GroupBy.cumcount`, + has been added to return the group order (:issue:`11642`); see + :ref:`here `. + +.. _whatsnew_0202.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Performance regression fix when indexing with a list-like (:issue:`16285`) +- Performance regression fix for MultiIndexes (:issue:`16319`, :issue:`16346`) +- Improved performance of ``.clip()`` with scalar arguments (:issue:`15400`) +- Improved performance of groupby with categorical groupers (:issue:`16413`) +- Improved performance of ``MultiIndex.remove_unused_levels()`` (:issue:`16556`) + +.. _whatsnew_0202.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Silenced a warning on some Windows environments about "tput: terminal attributes: No such device or address" when + detecting the terminal size. This fix only applies to python 3 (:issue:`16496`) +- Bug in using ``pathlib.Path`` or ``py.path.local`` objects with io functions (:issue:`16291`) +- Bug in ``Index.symmetric_difference()`` on two equal MultiIndexes, which resulted in a ``TypeError`` (:issue:`13490`) +- Bug in ``DataFrame.update()`` with ``overwrite=False`` and ``NaN`` values (:issue:`15593`) +- Passing an invalid engine to :func:`read_csv` now raises an informative + ``ValueError`` rather than ``UnboundLocalError``. 
(:issue:`16511`) +- Bug in :func:`unique` on an array of tuples (:issue:`16519`) +- Bug in :func:`cut` when ``labels`` are set, resulting in incorrect label ordering (:issue:`16459`) +- Fixed a compatibility issue with IPython 6.0's tab completion showing deprecation warnings on ``Categoricals`` (:issue:`16409`) + +Conversion +^^^^^^^^^^ + +- Bug in :func:`to_numeric` in which empty data inputs were causing a segfault of the interpreter (:issue:`16302`) +- Silence numpy warnings when broadcasting ``DataFrame`` to ``Series`` with comparison ops (:issue:`16378`, :issue:`16306`) + + +Indexing +^^^^^^^^ + +- Bug in ``DataFrame.reset_index(level=)`` with single level index (:issue:`16263`) +- Bug in partial string indexing with a monotonic, but not strictly-monotonic, index incorrectly reversing the slice bounds (:issue:`16515`) +- Bug in ``MultiIndex.remove_unused_levels()`` that would not return a ``MultiIndex`` equal to the original. (:issue:`16556`) + +I/O +^^^ + +- Bug in :func:`read_csv` when ``comment`` is passed in a space delimited text file (:issue:`16472`) +- Bug in :func:`read_csv` not raising an exception with nonexistent columns in ``usecols`` when it had the correct length (:issue:`14671`) +- Bug that would force importing of the clipboard routines unnecessarily, potentially causing an import error on startup (:issue:`16288`) +- Bug that raised ``IndexError`` when HTML-rendering an empty ``DataFrame`` (:issue:`15953`) +- Bug in :func:`read_csv` in which tarfile object inputs were raising an error in Python 2.x for the C engine (:issue:`16530`) +- Bug where ``DataFrame.to_html()`` ignored the ``index_names`` parameter (:issue:`16493`) +- Bug where ``pd.read_hdf()`` returns numpy strings for index names (:issue:`13492`) + +- Bug in ``HDFStore.select_as_multiple()`` where start/stop arguments were not respected (:issue:`16209`) + +Plotting +^^^^^^^^ + +- Bug in ``DataFrame.plot`` with a single column and a list-like ``color`` (:issue:`3486`) +- Bug in 
``plot`` where ``NaT`` in ``DatetimeIndex`` results in ``Timestamp.min`` (:issue:`12405`) +- Bug in ``DataFrame.boxplot`` where ``figsize`` keyword was not respected for non-grouped boxplots (:issue:`11959`) + + + + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in creating a time-based rolling window on an empty ``DataFrame`` (:issue:`15819`) +- Bug in ``rolling.cov()`` with offset window (:issue:`16058`) +- Bug in ``.resample()`` and ``.groupby()`` when aggregating on integers (:issue:`16361`) + + +Sparse +^^^^^^ + +- Bug in construction of ``SparseDataFrame`` from ``scipy.sparse.dok_matrix`` (:issue:`16179`) + +Reshaping +^^^^^^^^^ + +- Bug in ``DataFrame.stack`` with unsorted levels in ``MultiIndex`` columns (:issue:`16323`) +- Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`) +- Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`) +- Bug in construction of a ``DataFrame`` with mixed dtypes including an all-NaT column. (:issue:`16395`) +- Bug in ``DataFrame.agg()`` and ``Series.agg()`` with aggregating on non-callable attributes (:issue:`16405`) + + +Numeric +^^^^^^^ +- Bug in ``.interpolate()``, where ``limit_direction`` was not respected when ``limit=None`` (default) was passed (:issue:`16282`) + +Categorical +^^^^^^^^^^^ + +- Fixed comparison operations considering the order of the categories when both categoricals are unordered (:issue:`16014`) + +Other +^^^^^ + +- Bug in ``DataFrame.drop()`` with an empty-list with non-unique indices (:issue:`16270`) + + +.. _whatsnew_0.20.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.0..v0.20.2 diff --git a/doc/source/whatsnew/v0.20.3.rst b/doc/source/whatsnew/v0.20.3.rst new file mode 100644 index 00000000..72faabd9 --- /dev/null +++ b/doc/source/whatsnew/v0.20.3.rst @@ -0,0 +1,76 @@ +.. _whatsnew_0203: + +v0.20.3 (July 7, 2017) +----------------------- + +{{ header }} + +.. 
ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.20.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. contents:: What's new in v0.20.3 + :local: + :backlinks: none + +.. _whatsnew_0203.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) +- Fixed a pytest marker failing downstream packages' test suites (:issue:`16680`) + +Conversion +^^^^^^^^^^ + +- Bug in pickle compat prior to the v0.20.x series, when ``UTC`` is a timezone in a Series/DataFrame/Index (:issue:`16608`) +- Bug in ``Series`` construction when passing a ``Series`` with ``dtype='category'`` (:issue:`16524`). +- Bug in :meth:`DataFrame.astype` when passing a ``Series`` as the ``dtype`` kwarg. (:issue:`16717`). + +Indexing +^^^^^^^^ + +- Bug in ``Float64Index`` causing an empty array instead of ``None`` to be returned from ``.get(np.nan)`` on a Series whose index did not contain any ``NaN`` s (:issue:`8569`) +- Bug in ``MultiIndex.isin`` causing an error when passing an empty iterable (:issue:`16777`) +- Fixed a bug in slicing a DataFrame/Series that has a ``TimedeltaIndex`` (:issue:`16637`) + +I/O +^^^ + +- Bug in :func:`read_csv` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`) +- Bug in :func:`read_hdf` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`) +- Bug in :meth:`DataFrame.to_latex` where ``bold_rows`` was wrongly specified to be ``True`` by default, whereas in reality row labels remained non-bold whatever parameter was provided.
(:issue:`16707`) +- Fixed an issue with :meth:`DataFrame.style` where generated element ids were not unique (:issue:`16780`) +- Fixed loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) + +Plotting +^^^^^^^^ + +- Fixed regression that prevented RGB and RGBA tuples from being used as color arguments (:issue:`16233`) +- Fixed an issue with :meth:`DataFrame.plot.scatter` that incorrectly raised a ``KeyError`` when categorical data is used for plotting (:issue:`16199`) + +Reshaping +^^^^^^^^^ + +- ``PeriodIndex`` / ``TimedeltaIndex.join`` was missing the ``sort=`` kwarg (:issue:`16541`) +- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). +- Bug in :func:`merge` when merging/joining with multiple categorical columns (:issue:`16767`) + +Categorical +^^^^^^^^^^^ + +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`) + + +.. _whatsnew_0.20.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.2..v0.20.3 diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst new file mode 100644 index 00000000..71969c4d --- /dev/null +++ b/doc/source/whatsnew/v0.21.0.rst @@ -0,0 +1,1198 @@ +.. _whatsnew_0210: + +v0.21.0 (October 27, 2017) +-------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a major release from 0.20.3 and includes a number of API changes, deprecations, new features, +enhancements, and performance improvements along with a large number of bug fixes. We recommend that all +users upgrade to this version. + +Highlights include: + +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `. 
+- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying + categoricals independent of the data, see :ref:`here <whatsnew_0210.enhancements.categorical_dtype>`. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck <https://bottleneck.readthedocs.io>`__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here <whatsnew_0210.api_breaking.bottleneck>`. +- Compatibility fixes for pypy, see :ref:`here <whatsnew_0210.pypy>`. +- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here <whatsnew_0210.enhancements.drop_api>`. +- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here <whatsnew_0210.enhancements.infer_objects>`) and ``GroupBy.pipe`` (see :ref:`here <whatsnew_0210.enhancements.GroupBy_pipe>`). +- Indexing with a list of labels, where one or more of the labels is missing, is deprecated and will raise a KeyError in a future version, see :ref:`here <whatsnew_0210.api_breaking.loc>`. + +Check the :ref:`API Changes <whatsnew_0210.api_breaking>` and :ref:`deprecations <whatsnew_0210.deprecations>` before updating. + +.. contents:: What's new in v0.21.0 + :local: + :backlinks: none + :depth: 2 + +.. _whatsnew_0210.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0210.enhancements.parquet: + +Integration with Apache Parquet file format +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>` (:issue:`15838`, :issue:`17438`). + +`Apache Parquet <https://parquet.apache.org/>`__ provides a cross-language, binary file format for reading and writing data frames efficiently. +Parquet is designed to faithfully serialize and de-serialize ``DataFrame`` s, supporting all of the pandas +dtypes, including extension dtypes such as datetime with timezones. + +This functionality depends on either the `pyarrow <https://arrow.apache.org/docs/python/>`__ or `fastparquet <https://fastparquet.readthedocs.io/en/latest/>`__ library. +For more details, see :ref:`the IO docs on Parquet <io.parquet>`. + + +..
_whatsnew_0210.enhancements.infer_objects: + +``infer_objects`` type conversion +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`DataFrame.infer_objects` and :meth:`Series.infer_objects` +methods have been added to perform dtype inference on object columns, replacing +some of the functionality of the deprecated ``convert_objects`` +method. See the documentation :ref:`here ` +for more details. (:issue:`11221`) + +This method only performs soft conversions on object columns, converting Python objects +to native types, but not any coercive conversions. For example: + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3], + 'B': np.array([1, 2, 3], dtype='object'), + 'C': ['1', '2', '3']}) + df.dtypes + df.infer_objects().dtypes + +Note that column ``'C'`` was not converted - only scalar numeric types +will be converted to a new type. Other types of conversion should be accomplished +using the :func:`to_numeric` function (or :func:`to_datetime`, :func:`to_timedelta`). + +.. ipython:: python + + df = df.infer_objects() + df['C'] = pd.to_numeric(df['C'], errors='coerce') + df.dtypes + +.. _whatsnew_0210.enhancements.attribute_access: + +Improved warnings when attempting to create columns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +New users are often puzzled by the relationship between column operations and +attribute access on ``DataFrame`` instances (:issue:`7175`). One specific +instance of this confusion is attempting to create a new column by setting an +attribute on the ``DataFrame``: + +.. code-block:: ipython + + In [1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In [2]: df.two = [4, 5, 6] + +This does not raise any obvious exceptions, but also does not create a new column: + +.. code-block:: ipython + + In [3]: df + Out[3]: + one + 0 1.0 + 1 2.0 + 2 3.0 + +Setting a list-like data structure into a new attribute now raises a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. + +.. 
_whatsnew_0210.enhancements.drop_api: + +``drop`` now also accepts index/columns keywords +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`~DataFrame.drop` method has gained ``index``/``columns`` keywords as an +alternative to specifying the ``axis``. This is similar to the behavior of ``reindex`` +(:issue:`12392`). + +For example: + +.. ipython:: python + + df = pd.DataFrame(np.arange(8).reshape(2, 4), + columns=['A', 'B', 'C', 'D']) + df + df.drop(['B', 'C'], axis=1) + # the following is now equivalent + df.drop(columns=['B', 'C']) + +.. _whatsnew_0210.enhancements.rename_reindex_axis: + +``rename``, ``reindex`` now also accept axis keyword +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`DataFrame.rename` and :meth:`DataFrame.reindex` methods have gained +the ``axis`` keyword to specify the axis to target with the operation +(:issue:`12392`). + +Here's ``rename``: + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df.rename(str.lower, axis='columns') + df.rename(id, axis='index') + +And ``reindex``: + +.. ipython:: python + + df.reindex(['A', 'B', 'C'], axis='columns') + df.reindex([0, 1, 3], axis='index') + +The "index, columns" style continues to work as before. + +.. ipython:: python + + df.rename(index=id, columns=str.lower) + df.reindex(index=[0, 1, 3], columns=['A', 'B', 'C']) + +We *highly* encourage using named arguments to avoid confusion when using either +style. + +.. _whatsnew_0210.enhancements.categorical_dtype: + +``CategoricalDtype`` for specifying categoricals +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`pandas.api.types.CategoricalDtype` has been added to the public API and +expanded to include the ``categories`` and ``ordered`` attributes. A +``CategoricalDtype`` can be used to specify the set of categories and +orderedness of an array, independent of the data. 
This can be useful, for example, +when converting string data to a ``Categorical`` (:issue:`14711`, +:issue:`15078`, :issue:`16015`, :issue:`17643`): + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + s = pd.Series(['a', 'b', 'c', 'a']) # strings + dtype = CategoricalDtype(categories=['a', 'b', 'c', 'd'], ordered=True) + s.astype(dtype) + +One place that deserves special mention is in :meth:`read_csv`. Previously, with +``dtype={'col': 'category'}``, the returned values and categories would always +be strings. + +.. ipython:: python + :suppress: + + from io import StringIO + +.. ipython:: python + + data = 'A,B\na,1\nb,2\nc,3' + pd.read_csv(StringIO(data), dtype={'B': 'category'}).B.cat.categories + +Notice the "object" dtype. + +With a ``CategoricalDtype`` of all numerics, datetimes, or +timedeltas, we can automatically convert to the correct type: + +.. ipython:: python + + dtype = {'B': CategoricalDtype([1, 2, 3])} + pd.read_csv(StringIO(data), dtype=dtype).B.cat.categories + +The values have been correctly interpreted as integers. + +The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a +``Series`` with categorical type will now return an instance of +``CategoricalDtype``. While the repr has changed, ``str(CategoricalDtype())`` is +still the string ``'category'``. We'll take this moment to remind users that the +*preferred* way to detect categorical data is to use +:func:`pandas.api.types.is_categorical_dtype`, and not ``str(dtype) == 'category'``. + +See the :ref:`CategoricalDtype docs <categorical.categoricaldtype>` for more. + +.. _whatsnew_0210.enhancements.GroupBy_pipe: + +``GroupBy`` objects now have a ``pipe`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``GroupBy`` objects now have a ``pipe`` method, similar to the one on +``DataFrame`` and ``Series``, that allows functions that take a +``GroupBy`` to be composed in a clean, readable syntax.
(:issue:`17871`) + +For a concrete example on combining ``.groupby`` and ``.pipe`` , imagine having a +DataFrame with columns for stores, products, revenue and sold quantity. We'd like to +do a groupwise calculation of *prices* (i.e. revenue/quantity) per store and per product. +We could do this in a multi-step operation, but expressing it in terms of piping can make the +code more readable. + +First we set the data: + +.. ipython:: python + + import numpy as np + n = 1000 + df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n), + 'Product': np.random.choice(['Product_1', + 'Product_2', + 'Product_3' + ], n), + 'Revenue': (np.random.random(n) * 50 + 10).round(2), + 'Quantity': np.random.randint(1, 10, size=n)}) + df.head(2) + +Now, to find prices per store/product, we can simply do: + +.. ipython:: python + + (df.groupby(['Store', 'Product']) + .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum()) + .unstack().round(2)) + +See the :ref:`documentation ` for more. + + +.. _whatsnew_0210.enhancements.rename_categories: + +``Categorical.rename_categories`` accepts a dict-like +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`~Series.cat.rename_categories` now accepts a dict-like argument for +``new_categories``. The previous categories are looked up in the dictionary's +keys and replaced if found. The behavior of missing and extra keys is the same +as in :meth:`DataFrame.rename`. + +.. ipython:: python + + c = pd.Categorical(['a', 'a', 'b']) + c.rename_categories({"a": "eh", "b": "bee"}) + +.. warning:: + + To assist with upgrading pandas, ``rename_categories`` treats ``Series`` as + list-like. Typically, Series are considered to be dict-like (e.g. in + ``.rename``, ``.map``). In a future version of pandas ``rename_categories`` + will change to treat them as dict-like. Follow the warning message's + recommendations for writing future-proof code. + + .. 
code-block:: ipython + + In [33]: c.rename_categories(pd.Series([0, 1], index=['a', 'c'])) + FutureWarning: Treating Series 'new_categories' as a list-like and using the values. + In a future version, 'rename_categories' will treat Series like a dictionary. + For dict-like, use 'new_categories.to_dict()' + For list-like, use 'new_categories.values'. + Out[33]: + [0, 0, 1] + Categories (2, int64): [0, 1] + + +.. _whatsnew_0210.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +New functions or methods +"""""""""""""""""""""""" + +- :meth:`~pandas.core.resample.Resampler.nearest` is added to support nearest-neighbor upsampling (:issue:`17496`). +- :class:`~pandas.Index` has added support for a ``to_frame`` method (:issue:`15230`). + +New keywords +"""""""""""" + +- Added a ``skipna`` parameter to :func:`~pandas.api.types.infer_dtype` to + support type inference in the presence of missing values (:issue:`17059`). +- :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned. The default is ``dict``, which is backwards compatible. (:issue:`16122`) +- :func:`Series.set_axis` and :func:`DataFrame.set_axis` now support the ``inplace`` parameter. (:issue:`14636`) +- :func:`Series.to_pickle` and :func:`DataFrame.to_pickle` have gained a ``protocol`` parameter (:issue:`16252`). By default, this parameter is set to `HIGHEST_PROTOCOL `__ +- :func:`read_feather` has gained the ``nthreads`` parameter for multi-threaded operations (:issue:`16359`) +- :func:`DataFrame.clip()` and :func:`Series.clip()` have gained an ``inplace`` argument. (:issue:`15388`) +- :func:`crosstab` has gained a ``margins_name`` parameter to define the name of the row / column that will contain the totals when ``margins=True``. (:issue:`15972`) +- :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. 
If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`) +- :func:`read_json` and :func:`~DataFrame.to_json` now accept a ``compression`` argument which allows them to transparently handle compressed files. (:issue:`17798`) + +Various enhancements +"""""""""""""""""""" + +- Improved the import time of pandas by about 2.25x. (:issue:`16764`) +- Support for `PEP 519 -- Adding a file system path protocol + `_ on most readers (e.g. + :func:`read_csv`) and writers (e.g. :meth:`DataFrame.to_csv`) (:issue:`13823`). +- Added a ``__fspath__`` method to ``pd.HDFStore``, ``pd.ExcelFile``, + and ``pd.ExcelWriter`` to work properly with the file system path protocol (:issue:`13823`). +- The ``validate`` argument for :func:`merge` now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here ` (:issue:`16270`) +- Added support for `PEP 518 `_ (``pyproject.toml``) to the build system (:issue:`16745`) +- :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`) +- :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace. (:issue:`15704`) +- :func:`api.types.infer_dtype` now infers decimals. (:issue:`15690`) +- :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) +- :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year. (:issue:`9313`) +- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year. (:issue:`9313`) +- :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. 
(:issue:`17151`) +- Read/write methods that infer compression (:func:`read_csv`, :func:`read_table`, :func:`read_pickle`, and :meth:`~DataFrame.to_pickle`) can now infer from path-like objects, such as ``pathlib.Path``. (:issue:`17206`) +- :func:`read_sas` now recognizes many more of the most frequently used date (datetime) formats in SAS7BDAT files. (:issue:`15871`) +- :func:`DataFrame.items` and :func:`Series.items` are now present in both Python 2 and 3 and are lazy in all cases. (:issue:`13918`, :issue:`17213`) +- :meth:`pandas.io.formats.style.Styler.where` has been implemented as a convenience for :meth:`pandas.io.formats.style.Styler.applymap`. (:issue:`17474`) +- :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously it returned ``False`` in all cases. (:issue:`16554`) +- :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) +- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names. (:issue:`14207`) +- :func:`Series.reindex`, :func:`DataFrame.reindex`, :func:`Index.get_indexer` now support a list-like argument for ``tolerance``. (:issue:`17367`) + +.. _whatsnew_0210.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0210.api_breaking.deps: + +Dependencies have increased minimum versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`).
+If installed, we now require: + + +--------------+-----------------+----------+ + | Package | Minimum Version | Required | + +==============+=================+==========+ + | Numpy | 1.9.0 | X | + +--------------+-----------------+----------+ + | Matplotlib | 1.4.3 | | + +--------------+-----------------+----------+ + | Scipy | 0.14.0 | | + +--------------+-----------------+----------+ + | Bottleneck | 1.0.0 | | + +--------------+-----------------+----------+ + +Additionally, support has been dropped for Python 3.4 (:issue:`15251`). + + +.. _whatsnew_0210.api_breaking.bottleneck: + +Sum/Prod of all-NaN or empty Series/DataFrames is now consistently NaN +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: + + The changes described here have been partially reverted. See + the :ref:`v0.22.0 Whatsnew ` for more. + + +The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on +whether `bottleneck `__ is installed, and return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`). + +Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs `. + +.. ipython:: python + + s = pd.Series([np.nan]) + +Previously WITHOUT ``bottleneck`` installed: + +.. code-block:: ipython + + In [2]: s.sum() + Out[2]: np.nan + +Previously WITH ``bottleneck``: + +.. code-block:: ipython + + In [2]: s.sum() + Out[2]: 0.0 + +New behavior, without regard to the bottleneck installation: + +.. ipython:: python + + s.sum() + +Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation: + +.. code-block:: ipython + + In [1]: pd.Series([]).sum() + Out[1]: 0 + +but for consistency with the all-NaN case, this was changed to return NaN as well: + +.. ipython:: python + :okwarning: + + pd.Series([]).sum() + + +.. 
_whatsnew_0210.api_breaking.loc: + +Indexing with a list with missing labels is deprecated +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, selecting with a list of labels, where one or more labels were missing would always succeed, returning ``NaN`` for missing labels. +This will now show a ``FutureWarning``. In the future this will raise a ``KeyError`` (:issue:`15747`). +This warning will trigger on a ``DataFrame`` or a ``Series`` for using ``.loc[]`` or ``[[]]`` when passing a list-of-labels with at least 1 missing label. +See the :ref:`deprecation docs `. + + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Previous behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + + +Current behavior + +.. code-block:: ipython + + In [4]: s.loc[[1, 2, 3]] + Passing list-likes to .loc or [] with any missing label will raise + KeyError in the future, you can use .reindex() as an alternative. + + See the documentation here: + https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike + + Out[4]: + 1 2.0 + 2 3.0 + 3 NaN + dtype: float64 + +The idiomatic way to achieve selecting potentially not-found elements is via ``.reindex()`` + +.. ipython:: python + + s.reindex([1, 2, 3]) + +Selection with all keys found is unchanged. + +.. ipython:: python + + s.loc[[1, 2]] + + +.. _whatsnew_0210.api.na_changes: + +NA naming changes +^^^^^^^^^^^^^^^^^ + +In order to promote more consistency among the pandas API, we have added additional top-level +functions :func:`isna` and :func:`notna` that are aliases for :func:`isnull` and :func:`notnull`. +The naming scheme is now more consistent with methods like ``.dropna()`` and ``.fillna()``. 
Furthermore +in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, these have additional methods +named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical``, +``Index``, ``Series``, and ``DataFrame``. (:issue:`15001`). + +The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. + + +.. _whatsnew_0210.api_breaking.iteration_scalars: + +Iteration of Series/Index will now return Python scalars +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a Python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + s + +Previously: + +.. code-block:: ipython + + In [2]: type(list(s)[0]) + Out[2]: numpy.int64 + +New behavior: + +.. ipython:: python + + type(list(s)[0]) + +Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well. + +.. ipython:: python + + d = {'a': [1], 'b': ['b']} + df = pd.DataFrame(d) + +Previously: + +.. code-block:: ipython + + In [8]: type(df.to_dict()['a'][0]) + Out[8]: numpy.int64 + +New behavior: + +.. ipython:: python + + type(df.to_dict()['a'][0]) + + +.. 
_whatsnew_0210.api_breaking.loc_with_index: + +Indexing with a Boolean Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously when passing a boolean ``Index`` to ``.loc``, if the index of the ``Series/DataFrame`` had ``boolean`` labels, +you would get a label based selection, potentially duplicating result labels, rather than a boolean indexing selection +(where ``True`` selects elements). This was inconsistent with how a boolean numpy array indexes. The new behavior is to +act like a boolean numpy array indexer. (:issue:`17738`) + +Previous behavior: + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=[False, True, False]) + s + +.. code-block:: ipython + + In [59]: s.loc[pd.Index([True, False, True])] + Out[59]: + True 2 + False 1 + False 3 + True 2 + dtype: int64 + +Current behavior + +.. ipython:: python + + s.loc[pd.Index([True, False, True])] + + +Furthermore, previously if you had an index that was non-numeric (e.g. strings), then a boolean Index would raise a ``KeyError``. +This will now be treated as a boolean indexer. + +Previous behavior: + +.. ipython:: python + + s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s + +.. code-block:: ipython + + In [39]: s.loc[pd.Index([True, False, True])] + KeyError: "None of [Index([True, False, True], dtype='object')] are in the [index]" + +Current behavior + +.. ipython:: python + + s.loc[pd.Index([True, False, True])] + + +.. _whatsnew_0210.api_breaking.period_index_resampling: + +``PeriodIndex`` resampling +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions of pandas, resampling a ``Series``/``DataFrame`` indexed by a ``PeriodIndex`` returned a ``DatetimeIndex`` in some cases (:issue:`12884`). Resampling to a multiplied frequency now returns a ``PeriodIndex`` (:issue:`15944`). As a minor enhancement, resampling a ``PeriodIndex`` can now handle ``NaT`` values (:issue:`13224`) + +Previous behavior: + +..
code-block:: ipython + + In [1]: pi = pd.period_range('2017-01', periods=12, freq='M') + + In [2]: s = pd.Series(np.arange(12), index=pi) + + In [3]: resampled = s.resample('2Q').mean() + + In [4]: resampled + Out[4]: + 2017-03-31 1.0 + 2017-09-30 5.5 + 2018-03-31 10.0 + Freq: 2Q-DEC, dtype: float64 + + In [5]: resampled.index + Out[5]: DatetimeIndex(['2017-03-31', '2017-09-30', '2018-03-31'], dtype='datetime64[ns]', freq='2Q-DEC') + +New behavior: + +.. ipython:: python + + pi = pd.period_range('2017-01', periods=12, freq='M') + + s = pd.Series(np.arange(12), index=pi) + + resampled = s.resample('2Q').mean() + + resampled + + resampled.index + +Upsampling and calling ``.ohlc()`` previously returned a ``Series``, basically identical to calling ``.asfreq()``. OHLC upsampling now returns a DataFrame with columns ``open``, ``high``, ``low`` and ``close`` (:issue:`13083`). This is consistent with downsampling and ``DatetimeIndex`` behavior. + +Previous behavior: + +.. code-block:: ipython + + In [1]: pi = pd.period_range(start='2000-01-01', freq='D', periods=10) + + In [2]: s = pd.Series(np.arange(10), index=pi) + + In [3]: s.resample('H').ohlc() + Out[3]: + 2000-01-01 00:00 0.0 + ... + 2000-01-10 23:00 NaN + Freq: H, Length: 240, dtype: float64 + + In [4]: s.resample('M').ohlc() + Out[4]: + open high low close + 2000-01 0 9 0 9 + +New behavior: + +.. ipython:: python + + pi = pd.period_range(start='2000-01-01', freq='D', periods=10) + + s = pd.Series(np.arange(10), index=pi) + + s.resample('H').ohlc() + + s.resample('M').ohlc() + + +.. _whatsnew_0210.api_breaking.pandas_eval: + +Improved error handling during item assignment in pd.eval +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`eval` will now raise a ``ValueError`` when item assignment malfunctions, or +inplace operations are specified, but there is no item assignment in the expression (:issue:`16732`) + +.. 
ipython:: python + + arr = np.array([1, 2, 3]) + +Previously, if you attempted the following expression, you would get a not very helpful error message: + +.. code-block:: ipython + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) + and integer or boolean arrays are valid indices + +This is a very long way of saying numpy arrays don't support string-item indexing. With this +change, the error message is now this: + +.. code-block:: python + + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot assign expression output to target + +It also used to be possible to evaluate expressions inplace, even if there was no item assignment: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + Out[4]: 3 + +However, this input does not make much sense because the output is not being assigned to +the target. Now, a ``ValueError`` will be raised when such an input is passed in: + +.. code-block:: ipython + + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + ... + ValueError: Cannot operate inplace if there is no assignment + + +.. _whatsnew_0210.api_breaking.dtype_conversions: + +Dtype conversions +^^^^^^^^^^^^^^^^^ + +Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to the same type (e.g. int / float), or raise for datetimelikes. These will now preserve the bools with ``object`` dtypes. (:issue:`16821`). + +.. ipython:: python + + s = pd.Series([1, 2, 3]) + +.. code-block:: python + + In [5]: s[1] = True + + In [6]: s + Out[6]: + 0 1 + 1 1 + 2 3 + dtype: int64 + +New behavior + +.. ipython:: python + + s[1] = True + s + +Previously, an assignment to a datetimelike with a non-datetimelike would coerce the +non-datetime-like item being assigned (:issue:`14145`). + +.. ipython:: python + + s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')]) + +..
code-block:: python + + In [1]: s[1] = 1 + + In [2]: s + Out[2]: + 0 2011-01-01 00:00:00.000000000 + 1 1970-01-01 00:00:00.000000001 + dtype: datetime64[ns] + +These now coerce to ``object`` dtype. + +.. ipython:: python + + s[1] = 1 + s + +- Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype, which may keep ``int64`` dtype (:issue:`14001`) + + +.. _whatsnew_210.api.multiindex_single: + +MultiIndex constructor with a single level +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all +length-one levels down to a regular ``Index``. This affects all the +``MultiIndex`` constructors. (:issue:`17178`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)]) + Out[2]: Index(['a', 'b'], dtype='object') + +Length 1 levels are no longer special-cased. They behave exactly as if you had +length 2+ levels, so a :class:`MultiIndex` is always returned from all of the +``MultiIndex`` constructors: + +.. ipython:: python + + pd.MultiIndex.from_tuples([('a',), ('b',)]) + +.. _whatsnew_0210.api.utc_localization_with_series: + +UTC Localization with Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and ``Index`` data are handled. (:issue:`6415`). + +Previous behavior + +.. ipython:: python + + s = pd.Series(['20130101 00:00:00'] * 3) + +.. code-block:: ipython + + In [12]: pd.to_datetime(s, utc=True) + Out[12]: + 0 2013-01-01 + 1 2013-01-01 + 2 2013-01-01 + dtype: datetime64[ns] + +New behavior + +..
ipython:: python + + pd.to_datetime(s, utc=True) + +Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. + +.. _whatsnew_0210.api.consistency_of_range_functions: + +Consistency of range functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`. (:issue:`17471`). + +One of the inconsistent behaviors occurred when the ``start``, ``end`` and ``period`` parameters were all specified, potentially leading to ambiguous ranges. When all three parameters were passed, ``interval_range`` ignored the ``period`` parameter, ``period_range`` ignored the ``end`` parameter, and the other range functions raised. To promote consistency among the range functions, and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range`` will now raise when all three parameters are passed. + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + Out[2]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + Out[3]: PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2'], dtype='period[Q-DEC]', freq='Q-DEC') + +New behavior: + +.. 
code-block:: ipython + + In [2]: pd.interval_range(start=0, end=4, periods=6) + --------------------------------------------------------------------------- + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified + + In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') + --------------------------------------------------------------------------- + ValueError: Of the three parameters: start, end, and periods, exactly two must be specified + +Additionally, the endpoint parameter ``end`` was not included in the intervals produced by ``interval_range``. However, all other range functions include ``end`` in their output. To promote consistency among the range functions, ``interval_range`` will now include ``end`` as the right endpoint of the final interval, except if ``freq`` is specified in a way which skips ``end``. + +Previous behavior: + +.. code-block:: ipython + + In [4]: pd.interval_range(start=0, end=4) + Out[4]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') + + +New behavior: + +.. ipython:: python + + pd.interval_range(start=0, end=4) + +.. _whatsnew_0210.api.mpl_converters: + +No automatic Matplotlib converters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas no longer registers our ``date``, ``time``, ``datetime``, +``datetime64``, and ``Period`` converters with matplotlib when pandas is +imported. Matplotlib plot methods (``plt.plot``, ``ax.plot``, ...), will not +nicely format the x-axis for ``DatetimeIndex`` or ``PeriodIndex`` values. You +must explicitly register these methods: + +Pandas built-in ``Series.plot`` and ``DataFrame.plot`` *will* register these +converters on first-use (:issue:`17710`). + +.. note:: + + This change has been temporarily reverted in pandas 0.21.1, + for more details see :ref:`here `. + +.. 
_whatsnew_0210.api: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) +- Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now + raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +- :func:`read_csv` now issues a ``UserWarning`` if the ``names`` parameter contains duplicates (:issue:`17095`) +- :func:`read_csv` now treats ``'null'`` and ``'n/a'`` strings as missing values by default (:issue:`16471`, :issue:`16078`) +- :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). +- Compression defaults in HDF stores now follow pytables standards. Default is no compression and if ``complib`` is missing and ``complevel`` > 0 ``zlib`` is used (:issue:`15943`) +- ``Index.get_indexer_non_unique()`` now returns a ndarray indexer rather than an ``Index``; this is consistent with ``Index.get_indexer()`` (:issue:`16819`) +- Removed the ``@slow`` decorator from ``pandas._testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`) +- Moved definition of ``MergeError`` to the ``pandas.errors`` module. +- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) +- :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) +- :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). 
+- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) +- :func:`to_datetime` now raises a ``ValueError`` when format includes ``%W`` or ``%U`` without also including day of the week and calendar year (:issue:`16774`) +- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) +- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) +- Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`). + +.. _whatsnew_0210.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- :meth:`DataFrame.from_csv` and :meth:`Series.from_csv` have been deprecated in favor of :func:`read_csv()` (:issue:`4191`) +- :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). +- :func:`read_excel()` has deprecated ``parse_cols`` in favor of ``usecols`` for consistency with :func:`read_csv` (:issue:`4988`) +- :func:`read_csv()` has deprecated the ``tupleize_cols`` argument. Column tuples will always be converted to a ``MultiIndex`` (:issue:`17060`) +- :meth:`DataFrame.to_csv` has deprecated the ``tupleize_cols`` argument. MultiIndex columns will be always written as rows in the CSV file (:issue:`17060`) +- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) +- ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). +- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). 
+- :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) +- ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) +- ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`) +- passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype ` (:issue:`17636`) +- ``.get_value`` and ``.set_value`` on ``Series``, ``DataFrame``, ``Panel``, ``SparseSeries``, and ``SparseDataFrame`` are deprecated in favor of using ``.iat[]`` or ``.at[]`` accessors (:issue:`15269`) +- Passing a non-existent column in ``.to_excel(..., columns=)`` is deprecated and will raise a ``KeyError`` in the future (:issue:`17295`) +- ``raise_on_error`` parameter to :func:`Series.where`, :func:`Series.mask`, :func:`DataFrame.where`, :func:`DataFrame.mask` is deprecated, in favor of ``errors=`` (:issue:`14968`) +- Using :meth:`DataFrame.rename_axis` and :meth:`Series.rename_axis` to alter index or column *labels* is now deprecated in favor of using ``.rename``. ``rename_axis`` may still be used to alter the name of the index or columns (:issue:`17833`). +- :meth:`~DataFrame.reindex_axis` has been deprecated in favor of :meth:`~DataFrame.reindex`. See :ref:`here ` for more (:issue:`17833`). + +.. _whatsnew_0210.deprecations.select: + +Series.select and DataFrame.select +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`Series.select` and :meth:`DataFrame.select` methods are deprecated in favor of using ``df.loc[labels.map(crit)]`` (:issue:`12401`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) + +.. 
code-block:: ipython + + In [3]: df.select(lambda x: x in ['bar', 'baz']) + FutureWarning: select is deprecated and will be removed in a future release. You can use .loc[crit] as a replacement + Out[3]: + A + bar 2 + baz 3 + +.. ipython:: python + + df.loc[df.index.map(lambda x: x in ['bar', 'baz'])] + + +.. _whatsnew_0210.deprecations.argmin_min: + +Series.argmax and Series.argmin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The behavior of :func:`Series.argmax` and :func:`Series.argmin` have been deprecated in favor of :func:`Series.idxmax` and :func:`Series.idxmin`, respectively (:issue:`16830`). + +For compatibility with NumPy arrays, ``pd.Series`` implements ``argmax`` and +``argmin``. Since pandas 0.13.0, ``argmax`` has been an alias for +:meth:`pandas.Series.idxmax`, and ``argmin`` has been an alias for +:meth:`pandas.Series.idxmin`. They return the *label* of the maximum or minimum, +rather than the *position*. + +We've deprecated the current behavior of ``Series.argmax`` and +``Series.argmin``. Using either of these will emit a ``FutureWarning``. Use +:meth:`Series.idxmax` if you want the label of the maximum. Use +``Series.values.argmax()`` if you want the position of the maximum. Likewise for +the minimum. In a future release ``Series.argmax`` and ``Series.argmin`` will +return the position of the maximum or minimum. + +.. 
_whatsnew_0210.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- :func:`read_excel()` has dropped the ``has_index_names`` parameter (:issue:`10967`) +- The ``pd.options.display.height`` configuration has been dropped (:issue:`3663`) +- The ``pd.options.display.line_width`` configuration has been dropped (:issue:`2881`) +- The ``pd.options.display.mpl_style`` configuration has been dropped (:issue:`12190`) +- ``Index`` has dropped the ``.sym_diff()`` method in favor of ``.symmetric_difference()`` (:issue:`12591`) +- ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) +- :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) +- The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) +- pandas no longer tests for compatibility with hdf5-files created with pandas < 0.11 (:issue:`17404`). + + + +.. _whatsnew_0210.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) +- :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) +- Improved performance of :meth:`~Series.cat.set_categories` by not materializing the values (:issue:`17508`) +- :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) +- Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) +- Improved performance of :meth:`RangeIndex.min` and :meth:`RangeIndex.max` by using ``RangeIndex`` properties to perform the computations (:issue:`17607`) + +.. _whatsnew_0210.docs: + +Documentation changes +~~~~~~~~~~~~~~~~~~~~~ + +- Several ``NaT`` method docstrings (e.g. 
:func:`NaT.ctime`) were incorrect (:issue:`17327`) +- The documentation has had references to versions < v0.17 removed and cleaned up (:issue:`17442`, :issue:`17442`, :issue:`17404` & :issue:`17504`) + +.. _whatsnew_0210.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + +- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) +- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) +- Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) +- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) +- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) +- Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) +- Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`) +- Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`) +- Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (``DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`). +- Bug in :meth:`to_numeric` in which elements were not always being coerced to numeric when ``errors='coerce'`` (:issue:`17007`, :issue:`17125`) +- Bug in ``DataFrame`` and ``Series`` constructors where ``range`` objects are converted to ``int32`` dtype on Windows instead of ``int64`` (:issue:`16804`) + +Indexing +^^^^^^^^ + +- When called with a null slice (e.g. 
``df.iloc[:]``), the ``.iloc`` and ``.loc`` indexers return a shallow copy of the original object. Previously they returned the original object. (:issue:`13873`). +- When called on an unsorted ``MultiIndex``, the ``loc`` indexer now will raise ``UnsortedIndexError`` only if proper slicing is used on non-sorted levels (:issue:`16734`). +- Fixes regression in 0.20.3 when indexing with a string on a ``TimedeltaIndex`` (:issue:`16896`). +- Fixed :func:`TimedeltaIndex.get_loc` handling of ``np.timedelta64`` inputs (:issue:`16909`). +- Fix :func:`MultiIndex.sort_index` ordering when ``ascending`` argument is a list, but not all levels are specified, or are in a different order (:issue:`16934`). +- Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`) +- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) +- Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) +- Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) +- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) +- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) +- Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) +- Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) +- Bug in 
:func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) +- Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) + +I/O +^^^ + +- Bug in :func:`read_hdf` when reading a timezone aware index from ``fixed`` format HDFStore (:issue:`17618`) +- Bug in :func:`read_csv` in which columns were not being thoroughly de-duplicated (:issue:`17060`) +- Bug in :func:`read_csv` in which specified column names were not being thoroughly de-duplicated (:issue:`17095`) +- Bug in :func:`read_csv` in which non integer values for the header argument generated an unhelpful / unrelated error message (:issue:`16338`) +- Bug in :func:`read_csv` in which memory management issues in exception handling, under certain conditions, would cause the interpreter to segfault (:issue:`14696`, :issue:`16798`). +- Bug in :func:`read_csv` when called with ``low_memory=False`` in which a CSV with at least one column > 2GB in size would incorrectly raise a ``MemoryError`` (:issue:`16798`). 
+- Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) +- Bug in :meth:`DataFrame.to_csv` defaulting to 'ascii' encoding in Python 3, instead of 'utf-8' (:issue:`17097`) +- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) +- Bug in :func:`read_stata` where the index was not set (:issue:`16342`) +- Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) +- Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) +- Bug in :meth:`DataFrame.to_html` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) +- Bug in :meth:`DataFrame.to_html` in which there was no validation of the ``justify`` parameter (:issue:`17527`) +- Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`) +- Bug in :func:`to_json` where several conditions (including objects with unprintable symbols, objects with deep recursion, overlong labels) caused segfaults instead of raising the appropriate exception (:issue:`14256`) + +Plotting +^^^^^^^^ +- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) +- Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`) +- Line plots no longer assume monotonic x data when calculating xlims, they show the entire lines now even for unsorted x data. (:issue:`11310`, :issue:`11471`) +- With matplotlib 2.0.0 and above, calculation of x limits for line plots is left to matplotlib, so that its new default settings are applied. 
(:issue:`15495`) +- Bug in ``Series.plot.bar`` or ``DataFrame.plot.bar`` with ``y`` not respecting user-passed ``color`` (:issue:`16822`) +- Bug causing ``plotting.parallel_coordinates`` to reset the random seed when using random colors (:issue:`17525`) + + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in ``DataFrame.resample(...).size()`` where an empty ``DataFrame`` did not return a ``Series`` (:issue:`14962`) +- Bug in :func:`infer_freq` causing indices with 2-day gaps during the working week to be wrongly inferred as business daily (:issue:`16624`) +- Bug in ``.rolling(...).quantile()`` which incorrectly used different defaults than :func:`Series.quantile()` and :func:`DataFrame.quantile()` (:issue:`9413`, :issue:`16211`) +- Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) +- Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) +- Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1`` (:issue:`15305`) +- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) +- Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`) +- Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`) +- Bug in ``DataFrame.groupby`` where a spurious warning is raised when a ``Grouper`` object is used to override an ambiguous column name (:issue:`17383`) +- Bug in ``TimeGrouper`` differs when passed as a list and as a scalar (:issue:`17530`) + +Sparse +^^^^^^ + +- Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) +- Bug in :func:`SparseDataFrame.fillna` not
filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) +- Bug in :func:`SparseSeries.unstack` and :func:`SparseDataFrame.stack` (:issue:`16614`, :issue:`15045`) +- Bug in :func:`make_sparse` treating two numeric/boolean data, which have same bits, as same when array ``dtype`` is ``object`` (:issue:`17574`) +- :func:`SparseArray.all` and :func:`SparseArray.any` are now implemented to handle ``SparseArray``, these were used but not implemented (:issue:`17570`) + +Reshaping +^^^^^^^^^ +- Joining/Merging with a non unique ``PeriodIndex`` raised a ``TypeError`` (:issue:`16871`) +- Bug in :func:`crosstab` where non-aligned series of integers were casted to float (:issue:`17005`) +- Bug in merging with categorical dtypes with datetimelikes incorrectly raised a ``TypeError`` (:issue:`16900`) +- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`) +- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) +- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) +- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). 
+- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) +- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) +- Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) +- Bug in :func:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) +- Bug in ``DataFrame.drop_duplicates`` where dropping with non-unique column names raised a ``ValueError`` (:issue:`17836`) +- Bug in :func:`unstack` which, when called on a list of levels, would discard the ``fillna`` argument (:issue:`13971`) +- Bug in the alignment of ``range`` objects and other list-likes with ``DataFrame`` leading to operations being performed row-wise instead of column-wise (:issue:`17901`) + +Numeric +^^^^^^^ +- Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) +- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as ``None`` instead of raising ``ValueError`` (:issue:`17276`). + + +Categorical +^^^^^^^^^^^ +- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) +- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) +- Bug in categorical operations with :ref:`Series.cat ` not preserving the original Series' name (:issue:`17509`) +- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`) +- Bug in constructing a ``Categorical``/``CategoricalDtype`` when the specified ``categories`` are of categorical type (:issue:`17884`). + +.. 
_whatsnew_0210.pypy: + +PyPy +^^^^ + +- Compatibility with PyPy in :func:`read_csv` with ``usecols=[]`` and + :func:`read_json` (:issue:`17351`) +- Split tests into cases for CPython and PyPy where needed, which highlights the fragility + of index matching with ``float('nan')``, ``np.nan`` and ``NaT`` (:issue:`17351`) +- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, + so an approximation is used instead (:issue:`17228`) + +Other +^^^^^ +- Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) +- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) + + + +.. _whatsnew_0.21.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.20.3..v0.21.0 diff --git a/doc/source/whatsnew/v0.21.1.rst b/doc/source/whatsnew/v0.21.1.rst new file mode 100644 index 00000000..64f33398 --- /dev/null +++ b/doc/source/whatsnew/v0.21.1.rst @@ -0,0 +1,187 @@ +.. _whatsnew_0211: + +v0.21.1 (December 12, 2017) +--------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a minor bug-fix release in the 0.21.x series and includes some small regression fixes, +bug fixes and performance improvements. +We recommend that all users upgrade to this version. + +Highlights include: + +- Temporarily restore matplotlib datetime plotting functionality. This should + resolve issues for users who implicitly relied on pandas to plot datetimes + with matplotlib. See :ref:`here <whatsnew_0211.converters>`. +- Improvements to the Parquet IO functions introduced in 0.21.0. See + :ref:`here <whatsnew_0211.enhancements.parquet>`. + + +.. contents:: What's new in v0.21.1 + :local: + :backlinks: none + + +.. 
_whatsnew_0211.converters: + +Restore Matplotlib datetime converter registration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pandas implements some matplotlib converters for nicely formatting the axis +labels on plots with ``datetime`` or ``Period`` values. Prior to pandas 0.21.0, +these were implicitly registered with matplotlib, as a side effect of ``import +pandas``. + +In pandas 0.21.0, we required users to explicitly register the +converter. This caused problems for some users who relied on those converters +being present for regular ``matplotlib.pyplot`` plotting methods, so we're +temporarily reverting that change; pandas 0.21.1 again registers the converters on +import, just like before 0.21.0. + +We've added a new option to control the converters: +``pd.options.plotting.matplotlib.register_converters``. By default, they are +registered. Toggling this to ``False`` removes pandas' formatters and restores +any converters we overwrote when registering them (:issue:`18301`). + +We're working with the matplotlib developers to make this easier. We're trying +to balance user convenience (automatically registering the converters) with +import performance and best practices (importing pandas shouldn't have the side +effect of overwriting any custom converters you've already set). In the future +we hope to have most of the datetime formatting functionality in matplotlib, +with just the pandas-specific converters in pandas. We'll then gracefully +deprecate the automatic registration of converters in favor of users explicitly +registering them when they want them. + +.. _whatsnew_0211.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0211.enhancements.parquet: + +Improvements to the Parquet IO functionality +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- :func:`DataFrame.to_parquet` will now write non-default indexes when the + underlying engine supports it.
The indexes will be preserved when reading + back in with :func:`read_parquet` (:issue:`18581`). +- :func:`read_parquet` now allows specifying the columns to read from a parquet file (:issue:`18154`) +- :func:`read_parquet` now allows specifying kwargs which are passed to the respective engine (:issue:`18216`) + +.. _whatsnew_0211.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- :meth:`Timestamp.timestamp` is now available in Python 2.7. (:issue:`17329`) +- :class:`Grouper` and :class:`TimeGrouper` now have a friendly repr output (:issue:`18203`). + +.. _whatsnew_0211.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- ``pandas.tseries.register`` has been renamed to + :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18301`) + +.. _whatsnew_0211.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of plotting large series/dataframes (:issue:`18236`). + +.. _whatsnew_0211.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Conversion +^^^^^^^^^^ + +- Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`) +- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`) +- Bug in :meth:`IntervalIndex.copy` when copying an ``IntervalIndex`` with non-default ``closed`` (:issue:`18339`) +- Bug in :func:`DataFrame.to_dict` where columns of datetime that are tz-aware were not converted to required arrays when used with ``orient='records'``, raising ``TypeError`` (:issue:`18372`) +- Bug in :class:`DatetimeIndex` and :meth:`date_range` where mismatching tz-aware ``start`` and ``end`` timezones would not raise an error if ``end.tzinfo`` is None (:issue:`18431`) +- Bug in :meth:`Series.fillna` which raised when passed a long integer on Python 2 (:issue:`18159`).
+ +Indexing +^^^^^^^^ + +- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`) +- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`) +- Bug in :class:`IntervalIndex` constructor when a list of intervals is passed with non-default ``closed`` (:issue:`18334`) +- Bug in ``Index.putmask`` when an invalid mask is passed (:issue:`18368`) +- Bug in masked assignment of a ``timedelta64[ns]`` dtype ``Series``, incorrectly coerced to float (:issue:`18493`) + +I/O +^^^ + +- Bug in :class:`~pandas.io.stata.StataReader` not converting date/time columns with display formatting addressed (:issue:`17990`). Previously columns with display formatting were normally left as ordinal numbers and not converted to datetime objects. +- Bug in :func:`read_csv` when reading a compressed UTF-16 encoded file (:issue:`18071`) +- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`) +- Bug in :func:`read_csv` when reading numeric category fields with high cardinality (:issue:`18186`) +- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`) +- Bug in parsing integer datetime-like columns with specified format in ``read_sql`` (:issue:`17855`).
+- Bug in :meth:`DataFrame.to_msgpack` when serializing data of the ``numpy.bool_`` datatype (:issue:`18390`) +- Bug in :func:`read_json` not decoding when reading line delimited JSON from S3 (:issue:`17200`) +- Bug in :func:`pandas.io.json.json_normalize` to avoid modification of ``meta`` (:issue:`18610`) +- Bug in :func:`to_latex` where repeated MultiIndex values were not printed even though a higher level index differed from the previous row (:issue:`14484`) +- Bug when reading NaN-only categorical columns in :class:`HDFStore` (:issue:`18413`) +- Bug in :meth:`DataFrame.to_latex` with ``longtable=True`` where a latex multicolumn always spanned over three columns (:issue:`17959`) + +Plotting +^^^^^^^^ + +- Bug in ``DataFrame.plot()`` and ``Series.plot()`` with :class:`DatetimeIndex` where a figure generated by them is not pickleable in Python 3 (:issue:`18439`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in ``DataFrame.resample(...).apply(...)`` when there is a callable that returns different columns (:issue:`15169`) +- Bug in ``DataFrame.resample(...)`` when there is a time change (DST) and resampling frequency is 12h or higher (:issue:`15549`) +- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) +- Bug in ``rolling.var`` where calculation is inaccurate with a zero-valued array (:issue:`18430`) + +Reshaping +^^^^^^^^^ + +- Error message in ``pd.merge_asof()`` for key datatype mismatch now includes datatype of left and right key (:issue:`18068`) +- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`) +- Bug in ``DataFrame.filter(...)`` when :class:`unicode` is passed as a condition in Python 2 (:issue:`13101`) +- Bug when merging empty DataFrames when ``np.seterr(divide='raise')`` is set (:issue:`17776`) + +Numeric +^^^^^^^ + +- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue 
(:issue:`18044`) + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`DataFrame.astype` where casting to 'category' on an empty ``DataFrame`` causes a segmentation fault (:issue:`18004`) +- Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) +- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) +- Bug in ``Categorical.unique()`` returning read-only ``codes`` array when all categories were ``NaN`` (:issue:`18051`) +- Bug in ``DataFrame.groupby(axis=1)`` with a ``CategoricalIndex`` (:issue:`18432`) + +String +^^^^^^ + +- :meth:`Series.str.split()` will now propagate ``NaN`` values across all expanded columns instead of ``None`` (:issue:`18450`) + + +.. _whatsnew_0.21.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.21.0..v0.21.1 diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst new file mode 100644 index 00000000..75949a90 --- /dev/null +++ b/doc/source/whatsnew/v0.22.0.rst @@ -0,0 +1,262 @@ +.. _whatsnew_0220: + +v0.22.0 (December 29, 2017) +--------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a major release from 0.21.1 and includes a single, API-breaking change. +We recommend that all users upgrade to this version after carefully reading the +release note (singular!). + +.. _whatsnew_0220.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pandas 0.22.0 changes the handling of empty and all-*NA* sums and products. The +summary is that + +* The sum of an empty or all-*NA* ``Series`` is now ``0`` +* The product of an empty or all-*NA* ``Series`` is now ``1`` +* We've added a ``min_count`` parameter to ``.sum()`` and ``.prod()`` controlling + the minimum number of valid values for the result to be valid. 
If fewer than + ``min_count`` non-*NA* values are present, the result is *NA*. The default is + ``0``. To return ``NaN``, the 0.21 behavior, use ``min_count=1``. + +Some background: In pandas 0.21, we fixed a long-standing inconsistency +in the return value of all-*NA* series depending on whether or not bottleneck +was installed. See :ref:`whatsnew_0210.api_breaking.bottleneck`. At the same +time, we changed the sum and prod of an empty ``Series`` to also be ``NaN``. + +Based on feedback, we've partially reverted those changes. + +Arithmetic operations +^^^^^^^^^^^^^^^^^^^^^ + +The default sum for empty or all-*NA* ``Series`` is now ``0``. + +*pandas 0.21.x* + +.. code-block:: ipython + + In [1]: pd.Series([]).sum() + Out[1]: nan + + In [2]: pd.Series([np.nan]).sum() + Out[2]: nan + +*pandas 0.22.0* + +.. ipython:: python + :okwarning: + + pd.Series([]).sum() + pd.Series([np.nan]).sum() + +The default behavior is the same as pandas 0.20.3 with bottleneck installed. It +also matches the behavior of NumPy's ``np.nansum`` on empty and all-*NA* arrays. + +To have the sum of an empty series return ``NaN`` (the default behavior of +pandas 0.20.3 without bottleneck, or pandas 0.21.x), use the ``min_count`` +keyword. + +.. ipython:: python + :okwarning: + + pd.Series([]).sum(min_count=1) + +Thanks to the ``skipna`` parameter, the ``.sum`` on an all-*NA* +series is conceptually the same as the ``.sum`` of an empty one with +``skipna=True`` (the default). + +.. ipython:: python + + pd.Series([np.nan]).sum(min_count=1) # skipna=True by default + +The ``min_count`` parameter refers to the minimum number of *non-null* values +required for a non-NA sum or product. + +:meth:`Series.prod` has been updated to behave the same as :meth:`Series.sum`, +returning ``1`` instead. + +.. ipython:: python + :okwarning: + + pd.Series([]).prod() + pd.Series([np.nan]).prod() + pd.Series([]).prod(min_count=1) + +These changes affect :meth:`DataFrame.sum` and :meth:`DataFrame.prod` as well. 
+Finally, a few less obvious places in pandas are affected by this change. + +Grouping by a categorical +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Grouping by a ``Categorical`` and summing now returns ``0`` instead of +``NaN`` for categories with no observations. The product now returns ``1`` +instead of ``NaN``. + +*pandas 0.21.x* + +.. code-block:: ipython + + In [8]: grouper = pd.Categorical(['a', 'a'], categories=['a', 'b']) + + In [9]: pd.Series([1, 2]).groupby(grouper).sum() + Out[9]: + a 3.0 + b NaN + dtype: float64 + +*pandas 0.22* + +.. ipython:: python + + grouper = pd.Categorical(['a', 'a'], categories=['a', 'b']) + pd.Series([1, 2]).groupby(grouper).sum() + +To restore the 0.21 behavior of returning ``NaN`` for unobserved groups, +use ``min_count>=1``. + +.. ipython:: python + + pd.Series([1, 2]).groupby(grouper).sum(min_count=1) + +Resample +^^^^^^^^ + +The sum and product of all-*NA* bins has changed from ``NaN`` to ``0`` for +sum and ``1`` for product. + +*pandas 0.21.x* + +.. code-block:: ipython + + In [11]: s = pd.Series([1, 1, np.nan, np.nan], + ....: index=pd.date_range('2017', periods=4)) + ....: s + Out[11]: + 2017-01-01 1.0 + 2017-01-02 1.0 + 2017-01-03 NaN + 2017-01-04 NaN + Freq: D, dtype: float64 + + In [12]: s.resample('2d').sum() + Out[12]: + 2017-01-01 2.0 + 2017-01-03 NaN + Freq: 2D, dtype: float64 + +*pandas 0.22.0* + +.. ipython:: python + + s = pd.Series([1, 1, np.nan, np.nan], + index=pd.date_range('2017', periods=4)) + s.resample('2d').sum() + +To restore the 0.21 behavior of returning ``NaN``, use ``min_count>=1``. + +.. ipython:: python + + s.resample('2d').sum(min_count=1) + +In particular, upsampling and taking the sum or product is affected, as +upsampling introduces missing values even if the original series was +entirely valid. + +*pandas 0.21.x* + +.. 
code-block:: ipython + + In [14]: idx = pd.DatetimeIndex(['2017-01-01', '2017-01-02']) + + In [15]: pd.Series([1, 2], index=idx).resample('12H').sum() + Out[15]: + 2017-01-01 00:00:00 1.0 + 2017-01-01 12:00:00 NaN + 2017-01-02 00:00:00 2.0 + Freq: 12H, dtype: float64 + +*pandas 0.22.0* + +.. ipython:: python + + idx = pd.DatetimeIndex(['2017-01-01', '2017-01-02']) + pd.Series([1, 2], index=idx).resample("12H").sum() + +Once again, the ``min_count`` keyword is available to restore the 0.21 behavior. + +.. ipython:: python + + pd.Series([1, 2], index=idx).resample("12H").sum(min_count=1) + +Rolling and expanding +^^^^^^^^^^^^^^^^^^^^^ + +Rolling and expanding already have a ``min_periods`` keyword that behaves +similar to ``min_count``. The only case that changes is when doing a rolling +or expanding sum with ``min_periods=0``. Previously this returned ``NaN``, +when fewer than ``min_periods`` non-*NA* values were in the window. Now it +returns ``0``. + +*pandas 0.21.1* + +.. code-block:: ipython + + In [17]: s = pd.Series([np.nan, np.nan]) + + In [18]: s.rolling(2, min_periods=0).sum() + Out[18]: + 0 NaN + 1 NaN + dtype: float64 + +*pandas 0.22.0* + +.. ipython:: python + + s = pd.Series([np.nan, np.nan]) + s.rolling(2, min_periods=0).sum() + +The default behavior of ``min_periods=None``, implying that ``min_periods`` +equals the window size, is unchanged. + +Compatibility +~~~~~~~~~~~~~ + +If you maintain a library that should work across pandas versions, it +may be easiest to exclude pandas 0.21 from your requirements. Otherwise, all your +``sum()`` calls would need to check if the ``Series`` is empty before summing. + +With setuptools, in your ``setup.py`` use:: + + install_requires=['pandas!=0.21.*', ...] + +With conda, use + +.. code-block:: yaml + + requirements: + run: + - pandas !=0.21.0,!=0.21.1 + +Note that the inconsistency in the return value for all-*NA* series is still +there for pandas 0.20.3 and earlier. 
Avoiding pandas 0.21 will only help with +the empty case. + + +.. _whatsnew_0.22.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.21.1..v0.22.0 diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst new file mode 100644 index 00000000..b9e1b506 --- /dev/null +++ b/doc/source/whatsnew/v0.23.0.rst @@ -0,0 +1,1480 @@ +.. _whatsnew_0230: + +What's new in 0.23.0 (May 15, 2018) +----------------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +This is a major release from 0.22.0 and includes a number of API changes, +deprecations, new features, enhancements, and performance improvements along +with a large number of bug fixes. We recommend that all users upgrade to this +version. + +Highlights include: + +- :ref:`Round-trippable JSON format with 'table' orient `. +- :ref:`Instantiation from dicts respects order for Python 3.6+ `. +- :ref:`Dependent column arguments for assign `. +- :ref:`Merging / sorting on a combination of columns and index levels `. +- :ref:`Extending pandas with custom types `. +- :ref:`Excluding unobserved categories from groupby `. +- :ref:`Changes to make output shape of DataFrame.apply consistent `. + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.0 + :local: + :backlinks: none + :depth: 2 + +.. _whatsnew_0230.enhancements: + +New features +~~~~~~~~~~~~ + +.. _whatsnew_0230.enhancements.round-trippable_json: + +JSON read/write round-trippable with ``orient='table'`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``DataFrame`` can now be written to and subsequently read back via JSON while preserving metadata through usage of the ``orient='table'`` argument (see :issue:`18912` and :issue:`9146`). 
Previously, none of the available ``orient`` values guaranteed the preservation of dtypes and index names, amongst other metadata. + +.. ipython:: python + + df = pd.DataFrame({'foo': [1, 2, 3, 4], + 'bar': ['a', 'b', 'c', 'd'], + 'baz': pd.date_range('2018-01-01', freq='d', periods=4), + 'qux': pd.Categorical(['a', 'b', 'c', 'c'])}, + index=pd.Index(range(4), name='idx')) + df + df.dtypes + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +Please note that the string `index` is not supported with the round trip format, as it is used by default in ``write_json`` to indicate a missing index name. + +.. ipython:: python + :okwarning: + + df.index.name = 'index' + + df.to_json('test.json', orient='table') + new_df = pd.read_json('test.json', orient='table') + new_df + new_df.dtypes + +.. ipython:: python + :suppress: + + import os + os.remove('test.json') + + +.. _whatsnew_0230.enhancements.assign_dependent: + + +``.assign()`` accepts dependent arguments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`DataFrame.assign` now accepts dependent keyword arguments for python version later than 3.6 (see also `PEP 468 +`_). Later keyword arguments may now refer to earlier ones if the argument is a callable. See the +:ref:`documentation here ` (:issue:`14207`) + +.. ipython:: python + + df = pd.DataFrame({'A': [1, 2, 3]}) + df + df.assign(B=df.A, C=lambda x: x['A'] + x['B']) + +.. warning:: + + This may subtly change the behavior of your code when you're + using ``.assign()`` to update an existing column. Previously, callables + referring to other variables being updated would get the "old" values + + Previous behavior: + + .. code-block:: ipython + + In [2]: df = pd.DataFrame({"A": [1, 2, 3]}) + + In [3]: df.assign(A=lambda df: df.A + 1, C=lambda df: df.A * -1) + Out[3]: + A C + 0 2 -1 + 1 3 -2 + 2 4 -3 + + New behavior: + + .. 
ipython:: python + + df.assign(A=df.A + 1, C=lambda df: df.A * -1) + + + +.. _whatsnew_0230.enhancements.merge_on_columns_and_levels: + +Merging on a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.merge` as the ``on``, ``left_on``, and ``right_on`` +parameters may now refer to either column names or index level names. +This enables merging ``DataFrame`` instances on a combination of index levels +and columns without resetting indexes. See the :ref:`Merge on columns and +levels ` documentation section. +(:issue:`14355`) + +.. ipython:: python + + left_index = pd.Index(['K0', 'K0', 'K1', 'K2'], name='key1') + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], + 'B': ['B0', 'B1', 'B2', 'B3'], + 'key2': ['K0', 'K1', 'K0', 'K1']}, + index=left_index) + + right_index = pd.Index(['K0', 'K1', 'K2', 'K2'], name='key1') + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3'], + 'key2': ['K0', 'K0', 'K0', 'K1']}, + index=right_index) + + left.merge(right, on=['key1', 'key2']) + +.. _whatsnew_0230.enhancements.sort_by_columns_and_levels: + +Sorting by a combination of columns and index levels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Strings passed to :meth:`DataFrame.sort_values` as the ``by`` parameter may +now refer to either column names or index level names. This enables sorting +``DataFrame`` instances by a combination of index levels and columns without +resetting indexes. See the :ref:`Sorting by Indexes and Values +` documentation section. +(:issue:`14353`) + +.. ipython:: python + + # Build MultiIndex + idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 2), + ('b', 2), ('b', 1), ('b', 1)]) + idx.names = ['first', 'second'] + + # Build DataFrame + df_multi = pd.DataFrame({'A': np.arange(6, 0, -1)}, + index=idx) + df_multi + + # Sort by 'second' (index) and 'A' (column) + df_multi.sort_values(by=['second', 'A']) + + +.. 
_whatsnew_023.enhancements.extension: + +Extending pandas with custom types (experimental) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas now supports storing array-like objects that aren't necessarily 1-D NumPy +arrays as columns in a DataFrame or values in a Series. This allows third-party +libraries to implement extensions to NumPy's types, similar to how pandas +implemented categoricals, datetimes with timezones, periods, and intervals. + +As a demonstration, we'll use cyberpandas_, which provides an ``IPArray`` type +for storing ip addresses. + +.. code-block:: ipython + + In [1]: from cyberpandas import IPArray + + In [2]: values = IPArray([ + ...: 0, + ...: 3232235777, + ...: 42540766452641154071740215577757643572 + ...: ]) + ...: + ...: + +``IPArray`` isn't a normal 1-D NumPy array, but because it's a pandas +:class:`~pandas.api.extensions.ExtensionArray`, it can be stored properly inside pandas' containers. + +.. code-block:: ipython + + In [3]: ser = pd.Series(values) + + In [4]: ser + Out[4]: + 0 0.0.0.0 + 1 192.168.1.1 + 2 2001:db8:85a3::8a2e:370:7334 + dtype: ip + +Notice that the dtype is ``ip``. The missing value semantics of the underlying +array are respected: + +.. code-block:: ipython + + In [5]: ser.isna() + Out[5]: + 0 True + 1 False + 2 False + dtype: bool + +For more, see the :ref:`extension types ` +documentation. If you build an extension array, publicize it on our +:ref:`ecosystem page `. + +.. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest/ + + +.. _whatsnew_0230.enhancements.categorical_grouping: + +New ``observed`` keyword for excluding unobserved categories in ``groupby`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Grouping by a categorical includes the unobserved categories in the output. 
+When grouping by multiple categorical columns, this means you get the cartesian product of all the +categories, including combinations where there are no observations, which can result in a large +number of groups. We have added a keyword ``observed`` to control this behavior, it defaults to +``observed=False`` for backward-compatibility. (:issue:`14942`, :issue:`8138`, :issue:`15217`, :issue:`17594`, :issue:`8669`, :issue:`20583`, :issue:`20902`) + +.. ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df['C'] = ['foo', 'bar'] * 2 + df + +To show all values, the previous behavior: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=False).count() + + +To show only observed values: + +.. ipython:: python + + df.groupby(['A', 'B', 'C'], observed=True).count() + +For pivoting operations, this behavior is *already* controlled by the ``dropna`` keyword: + +.. ipython:: python + + cat1 = pd.Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + cat2 = pd.Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df + +.. ipython:: python + + pd.pivot_table(df, values='values', index=['A', 'B'], + dropna=True) + pd.pivot_table(df, values='values', index=['A', 'B'], + dropna=False) + + +.. _whatsnew_0230.enhancements.window_raw: + +Rolling/Expanding.apply() accepts ``raw=False`` to pass a ``Series`` to the function +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, +:func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` have gained a ``raw=None`` parameter. +This is similar to :func:`DataFame.apply`. 
This parameter, if ``True``, allows one to send a ``np.ndarray`` to the applied function.
_whatsnew_0210.enhancements.get_dummies_dtype: + +``get_dummies`` now supports ``dtype`` argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :func:`get_dummies` now accepts a ``dtype`` argument, which specifies a dtype for the new columns. The default remains uint8. (:issue:`18330`) + +.. ipython:: python + + df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]}) + pd.get_dummies(df, columns=['c']).dtypes + pd.get_dummies(df, columns=['c'], dtype=bool).dtypes + + +.. _whatsnew_0230.enhancements.timedelta_mod: + +Timedelta mod method +^^^^^^^^^^^^^^^^^^^^ + +``mod`` (%) and ``divmod`` operations are now defined on ``Timedelta`` objects +when operating with either timedelta-like or with numeric arguments. +See the :ref:`documentation here `. (:issue:`19365`) + +.. ipython:: python + + td = pd.Timedelta(hours=37) + td % pd.Timedelta(minutes=45) + +.. _whatsnew_0230.enhancements.ran_inf: + +``.rank()`` handles ``inf`` values when ``NaN`` are present +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions, ``.rank()`` would assign ``inf`` elements ``NaN`` as their ranks. Now ranks are calculated properly. (:issue:`6945`) + +.. ipython:: python + + s = pd.Series([-np.inf, 0, 1, np.nan, np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [11]: s.rank() + Out[11]: + 0 1.0 + 1 2.0 + 2 3.0 + 3 NaN + 4 NaN + dtype: float64 + +Current behavior: + +.. ipython:: python + + s.rank() + +Furthermore, previously if you rank ``inf`` or ``-inf`` values together with ``NaN`` values, the calculation won't distinguish ``NaN`` from infinity when using 'top' or 'bottom' argument. + +.. ipython:: python + + s = pd.Series([np.nan, np.nan, -np.inf, -np.inf]) + s + +Previous behavior: + +.. code-block:: ipython + + In [15]: s.rank(na_option='top') + Out[15]: + 0 2.5 + 1 2.5 + 2 2.5 + 3 2.5 + dtype: float64 + +Current behavior: + +.. 
ipython:: python + + s.rank(na_option='top') + +These bugs were squashed: + +- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending='False'`` failed to return correct ranks for infinity if ``NaN`` were present (:issue:`19538`) +- Bug in :func:`DataFrameGroupBy.rank` where ranks were incorrect when both infinity and ``NaN`` were present (:issue:`20561`) + + +.. _whatsnew_0230.enhancements.str_cat_align: + +``Series.str.cat`` has gained the ``join`` kwarg +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :meth:`Series.str.cat` did not -- in contrast to most of ``pandas`` -- align :class:`Series` on their index before concatenation (see :issue:`18657`). +The method has now gained a keyword ``join`` to control the manner of alignment, see examples below and :ref:`here `. + +In v.0.23 `join` will default to None (meaning no alignment), but this default will change to ``'left'`` in a future version of pandas. + +.. ipython:: python + :okwarning: + + s = pd.Series(['a', 'b', 'c', 'd']) + t = pd.Series(['b', 'd', 'e', 'c'], index=[1, 3, 4, 2]) + s.str.cat(t) + s.str.cat(t, join='left', na_rep='-') + +Furthermore, :meth:`Series.str.cat` now works for ``CategoricalIndex`` as well (previously raised a ``ValueError``; see :issue:`20842`). + +.. _whatsnew_0230.enhancements.astype_category: + +``DataFrame.astype`` performs column-wise conversion to ``Categorical`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.astype` can now perform column-wise conversion to ``Categorical`` by supplying the string ``'category'`` or +a :class:`~pandas.api.types.CategoricalDtype`. Previously, attempting this would raise a ``NotImplementedError``. 
See the +:ref:`categorical.objectcreation` section of the documentation for more details and examples. (:issue:`12860`, :issue:`18099`) + +Supplying the string ``'category'`` performs column-wise conversion, with only labels appearing in a given column set as categories: + +.. ipython:: python + + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + df = df.astype('category') + df['A'].dtype + df['B'].dtype + + +Supplying a ``CategoricalDtype`` will make the categories in each column consistent with the supplied dtype: + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}) + cdt = CategoricalDtype(categories=list('abcd'), ordered=True) + df = df.astype(cdt) + df['A'].dtype + df['B'].dtype + + +.. _whatsnew_0230.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- Unary ``+`` now permitted for ``Series`` and ``DataFrame`` as numeric operator (:issue:`16073`) +- Better support for :meth:`~pandas.io.formats.style.Styler.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) +- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. 
(:issue:`18171`) +- :func:`MultiIndex.unique` now supports the ``level=`` argument, to get unique values from a specific index level (:issue:`17896`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_index()`` to determine whether the index will be rendered in output (:issue:`14194`) +- :class:`pandas.io.formats.style.Styler` now has method ``hide_columns()`` to determine whether columns will be hidden in output (:issue:`14194`) +- Improved wording of ``ValueError`` raised in :func:`to_datetime` when ``unit=`` is passed with a non-convertible value (:issue:`14350`) +- :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`) +- :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`) +- Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) +- :func:`DataFrame.corrwith` now silently drops non-numeric columns when passed a Series. Before, an exception was raised (:issue:`18570`). +- :class:`IntervalIndex` now supports time zone aware ``Interval`` objects (:issue:`18537`, :issue:`18538`) +- :func:`Series` / :func:`DataFrame` tab completion also returns identifiers in the first level of a :func:`MultiIndex`. (:issue:`16326`) +- :func:`read_excel()` has gained the ``nrows`` parameter (:issue:`16645`) +- :meth:`DataFrame.append` can now in more cases preserve the type of the calling dataframe's columns (e.g. 
if both are ``CategoricalIndex``) (:issue:`18359`) +- :meth:`DataFrame.to_json` and :meth:`Series.to_json` now accept an ``index`` argument which allows the user to exclude the index from the JSON output (:issue:`17394`) +- ``IntervalIndex.to_tuples()`` has gained the ``na_tuple`` parameter to control whether NA is returned as a tuple of NA, or NA itself (:issue:`18756`) +- ``Categorical.rename_categories``, ``CategoricalIndex.rename_categories`` and :attr:`Series.cat.rename_categories` + can now take a callable as their argument (:issue:`18862`) +- :class:`Interval` and :class:`IntervalIndex` have gained a ``length`` attribute (:issue:`18789`) +- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. + Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). +- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). +- Added :func:`pandas.api.extensions.register_dataframe_accessor`, + :func:`pandas.api.extensions.register_series_accessor`, and + :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas + to register custom accessors like ``.cat`` on pandas objects. See + :ref:`Registering Custom Accessors ` for more (:issue:`14781`). 
+ +- ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) +- :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) +- Added :func:`pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing` and :func:`pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) +- For subclassed ``DataFrames``, :func:`DataFrame.apply` will now preserve the ``Series`` subclass (if defined) when passing the data to the applied function (:issue:`19822`) +- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) +- Added option ``display.html.use_mathjax`` so `MathJax `_ can be disabled when rendering tables in ``Jupyter`` notebooks (:issue:`19856`, :issue:`19824`) +- :func:`DataFrame.replace` now supports the ``method`` parameter, which can be used to specify the replacement method when ``to_replace`` is a scalar, list or tuple and ``value`` is ``None`` (:issue:`19632`) +- :meth:`Timestamp.month_name`, :meth:`DatetimeIndex.month_name`, and :meth:`Series.dt.month_name` are now available (:issue:`12805`) +- :meth:`Timestamp.day_name` and :meth:`DatetimeIndex.day_name` are now available to return day names with a specified locale (:issue:`12806`) +- :meth:`DataFrame.to_sql` now performs a multi-value insert if the underlying connection supports itk rather than inserting row by row. + ``SQLAlchemy`` dialects supporting multi-value inserts include: ``mysql``, ``postgresql``, ``sqlite`` and any dialect with ``supports_multivalues_insert``. (:issue:`14315`, :issue:`8953`) +- :func:`read_html` now accepts a ``displayed_only`` keyword argument to controls whether or not hidden elements are parsed (``True`` by default) (:issue:`20027`) +- :func:`read_html` now reads all ```` elements in a ``
    ``, not just the first. (:issue:`20690`) +- :meth:`~pandas.core.window.Rolling.quantile` and :meth:`~pandas.core.window.Expanding.quantile` now accept the ``interpolation`` keyword, ``linear`` by default (:issue:`20497`) +- zip compression is supported via ``compression=zip`` in :func:`DataFrame.to_pickle`, :func:`Series.to_pickle`, :func:`DataFrame.to_csv`, :func:`Series.to_csv`, :func:`DataFrame.to_json`, :func:`Series.to_json`. (:issue:`17778`) +- :class:`~pandas.tseries.offsets.WeekOfMonth` constructor now supports ``n=0`` (:issue:`20517`). +- :class:`DataFrame` and :class:`Series` now support matrix multiplication (``@``) operator (:issue:`10259`) for Python>=3.5 +- Updated :meth:`DataFrame.to_gbq` and :meth:`pandas.read_gbq` signature and documentation to reflect changes from + the Pandas-GBQ library version 0.4.0. Adds intersphinx mapping to Pandas-GBQ + library. (:issue:`20564`) +- Added new writer for exporting Stata dta files in version 117, ``StataWriter117``. This format supports exporting strings with lengths up to 2,000,000 characters (:issue:`16450`) +- :func:`to_hdf` and :func:`read_hdf` now accept an ``errors`` keyword argument to control encoding error handling (:issue:`20835`) +- :func:`cut` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`20947`) +- :func:`date_range`, :func:`timedelta_range`, and :func:`interval_range` now return a linearly spaced index if ``start``, ``stop``, and ``periods`` are specified, but ``freq`` is not. (:issue:`20808`, :issue:`20983`, :issue:`20976`) + +.. _whatsnew_0230.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0230.api_breaking.deps: + +Dependencies have increased minimum versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We have updated our minimum supported versions of dependencies (:issue:`15184`). 
+If installed, we now require: + ++-----------------+-----------------+----------+---------------+ +| Package | Minimum Version | Required | Issue | ++=================+=================+==========+===============+ +| python-dateutil | 2.5.0 | X | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| openpyxl | 2.4.0 | | :issue:`15184`| ++-----------------+-----------------+----------+---------------+ +| beautifulsoup4 | 4.2.1 | | :issue:`20082`| ++-----------------+-----------------+----------+---------------+ +| setuptools | 24.2.0 | | :issue:`20698`| ++-----------------+-----------------+----------+---------------+ + +.. _whatsnew_0230.api_breaking.dict_insertion_order: + +Instantiation from dicts preserves dict insertion order for python 3.6+ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Until Python 3.6, dicts in Python had no formally defined ordering. For Python +version 3.6 and later, dicts are ordered by insertion order, see +`PEP 468 `_. +Pandas will use the dict's insertion order, when creating a ``Series`` or +``DataFrame`` from a dict and you're using Python version 3.6 or +higher. (:issue:`19884`) + +Previous behavior (and current behavior if on Python < 3.6): + +.. code-block:: ipython + + In [16]: pd.Series({'Income': 2000, + ....: 'Expenses': -1500, + ....: 'Taxes': -200, + ....: 'Net result': 300}) + Out[16]: + Expenses -1500 + Income 2000 + Net result 300 + Taxes -200 + dtype: int64 + +Note the Series above is ordered alphabetically by the index values. + +New behavior (for Python >= 3.6): + +.. ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}) + +Notice that the Series is now ordered by insertion order. This new behavior is +used for all relevant pandas types (``Series``, ``DataFrame``, ``SparseSeries`` +and ``SparseDataFrame``). 
+ +If you wish to retain the old behavior while using Python >= 3.6, you can use +``.sort_index()``: + +.. ipython:: python + + pd.Series({'Income': 2000, + 'Expenses': -1500, + 'Taxes': -200, + 'Net result': 300}).sort_index() + +.. _whatsnew_0230.api_breaking.deprecate_panel: + +Deprecate Panel +^^^^^^^^^^^^^^^ + +``Panel`` was deprecated in the 0.20.x release, showing as a ``DeprecationWarning``. Using ``Panel`` will now show a ``FutureWarning``. The recommended way to represent 3-D data is +with a ``MultiIndex`` on a ``DataFrame`` via the :meth:`~Panel.to_frame` method or with the `xarray package <http://xarray.pydata.org/en/stable/>`__. Pandas +provides a :meth:`~Panel.to_xarray` method to automate this conversion (:issue:`13563`, :issue:`18324`). + +.. code-block:: ipython + + In [75]: import pandas._testing as tm + + In [76]: p = tm.makePanel() + + In [77]: p + Out[77]: + <class 'pandas.core.panel.Panel'> + Dimensions: 3 (items) x 3 (major_axis) x 4 (minor_axis) + Items axis: ItemA to ItemC + Major_axis axis: 2000-01-03 00:00:00 to 2000-01-05 00:00:00 + Minor_axis axis: A to D + +Convert to a MultiIndex DataFrame + +.. code-block:: ipython + + In [78]: p.to_frame() + Out[78]: + ItemA ItemB ItemC + major minor + 2000-01-03 A 0.469112 0.721555 0.404705 + B -1.135632 0.271860 -1.039268 + C 0.119209 0.276232 -1.344312 + D -2.104569 0.113648 -0.109050 + 2000-01-04 A -0.282863 -0.706771 0.577046 + B 1.212112 -0.424972 -0.370647 + C -1.044236 -1.087401 0.844885 + D -0.494929 -1.478427 1.643563 + 2000-01-05 A -1.509059 -1.039575 -1.715002 + B -0.173215 0.567020 -1.157892 + C -0.861849 -0.673690 1.075770 + D 1.071804 0.524988 -1.469388 + + [12 rows x 3 columns] + +Convert to an xarray DataArray + +.. 
code-block:: ipython + + In [79]: p.to_xarray() + Out[79]: + <xarray.DataArray (items: 3, major_axis: 3, minor_axis: 4)> + array([[[ 0.469112, -1.135632, 0.119209, -2.104569], + [-0.282863, 1.212112, -1.044236, -0.494929], + [-1.509059, -0.173215, -0.861849, 1.071804]], + + [[ 0.721555, 0.27186 , 0.276232, 0.113648], + [-0.706771, -0.424972, -1.087401, -1.478427], + [-1.039575, 0.56702 , -0.67369 , 0.524988]], + + [[ 0.404705, -1.039268, -1.344312, -0.10905 ], + [ 0.577046, -0.370647, 0.844885, 1.643563], + [-1.715002, -1.157892, 1.07577 , -1.469388]]]) + Coordinates: + * items (items) object 'ItemA' 'ItemB' 'ItemC' + * major_axis (major_axis) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 + * minor_axis (minor_axis) object 'A' 'B' 'C' 'D' + + +.. _whatsnew_0230.api_breaking.core_common: + +pandas.core.common removals +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following error & warning messages are removed from ``pandas.core.common`` (:issue:`13634`, :issue:`19769`): + +- ``PerformanceWarning`` +- ``UnsupportedFunctionCall`` +- ``UnsortedIndexError`` +- ``AbstractMethodError`` + +These are available for import from ``pandas.errors`` (since 0.19.0). + + +.. _whatsnew_0230.api_breaking.apply: + +Changes to make output of ``DataFrame.apply`` consistent +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined-function that returned a list-like with ``axis=1``. Several bugs and inconsistencies +are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned. This includes the case +where a list-like (e.g. ``tuple`` or ``list``) is returned (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`, +:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`). + +.. 
ipython:: python + + df = pd.DataFrame(np.tile(np.arange(3), 6).reshape(6, -1) + 1, + columns=['A', 'B', 'C']) + df + +Previous behavior: if the returned shape happened to match the length of original columns, this would return a ``DataFrame``. +If the return shape did not match, a ``Series`` with lists was returned. + +.. code-block:: python + + In [3]: df.apply(lambda x: [1, 2, 3], axis=1) + Out[3]: + A B C + 0 1 2 3 + 1 1 2 3 + 2 1 2 3 + 3 1 2 3 + 4 1 2 3 + 5 1 2 3 + + In [4]: df.apply(lambda x: [1, 2], axis=1) + Out[4]: + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + 3 [1, 2] + 4 [1, 2] + 5 [1, 2] + dtype: object + + +New behavior: When the applied function returns a list-like, this will now *always* return a ``Series``. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1) + df.apply(lambda x: [1, 2], axis=1) + +To have expanded columns, you can use ``result_type='expand'`` + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='expand') + +To broadcast the result across the original columns (the old behaviour for +list-likes of the correct length), you can use ``result_type='broadcast'``. +The shape must match the original columns. + +.. ipython:: python + + df.apply(lambda x: [1, 2, 3], axis=1, result_type='broadcast') + +Returning a ``Series`` allows one to control the exact return structure and column names: + +.. ipython:: python + + df.apply(lambda x: pd.Series([1, 2, 3], index=['D', 'E', 'F']), axis=1) + +.. _whatsnew_0230.api_breaking.concat: + +Concatenation will no longer sort +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In a future version of pandas :func:`pandas.concat` will no longer sort the non-concatenation axis when it is not already aligned. +The current behavior is the same as the previous (sorting), but now a warning is issued when ``sort`` is not specified and the non-concatenation axis is not aligned (:issue:`4588`). + +.. 
ipython:: python + :okwarning: + + df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=['b', 'a']) + df2 = pd.DataFrame({"a": [4, 5]}) + + pd.concat([df1, df2]) + +To keep the previous behavior (sorting) and silence the warning, pass ``sort=True`` + +.. ipython:: python + + pd.concat([df1, df2], sort=True) + +To accept the future behavior (no sorting), pass ``sort=False`` + +.. ipython:: python + + pd.concat([df1, df2], sort=False) + +Note that this change also applies to :meth:`DataFrame.append`, which has also received a ``sort`` keyword for controlling this behavior. + + +.. _whatsnew_0230.api_breaking.build_changes: + +Build changes +^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) +- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`) +- Updated conda recipe to be in compliance with conda-build 3.0+ (:issue:`18002`) + +.. _whatsnew_0230.api_breaking.index_division_by_zero: + +Index division by zero fills correctly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Division operations on ``Index`` and subclasses will now fill division of positive numbers by zero with ``np.inf``, division of negative numbers by zero with ``-np.inf`` and ``0 / 0`` with ``np.nan``. This matches existing ``Series`` behavior. (:issue:`19322`, :issue:`19347`) + +Previous behavior: + +.. code-block:: ipython + + In [6]: index = pd.Int64Index([-1, 0, 1]) + + In [7]: index / 0 + Out[7]: Int64Index([0, 0, 0], dtype='int64') + + # Previous behavior yielded different results depending on the type of zero in the divisor + In [8]: index / 0.0 + Out[8]: Float64Index([-inf, nan, inf], dtype='float64') + + In [9]: index = pd.UInt64Index([0, 1]) + + In [10]: index / np.array([0, 0], dtype=np.uint64) + Out[10]: UInt64Index([0, 0], dtype='uint64') + + In [11]: pd.RangeIndex(1, 5) / 0 + ZeroDivisionError: integer division or modulo by zero + +Current behavior: + +.. 
ipython:: python + + index = pd.Int64Index([-1, 0, 1]) + # division by zero gives -infinity where negative, + # +infinity where positive, and NaN for 0 / 0 + index / 0 + + # The result of division by zero should not depend on + # whether the zero is int or float + index / 0.0 + + index = pd.UInt64Index([0, 1]) + index / np.array([0, 0], dtype=np.uint64) + + pd.RangeIndex(1, 5) / 0 + +.. _whatsnew_0230.api_breaking.extract: + +Extraction of matching patterns from strings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By default, extracting matching patterns from strings with :func:`str.extract` used to return a +``Series`` if a single group was being extracted (a ``DataFrame`` if more than one group was +extracted). As of Pandas 0.23.0 :func:`str.extract` always returns a ``DataFrame``, unless +``expand`` is set to ``False``. Finally, ``None`` was an accepted value for +the ``expand`` parameter (which was equivalent to ``False``), but now raises a ``ValueError``. (:issue:`11386`) + +Previous behavior: + +.. code-block:: ipython + + In [1]: s = pd.Series(['number 10', '12 eggs']) + + In [2]: extracted = s.str.extract(r'.*(\d\d).*') + + In [3]: extracted + Out [3]: + 0 10 + 1 12 + dtype: object + + In [4]: type(extracted) + Out [4]: + pandas.core.series.Series + +New behavior: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*') + extracted + type(extracted) + +To restore previous behavior, simply set ``expand`` to ``False``: + +.. ipython:: python + + s = pd.Series(['number 10', '12 eggs']) + extracted = s.str.extract(r'.*(\d\d).*', expand=False) + extracted + type(extracted) + +.. 
_whatsnew_0230.api_breaking.cdt_ordered: + +Default value for the ``ordered`` parameter of ``CategoricalDtype`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default value of the ``ordered`` parameter for :class:`~pandas.api.types.CategoricalDtype` has changed from ``False`` to ``None`` to allow updating of ``categories`` without impacting ``ordered``. Behavior should remain consistent for downstream objects, such as :class:`Categorical` (:issue:`18790`) + +In previous versions, the default value for the ``ordered`` parameter was ``False``. This could potentially lead to the ``ordered`` parameter unintentionally being changed from ``True`` to ``False`` when users attempt to update ``categories`` if ``ordered`` is not explicitly specified, as it would silently default to ``False``. The new behavior for ``ordered=None`` is to retain the existing value of ``ordered``. + +New behavior: + +.. code-block:: ipython + + In [2]: from pandas.api.types import CategoricalDtype + + In [3]: cat = pd.Categorical(list('abcaba'), ordered=True, categories=list('cba')) + + In [4]: cat + Out[4]: + [a, b, c, a, b, a] + Categories (3, object): [c < b < a] + + In [5]: cdt = CategoricalDtype(categories=list('cbad')) + + In [6]: cat.astype(cdt) + Out[6]: + [a, b, c, a, b, a] + Categories (4, object): [c < b < a < d] + +Notice in the example above that the converted ``Categorical`` has retained ``ordered=True``. Had the default value for ``ordered`` remained as ``False``, the converted ``Categorical`` would have become unordered, despite ``ordered=False`` never being explicitly specified. To change the value of ``ordered``, explicitly pass it to the new dtype, e.g. ``CategoricalDtype(categories=list('cbad'), ordered=False)``. 
+ +Note that the unintentional conversion of ``ordered`` discussed above did not arise in previous versions due to separate bugs that prevented ``astype`` from doing any type of category to category conversion (:issue:`10696`, :issue:`18593`). These bugs have been fixed in this release, and motivated changing the default value of ``ordered``. + +.. _whatsnew_0230.api_breaking.pretty_printing: + +Better pretty-printing of DataFrames in a terminal +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the default value for the maximum number of columns was +``pd.options.display.max_columns=20``. This meant that relatively wide data +frames would not fit within the terminal width, and pandas would introduce line +breaks to display these 20 columns. This resulted in an output that was +relatively difficult to read: + +.. image:: ../_static/print_df_old.png + +If Python runs in a terminal, the maximum number of columns is now determined +automatically so that the printed data frame fits within the current terminal +width (``pd.options.display.max_columns=0``) (:issue:`17023`). If Python runs +as a Jupyter kernel (such as the Jupyter QtConsole or a Jupyter notebook, as +well as in many IDEs), this value cannot be inferred automatically and is thus +set to `20` as in previous versions. In a terminal, this results in a much +nicer output: + +.. image:: ../_static/print_df_new.png + +Note that if you don't like the new default, you can always set this option +yourself. To revert to the old setting, you can run this line: + +.. code-block:: python + + pd.options.display.max_columns = 20 + +.. 
_whatsnew_0230.api.datetimelike: + +Datetimelike API changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) +- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`) +- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) +- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) +- Addition and subtraction of ``NaN`` from a :class:`Series` with ``dtype='timedelta64[ns]'`` will raise a ``TypeError`` instead of treating the ``NaN`` as ``NaT`` (:issue:`19274`) +- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) +- Operations between a :class:`Series` with ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will now correctly raise a ``TypeError`` (:issue:`18850`) +- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`) +- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) +- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) +- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) +- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) +- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not 
sorted instead of an unhelpful ``KeyError`` (:issue:`17935`) +- :attr:`Series.first` and :attr:`DataFrame.first` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- :attr:`Series.last` and :attr:`DataFrame.last` will now raise a ``TypeError`` + rather than ``NotImplementedError`` when index is not a :class:`DatetimeIndex` (:issue:`20725`). +- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with ``freq=None``, addition or subtraction of integer-dtyped array or ``Index`` will raise ``NullFrequencyError`` instead of ``TypeError`` (:issue:`19895`) +- :class:`Timestamp` constructor now accepts a `nanosecond` keyword or positional argument (:issue:`18898`) +- :class:`DatetimeIndex` will now raise an ``AttributeError`` when the ``tz`` attribute is set after instantiation (:issue:`3746`) +- :class:`DatetimeIndex` with a ``pytz`` timezone will now return a consistent ``pytz`` timezone (:issue:`18595`) + +.. 
_whatsnew_0230.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`) +- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified, will now return an ``object`` dtyped ``Series``, previously this would infer the datetime dtype (:issue:`18231`) +- A :class:`Series` of ``dtype=category`` constructed from an empty ``dict`` will now have categories of ``dtype=object`` rather than ``dtype=float64``, consistently with the case in which an empty list is passed (:issue:`18515`) +- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). +- Levels names of a ``MultiIndex`` (when not None) are now required to be unique: trying to create a ``MultiIndex`` with repeated names will raise a ``ValueError`` (:issue:`18872`) +- Both construction and renaming of ``Index``/``MultiIndex`` with non-hashable ``name``/``names`` will now raise ``TypeError`` (:issue:`20527`) +- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`, :issue:`18482`, :issue:`18509`). +- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`) +- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`) +- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) +- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. 
(:issue:`17464`) +- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) +- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) +- :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) +- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) +- Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) +- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) +- :func:`wide_to_long` previously kept numeric-like suffixes as ``object`` dtype. Now they are cast to numeric if possible (:issue:`17627`) +- In :func:`read_excel`, the ``comment`` argument is now exposed as a named parameter (:issue:`18735`) +- Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) +- The options ``html.border`` and ``mode.use_inf_as_null`` were deprecated in prior versions, these will now show ``FutureWarning`` rather than a ``DeprecationWarning`` (:issue:`19003`) +- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) +- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) +- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) +- :func:`Series.to_csv` now accepts a ``compression`` argument 
that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) +- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) +- :class:`DateOffset` objects render more simply, e.g. ``<DateOffset: days=1>`` instead of ``<DateOffset: kwds={'days': 1}>`` (:issue:`19403`) +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) +- ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) +- :func:`Series.str.replace` now takes an optional ``regex`` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) +- :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) +- Constructing a Series from a list of length 1 no longer broadcasts this list when a longer index is specified (:issue:`19714`, :issue:`20391`). +- :func:`DataFrame.to_dict` with ``orient='index'`` no longer casts int columns to float for a DataFrame with only int and float columns (:issue:`18580`) +- A user-defined-function that is passed to :func:`Series.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, :func:`DataFrame.rolling().aggregate() <pandas.core.window.Rolling.aggregate>`, or its expanding cousins, will now *always* be passed a ``Series``, rather than a ``np.array``; ``.apply()`` only has the ``raw`` keyword, see :ref:`here <whatsnew_0230.enhancements.window_raw>`. This is consistent with the signatures of ``.aggregate()`` across pandas (:issue:`20584`) +- Rolling and Expanding types raise ``NotImplementedError`` upon iteration (:issue:`11704`). + +.. 
_whatsnew_0230.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- ``Series.from_array`` and ``SparseSeries.from_array`` are deprecated. Use the normal constructor ``Series(..)`` and ``SparseSeries(..)`` instead (:issue:`18213`). +- ``DataFrame.as_matrix`` is deprecated. Use ``DataFrame.values`` instead (:issue:`18458`). +- ``Series.asobject``, ``DatetimeIndex.asobject``, ``PeriodIndex.asobject`` and ``TimedeltaIndex.asobject`` have been deprecated. Use ``.astype(object)`` instead (:issue:`18572`) +- Grouping by a tuple of keys now emits a ``FutureWarning`` and is deprecated. + In the future, a tuple passed to ``'by'`` will always refer to a single key + that is the actual tuple, instead of treating the tuple as multiple keys. To + retain the previous behavior, use a list instead of a tuple (:issue:`18314`) +- ``Series.valid`` is deprecated. Use :meth:`Series.dropna` instead (:issue:`18800`). +- :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) +- :meth:`ExcelFile.parse` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with :func:`read_excel` (:issue:`20920`). +- The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). +- ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- ``DataFrame.from_items`` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`, :issue:`17312`) +- Indexing a :class:`MultiIndex` or a :class:`Float64Index` with a list containing some missing keys will now show a :class:`FutureWarning`, which is consistent with other types of indexes (:issue:`17758`). 
+ +- The ``broadcast`` parameter of ``.apply()`` is deprecated in favor of ``result_type='broadcast'`` (:issue:`18577`) +- The ``reduce`` parameter of ``.apply()`` is deprecated in favor of ``result_type='reduce'`` (:issue:`18577`) +- The ``order`` parameter of :func:`factorize` is deprecated and will be removed in a future release (:issue:`19727`) +- :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` are deprecated in favor of :meth:`Timestamp.day_name`, :meth:`DatetimeIndex.day_name`, and :meth:`Series.dt.day_name` (:issue:`12806`) + +- ``pandas.tseries.plotting.tsplot`` is deprecated. Use :func:`Series.plot` instead (:issue:`18627`) +- ``Index.summary()`` is deprecated and will be removed in a future version (:issue:`18217`) +- ``NDFrame.get_ftype_counts()`` is deprecated and will be removed in a future version (:issue:`18243`) +- The ``convert_datetime64`` parameter in :func:`DataFrame.to_records` has been deprecated and will be removed in a future version. The NumPy bug motivating this parameter has been resolved. The default value for this parameter has also changed from ``True`` to ``None`` (:issue:`18160`). +- :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, + :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` have deprecated passing an ``np.array`` by default. One will need to pass the new ``raw`` parameter to be explicit about what is passed (:issue:`20584`) +- The ``data``, ``base``, ``strides``, ``flags`` and ``itemsize`` properties + of the ``Series`` and ``Index`` classes have been deprecated and will be + removed in a future version (:issue:`20419`). +- ``DatetimeIndex.offset`` is deprecated. Use ``DatetimeIndex.freq`` instead (:issue:`20716`) +- Floor division between an integer ndarray and a :class:`Timedelta` is deprecated. 
Divide by :attr:`Timedelta.value` instead (:issue:`19761`) +- Setting ``PeriodIndex.freq`` (which was not guaranteed to work correctly) is deprecated. Use :meth:`PeriodIndex.asfreq` instead (:issue:`20678`) +- ``Index.get_duplicates()`` is deprecated and will be removed in a future version (:issue:`20239`) +- The previous default behavior of negative indices in ``Categorical.take`` is deprecated. In a future version it will change from meaning missing values to meaning positional indices from the right. The future behavior is consistent with :meth:`Series.take` (:issue:`20664`). +- Passing multiple axes to the ``axis`` parameter in :func:`DataFrame.dropna` has been deprecated and will be removed in a future version (:issue:`20987`) + + +.. _whatsnew_0230.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`) +- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`). +- ``pd.tseries.util.pivot_annual`` has been removed (deprecated since v0.19). Use ``pivot_table`` instead (:issue:`18370`) +- ``pd.tseries.util.isleapyear`` has been removed (deprecated since v0.19). Use ``.is_leap_year`` property in Datetime-likes instead (:issue:`18370`) +- ``pd.ordered_merge`` has been removed (deprecated since v0.19). 
Use ``pd.merge_ordered`` instead (:issue:`18459`) +- The ``SparseList`` class has been removed (:issue:`14007`) +- The ``pandas.io.wb`` and ``pandas.io.data`` stub modules have been removed (:issue:`13735`) +- ``Categorical.from_array`` has been removed (:issue:`13854`) +- The ``freq`` and ``how`` parameters have been removed from the ``rolling``/``expanding``/``ewm`` methods of DataFrame + and Series (deprecated since v0.18). Instead, resample before calling the methods. (:issue:`18601` & :issue:`18668`) +- ``DatetimeIndex.to_datetime``, ``Timestamp.to_datetime``, ``PeriodIndex.to_datetime``, and ``Index.to_datetime`` have been removed (:issue:`8254`, :issue:`14096`, :issue:`14113`) +- :func:`read_csv` has dropped the ``skip_footer`` parameter (:issue:`13386`) +- :func:`read_csv` has dropped the ``as_recarray`` parameter (:issue:`13373`) +- :func:`read_csv` has dropped the ``buffer_lines`` parameter (:issue:`13360`) +- :func:`read_csv` has dropped the ``compact_ints`` and ``use_unsigned`` parameters (:issue:`13323`) +- The ``Timestamp`` class has dropped the ``offset`` attribute in favor of ``freq`` (:issue:`13593`) +- The ``Series``, ``Categorical``, and ``Index`` classes have dropped the ``reshape`` method (:issue:`13012`) +- ``pandas.tseries.frequencies.get_standard_freq`` has been removed in favor of ``pandas.tseries.frequencies.to_offset(freq).rule_code`` (:issue:`13874`) +- The ``freqstr`` keyword has been removed from ``pandas.tseries.frequencies.to_offset`` in favor of ``freq`` (:issue:`13874`) +- The ``Panel4D`` and ``PanelND`` classes have been removed (:issue:`13776`) +- The ``Panel`` class has dropped the ``to_long`` and ``toLong`` methods (:issue:`19077`) +- The options ``display.line_width`` and ``display.height`` are removed in favor of ``display.width`` and ``display.max_rows`` respectively (:issue:`4391`, :issue:`19107`) +- The ``labels`` attribute of the ``Categorical`` class has been removed in favor of :attr:`Categorical.codes` (:issue:`7768`) 
+- The ``flavor`` parameter has been removed from the :func:`to_sql` method (:issue:`13611`) +- The modules ``pandas.tools.hashing`` and ``pandas.util.hashing`` have been removed (:issue:`16223`) +- The top-level functions ``pd.rolling_*``, ``pd.expanding_*`` and ``pd.ewm*`` have been removed (Deprecated since v0.18). + Instead, use the DataFrame/Series methods :attr:`~DataFrame.rolling`, :attr:`~DataFrame.expanding` and :attr:`~DataFrame.ewm` (:issue:`18723`) +- Imports from ``pandas.core.common`` for functions such as ``is_datetime64_dtype`` are now removed. These are located in ``pandas.api.types``. (:issue:`13634`, :issue:`19769`) +- The ``infer_dst`` keyword in :meth:`Series.tz_localize`, :meth:`DatetimeIndex.tz_localize` + and :class:`DatetimeIndex` has been removed. ``infer_dst=True`` is equivalent to + ``ambiguous='infer'``, and ``infer_dst=False`` to ``ambiguous='raise'`` (:issue:`7963`). +- When ``.resample()`` was changed from an eager to a lazy operation, like ``.groupby()`` in v0.18.0, we put in place compatibility (with a ``FutureWarning``), + so operations would continue to work. This is now fully removed, so a ``Resampler`` will no longer forward compat operations (:issue:`20554`) +- Remove long deprecated ``axis=None`` parameter from ``.replace()`` (:issue:`20271`) + +.. _whatsnew_0230.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) +- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`) +- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) +- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... 
sped up through vectorization of underlying methods (:issue:`18092`) +- Improved performance of ``.map()`` with a ``Series/dict`` input (:issue:`15081`) +- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`) +- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`) +- Improved performance of :func:`Series.dt.date` and :func:`DatetimeIndex.date` (:issue:`18058`) +- Improved performance of :func:`Series.dt.time` and :func:`DatetimeIndex.time` (:issue:`18461`) +- Improved performance of :func:`IntervalIndex.symmetric_difference()` (:issue:`18475`) +- Improved performance of ``DatetimeIndex`` and ``Series`` arithmetic operations with Business-Month and Business-Quarter frequencies (:issue:`18489`) +- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`) +- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`) +- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`) +- Improved performance of :func:`MultiIndex.remove_unused_levels` when there are no unused levels, at the cost of a reduction in performance when there are (:issue:`19289`) +- Improved performance of :func:`Index.get_loc` for non-unique indexes (:issue:`19478`) +- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` (:issue:`15779`) +- Improved performance of variable ``.rolling()`` on ``.min()`` and ``.max()`` (:issue:`19521`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` (:issue:`11296`) +- Improved performance of 
:func:`pandas.core.groupby.GroupBy.any` and :func:`pandas.core.groupby.GroupBy.all` (:issue:`15435`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.pct_change` (:issue:`19165`) +- Improved performance of :func:`Series.isin` in the case of categorical dtypes (:issue:`20003`) +- Improved performance of ``getattr(Series, attr)`` when the Series has certain index types. This manifested in slow printing of large Series with a ``DatetimeIndex`` (:issue:`19764`) +- Fixed a performance regression for :func:`GroupBy.nth` and :func:`GroupBy.last` with some object columns (:issue:`19283`) +- Improved performance of :func:`pandas.core.arrays.Categorical.from_codes` (:issue:`18501`) + +.. _whatsnew_0230.docs: + +Documentation changes +~~~~~~~~~~~~~~~~~~~~~ + +Thanks to all of the contributors who participated in the Pandas Documentation +Sprint, which took place on March 10th. We had about 500 participants from over +30 locations across the world. You should notice that many of the +:ref:`API docstrings ` have greatly improved. + +There were too many simultaneous contributions to include a release note for each +improvement, but this `GitHub search`_ should give you an idea of how many docstrings +were improved. + +Special thanks to `Marc Garcia`_ for organizing the sprint. For more information, +read the `NumFOCUS blogpost`_ recapping the sprint. + +.. _GitHub search: https://github.com/pandas-dev/pandas/pulls?utf8=%E2%9C%93&q=is%3Apr+label%3ADocs+created%3A2018-03-10..2018-03-15+ +.. _NumFOCUS blogpost: https://www.numfocus.org/blog/worldwide-pandas-sprint/ +.. _Marc Garcia: https://github.com/datapythonista + +- Changed spelling of "numpy" to "NumPy", and "python" to "Python". (:issue:`19017`) +- Consistency when introducing code samples, using either colon or period. + Rewrote some sentences for greater clarity, added more dynamic references + to functions, methods and classes. 
+ (:issue:`18941`, :issue:`18948`, :issue:`18973`, :issue:`19017`) +- Added a reference to :func:`DataFrame.assign` in the concatenate section of the merging documentation (:issue:`18665`) + +.. _whatsnew_0230.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +.. warning:: + + A class of bugs were introduced in pandas 0.21 with ``CategoricalDtype`` that + affects the correctness of operations like ``merge``, ``concat``, and + indexing when comparing multiple unordered ``Categorical`` arrays that have + the same categories, but in a different order. We highly recommend upgrading + or manually aligning your categories before doing these operations. + +- Bug in ``Categorical.equals`` returning the wrong result when comparing two + unordered ``Categorical`` arrays with the same categories, but in a different + order (:issue:`16603`) +- Bug in :func:`pandas.api.types.union_categoricals` returning the wrong result + when for unordered categoricals with the categories in a different order. + This affected :func:`pandas.concat` with Categorical data (:issue:`19096`). +- Bug in :func:`pandas.merge` returning the wrong result when joining on an + unordered ``Categorical`` that had the same categories but in a different + order (:issue:`19551`) +- Bug in :meth:`CategoricalIndex.get_indexer` returning the wrong result when + ``target`` was an unordered ``Categorical`` that had the same categories as + ``self`` but in a different order (:issue:`19551`) +- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) +- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :meth:`Series.str.split` with ``expand=True`` incorrectly raising an IndexError on empty strings (:issue:`20002`). 
+- Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19032`) +- Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (:issue:`19565`) +- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) +- Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`) +- Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`) +- Bug in :meth:`Series.take` with categorical data interpreting ``-1`` in `indices` as missing value markers, rather than the last element of the Series (:issue:`20664`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) +- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` addition and subtraction of zero-dimensional integer arrays gave incorrect results (:issue:`19012`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where adding or subtracting an array-like of ``DateOffset`` objects either raised (``np.array``, ``pd.Index``) or broadcast incorrectly (``pd.Series``) (:issue:`18849`) +- Bug in :func:`Series.__add__` adding Series with dtype ``timedelta64[ns]`` to a timezone-aware ``DatetimeIndex`` incorrectly dropped timezone information (:issue:`13905`) +- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`) +- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`) +- Bug in :class:`Series` floor-division where operating on a scalar ``timedelta`` raises an exception (:issue:`18846`) +- Bug in 
:class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) +- Bug in ``.astype()`` to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`) +- Bug in subtracting :class:`Series` from ``NaT`` incorrectly returning ``NaT`` (:issue:`19158`) +- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) +- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`) +- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`) +- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`) +- Bug in :func:`Timestamp.floor` :func:`DatetimeIndex.floor` where time stamps far in the future and past were not rounded correctly (:issue:`19206`) +- Bug in :func:`to_datetime` where passing an out-of-bounds datetime with ``errors='coerce'`` and ``utc=True`` would raise ``OutOfBoundsDatetime`` instead of parsing to ``NaT`` (:issue:`19612`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where name of the returned object was not always set consistently. 
(:issue:`19744`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` addition and subtraction where operations with numpy arrays raised ``TypeError`` (:issue:`19847`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where setting the ``freq`` attribute was not fully supported (:issue:`20678`) + +Timedelta +^^^^^^^^^ + +- Bug in :func:`Timedelta.__mul__` where multiplying by ``NaT`` returned ``NaT`` instead of raising a ``TypeError`` (:issue:`19819`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` had results cast to ``dtype='int64'`` (:issue:`17250`) +- Bug in :class:`Series` with ``dtype='timedelta64[ns]'`` where addition or subtraction of ``TimedeltaIndex`` could return a ``Series`` with an incorrect name (:issue:`19043`) +- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) +- Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) +- Bug in :class:`TimedeltaIndex` where division by a ``Series`` would return a ``TimedeltaIndex`` instead of a ``Series`` (:issue:`19042`) +- Bug in :func:`Timedelta.__add__`, :func:`Timedelta.__sub__` where adding or subtracting a ``np.timedelta64`` object would return another ``np.timedelta64`` instead of a ``Timedelta`` (:issue:`19738`) +- Bug in :func:`Timedelta.__floordiv__`, :func:`Timedelta.__rfloordiv__` where operating with a ``Tick`` object would raise a ``TypeError`` instead of returning a numeric value (:issue:`19738`) +- Bug in :func:`Period.asfreq` where periods near ``datetime(1, 1, 1)`` could be converted incorrectly (:issue:`19643`, :issue:`19834`) +- Bug in :func:`Timedelta.total_seconds()` causing precision errors, for example ``Timedelta('30S').total_seconds()==30.000000000000004`` (:issue:`19458`) +- Bug in :func:`Timedelta.__rmod__` where operating 
with a ``numpy.timedelta64`` returned a ``timedelta64`` object instead of a ``Timedelta`` (:issue:`19820`) +- Multiplication of :class:`TimedeltaIndex` by ``TimedeltaIndex`` will now raise ``TypeError`` instead of raising ``ValueError`` in cases of length mis-match (:issue:`19333`) +- Bug in indexing a :class:`TimedeltaIndex` with a ``np.timedelta64`` object which was raising a ``TypeError`` (:issue:`20393`) + + +Timezones +^^^^^^^^^ + +- Bug in creating a ``Series`` from an array that contains both tz-naive and tz-aware values will result in a ``Series`` whose dtype is tz-aware instead of object (:issue:`16406`) +- Bug in comparison of timezone-aware :class:`DatetimeIndex` against ``NaT`` incorrectly raising ``TypeError`` (:issue:`19276`) +- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`) +- Bug in comparing :class:`DatetimeIndex`, which failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) +- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`174151`) +- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) +- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`) +- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`) +- Bug in :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`) +- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`) +- Bug when iterating over 
:class:`DatetimeIndex` that was localized with fixed timezone offset that rounded nanosecond precision to microseconds (:issue:`19603`) +- Bug in :func:`DataFrame.diff` that raised an ``IndexError`` with tz-aware values (:issue:`18578`) +- Bug in :func:`melt` that converted tz-aware dtypes to tz-naive (:issue:`15785`) +- Bug in :func:`Dataframe.count` that raised an ``ValueError``, if :func:`Dataframe.dropna` was called for a single column with timezone-aware values. (:issue:`13407`) + +Offsets +^^^^^^^ + +- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`) +- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) +- Bug in :class:`FY5253Quarter`, :class:`LastWeekOfMonth` where rollback and rollforward behavior was inconsistent with addition and subtraction behavior (:issue:`18854`) +- Bug in :class:`FY5253` where ``datetime`` addition and subtraction incremented incorrectly for dates on the year-end but not normalized to midnight (:issue:`18854`) +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) + +Numeric +^^^^^^^ +- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) +- Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) +- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) +- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) +- Bug in 
:class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) +- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) +- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) + +Strings +^^^^^^^ +- Bug in :func:`Series.str.get` with a dictionary in the values and the index not in the keys, raising `KeyError` (:issue:`20671`) + + +Indexing +^^^^^^^^ + +- Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) +- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) +- Bug in :func:`DataFrame.drop`, :meth:`Panel.drop`, :meth:`Series.drop`, :meth:`Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) +- Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). 
+- :func:`Index.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- :func:`DatetimeIndex.to_series` now accepts ``index`` and ``name`` kwargs (:issue:`18699`) +- Bug in indexing non-scalar value from ``Series`` having non-unique ``Index`` will return value flattened (:issue:`17610`) +- Bug in indexing with iterator containing only missing keys, which raised no error (:issue:`20748`) +- Fixed inconsistency in ``.ix`` between list and scalar keys when the index has integer dtype and does not include the desired keys (:issue:`20753`) +- Bug in ``__setitem__`` when indexing a :class:`DataFrame` with a 2-d boolean ndarray (:issue:`18582`) +- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) +- Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) +- Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) +- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) +- Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (:issue:`19726`) +- Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) +- Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). 
+- Bug in :class:`IntervalIndex` where some indexing operations were not supported for overlapping or non-monotonic ``uint64`` data (:issue:`20636`) +- Bug in ``Series.is_unique`` where extraneous output in stderr is shown if Series contains objects with ``__ne__`` defined (:issue:`20661`) +- Bug in ``.loc`` assignment with a single-element list-like incorrectly assigns as a list (:issue:`19474`) +- Bug in partial string indexing on a ``Series/DataFrame`` with a monotonic decreasing ``DatetimeIndex`` (:issue:`19362`) +- Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`) +- Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`) +- Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :func:`MultiIndex.__contains__` where non-tuple keys would return ``True`` even if they had been dropped (:issue:`19027`) +- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... 
] (:issue:`19057`) +- Bug in :func:`MultiIndex.get_level_values` which would return an invalid index on level of ints with missing values (:issue:`17924`) +- Bug in :func:`MultiIndex.unique` when called on empty :class:`MultiIndex` (:issue:`20568`) +- Bug in :func:`MultiIndex.unique` which would not preserve level names (:issue:`20570`) +- Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) +- Bug in :func:`MultiIndex.from_tuples` which would fail to take zipped tuples in python3 (:issue:`18434`) +- Bug in :func:`MultiIndex.get_loc` which would fail to automatically cast values between float and int (:issue:`18818`, :issue:`15994`) +- Bug in :func:`MultiIndex.get_loc` which would cast boolean to integer labels (:issue:`19086`) +- Bug in :func:`MultiIndex.get_loc` which would fail to locate keys containing ``NaN`` (:issue:`18485`) +- Bug in :func:`MultiIndex.get_loc` in large :class:`MultiIndex`, would fail when levels had different dtypes (:issue:`18520`) +- Bug in indexing where nested indexers having only numpy arrays are handled incorrectly (:issue:`19686`) + + +I/O +^^^ + +- :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) +- :meth:`DataFrame.to_html` now has an option to add an id to the leading `
    ` tag (:issue:`8496`) +- Bug in :func:`read_msgpack` with a non existent file is passed in Python 2 (:issue:`15296`) +- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) +- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) +- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`) +- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) +- Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) +- Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) +- Bug in :func:`DataFrame.to_latex()` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`) +- Bug in :func:`DataFrame.to_latex()` where the combination of an index name and the `index_names=False` option would result in incorrect output (:issue:`18326`) +- Bug in :func:`DataFrame.to_latex()` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`) +- Bug in :func:`DataFrame.to_latex()` where missing space characters caused wrong escaping and produced non-valid latex in some cases (:issue:`20859`) +- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) +- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) +- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) +- :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`) +- Bug 
in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) +- Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`) +- Bug in :meth:`pandas.io.json.json_normalize` where sub-records are not properly normalized if any sub-records values are NoneType (:issue:`20030`) +- Bug in ``usecols`` parameter in :func:`read_csv` where error is not raised correctly when passing a string. (:issue:`20529`) +- Bug in :func:`HDFStore.keys` when reading a file with a soft link causes exception (:issue:`20523`) +- Bug in :func:`HDFStore.select_column` where a key which is not a valid store raised an ``AttributeError`` instead of a ``KeyError`` (:issue:`17912`) + +Plotting +^^^^^^^^ + +- Better error message when attempting to plot but matplotlib is not installed (:issue:`19810`). +- :func:`DataFrame.plot` now raises a ``ValueError`` when the ``x`` or ``y`` argument is improperly formed (:issue:`18671`) +- Bug in :func:`DataFrame.plot` when ``x`` and ``y`` arguments given as positions caused incorrect referenced columns for line, bar and area plots (:issue:`20056`) +- Bug in formatting tick labels with ``datetime.time()`` and fractional seconds (:issue:`18478`). +- :meth:`Series.plot.kde` has exposed the args ``ind`` and ``bw_method`` in the docstring (:issue:`18461`). The argument ``ind`` may now also be an integer (number of sample points). 
+- :func:`DataFrame.plot` now supports multiple columns to the ``y`` argument (:issue:`19699`) + + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug when grouping by a single column and aggregating with a class like ``list`` or ``tuple`` (:issue:`18079`) +- Fixed regression in :func:`DataFrame.groupby` which would not emit an error when called with a tuple key not in the index (:issue:`18798`) +- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`) +- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) +- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`) +- Bug in :func:`DataFrame.transform` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`) +- Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`) +- Bug in :func:`DataFrame.resample().aggregate ` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`) +- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` when ``skipna`` was passed (:issue:`19806`) +- Bug in :func:`DataFrame.resample` that dropped timezone information (:issue:`13238`) +- Bug in :func:`DataFrame.groupby` where transformations using ``np.all`` and ``np.any`` were raising a ``ValueError`` (:issue:`20653`) +- Bug in :func:`DataFrame.resample` where ``ffill``, ``bfill``, ``pad``, ``backfill``, ``fillna``, ``interpolate``, and ``asfreq`` were ignoring ``loffset``. 
(:issue:`20744`) +- Bug in :func:`DataFrame.groupby` when applying a function that has mixed data types and the user supplied function can fail on the grouping column (:issue:`20949`) +- Bug in :func:`DataFrameGroupBy.rolling().apply() ` where operations performed against the associated :class:`DataFrameGroupBy` object could impact the inclusion of the grouped item(s) in the result (:issue:`14013`) + +Sparse +^^^^^^ + +- Bug in which creating a :class:`SparseDataFrame` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`) +- Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`) +- Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`) +- Bug in constructing a :class:`SparseArray`: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`) + +Reshaping +^^^^^^^^^ + +- Bug in :func:`DataFrame.merge` where referencing a ``CategoricalIndex`` by name, where the ``by`` kwarg would ``KeyError`` (:issue:`20777`) +- Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) +- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`) +- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`) +- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) +- Fixed construction of a :class:`DataFrame` from a ``dict`` containing ``NaN`` as key (:issue:`18455`) +- Disabled construction of a :class:`Series` where len(index) > len(data) = 1, which previously would broadcast the data item, and now raises a ``ValueError`` (:issue:`18819`) +- Suppressed error in the construction of a :class:`DataFrame` from a ``dict`` containing scalar values when the 
corresponding keys are not included in the passed index (:issue:`18600`) + +- Fixed (changed from ``object`` to ``float64``) dtype of :class:`DataFrame` initialized with axes, no data, and ``dtype=int`` (:issue:`19646`) +- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) +- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`) +- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) +- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`) +- Bug in :func:`DataFrame.stack`, :func:`DataFrame.unstack`, :func:`Series.unstack` which were not returning subclasses (:issue:`15563`) +- Bug in timezone comparisons, manifesting as a conversion of the index to UTC in ``.concat()`` (:issue:`18523`) +- Bug in :func:`concat` when concatenating sparse and dense series it returns only a ``SparseDataFrame``. Should be a ``DataFrame``. 
(:issue:`18914`, :issue:`18686`, and :issue:`16874`) +- Improved error message for :func:`DataFrame.merge` when there is no common merge key (:issue:`19427`) +- Bug in :func:`DataFrame.join` which does an ``outer`` instead of a ``left`` join when being called with multiple DataFrames and some have non-unique indices (:issue:`19624`) +- :func:`Series.rename` now accepts ``axis`` as a kwarg (:issue:`18589`) +- Bug in :func:`~DataFrame.rename` where an Index of same-length tuples was converted to a MultiIndex (:issue:`19497`) +- Comparisons between :class:`Series` and :class:`Index` would return a ``Series`` with an incorrect name, ignoring the ``Index``'s name attribute (:issue:`19582`) +- Bug in :func:`qcut` where datetime and timedelta data with ``NaT`` present raised a ``ValueError`` (:issue:`19768`) +- Bug in :func:`DataFrame.iterrows`, which would infers strings not compliant to `ISO8601 `_ to datetimes (:issue:`19671`) +- Bug in :class:`Series` constructor with ``Categorical`` where a ``ValueError`` is not raised when an index of different length is given (:issue:`19342`) +- Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) +- Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`) +- Bug in :class:`Series` constructor with a ``dtype=str``, previously raised in some cases (:issue:`19853`) +- Bug in :func:`get_dummies`, and :func:`select_dtypes`, where duplicate column names caused incorrect behavior (:issue:`20848`) +- Bug in :func:`isna`, which cannot handle ambiguous typed lists (:issue:`20675`) +- Bug in :func:`concat` which raises an error when concatenating TZ-aware dataframes and all-NaT dataframes (:issue:`12396`) +- Bug in :func:`concat` which raises an error when concatenating empty TZ-aware series (:issue:`18447`) + +Other +^^^^^ + +- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` 
backed query (:issue:`18221`) +- Bug in accessing a :func:`pandas.get_option`, which raised ``KeyError`` rather than ``OptionError`` when looking up a non-existent option key in some cases (:issue:`19789`) +- Bug in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` for Series or DataFrames with differing unicode data (:issue:`20503`) + +.. _whatsnew_0.23.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.22.0..v0.23.0 diff --git a/doc/source/whatsnew/v0.23.1.rst b/doc/source/whatsnew/v0.23.1.rst new file mode 100644 index 00000000..03b7d9db --- /dev/null +++ b/doc/source/whatsnew/v0.23.1.rst @@ -0,0 +1,151 @@ +.. _whatsnew_0231: + +What's new in 0.23.1 (June 12, 2018) +------------------------------------ + +{{ header }} + + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.1 + :local: + :backlinks: none + +.. _whatsnew_0231.fixed_regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +**Comparing Series with datetime.date** + +We've reverted a 0.23.0 change to comparing a :class:`Series` holding datetimes and a ``datetime.date`` object (:issue:`21152`). +In pandas 0.22 and earlier, comparing a Series holding datetimes and ``datetime.date`` objects would coerce the ``datetime.date`` to a datetime before comparing. +This was inconsistent with Python, NumPy, and :class:`DatetimeIndex`, which never consider a datetime and ``datetime.date`` equal. + +In 0.23.0, we unified operations between DatetimeIndex and Series, and in the process changed comparisons between a Series of datetimes and ``datetime.date`` without warning. 
We've temporarily restored the 0.22.0 behavior, so datetimes and dates may again compare equal, but will restore the 0.23.0 behavior in a future release.
+- Bug in :meth:`~DataFrame.to_csv` causes encoding error when compression and encoding are specified (:issue:`21241`, :issue:`21118`) +- Bug preventing pandas from being importable with -OO optimization (:issue:`21071`) +- Bug in :meth:`Categorical.fillna` incorrectly raising a ``TypeError`` when `value` the individual categories are iterable and `value` is an iterable (:issue:`21097`, :issue:`19788`) +- Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`) +- Regression in :func:`pivot_table` where an ordered ``Categorical`` with missing + values for the pivot's ``index`` would give a mis-aligned result (:issue:`21133`) +- Fixed regression in merging on boolean index/columns (:issue:`21119`). + +.. _whatsnew_0231.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improved performance of :meth:`CategoricalIndex.is_monotonic_increasing`, :meth:`CategoricalIndex.is_monotonic_decreasing` and :meth:`CategoricalIndex.is_monotonic` (:issue:`21025`) +- Improved performance of :meth:`CategoricalIndex.is_unique` (:issue:`21107`) + + +.. 
_whatsnew_0231.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Groupby/resample/rolling** + +- Bug in :func:`DataFrame.agg` where applying multiple aggregation functions to a :class:`DataFrame` with duplicated column names would cause a stack overflow (:issue:`21063`) +- Bug in :func:`pandas.core.groupby.GroupBy.ffill` and :func:`pandas.core.groupby.GroupBy.bfill` where the fill within a grouping would not always be applied as intended due to the implementations' use of a non-stable sort (:issue:`21207`) +- Bug in :func:`pandas.core.groupby.GroupBy.rank` where results did not scale to 100% when specifying ``method='dense'`` and ``pct=True`` +- Bug in :func:`pandas.DataFrame.rolling` and :func:`pandas.Series.rolling` which incorrectly accepted a 0 window size rather than raising (:issue:`21286`) + +**Data-type specific** + +- Bug in :meth:`Series.str.replace()` where the method throws `TypeError` on Python 3.5.2 (:issue:`21078`) +- Bug in :class:`Timedelta` where passing a float with a unit would prematurely round the float precision (:issue:`14156`) +- Bug in :func:`pandas.testing.assert_index_equal` which raised ``AssertionError`` incorrectly, when comparing two :class:`CategoricalIndex` objects with param ``check_categorical=False`` (:issue:`19776`) + +**Sparse** + +- Bug in :attr:`SparseArray.shape` which previously only returned the shape :attr:`SparseArray.sp_values` (:issue:`21126`) + +**Indexing** + +- Bug in :meth:`Series.reset_index` where appropriate error was not raised with an invalid level name (:issue:`20925`) +- Bug in :func:`interval_range` when ``start``/``periods`` or ``end``/``periods`` are specified with float ``start`` or ``end`` (:issue:`21161`) +- Bug in :meth:`MultiIndex.set_names` where error raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`) +- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, :issue:`21253`) +- Bug in 
:meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`) + +**Plotting** + +- New keywords (sharex, sharey) to turn on/off sharing of x/y-axis by subplots generated with pandas.DataFrame().groupby().boxplot() (:issue:`20968`) + +**I/O** + +- Bug in IO methods specifying ``compression='zip'`` which produced uncompressed zip archives (:issue:`17778`, :issue:`21144`) +- Bug in :meth:`DataFrame.to_stata` which prevented exporting DataFrames to buffers and most file-like objects (:issue:`21041`) +- Bug in :meth:`read_stata` and :class:`StataReader` which did not correctly decode utf-8 strings on Python 3 from Stata 14 files (dta version 118) (:issue:`21244`) +- Bug in IO JSON :func:`read_json` reading empty JSON schema with ``orient='table'`` back to :class:`DataFrame` caused an error (:issue:`21287`) + +**Reshaping** + +- Bug in :func:`concat` where error was raised in concatenating :class:`Series` with numpy scalar and tuple names (:issue:`21015`) +- Bug in :func:`concat` warning message providing the wrong guidance for future behavior (:issue:`21101`) + +**Other** + +- Tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) +- Bug preventing pandas being used on Windows without C++ redistributable installed (:issue:`21106`) + +.. _whatsnew_0.23.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.0..v0.23.1 diff --git a/doc/source/whatsnew/v0.23.2.rst b/doc/source/whatsnew/v0.23.2.rst new file mode 100644 index 00000000..9f24092d --- /dev/null +++ b/doc/source/whatsnew/v0.23.2.rst @@ -0,0 +1,123 @@ +.. _whatsnew_0232: + +What's new in 0.23.2 (July 5, 2018) +----------------------------------- + +{{ header }} + + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. 
We recommend that all users upgrade to this version. + +.. note:: + + Pandas 0.23.2 is first pandas release that's compatible with + Python 3.7 (:issue:`20552`) + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.2 + :local: + :backlinks: none + +.. _whatsnew_0232.enhancements: + +Logical reductions over entire DataFrame +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`DataFrame.all` and :meth:`DataFrame.any` now accept ``axis=None`` to reduce over all axes to a scalar (:issue:`19976`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2], "B": [True, False]}) + df.all(axis=None) + + +This also provides compatibility with NumPy 1.15, which now dispatches to ``DataFrame.all``. +With NumPy 1.15 and pandas 0.23.1 or earlier, :func:`numpy.all` will no longer reduce over every axis: + +.. code-block:: python + + >>> # NumPy 1.15, pandas 0.23.1 + >>> np.any(pd.DataFrame({"A": [False], "B": [False]})) + A False + B False + dtype: bool + +With pandas 0.23.2, that will correctly return False, as it did with NumPy < 1.15. + +.. ipython:: python + + np.any(pd.DataFrame({"A": [False], "B": [False]})) + + +.. _whatsnew_0232.fixed_regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) +- Re-allowed duplicate level names of a ``MultiIndex``. Accessing a level that has a duplicate name by name still raises an error (:issue:`19029`). +- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) +- Fixed printing of DataFrames with hierarchical columns with long names (:issue:`21180`) +- Fixed regression in :meth:`~DataFrame.reindex` and :meth:`~DataFrame.groupby` + with a MultiIndex or multiple keys that contains categorical datetime-like values (:issue:`21390`). 
+- Fixed regression in unary negative operations with object dtype (:issue:`21380`) +- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) +- Fixed regression in :func:`to_clipboard` that defaulted to copying dataframes with space delimited instead of tab delimited (:issue:`21104`) + + +Build changes +~~~~~~~~~~~~~ + +- The source and binary distributions no longer include test data files, resulting in smaller download sizes. Tests relying on these data files will be skipped when using ``pandas.test()``. (:issue:`19320`) + +.. _whatsnew_0232.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Conversion** + +- Bug in constructing :class:`Index` with an iterator or generator (:issue:`21470`) +- Bug in :meth:`Series.nlargest` for signed and unsigned integer dtypes when the minimum value is present (:issue:`21426`) + +**Indexing** + +- Bug in :meth:`Index.get_indexer_non_unique` with categorical key (:issue:`21448`) +- Bug in comparison operations for :class:`MultiIndex` where error was raised on equality / inequality comparison involving a MultiIndex with ``nlevels == 1`` (:issue:`21149`) +- Bug in :meth:`DataFrame.drop` behaviour is not consistent for unique and non-unique indexes (:issue:`21494`) +- Bug in :func:`DataFrame.duplicated` with a large number of columns causing a 'maximum recursion depth exceeded' (:issue:`21524`). 
+ +**I/O** + +- Bug in :func:`read_csv` that caused it to incorrectly raise an error when ``nrows=0``, ``low_memory=True``, and ``index_col`` was not ``None`` (:issue:`21141`) +- Bug in :func:`json_normalize` when formatting the ``record_prefix`` with integer columns (:issue:`21536`) + +**Categorical** + +- Bug in rendering :class:`Series` with ``Categorical`` dtype in rare conditions under Python 2.7 (:issue:`21002`) + +**Timezones** + +- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) +- Bug in comparing :class:`DataFrame` with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) +- Bug in :meth:`DatetimeIndex.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError`` (:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) + +**Timedelta** + +- Bug in :class:`Timedelta` where non-zero timedeltas shorter than 1 microsecond were considered False (:issue:`21484`) + +.. _whatsnew_0.23.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.1..v0.23.2 diff --git a/doc/source/whatsnew/v0.23.3.rst b/doc/source/whatsnew/v0.23.3.rst new file mode 100644 index 00000000..bb8862a8 --- /dev/null +++ b/doc/source/whatsnew/v0.23.3.rst @@ -0,0 +1,16 @@ +.. 
_whatsnew_0233: + +What's new in 0.23.3 (July 7, 2018) +----------------------------------- + +{{ header }} + +This release fixes a build issue with the sdist for Python 3.7 (:issue:`21785`) +There are no other changes. + +.. _whatsnew_0.23.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.23.2..v0.23.3 diff --git a/doc/source/whatsnew/v0.23.4.rst b/doc/source/whatsnew/v0.23.4.rst new file mode 100644 index 00000000..eadac6f5 --- /dev/null +++ b/doc/source/whatsnew/v0.23.4.rst @@ -0,0 +1,47 @@ +.. _whatsnew_0234: + +What's new in 0.23.4 (August 3, 2018) +------------------------------------- + +{{ header }} + + +This is a minor bug-fix release in the 0.23.x series and includes some small regression fixes +and bug fixes. We recommend that all users upgrade to this version. + +.. warning:: + + Starting January 1, 2019, pandas feature releases will support Python 3 only. + See `Dropping Python 2.7 `_ for more. + +.. contents:: What's new in v0.23.4 + :local: + :backlinks: none + +.. _whatsnew_0234.fixed_regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Python 3.7 with Windows gave all missing values for rolling variance calculations (:issue:`21813`) + +.. _whatsnew_0234.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Groupby/resample/rolling** + +- Bug where calling :func:`DataFrameGroupBy.agg` with a list of functions including ``ohlc`` as the non-initial element would raise a ``ValueError`` (:issue:`21716`) +- Bug in ``roll_quantile`` caused a memory leak when calling ``.rolling(...).quantile(q)`` with ``q`` in (0,1) (:issue:`21965`) + +**Missing** + +- Bug in :func:`Series.clip` and :func:`DataFrame.clip` cannot accept list-like threshold containing ``NaN`` (:issue:`19992`) + +.. _whatsnew_0.23.4.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.23.3..v0.23.4 diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst new file mode 100644 index 00000000..85de0150 --- /dev/null +++ b/doc/source/whatsnew/v0.24.0.rst @@ -0,0 +1,1936 @@ +.. _whatsnew_0240: + +What's new in 0.24.0 (January 25, 2019) +--------------------------------------- + +.. warning:: + + The 0.24.x series of releases will be the last to support Python 2. Future feature + releases will support Python 3 only. See `Dropping Python 2.7 `_ for more + details. + +{{ header }} + +This is a major release from 0.23.4 and includes a number of API changes, new +features, enhancements, and performance improvements along with a large number +of bug fixes. + +Highlights include: + +* :ref:`Optional Integer NA Support ` +* :ref:`New APIs for accessing the array backing a Series or Index ` +* :ref:`A new top-level method for creating arrays ` +* :ref:`Store Interval and Period data in a Series or DataFrame ` +* :ref:`Support for joining on two MultiIndexes ` + + +Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. + +These are the changes in pandas 0.24.0. See :ref:`release` for a full changelog +including other versions of pandas. + + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_0240.enhancements.intna: + +Optional integer NA support +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas has gained the ability to hold integer dtypes with missing values. This long requested feature is enabled through the use of :ref:`extension types `. + +.. note:: + + IntegerArray is currently experimental. Its API or implementation may + change without warning. + +We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value +marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. 
(:issue:`20700`, :issue:`20747`, :issue:`22441`, :issue:`21789`, :issue:`22346`) + +.. ipython:: python + + s = pd.Series([1, 2, np.nan], dtype='Int64') + s + + +Operations on these dtypes will propagate ``NaN`` as other pandas operations. + +.. ipython:: python + + # arithmetic + s + 1 + + # comparison + s == 1 + + # indexing + s.iloc[1:3] + + # operate with other dtypes + s + s.iloc[1:3].astype('Int8') + + # coerce when needed + s + 0.01 + +These dtypes can operate as part of a ``DataFrame``. + +.. ipython:: python + + df = pd.DataFrame({'A': s, 'B': [1, 1, 3], 'C': list('aab')}) + df + df.dtypes + + +These dtypes can be merged, reshaped, and casted. + +.. ipython:: python + + pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes + df['A'].astype(float) + +Reduction and groupby operations such as ``sum`` work. + +.. ipython:: python + + df.sum() + df.groupby('B').A.sum() + +.. warning:: + + The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date. + +See :ref:`integer_na` for more. + + +.. _whatsnew_0240.values_api: + +Accessing the values in a Series or Index +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:attr:`Series.array` and :attr:`Index.array` have been added for extracting the array backing a +``Series`` or ``Index``. (:issue:`19954`, :issue:`23623`) + +.. ipython:: python + + idx = pd.period_range('2000', periods=4) + idx.array + pd.Series(idx).array + +Historically, this would have been done with ``series.values``, but with +``.values`` it was unclear whether the returned value would be the actual array, +some transformation of it, or one of pandas custom arrays (like +``Categorical``). For example, with :class:`PeriodIndex`, ``.values`` generates +a new ndarray of period objects each time. + +.. ipython:: python + + idx.values + id(idx.values) + id(idx.values) + +If you need an actual NumPy array, use :meth:`Series.to_numpy` or :meth:`Index.to_numpy`. 
+ +.. ipython:: python + + idx.to_numpy() + pd.Series(idx).to_numpy() + +For Series and Indexes backed by normal NumPy arrays, :attr:`Series.array` will return a +new :class:`arrays.PandasArray`, which is a thin (no-copy) wrapper around a +:class:`numpy.ndarray`. :class:`~arrays.PandasArray` isn't especially useful on its own, +but it does provide the same interface as any extension array defined in pandas or by +a third-party library. + +.. ipython:: python + + ser = pd.Series([1, 2, 3]) + ser.array + ser.to_numpy() + +We haven't removed or deprecated :attr:`Series.values` or :attr:`DataFrame.values`, but we +highly recommend and using ``.array`` or ``.to_numpy()`` instead. + +See :ref:`Dtypes ` and :ref:`Attributes and Underlying Data ` for more. + + +.. _whatsnew_0240.enhancements.array: + +``pandas.array``: a new top-level method for creating arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new top-level method :func:`array` has been added for creating 1-dimensional arrays (:issue:`22860`). +This can be used to create any :ref:`extension array `, including +extension arrays registered by :ref:`3rd party libraries `. +See the :ref:`dtypes docs ` for more on extension arrays. + +.. ipython:: python + + pd.array([1, 2, np.nan], dtype='Int64') + pd.array(['a', 'b', 'c'], dtype='category') + +Passing data for which there isn't dedicated extension type (e.g. float, integer, etc.) +will return a new :class:`arrays.PandasArray`, which is just a thin (no-copy) +wrapper around a :class:`numpy.ndarray` that satisfies the pandas extension array interface. + +.. ipython:: python + + pd.array([1, 2, 3]) + +On their own, a :class:`~arrays.PandasArray` isn't a very useful object. +But if you need write low-level code that works generically for any +:class:`~pandas.api.extensions.ExtensionArray`, :class:`~arrays.PandasArray` +satisfies that need. 
+ +Notice that by default, if no ``dtype`` is specified, the dtype of the returned +array is inferred from the data. In particular, note that the first example of +``[1, 2, np.nan]`` would have returned a floating-point array, since ``NaN`` +is a float. + +.. ipython:: python + + pd.array([1, 2, np.nan]) + + +.. _whatsnew_0240.enhancements.interval: + +Storing Interval and Period data in Series and DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Interval` and :class:`Period` data may now be stored in a :class:`Series` or :class:`DataFrame`, in addition to an +:class:`IntervalIndex` and :class:`PeriodIndex` like previously (:issue:`19453`, :issue:`22862`). + +.. ipython:: python + + ser = pd.Series(pd.interval_range(0, 5)) + ser + ser.dtype + +For periods: + +.. ipython:: python + + pser = pd.Series(pd.period_range("2000", freq="D", periods=5)) + pser + pser.dtype + +Previously, these would be cast to a NumPy array with object dtype. In general, +this should result in better performance when storing an array of intervals or periods +in a :class:`Series` or column of a :class:`DataFrame`. + +Use :attr:`Series.array` to extract the underlying array of intervals or periods +from the ``Series``: + +.. ipython:: python + + ser.array + pser.array + +These return an instance of :class:`arrays.IntervalArray` or :class:`arrays.PeriodArray`, +the new extension arrays that back interval and period data. + +.. warning:: + + For backwards compatibility, :attr:`Series.values` continues to return + a NumPy array of objects for Interval and Period data. We recommend + using :attr:`Series.array` when you need the array of data stored in the + ``Series``, and :meth:`Series.to_numpy` when you know you need a NumPy array. + + See :ref:`Dtypes ` and :ref:`Attributes and Underlying Data ` + for more. + + +.. 
_whatsnew_0240.enhancements.join_with_two_multiindexes: + +Joining with two multi-indexes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.merge` and :func:`DataFrame.join` can now be used to join multi-indexed ``Dataframe`` instances on the overlapping index levels (:issue:`6360`) + +See the :ref:`Merge, join, and concatenate +` documentation section. + +.. ipython:: python + + index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'), + ('K1', 'X2')], + names=['key', 'X']) + + left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], + 'B': ['B0', 'B1', 'B2']}, index=index_left) + + index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'), + ('K2', 'Y2'), ('K2', 'Y3')], + names=['key', 'Y']) + + right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], + 'D': ['D0', 'D1', 'D2', 'D3']}, index=index_right) + + left.join(right) + +For earlier versions this can be done using the following. + +.. ipython:: python + + pd.merge(left.reset_index(), right.reset_index(), + on=['key'], how='inner').set_index(['key', 'X', 'Y']) + +.. _whatsnew_0240.enhancements.read_html: + +``read_html`` Enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_html` previously ignored ``colspan`` and ``rowspan`` attributes. +Now it understands them, treating them as sequences of cells with the same +value. (:issue:`17054`) + +.. ipython:: python + + result = pd.read_html(""" +
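    + <table> + <thead> + <tr> + <th>A</th><th>B</th><th>C</th> + </tr> + </thead> + <tbody> + <tr> + <td colspan="2">1</td><td>2</td> + </tr> + </tbody> + </table>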
    """) + +*Previous behavior*: + +.. code-block:: ipython + + In [13]: result + Out [13]: + [ A B C + 0 1 2 NaN] + +*New behavior*: + +.. ipython:: python + + result + + +New ``Styler.pipe()`` method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The :class:`~pandas.io.formats.style.Styler` class has gained a +:meth:`~pandas.io.formats.style.Styler.pipe` method. This provides a +convenient way to apply users' predefined styling functions, and can help reduce +"boilerplate" when using DataFrame styling functionality repeatedly within a notebook. (:issue:`23229`) + +.. ipython:: python + + df = pd.DataFrame({'N': [1250, 1500, 1750], 'X': [0.25, 0.35, 0.50]}) + + def format_and_align(styler): + return (styler.format({'N': '{:,}', 'X': '{:.1%}'}) + .set_properties(**{'text-align': 'right'})) + + df.style.pipe(format_and_align).set_caption('Summary of results.') + +Similar methods already exist for other classes in pandas, including :meth:`DataFrame.pipe`, +:meth:`GroupBy.pipe() `, and :meth:`Resampler.pipe() `. + +.. _whatsnew_0240.enhancements.rename_axis: + +Renaming names in a MultiIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.rename_axis` now supports ``index`` and ``columns`` arguments +and :func:`Series.rename_axis` supports ``index`` argument (:issue:`19978`). + +This change allows a dictionary to be passed so that some of the names +of a ``MultiIndex`` can be changed. + +Example: + +.. ipython:: python + + mi = pd.MultiIndex.from_product([list('AB'), list('CD'), list('EF')], + names=['AB', 'CD', 'EF']) + df = pd.DataFrame(list(range(len(mi))), index=mi, columns=['N']) + df + df.rename_axis(index={'CD': 'New'}) + +See the :ref:`Advanced documentation on renaming` for more details. + +.. 
_whatsnew_0240.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ + +- :func:`merge` now directly allows merge between objects of type ``DataFrame`` and named ``Series``, without the need to convert the ``Series`` object into a ``DataFrame`` beforehand (:issue:`21220`) +- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`) +- ``FrozenList`` has gained the ``.union()`` and ``.difference()`` methods. This functionality greatly simplifies groupby's that rely on explicitly excluding certain columns. See :ref:`Splitting an object into groups ` for more information (:issue:`15475`, :issue:`15506`). +- :func:`DataFrame.to_parquet` now accepts ``index`` as an argument, allowing + the user to override the engine's default behavior to include or omit the + dataframe's indexes from the resulting Parquet file. (:issue:`20768`) +- :func:`read_feather` now accepts ``columns`` as an argument, allowing the user to specify which columns should be read. (:issue:`24025`) +- :meth:`DataFrame.corr` and :meth:`Series.corr` now accept a callable for generic calculation methods of correlation, e.g. histogram intersection (:issue:`22684`) +- :func:`DataFrame.to_string` now accepts ``decimal`` as an argument, allowing the user to specify which decimal separator should be used in the output. (:issue:`23614`) +- :func:`DataFrame.to_html` now accepts ``render_links`` as an argument, allowing the user to generate HTML with links to any URLs that appear in the DataFrame. + See the :ref:`section on writing HTML ` in the IO docs for example usage. (:issue:`2679`) +- :func:`pandas.read_csv` now supports pandas extension types as an argument to ``dtype``, allowing the user to use pandas extension types when reading CSVs. 
(:issue:`23228`) +- The :meth:`~DataFrame.shift` method now accepts `fill_value` as an argument, allowing the user to specify a value which will be used instead of NA/NaT in the empty periods. (:issue:`15486`) +- :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) +- :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether ``NaN``/``NaT`` values should be considered (:issue:`17534`) +- :func:`DataFrame.to_csv` and :func:`Series.to_csv` now support the ``compression`` keyword when a file handle is passed. (:issue:`21227`) +- :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) +- :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`) +- Added support for reading from/writing to Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`, :issue:`23094`) +- :func:`DataFrame.to_gbq` and :func:`read_gbq` signature and documentation updated to + reflect changes from the `Pandas-GBQ library version 0.8.0 + `__. + Adds a ``credentials`` argument, which enables the use of any kind of + `google-auth credentials + `__. (:issue:`21627`, + :issue:`22557`, :issue:`23662`) +- New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`) +- :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`) +- :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. 
This keeps all ties for the nth largest/smallest value (:issue:`16818`) +- :class:`IntervalIndex` has gained the :meth:`~IntervalIndex.set_closed` method to change the existing ``closed`` value (:issue:`21670`) +- :func:`~DataFrame.to_csv`, :func:`~Series.to_csv`, :func:`~DataFrame.to_json`, and :func:`~Series.to_json` now support ``compression='infer'`` to infer compression based on filename extension (:issue:`15008`). + The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`). +- :meth:`DataFrame.to_sql` now supports writing ``TIMESTAMP WITH TIME ZONE`` types for supported databases. For databases that don't support timezones, datetime data will be stored as timezone unaware local timestamps. See the :ref:`io.sql_datetime_data` for implications (:issue:`9086`). +- :func:`to_timedelta` now supports iso-formated timedelta strings (:issue:`21877`) +- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` objects in the constructor (:issue:`2193`) +- :class:`DatetimeIndex` has gained the :attr:`DatetimeIndex.timetz` attribute. This returns the local time with timezone information. (:issue:`21358`) +- :meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, and :meth:`~Timestamp.floor` for :class:`DatetimeIndex` and :class:`Timestamp` + now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`) + and a ``nonexistent`` argument for handling datetimes that are rounded to nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`22647`) +- The result of :meth:`~DataFrame.resample` is now iterable similar to ``groupby()`` (:issue:`15314`). +- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`pandas.core.resample.Resampler.quantile` (:issue:`15023`). 
+- :meth:`DataFrame.resample` and :meth:`Series.resample` with a :class:`PeriodIndex` will now respect the ``base`` argument in the same fashion as with a :class:`DatetimeIndex`. (:issue:`23882`) +- :meth:`pandas.api.types.is_list_like` has gained a keyword ``allow_sets`` which is ``True`` by default; if ``False``, + all instances of ``set`` will not be considered "list-like" anymore (:issue:`23061`) +- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). +- :meth:`Categorical.from_codes` now can take a ``dtype`` parameter as an alternative to passing ``categories`` and ``ordered`` (:issue:`24398`). +- New attribute ``__git_version__`` will return git commit sha of current build (:issue:`21295`). +- Compatibility with Matplotlib 3.0 (:issue:`22790`). +- Added :meth:`Interval.overlaps`, :meth:`arrays.IntervalArray.overlaps`, and :meth:`IntervalIndex.overlaps` for determining overlaps between interval-like objects (:issue:`21998`) +- :func:`read_fwf` now accepts keyword ``infer_nrows`` (:issue:`15138`). +- :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) +- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`, :issue:`24466`) +- :meth:`Index.difference`, :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference` now have an optional ``sort`` parameter to control whether the results should be sorted if possible (:issue:`17839`, :issue:`24471`) +- :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) +- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. 
:meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
+If installed, we now require: + ++-----------------+-----------------+----------+ +| Package | Minimum Version | Required | ++=================+=================+==========+ +| numpy | 1.12.0 | X | ++-----------------+-----------------+----------+ +| bottleneck | 1.2.0 | | ++-----------------+-----------------+----------+ +| fastparquet | 0.2.1 | | ++-----------------+-----------------+----------+ +| matplotlib | 2.0.0 | | ++-----------------+-----------------+----------+ +| numexpr | 2.6.1 | | ++-----------------+-----------------+----------+ +| pandas-gbq | 0.8.0 | | ++-----------------+-----------------+----------+ +| pyarrow | 0.9.0 | | ++-----------------+-----------------+----------+ +| pytables | 3.4.2 | | ++-----------------+-----------------+----------+ +| scipy | 0.18.1 | | ++-----------------+-----------------+----------+ +| xlrd | 1.0.0 | | ++-----------------+-----------------+----------+ +| pytest (dev) | 3.6 | | ++-----------------+-----------------+----------+ + +Additionally we no longer depend on ``feather-format`` for feather based storage +and replaced it with references to ``pyarrow`` (:issue:`21639` and :issue:`23053`). + +.. _whatsnew_0240.api_breaking.csv_line_terminator: + +`os.linesep` is used for ``line_terminator`` of ``DataFrame.to_csv`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`DataFrame.to_csv` now uses :func:`os.linesep` rather than ``'\n'`` +for the default line terminator (:issue:`20353`). +This change only affects when running on Windows, where ``'\r\n'`` was used for line terminator +even when ``'\n'`` was passed in ``line_terminator``. + +*Previous behavior* on Windows: + +.. code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: # When passing file PATH to to_csv, + ...: # line_terminator does not work, and csv is saved with '\r\n'. + ...: # Also, this converts all '\n's in the data to '\r\n'. 
+ ...: data.to_csv("test.csv", index=False, line_terminator='\n') + + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\r\n"a\r\nbc","a\r\r\nbc"\r\n' + + In [4]: # When passing file OBJECT with newline option to + ...: # to_csv, line_terminator works. + ...: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False, line_terminator='\n') + + In [5]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + Out[5]: b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + + +*New behavior* on Windows: + +Passing ``line_terminator`` explicitly sets the line terminator to that character. + +.. code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: data.to_csv("test.csv", index=False, line_terminator='\n') + + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\n"a\nbc","a\r\nbc"\n' + + +On Windows, the value of ``os.linesep`` is ``'\r\n'``, so if ``line_terminator`` is not +set, ``'\r\n'`` is used as the line terminator. + +.. code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: data.to_csv("test.csv", index=False) + + In [3]: with open("test.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + + +For file objects, specifying ``newline`` is not sufficient to set the line terminator. +You must pass in the ``line_terminator`` explicitly, even in this case. + +.. 
code-block:: ipython + + In [1]: data = pd.DataFrame({"string_with_lf": ["a\nbc"], + ...: "string_with_crlf": ["a\r\nbc"]}) + + In [2]: with open("test2.csv", mode='w', newline='\n') as f: + ...: data.to_csv(f, index=False) + + In [3]: with open("test2.csv", mode='rb') as f: + ...: print(f.read()) + Out[3]: b'string_with_lf,string_with_crlf\r\n"a\nbc","a\r\nbc"\r\n' + +.. _whatsnew_0240.bug_fixes.nan_with_str_dtype: + +Proper handling of `np.NaN` in a string data-typed column with the Python engine +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +There was a bug in :func:`read_excel` and :func:`read_csv` with the Python +engine, where missing values turned to ``'nan'`` with ``dtype=str`` and +``na_filter=True``. Now, these missing values are converted to the string +missing indicator, ``np.nan``. (:issue:`20377`) + +.. ipython:: python + :suppress: + + from io import StringIO + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: data = 'a,b,c\n1,,3\n4,5,6' + In [6]: df = pd.read_csv(StringIO(data), engine='python', dtype=str, na_filter=True) + In [7]: df.loc[0, 'b'] + Out[7]: + 'nan' + +*New behavior*: + +.. ipython:: python + + data = 'a,b,c\n1,,3\n4,5,6' + df = pd.read_csv(StringIO(data), engine='python', dtype=str, na_filter=True) + df.loc[0, 'b'] + +Notice how we now instead output ``np.nan`` itself instead of a stringified form of it. + +.. _whatsnew_0240.api.timezone_offset_parsing: + +Parsing datetime strings with timezone offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, parsing datetime strings with UTC offsets with :func:`to_datetime` +or :class:`DatetimeIndex` would automatically convert the datetime to UTC +without timezone localization. This is inconsistent from parsing the same +datetime string with :class:`Timestamp` which would preserve the UTC +offset in the ``tz`` attribute. 
Now, :func:`to_datetime` preserves the UTC +offset in the ``tz`` attribute when all the datetime strings have the same +UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30") + Out[2]: Timestamp('2015-11-18 10:00:00') + + In [3]: pd.Timestamp("2015-11-18 15:30:00+05:30") + Out[3]: Timestamp('2015-11-18 15:30:00+0530', tz='pytz.FixedOffset(330)') + + # Different UTC offsets would automatically convert the datetimes to UTC (without a UTC timezone) + In [4]: pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"]) + Out[4]: DatetimeIndex(['2015-11-18 10:00:00', '2015-11-18 10:00:00'], dtype='datetime64[ns]', freq=None) + +*New behavior*: + +.. ipython:: python + + pd.to_datetime("2015-11-18 15:30:00+05:30") + pd.Timestamp("2015-11-18 15:30:00+05:30") + +Parsing datetime strings with the same UTC offset will preserve the UTC offset in the ``tz`` + +.. ipython:: python + + pd.to_datetime(["2015-11-18 15:30:00+05:30"] * 2) + +Parsing datetime strings with different UTC offsets will now create an Index of +``datetime.datetime`` objects with different UTC offsets + +.. ipython:: python + + idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30"]) + idx + idx[0] + idx[1] + +Passing ``utc=True`` will mimic the previous behavior but will correctly indicate +that the dates have been converted to UTC + +.. ipython:: python + + pd.to_datetime(["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30"], utc=True) + + +.. _whatsnew_0240.api_breaking.read_csv_mixed_tz: + +Parsing mixed-timezones with :func:`read_csv` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`read_csv` no longer silently converts mixed-timezone columns to UTC (:issue:`24987`). + +*Previous behavior* + +.. code-block:: python + + >>> import io + >>> content = """\ + ... a + ... 2000-01-01T00:00:00+05:00 + ... 
2000-01-01T00:00:00+06:00""" + >>> df = pd.read_csv(io.StringIO(content), parse_dates=['a']) + >>> df.a + 0 1999-12-31 19:00:00 + 1 1999-12-31 18:00:00 + Name: a, dtype: datetime64[ns] + +*New behavior* + +.. ipython:: python + + import io + content = """\ + a + 2000-01-01T00:00:00+05:00 + 2000-01-01T00:00:00+06:00""" + df = pd.read_csv(io.StringIO(content), parse_dates=['a']) + df.a + +As can be seen, the ``dtype`` is object; each value in the column is a string. +To convert the strings to an array of datetimes, use the ``date_parser`` argument: + +.. ipython:: python + + df = pd.read_csv(io.StringIO(content), parse_dates=['a'], + date_parser=lambda col: pd.to_datetime(col, utc=True)) + df.a + +See :ref:`whatsnew_0240.api.timezone_offset_parsing` for more. + +.. _whatsnew_0240.api_breaking.period_end_time: + +Time values in ``dt.end_time`` and ``to_timestamp(how='end')`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The time values in :class:`Period` and :class:`PeriodIndex` objects are now set +to '23:59:59.999999999' when calling :attr:`Series.dt.end_time`, :attr:`Period.end_time`, +:attr:`PeriodIndex.end_time`, :func:`Period.to_timestamp()` with ``how='end'``, +or :func:`PeriodIndex.to_timestamp()` with ``how='end'`` (:issue:`17157`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: p = pd.Period('2017-01-01', 'D') + In [3]: pi = pd.PeriodIndex([p]) + + In [4]: pd.Series(pi).dt.end_time[0] + Out[4]: Timestamp(2017-01-01 00:00:00) + + In [5]: p.end_time + Out[5]: Timestamp(2017-01-01 23:59:59.999999999) + +*New behavior*: + +Calling :attr:`Series.dt.end_time` will now result in a time of '23:59:59.999999999' as +is the case with :attr:`Period.end_time`, for example + +.. ipython:: python + + p = pd.Period('2017-01-01', 'D') + pi = pd.PeriodIndex([p]) + + pd.Series(pi).dt.end_time[0] + + p.end_time + +.. 
_whatsnew_0240.api_breaking.datetime_unique: + +Series.unique for Timezone-Aware Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The return type of :meth:`Series.unique` for datetime with timezone values has changed +from a :class:`numpy.ndarray` of :class:`Timestamp` objects to a :class:`arrays.DatetimeArray` (:issue:`24024`). + +.. ipython:: python + + ser = pd.Series([pd.Timestamp('2000', tz='UTC'), + pd.Timestamp('2000', tz='UTC')]) + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: ser.unique() + Out[3]: array([Timestamp('2000-01-01 00:00:00+0000', tz='UTC')], dtype=object) + + +*New behavior*: + +.. ipython:: python + + ser.unique() + + +.. _whatsnew_0240.api_breaking.sparse_values: + +Sparse data structure refactor +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``SparseArray``, the array backing ``SparseSeries`` and the columns in a ``SparseDataFrame``, +is now an extension array (:issue:`21978`, :issue:`19056`, :issue:`22835`). +To conform to this interface and for consistency with the rest of pandas, some API breaking +changes were made: + +- ``SparseArray`` is no longer a subclass of :class:`numpy.ndarray`. To convert a ``SparseArray`` to a NumPy array, use :func:`numpy.asarray`. +- ``SparseArray.dtype`` and ``SparseSeries.dtype`` are now instances of :class:`SparseDtype`, rather than ``np.dtype``. Access the underlying dtype with ``SparseDtype.subtype``. +- ``numpy.asarray(sparse_array)`` now returns a dense array with all the values, not just the non-fill-value values (:issue:`14167`) +- ``SparseArray.take`` now matches the API of :meth:`pandas.api.extensions.ExtensionArray.take` (:issue:`19506`): + + * The default value of ``allow_fill`` has changed from ``False`` to ``True``. + * The ``out`` and ``mode`` parameters are no longer accepted (previously, this raised if they were specified). + * Passing a scalar for ``indices`` is no longer allowed. 
+ +- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. +- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray. +- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed. +- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`). +- The result of :meth:`Series.where` is now a ``Series`` with sparse values, like with other extension arrays (:issue:`24077`) + +Some new warnings are issued for operations that require or are likely to materialize a large dense array: + +- A :class:`errors.PerformanceWarning` is issued when using fillna with a ``method``, as a dense array is constructed to create the filled array. Filling with a ``value`` is the efficient way to fill a sparse array. +- A :class:`errors.PerformanceWarning` is now issued when concatenating sparse Series with differing fill values. The fill value from the first sparse array continues to be used. + +In addition to these API breaking changes, many :ref:`Performance Improvements and Bug Fixes have been made `. + +Finally, a ``Series.sparse`` accessor was added to provide sparse-specific methods like :meth:`Series.sparse.from_coo`. + +.. ipython:: python + + s = pd.Series([0, 0, 1, 1, 1], dtype='Sparse[int]') + s.sparse.density + +.. _whatsnew_0240.api_breaking.get_dummies: + +:meth:`get_dummies` always returns a DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, when ``sparse=True`` was passed to :func:`get_dummies`, the return value could be either +a :class:`DataFrame` or a :class:`SparseDataFrame`, depending on whether all or a just a subset +of the columns were dummy-encoded. 
Now, a :class:`DataFrame` is always returned (:issue:`24284`). + +*Previous behavior* + +The first :func:`get_dummies` returns a :class:`DataFrame` because the column ``A`` +is not dummy encoded. When just ``["B", "C"]`` are passed to ``get_dummies``, +then all the columns are dummy-encoded, and a :class:`SparseDataFrame` was returned. + +.. code-block:: ipython + + In [2]: df = pd.DataFrame({"A": [1, 2], "B": ['a', 'b'], "C": ['a', 'a']}) + + In [3]: type(pd.get_dummies(df, sparse=True)) + Out[3]: pandas.core.frame.DataFrame + + In [4]: type(pd.get_dummies(df[['B', 'C']], sparse=True)) + Out[4]: pandas.core.sparse.frame.SparseDataFrame + +.. ipython:: python + :suppress: + + df = pd.DataFrame({"A": [1, 2], "B": ['a', 'b'], "C": ['a', 'a']}) + +*New behavior* + +Now, the return type is consistently a :class:`DataFrame`. + +.. ipython:: python + + type(pd.get_dummies(df, sparse=True)) + type(pd.get_dummies(df[['B', 'C']], sparse=True)) + +.. note:: + + There's no difference in memory usage between a :class:`SparseDataFrame` + and a :class:`DataFrame` with sparse values. The memory usage will + be the same as in the previous version of pandas. + +.. _whatsnew_0240.api_breaking.frame_to_dict_index_orient: + +Raise ValueError in ``DataFrame.to_dict(orient='index')`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Bug in :func:`DataFrame.to_dict` raises ``ValueError`` when used with +``orient='index'`` and a non-unique index instead of losing data (:issue:`22801`) + +.. ipython:: python + :okexcept: + + df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]}, index=['A', 'A']) + df + + df.to_dict(orient='index') + +.. _whatsnew_0240.api.datetimelike.normalize: + +Tick DateOffset normalize restrictions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Creating a ``Tick`` object (:class:`Day`, :class:`Hour`, :class:`Minute`, +:class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano`) with +``normalize=True`` is no longer supported. 
This prevents unexpected behavior +where addition could fail to be monotone or associative. (:issue:`21427`) + +*Previous behavior*: + +.. code-block:: ipython + + + In [2]: ts = pd.Timestamp('2018-06-11 18:01:14') + + In [3]: ts + Out[3]: Timestamp('2018-06-11 18:01:14') + + In [4]: tic = pd.offsets.Hour(n=2, normalize=True) + ...: + + In [5]: tic + Out[5]: <2 * Hours> + + In [6]: ts + tic + Out[6]: Timestamp('2018-06-11 00:00:00') + + In [7]: ts + tic + tic + tic == ts + (tic + tic + tic) + Out[7]: False + +*New behavior*: + +.. ipython:: python + + ts = pd.Timestamp('2018-06-11 18:01:14') + tic = pd.offsets.Hour(n=2) + ts + tic + tic + tic == ts + (tic + tic + tic) + + +.. _whatsnew_0240.api.datetimelike: + + +.. _whatsnew_0240.api.period_subtraction: + +Period subtraction +^^^^^^^^^^^^^^^^^^ + +Subtraction of a ``Period`` from another ``Period`` will give a ``DateOffset`` +instead of an integer (:issue:`21314`) + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: june = pd.Period('June 2018') + + In [3]: april = pd.Period('April 2018') + + In [4]: june - april + Out [4]: 2 + +*New behavior*: + +.. ipython:: python + + june = pd.Period('June 2018') + april = pd.Period('April 2018') + june - april + +Similarly, subtraction of a ``Period`` from a ``PeriodIndex`` will now return +an ``Index`` of ``DateOffset`` objects instead of an ``Int64Index`` + +*Previous behavior*: + +.. code-block:: ipython + + In [2]: pi = pd.period_range('June 2018', freq='M', periods=3) + + In [3]: pi - pi[0] + Out[3]: Int64Index([0, 1, 2], dtype='int64') + +*New behavior*: + +.. ipython:: python + + pi = pd.period_range('June 2018', freq='M', periods=3) + pi - pi[0] + + +.. 
_whatsnew_0240.api.timedelta64_subtract_nan: + +Addition/subtraction of ``NaN`` from :class:`DataFrame` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Adding or subtracting ``NaN`` from a :class:`DataFrame` column with +``timedelta64[ns]`` dtype will now raise a ``TypeError`` instead of returning +all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and +``Series`` behavior (:issue:`22163`) + +.. ipython:: python + + df = pd.DataFrame([pd.Timedelta(days=1)]) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: df = pd.DataFrame([pd.Timedelta(days=1)]) + + In [5]: df - np.nan + Out[5]: + 0 + 0 NaT + +*New behavior*: + +.. code-block:: ipython + + In [2]: df - np.nan + ... + TypeError: unsupported operand type(s) for -: 'TimedeltaIndex' and 'float' + +.. _whatsnew_0240.api.dataframe_cmp_broadcasting: + +DataFrame comparison operations broadcasting changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the broadcasting behavior of :class:`DataFrame` comparison +operations (``==``, ``!=``, ...) was inconsistent with the behavior of +arithmetic operations (``+``, ``-``, ...). The behavior of the comparison +operations has been changed to match the arithmetic operations in these cases. +(:issue:`22880`) + +The affected cases are: + +- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`). +- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`). +- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`). + +.. ipython:: python + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + df + +*Previous behavior*: + +.. 
code-block:: ipython + + In [5]: df == arr[[0], :] + ...: # comparison previously broadcast where arithmetic would raise + Out[5]: + 0 1 + 0 True True + 1 False False + 2 False False + In [6]: df + arr[[0], :] + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + + In [7]: df == (1, 2) + ...: # length matches number of columns; + ...: # comparison previously raised where arithmetic would broadcast + ... + ValueError: Invalid broadcasting comparison [(1, 2)] with block values + In [8]: df + (1, 2) + Out[8]: + 0 1 + 0 1 3 + 1 3 5 + 2 5 7 + + In [9]: df == (1, 2, 3) + ...: # length matches number of rows + ...: # comparison previously broadcast where arithmetic would raise + Out[9]: + 0 1 + 0 False True + 1 True False + 2 False False + In [10]: df + (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + +*New behavior*: + +.. ipython:: python + + # Comparison operations and arithmetic operations both broadcast. + df == arr[[0], :] + df + arr[[0], :] + +.. ipython:: python + + # Comparison operations and arithmetic operations both broadcast. + df == (1, 2) + df + (1, 2) + +.. code-block:: ipython + + # Comparison operations and arithmetic operations both raise ValueError. + In [6]: df == (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + + In [7]: df + (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + +.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting: + +DataFrame arithmetic operations broadcasting changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`DataFrame` arithmetic operations when operating with 2-dimensional +``np.ndarray`` objects now broadcast in the same way as ``np.ndarray`` +broadcast. (:issue:`23000`) + +.. ipython:: python + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: df + arr[[0], :] # 1 row, 2 columns + ... 
+ ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + In [6]: df + arr[:, [1]] # 1 column, 3 rows + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (3, 1) + +*New behavior*: + +.. ipython:: python + + df + arr[[0], :] # 1 row, 2 columns + df + arr[:, [1]] # 1 column, 3 rows + +.. _whatsnew_0240.api.incompatibilities: + +Series and Index data-dtype incompatibilities +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``Series`` and ``Index`` constructors now raise when the +data is incompatible with a passed ``dtype=`` (:issue:`15832`) + +*Previous behavior*: + +.. code-block:: ipython + + In [4]: pd.Series([-1], dtype="uint64") + Out [4]: + 0 18446744073709551615 + dtype: uint64 + +*New behavior*: + +.. code-block:: ipython + + In [4]: pd.Series([-1], dtype="uint64") + Out [4]: + ... + OverflowError: Trying to coerce negative values to unsigned integers + +.. _whatsnew_0240.api.concat_categorical: + +Concatenation Changes +^^^^^^^^^^^^^^^^^^^^^ + +Calling :func:`pandas.concat` on a ``Categorical`` of ints with NA values now +causes them to be processed as objects when concatenating with anything +other than another ``Categorical`` of ints (:issue:`19214`) + +.. ipython:: python + + s = pd.Series([0, 1, np.nan]) + c = pd.Series([0, 1, np.nan], dtype="category") + +*Previous behavior* + +.. code-block:: ipython + + In [3]: pd.concat([s, c]) + Out[3]: + 0 0.0 + 1 1.0 + 2 NaN + 0 0.0 + 1 1.0 + 2 NaN + dtype: float64 + +*New behavior* + +.. ipython:: python + + pd.concat([s, c]) + +Datetimelike API changes +^^^^^^^^^^^^^^^^^^^^^^^^ + +- For :class:`DatetimeIndex` and :class:`TimedeltaIndex` with non-``None`` ``freq`` attribute, addition or subtraction of integer-dtyped array or ``Index`` will return an object of the same class (:issue:`19959`) +- :class:`DateOffset` objects are now immutable. 
Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) +- :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) +- :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) +- :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) +- :meth:`PeriodIndex.tz_convert` and :meth:`PeriodIndex.tz_localize` have been removed (:issue:`21781`) + +.. _whatsnew_0240.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) +- :meth:`Series.str.cat` will now raise if ``others`` is a ``set`` (:issue:`23009`) +- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) +- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) +- :func:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`) +- The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. 
(:issue:`22420`) +- Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`) +- :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`) +- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`) +- Accessing a level of a ``MultiIndex`` with a duplicate name (e.g. in + :meth:`~MultiIndex.get_level_values`) now raises a ``ValueError`` instead of a ``KeyError`` (:issue:`21678`). +- Invalid construction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`) +- Trying to reindex a ``DataFrame`` with a non unique ``MultiIndex`` now raises a ``ValueError`` instead of an ``Exception`` (:issue:`21770`) +- :class:`Index` subtraction will attempt to operate element-wise instead of raising ``TypeError`` (:issue:`19369`) +- :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`) +- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`) +- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`) +- :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, raises a ``ValueError`` for incorrect types, + and will not fail on duplicate column names with ``drop=True``. 
(:issue:`22484`) +- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) +- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`) +- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`). +- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`). +- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`). +- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`). +- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`) +- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`). +- :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` respectively ``Index.tolist`` (:issue:`8826`) +- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`). +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` no longer ignore the dtype precision. Passing a non-nanosecond resolution dtype will raise a ``ValueError`` (:issue:`24753`) + + +.. _whatsnew_0240.api.extension: + +Extension type changes +~~~~~~~~~~~~~~~~~~~~~~ + +**Equality and hashability** + +Pandas now requires that extension dtypes be hashable (i.e. 
the respective +``ExtensionDtype`` objects; hashability is not a requirement for the values +of the corresponding ``ExtensionArray``). The base class implements +a default ``__eq__`` and ``__hash__``. If you have a parametrized dtype, you should +update the ``ExtensionDtype._metadata`` tuple to match the signature of your +``__init__`` method. See :class:`pandas.api.extensions.ExtensionDtype` for more (:issue:`22476`). + +**New and changed methods** + +- :meth:`~pandas.api.types.ExtensionArray.dropna` has been added (:issue:`21185`) +- :meth:`~pandas.api.types.ExtensionArray.repeat` has been added (:issue:`24349`) +- The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) +- :meth:`pandas.api.extensions.ExtensionArray.shift` added as part of the basic ``ExtensionArray`` interface (:issue:`22387`). +- :meth:`~pandas.api.types.ExtensionArray.searchsorted` has been added (:issue:`24350`) +- Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) +- :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). + +**Dtype changes** + +- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore + the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) +- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`). +- Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`) +- Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) + +.. 
_whatsnew_0240.enhancements.extension_array_operators: + +**Operator support** + +A ``Series`` based on an ``ExtensionArray`` now supports arithmetic and comparison +operators (:issue:`19577`). There are two approaches for providing operator support for an ``ExtensionArray``: + +1. Define each of the operators on your ``ExtensionArray`` subclass. +2. Use an operator implementation from pandas that depends on operators that are already defined + on the underlying elements (scalars) of the ``ExtensionArray``. + +See the :ref:`ExtensionArray Operator Support +` documentation section for details on both +ways of adding operator support. + +**Other changes** + +- A default repr for :class:`pandas.api.extensions.ExtensionArray` is now provided (:issue:`23601`). +- :meth:`ExtensionArray._formatting_values` is deprecated. Use :attr:`ExtensionArray._formatter` instead. (:issue:`23601`) +- An ``ExtensionArray`` with a boolean dtype now works correctly as a boolean indexer. :meth:`pandas.api.types.is_bool_dtype` now properly considers them boolean (:issue:`22326`) + +**Bug fixes** + +- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) +- :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`) +- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) +- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) +- :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185`). 
+- Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) +- Bug when concatenating multiple ``Series`` with different extension dtypes not casting to object dtype (:issue:`22994`) +- Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`) +- :meth:`DataFrame.stack` no longer converts to object dtype for DataFrames where each column has the same extension dtype. The output Series will have the same dtype as the columns (:issue:`23077`). +- :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`). +- Bug when grouping :meth:`Dataframe.groupby()` and aggregating on ``ExtensionArray`` it was not returning the actual ``ExtensionArray`` dtype (:issue:`23227`). +- Bug in :func:`pandas.merge` when merging on an extension array-backed column (:issue:`23020`). + + +.. _whatsnew_0240.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- :attr:`MultiIndex.labels` has been deprecated and replaced by :attr:`MultiIndex.codes`. + The functionality is unchanged. The new name better reflects the natures of + these codes and makes the ``MultiIndex`` API more similar to the API for :class:`CategoricalIndex` (:issue:`13443`). + As a consequence, other uses of the name ``labels`` in ``MultiIndex`` have also been deprecated and replaced with ``codes``: + + - You should initialize a ``MultiIndex`` instance using a parameter named ``codes`` rather than ``labels``. + - ``MultiIndex.set_labels`` has been deprecated in favor of :meth:`MultiIndex.set_codes`. + - For method :meth:`MultiIndex.copy`, the ``labels`` parameter has been deprecated and replaced by a ``codes`` parameter. 
+- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`) +- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) +- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) +- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) +- The signature of :meth:`Series.to_csv` has been uniformed to that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, the ``header`` argument now defaults to ``True``. (:issue:`19715`) +- :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) +- :func:`pandas.read_table` is deprecated. Instead, use :func:`read_csv` passing ``sep='\t'`` if necessary. This deprecation has been removed in 0.25.0. (:issue:`21948`) +- :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain + many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`) +- :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) +- :func:`DatetimeIndex.shift` and :func:`PeriodIndex.shift` now accept ``periods`` argument instead of ``n`` for consistency with :func:`Index.shift` and :func:`Series.shift`. Using ``n`` throws a deprecation warning (:issue:`22458`, :issue:`22912`) +- The ``fastpath`` keyword of the different Index constructors is deprecated (:issue:`23110`). 
+- :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have deprecated the ``errors`` argument in favor of the ``nonexistent`` argument (:issue:`8917`) +- The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) +- The methods :meth:`DataFrame.update` and :meth:`Panel.update` have deprecated the ``raise_conflict=False|True`` keyword in favor of ``errors='ignore'|'raise'`` (:issue:`23585`) +- The methods :meth:`Series.str.partition` and :meth:`Series.str.rpartition` have deprecated the ``pat`` keyword in favor of ``sep`` (:issue:`22676`) +- Deprecated the ``nthreads`` keyword of :func:`pandas.read_feather` in favor of ``use_threads`` to reflect the changes in ``pyarrow>=0.11.0``. (:issue:`23053`) +- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`) +- Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) +- Constructing a :class:`DatetimeIndex` from data with ``timedelta64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23675`) +- The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of + :meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`). +- Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. 
Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) +- :func:`pandas.api.types.is_period` is deprecated in favor of ``pandas.api.types.is_period_dtype`` (:issue:`23917`) +- :func:`pandas.api.types.is_datetimetz` is deprecated in favor of ``pandas.api.types.is_datetime64tz`` (:issue:`23917`) +- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) +- Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). +- The ``skipna`` parameter of :meth:`~pandas.api.types.infer_dtype` will switch to ``True`` by default in a future version of pandas (:issue:`17066`, :issue:`24050`) +- In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`). +- :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`) +- :meth:`Series.nonzero` is deprecated and will be removed in a future version (:issue:`18262`) +- Passing an integer to :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtypes is deprecated, will raise ``TypeError`` in a future version. Use ``obj.fillna(pd.Timedelta(...))`` instead (:issue:`24694`) +- ``Series.cat.categorical``, ``Series.cat.name`` and ``Series.cat.index`` have been deprecated. Use the attributes on ``Series.cat`` or ``Series`` directly. (:issue:`24751`). 
+- Passing a dtype without a precision like ``np.dtype('datetime64')`` or ``timedelta64`` to :class:`Index`, :class:`DatetimeIndex` and :class:`TimedeltaIndex` is now deprecated. Use the nanosecond-precision dtype instead (:issue:`24753`). + +.. _whatsnew_0240.deprecations.datetimelike_int_ops: + +Integer addition/subtraction with datetimes and timedeltas is deprecated +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In the past, users could—in some cases—add or subtract integers or integer-dtype +arrays from :class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`. + +This usage is now deprecated. Instead add or subtract integer multiples of +the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`). + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour()) + In [6]: ts + 2 + Out[6]: Timestamp('1994-05-06 14:15:16', freq='H') + + In [7]: tdi = pd.timedelta_range('1D', periods=2) + In [8]: tdi - np.array([2, 1]) + Out[8]: TimedeltaIndex(['-1 days', '1 days'], dtype='timedelta64[ns]', freq=None) + + In [9]: dti = pd.date_range('2001-01-01', periods=2, freq='7D') + In [10]: dti + pd.Index([1, 2]) + Out[10]: DatetimeIndex(['2001-01-08', '2001-01-22'], dtype='datetime64[ns]', freq=None) + +*New behavior*: + +.. ipython:: python + :okwarning: + + ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour()) + ts + 2 * ts.freq + + tdi = pd.timedelta_range('1D', periods=2) + tdi - np.array([2 * tdi.freq, 1 * tdi.freq]) + + dti = pd.date_range('2001-01-01', periods=2, freq='7D') + dti + pd.Index([1 * dti.freq, 2 * dti.freq]) + + +.. _whatsnew_0240.deprecations.integer_tz: + +Passing integer data and a timezone to datetimeindex +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The behavior of :class:`DatetimeIndex` when passed integer data and +a timezone is changing in a future version of pandas. 
Previously, these +were interpreted as wall times in the desired timezone. In the future, +these will be interpreted as wall times in UTC, which are then converted +to the desired timezone (:issue:`24559`). + +The default behavior remains the same, but issues a warning: + +.. code-block:: ipython + + In [3]: pd.DatetimeIndex([946684800000000000], tz="US/Central") + /bin/ipython:1: FutureWarning: + Passing integer-dtype data and a timezone to DatetimeIndex. Integer values + will be interpreted differently in a future version of pandas. Previously, + these were viewed as datetime64[ns] values representing the wall time + *in the specified timezone*. In the future, these will be viewed as + datetime64[ns] values representing the wall time *in UTC*. This is similar + to a nanosecond-precision UNIX epoch. To accept the future behavior, use + + pd.to_datetime(integer_data, utc=True).tz_convert(tz) + + To keep the previous behavior, use + + pd.to_datetime(integer_data).tz_localize(tz) + + #!/bin/python3 + Out[3]: DatetimeIndex(['2000-01-01 00:00:00-06:00'], dtype='datetime64[ns, US/Central]', freq=None) + +As the warning message explains, opt in to the future behavior by specifying that +the integer values are UTC, and then converting to the final timezone: + +.. ipython:: python + + pd.to_datetime([946684800000000000], utc=True).tz_convert('US/Central') + +The old behavior can be retained by localizing directly to the final timezone: + +.. ipython:: python + + pd.to_datetime([946684800000000000]).tz_localize('US/Central') + +.. _whatsnew_0240.deprecations.tz_aware_array: + +Converting timezone-aware Series and Index to NumPy arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The conversion from a :class:`Series` or :class:`Index` with timezone-aware +datetime data will change to preserve timezones by default (:issue:`23569`). + +NumPy doesn't have a dedicated dtype for timezone-aware datetimes. 
+In the past, converting a :class:`Series` or :class:`DatetimeIndex` with +timezone-aware datetimes would convert to a NumPy array by + +1. converting the tz-aware data to UTC +2. dropping the timezone-info +3. returning a :class:`numpy.ndarray` with ``datetime64[ns]`` dtype + +Future versions of pandas will preserve the timezone information by returning an +object-dtype NumPy array where each value is a :class:`Timestamp` with the correct +timezone attached + +.. ipython:: python + + ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + ser + +The default behavior remains the same, but issues a warning + +.. code-block:: python + + In [8]: np.asarray(ser) + /bin/ipython:1: FutureWarning: Converting timezone-aware DatetimeArray to timezone-naive + ndarray with 'datetime64[ns]' dtype. In the future, this will return an ndarray + with 'object' dtype where each element is a 'pandas.Timestamp' with the correct 'tz'. + + To accept the future behavior, pass 'dtype=object'. + To keep the old behavior, pass 'dtype="datetime64[ns]"'. + #!/bin/python3 + Out[8]: + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], + dtype='datetime64[ns]') + +The previous or future behavior can be obtained, without any warnings, by specifying +the ``dtype`` + +*Previous behavior* + +.. ipython:: python + + np.asarray(ser, dtype='datetime64[ns]') + +*Future behavior* + +.. ipython:: python + + # New behavior + np.asarray(ser, dtype=object) + + +Or by using :meth:`Series.to_numpy` + +.. ipython:: python + + ser.to_numpy() + ser.to_numpy(dtype="datetime64[ns]") + +All the above applies to a :class:`DatetimeIndex` with tz-aware values as well. + +.. 
_whatsnew_0240.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`) +- :meth:`Series.repeat` has renamed the ``reps`` argument to ``repeats`` (:issue:`14645`) +- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`) +- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`) +- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`) +- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`) +- The ``Series`` constructor and ``.astype`` method will now raise a ``ValueError`` if timestamp dtypes are passed in without a unit (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15987`) +- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`) +- The modules ``pandas.types``, ``pandas.computation``, and ``pandas.util.decorators`` have been removed (:issue:`16157`, :issue:`16250`) +- Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`) +- ``pandas.pnow``, ``pandas.match``, ``pandas.groupby``, ``pd.get_store``, ``pd.Expr``, and ``pd.Term`` have been removed (:issue:`15538`, :issue:`15940`) +- :meth:`Categorical.searchsorted` and :meth:`Series.searchsorted` have renamed the ``v`` argument to ``value`` (:issue:`14645`) +- ``pandas.parser``, ``pandas.lib``, and ``pandas.tslib`` have been removed (:issue:`15537`) +- :meth:`Index.searchsorted` have renamed the ``key`` argument to ``value`` (:issue:`14645`) +- ``DataFrame.consolidate`` and ``Series.consolidate`` have been removed (:issue:`15501`) +- Removal of the previously deprecated module ``pandas.json`` (:issue:`19944`) 
+- The module ``pandas.tools`` has been removed (:issue:`15358`, :issue:`16005`) +- :meth:`SparseArray.get_values` and :meth:`SparseArray.to_dense` have dropped the ``fill`` parameter (:issue:`14686`) +- ``DataFrame.sortlevel`` and ``Series.sortlevel`` have been removed (:issue:`15099`) +- :meth:`SparseSeries.to_dense` has dropped the ``sparse_only`` parameter (:issue:`14686`) +- :meth:`DataFrame.astype` and :meth:`Series.astype` have renamed the ``raise_on_error`` argument to ``errors`` (:issue:`14967`) +- ``is_sequence``, ``is_any_int_dtype``, and ``is_floating_dtype`` have been removed from ``pandas.api.types`` (:issue:`16163`, :issue:`16189`) + +.. _whatsnew_0240.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Slicing Series and DataFrames with a monotonically increasing :class:`CategoricalIndex` + is now very fast and has speed comparable to slicing with an ``Int64Index``. + The speed increase is both when indexing by label (using .loc) and position (.iloc) (:issue:`20395`). + Slicing a monotonically increasing :class:`CategoricalIndex` itself (i.e. ``ci[1000:2000]``) + shows similar speed improvements as above (:issue:`21659`) +- Improved performance of :meth:`CategoricalIndex.equals` when comparing to another :class:`CategoricalIndex` (:issue:`24023`) +- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`) +- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) +- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`) +- Improved performance of :meth:`Series.at` and :meth:`Index.get_value` for Extension Arrays values (e.g. :class:`Categorical`) (:issue:`24204`) +- Improved performance of membership checks in :class:`Categorical` and :class:`CategoricalIndex` + (i.e. ``x in cat``-style checks are much faster). 
:meth:`CategoricalIndex.contains` + is likewise much faster (:issue:`21369`, :issue:`21508`) +- Improved performance of :meth:`HDFStore.groups` (and dependent functions like + :meth:`HDFStore.keys`), i.e. ``x in store`` checks are much faster + (:issue:`21372`) +- Improved the performance of :func:`pandas.get_dummies` with ``sparse=True`` (:issue:`21997`) +- Improved performance of :func:`IndexEngine.get_indexer_non_unique` for sorted, non-unique indexes (:issue:`9466`) +- Improved performance of :func:`PeriodIndex.unique` (:issue:`23083`) +- Improved performance of :func:`concat` for ``Series`` objects (:issue:`23404`) +- Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`) +- Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`) +- Fixed a performance regression on Windows with Python 3.7 of :func:`read_csv` (:issue:`23516`) +- Improved performance of :class:`Categorical` constructor for ``Series`` objects (:issue:`23814`) +- Improved performance of :meth:`~DataFrame.where` for Categorical data (:issue:`24077`) +- Improved performance of iterating over a :class:`Series`. Using :meth:`DataFrame.itertuples` now creates iterators + without internally allocating lists of all elements (:issue:`20783`) +- Improved performance of :class:`Period` constructor, additionally benefitting ``PeriodArray`` and ``PeriodIndex`` creation (:issue:`24084`, :issue:`24118`) +- Improved performance of tz-aware :class:`DatetimeArray` binary operations (:issue:`24491`) + +.. _whatsnew_0240.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``. 
+- Bug in :meth:`Categorical.sort_values` where ``NaN`` values were always positioned in front regardless of ``na_position`` value. (:issue:`22556`). +- Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`) +- Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`). +- Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). +- In :meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) +- Bug when resampling :meth:`DataFrame.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) +- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`) +- Bug in :meth:`Series.where` losing the categorical dtype for categorical data (:issue:`24077`) +- Bug in :meth:`Categorical.apply` where ``NaN`` values could be handled unpredictably. 
They now remain unchanged (:issue:`24241`) +- Bug in :class:`Categorical` comparison methods incorrectly raising ``ValueError`` when operating against a :class:`DataFrame` (:issue:`24630`) +- Bug in :meth:`Categorical.set_categories` where setting fewer new categories with ``rename=True`` caused a segmentation fault (:issue:`24675`) + +Datetimelike +^^^^^^^^^^^^ + +- Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) +- Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`, :issue:`21365`) +- Bug in :func:`to_datetime` that did not consistently return an :class:`Index` when ``box=True`` was specified (:issue:`21864`) +- Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`) +- Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` 
dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`, :issue:`22163`) +- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`, :issue:`22163`) +- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`, :issue:`22163`) +- Bug in :attr:`DataFrame.values` returning a :class:`DatetimeIndex` for a single-column ``DataFrame`` with tz-aware datetime values. Now a 2-D :class:`numpy.ndarray` of :class:`Timestamp` objects is returned (:issue:`24024`) +- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`) +- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`) +- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) +- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`) +- Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, :meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`) +- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`) +- Bug in :class:`PeriodIndex` where adding or subtracting a :class:`timedelta` or :class:`Tick` object produced incorrect results (:issue:`22988`) +- Bug in the :class:`Series` repr with period-dtype data missing a space before the data (:issue:`23601`) +- Bug in :func:`date_range` when decrementing a start date to a past end date by a negative frequency (:issue:`23270`) +- Bug 
in :meth:`Series.min` which would return ``NaN`` instead of ``NaT`` when called on a series of ``NaT`` (:issue:`23282`) +- Bug in :meth:`Series.combine_first` not properly aligning categoricals, so that missing values in ``self`` were not filled by valid values from ``other`` (:issue:`24147`) +- Bug in :func:`DataFrame.combine` with datetimelike values raising a ``TypeError`` (:issue:`23079`) +- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) +- Bug in :func:`period_range` ignoring the frequency of ``start`` and ``end`` when those are provided as :class:`Period` objects (:issue:`20535`). +- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) +- Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`) +- Bug in :class:`DataFrame` when creating a new column from an ndarray of :class:`Timestamp` objects with timezones creating an object-dtype column, rather than datetime with timezone (:issue:`23932`) +- Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) +- Bug in :class:`DatetimeIndex` where calling ``np.array(dtindex, dtype=object)`` would incorrectly return an array of ``long`` objects (:issue:`23524`) +- Bug in :class:`Index` where passing a timezone-aware :class:`DatetimeIndex` and ``dtype=object`` would incorrectly raise a ``ValueError`` (:issue:`23524`) +- Bug in :class:`Index` where calling ``np.array(dtindex, dtype=object)`` on a timezone-naive :class:`DatetimeIndex` would return an array of ``datetime`` objects instead of :class:`Timestamp` objects, potentially losing nanosecond portions of the timestamps (:issue:`23524`) +- Bug in :class:`Categorical.__setitem__` not allowing 
setting with another ``Categorical`` when both are unordered and have the same categories, but in a different order (:issue:`24142`) +- Bug in :func:`date_range` where using dates with millisecond resolution or higher could return incorrect values or the wrong number of values in the index (:issue:`24110`) +- Bug in :class:`DatetimeIndex` where constructing a :class:`DatetimeIndex` from a :class:`Categorical` or :class:`CategoricalIndex` would incorrectly drop timezone information (:issue:`18664`) +- Bug in :class:`DatetimeIndex` and :class:`TimedeltaIndex` where indexing with ``Ellipsis`` would incorrectly lose the index's ``freq`` attribute (:issue:`21282`) +- Clarified error message produced when passing an incorrect ``freq`` argument to :class:`DatetimeIndex` with ``NaT`` as the first entry in the passed data (:issue:`11587`) +- Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`) +- Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`) +- Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`) +- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`). 
+- Fixed bug in :meth:`Series.max` with ``datetime64[ns]``-dtype failing to return ``NaT`` when nulls are present and ``skipna=False`` is passed (:issue:`24265`) +- Bug in :func:`to_datetime` where arrays of ``datetime`` objects containing both timezone-aware and timezone-naive ``datetimes`` would fail to raise ``ValueError`` (:issue:`24569`) +- Bug in :func:`to_datetime` with invalid datetime format doesn't coerce input to ``NaT`` even if ``errors='coerce'`` (:issue:`24763`) + +Timedelta +^^^^^^^^^ +- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`, :issue:`22163`) +- Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`) +- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`) +- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`) +- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) +- Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) +- Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`) +- Fixed bug in adding a :class:`DataFrame` with all-``timedelta64[ns]`` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) +- Bug in :class:`TimedeltaIndex` where adding a timezone-aware datetime scalar incorrectly returned a timezone-naive :class:`DatetimeIndex` (:issue:`23215`) +- Bug in :class:`TimedeltaIndex` where adding ``np.timedelta64('NaT')`` incorrectly returned an all-``NaT`` :class:`DatetimeIndex` instead of an all-``NaT`` 
:class:`TimedeltaIndex` (:issue:`23215`) +- Bug in :class:`Timedelta` and :func:`to_timedelta()` have inconsistencies in supported unit string (:issue:`21762`) +- Bug in :class:`TimedeltaIndex` division where dividing by another :class:`TimedeltaIndex` raised ``TypeError`` instead of returning a :class:`Float64Index` (:issue:`23829`, :issue:`22631`) +- Bug in :class:`TimedeltaIndex` comparison operations where comparing against non-``Timedelta``-like objects would raise ``TypeError`` instead of returning all-``False`` for ``__eq__`` and all-``True`` for ``__ne__`` (:issue:`24056`) +- Bug in :class:`Timedelta` comparisons when comparing with a ``Tick`` object incorrectly raising ``TypeError`` (:issue:`24710`) + +Timezones +^^^^^^^^^ + +- Bug in :meth:`Index.shift` where an ``AssertionError`` would raise when shifting across DST (:issue:`8616`) +- Bug in :class:`Timestamp` constructor where passing an invalid timezone offset designator (``Z``) would not raise a ``ValueError`` (:issue:`8910`) +- Bug in :meth:`Timestamp.replace` where replacing at a DST boundary would retain an incorrect offset (:issue:`7825`) +- Bug in :meth:`Series.replace` with ``datetime64[ns, tz]`` data when replacing ``NaT`` (:issue:`11792`) +- Bug in :class:`Timestamp` when passing different string date formats with a timezone offset would produce different timezone offsets (:issue:`12064`) +- Bug when comparing a tz-naive :class:`Timestamp` to a tz-aware :class:`DatetimeIndex` which would coerce the :class:`DatetimeIndex` to tz-naive (:issue:`12601`) +- Bug in :meth:`Series.truncate` with a tz-aware :class:`DatetimeIndex` which would cause a core dump (:issue:`9243`) +- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp` to tz-aware (:issue:`13051`) +- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`) +- Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not 
localize correctly (:issue:`12619`) +- Fixed bug where :meth:`DataFrame.describe` and :meth:`Series.describe` on tz-aware datetimes did not show `first` and `last` result (:issue:`21328`) +- Bug in :class:`DatetimeIndex` comparisons failing to raise ``TypeError`` when comparing timezone-aware ``DatetimeIndex`` against ``np.datetime64`` (:issue:`22074`) +- Bug in ``DataFrame`` assignment with a timezone-aware scalar (:issue:`19843`) +- Bug in :func:`DataFrame.asof` that raised a ``TypeError`` when attempting to compare tz-naive and tz-aware timestamps (:issue:`21194`) +- Bug when constructing a :class:`DatetimeIndex` with :class:`Timestamp` constructed with the ``replace`` method across DST (:issue:`18785`) +- Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) +- Bug in :meth:`Index.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) +- Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would raise if a timezone aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`) +- Bug in :meth:`DataFrame.drop` and :meth:`Series.drop` when specifying a tz-aware Timestamp key to drop from a :class:`DatetimeIndex` with a DST transition (:issue:`21761`) +- Bug in :class:`DatetimeIndex` constructor where ``NaT`` and ``dateutil.tz.tzlocal`` would raise an ``OutOfBoundsDatetime`` error (:issue:`23807`) +- Bug in :meth:`DatetimeIndex.tz_localize` and :meth:`Timestamp.tz_localize` with ``dateutil.tz.tzlocal`` near a DST transition that would return an incorrectly localized datetime (:issue:`23807`) +- Bug in :class:`Timestamp` constructor where a ``dateutil.tz.tzutc`` timezone passed with a ``datetime.datetime`` argument would be converted to a ``pytz.UTC`` timezone (:issue:`23807`) +- Bug in 
:func:`to_datetime` where ``utc=True`` was not respected when specifying a ``unit`` and ``errors='ignore'`` (:issue:`23758`) +- Bug in :func:`to_datetime` where ``utc=True`` was not respected when passing a :class:`Timestamp` (:issue:`24415`) +- Bug in :meth:`DataFrame.any` returns wrong value when ``axis=1`` and the data is of datetimelike type (:issue:`23070`) +- Bug in :meth:`DatetimeIndex.to_period` where a timezone aware index was converted to UTC first before creating :class:`PeriodIndex` (:issue:`22905`) +- Bug in :meth:`DataFrame.tz_localize`, :meth:`DataFrame.tz_convert`, :meth:`Series.tz_localize`, and :meth:`Series.tz_convert` where ``copy=False`` would mutate the original argument inplace (:issue:`6326`) +- Bug in :meth:`DataFrame.max` and :meth:`DataFrame.min` with ``axis=1`` where a :class:`Series` with ``NaN`` would be returned when all columns contained the same timezone (:issue:`10390`) + +Offsets +^^^^^^^ + +- Bug in :class:`FY5253` where date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`) +- Bug in :class:`DateOffset` where keyword arguments ``week`` and ``milliseconds`` were accepted and ignored. Passing these will now raise ``ValueError`` (:issue:`19398`) +- Bug in adding :class:`DateOffset` with :class:`DataFrame` or :class:`PeriodIndex` incorrectly raising ``TypeError`` (:issue:`23215`) +- Bug in comparing :class:`DateOffset` objects with non-DateOffset objects, particularly strings, raising ``ValueError`` instead of returning ``False`` for equality checks and ``True`` for not-equal checks (:issue:`23524`) + +Numeric +^^^^^^^ + +- Bug in :class:`Series` ``__rmatmul__`` doesn't support matrix vector multiplication (:issue:`21530`) +- Bug in :func:`factorize` fails with read-only array (:issue:`12813`) +- Fixed bug in :func:`unique` handled signed zeros inconsistently: for some inputs 0.0 and -0.0 were treated as equal and for some inputs as different. 
Now they are treated as equal for all inputs (:issue:`21866`) +- Bug in :meth:`DataFrame.agg`, :meth:`DataFrame.transform` and :meth:`DataFrame.apply` where, + when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``), + a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly. (:issue:`16679`). +- Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`) +- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`) +- Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`) +- Bug in :meth:`DataFrame.astype` to extension dtype may raise ``AttributeError`` (:issue:`22578`) +- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype arithmetic operations with ``ndarray`` with integer dtype incorrectly treating the ndarray as ``timedelta64[ns]`` dtype (:issue:`23114`) +- Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`). +- :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`) +- Calls such as :meth:`DataFrame.round` with a non-unique :meth:`CategoricalIndex` now return expected data. Previously, data would be improperly duplicated (:issue:`21809`). 
+- Added ``log10``, ``floor`` and ``ceil`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`, :issue:`24353`) +- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) +- Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`) +- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`). + +Conversion +^^^^^^^^^^ + +- Bug in :meth:`DataFrame.combine_first` in which column types were unexpectedly converted to float (:issue:`20699`) +- Bug in :meth:`DataFrame.clip` in which column types were not preserved and were cast to float (:issue:`24162`) +- Bug in :meth:`DataFrame.clip` where numeric results were wrong when the order of columns of the dataframes did not match (:issue:`20911`) +- Bug in :meth:`DataFrame.astype` where converting to an extension dtype when duplicate column names are present causes a ``RecursionError`` (:issue:`24704`) + +Strings +^^^^^^^ + +- Bug in :meth:`Index.str.partition` which was not nan-safe (:issue:`23558`). +- Bug in :meth:`Index.str.split` which was not nan-safe (:issue:`23677`).
+- Bug in :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`) +- Bug in :meth:`Index.str.cat` when the result contained only ``NaN`` (:issue:`24044`) + +Interval +^^^^^^^^ + +- Bug in the :class:`IntervalIndex` constructor where the ``closed`` parameter did not always override the inferred ``closed`` (:issue:`19370`) +- Bug in the ``IntervalIndex`` repr where a trailing comma was missing after the list of intervals (:issue:`20611`) +- Bug in :class:`Interval` where scalar arithmetic operations did not retain the ``closed`` value (:issue:`22313`) +- Bug in :class:`IntervalIndex` where indexing with datetime-like values raised a ``KeyError`` (:issue:`20636`) +- Bug in ``IntervalTree`` where data containing ``NaN`` triggered a warning and resulted in incorrect indexing queries with :class:`IntervalIndex` (:issue:`23352`) + +Indexing +^^^^^^^^ + +- Bug in :meth:`DataFrame.ne` which failed if the columns contained the column name "dtype" (:issue:`22383`) +- The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and clearer (:issue:`21557`) +- :class:`PeriodIndex` now emits a ``KeyError`` when a malformed string is looked up, which is consistent with the behavior of :class:`DatetimeIndex` (:issue:`22803`) +- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index`, rather than falling back to positional indexing (:issue:`21593`) +- Bug in :meth:`Index.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) +- Bug in :meth:`Series.reindex` when reindexing an empty series with a ``datetime64[ns, tz]`` dtype (:issue:`20869`) +- Bug in :class:`DataFrame` when setting values with ``.loc`` and a timezone aware :class:`DatetimeIndex` (:issue:`11365`) +- ``DataFrame.__getitem__`` now accepts dictionaries and dictionary
keys as list-likes of labels, consistently with ``Series.__getitem__`` (:issue:`21294`) +- Fixed ``DataFrame[np.nan]`` when columns are non-unique (:issue:`21428`) +- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`) +- Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`) +- Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`) +- ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`) +- Bug in :meth:`DataFrame.loc` when indexing with an :class:`IntervalIndex` (:issue:`19977`) +- :class:`Index` no longer mangles ``None``, ``NaN`` and ``NaT``, i.e. they are treated as three different keys. However, for numeric Index all three are still coerced to a ``NaN`` (:issue:`22332`) +- Bug in ``scalar in Index`` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`) +- Bug in :func:`MultiIndex.set_levels` when levels value is not subscriptable (:issue:`23273`) +- Bug where setting a timedelta column by ``Index`` causes it to be casted to double, and therefore lose precision (:issue:`23511`) +- Bug in :func:`Index.union` and :func:`Index.intersection` where name of the ``Index`` of the result was not computed correctly for certain cases (:issue:`9943`, :issue:`9862`) +- Bug in :class:`Index` slicing with boolean :class:`Index` may raise ``TypeError`` (:issue:`22533`) +- Bug in ``PeriodArray.__setitem__`` when accepting slice and list-like value (:issue:`23978`) +- Bug in :class:`DatetimeIndex`, :class:`TimedeltaIndex` where indexing with ``Ellipsis`` would lose their ``freq`` attribute (:issue:`21282`) +- Bug in ``iat`` where using it to assign an incompatible value would create a new column (:issue:`23236`) + +Missing +^^^^^^^ + +- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`) +- Bug in 
:func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`) +- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`) +- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`) +- :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`) +- Bug in :class:`DataFrame` constructor where ``dtype`` argument was not honored when handling numpy masked record arrays. (:issue:`24874`) + +MultiIndex +^^^^^^^^^^ + +- Bug in :func:`io.formats.style.Styler.applymap` where ``subset=`` with :class:`MultiIndex` slice would reduce to :class:`Series` (:issue:`19861`) +- Removed compatibility for :class:`MultiIndex` pickles prior to version 0.8.0; compatibility with :class:`MultiIndex` pickles from version 0.13 forward is maintained (:issue:`21654`) +- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a ``Series`` or ``DataFrame`` with a :class:`MultiIndex` index) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked a label which is present in the ``levels`` but is unused (:issue:`22221`) +- :class:`MultiIndex` has gained the :meth:`MultiIndex.from_frame`, it allows constructing a :class:`MultiIndex` object from a :class:`DataFrame` (:issue:`22420`) +- Fix ``TypeError`` in Python 3 when creating :class:`MultiIndex` in which some levels have mixed types, e.g. 
when some labels are tuples (:issue:`15457`) + +I/O +^^^ + +- Bug in :func:`read_csv` in which a column specified with ``CategoricalDtype`` of boolean categories was not being correctly coerced from string values to booleans (:issue:`20498`) +- Bug in :func:`read_csv` in which unicode column names were not being properly recognized with Python 2.x (:issue:`13253`) +- Bug in :meth:`DataFrame.to_sql` where writing timezone aware data (``datetime64[ns, tz]`` dtype) would raise a ``TypeError`` (:issue:`9086`) +- Bug in :meth:`DataFrame.to_sql` where a naive :class:`DatetimeIndex` would be written as ``TIMESTAMP WITH TIMEZONE`` type in supported databases, e.g. PostgreSQL (:issue:`23510`) +- Bug in :meth:`read_excel()` when ``parse_cols`` is specified with an empty dataset (:issue:`9208`) +- :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<thead>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`) +- :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) +- :func:`read_csv()` and :func:`read_table()` will throw ``UnicodeError`` and not coredump on badly encoded strings (:issue:`22748`) +- :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`) +- Bug in :func:`read_csv()` in which memory management was prematurely optimized for the C engine when the data was being read in chunks (:issue:`23509`) +- Bug in :func:`read_csv()` in which unnamed columns were being improperly identified when extracting a multi-index (:issue:`23687`) +- :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly.
(:issue:`21616`) +- :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) +- :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) +- Bug in :func:`read_sas()` in which an incorrect error was raised on an invalid file format. (:issue:`24548`) +- Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) +- Bug in :func:`DataFrame.to_html()` with ``index=False`` misses truncation indicators (...) on truncated DataFrame (:issue:`15019`, :issue:`22783`) +- Bug in :func:`DataFrame.to_html()` with ``index=False`` when both columns and row index are ``MultiIndex`` (:issue:`22579`) +- Bug in :func:`DataFrame.to_html()` with ``index_names=False`` displaying index name (:issue:`22747`) +- Bug in :func:`DataFrame.to_html()` with ``header=False`` not displaying row index names (:issue:`23788`) +- Bug in :func:`DataFrame.to_html()` with ``sparsify=False`` that caused it to raise ``TypeError`` (:issue:`22887`) +- Bug in :func:`DataFrame.to_string()` that broke column alignment when ``index=False`` and width of first column's values is greater than the width of first column's header (:issue:`16839`, :issue:`13032`) +- Bug in :func:`DataFrame.to_string()` that caused representations of :class:`DataFrame` to not take up the whole window (:issue:`22984`) +- Bug in :func:`DataFrame.to_csv` where a single level MultiIndex incorrectly wrote a tuple. Now just the value of the index is written (:issue:`19589`). 
+- :class:`HDFStore` will raise ``ValueError`` when the ``format`` kwarg is passed to the constructor (:issue:`13291`) +- Bug in :meth:`HDFStore.append` when appending a :class:`DataFrame` with an empty string column and ``min_itemsize`` < 8 (:issue:`12242`) +- Bug in :func:`read_csv()` in which memory leaks occurred in the C engine when parsing ``NaN`` values due to insufficient cleanup on completion or error (:issue:`21353`) +- Bug in :func:`read_csv()` in which incorrect error messages were being raised when ``skipfooter`` was passed in along with ``nrows``, ``iterator``, or ``chunksize`` (:issue:`23711`) +- Bug in :func:`read_csv()` in which :class:`MultiIndex` index names were being improperly handled in the cases when they were not provided (:issue:`23484`) +- Bug in :func:`read_csv()` in which unnecessary warnings were being raised when the dialect's values conflicted with the default arguments (:issue:`23761`) +- Bug in :func:`read_html()` in which the error message was not displaying the valid flavors when an invalid one was provided (:issue:`23549`) +- Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`) +- Bug in :meth:`read_excel()` in which column names were not being properly converted to string sometimes in Python 2.x (:issue:`23874`) +- Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`18792`, :issue:`20480`) +- Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`) +- Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`) +- :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`) +- Bug in 
:func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`) +- Bug in :func:`read_csv` that caused the C engine on Python 3.6+ on Windows to improperly read CSV filenames with accented or special characters (:issue:`15086`) +- Bug in :func:`read_fwf` in which the compression type of a file was not being properly inferred (:issue:`22199`) +- Bug in :func:`pandas.io.json.json_normalize` that caused it to raise ``TypeError`` when two consecutive elements of ``record_path`` are dicts (:issue:`22706`) +- Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where an exception would leave a partially written and invalid dta file (:issue:`23573`) +- Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) +- Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`24510`) +- Bug in :func:`DataFrame.to_string()` and more generally in the floating ``repr`` formatter. Zeros were not trimmed if ``inf`` was present in a column while it was the case with NA values. Zeros are now trimmed as in the presence of NA (:issue:`24861`). +- Bug in the ``repr`` when truncating the number of columns and having a wide last column (:issue:`24849`).
+ +Plotting +^^^^^^^^ + +- Bug in :func:`DataFrame.plot.scatter` and :func:`DataFrame.plot.hexbin` caused x-axis label and ticklabels to disappear when colorbar was on in IPython inline backend (:issue:`10611`, :issue:`10678`, and :issue:`20455`) +- Bug in plotting a Series with datetimes using :func:`matplotlib.axes.Axes.scatter` (:issue:`22039`) +- Bug in :func:`DataFrame.plot.bar` caused bars to use multiple colors instead of a single one (:issue:`20585`) +- Bug in validating color parameter caused extra color to be appended to the given color array. This happened to multiple plotting functions using matplotlib. (:issue:`20726`) + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :func:`pandas.core.window.Rolling.min` and :func:`pandas.core.window.Rolling.max` with ``closed='left'``, a datetime-like index and only one entry in the series leading to segfault (:issue:`24718`) +- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`) +- Bug in :meth:`DataFrame.resample` when downsampling across a DST boundary (:issue:`8531`) +- Bug in date anchoring for :meth:`DataFrame.resample` with offset :class:`Day` when n > 1 (:issue:`24127`) +- Bug where ``ValueError`` is wrongly raised when calling :func:`~pandas.core.groupby.SeriesGroupBy.count` method of a + ``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`). +- Multiple bugs in :func:`pandas.core.window.Rolling.min` with ``closed='left'`` and a + datetime-like index leading to incorrect results and also segfault. (:issue:`21704`) +- Bug in :meth:`pandas.core.resample.Resampler.apply` when passing positional arguments to applied func (:issue:`14615`). +- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).
+- Bug in :meth:`pandas.core.resample.Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`). +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`) +- :func:`pandas.core.groupby.RollingGroupby.agg` and :func:`pandas.core.groupby.ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`) +- Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`) +- Bug in :meth:`pandas.core.groupby.GroupBy.transform` which caused missing values when the input function can accept a :class:`DataFrame` but renames it (:issue:`23455`). +- Bug in :func:`pandas.core.groupby.GroupBy.nth` where column order was not always preserved (:issue:`20760`) +- Bug in :meth:`pandas.core.groupby.GroupBy.rank` with ``method='dense'`` and ``pct=True`` when a group has only one member would raise a ``ZeroDivisionError`` (:issue:`23666`). +- Calling :meth:`pandas.core.groupby.GroupBy.rank` with empty groups and ``pct=True`` was raising a ``ZeroDivisionError`` (:issue:`22519`) +- Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`). +- Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`) +- Bug in :func:`pandas.core.groupby.SeriesGroupBy.pct_change` or :func:`pandas.core.groupby.DataFrameGroupBy.pct_change` would previously work across groups when calculating the percent change, where it now correctly works per group (:issue:`21200`, :issue:`21235`). 
+- Bug preventing hash table creation with very large number (2^32) of rows (:issue:`22805`) +- Bug in groupby when grouping on categorical causes ``ValueError`` and incorrect grouping if ``observed=True`` and ``nan`` is present in categorical column (:issue:`24740`, :issue:`21151`). + +Reshaping +^^^^^^^^^ + +- Bug in :func:`pandas.concat` when joining resampled DataFrames with timezone aware index (:issue:`13783`) +- Bug in :func:`pandas.concat` where the ``names`` argument was ignored when joining only ``Series``; it is no longer ignored (:issue:`23490`) +- Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return tz-naive result (:issue:`21469`) +- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`) +- Bug in :meth:`DataFrame.where` with an empty DataFrame and empty ``cond`` having non-bool dtype (:issue:`21947`) +- Bug in :meth:`Series.mask` and :meth:`DataFrame.mask` with ``list`` conditionals (:issue:`21891`) +- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when converting OutOfBounds ``datetime64[ns, tz]`` (:issue:`20380`) +- :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`) +- Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`) +- Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) +- Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`) +- Bug in :func:`pandas.wide_to_long` when a string is passed to the stubnames argument and a column name is a substring of that stubname (:issue:`22468`) +- Bug
in :func:`merge` when merging ``datetime64[ns, tz]`` data that contained a DST transition (:issue:`18885`) +- Bug in :func:`merge_asof` when merging on float values within defined tolerance (:issue:`22981`) +- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`) +- Bug in :func:`merge_asof` where a confusing error message was raised when attempting to merge with missing values (:issue:`23189`) +- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`). +- Bug in :func:`pandas.melt` when passing column names that are not present in ``DataFrame`` (:issue:`23575`) +- Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) +- Bug in :class:`Series` construction when passing no data and ``dtype=str`` (:issue:`22477`) +- Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`) +- Bug in :func:`pandas.concat` when joining ``Series`` datetimetz with ``Series`` category would lose timezone (:issue:`23816`) +- Bug in :meth:`DataFrame.join` when joining on partial MultiIndex would drop names (:issue:`20452`). +- :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now return the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`) +- Constructing a DataFrame with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken (:issue:`22227`). +- Bug in :class:`DataFrame` that prevented list subclasses from being used for construction (:issue:`21226`) +- Bug in :func:`DataFrame.unstack` and :func:`DataFrame.pivot_table` returning a misleading error message when the resulting DataFrame has more elements than int32 can handle.
Now, the error message is improved, pointing towards the actual problem (:issue:`20601`) +- Bug in :func:`DataFrame.unstack` where a ``ValueError`` was raised when unstacking timezone aware values (:issue:`18338`) +- Bug in :func:`DataFrame.stack` where timezone aware values were converted to timezone naive values (:issue:`19420`) +- Bug in :func:`merge_asof` where a ``TypeError`` was raised when ``by_col`` were timezone aware values (:issue:`21184`) +- Bug showing an incorrect shape when throwing error during ``DataFrame`` construction. (:issue:`20742`) + +.. _whatsnew_0240.bug_fixes.sparse: + +Sparse +^^^^^^ + +- Updating a boolean, datetime, or timedelta column to be Sparse now works (:issue:`22367`) +- Bug in :meth:`Series.to_sparse` with Series already holding sparse data not constructing properly (:issue:`22389`) +- Providing a ``sparse_index`` to the SparseArray constructor no longer defaults the na-value to ``np.nan`` for all dtypes. The correct na_value for ``data.dtype`` is now used. +- Bug in ``SparseArray.nbytes`` under-reporting its memory usage by not including the size of its sparse index. +- Improved performance of :meth:`Series.shift` for non-NA ``fill_value``, as values are no longer converted to a dense array. +- Bug in ``DataFrame.groupby`` not including ``fill_value`` in the groups for non-NA ``fill_value`` when grouping by a sparse column (:issue:`5078`) +- Bug in unary inversion operator (``~``) on a ``SparseSeries`` with boolean values. 
The performance of this has also been improved (:issue:`22835`) +- Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`) +- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) +- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`) +- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`) + +Style +^^^^^ + +- :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) +- :meth:`~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) +- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. + +Build changes +^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.28.2`` (:issue:`21688`) +- Testing pandas now requires ``hypothesis>=3.58``. You can find `the Hypothesis docs here <https://hypothesis.readthedocs.io/en/latest/index.html>`_, and a pandas-specific introduction :ref:`in the contributing guide <using-hypothesis>`. (:issue:`22280`) +- Building pandas on macOS now targets minimum macOS 10.9 if run on macOS 10.9 or above (:issue:`23424`) + +Other +^^^^^ + +- Bug where C variables were declared with external linkage causing import errors if certain other C libraries were imported before Pandas. (:issue:`24113`) + + +.. _whatsnew_0.24.0.contributors: + +Contributors +~~~~~~~~~~~~ + +..
contributors:: v0.23.4..v0.24.0 diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst new file mode 100644 index 00000000..aead8c48 --- /dev/null +++ b/doc/source/whatsnew/v0.24.1.rst @@ -0,0 +1,94 @@ +.. _whatsnew_0241: + +Whats new in 0.24.1 (February 3, 2019) +-------------------------------------- + +.. warning:: + + The 0.24.x series of releases will be the last to support Python 2. Future feature + releases will support Python 3 only. See `Dropping Python 2.7 `_ for more. + +{{ header }} + +These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog +including other versions of pandas. See :ref:`whatsnew_0240` for the 0.24.0 changelog. + +.. _whatsnew_0241.api: + +API changes +~~~~~~~~~~~ + +Changing the ``sort`` parameter for :class:`Index` set operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`). +The default *behavior*, however, remains the same: the result is sorted, unless + +1. ``self`` and ``other`` are identical +2. ``self`` or ``other`` is empty +3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised). + +This change will allow ``sort=True`` to mean "always sort" in a future release. + +The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which +would not sort the result when the values could not be compared. + +The `sort` option for :meth:`Index.intersection` has changed in three ways. + +1. The default has changed from ``True`` to ``False``, to restore the + pandas 0.23.4 and earlier behavior of not sorting by default. +2. The behavior of ``sort=True`` can now be obtained with ``sort=None``. + This will sort the result only if the values in ``self`` and ``other`` + are not identical. +3. The value ``sort=True`` is no longer allowed. 
A future version of pandas + will properly support ``sort=True`` meaning "always sort". + +.. _whatsnew_0241.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`DataFrame.to_dict` with ``records`` orient raising an + ``AttributeError`` when the ``DataFrame`` contained more than 255 columns, or + wrongly converting column names that were not valid python identifiers (:issue:`24939`, :issue:`24940`). +- Fixed regression in :func:`read_sql` when passing certain queries with MySQL/pymysql (:issue:`24988`). +- Fixed regression in :class:`Index.intersection` incorrectly sorting the values by default (:issue:`24959`). +- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`). +- Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`) +- Fixed regression in :func:`to_timedelta` with `box=False` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`) +- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`) + +.. _whatsnew_0241.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Reshaping** + +- Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`) + +**Visualization** + +- Fixed the warning for implicitly registered matplotlib converters not showing. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`). + +**Other** + +- Fixed AttributeError when printing a DataFrame's HTML repr after accessing the IPython config object (:issue:`25036`) + +.. _whatsnew_0.241.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
Including the contributors hardcoded for this release, as backporting with + MeeseeksDev loses the commit authors + +A total of 7 people contributed patches to this release. People with a "+" by their names contributed a patch for the first time. + +* Alex Buchkovsky +* Roman Yurchak +* h-vetinari +* jbrockmendel +* Jeremy Schendel +* Joris Van den Bossche +* Tom Augspurger diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst new file mode 100644 index 00000000..d1a893f9 --- /dev/null +++ b/doc/source/whatsnew/v0.24.2.rst @@ -0,0 +1,108 @@ +.. _whatsnew_0242: + +Whats new in 0.24.2 (March 12, 2019) +------------------------------------ + +.. warning:: + + The 0.24.x series of releases will be the last to support Python 2. Future feature + releases will support Python 3 only. See `Dropping Python 2.7 `_ for more. + +{{ header }} + +These are the changes in pandas 0.24.2. See :ref:`release` for a full changelog +including other versions of pandas. + +.. _whatsnew_0242.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :meth:`DataFrame.all` and :meth:`DataFrame.any` where ``bool_only=True`` was ignored (:issue:`25101`) +- Fixed issue in ``DataFrame`` construction with passing a mixed list of mixed types could segfault. (:issue:`25075`) +- Fixed regression in :meth:`DataFrame.apply` causing ``RecursionError`` when ``dict``-like classes were passed as argument. (:issue:`25196`) +- Fixed regression in :meth:`DataFrame.replace` where ``regex=True`` was only replacing patterns matching the start of the string (:issue:`25259`) +- Fixed regression in :meth:`DataFrame.duplicated()`, where empty dataframe was not returning a boolean dtyped Series. 
(:issue:`25184`) +- Fixed regression in :meth:`Series.min` and :meth:`Series.max` where ``numeric_only=True`` was ignored when the ``Series`` contained ``Categorical`` data (:issue:`25299`) +- Fixed regression in subtraction between :class:`Series` objects with ``datetime64[ns]`` dtype incorrectly raising ``OverflowError`` when the ``Series`` on the right contains null values (:issue:`25317`) +- Fixed regression in :class:`TimedeltaIndex` where ``np.sum(index)`` incorrectly returned a zero-dimensional object instead of a scalar (:issue:`25282`) +- Fixed regression in ``IntervalDtype`` construction where passing an incorrect string with 'Interval' as a prefix could result in a ``RecursionError``. (:issue:`25338`) +- Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`) +- Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) +- Fixed regression in :func:`to_timedelta` losing precision when converting floating data to ``Timedelta`` data (:issue:`25077`). +- Fixed pip installing from source into an environment without NumPy (:issue:`25193`) +- Fixed regression in :meth:`DataFrame.replace` where large strings of numbers would be coerced into ``int64``, causing an ``OverflowError`` (:issue:`25616`) +- Fixed regression in :func:`factorize` when passing a custom ``na_sentinel`` value with ``sort=True`` (:issue:`25409`). +- Fixed regression in :meth:`DataFrame.to_csv` writing duplicate line endings with gzip compress (:issue:`25311`) + +.. 
_whatsnew_0242.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**I/O** + +- Better handling of terminal printing when the terminal dimensions are not known (:issue:`25080`) +- Bug in reading a HDF5 table-format ``DataFrame`` created in Python 2, in Python 3 (:issue:`24925`) +- Bug in reading a JSON with ``orient='table'`` generated by :meth:`DataFrame.to_json` with ``index=False`` (:issue:`25170`) +- Bug where float indexes could have misaligned values when printing (:issue:`25061`) + +**Categorical** + +- Bug where calling :meth:`Series.replace` on categorical data could return a ``Series`` with incorrect dimensions (:issue:`24971`) +- +- + +**Reshaping** + +- Bug in :meth:`~pandas.core.groupby.GroupBy.transform` where applying a function to a timezone aware column would return a timezone naive result (:issue:`24198`) +- Bug in :func:`DataFrame.join` when joining on a timezone aware :class:`DatetimeIndex` (:issue:`23931`) + +**Visualization** + +- Bug in :meth:`Series.plot` where a secondary y axis could not be set to log scale (:issue:`25545`) + +**Other** + +- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) +- Bug in :func:`merge` when merging an empty ``DataFrame`` with an ``Int64`` column or a non-empty ``DataFrame`` with an ``Int64`` column that is all ``NaN`` (:issue:`25183`) +- Bug in ``IntervalTree`` where a ``RecursionError`` occurs upon construction due to an overflow when adding endpoints, which also causes :class:`IntervalIndex` to crash during indexing operations (:issue:`25485`) +- Bug in :attr:`Series.size` raising for some extension-array-backed ``Series``, rather than returning the size (:issue:`25580`) +- Bug in resampling raising for nullable integer-dtype columns (:issue:`25580`) + +.. _whatsnew_0242.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
Including the contributors hardcoded for this release, as backporting with + MeeseeksDev loses the commit authors + +A total of 25 people contributed patches to this release. People with a "+" by their names contributed a patch for the first time. + +* Albert Villanova del Moral +* Arno Veenstra + +* chris-b1 +* Devin Petersohn + +* EternalLearner42 + +* Flavien Lambert + +* gfyoung +* Gioia Ballin +* jbrockmendel +* Jeff Reback +* Jeremy Schendel +* Johan von Forstner + +* Joris Van den Bossche +* Josh +* Justin Zheng +* Kendall Masse +* Matthew Roeschke +* Max Bolingbroke + +* rbenes + +* Sterling Paramore + +* Tao He + +* Thomas A Caswell +* Tom Augspurger +* Vibhu Agarwal + +* William Ayd +* Zach Angell diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst new file mode 100644 index 00000000..b18d0223 --- /dev/null +++ b/doc/source/whatsnew/v0.25.0.rst @@ -0,0 +1,1272 @@ +.. _whatsnew_0250: + +What's new in 0.25.0 (July 18, 2019) +------------------------------------ + +.. warning:: + + Starting with the 0.25.x series of releases, pandas only supports Python 3.5.3 and higher. + See `Dropping Python 2.7 `_ for more details. + +.. warning:: + + The minimum supported Python version will be bumped to 3.6 in a future release. + +.. warning:: + + `Panel` has been fully removed. For N-D labeled data structures, please + use `xarray `_ + +.. warning:: + + :func:`read_pickle` and :func:`read_msgpack` are only guaranteed backwards compatible back to + pandas version 0.20.3 (:issue:`27082`) + +{{ header }} + +These are the changes in pandas 0.25.0. See :ref:`release` for a full changelog +including other versions of pandas. + + +Enhancements +~~~~~~~~~~~~ + +.. 
_whatsnew_0250.enhancements.agg_relabel: + +Groupby aggregation with relabeling +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas has added special groupby behavior, known as "named aggregation", for naming the +output columns when applying multiple aggregation functions to specific columns (:issue:`18366`, :issue:`26512`). + +.. ipython:: python + + animals = pd.DataFrame({'kind': ['cat', 'dog', 'cat', 'dog'], + 'height': [9.1, 6.0, 9.5, 34.0], + 'weight': [7.9, 7.5, 9.9, 198.0]}) + animals + animals.groupby("kind").agg( + min_height=pd.NamedAgg(column='height', aggfunc='min'), + max_height=pd.NamedAgg(column='height', aggfunc='max'), + average_weight=pd.NamedAgg(column='weight', aggfunc=np.mean), + ) + +Pass the desired column names as the ``**kwargs`` to ``.agg``. The values of ``**kwargs`` +should be tuples where the first element is the column selection, and the second element is the +aggregation function to apply. Pandas provides the ``pandas.NamedAgg`` namedtuple to make it clearer +what the arguments to the function are, but plain tuples are accepted as well. + +.. ipython:: python + + animals.groupby("kind").agg( + min_height=('height', 'min'), + max_height=('height', 'max'), + average_weight=('weight', np.mean), + ) + +Named aggregation is the recommended replacement for the deprecated "dict-of-dicts" +approach to naming the output of column-specific aggregations (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`). + +A similar approach is now available for Series groupby objects as well. Because there's no need for +column selection, the values can just be the functions to apply + +.. ipython:: python + + animals.groupby("kind").height.agg( + min_height="min", + max_height="max", + ) + + +This type of aggregation is the recommended alternative to the deprecated behavior when passing +a dict to a Series groupby aggregation (:ref:`whatsnew_0200.api_breaking.deprecate_group_agg_dict`). + +See :ref:`groupby.aggregate.named` for more. + +.. 
_whatsnew_0250.enhancements.multiple_lambdas: + +Groupby Aggregation with multiple lambdas +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can now provide multiple lambda functions to a list-like aggregation in +:class:`pandas.core.groupby.GroupBy.agg` (:issue:`26430`). + +.. ipython:: python + + animals.groupby('kind').height.agg([ + lambda x: x.iloc[0], lambda x: x.iloc[-1] + ]) + + animals.groupby('kind').agg([ + lambda x: x.iloc[0] - x.iloc[1], + lambda x: x.iloc[0] + x.iloc[1] + ]) + +Previously, these raised a ``SpecificationError``. + +.. _whatsnew_0250.enhancements.multi_index_repr: + +Better repr for MultiIndex +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Printing of :class:`MultiIndex` instances now shows tuples of each row and ensures +that the tuple items are vertically aligned, so it's now easier to understand +the structure of the ``MultiIndex``. (:issue:`13480`): + +The repr now looks like this: + +.. ipython:: python + + pd.MultiIndex.from_product([['a', 'abc'], range(500)]) + +Previously, outputting a :class:`MultiIndex` printed all the ``levels`` and +``codes`` of the ``MultiIndex``, which was visually unappealing and made +the output more difficult to navigate. For example (limiting the range to 5): + +.. code-block:: ipython + + In [1]: pd.MultiIndex.from_product([['a', 'abc'], range(5)]) + Out[1]: MultiIndex(levels=[['a', 'abc'], [0, 1, 2, 3]], + ...: codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 3, 0, 1, 2, 3]]) + +In the new repr, all values will be shown, if the number of rows is smaller +than :attr:`options.display.max_seq_items` (default: 100 items). Horizontally, +the output will truncate, if it's wider than :attr:`options.display.width` +(default: 80 characters). + +.. 
_whatsnew_0250.enhancements.shorter_truncated_repr: + +Shorter truncated repr for Series and DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Currently, the default display options of pandas ensure that when a Series +or DataFrame has more than 60 rows, its repr gets truncated to this maximum +of 60 rows (the ``display.max_rows`` option). However, this still gives +a repr that takes up a large part of the vertical screen estate. Therefore, +a new option ``display.min_rows`` is introduced with a default of 10 which +determines the number of rows shown in the truncated repr: + +- For small Series or DataFrames, up to ``max_rows`` number of rows is shown + (default: 60). +- For larger Series or DataFrame with a length above ``max_rows``, only + ``min_rows`` number of rows is shown (default: 10, i.e. the first and last + 5 rows). + +This dual option makes it possible to still see the full content of relatively small +objects (e.g. ``df.head(20)`` shows all 20 rows), while giving a brief repr +for large objects. + +To restore the previous behaviour of a single threshold, set +``pd.options.display.min_rows = None``. + +.. _whatsnew_0250.enhancements.json_normalize_with_max_level: + +Json normalize with max_level param support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`json_normalize` normalizes the provided input dict to all +nested levels. The new max_level parameter provides more control over +which level to end normalization (:issue:`23843`). + +For example: + +.. code-block:: ipython + + from pandas.io.json import json_normalize + data = [{ + 'CreatedBy': {'Name': 'User001'}, + 'Lookup': {'TextField': 'Some text', + 'UserField': {'Id': 'ID001', 'Name': 'Name001'}}, + 'Image': {'a': 'b'} + }] + json_normalize(data, max_level=1) + + +.. 
_whatsnew_0250.enhancements.explode: + +Series.explode to split list-like values to rows +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`Series` and :class:`DataFrame` have gained the :meth:`DataFrame.explode` methods to transform list-likes to individual rows. See :ref:`section on Exploding list-like column <reshaping.explode>` in docs for more information (:issue:`16538`, :issue:`10511`) + + +Here is a typical usecase. You have a comma-separated string in a column. + +.. ipython:: python + + df = pd.DataFrame([{'var1': 'a,b,c', 'var2': 1}, + {'var1': 'd,e,f', 'var2': 2}]) + df + +Creating a long form ``DataFrame`` is now straightforward using chained operations + +.. ipython:: python + + df.assign(var1=df.var1.str.split(',')).explode('var1') + +.. _whatsnew_0250.enhancements.other: + +Other enhancements +^^^^^^^^^^^^^^^^^^ +- :func:`DataFrame.plot` keywords ``logy``, ``logx`` and ``loglog`` can now accept the value ``'sym'`` for symlog scaling. (:issue:`24867`) +- Added support for ISO week year format ('%G-%V-%u') when parsing datetimes using :meth:`to_datetime` (:issue:`16607`) +- Indexing of ``DataFrame`` and ``Series`` now accepts zerodim ``np.ndarray`` (:issue:`24919`) +- :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) +- :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :class:`datetime.time` objects with timezones (:issue:`24043`) +- :meth:`DataFrame.pivot_table` now accepts an ``observed`` parameter which is passed to underlying calls to :meth:`DataFrame.groupby` to speed up grouping categorical data. 
(:issue:`24923`) +- ``Series.str`` has gained :meth:`Series.str.casefold` method to remove all case distinctions present in a string (:issue:`25405`) +- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) +- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behavior of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`) +- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False`` an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`) +- :meth:`TimedeltaIndex.intersection` now also supports the ``sort`` keyword (:issue:`24471`) +- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`) +- Added :ref:`api.frame.sparse` for working with a ``DataFrame`` whose values are sparse (:issue:`25681`) +- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`) +- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`) +- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now support quoting column names with backticks to refer to names with spaces (:issue:`6508`) +- :func:`merge_asof` now gives a clearer error message when merge keys are categoricals that are not equal (:issue:`26136`) +- :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`) +- Error message for missing required imports now includes the original import error's text (:issue:`23868`) +- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`) +- :meth:`DataFrame.describe` now formats integer 
percentiles without decimal point (:issue:`26660`) +- Added support for reading SPSS .sav files using :func:`read_spss` (:issue:`26537`) +- Added new option ``plotting.backend`` to be able to select a plotting backend different than the existing ``matplotlib`` one. Use ``pandas.set_option('plotting.backend', '<backend-module>')`` where ``<backend-module>`` is a library implementing the pandas plotting API (:issue:`14130`) +- :class:`pandas.offsets.BusinessHour` supports multiple opening hours intervals (:issue:`15481`) +- :func:`read_excel` can now use ``openpyxl`` to read Excel files via the ``engine='openpyxl'`` argument. This will become the default in a future release (:issue:`11499`) +- :func:`pandas.io.excel.read_excel` supports reading OpenDocument tables. Specify ``engine='odf'`` to enable. Consult the :ref:`IO User Guide <io.ods>` for more details (:issue:`9070`) +- :class:`Interval`, :class:`IntervalIndex`, and :class:`~arrays.IntervalArray` have gained an :attr:`~Interval.is_empty` attribute denoting if the given interval(s) are empty (:issue:`27219`) + +.. _whatsnew_0250.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_0250.api_breaking.utc_offset_indexing: + + +Indexing with date strings with UTC offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Indexing a :class:`DataFrame` or :class:`Series` with a :class:`DatetimeIndex` with a +date string with a UTC offset would previously ignore the UTC offset. Now, the UTC offset +is respected in indexing. (:issue:`24076`, :issue:`16785`) + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + df + +*Previous behavior*: + +.. code-block:: ipython + + In [3]: df['2019-01-01 00:00:00+04:00':'2019-01-01 01:00:00+04:00'] + Out[3]: + 0 + 2019-01-01 00:00:00-08:00 0 + +*New behavior*: + +.. ipython:: python + + df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] + + +.. 
_whatsnew_0250.api_breaking.multi_indexing: + + +``MultiIndex`` constructed from levels and codes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Constructing a :class:`MultiIndex` with ``NaN`` levels or codes value < -1 was allowed previously. +Now, construction with codes value < -1 is not allowed and ``NaN`` levels' corresponding codes +would be reassigned as -1. (:issue:`19387`) + +*Previous behavior*: + +.. 
code-block:: ipython + + In [1]: pd.MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]], + ...: codes=[[0, -1, 1, 2, 3, 4]]) + ...: + Out[1]: MultiIndex(levels=[[nan, None, NaT, 128, 2]], + codes=[[0, -1, 1, 2, 3, 4]]) + + In [2]: pd.MultiIndex(levels=[[1, 2]], codes=[[0, -2]]) + Out[2]: MultiIndex(levels=[[1, 2]], + codes=[[0, -2]]) + +*New behavior*: + +.. ipython:: python + :okexcept: + + pd.MultiIndex(levels=[[np.nan, None, pd.NaT, 128, 2]], + codes=[[0, -1, 1, 2, 3, 4]]) + pd.MultiIndex(levels=[[1, 2]], codes=[[0, -2]]) + + +.. _whatsnew_0250.api_breaking.groupby_apply_first_group_once: + +``Groupby.apply`` on ``DataFrame`` evaluates first group only once +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The implementation of :meth:`DataFrameGroupBy.apply() <pandas.core.groupby.DataFrameGroupBy.apply>` +previously evaluated the supplied function consistently twice on the first group +to infer if it is safe to use a fast code path. Particularly for functions with +side effects, this was an undesired behavior and may have led to surprises. (:issue:`2936`, :issue:`2656`, :issue:`7739`, :issue:`10519`, :issue:`12155`, :issue:`20084`, :issue:`21417`) + +Now every group is evaluated only a single time. + +.. ipython:: python + + df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]}) + df + + def func(group): + print(group.name) + return group + +*Previous behavior*: + +.. code-block:: python + + In [3]: df.groupby('a').apply(func) + x + x + y + Out[3]: + a b + 0 x 1 + 1 y 2 + +*New behavior*: + +.. ipython:: python + + df.groupby("a").apply(func) + + +Concatenating sparse values +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When passed DataFrames whose values are sparse, :func:`concat` will now return a +:class:`Series` or :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`25702`). + +.. ipython:: python + :okwarning: + + df = pd.DataFrame({"A": pd.SparseArray([0, 1])}) + +*Previous behavior*: + +.. 
code-block:: ipython + + In [2]: type(pd.concat([df, df])) + pandas.core.sparse.frame.SparseDataFrame + +*New behavior*: + +.. ipython:: python + + type(pd.concat([df, df])) + + +This now matches the existing behavior of :func:`concat` on ``Series`` with sparse values. +:func:`concat` will continue to return a ``SparseDataFrame`` when all the values +are instances of ``SparseDataFrame``. + +This change also affects routines using :func:`concat` internally, like :func:`get_dummies`, +which now returns a :class:`DataFrame` in all cases (previously a ``SparseDataFrame`` was +returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwise). + +Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will +cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before. + +The ``.str``-accessor performs stricter type checks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Due to the lack of more fine-grained dtypes, :attr:`Series.str` so far only checked whether the data was +of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* the Series; in particular, +``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`, +:meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`. + +*Previous behavior*: + +.. code-block:: python + + In [1]: s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object) + + In [2]: s + Out[2]: + 0 b'a' + 1 b'ba' + 2 b'cba' + dtype: object + + In [3]: s.str.startswith(b'a') + Out[3]: + 0 True + 1 False + 2 False + dtype: bool + +*New behavior*: + +.. ipython:: python + :okexcept: + + s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object) + s + s.str.startswith(b'a') + +.. 
_whatsnew_0250.api_breaking.groupby_categorical: + +Categorical dtypes are preserved during groupby +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, columns that were categorical, but not the groupby key(s) would be converted to ``object`` dtype during groupby operations. Pandas now will preserve these dtypes. (:issue:`18502`) + +.. ipython:: python + + cat = pd.Categorical(["foo", "bar", "bar", "qux"], ordered=True) + df = pd.DataFrame({'payload': [-1, -2, -1, -2], 'col': cat}) + df + df.dtypes + +*Previous Behavior*: + +.. code-block:: python + + In [5]: df.groupby('payload').first().col.dtype + Out[5]: dtype('O') + +*New Behavior*: + +.. ipython:: python + + df.groupby('payload').first().col.dtype + + +.. _whatsnew_0250.api_breaking.incompatible_index_unions: + +Incompatible Index type unions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing :func:`Index.union` operations between objects of incompatible dtypes, +the result will be a base :class:`Index` of dtype ``object``. This behavior holds true for +unions between :class:`Index` objects that previously would have been prohibited. The dtype +of empty :class:`Index` objects will now be evaluated before performing union operations +rather than simply returning the other :class:`Index` object. :func:`Index.union` can now be +considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`). + +*Previous behavior*: + +.. code-block:: python + + In [1]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + ... + ValueError: can only call with other PeriodIndex-ed objects + + In [2]: pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) + Out[2]: Int64Index([1, 2, 3], dtype='int64') + +*New behavior*: + +.. ipython:: python + + pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3])) + pd.Index([], dtype=object).union(pd.Index([1, 2, 3])) + +Note that integer- and floating-dtype indexes are considered "compatible". 
The integer +values are coerced to floating point, which may result in loss of precision. See +:ref:`indexing.set_ops` for more. + + +``DataFrame`` groupby ffill/bfill no longer return group labels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The methods ``ffill``, ``bfill``, ``pad`` and ``backfill`` of +:class:`DataFrameGroupBy <pandas.core.groupby.DataFrameGroupBy>` +previously included the group labels in the return value, which was +inconsistent with other groupby transforms. Now only the filled values +are returned. (:issue:`21521`) + +.. ipython:: python + + df = pd.DataFrame({"a": ["x", "y"], "b": [1, 2]}) + df + +*Previous behavior*: + +.. code-block:: python + + In [3]: df.groupby("a").ffill() + Out[3]: + a b + 0 x 1 + 1 y 2 + +*New behavior*: + +.. ipython:: python + + df.groupby("a").ffill() + +``DataFrame`` describe on an empty categorical / object column will return top and freq +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When calling :meth:`DataFrame.describe` with an empty categorical / object +column, the 'top' and 'freq' columns were previously omitted, which was inconsistent with +the output for non-empty columns. Now the 'top' and 'freq' columns will always be included, +with :attr:`numpy.nan` in the case of an empty :class:`DataFrame` (:issue:`26397`) + +.. ipython:: python + + df = pd.DataFrame({"empty_col": pd.Categorical([])}) + df + +*Previous behavior*: + +.. code-block:: python + + In [3]: df.describe() + Out[3]: + empty_col + count 0 + unique 0 + +*New behavior*: + +.. ipython:: python + + df.describe() + +``__str__`` methods now call ``__repr__`` rather than vice versa +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas has until now mostly defined string representations in a Pandas object's +``__str__``/``__unicode__``/``__bytes__`` methods, and called ``__str__`` from the ``__repr__`` +method, if a specific ``__repr__`` method is not found. 
This is not needed for Python 3. 
+In Pandas 0.25, the string representations of Pandas objects are now generally +defined in ``__repr__``, and calls to ``__str__`` in general now pass the call on to +the ``__repr__``, if a specific ``__str__`` method doesn't exist, as is standard for Python. +This change is backward compatible for direct usage of Pandas, but if you subclass +Pandas objects *and* give your subclasses specific ``__str__``/``__repr__`` methods, +you may have to adjust your ``__str__``/``__repr__`` methods (:issue:`26495`). + +.. _whatsnew_0250.api_breaking.interval_indexing: + + +Indexing an ``IntervalIndex`` with ``Interval`` objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Indexing methods for :class:`IntervalIndex` have been modified to require exact matches only for :class:`Interval` queries. +``IntervalIndex`` methods previously matched on any overlapping ``Interval``. Behavior with scalar points, e.g. querying +with an integer, is unchanged (:issue:`16316`). + +.. ipython:: python + + ii = pd.IntervalIndex.from_tuples([(0, 4), (1, 5), (5, 8)]) + ii + +The ``in`` operator (``__contains__``) now only returns ``True`` for exact matches to ``Intervals`` in the ``IntervalIndex``, whereas +this would previously return ``True`` for any ``Interval`` overlapping an ``Interval`` in the ``IntervalIndex``. + +*Previous behavior*: + +.. code-block:: python + + In [4]: pd.Interval(1, 2, closed='neither') in ii + Out[4]: True + + In [5]: pd.Interval(-10, 10, closed='both') in ii + Out[5]: True + +*New behavior*: + +.. ipython:: python + + pd.Interval(1, 2, closed='neither') in ii + pd.Interval(-10, 10, closed='both') in ii + +The :meth:`~IntervalIndex.get_loc` method now only returns locations for exact matches to ``Interval`` queries, as opposed to the previous behavior of +returning locations for overlapping matches. A ``KeyError`` will be raised if an exact match is not found. + +*Previous behavior*: + +.. 
code-block:: python + + In [6]: ii.get_loc(pd.Interval(1, 5)) + Out[6]: array([0, 1]) + + In [7]: ii.get_loc(pd.Interval(2, 6)) + Out[7]: array([0, 1, 2]) + +*New behavior*: + +.. code-block:: python + + In [6]: ii.get_loc(pd.Interval(1, 5)) + Out[6]: 1 + + In [7]: ii.get_loc(pd.Interval(2, 6)) + --------------------------------------------------------------------------- + KeyError: Interval(2, 6, closed='right') + +Likewise, :meth:`~IntervalIndex.get_indexer` and :meth:`~IntervalIndex.get_indexer_non_unique` will also only return locations for exact matches +to ``Interval`` queries, with ``-1`` denoting that an exact match was not found. + +These indexing changes extend to querying a :class:`Series` or :class:`DataFrame` with an ``IntervalIndex`` index. + +.. ipython:: python + + s = pd.Series(list('abc'), index=ii) + s + +Selecting from a ``Series`` or ``DataFrame`` using ``[]`` (``__getitem__``) or ``loc`` now only returns exact matches for ``Interval`` queries. + +*Previous behavior*: + +.. code-block:: python + + In [8]: s[pd.Interval(1, 5)] + Out[8]: + (0, 4] a + (1, 5] b + dtype: object + + In [9]: s.loc[pd.Interval(1, 5)] + Out[9]: + (0, 4] a + (1, 5] b + dtype: object + +*New behavior*: + +.. ipython:: python + + s[pd.Interval(1, 5)] + s.loc[pd.Interval(1, 5)] + +Similarly, a ``KeyError`` will be raised for non-exact matches instead of returning overlapping matches. + +*Previous behavior*: + +.. code-block:: python + + In [9]: s[pd.Interval(2, 3)] + Out[9]: + (0, 4] a + (1, 5] b + dtype: object + + In [10]: s.loc[pd.Interval(2, 3)] + Out[10]: + (0, 4] a + (1, 5] b + dtype: object + +*New behavior*: + +.. 
code-block:: python + + In [6]: s[pd.Interval(2, 3)] + --------------------------------------------------------------------------- + KeyError: Interval(2, 3, closed='right') + + In [7]: s.loc[pd.Interval(2, 3)] + --------------------------------------------------------------------------- + KeyError: Interval(2, 3, closed='right') + +The :meth:`~IntervalIndex.overlaps` method can be used to create a boolean indexer that replicates the +previous behavior of returning overlapping matches. + +*New behavior*: + +.. ipython:: python + + idxr = s.index.overlaps(pd.Interval(2, 3)) + idxr + s[idxr] + s.loc[idxr] + + +.. _whatsnew_0250.api_breaking.ufunc: + +Binary ufuncs on Series now align +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Applying a binary ufunc like :func:`numpy.power` now aligns the inputs +when both are :class:`Series` (:issue:`23293`). + +.. ipython:: python + + s1 = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s2 = pd.Series([3, 4, 5], index=['d', 'c', 'b']) + s1 + s2 + +*Previous behavior* + +.. code-block:: ipython + + In [5]: np.power(s1, s2) + Out[5]: + a 1 + b 16 + c 243 + dtype: int64 + +*New behavior* + +.. ipython:: python + + np.power(s1, s2) + +This matches the behavior of other binary operations in pandas, like :meth:`Series.add`. +To retain the previous behavior, convert the other ``Series`` to an array before +applying the ufunc. + +.. ipython:: python + + np.power(s1, s2.array) + +Categorical.argsort now places missing values at the end +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`Categorical.argsort` now places missing values at the end of the array, making it +consistent with NumPy and the rest of pandas (:issue:`21801`). + +.. ipython:: python + + cat = pd.Categorical(['b', None, 'a'], categories=['a', 'b'], ordered=True) + +*Previous behavior* + +.. 
code-block:: ipython + + In [2]: cat = pd.Categorical(['b', None, 'a'], categories=['a', 'b'], ordered=True) + + In [3]: cat.argsort() + Out[3]: array([1, 2, 0]) + + In [4]: cat[cat.argsort()] + Out[4]: + [NaN, a, b] + categories (2, object): [a < b] + +*New behavior* + +.. ipython:: python + + cat.argsort() + cat[cat.argsort()] + +.. _whatsnew_0250.api_breaking.list_of_dict: + +Column order is preserved when passing a list of dicts to DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Starting with Python 3.7 the key-order of ``dict`` is `guaranteed `_. In practice, this has been true since +Python 3.6. The :class:`DataFrame` constructor now treats a list of dicts in the same way as +it does a list of ``OrderedDict``, i.e. preserving the order of the dicts. +This change applies only when pandas is running on Python>=3.6 (:issue:`27309`). + +.. ipython:: python + + data = [ + {'name': 'Joe', 'state': 'NY', 'age': 18}, + {'name': 'Jane', 'state': 'KY', 'age': 19, 'hobby': 'Minecraft'}, + {'name': 'Jean', 'state': 'OK', 'age': 20, 'finances': 'good'} + ] + +*Previous Behavior*: + +The columns were lexicographically sorted previously, + +.. code-block:: python + + In [1]: pd.DataFrame(data) + Out[1]: + age finances hobby name state + 0 18 NaN NaN Joe NY + 1 19 NaN Minecraft Jane KY + 2 20 good NaN Jean OK + +*New Behavior*: + +The column order now matches the insertion-order of the keys in the ``dict``, +considering all the records from top to bottom. As a consequence, the column +order of the resulting DataFrame has changed compared to previous pandas versions. + +.. ipython:: python + + pd.DataFrame(data) + +.. _whatsnew_0250.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Due to dropping support for Python 2.7, a number of optional dependencies have updated minimum versions (:issue:`25725`, :issue:`24942`, :issue:`25752`). 
+Independently, some minimum supported versions of dependencies were updated (:issue:`23519`, :issue:`25554`). +If installed, we now require: + ++-----------------+-----------------+----------+ +| Package | Minimum Version | Required | ++=================+=================+==========+ +| numpy | 1.13.3 | X | ++-----------------+-----------------+----------+ +| pytz | 2015.4 | X | ++-----------------+-----------------+----------+ +| python-dateutil | 2.6.1 | X | ++-----------------+-----------------+----------+ +| bottleneck | 1.2.1 | | ++-----------------+-----------------+----------+ +| numexpr | 2.6.2 | | ++-----------------+-----------------+----------+ +| pytest (dev) | 4.0.2 | | ++-----------------+-----------------+----------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+ ++-----------------+-----------------+ +| Package | Minimum Version | ++=================+=================+ +| beautifulsoup4 | 4.6.0 | ++-----------------+-----------------+ +| fastparquet | 0.2.1 | ++-----------------+-----------------+ +| gcsfs | 0.2.2 | ++-----------------+-----------------+ +| lxml | 3.8.0 | ++-----------------+-----------------+ +| matplotlib | 2.2.2 | ++-----------------+-----------------+ +| openpyxl | 2.4.8 | ++-----------------+-----------------+ +| pyarrow | 0.9.0 | ++-----------------+-----------------+ +| pymysql | 0.7.1 | ++-----------------+-----------------+ +| pytables | 3.4.2 | ++-----------------+-----------------+ +| scipy | 0.19.0 | ++-----------------+-----------------+ +| sqlalchemy | 1.1.4 | ++-----------------+-----------------+ +| xarray | 0.8.2 | ++-----------------+-----------------+ +| xlrd | 1.1.0 | ++-----------------+-----------------+ +| xlsxwriter | 0.9.8 | ++-----------------+-----------------+ +| xlwt | 1.2.0 | ++-----------------+-----------------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. _whatsnew_0250.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) +- :class:`Timestamp` and :class:`Timedelta` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) +- :meth:`Timestamp.strptime` will now raise a ``NotImplementedError`` (:issue:`25016`) +- Comparing :class:`Timestamp` with unsupported objects now returns :py:obj:`NotImplemented` instead of raising ``TypeError``. 
This implies that unsupported rich comparisons are delegated to the other object, and are now consistent with Python 3 behavior for ``datetime`` objects (:issue:`24011`) +- Bug in :meth:`DatetimeIndex.snap` which didn't preserve the ``name`` of the input :class:`Index` (:issue:`25575`) +- The ``arg`` argument in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` has been renamed to ``func`` (:issue:`26089`) +- The ``arg`` argument in :meth:`pandas.core.window._Window.aggregate` has been renamed to ``func`` (:issue:`26372`) +- Most Pandas classes had a ``__bytes__`` method, which was used for getting a python2-style bytestring representation of the object. This method has been removed as a part of dropping Python2 (:issue:`26447`) +- The ``.str``-accessor has been disabled for 1-level :class:`MultiIndex`, use :meth:`MultiIndex.to_flat_index` if necessary (:issue:`23679`) +- Removed support of gtk package for clipboards (:issue:`26563`) +- Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`) +- :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`) +- :meth:`ExtensionArray.argsort` places NA values at the end of the sorted array. (:issue:`21801`) +- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extension data types for a ``fixed`` format. (:issue:`7775`) +- Passing duplicate ``names`` in :meth:`read_csv` will now raise a ``ValueError`` (:issue:`17346`) + +.. _whatsnew_0250.deprecations: + +Deprecations +~~~~~~~~~~~~ + +Sparse subclasses +^^^^^^^^^^^^^^^^^ + +The ``SparseSeries`` and ``SparseDataFrame`` subclasses are deprecated. Their functionality is better-provided +by a ``Series`` or ``DataFrame`` with sparse values. + +**Previous way** + +.. 
code-block:: python + + df = pd.SparseDataFrame({"A": [0, 0, 1, 2]}) + df.dtypes + +**New way** + +.. ipython:: python + :okwarning: + + df = pd.DataFrame({"A": pd.SparseArray([0, 0, 1, 2])}) + df.dtypes + +The memory usage of the two approaches is identical. See :ref:`sparse.migration` for more (:issue:`19239`). + +msgpack format +^^^^^^^^^^^^^^ + +The msgpack format is deprecated as of 0.25 and will be removed in a future version. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. (:issue:`27084`) + + +Other deprecations +^^^^^^^^^^^^^^^^^^ + +- The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`). +- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`) +- :meth:`pandas.concat` has deprecated the ``join_axes``-keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result or on the inputs (:issue:`21951`) +- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or + the :meth:`SparseArray.to_dense` method instead (:issue:`26421`). +- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`) +- The :meth:`DataFrame.compound` and :meth:`Series.compound` methods are deprecated and will be removed in a future version (:issue:`26405`). +- The internal attributes ``_start``, ``_stop`` and ``_step`` attributes of :class:`RangeIndex` have been deprecated. + Use the public attributes :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop` and :attr:`~RangeIndex.step` instead (:issue:`26581`). 
+- The :meth:`Series.ftype`, :meth:`Series.ftypes` and :meth:`DataFrame.ftypes` methods are deprecated and will be removed in a future version. + Instead, use :meth:`Series.dtype` and :meth:`DataFrame.dtypes` (:issue:`26705`). +- The :meth:`Series.get_values`, :meth:`DataFrame.get_values`, :meth:`Index.get_values`, + :meth:`SparseArray.get_values` and :meth:`Categorical.get_values` methods are deprecated. + One of ``np.asarray(..)`` or :meth:`~Series.to_numpy` can be used instead (:issue:`19617`). +- The 'outer' method on NumPy ufuncs, e.g. ``np.subtract.outer`` has been deprecated on :class:`Series` objects. Convert the input to an array with :attr:`Series.array` first (:issue:`27186`) +- :meth:`Timedelta.resolution` is deprecated and replaced with :meth:`Timedelta.resolution_string`. In a future version, :meth:`Timedelta.resolution` will be changed to behave like the standard library :attr:`datetime.timedelta.resolution` (:issue:`21344`) +- :func:`read_table` has been undeprecated. (:issue:`25220`) +- :attr:`Index.dtype_str` is deprecated. (:issue:`18262`) +- :attr:`Series.imag` and :attr:`Series.real` are deprecated. (:issue:`18262`) +- :meth:`Series.put` is deprecated. (:issue:`18262`) +- :meth:`Index.item` and :meth:`Series.item` are deprecated. (:issue:`18262`) +- The default value ``ordered=None`` in :class:`~pandas.api.types.CategoricalDtype` has been deprecated in favor of ``ordered=False``. When converting between categorical types ``ordered=True`` must be explicitly passed in order to be preserved. (:issue:`26336`) +- :meth:`Index.contains` is deprecated. Use ``key in index`` (``__contains__``) instead (:issue:`17753`). +- :meth:`DataFrame.get_dtype_counts` is deprecated. (:issue:`18262`) +- :meth:`Categorical.ravel` will return a :class:`Categorical` instead of a ``np.ndarray`` (:issue:`27199`) + + +.. 
_whatsnew_0250.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed ``Panel`` (:issue:`25047`, :issue:`25191`, :issue:`25231`) +- Removed the previously deprecated ``sheetname`` keyword in :func:`read_excel` (:issue:`16442`, :issue:`20938`) +- Removed the previously deprecated ``TimeGrouper`` (:issue:`16942`) +- Removed the previously deprecated ``parse_cols`` keyword in :func:`read_excel` (:issue:`16488`) +- Removed the previously deprecated ``pd.options.html.border`` (:issue:`16970`) +- Removed the previously deprecated ``convert_objects`` (:issue:`11221`) +- Removed the previously deprecated ``select`` method of ``DataFrame`` and ``Series`` (:issue:`17633`) +- Removed the previously deprecated behavior of :class:`Series` treated as list-like in :meth:`~Series.cat.rename_categories` (:issue:`17982`) +- Removed the previously deprecated ``DataFrame.reindex_axis`` and ``Series.reindex_axis`` (:issue:`17842`) +- Removed the previously deprecated behavior of altering column or index labels with :meth:`Series.rename_axis` or :meth:`DataFrame.rename_axis` (:issue:`17842`) +- Removed the previously deprecated ``tupleize_cols`` keyword argument in :meth:`read_html`, :meth:`read_csv`, and :meth:`DataFrame.to_csv` (:issue:`17877`, :issue:`17820`) +- Removed the previously deprecated ``DataFrame.from_csv`` and ``Series.from_csv`` (:issue:`17812`) +- Removed the previously deprecated ``raise_on_error`` keyword argument in :meth:`DataFrame.where` and :meth:`DataFrame.mask` (:issue:`17744`) +- Removed the previously deprecated ``ordered`` and ``categories`` keyword arguments in ``astype`` (:issue:`17742`) +- Removed the previously deprecated ``cdate_range`` (:issue:`17691`) +- Removed the previously deprecated ``True`` option for the ``dropna`` keyword argument in :func:`SeriesGroupBy.nth` (:issue:`17493`) +- Removed the previously deprecated ``convert`` keyword argument in :meth:`Series.take` and 
:meth:`DataFrame.take` (:issue:`17352`) +- Removed the previously deprecated behavior of arithmetic operations with ``datetime.date`` objects (:issue:`21152`) + +.. _whatsnew_0250.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Significant speedup in :class:`SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`) +- :meth:`DataFrame.to_stata()` is now faster when outputting data with any string or non-native endian columns (:issue:`25045`) +- Improved performance of :meth:`Series.searchsorted`. The speedup is especially large when the dtype is + int8/int16/int32 and the searched key is within the integer bounds for the dtype (:issue:`22034`) +- Improved performance of :meth:`pandas.core.groupby.GroupBy.quantile` (:issue:`20405`) +- Improved performance of slicing and other selected operation on a :class:`RangeIndex` (:issue:`26565`, :issue:`26617`, :issue:`26722`) +- :class:`RangeIndex` now performs standard lookup without instantiating an actual hashtable, hence saving memory (:issue:`16685`) +- Improved performance of :meth:`read_csv` by faster tokenizing and faster parsing of small float numbers (:issue:`25784`) +- Improved performance of :meth:`read_csv` by faster parsing of N/A and boolean values (:issue:`25804`) +- Improved performance of :attr:`IntervalIndex.is_monotonic`, :attr:`IntervalIndex.is_monotonic_increasing` and :attr:`IntervalIndex.is_monotonic_decreasing` by removing conversion to :class:`MultiIndex` (:issue:`24813`) +- Improved performance of :meth:`DataFrame.to_csv` when writing datetime dtypes (:issue:`25708`) +- Improved performance of :meth:`read_csv` by much faster parsing of ``MM/YYYY`` and ``DD/MM/YYYY`` datetime formats (:issue:`25922`) +- Improved performance of nanops for dtypes that cannot store NaNs. 
Speedup is particularly prominent for :meth:`Series.all` and :meth:`Series.any` (:issue:`25070`) +- Improved performance of :meth:`Series.map` for dictionary mappers on categorical series by mapping the categories instead of mapping all values (:issue:`23785`) +- Improved performance of :meth:`IntervalIndex.intersection` (:issue:`24813`) +- Improved performance of :meth:`read_csv` by faster concatenating date columns without extra conversion to string for integer/float zero and float ``NaN``; by faster checking the string for the possibility of being a date (:issue:`25754`) +- Improved performance of :attr:`IntervalIndex.is_unique` by removing conversion to ``MultiIndex`` (:issue:`24813`) +- Restored performance of :meth:`DatetimeIndex.__iter__` by re-enabling specialized code path (:issue:`26702`) +- Improved performance when building :class:`MultiIndex` with at least one :class:`CategoricalIndex` level (:issue:`22044`) +- Improved performance by removing the need for a garbage collect when checking for ``SettingWithCopyWarning`` (:issue:`27031`) +- For :meth:`to_datetime` changed default value of cache parameter to ``True`` (:issue:`26043`) +- Improved performance of :class:`DatetimeIndex` and :class:`PeriodIndex` slicing given non-unique, monotonic data (:issue:`27136`). +- Improved performance of :meth:`pd.read_json` for index-oriented data. (:issue:`26773`) +- Improved performance of :meth:`MultiIndex.shape` (:issue:`27384`). + +.. 
_whatsnew_0250.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) +- Fixed bug in comparison of ordered :class:`Categorical` that contained missing values with a scalar which sometimes incorrectly resulted in ``True`` (:issue:`26504`) +- Bug in :meth:`DataFrame.dropna` when the :class:`DataFrame` has a :class:`CategoricalIndex` containing :class:`Interval` objects incorrectly raised a ``TypeError`` (:issue:`25087`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`to_datetime` which would raise an (incorrect) ``ValueError`` when called with a date far into the future and the ``format`` argument specified instead of raising ``OutOfBoundsDatetime`` (:issue:`23830`) +- Bug in :func:`to_datetime` which would raise ``InvalidIndexError: Reindexing only valid with uniquely valued Index objects`` when called with ``cache=True``, with ``arg`` including at least two different elements from the set ``{None, numpy.nan, pandas.NaT}`` (:issue:`22305`) +- Bug in :class:`DataFrame` and :class:`Series` where timezone aware data with ``dtype='datetime64[ns]`` was not cast to naive (:issue:`25843`) +- Improved :class:`Timestamp` type checking in various datetime functions to prevent exceptions when using a subclassed ``datetime`` (:issue:`25851`) +- Bug in :class:`Series` and :class:`DataFrame` repr where ``np.datetime64('NaT')`` and ``np.timedelta64('NaT')`` with ``dtype=object`` would be represented as ``NaN`` (:issue:`25445`) +- Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`) +- Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`) +- Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with mix of invalid dates and ``NaN`` values with 
``format='%Y%m%d'`` and ``errors='coerce'`` (:issue:`25512`) +- Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`) +- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` +- Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`) +- Bug in constructing a ``Series`` or ``DataFrame`` from a numpy ``datetime64`` array with a non-ns unit and out-of-bound timestamps generating rubbish data, which will now correctly raise an ``OutOfBoundsDatetime`` error (:issue:`26206`). +- Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`) +- Bug where adding :class:`Timestamp` to a ``np.timedelta64`` object would raise instead of returning a :class:`Timestamp` (:issue:`24775`) +- Bug where comparing a zero-dimensional numpy array containing a ``np.datetime64`` object to a :class:`Timestamp` would incorrectly raise ``TypeError`` (:issue:`26916`) +- Bug in :func:`to_datetime` which would raise ``ValueError: Tz-aware datetime.datetime cannot be converted to datetime64 unless utc=True`` when called with ``cache=True``, with ``arg`` including datetime strings with different offset (:issue:`26097`) +- + +Timedelta +^^^^^^^^^ + +- Bug in :func:`TimedeltaIndex.intersection` where for non-monotonic indices in some cases an empty ``Index`` was returned when in fact an intersection existed (:issue:`25913`) +- Bug with comparisons between :class:`Timedelta` and ``NaT`` raising ``TypeError`` (:issue:`26039`) +- Bug when adding or subtracting a :class:`BusinessHour` to a :class:`Timestamp` with the resulting time landing in a following or prior day respectively (:issue:`26381`) +- Bug when comparing a :class:`TimedeltaIndex` against a 
zero-dimensional numpy array (:issue:`26689`) + +Timezones +^^^^^^^^^ + +- Bug in :func:`DatetimeIndex.to_frame` where timezone aware data would be converted to timezone naive data (:issue:`25809`) +- Bug in :func:`to_datetime` with ``utc=True`` and datetime strings that would apply previously parsed UTC offsets to subsequent arguments (:issue:`24992`) +- Bug in :func:`Timestamp.tz_localize` and :func:`Timestamp.tz_convert` does not propagate ``freq`` (:issue:`25241`) +- Bug in :func:`Series.at` where setting :class:`Timestamp` with timezone raises ``TypeError`` (:issue:`25506`) +- Bug in :func:`DataFrame.update` when updating with timezone aware data would return timezone naive data (:issue:`25807`) +- Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`) +- Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`) +- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`) +- Bug in :func:`date_range` where ambiguous or nonexistent start or end times were not handled by the ``ambiguous`` or ``nonexistent`` keywords respectively (:issue:`27088`) +- Bug in :meth:`DatetimeIndex.union` when combining a timezone aware and timezone unaware :class:`DatetimeIndex` (:issue:`21671`) +- Bug when applying a numpy reduction function (e.g. 
:meth:`numpy.minimum`) to a timezone aware :class:`Series` (:issue:`15552`) + +Numeric +^^^^^^^ + +- Bug in :meth:`to_numeric` in which large negative numbers were being improperly handled (:issue:`24910`) +- Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`) +- Bug in :meth:`to_numeric` in which invalid values for ``errors`` were being allowed (:issue:`26466`) +- Bug in :class:`format` in which floating point complex numbers were not being formatted to proper display precision and trimming (:issue:`25514`) +- Bug in error messages in :meth:`DataFrame.corr` and :meth:`Series.corr`. Added the possibility of using a callable. (:issue:`25729`) +- Bug in :meth:`Series.divmod` and :meth:`Series.rdivmod` which would raise an (incorrect) ``ValueError`` rather than return a pair of :class:`Series` objects as result (:issue:`25557`) +- Raises a helpful exception when a non-numeric index is sent to :meth:`interpolate` with methods which require numeric index. (:issue:`21662`) +- Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`) +- Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`) +- Bug in ``divmod`` with a :class:`Series` object containing zeros incorrectly raising ``AttributeError`` (:issue:`26987`) +- Inconsistency in :class:`Series` floor-division (``//``) and ``divmod`` filling positive//zero with ``NaN`` instead of ``Inf`` (:issue:`27321`) +- + +Conversion +^^^^^^^^^^ + +- Bug in :func:`DataFrame.astype()` when passing a dict of columns and types the ``errors`` parameter was ignored. 
(:issue:`25905`) +- +- + +Strings +^^^^^^^ + +- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) +- Improved error message when passing :class:`Series` of wrong dtype to :meth:`Series.str.cat` (:issue:`22722`) +- + + +Interval +^^^^^^^^ + +- Construction of :class:`Interval` is restricted to numeric, :class:`Timestamp` and :class:`Timedelta` endpoints (:issue:`23013`) +- Fixed bug in :class:`Series`/:class:`DataFrame` not displaying ``NaN`` in :class:`IntervalIndex` with missing values (:issue:`25984`) +- Bug in :meth:`IntervalIndex.get_loc` where a ``KeyError`` would be incorrectly raised for a decreasing :class:`IntervalIndex` (:issue:`25860`) +- Bug in :class:`Index` constructor where passing mixed closed :class:`Interval` objects would result in a ``ValueError`` instead of an ``object`` dtype ``Index`` (:issue:`27172`) + +Indexing +^^^^^^^^ + +- Improved exception message when calling :meth:`DataFrame.iloc` with a list of non-numeric objects (:issue:`25753`). +- Improved exception message when calling ``.iloc`` or ``.loc`` with a boolean indexer with different length (:issue:`26658`). +- Bug in ``KeyError`` exception message when indexing a :class:`MultiIndex` with a non-existent key not displaying the original key (:issue:`27250`). +- Bug in ``.iloc`` and ``.loc`` with a boolean indexer not raising an ``IndexError`` when too few items are passed (:issue:`26658`). +- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` where ``KeyError`` was not raised for a ``MultiIndex`` when the key was less than or equal to the number of levels in the :class:`MultiIndex` (:issue:`14885`). +- Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`). 
+- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`). +- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise error (:issue:`26390`) +- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`) +- Fixed a ``KeyError`` when indexing a :class:`MultiIndex` level with a list containing exactly one label, which is missing (:issue:`27148`) +- Bug which produced ``AttributeError`` on partial matching :class:`Timestamp` in a :class:`MultiIndex` (:issue:`26944`) +- Bug in :class:`Categorical` and :class:`CategoricalIndex` with :class:`Interval` values when using the ``in`` operator (``__contains__``) with objects that are not comparable to the values in the ``Interval`` (:issue:`23705`) +- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` on a :class:`DataFrame` with a single timezone-aware datetime64[ns] column incorrectly returning a scalar instead of a :class:`Series` (:issue:`27110`) +- Bug in :class:`CategoricalIndex` and :class:`Categorical` incorrectly raising ``ValueError`` instead of ``TypeError`` when a list is passed using the ``in`` operator (``__contains__``) (:issue:`21729`) +- Bug in setting a new value in a :class:`Series` with a :class:`Timedelta` object incorrectly casting the value to an integer (:issue:`22717`) +- Bug in :class:`Series` setting a new key (``__setitem__``) with a timezone-aware datetime incorrectly raising ``ValueError`` (:issue:`12862`) +- Bug in :meth:`DataFrame.iloc` when indexing with a read-only indexer (:issue:`17192`) +- Bug in :class:`Series` setting an existing tuple key (``__setitem__``) with timezone-aware datetime values incorrectly raising ``TypeError`` (:issue:`20441`) + +Missing +^^^^^^^ + +- Fixed misleading exception message in :meth:`Series.interpolate` if argument ``order`` is required, but omitted 
(:issue:`10633`, :issue:`24014`). +- Fixed class type displayed in exception message in :meth:`DataFrame.dropna` if invalid ``axis`` parameter passed (:issue:`25555`) +- A ``ValueError`` will now be thrown by :meth:`DataFrame.fillna` when ``limit`` is not a positive integer (:issue:`27042`) +- + +MultiIndex +^^^^^^^^^^ + +- Bug in which incorrect exception raised by :class:`Timedelta` when testing the membership of :class:`MultiIndex` (:issue:`24570`) +- + +I/O +^^^ + +- Bug in :func:`DataFrame.to_html()` where values were truncated using display options instead of outputting the full content (:issue:`17004`) +- Fixed bug in missing text when using :meth:`to_clipboard` if copying utf-16 characters in Python 3 on Windows (:issue:`25040`) +- Bug in :func:`read_json` for ``orient='table'`` when it tries to infer dtypes by default, which is not applicable as dtypes are already defined in the JSON schema (:issue:`21345`) +- Bug in :func:`read_json` for ``orient='table'`` and float index, as it infers index dtype by default, which is not applicable because index dtype is already defined in the JSON schema (:issue:`25433`) +- Bug in :func:`read_json` for ``orient='table'`` and string of float column names, as it makes a column name type conversion to :class:`Timestamp`, which is not applicable because column names are already defined in the JSON schema (:issue:`25435`) +- Bug in :func:`json_normalize` for ``errors='ignore'`` where missing values in the input data, were filled in resulting ``DataFrame`` with the string ``"nan"`` instead of ``numpy.nan`` (:issue:`25468`) +- :meth:`DataFrame.to_html` now raises ``TypeError`` when using an invalid type for the ``classes`` parameter instead of ``AssertionError`` (:issue:`25608`) +- Bug in :meth:`DataFrame.to_string` and :meth:`DataFrame.to_latex` that would lead to incorrect output when the ``header`` keyword is used (:issue:`16718`) +- Bug in :func:`read_csv` not properly interpreting the UTF8 encoded filenames on Windows on 
Python 3.6+ (:issue:`15086`) +- Improved performance in :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` when converting columns that have missing values (:issue:`25772`) +- Bug in :meth:`DataFrame.to_html` where header numbers would ignore display options when rounding (:issue:`17280`) +- Bug in :func:`read_hdf` where reading a table from an HDF5 file written directly with PyTables fails with a ``ValueError`` when using a sub-selection via the ``start`` or ``stop`` arguments (:issue:`11188`) +- Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`) +- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`) +- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`) +- Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`) +- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`) +- Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. 
(:issue:`26104`) +- Fixed memory leak in :meth:`DataFrame.to_json` when dealing with numeric data (:issue:`24889`) +- Bug in :func:`read_json` where date strings with ``Z`` were not converted to a UTC timezone (:issue:`26168`) +- Added ``cache_dates=True`` parameter to :meth:`read_csv`, which allows to cache unique dates when they are parsed (:issue:`25990`) +- :meth:`DataFrame.to_excel` now raises a ``ValueError`` when the caller's dimensions exceed the limitations of Excel (:issue:`26051`) +- Fixed bug in :func:`pandas.read_csv` where a BOM would result in incorrect parsing using engine='python' (:issue:`26545`) +- :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`) +- Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`). +- Fixed bug in :func:`DataFrame.to_excel()` where custom objects (i.e. `PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) +- Bug in :meth:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`) +- Bug in :meth:`to_msgpack` and :meth:`read_msgpack` which would raise a ``ValueError`` rather than a ``FileNotFoundError`` for an invalid path (:issue:`27160`) +- Fixed bug in :meth:`DataFrame.to_parquet` which would raise a ``ValueError`` when the dataframe had no columns (:issue:`27339`) +- Allow parsing of :class:`PeriodDtype` columns when using :func:`read_csv` (:issue:`26934`) + +Plotting +^^^^^^^^ + +- Fixed bug where :class:`api.extensions.ExtensionArray` could not be used in matplotlib plotting (:issue:`25587`) +- Bug in an error message in :meth:`DataFrame.plot`. 
Improved the error message if non-numerics are passed to :meth:`DataFrame.plot` (:issue:`25481`) +- Bug in incorrect ticklabel positions when plotting an index that are non-numeric / non-datetime (:issue:`7612`, :issue:`15912`, :issue:`22334`) +- Fixed bug causing plots of :class:`PeriodIndex` timeseries to fail if the frequency is a multiple of the frequency rule code (:issue:`14763`) +- Fixed bug when plotting a :class:`DatetimeIndex` with ``datetime.timezone.utc`` timezone (:issue:`17173`) +- +- + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`pandas.core.resample.Resampler.agg` with a timezone aware index where ``OverflowError`` would raise when passing a list of functions (:issue:`22660`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.nunique` in which the names of column levels were lost (:issue:`23222`) +- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`) +- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`) +- Bug in :func:`pandas.core.groupby.GroupBy.size` when grouping only NA values (:issue:`23050`) +- Bug in :func:`Series.groupby` where ``observed`` kwarg was previously ignored (:issue:`24880`) +- Bug in :func:`Series.groupby` where using ``groupby`` with a :class:`MultiIndex` Series with a list of labels equal to the length of the series caused incorrect grouping (:issue:`25704`) +- Ensured that ordering of outputs in ``groupby`` aggregation functions is consistent across all versions of Python (:issue:`25692`) +- Ensured that result group order is correct when grouping on an ordered ``Categorical`` and specifying ``observed=True`` (:issue:`25871`, :issue:`25167`) +- Bug in :meth:`pandas.core.window.Rolling.min` and :meth:`pandas.core.window.Rolling.max` that caused a memory leak (:issue:`25893`) +- Bug in :meth:`pandas.core.window.Rolling.count` 
and ``pandas.core.window.Expanding.count`` was previously ignoring the ``axis`` keyword (:issue:`13503`) +- Bug in :meth:`pandas.core.groupby.GroupBy.idxmax` and :meth:`pandas.core.groupby.GroupBy.idxmin` with datetime column would return incorrect dtype (:issue:`25444`, :issue:`15306`) +- Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) +- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`) +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise a ``ValueError`` (:issue:`26208`) +- Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`) +- Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. 
(:issue:`26310`) +- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where MemoryError is raised with empty window (:issue:`26005`) +- Bug in :meth:`pandas.core.window.Rolling.median` and :meth:`pandas.core.window.Rolling.quantile` where incorrect results are returned with ``closed='left'`` and ``closed='neither'`` (:issue:`26005`) +- Improved :class:`pandas.core.window.Rolling`, :class:`pandas.core.window.Window` and :class:`pandas.core.window.EWM` functions to exclude nuisance columns from results instead of raising errors and raise a ``DataError`` only if all columns are nuisance (:issue:`12537`) +- Bug in :meth:`pandas.core.window.Rolling.max` and :meth:`pandas.core.window.Rolling.min` where incorrect results are returned with an empty variable window (:issue:`26005`) +- Raise a helpful exception when an unsupported weighted window function is used as an argument of :meth:`pandas.core.window.Window.aggregate` (:issue:`26597`) + +Reshaping +^^^^^^^^^ + +- Bug in :func:`pandas.merge` adds a string of ``None``, if ``None`` is assigned in suffixes instead of leaving the column name as-is (:issue:`24782`). +- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (missing index values are now assigned NA) (:issue:`24212`, :issue:`25009`) +- :func:`to_records` now accepts dtypes to its ``column_dtypes`` parameter (:issue:`24895`) +- Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`) +- Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`) +- Bug in :func:`concat` where the resulting ``freq`` of two :class:`DatetimeIndex` with the same ``freq`` would be dropped (:issue:`3232`). 
+- Bug in :func:`merge` where merging with equivalent Categorical dtypes was raising an error (:issue:`22501`) +- bug in :class:`DataFrame` instantiating with a dict of iterators or generators (e.g. ``pd.DataFrame({'A': reversed(range(3))})``) raised an error (:issue:`26349`). +- Bug in :class:`DataFrame` instantiating with a ``range`` (e.g. ``pd.DataFrame(range(3))``) raised an error (:issue:`26342`). +- Bug in :class:`DataFrame` constructor when passing non-empty tuples would cause a segmentation fault (:issue:`25691`) +- Bug in :func:`Series.apply` failed when the series is a timezone aware :class:`DatetimeIndex` (:issue:`25959`) +- Bug in :func:`pandas.cut` where large bins could incorrectly raise an error due to an integer overflow (:issue:`26045`) +- Bug in :func:`DataFrame.sort_index` where an error is thrown when a multi-indexed ``DataFrame`` is sorted on all levels with the initial level sorted last (:issue:`26053`) +- Bug in :meth:`Series.nlargest` treats ``True`` as smaller than ``False`` (:issue:`26154`) +- Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`) +- Bug in which :meth:`DataFrame.from_dict` ignored order of ``OrderedDict`` when ``orient='index'`` (:issue:`8425`). 
+- Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`) +- Bug in :func:`pivot_table` when pivoting a timezone aware column as the ``values`` would remove timezone information (:issue:`14948`) +- Bug in :func:`merge_asof` when specifying multiple ``by`` columns where one is ``datetime64[ns, tz]`` dtype (:issue:`26649`) + +Sparse +^^^^^^ + +- Significant speedup in :class:`SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`) +- Bug in :class:`SparseFrame` constructor where passing ``None`` as the data would cause ``default_fill_value`` to be ignored (:issue:`16807`) +- Bug in :class:`SparseDataFrame` when adding a column in which the length of values does not match length of index, ``AssertionError`` is raised instead of raising ``ValueError`` (:issue:`25484`) +- Introduce a better error message in :meth:`Series.sparse.from_coo` so it returns a ``TypeError`` for inputs that are not coo matrices (:issue:`26554`) +- Bug in :func:`numpy.modf` on a :class:`SparseArray`. Now a tuple of :class:`SparseArray` is returned (:issue:`26946`). + + +Build Changes +^^^^^^^^^^^^^ + +- Fix install error with PyPy on macOS (:issue:`26536`) + +ExtensionArray +^^^^^^^^^^^^^^ + +- Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). +- :meth:`Series.count` miscounts NA values in ExtensionArrays (:issue:`26835`) +- Added ``Series.__array_ufunc__`` to better handle NumPy ufuncs applied to Series backed by extension arrays (:issue:`23293`). 
+- Keyword argument ``deep`` has been removed from :meth:`ExtensionArray.copy` (:issue:`27083`) + +Other +^^^^^ + +- Removed unused C functions from vendored UltraJSON implementation (:issue:`26198`) +- Allow :class:`Index` and :class:`RangeIndex` to be passed to numpy ``min`` and ``max`` functions (:issue:`26125`) +- Use actual class name in repr of empty objects of a ``Series`` subclass (:issue:`27001`). +- Bug in :class:`DataFrame` where passing an object array of timezone-aware `datetime` objects would incorrectly raise ``ValueError`` (:issue:`13287`) + +.. _whatsnew_0.250.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.24.2..v0.25.0 diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst new file mode 100644 index 00000000..944021ca --- /dev/null +++ b/doc/source/whatsnew/v0.25.1.rst @@ -0,0 +1,119 @@ +.. _whatsnew_0251: + +What's new in 0.25.1 (August 21, 2019) +-------------------------------------- + +These are the changes in pandas 0.25.1. See :ref:`release` for a full changelog +including other versions of pandas. + +I/O and LZMA +~~~~~~~~~~~~ + +Some users may unknowingly have an incomplete Python installation lacking the `lzma` module from the standard library. In this case, `import pandas` failed due to an `ImportError` (:issue:`27575`). +Pandas will now warn, rather than raising an `ImportError` if the `lzma` module is not present. Any subsequent attempt to use `lzma` methods will raise a `RuntimeError`. +A possible fix for the lack of the `lzma` module is to ensure you have the necessary libraries and then re-install Python. +For example, on MacOS installing Python with `pyenv` may lead to an incomplete Python installation due to unmet system dependencies at compilation time (like `xz`). Compilation will succeed, but Python might fail at run time. The issue can be solved by installing the necessary dependencies and then re-installing Python. + +.. 
_whatsnew_0251.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +- Bug in :meth:`Categorical.fillna` that would replace all values, not just those that are ``NaN`` (:issue:`26215`) + +Datetimelike +^^^^^^^^^^^^ + +- Bug in :func:`to_datetime` where passing a timezone-naive :class:`DatetimeArray` or :class:`DatetimeIndex` and ``utc=True`` would incorrectly return a timezone-naive result (:issue:`27733`) +- Bug in :meth:`Period.to_timestamp` where a :class:`Period` outside the :class:`Timestamp` implementation bounds (roughly 1677-09-21 to 2262-04-11) would return an incorrect :class:`Timestamp` instead of raising ``OutOfBoundsDatetime`` (:issue:`19643`) +- Bug in iterating over :class:`DatetimeIndex` when the underlying data is read-only (:issue:`28055`) + +Timezones +^^^^^^^^^ + +- Bug in :class:`Index` where a numpy object array with a timezone aware :class:`Timestamp` and ``np.nan`` would not return a :class:`DatetimeIndex` (:issue:`27011`) + +Numeric +^^^^^^^ + +- Bug in :meth:`Series.interpolate` when using a timezone aware :class:`DatetimeIndex` (:issue:`27548`) +- Bug when printing negative floating point complex numbers would raise an ``IndexError`` (:issue:`27484`) +- Bug where :class:`DataFrame` arithmetic operators such as :meth:`DataFrame.mul` with a :class:`Series` with axis=1 would raise an ``AttributeError`` on :class:`DataFrame` larger than the minimum threshold to invoke numexpr (:issue:`27636`) +- Bug in :class:`DataFrame` arithmetic where missing values in results were incorrectly masked with ``NaN`` instead of ``Inf`` (:issue:`27464`) + +Conversion +^^^^^^^^^^ + +- Improved the warnings for the deprecated methods :meth:`Series.real` and :meth:`Series.imag` (:issue:`27610`) + +Interval +^^^^^^^^ + +- Bug in :class:`IntervalIndex` where `dir(obj)` would raise ``ValueError`` (:issue:`27571`) + +Indexing +^^^^^^^^ + +- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like 
``.loc['2015']`` (:issue:`27516`) +- Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`) +- Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`). +- Fix regression in ``.ix`` fallback with an ``IntervalIndex`` (:issue:`27865`). + +Missing +^^^^^^^ + +- Bug in :func:`pandas.isnull` or :func:`pandas.isna` when the input is a type e.g. ``type(pandas.Series())`` (:issue:`27482`) + +I/O +^^^ + +- Avoid calling ``S3File.s3`` when reading parquet, as this was removed in s3fs version 0.3.0 (:issue:`27756`) +- Better error message when a negative header is passed in :func:`pandas.read_csv` (:issue:`27779`) +- Follow the ``min_rows`` display option (introduced in v0.25.0) correctly in the HTML repr in the notebook (:issue:`27991`). + +Plotting +^^^^^^^^ + +- Added a ``pandas_plotting_backends`` entrypoint group for registering plot backends. See :ref:`extending.plotting-backends` for more (:issue:`26747`). +- Fixed the re-instatement of Matplotlib datetime converters after calling + :meth:`pandas.plotting.deregister_matplotlib_converters` (:issue:`27481`). +- Fix compatibility issue with matplotlib when passing a pandas ``Index`` to a plot call (:issue:`27775`). 
+ +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Fixed regression in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` raising when multiple quantiles are given (:issue:`27526`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`) +- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`) +- Bug in windowing over read-only arrays (:issue:`27766`) +- Fixed segfault in :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` when an invalid quantile was passed (:issue:`27470`) + +Reshaping +^^^^^^^^^ + +- A ``KeyError`` is now raised if ``.unstack()`` is called on a :class:`Series` or :class:`DataFrame` with a flat :class:`Index` passing a name which is not the correct one (:issue:`18303`) +- Bug in :meth:`merge_asof` where it could not merge :class:`Timedelta` objects when passing the ``tolerance`` kwarg (:issue:`27642`) +- Bug in :meth:`DataFrame.crosstab` when ``margins`` set to ``True`` and ``normalize`` is not ``False``, an error is raised. (:issue:`27500`) +- :meth:`DataFrame.join` now suppresses the ``FutureWarning`` when the sort parameter is specified (:issue:`21952`) +- Bug in :meth:`DataFrame.join` raising with readonly arrays (:issue:`27943`) + +Sparse +^^^^^^ + +- Bug in reductions for :class:`Series` with Sparse dtypes (:issue:`27080`) + +Other +^^^^^ + +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when replacing timezone-aware timestamps using a dict-like replacer (:issue:`27720`) +- Bug in :meth:`Series.rename` when using a custom type indexer. Now any value that isn't callable or dict-like is treated as a scalar. (:issue:`27814`) + +.. _whatsnew_0.251.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.25.0..v0.25.1 diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst new file mode 100644 index 00000000..c0c68ce4 --- /dev/null +++ b/doc/source/whatsnew/v0.25.2.rst @@ -0,0 +1,49 @@ +.. _whatsnew_0252: + +What's new in 0.25.2 (October 15, 2019) +--------------------------------------- + +These are the changes in pandas 0.25.2. See :ref:`release` for a full changelog +including other versions of pandas. + +.. note:: + + Pandas 0.25.2 adds compatibility for Python 3.8 (:issue:`28147`). + +.. _whatsnew_0252.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Indexing +^^^^^^^^ + +- Fix regression in :meth:`DataFrame.reindex` not following the ``limit`` argument (:issue:`28631`). +- Fix regression in :meth:`RangeIndex.get_indexer` for decreasing :class:`RangeIndex` where target values may be improperly identified as missing/present (:issue:`28678`) + +I/O +^^^ + +- Fix regression in notebook display where ``<th>`` tags were missing for :attr:`DataFrame.index` values (:issue:`28204`). +- Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`) +- Fix :meth:`~DataFrame.to_csv` with ``ExtensionArray`` with list-like values (:issue:`28840`). + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`). +- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`) + +Other +^^^^^ + +- Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`) +- Fix to ensure that tab-completion in an IPython console does not raise + warnings for deprecated attributes (:issue:`27900`). + +.. 
_whatsnew_0.252.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.1..v0.25.2 diff --git a/doc/source/whatsnew/v0.25.3.rst b/doc/source/whatsnew/v0.25.3.rst new file mode 100644 index 00000000..f7f54198 --- /dev/null +++ b/doc/source/whatsnew/v0.25.3.rst @@ -0,0 +1,22 @@ +.. _whatsnew_0253: + +What's new in 0.25.3 (October 31, 2019) +--------------------------------------- + +These are the changes in pandas 0.25.3. See :ref:`release` for a full changelog +including other versions of pandas. + +.. _whatsnew_0253.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`DataFrameGroupBy.quantile` where NA values in the grouping could cause segfaults or incorrect results (:issue:`28882`) + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.2..v0.25.3 diff --git a/doc/source/whatsnew/v0.4.x.rst b/doc/source/whatsnew/v0.4.x.rst new file mode 100644 index 00000000..8e41e528 --- /dev/null +++ b/doc/source/whatsnew/v0.4.x.rst @@ -0,0 +1,69 @@ +.. 
_whatsnew_04x: + +v.0.4.1 through v0.4.3 (September 25 - October 9, 2011) +------------------------------------------------------- + +{{ header }} + +New features +~~~~~~~~~~~~ + +- Added Python 3 support using 2to3 (:issue:`200`) +- :ref:`Added ` ``name`` attribute to ``Series``, now + prints as part of ``Series.__repr__`` +- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to + Series (:issue:`209`, :issue:`203`) +- :ref:`Added ` ``Series.align`` method for aligning two series + with choice of join method (ENH56_) +- :ref:`Added ` method ``get_level_values`` to + ``MultiIndex`` (:issue:`188`) +- Set values in mixed-type ``DataFrame`` objects via ``.ix`` indexing attribute (:issue:`135`) +- Added new ``DataFrame`` :ref:`methods ` + ``get_dtype_counts`` and property ``dtypes`` (ENHdc_) +- Added :ref:`ignore_index ` option to + ``DataFrame.append`` to stack DataFrames (ENH1b_) +- ``read_csv`` tries to :ref:`sniff ` delimiters using + ``csv.Sniffer`` (:issue:`146`) +- ``read_csv`` can :ref:`read ` multiple columns into a + ``MultiIndex``; DataFrame's ``to_csv`` method writes out a corresponding + ``MultiIndex`` (:issue:`151`) +- ``DataFrame.rename`` has a new ``copy`` parameter to :ref:`rename + ` a DataFrame in place (ENHed_) +- :ref:`Enable ` unstacking by name (:issue:`142`) +- :ref:`Enable ` ``sortlevel`` to work by level (:issue:`141`) + +Performance enhancements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Altered binary operations on differently-indexed SparseSeries objects + to use the integer-based (dense) alignment logic which is faster with a + larger number of blocks (:issue:`205`) +- Wrote faster Cython data alignment / merging routines resulting in + substantial speed increases +- Improved performance of ``isnull`` and ``notnull``, a regression from v0.3.0 + (:issue:`187`) +- Refactored code related to ``DataFrame.join`` so that intermediate aligned + copies of the data in each ``DataFrame`` argument do not need to be created. 
+ Substantial performance increases result (:issue:`176`) +- Substantially improved performance of generic ``Index.intersection`` and + ``Index.union`` +- Implemented ``BlockManager.take`` resulting in significantly faster ``take`` + performance on mixed-type ``DataFrame`` objects (:issue:`104`) +- Improved performance of ``Series.sort_index`` +- Significant groupby performance enhancement: removed unnecessary integrity + checks in DataFrame internals that were slowing down slicing operations to + retrieve groups +- Optimized ``_ensure_index`` function resulting in performance savings in + type-checking Index objects +- Wrote fast time series merging / joining methods in Cython. Will be + integrated later into DataFrame.join and related functions + +.. _ENH1b: https://github.com/pandas-dev/pandas/commit/1ba56251f0013ff7cd8834e9486cef2b10098371 +.. _ENHdc: https://github.com/pandas-dev/pandas/commit/dca3c5c5a6a3769ee01465baca04cfdfa66a4f76 +.. _ENHed: https://github.com/pandas-dev/pandas/commit/edd9f1945fc010a57fa0ae3b3444d1fffe592591 +.. _ENH56: https://github.com/pandas-dev/pandas/commit/56e0c9ffafac79ce262b55a6a13e1b10a88fbe93 + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.4.1..v0.4.3 diff --git a/doc/source/whatsnew/v0.5.0.rst b/doc/source/whatsnew/v0.5.0.rst new file mode 100644 index 00000000..37c52ac7 --- /dev/null +++ b/doc/source/whatsnew/v0.5.0.rst @@ -0,0 +1,59 @@ + +.. _whatsnew_050: + +v.0.5.0 (October 24, 2011) +-------------------------- + +{{ header }} + +.. ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +New features +~~~~~~~~~~~~ + +- :ref:`Added ` ``DataFrame.align`` method with standard join options +- :ref:`Added ` ``parse_dates`` option to ``read_csv`` and ``read_table`` methods to optionally try to parse dates in the index columns +- :ref:`Added ` ``nrows``, ``chunksize``, and ``iterator`` arguments to ``read_csv`` and ``read_table``. 
The last two return a new ``TextParser`` class capable of lazily iterating through chunks of a flat file (:issue:`242`) +- :ref:`Added ` ability to join on multiple columns in ``DataFrame.join`` (:issue:`214`) +- Added private ``_get_duplicates`` function to ``Index`` for identifying duplicate values more easily (ENH5c_) +- :ref:`Added ` column attribute access to DataFrame. +- :ref:`Added ` Python tab completion hook for DataFrame columns. (:issue:`233`, :issue:`230`) +- :ref:`Implemented ` ``Series.describe`` for Series containing objects (:issue:`241`) +- :ref:`Added ` inner join option to ``DataFrame.join`` when joining on key(s) (:issue:`248`) +- :ref:`Implemented ` selecting DataFrame columns by passing a list to ``__getitem__`` (:issue:`253`) +- :ref:`Implemented ` & and | to intersect / union Index objects, respectively (:issue:`261`) +- :ref:`Added` ``pivot_table`` convenience function to pandas namespace (:issue:`234`) +- :ref:`Implemented ` ``Panel.rename_axis`` function (:issue:`243`) +- DataFrame will show index level names in console output (:issue:`334`) +- :ref:`Implemented ` ``Panel.take`` +- :ref:`Added` ``set_eng_float_format`` for alternate DataFrame floating point string formatting (ENH61_) +- :ref:`Added ` convenience ``set_index`` function for creating a DataFrame index from its existing columns +- :ref:`Implemented ` ``groupby`` hierarchical index level name (:issue:`223`) +- :ref:`Added ` support for different delimiters in ``DataFrame.to_csv`` (:issue:`244`) +- TODO: DOCS ABOUT TAKE METHODS + +Performance enhancements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- VBENCH Major performance improvements in file parsing functions ``read_csv`` and ``read_table`` +- VBENCH Added Cython function for converting tuples to ndarray very fast. 
Speeds up many MultiIndex-related operations +- VBENCH Refactored merging / joining code into a tidy class and disabled unnecessary computations in the float/object case, thus getting about 10% better performance (:issue:`211`) +- VBENCH Improved speed of ``DataFrame.xs`` on mixed-type DataFrame objects by about 5x, regression from 0.3.0 (:issue:`215`) +- VBENCH With new ``DataFrame.align`` method, speeding up binary operations between differently-indexed DataFrame objects by 10-25%. +- VBENCH Significantly sped up conversion of nested dict into DataFrame (:issue:`212`) +- VBENCH Significantly speed up DataFrame ``__repr__`` and ``count`` on large mixed-type DataFrame objects + +.. _ENH61: https://github.com/pandas-dev/pandas/commit/6141961 +.. _ENH5c: https://github.com/pandas-dev/pandas/commit/5ca6ff5d822ee4ddef1ec0d87b6d83d8b4bbd3eb + + +.. _whatsnew_0.5.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.4.0..v0.5.0 diff --git a/doc/source/whatsnew/v0.6.0.rst b/doc/source/whatsnew/v0.6.0.rst new file mode 100644 index 00000000..973ba897 --- /dev/null +++ b/doc/source/whatsnew/v0.6.0.rst @@ -0,0 +1,72 @@ +.. _whatsnew_060: + +v.0.6.0 (November 25, 2011) +--------------------------- + +{{ header }} + +.. 
ipython:: python + :suppress: + + from pandas import * # noqa F401, F403 + + +New features +~~~~~~~~~~~~ +- :ref:`Added ` ``melt`` function to ``pandas.core.reshape`` +- :ref:`Added ` ``level`` parameter to group by level in Series and DataFrame descriptive statistics (:issue:`313`) +- :ref:`Added ` ``head`` and ``tail`` methods to Series, analogous to DataFrame (:issue:`296`) +- :ref:`Added ` ``Series.isin`` function which checks if each value is contained in a passed sequence (:issue:`289`) +- :ref:`Added ` ``float_format`` option to ``Series.to_string`` +- :ref:`Added ` ``skip_footer`` (:issue:`291`) and ``converters`` (:issue:`343`) options to ``read_csv`` and ``read_table`` +- :ref:`Added ` ``drop_duplicates`` and ``duplicated`` functions for removing duplicate DataFrame rows and checking for duplicate rows, respectively (:issue:`319`) +- :ref:`Implemented ` operators '&', '|', '^', '-' on DataFrame (:issue:`347`) +- :ref:`Added ` ``Series.mad``, mean absolute deviation +- :ref:`Added ` ``QuarterEnd`` DateOffset (:issue:`321`) +- :ref:`Added ` ``dot`` to DataFrame (:issue:`65`) +- Added ``orient`` option to ``Panel.from_dict`` (:issue:`359`, :issue:`301`) +- :ref:`Added ` ``orient`` option to ``DataFrame.from_dict`` +- :ref:`Added ` passing list of tuples or list of lists to ``DataFrame.from_records`` (:issue:`357`) +- :ref:`Added ` multiple levels to groupby (:issue:`103`) +- :ref:`Allow ` multiple columns in ``by`` argument of ``DataFrame.sort_index`` (:issue:`92`, :issue:`362`) +- :ref:`Added ` fast ``get_value`` and ``put_value`` methods to DataFrame (:issue:`360`) +- :ref:`Added ` ``cov`` instance methods to Series and DataFrame (:issue:`194`, :issue:`362`) +- :ref:`Added ` ``kind='bar'`` option to ``DataFrame.plot`` (:issue:`348`) +- :ref:`Added ` ``idxmin`` and ``idxmax`` to Series and DataFrame (:issue:`286`) +- :ref:`Added ` ``read_clipboard`` function to parse DataFrame from clipboard (:issue:`300`) +- :ref:`Added ` ``nunique`` function to Series 
for counting unique elements (:issue:`297`) +- :ref:`Made ` DataFrame constructor use Series name if no columns passed (:issue:`373`) +- :ref:`Support ` regular expressions in read_table/read_csv (:issue:`364`) +- :ref:`Added ` ``DataFrame.to_html`` for writing DataFrame to HTML (:issue:`387`) +- :ref:`Added ` support for MaskedArray data in DataFrame, masked values converted to NaN (:issue:`396`) +- :ref:`Added ` ``DataFrame.boxplot`` function (:issue:`368`) +- :ref:`Can ` pass extra args, kwds to DataFrame.apply (:issue:`376`) +- :ref:`Implement ` ``DataFrame.join`` with vector ``on`` argument (:issue:`312`) +- :ref:`Added ` ``legend`` boolean flag to ``DataFrame.plot`` (:issue:`324`) +- :ref:`Can ` pass multiple levels to ``stack`` and ``unstack`` (:issue:`370`) +- :ref:`Can ` pass multiple values columns to ``pivot_table`` (:issue:`381`) +- :ref:`Use ` Series name in GroupBy for result index (:issue:`363`) +- :ref:`Added ` ``raw`` option to ``DataFrame.apply`` for performance if only need ndarray (:issue:`309`) +- Added proper, tested weighted least squares to standard and panel OLS (:issue:`303`) + +Performance enhancements +~~~~~~~~~~~~~~~~~~~~~~~~ +- VBENCH Cythonized ``cache_readonly``, resulting in substantial micro-performance enhancements throughout the code base (:issue:`361`) +- VBENCH Special Cython matrix iterator for applying arbitrary reduction operations with 3-5x better performance than `np.apply_along_axis` (:issue:`309`) +- VBENCH Improved performance of ``MultiIndex.from_tuples`` +- VBENCH Special Cython matrix iterator for applying arbitrary reduction operations +- VBENCH + DOCUMENT Add ``raw`` option to ``DataFrame.apply`` for getting better performance when +- VBENCH Faster cythonized count by level in Series and DataFrame (:issue:`341`) +- VBENCH? 
Significant GroupBy performance enhancement with multiple keys with many "empty" combinations +- VBENCH New Cython vectorized function ``map_infer`` speeds up ``Series.apply`` and ``Series.map`` significantly when passed elementwise Python function, motivated by (:issue:`355`) +- VBENCH Significantly improved performance of ``Series.order``, which also makes np.unique called on a Series faster (:issue:`327`) +- VBENCH Vastly improved performance of GroupBy on axes with a MultiIndex (:issue:`299`) + + + +.. _whatsnew_0.6.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.5.0..v0.6.0 diff --git a/doc/source/whatsnew/v0.6.1.rst b/doc/source/whatsnew/v0.6.1.rst new file mode 100644 index 00000000..d0175777 --- /dev/null +++ b/doc/source/whatsnew/v0.6.1.rst @@ -0,0 +1,58 @@ + +.. _whatsnew_061: + +v.0.6.1 (December 13, 2011) +--------------------------- + +New features +~~~~~~~~~~~~ +- Can :ref:`append single rows ` (as Series) to a DataFrame +- Add Spearman and Kendall rank :ref:`correlation ` + options to Series.corr and DataFrame.corr (:issue:`428`) +- :ref:`Added ` ``get_value`` and ``set_value`` methods to + Series, DataFrame, and Panel for very low-overhead access (>2x faster in many + cases) to scalar elements (:issue:`437`, :issue:`438`). ``set_value`` is capable of + producing an enlarged object. +- Add PyQt table widget to sandbox (:issue:`435`) +- DataFrame.align can :ref:`accept Series arguments ` + and an :ref:`axis option ` (:issue:`461`) +- Implement new :ref:`SparseArray ` and `SparseList` + data structures. 
SparseSeries now derives from SparseArray (:issue:`463`) +- :ref:`Better console printing options ` (:issue:`453`) +- Implement fast :ref:`data ranking ` for Series and + DataFrame, fast versions of scipy.stats.rankdata (:issue:`428`) +- Implement `DataFrame.from_items` alternate + constructor (:issue:`444`) +- DataFrame.convert_objects method for :ref:`inferring better dtypes ` + for object columns (:issue:`302`) +- Add :ref:`rolling_corr_pairwise ` function for + computing Panel of correlation matrices (:issue:`189`) +- Add :ref:`margins ` option to :ref:`pivot_table + ` for computing subgroup aggregates (:issue:`114`) +- Add ``Series.from_csv`` function (:issue:`482`) +- :ref:`Can pass ` DataFrame/DataFrame and + DataFrame/Series to rolling_corr/rolling_cov (GH #462) +- MultiIndex.get_level_values can :ref:`accept the level name ` + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Improve memory usage of `DataFrame.describe` (do not copy data + unnecessarily) (PR #425) + +- Optimize scalar value lookups in the general case by 25% or more in Series + and DataFrame + +- Fix performance regression in cross-sectional count in DataFrame, affecting + DataFrame.dropna speed +- Column deletion in DataFrame copies no data (computes views on blocks) (GH + #158) + + + +.. _whatsnew_0.6.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.6.0..v0.6.1 diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst new file mode 100644 index 00000000..a63cd37e --- /dev/null +++ b/doc/source/whatsnew/v0.7.0.rst @@ -0,0 +1,311 @@ +.. _whatsnew_0700: + +v.0.7.0 (February 9, 2012) +-------------------------- + +{{ header }} + + +New features +~~~~~~~~~~~~ + +- New unified :ref:`merge function ` for efficiently performing + full gamut of database / relational-algebra operations. 
Refactored existing + join methods to use the new infrastructure, resulting in substantial + performance gains (:issue:`220`, :issue:`249`, :issue:`267`) + +- New :ref:`unified concatenation function ` for concatenating + Series, DataFrame or Panel objects along an axis. Can form union or + intersection of the other axes. Improves performance of ``Series.append`` and + ``DataFrame.append`` (:issue:`468`, :issue:`479`, :issue:`273`) + +- :ref:`Can ` pass multiple DataFrames to + `DataFrame.append` to concatenate (stack) and multiple Series to + ``Series.append`` too + +- :ref:`Can` pass list of dicts (e.g., a + list of JSON objects) to DataFrame constructor (:issue:`526`) + +- You can now :ref:`set multiple columns ` in a + DataFrame via ``__getitem__``, useful for transformation (:issue:`342`) + +- Handle differently-indexed output values in ``DataFrame.apply`` (:issue:`498`) + +.. ipython:: python + + df = pd.DataFrame(np.random.randn(10, 4)) + df.apply(lambda x: x.describe()) + +- :ref:`Add` ``reorder_levels`` method to Series and + DataFrame (:issue:`534`) + +- :ref:`Add` dict-like ``get`` function to DataFrame + and Panel (:issue:`521`) + +- :ref:`Add` ``DataFrame.iterrows`` method for efficiently + iterating through the rows of a DataFrame + +- Add ``DataFrame.to_panel`` with code adapted from + ``LongPanel.to_long`` + +- :ref:`Add ` ``reindex_axis`` method added to DataFrame + +- :ref:`Add ` ``level`` option to binary arithmetic functions on + ``DataFrame`` and ``Series`` + +- :ref:`Add ` ``level`` option to the ``reindex`` + and ``align`` methods on Series and DataFrame for broadcasting values across + a level (:issue:`542`, :issue:`552`, others) + +- Add attribute-based item access to + ``Panel`` and add IPython completion (:issue:`563`) + +- :ref:`Add ` ``logy`` option to ``Series.plot`` for + log-scaling on the Y axis + +- :ref:`Add ` ``index`` and ``header`` options to + ``DataFrame.to_string`` + +- :ref:`Can ` pass multiple DataFrames to + 
``DataFrame.join`` to join on index (:issue:`115`) + +- :ref:`Can ` pass multiple Panels to ``Panel.join`` + (:issue:`115`) + +- :ref:`Added ` ``justify`` argument to ``DataFrame.to_string`` + to allow different alignment of column headers + +- :ref:`Add ` ``sort`` option to GroupBy to allow disabling + sorting of the group keys for potential speedups (:issue:`595`) + +- :ref:`Can ` pass MaskedArray to Series + constructor (:issue:`563`) + +- Add Panel item access via attributes + and IPython completion (:issue:`554`) + +- Implement ``DataFrame.lookup``, fancy-indexing analogue for retrieving values + given a sequence of row and column labels (:issue:`338`) + +- Can pass a :ref:`list of functions ` to + aggregate with groupby on a DataFrame, yielding an aggregated result with + hierarchical columns (:issue:`166`) + +- Can call ``cummin`` and ``cummax`` on Series and DataFrame to get cumulative + minimum and maximum, respectively (:issue:`647`) + +- ``value_range`` added as utility function to get min and max of a dataframe + (:issue:`288`) + +- Added ``encoding`` argument to ``read_csv``, ``read_table``, ``to_csv`` and + ``from_csv`` for non-ascii text (:issue:`717`) + +- :ref:`Added ` ``abs`` method to pandas objects + +- :ref:`Added ` ``crosstab`` function for easily computing frequency tables + +- :ref:`Added ` ``isin`` method to index objects + +- :ref:`Added ` ``level`` argument to ``xs`` method of DataFrame. + + +API changes to integer indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +One of the potentially riskiest API changes in 0.7.0, but also one of the most +important, was a complete review of how **integer indexes** are handled with +regard to label-based indexing. Here is an example: + +.. ipython:: python + + s = pd.Series(np.random.randn(10), index=range(0, 20, 2)) + s + s[0] + s[2] + s[4] + +This is all exactly identical to the behavior before. 
However, if you ask for a +key **not** contained in the Series, in versions 0.6.1 and prior, Series would +*fall back* on a location-based lookup. This now raises a ``KeyError``: + +.. code-block:: ipython + + In [2]: s[1] + KeyError: 1 + +This change also has the same impact on DataFrame: + +.. code-block:: ipython + + In [3]: df = pd.DataFrame(np.random.randn(8, 4), index=range(0, 16, 2)) + + In [4]: df + 0 1 2 3 + 0 0.88427 0.3363 -0.1787 0.03162 + 2 0.14451 -0.1415 0.2504 0.58374 + 4 -1.44779 -0.9186 -1.4996 0.27163 + 6 -0.26598 -2.4184 -0.2658 0.11503 + 8 -0.58776 0.3144 -0.8566 0.61941 + 10 0.10940 -0.7175 -1.0108 0.47990 + 12 -1.16919 -0.3087 -0.6049 -0.43544 + 14 -0.07337 0.3410 0.0424 -0.16037 + + In [5]: df.ix[3] + KeyError: 3 + +In order to support purely integer-based indexing, the following methods have +been added: + +.. csv-table:: + :header: "Method","Description" + :widths: 40,60 + + ``Series.iget_value(i)``, Retrieve value stored at location ``i`` + ``Series.iget(i)``, Alias for ``iget_value`` + ``DataFrame.irow(i)``, Retrieve the ``i``-th row + ``DataFrame.icol(j)``, Retrieve the ``j``-th column + "``DataFrame.iget_value(i, j)``", Retrieve the value at row ``i`` and column ``j`` + +API tweaks regarding label-based slicing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Label-based slicing using ``ix`` now requires that the index be sorted +(monotonic) **unless** both the start and endpoint are contained in the index: + +.. code-block:: python + + In [1]: s = pd.Series(np.random.randn(6), index=list('gmkaec')) + + In [2]: s + Out[2]: + g -1.182230 + m -0.276183 + k -0.243550 + a 1.628992 + e 0.073308 + c -0.539890 + dtype: float64 + +Then this is OK: + +.. code-block:: python + + In [3]: s.ix['k':'e'] + Out[3]: + k -0.243550 + a 1.628992 + e 0.073308 + dtype: float64 + +But this is not: + +.. code-block:: ipython + + In [12]: s.ix['b':'h'] + KeyError 'b' + +If the index had been sorted, the "range selection" would have been possible: + +.. 
code-block:: python + + In [4]: s2 = s.sort_index() + + In [5]: s2 + Out[5]: + a 1.628992 + c -0.539890 + e 0.073308 + g -1.182230 + k -0.243550 + m -0.276183 + dtype: float64 + + In [6]: s2.ix['b':'h'] + Out[6]: + c -0.539890 + e 0.073308 + g -1.182230 + dtype: float64 + +Changes to Series ``[]`` operator +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As as notational convenience, you can pass a sequence of labels or a label +slice to a Series when getting and setting values via ``[]`` (i.e. the +``__getitem__`` and ``__setitem__`` methods). The behavior will be the same as +passing similar input to ``ix`` **except in the case of integer indexing**: + +.. ipython:: python + + s = pd.Series(np.random.randn(6), index=list('acegkm')) + s + s[['m', 'a', 'c', 'e']] + s['b':'l'] + s['c':'k'] + +In the case of integer indexes, the behavior will be exactly as before +(shadowing ``ndarray``): + +.. ipython:: python + + s = pd.Series(np.random.randn(6), index=range(0, 12, 2)) + s[[4, 0, 2]] + s[1:5] + +If you wish to do indexing with sequences and slicing on an integer index with +label semantics, use ``ix``. + +Other API changes +~~~~~~~~~~~~~~~~~ + +- The deprecated ``LongPanel`` class has been completely removed + +- If ``Series.sort`` is called on a column of a DataFrame, an exception will + now be raised. Before it was possible to accidentally mutate a DataFrame's + column by doing ``df[col].sort()`` instead of the side-effect free method + ``df[col].order()`` (:issue:`316`) + +- Miscellaneous renames and deprecations which will (harmlessly) raise + ``FutureWarning`` + +- ``drop`` added as an optional parameter to ``DataFrame.reset_index`` (:issue:`699`) + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- :ref:`Cythonized GroupBy aggregations ` no longer + presort the data, thus achieving a significant speedup (:issue:`93`). GroupBy + aggregations with Python functions significantly sped up by clever + manipulation of the ndarray data type in Cython (:issue:`496`). 
+- Better error message in DataFrame constructor when passed column labels + don't match data (:issue:`497`) +- Substantially improve performance of multi-GroupBy aggregation when a + Python function is passed, reuse ndarray object in Cython (:issue:`496`) +- Can store objects indexed by tuples and floats in HDFStore (:issue:`492`) +- Don't print length by default in Series.to_string, add `length` option (:issue:`489`) +- Improve Cython code for multi-groupby to aggregate without having to sort + the data (:issue:`93`) +- Improve MultiIndex reindexing speed by storing tuples in the MultiIndex, + test for backwards unpickling compatibility +- Improve column reindexing performance by using specialized Cython take + function +- Further performance tweaking of Series.__getitem__ for standard use cases +- Avoid Index dict creation in some cases (i.e. when getting slices, etc.), + regression from prior versions +- Friendlier error message in setup.py if NumPy not installed +- Use common set of NA-handling operations (sum, mean, etc.) in Panel class + also (:issue:`536`) +- Default name assignment when calling ``reset_index`` on DataFrame with a + regular (non-hierarchical) index (:issue:`476`) +- Use Cythonized groupers when possible in Series/DataFrame stat ops with + ``level`` parameter passed (:issue:`545`) +- Ported skiplist data structure to C to speed up ``rolling_median`` by about + 5-10x in most typical use cases (:issue:`374`) + + +.. _whatsnew_0.7.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.6.1..v0.7.0 diff --git a/doc/source/whatsnew/v0.7.1.rst b/doc/source/whatsnew/v0.7.1.rst new file mode 100644 index 00000000..04b548a9 --- /dev/null +++ b/doc/source/whatsnew/v0.7.1.rst @@ -0,0 +1,41 @@ +.. _whatsnew_0701: + +v.0.7.1 (February 29, 2012) +--------------------------- + +{{ header }} + + +This release includes a few new features and addresses over a dozen bugs in +0.7.0. 
+ +New features +~~~~~~~~~~~~ + + - Add ``to_clipboard`` function to pandas namespace for writing objects to + the system clipboard (:issue:`774`) + - Add ``itertuples`` method to DataFrame for iterating through the rows of a + dataframe as tuples (:issue:`818`) + - Add ability to pass fill_value and method to DataFrame and Series align + method (:issue:`806`, :issue:`807`) + - Add fill_value option to reindex, align methods (:issue:`784`) + - Enable concat to produce DataFrame from Series (:issue:`787`) + - Add ``between`` method to Series (:issue:`802`) + - Add HTML representation hook to DataFrame for the IPython HTML notebook + (:issue:`773`) + - Support for reading Excel 2007 XML documents using openpyxl + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + - Improve performance and memory usage of fillna on DataFrame + - Can concatenate a list of Series along axis=1 to obtain a DataFrame (:issue:`787`) + + + +.. _whatsnew_0.7.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.0..v0.7.1 diff --git a/doc/source/whatsnew/v0.7.2.rst b/doc/source/whatsnew/v0.7.2.rst new file mode 100644 index 00000000..ad72b081 --- /dev/null +++ b/doc/source/whatsnew/v0.7.2.rst @@ -0,0 +1,38 @@ +.. _whatsnew_0702: + +v.0.7.2 (March 16, 2012) +--------------------------- + +{{ header }} + + +This release targets bugs in 0.7.1, and adds a few minor features. 
+ +New features +~~~~~~~~~~~~ + + - Add additional tie-breaking methods in DataFrame.rank (:issue:`874`) + - Add ascending parameter to rank in Series, DataFrame (:issue:`875`) + - Add coerce_float option to DataFrame.from_records (:issue:`893`) + - Add sort_columns parameter to allow unsorted plots (:issue:`918`) + - Enable column access via attributes on GroupBy (:issue:`882`) + - Can pass dict of values to DataFrame.fillna (:issue:`661`) + - Can select multiple hierarchical groups by passing list of values in .ix + (:issue:`134`) + - Add ``axis`` option to DataFrame.fillna (:issue:`174`) + - Add level keyword to ``drop`` for dropping values from a level (:issue:`159`) + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + - Use khash for Series.value_counts, add raw function to algorithms.py (:issue:`861`) + - Intercept __builtin__.sum in groupby (:issue:`885`) + + + +.. _whatsnew_0.7.2.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.1..v0.7.2 diff --git a/doc/source/whatsnew/v0.7.3.rst b/doc/source/whatsnew/v0.7.3.rst new file mode 100644 index 00000000..020cf3bd --- /dev/null +++ b/doc/source/whatsnew/v0.7.3.rst @@ -0,0 +1,101 @@ +.. _whatsnew_0703: + +v.0.7.3 (April 12, 2012) +------------------------ + +{{ header }} + + +This is a minor release from 0.7.2 and fixes many minor bugs and adds a number +of nice new features. There are also a couple of API changes to note; these +should not affect very many users, and we are inclined to call them "bug fixes" +even though they do constitute a change in behavior. See the :ref:`full release +notes ` or issue +tracker on GitHub for a complete list. + +New features +~~~~~~~~~~~~ + +- New :ref:`fixed width file reader `, ``read_fwf`` +- New :ref:`scatter_matrix ` function for making + a scatter plot matrix + +.. 
code-block:: python + + from pandas.tools.plotting import scatter_matrix + scatter_matrix(df, alpha=0.2) # noqa F821 + + +- Add ``stacked`` argument to Series and DataFrame's ``plot`` method for + :ref:`stacked bar plots `. + +.. code-block:: python + + df.plot(kind='bar', stacked=True) # noqa F821 + + +.. code-block:: python + + df.plot(kind='barh', stacked=True) # noqa F821 + + +- Add log x and y :ref:`scaling options ` to + ``DataFrame.plot`` and ``Series.plot`` +- Add ``kurt`` methods to Series and DataFrame for computing kurtosis + + +NA Boolean comparison API change +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Reverted some changes to how NA values (represented typically as ``NaN`` or +``None``) are handled in non-numeric Series: + +.. ipython:: python + + series = pd.Series(['Steve', np.nan, 'Joe']) + series == 'Steve' + series != 'Steve' + +In comparisons, NA / NaN will always come through as ``False`` except with +``!=`` which is ``True``. *Be very careful* with boolean arithmetic, especially +negation, in the presence of NA data. You may wish to add an explicit NA +filter into boolean array operations if you are worried about this: + +.. ipython:: python + + mask = series == 'Steve' + series[mask & series.notnull()] + +While propagating NA in comparisons may seem like the right behavior to some +users (and you could argue on purely technical grounds that this is the right +thing to do), the evaluation was made that propagating NA everywhere, including +in numerical arrays, would cause a large amount of problems for users. Thus, a +"practicality beats purity" approach was taken. This issue may be revisited at +some point in the future. + +Other API changes +~~~~~~~~~~~~~~~~~ + +When calling ``apply`` on a grouped Series, the return value will also be a +Series, to be more consistent with the ``groupby`` behavior with DataFrame: + +.. 
ipython:: python + :okwarning: + + df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), 'D': np.random.randn(8)}) + df + grouped = df.groupby('A')['C'] + grouped.describe() + grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values + + +.. _whatsnew_0.7.3.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.2..v0.7.3 diff --git a/doc/source/whatsnew/v0.8.0.rst b/doc/source/whatsnew/v0.8.0.rst new file mode 100644 index 00000000..072d1bae --- /dev/null +++ b/doc/source/whatsnew/v0.8.0.rst @@ -0,0 +1,281 @@ +.. _whatsnew_080: + +v0.8.0 (June 29, 2012) +------------------------ + +{{ header }} + + +This is a major release from 0.7.3 and includes extensive work on the time +series handling and processing infrastructure as well as a great deal of new +functionality throughout the library. It includes over 700 commits from more +than 20 distinct authors. Most pandas 0.7.3 and earlier users should not +experience any issues upgrading, but due to the migration to the NumPy +datetime64 dtype, there may be a number of bugs and incompatibilities +lurking. Lingering incompatibilities will be fixed ASAP in a 0.8.1 release if +necessary. See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + +Support for non-unique indexes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +All objects can now work with non-unique indexes. Data alignment / join +operations work according to SQL join semantics (including, if application, +index duplication in many-to-many joins) + +NumPy datetime64 dtype and 1.6 dependency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Time series data are now represented using NumPy's datetime64 dtype; thus, +pandas 0.8.0 now requires at least NumPy 1.6. 
It has been tested and verified +to work with the development version (1.7+) of NumPy as well which includes +some significant user-facing API changes. NumPy 1.6 also has a number of bugs +having to do with nanosecond resolution data, so I recommend that you steer +clear of NumPy 1.6's datetime64 API functions (though limited as they are) and +only interact with this data using the interface that pandas provides. + +See the end of the 0.8.0 section for a "porting" guide listing potential issues +for users migrating legacy code bases from pandas 0.7 or earlier to 0.8.0. + +Bug fixes to the 0.7.x series for legacy NumPy < 1.6 users will be provided as +they arise. There will be no more further development in 0.7.x beyond bug +fixes. + +Time series changes and improvements +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + With this release, legacy scikits.timeseries users should be able to port + their code to use pandas. + +.. note:: + + See :ref:`documentation ` for overview of pandas timeseries API. + +- New datetime64 representation **speeds up join operations and data + alignment**, **reduces memory usage**, and improve serialization / + deserialization performance significantly over datetime.datetime +- High performance and flexible **resample** method for converting from + high-to-low and low-to-high frequency. Supports interpolation, user-defined + aggregation functions, and control over how the intervals and result labeling + are defined. A suite of high performance Cython/C-based resampling functions + (including Open-High-Low-Close) have also been implemented. +- Revamp of :ref:`frequency aliases ` and support for + **frequency shortcuts** like '15min', or '1h30min' +- New :ref:`DatetimeIndex class ` supports both fixed + frequency and irregular time + series. 
Replaces now deprecated DateRange class +- New ``PeriodIndex`` and ``Period`` classes for representing + :ref:`time spans ` and performing **calendar logic**, + including the `12 fiscal quarterly frequencies `. + This is a partial port of, and a substantial enhancement to, + elements of the scikits.timeseries code base. Support for conversion between + PeriodIndex and DatetimeIndex +- New Timestamp data type subclasses `datetime.datetime`, providing the same + interface while enabling working with nanosecond-resolution data. Also + provides :ref:`easy time zone conversions `. +- Enhanced support for :ref:`time zones `. Add + `tz_convert` and ``tz_localize`` methods to TimeSeries and DataFrame. All + timestamps are stored as UTC; Timestamps from DatetimeIndex objects with time + zone set will be localized to local time. Time zone conversions are therefore + essentially free. User needs to know very little about pytz library now; only + time zone names as as strings are required. Time zone-aware timestamps are + equal if and only if their UTC timestamps match. Operations between time + zone-aware time series with different time zones will result in a UTC-indexed + time series. +- Time series **string indexing conveniences** / shortcuts: slice years, year + and month, and index values with strings +- Enhanced time series **plotting**; adaptation of scikits.timeseries + matplotlib-based plotting code +- New ``date_range``, ``bdate_range``, and ``period_range`` :ref:`factory + functions ` +- Robust **frequency inference** function `infer_freq` and ``inferred_freq`` + property of DatetimeIndex, with option to infer frequency on construction of + DatetimeIndex +- to_datetime function efficiently **parses array of strings** to + DatetimeIndex. 
DatetimeIndex will parse array or list of strings to + datetime64 +- **Optimized** support for datetime64-dtype data in Series and DataFrame + columns +- New NaT (Not-a-Time) type to represent **NA** in timestamp arrays +- Optimize Series.asof for looking up **"as of" values** for arrays of + timestamps +- Milli, Micro, Nano date offset objects +- Can index time series with datetime.time objects to select all data at + particular **time of day** (``TimeSeries.at_time``) or **between two times** + (``TimeSeries.between_time``) +- Add :ref:`tshift ` method for leading/lagging + using the frequency (if any) of the index, as opposed to a naive lead/lag + using shift + +Other new features +~~~~~~~~~~~~~~~~~~ + +- New :ref:`cut ` and ``qcut`` functions (like R's cut + function) for computing a categorical variable from a continuous variable by + binning values either into value-based (``cut``) or quantile-based (``qcut``) + bins +- Rename ``Factor`` to ``Categorical`` and add a number of usability features +- Add :ref:`limit ` argument to fillna/reindex +- More flexible multiple function application in GroupBy, and can pass list + (name, function) tuples to get result in particular order with given names +- Add flexible :ref:`replace ` method for efficiently + substituting values +- Enhanced :ref:`read_csv/read_table ` for reading time series + data and converting multiple columns to dates +- Add :ref:`comments ` option to parser functions: read_csv, etc. +- Add :ref:`dayfirst ` option to parser functions for parsing + international DD/MM/YYYY dates +- Allow the user to specify the CSV reader :ref:`dialect ` to + control quoting etc. +- Handling :ref:`thousands ` separators in read_csv to improve + integer parsing. +- Enable unstacking of multiple levels in one shot. 
Alleviate ``pivot_table`` + bugs (empty columns being introduced) +- Move to klib-based hash tables for indexing; better performance and less + memory usage than Python's dict +- Add first, last, min, max, and prod optimized GroupBy functions +- New :ref:`ordered_merge ` function +- Add flexible :ref:`comparison ` instance methods eq, ne, lt, + gt, etc. to DataFrame, Series +- Improve :ref:`scatter_matrix ` plotting + function and add histogram or kernel density estimates to diagonal +- Add :ref:`'kde' ` plot option for density plots +- Support for converting DataFrame to R data.frame through rpy2 +- Improved support for complex numbers in Series and DataFrame +- Add :ref:`pct_change ` method to all data structures +- Add max_colwidth configuration option for DataFrame console output +- :ref:`Interpolate ` Series values using index values +- Can select multiple columns from GroupBy +- Add :ref:`update ` methods to Series/DataFrame + for updating values in place +- Add ``any`` and ``all`` method to DataFrame + +New plotting methods +~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + + import pandas as pd + fx = pd.read_pickle('data/fx_prices') + import matplotlib.pyplot as plt + +``Series.plot`` now supports a ``secondary_y`` option: + +.. code-block:: python + + plt.figure() + + fx['FR'].plot(style='g') + + fx['IT'].plot(style='k--', secondary_y=True) + +Vytautas Jancauskas, the 2012 GSOC participant, has added many new plot +types. For example, ``'kde'`` is a new option: + +.. ipython:: python + + s = pd.Series(np.concatenate((np.random.randn(1000), + np.random.randn(1000) * 0.5 + 3))) + plt.figure() + s.hist(density=True, alpha=0.2) + s.plot(kind='kde') + +See :ref:`the plotting page ` for much more. + +Other API changes +~~~~~~~~~~~~~~~~~ + +- Deprecation of ``offset``, ``time_rule``, and ``timeRule`` arguments names in + time series functions. Warnings will be printed until pandas 0.9 or 1.0. 
+ +Potential porting issues for pandas <= 0.7.3 users +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The major change that may affect you in pandas 0.8.0 is that time series +indexes use NumPy's ``datetime64`` data type instead of ``dtype=object`` arrays +of Python's built-in ``datetime.datetime`` objects. ``DateRange`` has been +replaced by ``DatetimeIndex`` but otherwise behaved identically. But, if you +have code that converts ``DateRange`` or ``Index`` objects that used to contain +``datetime.datetime`` values to plain NumPy arrays, you may have bugs lurking +with code using scalar values because you are handing control over to NumPy: + +.. ipython:: python + + import datetime + rng = pd.date_range('1/1/2000', periods=10) + rng[5] + isinstance(rng[5], datetime.datetime) + rng_asarray = np.asarray(rng) + scalar_val = rng_asarray[5] + type(scalar_val) + +pandas's ``Timestamp`` object is a subclass of ``datetime.datetime`` that has +nanosecond support (the ``nanosecond`` field store the nanosecond value between +0 and 999). It should substitute directly into any code that used +``datetime.datetime`` values before. Thus, I recommend not casting +``DatetimeIndex`` to regular NumPy arrays. + +If you have code that requires an array of ``datetime.datetime`` objects, you +have a couple of options. First, the ``astype(object)`` method of ``DatetimeIndex`` +produces an array of ``Timestamp`` objects: + +.. ipython:: python + + stamp_array = rng.astype(object) + stamp_array + stamp_array[5] + +To get an array of proper ``datetime.datetime`` objects, use the +``to_pydatetime`` method: + +.. ipython:: python + + dt_array = rng.to_pydatetime() + dt_array + dt_array[5] + +matplotlib knows how to handle ``datetime.datetime`` but not Timestamp +objects. While I recommend that you plot time series using ``TimeSeries.plot``, +you can either use ``to_pydatetime`` or register a converter for the Timestamp +type. See `matplotlib documentation +`__ for more on this. + +.. 
warning:: + + There are bugs in the user-facing API with the nanosecond datetime64 unit + in NumPy 1.6. In particular, the string version of the array shows garbage + values, and conversion to ``dtype=object`` is similarly broken. + + .. ipython:: python + + rng = pd.date_range('1/1/2000', periods=10) + rng + np.asarray(rng) + converted = np.asarray(rng, dtype=object) + converted[5] + + **Trust me: don't panic**. If you are using NumPy 1.6 and restrict your + interaction with ``datetime64`` values to pandas's API you will be just + fine. There is nothing wrong with the data-type (a 64-bit integer + internally); all of the important data processing happens in pandas and is + heavily tested. I strongly recommend that you **do not work directly with + datetime64 arrays in NumPy 1.6** and only use the pandas API. + + +**Support for non-unique indexes**: In the latter case, you may have code +inside a ``try:... catch:`` block that failed due to the index not being +unique. In many cases it will no longer fail (some method like ``append`` still +check for uniqueness unless disabled). However, all is not lost: you can +inspect ``index.is_unique`` and raise an exception explicitly if it is +``False`` or go to a different code branch. + + +.. _whatsnew_0.8.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.7.3..v0.8.0 diff --git a/doc/source/whatsnew/v0.8.1.rst b/doc/source/whatsnew/v0.8.1.rst new file mode 100644 index 00000000..1e6b9746 --- /dev/null +++ b/doc/source/whatsnew/v0.8.1.rst @@ -0,0 +1,47 @@ +.. _whatsnew_0801: + +v0.8.1 (July 22, 2012) +---------------------- + +{{ header }} + + +This release includes a few new features, performance enhancements, and over 30 +bug fixes from 0.8.0. New features include notably NA friendly string +processing functionality and a series of new plot types and options. 
+ +New features +~~~~~~~~~~~~ + + - Add :ref:`vectorized string processing methods ` + accessible via Series.str (:issue:`620`) + - Add option to disable adjustment in EWMA (:issue:`1584`) + - :ref:`Radviz plot ` (:issue:`1566`) + - :ref:`Parallel coordinates plot ` + - :ref:`Bootstrap plot ` + - Per column styles and secondary y-axis plotting (:issue:`1559`) + - New datetime converters millisecond plotting (:issue:`1599`) + - Add option to disable "sparse" display of hierarchical indexes (:issue:`1538`) + - Series/DataFrame's ``set_index`` method can :ref:`append levels + ` to an existing Index/MultiIndex (:issue:`1569`, :issue:`1577`) + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + + - Improved implementation of rolling min and max (thanks to `Bottleneck + `__ !) + - Add accelerated ``'median'`` GroupBy option (:issue:`1358`) + - Significantly improve the performance of parsing ISO8601-format date + strings with ``DatetimeIndex`` or ``to_datetime`` (:issue:`1571`) + - Improve the performance of GroupBy on single-key aggregations and use with + Categorical types + - Significant datetime parsing performance improvements + + + +.. _whatsnew_0.8.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.8.0..v0.8.1 diff --git a/doc/source/whatsnew/v0.9.0.rst b/doc/source/whatsnew/v0.9.0.rst new file mode 100644 index 00000000..3d9ff3c7 --- /dev/null +++ b/doc/source/whatsnew/v0.9.0.rst @@ -0,0 +1,107 @@ +.. _whatsnew_0900: + +{{ header }} + + +v0.9.0 (October 7, 2012) +------------------------ + +This is a major release from 0.8.1 and includes several new features and +enhancements along with a large number of bug fixes. New features include +vectorized unicode encoding/decoding for `Series.str`, `to_latex` method to +DataFrame, more flexible parsing of boolean values, and enabling the download of +options data from Yahoo! Finance. 
+ +New features +~~~~~~~~~~~~ + + - Add ``encode`` and ``decode`` for unicode handling to :ref:`vectorized + string processing methods ` in Series.str (:issue:`1706`) + - Add ``DataFrame.to_latex`` method (:issue:`1735`) + - Add convenient expanding window equivalents of all rolling_* ops (:issue:`1785`) + - Add Options class to pandas.io.data for fetching options data from Yahoo! + Finance (:issue:`1748`, :issue:`1739`) + - More flexible parsing of boolean values (Yes, No, TRUE, FALSE, etc) + (:issue:`1691`, :issue:`1295`) + - Add ``level`` parameter to ``Series.reset_index`` + - ``TimeSeries.between_time`` can now select times across midnight (:issue:`1871`) + - Series constructor can now handle generator as input (:issue:`1679`) + - ``DataFrame.dropna`` can now take multiple axes (tuple/list) as input + (:issue:`924`) + - Enable ``skip_footer`` parameter in ``ExcelFile.parse`` (:issue:`1843`) + +API changes +~~~~~~~~~~~ + + - The default column names when ``header=None`` and no columns names passed to + functions like ``read_csv`` has changed to be more Pythonic and amenable to + attribute access: + +.. ipython:: python + + import io + + data = ('0,0,1\n' + '1,1,0\n' + '0,1,0') + df = pd.read_csv(io.StringIO(data), header=None) + df + + +- Creating a Series from another Series, passing an index, will cause reindexing + to happen inside rather than treating the Series like an ndarray. Technically + improper usages like ``Series(df[col1], index=df[col2])`` that worked before + "by accident" (this was never intended) will lead to all NA Series in some + cases. To be perfectly clear: + +.. ipython:: python + + s1 = pd.Series([1, 2, 3]) + s1 + + s2 = pd.Series(s1, index=['foo', 'bar', 'baz']) + s2 + +- Deprecated ``day_of_year`` API removed from PeriodIndex, use ``dayofyear`` + (:issue:`1723`) + +- Don't modify NumPy suppress printoption to True at import time + +- The internal HDF5 data arrangement for DataFrames has been transposed. 
Legacy + files will still be readable by HDFStore (:issue:`1834`, :issue:`1824`) + +- Legacy cruft removed: pandas.stats.misc.quantileTS + +- Use ISO8601 format for Period repr: monthly, daily, and on down (:issue:`1776`) + +- Empty DataFrame columns are now created as object dtype. This will prevent a + class of TypeErrors that was occurring in code where the dtype of a column + would depend on the presence of data or not (e.g. a SQL query having results) + (:issue:`1783`) + +- Setting parts of DataFrame/Panel using ix now aligns input Series/DataFrame + (:issue:`1630`) + +- ``first`` and ``last`` methods in ``GroupBy`` no longer drop non-numeric + columns (:issue:`1809`) + +- Resolved inconsistencies in specifying custom NA values in text parser. + ``na_values`` of type dict no longer override default NAs unless + ``keep_default_na`` is set to false explicitly (:issue:`1657`) + +- ``DataFrame.dot`` will not do data alignment, and also work with Series + (:issue:`1915`) + + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + + +.. _whatsnew_0.9.0.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.8.1..v0.9.0 diff --git a/doc/source/whatsnew/v0.9.1.rst b/doc/source/whatsnew/v0.9.1.rst new file mode 100644 index 00000000..b8932ae2 --- /dev/null +++ b/doc/source/whatsnew/v0.9.1.rst @@ -0,0 +1,170 @@ +.. _whatsnew_0901: + +v0.9.1 (November 14, 2012) +-------------------------- + +{{ header }} + + +This is a bug fix release from 0.9.0 and includes several new features and +enhancements along with a large number of bug fixes. The new features include +by-column sort order for DataFrame and Series, improved NA handling for the rank +method, masking functions for DataFrame, and intraday time-series filtering for +DataFrame. + +New features +~~~~~~~~~~~~ + + - `Series.sort`, `DataFrame.sort`, and `DataFrame.sort_index` can now be + specified in a per-column manner to support multiple sort orders (:issue:`928`) + + .. 
code-block:: ipython + + In [2]: df = pd.DataFrame(np.random.randint(0, 2, (6, 3)), + ...: columns=['A', 'B', 'C']) + + In [3]: df.sort(['A', 'B'], ascending=[1, 0]) + + Out[3]: + A B C + 3 0 1 1 + 4 0 1 1 + 2 0 0 1 + 0 1 0 0 + 1 1 0 0 + 5 1 0 0 + + - `DataFrame.rank` now supports additional argument values for the + `na_option` parameter so missing values can be assigned either the largest + or the smallest rank (:issue:`1508`, :issue:`2159`) + + .. ipython:: python + + df = pd.DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C']) + + df.loc[2:4] = np.nan + + df.rank() + + df.rank(na_option='top') + + df.rank(na_option='bottom') + + + - DataFrame has new `where` and `mask` methods to select values according to a + given boolean mask (:issue:`2109`, :issue:`2151`) + + DataFrame currently supports slicing via a boolean vector the same length as the DataFrame (inside the `[]`). + The returned DataFrame has the same number of columns as the original, but is sliced on its index. + + .. ipython:: python + + df = DataFrame(np.random.randn(5, 3), columns = ['A','B','C']) + + df + + df[df['A'] > 0] + + If a DataFrame is sliced with a DataFrame based boolean condition (with the same size as the original DataFrame), + then a DataFrame the same size (index and columns) as the original is returned, with + elements that do not meet the boolean condition as `NaN`. This is accomplished via + the new method `DataFrame.where`. In addition, `where` takes an optional `other` argument for replacement. + + .. ipython:: python + + df[df>0] + + df.where(df>0) + + df.where(df>0,-df) + + Furthermore, `where` now aligns the input boolean condition (ndarray or DataFrame), such that partial selection + with setting is possible. This is analogous to partial setting via `.ix` (but on the contents rather than the axis labels) + + .. ipython:: python + + df2 = df.copy() + df2[ df2[1:4] > 0 ] = 3 + df2 + + `DataFrame.mask` is the inverse boolean operation of `where`. + + .. 
ipython:: python + + df.mask(df<=0) + + - Enable referencing of Excel columns by their column names (:issue:`1936`) + + .. ipython:: python + + xl = pd.ExcelFile('data/test.xls') + xl.parse('Sheet1', index_col=0, parse_dates=True, + parse_cols='A:D') + + + - Added option to disable pandas-style tick locators and formatters + using `series.plot(x_compat=True)` or `pandas.plot_params['x_compat'] = + True` (:issue:`2205`) + - Existing TimeSeries methods `at_time` and `between_time` were added to + DataFrame (:issue:`2149`) + - DataFrame.dot can now accept ndarrays (:issue:`2042`) + - DataFrame.drop now supports non-unique indexes (:issue:`2101`) + - Panel.shift now supports negative periods (:issue:`2164`) + - DataFrame now support unary ~ operator (:issue:`2110`) + +API changes +~~~~~~~~~~~ + + - Upsampling data with a PeriodIndex will result in a higher frequency + TimeSeries that spans the original time window + + .. code-block:: ipython + + In [1]: prng = pd.period_range('2012Q1', periods=2, freq='Q') + + In [2]: s = pd.Series(np.random.randn(len(prng)), prng) + + In [4]: s.resample('M') + Out[4]: + 2012-01 -1.471992 + 2012-02 NaN + 2012-03 NaN + 2012-04 -0.493593 + 2012-05 NaN + 2012-06 NaN + Freq: M, dtype: float64 + + - Period.end_time now returns the last nanosecond in the time interval + (:issue:`2124`, :issue:`2125`, :issue:`1764`) + + .. ipython:: python + + p = pd.Period('2012') + + p.end_time + + + - File parsers no longer coerce to float or bool for columns that have custom + converters specified (:issue:`2184`) + + .. ipython:: python + + import io + + data = ('A,B,C\n' + '00001,001,5\n' + '00002,002,6') + pd.read_csv(io.StringIO(data), converters={'A': lambda x: x.strip()}) + + +See the :ref:`full release notes +` or issue tracker +on GitHub for a complete list. + + +.. _whatsnew_0.9.1.contributors: + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v0.9.0..v0.9.1 diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst new file mode 100755 index 00000000..b0b88c8b --- /dev/null +++ b/doc/source/whatsnew/v1.0.0.rst @@ -0,0 +1,1294 @@ +.. _whatsnew_100: + +What's new in 1.0.0 (January 29, 2020) +-------------------------------------- + +These are the changes in pandas 1.0.0. See :ref:`release` for a full changelog +including other versions of pandas. + +.. note:: + + The pandas 1.0 release removed a lot of functionality that was deprecated + in previous releases (see :ref:`below ` + for an overview). It is recommended to first upgrade to pandas 0.25 and to + ensure your code is working without warnings, before upgrading to pandas + 1.0. + + +New Deprecation Policy +~~~~~~~~~~~~~~~~~~~~~~ + +Starting with Pandas 1.0.0, pandas will adopt a variant of `SemVer`_ to +version releases. Briefly, + +* Deprecations will be introduced in minor releases (e.g. 1.1.0, 1.2.0, 2.1.0, ...) +* Deprecations will be enforced in major releases (e.g. 1.0.0, 2.0.0, 3.0.0, ...) +* API-breaking changes will be made only in major releases (except for experimental features) + +See :ref:`policies.version` for more. + +.. _2019 Pandas User Survey: http://dev.pandas.io/pandas-blog/2019-pandas-user-survey.html +.. _SemVer: https://semver.org + +{{ header }} + +.. --------------------------------------------------------------------------- + +Enhancements +~~~~~~~~~~~~ + +.. _whatsnew_100.numba_rolling_apply: + +Using Numba in ``rolling.apply`` and ``expanding.apply`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added an ``engine`` keyword to :meth:`~core.window.rolling.Rolling.apply` and :meth:`~core.window.expanding.Expanding.apply` +that allows the user to execute the routine using `Numba `__ instead of Cython. +Using the Numba engine can yield significant performance gains if the apply function can operate on numpy arrays and +the data set is larger (1 million rows or greater). 
For more details, see +:ref:`rolling apply documentation ` (:issue:`28987`, :issue:`30936`) + +.. _whatsnew_100.custom_window: + +Defining custom windows for rolling operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added a :func:`pandas.api.indexers.BaseIndexer` class that allows users to define how +window bounds are created during ``rolling`` operations. Users can define their own ``get_window_bounds`` +method on a :func:`pandas.api.indexers.BaseIndexer` subclass that will generate the start and end +indices used for each window during the rolling aggregation. For more details and example usage, see +the :ref:`custom window rolling documentation ` + +.. _whatsnew_100.to_markdown: + +Converting to Markdown +^^^^^^^^^^^^^^^^^^^^^^ + +We've added :meth:`~DataFrame.to_markdown` for creating a markdown table (:issue:`11052`) + +.. ipython:: python + + df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=['a', 'a', 'b']) + print(df.to_markdown()) + +Experimental new features +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_100.NA: + +Experimental ``NA`` scalar to denote missing values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A new ``pd.NA`` value (singleton) is introduced to represent scalar missing +values. Up to now, pandas used several values to represent missing data: ``np.nan`` is used for this for float data, ``np.nan`` or +``None`` for object-dtype data and ``pd.NaT`` for datetime-like data. The +goal of ``pd.NA`` is to provide a "missing" indicator that can be used +consistently across data types. ``pd.NA`` is currently used by the nullable integer and boolean +data types and the new string data type (:issue:`28095`). + +.. warning:: + + Experimental: the behaviour of ``pd.NA`` can still change without warning. + +For example, creating a Series using the nullable integer dtype: + +.. 
ipython:: python + + s = pd.Series([1, 2, None], dtype="Int64") + s + s[2] + +Compared to ``np.nan``, ``pd.NA`` behaves differently in certain operations. +In addition to arithmetic operations, ``pd.NA`` also propagates as "missing" +or "unknown" in comparison operations: + +.. ipython:: python + + np.nan > 1 + pd.NA > 1 + +For logical operations, ``pd.NA`` follows the rules of the +`three-valued logic `__ (or +*Kleene logic*). For example: + +.. ipython:: python + + pd.NA | True + +For more, see :ref:`NA section ` in the user guide on missing +data. + + +.. _whatsnew_100.string: + +Dedicated string data type +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :class:`StringDtype`, an extension type dedicated to string data. +Previously, strings were typically stored in object-dtype NumPy arrays. (:issue:`29975`) + +.. warning:: + + ``StringDtype`` is currently considered experimental. The implementation + and parts of the API may change without warning. + +The ``'string'`` extension type solves several issues with object-dtype NumPy arrays: + +1. You can accidentally store a *mixture* of strings and non-strings in an + ``object`` dtype array. A ``StringArray`` can only store strings. +2. ``object`` dtype breaks dtype-specific operations like :meth:`DataFrame.select_dtypes`. + There isn't a clear way to select *just* text while excluding non-text, + but still object-dtype columns. +3. When reading code, the contents of an ``object`` dtype array is less clear + than ``string``. + + +.. ipython:: python + + pd.Series(['abc', None, 'def'], dtype=pd.StringDtype()) + +You can use the alias ``"string"`` as well. + +.. ipython:: python + + s = pd.Series(['abc', None, 'def'], dtype="string") + s + +The usual string accessor methods work. Where appropriate, the return type +of the Series or columns of a DataFrame will also have string dtype. + +.. 
ipython:: python + + s.str.upper() + s.str.split('b', expand=True).dtypes + +String accessor methods returning integers will return a value with :class:`Int64Dtype` + +.. ipython:: python + + s.str.count("a") + +We recommend explicitly using the ``string`` data type when working with strings. +See :ref:`text.types` for more. + +.. _whatsnew_100.boolean: + +Boolean data type with missing values support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We've added :class:`BooleanDtype` / :class:`~arrays.BooleanArray`, an extension +type dedicated to boolean data that can hold missing values. With the default +``bool`` data type, which is based on a bool-dtype NumPy array, a column can only hold +``True`` or ``False``, and not missing values. This new :class:`~arrays.BooleanArray` +can store missing values as well by keeping track of this in a separate mask. +(:issue:`29555`, :issue:`30095`, :issue:`31131`) + +.. ipython:: python + + pd.Series([True, False, None], dtype=pd.BooleanDtype()) + +You can use the alias ``"boolean"`` as well. + +.. ipython:: python + + s = pd.Series([True, False, None], dtype="boolean") + s + +.. _whatsnew_100.convert_dtypes: + +``convert_dtypes`` method to ease use of supported extension dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In order to encourage use of the extension dtypes ``StringDtype``, +``BooleanDtype``, ``Int64Dtype``, ``Int32Dtype``, etc., that support ``pd.NA``, the +methods :meth:`DataFrame.convert_dtypes` and :meth:`Series.convert_dtypes` +have been introduced. (:issue:`29752`) (:issue:`30929`) + +Example: + +.. ipython:: python + + df = pd.DataFrame({'x': ['abc', None, 'def'], + 'y': [1, 2, np.nan], + 'z': [True, False, True]}) + df + df.dtypes + +.. ipython:: python + + converted = df.convert_dtypes() + converted + converted.dtypes + +This is especially useful after reading in data using readers such as :func:`read_csv` +and :func:`read_excel`. +See :ref:`here <missing_data.NA.conversion>` for a description. + + +.. 
_whatsnew_100.enhancements.other: + +Other enhancements +~~~~~~~~~~~~~~~~~~ + +- :meth:`DataFrame.to_string` added the ``max_colwidth`` parameter to control when wide columns are truncated (:issue:`9784`) +- Added the ``na_value`` argument to :meth:`Series.to_numpy`, :meth:`Index.to_numpy` and :meth:`DataFrame.to_numpy` to control the value used for missing data (:issue:`30322`) +- :meth:`MultiIndex.from_product` infers level names from inputs if not explicitly provided (:issue:`27292`) +- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) +- DataFrames with :ref:`nullable integer `, the :ref:`new string dtype ` + and period data type can now be converted to ``pyarrow`` (>=0.15.0), which means that it is + supported in writing to the Parquet file format when using the ``pyarrow`` engine (:issue:`28368`). + Full roundtrip to parquet (writing and reading back in with :meth:`~DataFrame.to_parquet` / :func:`read_parquet`) + is supported starting with pyarrow >= 0.16 (:issue:`20612`). +- :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue:`30270`) +- :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) +- :meth:`read_stata` can read Stata 119 dta files. 
(:issue:`28250`) +- Implemented :meth:`pandas.core.window.Window.var` and :meth:`pandas.core.window.Window.std` functions (:issue:`26597`) +- Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) +- Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) +- :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`) +- :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`) +- :func:`read_excel` now can read binary Excel (``.xlsb``) files by passing ``engine='pyxlsb'``. For more details and example usage, see the :ref:`Binary Excel files documentation <io.xlsb>`. Closes :issue:`8540`. +- The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) +- :func:`pandas.read_json` now parses ``NaN``, ``Infinity`` and ``-Infinity`` (:issue:`12213`) +- The DataFrame constructor now preserves `ExtensionArray` dtype when given an `ExtensionArray` (:issue:`11363`) +- :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) +- :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` have gained ``ignore_index`` keyword to reset index (:issue:`30114`) +- :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) +- Added new writer for exporting Stata dta files in versions 118 and 119, ``StataWriterUTF8``. These file formats support exporting strings containing Unicode characters. 
Format 119 supports data sets with more than 32,767 variables (:issue:`23573`, :issue:`30959`) +- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`) +- Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) +- :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`) +- :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`) + + + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.api_breaking: + +Backwards incompatible API changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. _whatsnew_100.api_breaking.MultiIndex._names: + +Avoid using names from ``MultiIndex.levels`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As part of a larger refactor to :class:`MultiIndex` the level names are now +stored separately from the levels (:issue:`27242`). We recommend using +:attr:`MultiIndex.names` to access the names, and :meth:`Index.set_names` +to update the names. + +For backwards compatibility, you can still *access* the names via the levels. + +.. ipython:: python + + mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y']) + mi.levels[0].name + +However, it is no longer possible to *update* the names of the ``MultiIndex`` +via the level. + +.. ipython:: python + :okexcept: + + mi.levels[0].name = "new name" + mi.names + +To update, use ``MultiIndex.set_names``, which returns a new ``MultiIndex``. + +.. ipython:: python + + mi2 = mi.set_names("new name", level=0) + mi2.names + +New repr for :class:`~pandas.arrays.IntervalArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`pandas.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`) + +*pandas 0.25.x* + +.. 
code-block:: ipython + + In [1]: pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) + Out[2]: + IntervalArray([(0, 1], (2, 3]], + closed='right', + dtype='interval[int64]') + +*pandas 1.0.0* + +.. ipython:: python + + pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)]) + +``DataFrame.rename`` now only accepts one positional argument +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.rename` would previously accept positional arguments that would lead +to ambiguous or undefined behavior. From pandas 1.0, only the very first argument, which +maps labels to their new names along the default axis, is allowed to be passed by position +(:issue:`29136`). + +.. ipython:: python + :suppress: + + df = pd.DataFrame([[1]]) + +*pandas 0.25.x* + +.. code-block:: python + + >>> df = pd.DataFrame([[1]]) + >>> df.rename({0: 1}, {0: 2}) + FutureWarning: ...Use named arguments to resolve ambiguity... + 2 + 1 1 + +*pandas 1.0.0* + +.. code-block:: python + + >>> df.rename({0: 1}, {0: 2}) + Traceback (most recent call last): + ... + TypeError: rename() takes from 1 to 2 positional arguments but 3 were given + +Note that errors will now be raised when conflicting or potentially ambiguous arguments are provided. + +*pandas 0.25.x* + +.. code-block:: python + + >>> df.rename({0: 1}, index={0: 2}) + 0 + 1 1 + + >>> df.rename(mapper={0: 1}, index={0: 2}) + 0 + 2 1 + +*pandas 1.0.0* + +.. code-block:: python + + >>> df.rename({0: 1}, index={0: 2}) + Traceback (most recent call last): + ... + TypeError: Cannot specify both 'mapper' and any of 'index' or 'columns' + + >>> df.rename(mapper={0: 1}, index={0: 2}) + Traceback (most recent call last): + ... + TypeError: Cannot specify both 'mapper' and any of 'index' or 'columns' + +You can still change the axis along which the first positional argument is applied by +supplying the ``axis`` keyword argument. + +.. 
ipython:: python + + df.rename({0: 1}) + df.rename({0: 1}, axis=1) + +If you would like to update both the index and column labels, be sure to use the respective +keywords. + +.. ipython:: python + + df.rename(index={0: 1}, columns={0: 2}) + +Extended verbose info output for :class:`~pandas.DataFrame` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`DataFrame.info` now shows line numbers for the columns summary (:issue:`17304`) + +*pandas 0.25.x* + +.. code-block:: python + + >>> df = pd.DataFrame({"int_col": [1, 2, 3], + ... "text_col": ["a", "b", "c"], + ... "float_col": [0.0, 0.1, 0.2]}) + >>> df.info(verbose=True) + + RangeIndex: 3 entries, 0 to 2 + Data columns (total 3 columns): + int_col 3 non-null int64 + text_col 3 non-null object + float_col 3 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 152.0+ bytes + +*pandas 1.0.0* + +.. ipython:: python + + df = pd.DataFrame({"int_col": [1, 2, 3], + "text_col": ["a", "b", "c"], + "float_col": [0.0, 0.1, 0.2]}) + df.info(verbose=True) + +:meth:`pandas.array` inference changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:meth:`pandas.array` now infers pandas' new extension types in several cases (:issue:`29791`): + +1. String data (including missing values) now returns a :class:`arrays.StringArray`. +2. Integer data (including missing values) now returns a :class:`arrays.IntegerArray`. +3. Boolean data (including missing values) now returns the new :class:`arrays.BooleanArray` + +*pandas 0.25.x* + +.. code-block:: python + + >>> pd.array(["a", None]) + + ['a', None] + Length: 2, dtype: object + + >>> pd.array([1, None]) + + [1, None] + Length: 2, dtype: object + + +*pandas 1.0.0* + +.. ipython:: python + + pd.array(["a", None]) + pd.array([1, None]) + +As a reminder, you can specify the ``dtype`` to disable all inference. 
+ +:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`arrays.IntegerArray` now uses :attr:`pandas.NA` rather than +:attr:`numpy.nan` as its missing value marker (:issue:`29964`). + +*pandas 0.25.x* + +.. code-block:: python + + >>> a = pd.array([1, 2, None], dtype="Int64") + >>> a + + [1, 2, NaN] + Length: 3, dtype: Int64 + + >>> a[2] + nan + +*pandas 1.0.0* + +.. ipython:: python + + a = pd.array([1, 2, None], dtype="Int64") + a + a[2] + +This has a few API-breaking consequences. + +**Converting to a NumPy ndarray** + +When converting to a NumPy array missing values will be ``pd.NA``, which cannot +be converted to a float. So calling ``np.asarray(integer_array, dtype="float")`` +will now raise. + +*pandas 0.25.x* + +.. code-block:: python + + >>> np.asarray(a, dtype="float") + array([ 1., 2., nan]) + +*pandas 1.0.0* + +.. ipython:: python + :okexcept: + + np.asarray(a, dtype="float") + +Use :meth:`arrays.IntegerArray.to_numpy` with an explicit ``na_value`` instead. + +.. ipython:: python + + a.to_numpy(dtype="float", na_value=np.nan) + +**Reductions can return ``pd.NA``** + +When performing a reduction such as a sum with ``skipna=False``, the result +will now be ``pd.NA`` instead of ``np.nan`` in presence of missing values +(:issue:`30958`). + +*pandas 0.25.x* + +.. code-block:: python + + >>> pd.Series(a).sum(skipna=False) + nan + +*pandas 1.0.0* + +.. ipython:: python + + pd.Series(a).sum(skipna=False) + +**value_counts returns a nullable integer dtype** + +:meth:`Series.value_counts` with a nullable integer dtype now returns a nullable +integer dtype for the values. + +*pandas 0.25.x* + +.. code-block:: python + + >>> pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype + dtype('int64') + +*pandas 1.0.0* + +.. 
ipython:: python + + pd.Series([2, 1, 1, None], dtype="Int64").value_counts().dtype + +See :ref:`missing_data.NA` for more on the differences between :attr:`pandas.NA` +and :attr:`numpy.nan`. + +:class:`arrays.IntegerArray` comparisons return :class:`arrays.BooleanArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Comparison operations on a :class:`arrays.IntegerArray` now return a +:class:`arrays.BooleanArray` rather than a NumPy array (:issue:`29964`). + +*pandas 0.25.x* + +.. code-block:: python + + >>> a = pd.array([1, 2, None], dtype="Int64") + >>> a + <IntegerArray> + [1, 2, NaN] + Length: 3, dtype: Int64 + + >>> a > 1 + array([False, True, False]) + +*pandas 1.0.0* + +.. ipython:: python + + a = pd.array([1, 2, None], dtype="Int64") + a > 1 + +Note that missing values now propagate, rather than always comparing unequal +like :attr:`numpy.nan`. See :ref:`missing_data.NA` for more. + +By default :meth:`Categorical.min` now returns the minimum instead of np.nan +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :class:`Categorical` contains ``np.nan``, +:meth:`Categorical.min` no longer returns ``np.nan`` by default (skipna=True) (:issue:`25303`) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min() + Out[1]: nan + + +*pandas 1.0.0* + +.. ipython:: python + + pd.Categorical([1, 2, np.nan], ordered=True).min() + + +Default dtype of empty :class:`pandas.Series` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Initialising an empty :class:`pandas.Series` without specifying a dtype will raise a `DeprecationWarning` now +(:issue:`17261`). The default dtype will change from ``float64`` to ``object`` in future releases so that it is +consistent with the behaviour of :class:`DataFrame` and :class:`Index`. + +*pandas 1.0.0* + +.. 
code-block:: ipython + + In [1]: pd.Series() + Out[2]: + DeprecationWarning: The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning. + Series([], dtype: float64) + +Result dtype inference changes for resample operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The rules for the result dtype in :meth:`DataFrame.resample` aggregations have changed for extension types (:issue:`31359`). +Previously, pandas would attempt to convert the result back to the original dtype, falling back to the usual +inference rules if that was not possible. Now, pandas will only return a result of the original dtype if the +scalar values in the result are instances of the extension dtype's scalar type. + +.. ipython:: python + + df = pd.DataFrame({"A": ['a', 'b']}, dtype='category', + index=pd.date_range('2000', periods=2)) + df + + +*pandas 0.25.x* + +.. code-block:: python + + >>> df.resample("2D").agg(lambda x: 'a').A.dtype + CategoricalDtype(categories=['a', 'b'], ordered=False) + +*pandas 1.0.0* + +.. ipython:: python + + df.resample("2D").agg(lambda x: 'a').A.dtype + +This fixes an inconsistency between ``resample`` and ``groupby``. +This also fixes a potential bug, where the **values** of the result might change +depending on how the results are cast back to the original dtype. + +*pandas 0.25.x* + +.. code-block:: python + + >>> df.resample("2D").agg(lambda x: 'c') + + A + 0 NaN + +*pandas 1.0.0* + +.. ipython:: python + + df.resample("2D").agg(lambda x: 'c') + + +.. _whatsnew_100.api_breaking.python: + +Increased minimum version for Python +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas 1.0.0 supports Python 3.6.1 and higher (:issue:`29212`). + +.. _whatsnew_100.api_breaking.deps: + +Increased minimum versions for dependencies +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some minimum supported versions of dependencies were updated (:issue:`29766`, :issue:`29723`). 
+If installed, we now require: + ++-----------------+-----------------+----------+---------+ +| Package | Minimum Version | Required | Changed | ++=================+=================+==========+=========+ +| numpy | 1.13.3 | X | | ++-----------------+-----------------+----------+---------+ +| pytz | 2015.4 | X | | ++-----------------+-----------------+----------+---------+ +| python-dateutil | 2.6.1 | X | | ++-----------------+-----------------+----------+---------+ +| bottleneck | 1.2.1 | | | ++-----------------+-----------------+----------+---------+ +| numexpr | 2.6.2 | | | ++-----------------+-----------------+----------+---------+ +| pytest (dev) | 4.0.2 | | | ++-----------------+-----------------+----------+---------+ + +For `optional libraries `_ the general recommendation is to use the latest version. +The following table lists the lowest version per library that is currently being tested throughout the development of pandas. +Optional libraries below the lowest tested version may still work, but are not considered supported. 
+ ++-----------------+-----------------+---------+ +| Package | Minimum Version | Changed | ++=================+=================+=========+ +| beautifulsoup4 | 4.6.0 | | ++-----------------+-----------------+---------+ +| fastparquet | 0.3.2 | X | ++-----------------+-----------------+---------+ +| gcsfs | 0.2.2 | | ++-----------------+-----------------+---------+ +| lxml | 3.8.0 | | ++-----------------+-----------------+---------+ +| matplotlib | 2.2.2 | | ++-----------------+-----------------+---------+ +| numba | 0.46.0 | X | ++-----------------+-----------------+---------+ +| openpyxl | 2.5.7 | X | ++-----------------+-----------------+---------+ +| pyarrow | 0.13.0 | X | ++-----------------+-----------------+---------+ +| pymysql | 0.7.1 | | ++-----------------+-----------------+---------+ +| pytables | 3.4.2 | | ++-----------------+-----------------+---------+ +| s3fs | 0.3.0 | X | ++-----------------+-----------------+---------+ +| scipy | 0.19.0 | | ++-----------------+-----------------+---------+ +| sqlalchemy | 1.1.4 | | ++-----------------+-----------------+---------+ +| xarray | 0.8.2 | | ++-----------------+-----------------+---------+ +| xlrd | 1.1.0 | | ++-----------------+-----------------+---------+ +| xlsxwriter | 0.9.8 | | ++-----------------+-----------------+---------+ +| xlwt | 1.2.0 | | ++-----------------+-----------------+---------+ + +See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +Build Changes +^^^^^^^^^^^^^ + +Pandas has added a `pyproject.toml `_ file and will no longer include +cythonized files in the source distribution uploaded to PyPI (:issue:`28341`, :issue:`20775`). If you're installing +a built distribution (wheel) or via conda, this shouldn't have any effect on you. If you're building pandas from +source, you should no longer need to install Cython into your build environment before calling ``pip install pandas``. + + +.. 
_whatsnew_100.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- :class:`core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`) +- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`) +- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`) +- In order to improve tab-completion, Pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``). + To see which attributes are excluded, see an object's ``_deprecations`` attribute, for example ``pd.DataFrame._deprecations`` (:issue:`28805`). +- The returned dtype of :func:`unique` now matches the input dtype. (:issue:`27874`) +- Changed the default configuration value for ``options.plotting.matplotlib.register_converters`` from ``True`` to ``"auto"`` (:issue:`18720`). + Now, pandas custom formatters will only be applied to plots created by pandas, through :meth:`~DataFrame.plot`. + Previously, pandas' formatters would be applied to all plots created *after* a :meth:`~DataFrame.plot`. + See :ref:`units registration <whatsnew_100.matplotlib_units>` for more. +- :meth:`Series.dropna` has dropped its ``**kwargs`` argument in favor of a single ``how`` parameter. + Supplying anything other than ``how`` to ``**kwargs`` raised a ``TypeError`` previously (:issue:`29388`) +- When testing pandas, the new minimum required version of pytest is 5.0.1 (:issue:`29664`) +- :meth:`Series.str.__iter__` was deprecated and will be removed in future releases (:issue:`28277`). +- Added ``<NA>`` to the list of default NA values for :meth:`read_csv` (:issue:`30821`) + + +.. _whatsnew_100.api.documentation: + +Documentation Improvements +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Added new section on :ref:`scale` (:issue:`28315`). +- Added sub-section on :ref:`io.query_multi` for HDF5 datasets (:issue:`28791`). + +.. 
--------------------------------------------------------------------------- + +.. _whatsnew_100.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- :meth:`Series.item` and :meth:`Index.item` have been *undeprecated* (:issue:`29250`) +- ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, + value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` + is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). +- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) +- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) +- :meth:`DateOffset.isAnchored` and :meth:`DateOffset.onOffset` are deprecated and will be removed in a future version, use :meth:`DateOffset.is_anchored` and :meth:`DateOffset.is_on_offset` instead (:issue:`30340`) +- ``pandas.tseries.frequencies.get_offset`` is deprecated and will be removed in a future version, use ``pandas.tseries.frequencies.to_offset`` instead (:issue:`4205`) +- :meth:`Categorical.take_nd` and :meth:`CategoricalIndex.take_nd` are deprecated, use :meth:`Categorical.take` and :meth:`CategoricalIndex.take` instead (:issue:`27745`) +- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`) +- The parameter ``label`` in :func:`lreshape` has been deprecated and will be removed in a future version (:issue:`29742`) +- ``pandas.core.index`` has been deprecated and will be removed in a future version, the public classes are available in the top-level namespace (:issue:`19711`) +- :func:`pandas.json_normalize` is now exposed in the top-level namespace. 
+ Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and + it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). +- The ``numpy`` argument of :meth:`pandas.read_json` is deprecated (:issue:`28512`). +- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) +- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) +- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`). +- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) +- The parameter ``is_copy`` of :meth:`Series.take` and :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`) +- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`) +- The ``pandas.np`` submodule is now deprecated. Import numpy directly instead (:issue:`30296`) +- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30610`) +- :class:`~DataFrame.diff` will raise a ``TypeError`` rather than implicitly losing the dtype of extension types in the future. Convert to the correct dtype before calling ``diff`` instead (:issue:`31025`) + +**Selecting Columns from a Grouped DataFrame** + +When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated, +a list of items should be used instead. (:issue:`23566`) For example: + +.. 
code-block:: ipython + + df = pd.DataFrame({ + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": np.random.randn(8), + "C": np.random.randn(8), + }) + g = df.groupby('A') + + # single key, returns SeriesGroupBy + g['B'] + + # tuple of single key, returns SeriesGroupBy + g[('B',)] + + # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning + g[('B', 'C')] + + # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning + # (implicitly converts the passed strings into a single tuple) + g['B', 'C'] + + # proper way, returns DataFrameGroupBy + g[['B', 'C']] + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.prior_deprecations: + +Removal of prior version deprecations/changes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Removed SparseSeries and SparseDataFrame** + +``SparseSeries``, ``SparseDataFrame`` and the ``DataFrame.to_sparse`` method +have been removed (:issue:`28425`). We recommend using a ``Series`` or +``DataFrame`` with sparse values instead. See :ref:`sparse.migration` for help +with migrating existing code. + +.. _whatsnew_100.matplotlib_units: + +**Matplotlib unit registration** + +Previously, pandas would register converters with matplotlib as a side effect of importing pandas (:issue:`18720`). +This changed the output of plots made via matplotlib plots after pandas was imported, even if you were using +matplotlib directly rather than :meth:`~DataFrame.plot`. + +To use pandas formatters with a matplotlib plot, specify + +.. code-block:: python + + >>> import pandas as pd + >>> pd.options.plotting.matplotlib.register_converters = True + +Note that plots created by :meth:`DataFrame.plot` and :meth:`Series.plot` *do* register the converters +automatically. The only behavior change is when plotting a date-like object via ``matplotlib.pyplot.plot`` +or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
+ +**Other removals** + +- Removed the previously deprecated keyword "index" from :func:`read_stata`, :class:`StataReader`, and :meth:`StataReader.read`, use "index_col" instead (:issue:`17328`) +- Removed ``StataReader.data`` method, use :meth:`StataReader.read` instead (:issue:`9493`) +- Removed ``pandas.plotting._matplotlib.tsplot``, use :meth:`Series.plot` instead (:issue:`19980`) +- ``pandas.tseries.converter.register`` has been moved to :func:`pandas.plotting.register_matplotlib_converters` (:issue:`18307`) +- :meth:`Series.plot` no longer accepts positional arguments, pass keyword arguments instead (:issue:`30003`) +- :meth:`DataFrame.hist` and :meth:`Series.hist` no longer allow ``figsize="default"``, specify figure size by passing a tuple instead (:issue:`30003`) +- Floordiv of integer-dtyped array by :class:`Timedelta` now raises ``TypeError`` (:issue:`21036`) +- :class:`TimedeltaIndex` and :class:`DatetimeIndex` no longer accept non-nanosecond dtype strings like "timedelta64" or "datetime64", use "timedelta64[ns]" and "datetime64[ns]" instead (:issue:`24806`) +- Changed the default "skipna" argument in :func:`pandas.api.types.infer_dtype` from ``False`` to ``True`` (:issue:`24050`) +- Removed ``Series.ix`` and ``DataFrame.ix`` (:issue:`26438`) +- Removed ``Index.summary`` (:issue:`18217`) +- Removed the previously deprecated keyword "fastpath" from the :class:`Index` constructor (:issue:`23110`) +- Removed ``Series.get_value``, ``Series.set_value``, ``DataFrame.get_value``, ``DataFrame.set_value`` (:issue:`17739`) +- Removed ``Series.compound`` and ``DataFrame.compound`` (:issue:`26405`) +- Changed the default "inplace" argument in :meth:`DataFrame.set_index` and :meth:`Series.set_axis` from ``None`` to ``False`` (:issue:`27600`) +- Removed ``Series.cat.categorical``, ``Series.cat.index``, ``Series.cat.name`` (:issue:`24751`) +- Removed the previously deprecated keyword "box" from :func:`to_datetime` and :func:`to_timedelta`; in addition these now 
always return :class:`DatetimeIndex`, :class:`TimedeltaIndex`, :class:`Index`, :class:`Series`, or :class:`DataFrame` (:issue:`24486`)
(:issue:`22535`) +- Removed ``Series.ptp`` (:issue:`21614`) +- Removed ``Series.from_array`` (:issue:`18258`) +- Removed ``DataFrame.from_items`` (:issue:`18458`) +- Removed ``DataFrame.as_matrix``, ``Series.as_matrix`` (:issue:`18458`) +- Removed ``Series.asobject`` (:issue:`18477`) +- Removed ``DataFrame.as_blocks``, ``Series.as_blocks``, ``DataFrame.blocks``, ``Series.blocks`` (:issue:`17656`) +- :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`) +- :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) +- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) +- Removed the previously deprecated keywords "start", "end", and "periods" from the :class:`DatetimeIndex`, :class:`TimedeltaIndex`, and :class:`PeriodIndex` constructors; use :func:`date_range`, :func:`timedelta_range`, and :func:`period_range` instead (:issue:`23919`) +- Removed the previously deprecated keyword "verify_integrity" from the :class:`DatetimeIndex` and :class:`TimedeltaIndex` constructors (:issue:`23919`) +- Removed the previously deprecated keyword "fastpath" from ``pandas.core.internals.blocks.make_block`` (:issue:`19265`) +- Removed the previously deprecated keyword "dtype" from :meth:`Block.make_block_same_class` (:issue:`19434`) +- Removed ``ExtensionArray._formatting_values``. Use :attr:`ExtensionArray._formatter` instead. 
(:issue:`23601`) +- Removed ``MultiIndex.to_hierarchical`` (:issue:`21613`) +- Removed ``MultiIndex.labels``, use :attr:`MultiIndex.codes` instead (:issue:`23752`) +- Removed the previously deprecated keyword "labels" from the :class:`MultiIndex` constructor, use "codes" instead (:issue:`23752`) +- Removed ``MultiIndex.set_labels``, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`) +- Removed the previously deprecated keyword "labels" from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`) +- Removed support for legacy HDF5 formats (:issue:`29787`) +- Passing a dtype alias (e.g. 'datetime64[ns, UTC]') to :class:`DatetimeTZDtype` is no longer allowed, use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`) +- Removed the previously deprecated keyword "skip_footer" from :func:`read_excel`; use "skipfooter" instead (:issue:`18836`) +- :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`) +- Removed the previously deprecated keyword "convert_datetime64" from :meth:`DataFrame.to_records` (:issue:`18902`) +- Removed ``IntervalIndex.from_intervals`` in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) +- Changed the default "keep_tz" argument in :meth:`DatetimeIndex.to_series` from ``None`` to ``True`` (:issue:`23739`) +- Removed ``api.types.is_period`` and ``api.types.is_datetimetz`` (:issue:`23917`) +- Ability to read pickles containing :class:`Categorical` instances created with pre-0.16 version of pandas has been removed (:issue:`27538`) +- Removed ``pandas.tseries.plotting.tsplot`` (:issue:`18627`) +- Removed the previously deprecated keywords "reduce" and "broadcast" from :meth:`DataFrame.apply` (:issue:`18577`) +- Removed the previously deprecated ``assert_raises_regex`` function in ``pandas._testing`` (:issue:`29174`) +- Removed the previously 
deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) +- Removed the previously deprecated keyword "nthreads" from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) +- Removed ``Index.is_lexsorted_for_tuple`` (:issue:`29305`) +- Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`core.groupby.DataFrameGroupBy.aggregate`, :meth:`core.groupby.SeriesGroupBy.aggregate`, :meth:`core.window.rolling.Rolling.aggregate` (:issue:`29608`) +- Removed ``Series.valid``; use :meth:`Series.dropna` instead (:issue:`18800`) +- Removed ``DataFrame.is_copy``, ``Series.is_copy`` (:issue:`18812`) +- Removed ``DataFrame.get_ftype_counts``, ``Series.get_ftype_counts`` (:issue:`18243`) +- Removed ``DataFrame.ftypes``, ``Series.ftypes``, ``Series.ftype`` (:issue:`26744`) +- Removed ``Index.get_duplicates``, use ``idx[idx.duplicated()].unique()`` instead (:issue:`20239`) +- Removed ``Series.clip_upper``, ``Series.clip_lower``, ``DataFrame.clip_upper``, ``DataFrame.clip_lower`` (:issue:`24203`) +- Removed the ability to alter :attr:`DatetimeIndex.freq`, :attr:`TimedeltaIndex.freq`, or :attr:`PeriodIndex.freq` (:issue:`20772`) +- Removed ``DatetimeIndex.offset`` (:issue:`20730`) +- Removed ``DatetimeIndex.asobject``, ``TimedeltaIndex.asobject``, ``PeriodIndex.asobject``, use ``astype(object)`` instead (:issue:`29801`) +- Removed the previously deprecated keyword "order" from :func:`factorize` (:issue:`19751`) +- Removed the previously deprecated keyword "encoding" from :func:`read_stata` and :meth:`DataFrame.to_stata` (:issue:`21400`) +- Changed the default "sort" argument in :func:`concat` from ``None`` to ``False`` (:issue:`20613`) +- Removed the previously deprecated keyword "raise_conflict" from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) +- Removed the previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, 
use "periods" instead (:issue:`22458`) +- Removed the previously deprecated keywords "how", "fill_method", and "limit" from :meth:`DataFrame.resample` (:issue:`30139`) +- Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`) +- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`) +- Removed ``Series.nonzero``, use ``to_numpy().nonzero()`` instead (:issue:`24048`) +- Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`) +- Removed the previously deprecated keyword "pat" from :meth:`Series.str.partition` and :meth:`Series.str.rpartition`, use "sep" instead (:issue:`23767`) +- Removed ``Series.put`` (:issue:`27106`) +- Removed ``Series.real``, ``Series.imag`` (:issue:`27106`) +- Removed ``Series.to_dense``, ``DataFrame.to_dense`` (:issue:`26684`) +- Removed ``Index.dtype_str``, use ``str(index.dtype)`` instead (:issue:`27106`) +- :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) +- The 'outer' method on Numpy ufuncs, e.g. 
``np.subtract.outer`` operating on :class:`Series` objects is no longer supported, and will raise ``NotImplementedError`` (:issue:`27198`) +- Removed ``Series.get_dtype_counts`` and ``DataFrame.get_dtype_counts`` (:issue:`27145`) +- Changed the default "fill_value" argument in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) +- Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` from ``None`` to ``False`` (:issue:`20584`) +- Removed deprecated behavior of :meth:`Series.argmin` and :meth:`Series.argmax`, use :meth:`Series.idxmin` and :meth:`Series.idxmax` for the old behavior (:issue:`16955`) +- Passing a tz-aware ``datetime.datetime`` or :class:`Timestamp` into the :class:`Timestamp` constructor with the ``tz`` argument now raises a ``ValueError`` (:issue:`23621`) +- Removed ``Series.base``, ``Index.base``, ``Categorical.base``, ``Series.flags``, ``Index.flags``, ``PeriodArray.flags``, ``Series.strides``, ``Index.strides``, ``Series.itemsize``, ``Index.itemsize``, ``Series.data``, ``Index.data`` (:issue:`20721`) +- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) +- Removed ``Timestamp.weekday_name``, ``DatetimeIndex.weekday_name``, and ``Series.dt.weekday_name`` (:issue:`18164`) +- Removed the previously deprecated keyword "errors" in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) +- Changed the default "ordered" argument in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`) +- :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`) +- Removed ``to_msgpack``, 
``read_msgpack``, ``DataFrame.to_msgpack``, ``Series.to_msgpack`` (:issue:`27103`) +- Removed ``Series.compress`` (:issue:`21930`) +- Removed the previously deprecated keyword "fill_value" from :meth:`Categorical.fillna`, use "value" instead (:issue:`19269`) +- Removed the previously deprecated keyword "data" from :func:`andrews_curves`, use "frame" instead (:issue:`6956`) +- Removed the previously deprecated keyword "data" from :func:`parallel_coordinates`, use "frame" instead (:issue:`6956`) +- Removed the previously deprecated keyword "colors" from :func:`parallel_coordinates`, use "color" instead (:issue:`6956`) +- Removed the previously deprecated keywords "verbose" and "private_key" from :func:`read_gbq` (:issue:`30200`) +- Calling ``np.array`` and ``np.asarray`` on tz-aware :class:`Series` and :class:`DatetimeIndex` will now return an object array of tz-aware :class:`Timestamp` (:issue:`24596`) +- + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_100.performance: + +Performance improvements +~~~~~~~~~~~~~~~~~~~~~~~~ + +- Performance improvement in :class:`DataFrame` arithmetic and comparison operations with scalars (:issue:`24990`, :issue:`29853`) +- Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) +- Performance improvement in :attr:`MultiIndex.is_monotonic` (:issue:`27495`) +- Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) +- Performance improvement when initializing a :class:`DataFrame` using a ``range`` (:issue:`30171`) +- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`) +- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`) +- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`) +- Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`) +- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`) +- Performance improvement when checking if values in a :class:`Categorical` are equal, equal or larger or larger than a given scalar. + The improvement is not present if checking if the :class:`Categorical` is less than or less than or equal than the scalar (:issue:`29820`) +- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`) +- Performance improvement in :func:`~pandas.api.types.infer_dtype` when ``skipna`` is ``True`` (:issue:`28814`) + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_100.bug_fixes: + +Bug fixes +~~~~~~~~~ + + +Categorical +^^^^^^^^^^^ + +- Added test to assert the :func:`fillna` raises the correct ``ValueError`` message when the value isn't a value from categories (:issue:`13628`) +- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`) +- :meth:`DataFrame.reindex` with a :class:`CategoricalIndex` would fail when the targets contained duplicates, and wouldn't fail if the source contained duplicates (:issue:`28107`) +- Bug in :meth:`Categorical.astype` not allowing for casting to extension dtypes (:issue:`28668`) +- Bug where :func:`merge` was unable to join on categorical and extension dtype columns (:issue:`28668`) +- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`) +- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`) +- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`) +- Using date accessors on a categorical dtyped :class:`Series` of datetimes was not returning an object of the + same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. 
when accessing :meth:`Series.dt.tz_localize` on a + :class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`) +- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` that would give incorrect results on categorical data (:issue:`26988`) +- Bug where calling :meth:`Categorical.min` or :meth:`Categorical.max` on an empty Categorical would raise a numpy exception (:issue:`30227`) +- The following methods now also correctly output values for unobserved categories when called through ``groupby(..., observed=False)`` (:issue:`17605`) + * :meth:`core.groupby.SeriesGroupBy.count` + * :meth:`core.groupby.SeriesGroupBy.size` + * :meth:`core.groupby.SeriesGroupBy.nunique` + * :meth:`core.groupby.SeriesGroupBy.nth` + + +Datetimelike +^^^^^^^^^^^^ +- Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) +- Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) +- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` with errors="coerce" could incorrectly lead to raising ``ValueError`` (:issue:`28299`) +- Bug in :meth:`core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) +- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) +- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`) +- Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) +- Bug in 
:meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to handle ``NaT`` when casting to an integer dtype (:issue:`28492`) +- Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) +- Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) +- Bug in :func:`core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) +- Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) +- Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) +- Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`) +- Bug in masking datetime-like arrays with a boolean mask of an incorrect length not raising an ``IndexError`` (:issue:`30308`) +- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) +- Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) +- Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) +- Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) +- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) +- Bug in :meth:`DataFrame.drop` 
where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) +- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) +- Bug in :meth:`Series.cummin` and :meth:`Series.cummax` with timezone-aware dtype incorrectly dropping its timezone (:issue:`15553`) +- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`) +- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`) +- Bug in :func:`date_range` with custom business hours as ``freq`` and given number of ``periods`` (:issue:`30593`) +- Bug in :class:`PeriodIndex` comparisons with incorrectly casting integers to :class:`Period` objects, inconsistent with the :class:`Period` comparison behavior (:issue:`30722`) +- Bug in :meth:`DatetimeIndex.insert` raising a ``ValueError`` instead of a ``TypeError`` when trying to insert a timezone-aware :class:`Timestamp` into a timezone-naive :class:`DatetimeIndex`, or vice-versa (:issue:`30806`) + +Timedelta +^^^^^^^^^ +- Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object (:issue:`29558`) +- +- + +Timezones +^^^^^^^^^ + +- +- + + +Numeric +^^^^^^^ +- Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) +- :class:`DataFrame` flex inequality comparisons methods (:meth:`DataFrame.lt`, :meth:`DataFrame.le`, :meth:`DataFrame.gt`, :meth:`DataFrame.ge`) with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) +- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) +- Bug in :meth:`DataFrame.interpolate` where 
specifying axis by name references variable before it is assigned (:issue:`29142`) +- Bug in :meth:`Series.var` not computing the right value with a nullable integer dtype series not passing through ddof argument (:issue:`29128`) +- Improved error message when using `frac` > 1 and `replace` = False (:issue:`27451`) +- Bug in numeric indexes resulted in it being possible to instantiate an :class:`Int64Index`, :class:`UInt64Index`, or :class:`Float64Index` with an invalid dtype (e.g. datetime-like) (:issue:`29539`) +- Bug in :class:`UInt64Index` precision loss while constructing from a list with values in the ``np.uint64`` range (:issue:`29526`) +- Bug in :class:`NumericIndex` construction that caused indexing to fail when integers in the ``np.uint64`` range were used (:issue:`28023`) +- Bug in :class:`NumericIndex` construction that caused :class:`UInt64Index` to be casted to :class:`Float64Index` when integers in the ``np.uint64`` range were used to index a :class:`DataFrame` (:issue:`28279`) +- Bug in :meth:`Series.interpolate` when using method=`index` with an unsorted index, would previously return incorrect results. (:issue:`21037`) +- Bug in :meth:`DataFrame.round` where a :class:`DataFrame` with a :class:`CategoricalIndex` of :class:`IntervalIndex` columns would incorrectly raise a ``TypeError`` (:issue:`30063`) +- Bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` when there are duplicated indices (:issue:`30463`) +- Bug in :class:`DataFrame` cumulative operations (e.g. 
cumsum, cummax) incorrectly casting to object-dtype (:issue:`19296`)
``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`) +- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`) +- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`) +- Bug in :meth:`DataFrame.loc` with incorrect dtype when setting Categorical value in 1-row DataFrame (:issue:`25495`) +- :meth:`MultiIndex.get_loc` can't find missing values when input includes missing values (:issue:`19132`) +- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`) +- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years, use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`) + +Missing +^^^^^^^ + +- +- + +MultiIndex +^^^^^^^^^^ + +- Constructor for :class:`MultiIndex` verifies that the given ``sortorder`` is compatible with the actual ``lexsort_depth`` if ``verify_integrity`` parameter is ``True`` (the default) (:issue:`28735`) +- Series and MultiIndex `.drop` with `MultiIndex` raise exception if labels not in given in level (:issue:`8594`) +- + +I/O +^^^ + +- :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) +- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) +- Improve infinity parsing. :meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) +- Bug in :meth:`DataFrame.to_csv` where values were truncated when the length of ``na_rep`` was shorter than the text input data. 
(:issue:`25099`) +- Bug in :func:`DataFrame.to_string` where values were truncated using display options instead of outputting the full content (:issue:`9784`) +- Bug in :meth:`DataFrame.to_json` where a datetime column label would not be written out in ISO format with ``orient="table"`` (:issue:`28130`) +- Bug in :func:`DataFrame.to_parquet` where writing to GCS would fail with `engine='fastparquet'` if the file did not already exist (:issue:`28326`) +- Bug in :func:`read_hdf` closing stores that it didn't open when Exceptions are raised (:issue:`28699`) +- Bug in :meth:`DataFrame.read_json` where using ``orient="index"`` would not maintain the order (:issue:`28557`) +- Bug in :meth:`DataFrame.to_html` where the length of the ``formatters`` argument was not verified (:issue:`28469`) +- Bug in :meth:`DataFrame.read_excel` with ``engine='ods'`` when ``sheet_name`` argument references a non-existent sheet (:issue:`27676`) +- Bug in :meth:`pandas.io.formats.style.Styler` formatting for floating values not displaying decimals correctly (:issue:`13257`) +- Bug in :meth:`DataFrame.to_html` when using ``formatters=`` and ``max_cols`` together. (:issue:`25955`) +- Bug in :meth:`Styler.background_gradient` not able to work with dtype ``Int64`` (:issue:`28869`) +- Bug in :meth:`DataFrame.to_clipboard` which did not work reliably in ipython (:issue:`22707`) +- Bug in :func:`read_json` where default encoding was not set to ``utf-8`` (:issue:`29565`) +- Bug in :class:`PythonParser` where str and bytes were being mixed when dealing with the decimal field (:issue:`29650`) +- :meth:`read_gbq` now accepts ``progress_bar_type`` to display progress bar while the data downloads. 
(:issue:`29857`) +- Bug in :func:`pandas.io.json.json_normalize` where a missing value in the location specified by `record_path` would raise a ``TypeError`` (:issue:`30148`) +- :func:`read_excel` now accepts binary data (:issue:`15914`) +- Bug in :meth:`read_csv` in which encoding handling was limited to just the string `utf-16` for the C engine (:issue:`24130`) + +Plotting +^^^^^^^^ + +- Bug in :meth:`Series.plot` not able to plot boolean values (:issue:`23719`) +- Bug in :meth:`DataFrame.plot` not able to plot when no rows (:issue:`27758`) +- Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`) +- Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`) +- Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`) +- Bug where :meth:`DataFrame.boxplot` would not accept a ``color`` parameter like :meth:`DataFrame.plot.box` (:issue:`26214`) +- Bug in the ``xticks`` argument being ignored for :meth:`DataFrame.plot.bar` (:issue:`14119`) +- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`) +- :meth:`DataFrame.plot` now allow a ``backend`` keyword argument to allow changing between backends in one session (:issue:`28619`). +- Bug in color validation incorrectly raising for non-color styles (:issue:`29122`). +- Allow :meth:`DataFrame.plot.scatter` to plot ``objects`` and ``datetime`` type data (:issue:`18755`, :issue:`30391`) +- Bug in :meth:`DataFrame.hist`, ``xrot=0`` does not work with ``by`` and subplots (:issue:`30288`). 
+ +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- Bug in :meth:`core.groupby.DataFrameGroupBy.apply` only showing output from a single group when function returns an :class:`Index` (:issue:`28652`) +- Bug in :meth:`DataFrame.groupby` with multiple groups where an ``IndexError`` would be raised if any group contained all NA values (:issue:`20519`) +- Bug in :meth:`pandas.core.resample.Resampler.size` and :meth:`pandas.core.resample.Resampler.count` returning wrong dtype when used with an empty :class:`Series` or :class:`DataFrame` (:issue:`28427`) +- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`) +- Bug in :meth:`DataFrame.rolling` not allowing rolling over multi-index levels (:issue:`15584`). +- Bug in :meth:`DataFrame.rolling` not allowing rolling on monotonic decreasing time indexes (:issue:`19248`). +- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) +- Bug in :meth:`core.groupby.DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) +- Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`) +- Remove error raised due to duplicated input functions in named aggregation in :meth:`DataFrame.groupby` and :meth:`Series.groupby`. Previously error will be raised if the same function is applied on the same column and now it is allowed if new assigned names are different. 
(:issue:`28426`) +- :meth:`core.groupby.SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue:`28479`) +- Bug in :meth:`core.window.rolling.Rolling.quantile` ignoring ``interpolation`` keyword argument when used within a groupby (:issue:`28779`) +- Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) +- Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) +- Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) +- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) +- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument where ``min_periods`` was ignored (:issue:`26996`) + +Reshaping +^^^^^^^^^ + +- Bug in :meth:`DataFrame.apply` that caused incorrect output with empty :class:`DataFrame` (:issue:`28202`, :issue:`21959`) +- Bug in :meth:`DataFrame.stack` not handling non-unique indexes correctly when creating MultiIndex (:issue:`28301`) +- Bug in :meth:`pivot_table` not returning correct type ``float`` when ``margins=True`` and ``aggfunc='mean'`` (:issue:`24893`) +- Bug :func:`merge_asof` could not use :class:`datetime.timedelta` for ``tolerance`` kwarg (:issue:`28098`) +- Bug in :func:`merge`, did not append suffixes correctly with MultiIndex (:issue:`28518`) +- :func:`qcut` and :func:`cut` now handle boolean input (:issue:`20303`) +- Fix to ensure all int dtypes can be used in :func:`merge_asof` when using a tolerance value. 
Previously every non-int64 type would raise an erroneous ``MergeError`` (:issue:`28870`). +- Better error message in :func:`get_dummies` when `columns` isn't a list-like value (:issue:`28383`) +- Bug in :meth:`Index.join` that caused infinite recursion error for mismatched ``MultiIndex`` name orders. (:issue:`25760`, :issue:`28956`) +- Bug :meth:`Series.pct_change` where supplying an anchored frequency would throw a ``ValueError`` (:issue:`28664`) +- Bug where :meth:`DataFrame.equals` returned True incorrectly in some cases when two DataFrames had the same columns in different orders (:issue:`28839`) +- Bug in :meth:`DataFrame.replace` that caused non-numeric replacer's dtype not respected (:issue:`26632`) +- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`) +- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`) +- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) +- Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) +- Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) + +Sparse +^^^^^^ +- Bug in :class:`SparseDataFrame` arithmetic operations incorrectly casting inputs to float (:issue:`28107`) +- Bug in ``DataFrame.sparse`` returning a ``Series`` when there was a column named ``sparse`` rather than the accessor (:issue:`30758`) +- Fixed :meth:`operator.xor` with a boolean-dtype ``SparseArray``. Now returns a sparse result, rather than object dtype (:issue:`31025`) + +ExtensionArray +^^^^^^^^^^^^^^ + +- Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`). 
+- Bug where nullable integers could not be compared to strings (:issue:`28930`) +- Bug where :class:`DataFrame` constructor raised ``ValueError`` with list-like data and ``dtype`` specified (:issue:`30280`) + +Other +^^^^^ +- Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) +- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) +- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`) +- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`) +- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`) +- Backtick quoting in :meth:`DataFrame.query` and :meth:`DataFrame.eval` can now also be used to use invalid identifiers like names that start with a digit, are python keywords, or are using single character operators. 
(:issue:`27017`) +- Bug in ``pd.core.util.hashing.hash_pandas_object`` where arrays containing tuples were incorrectly treated as non-hashable (:issue:`28969`) +- Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) +- Fix :class:`AbstractHolidayCalendar` to return correct results for + years after 2030 (now goes up to 2200) (:issue:`27790`) +- Fixed :class:`~arrays.IntegerArray` returning ``inf`` rather than ``NaN`` for operations dividing by ``0`` (:issue:`27398`) +- Fixed ``pow`` operations for :class:`~arrays.IntegerArray` when the other value is ``0`` or ``1`` (:issue:`29997`) +- Bug in :meth:`Series.count` raises if use_inf_as_na is enabled (:issue:`29478`) +- Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`) +- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`) +- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`) +- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`) +- Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`) +- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_100.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.3..v1.0.0 diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst new file mode 100644 index 00000000..ef3bb816 --- /dev/null +++ b/doc/source/whatsnew/v1.0.1.rst @@ -0,0 +1,79 @@ +.. 
_whatsnew_101: + +What's new in 1.0.1 (February 5, 2020) +-------------------------------------- + +These are the changes in pandas 1.0.1. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_101.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fixed regression in :class:`DataFrame` setting values with a slice (e.g. ``df[-4:] = 1``) indexing by label instead of position (:issue:`31469`) +- Fixed regression when indexing a ``Series`` or ``DataFrame`` indexed by ``DatetimeIndex`` with a slice containing a :class:`datetime.date` (:issue:`31501`) +- Fixed regression in ``DataFrame.__setitem__`` raising an ``AttributeError`` with a :class:`MultiIndex` and a non-monotonic indexer (:issue:`31449`) +- Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`) +- Fixed regression in ``.groupby().agg()`` raising an ``AssertionError`` for some reductions like ``min`` on object-dtype columns (:issue:`31522`) +- Fixed regression in ``.groupby()`` aggregations with categorical dtype using Cythonized reduction functions (e.g. ``first``) (:issue:`31450`) +- Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`) +- Fixed regression in :meth:`DataFrame.groupby` whereby taking the minimum or maximum of a column with period dtype would raise a ``TypeError``. (:issue:`31471`) +- Fixed regression in :meth:`DataFrame.groupby` with an empty DataFrame grouping by a level of a MultiIndex (:issue:`31670`).
+- Fixed regression in :meth:`DataFrame.apply` with object dtype and non-reducing function (:issue:`31505`) +- Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`) +- Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`) +- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`) +- Fixed regression in :meth:`DataFrame.loc` and :meth:`DataFrame.iloc` when selecting a row containing a single ``datetime64`` or ``timedelta64`` column (:issue:`31649`) +- Fixed regression where setting :attr:`pd.options.display.max_colwidth` was not accepting a negative integer. In addition, this behavior has been deprecated in favor of using ``None`` (:issue:`31532`) +- Fixed regression in objTOJSON.c fix return-type warning (:issue:`31463`) +- Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`) +- Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`) +- Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`) +- Fixed regression in :meth:`read_csv` where the ``encoding`` option was not recognized when reading from a file-like ``RawIOBase`` object (:issue:`31575`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_101.deprecations: + +Deprecations +~~~~~~~~~~~~ + +- Support for negative integer for :attr:`pd.options.display.max_colwidth` is deprecated in favor of using ``None`` (:issue:`31532`) + +.. --------------------------------------------------------------------------- + +..
_whatsnew_101.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Datetimelike** + +- Fixed bug in :meth:`to_datetime` raising when ``cache=True`` and out-of-bound values are present (:issue:`31491`) + +**Numeric** + +- Bug in dtypes being lost in ``DataFrame.__invert__`` (``~`` operator) with mixed dtypes (:issue:`31183`) + and for extension-array backed ``Series`` and ``DataFrame`` (:issue:`23087`) + +**Plotting** + +- Plotting tz-aware timeseries no longer gives UserWarning (:issue:`31205`) + +**Interval** + +- Bug in :meth:`Series.shift` with ``interval`` dtype raising a ``TypeError`` when shifting an interval array of integers or datetimes (:issue:`34195`) + +.. --------------------------------------------------------------------------- + +.. _whatsnew_101.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.0..v1.0.1|HEAD diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst new file mode 100644 index 00000000..cfa3ee6a --- /dev/null +++ b/doc/source/whatsnew/v1.0.2.rst @@ -0,0 +1,126 @@ +.. _whatsnew_102: + +What's new in 1.0.2 (March 12, 2020) +------------------------------------ + +These are the changes in pandas 1.0.2. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_102.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +**Groupby** + +- Fixed regression in :meth:`groupby(..).agg() <pandas.core.groupby.GroupBy.agg>` which was failing on frames with MultiIndex columns and a custom function (:issue:`31777`) +- Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) +- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.rolling.Rolling.corr>` when using a time offset (:issue:`31789`) +- Fixed regression in :meth:`groupby(..).nunique() <pandas.core.groupby.DataFrameGroupBy.nunique>` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) +- Fixed regression in ``DataFrame.groupby`` raising a ``ValueError`` from an internal operation (:issue:`31802`) +- Fixed regression in :meth:`groupby(..).agg() <pandas.core.groupby.GroupBy.agg>` calling a user-provided function an extra time on an empty input (:issue:`31760`) + +**I/O** + +- Fixed regression in :meth:`read_csv` in which the ``encoding`` option was not recognized with certain file-like objects (:issue:`31819`) +- Fixed regression in :meth:`DataFrame.to_excel` when the ``columns`` keyword argument is passed (:issue:`31677`) +- Fixed regression in :class:`ExcelFile` where the stream passed into the function was closed by the destructor. (:issue:`31467`) +- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`).
+ +**Reindexing/alignment** + +- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`) +- Fixed regression in :meth:`DataFrame.reindex` and :meth:`Series.reindex` when reindexing with (tz-aware) index and ``method=nearest`` (:issue:`26683`) +- Fixed regression in :meth:`DataFrame.reindex_like` on a :class:`DataFrame` subclass raised an ``AssertionError`` (:issue:`31925`) +- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) + +**Other** + +- Fixed regression in joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` to preserve ``freq`` in simple cases (:issue:`32166`) +- Fixed regression in :meth:`Series.shift` with ``datetime64`` dtype when passing an integer ``fill_value`` (:issue:`32591`) +- Fixed regression in the repr of an object-dtype :class:`Index` with bools and missing values (:issue:`32146`) + + +.. --------------------------------------------------------------------------- + +Indexing with Nullable Boolean Arrays +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Previously indexing with a nullable Boolean array containing ``NA`` would raise a ``ValueError``, however this is now permitted with ``NA`` being treated as ``False``. (:issue:`31503`) + +.. ipython:: python + + s = pd.Series([1, 2, 3, 4]) + mask = pd.array([True, True, False, None], dtype="boolean") + s + mask + +*pandas 1.0.0-1.0.1* + +.. code-block:: python + + >>> s[mask] + Traceback (most recent call last): + ... + ValueError: cannot mask with array containing NA / NaN values + +*pandas 1.0.2* + +.. ipython:: python + + s[mask] + +.. 
_whatsnew_102.bug_fixes: + +Bug fixes +~~~~~~~~~ + +**Datetimelike** + +- Bug in :meth:`Series.astype` not copying for tz-naive and tz-aware datetime64 dtype (:issue:`32490`) +- Bug where :func:`to_datetime` would raise when passed ``pd.NA`` (:issue:`32213`) +- Improved error message when subtracting two :class:`Timestamp` that result in an out-of-bounds :class:`Timedelta` (:issue:`31774`) + +**Categorical** + +- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`) +- Fixed bug where :meth:`Categorical` constructor would raise a ``TypeError`` when given a numpy array containing ``pd.NA``. (:issue:`31927`) +- Bug in :class:`Categorical` that would ignore or crash when calling :meth:`Series.replace` with a list-like ``to_replace`` (:issue:`31720`) + +**I/O** + +- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) +- Bug in :meth:`pandas.json_normalize` when value in meta path is not iterable (:issue:`31507`) +- Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`) +- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). + +**Experimental dtypes** + +- Fixed bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`). 
+- Fixed bug in :meth:`DataFrame.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +- Fixed bug in :meth:`DataFrame.convert_dtypes` where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`) +- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) +- Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) +- Fix bug in :meth:`Series.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) +- Fixed bug where :meth:`DataFrameGroupBy.mean`, :meth:`DataFrameGroupBy.median`, :meth:`DataFrameGroupBy.var`, and :meth:`DataFrameGroupBy.std` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) + +**Strings** + +- Using ``pd.NA`` with :meth:`Series.str.repeat` now correctly outputs a null value instead of raising error for vector inputs (:issue:`31632`) + +**Rolling** + +- Fixed rolling operations with variable window (defined by time duration) on decreasing time index (:issue:`32385`). + +.. --------------------------------------------------------------------------- + +.. _whatsnew_102.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.1..v1.0.2 diff --git a/doc/source/whatsnew/v1.0.3.rst b/doc/source/whatsnew/v1.0.3.rst new file mode 100644 index 00000000..62e6ae5b --- /dev/null +++ b/doc/source/whatsnew/v1.0.3.rst @@ -0,0 +1,29 @@ + +.. _whatsnew_103: + +What's new in 1.0.3 (March 17, 2020) +------------------------------------ + +These are the changes in pandas 1.0.3. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. 
_whatsnew_103.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in ``resample.agg`` when the underlying data is non-writeable (:issue:`31710`) +- Fixed regression in :class:`DataFrame` exponentiation with reindexing (:issue:`32685`) + +.. _whatsnew_103.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.2..v1.0.3 diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst new file mode 100644 index 00000000..84b7e7d4 --- /dev/null +++ b/doc/source/whatsnew/v1.0.4.rst @@ -0,0 +1,48 @@ + +.. _whatsnew_104: + +What's new in 1.0.4 (May 28, 2020) +------------------------------------ + +These are the changes in pandas 1.0.4. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_104.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fix regression where :meth:`Series.isna` and :meth:`DataFrame.isna` would raise for categorical dtype when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33594`) +- Fix regression in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`) +- Fix regression in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). 
+- Fix performance regression in ``memory_usage(deep=True)`` for object dtype (:issue:`33012`) +- Fix regression where :meth:`Categorical.replace` would replace with ``NaN`` whenever the new value and replacement value were equal (:issue:`33288`) +- Fix regression where an ordered :class:`Categorical` containing only ``NaN`` values would raise rather than returning ``NaN`` when taking the minimum or maximum (:issue:`33450`) +- Fix regression in :meth:`DataFrameGroupBy.agg` with dictionary input losing ``ExtensionArray`` dtypes (:issue:`32194`) +- Fix to preserve the ability to index with the "nearest" method with xarray's CFTimeIndex, an :class:`Index` subclass (`pydata/xarray#3751 <https://github.com/pydata/xarray/issues/3751>`_, :issue:`32905`). +- Fix regression in :meth:`DataFrame.describe` raising ``TypeError: unhashable type: 'dict'`` (:issue:`32409`) +- Fix regression in :meth:`DataFrame.replace` casting columns to ``object`` dtype if items in ``to_replace`` are not in values (:issue:`32988`) +- Fix regression where :meth:`Series.groupby` would raise ``ValueError`` when grouping by :class:`PeriodIndex` level (:issue:`34010`) +- Fix regression where :meth:`GroupBy.rolling.apply` ignored the ``args`` and ``kwargs`` parameters (:issue:`33433`) +- Fix regression in error message with ``np.min`` or ``np.max`` on unordered :class:`Categorical` (:issue:`33115`) +- Fix regression in :meth:`DataFrame.loc` and :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) + +.. _whatsnew_104.bug_fixes: + +Bug fixes +~~~~~~~~~ +- Bug in :meth:`SeriesGroupBy.first`, :meth:`SeriesGroupBy.last`, :meth:`SeriesGroupBy.min`, and :meth:`SeriesGroupBy.max` returning floats when applied to nullable Booleans (:issue:`33071`) +- Bug in :meth:`Rolling.min` and :meth:`Rolling.max`: Growing memory usage after multiple calls when using a fixed window (:issue:`30726`) +- Bug in :meth:`~DataFrame.to_parquet` was not raising ``PermissionError`` when writing to a private s3 bucket with invalid credentials.
(:issue:`27679`) +- Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. (:issue:`32486`) +- Bug in :meth:`read_parquet` was raising a ``FileNotFoundError`` when passed an s3 directory path. (:issue:`26388`) +- Bug in :meth:`~DataFrame.to_parquet` was throwing an ``AttributeError`` when writing a partitioned parquet file to s3 (:issue:`27596`) +- Bug in :meth:`GroupBy.quantile` causes the quantiles to be shifted when the ``by`` axis contains ``NaN`` (:issue:`33200`, :issue:`33569`) + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v1.0.3..v1.0.4 diff --git a/doc/source/whatsnew/v1.0.5.rst b/doc/source/whatsnew/v1.0.5.rst new file mode 100644 index 00000000..9a5128a0 --- /dev/null +++ b/doc/source/whatsnew/v1.0.5.rst @@ -0,0 +1,39 @@ + +.. _whatsnew_105: + +What's new in 1.0.5 (June 17, 2020) +----------------------------------- + +These are the changes in pandas 1.0.5. See :ref:`release` for a full changelog +including other versions of pandas. + +{{ header }} + +.. --------------------------------------------------------------------------- + +.. _whatsnew_105.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ + +- Fix regression in :meth:`read_parquet` when reading from file-like objects + (:issue:`34467`). +- Fix regression in reading from public S3 buckets (:issue:`34626`). + +Note this disables the ability to read Parquet files from directories on S3 +again (:issue:`26388`, :issue:`34632`), which was added in the 1.0.4 release, +but is now targeted for pandas 1.1.0. + +- Fixed regression in :meth:`~DataFrame.replace` raising an ``AssertionError`` when replacing values in an extension dtype with values of a different dtype (:issue:`34530`) + +.. _whatsnew_105.bug_fixes: + +Bug fixes +~~~~~~~~~ + +- Fixed building from source with Python 3.8 fetching the wrong version of NumPy (:issue:`34666`) + +Contributors +~~~~~~~~~~~~ + +.. 
contributors:: v1.0.4..v1.0.5|HEAD diff --git a/doc/source/whatsnew/whatsnew_0171_html_table.html b/doc/source/whatsnew/whatsnew_0171_html_table.html new file mode 100644 index 00000000..a76d6207 --- /dev/null +++ b/doc/source/whatsnew/whatsnew_0171_html_table.html @@ -0,0 +1,872 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + a + + b + + c + + d + + e + +
    + + 0 + + + + + -1.085631 + + + + + 0.997345 + + + + + 0.282978 + + + + + -1.506295 + + + + + -0.5786 + + +
    + + 1 + + + + + 1.651437 + + + + + -2.426679 + + + + + -0.428913 + + + + + 1.265936 + + + + + -0.86674 + + +
    + + 2 + + + + + -0.678886 + + + + + -0.094709 + + + + + 1.49139 + + + + + -0.638902 + + + + + -0.443982 + + +
    + + 3 + + + + + -0.434351 + + + + + 2.20593 + + + + + 2.186786 + + + + + 1.004054 + + + + + 0.386186 + + +
    + + 4 + + + + + 0.737369 + + + + + 1.490732 + + + + + -0.935834 + + + + + 1.175829 + + + + + -1.253881 + + +
    + + 5 + + + + + -0.637752 + + + + + 0.907105 + + + + + -1.428681 + + + + + -0.140069 + + + + + -0.861755 + + +
    + + 6 + + + + + -0.255619 + + + + + -2.798589 + + + + + -1.771533 + + + + + -0.699877 + + + + + 0.927462 + + +
    + + 7 + + + + + -0.173636 + + + + + 0.002846 + + + + + 0.688223 + + + + + -0.879536 + + + + + 0.283627 + + +
    + + 8 + + + + + -0.805367 + + + + + -1.727669 + + + + + -0.3909 + + + + + 0.573806 + + + + + 0.338589 + + +
    + + 9 + + + + + -0.01183 + + + + + 2.392365 + + + + + 0.412912 + + + + + 0.978736 + + + + + 2.238143 + + +
    diff --git a/doc/sphinxext/README.rst b/doc/sphinxext/README.rst new file mode 100644 index 00000000..2be5372b --- /dev/null +++ b/doc/sphinxext/README.rst @@ -0,0 +1,17 @@ +sphinxext +========= + +This directory contains copies of different sphinx extensions in use in the +pandas documentation. These copies originate from other projects: + +- ``numpydoc`` - Numpy's Sphinx extensions: this can be found at its own + repository: https://github.com/numpy/numpydoc +- ``ipython_directive`` and ``ipython_console_highlighting`` in the folder + `ipython_sphinxext` - Sphinx extensions from IPython: these are included + in IPython: https://github.com/ipython/ipython/tree/master/IPython/sphinxext + +.. note:: + + These copies are maintained at the respective projects, so fixes should, + to the extent possible, be pushed upstream instead of only adapting our + local copy to avoid divergence between the local and upstream version. diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py new file mode 100755 index 00000000..3acc313f --- /dev/null +++ b/doc/sphinxext/announce.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# -*- encoding:utf-8 -*- +""" +Script to generate contributor and pull request lists + +This script generates contributor and pull request lists for release +announcements using Github v3 protocol. Use requires an authentication token in +order to have sufficient bandwidth, you can get one following the directions at +`_ +Don't add any scope, as the default is read access to public information. The +token may be stored in an environment variable as you only get one chance to +see it. + +Usage:: + + $ ./scripts/announce.py + +The output is utf8 rst. + +Dependencies +------------ + +- gitpython +- pygithub + +Some code was copied from scipy `tools/gh_lists.py` and `tools/authors.py`. + +Examples +-------- + +From the bash command line with $GITHUB token. 
+ + $ ./scripts/announce.py $GITHUB v1.11.0..v1.11.1 > announce.rst + +""" +import codecs +import os +import re +import textwrap + +from git import Repo + +UTF8Writer = codecs.getwriter("utf8") +this_repo = Repo(os.path.join(os.path.dirname(__file__), "..", "..")) + +author_msg = """\ +A total of %d people contributed patches to this release. People with a +"+" by their names contributed a patch for the first time. +""" + +pull_request_msg = """\ +A total of %d pull requests were merged for this release. +""" + + +def get_authors(revision_range): + pat = "^.*\\t(.*)$" + lst_release, cur_release = [r.strip() for r in revision_range.split("..")] + + if "|" in cur_release: + # e.g. v1.0.1|HEAD + maybe_tag, head = cur_release.split("|") + assert head == "HEAD" + if maybe_tag in this_repo.tags: + cur_release = maybe_tag + else: + cur_release = head + revision_range = f"{lst_release}..{cur_release}" + + # authors, in current release and previous to current release. + # We need two passes over the log for cur and prev, one to get the + # "Co-authored by" commits, which come from backports by the bot, + # and one for regular commits. + xpr = re.compile(r"Co-authored-by: (?P[^<]+) ") + cur = set( + xpr.findall( + this_repo.git.log("--grep=Co-authored", "--pretty=%b", revision_range) + ) + ) + cur |= set(re.findall(pat, this_repo.git.shortlog("-s", revision_range), re.M)) + + pre = set( + xpr.findall(this_repo.git.log("--grep=Co-authored", "--pretty=%b", lst_release)) + ) + pre |= set(re.findall(pat, this_repo.git.shortlog("-s", lst_release), re.M)) + + # Homu is the author of auto merges, clean him out. + cur.discard("Homu") + pre.discard("Homu") + + # Append '+' to new authors. 
+ authors = [s + " +" for s in cur - pre] + [s for s in cur & pre] + authors.sort() + return authors + + +def get_pull_requests(repo, revision_range): + prnums = [] + + # From regular merges + merges = this_repo.git.log("--oneline", "--merges", revision_range) + issues = re.findall("Merge pull request \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From Homu merges (Auto merges) + issues = re.findall("Auto merge of \\#(\\d*)", merges) + prnums.extend(int(s) for s in issues) + + # From fast forward squash-merges + commits = this_repo.git.log( + "--oneline", "--no-merges", "--first-parent", revision_range + ) + issues = re.findall("^.*\\(\\#(\\d+)\\)$", commits, re.M) + prnums.extend(int(s) for s in issues) + + # get PR data from github repo + prnums.sort() + prs = [repo.get_pull(n) for n in prnums] + return prs + + +def build_components(revision_range, heading="Contributors"): + lst_release, cur_release = [r.strip() for r in revision_range.split("..")] + authors = get_authors(revision_range) + + return { + "heading": heading, + "author_message": author_msg % len(authors), + "authors": authors, + } + + +def build_string(revision_range, heading="Contributors"): + components = build_components(revision_range, heading=heading) + components["uline"] = "=" * len(components["heading"]) + components["authors"] = "* " + "\n* ".join(components["authors"]) + + # Don't change this to an fstring. It breaks the formatting. 
+ tpl = textwrap.dedent( + """\ + {heading} + {uline} + + {author_message} + {authors}""" + ).format(**components) + return tpl + + +def main(revision_range): + # document authors + text = build_string(revision_range) + print(text) + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser(description="Generate author lists for release") + parser.add_argument("revision_range", help="..") + args = parser.parse_args() + main(args.revision_range) diff --git a/doc/sphinxext/contributors.py b/doc/sphinxext/contributors.py new file mode 100644 index 00000000..c2b21e40 --- /dev/null +++ b/doc/sphinxext/contributors.py @@ -0,0 +1,57 @@ +"""Sphinx extension for listing code contributors to a release. + +Usage:: + + .. contributors:: v0.23.0..v0.23.1 + +This will be replaced with a message indicating the number of +code contributors and commits, and then list each contributor +individually. For development versions (before a tag is available) +use:: + + .. contributors:: v0.23.0..v0.23.1|HEAD + +While the v0.23.1 tag does not exist, that will use the HEAD of the +branch as the end of the revision range. 
+""" +from announce import build_components +from docutils import nodes +from docutils.parsers.rst import Directive +import git + + +class ContributorsDirective(Directive): + required_arguments = 1 + name = "contributors" + + def run(self): + range_ = self.arguments[0] + if range_.endswith("x..HEAD"): + return [nodes.paragraph(), nodes.bullet_list()] + try: + components = build_components(range_) + except git.GitCommandError as exc: + return [ + self.state.document.reporter.warning( + f"Cannot find contributors for range {repr(range_)}: {exc}", + line=self.lineno, + ) + ] + else: + message = nodes.paragraph() + message += nodes.Text(components["author_message"]) + + listnode = nodes.bullet_list() + + for author in components["authors"]: + para = nodes.paragraph() + para += nodes.Text(author) + listnode += nodes.list_item("", para) + + return [message, listnode] + + +def setup(app): + app.add_directive("contributors", ContributorsDirective) + + return {"version": "0.1", "parallel_read_safe": True, "parallel_write_safe": True} diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..ea15bf1b --- /dev/null +++ b/environment.yml @@ -0,0 +1,109 @@ +name: pandas-dev +channels: + - conda-forge +dependencies: + # required + - numpy>=1.15 + - python=3.7 + - python-dateutil>=2.6.1 + - pytz + + # benchmarks + - asv + + # building + - cython>=0.29.13 + + # code checks + - black=19.10b0 + - cpplint + - flake8 + - flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions + - flake8-rst>=0.6.0,<=0.7.0 # linting of code blocks in rst files + - isort # check that imports are in the right order + - mypy=0.730 + - pycodestyle # used by flake8 + + # documentation + - gitpython # obtain contributors from git for whatsnew + - gitdb2=2.0.6 # GH-32060 + - sphinx + - numpydoc>=0.9.0 + + # documentation (jupyter notebooks) + - nbconvert>=5.4.1 + - nbsphinx + - pandoc + + # Dask and its dependencies (that dont install with dask) + - 
dask-core + - toolz>=0.7.3 + - fsspec>=0.5.1 + - partd>=0.3.10 + - cloudpickle>=0.2.1 + + # web (jinja2 is also needed, but it's also an optional pandas dependency) + - markdown + - feedparser + - pyyaml + - requests + + # testing + - boto3 + - botocore>=1.11 + - hypothesis>=3.82 + - moto # mock S3 + - pytest>=5.0.1 + - pytest-cov + - pytest-xdist>=1.21 + - pytest-asyncio + + # downstream tests + - seaborn + - statsmodels + + # unused (required indirectly may be?) + - ipywidgets + - nbformat + - notebook>=5.7.5 + - pip + + # optional + - blosc + - bottleneck>=1.2.1 + - ipykernel + - ipython>=7.11.1 + - jinja2 # pandas.Styler + - matplotlib>=2.2.2 # pandas.plotting, Series.plot, DataFrame.plot + - numexpr>=2.6.8 + - scipy>=1.1 + - numba>=0.46.0 + + # optional for io + # --------------- + # pd.read_html + - beautifulsoup4>=4.6.0 + - html5lib + - lxml + + # pd.read_excel, DataFrame.to_excel, pd.ExcelWriter, pd.ExcelFile + - openpyxl<=3.0.1 + - xlrd + - xlsxwriter + - xlwt + - odfpy + + - fastparquet>=0.3.2 # pandas.read_parquet, DataFrame.to_parquet + - pyarrow>=0.13.1 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather + - python-snappy # required by pyarrow + + - pyqt>=5.9.2 # pandas.read_clipboard + - pytables>=3.4.2 # pandas.read_hdf, DataFrame.to_hdf + - s3fs # pandas.read_csv... when using 's3://...' 
path + - sqlalchemy # pandas.read_sql, DataFrame.to_sql + - xarray # DataFrame.to_xarray + - cftime # Needed for downstream xarray.CFTimeIndex test + - pyreadstat # pandas.read_spss + - tabulate>=0.8.3 # DataFrame.to_markdown + - pip: + - git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master diff --git a/pandas/__init__.py b/pandas/__init__.py new file mode 100644 index 00000000..6eda468e --- /dev/null +++ b/pandas/__init__.py @@ -0,0 +1,407 @@ +# flake8: noqa + +__docformat__ = "restructuredtext" + +# Let users know if they're missing any of our hard dependencies +hard_dependencies = ("numpy", "pytz", "dateutil") +missing_dependencies = [] + +for dependency in hard_dependencies: + try: + __import__(dependency) + except ImportError as e: + missing_dependencies.append(f"{dependency}: {e}") + +if missing_dependencies: + raise ImportError( + "Unable to import required dependencies:\n" + "\n".join(missing_dependencies) + ) +del hard_dependencies, dependency, missing_dependencies + +# numpy compat +from pandas.compat.numpy import ( + _np_version_under1p14, + _np_version_under1p15, + _np_version_under1p16, + _np_version_under1p17, + _np_version_under1p18, + _is_numpy_dev, +) + +try: + from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib +except ImportError as e: # pragma: no cover + # hack but overkill to use re + module = str(e).replace("cannot import name ", "") + raise ImportError( + f"C extension: {module} not built. If you want to import " + "pandas from the source directory, you may need to run " + "'python setup.py build_ext --inplace --force' to build " + "the C extensions first." 
+ ) + +from pandas._config import ( + get_option, + set_option, + reset_option, + describe_option, + option_context, + options, +) + +# let init-time option registration happen +import pandas.core.config_init + +from pandas.core.api import ( + # dtype + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + CategoricalDtype, + PeriodDtype, + IntervalDtype, + DatetimeTZDtype, + StringDtype, + BooleanDtype, + # missing + NA, + isna, + isnull, + notna, + notnull, + # indexes + Index, + CategoricalIndex, + Int64Index, + UInt64Index, + RangeIndex, + Float64Index, + MultiIndex, + IntervalIndex, + TimedeltaIndex, + DatetimeIndex, + PeriodIndex, + IndexSlice, + # tseries + NaT, + Period, + period_range, + Timedelta, + timedelta_range, + Timestamp, + date_range, + bdate_range, + Interval, + interval_range, + DateOffset, + # conversion + to_numeric, + to_datetime, + to_timedelta, + # misc + Grouper, + factorize, + unique, + value_counts, + NamedAgg, + array, + Categorical, + set_eng_float_format, + Series, + DataFrame, +) + +from pandas.core.arrays.sparse import SparseDtype + +from pandas.tseries.api import infer_freq +from pandas.tseries import offsets + +from pandas.core.computation.api import eval + +from pandas.core.reshape.api import ( + concat, + lreshape, + melt, + wide_to_long, + merge, + merge_asof, + merge_ordered, + crosstab, + pivot, + pivot_table, + get_dummies, + cut, + qcut, +) + +import pandas.api +from pandas.util._print_versions import show_versions + +from pandas.io.api import ( + # excel + ExcelFile, + ExcelWriter, + read_excel, + # parsers + read_csv, + read_fwf, + read_table, + # pickle + read_pickle, + to_pickle, + # pytables + HDFStore, + read_hdf, + # sql + read_sql, + read_sql_query, + read_sql_table, + # misc + read_clipboard, + read_parquet, + read_orc, + read_feather, + read_gbq, + read_html, + read_json, + read_stata, + read_sas, + read_spss, +) + +from pandas.io.json import 
_json_normalize as json_normalize + +from pandas.util._tester import test +import pandas.testing +import pandas.arrays + +# use the closest tagged version if possible +from ._version import get_versions + +v = get_versions() +__version__ = v.get("closest-tag", v["version"]) +__git_version__ = v.get("full-revisionid") +del get_versions, v + +# GH 27101 +# TODO: remove Panel compat in 1.0 +if pandas.compat.PY37: + + def __getattr__(name): + import warnings + + if name == "Panel": + + warnings.warn( + "The Panel class is removed from pandas. Accessing it " + "from the top-level namespace will also be removed in " + "the next version", + FutureWarning, + stacklevel=2, + ) + + class Panel: + pass + + return Panel + + elif name == "datetime": + warnings.warn( + "The pandas.datetime class is deprecated " + "and will be removed from pandas in a future version. " + "Import from datetime module instead.", + FutureWarning, + stacklevel=2, + ) + + from datetime import datetime as dt + + return dt + + elif name == "np": + + warnings.warn( + "The pandas.np module is deprecated " + "and will be removed from pandas in a future version. " + "Import numpy directly instead", + FutureWarning, + stacklevel=2, + ) + import numpy as np + + return np + + elif name in {"SparseSeries", "SparseDataFrame"}: + warnings.warn( + f"The {name} class is removed from pandas. Accessing it from " + "the top-level namespace will also be removed in the next " + "version", + FutureWarning, + stacklevel=2, + ) + + return type(name, (), {}) + + elif name == "SparseArray": + + warnings.warn( + "The pandas.SparseArray class is deprecated " + "and will be removed from pandas in a future version. 
" + "Use pandas.arrays.SparseArray instead.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.arrays.sparse import SparseArray as _SparseArray + + return _SparseArray + + raise AttributeError(f"module 'pandas' has no attribute '{name}'") + + +else: + + class Panel: + pass + + class SparseDataFrame: + pass + + class SparseSeries: + pass + + class __numpy: + def __init__(self): + import numpy as np + import warnings + + self.np = np + self.warnings = warnings + + def __getattr__(self, item): + self.warnings.warn( + "The pandas.np module is deprecated " + "and will be removed from pandas in a future version. " + "Import numpy directly instead", + FutureWarning, + stacklevel=2, + ) + + try: + return getattr(self.np, item) + except AttributeError: + raise AttributeError(f"module numpy has no attribute {item}") + + np = __numpy() + + class __Datetime(type): + + from datetime import datetime as dt + + datetime = dt + + def __getattr__(cls, item): + cls.emit_warning() + + try: + return getattr(cls.datetime, item) + except AttributeError: + raise AttributeError(f"module datetime has no attribute {item}") + + def __instancecheck__(cls, other): + return isinstance(other, cls.datetime) + + class __DatetimeSub(metaclass=__Datetime): + def emit_warning(dummy=0): + import warnings + + warnings.warn( + "The pandas.datetime class is deprecated " + "and will be removed from pandas in a future version. 
" + "Import from datetime instead.", + FutureWarning, + stacklevel=3, + ) + + def __new__(cls, *args, **kwargs): + cls.emit_warning() + from datetime import datetime as dt + + return dt(*args, **kwargs) + + datetime = __DatetimeSub + + class __SparseArray(type): + + from pandas.core.arrays.sparse import SparseArray as sa + + SparseArray = sa + + def __instancecheck__(cls, other): + return isinstance(other, cls.SparseArray) + + class __SparseArraySub(metaclass=__SparseArray): + def emit_warning(dummy=0): + import warnings + + warnings.warn( + "The pandas.SparseArray class is deprecated " + "and will be removed from pandas in a future version. " + "Use pandas.arrays.SparseArray instead.", + FutureWarning, + stacklevel=3, + ) + + def __new__(cls, *args, **kwargs): + cls.emit_warning() + from pandas.core.arrays.sparse import SparseArray as sa + + return sa(*args, **kwargs) + + SparseArray = __SparseArraySub + + +# module level doc-string +__doc__ = """ +pandas - a powerful data analysis and manipulation library for Python +===================================================================== + +**pandas** is a Python package providing fast, flexible, and expressive data +structures designed to make working with "relational" or "labeled" data both +easy and intuitive. It aims to be the fundamental high-level building block for +doing practical, **real world** data analysis in Python. Additionally, it has +the broader goal of becoming **the most powerful and flexible open source data +analysis / manipulation tool available in any language**. It is already well on +its way toward this goal. + +Main Features +------------- +Here are just a few of the things that pandas does well: + + - Easy handling of missing data in floating point as well as non-floating + point data. 
+ - Size mutability: columns can be inserted and deleted from DataFrame and + higher dimensional objects + - Automatic and explicit data alignment: objects can be explicitly aligned + to a set of labels, or the user can simply ignore the labels and let + `Series`, `DataFrame`, etc. automatically align the data for you in + computations. + - Powerful, flexible group by functionality to perform split-apply-combine + operations on data sets, for both aggregating and transforming data. + - Make it easy to convert ragged, differently-indexed data in other Python + and NumPy data structures into DataFrame objects. + - Intelligent label-based slicing, fancy indexing, and subsetting of large + data sets. + - Intuitive merging and joining data sets. + - Flexible reshaping and pivoting of data sets. + - Hierarchical labeling of axes (possible to have multiple labels per tick). + - Robust IO tools for loading data from flat files (CSV and delimited), + Excel files, databases, and saving/loading data from the ultrafast HDF5 + format. + - Time series-specific functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. +""" diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py new file mode 100644 index 00000000..65936a9f --- /dev/null +++ b/pandas/_config/__init__.py @@ -0,0 +1,28 @@ +""" +pandas._config is considered explicitly upstream of everything else in pandas, +should have no intra-pandas dependencies. + +importing `dates` and `display` ensures that keys needed by _libs +are initialized. 
+""" +__all__ = [ + "config", + "detect_console_encoding", + "get_option", + "set_option", + "reset_option", + "describe_option", + "option_context", + "options", +] +from pandas._config import config +from pandas._config import dates # noqa:F401 +from pandas._config.config import ( + describe_option, + get_option, + option_context, + options, + reset_option, + set_option, +) +from pandas._config.display import detect_console_encoding diff --git a/pandas/_config/config.py b/pandas/_config/config.py new file mode 100644 index 00000000..1978f506 --- /dev/null +++ b/pandas/_config/config.py @@ -0,0 +1,869 @@ +""" +The config module holds package-wide configurables and provides +a uniform API for working with them. + +Overview +======== + +This module supports the following requirements: +- options are referenced using keys in dot.notation, e.g. "x.y.option - z". +- keys are case-insensitive. +- functions should accept partial/regex keys, when unambiguous. +- options can be registered by modules at import time. +- options can be registered at init-time (via core.config_init) +- options have a default value, and (optionally) a description and + validation function associated with them. +- options can be deprecated, in which case referencing them + should produce a warning. +- deprecated options can optionally be rerouted to a replacement + so that accessing a deprecated option reroutes to a differently + named option. +- options can be reset to their default value. +- all option can be reset to their default value at once. +- all options in a certain sub - namespace can be reset at once. +- the user can set / get / reset or ask for the description of an option. +- a developer can register and mark an option as deprecated. +- you can register a callback to be invoked when the option value + is set or reset. Changing the stored value is considered misuse, but + is not verboten. 
+ +Implementation +============== + +- Data is stored using nested dictionaries, and should be accessed + through the provided API. + +- "Registered options" and "Deprecated options" have metadata associated + with them, which are stored in auxiliary dictionaries keyed on the + fully-qualified key, e.g. "x.y.z.option". + +- the config_init module is imported by the package's __init__.py file. + placing any register_option() calls there will ensure those options + are available as soon as pandas is loaded. If you use register_option + in a module, it will only be available after that module is imported, + which you should be aware of. + +- `config_prefix` is a context_manager (for use with the `with` keyword) + which can save developers some typing, see the docstring. + +""" + +from collections import namedtuple +from contextlib import contextmanager +import re +from typing import Any, Dict, Iterable, List +import warnings + +DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver") +RegisteredOption = namedtuple("RegisteredOption", "key defval doc validator cb") + +# holds deprecated option metadata +_deprecated_options: Dict[str, DeprecatedOption] = {} + +# holds registered option metadata +_registered_options: Dict[str, RegisteredOption] = {} + +# holds the current values for registered options +_global_config: Dict[str, Any] = {} + +# keys which have a special meaning +_reserved_keys: List[str] = ["all"] + + +class OptionError(AttributeError, KeyError): + """Exception for pandas.options, backwards compatible with KeyError + checks + """ + + +# +# User API + + +def _get_single_key(pat, silent): + keys = _select_options(pat) + if len(keys) == 0: + if not silent: + _warn_if_deprecated(pat) + raise OptionError(f"No such keys(s): {repr(pat)}") + if len(keys) > 1: + raise OptionError("Pattern matched multiple keys") + key = keys[0] + + if not silent: + _warn_if_deprecated(key) + + key = _translate_key(key) + + return key + + +def _get_option(pat, 
silent=False): + key = _get_single_key(pat, silent) + + # walk the nested dict + root, k = _get_root(key) + return root[k] + + +def _set_option(*args, **kwargs): + # must at least 1 arg deal with constraints later + nargs = len(args) + if not nargs or nargs % 2 != 0: + raise ValueError("Must provide an even number of non-keyword arguments") + + # default to false + silent = kwargs.pop("silent", False) + + if kwargs: + kwarg = list(kwargs.keys())[0] + raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"') + + for k, v in zip(args[::2], args[1::2]): + key = _get_single_key(k, silent) + + o = _get_registered_option(key) + if o and o.validator: + o.validator(v) + + # walk the nested dict + root, k = _get_root(key) + root[k] = v + + if o.cb: + if silent: + with warnings.catch_warnings(record=True): + o.cb(key) + else: + o.cb(key) + + +def _describe_option(pat="", _print_desc=True): + + keys = _select_options(pat) + if len(keys) == 0: + raise OptionError("No such keys(s)") + + s = "\n".join([_build_option_description(k) for k in keys]) + + if _print_desc: + print(s) + else: + return s + + +def _reset_option(pat, silent=False): + + keys = _select_options(pat) + + if len(keys) == 0: + raise OptionError("No such keys(s)") + + if len(keys) > 1 and len(pat) < 4 and pat != "all": + raise ValueError( + "You must specify at least 4 characters when " + "resetting multiple keys, use the special keyword " + '"all" to reset all the options to their default ' + "value" + ) + + for k in keys: + _set_option(k, _registered_options[k].defval, silent=silent) + + +def get_default_val(pat): + key = _get_single_key(pat, silent=True) + return _get_registered_option(key).defval + + +class DictWrapper: + """ provide attribute-style access to a nested dict""" + + def __init__(self, d, prefix=""): + object.__setattr__(self, "d", d) + object.__setattr__(self, "prefix", prefix) + + def __setattr__(self, key, val): + prefix = object.__getattribute__(self, "prefix") + if 
prefix: + prefix += "." + prefix += key + # you can't set new keys + # and you can't overwrite subtrees + if key in self.d and not isinstance(self.d[key], dict): + _set_option(prefix, val) + else: + raise OptionError("You can only set the value of existing options") + + def __getattr__(self, key: str): + prefix = object.__getattribute__(self, "prefix") + if prefix: + prefix += "." + prefix += key + try: + v = object.__getattribute__(self, "d")[key] + except KeyError: + raise OptionError("No such option") + if isinstance(v, dict): + return DictWrapper(v, prefix) + else: + return _get_option(prefix) + + def __dir__(self): + return list(self.d.keys()) + + +# For user convenience, we'd like to have the available options described +# in the docstring. For dev convenience we'd like to generate the docstrings +# dynamically instead of maintaining them by hand. To do this, we use the +# class below which wraps functions inside a callable, and converts +# __doc__ into a property function. The docstrings below are templates +# using the py2.6+ advanced formatting syntax to plug in a concise list +# of options, and option descriptions. + + +class CallableDynamicDoc: + def __init__(self, func, doc_tmpl): + self.__doc_tmpl__ = doc_tmpl + self.__func__ = func + + def __call__(self, *args, **kwds): + return self.__func__(*args, **kwds) + + @property + def __doc__(self): + opts_desc = _describe_option("all", _print_desc=False) + opts_list = pp_options_list(list(_registered_options.keys())) + return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list) + + +_get_option_tmpl = """ +get_option(pat) + +Retrieves the value of the specified option. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str + Regexp which should match a single option. + Note: partial matches are supported for convenience, but unless you use the + full option name (e.g. x.y.z.option_name), your code may break in future + versions if new options with similar names are introduced. 
+ +Returns +------- +result : the value of the option + +Raises +------ +OptionError : if no such option exists + +Notes +----- +The available options with its descriptions: + +{opts_desc} +""" + +_set_option_tmpl = """ +set_option(pat, value) + +Sets the value of the specified option. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str + Regexp which should match a single option. + Note: partial matches are supported for convenience, but unless you use the + full option name (e.g. x.y.z.option_name), your code may break in future + versions if new options with similar names are introduced. +value : object + New value of option. + +Returns +------- +None + +Raises +------ +OptionError if no such option exists + +Notes +----- +The available options with its descriptions: + +{opts_desc} +""" + +_describe_option_tmpl = """ +describe_option(pat, _print_desc=False) + +Prints the description for one or more registered options. + +Call with not arguments to get a listing for all registered options. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str + Regexp pattern. All matching keys will have their description displayed. +_print_desc : bool, default True + If True (default) the description(s) will be printed to stdout. + Otherwise, the description(s) will be returned as a unicode string + (for testing). + +Returns +------- +None by default, the description(s) as a unicode string if _print_desc +is False + +Notes +----- +The available options with its descriptions: + +{opts_desc} +""" + +_reset_option_tmpl = """ +reset_option(pat) + +Reset one or more options to their default value. + +Pass "all" as argument to reset all options. + +Available options: + +{opts_list} + +Parameters +---------- +pat : str/regex + If specified only options matching `prefix*` will be reset. + Note: partial matches are supported for convenience, but unless you + use the full option name (e.g. 
x.y.z.option_name), your code may break + in future versions if new options with similar names are introduced. + +Returns +------- +None + +Notes +----- +The available options with its descriptions: + +{opts_desc} +""" + +# bind the functions with their docstrings into a Callable +# and use that as the functions exposed in pd.api +get_option = CallableDynamicDoc(_get_option, _get_option_tmpl) +set_option = CallableDynamicDoc(_set_option, _set_option_tmpl) +reset_option = CallableDynamicDoc(_reset_option, _reset_option_tmpl) +describe_option = CallableDynamicDoc(_describe_option, _describe_option_tmpl) +options = DictWrapper(_global_config) + +# +# Functions for use by pandas developers, in addition to User - api + + +class option_context: + """ + Context manager to temporarily set options in the `with` statement context. + + You need to invoke as ``option_context(pat, val, [(pat, val), ...])``. + + Examples + -------- + + >>> with option_context('display.max_rows', 10, 'display.max_columns', 5): + ... ... + """ + + def __init__(self, *args): + if not (len(args) % 2 == 0 and len(args) >= 2): + raise ValueError( + "Need to invoke as option_context(pat, val, [(pat, val), ...])." + ) + + self.ops = list(zip(args[::2], args[1::2])) + + def __enter__(self): + self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops] + + for pat, val in self.ops: + _set_option(pat, val, silent=True) + + def __exit__(self, *args): + if self.undo: + for pat, val in self.undo: + _set_option(pat, val, silent=True) + + +def register_option(key: str, defval: object, doc="", validator=None, cb=None): + """Register an option in the package-wide pandas config object + + Parameters + ---------- + key - a fully-qualified key, e.g. "x.y.option - z". + defval - the default value of the option + doc - a string description of the option + validator - a function of a single argument, should raise `ValueError` if + called with a value which is not a legal value for the option. 
+ cb - a function of a single argument "key", which is called + immediately after an option value is set/reset. key is + the full name of the option. + + Returns + ------- + Nothing. + + Raises + ------ + ValueError if `validator` is specified and `defval` is not a valid value. + + """ + import tokenize + import keyword + + key = key.lower() + + if key in _registered_options: + raise OptionError(f"Option '{key}' has already been registered") + if key in _reserved_keys: + raise OptionError(f"Option '{key}' is a reserved key") + + # the default value should be legal + if validator: + validator(defval) + + # walk the nested dict, creating dicts as needed along the path + path = key.split(".") + + for k in path: + # NOTE: tokenize.Name is not a public constant + # error: Module has no attribute "Name" [attr-defined] + if not re.match("^" + tokenize.Name + "$", k): # type: ignore + raise ValueError(f"{k} is not a valid identifier") + if keyword.iskeyword(k): + raise ValueError(f"{k} is a python keyword") + + cursor = _global_config + msg = "Path prefix to option '{option}' is already an option" + + for i, p in enumerate(path[:-1]): + if not isinstance(cursor, dict): + raise OptionError(msg.format(option=".".join(path[:i]))) + if p not in cursor: + cursor[p] = {} + cursor = cursor[p] + + if not isinstance(cursor, dict): + raise OptionError(msg.format(option=".".join(path[:-1]))) + + cursor[path[-1]] = defval # initialize + + # save the option metadata + _registered_options[key] = RegisteredOption( + key=key, defval=defval, doc=doc, validator=validator, cb=cb + ) + + +def deprecate_option(key, msg=None, rkey=None, removal_ver=None): + """ + Mark option `key` as deprecated, if code attempts to access this option, + a warning will be produced, using `msg` if given, or a default message + if not. + if `rkey` is given, any access to the key will be re-routed to `rkey`. + + Neither the existence of `key` nor that if `rkey` is checked. 
If they + do not exist, any subsequent access will fail as usual, after the + deprecation warning is given. + + Parameters + ---------- + key - the name of the option to be deprecated. must be a fully-qualified + option name (e.g "x.y.z.rkey"). + + msg - (Optional) a warning message to output when the key is referenced. + if no message is given a default message will be emitted. + + rkey - (Optional) the name of an option to reroute access to. + If specified, any referenced `key` will be re-routed to `rkey` + including set/get/reset. + rkey must be a fully-qualified option name (e.g "x.y.z.rkey"). + used by the default message if no `msg` is specified. + + removal_ver - (Optional) specifies the version in which this option will + be removed. used by the default message if no `msg` + is specified. + + Returns + ------- + Nothing + + Raises + ------ + OptionError - if key has already been deprecated. + + """ + + key = key.lower() + + if key in _deprecated_options: + raise OptionError(f"Option '{key}' has already been defined as deprecated.") + + _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver) + + +# +# functions internal to the module + + +def _select_options(pat): + """returns a list of keys matching `pat` + + if pat=="all", returns all registered options + """ + + # short-circuit for exact key + if pat in _registered_options: + return [pat] + + # else look through all of them + keys = sorted(_registered_options.keys()) + if pat == "all": # reserved key + return keys + + return [k for k in keys if re.search(pat, k, re.I)] + + +def _get_root(key): + path = key.split(".") + cursor = _global_config + for p in path[:-1]: + cursor = cursor[p] + return cursor, path[-1] + + +def _is_deprecated(key): + """ Returns True if the given option has been deprecated """ + + key = key.lower() + return key in _deprecated_options + + +def _get_deprecated_option(key): + """ + Retrieves the metadata for a deprecated option, if `key` is deprecated. 
+ + Returns + ------- + DeprecatedOption (namedtuple) if key is deprecated, None otherwise + """ + + try: + d = _deprecated_options[key] + except KeyError: + return None + else: + return d + + +def _get_registered_option(key): + """ + Retrieves the option metadata if `key` is a registered option. + + Returns + ------- + RegisteredOption (namedtuple) if key is registered, None otherwise + """ + return _registered_options.get(key) + + +def _translate_key(key): + """ + if key is deprecated and a replacement key is defined, will return the + replacement key, otherwise returns `key` as-is + """ + + d = _get_deprecated_option(key) + if d: + return d.rkey or key + else: + return key + + +def _warn_if_deprecated(key): + """ + Checks if `key` is a deprecated option and if so, prints a warning. + + Returns + ------- + bool - True if `key` is deprecated, False otherwise. + """ + + d = _get_deprecated_option(key) + if d: + if d.msg: + print(d.msg) + warnings.warn(d.msg, FutureWarning) + else: + msg = f"'{key}' is deprecated" + if d.removal_ver: + msg += f" and will be removed in {d.removal_ver}" + if d.rkey: + msg += f", please use '{d.rkey}' instead." + else: + msg += ", please refrain from using it." + + warnings.warn(msg, FutureWarning) + return True + return False + + +def _build_option_description(k): + """ Builds a formatted description of a registered option and returns it """ + + o = _get_registered_option(k) + d = _get_deprecated_option(k) + + s = f"{k} " + + if o.doc: + s += "\n".join(o.doc.strip().split("\n")) + else: + s += "No description available." + + if o: + s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" + + if d: + rkey = d.rkey if d.rkey else "" + s += "\n (Deprecated" + s += f", use `{rkey}` instead." 
+ s += ")" + + return s + + +def pp_options_list(keys, width=80, _print=False): + """ Builds a concise listing of available options, grouped by prefix """ + + from textwrap import wrap + from itertools import groupby + + def pp(name: str, ks: Iterable[str]) -> List[str]: + pfx = "- " + name + ".[" if name else "" + ls = wrap( + ", ".join(ks), + width, + initial_indent=pfx, + subsequent_indent=" ", + break_long_words=False, + ) + if ls and ls[-1] and name: + ls[-1] = ls[-1] + "]" + return ls + + ls: List[str] = [] + singles = [x for x in sorted(keys) if x.find(".") < 0] + if singles: + ls += pp("", singles) + keys = [x for x in keys if x.find(".") >= 0] + + for k, g in groupby(sorted(keys), lambda x: x[: x.rfind(".")]): + ks = [x[len(k) + 1 :] for x in list(g)] + ls += pp(k, ks) + s = "\n".join(ls) + if _print: + print(s) + else: + return s + + +# +# helpers + + +@contextmanager +def config_prefix(prefix): + """contextmanager for multiple invocations of API with a common prefix + + supported API functions: (register / get / set)_option + + Warning: This is not thread-safe, and won't work properly if you import + the API functions into your module using the "from x import y" construct. + + Example: + + import pandas._config.config as cf + with cf.config_prefix("display.font"): + cf.register_option("color", "red") + cf.register_option("size", " 5 pt") + cf.set_option("size", " 6 pt") + cf.get_option("size") + ... + + etc. + + will register options "display.font.color", "display.font.size", set the + value of "display.font.size"... and so on. 
+ """ + + # Note: reset_option relies on set_option, and on key directly + # it does not fit in to this monkey-patching scheme + + global register_option, get_option, set_option, reset_option + + def wrap(func): + def inner(key, *args, **kwds): + pkey = f"{prefix}.{key}" + return func(pkey, *args, **kwds) + + return inner + + _register_option = register_option + _get_option = get_option + _set_option = set_option + set_option = wrap(set_option) + get_option = wrap(get_option) + register_option = wrap(register_option) + yield None + set_option = _set_option + get_option = _get_option + register_option = _register_option + + +# These factories and methods are handy for use as the validator +# arg in register_option + + +def is_type_factory(_type): + """ + + Parameters + ---------- + `_type` - a type to be compared against (e.g. type(x) == `_type`) + + Returns + ------- + validator - a function of a single argument x , which raises + ValueError if type(x) is not equal to `_type` + + """ + + def inner(x): + if type(x) != _type: + raise ValueError(f"Value must have type '{_type}'") + + return inner + + +def is_instance_factory(_type): + """ + + Parameters + ---------- + `_type` - the type to be checked against + + Returns + ------- + validator - a function of a single argument x , which raises + ValueError if x is not an instance of `_type` + + """ + + if isinstance(_type, (tuple, list)): + _type = tuple(_type) + type_repr = "|".join(map(str, _type)) + else: + type_repr = f"'{_type}'" + + def inner(x): + if not isinstance(x, _type): + raise ValueError(f"Value must be an instance of {type_repr}") + + return inner + + +def is_one_of_factory(legal_values): + + callables = [c for c in legal_values if callable(c)] + legal_values = [c for c in legal_values if not callable(c)] + + def inner(x): + if x not in legal_values: + + if not any(c(x) for c in callables): + uvals = [str(lval) for lval in legal_values] + pp_values = "|".join(uvals) + msg = f"Value must be one of 
{pp_values}" + if len(callables): + msg += " or a callable" + raise ValueError(msg) + + return inner + + +def is_nonnegative_int(value): + """ + Verify that value is None or a nonnegative int. + + Parameters + ---------- + value : None or int + The `value` to be checked. + + Raises + ------ + ValueError + When the value is neither None nor a nonnegative integer + """ + + if value is None: + return + + elif isinstance(value, int): + if value >= 0: + return + + msg = "Value must be a nonnegative integer or None" + raise ValueError(msg) + + +# common type validators, for convenience +# usage: register_option(... , validator = is_int) +is_int = is_type_factory(int) +is_bool = is_type_factory(bool) +is_float = is_type_factory(float) +is_str = is_type_factory(str) +is_text = is_instance_factory((str, bytes)) + + +def is_callable(obj): + """ + + Parameters + ---------- + `obj` - the object to be checked + + Returns + ------- + validator - returns True if object is callable + raises ValueError otherwise. 
+ + """ + if not callable(obj): + raise ValueError("Value must be a callable") + return True diff --git a/pandas/_config/dates.py b/pandas/_config/dates.py new file mode 100644 index 00000000..5bf2b49c --- /dev/null +++ b/pandas/_config/dates.py @@ -0,0 +1,23 @@ +""" +config for datetime formatting +""" +from pandas._config import config as cf + +pc_date_dayfirst_doc = """ +: boolean + When True, prints and parses dates with the day first, eg 20/01/2005 +""" + +pc_date_yearfirst_doc = """ +: boolean + When True, prints and parses dates with the year first, eg 2005/01/20 +""" + +with cf.config_prefix("display"): + # Needed upstream of `_libs` because these are used in tslibs.parsing + cf.register_option( + "date_dayfirst", False, pc_date_dayfirst_doc, validator=cf.is_bool + ) + cf.register_option( + "date_yearfirst", False, pc_date_yearfirst_doc, validator=cf.is_bool + ) diff --git a/pandas/_config/display.py b/pandas/_config/display.py new file mode 100644 index 00000000..067b7c50 --- /dev/null +++ b/pandas/_config/display.py @@ -0,0 +1,59 @@ +""" +Unopinionated display configuration. +""" +import locale +import sys + +from pandas._config import config as cf + +# ----------------------------------------------------------------------------- +# Global formatting options +_initial_defencoding = None + + +def detect_console_encoding(): + """ + Try to find the most capable encoding supported by the console. + slightly modified from the way IPython handles the same issue. + """ + global _initial_defencoding + + encoding = None + try: + encoding = sys.stdout.encoding or sys.stdin.encoding + except (AttributeError, IOError): + pass + + # try again for something better + if not encoding or "ascii" in encoding.lower(): + try: + encoding = locale.getpreferredencoding() + except locale.Error: + # can be raised by locale.setlocale(), which is + # called by getpreferredencoding + # (on some systems, see stdlib locale docs) + pass + + # when all else fails. 
this will usually be "ascii" + if not encoding or "ascii" in encoding.lower(): + encoding = sys.getdefaultencoding() + + # GH#3360, save the reported defencoding at import time + # MPL backends may change it. Make available for debugging. + if not _initial_defencoding: + _initial_defencoding = sys.getdefaultencoding() + + return encoding + + +pc_encoding_doc = """ +: str/unicode + Defaults to the detected encoding of the console. + Specifies the encoding to be used for strings returned by to_string, + these are generally strings meant to be displayed on the console. +""" + +with cf.config_prefix("display"): + cf.register_option( + "encoding", detect_console_encoding(), pc_encoding_doc, validator=cf.is_text + ) diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py new file mode 100644 index 00000000..dd1d4948 --- /dev/null +++ b/pandas/_config/localization.py @@ -0,0 +1,166 @@ +""" +Helpers for configuring locale settings. + +Name `localization` is chosen to avoid overlap with builtin `locale` module. +""" +from contextlib import contextmanager +import locale +import re +import subprocess + +from pandas._config.config import options + + +@contextmanager +def set_locale(new_locale, lc_var=locale.LC_ALL): + """ + Context manager for temporarily setting a locale. + + Parameters + ---------- + new_locale : str or tuple + A string of the form .. For example to set + the current locale to US English with a UTF8 encoding, you would pass + "en_US.UTF-8". + lc_var : int, default `locale.LC_ALL` + The category of the locale being set. + + Notes + ----- + This is useful when you want to run a particular block of code under a + particular locale, without globally setting the locale. This probably isn't + thread-safe. 
+ """ + current_locale = locale.getlocale() + + try: + locale.setlocale(lc_var, new_locale) + normalized_locale = locale.getlocale() + if all(x is not None for x in normalized_locale): + yield ".".join(normalized_locale) + else: + yield new_locale + finally: + locale.setlocale(lc_var, current_locale) + + +def can_set_locale(lc, lc_var=locale.LC_ALL): + """ + Check to see if we can set a locale, and subsequently get the locale, + without raising an Exception. + + Parameters + ---------- + lc : str + The locale to attempt to set. + lc_var : int, default `locale.LC_ALL` + The category of the locale being set. + + Returns + ------- + is_valid : bool + Whether the passed locale can be set + """ + + try: + with set_locale(lc, lc_var=lc_var): + pass + except (ValueError, locale.Error): + # horrible name for a Exception subclass + return False + else: + return True + + +def _valid_locales(locales, normalize): + """ + Return a list of normalized locales that do not throw an ``Exception`` + when set. + + Parameters + ---------- + locales : str + A string where each locale is separated by a newline. + normalize : bool + Whether to call ``locale.normalize`` on each locale. + + Returns + ------- + valid_locales : list + A list of valid locales. + """ + if normalize: + normalizer = lambda x: locale.normalize(x.strip()) + else: + normalizer = lambda x: x.strip() + + return list(filter(can_set_locale, map(normalizer, locales))) + + +def _default_locale_getter(): + raw_locales = subprocess.check_output(["locale -a"], shell=True) + return raw_locales + + +def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter): + """ + Get all the locales that are available on the system. + + Parameters + ---------- + prefix : str + If not ``None`` then return only those locales with the prefix + provided. For example to get all English language locales (those that + start with ``"en"``), pass ``prefix="en"``. 
+ normalize : bool + Call ``locale.normalize`` on the resulting list of available locales. + If ``True``, only locales that can be set without throwing an + ``Exception`` are returned. + locale_getter : callable + The function to use to retrieve the current locales. This should return + a string with each locale separated by a newline character. + + Returns + ------- + locales : list of strings + A list of locale strings that can be set with ``locale.setlocale()``. + For example:: + + locale.setlocale(locale.LC_ALL, locale_string) + + On error will return None (no locale available, e.g. Windows) + + """ + try: + raw_locales = locale_getter() + except subprocess.CalledProcessError: + # Raised on (some? all?) Windows platforms because Note: "locale -a" + # is not defined + return None + + try: + # raw_locales is "\n" separated list of locales + # it may contain non-decodable parts, so split + # extract what we can and then rejoin. + raw_locales = raw_locales.split(b"\n") + out_locales = [] + for x in raw_locales: + try: + out_locales.append(str(x, encoding=options.display.encoding)) + except UnicodeError: + # 'locale -a' is used to populated 'raw_locales' and on + # Redhat 7 Linux (and maybe others) prints locale names + # using windows-1252 encoding. Bug only triggered by + # a few special characters and when there is an + # extensive list of installed locales. 
+ out_locales.append(str(x, encoding="windows-1252")) + + except TypeError: + pass + + if prefix is None: + return _valid_locales(out_locales, normalize) + + pattern = re.compile(f"{prefix}.*") + found = pattern.findall("\n".join(out_locales)) + return _valid_locales(found, normalize) diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py new file mode 100644 index 00000000..af67cb3b --- /dev/null +++ b/pandas/_libs/__init__.py @@ -0,0 +1,11 @@ +# flake8: noqa + +from .tslibs import ( + NaT, + NaTType, + OutOfBoundsDatetime, + Period, + Timedelta, + Timestamp, + iNaT, +) diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd new file mode 100644 index 00000000..4bca5b33 --- /dev/null +++ b/pandas/_libs/algos.pxd @@ -0,0 +1,21 @@ +from pandas._libs.util cimport numeric + + +cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil: + cdef: + numeric t + + # cython doesn't allow pointer dereference so use array syntax + t = a[0] + a[0] = b[0] + b[0] = t + return 0 + + +cdef enum TiebreakEnumType: + TIEBREAK_AVERAGE + TIEBREAK_MIN, + TIEBREAK_MAX + TIEBREAK_FIRST + TIEBREAK_FIRST_DESCENDING + TIEBREAK_DENSE diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx new file mode 100644 index 00000000..7a2fc9dc --- /dev/null +++ b/pandas/_libs/algos.pyx @@ -0,0 +1,1237 @@ +import cython +from cython import Py_ssize_t + +from libc.stdlib cimport malloc, free +from libc.string cimport memmove +from libc.math cimport fabs, sqrt + +import numpy as np +cimport numpy as cnp +from numpy cimport (ndarray, + NPY_INT64, NPY_INT32, NPY_INT16, NPY_INT8, + NPY_UINT64, NPY_UINT32, NPY_UINT16, NPY_UINT8, + NPY_FLOAT32, NPY_FLOAT64, + NPY_OBJECT, + int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float32_t, float64_t) +cnp.import_array() + + +cimport pandas._libs.util as util +from pandas._libs.util cimport numeric, get_nat + +from pandas._libs.khash cimport ( + khiter_t, kh_destroy_int64, kh_put_int64, kh_init_int64, kh_int64_t, + 
kh_resize_int64, kh_get_int64) + +import pandas._libs.missing as missing + +cdef float64_t FP_ERR = 1e-13 + +cdef float64_t NaN = np.NaN + +cdef int64_t NPY_NAT = get_nat() + +tiebreakers = { + 'average': TIEBREAK_AVERAGE, + 'min': TIEBREAK_MIN, + 'max': TIEBREAK_MAX, + 'first': TIEBREAK_FIRST, + 'dense': TIEBREAK_DENSE, +} + + +cdef inline bint are_diff(object left, object right): + try: + return fabs(left - right) > FP_ERR + except TypeError: + return left != right + + +class Infinity: + """ + Provide a positive Infinity comparison method for ranking. + """ + __lt__ = lambda self, other: False + __le__ = lambda self, other: isinstance(other, Infinity) + __eq__ = lambda self, other: isinstance(other, Infinity) + __ne__ = lambda self, other: not isinstance(other, Infinity) + __gt__ = lambda self, other: (not isinstance(other, Infinity) and + not missing.checknull(other)) + __ge__ = lambda self, other: not missing.checknull(other) + + +class NegInfinity: + """ + Provide a negative Infinity comparison method for ranking. + """ + __lt__ = lambda self, other: (not isinstance(other, NegInfinity) and + not missing.checknull(other)) + __le__ = lambda self, other: not missing.checknull(other) + __eq__ = lambda self, other: isinstance(other, NegInfinity) + __ne__ = lambda self, other: not isinstance(other, NegInfinity) + __gt__ = lambda self, other: False + __ge__ = lambda self, other: isinstance(other, NegInfinity) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): + """ + Efficiently find the unique first-differences of the given array. 
+ + Parameters + ---------- + arr : ndarray[in64_t] + + Returns + ------- + ndarray[int64_t] + An ordered ndarray[int64_t] + """ + cdef: + Py_ssize_t i, n = len(arr) + int64_t val + khiter_t k + kh_int64_t *table + int ret = 0 + list uniques = [] + + table = kh_init_int64() + kh_resize_int64(table, 10) + for i in range(n - 1): + val = arr[i + 1] - arr[i] + k = kh_get_int64(table, val) + if k == table.n_buckets: + kh_put_int64(table, val, &ret) + uniques.append(val) + kh_destroy_int64(table) + + result = np.array(uniques, dtype=np.int64) + result.sort() + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_lexsorted(list_of_arrays: list) -> bint: + cdef: + Py_ssize_t i + Py_ssize_t n, nlevels + int64_t k, cur, pre + ndarray arr + bint result = True + + nlevels = len(list_of_arrays) + n = len(list_of_arrays[0]) + + cdef int64_t **vecs = malloc(nlevels * sizeof(int64_t*)) + for i in range(nlevels): + arr = list_of_arrays[i] + assert arr.dtype.name == 'int64' + vecs[i] = cnp.PyArray_DATA(arr) + + # Assume uniqueness?? + with nogil: + for i in range(1, n): + for k in range(nlevels): + cur = vecs[k][i] + pre = vecs[k][i -1] + if cur == pre: + continue + elif cur > pre: + break + else: + result = False + break + free(vecs) + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups): + """ + Compute a 1-d indexer. + + The indexer is an ordering of the passed index, + ordered by the groups. + + Parameters + ---------- + index: int64 ndarray + Mappings from group -> position. + ngroups: int64 + Number of groups. + + Returns + ------- + tuple + 1-d indexer ordered by groups, group counts. + + Notes + ----- + This is a reverse of the label factorization process. 
+ """ + cdef: + Py_ssize_t i, loc, label, n + ndarray[int64_t] counts, where, result + + counts = np.zeros(ngroups + 1, dtype=np.int64) + n = len(index) + result = np.zeros(n, dtype=np.int64) + where = np.zeros(ngroups + 1, dtype=np.int64) + + with nogil: + + # count group sizes, location 0 for NA + for i in range(n): + counts[index[i] + 1] += 1 + + # mark the start of each contiguous group of like-indexed data + for i in range(1, ngroups + 1): + where[i] = where[i - 1] + counts[i - 1] + + # this is our indexer + for i in range(n): + label = index[i] + 1 + result[where[label]] = i + where[label] += 1 + + return result, counts + + +@cython.boundscheck(False) +@cython.wraparound(False) +def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: + cdef: + Py_ssize_t i, j, l, m, n = a.shape[0] + numeric x + + with nogil: + l = 0 + m = n - 1 + + while l < m: + x = a[k] + i = l + j = m + + while 1: + while a[i] < x: i += 1 + while x < a[j]: j -= 1 + if i <= j: + swap(&a[i], &a[j]) + i += 1; j -= 1 + + if i > j: break + + if j < k: l = i + if k < i: m = j + return a[k] + + +# ---------------------------------------------------------------------- +# Pairwise correlation/covariance + + +@cython.boundscheck(False) +@cython.wraparound(False) +def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None): + cdef: + Py_ssize_t i, j, xi, yi, N, K + bint minpv + ndarray[float64_t, ndim=2] result + ndarray[uint8_t, ndim=2] mask + int64_t nobs = 0 + float64_t vx, vy, sumx, sumy, sumxx, sumyy, meanx, meany, divisor + + N, K = (mat).shape + + if minp is None: + minpv = 1 + else: + minpv = minp + + result = np.empty((K, K), dtype=np.float64) + mask = np.isfinite(mat).view(np.uint8) + + with nogil: + for xi in range(K): + for yi in range(xi + 1): + nobs = sumxx = sumyy = sumx = sumy = 0 + for i in range(N): + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] + vy = mat[i, yi] + nobs += 1 + sumx += vx + sumy += vy + + if nobs < minpv: + result[xi, yi] = result[yi, xi] = NaN + else: + 
meanx = sumx / nobs + meany = sumy / nobs + + # now the cov numerator + sumx = 0 + + for i in range(N): + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] - meanx + vy = mat[i, yi] - meany + + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + + divisor = (nobs - 1.0) if cov else sqrt(sumxx * sumyy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = sumx / divisor + else: + result[xi, yi] = result[yi, xi] = NaN + + return result + +# ---------------------------------------------------------------------- +# Pairwise Spearman correlation + + +@cython.boundscheck(False) +@cython.wraparound(False) +def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1): + cdef: + Py_ssize_t i, j, xi, yi, N, K + ndarray[float64_t, ndim=2] result + ndarray[float64_t, ndim=2] ranked_mat + ndarray[float64_t, ndim=1] maskedx + ndarray[float64_t, ndim=1] maskedy + ndarray[uint8_t, ndim=2] mask + int64_t nobs = 0 + float64_t vx, vy, sumx, sumxx, sumyy, mean, divisor + + N, K = (mat).shape + + result = np.empty((K, K), dtype=np.float64) + mask = np.isfinite(mat).view(np.uint8) + + ranked_mat = np.empty((N, K), dtype=np.float64) + + for i in range(K): + ranked_mat[:, i] = rank_1d(mat[:, i]) + + for xi in range(K): + for yi in range(xi + 1): + nobs = 0 + # Keep track of whether we need to recompute ranks + all_ranks = True + for i in range(N): + all_ranks &= not (mask[i, xi] ^ mask[i, yi]) + if mask[i, xi] and mask[i, yi]: + nobs += 1 + + if nobs < minp: + result[xi, yi] = result[yi, xi] = NaN + else: + maskedx = np.empty(nobs, dtype=np.float64) + maskedy = np.empty(nobs, dtype=np.float64) + j = 0 + + for i in range(N): + if mask[i, xi] and mask[i, yi]: + maskedx[j] = ranked_mat[i, xi] + maskedy[j] = ranked_mat[i, yi] + j += 1 + + if not all_ranks: + maskedx = rank_1d(maskedx) + maskedy = rank_1d(maskedy) + + mean = (nobs + 1) / 2. 
+ + # now the cov numerator + sumx = sumxx = sumyy = 0 + + for i in range(nobs): + vx = maskedx[i] - mean + vy = maskedy[i] - mean + + sumx += vx * vy + sumxx += vx * vx + sumyy += vy * vy + + divisor = sqrt(sumxx * sumyy) + + if divisor != 0: + result[xi, yi] = result[yi, xi] = sumx / divisor + else: + result[xi, yi] = result[yi, xi] = NaN + + return result + + +# ---------------------------------------------------------------------- + +ctypedef fused algos_t: + float64_t + float32_t + object + int64_t + int32_t + int16_t + int8_t + uint64_t + uint32_t + uint16_t + uint8_t + + +def _validate_limit(nobs: int, limit=None) -> int: + """ + Check that the `limit` argument is a positive integer. + + Parameters + ---------- + nobs : int + limit : object + + Returns + ------- + int + The limit. + """ + if limit is None: + lim = nobs + else: + if not util.is_integer_object(limit): + raise ValueError('Limit must be an integer') + if limit < 1: + raise ValueError('Limit must be greater than 0') + lim = limit + + return lim + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[int64_t, ndim=1] indexer + algos_t cur, next_val + int lim, fill_count = 0 + + nleft = len(old) + nright = len(new) + indexer = np.empty(nright, dtype=np.int64) + indexer[:] = -1 + + lim = _validate_limit(nright, limit) + + if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: + return indexer + + i = j = 0 + + cur = old[0] + + while j <= nright - 1 and new[j] < cur: + j += 1 + + while True: + if j == nright: + break + + if i == nleft - 1: + while j < nright: + if new[j] == cur: + indexer[j] = i + elif new[j] > cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j += 1 + break + + next_val = old[i + 1] + + while j < nright and cur <= new[j] < next_val: + if new[j] == cur: + indexer[j] = i + elif fill_count < lim: + indexer[j] = i + fill_count += 1 + j += 1 + + 
fill_count = 0 + i += 1 + cur = next_val + + return indexer + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): + cdef: + Py_ssize_t i, N + algos_t val + int lim, fill_count = 0 + + N = len(values) + + # GH#2778 + if N == 0: + return + + lim = _validate_limit(N, limit) + + val = values[0] + for i in range(N): + if mask[i]: + if fill_count >= lim: + continue + fill_count += 1 + values[i] = val + else: + fill_count = 0 + val = values[i] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def pad_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None): + cdef: + Py_ssize_t i, j, N, K + algos_t val + int lim, fill_count = 0 + + K, N = (values).shape + + # GH#2778 + if N == 0: + return + + lim = _validate_limit(N, limit) + + for j in range(K): + fill_count = 0 + val = values[j, 0] + for i in range(N): + if mask[j, i]: + if fill_count >= lim: + continue + fill_count += 1 + values[j, i] = val + else: + fill_count = 0 + val = values[j, i] + + +""" +Backfilling logic for generating fill vector + +Diagram of what's going on + +Old New Fill vector Mask + . 0 1 + . 0 1 + . 0 1 +A A 0 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 + . 1 1 +B B 1 1 + . 2 1 + . 2 1 + . 2 1 +C C 2 1 + . 0 + . 
0 +D +""" + + +@cython.boundscheck(False) +@cython.wraparound(False) +def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): + cdef: + Py_ssize_t i, j, nleft, nright + ndarray[int64_t, ndim=1] indexer + algos_t cur, prev + int lim, fill_count = 0 + + nleft = len(old) + nright = len(new) + indexer = np.empty(nright, dtype=np.int64) + indexer[:] = -1 + + lim = _validate_limit(nright, limit) + + if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]: + return indexer + + i = nleft - 1 + j = nright - 1 + + cur = old[nleft - 1] + + while j >= 0 and new[j] > cur: + j -= 1 + + while True: + if j < 0: + break + + if i == 0: + while j >= 0: + if new[j] == cur: + indexer[j] = i + elif new[j] < cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j -= 1 + break + + prev = old[i - 1] + + while j >= 0 and prev < new[j] <= cur: + if new[j] == cur: + indexer[j] = i + elif new[j] < cur and fill_count < lim: + indexer[j] = i + fill_count += 1 + j -= 1 + + fill_count = 0 + i -= 1 + cur = prev + + return indexer + + +@cython.boundscheck(False) +@cython.wraparound(False) +def backfill_inplace(algos_t[:] values, const uint8_t[:] mask, limit=None): + cdef: + Py_ssize_t i, N + algos_t val + int lim, fill_count = 0 + + N = len(values) + + # GH#2778 + if N == 0: + return + + lim = _validate_limit(N, limit) + + val = values[N - 1] + for i in range(N - 1, -1, -1): + if mask[i]: + if fill_count >= lim: + continue + fill_count += 1 + values[i] = val + else: + fill_count = 0 + val = values[i] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def backfill_2d_inplace(algos_t[:, :] values, + const uint8_t[:, :] mask, + limit=None): + cdef: + Py_ssize_t i, j, N, K + algos_t val + int lim, fill_count = 0 + + K, N = (values).shape + + # GH#2778 + if N == 0: + return + + lim = _validate_limit(N, limit) + + for j in range(K): + fill_count = 0 + val = values[j, N - 1] + for i in range(N - 1, -1, -1): + if mask[j, i]: + if fill_count >= lim: + continue + fill_count += 
1 + values[j, i] = val + else: + fill_count = 0 + val = values[j, i] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): + """ + Returns + ------- + tuple + is_monotonic_inc : bool + is_monotonic_dec : bool + is_unique : bool + """ + cdef: + Py_ssize_t i, n + algos_t prev, cur + bint is_monotonic_inc = 1 + bint is_monotonic_dec = 1 + bint is_unique = 1 + bint is_strict_monotonic = 1 + + n = len(arr) + + if n == 1: + if arr[0] != arr[0] or (timelike and arr[0] == NPY_NAT): + # single value is NaN + return False, False, True + else: + return True, True, True + elif n < 2: + return True, True, True + + if timelike and arr[0] == NPY_NAT: + return False, False, True + + if algos_t is not object: + with nogil: + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == NPY_NAT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec = 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + else: + # object-dtype, identical to above except we cannot use `with nogil` + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == NPY_NAT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec = 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + + is_strict_monotonic = is_unique and (is_monotonic_inc or is_monotonic_dec) + return is_monotonic_inc, is_monotonic_dec, is_strict_monotonic + + +# 
---------------------------------------------------------------------- +# rank_1d, rank_2d +# ---------------------------------------------------------------------- + +ctypedef fused rank_t: + object + float64_t + uint64_t + int64_t + + +@cython.wraparound(False) +@cython.boundscheck(False) +def rank_1d(rank_t[:] in_arr, ties_method='average', + ascending=True, na_option='keep', pct=False): + """ + Fast NaN-friendly version of ``scipy.stats.rankdata``. + """ + cdef: + Py_ssize_t i, j, n, dups = 0, total_tie_count = 0, non_na_idx = 0 + + ndarray[rank_t] sorted_data, values + + ndarray[float64_t] ranks + ndarray[int64_t] argsorted + ndarray[uint8_t, cast=True] sorted_mask + + rank_t val, nan_value + + float64_t sum_ranks = 0 + int tiebreak = 0 + bint keep_na = 0 + bint isnan, condition + float64_t count = 0.0 + + tiebreak = tiebreakers[ties_method] + + if rank_t is float64_t: + values = np.asarray(in_arr).copy() + elif rank_t is object: + values = np.array(in_arr, copy=True) + + if values.dtype != np.object_: + values = values.astype('O') + else: + values = np.asarray(in_arr) + + keep_na = na_option == 'keep' + + if rank_t is object: + mask = missing.isnaobj(values) + elif rank_t is float64_t: + mask = np.isnan(values) + elif rank_t is int64_t: + mask = values == NPY_NAT + + # create copy in case of NPY_NAT + # values are mutated inplace + if mask.any(): + values = values.copy() + + # double sort first by mask and then by values to ensure nan values are + # either at the beginning or the end. 
mask/(~mask) controls padding at + # tail or the head + if rank_t is not uint64_t: + if ascending ^ (na_option == 'top'): + if rank_t is object: + nan_value = Infinity() + elif rank_t is float64_t: + nan_value = np.inf + elif rank_t is int64_t: + nan_value = np.iinfo(np.int64).max + + order = (values, mask) + else: + if rank_t is object: + nan_value = NegInfinity() + elif rank_t is float64_t: + nan_value = -np.inf + elif rank_t is int64_t: + nan_value = np.iinfo(np.int64).min + + order = (values, ~mask) + np.putmask(values, mask, nan_value) + else: + mask = np.zeros(shape=len(values), dtype=bool) + order = (values, mask) + + n = len(values) + ranks = np.empty(n, dtype='f8') + + if rank_t is object: + _as = np.lexsort(keys=order) + else: + if tiebreak == TIEBREAK_FIRST: + # need to use a stable sort here + _as = np.lexsort(keys=order) + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + else: + _as = np.lexsort(keys=order) + + if not ascending: + _as = _as[::-1] + + sorted_data = values.take(_as) + sorted_mask = mask.take(_as) + _indices = np.diff(sorted_mask.astype(int)).nonzero()[0] + non_na_idx = _indices[0] if len(_indices) > 0 else -1 + argsorted = _as.astype('i8') + + if rank_t is object: + # TODO: de-duplicate once cython supports conditional nogil + for i in range(n): + sum_ranks += i + 1 + dups += 1 + + val = sorted_data[i] + + if rank_t is not uint64_t: + isnan = sorted_mask[i] + if isnan and keep_na: + ranks[argsorted[i]] = NaN + continue + + count += 1.0 + + if rank_t is object: + condition = ( + i == n - 1 or + are_diff(sorted_data[i + 1], val) or + i == non_na_idx + ) + else: + condition = ( + i == n - 1 or + sorted_data[i + 1] != val or + i == non_na_idx + ) + + if condition: + + if tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = i - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in 
range(i - dups + 1, i + 1): + ranks[argsorted[j]] = i + 1 + elif tiebreak == TIEBREAK_FIRST: + if rank_t is object: + raise ValueError('first not supported for ' + 'non-numeric data') + else: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = j + 1 + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = 2 * i - j - dups + 2 + elif tiebreak == TIEBREAK_DENSE: + total_tie_count += 1 + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = total_tie_count + sum_ranks = dups = 0 + + else: + with nogil: + # TODO: why does the 2d version not have a nogil block? + for i in range(n): + sum_ranks += i + 1 + dups += 1 + + val = sorted_data[i] + + if rank_t is not uint64_t: + isnan = sorted_mask[i] + if isnan and keep_na: + ranks[argsorted[i]] = NaN + continue + + count += 1.0 + + if rank_t is object: + condition = ( + i == n - 1 or + are_diff(sorted_data[i + 1], val) or + i == non_na_idx + ) + else: + condition = ( + i == n - 1 or + sorted_data[i + 1] != val or + i == non_na_idx + ) + + if condition: + + if tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = i - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = i + 1 + elif tiebreak == TIEBREAK_FIRST: + if rank_t is object: + raise ValueError('first not supported for ' + 'non-numeric data') + else: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = j + 1 + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = 2 * i - j - dups + 2 + elif tiebreak == TIEBREAK_DENSE: + total_tie_count += 1 + for j in range(i - dups + 1, i + 1): + ranks[argsorted[j]] = total_tie_count + sum_ranks = dups = 0 + + if pct: + if tiebreak == TIEBREAK_DENSE: + return ranks / total_tie_count + else: + return ranks 
/ count + else: + return ranks + + +def rank_2d(rank_t[:, :] in_arr, axis=0, ties_method='average', + ascending=True, na_option='keep', pct=False): + """ + Fast NaN-friendly version of ``scipy.stats.rankdata``. + """ + cdef: + Py_ssize_t i, j, z, k, n, dups = 0, total_tie_count = 0 + + Py_ssize_t infs + + ndarray[float64_t, ndim=2] ranks + ndarray[rank_t, ndim=2] values + + ndarray[int64_t, ndim=2] argsorted + + rank_t val, nan_value + + float64_t sum_ranks = 0 + int tiebreak = 0 + bint keep_na = 0 + float64_t count = 0.0 + bint condition, skip_condition + + tiebreak = tiebreakers[ties_method] + + keep_na = na_option == 'keep' + + if axis == 0: + values = np.asarray(in_arr).T.copy() + else: + values = np.asarray(in_arr).copy() + + if rank_t is object: + if values.dtype != np.object_: + values = values.astype('O') + + if rank_t is not uint64_t: + if ascending ^ (na_option == 'top'): + if rank_t is object: + nan_value = Infinity() + elif rank_t is float64_t: + nan_value = np.inf + elif rank_t is int64_t: + nan_value = np.iinfo(np.int64).max + + else: + if rank_t is object: + nan_value = NegInfinity() + elif rank_t is float64_t: + nan_value = -np.inf + elif rank_t is int64_t: + nan_value = NPY_NAT + + if rank_t is object: + mask = missing.isnaobj2d(values) + elif rank_t is float64_t: + mask = np.isnan(values) + elif rank_t is int64_t: + mask = values == NPY_NAT + + np.putmask(values, mask, nan_value) + + n, k = (values).shape + ranks = np.empty((n, k), dtype='f8') + + if rank_t is object: + try: + _as = values.argsort(1) + except TypeError: + values = in_arr + for i in range(len(values)): + ranks[i] = rank_1d(in_arr[i], ties_method=ties_method, + ascending=ascending, pct=pct) + if axis == 0: + return ranks.T + else: + return ranks + else: + if tiebreak == TIEBREAK_FIRST: + # need to use a stable sort here + _as = values.argsort(axis=1, kind='mergesort') + if not ascending: + tiebreak = TIEBREAK_FIRST_DESCENDING + else: + _as = values.argsort(1) + + if not ascending: + 
_as = _as[:, ::-1] + + values = _take_2d(values, _as) + argsorted = _as.astype('i8') + + for i in range(n): + if rank_t is object: + dups = sum_ranks = infs = 0 + else: + dups = sum_ranks = 0 + + total_tie_count = 0 + count = 0.0 + for j in range(k): + if rank_t is not object: + sum_ranks += j + 1 + dups += 1 + + val = values[i, j] + + if rank_t is not uint64_t: + if rank_t is object: + skip_condition = (val is nan_value) and keep_na + else: + skip_condition = (val == nan_value) and keep_na + if skip_condition: + ranks[i, argsorted[i, j]] = NaN + + if rank_t is object: + infs += 1 + + continue + + count += 1.0 + + if rank_t is object: + sum_ranks += (j - infs) + 1 + dups += 1 + + if rank_t is object: + condition = j == k - 1 or are_diff(values[i, j + 1], val) + else: + condition = j == k - 1 or values[i, j + 1] != val + + if condition: + if tiebreak == TIEBREAK_AVERAGE: + for z in range(j - dups + 1, j + 1): + ranks[i, argsorted[i, z]] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for z in range(j - dups + 1, j + 1): + ranks[i, argsorted[i, z]] = j - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for z in range(j - dups + 1, j + 1): + ranks[i, argsorted[i, z]] = j + 1 + elif tiebreak == TIEBREAK_FIRST: + if rank_t is object: + raise ValueError('first not supported ' + 'for non-numeric data') + else: + for z in range(j - dups + 1, j + 1): + ranks[i, argsorted[i, z]] = z + 1 + elif tiebreak == TIEBREAK_FIRST_DESCENDING: + for z in range(j - dups + 1, j + 1): + ranks[i, argsorted[i, z]] = 2 * j - z - dups + 2 + elif tiebreak == TIEBREAK_DENSE: + total_tie_count += 1 + for z in range(j - dups + 1, j + 1): + ranks[i, argsorted[i, z]] = total_tie_count + sum_ranks = dups = 0 + if pct: + if tiebreak == TIEBREAK_DENSE: + ranks[i, :] /= total_tie_count + else: + ranks[i, :] /= count + if axis == 0: + return ranks.T + else: + return ranks + + +ctypedef fused diff_t: + float64_t + float32_t + int8_t + int16_t + int32_t + int64_t + +ctypedef fused out_t: + float32_t + 
float64_t + + +@cython.boundscheck(False) +@cython.wraparound(False) +def diff_2d(ndarray[diff_t, ndim=2] arr, + ndarray[out_t, ndim=2] out, + Py_ssize_t periods, int axis): + cdef: + Py_ssize_t i, j, sx, sy, start, stop + bint f_contig = arr.flags.f_contiguous + + # Disable for unsupported dtype combinations, + # see https://github.com/cython/cython/issues/2646 + if (out_t is float32_t + and not (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError + elif (out_t is float64_t + and (diff_t is float32_t or diff_t is int8_t or diff_t is int16_t)): + raise NotImplementedError + else: + # We put this inside an indented else block to avoid cython build + # warnings about unreachable code + sx, sy = (arr).shape + with nogil: + if f_contig: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for j in range(sy): + for i in range(start, stop): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for j in range(start, stop): + for i in range(sx): + out[i, j] = arr[i, j] - arr[i, j - periods] + else: + if axis == 0: + if periods >= 0: + start, stop = periods, sx + else: + start, stop = 0, sx + periods + for i in range(start, stop): + for j in range(sy): + out[i, j] = arr[i, j] - arr[i - periods, j] + else: + if periods >= 0: + start, stop = periods, sy + else: + start, stop = 0, sy + periods + for i in range(sx): + for j in range(start, stop): + out[i, j] = arr[i, j] - arr[i, j - periods] + + +# generated from template +include "algos_common_helper.pxi" +include "algos_take_helper.pxi" diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in new file mode 100644 index 00000000..5bfc5946 --- /dev/null +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -0,0 +1,70 @@ +""" +Template for each `dtype` helper function using 1-d template + +WARNING: DO NOT edit .pxi 
FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# ensure_dtype +# ---------------------------------------------------------------------- + +cdef int PLATFORM_INT = (np.arange(0, dtype=np.intp)).descr.type_num + + +def ensure_platform_int(object arr): + # GH3033, GH1392 + # platform int is the size of the int pointer, e.g. np.intp + if util.is_array(arr): + if (arr).descr.type_num == PLATFORM_INT: + return arr + else: + return arr.astype(np.intp) + else: + return np.array(arr, dtype=np.intp) + + +def ensure_object(object arr): + if util.is_array(arr): + if (arr).descr.type_num == NPY_OBJECT: + return arr + else: + return arr.astype(np.object_) + else: + return np.array(arr, dtype=np.object_) + +{{py: + +# name, c_type, dtype +dtypes = [('float64', 'FLOAT64', 'float64'), + ('float32', 'FLOAT32', 'float32'), + ('int8', 'INT8', 'int8'), + ('int16', 'INT16', 'int16'), + ('int32', 'INT32', 'int32'), + ('int64', 'INT64', 'int64'), + ('uint8', 'UINT8', 'uint8'), + ('uint16', 'UINT16', 'uint16'), + ('uint32', 'UINT32', 'uint32'), + ('uint64', 'UINT64', 'uint64'), + # ('platform_int', 'INT', 'int_'), + # ('object', 'OBJECT', 'object_'), +] + +def get_dispatch(dtypes): + + for name, c_type, dtype in dtypes: + yield name, c_type, dtype +}} + +{{for name, c_type, dtype in get_dispatch(dtypes)}} + + +def ensure_{{name}}(object arr, copy=True): + if util.is_array(arr): + if (arr).descr.type_num == NPY_{{c_type}}: + return arr + else: + return arr.astype(np.{{dtype}}, copy=copy) + else: + return np.array(arr, dtype=np.{{dtype}}) + +{{endfor}} diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in new file mode 100644 index 00000000..995fabbe --- /dev/null +++ b/pandas/_libs/algos_take_helper.pxi.in @@ -0,0 +1,250 @@ +""" +Template for each `dtype` helper function for take + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# 
---------------------------------------------------------------------- +# take_1d, take_2d +# ---------------------------------------------------------------------- + +{{py: + +# c_type_in, c_type_out +dtypes = [ + ('uint8_t', 'uint8_t'), + ('uint8_t', 'object'), + ('int8_t', 'int8_t'), + ('int8_t', 'int32_t'), + ('int8_t', 'int64_t'), + ('int8_t', 'float64_t'), + ('int16_t', 'int16_t'), + ('int16_t', 'int32_t'), + ('int16_t', 'int64_t'), + ('int16_t', 'float64_t'), + ('int32_t', 'int32_t'), + ('int32_t', 'int64_t'), + ('int32_t', 'float64_t'), + ('int64_t', 'int64_t'), + ('int64_t', 'float64_t'), + ('float32_t', 'float32_t'), + ('float32_t', 'float64_t'), + ('float64_t', 'float64_t'), + ('object', 'object'), +] + + +def get_dispatch(dtypes): + + for (c_type_in, c_type_out) in dtypes: + + def get_name(dtype_name): + if dtype_name == "object": + return "object" + if dtype_name == "uint8_t": + return "bool" + return dtype_name[:-2] + + name = get_name(c_type_in) + dest = get_name(c_type_out) + + args = dict(name=name, dest=dest, c_type_in=c_type_in, + c_type_out=c_type_out) + + yield (name, dest, c_type_in, c_type_out) + +}} + + +{{for name, dest, c_type_in, c_type_out in get_dispatch(dtypes)}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values, +{{else}} +def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values, +{{endif}} + const int64_t[:] indexer, + {{c_type_out}}[:] out, + fill_value=np.nan): + + cdef: + Py_ssize_t i, n, idx + {{c_type_out}} fv + + n = indexer.shape[0] + + fv = fill_value + + {{if c_type_out != "object"}} + with nogil: + {{else}} + if True: + {{endif}} + for i in range(n): + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i] = True if values[idx] > 0 else False + {{else}} + out[i] = values[idx] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) 
+{{if c_type_in != "object"}} +def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} +def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} + ndarray[int64_t] indexer, + {{c_type_out}}[:, :] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(indexer) + k = values.shape[1] + + fv = fill_value + + IF {{True if c_type_in == c_type_out != "object" else False}}: + cdef: + const {{c_type_out}} *v + {{c_type_out}} *o + + # GH#3130 + if (values.strides[1] == out.strides[1] and + values.strides[1] == sizeof({{c_type_out}}) and + sizeof({{c_type_out}}) * n >= 256): + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + v = &values[idx, 0] + o = &out[i, 0] + memmove(o, v, (sizeof({{c_type_out}}) * k)) + return + + for i in range(n): + idx = indexer[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, j] > 0 else False + {{else}} + out[i, j] = values[idx, j] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if c_type_in != "object"}} +def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values, +{{else}} +def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, +{{endif}} + ndarray[int64_t] indexer, + {{c_type_out}}[:, :] out, + fill_value=np.nan): + + cdef: + Py_ssize_t i, j, k, n, idx + {{c_type_out}} fv + + n = len(values) + k = len(indexer) + + if n == 0 or k == 0: + return + + fv = fill_value + + for i in range(n): + for j in range(k): + idx = indexer[j] + if idx == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[i, idx] > 0 else False + {{else}} + out[i, j] = values[i, idx] + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +def 
take_2d_multi_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values, + indexer, + ndarray[{{c_type_out}}, ndim=2] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, j, k, n, idx + ndarray[int64_t] idx0 = indexer[0] + ndarray[int64_t] idx1 = indexer[1] + {{c_type_out}} fv + + n = len(idx0) + k = len(idx1) + + fv = fill_value + for i in range(n): + idx = idx0[i] + if idx == -1: + for j in range(k): + out[i, j] = fv + else: + for j in range(k): + if idx1[j] == -1: + out[i, j] = fv + else: + {{if c_type_in == "uint8_t" and c_type_out == "object"}} + out[i, j] = True if values[idx, idx1[j]] > 0 else False + {{else}} + out[i, j] = values[idx, idx1[j]] + {{endif}} + +{{endfor}} + +# ---------------------------------------------------------------------- +# take_2d internal function +# ---------------------------------------------------------------------- + +ctypedef fused take_t: + float64_t + uint64_t + int64_t + object + + +cdef _take_2d(ndarray[take_t, ndim=2] values, object idx): + cdef: + Py_ssize_t i, j, N, K + ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx + ndarray[take_t, ndim=2] result + + N, K = (values).shape + + if take_t is object: + # evaluated at compile-time + result = values.copy() + else: + result = np.empty_like(values) + + for i in range(N): + for j in range(K): + result[i, j] = values[i, indexer[i, j]] + return result diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx new file mode 100644 index 00000000..68f1057a --- /dev/null +++ b/pandas/_libs/groupby.pyx @@ -0,0 +1,1511 @@ +import cython +from cython import Py_ssize_t +from cython cimport floating + +from libc.stdlib cimport malloc, free + +import numpy as np +cimport numpy as cnp +from numpy cimport (ndarray, + int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float32_t, float64_t, complex64_t, complex128_t) +cnp.import_array() + +cdef extern from "numpy/npy_math.h": + float64_t NAN "NPY_NAN" + +from pandas._libs.util cimport numeric, get_nat + +from 
pandas._libs.algos cimport (swap, TiebreakEnumType, TIEBREAK_AVERAGE, + TIEBREAK_MIN, TIEBREAK_MAX, TIEBREAK_FIRST, + TIEBREAK_DENSE) +from pandas._libs.algos import (take_2d_axis1_float64_float64, + groupsort_indexer, tiebreakers) + +from pandas._libs.missing cimport checknull + +cdef int64_t NPY_NAT = get_nat() +_int64_max = np.iinfo(np.int64).max + +cdef float64_t NaN = np.NaN + +cdef enum InterpolationEnumType: + INTERPOLATION_LINEAR, + INTERPOLATION_LOWER, + INTERPOLATION_HIGHER, + INTERPOLATION_NEAREST, + INTERPOLATION_MIDPOINT + + +cdef inline float64_t median_linear(float64_t* a, int n) nogil: + cdef: + int i, j, na_count = 0 + float64_t result + float64_t* tmp + + if n == 0: + return NaN + + # count NAs + for i in range(n): + if a[i] != a[i]: + na_count += 1 + + if na_count: + if na_count == n: + return NaN + + tmp = malloc((n - na_count) * sizeof(float64_t)) + + j = 0 + for i in range(n): + if a[i] == a[i]: + tmp[j] = a[i] + j += 1 + + a = tmp + n -= na_count + + if n % 2: + result = kth_smallest_c( a, n // 2, n) + else: + result = (kth_smallest_c(a, n // 2, n) + + kth_smallest_c(a, n // 2 - 1, n)) / 2 + + if na_count: + free(a) + + return result + + +# TODO: Is this redundant with algos.kth_smallest +cdef inline float64_t kth_smallest_c(float64_t* a, + Py_ssize_t k, + Py_ssize_t n) nogil: + cdef: + Py_ssize_t i, j, l, m + float64_t x, t + + l = 0 + m = n - 1 + while l < m: + x = a[k] + i = l + j = m + + while 1: + while a[i] < x: i += 1 + while x < a[j]: j -= 1 + if i <= j: + swap(&a[i], &a[j]) + i += 1; j -= 1 + + if i > j: break + + if j < k: l = i + if k < i: m = j + return a[k] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_median_float64(ndarray[float64_t, ndim=2] out, + ndarray[int64_t] counts, + ndarray[float64_t, ndim=2] values, + ndarray[int64_t] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, ngroups, size + ndarray[int64_t] _counts + ndarray[float64_t, ndim=2] 
data + float64_t* ptr + + assert min_count == -1, "'min_count' only used in add and prod" + + ngroups = len(counts) + N, K = (values).shape + + indexer, _counts = groupsort_indexer(labels, ngroups) + counts[:] = _counts[1:] + + data = np.empty((K, N), dtype=np.float64) + ptr = cnp.PyArray_DATA(data) + + take_2d_axis1_float64_float64(values.T, indexer, out=data) + + with nogil: + + for i in range(K): + # exclude NA group + ptr += _counts[0] + for j in range(ngroups): + size = _counts[j + 1] + out[j, i] = median_linear(ptr, size) + ptr += size + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumprod_float64(float64_t[:, :] out, + const float64_t[:, :] values, + const int64_t[:] labels, + int ngroups, + bint is_datetimelike, + bint skipna=True): + """ + Cumulative product of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : float64 array + Array to store cumprod in. + values : float64 array + Values to take cumprod of. + labels : int64 array + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + Always false, `values` is never datetime-like. + skipna : bool + If true, ignore nans in `values`. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. 
+ """ + cdef: + Py_ssize_t i, j, N, K, size + float64_t val + float64_t[:, :] accum + int64_t lab + + N, K = (values).shape + accum = np.ones((ngroups, K), dtype=np.float64) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + if val == val: + accum[lab, j] *= val + out[i, j] = accum[lab, j] + else: + out[i, j] = NaN + if not skipna: + accum[lab, j] = NaN + break + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cumsum(numeric[:, :] out, + numeric[:, :] values, + const int64_t[:] labels, + int ngroups, + is_datetimelike, + bint skipna=True): + """ + Cumulative sum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : array + Array to store cumsum in. + values : array + Values to take cumsum of. + labels : int64 array + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + skipna : bool + If true, ignore nans in `values`. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. 
+ """ + cdef: + Py_ssize_t i, j, N, K, size + numeric val + numeric[:, :] accum + int64_t lab + + N, K = (values).shape + accum = np.zeros((ngroups, K), dtype=np.asarray(values).dtype) + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + if numeric == float32_t or numeric == float64_t: + if val == val: + accum[lab, j] += val + out[i, j] = accum[lab, j] + else: + out[i, j] = NaN + if not skipna: + accum[lab, j] = NaN + break + else: + accum[lab, j] += val + out[i, j] = accum[lab, j] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_shift_indexer(int64_t[:] out, const int64_t[:] labels, + int ngroups, int periods): + cdef: + Py_ssize_t N, i, j, ii + int offset = 0, sign + int64_t lab, idxer, idxer_slot + int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64) + int64_t[:, :] label_indexer + + N, = (labels).shape + + if periods < 0: + periods = -periods + offset = N - 1 + sign = -1 + elif periods > 0: + offset = 0 + sign = 1 + + if periods == 0: + with nogil: + for i in range(N): + out[i] = i + else: + # array of each previous indexer seen + label_indexer = np.zeros((ngroups, periods), dtype=np.int64) + with nogil: + for i in range(N): + # reverse iterator if shifting backwards + ii = offset + sign * i + lab = labels[ii] + + # Skip null keys + if lab == -1: + out[ii] = -1 + continue + + label_seen[lab] += 1 + + idxer_slot = label_seen[lab] % periods + idxer = label_indexer[lab, idxer_slot] + + if label_seen[lab] > periods: + out[ii] = idxer + else: + out[ii] = -1 + + label_indexer[lab, idxer_slot] = ii + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels, + ndarray[uint8_t] mask, object direction, + int64_t limit): + """ + Indexes how to fill values forwards or backwards within a group. 
+ + Parameters + ---------- + out : array of int64_t values which this method will write its results to + Missing values will be written to with a value of -1 + labels : array containing unique label for each group, with its ordering + matching up to the corresponding record in `values` + mask : array of int64_t values where a 1 indicates a missing value + direction : {'ffill', 'bfill'} + Direction for fill to be applied (forwards or backwards, respectively) + limit : Consecutive values to fill before stopping, or -1 for no limit + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + Py_ssize_t i, N + int64_t[:] sorted_labels + int64_t idx, curr_fill_idx=-1, filled_vals=0 + + N = len(out) + + # Make sure all arrays are the same size + assert N == len(labels) == len(mask) + + sorted_labels = np.argsort(labels, kind='mergesort').astype( + np.int64, copy=False) + if direction == 'bfill': + sorted_labels = sorted_labels[::-1] + + with nogil: + for i in range(N): + idx = sorted_labels[i] + if mask[idx] == 1: # is missing + # Stop filling once we've hit the limit + if filled_vals >= limit and limit != -1: + curr_fill_idx = -1 + filled_vals += 1 + else: # reset items when not missing + filled_vals = 0 + curr_fill_idx = idx + + out[idx] = curr_fill_idx + + # If we move to the next group, reset + # the fill_idx and counter + if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]: + curr_fill_idx = -1 + filled_vals = 0 + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_any_all(uint8_t[:] out, + const int64_t[:] labels, + const uint8_t[:] values, + const uint8_t[:] mask, + object val_test, + bint skipna): + """ + Aggregated boolean values to show truthfulness of group elements. 
+ + Parameters + ---------- + out : array of values which this method will write its results to + labels : array containing unique label for each group, with its + ordering matching up to the corresponding record in `values` + values : array containing the truth value of each element + mask : array indicating whether a value is na or not + val_test : str {'any', 'all'} + String object dictating whether to use any or all truth testing + skipna : boolean + Flag to ignore nan values during truth testing + + Notes + ----- + This method modifies the `out` parameter rather than returning an object. + The returned values will either be 0 or 1 (False or True, respectively). + """ + cdef: + Py_ssize_t i, N = len(labels) + int64_t lab + uint8_t flag_val + + if val_test == 'all': + # Because the 'all' value of an empty iterable in Python is True we can + # start with an array full of ones and set to zero when a False value + # is encountered + flag_val = 0 + elif val_test == 'any': + # Because the 'any' value of an empty iterable in Python is False we + # can start with an array full of zeros and set to one only if any + # value encountered is True + flag_val = 1 + else: + raise ValueError("'bool_func' must be either 'any' or 'all'!") + + out[:] = 1 - flag_val + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0 or (skipna and mask[i]): + continue + + if values[i] == flag_val: + out[lab] = flag_val + + +# ---------------------------------------------------------------------- +# group_add, group_prod, group_var, group_mean, group_ohlc +# ---------------------------------------------------------------------- + +ctypedef fused complexfloating_t: + float64_t + float32_t + complex64_t + complex128_t + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _group_add(complexfloating_t[:, :] out, + int64_t[:] counts, + complexfloating_t[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=0): + """ + Only aggregates on axis=0 + """ + cdef: + 
Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + complexfloating_t val, count + complexfloating_t[:, :] sumx + int64_t[:, :] nobs + + if len(values) != len(labels): + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + sumx = np.zeros_like(out) + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + if (complexfloating_t is complex64_t or + complexfloating_t is complex128_t): + # clang errors if we use += with these dtypes + sumx[lab, j] = sumx[lab, j] + val + else: + sumx[lab, j] += val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = NAN + else: + out[i, j] = sumx[i, j] + + +group_add_float32 = _group_add['float32_t'] +group_add_float64 = _group_add['float64_t'] +group_add_complex64 = _group_add['float complex'] +group_add_complex128 = _group_add['double complex'] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _group_prod(floating[:, :] out, + int64_t[:] counts, + floating[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=0): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + floating val, count + floating[:, :] prodx + int64_t[:, :] nobs + + if not len(values) == len(labels): + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + prodx = np.ones_like(out) + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + prodx[lab, j] *= val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] < min_count: + out[i, j] = NAN + else: + out[i, j] = prodx[i, j] + + +group_prod_float32 = _group_prod['float'] 
+group_prod_float64 = _group_prod['double'] + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +def _group_var(floating[:, :] out, + int64_t[:] counts, + floating[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + floating val, ct, oldmean + floating[:, :] mean + int64_t[:, :] nobs + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + mean = np.zeros_like(out) + + N, K = (values).shape + + out[:, :] = 0.0 + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + + for j in range(K): + val = values[i, j] + + # not nan + if val == val: + nobs[lab, j] += 1 + oldmean = mean[lab, j] + mean[lab, j] += (val - oldmean) / nobs[lab, j] + out[lab, j] += (val - mean[lab, j]) * (val - oldmean) + + for i in range(ncounts): + for j in range(K): + ct = nobs[i, j] + if ct < 2: + out[i, j] = NAN + else: + out[i, j] /= (ct - 1) + + +group_var_float32 = _group_var['float'] +group_var_float64 = _group_var['double'] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _group_mean(floating[:, :] out, + int64_t[:] counts, + floating[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + floating val, count + floating[:, :] sumx + int64_t[:, :] nobs + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise ValueError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + sumx = np.zeros_like(out) + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + # not nan + if val == val: + nobs[lab, j] += 1 + 
sumx[lab, j] += val + + for i in range(ncounts): + for j in range(K): + count = nobs[i, j] + if nobs[i, j] == 0: + out[i, j] = NAN + else: + out[i, j] = sumx[i, j] / count + + +group_mean_float32 = _group_mean['float'] +group_mean_float64 = _group_mean['double'] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _group_ohlc(floating[:, :] out, + int64_t[:] counts, + floating[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab + floating val, count + Py_ssize_t ngroups = len(counts) + + assert min_count == -1, "'min_count' only used in add and prod" + + if len(labels) == 0: + return + + N, K = (values).shape + + if out.shape[1] != 4: + raise ValueError('Output array must have 4 columns') + + if K > 1: + raise NotImplementedError("Argument 'values' must have only " + "one dimension") + out[:] = np.nan + + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: + continue + + counts[lab] += 1 + val = values[i, 0] + if val != val: + continue + + if out[lab, 0] != out[lab, 0]: + out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val + else: + out[lab, 1] = max(out[lab, 1], val) + out[lab, 2] = min(out[lab, 2], val) + out[lab, 3] = val + + +group_ohlc_float32 = _group_ohlc['float'] +group_ohlc_float64 = _group_ohlc['double'] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_quantile(ndarray[float64_t] out, + ndarray[int64_t] labels, + numeric[:] values, + ndarray[uint8_t] mask, + float64_t q, + object interpolation): + """ + Calculate the quantile per group. + + Parameters + ---------- + out : ndarray + Array of aggregated values that will be written to. + labels : ndarray + Array containing the unique group labels. + values : ndarray + Array containing the values to apply the function against. + q : float + The quantile value to search for. 
+ + Notes + ----- + Rather than explicitly returning a value, this function modifies the + provided `out` parameter. + """ + cdef: + Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz + Py_ssize_t grp_start=0, idx=0 + int64_t lab + uint8_t interp + float64_t q_idx, frac, val, next_val + ndarray[int64_t] counts, non_na_counts, sort_arr + + assert values.shape[0] == N + + if not (0 <= q <= 1): + raise ValueError(f"'q' must be between 0 and 1. Got '{q}' instead") + + inter_methods = { + 'linear': INTERPOLATION_LINEAR, + 'lower': INTERPOLATION_LOWER, + 'higher': INTERPOLATION_HIGHER, + 'nearest': INTERPOLATION_NEAREST, + 'midpoint': INTERPOLATION_MIDPOINT, + } + interp = inter_methods[interpolation] + + counts = np.zeros_like(out, dtype=np.int64) + non_na_counts = np.zeros_like(out, dtype=np.int64) + ngroups = len(counts) + + # First figure out the size of every group + with nogil: + for i in range(N): + lab = labels[i] + if lab == -1: # NA group label + continue + + counts[lab] += 1 + if not mask[i]: + non_na_counts[lab] += 1 + + # Get an index of values sorted by labels and then values + if labels.any(): + # Put '-1' (NaN) labels as the last group so it does not interfere + # with the calculations. + labels_for_lexsort = np.where(labels == -1, labels.max() + 1, labels) + else: + labels_for_lexsort = labels + order = (values, labels_for_lexsort) + sort_arr = np.lexsort(order).astype(np.int64, copy=False) + + with nogil: + for i in range(ngroups): + # Figure out how many group elements there are + grp_sz = counts[i] + non_na_sz = non_na_counts[i] + + if non_na_sz == 0: + out[i] = NaN + else: + # Calculate where to retrieve the desired value + # Casting to int will intentionally truncate result + idx = grp_start + (q * (non_na_sz - 1)) + + val = values[sort_arr[idx]] + # If requested quantile falls evenly on a particular index + # then write that index's value out. 
Otherwise interpolate + q_idx = q * (non_na_sz - 1) + frac = q_idx % 1 + + if frac == 0.0 or interp == INTERPOLATION_LOWER: + out[i] = val + else: + next_val = values[sort_arr[idx + 1]] + if interp == INTERPOLATION_LINEAR: + out[i] = val + (next_val - val) * frac + elif interp == INTERPOLATION_HIGHER: + out[i] = next_val + elif interp == INTERPOLATION_MIDPOINT: + out[i] = (val + next_val) / 2.0 + elif interp == INTERPOLATION_NEAREST: + if frac > .5 or (frac == .5 and q > .5): # Always OK? + out[i] = next_val + else: + out[i] = val + + # Increment the index reference in sorted_arr for the next group + grp_start += grp_sz + + +# ---------------------------------------------------------------------- +# group_nth, group_last, group_rank +# ---------------------------------------------------------------------- + +ctypedef fused rank_t: + float64_t + float32_t + int64_t + uint64_t + object + + +cdef inline bint _treat_as_na(rank_t val, bint is_datetimelike) nogil: + if rank_t is object: + # Should never be used, but we need to avoid the `val != val` below + # or else cython will raise about gil acquisition. 
+ raise NotImplementedError + + elif rank_t is int64_t: + return is_datetimelike and val == NPY_NAT + elif rank_t is uint64_t: + # There is no NA value for uint64 + return False + else: + return val != val + + +# GH#31710 use memorviews once cython 0.30 is released so we can +# use `const rank_t[:, :] values` +@cython.wraparound(False) +@cython.boundscheck(False) +def group_last(rank_t[:, :] out, + int64_t[:] counts, + ndarray[rank_t, ndim=2] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + rank_t val + ndarray[rank_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint runtime_error = False + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + if rank_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (values).shape + + if rank_t is object: + # TODO: De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # None should not be treated like other NA-like + # so that it won't be converted to nan + if not checknull(val) or val is None: + # NB: use _treat_as_na here once + # conditional-nogil is available. + nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? 
+ nobs[lab, j] += 1 + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if rank_t is int64_t: + out[i, j] = NPY_NAT + elif rank_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = NAN + + else: + out[i, j] = resx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +# GH#31710 use memorviews once cython 0.30 is released so we can +# use `const rank_t[:, :] values` +@cython.wraparound(False) +@cython.boundscheck(False) +def group_nth(rank_t[:, :] out, + int64_t[:] counts, + ndarray[rank_t, ndim=2] values, + const int64_t[:] labels, int64_t rank=1, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + rank_t val + ndarray[rank_t, ndim=2] resx + ndarray[int64_t, ndim=2] nobs + bint runtime_error = False + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + if rank_t is object: + resx = np.empty((out).shape, dtype=object) + else: + resx = np.empty_like(out) + + N, K = (values).shape + + if rank_t is object: + # TODO: De-duplicate once conditional-nogil is available + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + # None should not be treated like other NA-like + # so that it won't be converted to nan + if not checknull(val) or val is None: + # NB: use _treat_as_na here once + # conditional-nogil is available. 
+ nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + + else: + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? + nobs[lab, j] += 1 + if nobs[lab, j] == rank: + resx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if rank_t is int64_t: + out[i, j] = NPY_NAT + elif rank_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = NAN + else: + out[i, j] = resx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_rank(float64_t[:, :] out, + rank_t[:, :] values, + const int64_t[:] labels, + int ngroups, + bint is_datetimelike, object ties_method="average", + bint ascending=True, bint pct=False, object na_option="keep"): + """ + Provides the rank of values within each group. + + Parameters + ---------- + out : array of float64_t values which this method will write its results to + values : array of rank_t values to be ranked + labels : array containing unique label for each group, with its ordering + matching up to the corresponding record in `values` + ngroups : int + This parameter is not used, is needed to match signatures of other + groupby functions. 
+ is_datetimelike : bool + unused in this method but provided for call compatibility with other + Cython transformations + ties_method : {'average', 'min', 'max', 'first', 'dense'}, default + 'average' + * average: average rank of group + * min: lowest rank in group + * max: highest rank in group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups + ascending : boolean, default True + False for ranks by high (1) to low (N) + pct : boolean, default False + Compute percentage rank of data within each group + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are + * top: NA values get the smallest ranks + * bottom: NA values get the largest ranks + + Notes + ----- + This method modifies the `out` parameter rather than returning an object + """ + cdef: + TiebreakEnumType tiebreak + Py_ssize_t i, j, N, K, grp_start=0, dups=0, sum_ranks=0 + Py_ssize_t grp_vals_seen=1, grp_na_count=0, grp_tie_count=0 + ndarray[int64_t] _as + ndarray[float64_t, ndim=2] grp_sizes + ndarray[rank_t] masked_vals + ndarray[uint8_t] mask + bint keep_na + rank_t nan_fill_val + + if rank_t is object: + raise NotImplementedError("Cant do nogil") + + tiebreak = tiebreakers[ties_method] + keep_na = na_option == 'keep' + N, K = (values).shape + grp_sizes = np.ones_like(out) + + # Copy values into new array in order to fill missing data + # with mask, without obfuscating location of missing data + # in values array + masked_vals = np.array(values[:, 0], copy=True) + if rank_t is int64_t: + mask = (masked_vals == NPY_NAT).astype(np.uint8) + else: + mask = np.isnan(masked_vals).astype(np.uint8) + + if ascending ^ (na_option == 'top'): + if rank_t is int64_t: + nan_fill_val = np.iinfo(np.int64).max + elif rank_t is uint64_t: + nan_fill_val = np.iinfo(np.uint64).max + else: + nan_fill_val = np.inf + order = (masked_vals, mask, 
labels) + else: + if rank_t is int64_t: + nan_fill_val = np.iinfo(np.int64).min + elif rank_t is uint64_t: + nan_fill_val = 0 + else: + nan_fill_val = -np.inf + + order = (masked_vals, ~mask, labels) + np.putmask(masked_vals, mask, nan_fill_val) + + # lexsort using labels, then mask, then actual values + # each label corresponds to a different group value, + # the mask helps you differentiate missing values before + # performing sort on the actual values + _as = np.lexsort(order).astype(np.int64, copy=False) + + if not ascending: + _as = _as[::-1] + + with nogil: + # Loop over the length of the value array + # each incremental i value can be looked up in the _as array + # that we sorted previously, which gives us the location of + # that sorted value for retrieval back from the original + # values / masked_vals arrays + for i in range(N): + # dups and sum_ranks will be incremented each loop where + # the value / group remains the same, and should be reset + # when either of those change + # Used to calculate tiebreakers + dups += 1 + sum_ranks += i - grp_start + 1 + + # Update out only when there is a transition of values or labels. 
+ # When a new value or group is encountered, go back #dups steps + # (the number of occurrences of the current value) and assign the ranks + # based on the starting index of the current group (grp_start) + # and the current index + if (i == N - 1 or + (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or + (mask[_as[i]] ^ mask[_as[i+1]]) or + (labels[_as[i]] != labels[_as[i+1]])): + # if keep_na, check for missing values and assign back + # to the result where appropriate + if keep_na and mask[_as[i]]: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = NaN + grp_na_count = dups + elif tiebreak == TIEBREAK_AVERAGE: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = sum_ranks / dups + elif tiebreak == TIEBREAK_MIN: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = i - grp_start - dups + 2 + elif tiebreak == TIEBREAK_MAX: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = i - grp_start + 1 + elif tiebreak == TIEBREAK_FIRST: + for j in range(i - dups + 1, i + 1): + if ascending: + out[_as[j], 0] = j + 1 - grp_start + else: + out[_as[j], 0] = 2 * i - j - dups + 2 - grp_start + elif tiebreak == TIEBREAK_DENSE: + for j in range(i - dups + 1, i + 1): + out[_as[j], 0] = grp_vals_seen + + # look forward to the next value (using the sorting in _as) + # if the value does not equal the current value then we need to + # reset the dups and sum_ranks, knowing that a new value is + # coming up. the conditional also needs to handle nan equality + # and the end of iteration + if (i == N - 1 or + (masked_vals[_as[i]] != masked_vals[_as[i+1]]) or + (mask[_as[i]] ^ mask[_as[i+1]])): + dups = sum_ranks = 0 + grp_vals_seen += 1 + grp_tie_count += 1 + + # Similar to the previous conditional, check now if we are + # moving to a new group. If so, keep track of the index where + # the new group occurs, so the tiebreaker calculations can + # decrement that from their position. fill in the size of each + # group encountered (used by pct calculations later). 
also be + # sure to reset any of the items helping to calculate dups + if i == N - 1 or labels[_as[i]] != labels[_as[i+1]]: + if tiebreak != TIEBREAK_DENSE: + for j in range(grp_start, i + 1): + grp_sizes[_as[j], 0] = (i - grp_start + 1 - + grp_na_count) + else: + for j in range(grp_start, i + 1): + grp_sizes[_as[j], 0] = (grp_tie_count - + (grp_na_count > 0)) + dups = sum_ranks = 0 + grp_na_count = 0 + grp_tie_count = 0 + grp_start = i + 1 + grp_vals_seen = 1 + + if pct: + for i in range(N): + # We don't include NaN values in percentage + # rankings, so we assign them percentages of NaN. + if out[i, 0] != out[i, 0] or out[i, 0] == NAN: + out[i, 0] = NAN + elif grp_sizes[i, 0] != 0: + out[i, 0] = out[i, 0] / grp_sizes[i, 0] + + +# ---------------------------------------------------------------------- +# group_min, group_max +# ---------------------------------------------------------------------- + +# TODO: consider implementing for more dtypes +ctypedef fused groupby_t: + float64_t + float32_t + int64_t + uint64_t + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_max(groupby_t[:, :] out, + int64_t[:] counts, + ndarray[groupby_t, ndim=2] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + groupby_t val, count, nan_val + ndarray[groupby_t, ndim=2] maxx + bint runtime_error = False + int64_t[:, :] nobs + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + + maxx = np.empty_like(out) + if groupby_t is int64_t: + # Note: evaluated at compile-time + maxx[:] = -_int64_max + nan_val = NPY_NAT + elif groupby_t is uint64_t: + # NB: We do not define nan_val because there is no such thing + # for uint64_t. We carefully avoid having to reference it in this + # case. 
+ maxx[:] = 0 + else: + maxx[:] = -np.inf + nan_val = NAN + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? + nobs[lab, j] += 1 + if val > maxx[lab, j]: + maxx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if groupby_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = nan_val + else: + out[i, j] = maxx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +@cython.wraparound(False) +@cython.boundscheck(False) +def group_min(groupby_t[:, :] out, + int64_t[:] counts, + ndarray[groupby_t, ndim=2] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): + """ + Only aggregates on axis=0 + """ + cdef: + Py_ssize_t i, j, N, K, lab, ncounts = len(counts) + groupby_t val, count, nan_val + ndarray[groupby_t, ndim=2] minx + bint runtime_error = False + int64_t[:, :] nobs + + assert min_count == -1, "'min_count' only used in add and prod" + + if not len(values) == len(labels): + raise AssertionError("len(index) != len(labels)") + + nobs = np.zeros((out).shape, dtype=np.int64) + + minx = np.empty_like(out) + if groupby_t is int64_t: + minx[:] = _int64_max + nan_val = NPY_NAT + elif groupby_t is uint64_t: + # NB: We do not define nan_val because there is no such thing + # for uint64_t. We carefully avoid having to reference it in this + # case. + minx[:] = np.iinfo(np.uint64).max + else: + minx[:] = np.inf + nan_val = NAN + + N, K = (values).shape + + with nogil: + for i in range(N): + lab = labels[i] + if lab < 0: + continue + + counts[lab] += 1 + for j in range(K): + val = values[i, j] + + if not _treat_as_na(val, True): + # TODO: Sure we always want is_datetimelike=True? 
+ nobs[lab, j] += 1 + if val < minx[lab, j]: + minx[lab, j] = val + + for i in range(ncounts): + for j in range(K): + if nobs[i, j] == 0: + if groupby_t is uint64_t: + runtime_error = True + break + else: + out[i, j] = nan_val + else: + out[i, j] = minx[i, j] + + if runtime_error: + # We cannot raise directly above because that is within a nogil + # block. + raise RuntimeError("empty group with uint64_t") + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummin(groupby_t[:, :] out, + groupby_t[:, :] values, + const int64_t[:] labels, + int ngroups, + bint is_datetimelike): + """ + Cumulative minimum of columns of `values`, in row groups `labels`. + + Parameters + ---------- + out : array + Array to store cummin in. + values : array + Values to take cummin of. + labels : int64 array + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. + """ + cdef: + Py_ssize_t i, j, N, K, size + groupby_t val, mval + ndarray[groupby_t, ndim=2] accum + int64_t lab + + N, K = (values).shape + accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) + if groupby_t is int64_t: + accum[:] = _int64_max + elif groupby_t is uint64_t: + accum[:] = np.iinfo(np.uint64).max + else: + accum[:] = np.inf + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + if _treat_as_na(val, is_datetimelike): + out[i, j] = val + else: + mval = accum[lab, j] + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval + + +@cython.boundscheck(False) +@cython.wraparound(False) +def group_cummax(groupby_t[:, :] out, + groupby_t[:, :] values, + const int64_t[:] labels, + int ngroups, + bint is_datetimelike): + """ + Cumulative maximum of columns of `values`, in row groups `labels`. 
+ + Parameters + ---------- + out : array + Array to store cummax in. + values : array + Values to take cummax of. + labels : int64 array + Labels to group by. + ngroups : int + Number of groups, larger than all entries of `labels`. + is_datetimelike : bool + True if `values` contains datetime-like entries. + + Notes + ----- + This method modifies the `out` parameter, rather than returning an object. + """ + cdef: + Py_ssize_t i, j, N, K, size + groupby_t val, mval + ndarray[groupby_t, ndim=2] accum + int64_t lab + + N, K = (values).shape + accum = np.empty((ngroups, K), dtype=np.asarray(values).dtype) + if groupby_t is int64_t: + accum[:] = -_int64_max + elif groupby_t is uint64_t: + accum[:] = 0 + else: + accum[:] = -np.inf + + with nogil: + for i in range(N): + lab = labels[i] + + if lab < 0: + continue + for j in range(K): + val = values[i, j] + + if _treat_as_na(val, is_datetimelike): + out[i, j] = val + else: + mval = accum[lab, j] + if val > mval: + accum[lab, j] = mval = val + out[i, j] = mval diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx new file mode 100644 index 00000000..5298d8c5 --- /dev/null +++ b/pandas/_libs/hashing.pyx @@ -0,0 +1,189 @@ +# Translated from the reference implementation +# at https://github.com/veorq/SipHash + +import cython +from libc.stdlib cimport malloc, free + +import numpy as np +from numpy cimport uint8_t, uint32_t, uint64_t, import_array +import_array() + +from pandas._libs.util cimport is_nan + +DEF cROUNDS = 2 +DEF dROUNDS = 4 + + +@cython.boundscheck(False) +def hash_object_array(object[:] arr, object key, object encoding='utf8'): + """ + Parameters + ---------- + arr : 1-d object ndarray of objects + key : hash key, must be 16 byte len encoded + encoding : encoding for key & arr, default to 'utf8' + + Returns + ------- + 1-d uint64 ndarray of hashes. + + Raises + ------ + TypeError + If the array contains mixed types. 
+ + Notes + ----- + Allowed values are str, bytes, nulls (None or NaN per `is_nan`) and + hashable tuples; any other type will raise TypeError. + """ + cdef: + Py_ssize_t i, l, n + uint64_t[:] result + bytes data, k + uint8_t *kb + uint64_t *lens + char **vecs + char *cdata + object val + list datas = [] + + k = key.encode(encoding) + kb = k + if len(k) != 16: + raise ValueError("key should be a 16-byte string encoded, " + f"got {k} (len {len(k)})") + + n = len(arr) + + # create an array of bytes; NOTE(review): vecs/lens leak if the loop below raises + vecs = malloc(n * sizeof(char *)) + lens = malloc(n * sizeof(uint64_t)) + + for i in range(n): + val = arr[i] + if isinstance(val, bytes): + data = val + elif isinstance(val, str): + data = val.encode(encoding) + elif val is None or is_nan(val): + # null, stringify and encode + data = str(val).encode(encoding) + + elif isinstance(val, tuple): + # GH#28969 we could have a tuple, but need to ensure that + # the tuple entries are themselves hashable before converting + # to str + hash(val) + data = str(val).encode(encoding) + else: + raise TypeError(f"{val} of type {type(val)} is not a valid type " + "for hashing, must be string or null") + + l = len(data) + lens[i] = l + cdata = data + + # keep the references alive through the end of the + # function + datas.append(data) + vecs[i] = cdata + + result = np.empty(n, dtype=np.uint64) + with nogil: + for i in range(n): + result[i] = low_level_siphash(vecs[i], lens[i], kb) + + free(vecs) + free(lens) + return result.base # .base to retrieve underlying np.ndarray + + +cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil: + return (x << b) | (x >> (64 - b)) + + +cdef inline void u32to8_le(uint8_t* p, uint32_t v) nogil: + p[0] = (v) + p[1] = (v >> 8) + p[2] = (v >> 16) + p[3] = (v >> 24) + + +cdef inline uint64_t u8to64_le(uint8_t* p) nogil: + return (p[0] | + p[1] << 8 | + p[2] << 16 | + p[3] << 24 | + p[4] << 32 | + p[5] << 40 | + p[6] << 48 | + p[7] << 56) + + +cdef inline void _sipround(uint64_t* v0, uint64_t* v1, + uint64_t* v2, uint64_t* v3) nogil: + v0[0] += 
v1[0] + v1[0] = _rotl(v1[0], 13) + v1[0] ^= v0[0] + v0[0] = _rotl(v0[0], 32) + v2[0] += v3[0] + v3[0] = _rotl(v3[0], 16) + v3[0] ^= v2[0] + v0[0] += v3[0] + v3[0] = _rotl(v3[0], 21) + v3[0] ^= v0[0] + v2[0] += v1[0] + v1[0] = _rotl(v1[0], 17) + v1[0] ^= v2[0] + v2[0] = _rotl(v2[0], 32) + + +@cython.cdivision(True) +cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen, + uint8_t* key) nogil: + cdef uint64_t v0 = 0x736f6d6570736575ULL + cdef uint64_t v1 = 0x646f72616e646f6dULL + cdef uint64_t v2 = 0x6c7967656e657261ULL + cdef uint64_t v3 = 0x7465646279746573ULL + cdef uint64_t b + cdef uint64_t k0 = u8to64_le(key) + cdef uint64_t k1 = u8to64_le(key + 8) + cdef uint64_t m + cdef int i + cdef uint8_t* end = data + datalen - (datalen % sizeof(uint64_t)) + cdef int left = datalen & 7 + cdef int left_byte + + b = (datalen) << 56 + v3 ^= k1 + v2 ^= k0 + v1 ^= k1 + v0 ^= k0 + + while (data != end): + m = u8to64_le(data) + v3 ^= m + for i in range(cROUNDS): + _sipround(&v0, &v1, &v2, &v3) + v0 ^= m + + data += sizeof(uint64_t) + + for i in range(left-1, -1, -1): + b |= (data[i]) << (i * 8) + + v3 ^= b + + for i in range(cROUNDS): + _sipround(&v0, &v1, &v2, &v3) + + v0 ^= b + v2 ^= 0xff + + for i in range(dROUNDS): + _sipround(&v0, &v1, &v2, &v3) + + b = v0 ^ v1 ^ v2 ^ v3 + + return b diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd new file mode 100644 index 00000000..0499eabf --- /dev/null +++ b/pandas/_libs/hashtable.pxd @@ -0,0 +1,54 @@ +from pandas._libs.khash cimport ( + kh_int64_t, kh_uint64_t, kh_float64_t, kh_pymap_t, kh_str_t, uint64_t, + int64_t, float64_t) +from numpy cimport ndarray + +# prototypes for sharing + +cdef class HashTable: + pass + +cdef class UInt64HashTable(HashTable): + cdef kh_uint64_t *table + + cpdef get_item(self, uint64_t val) + cpdef set_item(self, uint64_t key, Py_ssize_t val) + +cdef class Int64HashTable(HashTable): + cdef kh_int64_t *table + + cpdef get_item(self, int64_t val) + cpdef set_item(self, int64_t 
key, Py_ssize_t val) + +cdef class Float64HashTable(HashTable): + cdef kh_float64_t *table + + cpdef get_item(self, float64_t val) + cpdef set_item(self, float64_t key, Py_ssize_t val) + +cdef class PyObjectHashTable(HashTable): + cdef kh_pymap_t *table + + cpdef get_item(self, object val) + cpdef set_item(self, object key, Py_ssize_t val) + + +cdef class StringHashTable(HashTable): + cdef kh_str_t *table + + cpdef get_item(self, str val) + cpdef set_item(self, str key, Py_ssize_t val) + +cdef struct Int64VectorData: + int64_t *data + Py_ssize_t n, m + +cdef class Int64Vector: + cdef Int64VectorData *data + cdef ndarray ao + cdef bint external_view_exists + + cdef resize(self) + cpdef to_array(self) + cdef inline void append(self, int64_t x) + cdef extend(self, int64_t[:] x) diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx new file mode 100644 index 00000000..59ba1705 --- /dev/null +++ b/pandas/_libs/hashtable.pyx @@ -0,0 +1,173 @@ +cimport cython + +from cpython.ref cimport PyObject, Py_INCREF +from cpython.mem cimport PyMem_Malloc, PyMem_Free + +from libc.stdlib cimport malloc, free + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, uint8_t, uint32_t, float64_t +cnp.import_array() + +cdef extern from "numpy/npy_math.h": + float64_t NAN "NPY_NAN" + + +from pandas._libs.khash cimport ( + khiter_t, + + kh_str_t, kh_init_str, kh_put_str, kh_exist_str, + kh_get_str, kh_destroy_str, kh_resize_str, + + kh_put_strbox, kh_get_strbox, kh_init_strbox, + + kh_int64_t, kh_init_int64, kh_resize_int64, kh_destroy_int64, + kh_get_int64, kh_exist_int64, kh_put_int64, + + kh_float64_t, kh_exist_float64, kh_put_float64, kh_init_float64, + kh_get_float64, kh_destroy_float64, kh_resize_float64, + + kh_resize_uint64, kh_exist_uint64, kh_destroy_uint64, kh_put_uint64, + kh_get_uint64, kh_init_uint64, + + kh_destroy_pymap, kh_exist_pymap, kh_init_pymap, kh_get_pymap, + kh_put_pymap, kh_resize_pymap) + + +cimport pandas._libs.util as util + +from 
pandas._libs.missing cimport checknull + + +cdef int64_t NPY_NAT = util.get_nat() +_SIZE_HINT_LIMIT = (1 << 20) + 7 + + +cdef Py_ssize_t _INIT_VEC_CAP = 128 + +include "hashtable_class_helper.pxi" +include "hashtable_func_helper.pxi" + +cdef class Factorizer: + cdef public: + PyObjectHashTable table + ObjectVector uniques + Py_ssize_t count + + def __init__(self, size_hint): + self.table = PyObjectHashTable(size_hint) + self.uniques = ObjectVector() + self.count = 0 + + def get_count(self): + return self.count + + def factorize(self, ndarray[object] values, sort=False, na_sentinel=-1, + na_value=None): + """ + Factorize values with nans replaced by na_sentinel + >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) + array([ 0, 1, 20]) + """ + if self.uniques.external_view_exists: + uniques = ObjectVector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques + labels = self.table.get_labels(values, self.uniques, + self.count, na_sentinel, na_value) + mask = (labels == na_sentinel) + # sort on + if sort: + if labels.dtype != np.intp: + labels = labels.astype(np.intp) + sorter = self.uniques.to_array().argsort() + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + labels = reverse_indexer.take(labels, mode='clip') + labels[mask] = na_sentinel + self.count = len(self.uniques) + return labels + + def unique(self, ndarray[object] values): + # just for fun + return self.table.unique(values) + + +cdef class Int64Factorizer: + cdef public: + Int64HashTable table + Int64Vector uniques + Py_ssize_t count + + def __init__(self, size_hint): + self.table = Int64HashTable(size_hint) + self.uniques = Int64Vector() + self.count = 0 + + def get_count(self): + return self.count + + def factorize(self, const int64_t[:] values, sort=False, + na_sentinel=-1, na_value=None): + """ + Factorize values with nans replaced by na_sentinel + >>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20) + 
array([ 0, 1, 20]) + """ + if self.uniques.external_view_exists: + uniques = Int64Vector() + uniques.extend(self.uniques.to_array()) + self.uniques = uniques + labels = self.table.get_labels(values, self.uniques, + self.count, na_sentinel, + na_value=na_value) + + # sort on + if sort: + if labels.dtype != np.intp: + labels = labels.astype(np.intp) + + sorter = self.uniques.to_array().argsort() + reverse_indexer = np.empty(len(sorter), dtype=np.intp) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + labels = reverse_indexer.take(labels) + + self.count = len(self.uniques) + return labels + + +@cython.wraparound(False) +@cython.boundscheck(False) +def unique_label_indices(const int64_t[:] labels): + """ + Indices of the first occurrences of the unique labels + *excluding* -1. equivalent to: + np.unique(labels, return_index=True)[1] + """ + cdef: + int ret = 0 + Py_ssize_t i, n = len(labels) + kh_int64_t *table = kh_init_int64() + Int64Vector idx = Int64Vector() + ndarray[int64_t, ndim=1] arr + Int64VectorData *ud = idx.data + + kh_resize_int64(table, min(n, _SIZE_HINT_LIMIT)) + + with nogil: + for i in range(n): + kh_put_int64(table, labels[i], &ret) + if ret != 0: + if needs_resize(ud): + with gil: + idx.resize() + append_data_int64(ud, i) + + kh_destroy_int64(table) + + arr = idx.to_array() + arr = arr[np.asarray(labels)[arr].argsort()] + + return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in new file mode 100644 index 00000000..811025a4 --- /dev/null +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -0,0 +1,1128 @@ +""" +Template for each `dtype` helper function for hashtable + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + + +# ---------------------------------------------------------------------- +# VectorData +# ---------------------------------------------------------------------- + +from 
pandas._libs.tslibs.util cimport get_c_string +from pandas._libs.missing cimport C_NA + +{{py: + +# name, dtype, c_type +# the generated StringVector is not actually used +# but is included for completeness (rather ObjectVector is used +# for uniques in hashtables) + +dtypes = [('Float64', 'float64', 'float64_t'), + ('Int64', 'int64', 'int64_t'), + ('String', 'string', 'char *'), + ('UInt64', 'uint64', 'uint64_t')] +}} + +{{for name, dtype, c_type in dtypes}} + + +{{if dtype != 'int64'}} + +ctypedef struct {{name}}VectorData: + {{c_type}} *data + Py_ssize_t n, m + +{{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline void append_data_{{dtype}}({{name}}VectorData *data, + {{c_type}} x) nogil: + + data.data[data.n] = x + data.n += 1 + +{{endfor}} + +ctypedef fused vector_data: + Int64VectorData + UInt64VectorData + Float64VectorData + StringVectorData + +cdef inline bint needs_resize(vector_data *data) nogil: + return data.n == data.m + +# ---------------------------------------------------------------------- +# Vector +# ---------------------------------------------------------------------- + +{{py: + +# name, dtype, c_type +dtypes = [('Float64', 'float64', 'float64_t'), + ('UInt64', 'uint64', 'uint64_t'), + ('Int64', 'int64', 'int64_t')] + +}} + +{{for name, dtype, c_type in dtypes}} + +cdef class {{name}}Vector: + + {{if dtype != 'int64'}} + cdef: + bint external_view_exists + {{name}}VectorData *data + ndarray ao + {{endif}} + + def __cinit__(self): + self.data = <{{name}}VectorData *>PyMem_Malloc( + sizeof({{name}}VectorData)) + if not self.data: + raise MemoryError() + self.external_view_exists = False + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.ao = np.empty(self.data.m, dtype=np.{{dtype}}) + self.data.data = <{{c_type}}*>self.ao.data + + cdef resize(self): + self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + self.ao.resize(self.data.m, refcheck=False) + self.data.data = <{{c_type}}*>self.ao.data + + def 
__dealloc__(self): + if self.data is not NULL: + PyMem_Free(self.data) + self.data = NULL + + def __len__(self) -> int: + return self.data.n + + cpdef to_array(self): + if self.data.m != self.data.n: + if self.external_view_exists: + # should never happen + raise ValueError("should have raised on append()") + self.ao.resize(self.data.n, refcheck=False) + self.data.m = self.data.n + self.external_view_exists = True + return self.ao + + cdef inline void append(self, {{c_type}} x): + + if needs_resize(self.data): + if self.external_view_exists: + raise ValueError("external reference but " + "Vector.resize() needed") + self.resize() + + append_data_{{dtype}}(self.data, x) + + cdef extend(self, const {{c_type}}[:] x): + for i in range(len(x)): + self.append(x[i]) + +{{endfor}} + +cdef class StringVector: + + cdef: + StringVectorData *data + bint external_view_exists + + def __cinit__(self): + self.data = PyMem_Malloc(sizeof(StringVectorData)) + if not self.data: + raise MemoryError() + self.external_view_exists = False + self.data.n = 0 + self.data.m = _INIT_VEC_CAP + self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() + + cdef resize(self): + cdef: + char **orig_data + Py_ssize_t i, m + + m = self.data.m + self.data.m = max(self.data.m * 4, _INIT_VEC_CAP) + + orig_data = self.data.data + self.data.data = malloc(self.data.m * sizeof(char *)) + if not self.data.data: + raise MemoryError() + for i in range(m): + self.data.data[i] = orig_data[i] + + def __dealloc__(self): + if self.data is not NULL: + if self.data.data is not NULL: + free(self.data.data) + PyMem_Free(self.data) + self.data = NULL + + def __len__(self) -> int: + return self.data.n + + def to_array(self): + cdef: + ndarray ao + Py_ssize_t n + object val + + ao = np.empty(self.data.n, dtype=np.object) + for i in range(self.data.n): + val = self.data.data[i] + ao[i] = val + self.external_view_exists = True + self.data.m = self.data.n + return ao + + cdef inline 
void append(self, char *x): + + if needs_resize(self.data): + self.resize() + + append_data_string(self.data, x) + + cdef extend(self, ndarray[:] x): + for i in range(len(x)): + self.append(x[i]) + + +cdef class ObjectVector: + + cdef: + PyObject **data + Py_ssize_t n, m + ndarray ao + bint external_view_exists + + def __cinit__(self): + self.external_view_exists = False + self.n = 0 + self.m = _INIT_VEC_CAP + self.ao = np.empty(_INIT_VEC_CAP, dtype=object) + self.data = self.ao.data + + def __len__(self) -> int: + return self.n + + cdef inline append(self, object obj): + if self.n == self.m: + if self.external_view_exists: + raise ValueError("external reference but " + "Vector.resize() needed") + self.m = max(self.m * 2, _INIT_VEC_CAP) + self.ao.resize(self.m, refcheck=False) + self.data = self.ao.data + + Py_INCREF(obj) + self.data[self.n] = obj + self.n += 1 + + def to_array(self): + if self.m != self.n: + if self.external_view_exists: + raise ValueError("should have raised on append()") + self.ao.resize(self.n, refcheck=False) + self.m = self.n + self.external_view_exists = True + return self.ao + + cdef extend(self, ndarray[:] x): + for i in range(len(x)): + self.append(x[i]) + +# ---------------------------------------------------------------------- +# HashTable +# ---------------------------------------------------------------------- + + +cdef class HashTable: + + pass + +{{py: + +# name, dtype, float_group, default_na_value +dtypes = [('Float64', 'float64', True, 'np.nan'), + ('UInt64', 'uint64', False, 0), + ('Int64', 'int64', False, 'NPY_NAT')] + +}} + + +{{for name, dtype, float_group, default_na_value in dtypes}} + +cdef class {{name}}HashTable(HashTable): + + def __cinit__(self, int64_t size_hint=1): + self.table = kh_init_{{dtype}}() + if size_hint is not None: + size_hint = min(size_hint, _SIZE_HINT_LIMIT) + kh_resize_{{dtype}}(self.table, size_hint) + + def __len__(self) -> int: + return self.table.size + + def __dealloc__(self): + if self.table is 
not NULL: + kh_destroy_{{dtype}}(self.table) + self.table = NULL + + def __contains__(self, object key): + cdef: + khiter_t k + k = kh_get_{{dtype}}(self.table, key) + return k != self.table.n_buckets + + def sizeof(self, deep=False): + """ return the size of my table in bytes """ + return self.table.n_buckets * (sizeof({{dtype}}_t) + # keys + sizeof(Py_ssize_t) + # vals + sizeof(uint32_t)) # flags + + cpdef get_item(self, {{dtype}}_t val): + cdef: + khiter_t k + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, {{dtype}}_t key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + + k = kh_put_{{dtype}}(self.table, key, &ret) + self.table.keys[k] = key + if kh_exist_{{dtype}}(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + @cython.boundscheck(False) + def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{dtype}}_t key + khiter_t k + + with nogil: + for i in range(n): + key = keys[i] + k = kh_put_{{dtype}}(self.table, key, &ret) + self.table.vals[k] = values[i] + + @cython.boundscheck(False) + def map_locations(self, const {{dtype}}_t[:] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{dtype}}_t val + khiter_t k + + with nogil: + for i in range(n): + val = values[i] + k = kh_put_{{dtype}}(self.table, val, &ret) + self.table.vals[k] = i + + @cython.boundscheck(False) + def lookup(self, const {{dtype}}_t[:] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{dtype}}_t val + khiter_t k + int64_t[:] locs = np.empty(n, dtype=np.int64) + + with nogil: + for i in range(n): + val = values[i] + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + return np.asarray(locs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, const {{dtype}}_t[:] 
values, {{name}}Vector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + uniques : {{name}}Vector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. + ignore_na : boolean, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. + return_inverse : boolean, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[int64] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + int64_t[:] labels + int ret = 0 + {{dtype}}_t val, na_value2 + khiter_t k + {{name}}VectorData *ud + bint use_na_value + + if return_inverse: + labels = np.empty(n, dtype=np.int64) + ud = uniques.data + use_na_value = na_value is not None + + if use_na_value: + # We need this na_value2 because we want to allow users + # to *optionally* specify an NA sentinel *of the correct* type. + # We use None, to make it optional, which requires `object` type + # for the parameter. To please the compiler, we use na_value2, + # which is only used if it's *specified*. 
+ na_value2 = <{{dtype}}_t>na_value + else: + na_value2 = {{default_na_value}} + + with nogil: + for i in range(n): + val = values[i] + + if ignore_na and ( + {{if not name.lower().startswith(("uint", "int"))}} + val != val or + {{endif}} + (use_na_value and val == na_value2) + ): + # if missing values do not count as unique values (i.e. if + # ignore_na is True), skip the hashtable entry for them, + # and replace the corresponding label with na_sentinel + labels[i] = na_sentinel + continue + + k = kh_get_{{dtype}}(self.table, val) + + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_{{dtype}}(self.table, val, &ret) + + if needs_resize(ud): + with gil: + if uniques.external_view_exists: + raise ValueError("external reference to " + "uniques held, but " + "Vector.resize() needed") + uniques.resize() + append_data_{{dtype}}(ud, val) + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + if return_inverse: + return uniques.to_array(), np.asarray(labels) + return uniques.to_array() + + def unique(self, const {{dtype}}_t[:] values, bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + return_inverse : boolean, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. 
+ + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[int64] (if return_inverse) + The labels from values to uniques + """ + uniques = {{name}}Vector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, const {{dtype}}_t[:] values, Py_ssize_t na_sentinel=-1, + object na_value=None): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. + The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[{{dtype}}] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. 
+ + Returns + ------- + uniques : ndarray[{{dtype}}] + Unique values of input, not sorted + labels : ndarray[int64] + The labels from values to uniques + """ + uniques_vector = {{name}}Vector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=True, + return_inverse=True) + + def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels + + @cython.boundscheck(False) + def get_labels_groupby(self, const {{dtype}}_t[:] values): + cdef: + Py_ssize_t i, n = len(values) + int64_t[:] labels + Py_ssize_t idx, count = 0 + int ret = 0 + {{dtype}}_t val + khiter_t k + {{name}}Vector uniques = {{name}}Vector() + {{name}}VectorData *ud + + labels = np.empty(n, dtype=np.int64) + ud = uniques.data + + with nogil: + for i in range(n): + val = values[i] + + # specific for groupby + {{if dtype != 'uint64'}} + if val < 0: + labels[i] = -1 + continue + {{endif}} + + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + idx = self.table.vals[k] + labels[i] = idx + else: + k = kh_put_{{dtype}}(self.table, val, &ret) + self.table.vals[k] = count + + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{{dtype}}(ud, val) + labels[i] = count + count += 1 + + arr_uniques = uniques.to_array() + + return np.asarray(labels), arr_uniques + +{{endfor}} + + +cdef class StringHashTable(HashTable): + # these by-definition *must* be strings + # or a sentinel np.nan / None missing value + na_string_sentinel = '__nan__' + + def __init__(self, int64_t size_hint=1): + self.table = kh_init_str() + if size_hint is not None: + size_hint = min(size_hint, _SIZE_HINT_LIMIT) + kh_resize_str(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + 
kh_destroy_str(self.table) + self.table = NULL + + def sizeof(self, deep=False): + """ return the size of my table in bytes """ + return self.table.n_buckets * (sizeof(char *) + # keys + sizeof(Py_ssize_t) + # vals + sizeof(uint32_t)) # flags + + cpdef get_item(self, str val): + cdef: + khiter_t k + const char *v + v = get_c_string(val) + + k = kh_get_str(self.table, v) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, str key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + const char *v + + v = get_c_string(key) + + k = kh_put_str(self.table, v, &ret) + self.table.keys[k] = v + if kh_exist_str(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + @cython.boundscheck(False) + def get_indexer(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + ndarray[int64_t] labels = np.empty(n, dtype=np.int64) + int64_t *resbuf = labels.data + khiter_t k + kh_str_t *table = self.table + const char *v + const char **vecs + + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + v = get_c_string(val) + vecs[i] = v + + with nogil: + for i in range(n): + k = kh_get_str(table, vecs[i]) + if k != table.n_buckets: + resbuf[i] = table.vals[k] + else: + resbuf[i] = -1 + + free(vecs) + return labels + + @cython.boundscheck(False) + def lookup(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + const char *v + khiter_t k + int64_t[:] locs = np.empty(n, dtype=np.int64) + + # these by-definition *must* be strings + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if isinstance(val, str): + # GH#31499 if we have a np.str_ get_c_string wont recognize + # it as a str, even though isinstance does. 
+ v = get_c_string(val) + else: + v = get_c_string(self.na_string_sentinel) + vecs[i] = v + + with nogil: + for i in range(n): + v = vecs[i] + k = kh_get_str(self.table, v) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + free(vecs) + return np.asarray(locs) + + @cython.boundscheck(False) + def map_locations(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + const char *v + const char **vecs + khiter_t k + + # these by-definition *must* be strings + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if isinstance(val, str): + # GH#31499 if we have a np.str_ get_c_string wont recognize + # it as a str, even though isinstance does. + v = get_c_string(val) + else: + v = get_c_string(self.na_string_sentinel) + vecs[i] = v + + with nogil: + for i in range(n): + v = vecs[i] + k = kh_put_str(self.table, v, &ret) + self.table.vals[k] = i + free(vecs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + uniques : ObjectVector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then any value + that is not a string is considered missing. If na_value is + not None, then _additionally_ any value "val" satisfying + val == na_value is considered missing. + ignore_na : boolean, default False + Whether NA-values should be ignored for calculating the uniques. 
If + True, the labels corresponding to missing values will be set to + na_sentinel. + return_inverse : boolean, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[int64] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + int64_t[:] labels + int64_t[:] uindexer + int ret = 0 + object val + const char *v + const char **vecs + khiter_t k + bint use_na_value + + if return_inverse: + labels = np.zeros(n, dtype=np.int64) + uindexer = np.empty(n, dtype=np.int64) + use_na_value = na_value is not None + + # assign pointers and pre-filter out missing (if ignore_na) + vecs = malloc(n * sizeof(char *)) + for i in range(n): + val = values[i] + + if (ignore_na + and (not isinstance(val, str) + or (use_na_value and val == na_value))): + # if missing values do not count as unique values (i.e. if + # ignore_na is True), we can skip the actual value, and + # replace the label with na_sentinel directly + labels[i] = na_sentinel + else: + # if ignore_na is False, we also stringify NaN/None/etc. 
+ v = get_c_string(val) + vecs[i] = v + + # compute + with nogil: + for i in range(n): + if ignore_na and labels[i] == na_sentinel: + # skip entries for ignored missing values (see above) + continue + + v = vecs[i] + k = kh_get_str(self.table, v) + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_str(self.table, v, &ret) + uindexer[count] = i + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + free(vecs) + + # uniques + for i in range(count): + uniques.append(values[uindexer[i]]) + + if return_inverse: + return uniques.to_array(), np.asarray(labels) + return uniques.to_array() + + def unique(self, ndarray[object] values, bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + return_inverse : boolean, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[int64] (if return_inverse) + The labels from values to uniques + """ + uniques = ObjectVector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1, + object na_value=None): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. 
+ The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then any value + that is not a string is considered missing. If na_value is + not None, then _additionally_ any value "val" satisfying + val == na_value is considered missing. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[int64] + The labels from values to uniques + """ + uniques_vector = ObjectVector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=True, + return_inverse=True) + + def get_labels(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels + + +cdef class PyObjectHashTable(HashTable): + + def __init__(self, int64_t size_hint=1): + self.table = kh_init_pymap() + if size_hint is not None: + size_hint = min(size_hint, _SIZE_HINT_LIMIT) + kh_resize_pymap(self.table, size_hint) + + def __dealloc__(self): + if self.table is not NULL: + kh_destroy_pymap(self.table) + self.table = NULL + + def __len__(self) -> int: + return self.table.size + + def __contains__(self, object key): + cdef: + khiter_t k + hash(key) + + k = kh_get_pymap(self.table, key) + return k != self.table.n_buckets + + def sizeof(self, deep=False): + """ return the size of my table in bytes """ + return self.table.n_buckets * (sizeof(PyObject *) + # keys + sizeof(Py_ssize_t) + # vals + sizeof(uint32_t)) # flags + + cpdef get_item(self, object val): + cdef: + khiter_t k + + k = 
kh_get_pymap(self.table, val) + if k != self.table.n_buckets: + return self.table.vals[k] + else: + raise KeyError(val) + + cpdef set_item(self, object key, Py_ssize_t val): + cdef: + khiter_t k + int ret = 0 + char* buf + + hash(key) + + k = kh_put_pymap(self.table, key, &ret) + # self.table.keys[k] = key + if kh_exist_pymap(self.table, k): + self.table.vals[k] = val + else: + raise KeyError(key) + + def map_locations(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + khiter_t k + + for i in range(n): + val = values[i] + hash(val) + + k = kh_put_pymap(self.table, val, &ret) + self.table.vals[k] = i + + def lookup(self, ndarray[object] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + object val + khiter_t k + int64_t[:] locs = np.empty(n, dtype=np.int64) + + for i in range(n): + val = values[i] + hash(val) + + k = kh_get_pymap(self.table, val) + if k != self.table.n_buckets: + locs[i] = self.table.vals[k] + else: + locs[i] = -1 + + return np.asarray(locs) + + @cython.boundscheck(False) + @cython.wraparound(False) + def _unique(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None, bint ignore_na=False, + bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + uniques : ObjectVector + Vector into which uniques will be written + count_prior : Py_ssize_t, default 0 + Number of existing entries in uniques + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then None _plus_ + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. 
+ ignore_na : boolean, default False + Whether NA-values should be ignored for calculating the uniques. If + True, the labels corresponding to missing values will be set to + na_sentinel. + return_inverse : boolean, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[int64] (if return_inverse=True) + The labels from values to uniques + """ + cdef: + Py_ssize_t i, idx, count = count_prior, n = len(values) + int64_t[:] labels + int ret = 0 + object val + khiter_t k + bint use_na_value + + if return_inverse: + labels = np.empty(n, dtype=np.int64) + use_na_value = na_value is not None + + for i in range(n): + val = values[i] + hash(val) + + if ignore_na and ( + (val is C_NA) + or (val != val) + or (val is None) + or (use_na_value and val == na_value) + ): + # if missing values do not count as unique values (i.e. if + # ignore_na is True), skip the hashtable entry for them, and + # replace the corresponding label with na_sentinel + labels[i] = na_sentinel + continue + + k = kh_get_pymap(self.table, val) + if k == self.table.n_buckets: + # k hasn't been seen yet + k = kh_put_pymap(self.table, val, &ret) + uniques.append(val) + if return_inverse: + self.table.vals[k] = count + labels[i] = count + count += 1 + elif return_inverse: + # k falls into a previous bucket + # only relevant in case we need to construct the inverse + idx = self.table.vals[k] + labels[i] = idx + + if return_inverse: + return uniques.to_array(), np.asarray(labels) + return uniques.to_array() + + def unique(self, ndarray[object] values, bint return_inverse=False): + """ + Calculate unique values and labels (no sorting!) 
+ + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + return_inverse : boolean, default False + Whether the mapping of the original array values to their location + in the vector of uniques should be returned. + + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[int64] (if return_inverse) + The labels from values to uniques + """ + uniques = ObjectVector() + return self._unique(values, uniques, ignore_na=False, + return_inverse=return_inverse) + + def factorize(self, ndarray[object] values, Py_ssize_t na_sentinel=-1, + object na_value=None): + """ + Calculate unique values and labels (no sorting!) + + Missing values are not included in the "uniques" for this method. + The labels for any missing values will be set to "na_sentinel" + + Parameters + ---------- + values : ndarray[object] + Array of values of which unique will be calculated + na_sentinel : Py_ssize_t, default -1 + Sentinel value used for all NA-values in inverse + na_value : object, default None + Value to identify as missing. If na_value is None, then None _plus_ + any value "val" satisfying val != val is considered missing. + If na_value is not None, then _additionally_, any value "val" + satisfying val == na_value is considered missing. 
+ + Returns + ------- + uniques : ndarray[object] + Unique values of input, not sorted + labels : ndarray[int64] + The labels from values to uniques + """ + uniques_vector = ObjectVector() + return self._unique(values, uniques_vector, na_sentinel=na_sentinel, + na_value=na_value, ignore_na=True, + return_inverse=True) + + def get_labels(self, ndarray[object] values, ObjectVector uniques, + Py_ssize_t count_prior=0, Py_ssize_t na_sentinel=-1, + object na_value=None): + _, labels = self._unique(values, uniques, count_prior=count_prior, + na_sentinel=na_sentinel, na_value=na_value, + ignore_na=True, return_inverse=True) + return labels diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in new file mode 100644 index 00000000..f8f3858b --- /dev/null +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -0,0 +1,341 @@ +""" +Template for each `dtype` helper function for hashtable + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +{{py: + +# dtype, ttype, c_type +dtypes = [('float64', 'float64', 'float64_t'), + ('uint64', 'uint64', 'uint64_t'), + ('object', 'pymap', 'object'), + ('int64', 'int64', 'int64_t')] + +}} + +{{for dtype, ttype, c_type in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cdef build_count_table_{{dtype}}(ndarray[{{dtype}}] values, + kh_{{ttype}}_t *table, bint dropna): +{{else}} +cdef build_count_table_{{dtype}}({{dtype}}_t[:] values, + kh_{{ttype}}_t *table, bint dropna): +{{endif}} + cdef: + khiter_t k + Py_ssize_t i, n = len(values) + + {{c_type}} val + + int ret = 0 + + {{if dtype == 'object'}} + kh_resize_{{ttype}}(table, n // 10) + + for i in range(n): + val = values[i] + + if not checknull(val) or not dropna: + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, val, &ret) + table.vals[k] = 1 + {{else}} + with nogil: + kh_resize_{{ttype}}(table, n) + + for i 
in range(n): + val = values[i] + + {{if dtype == 'float64'}} + if val == val or not dropna: + {{else}} + if True: + {{endif}} + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, val, &ret) + table.vals[k] = 1 + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +cpdef value_count_{{dtype}}(ndarray[{{dtype}}] values, bint dropna): +{{else}} +cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna): +{{endif}} + cdef: + Py_ssize_t i = 0 + kh_{{ttype}}_t *table + + {{if dtype != 'object'}} + {{dtype}}_t[:] result_keys + int64_t[:] result_counts + {{endif}} + + Py_ssize_t k + + table = kh_init_{{ttype}}() + {{if dtype == 'object'}} + build_count_table_{{dtype}}(values, table, 1) + {{else}} + build_count_table_{{dtype}}(values, table, dropna) + {{endif}} + + result_keys = np.empty(table.n_occupied, dtype=np.{{dtype}}) + result_counts = np.zeros(table.n_occupied, dtype=np.int64) + + {{if dtype == 'object'}} + for k in range(table.n_buckets): + if kh_exist_{{ttype}}(table, k): + result_keys[i] = <{{dtype}}>table.keys[k] + result_counts[i] = table.vals[k] + i += 1 + {{else}} + with nogil: + for k in range(table.n_buckets): + if kh_exist_{{ttype}}(table, k): + result_keys[i] = table.keys[k] + result_counts[i] = table.vals[k] + i += 1 + {{endif}} + + kh_destroy_{{ttype}}(table) + + {{if dtype == 'object'}} + return result_keys, result_counts + {{else}} + return np.asarray(result_keys), np.asarray(result_counts) + {{endif}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +def duplicated_{{dtype}}(ndarray[{{dtype}}] values, object keep='first'): +{{else}} +def duplicated_{{dtype}}({{c_type}}[:] values, object keep='first'): +{{endif}} + cdef: + int ret = 0 + {{if dtype != 'object'}} + {{dtype}}_t value + {{endif}} + Py_ssize_t k, i, n = len(values) + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + ndarray[uint8_t, ndim=1, 
cast=True] out = np.empty(n, dtype='bool') + + kh_resize_{{ttype}}(table, min(n, _SIZE_HINT_LIMIT)) + + if keep not in ('last', 'first', False): + raise ValueError('keep must be either "first", "last" or False') + + if keep == 'last': + {{if dtype == 'object'}} + for i in range(n - 1, -1, -1): + # equivalent: range(n)[::-1], which cython doesn't like in nogil + kh_put_{{ttype}}(table, values[i], &ret) + out[i] = ret == 0 + {{else}} + with nogil: + for i in range(n - 1, -1, -1): + # equivalent: range(n)[::-1], which cython doesn't like in nogil + kh_put_{{ttype}}(table, values[i], &ret) + out[i] = ret == 0 + {{endif}} + elif keep == 'first': + {{if dtype == 'object'}} + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + out[i] = ret == 0 + {{else}} + with nogil: + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + out[i] = ret == 0 + {{endif}} + else: + {{if dtype == 'object'}} + for i in range(n): + value = values[i] + k = kh_get_{{ttype}}(table, value) + if k != table.n_buckets: + out[table.vals[k]] = 1 + out[i] = 1 + else: + k = kh_put_{{ttype}}(table, value, &ret) + table.keys[k] = value + table.vals[k] = i + out[i] = 0 + {{else}} + with nogil: + for i in range(n): + value = values[i] + k = kh_get_{{ttype}}(table, value) + if k != table.n_buckets: + out[table.vals[k]] = 1 + out[i] = 1 + else: + k = kh_put_{{ttype}}(table, value, &ret) + table.keys[k] = value + table.vals[k] = i + out[i] = 0 + {{endif}} + kh_destroy_{{ttype}}(table) + return out + + +# ---------------------------------------------------------------------- +# Membership +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +{{if dtype == 'object'}} +def ismember_{{dtype}}(ndarray[{{c_type}}] arr, ndarray[{{c_type}}] values): +{{else}} +def ismember_{{dtype}}({{c_type}}[:] arr, {{c_type}}[:] values): +{{endif}} + """ + Return boolean of values in arr on an + element by-element basis + + 
Parameters + ---------- + arr : {{dtype}} ndarray + values : {{dtype}} ndarray + + Returns + ------- + boolean ndarry len of (arr) + """ + cdef: + Py_ssize_t i, n, k + int ret = 0 + ndarray[uint8_t] result + {{c_type}} val + kh_{{ttype}}_t *table = kh_init_{{ttype}}() + + # construct the table + n = len(values) + kh_resize_{{ttype}}(table, n) + + {{if dtype == 'object'}} + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + {{else}} + with nogil: + for i in range(n): + kh_put_{{ttype}}(table, values[i], &ret) + {{endif}} + + # test membership + n = len(arr) + result = np.empty(n, dtype=np.uint8) + + {{if dtype == 'object'}} + for i in range(n): + val = arr[i] + k = kh_get_{{ttype}}(table, val) + result[i] = (k != table.n_buckets) + {{else}} + with nogil: + for i in range(n): + val = arr[i] + k = kh_get_{{ttype}}(table, val) + result[i] = (k != table.n_buckets) + {{endif}} + + kh_destroy_{{ttype}}(table) + return result.view(np.bool_) + +{{endfor}} + + +# ---------------------------------------------------------------------- +# Mode Computations +# ---------------------------------------------------------------------- + +{{py: + +# dtype, ctype, table_type, npy_dtype +dtypes = [('float64', 'float64_t', 'float64', 'float64'), + ('int64', 'int64_t', 'int64', 'int64'), + ('uint64', 'uint64_t', 'uint64', 'uint64'), + ('object', 'object', 'pymap', 'object_')] +}} + +{{for dtype, ctype, table_type, npy_dtype in dtypes}} + + +@cython.wraparound(False) +@cython.boundscheck(False) + +{{if dtype == 'object'}} + + +def mode_{{dtype}}(ndarray[{{ctype}}] values, bint dropna): +{{else}} + + +def mode_{{dtype}}({{ctype}}[:] values, bint dropna): +{{endif}} + cdef: + int count, max_count = 1 + int j = -1 # so you can do += + Py_ssize_t k + kh_{{table_type}}_t *table + ndarray[{{ctype}}] modes + + table = kh_init_{{table_type}}() + build_count_table_{{dtype}}(values, table, dropna) + + modes = np.empty(table.n_buckets, dtype=np.{{npy_dtype}}) + + {{if dtype != 'object'}} 
+ with nogil: + for k in range(table.n_buckets): + if kh_exist_{{table_type}}(table, k): + count = table.vals[k] + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = table.keys[k] + {{else}} + for k in range(table.n_buckets): + if kh_exist_{{table_type}}(table, k): + count = table.vals[k] + + if count == max_count: + j += 1 + elif count > max_count: + max_count = count + j = 0 + else: + continue + + modes[j] = table.keys[k] + {{endif}} + + kh_destroy_{{table_type}}(table) + + return modes[:j + 1] + +{{endfor}} diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx new file mode 100644 index 00000000..03ce668e --- /dev/null +++ b/pandas/_libs/index.pyx @@ -0,0 +1,731 @@ +from datetime import datetime, timedelta, date +import warnings + +import cython + +import numpy as np +cimport numpy as cnp +from numpy cimport (ndarray, intp_t, + float64_t, float32_t, + int64_t, int32_t, int16_t, int8_t, + uint64_t, uint32_t, uint16_t, uint8_t, + # Note: NPY_DATETIME, NPY_TIMEDELTA are only available + # for cimport in cython>=0.27.3 + NPY_DATETIME, NPY_TIMEDELTA) +cnp.import_array() + + +cimport pandas._libs.util as util + +from pandas._libs.tslibs.conversion cimport maybe_datetimelike_to_i8 +from pandas._libs.tslibs.nattype cimport c_NaT as NaT + +from pandas._libs.hashtable cimport HashTable + +from pandas._libs import algos, hashtable as _hash +from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib +from pandas._libs.missing import checknull + +cdef int64_t NPY_NAT = util.get_nat() + + +cdef inline bint is_definitely_invalid_key(object val): + if isinstance(val, tuple): + try: + hash(val) + except TypeError: + return True + + # we have a _data, means we are a NDFrame + return (isinstance(val, slice) or util.is_array(val) + or isinstance(val, list) or hasattr(val, '_data')) + + +cpdef get_value_at(ndarray arr, object loc, object tz=None): + obj = util.get_value_at(arr, loc) + + if 
arr.descr.type_num == NPY_DATETIME: + return Timestamp(obj, tz=tz) + elif arr.descr.type_num == NPY_TIMEDELTA: + return Timedelta(obj) + return obj + + +# Don't populate hash tables in monotonic indexes larger than this +_SIZE_CUTOFF = 1_000_000 + + +cdef class IndexEngine: + + cdef readonly: + object vgetter + HashTable mapping + bint over_size_threshold + + cdef: + bint unique, monotonic_inc, monotonic_dec + bint need_monotonic_check, need_unique_check + + def __init__(self, vgetter, n): + self.vgetter = vgetter + + self.over_size_threshold = n >= _SIZE_CUTOFF + self.clear_mapping() + + def __contains__(self, object val): + self._ensure_mapping_populated() + hash(val) + return val in self.mapping + + cpdef get_value(self, ndarray arr, object key, object tz=None): + """ + Parameters + ---------- + arr : 1-dimensional ndarray + """ + cdef: + object loc + void* data_ptr + + loc = self.get_loc(key) + if isinstance(loc, slice) or util.is_array(loc): + return arr[loc] + else: + return get_value_at(arr, loc, tz=tz) + + cpdef set_value(self, ndarray arr, object key, object value): + """ + Parameters + ---------- + arr : 1-dimensional ndarray + """ + cdef: + object loc + void* data_ptr + + loc = self.get_loc(key) + value = convert_scalar(arr, value) + + arr[loc] = value + + cpdef get_loc(self, object val): + cdef: + Py_ssize_t loc + + if is_definitely_invalid_key(val): + raise TypeError(f"'{val}' is an invalid key") + + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + return self._get_loc_duplicates(val) + values = self._get_index_values() + + self._check_type(val) + loc = _bin_search(values, val) # .searchsorted(val, side='left') + if loc >= len(values): + raise KeyError(val) + if values[loc] != val: + raise KeyError(val) + return loc + + self._ensure_mapping_populated() + if not self.unique: + return self._get_loc_duplicates(val) + + self._check_type(val) + + try: + return self.mapping.get_item(val) + except (TypeError, 
ValueError): + raise KeyError(val) + + cdef inline _get_loc_duplicates(self, object val): + cdef: + Py_ssize_t diff + + if self.is_monotonic_increasing: + values = self._get_index_values() + try: + left = values.searchsorted(val, side='left') + right = values.searchsorted(val, side='right') + except TypeError: + # e.g. GH#29189 get_loc(None) with a Float64Index + raise KeyError(val) + + diff = right - left + if diff == 0: + raise KeyError(val) + elif diff == 1: + return left + else: + return slice(left, right) + + return self._maybe_get_bool_indexer(val) + + cdef _maybe_get_bool_indexer(self, object val): + cdef: + ndarray[uint8_t, ndim=1, cast=True] indexer + ndarray[intp_t, ndim=1] found + int count + + indexer = self._get_index_values() == val + found = np.where(indexer)[0] + count = len(found) + + if count > 1: + return indexer + if count == 1: + return int(found[0]) + + raise KeyError(val) + + def sizeof(self, deep: bool = False) -> int: + """ return the sizeof our mapping """ + if not self.is_mapping_populated: + return 0 + return self.mapping.sizeof(deep=deep) + + def __sizeof__(self) -> int: + return self.sizeof() + + @property + def is_unique(self) -> bool: + if self.need_unique_check: + self._do_unique_check() + + return self.unique == 1 + + cdef inline _do_unique_check(self): + + # this de-facto the same + self._ensure_mapping_populated() + + @property + def is_monotonic_increasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_inc == 1 + + @property + def is_monotonic_decreasing(self) -> bool: + if self.need_monotonic_check: + self._do_monotonic_check() + + return self.monotonic_dec == 1 + + cdef inline _do_monotonic_check(self): + cdef object is_unique + try: + values = self._get_index_values() + self.monotonic_inc, self.monotonic_dec, is_unique = \ + self._call_monotonic(values) + except TypeError: + self.monotonic_inc = 0 + self.monotonic_dec = 0 + is_unique = 0 + + self.need_monotonic_check = 0 
+ + # we can only be sure of uniqueness if is_unique=1 + if is_unique: + self.unique = 1 + self.need_unique_check = 0 + + cdef _get_index_values(self): + return self.vgetter() + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=False) + + def get_backfill_indexer(self, other, limit=None): + return algos.backfill(self._get_index_values(), other, limit=limit) + + def get_pad_indexer(self, other, limit=None): + return algos.pad(self._get_index_values(), other, limit=limit) + + cdef _make_hash_table(self, Py_ssize_t n): + raise NotImplementedError + + cdef _check_type(self, object val): + hash(val) + + @property + def is_mapping_populated(self) -> bool: + return self.mapping is not None + + cdef inline _ensure_mapping_populated(self): + # this populates the mapping + # if its not already populated + # also satisfies the need_unique_check + + if not self.is_mapping_populated: + + values = self._get_index_values() + self.mapping = self._make_hash_table(len(values)) + self._call_map_locations(values) + + if len(self.mapping) == len(values): + self.unique = 1 + + self.need_unique_check = 0 + + cpdef _call_map_locations(self, values): + self.mapping.map_locations(values) + + def clear_mapping(self): + self.mapping = None + self.need_monotonic_check = 1 + self.need_unique_check = 1 + + self.unique = 0 + self.monotonic_inc = 0 + self.monotonic_dec = 0 + + def get_indexer(self, values): + self._ensure_mapping_populated() + return self.mapping.lookup(values) + + def get_indexer_non_unique(self, targets): + """ + Return an indexer suitable for taking from a non unique index + return the labels in the same order ast the target + and a missing indexer into the targets (which correspond + to the -1 indices in the results + """ + cdef: + ndarray values, x + ndarray[int64_t] result, missing + set stargets, remaining_stargets + dict d = {} + object val + int count = 0, count_missing = 0 + Py_ssize_t i, j, n, n_t, n_alloc + + 
self._ensure_mapping_populated() + values = np.array(self._get_index_values(), copy=False) + stargets = set(targets) + n = len(values) + n_t = len(targets) + if n > 10_000: + n_alloc = 10_000 + else: + n_alloc = n + + result = np.empty(n_alloc, dtype=np.int64) + missing = np.empty(n_t, dtype=np.int64) + + # map each starget to its position in the index + if stargets and len(stargets) < 5 and self.is_monotonic_increasing: + # if there are few enough stargets and the index is monotonically + # increasing, then use binary search for each starget + remaining_stargets = set() + for starget in stargets: + try: + start = values.searchsorted(starget, side='left') + end = values.searchsorted(starget, side='right') + except TypeError: # e.g. if we tried to search for string in int array + remaining_stargets.add(starget) + else: + if start != end: + d[starget] = list(range(start, end)) + + stargets = remaining_stargets + + if stargets: + # otherwise, map by iterating through all items in the index + for i in range(n): + val = values[i] + if val in stargets: + if val not in d: + d[val] = [] + d[val].append(i) + + for i in range(n_t): + val = targets[i] + + # found + if val in d: + for j in d[val]: + + # realloc if needed + if count >= n_alloc: + n_alloc += 10_000 + result = np.resize(result, n_alloc) + + result[count] = j + count += 1 + + # value not found + else: + + if count >= n_alloc: + n_alloc += 10_000 + result = np.resize(result, n_alloc) + result[count] = -1 + count += 1 + missing[count_missing] = i + count_missing += 1 + + return result[0:count], missing[0:count_missing] + + +cdef Py_ssize_t _bin_search(ndarray values, object val) except -1: + cdef: + Py_ssize_t mid = 0, lo = 0, hi = len(values) - 1 + object pval + + if hi == 0 or (hi > 0 and val > values[hi]): + return len(values) + + while lo < hi: + mid = (lo + hi) // 2 + pval = values[mid] + if val < pval: + hi = mid + elif val > pval: + lo = mid + 1 + else: + while mid > 0 and val == values[mid - 1]: + mid -= 1 + 
return mid + + if val <= values[mid]: + return mid + else: + return mid + 1 + + +cdef class ObjectEngine(IndexEngine): + """ + Index Engine for use with object-dtype Index, namely the base class Index. + """ + cdef _make_hash_table(self, Py_ssize_t n): + return _hash.PyObjectHashTable(n) + + +cdef class DatetimeEngine(Int64Engine): + + cdef _get_box_dtype(self): + return 'M8[ns]' + + def __contains__(self, object val): + cdef: + int64_t loc + + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + return self._get_loc_duplicates(val) + values = self._get_index_values() + conv = maybe_datetimelike_to_i8(val) + loc = values.searchsorted(conv, side='left') + return values[loc] == conv + + self._ensure_mapping_populated() + return maybe_datetimelike_to_i8(val) in self.mapping + + cdef _get_index_values(self): + return self.vgetter().view('i8') + + cdef _call_monotonic(self, values): + return algos.is_monotonic(values, timelike=True) + + cpdef get_loc(self, object val): + cdef: + int64_t loc + if is_definitely_invalid_key(val): + raise TypeError + + # Welcome to the spaghetti factory + if self.over_size_threshold and self.is_monotonic_increasing: + if not self.is_unique: + val = maybe_datetimelike_to_i8(val) + return self._get_loc_duplicates(val) + values = self._get_index_values() + + try: + conv = maybe_datetimelike_to_i8(val) + loc = values.searchsorted(conv, side='left') + except TypeError: + self._date_check_type(val) + raise KeyError(val) + + if loc == len(values) or values[loc] != conv: + raise KeyError(val) + return loc + + self._ensure_mapping_populated() + if not self.unique: + val = maybe_datetimelike_to_i8(val) + return self._get_loc_duplicates(val) + + try: + return self.mapping.get_item(val.value) + except KeyError: + raise KeyError(val) + except AttributeError: + pass + + try: + val = maybe_datetimelike_to_i8(val) + return self.mapping.get_item(val) + except (TypeError, ValueError): + self._date_check_type(val) + raise 
KeyError(val) + + cdef inline _date_check_type(self, object val): + hash(val) + if not util.is_integer_object(val): + raise KeyError(val) + + def get_indexer(self, values): + self._ensure_mapping_populated() + if values.dtype != self._get_box_dtype(): + return np.repeat(-1, len(values)).astype('i4') + values = np.asarray(values).view('i8') + return self.mapping.lookup(values) + + def get_pad_indexer(self, other, limit=None): + if other.dtype != self._get_box_dtype(): + return np.repeat(-1, len(other)).astype('i4') + other = np.asarray(other).view('i8') + return algos.pad(self._get_index_values(), other, limit=limit) + + def get_backfill_indexer(self, other, limit=None): + if other.dtype != self._get_box_dtype(): + return np.repeat(-1, len(other)).astype('i4') + other = np.asarray(other).view('i8') + return algos.backfill(self._get_index_values(), other, limit=limit) + + +cdef class TimedeltaEngine(DatetimeEngine): + + cdef _get_box_dtype(self): + return 'm8[ns]' + + +cdef class PeriodEngine(Int64Engine): + + cdef _get_index_values(self): + return super(PeriodEngine, self).vgetter() + + cpdef _call_map_locations(self, values): + # super(...) pattern doesn't seem to work with `cdef` + Int64Engine._call_map_locations(self, values.view('i8')) + + cdef _call_monotonic(self, values): + # super(...) 
pattern doesn't seem to work with `cdef` + return Int64Engine._call_monotonic(self, values.view('i8')) + + def get_indexer(self, values): + cdef ndarray[int64_t, ndim=1] ordinals + + super(PeriodEngine, self)._ensure_mapping_populated() + + freq = super(PeriodEngine, self).vgetter().freq + ordinals = periodlib.extract_ordinals(values, freq) + + return self.mapping.lookup(ordinals) + + def get_pad_indexer(self, other, limit=None): + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(other, freq) + + return algos.pad(self._get_index_values(), + np.asarray(ordinal), limit=limit) + + def get_backfill_indexer(self, other, limit=None): + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(other, freq) + + return algos.backfill(self._get_index_values(), + np.asarray(ordinal), limit=limit) + + def get_indexer_non_unique(self, targets): + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(targets, freq) + ordinal_array = np.asarray(ordinal) + + return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array) + + +cpdef convert_scalar(ndarray arr, object value): + # we don't turn integers + # into datetimes/timedeltas + + # we don't turn bools into int/float/complex + + if arr.descr.type_num == NPY_DATETIME: + if util.is_array(value): + pass + elif isinstance(value, (datetime, np.datetime64, date)): + return Timestamp(value).to_datetime64() + elif util.is_timedelta64_object(value): + # exclude np.timedelta64("NaT") from value != value below + pass + elif value is None or value != value: + return np.datetime64("NaT", "ns") + raise ValueError("cannot set a Timestamp with a non-timestamp " + f"{type(value).__name__}") + + elif arr.descr.type_num == NPY_TIMEDELTA: + if util.is_array(value): + pass + elif isinstance(value, timedelta) or util.is_timedelta64_object(value): + value = Timedelta(value) + if value is NaT: + return np.timedelta64("NaT", "ns") + return 
value.to_timedelta64() + elif util.is_datetime64_object(value): + # exclude np.datetime64("NaT") which would otherwise be picked up + # by the `value != value check below + pass + elif value is None or value != value: + return np.timedelta64("NaT", "ns") + raise ValueError("cannot set a Timedelta with a non-timedelta " + f"{type(value).__name__}") + + if (issubclass(arr.dtype.type, (np.integer, np.floating, np.complex)) and + not issubclass(arr.dtype.type, np.bool_)): + if util.is_bool_object(value): + raise ValueError("Cannot assign bool to float/integer series") + + if issubclass(arr.dtype.type, (np.integer, np.bool_)): + if util.is_float_object(value) and value != value: + raise ValueError("Cannot assign nan to integer series") + + return value + + +cdef class BaseMultiIndexCodesEngine: + """ + Base class for MultiIndexUIntEngine and MultiIndexPyIntEngine, which + represent each label in a MultiIndex as an integer, by juxtaposing the bits + encoding each level, with appropriate offsets. + + For instance: if 3 levels have respectively 3, 6 and 1 possible values, + then their labels can be represented using respectively 2, 3 and 1 bits, + as follows: + _ _ _ _____ _ __ __ __ + |0|0|0| ... |0| 0|a1|a0| -> offset 0 (first level) + — — — ————— — —— —— —— + |0|0|0| ... |0|b2|b1|b0| -> offset 2 (bits required for first level) + — — — ————— — —— —— —— + |0|0|0| ... |0| 0| 0|c0| -> offset 5 (bits required for first two levels) + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ + and the resulting unsigned integer representation will be: + _ _ _ _____ _ __ __ __ __ __ __ + |0|0|0| ... |0|c0|b2|b1|b0|a1|a0| + ‾ ‾ ‾ ‾‾‾‾‾ ‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ ‾‾ + + Offsets are calculated at initialization, labels are transformed by method + _codes_to_ints. + + Keys are located by first locating each component against the respective + level, then locating (the integer representation of) codes. 
+ """ + def __init__(self, object levels, object labels, + ndarray[uint64_t, ndim=1] offsets): + """ + Parameters + ---------- + levels : list-like of numpy arrays + Levels of the MultiIndex. + labels : list-like of numpy arrays of integer dtype + Labels of the MultiIndex. + offsets : numpy array of uint64 dtype + Pre-calculated offsets, one for each level of the index. + """ + self.levels = levels + self.offsets = offsets + + # Transform labels in a single array, and add 1 so that we are working + # with positive integers (-1 for NaN becomes 0): + codes = (np.array(labels, dtype='int64').T + 1).astype('uint64', + copy=False) + + # Map each codes combination in the index to an integer unambiguously + # (no collisions possible), based on the "offsets", which describe the + # number of bits to switch labels for each level: + lab_ints = self._codes_to_ints(codes) + + # Initialize underlying index (e.g. libindex.UInt64Engine) with + # integers representing labels: we will use its get_loc and get_indexer + self._base.__init__(self, lambda: lab_ints, len(lab_ints)) + + def _extract_level_codes(self, object target, object method=None): + """ + Map the requested list of (tuple) keys to their integer representations + for searching in the underlying integer index. + + Parameters + ---------- + target : list-like of keys + Each key is a tuple, with a label for each level of the index. + + Returns + ------ + int_keys : 1-dimensional array of dtype uint64 or object + Integers representing one combination each + """ + level_codes = [lev.get_indexer(codes) + 1 for lev, codes + in zip(self.levels, zip(*target))] + return self._codes_to_ints(np.array(level_codes, dtype='uint64').T) + + def get_indexer(self, object target, object method=None, + object limit=None): + lab_ints = self._extract_level_codes(target) + + # All methods (exact, backfill, pad) directly map to the respective + # methods of the underlying (integers) index... 
+ if method is not None: + # but underlying backfill and pad methods require index and keys + # to be sorted. The index already is (checked in + # Index._get_fill_indexer), sort (integer representations of) keys: + order = np.argsort(lab_ints) + lab_ints = lab_ints[order] + indexer = (getattr(self._base, f'get_{method}_indexer') + (self, lab_ints, limit=limit)) + indexer = indexer[order] + else: + indexer = self._base.get_indexer(self, lab_ints) + + return indexer + + def get_loc(self, object key): + if is_definitely_invalid_key(key): + raise TypeError(f"'{key}' is an invalid key") + if not isinstance(key, tuple): + raise KeyError(key) + try: + indices = [0 if checknull(v) else lev.get_loc(v) + 1 + for lev, v in zip(self.levels, key)] + except KeyError: + raise KeyError(key) + + # Transform indices into single integer: + lab_int = self._codes_to_ints(np.array(indices, dtype='uint64')) + + return self._base.get_loc(self, lab_int) + + def get_indexer_non_unique(self, object target): + # This needs to be overridden just because the default one works on + # target._values, and target can be itself a MultiIndex. + + lab_ints = self._extract_level_codes(target) + indexer = self._base.get_indexer_non_unique(self, lab_ints) + + return indexer + + def __contains__(self, object val): + # Default __contains__ looks in the underlying mapping, which in this + # case only contains integer representations. + try: + self.get_loc(val) + return True + except (KeyError, TypeError, ValueError): + return False + + +# Generated from template. +include "index_class_helper.pxi" diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in new file mode 100644 index 00000000..69b7db93 --- /dev/null +++ b/pandas/_libs/index_class_helper.pxi.in @@ -0,0 +1,83 @@ +""" +Template for functions of IndexEngine subclasses. 
+ +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# IndexEngine Subclass Methods +# ---------------------------------------------------------------------- + +{{py: + +# name, dtype, ctype, hashtable_name, hashtable_dtype +dtypes = [('Float64', 'float64', 'float64_t', 'Float64', 'float64'), + ('Float32', 'float32', 'float32_t', 'Float64', 'float64'), + ('Int64', 'int64', 'int64_t', 'Int64', 'int64'), + ('Int32', 'int32', 'int32_t', 'Int64', 'int64'), + ('Int16', 'int16', 'int16_t', 'Int64', 'int64'), + ('Int8', 'int8', 'int8_t', 'Int64', 'int64'), + ('UInt64', 'uint64', 'uint64_t', 'UInt64', 'uint64'), + ('UInt32', 'uint32', 'uint32_t', 'UInt64', 'uint64'), + ('UInt16', 'uint16', 'uint16_t', 'UInt64', 'uint64'), + ('UInt8', 'uint8', 'uint8_t', 'UInt64', 'uint64'), + ] +}} + +{{for name, dtype, ctype, hashtable_name, hashtable_dtype in dtypes}} + + +cdef class {{name}}Engine(IndexEngine): + + cdef _make_hash_table(self, Py_ssize_t n): + return _hash.{{hashtable_name}}HashTable(n) + + {{if name not in {'Float64', 'Float32'} }} + cdef _check_type(self, object val): + if not util.is_integer_object(val): + raise KeyError(val) + {{endif}} + + cpdef _call_map_locations(self, values): + # self.mapping is of type {{hashtable_name}}HashTable, + # so convert dtype of values + self.mapping.map_locations(algos.ensure_{{hashtable_dtype}}(values)) + + cdef _get_index_values(self): + return algos.ensure_{{dtype}}(self.vgetter()) + + cdef _maybe_get_bool_indexer(self, object val): + cdef: + ndarray[uint8_t, ndim=1, cast=True] indexer + ndarray[intp_t, ndim=1] found + ndarray[{{ctype}}] values + int count = 0 + + {{if name not in {'Float64', 'Float32'} }} + if not util.is_integer_object(val): + raise KeyError(val) + {{endif}} + + # A view is needed for some subclasses, such as PeriodEngine: + values = self._get_index_values().view('{{dtype}}') + try: + with 
warnings.catch_warnings(): + # e.g. if values is float64 and `val` is a str, suppress warning + warnings.filterwarnings("ignore", category=FutureWarning) + indexer = values == val + except TypeError: + # if the equality above returns a bool, cython will raise TypeError + # when trying to cast it to ndarray + raise KeyError(val) + + found = np.where(indexer)[0] + count = len(found) + + if count > 1: + return indexer + if count == 1: + return int(found[0]) + + raise KeyError(val) + +{{endfor}} diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx new file mode 100644 index 00000000..01f4fb06 --- /dev/null +++ b/pandas/_libs/indexing.pyx @@ -0,0 +1,23 @@ +cdef class _NDFrameIndexerBase: + """ + A base class for _NDFrameIndexer for fast instantiation and attribute + access. + """ + cdef public object obj, name, _ndim + + def __init__(self, name, obj): + self.obj = obj + self.name = name + self._ndim = None + + @property + def ndim(self) -> int: + # Delay `ndim` instantiation until required as reading it + # from `obj` isn't entirely cheap. 
+ ndim = self._ndim + if ndim is None: + ndim = self._ndim = self.obj.ndim + if ndim > 2: + raise ValueError("NDFrameIndexer does not support " + "NDFrame objects with ndim > 2") + return ndim diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx new file mode 100644 index 00000000..8bbbc6db --- /dev/null +++ b/pandas/_libs/internals.pyx @@ -0,0 +1,449 @@ +import cython +from cython import Py_ssize_t + +from cpython.slice cimport PySlice_GetIndicesEx + +cdef extern from "Python.h": + Py_ssize_t PY_SSIZE_T_MAX + +import numpy as np +from numpy cimport int64_t + +from pandas._libs.algos import ensure_int64 + + +cdef class BlockPlacement: + # __slots__ = '_as_slice', '_as_array', '_len' + cdef: + slice _as_slice + object _as_array + + bint _has_slice, _has_array, _is_known_slice_like + + def __init__(self, val): + cdef: + slice slc + + self._as_slice = None + self._as_array = None + self._has_slice = False + self._has_array = False + + if isinstance(val, slice): + slc = slice_canonize(val) + + if slc.start != slc.stop: + self._as_slice = slc + self._has_slice = True + else: + arr = np.empty(0, dtype=np.int64) + self._as_array = arr + self._has_array = True + else: + # Cython memoryview interface requires ndarray to be writeable. 
+ arr = np.require(val, dtype=np.int64, requirements='W') + assert arr.ndim == 1 + self._as_array = arr + self._has_array = True + + def __str__(self) -> str: + cdef: + slice s = self._ensure_has_slice() + if s is not None: + v = self._as_slice + else: + v = self._as_array + + return f'{type(self).__name__}({v})' + + def __repr__(self) -> str: + return str(self) + + def __len__(self) -> int: + cdef: + slice s = self._ensure_has_slice() + if s is not None: + return slice_len(s) + else: + return len(self._as_array) + + def __iter__(self): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t start, stop, step, _ + if s is not None: + start, stop, step, _ = slice_get_indices_ex(s) + return iter(range(start, stop, step)) + else: + return iter(self._as_array) + + @property + def as_slice(self) -> slice: + cdef: + slice s = self._ensure_has_slice() + if s is None: + raise TypeError('Not slice-like') + else: + return s + + @property + def indexer(self): + cdef: + slice s = self._ensure_has_slice() + if s is not None: + return s + else: + return self._as_array + + def isin(self, arr): + from pandas.core.indexes.api import Int64Index + return Int64Index(self.as_array, copy=False).isin(arr) + + @property + def as_array(self): + cdef: + Py_ssize_t start, stop, end, _ + if not self._has_array: + start, stop, step, _ = slice_get_indices_ex(self._as_slice) + self._as_array = np.arange(start, stop, step, + dtype=np.int64) + self._has_array = True + return self._as_array + + @property + def is_slice_like(self) -> bool: + cdef: + slice s = self._ensure_has_slice() + return s is not None + + def __getitem__(self, loc): + cdef: + slice s = self._ensure_has_slice() + if s is not None: + val = slice_getitem(s, loc) + else: + val = self._as_array[loc] + + if not isinstance(val, slice) and val.ndim == 0: + return val + + return BlockPlacement(val) + + def delete(self, loc): + return BlockPlacement(np.delete(self.as_array, loc, axis=0)) + + def append(self, others): + if len(others) 
== 0: + return self + + return BlockPlacement(np.concatenate([self.as_array] + + [o.as_array for o in others])) + + cdef iadd(self, other): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step, l + + if isinstance(other, int) and s is not None: + other_int = other + + if other_int == 0: + # BlockPlacement is treated as immutable + return self + + start, stop, step, l = slice_get_indices_ex(s) + start += other_int + stop += other_int + + if ((step > 0 and start < 0) or + (step < 0 and stop < step)): + raise ValueError("iadd causes length change") + + if stop < 0: + val = slice(start, None, step) + else: + val = slice(start, stop, step) + + return BlockPlacement(val) + else: + newarr = self.as_array + other + if (newarr < 0).any(): + raise ValueError("iadd causes length change") + + val = newarr + return BlockPlacement(val) + + def add(self, other): + return self.iadd(other) + + def sub(self, other): + return self.add(-other) + + cdef slice _ensure_has_slice(self): + if not self._has_slice: + self._as_slice = indexer_as_slice(self._as_array) + self._has_slice = True + return self._as_slice + + +cdef slice slice_canonize(slice s): + """ + Convert slice to canonical bounded form. 
+ """ + cdef: + Py_ssize_t start = 0, stop = 0, step = 1, length + + if s.step is None: + step = 1 + else: + step = s.step + if step == 0: + raise ValueError("slice step cannot be zero") + + if step > 0: + if s.stop is None: + raise ValueError("unbounded slice") + + stop = s.stop + if s.start is None: + start = 0 + else: + start = s.start + if start > stop: + start = stop + elif step < 0: + if s.start is None: + raise ValueError("unbounded slice") + + start = s.start + if s.stop is None: + stop = -1 + else: + stop = s.stop + if stop > start: + stop = start + + if start < 0 or (stop < 0 and s.stop is not None): + raise ValueError("unbounded slice") + + if stop < 0: + return slice(start, None, step) + else: + return slice(start, stop, step) + + +cpdef Py_ssize_t slice_len( + slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: + """ + Get length of a bounded slice. + + The slice must not have any "open" bounds that would create dependency on + container size, i.e.: + - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` + - if ``s.step < 0``, ``s.start`` is not ``None`` + + Otherwise, the result is unreliable. + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc must be slice") + + PySlice_GetIndicesEx(slc, objlen, + &start, &stop, &step, &length) + + return length + + +cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): + """ + Get (start, stop, step, length) tuple for a slice. + + If `objlen` is not specified, slice must be bounded, otherwise the result + will be wrong. 
+ """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc should be a slice") + + PySlice_GetIndicesEx(slc, objlen, + &start, &stop, &step, &length) + + return start, stop, step, length + + +cdef slice_getitem(slice slc, ind): + cdef: + Py_ssize_t s_start, s_stop, s_step, s_len + Py_ssize_t ind_start, ind_stop, ind_step, ind_len + + s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) + + if isinstance(ind, slice): + ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, + s_len) + + if ind_step > 0 and ind_len == s_len: + # short-cut for no-op slice + if ind_len == s_len: + return slc + + if ind_step < 0: + s_start = s_stop - s_step + ind_step = -ind_step + + s_step *= ind_step + s_stop = s_start + ind_stop * s_step + s_start = s_start + ind_start * s_step + + if s_step < 0 and s_stop < 0: + return slice(s_start, None, s_step) + else: + return slice(s_start, s_stop, s_step) + + else: + return np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind] + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef slice indexer_as_slice(int64_t[:] vals): + cdef: + Py_ssize_t i, n, start, stop + int64_t d + + if vals is None: + raise TypeError("vals must be ndarray") + + n = vals.shape[0] + + if n == 0 or vals[0] < 0: + return None + + if n == 1: + return slice(vals[0], vals[0] + 1, 1) + + if vals[1] < 0: + return None + + # n > 2 + d = vals[1] - vals[0] + + if d == 0: + return None + + for i in range(2, n): + if vals[i] < 0 or vals[i] - vals[i - 1] != d: + return None + + start = vals[0] + stop = start + n * d + if stop < 0 and d < 0: + return slice(start, None, d) + else: + return slice(start, stop, d) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_blkno_indexers(int64_t[:] blknos, bint group=True): + """ + Enumerate contiguous runs of integers in ndarray. + + Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` + pairs for each contiguous run found. 
+ + If `group` is True and there is more than one run for a certain blkno, + ``(blkno, array)`` with an array containing positions of all elements equal + to blkno. + + Returns + ------- + iter : iterator of (int, slice or array) + """ + # There's blkno in this function's name because it's used in block & + # blockno handling. + cdef: + int64_t cur_blkno + Py_ssize_t i, start, stop, n, diff + + object blkno + list group_order + dict group_dict + int64_t[:] res_view + + n = blknos.shape[0] + + if n == 0: + return + + start = 0 + cur_blkno = blknos[start] + + if group is False: + for i in range(1, n): + if blknos[i] != cur_blkno: + yield cur_blkno, slice(start, i) + + start = i + cur_blkno = blknos[i] + + yield cur_blkno, slice(start, n) + else: + group_order = [] + group_dict = {} + + for i in range(1, n): + if blknos[i] != cur_blkno: + if cur_blkno not in group_dict: + group_order.append(cur_blkno) + group_dict[cur_blkno] = [(start, i)] + else: + group_dict[cur_blkno].append((start, i)) + + start = i + cur_blkno = blknos[i] + + if cur_blkno not in group_dict: + group_order.append(cur_blkno) + group_dict[cur_blkno] = [(start, n)] + else: + group_dict[cur_blkno].append((start, n)) + + for blkno in group_order: + slices = group_dict[blkno] + if len(slices) == 1: + yield blkno, slice(slices[0][0], slices[0][1]) + else: + tot_len = sum(stop - start for start, stop in slices) + result = np.empty(tot_len, dtype=np.int64) + res_view = result + + i = 0 + for start, stop in slices: + for diff in range(start, stop): + res_view[i] = diff + i += 1 + + yield blkno, result + + +def get_blkno_placements(blknos, group: bool = True): + """ + Parameters + ---------- + blknos : array of int64 + group : bool, default True + + Returns + ------- + iterator + yield (BlockPlacement, blkno) + """ + blknos = ensure_int64(blknos) + + for blkno, indexer in get_blkno_indexers(blknos, group): + yield blkno, BlockPlacement(indexer) diff --git a/pandas/_libs/interval.pyx 
b/pandas/_libs/interval.pyx new file mode 100644 index 00000000..08daedf5 --- /dev/null +++ b/pandas/_libs/interval.pyx @@ -0,0 +1,531 @@ +import numbers +from operator import le, lt + +from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE, + PyObject_RichCompare) + +import cython +from cython import Py_ssize_t + +import numpy as np +cimport numpy as cnp +from numpy cimport ( + int64_t, int32_t, float64_t, float32_t, uint64_t, + ndarray, + PyArray_ArgSort, NPY_QUICKSORT, PyArray_Take) +cnp.import_array() + + +cimport pandas._libs.util as util + +from pandas._libs.hashtable cimport Int64Vector +from pandas._libs.tslibs.util cimport is_integer_object, is_float_object + +from pandas._libs.tslibs import Timestamp +from pandas._libs.tslibs.timedeltas import Timedelta +from pandas._libs.tslibs.timezones cimport tz_compare + + +_VALID_CLOSED = frozenset(['left', 'right', 'both', 'neither']) + + +cdef class IntervalMixin: + + @property + def closed_left(self): + """ + Check if the interval is closed on the left side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return self.closed in ('left', 'both') + + @property + def closed_right(self): + """ + Check if the interval is closed on the right side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return self.closed in ('right', 'both') + + @property + def open_left(self): + """ + Check if the interval is open on the left side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. + + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return not self.closed_left + + @property + def open_right(self): + """ + Check if the interval is open on the right side. + + For the meaning of `closed` and `open` see :class:`~pandas.Interval`. 
+ + Returns + ------- + bool + True if the Interval is closed on the left-side. + """ + return not self.closed_right + + @property + def mid(self): + """ + Return the midpoint of the Interval. + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * self.length + + @property + def length(self): + """ + Return the length of the Interval. + """ + return self.right - self.left + + @property + def is_empty(self): + """ + Indicates if an interval is empty, meaning it contains no points. + + .. versionadded:: 0.25.0 + + Returns + ------- + bool or ndarray + A boolean indicating if a scalar :class:`Interval` is empty, or a + boolean ``ndarray`` positionally indicating if an ``Interval`` in + an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is + empty. + + Examples + -------- + An :class:`Interval` that contains points is not empty: + + >>> pd.Interval(0, 1, closed='right').is_empty + False + + An ``Interval`` that does not contain any points is empty: + + >>> pd.Interval(0, 0, closed='right').is_empty + True + >>> pd.Interval(0, 0, closed='left').is_empty + True + >>> pd.Interval(0, 0, closed='neither').is_empty + True + + An ``Interval`` that contains a single point is not empty: + + >>> pd.Interval(0, 0, closed='both').is_empty + False + + An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a + boolean ``ndarray`` positionally indicating if an ``Interval`` is + empty: + + >>> ivs = [pd.Interval(0, 0, closed='neither'), + ... pd.Interval(1, 2, closed='neither')] + >>> pd.arrays.IntervalArray(ivs).is_empty + array([ True, False]) + + Missing values are not considered empty: + + >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan] + >>> pd.IntervalIndex(ivs).is_empty + array([ True, False]) + """ + return (self.right == self.left) & (self.closed != 'both') + + def _check_closed_matches(self, other, name='other'): + """Check if the closed attribute of `other` matches. 
+ + Note that 'left' and 'right' are considered different from 'both'. + + Parameters + ---------- + other : Interval, IntervalIndex, IntervalArray + name : str + Name to use for 'other' in the error message. + + Raises + ------ + ValueError + When `other` is not closed exactly the same as self. + """ + if self.closed != other.closed: + raise ValueError(f"'{name}.closed' is {repr(other.closed)}, " + f"expected {repr(self.closed)}.") + + +cdef _interval_like(other): + return (hasattr(other, 'left') + and hasattr(other, 'right') + and hasattr(other, 'closed')) + + +cdef class Interval(IntervalMixin): + """ + Immutable object implementing an Interval, a bounded slice-like interval. + + Parameters + ---------- + left : orderable scalar + Left bound for the interval. + right : orderable scalar + Right bound for the interval. + closed : {'right', 'left', 'both', 'neither'}, default 'right' + Whether the interval is closed on the left-side, right-side, both or + neither. See the Notes for more detailed explanation. + + See Also + -------- + IntervalIndex : An Index of Interval objects that are all closed on the + same side. + cut : Convert continuous data into discrete bins (Categorical + of Interval objects). + qcut : Convert continuous data into bins (Categorical of Interval objects) + based on quantiles. + Period : Represents a period of time. + + Notes + ----- + The parameters `left` and `right` must be from the same type, you must be + able to compare them and they must satisfy ``left <= right``. + + A closed interval (in mathematics denoted by square brackets) contains + its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the + conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for. + An open interval (in mathematics denoted by parentheses) does not contain + its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the + conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for. 
+ Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is + described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is + described by ``0 < x <= 5`` (``closed='right'``). + + Examples + -------- + It is possible to build Intervals of different types, like numeric ones: + + >>> iv = pd.Interval(left=0, right=5) + >>> iv + Interval(0, 5, closed='right') + + You can check if an element belongs to it + + >>> 2.5 in iv + True + + You can test the bounds (``closed='right'``, so ``0 < x <= 5``): + + >>> 0 in iv + False + >>> 5 in iv + True + >>> 0.0001 in iv + True + + Calculate its length + + >>> iv.length + 5 + + You can operate with `+` and `*` over an Interval and the operation + is applied to each of its bounds, so the result depends on the type + of the bound elements + + >>> shifted_iv = iv + 3 + >>> shifted_iv + Interval(3, 8, closed='right') + >>> extended_iv = iv * 10.0 + >>> extended_iv + Interval(0.0, 50.0, closed='right') + + To create a time interval you can use Timestamps as the bounds + + >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'), + ... pd.Timestamp('2018-01-01 00:00:00'), + ... closed='left') + >>> pd.Timestamp('2017-01-01 00:00') in year_2017 + True + >>> year_2017.length + Timedelta('365 days 00:00:00') + + And also you can create string intervals + + >>> volume_1 = pd.Interval('Ant', 'Dog', closed='both') + >>> 'Bee' in volume_1 + True + """ + _typ = "interval" + + cdef readonly object left + """ + Left bound for the interval. + """ + + cdef readonly object right + """ + Right bound for the interval. + """ + + cdef readonly str closed + """ + Whether the interval is closed on the left-side, right-side, both or + neither. 
+ """ + + def __init__(self, left, right, str closed='right'): + # note: it is faster to just do these checks than to use a special + # constructor (__cinit__/__new__) to avoid them + + self._validate_endpoint(left) + self._validate_endpoint(right) + + if closed not in _VALID_CLOSED: + raise ValueError(f"invalid option for 'closed': {closed}") + if not left <= right: + raise ValueError("left side of interval must be <= right side") + if (isinstance(left, Timestamp) and + not tz_compare(left.tzinfo, right.tzinfo)): + # GH 18538 + raise ValueError("left and right must have the same time zone, got " + f"{repr(left.tzinfo)}' and {repr(right.tzinfo)}") + self.left = left + self.right = right + self.closed = closed + + def _validate_endpoint(self, endpoint): + # GH 23013 + if not (is_integer_object(endpoint) or is_float_object(endpoint) or + isinstance(endpoint, (Timestamp, Timedelta))): + raise ValueError("Only numeric, Timestamp and Timedelta endpoints " + "are allowed when constructing an Interval.") + + def __hash__(self): + return hash((self.left, self.right, self.closed)) + + def __contains__(self, key) -> bool: + if _interval_like(key): + raise TypeError("__contains__ not defined for two intervals") + return ((self.left < key if self.open_left else self.left <= key) and + (key < self.right if self.open_right else key <= self.right)) + + def __richcmp__(self, other, op: int): + if hasattr(other, 'ndim'): + # let numpy (or IntervalIndex) handle vectorization + return NotImplemented + + if _interval_like(other): + self_tuple = (self.left, self.right, self.closed) + other_tuple = (other.left, other.right, other.closed) + return PyObject_RichCompare(self_tuple, other_tuple, op) + + # nb. could just return NotImplemented now, but handling this + # explicitly allows us to opt into the Python 3 behavior, even on + # Python 2. 
+ if op == Py_EQ or op == Py_NE: + return NotImplemented + else: + name = type(self).__name__ + other = type(other).__name__ + op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op] + raise TypeError(f"unorderable types: {name}() {op_str} {other}()") + + def __reduce__(self): + args = (self.left, self.right, self.closed) + return (type(self), args) + + def _repr_base(self): + left = self.left + right = self.right + + # TODO: need more general formatting methodology here + if isinstance(left, Timestamp) and isinstance(right, Timestamp): + left = left._short_repr + right = right._short_repr + + return left, right + + def __repr__(self) -> str: + + left, right = self._repr_base() + name = type(self).__name__ + repr_str = f'{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})' + return repr_str + + def __str__(self) -> str: + + left, right = self._repr_base() + start_symbol = '[' if self.closed_left else '(' + end_symbol = ']' if self.closed_right else ')' + return f'{start_symbol}{left}, {right}{end_symbol}' + + def __add__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left + y, self.right + y, closed=self.closed) + elif isinstance(y, Interval) and isinstance(self, numbers.Number): + return Interval(y.left + self, y.right + self, closed=y.closed) + return NotImplemented + + def __sub__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left - y, self.right - y, closed=self.closed) + return NotImplemented + + def __mul__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left * y, self.right * y, closed=self.closed) + elif isinstance(y, Interval) and isinstance(self, numbers.Number): + return Interval(y.left * self, y.right * self, closed=y.closed) + return NotImplemented + + def __truediv__(self, y): + if isinstance(y, numbers.Number): + return Interval(self.left / y, self.right / y, closed=self.closed) + return NotImplemented + + def __floordiv__(self, y): + if isinstance(y, 
numbers.Number): + return Interval( + self.left // y, self.right // y, closed=self.closed) + return NotImplemented + + def overlaps(self, other): + """ + Check whether two Interval objects overlap. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + other : Interval + Interval to check against for an overlap. + + Returns + ------- + bool + True if the two intervals overlap. + + See Also + -------- + IntervalArray.overlaps : The corresponding method for IntervalArray. + IntervalIndex.overlaps : The corresponding method for IntervalIndex. + + Examples + -------- + >>> i1 = pd.Interval(0, 2) + >>> i2 = pd.Interval(1, 3) + >>> i1.overlaps(i2) + True + >>> i3 = pd.Interval(4, 5) + >>> i1.overlaps(i3) + False + + Intervals that share closed endpoints overlap: + + >>> i4 = pd.Interval(0, 1, closed='both') + >>> i5 = pd.Interval(1, 2, closed='both') + >>> i4.overlaps(i5) + True + + Intervals that only have an open endpoint in common do not overlap: + + >>> i6 = pd.Interval(1, 2, closed='neither') + >>> i4.overlaps(i6) + False + """ + if not isinstance(other, Interval): + raise TypeError("`other` must be an Interval, " + f"got {type(other).__name__}") + + # equality is okay if both endpoints are closed (overlap at a point) + op1 = le if (self.closed_left and other.closed_right) else lt + op2 = le if (other.closed_left and self.closed_right) else lt + + # overlaps is equivalent negation of two interval being disjoint: + # disjoint = (A.left > B.right) or (B.left > A.right) + # (simplifying the negation allows this to be done in less operations) + return op1(self.left, other.right) and op2(other.left, self.right) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def intervals_to_interval_bounds(ndarray intervals, + bint validate_closed=True): + """ + Parameters + ---------- + intervals : ndarray + Object 
array of Intervals / nulls. + + validate_closed: bool, default True + Boolean indicating if all intervals must be closed on the same side. + Mismatching closed will raise if True, else return None for closed. + + Returns + ------- + tuple of tuples + left : (ndarray, object, array) + right : (ndarray, object, array) + closed: str + """ + cdef: + object closed = None, interval + int64_t n = len(intervals) + ndarray left, right + bint seen_closed = False + + left = np.empty(n, dtype=intervals.dtype) + right = np.empty(n, dtype=intervals.dtype) + + for i in range(len(intervals)): + interval = intervals[i] + if interval is None or util.is_nan(interval): + left[i] = np.nan + right[i] = np.nan + continue + + if not isinstance(interval, Interval): + raise TypeError(f"type {type(interval)} with value " + f"{interval} is not an interval") + + left[i] = interval.left + right[i] = interval.right + if not seen_closed: + seen_closed = True + closed = interval.closed + elif closed != interval.closed: + closed = None + if validate_closed: + raise ValueError("intervals must all be closed on the same side") + + return left, right, closed + + +include "intervaltree.pxi" diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in new file mode 100644 index 00000000..d09413bf --- /dev/null +++ b/pandas/_libs/intervaltree.pxi.in @@ -0,0 +1,411 @@ +""" +Template for intervaltree + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +from pandas._libs.algos import is_monotonic + +ctypedef fused int_scalar_t: + int64_t + float64_t + +ctypedef fused uint_scalar_t: + uint64_t + float64_t + +ctypedef fused scalar_t: + int_scalar_t + uint_scalar_t + +# ---------------------------------------------------------------------- +# IntervalTree +# ---------------------------------------------------------------------- + +cdef class IntervalTree(IntervalMixin): + """A centered interval tree + + Based off the algorithm described on Wikipedia: + 
http://en.wikipedia.org/wiki/Interval_tree + + we are emulating the IndexEngine interface + """ + cdef readonly: + object left, right, root, dtype + str closed + object _is_overlapping, _left_sorter, _right_sorter + + def __init__(self, left, right, closed='right', leaf_size=100): + """ + Parameters + ---------- + left, right : np.ndarray[ndim=1] + Left and right bounds for each interval. Assumed to contain no + NaNs. + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both + or neither. Defaults to 'right'. + leaf_size : int, optional + Parameter that controls when the tree switches from creating nodes + to brute-force search. Tune this parameter to optimize query + performance. + """ + if closed not in ['left', 'right', 'both', 'neither']: + raise ValueError("invalid option for 'closed': %s" % closed) + + left = np.asarray(left) + right = np.asarray(right) + self.dtype = np.result_type(left, right) + self.left = np.asarray(left, dtype=self.dtype) + self.right = np.asarray(right, dtype=self.dtype) + + indices = np.arange(len(left), dtype='int64') + + self.closed = closed + + # GH 23352: ensure no nan in nodes + mask = ~np.isnan(self.left) + self.left = self.left[mask] + self.right = self.right[mask] + indices = indices[mask] + + node_cls = NODE_CLASSES[str(self.dtype), closed] + self.root = node_cls(self.left, self.right, indices, leaf_size) + + @property + def left_sorter(self): + """How to sort the left labels; this is used for binary search + """ + if self._left_sorter is None: + self._left_sorter = np.argsort(self.left) + return self._left_sorter + + @property + def right_sorter(self): + """How to sort the right labels + """ + if self._right_sorter is None: + self._right_sorter = np.argsort(self.right) + return self._right_sorter + + @property + def is_overlapping(self): + """ + Determine if the IntervalTree contains overlapping intervals. + Cached as self._is_overlapping. 
+ """ + if self._is_overlapping is not None: + return self._is_overlapping + + # <= when both sides closed since endpoints can overlap + op = le if self.closed == 'both' else lt + + # overlap if start of current interval < end of previous interval + # (current and previous in terms of sorted order by left/start side) + current = self.left[self.left_sorter[1:]] + previous = self.right[self.left_sorter[:-1]] + self._is_overlapping = bool(op(current, previous).any()) + + return self._is_overlapping + + @property + def is_monotonic_increasing(self): + """ + Return True if the IntervalTree is monotonic increasing (only equal or + increasing values), else False + """ + values = [self.right, self.left] + + sort_order = np.lexsort(values) + return is_monotonic(sort_order, False)[0] + + def get_indexer(self, scalar_t[:] target): + """Return the positions corresponding to unique intervals that overlap + with the given array of scalar targets. + """ + + # TODO: write get_indexer_intervals + cdef: + Py_ssize_t old_len + Py_ssize_t i + Int64Vector result + + result = Int64Vector() + old_len = 0 + for i in range(len(target)): + try: + self.root.query(result, target[i]) + except OverflowError: + # overflow -> no match, which is already handled below + pass + + if result.data.n == old_len: + result.append(-1) + elif result.data.n > old_len + 1: + raise KeyError( + 'indexer does not intersect a unique set of intervals') + old_len = result.data.n + return result.to_array().astype('intp') + + def get_indexer_non_unique(self, scalar_t[:] target): + """Return the positions corresponding to intervals that overlap with + the given array of scalar targets. Non-unique positions are repeated. 
+ """ + cdef: + Py_ssize_t old_len + Py_ssize_t i + Int64Vector result, missing + + result = Int64Vector() + missing = Int64Vector() + old_len = 0 + for i in range(len(target)): + try: + self.root.query(result, target[i]) + except OverflowError: + # overflow -> no match, which is already handled below + pass + + if result.data.n == old_len: + result.append(-1) + missing.append(i) + old_len = result.data.n + return (result.to_array().astype('intp'), + missing.to_array().astype('intp')) + + def __repr__(self) -> str: + return (''.format( + dtype=self.dtype, closed=self.closed, + n_elements=self.root.n_elements)) + + # compat with IndexEngine interface + def clear_mapping(self): + pass + + +cdef take(ndarray source, ndarray indices): + """Take the given positions from a 1D ndarray + """ + return PyArray_Take(source, indices, 0) + + +cdef sort_values_and_indices(all_values, all_indices, subset): + indices = take(all_indices, subset) + values = take(all_values, subset) + sorter = PyArray_ArgSort(values, 0, NPY_QUICKSORT) + sorted_values = take(values, sorter) + sorted_indices = take(indices, sorter) + return sorted_values, sorted_indices + + +# ---------------------------------------------------------------------- +# Nodes +# ---------------------------------------------------------------------- + +# we need specialized nodes and leaves to optimize for different dtype and +# closed values + +{{py: + +nodes = [] +for dtype in ['float64', 'int64', 'uint64']: + for closed, cmp_left, cmp_right in [ + ('left', '<=', '<'), + ('right', '<', '<='), + ('both', '<=', '<='), + ('neither', '<', '<')]: + cmp_left_converse = '<' if cmp_left == '<=' else '<=' + cmp_right_converse = '<' if cmp_right == '<=' else '<=' + if dtype.startswith('int'): + fused_prefix = 'int_' + elif dtype.startswith('uint'): + fused_prefix = 'uint_' + elif dtype.startswith('float'): + fused_prefix = '' + nodes.append((dtype, dtype.title(), + closed, closed.title(), + cmp_left, + cmp_right, + 
cmp_left_converse, + cmp_right_converse, + fused_prefix)) + +}} + +NODE_CLASSES = {} + +{{for dtype, dtype_title, closed, closed_title, cmp_left, cmp_right, + cmp_left_converse, cmp_right_converse, fused_prefix in nodes}} + +cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode: + """Non-terminal node for an IntervalTree + + Categorizes intervals by those that fall to the left, those that fall to + the right, and those that overlap with the pivot. + """ + cdef readonly: + {{dtype_title}}Closed{{closed_title}}IntervalNode left_node, right_node + {{dtype}}_t[:] center_left_values, center_right_values, left, right + int64_t[:] center_left_indices, center_right_indices, indices + {{dtype}}_t min_left, max_right + {{dtype}}_t pivot + int64_t n_elements, n_center, leaf_size + bint is_leaf_node + + def __init__(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + int64_t leaf_size): + + self.n_elements = len(left) + self.leaf_size = leaf_size + + # min_left and min_right are used to speed-up query by skipping + # query on sub-nodes. If this node has size 0, query is cheap, + # so these values don't matter. 
+ if left.size > 0: + self.min_left = left.min() + self.max_right = right.max() + else: + self.min_left = 0 + self.max_right = 0 + + if self.n_elements <= leaf_size: + # make this a terminal (leaf) node + self.is_leaf_node = True + self.left = left + self.right = right + self.indices = indices + self.n_center = 0 + else: + # calculate a pivot so we can create child nodes + self.is_leaf_node = False + self.pivot = np.median(left / 2 + right / 2) + left_set, right_set, center_set = self.classify_intervals( + left, right) + + self.left_node = self.new_child_node(left, right, + indices, left_set) + self.right_node = self.new_child_node(left, right, + indices, right_set) + + self.center_left_values, self.center_left_indices = \ + sort_values_and_indices(left, indices, center_set) + self.center_right_values, self.center_right_indices = \ + sort_values_and_indices(right, indices, center_set) + self.n_center = len(self.center_left_indices) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef classify_intervals(self, {{dtype}}_t[:] left, {{dtype}}_t[:] right): + """Classify the given intervals based upon whether they fall to the + left, right, or overlap with this node's pivot. + """ + cdef: + Int64Vector left_ind, right_ind, overlapping_ind + Py_ssize_t i + + left_ind = Int64Vector() + right_ind = Int64Vector() + overlapping_ind = Int64Vector() + + for i in range(self.n_elements): + if right[i] {{cmp_right_converse}} self.pivot: + left_ind.append(i) + elif self.pivot {{cmp_left_converse}} left[i]: + right_ind.append(i) + else: + overlapping_ind.append(i) + + return (left_ind.to_array(), + right_ind.to_array(), + overlapping_ind.to_array()) + + cdef new_child_node(self, + ndarray[{{dtype}}_t, ndim=1] left, + ndarray[{{dtype}}_t, ndim=1] right, + ndarray[int64_t, ndim=1] indices, + ndarray[int64_t, ndim=1] subset): + """Create a new child node. 
+ """ + left = take(left, subset) + right = take(right, subset) + indices = take(indices, subset) + return {{dtype_title}}Closed{{closed_title}}IntervalNode( + left, right, indices, self.leaf_size) + + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.initializedcheck(False) + cpdef query(self, Int64Vector result, {{fused_prefix}}scalar_t point): + """Recursively query this node and its sub-nodes for intervals that + overlap with the query point. + """ + cdef: + int64_t[:] indices + {{dtype}}_t[:] values + Py_ssize_t i + + if self.is_leaf_node: + # Once we get down to a certain size, it doesn't make sense to + # continue the binary tree structure. Instead, we use linear + # search. + for i in range(self.n_elements): + if self.left[i] {{cmp_left}} point {{cmp_right}} self.right[i]: + result.append(self.indices[i]) + else: + # There are child nodes. Based on comparing our query to the pivot, + # look at the center values, then go to the relevant child. + if point < self.pivot: + values = self.center_left_values + indices = self.center_left_indices + for i in range(self.n_center): + if not values[i] {{cmp_left}} point: + break + result.append(indices[i]) + if point {{cmp_right}} self.left_node.max_right: + self.left_node.query(result, point) + elif point > self.pivot: + values = self.center_right_values + indices = self.center_right_indices + for i in range(self.n_center - 1, -1, -1): + if not point {{cmp_right}} values[i]: + break + result.append(indices[i]) + if self.right_node.min_left {{cmp_left}} point: + self.right_node.query(result, point) + else: + result.extend(self.center_left_indices) + + def __repr__(self) -> str: + if self.is_leaf_node: + return ('<{{dtype_title}}Closed{{closed_title}}IntervalNode: ' + '%s elements (terminal)>' % self.n_elements) + else: + n_left = self.left_node.n_elements + n_right = self.right_node.n_elements + n_center = self.n_elements - n_left - n_right + return ('<{{dtype_title}}Closed{{closed_title}}IntervalNode: ' 
+ 'pivot %s, %s elements (%s left, %s right, %s ' + 'overlapping)>' % (self.pivot, self.n_elements, + n_left, n_right, n_center)) + + def counts(self): + """ + Inspect counts on this node + useful for debugging purposes + """ + if self.is_leaf_node: + return self.n_elements + else: + m = len(self.center_left_values) + l = self.left_node.counts() + r = self.right_node.counts() + return (m, (l, r)) + +NODE_CLASSES['{{dtype}}', + '{{closed}}'] = {{dtype_title}}Closed{{closed_title}}IntervalNode + +{{endfor}} diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx new file mode 100644 index 00000000..093c5379 --- /dev/null +++ b/pandas/_libs/join.pyx @@ -0,0 +1,975 @@ +import cython +from cython import Py_ssize_t + +import numpy as np +cimport numpy as cnp +from numpy cimport (ndarray, + int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float32_t, float64_t) +cnp.import_array() + +from pandas._libs.algos import ( + groupsort_indexer, ensure_platform_int, take_1d_int64_int64 +) + + +@cython.boundscheck(False) +def inner_join(const int64_t[:] left, const int64_t[:] right, + Py_ssize_t max_groups): + cdef: + Py_ssize_t i, j, k, count = 0 + ndarray[int64_t] left_count, right_count, left_sorter, right_sorter + ndarray[int64_t] left_indexer, right_indexer + int64_t lc, rc + Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0 + Py_ssize_t offset + + # NA group in location 0 + + left_sorter, left_count = groupsort_indexer(left, max_groups) + right_sorter, right_count = groupsort_indexer(right, max_groups) + + with nogil: + # First pass, determine size of result set, do not use the NA group + for i in range(1, max_groups + 1): + lc = left_count[i] + rc = right_count[i] + + if rc > 0 and lc > 0: + count += lc * rc + + # exclude the NA group + left_pos = left_count[0] + right_pos = right_count[0] + + left_indexer = np.empty(count, dtype=np.int64) + right_indexer = np.empty(count, dtype=np.int64) + + with nogil: + for i in range(1, max_groups + 
@cython.boundscheck(False)
def left_outer_join(const int64_t[:] left, const int64_t[:] right,
                    Py_ssize_t max_groups, sort=True):
    """
    Build the row indexers for a left outer join on group labels.

    Both label arrays are handed to ``groupsort_indexer`` together with
    ``max_groups``; the per-group counts it returns are read at positions
    ``0 .. max_groups``. Position 0 is the NA group and is excluded: only
    groups ``1 .. max_groups`` contribute rows to the result.

    Parameters
    ----------
    left, right : const int64_t[:]
        Group labels of the left and right side.
        NOTE(review): presumably factorized join keys with NA mapped to the
        group counted at position 0 -- confirm against the caller.
    max_groups : Py_ssize_t
        Highest group number that is iterated.
    sort : bool, default True
        If falsy, the result is re-ordered at the end ("revert to original
        order" of the left side); otherwise rows come out group by group.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64]
        Two arrays of equal length. For a left row whose group has no rows
        on the right, the right entry is -1.
    """
    cdef:
        Py_ssize_t i, j, k, count = 0
        ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
        ndarray rev
        ndarray[int64_t] left_indexer, right_indexer
        int64_t lc, rc
        Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
        Py_ssize_t offset

    # NA group in location 0

    left_sorter, left_count = groupsort_indexer(left, max_groups)
    right_sorter, right_count = groupsort_indexer(right, max_groups)

    with nogil:
        # First pass, determine size of result set, do not use the NA group
        for i in range(1, max_groups + 1):
            if right_count[i] > 0:
                # every left row pairs with every right row of the group
                count += left_count[i] * right_count[i]
            else:
                # unmatched left rows are kept, one output row each
                count += left_count[i]

    # exclude the NA group
    left_pos = left_count[0]
    right_pos = right_count[0]

    left_indexer = np.empty(count, dtype=np.int64)
    right_indexer = np.empty(count, dtype=np.int64)

    with nogil:
        # Second pass: fill positions in *sorted* order; left_pos / right_pos
        # track where the current group starts within each sorted side.
        for i in range(1, max_groups + 1):
            lc = left_count[i]
            rc = right_count[i]

            if rc == 0:
                # no partner on the right: emit each left row once with -1
                for j in range(lc):
                    left_indexer[position + j] = left_pos + j
                    right_indexer[position + j] = -1
                position += lc
            else:
                # cross product of the lc left rows and rc right rows
                for j in range(lc):
                    offset = position + j * rc
                    for k in range(rc):
                        left_indexer[offset + k] = left_pos + j
                        right_indexer[offset + k] = right_pos + k
                position += lc * rc
            left_pos += lc
            right_pos += rc

    # translate sorted positions back through the sorters (-1 stays -1)
    left_indexer = _get_result_indexer(left_sorter, left_indexer)
    right_indexer = _get_result_indexer(right_sorter, right_indexer)

    if not sort:  # if not asked to sort, revert to original order
        # cast to avoid build warning GH#26757
        if len(left) == len(left_indexer):
            # no multiple matches for any row on the left
            # this is a short-cut to avoid groupsort_indexer
            # otherwise, the `else` path also works in this case
            rev = np.empty(len(left), dtype=np.intp)
            rev.put(ensure_platform_int(left_sorter), np.arange(len(left)))
        else:
            rev, _ = groupsort_indexer(left_indexer, len(left))

        rev = ensure_platform_int(rev)
        # apply the same permutation to both sides so pairs stay aligned
        right_indexer = right_indexer.take(rev)
        left_indexer = left_indexer.take(rev)

    return left_indexer, right_indexer
def ffill_indexer(const int64_t[:] indexer):
    """
    Forward-fill the -1 entries of an indexer.

    Each -1 is replaced by the most recent preceding entry that was not -1.
    Leading -1 entries have nothing to carry forward and stay -1.

    Parameters
    ----------
    indexer : const int64_t[:]

    Returns
    -------
    ndarray[int64]
        New array of the same length as `indexer`.
    """
    cdef:
        Py_ssize_t pos, size = len(indexer)
        ndarray[int64_t] filled
        int64_t current, carried = -1

    filled = np.empty(size, dtype=np.int64)

    for pos in range(size):
        current = indexer[pos]
        if current != -1:
            # a real observation becomes the value carried forward
            carried = current
        filled[pos] = carried

    return filled
@cython.wraparound(False)
@cython.boundscheck(False)
def inner_join_indexer(ndarray[join_t] left, ndarray[join_t] right):
    """
    Two-pass algorithm for monotonic indexes. Handles many-to-one merges.

    The first pass walks both arrays with two cursors only to count the
    matching pairs; the second pass repeats the identical walk and writes
    the output arrays, which by then can be allocated at their final size.

    Parameters
    ----------
    left, right : ndarray[join_t]
        Values to join on. The walk only ever advances its cursors, so both
        inputs are expected to be sorted ascending ("monotonic").

    Returns
    -------
    result : ndarray[join_t]
        The matched values, one per emitted pair (dtype of `left`).
    lindexer, rindexer : ndarray[int64]
        Positions in `left` / `right` of each emitted pair.

    Notes
    -----
    Neither cursor is ever moved backwards. NOTE(review): as a consequence,
    when a value is duplicated on *both* sides the emitted pairs are not the
    full cross product -- which matches the "many-to-one" wording above.
    """
    cdef:
        Py_ssize_t i, j, k, nright, nleft, count
        join_t lval, rval
        ndarray[int64_t] lindexer, rindexer
        ndarray[join_t] result

    nleft = len(left)
    nright = len(right)

    # ---- pass 1: count matches ------------------------------------------
    i = 0
    j = 0
    count = 0
    if nleft > 0 and nright > 0:
        while True:
            if i == nleft:
                break
            if j == nright:
                break

            lval = left[i]
            rval = right[j]
            if lval == rval:
                count += 1
                if i < nleft - 1:
                    if j < nright - 1 and right[j + 1] == rval:
                        # same value repeats on the right: stay on this
                        # left row and pair it with the next right row
                        j += 1
                    else:
                        i += 1
                        if left[i] != rval:
                            # left moved on to a new value, so right may too
                            j += 1
                elif j < nright - 1:
                    # last left row: keep consuming right while it matches
                    j += 1
                    if lval != right[j]:
                        i += 1
                else:
                    # end of the road
                    break
            elif lval < rval:
                i += 1
            else:
                j += 1

    # do it again now that result size is known

    lindexer = np.empty(count, dtype=np.int64)
    rindexer = np.empty(count, dtype=np.int64)
    result = np.empty(count, dtype=left.dtype)

    # ---- pass 2: same walk, this time recording each match ---------------
    i = 0
    j = 0
    count = 0
    if nleft > 0 and nright > 0:
        while True:
            if i == nleft:
                break
            if j == nright:
                break

            lval = left[i]
            rval = right[j]
            if lval == rval:
                lindexer[count] = i
                rindexer[count] = j
                result[count] = rval
                count += 1
                if i < nleft - 1:
                    if j < nright - 1 and right[j + 1] == rval:
                        j += 1
                    else:
                        i += 1
                        if left[i] != rval:
                            j += 1
                elif j < nright - 1:
                    j += 1
                    if lval != right[j]:
                        i += 1
                else:
                    # end of the road
                    break
            elif lval < rval:
                i += 1
            else:
                j += 1

    return result, lindexer, rindexer
+ break + + lval = left[i] + rval = right[j] + if lval == rval: + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + count += 1 + i += 1 + else: + count += 1 + j += 1 + + lindexer = np.empty(count, dtype=np.int64) + rindexer = np.empty(count, dtype=np.int64) + result = np.empty(count, dtype=left.dtype) + + # do it again, but populate the indexers / result + + i = 0 + j = 0 + count = 0 + if nleft == 0: + for j in range(nright): + lindexer[j] = -1 + rindexer[j] = j + result[j] = right[j] + elif nright == 0: + for i in range(nleft): + lindexer[i] = i + rindexer[i] = -1 + result[i] = left[i] + else: + while True: + if i == nleft: + while j < nright: + lindexer[count] = -1 + rindexer[count] = j + result[count] = right[j] + count += 1 + j += 1 + break + if j == nright: + while i < nleft: + lindexer[count] = i + rindexer[count] = -1 + result[count] = left[i] + count += 1 + i += 1 + break + + lval = left[i] + rval = right[j] + + if lval == rval: + lindexer[count] = i + rindexer[count] = j + result[count] = lval + count += 1 + if i < nleft - 1: + if j < nright - 1 and right[j + 1] == rval: + j += 1 + else: + i += 1 + if left[i] != rval: + j += 1 + elif j < nright - 1: + j += 1 + if lval != right[j]: + i += 1 + else: + # end of the road + break + elif lval < rval: + lindexer[count] = i + rindexer[count] = -1 + result[count] = lval + count += 1 + i += 1 + else: + lindexer[count] = -1 + rindexer[count] = j + result[count] = rval + count += 1 + j += 1 + + return result, lindexer, rindexer + + +# ---------------------------------------------------------------------- +# asof_join_by +# ---------------------------------------------------------------------- + +from pandas._libs.hashtable cimport ( + HashTable, PyObjectHashTable, UInt64HashTable, Int64HashTable) + +ctypedef fused 
def asof_join_backward_on_X_by_Y(asof_t[:] left_values,
                                 asof_t[:] right_values,
                                 by_t[:] left_by_values,
                                 by_t[:] right_by_values,
                                 bint allow_exact_matches=1,
                                 tolerance=None):
    """
    Backward as-of join on `*_values`, restricted to equal `*_by_values`.

    For each left row, right rows are scanned forward while their value is
    ``<=`` (or ``<`` when `allow_exact_matches` is false) the left value.
    Every scanned right row is recorded in a hash table keyed by its "by"
    value, later rows overwriting earlier ones. The left row is then paired
    with the position stored under its own "by" value, if any.

    Parameters
    ----------
    left_values, right_values : asof_t[:]
        Values to match on. The right cursor only moves forward across left
        rows, so both are expected to be sorted ascending.
    left_by_values, right_by_values : by_t[:]
        Grouping keys; a left row can only match a right row with an equal
        key.
    allow_exact_matches : bint, default 1
        Whether a right value equal to the left value is eligible.
    tolerance : optional
        If given, a match is discarded when ``left - right`` exceeds it.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64]
        Both have ``len(left_values)`` entries. ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the matched right position or -1.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size, found_right_pos
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        asof_t tolerance_ = 0
        asof_t diff = 0
        HashTable hash_table
        by_t by_value

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    # pick the hash table implementation matching the fused "by" type
    if by_t is object:
        hash_table = PyObjectHashTable(right_size)
    elif by_t is int64_t:
        hash_table = Int64HashTable(right_size)
    elif by_t is uint64_t:
        hash_table = UInt64HashTable(right_size)

    right_pos = 0
    for left_pos in range(left_size):
        # restart right_pos if it went negative in a previous iteration
        if right_pos < 0:
            right_pos = 0

        # find last position in right whose value is less than left's
        if allow_exact_matches:
            while (right_pos < right_size and
                   right_values[right_pos] <= left_values[left_pos]):
                hash_table.set_item(right_by_values[right_pos], right_pos)
                right_pos += 1
        else:
            while (right_pos < right_size and
                   right_values[right_pos] < left_values[left_pos]):
                hash_table.set_item(right_by_values[right_pos], right_pos)
                right_pos += 1
        # step back onto the last eligible row (-1 if there was none)
        right_pos -= 1

        # save positions as the desired index
        by_value = left_by_values[left_pos]
        found_right_pos = (hash_table.get_item(by_value)
                           if by_value in hash_table else -1)
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = found_right_pos

        # if needed, verify that tolerance is met
        if has_tolerance and found_right_pos != -1:
            diff = left_values[left_pos] - right_values[found_right_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
def asof_join_nearest_on_X_by_Y(asof_t[:] left_values,
                                asof_t[:] right_values,
                                by_t[:] left_by_values,
                                by_t[:] right_by_values,
                                bint allow_exact_matches=1,
                                tolerance=None):
    """
    Nearest as-of join on `*_values`, restricted to equal `*_by_values`.

    Runs the backward and the forward "by" joins with the same arguments and
    keeps, per left row, whichever right candidate is closer. A tie goes to
    the backward candidate. When only one direction found a candidate it is
    used; when neither did, the right entry is -1.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64]
        Both have ``len(left_values)`` entries.
    """

    cdef:
        Py_ssize_t n_left, row
        ndarray[int64_t] left_indexer, right_indexer
        ndarray[int64_t] back_left, back_right, fwd_left, fwd_right
        int64_t back_hit, fwd_hit
        asof_t back_gap, fwd_gap

    n_left = len(left_values)

    left_indexer = np.empty(n_left, dtype=np.int64)
    right_indexer = np.empty(n_left, dtype=np.int64)

    # candidates from each search direction
    back_left, back_right = asof_join_backward_on_X_by_Y(left_values,
                                                         right_values,
                                                         left_by_values,
                                                         right_by_values,
                                                         allow_exact_matches,
                                                         tolerance)
    fwd_left, fwd_right = asof_join_forward_on_X_by_Y(left_values,
                                                      right_values,
                                                      left_by_values,
                                                      right_by_values,
                                                      allow_exact_matches,
                                                      tolerance)

    for row in range(len(back_right)):
        back_hit = back_right[row]
        fwd_hit = fwd_right[row]

        if back_hit == -1:
            # nothing behind: take the forward candidate (possibly -1 too)
            right_indexer[row] = fwd_hit
        elif fwd_hit == -1:
            right_indexer[row] = back_hit
        else:
            # both directions matched: keep the smaller distance
            back_gap = left_values[back_left[row]] - right_values[back_hit]
            fwd_gap = right_values[fwd_hit] - left_values[fwd_left[row]]
            right_indexer[row] = back_hit if back_gap <= fwd_gap else fwd_hit

        left_indexer[row] = back_left[row]

    return left_indexer, right_indexer
def asof_join_forward(asof_t[:] left_values,
                      asof_t[:] right_values,
                      bint allow_exact_matches=1,
                      tolerance=None):
    """
    Forward as-of join: pair each left row with the first right row whose
    value is ``>=`` (or ``>`` when `allow_exact_matches` is false) it.

    Both arrays are traversed from the end towards the start, and the right
    cursor is shared across left rows, so both inputs are expected to be
    sorted ascending.

    Parameters
    ----------
    left_values, right_values : asof_t[:]
    allow_exact_matches : bint, default 1
        Whether a right value equal to the left value is eligible.
    tolerance : optional
        If given, a match is discarded when ``right - left`` exceeds it.

    Returns
    -------
    (left_indexer, right_indexer) : tuple of ndarray[int64]
        Both have ``len(left_values)`` entries. ``left_indexer[i] == i``;
        ``right_indexer[i]`` is the matched right position or -1.
    """

    cdef:
        Py_ssize_t left_pos, right_pos, left_size, right_size
        ndarray[int64_t] left_indexer, right_indexer
        bint has_tolerance = 0
        asof_t tolerance_ = 0
        asof_t diff = 0

    # if we are using tolerance, set our objects
    if tolerance is not None:
        has_tolerance = 1
        tolerance_ = tolerance

    left_size = len(left_values)
    right_size = len(right_values)

    left_indexer = np.empty(left_size, dtype=np.int64)
    right_indexer = np.empty(left_size, dtype=np.int64)

    right_pos = right_size - 1
    for left_pos in range(left_size - 1, -1, -1):
        # restart right_pos if it went over in a previous iteration
        if right_pos == right_size:
            right_pos = right_size - 1

        # find first position in right whose value is greater than left's
        if allow_exact_matches:
            while (right_pos >= 0 and
                   right_values[right_pos] >= left_values[left_pos]):
                right_pos -= 1
        else:
            while (right_pos >= 0 and
                   right_values[right_pos] > left_values[left_pos]):
                right_pos -= 1
        # step forward onto the first eligible row; equals right_size when
        # no right row qualified
        right_pos += 1

        # save positions as the desired index
        left_indexer[left_pos] = left_pos
        right_indexer[left_pos] = (right_pos
                                   if right_pos != right_size else -1)

        # if needed, verify that tolerance is met
        if has_tolerance and right_pos != right_size:
            diff = right_values[right_pos] - left_values[left_pos]
            if diff > tolerance_:
                right_indexer[left_pos] = -1

    return left_indexer, right_indexer
uint32_t, float64_t + +cdef extern from "khash_python.h": + ctypedef uint32_t khint_t + ctypedef khint_t khiter_t + + ctypedef struct kh_pymap_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + PyObject **keys + size_t *vals + + kh_pymap_t* kh_init_pymap() + void kh_destroy_pymap(kh_pymap_t*) + void kh_clear_pymap(kh_pymap_t*) + khint_t kh_get_pymap(kh_pymap_t*, PyObject*) + void kh_resize_pymap(kh_pymap_t*, khint_t) + khint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) + void kh_del_pymap(kh_pymap_t*, khint_t) + + bint kh_exist_pymap(kh_pymap_t*, khiter_t) + + ctypedef struct kh_pyset_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + PyObject **keys + size_t *vals + + kh_pyset_t* kh_init_pyset() + void kh_destroy_pyset(kh_pyset_t*) + void kh_clear_pyset(kh_pyset_t*) + khint_t kh_get_pyset(kh_pyset_t*, PyObject*) + void kh_resize_pyset(kh_pyset_t*, khint_t) + khint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) + void kh_del_pyset(kh_pyset_t*, khint_t) + + bint kh_exist_pyset(kh_pyset_t*, khiter_t) + + ctypedef char* kh_cstr_t + + ctypedef struct kh_str_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + kh_cstr_t *keys + size_t *vals + + kh_str_t* kh_init_str() nogil + void kh_destroy_str(kh_str_t*) nogil + void kh_clear_str(kh_str_t*) nogil + khint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil + void kh_resize_str(kh_str_t*, khint_t) nogil + khint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil + void kh_del_str(kh_str_t*, khint_t) nogil + + bint kh_exist_str(kh_str_t*, khiter_t) nogil + + ctypedef struct kh_str_starts_t: + kh_str_t *table + int starts[256] + + kh_str_starts_t* kh_init_str_starts() nogil + khint_t kh_put_str_starts_item(kh_str_starts_t* table, char* key, + int* ret) nogil + khint_t kh_get_str_starts_item(kh_str_starts_t* table, char* key) nogil + void kh_destroy_str_starts(kh_str_starts_t*) nogil + void kh_resize_str_starts(kh_str_starts_t*, khint_t) nogil + + ctypedef struct kh_int64_t: 
+ khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + int64_t *keys + size_t *vals + + kh_int64_t* kh_init_int64() nogil + void kh_destroy_int64(kh_int64_t*) nogil + void kh_clear_int64(kh_int64_t*) nogil + khint_t kh_get_int64(kh_int64_t*, int64_t) nogil + void kh_resize_int64(kh_int64_t*, khint_t) nogil + khint_t kh_put_int64(kh_int64_t*, int64_t, int*) nogil + void kh_del_int64(kh_int64_t*, khint_t) nogil + + bint kh_exist_int64(kh_int64_t*, khiter_t) nogil + + ctypedef uint64_t khuint64_t + + ctypedef struct kh_uint64_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + khuint64_t *keys + size_t *vals + + kh_uint64_t* kh_init_uint64() nogil + void kh_destroy_uint64(kh_uint64_t*) nogil + void kh_clear_uint64(kh_uint64_t*) nogil + khint_t kh_get_uint64(kh_uint64_t*, uint64_t) nogil + void kh_resize_uint64(kh_uint64_t*, khint_t) nogil + khint_t kh_put_uint64(kh_uint64_t*, uint64_t, int*) nogil + void kh_del_uint64(kh_uint64_t*, khint_t) nogil + + bint kh_exist_uint64(kh_uint64_t*, khiter_t) nogil + + ctypedef struct kh_float64_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + float64_t *keys + size_t *vals + + kh_float64_t* kh_init_float64() nogil + void kh_destroy_float64(kh_float64_t*) nogil + void kh_clear_float64(kh_float64_t*) nogil + khint_t kh_get_float64(kh_float64_t*, float64_t) nogil + void kh_resize_float64(kh_float64_t*, khint_t) nogil + khint_t kh_put_float64(kh_float64_t*, float64_t, int*) nogil + void kh_del_float64(kh_float64_t*, khint_t) nogil + + bint kh_exist_float64(kh_float64_t*, khiter_t) nogil + + ctypedef struct kh_int32_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + int32_t *keys + size_t *vals + + kh_int32_t* kh_init_int32() nogil + void kh_destroy_int32(kh_int32_t*) nogil + void kh_clear_int32(kh_int32_t*) nogil + khint_t kh_get_int32(kh_int32_t*, int32_t) nogil + void kh_resize_int32(kh_int32_t*, khint_t) nogil + khint_t kh_put_int32(kh_int32_t*, 
int32_t, int*) nogil + void kh_del_int32(kh_int32_t*, khint_t) nogil + + bint kh_exist_int32(kh_int32_t*, khiter_t) nogil + + # sweep factorize + + ctypedef struct kh_strbox_t: + khint_t n_buckets, size, n_occupied, upper_bound + uint32_t *flags + kh_cstr_t *keys + PyObject **vals + + kh_strbox_t* kh_init_strbox() nogil + void kh_destroy_strbox(kh_strbox_t*) nogil + void kh_clear_strbox(kh_strbox_t*) nogil + khint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil + void kh_resize_strbox(kh_strbox_t*, khint_t) nogil + khint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil + void kh_del_strbox(kh_strbox_t*, khint_t) nogil + + bint kh_exist_strbox(kh_strbox_t*, khiter_t) nogil diff --git a/pandas/_libs/lib.pxd b/pandas/_libs/lib.pxd new file mode 100644 index 00000000..12aca9da --- /dev/null +++ b/pandas/_libs/lib.pxd @@ -0,0 +1 @@ +cdef bint c_is_list_like(object, bint) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx new file mode 100644 index 00000000..87d9394b --- /dev/null +++ b/pandas/_libs/lib.pyx @@ -0,0 +1,2475 @@ +from collections import abc +from decimal import Decimal +from fractions import Fraction +from numbers import Number + +import sys + +import cython +from cython import Py_ssize_t + +from cpython.object cimport PyObject_RichCompareBool, Py_EQ +from cpython.ref cimport Py_INCREF +from cpython.tuple cimport PyTuple_SET_ITEM, PyTuple_New + +from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, + PyTime_Check, PyDelta_Check, + PyDateTime_IMPORT) +PyDateTime_IMPORT + +import numpy as np +cimport numpy as cnp +from numpy cimport (ndarray, PyArray_Check, PyArray_GETITEM, + PyArray_ITER_DATA, PyArray_ITER_NEXT, PyArray_IterNew, + flatiter, NPY_OBJECT, + int64_t, float32_t, float64_t, + uint8_t, uint64_t, complex128_t) +cnp.import_array() + +cdef extern from "numpy/arrayobject.h": + # cython's numpy.dtype specification is incorrect, which leads to + # errors in issubclass(self.dtype.type, np.bool_), so we directly + # include the correct 
def values_from_object(obj: object):
    """
    Unwrap an object into its underlying values where it knows how to.

    An object whose ``_typ`` attribute equals ``'dataframe'`` yields its
    ``.values``. Otherwise, if it exposes ``_internal_get_values``, the
    result of calling that is returned. Anything else (e.g. an ndarray) is
    returned unchanged.
    """
    getter: object

    is_frame = getattr(obj, '_typ', '') == 'dataframe'
    if is_frame:
        return obj.values

    getter = getattr(obj, '_internal_get_values', None)
    return obj if getter is None else getter()
def is_scalar(val: object) -> bool:
    """
    Return True if given object is scalar.

    Parameters
    ----------
    val : object
        This includes:

        - numpy array scalar (e.g. np.int64)
        - Python builtin numerics
        - Python builtin byte arrays and strings
        - None
        - datetime.datetime
        - datetime.date
        - datetime.time
        - datetime.timedelta
        - Period
        - decimal.Decimal
        - Interval
        - DateOffset
        - Fraction
        - Number.
        - the ``C_NA`` singleton imported from ``pandas._libs.missing``

    Returns
    -------
    bool
        Return True if given object is scalar.

    Examples
    --------
    >>> import datetime
    >>> dt = datetime.datetime(2018, 10, 3)
    >>> pd.api.types.is_scalar(dt)
    True

    >>> pd.api.types.is_scalar([2, 3])
    False

    >>> pd.api.types.is_scalar({0: 1, 2: 3})
    False

    >>> pd.api.types.is_scalar((0, 2))
    False

    pandas supports PEP 3141 numbers:

    >>> from fractions import Fraction
    >>> pd.api.types.is_scalar(Fraction(3, 5))
    True
    """

    # The checks are or-ed together, so the first one that succeeds decides.
    return (cnp.PyArray_IsAnyScalar(val)
            # PyArray_IsAnyScalar is always False for bytearrays on Py3
            or PyDate_Check(val)
            or PyDelta_Check(val)
            or PyTime_Check(val)
            # We differ from numpy, which claims that None is not scalar;
            # see np.isscalar
            or val is C_NA
            or val is None
            or isinstance(val, (Fraction, Number))
            or util.is_period_object(val)
            or is_decimal(val)
            or is_interval(val)
            or util.is_offset_object(val))
@cython.wraparound(False)
@cython.boundscheck(False)
def fast_unique_multiple(list arrays, sort: bool=True):
    """
    Generate a list of unique values from a list of arrays.

    Values are collected in order of first appearance, scanning the arrays
    one after another.

    Parameters
    ----------
    arrays : list
        List of arrays. Each element is assigned to an ``ndarray[object]``
        buffer, so it has to be a 1-D object-dtype ndarray.
    sort : bool or None, default True
        Sorting of the result is attempted only when ``sort is None``; if
        the values are not mutually comparable (``TypeError``) the unsorted
        result is returned silently. For every other value -- including the
        default ``True`` -- first-appearance order is kept.
        NOTE(review): the ``bool`` annotation and the ``True`` default do
        not match the ``is None`` test below; callers may depend on the
        current behaviour, so confirm before changing either.

    Returns
    -------
    list of unique values
    """
    cdef:
        ndarray[object] buf
        Py_ssize_t k = len(arrays)
        Py_ssize_t i, j, n
        list uniques = []
        dict table = {}
        object val, stub = 0

    for i in range(k):
        buf = arrays[i]
        n = len(buf)
        for j in range(n):
            val = buf[j]
            # `table` is only used for membership; the stored value is a
            # throwaway placeholder
            if val not in table:
                table[val] = stub
                uniques.append(val)
    if sort is None:
        try:
            uniques.sort()
        except TypeError:
            # TODO: RuntimeWarning?
            pass

    return uniques
@cython.wraparound(False)
@cython.boundscheck(False)
def dicts_to_array(dicts: list, columns: list):
    """
    Lay out a list of dicts as a 2-D object array.

    Row ``r`` comes from ``dicts[r]`` and column ``c`` holds the value stored
    under ``columns[c]``; keys missing from a dict are filled with ``np.nan``.

    Parameters
    ----------
    dicts : list of dict
    columns : list
        Keys to extract, in output column order.

    Returns
    -------
    ndarray[object, ndim=2]
        Shape ``(len(dicts), len(columns))``.
    """
    cdef:
        Py_ssize_t r, c, n_rows, n_cols
        ndarray[object, ndim=2] out
        dict record
        object key, missing = np.nan

    n_cols = len(columns)
    n_rows = len(dicts)

    out = np.empty((n_rows, n_cols), dtype='O')

    for r in range(n_rows):
        record = dicts[r]
        for c in range(n_cols):
            key = columns[c]
            out[r, c] = record[key] if key in record else missing

    return out
note:: If indexer is not unique, only first occurrence is accounted. + """ + cdef: + Py_ssize_t i, n = len(indexer) + ndarray[int64_t] rev_indexer + int64_t idx + + rev_indexer = np.empty(length, dtype=np.int64) + rev_indexer[:] = -1 + for i in range(n): + idx = indexer[i] + if idx != -1: + rev_indexer[idx] = i + + return rev_indexer + + +@cython.wraparound(False) +@cython.boundscheck(False) +def has_infs_f4(const float32_t[:] arr) -> bool: + cdef: + Py_ssize_t i, n = len(arr) + float32_t inf, neginf, val + + inf = np.inf + neginf = -inf + + for i in range(n): + val = arr[i] + if val == inf or val == neginf: + return True + return False + + +@cython.wraparound(False) +@cython.boundscheck(False) +def has_infs_f8(const float64_t[:] arr) -> bool: + cdef: + Py_ssize_t i, n = len(arr) + float64_t inf, neginf, val + + inf = np.inf + neginf = -inf + + for i in range(n): + val = arr[i] + if val == inf or val == neginf: + return True + return False + + +def maybe_indices_to_slice(ndarray[int64_t] indices, int max_len): + cdef: + Py_ssize_t i, n = len(indices) + int k, vstart, vlast, v + + if n == 0: + return slice(0, 0) + + vstart = indices[0] + if vstart < 0 or max_len <= vstart: + return indices + + if n == 1: + return slice(vstart, vstart + 1) + + vlast = indices[n - 1] + if vlast < 0 or max_len <= vlast: + return indices + + k = indices[1] - indices[0] + if k == 0: + return indices + else: + for i in range(2, n): + v = indices[i] + if v - indices[i - 1] != k: + return indices + + if k > 0: + return slice(vstart, vlast + 1, k) + else: + if vlast == 0: + return slice(vstart, None, k) + else: + return slice(vstart, vlast - 1, k) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def maybe_booleans_to_slice(ndarray[uint8_t] mask): + cdef: + Py_ssize_t i, n = len(mask) + Py_ssize_t start = 0, end = 0 + bint started = 0, finished = 0 + + for i in range(n): + if mask[i]: + if finished: + return mask.view(np.bool_) + if not started: + started = 1 + start = i + else: + 
if finished: + continue + + if started: + end = i + finished = 1 + + if not started: + return slice(0, 0) + if not finished: + return slice(start, None) + else: + return slice(start, end) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def array_equivalent_object(left: object[:], right: object[:]) -> bool: + """ + Perform an element by element comparison on 1-d object arrays + taking into account nan positions. + """ + cdef: + Py_ssize_t i, n = left.shape[0] + object x, y + + for i in range(n): + x = left[i] + y = right[i] + + # we are either not equal or both nan + # I think None == None will be true here + try: + if PyArray_Check(x) and PyArray_Check(y): + if not array_equivalent_object(x, y): + return False + elif (x is C_NA) ^ (y is C_NA): + return False + elif not (PyObject_RichCompareBool(x, y, Py_EQ) or + (x is None or is_nan(x)) and (y is None or is_nan(y))): + return False + except TypeError as err: + # Avoid raising TypeError on tzawareness mismatch + # TODO: This try/except can be removed if/when Timestamp + # comparisons are change dto match datetime, see GH#28507 + if "tz-naive and tz-aware" in str(err): + return False + raise + + return True + + +@cython.wraparound(False) +@cython.boundscheck(False) +def astype_intsafe(ndarray[object] arr, new_dtype): + cdef: + Py_ssize_t i, n = len(arr) + object val + bint is_datelike + ndarray result + + is_datelike = new_dtype == 'm8[ns]' + result = np.empty(n, dtype=new_dtype) + for i in range(n): + val = arr[i] + if is_datelike and checknull(val): + result[i] = NPY_NAT + else: + result[i] = val + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def astype_str(arr: ndarray, skipna: bool=False) -> ndarray[object]: + """ + Convert all elements in an array to string. + + Parameters + ---------- + arr : ndarray + The array whose elements we are casting. + skipna : bool, default False + Whether or not to coerce nulls to their stringified form + (e.g. NaN becomes 'nan'). 
+ + Returns + ------- + ndarray + A new array with the input array's elements casted. + """ + cdef: + object arr_i + Py_ssize_t i, n = arr.size + ndarray[object] result = np.empty(n, dtype=object) + + for i in range(n): + arr_i = arr[i] + + if not (skipna and checknull(arr_i)): + arr_i = str(arr_i) + + result[i] = arr_i + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def clean_index_list(obj: list): + """ + Utility used in ``pandas.core.indexes.api.ensure_index``. + """ + cdef: + Py_ssize_t i, n = len(obj) + object val + bint all_arrays = 1 + + for i in range(n): + val = obj[i] + if not (isinstance(val, list) or + util.is_array(val) or hasattr(val, '_data')): + all_arrays = 0 + break + + if all_arrays: + return obj, all_arrays + + # don't force numpy coerce with nan's + inferred = infer_dtype(obj, skipna=False) + if inferred in ['string', 'bytes', 'mixed', 'mixed-integer']: + return np.asarray(obj, dtype=object), 0 + elif inferred in ['integer']: + # TODO: we infer an integer but it *could* be a uint64 + try: + return np.asarray(obj, dtype='int64'), 0 + except OverflowError: + return np.asarray(obj, dtype='object'), 0 + + return np.asarray(obj), 0 + + +# ------------------------------------------------------------------------------ +# Groupby-related functions + +# TODO: could do even better if we know something about the data. eg, index has +# 1-min data, binner has 5-min data, then bins are just strides in index. This +# is a general, O(max(len(values), len(binner))) method. +@cython.boundscheck(False) +@cython.wraparound(False) +def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner, + object closed='left', bint hasnans=0): + """ + Int64 (datetime64) version of generic python version in ``groupby.py``. 
+ """ + cdef: + Py_ssize_t lenidx, lenbin, i, j, bc, vc + ndarray[int64_t] bins + int64_t l_bin, r_bin, nat_count + bint right_closed = closed == 'right' + + nat_count = 0 + if hasnans: + mask = values == NPY_NAT + nat_count = np.sum(mask) + values = values[~mask] + + lenidx = len(values) + lenbin = len(binner) + + if lenidx <= 0 or lenbin <= 0: + raise ValueError("Invalid length for values or for binner") + + # check binner fits data + if values[0] < binner[0]: + raise ValueError("Values falls before first bin") + + if values[lenidx - 1] > binner[lenbin - 1]: + raise ValueError("Values falls after last bin") + + bins = np.empty(lenbin - 1, dtype=np.int64) + + j = 0 # index into values + bc = 0 # bin count + + # linear scan + if right_closed: + for i in range(0, lenbin - 1): + r_bin = binner[i + 1] + # count values in current bin, advance to next bin + while j < lenidx and values[j] <= r_bin: + j += 1 + bins[bc] = j + bc += 1 + else: + for i in range(0, lenbin - 1): + r_bin = binner[i + 1] + # count values in current bin, advance to next bin + while j < lenidx and values[j] < r_bin: + j += 1 + bins[bc] = j + bc += 1 + + if nat_count > 0: + # shift bins by the number of NaT + bins = bins + nat_count + bins = np.insert(bins, 0, nat_count) + + return bins + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_level_sorter(const int64_t[:] label, const int64_t[:] starts): + """ + Argsort for a single level of a multi-index, keeping the order of higher + levels unchanged. 
`starts` points to starts of same-key indices w.r.t + to leading levels; equivalent to: + np.hstack([label[starts[i]:starts[i+1]].argsort(kind='mergesort') + + starts[i] for i in range(len(starts) - 1)]) + """ + cdef: + int64_t l, r + Py_ssize_t i + ndarray[int64_t, ndim=1] out = np.empty(len(label), dtype=np.int64) + ndarray[int64_t, ndim=1] label_arr = np.asarray(label) + + for i in range(len(starts) - 1): + l, r = starts[i], starts[i + 1] + out[l:r] = l + label_arr[l:r].argsort(kind='mergesort') + + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, + const int64_t[:] labels, + Py_ssize_t max_bin, + int axis): + cdef: + Py_ssize_t i, j, k, n + ndarray[int64_t, ndim=2] counts + + assert (axis == 0 or axis == 1) + n, k = (mask).shape + + if axis == 0: + counts = np.zeros((max_bin, k), dtype='i8') + with nogil: + for i in range(n): + for j in range(k): + counts[labels[i], j] += mask[i, j] + + else: # axis == 1 + counts = np.zeros((n, max_bin), dtype='i8') + with nogil: + for i in range(n): + for j in range(k): + counts[i, labels[j]] += mask[i, j] + + return counts + + +def generate_slices(const int64_t[:] labels, Py_ssize_t ngroups): + cdef: + Py_ssize_t i, group_size, n, start + int64_t lab + object slobj + ndarray[int64_t] starts, ends + + n = len(labels) + + starts = np.zeros(ngroups, dtype=np.int64) + ends = np.zeros(ngroups, dtype=np.int64) + + start = 0 + group_size = 0 + for i in range(n): + lab = labels[i] + if lab < 0: + start += 1 + else: + group_size += 1 + if i == n - 1 or lab != labels[i + 1]: + starts[lab] = start + ends[lab] = start + group_size + start += group_size + group_size = 0 + + return starts, ends + + +def indices_fast(ndarray index, const int64_t[:] labels, list keys, + list sorted_labels): + """ + Parameters + ---------- + index : ndarray + labels : ndarray[int64] + keys : list + sorted_labels : list[ndarray[int64]] + """ + cdef: + Py_ssize_t i, j, k, lab, 
cur, start, n = len(labels) + dict result = {} + object tup + + k = len(keys) + + if n == 0: + return result + + start = 0 + cur = labels[0] + for i in range(1, n): + lab = labels[i] + + if lab != cur: + if lab != -1: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][i - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + + result[tup] = index[start:i] + start = i + cur = lab + + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][n - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) + result[tup] = index[start:] + + return result + + +# core.common import for fast inference checks + +def is_float(obj: object) -> bool: + """ + Returns + ------- + bool + """ + return util.is_float_object(obj) + + +def is_integer(obj: object) -> bool: + """ + Returns + ------- + bool + """ + return util.is_integer_object(obj) + + +def is_bool(obj: object) -> bool: + """ + Returns + ------- + bool + """ + return util.is_bool_object(obj) + + +def is_complex(obj: object) -> bool: + """ + Returns + ------- + bool + """ + return util.is_complex_object(obj) + + +cpdef bint is_decimal(object obj): + return isinstance(obj, Decimal) + + +cpdef bint is_interval(object obj): + return getattr(obj, '_typ', '_typ') == 'interval' + + +def is_period(val: object) -> bool: + """ + Return a boolean if this is a Period object. + + Returns + ------- + bool + """ + return util.is_period_object(val) + + +def is_list_like(obj: object, allow_sets: bool = True) -> bool: + """ + Check if the object is list-like. + + Objects that are considered list-like are for example Python + lists, tuples, sets, NumPy arrays, and Pandas Series. + + Strings and datetime objects, however, are not considered list-like. + + Parameters + ---------- + obj : object + Object to check. + allow_sets : bool, default True + If this parameter is False, sets will not be considered list-like. + + .. 
versionadded:: 0.24.0 + + Returns + ------- + bool + Whether `obj` has list-like properties. + + Examples + -------- + >>> is_list_like([1, 2, 3]) + True + >>> is_list_like({1, 2, 3}) + True + >>> is_list_like(datetime(2017, 1, 1)) + False + >>> is_list_like("foo") + False + >>> is_list_like(1) + False + >>> is_list_like(np.array([2])) + True + >>> is_list_like(np.array(2))) + False + """ + return c_is_list_like(obj, allow_sets) + + +cdef inline bint c_is_list_like(object obj, bint allow_sets): + return ( + isinstance(obj, abc.Iterable) + # we do not count strings/unicode/bytes as list-like + and not isinstance(obj, (str, bytes)) + # exclude zero-dimensional numpy arrays, effectively scalars + and not (util.is_array(obj) and obj.ndim == 0) + # exclude sets if allow_sets is False + and not (allow_sets is False and isinstance(obj, abc.Set)) + ) + + +_TYPE_MAP = { + 'categorical': 'categorical', + 'category': 'categorical', + 'int8': 'integer', + 'int16': 'integer', + 'int32': 'integer', + 'int64': 'integer', + 'i': 'integer', + 'uint8': 'integer', + 'uint16': 'integer', + 'uint32': 'integer', + 'uint64': 'integer', + 'u': 'integer', + 'float32': 'floating', + 'float64': 'floating', + 'f': 'floating', + 'complex64': 'complex', + 'complex128': 'complex', + 'c': 'complex', + 'string': 'string', + 'S': 'bytes', + 'U': 'string', + 'bool': 'boolean', + 'b': 'boolean', + 'datetime64[ns]': 'datetime64', + 'M': 'datetime64', + 'timedelta64[ns]': 'timedelta64', + 'm': 'timedelta64', + 'interval': 'interval', +} + +# types only exist on certain platform +try: + np.float128 + _TYPE_MAP['float128'] = 'floating' +except AttributeError: + pass +try: + np.complex256 + _TYPE_MAP['complex256'] = 'complex' +except AttributeError: + pass +try: + np.float16 + _TYPE_MAP['float16'] = 'floating' +except AttributeError: + pass + + +cdef class Seen: + """ + Class for keeping track of the types of elements + encountered when trying to perform type conversions. 
+ """ + + cdef: + bint int_ # seen_int + bint nat_ # seen nat + bint bool_ # seen_bool + bint null_ # seen_null + bint nan_ # seen_np.nan + bint uint_ # seen_uint (unsigned integer) + bint sint_ # seen_sint (signed integer) + bint float_ # seen_float + bint object_ # seen_object + bint complex_ # seen_complex + bint datetime_ # seen_datetime + bint coerce_numeric # coerce data to numeric + bint timedelta_ # seen_timedelta + bint datetimetz_ # seen_datetimetz + + def __cinit__(self, bint coerce_numeric=0): + """ + Initialize a Seen instance. + + Parameters + ---------- + coerce_numeric : bint, default 0 + Whether or not to force conversion to a numeric data type if + initial methods to convert to numeric fail. + """ + self.int_ = 0 + self.nat_ = 0 + self.bool_ = 0 + self.null_ = 0 + self.nan_ = 0 + self.uint_ = 0 + self.sint_ = 0 + self.float_ = 0 + self.object_ = 0 + self.complex_ = 0 + self.datetime_ = 0 + self.timedelta_ = 0 + self.datetimetz_ = 0 + self.coerce_numeric = coerce_numeric + + cdef inline bint check_uint64_conflict(self) except -1: + """ + Check whether we can safely convert a uint64 array to a numeric dtype. + + There are two cases when conversion to numeric dtype with a uint64 + array is not safe (and will therefore not be performed) + + 1) A NaN element is encountered. + + uint64 cannot be safely cast to float64 due to truncation issues + at the extreme ends of the range. + + 2) A negative number is encountered. + + There is no numerical dtype that can hold both negative numbers + and numbers greater than INT64_MAX. Hence, at least one number + will be improperly cast if we convert to a numeric dtype. + + Returns + ------- + bool + Whether or not we should return the original input array to avoid + data truncation. + + Raises + ------ + ValueError + uint64 elements were detected, and at least one of the + two conflict cases was also detected. However, we are + trying to force conversion to a numeric dtype. 
+ """ + return (self.uint_ and (self.null_ or self.sint_) + and not self.coerce_numeric) + + cdef inline saw_null(self): + """ + Set flags indicating that a null value was encountered. + """ + self.null_ = 1 + self.float_ = 1 + + cdef saw_int(self, object val): + """ + Set flags indicating that an integer value was encountered. + + In addition to setting a flag that an integer was seen, we + also set two flags depending on the type of integer seen: + + 1) sint_ : a negative (signed) number in the + range of [-2**63, 0) was encountered + 2) uint_ : a positive number in the range of + [2**63, 2**64) was encountered + + Parameters + ---------- + val : Python int + Value with which to set the flags. + """ + self.int_ = 1 + self.sint_ = self.sint_ or (oINT64_MIN <= val < 0) + self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX) + + @property + def numeric_(self): + return self.complex_ or self.float_ or self.int_ + + @property + def is_bool(self): + return not (self.datetime_ or self.numeric_ or self.timedelta_ + or self.nat_) + + @property + def is_float_or_complex(self): + return not (self.bool_ or self.datetime_ or self.timedelta_ + or self.nat_) + + +cdef _try_infer_map(v): + """ + If its in our map, just return the dtype. + """ + cdef: + object attr, val + for attr in ['name', 'kind', 'base']: + val = getattr(v.dtype, attr) + if val in _TYPE_MAP: + return _TYPE_MAP[val] + return None + + +def infer_dtype(value: object, skipna: bool = True) -> str: + """ + Efficiently infer the type of a passed val, or list-like + array of values. Return a string describing the type. + + Parameters + ---------- + value : scalar, list, ndarray, or pandas type + skipna : bool, default True + Ignore NaN values when inferring the type. + + .. versionadded:: 0.21.0 + + Returns + ------- + str + Describing the common type of the input data. 
+ Results can include: + + - string + - bytes + - floating + - integer + - mixed-integer + - mixed-integer-float + - decimal + - complex + - categorical + - boolean + - datetime64 + - datetime + - date + - timedelta64 + - timedelta + - time + - period + - mixed + + Raises + ------ + TypeError + If ndarray-like but cannot infer the dtype + + Notes + ----- + - 'mixed' is the catchall for anything that is not otherwise + specialized + - 'mixed-integer-float' are floats and integers + - 'mixed-integer' are integers mixed with non-integers + + Examples + -------- + >>> infer_dtype(['foo', 'bar']) + 'string' + + >>> infer_dtype(['a', np.nan, 'b'], skipna=True) + 'string' + + >>> infer_dtype(['a', np.nan, 'b'], skipna=False) + 'mixed' + + >>> infer_dtype([b'foo', b'bar']) + 'bytes' + + >>> infer_dtype([1, 2, 3]) + 'integer' + + >>> infer_dtype([1, 2, 3.5]) + 'mixed-integer-float' + + >>> infer_dtype([1.0, 2.0, 3.5]) + 'floating' + + >>> infer_dtype(['a', 1]) + 'mixed-integer' + + >>> infer_dtype([Decimal(1), Decimal(2.0)]) + 'decimal' + + >>> infer_dtype([True, False]) + 'boolean' + + >>> infer_dtype([True, False, np.nan]) + 'mixed' + + >>> infer_dtype([pd.Timestamp('20130101')]) + 'datetime' + + >>> infer_dtype([datetime.date(2013, 1, 1)]) + 'date' + + >>> infer_dtype([np.datetime64('2013-01-01')]) + 'datetime64' + + >>> infer_dtype([datetime.timedelta(0, 1, 1)]) + 'timedelta' + + >>> infer_dtype(pd.Series(list('aabc')).astype('category')) + 'categorical' + """ + cdef: + Py_ssize_t i, n + object val + ndarray values + bint seen_pdnat = False + bint seen_val = False + + if util.is_array(value): + values = value + elif hasattr(value, 'dtype'): + # this will handle ndarray-like + # e.g. 
categoricals + try: + values = getattr(value, '_values', getattr(value, 'values', value)) + except TypeError: + # This gets hit if we have an EA, since cython expects `values` + # to be an ndarray + value = _try_infer_map(value) + if value is not None: + return value + + # its ndarray like but we can't handle + raise ValueError(f"cannot infer type for {type(value)}") + + else: + if not isinstance(value, list): + value = list(value) + from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike) + values = construct_1d_object_array_from_listlike(value) + + # make contiguous + values = values.ravel() + + val = _try_infer_map(values) + if val is not None: + return val + + if values.dtype != np.object_: + values = values.astype('O') + + if skipna: + values = values[~isnaobj(values)] + + n = len(values) + if n == 0: + return 'empty' + + # try to use a valid value + for i in range(n): + val = values[i] + + # do not use is_nul_datetimelike to keep + # np.datetime64('nat') and np.timedelta64('nat') + if val is None or util.is_nan(val): + pass + elif val is NaT: + seen_pdnat = True + else: + seen_val = True + break + + # if all values are nan/NaT + if seen_val is False and seen_pdnat is True: + return "datetime" + # float/object nan is handled in latter logic + + if util.is_datetime64_object(val): + if is_datetime64_array(values): + return "datetime64" + + elif is_timedelta(val): + if is_timedelta_or_timedelta64_array(values): + return "timedelta" + + elif util.is_integer_object(val): + # ordering matters here; this check must come after the is_timedelta + # check otherwise numpy timedelta64 objects would come through here + + if is_integer_array(values): + return "integer" + elif is_integer_float_array(values): + if is_integer_na_array(values): + return "integer-na" + else: + return "mixed-integer-float" + return "mixed-integer" + + elif PyDateTime_Check(val): + if is_datetime_array(values): + return "datetime" + + elif PyDate_Check(val): + if 
is_date_array(values, skipna=skipna): + return "date" + + elif PyTime_Check(val): + if is_time_array(values, skipna=skipna): + return "time" + + elif is_decimal(val): + return "decimal" + + elif is_complex(val): + return "complex" + + elif util.is_float_object(val): + if is_float_array(values): + return "floating" + elif is_integer_float_array(values): + if is_integer_na_array(values): + return "integer-na" + else: + return "mixed-integer-float" + + elif util.is_bool_object(val): + if is_bool_array(values, skipna=skipna): + return "boolean" + + elif isinstance(val, str): + if is_string_array(values, skipna=skipna): + return "string" + + elif isinstance(val, bytes): + if is_bytes_array(values, skipna=skipna): + return "bytes" + + elif util.is_period_object(val): + if is_period_array(values): + return "period" + + elif is_interval(val): + if is_interval_array(values): + return "interval" + + for i in range(n): + val = values[i] + if (util.is_integer_object(val) and + not util.is_timedelta64_object(val) and + not util.is_datetime64_object(val)): + return "mixed-integer" + + return "mixed" + + +def infer_datetimelike_array(arr: object) -> object: + """ + Infer if we have a datetime or timedelta array. 
+ - date: we have *only* date and maybe strings, nulls + - datetime: we have *only* datetimes and maybe strings, nulls + - timedelta: we have *only* timedeltas and maybe strings, nulls + - nat: we do not have *any* date, datetimes or timedeltas, but do have + at least a NaT + - mixed: other objects (strings, a mix of tz-aware and tz-naive, or + actual objects) + + Parameters + ---------- + arr : object array + + Returns + ------- + str: {datetime, timedelta, date, nat, mixed} + """ + cdef: + Py_ssize_t i, n = len(arr) + bint seen_timedelta = 0, seen_date = 0, seen_datetime = 0 + bint seen_tz_aware = 0, seen_tz_naive = 0 + bint seen_nat = 0 + list objs = [] + object v + + for i in range(n): + v = arr[i] + if isinstance(v, str): + objs.append(v) + + if len(objs) == 3: + break + + elif v is None or util.is_nan(v): + # nan or None + pass + elif v is NaT: + seen_nat = 1 + elif PyDateTime_Check(v): + # datetime + seen_datetime = 1 + + # disambiguate between tz-naive and tz-aware + if v.tzinfo is None: + seen_tz_naive = 1 + else: + seen_tz_aware = 1 + + if seen_tz_naive and seen_tz_aware: + return 'mixed' + elif util.is_datetime64_object(v): + # np.datetime64 + seen_datetime = 1 + elif PyDate_Check(v): + seen_date = 1 + elif is_timedelta(v): + # timedelta, or timedelta64 + seen_timedelta = 1 + else: + return "mixed" + + if seen_date and not (seen_datetime or seen_timedelta): + return "date" + elif seen_datetime and not seen_timedelta: + return "datetime" + elif seen_timedelta and not seen_datetime: + return "timedelta" + elif seen_nat: + return "nat" + + # short-circuit by trying to + # actually convert these strings + # this is for performance as we don't need to try + # convert *every* string array + if len(objs): + try: + array_to_datetime(objs, errors="raise") + return "datetime" + except (ValueError, TypeError): + pass + + # we are *not* going to infer from strings + # for timedelta as too much ambiguity + + return 'mixed' + + +cdef inline bint is_timedelta(object 
o): + return PyDelta_Check(o) or util.is_timedelta64_object(o) + + +cdef class Validator: + + cdef: + Py_ssize_t n + dtype dtype + bint skipna + + def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_), + bint skipna=False): + self.n = n + self.dtype = dtype + self.skipna = skipna + + cdef bint validate(self, ndarray values) except -1: + if not self.n: + return False + + if self.is_array_typed(): + return True + elif self.dtype.type_num == NPY_OBJECT: + if self.skipna: + return self._validate_skipna(values) + else: + return self._validate(values) + else: + return False + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate(self, ndarray values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = self.n + + for i in range(n): + if not self.is_valid(values[i]): + return False + + return self.finalize_validate() + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef bint _validate_skipna(self, ndarray values) except -1: + cdef: + Py_ssize_t i + Py_ssize_t n = self.n + + for i in range(n): + if not self.is_valid_skipna(values[i]): + return False + + return self.finalize_validate_skipna() + + cdef bint is_valid(self, object value) except -1: + return self.is_value_typed(value) + + cdef bint is_valid_skipna(self, object value) except -1: + return self.is_valid(value) or self.is_valid_null(value) + + cdef bint is_value_typed(self, object value) except -1: + raise NotImplementedError(f"{type(self).__name__} child class " + "must define is_value_typed") + + cdef bint is_valid_null(self, object value) except -1: + return value is None or value is C_NA or util.is_nan(value) + + cdef bint is_array_typed(self) except -1: + return False + + cdef inline bint finalize_validate(self): + return True + + cdef bint finalize_validate_skipna(self): + # TODO(phillipc): Remove the existing validate methods and replace them + # with the skipna versions upon full deprecation of skipna=False + return True + + +cdef class 
BoolValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_bool_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bool_) + + +cpdef bint is_bool_array(ndarray values, bint skipna=False): + cdef: + BoolValidator validator = BoolValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +cdef class IntegerValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + + +cpdef bint is_integer_array(ndarray values): + cdef: + IntegerValidator validator = IntegerValidator(len(values), + values.dtype) + return validator.validate(values) + + +cdef class IntegerNaValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return (util.is_integer_object(value) + or (util.is_nan(value) and util.is_float_object(value))) + + +cdef bint is_integer_na_array(ndarray values): + cdef: + IntegerNaValidator validator = IntegerNaValidator(len(values), + values.dtype) + return validator.validate(values) + + +cdef class IntegerFloatValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_integer_object(value) or util.is_float_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.integer) + + +cdef bint is_integer_float_array(ndarray values): + cdef: + IntegerFloatValidator validator = IntegerFloatValidator(len(values), + values.dtype) + return validator.validate(values) + + +cdef class FloatValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_float_object(value) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.floating) + + +cpdef bint is_float_array(ndarray 
values): + cdef: + FloatValidator validator = FloatValidator(len(values), values.dtype) + return validator.validate(values) + + +cdef class StringValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return isinstance(value, str) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.str_) + + cdef bint is_valid_null(self, object value) except -1: + # We deliberately exclude None / NaN here since StringArray uses NA + return value is C_NA + + +cpdef bint is_string_array(ndarray values, bint skipna=False): + cdef: + StringValidator validator = StringValidator(len(values), + values.dtype, + skipna=skipna) + return validator.validate(values) + + +cdef class BytesValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return isinstance(value, bytes) + + cdef inline bint is_array_typed(self) except -1: + return issubclass(self.dtype.type, np.bytes_) + + +cdef bint is_bytes_array(ndarray values, bint skipna=False): + cdef: + BytesValidator validator = BytesValidator(len(values), values.dtype, + skipna=skipna) + return validator.validate(values) + + +cdef class TemporalValidator(Validator): + cdef: + Py_ssize_t generic_null_count + + def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_), + bint skipna=False): + self.n = n + self.dtype = dtype + self.skipna = skipna + self.generic_null_count = 0 + + cdef inline bint is_valid(self, object value) except -1: + return self.is_value_typed(value) or self.is_valid_null(value) + + cdef bint is_valid_null(self, object value) except -1: + raise NotImplementedError(f"{type(self).__name__} child class " + "must define is_valid_null") + + cdef inline bint is_valid_skipna(self, object value) except -1: + cdef: + bint is_typed_null = self.is_valid_null(value) + bint is_generic_null = value is None or util.is_nan(value) + self.generic_null_count += is_typed_null and is_generic_null + return self.is_value_typed(value) or 
is_typed_null or is_generic_null + + cdef inline bint finalize_validate_skipna(self): + return self.generic_null_count != self.n + + +cdef class DatetimeValidator(TemporalValidator): + cdef bint is_value_typed(self, object value) except -1: + return PyDateTime_Check(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_datetime64(value) + + +cpdef bint is_datetime_array(ndarray values): + cdef: + DatetimeValidator validator = DatetimeValidator(len(values), + skipna=True) + return validator.validate(values) + + +cdef class Datetime64Validator(DatetimeValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_datetime64_object(value) + + +cpdef bint is_datetime64_array(ndarray values): + cdef: + Datetime64Validator validator = Datetime64Validator(len(values), + skipna=True) + return validator.validate(values) + + +# TODO: only non-here use is in test +def is_datetime_with_singletz_array(values: ndarray) -> bool: + """ + Check values have the same tzinfo attribute. + Doesn't check values are datetime-like types. 
+ """ + cdef: + Py_ssize_t i = 0, j, n = len(values) + object base_val, base_tz, val, tz + + if n == 0: + return False + # Get a reference timezone to compare with the rest of the tzs in the array + for i in range(n): + base_val = values[i] + if base_val is not NaT: + base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) + break + + for j in range(i, n): + # Compare val's timezone with the reference timezone + # NaT can coexist with tz-aware datetimes, so skip if encountered + val = values[j] + if val is not NaT: + tz = getattr(val, 'tzinfo', None) + if not tz_compare(base_tz, tz): + return False + + return True + + +cdef class TimedeltaValidator(TemporalValidator): + cdef bint is_value_typed(self, object value) except -1: + return PyDelta_Check(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_timedelta64(value) + + +cdef class AnyTimedeltaValidator(TimedeltaValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return is_timedelta(value) + + +# TODO: only non-here use is in test +cpdef bint is_timedelta_or_timedelta64_array(ndarray values): + """ + Infer with timedeltas and/or nat/none. 
+ """ + cdef: + AnyTimedeltaValidator validator = AnyTimedeltaValidator(len(values), + skipna=True) + return validator.validate(values) + + +cdef class DateValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return PyDate_Check(value) + + +cpdef bint is_date_array(ndarray values, bint skipna=False): + cdef: + DateValidator validator = DateValidator(len(values), skipna=skipna) + return validator.validate(values) + + +cdef class TimeValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return PyTime_Check(value) + + +cpdef bint is_time_array(ndarray values, bint skipna=False): + cdef: + TimeValidator validator = TimeValidator(len(values), skipna=skipna) + return validator.validate(values) + + +cdef class PeriodValidator(TemporalValidator): + cdef inline bint is_value_typed(self, object value) except -1: + return util.is_period_object(value) + + cdef inline bint is_valid_null(self, object value) except -1: + return is_null_period(value) + + +cpdef bint is_period_array(ndarray values): + cdef: + PeriodValidator validator = PeriodValidator(len(values), skipna=True) + return validator.validate(values) + + +cdef class IntervalValidator(Validator): + cdef inline bint is_value_typed(self, object value) except -1: + return is_interval(value) + + +cpdef bint is_interval_array(ndarray values): + cdef: + IntervalValidator validator = IntervalValidator(len(values), + skipna=True) + return validator.validate(values) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def maybe_convert_numeric(ndarray[object] values, set na_values, + bint convert_empty=True, bint coerce_numeric=False): + """ + Convert object array to a numeric array if possible. + + Parameters + ---------- + values : ndarray + Array of object elements to convert. + na_values : set + Set of values that should be interpreted as NaN. 
+ convert_empty : bool, default True + If an empty array-like object is encountered, whether to interpret + that element as NaN or not. If set to False, a ValueError will be + raised if such an element is encountered and 'coerce_numeric' is False. + coerce_numeric : bool, default False + If initial attempts to convert to numeric have failed, whether to + force conversion to numeric via alternative methods or by setting the + element to NaN. Otherwise, an Exception will be raised when such an + element is encountered. + + This boolean also has an impact on how conversion behaves when a + numeric array has no suitable numerical dtype to return (i.e. uint64, + int32, uint8). If set to False, the original object array will be + returned. Otherwise, a ValueError will be raised. + + Returns + ------- + Array of converted object values to numerical ones. + """ + if len(values) == 0: + return np.array([], dtype='i8') + + # fastpath for ints - try to convert all based on first value + cdef: + object val = values[0] + + if util.is_integer_object(val): + try: + maybe_ints = values.astype('i8') + if (maybe_ints == values).all(): + return maybe_ints + except (ValueError, OverflowError, TypeError): + pass + + # Otherwise, iterate and do full inference. 
+ cdef: + int status, maybe_int + Py_ssize_t i, n = values.size + Seen seen = Seen(coerce_numeric) + ndarray[float64_t] floats = np.empty(n, dtype='f8') + ndarray[complex128_t] complexes = np.empty(n, dtype='c16') + ndarray[int64_t] ints = np.empty(n, dtype='i8') + ndarray[uint64_t] uints = np.empty(n, dtype='u8') + ndarray[uint8_t] bools = np.empty(n, dtype='u1') + float64_t fval + + for i in range(n): + val = values[i] + + if val.__hash__ is not None and val in na_values: + seen.saw_null() + floats[i] = complexes[i] = NaN + elif util.is_float_object(val): + fval = val + if fval != fval: + seen.null_ = True + + floats[i] = complexes[i] = fval + seen.float_ = True + elif util.is_integer_object(val): + floats[i] = complexes[i] = val + + val = int(val) + seen.saw_int(val) + + if val >= 0: + if val <= oUINT64_MAX: + uints[i] = val + else: + seen.float_ = True + + if oINT64_MIN <= val <= oINT64_MAX: + ints[i] = val + + if val < oINT64_MIN or (seen.sint_ and seen.uint_): + seen.float_ = True + + elif util.is_bool_object(val): + floats[i] = uints[i] = ints[i] = bools[i] = val + seen.bool_ = True + elif val is None: + seen.saw_null() + floats[i] = complexes[i] = NaN + elif hasattr(val, '__len__') and len(val) == 0: + if convert_empty or seen.coerce_numeric: + seen.saw_null() + floats[i] = complexes[i] = NaN + else: + raise ValueError("Empty string encountered") + elif util.is_complex_object(val): + complexes[i] = val + seen.complex_ = True + elif is_decimal(val): + floats[i] = complexes[i] = val + seen.float_ = True + else: + try: + status = floatify(val, &fval, &maybe_int) + + if fval in na_values: + seen.saw_null() + floats[i] = complexes[i] = NaN + else: + if fval != fval: + seen.null_ = True + + floats[i] = fval + + if maybe_int: + as_int = int(val) + + if as_int in na_values: + seen.saw_null() + else: + seen.saw_int(as_int) + + if as_int not in na_values: + if as_int < oINT64_MIN or as_int > oUINT64_MAX: + if seen.coerce_numeric: + seen.float_ = True + else: + raise 
ValueError("Integer out of range.") + else: + if as_int >= 0: + uints[i] = as_int + + if as_int <= oINT64_MAX: + ints[i] = as_int + + seen.float_ = seen.float_ or (seen.uint_ and seen.sint_) + else: + seen.float_ = True + except (TypeError, ValueError) as err: + if not seen.coerce_numeric: + raise type(err)(f"{err} at position {i}") + elif "uint64" in str(err): # Exception from check functions. + raise + + seen.saw_null() + floats[i] = NaN + + if seen.check_uint64_conflict(): + return values + + if seen.complex_: + return complexes + elif seen.float_: + return floats + elif seen.int_: + if seen.uint_: + return uints + else: + return ints + elif seen.bool_: + return bools.view(np.bool_) + elif seen.uint_: + return uints + return ints + + +@cython.boundscheck(False) +@cython.wraparound(False) +def maybe_convert_objects(ndarray[object] objects, bint try_float=0, + bint safe=0, bint convert_datetime=0, + bint convert_timedelta=0, + bint convert_to_nullable_integer=0): + """ + Type inference function-- convert object array to proper dtype + + Parameters + ---------- + values : ndarray + Array of object elements to convert. + try_float : bool, default False + If an array-like object contains only float or NaN values is + encountered, whether to convert and return an array of float dtype. + safe : bool, default False + Whether to upcast numeric type (e.g. int cast to float). If set to + True, no upcasting will be performed. + convert_datetime : bool, default False + If an array-like object contains only datetime values or NaT is + encountered, whether to convert and return an array of M8[ns] dtype. + convert_timedelta : bool, default False + If an array-like object contains only timedelta values or NaT is + encountered, whether to convert and return an array of m8[ns] dtype. + convert_to_nullable_integer : bool, default False + If an array-like object contains only interger values (and NaN) is + encountered, whether to convert and return an IntegerArray. 
+ + Returns + ------- + Array of converted object values to more specific dtypes if applicable. + """ + cdef: + Py_ssize_t i, n + ndarray[float64_t] floats + ndarray[complex128_t] complexes + ndarray[int64_t] ints + ndarray[uint64_t] uints + ndarray[uint8_t] bools + int64_t[:] idatetimes + int64_t[:] itimedeltas + Seen seen = Seen() + object val + float64_t fval, fnan + + n = len(objects) + + floats = np.empty(n, dtype='f8') + complexes = np.empty(n, dtype='c16') + ints = np.empty(n, dtype='i8') + uints = np.empty(n, dtype='u8') + bools = np.empty(n, dtype=np.uint8) + mask = np.full(n, False) + + if convert_datetime: + datetimes = np.empty(n, dtype='M8[ns]') + idatetimes = datetimes.view(np.int64) + + if convert_timedelta: + timedeltas = np.empty(n, dtype='m8[ns]') + itimedeltas = timedeltas.view(np.int64) + + fnan = np.nan + + for i in range(n): + val = objects[i] + + if val is None: + seen.null_ = 1 + floats[i] = complexes[i] = fnan + mask[i] = True + elif val is NaT: + seen.nat_ = 1 + if convert_datetime: + idatetimes[i] = NPY_NAT + if convert_timedelta: + itimedeltas[i] = NPY_NAT + if not (convert_datetime or convert_timedelta): + seen.object_ = 1 + break + elif val is np.nan: + seen.nan_ = 1 + mask[i] = True + floats[i] = complexes[i] = val + elif util.is_bool_object(val): + seen.bool_ = 1 + bools[i] = val + elif util.is_float_object(val): + floats[i] = complexes[i] = val + seen.float_ = 1 + elif util.is_datetime64_object(val): + if convert_datetime: + idatetimes[i] = convert_to_tsobject( + val, None, None, 0, 0).value + seen.datetime_ = 1 + else: + seen.object_ = 1 + break + elif is_timedelta(val): + if convert_timedelta: + itimedeltas[i] = convert_to_timedelta64(val, 'ns') + seen.timedelta_ = 1 + else: + seen.object_ = 1 + break + elif util.is_integer_object(val): + seen.int_ = 1 + floats[i] = val + complexes[i] = val + if not seen.null_: + val = int(val) + seen.saw_int(val) + + if ((seen.uint_ and seen.sint_) or + val > oUINT64_MAX or val < oINT64_MIN): + 
seen.object_ = 1 + break + + if seen.uint_: + uints[i] = val + elif seen.sint_: + ints[i] = val + else: + uints[i] = val + ints[i] = val + + elif util.is_complex_object(val): + complexes[i] = val + seen.complex_ = 1 + elif PyDateTime_Check(val) or util.is_datetime64_object(val): + + # if we have an tz's attached then return the objects + if convert_datetime: + if getattr(val, 'tzinfo', None) is not None: + seen.datetimetz_ = 1 + break + else: + seen.datetime_ = 1 + idatetimes[i] = convert_to_tsobject( + val, None, None, 0, 0).value + else: + seen.object_ = 1 + break + elif try_float and not isinstance(val, str): + # this will convert Decimal objects + try: + floats[i] = float(val) + complexes[i] = complex(val) + seen.float_ = 1 + except (ValueError, TypeError): + seen.object_ = 1 + break + else: + seen.object_ = 1 + break + + # we try to coerce datetime w/tz but must all have the same tz + if seen.datetimetz_: + if is_datetime_with_singletz_array(objects): + from pandas import DatetimeIndex + return DatetimeIndex(objects) + seen.object_ = 1 + + if not seen.object_: + if not safe: + if seen.null_ or seen.nan_: + if seen.is_float_or_complex: + if seen.complex_: + return complexes + elif seen.float_: + return floats + elif seen.int_: + if convert_to_nullable_integer: + from pandas.core.arrays import IntegerArray + return IntegerArray(ints, mask) + else: + return floats + elif seen.nan_: + return floats + else: + if not seen.bool_: + if seen.datetime_: + if not seen.numeric_ and not seen.timedelta_: + return datetimes + elif seen.timedelta_: + if not seen.numeric_: + return timedeltas + elif seen.nat_: + if not seen.numeric_: + if convert_datetime and convert_timedelta: + # TODO: array full of NaT ambiguity resolve here needed + pass + elif convert_datetime: + return datetimes + elif convert_timedelta: + return timedeltas + else: + if seen.complex_: + return complexes + elif seen.float_: + return floats + elif seen.int_: + if seen.uint_: + return uints + else: + return 
ints + elif seen.is_bool: + return bools.view(np.bool_) + + else: + # don't cast int to float, etc. + if seen.null_: + if seen.is_float_or_complex: + if seen.complex_: + if not seen.int_: + return complexes + elif seen.float_ or seen.nan_: + if not seen.int_: + return floats + else: + if not seen.bool_: + if seen.datetime_: + if not seen.numeric_ and not seen.timedelta_: + return datetimes + elif seen.timedelta_: + if not seen.numeric_: + return timedeltas + elif seen.nat_: + if not seen.numeric_: + if convert_datetime and convert_timedelta: + # TODO: array full of NaT ambiguity resolve here needed + pass + elif convert_datetime: + return datetimes + elif convert_timedelta: + return timedeltas + else: + if seen.complex_: + if not seen.int_: + return complexes + elif seen.float_ or seen.nan_: + if not seen.int_: + return floats + elif seen.int_: + if seen.uint_: + return uints + else: + return ints + elif seen.is_bool and not seen.nan_: + return bools.view(np.bool_) + + return objects + + +# Note: no_default is exported to the public API in pandas.api.extensions +no_default = object() #: Sentinel indicating the default value. + + +@cython.boundscheck(False) +@cython.wraparound(False) +def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=1, + object na_value=no_default, object dtype=object): + """ + Substitute for np.vectorize with pandas-friendly dtype inference. + + Parameters + ---------- + arr : ndarray + f : function + mask : ndarray + uint8 dtype ndarray indicating values not to apply `f` to. + convert : bool, default True + Whether to call `maybe_convert_objects` on the resulting ndarray + na_value : Any, optional + The result value to use for masked values. By default, the + input value is used + dtype : numpy.dtype + The numpy dtype to use for the result ndarray. 
+ + Returns + ------- + ndarray + """ + cdef: + Py_ssize_t i, n + ndarray result + object val + + n = len(arr) + result = np.empty(n, dtype=dtype) + for i in range(n): + if mask[i]: + if na_value is no_default: + val = arr[i] + else: + val = na_value + else: + val = f(arr[i]) + + if cnp.PyArray_IsZeroDim(val): + # unbox 0-dim arrays, GH#690 + # TODO: is there a faster way to unbox? + # item_from_zerodim? + val = val.item() + + result[i] = val + + if convert: + return maybe_convert_objects(result, + try_float=0, + convert_datetime=0, + convert_timedelta=0) + + return result + + +@cython.boundscheck(False) +@cython.wraparound(False) +def map_infer(ndarray arr, object f, bint convert=1): + """ + Substitute for np.vectorize with pandas-friendly dtype inference. + + Parameters + ---------- + arr : ndarray + f : function + + Returns + ------- + ndarray + """ + cdef: + Py_ssize_t i, n + ndarray[object] result + object val + + n = len(arr) + result = np.empty(n, dtype=object) + for i in range(n): + val = f(arr[i]) + + if cnp.PyArray_IsZeroDim(val): + # unbox 0-dim arrays, GH#690 + # TODO: is there a faster way to unbox? + # item_from_zerodim? + val = val.item() + + result[i] = val + + if convert: + return maybe_convert_objects(result, + try_float=0, + convert_datetime=0, + convert_timedelta=0) + + return result + + +def to_object_array(rows: object, int min_width=0): + """ + Convert a list of lists into an object array. + + Parameters + ---------- + rows : 2-d array (N, K) + List of lists to be converted into an array. + min_width : int + Minimum width of the object array. If a list + in `rows` contains fewer than `width` elements, + the remaining elements in the corresponding row + will all be `NaN`. + + Returns + ------- + numpy array of the object dtype. 
+ """ + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + list row + + rows = list(rows) + n = len(rows) + + k = min_width + for i in range(n): + tmp = len(rows[i]) + if tmp > k: + k = tmp + + result = np.empty((n, k), dtype=object) + + for i in range(n): + row = list(rows[i]) + + for j in range(len(row)): + result[i, j] = row[j] + + return result + + +def tuples_to_object_array(ndarray[object] tuples): + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + tuple tup + + n = len(tuples) + k = len(tuples[0]) + result = np.empty((n, k), dtype=object) + for i in range(n): + tup = tuples[i] + for j in range(k): + result[i, j] = tup[j] + + return result + + +def to_object_array_tuples(rows: object): + """ + Convert a list of tuples into an object array. Any subclass of + tuple in `rows` will be casted to tuple. + + Parameters + ---------- + rows : 2-d array (N, K) + List of tuples to be converted into an array. + + Returns + ------- + numpy array of the object dtype. + """ + cdef: + Py_ssize_t i, j, n, k, tmp + ndarray[object, ndim=2] result + tuple row + + rows = list(rows) + n = len(rows) + + k = 0 + for i in range(n): + tmp = 1 if checknull(rows[i]) else len(rows[i]) + if tmp > k: + k = tmp + + result = np.empty((n, k), dtype=object) + + try: + for i in range(n): + row = rows[i] + for j in range(len(row)): + result[i, j] = row[j] + except TypeError: + # e.g. 
"Expected tuple, got list" + # upcast any subclasses to tuple + for i in range(n): + row = (rows[i],) if checknull(rows[i]) else tuple(rows[i]) + for j in range(len(row)): + result[i, j] = row[j] + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +def fast_multiget(dict mapping, ndarray keys, default=np.nan): + cdef: + Py_ssize_t i, n = len(keys) + object val + ndarray[object] output = np.empty(n, dtype='O') + + if n == 0: + # kludge, for Series + return np.empty(0, dtype='f8') + + keys = getattr(keys, 'values', keys) + + for i in range(n): + val = keys[i] + if val in mapping: + output[i] = mapping[val] + else: + output[i] = default + + return maybe_convert_objects(output) diff --git a/pandas/_libs/missing.pxd b/pandas/_libs/missing.pxd new file mode 100644 index 00000000..d4303ac2 --- /dev/null +++ b/pandas/_libs/missing.pxd @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +from numpy cimport ndarray, uint8_t + +cpdef bint checknull(object val) +cpdef bint checknull_old(object val) +cpdef ndarray[uint8_t] isnaobj(ndarray arr) + +cdef bint is_null_datetime64(v) +cdef bint is_null_timedelta64(v) +cdef bint is_null_period(v) + +cdef class C_NAType: + pass + +cdef C_NAType C_NA diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx new file mode 100644 index 00000000..c54cb652 --- /dev/null +++ b/pandas/_libs/missing.pyx @@ -0,0 +1,488 @@ +import cython +from cython import Py_ssize_t + +import numbers + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, int64_t, uint8_t, float64_t +cnp.import_array() + +cimport pandas._libs.util as util + +from pandas._libs.tslibs.np_datetime cimport ( + get_timedelta64_value, get_datetime64_value) +from pandas._libs.tslibs.nattype cimport ( + checknull_with_nat, c_NaT as NaT, is_null_datetimelike) +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op + +from pandas.compat import is_platform_32bit + + +cdef: + float64_t INF = np.inf + float64_t NEGINF = -INF + + 
int64_t NPY_NAT = util.get_nat() + + bint is_32bit = is_platform_32bit() + + +cpdef bint checknull(object val): + """ + Return boolean describing of the input is NA-like, defined here as any + of: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + val : object + + Returns + ------- + result : bool + + Notes + ----- + The difference between `checknull` and `checknull_old` is that `checknull` + does *not* consider INF or NEGINF to be NA. + """ + return val is C_NA or is_null_datetimelike(val, inat_is_null=False) + + +cpdef bint checknull_old(object val): + """ + Return boolean describing of the input is NA-like, defined here as any + of: + - None + - nan + - INF + - NEGINF + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + val : object + + Returns + ------- + result : bool + + Notes + ----- + The difference between `checknull` and `checknull_old` is that `checknull` + does *not* consider INF or NEGINF to be NA. + """ + if checknull(val): + return True + elif util.is_float_object(val) or util.is_complex_object(val): + return val == INF or val == NEGINF + return False + + +cdef inline bint _check_none_nan_inf_neginf(object val): + return val is None or (isinstance(val, float) and + (val != val or val == INF or val == NEGINF)) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef ndarray[uint8_t] isnaobj(ndarray arr): + """ + Return boolean mask denoting which elements of a 1-D array are na-like, + according to the criteria defined in `checknull`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, n + object val + ndarray[uint8_t] result + + assert arr.ndim == 1, "'arr' must be 1-D." 
+ + n = len(arr) + result = np.empty(n, dtype=np.uint8) + for i in range(n): + val = arr[i] + result[i] = checknull(val) + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj_old(arr: ndarray) -> ndarray: + """ + Return boolean mask denoting which elements of a 1-D array are na-like, + defined as being any of: + - None + - nan + - INF + - NEGINF + - NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + """ + cdef: + Py_ssize_t i, n + object val + ndarray[uint8_t] result + + assert arr.ndim == 1, "'arr' must be 1-D." + + n = len(arr) + result = np.zeros(n, dtype=np.uint8) + for i in range(n): + val = arr[i] + result[i] = val is NaT or _check_none_nan_inf_neginf(val) + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj2d(arr: ndarray) -> ndarray: + """ + Return boolean mask denoting which elements of a 2-D array are na-like, + according to the criteria defined in `checknull`: + - None + - nan + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + + Notes + ----- + The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d` + does *not* consider INF or NEGINF to be NA. + """ + cdef: + Py_ssize_t i, j, n, m + object val + ndarray[uint8_t, ndim=2] result + + assert arr.ndim == 2, "'arr' must be 2-D." 
+ + n, m = (<object>arr).shape + result = np.zeros((n, m), dtype=np.uint8) + for i in range(n): + for j in range(m): + val = arr[i, j] + if checknull(val): + result[i, j] = 1 + return result.view(np.bool_) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def isnaobj2d_old(arr: ndarray) -> ndarray: + """ + Return boolean mask denoting which elements of a 2-D array are na-like, + according to the criteria defined in `checknull_old`: + - None + - nan + - INF + - NEGINF + - NaT + - np.datetime64 representation of NaT + - np.timedelta64 representation of NaT + + Parameters + ---------- + arr : ndarray + + Returns + ------- + result : ndarray (dtype=np.bool_) + + Notes + ----- + The difference between `isnaobj2d` and `isnaobj2d_old` is that `isnaobj2d` + does *not* consider INF or NEGINF to be NA. + """ + cdef: + Py_ssize_t i, j, n, m + object val + ndarray[uint8_t, ndim=2] result + + assert arr.ndim == 2, "'arr' must be 2-D." + + n, m = (<object>arr).shape + result = np.zeros((n, m), dtype=np.uint8) + for i in range(n): + for j in range(m): + val = arr[i, j] + if checknull_old(val): + result[i, j] = 1 + return result.view(np.bool_) + + +def isposinf_scalar(val: object) -> bool: + if util.is_float_object(val) and val == INF: + return True + else: + return False + + +def isneginf_scalar(val: object) -> bool: + if util.is_float_object(val) and val == NEGINF: + return True + else: + return False + + +cdef inline bint is_null_datetime64(v): + # determine if we have a null for a datetime (or integer versions), + # excluding np.timedelta64('nat') + if checknull_with_nat(v): + return True + elif util.is_datetime64_object(v): + return get_datetime64_value(v) == NPY_NAT + return False + + +cdef inline bint is_null_timedelta64(v): + # determine if we have a null for a timedelta (or integer versions), + # excluding np.datetime64('nat') + if checknull_with_nat(v): + return True + elif util.is_timedelta64_object(v): + return get_timedelta64_value(v) == NPY_NAT + return False + + +cdef
inline bint is_null_period(v): + # determine if we have a null for a Period (or integer versions), + # excluding np.datetime64('nat') and np.timedelta64('nat') + return checknull_with_nat(v) + + +# ----------------------------------------------------------------------------- +# Implementation of NA singleton + + +def _create_binary_propagating_op(name, is_divmod=False): + + def method(self, other): + if (other is C_NA or isinstance(other, str) + or isinstance(other, (numbers.Number, np.bool_)) + or isinstance(other, np.ndarray) and not other.shape): + # Need the other.shape clause to handle NumPy scalars, + # since we do a setitem on `out` below, which + # won't work for NumPy scalars. + if is_divmod: + return NA, NA + else: + return NA + + elif isinstance(other, np.ndarray): + out = np.empty(other.shape, dtype=object) + out[:] = NA + + if is_divmod: + return out, out.copy() + else: + return out + + return NotImplemented + + method.__name__ = name + return method + + +def _create_unary_propagating_op(name): + def method(self): + return NA + + method.__name__ = name + return method + + +cdef class C_NAType: + pass + + +class NAType(C_NAType): + """ + NA ("not available") missing value indicator. + + .. warning:: + + Experimental: the behaviour of NA can still change without warning. + + .. versionadded:: 1.0.0 + + The NA singleton is a missing value indicator defined by pandas. It is + used in certain new extension dtypes (currently the "string" dtype). 
+ """ + + _instance = None + + def __new__(cls, *args, **kwargs): + if NAType._instance is None: + NAType._instance = C_NAType.__new__(cls, *args, **kwargs) + return NAType._instance + + def __repr__(self) -> str: + return "<NA>" + + def __bool__(self): + raise TypeError("boolean value of NA is ambiguous") + + def __hash__(self): + # GH 30013: Ensure hash is large enough to avoid hash collisions with integers + exponent = 31 if is_32bit else 61 + return 2 ** exponent - 1 + + def __reduce__(self): + return "NA" + + # Binary arithmetic and comparison ops -> propagate + + __add__ = _create_binary_propagating_op("__add__") + __radd__ = _create_binary_propagating_op("__radd__") + __sub__ = _create_binary_propagating_op("__sub__") + __rsub__ = _create_binary_propagating_op("__rsub__") + __mul__ = _create_binary_propagating_op("__mul__") + __rmul__ = _create_binary_propagating_op("__rmul__") + __matmul__ = _create_binary_propagating_op("__matmul__") + __rmatmul__ = _create_binary_propagating_op("__rmatmul__") + __truediv__ = _create_binary_propagating_op("__truediv__") + __rtruediv__ = _create_binary_propagating_op("__rtruediv__") + __floordiv__ = _create_binary_propagating_op("__floordiv__") + __rfloordiv__ = _create_binary_propagating_op("__rfloordiv__") + __mod__ = _create_binary_propagating_op("__mod__") + __rmod__ = _create_binary_propagating_op("__rmod__") + __divmod__ = _create_binary_propagating_op("__divmod__", is_divmod=True) + __rdivmod__ = _create_binary_propagating_op("__rdivmod__", is_divmod=True) + # __lshift__ and __rshift__ are not implemented + + __eq__ = _create_binary_propagating_op("__eq__") + __ne__ = _create_binary_propagating_op("__ne__") + __le__ = _create_binary_propagating_op("__le__") + __lt__ = _create_binary_propagating_op("__lt__") + __gt__ = _create_binary_propagating_op("__gt__") + __ge__ = _create_binary_propagating_op("__ge__") + + # Unary ops + + __neg__ = _create_unary_propagating_op("__neg__") + __pos__ =
_create_unary_propagating_op("__pos__") + __abs__ = _create_unary_propagating_op("__abs__") + __invert__ = _create_unary_propagating_op("__invert__") + + # pow has special + def __pow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 0: + # returning positive is correct for +/- 0. + return type(other)(1) + else: + return NA + elif isinstance(other, np.ndarray): + return np.where(other == 0, other.dtype.type(1), NA) + + return NotImplemented + + def __rpow__(self, other): + if other is C_NA: + return NA + elif isinstance(other, (numbers.Number, np.bool_)): + if other == 1: + return other + else: + return NA + elif isinstance(other, np.ndarray): + return np.where(other == 1, other, NA) + + return NotImplemented + + # Logical ops using Kleene logic + + def __and__(self, other): + if other is False: + return False + elif other is True or other is C_NA: + return NA + else: + return NotImplemented + + __rand__ = __and__ + + def __or__(self, other): + if other is True: + return True + elif other is False or other is C_NA: + return NA + else: + return NotImplemented + + __ror__ = __or__ + + def __xor__(self, other): + if other is False or other is True or other is C_NA: + return NA + return NotImplemented + + __rxor__ = __xor__ + + __array_priority__ = 1000 + _HANDLED_TYPES = (np.ndarray, numbers.Number, str, np.bool_) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + types = self._HANDLED_TYPES + (NAType,) + for x in inputs: + if not isinstance(x, types): + return NotImplemented + + if method != "__call__": + raise ValueError(f"ufunc method '{method}' not supported for NA") + result = maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is NotImplemented: + # For a NumPy ufunc that's not a binop, like np.logaddexp + index = [i for i, x in enumerate(inputs) if x is NA][0] + result = np.broadcast_arrays(*inputs)[index] + if result.ndim == 0: + result = 
result.item() + if ufunc.nout > 1: + result = (NA,) * ufunc.nout + + return result + + +C_NA = NAType() # C-visible +NA = C_NA # Python-visible diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx new file mode 100644 index 00000000..abe1484e --- /dev/null +++ b/pandas/_libs/ops.pyx @@ -0,0 +1,292 @@ +import operator + +from cpython.object cimport (PyObject_RichCompareBool, + Py_EQ, Py_NE, Py_LT, Py_LE, Py_GT, Py_GE) + +import cython +from cython import Py_ssize_t + +import numpy as np +from numpy cimport ndarray, uint8_t, import_array +import_array() + + +from pandas._libs.util cimport UINT8_MAX, is_nan + +from pandas._libs.missing cimport checknull + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_compare(object[:] values, object val, object op): + """ + Compare each element of `values` array with the scalar `val`, with + the comparison operation described by `op`. + + Parameters + ---------- + values : ndarray[object] + val : object + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(values) + ndarray[uint8_t, cast=True] result + bint isnull_val + int flag + object x + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + isnull_val = checknull(val) + + if flag == Py_NE: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = True + elif isnull_val: + result[i] = True + else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except TypeError: + result[i] = True + elif flag == Py_EQ: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + 
else: + try: + result[i] = PyObject_RichCompareBool(x, val, flag) + except TypeError: + result[i] = False + + else: + for i in range(n): + x = values[i] + if checknull(x): + result[i] = False + elif isnull_val: + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, val, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_compare(object[:] left, object[:] right, object op): + """ + Compare the elements of `left` with the elements of `right` pointwise, + with the comparison operation described by `op`. + + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : {operator.eq, operator.ne, + operator.le, operator.lt, + operator.ge, operator.gt} + + Returns + ------- + result : ndarray[bool] + """ + cdef: + Py_ssize_t i, n = len(left) + ndarray[uint8_t, cast=True] result + int flag + + if n != len(right): + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + + if op is operator.lt: + flag = Py_LT + elif op is operator.le: + flag = Py_LE + elif op is operator.gt: + flag = Py_GT + elif op is operator.ge: + flag = Py_GE + elif op is operator.eq: + flag = Py_EQ + elif op is operator.ne: + flag = Py_NE + else: + raise ValueError('Unrecognized operator') + + result = np.empty(n, dtype=bool).view(np.uint8) + + if flag == Py_NE: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = True + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + else: + for i in range(n): + x = left[i] + y = right[i] + + if checknull(x) or checknull(y): + result[i] = False + else: + result[i] = PyObject_RichCompareBool(x, y, flag) + + return result.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def scalar_binop(object[:] values, object val, object op): + """ + Apply the given binary operator `op` between each element of the array + `values` and the scalar `val`. 
+ + Parameters + ---------- + values : ndarray[object] + val : object + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object[:] result + object x + + result = np.empty(n, dtype=object) + if val is None or is_nan(val): + result[:] = val + return result.base # `.base` to access underlying np.ndarray + + for i in range(n): + x = values[i] + if x is None or is_nan(x): + result[i] = x + else: + result[i] = op(x, val) + + return maybe_convert_bool(result.base) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def vec_binop(object[:] left, object[:] right, object op): + """ + Apply the given binary operator `op` pointwise to the elements of + arrays `left` and `right`. + + Parameters + ---------- + left : ndarray[object] + right : ndarray[object] + op : binary operator + + Returns + ------- + result : ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(left) + object[:] result + + if n != len(right): + raise ValueError(f'Arrays were different lengths: {n} vs {len(right)}') + + result = np.empty(n, dtype=object) + + for i in range(n): + x = left[i] + y = right[i] + try: + result[i] = op(x, y) + except TypeError: + if x is None or is_nan(x): + result[i] = x + elif y is None or is_nan(y): + result[i] = y + else: + raise + + return maybe_convert_bool(result.base) # `.base` to access np.ndarray + + +def maybe_convert_bool(ndarray[object] arr, + true_values=None, false_values=None): + cdef: + Py_ssize_t i, n + ndarray[uint8_t] result + object val + set true_vals, false_vals + int na_count = 0 + + n = len(arr) + result = np.empty(n, dtype=np.uint8) + + # the defaults + true_vals = {'True', 'TRUE', 'true'} + false_vals = {'False', 'FALSE', 'false'} + + if true_values is not None: + true_vals = true_vals | set(true_values) + + if false_values is not None: + false_vals = false_vals | set(false_values) + + for i in range(n): + val = arr[i] + + if isinstance(val, bool): + if val is True: + result[i] = 
1 + else: + result[i] = 0 + elif val in true_vals: + result[i] = 1 + elif val in false_vals: + result[i] = 0 + elif isinstance(val, float): + result[i] = UINT8_MAX + na_count += 1 + else: + return arr + + if na_count > 0: + mask = result == UINT8_MAX + arr = result.view(np.bool_).astype(object) + np.putmask(arr, mask, np.nan) + return arr + else: + return result.view(np.bool_) diff --git a/pandas/_libs/ops_dispatch.pyx b/pandas/_libs/ops_dispatch.pyx new file mode 100644 index 00000000..f6ecef20 --- /dev/null +++ b/pandas/_libs/ops_dispatch.pyx @@ -0,0 +1,94 @@ +DISPATCHED_UFUNCS = { + "add", + "sub", + "mul", + "pow", + "mod", + "floordiv", + "truediv", + "divmod", + "eq", + "ne", + "lt", + "gt", + "le", + "ge", + "remainder", + "matmul", + "or", + "xor", + "and", +} +UFUNC_ALIASES = { + "subtract": "sub", + "multiply": "mul", + "floor_divide": "floordiv", + "true_divide": "truediv", + "power": "pow", + "remainder": "mod", + "divide": "div", + "equal": "eq", + "not_equal": "ne", + "less": "lt", + "less_equal": "le", + "greater": "gt", + "greater_equal": "ge", + "bitwise_or": "or", + "bitwise_and": "and", + "bitwise_xor": "xor", +} + +# For op(., Array) -> Array.__r{op}__ +REVERSED_NAMES = { + "lt": "__gt__", + "le": "__ge__", + "gt": "__lt__", + "ge": "__le__", + "eq": "__eq__", + "ne": "__ne__", +} + + +def maybe_dispatch_ufunc_to_dunder_op( + object self, object ufunc, str method, *inputs, **kwargs +): + """ + Dispatch a ufunc to the equivalent dunder method. + + Parameters + ---------- + self : ArrayLike + The array whose dunder method we dispatch to + ufunc : Callable + A NumPy ufunc + method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'} + inputs : ArrayLike + The input arrays. + kwargs : Any + The additional keyword arguments, e.g. ``out``. 
+ + Returns + ------- + result : Any + The result of applying the ufunc + """ + # special has the ufuncs we dispatch to the dunder op on + + op_name = ufunc.__name__ + op_name = UFUNC_ALIASES.get(op_name, op_name) + + def not_implemented(*args, **kwargs): + return NotImplemented + + if (method == "__call__" + and op_name in DISPATCHED_UFUNCS + and kwargs.get("out") is None): + if isinstance(inputs[0], type(self)): + name = f"__{op_name}__" + return getattr(self, name, not_implemented)(inputs[1]) + else: + name = REVERSED_NAMES.get(op_name, f"__r{op_name}__") + result = getattr(self, name, not_implemented)(inputs[0]) + return result + else: + return NotImplemented diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx new file mode 100644 index 00000000..ef887444 --- /dev/null +++ b/pandas/_libs/parsers.pyx @@ -0,0 +1,2218 @@ +# Copyright (c) 2012, Lambda Foundry, Inc. +# See LICENSE for the license +import bz2 +import gzip +import io +import os +import sys +import time +import warnings +import zipfile + +from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE +from errno import ENOENT + +from libc.stdlib cimport free +from libc.string cimport strncpy, strlen, strcasecmp + +import cython +from cython import Py_ssize_t + +from cpython.bytes cimport PyBytes_AsString, PyBytes_FromString +from cpython.exc cimport PyErr_Occurred, PyErr_Fetch +from cpython.object cimport PyObject +from cpython.ref cimport Py_XDECREF +from cpython.unicode cimport PyUnicode_AsUTF8String, PyUnicode_Decode + + +cdef extern from "Python.h": + object PyUnicode_FromString(char *v) + + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, uint8_t, uint64_t, int64_t, float64_t +cnp.import_array() + +from pandas._libs.util cimport UINT64_MAX, INT64_MAX, INT64_MIN +import pandas._libs.lib as lib + +from pandas._libs.khash cimport ( + khiter_t, + kh_str_t, kh_init_str, kh_put_str, kh_exist_str, + kh_get_str, kh_destroy_str, + kh_float64_t, kh_get_float64, 
kh_destroy_float64, + kh_put_float64, kh_init_float64, kh_resize_float64, + kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox, + kh_destroy_strbox, + kh_str_starts_t, kh_put_str_starts_item, kh_init_str_starts, + kh_get_str_starts_item, kh_destroy_str_starts, kh_resize_str_starts) + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_integer_dtype, is_float_dtype, + is_bool_dtype, is_object_dtype, + is_datetime64_dtype, + pandas_dtype, is_extension_array_dtype) +from pandas.core.arrays import Categorical +from pandas.core.dtypes.concat import union_categoricals +import pandas.io.common as icom + +from pandas.compat import _import_lzma, _get_lzma_file +from pandas.errors import (ParserError, DtypeWarning, + EmptyDataError, ParserWarning) + +lzma = _import_lzma() + +cdef: + float64_t INF = np.inf + float64_t NEGINF = -INF + + +cdef extern from "headers/portable.h": + # I *think* this is here so that strcasecmp is defined on Windows + # so we don't get + # `parsers.obj : error LNK2001: unresolved external symbol strcasecmp` + # in Appveyor. + # In a sane world, the `from libc.string cimport` above would fail + # loudly. 
+ pass + + +cdef extern from "parser/tokenizer.h": + + ctypedef enum ParserState: + START_RECORD + START_FIELD + ESCAPED_CHAR + IN_FIELD + IN_QUOTED_FIELD + ESCAPE_IN_QUOTED_FIELD + QUOTE_IN_QUOTED_FIELD + EAT_CRNL + EAT_CRNL_NOP + EAT_WHITESPACE + EAT_COMMENT + EAT_LINE_COMMENT + WHITESPACE_LINE + SKIP_LINE + FINISHED + + enum: ERROR_OVERFLOW + + ctypedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read, + int *status) + ctypedef int (*io_cleanup)(void *src) + + ctypedef struct parser_t: + void *source + io_callback cb_io + io_cleanup cb_cleanup + + int64_t chunksize # Number of bytes to prepare for each chunk + char *data # pointer to data to be processed + int64_t datalen # amount of data available + int64_t datapos + + # where to write out tokenized data + char *stream + uint64_t stream_len + uint64_t stream_cap + + # Store words in (potentially ragged) matrix for now, hmm + char **words + int64_t *word_starts # where we are in the stream + uint64_t words_len + uint64_t words_cap + uint64_t max_words_cap # maximum word cap encountered + + char *pword_start # pointer to stream start of current field + int64_t word_start # position start of current field + + int64_t *line_start # position in words for start of line + int64_t *line_fields # Number of fields in each line + uint64_t lines # Number of lines observed + uint64_t file_lines # Number of lines observed (with bad/skipped) + uint64_t lines_cap # Vector capacity + + # Tokenizing stuff + ParserState state + int doublequote # is " represented by ""? */ + char delimiter # field separator */ + int delim_whitespace # consume tabs / spaces instead + char quotechar # quote character */ + char escapechar # escape character */ + char lineterminator + int skipinitialspace # ignore spaces following delimiter? 
*/ + int quoting # style of quoting to write */ + + char commentchar + int allow_embedded_newline + int strict # raise exception on bad CSV */ + + int usecols + + int expected_fields + int error_bad_lines + int warn_bad_lines + + # floating point options + char decimal + char sci + + # thousands separator (comma, period) + char thousands + + int header # Boolean: 1: has header, 0: no header + int64_t header_start # header row start + uint64_t header_end # header row end + + void *skipset + PyObject *skipfunc + int64_t skip_first_N_rows + int64_t skipfooter + # pick one, depending on whether the converter requires GIL + float64_t (*double_converter)(const char *, char **, + char, char, char, + int, int *, int *) nogil + + # error handling + char *warn_msg + char *error_msg + + int64_t skip_empty_lines + + ctypedef struct coliter_t: + char **words + int64_t *line_start + int64_t col + + ctypedef struct uint_state: + int seen_sint + int seen_uint + int seen_null + + void uint_state_init(uint_state *self) + int uint64_conflict(uint_state *self) + + void coliter_setup(coliter_t *it, parser_t *parser, + int64_t i, int64_t start) nogil + void COLITER_NEXT(coliter_t, const char *) nogil + + parser_t* parser_new() + + int parser_init(parser_t *self) nogil + void parser_free(parser_t *self) nogil + void parser_del(parser_t *self) nogil + int parser_add_skiprow(parser_t *self, int64_t row) + + int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) + + void parser_set_default_options(parser_t *self) + + int parser_consume_rows(parser_t *self, size_t nrows) + + int parser_trim_buffers(parser_t *self) + + int tokenize_all_rows(parser_t *self) nogil + int tokenize_nrows(parser_t *self, size_t nrows) nogil + + int64_t str_to_int64(char *p_item, int64_t int_min, + int64_t int_max, int *error, char tsep) nogil + uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, + uint64_t uint_max, int *error, char tsep) nogil + + float64_t xstrtod(const char *p, char 
**q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + float64_t precise_xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + float64_t round_trip(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + + int to_boolean(const char *item, uint8_t *val) nogil + + +cdef extern from "parser/io.h": + void *new_mmap(char *fname) + int del_mmap(void *src) + void* buffer_mmap_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status) + + void *new_file_source(char *fname, size_t buffer_size) + + void *new_rd_source(object obj) + + int del_file_source(void *src) + int del_rd_source(void *src) + + void* buffer_file_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status) + + void* buffer_rd_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status) + + +DEFAULT_CHUNKSIZE = 256 * 1024 + + +cdef class TextReader: + """ + + # source: StringIO or file object + + """ + + cdef: + parser_t *parser + object file_handle, na_fvalues + object true_values, false_values + object handle + bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns + uint64_t parser_start + list clocks + char *c_encoding + kh_str_starts_t *false_set + kh_str_starts_t *true_set + + cdef public: + int64_t leading_cols, table_width, skipfooter, buffer_lines + object allow_leading_cols + object delimiter, converters, delim_whitespace + object na_values + object memory_map + object header, orig_header, names, header_start, header_end + object index_col + object low_memory + object skiprows + object dtype + object encoding + object compression + object mangle_dupe_cols + object usecols + list dtype_cast_order + set unnamed_cols + set noconvert + + def __cinit__(self, source, + delimiter=b',', + + header=0, + header_start=0, + header_end=0, + index_col=None, + names=None, + + 
memory_map=False, + tokenize_chunksize=DEFAULT_CHUNKSIZE, + delim_whitespace=False, + + compression=None, + + converters=None, + + skipinitialspace=False, + escapechar=None, + doublequote=True, + quotechar=b'"', + quoting=0, + lineterminator=None, + + encoding=None, + + comment=None, + decimal=b'.', + thousands=None, + + dtype=None, + usecols=None, + error_bad_lines=True, + warn_bad_lines=True, + + na_filter=True, + na_values=None, + na_fvalues=None, + keep_default_na=True, + + true_values=None, + false_values=None, + allow_leading_cols=True, + low_memory=False, + skiprows=None, + skipfooter=0, + verbose=False, + mangle_dupe_cols=True, + float_precision=None, + skip_blank_lines=True): + + # set encoding for native Python and C library + if encoding is not None: + if not isinstance(encoding, bytes): + encoding = encoding.encode('utf-8') + encoding = encoding.lower() + self.c_encoding = encoding + else: + self.c_encoding = NULL + + self.encoding = encoding + + self.parser = parser_new() + self.parser.chunksize = tokenize_chunksize + + self.mangle_dupe_cols = mangle_dupe_cols + + # For timekeeping + self.clocks = [] + + self.compression = compression + self.memory_map = memory_map + + self.parser.usecols = (usecols is not None) + + self._setup_parser_source(source) + parser_set_default_options(self.parser) + + parser_init(self.parser) + + if delim_whitespace: + self.parser.delim_whitespace = delim_whitespace + else: + if len(delimiter) > 1: + raise ValueError('only length-1 separators excluded right now') + self.parser.delimiter = ord(delimiter) + + # ---------------------------------------- + # parser options + + self.parser.doublequote = doublequote + self.parser.skipinitialspace = skipinitialspace + self.parser.skip_empty_lines = skip_blank_lines + + if lineterminator is not None: + if len(lineterminator) != 1: + raise ValueError('Only length-1 line terminators supported') + self.parser.lineterminator = ord(lineterminator) + + if len(decimal) != 1: + raise 
ValueError('Only length-1 decimal markers supported') + self.parser.decimal = ord(decimal) + + if thousands is not None: + if len(thousands) != 1: + raise ValueError('Only length-1 thousands markers supported') + self.parser.thousands = ord(thousands) + + if escapechar is not None: + if len(escapechar) != 1: + raise ValueError('Only length-1 escapes supported') + self.parser.escapechar = ord(escapechar) + + self._set_quoting(quotechar, quoting) + + dtype_order = ['int64', 'float64', 'bool', 'object'] + if quoting == QUOTE_NONNUMERIC: + # consistent with csv module semantics, cast all to float + dtype_order = dtype_order[1:] + self.dtype_cast_order = [np.dtype(x) for x in dtype_order] + + if comment is not None: + if len(comment) > 1: + raise ValueError('Only length-1 comment characters supported') + self.parser.commentchar = ord(comment) + + # error handling of bad lines + self.parser.error_bad_lines = int(error_bad_lines) + self.parser.warn_bad_lines = int(warn_bad_lines) + + self.skiprows = skiprows + if skiprows is not None: + self._make_skiprow_set() + + self.skipfooter = skipfooter + + # suboptimal + if usecols is not None: + self.has_usecols = 1 + # GH-20558, validate usecols at higher level and only pass clean + # usecols into TextReader. 
+ self.usecols = usecols + + # XXX + if skipfooter > 0: + self.parser.error_bad_lines = 0 + self.parser.warn_bad_lines = 0 + + self.delimiter = delimiter + self.delim_whitespace = delim_whitespace + + self.na_values = na_values + if na_fvalues is None: + na_fvalues = set() + self.na_fvalues = na_fvalues + + self.true_values = _maybe_encode(true_values) + _true_values + self.false_values = _maybe_encode(false_values) + _false_values + + self.true_set = kset_from_list(self.true_values) + self.false_set = kset_from_list(self.false_values) + + self.keep_default_na = keep_default_na + self.converters = converters + self.na_filter = na_filter + + self.verbose = verbose + self.low_memory = low_memory + + if float_precision == "round_trip": + # see gh-15140 + self.parser.double_converter = round_trip + elif float_precision == "high": + self.parser.double_converter = precise_xstrtod + else: + self.parser.double_converter = xstrtod + + if isinstance(dtype, dict): + dtype = {k: pandas_dtype(dtype[k]) + for k in dtype} + elif dtype is not None: + dtype = pandas_dtype(dtype) + + self.dtype = dtype + + # XXX + self.noconvert = set() + + self.index_col = index_col + + # ---------------------------------------- + # header stuff + + self.allow_leading_cols = allow_leading_cols + self.leading_cols = 0 + + # TODO: no header vs. 
header is not the first row + self.has_mi_columns = 0 + self.orig_header = header + if header is None: + # sentinel value + self.parser.header_start = -1 + self.parser.header_end = -1 + self.parser.header = -1 + self.parser_start = 0 + self.header = [] + else: + if isinstance(header, list): + if len(header) > 1: + # need to artificially skip the final line + # which is still a header line + header = list(header) + header.append(header[-1] + 1) + self.parser.header_end = header[-1] + self.has_mi_columns = 1 + else: + self.parser.header_end = header[0] + + self.parser_start = header[-1] + 1 + self.parser.header_start = header[0] + self.parser.header = header[0] + self.header = header + else: + self.parser.header_start = header + self.parser.header_end = header + self.parser_start = header + 1 + self.parser.header = header + self.header = [ header ] + + self.names = names + self.header, self.table_width, self.unnamed_cols = self._get_header() + + if not self.table_width: + raise EmptyDataError("No columns to parse from file") + + # Compute buffer_lines as function of table width. + heuristic = 2**20 // self.table_width + self.buffer_lines = 1 + while self.buffer_lines * 2 < heuristic: + self.buffer_lines *= 2 + + def __init__(self, *args, **kwargs): + pass + + def __dealloc__(self): + parser_free(self.parser) + if self.true_set: + kh_destroy_str_starts(self.true_set) + self.true_set = NULL + if self.false_set: + kh_destroy_str_starts(self.false_set) + self.false_set = NULL + parser_del(self.parser) + + def close(self): + # we need to properly close an open derived + # filehandle here, e.g. 
and UTFRecoder + if self.handle is not None: + self.handle.close() + + # also preemptively free all allocated memory + parser_free(self.parser) + if self.true_set: + kh_destroy_str_starts(self.true_set) + self.true_set = NULL + if self.false_set: + kh_destroy_str_starts(self.false_set) + self.false_set = NULL + + def set_error_bad_lines(self, int status): + self.parser.error_bad_lines = status + + def _set_quoting(self, quote_char, quoting): + if not isinstance(quoting, int): + raise TypeError('"quoting" must be an integer') + + if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE: + raise TypeError('bad "quoting" value') + + if not isinstance(quote_char, (str, bytes)) and quote_char is not None: + dtype = type(quote_char).__name__ + raise TypeError(f'"quotechar" must be string, not {dtype}') + + if quote_char is None or quote_char == '': + if quoting != QUOTE_NONE: + raise TypeError("quotechar must be set if quoting enabled") + self.parser.quoting = quoting + self.parser.quotechar = -1 + elif len(quote_char) > 1: # 0-len case handled earlier + raise TypeError('"quotechar" must be a 1-character string') + else: + self.parser.quoting = quoting + self.parser.quotechar = ord(quote_char) + + cdef _make_skiprow_set(self): + if isinstance(self.skiprows, (int, np.integer)): + parser_set_skipfirstnrows(self.parser, self.skiprows) + elif not callable(self.skiprows): + for i in self.skiprows: + parser_add_skiprow(self.parser, i) + else: + self.parser.skipfunc = self.skiprows + + cdef _setup_parser_source(self, source): + cdef: + int status + void *ptr + + self.parser.cb_io = NULL + self.parser.cb_cleanup = NULL + + if self.compression: + if self.compression == 'gzip': + if isinstance(source, str): + source = gzip.GzipFile(source, 'rb') + else: + source = gzip.GzipFile(fileobj=source) + elif self.compression == 'bz2': + source = bz2.BZ2File(source, 'rb') + elif self.compression == 'zip': + zip_file = zipfile.ZipFile(source) + zip_names = zip_file.namelist() + + if len(zip_names) == 
1: + file_name = zip_names.pop() + source = zip_file.open(file_name) + + elif len(zip_names) == 0: + raise ValueError(f'Zero files found in compressed ' + f'zip file {source}') + else: + raise ValueError(f'Multiple files found in compressed ' + f'zip file {zip_names}') + elif self.compression == 'xz': + if isinstance(source, str): + source = _get_lzma_file(lzma)(source, 'rb') + else: + source = _get_lzma_file(lzma)(filename=source) + else: + raise ValueError(f'Unrecognized compression type: ' + f'{self.compression}') + + if (self.encoding and hasattr(source, "read") and + not hasattr(source, "encoding")): + source = io.TextIOWrapper( + source, self.encoding.decode('utf-8'), newline='') + + self.encoding = b'utf-8' + self.c_encoding = self.encoding + + self.handle = source + + if isinstance(source, str): + encoding = sys.getfilesystemencoding() or "utf-8" + usource = source + source = source.encode(encoding) + + if self.memory_map: + ptr = new_mmap(source) + if ptr == NULL: + # fall back + ptr = new_file_source(source, self.parser.chunksize) + self.parser.cb_io = &buffer_file_bytes + self.parser.cb_cleanup = &del_file_source + else: + self.parser.cb_io = &buffer_mmap_bytes + self.parser.cb_cleanup = &del_mmap + else: + ptr = new_file_source(source, self.parser.chunksize) + self.parser.cb_io = &buffer_file_bytes + self.parser.cb_cleanup = &del_file_source + + if ptr == NULL: + if not os.path.exists(source): + + raise FileNotFoundError( + ENOENT, + f'File {usource} does not exist', + usource) + raise IOError('Initializing from file failed') + + self.parser.source = ptr + + elif hasattr(source, 'read'): + # e.g., StringIO + + ptr = new_rd_source(source) + if ptr == NULL: + raise IOError('Initializing parser from file-like ' + 'object failed') + + self.parser.source = ptr + self.parser.cb_io = &buffer_rd_bytes + self.parser.cb_cleanup = &del_rd_source + else: + raise IOError(f'Expected file path name or file-like object, ' + f'got {type(source)} type') + + cdef 
_get_header(self): + # header is now a list of lists, so field_count should use header[0] + + cdef: + Py_ssize_t i, start, field_count, passed_count, unnamed_count + char *word + object name, old_name + int status + uint64_t hr, data_line + char *errors = "strict" + StringPath path = _string_path(self.c_encoding) + + header = [] + unnamed_cols = set() + + if self.parser.header_start >= 0: + + # Header is in the file + for level, hr in enumerate(self.header): + + this_header = [] + + if self.parser.lines < hr + 1: + self._tokenize_rows(hr + 2) + + if self.parser.lines == 0: + field_count = 0 + start = self.parser.line_start[0] + + # e.g., if header=3 and file only has 2 lines + elif (self.parser.lines < hr + 1 + and not isinstance(self.orig_header, list)) or ( + self.parser.lines < hr): + msg = self.orig_header + if isinstance(msg, list): + joined = ','.join(str(m) for m in msg) + msg = f"[{joined}], len of {len(msg)}," + raise ParserError( + f'Passed header={msg} but only ' + f'{self.parser.lines} lines in file') + + else: + field_count = self.parser.line_fields[hr] + start = self.parser.line_start[hr] + + counts = {} + unnamed_count = 0 + + for i in range(field_count): + word = self.parser.words[start + i] + + if path == UTF8: + name = PyUnicode_FromString(word) + elif path == ENCODED: + name = PyUnicode_Decode(word, strlen(word), + self.c_encoding, errors) + + # We use this later when collecting placeholder names. 
+ old_name = name + + if name == '': + if self.has_mi_columns: + name = f'Unnamed: {i}_level_{level}' + else: + name = f'Unnamed: {i}' + unnamed_count += 1 + + count = counts.get(name, 0) + + if not self.has_mi_columns and self.mangle_dupe_cols: + while count > 0: + counts[name] = count + 1 + name = f'{name}.{count}' + count = counts.get(name, 0) + + if old_name == '': + unnamed_cols.add(name) + + this_header.append(name) + counts[name] = count + 1 + + if self.has_mi_columns: + + # If we have grabbed an extra line, but it's not in our + # format, save in the buffer, and create an blank extra + # line for the rest of the parsing code. + if hr == self.header[-1]: + lc = len(this_header) + ic = (len(self.index_col) if self.index_col + is not None else 0) + + if lc != unnamed_count and lc - ic > unnamed_count: + hr -= 1 + self.parser_start -= 1 + this_header = [None] * lc + + data_line = hr + 1 + header.append(this_header) + + if self.names is not None: + header = [ self.names ] + + elif self.names is not None: + # Enforce this unless usecols + if not self.has_usecols: + self.parser.expected_fields = len(self.names) + + # Names passed + if self.parser.lines < 1: + self._tokenize_rows(1) + + header = [ self.names ] + data_line = 0 + + if self.parser.lines < 1: + field_count = len(header[0]) + else: + field_count = self.parser.line_fields[data_line] + else: + # No header passed nor to be found in the file + if self.parser.lines < 1: + self._tokenize_rows(1) + + return None, self.parser.line_fields[0], unnamed_cols + + # Corner case, not enough lines in the file + if self.parser.lines < data_line + 1: + field_count = len(header[0]) + else: # not self.has_usecols: + + field_count = self.parser.line_fields[data_line] + + # #2981 + if self.names is not None: + field_count = max(field_count, len(self.names)) + + passed_count = len(header[0]) + + if (self.has_usecols and self.allow_leading_cols and + not callable(self.usecols)): + nuse = len(self.usecols) + if nuse == 
passed_count: + self.leading_cols = 0 + elif self.names is None and nuse < passed_count: + self.leading_cols = field_count - passed_count + elif passed_count != field_count: + raise ValueError('Passed header names ' + 'mismatches usecols') + # oh boy, #2442, #2981 + elif self.allow_leading_cols and passed_count < field_count: + self.leading_cols = field_count - passed_count + + return header, field_count, unnamed_cols + + def read(self, rows=None): + """ + rows=None --> read all rows + """ + cdef: + int status + + if self.low_memory: + # Conserve intermediate space + columns = self._read_low_memory(rows) + else: + # Don't care about memory usage + columns = self._read_rows(rows, 1) + + return columns + + cdef _read_low_memory(self, rows): + cdef: + size_t rows_read = 0 + chunks = [] + + if rows is None: + while True: + try: + chunk = self._read_rows(self.buffer_lines, 0) + if len(chunk) == 0: + break + except StopIteration: + break + else: + chunks.append(chunk) + else: + while rows_read < rows: + try: + crows = min(self.buffer_lines, rows - rows_read) + + chunk = self._read_rows(crows, 0) + if len(chunk) == 0: + break + + rows_read += len(list(chunk.values())[0]) + except StopIteration: + break + else: + chunks.append(chunk) + + parser_trim_buffers(self.parser) + + if len(chunks) == 0: + raise StopIteration + + # destructive to chunks + return _concatenate_chunks(chunks) + + cdef _tokenize_rows(self, size_t nrows): + cdef int status + with nogil: + status = tokenize_nrows(self.parser, nrows) + + if self.parser.warn_msg != NULL: + print(self.parser.warn_msg, file=sys.stderr) + free(self.parser.warn_msg) + self.parser.warn_msg = NULL + + if status < 0: + raise_parser_error('Error tokenizing data', self.parser) + + cdef _read_rows(self, rows, bint trim): + cdef: + int64_t buffered_lines + int64_t irows, footer = 0 + + self._start_clock() + + if rows is not None: + irows = rows + buffered_lines = self.parser.lines - self.parser_start + if buffered_lines < irows: + 
self._tokenize_rows(irows - buffered_lines) + + if self.skipfooter > 0: + raise ValueError('skipfooter can only be used to read ' + 'the whole file') + else: + with nogil: + status = tokenize_all_rows(self.parser) + + if self.parser.warn_msg != NULL: + print(self.parser.warn_msg, file=sys.stderr) + free(self.parser.warn_msg) + self.parser.warn_msg = NULL + + if status < 0: + raise_parser_error('Error tokenizing data', self.parser) + footer = self.skipfooter + + if self.parser_start >= self.parser.lines: + raise StopIteration + self._end_clock('Tokenization') + + self._start_clock() + columns = self._convert_column_data(rows=rows, + footer=footer, + upcast_na=True) + self._end_clock('Type conversion') + self._start_clock() + if len(columns) > 0: + rows_read = len(list(columns.values())[0]) + # trim + parser_consume_rows(self.parser, rows_read) + if trim: + parser_trim_buffers(self.parser) + self.parser_start -= rows_read + + self._end_clock('Parser memory cleanup') + + return columns + + cdef _start_clock(self): + self.clocks.append(time.time()) + + cdef _end_clock(self, what): + if self.verbose: + elapsed = time.time() - self.clocks.pop(-1) + print(f'{what} took: {elapsed * 1000:.2f} ms') + + def set_noconvert(self, i): + self.noconvert.add(i) + + def remove_noconvert(self, i): + self.noconvert.remove(i) + + def _convert_column_data(self, rows=None, upcast_na=False, footer=0): + cdef: + int64_t i + int nused + kh_str_starts_t *na_hashset = NULL + int64_t start, end + object name, na_flist, col_dtype = None + bint na_filter = 0 + int64_t num_cols + + start = self.parser_start + + if rows is None: + end = self.parser.lines + else: + end = min(start + rows, self.parser.lines) + + # FIXME: dont leave commented-out + # # skip footer + # if footer > 0: + # end -= footer + + num_cols = -1 + # Py_ssize_t cast prevents build warning + for i in range(self.parser.lines): + num_cols = (num_cols < self.parser.line_fields[i]) * \ + self.parser.line_fields[i] + \ + (num_cols >= 
self.parser.line_fields[i]) * num_cols + + if self.table_width - self.leading_cols > num_cols: + raise ParserError(f"Too many columns specified: expected " + f"{self.table_width - self.leading_cols} " + f"and found {num_cols}") + + results = {} + nused = 0 + for i in range(self.table_width): + if i < self.leading_cols: + # Pass through leading columns always + name = i + elif (self.usecols and not callable(self.usecols) and + nused == len(self.usecols)): + # Once we've gathered all requested columns, stop. GH5766 + break + else: + name = self._get_column_name(i, nused) + usecols = set() + if callable(self.usecols): + if self.usecols(name): + usecols = {i} + else: + usecols = self.usecols + if self.has_usecols and not (i in usecols or + name in usecols): + continue + nused += 1 + + conv = self._get_converter(i, name) + + col_dtype = None + if self.dtype is not None: + if isinstance(self.dtype, dict): + if name in self.dtype: + col_dtype = self.dtype[name] + elif i in self.dtype: + col_dtype = self.dtype[i] + else: + if self.dtype.names: + # structured array + col_dtype = np.dtype(self.dtype.descr[i][1]) + else: + col_dtype = self.dtype + + if conv: + if col_dtype is not None: + warnings.warn((f"Both a converter and dtype were specified " + f"for column {name} - only the converter will " + f"be used"), ParserWarning, + stacklevel=5) + results[i] = _apply_converter(conv, self.parser, i, start, end, + self.c_encoding) + continue + + # Collect the list of NaN values associated with the column. + # If we aren't supposed to do that, or none are collected, + # we set `na_filter` to `0` (`1` otherwise). + na_flist = set() + + if self.na_filter: + na_list, na_flist = self._get_na_list(i, name) + if na_list is None: + na_filter = 0 + else: + na_filter = 1 + na_hashset = kset_from_list(na_list) + else: + na_filter = 0 + + # Attempt to parse tokens and infer dtype of the column. + # Should return as the desired dtype (inferred or specified). 
cdef inline _convert_tokens(self, Py_ssize_t i, int64_t start, int64_t end,
                            object name, bint na_filter,
                            kh_str_starts_t *na_hashset,
                            object na_flist, object col_dtype):
    """
    Convert the tokens of column ``i`` for rows ``[start, end)``.

    Parameters
    ----------
    i : Py_ssize_t
        Position of the column in the parser buffer.
    start, end : int64_t
        Row range to convert. Declared as ``int64_t`` to match
        ``_convert_with_dtype`` and ``_string_convert``, which receive
        these values unchanged; a plain ``int`` would narrow them.
    name : object
        Column label (not used by this method).
    na_filter : bint
        Whether tokens are matched against ``na_hashset``.
    na_hashset : kh_str_starts_t *
        Set of encoded NA strings, only consulted when ``na_filter`` is set.
    na_flist : object
        Collection of float NA values forwarded to the float conversion.
    col_dtype : object
        dtype requested for this column, or None to infer one.

    Returns
    -------
    tuple
        ``(col_res, na_count)`` as produced by the successful conversion.

    Raises
    ------
    ValueError
        If the inferred result cannot be cast to ``col_dtype`` without
        changing values.
    """
    if col_dtype is not None:
        col_res, na_count = self._convert_with_dtype(
            col_dtype, i, start, end, na_filter,
            1, na_hashset, na_flist)

        # Fallback on the parse (e.g. we requested int dtype,
        # but its actually a float).
        if col_res is not None:
            return col_res, na_count

    if i in self.noconvert:
        return self._string_convert(i, start, end, na_filter, na_hashset)

    col_res = None
    for dt in self.dtype_cast_order:
        try:
            col_res, na_count = self._convert_with_dtype(
                dt, i, start, end, na_filter, 0, na_hashset, na_flist)
        except ValueError:
            # This error is raised from trying to convert to uint64,
            # and we discover that we cannot convert to any numerical
            # dtype successfully. As a result, we leave the data
            # column AS IS with object dtype (NA filtering disabled).
            col_res, na_count = self._convert_with_dtype(
                np.dtype('object'), i, start, end, 0,
                0, na_hashset, na_flist)
        except OverflowError:
            col_res, na_count = self._convert_with_dtype(
                np.dtype('object'), i, start, end, na_filter,
                0, na_hashset, na_flist)

        if col_res is not None:
            break

    # we had a fallback parse on the dtype, so now try to cast
    # only allow safe casts, eg. with a nan you cannot safely cast to int
    if col_res is not None and col_dtype is not None:
        try:
            col_res = col_res.astype(col_dtype, casting='safe')
        except TypeError:

            # float -> int conversions can fail the above
            # even with no nans
            col_res_orig = col_res
            col_res = col_res.astype(col_dtype)
            if (col_res != col_res_orig).any():
                raise ValueError(
                    f"cannot safely convert passed user dtype of "
                    f"{col_dtype} for {col_res_orig.dtype.name} dtyped data in "
                    f"column {i}")

    return col_res, na_count
+ true_values = [x.decode() for x in self.true_values] + cat = Categorical._from_inferred_categories( + cats, codes, dtype, true_values=true_values) + return cat, na_count + + elif is_extension_array_dtype(dtype): + result, na_count = self._string_convert(i, start, end, na_filter, + na_hashset) + array_type = dtype.construct_array_type() + try: + # use _from_sequence_of_strings if the class defines it + result = array_type._from_sequence_of_strings(result, + dtype=dtype) + except NotImplementedError: + raise NotImplementedError( + f"Extension Array: {array_type} must implement " + f"_from_sequence_of_strings in order " + f"to be used in parser methods") + + return result, na_count + + elif is_integer_dtype(dtype): + try: + result, na_count = _try_int64(self.parser, i, start, + end, na_filter, na_hashset) + if user_dtype and na_count is not None: + if na_count > 0: + raise ValueError(f"Integer column has NA values in column {i}") + except OverflowError: + result = _try_uint64(self.parser, i, start, end, + na_filter, na_hashset) + na_count = 0 + + if result is not None and dtype != 'int64': + result = result.astype(dtype) + + return result, na_count + + elif is_float_dtype(dtype): + result, na_count = _try_double(self.parser, i, start, end, + na_filter, na_hashset, na_flist) + + if result is not None and dtype != 'float64': + result = result.astype(dtype) + return result, na_count + elif is_bool_dtype(dtype): + result, na_count = _try_bool_flex(self.parser, i, start, end, + na_filter, na_hashset, + self.true_set, self.false_set) + if user_dtype and na_count is not None: + if na_count > 0: + raise ValueError(f"Bool column has NA values in column {i}") + return result, na_count + + elif dtype.kind == 'S': + # TODO: na handling + width = dtype.itemsize + if width > 0: + result = _to_fw_string(self.parser, i, start, end, width) + return result, 0 + + # treat as a regular string parsing + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif 
dtype.kind == 'U': + width = dtype.itemsize + if width > 0: + raise TypeError(f"the dtype {dtype} is not supported for parsing") + + # unicode variable width + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif is_object_dtype(dtype): + return self._string_convert(i, start, end, na_filter, + na_hashset) + elif is_datetime64_dtype(dtype): + raise TypeError(f"the dtype {dtype} is not supported " + f"for parsing, pass this column " + f"using parse_dates instead") + else: + raise TypeError(f"the dtype {dtype} is not supported for parsing") + + cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end, + bint na_filter, kh_str_starts_t *na_hashset): + + cdef StringPath path = _string_path(self.c_encoding) + + if path == UTF8: + return _string_box_utf8(self.parser, i, start, end, na_filter, + na_hashset) + elif path == ENCODED: + return _string_box_decode(self.parser, i, start, end, + na_filter, na_hashset, self.c_encoding) + + def _get_converter(self, i, name): + if self.converters is None: + return None + + if name is not None and name in self.converters: + return self.converters[name] + + # Converter for position, if any + return self.converters.get(i) + + cdef _get_na_list(self, i, name): + if self.na_values is None: + return None, set() + + if isinstance(self.na_values, dict): + key = None + values = None + + if name is not None and name in self.na_values: + key = name + elif i in self.na_values: + key = i + else: # No na_values provided for this column. 
# common NA values
# no longer excluding inf representations
# '1.#INF','-1.#INF', '1.#INF000000',
#
# "<NA>" is the text pandas itself prints for its missing-value scalar, so it
# must be listed for data written by pandas to read back as missing.
# The empty string is listed exactly once.
STR_NA_VALUES = {
    "-1.#IND",
    "1.#QNAN",
    "1.#IND",
    "-1.#QNAN",
    "#N/A N/A",
    "#N/A",
    "N/A",
    "n/a",
    "NA",
    "<NA>",
    "#NA",
    "NULL",
    "null",
    "NaN",
    "-NaN",
    "nan",
    "-nan",
    "",
}
# Choose the string boxing routine for a given encoding: the UTF8 fast path
# when no encoding is set or it is utf-8, the generic ENCODED path otherwise.
cdef inline StringPath _string_path(char *encoding):
    if encoding == NULL or encoding == b"utf-8":
        return UTF8
    return ENCODED
+ pyval = PyUnicode_FromString(word) + + k = kh_put_strbox(table, word, &ret) + table.vals[k] = pyval + + result[i] = pyval + + kh_destroy_strbox(table) + + return result, na_count + + +cdef _string_box_decode(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset, + char *encoding): + cdef: + int error, na_count = 0 + Py_ssize_t i, size, lines + coliter_t it + const char *word = NULL + ndarray[object] result + + int ret = 0 + kh_strbox_t *table + + char *errors = "strict" + + object pyval + + object NA = na_values[np.object_] + khiter_t k + + table = kh_init_strbox() + lines = line_end - line_start + result = np.empty(lines, dtype=np.object_) + coliter_setup(&it, parser, col, line_start) + + for i in range(lines): + COLITER_NEXT(it, word) + + if na_filter: + if kh_get_str_starts_item(na_hashset, word): + # in the hash table + na_count += 1 + result[i] = NA + continue + + k = kh_get_strbox(table, word) + + # in the hash table + if k != table.n_buckets: + # this increments the refcount, but need to test + pyval = table.vals[k] + else: + # box it. new ref? 
@cython.boundscheck(False)
cdef _categorical_convert(parser_t *parser, int64_t col,
                          int64_t line_start, int64_t line_end,
                          bint na_filter, kh_str_starts_t *na_hashset,
                          char *encoding):
    """
    Convert column data into codes, categories.

    Factorizes the raw tokens of column ``col`` for rows
    ``[line_start, line_end)``.

    Returns
    -------
    tuple
        ``(codes, categories, na_count)``: ``codes`` is an int64 array
        holding, per row, the index of its token in ``categories`` (-1 for
        tokens found in ``na_hashset`` when ``na_filter`` is set);
        ``categories`` is an object array of the distinct tokens as Python
        strings, numbered in order of first appearance; ``na_count`` is the
        number of rows coded as -1.
    """
    cdef:
        int error, na_count = 0
        Py_ssize_t i, size, lines
        coliter_t it
        const char *word = NULL

        # code written for tokens that match an NA string
        int64_t NA = -1
        int64_t[:] codes
        # next unused category code; equals the number of categories seen
        int64_t current_category = 0

        char *errors = "strict"
        StringPath path = _string_path(encoding)

        int ret = 0
        kh_str_t *table
        khiter_t k

    lines = line_end - line_start
    codes = np.empty(lines, dtype=np.int64)

    # factorize parsed values, creating a hash table
    # bytes -> category code
    with nogil:
        table = kh_init_str()
        coliter_setup(&it, parser, col, line_start)

        for i in range(lines):
            COLITER_NEXT(it, word)

            if na_filter:
                if kh_get_str_starts_item(na_hashset, word):
                    # is in NA values
                    na_count += 1
                    codes[i] = NA
                    continue

            k = kh_get_str(table, word)
            # not in the hash table
            if k == table.n_buckets:
                # first occurrence: register the token under a fresh code
                k = kh_put_str(table, word, &ret)
                table.vals[k] = current_category
                current_category += 1

            codes[i] = table.vals[k]

    # parse and box categories to python strings
    # (each string is stored at the position given by its category code)
    result = np.empty(table.n_occupied, dtype=np.object_)
    if path == ENCODED:
        for k in range(table.n_buckets):
            if kh_exist_str(table, k):
                size = strlen(table.keys[k])
                result[table.vals[k]] = PyUnicode_Decode(
                    table.keys[k], size, encoding, errors)
    elif path == UTF8:
        for k in range(table.n_buckets):
            if kh_exist_str(table, k):
                result[table.vals[k]] = PyUnicode_FromString(table.keys[k])

    kh_destroy_str(table)
    return np.asarray(codes), result, na_count
# Parse rows [line_start, line_end) of column `col` as float64 into `data`
# without holding the GIL.
#
# Tokens found in `na_hashset` (only checked when `na_filter` is set) are
# stored as `NA` and counted in `na_count[0]`. Tokens the converter rejects
# are accepted only when they spell inf/+inf/Infinity/+Infinity or
# -inf/-Infinity (case-insensitive). With `use_na_flist`, parsed values
# present in `na_flist` are also replaced by `NA` and counted.
#
# Returns 0 on success and 1 as soon as a token cannot be parsed as a float.
#
# `col`, `line_start` and `line_end` are int64_t so that the int64_t values
# passed by the caller are not narrowed to a C int.
cdef inline int _try_double_nogil(parser_t *parser,
                                  float64_t (*double_converter)(
                                      const char *, char **, char,
                                      char, char, int, int *, int *) nogil,
                                  int64_t col, int64_t line_start,
                                  int64_t line_end,
                                  bint na_filter, kh_str_starts_t *na_hashset,
                                  bint use_na_flist,
                                  const kh_float64_t *na_flist,
                                  float64_t NA, float64_t *data,
                                  int *na_count) nogil:
    cdef:
        int error = 0
        Py_ssize_t i, lines = line_end - line_start
        coliter_t it
        const char *word = NULL
        char *p_end
        khiter_t k64

    na_count[0] = 0
    coliter_setup(&it, parser, col, line_start)

    if na_filter:
        for i in range(lines):
            COLITER_NEXT(it, word)

            if kh_get_str_starts_item(na_hashset, word):
                # in the hash table
                na_count[0] += 1
                data[0] = NA
            else:
                data[0] = double_converter(word, &p_end, parser.decimal,
                                           parser.sci, parser.thousands,
                                           1, &error, NULL)
                # reject on converter error, empty parse or trailing chars
                if error != 0 or p_end == word or p_end[0]:
                    error = 0
                    if (strcasecmp(word, cinf) == 0 or
                            strcasecmp(word, cposinf) == 0 or
                            strcasecmp(word, cinfty) == 0 or
                            strcasecmp(word, cposinfty) == 0):
                        data[0] = INF
                    elif (strcasecmp(word, cneginf) == 0 or
                            strcasecmp(word, cneginfty) == 0):
                        data[0] = NEGINF
                    else:
                        return 1
                if use_na_flist:
                    # numeric NA values are matched after parsing
                    k64 = kh_get_float64(na_flist, data[0])
                    if k64 != na_flist.n_buckets:
                        na_count[0] += 1
                        data[0] = NA
            data += 1
    else:
        for i in range(lines):
            COLITER_NEXT(it, word)
            data[0] = double_converter(word, &p_end, parser.decimal,
                                       parser.sci, parser.thousands,
                                       1, &error, NULL)
            if error != 0 or p_end == word or p_end[0]:
                error = 0
                if (strcasecmp(word, cinf) == 0 or
                        strcasecmp(word, cposinf) == 0 or
                        strcasecmp(word, cinfty) == 0 or
                        strcasecmp(word, cposinfty) == 0):
                    data[0] = INF
                elif (strcasecmp(word, cneginf) == 0 or
                        strcasecmp(word, cneginfty) == 0):
                    data[0] = NEGINF
                else:
                    return 1
            data += 1

    return 0
# GIL-free worker: parse rows [line_start, line_end) of column `col` as
# uint64 into `data`. Tokens found in `na_hashset` (when `na_filter` is set)
# are written as 0 and flagged through `state.seen_null`. Returns 0 on
# success, otherwise the error code reported by str_to_uint64.
cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col,
                                  int64_t line_start,
                                  int64_t line_end, bint na_filter,
                                  const kh_str_starts_t *na_hashset,
                                  uint64_t *data, uint_state *state) nogil:
    cdef:
        int error
        Py_ssize_t row, n_rows = line_end - line_start
        coliter_t it
        const char *word = NULL

    coliter_setup(&it, parser, col, line_start)

    if not na_filter:
        for row in range(n_rows):
            COLITER_NEXT(it, word)
            data[row] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
                                      &error, parser.thousands)
            if error != 0:
                return error
        return 0

    for row in range(n_rows):
        COLITER_NEXT(it, word)
        if kh_get_str_starts_item(na_hashset, word):
            # token is one of the NA strings
            state.seen_null = 1
            data[row] = 0
        else:
            data[row] = str_to_uint64(state, word, INT64_MAX, UINT64_MAX,
                                      &error, parser.thousands)
            if error != 0:
                return error

    return 0
# GIL-free worker: parse rows [line_start, line_end) of column `col` as
# booleans stored in uint8 `data`. Lookup order per token: NA strings (only
# when `na_filter` is set; stored as `NA` and counted in `na_count[0]`),
# then `true_hashset` (1), then `false_hashset` (0), then to_boolean().
# Returns 0 on success or the non-zero code returned by to_boolean().
cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col,
                                     int64_t line_start,
                                     int64_t line_end, bint na_filter,
                                     const kh_str_starts_t *na_hashset,
                                     const kh_str_starts_t *true_hashset,
                                     const kh_str_starts_t *false_hashset,
                                     uint8_t NA, uint8_t *data,
                                     int *na_count) nogil:
    cdef:
        int error = 0
        Py_ssize_t row, n_rows = line_end - line_start
        coliter_t it
        const char *word = NULL

    na_count[0] = 0
    coliter_setup(&it, parser, col, line_start)

    if na_filter:
        for row in range(n_rows):
            COLITER_NEXT(it, word)

            if kh_get_str_starts_item(na_hashset, word):
                # token is one of the NA strings
                na_count[0] += 1
                data[0] = NA
            elif kh_get_str_starts_item(true_hashset, word):
                data[0] = 1
            elif kh_get_str_starts_item(false_hashset, word):
                data[0] = 0
            else:
                error = to_boolean(word, data)
                if error != 0:
                    return error
            data += 1
    else:
        for row in range(n_rows):
            COLITER_NEXT(it, word)

            if kh_get_str_starts_item(true_hashset, word):
                data[0] = 1
            elif kh_get_str_starts_item(false_hashset, word):
                data[0] = 0
            else:
                error = to_boolean(word, data)
                if error != 0:
                    return error
            data += 1

    return 0
def _concatenate_chunks(list chunks):
    """
    Merge per-chunk column dicts into one dict of concatenated columns.

    Parameters
    ----------
    chunks : list of dict
        One mapping of column name -> array per parsed chunk. The column
        names are taken from the first chunk. The dicts are emptied as a
        side effect, because every array is popped out of its chunk.

    Returns
    -------
    dict
        Column name -> concatenated array. Categorical columns are merged
        with ``union_categoricals``, extension arrays with
        ``_concat_same_type`` and everything else with ``np.concatenate``.

    Warns
    -----
    DtypeWarning
        When the chunks of a column have different non-categorical dtypes
        whose common type is object.
    """
    cdef:
        list names = list(chunks[0].keys())
        object name
        list warning_columns
        object warning_names
        object common_type

    result = {}
    warning_columns = list()
    for name in names:
        arrs = [chunk.pop(name) for chunk in chunks]
        # Check each arr for consistent types.
        dtypes = {a.dtype for a in arrs}
        numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
        if len(numpy_dtypes) > 1:
            common_type = np.find_common_type(numpy_dtypes, [])
            # np.object_ rather than the removed ``np.object`` alias
            if common_type == np.object_:
                warning_columns.append(str(name))

        dtype = dtypes.pop()
        if is_categorical_dtype(dtype):
            sort_categories = isinstance(dtype, str)
            result[name] = union_categoricals(arrs,
                                              sort_categories=sort_categories)
        else:
            if is_extension_array_dtype(dtype):
                array_type = dtype.construct_array_type()
                result[name] = array_type._concat_same_type(arrs)
            else:
                result[name] = np.concatenate(arrs)

    if warning_columns:
        warning_names = ','.join(warning_columns)
        # The comma between the two fragments matters: without it they are
        # fused into a single string and the join adds no separating space.
        warning_message = " ".join([
            f"Columns ({warning_names}) have mixed types.",
            f"Specify dtype option on import or set low_memory=False."
        ])
        warnings.warn(warning_message, DtypeWarning, stacklevel=8)
    return result
result + object val + + lines = line_end - line_start + result = np.empty(lines, dtype=np.object_) + + coliter_setup(&it, parser, col, line_start) + + if c_encoding == NULL or c_encoding == b'utf-8': + for i in range(lines): + COLITER_NEXT(it, word) + val = PyUnicode_FromString(word) + result[i] = f(val) + else: + for i in range(lines): + COLITER_NEXT(it, word) + val = PyUnicode_Decode(word, strlen(word), + c_encoding, errors) + result[i] = f(val) + + return lib.maybe_convert_objects(result) + + +def _maybe_encode(values): + if values is None: + return [] + return [x.encode('utf-8') if isinstance(x, str) else x for x in values] + + +def sanitize_objects(ndarray[object] values, set na_values, + convert_empty=True): + """ + Convert specified values, including the given set na_values and empty + strings if convert_empty is True, to np.nan. + + Parameters + ---------- + values : ndarray[object] + na_values : set + convert_empty : bool (default True) + """ + cdef: + Py_ssize_t i, n + object val, onan + Py_ssize_t na_count = 0 + dict memo = {} + + n = len(values) + onan = np.nan + + for i in range(n): + val = values[i] + if (convert_empty and val == '') or (val in na_values): + values[i] = onan + na_count += 1 + elif val in memo: + values[i] = memo[val] + else: + memo[val] = val + + return na_count diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx new file mode 100644 index 00000000..85711978 --- /dev/null +++ b/pandas/_libs/properties.pyx @@ -0,0 +1,67 @@ +from cython import Py_ssize_t + +from cpython.dict cimport ( + PyDict_Contains, PyDict_GetItem, PyDict_SetItem) + + +cdef class CachedProperty: + + cdef readonly: + object func, name, __doc__ + + def __init__(self, func): + self.func = func + self.name = func.__name__ + self.__doc__ = getattr(func, '__doc__', None) + + def __get__(self, obj, typ): + if obj is None: + # accessed on the class, not the instance + return self + + # Get the cache or set a default one if needed + cache = getattr(obj, 
cdef class AxisProperty:
    # Descriptor exposing one entry of ``obj._data.axes``: reading returns
    # ``obj._data.axes[axis]``, assigning forwards to
    # ``obj._set_axis(axis, value)``.

    cdef readonly:
        Py_ssize_t axis
        object __doc__

    def __init__(self, axis=0, doc=""):
        self.__doc__ = doc
        self.axis = axis

    def __get__(self, obj, type):
        cdef:
            list axes

        # Only instances have _data, not classes: class-level access
        # hands back the descriptor itself.
        if obj is None:
            return self

        axes = obj._data.axes
        return axes[self.axis]

    def __set__(self, obj, value):
        obj._set_axis(self.axis, value)
def __init__(self, ndarray arr, object f, axis=1, dummy=None, labels=None): + n, k = (arr).shape + + if axis == 0: + if not arr.flags.f_contiguous: + arr = arr.copy('F') + + self.nresults = k + self.chunksize = n + self.increment = n * arr.dtype.itemsize + else: + if not arr.flags.c_contiguous: + arr = arr.copy('C') + + self.nresults = n + self.chunksize = k + self.increment = k * arr.dtype.itemsize + + self.f = f + self.arr = arr + self.labels = labels + self.dummy, self.typ, self.index, self.ityp = self._check_dummy( + dummy=dummy) + + cdef _check_dummy(self, dummy=None): + cdef: + object index = None, typ = None, ityp = None + + if dummy is None: + dummy = np.empty(self.chunksize, dtype=self.arr.dtype) + + # our ref is stolen later since we are creating this array + # in cython, so increment first + Py_INCREF(dummy) + + else: + + # we passed a Series + typ = type(dummy) + index = dummy.index + dummy = dummy.values + + if dummy.dtype != self.arr.dtype: + raise ValueError('Dummy array must be same dtype') + if len(dummy) != self.chunksize: + raise ValueError(f'Dummy array must be length {self.chunksize}') + + return dummy, typ, index, ityp + + def get_result(self): + cdef: + char* dummy_buf + ndarray arr, result, chunk + Py_ssize_t i + flatiter it + object res, name, labels + object cached_typ = None + + arr = self.arr + chunk = self.dummy + dummy_buf = chunk.data + chunk.data = arr.data + labels = self.labels + + result = np.empty(self.nresults, dtype='O') + it = PyArray_IterNew(result) + + try: + for i in range(self.nresults): + + # create the cached type + # each time just reassign the data + if i == 0: + + if self.typ is not None: + # In this case, we also have self.index + name = labels[i] + cached_typ = self.typ( + chunk, index=self.index, name=name, dtype=arr.dtype) + + # use the cached_typ if possible + if cached_typ is not None: + # In this case, we also have non-None labels + name = labels[i] + + object.__setattr__( + cached_typ._data._block, 'values', 
chunk) + object.__setattr__(cached_typ, 'name', name) + res = self.f(cached_typ) + else: + res = self.f(chunk) + + # TODO: reason for not squeezing here? + res = _extract_result(res, squeeze=False) + if i == 0: + # On the first pass, we check the output shape to see + # if this looks like a reduction. + _check_result_array(res, len(self.dummy)) + + PyArray_SETITEM(result, PyArray_ITER_DATA(it), res) + chunk.data = chunk.data + self.increment + PyArray_ITER_NEXT(it) + finally: + # so we don't free the wrong memory + chunk.data = dummy_buf + + result = maybe_convert_objects(result) + return result + + +cdef class _BaseGrouper: + cdef _check_dummy(self, dummy): + # both values and index must be an ndarray! + + values = dummy.values + # GH 23683: datetimetz types are equivalent to datetime types here + if (dummy.dtype != self.arr.dtype + and values.dtype != self.arr.dtype): + raise ValueError('Dummy array must be same dtype') + if util.is_array(values) and not values.flags.contiguous: + # e.g. Categorical has no `flags` attribute + values = values.copy() + index = dummy.index.values + if not index.flags.contiguous: + index = index.copy() + + return values, index + + cdef inline _update_cached_objs(self, object cached_typ, object cached_ityp, + Slider islider, Slider vslider): + if cached_typ is None: + cached_ityp = self.ityp(islider.buf) + cached_typ = self.typ(vslider.buf, index=cached_ityp, name=self.name) + else: + # See the comment in indexes/base.py about _index_data. + # We need this for EA-backed indexes that have a reference + # to a 1-d ndarray like datetime / timedelta / period. 
+ object.__setattr__(cached_ityp, '_index_data', islider.buf) + cached_ityp._engine.clear_mapping() + object.__setattr__(cached_typ._data._block, 'values', vslider.buf) + object.__setattr__(cached_typ._data._block, 'mgr_locs', + slice(len(vslider.buf))) + object.__setattr__(cached_typ, '_index', cached_ityp) + object.__setattr__(cached_typ, 'name', self.name) + + return cached_typ, cached_ityp + + cdef inline object _apply_to_group(self, + object cached_typ, object cached_ityp, + Slider islider, Slider vslider, + Py_ssize_t group_size, bint initialized): + """ + Call self.f on our new group, then update to the next group. + """ + cached_ityp._engine.clear_mapping() + res = self.f(cached_typ) + res = _extract_result(res) + if not initialized: + # On the first pass, we check the output shape to see + # if this looks like a reduction. + initialized = 1 + _check_result_array(res, len(self.dummy_arr)) + + islider.advance(group_size) + vslider.advance(group_size) + + return res, initialized + + +cdef class SeriesBinGrouper(_BaseGrouper): + """ + Performs grouping operation according to bin edges, rather than labels + """ + cdef: + Py_ssize_t nresults, ngroups + + cdef public: + ndarray arr, index, dummy_arr, dummy_index + object values, f, bins, typ, ityp, name + + def __init__(self, object series, object f, object bins, object dummy): + + assert dummy is not None # always obj[:0] + assert len(bins) > 0 # otherwise we get IndexError in get_result + + self.bins = bins + self.f = f + + values = series.values + if util.is_array(values) and not values.flags.c_contiguous: + # e.g. 
Categorical has no `flags` attribute + values = values.copy('C') + self.arr = values + self.typ = series._constructor + self.ityp = series.index._constructor + self.index = series.index.values + self.name = series.name + + self.dummy_arr, self.dummy_index = self._check_dummy(dummy) + + # kludge for #1688 + if len(bins) > 0 and bins[-1] == len(series): + self.ngroups = len(bins) + else: + self.ngroups = len(bins) + 1 + + def get_result(self): + cdef: + ndarray arr, result + ndarray[int64_t] counts + Py_ssize_t i, n, group_size + object res + bint initialized = 0 + Slider vslider, islider + object cached_typ = None, cached_ityp = None + + counts = np.zeros(self.ngroups, dtype=np.int64) + + if self.ngroups > 0: + counts[0] = self.bins[0] + for i in range(1, self.ngroups): + if i == self.ngroups - 1: + counts[i] = len(self.arr) - self.bins[i - 1] + else: + counts[i] = self.bins[i] - self.bins[i - 1] + + group_size = 0 + n = len(self.arr) + + vslider = Slider(self.arr, self.dummy_arr) + islider = Slider(self.index, self.dummy_index) + + result = np.empty(self.ngroups, dtype='O') + + try: + for i in range(self.ngroups): + group_size = counts[i] + + islider.set_length(group_size) + vslider.set_length(group_size) + + cached_typ, cached_ityp = self._update_cached_objs( + cached_typ, cached_ityp, islider, vslider) + + res, initialized = self._apply_to_group(cached_typ, cached_ityp, + islider, vslider, + group_size, initialized) + + result[i] = res + + finally: + # so we don't free the wrong memory + islider.reset() + vslider.reset() + + result = maybe_convert_objects(result) + return result, counts + + +cdef class SeriesGrouper(_BaseGrouper): + """ + Performs generic grouping operation while avoiding ndarray construction + overhead + """ + cdef: + Py_ssize_t nresults, ngroups + + cdef public: + ndarray arr, index, dummy_arr, dummy_index + object f, labels, values, typ, ityp, name + + def __init__(self, object series, object f, object labels, + Py_ssize_t ngroups, object 
dummy): + + # in practice we always pass either obj[:0] or the + # safer obj._get_values(slice(None, 0)) + assert dummy is not None + + if len(series) == 0: + # get_result would never assign `result` + raise ValueError("SeriesGrouper requires non-empty `series`") + + self.labels = labels + self.f = f + + values = series.values + if util.is_array(values) and not values.flags.c_contiguous: + # e.g. Categorical has no `flags` attribute + values = values.copy('C') + self.arr = values + self.typ = series._constructor + self.ityp = series.index._constructor + self.index = series.index.values + self.name = series.name + + self.dummy_arr, self.dummy_index = self._check_dummy(dummy) + self.ngroups = ngroups + + def get_result(self): + cdef: + # Define result to avoid UnboundLocalError + ndarray arr, result = None + ndarray[int64_t] labels, counts + Py_ssize_t i, n, group_size, lab + object res + bint initialized = 0 + Slider vslider, islider + object cached_typ = None, cached_ityp = None + + labels = self.labels + counts = np.zeros(self.ngroups, dtype=np.int64) + group_size = 0 + n = len(self.arr) + + vslider = Slider(self.arr, self.dummy_arr) + islider = Slider(self.index, self.dummy_index) + + result = np.empty(self.ngroups, dtype='O') + + try: + for i in range(n): + group_size += 1 + + lab = labels[i] + + if i == n - 1 or lab != labels[i + 1]: + if lab == -1: + islider.advance(group_size) + vslider.advance(group_size) + group_size = 0 + continue + + islider.set_length(group_size) + vslider.set_length(group_size) + + cached_typ, cached_ityp = self._update_cached_objs( + cached_typ, cached_ityp, islider, vslider) + + res, initialized = self._apply_to_group(cached_typ, cached_ityp, + islider, vslider, + group_size, initialized) + + result[lab] = res + counts[lab] = group_size + group_size = 0 + + finally: + # so we don't free the wrong memory + islider.reset() + vslider.reset() + + # We check for empty series in the constructor, so should always + # have result initialized 
by this point. + assert initialized, "`result` has not been initialized." + + result = maybe_convert_objects(result) + + return result, counts + + +cdef inline _extract_result(object res, bint squeeze=True): + """ extract the result object, it might be a 0-dim ndarray + or a len-1 0-dim, or a scalar """ + if hasattr(res, 'values') and util.is_array(res.values): + res = res.values + if util.is_array(res): + if res.ndim == 0: + res = res.item() + elif squeeze and res.ndim == 1 and len(res) == 1: + res = res[0] + return res + + +cdef class Slider: + """ + Only handles contiguous data for now + """ + cdef: + ndarray values, buf + Py_ssize_t stride, orig_len, orig_stride + char *orig_data + + def __init__(self, ndarray values, ndarray buf): + assert values.ndim == 1 + assert values.dtype == buf.dtype + + if not values.flags.contiguous: + values = values.copy() + + self.values = values + self.buf = buf + self.stride = values.strides[0] + + self.orig_data = self.buf.data + self.orig_len = self.buf.shape[0] + self.orig_stride = self.buf.strides[0] + + self.buf.data = self.values.data + self.buf.strides[0] = self.stride + + cdef advance(self, Py_ssize_t k): + self.buf.data = self.buf.data + self.stride * k + + cdef move(self, int start, int end): + """ + For slicing + """ + self.buf.data = self.values.data + self.stride * start + self.buf.shape[0] = end - start + + cdef set_length(self, Py_ssize_t length): + self.buf.shape[0] = length + + cdef reset(self): + + self.buf.shape[0] = self.orig_len + self.buf.data = self.orig_data + self.buf.strides[0] = self.orig_stride + + +class InvalidApply(Exception): + pass + + +def apply_frame_axis0(object frame, object f, object names, + const int64_t[:] starts, const int64_t[:] ends): + cdef: + BlockSlider slider + Py_ssize_t i, n = len(starts) + list results + object piece + dict item_cache + + # We have already checked that we don't have a MultiIndex before calling + assert frame.index.nlevels == 1 + + results = [] + + slider = 
BlockSlider(frame) + + mutated = False + item_cache = slider.dummy._item_cache + try: + for i in range(n): + slider.move(starts[i], ends[i]) + + item_cache.clear() # ugh + chunk = slider.dummy + object.__setattr__(chunk, 'name', names[i]) + + try: + piece = f(chunk) + except Exception: + # We can't be more specific without knowing something about `f` + raise InvalidApply('Let this error raise above us') + + # Need to infer if low level index slider will cause segfaults + require_slow_apply = i == 0 and piece is chunk + try: + if piece.index is not chunk.index: + mutated = True + except AttributeError: + # `piece` might not have an index, could be e.g. an int + pass + + if not is_scalar(piece): + # Need to copy data to avoid appending references + try: + piece = piece.copy(deep="all") + except (TypeError, AttributeError): + piece = copy(piece) + + results.append(piece) + + # If the data was modified inplace we need to + # take the slow path to not risk segfaults + # we have already computed the first piece + if require_slow_apply: + break + finally: + slider.reset() + + return results, mutated + + +cdef class BlockSlider: + """ + Only capable of sliding on axis=0 + """ + + cdef public: + object frame, dummy, index + int nblocks + Slider idx_slider + list blocks + + cdef: + char **base_ptrs + + def __init__(self, frame): + self.frame = frame + self.dummy = frame[:0] + self.index = self.dummy.index + + self.blocks = [b.values for b in self.dummy._data.blocks] + + for x in self.blocks: + util.set_array_not_contiguous(x) + + self.nblocks = len(self.blocks) + # See the comment in indexes/base.py about _index_data. + # We need this for EA-backed indexes that have a reference to a 1-d + # ndarray like datetime / timedelta / period. 
+ self.idx_slider = Slider( + self.frame.index._index_data, self.dummy.index._index_data) + + self.base_ptrs = malloc(sizeof(char*) * len(self.blocks)) + for i, block in enumerate(self.blocks): + self.base_ptrs[i] = (block).data + + def __dealloc__(self): + free(self.base_ptrs) + + cdef move(self, int start, int end): + cdef: + ndarray arr + Py_ssize_t i + + # move blocks + for i in range(self.nblocks): + arr = self.blocks[i] + + # axis=1 is the frame's axis=0 + arr.data = self.base_ptrs[i] + arr.strides[1] * start + arr.shape[1] = end - start + + # move and set the index + self.idx_slider.move(start, end) + + object.__setattr__(self.index, '_index_data', self.idx_slider.buf) + self.index._engine.clear_mapping() + + cdef reset(self): + cdef: + ndarray arr + Py_ssize_t i + + # reset blocks + for i in range(self.nblocks): + arr = self.blocks[i] + + # axis=1 is the frame's axis=0 + arr.data = self.base_ptrs[i] + arr.shape[1] = 0 + + +def compute_reduction(arr: np.ndarray, f, axis: int = 0, dummy=None, labels=None): + """ + + Parameters + ----------- + arr : np.ndarray + f : function + axis : integer axis + dummy : type of reduced output (series) + labels : Index or None + """ + + # We either have both dummy and labels, or neither of them + if (labels is None) ^ (dummy is None): + raise ValueError("Must pass either dummy and labels, or neither") + + if labels is not None: + # Caller is responsible for ensuring we don't have MultiIndex + assert labels.nlevels == 1 + + # pass as an ndarray/ExtensionArray + labels = labels._values + + reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels) + return reducer.get_result() diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx new file mode 100644 index 00000000..4e831081 --- /dev/null +++ b/pandas/_libs/reshape.pyx @@ -0,0 +1,139 @@ +import cython +from cython import Py_ssize_t + +from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, + uint32_t, uint64_t, float32_t, float64_t, ndarray) 
+cimport numpy as cnp +import numpy as np +from pandas._libs.lib cimport c_is_list_like +cnp.import_array() + +ctypedef fused reshape_t: + uint8_t + uint16_t + uint32_t + uint64_t + int8_t + int16_t + int32_t + int64_t + float32_t + float64_t + object + + +@cython.wraparound(False) +@cython.boundscheck(False) +def unstack(reshape_t[:, :] values, uint8_t[:] mask, + Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width, + reshape_t[:, :] new_values, uint8_t[:, :] new_mask): + """ + Transform long values to wide new_values. + + Parameters + ---------- + values : typed ndarray + mask : boolean ndarray + stride : int + length : int + width : int + new_values : typed ndarray + result array + new_mask : boolean ndarray + result mask + """ + cdef: + Py_ssize_t i, j, w, nulls, s, offset + + if reshape_t is not object: + # evaluated at compile-time + with nogil: + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + + else: + # object-dtype, identical to above but we cannot use nogil + for i in range(stride): + + nulls = 0 + for j in range(length): + + for w in range(width): + + offset = j * width + w + + if mask[offset]: + s = i * width + w + new_values[j, s] = values[offset - nulls, i] + new_mask[j, s] = 1 + else: + nulls += 1 + + +@cython.wraparound(False) +@cython.boundscheck(False) +def explode(ndarray[object] values): + """ + transform array list-likes to long form + preserve non-list entries + + Parameters + ---------- + values : object ndarray + + Returns + ------- + tuple(values, counts) + """ + cdef: + Py_ssize_t i, j, count, n + object v + ndarray[object] result + ndarray[int64_t] counts + + # find the resulting len + n = len(values) + counts = np.zeros(n, dtype='int64') + for i in range(n): + v = values[i] + if c_is_list_like(v, False): + if len(v): + counts[i] += 
len(v) + else: + # empty list-like, use a nan marker + counts[i] += 1 + else: + counts[i] += 1 + + result = np.empty(counts.sum(), dtype='object') + count = 0 + for i in range(n): + v = values[i] + + if c_is_list_like(v, False): + if len(v): + for j in range(len(v)): + result[count] = v[j] + count += 1 + else: + # empty list-like, use a nan marker + result[count] = np.nan + count += 1 + else: + # replace with the existing scalar + result[count] = v + count += 1 + return result, counts diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx new file mode 100644 index 00000000..ee839010 --- /dev/null +++ b/pandas/_libs/sparse.pyx @@ -0,0 +1,796 @@ +import cython + +import numpy as np +cimport numpy as cnp +from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, + float64_t, float32_t) +cnp.import_array() + + +# ----------------------------------------------------------------------------- +# Preamble stuff + +cdef float64_t NaN = np.NaN +cdef float64_t INF = np.inf + +# ----------------------------------------------------------------------------- + + +cdef class SparseIndex: + """ + Abstract superclass for sparse index types. + """ + + def __init__(self): + raise NotImplementedError + + +cdef class IntIndex(SparseIndex): + """ + Object for holding exact integer sparse indexing information + + Parameters + ---------- + length : integer + indices : array-like + Contains integers corresponding to the indices. 
+ """ + + cdef readonly: + Py_ssize_t length, npoints + ndarray indices + + def __init__(self, Py_ssize_t length, indices): + self.length = length + self.indices = np.ascontiguousarray(indices, dtype=np.int32) + self.npoints = len(self.indices) + + self.check_integrity() + + def __reduce__(self): + args = (self.length, self.indices) + return IntIndex, args + + def __repr__(self) -> str: + output = 'IntIndex\n' + output += f'Indices: {repr(self.indices)}\n' + return output + + @property + def nbytes(self) -> int: + return self.indices.nbytes + + def check_integrity(self): + """ + Checks the following: + + - Indices are strictly ascending + - Number of indices is at most self.length + - Indices are at least 0 and at most the total length less one + + A ValueError is raised if any of these conditions is violated. + """ + + if self.npoints > self.length: + msg = (f"Too many indices. Expected " + f"{self.length} but found {self.npoints}") + raise ValueError(msg) + + # Indices are vacuously ordered and non-negative + # if the sequence of indices is empty. 
+ if self.npoints == 0: + return + + if self.indices.min() < 0: + raise ValueError("No index can be less than zero") + + if self.indices.max() >= self.length: + raise ValueError("All indices must be less than the length") + + monotonic = np.all(self.indices[:-1] < self.indices[1:]) + if not monotonic: + raise ValueError("Indices must be strictly increasing") + + def equals(self, other) -> bool: + if not isinstance(other, IntIndex): + return False + + if self is other: + return True + + same_length = self.length == other.length + same_indices = np.array_equal(self.indices, other.indices) + return same_length and same_indices + + @property + def ngaps(self) -> int: + return self.length - self.npoints + + def to_int_index(self): + return self + + def to_block_index(self): + locs, lens = get_blocks(self.indices) + return BlockIndex(self.length, locs, lens) + + cpdef IntIndex intersect(self, SparseIndex y_): + cdef: + Py_ssize_t out_length, xi, yi = 0, result_indexer = 0 + int32_t xind + ndarray[int32_t, ndim=1] xindices, yindices, new_indices + IntIndex y + + # if is one already, returns self + y = y_.to_int_index() + + if self.length != y.length: + raise Exception('Indices must reference same underlying length') + + xindices = self.indices + yindices = y.indices + new_indices = np.empty(min( + len(xindices), len(yindices)), dtype=np.int32) + + for xi in range(self.npoints): + xind = xindices[xi] + + while yi < y.npoints and yindices[yi] < xind: + yi += 1 + + if yi >= y.npoints: + break + + # TODO: would a two-pass algorithm be faster? 
+ if yindices[yi] == xind: + new_indices[result_indexer] = xind + result_indexer += 1 + + new_indices = new_indices[:result_indexer] + return IntIndex(self.length, new_indices) + + cpdef IntIndex make_union(self, SparseIndex y_): + + cdef: + ndarray[int32_t, ndim=1] new_indices + IntIndex y + + # if is one already, returns self + y = y_.to_int_index() + + if self.length != y.length: + raise ValueError('Indices must reference same underlying length') + + new_indices = np.union1d(self.indices, y.indices) + return IntIndex(self.length, new_indices) + + @cython.wraparound(False) + cpdef int32_t lookup(self, Py_ssize_t index): + """ + Return the internal location if value exists on given index. + Return -1 otherwise. + """ + cdef: + int32_t res + ndarray[int32_t, ndim=1] inds + + inds = self.indices + if self.npoints == 0: + return -1 + elif index < 0 or self.length <= index: + return -1 + + res = inds.searchsorted(index) + if res == self.npoints: + return -1 + elif inds[res] == index: + return res + else: + return -1 + + @cython.wraparound(False) + cpdef ndarray[int32_t] lookup_array(self, ndarray[ + int32_t, ndim=1] indexer): + """ + Vectorized lookup, returns ndarray[int32_t] + """ + cdef: + Py_ssize_t n, i, ind_val + ndarray[int32_t, ndim=1] inds + ndarray[uint8_t, ndim=1, cast=True] mask + ndarray[int32_t, ndim=1] masked + ndarray[int32_t, ndim=1] res + ndarray[int32_t, ndim=1] results + + n = len(indexer) + results = np.empty(n, dtype=np.int32) + results[:] = -1 + + if self.npoints == 0: + return results + + inds = self.indices + mask = (inds[0] <= indexer) & (indexer <= inds[len(inds) - 1]) + + masked = indexer[mask] + res = inds.searchsorted(masked).astype(np.int32) + + res[inds[res] != masked] = -1 + results[mask] = res + return results + + cpdef ndarray reindex(self, ndarray[float64_t, ndim=1] values, + float64_t fill_value, SparseIndex other_): + cdef: + Py_ssize_t i = 0, j = 0 + IntIndex other + ndarray[float64_t, ndim=1] result + ndarray[int32_t, ndim=1] 
sinds, oinds + + other = other_.to_int_index() + + oinds = other.indices + sinds = self.indices + + result = np.empty(other.npoints, dtype=np.float64) + result[:] = fill_value + + for i in range(other.npoints): + while oinds[i] > sinds[j] and j < self.npoints: + j += 1 + + if j == self.npoints: + break + + if oinds[i] < sinds[j]: + continue + elif oinds[i] == sinds[j]: + result[i] = values[j] + j += 1 + + return result + + cpdef put(self, ndarray[float64_t, ndim=1] values, + ndarray[int32_t, ndim=1] indices, object to_put): + pass + + cpdef take(self, ndarray[float64_t, ndim=1] values, + ndarray[int32_t, ndim=1] indices): + pass + + +cpdef get_blocks(ndarray[int32_t, ndim=1] indices): + cdef: + Py_ssize_t init_len, i, npoints, result_indexer = 0 + int32_t block, length = 1, cur, prev + ndarray[int32_t, ndim=1] locs, lens + + npoints = len(indices) + + # just handle the special empty case separately + if npoints == 0: + return np.array([], dtype=np.int32), np.array([], dtype=np.int32) + + # block size can't be longer than npoints + locs = np.empty(npoints, dtype=np.int32) + lens = np.empty(npoints, dtype=np.int32) + + # TODO: two-pass algorithm faster? 
+ prev = block = indices[0] + for i in range(1, npoints): + cur = indices[i] + if cur - prev > 1: + # new block + locs[result_indexer] = block + lens[result_indexer] = length + block = cur + length = 1 + result_indexer += 1 + else: + # same block, increment length + length += 1 + + prev = cur + + locs[result_indexer] = block + lens[result_indexer] = length + result_indexer += 1 + locs = locs[:result_indexer] + lens = lens[:result_indexer] + return locs, lens + + +# ----------------------------------------------------------------------------- +# BlockIndex + +cdef class BlockIndex(SparseIndex): + """ + Object for holding block-based sparse indexing information + + Parameters + ---------- + """ + cdef readonly: + int32_t nblocks, npoints, length + ndarray blocs, blengths + + cdef: + object __weakref__ # need to be picklable + int32_t *locbuf + int32_t *lenbuf + + def __init__(self, length, blocs, blengths): + + self.blocs = np.ascontiguousarray(blocs, dtype=np.int32) + self.blengths = np.ascontiguousarray(blengths, dtype=np.int32) + + # in case we need + self.locbuf = self.blocs.data + self.lenbuf = self.blengths.data + + self.length = length + self.nblocks = np.int32(len(self.blocs)) + self.npoints = self.blengths.sum() + + # self.block_start = blocs + # self.block_end = blocs + blengths + + self.check_integrity() + + def __reduce__(self): + args = (self.length, self.blocs, self.blengths) + return BlockIndex, args + + def __repr__(self) -> str: + output = 'BlockIndex\n' + output += f'Block locations: {repr(self.blocs)}\n' + output += f'Block lengths: {repr(self.blengths)}' + + return output + + @property + def nbytes(self) -> int: + return self.blocs.nbytes + self.blengths.nbytes + + @property + def ngaps(self) -> int: + return self.length - self.npoints + + cpdef check_integrity(self): + """ + Check: + - Locations are in ascending order + - No overlapping blocks + - Blocks to not start after end of index, nor extend beyond end + """ + cdef: + Py_ssize_t i + 
ndarray[int32_t, ndim=1] blocs, blengths + + blocs = self.blocs + blengths = self.blengths + + if len(blocs) != len(blengths): + raise ValueError('block bound arrays must be same length') + + for i in range(self.nblocks): + if i > 0: + if blocs[i] <= blocs[i - 1]: + raise ValueError('Locations not in ascending order') + + if i < self.nblocks - 1: + if blocs[i] + blengths[i] > blocs[i + 1]: + raise ValueError(f'Block {i} overlaps') + else: + if blocs[i] + blengths[i] > self.length: + raise ValueError(f'Block {i} extends beyond end') + + # no zero-length blocks + if blengths[i] == 0: + raise ValueError(f'Zero-length block {i}') + + def equals(self, other) -> bool: + if not isinstance(other, BlockIndex): + return False + + if self is other: + return True + + same_length = self.length == other.length + same_blocks = (np.array_equal(self.blocs, other.blocs) and + np.array_equal(self.blengths, other.blengths)) + return same_length and same_blocks + + def to_block_index(self): + return self + + def to_int_index(self): + cdef: + int32_t i = 0, j, b + int32_t offset + ndarray[int32_t, ndim=1] indices + + indices = np.empty(self.npoints, dtype=np.int32) + + for b in range(self.nblocks): + offset = self.locbuf[b] + + for j in range(self.lenbuf[b]): + indices[i] = offset + j + i += 1 + + return IntIndex(self.length, indices) + + cpdef BlockIndex intersect(self, SparseIndex other): + """ + Intersect two BlockIndex objects + + Parameters + ---------- + + Returns + ------- + intersection : BlockIndex + """ + cdef: + BlockIndex y + ndarray[int32_t, ndim=1] xloc, xlen, yloc, ylen, out_bloc, out_blen + Py_ssize_t xi = 0, yi = 0, max_len, result_indexer = 0 + int32_t cur_loc, cur_length, diff + + y = other.to_block_index() + + if self.length != y.length: + raise Exception('Indices must reference same underlying length') + + xloc = self.blocs + xlen = self.blengths + yloc = y.blocs + ylen = y.blengths + + # block may be split, but can't exceed original len / 2 + 1 + max_len = 
int(min(self.length, y.length) / 2) + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + + while True: + # we are done (or possibly never began) + if xi >= self.nblocks or yi >= y.nblocks: + break + + # completely symmetric...would like to avoid code dup but oh well + if xloc[xi] >= yloc[yi]: + cur_loc = xloc[xi] + diff = xloc[xi] - yloc[yi] + + if ylen[yi] <= diff: + # have to skip this block + yi += 1 + continue + + if ylen[yi] - diff < xlen[xi]: + # take end of y block, move onward + cur_length = ylen[yi] - diff + yi += 1 + else: + # take end of x block + cur_length = xlen[xi] + xi += 1 + + else: # xloc[xi] < yloc[yi] + cur_loc = yloc[yi] + diff = yloc[yi] - xloc[xi] + + if xlen[xi] <= diff: + # have to skip this block + xi += 1 + continue + + if xlen[xi] - diff < ylen[yi]: + # take end of x block, move onward + cur_length = xlen[xi] - diff + xi += 1 + else: + # take end of y block + cur_length = ylen[yi] + yi += 1 + + out_bloc[result_indexer] = cur_loc + out_blen[result_indexer] = cur_length + result_indexer += 1 + + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] + + return BlockIndex(self.length, out_bloc, out_blen) + + cpdef BlockIndex make_union(self, SparseIndex y): + """ + Combine together two BlockIndex objects, accepting indices if contained + in one or the other + + Parameters + ---------- + other : SparseIndex + + Notes + ----- + union is a protected keyword in Cython, hence make_union + + Returns + ------- + union : BlockIndex + """ + return BlockUnion(self, y.to_block_index()).result + + cpdef Py_ssize_t lookup(self, Py_ssize_t index): + """ + Return the internal location if value exists on given index. + Return -1 otherwise. 
+ """ + cdef: + Py_ssize_t i, cum_len + ndarray[int32_t, ndim=1] locs, lens + + locs = self.blocs + lens = self.blengths + + if self.nblocks == 0: + return -1 + elif index < locs[0]: + return -1 + + cum_len = 0 + for i in range(self.nblocks): + if index >= locs[i] and index < locs[i] + lens[i]: + return cum_len + index - locs[i] + cum_len += lens[i] + + return -1 + + @cython.wraparound(False) + cpdef ndarray[int32_t] lookup_array(self, ndarray[ + int32_t, ndim=1] indexer): + """ + Vectorized lookup, returns ndarray[int32_t] + """ + cdef: + Py_ssize_t n, i, j, ind_val + ndarray[int32_t, ndim=1] locs, lens + ndarray[int32_t, ndim=1] results + + locs = self.blocs + lens = self.blengths + + n = len(indexer) + results = np.empty(n, dtype=np.int32) + results[:] = -1 + + if self.npoints == 0: + return results + + for i in range(n): + ind_val = indexer[i] + if not (ind_val < 0 or self.length <= ind_val): + cum_len = 0 + for j in range(self.nblocks): + if ind_val >= locs[j] and ind_val < locs[j] + lens[j]: + results[i] = cum_len + ind_val - locs[j] + cum_len += lens[j] + return results + + cpdef ndarray reindex(self, ndarray[float64_t, ndim=1] values, + float64_t fill_value, SparseIndex other_): + cdef: + Py_ssize_t i = 0, j = 0, ocur, ocurlen + BlockIndex other + ndarray[float64_t, ndim=1] result + ndarray[int32_t, ndim=1] slocs, slens, olocs, olens + + other = other_.to_block_index() + + olocs = other.blocs + olens = other.blengths + slocs = self.blocs + slens = self.blengths + + result = np.empty(other.npoints, dtype=np.float64) + + for i in range(other.nblocks): + ocur = olocs[i] + ocurlen = olens[i] + + while slocs[j] + slens[j] < ocur: + j += 1 + + cpdef put(self, ndarray[float64_t, ndim=1] values, + ndarray[int32_t, ndim=1] indices, object to_put): + pass + + cpdef take(self, ndarray[float64_t, ndim=1] values, + ndarray[int32_t, ndim=1] indices): + pass + + +cdef class BlockMerge: + """ + Object-oriented approach makes sharing state between recursive functions a + 
lot easier and reduces code duplication + """ + cdef: + BlockIndex x, y, result + ndarray xstart, xlen, xend, ystart, ylen, yend + int32_t xi, yi # block indices + + def __init__(self, BlockIndex x, BlockIndex y): + self.x = x + self.y = y + + if x.length != y.length: + raise Exception('Indices must reference same underlying length') + + self.xstart = self.x.blocs + self.ystart = self.y.blocs + + self.xend = self.x.blocs + self.x.blengths + self.yend = self.y.blocs + self.y.blengths + + # self.xlen = self.x.blengths + # self.ylen = self.y.blengths + + self.xi = 0 + self.yi = 0 + + self.result = self._make_merged_blocks() + + cdef _make_merged_blocks(self): + raise NotImplementedError + + cdef _set_current_indices(self, int32_t xi, int32_t yi, bint mode): + if mode == 0: + self.xi = xi + self.yi = yi + else: + self.xi = yi + self.yi = xi + + +cdef class BlockUnion(BlockMerge): + """ + Object-oriented approach makes sharing state between recursive functions a + lot easier and reduces code duplication + """ + + cdef _make_merged_blocks(self): + cdef: + ndarray[int32_t, ndim=1] xstart, xend, ystart + ndarray[int32_t, ndim=1] yend, out_bloc, out_blen + int32_t nstart, nend, diff + Py_ssize_t max_len, result_indexer = 0 + + xstart = self.xstart + xend = self.xend + ystart = self.ystart + yend = self.yend + + max_len = int(min(self.x.length, self.y.length) / 2) + 1 + out_bloc = np.empty(max_len, dtype=np.int32) + out_blen = np.empty(max_len, dtype=np.int32) + + while True: + # we are done (or possibly never began) + if self.xi >= self.x.nblocks and self.yi >= self.y.nblocks: + break + elif self.yi >= self.y.nblocks: + # through with y, just pass through x blocks + nstart = xstart[self.xi] + nend = xend[self.xi] + self.xi += 1 + elif self.xi >= self.x.nblocks: + # through with x, just pass through y blocks + nstart = ystart[self.yi] + nend = yend[self.yi] + self.yi += 1 + else: + # find end of new block + if xstart[self.xi] < ystart[self.yi]: + nstart = xstart[self.xi] + 
nend = self._find_next_block_end(0) + else: + nstart = ystart[self.yi] + nend = self._find_next_block_end(1) + + out_bloc[result_indexer] = nstart + out_blen[result_indexer] = nend - nstart + result_indexer += 1 + + out_bloc = out_bloc[:result_indexer] + out_blen = out_blen[:result_indexer] + + return BlockIndex(self.x.length, out_bloc, out_blen) + + cdef int32_t _find_next_block_end(self, bint mode) except -1: + """ + Wow, this got complicated in a hurry + + mode 0: block started in index x + mode 1: block started in index y + """ + cdef: + ndarray[int32_t, ndim=1] xstart, xend, ystart, yend + int32_t xi, yi, xnblocks, ynblocks, nend + + if mode != 0 and mode != 1: + raise Exception('Mode must be 0 or 1') + + # so symmetric code will work + if mode == 0: + xstart = self.xstart + xend = self.xend + xi = self.xi + + ystart = self.ystart + yend = self.yend + yi = self.yi + ynblocks = self.y.nblocks + else: + xstart = self.ystart + xend = self.yend + xi = self.yi + + ystart = self.xstart + yend = self.xend + yi = self.xi + ynblocks = self.x.nblocks + + nend = xend[xi] + + # done with y? 
+ if yi == ynblocks: + self._set_current_indices(xi + 1, yi, mode) + return nend + elif nend < ystart[yi]: + # block ends before y block + self._set_current_indices(xi + 1, yi, mode) + return nend + else: + while yi < ynblocks and nend > yend[yi]: + yi += 1 + + self._set_current_indices(xi + 1, yi, mode) + + if yi == ynblocks: + return nend + + if nend < ystart[yi]: + # we're done, return the block end + return nend + else: + # merge blocks, continue searching + # this also catches the case where blocks touch (nend == ystart[yi]) + return self._find_next_block_end(1 - mode) + + +# ----------------------------------------------------------------------------- +# Sparse arithmetic + +include "sparse_op_helper.pxi" + + +# ----------------------------------------------------------------------------- +# SparseArray mask create operations + +def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value): + cdef: + object value + Py_ssize_t i + Py_ssize_t new_length = len(arr) + ndarray[int8_t, ndim=1] mask + + mask = np.ones(new_length, dtype=np.int8) # 1 = keep; cleared below where arr[i] equals fill_value with the same type + + for i in range(new_length): + value = arr[i] + if value == fill_value and type(value) == type(fill_value): + mask[i] = 0 + + return mask.view(dtype=np.bool_) diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in new file mode 100644 index 00000000..996da4ca --- /dev/null +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -0,0 +1,309 @@ +""" +Template for each `dtype` helper function for sparse ops + +WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in +""" + +# ---------------------------------------------------------------------- +# Sparse op +# ---------------------------------------------------------------------- + +ctypedef fused sparse_t: + float64_t + int64_t + + +cdef inline float64_t __div__(sparse_t a, sparse_t b): + if b == 0: + if a > 0: + return INF + elif a < 0: + return -INF + else: + return NaN + else: + return float(a) / b + + +cdef inline float64_t __truediv__(sparse_t
a, sparse_t b): + return __div__(a, b) + + +cdef inline sparse_t __mod__(sparse_t a, sparse_t b): + if b == 0: + if sparse_t is float64_t: + return NaN + else: + return 0 + else: + return a % b + + +cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): + if b == 0: + if sparse_t is float64_t: + return NaN + else: + return 0 + else: + return a // b + + +# ---------------------------------------------------------------------- +# sparse array op +# ---------------------------------------------------------------------- + +{{py: + +# dtype, arith_comp_group, logical_group +dtypes = [('float64', True, False), + ('int64', True, True), + ('uint8', False, True)] +# do not generate arithmetic / comparison template for uint8, +# it should be done in fused types + +def get_op(tup): + assert isinstance(tup, tuple) + assert len(tup) == 4 + + opname, lval, rval, dtype = tup + + ops_dict = {'add': '{0} + {1}', + 'sub': '{0} - {1}', + 'mul': '{0} * {1}', + 'div': '__div__({0}, {1})', + 'mod': '__mod__({0}, {1})', + 'truediv': '__truediv__({0}, {1})', + 'floordiv': '__floordiv__({0}, {1})', + 'pow': '{0} ** {1}', + 'eq': '{0} == {1}', + 'ne': '{0} != {1}', + 'lt': '{0} < {1}', + 'gt': '{0} > {1}', + 'le': '{0} <= {1}', + 'ge': '{0} >= {1}', + + 'and': '{0} & {1}', # logical op + 'or': '{0} | {1}', + 'xor': '{0} ^ {1}'} + + return ops_dict[opname].format(lval, rval) + + +def get_dispatch(dtypes): + + ops_list = ['add', 'sub', 'mul', 'div', 'mod', 'truediv', + 'floordiv', 'pow', + 'eq', 'ne', 'lt', 'gt', 'le', 'ge', + 'and', 'or', 'xor'] + + for opname in ops_list: + for dtype, arith_comp_group, logical_group in dtypes: + + if opname in ('div', 'truediv'): + rdtype = 'float64' + elif opname in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): + # comparison op + rdtype = 'uint8' + elif opname in ('and', 'or', 'xor'): + # logical op + rdtype = 'uint8' + else: + rdtype = dtype + + if opname in ('and', 'or', 'xor'): + if logical_group: + yield opname, dtype, rdtype + else: + if 
arith_comp_group: + yield opname, dtype, rdtype + +}} + + +{{for opname, dtype, rdtype in get_dispatch(dtypes)}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, + BlockIndex xindex, + {{dtype}}_t xfill, + {{dtype}}_t[:] y_, + BlockIndex yindex, + {{dtype}}_t yfill): + ''' + Binary operator on BlockIndex objects with fill values + ''' + + cdef: + BlockIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xbp = 0, ybp = 0 # block positions + int32_t xloc, yloc + Py_ssize_t xblock = 0, yblock = 0 # block numbers + + {{dtype}}_t[:] x, y + ndarray[{{rdtype}}_t, ndim=1] out + + # to suppress Cython warning + x = x_ + y = y_ + + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + # Wow, what a hack job. Need to do something about this + + # walk the two SparseVectors, adding matched locations... + for out_i in range(out_index.npoints): + if yblock == yindex.nblocks: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + continue + + if xblock == xindex.nblocks: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + continue + + yloc = yindex.locbuf[yblock] + ybp + xloc = xindex.locbuf[xblock] + xbp + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + + # advance both locations + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + + # 
advance x location + xbp += 1 + if xbp == xindex.lenbuf[xblock]: + xblock += 1 + xbp = 0 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + # advance y location + ybp += 1 + if ybp == yindex.lenbuf[yblock]: + yblock += 1 + ybp = 0 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, + IntIndex xindex, + {{dtype}}_t xfill, + {{dtype}}_t[:] y_, + IntIndex yindex, + {{dtype}}_t yfill): + cdef: + IntIndex out_index + Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices + int32_t xloc, yloc + int32_t[:] xindices, yindices, out_indices + {{dtype}}_t[:] x, y + ndarray[{{rdtype}}_t, ndim=1] out + + # suppress Cython compiler warnings due to inlining + x = x_ + y = y_ + + # need to do this first to know size of result array + out_index = xindex.make_union(yindex) + out = np.empty(out_index.npoints, dtype=np.{{rdtype}}) + + xindices = xindex.indices + yindices = yindex.indices + out_indices = out_index.indices + + # walk the two SparseVectors, adding matched locations... 
+ for out_i in range(out_index.npoints): + if xi == xindex.npoints: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + continue + + if yi == yindex.npoints: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + continue + + xloc = xindices[xi] + yloc = yindices[yi] + + # each index in the out_index had to come from either x, y, or both + if xloc == yloc: + out[out_i] = {{(opname, 'x[xi]', 'y[yi]', dtype) | get_op}} + xi += 1 + yi += 1 + elif xloc < yloc: + # use y fill value + out[out_i] = {{(opname, 'x[xi]', 'yfill', dtype) | get_op}} + xi += 1 + else: + # use x fill value + out[out_i] = {{(opname, 'xfill', 'y[yi]', dtype) | get_op}} + yi += 1 + + return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} + + +cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, + SparseIndex xindex, {{dtype}}_t xfill, + {{dtype}}_t[:] y, + SparseIndex yindex, {{dtype}}_t yfill): + + if isinstance(xindex, BlockIndex): + return block_op_{{opname}}_{{dtype}}(x, xindex.to_block_index(), xfill, + y, yindex.to_block_index(), yfill) + elif isinstance(xindex, IntIndex): + return int_op_{{opname}}_{{dtype}}(x, xindex.to_int_index(), xfill, + y, yindex.to_int_index(), yfill) + else: + raise NotImplementedError + + +cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill, + {{dtype}}_t yfill): + return {{(opname, 'xfill', 'yfill', dtype) | get_op}} + +{{endfor}} diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath new file mode 100644 index 00000000..632e1fc2 --- /dev/null +++ b/pandas/_libs/src/headers/cmath @@ -0,0 +1,36 @@ +#ifndef _PANDAS_MATH_H_ +#define _PANDAS_MATH_H_ + +// MSVC 2017 has a bug where `x == x` can be true for NaNs. +// MSC_VER from https://stackoverflow.com/a/70630/1889400 +// Place upper bound on this check once a fixed MSVC is released. 
+#if defined(_MSC_VER) && (_MSC_VER < 1800) +#include <cmath> +// In older versions of Visual Studio there wasn't a std::signbit defined +// This defines it using _copysign +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int signbit(double num) { return _copysign(1.0, num) < 0; } + __inline int notnan(double x) { return !isnan(x); } +} +#elif defined(_MSC_VER) && (_MSC_VER >= 1900) +#include <cmath> +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int notnan(double x) { return !isnan(x); } +} +#elif defined(_MSC_VER) +#include <cmath> +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int notnan(double x) { return x == x; } +} +#else +#include <cmath> + +namespace std { + __inline int notnan(double x) { return x == x; } +} + +#endif +#endif diff --git a/pandas/_libs/src/headers/ms_inttypes.h b/pandas/_libs/src/headers/ms_inttypes.h new file mode 100644 index 00000000..1be38033 --- /dev/null +++ b/pandas/_libs/src/headers/ms_inttypes.h @@ -0,0 +1,305 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "ms_stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" 
+#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define 
SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 
The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ] diff --git a/pandas/_libs/src/headers/ms_stdint.h b/pandas/_libs/src/headers/ms_stdint.h new file mode 100644 index 00000000..c66fbb81 --- /dev/null +++ b/pandas/_libs/src/headers/ms_stdint.h @@ -0,0 +1,247 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2008 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. The name of the author may be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define 
UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// 
WCHAR_MIN and WCHAR_MAX are also defined in +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +#define INTMAX_C INT64_C +#define UINTMAX_C UINT64_C + +#endif // __STDC_CONSTANT_MACROS ] + + +#endif // _MSC_STDINT_H_ ] diff --git a/pandas/_libs/src/headers/portable.h b/pandas/_libs/src/headers/portable.h new file mode 100644 index 00000000..cb8e5ba8 --- /dev/null +++ b/pandas/_libs/src/headers/portable.h @@ -0,0 +1,16 @@ +#ifndef _PANDAS_PORTABLE_H_ +#define _PANDAS_PORTABLE_H_ + +#if defined(_MSC_VER) +#define strcasecmp( s1, s2 ) _stricmp( s1, s2 ) +#endif + +// GH-23516 - works around locale perf issues +// from MUSL libc, MIT Licensed - see LICENSES +#define isdigit_ascii(c) (((unsigned)(c) - '0') < 10u) +#define getdigit_ascii(c, default) (isdigit_ascii(c) ? ((int)((c) - '0')) : default) +#define isspace_ascii(c) (((c) == ' ') || (((unsigned)(c) - '\t') < 5)) +#define toupper_ascii(c) ((((unsigned)(c) - 'a') < 26) ? ((c) & 0x5f) : (c)) +#define tolower_ascii(c) ((((unsigned)(c) - 'A') < 26) ? 
((c) | 0x20) : (c)) + +#endif diff --git a/pandas/_libs/src/headers/stdint.h b/pandas/_libs/src/headers/stdint.h new file mode 100644 index 00000000..8746bf13 --- /dev/null +++ b/pandas/_libs/src/headers/stdint.h @@ -0,0 +1,10 @@ +#ifndef _PANDAS_STDINT_H_ +#define _PANDAS_STDINT_H_ + +#if defined(_MSC_VER) && (_MSC_VER < 1900) +#include "ms_stdint.h" +#else +#include <stdint.h> +#endif + +#endif diff --git a/pandas/_libs/src/inline_helper.h b/pandas/_libs/src/inline_helper.h new file mode 100644 index 00000000..e203a05d --- /dev/null +++ b/pandas/_libs/src/inline_helper.h @@ -0,0 +1,25 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_SRC_INLINE_HELPER_H_ +#define PANDAS__LIBS_SRC_INLINE_HELPER_H_ + +#ifndef PANDAS_INLINE + #if defined(__GNUC__) + #define PANDAS_INLINE static __inline__ + #elif defined(_MSC_VER) + #define PANDAS_INLINE static __inline + #elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define PANDAS_INLINE static inline + #else + #define PANDAS_INLINE + #endif // __GNUC__ +#endif // PANDAS_INLINE + +#endif // PANDAS__LIBS_SRC_INLINE_HELPER_H_ diff --git a/pandas/_libs/src/klib/khash.h b/pandas/_libs/src/klib/khash.h new file mode 100644 index 00000000..bcf6350a --- /dev/null +++ b/pandas/_libs/src/klib/khash.h @@ -0,0 +1,569 @@ +/* The MIT License + + Copyright (c) 2008, 2009, 2011 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above
copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + An example: + +#include "khash.h" +KHASH_MAP_INIT_INT(32, char) +int main() { + int ret, is_missing; + khiter_t k; + khash_t(32) *h = kh_init(32); + k = kh_put(32, h, 5, &ret); + if (!ret) kh_del(32, h, k); + kh_value(h, k) = 10; + k = kh_get(32, h, 10); + is_missing = (k == kh_end(h)); + k = kh_get(32, h, 5); + kh_del(32, h, k); + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k)) kh_value(h, k) = 1; + kh_destroy(32, h); + return 0; +} +*/ + +/* + 2011-09-16 (0.2.6): + + * The capacity is a power of 2. This seems to dramatically improve the + speed for simple keys. Thank Zilong Tan for the suggestion. Reference: + + - https://github.com/stefanocasazza/ULib + - http://nothings.org/computer/judy/ + + * Allow to optionally use linear probing which usually has better + performance for random input. Double hashing is still the default as it + is more robust to certain non-random input. + + * Added Wang's integer hash function (not used by default). This hash + function is more robust to certain non-random input. + + 2011-02-14 (0.2.5): + + * Allow to declare global functions. 
+ + 2009-09-26 (0.2.4): + + * Improve portability + + 2008-09-19 (0.2.3): + + * Corrected the example + * Improved interfaces + + 2008-09-11 (0.2.2): + + * Improved speed a little in kh_put() + + 2008-09-10 (0.2.1): + + * Added kh_clear() + * Fixed a compiling error + + 2008-09-02 (0.2.0): + + * Changed to token concatenation which increases flexibility. + + 2008-08-31 (0.1.2): + + * Fixed a bug in kh_get(), which has not been tested previously. + + 2008-08-31 (0.1.1): + + * Added destructor +*/ + + +#ifndef __AC_KHASH_H +#define __AC_KHASH_H + +/*! + @header + + Generic hash table library. + */ + +#define AC_VERSION_KHASH_H "0.2.6" + +#include +#include +#include +#include "../inline_helper.h" + + +#if UINT_MAX == 0xffffffffu +typedef unsigned int khint32_t; +#elif ULONG_MAX == 0xffffffffu +typedef unsigned long khint32_t; +#endif + +#if ULONG_MAX == ULLONG_MAX +typedef unsigned long khuint64_t; +typedef signed long khint64_t; +#else +typedef unsigned long long khuint64_t; +typedef signed long long khint64_t; +#endif + +typedef double khfloat64_t; + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define __ac_isempty(flag, i) ((flag[i>>5]>>(i&0x1fU))&1) +#define __ac_isdel(flag, i) (0) +#define __ac_iseither(flag, i) __ac_isempty(flag, i) +#define __ac_set_isdel_false(flag, i) (0) +#define __ac_set_isempty_false(flag, i) (flag[i>>5]&=~(1ul<<(i&0x1fU))) +#define __ac_set_isempty_true(flag, i) (flag[i>>5]|=(1ul<<(i&0x1fU))) +#define __ac_set_isboth_false(flag, i) __ac_set_isempty_false(flag, i) +#define __ac_set_isdel_true(flag, i) ((void)0) + +#ifdef KHASH_LINEAR +#define __ac_inc(k, m) 1 +#else +#define __ac_inc(k, m) (((k)>>3 ^ (k)<<3) | 1) & (m) +#endif + +#define __ac_fsize(m) ((m) < 32? 
1 : (m)>>5) + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static const double __ac_HASH_UPPER = 0.77; + +#define KHASH_DECLARE(name, khkey_t, khval_t) \ + typedef struct { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; \ + extern kh_##name##_t *kh_init_##name(); \ + extern void kh_destroy_##name(kh_##name##_t *h); \ + extern void kh_clear_##name(kh_##name##_t *h); \ + extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \ + extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \ + extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \ + extern void kh_del_##name(kh_##name##_t *h, khint_t x); + +#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + typedef struct { \ + khint_t n_buckets, size, n_occupied, upper_bound; \ + khint32_t *flags; \ + khkey_t *keys; \ + khval_t *vals; \ + } kh_##name##_t; \ + SCOPE kh_##name##_t *kh_init_##name(void) { \ + return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \ + } \ + SCOPE void kh_destroy_##name(kh_##name##_t *h) \ + { \ + if (h) { \ + free(h->keys); free(h->flags); \ + free(h->vals); \ + free(h); \ + } \ + } \ + SCOPE void kh_clear_##name(kh_##name##_t *h) \ + { \ + if (h && h->flags) { \ + /* one flag bit per bucket and a set bit means "empty" (see __ac_isempty), so 0xff empties every bucket; 0xaa would leave every even bucket looking occupied */ \ + memset(h->flags, 0xff, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \ + h->size = h->n_occupied = 0; \ + } \ + } \ + SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \ + { \ + if (h->n_buckets) { \ + khint_t inc, k, i, last, mask; \ + mask = h->n_buckets - 1; \ + k = __hash_func(key); i = k & mask; \ + inc = __ac_inc(k, mask); last = i; /* inc==1 for linear probing */ \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + i = (i + inc) & mask; \ + if (i == last) return h->n_buckets; \ + } \ + return
__ac_iseither(h->flags, i)? h->n_buckets : i; \ + } else return 0; \ + } \ + SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \ + { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \ + khint32_t *new_flags = 0; \ + khint_t j = 1; \ + { \ + kroundup32(new_n_buckets); \ + if (new_n_buckets < 4) new_n_buckets = 4; \ + if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \ + else { /* hash table size to be changed (shrink or expand); rehash */ \ + new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \ + if (h->n_buckets < new_n_buckets) { /* expand */ \ + h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + } /* otherwise shrink */ \ + } \ + } \ + if (j) { /* rehashing is needed */ \ + for (j = 0; j != h->n_buckets; ++j) { \ + if (__ac_iseither(h->flags, j) == 0) { \ + khkey_t key = h->keys[j]; \ + khval_t val; \ + khint_t new_mask; \ + new_mask = new_n_buckets - 1; \ + if (kh_is_map) val = h->vals[j]; \ + __ac_set_isempty_true(h->flags, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t inc, k, i; \ + k = __hash_func(key); \ + i = k & new_mask; \ + inc = __ac_inc(k, new_mask); \ + while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \ + __ac_set_isempty_false(new_flags, i); \ + if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \ + __ac_set_isempty_true(h->flags, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + 
if (kh_is_map) h->vals[i] = val; \ + break; \ + } \ + } \ + } \ + } \ + if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \ + h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \ + if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \ + } \ + free(h->flags); /* free the working space */ \ + h->flags = new_flags; \ + h->n_buckets = new_n_buckets; \ + h->n_occupied = h->size; \ + h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ + } \ + } \ + SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \ + { \ + khint_t x; \ + if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \ + if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \ + else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + { \ + khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \ + x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \ + if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \ + else { \ + inc = __ac_inc(k, mask); last = i; \ + while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \ + if (__ac_isdel(h->flags, i)) site = i; \ + i = (i + inc) & mask; \ + if (i == last) { x = site; break; } \ + } \ + if (x == h->n_buckets) { \ + if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \ + else x = i; \ + } \ + } \ + } \ + if (__ac_isempty(h->flags, x)) { /* not present at all */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; ++h->n_occupied; \ + *ret = 1; \ + } else if (__ac_isdel(h->flags, x)) { /* deleted */ \ + h->keys[x] = key; \ + __ac_set_isboth_false(h->flags, x); \ + ++h->size; \ + *ret = 2; \ + } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ + return x; \ + } \ + SCOPE void 
kh_del_##name(kh_##name##_t *h, khint_t x) \ + { \ + if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \ + __ac_set_isdel_true(h->flags, x); \ + --h->size; \ + } \ + } + +#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \ + KHASH_INIT2(name, PANDAS_INLINE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) + +/* --- BEGIN OF HASH FUNCTIONS --- */ + +/*! @function + @abstract Integer hash function + @param key The integer [khint32_t] + @return The hash value [khint_t] + */ +#define kh_int_hash_func(key) (khint32_t)(key) +/*! @function + @abstract Integer comparison function + */ +#define kh_int_hash_equal(a, b) ((a) == (b)) +/*! @function + @abstract 64-bit integer hash function + @param key The integer [khint64_t] + @return The hash value [khint_t] + */ +#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11) +/*! @function + @abstract 64-bit integer comparison function + */ +#define kh_int64_hash_equal(a, b) ((a) == (b)) + +/*! @function + @abstract const char* hash function + @param s Pointer to a null terminated string + @return The hash value + */ +PANDAS_INLINE khint_t __ac_X31_hash_string(const char *s) +{ + khint_t h = *s; + if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s; + return h; +} +/*! @function + @abstract Another interface to const char* hash function + @param key Pointer to a null terminated string [const char*] + @return The hash value [khint_t] + */ +#define kh_str_hash_func(key) __ac_X31_hash_string(key) +/*! @function + @abstract Const char* comparison function + */ +#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0) + +PANDAS_INLINE khint_t __ac_Wang_hash(khint_t key) +{ + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} +/*! @function + @abstract Integer hash function based on __ac_Wang_hash + @param k The integer; converted to khint_t before hashing + @return The hash value [khint_t] + */ +#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)(k)) + +/* --- END OF HASH FUNCTIONS --- */ + +/* Other convenient macros... */ + +/*!
+ @abstract Type of the hash table. + @param name Name of the hash table [symbol] + */ +#define khash_t(name) kh_##name##_t + +/*! @function + @abstract Initiate a hash table. + @param name Name of the hash table [symbol] + @return Pointer to the hash table [khash_t(name)*] + */ +#define kh_init(name) kh_init_##name() + +/*! @function + @abstract Destroy a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_destroy(name, h) kh_destroy_##name(h) + +/*! @function + @abstract Reset a hash table without deallocating memory. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + */ +#define kh_clear(name, h) kh_clear_##name(h) + +/*! @function + @abstract Resize a hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param s New size [khint_t] + */ +#define kh_resize(name, h, s) kh_resize_##name(h, s) + +/*! @function + @abstract Insert a key to the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @param r Extra return code: 0 if the key is present in the hash table; + 1 if the bucket is empty (never used); 2 if the element in + the bucket has been deleted [int*] + @return Iterator to the inserted element [khint_t] + */ +#define kh_put(name, h, k, r) kh_put_##name(h, k, r) + +/*! @function + @abstract Retrieve a key from the hash table. + @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Key [type of keys] + @return Iterator to the found element, or kh_end(h) if the element is absent [khint_t] + */ +#define kh_get(name, h, k) kh_get_##name(h, k) + +/*! @function + @abstract Remove a key from the hash table.
+ @param name Name of the hash table [symbol] + @param h Pointer to the hash table [khash_t(name)*] + @param k Iterator to the element to be deleted [khint_t] + */ +#define kh_del(name, h, k) kh_del_##name(h, k) + +/*! @function + @abstract Test whether a bucket contains data. + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x))) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table [khash_t(name)*] + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->vals[x]) + +/*! @function + @abstract Alias of kh_val() + */ +#define kh_value(h, x) ((h)->vals[x]) + +/*! @function + @abstract Get the start iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The start iterator [khint_t] + */ +#define kh_begin(h) (khint_t)(0) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table [khash_t(name)*] + @return The end iterator [khint_t] + */ +#define kh_end(h) ((h)->n_buckets) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->size) + +/*! @function + @abstract Get the number of buckets in the hash table + @param h Pointer to the hash table [khash_t(name)*] + @return Number of buckets in the hash table [khint_t] + */ +#define kh_n_buckets(h) ((h)->n_buckets) + +/* More convenient interfaces */ + +/*! 
@function + @abstract Instantiate a hash set containing integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_INT(name) \ + KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_INT(name, khval_t) \ + KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal) + +/*! @function + @abstract Instantiate a hash set containing 64-bit integer keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_UINT64(name) \ + KHASH_INIT(name, khuint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +#define KHASH_SET_INIT_INT64(name) \ + KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal) + +/*! @function + @abstract Instantiate a hash map containing 64-bit integer keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_UINT64(name, khval_t) \ + KHASH_INIT(name, khuint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + +#define KHASH_MAP_INIT_INT64(name, khval_t) \ + KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal) + + +typedef const char *kh_cstr_t; +/*! @function + @abstract Instantiate a hash set containing const char* keys + @param name Name of the hash table [symbol] + */ +#define KHASH_SET_INIT_STR(name) \ + KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal) + +/*!
@function + @abstract Instantiate a hash map containing const char* keys + @param name Name of the hash table [symbol] + @param khval_t Type of values [type] + */ +#define KHASH_MAP_INIT_STR(name, khval_t) \ + KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal) + + +#define kh_exist_str(h, k) (kh_exist(h, k)) +#define kh_exist_float64(h, k) (kh_exist(h, k)) +#define kh_exist_uint64(h, k) (kh_exist(h, k)) +#define kh_exist_int64(h, k) (kh_exist(h, k)) +#define kh_exist_int32(h, k) (kh_exist(h, k)) + +KHASH_MAP_INIT_STR(str, size_t) +KHASH_MAP_INIT_INT(int32, size_t) +KHASH_MAP_INIT_INT64(int64, size_t) +KHASH_MAP_INIT_UINT64(uint64, size_t) + + +#endif /* __AC_KHASH_H */ diff --git a/pandas/_libs/src/klib/khash_python.h b/pandas/_libs/src/klib/khash_python.h new file mode 100644 index 00000000..82251744 --- /dev/null +++ b/pandas/_libs/src/klib/khash_python.h @@ -0,0 +1,124 @@ +#include +#include + +#include "khash.h" + +// Previously we were using the built in cpython hash function for doubles +// python 2.7 https://github.com/python/cpython/blob/2.7/Objects/object.c#L1021 +// python 3.5 https://github.com/python/cpython/blob/3.5/Python/pyhash.c#L85 + +// The python 3 hash function has the invariant hash(x) == hash(int(x)) == hash(decimal(x)) +// and the size of hash may be different by platform / version (long in py2, Py_ssize_t in py3). +// We don't need those invariants because types will be cast before hashing, and if Py_ssize_t +// is 64 bits the truncation causes collision issues. Given all that, we use our own +// simple hash, viewing the double bytes as an int64 and using khash's default +// hash for 64 bit integers.
+// GH 13436 +khint64_t PANDAS_INLINE asint64(double key) { + khint64_t val; + memcpy(&val, &key, sizeof(double)); + return val; +} + +// correct for all inputs but not -0.0 and NaNs +#define kh_float64_hash_func_0_NAN(key) (khint32_t)((asint64(key))>>33^(asint64(key))^(asint64(key))<<11) + +// correct for all inputs but not NaNs +#define kh_float64_hash_func_NAN(key) ((key) == 0.0 ? \ + kh_float64_hash_func_0_NAN(0.0) : \ + kh_float64_hash_func_0_NAN(key)) + +// correct for all +#define kh_float64_hash_func(key) ((key) != (key) ? \ + kh_float64_hash_func_NAN(Py_NAN) : \ + kh_float64_hash_func_NAN(key)) + +#define kh_float64_hash_equal(a, b) ((a) == (b) || ((b) != (b) && (a) != (a))) + +#define KHASH_MAP_INIT_FLOAT64(name, khval_t) \ + KHASH_INIT(name, khfloat64_t, khval_t, 1, kh_float64_hash_func, kh_float64_hash_equal) + +KHASH_MAP_INIT_FLOAT64(float64, size_t) + + +int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) { + int result = PyObject_RichCompareBool(a, b, Py_EQ); + if (result < 0) { + PyErr_Clear(); + return 0; + } + if (result == 0) { // still could be two NaNs + return PyFloat_CheckExact(a) && + PyFloat_CheckExact(b) && + Py_IS_NAN(PyFloat_AS_DOUBLE(a)) && + Py_IS_NAN(PyFloat_AS_DOUBLE(b)); + } + return result; +} + +// For PyObject_Hash holds: +// hash(0.0) == 0 == hash(-0.0) +// hash(X) == 0 if X is a NaN-value +// so it is OK to use it directly +#define kh_python_hash_func(key) (PyObject_Hash(key)) +#define kh_python_hash_equal(a, b) (pyobject_cmp(a, b)) + + +// Python object + +typedef PyObject* kh_pyobject_t; + +#define KHASH_MAP_INIT_PYOBJECT(name, khval_t) \ + KHASH_INIT(name, kh_pyobject_t, khval_t, 1, \ + kh_python_hash_func, kh_python_hash_equal) + +KHASH_MAP_INIT_PYOBJECT(pymap, Py_ssize_t) + +#define KHASH_SET_INIT_PYOBJECT(name) \ + KHASH_INIT(name, kh_pyobject_t, char, 0, \ + kh_python_hash_func, kh_python_hash_equal) + +KHASH_SET_INIT_PYOBJECT(pyset) + +#define kh_exist_pymap(h, k) (kh_exist(h, k)) +#define kh_exist_pyset(h, k) 
(kh_exist(h, k)) + +KHASH_MAP_INIT_STR(strbox, kh_pyobject_t) + +typedef struct { + kh_str_t *table; + int starts[256]; +} kh_str_starts_t; + +typedef kh_str_starts_t* p_kh_str_starts_t; + +p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) { + kh_str_starts_t *result = (kh_str_starts_t*)calloc(1, sizeof(kh_str_starts_t)); + result->table = kh_init_str(); + return result; +} + +khint_t PANDAS_INLINE kh_put_str_starts_item(kh_str_starts_t* table, char* key, int* ret) { + khint_t result = kh_put_str(table->table, key, ret); + if (*ret != 0) { + table->starts[(unsigned char)key[0]] = 1; + } + return result; +} + +khint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const char* key) { + unsigned char ch = *key; + if (table->starts[ch]) { + if (ch == '\0' || kh_get_str(table->table, key) != table->table->n_buckets) return 1; + } + return 0; +} + +void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) { + kh_destroy_str(table->table); + free(table); +} + +void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khint_t val) { + kh_resize_str(table->table, val); +} \ No newline at end of file diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h new file mode 100644 index 00000000..7fbe7a04 --- /dev/null +++ b/pandas/_libs/src/parse_helper.h @@ -0,0 +1,95 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_ +#define PANDAS__LIBS_SRC_PARSE_HELPER_H_ + +#include +#include "parser/tokenizer.h" + +int to_double(char *item, double *p_value, char sci, char decimal, + int *maybe_int) { + char *p_end = NULL; + int error = 0; + + *p_value = xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int); + + return (error == 0) && (!*p_end); +} + +int floatify(PyObject *str, double *result, int *maybe_int) { + int status; + char *data; + PyObject *tmp = NULL; + const char sci = 'E'; + const char dec = '.'; + + if (PyBytes_Check(str)) { + data = PyBytes_AS_STRING(str); + } else if (PyUnicode_Check(str)) { + tmp = PyUnicode_AsUTF8String(str); + data = PyBytes_AS_STRING(tmp); + } else { + PyErr_SetString(PyExc_TypeError, "Invalid object type"); + return -1; + } + + status = to_double(data, result, sci, dec, maybe_int); + + if (!status) { + /* handle inf/-inf infinity/-infinity */ + if (strlen(data) == 3) { + if (0 == strcasecmp(data, "inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 4) { + if (0 == strcasecmp(data, "-inf")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 8) { + if (0 == strcasecmp(data, "infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 9) { + if (0 == strcasecmp(data, "-infinity")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else { + goto parsingerror; + } + } + + Py_XDECREF(tmp); + return 0; + +parsingerror: + PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); + Py_XDECREF(tmp); + return -1; +} + +#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_ diff --git a/pandas/_libs/src/parser/io.c 
b/pandas/_libs/src/parser/io.c new file mode 100644 index 00000000..1e3295fc --- /dev/null +++ b/pandas/_libs/src/parser/io.c @@ -0,0 +1,312 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#include "io.h" + +#include +#include + +#ifndef O_BINARY +#define O_BINARY 0 +#endif // O_BINARY + +#ifdef _WIN32 +#define USE_WIN_UTF16 +#include +#endif + +/* + On-disk FILE, uncompressed +*/ + +void *new_file_source(char *fname, size_t buffer_size) { + file_source *fs = (file_source *)malloc(sizeof(file_source)); + if (fs == NULL) { + return NULL; + } + +#ifdef USE_WIN_UTF16 + // Fix gh-15086 properly - convert UTF8 to UTF16 that Windows widechar API + // accepts. This is needed because UTF8 might _not_ be convertible to MBCS + // for some conditions, as MBCS is locale-dependent, and not all unicode + // symbols can be expressed in it. 
+ { + wchar_t* wname = NULL; + int required = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0); + if (required == 0) { + free(fs); + return NULL; + } + wname = (wchar_t*)malloc(required * sizeof(wchar_t)); + if (wname == NULL) { + free(fs); + return NULL; + } + if (MultiByteToWideChar(CP_UTF8, 0, fname, -1, wname, required) < + required) { + free(wname); + free(fs); + return NULL; + } + fs->fd = _wopen(wname, O_RDONLY | O_BINARY); + free(wname); + } +#else + fs->fd = open(fname, O_RDONLY | O_BINARY); +#endif + if (fs->fd == -1) { + free(fs); + return NULL; + } + + // Only allocate this heap memory if we are not memory-mapping the file + fs->buffer = (char *)malloc((buffer_size + 1) * sizeof(char)); + + if (fs->buffer == NULL) { + close(fs->fd); + free(fs); + return NULL; + } + + memset(fs->buffer, '\0', buffer_size + 1); + fs->size = buffer_size; + + return (void *)fs; +} + +/* Wrap a Python object as an rd_source, taking a new reference to it. + Returns NULL if the allocation fails (no reference is taken then). */ +void *new_rd_source(PyObject *obj) { + rd_source *rds = (rd_source *)malloc(sizeof(rd_source)); + if (rds == NULL) { + /* same convention as new_file_source: report failure with NULL */ + return NULL; + } + + /* hold on to this object */ + Py_INCREF(obj); + rds->obj = obj; + rds->buffer = NULL; + rds->position = 0; + + return (void *)rds; +} + +/* + + Cleanup callbacks + + */ + +int del_file_source(void *ptr) { + file_source *fs = ptr; + if (fs == NULL) return 0; + + free(fs->buffer); + close(fs->fd); + free(fs); + + return 0; +} + +/* Release the references held by an rd_source and free it; NULL is a no-op, as in del_file_source. */ +int del_rd_source(void *rds) { + if (rds == NULL) return 0; + + Py_XDECREF(RDS(rds)->obj); + Py_XDECREF(RDS(rds)->buffer); + free(rds); + + return 0; +} + +/* + + IO callbacks + + */ + +void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status) { + file_source *fs = FS(source); + ssize_t rv; + + if (nbytes > fs->size) { + nbytes = fs->size; + } + + rv = read(fs->fd, fs->buffer, nbytes); + switch (rv) { + case -1: + *status = CALLING_READ_FAILED; + *bytes_read = 0; + return NULL; + case 0: + *status = REACHED_EOF; + *bytes_read = 0; + return NULL; + default: + *status = 0; + *bytes_read = rv; + fs->buffer[rv] = '\0'; + break; + } + + return (void *)fs->buffer; +} + +void
*buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status) { + PyGILState_STATE state; + PyObject *result, *func, *args, *tmp; + + void *retval; + + size_t length; + rd_source *src = RDS(source); + state = PyGILState_Ensure(); + + /* delete old object */ + Py_XDECREF(src->buffer); + src->buffer = NULL; + args = Py_BuildValue("(i)", nbytes); + + func = PyObject_GetAttrString(src->obj, "read"); + + /* TODO: does this release the GIL? */ + result = PyObject_CallObject(func, args); + Py_XDECREF(args); + Py_XDECREF(func); + + if (result == NULL) { + PyGILState_Release(state); + *bytes_read = 0; + *status = CALLING_READ_FAILED; + return NULL; + } else if (!PyBytes_Check(result)) { + tmp = PyUnicode_AsUTF8String(result); + Py_DECREF(result); + if (tmp == NULL) { + PyGILState_Release(state); + return NULL; + } + result = tmp; + } + + length = PySequence_Length(result); + + if (length == 0) + *status = REACHED_EOF; + else + *status = 0; + + /* hang on to the Python object */ + src->buffer = result; + retval = (void *)PyBytes_AsString(result); + + PyGILState_Release(state); + + /* TODO: more error handling */ + *bytes_read = length; + + return retval; +} + +#ifdef HAVE_MMAP + +#include + +void *new_mmap(char *fname) { + memory_map *mm; + struct stat stat; + size_t filesize; + + mm = (memory_map *)malloc(sizeof(memory_map)); + if (mm == NULL) { + fprintf(stderr, "new_file_buffer: malloc() failed.\n"); + return (NULL); + } + mm->fd = open(fname, O_RDONLY | O_BINARY); + if (mm->fd == -1) { + fprintf(stderr, "new_file_buffer: open(%s) failed. errno =%d\n", + fname, errno); + free(mm); + return NULL; + } + + if (fstat(mm->fd, &stat) == -1) { + fprintf(stderr, "new_file_buffer: fstat() failed. errno =%d\n", + errno); + close(mm->fd); + free(mm); + return NULL; + } + filesize = stat.st_size; /* XXX This might be 32 bits. 
*/ + + mm->memmap = mmap(NULL, filesize, PROT_READ, MAP_SHARED, mm->fd, 0); + if (mm->memmap == MAP_FAILED) { + /* XXX Eventually remove this print statement. */ + fprintf(stderr, "new_file_buffer: mmap() failed.\n"); + close(mm->fd); + free(mm); + return NULL; + } + + mm->size = (off_t)filesize; + mm->position = 0; + + return mm; +} + +int del_mmap(void *ptr) { + memory_map *mm = ptr; + + if (mm == NULL) return 0; + + munmap(mm->memmap, mm->size); + close(mm->fd); + free(mm); + + return 0; +} + +void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status) { + void *retval; + memory_map *src = source; + size_t remaining = src->size - src->position; + + if (remaining == 0) { + *bytes_read = 0; + *status = REACHED_EOF; + return NULL; + } + + if (nbytes > remaining) { + nbytes = remaining; + } + + retval = src->memmap + src->position; + + /* advance position in mmap data structure */ + src->position += nbytes; + + *bytes_read = nbytes; + *status = 0; + + return retval; +} + +#else + +/* kludgy */ + +void *new_mmap(char *fname) { return NULL; } + +int del_mmap(void *src) { return 0; } + +/* don't use this! */ + +void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status) { + return NULL; +} + +#endif // HAVE_MMAP diff --git a/pandas/_libs/src/parser/io.h b/pandas/_libs/src/parser/io.h new file mode 100644 index 00000000..aac41845 --- /dev/null +++ b/pandas/_libs/src/parser/io.h @@ -0,0 +1,69 @@ +/* +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#ifndef PANDAS__LIBS_SRC_PARSER_IO_H_ +#define PANDAS__LIBS_SRC_PARSER_IO_H_ + +#define PY_SSIZE_T_CLEAN +#include +#include "tokenizer.h" + +typedef struct _file_source { + /* The file being read. 
*/ + int fd; + + char *buffer; + size_t size; +} file_source; + +#define FS(source) ((file_source *)source) + +#if !defined(_WIN32) && !defined(HAVE_MMAP) +#define HAVE_MMAP +#endif // HAVE_MMAP + +typedef struct _memory_map { + int fd; + + /* Size of the file, in bytes. */ + char *memmap; + size_t size; + + size_t position; +} memory_map; + +void *new_mmap(char *fname); + +int del_mmap(void *src); + +void *buffer_mmap_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status); + +typedef struct _rd_source { + PyObject *obj; + PyObject *buffer; + size_t position; +} rd_source; + +#define RDS(source) ((rd_source *)source) + +void *new_file_source(char *fname, size_t buffer_size); + +void *new_rd_source(PyObject *obj); + +int del_file_source(void *src); +int del_rd_source(void *src); + +void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status); + +void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read, + int *status); + +#endif // PANDAS__LIBS_SRC_PARSER_IO_H_ diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c new file mode 100644 index 00000000..2188ff6b --- /dev/null +++ b/pandas/_libs/src/parser/tokenizer.c @@ -0,0 +1,2008 @@ +/* + +Copyright (c) 2012, Lambda Foundry, Inc., except where noted + +Incorporates components of WarrenWeckesser/textreader, licensed under 3-clause +BSD + +See LICENSE for the license + +*/ + +/* + +Low-level ascii-file processing for pandas. Combines some elements from +Python's built-in csv module and Warren Weckesser's textreader project on +GitHub. See Python Software Foundation License and BSD licenses for these. 
+ +*/ + +#include "tokenizer.h" + +#include +#include +#include + +#include "../headers/portable.h" + +void coliter_setup(coliter_t *self, parser_t *parser, int i, int start) { + // column i, starting at 0 + self->words = parser->words; + self->col = i; + self->line_start = parser->line_start + start; +} + +static void free_if_not_null(void **ptr) { + TRACE(("free_if_not_null %p\n", *ptr)) + if (*ptr != NULL) { + free(*ptr); + *ptr = NULL; + } +} + +/* + + Parser / tokenizer + +*/ + +static void *grow_buffer(void *buffer, uint64_t length, uint64_t *capacity, + int64_t space, int64_t elsize, int *error) { + uint64_t cap = *capacity; + void *newbuffer = buffer; + + // Can we fit potentially nbytes tokens (+ null terminators) in the stream? + while ((length + space >= cap) && (newbuffer != NULL)) { + cap = cap ? cap << 1 : 2; + buffer = newbuffer; + newbuffer = realloc(newbuffer, elsize * cap); + } + + if (newbuffer == NULL) { + // realloc failed so don't change *capacity, set *error to errno + // and return the last good realloc'd buffer so it can be freed + *error = errno; + newbuffer = buffer; + } else { + // realloc worked, update *capacity and set *error to 0 + // sigh, multiple return values + *capacity = cap; + *error = 0; + } + return newbuffer; +} + +void parser_set_default_options(parser_t *self) { + self->decimal = '.'; + self->sci = 'E'; + + // For tokenization + self->state = START_RECORD; + + self->delimiter = ','; // XXX + self->delim_whitespace = 0; + + self->doublequote = 0; + self->quotechar = '"'; + self->escapechar = 0; + + self->lineterminator = '\0'; /* NUL->standard logic */ + + self->skipinitialspace = 0; + self->quoting = QUOTE_MINIMAL; + self->allow_embedded_newline = 1; + self->strict = 0; + + self->expected_fields = -1; + self->error_bad_lines = 0; + self->warn_bad_lines = 0; + + self->commentchar = '#'; + self->thousands = '\0'; + + self->skipset = NULL; + self->skipfunc = NULL; + self->skip_first_N_rows = -1; + self->skip_footer = 0; +} + 
+parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); } + +int parser_clear_data_buffers(parser_t *self) { + free_if_not_null((void *)&self->stream); + free_if_not_null((void *)&self->words); + free_if_not_null((void *)&self->word_starts); + free_if_not_null((void *)&self->line_start); + free_if_not_null((void *)&self->line_fields); + return 0; +} + +int parser_cleanup(parser_t *self) { + int status = 0; + + // XXX where to put this + free_if_not_null((void *)&self->error_msg); + free_if_not_null((void *)&self->warn_msg); + + if (self->skipset != NULL) { + kh_destroy_int64((kh_int64_t *)self->skipset); + self->skipset = NULL; + } + + if (parser_clear_data_buffers(self) < 0) { + status = -1; + } + + if (self->cb_cleanup != NULL) { + if (self->cb_cleanup(self->source) < 0) { + status = -1; + } + self->cb_cleanup = NULL; + } + + return status; +} + +int parser_init(parser_t *self) { + int64_t sz; + + /* + Initialize data buffers + */ + + self->stream = NULL; + self->words = NULL; + self->word_starts = NULL; + self->line_start = NULL; + self->line_fields = NULL; + self->error_msg = NULL; + self->warn_msg = NULL; + + // token stream + self->stream = (char *)malloc(STREAM_INIT_SIZE * sizeof(char)); + if (self->stream == NULL) { + parser_cleanup(self); + return PARSER_OUT_OF_MEMORY; + } + self->stream_cap = STREAM_INIT_SIZE; + self->stream_len = 0; + + // word pointers and metadata + sz = STREAM_INIT_SIZE / 10; + sz = sz ? 
sz : 1; + self->words = (char **)malloc(sz * sizeof(char *)); + self->word_starts = (int64_t *)malloc(sz * sizeof(int64_t)); + self->max_words_cap = sz; + self->words_cap = sz; + self->words_len = 0; + + // line pointers and metadata + self->line_start = (int64_t *)malloc(sz * sizeof(int64_t)); + + self->line_fields = (int64_t *)malloc(sz * sizeof(int64_t)); + + self->lines_cap = sz; + self->lines = 0; + self->file_lines = 0; + + if (self->stream == NULL || self->words == NULL || + self->word_starts == NULL || self->line_start == NULL || + self->line_fields == NULL) { + parser_cleanup(self); + + return PARSER_OUT_OF_MEMORY; + } + + /* amount of bytes buffered */ + self->datalen = 0; + self->datapos = 0; + + self->line_start[0] = 0; + self->line_fields[0] = 0; + + self->pword_start = self->stream; + self->word_start = 0; + + self->state = START_RECORD; + + self->error_msg = NULL; + self->warn_msg = NULL; + + self->commentchar = '\0'; + + return 0; +} + +void parser_free(parser_t *self) { + // opposite of parser_init + parser_cleanup(self); +} + +void parser_del(parser_t *self) { + free(self); +} + +static int make_stream_space(parser_t *self, size_t nbytes) { + uint64_t i, cap, length; + int status; + void *orig_ptr, *newptr; + + // Can we fit potentially nbytes tokens (+ null terminators) in the stream? + + /* + TOKEN STREAM + */ + + orig_ptr = (void *)self->stream; + TRACE( + ("\n\nmake_stream_space: nbytes = %zu. grow_buffer(self->stream...)\n", + nbytes)) + self->stream = (char *)grow_buffer((void *)self->stream, self->stream_len, + &self->stream_cap, nbytes * 2, + sizeof(char), &status); + TRACE( + ("make_stream_space: self->stream=%p, self->stream_len = %zu, " + "self->stream_cap=%zu, status=%zu\n", + self->stream, self->stream_len, self->stream_cap, status)) + + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc sets errno when moving buffer? 
+ if (self->stream != orig_ptr) { + self->pword_start = self->stream + self->word_start; + + for (i = 0; i < self->words_len; ++i) { + self->words[i] = self->stream + self->word_starts[i]; + } + } + + /* + WORD VECTORS + */ + + cap = self->words_cap; + + /** + * If we are reading in chunks, we need to be aware of the maximum number + * of words we have seen in previous chunks (self->max_words_cap), so + * that way, we can properly allocate when reading subsequent ones. + * + * Otherwise, we risk a buffer overflow if we mistakenly under-allocate + * just because a recent chunk did not have as many words. + */ + if (self->words_len + nbytes < self->max_words_cap) { + length = self->max_words_cap - nbytes - 1; + } else { + length = self->words_len; + } + + self->words = + (char **)grow_buffer((void *)self->words, length, + &self->words_cap, nbytes, + sizeof(char *), &status); + TRACE( + ("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, " + "%d)\n", + self->words_len, self->words_cap, nbytes, status)) + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc took place + if (cap != self->words_cap) { + TRACE( + ("make_stream_space: cap != self->words_cap, nbytes = %d, " + "self->words_cap=%d\n", + nbytes, self->words_cap)) + newptr = realloc((void *)self->word_starts, + sizeof(int64_t) * self->words_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->word_starts = (int64_t *)newptr; + } + } + + /* + LINE VECTORS + */ + cap = self->lines_cap; + self->line_start = + (int64_t *)grow_buffer((void *)self->line_start, self->lines + 1, + &self->lines_cap, nbytes, + sizeof(int64_t), &status); + TRACE(( + "make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n", + self->lines + 1, self->lines_cap, nbytes, status)) + if (status != 0) { + return PARSER_OUT_OF_MEMORY; + } + + // realloc took place + if (cap != self->lines_cap) { + TRACE(("make_stream_space: cap != self->lines_cap, nbytes = %d\n", + nbytes)) + 
newptr = realloc((void *)self->line_fields, + sizeof(int64_t) * self->lines_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_fields = (int64_t *)newptr; + } + } + + return 0; +} + +static int push_char(parser_t *self, char c) { + TRACE(("push_char: self->stream[%zu] = %x, stream_cap=%zu\n", + self->stream_len + 1, c, self->stream_cap)) + if (self->stream_len >= self->stream_cap) { + TRACE( + ("push_char: ERROR!!! self->stream_len(%d) >= " + "self->stream_cap(%d)\n", + self->stream_len, self->stream_cap)) + int64_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + self->stream[self->stream_len++] = c; + return 0; +} + +int PANDAS_INLINE end_field(parser_t *self) { + // XXX cruft + if (self->words_len >= self->words_cap) { + TRACE( + ("end_field: ERROR!!! self->words_len(%zu) >= " + "self->words_cap(%zu)\n", + self->words_len, self->words_cap)) + int64_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + + // null terminate token + push_char(self, '\0'); + + // set pointer and metadata + self->words[self->words_len] = self->pword_start; + + TRACE(("end_field: Char diff: %d\n", self->pword_start - self->words[0])); + + TRACE(("end_field: Saw word %s at: %d. 
Total: %d\n", self->pword_start, + self->word_start, self->words_len + 1)) + + self->word_starts[self->words_len] = self->word_start; + self->words_len++; + + // increment line field count + self->line_fields[self->lines]++; + + // New field begin in stream + self->pword_start = self->stream + self->stream_len; + self->word_start = self->stream_len; + + return 0; +} + +static void append_warning(parser_t *self, const char *msg) { + int64_t ex_length; + int64_t length = strlen(msg); + void *newptr; + + if (self->warn_msg == NULL) { + self->warn_msg = (char *)malloc(length + 1); + snprintf(self->warn_msg, length + 1, "%s", msg); + } else { + ex_length = strlen(self->warn_msg); + newptr = realloc(self->warn_msg, ex_length + length + 1); + if (newptr != NULL) { + self->warn_msg = (char *)newptr; + snprintf(self->warn_msg + ex_length, length + 1, "%s", msg); + } + } +} + +static int end_line(parser_t *self) { + char *msg; + int64_t fields; + int ex_fields = self->expected_fields; + int64_t bufsize = 100; // for error or warning messages + + fields = self->line_fields[self->lines]; + + TRACE(("end_line: Line end, nfields: %d\n", fields)); + + TRACE(("end_line: lines: %d\n", self->lines)); + if (self->lines > 0) { + if (self->expected_fields >= 0) { + ex_fields = self->expected_fields; + } else { + ex_fields = self->line_fields[self->lines - 1]; + } + } + TRACE(("end_line: ex_fields: %d\n", ex_fields)); + + if (self->state == START_FIELD_IN_SKIP_LINE || + self->state == IN_FIELD_IN_SKIP_LINE || + self->state == IN_QUOTED_FIELD_IN_SKIP_LINE || + self->state == QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE) { + TRACE(("end_line: Skipping row %d\n", self->file_lines)); + // increment file line count + self->file_lines++; + + // skip the tokens from this bad line + self->line_start[self->lines] += fields; + + // reset field count + self->line_fields[self->lines] = 0; + return 0; + } + + if (!(self->lines <= self->header_end + 1) && + (self->expected_fields < 0 && fields > ex_fields) && 
!(self->usecols)) { + // increment file line count + self->file_lines++; + + // skip the tokens from this bad line + self->line_start[self->lines] += fields; + + // reset field count + self->line_fields[self->lines] = 0; + + // file_lines is now the actual file line number (starting at 1) + if (self->error_bad_lines) { + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Expected %d fields in line %lld, saw %lld\n", + ex_fields, (long long)self->file_lines, (long long)fields); + + TRACE(("Error at line %d, %d fields\n", self->file_lines, fields)); + + return -1; + } else { + // simply skip bad lines + if (self->warn_bad_lines) { + // pass up error message + msg = (char *)malloc(bufsize); + snprintf(msg, bufsize, + "Skipping line %lld: expected %d fields, saw %lld\n", + (long long)self->file_lines, ex_fields, + (long long)fields); + append_warning(self, msg); + free(msg); + } + } + } else { + // missing trailing delimiters + if ((self->lines >= self->header_end + 1) && + fields < ex_fields) { + // might overrun the buffer when closing fields + if (make_stream_space(self, ex_fields - fields) < 0) { + int64_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); + return -1; + } + + while (fields < ex_fields) { + end_field(self); + fields++; + } + } + + // increment both line counts + self->file_lines++; + self->lines++; + + // good line, set new start point + if (self->lines >= self->lines_cap) { + TRACE(( + "end_line: ERROR!!! 
self->lines(%zu) >= self->lines_cap(%zu)\n", + self->lines, self->lines_cap)) + int64_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "Buffer overflow caught - " + "possible malformed input file.\n"); + return PARSER_OUT_OF_MEMORY; + } + self->line_start[self->lines] = + (self->line_start[self->lines - 1] + fields); + + TRACE( + ("end_line: new line start: %d\n", self->line_start[self->lines])); + + // new line start with 0 fields + self->line_fields[self->lines] = 0; + } + + TRACE(("end_line: Finished line, at %d\n", self->lines)); + + return 0; +} + +int parser_add_skiprow(parser_t *self, int64_t row) { + khiter_t k; + kh_int64_t *set; + int ret = 0; + + if (self->skipset == NULL) { + self->skipset = (void *)kh_init_int64(); + } + + set = (kh_int64_t *)self->skipset; + + k = kh_put_int64(set, row, &ret); + set->keys[k] = row; + + return 0; +} + +int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) { + // self->file_lines is zero based so subtract 1 from nrows + if (nrows > 0) { + self->skip_first_N_rows = nrows - 1; + } + + return 0; +} + +static int parser_buffer_bytes(parser_t *self, size_t nbytes) { + int status; + size_t bytes_read; + + status = 0; + self->datapos = 0; + self->data = self->cb_io(self->source, nbytes, &bytes_read, &status); + TRACE(( + "parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n", + nbytes, bytes_read, status)); + self->datalen = bytes_read; + + if (status != REACHED_EOF && self->data == NULL) { + int64_t bufsize = 200; + self->error_msg = (char *)malloc(bufsize); + + if (status == CALLING_READ_FAILED) { + snprintf(self->error_msg, bufsize, + "Calling read(nbytes) on source failed. 
" + "Try engine='python'."); + } else { + snprintf(self->error_msg, bufsize, "Unknown error in IO callback"); + } + return -1; + } + + TRACE(("datalen: %d\n", self->datalen)); + + return status; +} + +/* + + Tokenization macros and state machine code + +*/ + +#define PUSH_CHAR(c) \ + TRACE( \ + ("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", \ + c, slen, self->stream_cap, self->stream_len)) \ + if (slen >= self->stream_cap) { \ + TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= stream_cap(%d)\n", slen, \ + self->stream_cap)) \ + int64_t bufsize = 100; \ + self->error_msg = (char *)malloc(bufsize); \ + snprintf(self->error_msg, bufsize, \ + "Buffer overflow caught - possible malformed input file.\n");\ + return PARSER_OUT_OF_MEMORY; \ + } \ + *stream++ = c; \ + slen++; + +// This is a little bit of a hack but works for now + +#define END_FIELD() \ + self->stream_len = slen; \ + if (end_field(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; + +#define END_LINE_STATE(STATE) \ + self->stream_len = slen; \ + if (end_line(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; \ + self->state = STATE; \ + if (line_limit > 0 && self->lines == start_lines + line_limit) { \ + goto linelimit; \ + } + +#define END_LINE_AND_FIELD_STATE(STATE) \ + self->stream_len = slen; \ + if (end_line(self) < 0) { \ + goto parsingerror; \ + } \ + if (end_field(self) < 0) { \ + goto parsingerror; \ + } \ + stream = self->stream + self->stream_len; \ + slen = self->stream_len; \ + self->state = STATE; \ + if (line_limit > 0 && self->lines == start_lines + line_limit) { \ + goto linelimit; \ + } + +#define END_LINE() END_LINE_STATE(START_RECORD) + +#define IS_TERMINATOR(c) \ + (c == line_terminator) + +#define IS_QUOTE(c) ((c == self->quotechar && self->quoting != QUOTE_NONE)) + +// don't parse '\r' with a custom line terminator +#define IS_CARRIAGE(c) 
(c == carriage_symbol) + +#define IS_COMMENT_CHAR(c) (c == comment_symbol) + +#define IS_ESCAPE_CHAR(c) (c == escape_symbol) + +#define IS_SKIPPABLE_SPACE(c) \ + ((!self->delim_whitespace && c == ' ' && self->skipinitialspace)) + +// applied when in a field +#define IS_DELIMITER(c) \ + ((!self->delim_whitespace && c == self->delimiter) || \ + (self->delim_whitespace && isblank(c))) + +#define _TOKEN_CLEANUP() \ + self->stream_len = slen; \ + self->datapos = i; \ + TRACE(("_TOKEN_CLEANUP: datapos: %d, datalen: %d\n", self->datapos, \ + self->datalen)); + +#define CHECK_FOR_BOM() \ + if (*buf == '\xef' && *(buf + 1) == '\xbb' && *(buf + 2) == '\xbf') { \ + buf += 3; \ + self->datapos += 3; \ + } + +int skip_this_line(parser_t *self, int64_t rownum) { + int should_skip; + PyObject *result; + PyGILState_STATE state; + + if (self->skipfunc != NULL) { + state = PyGILState_Ensure(); + result = PyObject_CallFunction(self->skipfunc, "i", rownum); + + // Error occurred. It will be processed + // and caught at the Cython level. + if (result == NULL) { + should_skip = -1; + } else { + should_skip = PyObject_IsTrue(result); + } + + Py_XDECREF(result); + PyGILState_Release(state); + + return should_skip; + } else if (self->skipset != NULL) { + return (kh_get_int64((kh_int64_t *)self->skipset, self->file_lines) != + ((kh_int64_t *)self->skipset)->n_buckets); + } else { + return (rownum <= self->skip_first_N_rows); + } +} + +int tokenize_bytes(parser_t *self, + size_t line_limit, int64_t start_lines) { + int64_t i; + uint64_t slen; + int should_skip; + char c; + char *stream; + char *buf = self->data + self->datapos; + + const char line_terminator = (self->lineterminator == '\0') ? + '\n' : self->lineterminator; + + // 1000 is something that couldn't fit in "char" + // thus comparing a char to it would always be "false" + const int carriage_symbol = (self->lineterminator == '\0') ? '\r' : 1000; + const int comment_symbol = (self->commentchar != '\0') ? 
+ self->commentchar : 1000; + const int escape_symbol = (self->escapechar != '\0') ? + self->escapechar : 1000; + + if (make_stream_space(self, self->datalen - self->datapos) < 0) { + int64_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, "out of memory"); + return -1; + } + + stream = self->stream + self->stream_len; + slen = self->stream_len; + + TRACE(("%s\n", buf)); + + if (self->file_lines == 0) { + CHECK_FOR_BOM(); + } + + for (i = self->datapos; i < self->datalen; ++i) { + // next character in file + c = *buf++; + + TRACE( + ("tokenize_bytes - Iter: %d Char: 0x%x Line %d field_count %d, " + "state %d\n", + i, c, self->file_lines + 1, self->line_fields[self->lines], + self->state)); + + switch (self->state) { + case START_FIELD_IN_SKIP_LINE: + if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else if (IS_DELIMITER(c)) { + // Do nothing, we're starting a new field again. 
+ } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + break; + + case IN_FIELD_IN_SKIP_LINE: + if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_DELIMITER(c)) { + self->state = START_FIELD_IN_SKIP_LINE; + } + break; + + case IN_QUOTED_FIELD_IN_SKIP_LINE: + if (IS_QUOTE(c)) { + if (self->doublequote) { + self->state = QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + } + break; + + case QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE: + if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else if (IS_DELIMITER(c)) { + self->state = START_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + } + break; + + case WHITESPACE_LINE: + if (IS_TERMINATOR(c)) { + self->file_lines++; + self->state = START_RECORD; + break; + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + break; + } else if (!self->delim_whitespace) { + if (isblank(c) && c != self->delimiter) { + } else { // backtrack + // use i + 1 because buf has been incremented but not i + do { + --buf; + --i; + } while (i + 1 > self->datapos && !IS_TERMINATOR(*buf)); + + // reached a newline rather than the beginning + if (IS_TERMINATOR(*buf)) { + ++buf; // move pointer to first char after newline + ++i; + } + self->state = START_FIELD; + } + break; + } + // fall through + + case EAT_WHITESPACE: + if (IS_TERMINATOR(c)) { + END_LINE(); + self->state = START_RECORD; + break; + } else if (IS_CARRIAGE(c)) { + self->state = EAT_CRNL; + break; + } else if (IS_COMMENT_CHAR(c)) { + self->state = EAT_COMMENT; + break; + } else if (!isblank(c)) { + self->state = START_FIELD; + // fall through to subsequent state + } else { + // if whitespace char, keep slurping + break; + } + + case START_RECORD: + // start of record + 
should_skip = skip_this_line(self, self->file_lines); + + if (should_skip == -1) { + goto parsingerror; + } else if (should_skip) { + if (IS_QUOTE(c)) { + self->state = IN_QUOTED_FIELD_IN_SKIP_LINE; + } else { + self->state = IN_FIELD_IN_SKIP_LINE; + + if (IS_TERMINATOR(c)) { + END_LINE(); + } + } + break; + } else if (IS_TERMINATOR(c)) { + // \n\r possible? + if (self->skip_empty_lines) { + self->file_lines++; + } else { + END_LINE(); + } + break; + } else if (IS_CARRIAGE(c)) { + if (self->skip_empty_lines) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } else { + self->state = EAT_CRNL; + } + break; + } else if (IS_COMMENT_CHAR(c)) { + self->state = EAT_LINE_COMMENT; + break; + } else if (isblank(c)) { + if (self->delim_whitespace) { + if (self->skip_empty_lines) { + self->state = WHITESPACE_LINE; + } else { + self->state = EAT_WHITESPACE; + } + break; + } else if (c != self->delimiter && self->skip_empty_lines) { + self->state = WHITESPACE_LINE; + break; + } + // fall through + } + + // normal character - fall through + // to handle as START_FIELD + self->state = START_FIELD; + + case START_FIELD: + // expecting field + if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else if (IS_QUOTE(c)) { + // start quoted field + self->state = IN_QUOTED_FIELD; + } else if (IS_ESCAPE_CHAR(c)) { + // possible escaped character + self->state = ESCAPED_CHAR; + } else if (IS_SKIPPABLE_SPACE(c)) { + // ignore space at start of field + } else if (IS_DELIMITER(c)) { + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + // save empty field + END_FIELD(); + } + } else if (IS_COMMENT_CHAR(c)) { + END_FIELD(); + self->state = EAT_COMMENT; + } else { + // begin new unquoted field + PUSH_CHAR(c); + self->state = IN_FIELD; + } + break; + + case ESCAPED_CHAR: + PUSH_CHAR(c); + self->state = IN_FIELD; + break; + + case EAT_LINE_COMMENT: + if (IS_TERMINATOR(c)) { + 
self->file_lines++; + self->state = START_RECORD; + } else if (IS_CARRIAGE(c)) { + self->file_lines++; + self->state = EAT_CRNL_NOP; + } + break; + + case IN_FIELD: + // in unquoted field + if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else if (IS_ESCAPE_CHAR(c)) { + // possible escaped character + self->state = ESCAPED_CHAR; + } else if (IS_DELIMITER(c)) { + // end of field - end of line not reached yet + END_FIELD(); + + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + self->state = START_FIELD; + } + } else if (IS_COMMENT_CHAR(c)) { + END_FIELD(); + self->state = EAT_COMMENT; + } else { + // normal character - save in field + PUSH_CHAR(c); + } + break; + + case IN_QUOTED_FIELD: + // in quoted field + if (IS_ESCAPE_CHAR(c)) { + // possible escape character + self->state = ESCAPE_IN_QUOTED_FIELD; + } else if (IS_QUOTE(c)) { + if (self->doublequote) { + // double quote - " represented by "" + self->state = QUOTE_IN_QUOTED_FIELD; + } else { + // end of quote part of field + self->state = IN_FIELD; + } + } else { + // normal character - save in field + PUSH_CHAR(c); + } + break; + + case ESCAPE_IN_QUOTED_FIELD: + PUSH_CHAR(c); + self->state = IN_QUOTED_FIELD; + break; + + case QUOTE_IN_QUOTED_FIELD: + // double quote - seen a quote in an quoted field + if (IS_QUOTE(c)) { + // save "" as " + + PUSH_CHAR(c); + self->state = IN_QUOTED_FIELD; + } else if (IS_DELIMITER(c)) { + // end of field - end of line not reached yet + END_FIELD(); + + if (self->delim_whitespace) { + self->state = EAT_WHITESPACE; + } else { + self->state = START_FIELD; + } + } else if (IS_TERMINATOR(c)) { + END_FIELD(); + END_LINE(); + } else if (IS_CARRIAGE(c)) { + END_FIELD(); + self->state = EAT_CRNL; + } else if (!self->strict) { + PUSH_CHAR(c); + self->state = IN_FIELD; + } else { + int64_t bufsize = 100; + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + 
"delimiter expected after quote in quote"); + goto parsingerror; + } + break; + + case EAT_COMMENT: + if (IS_TERMINATOR(c)) { + END_LINE(); + } else if (IS_CARRIAGE(c)) { + self->state = EAT_CRNL; + } + break; + + // only occurs with non-custom line terminator, + // which is why we directly check for '\n' + case EAT_CRNL: + if (c == '\n') { + END_LINE(); + } else if (IS_DELIMITER(c)) { + if (self->delim_whitespace) { + END_LINE_STATE(EAT_WHITESPACE); + } else { + // Handle \r-delimited files + END_LINE_AND_FIELD_STATE(START_FIELD); + } + } else { + if (self->delim_whitespace) { + /* XXX + * first character of a new record--need to back up and + * reread + * to handle properly... + */ + i--; + buf--; // back up one character (HACK!) + END_LINE_STATE(START_RECORD); + } else { + // \r line terminator + // UGH. we don't actually want + // to consume the token. fix this later + self->stream_len = slen; + if (end_line(self) < 0) { + goto parsingerror; + } + + stream = self->stream + self->stream_len; + slen = self->stream_len; + self->state = START_RECORD; + + --i; + buf--; // let's try this character again (HACK!) 
+ if (line_limit > 0 && + self->lines == start_lines + line_limit) { + goto linelimit; + } + } + } + break; + + // only occurs with non-custom line terminator, + // which is why we directly check for '\n' + case EAT_CRNL_NOP: // inside an ignored comment line + self->state = START_RECORD; + // \r line terminator -- parse this character again + if (c != '\n' && !IS_DELIMITER(c)) { + --i; + --buf; + } + break; + default: + break; + } + } + + _TOKEN_CLEANUP(); + + TRACE(("Finished tokenizing input\n")) + + return 0; + +parsingerror: + i++; + _TOKEN_CLEANUP(); + + return -1; + +linelimit: + i++; + _TOKEN_CLEANUP(); + + return 0; +} + +static int parser_handle_eof(parser_t *self) { + int64_t bufsize = 100; + + TRACE( + ("handling eof, datalen: %d, pstate: %d\n", self->datalen, self->state)) + + if (self->datalen != 0) return -1; + + switch (self->state) { + case START_RECORD: + case WHITESPACE_LINE: + case EAT_CRNL_NOP: + case EAT_LINE_COMMENT: + return 0; + + case ESCAPE_IN_QUOTED_FIELD: + case IN_QUOTED_FIELD: + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "EOF inside string starting at row %lld", + (long long)self->file_lines); + return -1; + + case ESCAPED_CHAR: + self->error_msg = (char *)malloc(bufsize); + snprintf(self->error_msg, bufsize, + "EOF following escape character"); + return -1; + + case IN_FIELD: + case START_FIELD: + case QUOTE_IN_QUOTED_FIELD: + if (end_field(self) < 0) return -1; + break; + + default: + break; + } + + if (end_line(self) < 0) + return -1; + else + return 0; +} + +int parser_consume_rows(parser_t *self, size_t nrows) { + int64_t offset, word_deletions; + uint64_t char_count, i; + + if (nrows > self->lines) { + nrows = self->lines; + } + + /* do nothing */ + if (nrows == 0) return 0; + + /* cannot guarantee that nrows + 1 has been observed */ + word_deletions = self->line_start[nrows - 1] + self->line_fields[nrows - 1]; + char_count = (self->word_starts[word_deletions - 1] + + 
strlen(self->words[word_deletions - 1]) + 1); + + TRACE(("parser_consume_rows: Deleting %d words, %d chars\n", word_deletions, + char_count)); + + /* move stream, only if something to move */ + if (char_count < self->stream_len) { + memmove((void *)self->stream, (void *)(self->stream + char_count), + self->stream_len - char_count); + } + /* buffer counts */ + self->stream_len -= char_count; + + /* move token metadata */ + // Note: We should always have words_len < word_deletions, so this + // subtraction will remain appropriately-typed. + for (i = 0; i < self->words_len - word_deletions; ++i) { + offset = i + word_deletions; + + self->words[i] = self->words[offset] - char_count; + self->word_starts[i] = self->word_starts[offset] - char_count; + } + self->words_len -= word_deletions; + + /* move current word pointer to stream */ + self->pword_start -= char_count; + self->word_start -= char_count; + + /* move line metadata */ + // Note: We should always have self->lines - nrows + 1 >= 0, so this + // subtraction will remain appropriately-typed. + for (i = 0; i < self->lines - nrows + 1; ++i) { + offset = i + nrows; + self->line_start[i] = self->line_start[offset] - word_deletions; + self->line_fields[i] = self->line_fields[offset]; + } + self->lines -= nrows; + + return 0; +} + +static size_t _next_pow2(size_t sz) { + size_t result = 1; + while (result < sz) result *= 2; + return result; +} + +int parser_trim_buffers(parser_t *self) { + /* + Free memory + */ + size_t new_cap; + void *newptr; + + uint64_t i; + + /** + * Before we free up space and trim, we should + * save how many words we saw when parsing, if + * it exceeds the maximum number we saw before. + * + * This is important for when we read in chunks, + * so that we can inform subsequent chunk parsing + * as to how many words we could possibly see. 
+ */ + if (self->words_cap > self->max_words_cap) { + self->max_words_cap = self->words_cap; + } + + /* trim words, word_starts */ + new_cap = _next_pow2(self->words_len) + 1; + if (new_cap < self->words_cap) { + TRACE(("parser_trim_buffers: new_cap < self->words_cap\n")); + newptr = realloc((void *)self->words, new_cap * sizeof(char *)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->words = (char **)newptr; + } + newptr = realloc((void *)self->word_starts, + new_cap * sizeof(int64_t)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->word_starts = (int64_t *)newptr; + self->words_cap = new_cap; + } + } + + /* trim stream */ + new_cap = _next_pow2(self->stream_len) + 1; + TRACE( + ("parser_trim_buffers: new_cap = %zu, stream_cap = %zu, lines_cap = " + "%zu\n", + new_cap, self->stream_cap, self->lines_cap)); + if (new_cap < self->stream_cap) { + TRACE( + ("parser_trim_buffers: new_cap < self->stream_cap, calling " + "realloc\n")); + newptr = realloc((void *)self->stream, new_cap); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + // Update the pointers in the self->words array (char **) if + // `realloc` + // moved the `self->stream` buffer. This block mirrors a similar + // block in + // `make_stream_space`. 
+ if (self->stream != newptr) { + self->pword_start = (char *)newptr + self->word_start; + + for (i = 0; i < self->words_len; ++i) { + self->words[i] = (char *)newptr + self->word_starts[i]; + } + } + + self->stream = newptr; + self->stream_cap = new_cap; + } + } + + /* trim line_start, line_fields */ + new_cap = _next_pow2(self->lines) + 1; + if (new_cap < self->lines_cap) { + TRACE(("parser_trim_buffers: new_cap < self->lines_cap\n")); + newptr = realloc((void *)self->line_start, + new_cap * sizeof(int64_t)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_start = (int64_t *)newptr; + } + newptr = realloc((void *)self->line_fields, + new_cap * sizeof(int64_t)); + if (newptr == NULL) { + return PARSER_OUT_OF_MEMORY; + } else { + self->line_fields = (int64_t *)newptr; + self->lines_cap = new_cap; + } + } + + return 0; +} + +/* + nrows : number of rows to tokenize (or until reach EOF) + all : tokenize all the data vs. certain number of rows + */ + +int _tokenize_helper(parser_t *self, size_t nrows, int all) { + int status = 0; + int64_t start_lines = self->lines; + + if (self->state == FINISHED) { + return 0; + } + + TRACE(( + "_tokenize_helper: Asked to tokenize %d rows, datapos=%d, datalen=%d\n", + nrows, self->datapos, self->datalen)); + + while (1) { + if (!all && self->lines - start_lines >= nrows) break; + + if (self->datapos == self->datalen) { + status = parser_buffer_bytes(self, self->chunksize); + + if (status == REACHED_EOF) { + // close out last line + status = parser_handle_eof(self); + self->state = FINISHED; + break; + } else if (status != 0) { + return status; + } + } + + TRACE( + ("_tokenize_helper: Trying to process %d bytes, datalen=%d, " + "datapos= %d\n", + self->datalen - self->datapos, self->datalen, self->datapos)); + + status = tokenize_bytes(self, nrows, start_lines); + + if (status < 0) { + // XXX + TRACE( + ("_tokenize_helper: Status %d returned from tokenize_bytes, " + "breaking\n", + status)); + status = 
-1; + break; + } + } + TRACE(("leaving tokenize_helper\n")); + return status; +} + +int tokenize_nrows(parser_t *self, size_t nrows) { + int status = _tokenize_helper(self, nrows, 0); + return status; +} + +int tokenize_all_rows(parser_t *self) { + int status = _tokenize_helper(self, -1, 1); + return status; +} + +/* + * Function: to_boolean + * -------------------- + * + * Validate if item should be recognized as a boolean field. + * + * item: const char* representing parsed text + * val : pointer to a uint8_t of boolean representation + * + * If item is determined to be boolean, this method will set + * the appropriate value of val and return 0. A non-zero exit + * status means that item was not inferred to be boolean, and + * leaves the value of *val unmodified. + */ +int to_boolean(const char *item, uint8_t *val) { + if (strcasecmp(item, "TRUE") == 0) { + *val = 1; + return 0; + } else if (strcasecmp(item, "FALSE") == 0) { + *val = 0; + return 0; + } + + return -1; +} + +// --------------------------------------------------------------------------- +// Implementation of xstrtod + +// +// strtod.c +// +// Convert string to double +// +// Copyright (C) 2002 Michael Ringgaard. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// 3. Neither the name of the project nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND +// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +// SUCH DAMAGE. +// +// ----------------------------------------------------------------------- +// Modifications by Warren Weckesser, March 2011: +// * Rename strtod() to xstrtod(). +// * Added decimal and sci arguments. +// * Skip trailing spaces. +// * Commented out the other functions. +// Modifications by Richard T Guy, August 2013: +// * Add tsep argument for thousands separator +// + +// pessimistic but quick assessment, +// assuming that each decimal digit requires 4 bits to store +const int max_int_decimal_digits = (sizeof(unsigned int) * 8) / 4; + +double xstrtod(const char *str, char **endptr, char decimal, char sci, + char tsep, int skip_trailing, int *error, int *maybe_int) { + double number; + unsigned int i_number = 0; + int exponent; + int negative; + char *p = (char *)str; + double p10; + int n; + int num_digits; + int num_decimals; + + if (maybe_int != NULL) *maybe_int = 1; + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + + // Handle optional sign. + negative = 0; + switch (*p) { + case '-': + negative = 1; // Fall through to increment position. 
+ case '+': + p++; + } + + exponent = 0; + num_digits = 0; + num_decimals = 0; + + // Process string of digits. + while (isdigit_ascii(*p) && num_digits <= max_int_decimal_digits) { + i_number = i_number * 10 + (*p - '0'); + p++; + num_digits++; + + p += (tsep != '\0' && *p == tsep); + } + number = i_number; + + if (num_digits > max_int_decimal_digits) { + // process what's left as double + while (isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + + p += (tsep != '\0' && *p == tsep); + } + } + + // Process decimal part. + if (*p == decimal) { + if (maybe_int != NULL) *maybe_int = 0; + p++; + + while (isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + num_decimals++; + } + + exponent -= num_decimals; + } + + if (num_digits == 0) { + *error = ERANGE; + return 0.0; + } + + // Correct for sign. + if (negative) number = -number; + + // Process an exponent string. + if (toupper_ascii(*p) == toupper_ascii(sci)) { + if (maybe_int != NULL) *maybe_int = 0; + + // Handle optional sign. + negative = 0; + switch (*++p) { + case '-': + negative = 1; // Fall through to increment pos. + case '+': + p++; + } + + // Process string of digits. + num_digits = 0; + n = 0; + while (isdigit_ascii(*p)) { + n = n * 10 + (*p - '0'); + num_digits++; + p++; + } + + if (negative) + exponent -= n; + else + exponent += n; + + // If no digits, after the 'e'/'E', un-consume it + if (num_digits == 0) p--; + } + + if (exponent < DBL_MIN_EXP || exponent > DBL_MAX_EXP) { + *error = ERANGE; + return HUGE_VAL; + } + + // Scale the result. + p10 = 10.; + n = exponent; + if (n < 0) n = -n; + while (n) { + if (n & 1) { + if (exponent < 0) + number /= p10; + else + number *= p10; + } + n >>= 1; + p10 *= p10; + } + + if (number == HUGE_VAL) { + *error = ERANGE; + } + + if (skip_trailing) { + // Skip trailing whitespace. 
+ while (isspace_ascii(*p)) p++; + } + + if (endptr) *endptr = p; + return number; +} + +double precise_xstrtod(const char *str, char **endptr, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) { + double number; + int exponent; + int negative; + char *p = (char *)str; + int num_digits; + int num_decimals; + int max_digits = 17; + int n; + + if (maybe_int != NULL) *maybe_int = 1; + // Cache powers of 10 in memory. + static double e[] = { + 1., 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, + 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169, + 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, + 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, + 1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199, + 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209, + 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, + 1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 
1e226, 1e227, 1e228, 1e229, + 1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, + 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249, + 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259, + 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, + 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, + 1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289, + 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299, + 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308}; + + // Skip leading whitespace. + while (isspace_ascii(*p)) p++; + + // Handle optional sign. + negative = 0; + switch (*p) { + case '-': + negative = 1; // Fall through to increment position. + case '+': + p++; + } + + number = 0.; + exponent = 0; + num_digits = 0; + num_decimals = 0; + + // Process string of digits. + while (isdigit_ascii(*p)) { + if (num_digits < max_digits) { + number = number * 10. + (*p - '0'); + num_digits++; + } else { + ++exponent; + } + + p++; + p += (tsep != '\0' && *p == tsep); + } + + // Process decimal part + if (*p == decimal) { + if (maybe_int != NULL) *maybe_int = 0; + p++; + + while (num_digits < max_digits && isdigit_ascii(*p)) { + number = number * 10. + (*p - '0'); + p++; + num_digits++; + num_decimals++; + } + + if (num_digits >= max_digits) // Consume extra decimal digits. + while (isdigit_ascii(*p)) ++p; + + exponent -= num_decimals; + } + + if (num_digits == 0) { + *error = ERANGE; + return 0.0; + } + + // Correct for sign. + if (negative) number = -number; + + // Process an exponent string. + if (toupper_ascii(*p) == toupper_ascii(sci)) { + if (maybe_int != NULL) *maybe_int = 0; + + // Handle optional sign + negative = 0; + switch (*++p) { + case '-': + negative = 1; // Fall through to increment pos. + case '+': + p++; + } + + // Process string of digits. 
+ num_digits = 0; + n = 0; + while (isdigit_ascii(*p)) { + n = n * 10 + (*p - '0'); + num_digits++; + p++; + } + + if (negative) + exponent -= n; + else + exponent += n; + + // If no digits after the 'e'/'E', un-consume it. + if (num_digits == 0) p--; + } + + if (exponent > 308) { + *error = ERANGE; + return HUGE_VAL; + } else if (exponent > 0) { + number *= e[exponent]; + } else if (exponent < -308) { // Subnormal + if (exponent < -616) // Prevent invalid array access. + number = 0.; + number /= e[-308 - exponent]; + number /= e[308]; + } else { + number /= e[-exponent]; + } + + if (number == HUGE_VAL || number == -HUGE_VAL) *error = ERANGE; + + if (skip_trailing) { + // Skip trailing whitespace. + while (isspace_ascii(*p)) p++; + } + + if (endptr) *endptr = p; + return number; +} + +double round_trip(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int) { + // This is called from a nogil block in parsers.pyx + // so need to explicitly get GIL before Python calls + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); + + double r = PyOS_string_to_double(p, q, 0); + if (maybe_int != NULL) *maybe_int = 0; + if (PyErr_Occurred() != NULL) *error = -1; + else if (r == Py_HUGE_VAL) *error = (int)Py_HUGE_VAL; + PyErr_Clear(); + + PyGILState_Release(gstate); + return r; +} + +// End of xstrtod code +// --------------------------------------------------------------------------- + +void uint_state_init(uint_state *self) { + self->seen_sint = 0; + self->seen_uint = 0; + self->seen_null = 0; +} + +int uint64_conflict(uint_state *self) { + return self->seen_uint && (self->seen_sint || self->seen_null); +} + +int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max, + int *error, char tsep) { + const char *p = (const char *)p_item; + int isneg = 0; + int64_t number = 0; + int d; + + // Skip leading spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Handle sign. 
+ if (*p == '-') { + isneg = 1; + ++p; + } else if (*p == '+') { + p++; + } + + // Check that there is a first digit. + if (!isdigit_ascii(*p)) { + // Error... + *error = ERROR_NO_DIGITS; + return 0; + } + + if (isneg) { + // If number is greater than pre_min, at least one more digit + // can be processed without overflowing. + int dig_pre_min = -(int_min % 10); + int64_t pre_min = int_min / 10; + + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number > pre_min) || + ((number == pre_min) && (d - '0' <= dig_pre_min))) { + number = number * 10 - (d - '0'); + d = *++p; + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number > pre_min) || + ((number == pre_min) && (d - '0' <= dig_pre_min))) { + number = number * 10 - (d - '0'); + d = *++p; + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + } else { + // If number is less than pre_max, at least one more digit + // can be processed without overflowing. + int64_t pre_max = int_max / 10; + int dig_pre_max = int_max % 10; + + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + } + + // Skip trailing spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Did we use up all the characters? 
+ if (*p) { + *error = ERROR_INVALID_CHARS; + return 0; + } + + *error = 0; + return number; +} + +uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max, + uint64_t uint_max, int *error, char tsep) { + const char *p = (const char *)p_item; + uint64_t pre_max = uint_max / 10; + int dig_pre_max = uint_max % 10; + uint64_t number = 0; + int d; + + // Skip leading spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Handle sign. + if (*p == '-') { + state->seen_sint = 1; + *error = 0; + return 0; + } else if (*p == '+') { + p++; + } + + // Check that there is a first digit. + if (!isdigit_ascii(*p)) { + // Error... + *error = ERROR_NO_DIGITS; + return 0; + } + + // If number is less than pre_max, at least one more digit + // can be processed without overflowing. + // + // Process the digits. + d = *p; + if (tsep != '\0') { + while (1) { + if (d == tsep) { + d = *++p; + continue; + } else if (!isdigit_ascii(d)) { + break; + } + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } else { + while (isdigit_ascii(d)) { + if ((number < pre_max) || + ((number == pre_max) && (d - '0' <= dig_pre_max))) { + number = number * 10 + (d - '0'); + d = *++p; + + } else { + *error = ERROR_OVERFLOW; + return 0; + } + } + } + + // Skip trailing spaces. + while (isspace_ascii(*p)) { + ++p; + } + + // Did we use up all the characters? 
+ if (*p) { + *error = ERROR_INVALID_CHARS; + return 0; + } + + if (number > (uint64_t)int_max) { + state->seen_uint = 1; + } + + *error = 0; + return number; +} diff --git a/pandas/_libs/src/parser/tokenizer.h b/pandas/_libs/src/parser/tokenizer.h new file mode 100644 index 00000000..4fd2065c --- /dev/null +++ b/pandas/_libs/src/parser/tokenizer.h @@ -0,0 +1,232 @@ +/* + +Copyright (c) 2012, Lambda Foundry, Inc., except where noted + +Incorporates components of WarrenWeckesser/textreader, licensed under 3-clause +BSD + +See LICENSE for the license + +*/ + +#ifndef PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ +#define PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_ + +#define PY_SSIZE_T_CLEAN +#include + +#define ERROR_NO_DIGITS 1 +#define ERROR_OVERFLOW 2 +#define ERROR_INVALID_CHARS 3 + +#include "../headers/stdint.h" +#include "../inline_helper.h" +#include "../headers/portable.h" + +#include "khash.h" + +#define STREAM_INIT_SIZE 32 + +#define REACHED_EOF 1 +#define CALLING_READ_FAILED 2 + + +/* + + C flat file parsing low level code for pandas / NumPy + + */ + +/* + * Common set of error types for the read_rows() and tokenize() + * functions. + */ + +// #define VERBOSE +#if defined(VERBOSE) +#define TRACE(X) printf X; +#else +#define TRACE(X) +#endif // VERBOSE + +#define PARSER_OUT_OF_MEMORY -1 + +/* + * XXX Might want to couple count_rows() with read_rows() to avoid duplication + * of some file I/O. 
+ */
+
+// Values held in parser_t.state (see the `state` member below).
+typedef enum {
+    START_RECORD,
+    START_FIELD,
+    ESCAPED_CHAR,
+    IN_FIELD,
+    IN_QUOTED_FIELD,
+    ESCAPE_IN_QUOTED_FIELD,
+    QUOTE_IN_QUOTED_FIELD,
+    EAT_CRNL,
+    EAT_CRNL_NOP,
+    EAT_WHITESPACE,
+    EAT_COMMENT,
+    EAT_LINE_COMMENT,
+    WHITESPACE_LINE,
+    START_FIELD_IN_SKIP_LINE,
+    IN_FIELD_IN_SKIP_LINE,
+    IN_QUOTED_FIELD_IN_SKIP_LINE,
+    QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE,
+    FINISHED
+} ParserState;
+
+// NOTE(review): presumably the values stored in parser_t.quoting (declared
+// as a plain int below) -- confirm against the tokenizer implementation.
+typedef enum {
+    QUOTE_MINIMAL,
+    QUOTE_ALL,
+    QUOTE_NONNUMERIC,
+    QUOTE_NONE
+} QuoteStyle;
+
+// Callback types stored in parser_t.cb_io and parser_t.cb_cleanup; both
+// receive an opaque `src` pointer (parser_t.source has the matching type).
+typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
+                             int *status);
+typedef int (*io_cleanup)(void *src);
+
+// Complete tokenizer state: input source and callbacks, token stream and
+// word/line index vectors, dialect options, and error/warning message slots.
+typedef struct parser_t {
+    void *source;
+    io_callback cb_io;
+    io_cleanup cb_cleanup;
+
+    int64_t chunksize;  // Number of bytes to prepare for each chunk
+    char *data;         // pointer to data to be processed
+    int64_t datalen;    // amount of data available
+    int64_t datapos;
+
+    // where to write out tokenized data
+    char *stream;
+    uint64_t stream_len;
+    uint64_t stream_cap;
+
+    // Store words in (potentially ragged) matrix for now, hmm
+    char **words;
+    int64_t *word_starts;  // where we are in the stream
+    uint64_t words_len;
+    uint64_t words_cap;
+    uint64_t max_words_cap;  // maximum word cap encountered
+
+    char *pword_start;   // pointer to stream start of current field
+    int64_t word_start;  // position start of current field
+
+    int64_t *line_start;   // position in words for start of line
+    int64_t *line_fields;  // Number of fields in each line
+    uint64_t lines;        // Number of (good) lines observed
+    uint64_t file_lines;   // Number of lines (including bad or skipped)
+    uint64_t lines_cap;    // Vector capacity
+
+    // Tokenizing stuff
+    ParserState state;
+    int doublequote;      /* is " represented by ""? */
+    char delimiter;       /* field separator */
+    int delim_whitespace; /* delimit by consuming space/tabs instead */
+    char quotechar;       /* quote character */
+    char escapechar;      /* escape character */
+    char lineterminator;
+    int skipinitialspace; /* ignore spaces following delimiter? */
+    int quoting;          /* style of quoting to write */
+
+    char commentchar;
+    int allow_embedded_newline;
+    int strict; /* raise exception on bad CSV */
+
+    int usecols;  // Boolean: 1: usecols provided, 0: none provided
+
+    int expected_fields;
+    int error_bad_lines;
+    int warn_bad_lines;
+
+    // floating point options
+    char decimal;
+    char sci;
+
+    // thousands separator (comma, period)
+    char thousands;
+
+    int header;            // Boolean: 1: has header, 0: no header
+    int64_t header_start;  // header row start
+    uint64_t header_end;   // header row end
+
+    void *skipset;
+    PyObject *skipfunc;
+    int64_t skip_first_N_rows;
+    int64_t skip_footer;
+    // Same signature as xstrtod(), precise_xstrtod() and round_trip(),
+    // which are declared at the end of this header.
+    double (*double_converter)(const char *, char **,
+                               char, char, char, int, int *, int *);
+
+    // error handling
+    char *warn_msg;
+    char *error_msg;
+
+    int skip_empty_lines;
+} parser_t;
+
+// Cursor used by COLITER_NEXT to walk one column (`col`) across lines.
+typedef struct coliter_t {
+    char **words;
+    int64_t *line_start;
+    int64_t col;
+} coliter_t;
+
+void coliter_setup(coliter_t *self, parser_t *parser, int i, int start);
+
+// Fetch the next word of the iterator's column into `word`: read the current
+// line's start offset, advance iter.line_start, and add iter.col. If the
+// resulting index is at or beyond the following line's start offset the
+// result is "" instead of iter.words[i].
+// `iter` is expanded several times and modified, so pass a plain lvalue.
+#define COLITER_NEXT(iter, word)                           \
+    do {                                                   \
+        const int64_t i = *iter.line_start++ + iter.col;   \
+        word = i >= *iter.line_start ? "" : iter.words[i]; \
+    } while (0)
+
+parser_t *parser_new(void);
+
+int parser_init(parser_t *self);
+
+int parser_consume_rows(parser_t *self, size_t nrows);
+
+int parser_trim_buffers(parser_t *self);
+
+int parser_add_skiprow(parser_t *self, int64_t row);
+
+int parser_set_skipfirstnrows(parser_t *self, int64_t nrows);
+
+void parser_free(parser_t *self);
+
+void parser_del(parser_t *self);
+
+void parser_set_default_options(parser_t *self);
+
+int tokenize_nrows(parser_t *self, size_t nrows);
+
+int tokenize_all_rows(parser_t *self);
+
+// Have parsed / type-converted a chunk of data
+// and want to free memory from the token stream
+
+// Flags recorded while converting a column with str_to_uint64(); they are
+// combined by uint64_conflict().
+typedef struct uint_state {
+    int seen_sint;
+    int seen_uint;
+    int seen_null;
+} uint_state;
+
+void uint_state_init(uint_state *self);
+
+int uint64_conflict(uint_state *self);
+
+uint64_t str_to_uint64(uint_state *state, const char *p_item, int64_t int_max,
+                       uint64_t uint_max, int *error, char tsep);
+int64_t str_to_int64(const char *p_item, int64_t int_min, int64_t int_max,
+                     int *error, char tsep);
+double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
+               int skip_trailing, int *error, int *maybe_int);
+double precise_xstrtod(const char *p, char **q, char decimal,
+                       char sci, char tsep, int skip_trailing,
+                       int *error, int *maybe_int);
+
+// GH-15140 - round_trip requires and acquires the GIL on its own
+double round_trip(const char *p, char **q, char decimal, char sci, char tsep,
+                  int skip_trailing, int *error, int *maybe_int);
+int to_boolean(const char *item, uint8_t *val);
+
+#endif  // PANDAS__LIBS_SRC_PARSER_TOKENIZER_H_
diff --git a/pandas/_libs/src/skiplist.h b/pandas/_libs/src/skiplist.h
new file mode 100644
index 00000000..60c1a567
--- /dev/null
+++ b/pandas/_libs/src/skiplist.h
@@ -0,0 +1,279 @@
+/*
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+ +The full license is in the LICENSE file, distributed with this software. + +Flexibly-sized, index-able skiplist data structure for maintaining a sorted +list of values + +Port of Wes McKinney's Cython version of Raymond Hettinger's original pure +Python recipe (http://rhettinger.wordpress.com/2010/02/06/lost-knowledge/) +*/ + +#ifndef PANDAS__LIBS_SRC_SKIPLIST_H_ +#define PANDAS__LIBS_SRC_SKIPLIST_H_ + +#include +#include +#include +#include +#include "inline_helper.h" + +PANDAS_INLINE float __skiplist_nanf(void) { + const union { + int __i; + float __f; + } __bint = {0x7fc00000UL}; + return __bint.__f; +} +#define PANDAS_NAN ((double)__skiplist_nanf()) + +PANDAS_INLINE double Log2(double val) { return log(val) / log(2.); } + +typedef struct node_t node_t; + +struct node_t { + node_t **next; + int *width; + double value; + int is_nil; + int levels; + int ref_count; +}; + +typedef struct { + node_t *head; + node_t **tmp_chain; + int *tmp_steps; + int size; + int maxlevels; +} skiplist_t; + +PANDAS_INLINE double urand(void) { + return ((double)rand() + 1) / ((double)RAND_MAX + 2); +} + +PANDAS_INLINE int int_min(int a, int b) { return a < b ? 
a : b; } + +PANDAS_INLINE node_t *node_init(double value, int levels) { + node_t *result; + result = (node_t *)malloc(sizeof(node_t)); + if (result) { + result->value = value; + result->levels = levels; + result->is_nil = 0; + result->ref_count = 0; + result->next = (node_t **)malloc(levels * sizeof(node_t *)); + result->width = (int *)malloc(levels * sizeof(int)); + if (!(result->next && result->width) && (levels != 0)) { + free(result->next); + free(result->width); + free(result); + return NULL; + } + } + return result; +} + +// do this ourselves +PANDAS_INLINE void node_incref(node_t *node) { ++(node->ref_count); } + +PANDAS_INLINE void node_decref(node_t *node) { --(node->ref_count); } + +static void node_destroy(node_t *node) { + int i; + if (node) { + if (node->ref_count <= 1) { + for (i = 0; i < node->levels; ++i) { + node_destroy(node->next[i]); + } + free(node->next); + free(node->width); + // printf("Reference count was 1, freeing\n"); + free(node); + } else { + node_decref(node); + } + // pretty sure that freeing the struct above will be enough + } +} + +PANDAS_INLINE void skiplist_destroy(skiplist_t *skp) { + if (skp) { + node_destroy(skp->head); + free(skp->tmp_steps); + free(skp->tmp_chain); + free(skp); + } +} + +PANDAS_INLINE skiplist_t *skiplist_init(int expected_size) { + skiplist_t *result; + node_t *NIL, *head; + int maxlevels, i; + + maxlevels = 1 + Log2((double)expected_size); + result = (skiplist_t *)malloc(sizeof(skiplist_t)); + if (!result) { + return NULL; + } + result->tmp_chain = (node_t **)malloc(maxlevels * sizeof(node_t *)); + result->tmp_steps = (int *)malloc(maxlevels * sizeof(int)); + result->maxlevels = maxlevels; + result->size = 0; + + head = result->head = node_init(PANDAS_NAN, maxlevels); + NIL = node_init(0.0, 0); + + if (!(result->tmp_chain && result->tmp_steps && result->head && NIL)) { + skiplist_destroy(result); + node_destroy(NIL); + return NULL; + } + + node_incref(head); + + NIL->is_nil = 1; + + for (i = 0; i < 
maxlevels; ++i) { + head->next[i] = NIL; + head->width[i] = 1; + node_incref(NIL); + } + + return result; +} + +// 1 if left < right, 0 if left == right, -1 if left > right +PANDAS_INLINE int _node_cmp(node_t *node, double value) { + if (node->is_nil || node->value > value) { + return -1; + } else if (node->value < value) { + return 1; + } else { + return 0; + } +} + +PANDAS_INLINE double skiplist_get(skiplist_t *skp, int i, int *ret) { + node_t *node; + int level; + + if (i < 0 || i >= skp->size) { + *ret = 0; + return 0; + } + + node = skp->head; + ++i; + for (level = skp->maxlevels - 1; level >= 0; --level) { + while (node->width[level] <= i) { + i -= node->width[level]; + node = node->next[level]; + } + } + + *ret = 1; + return node->value; +} + +PANDAS_INLINE int skiplist_insert(skiplist_t *skp, double value) { + node_t *node, *prevnode, *newnode, *next_at_level; + int *steps_at_level; + int size, steps, level; + node_t **chain; + + chain = skp->tmp_chain; + + steps_at_level = skp->tmp_steps; + memset(steps_at_level, 0, skp->maxlevels * sizeof(int)); + + node = skp->head; + + for (level = skp->maxlevels - 1; level >= 0; --level) { + next_at_level = node->next[level]; + while (_node_cmp(next_at_level, value) >= 0) { + steps_at_level[level] += node->width[level]; + node = next_at_level; + next_at_level = node->next[level]; + } + chain[level] = node; + } + + size = int_min(skp->maxlevels, 1 - ((int)Log2(urand()))); + + newnode = node_init(value, size); + if (!newnode) { + return -1; + } + steps = 0; + + for (level = 0; level < size; ++level) { + prevnode = chain[level]; + newnode->next[level] = prevnode->next[level]; + + prevnode->next[level] = newnode; + node_incref(newnode); // increment the reference count + + newnode->width[level] = prevnode->width[level] - steps; + prevnode->width[level] = steps + 1; + + steps += steps_at_level[level]; + } + + for (level = size; level < skp->maxlevels; ++level) { + chain[level]->width[level] += 1; + } + + ++(skp->size); + + 
return 1; +} + +PANDAS_INLINE int skiplist_remove(skiplist_t *skp, double value) { + int level, size; + node_t *node, *prevnode, *tmpnode, *next_at_level; + node_t **chain; + + chain = skp->tmp_chain; + node = skp->head; + + for (level = skp->maxlevels - 1; level >= 0; --level) { + next_at_level = node->next[level]; + while (_node_cmp(next_at_level, value) > 0) { + node = next_at_level; + next_at_level = node->next[level]; + } + chain[level] = node; + } + + if (value != chain[0]->next[0]->value) { + return 0; + } + + size = chain[0]->next[0]->levels; + + for (level = 0; level < size; ++level) { + prevnode = chain[level]; + + tmpnode = prevnode->next[level]; + + prevnode->width[level] += tmpnode->width[level] - 1; + prevnode->next[level] = tmpnode->next[level]; + + tmpnode->next[level] = NULL; + node_destroy(tmpnode); // decrement refcount or free + } + + for (level = size; level < skp->maxlevels; ++level) { + --(chain[level]->width[level]); + } + + --(skp->size); + return 1; +} + +#endif // PANDAS__LIBS_SRC_SKIPLIST_H_ diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h new file mode 100644 index 00000000..8d04874b --- /dev/null +++ b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -0,0 +1,318 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +/* +Ultra fast JSON encoder and decoder +Developed by Jonas Tarnstrom (jonas@esn.me). + +Encoder notes: +------------------ + +:: Cyclic references :: +Cyclic referenced objects are not detected. +Set JSONObjectEncoder.recursionMax to suitable value or make sure input object +tree doesn't have cyclic references. 
+ +*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ +#define PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ + +#include +#include + +// Don't output any extra whitespaces when encoding +#define JSON_NO_EXTRA_WHITESPACE + +// Max decimals to encode double floating point numbers with +#ifndef JSON_DOUBLE_MAX_DECIMALS +#define JSON_DOUBLE_MAX_DECIMALS 15 +#endif + +// Max recursion depth, default for encoder +#ifndef JSON_MAX_RECURSION_DEPTH +#define JSON_MAX_RECURSION_DEPTH 1024 +#endif + +// Max recursion depth, default for decoder +#ifndef JSON_MAX_OBJECT_DEPTH +#define JSON_MAX_OBJECT_DEPTH 1024 +#endif + +/* +Dictates and limits how much stack space for buffers UltraJSON will use before resorting to provided heap functions */ +#ifndef JSON_MAX_STACK_BUFFER_SIZE +#define JSON_MAX_STACK_BUFFER_SIZE 131072 +#endif + +#ifdef _WIN32 + +typedef __int64 JSINT64; +typedef unsigned __int64 JSUINT64; + +typedef __int32 JSINT32; +typedef unsigned __int32 JSUINT32; +typedef unsigned __int8 JSUINT8; +typedef unsigned __int16 JSUTF16; +typedef unsigned __int32 JSUTF32; +typedef __int64 JSLONG; + +#define EXPORTFUNCTION __declspec(dllexport) + +#define FASTCALL_MSVC __fastcall +#define FASTCALL_ATTR +#define INLINE_PREFIX static __inline + +#else + +#include +typedef int64_t JSINT64; +typedef uint64_t JSUINT64; + +typedef int32_t JSINT32; +typedef uint32_t JSUINT32; + +#define FASTCALL_MSVC + +#if !defined __x86_64__ +#define FASTCALL_ATTR __attribute__((fastcall)) +#else +#define FASTCALL_ATTR +#endif + +#define INLINE_PREFIX static inline + +typedef uint8_t JSUINT8; +typedef uint16_t JSUTF16; +typedef uint32_t JSUTF32; + +typedef int64_t JSLONG; + +#define EXPORTFUNCTION +#endif + +#if !(defined(__LITTLE_ENDIAN__) || defined(__BIG_ENDIAN__)) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define __LITTLE_ENDIAN__ +#else + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define __BIG_ENDIAN__ +#endif + +#endif + +#endif + +#if !defined(__LITTLE_ENDIAN__) && 
!defined(__BIG_ENDIAN__)
+#error "Endianness not supported"
+#endif
+
+// Kinds of JSON value handled by the library.
+enum JSTYPES {
+    JT_NULL,     // NULL
+    JT_TRUE,     // boolean true
+    JT_FALSE,    // boolean false
+    JT_INT,      // (JSINT32 (signed 32-bit))
+    JT_LONG,     // (JSINT64 (signed 64-bit))
+    JT_DOUBLE,   // (double)
+    JT_UTF8,     // (char 8-bit)
+    JT_ARRAY,    // Array structure
+    JT_OBJECT,   // Key/Value structure
+    JT_INVALID,  // Internal, do not return nor expect
+    JT_POS_INF,  // Positive infinity
+    JT_NEG_INF,  // Negative infinity
+};
+
+// Opaque handles: the library never dereferences these itself.
+typedef void * JSOBJ;
+typedef void * JSITER;
+
+// Context passed to every encoder callback below as `tc`.
+// `prv` is private storage for the implementor (see the iter* callbacks).
+// NOTE(review): `type` presumably holds one of the JSTYPES values and
+// `encoder` the owning JSONObjectEncoder -- confirm against the encoder.
+typedef struct __JSONTypeContext {
+    int type;
+    void *encoder;
+    void *prv;
+} JSONTypeContext;
+
+/*
+Function pointer declarations, suitable for implementing UltraJSON */
+typedef void (*JSPFN_ITERBEGIN)(JSOBJ obj, JSONTypeContext *tc);
+typedef int (*JSPFN_ITERNEXT)(JSOBJ obj, JSONTypeContext *tc);
+typedef void (*JSPFN_ITEREND)(JSOBJ obj, JSONTypeContext *tc);
+typedef JSOBJ (*JSPFN_ITERGETVALUE)(JSOBJ obj, JSONTypeContext *tc);
+typedef char *(*JSPFN_ITERGETNAME)(JSOBJ obj, JSONTypeContext *tc,
+                                   size_t *outLen);
+typedef void *(*JSPFN_MALLOC)(size_t size);
+typedef void (*JSPFN_FREE)(void *pptr);
+typedef void *(*JSPFN_REALLOC)(void *base, size_t size);
+
+/*
+Callback table and state used by JSON_EncodeObject. The caller fills in the
+callbacks and configuration fields; the "Buffer stuff" fields at the end hold
+the output buffer state while encoding. */
+typedef struct __JSONObjectEncoder {
+    void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc);
+    void (*endTypeContext)(JSOBJ obj, JSONTypeContext *tc);
+    const char *(*getStringValue)(JSOBJ obj, JSONTypeContext *tc,
+                                  size_t *_outLen);
+    JSINT64 (*getLongValue)(JSOBJ obj, JSONTypeContext *tc);
+    JSINT32 (*getIntValue)(JSOBJ obj, JSONTypeContext *tc);
+    double (*getDoubleValue)(JSOBJ obj, JSONTypeContext *tc);
+
+    /*
+    Begin iteration of an iterable object (JT_ARRAY or JT_OBJECT)
+    Implementor should setup iteration state in tc->prv
+    */
+    JSPFN_ITERBEGIN iterBegin;
+
+    /*
+    Retrieve next object in an iteration. Should return 0 to indicate iteration has reached end or 1 if there are more items.
+    Implementor is responsible for keeping state of the iteration. Use tc->prv fields for this
+    */
+    JSPFN_ITERNEXT iterNext;
+
+    /*
+    Ends the iteration of an iterable object.
+    Any iteration state stored in tc->prv can be freed here
+    */
+    JSPFN_ITEREND iterEnd;
+
+    /*
+    Returns a reference to the value object of an iterator
+    The implementor is responsible for the life-cycle of the returned value. Use iterNext/iterEnd and tc->prv to keep track of current object
+    */
+    JSPFN_ITERGETVALUE iterGetValue;
+
+    /*
+    Return name of iterator.
+    The implementor is responsible for the life-cycle of the returned string. Use iterNext/iterEnd and tc->prv to keep track of current object
+    */
+    JSPFN_ITERGETNAME iterGetName;
+
+    /*
+    Release a value as indicated by setting ti->release = 1 in the previous getValue call.
+    The ti->prv array should contain the necessary context to release the value
+    NOTE(review): JSONTypeContext as declared above has no `release` member and
+    this callback only receives `obj`; the two lines above look stale -- confirm
+    against the encoder before relying on them.
+    */
+    void (*releaseObject)(JSOBJ obj);
+
+    /* Library functions
+    Set to NULL to use STDLIB malloc,realloc,free */
+    JSPFN_MALLOC malloc;
+    JSPFN_REALLOC realloc;
+    JSPFN_FREE free;
+
+    /*
+    Configuration for max recursion, set to 0 to use default (see JSON_MAX_RECURSION_DEPTH)*/
+    int recursionMax;
+
+    /*
+    Configuration for max decimals of double floating point numbers to encode (0-9) */
+    int doublePrecision;
+
+    /*
+    If true output will be ASCII with all characters above 127 encoded as \uXXXX. If false output will be UTF-8 or whatever charset strings are brought as */
+    int forceASCII;
+
+    /*
+    If true, '<', '>', and '&' characters will be encoded as \u003c, \u003e, and \u0026, respectively. If false, no special encoding will be used. */
+    int encodeHTMLChars;
+
+    /*
+    Configuration for spaces of indent */
+    int indent;
+
+    /*
+    Set to an error message if error occurred */
+    const char *errorMsg;
+    JSOBJ errorObj;
+
+    /* Buffer stuff */
+    char *start;   // first byte of the output buffer
+    char *offset;  // next byte to be written
+    char *end;     // one past the last byte of the buffer
+    int heap;      // non-zero once `start` was obtained from malloc/realloc
+    int level;
+} JSONObjectEncoder;
+
+/*
+Encode an object structure into JSON.
+
+Arguments:
+obj - An anonymous type representing the object
+enc - Function definitions for querying JSOBJ type
+buffer - Preallocated buffer to store result in. If NULL function allocates own buffer
+cbBuffer - Length of buffer (ignored if buffer is NULL)
+
+Returns:
+Encoded JSON object as a null terminated char string.
+
+NOTE:
+If the supplied buffer wasn't enough to hold the result the function will allocate a new buffer.
+Life cycle of the provided buffer must still be handled by caller.
+
+If the return value doesn't equal the specified buffer caller must release the memory using
+JSONObjectEncoder.free or free() as specified when calling this function.
+*/
+EXPORTFUNCTION char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc,
+                                       char *buffer, size_t cbBuffer);
+
+/*
+Callback table used by JSON_DecodeObject. The new* callbacks build the
+caller's representation of each decoded value; every callback receives the
+`prv` pointer stored in this struct as its first argument.
+
+newString receives the decoded text as a [start, end) range of wchar_t.
+errorStr / errorOffset are reset to NULL when decoding starts and are set by
+the decoder when it fails (errorOffset points into the input buffer).
+preciseFloat, when non-zero, makes numbers with a fraction or exponent be
+parsed with strtod() instead of the built-in fast path. */
+typedef struct __JSONObjectDecoder {
+    JSOBJ (*newString)(void *prv, wchar_t *start, wchar_t *end);
+    int (*objectAddKey)(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value);
+    int (*arrayAddItem)(void *prv, JSOBJ obj, JSOBJ value);
+    JSOBJ (*newTrue)(void *prv);
+    JSOBJ (*newFalse)(void *prv);
+    JSOBJ (*newNull)(void *prv);
+    JSOBJ (*newPosInf)(void *prv);
+    JSOBJ (*newNegInf)(void *prv);
+    JSOBJ (*newObject)(void *prv, void *decoder);
+    JSOBJ (*endObject)(void *prv, JSOBJ obj);
+    JSOBJ (*newArray)(void *prv, void *decoder);
+    JSOBJ (*endArray)(void *prv, JSOBJ obj);
+    JSOBJ (*newInt)(void *prv, JSINT32 value);
+    JSOBJ (*newLong)(void *prv, JSINT64 value);
+    JSOBJ (*newDouble)(void *prv, double value);
+    void (*releaseObject)(void *prv, JSOBJ obj, void *decoder);
+    JSPFN_MALLOC malloc;
+    JSPFN_FREE free;
+    JSPFN_REALLOC realloc;
+    char *errorStr;
+    char *errorOffset;
+    int preciseFloat;
+    void *prv;
+} JSONObjectDecoder;
+
+EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec,
+                                       const char *buffer, size_t cbBuffer);
+EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t);
+
+#endif  // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_
diff --git
a/pandas/_libs/src/ujson/lib/ultrajsondec.c b/pandas/_libs/src/ujson/lib/ultrajsondec.c new file mode 100644 index 00000000..4eb18ee1 --- /dev/null +++ b/pandas/_libs/src/ujson/lib/ultrajsondec.c @@ -0,0 +1,1202 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. 
+ +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif +#ifndef NULL +#define NULL 0 +#endif + +struct DecoderState { + char *start; + char *end; + wchar_t *escStart; + wchar_t *escEnd; + int escHeap; + int lastType; + JSUINT32 objDepth; + void *prv; + JSONObjectDecoder *dec; +}; + +JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) FASTCALL_ATTR; +typedef JSOBJ (*PFN_DECODER)(struct DecoderState *ds); + +static JSOBJ SetError(struct DecoderState *ds, int offset, + const char *message) { + ds->dec->errorOffset = ds->start + offset; + ds->dec->errorStr = (char *)message; + return NULL; +} + +double createDouble(double intNeg, double intValue, double frcValue, + int frcDecimalCount) { + static const double g_pow10[] = {1.0, + 0.1, + 0.01, + 0.001, + 0.0001, + 0.00001, + 0.000001, + 0.0000001, + 0.00000001, + 0.000000001, + 0.0000000001, + 0.00000000001, + 0.000000000001, + 0.0000000000001, + 0.00000000000001, + 0.000000000000001}; + return (intValue + (frcValue * g_pow10[frcDecimalCount])) * intNeg; +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decodePreciseFloat(struct DecoderState *ds) { + char *end; + double value; + errno = 0; + + value = strtod(ds->start, &end); + + if (errno == ERANGE) { + return SetError(ds, -1, "Range error when decoding numeric as double"); + } + + ds->start = end; + return ds->dec->newDouble(ds->prv, value); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric(struct DecoderState *ds) { + int intNeg = 1; + int mantSize = 0; + JSUINT64 intValue; + int chr; + int decimalCount = 0; + double frcValue = 0.0; + double expNeg; + double expValue; + char *offset = ds->start; + + JSUINT64 overflowLimit = 
LLONG_MAX; + + if (*(offset) == 'I') { + goto DECODE_INF; + } else if (*(offset) == 'N') { + goto DECODE_NAN; + } else if (*(offset) == '-') { + offset++; + intNeg = -1; + if (*(offset) == 'I') { + goto DECODE_INF; + } + overflowLimit = LLONG_MIN; + } + + // Scan integer part + intValue = 0; + + while (1) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // FIXME: Check for arithmetic overflow here + // PERF: Don't do 64-bit arithmetic here unless we know we have + // to + intValue = intValue * 10ULL + (JSLONG)(chr - 48); + + if (intValue > overflowLimit) { + return SetError(ds, -1, overflowLimit == LLONG_MAX + ? "Value is too big" + : "Value is too small"); + } + + offset++; + mantSize++; + break; + } + case '.': { + offset++; + goto DECODE_FRACTION; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + + default: { + goto BREAK_INT_LOOP; + break; + } + } + } + +BREAK_INT_LOOP: + + ds->lastType = JT_INT; + ds->start = offset; + + if ((intValue >> 31)) { + return ds->dec->newLong(ds->prv, (JSINT64)(intValue * (JSINT64)intNeg)); + } else { + return ds->dec->newInt(ds->prv, (JSINT32)(intValue * intNeg)); + } + +DECODE_FRACTION: + + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + // Scan fraction part + frcValue = 0.0; + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (decimalCount < JSON_DOUBLE_MAX_DECIMALS) { + frcValue = frcValue * 10.0 + (double)(chr - 48); + decimalCount++; + } + offset++; + break; + } + case 'e': + case 'E': { + offset++; + goto DECODE_EXPONENT; + break; + } + default: { goto BREAK_FRC_LOOP; } + } + } + +BREAK_FRC_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = 
offset; + return ds->dec->newDouble( + ds->prv, + createDouble((double)intNeg, (double)intValue, frcValue, decimalCount)); + +DECODE_EXPONENT: + if (ds->dec->preciseFloat) { + return decodePreciseFloat(ds); + } + + expNeg = 1.0; + + if (*(offset) == '-') { + expNeg = -1.0; + offset++; + } else if (*(offset) == '+') { + expNeg = +1.0; + offset++; + } + + expValue = 0.0; + + for (;;) { + chr = (int)(unsigned char)*(offset); + + switch (chr) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + expValue = expValue * 10.0 + (double)(chr - 48); + offset++; + break; + } + default: { goto BREAK_EXP_LOOP; } + } + } + +DECODE_NAN: + offset++; + if (*(offset++) != 'a') goto SET_NAN_ERROR; + if (*(offset++) != 'N') goto SET_NAN_ERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SET_NAN_ERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'NaN'"); + +DECODE_INF: + offset++; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'f') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 't') goto SET_INF_ERROR; + if (*(offset++) != 'y') goto SET_INF_ERROR; + + ds->start = offset; + + if (intNeg == 1) { + ds->lastType = JT_POS_INF; + return ds->dec->newPosInf(ds->prv); + } else { + ds->lastType = JT_NEG_INF; + return ds->dec->newNegInf(ds->prv); + } + +SET_INF_ERROR: + if (intNeg == 1) { + const char *msg = "Unexpected character found when decoding 'Infinity'"; + return SetError(ds, -1, msg); + } else { + const char *msg = "Unexpected character found when decoding '-Infinity'"; + return SetError(ds, -1, msg); + } + + +BREAK_EXP_LOOP: + // FIXME: Check for arithmetic overflow here + ds->lastType = JT_DOUBLE; + ds->start = offset; + return ds->dec->newDouble( + ds->prv, + createDouble((double)intNeg, 
(double)intValue, frcValue, decimalCount) * + pow(10.0, expValue * expNeg)); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'r') goto SETERROR; + if (*(offset++) != 'u') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_TRUE; + ds->start = offset; + return ds->dec->newTrue(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'true'"); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_false(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'a') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 's') goto SETERROR; + if (*(offset++) != 'e') goto SETERROR; + + ds->lastType = JT_FALSE; + ds->start = offset; + return ds->dec->newFalse(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'false'"); +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_null(struct DecoderState *ds) { + char *offset = ds->start; + offset++; + + if (*(offset++) != 'u') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + if (*(offset++) != 'l') goto SETERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SETERROR: + return SetError(ds, -1, "Unexpected character found when decoding 'null'"); +} + +FASTCALL_ATTR void FASTCALL_MSVC SkipWhitespace(struct DecoderState *ds) { + char *offset; + + for (offset = ds->start; (ds->end - offset) > 0; offset++) { + switch (*offset) { + case ' ': + case '\t': + case '\r': + case '\n': + break; + + default: + ds->start = offset; + return; + } + } + + if (offset == ds->end) { + ds->start = ds->end; + } +} + +enum DECODESTRINGSTATE { + DS_ISNULL = 0x32, + DS_ISQUOTE, + DS_ISESCAPE, + DS_UTFLENERROR, +}; + +static const JSUINT8 g_decoderLookup[256] = { + /* 0x00 */ DS_ISNULL, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x10 */ 
1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x20 */ 1, + 1, + DS_ISQUOTE, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + DS_ISESCAPE, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xd0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + /* 0xe0 */ 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + /* 0xf0 */ 4, + 4, + 4, + 4, + 4, + 4, + 4, + 4, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, + DS_UTFLENERROR, +}; + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_string(struct DecoderState *ds) { + JSUTF16 sur[2] = {0}; + int iSur = 0; + int index; + wchar_t *escOffset; + wchar_t *escStart; + size_t escLen = (ds->escEnd - ds->escStart); + JSUINT8 *inputOffset; + JSUINT8 oct; + JSUTF32 ucs; + ds->lastType = JT_INVALID; + ds->start++; + + if ((size_t)(ds->end - ds->start) > escLen) { + size_t newSize = (ds->end - ds->start); + + if (ds->escHeap) { + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not 
reserve memory block"); + } + escStart = (wchar_t *)ds->dec->realloc(ds->escStart, + newSize * sizeof(wchar_t)); + if (!escStart) { + ds->dec->free(ds->escStart); + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = escStart; + } else { + wchar_t *oldStart = ds->escStart; + if (newSize > (SIZE_MAX / sizeof(wchar_t))) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escStart = + (wchar_t *)ds->dec->malloc(newSize * sizeof(wchar_t)); + if (!ds->escStart) { + return SetError(ds, -1, "Could not reserve memory block"); + } + ds->escHeap = 1; + memcpy(ds->escStart, oldStart, escLen * sizeof(wchar_t)); + } + + ds->escEnd = ds->escStart + newSize; + } + + escOffset = ds->escStart; + inputOffset = (JSUINT8 *)ds->start; + + for (;;) { + switch (g_decoderLookup[(JSUINT8)(*inputOffset)]) { + case DS_ISNULL: { + return SetError(ds, -1, + "Unmatched ''\"' when when decoding 'string'"); + } + case DS_ISQUOTE: { + ds->lastType = JT_UTF8; + inputOffset++; + ds->start += ((char *)inputOffset - (ds->start)); + return ds->dec->newString(ds->prv, ds->escStart, escOffset); + } + case DS_UTFLENERROR: { + return SetError( + ds, -1, + "Invalid UTF-8 sequence length when decoding 'string'"); + } + case DS_ISESCAPE: + inputOffset++; + switch (*inputOffset) { + case '\\': + *(escOffset++) = L'\\'; + inputOffset++; + continue; + case '\"': + *(escOffset++) = L'\"'; + inputOffset++; + continue; + case '/': + *(escOffset++) = L'/'; + inputOffset++; + continue; + case 'b': + *(escOffset++) = L'\b'; + inputOffset++; + continue; + case 'f': + *(escOffset++) = L'\f'; + inputOffset++; + continue; + case 'n': + *(escOffset++) = L'\n'; + inputOffset++; + continue; + case 'r': + *(escOffset++) = L'\r'; + inputOffset++; + continue; + case 't': + *(escOffset++) = L'\t'; + inputOffset++; + continue; + + case 'u': { + int index; + inputOffset++; + + for (index = 0; index < 4; index++) { + switch (*inputOffset) { + case '\0': + return SetError(ds, -1, + 
"Unterminated unicode " + "escape sequence when " + "decoding 'string'"); + default: + return SetError(ds, -1, + "Unexpected character in " + "unicode escape sequence " + "when decoding 'string'"); + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + sur[iSur] = (sur[iSur] << 4) + + (JSUTF16)(*inputOffset - '0'); + break; + + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'a'); + break; + + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + sur[iSur] = (sur[iSur] << 4) + 10 + + (JSUTF16)(*inputOffset - 'A'); + break; + } + + inputOffset++; + } + + if (iSur == 0) { + if ((sur[iSur] & 0xfc00) == 0xd800) { + // First of a surrogate pair, continue parsing + iSur++; + break; + } + (*escOffset++) = (wchar_t)sur[iSur]; + iSur = 0; + } else { + // Decode pair + if ((sur[1] & 0xfc00) != 0xdc00) { + return SetError(ds, -1, + "Unpaired high surrogate when " + "decoding 'string'"); + } +#if WCHAR_MAX == 0xffff + (*escOffset++) = (wchar_t)sur[0]; + (*escOffset++) = (wchar_t)sur[1]; +#else + (*escOffset++) = + (wchar_t)0x10000 + + (((sur[0] - 0xd800) << 10) | (sur[1] - 0xdc00)); +#endif + iSur = 0; + } + break; + } + + case '\0': + return SetError(ds, -1, + "Unterminated escape sequence when " + "decoding 'string'"); + default: + return SetError(ds, -1, + "Unrecognized escape sequence when " + "decoding 'string'"); + } + break; + + case 1: { + *(escOffset++) = (wchar_t)(*inputOffset++); + break; + } + + case 2: { + ucs = (*inputOffset++) & 0x1f; + ucs <<= 6; + if (((*inputOffset) & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + ucs |= (*inputOffset++) & 0x3f; + if (ucs < 0x80) + return SetError(ds, -1, + "Overlong 2 byte UTF-8 sequence detected " + "when decoding 'string'"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 3: { + 
JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x0f; + + for (index = 0; index < 2; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x800) + return SetError(ds, -1, + "Overlong 3 byte UTF-8 sequence detected " + "when encoding string"); + *(escOffset++) = (wchar_t)ucs; + break; + } + + case 4: { + JSUTF32 ucs = 0; + ucs |= (*inputOffset++) & 0x07; + + for (index = 0; index < 3; index++) { + ucs <<= 6; + oct = (*inputOffset++); + + if ((oct & 0x80) != 0x80) { + return SetError(ds, -1, + "Invalid octet in UTF-8 sequence when " + "decoding 'string'"); + } + + ucs |= oct & 0x3f; + } + + if (ucs < 0x10000) + return SetError(ds, -1, + "Overlong 4 byte UTF-8 sequence detected " + "when decoding 'string'"); + +#if WCHAR_MAX == 0xffff + if (ucs >= 0x10000) { + ucs -= 0x10000; + *(escOffset++) = (wchar_t)(ucs >> 10) + 0xd800; + *(escOffset++) = (wchar_t)(ucs & 0x3ff) + 0xdc00; + } else { + *(escOffset++) = (wchar_t)ucs; + } +#else + *(escOffset++) = (wchar_t)ucs; +#endif + break; + } + } + } +} + +FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_array(struct DecoderState *ds) { + JSOBJ itemValue; + JSOBJ newObj; + int len; + ds->objDepth++; + if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) { + return SetError(ds, -1, "Reached object decoding depth limit"); + } + + newObj = ds->dec->newArray(ds->prv, ds->dec); + len = 0; + + ds->lastType = JT_INVALID; + ds->start++; + + for (;;) { + SkipWhitespace(ds); + + if ((*ds->start) == ']') { + ds->objDepth--; + if (len == 0) { + ds->start++; + return ds->dec->endArray(ds->prv, newObj); + } + + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return SetError( + ds, -1, + "Unexpected character found when decoding array value (1)"); + } + + itemValue = decode_any(ds); + + if (itemValue == NULL) { + ds->dec->releaseObject(ds->prv, newObj, ds->dec); + return NULL; + } + + if 
(!ds->dec->arrayAddItem(ds->prv, newObj, itemValue)) {
+            ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+            return NULL;
+        }
+
+        SkipWhitespace(ds);
+
+        // After a value only ']' (end of array) or ',' (another value
+        // follows) is accepted.
+        switch (*(ds->start++)) {
+            case ']': {
+                ds->objDepth--;
+                return ds->dec->endArray(ds->prv, newObj);
+            }
+            case ',':
+                break;
+
+            default:
+                ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+                return SetError(
+                    ds, -1,
+                    "Unexpected character found when decoding array value (2)");
+        }
+
+        len++;
+    }
+}
+
+/*
+Decode the JSON object whose '{' is at ds->start.
+
+Each member is decoded as key, ':', value and attached with
+dec->objectAddKey. Keys must decode as strings (lastType == JT_UTF8).
+Nesting deeper than JSON_MAX_OBJECT_DEPTH is rejected.
+
+Returns dec->endObject(...) on success. On any failure the partially built
+object (and a pending key/value that was not attached) is released through
+dec->releaseObject and NULL is returned.
+*/
+FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_object(struct DecoderState *ds) {
+    JSOBJ itemName;
+    JSOBJ itemValue;
+    JSOBJ newObj;
+
+    ds->objDepth++;
+    if (ds->objDepth > JSON_MAX_OBJECT_DEPTH) {
+        return SetError(ds, -1, "Reached object decoding depth limit");
+    }
+
+    newObj = ds->dec->newObject(ds->prv, ds->dec);
+
+    ds->start++;
+
+    for (;;) {
+        SkipWhitespace(ds);
+
+        if ((*ds->start) == '}') {
+            ds->objDepth--;
+            ds->start++;
+            return ds->dec->endObject(ds->prv, newObj);
+        }
+
+        // Reset lastType so the JT_UTF8 test below reflects the key that is
+        // about to be decoded.
+        ds->lastType = JT_INVALID;
+        itemName = decode_any(ds);
+
+        if (itemName == NULL) {
+            ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+            return NULL;
+        }
+
+        if (ds->lastType != JT_UTF8) {
+            ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+            ds->dec->releaseObject(ds->prv, itemName, ds->dec);
+            return SetError(
+                ds, -1,
+                "Key name of object must be 'string' when decoding 'object'");
+        }
+
+        SkipWhitespace(ds);
+
+        if (*(ds->start++) != ':') {
+            ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+            ds->dec->releaseObject(ds->prv, itemName, ds->dec);
+            return SetError(ds, -1, "No ':' found when decoding object value");
+        }
+
+        SkipWhitespace(ds);
+
+        itemValue = decode_any(ds);
+
+        if (itemValue == NULL) {
+            ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+            ds->dec->releaseObject(ds->prv, itemName, ds->dec);
+            return NULL;
+        }
+
+        if (!ds->dec->objectAddKey(ds->prv, newObj, itemName, itemValue)) {
+            ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+            ds->dec->releaseObject(ds->prv, itemName, ds->dec);
+            ds->dec->releaseObject(ds->prv, itemValue, ds->dec);
+            return NULL;
+        }
+
+        SkipWhitespace(ds);
+
+        // After a member only '}' (end of object) or ',' (another member
+        // follows) is accepted.
+        switch (*(ds->start++)) {
+            case '}': {
+                ds->objDepth--;
+                return ds->dec->endObject(ds->prv, newObj);
+            }
+            case ',':
+                break;
+
+            default:
+                ds->dec->releaseObject(ds->prv, newObj, ds->dec);
+                return SetError(
+                    ds, -1,
+                    "Unexpected character found when decoding object value");
+        }
+    }
+}
+
+/*
+Dispatch on the first non-whitespace character at ds->start to the matching
+decode_* function. Returns that function's result, or NULL after SetError()
+when the character cannot start a JSON value.
+*/
+FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) {
+    for (;;) {
+        switch (*ds->start) {
+            case '\"':
+                return decode_string(ds);
+            case '0':
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+            case 'I':
+            case 'N':
+            case '-':
+                return decode_numeric(ds);
+
+            case '[':
+                return decode_array(ds);
+            case '{':
+                return decode_object(ds);
+            case 't':
+                return decode_true(ds);
+            case 'f':
+                return decode_false(ds);
+            case 'n':
+                return decode_null(ds);
+
+            case ' ':
+            case '\t':
+            case '\r':
+            case '\n':
+                // White space
+                ds->start++;
+                break;
+
+            default:
+                return SetError(ds, -1, "Expected object or value");
+        }
+    }
+}
+
+/*
+Decode the JSON document in buffer[0 .. cbBuffer) using the callbacks in
+`dec`.
+
+dec->errorStr and dec->errorOffset are cleared first and are set when
+decoding fails. While decoding, LC_NUMERIC is temporarily switched to "C"
+(and restored afterwards) if it is not "C" already. Anything other than
+whitespace after the first value is reported as "Trailing data" and the
+decoded value is released.
+
+Returns the decoded object, or NULL on error.
+*/
+JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer,
+                        size_t cbBuffer) {
+    /*
+    FIXME: Base the size of escBuffer of that of cbBuffer so that the unicode
+    escaping doesn't run into the wall each time */
+    char *locale;
+    struct DecoderState ds;
+    wchar_t escBuffer[(JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t))];
+    JSOBJ ret;
+
+    ds.start = (char *)buffer;
+    ds.end = ds.start + cbBuffer;
+
+    // String decoding starts with the on-stack scratch buffer; decode_string
+    // switches to a heap block (escHeap = 1) when it is too small.
+    ds.escStart = escBuffer;
+    ds.escEnd = ds.escStart + (JSON_MAX_STACK_BUFFER_SIZE / sizeof(wchar_t));
+    ds.escHeap = 0;
+    ds.prv = dec->prv;
+    ds.dec = dec;
+    ds.dec->errorStr = NULL;
+    ds.dec->errorOffset = NULL;
+    ds.objDepth = 0;
+
+    ds.dec = dec;
+
+    locale = setlocale(LC_NUMERIC, NULL);
+    if (strcmp(locale, "C")) {
+        // Copy the current locale name before changing the locale so it can
+        // be restored once decoding is done.
+        locale = strdup(locale);
+        if (!locale) {
+            return SetError(&ds, -1, "Could not reserve memory block");
+        }
+        setlocale(LC_NUMERIC, "C");
+        ret = decode_any(&ds);
+        setlocale(LC_NUMERIC, locale);
+        free(locale);
+    } else {
+        ret = decode_any(&ds);
+    }
+
+    if (ds.escHeap) {
+        dec->free(ds.escStart);
+    }
+
+    SkipWhitespace(&ds);
+
+    if (ds.start != ds.end && ret) {
+        dec->releaseObject(ds.prv, ret, ds.dec);
+        return SetError(&ds, -1, "Trailing data");
+    }
+
+    return ret;
+}
diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
new file mode 100644
index 00000000..51c9b924
--- /dev/null
+++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c
@@ -0,0 +1,1173 @@
+/*
+Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the ESN Social Software AB nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED.
IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "ultrajson.h" + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* +Worst cases being: + +Control characters (ASCII < 32) +0x00 (1 byte) input => \u0000 output (6 bytes) +1 * 6 => 6 (6 bytes required) + +or UTF-16 surrogate pairs +4 bytes input in UTF-8 => \uXXXX\uYYYY (12 bytes). 
+ +4 * 6 => 24 bytes (12 bytes required) + +The extra 2 bytes are for the quotes around the string + +*/ +#define RESERVE_STRING(_len) (2 + ((_len)*6)) + +static const double g_pow10[] = {1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000}; +static const char g_hexChars[] = "0123456789abcdef"; +static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/"; + +/* +FIXME: While this is fine dandy and working it's a magic value mess which +probably only the author understands. +Needs a cleanup and more documentation */ + +/* +Table for pure ascii output escaping all characters above 127 to \uXXXX */ +static const JSUINT8 g_asciiOutputTable[256] = { + /* 0x00 */ 0, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 10, + 12, + 14, + 30, + 16, + 18, + 30, + 30, + /* 0x10 */ 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + 30, + /* 0x20 */ 1, + 1, + 20, + 1, + 1, + 1, + 29, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 24, + /* 0x30 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 29, + 1, + 29, + 1, + /* 0x40 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x50 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 22, + 1, + 1, + 1, + /* 0x60 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x70 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x80 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0x90 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xa0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xb0 */ 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + /* 0xc0 */ 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 
2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    /* 0xd0 */ 2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    /* 0xe0 */ 3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    /* 0xf0 */ 4,
+    4,
+    4,
+    4,
+    4,
+    4,
+    4,
+    4,
+    5,
+    5,
+    5,
+    5,
+    6,
+    6,
+    1,
+    1};
+
+/* Record an encoding error: the message and the object being encoded. */
+static void SetError(JSOBJ obj, JSONObjectEncoder *enc, const char *message) {
+    enc->errorMsg = message;
+    enc->errorObj = obj;
+}
+
+/*
+FIXME: Keep track of how big these get across several encoder calls and try to
+make an estimate
+That way we won't run our head into the wall each call */
+
+/*
+Grow the encoder's output buffer so that at least cbNeeded more bytes fit.
+
+The capacity is doubled until it reaches the current capacity plus cbNeeded.
+A buffer that is already heap-owned (enc->heap != 0) is grown with
+enc->realloc; otherwise a new block is obtained from enc->malloc and the
+bytes written so far are copied into it.
+
+On failure enc->errorMsg is set to "Could not reserve memory block" and the
+encoder's buffer fields (start/offset/end/heap) are left untouched, so the
+existing buffer stays valid and is not leaked. Previously a failed realloc
+overwrote enc->start with NULL (losing the only pointer to the old block) and
+a failed malloc still flagged the buffer as heap-owned. The sizing loop is
+also guarded against a zero-sized buffer and against size_t wrap-around,
+both of which used to loop forever.
+*/
+void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded) {
+    const size_t maxSize = (size_t)-1;
+    size_t curSize = enc->end - enc->start;
+    size_t offset = enc->offset - enc->start;
+    size_t required;
+    size_t newSize;
+    char *newStart;
+
+    // curSize + cbNeeded and curSize * 2 must both be representable.
+    if (cbNeeded > maxSize - curSize || curSize > maxSize / 2) {
+        SetError(NULL, enc, "Could not reserve memory block");
+        return;
+    }
+    required = curSize + cbNeeded;
+
+    newSize = curSize * 2;
+    if (newSize == 0) {
+        // Doubling zero never makes progress; start from one byte.
+        newSize = 1;
+    }
+
+    while (newSize < required) {
+        if (newSize > maxSize / 2) {
+            SetError(NULL, enc, "Could not reserve memory block");
+            return;
+        }
+        newSize *= 2;
+    }
+
+    if (enc->heap) {
+        // Keep enc->start until realloc succeeded: on failure the old block
+        // is still allocated and must remain reachable.
+        newStart = (char *)enc->realloc(enc->start, newSize);
+        if (!newStart) {
+            SetError(NULL, enc, "Could not reserve memory block");
+            return;
+        }
+    } else {
+        newStart = (char *)enc->malloc(newSize);
+        if (!newStart) {
+            SetError(NULL, enc, "Could not reserve memory block");
+            return;
+        }
+        memcpy(newStart, enc->start, offset);
+        enc->heap = 1;
+    }
+
+    enc->start = newStart;
+    enc->offset = enc->start + offset;
+    enc->end = enc->start + newSize;
+}
+
+/* Write `value` as four lowercase hex digits at outputOffset. No bounds
+   check and no terminator; the caller's pointer is not advanced. */
+FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC
+Buffer_AppendShortHexUnchecked(char *outputOffset, unsigned short value) {
+    *(outputOffset++) = g_hexChars[(value & 0xf000) >> 12];
+    *(outputOffset++) = g_hexChars[(value & 0x0f00) >> 8];
+    *(outputOffset++) = g_hexChars[(value & 0x00f0) >> 4];
+    *(outputOffset++) = g_hexChars[(value & 0x000f) >> 0];
+}
+
+int Buffer_EscapeStringUnvalidated(JSONObjectEncoder *enc, const char *io,
+                                   const char *end) {
+    char *of = (char *)enc->offset;
+
+    for (;;) {
+        switch (*io) {
+            case 0x00: {
+                if (io < end) {
+                    *(of++) = '\\';
+                    *(of++) = 'u';
+                    *(of++) = '0';
+                    *(of++) = '0';
+                    *(of++) = '0';
+                    *(of++) = '0'; +
break;
+                } else {
+                    // A NUL at (or past) `end` is the terminator: publish
+                    // the new write position and stop.
+                    enc->offset += (of - enc->offset);
+                    return TRUE;
+                }
+            }
+            case '\"':
+                (*of++) = '\\';
+                (*of++) = '\"';
+                break;
+            case '\\':
+                (*of++) = '\\';
+                (*of++) = '\\';
+                break;
+            case '/':
+                (*of++) = '\\';
+                (*of++) = '/';
+                break;
+            case '\b':
+                (*of++) = '\\';
+                (*of++) = 'b';
+                break;
+            case '\f':
+                (*of++) = '\\';
+                (*of++) = 'f';
+                break;
+            case '\n':
+                (*of++) = '\\';
+                (*of++) = 'n';
+                break;
+            case '\r':
+                (*of++) = '\\';
+                (*of++) = 'r';
+                break;
+            case '\t':
+                (*of++) = '\\';
+                (*of++) = 't';
+                break;
+
+            case 0x26:  // '&'
+            case 0x3c:  // '<'
+            case 0x3e:  // '>'
+            {
+                if (enc->encodeHTMLChars) {
+                    // Fall through to \u00XX case below.
+                } else {
+                    // Same as default case below.
+                    (*of++) = (*io);
+                    break;
+                }
+            }
+            // Remaining control characters (those without a short escape
+            // above) are written as \u00XX.
+            case 0x01:
+            case 0x02:
+            case 0x03:
+            case 0x04:
+            case 0x05:
+            case 0x06:
+            case 0x07:
+            case 0x0b:
+            case 0x0e:
+            case 0x0f:
+            case 0x10:
+            case 0x11:
+            case 0x12:
+            case 0x13:
+            case 0x14:
+            case 0x15:
+            case 0x16:
+            case 0x17:
+            case 0x18:
+            case 0x19:
+            case 0x1a:
+            case 0x1b:
+            case 0x1c:
+            case 0x1d:
+            case 0x1e:
+            case 0x1f: {
+                *(of++) = '\\';
+                *(of++) = 'u';
+                *(of++) = '0';
+                *(of++) = '0';
+                *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)];
+                *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)];
+                break;
+            }
+            default:
+                (*of++) = (*io);
+                break;
+        }
+        io++;
+    }
+}
+
+/*
+Escape the UTF-8 string [io, end) into the encoder's output buffer as pure
+ASCII JSON text, validating the UTF-8 on the way.
+
+Each input byte is classified through g_asciiOutputTable:
+  0        NUL: written as \u0000 while io < end, otherwise end of input
+  1        copied unchanged
+  2, 3, 4  lead byte of a UTF-8 sequence of that length; the code point is
+           decoded and written as \uXXXX (or a surrogate pair)
+  5, 6     unsupported sequence length (error)
+  10..24   index into g_escapeChars of a two-character escape such as \n
+  29       '&', '<' or '>': \u00XX when enc->encodeHTMLChars, else copied
+  30       written as \u00XX
+
+No bounds checks are done on the output; the write position starts at
+enc->offset and enc->offset is updated before returning.
+
+Returns TRUE on success. On truncated, overlong or unsupported sequences
+SetError() is called for `obj` and FALSE is returned.
+*/
+int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc,
+                                 const char *io, const char *end) {
+    JSUTF32 ucs;
+    char *of = (char *)enc->offset;
+
+    for (;;) {
+        JSUINT8 utflen = g_asciiOutputTable[(unsigned char)*io];
+
+        switch (utflen) {
+            case 0: {
+                if (io < end) {
+                    *(of++) = '\\';
+                    *(of++) = 'u';
+                    *(of++) = '0';
+                    *(of++) = '0';
+                    *(of++) = '0';
+                    *(of++) = '0';
+                    io++;
+                    continue;
+                } else {
+                    enc->offset += (of - enc->offset);
+                    return TRUE;
+                }
+            }
+
+            case 1: {
+                *(of++) = (*io++);
+                continue;
+            }
+
+            case 2: {
+                JSUTF32 in;
+                JSUTF16 in16;
+
+                // NOTE(review): this only rejects io >= end, yet two bytes
+                // are read below; with exactly one byte left the second
+                // byte read is the one at `end` (presumably the NUL
+                // terminator) -- confirm callers always NUL-terminate.
+                if (end - io < 1) {
+                    enc->offset += (of - enc->offset);
+                    SetError(
+                        obj, enc,
+                        "Unterminated UTF-8 sequence when encoding string");
+                    return FALSE;
+                }
+
+                // Load both bytes at once; the bit extraction below depends
+                // on the host byte order.
+                memcpy(&in16, io, sizeof(JSUTF16));
+                in = (JSUTF32)in16;
+
+#ifdef __LITTLE_ENDIAN__
+                ucs = ((in & 0x1f) << 6) | ((in >> 8) & 0x3f);
+#else
+                ucs = ((in & 0x1f00) >> 2) | (in & 0x3f);
+#endif
+
+                if (ucs < 0x80) {
+                    enc->offset += (of - enc->offset);
+                    SetError(obj, enc,
+                             "Overlong 2 byte UTF-8 sequence detected when "
+                             "encoding string");
+                    return FALSE;
+                }
+
+                io += 2;
+                break;
+            }
+
+            case 3: {
+                JSUTF32 in;
+                JSUTF16 in16;
+                JSUINT8 in8;
+
+                if (end - io < 2) {
+                    enc->offset += (of - enc->offset);
+                    SetError(
+                        obj, enc,
+                        "Unterminated UTF-8 sequence when encoding string");
+                    return FALSE;
+                }
+
+                memcpy(&in16, io, sizeof(JSUTF16));
+                memcpy(&in8, io + 2, sizeof(JSUINT8));
+#ifdef __LITTLE_ENDIAN__
+                in = (JSUTF32)in16;
+                in |= in8 << 16;
+                ucs = ((in & 0x0f) << 12) | ((in & 0x3f00) >> 2) |
+                      ((in & 0x3f0000) >> 16);
+#else
+                in = in16 << 8;
+                in |= in8;
+                ucs =
+                    ((in & 0x0f0000) >> 4) | ((in & 0x3f00) >> 2) | (in & 0x3f);
+#endif
+
+                if (ucs < 0x800) {
+                    enc->offset += (of - enc->offset);
+                    SetError(obj, enc,
+                             "Overlong 3 byte UTF-8 sequence detected when "
+                             "encoding string");
+                    return FALSE;
+                }
+
+                io += 3;
+                break;
+            }
+            case 4: {
+                JSUTF32 in;
+
+                if (end - io < 3) {
+                    enc->offset += (of - enc->offset);
+                    SetError(
+                        obj, enc,
+                        "Unterminated UTF-8 sequence when encoding string");
+                    return FALSE;
+                }
+
+                memcpy(&in, io, sizeof(JSUTF32));
+#ifdef __LITTLE_ENDIAN__
+                ucs = ((in & 0x07) << 18) | ((in & 0x3f00) << 4) |
+                      ((in & 0x3f0000) >> 10) | ((in & 0x3f000000) >> 24);
+#else
+                ucs = ((in & 0x07000000) >> 6) | ((in & 0x3f0000) >> 4) |
+                      ((in & 0x3f00) >> 2) | (in & 0x3f);
+#endif
+                if (ucs < 0x10000) {
+                    enc->offset += (of - enc->offset);
+                    SetError(obj, enc,
+                             "Overlong 4 byte UTF-8 sequence detected when "
+                             "encoding string");
+                    return FALSE;
+                }
+
+                io += 4;
+                break;
+            }
+
+            case 5:
+            case 6: {
+                enc->offset += (of - enc->offset);
+                SetError(
+                    obj, enc,
+                    "Unsupported UTF-8 sequence length when encoding string");
+                return FALSE;
+            }
+
+            case 29: {
+                if (enc->encodeHTMLChars) {
+                    // Fall through to \u00XX case 30 below.
+                } else {
+                    // Same as case 1 above.
+                    *(of++) = (*io++);
+                    continue;
+                }
+            }
+
+            case 30: {
+                // \uXXXX encode
+                *(of++) = '\\';
+                *(of++) = 'u';
+                *(of++) = '0';
+                *(of++) = '0';
+                *(of++) = g_hexChars[(unsigned char)(((*io) & 0xf0) >> 4)];
+                *(of++) = g_hexChars[(unsigned char)((*io) & 0x0f)];
+                io++;
+                continue;
+            }
+            // The table value is the offset of the two-character escape
+            // inside g_escapeChars.
+            case 10:
+            case 12:
+            case 14:
+            case 16:
+            case 18:
+            case 20:
+            case 22:
+            case 24: {
+                *(of++) = *((char *)(g_escapeChars + utflen + 0));
+                *(of++) = *((char *)(g_escapeChars + utflen + 1));
+                io++;
+                continue;
+            }
+            // This can never happen, it's here to make L4 VC++ happy
+            default: {
+                ucs = 0;
+                break;
+            }
+        }
+
+        /*
+        If the character is a UTF8 sequence of length > 1 we end up here */
+        if (ucs >= 0x10000) {
+            // Outside the BMP: emit a UTF-16 surrogate pair.
+            ucs -= 0x10000;
+            *(of++) = '\\';
+            *(of++) = 'u';
+            Buffer_AppendShortHexUnchecked(
+                of, (unsigned short)(ucs >> 10) + 0xd800);
+            of += 4;
+
+            *(of++) = '\\';
+            *(of++) = 'u';
+            Buffer_AppendShortHexUnchecked(
+                of, (unsigned short)(ucs & 0x3ff) + 0xdc00);
+            of += 4;
+        } else {
+            *(of++) = '\\';
+            *(of++) = 'u';
+            Buffer_AppendShortHexUnchecked(of, (unsigned short)ucs);
+            of += 4;
+        }
+    }
+}
+
+// Ensure at least __len more bytes fit in the encoder buffer, growing it
+// through Buffer_Realloc when they do not. Expands to a bare `if`
+// statement, so do not follow an invocation with `else`.
+#define Buffer_Reserve(__enc, __len) \
+    if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len))  \
+    {   \
+        Buffer_Realloc((__enc), (__len));\
+    }   \
+
+// Append one character without checking capacity. The expansion already
+// ends in ';'.
+#define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr;
+
+/* Reverse the characters in [begin, end] in place; `end` is inclusive. */
+FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin,
+                                                          char *end) {
+    char aux;
+    while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux;
+}
+
+/* Append a newline when indentation is enabled (enc->indent > 0). */
+void Buffer_AppendIndentNewlineUnchecked(JSONObjectEncoder *enc)
+{
+    if (enc->indent > 0) Buffer_AppendCharUnchecked(enc, '\n');
+}
+
+// This function could be refactored to only accept enc as an argument,
+// but this is a straight vendor from ujson source
+void
Buffer_AppendIndentUnchecked(JSONObjectEncoder *enc, JSINT32 value) +{ + int i; + if (enc->indent > 0) + while (value-- > 0) + for (i = 0; i < enc->indent; i++) + Buffer_AppendCharUnchecked(enc, ' '); +} + +void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSINT32 value) { + char *wstr; + JSUINT32 uvalue = (value < 0) ? -value : value; + wstr = enc->offset; + + // Conversion. Number is reversed. + do { + *wstr++ = (char)(48 + (uvalue % 10)); + } while (uvalue /= 10); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +void Buffer_AppendLongUnchecked(JSONObjectEncoder *enc, JSINT64 value) { + char *wstr; + JSUINT64 uvalue = (value < 0) ? -value : value; + + wstr = enc->offset; + // Conversion. Number is reversed. + + do { + *wstr++ = (char)(48 + (uvalue % 10ULL)); + } while (uvalue /= 10ULL); + if (value < 0) *wstr++ = '-'; + + // Reverse string + strreverse(enc->offset, wstr - 1); + enc->offset += (wstr - (enc->offset)); +} + +int Buffer_AppendDoubleUnchecked(JSOBJ obj, JSONObjectEncoder *enc, + double value) { + /* if input is beyond the thresholds, revert to exponential */ + const double thres_max = (double)1e16 - 1; + const double thres_min = (double)1e-15; + char precision_str[20]; + int count; + double diff = 0.0; + char *str = enc->offset; + char *wstr = str; + unsigned long long whole; + double tmp; + unsigned long long frac; + int neg; + double pow10; + + if (value == HUGE_VAL || value == -HUGE_VAL) { + SetError(obj, enc, "Invalid Inf value when encoding double"); + return FALSE; + } + + if (!(value == value)) { + SetError(obj, enc, "Invalid Nan value when encoding double"); + return FALSE; + } + + /* we'll work in positive values and deal with the + negative sign issue later */ + neg = 0; + if (value < 0) { + neg = 1; + value = -value; + } + + /* + for very large or small numbers switch back to native sprintf for + exponentials. 
anyone want to write code to replace this? */ + if (value > thres_max || (value != 0.0 && fabs(value) < thres_min)) { + precision_str[0] = '%'; + precision_str[1] = '.'; +#if defined(_WIN32) && defined(_MSC_VER) + sprintf_s(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += sprintf_s(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#else + snprintf(precision_str + 2, sizeof(precision_str) - 2, "%ug", + enc->doublePrecision); + enc->offset += snprintf(str, enc->end - enc->offset, precision_str, + neg ? -value : value); +#endif + return TRUE; + } + + pow10 = g_pow10[enc->doublePrecision]; + + whole = (unsigned long long)value; + tmp = (value - whole) * pow10; + frac = (unsigned long long)(tmp); + diff = tmp - frac; + + if (diff > 0.5) { + ++frac; + } else if (diff == 0.5 && ((frac == 0) || (frac & 1))) { + /* if halfway, round up if odd, OR + if last digit is 0. That last part is strange */ + ++frac; + } + + // handle rollover, e.g. + // case 0.99 with prec 1 is 1.0 and case 0.95 with prec is 1.0 as well + if (frac >= pow10) { + frac = 0; + ++whole; + } + + if (enc->doublePrecision == 0) { + diff = value - whole; + + if (diff > 0.5) { + /* greater than 0.5, round up, e.g. 
1.6 -> 2 */ + ++whole; + } else if (diff == 0.5 && (whole & 1)) { + /* exactly 0.5 and ODD, then round up */ + /* 1.5 -> 2, but 2.5 -> 2 */ + ++whole; + } + + // vvvvvvvvvvvvvvvvvvv Diff from modp_dto2 + } else if (frac) { + count = enc->doublePrecision; + // now do fractional part, as an unsigned number + // we know it is not 0 but we can have leading zeros, these + // should be removed + while (!(frac % 10)) { + --count; + frac /= 10; + } + //^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2 + + // now do fractional part, as an unsigned number + do { + --count; + *wstr++ = (char)(48 + (frac % 10)); + } while (frac /= 10); + // add extra 0s + while (count-- > 0) { + *wstr++ = '0'; + } + // add decimal + *wstr++ = '.'; + } else { + *wstr++ = '0'; + *wstr++ = '.'; + } + + // Do whole part. Take care of sign + // conversion. Number is reversed. + do { + *wstr++ = (char)(48 + (whole % 10)); + } while (whole /= 10); + + if (neg) { + *wstr++ = '-'; + } + strreverse(str, wstr - 1); + enc->offset += (wstr - (enc->offset)); + + return TRUE; +} + +/* +FIXME: +Handle integration functions returning NULL here */ + +/* +FIXME: +Perhaps implement recursion detection */ + +void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, + size_t cbName) { + const char *value; + char *objName; + int count; + JSOBJ iterObj; + size_t szlen; + JSONTypeContext tc; + tc.encoder = enc; + + if (enc->level > enc->recursionMax) { + SetError(obj, enc, "Maximum recursion level reached"); + return; + } + + /* + This reservation must hold + + length of _name as encoded worst case + + maxLength of double to string OR maxLength of JSLONG to string + */ + + Buffer_Reserve(enc, 256 + RESERVE_STRING(cbName)); + if (enc->errorMsg) { + return; + } + + if (name) { + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, name, name + cbName)) { + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, name, name + cbName)) { + return; + } + } + 
+ Buffer_AppendCharUnchecked(enc, '\"'); + + Buffer_AppendCharUnchecked(enc, ':'); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + } + + enc->beginTypeContext(obj, &tc); + + switch (tc.type) { + case JT_INVALID: { + return; + } + + case JT_ARRAY: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '['); + Buffer_AppendIndentNewlineUnchecked (enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(buffer, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked (enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + + enc->level++; + Buffer_AppendIndentUnchecked (enc, enc->level); + encode(iterObj, enc, NULL, 0); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked (enc); + Buffer_AppendIndentUnchecked (enc, enc->level); + Buffer_AppendCharUnchecked(enc, ']'); + break; + } + + case JT_OBJECT: { + count = 0; + enc->iterBegin(obj, &tc); + + Buffer_AppendCharUnchecked(enc, '{'); + Buffer_AppendIndentNewlineUnchecked (enc); + + while (enc->iterNext(obj, &tc)) { + if (count > 0) { + Buffer_AppendCharUnchecked(enc, ','); +#ifndef JSON_NO_EXTRA_WHITESPACE + Buffer_AppendCharUnchecked(enc, ' '); +#endif + Buffer_AppendIndentNewlineUnchecked (enc); + } + + iterObj = enc->iterGetValue(obj, &tc); + objName = enc->iterGetName(obj, &tc, &szlen); + + enc->level++; + Buffer_AppendIndentUnchecked (enc, enc->level); + encode(iterObj, enc, objName, szlen); + count++; + } + + enc->iterEnd(obj, &tc); + Buffer_AppendIndentNewlineUnchecked (enc); + Buffer_AppendIndentUnchecked (enc, enc->level); + Buffer_AppendCharUnchecked(enc, '}'); + break; + } + + case JT_LONG: { + Buffer_AppendLongUnchecked(enc, enc->getLongValue(obj, &tc)); + break; + } + + case JT_INT: { + Buffer_AppendIntUnchecked(enc, enc->getIntValue(obj, &tc)); + break; + } + + case JT_TRUE: { + Buffer_AppendCharUnchecked(enc, 't'); + 
Buffer_AppendCharUnchecked(enc, 'r'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_FALSE: { + Buffer_AppendCharUnchecked(enc, 'f'); + Buffer_AppendCharUnchecked(enc, 'a'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 's'); + Buffer_AppendCharUnchecked(enc, 'e'); + break; + } + + case JT_NULL: { + Buffer_AppendCharUnchecked(enc, 'n'); + Buffer_AppendCharUnchecked(enc, 'u'); + Buffer_AppendCharUnchecked(enc, 'l'); + Buffer_AppendCharUnchecked(enc, 'l'); + break; + } + + case JT_DOUBLE: { + if (!Buffer_AppendDoubleUnchecked(obj, enc, + enc->getDoubleValue(obj, &tc))) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + break; + } + + case JT_UTF8: { + value = enc->getStringValue(obj, &tc, &szlen); + Buffer_Reserve(enc, RESERVE_STRING(szlen)); + if (enc->errorMsg) { + enc->endTypeContext(obj, &tc); + return; + } + Buffer_AppendCharUnchecked(enc, '\"'); + + if (enc->forceASCII) { + if (!Buffer_EscapeStringValidated(obj, enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } else { + if (!Buffer_EscapeStringUnvalidated(enc, value, + value + szlen)) { + enc->endTypeContext(obj, &tc); + enc->level--; + return; + } + } + + Buffer_AppendCharUnchecked(enc, '\"'); + break; + } + } + + enc->endTypeContext(obj, &tc); + enc->level--; +} + +char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, + size_t _cbBuffer) { + char *locale; + enc->malloc = enc->malloc ? enc->malloc : malloc; + enc->free = enc->free ? enc->free : free; + enc->realloc = enc->realloc ? 
enc->realloc : realloc; + enc->errorMsg = NULL; + enc->errorObj = NULL; + enc->level = 0; + + if (enc->recursionMax < 1) { + enc->recursionMax = JSON_MAX_RECURSION_DEPTH; + } + + if (enc->doublePrecision < 0 || + enc->doublePrecision > JSON_DOUBLE_MAX_DECIMALS) { + enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS; + } + + if (_buffer == NULL) { + _cbBuffer = 32768; + enc->start = (char *)enc->malloc(_cbBuffer); + if (!enc->start) { + SetError(obj, enc, "Could not reserve memory block"); + return NULL; + } + enc->heap = 1; + } else { + enc->start = _buffer; + enc->heap = 0; + } + + enc->end = enc->start + _cbBuffer; + enc->offset = enc->start; + + locale = setlocale(LC_NUMERIC, NULL); + if (strcmp(locale, "C")) { + locale = strdup(locale); + if (!locale) { + SetError(NULL, enc, "Could not reserve memory block"); + return NULL; + } + setlocale(LC_NUMERIC, "C"); + encode(obj, enc, NULL, 0); + setlocale(LC_NUMERIC, locale); + free(locale); + } else { + encode(obj, enc, NULL, 0); + } + + Buffer_Reserve(enc, 1); + if (enc->errorMsg) { + return NULL; + } + Buffer_AppendCharUnchecked(enc, '\0'); + + return enc->start; +} diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c new file mode 100644 index 00000000..b2fc7884 --- /dev/null +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -0,0 +1,596 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. 
+*/ + +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY +#define NO_IMPORT_ARRAY +#define PY_SSIZE_T_CLEAN +#include +#include +#include + +#define PRINTMARK() + +typedef struct __PyObjectDecoder { + JSONObjectDecoder dec; + + void *npyarr; // Numpy context buffer + void *npyarr_addr; // Ref to npyarr ptr to track DECREF calls + npy_intp curdim; // Current array dimension + + PyArray_Descr *dtype; +} PyObjectDecoder; + +typedef struct __NpyArrContext { + PyObject *ret; + PyObject *labels[2]; + PyArray_Dims shape; + + PyObjectDecoder *dec; + + npy_intp i; + npy_intp elsize; + npy_intp elcount; +} NpyArrContext; + +// Numpy handling based on numpy internal code, specifically the function +// PyArray_FromIter. + +// numpy related functions are inter-dependent so declare them all here, +// to ensure the compiler catches any errors + +// standard numpy array handling +JSOBJ Object_npyNewArray(void *prv, void *decoder); +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj); +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// for more complex dtypes (object and string) fill a standard Python list +// and convert to a numpy array when done. 
+JSOBJ Object_npyNewArrayList(void *prv, void *decoder); +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj); +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value); + +// labelled support, encode keys and values of JS object into separate numpy +// arrays +JSOBJ Object_npyNewObject(void *prv, void *decoder); +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj); +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value); + +// free the numpy context buffer +void Npy_releaseContext(NpyArrContext *npyarr) { + PRINTMARK(); + if (npyarr) { + if (npyarr->shape.ptr) { + PyObject_Free(npyarr->shape.ptr); + } + if (npyarr->dec) { + npyarr->dec->npyarr = NULL; + npyarr->dec->curdim = 0; + } + Py_XDECREF(npyarr->labels[0]); + Py_XDECREF(npyarr->labels[1]); + Py_XDECREF(npyarr->ret); + PyObject_Free(npyarr); + } +} + +JSOBJ Object_npyNewArray(void *prv, void *_decoder) { + NpyArrContext *npyarr; + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + if (decoder->curdim <= 0) { + // start of array - initialise the context buffer + npyarr = decoder->npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + decoder->npyarr_addr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + return NULL; + } + + npyarr->dec = decoder; + npyarr->labels[0] = npyarr->labels[1] = NULL; + + npyarr->shape.ptr = PyObject_Malloc(sizeof(npy_intp) * NPY_MAXDIMS); + npyarr->shape.len = 1; + npyarr->ret = NULL; + + npyarr->elsize = 0; + npyarr->elcount = 4; + npyarr->i = 0; + } else { + // starting a new dimension continue the current array (and reshape + // after) + npyarr = (NpyArrContext *)decoder->npyarr; + if (decoder->curdim >= npyarr->shape.len) { + npyarr->shape.len++; + } + } + + npyarr->shape.ptr[decoder->curdim] = 0; + decoder->curdim++; + return npyarr; +} + +PyObject *Npy_returnLabelled(NpyArrContext *npyarr) { + PyObject *ret = npyarr->ret; + npy_intp i; + + if (npyarr->labels[0] || npyarr->labels[1]) { + // finished decoding, build tuple with values and 
labels + ret = PyTuple_New(npyarr->shape.len + 1); + for (i = 0; i < npyarr->shape.len; i++) { + if (npyarr->labels[i]) { + PyTuple_SET_ITEM(ret, i + 1, npyarr->labels[i]); + npyarr->labels[i] = NULL; + } else { + Py_INCREF(Py_None); + PyTuple_SET_ITEM(ret, i + 1, Py_None); + } + } + PyTuple_SET_ITEM(ret, 0, npyarr->ret); + } + + return ret; +} + +JSOBJ Object_npyEndArray(void *prv, JSOBJ obj) { + PyObject *ret; + char *new_data; + NpyArrContext *npyarr = (NpyArrContext *)obj; + int emptyType = NPY_DEFAULT_TYPE; + npy_intp i; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + ret = npyarr->ret; + i = npyarr->i; + + npyarr->dec->curdim--; + + if (i == 0 || !npyarr->ret) { + // empty array would not have been initialised so do it now. + if (npyarr->dec->dtype) { + emptyType = npyarr->dec->dtype->type_num; + } + npyarr->ret = ret = + PyArray_EMPTY(npyarr->shape.len, npyarr->shape.ptr, emptyType, 0); + } else if (npyarr->dec->curdim <= 0) { + // realloc to final size + new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * npyarr->elsize); + if (new_data == NULL) { + PyErr_NoMemory(); + Npy_releaseContext(npyarr); + return NULL; + } + ((PyArrayObject *)ret)->data = (void *)new_data; + // PyArray_BYTES(ret) = new_data; + } + + if (npyarr->dec->curdim <= 0) { + // finished decoding array, reshape if necessary + if (npyarr->shape.len > 1) { + npyarr->ret = PyArray_Newshape((PyArrayObject *)ret, &npyarr->shape, + NPY_ANYORDER); + Py_DECREF(ret); + } + + ret = Npy_returnLabelled(npyarr); + + npyarr->ret = NULL; + Npy_releaseContext(npyarr); + } + + return ret; +} + +int Object_npyArrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + PyObject *type; + PyArray_Descr *dtype; + npy_intp i; + char *new_data, *item; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + + i = npyarr->i; + + npyarr->shape.ptr[npyarr->dec->curdim - 1]++; + + if (PyArray_Check((PyObject *)value)) { + // multidimensional array, keep decoding values. 
+ return 1; + } + + if (!npyarr->ret) { + // Array not initialised yet. + // We do it here so we can 'sniff' the data type if none was provided + if (!npyarr->dec->dtype) { + type = PyObject_Type(value); + if (!PyArray_DescrConverter(type, &dtype)) { + Py_DECREF(type); + goto fail; + } + Py_INCREF(dtype); + Py_DECREF(type); + } else { + dtype = PyArray_DescrNew(npyarr->dec->dtype); + } + + // If it's an object or string then fill a Python list and subsequently + // convert. Otherwise we would need to somehow mess about with + // reference counts when renewing memory. + npyarr->elsize = dtype->elsize; + if (PyDataType_REFCHK(dtype) || npyarr->elsize == 0) { + Py_XDECREF(dtype); + + if (npyarr->dec->curdim > 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + npyarr->elcount = 0; + npyarr->ret = PyList_New(0); + if (!npyarr->ret) { + goto fail; + } + ((JSONObjectDecoder *)npyarr->dec)->newArray = + Object_npyNewArrayList; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = + Object_npyArrayListAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = + Object_npyEndArrayList; + return Object_npyArrayListAddItem(prv, obj, value); + } + + npyarr->ret = PyArray_NewFromDescr( + &PyArray_Type, dtype, 1, &npyarr->elcount, NULL, NULL, 0, NULL); + + if (!npyarr->ret) { + goto fail; + } + } + + if (i >= npyarr->elcount) { + // Grow PyArray_DATA(ret): + // this is similar for the strategy for PyListObject, but we use + // 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + if (npyarr->elsize == 0) { + PyErr_SetString(PyExc_ValueError, + "Cannot decode multidimensional arrays with " + "variable length elements to numpy"); + goto fail; + } + + npyarr->elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; + if (npyarr->elcount <= NPY_MAX_INTP / npyarr->elsize) { + new_data = PyDataMem_RENEW(PyArray_DATA(npyarr->ret), + npyarr->elcount * npyarr->elsize); + } else { + PyErr_NoMemory(); + goto fail; + } + ((PyArrayObject *)npyarr->ret)->data = (void *)new_data; + + // PyArray_BYTES(npyarr->ret) = new_data; + } + + PyArray_DIMS(npyarr->ret)[0] = i + 1; + + if ((item = PyArray_GETPTR1(npyarr->ret, i)) == NULL || + PyArray_SETITEM(npyarr->ret, item, value) == -1) { + goto fail; + } + + Py_DECREF((PyObject *)value); + npyarr->i++; + return 1; + +fail: + + Npy_releaseContext(npyarr); + return 0; +} + +JSOBJ Object_npyNewArrayList(void *prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + PyErr_SetString( + PyExc_ValueError, + "nesting not supported for object or variable length dtypes"); + Npy_releaseContext(decoder->npyarr); + return NULL; +} + +JSOBJ Object_npyEndArrayList(void *prv, JSOBJ obj) { + PyObject *list, *ret; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + // convert decoded list to numpy array + list = (PyObject *)npyarr->ret; + npyarr->ret = PyArray_FROM_O(list); + + ret = Npy_returnLabelled(npyarr); + npyarr->ret = list; + + ((JSONObjectDecoder *)npyarr->dec)->newArray = Object_npyNewArray; + ((JSONObjectDecoder *)npyarr->dec)->arrayAddItem = Object_npyArrayAddItem; + ((JSONObjectDecoder *)npyarr->dec)->endArray = Object_npyEndArray; + Npy_releaseContext(npyarr); + return ret; +} + +int Object_npyArrayListAddItem(void *prv, JSOBJ obj, JSOBJ value) { + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + PyList_Append((PyObject *)npyarr->ret, value); + Py_DECREF((PyObject *)value); + npyarr->elcount++; + return 1; +} + +JSOBJ Object_npyNewObject(void *prv, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + PRINTMARK(); + if (decoder->curdim > 1) { + 
PyErr_SetString(PyExc_ValueError, + "labels only supported up to 2 dimensions"); + return NULL; + } + + return ((JSONObjectDecoder *)decoder)->newArray(prv, decoder); +} + +JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) { + PyObject *list; + npy_intp labelidx; + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return NULL; + } + + labelidx = npyarr->dec->curdim - 1; + + list = npyarr->labels[labelidx]; + if (list) { + npyarr->labels[labelidx] = PyArray_FROM_O(list); + Py_DECREF(list); + } + + return (PyObject *)((JSONObjectDecoder *)npyarr->dec)->endArray(prv, obj); +} + +int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { + PyObject *label, *labels; + npy_intp labelidx; + // add key to label array, value to values array + NpyArrContext *npyarr = (NpyArrContext *)obj; + PRINTMARK(); + if (!npyarr) { + return 0; + } + + label = (PyObject *)name; + labelidx = npyarr->dec->curdim - 1; + + if (!npyarr->labels[labelidx]) { + npyarr->labels[labelidx] = PyList_New(0); + } + labels = npyarr->labels[labelidx]; + // only fill label array once, assumes all column labels are the same + // for 2-dimensional arrays. + if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) { + PyList_Append(labels, label); + } + + if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) { + Py_DECREF(label); + return 1; + } + return 0; +} + +int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { + int ret = PyDict_SetItem(obj, name, value); + Py_DECREF((PyObject *)name); + Py_DECREF((PyObject *)value); + return ret == 0 ? 1 : 0; +} + +int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { + int ret = PyList_Append(obj, value); + Py_DECREF((PyObject *)value); + return ret == 0 ? 
1 : 0; +} + +JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { + return PyUnicode_FromWideChar(start, (end - start)); +} + +JSOBJ Object_newTrue(void *prv) { Py_RETURN_TRUE; } + +JSOBJ Object_newFalse(void *prv) { Py_RETURN_FALSE; } + +JSOBJ Object_newNull(void *prv) { Py_RETURN_NONE; } + +JSOBJ Object_newPosInf(void *prv) { return PyFloat_FromDouble(Py_HUGE_VAL); } + +JSOBJ Object_newNegInf(void *prv) { return PyFloat_FromDouble(-Py_HUGE_VAL); } + +JSOBJ Object_newObject(void *prv, void *decoder) { return PyDict_New(); } + +JSOBJ Object_endObject(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newArray(void *prv, void *decoder) { return PyList_New(0); } + +JSOBJ Object_endArray(void *prv, JSOBJ obj) { return obj; } + +JSOBJ Object_newInteger(void *prv, JSINT32 value) { + return PyLong_FromLong((long)value); +} + +JSOBJ Object_newLong(void *prv, JSINT64 value) { + return PyLong_FromLongLong(value); +} + +JSOBJ Object_newDouble(void *prv, double value) { + return PyFloat_FromDouble(value); +} + +static void Object_releaseObject(void *prv, JSOBJ obj, void *_decoder) { + PyObjectDecoder *decoder = (PyObjectDecoder *)_decoder; + if (obj != decoder->npyarr_addr) { + Py_XDECREF(((PyObject *)obj)); + } +} + +static char *g_kwlist[] = {"obj", "precise_float", "numpy", + "labelled", "dtype", NULL}; + +PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs) { + PyObject *ret; + PyObject *sarg; + PyObject *arg; + PyObject *opreciseFloat = NULL; + JSONObjectDecoder *decoder; + PyObjectDecoder pyDecoder; + PyArray_Descr *dtype = NULL; + int numpy = 0, labelled = 0; + + JSONObjectDecoder dec = { + Object_newString, Object_objectAddKey, Object_arrayAddItem, + Object_newTrue, Object_newFalse, Object_newNull, + Object_newPosInf, Object_newNegInf, Object_newObject, + Object_endObject, Object_newArray, Object_endArray, + Object_newInteger, Object_newLong, Object_newDouble, + Object_releaseObject, PyObject_Malloc, PyObject_Free, + PyObject_Realloc}; 
+ + dec.preciseFloat = 0; + dec.prv = NULL; + + pyDecoder.dec = dec; + pyDecoder.curdim = 0; + pyDecoder.npyarr = NULL; + pyDecoder.npyarr_addr = NULL; + + decoder = (JSONObjectDecoder *)&pyDecoder; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiiO&", g_kwlist, &arg, + &opreciseFloat, &numpy, &labelled, + PyArray_DescrConverter2, &dtype)) { + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (opreciseFloat && PyObject_IsTrue(opreciseFloat)) { + decoder->preciseFloat = 1; + } + + if (PyBytes_Check(arg)) { + sarg = arg; + } else if (PyUnicode_Check(arg)) { + sarg = PyUnicode_AsUTF8String(arg); + if (sarg == NULL) { + // Exception raised above us by codec according to docs + return NULL; + } + } else { + PyErr_Format(PyExc_TypeError, "Expected 'str' or 'bytes'"); + return NULL; + } + + decoder->errorStr = NULL; + decoder->errorOffset = NULL; + + if (numpy) { + pyDecoder.dtype = dtype; + decoder->newArray = Object_npyNewArray; + decoder->endArray = Object_npyEndArray; + decoder->arrayAddItem = Object_npyArrayAddItem; + + if (labelled) { + decoder->newObject = Object_npyNewObject; + decoder->endObject = Object_npyEndObject; + decoder->objectAddKey = Object_npyObjectAddKey; + } + } + + ret = JSON_DecodeObject(decoder, PyBytes_AS_STRING(sarg), + PyBytes_GET_SIZE(sarg)); + + if (sarg != arg) { + Py_DECREF(sarg); + } + + if (PyErr_Occurred()) { + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + return NULL; + } + + if (decoder->errorStr) { + /* + FIXME: It's possible to give a much nicer error message here with actual + failing element in input etc*/ + + PyErr_Format(PyExc_ValueError, "%s", decoder->errorStr); + + if (ret) { + Py_DECREF((PyObject *)ret); + } + Npy_releaseContext(pyDecoder.npyarr); + + return NULL; + } + + return ret; +} diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c new file mode 100644 index 00000000..5cb782a0 --- /dev/null +++ 
b/pandas/_libs/src/ujson/python/objToJSON.c @@ -0,0 +1,2464 @@ +/* +Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. 
+ +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. +*/ +#define PY_ARRAY_UNIQUE_SYMBOL UJSON_NUMPY + +#define PY_SSIZE_T_CLEAN +#include +#include +#include +#include +#include +#include +#include +#include <../../../tslibs/src/datetime/np_datetime.h> +#include <../../../tslibs/src/datetime/np_datetime_strings.h> +#include "datetime.h" + +static PyTypeObject *type_decimal; +static PyTypeObject *cls_dataframe; +static PyTypeObject *cls_series; +static PyTypeObject *cls_index; +static PyTypeObject *cls_nat; +static PyTypeObject *cls_na; +PyObject *cls_timedelta; + +npy_int64 get_nat(void) { return NPY_MIN_INT64; } + +typedef char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, + size_t *_outLen); + +typedef struct __NpyArrContext { + PyObject *array; + char *dataptr; + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) + npy_intp dim; + npy_intp stride; + npy_intp ndim; + npy_intp index[NPY_MAXDIMS]; + int type_num; + PyArray_GetItemFunc *getitem; + + char **rowLabels; + char **columnLabels; +} NpyArrContext; + +typedef struct __PdBlockContext { + int colIdx; + int ncols; + int transpose; + + int *cindices; // frame column -> block column map + NpyArrContext **npyCtxts; // NpyArrContext for each column +} PdBlockContext; + +typedef struct __TypeContext { + JSPFN_ITERBEGIN iterBegin; + JSPFN_ITEREND iterEnd; + JSPFN_ITERNEXT iterNext; + JSPFN_ITERGETNAME iterGetName; + JSPFN_ITERGETVALUE iterGetValue; + PFN_PyTypeToUTF8 PyTypeToUTF8; + PyObject *newObj; + PyObject *dictObj; + Py_ssize_t index; + Py_ssize_t size; + PyObject *itemValue; + PyObject *itemName; + PyObject *attrList; + PyObject *iterator; + + double doubleValue; + JSINT64 longValue; + + char *cStr; + NpyArrContext *npyarr; + 
PdBlockContext *pdblock; + int transpose; + char **rowLabels; + char **columnLabels; + npy_intp rowLabelsLen; + npy_intp columnLabelsLen; +} TypeContext; + +typedef struct __PyObjectEncoder { + JSONObjectEncoder enc; + + // pass through the NpyArrContext when encoding multi-dimensional arrays + NpyArrContext *npyCtxtPassthru; + + // pass through the PdBlockContext when encoding blocks + PdBlockContext *blkCtxtPassthru; + + // pass-through to encode numpy data directly + int npyType; + void *npyValue; + TypeContext basicTypeContext; + + int datetimeIso; + NPY_DATETIMEUNIT datetimeUnit; + + // output format style for pandas data types + int outputFormat; + int originalOutputFormat; + + PyObject *defaultHandler; +} PyObjectEncoder; + +#define GET_TC(__ptrtc) ((TypeContext *)((__ptrtc)->prv)) + +enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; + +#define PRINTMARK() + +int PdBlock_iterNext(JSOBJ, JSONTypeContext *); + +void *initObjToJSON(void) { + PyObject *mod_pandas; + PyObject *mod_nattype; + PyObject *mod_natype; + PyObject *mod_decimal = PyImport_ImportModule("decimal"); + type_decimal = + (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal"); + Py_DECREF(mod_decimal); + + PyDateTime_IMPORT; + + mod_pandas = PyImport_ImportModule("pandas"); + if (mod_pandas) { + cls_dataframe = + (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "DataFrame"); + cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index"); + cls_series = + (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series"); + cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); + Py_DECREF(mod_pandas); + } + + mod_nattype = PyImport_ImportModule("pandas._libs.tslibs.nattype"); + if (mod_nattype) { + cls_nat = + (PyTypeObject *)PyObject_GetAttrString(mod_nattype, "NaTType"); + Py_DECREF(mod_nattype); + } + + mod_natype = PyImport_ImportModule("pandas._libs.missing"); + if (mod_natype) { + cls_na = (PyTypeObject *)PyObject_GetAttrString(mod_natype, "NAType"); 
+ Py_DECREF(mod_natype); + } + + /* Initialise numpy API */ + import_array(); + // GH 31463 + return NULL; +} + +static TypeContext *createTypeContext(void) { + TypeContext *pc; + + pc = PyObject_Malloc(sizeof(TypeContext)); + if (!pc) { + PyErr_NoMemory(); + return NULL; + } + pc->newObj = NULL; + pc->dictObj = NULL; + pc->itemValue = NULL; + pc->itemName = NULL; + pc->attrList = NULL; + pc->index = 0; + pc->size = 0; + pc->longValue = 0; + pc->doubleValue = 0.0; + pc->cStr = NULL; + pc->npyarr = NULL; + pc->pdblock = NULL; + pc->rowLabels = NULL; + pc->columnLabels = NULL; + pc->transpose = 0; + pc->rowLabelsLen = 0; + pc->columnLabelsLen = 0; + + return pc; +} + +/* + * Function: scaleNanosecToUnit + * ----------------------------- + * + * Scales an integer value representing time in nanoseconds to provided unit. + * + * Mutates the provided value directly. Returns 0 on success, non-zero on error. + */ +static int scaleNanosecToUnit(npy_int64 *value, NPY_DATETIMEUNIT unit) { + switch (unit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + *value /= 1000LL; + break; + case NPY_FR_ms: + *value /= 1000000LL; + break; + case NPY_FR_s: + *value /= 1000000000LL; + break; + default: + return -1; + } + + return 0; +} + +static PyObject *get_values(PyObject *obj) { + PyObject *values = NULL; + + PRINTMARK(); + + if (PyObject_HasAttrString(obj, "_internal_get_values")) { + PRINTMARK(); + values = PyObject_CallMethod(obj, "_internal_get_values", NULL); + + if (values == NULL) { + // Clear so we can subsequently try another method + PyErr_Clear(); + } else if (!PyArray_CheckExact(values)) { + // Didn't get a numpy array, so keep trying + PRINTMARK(); + Py_DECREF(values); + values = NULL; + } + } + + if ((values == NULL) && PyObject_HasAttrString(obj, "get_block_values")) { + PRINTMARK(); + values = PyObject_CallMethod(obj, "get_block_values", NULL); + + if (values == NULL) { + // Clear so we can subsequently try another method + PyErr_Clear(); + } else if 
(!PyArray_CheckExact(values)) { + // Didn't get a numpy array, so keep trying + PRINTMARK(); + Py_DECREF(values); + values = NULL; + } + } + + if (values == NULL) { + PyObject *typeRepr = PyObject_Repr((PyObject *)Py_TYPE(obj)); + PyObject *repr; + PRINTMARK(); + if (PyObject_HasAttrString(obj, "dtype")) { + PyObject *dtype = PyObject_GetAttrString(obj, "dtype"); + repr = PyObject_Repr(dtype); + Py_DECREF(dtype); + } else { + repr = PyUnicode_FromString(""); + } + + PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", + repr, typeRepr); + Py_DECREF(repr); + Py_DECREF(typeRepr); + + return NULL; + } + + return values; +} + +static PyObject *get_sub_attr(PyObject *obj, char *attr, char *subAttr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + PyObject *ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_GetAttrString(tmp, subAttr); + Py_DECREF(tmp); + + return ret; +} + +static int is_simple_frame(PyObject *obj) { + PyObject *check = get_sub_attr(obj, "_data", "is_mixed_type"); + int ret = (check == Py_False); + + if (!check) { + return 0; + } + + Py_DECREF(check); + return ret; +} + +static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { + PyObject *tmp = PyObject_GetAttrString(obj, attr); + Py_ssize_t ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_Length(tmp); + Py_DECREF(tmp); + + if (ret == -1) { + return 0; + } + + return ret; +} + +static npy_int64 get_long_attr(PyObject *o, const char *attr) { + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = + (PyLong_Check(value) ? 
PyLong_AsLongLong(value) : PyLong_AsLong(value)); + Py_DECREF(value); + return long_val; +} + +static npy_float64 total_seconds(PyObject *td) { + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; +} + +static PyObject *get_item(PyObject *obj, Py_ssize_t i) { + PyObject *tmp = PyLong_FromSsize_t(i); + PyObject *ret; + + if (tmp == 0) { + return 0; + } + ret = PyObject_GetItem(obj, tmp); + Py_DECREF(tmp); + + return ret; +} + +static char *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + PyObject *obj = (PyObject *)_obj; + *_outLen = PyBytes_GET_SIZE(obj); + return PyBytes_AS_STRING(obj); +} + +static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *Py_UNUSED(tc), + size_t *_outLen) { + return (char *)PyUnicode_AsUTF8AndSize(_obj, (Py_ssize_t *)_outLen); +} + +/* Converts the int64_t representation of a datetime to ISO; mutates len */ +static char *int64ToIso(int64_t value, NPY_DATETIMEUNIT base, size_t *len) { + npy_datetimestruct dts; + int ret_code; + + pandas_datetime_to_datetimestruct(value, NPY_FR_ns, &dts); + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + + if (result == NULL) { + PyErr_NoMemory(); + return NULL; + } + + ret_code = make_iso_8601_datetime(&dts, result, *len, base); + if (ret_code != 0) { + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +/* JSON callback. 
returns a char* and mutates the pointer to *len */ +static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused), + JSONTypeContext *tc, size_t *len) { + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return int64ToIso(GET_TC(tc)->longValue, base, len); +} + +static npy_datetime NpyDateTimeToEpoch(npy_datetime dt, NPY_DATETIMEUNIT base) { + scaleNanosecToUnit(&dt, base); + return dt; +} + +/* Convert PyDatetime To ISO C-string. mutates len */ +static char *PyDateTimeToIso(PyDateTime_Date *obj, NPY_DATETIMEUNIT base, + size_t *len) { + npy_datetimestruct dts; + int ret; + + ret = convert_pydatetime_to_datetimestruct(obj, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + return NULL; + } + + *len = (size_t)get_datetime_iso_8601_strlen(0, base); + char *result = PyObject_Malloc(*len); + ret = make_iso_8601_datetime(&dts, result, *len, base); + + if (ret != 0) { + PRINTMARK(); + PyErr_SetString(PyExc_ValueError, + "Could not convert datetime value to string"); + PyObject_Free(result); + return NULL; + } + + // Note that get_datetime_iso_8601_strlen just gives a generic size + // for ISO string conversion, not the actual size used + *len = strlen(result); + return result; +} + +/* JSON callback */ +static char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, + size_t *len) { + + if (!PyDate_Check(obj)) { + PyErr_SetString(PyExc_TypeError, "Expected date object"); + return NULL; + } + + NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; + return PyDateTimeToIso(obj, base, len); +} + +static npy_datetime PyDateTimeToEpoch(PyObject *obj, NPY_DATETIMEUNIT base) { + npy_datetimestruct dts; + int ret; + + if (!PyDate_Check(obj)) { + // TODO: raise TypeError + } + PyDateTime_Date *dt = (PyDateTime_Date *)obj; + + ret = convert_pydatetime_to_datetimestruct(dt, &dts); + if (ret != 0) { + if (!PyErr_Occurred()) { + 
PyErr_SetString(PyExc_ValueError, + "Could not convert PyDateTime to numpy datetime"); + } + // TODO: is setting errMsg required? + //((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + // return NULL; + } + + npy_datetime npy_dt = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts); + return NpyDateTimeToEpoch(npy_dt, base); +} + +static char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, size_t *outLen) { + PyObject *obj = (PyObject *)_obj; + PyObject *str; + PyObject *tmp; + + str = PyObject_CallMethod(obj, "isoformat", NULL); + if (str == NULL) { + PRINTMARK(); + *outLen = 0; + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ValueError, "Failed to convert time"); + } + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + return NULL; + } + if (PyUnicode_Check(str)) { + tmp = str; + str = PyUnicode_AsUTF8String(str); + Py_DECREF(tmp); + } + + GET_TC(tc)->newObj = str; + + *outLen = PyBytes_GET_SIZE(str); + char *outValue = PyBytes_AS_STRING(str); + return outValue; +} + +//============================================================================= +// Numpy array iteration functions +//============================================================================= + +static void NpyArr_freeItemValue(JSOBJ Py_UNUSED(_obj), JSONTypeContext *tc) { + if (GET_TC(tc)->npyarr && + GET_TC(tc)->itemValue != GET_TC(tc)->npyarr->array) { + PRINTMARK(); + Py_XDECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } +} + +int NpyArr_iterNextNone(JSOBJ Py_UNUSED(_obj), JSONTypeContext *Py_UNUSED(tc)) { + return 0; +} + +void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyArrayObject *obj; + NpyArrContext *npyarr; + + if (GET_TC(tc)->newObj) { + obj = (PyArrayObject *)GET_TC(tc)->newObj; + } else { + obj = (PyArrayObject *)_obj; + } + + PRINTMARK(); + npyarr = PyObject_Malloc(sizeof(NpyArrContext)); + GET_TC(tc)->npyarr = npyarr; + + if (!npyarr) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; + } + + npyarr->array = (PyObject 
*)obj; + npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem; + npyarr->dataptr = PyArray_DATA(obj); + npyarr->ndim = PyArray_NDIM(obj) - 1; + npyarr->curdim = 0; + npyarr->type_num = PyArray_DESCR(obj)->type_num; + + if (GET_TC(tc)->transpose) { + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; + } else { + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; + } + + npyarr->columnLabels = GET_TC(tc)->columnLabels; + npyarr->rowLabels = GET_TC(tc)->rowLabels; +} + +void NpyArr_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + PRINTMARK(); + + if (npyarr) { + NpyArr_freeItemValue(obj, tc); + PyObject_Free(npyarr); + } +} + +void NpyArrPassThru_iterBegin(JSOBJ Py_UNUSED(obj), + JSONTypeContext *Py_UNUSED(tc)) { + PRINTMARK(); +} + +void NpyArrPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + PRINTMARK(); + // finished this dimension, reset the data pointer + npyarr->curdim--; + npyarr->dataptr -= npyarr->stride * npyarr->index[npyarr->stridedim]; + npyarr->stridedim -= npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->dataptr += npyarr->stride; + + NpyArr_freeItemValue(obj, tc); +} + +int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + PRINTMARK(); + + if (PyErr_Occurred()) { + return 0; + } + + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + PRINTMARK(); + return 0; + } + + NpyArr_freeItemValue(obj, tc); + + if (PyArray_ISDATETIME(npyarr->array)) { + PRINTMARK(); + GET_TC(tc)->itemValue = obj; + Py_INCREF(obj); + ((PyObjectEncoder *)tc->encoder)->npyType = 
PyArray_TYPE(npyarr->array); + ((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + } else { + PRINTMARK(); + GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array); + } + + npyarr->dataptr += npyarr->stride; + npyarr->index[npyarr->stridedim]++; + return 1; +} + +int NpyArr_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + PRINTMARK(); + + if (PyErr_Occurred()) { + PRINTMARK(); + return 0; + } + + if (npyarr->curdim >= npyarr->ndim || + npyarr->index[npyarr->stridedim] >= npyarr->dim) { + PRINTMARK(); + // innermost dimension, start retrieving item values + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + return NpyArr_iterNextItem(_obj, tc); + } + + // dig a dimension deeper + npyarr->index[npyarr->stridedim]++; + + npyarr->curdim++; + npyarr->stridedim += npyarr->inc; + npyarr->dim = PyArray_DIM(npyarr->array, npyarr->stridedim); + npyarr->stride = PyArray_STRIDE(npyarr->array, npyarr->stridedim); + npyarr->index[npyarr->stridedim] = 0; + + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr; + GET_TC(tc)->itemValue = npyarr->array; + return 1; +} + +JSOBJ NpyArr_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PRINTMARK(); + return GET_TC(tc)->itemValue; +} + +char *NpyArr_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + NpyArrContext *npyarr = GET_TC(tc)->npyarr; + npy_intp idx; + PRINTMARK(); + char *cStr; + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + + return cStr; +} + +//============================================================================= +// Pandas block iteration functions +// +// Serialises a DataFrame column by column to avoid unnecessary data 
copies and +// more representative serialisation when dealing with mixed dtypes. +// +// Uses a dedicated NpyArrContext for each column. +//============================================================================= + +void PdBlockPassThru_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + PRINTMARK(); + + if (blkCtxt->transpose) { + blkCtxt->colIdx++; + } else { + blkCtxt->colIdx = 0; + } + + NpyArr_freeItemValue(obj, tc); +} + +int PdBlock_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + PRINTMARK(); + + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + blkCtxt->colIdx++; + return NpyArr_iterNextItem(obj, tc); +} + +char *PdBlock_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; + npy_intp idx; + char *cStr; + PRINTMARK(); + + if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { + idx = blkCtxt->colIdx - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = GET_TC(tc)->iterNext != PdBlock_iterNext + ? 
npyarr->index[npyarr->stridedim - npyarr->inc] - 1 + : npyarr->index[npyarr->stridedim]; + + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +char *PdBlock_iterGetName_Transpose(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + npy_intp idx; + char *cStr; + PRINTMARK(); + + if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { + idx = npyarr->index[npyarr->stridedim] - 1; + cStr = npyarr->columnLabels[idx]; + } else { + idx = blkCtxt->colIdx; + cStr = npyarr->rowLabels[idx]; + } + + *outLen = strlen(cStr); + return cStr; +} + +int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + NpyArrContext *npyarr; + PRINTMARK(); + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (blkCtxt->transpose) { + if (blkCtxt->colIdx >= blkCtxt->ncols) { + return 0; + } + } else { + npyarr = blkCtxt->npyCtxts[0]; + if (npyarr->index[npyarr->stridedim] >= npyarr->dim) { + return 0; + } + } + + ((PyObjectEncoder *)tc->encoder)->blkCtxtPassthru = blkCtxt; + GET_TC(tc)->itemValue = obj; + + return 1; +} + +void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; + PRINTMARK(); + + if (blkCtxt->transpose) { + // if transposed we exhaust each column before moving to the next + GET_TC(tc)->iterNext = NpyArr_iterNextItem; + GET_TC(tc)->iterGetName = PdBlock_iterGetName_Transpose; + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; + } +} + +void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj, *blocks, *block, *values, *tmp; + PyArrayObject *locs; + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + Py_ssize_t i; + PyArray_Descr *dtype; + NpyIter *iter; + NpyIter_IterNextFunc *iternext; + npy_int64 **dataptr; + npy_int64 colIdx; + npy_intp idx; + 
+ PRINTMARK(); + + i = 0; + blocks = NULL; + dtype = PyArray_DescrFromType(NPY_INT64); + obj = (PyObject *)_obj; + + GET_TC(tc)->iterGetName = GET_TC(tc)->transpose + ? PdBlock_iterGetName_Transpose + : PdBlock_iterGetName; + + blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); + if (!blkCtxt) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + GET_TC(tc)->pdblock = blkCtxt; + + blkCtxt->colIdx = 0; + blkCtxt->transpose = GET_TC(tc)->transpose; + blkCtxt->ncols = get_attr_length(obj, "columns"); + + if (blkCtxt->ncols == 0) { + blkCtxt->npyCtxts = NULL; + blkCtxt->cindices = NULL; + + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + + blkCtxt->npyCtxts = + PyObject_Malloc(sizeof(NpyArrContext *) * blkCtxt->ncols); + if (!blkCtxt->npyCtxts) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + for (i = 0; i < blkCtxt->ncols; i++) { + blkCtxt->npyCtxts[i] = NULL; + } + + blkCtxt->cindices = PyObject_Malloc(sizeof(int) * blkCtxt->ncols); + if (!blkCtxt->cindices) { + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + + blocks = get_sub_attr(obj, "_data", "blocks"); + if (!blocks) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + + // force transpose so each NpyArrContext strides down its column + GET_TC(tc)->transpose = 1; + + for (i = 0; i < PyObject_Length(blocks); i++) { + block = get_item(blocks, i); + if (!block) { + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + + tmp = get_values(block); + if (!tmp) { + ((JSONObjectEncoder *)tc->encoder)->errorMsg = ""; + Py_DECREF(block); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + + values = PyArray_Transpose((PyArrayObject *)tmp, NULL); + Py_DECREF(tmp); + if (!values) { + Py_DECREF(block); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + + locs = (PyArrayObject *)get_sub_attr(block, "mgr_locs", "as_array"); + if (!locs) { + 
Py_DECREF(block); + Py_DECREF(values); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + + iter = NpyIter_New(locs, NPY_ITER_READONLY, NPY_KEEPORDER, + NPY_NO_CASTING, dtype); + if (!iter) { + Py_DECREF(block); + Py_DECREF(values); + Py_DECREF(locs); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + iternext = NpyIter_GetIterNext(iter, NULL); + if (!iternext) { + NpyIter_Deallocate(iter); + Py_DECREF(block); + Py_DECREF(values); + Py_DECREF(locs); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + goto BLKRET; + } + dataptr = (npy_int64 **)NpyIter_GetDataPtrArray(iter); + do { + colIdx = **dataptr; + idx = NpyIter_GetIterIndex(iter); + + blkCtxt->cindices[colIdx] = idx; + + // Reference freed in Pdblock_iterend + Py_INCREF(values); + GET_TC(tc)->newObj = values; + + // init a dedicated context for this column + NpyArr_iterBegin(obj, tc); + npyarr = GET_TC(tc)->npyarr; + + // set the dataptr to our desired column and initialise + if (npyarr != NULL) { + npyarr->dataptr += npyarr->stride * idx; + NpyArr_iterNext(obj, tc); + } + GET_TC(tc)->itemValue = NULL; + ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL; + + blkCtxt->npyCtxts[colIdx] = npyarr; + GET_TC(tc)->newObj = NULL; + } while (iternext(iter)); + + NpyIter_Deallocate(iter); + Py_DECREF(block); + Py_DECREF(values); + Py_DECREF(locs); + } + GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0]; + +BLKRET: + Py_XDECREF(dtype); + Py_XDECREF(blocks); +} + +void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) { + PdBlockContext *blkCtxt; + NpyArrContext *npyarr; + int i; + PRINTMARK(); + + GET_TC(tc)->itemValue = NULL; + npyarr = GET_TC(tc)->npyarr; + + blkCtxt = GET_TC(tc)->pdblock; + + if (blkCtxt) { + for (i = 0; i < blkCtxt->ncols; i++) { + npyarr = blkCtxt->npyCtxts[i]; + if (npyarr) { + if (npyarr->array) { + Py_DECREF(npyarr->array); + npyarr->array = NULL; + } + + GET_TC(tc)->npyarr = npyarr; + NpyArr_iterEnd(obj, tc); + + blkCtxt->npyCtxts[i] = NULL; + } + } + + if 
(blkCtxt->npyCtxts) { + PyObject_Free(blkCtxt->npyCtxts); + } + if (blkCtxt->cindices) { + PyObject_Free(blkCtxt->cindices); + } + PyObject_Free(blkCtxt); + } +} + +//============================================================================= +// Tuple iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +void Tuple_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyTuple_GET_SIZE((PyObject *)obj); + GET_TC(tc)->itemValue = NULL; +} + +int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + return 0; + } + + item = PyTuple_GET_ITEM(obj, GET_TC(tc)->index); + + GET_TC(tc)->itemValue = item; + GET_TC(tc)->index++; + return 1; +} + +void Tuple_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ Tuple_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Tuple_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Iterator iteration functions +// itemValue is borrowed reference, no ref counting +//============================================================================= +void Iter_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->itemValue = NULL; + GET_TC(tc)->iterator = PyObject_GetIter(obj); +} + +int Iter_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObject *item; + + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + item = PyIter_Next(GET_TC(tc)->iterator); + + if (item == NULL) { + return 0; + } + + GET_TC(tc)->itemValue = item; + return 1; +} + +void Iter_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + 
Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->iterator) { + Py_DECREF(GET_TC(tc)->iterator); + GET_TC(tc)->iterator = NULL; + } +} + +JSOBJ Iter_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Iter_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// Dir iteration functions +// itemName ref is borrowed from PyObject_Dir (attrList). No refcount +// itemValue ref is from PyObject_GetAttr. Ref counted +//============================================================================= +void Dir_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->attrList = PyObject_Dir(obj); + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE(GET_TC(tc)->attrList); + PRINTMARK(); +} + +void Dir_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + if (GET_TC(tc)->itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = NULL; + } + + if (GET_TC(tc)->itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = NULL; + } + + Py_DECREF((PyObject *)GET_TC(tc)->attrList); + PRINTMARK(); +} + +int Dir_iterNext(JSOBJ _obj, JSONTypeContext *tc) { + PyObject *obj = (PyObject *)_obj; + PyObject *itemValue = GET_TC(tc)->itemValue; + PyObject *itemName = GET_TC(tc)->itemName; + PyObject *attr; + PyObject *attrName; + char *attrStr; + + if (PyErr_Occurred() || ((JSONObjectEncoder *)tc->encoder)->errorMsg) { + return 0; + } + + if (itemValue) { + Py_DECREF(GET_TC(tc)->itemValue); + GET_TC(tc)->itemValue = itemValue = NULL; + } + + if (itemName) { + Py_DECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = itemName = NULL; + } + + for (; GET_TC(tc)->index < GET_TC(tc)->size; GET_TC(tc)->index++) { + attrName = PyList_GET_ITEM(GET_TC(tc)->attrList, GET_TC(tc)->index); + attr = PyUnicode_AsUTF8String(attrName); + 
attrStr = PyBytes_AS_STRING(attr); + + if (attrStr[0] == '_') { + PRINTMARK(); + Py_DECREF(attr); + continue; + } + + itemValue = PyObject_GetAttr(obj, attrName); + if (itemValue == NULL) { + PyErr_Clear(); + Py_DECREF(attr); + PRINTMARK(); + continue; + } + + if (PyCallable_Check(itemValue)) { + Py_DECREF(itemValue); + Py_DECREF(attr); + PRINTMARK(); + continue; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index++; + + PRINTMARK(); + itemName = attr; + break; + } + + if (itemName == NULL) { + GET_TC(tc)->index = GET_TC(tc)->size; + GET_TC(tc)->itemValue = NULL; + return 0; + } + + GET_TC(tc)->itemName = itemName; + GET_TC(tc)->itemValue = itemValue; + GET_TC(tc)->index++; + + PRINTMARK(); + return 1; +} + +JSOBJ Dir_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PRINTMARK(); + return GET_TC(tc)->itemValue; +} + +char *Dir_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + PRINTMARK(); + *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName); + return PyBytes_AS_STRING(GET_TC(tc)->itemName); +} + +//============================================================================= +// List iteration functions +// itemValue is borrowed from object (which is list). 
No refcounting +//============================================================================= +void List_iterBegin(JSOBJ obj, JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->size = PyList_GET_SIZE((PyObject *)obj); +} + +int List_iterNext(JSOBJ obj, JSONTypeContext *tc) { + if (GET_TC(tc)->index >= GET_TC(tc)->size) { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index); + GET_TC(tc)->index++; + return 1; +} + +void List_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) {} + +JSOBJ List_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *List_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc), + size_t *Py_UNUSED(outLen)) { + return NULL; +} + +//============================================================================= +// pandas Index iteration functions +//============================================================================= +void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +void Index_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *Py_UNUSED(tc)) { + PRINTMARK(); +} + +JSOBJ Index_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return 
GET_TC(tc)->itemValue; +} + +char *Index_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= +// pandas Series iteration functions +//============================================================================= +void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); + enc->outputFormat = VALUES; // for contained series + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } + PRINTMARK(); +} + +int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { + Py_ssize_t index; + if (!GET_TC(tc)->cStr) { + return 0; + } + + index = GET_TC(tc)->index; + Py_XDECREF(GET_TC(tc)->itemValue); + if (index == 0) { + memcpy(GET_TC(tc)->cStr, "name", sizeof(char) * 5); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); + } else if (index == 1) { + memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6); + GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); + } else if (index == 2) { + memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5); + GET_TC(tc)->itemValue = get_values(obj); + if (!GET_TC(tc)->itemValue) { + return 0; + } + } else { + PRINTMARK(); + return 0; + } + + GET_TC(tc)->index++; + PRINTMARK(); + return 1; +} + +void Series_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; + enc->outputFormat = enc->originalOutputFormat; + PRINTMARK(); +} + +JSOBJ Series_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { + return GET_TC(tc)->itemValue; +} + +char *Series_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc, + size_t *outLen) { + *outLen = strlen(GET_TC(tc)->cStr); + return GET_TC(tc)->cStr; +} + +//============================================================================= 
+// pandas DataFrame iteration functions
+//=============================================================================
+
+// Prepares "split" style iteration over a DataFrame: resets the position,
+// allocates the 20 byte scratch buffer that holds the current key and switches
+// the encoder to VALUES so that the contained objects are written as arrays.
+// On allocation failure MemoryError is set and DataFrame_iterNext yields
+// nothing.
+void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
+    GET_TC(tc)->index = 0;
+    GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char));
+    enc->outputFormat = VALUES;  // for contained series & index
+    if (!GET_TC(tc)->cStr) {
+        PyErr_NoMemory();
+    }
+    PRINTMARK();
+}
+
+// Advances to the next of the three entries "columns", "index" and "data".
+// Returns 1 when an entry is available and 0 when iteration is finished or
+// when the value could not be obtained (the lookup that failed is expected to
+// have set the Python error).
+int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
+    Py_ssize_t index;
+    if (!GET_TC(tc)->cStr) {
+        return 0;
+    }
+
+    index = GET_TC(tc)->index;
+    // Drop the previous value and clear the slot so that no stale pointer is
+    // left behind on any of the early returns below.
+    Py_XDECREF(GET_TC(tc)->itemValue);
+    GET_TC(tc)->itemValue = NULL;
+    if (index == 0) {
+        memcpy(GET_TC(tc)->cStr, "columns", sizeof(char) * 8);
+        GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
+    } else if (index == 1) {
+        memcpy(GET_TC(tc)->cStr, "index", sizeof(char) * 6);
+        GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
+    } else if (index == 2) {
+        memcpy(GET_TC(tc)->cStr, "data", sizeof(char) * 5);
+        if (is_simple_frame(obj)) {
+            GET_TC(tc)->itemValue = get_values(obj);
+        } else {
+            // The frame itself is handed back as the value; take a reference
+            // because the next call releases itemValue.
+            Py_INCREF(obj);
+            GET_TC(tc)->itemValue = obj;
+        }
+    } else {
+        PRINTMARK();
+        return 0;
+    }
+
+    // Never hand a NULL value to the encoder: a failed attribute lookup used
+    // to be reported as a valid entry for "columns" and "index".
+    if (!GET_TC(tc)->itemValue) {
+        return 0;
+    }
+
+    GET_TC(tc)->index++;
+    PRINTMARK();
+    return 1;
+}
+
+// Restores the output format that was active before DataFrame_iterBegin.
+void DataFrame_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
+    enc->outputFormat = enc->originalOutputFormat;
+    PRINTMARK();
+}
+
+// Returns the value fetched by the last successful DataFrame_iterNext call.
+JSOBJ DataFrame_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    return GET_TC(tc)->itemValue;
+}
+
+// Returns the key written by the last DataFrame_iterNext call; *outLen
+// receives its length (without the terminator).
+char *DataFrame_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
+                            size_t *outLen) {
+    *outLen = strlen(GET_TC(tc)->cStr);
+    return GET_TC(tc)->cStr;
+}
+
+//=============================================================================
+// Dict iteration functions
+// itemName might be converted to string (Python_Str).
Do refCounting
+// itemValue is borrowed from object (which is dict). No refCounting
+//=============================================================================
+
+// Resets the PyDict_Next cursor.
+void Dict_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    GET_TC(tc)->index = 0;
+    PRINTMARK();
+}
+
+// Advances to the next dict entry. itemName always ends up as an owned bytes
+// object: str keys are UTF-8 encoded, bytes keys are used as they are and any
+// other key goes through str() first. Returns 1 when an entry is available and
+// 0 at the end of the dict or when the key could not be converted (the failing
+// call has set the Python error and itemName is left NULL).
+int Dict_iterNext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    PyObject *key = NULL;  // borrowed from the dict
+    PyObject *keyStr;
+
+    if (GET_TC(tc)->itemName) {
+        Py_DECREF(GET_TC(tc)->itemName);
+        GET_TC(tc)->itemName = NULL;
+    }
+
+    if (!PyDict_Next((PyObject *)GET_TC(tc)->dictObj, &GET_TC(tc)->index,
+                     &key, &GET_TC(tc)->itemValue)) {
+        PRINTMARK();
+        return 0;
+    }
+
+    if (PyUnicode_Check(key)) {
+        GET_TC(tc)->itemName = PyUnicode_AsUTF8String(key);
+    } else if (!PyBytes_Check(key)) {
+        keyStr = PyObject_Str(key);
+        if (keyStr) {
+            GET_TC(tc)->itemName = PyUnicode_AsUTF8String(keyStr);
+            Py_DECREF(keyStr);
+        }
+    } else {
+        Py_INCREF(key);
+        GET_TC(tc)->itemName = key;
+    }
+
+    // A failed conversion must stop the iteration: Dict_iterGetName
+    // dereferences itemName without a NULL check.
+    if (!GET_TC(tc)->itemName) {
+        return 0;
+    }
+    PRINTMARK();
+    return 1;
+}
+
+// Releases the current key and the reference to the dict being iterated.
+void Dict_iterEnd(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    if (GET_TC(tc)->itemName) {
+        Py_DECREF(GET_TC(tc)->itemName);
+        GET_TC(tc)->itemName = NULL;
+    }
+    Py_DECREF(GET_TC(tc)->dictObj);
+    PRINTMARK();
+}
+
+// Returns the value of the current entry (borrowed from the dict).
+JSOBJ Dict_iterGetValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    return GET_TC(tc)->itemValue;
+}
+
+// Returns the bytes of the current key; *outLen receives their length.
+char *Dict_iterGetName(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc,
+                       size_t *outLen) {
+    *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
+    return PyBytes_AS_STRING(GET_TC(tc)->itemName);
+}
+
+// Frees the first len entries of labels and then the array itself.
+// A NULL array is accepted and ignored.
+void NpyArr_freeLabels(char **labels, npy_intp len) {
+    npy_intp i;
+
+    if (labels) {
+        for (i = 0; i < len; i++) {
+            PyObject_Free(labels[i]);
+        }
+        PyObject_Free(labels);
+    }
+}
+
+/*
+ * Function: NpyArr_encodeLabels
+ * -----------------------------
+ *
+ * Builds an array of "encoded" labels.
+ *
+ * labels: PyArrayObject pointer for labels to be "encoded"
+ * enc   : encoder; supplies datetimeUnit and datetimeIso
+ * num   : number of labels
+ *
+ * "encode" is quoted above because we aren't really doing encoding
+ * For historical reasons this function would actually encode the entire
+ * array into a separate buffer with a separate call to JSON_Encode
+ * and would leave it to complex pointer manipulation from there to
+ * unpack values as needed. To make things simpler and more idiomatic
+ * this has instead just stringified any input save for datetime values,
+ * which may need to be represented in various formats.
+ *
+ * Returns an array of num NUL terminated strings that the caller releases
+ * with NpyArr_freeLabels, or NULL on failure. Every failure detected here
+ * releases the partial result; allocation failures set MemoryError.
+ */
+char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc,
+                           npy_intp num) {
+    // NOTE this function steals a reference to labels.
+    PyObject *item = NULL;
+    size_t len;
+    npy_intp i, stride;
+    char **ret;
+    char *dataptr, *cLabel;
+    int type_num;
+    PyArray_VectorUnaryFunc *castfunc = NULL;
+    NPY_DATETIMEUNIT base = enc->datetimeUnit;
+    PRINTMARK();
+
+    if (!labels) {
+        return 0;
+    }
+
+    if (PyArray_SIZE(labels) < num) {
+        PyErr_SetString(
+            PyExc_ValueError,
+            "Label array sizes do not match corresponding data shape");
+        Py_DECREF(labels);
+        return 0;
+    }
+
+    ret = PyObject_Malloc(sizeof(char *) * num);
+    if (!ret) {
+        PyErr_NoMemory();
+        Py_DECREF(labels);
+        return 0;
+    }
+
+    for (i = 0; i < num; i++) {
+        ret[i] = NULL;
+    }
+
+    stride = PyArray_STRIDE(labels, 0);
+    dataptr = PyArray_DATA(labels);
+    type_num = PyArray_TYPE(labels);
+
+    // The cast function only depends on the dtype, so look it up once. A
+    // missing cast function is an error; it used to be called regardless.
+    if (num > 0 && PyTypeNum_ISDATETIME(type_num)) {
+        castfunc =
+            PyArray_GetCastFunc(PyArray_DescrFromType(type_num), NPY_INT64);
+        if (!castfunc) {
+            PyErr_Format(PyExc_ValueError,
+                         "Cannot cast numpy dtype %d to long", type_num);
+            NpyArr_freeLabels(ret, num);
+            Py_DECREF(labels);
+            return 0;
+        }
+    }
+
+    for (i = 0; i < num; i++) {
+        int is_datetimelike = 0;
+        npy_int64 nanosecVal = 0;
+        PyObject *str = NULL;  // owns the buffer of a fallback label
+
+        cLabel = NULL;
+        len = 0;
+
+        item = PyArray_GETITEM(labels, dataptr);
+        if (!item) {
+            NpyArr_freeLabels(ret, num);
+            ret = 0;
+            break;
+        }
+
+        if (castfunc) {
+            is_datetimelike = 1;
+            castfunc(dataptr, &nanosecVal, 1, NULL, NULL);
+        } else if (PyDate_Check(item) || PyDelta_Check(item)) {
+            is_datetimelike = 1;
+            if (PyObject_HasAttrString(item, "value")) {
+                nanosecVal = get_long_attr(item, "value");
+            } else {
+                if (PyDelta_Check(item)) {
+                    nanosecVal = total_seconds(item) *
+                                 1000000000LL;  // nanoseconds per second
+                } else {
+                    // datetime.* objects don't follow above rules
+                    nanosecVal = PyDateTimeToEpoch(item, NPY_FR_ns);
+                }
+            }
+        }
+
+        // Every branch below either leaves an owned (datetime-like) or a
+        // borrowed (fallback) string in cLabel together with its length in
+        // len, or leaves cLabel NULL to signal failure.
+        if (is_datetimelike) {
+            if (nanosecVal == get_nat()) {
+                // len excludes the terminator, exactly like the other
+                // branches; the buffer used to be one byte shorter than the
+                // amount copied out of it further down.
+                len = 4;
+                cLabel = PyObject_Malloc(len + 1);
+                if (cLabel) {
+                    memcpy(cLabel, "null", len + 1);
+                } else {
+                    PyErr_NoMemory();
+                }
+            } else if (enc->datetimeIso) {
+                // TODO: Vectorized Timedelta function
+                if ((type_num == NPY_TIMEDELTA) || (PyDelta_Check(item))) {
+                    PyObject *iso = NULL;
+                    PyObject *td =
+                        PyObject_CallFunction(cls_timedelta, "(O)", item);
+                    if (td != NULL) {
+                        iso = PyObject_CallMethod(td, "isoformat", NULL);
+                        Py_DECREF(td);
+                    }
+                    if (iso != NULL) {
+                        const char *isoStr = PyUnicode_AsUTF8(iso);
+                        if (isoStr != NULL) {
+                            len = strlen(isoStr);
+                            cLabel = PyObject_Malloc(len + 1);
+                            if (cLabel) {
+                                memcpy(cLabel, isoStr, len + 1);
+                            } else {
+                                PyErr_NoMemory();
+                            }
+                        }
+                        Py_DECREF(iso);
+                    }
+                } else {
+                    if (type_num == NPY_DATETIME) {
+                        cLabel = int64ToIso(nanosecVal, base, &len);
+                    } else {
+                        cLabel = PyDateTimeToIso((PyDateTime_Date *)item,
+                                                 base, &len);
+                    }
+                }
+            } else {
+                cLabel = PyObject_Malloc(21);  // 21 chars for int64
+                if (cLabel) {
+                    snprintf(cLabel, 21, "%" NPY_DATETIME_FMT,
+                             NpyDateTimeToEpoch(nanosecVal, base));
+                    len = strlen(cLabel);
+                } else {
+                    PyErr_NoMemory();
+                }
+            }
+        } else {  // Fallback to string representation
+            str = PyObject_Str(item);
+            if (str != NULL) {
+                // The buffer belongs to str, which therefore has to stay
+                // alive until the label has been copied below.
+                cLabel = (char *)PyUnicode_AsUTF8(str);
+                if (cLabel != NULL) {
+                    len = strlen(cLabel);
+                }
+            }
+        }
+
+        if (cLabel == NULL) {
+            Py_XDECREF(str);
+            Py_DECREF(item);
+            NpyArr_freeLabels(ret, num);
+            ret = 0;
+            break;
+        }
+
+        Py_DECREF(item);
+        // Add 1 to include NULL terminator
+        ret[i] = PyObject_Malloc(len + 1);
+        if (ret[i]) {
+            memcpy(ret[i], cLabel, len + 1);
+        }
+
+        if (is_datetimelike) {
+            PyObject_Free(cLabel);
+        }
+        Py_XDECREF(str);
+
+        if (!ret[i]) {
+            PyErr_NoMemory();
+            NpyArr_freeLabels(ret, num);
+            ret = 0;
+            break;
+        }
+
+        if (PyErr_Occurred()) {
+            NpyArr_freeLabels(ret, num);
+            ret = 0;
+            break;
+        }
+
+        dataptr += stride;
+    }
+
+    Py_DECREF(labels);
+    return ret;
+}
+
+// Calls the user supplied default handler with obj and encodes whatever it
+// returns. Nothing is encoded while a Python error is pending; a NULL result
+// without an error is turned into TypeError.
+void Object_invokeDefaultHandler(PyObject *obj, PyObjectEncoder *enc) {
+    PyObject *tmpObj = NULL;
+    PRINTMARK();
+    tmpObj = PyObject_CallFunctionObjArgs(enc->defaultHandler, obj, NULL);
+    if (!PyErr_Occurred()) {
+        if (tmpObj == NULL) {
+            PyErr_SetString(PyExc_TypeError,
+                            "Failed to execute default handler");
+        } else {
+            encode(tmpObj, (JSONObjectEncoder *)enc, NULL, 0);
+        }
+    }
+    Py_XDECREF(tmpObj);
+    return;
+}
+
+// Encoder callback: classifies _obj, stores the resulting JSON type in
+// tc->type and, for everything except NULL, bool and None, attaches a freshly
+// created TypeContext to tc->prv.
+void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) {
+    PyObject *obj, *exc, *toDictFunc, *tmpObj, *values;
+    TypeContext *pc;
+    PyObjectEncoder *enc;
+    double val;
+    npy_int64 value;
+    int unit;
+    PRINTMARK();
+
+    tc->prv = NULL;
+
+    if (!_obj) {
+        tc->type = JT_INVALID;
+        return;
+    }
+
+    obj = (PyObject *)_obj;
+    enc = (PyObjectEncoder *)tc->encoder;
+
+    if (PyBool_Check(obj)) {
+        PRINTMARK();
+        tc->type = (obj == Py_True) ?
JT_TRUE : JT_FALSE;
+        return;
+    } else if (obj == Py_None) {
+        PRINTMARK();
+        tc->type = JT_NULL;
+        return;
+    }
+
+    pc = createTypeContext();
+    if (!pc) {
+        tc->type = JT_INVALID;
+        return;
+    }
+    tc->prv = pc;
+
+    // A numpy datetime-like element is passed through enc->npyValue rather
+    // than through obj.
+    if (PyTypeNum_ISDATETIME(enc->npyType)) {
+        PRINTMARK();
+        int64_t longVal;
+        PyArray_VectorUnaryFunc *castfunc =
+            PyArray_GetCastFunc(PyArray_DescrFromType(enc->npyType), NPY_INT64);
+        if (!castfunc) {
+            PyErr_Format(PyExc_ValueError, "Cannot cast numpy dtype %d to long",
+                         enc->npyType);
+            // Without a cast function there is nothing to call; it used to
+            // be invoked through the NULL pointer.
+            goto INVALID;
+        }
+        castfunc(enc->npyValue, &longVal, 1, NULL, NULL);
+        if (longVal == get_nat()) {
+            PRINTMARK();
+            tc->type = JT_NULL;
+        } else {
+
+            if (enc->datetimeIso) {
+                PRINTMARK();
+                pc->PyTypeToUTF8 = NpyDateTimeToIsoCallback;
+                // Currently no way to pass longVal to iso function, so use
+                // state management
+                GET_TC(tc)->longValue = longVal;
+                tc->type = JT_UTF8;
+            } else {
+                PRINTMARK();
+                NPY_DATETIMEUNIT base =
+                    ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
+                GET_TC(tc)->longValue = NpyDateTimeToEpoch(longVal, base);
+                tc->type = JT_LONG;
+            }
+        }
+
+        // TODO: this prevents infinite loop with mixed-type DataFrames;
+        // refactor
+        enc->npyCtxtPassthru = NULL;
+        enc->npyType = -1;
+        return;
+    }
+
+    if (PyIter_Check(obj) ||
+        (PyArray_Check(obj) && !PyArray_CheckScalar(obj))) {
+        PRINTMARK();
+        goto ISITERABLE;
+    }
+
+    if (PyLong_Check(obj)) {
+        PRINTMARK();
+        tc->type = JT_LONG;
+        GET_TC(tc)->longValue = PyLong_AsLongLong(obj);
+
+        exc = PyErr_Occurred();
+
+        if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
+            PRINTMARK();
+            goto INVALID;
+        }
+
+        return;
+    } else if (PyFloat_Check(obj)) {
+        PRINTMARK();
+        val = PyFloat_AS_DOUBLE(obj);
+        if (npy_isnan(val) || npy_isinf(val)) {
+            tc->type = JT_NULL;
+        } else {
+            GET_TC(tc)->doubleValue = val;
+            tc->type = JT_DOUBLE;
+        }
+        return;
+    } else if (PyBytes_Check(obj)) {
+        PRINTMARK();
+        pc->PyTypeToUTF8 = PyBytesToUTF8;
+        tc->type = JT_UTF8;
+        return;
+    } else if (PyUnicode_Check(obj)) {
+        PRINTMARK();
+        pc->PyTypeToUTF8 = PyUnicodeToUTF8;
+        tc->type = JT_UTF8;
+        return;
+    } else if (PyObject_TypeCheck(obj, type_decimal)) {
+        PRINTMARK();
+        GET_TC(tc)->doubleValue = PyFloat_AsDouble(obj);
+        tc->type = JT_DOUBLE;
+        return;
+    } else if (PyDateTime_Check(obj) || PyDate_Check(obj)) {
+        if (PyObject_TypeCheck(obj, cls_nat)) {
+            PRINTMARK();
+            tc->type = JT_NULL;
+            return;
+        }
+
+        PRINTMARK();
+        if (enc->datetimeIso) {
+            PRINTMARK();
+            pc->PyTypeToUTF8 = PyDateTimeToIsoCallback;
+            tc->type = JT_UTF8;
+        } else {
+            PRINTMARK();
+            NPY_DATETIMEUNIT base =
+                ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
+            GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+            tc->type = JT_LONG;
+        }
+        return;
+    } else if (PyTime_Check(obj)) {
+        PRINTMARK();
+        pc->PyTypeToUTF8 = PyTimeToJSON;
+        tc->type = JT_UTF8;
+        return;
+    } else if (PyArray_IsScalar(obj, Datetime)) {
+        PRINTMARK();
+        if (((PyDatetimeScalarObject *)obj)->obval == get_nat()) {
+            PRINTMARK();
+            tc->type = JT_NULL;
+            return;
+        }
+
+        PRINTMARK();
+        if (enc->datetimeIso) {
+            PRINTMARK();
+            pc->PyTypeToUTF8 = PyDateTimeToIsoCallback;
+            tc->type = JT_UTF8;
+        } else {
+            PRINTMARK();
+            NPY_DATETIMEUNIT base =
+                ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
+            GET_TC(tc)->longValue = PyDateTimeToEpoch(obj, base);
+            tc->type = JT_LONG;
+        }
+        return;
+    } else if (PyDelta_Check(obj)) {
+        if (PyObject_HasAttrString(obj, "value")) {
+            PRINTMARK();
+            value = get_long_attr(obj, "value");
+        } else {
+            PRINTMARK();
+            value = total_seconds(obj) * 1000000000LL;  // nanoseconds per second
+        }
+
+        unit = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
+        if (scaleNanosecToUnit(&value, unit) != 0) {
+            // TODO: Add some kind of error handling here
+        }
+
+        exc = PyErr_Occurred();
+
+        if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
+            PRINTMARK();
+            goto INVALID;
+        }
+
+        if (value == get_nat()) {
+            PRINTMARK();
+            tc->type = JT_NULL;
+            return;
+        }
+
+        GET_TC(tc)->longValue = value;
+
+        PRINTMARK();
+        tc->type = JT_LONG;
+        return;
+    } else if (PyArray_IsScalar(obj, Integer)) {
+        PRINTMARK();
+        tc->type = JT_LONG;
+        PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+                                  PyArray_DescrFromType(NPY_INT64));
+
+        exc = PyErr_Occurred();
+
+        if (exc && PyErr_ExceptionMatches(PyExc_OverflowError)) {
+            PRINTMARK();
+            goto INVALID;
+        }
+
+        return;
+    } else if (PyArray_IsScalar(obj, Bool)) {
+        PRINTMARK();
+        PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->longValue),
+                                  PyArray_DescrFromType(NPY_BOOL));
+        tc->type = (GET_TC(tc)->longValue) ? JT_TRUE : JT_FALSE;
+        return;
+    } else if (PyArray_IsScalar(obj, Float) || PyArray_IsScalar(obj, Double)) {
+        PRINTMARK();
+        PyArray_CastScalarToCtype(obj, &(GET_TC(tc)->doubleValue),
+                                  PyArray_DescrFromType(NPY_DOUBLE));
+        tc->type = JT_DOUBLE;
+        return;
+    } else if (PyArray_Check(obj) && PyArray_CheckScalar(obj)) {
+        PyErr_Format(PyExc_TypeError,
+                     "%R (0d array) is not JSON serializable at the moment",
+                     obj);
+        goto INVALID;
+    } else if (PyObject_TypeCheck(obj, cls_na)) {
+        PRINTMARK();
+        tc->type = JT_NULL;
+        return;
+    }
+
+ISITERABLE:
+
+    if (PyObject_TypeCheck(obj, cls_index)) {
+        if (enc->outputFormat == SPLIT) {
+            PRINTMARK();
+            tc->type = JT_OBJECT;
+            pc->iterBegin = Index_iterBegin;
+            pc->iterEnd = Index_iterEnd;
+            pc->iterNext = Index_iterNext;
+            pc->iterGetValue = Index_iterGetValue;
+            pc->iterGetName = Index_iterGetName;
+            return;
+        }
+
+        pc->newObj = get_values(obj);
+        if (pc->newObj) {
+            PRINTMARK();
+            tc->type = JT_ARRAY;
+            pc->iterBegin = NpyArr_iterBegin;
+            pc->iterEnd = NpyArr_iterEnd;
+            pc->iterNext = NpyArr_iterNext;
+            pc->iterGetValue = NpyArr_iterGetValue;
+            pc->iterGetName = NpyArr_iterGetName;
+        } else {
+            goto INVALID;
+        }
+
+        return;
+    } else if (PyObject_TypeCheck(obj, cls_series)) {
+        if (enc->outputFormat == SPLIT) {
+            PRINTMARK();
+            tc->type = JT_OBJECT;
+            pc->iterBegin = Series_iterBegin;
+            pc->iterEnd = Series_iterEnd;
+            pc->iterNext = Series_iterNext;
+            pc->iterGetValue = Series_iterGetValue;
+            pc->iterGetName = Series_iterGetName;
+            return;
+        }
+
+        pc->newObj = get_values(obj);
+        if (!pc->newObj) {
+            goto INVALID;
+        }
+
+        if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) {
+            PRINTMARK();
+            tc->type = JT_OBJECT;
+            tmpObj = PyObject_GetAttrString(obj, "index");
+            if (!tmpObj) {
+                goto INVALID;
+            }
+            values = get_values(tmpObj);
+            Py_DECREF(tmpObj);
+            if (!values) {
+                goto INVALID;
+            }
+            pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0);
+            pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc,
+                                                   pc->columnLabelsLen);
+            if (!pc->columnLabels) {
+                goto INVALID;
+            }
+        } else {
+            PRINTMARK();
+            tc->type = JT_ARRAY;
+        }
+        pc->iterBegin = NpyArr_iterBegin;
+        pc->iterEnd = NpyArr_iterEnd;
+        pc->iterNext = NpyArr_iterNext;
+        pc->iterGetValue = NpyArr_iterGetValue;
+        pc->iterGetName = NpyArr_iterGetName;
+        return;
+    } else if (PyArray_Check(obj)) {
+        if (enc->npyCtxtPassthru) {
+            PRINTMARK();
+            pc->npyarr = enc->npyCtxtPassthru;
+            tc->type = (pc->npyarr->columnLabels ? JT_OBJECT : JT_ARRAY);
+
+            pc->iterBegin = NpyArrPassThru_iterBegin;
+            pc->iterNext = NpyArr_iterNext;
+            pc->iterEnd = NpyArrPassThru_iterEnd;
+            pc->iterGetValue = NpyArr_iterGetValue;
+            pc->iterGetName = NpyArr_iterGetName;
+
+            enc->npyCtxtPassthru = NULL;
+            return;
+        }
+
+        PRINTMARK();
+        tc->type = JT_ARRAY;
+        pc->iterBegin = NpyArr_iterBegin;
+        pc->iterEnd = NpyArr_iterEnd;
+        pc->iterNext = NpyArr_iterNext;
+        pc->iterGetValue = NpyArr_iterGetValue;
+        pc->iterGetName = NpyArr_iterGetName;
+        return;
+    } else if (PyObject_TypeCheck(obj, cls_dataframe)) {
+        if (enc->blkCtxtPassthru) {
+            PRINTMARK();
+            pc->pdblock = enc->blkCtxtPassthru;
+            tc->type =
+                (pc->pdblock->npyCtxts[0]->columnLabels ? JT_OBJECT : JT_ARRAY);
+
+            pc->iterBegin = PdBlockPassThru_iterBegin;
+            pc->iterEnd = PdBlockPassThru_iterEnd;
+            pc->iterNext = PdBlock_iterNextItem;
+            pc->iterGetName = PdBlock_iterGetName;
+            pc->iterGetValue = NpyArr_iterGetValue;
+
+            enc->blkCtxtPassthru = NULL;
+            return;
+        }
+
+        if (enc->outputFormat == SPLIT) {
+            PRINTMARK();
+            tc->type = JT_OBJECT;
+            pc->iterBegin = DataFrame_iterBegin;
+            pc->iterEnd = DataFrame_iterEnd;
+            pc->iterNext = DataFrame_iterNext;
+            pc->iterGetValue = DataFrame_iterGetValue;
+            pc->iterGetName = DataFrame_iterGetName;
+            return;
+        }
+
+        PRINTMARK();
+        if (is_simple_frame(obj)) {
+            pc->iterBegin = NpyArr_iterBegin;
+            pc->iterEnd = NpyArr_iterEnd;
+            pc->iterNext = NpyArr_iterNext;
+            pc->iterGetName = NpyArr_iterGetName;
+
+            pc->newObj = get_values(obj);
+            if (!pc->newObj) {
+                goto INVALID;
+            }
+        } else {
+            pc->iterBegin = PdBlock_iterBegin;
+            pc->iterEnd = PdBlock_iterEnd;
+            pc->iterNext = PdBlock_iterNext;
+            pc->iterGetName = PdBlock_iterGetName;
+        }
+        pc->iterGetValue = NpyArr_iterGetValue;
+
+        if (enc->outputFormat == VALUES) {
+            PRINTMARK();
+            tc->type = JT_ARRAY;
+        } else if (enc->outputFormat == RECORDS) {
+            PRINTMARK();
+            tc->type = JT_ARRAY;
+            tmpObj = PyObject_GetAttrString(obj, "columns");
+            if (!tmpObj) {
+                goto INVALID;
+            }
+            values = get_values(tmpObj);
+            if (!values) {
+                Py_DECREF(tmpObj);
+                goto INVALID;
+            }
+            pc->columnLabelsLen = PyObject_Size(tmpObj);
+            pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc,
+                                                   pc->columnLabelsLen);
+            Py_DECREF(tmpObj);
+            if (!pc->columnLabels) {
+                goto INVALID;
+            }
+        } else if (enc->outputFormat == INDEX || enc->outputFormat == COLUMNS) {
+            PRINTMARK();
+            tc->type = JT_OBJECT;
+            tmpObj = (enc->outputFormat == INDEX
+                          ? PyObject_GetAttrString(obj, "index")
+                          : PyObject_GetAttrString(obj, "columns"));
+            if (!tmpObj) {
+                goto INVALID;
+            }
+            values = get_values(tmpObj);
+            if (!values) {
+                Py_DECREF(tmpObj);
+                goto INVALID;
+            }
+            pc->rowLabelsLen = PyObject_Size(tmpObj);
+            pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc,
+                                                pc->rowLabelsLen);
+            Py_DECREF(tmpObj);
+            // Same check as for the column labels below; a failure here used
+            // to go unnoticed.
+            if (!pc->rowLabels) {
+                goto INVALID;
+            }
+            tmpObj = (enc->outputFormat == INDEX
+                          ? PyObject_GetAttrString(obj, "columns")
+                          : PyObject_GetAttrString(obj, "index"));
+            if (!tmpObj) {
+                NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen);
+                pc->rowLabels = NULL;
+                goto INVALID;
+            }
+            values = get_values(tmpObj);
+            if (!values) {
+                Py_DECREF(tmpObj);
+                NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen);
+                pc->rowLabels = NULL;
+                goto INVALID;
+            }
+            pc->columnLabelsLen = PyObject_Size(tmpObj);
+            pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc,
+                                                   pc->columnLabelsLen);
+            Py_DECREF(tmpObj);
+            if (!pc->columnLabels) {
+                NpyArr_freeLabels(pc->rowLabels, pc->rowLabelsLen);
+                pc->rowLabels = NULL;
+                goto INVALID;
+            }
+
+            if (enc->outputFormat == COLUMNS) {
+                PRINTMARK();
+                pc->transpose = 1;
+            }
+        } else {
+            goto INVALID;
+        }
+        return;
+    } else if (PyDict_Check(obj)) {
+        PRINTMARK();
+        tc->type = JT_OBJECT;
+        pc->iterBegin = Dict_iterBegin;
+        pc->iterEnd = Dict_iterEnd;
+        pc->iterNext = Dict_iterNext;
+        pc->iterGetValue = Dict_iterGetValue;
+        pc->iterGetName = Dict_iterGetName;
+        pc->dictObj = obj;
+        Py_INCREF(obj);
+
+        return;
+    } else if (PyList_Check(obj)) {
+        PRINTMARK();
+        tc->type = JT_ARRAY;
+        pc->iterBegin = List_iterBegin;
+        pc->iterEnd = List_iterEnd;
+        pc->iterNext = List_iterNext;
+        pc->iterGetValue = List_iterGetValue;
+        pc->iterGetName = List_iterGetName;
+        return;
+    } else if (PyTuple_Check(obj)) {
+        PRINTMARK();
+        tc->type = JT_ARRAY;
+        pc->iterBegin = Tuple_iterBegin;
+        pc->iterEnd = Tuple_iterEnd;
+        pc->iterNext = Tuple_iterNext;
+        pc->iterGetValue = Tuple_iterGetValue;
+        pc->iterGetName = Tuple_iterGetName;
+        return;
+    } else if (PyAnySet_Check(obj)) {
+        PRINTMARK();
+        tc->type = JT_ARRAY;
+        pc->iterBegin = Iter_iterBegin;
+        pc->iterEnd = Iter_iterEnd;
+        pc->iterNext = Iter_iterNext;
+        pc->iterGetValue = Iter_iterGetValue;
+        pc->iterGetName = Iter_iterGetName;
+        return;
+    }
+
+    toDictFunc = PyObject_GetAttrString(obj, "toDict");
+
+    if (toDictFunc) {
+        PyObject *tuple = PyTuple_New(0);
+        PyObject *toDictResult = PyObject_Call(toDictFunc, tuple, NULL);
+        Py_DECREF(tuple);
+        Py_DECREF(toDictFunc);
+
+        if (toDictResult == NULL) {
+            PyErr_Clear();
+            tc->type = JT_NULL;
+            return;
+        }
+
+        if (!PyDict_Check(toDictResult)) {
+            Py_DECREF(toDictResult);
+            tc->type = JT_NULL;
+            return;
+        }
+
+        PRINTMARK();
+        tc->type = JT_OBJECT;
+        pc->iterBegin = Dict_iterBegin;
+        pc->iterEnd = Dict_iterEnd;
+        pc->iterNext = Dict_iterNext;
+        pc->iterGetValue = Dict_iterGetValue;
+        pc->iterGetName = Dict_iterGetName;
+        pc->dictObj = toDictResult;
+        return;
+    }
+
+    PyErr_Clear();
+
+    if (enc->defaultHandler) {
+        Object_invokeDefaultHandler(obj, enc);
+        goto INVALID;
+    }
+
+    PRINTMARK();
+    tc->type = JT_OBJECT;
+    pc->iterBegin = Dir_iterBegin;
+    pc->iterEnd = Dir_iterEnd;
+    pc->iterNext = Dir_iterNext;
+    pc->iterGetValue = Dir_iterGetValue;
+    pc->iterGetName = Dir_iterGetName;
+    return;
+
+INVALID:
+    tc->type = JT_INVALID;
+    // Several branches arrive here after storing the values array in
+    // pc->newObj; release it before the context that owns it is freed.
+    Py_XDECREF(pc->newObj);
+    pc->newObj = NULL;
+    PyObject_Free(tc->prv);
+    tc->prv = NULL;
+    return;
+}
+
+// Encoder callback: releases everything Object_beginTypeContext attached to
+// tc (values array, row and column labels, key scratch buffer) and then the
+// context itself, unless it is the encoder's embedded basicTypeContext.
+void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    PRINTMARK();
+    if (tc->prv) {
+        Py_XDECREF(GET_TC(tc)->newObj);
+        GET_TC(tc)->newObj = NULL;
+        NpyArr_freeLabels(GET_TC(tc)->rowLabels, GET_TC(tc)->rowLabelsLen);
+        GET_TC(tc)->rowLabels = NULL;
+        NpyArr_freeLabels(GET_TC(tc)->columnLabels,
+                          GET_TC(tc)->columnLabelsLen);
+        GET_TC(tc)->columnLabels = NULL;
+
+        PyObject_Free(GET_TC(tc)->cStr);
+        GET_TC(tc)->cStr = NULL;
+        if (tc->prv !=
+            &(((PyObjectEncoder *)tc->encoder)->basicTypeContext)) {  // NOLINT
+            PyObject_Free(tc->prv);
+        }
+        tc->prv = NULL;
+    }
+}
+
+const char *Object_getStringValue(JSOBJ obj,
JSONTypeContext *tc,
+                                  size_t *_outLen) {
+    return GET_TC(tc)->PyTypeToUTF8(obj, tc, _outLen);
+}
+
+// Returns the integer stored by Object_beginTypeContext.
+JSINT64 Object_getLongValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    return GET_TC(tc)->longValue;
+}
+
+// Returns the double stored by Object_beginTypeContext.
+double Object_getDoubleValue(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
+    return GET_TC(tc)->doubleValue;
+}
+
+static void Object_releaseObject(JSOBJ _obj) { Py_DECREF((PyObject *)_obj); }
+
+// The five callbacks below forward to the iteration functions that
+// Object_beginTypeContext selected for the current object.
+void Object_iterBegin(JSOBJ obj, JSONTypeContext *tc) {
+    GET_TC(tc)->iterBegin(obj, tc);
+}
+
+int Object_iterNext(JSOBJ obj, JSONTypeContext *tc) {
+    return GET_TC(tc)->iterNext(obj, tc);
+}
+
+void Object_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
+    GET_TC(tc)->iterEnd(obj, tc);
+}
+
+JSOBJ Object_iterGetValue(JSOBJ obj, JSONTypeContext *tc) {
+    return GET_TC(tc)->iterGetValue(obj, tc);
+}
+
+char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) {
+    return GET_TC(tc)->iterGetName(obj, tc, outLen);
+}
+
+// Python entry point (exposed as "encode" and "dumps"): serialises obj to a
+// JSON str.
+//
+// Keyword arguments: ensure_ascii, double_precision, encode_html_chars,
+// orient ("records", "index", "split", "values" or "columns"), date_unit
+// ("s", "ms", "us" or "ns"), iso_dates, default_handler and indent.
+//
+// Returns a new str, or NULL with an exception set: ValueError for an invalid
+// double_precision, orient or date_unit, TypeError for a default_handler that
+// is not callable, OverflowError carrying the encoder's error message, or
+// whatever was raised while encoding.
+PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args,
+                    PyObject *kwargs) {
+    static char *kwlist[] = {"obj",
+                             "ensure_ascii",
+                             "double_precision",
+                             "encode_html_chars",
+                             "orient",
+                             "date_unit",
+                             "iso_dates",
+                             "default_handler",
+                             "indent",
+                             NULL};
+
+    char buffer[65536];
+    char *ret;
+    PyObject *newobj;
+    PyObject *oinput = NULL;
+    PyObject *oensureAscii = NULL;
+    int idoublePrecision = 10;  // default double precision setting
+    PyObject *oencodeHTMLChars = NULL;
+    char *sOrient = NULL;
+    char *sdateFormat = NULL;
+    PyObject *oisoDates = 0;
+    PyObject *odefHandler = 0;
+    int indent = 0;
+
+    PyObjectEncoder pyEncoder = {{
+        Object_beginTypeContext,
+        Object_endTypeContext,
+        Object_getStringValue,
+        Object_getLongValue,
+        NULL,  // getIntValue is unused
+        Object_getDoubleValue,
+        Object_iterBegin,
+        Object_iterNext,
+        Object_iterEnd,
+        Object_iterGetValue,
+        Object_iterGetName,
+        Object_releaseObject,
+        PyObject_Malloc,
+        PyObject_Realloc,
+        PyObject_Free,
+        -1,  // recursionMax
+        idoublePrecision,
+        1,  // forceAscii
+        0,  // encodeHTMLChars
+        0,  // indent
+    }};
+    JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder;
+
+    pyEncoder.npyCtxtPassthru = NULL;
+    pyEncoder.blkCtxtPassthru = NULL;
+    pyEncoder.npyType = -1;
+    pyEncoder.npyValue = NULL;
+    pyEncoder.datetimeIso = 0;
+    pyEncoder.datetimeUnit = NPY_FR_ms;
+    pyEncoder.outputFormat = COLUMNS;
+    pyEncoder.defaultHandler = 0;
+    pyEncoder.basicTypeContext.newObj = NULL;
+    pyEncoder.basicTypeContext.dictObj = NULL;
+    pyEncoder.basicTypeContext.itemValue = NULL;
+    pyEncoder.basicTypeContext.itemName = NULL;
+    pyEncoder.basicTypeContext.attrList = NULL;
+    pyEncoder.basicTypeContext.iterator = NULL;
+    pyEncoder.basicTypeContext.cStr = NULL;
+    pyEncoder.basicTypeContext.npyarr = NULL;
+    pyEncoder.basicTypeContext.rowLabels = NULL;
+    pyEncoder.basicTypeContext.columnLabels = NULL;
+
+    PRINTMARK();
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OiOssOOi", kwlist,
+                                     &oinput, &oensureAscii, &idoublePrecision,
+                                     &oencodeHTMLChars, &sOrient, &sdateFormat,
+                                     &oisoDates, &odefHandler, &indent)) {
+        return NULL;
+    }
+
+    if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii)) {
+        encoder->forceASCII = 0;
+    }
+
+    if (oencodeHTMLChars != NULL && PyObject_IsTrue(oencodeHTMLChars)) {
+        encoder->encodeHTMLChars = 1;
+    }
+
+    if (idoublePrecision > JSON_DOUBLE_MAX_DECIMALS || idoublePrecision < 0) {
+        PyErr_Format(
+            PyExc_ValueError,
+            "Invalid value '%d' for option 'double_precision', max is '%u'",
+            idoublePrecision, JSON_DOUBLE_MAX_DECIMALS);
+        return NULL;
+    }
+    encoder->doublePrecision = idoublePrecision;
+
+    if (sOrient != NULL) {
+        if (strcmp(sOrient, "records") == 0) {
+            pyEncoder.outputFormat = RECORDS;
+        } else if (strcmp(sOrient, "index") == 0) {
+            pyEncoder.outputFormat = INDEX;
+        } else if (strcmp(sOrient, "split") == 0) {
+            pyEncoder.outputFormat = SPLIT;
+        } else if (strcmp(sOrient, "values") == 0) {
+            pyEncoder.outputFormat = VALUES;
+        } else if (strcmp(sOrient, "columns") != 0) {
+            PyErr_Format(PyExc_ValueError,
+                         "Invalid value '%s' for option 'orient'", sOrient);
+            return NULL;
+        }
+    }
+
+    if (sdateFormat != NULL) {
+        if (strcmp(sdateFormat, "s") == 0) {
+            pyEncoder.datetimeUnit = NPY_FR_s;
+        } else if (strcmp(sdateFormat, "ms") == 0) {
+            pyEncoder.datetimeUnit = NPY_FR_ms;
+        } else if (strcmp(sdateFormat, "us") == 0) {
+            pyEncoder.datetimeUnit = NPY_FR_us;
+        } else if (strcmp(sdateFormat, "ns") == 0) {
+            pyEncoder.datetimeUnit = NPY_FR_ns;
+        } else {
+            PyErr_Format(PyExc_ValueError,
+                         "Invalid value '%s' for option 'date_unit'",
+                         sdateFormat);
+            return NULL;
+        }
+    }
+
+    if (oisoDates != NULL && PyObject_IsTrue(oisoDates)) {
+        pyEncoder.datetimeIso = 1;
+    }
+
+    if (odefHandler != NULL && odefHandler != Py_None) {
+        if (!PyCallable_Check(odefHandler)) {
+            PyErr_SetString(PyExc_TypeError, "Default handler is not callable");
+            return NULL;
+        }
+        pyEncoder.defaultHandler = odefHandler;
+    }
+
+    encoder->indent = indent;
+
+    pyEncoder.originalOutputFormat = pyEncoder.outputFormat;
+    PRINTMARK();
+    ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer));
+    PRINTMARK();
+    if (PyErr_Occurred()) {
+        PRINTMARK();
+        // The output may have outgrown the stack buffer; release the heap
+        // copy exactly as the other exits do instead of leaking it.
+        if (ret != buffer) {
+            encoder->free(ret);
+        }
+        return NULL;
+    }
+
+    if (encoder->errorMsg) {
+        PRINTMARK();
+        if (ret != buffer) {
+            encoder->free(ret);
+        }
+
+        PyErr_Format(PyExc_OverflowError, "%s", encoder->errorMsg);
+        return NULL;
+    }
+
+    newobj = PyUnicode_FromString(ret);
+
+    if (ret != buffer) {
+        encoder->free(ret);
+    }
+
+    PRINTMARK();
+
+    return newobj;
+}
diff --git a/pandas/_libs/src/ujson/python/ujson.c b/pandas/_libs/src/ujson/python/ujson.c
new file mode 100644
index 00000000..4a88fb7a
--- /dev/null
+++ b/pandas/_libs/src/ujson/python/ujson.c
@@ -0,0 +1,79 @@
+/*
+Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +* Neither the name of the ESN Social Software AB nor the +names of its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms +* Copyright (c) 1988-1993 The Regents of the University of California. +* Copyright (c) 1994 Sun Microsystems, Inc. 
+*/
+
+#include "version.h"
+#define PY_SSIZE_T_CLEAN  // must be defined before Python.h is included
+#include <Python.h>
+
+/* objToJSON */
+PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs);
+void initObjToJSON(void);
+
+/* JSONToObj */
+PyObject *JSONToObj(PyObject *self, PyObject *args, PyObject *kwargs);
+
+#define ENCODER_HELP_TEXT                                                  \
+    "Use ensure_ascii=false to output UTF-8. Pass in double_precision to " \
+    "alter the maximum digit precision of doubles. Set "                   \
+    "encode_html_chars=True to encode < > & as unicode escape sequences."
+
+static PyMethodDef ujsonMethods[] = {  // encode/dumps and decode/loads are aliases
+    {"encode", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS,
+     "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT},
+    {"decode", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS,
+     "Converts JSON as string to dict object structure. Use precise_float=True "
+     "to use high precision float decoder."},
+    {"dumps", (PyCFunction)objToJSON, METH_VARARGS | METH_KEYWORDS,
+     "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT},
+    {"loads", (PyCFunction)JSONToObj, METH_VARARGS | METH_KEYWORDS,
+     "Converts JSON as string to dict object structure. Use precise_float=True "
+     "to use high precision float decoder."},
+    {NULL, NULL, 0, NULL} /* Sentinel */
+};
+
+static PyModuleDef moduledef = {
+    .m_base = PyModuleDef_HEAD_INIT,
+    .m_name = "_libjson",
+    .m_methods = ujsonMethods
+};
+
+
+PyMODINIT_FUNC PyInit_json(void) {  // module init: sets up the encoder first
+    initObjToJSON();  // TODO: clean up, maybe via tp_free?
+    return PyModuleDef_Init(&moduledef);
+
+}
diff --git a/pandas/_libs/src/ujson/python/version.h b/pandas/_libs/src/ujson/python/version.h
new file mode 100644
index 00000000..ef6d28bf
--- /dev/null
+++ b/pandas/_libs/src/ujson/python/version.h
@@ -0,0 +1,43 @@
+/*
+Copyright (c) 2011-2013, ESN Social Software AB and Jonas Tarnstrom
+All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the ESN Social Software AB nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) +https://github.com/client9/stringencoders +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. + +Numeric decoder derived from from TCL library +http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms + * Copyright (c) 1988-1993 The Regents of the University of California. + * Copyright (c) 1994 Sun Microsystems, Inc. 
+*/ + +#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ +#define PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ + +#define UJSON_VERSION "1.33" + +#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_VERSION_H_ diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx new file mode 100644 index 00000000..5a30b71a --- /dev/null +++ b/pandas/_libs/testing.pyx @@ -0,0 +1,217 @@ +import numpy as np + +from pandas.core.dtypes.missing import isna, array_equivalent +from pandas.core.dtypes.common import is_dtype_equal + +# scalar types that assert_almost_equal compares numerically +cdef NUMERIC_TYPES = ( + bool, + int, + float, + np.bool_, # NumPy boolean scalar; the bare np.bool alias is not available in every NumPy release + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.float16, + np.float32, + np.float64, +) + + +cdef bint is_comparable_as_number(obj): + return isinstance(obj, NUMERIC_TYPES) + + +cdef bint isiterable(obj): + return hasattr(obj, '__iter__') + + +cdef bint has_length(obj): + return hasattr(obj, '__len__') + + +cdef bint is_dictlike(obj): + return hasattr(obj, 'keys') and hasattr(obj, '__getitem__') + + +cdef bint decimal_almost_equal(double desired, double actual, int decimal): + # Code from + # http://docs.scipy.org/doc/numpy/reference/generated + # /numpy.testing.assert_almost_equal.html + return abs(desired - actual) < (0.5 * 10.0 ** -decimal) + + +cpdef assert_dict_equal(a, b, bint compare_keys=True): + assert is_dictlike(a) and is_dictlike(b), ( + "Cannot compare dict objects, one or both is not dict-like" + ) + + a_keys = frozenset(a.keys()) + b_keys = frozenset(b.keys()) + + if compare_keys: + assert a_keys == b_keys + + for k in a_keys: + assert_almost_equal(a[k], b[k]) + + return True + + +cpdef assert_almost_equal(a, b, + check_less_precise=False, + bint check_dtype=True, + obj=None, lobj=None, robj=None): + """ + Check that left and right objects are almost equal. + + Parameters + ---------- + a : object + b : object + check_less_precise : bool or int, default False + Specify comparison precision.
+ 5 digits (False) or 3 digits (True) after decimal points are + compared. If an integer, then this will be the number of decimal + points to compare + check_dtype: bool, default True + check dtype if both a and b are np.ndarray + obj : str, default None + Specify object name being compared, internally used to show + appropriate assertion message + lobj : str, default None + Specify left object name being compared, internally used to show + appropriate assertion message + robj : str, default None + Specify right object name being compared, internally used to show + appropriate assertion message + """ + cdef: + int decimal + double diff = 0.0 + Py_ssize_t i, na, nb + double fa, fb + bint is_unequal = False, a_is_ndarray, b_is_ndarray + + if lobj is None: + lobj = a + if robj is None: + robj = b + + assert isinstance(check_less_precise, (int, bool)) + + if isinstance(a, dict) or isinstance(b, dict): + return assert_dict_equal(a, b) + + if isinstance(a, str) or isinstance(b, str): + assert a == b, f"{a} != {b}" + return True + + a_is_ndarray = isinstance(a, np.ndarray) + b_is_ndarray = isinstance(b, np.ndarray) + + if obj is None: + if a_is_ndarray or b_is_ndarray: + obj = 'numpy array' + else: + obj = 'Iterable' + + if isiterable(a): + + if not isiterable(b): + from pandas._testing import assert_class_equal + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) + + assert has_length(a) and has_length(b), ("Can't compare objects without " + "length, one or both is invalid: " + f"({a}, {b})") + + if a_is_ndarray and b_is_ndarray: + na, nb = a.size, b.size + if a.shape != b.shape: + from pandas._testing import raise_assert_detail + raise_assert_detail( + obj, f'{obj} shapes are different', a.shape, b.shape) + + if check_dtype and not is_dtype_equal(a.dtype, b.dtype): + from pandas._testing import assert_attr_equal + assert_attr_equal('dtype', a, b, obj=obj) + + if array_equivalent(a, b, strict_nan=True): + return True + + else: + na, nb = 
len(a), len(b) + + if na != nb: + from pandas._testing import raise_assert_detail + + # if we have a small diff set, print it + if abs(na - nb) < 10: + r = list(set(a) ^ set(b)) + else: + r = None + + raise_assert_detail(obj, f"{obj} length are different", na, nb, r) + + for i in range(len(a)): + try: + assert_almost_equal(a[i], b[i], + check_less_precise=check_less_precise) + except AssertionError: + is_unequal = True + diff += 1 + + if is_unequal: + from pandas._testing import raise_assert_detail + msg = (f"{obj} values are different " + f"({np.round(diff * 100.0 / na, 5)} %)") + raise_assert_detail(obj, msg, lobj, robj) + + return True + + elif isiterable(b): + from pandas._testing import assert_class_equal + # classes can't be the same, to raise error + assert_class_equal(a, b, obj=obj) + + if isna(a) and isna(b): + # TODO: Should require same-dtype NA? + # nan / None comparison + return True + + if a == b: + # object comparison + return True + + if is_comparable_as_number(a) and is_comparable_as_number(b): + if array_equivalent(a, b, strict_nan=True): + # inf comparison + return True + + if check_less_precise is True: + decimal = 3 + elif check_less_precise is False: + decimal = 5 + else: + decimal = check_less_precise + + fa, fb = a, b + + # case for zero + if abs(fa) < 1e-5: + if not decimal_almost_equal(fa, fb, decimal): + assert False, (f'(very low values) expected {fb:.5f} ' + f'but got {fa:.5f}, with decimal {decimal}') + else: + if not decimal_almost_equal(1, fb / fa, decimal): + assert False, (f'expected {fb:.5f} but got {fa:.5f}, ' + f'with decimal {decimal}') + return True + + raise AssertionError(f"{a} != {b}") diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx new file mode 100644 index 00000000..53e3354c --- /dev/null +++ b/pandas/_libs/tslib.pyx @@ -0,0 +1,856 @@ +import cython + +from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, + PyDateTime_IMPORT, + timedelta, datetime, date, time) +# import datetime C API 
+PyDateTime_IMPORT + + +cimport numpy as cnp +from numpy cimport int64_t, ndarray, float64_t +import numpy as np +cnp.import_array() + +import pytz + +from pandas._libs.util cimport ( + is_integer_object, is_float_object, is_datetime64_object) + +from pandas._libs.tslibs.c_timestamp cimport _Timestamp + +from pandas._libs.tslibs.np_datetime cimport ( + check_dts_bounds, npy_datetimestruct, _string_to_dts, dt64_to_dtstruct, + dtstruct_to_dt64, pydatetime_to_dt64, pydate_to_dt64, get_datetime64_value) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + +from pandas._libs.tslibs.parsing import parse_datetime_string + +from pandas._libs.tslibs.timedeltas cimport cast_from_unit +from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info +from pandas._libs.tslibs.timezones import UTC +from pandas._libs.tslibs.conversion cimport ( + _TSObject, convert_datetime_to_tsobject, + get_datetime64_nanos) + +# many modules still look for NaT and iNaT here despite them not being needed +from pandas._libs.tslibs.nattype import nat_strings, iNaT # noqa:F821 +from pandas._libs.tslibs.nattype cimport ( + checknull_with_nat, NPY_NAT, c_NaT as NaT) + +from pandas._libs.tslibs.offsets cimport to_offset + +from pandas._libs.tslibs.timestamps cimport create_timestamp_from_ts +from pandas._libs.tslibs.timestamps import Timestamp + +from pandas._libs.tslibs.tzconversion cimport ( + tz_convert_single, tz_convert_utc_to_tzlocal) + + +cdef inline object create_datetime_from_ts( + int64_t value, npy_datetimestruct dts, + object tz, object freq): + """ convenience routine to construct a datetime.datetime from its parts """ + return datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + + +cdef inline object create_date_from_ts( + int64_t value, npy_datetimestruct dts, + object tz, object freq): + """ convenience routine to construct a datetime.date from its parts """ + return date(dts.year, dts.month, dts.day) + + +cdef inline object 
create_time_from_ts( + int64_t value, npy_datetimestruct dts, + object tz, object freq): + """ convenience routine to construct a datetime.time from its parts """ + return time(dts.hour, dts.min, dts.sec, dts.us, tz) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None, + str box="datetime"): + """ + Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp + + Parameters + ---------- + arr : array of i8 + tz : str, default None + convert to this timezone + freq : str/Offset, default None + freq to convert + box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime' + If datetime, convert to datetime.datetime + If date, convert to datetime.date + If time, convert to datetime.time + If Timestamp, convert to pandas.Timestamp + + Returns + ------- + result : array of dtype specified by box + """ + + cdef: + Py_ssize_t i, n = len(arr) + ndarray[int64_t] trans + int64_t[:] deltas + Py_ssize_t pos + npy_datetimestruct dts + object dt, new_tz + str typ + int64_t value, delta, local_value + ndarray[object] result = np.empty(n, dtype=object) + object (*func_create)(int64_t, npy_datetimestruct, object, object) + + if box == "date": + assert (tz is None), "tz should be None when converting to date" + + func_create = create_date_from_ts + elif box == "timestamp": + func_create = create_timestamp_from_ts + + if isinstance(freq, str): + freq = to_offset(freq) + elif box == "time": + func_create = create_time_from_ts + elif box == "datetime": + func_create = create_datetime_from_ts + else: + raise ValueError("box must be one of 'datetime', 'date', 'time' or 'timestamp'") + + if is_utc(tz) or tz is None: + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + dt64_to_dtstruct(value, &dts) + result[i] = func_create(value, dts, tz, freq) + elif is_tzlocal(tz): + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: 
+ # Python datetime objects do not support nanosecond + # resolution (yet, PEP 564). Need to compute new value + # using the i8 representation. + local_value = tz_convert_utc_to_tzlocal(value, tz) + dt64_to_dtstruct(local_value, &dts) + result[i] = func_create(value, dts, tz, freq) + else: + trans, deltas, typ = get_dst_info(tz) + + if typ not in ['pytz', 'dateutil']: + # static/fixed; in this case we know that len(delta) == 1 + delta = deltas[0] + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + # Adjust datetime64 timestamp, recompute datetimestruct + dt64_to_dtstruct(value + delta, &dts) + result[i] = func_create(value, dts, tz, freq) + + elif typ == 'dateutil': + # no zone-name change for dateutil tzs - dst etc + # represented in single object. + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + # Adjust datetime64 timestamp, recompute datetimestruct + pos = trans.searchsorted(value, side='right') - 1 + dt64_to_dtstruct(value + deltas[pos], &dts) + result[i] = func_create(value, dts, tz, freq) + else: + # pytz + for i in range(n): + value = arr[i] + if value == NPY_NAT: + result[i] = NaT + else: + # Adjust datetime64 timestamp, recompute datetimestruct + pos = trans.searchsorted(value, side='right') - 1 + # find right representation of dst etc in pytz timezone + new_tz = tz._tzinfos[tz._transition_info[pos]] + + dt64_to_dtstruct(value + deltas[pos], &dts) + result[i] = func_create(value, dts, new_tz, freq) + + return result + + +def _test_parse_iso8601(ts: str): + """ + TESTING ONLY: Parse string into Timestamp using iso8601 parser. 
Used + only for testing, actual construction uses `convert_str_to_tsobject` + """ + cdef: + _TSObject obj + int out_local = 0, out_tzoffset = 0 + + obj = _TSObject() + + if ts == 'now': + return Timestamp.utcnow() + elif ts == 'today': + return Timestamp.now().normalize() + + _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True) + obj.value = dtstruct_to_dt64(&obj.dts) + check_dts_bounds(&obj.dts) + if out_local == 1: + obj.tzinfo = pytz.FixedOffset(out_tzoffset) + obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC) + return Timestamp(obj.value, tz=obj.tzinfo) + else: + return Timestamp(obj.value) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def format_array_from_datetime(ndarray[int64_t] values, object tz=None, + object format=None, object na_rep=None): + """ + return a np object array of the string formatted values + + Parameters + ---------- + values : a 1-d i8 array + tz : the timezone (or None) + format : optional, default is None + a strftime capable string + na_rep : optional, default is None + a nat format, 'NaT' is used when None + + """ + cdef: + int64_t val, ns, N = len(values) + ndarray[int64_t] consider_values + bint show_ms = 0, show_us = 0, show_ns = 0, basic_format = 0 + ndarray[object] result = np.empty(N, dtype=object) + object ts, res + npy_datetimestruct dts + + if na_rep is None: + na_rep = 'NaT' + + # if we don't have a format nor tz, then choose + # a format based on precision + basic_format = format is None and tz is None + if basic_format: + consider_values = values[values != NPY_NAT] + show_ns = (consider_values % 1000).any() + + if not show_ns: + consider_values //= 1000 + show_us = (consider_values % 1000).any() + + if not show_us: # only probe milliseconds when no microsecond digits were found + consider_values //= 1000 + show_ms = (consider_values % 1000).any() + + for i in range(N): + val = values[i] + + if val == NPY_NAT: + result[i] = na_rep + elif basic_format: + + dt64_to_dtstruct(val, &dts) + res = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} ' +
f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}') + + if show_ns: + ns = dts.ps // 1000 + res += f'.{ns + dts.us * 1000:09d}' + elif show_us: + res += f'.{dts.us:06d}' + elif show_ms: + res += f'.{dts.us // 1000:03d}' + + result[i] = res + + else: + + ts = Timestamp(val, tz=tz) + if format is None: + result[i] = str(ts) + else: + + # invalid format string + # requires dates > 1900 + try: + result[i] = ts.strftime(format) + except ValueError: + result[i] = str(ts) + + return result + + +def array_with_unit_to_datetime(ndarray values, ndarray mask, object unit, + str errors='coerce'): + """ + Convert the ndarray to datetime according to the time unit. + + This function converts an array of objects into a numpy array of + datetime64[ns]. It returns the converted array + and also returns the timezone offset + + if errors: + - raise: return converted values or raise OutOfBoundsDatetime + if out of range on the conversion or + ValueError for other conversions (e.g. a string) + - ignore: return non-convertible values as the same unit + - coerce: NaT for non-convertibles + + Parameters + ---------- + values : ndarray of object + Date-like objects to convert + mask : ndarray of bool + Not-a-time mask for non-nullable integer types conversion, + can be None + unit : object + Time unit to use during conversion + errors : {'raise', 'ignore', 'coerce'}, default 'coerce' + Error behavior when parsing + + Returns + ------- + result : ndarray of M8[ns] values, or an object ndarray when + errors='ignore' and a value could not be converted + tz : parsed timezone offset or None + """ + cdef: + Py_ssize_t i, j, n=len(values) + int64_t m + ndarray[float64_t] fvalues + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + bint is_raise = errors=='raise' + bint need_to_iterate = True + ndarray[int64_t] iresult + ndarray[object] oresult + object tz = None + + assert is_ignore or is_coerce or is_raise + + if unit == 'ns': + if issubclass(values.dtype.type, np.integer): + result = values.astype('M8[ns]') + else: + result, tz = array_to_datetime(values.astype(object), errors=errors) +
if mask is not None: + iresult = result.view('i8') + iresult[mask] = NPY_NAT + return result, tz + + m = cast_from_unit(None, unit) + + if is_raise: + + # try a quick conversion to i8 + # if we have nulls that are not type-compat + # then need to iterate + if values.dtype.kind == "i": + # Note: this condition makes the casting="same_kind" redundant + iresult = values.astype('i8', casting='same_kind', copy=False) + # If no mask, fill mask by comparing to NPY_NAT constant + if mask is None: + mask = iresult == NPY_NAT + iresult[mask] = 0 + fvalues = iresult.astype('f8') * m + need_to_iterate = False + + # check the bounds + if not need_to_iterate: + + if ((fvalues < Timestamp.min.value).any() + or (fvalues > Timestamp.max.value).any()): + raise OutOfBoundsDatetime(f"cannot convert input with unit " + f"'{unit}'") + result = (iresult * m).astype('M8[ns]') + iresult = result.view('i8') + iresult[mask] = NPY_NAT + return result, tz + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + + try: + for i in range(n): + val = values[i] + + if checknull_with_nat(val): + iresult[i] = NPY_NAT + + elif is_integer_object(val) or is_float_object(val): + + if val != val or val == NPY_NAT: + iresult[i] = NPY_NAT + else: + try: + iresult[i] = cast_from_unit(val, unit) + except OverflowError: + if is_raise: + raise OutOfBoundsDatetime( + f"cannot convert input {val} with the unit " + f"'{unit}'") + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + iresult[i] = NPY_NAT + + else: + try: + iresult[i] = cast_from_unit(float(val), unit) + except ValueError: + if is_raise: + raise ValueError( + f"non convertible value {val} with the unit " + f"'{unit}'") + elif is_ignore: + raise AssertionError + iresult[i] = NPY_NAT + except OverflowError: + if is_raise: + raise OutOfBoundsDatetime( + f"cannot convert input {val} with the unit " + f"'{unit}'") + elif is_ignore: + raise AssertionError + 
iresult[i] = NPY_NAT + + else: + + if is_raise: + raise ValueError(f"unit='{unit}' not valid with non-numerical " + f"val='{val}'") + if is_ignore: + raise AssertionError + + iresult[i] = NPY_NAT + + return result, tz + + except AssertionError: + pass + + # we have hit an exception + # and are in ignore mode + # redo as object + + oresult = np.empty(n, dtype=object) + for i in range(n): + val = values[i] + + if checknull_with_nat(val): + oresult[i] = NaT + elif is_integer_object(val) or is_float_object(val): + + if val != val or val == NPY_NAT: + oresult[i] = NaT + else: + try: + oresult[i] = Timestamp(cast_from_unit(val, unit)) + except OverflowError: + oresult[i] = val + + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + oresult[i] = NaT + + else: + oresult[i] = val + + return oresult, tz + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef array_to_datetime(ndarray[object] values, str errors='raise', + bint dayfirst=False, bint yearfirst=False, + object utc=None, bint require_iso8601=False): + """ + Converts a 1D array of date-like values to a numpy array of either: + 1) datetime64[ns] data + 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError + is encountered + + Also returns a pytz.FixedOffset if an array of strings with the same + timezone offset is passed and utc=True is not passed. 
Otherwise, None + is returned + + Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric, + strings + + Parameters + ---------- + values : ndarray of object + date-like objects to convert + errors : str, default 'raise' + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + utc : bool, default None + indicator whether the dates should be UTC + require_iso8601 : bool, default False + indicator whether the datetime string should be iso8601 + + Returns + ------- + tuple (ndarray, tzoffset) + """ + cdef: + Py_ssize_t i, n = len(values) + object val, py_dt, tz, tz_out = None + ndarray[int64_t] iresult + ndarray[object] oresult + npy_datetimestruct dts + bint utc_convert = bool(utc) + bint seen_integer = 0 + bint seen_string = 0 + bint seen_datetime = 0 + bint seen_datetime_offset = 0 + bint is_raise = errors=='raise' + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + bint is_same_offsets + _TSObject _ts + int64_t value + int out_local=0, out_tzoffset=0 + float offset_seconds, tz_offset + set out_tzoffset_vals = set() + bint string_to_dts_failed + + # specify error conditions + assert is_raise or is_ignore or is_coerce + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + + try: + for i in range(n): + val = values[i] + + try: + if checknull_with_nat(val): + iresult[i] = NPY_NAT + + elif PyDateTime_Check(val): + seen_datetime = 1 + if val.tzinfo is not None: + if utc_convert: + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + else: + raise ValueError('Tz-aware datetime.datetime ' + 'cannot be converted to ' + 'datetime64 unless utc=True') + else: + iresult[i] = pydatetime_to_dt64(val, &dts) + if isinstance(val, _Timestamp): + iresult[i] += val.nanosecond + check_dts_bounds(&dts) + + elif PyDate_Check(val): + seen_datetime 
= 1 + iresult[i] = pydate_to_dt64(val, &dts) + check_dts_bounds(&dts) + + elif is_datetime64_object(val): + seen_datetime = 1 + iresult[i] = get_datetime64_nanos(val) + + elif is_integer_object(val) or is_float_object(val): + # these must be ns unit by-definition + seen_integer = 1 + + if val != val or val == NPY_NAT: + iresult[i] = NPY_NAT + elif is_raise or is_ignore: + iresult[i] = val + else: + # coerce + # we now need to parse this as if unit='ns' + # we can ONLY accept integers at this point + # if we have previously (or in future accept + # datetimes/strings, then we must coerce) + try: + iresult[i] = cast_from_unit(val, 'ns') + except OverflowError: + iresult[i] = NPY_NAT + + elif isinstance(val, str): + # string + seen_string = 1 + + if len(val) == 0 or val in nat_strings: + iresult[i] = NPY_NAT + continue + + string_to_dts_failed = _string_to_dts( + val, &dts, &out_local, + &out_tzoffset, False + ) + if string_to_dts_failed: + # An error at this point is a _parsing_ error + # specifically _not_ OutOfBoundsDatetime + if _parse_today_now(val, &iresult[i]): + continue + elif require_iso8601: + # if requiring iso8601 strings, skip trying + # other formats + if is_coerce: + iresult[i] = NPY_NAT + continue + elif is_raise: + raise ValueError(f"time data {val} doesn't " + f"match format specified") + return values, tz_out + + try: + py_dt = parse_datetime_string(val, + dayfirst=dayfirst, + yearfirst=yearfirst) + # If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.utcoffset() + + except (ValueError, OverflowError): + if is_coerce: + iresult[i] = NPY_NAT + continue + raise TypeError("invalid string coercion to " + "datetime") + + if tz is not None: + seen_datetime_offset = 1 + # dateutil timezone objects cannot be hashed, so + # store the UTC offsets in seconds instead + out_tzoffset_vals.add(tz.total_seconds()) + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and 
aware strings + out_tzoffset_vals.add('naive') + + _ts = convert_datetime_to_tsobject(py_dt, None) + iresult[i] = _ts.value + if not string_to_dts_failed: + # No error reported by string_to_dts, pick back up + # where we left off + value = dtstruct_to_dt64(&dts) + if out_local == 1: + seen_datetime_offset = 1 + # Store the out_tzoffset in seconds + # since we store the total_seconds of + # dateutil.tz.tzoffset objects + out_tzoffset_vals.add(out_tzoffset * 60.) + tz = pytz.FixedOffset(out_tzoffset) + value = tz_convert_single(value, tz, UTC) + out_local = 0 + out_tzoffset = 0 + else: + # Add a marker for naive string, to track if we are + # parsing mixed naive and aware strings + out_tzoffset_vals.add('naive') + iresult[i] = value + check_dts_bounds(&dts) + + else: + if is_coerce: + iresult[i] = NPY_NAT + else: + raise TypeError(f"{type(val)} is not convertible to datetime") + + except OutOfBoundsDatetime: + if is_coerce: + iresult[i] = NPY_NAT + continue + elif require_iso8601 and isinstance(val, str): + # GH#19382 for just-barely-OutOfBounds falling back to + # dateutil parser will return incorrect result because + # it will ignore nanoseconds + if is_raise: + + # Still raise OutOfBoundsDatetime, + # as error message is informative. 
+ raise + + assert is_ignore + return values, tz_out + raise + + except OutOfBoundsDatetime: + if is_raise: + raise + + return ignore_errors_out_of_bounds_fallback(values), tz_out + + except TypeError: + return array_to_datetime_object(values, errors, + dayfirst, yearfirst) + + if seen_datetime and seen_integer: + # we have mixed datetimes & integers + + if is_coerce: + # coerce all of the integers/floats to NaT, preserve + # the datetimes and other convertibles + for i in range(n): + val = values[i] + if is_integer_object(val) or is_float_object(val): + result[i] = NPY_NAT + elif is_raise: + raise ValueError("mixed datetimes and integers in passed array") + else: + return array_to_datetime_object(values, errors, + dayfirst, yearfirst) + + if seen_datetime_offset and not utc_convert: + # GH#17697 + # 1) If all the offsets are equal, return one offset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual dateutil.tzoffsets) are returned + is_same_offsets = len(out_tzoffset_vals) == 1 + if not is_same_offsets: + return array_to_datetime_object(values, errors, + dayfirst, yearfirst) + else: + tz_offset = out_tzoffset_vals.pop() + tz_out = pytz.FixedOffset(tz_offset / 60.) 
+ return result, tz_out + + +cdef ignore_errors_out_of_bounds_fallback(ndarray[object] values): + """ + Fallback for array_to_datetime if an OutOfBoundsDatetime is raised + and errors == "ignore" + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + + oresult = np.empty(n, dtype=object) + + for i in range(n): + val = values[i] + + # set as nan except if its a NaT + if checknull_with_nat(val): + if isinstance(val, float): + oresult[i] = np.nan + else: + oresult[i] = NaT + elif is_datetime64_object(val): + if get_datetime64_value(val) == NPY_NAT: + oresult[i] = NaT + else: + oresult[i] = val.item() + else: + oresult[i] = val + return oresult + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef array_to_datetime_object(ndarray[object] values, str errors, + bint dayfirst=False, bint yearfirst=False): + """ + Fall back function for array_to_datetime + + Attempts to parse datetime strings with dateutil to return an array + of datetime objects + + Parameters + ---------- + values : ndarray of object + date-like objects to convert + errors : str, default 'raise' + error behavior when parsing + dayfirst : bool, default False + dayfirst parsing behavior when encountering datetime strings + yearfirst : bool, default False + yearfirst parsing behavior when encountering datetime strings + + Returns + ------- + tuple (ndarray, None) + """ + cdef: + Py_ssize_t i, n = len(values) + object val, + bint is_ignore = errors == 'ignore' + bint is_coerce = errors == 'coerce' + bint is_raise = errors == 'raise' + ndarray[object] oresult + npy_datetimestruct dts + + assert is_raise or is_ignore or is_coerce + + oresult = np.empty(n, dtype=object) + + # We return an object array and only attempt to parse: + # 1) NaT or NaT-like values + # 2) datetime strings, which we return as datetime.datetime + for i in range(n): + val = values[i] + if checknull_with_nat(val) or 
PyDateTime_Check(val): + # GH 25978. No need to parse NaT-like or datetime-like vals + oresult[i] = val + elif isinstance(val, str): + if len(val) == 0 or val in nat_strings: + # NOTE(review): this stores the string 'NaT', while the coerce + # branch below stores the NaT object -- confirm this is intended + oresult[i] = 'NaT' + continue + try: + oresult[i] = parse_datetime_string(val, dayfirst=dayfirst, + yearfirst=yearfirst) + pydatetime_to_dt64(oresult[i], &dts) + check_dts_bounds(&dts) + except (ValueError, OverflowError): + if is_coerce: + oresult[i] = NaT + continue + if is_raise: + raise + return values, None + else: + if is_raise: + # NOTE(review): bare raise with no enclosing except in this + # function; presumably it re-raises the error the caller is + # currently handling -- confirm for callers with no active error + raise + return values, None + return oresult, None + + +cdef inline bint _parse_today_now(str val, int64_t* iresult): + # We delay this check for as long as possible + # because it catches relatively rare cases + # Writes the parsed value to iresult[0] and returns True for 'now'/'today' + if val == 'now': + # Note: this is *not* the same as Timestamp('now') + iresult[0] = Timestamp.utcnow().value + return True + elif val == 'today': + iresult[0] = Timestamp.today().value + return True + return False diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py new file mode 100644 index 00000000..8d3b00e4 --- /dev/null +++ b/pandas/_libs/tslibs/__init__.py @@ -0,0 +1,12 @@ +# flake8: noqa + +from .conversion import localize_pydatetime, normalize_date +from .nattype import NaT, NaTType, iNaT, is_null_datetimelike +from .np_datetime import OutOfBoundsDatetime +from .period import IncompatibleFrequency, Period +from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta +from .timestamps import Timestamp +from .tzconversion import tz_convert_single + +# import fails if we do this before np_datetime +from .c_timestamp import NullFrequencyError # isort:skip diff --git a/pandas/_libs/tslibs/c_timestamp.pxd b/pandas/_libs/tslibs/c_timestamp.pxd new file mode 100644 index 00000000..e41197d0 --- /dev/null +++ b/pandas/_libs/tslibs/c_timestamp.pxd @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +from cpython.datetime cimport datetime + +from numpy cimport int64_t + +cdef class _Timestamp(datetime): + cdef readonly:
+ int64_t value, nanosecond + object freq + list _date_attributes + cpdef bint _get_start_end_field(self, str field) + cpdef _get_date_name_field(self, object field, object locale) + cdef int64_t _maybe_convert_value_to_local(self) + cpdef to_datetime64(self) + cdef _assert_tzawareness_compat(_Timestamp self, datetime other) + cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1 diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx new file mode 100644 index 00000000..62a039d1 --- /dev/null +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -0,0 +1,408 @@ +""" +_Timestamp is a c-defined subclass of datetime.datetime + +It is separate from timestamps.pyx to prevent circular cimports + +This allows _Timestamp to be imported in other modules +so that isinstance(obj, _Timestamp) checks can be performed + +_Timestamp is PITA. Because we inherit from datetime, which has very specific +construction requirements, we need to do object instantiation in python +(see Timestamp class below). This will serve as a C extension type that +shadows the python class, where we do any heavy lifting. 
+""" + +import warnings + +from cpython.object cimport (PyObject_RichCompareBool, PyObject_RichCompare, + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) + +import numpy as np +cimport numpy as cnp +from numpy cimport int64_t, int8_t, uint8_t, ndarray +cnp.import_array() + +from cpython.datetime cimport (datetime, + PyDateTime_Check, PyDelta_Check, + PyDateTime_IMPORT) +PyDateTime_IMPORT + +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, is_timedelta64_object, is_integer_object, + is_array) + +from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field +from pandas._libs.tslibs.nattype cimport c_NaT as NaT +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +from pandas._libs.tslibs.np_datetime cimport ( + reverse_ops, cmp_scalar) +from pandas._libs.tslibs.timezones cimport ( + get_timezone, is_utc, tz_compare) +from pandas._libs.tslibs.timezones import UTC +from pandas._libs.tslibs.tzconversion cimport tz_convert_single + + +class NullFrequencyError(ValueError): + """ + Error raised when a null `freq` attribute is used in an operation + that needs a non-null frequency, particularly `DatetimeIndex.shift`, + `TimedeltaIndex.shift`, `PeriodIndex.shift`. + """ + pass + + +def integer_op_not_supported(obj): + # GH#22535 add/sub of integers and int-arrays is no longer allowed + # Note we return rather than raise the exception so we can raise in + # the caller; mypy finds this more palatable. + cls = type(obj).__name__ + + # GH#30886 using an fstring raises SystemError + int_addsub_msg = ( + "Addition/subtraction of integers and integer-arrays with {cls} is " + "no longer supported. 
Instead of adding/subtracting `n`, " + "use `n * obj.freq`" + ).format(cls=cls) + return TypeError(int_addsub_msg) + + +cdef class _Timestamp(datetime): + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + + def __hash__(_Timestamp self): + if self.nanosecond: + return hash(self.value) + return datetime.__hash__(self) + + def __richcmp__(_Timestamp self, object other, int op): + cdef: + _Timestamp ots + int ndim + + if isinstance(other, _Timestamp): + ots = other + elif other is NaT: + return op == Py_NE + elif PyDateTime_Check(other): + if self.nanosecond == 0: + val = self.to_pydatetime() + return PyObject_RichCompareBool(val, other, op) + + try: + ots = type(self)(other) + except ValueError: + return self._compare_outside_nanorange(other, op) + else: + ndim = getattr(other, "ndim", -1) + + if ndim != -1: + if ndim == 0: + if is_datetime64_object(other): + other = type(self)(other) + elif is_array(other): + # zero-dim array, occurs if try comparison with + # datetime64 scalar on the left hand side + # Unfortunately, for datetime64 values, other.item() + # incorrectly returns an integer, so we need to use + # the numpy C api to extract it. 
+ other = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), + other) + other = type(self)(other) + else: + return NotImplemented + elif is_array(other): + # avoid recursion error GH#15183 + return PyObject_RichCompare(np.array([self]), other, op) + return PyObject_RichCompare(other, self, reverse_ops[op]) + else: + return NotImplemented + + self._assert_tzawareness_compat(other) + return cmp_scalar(self.value, ots.value, op) + + def __reduce_ex__(self, protocol): + # python 3.6 compat + # http://bugs.python.org/issue28730 + # now __reduce_ex__ is defined and higher priority than __reduce__ + return self.__reduce__() + + def __repr__(self) -> str: + stamp = self._repr_base + zone = None + + try: + stamp += self.strftime('%z') + if self.tzinfo: + zone = get_timezone(self.tzinfo) + except ValueError: + year2000 = self.replace(year=2000) + stamp += year2000.strftime('%z') + if self.tzinfo: + zone = get_timezone(self.tzinfo) + + try: + stamp += zone.strftime(' %%Z') + except AttributeError: + # e.g. 
tzlocal has no `strftime` + pass + + tz = f", tz='{zone}'" if zone is not None else "" + freq = "" if self.freq is None else f", freq='{self.freqstr}'" + + return f"Timestamp('{stamp}'{tz}{freq})" + + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1: + cdef: + datetime dtval = self.to_pydatetime() + + self._assert_tzawareness_compat(other) + + if self.nanosecond == 0: + return PyObject_RichCompareBool(dtval, other, op) + else: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + elif op == Py_LT: + return dtval < other + elif op == Py_LE: + return dtval < other + elif op == Py_GT: + return dtval >= other + elif op == Py_GE: + return dtval >= other + + cdef _assert_tzawareness_compat(_Timestamp self, datetime other): + if self.tzinfo is None: + if other.tzinfo is not None: + raise TypeError('Cannot compare tz-naive and tz-aware ' + 'timestamps') + elif other.tzinfo is None: + raise TypeError('Cannot compare tz-naive and tz-aware timestamps') + + cpdef datetime to_pydatetime(_Timestamp self, bint warn=True): + """ + Convert a Timestamp object to a native Python datetime object. + + If warn=True, issue a warning if nanoseconds is nonzero. + """ + if self.nanosecond != 0 and warn: + warnings.warn("Discarding nonzero nanoseconds in conversion", + UserWarning, stacklevel=2) + + return datetime(self.year, self.month, self.day, + self.hour, self.minute, self.second, + self.microsecond, self.tzinfo) + + cpdef to_datetime64(self): + """ + Return a numpy.datetime64 object with 'ns' precision. + """ + return np.datetime64(self.value, 'ns') + + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: + """ + Convert the Timestamp to a NumPy datetime64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timestamp.to_datetime64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. 
+ + Returns + ------- + numpy.datetime64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + """ + return self.to_datetime64() + + def __add__(self, other): + cdef: + int64_t other_int, nanos = 0 + + if is_timedelta64_object(other): + other_int = other.astype('timedelta64[ns]').view('i8') + return type(self)(self.value + other_int, tz=self.tzinfo, freq=self.freq) + + elif is_integer_object(other): + raise integer_op_not_supported(self) + + elif PyDelta_Check(other) or hasattr(other, 'delta'): + # delta --> offsets.Tick + # logic copied from delta_to_nanoseconds to prevent circular import + if hasattr(other, 'nanos'): + nanos = other.nanos + elif hasattr(other, 'delta'): + nanos = other.delta + elif PyDelta_Check(other): + nanos = (other.days * 24 * 60 * 60 * 1000000 + + other.seconds * 1000000 + + other.microseconds) * 1000 + + result = type(self)(self.value + nanos, tz=self.tzinfo, freq=self.freq) + return result + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + + # index/series like + elif hasattr(other, '_typ'): + return NotImplemented + + result = datetime.__add__(self, other) + if PyDateTime_Check(result): + result = type(self)(result) + result.nanosecond = self.nanosecond + return result + + def __sub__(self, other): + + if (is_timedelta64_object(other) or is_integer_object(other) or + PyDelta_Check(other) or hasattr(other, 'delta')): + # `delta` attribute is for offsets.Tick or offsets.Week obj + neg_other = -other + return self + neg_other + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + + typ = getattr(other, '_typ', None) + if typ is not None: + return NotImplemented + + if other is NaT: + return NaT + + # coerce if necessary if we are a Timestamp-like + if (PyDateTime_Check(self) + and (PyDateTime_Check(other) or is_datetime64_object(other))): + # both_timestamps is to determine whether Timedelta(self - other) + 
# should raise the OOB error, or fall back returning a timedelta. + both_timestamps = (isinstance(other, _Timestamp) and + isinstance(self, _Timestamp)) + if isinstance(self, _Timestamp): + other = type(self)(other) + else: + self = type(other)(self) + + # validate tz's + if not tz_compare(self.tzinfo, other.tzinfo): + raise TypeError("Timestamp subtraction must have the " + "same timezones or no timezones") + + # scalar Timestamp/datetime - Timestamp/datetime -> yields a + # Timedelta + from pandas._libs.tslibs.timedeltas import Timedelta + try: + return Timedelta(self.value - other.value) + except (OverflowError, OutOfBoundsDatetime) as err: + if isinstance(other, _Timestamp): + if both_timestamps: + raise OutOfBoundsDatetime( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." + ) from err + pass + elif is_datetime64_object(self): + # GH#28286 cython semantics for __rsub__, `other` is actually + # the Timestamp + return type(other)(self) - other + + return NotImplemented + + cdef int64_t _maybe_convert_value_to_local(self): + """Convert UTC i8 value to local i8 value if tz exists""" + cdef: + int64_t val + val = self.value + if self.tz is not None and not is_utc(self.tz): + val = tz_convert_single(self.value, UTC, self.tz) + return val + + cpdef bint _get_start_end_field(self, str field): + cdef: + int64_t val + dict kwds + ndarray[uint8_t, cast=True] out + int month_kw + + freq = self.freq + if freq: + kwds = freq.kwds + month_kw = kwds.get('startingMonth', kwds.get('month', 12)) + freqstr = self.freqstr + else: + month_kw = 12 + freqstr = None + + val = self._maybe_convert_value_to_local() + out = get_start_end_field(np.array([val], dtype=np.int64), + field, freqstr, month_kw) + return out[0] + + cpdef _get_date_name_field(self, object field, object locale): + cdef: + int64_t val + object[:] out + + val = self._maybe_convert_value_to_local() + out = 
get_date_name_field(np.array([val], dtype=np.int64), + field, locale=locale) + return out[0] + + @property + def _repr_base(self) -> str: + return f"{self._date_repr} {self._time_repr}" + + @property + def _date_repr(self) -> str: + # Ideal here would be self.strftime("%Y-%m-%d"), but + # the datetime strftime() methods require year >= 1900 + return f'{self.year}-{self.month:02d}-{self.day:02d}' + + @property + def _time_repr(self) -> str: + result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' + + if self.nanosecond != 0: + result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' + elif self.microsecond != 0: + result += f'.{self.microsecond:06d}' + + return result + + @property + def _short_repr(self) -> str: + # format a Timestamp with only _date_repr if possible + # otherwise _repr_base + if (self.hour == 0 and + self.minute == 0 and + self.second == 0 and + self.microsecond == 0 and + self.nanosecond == 0): + return self._date_repr + return self._repr_base + + @property + def asm8(self) -> np.datetime64: + """ + Return numpy datetime64 format in nanoseconds. 
+ """ + return np.datetime64(self.value, 'ns') + + def timestamp(self): + """Return POSIX timestamp as float.""" + # GH 17329 + # Note: Naive timestamps will not match datetime.stdlib + return round(self.value / 1e9, 6) diff --git a/pandas/_libs/tslibs/ccalendar.pxd b/pandas/_libs/tslibs/ccalendar.pxd new file mode 100644 index 00000000..08f539a7 --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pxd @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- + +from cython cimport Py_ssize_t + +from numpy cimport int64_t, int32_t + + +cdef int dayofweek(int y, int m, int d) nogil +cdef bint is_leapyear(int64_t year) nogil +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil +cpdef int32_t get_week_of_year(int year, int month, int day) nogil +cpdef int32_t get_day_of_year(int year, int month, int day) nogil diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx new file mode 100644 index 00000000..0588dfe2 --- /dev/null +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -0,0 +1,230 @@ +# cython: boundscheck=False +""" +Cython implementations of functions resembling the stdlib calendar module +""" + +import cython + +from numpy cimport int64_t, int32_t + +from locale import LC_TIME + +from pandas._config.localization import set_locale +from pandas._libs.tslibs.strptime import LocaleTime + +# ---------------------------------------------------------------------- +# Constants + +# Slightly more performant cython lookups than a 2D table +# The first 12 entries correspond to month lengths for non-leap years. +# The remaining 12 entries give month lengths for leap years +cdef int32_t* days_per_month_array = [ + 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, + 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] + +cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4] + +# The first 13 entries give the month days elapsed as of the first of month N +# (or the total number of days in the year for N=13) in non-leap years. 
+# The remaining 13 entries give the days elapsed in leap years. +cdef int32_t* _month_offset = [ + 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, + 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366] + +# Canonical location for other modules to find name constants +MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', + 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] +# The first blank line is consistent with calendar.month_name in the calendar +# standard library +MONTHS_FULL = ['', 'January', 'February', 'March', 'April', 'May', 'June', + 'July', 'August', 'September', 'October', 'November', + 'December'] +MONTH_NUMBERS = {name: num for num, name in enumerate(MONTHS)} +MONTH_ALIASES = {(num + 1): name for num, name in enumerate(MONTHS)} +MONTH_TO_CAL_NUM = {name: num + 1 for num, name in enumerate(MONTHS)} + +DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] +DAYS_FULL = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', + 'Saturday', 'Sunday'] +int_to_weekday = {num: name for num, name in enumerate(DAYS)} +weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday} + +DAY_SECONDS = 86400 +HOUR_SECONDS = 3600 + +# ---------------------------------------------------------------------- + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil: + """ + Return the number of days in the given month of the given year. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + days_in_month : int + + Notes + ----- + Assumes that the arguments are valid. Passing a month not between 1 and 12 + risks a segfault. + """ + return days_per_month_array[12 * is_leapyear(year) + month - 1] + + +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision +cdef int dayofweek(int y, int m, int d) nogil: + """ + Find the day of week for the date described by the Y/M/D triple y, m, d + using Sakamoto's method, from wikipedia. 
+ + 0 represents Monday. See [1]_. + + Parameters + ---------- + y : int + m : int + d : int + + Returns + ------- + weekday : int + + Notes + ----- + Assumes that y, m, d, represents a valid date. + + See Also + -------- + [1] https://docs.python.org/3/library/calendar.html#calendar.weekday + + [2] https://en.wikipedia.org/wiki/\ + Determination_of_the_day_of_the_week#Sakamoto.27s_methods + """ + cdef: + int day + + y -= m < 3 + day = (y + y / 4 - y / 100 + y / 400 + sakamoto_arr[m - 1] + d) % 7 + # convert to python day + return (day + 6) % 7 + + +cdef bint is_leapyear(int64_t year) nogil: + """ + Returns 1 if the given year is a leap year, 0 otherwise. + + Parameters + ---------- + year : int + + Returns + ------- + is_leap : bool + """ + return ((year & 0x3) == 0 and # year % 4 == 0 + ((year % 100) != 0 or (year % 400) == 0)) + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_week_of_year(int year, int month, int day) nogil: + """ + Return the ordinal week-of-year for the given day. + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + week_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. + """ + cdef: + int32_t doy, dow + int woy + + doy = get_day_of_year(year, month, day) + dow = dayofweek(year, month, day) + + # estimate + woy = (doy - 1) - dow + 3 + if woy >= 0: + woy = woy // 7 + 1 + + # verify + if woy < 0: + if (woy > -2) or (woy == -2 and is_leapyear(year - 1)): + woy = 53 + else: + woy = 52 + elif woy == 53: + if 31 - day + dow < 3: + woy = 1 + + return woy + + +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef int32_t get_day_of_year(int year, int month, int day) nogil: + """ + Return the ordinal day-of-year for the given day. + + Parameters + ---------- + year : int + month : int + day : int + + Returns + ------- + day_of_year : int32_t + + Notes + ----- + Assumes the inputs describe a valid date. 
+ """ + cdef: + bint isleap + int32_t mo_off + int day_of_year + + isleap = is_leapyear(year) + + mo_off = _month_offset[isleap * 13 + month - 1] + + day_of_year = mo_off + day + return day_of_year + + +def get_locale_names(name_type: str, locale: object = None): + """ + Returns an array of localized day or month names. + + Parameters + ---------- + name_type : string, attribute of LocaleTime() in which to return localized + names + locale : string + + Returns + ------- + list of locale names + """ + with set_locale(locale, LC_TIME): + return getattr(LocaleTime(), name_type) diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd new file mode 100644 index 00000000..36e6b14b --- /dev/null +++ b/pandas/_libs/tslibs/conversion.pxd @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +from cpython.datetime cimport datetime + +from numpy cimport int64_t, int32_t + +from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct + + +cdef class _TSObject: + cdef: + npy_datetimestruct dts # npy_datetimestruct + int64_t value # numpy dt64 + object tzinfo + + +cdef convert_to_tsobject(object ts, object tz, object unit, + bint dayfirst, bint yearfirst, + int32_t nanos=*) + +cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, + int32_t nanos=*) + +cdef int64_t get_datetime64_nanos(object val) except? -1 + +cpdef int64_t pydt_to_i8(object pydt) except? 
-1 + +cdef maybe_datetimelike_to_i8(object val) + +cpdef datetime localize_pydatetime(datetime dt, object tz) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx new file mode 100644 index 00000000..2988d7ba --- /dev/null +++ b/pandas/_libs/tslibs/conversion.pyx @@ -0,0 +1,855 @@ +import cython + +import numpy as np +cimport numpy as cnp +from numpy cimport int64_t, int32_t, intp_t, ndarray +cnp.import_array() + +import pytz + +# stdlib datetime imports +from datetime import time as datetime_time +from cpython.datetime cimport (datetime, tzinfo, + PyDateTime_Check, PyDate_Check, + PyDateTime_IMPORT) +PyDateTime_IMPORT + +from pandas._libs.tslibs.c_timestamp cimport _Timestamp + +from pandas._libs.tslibs.np_datetime cimport ( + check_dts_bounds, npy_datetimestruct, pandas_datetime_to_datetimestruct, + _string_to_dts, npy_datetime, dt64_to_dtstruct, dtstruct_to_dt64, + get_datetime64_unit, get_datetime64_value, pydatetime_to_dt64, + NPY_DATETIMEUNIT, NPY_FR_ns) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, is_integer_object, is_float_object) + +from pandas._libs.tslibs.timedeltas cimport cast_from_unit +from pandas._libs.tslibs.timezones cimport ( + is_utc, is_tzlocal, is_fixed_offset, get_utcoffset, get_dst_info, + get_timezone, maybe_get_tz, tz_compare) +from pandas._libs.tslibs.timezones import UTC +from pandas._libs.tslibs.parsing import parse_datetime_string + +from pandas._libs.tslibs.nattype import nat_strings +from pandas._libs.tslibs.nattype cimport ( + NPY_NAT, checknull_with_nat, c_NaT as NaT) + +from pandas._libs.tslibs.tzconversion import ( + tz_localize_to_utc, tz_convert_single) +from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc + +# ---------------------------------------------------------------------- +# Constants + +NS_DTYPE = np.dtype('M8[ns]') +TD_DTYPE = np.dtype('m8[ns]') + + +# 
---------------------------------------------------------------------- +# Misc Helpers + +cdef inline int64_t get_datetime64_nanos(object val) except? -1: + """ + Extract the value and unit from a np.datetime64 object, then convert the + value to nanoseconds if necessary. + """ + cdef: + npy_datetimestruct dts + NPY_DATETIMEUNIT unit + npy_datetime ival + + ival = get_datetime64_value(val) + if ival == NPY_NAT: + return NPY_NAT + + unit = get_datetime64_unit(val) + + if unit != NPY_FR_ns: + pandas_datetime_to_datetimestruct(ival, unit, &dts) + check_dts_bounds(&dts) + ival = dtstruct_to_dt64(&dts) + + return ival + + +@cython.boundscheck(False) +@cython.wraparound(False) +def ensure_datetime64ns(arr: ndarray, copy: bool=True): + """ + Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]' + + Parameters + ---------- + arr : ndarray + copy : boolean, default True + + Returns + ------- + result : ndarray with dtype datetime64[ns] + + """ + cdef: + Py_ssize_t i, n = arr.size + int64_t[:] ivalues, iresult + NPY_DATETIMEUNIT unit + npy_datetimestruct dts + + shape = (arr).shape + + ivalues = arr.view(np.int64).ravel() + + result = np.empty(shape, dtype=NS_DTYPE) + iresult = result.ravel().view(np.int64) + + if len(iresult) == 0: + result = arr.view(NS_DTYPE) + if copy: + result = result.copy() + return result + + unit = get_datetime64_unit(arr.flat[0]) + if unit == NPY_FR_ns: + if copy: + arr = arr.copy() + result = arr + else: + for i in range(n): + if ivalues[i] != NPY_NAT: + pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts) + iresult[i] = dtstruct_to_dt64(&dts) + check_dts_bounds(&dts) + else: + iresult[i] = NPY_NAT + + return result + + +def ensure_timedelta64ns(arr: ndarray, copy: bool=True): + """ + Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]' + + Parameters + ---------- + arr : ndarray + copy : boolean, default True + + Returns + ------- + result : ndarray with dtype timedelta64[ns] + + """ + return 
arr.astype(TD_DTYPE, copy=copy) + # TODO: check for overflows when going from a lower-resolution to nanos + + +@cython.boundscheck(False) +@cython.wraparound(False) +def datetime_to_datetime64(object[:] values): + """ + Convert ndarray of datetime-like objects to int64 array representing + nanosecond timestamps. + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + result : ndarray[int64_t] + inferred_tz : tzinfo or None + """ + cdef: + Py_ssize_t i, n = len(values) + object val, inferred_tz = None + int64_t[:] iresult + npy_datetimestruct dts + _TSObject _ts + bint found_naive = False + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + for i in range(n): + val = values[i] + if checknull_with_nat(val): + iresult[i] = NPY_NAT + elif PyDateTime_Check(val): + if val.tzinfo is not None: + if found_naive: + raise ValueError('Cannot mix tz-aware with ' + 'tz-naive values') + if inferred_tz is not None: + if not tz_compare(val.tzinfo, inferred_tz): + raise ValueError('Array must be all same time zone') + else: + inferred_tz = get_timezone(val.tzinfo) + + _ts = convert_datetime_to_tsobject(val, None) + iresult[i] = _ts.value + check_dts_bounds(&_ts.dts) + else: + found_naive = True + if inferred_tz is not None: + raise ValueError('Cannot mix tz-aware with ' + 'tz-naive values') + iresult[i] = pydatetime_to_dt64(val, &dts) + check_dts_bounds(&dts) + else: + raise TypeError(f'Unrecognized value type: {type(val)}') + + return result, inferred_tz + + +cdef inline maybe_datetimelike_to_i8(object val): + """ + Try to convert to a nanosecond timestamp. Fall back to returning the + input value. 
+ + Parameters + ---------- + val : object + + Returns + ------- + val : int64 timestamp or original input + """ + cdef: + npy_datetimestruct dts + try: + return val.value + except AttributeError: + if is_datetime64_object(val): + return get_datetime64_value(val) + elif PyDateTime_Check(val): + return convert_datetime_to_tsobject(val, None).value + return val + + +# ---------------------------------------------------------------------- +# _TSObject Conversion + +# lightweight C object to hold datetime & int64 pair +cdef class _TSObject: + # cdef: + # npy_datetimestruct dts # npy_datetimestruct + # int64_t value # numpy dt64 + # object tzinfo + + @property + def value(self): + # This is needed in order for `value` to be accessible in lib.pyx + return self.value + + +cpdef int64_t pydt_to_i8(object pydt) except? -1: + """ + Convert to int64 representation compatible with numpy datetime64; converts + to UTC + + Parameters + ---------- + pydt : object + + Returns + ------- + i8value : np.int64 + """ + cdef: + _TSObject ts + + ts = convert_to_tsobject(pydt, None, None, 0, 0) + + return ts.value + + +cdef convert_to_tsobject(object ts, object tz, object unit, + bint dayfirst, bint yearfirst, int32_t nanos=0): + """ + Extract datetime and int64 from any of: + - np.int64 (with unit providing a possible modifier) + - np.datetime64 + - a float (with unit providing a possible modifier) + - python int or long object (with unit providing a possible modifier) + - iso8601 string object + - python datetime object + - another timestamp object + + Raises + ------ + OutOfBoundsDatetime : ts cannot be converted within implementation bounds + """ + cdef: + _TSObject obj + + if tz is not None: + tz = maybe_get_tz(tz) + + obj = _TSObject() + + if isinstance(ts, str): + return convert_str_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + + if ts is None or ts is NaT: + obj.value = NPY_NAT + elif is_datetime64_object(ts): + obj.value = get_datetime64_nanos(ts) + if obj.value != NPY_NAT: + 
dt64_to_dtstruct(obj.value, &obj.dts) + elif is_integer_object(ts): + try: + ts = ts + except OverflowError: + # GH#26651 re-raise as OutOfBoundsDatetime + raise OutOfBoundsDatetime(ts) + if ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = ts * cast_from_unit(None, unit) + obj.value = ts + dt64_to_dtstruct(ts, &obj.dts) + elif is_float_object(ts): + if ts != ts or ts == NPY_NAT: + obj.value = NPY_NAT + else: + ts = cast_from_unit(ts, unit) + obj.value = ts + dt64_to_dtstruct(ts, &obj.dts) + elif PyDateTime_Check(ts): + return convert_datetime_to_tsobject(ts, tz, nanos) + elif PyDate_Check(ts): + # Keep the converter same as PyDateTime's + ts = datetime.combine(ts, datetime_time()) + return convert_datetime_to_tsobject(ts, tz) + elif getattr(ts, '_typ', None) == 'period': + raise ValueError("Cannot convert Period to Timestamp " + "unambiguously. Use to_timestamp") + else: + raise TypeError(f'Cannot convert input [{ts}] of type {type(ts)} to ' + f'Timestamp') + + if tz is not None: + localize_tso(obj, tz) + + if obj.value != NPY_NAT: + # check_overflows needs to run after localize_tso + check_dts_bounds(&obj.dts) + check_overflows(obj) + return obj + + +cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, + int32_t nanos=0): + """ + Convert a datetime (or Timestamp) input `ts`, along with optional timezone + object `tz` to a _TSObject. + + The optional argument `nanos` allows for cases where datetime input + needs to be supplemented with higher-precision information. 
+ + Parameters + ---------- + ts : datetime or Timestamp + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + nanos : int32_t, default is 0 + nanoseconds supplement the precision of the datetime input ts + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + + if tz is not None: + tz = maybe_get_tz(tz) + + if ts.tzinfo is not None: + # Convert the current timezone to the passed timezone + ts = ts.astimezone(tz) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + elif not is_utc(tz): + ts = _localize_pydatetime(ts, tz) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # UTC + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = tz + else: + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + obj.value -= int(offset.total_seconds() * 1e9) + + if isinstance(ts, _Timestamp): + obj.value += ts.nanosecond + obj.dts.ps = ts.nanosecond * 1000 + + if nanos: + obj.value += nanos + obj.dts.ps = nanos * 1000 + + check_dts_bounds(&obj.dts) + check_overflows(obj) + return obj + + +cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts, + int tzoffset, object tz=None): + """ + Convert a datetimestruct `dts`, along with initial timezone offset + `tzoffset` to a _TSObject (with timezone object `tz` - optional). + + Parameters + ---------- + dts: npy_datetimestruct + tzoffset: int + tz : tzinfo or None + timezone for the timezone-aware output. 
+ + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + int64_t value # numpy dt64 + datetime dt + + value = dtstruct_to_dt64(&dts) + obj.dts = dts + obj.tzinfo = pytz.FixedOffset(tzoffset) + obj.value = tz_convert_single(value, obj.tzinfo, UTC) + if tz is None: + check_overflows(obj) + return obj + + # Keep the converter same as PyDateTime's + dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) + obj = convert_datetime_to_tsobject( + dt, tz, nanos=obj.dts.ps // 1000) + return obj + + +cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit, + bint dayfirst=False, + bint yearfirst=False): + """ + Convert a string input `ts`, along with optional timezone object`tz` + to a _TSObject. + + The optional arguments `dayfirst` and `yearfirst` are passed to the + dateutil parser. + + Parameters + ---------- + ts : str + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + dayfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. "3/4/1975" as + April 3, as opposed to the standard US interpretation March 4. + yearfirst : bool, default False + When parsing an ambiguous date string, interpret e.g. 
"01/05/09" + as "May 9, 2001", as opposed to the default "Jan 5, 2009" + + Returns + ------- + obj : _TSObject + """ + cdef: + npy_datetimestruct dts + int out_local = 0, out_tzoffset = 0 + bint do_parse_datetime_string = False + + if tz is not None: + tz = maybe_get_tz(tz) + + assert isinstance(ts, str) + + if len(ts) == 0 or ts in nat_strings: + ts = NaT + elif ts == 'now': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns utc + ts = datetime.now(tz) + elif ts == 'today': + # Issue 9000, we short-circuit rather than going + # into np_datetime_strings which returns a normalized datetime + ts = datetime.now(tz) + # equiv: datetime.today().replace(tzinfo=tz) + else: + string_to_dts_failed = _string_to_dts( + ts, &dts, &out_local, + &out_tzoffset, False + ) + try: + if not string_to_dts_failed: + check_dts_bounds(&dts) + if out_local == 1: + return create_tsobject_tz_using_offset(dts, + out_tzoffset, tz) + else: + ts = dtstruct_to_dt64(&dts) + if tz is not None: + # shift for localize_tso + ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz, + ambiguous='raise')[0] + + except OutOfBoundsDatetime: + # GH#19382 for just-barely-OutOfBounds falling back to dateutil + # parser will return incorrect result because it will ignore + # nanoseconds + raise + + except ValueError: + do_parse_datetime_string = True + + if string_to_dts_failed or do_parse_datetime_string: + try: + ts = parse_datetime_string(ts, dayfirst=dayfirst, + yearfirst=yearfirst) + except (ValueError, OverflowError): + raise ValueError("could not convert string to Timestamp") + + return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst) + + +cdef inline check_overflows(_TSObject obj): + """ + Check that we haven't silently overflowed in timezone conversion + + Parameters + ---------- + obj : _TSObject + + Returns + ------- + None + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#12677 + if obj.dts.year == 1677: + if not (obj.value < 0): + raise 
OutOfBoundsDatetime + elif obj.dts.year == 2262: + if not (obj.value > 0): + raise OutOfBoundsDatetime + + +# ---------------------------------------------------------------------- +# Localization + +cdef inline void localize_tso(_TSObject obj, tzinfo tz): + """ + Given the UTC nanosecond timestamp in obj.value, find the wall-clock + representation of that timestamp in the given timezone. + + Parameters + ---------- + obj : _TSObject + tz : tzinfo + + Returns + ------- + None + + Notes + ----- + Sets obj.tzinfo inplace, alters obj.dts inplace. + """ + cdef: + ndarray[int64_t] trans + int64_t[:] deltas + int64_t local_val + Py_ssize_t pos + str typ + + assert obj.tzinfo is None + + if is_utc(tz): + pass + elif obj.value == NPY_NAT: + pass + elif is_tzlocal(tz): + local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False) + dt64_to_dtstruct(local_val, &obj.dts) + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + if is_fixed_offset(tz): + # static/fixed tzinfo; in this case we know len(deltas) == 1 + # This can come back with `typ` of either "fixed" or None + dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) + elif typ == 'pytz': + # i.e. treat_tz_as_pytz(tz) + pos = trans.searchsorted(obj.value, side='right') - 1 + tz = tz._tzinfos[tz._transition_info[pos]] + dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + elif typ == 'dateutil': + # i.e. treat_tz_as_dateutil(tz) + pos = trans.searchsorted(obj.value, side='right') - 1 + dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) + else: + # Note: as of 2018-07-17 all tzinfo objects that are _not_ + # either pytz or dateutil have is_fixed_offset(tz) == True, + # so this branch will never be reached. + pass + + obj.tzinfo = tz + + +cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): + """ + Take a datetime/Timestamp in UTC and localizes to timezone tz. 
+ + NB: Unlike the public version, this treats datetime and Timestamp objects + identically, i.e. discards nanos from Timestamps. + It also assumes that the `tz` input is not None. + """ + try: + # datetime.replace with pytz may be incorrect result + return tz.localize(dt) + except AttributeError: + return dt.replace(tzinfo=tz) + + +cpdef inline datetime localize_pydatetime(datetime dt, object tz): + """ + Take a datetime/Timestamp in UTC and localizes to timezone tz. + + Parameters + ---------- + dt : datetime or Timestamp + tz : tzinfo, "UTC", or None + + Returns + ------- + localized : datetime or Timestamp + """ + if tz is None: + return dt + elif isinstance(dt, _Timestamp): + return dt.tz_localize(tz) + elif is_utc(tz): + return _localize_pydatetime(dt, tz) + try: + # datetime.replace with pytz may be incorrect result + return tz.localize(dt) + except AttributeError: + return dt.replace(tzinfo=tz) + + +# ---------------------------------------------------------------------- +# Normalization + + +def normalize_date(dt: object) -> datetime: + """ + Normalize datetime.datetime value to midnight. 
Returns datetime.date as a + datetime.datetime at midnight + + Parameters + ---------- + dt : date, datetime, or Timestamp + + Returns + ------- + normalized : datetime.datetime or Timestamp + + Raises + ------ + TypeError : if input is not datetime.date, datetime.datetime, or Timestamp + """ + if PyDateTime_Check(dt): + if isinstance(dt, _Timestamp): + return dt.replace(hour=0, minute=0, second=0, microsecond=0, + nanosecond=0) + else: + # regular datetime object + return dt.replace(hour=0, minute=0, second=0, microsecond=0) + # TODO: Make sure DST crossing is handled correctly here + elif PyDate_Check(dt): + return datetime(dt.year, dt.month, dt.day) + else: + raise TypeError(f'Unrecognized type: {type(dt)}') + + +@cython.wraparound(False) +@cython.boundscheck(False) +def normalize_i8_timestamps(int64_t[:] stamps, object tz): + """ + Normalize each of the (nanosecond) timezone aware timestamps in the given + array by rounding down to the beginning of the day (i.e. midnight). + This is midnight for timezone, `tz`. + + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + + Returns + ------- + result : int64 ndarray of converted of normalized nanosecond timestamps + """ + cdef: + Py_ssize_t n = len(stamps) + int64_t[:] result = np.empty(n, dtype=np.int64) + + result = _normalize_local(stamps, tz) + + return result.base # .base to access underlying np.ndarray + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef int64_t[:] _normalize_local(int64_t[:] stamps, tzinfo tz): + """ + Normalize each of the (nanosecond) timestamps in the given array by + rounding down to the beginning of the day (i.e. midnight) for the + given timezone `tz`. 
+ + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo + + Returns + ------- + result : int64 ndarray of converted of normalized nanosecond timestamps + """ + cdef: + Py_ssize_t i, n = len(stamps) + int64_t[:] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans + int64_t[:] deltas + str typ + Py_ssize_t[:] pos + npy_datetimestruct dts + int64_t delta, local_val + + if is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + local_val = _tz_convert_tzlocal_utc(stamps[i], tz, to_utc=False) + dt64_to_dtstruct(local_val, &dts) + result[i] = _normalized_stamp(&dts) + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + if typ not in ['pytz', 'dateutil']: + # static/fixed; in this case we know that len(delta) == 1 + delta = deltas[0] + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i] + delta, &dts) + result[i] = _normalized_stamp(&dts) + else: + pos = trans.searchsorted(stamps, side='right') - 1 + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) + result[i] = _normalized_stamp(&dts) + + return result + + +cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil: + """ + Normalize the given datetimestruct to midnight, then convert to int64_t. + + Parameters + ---------- + *dts : pointer to npy_datetimestruct + + Returns + ------- + stamp : int64 + """ + dts.hour = 0 + dts.min = 0 + dts.sec = 0 + dts.us = 0 + dts.ps = 0 + return dtstruct_to_dt64(dts) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def is_date_array_normalized(int64_t[:] stamps, object tz=None): + """ + Check if all of the given (nanosecond) timestamps are normalized to + midnight, i.e. hour == minute == second == 0. If the optional timezone + `tz` is not None, then this is midnight for this timezone. 
+ + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + + Returns + ------- + is_normalized : bool True if all stamps are normalized + """ + cdef: + Py_ssize_t i, n = len(stamps) + ndarray[int64_t] trans + int64_t[:] deltas + intp_t[:] pos + npy_datetimestruct dts + int64_t local_val, delta + str typ + + if tz is None or is_utc(tz): + for i in range(n): + dt64_to_dtstruct(stamps[i], &dts) + if (dts.hour + dts.min + dts.sec + dts.us) > 0: + return False + elif is_tzlocal(tz): + for i in range(n): + local_val = _tz_convert_tzlocal_utc(stamps[i], tz, to_utc=False) + dt64_to_dtstruct(local_val, &dts) + if (dts.hour + dts.min + dts.sec + dts.us) > 0: + return False + else: + trans, deltas, typ = get_dst_info(tz) + + if typ not in ['pytz', 'dateutil']: + # static/fixed; in this case we know that len(delta) == 1 + delta = deltas[0] + for i in range(n): + # Adjust datetime64 timestamp, recompute datetimestruct + dt64_to_dtstruct(stamps[i] + delta, &dts) + if (dts.hour + dts.min + dts.sec + dts.us) > 0: + return False + + else: + pos = trans.searchsorted(stamps, side='right') - 1 + for i in range(n): + # Adjust datetime64 timestamp, recompute datetimestruct + dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) + if (dts.hour + dts.min + dts.sec + dts.us) > 0: + return False + + return True diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx new file mode 100644 index 00000000..8bee7da6 --- /dev/null +++ b/pandas/_libs/tslibs/fields.pyx @@ -0,0 +1,667 @@ +""" +Functions for accessing attributes of Timestamp/datetime64/datetime-like +objects and arrays +""" + +import cython +from cython import Py_ssize_t + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, int64_t, int32_t, int8_t +cnp.import_array() + +from pandas._libs.tslibs.ccalendar import ( + get_locale_names, MONTHS_FULL, DAYS_FULL, DAY_SECONDS) +from pandas._libs.tslibs.ccalendar cimport ( + get_days_in_month, is_leapyear, dayofweek, get_week_of_year,
get_day_of_year) +from pandas._libs.tslibs.np_datetime cimport ( + npy_datetimestruct, pandas_timedeltastruct, dt64_to_dtstruct, + td64_to_tdstruct) +from pandas._libs.tslibs.nattype cimport NPY_NAT + + +def get_time_micros(const int64_t[:] dtindex): + """ + Return the number of microseconds in the time component of a + nanosecond timestamp. + + Parameters + ---------- + dtindex : ndarray[int64_t] + + Returns + ------- + micros : ndarray[int64_t] + """ + cdef: + ndarray[int64_t] micros + + micros = np.mod(dtindex, DAY_SECONDS * 1000000000, dtype=np.int64) + micros //= 1000 + return micros + + +@cython.wraparound(False) +@cython.boundscheck(False) +def build_field_sarray(const int64_t[:] dtindex): + """ + Datetime as int64 representation to a structured array of fields + """ + cdef: + Py_ssize_t i, count = len(dtindex) + npy_datetimestruct dts + ndarray[int32_t] years, months, days, hours, minutes, seconds, mus + + sa_dtype = [('Y', 'i4'), # year + ('M', 'i4'), # month + ('D', 'i4'), # day + ('h', 'i4'), # hour + ('m', 'i4'), # min + ('s', 'i4'), # second + ('u', 'i4')] # microsecond + + out = np.empty(count, dtype=sa_dtype) + + years = out['Y'] + months = out['M'] + days = out['D'] + hours = out['h'] + minutes = out['m'] + seconds = out['s'] + mus = out['u'] + + for i in range(count): + dt64_to_dtstruct(dtindex[i], &dts) + years[i] = dts.year + months[i] = dts.month + days[i] = dts.day + hours[i] = dts.hour + minutes[i] = dts.min + seconds[i] = dts.sec + mus[i] = dts.us + + return out + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_name_field(const int64_t[:] dtindex, object field, object locale=None): + """ + Given a int64-based datetime index, return array of strings of date + name based on requested field (e.g. 
day_name) + """ + cdef: + Py_ssize_t i, count = len(dtindex) + ndarray[object] out, names + npy_datetimestruct dts + int dow + + out = np.empty(count, dtype=object) + + if field == 'day_name': + if locale is None: + names = np.array(DAYS_FULL, dtype=np.object_) + else: + names = np.array(get_locale_names('f_weekday', locale), + dtype=np.object_) + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dow = dayofweek(dts.year, dts.month, dts.day) + out[i] = names[dow].capitalize() + + elif field == 'month_name': + if locale is None: + names = np.array(MONTHS_FULL, dtype=np.object_) + else: + names = np.array(get_locale_names('f_month', locale), + dtype=np.object_) + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = np.nan + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = names[dts.month].capitalize() + + else: + raise ValueError(f"Field {field} not supported") + + return out + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_start_end_field(const int64_t[:] dtindex, object field, + object freqstr=None, int month_kw=12): + """ + Given an int64-based datetime index return array of indicators + of whether timestamps are at the start/end of the month/quarter/year + (defined by frequency). + """ + cdef: + Py_ssize_t i + int count = len(dtindex) + bint is_business = 0 + int end_month = 12 + int start_month = 1 + ndarray[int8_t] out + ndarray[int32_t, ndim=2] _month_offset + bint isleap + npy_datetimestruct dts + int mo_off, dom, doy, dow, ldom + + _month_offset = np.array( + [[0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365], + [0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]], + dtype=np.int32) + + out = np.zeros(count, dtype='int8') + + if freqstr: + if freqstr == 'C': + raise ValueError(f"Custom business days is not supported by {field}") + is_business = freqstr[0] == 'B' + + # YearBegin(), BYearBegin() use month = starting month of year. 
+ # QuarterBegin(), BQuarterBegin() use startingMonth = starting + # month of year. Other offsets use month, startingMonth as ending + # month of year. + + if (freqstr[0:2] in ['MS', 'QS', 'AS']) or ( + freqstr[1:3] in ['MS', 'QS', 'AS']): + end_month = 12 if month_kw == 1 else month_kw - 1 + start_month = month_kw + else: + end_month = month_kw + start_month = (end_month % 12) + 1 + else: + end_month = 12 + start_month = 1 + + if field == 'is_month_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dom = dts.day + + if dom == 1: + out[i] = 1 + + elif field == 'is_month_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = dayofweek(dts.year, dts.month, dts.day) + + if (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2)): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ldom == doy: + out[i] = 1 + + elif field == 'is_quarter_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if ((dts.month - start_month) % 3 == 0) and ( + (dom == 1 and dow < 5) or 
(dom <= 3 and dow == 0)): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dom = dts.day + + if ((dts.month - start_month) % 3 == 0) and dom == 1: + out[i] = 1 + + elif field == 'is_quarter_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + dow = dayofweek(dts.year, dts.month, dts.day) + + if ((dts.month - end_month) % 3 == 0) and ( + (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if ((dts.month - end_month) % 3 == 0) and (ldom == doy): + out[i] = 1 + + elif field == 'is_year_start': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dom = dts.day + dow = dayofweek(dts.year, dts.month, dts.day) + + if (dts.month == start_month) and ( + (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dom = dts.day + + if (dts.month == start_month) and dom == 1: + out[i] = 1 + + elif field == 'is_year_end': + if is_business: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + isleap = is_leapyear(dts.year) + dom = dts.day + mo_off = _month_offset[isleap, dts.month - 1] + doy = mo_off + dom + dow = dayofweek(dts.year, dts.month, dts.day) + 
ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and ( + (ldom == doy and dow < 5) or ( + dow == 4 and (ldom - doy <= 2))): + out[i] = 1 + + else: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = 0 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + isleap = is_leapyear(dts.year) + mo_off = _month_offset[isleap, dts.month - 1] + dom = dts.day + doy = mo_off + dom + ldom = _month_offset[isleap, dts.month] + + if (dts.month == end_month) and (ldom == doy): + out[i] = 1 + + else: + raise ValueError(f"Field {field} not supported") + + return out.view(bool) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_date_field(const int64_t[:] dtindex, object field): + """ + Given a int64-based datetime index, extract the year, month, etc., + field and return an array of these values. + """ + cdef: + Py_ssize_t i, count = len(dtindex) + ndarray[int32_t] out + npy_datetimestruct dts + + out = np.empty(count, dtype='i4') + + if field == 'Y': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.year + return out + + elif field == 'M': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.month + return out + + elif field == 'D': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.day + return out + + elif field == 'h': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.hour + return out + + elif field == 'm': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.min + return out + + elif field == 's': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + 
out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.sec + return out + + elif field == 'us': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.us + return out + + elif field == 'ns': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.ps // 1000 + return out + elif field == 'doy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = get_day_of_year(dts.year, dts.month, dts.day) + return out + + elif field == 'dow': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dayofweek(dts.year, dts.month, dts.day) + return out + + elif field == 'woy': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = get_week_of_year(dts.year, dts.month, dts.day) + return out + + elif field == 'q': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = dts.month + out[i] = ((out[i] - 1) // 3) + 1 + return out + + elif field == 'dim': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = -1 + continue + + dt64_to_dtstruct(dtindex[i], &dts) + out[i] = get_days_in_month(dts.year, dts.month) + return out + elif field == 'is_leap_year': + return isleapyear_arr(get_date_field(dtindex, 'Y')) + + raise ValueError(f"Field {field} not supported") + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_timedelta_field(const int64_t[:] tdindex, object field): + """ + Given a int64-based timedelta index, extract the days, hrs, sec., + field and return an array of these values. 
+ """ + cdef: + Py_ssize_t i, count = len(tdindex) + ndarray[int32_t] out + pandas_timedeltastruct tds + + out = np.empty(count, dtype='i4') + + if field == 'days': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.days + return out + + elif field == 'h': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.hrs + return out + + elif field == 's': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.sec + return out + + elif field == 'seconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.seconds + return out + + elif field == 'ms': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.ms + return out + + elif field == 'microseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.microseconds + return out + + elif field == 'us': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.us + return out + + elif field == 'ns': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.ns + return out + + elif field == 'nanoseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.nanoseconds + return out + + raise ValueError(f"Field {field} not supported") + + +cpdef isleapyear_arr(ndarray years): + """vectorized version of isleapyear; 
NaT evaluates as False""" + cdef: + ndarray[int8_t] out + + out = np.zeros(len(years), dtype='int8') + out[np.logical_or(years % 400 == 0, + np.logical_and(years % 4 == 0, + years % 100 > 0))] = 1 + return out.view(bool) diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd new file mode 100644 index 00000000..6ec67ce2 --- /dev/null +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- + +cpdef str get_rule_month(object source, str default=*) + +cpdef get_freq_code(freqstr) +cpdef object get_freq(object freq) +cpdef str get_base_alias(freqstr) +cpdef int get_to_timestamp_base(int base) +cpdef str get_freq_str(base, mult=*) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx new file mode 100644 index 00000000..d60f5cfd --- /dev/null +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -0,0 +1,515 @@ +import re + +cimport numpy as cnp +cnp.import_array() + +from pandas._libs.tslibs.util cimport is_integer_object + +from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS + +# ---------------------------------------------------------------------- +# Constants + +# hack to handle WOM-1MON +opattern = re.compile( + r'([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' +) + +INVALID_FREQ_ERR_MSG = "Invalid frequency: {0}" + +# --------------------------------------------------------------------- +# Period codes + + +class FreqGroup: + FR_ANN = 1000 + FR_QTR = 2000 + FR_MTH = 3000 + FR_WK = 4000 + FR_BUS = 5000 + FR_DAY = 6000 + FR_HR = 7000 + FR_MIN = 8000 + FR_SEC = 9000 + FR_MS = 10000 + FR_US = 11000 + FR_NS = 12000 + + +# period frequency constants corresponding to scikits timeseries +# originals +_period_code_map = { + # Annual freqs with various fiscal year ends. 
+ # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000} # Nanosecondly + + +_reverse_period_code_map = { + _period_code_map[key]: key for key in _period_code_map} + +# Yearly aliases; careful not to put these in 
_reverse_period_code_map +_period_code_map.update({'Y' + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith('A-')}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000}) # Custom Business Day + +_lite_rule_alias = { + 'W': 'W-SUN', + 'Q': 'Q-DEC', + + 'A': 'A-DEC', # YearEnd(month=12), + 'Y': 'A-DEC', + 'AS': 'AS-JAN', # YearBegin(month=1), + 'YS': 'AS-JAN', + 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BY': 'BA-DEC', + 'BAS': 'BAS-JAN', # BYearBegin(month=1), + 'BYS': 'BAS-JAN', + + 'Min': 'T', + 'min': 'T', + 'ms': 'L', + 'us': 'U', + 'ns': 'N'} + +_dont_uppercase = {'MS', 'ms'} + +# ---------------------------------------------------------------------- + +cpdef get_freq_code(freqstr): + """ + Convert a freq str, tuple, int or DateOffset to a freq code and stride (mult) + + Parameters + ---------- + freqstr : str, tuple, int or DateOffset + + Returns + ------- + return : tuple of base frequency code and stride (mult) + + Raises + ------ + TypeError : if passed a tuple with incorrect types + + Examples + -------- + >>> get_freq_code('3D') + (6000, 3) + + >>> get_freq_code('D') + (6000, 1) + + >>> get_freq_code(('D', 3)) + (6000, 3) + """ + if getattr(freqstr, '_typ', None) == 'dateoffset': + freqstr = (freqstr.rule_code, freqstr.n) + + if isinstance(freqstr, tuple): + if is_integer_object(freqstr[0]) and is_integer_object(freqstr[1]): + # e.g., freqstr = (2000, 1) + return freqstr + elif is_integer_object(freqstr[0]): + # Note: passing freqstr[1] below will raise TypeError if that + # is not a str + code = _period_str_to_code(freqstr[1]) + stride = freqstr[0] + return code, stride + else: + # e.g., freqstr = ('T', 5) + code = _period_str_to_code(freqstr[0]) + stride = freqstr[1] + return code, stride + + if is_integer_object(freqstr): + return freqstr, 1 + + base, stride = _base_and_stride(freqstr) + code = _period_str_to_code(base) + + return code, stride + + +cpdef
_base_and_stride(str freqstr): + """ + Return base freq and stride info from string representation + + Returns + ------- + base : str + stride : int + + Examples + -------- + _base_and_stride('5Min') -> 'Min', 5 + """ + groups = opattern.match(freqstr) + + if not groups: + raise ValueError(f"Could not evaluate {freqstr}") + + stride = groups.group(1) + + if len(stride): + stride = int(stride) + else: + stride = 1 + + base = groups.group(2) + + return base, stride + + +cpdef _period_str_to_code(str freqstr): + freqstr = _lite_rule_alias.get(freqstr, freqstr) + + if freqstr not in _dont_uppercase: + lower = freqstr.lower() + freqstr = _lite_rule_alias.get(lower, freqstr) + + if freqstr not in _dont_uppercase: + freqstr = freqstr.upper() + try: + return _period_code_map[freqstr] + except KeyError: + raise ValueError(INVALID_FREQ_ERR_MSG.format(freqstr)) + + +cpdef str get_freq_str(base, mult=1): + """ + Return the summary string associated with this offset code, possibly + adjusted by a multiplier. + + Parameters + ---------- + base : int (member of FreqGroup) + + Returns + ------- + freq_str : str + + Examples + -------- + >>> get_freq_str(1000) + 'A-DEC' + + >>> get_freq_str(2000, 2) + '2Q-DEC' + + >>> get_freq_str("foo") + """ + code = _reverse_period_code_map.get(base) + if mult == 1: + return code + return str(mult) + code + + +cpdef str get_base_alias(freqstr): + """ + Returns the base frequency alias, e.g., '5D' -> 'D' + + Parameters + ---------- + freqstr : str + + Returns + ------- + base_alias : str + """ + return _base_and_stride(freqstr)[0] + + +cpdef int get_to_timestamp_base(int base): + """ + Return frequency code group used for base of to_timestamp against + frequency code.
+ + Parameters + ---------- + base : int (member of FreqGroup) + + Returns + ------- + base : int + + Examples + -------- + # Return day freq code against longer freq than day + >>> get_to_timestamp_base(get_freq_code('D')[0]) + 6000 + >>> get_to_timestamp_base(get_freq_code('W')[0]) + 6000 + >>> get_to_timestamp_base(get_freq_code('M')[0]) + 6000 + + # Return second freq code against hour between second + >>> get_to_timestamp_base(get_freq_code('H')[0]) + 9000 + >>> get_to_timestamp_base(get_freq_code('S')[0]) + 9000 + """ + if base < FreqGroup.FR_BUS: + return FreqGroup.FR_DAY + elif FreqGroup.FR_HR <= base <= FreqGroup.FR_SEC: + return FreqGroup.FR_SEC + return base + + +cpdef object get_freq(object freq): + """ + Return frequency code of given frequency str. + If input is not string, return input as it is. + + Examples + -------- + >>> get_freq('A') + 1000 + + >>> get_freq('3A') + 1000 + """ + if isinstance(freq, str): + base, mult = get_freq_code(freq) + freq = base + return freq + + +# ---------------------------------------------------------------------- +# Frequency comparison + +def is_subperiod(source, target) -> bint: + """ + Returns True if downsampling is possible between source and target + frequencies + + Parameters + ---------- + source : string or DateOffset + Frequency converting from + target : string or DateOffset + Frequency converting to + + Returns + ------- + is_subperiod : boolean + """ + + if target is None or source is None: + return False + source = _maybe_coerce_freq(source) + target = _maybe_coerce_freq(target) + + if _is_annual(target): + if _is_quarterly(source): + return _quarter_months_conform(get_rule_month(source), + get_rule_month(target)) + return source in {'D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'} + elif _is_quarterly(target): + return source in {'D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'} + elif _is_monthly(target): + return source in {'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif _is_weekly(target): + 
return source in {target, 'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif target == 'B': + return source in {'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif target == 'C': + return source in {'C', 'H', 'T', 'S', 'L', 'U', 'N'} + elif target == 'D': + return source in {'D', 'H', 'T', 'S', 'L', 'U', 'N'} + elif target == 'H': + return source in {'H', 'T', 'S', 'L', 'U', 'N'} + elif target == 'T': + return source in {'T', 'S', 'L', 'U', 'N'} + elif target == 'S': + return source in {'S', 'L', 'U', 'N'} + elif target == 'L': + return source in {'L', 'U', 'N'} + elif target == 'U': + return source in {'U', 'N'} + elif target == 'N': + return source in {'N'} + + +def is_superperiod(source, target) -> bint: + """ + Returns True if upsampling is possible between source and target + frequencies + + Parameters + ---------- + source : string + Frequency converting from + target : string + Frequency converting to + + Returns + ------- + is_superperiod : boolean + """ + if target is None or source is None: + return False + source = _maybe_coerce_freq(source) + target = _maybe_coerce_freq(target) + + if _is_annual(source): + if _is_annual(target): + return get_rule_month(source) == get_rule_month(target) + + if _is_quarterly(target): + smonth = get_rule_month(source) + tmonth = get_rule_month(target) + return _quarter_months_conform(smonth, tmonth) + return target in {'D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'} + elif _is_quarterly(source): + return target in {'D', 'C', 'B', 'M', 'H', 'T', 'S', 'L', 'U', 'N'} + elif _is_monthly(source): + return target in {'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif _is_weekly(source): + return target in {source, 'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif source == 'B': + return target in {'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif source == 'C': + return target in {'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif source == 'D': + return target in {'D', 'C', 'B', 'H', 'T', 'S', 'L', 'U', 'N'} + elif source == 
'H': + return target in {'H', 'T', 'S', 'L', 'U', 'N'} + elif source == 'T': + return target in {'T', 'S', 'L', 'U', 'N'} + elif source == 'S': + return target in {'S', 'L', 'U', 'N'} + elif source == 'L': + return target in {'L', 'U', 'N'} + elif source == 'U': + return target in {'U', 'N'} + elif source == 'N': + return target in {'N'} + + +cdef str _maybe_coerce_freq(code): + """ we might need to coerce a code to a rule_code + and uppercase it + + Parameters + ---------- + source : string or DateOffset + Frequency converting from + + Returns + ------- + code : string + """ + assert code is not None + if getattr(code, '_typ', None) == 'dateoffset': + # i.e. isinstance(code, ABCDateOffset): + code = code.rule_code + return code.upper() + + +cdef bint _quarter_months_conform(str source, str target): + snum = MONTH_NUMBERS[source] + tnum = MONTH_NUMBERS[target] + return snum % 3 == tnum % 3 + + +cdef bint _is_annual(str rule): + rule = rule.upper() + return rule == 'A' or rule.startswith('A-') + + +cdef bint _is_quarterly(str rule): + rule = rule.upper() + return rule == 'Q' or rule.startswith('Q-') or rule.startswith('BQ') + + +cdef bint _is_monthly(str rule): + rule = rule.upper() + return rule == 'M' or rule == 'BM' + + +cdef bint _is_weekly(str rule): + rule = rule.upper() + return rule == 'W' or rule.startswith('W-') + + +# ---------------------------------------------------------------------- + +cpdef str get_rule_month(object source, str default="DEC"): + """ + Return starting month of given freq, default is December. 
+ + Parameters + ---------- + source : object + default : str, default "DEC" + + Returns + ------- + rule_month: str + + Examples + -------- + >>> get_rule_month('D') + 'DEC' + + >>> get_rule_month('A-JAN') + 'JAN' + """ + if hasattr(source, 'freqstr'): + source = source.freqstr + source = source.upper() + if '-' not in source: + return default + else: + return source.split('-')[1] diff --git a/pandas/_libs/tslibs/nattype.pxd b/pandas/_libs/tslibs/nattype.pxd new file mode 100644 index 00000000..dae5bdc3 --- /dev/null +++ b/pandas/_libs/tslibs/nattype.pxd @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +from cpython.datetime cimport datetime + +from numpy cimport int64_t +cdef int64_t NPY_NAT + +cdef bint _nat_scalar_rules[6] + + +cdef class _NaT(datetime): + cdef readonly: + int64_t value + object freq + +cdef _NaT c_NaT + + +cdef bint checknull_with_nat(object val) +cpdef bint is_null_datetimelike(object val, bint inat_is_null=*) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx new file mode 100644 index 00000000..fe447f57 --- /dev/null +++ b/pandas/_libs/tslibs/nattype.pyx @@ -0,0 +1,800 @@ +from cpython.object cimport ( + PyObject_RichCompare, + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) + +from cpython.datetime cimport (datetime, + PyDateTime_Check, PyDelta_Check, + PyDateTime_IMPORT) + +from cpython.version cimport PY_MINOR_VERSION + +PyDateTime_IMPORT + +import numpy as np +cimport numpy as cnp +from numpy cimport int64_t +cnp.import_array() + +from pandas._libs.tslibs.np_datetime cimport ( + get_datetime64_value, get_timedelta64_value) +cimport pandas._libs.tslibs.util as util +from pandas._libs.tslibs.util cimport ( + get_nat, is_integer_object, is_float_object, is_datetime64_object, + is_timedelta64_object) + +from pandas._libs.missing cimport C_NA + + +# ---------------------------------------------------------------------- +# Constants +nat_strings = {'NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN'} + +cdef int64_t NPY_NAT = get_nat() 
def _make_error_func(func_name, cls):
    """
    Build a stand-in method named ``func_name`` that always raises ValueError.

    Parameters
    ----------
    func_name : str
        Name given to the returned function and quoted in the error message.
    cls : str, type or None
        A str is used verbatim as the docstring.  Any other non-None object
        must expose an attribute called ``func_name`` whose ``__doc__`` is
        copied.  None leaves the docstring unset.

    Returns
    -------
    callable
        Accepts any arguments and raises ValueError when called.
    """
    def f(*args, **kwargs):
        raise ValueError("NaTType does not support " + func_name)

    f.__name__ = func_name

    if cls is None:
        # nothing to borrow a docstring from
        return f

    # a str is the literal docstring; anything else donates the docstring
    # of its same-named attribute
    f.__doc__ = cls if isinstance(cls, str) else getattr(cls, func_name).__doc__
    return f
def __sub__(self, other):
    # Method of the cdef class ``_NaT``.
    # Some logic is duplicated from _Timestamp.__sub__ so that no subclassing
    # is needed; allows us to @final(_Timestamp.__sub__).
    #
    # Scalars that are datetime-like, timedelta-like, integer, Period, or
    # carry a ``delta`` attribute yield NaT.  ndarrays of kind 'm'/'M' yield
    # an all-NaT array; any other ndarray raises TypeError.  Everything else
    # returns NotImplemented.
    cdef:
        bint is_rsub = False

    if self is not c_NaT:
        # cython __rsub__ semantics: NaT arrived as the right-hand operand
        self, other = other, self
        is_rsub = True

    if (
        PyDateTime_Check(other)
        or PyDelta_Check(other)
        or is_datetime64_object(other)
        or is_timedelta64_object(other)
        # offsets.Tick, offsets.Week
        or hasattr(other, 'delta')
        # For Period compat
        # TODO: the integer behavior is deprecated, remove it
        or is_integer_object(other)
        or util.is_period_object(other)
    ):
        return c_NaT

    if not util.is_array(other):
        return NotImplemented

    kind = other.dtype.kind
    if kind == 'm':
        # NaT - timedelta64: NaT is treated as datetime64, so the result is
        # datetime64.  timedelta64 - NaT: NaT has to be treated as
        # timedelta64 for this to be meaningful, so the result is timedelta64.
        out_dtype = "timedelta64[ns]" if is_rsub else "datetime64[ns]"
    elif kind == 'M':
        # NaT is treated as a datetime, so in either operand order the
        # result is timedelta64.
        out_dtype = "timedelta64[ns]"
    else:
        raise TypeError(f"Cannot subtract NaT from ndarray with "
                        f"dtype {other.dtype}")

    result = np.empty(other.shape, dtype=out_dtype)
    result.fill("NaT")
    return result
+ """ + return self.to_datetime64() + + def __repr__(self) -> str: + return 'NaT' + + def __str__(self) -> str: + return 'NaT' + + def isoformat(self, sep='T') -> str: + # This allows Timestamp(ts.isoformat()) to always correctly roundtrip. + return 'NaT' + + def __hash__(self): + return NPY_NAT + + def __int__(self): + return NPY_NAT + + def __long__(self): + return NPY_NAT + + def total_seconds(self): + """ + Total duration of timedelta in seconds (to ns precision). + """ + # GH#10939 + return np.nan + + @property + def is_leap_year(self): + return False + + @property + def is_month_start(self): + return False + + @property + def is_quarter_start(self): + return False + + @property + def is_year_start(self): + return False + + @property + def is_month_end(self): + return False + + @property + def is_quarter_end(self): + return False + + @property + def is_year_end(self): + return False + + +class NaTType(_NaT): + """(N)ot-(A)-(T)ime, the time equivalent of NaN""" + + def __new__(cls): + cdef _NaT base + + base = _NaT.__new__(cls, 1, 1, 1) + base.value = NPY_NAT + base.freq = None + + return base + + def __reduce_ex__(self, protocol): + # python 3.6 compat + # http://bugs.python.org/issue28730 + # now __reduce_ex__ is defined and higher priority than __reduce__ + return self.__reduce__() + + def __reduce__(self): + return (__nat_unpickle, (None, )) + + def __rdiv__(self, other): + return _nat_rdivide_op(self, other) + + def __rtruediv__(self, other): + return _nat_rdivide_op(self, other) + + def __rfloordiv__(self, other): + return _nat_rdivide_op(self, other) + + def __rmul__(self, other): + if is_integer_object(other) or is_float_object(other): + return c_NaT + return NotImplemented + + # ---------------------------------------------------------------------- + # inject the Timestamp field properties + # these by definition return np.nan + + year = property(fget=lambda self: np.nan) + quarter = property(fget=lambda self: np.nan) + month = property(fget=lambda 
self: np.nan) + day = property(fget=lambda self: np.nan) + hour = property(fget=lambda self: np.nan) + minute = property(fget=lambda self: np.nan) + second = property(fget=lambda self: np.nan) + millisecond = property(fget=lambda self: np.nan) + microsecond = property(fget=lambda self: np.nan) + nanosecond = property(fget=lambda self: np.nan) + + week = property(fget=lambda self: np.nan) + dayofyear = property(fget=lambda self: np.nan) + weekofyear = property(fget=lambda self: np.nan) + days_in_month = property(fget=lambda self: np.nan) + daysinmonth = property(fget=lambda self: np.nan) + dayofweek = property(fget=lambda self: np.nan) + + # inject Timedelta properties + days = property(fget=lambda self: np.nan) + seconds = property(fget=lambda self: np.nan) + microseconds = property(fget=lambda self: np.nan) + nanoseconds = property(fget=lambda self: np.nan) + + # inject pd.Period properties + qyear = property(fget=lambda self: np.nan) + + # ---------------------------------------------------------------------- + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + # These are the ones that can get their docstrings from datetime. + + # nan methods + weekday = _make_nan_func('weekday', datetime.weekday.__doc__) + isoweekday = _make_nan_func('isoweekday', datetime.isoweekday.__doc__) + month_name = _make_nan_func('month_name', # noqa:E128 + """ + Return the month name of the Timestamp with specified locale. + + Parameters + ---------- + locale : string, default None (English locale) + Locale determining the language in which to return the month name. + + Returns + ------- + month_name : string + + .. versionadded:: 0.23.0 + """) + day_name = _make_nan_func('day_name', # noqa:E128 + """ + Return the day name of the Timestamp with specified locale. 
+ + Parameters + ---------- + locale : string, default None (English locale) + Locale determining the language in which to return the day name. + + Returns + ------- + day_name : string + + .. versionadded:: 0.23.0 + """) + # _nat_methods + date = _make_nat_func('date', datetime.date.__doc__) + + utctimetuple = _make_error_func('utctimetuple', datetime) + timetz = _make_error_func('timetz', datetime) + timetuple = _make_error_func('timetuple', datetime) + strftime = _make_error_func('strftime', datetime) + isocalendar = _make_error_func('isocalendar', datetime) + dst = _make_error_func('dst', datetime) + ctime = _make_error_func('ctime', datetime) + time = _make_error_func('time', datetime) + toordinal = _make_error_func('toordinal', datetime) + tzname = _make_error_func('tzname', datetime) + utcoffset = _make_error_func('utcoffset', datetime) + + # "fromisocalendar" was introduced in 3.8 + if PY_MINOR_VERSION >= 8: + fromisocalendar = _make_error_func('fromisocalendar', datetime) + + # ---------------------------------------------------------------------- + # The remaining methods have docstrings copy/pasted from the analogous + # Timestamp methods. + + strptime = _make_error_func('strptime', # noqa:E128 + """ + Timestamp.strptime(string, format) + + Function is not implemented. Use pd.to_datetime(). + """ + ) + + utcfromtimestamp = _make_error_func('utcfromtimestamp', # noqa:E128 + """ + Timestamp.utcfromtimestamp(ts) + + Construct a naive UTC datetime from a POSIX timestamp. + """ + ) + fromtimestamp = _make_error_func('fromtimestamp', # noqa:E128 + """ + Timestamp.fromtimestamp(ts) + + timestamp[, tz] -> tz's local time from POSIX timestamp. + """ + ) + combine = _make_error_func('combine', # noqa:E128 + """ + Timestamp.combine(date, time) + + date, time -> datetime with same date and time fields. + """ + ) + utcnow = _make_error_func('utcnow', # noqa:E128 + """ + Timestamp.utcnow() + + Return a new Timestamp representing UTC day and time. 
+ """ + ) + + timestamp = _make_error_func('timestamp', # noqa:E128 + """Return POSIX timestamp as float.""") + + # GH9513 NaT methods (except to_datetime64) to raise, return np.nan, or + # return NaT create functions that raise, for binding to NaTType + astimezone = _make_error_func('astimezone', # noqa:E128 + """ + Convert tz-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + """) + fromordinal = _make_error_func('fromordinal', # noqa:E128 + """ + Timestamp.fromordinal(ordinal, freq=None, tz=None) + + Passed an ordinal, translate and convert to a ts. + Note: by definition there cannot be any tz info on the ordinal itself. + + Parameters + ---------- + ordinal : int + Date corresponding to a proleptic Gregorian ordinal. + freq : str, DateOffset + Offset to apply to the Timestamp. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for the Timestamp. + """) + + # _nat_methods + to_pydatetime = _make_nat_func('to_pydatetime', # noqa:E128 + """ + Convert a Timestamp object to a native Python datetime object. + + If warn=True, issue a warning if nanoseconds is nonzero. + """) + + now = _make_nat_func('now', # noqa:E128 + """ + Timestamp.now(tz=None) + + Return new Timestamp object representing current time local to + tz. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + """) + today = _make_nat_func('today', # noqa:E128 + """ + Timestamp.today(cls, tz=None) + + Return the current time in the local timezone. This differs + from datetime.today() in that it can be localized to a + passed timezone. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. 
+ """) + round = _make_nat_func('round', # noqa:E128 + """ + Round the Timestamp to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0 + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + """) + floor = _make_nat_func('floor', # noqa:E128 + """ + return a new Timestamp floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. 
versionadded:: 0.24.0 + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted. + """) + ceil = _make_nat_func('ceil', # noqa:E128 + """ + return a new Timestamp ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0 + nonexistent : {'raise', 'shift_forward', 'shift_backward, 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted. 
+ """) + + tz_convert = _make_nat_func('tz_convert', # noqa:E128 + """ + Convert tz-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + """) + tz_localize = _make_nat_func('tz_localize', # noqa:E128 + """ + Convert naive Timestamp to local time zone, or remove + timezone from tz-aware Timestamp. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding local time. + + ambiguous : bool, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + The behavior is as follows: + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. 
cpdef bint is_null_datetimelike(object val, bint inat_is_null=True):
    """
    Decide whether ``val`` counts as a missing datetime/timedelta value.

    Parameters
    ----------
    val : object
    inat_is_null : bool, default True
        Whether an integer equal to ``NPY_NAT`` is also treated as null.

    Returns
    -------
    bool
        True for None, NaT, a float/complex that is not equal to itself,
        a timedelta64/datetime64 scalar whose underlying value is
        ``NPY_NAT``, and (when ``inat_is_null``) an integer equal to
        ``NPY_NAT``.  False for everything else.
    """
    if val is None or val is c_NaT:
        return True

    if util.is_float_object(val) or util.is_complex_object(val):
        # self-inequality is the NaN test
        return val != val

    if util.is_timedelta64_object(val):
        return get_timedelta64_value(val) == NPY_NAT

    if util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT

    if inat_is_null and util.is_integer_object(val):
        return val == NPY_NAT

    return False
+# -*- coding: utf-8 -*- + +from cpython.datetime cimport date, datetime + +from numpy cimport int64_t, int32_t + +cdef extern from "numpy/ndarrayobject.h": + ctypedef int64_t npy_timedelta + ctypedef int64_t npy_datetime + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct PyArray_DatetimeMetaData: + NPY_DATETIMEUNIT base + int64_t num + +cdef extern from "numpy/arrayscalars.h": + ctypedef struct PyDatetimeScalarObject: + # PyObject_HEAD + npy_datetime obval + PyArray_DatetimeMetaData obmeta + + ctypedef struct PyTimedeltaScalarObject: + # PyObject_HEAD + npy_timedelta obval + PyArray_DatetimeMetaData obmeta + +cdef extern from "numpy/ndarraytypes.h": + ctypedef struct npy_datetimestruct: + int64_t year + int32_t month, day, hour, min, sec, us, ps, as + + ctypedef enum NPY_DATETIMEUNIT: + NPY_FR_Y + NPY_FR_M + NPY_FR_W + NPY_FR_D + NPY_FR_B + NPY_FR_h + NPY_FR_m + NPY_FR_s + NPY_FR_ms + NPY_FR_us + NPY_FR_ns + NPY_FR_ps + NPY_FR_fs + NPY_FR_as + +cdef extern from "src/datetime/np_datetime.h": + ctypedef struct pandas_timedeltastruct: + int64_t days + int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds + + void pandas_datetime_to_datetimestruct(npy_datetime val, + NPY_DATETIMEUNIT fr, + npy_datetimestruct *result) nogil + + +cdef int reverse_ops[6] + +cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 + +cdef check_dts_bounds(npy_datetimestruct *dts) + +cdef int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil +cdef void dt64_to_dtstruct(int64_t dt64, npy_datetimestruct* out) nogil +cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil + +cdef int64_t pydatetime_to_dt64(datetime val, npy_datetimestruct *dts) +cdef int64_t pydate_to_dt64(date val, npy_datetimestruct *dts) + +cdef npy_datetime get_datetime64_value(object obj) nogil +cdef npy_timedelta get_timedelta64_value(object obj) nogil +cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil + +cdef int _string_to_dts(str val, 
npy_datetimestruct* dts, + int* out_local, int* out_tzoffset, + bint want_exc) except? -1 diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx new file mode 100644 index 00000000..b59a1101 --- /dev/null +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -0,0 +1,179 @@ +from cpython.object cimport Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE + +from cpython.datetime cimport (datetime, date, + PyDateTime_IMPORT, + PyDateTime_GET_YEAR, PyDateTime_GET_MONTH, + PyDateTime_GET_DAY, PyDateTime_DATE_GET_HOUR, + PyDateTime_DATE_GET_MINUTE, + PyDateTime_DATE_GET_SECOND, + PyDateTime_DATE_GET_MICROSECOND) +PyDateTime_IMPORT + +from numpy cimport int64_t +from pandas._libs.tslibs.util cimport get_c_string_buf_and_size + +cdef extern from "src/datetime/np_datetime.h": + int cmp_npy_datetimestruct(npy_datetimestruct *a, + npy_datetimestruct *b) + + npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, + npy_datetimestruct *d) nogil + + void pandas_datetime_to_datetimestruct(npy_datetime val, + NPY_DATETIMEUNIT fr, + npy_datetimestruct *result) nogil + + void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + NPY_DATETIMEUNIT fr, + pandas_timedeltastruct *result + ) nogil + + npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS + +cdef extern from "src/datetime/np_datetime_strings.h": + int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + int *out_local, int *out_tzoffset) + + +# ---------------------------------------------------------------------- +# numpy object inspection + +cdef inline npy_datetime get_datetime64_value(object obj) nogil: + """ + returns the int64 value underlying scalar numpy datetime64 object + + Note that to interpret this as a datetime, the corresponding unit is + also needed. That can be found using `get_datetime64_unit`. 
cdef inline check_dts_bounds(npy_datetimestruct *dts):
    """
    Raise OutOfBoundsDatetime when ``dts`` lies outside the span delimited
    by ``_NS_MIN_DTS`` and ``_NS_MAX_DTS``, i.e. cannot be represented by
    nanosecond-resolution 64-bit integers.

    Parameters
    ----------
    dts : npy_datetimestruct*

    Raises
    ------
    OutOfBoundsDatetime
        With the offending date and time (to seconds) in the message.
    """
    cdef:
        bint too_early, too_late

    # The year test comes first so the full struct comparison only runs for
    # years at or beyond the boundary years.
    too_early = (dts.year <= 1677 and
                 cmp_npy_datetimestruct(dts, &_NS_MIN_DTS) == -1)
    too_late = (not too_early and
                dts.year >= 2262 and
                cmp_npy_datetimestruct(dts, &_NS_MAX_DTS) == 1)

    if not (too_early or too_late):
        return

    fmt = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
           f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')
    raise OutOfBoundsDatetime(f'Out of bounds nanosecond timestamp: {fmt}')
by-far-most-common frequency NPY_FR_ns""" + return npy_datetimestruct_to_datetime(NPY_FR_ns, dts) + + +cdef inline void dt64_to_dtstruct(int64_t dt64, + npy_datetimestruct* out) nogil: + """Convenience function to call pandas_datetime_to_datetimestruct + with the by-far-most-common frequency NPY_FR_ns""" + pandas_datetime_to_datetimestruct(dt64, NPY_FR_ns, out) + return + + +cdef inline void td64_to_tdstruct(int64_t td64, + pandas_timedeltastruct* out) nogil: + """Convenience function to call pandas_timedelta_to_timedeltastruct + with the by-far-most-common frequency NPY_FR_ns""" + pandas_timedelta_to_timedeltastruct(td64, NPY_FR_ns, out) + return + + +cdef inline int64_t pydatetime_to_dt64(datetime val, + npy_datetimestruct *dts): + """ + Note we are assuming that the datetime object is timezone-naive. + """ + dts.year = PyDateTime_GET_YEAR(val) + dts.month = PyDateTime_GET_MONTH(val) + dts.day = PyDateTime_GET_DAY(val) + dts.hour = PyDateTime_DATE_GET_HOUR(val) + dts.min = PyDateTime_DATE_GET_MINUTE(val) + dts.sec = PyDateTime_DATE_GET_SECOND(val) + dts.us = PyDateTime_DATE_GET_MICROSECOND(val) + dts.ps = dts.as = 0 + return dtstruct_to_dt64(dts) + + +cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts): + dts.year = PyDateTime_GET_YEAR(val) + dts.month = PyDateTime_GET_MONTH(val) + dts.day = PyDateTime_GET_DAY(val) + dts.hour = dts.min = dts.sec = dts.us = 0 + dts.ps = dts.as = 0 + return dtstruct_to_dt64(dts) + + +cdef inline int _string_to_dts(str val, npy_datetimestruct* dts, + int* out_local, int* out_tzoffset, + bint want_exc) except? 
-1: + cdef: + Py_ssize_t length + const char* buf + + buf = get_c_string_buf_and_size(val, &length) + return parse_iso_8601_datetime(buf, length, want_exc, + dts, out_local, out_tzoffset) diff --git a/pandas/_libs/tslibs/offsets.pxd b/pandas/_libs/tslibs/offsets.pxd new file mode 100644 index 00000000..2829a27b --- /dev/null +++ b/pandas/_libs/tslibs/offsets.pxd @@ -0,0 +1,3 @@ +# -*- coding: utf-8 -*- + +cdef to_offset(object obj) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx new file mode 100644 index 00000000..f24dce28 --- /dev/null +++ b/pandas/_libs/tslibs/offsets.pyx @@ -0,0 +1,1133 @@ +import cython + +import time +from typing import Any +from cpython.datetime cimport (PyDateTime_IMPORT, + PyDateTime_Check, + PyDelta_Check, + datetime, timedelta, + time as dt_time) +PyDateTime_IMPORT + +from dateutil.relativedelta import relativedelta + +import numpy as np +cimport numpy as cnp +from numpy cimport int64_t +cnp.import_array() + + +from pandas._libs.tslibs cimport util +from pandas._libs.tslibs.util cimport is_integer_object + +from pandas._libs.tslibs.ccalendar import MONTHS, DAYS +from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek +from pandas._libs.tslibs.conversion cimport pydt_to_i8, localize_pydatetime +from pandas._libs.tslibs.nattype cimport NPY_NAT +from pandas._libs.tslibs.np_datetime cimport ( + npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct) +from pandas._libs.tslibs.timezones import UTC +from pandas._libs.tslibs.tzconversion cimport tz_convert_single + + +# --------------------------------------------------------------------- +# Constants + + +_offset_to_period_map = { + 'WEEKDAY': 'D', + 'EOM': 'M', + 'BM': 'M', + 'BQS': 'Q', + 'QS': 'Q', + 'BQ': 'Q', + 'BA': 'A', + 'AS': 'A', + 'BAS': 'A', + 'MS': 'M', + 'D': 'D', + 'C': 'C', + 'B': 'B', + 'T': 'T', + 'S': 'S', + 'L': 'L', + 'U': 'U', + 'N': 'N', + 'H': 'H', + 'Q': 'Q', + 'A': 'A', + 'W': 'W', + 'M': 'M', + 'Y': 'A', + 'BY': 'A', + 
'YS': 'A', + 'BYS': 'A'} + +need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS'] + +for __prefix in need_suffix: + for _m in MONTHS: + key = f'{__prefix}-{_m}' + _offset_to_period_map[key] = _offset_to_period_map[__prefix] + +for __prefix in ['A', 'Q']: + for _m in MONTHS: + _alias = f'{__prefix}-{_m}' + _offset_to_period_map[_alias] = _alias + +for _d in DAYS: + _offset_to_period_map[f'W-{_d}'] = f'W-{_d}' + + +# --------------------------------------------------------------------- +# Misc Helpers + +cdef to_offset(object obj): + """ + Wrap pandas.tseries.frequencies.to_offset to keep centralize runtime + imports + """ + if isinstance(obj, _BaseOffset): + return obj + from pandas.tseries.frequencies import to_offset + return to_offset(obj) + + +def as_datetime(obj): + f = getattr(obj, 'to_pydatetime', None) + if f is not None: + obj = f() + return obj + + +cpdef bint _is_normalized(dt): + if (dt.hour != 0 or dt.minute != 0 or dt.second != 0 or + dt.microsecond != 0 or getattr(dt, 'nanosecond', 0) != 0): + return False + return True + + +def apply_index_wraps(func): + # Note: normally we would use `@functools.wraps(func)`, but this does + # not play nicely with cython class methods + def wrapper(self, other): + result = func(self, other) + if self.normalize: + result = result.to_period('D').to_timestamp() + return result + + # do @functools.wraps(func) manually since it doesn't work on cdef funcs + wrapper.__name__ = func.__name__ + wrapper.__doc__ = func.__doc__ + try: + wrapper.__module__ = func.__module__ + except AttributeError: + # AttributeError: 'method_descriptor' object has no + # attribute '__module__' + pass + return wrapper + + +cdef _wrap_timedelta_result(result): + """ + Tick operations dispatch to their Timedelta counterparts. Wrap the result + of these operations in a Tick if possible. 
+ + Parameters + ---------- + result : object + + Returns + ------- + object + """ + if PyDelta_Check(result): + # convert Timedelta back to a Tick + from pandas.tseries.offsets import _delta_to_tick + return _delta_to_tick(result) + + return result + +# --------------------------------------------------------------------- +# Business Helpers + +cpdef int get_lastbday(int year, int month) nogil: + """ + Find the last day of the month that is a business day. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + last_bday : int + """ + cdef: + int wkday, days_in_month + + wkday = dayofweek(year, month, 1) + days_in_month = get_days_in_month(year, month) + return days_in_month - max(((wkday + days_in_month - 1) % 7) - 4, 0) + + +cpdef int get_firstbday(int year, int month) nogil: + """ + Find the first day of the month that is a business day. + + Parameters + ---------- + year : int + month : int + + Returns + ------- + first_bday : int + """ + cdef: + int first, wkday + + wkday = dayofweek(year, month, 1) + first = 1 + if wkday == 5: # on Saturday + first = 3 + elif wkday == 6: # on Sunday + first = 2 + return first + + +def _get_calendar(weekmask, holidays, calendar): + """Generate busdaycalendar""" + if isinstance(calendar, np.busdaycalendar): + if not holidays: + holidays = tuple(calendar.holidays) + elif not isinstance(holidays, tuple): + holidays = tuple(holidays) + else: + # trust that calendar.holidays and holidays are + # consistent + pass + return calendar, holidays + + if holidays is None: + holidays = [] + try: + holidays = holidays + calendar.holidays().tolist() + except AttributeError: + pass + holidays = [_to_dt64(dt, dtype='datetime64[D]') for dt in holidays] + holidays = tuple(sorted(holidays)) + + kwargs = {'weekmask': weekmask} + if holidays: + kwargs['holidays'] = holidays + + busdaycalendar = np.busdaycalendar(**kwargs) + return busdaycalendar, holidays + + +def _to_dt64(dt, dtype='datetime64'): + # Currently + # > 
np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') + # numpy.datetime64('2013-05-01T02:00:00.000000+0200') + # Thus astype is needed to cast datetime to datetime64[D] + if getattr(dt, 'tzinfo', None) is not None: + i8 = pydt_to_i8(dt) + dt = tz_convert_single(i8, UTC, dt.tzinfo) + dt = np.int64(dt).astype('datetime64[ns]') + else: + dt = np.datetime64(dt) + if dt.dtype.name != dtype: + dt = dt.astype(dtype) + return dt + + +# --------------------------------------------------------------------- +# Validation + + +def _validate_business_time(t_input): + if isinstance(t_input, str): + try: + t = time.strptime(t_input, '%H:%M') + return dt_time(hour=t.tm_hour, minute=t.tm_min) + except ValueError: + raise ValueError("time data must match '%H:%M' format") + elif isinstance(t_input, dt_time): + if t_input.second != 0 or t_input.microsecond != 0: + raise ValueError( + "time data must be specified only with hour and minute") + return t_input + else: + raise ValueError("time data must be string or datetime.time") + + +# --------------------------------------------------------------------- +# Constructor Helpers + +relativedelta_kwds = {'years', 'months', 'weeks', 'days', 'year', 'month', + 'day', 'weekday', 'hour', 'minute', 'second', + 'microsecond', 'nanosecond', 'nanoseconds', 'hours', + 'minutes', 'seconds', 'microseconds'} + + +def _determine_offset(kwds): + # timedelta is used for sub-daily plural offsets and all singular + # offsets relativedelta is used for plural offsets of daily length or + # more nanosecond(s) are handled by apply_wraps + kwds_no_nanos = dict( + (k, v) for k, v in kwds.items() + if k not in ('nanosecond', 'nanoseconds') + ) + # TODO: Are nanosecond and nanoseconds allowed somewhere? 
+ + _kwds_use_relativedelta = ('years', 'months', 'weeks', 'days', + 'year', 'month', 'week', 'day', 'weekday', + 'hour', 'minute', 'second', 'microsecond') + + use_relativedelta = False + if len(kwds_no_nanos) > 0: + if any(k in _kwds_use_relativedelta for k in kwds_no_nanos): + offset = relativedelta(**kwds_no_nanos) + use_relativedelta = True + else: + # sub-daily offset - use timedelta (tz-aware) + offset = timedelta(**kwds_no_nanos) + else: + offset = timedelta(1) + return offset, use_relativedelta + + +# --------------------------------------------------------------------- +# Mixins & Singletons + + +class ApplyTypeError(TypeError): + # sentinel class for catching the apply error to return NotImplemented + pass + + +# --------------------------------------------------------------------- +# Base Classes + +class _BaseOffset: + """ + Base class for DateOffset methods that are not overridden by subclasses + and will (after pickle errors are resolved) go into a cdef class. + """ + _typ = "dateoffset" + _day_opt = None + _attributes = frozenset(['n', 'normalize']) + + def __init__(self, n=1, normalize=False): + n = self._validate_n(n) + object.__setattr__(self, "n", n) + object.__setattr__(self, "normalize", normalize) + object.__setattr__(self, "_cache", {}) + + def __setattr__(self, name, value): + raise AttributeError("DateOffset objects are immutable.") + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. 
"infer" + return False + try: + return self._params == other._params + except AttributeError: + # other is not a DateOffset object + return False + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._params) + + @property + def _params(self): + """ + Returns a tuple containing all of the attributes needed to evaluate + equality between two DateOffset objects. + """ + # NB: non-cython subclasses override property with cache_readonly + all_paras = self.__dict__.copy() + if 'holidays' in all_paras and not all_paras['holidays']: + all_paras.pop('holidays') + exclude = ['kwds', 'name', 'calendar'] + attrs = [(k, v) for k, v in all_paras.items() + if (k not in exclude) and (k[0] != '_')] + attrs = sorted(set(attrs)) + params = tuple([str(type(self))] + attrs) + return params + + @property + def kwds(self): + # for backwards-compatibility + kwds = {name: getattr(self, name, None) for name in self._attributes + if name not in ['n', 'normalize']} + return {name: kwds[name] for name in kwds if kwds[name] is not None} + + @property + def base(self): + """ + Returns a copy of the calling offset object with n=1 and all other + attributes equal. 
+ """ + return type(self)(n=1, normalize=self.normalize, **self.kwds) + + def __add__(self, other): + if getattr(other, "_typ", None) in ["datetimeindex", "periodindex", + "datetimearray", "periodarray", + "series", "period", "dataframe"]: + # defer to the other class's implementation + return other + self + try: + return self.apply(other) + except ApplyTypeError: + return NotImplemented + + def __sub__(self, other): + if PyDateTime_Check(other): + raise TypeError('Cannot subtract datetime from offset.') + elif type(other) == type(self): + return type(self)(self.n - other.n, normalize=self.normalize, + **self.kwds) + else: # pragma: no cover + return NotImplemented + + def __call__(self, other): + return self.apply(other) + + def __mul__(self, other): + if hasattr(other, "_typ"): + return NotImplemented + if util.is_array(other): + return np.array([self * x for x in other]) + return type(self)(n=other * self.n, normalize=self.normalize, + **self.kwds) + + def __neg__(self): + # Note: we are deferring directly to __mul__ instead of __rmul__, as + # that allows us to use methods that can go in a `cdef class` + return self * -1 + + def copy(self): + # Note: we are deferring directly to __mul__ instead of __rmul__, as + # that allows us to use methods that can go in a `cdef class` + return self * 1 + + def __repr__(self) -> str: + className = getattr(self, '_outputName', type(self).__name__) + + if abs(self.n) != 1: + plural = 's' + else: + plural = '' + + n_str = "" + if self.n != 1: + n_str = f"{self.n} * " + + out = f'<{n_str}{className}{plural}{self._repr_attrs()}>' + return out + + def _get_offset_day(self, datetime other): + # subclass must implement `_day_opt`; calling from the base class + # will raise NotImplementedError. + return get_day_of_month(other, self._day_opt) + + def _validate_n(self, n): + """ + Require that `n` be a nonzero integer. 
+ + Parameters + ---------- + n : int + + Returns + ------- + nint : int + + Raises + ------ + TypeError if `int(n)` raises + ValueError if n != int(n) + """ + if util.is_timedelta64_object(n): + raise TypeError(f'`n` argument must be an integer, got {type(n)}') + try: + nint = int(n) + except (ValueError, TypeError): + raise TypeError(f'`n` argument must be an integer, got {type(n)}') + if n != nint: + raise ValueError(f'`n` argument must be an integer, got {n}') + return nint + + def __setstate__(self, state): + """Reconstruct an instance from a pickled state""" + if 'offset' in state: + # Older (<0.22.0) versions have offset attribute instead of _offset + if '_offset' in state: # pragma: no cover + raise AssertionError('Unexpected key `_offset`') + state['_offset'] = state.pop('offset') + state['kwds']['offset'] = state['_offset'] + + if '_offset' in state and not isinstance(state['_offset'], timedelta): + # relativedelta, we need to populate using its kwds + offset = state['_offset'] + odict = offset.__dict__ + kwds = {key: odict[key] for key in odict if odict[key]} + state.update(kwds) + + if '_cache' not in state: + state['_cache'] = {} + + self.__dict__.update(state) + + if 'weekmask' in state and 'holidays' in state: + calendar, holidays = _get_calendar(weekmask=self.weekmask, + holidays=self.holidays, + calendar=None) + object.__setattr__(self, "calendar", calendar) + object.__setattr__(self, "holidays", holidays) + + def __getstate__(self): + """Return a pickleable state""" + state = self.__dict__.copy() + + # we don't want to actually pickle the calendar object + # as its a np.busyday; we recreate on deserilization + if 'calendar' in state: + del state['calendar'] + try: + state['kwds'].pop('calendar') + except KeyError: + pass + + return state + + +class BaseOffset(_BaseOffset): + # Here we add __rfoo__ methods that don't play well with cdef classes + def __rmul__(self, other): + return self.__mul__(other) + + def __radd__(self, other): + return 
self.__add__(other) + + def __rsub__(self, other): + if getattr(other, '_typ', None) in ['datetimeindex', 'series']: + # i.e. isinstance(other, (ABCDatetimeIndex, ABCSeries)) + return other - self + return -self + other + + +class _Tick: + """ + dummy class to mix into tseries.offsets.Tick so that in tslibs.period we + can do isinstance checks on _Tick and avoid importing tseries.offsets + """ + + # ensure that reversed-ops with numpy scalars return NotImplemented + __array_priority__ = 1000 + + def __truediv__(self, other): + result = self.delta.__truediv__(other) + return _wrap_timedelta_result(result) + + def __rtruediv__(self, other): + result = self.delta.__rtruediv__(other) + return _wrap_timedelta_result(result) + + +# ---------------------------------------------------------------------- +# RelativeDelta Arithmetic + +def shift_day(other: datetime, days: int) -> datetime: + """ + Increment the datetime `other` by the given number of days, retaining + the time-portion of the datetime. For tz-naive datetimes this is + equivalent to adding a timedelta. For tz-aware datetimes it is similar to + dateutil's relativedelta.__add__, but handles pytz tzinfo objects. + + Parameters + ---------- + other : datetime or Timestamp + days : int + + Returns + ------- + shifted: datetime or Timestamp + """ + if other.tzinfo is None: + return other + timedelta(days=days) + + tz = other.tzinfo + naive = other.replace(tzinfo=None) + shifted = naive + timedelta(days=days) + return localize_pydatetime(shifted, tz) + + +cdef inline int year_add_months(npy_datetimestruct dts, int months) nogil: + """new year number after shifting npy_datetimestruct number of months""" + return dts.year + (dts.month + months - 1) // 12 + + +cdef inline int month_add_months(npy_datetimestruct dts, int months) nogil: + """ + New month number after shifting npy_datetimestruct + number of months. 
+ """ + cdef: + int new_month = (dts.month + months) % 12 + return 12 if new_month == 0 else new_month + + +@cython.wraparound(False) +@cython.boundscheck(False) +def shift_quarters(int64_t[:] dtindex, int quarters, + int q1start_month, object day, int modby=3): + """ + Given an int64 array representing nanosecond timestamps, shift all elements + by the specified number of quarters using DateOffset semantics. + + Parameters + ---------- + dtindex : int64_t[:] timestamps for input dates + quarters : int number of quarters to shift + q1start_month : int month in which Q1 begins by convention + day : {'start', 'end', 'business_start', 'business_end'} + modby : int (3 for quarters, 12 for years) + + Returns + ------- + out : ndarray[int64_t] + """ + cdef: + Py_ssize_t i + npy_datetimestruct dts + int count = len(dtindex) + int months_to_roll, months_since, n, compare_day + bint roll_check + int64_t[:] out = np.empty(count, dtype='int64') + + if day == 'start': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + n = quarters + + months_since = (dts.month - q1start_month) % modby + + # offset semantics - if on the anchor point and going backwards + # shift to next + if n <= 0 and (months_since != 0 or + (months_since == 0 and dts.day > 1)): + n += 1 + + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + dts.day = 1 + + out[i] = dtstruct_to_dt64(&dts) + + elif day == 'end': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + n = quarters + + months_since = (dts.month - q1start_month) % modby + + if n <= 0 and months_since != 0: + # The general case of this condition would be + # `months_since != 0 or (months_since == 0 and + # dts.day > get_days_in_month(dts.year, dts.month))` + # but the get_days_in_month inequality would never 
hold. + n += 1 + elif n > 0 and (months_since == 0 and + dts.day < get_days_in_month(dts.year, + dts.month)): + n -= 1 + + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + dts.day = get_days_in_month(dts.year, dts.month) + + out[i] = dtstruct_to_dt64(&dts) + + elif day == 'business_start': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + n = quarters + + months_since = (dts.month - q1start_month) % modby + # compare_day is only relevant for comparison in the case + # where months_since == 0. + compare_day = get_firstbday(dts.year, dts.month) + + if n <= 0 and (months_since != 0 or + (months_since == 0 and dts.day > compare_day)): + # make sure to roll forward, so negate + n += 1 + elif n > 0 and (months_since == 0 and dts.day < compare_day): + # pretend to roll back if on same month but + # before compare_day + n -= 1 + + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + + dts.day = get_firstbday(dts.year, dts.month) + + out[i] = dtstruct_to_dt64(&dts) + + elif day == 'business_end': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + n = quarters + + months_since = (dts.month - q1start_month) % modby + # compare_day is only relevant for comparison in the case + # where months_since == 0. 
+ compare_day = get_lastbday(dts.year, dts.month) + + if n <= 0 and (months_since != 0 or + (months_since == 0 and dts.day > compare_day)): + # make sure to roll forward, so negate + n += 1 + elif n > 0 and (months_since == 0 and dts.day < compare_day): + # pretend to roll back if on same month but + # before compare_day + n -= 1 + + dts.year = year_add_months(dts, modby * n - months_since) + dts.month = month_add_months(dts, modby * n - months_since) + + dts.day = get_lastbday(dts.year, dts.month) + + out[i] = dtstruct_to_dt64(&dts) + + else: + raise ValueError("day must be None, 'start', 'end', " + "'business_start', or 'business_end'") + + return np.asarray(out) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def shift_months(int64_t[:] dtindex, int months, object day=None): + """ + Given an int64-based datetime index, shift all elements + specified number of months using DateOffset semantics + + day: {None, 'start', 'end'} + * None: day of month + * 'start' 1st day of month + * 'end' last day of month + """ + cdef: + Py_ssize_t i + npy_datetimestruct dts + int count = len(dtindex) + int months_to_roll + bint roll_check + int64_t[:] out = np.empty(count, dtype='int64') + + if day is None: + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + dts.year = year_add_months(dts, months) + dts.month = month_add_months(dts, months) + + dts.day = min(dts.day, get_days_in_month(dts.year, dts.month)) + out[i] = dtstruct_to_dt64(&dts) + elif day == 'start': + roll_check = False + if months <= 0: + months += 1 + roll_check = True + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + months_to_roll = months + + # offset semantics - if on the anchor point and going backwards + # shift to next + if roll_check and dts.day == 1: + months_to_roll -= 1 + + dts.year = year_add_months(dts, months_to_roll) + 
dts.month = month_add_months(dts, months_to_roll) + dts.day = 1 + + out[i] = dtstruct_to_dt64(&dts) + elif day == 'end': + roll_check = False + if months > 0: + months -= 1 + roll_check = True + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + months_to_roll = months + + # similar semantics - when adding shift forward by one + # month if already at an end of month + if roll_check and dts.day == get_days_in_month(dts.year, + dts.month): + months_to_roll += 1 + + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + + dts.day = get_days_in_month(dts.year, dts.month) + out[i] = dtstruct_to_dt64(&dts) + + elif day == 'business_start': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + months_to_roll = months + compare_day = get_firstbday(dts.year, dts.month) + + months_to_roll = roll_convention(dts.day, months_to_roll, + compare_day) + + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + + dts.day = get_firstbday(dts.year, dts.month) + out[i] = dtstruct_to_dt64(&dts) + + elif day == 'business_end': + with nogil: + for i in range(count): + if dtindex[i] == NPY_NAT: + out[i] = NPY_NAT + continue + + dt64_to_dtstruct(dtindex[i], &dts) + months_to_roll = months + compare_day = get_lastbday(dts.year, dts.month) + + months_to_roll = roll_convention(dts.day, months_to_roll, + compare_day) + + dts.year = year_add_months(dts, months_to_roll) + dts.month = month_add_months(dts, months_to_roll) + + dts.day = get_lastbday(dts.year, dts.month) + out[i] = dtstruct_to_dt64(&dts) + + else: + raise ValueError("day must be None, 'start', 'end', " + "'business_start', or 'business_end'") + + return np.asarray(out) + + +def shift_month(stamp: datetime, months: int, + day_opt: object=None) -> datetime: + """ + Given a 
datetime (or Timestamp) `stamp`, an integer `months` and an + option `day_opt`, return a new datetimelike that many months later, + with day determined by `day_opt` using relativedelta semantics. + + Scalar analogue of shift_months + + Parameters + ---------- + stamp : datetime or Timestamp + months : int + day_opt : None, 'start', 'end', 'business_start', 'business_end', or int + None: returned datetimelike has the same day as the input, or the + last day of the month if the new month is too short + 'start': returned datetimelike has day=1 + 'end': returned datetimelike has day on the last day of the month + 'business_start': returned datetimelike has day on the first + business day of the month + 'business_end': returned datetimelike has day on the last + business day of the month + int: returned datetimelike has day equal to day_opt + + Returns + ------- + shifted : datetime or Timestamp (same as input `stamp`) + """ + cdef: + int year, month, day + int days_in_month, dy + + dy = (stamp.month + months) // 12 + month = (stamp.month + months) % 12 + + if month == 0: + month = 12 + dy -= 1 + year = stamp.year + dy + + if day_opt is None: + days_in_month = get_days_in_month(year, month) + day = min(stamp.day, days_in_month) + elif day_opt == 'start': + day = 1 + elif day_opt == 'end': + day = get_days_in_month(year, month) + elif day_opt == 'business_start': + # first business day of month + day = get_firstbday(year, month) + elif day_opt == 'business_end': + # last business day of month + day = get_lastbday(year, month) + elif is_integer_object(day_opt): + days_in_month = get_days_in_month(year, month) + day = min(day_opt, days_in_month) + else: + raise ValueError(day_opt) + return stamp.replace(year=year, month=month, day=day) + + +cpdef int get_day_of_month(datetime other, day_opt) except? -1: + """ + Find the day in `other`'s month that satisfies a DateOffset's is_on_offset + policy, as described by the `day_opt` argument. 
+ + Parameters + ---------- + other : datetime or Timestamp + day_opt : 'start', 'end', 'business_start', 'business_end', or int + 'start': returns 1 + 'end': returns last day of the month + 'business_start': returns the first business day of the month + 'business_end': returns the last business day of the month + int: returns the day in the month indicated by `other`, or the last of + day the month if the value exceeds in that month's number of days. + + Returns + ------- + day_of_month : int + + Examples + ------- + >>> other = datetime(2017, 11, 14) + >>> get_day_of_month(other, 'start') + 1 + >>> get_day_of_month(other, 'end') + 30 + + """ + cdef: + int days_in_month + + if day_opt == 'start': + return 1 + elif day_opt == 'end': + days_in_month = get_days_in_month(other.year, other.month) + return days_in_month + elif day_opt == 'business_start': + # first business day of month + return get_firstbday(other.year, other.month) + elif day_opt == 'business_end': + # last business day of month + return get_lastbday(other.year, other.month) + elif is_integer_object(day_opt): + days_in_month = get_days_in_month(other.year, other.month) + return min(day_opt, days_in_month) + elif day_opt is None: + # Note: unlike `shift_month`, get_day_of_month does not + # allow day_opt = None + raise NotImplementedError + else: + raise ValueError(day_opt) + + +cpdef int roll_convention(int other, int n, int compare) nogil: + """ + Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. + + Parameters + ---------- + other : int, generally the day component of a datetime + n : number of periods to increment, before adjusting for rolling + compare : int, generally the day component of a datetime, in the same + month as the datetime form which `other` was taken. 
+ + Returns + ------- + n : int number of periods to increment + """ + if n > 0 and other < compare: + n -= 1 + elif n <= 0 and other > compare: + # as if rolled forward already + n += 1 + return n + + +def roll_qtrday(other: datetime, n: int, month: int, + day_opt: object, modby: int=3) -> int: + """ + Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. + + Parameters + ---------- + other : datetime or Timestamp + n : number of periods to increment, before adjusting for rolling + month : int reference month giving the first month of the year + day_opt : 'start', 'end', 'business_start', 'business_end', or int + The convention to use in finding the day in a given month against + which to compare for rollforward/rollbackward decisions. + modby : int 3 for quarters, 12 for years + + Returns + ------- + n : int number of periods to increment + + See Also + -------- + get_day_of_month : Find the day in a month provided an offset. + """ + cdef: + int months_since + # TODO: Merge this with roll_yearday by setting modby=12 there? + # code de-duplication versus perf hit? + # TODO: with small adjustments this could be used in shift_quarters + months_since = other.month % modby - month % modby + + if n > 0: + if months_since < 0 or (months_since == 0 and + other.day < get_day_of_month(other, + day_opt)): + # pretend to roll back if on same month but + # before compare_day + n -= 1 + else: + if months_since > 0 or (months_since == 0 and + other.day > get_day_of_month(other, + day_opt)): + # make sure to roll forward, so negate + n += 1 + return n + + +def roll_yearday(other: datetime, n: int, month: int, day_opt: object) -> int: + """ + Possibly increment or decrement the number of periods to shift + based on rollforward/rollbackward conventions. 
+ + Parameters + ---------- + other : datetime or Timestamp + n : number of periods to increment, before adjusting for rolling + month : reference month giving the first month of the year + day_opt : 'start', 'end', 'business_start', 'business_end', or int + The day of the month to compare against that of `other` when + incrementing or decrementing the number of periods: + + 'start': 1 + 'end': last day of the month + 'business_start': first business day of the month + 'business_end': last business day of the month + int: day in the month indicated by `other`, or the last of day + the month if the value exceeds in that month's number of days. + + Returns + ------- + n : int number of periods to increment + + Notes + ----- + * Mirrors `roll_check` in shift_months + + Examples + ------- + >>> month = 3 + >>> day_opt = 'start' # `other` will be compared to March 1 + >>> other = datetime(2017, 2, 10) # before March 1 + >>> roll_yearday(other, 2, month, day_opt) + 1 + >>> roll_yearday(other, -7, month, day_opt) + -7 + >>> + >>> other = Timestamp('2014-03-15', tz='US/Eastern') # after March 1 + >>> roll_yearday(other, 2, month, day_opt) + 2 + >>> roll_yearday(other, -7, month, day_opt) + -6 + + >>> month = 6 + >>> day_opt = 'end' # `other` will be compared to June 30 + >>> other = datetime(1999, 6, 29) # before June 30 + >>> roll_yearday(other, 5, month, day_opt) + 4 + >>> roll_yearday(other, -7, month, day_opt) + -7 + >>> + >>> other = Timestamp(2072, 8, 24, 6, 17, 18) # after June 30 + >>> roll_yearday(other, 5, month, day_opt) + 5 + >>> roll_yearday(other, -7, month, day_opt) + -6 + + """ + # Note: The other.day < ... condition will never hold when day_opt=='start' + # and the other.day > ... condition will never hold when day_opt=='end'. + # At some point these extra checks may need to be optimized away. + # But that point isn't today. 
+ if n > 0: + if other.month < month or (other.month == month and + other.day < get_day_of_month(other, + day_opt)): + n -= 1 + else: + if other.month > month or (other.month == month and + other.day > get_day_of_month(other, + day_opt)): + n += 1 + return n diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx new file mode 100644 index 00000000..3705b0a4 --- /dev/null +++ b/pandas/_libs/tslibs/parsing.pyx @@ -0,0 +1,986 @@ +""" +Parsing functions for datetime and datetime-like strings. +""" +import re +import time + +from libc.string cimport strchr + +import cython +from cython import Py_ssize_t + +from cpython.object cimport PyObject_Str + +from cpython.datetime cimport datetime, datetime_new, import_datetime, tzinfo +from cpython.version cimport PY_VERSION_HEX +import_datetime() + +import numpy as np +cimport numpy as cnp +from numpy cimport (PyArray_GETITEM, PyArray_ITER_DATA, PyArray_ITER_NEXT, + PyArray_IterNew, flatiter, float64_t) +cnp.import_array() + +# dateutil compat +from dateutil.tz import (tzoffset, + tzlocal as _dateutil_tzlocal, + tzutc as _dateutil_tzutc, + tzstr as _dateutil_tzstr) +from dateutil.relativedelta import relativedelta +from dateutil.parser import DEFAULTPARSER +from dateutil.parser import parse as du_parse + +from pandas._config import get_option + +from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS +from pandas._libs.tslibs.nattype import nat_strings, NaT +from pandas._libs.tslibs.util cimport is_array, get_c_string_buf_and_size +from pandas._libs.tslibs.frequencies cimport get_rule_month + +cdef extern from "../src/headers/portable.h": + int getdigit_ascii(char c, int default) nogil + +cdef extern from "../src/parser/tokenizer.h": + double xstrtod(const char *p, char **q, char decimal, char sci, char tsep, + int skip_trailing, int *error, int *maybe_int) + + +# ---------------------------------------------------------------------- +# Constants + + +class DateParseError(ValueError): + pass + + 
+_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, + second=0, microsecond=0) + +cdef: + set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} + +# ---------------------------------------------------------------------- +cdef: + const char* delimiters = " /-." + int MAX_DAYS_IN_MONTH = 31, MAX_MONTH = 12 + + +cdef inline bint _is_not_delimiter(const char ch): + return strchr(delimiters, ch) == NULL + + +cdef inline int _parse_2digit(const char* s): + cdef int result = 0 + result += getdigit_ascii(s[0], -10) * 10 + result += getdigit_ascii(s[1], -100) * 1 + return result + + +cdef inline int _parse_4digit(const char* s): + cdef int result = 0 + result += getdigit_ascii(s[0], -10) * 1000 + result += getdigit_ascii(s[1], -100) * 100 + result += getdigit_ascii(s[2], -1000) * 10 + result += getdigit_ascii(s[3], -10000) * 1 + return result + + +cdef inline object _parse_delimited_date(str date_string, bint dayfirst): + """ + Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY. + + At the beginning function tries to parse date in MM/DD/YYYY format, but + if month > 12 - in DD/MM/YYYY (`dayfirst == False`). + With `dayfirst == True` function makes an attempt to parse date in + DD/MM/YYYY, if an attempt is wrong - in MM/DD/YYYY + + For MM/DD/YYYY, DD/MM/YYYY: delimiter can be a space or one of /-. + For MM/YYYY: delimiter can be a space or one of /- + If `date_string` does not match these formats, then function returns + None, None; DateParseError is raised when month/day fail the range check + + Parameters + ---------- + date_string : str + dayfirst : bool + + Returns + ------- + datetime or None + str or None + Describing resolution of the parsed string. 
+ """ + cdef: + const char* buf + Py_ssize_t length + int day = 1, month = 1, year + bint can_swap = 0 + + buf = get_c_string_buf_and_size(date_string, &length) + if length == 10: + # parsing MM?DD?YYYY and DD?MM?YYYY dates + if _is_not_delimiter(buf[2]) or _is_not_delimiter(buf[5]): + return None, None + month = _parse_2digit(buf) + day = _parse_2digit(buf + 3) + year = _parse_4digit(buf + 6) + reso = 'day' + can_swap = 1 + elif length == 7: + # parsing MM?YYYY dates + if buf[2] == b'.' or _is_not_delimiter(buf[2]): + # we cannot reliably tell whether e.g. 10.2010 is a float + # or a date, thus we refuse to parse it here + return None, None + month = _parse_2digit(buf) + year = _parse_4digit(buf + 3) + reso = 'month' + else: + return None, None + + if month < 0 or day < 0 or year < 1000: + # some part is not an integer or year < 1000, so + # date_string can't be converted to a date in the above formats + return None, None + + if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ + and (month <= MAX_MONTH or day <= MAX_MONTH): + if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: + day, month = month, day + if PY_VERSION_HEX >= 0x03060100: + # In Python <= 3.6.0 there is no range checking for invalid dates + # in C api, thus we call faster C version for 3.6.1 or newer + return datetime_new(year, month, day, 0, 0, 0, 0, None), reso + return datetime(year, month, day, 0, 0, 0, 0, None), reso + + raise DateParseError(f"Invalid date specified ({month}/{day})") + + +cdef inline bint does_string_look_like_time(str parse_string): + """ + Checks whether given string is a time: it has to start either from + H:MM or from HH:MM, and hour and minute values must be valid. + + Parameters + ---------- + parse_string : str + + Returns + ------- + bool + Whether given string is potentially a time. 
+ """ + cdef: + const char* buf + Py_ssize_t length + int hour = -1, minute = -1 + + buf = get_c_string_buf_and_size(parse_string, &length) + if length >= 4: + if buf[1] == b':': + # h:MM format + hour = getdigit_ascii(buf[0], -1) + minute = _parse_2digit(buf + 2) + elif buf[2] == b':': + # HH:MM format + hour = _parse_2digit(buf) + minute = _parse_2digit(buf + 3) + + return 0 <= hour <= 23 and 0 <= minute <= 59 + + +def parse_datetime_string(date_string: str, freq=None, dayfirst=False, + yearfirst=False, **kwargs): + """ + Parse datetime string, only returns datetime. + Strings that look like a time (H:MM or HH:MM) are handled specially. + + Returns + ------- + datetime + """ + + cdef: + object dt + + if not _does_string_look_like_datetime(date_string): + raise ValueError('Given date string not likely a datetime.') + + if does_string_look_like_time(date_string): + # _DEFAULT_DATETIME is not passed, so the current date is the default + dt = du_parse(date_string, dayfirst=dayfirst, + yearfirst=yearfirst, **kwargs) + return dt + + dt, _ = _parse_delimited_date(date_string, dayfirst) + if dt is not None: + return dt + + try: + dt, _, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) + return dt + except DateParseError: + raise + except ValueError: + pass + + try: + dt = du_parse(date_string, default=_DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) + except TypeError: + # following may be raised from dateutil + # TypeError: 'NoneType' object is not iterable + raise ValueError('Given date string not likely a datetime.') + + return dt + + +def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None): + """ + Try hard to parse datetime string, leveraging dateutil plus some extra + goodies like quarter recognition. 
+ + Parameters + ---------- + arg : str + freq : str or DateOffset, default None + Helps with interpreting time string if supplied + dayfirst : bool, default None + If None uses the ``display.date_dayfirst`` option + yearfirst : bool, default None + If None uses the ``display.date_yearfirst`` option + + Returns + ------- + datetime, datetime/dateutil.parser._result, str + """ + if not isinstance(arg, str): + raise TypeError("parse_time_string argument must be str") + + if getattr(freq, "_typ", None) == "dateoffset": + freq = freq.rule_code + + if dayfirst is None or yearfirst is None: + if dayfirst is None: + dayfirst = get_option("display.date_dayfirst") + if yearfirst is None: + yearfirst = get_option("display.date_yearfirst") + + res = parse_datetime_string_with_reso(arg, freq=freq, + dayfirst=dayfirst, + yearfirst=yearfirst) + return res + + +cdef parse_datetime_string_with_reso(str date_string, freq=None, dayfirst=False, + yearfirst=False): + """ + Parse datetime string and try to identify its resolution. + + Returns + ------- + datetime + datetime/dateutil.parser._result + str + Inferred resolution of the parsed string. 
+ + Raises + ------ + ValueError : preliminary check suggests string is not datetime + DateParseError : error within dateutil + """ + cdef: + object parsed, reso + + if not _does_string_look_like_datetime(date_string): + raise ValueError('Given date string not likely a datetime.') + + parsed, reso = _parse_delimited_date(date_string, dayfirst) + if parsed is not None: + return parsed, parsed, reso + + try: + return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) + except DateParseError: + raise + except ValueError: + pass + + try: + parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, + dayfirst=dayfirst, yearfirst=yearfirst, + ignoretz=False, tzinfos=None) + except (ValueError, OverflowError) as err: + # TODO: allow raise of errors within instead + raise DateParseError(err) + if parsed is None: + raise DateParseError(f"Could not parse {date_string}") + return parsed, parsed, reso + + +cpdef bint _does_string_look_like_datetime(str py_string): + """ + Checks whether given string may be a datetime: a string that is wholly a + number has to start with '0' or be at least 1000. + + Parameters + ---------- + py_string : str + + Returns + ------- + bool + Whether given string is potentially a datetime. + """ + cdef: + const char *buf + char *endptr = NULL + Py_ssize_t length = -1 + double converted_date + char first + int error = 0 + + buf = get_c_string_buf_and_size(py_string, &length) + if length >= 1: + first = buf[0] + if first == b'0': + # Strings starting with 0 are more consistent with a + # date-like string than a number + return True + elif py_string in _not_datelike_strings: + return False + else: + # xstrtod with these parameters mimics the behavior of a python `float` + # cast; for example, " 35.e-1 " is a valid string for this cast, so + # to call xstrtod correctly it is necessary to pass these params: + # b'.' 
- a dot is used as the decimal separator, b'e' - an exponential form of + # a float number can be used, b'\0' - not to use a thousand + # separator, 1 - skip extra spaces before and after. + converted_date = xstrtod(buf, &endptr, + b'.', b'e', b'\0', 1, &error, NULL) + # if there were no errors and the whole line was parsed, then ... + if error == 0 and endptr == buf + length: + return converted_date >= 1000 + + return True + + +cdef inline object _parse_dateabbr_string(object date_string, object default, + object freq): + cdef: + object ret + int year, quarter = -1, month, mnum, date_len + + # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 + assert isinstance(date_string, str) + + if date_string in nat_strings: + return NaT, NaT, '' + + date_string = date_string.upper() + date_len = len(date_string) + + if date_len == 4: + # parse year only like 2000 + try: + ret = default.replace(year=int(date_string)) + return ret, ret, 'year' + except ValueError: + pass + + try: + if 4 <= date_len <= 7: + i = date_string.index('Q', 1, 6) + if i == 1: + quarter = int(date_string[0]) + if date_len == 4 or (date_len == 5 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d)') + year = 2000 + int(date_string[-2:]) + elif date_len == 6 or (date_len == 7 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d\d\d)') + year = int(date_string[-4:]) + else: + raise ValueError + elif i == 2 or i == 3: + # r'(\d\d)-?Q(\d)' + if date_len == 4 or (date_len == 5 + and date_string[i - 1] == '-'): + quarter = int(date_string[-1]) + year = 2000 + int(date_string[:2]) + else: + raise ValueError + elif i == 4 or i == 5: + if date_len == 6 or (date_len == 7 + and date_string[i - 1] == '-'): + # r'(\d\d\d\d)-?Q(\d)' + quarter = int(date_string[-1]) + year = int(date_string[:4]) + else: + raise ValueError + + if not (1 <= quarter <= 4): + raise DateParseError(f'Incorrect quarterly string is given, ' + f'quarter must be ' + f'between 1 and 4: {date_string}') + + if freq is not None: + # hack 
attack, #1228 + try: + mnum = MONTH_NUMBERS[get_rule_month(freq)] + 1 + except (KeyError, ValueError): + raise DateParseError(f'Unable to retrieve month ' + f'information from given ' + f'freq: {freq}') + + month = (mnum + (quarter - 1) * 3) % 12 + 1 + if month > mnum: + year -= 1 + else: + month = (quarter - 1) * 3 + 1 + + ret = default.replace(year=year, month=month) + return ret, ret, 'quarter' + + except DateParseError: + raise + except ValueError: + pass + + if date_len == 6 and (freq == 'M' or + getattr(freq, 'rule_code', None) == 'M'): + year = int(date_string[:4]) + month = int(date_string[4:6]) + try: + ret = default.replace(year=year, month=month) + return ret, ret, 'month' + except ValueError: + pass + + for pat in ['%Y-%m', '%b %Y', '%b-%Y']: + try: + ret = datetime.strptime(date_string, pat) + return ret, ret, 'month' + except ValueError: + pass + + raise ValueError(f'Unable to parse {date_string}') + + +cdef dateutil_parse(str timestr, object default, ignoretz=False, + tzinfos=None, dayfirst=None, yearfirst=None): + """Lifted from dateutil's parser so the resolution can be returned too.""" + + cdef: + object res, attr, ret, tzdata + object reso = None + dict repl = {} + + res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) + + if res is None: + raise ValueError(f"Unknown datetime string format, unable to parse: {timestr}") + + for attr in ["year", "month", "day", "hour", + "minute", "second", "microsecond"]: + value = getattr(res, attr) + if value is not None: + repl[attr] = value + reso = attr + + if reso is None: + raise ValueError(f"Unable to parse datetime string: {timestr}") + + if reso == 'microsecond': + if repl['microsecond'] == 0: + reso = 'second' + elif repl['microsecond'] % 1000 == 0: + reso = 'millisecond' + + ret = default.replace(**repl) + if res.weekday is not None and not res.day: + ret = ret + relativedelta(weekday=res.weekday) + if not ignoretz: + if callable(tzinfos) or tzinfos and res.tzname in tzinfos: + # Note: as of 
1.0 this is not reached because + # we never pass tzinfos, see GH#22234 + if callable(tzinfos): + tzdata = tzinfos(res.tzname, res.tzoffset) + else: + tzdata = tzinfos.get(res.tzname) + if isinstance(tzdata, tzinfo): + new_tzinfo = tzdata + elif isinstance(tzdata, str): + new_tzinfo = _dateutil_tzstr(tzdata) + elif isinstance(tzdata, int): + new_tzinfo = tzoffset(res.tzname, tzdata) + else: + raise ValueError("offset must be tzinfo subclass, " + "tz string, or int offset") + ret = ret.replace(tzinfo=new_tzinfo) + elif res.tzname and res.tzname in time.tzname: + ret = ret.replace(tzinfo=_dateutil_tzlocal()) + elif res.tzoffset == 0: + ret = ret.replace(tzinfo=_dateutil_tzutc()) + elif res.tzoffset: + ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset)) + return ret, reso + + +# ---------------------------------------------------------------------- +# Parsing for type-inference + + +def try_parse_dates(object[:] values, parser=None, + dayfirst=False, default=None): + cdef: + Py_ssize_t i, n + object[:] result + + n = len(values) + result = np.empty(n, dtype='O') + + if parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + # EAFP here + try: + for i in range(n): + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + except Exception: + # some value could not be parsed: hand back the unparsed input + return values + else: + parse_date = parser + + for i in range(n): + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + + return result.base # .base to access underlying ndarray + + +def try_parse_date_and_time(object[:] dates, object[:] times, + date_parser=None, time_parser=None, + dayfirst=False, default=None): + cdef: + Py_ssize_t i, n + object[:] result + + n = len(dates) + # Cast to avoid build warning see GH#26757 + if len(times) != n: + raise 
ValueError('Length of dates and times must be equal') + result = np.empty(n, dtype='O') + + if date_parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + else: + parse_date = date_parser + + if time_parser is None: + parse_time = lambda x: du_parse(x) + + else: + parse_time = time_parser + + for i in range(n): + d = parse_date(str(dates[i])) + t = parse_time(str(times[i])) + result[i] = datetime(d.year, d.month, d.day, + t.hour, t.minute, t.second) + + return result.base # .base to access underlying ndarray + + +def try_parse_year_month_day(object[:] years, object[:] months, + object[:] days): + cdef: + Py_ssize_t i, n + object[:] result + + n = len(years) + # Cast to avoid build warning see GH#26757 + if len(months) != n or len(days) != n: + raise ValueError('Length of years/months/days must all be equal') + result = np.empty(n, dtype='O') + + for i in range(n): + result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) + + return result.base # .base to access underlying ndarray + + +def try_parse_datetime_components(object[:] years, + object[:] months, + object[:] days, + object[:] hours, + object[:] minutes, + object[:] seconds): + + cdef: + Py_ssize_t i, n + object[:] result + int secs + double float_secs + double micros + + n = len(years) + # Cast to avoid build warning see GH#26757 + if (len(months) != n or len(days) != n or + len(hours) != n or len(minutes) != n or + len(seconds) != n): + raise ValueError('Length of all datetime components must be equal') + result = np.empty(n, dtype='O') + + for i in range(n): + float_secs = float(seconds[i]) + secs = int(float_secs) + + micros = float_secs - secs + if micros > 0: + micros = micros * 1000000 + + result[i] = datetime(int(years[i]), int(months[i]), int(days[i]), + int(hours[i]), int(minutes[i]), secs, + int(micros)) + + return result.base # .base to access 
underlying ndarray + + +# ---------------------------------------------------------------------- +# Miscellaneous + + +# Class copied verbatim from https://github.com/dateutil/dateutil/pull/732 +# +# We use this class to parse and tokenize date strings. However, as it is +# a private class in the dateutil library, relying on backwards compatibility +# is not practical. In fact, using this class issues warnings (xref gh-21322). +# Thus, we port the class over so that both issues are resolved. +# +# Copyright (c) 2017 - dateutil contributors +class _timelex: + def __init__(self, instream): + if getattr(instream, 'decode', None) is not None: + instream = instream.decode() + + if isinstance(instream, str): + self.stream = instream + elif getattr(instream, 'read', None) is None: + raise TypeError( + 'Parser must be a string or character stream, not ' + f'{type(instream).__name__}') + else: + self.stream = instream.read() + + def get_tokens(self): + """ + This function breaks the time string into lexical units (tokens), which + can be parsed by the parser. Lexical units are demarcated by changes in + the character set, so any continuous string of letters is considered + one unit, any continuous string of numbers is considered one unit. + The main complication arises from the fact that dots ('.') can be used + both as separators (e.g. "Sep.20.2009") or decimal points (e.g. + "4:30:21.447"). As such, it is necessary to read the full context of + any dot-separated strings before breaking it into tokens; as such, this + function maintains a "token stack", for when the ambiguous context + demands that multiple tokens be parsed at once. + """ + stream = self.stream.replace('\x00', '') + + # TODO: Change \s --> \s+ (this doesn't match existing behavior) + # TODO: change the punctuation block to punc+ (does not match existing) + # TODO: can we merge the two digit patterns? + tokens = re.findall('\s|' + '(? 
bint: + """ + Does format match the iso8601 set that can be handled by the C parser? + Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different + but must be consistent. Leading 0s in dates and times are optional. + """ + iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S.%f'.format + excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] + + for date_sep in [' ', '/', '\\', '-', '.', '']: + for time_sep in [' ', 'T']: + if (iso_template(date_sep=date_sep, + time_sep=time_sep + ).startswith(f) and f not in excluded_formats): + return True + return False + + +def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse, + dt_str_split=_DATEUTIL_LEXER_SPLIT): + """ + Guess the datetime format of a given datetime string. + + Parameters + ---------- + dt_str : string, datetime string to guess the format of + dayfirst : boolean, default False + If True parses dates with the day first, eg 20/01/2005 + Warning: dayfirst=True is not strict, but will prefer to parse + with day first (this is a known bug). + dt_str_parse : function, defaults to `dateutil.parser.parse` + This function should take in a datetime string and return + a `datetime.datetime` guess that the datetime string represents + dt_str_split : function, defaults to `_DATEUTIL_LEXER_SPLIT` (dateutil) + This function should take in a datetime string and return + a list of strings, the guess of the various specific parts + e.g. 
'2011/12/30' -> ['2011', '/', '12', '/', '30'] + + Returns + ------- + ret : datetime format string (for `strftime` or `strptime`) + """ + if dt_str_parse is None or dt_str_split is None: + return None + + if not isinstance(dt_str, str): + return None + + day_attribute_and_format = (('day',), '%d', 2) + + # attr name, format, padding (if any) + datetime_attrs_to_format = [ + (('year', 'month', 'day'), '%Y%m%d', 0), + (('year',), '%Y', 0), + (('month',), '%B', 0), + (('month',), '%b', 0), + (('month',), '%m', 2), + day_attribute_and_format, + (('hour',), '%H', 2), + (('minute',), '%M', 2), + (('second',), '%S', 2), + (('microsecond',), '%f', 6), + (('second', 'microsecond'), '%S.%f', 0), + ] + + if dayfirst: + datetime_attrs_to_format.remove(day_attribute_and_format) + datetime_attrs_to_format.insert(0, day_attribute_and_format) + + try: + parsed_datetime = dt_str_parse(dt_str, dayfirst=dayfirst) + except (ValueError, OverflowError): + # In case the datetime can't be parsed, its format cannot be guessed + return None + + if parsed_datetime is None: + return None + + # the default dt_str_split from dateutil will never raise here; we assume + # that any user-provided function will not either. 
+ tokens = dt_str_split(dt_str) + + format_guess = [None] * len(tokens) + found_attrs = set() + + for attrs, attr_format, padding in datetime_attrs_to_format: + # If a given attribute has been placed in the format string, skip + # over other formats for that same underlying attribute (IE, month + # can be represented in multiple different ways) + if set(attrs) & found_attrs: + continue + + if all(getattr(parsed_datetime, attr) is not None for attr in attrs): + for i, token_format in enumerate(format_guess): + token_filled = tokens[i].zfill(padding) + if (token_format is None and + token_filled == parsed_datetime.strftime(attr_format)): + format_guess[i] = attr_format + tokens[i] = token_filled + found_attrs.update(attrs) + break + + # Only consider it a valid guess if we have a year, month and day + if len({'year', 'month', 'day'} & found_attrs) != 3: + return None + + output_format = [] + for i, guess in enumerate(format_guess): + if guess is not None: + # Either fill in the format placeholder (like %Y) + output_format.append(guess) + else: + # Or just the token separate (IE, the dashes in "01-01-2013") + try: + # If the token is numeric, then we likely didn't parse it + # properly, so our guess is wrong + float(tokens[i]) + return None + except ValueError: + pass + + output_format.append(tokens[i]) + + guessed_format = ''.join(output_format) + + # rebuild string, capturing any inferred padding + dt_str = ''.join(tokens) + if parsed_datetime.strftime(guessed_format) == dt_str: + return guessed_format + else: + return None + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline object convert_to_unicode(object item, + bint keep_trivial_numbers): + """ + Convert `item` to str. 
+ + Parameters + ---------- + item : object + keep_trivial_numbers : bool + if True, then conversion (to string from integer/float zero) + is not performed + + Returns + ------- + str or int or float + """ + cdef: + float64_t float_item + + if keep_trivial_numbers: + if isinstance(item, int): + if item == 0: + return item + elif isinstance(item, float): + float_item = item + if float_item == 0.0 or float_item != float_item: + return item + + if not isinstance(item, str): + item = PyObject_Str(item) + + return item + + +@cython.wraparound(False) +@cython.boundscheck(False) +def _concat_date_cols(tuple date_cols, bint keep_trivial_numbers=True): + """ + Concatenates elements from numpy arrays in `date_cols` into strings. + + Parameters + ---------- + date_cols : tuple[ndarray] + keep_trivial_numbers : bool, default True + if True and len(date_cols) == 1, then + conversion (to string from integer/float zero) is not performed + + Returns + ------- + arr_of_rows : ndarray[object] + + Examples + -------- + >>> dates=np.array(['3/31/2019', '4/31/2019'], dtype=object) + >>> times=np.array(['11:20', '10:45'], dtype=object) + >>> result = _concat_date_cols((dates, times)) + >>> result + array(['3/31/2019 11:20', '4/31/2019 10:45'], dtype=object) + """ + cdef: + Py_ssize_t rows_count = 0, col_count = len(date_cols) + Py_ssize_t col_idx, row_idx + list list_to_join + cnp.ndarray[object] iters + object[::1] iters_view + flatiter it + cnp.ndarray[object] result + object[:] result_view + + if col_count == 0: + return np.zeros(0, dtype=object) + + if not all(is_array(array) for array in date_cols): + raise ValueError("not all elements from date_cols are numpy arrays") + + rows_count = min(len(array) for array in date_cols) + result = np.zeros(rows_count, dtype=object) + result_view = result + + if col_count == 1: + array = date_cols[0] + it = PyArray_IterNew(array) + for row_idx in range(rows_count): + item = PyArray_GETITEM(array, PyArray_ITER_DATA(it)) + result_view[row_idx] = 
convert_to_unicode(item, + keep_trivial_numbers) + PyArray_ITER_NEXT(it) + else: + # create fixed size list - more efficient memory allocation + list_to_join = [None] * col_count + iters = np.zeros(col_count, dtype=object) + + # create memoryview of iters ndarray, that will contain some + # flatiter's for each array in `date_cols` - more efficient indexing + iters_view = iters + for col_idx, array in enumerate(date_cols): + iters_view[col_idx] = PyArray_IterNew(array) + + # array elements that are on the same line are converted to one string + for row_idx in range(rows_count): + for col_idx, array in enumerate(date_cols): + # this cast is needed, because we did not find a way + # to efficiently store `flatiter` type objects in ndarray + it = iters_view[col_idx] + item = PyArray_GETITEM(array, PyArray_ITER_DATA(it)) + list_to_join[col_idx] = convert_to_unicode(item, False) + PyArray_ITER_NEXT(it) + result_view[row_idx] = " ".join(list_to_join) + + return result diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx new file mode 100644 index 00000000..bd57e75c --- /dev/null +++ b/pandas/_libs/tslibs/period.pyx @@ -0,0 +1,2554 @@ +from datetime import datetime + +from cpython.object cimport ( + PyObject_RichCompareBool, + Py_EQ, Py_NE) + +from numpy cimport int64_t, import_array, ndarray +import numpy as np +import_array() + +from libc.stdlib cimport free, malloc +from libc.time cimport strftime, tm +from libc.string cimport strlen, memset + +import cython + +from cpython.datetime cimport (PyDateTime_Check, PyDelta_Check, PyDate_Check, + PyDateTime_IMPORT) +# import datetime C API +PyDateTime_IMPORT + +from pandas._libs.tslibs.np_datetime cimport ( + npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct, + pandas_datetime_to_datetimestruct, check_dts_bounds, + NPY_DATETIMEUNIT, NPY_FR_D) + +cdef extern from "src/datetime/np_datetime.h": + int64_t npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT fr, + npy_datetimestruct *d) nogil + +cimport 
pandas._libs.tslibs.util as util +from pandas._libs.tslibs.util cimport is_period_object + +from pandas._libs.tslibs.timestamps import Timestamp +from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info +from pandas._libs.tslibs.timedeltas import Timedelta +from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds + +cimport pandas._libs.tslibs.ccalendar as ccalendar +from pandas._libs.tslibs.ccalendar cimport ( + dayofweek, get_day_of_year, is_leapyear) +from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS +from pandas._libs.tslibs.frequencies cimport ( + get_freq_code, get_base_alias, get_to_timestamp_base, get_freq_str, + get_rule_month) +from pandas._libs.tslibs.parsing import parse_time_string +from pandas._libs.tslibs.resolution import Resolution +from pandas._libs.tslibs.nattype import nat_strings +from pandas._libs.tslibs.nattype cimport ( + _nat_scalar_rules, NPY_NAT, is_null_datetimelike, c_NaT as NaT) +from pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.offsets import _Tick +from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal + + +cdef: + enum: + INT32_MIN = -2147483648 + + +ctypedef struct asfreq_info: + int64_t intraday_conversion_factor + int is_end + int to_end + int from_end + +ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil + + +cdef extern from *: + """ + /*** FREQUENCY CONSTANTS ***/ + // See frequencies.pyx for more detailed variants + + #define FR_ANN 1000 /* Annual */ + #define FR_QTR 2000 /* Quarterly - December year end (default Q) */ + #define FR_MTH 3000 /* Monthly */ + #define FR_WK 4000 /* Weekly */ + #define FR_BUS 5000 /* Business days */ + #define FR_DAY 6000 /* Daily */ + #define FR_HR 7000 /* Hourly */ + #define FR_MIN 8000 /* Minutely */ + #define FR_SEC 9000 /* Secondly */ + #define FR_MS 10000 /* Millisecondly */ + #define FR_US 11000 /* Microsecondly */ + #define FR_NS 12000 /* Nanosecondly */ + #define FR_UND -10000 /* Undefined */ + + // 
must use npy typedef b/c int64_t is aliased in cython-generated c + static npy_int64 daytime_conversion_factor_matrix[7][7] = { + {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, + {0, 1, 60, 3600, 3600000, 3600000000, 3600000000000}, + {0, 0, 1, 60, 60000, 60000000, 60000000000}, + {0, 0, 0, 1, 1000, 1000000, 1000000000}, + {0, 0, 0, 0, 1, 1000, 1000000}, + {0, 0, 0, 0, 0, 1, 1000}, + {0, 0, 0, 0, 0, 0, 1}}; + """ + int64_t daytime_conversion_factor_matrix[7][7] + # TODO: Can we get these frequencies from frequencies.FreqGroup? + int FR_ANN + int FR_QTR + int FR_MTH + int FR_WK + int FR_DAY + int FR_HR + int FR_MIN + int FR_SEC + int FR_MS + int FR_US + int FR_NS + int FR_BUS + int FR_UND + + +cdef int max_value(int left, int right) nogil: + if left > right: + return left + return right + + +cdef int min_value(int left, int right) nogil: + if left < right: + return left + return right + + +cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil: + cdef: + int row = min_value(from_index, to_index) + int col = max_value(from_index, to_index) + # row or col < 6 means frequency strictly lower than Daily, which + # do not use daytime_conversion_factors + if row < 6: + return 0 + elif col < 6: + return 0 + return daytime_conversion_factor_matrix[row - 6][col - 6] + + +cdef int64_t nofunc(int64_t ordinal, asfreq_info *af_info) nogil: + return INT32_MIN + + +cdef int64_t no_op(int64_t ordinal, asfreq_info *af_info) nogil: + return ordinal + + +cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil: + cdef: + int from_group = get_freq_group(from_freq) + int to_group = get_freq_group(to_freq) + + if from_group == FR_UND: + from_group = FR_DAY + + if from_group == FR_BUS: + if to_group == FR_ANN: + return asfreq_BtoA + elif to_group == FR_QTR: + return asfreq_BtoQ + elif to_group == FR_MTH: + return asfreq_BtoM + elif to_group == FR_WK: + return asfreq_BtoW + elif to_group == FR_BUS: + return no_op + elif to_group in [FR_DAY, 
FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_BtoDT + else: + return nofunc + + elif to_group == FR_BUS: + if from_group == FR_ANN: + return asfreq_AtoB + elif from_group == FR_QTR: + return asfreq_QtoB + elif from_group == FR_MTH: + return asfreq_MtoB + elif from_group == FR_WK: + return asfreq_WtoB + elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, + FR_MS, FR_US, FR_NS]: + return asfreq_DTtoB + else: + return nofunc + + elif from_group == FR_ANN: + if to_group == FR_ANN: + return asfreq_AtoA + elif to_group == FR_QTR: + return asfreq_AtoQ + elif to_group == FR_MTH: + return asfreq_AtoM + elif to_group == FR_WK: + return asfreq_AtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_AtoDT + else: + return nofunc + + elif from_group == FR_QTR: + if to_group == FR_ANN: + return asfreq_QtoA + elif to_group == FR_QTR: + return asfreq_QtoQ + elif to_group == FR_MTH: + return asfreq_QtoM + elif to_group == FR_WK: + return asfreq_QtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_QtoDT + else: + return nofunc + + elif from_group == FR_MTH: + if to_group == FR_ANN: + return asfreq_MtoA + elif to_group == FR_QTR: + return asfreq_MtoQ + elif to_group == FR_MTH: + return no_op + elif to_group == FR_WK: + return asfreq_MtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_MtoDT + else: + return nofunc + + elif from_group == FR_WK: + if to_group == FR_ANN: + return asfreq_WtoA + elif to_group == FR_QTR: + return asfreq_WtoQ + elif to_group == FR_MTH: + return asfreq_WtoM + elif to_group == FR_WK: + return asfreq_WtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + return asfreq_WtoDT + else: + return nofunc + + elif from_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if to_group == FR_ANN: + return asfreq_DTtoA + elif to_group == FR_QTR: + return asfreq_DTtoQ + elif to_group == FR_MTH: + return 
asfreq_DTtoM + elif to_group == FR_WK: + return asfreq_DTtoW + elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]: + if from_group > to_group: + return downsample_daytime + else: + return upsample_daytime + + else: + return nofunc + + else: + return nofunc + + +# -------------------------------------------------------------------- +# Frequency Conversion Helpers + +cdef int64_t DtoB_weekday(int64_t unix_date) nogil: + return ((unix_date + 4) // 7) * 5 + ((unix_date + 4) % 7) - 4 + + +cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back, + int64_t unix_date) nogil: + cdef: + int day_of_week = dayofweek(dts.year, dts.month, dts.day) + + if roll_back == 1: + if day_of_week > 4: + # change to friday before weekend + unix_date -= (day_of_week - 4) + else: + if day_of_week > 4: + # change to Monday after weekend + unix_date += (7 - day_of_week) + + return DtoB_weekday(unix_date) + + +cdef inline int64_t upsample_daytime(int64_t ordinal, + asfreq_info *af_info) nogil: + if (af_info.is_end): + return (ordinal + 1) * af_info.intraday_conversion_factor - 1 + else: + return ordinal * af_info.intraday_conversion_factor + + +cdef inline int64_t downsample_daytime(int64_t ordinal, + asfreq_info *af_info) nogil: + return ordinal // (af_info.intraday_conversion_factor) + + +cdef inline int64_t transform_via_day(int64_t ordinal, + asfreq_info *af_info, + freq_conv_func first_func, + freq_conv_func second_func) nogil: + cdef: + int64_t result + + result = first_func(ordinal, af_info) + result = second_func(result, af_info) + return result + + +# -------------------------------------------------------------------- +# Conversion _to_ Daily Freq + +cdef void AtoD_ym(int64_t ordinal, int64_t *year, + int *month, asfreq_info *af_info) nogil: + year[0] = ordinal + 1970 + month[0] = 1 + + if af_info.from_end != 12: + month[0] += af_info.from_end + if month[0] > 12: + # This case is never reached, but is kept for symmetry + # with QtoD_ym + month[0] -= 12 + else: + 
year[0] -= 1 + + +cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date, year + int month + + ordinal += af_info.is_end + AtoD_ym(ordinal, &year, &month, af_info) + + unix_date = unix_date_from_ymd(year, month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef void QtoD_ym(int64_t ordinal, int *year, + int *month, asfreq_info *af_info) nogil: + year[0] = ordinal // 4 + 1970 + month[0] = (ordinal % 4) * 3 + 1 + + if af_info.from_end != 12: + month[0] += af_info.from_end + if month[0] > 12: + month[0] -= 12 + else: + year[0] -= 1 + + +cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + int year, month + + ordinal += af_info.is_end + QtoD_ym(ordinal, &year, &month, af_info) + + unix_date = unix_date_from_ymd(year, month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef void MtoD_ym(int64_t ordinal, int *year, int *month) nogil: + year[0] = ordinal // 12 + 1970 + month[0] = ordinal % 12 + 1 + + +cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int64_t unix_date + int year, month + + ordinal += af_info.is_end + MtoD_ym(ordinal, &year, &month) + + unix_date = unix_date_from_ymd(year, month, 1) + unix_date -= af_info.is_end + return upsample_daytime(unix_date, af_info) + + +cdef int64_t asfreq_WtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal = (ordinal * 7 + af_info.from_end - 4 + + (7 - 1) * (af_info.is_end - 1)) + return upsample_daytime(ordinal, af_info) + + +# -------------------------------------------------------------------- +# Conversion _to_ BusinessDay Freq + +cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_AtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, 
roll_back, unix_date) + + +cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_QtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_MtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_WtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = asfreq_WtoDT(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + roll_back = af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +cdef int64_t asfreq_DTtoB(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int roll_back + npy_datetimestruct dts + int64_t unix_date = downsample_daytime(ordinal, af_info) + + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts) + # This usage defines roll_back the opposite way from the others + roll_back = 1 - af_info.is_end + return DtoB(&dts, roll_back, unix_date) + + +# ---------------------------------------------------------------------- +# Conversion _from_ Daily Freq + +cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) + if dts.month > af_info.to_end: + return (dts.year + 1 - 1970) + else: + return (dts.year - 1970) + + +cdef int DtoQ_yq(int64_t ordinal, asfreq_info *af_info, int *year) nogil: + cdef: + npy_datetimestruct dts + int quarter + + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) + # TODO: Another version 
of this function used + # date_info_from_days_and_time(&dts, unix_date, 0) + # instead of pandas_datetime_to_datetimestruct; is one more performant? + if af_info.to_end != 12: + dts.month -= af_info.to_end + if dts.month <= 0: + dts.month += 12 + else: + dts.year += 1 + + year[0] = dts.year + quarter = month_to_quarter(dts.month) + return quarter + + +cdef int64_t asfreq_DTtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + int year, quarter + + ordinal = downsample_daytime(ordinal, af_info) + + quarter = DtoQ_yq(ordinal, af_info, &year) + return ((year - 1970) * 4 + quarter - 1) + + +cdef int64_t asfreq_DTtoM(int64_t ordinal, asfreq_info *af_info) nogil: + cdef: + npy_datetimestruct dts + + ordinal = downsample_daytime(ordinal, af_info) + pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, &dts) + return ((dts.year - 1970) * 12 + dts.month - 1) + + +cdef int64_t asfreq_DTtoW(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal = downsample_daytime(ordinal, af_info) + return (ordinal + 3 - af_info.to_end) // 7 + 1 + + +# -------------------------------------------------------------------- +# Conversion _from_ BusinessDay Freq + +cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) nogil: + ordinal = ((ordinal + 3) // 5) * 7 + (ordinal + 3) % 5 -3 + return upsample_daytime(ordinal, af_info) + + +cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoA) + + +cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoQ) + + +cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoM) + + +cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) nogil: + return transform_via_day(ordinal, af_info, + asfreq_BtoDT, + asfreq_DTtoW) + + +# 
----------------------------------------------------------------------
+# Conversion _from_ Annual Freq
+#
+# Every converter in this section and in the Quarterly / Monthly / Weekly
+# sections below is a two-step chain run through transform_via_day: the
+# ordinal is first passed to the source frequency's asfreq_<X>toDT function
+# and that result is then passed to the target's asfreq_DTto<Y> function.
+# The same af_info is handed to both steps.
+
+cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_AtoDT,
+                             asfreq_DTtoA)
+
+
+cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_AtoDT,
+                             asfreq_DTtoQ)
+
+
+cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_AtoDT,
+                             asfreq_DTtoM)
+
+
+cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_AtoDT,
+                             asfreq_DTtoW)
+
+
+# ----------------------------------------------------------------------
+# Conversion _from_ Quarterly Freq
+
+cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_QtoDT,
+                             asfreq_DTtoQ)
+
+
+cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_QtoDT,
+                             asfreq_DTtoA)
+
+
+cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_QtoDT,
+                             asfreq_DTtoM)
+
+
+cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_QtoDT,
+                             asfreq_DTtoW)
+
+
+# ----------------------------------------------------------------------
+# Conversion _from_ Monthly Freq
+
+cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_MtoDT,
+                             asfreq_DTtoA)
+
+
+cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_MtoDT,
+                             asfreq_DTtoQ)
+
+
+cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_MtoDT,
+                             asfreq_DTtoW)
+
+
+# ----------------------------------------------------------------------
+# Conversion _from_ Weekly Freq
+
+cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_WtoDT,
+                             asfreq_DTtoA)
+
+
+cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_WtoDT,
+                             asfreq_DTtoQ)
+
+
+cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_WtoDT,
+                             asfreq_DTtoM)
+
+
+cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) nogil:
+    return transform_via_day(ordinal, af_info,
+                             asfreq_WtoDT,
+                             asfreq_DTtoW)
+
+
+# ----------------------------------------------------------------------
+
+@cython.cdivision
+cdef char* c_strftime(npy_datetimestruct *dts, char *fmt) except NULL:
+    """
+    Generate a nice string representation of the period
+    object, originally from DateObject_strftime
+
+    The fields of `dts` are copied into a C `tm` struct and formatted with
+    the C library's strftime into a freshly malloc'ed buffer of
+    ``strlen(fmt) + 50`` bytes.
+
+    Parameters
+    ----------
+    dts : npy_datetimestruct*
+        Date/time fields to format; only year, month, day, hour, min and
+        sec are read.
+    fmt : char*
+        NUL-terminated strftime format string.
+
+    Returns
+    -------
+    result : char*
+        NUL-terminated formatted string.  The buffer is owned by the
+        caller, which must release it with free().  If strftime reports
+        that nothing was written (empty output, or output that did not fit
+        in the buffer) the result is the empty string.
+
+    Raises
+    ------
+    MemoryError
+        If the output buffer cannot be allocated.
+    """
+    cdef:
+        tm c_date
+        char *result
+        int result_len = strlen(fmt) + 50
+
+    c_date.tm_sec = dts.sec
+    c_date.tm_min = dts.min
+    c_date.tm_hour = dts.hour
+    c_date.tm_mday = dts.day
+    # struct tm counts months from 0 and years from 1900
+    c_date.tm_mon = dts.month - 1
+    c_date.tm_year = dts.year - 1900
+    # tm_wday counts days since Sunday; dayofweek() is used elsewhere in
+    # this file with values > 4 meaning the weekend (Monday == 0), hence
+    # the shift by one modulo 7
+    c_date.tm_wday = (dayofweek(dts.year, dts.month, dts.day) + 1) % 7
+    # tm_yday is zero-based (presumably get_day_of_year is one-based)
+    c_date.tm_yday = get_day_of_year(dts.year, dts.month, dts.day) - 1
+    # -1: daylight-saving information is not available
+    c_date.tm_isdst = -1
+
+    # malloc returns void*, which Cython does not convert implicitly
+    result = <char*>malloc(result_len * sizeof(char))
+    if result is NULL:
+        # never hand a NULL buffer to strftime; `except NULL` in the
+        # signature lets this propagate to the caller
+        raise MemoryError()
+
+    if strftime(result, result_len, fmt, &c_date) == 0:
+        # strftime returns 0 when the output did not fit, in which case the
+        # buffer contents are unspecified; terminate it so the caller
+        # always receives a valid C string
+        result[0] = 0
+
+    return result
+
+
+# ----------------------------------------------------------------------
+# Conversion between date_info and npy_datetimestruct
+
+# Frequency group of `freq`: the code rounded down to a multiple of 1000
+cdef inline int get_freq_group(int freq) nogil:
+    return (freq // 1000) * 1000
+
+
+# Index of the frequency group of `freq`: the code floor-divided by 1000
+cdef inline int get_freq_group_index(int freq) nogil:
+    return freq // 1000
+
+
+# Find the unix_date (days elapsed since datetime(1970, 1, 1)
+# for the given year/month/day. +# Assumes GREGORIAN_CALENDAR */ +cdef int64_t unix_date_from_ymd(int year, int month, int day) nogil: + # Calculate the absolute date + cdef: + npy_datetimestruct dts + int64_t unix_date + + memset(&dts, 0, sizeof(npy_datetimestruct)) + dts.year = year + dts.month = month + dts.day = day + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, &dts) + return unix_date + + +# specifically _dont_ use cdvision or else ordinals near -1 are assigned to +# incorrect dates GH#19643 +@cython.cdivision(False) +cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil: + """ + Generate an ordinal in period space + + Parameters + ---------- + dts: npy_datetimestruct* + freq : int + + Returns + ------- + period_ordinal : int64_t + """ + cdef: + int64_t unix_date, seconds, delta + int64_t weeks + int64_t day_adj + int freq_group, fmonth, mdiff + + freq_group = get_freq_group(freq) + + if freq_group == FR_ANN: + fmonth = freq - FR_ANN + if fmonth == 0: + fmonth = 12 + + mdiff = dts.month - fmonth + if mdiff <= 0: + return dts.year - 1970 + else: + return dts.year - 1970 + 1 + + elif freq_group == FR_QTR: + fmonth = freq - FR_QTR + if fmonth == 0: + fmonth = 12 + + mdiff = dts.month - fmonth + # TODO: Aren't the next two conditions equivalent to + # unconditional incrementing? 
+ if mdiff < 0: + mdiff += 12 + if dts.month >= fmonth: + mdiff += 12 + + return (dts.year - 1970) * 4 + (mdiff - 1) // 3 + + elif freq == FR_MTH: + return (dts.year - 1970) * 12 + dts.month - 1 + + unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts) + + if freq >= FR_SEC: + seconds = unix_date * 86400 + dts.hour * 3600 + dts.min * 60 + dts.sec + + if freq == FR_MS: + return seconds * 1000 + dts.us // 1000 + + elif freq == FR_US: + return seconds * 1000000 + dts.us + + elif freq == FR_NS: + return (seconds * 1000000000 + + dts.us * 1000 + dts.ps // 1000) + + else: + return seconds + + elif freq == FR_MIN: + return unix_date * 1440 + dts.hour * 60 + dts.min + + elif freq == FR_HR: + return unix_date * 24 + dts.hour + + elif freq == FR_DAY: + return unix_date + + elif freq == FR_UND: + return unix_date + + elif freq == FR_BUS: + # calculate the current week (counting from 1970-01-01) treating + # sunday as last day of a week + weeks = (unix_date + 3) // 7 + # calculate the current weekday (in range 1 .. 7) + delta = (unix_date + 3) % 7 + 1 + # return the number of business days in full weeks plus the business + # days in the last - possible partial - week + if delta <= 5: + return (5 * weeks) + delta - 4 + else: + return (5 * weeks) + (5 + 1) - 4 + + elif freq_group == FR_WK: + day_adj = freq - FR_WK + return (unix_date + 3 - day_adj) // 7 + 1 + + # raise ValueError + + +cdef void get_date_info(int64_t ordinal, int freq, + npy_datetimestruct *dts) nogil: + cdef: + int64_t unix_date + double abstime + + unix_date = get_unix_date(ordinal, freq) + abstime = get_abs_time(freq, unix_date, ordinal) + + while abstime < 0: + abstime += 86400 + unix_date -= 1 + + while abstime >= 86400: + abstime -= 86400 + unix_date += 1 + + date_info_from_days_and_time(dts, unix_date, abstime) + + +cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil: + """ + Returns the proleptic Gregorian ordinal of the date, as an integer. 
+ This corresponds to the number of days since Jan., 1st, 1970 AD. + When the instance has a frequency less than daily, the proleptic date + is calculated for the last day of the period. + + Parameters + ---------- + period_ordinal : int64_t + freq : int + + Returns + ------- + unix_date : int64_t number of days since datetime(1970, 1, 1) + """ + cdef: + asfreq_info af_info + freq_conv_func toDaily = NULL + + if freq == FR_DAY: + return period_ordinal + + toDaily = get_asfreq_func(freq, FR_DAY) + get_asfreq_info(freq, FR_DAY, True, &af_info) + return toDaily(period_ordinal, &af_info) + + +@cython.cdivision +cdef void date_info_from_days_and_time(npy_datetimestruct *dts, + int64_t unix_date, + double abstime) nogil: + """ + Set the instance's value using the given date and time. + + Parameters + ---------- + dts : npy_datetimestruct* + unix_date : int64_t + days elapsed since datetime(1970, 1, 1) + abstime : double + seconds elapsed since beginning of day described by unix_date + + Notes + ----- + Updates dts inplace + """ + cdef: + int inttime + int hour, minute + double second, subsecond_fraction + + # Bounds check + # The calling function is responsible for ensuring that + # abstime >= 0.0 and abstime <= 86400 + + # Calculate the date + pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts) + + # Calculate the time + inttime = abstime + hour = inttime / 3600 + minute = (inttime % 3600) / 60 + second = abstime - (hour * 3600 + minute * 60) + + dts.hour = hour + dts.min = minute + dts.sec = second + + subsecond_fraction = second - dts.sec + dts.us = int((subsecond_fraction) * 1e6) + dts.ps = int(((subsecond_fraction) * 1e6 - dts.us) * 1e6) + + +@cython.cdivision +cdef double get_abs_time(int freq, int64_t unix_date, int64_t ordinal) nogil: + cdef: + int freq_index, day_index, base_index + int64_t per_day, start_ord + double unit, result + + if freq <= FR_DAY: + return 0 + + freq_index = freq // 1000 + day_index = FR_DAY // 1000 + base_index = FR_SEC // 1000 + 
+ per_day = get_daytime_conversion_factor(day_index, freq_index) + unit = get_daytime_conversion_factor(freq_index, base_index) + + if base_index < freq_index: + unit = 1 / unit + + start_ord = unix_date * per_day + result = (unit * (ordinal - start_ord)) + return result + + +cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): + """ + Find the year and quarter of a Period with the given ordinal and frequency + + Parameters + ---------- + ordinal : int64_t + freq : int + quarter : *int + year : *int + + Returns + ------- + qtr_freq : int + describes the implied quarterly frequency associated with `freq` + + Notes + ----- + Sets quarter and year inplace + """ + cdef: + asfreq_info af_info + int qtr_freq + int64_t unix_date + + unix_date = get_unix_date(ordinal, freq) + + if get_freq_group(freq) == FR_QTR: + qtr_freq = freq + else: + qtr_freq = FR_QTR + + assert (qtr_freq % 1000) <= 12 + get_asfreq_info(FR_DAY, qtr_freq, True, &af_info) + + quarter[0] = DtoQ_yq(unix_date, &af_info, year) + return qtr_freq + + +cdef inline int month_to_quarter(int month) nogil: + return (month - 1) // 3 + 1 + + +# ---------------------------------------------------------------------- +# Period logic + +@cython.wraparound(False) +@cython.boundscheck(False) +def dt64arr_to_periodarr(int64_t[:] dtarr, int freq, tz=None): + """ + Convert array of datetime64 values (passed in as 'i8' dtype) to a set of + periods corresponding to desired frequency, per period convention. 
+ """ + cdef: + int64_t[:] out + Py_ssize_t i, l + npy_datetimestruct dts + + l = len(dtarr) + + out = np.empty(l, dtype='i8') + + if tz is None: + with nogil: + for i in range(l): + if dtarr[i] == NPY_NAT: + out[i] = NPY_NAT + continue + dt64_to_dtstruct(dtarr[i], &dts) + out[i] = get_period_ordinal(&dts, freq) + else: + out = localize_dt64arr_to_period(dtarr, freq, tz) + return out.base # .base to access underlying np.ndarray + + +@cython.wraparound(False) +@cython.boundscheck(False) +def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq): + """ + Convert array to datetime64 values from a set of ordinals corresponding to + periods per period convention. + """ + cdef: + int64_t[:] out + Py_ssize_t i, l + + l = len(periodarr) + + out = np.empty(l, dtype='i8') + + for i in range(l): + out[i] = period_ordinal_to_dt64(periodarr[i], freq) + + return out.base # .base to access underlying np.ndarray + + +cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end): + """ + Convert period ordinal from one frequency to another, and if upsampling, + choose to use start ('S') or end ('E') of period. + """ + cdef: + int64_t retval + freq_conv_func func + asfreq_info af_info + + if ordinal == NPY_NAT: + return NPY_NAT + + func = get_asfreq_func(freq1, freq2) + get_asfreq_info(freq1, freq2, end, &af_info) + retval = func(ordinal, &af_info) + + if retval == INT32_MIN: + raise ValueError('Frequency conversion failed') + + return retval + + +cdef void get_asfreq_info(int from_freq, int to_freq, + bint is_end, asfreq_info *af_info) nogil: + """ + Construct the `asfreq_info` object used to convert an ordinal from + `from_freq` to `to_freq`. 
+ + Parameters + ---------- + from_freq : int + to_freq int + is_end : bool + af_info : *asfreq_info + """ + cdef: + int from_group = get_freq_group(from_freq) + int to_group = get_freq_group(to_freq) + + af_info.is_end = is_end + + af_info.intraday_conversion_factor = get_daytime_conversion_factor( + get_freq_group_index(max_value(from_group, FR_DAY)), + get_freq_group_index(max_value(to_group, FR_DAY))) + + if from_group == FR_WK: + af_info.from_end = calc_week_end(from_freq, from_group) + elif from_group == FR_ANN: + af_info.from_end = calc_a_year_end(from_freq, from_group) + elif from_group == FR_QTR: + af_info.from_end = calc_a_year_end(from_freq, from_group) + + if to_group == FR_WK: + af_info.to_end = calc_week_end(to_freq, to_group) + elif to_group == FR_ANN: + af_info.to_end = calc_a_year_end(to_freq, to_group) + elif to_group == FR_QTR: + af_info.to_end = calc_a_year_end(to_freq, to_group) + + +@cython.cdivision +cdef int calc_a_year_end(int freq, int group) nogil: + cdef: + int result = (freq - group) % 12 + if result == 0: + return 12 + else: + return result + + +cdef inline int calc_week_end(int freq, int group) nogil: + return freq - group + + +@cython.wraparound(False) +@cython.boundscheck(False) +def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): + """ + Convert int64-array of period ordinals from one frequency to another, and + if upsampling, choose to use start ('S') or end ('E') of period. 
+ """ + cdef: + int64_t[:] result + Py_ssize_t i, n + freq_conv_func func + asfreq_info af_info + int64_t val + + n = len(arr) + result = np.empty(n, dtype=np.int64) + + func = get_asfreq_func(freq1, freq2) + get_asfreq_info(freq1, freq2, end, &af_info) + + mask = arr == NPY_NAT + if mask.any(): # NaT process + for i in range(n): + val = arr[i] + if val != NPY_NAT: + val = func(val, &af_info) + if val == INT32_MIN: + raise ValueError("Unable to convert to desired frequency.") + result[i] = val + else: + for i in range(n): + val = func(arr[i], &af_info) + if val == INT32_MIN: + raise ValueError("Unable to convert to desired frequency.") + result[i] = val + + return result.base # .base to access underlying np.ndarray + + +cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, + int s, int us, int ps, int freq): + """ + Find the ordinal representation of the given datetime components at the + frequency `freq`. + + Parameters + ---------- + y : int + m : int + d : int + h : int + min : int + s : int + us : int + ps : int + + Returns + ------- + ordinal : int64_t + """ + cdef: + npy_datetimestruct dts + dts.year = y + dts.month = m + dts.day = d + dts.hour = h + dts.min = min + dts.sec = s + dts.us = us + dts.ps = ps + return get_period_ordinal(&dts, freq) + + +cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? 
-1: + cdef: + npy_datetimestruct dts + + if ordinal == NPY_NAT: + return NPY_NAT + + get_date_info(ordinal, freq, &dts) + check_dts_bounds(&dts) + return dtstruct_to_dt64(&dts) + + +cdef str period_format(int64_t value, int freq, object fmt=None): + cdef: + int freq_group + + if value == NPY_NAT: + return "NaT" + + if isinstance(fmt, str): + fmt = fmt.encode("utf-8") + + if fmt is None: + freq_group = get_freq_group(freq) + if freq_group == 1000: # FR_ANN + fmt = b'%Y' + elif freq_group == 2000: # FR_QTR + fmt = b'%FQ%q' + elif freq_group == 3000: # FR_MTH + fmt = b'%Y-%m' + elif freq_group == 4000: # WK + left = period_asfreq(value, freq, 6000, 0) + right = period_asfreq(value, freq, 6000, 1) + return f"{period_format(left, 6000)}/{period_format(right, 6000)}" + elif (freq_group == 5000 # BUS + or freq_group == 6000): # DAY + fmt = b'%Y-%m-%d' + elif freq_group == 7000: # HR + fmt = b'%Y-%m-%d %H:00' + elif freq_group == 8000: # MIN + fmt = b'%Y-%m-%d %H:%M' + elif freq_group == 9000: # SEC + fmt = b'%Y-%m-%d %H:%M:%S' + elif freq_group == 10000: # MILLISEC + fmt = b'%Y-%m-%d %H:%M:%S.%l' + elif freq_group == 11000: # MICROSEC + fmt = b'%Y-%m-%d %H:%M:%S.%u' + elif freq_group == 12000: # NANOSEC + fmt = b'%Y-%m-%d %H:%M:%S.%n' + else: + raise ValueError(f"Unknown freq: {freq}") + + return _period_strftime(value, freq, fmt) + + +cdef list extra_fmts = [(b"%q", b"^`AB`^"), + (b"%f", b"^`CD`^"), + (b"%F", b"^`EF`^"), + (b"%l", b"^`GH`^"), + (b"%u", b"^`IJ`^"), + (b"%n", b"^`KL`^")] + +cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", + "^`GH`^", "^`IJ`^", "^`KL`^"] + +cdef str _period_strftime(int64_t value, int freq, bytes fmt): + cdef: + Py_ssize_t i + npy_datetimestruct dts + char *formatted + bytes pat, brepl + list found_pat = [False] * len(extra_fmts) + int year, quarter + str result, repl + + get_date_info(value, freq, &dts) + for i in range(len(extra_fmts)): + pat = extra_fmts[i][0] + brepl = extra_fmts[i][1] + if pat in fmt: + fmt = fmt.replace(pat, 
brepl) + found_pat[i] = True + + formatted = c_strftime(&dts, fmt) + + result = util.char_to_string(formatted) + free(formatted) + + for i in range(len(extra_fmts)): + if found_pat[i]: + if get_yq(value, freq, &quarter, &year) < 0: + raise ValueError('Unable to get quarter and year') + + if i == 0: + repl = str(quarter) + elif i == 1: # %f, 2-digit year + repl = f"{(year % 100):02d}" + elif i == 2: + repl = str(year) + elif i == 3: + repl = f"{(value % 1_000):03d}" + elif i == 4: + repl = f"{(value % 1_000_000):06d}" + elif i == 5: + repl = f"{(value % 1_000_000_000):09d}" + + result = result.replace(str_extra_fmts[i], repl) + + return result + + +# ---------------------------------------------------------------------- +# period accessors + +ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN + + +cdef int pyear(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.year + + +@cython.cdivision +cdef int pqyear(int64_t ordinal, int freq): + cdef: + int year, quarter + get_yq(ordinal, freq, &quarter, &year) + return year + + +cdef int pquarter(int64_t ordinal, int freq): + cdef: + int year, quarter + get_yq(ordinal, freq, &quarter, &year) + return quarter + + +cdef int pmonth(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.month + + +cdef int pday(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.day + + +cdef int pweekday(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dayofweek(dts.year, dts.month, dts.day) + + +cdef int pday_of_year(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return get_day_of_year(dts.year, dts.month, dts.day) + + +cdef int pweek(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return 
ccalendar.get_week_of_year(dts.year, dts.month, dts.day) + + +cdef int phour(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.hour + + +cdef int pminute(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.min + + +cdef int psecond(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return dts.sec + + +cdef int pdays_in_month(int64_t ordinal, int freq): + cdef: + npy_datetimestruct dts + get_date_info(ordinal, freq, &dts) + return ccalendar.get_days_in_month(dts.year, dts.month) + + +@cython.wraparound(False) +@cython.boundscheck(False) +def get_period_field_arr(int code, int64_t[:] arr, int freq): + cdef: + Py_ssize_t i, sz + int64_t[:] out + accessor f + + func = _get_accessor_func(code) + if func is NULL: + raise ValueError(f"Unrecognized period code: {code}") + + sz = len(arr) + out = np.empty(sz, dtype=np.int64) + + for i in range(sz): + if arr[i] == NPY_NAT: + out[i] = -1 + continue + out[i] = func(arr[i], freq) + + return out.base # .base to access underlying np.ndarray + + +cdef accessor _get_accessor_func(int code): + if code == 0: + return pyear + elif code == 1: + return pqyear + elif code == 2: + return pquarter + elif code == 3: + return pmonth + elif code == 4: + return pday + elif code == 5: + return phour + elif code == 6: + return pminute + elif code == 7: + return psecond + elif code == 8: + return pweek + elif code == 9: + return pday_of_year + elif code == 10: + return pweekday + elif code == 11: + return pdays_in_month + return NULL + + +@cython.wraparound(False) +@cython.boundscheck(False) +def extract_ordinals(ndarray[object] values, freq): + # TODO: Change type to const object[:] when Cython supports that. 
+ + cdef: + Py_ssize_t i, n = len(values) + int64_t[:] ordinals = np.empty(n, dtype=np.int64) + object p + + freqstr = Period._maybe_convert_freq(freq).freqstr + + for i in range(n): + p = values[i] + + if is_null_datetimelike(p): + ordinals[i] = NPY_NAT + else: + try: + ordinals[i] = p.ordinal + + if p.freqstr != freqstr: + msg = DIFFERENT_FREQ.format(cls="PeriodIndex", + own_freq=freqstr, + other_freq=p.freqstr) + raise IncompatibleFrequency(msg) + + except AttributeError: + p = Period(p, freq=freq) + if p is NaT: + # input may contain NaT-like string + ordinals[i] = NPY_NAT + else: + ordinals[i] = p.ordinal + + return ordinals.base # .base to access underlying np.ndarray + + +def extract_freq(ndarray[object] values): + # TODO: Change type to const object[:] when Cython supports that. + + cdef: + Py_ssize_t i, n = len(values) + object p + + for i in range(n): + p = values[i] + + try: + # now Timestamp / NaT has freq attr + if is_period_object(p): + return p.freq + except AttributeError: + pass + + raise ValueError('freq not specified and cannot be inferred') + + +# ----------------------------------------------------------------------- +# period helpers + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef int64_t[:] localize_dt64arr_to_period(int64_t[:] stamps, + int freq, object tz): + cdef: + Py_ssize_t n = len(stamps) + int64_t[:] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans + int64_t[:] deltas + Py_ssize_t[:] pos + npy_datetimestruct dts + int64_t local_val + + if is_utc(tz) or tz is None: + with nogil: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i], &dts) + result[i] = get_period_ordinal(&dts, freq) + + elif is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + dt64_to_dtstruct(local_val, &dts) + result[i] = get_period_ordinal(&dts, freq) + else: + # Adjust datetime64 
timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + if typ not in ['pytz', 'dateutil']: + # static/fixed; in this case we know that len(delta) == 1 + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i] + deltas[0], &dts) + result[i] = get_period_ordinal(&dts, freq) + else: + pos = trans.searchsorted(stamps, side='right') - 1 + + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) + result[i] = get_period_ordinal(&dts, freq) + + return result + + +DIFFERENT_FREQ = ("Input has different freq={other_freq} " + "from {cls}(freq={own_freq})") + + +class IncompatibleFrequency(ValueError): + pass + + +cdef class _Period: + + cdef readonly: + int64_t ordinal + object freq + + _typ = 'period' + + def __cinit__(self, ordinal, freq): + self.ordinal = ordinal + self.freq = freq + + @classmethod + def _maybe_convert_freq(cls, object freq): + if isinstance(freq, (int, tuple)): + code, stride = get_freq_code(freq) + freq = get_freq_str(code, stride) + + freq = to_offset(freq) + + if freq.n <= 0: + raise ValueError("Frequency must be positive, because it " + f"represents span: {freq.freqstr}") + + return freq + + @classmethod + def _from_ordinal(cls, ordinal, freq): + """ + Fast creation from an ordinal and freq that are already validated! 
+ """ + if ordinal == NPY_NAT: + return NaT + else: + freq = cls._maybe_convert_freq(freq) + self = _Period.__new__(cls, ordinal, freq) + return self + + def __richcmp__(self, other, op): + if is_period_object(other): + if other.freq != self.freq: + msg = DIFFERENT_FREQ.format(cls=type(self).__name__, + own_freq=self.freqstr, + other_freq=other.freqstr) + raise IncompatibleFrequency(msg) + return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) + elif other is NaT: + return _nat_scalar_rules[op] + # index/series like + elif hasattr(other, '_typ'): + return NotImplemented + else: + if op == Py_EQ: + return NotImplemented + elif op == Py_NE: + return NotImplemented + raise TypeError(f"Cannot compare type {type(self).__name__} " + f"with type {type(other).__name__}") + + def __hash__(self): + return hash((self.ordinal, self.freqstr)) + + def _add_delta(self, other): + cdef: + int64_t nanos, offset_nanos + + if (PyDelta_Check(other) or util.is_timedelta64_object(other) or + isinstance(other, _Tick)): + offset = to_offset(self.freq.rule_code) + if isinstance(offset, _Tick): + nanos = delta_to_nanoseconds(other) + offset_nanos = delta_to_nanoseconds(offset) + if nanos % offset_nanos == 0: + ordinal = self.ordinal + (nanos // offset_nanos) + return Period(ordinal=ordinal, freq=self.freq) + raise IncompatibleFrequency("Input cannot be converted to " + f"Period(freq={self.freqstr})") + elif util.is_offset_object(other): + freqstr = other.rule_code + base = get_base_alias(freqstr) + if base == self.freq.rule_code: + ordinal = self.ordinal + other.n + return Period(ordinal=ordinal, freq=self.freq) + msg = DIFFERENT_FREQ.format(cls=type(self).__name__, + own_freq=self.freqstr, + other_freq=other.freqstr) + raise IncompatibleFrequency(msg) + else: # pragma no cover + return NotImplemented + + def __add__(self, other): + if is_period_object(self): + if (PyDelta_Check(other) or util.is_timedelta64_object(other) or + util.is_offset_object(other)): + return 
self._add_delta(other) + elif other is NaT: + return NaT + elif util.is_integer_object(other): + ordinal = self.ordinal + other * self.freq.n + return Period(ordinal=ordinal, freq=self.freq) + elif (PyDateTime_Check(other) or + is_period_object(other) or util.is_datetime64_object(other)): + # can't add datetime-like + # GH#17983 + sname = type(self).__name__ + oname = type(other).__name__ + raise TypeError(f"unsupported operand type(s) for +: '{sname}' " + f"and '{oname}'") + else: # pragma: no cover + return NotImplemented + elif is_period_object(other): + # this can be reached via __radd__ because of cython rules + return other + self + else: + return NotImplemented + + def __sub__(self, other): + if is_period_object(self): + if (PyDelta_Check(other) or util.is_timedelta64_object(other) or + util.is_offset_object(other)): + neg_other = -other + return self + neg_other + elif util.is_integer_object(other): + ordinal = self.ordinal - other * self.freq.n + return Period(ordinal=ordinal, freq=self.freq) + elif is_period_object(other): + if other.freq != self.freq: + msg = DIFFERENT_FREQ.format(cls=type(self).__name__, + own_freq=self.freqstr, + other_freq=other.freqstr) + raise IncompatibleFrequency(msg) + # GH 23915 - mul by base freq since __add__ is agnostic of n + return (self.ordinal - other.ordinal) * self.freq.base + elif getattr(other, '_typ', None) == 'periodindex': + # GH#21314 PeriodIndex - Period returns an object-index + # of DateOffset objects, for which we cannot use __neg__ + # directly, so we have to apply it pointwise + return other.__sub__(self).map(lambda x: -x) + else: # pragma: no cover + return NotImplemented + elif is_period_object(other): + if self is NaT: + return NaT + return NotImplemented + else: + return NotImplemented + + def asfreq(self, freq, how='E'): + """ + Convert Period to desired frequency, at the start or end of the interval. + + Parameters + ---------- + freq : str + The desired frequency. 
+ how : {'E', 'S', 'end', 'start'}, default 'end' + Start or end of the timespan. + + Returns + ------- + resampled : Period + """ + freq = self._maybe_convert_freq(freq) + how = _validate_end_alias(how) + base1, mult1 = get_freq_code(self.freq) + base2, mult2 = get_freq_code(freq) + + # mult1 can't be negative or 0 + end = how == 'E' + if end: + ordinal = self.ordinal + mult1 - 1 + else: + ordinal = self.ordinal + ordinal = period_asfreq(ordinal, base1, base2, end) + + return Period(ordinal=ordinal, freq=freq) + + @property + def start_time(self): + """ + Get the Timestamp for the start of the period. + + Returns + ------- + Timestamp + + See Also + -------- + Period.end_time : Return the end Timestamp. + Period.dayofyear : Return the day of year. + Period.daysinmonth : Return the days in that month. + Period.dayofweek : Return the day of the week. + + Examples + -------- + >>> period = pd.Period('2012-1-1', freq='D') + >>> period + Period('2012-01-01', 'D') + + >>> period.start_time + Timestamp('2012-01-01 00:00:00') + + >>> period.end_time + Timestamp('2012-01-01 23:59:59.999999999') + """ + return self.to_timestamp(how='S') + + @property + def end_time(self): + # freq.n can't be negative or 0 + # ordinal = (self + self.freq.n).start_time.value - 1 + ordinal = (self + self.freq).start_time.value - 1 + return Timestamp(ordinal) + + def to_timestamp(self, freq=None, how='start', tz=None): + """ + Return the Timestamp representation of the Period. + + Uses the target frequency specified at the part of the period specified + by `how`, which is either `Start` or `Finish`. + + Parameters + ---------- + freq : str or DateOffset + Target frequency. Default is 'D' if self.freq is week or + longer and 'S' otherwise. + how : str, default 'S' (start) + One of 'S', 'E'. Can be aliased as case insensitive + 'Start', 'Finish', 'Begin', 'End'. 
+ + Returns + ------- + Timestamp + """ + if freq is not None: + freq = self._maybe_convert_freq(freq) + how = _validate_end_alias(how) + + end = how == 'E' + if end: + endpoint = (self + self.freq).to_timestamp(how='start') + return endpoint - Timedelta(1, 'ns') + + if freq is None: + base, mult = get_freq_code(self.freq) + freq = get_to_timestamp_base(base) + + base, mult = get_freq_code(freq) + val = self.asfreq(freq, how) + + dt64 = period_ordinal_to_dt64(val.ordinal, base) + return Timestamp(dt64, tz=tz) + + @property + def year(self): + base, mult = get_freq_code(self.freq) + return pyear(self.ordinal, base) + + @property + def month(self): + base, mult = get_freq_code(self.freq) + return pmonth(self.ordinal, base) + + @property + def day(self): + """ + Get day of the month that a Period falls on. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day of the week. + Period.dayofyear : Get the day of the year. + + Examples + -------- + >>> p = pd.Period("2018-03-11", freq='H') + >>> p.day + 11 + """ + base, mult = get_freq_code(self.freq) + return pday(self.ordinal, base) + + @property + def hour(self): + """ + Get the hour of the day component of the Period. + + Returns + ------- + int + The hour as an integer, between 0 and 23. + + See Also + -------- + Period.second : Get the second component of the Period. + Period.minute : Get the minute component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.hour + 13 + + Period longer than a day + + >>> p = pd.Period("2018-03-11", freq="M") + >>> p.hour + 0 + """ + base, mult = get_freq_code(self.freq) + return phour(self.ordinal, base) + + @property + def minute(self): + """ + Get minute of the hour component of the Period. + + Returns + ------- + int + The minute as an integer, between 0 and 59. + + See Also + -------- + Period.hour : Get the hour component of the Period. + Period.second : Get the second component of the Period. 
+ + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.minute + 3 + """ + base, mult = get_freq_code(self.freq) + return pminute(self.ordinal, base) + + @property + def second(self): + """ + Get the second component of the Period. + + Returns + ------- + int + The second of the Period (ranges from 0 to 59). + + See Also + -------- + Period.hour : Get the hour component of the Period. + Period.minute : Get the minute component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11 13:03:12.050000") + >>> p.second + 12 + """ + base, mult = get_freq_code(self.freq) + return psecond(self.ordinal, base) + + @property + def weekofyear(self): + base, mult = get_freq_code(self.freq) + return pweek(self.ordinal, base) + + @property + def week(self): + """ + Get the week of the year on the given Period. + + Returns + ------- + int + + See Also + -------- + Period.dayofweek : Get the day component of the Period. + Period.weekday : Get the day component of the Period. + + Examples + -------- + >>> p = pd.Period("2018-03-11", "H") + >>> p.week + 10 + + >>> p = pd.Period("2018-02-01", "D") + >>> p.week + 5 + + >>> p = pd.Period("2018-01-06", "D") + >>> p.week + 1 + """ + return self.weekofyear + + @property + def dayofweek(self): + """ + Day of the week the period lies in, with Monday=0 and Sunday=6. + + If the period frequency is lower than daily (e.g. hourly), and the + period spans over multiple days, the day at the start of the period is + used. + + If the frequency is higher than daily (e.g. monthly), the last day + of the period is used. + + Returns + ------- + int + Day of the week. + + See Also + -------- + Period.dayofweek : Day of the week the period lies in. + Period.weekday : Alias of Period.dayofweek. + Period.day : Day of the month. + Period.dayofyear : Day of the year. 
+ + Examples + -------- + >>> per = pd.Period('2017-12-31 22:00', 'H') + >>> per.dayofweek + 6 + + For periods that span over multiple days, the day at the beginning of + the period is returned. + + >>> per = pd.Period('2017-12-31 22:00', '4H') + >>> per.dayofweek + 6 + >>> per.start_time.dayofweek + 6 + + For periods with a frequency higher than days, the last day of the + period is returned. + + >>> per = pd.Period('2018-01', 'M') + >>> per.dayofweek + 2 + >>> per.end_time.dayofweek + 2 + """ + base, mult = get_freq_code(self.freq) + return pweekday(self.ordinal, base) + + @property + def weekday(self): + """ + Day of the week the period lies in, with Monday=0 and Sunday=6. + + If the period frequency is lower than daily (e.g. hourly), and the + period spans over multiple days, the day at the start of the period is + used. + + If the frequency is higher than daily (e.g. monthly), the last day + of the period is used. + + Returns + ------- + int + Day of the week. + + See Also + -------- + Period.dayofweek : Day of the week the period lies in. + Period.weekday : Alias of Period.dayofweek. + Period.day : Day of the month. + Period.dayofyear : Day of the year. + + Examples + -------- + >>> per = pd.Period('2017-12-31 22:00', 'H') + >>> per.dayofweek + 6 + + For periods that span over multiple days, the day at the beginning of + the period is returned. + + >>> per = pd.Period('2017-12-31 22:00', '4H') + >>> per.dayofweek + 6 + >>> per.start_time.dayofweek + 6 + + For periods with a frequency higher than days, the last day of the + period is returned. + + >>> per = pd.Period('2018-01', 'M') + >>> per.dayofweek + 2 + >>> per.end_time.dayofweek + 2 + """ + # Docstring is a duplicate from dayofweek. Reusing docstrings with + # Appender doesn't work for properties in Cython files, and setting + # the __doc__ attribute is also not possible. + return self.dayofweek + + @property + def dayofyear(self): + """ + Return the day of the year. 
+ + This attribute returns the day of the year on which the particular + date occurs. The return value ranges between 1 to 365 for regular + years and 1 to 366 for leap years. + + Returns + ------- + int + The day of year. + + See Also + -------- + Period.day : Return the day of the month. + Period.dayofweek : Return the day of week. + PeriodIndex.dayofyear : Return the day of year of all indexes. + + Examples + -------- + >>> period = pd.Period("2015-10-23", freq='H') + >>> period.dayofyear + 296 + >>> period = pd.Period("2012-12-31", freq='D') + >>> period.dayofyear + 366 + >>> period = pd.Period("2013-01-01", freq='D') + >>> period.dayofyear + 1 + """ + base, mult = get_freq_code(self.freq) + return pday_of_year(self.ordinal, base) + + @property + def quarter(self): + base, mult = get_freq_code(self.freq) + return pquarter(self.ordinal, base) + + @property + def qyear(self): + """ + Fiscal year the Period lies in according to its starting-quarter. + + The `year` and the `qyear` of the period will be the same if the fiscal + and calendar years are the same. When they are not, the fiscal year + can be different from the calendar year of the period. + + Returns + ------- + int + The fiscal year of the period. + + See Also + -------- + Period.year : Return the calendar year of the period. + + Examples + -------- + If the natural and fiscal year are the same, `qyear` and `year` will + be the same. + + >>> per = pd.Period('2018Q1', freq='Q') + >>> per.qyear + 2018 + >>> per.year + 2018 + + If the fiscal year starts in April (`Q-MAR`), the first quarter of + 2018 will start in April 2017. `year` will then be 2017, but `qyear` + will be the fiscal year, 2018.
+ + >>> per = pd.Period('2018Q1', freq='Q-MAR') + >>> per.start_time + Timestamp('2017-04-01 00:00:00') + >>> per.qyear + 2018 + >>> per.year + 2017 + """ + base, mult = get_freq_code(self.freq) + return pqyear(self.ordinal, base) + + @property + def days_in_month(self): + """ + Get the total number of days in the month that this period falls on. + + Returns + ------- + int + + See Also + -------- + Period.daysinmonth : Gets the number of days in the month. + DatetimeIndex.daysinmonth : Gets the number of days in the month. + calendar.monthrange : Returns a tuple containing weekday + (0-6 ~ Mon-Sun) and number of days (28-31). + + Examples + -------- + >>> p = pd.Period('2018-2-17') + >>> p.days_in_month + 28 + + >>> pd.Period('2018-03-01').days_in_month + 31 + + Handles the leap year case as well: + + >>> p = pd.Period('2016-2-17') + >>> p.days_in_month + 29 + """ + base, mult = get_freq_code(self.freq) + return pdays_in_month(self.ordinal, base) + + @property + def daysinmonth(self): + """ + Get the total number of days of the month that the Period falls in. + + Returns + ------- + int + + See Also + -------- + Period.days_in_month : Return the days of the month. + Period.dayofyear : Return the day of the year. 
+ + Examples + -------- + >>> p = pd.Period("2018-03-11", freq='H') + >>> p.daysinmonth + 31 + """ + return self.days_in_month + + @property + def is_leap_year(self) -> bool: + return bool(is_leapyear(self.year)) + + @classmethod + def now(cls, freq=None): + return Period(datetime.now(), freq=freq) + + @property + def freqstr(self): + return self.freq.freqstr + + def __repr__(self) -> str: + base, mult = get_freq_code(self.freq) + formatted = period_format(self.ordinal, base) + return f"Period('{formatted}', '{self.freqstr}')" + + def __str__(self) -> str: + """ + Return a string representation of the Period. + """ + base, mult = get_freq_code(self.freq) + formatted = period_format(self.ordinal, base) + value = str(formatted) + return value + + def __setstate__(self, state): + self.freq = state[1] + self.ordinal = state[2] + + def __reduce__(self): + object_state = None, self.freq, self.ordinal + return (Period, object_state) + + def strftime(self, fmt: str) -> str: + """ + Returns the string representation of the :class:`Period`, depending + on the selected ``fmt``. ``fmt`` must be a string + containing one or several directives. The method recognizes the same + directives as the :func:`time.strftime` function of the standard Python + distribution, as well as the specific additional directives ``%f``, + ``%F``, ``%q``. (formatting & docs originally from scikits.timeseries). + + +-----------+--------------------------------+-------+ + | Directive | Meaning | Notes | + +===========+================================+=======+ + | ``%a`` | Locale's abbreviated weekday | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%A`` | Locale's full weekday name. | | + +-----------+--------------------------------+-------+ + | ``%b`` | Locale's abbreviated month | | + | | name. | | + +-----------+--------------------------------+-------+ + | ``%B`` | Locale's full month name.
| | + +-----------+--------------------------------+-------+ + | ``%c`` | Locale's appropriate date and | | + | | time representation. | | + +-----------+--------------------------------+-------+ + | ``%d`` | Day of the month as a decimal | | + | | number [01,31]. | | + +-----------+--------------------------------+-------+ + | ``%f`` | 'Fiscal' year without a | \(1) | + | | century as a decimal number | | + | | [00,99] | | + +-----------+--------------------------------+-------+ + | ``%F`` | 'Fiscal' year with a century | \(2) | + | | as a decimal number | | + +-----------+--------------------------------+-------+ + | ``%H`` | Hour (24-hour clock) as a | | + | | decimal number [00,23]. | | + +-----------+--------------------------------+-------+ + | ``%I`` | Hour (12-hour clock) as a | | + | | decimal number [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%j`` | Day of the year as a decimal | | + | | number [001,366]. | | + +-----------+--------------------------------+-------+ + | ``%m`` | Month as a decimal number | | + | | [01,12]. | | + +-----------+--------------------------------+-------+ + | ``%M`` | Minute as a decimal number | | + | | [00,59]. | | + +-----------+--------------------------------+-------+ + | ``%p`` | Locale's equivalent of either | \(3) | + | | AM or PM. | | + +-----------+--------------------------------+-------+ + | ``%q`` | Quarter as a decimal number | | + | | [01,04] | | + +-----------+--------------------------------+-------+ + | ``%S`` | Second as a decimal number | \(4) | + | | [00,61]. | | + +-----------+--------------------------------+-------+ + | ``%U`` | Week number of the year | \(5) | + | | (Sunday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Sunday are considered to be in | | + | | week 0. 
| | + +-----------+--------------------------------+-------+ + | ``%w`` | Weekday as a decimal number | | + | | [0(Sunday),6]. | | + +-----------+--------------------------------+-------+ + | ``%W`` | Week number of the year | \(5) | + | | (Monday as the first day of | | + | | the week) as a decimal number | | + | | [00,53]. All days in a new | | + | | year preceding the first | | + | | Monday are considered to be in | | + | | week 0. | | + +-----------+--------------------------------+-------+ + | ``%x`` | Locale's appropriate date | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%X`` | Locale's appropriate time | | + | | representation. | | + +-----------+--------------------------------+-------+ + | ``%y`` | Year without century as a | | + | | decimal number [00,99]. | | + +-----------+--------------------------------+-------+ + | ``%Y`` | Year with century as a decimal | | + | | number. | | + +-----------+--------------------------------+-------+ + | ``%Z`` | Time zone name (no characters | | + | | if no time zone exists). | | + +-----------+--------------------------------+-------+ + | ``%%`` | A literal ``'%'`` character. | | + +-----------+--------------------------------+-------+ + + Notes + ----- + + (1) + The ``%f`` directive is the same as ``%y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (2) + The ``%F`` directive is the same as ``%Y`` if the frequency is + not quarterly. + Otherwise, it corresponds to the 'fiscal' year, as defined by + the :attr:`qyear` attribute. + + (3) + The ``%p`` directive only affects the output hour field + if the ``%I`` directive is used to parse the hour. + + (4) + The range really is ``0`` to ``61``; this accounts for leap + seconds and the (very rare) double leap seconds. 
+ + (5) + The ``%U`` and ``%W`` directives are only used in calculations + when the day of the week and the year are specified. + + Examples + -------- + + >>> a = Period(freq='Q-JUL', year=2006, quarter=1) + >>> a.strftime('%F-Q%q') + '2006-Q1' + >>> # Output the last month in the quarter of this date + >>> a.strftime('%b-%Y') + 'Oct-2005' + >>> + >>> a = Period(freq='D', year=2001, month=1, day=1) + >>> a.strftime('%d-%b-%Y') + '01-Jan-2001' + >>> a.strftime('%b. %d, %Y was a %A') + 'Jan. 01, 2001 was a Monday' + """ + base, mult = get_freq_code(self.freq) + return period_format(self.ordinal, base, fmt) + + +class Period(_Period): + """ + Represents a period of time. + + Parameters + ---------- + value : Period or str, default None + The time period represented (e.g., '4Q2005'). + freq : str, default None + One of pandas period strings or corresponding objects. + ordinal : int, default None + The period offset from the gregorian proleptic epoch. + year : int, default None + Year value of the period. + month : int, default 1 + Month value of the period. + quarter : int, default None + Quarter value of the period. + day : int, default 1 + Day value of the period. + hour : int, default 0 + Hour value of the period. + minute : int, default 0 + Minute value of the period. + second : int, default 0 + Second value of the period. + """ + + def __new__(cls, value=None, freq=None, ordinal=None, + year=None, month=None, quarter=None, day=None, + hour=None, minute=None, second=None): + # freq points to a tuple (base, mult); base is one of the defined + # periods such as A, Q, etc.
Every five minutes would be, e.g., + # ('T', 5) but may be passed in as a string like '5T' + + # ordinal is the period offset from the gregorian proleptic epoch + cdef _Period self + + if freq is not None: + freq = cls._maybe_convert_freq(freq) + + if ordinal is not None and value is not None: + raise ValueError("Only value or ordinal but not both should be " + "given but not both") + elif ordinal is not None: + if not util.is_integer_object(ordinal): + raise ValueError("Ordinal must be an integer") + if freq is None: + raise ValueError('Must supply freq for ordinal value') + + elif value is None: + if (year is None and month is None and + quarter is None and day is None and + hour is None and minute is None and second is None): + ordinal = NPY_NAT + else: + if freq is None: + raise ValueError("If value is None, freq cannot be None") + + # set defaults + month = 1 if month is None else month + day = 1 if day is None else day + hour = 0 if hour is None else hour + minute = 0 if minute is None else minute + second = 0 if second is None else second + + ordinal = _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq) + + elif is_period_object(value): + other = value + if freq is None or get_freq_code( + freq) == get_freq_code(other.freq): + ordinal = other.ordinal + freq = other.freq + else: + converted = other.asfreq(freq) + ordinal = converted.ordinal + + elif is_null_datetimelike(value) or (isinstance(value, str) and + value in nat_strings): + # explicit str check is necessary to avoid raising incorrectly + # if we have a non-hashable value. 
+ ordinal = NPY_NAT + + elif isinstance(value, str) or util.is_integer_object(value): + if util.is_integer_object(value): + value = str(value) + value = value.upper() + dt, _, reso = parse_time_string(value, freq) + if dt is NaT: + ordinal = NPY_NAT + + if freq is None: + try: + freq = Resolution.get_freq(reso) + except KeyError: + raise ValueError(f"Invalid frequency or could not " + f"infer: {reso}") + + elif PyDateTime_Check(value): + dt = value + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif util.is_datetime64_object(value): + dt = Timestamp(value) + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif PyDate_Check(value): + dt = datetime(year=value.year, month=value.month, day=value.day) + if freq is None: + raise ValueError('Must supply freq for datetime value') + else: + msg = "Value must be Period, string, integer, or datetime" + raise ValueError(msg) + + if ordinal is None: + base, mult = get_freq_code(freq) + ordinal = period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.microsecond, 0, base) + + return cls._from_ordinal(ordinal, freq) + + +cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, + int hour, int minute, int second, freq): + base, mult = get_freq_code(freq) + if quarter is not None: + year, month = quarter_to_myear(year, quarter, freq) + + return period_ordinal(year, month, day, hour, + minute, second, 0, 0, base) + + +def quarter_to_myear(year: int, quarter: int, freq): + """ + A quarterly frequency defines a "year" which may not coincide with + the calendar-year. Find the calendar-year and calendar-month associated + with the given year and quarter under the `freq`-derived calendar. 
+ + Parameters + ---------- + year : int + quarter : int + freq : DateOffset + + Returns + ------- + year : int + month : int + + See Also + -------- + Period.qyear + """ + if quarter <= 0 or quarter > 4: + raise ValueError('Quarter must be 1 <= q <= 4') + + mnum = MONTH_NUMBERS[get_rule_month(freq)] + 1 + month = (mnum + (quarter - 1) * 3) % 12 + 1 + if month > mnum: + year -= 1 + + return year, month + + +def _validate_end_alias(how): + how_dict = {'S': 'S', 'E': 'E', + 'START': 'S', 'FINISH': 'E', + 'BEGIN': 'S', 'END': 'E'} + how = how_dict.get(str(how).upper()) + if how not in {'S', 'E'}: + raise ValueError('How must be one of S or E') + return how diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx new file mode 100644 index 00000000..c0b20c14 --- /dev/null +++ b/pandas/_libs/tslibs/resolution.pyx @@ -0,0 +1,353 @@ +import numpy as np +from numpy cimport ndarray, int64_t, int32_t + +from pandas._libs.tslibs.util cimport get_nat + +from pandas._libs.tslibs.np_datetime cimport ( + npy_datetimestruct, dt64_to_dtstruct) +from pandas._libs.tslibs.frequencies cimport get_freq_code +from pandas._libs.tslibs.timezones cimport ( + is_utc, is_tzlocal, maybe_get_tz, get_dst_info) +from pandas._libs.tslibs.ccalendar cimport get_days_in_month +from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal + +# ---------------------------------------------------------------------- +# Constants + +cdef: + int64_t NPY_NAT = get_nat() + + int RESO_NS = 0 + int RESO_US = 1 + int RESO_MS = 2 + int RESO_SEC = 3 + int RESO_MIN = 4 + int RESO_HR = 5 + int RESO_DAY = 6 + +# ---------------------------------------------------------------------- + +cpdef resolution(int64_t[:] stamps, tz=None): + cdef: + Py_ssize_t i, n = len(stamps) + npy_datetimestruct dts + int reso = RESO_DAY, curr_reso + + if tz is not None: + tz = maybe_get_tz(tz) + return _reso_local(stamps, tz) + + +cdef _reso_local(int64_t[:] stamps, object tz): + cdef: + Py_ssize_t 
i, n = len(stamps) + int reso = RESO_DAY, curr_reso + ndarray[int64_t] trans + int64_t[:] deltas + Py_ssize_t[:] pos + npy_datetimestruct dts + int64_t local_val, delta + + if is_utc(tz) or tz is None: + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i], &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + elif is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + continue + local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) + dt64_to_dtstruct(local_val, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + if typ not in ['pytz', 'dateutil']: + # static/fixed; in this case we know that len(delta) == 1 + delta = deltas[0] + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i] + delta, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + else: + pos = trans.searchsorted(stamps, side='right') - 1 + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + + return reso + + +cdef inline int _reso_stamp(npy_datetimestruct *dts): + if dts.us != 0: + if dts.us % 1000 == 0: + return RESO_MS + return RESO_US + elif dts.sec != 0: + return RESO_SEC + elif dts.min != 0: + return RESO_MIN + elif dts.hour != 0: + return RESO_HR + return RESO_DAY + + +def get_freq_group(freq): + """ + Return frequency code group of given frequency str or offset. 
+ + Example + ------- + >>> get_freq_group('W-MON') + 4000 + + >>> get_freq_group('W-FRI') + 4000 + """ + if getattr(freq, '_typ', None) == 'dateoffset': + freq = freq.rule_code + + if isinstance(freq, str): + base, mult = get_freq_code(freq) + freq = base + elif isinstance(freq, int): + pass + else: + raise ValueError('input must be str, offset or int') + return (freq // 1000) * 1000 + + +class Resolution: + + # Note: cython won't allow us to reference the cdef versions at the + # module level + RESO_NS = 0 + RESO_US = 1 + RESO_MS = 2 + RESO_SEC = 3 + RESO_MIN = 4 + RESO_HR = 5 + RESO_DAY = 6 + + _reso_str_map = { + RESO_NS: 'nanosecond', + RESO_US: 'microsecond', + RESO_MS: 'millisecond', + RESO_SEC: 'second', + RESO_MIN: 'minute', + RESO_HR: 'hour', + RESO_DAY: 'day'} + + # factor to multiply a value by to convert it to the next finer grained + # resolution + _reso_mult_map = { + RESO_NS: None, + RESO_US: 1000, + RESO_MS: 1000, + RESO_SEC: 1000, + RESO_MIN: 60, + RESO_HR: 60, + RESO_DAY: 24} + + _reso_str_bump_map = { + 'D': 'H', + 'H': 'T', + 'T': 'S', + 'S': 'L', + 'L': 'U', + 'U': 'N', + 'N': None} + + _str_reso_map = {v: k for k, v in _reso_str_map.items()} + + _reso_freq_map = { + 'year': 'A', + 'quarter': 'Q', + 'month': 'M', + 'day': 'D', + 'hour': 'H', + 'minute': 'T', + 'second': 'S', + 'millisecond': 'L', + 'microsecond': 'U', + 'nanosecond': 'N'} + + _freq_reso_map = {v: k for k, v in _reso_freq_map.items()} + + @classmethod + def get_str(cls, reso): + """ + Return resolution str against resolution code. + + Example + ------- + >>> Resolution.get_str(Resolution.RESO_SEC) + 'second' + """ + return cls._reso_str_map.get(reso, 'day') + + @classmethod + def get_reso(cls, resostr): + """ + Return resolution code against resolution str.
+ + Example + ------- + >>> Resolution.get_reso('second') + 3 + + >>> Resolution.get_reso('second') == Resolution.RESO_SEC + True + """ + return cls._str_reso_map.get(resostr, cls.RESO_DAY) + + @classmethod + def get_freq_group(cls, resostr): + """ + Return frequency code group against resolution str. + + Example + ------- + >>> f.Resolution.get_freq_group('day') + 4000 + """ + return get_freq_group(cls.get_freq(resostr)) + + @classmethod + def get_freq(cls, resostr): + """ + Return frequency str against resolution str. + + Example + ------- + >>> f.Resolution.get_freq('day') + 'D' + """ + return cls._reso_freq_map[resostr] + + @classmethod + def get_str_from_freq(cls, freq): + """ + Return resolution str against frequency str. + + Example + ------- + >>> Resolution.get_str_from_freq('H') + 'hour' + """ + return cls._freq_reso_map.get(freq, 'day') + + @classmethod + def get_reso_from_freq(cls, freq): + """ + Return resolution code against frequency str. + + Example + ------- + >>> Resolution.get_reso_from_freq('H') + 5 + + >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR + True + """ + return cls.get_reso(cls.get_str_from_freq(freq)) + + @classmethod + def get_stride_from_decimal(cls, value, freq): + """ + Convert freq with decimal stride into a higher freq with integer stride + + Parameters + ---------- + value : integer or float + freq : string + Frequency string + + Raises + ------ + ValueError + If the float cannot be converted to an integer at any resolution.
+ + Example + ------- + >>> Resolution.get_stride_from_decimal(1.5, 'T') + (90, 'S') + + >>> Resolution.get_stride_from_decimal(1.04, 'H') + (3744, 'S') + + >>> Resolution.get_stride_from_decimal(1, 'D') + (1, 'D') + """ + if np.isclose(value % 1, 0): + return int(value), freq + else: + start_reso = cls.get_reso_from_freq(freq) + if start_reso == 0: + raise ValueError("Could not convert to integer offset " + "at any resolution") + + next_value = cls._reso_mult_map[start_reso] * value + next_name = cls._reso_str_bump_map[freq] + return cls.get_stride_from_decimal(next_value, next_name) + + +# ---------------------------------------------------------------------- +# Frequency Inference + +def month_position_check(fields, weekdays): + cdef: + int32_t daysinmonth, y, m, d + bint calendar_end = True + bint business_end = True + bint calendar_start = True + bint business_start = True + bint cal + int32_t[:] years + int32_t[:] months + int32_t[:] days + + years = fields['Y'] + months = fields['M'] + days = fields['D'] + + for y, m, d, wd in zip(years, months, days, weekdays): + if calendar_start: + calendar_start &= d == 1 + if business_start: + business_start &= d == 1 or (d <= 3 and wd == 0) + + if calendar_end or business_end: + daysinmonth = get_days_in_month(y, m) + cal = d == daysinmonth + if calendar_end: + calendar_end &= cal + if business_end: + business_end &= cal or (daysinmonth - d < 3 and wd == 4) + elif not calendar_start and not business_start: + break + + if calendar_end: + return 'ce' + elif business_end: + return 'be' + elif calendar_start: + return 'cs' + elif business_start: + return 'bs' + else: + return None diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c new file mode 100644 index 00000000..a8a47e2e --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -0,0 +1,768 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. 
+ +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include +#include + +#include +#include +#include +#include "np_datetime.h" + +#if PY_MAJOR_VERSION >= 3 +#define PyInt_AsLong PyLong_AsLong +#endif // PyInt_AsLong + +const npy_datetimestruct _NS_MIN_DTS = { + 1677, 9, 21, 0, 12, 43, 145225, 0, 0}; +const npy_datetimestruct _NS_MAX_DTS = { + 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; + + +const int days_per_month_table[2][12] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + +/* + * Returns 1 if the given year is a leap year, 0 otherwise. + */ +int is_leapyear(npy_int64 year) { + return (year & 0x3) == 0 && /* year % 4 == 0 */ + ((year % 100) != 0 || (year % 400) == 0); +} + +/* + * Adjusts a datetimestruct based on a minutes offset. 
Assumes
+ * the current values are valid.
+ *
+ * 'minutes' may be negative. Overflow is carried upward field by field:
+ * minutes into hours, hours into days, days into months and years. Every
+ * carry loops until its field is back in range, so an offset that spans
+ * more than one month still produces a valid calendar date.
+ */
+void add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes) {
+    int isleap;
+    int month_len;
+
+    /* MINUTES */
+    dts->min += minutes;
+    while (dts->min < 0) {
+        dts->min += 60;
+        dts->hour--;
+    }
+    while (dts->min >= 60) {
+        dts->min -= 60;
+        dts->hour++;
+    }
+
+    /* HOURS */
+    while (dts->hour < 0) {
+        dts->hour += 24;
+        dts->day--;
+    }
+    while (dts->hour >= 24) {
+        dts->hour -= 24;
+        dts->day++;
+    }
+
+    /* DAYS */
+    /* Borrow from the preceding month(s) until the day is positive. */
+    while (dts->day < 1) {
+        dts->month--;
+        if (dts->month < 1) {
+            dts->year--;
+            dts->month = 12;
+        }
+        isleap = is_leapyear(dts->year);
+        dts->day += days_per_month_table[isleap][dts->month - 1];
+    }
+    /*
+     * Carry into the following month(s). No month is shorter than 28
+     * days, so the table lookup is only needed above that value.
+     */
+    while (dts->day > 28) {
+        isleap = is_leapyear(dts->year);
+        month_len = days_per_month_table[isleap][dts->month - 1];
+        if (dts->day <= month_len) {
+            break;
+        }
+        dts->day -= month_len;
+        dts->month++;
+        if (dts->month > 12) {
+            dts->year++;
+            dts->month = 1;
+        }
+    }
+}
+
+/*
+ * Calculates the days offset from the 1970 epoch.
+ *
+ * Only dts->year, dts->month and dts->day are read; dts->month is used
+ * as a 1-based index into days_per_month_table. The result is negative
+ * for dates before 1970-01-01.
+ */
+npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
+    int i, month;
+    npy_int64 year, days = 0;
+    const int *month_lengths;
+
+    year = dts->year - 1970;
+    days = year * 365;
+
+    /* Adjust for leap years */
+    if (days >= 0) {
+        /*
+         * 1968 is the closest leap year before 1970.
+         * Exclude the current year, so add 1.
+         */
+        year += 1;
+        /* Add one day for each 4 years */
+        days += year / 4;
+        /* 1900 is the closest previous year divisible by 100 */
+        year += 68;
+        /* Subtract one day for each 100 years */
+        days -= year / 100;
+        /* 1600 is the closest previous year divisible by 400 */
+        year += 300;
+        /* Add one day for each 400 years */
+        days += year / 400;
+    } else {
+        /*
+         * 1972 is the closest later year after 1970.
+         * Include the current year, so subtract 2.
+         */
+        year -= 2;
+        /* Subtract one day for each 4 years */
+        days += year / 4;
+        /* 2000 is the closest later year divisible by 100 */
+        year -= 28;
+        /* Add one day for each 100 years */
+        days -= year / 100;
+        /* 2000 is also the closest later year divisible by 400 */
+        /* Subtract one day for each 400 years */
+        days += year / 400;
+    }
+
+    month_lengths = days_per_month_table[is_leapyear(dts->year)];
+    month = dts->month - 1;
+
+    /* Add the months */
+    for (i = 0; i < month; ++i) {
+        days += month_lengths[i];
+    }
+
+    /* Add the days */
+    days += dts->day - 1;
+
+    return days;
+}
+
+/*
+ * Modifies '*days_' to be the day offset within the year,
+ * and returns the year.
+ *
+ * On entry '*days_' is a day count relative to 1970-01-01. It is first
+ * rebased to 2000-01-01 (30 years containing 7 leap days later) so that
+ * the 400/100/4-year leap cycles line up with multiples of the offset.
+ */
+static npy_int64 days_to_yearsdays(npy_int64 *days_) {
+    const npy_int64 days_per_400years = (400 * 365 + 100 - 4 + 1);
+    /* Adjust so it's relative to the year 2000 (divisible by 400) */
+    npy_int64 days = (*days_) - (365 * 30 + 7);
+    npy_int64 year;
+
+    /* Break down the 400 year cycle to get the year and day within the year */
+    if (days >= 0) {
+        year = 400 * (days / days_per_400years);
+        days = days % days_per_400years;
+    } else {
+        /* Floor division and a non-negative remainder for negative input */
+        year = 400 * ((days - (days_per_400years - 1)) / days_per_400years);
+        days = days % days_per_400years;
+        if (days < 0) {
+            days += days_per_400years;
+        }
+    }
+
+    /* Work out the year/day within the 400 year cycle */
+    if (days >= 366) {
+        year += 100 * ((days - 1) / (100 * 365 + 25 - 1));
+        days = (days - 1) % (100 * 365 + 25 - 1);
+        if (days >= 365) {
+            year += 4 * ((days + 1) / (4 * 365 + 1));
+            days = (days + 1) % (4 * 365 + 1);
+            if (days >= 366) {
+                year += (days - 1) / 365;
+                days = (days - 1) % 365;
+            }
+        }
+    }
+
+    *days_ = days;
+    return year + 2000;
+}
+
+/*
+ * Adjusts a datetimestruct based on a seconds offset. Assumes
+ * the current values are valid.
+ */
+NPY_NO_EXPORT void add_seconds_to_datetimestruct(npy_datetimestruct *dts,
+                                                 int seconds) {
+    int carry_minutes;
+
+    dts->sec += seconds;
+
+    /* Nothing to carry while the seconds field stays within [0, 60). */
+    if (dts->sec >= 0 && dts->sec < 60) {
+        return;
+    }
+
+    /* C division truncates toward zero, so repair a negative remainder. */
+    carry_minutes = dts->sec / 60;
+    dts->sec %= 60;
+    if (dts->sec < 0) {
+        carry_minutes -= 1;
+        dts->sec += 60;
+    }
+    add_minutes_to_datetimestruct(dts, carry_minutes);
+}
+
+/*
+ * Splits a day count relative to the 1970 epoch into calendar fields
+ * and stores them in dts->year, dts->month and dts->day. No other
+ * field of 'dts' is written.
+ */
+static void set_datetimestruct_days(npy_int64 days, npy_datetimestruct *dts) {
+    const int *lengths;
+    int month = 0;
+
+    /* After this call 'days' is the zero-based day within the year. */
+    dts->year = days_to_yearsdays(&days);
+    lengths = days_per_month_table[is_leapyear(dts->year)];
+
+    /* Peel off whole months until the remainder fits in the current one. */
+    while (month < 12 && days >= lengths[month]) {
+        days -= lengths[month];
+        ++month;
+    }
+    if (month < 12) {
+        dts->month = month + 1;
+        dts->day = days + 1;
+    }
+}
+
+/*
+ * Orders two npy_datetimestruct values chronologically. Fields are
+ * examined from most to least significant (year down to attoseconds)
+ * and the first one that differs decides the result.
+ *
+ * Returns 1 if 'a' is later than 'b', -1 if it is earlier, and 0 if
+ * every field is equal.
+ */
+int cmp_npy_datetimestruct(const npy_datetimestruct *a,
+                           const npy_datetimestruct *b) {
+    if (a->year != b->year) {
+        return (a->year > b->year) ? 1 : -1;
+    }
+    if (a->month != b->month) {
+        return (a->month > b->month) ? 1 : -1;
+    }
+    if (a->day != b->day) {
+        return (a->day > b->day) ? 1 : -1;
+    }
+    if (a->hour != b->hour) {
+        return (a->hour > b->hour) ? 1 : -1;
+    }
+    if (a->min != b->min) {
+        return (a->min > b->min) ? 1 : -1;
+    }
+    if (a->sec != b->sec) {
+        return (a->sec > b->sec) ? 1 : -1;
+    }
+    if (a->us != b->us) {
+        return (a->us > b->us) ? 1 : -1;
+    }
+    if (a->ps != b->ps) {
+        return (a->ps > b->ps) ? 1 : -1;
+    }
+    if (a->as != b->as) {
+        return (a->as > b->as) ? 1 : -1;
+    }
+
+    return 0;
+}
+
+/*
+ *
+ * Converts a Python datetime.datetime or datetime.date
+ * object into
a NumPy npy_datetimestruct. Uses tzinfo (if present)
+ * to convert to UTC time.
+ *
+ * While the C API has PyDate_* and PyDateTime_* functions, the following
+ * implementation just asks for attributes, and thus supports
+ * datetime duck typing. The tzinfo time zone conversion would require
+ * this style of access anyway.
+ *
+ * An object lacking any of the hour/minute/second/microsecond attributes
+ * is treated as a date: only year, month and day are filled in. An object
+ * whose tzinfo is None, or whose tzinfo.utcoffset() returns None, is
+ * treated as naive and left unshifted.
+ *
+ * Returns -1 on error (with a Python exception set) and 0 on success.
+ */
+static int read_long_attr(PyObject *obj, const char *name, long *value);
+
+int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
+                                         npy_datetimestruct *out) {
+    // Assumes that obj is a valid datetime object
+    PyObject *tmp;
+    PyObject *offset;
+    PyObject *seconds_obj;
+    PyObject *obj = (PyObject*)dtobj;
+    long value;
+
+    /* Initialize the output to all zeros */
+    memset(out, 0, sizeof(npy_datetimestruct));
+    out->month = 1;
+    out->day = 1;
+
+    if (read_long_attr(obj, "year", &value) < 0) {
+        return -1;
+    }
+    out->year = value;
+    if (read_long_attr(obj, "month", &value) < 0) {
+        return -1;
+    }
+    out->month = (npy_int32)value;
+    if (read_long_attr(obj, "day", &value) < 0) {
+        return -1;
+    }
+    out->day = (npy_int32)value;
+
+    // TODO(anyone): If we can get PyDateTime_IMPORT to work, we could use
+    // PyDateTime_Check here, and less verbose attribute lookups.
+
+    /* Check for time attributes (if not there, return success as a date) */
+    if (!PyObject_HasAttrString(obj, "hour") ||
+        !PyObject_HasAttrString(obj, "minute") ||
+        !PyObject_HasAttrString(obj, "second") ||
+        !PyObject_HasAttrString(obj, "microsecond")) {
+        return 0;
+    }
+
+    if (read_long_attr(obj, "hour", &value) < 0) {
+        return -1;
+    }
+    out->hour = (npy_int32)value;
+    if (read_long_attr(obj, "minute", &value) < 0) {
+        return -1;
+    }
+    out->min = (npy_int32)value;
+    if (read_long_attr(obj, "second", &value) < 0) {
+        return -1;
+    }
+    out->sec = (npy_int32)value;
+    if (read_long_attr(obj, "microsecond", &value) < 0) {
+        return -1;
+    }
+    out->us = (npy_int32)value;
+
+    /* Apply the time zone offset if datetime obj is tz-aware */
+    if (PyObject_HasAttrString((PyObject*)obj, "tzinfo")) {
+        tmp = PyObject_GetAttrString(obj, "tzinfo");
+        if (tmp == NULL) {
+            return -1;
+        }
+        if (tmp == Py_None) {
+            Py_DECREF(tmp);
+        } else {
+            int seconds_offset, minutes_offset;
+
+            /* The utcoffset function should return a timedelta */
+            offset = PyObject_CallMethod(tmp, "utcoffset", "O", obj);
+            Py_DECREF(tmp);
+            if (offset == NULL) {
+                return -1;
+            }
+            /* utcoffset() returning None means the datetime is naive */
+            if (offset == Py_None) {
+                Py_DECREF(offset);
+                return 0;
+            }
+
+            /*
+             * The timedelta should have a function "total_seconds"
+             * which contains the value we want.
+             */
+            tmp = PyObject_CallMethod(offset, "total_seconds", "");
+            /* 'offset' is a new reference and is no longer needed */
+            Py_DECREF(offset);
+            if (tmp == NULL) {
+                return -1;
+            }
+            /*
+             * total_seconds() yields a float. Convert it to an integer
+             * object explicitly instead of relying on the integer
+             * accessor to coerce a float.
+             */
+            seconds_obj = PyNumber_Long(tmp);
+            Py_DECREF(tmp);
+            if (seconds_obj == NULL) {
+                return -1;
+            }
+            seconds_offset = PyInt_AsLong(seconds_obj);
+            Py_DECREF(seconds_obj);
+            if (seconds_offset == -1 && PyErr_Occurred()) {
+                return -1;
+            }
+
+            /* Convert to a minutes offset and apply it */
+            minutes_offset = seconds_offset / 60;
+
+            add_minutes_to_datetimestruct(out, -minutes_offset);
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Reads the integer attribute 'name' of 'obj' into '*value' and releases
+ * the attribute reference before returning.
+ *
+ * Returns 0 on success, -1 (with a Python exception set) if the attribute
+ * lookup or the integer conversion fails.
+ */
+static int read_long_attr(PyObject *obj, const char *name, long *value) {
+    PyObject *attr = PyObject_GetAttrString(obj, name);
+
+    if (attr == NULL) {
+        return -1;
+    }
+    *value = PyInt_AsLong(attr);
+    Py_DECREF(attr);
+    /* -1 is a legitimate value, so consult the error indicator too */
+    if (*value == -1 && PyErr_Occurred()) {
+        return -1;
+    }
+    return 0;
+}
+
+
+/*
+ * Converts a datetime from a datetimestruct to a datetime based
+ * on a metadata unit. The date is assumed to be valid.
+ */
+npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
+                                            const npy_datetimestruct *dts) {
+    npy_datetime ret;
+
+    if (base == NPY_FR_Y) {
+        /* Truncate to the year */
+        ret = dts->year - 1970;
+    } else if (base == NPY_FR_M) {
+        /* Truncate to the month */
+        ret = 12 * (dts->year - 1970) + (dts->month - 1);
+    } else {
+        /* Otherwise calculate the number of days to start */
+        npy_int64 days = get_datetimestruct_days(dts);
+
+        /*
+         * Every case below scales the day count up to the requested unit
+         * and adds the finer fields. Fields finer than the unit are
+         * dropped. The arithmetic is plain 64-bit with no range check.
+         */
+        switch (base) {
+            case NPY_FR_W:
+                /* Truncate to weeks */
+                if (days >= 0) {
+                    ret = days / 7;
+                } else {
+                    /* bias so that truncating division rounds down */
+                    ret = (days - 6) / 7;
+                }
+                break;
+            case NPY_FR_D:
+                ret = days;
+                break;
+            case NPY_FR_h:
+                ret = days * 24 + dts->hour;
+                break;
+            case NPY_FR_m:
+                ret = (days * 24 + dts->hour) * 60 + dts->min;
+                break;
+            case NPY_FR_s:
+                ret = ((days * 24 + dts->hour) * 60 + dts->min) * 60 + dts->sec;
+                break;
+            case NPY_FR_ms:
+                ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 +
+                       dts->sec) *
+                          1000 +
+                      dts->us / 1000;
+                break;
+            case NPY_FR_us:
+                ret = (((days * 24 + dts->hour) * 60 + dts->min) * 60 +
+                       dts->sec) *
+                          1000000 +
+                      dts->us;
+                break;
+            case NPY_FR_ns:
+                ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
+                        dts->sec) *
+                           1000000 +
+                       dts->us) *
+                          1000 +
+                      dts->ps / 1000;
+                break;
+            case NPY_FR_ps:
+                ret = ((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
+                        dts->sec) *
+                           1000000 +
+                       dts->us) *
+                          1000000 +
+                      dts->ps;
+                break;
+            case NPY_FR_fs:
+                /* only 2.6 hours */
+                ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
+                         dts->sec) *
+                            1000000 +
+                        dts->us) *
+                           1000000 +
+                       dts->ps) *
+                          1000 +
+                      dts->as / 1000;
+                break;
+            case NPY_FR_as:
+                /* only 9.2 secs */
+                ret = (((((days * 24 + dts->hour) * 60 + dts->min) * 60 +
+                         dts->sec) *
+                            1000000 +
+                        dts->us) *
+                           1000000 +
+                       dts->ps) *
+                          1000000 +
+                      dts->as;
+                break;
+            default:
+                /* Something got corrupted */
+                /*
+                 * A ValueError is set before returning. Note that -1 is
+                 * also a value valid input can produce (for example
+                 * year 1969 with NPY_FR_Y), so the return value alone
+                 * does not identify this failure.
+                 */
+                PyErr_SetString(
+                    PyExc_ValueError,
+                    "NumPy datetime metadata with corrupt unit value");
+                return -1;
+        }
+    }
+    return ret;
+}
+
+/*
+ * Port
numpy#13188 https://github.com/numpy/numpy/pull/13188/
+ *
+ * Computes the python `ret, d = divmod(d, unit)`.
+ *
+ * 'unit' must be positive. On return '*d' holds the non-negative
+ * remainder and the floored quotient is the return value.
+ *
+ * Note that GCC is smart enough at -O2 to eliminate the `if(*d < 0)` branch
+ * for subsequent calls to this command - it is able to deduce that `*d >= 0`.
+ */
+npy_int64 extract_unit(npy_datetime *d, npy_datetime unit) {
+    assert(unit > 0);
+    npy_int64 div = *d / unit;
+    npy_int64 mod = *d % unit;
+    /* C truncates toward zero; shift to floor semantics */
+    if (mod < 0) {
+        mod += unit;
+        div -= 1;
+    }
+    assert(mod >= 0);
+    *d = mod;
+    return div;
+}
+
+/*
+ * Converts a datetime based on the given metadata into a datetimestruct
+ *
+ * 'dt' is a count of 'base' units relative to the 1970 epoch. '*out' is
+ * first reset to 1970-01-01 00:00:00 and then filled in. Each case peels
+ * the value apart from the coarsest field to the finest with
+ * extract_unit(), so negative values floor correctly.
+ *
+ * For an unrecognised 'base' a RuntimeError is set and '*out' is left at
+ * its reset value. The function returns nothing.
+ */
+void pandas_datetime_to_datetimestruct(npy_datetime dt,
+                                       NPY_DATETIMEUNIT base,
+                                       npy_datetimestruct *out) {
+    npy_int64 perday;
+
+    /* Initialize the output to all zeros */
+    memset(out, 0, sizeof(npy_datetimestruct));
+    out->year = 1970;
+    out->month = 1;
+    out->day = 1;
+
+    /*
+     * Note that care must be taken with the / and % operators
+     * for negative values.
+     */
+    switch (base) {
+        case NPY_FR_Y:
+            out->year = 1970 + dt;
+            break;
+
+        case NPY_FR_M:
+            out->year = 1970 + extract_unit(&dt, 12);
+            out->month = dt + 1;
+            break;
+
+        case NPY_FR_W:
+            /* A week is 7 days */
+            set_datetimestruct_days(dt * 7, out);
+            break;
+
+        case NPY_FR_D:
+            set_datetimestruct_days(dt, out);
+            break;
+
+        case NPY_FR_h:
+            perday = 24LL;
+
+            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            out->hour = dt;
+            break;
+
+        case NPY_FR_m:
+            perday = 24LL * 60;
+
+            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            out->hour = (int)extract_unit(&dt, 60);
+            out->min = (int)dt;
+            break;
+
+        case NPY_FR_s:
+            perday = 24LL * 60 * 60;
+
+            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            out->hour = (int)extract_unit(&dt, 60 * 60);
+            out->min = (int)extract_unit(&dt, 60);
+            out->sec = (int)dt;
+            break;
+
+        case NPY_FR_ms:
+            perday = 24LL * 60 * 60 * 1000;
+
+            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            out->hour = (int)extract_unit(&dt, 1000LL * 60 * 60);
+            out->min = (int)extract_unit(&dt, 1000LL * 60);
+            out->sec = (int)extract_unit(&dt, 1000LL);
+            out->us = (int)(dt * 1000);
+            break;
+
+        case NPY_FR_us:
+            perday = 24LL * 60LL * 60LL * 1000LL * 1000LL;
+
+            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 60 * 60);
+            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 60);
+            out->sec = (int)extract_unit(&dt, 1000LL * 1000);
+            out->us = (int)dt;
+            break;
+
+        case NPY_FR_ns:
+            perday = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL;
+
+            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60);
+            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 60);
+            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000);
+            out->us = (int)extract_unit(&dt, 1000LL);
+            out->ps = (int)(dt * 1000);
+            break;
+
+        case NPY_FR_ps:
+            perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;
+
+            set_datetimestruct_days(extract_unit(&dt, perday), out);
+            /*
+             * There are 1000^4 picoseconds in a second, so every divisor
+             * carries four factors of 1000, matching 'perday' above.
+             */
+            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
+                                                   60 * 60);
+            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
+                                                  60);
+            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
+            /* 1000^2 picoseconds per microsecond; the rest is whole ps */
+            out->us = (int)extract_unit(&dt, 1000LL * 1000);
+            out->ps = (int)dt;
+            break;
+
+        case NPY_FR_fs:
+            /* entire range is only +- 2.6 hours */
+            out->hour = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
+                                                   1000 * 60 * 60);
+            /* a negative floored hour belongs to the last day of 1969 */
+            if (out->hour < 0) {
+                out->year = 1969;
+                out->month = 12;
+                out->day = 31;
+                out->hour += 24;
+                assert(out->hour >= 0);
+            }
+            out->min = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
+                                                  1000 * 60);
+            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
+                                                  1000);
+            out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000);
+            out->ps = (int)extract_unit(&dt, 1000LL);
+            out->as = (int)(dt * 1000);
+            break;
+
+        case NPY_FR_as:
+            /* entire range is only +- 9.2 seconds */
+            out->sec = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 *
+                                                  1000 * 1000);
+            /* a negative floored second belongs to 1969-12-31 23:59 */
+            if (out->sec < 0) {
+                out->year = 1969;
+                out->month = 12;
+                out->day = 31;
+                out->hour = 23;
+                out->min = 59;
+                out->sec += 60;
+                assert(out->sec >= 0);
+            }
+            out->us = (int)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
+            out->ps = (int)extract_unit(&dt, 1000LL * 1000);
+            out->as = (int)dt;
+            break;
+
+        default:
+            PyErr_SetString(PyExc_RuntimeError,
+                            "NumPy datetime metadata is corrupted with invalid "
+                            "base unit");
+    }
+}
+
+/*
+ * Converts a timedelta value expressed in the unit 'base' into a
+ * pandas_timedeltastruct. The timedelta is assumed to be valid.
+ *
+ * Only NPY_FR_ns is handled. For any other unit a RuntimeError is set and
+ * '*out' is left zeroed. The function returns nothing.
+ *
+ * For a negative 'td' the days field is negative and all remaining
+ * fields are non-negative.
+ */
+void pandas_timedelta_to_timedeltastruct(npy_timedelta td,
+                                         NPY_DATETIMEUNIT base,
+                                         pandas_timedeltastruct *out) {
+    npy_int64 frac;
+    npy_int64 sfrac;
+    npy_int64 ifrac;
+    int sign;
+    npy_int64 DAY_NS = 86400000000000LL;
+
+    /* Initialize the output to all zeros */
+    memset(out, 0, sizeof(pandas_timedeltastruct));
+
+    switch (base) {
+        case NPY_FR_ns:
+
+        // put frac in seconds
+        // (floored, so a negative td rounds away from zero)
+        if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0)
+            frac = td / (1000LL * 1000LL * 1000LL) - 1;
+        else
+            frac = td / (1000LL * 1000LL * 1000LL);
+
+        if (frac < 0) {
+            sign = -1;
+
+            // even fraction
+            if ((-frac % 86400LL) != 0) {
+              // borrow one extra day so the remaining seconds are positive
+              out->days = -frac / 86400LL + 1;
+              frac += 86400LL * out->days;
+            } else {
+              frac = -frac;
+            }
+        } else {
+            sign = 1;
+            out->days = 0;
+        }
+
+        if (frac >= 86400) {
+            out->days += frac / 86400LL;
+            frac -= out->days * 86400LL;
+        }
+
+        if (frac >= 3600) {
+            out->hrs = frac / 3600LL;
+            frac -= out->hrs * 3600LL;
+        } else {
+            out->hrs = 0;
+        }
+
+        if (frac >= 60) {
+            out->min = frac / 60LL;
+            frac -= out->min * 60LL;
+        } else {
+            out->min = 0;
+        }
+
+        if (frac >= 0) {
+            out->sec = frac;
+            frac -= out->sec;
+        } else {
+            out->sec = 0;
+        }
+
+        // whole seconds within the day, expressed in nanoseconds
+        sfrac = (out->hrs * 3600LL + out->min * 60LL
+                 + out->sec) * (1000LL * 1000LL * 1000LL);
+
+        if (sign < 0)
+            out->days = -out->days;
+
+        // what is left of td after removing whole days and whole seconds
+        ifrac = td - (out->days * DAY_NS + sfrac);
+
+        if (ifrac != 0) {
+            out->ms = ifrac / (1000LL * 1000LL);
+            ifrac -= out->ms * 1000LL * 1000LL;
+            out->us = ifrac / 1000LL;
+            ifrac -= out->us * 1000LL;
+            out->ns = ifrac;
+        } else {
+            out->ms = 0;
+            out->us = 0;
+            out->ns = 0;
+        }
+
+        // aggregate fields derived from the components above
+        out->seconds = out->hrs * 3600 + out->min * 60 + out->sec;
+        out->microseconds = out->ms * 1000 + out->us;
+        out->nanoseconds = out->ns;
+        break;
+
+        default:
+            PyErr_SetString(PyExc_RuntimeError,
+                            "NumPy timedelta metadata is corrupted with "
+                            "invalid base unit");
+    }
+}
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h
new file mode 100644
index 00000000..549d3840
--- /dev/null
+++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h
@@ -0,0 +1,80 @@
+/*
+
+Copyright (c) 2016, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+
+Copyright (c) 2005-2011, NumPy Developers
+All rights reserved.
+
+This file is derived from NumPy 1.7.
See NUMPY_LICENSE.txt + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include +#include + +typedef struct { + npy_int64 days; + npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; +} pandas_timedeltastruct; + +extern const npy_datetimestruct _NS_MIN_DTS; +extern const npy_datetimestruct _NS_MAX_DTS; + +// stuff pandas needs +// ---------------------------------------------------------------------------- + +int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj, + npy_datetimestruct *out); + +npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base, + const npy_datetimestruct *dts); + +void pandas_datetime_to_datetimestruct(npy_datetime val, NPY_DATETIMEUNIT fr, + npy_datetimestruct *result); + +void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + NPY_DATETIMEUNIT fr, + pandas_timedeltastruct *result); + +extern const int days_per_month_table[2][12]; + +// stuff numpy-derived code needs in header +// ---------------------------------------------------------------------------- + +int is_leapyear(npy_int64 year); + +/* + * Calculates the days offset from the 1970 epoch. + */ +npy_int64 +get_datetimestruct_days(const npy_datetimestruct *dts); + + +/* + * Compares two npy_datetimestruct objects chronologically + */ +int cmp_npy_datetimestruct(const npy_datetimestruct *a, + const npy_datetimestruct *b); + + +/* + * Adjusts a datetimestruct based on a minutes offset. Assumes + * the current values are valid. 
+ */ +void +add_minutes_to_datetimestruct(npy_datetimestruct *dts, int minutes); + + +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_H_ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c new file mode 100644 index 00000000..54ed6ecf --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.c @@ -0,0 +1,907 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#define PY_SSIZE_T_CLEAN +#define NO_IMPORT + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include + +#include + +#include +#include +#include + +#include "np_datetime.h" +#include "np_datetime_strings.h" + + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + ':' separator between hours, minutes, and seconds is optional. When + * omitted, each component must be 2 digits if it appears. 
(GH-10041) + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets set to 1 if the parsed time contains timezone, + * to 0 otherwise. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ +int parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + int *out_local, int *out_tzoffset) { + int year_leap = 0; + int i, numdigits; + const char *substr; + int sublen; + + /* If year-month-day are separated by a valid separator, + * months/days without leading zeroes will be parsed + * (though not iso8601). If the components aren't separated, + * 4 (YYYY) or 8 (YYYYMMDD) digits are expected. 6 digits are + * forbidden here (but parsed as YYMMDD elsewhere). + */ + int has_ymd_sep = 0; + char ymd_sep = '\0'; + char valid_ymd_sep[] = {'-', '.', '/', '\\', ' '}; + int valid_ymd_sep_len = sizeof(valid_ymd_sep); + + /* hour-minute-second may or may not separated by ':'. If not, then + * each component must be 2 digits. 
*/ + int has_hms_sep = 0; + int hour_was_2_digits = 0; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(npy_datetimestruct)); + out->month = 1; + out->day = 1; + + substr = str; + sublen = len; + + /* Skip leading whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + /* Leading '-' sign for negative year */ + if (*substr == '-') { + ++substr; + --sublen; + } + + if (sublen == 0) { + goto parse_error; + } + + /* PARSE THE YEAR (4 digits) */ + out->year = 0; + if (sublen >= 4 && isdigit(substr[0]) && isdigit(substr[1]) && + isdigit(substr[2]) && isdigit(substr[3])) { + out->year = 1000 * (substr[0] - '0') + 100 * (substr[1] - '0') + + 10 * (substr[2] - '0') + (substr[3] - '0'); + + substr += 4; + sublen -= 4; + } + + /* Negate the year if necessary */ + if (str[0] == '-') { + out->year = -out->year; + } + /* Check whether it's a leap-year */ + year_leap = is_leapyear(out->year); + + /* Next character must be a separator, start of month, or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (!isdigit(*substr)) { + for (i = 0; i < valid_ymd_sep_len; ++i) { + if (*substr == valid_ymd_sep[i]) { + break; + } + } + if (i == valid_ymd_sep_len) { + goto parse_error; + } + has_ymd_sep = 1; + ymd_sep = valid_ymd_sep[i]; + ++substr; + --sublen; + /* Cannot have trailing separator */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } + + /* PARSE THE MONTH */ + /* First digit required */ + out->month = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->month = 10 * out->month + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->month < 1 || out->month > 12) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Month out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be 
the separator, start of day, or end of string */ + if (sublen == 0) { + /* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */ + if (!has_ymd_sep) { + goto parse_error; + } + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if (has_ymd_sep) { + /* Must have separator, but cannot be trailing */ + if (*substr != ymd_sep || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + } + + /* PARSE THE DAY */ + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->day = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->day = 10 * out->day + (*substr - '0'); + ++substr; + --sublen; + } else if (!has_ymd_sep) { + goto parse_error; + } + if (out->day < 1 || + out->day > days_per_month_table[year_leap][out->month - 1]) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Day out of range in datetime string \"%s\"", str); + } + goto error; + } + + /* Next character must be a 'T', ' ', or end of string */ + if (sublen == 0) { + if (out_local != NULL) { + *out_local = 0; + } + goto finish; + } + + if ((*substr != 'T' && *substr != ' ') || sublen == 1) { + goto parse_error; + } + ++substr; + --sublen; + + /* PARSE THE HOURS */ + /* First digit required */ + if (!isdigit(*substr)) { + goto parse_error; + } + out->hour = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional */ + if (isdigit(*substr)) { + hour_was_2_digits = 1; + out->hour = 10 * out->hour + (*substr - '0'); + ++substr; + --sublen; + if (out->hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Hours out of range in datetime string \"%s\"", + str); + } + goto error; + } + } + + /* Next character must be a ':' or the end of the string */ + if (sublen == 0) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto finish; + } + + if (*substr == ':') { + has_hms_sep = 1; + ++substr; + --sublen; + /* Cannot have a trailing separator */ + if 
(sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!isdigit(*substr)) { + if (!hour_was_2_digits) { + goto parse_error; + } + goto parse_timezone; + } + + /* PARSE THE MINUTES */ + /* First digit required */ + out->min = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->min = 10 * out->min + (*substr - '0'); + ++substr; + --sublen; + if (out->min >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Minutes out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + if (sublen == 0) { + goto finish; + } + + /* If we make it through this condition block, then the next + * character is a digit. */ + if (has_hms_sep && *substr == ':') { + ++substr; + --sublen; + /* Cannot have a trailing ':' */ + if (sublen == 0 || !isdigit(*substr)) { + goto parse_error; + } + } else if (!has_hms_sep && isdigit(*substr)) { + } else { + goto parse_timezone; + } + + /* PARSE THE SECONDS */ + /* First digit required */ + out->sec = (*substr - '0'); + ++substr; + --sublen; + /* Second digit optional if there was a separator */ + if (isdigit(*substr)) { + out->sec = 10 * out->sec + (*substr - '0'); + ++substr; + --sublen; + if (out->sec >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Seconds out of range in datetime string \"%s\"", + str); + } + goto error; + } + } else if (!has_hms_sep) { + goto parse_error; + } + + /* Next character may be a '.' 
indicating fractional seconds */ + if (sublen > 0 && *substr == '.') { + ++substr; + --sublen; + } else { + goto parse_timezone; + } + + /* PARSE THE MICROSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->us *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->us += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + goto parse_timezone; + } + + /* PARSE THE PICOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->ps *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->ps += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + + if (sublen == 0 || !isdigit(*substr)) { + goto parse_timezone; + } + + /* PARSE THE ATTOSECONDS (0 to 6 digits) */ + numdigits = 0; + for (i = 0; i < 6; ++i) { + out->as *= 10; + if (sublen > 0 && isdigit(*substr)) { + out->as += (*substr - '0'); + ++substr; + --sublen; + ++numdigits; + } + } + +parse_timezone: + /* trim any whitepsace between time/timeezone */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + if (sublen == 0) { + // Unlike NumPy, treating no time zone as naive + goto finish; + } + + /* UTC specifier */ + if (*substr == 'Z') { + /* "Z" should be equivalent to tz offset "+00:00" */ + if (out_local != NULL) { + *out_local = 1; + } + + if (out_tzoffset != NULL) { + *out_tzoffset = 0; + } + + if (sublen == 1) { + goto finish; + } else { + ++substr; + --sublen; + } + } else if (*substr == '-' || *substr == '+') { + /* Time zone offset */ + int offset_neg = 0, offset_hour = 0, offset_minute = 0; + + /* + * Since "local" means local with respect to the current + * machine, we say this is non-local. 
+ */ + + if (*substr == '-') { + offset_neg = 1; + } + ++substr; + --sublen; + + /* The hours offset */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_hour = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_hour >= 24) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone hours offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_hour = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + + /* The minutes offset is optional */ + if (sublen > 0) { + /* Optional ':' */ + if (*substr == ':') { + ++substr; + --sublen; + } + + /* The minutes offset (at the end of the string) */ + if (sublen >= 2 && isdigit(substr[0]) && isdigit(substr[1])) { + offset_minute = 10 * (substr[0] - '0') + (substr[1] - '0'); + substr += 2; + sublen -= 2; + if (offset_minute >= 60) { + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Timezone minutes offset out of range " + "in datetime string \"%s\"", + str); + } + goto error; + } + } else if (sublen >= 1 && isdigit(substr[0])) { + offset_minute = substr[0] - '0'; + ++substr; + --sublen; + } else { + goto parse_error; + } + } + + /* Apply the time zone offset */ + if (offset_neg) { + offset_hour = -offset_hour; + offset_minute = -offset_minute; + } + if (out_local != NULL) { + *out_local = 1; + // Unlike NumPy, do not change internal value to local time + *out_tzoffset = 60 * offset_hour + offset_minute; + } + } + + /* Skip trailing whitespace */ + while (sublen > 0 && isspace(*substr)) { + ++substr; + --sublen; + } + + if (sublen != 0) { + goto parse_error; + } + +finish: + return 0; + +parse_error: + if (want_exc) { + PyErr_Format(PyExc_ValueError, + "Error parsing datetime string \"%s\" at position %d", str, + (int)(substr - str)); + } + return -1; + +error: + return -1; +} + +/* + * Provides a string length to use for converting datetime + * objects 
with the given local and unit settings.
 */
int get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base) {
    int len = 0;

    /*
     * Every case below intentionally falls through to the next one:
     * starting at the requested unit, each coarser field adds its own
     * printed width, so 'len' accumulates the width of every field from
     * the requested unit up to the year.
     */
    switch (base) {
        /* Generic units can only be used to represent NaT */
        /*    return 4;*/
        case NPY_FR_as:
            len += 3; /* "###" */
        case NPY_FR_fs:
            len += 3; /* "###" */
        case NPY_FR_ps:
            len += 3; /* "###" */
        case NPY_FR_ns:
            len += 3; /* "###" */
        case NPY_FR_us:
            len += 3; /* "###" */
        case NPY_FR_ms:
            len += 4; /* ".###" */
        case NPY_FR_s:
            len += 3; /* ":##" */
        case NPY_FR_m:
            len += 3; /* ":##" */
        case NPY_FR_h:
            len += 3; /* "T##" */
        case NPY_FR_D:
        case NPY_FR_W:
            len += 3; /* "-##" */
        case NPY_FR_M:
            len += 3; /* "-##" */
        case NPY_FR_Y:
            len += 21; /* 64-bit year */
            break;
        default:
            len += 3; /* handle the now defunct NPY_FR_B */
            break;
    }

    /*
     * Reserve room for the timezone suffix.
     * NOTE(review): relies on the NPY_DATETIMEUNIT enum ordering so that
     * hour-or-finer units compare >= NPY_FR_h -- confirm against the enum.
     */
    if (base >= NPY_FR_h) {
        if (local) {
            len += 5; /* "+####" or "-####" */
        } else {
            len += 1; /* "Z" */
        }
    }

    len += 1; /* NULL terminator */

    return len;
}


/*
 * Converts an npy_datetimestruct to an (almost) ISO 8601
 * NULL-terminated string using timezone Z (UTC). If the string fits in
 * the space exactly, it leaves out the NULL terminator and returns success.
 *
 * The differences from ISO 8601 are the 'NaT' string, and
 * the number of year digits is >= 4 instead of strictly 4.
 *
 * 'base' restricts the output to that unit. Set 'base' to
 * -1 to auto-detect a base after which all the values are zero.
 *
 * Returns 0 on success, -1 on failure (for example if the output
 * string was too short).
 *
 * NOTE(review): nothing in this function body implements the 'base' == -1
 * auto-detection described above; with -1 none of the "stop" checks match
 * and every field down to attoseconds is written. Confirm intended use.
 */
int make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen,
                           NPY_DATETIMEUNIT base) {
    /* 'substr'/'sublen' track the write cursor and the space left in outstr */
    char *substr = outstr;
    int sublen = outlen;
    int tmplen;

    /*
     * Print weeks with the same precision as days.
     *
     * TODO: Could print weeks with YYYY-Www format if the week
     *       epoch is a Monday.
     */
    if (base == NPY_FR_W) {
        base = NPY_FR_D;
    }

/* YEAR */
/*
 * Can't use PyOS_snprintf, because it always produces a '\0'
 * character at the end, and NumPy string types are permitted
 * to have data all the way to the end of the buffer.
 */
#ifdef _WIN32
    tmplen = _snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
#else
    tmplen = snprintf(substr, sublen, "%04" NPY_INT64_FMT, dts->year);
#endif  // _WIN32
    /* If it ran out of space or there isn't space for the NULL terminator */
    /*
     * NOTE(review): C99 snprintf always NUL-terminates, so when
     * tmplen == sublen the final digit would be replaced by '\0' while this
     * check still passes -- confirm callers always over-allocate (the
     * length helper above reserves 21 characters for the year).
     */
    if (tmplen < 0 || tmplen > sublen) {
        goto string_too_short;
    }
    substr += tmplen;
    sublen -= tmplen;

    /* Stop if the unit is years */
    if (base == NPY_FR_Y) {
        /* Only terminate when there is room; an exact fit is left as-is */
        if (sublen > 0) {
            *substr = '\0';
        }
        return 0;
    }

    /*
     * Each field below is written one character at a time, checking the
     * remaining space before every store, so a too-short buffer is never
     * overrun (it may be left partially filled when -1 is returned).
     */

    /* MONTH */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = '-';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->month / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->month % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is months */
    if (base == NPY_FR_M) {
        if (sublen > 0) {
            *substr = '\0';
        }
        return 0;
    }

    /* DAY */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = '-';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->day / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->day % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is days */
    if (base == NPY_FR_D) {
        if (sublen > 0) {
            *substr = '\0';
        }
        return 0;
    }

    /* HOUR */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = 'T';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->hour / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->hour % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is hours */
    /* From hours onward the output ends with the 'Z' suffix, not a bare date */
    if (base == NPY_FR_h) {
        goto add_time_zone;
    }

    /* MINUTE */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = ':';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->min / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->min % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is minutes */
    if (base == NPY_FR_m) {
        goto add_time_zone;
    }

    /* SECOND */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = ':';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->sec / 10) + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->sec % 10) + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is seconds */
    if (base == NPY_FR_s) {
        goto add_time_zone;
    }

    /* MILLISECOND */
    /* The top three of the six decimal digits of dts->us */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = '.';
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->us / 100000) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->us / 10000) % 10 + '0');
    if (sublen < 4) {
        goto string_too_short;
    }
    substr[3] = (char)((dts->us / 1000) % 10 + '0');
    substr += 4;
    sublen -= 4;

    /* Stop if the unit is milliseconds */
    if (base == NPY_FR_ms) {
        goto add_time_zone;
    }

    /* MICROSECOND */
    /* The low three decimal digits of dts->us */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->us / 100) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->us / 10) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)(dts->us % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is microseconds */
    if (base == NPY_FR_us) {
        goto add_time_zone;
    }

    /* NANOSECOND */
    /* The top three of the six decimal digits of dts->ps */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->ps / 100000) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->ps / 10000) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->ps / 1000) % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is nanoseconds */
    if (base == NPY_FR_ns) {
        goto add_time_zone;
    }

    /* PICOSECOND */
    /* The low three decimal digits of dts->ps */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->ps / 100) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->ps / 10) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)(dts->ps % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is picoseconds */
    if (base == NPY_FR_ps) {
        goto add_time_zone;
    }

    /* FEMTOSECOND */
    /* The top three of the six decimal digits of dts->as */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->as / 100000) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->as / 10000) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)((dts->as / 1000) % 10 + '0');
    substr += 3;
    sublen -= 3;

    /* Stop if the unit is femtoseconds */
    if (base == NPY_FR_fs) {
        goto add_time_zone;
    }

    /* ATTOSECOND */
    /* The low three decimal digits of dts->as */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = (char)((dts->as / 100) % 10 + '0');
    if (sublen < 2) {
        goto string_too_short;
    }
    substr[1] = (char)((dts->as / 10) % 10 + '0');
    if (sublen < 3) {
        goto string_too_short;
    }
    substr[2] = (char)(dts->as % 10 + '0');
    substr += 3;
    sublen -= 3;

add_time_zone:
    /* UTC "Zulu" time */
    if (sublen < 1) {
        goto string_too_short;
    }
    substr[0] = 'Z';
    substr += 1;
    sublen -= 1;

    /* Add a NULL terminator, and return */
    if (sublen > 0) {
        substr[0] = '\0';
    }

    return 0;

string_too_short:
    /* Sets a Python RuntimeError reporting the caller-supplied length */
    PyErr_Format(PyExc_RuntimeError,
                 "The string provided for NumPy ISO datetime formatting "
                 "was too short, with length %d",
                 outlen);
    return -1;
}
diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
new file mode 100644
index 00000000..880c34ea
---
/dev/null +++ b/pandas/_libs/tslibs/src/datetime/np_datetime_strings.h @@ -0,0 +1,82 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Written by Mark Wiebe (mwwiebe@gmail.com) +Copyright (c) 2011 by Enthought, Inc. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +See NUMPY_LICENSE.txt for the license. + +This file implements string parsing and creation for NumPy datetime. + +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +/* + * Parses (almost) standard ISO 8601 date strings. The differences are: + * + * + The date "20100312" is parsed as the year 20100312, not as + * equivalent to "2010-03-12". The '-' in the dates are not optional. + * + Only seconds may have a decimal point, with up to 18 digits after it + * (maximum attoseconds precision). + * + Either a 'T' as in ISO 8601 or a ' ' may be used to separate + * the date and the time. Both are treated equivalently. + * + Doesn't (yet) handle the "YYYY-DDD" or "YYYY-Www" formats. + * + Doesn't handle leap seconds (seconds value has 60 in these cases). + * + Doesn't handle 24:00:00 as synonym for midnight (00:00:00) tomorrow + * + Accepts special values "NaT" (not a time), "Today", (current + * day according to local time) and "Now" (current time in UTC). + * + * 'str' must be a NULL-terminated string, and 'len' must be its length. + * + * 'out' gets filled with the parsed date-time. + * 'out_local' gets whether returned value contains timezone. 0 for UTC, 1 for local time. + * 'out_tzoffset' gets set to timezone offset by minutes + * if the parsed time was in local time, + * to 0 otherwise. 
The values 'now' and 'today' don't get counted + * as local, and neither do UTC +/-#### timezone offsets, because + * they aren't using the computer's local timezone offset. + * + * Returns 0 on success, -1 on failure. + */ +int +parse_iso_8601_datetime(const char *str, int len, int want_exc, + npy_datetimestruct *out, + int *out_local, + int *out_tzoffset); + +/* + * Provides a string length to use for converting datetime + * objects with the given local and unit settings. + */ +int +get_datetime_iso_8601_strlen(int local, NPY_DATETIMEUNIT base); + +/* + * Converts an npy_datetimestruct to an (almost) ISO 8601 + * NULL-terminated string using timezone Z (UTC). + * + * 'base' restricts the output to that unit. Set 'base' to + * -1 to auto-detect a base after which all the values are zero. + * + * Returns 0 on success, -1 on failure (for example if the output + * string was too short). + */ +int +make_iso_8601_datetime(npy_datetimestruct *dts, char *outstr, int outlen, + NPY_DATETIMEUNIT base); +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_NP_DATETIME_STRINGS_H_ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx new file mode 100644 index 00000000..5508b208 --- /dev/null +++ b/pandas/_libs/tslibs/strptime.pyx @@ -0,0 +1,754 @@ +"""Strptime-related classes and functions. 
"""
import time
import locale
import calendar
import re
from datetime import date as datetime_date

from _thread import allocate_lock as _thread_allocate_lock

import pytz

import numpy as np
from numpy cimport int64_t


from pandas._libs.tslibs.np_datetime cimport (
    check_dts_bounds, dtstruct_to_dt64, npy_datetimestruct)

from pandas._libs.tslibs.nattype cimport checknull_with_nat, NPY_NAT
from pandas._libs.tslibs.nattype import nat_strings

# Maps each regex group name (format directive letter) to the integer
# dispatch code used in the parsing loop of array_strptime.
cdef dict _parse_code_table = {'y': 0,
                               'Y': 1,
                               'm': 2,
                               'B': 3,
                               'b': 4,
                               'd': 5,
                               'H': 6,
                               'I': 7,
                               'M': 8,
                               'S': 9,
                               'f': 10,
                               'A': 11,
                               'a': 12,
                               'w': 13,
                               'j': 14,
                               'U': 15,
                               'W': 16,
                               'Z': 17,
                               'p': 18,  # an additional key, only with I
                               'z': 19,
                               'G': 20,
                               'V': 21,
                               'u': 22}


def array_strptime(object[:] values, object fmt,
                   bint exact=True, errors='raise'):
    """
    Calculates the datetime structs represented by the passed array of strings

    Parameters
    ----------
    values : ndarray of string-like objects
    fmt : string-like regex
    exact : matches must be exact if True, search if False
    errors : string specifying error handling, {'raise', 'ignore', 'coerce'}
        With 'coerce', values that fail to parse or are out of bounds are
        set to NaT. Within this function 'ignore' is only validated; it
        raises exactly like 'raise'.

    Returns
    -------
    tuple of (ndarray, ndarray)
        The parsed values as a datetime64[ns] array, and an object array
        holding the timezone parsed for each element (None when absent).

    Raises
    ------
    ValueError
        If the format contains a bad directive or an unsupported directive
        combination, or (unless errors='coerce') if a value does not match
        the format or cannot be represented.
    """

    cdef:
        Py_ssize_t i, n = len(values)
        npy_datetimestruct dts
        int64_t[:] iresult
        object[:] result_timezone
        int year, month, day, minute, hour, second, weekday, julian
        int week_of_year, week_of_year_start, parse_code, ordinal
        int iso_week, iso_year
        int64_t us, ns
        object val, group_key, ampm, found, timezone
        dict found_key
        bint is_raise = errors=='raise'
        bint is_ignore = errors=='ignore'
        bint is_coerce = errors=='coerce'

    assert is_raise or is_ignore or is_coerce

    if fmt is not None:
        if '%W' in fmt or '%U' in fmt:
            if '%Y' not in fmt and '%y' not in fmt:
                raise ValueError("Cannot use '%W' or '%U' without "
                                 "day and year")
            if ('%A' not in fmt and '%a' not in fmt and '%w' not
                    in fmt):
                raise ValueError("Cannot use '%W' or '%U' without "
                                 "day and year")
        elif '%Z' in fmt and '%z' in fmt:
            raise ValueError("Cannot parse both %Z and %z")

    global _TimeRE_cache, _regex_cache
    with _cache_lock:
        # Rebuild the directive table (and drop compiled regexes) whenever
        # the process locale has changed since the cache was populated.
        if _getlang() != _TimeRE_cache.locale_time.lang:
            _TimeRE_cache = TimeRE()
            _regex_cache.clear()
        if len(_regex_cache) > _CACHE_MAX_SIZE:
            _regex_cache.clear()
        locale_time = _TimeRE_cache.locale_time
        format_regex = _regex_cache.get(fmt)
        if not format_regex:
            try:
                format_regex = _TimeRE_cache.compile(fmt)
            # KeyError raised when a bad format is found; can be specified as
            # \\, in which case it was a stray % but with a space after it
            except KeyError as err:
                bad_directive = err.args[0]
                if bad_directive == "\\":
                    bad_directive = "%"
                raise ValueError(f"'{bad_directive}' is a bad directive "
                                 f"in format '{fmt}'")
            # IndexError only occurs when the format string is "%"
            except IndexError:
                raise ValueError(f"stray % in format '{fmt}'")
            _regex_cache[fmt] = format_regex

    result = np.empty(n, dtype='M8[ns]')
    iresult = result.view('i8')
    result_timezone = np.empty(n, dtype='object')

    dts.us = dts.ps = dts.as = 0

    for i in range(n):
        val = values[i]
        if isinstance(val, str):
            if val in nat_strings:
                iresult[i] = NPY_NAT
                continue
        else:
            if checknull_with_nat(val):
                iresult[i] = NPY_NAT
                continue
            else:
                val = str(val)

        # exact matching
        if exact:
            found = format_regex.match(val)
            if not found:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"time data '{val}' does not match "
                                 f"format '{fmt}' (match)")
            if len(val) != found.end():
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"unconverted data remains: {val[found.end():]}")

        # search
        else:
            found = format_regex.search(val)
            if not found:
                if is_coerce:
                    iresult[i] = NPY_NAT
                    continue
                raise ValueError(f"time data {repr(val)} does not match format "
                                 f"{repr(fmt)} (search)")

        iso_year = -1
        year = 1900
        month = day = 1
        hour = minute = second = ns = us = 0
        timezone = None
        # Default to -1 to signify that values not known; not critical to have,
        # though
        iso_week = week_of_year = -1
        week_of_year_start = -1
        # weekday and julian defaulted to -1 so as to signal need to calculate
        # values
        weekday = julian = -1
        found_dict = found.groupdict()
        for group_key in found_dict:
            # Directives not explicitly handled below:
            #   c, x, X
            #      handled by making out of other directives
            #   U, W
            #      worthless without day of the week
            parse_code = _parse_code_table[group_key]

            if parse_code == 0:
                year = int(found_dict['y'])
                # Open Group specification for strptime() states that a %y
                # value in the range of [00, 68] is in the century 2000, while
                # [69,99] is in the century 1900
                if year <= 68:
                    year += 2000
                else:
                    year += 1900
            elif parse_code == 1:
                year = int(found_dict['Y'])
            elif parse_code == 2:
                month = int(found_dict['m'])
            elif parse_code == 3:
                # elif group_key == 'B':
                month = locale_time.f_month.index(found_dict['B'].lower())
            elif parse_code == 4:
                # elif group_key == 'b':
                month = locale_time.a_month.index(found_dict['b'].lower())
            elif parse_code == 5:
                # elif group_key == 'd':
                day = int(found_dict['d'])
            elif parse_code == 6:
                # elif group_key == 'H':
                hour = int(found_dict['H'])
            elif parse_code == 7:
                hour = int(found_dict['I'])
                ampm = found_dict.get('p', '').lower()
                # If there was no AM/PM indicator, we'll treat this like AM
                if ampm in ('', locale_time.am_pm[0]):
                    # We're in AM so the hour is correct unless we're
                    # looking at 12 midnight.
                    # 12 midnight == 12 AM == hour 0
                    if hour == 12:
                        hour = 0
                elif ampm == locale_time.am_pm[1]:
                    # We're in PM so we need to add 12 to the hour unless
                    # we're looking at 12 noon.
                    # 12 noon == 12 PM == hour 12
                    if hour != 12:
                        hour += 12
            elif parse_code == 8:
                minute = int(found_dict['M'])
            elif parse_code == 9:
                second = int(found_dict['S'])
            elif parse_code == 10:
                s = found_dict['f']
                # Pad to always return nanoseconds
                s += "0" * (9 - len(s))
                us = int(s)
                ns = us % 1000
                us = us // 1000
            elif parse_code == 11:
                weekday = locale_time.f_weekday.index(found_dict['A'].lower())
            elif parse_code == 12:
                weekday = locale_time.a_weekday.index(found_dict['a'].lower())
            elif parse_code == 13:
                weekday = int(found_dict['w'])
                if weekday == 0:
                    weekday = 6
                else:
                    weekday -= 1
            elif parse_code == 14:
                julian = int(found_dict['j'])
            elif parse_code == 15 or parse_code == 16:
                week_of_year = int(found_dict[group_key])
                if group_key == 'U':
                    # U starts week on Sunday.
                    week_of_year_start = 6
                else:
                    # W starts week on Monday.
                    week_of_year_start = 0
            elif parse_code == 17:
                timezone = pytz.timezone(found_dict['Z'])
            elif parse_code == 19:
                timezone = parse_timezone_directive(found_dict['z'])
            elif parse_code == 20:
                iso_year = int(found_dict['G'])
            elif parse_code == 21:
                iso_week = int(found_dict['V'])
            elif parse_code == 22:
                weekday = int(found_dict['u'])
                weekday -= 1

        # don't assume default values for ISO week/year
        if iso_year != -1:
            if iso_week == -1 or weekday == -1:
                raise ValueError("ISO year directive '%G' must be used with "
                                 "the ISO week directive '%V' and a weekday "
                                 "directive '%A', '%a', '%w', or '%u'.")
            if julian != -1:
                raise ValueError("Day of the year directive '%j' is not "
                                 "compatible with ISO year directive '%G'. "
                                 "Use '%Y' instead.")
        elif year != -1 and week_of_year == -1 and iso_week != -1:
            if weekday == -1:
                raise ValueError("ISO week directive '%V' must be used with "
                                 "the ISO year directive '%G' and a weekday "
                                 "directive '%A', '%a', '%w', or '%u'.")
            else:
                raise ValueError("ISO week directive '%V' is incompatible with "
                                 "the year directive '%Y'. Use the ISO year "
                                 "'%G' instead.")

        # If we know the wk of the year and what day of that wk, we can figure
        # out the Julian day of the year.
        if julian == -1 and weekday != -1:
            if week_of_year != -1:
                week_starts_Mon = week_of_year_start == 0
                julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
                                                  week_starts_Mon)
            elif iso_year != -1 and iso_week != -1:
                year, julian = _calc_julian_from_V(iso_year, iso_week,
                                                   weekday + 1)
        # Cannot pre-calculate datetime_date() since can change in Julian
        # calculation and thus could have different value for the day of the wk
        # calculation.
        try:
            if julian == -1:
                # Need to add 1 to result since first day of the year is 1, not
                # 0.
                ordinal = datetime_date(year, month, day).toordinal()
                julian = ordinal - datetime_date(year, 1, 1).toordinal() + 1
            else:
                # Assume that if they bothered to include Julian day it will
                # be accurate.
                datetime_result = datetime_date.fromordinal(
                    (julian - 1) + datetime_date(year, 1, 1).toordinal())
                year = datetime_result.year
                month = datetime_result.month
                day = datetime_result.day
        except ValueError:
            if is_coerce:
                iresult[i] = NPY_NAT
                continue
            raise
        if weekday == -1:
            weekday = datetime_date(year, month, day).weekday()

        dts.year = year
        dts.month = month
        dts.day = day
        dts.hour = hour
        dts.min = minute
        dts.sec = second
        dts.us = us
        dts.ps = ns * 1000

        iresult[i] = dtstruct_to_dt64(&dts)
        try:
            check_dts_bounds(&dts)
        except ValueError:
            if is_coerce:
                iresult[i] = NPY_NAT
                continue
            raise

        result_timezone[i] = timezone

    return result, result_timezone.base


"""
_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
from the standard library, see
https://github.com/python/cpython/blob/master/Lib/_strptime.py
The original module-level docstring follows.

Strptime-related classes and functions.
CLASSES:
    LocaleTime -- Discovers and stores locale-specific time information
    TimeRE -- Creates regexes for pattern matching a string of text containing
                time information
FUNCTIONS:
    _getlang -- Figure out what language is being used for the locale
    strptime -- Calculates the time struct represented by the passed-in string
"""


def _getlang():
    """Figure out what language is being used for the locale"""
    return locale.getlocale(locale.LC_TIME)


class LocaleTime:
    """
    Stores and handles locale-specific information related to time.
+ + ATTRIBUTES: + f_weekday -- full weekday names (7-item list) + a_weekday -- abbreviated weekday names (7-item list) + f_month -- full month names (13-item list; dummy value in [0], which + is added by code) + a_month -- abbreviated month names (13-item list, dummy value in + [0], which is added by code) + am_pm -- AM/PM representation (2-item list) + LC_date_time -- format string for date/time representation (string) + LC_date -- format string for date representation (string) + LC_time -- format string for time representation (string) + timezone -- daylight- and non-daylight-savings timezone representation + (2-item list of sets) + lang -- Language used by instance (2-item tuple) + """ + + def __init__(self): + """ + Set all attributes. + + Order of methods called matters for dependency reasons. + + The locale language is set at the offset and then checked again before + exiting. This is to make sure that the attributes were not set with a + mix of information from more than one locale. This would most likely + happen when using threads where one thread calls a locale-dependent + function while another thread changes the locale while the function in + the other thread is still running. Proper coding would call for + locks to prevent changing the locale while locale-dependent code is + running. The check here is done in case someone does not think about + doing this. + + Only other possible issue is if someone changed the timezone and did + not call tz.tzset . That is an issue for the programmer, though, + since changing the timezone is worthless without that call. + """ + self.lang = _getlang() + self.__calc_weekday() + self.__calc_month() + self.__calc_am_pm() + self.__calc_timezone() + self.__calc_date_time() + if _getlang() != self.lang: + raise ValueError("locale changed during initialization") + + def __pad(self, seq, front): + # Add '' to seq to either the front (is True), else the back. 
+ seq = list(seq) + if front: + seq.insert(0, '') + else: + seq.append('') + return seq + + def __calc_weekday(self): + # Set self.a_weekday and self.f_weekday using the calendar + # module. + a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] + f_weekday = [calendar.day_name[i].lower() for i in range(7)] + self.a_weekday = a_weekday + self.f_weekday = f_weekday + + def __calc_month(self): + # Set self.f_month and self.a_month using the calendar module. + a_month = [calendar.month_abbr[i].lower() for i in range(13)] + f_month = [calendar.month_name[i].lower() for i in range(13)] + self.a_month = a_month + self.f_month = f_month + + def __calc_am_pm(self): + # Set self.am_pm by using time.strftime(). + + # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that + # magical; just happened to have used it everywhere else where a + # static date was needed. + am_pm = [] + for hour in (01, 22): + time_tuple = time.struct_time( + (1999, 3, 17, hour, 44, 55, 2, 76, 0)) + am_pm.append(time.strftime("%p", time_tuple).lower()) + self.am_pm = am_pm + + def __calc_date_time(self): + # Set self.date_time, self.date, & self.time by using + # time.strftime(). + + # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of + # overloaded numbers is minimized. The order in which searches for + # values within the format string is very important; it eliminates + # possible ambiguity for what something represents. 
+ time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0)) + date_time = [None, None, None] + date_time[0] = time.strftime("%c", time_tuple).lower() + date_time[1] = time.strftime("%x", time_tuple).lower() + date_time[2] = time.strftime("%X", time_tuple).lower() + replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), + (self.f_month[3], '%B'), + (self.a_weekday[2], '%a'), + (self.a_month[3], '%b'), (self.am_pm[1], '%p'), + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. + ('2', '%w'), ('10', '%I')] + replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone + for tz in tz_values]) + for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')): + current_format = date_time[offset] + for old, new in replacement_pairs: + # Must deal with possible lack of locale info + # manifesting itself as the empty string (e.g., Swedish's + # lack of AM/PM info) or a platform returning a tuple of empty + # strings (e.g., MacOS 9 having timezone as ('','')). + if old: + current_format = current_format.replace(old, new) + # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since + # 2005-01-03 occurs before the first Monday of the year. Otherwise + # %U is used. + time_tuple = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0)) + if '00' in time.strftime(directive, time_tuple): + U_W = '%W' + else: + U_W = '%U' + date_time[offset] = current_format.replace('11', U_W) + self.LC_date_time = date_time[0] + self.LC_date = date_time[1] + self.LC_time = date_time[2] + + def __calc_timezone(self): + # Set self.timezone by using time.tzname. + # Do not worry about possibility of time.tzname[0] == timetzname[1] + # and time.daylight; handle that in strptime . 
+ try: + time.tzset() + except AttributeError: + pass + no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()]) + if time.daylight: + has_saving = frozenset([time.tzname[1].lower()]) + else: + has_saving = frozenset() + self.timezone = (no_saving, has_saving) + + +class TimeRE(dict): + """ + Handle conversion from format directives to regexes. + + Creates regexes for pattern matching a string of text containing + time information + """ + + def __init__(self, locale_time=None): + """ + Create keys/values. + + Order of execution is important for dependency reasons. + """ + if locale_time: + self.locale_time = locale_time + else: + self.locale_time = LocaleTime() + self._Z = None + base = super() + base.__init__({ + # The " \d" part of the regex is to make %c from ANSI C work + 'd': r"(?P3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", + 'f': r"(?P[0-9]{1,9})", + 'G': r"(?P\d\d\d\d)", + 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'I': r"(?P1[0-2]|0[1-9]|[1-9])", + 'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" + r"[1-9]\d|0[1-9]|[1-9])"), + 'm': r"(?P1[0-2]|0[1-9]|[1-9])", + 'M': r"(?P[0-5]\d|\d)", + 'S': r"(?P6[0-1]|[0-5]\d|\d)", + 'u': r"(?P[1-7])", + 'U': r"(?P5[0-3]|[0-4]\d|\d)", + 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", + 'w': r"(?P[0-6])", + # W is set below by using 'U' + 'y': r"(?P\d\d)", + # XXX: Does 'Y' need to worry about having less or more than + # 4 digits? 
+ 'Y': r"(?P\d\d\d\d)", + 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|Z)", + 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), + 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), + 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), + 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), + # 'Z' key is generated lazily via __getitem__ + '%': '%'}) + base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) + base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) + base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + + def __getitem__(self, key): + if key == "Z": + # lazy computation + if self._Z is None: + self._Z = self.__seqToRE(pytz.all_timezones, 'Z') + return self._Z + return super().__getitem__(key) + + def __seqToRE(self, to_convert, directive): + """ + Convert a list to a regex string for matching a directive. + + Want possible matching values to be from longest to shortest. This + prevents the possibility of a match occurring for a value that also + a substring of a larger value that should have matched (e.g., 'abc' + matching when 'abcdef' should have been the match). + """ + to_convert = sorted(to_convert, key=len, reverse=True) + for value in to_convert: + if value != '': + break + else: + return '' + regex = '|'.join(re.escape(stuff) for stuff in to_convert) + regex = f"(?P<{directive}>{regex})" + return regex + + def pattern(self, format): + """ + Return regex pattern for the format string. + + Need to make sure that any characters that might be interpreted as + regex syntax are escaped. + """ + processed_format = '' + # The sub() call escapes all characters that might be misconstrued + # as regex syntax. Cannot use re.escape since we have to deal with + # format directives (%m, etc.). 
+ regex_chars = re.compile(r"([\\.^$*+?\(\){}\[\]|])") + format = regex_chars.sub(r"\\\1", format) + whitespace_replacement = re.compile(r'\s+') + format = whitespace_replacement.sub(r'\\s+', format) + while '%' in format: + directive_index = format.index('%') +1 + processed_format = (f"{processed_format}" + f"{format[:directive_index -1]}" + f"{self[format[directive_index]]}") + format = format[directive_index +1:] + return f"{processed_format}{format}" + + def compile(self, format): + """Return a compiled re object for the format string.""" + return re.compile(self.pattern(format), re.IGNORECASE) + + +_cache_lock = _thread_allocate_lock() +# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock +# first! +_TimeRE_cache = TimeRE() +_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache +_regex_cache = {} + + +cdef int _calc_julian_from_U_or_W(int year, int week_of_year, + int day_of_week, int week_starts_Mon): + """ + Calculate the Julian day based on the year, week of the year, and day of + the week, with week_start_day representing whether the week of the year + assumes the week starts on Sunday or Monday (6 or 0). + + Parameters + ---------- + year : int + the year + week_of_year : int + week taken from format U or W + week_starts_Mon : int + represents whether the week of the year + assumes the week starts on Sunday or Monday (6 or 0) + + Returns + ------- + int + converted julian day + """ + + cdef: + int first_weekday, week_0_length, days_to_week + + first_weekday = datetime_date(year, 1, 1).weekday() + # If we are dealing with the %U directive (week starts on Sunday), it's + # easier to just shift the view to Sunday being the first day of the + # week. + if not week_starts_Mon: + first_weekday = (first_weekday + 1) % 7 + day_of_week = (day_of_week + 1) % 7 + + # Need to watch out for a week 0 (when the first day of the year is not + # the same as that specified by %U or %W). 
+ week_0_length = (7 - first_weekday) % 7 + if week_of_year == 0: + return 1 + day_of_week - first_weekday + else: + days_to_week = week_0_length + (7 * (week_of_year - 1)) + return 1 + days_to_week + day_of_week + + +cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): + """ + Calculate the Julian day based on the ISO 8601 year, week, and weekday. + + ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. + ISO week days range from 1 (Monday) to 7 (Sunday). + + Parameters + ---------- + iso_year : int + the year taken from format %G + iso_week : int + the week taken from format %V + iso_weekday : int + weekday taken from format %u + + Returns + ------- + (int, int) + the iso year and the Gregorian ordinal date / julian date + """ + + cdef: + int correction, ordinal + + correction = datetime_date(iso_year, 1, 4).isoweekday() + 3 + ordinal = (iso_week * 7) + iso_weekday - correction + # ordinal may be negative or 0 now, which means the date is in the previous + # calendar year + if ordinal < 1: + ordinal += datetime_date(iso_year, 1, 1).toordinal() + iso_year -= 1 + ordinal -= datetime_date(iso_year, 1, 1).toordinal() + return iso_year, ordinal + + +cdef parse_timezone_directive(str z): + """ + Parse the '%z' directive and return a pytz.FixedOffset + + Parameters + ---------- + z : string of the UTC offset + + Returns + ------- + pytz.FixedOffset + + Notes + ----- + This is essentially similar to the cpython implementation + https://github.com/python/cpython/blob/master/Lib/_strptime.py#L457-L479 + """ + + cdef: + int gmtoff_fraction, hours, minutes, seconds, pad_number, microseconds + int total_minutes + object gmtoff_remainder, gmtoff_remainder_padding + + if z == 'Z': + return pytz.FixedOffset(0) + if z[3] == ':': + z = z[:3] + z[4:] + if len(z) > 5: + if z[5] != ':': + raise ValueError(f"Inconsistent use of : in {z}") + z = z[:5] + z[6:] + hours = int(z[1:3]) + minutes = int(z[3:5]) + seconds = int(z[5:7] or 0) + + 
# Pad to always return microseconds. + gmtoff_remainder = z[8:] + pad_number = 6 - len(gmtoff_remainder) + gmtoff_remainder_padding = "0" * pad_number + microseconds = int(gmtoff_remainder + gmtoff_remainder_padding) + + total_minutes = ((hours * 60) + minutes + (seconds // 60) + + (microseconds // 60000000)) + total_minutes = -total_minutes if z.startswith("-") else total_minutes + return pytz.FixedOffset(total_minutes) diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd new file mode 100644 index 00000000..097309b1 --- /dev/null +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- + +from numpy cimport int64_t + +# Exposed for tslib, not intended for outside use. +cdef int64_t cast_from_unit(object ts, object unit) except? -1 +cpdef int64_t delta_to_nanoseconds(delta) except? -1 +cdef convert_to_timedelta64(object ts, object unit) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx new file mode 100644 index 00000000..0a773b8a --- /dev/null +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -0,0 +1,1555 @@ +import collections +import textwrap + +import cython + +from cpython.object cimport Py_NE, Py_EQ, PyObject_RichCompare + +import numpy as np +cimport numpy as cnp +from numpy cimport int64_t +cnp.import_array() + +from cpython.datetime cimport (timedelta, + PyDateTime_Check, PyDelta_Check, + PyDateTime_IMPORT) +PyDateTime_IMPORT + + +cimport pandas._libs.tslibs.util as util +from pandas._libs.tslibs.util cimport ( + is_timedelta64_object, is_datetime64_object, is_integer_object, + is_float_object) + +from pandas._libs.tslibs.c_timestamp cimport _Timestamp + +from pandas._libs.tslibs.ccalendar import DAY_SECONDS + +from pandas._libs.tslibs.np_datetime cimport ( + cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) + +from pandas._libs.tslibs.nattype import nat_strings +from pandas._libs.tslibs.nattype cimport ( + checknull_with_nat, NPY_NAT, c_NaT as NaT) 
@cython.boundscheck(False)
@cython.wraparound(False)
def ints_to_pytimedelta(int64_t[:] arr, box=False):
    """
    Turn an array of i8 nanosecond values into an object array of
    ``datetime.timedelta`` (default) or ``Timedelta`` (``box=True``).

    Parameters
    ----------
    arr : ndarray[int64_t]
    box : bool, default False
        If True, produce Timedelta objects instead of datetime.timedelta.

    Returns
    -------
    result : ndarray[object]
        array of Timedelta or timedelta objects; NPY_NAT entries become NaT
    """
    cdef:
        Py_ssize_t idx, count = len(arr)
        int64_t nanos
        object[:] out = np.empty(count, dtype=object)

    for idx in range(count):
        nanos = arr[idx]

        if nanos == NPY_NAT:
            out[idx] = NaT
        elif box:
            out[idx] = Timedelta(nanos)
        else:
            out[idx] = timedelta(microseconds=int(nanos) / 1000)

    # .base hands back the ndarray behind the memoryview
    return out.base
except? -1: + if hasattr(delta, 'nanos'): + return delta.nanos + if hasattr(delta, 'delta'): + delta = delta.delta + if is_timedelta64_object(delta): + return delta.astype("timedelta64[ns]").item() + if is_integer_object(delta): + return delta + if PyDelta_Check(delta): + return (delta.days * 24 * 60 * 60 * 1000000 + + delta.seconds * 1000000 + + delta.microseconds) * 1000 + + raise TypeError(type(delta)) + + +cdef convert_to_timedelta64(object ts, object unit): + """ + Convert an incoming object to a timedelta64 if possible. + Before calling, unit must be standardized to avoid repeated unit conversion + + Handle these types of objects: + - timedelta/Timedelta + - timedelta64 + - an offset + - np.int64 (with unit providing a possible modifier) + - None/NaT + + Return an ns based int64 + """ + if checknull_with_nat(ts): + return np.timedelta64(NPY_NAT) + elif isinstance(ts, Timedelta): + # already in the proper format + ts = np.timedelta64(ts.value) + elif is_datetime64_object(ts): + # only accept a NaT here + if ts.astype('int64') == NPY_NAT: + return np.timedelta64(NPY_NAT) + elif is_timedelta64_object(ts): + ts = ts.astype(f"m8[{unit.lower()}]") + elif is_integer_object(ts): + if ts == NPY_NAT: + return np.timedelta64(NPY_NAT) + else: + if unit in ['Y', 'M', 'W']: + ts = np.timedelta64(ts, unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts) + elif is_float_object(ts): + if unit in ['Y', 'M', 'W']: + ts = np.timedelta64(int(ts), unit) + else: + ts = cast_from_unit(ts, unit) + ts = np.timedelta64(ts) + elif isinstance(ts, str): + if len(ts) > 0 and ts[0] == 'P': + ts = parse_iso_format_string(ts) + else: + ts = parse_timedelta_string(ts) + ts = np.timedelta64(ts) + elif hasattr(ts, 'delta'): + ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') + + if PyDelta_Check(ts): + ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns') + elif not is_timedelta64_object(ts): + raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}") + return 
@cython.boundscheck(False)
@cython.wraparound(False)
def array_to_timedelta64(object[:] values, unit='ns', errors='raise'):
    """
    Convert an object ndarray to an array of timedeltas.

    Parameters
    ----------
    values : ndarray[object]
        Elements to convert.
    unit : str, default 'ns'
        Unit used by the per-element fallback conversion; it is normalized
        with ``parse_timedelta_unit`` before use.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        With 'coerce', elements whose conversion raises ValueError become
        NaT. Any other accepted value (including 'ignore') re-raises the
        ValueError from this function.

    Returns
    -------
    ndarray
        The int64 ('i8') view over the freshly allocated 'm8[ns]' buffer.

    Raises
    ------
    ValueError
        If ``errors`` is not one of the accepted values, or if an element
        cannot be converted and ``errors`` is not 'coerce'.
    """

    cdef:
        Py_ssize_t i, n
        int64_t[:] iresult

    if errors not in ('ignore', 'raise', 'coerce'):
        # message previously ended in a stray, unbalanced '}'
        raise ValueError("errors must be one of 'ignore', "
                         "'raise', or 'coerce'")

    n = values.shape[0]
    result = np.empty(n, dtype='m8[ns]')
    iresult = result.view('i8')

    # Usually, we have all strings. If so, we hit the fast path.
    # If this path fails, we try conversion a different way, and
    # this is where all of the error handling will take place.
    try:
        for i in range(n):
            if values[i] is NaT:
                # we allow this check in the fast-path because NaT is a C-object
                # so this is an inexpensive check
                iresult[i] = NPY_NAT
            else:
                result[i] = parse_timedelta_string(values[i])
    except (TypeError, ValueError):
        # Slow path: restart from the first element and convert each value
        # individually so that per-element error handling can apply.
        unit = parse_timedelta_unit(unit)
        for i in range(n):
            try:
                result[i] = convert_to_timedelta64(values[i], unit)
            except ValueError:
                if errors == 'coerce':
                    result[i] = NPY_NAT
                else:
                    raise

    return iresult.base  # .base to access underlying np.ndarray
cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
    """
    Convert ``ts``, expressed in ``unit``, to an integer count of nanoseconds.

    The fractional part of a float ``ts`` is rounded to the precision ``p``
    that ``precision_from_unit`` reports for ``unit``.

    Parameters
    ----------
    ts : int, float or None
        Value to convert. None returns the nanosecond multiplier of ``unit``.
    unit : str or None
        Unit understood by ``precision_from_unit``.

    Returns
    -------
    int64_t
        Nanoseconds.
    """
    cdef:
        int64_t m
        int p

    m, p = precision_from_unit(unit)

    # just give me the unit back
    if ts is None:
        return m

    # cast the unit, multiply base/frac separately
    # to avoid precision issues from float -> int.
    # The integer cast on ``base`` is what makes ``frac`` non-zero for float
    # input; without it ``frac`` is always 0 and the rounding below is dead.
    base = <int64_t>ts
    frac = ts - base
    if p:
        frac = round(frac, p)
    return <int64_t>(base * m) + <int64_t>(frac * m)
+ """ + + cdef: + unicode c + bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0 + object current_unit = None + int64_t result = 0, m = 0, r + list number = [], frac = [], unit = [] + + # neg : tracks if we have a leading negative for the value + # have_dot : tracks if we are processing a dot (either post hhmmss or + # inside an expression) + # have_value : track if we have at least 1 leading unit + # have_hhmmss : tracks if we have a regular format hh:mm:ss + + if len(ts) == 0 or ts in nat_strings: + return NPY_NAT + + for c in ts: + + # skip whitespace / commas + if c == ' ' or c == ',': + pass + + # positive signs are ignored + elif c == '+': + pass + + # neg + elif c == '-': + + if neg or have_value or have_hhmmss: + raise ValueError("only leading negative signs are allowed") + + neg = 1 + + # number (ascii codes) + elif ord(c) >= 48 and ord(c) <= 57: + + if have_dot: + + # we found a dot, but now its just a fraction + if len(unit): + number.append(c) + have_dot = 0 + else: + frac.append(c) + + elif not len(unit): + number.append(c) + + else: + r = timedelta_from_spec(number, frac, unit) + unit, number, frac = [], [c], [] + + result += timedelta_as_neg(r, neg) + + # hh:mm:ss. 
+ elif c == ':': + + # we flip this off if we have a leading value + if have_value: + neg = 0 + + # we are in the pattern hh:mm:ss pattern + if len(number): + if current_unit is None: + current_unit = 'h' + m = 1000000000L * 3600 + elif current_unit == 'h': + current_unit = 'm' + m = 1000000000L * 60 + elif current_unit == 'm': + current_unit = 's' + m = 1000000000L + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + have_hhmmss = 1 + else: + raise ValueError(f"expecting hh:mm:ss format, received: {ts}") + + unit, number = [], [] + + # after the decimal point + elif c == '.': + + if len(number) and current_unit is not None: + + # by definition we had something like + # so we need to evaluate the final field from a + # hh:mm:ss (so current_unit is 'm') + if current_unit != 'm': + raise ValueError("expected hh:mm:ss format before .") + m = 1000000000L + r = int(''.join(number)) * m + result += timedelta_as_neg(r, neg) + have_value = 1 + unit, number, frac = [], [], [] + + have_dot = 1 + + # unit + else: + unit.append(c) + have_value = 1 + have_dot = 0 + + # we had a dot, but we have a fractional + # value since we have an unit + if have_dot and len(unit): + r = timedelta_from_spec(number, frac, unit) + result += timedelta_as_neg(r, neg) + + # we have a dot as part of a regular format + # e.g. 
cdef inline int64_t timedelta_as_neg(int64_t value, bint neg):
    """
    Apply an optional sign flip to a timedelta value.

    Parameters
    ----------
    value : int64_t of the timedelta value
    neg : boolean if the a negative value

    Returns
    -------
    int64_t
        ``-value`` when ``neg`` is set, ``value`` otherwise
    """
    return -value if neg else value
cpdef inline object parse_timedelta_unit(object unit):
    """
    Normalize a timedelta unit alias to its canonical abbreviation.

    Parameters
    ----------
    unit : an unit string
        None is treated as 'ns'.

    Returns
    -------
    str
        The canonical unit looked up in ``timedelta_abbrevs``.

    Raises
    ------
    ValueError
        If ``unit`` is not a known alias (or has no ``lower`` method).
    """
    if unit is None:
        return 'ns'

    if unit == 'M':
        # handled before lower-casing: 'm' is the alias for minutes
        return unit

    try:
        canonical = timedelta_abbrevs[unit.lower()]
    except (KeyError, AttributeError):
        raise ValueError(f"invalid unit abbreviation: {unit}")
    return canonical
+ + elif hasattr(other, 'dtype'): + # nd-array like + if other.dtype.kind in ['m', 'M']: + return op(self.to_timedelta64(), other) + elif other.dtype.kind == 'O': + return np.array([op(self, x) for x in other]) + else: + return NotImplemented + + elif not _validate_ops_compat(other): + return NotImplemented + + try: + other = Timedelta(other) + except ValueError: + # failed to parse as timedelta + return NotImplemented + + if other is NaT: + # e.g. if original other was timedelta64('NaT') + return NaT + return Timedelta(op(self.value, other.value), unit='ns') + + f.__name__ = name + return f + + +# ---------------------------------------------------------------------- +# Timedelta Construction + +cdef inline int64_t parse_iso_format_string(object ts) except? -1: + """ + Extracts and cleanses the appropriate values from a match object with + groups for each component of an ISO 8601 duration + + Parameters + ---------- + ts: + ISO 8601 Duration formatted string + + Returns + ------- + ns: int64_t + Precision in nanoseconds of matched ISO 8601 duration + + Raises + ------ + ValueError + If ``ts`` cannot be parsed + """ + + cdef: + unicode c + int64_t result = 0, r + int p = 0 + object dec_unit = 'ms', err_msg + bint have_dot = 0, have_value = 0, neg = 0 + list number = [], unit = [] + + err_msg = "Invalid ISO 8601 Duration format - {}".format(ts) + + for c in ts: + # number (ascii codes) + if ord(c) >= 48 and ord(c) <= 57: + + have_value = 1 + if have_dot: + if p == 3 and dec_unit != 'ns': + unit.append(dec_unit) + if dec_unit == 'ms': + dec_unit = 'us' + elif dec_unit == 'us': + dec_unit = 'ns' + p = 0 + p += 1 + + if not len(unit): + number.append(c) + else: + # if in days, pop trailing T + if unit[-1] == 'T': + unit.pop() + elif 'H' in unit or 'M' in unit: + if len(number) > 2: + raise ValueError(err_msg) + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [c] + else: + if c == 'P': + pass # ignore 
cdef _to_py_int_float(v):
    """
    Coerce an integer-like or float-like scalar to a builtin int or float.

    Raises TypeError for anything that is neither.
    """
    # Note: This used to be defined inside Timedelta.__new__
    # but cython will not allow `cdef` functions to be defined dynamically.
    if is_integer_object(v):
        return int(v)
    if not is_float_object(v):
        raise TypeError(f"Invalid type {type(v)}. Must be int or float.")
    return float(v)
cdef class _Timedelta(timedelta):
    # C-level base of the Python-facing Timedelta class: holds the nanosecond
    # value plus lazily computed components, and subclasses datetime.timedelta.
    cdef readonly:
        int64_t value      # nanoseconds
        object freq        # frequency reference
        bint is_populated  # are my components populated
        int64_t _d, _h, _m, _s, _ms, _us, _ns

    # higher than np.ndarray and np.matrix
    __array_priority__ = 100

    def __hash__(_Timedelta self):
        # With a sub-microsecond remainder, hash the raw nanosecond value;
        # otherwise defer to datetime.timedelta's hash.
        if self._has_ns():
            return hash(self.value)
        else:
            return timedelta.__hash__(self)

    def __richcmp__(_Timedelta self, object other, int op):
        # Rich comparison. _Timedelta, datetime.timedelta and Tick operands
        # are compared on their nanosecond values; anything exposing ``ndim``
        # is delegated to that object's comparison; everything else supports
        # only == / != (always unequal) and raises TypeError for ordering.
        cdef:
            _Timedelta ots
            int ndim

        if isinstance(other, _Timedelta):
            ots = other
        elif PyDelta_Check(other) or isinstance(other, Tick):
            ots = Timedelta(other)
        else:
            ndim = getattr(other, "ndim", -1)

            if ndim != -1:
                if ndim == 0:
                    if is_timedelta64_object(other):
                        other = Timedelta(other)
                    else:
                        if op == Py_EQ:
                            return False
                        elif op == Py_NE:
                            return True
                        # only allow ==, != ops
                        raise TypeError(f'Cannot compare type '
                                        f'{type(self).__name__} with '
                                        f'type {type(other).__name__}')
                if util.is_array(other):
                    return PyObject_RichCompare(np.array([self]), other, op)
                # reflect the operation so that ``other`` drives the comparison
                return PyObject_RichCompare(other, self, reverse_ops[op])
            else:
                if other is NaT:
                    return PyObject_RichCompare(other, self, reverse_ops[op])
                elif op == Py_EQ:
                    return False
                elif op == Py_NE:
                    return True
                raise TypeError(f'Cannot compare type {type(self).__name__} with '
                                f'type {type(other).__name__}')

        return cmp_scalar(self.value, ots.value, op)

    cpdef bint _has_ns(self):
        # True when the value is not a whole number of microseconds
        return self.value % 1000 != 0

    def _ensure_components(_Timedelta self):
        """
        compute the components

        Populates the cached ``_d`` ... ``_ns`` fields from ``value`` on first
        use; later calls return immediately once ``is_populated`` is set.
        """
        if self.is_populated:
            return

        cdef:
            pandas_timedeltastruct tds

        td64_to_tdstruct(self.value, &tds)
        self._d = tds.days
        self._h = tds.hrs
        self._m = tds.min
        self._s = tds.sec
        self._ms = tds.ms
        self._us = tds.us
        self._ns = tds.ns
        # NOTE(review): _seconds / _microseconds are not among the cdef
        # attributes declared on this class; these assignments presumably rely
        # on the Python-level subclass providing an instance __dict__ - confirm.
        self._seconds = tds.seconds
        self._microseconds = tds.microseconds

        self.is_populated = 1

    cpdef timedelta to_pytimedelta(_Timedelta self):
        """
        Convert a pandas Timedelta object into a python timedelta object.

        The value is stored internally as an integer number of nanoseconds;
        this converts it to a standard library ``datetime.timedelta``.

        Returns
        -------
        datetime.timedelta

        See Also
        --------
        to_timedelta : Convert argument to Timedelta type.

        Notes
        -----
        Any nanosecond resolution will be lost.
        """
        return timedelta(microseconds=int(self.value) / 1000)

    def to_timedelta64(self) -> np.timedelta64:
        """
        Return a numpy.timedelta64 object with 'ns' precision.
        """
        return np.timedelta64(self.value, 'ns')

    def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
        """
        Convert the Timedelta to a NumPy timedelta64.

        .. versionadded:: 0.25.0

        This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
        copy parameters are available here only for compatibility. Their values
        will not affect the return value.

        Returns
        -------
        numpy.timedelta64

        See Also
        --------
        Series.to_numpy : Similar method for Series.
        """
        return self.to_timedelta64()

    def total_seconds(self):
        """
        Total duration of timedelta in seconds (to ns precision).
        """
        return self.value / 1e9

    def view(self, dtype):
        """
        Array view compatibility.
        """
        return np.timedelta64(self.value).view(dtype)

    @property
    def components(self):
        """
        Return a components namedtuple-like.
        """
        self._ensure_components()
        # return the named tuple
        return Components(self._d, self._h, self._m, self._s,
                          self._ms, self._us, self._ns)

    @property
    def delta(self):
        """
        Return the timedelta in nanoseconds (ns), for internal compatibility.

        Returns
        -------
        int
            Timedelta in nanoseconds.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 42 ns')
        >>> td.delta
        86400000000042

        >>> td = pd.Timedelta('3 s')
        >>> td.delta
        3000000000

        >>> td = pd.Timedelta('3 ms 5 us')
        >>> td.delta
        3005000

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.delta
        42
        """
        return self.value

    @property
    def asm8(self) -> np.timedelta64:
        """
        Return a numpy timedelta64 array scalar view.

        Provides access to the array scalar view (i.e. a combination of the
        value and the units) associated with the numpy.timedelta64().view(),
        including a 64-bit integer representation of the timedelta in
        nanoseconds (Python int compatible).

        Returns
        -------
        numpy timedelta64 array scalar view
            Array scalar view of the timedelta in nanoseconds.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.asm8
        numpy.timedelta64(86520000003042,'ns')

        >>> td = pd.Timedelta('2 min 3 s')
        >>> td.asm8
        numpy.timedelta64(123000000000,'ns')

        >>> td = pd.Timedelta('3 ms 5 us')
        >>> td.asm8
        numpy.timedelta64(3005000,'ns')

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.asm8
        numpy.timedelta64(42,'ns')
        """
        return np.int64(self.value).view('m8[ns]')

    @property
    def resolution_string(self) -> str:
        """
        Return a string representing the lowest timedelta resolution.

        Each timedelta has a defined resolution that represents the lowest OR
        most granular level of precision. Each level of resolution is
        represented by a short string as defined below:

        Resolution:     Return value

        * Days:         'D'
        * Hours:        'H'
        * Minutes:      'T'
        * Seconds:      'S'
        * Milliseconds: 'L'
        * Microseconds: 'U'
        * Nanoseconds:  'N'

        Returns
        -------
        str
            Timedelta resolution.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.resolution_string
        'N'

        >>> td = pd.Timedelta('1 days 2 min 3 us')
        >>> td.resolution_string
        'U'

        >>> td = pd.Timedelta('2 min 3 s')
        >>> td.resolution_string
        'S'

        >>> td = pd.Timedelta(36, unit='us')
        >>> td.resolution_string
        'U'
        """
        self._ensure_components()
        # report the finest component that is non-zero, falling back to days
        if self._ns:
            return "N"
        elif self._us:
            return "U"
        elif self._ms:
            return "L"
        elif self._s:
            return "S"
        elif self._m:
            return "T"
        elif self._h:
            return "H"
        else:
            return "D"

    @property
    def nanoseconds(self):
        """
        Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.

        Returns
        -------
        int
            Number of nanoseconds.

        See Also
        --------
        Timedelta.components : Return all attributes with assigned values
            (i.e. days, hours, minutes, seconds, milliseconds, microseconds,
            nanoseconds).

        Examples
        --------
        **Using string input**

        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.nanoseconds
        42

        **Using integer input**

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.nanoseconds
        42
        """
        self._ensure_components()
        return self._ns

    def _repr_base(self, format=None) -> str:
        """
        Build the string form shared by ``__repr__`` and ``__str__``.

        Parameters
        ----------
        format : None|all|sub_day|long
            'all' always prints days, time and all nine fractional digits;
            'sub_day' omits the days part when the day component is zero;
            'long' always prints the days and time parts; None prints only
            the days when every sub-day component is zero.

        Returns
        -------
        converted : string of a Timedelta

        """
        cdef object sign, seconds_pretty, subs, fmt, comp_dict

        self._ensure_components()

        # separator placed between the days part and the time part
        if self._d < 0:
            sign = " +"
        else:
            sign = " "

        if format == 'all':
            fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}."
                   "{milliseconds:03}{microseconds:03}{nanoseconds:03}")
        else:
            # if we have a partial day
            subs = (self._h or self._m or self._s or
                    self._ms or self._us or self._ns)

            # by default not showing nano
            if self._ms or self._us or self._ns:
                seconds_fmt = "{seconds:02}.{milliseconds:03}{microseconds:03}"
            else:
                seconds_fmt = "{seconds:02}"

            if format == 'sub_day' and not self._d:
                fmt = "{hours:02}:{minutes:02}:" + seconds_fmt
            elif subs or format == 'long':
                fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt
            else:
                fmt = "{days} days"

        comp_dict = self.components._asdict()
        comp_dict['sign'] = sign

        return fmt.format(**comp_dict)

    def __repr__(self) -> str:
        repr_based = self._repr_base(format='long')
        return f"Timedelta('{repr_based}')"

    def __str__(self) -> str:
        return self._repr_base(format='long')

    def __bool__(self) -> bool:
        return self.value != 0

    def isoformat(self) -> str:
        """
        Format Timedelta as ISO 8601 Duration like
        ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
        values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.

        Returns
        -------
        formatted : str

        See Also
        --------
        Timestamp.isoformat

        Notes
        -----
        The longest component is days, whose value may be larger than
        365.
        Every component is always included, even if its value is 0.
        Pandas uses nanosecond precision, so up to 9 decimal places may
        be included in the seconds component.
        Trailing 0's are removed from the seconds component after the decimal.
        We do not 0 pad components, so it's `...T5H...`, not `...T05H...`

        Examples
        --------
        >>> td = pd.Timedelta(days=6, minutes=50, seconds=3,
        ...                   milliseconds=10, microseconds=10, nanoseconds=12)
        >>> td.isoformat()
        'P6DT0H50M3.010010012S'
        >>> pd.Timedelta(hours=1, seconds=10).isoformat()
        'P0DT1H0M10S'
        >>> pd.Timedelta(days=500.5).isoformat()
        'P500DT12H0M0S'
        """
        components = self.components
        seconds = (f'{components.seconds}.'
                   f'{components.milliseconds:0>3}'
                   f'{components.microseconds:0>3}'
                   f'{components.nanoseconds:0>3}')
        # Trim unnecessary 0s, 1.000000000 -> 1
        seconds = seconds.rstrip('0').rstrip('.')
        tpl = (f'P{components.days}DT{components.hours}'
               f'H{components.minutes}M{seconds}S')
        return tpl
    def __new__(cls, object value=_no_input, unit=None, **kwargs):
        # Construct a Timedelta (or return NaT) from ``value``/``unit`` or
        # from datetime.timedelta-style keywords plus ``nanoseconds``.
        #
        # Raises ValueError when neither a value nor keywords are given, when
        # the keywords are rejected by datetime.timedelta, when ``unit`` is
        # 'Y', 'y' or 'M', or when ``value`` has an unsupported type.
        cdef _Timedelta td_base

        if value is _no_input:
            if not len(kwargs):
                raise ValueError("cannot construct a Timedelta without a "
                                 "value/unit or descriptive keywords "
                                 "(days,seconds....)")

            # numpy ints/floats are coerced to builtin int/float first
            kwargs = {key: _to_py_int_float(kwargs[key]) for key in kwargs}

            # datetime.timedelta has no nanoseconds keyword: take it out and
            # add it back on top of the converted value
            nano = np.timedelta64(kwargs.pop('nanoseconds', 0), 'ns')
            try:
                value = nano + convert_to_timedelta64(timedelta(**kwargs),
                                                      'ns')
            except TypeError as e:
                raise ValueError("cannot construct a Timedelta from the "
                                 "passed arguments, allowed keywords are "
                                 "[weeks, days, hours, minutes, seconds, "
                                 "milliseconds, microseconds, nanoseconds]")

        if unit in {'Y', 'y', 'M'}:
            raise ValueError(
                "Units 'M' and 'Y' are no longer supported, as they do not "
                "represent unambiguous timedelta values durations."
            )

        # Dispatch on the input type; branch order matters because several
        # checks can match the same object.
        if isinstance(value, Timedelta):
            value = value.value
        elif isinstance(value, str):
            # a leading 'P' selects the ISO 8601 duration parser
            if len(value) > 0 and value[0] == 'P':
                value = parse_iso_format_string(value)
            else:
                value = parse_timedelta_string(value)
            value = np.timedelta64(value)
        elif PyDelta_Check(value):
            value = convert_to_timedelta64(value, 'ns')
        elif is_timedelta64_object(value):
            if unit is not None:
                value = value.astype(f'timedelta64[{unit}]')
            value = value.astype('timedelta64[ns]')
        elif hasattr(value, 'delta'):
            value = np.timedelta64(delta_to_nanoseconds(value.delta), 'ns')
        elif is_integer_object(value) or is_float_object(value):
            # unit=None is de-facto 'ns'
            unit = parse_timedelta_unit(unit)
            value = convert_to_timedelta64(value, unit)
        elif checknull_with_nat(value):
            return NaT
        else:
            raise ValueError(
                f"Value must be Timedelta, string, integer, "
                f"float, timedelta or convertible, not {type(value).__name__}")

        # from here on ``value`` is handled as an i8 count
        if is_timedelta64_object(value):
            value = value.view('i8')

        # nat
        if value == NPY_NAT:
            return NaT

        # make timedelta happy
        td_base = _Timedelta.__new__(cls, microseconds=int(value) / 1000)
        td_base.value = value
        td_base.is_populated = 0
        return td_base
+ + __inv__ = _op_unary_method(lambda x: -x, '__inv__') + __neg__ = _op_unary_method(lambda x: -x, '__neg__') + __pos__ = _op_unary_method(lambda x: x, '__pos__') + __abs__ = _op_unary_method(lambda x: abs(x), '__abs__') + + __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__add__') + __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, '__radd__') + __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, '__sub__') + __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, '__rsub__') + + def __mul__(self, other): + if hasattr(other, '_typ'): + # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset; this op will raise TypeError + return other.delta * self + return NotImplemented + + elif util.is_nan(other): + # i.e. np.nan, but also catch np.float64("NaN") which would + # otherwise get caught by the hasattr(other, "dtype") branch + # incorrectly return a np.timedelta64 object. + return NaT + + elif hasattr(other, 'dtype'): + # ndarray-like + return other * self.to_timedelta64() + + elif other is NaT: + raise TypeError('Cannot multiply Timedelta with NaT') + + elif not (is_integer_object(other) or is_float_object(other)): + # only integers and floats allowed + return NotImplemented + + return Timedelta(other * self.value, unit='ns') + + __rmul__ = __mul__ + + def __truediv__(self, other): + if hasattr(other, '_typ'): + # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return self / other.delta + return NotImplemented + + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif util.is_nan(other): + # i.e. np.nan, but also catch np.float64("NaN") which would + # otherwise get caught by the hasattr(other, "dtype") branch + # incorrectly return a np.timedelta64 object. 
+ return NaT + + elif hasattr(other, 'dtype'): + return self.to_timedelta64() / other + + elif is_integer_object(other) or is_float_object(other): + # integers or floats + return Timedelta(self.value / other, unit='ns') + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value / float(other.value) + + def __rtruediv__(self, other): + if hasattr(other, '_typ'): + # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return other.delta / self + return NotImplemented + + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif hasattr(other, 'dtype'): + return other / self.to_timedelta64() + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return NaT + return float(other.value) / self.value + + def __floordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if hasattr(other, '_typ'): + # Series, DataFrame, ... 
+ if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return self // other.delta + return NotImplemented + + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif hasattr(other, 'dtype'): + if other.dtype.kind == 'm': + # also timedelta-like + return _broadcast_floordiv_td64(self.value, other, _floordiv) + elif other.dtype.kind in ['i', 'u', 'f']: + if other.ndim == 0: + return Timedelta(self.value // other) + else: + return self.to_timedelta64() // other + + raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + + elif is_integer_object(other) or is_float_object(other): + return Timedelta(self.value // other, unit='ns') + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return self.value // other.value + + def __rfloordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if hasattr(other, '_typ'): + # Series, DataFrame, ... + if other._typ == 'dateoffset' and hasattr(other, 'delta'): + # Tick offset + return other.delta // self + return NotImplemented + + elif is_timedelta64_object(other): + # convert to Timedelta below + pass + + elif hasattr(other, 'dtype'): + if other.dtype.kind == 'm': + # also timedelta-like + return _broadcast_floordiv_td64(self.value, other, _rfloordiv) + + # Includes integer array // Timedelta, disallowed in GH#19761 + raise TypeError(f'Invalid dtype {other.dtype} for __floordiv__') + + elif is_float_object(other) and util.is_nan(other): + # i.e. 
np.nan + return NotImplemented + + elif not _validate_ops_compat(other): + return NotImplemented + + other = Timedelta(other) + if other is NaT: + return np.nan + return other.value // self.value + + def __mod__(self, other): + # Naive implementation, room for optimization + return self.__divmod__(other)[1] + + def __rmod__(self, other): + # Naive implementation, room for optimization + if hasattr(other, 'dtype') and other.dtype.kind == 'i': + # TODO: Remove this check with backwards-compat shim + # for integer / Timedelta is removed. + raise TypeError(f'Invalid dtype {other.dtype} for __mod__') + return self.__rdivmod__(other)[1] + + def __divmod__(self, other): + # Naive implementation, room for optimization + div = self // other + return div, self - div * other + + def __rdivmod__(self, other): + # Naive implementation, room for optimization + if hasattr(other, 'dtype') and other.dtype.kind == 'i': + # TODO: Remove this check with backwards-compat shim + # for integer / Timedelta is removed. + raise TypeError(f'Invalid dtype {other.dtype} for __mod__') + div = other // self + return div, other - div * self + + +cdef _floordiv(int64_t value, right): + return value // right + + +cdef _rfloordiv(int64_t value, right): + # analogous to referencing operator.div, but there is no operator.rfloordiv + return right // value + + +cdef _broadcast_floordiv_td64(int64_t value, object other, + object (*operation)(int64_t value, + object right)): + """Boilerplate code shared by Timedelta.__floordiv__ and + Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. + + Parameters + ---------- + value : int64_t; `self.value` from a Timedelta object + other : object + operation : function, either _floordiv or _rfloordiv + + Returns + ------- + result : varies based on `other` + """ + # assumes other.dtype.kind == 'm', i.e. other is timedelta-like + cdef: + int ndim = getattr(other, 'ndim', -1) + + # We need to watch out for np.timedelta64('NaT'). 
+ mask = other.view('i8') == NPY_NAT + + if ndim == 0: + if mask: + return np.nan + + return operation(value, other.astype('m8[ns]').astype('i8')) + + else: + res = operation(value, other.astype('m8[ns]').astype('i8')) + + if mask.any(): + res = res.astype('f8') + res[mask] = np.nan + return res + + +# resolution in ns +Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) +Timedelta.max = Timedelta(np.iinfo(np.int64).max) +Timedelta.resolution = Timedelta(nanoseconds=1) diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd new file mode 100644 index 00000000..b7282e02 --- /dev/null +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- + +from numpy cimport int64_t +from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct + +cdef object create_timestamp_from_ts(int64_t value, + npy_datetimestruct dts, + object tz, object freq) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx new file mode 100644 index 00000000..abe7f9e5 --- /dev/null +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -0,0 +1,1040 @@ +import sys +import warnings + +import numpy as np +cimport numpy as cnp +from numpy cimport int64_t +cnp.import_array() + +from datetime import time as datetime_time, timedelta +from cpython.datetime cimport (datetime, + PyTZInfo_Check, PyDateTime_IMPORT) +PyDateTime_IMPORT + +from pandas._libs.tslibs.util cimport ( + is_integer_object, is_offset_object) + +from pandas._libs.tslibs.c_timestamp cimport _Timestamp +cimport pandas._libs.tslibs.ccalendar as ccalendar +from pandas._libs.tslibs.ccalendar import DAY_SECONDS +from pandas._libs.tslibs.conversion import normalize_i8_timestamps +from pandas._libs.tslibs.conversion cimport ( + _TSObject, convert_to_tsobject, + convert_datetime_to_tsobject) +from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT +from pandas._libs.tslibs.np_datetime cimport ( + check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct) +from 
pandas._libs.tslibs.offsets cimport to_offset +from pandas._libs.tslibs.timedeltas import Timedelta +from pandas._libs.tslibs.timezones cimport ( + is_utc, maybe_get_tz, treat_tz_as_pytz) +from pandas._libs.tslibs.timezones import UTC +from pandas._libs.tslibs.tzconversion import ( + tz_localize_to_utc, tz_convert_single) + +# ---------------------------------------------------------------------- +# Constants +_zero_time = datetime_time(0, 0) +_no_input = object() + +# ---------------------------------------------------------------------- + + +cdef inline object create_timestamp_from_ts(int64_t value, + npy_datetimestruct dts, + object tz, object freq): + """ convenience routine to construct a Timestamp from its parts """ + cdef _Timestamp ts_base + ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, + dts.day, dts.hour, dts.min, + dts.sec, dts.us, tz) + ts_base.value = value + ts_base.freq = freq + ts_base.nanosecond = dts.ps // 1000 + + return ts_base + + +class RoundTo: + """ + enumeration defining the available rounding modes + + Attributes + ---------- + MINUS_INFTY + round towards -∞, or floor [2]_ + PLUS_INFTY + round towards +∞, or ceil [3]_ + NEAREST_HALF_EVEN + round to nearest, tie-break half to even [6]_ + NEAREST_HALF_MINUS_INFTY + round to nearest, tie-break half to -∞ [5]_ + NEAREST_HALF_PLUS_INFTY + round to nearest, tie-break half to +∞ [4]_ + + + References + ---------- + .. [1] "Rounding - Wikipedia" + https://en.wikipedia.org/wiki/Rounding + .. [2] "Rounding down" + https://en.wikipedia.org/wiki/Rounding#Rounding_down + .. [3] "Rounding up" + https://en.wikipedia.org/wiki/Rounding#Rounding_up + .. [4] "Round half up" + https://en.wikipedia.org/wiki/Rounding#Round_half_up + .. [5] "Round half down" + https://en.wikipedia.org/wiki/Rounding#Round_half_down + .. 
[6] "Round half to even" + https://en.wikipedia.org/wiki/Rounding#Round_half_to_even + """ + @property + def MINUS_INFTY(self) -> int: + return 0 + + @property + def PLUS_INFTY(self) -> int: + return 1 + + @property + def NEAREST_HALF_EVEN(self) -> int: + return 2 + + @property + def NEAREST_HALF_PLUS_INFTY(self) -> int: + return 3 + + @property + def NEAREST_HALF_MINUS_INFTY(self) -> int: + return 4 + + +cdef inline _floor_int64(values, unit): + return values - np.remainder(values, unit) + +cdef inline _ceil_int64(values, unit): + return values + np.remainder(-values, unit) + +cdef inline _rounddown_int64(values, unit): + return _ceil_int64(values - unit//2, unit) + +cdef inline _roundup_int64(values, unit): + return _floor_int64(values + unit//2, unit) + + +def round_nsint64(values, mode, freq): + """ + Applies rounding mode at given frequency + + Parameters + ---------- + values : :obj:`ndarray` + mode : instance of `RoundTo` enumeration + freq : str, obj + + Returns + ------- + :obj:`ndarray` + """ + + unit = to_offset(freq).nanos + + if mode == RoundTo.MINUS_INFTY: + return _floor_int64(values, unit) + elif mode == RoundTo.PLUS_INFTY: + return _ceil_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_MINUS_INFTY: + return _rounddown_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_PLUS_INFTY: + return _roundup_int64(values, unit) + elif mode == RoundTo.NEAREST_HALF_EVEN: + # for odd unit there is no need of a tie break + if unit % 2: + return _rounddown_int64(values, unit) + quotient, remainder = np.divmod(values, unit) + mask = np.logical_or( + remainder > (unit // 2), + np.logical_and(remainder == (unit // 2), quotient % 2) + ) + quotient[mask] += 1 + return quotient * unit + + # if/elif above should catch all rounding modes defined in enum 'RoundTo': + # if flow of control arrives here, it is a bug + raise ValueError("round_nsint64 called with an unrecognized " + "rounding mode") + + +# 
---------------------------------------------------------------------- + +# Python front end to C extension type _Timestamp +# This serves as the box for datetime64 + + +class Timestamp(_Timestamp): + """ + Pandas replacement for python datetime.datetime object. + + Timestamp is the pandas equivalent of python's Datetime + and is interchangeable with it in most cases. It's the type used + for the entries that make up a DatetimeIndex, and other timeseries + oriented data structures in pandas. + + Parameters + ---------- + ts_input : datetime-like, str, int, float + Value to be converted to Timestamp. + freq : str, DateOffset + Offset which Timestamp will have. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will have. + unit : str + Unit used for conversion if ts_input is of type int or float. The + valid values are 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For + example, 's' means seconds and 'ms' means milliseconds. + year, month, day : int + hour, minute, second, microsecond : int, optional, default 0 + nanosecond : int, optional, default 0 + .. versionadded:: 0.23.0 + tzinfo : datetime.tzinfo, optional, default None + + Notes + ----- + There are essentially three calling conventions for the constructor. The + primary form accepts four parameters. They can be passed by position or + keyword. + + The other two forms mimic the parameters from ``datetime.datetime``. They + can be passed by either position or keyword, but not both mixed together. 
+ + Examples + -------- + Using the primary calling convention: + + This converts a datetime-like string + + >>> pd.Timestamp('2017-01-01T12') + Timestamp('2017-01-01 12:00:00') + + This converts a float representing a Unix epoch in units of seconds + + >>> pd.Timestamp(1513393355.5, unit='s') + Timestamp('2017-12-16 03:02:35.500000') + + This converts an int representing a Unix-epoch in units of seconds + and for a particular timezone + + >>> pd.Timestamp(1513393355, unit='s', tz='US/Pacific') + Timestamp('2017-12-15 19:02:35-0800', tz='US/Pacific') + + Using the other two forms that mimic the API for ``datetime.datetime``: + + >>> pd.Timestamp(2017, 1, 1, 12) + Timestamp('2017-01-01 12:00:00') + + >>> pd.Timestamp(year=2017, month=1, day=1, hour=12) + Timestamp('2017-01-01 12:00:00') + """ + + @classmethod + def fromordinal(cls, ordinal, freq=None, tz=None): + """ + Timestamp.fromordinal(ordinal, freq=None, tz=None) + + Passed an ordinal, translate and convert to a ts. + Note: by definition there cannot be any tz info on the ordinal itself. + + Parameters + ---------- + ordinal : int + Date corresponding to a proleptic Gregorian ordinal. + freq : str, DateOffset + Offset to apply to the Timestamp. + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for the Timestamp. + """ + return cls(datetime.fromordinal(ordinal), + freq=freq, tz=tz) + + @classmethod + def now(cls, tz=None): + """ + Timestamp.now(tz=None) + + Return new Timestamp object representing current time local to + tz. + + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + """ + if isinstance(tz, str): + tz = maybe_get_tz(tz) + return cls(datetime.now(tz)) + + @classmethod + def today(cls, tz=None): + """ + Timestamp.today(cls, tz=None) + + Return the current time in the local timezone. This differs + from datetime.today() in that it can be localized to a + passed timezone. 
+ + Parameters + ---------- + tz : str or timezone object, default None + Timezone to localize to. + """ + return cls.now(tz) + + @classmethod + def utcnow(cls): + """ + Timestamp.utcnow() + + Return a new Timestamp representing UTC day and time. + """ + return cls.now(UTC) + + @classmethod + def utcfromtimestamp(cls, ts): + """ + Timestamp.utcfromtimestamp(ts) + + Construct a naive UTC datetime from a POSIX timestamp. + """ + return cls(datetime.utcfromtimestamp(ts)) + + @classmethod + def fromtimestamp(cls, ts): + """ + Timestamp.fromtimestamp(ts) + + timestamp[, tz] -> tz's local time from POSIX timestamp. + """ + return cls(datetime.fromtimestamp(ts)) + + # Issue 25016. + @classmethod + def strptime(cls, date_string, format): + """ + Timestamp.strptime(string, format) + + Function is not implemented. Use pd.to_datetime(). + """ + raise NotImplementedError("Timestamp.strptime() is not implemented. " + "Use to_datetime() to parse date strings.") + + @classmethod + def combine(cls, date, time): + """ + Timestamp.combine(date, time) + + date, time -> datetime with same date and time fields. + """ + return cls(datetime.combine(date, time)) + + def __new__( + cls, + object ts_input=_no_input, + object freq=None, + tz=None, + unit=None, + year=None, + month=None, + day=None, + hour=None, + minute=None, + second=None, + microsecond=None, + nanosecond=None, + tzinfo=None + ): + # The parameter list folds together legacy parameter names (the first + # four) and positional and keyword parameter names from pydatetime. + # + # There are three calling forms: + # + # - In the legacy form, the first parameter, ts_input, is required + # and may be datetime-like, str, int, or float. The second + # parameter, freq, is optional and may be str or DateOffset. + # + # - ints in the first, second, and third arguments indicate + # pydatetime positional arguments.
Only the first 8 arguments + # (standing in for year, month, day, hour, minute, second, + # microsecond, tzinfo) may be non-None. As a shortcut, we just + # check that the second argument is an int. + # + # - Nones for the first four (legacy) arguments indicate pydatetime + # keyword arguments. year, month, and day are required. As a + # shortcut, we just check that the first argument was not passed. + # + # Mixing pydatetime positional and keyword arguments is forbidden! + + cdef _TSObject ts + + _date_attributes = [year, month, day, hour, minute, second, + microsecond, nanosecond] + + if tzinfo is not None: + if not PyTZInfo_Check(tzinfo): + # tzinfo must be a datetime.tzinfo object, GH#17690 + raise TypeError(f'tzinfo must be a datetime.tzinfo object, ' + f'not {type(tzinfo)}') + elif tz is not None: + raise ValueError('Can provide at most one of tz, tzinfo') + + # User passed tzinfo instead of tz; avoid silently ignoring + tz, tzinfo = tzinfo, None + + if isinstance(ts_input, str): + # User passed a date string to parse. + # Check that the user didn't also pass a date attribute kwarg. + if any(arg is not None for arg in _date_attributes): + raise ValueError('Cannot pass a date attribute keyword ' + 'argument when passing a date string') + + elif ts_input is _no_input: + # User passed keyword arguments. + ts_input = datetime(year, month, day, hour or 0, + minute or 0, second or 0, + microsecond or 0) + elif is_integer_object(freq): + # User passed positional arguments: + # Timestamp(year, month, day[, hour[, minute[, second[, + # microsecond[, nanosecond[, tzinfo]]]]]]) + ts_input = datetime(ts_input, freq, tz, unit or 0, + year or 0, month or 0, day or 0) + nanosecond = hour + tz = minute + freq = None + + if getattr(ts_input, 'tzinfo', None) is not None and tz is not None: + raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " + "the tz parameter. 
Use tz_convert instead.") + + ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0) + + if ts.value == NPY_NAT: + return NaT + + if freq is None: + # GH 22311: Try to extract the frequency of a given Timestamp input + freq = getattr(ts_input, 'freq', None) + elif not is_offset_object(freq): + freq = to_offset(freq) + + return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq) + + def _round(self, freq, mode, ambiguous='raise', nonexistent='raise'): + if self.tz is not None: + value = self.tz_localize(None).value + else: + value = self.value + + value = np.array([value], dtype=np.int64) + + # Will only ever contain 1 element for timestamp + r = round_nsint64(value, mode, freq)[0] + result = Timestamp(r, unit='ns') + if self.tz is not None: + result = result.tz_localize( + self.tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + return result + + def round(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Round the Timestamp to the specified resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the rounding resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0 + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta.
+ * 'raise' will raise a NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` + + Raises + ------ + ValueError if the freq cannot be converted + """ + return self._round( + freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent + ) + + def floor(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Return a new Timestamp floored to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the flooring resolution. + ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0 + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise a NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted. + """ + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) + + def ceil(self, freq, ambiguous='raise', nonexistent='raise'): + """ + Return a new Timestamp ceiled to this resolution. + + Parameters + ---------- + freq : str + Frequency string indicating the ceiling resolution.
+ ambiguous : bool or {'raise', 'NaT'}, default 'raise' + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + .. versionadded:: 0.24.0 + nonexistent : {'raise', 'shift_forward', 'shift_backward', 'NaT', \ +timedelta}, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise a NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError if the freq cannot be converted. + """ + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) + + @property + def tz(self): + """ + Alias for tzinfo. + """ + return self.tzinfo + + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError("Cannot directly set timezone. Use tz_localize() " + "or tz_convert() as appropriate") + + def __setstate__(self, state): + self.value = state[0] + self.freq = state[1] + self.tzinfo = state[2] + + def __reduce__(self): + object_state = self.value, self.freq, self.tzinfo + return (Timestamp, object_state) + + def to_period(self, freq=None): + """ + Return a Period of which this timestamp is an observation.
+ """ + from pandas import Period + + if self.tz is not None: + # GH#21333 + warnings.warn("Converting to Period representation will " + "drop timezone information.", + UserWarning) + + if freq is None: + freq = self.freq + + return Period(self, freq=freq) + + @property + def dayofweek(self): + """ + Return day of the week. + """ + return self.weekday() + + def day_name(self, locale=None) -> str: + """ + Return the day name of the Timestamp with specified locale. + + Parameters + ---------- + locale : string, default None (English locale) + Locale determining the language in which to return the day name. + + Returns + ------- + day_name : string + + .. versionadded:: 0.23.0 + """ + return self._get_date_name_field('day_name', locale) + + def month_name(self, locale=None) -> str: + """ + Return the month name of the Timestamp with specified locale. + + Parameters + ---------- + locale : string, default None (English locale) + Locale determining the language in which to return the month name. + + Returns + ------- + month_name : string + + .. versionadded:: 0.23.0 + """ + return self._get_date_name_field('month_name', locale) + + @property + def dayofyear(self): + """ + Return the day of the year. + """ + return ccalendar.get_day_of_year(self.year, self.month, self.day) + + @property + def week(self) -> int: + """ + Return the week number of the year. + """ + return ccalendar.get_week_of_year(self.year, self.month, self.day) + + weekofyear = week + + @property + def quarter(self) -> int: + """ + Return the quarter of the year. + """ + return ((self.month - 1) // 3) + 1 + + @property + def days_in_month(self): + """ + Return the number of days in the month. + """ + return ccalendar.get_days_in_month(self.year, self.month) + + daysinmonth = days_in_month + + @property + def freqstr(self): + """ + Return the freqstr of self.freq, or self.freq itself if it has none.
+ """ + return getattr(self.freq, 'freqstr', self.freq) + + @property + def is_month_start(self) -> bool: + """ + Return True if date is first day of month. + """ + if self.freq is None: + # fast-path for non-business frequencies + return self.day == 1 + return self._get_start_end_field('is_month_start') + + @property + def is_month_end(self) -> bool: + """ + Return True if date is last day of month. + """ + if self.freq is None: + # fast-path for non-business frequencies + return self.day == self.days_in_month + return self._get_start_end_field('is_month_end') + + @property + def is_quarter_start(self) -> bool: + """ + Return True if date is first day of the quarter. + """ + if self.freq is None: + # fast-path for non-business frequencies + return self.day == 1 and self.month % 3 == 1 + return self._get_start_end_field('is_quarter_start') + + @property + def is_quarter_end(self) -> bool: + """ + Return True if date is last day of the quarter. + """ + if self.freq is None: + # fast-path for non-business frequencies + return (self.month % 3) == 0 and self.day == self.days_in_month + return self._get_start_end_field('is_quarter_end') + + @property + def is_year_start(self) -> bool: + """ + Return True if date is first day of the year. + """ + if self.freq is None: + # fast-path for non-business frequencies + return self.day == self.month == 1 + return self._get_start_end_field('is_year_start') + + @property + def is_year_end(self) -> bool: + """ + Return True if date is last day of the year. + """ + if self.freq is None: + # fast-path for non-business frequencies + return self.month == 12 and self.day == 31 + return self._get_start_end_field('is_year_end') + + @property + def is_leap_year(self) -> bool: + """ + Return True if year is a leap year. + """ + return bool(ccalendar.is_leapyear(self.year)) + + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): + """ + Convert naive Timestamp to local time zone, or remove + timezone from tz-aware Timestamp. 
+ + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding local time. + + ambiguous : bool, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + The behavior is as follows: + + * bool contains flags to determine if time is dst or not (note + that this flag is only applicable for ambiguous fall dst dates). + * 'NaT' will return NaT for an ambiguous time. + * 'raise' will raise an AmbiguousTimeError for an ambiguous time. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + The behavior is as follows: + + * 'shift_forward' will shift the nonexistent time forward to the + closest existing time. + * 'shift_backward' will shift the nonexistent time backward to the + closest existing time. + * 'NaT' will return NaT where there are nonexistent times. + * timedelta objects will shift nonexistent times by the timedelta. + * 'raise' will raise a NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Returns + ------- + localized : Timestamp + + Raises + ------ + TypeError + If the Timestamp is tz-aware and tz is not None.
+ """ + if ambiguous == 'infer': + raise ValueError('Cannot infer offset with only one time.') + + nonexistent_options = ('raise', 'NaT', 'shift_forward', + 'shift_backward') + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta): + raise ValueError("The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object") + + if self.tzinfo is None: + # tz naive, localize + tz = maybe_get_tz(tz) + if not isinstance(ambiguous, str): + ambiguous = [ambiguous] + value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, + ambiguous=ambiguous, + nonexistent=nonexistent)[0] + return Timestamp(value, tz=tz, freq=self.freq) + else: + if tz is None: + # reset tz + value = tz_convert_single(self.value, UTC, self.tz) + return Timestamp(value, tz=tz, freq=self.freq) + else: + raise TypeError('Cannot localize tz-aware Timestamp, use ' + 'tz_convert for conversions') + + def tz_convert(self, tz): + """ + Convert tz-aware Timestamp to another time zone. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will be converted to. + None will remove timezone holding UTC time. + + Returns + ------- + converted : Timestamp + + Raises + ------ + TypeError + If Timestamp is tz-naive. + """ + if self.tzinfo is None: + # tz naive, use tz_localize + raise TypeError('Cannot convert tz-naive Timestamp, use ' + 'tz_localize to localize') + else: + # Same UTC timestamp, different time zone + return Timestamp(self.value, tz=tz, freq=self.freq) + + astimezone = tz_convert + + def replace(self, year=None, month=None, day=None, + hour=None, minute=None, second=None, microsecond=None, + nanosecond=None, tzinfo=object, fold=0): + """ + implements datetime.replace, handles nanoseconds. 
+ + Parameters + ---------- + year : int, optional + month : int, optional + day : int, optional + hour : int, optional + minute : int, optional + second : int, optional + microsecond : int, optional + nanosecond : int, optional + tzinfo : tz-convertible, optional + fold : int, optional, default is 0 + + Returns + ------- + Timestamp with fields replaced + """ + + cdef: + npy_datetimestruct dts + int64_t value, value_tz, offset + object _tzinfo, result, k, v + datetime ts_input + + # set to naive if needed + _tzinfo = self.tzinfo + value = self.value + if _tzinfo is not None: + value_tz = tz_convert_single(value, _tzinfo, UTC) + value += value - value_tz + + # setup components + dt64_to_dtstruct(value, &dts) + dts.ps = self.nanosecond * 1000 + + # replace + def validate(k, v): + """ validate integers """ + if not is_integer_object(v): + raise ValueError(f"value must be an integer, received " + f"{type(v)} for {k}") + return v + + if year is not None: + dts.year = validate('year', year) + if month is not None: + dts.month = validate('month', month) + if day is not None: + dts.day = validate('day', day) + if hour is not None: + dts.hour = validate('hour', hour) + if minute is not None: + dts.min = validate('minute', minute) + if second is not None: + dts.sec = validate('second', second) + if microsecond is not None: + dts.us = validate('microsecond', microsecond) + if nanosecond is not None: + dts.ps = validate('nanosecond', nanosecond) * 1000 + if tzinfo is not object: + _tzinfo = tzinfo + + # reconstruct & check bounds + if _tzinfo is not None and treat_tz_as_pytz(_tzinfo): + # replacing across a DST boundary may induce a new tzinfo object + # see GH#18319 + ts_input = _tzinfo.localize(datetime(dts.year, dts.month, dts.day, + dts.hour, dts.min, dts.sec, + dts.us), + is_dst=not bool(fold)) + _tzinfo = ts_input.tzinfo + else: + kwargs = {'year': dts.year, 'month': dts.month, 'day': dts.day, + 'hour': dts.hour, 'minute': dts.min, 'second': dts.sec, + 'microsecond': 
dts.us, 'tzinfo': _tzinfo, + 'fold': fold} + ts_input = datetime(**kwargs) + + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) + value = ts.value + (dts.ps // 1000) + if value != NPY_NAT: + check_dts_bounds(&dts) + + return create_timestamp_from_ts(value, dts, _tzinfo, self.freq) + + def isoformat(self, sep='T'): + base = super(_Timestamp, self).isoformat(sep=sep) + if self.nanosecond == 0: + return base + + if self.tzinfo is not None: + base1, base2 = base[:-6], base[-6:] + else: + base1, base2 = base, "" + + if self.microsecond != 0: + base1 += f"{self.nanosecond:03d}" + else: + base1 += f".{self.nanosecond:09d}" + + return base1 + base2 + + def _has_time_component(self) -> bool: + """ + Returns if the Timestamp has a time component + in addition to the date part + """ + return (self.time() != _zero_time + or self.tzinfo is not None + or self.nanosecond != 0) + + def to_julian_date(self): + """ + Convert TimeStamp to a Julian Date. + 0 Julian date is noon January 1, 4713 BC. + """ + year = self.year + month = self.month + day = self.day + if month <= 2: + year -= 1 + month += 12 + return (day + + np.fix((153 * month - 457) / 5) + + 365 * year + + np.floor(year / 4) - + np.floor(year / 100) + + np.floor(year / 400) + + 1721118.5 + + (self.hour + + self.minute / 60.0 + + self.second / 3600.0 + + self.microsecond / 3600.0 / 1e+6 + + self.nanosecond / 3600.0 / 1e+9 + ) / 24.0) + + def normalize(self): + """ + Normalize Timestamp to midnight, preserving + tz information. 
+ """ + if self.tz is None or is_utc(self.tz): + DAY_NS = DAY_SECONDS * 1000000000 + normalized_value = self.value - (self.value % DAY_NS) + return Timestamp(normalized_value).tz_localize(self.tz) + normalized_value = normalize_i8_timestamps( + np.array([self.value], dtype='i8'), tz=self.tz)[0] + return Timestamp(normalized_value).tz_localize(self.tz) + + def __radd__(self, other): + # __radd__ on cython extension types like _Timestamp is not used, so + # define it here instead + return self + other + + +# Add the min and max fields at the class level +cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max +# the smallest value we could actually represent is +# INT64_MIN + 1 == -9223372036854775807 +# but to allow overflow free conversion with a microsecond resolution +# use the smallest value with a 0 nanosecond unit (0s in last 3 digits) +cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 + +# Resolution is in nanoseconds +Timestamp.min = Timestamp(_NS_LOWER_BOUND) +Timestamp.max = Timestamp(_NS_UPPER_BOUND) +Timestamp.resolution = Timedelta(nanoseconds=1) # GH#21336, GH#21365 diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd new file mode 100644 index 00000000..50c4a41f --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pxd @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- + +cpdef bint is_utc(object tz) +cdef bint is_tzlocal(object tz) + +cdef bint treat_tz_as_pytz(object tz) +cdef bint treat_tz_as_dateutil(object tz) + +cpdef bint tz_compare(object start, object end) +cpdef object get_timezone(object tz) +cpdef object maybe_get_tz(object tz) + +cdef get_utcoffset(tzinfo, obj) +cdef bint is_fixed_offset(object tz) + +cdef object get_dst_info(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx new file mode 100644 index 00000000..35ee87e7 --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pyx @@ -0,0 +1,358 @@ +from datetime import timezone + +# dateutil compat +from dateutil.tz import ( + tzutc as 
_dateutil_tzutc, + tzlocal as _dateutil_tzlocal, + tzfile as _dateutil_tzfile) + +from dateutil.tz import gettz as dateutil_gettz + +from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo +import pytz +UTC = pytz.utc + + +import numpy as np +cimport numpy as cnp +from numpy cimport int64_t +cnp.import_array() + +# ---------------------------------------------------------------------- +from pandas._libs.tslibs.util cimport is_integer_object, get_nat + +cdef int64_t NPY_NAT = get_nat() +cdef object utc_stdlib = timezone.utc + +# ---------------------------------------------------------------------- + +cpdef inline bint is_utc(object tz): + return tz is UTC or tz is utc_stdlib or isinstance(tz, _dateutil_tzutc) + + +cdef inline bint is_tzlocal(object tz): + return isinstance(tz, _dateutil_tzlocal) + + +cdef inline bint treat_tz_as_pytz(object tz): + return (hasattr(tz, '_utc_transition_times') and + hasattr(tz, '_transition_info')) + + +cdef inline bint treat_tz_as_dateutil(object tz): + return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') + + +cpdef inline object get_timezone(object tz): + """ + We need to do several things here: + 1) Distinguish between pytz and dateutil timezones + 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* + but a different tz object) + 3) Provide something to serialize when we're storing a datetime object + in pytables. + + We return a string prefaced with dateutil if it's a dateutil tz, else just + the tz name. It needs to be a string so that we can serialize it with + UJSON/pytables. maybe_get_tz (below) is the inverse of this process. + """ + if is_utc(tz): + return tz + else: + if treat_tz_as_dateutil(tz): + if '.tar.gz' in tz._filename: + raise ValueError( + 'Bad tz filename. Dateutil on python 3 on windows has a ' + 'bug which causes tzfile._filename to be the same for all ' + 'timezone files. 
Please construct dateutil timezones ' + 'implicitly by passing a string like "dateutil/Europe' + '/London" when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil/' + tz._filename + else: + # tz is a pytz timezone or unknown. + try: + zone = tz.zone + if zone is None: + return tz + return zone + except AttributeError: + return tz + + +cpdef inline object maybe_get_tz(object tz): + """ + (Maybe) Construct a timezone object from a string. If tz is a string, use + it to construct a timezone object. Otherwise, just return tz. + """ + if isinstance(tz, str): + if tz == 'tzlocal()': + tz = _dateutil_tzlocal() + elif tz.startswith('dateutil/'): + zone = tz[9:] + tz = dateutil_gettz(zone) + # On Python 3 on Windows, the filename is not always set correctly. + if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: + tz._filename = zone + else: + tz = pytz.timezone(tz) + elif is_integer_object(tz): + tz = pytz.FixedOffset(tz / 60) + return tz + + +def _p_tz_cache_key(tz): + """ Python interface for cache function to facilitate testing.""" + return tz_cache_key(tz) + + +# Timezone data caches, key is the pytz string or dateutil file name. +dst_cache = {} + + +cdef inline object tz_cache_key(object tz): + """ + Return the key in the cache for the timezone info object or None + if unknown. + + The key is currently the tz string for pytz timezones, the filename for + dateutil timezones. + + Notes + ===== + This cannot just be the hash of a timezone object. Unfortunately, the + hashes of two dateutil tz objects which represent the same timezone are + not equal (even though the tz objects will compare equal and represent + the same tz file). Also, pytz objects are not always hashable so we use + str(tz) instead. 
+ """ + if isinstance(tz, _pytz_BaseTzInfo): + return tz.zone + elif isinstance(tz, _dateutil_tzfile): + if '.tar.gz' in tz._filename: + raise ValueError('Bad tz filename. Dateutil on python 3 on ' + 'windows has a bug which causes tzfile._filename ' + 'to be the same for all timezone files. Please ' + 'construct dateutil timezones implicitly by ' + 'passing a string like "dateutil/Europe/London" ' + 'when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil' + tz._filename + else: + return None + + +# ---------------------------------------------------------------------- +# UTC Offsets + + +cdef get_utcoffset(tzinfo, obj): + try: + return tzinfo._utcoffset + except AttributeError: + return tzinfo.utcoffset(obj) + + +cdef inline bint is_fixed_offset(object tz): + if treat_tz_as_dateutil(tz): + if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: + return 1 + else: + return 0 + elif treat_tz_as_pytz(tz): + if (len(tz._transition_info) == 0 + and len(tz._utc_transition_times) == 0): + return 1 + else: + return 0 + # This also implicitly accepts datetime.timezone objects which are + # considered fixed + return 1 + + +cdef object get_utc_trans_times_from_dateutil_tz(object tz): + """ + Transition times in dateutil timezones are stored in local non-dst + time. This code converts them to UTC. It's the reverse of the code + in dateutil.tz.tzfile.__init__. 
+ """ + new_trans = list(tz._trans_list) + last_std_offset = 0 + for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)): + if not tti.isdst: + last_std_offset = tti.offset + new_trans[i] = trans - last_std_offset + return new_trans + + +cdef int64_t[:] unbox_utcoffsets(object transinfo): + cdef: + Py_ssize_t i, sz + int64_t[:] arr + + sz = len(transinfo) + arr = np.empty(sz, dtype='i8') + + for i in range(sz): + arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 + + return arr + + +# ---------------------------------------------------------------------- +# Daylight Savings + + +cdef object get_dst_info(object tz): + """ + return a tuple of : + (UTC times of DST transitions, + UTC offsets in nanoseconds corresponding to DST transitions, + string of type of transitions) + + """ + cache_key = tz_cache_key(tz) + if cache_key is None: + # e.g. pytz.FixedOffset, matplotlib.dates._UTC, + # psycopg2.tz.FixedOffsetTimezone + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 + return (np.array([NPY_NAT + 1], dtype=np.int64), + np.array([num], dtype=np.int64), + None) + + if cache_key not in dst_cache: + if treat_tz_as_pytz(tz): + trans = np.array(tz._utc_transition_times, dtype='M8[ns]') + trans = trans.view('i8') + if tz._utc_transition_times[0].year == 1: + trans[0] = NPY_NAT + 1 + deltas = unbox_utcoffsets(tz._transition_info) + typ = 'pytz' + + elif treat_tz_as_dateutil(tz): + if len(tz._trans_list): + # get utc trans times + trans_list = get_utc_trans_times_from_dateutil_tz(tz) + trans = np.hstack([ + np.array([0], dtype='M8[s]'), # place holder for 1st item + np.array(trans_list, dtype='M8[s]')]).astype( + 'M8[ns]') # all trans listed + trans = trans.view('i8') + trans[0] = NPY_NAT + 1 + + # deltas + deltas = np.array([v.offset for v in ( + tz._ttinfo_before,) + tz._trans_idx], dtype='i8') + deltas *= 1000000000 + typ = 'dateutil' + + elif is_fixed_offset(tz): + trans = np.array([NPY_NAT + 1], dtype=np.int64) + deltas =
np.array([tz._ttinfo_std.offset], + dtype='i8') * 1000000000 + typ = 'fixed' + else: + # 2018-07-12 this is not reached in the tests, and this case + # is not handled in any of the functions that call + # get_dst_info. If this case _were_ hit the calling + # functions would then hit an IndexError because they assume + # `deltas` is non-empty. + # (under the just-deleted code that returned empty arrays) + raise AssertionError("dateutil tzinfo is not a FixedOffset " + "and has an empty `_trans_list`.", tz) + + else: + # static tzinfo + # TODO: This case is not hit in tests (2018-07-17); is it possible? + trans = np.array([NPY_NAT + 1], dtype=np.int64) + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 + deltas = np.array([num], dtype=np.int64) + typ = 'static' + + dst_cache[cache_key] = (trans, deltas, typ) + + return dst_cache[cache_key] + + +def infer_tzinfo(start, end): + if start is not None and end is not None: + tz = start.tzinfo + if not tz_compare(tz, end.tzinfo): + raise AssertionError(f'Inputs must both have the same timezone, ' + f'{tz} != {end.tzinfo}') + elif start is not None: + tz = start.tzinfo + elif end is not None: + tz = end.tzinfo + else: + tz = None + return tz + + +cpdef bint tz_compare(object start, object end): + """ + Compare string representations of timezones + + The same timezone can be represented as different instances of + timezones. For example + `` and + `` are essentially same + timezones but aren't evaluated such, but the string representation + for both of these is `'Europe/Paris'`. + + This exists only to add a notion of equality to pytz-style zones + that is compatible with the notion of equality expected of tzinfo + subclasses. 
+ + Parameters + ---------- + start : tzinfo + end : tzinfo + + Returns: + ------- + compare : bint + + """ + # GH 18523 + return get_timezone(start) == get_timezone(end) + + +def tz_standardize(tz: object): + """ + If the passed tz is a pytz timezone object, "normalize" it to the a + consistent version + + Parameters + ---------- + tz : tz object + + Returns: + ------- + tz object + + Examples: + -------- + >>> tz + + + >>> tz_standardize(tz) + + + >>> tz + + + >>> tz_standardize(tz) + + + >>> tz + dateutil.tz.tz.tzutc + + >>> tz_standardize(tz) + dateutil.tz.tz.tzutc + """ + if treat_tz_as_pytz(tz): + return pytz.timezone(str(tz)) + return tz diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd new file mode 100644 index 00000000..9c86057b --- /dev/null +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -0,0 +1,7 @@ +from cpython.datetime cimport tzinfo +from numpy cimport int64_t + + +cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz) +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=*) +cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx new file mode 100644 index 00000000..b368f0fd --- /dev/null +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -0,0 +1,541 @@ +""" +timezone conversion +""" +import cython +from cython import Py_ssize_t + +from cpython.datetime cimport ( + PyDateTime_IMPORT, PyDelta_Check, datetime, tzinfo) +PyDateTime_IMPORT + +import pytz +from dateutil.tz import tzutc + +import numpy as np +cimport numpy as cnp +from numpy cimport ndarray, int64_t, uint8_t, intp_t +cnp.import_array() + +from pandas._libs.tslibs.ccalendar import DAY_SECONDS, HOUR_SECONDS +from pandas._libs.tslibs.nattype cimport NPY_NAT +from pandas._libs.tslibs.np_datetime cimport ( + npy_datetimestruct, dt64_to_dtstruct) +from pandas._libs.tslibs.timedeltas cimport delta_to_nanoseconds +from 
pandas._libs.tslibs.timezones cimport ( + get_dst_info, is_tzlocal, is_utc, get_timezone, get_utcoffset) + + +# TODO: cdef scalar version to call from convert_str_to_tsobject +@cython.boundscheck(False) +@cython.wraparound(False) +def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, + object nonexistent=None): + """ + Localize tzinfo-naive i8 to given time zone (using pytz). If + there are ambiguities in the values, raise AmbiguousTimeError. + + Parameters + ---------- + vals : ndarray[int64_t] + tz : tzinfo or None + ambiguous : str, bool, or arraylike + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for ambiguous + times, but the array must have the same length as vals) + - bool if True, treat all vals as DST. If False, treat them as non-DST + - 'NaT' will return NaT where there are ambiguous times + + nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \ +timedelta-like} + How to handle non-existent times when converting wall times to UTC + + .. 
versionadded:: 0.24.0 + + Returns + ------- + localized : ndarray[int64_t] + """ + cdef: + int64_t[:] deltas, idx_shifted, idx_shifted_left, idx_shifted_right + ndarray[uint8_t, cast=True] ambiguous_array, both_nat, both_eq + Py_ssize_t i, idx, pos, ntrans, n = len(vals) + Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right + int64_t *tdata + int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins + int64_t first_delta + int64_t HOURS_NS = HOUR_SECONDS * 1000000000, shift_delta = 0 + ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta + ndarray trans_idx, grp, a_idx, b_idx, one_diff + npy_datetimestruct dts + bint infer_dst = False, is_dst = False, fill = False + bint shift_forward = False, shift_backward = False + bint fill_nonexist = False + list trans_grp + str stamp + + # Vectorized version of DstTzInfo.localize + if is_utc(tz) or tz is None: + return vals + + result = np.empty(n, dtype=np.int64) + + if is_tzlocal(tz): + for i in range(n): + v = vals[i] + if v == NPY_NAT: + result[i] = NPY_NAT + else: + result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True) + return result + + # silence false-positive compiler warning + ambiguous_array = np.empty(0, dtype=bool) + if isinstance(ambiguous, str): + if ambiguous == 'infer': + infer_dst = True + elif ambiguous == 'NaT': + fill = True + elif isinstance(ambiguous, bool): + is_dst = True + if ambiguous: + ambiguous_array = np.ones(len(vals), dtype=bool) + else: + ambiguous_array = np.zeros(len(vals), dtype=bool) + elif hasattr(ambiguous, '__iter__'): + is_dst = True + if len(ambiguous) != len(vals): + raise ValueError("Length of ambiguous bool-array must be " + "the same size as vals") + ambiguous_array = np.asarray(ambiguous, dtype=bool) + + if nonexistent == 'NaT': + fill_nonexist = True + elif nonexistent == 'shift_forward': + shift_forward = True + elif nonexistent == 'shift_backward': + shift_backward = True + elif PyDelta_Check(nonexistent): + shift_delta = 
delta_to_nanoseconds(nonexistent) + elif nonexistent not in ('raise', None): + msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " + "shift_backwards} or a timedelta object") + raise ValueError(msg) + + trans, deltas, _ = get_dst_info(tz) + + tdata = cnp.PyArray_DATA(trans) + ntrans = len(trans) + + # Determine whether each date lies left of the DST transition (store in + # result_a) or right of the DST transition (store in result_b) + result_a = np.empty(n, dtype=np.int64) + result_b = np.empty(n, dtype=np.int64) + result_a[:] = NPY_NAT + result_b[:] = NPY_NAT + + idx_shifted_left = (np.maximum(0, trans.searchsorted( + vals - DAY_SECONDS * 1000000000, side='right') - 1)).astype(np.int64) + + idx_shifted_right = (np.maximum(0, trans.searchsorted( + vals + DAY_SECONDS * 1000000000, side='right') - 1)).astype(np.int64) + + for i in range(n): + val = vals[i] + v_left = val - deltas[idx_shifted_left[i]] + pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 + # timestamp falls to the left side of the DST transition + if v_left + deltas[pos_left] == val: + result_a[i] = v_left + + v_right = val - deltas[idx_shifted_right[i]] + pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 + # timestamp falls to the right side of the DST transition + if v_right + deltas[pos_right] == val: + result_b[i] = v_right + + # silence false-positive compiler warning + dst_hours = np.empty(0, dtype=np.int64) + if infer_dst: + dst_hours = np.empty(n, dtype=np.int64) + dst_hours[:] = NPY_NAT + + # Get the ambiguous hours (given the above, these are the hours + # where result_a != result_b and neither of them are NAT) + both_nat = np.logical_and(result_a != NPY_NAT, result_b != NPY_NAT) + both_eq = result_a == result_b + trans_idx = np.squeeze(np.nonzero(np.logical_and(both_nat, ~both_eq))) + if trans_idx.size == 1: + stamp = _render_tstamp(vals[trans_idx]) + raise pytz.AmbiguousTimeError( + f"Cannot infer dst time from {stamp} as there " + f"are no repeated times") + # 
Split the array into contiguous chunks (where the difference between + # indices is 1). These are effectively dst transitions in different + # years which is useful for checking that there is not an ambiguous + # transition in an individual year. + if trans_idx.size > 0: + one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1 + trans_grp = np.array_split(trans_idx, one_diff) + + # Iterate through each day, if there are no hours where the + # delta is negative (indicates a repeat of hour) the switch + # cannot be inferred + for grp in trans_grp: + + delta = np.diff(result_a[grp]) + if grp.size == 1 or np.all(delta > 0): + stamp = _render_tstamp(vals[grp[0]]) + raise pytz.AmbiguousTimeError(stamp) + + # Find the index for the switch and pull from a for dst and b + # for standard + switch_idx = (delta <= 0).nonzero()[0] + if switch_idx.size > 1: + raise pytz.AmbiguousTimeError( + f"There are {switch_idx.size} dst switches when " + f"there should only be 1.") + switch_idx = switch_idx[0] + 1 + # Pull the only index and adjust + a_idx = grp[:switch_idx] + b_idx = grp[switch_idx:] + dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) + + for i in range(n): + val = vals[i] + left = result_a[i] + right = result_b[i] + if val == NPY_NAT: + result[i] = val + elif left != NPY_NAT and right != NPY_NAT: + if left == right: + result[i] = left + else: + if infer_dst and dst_hours[i] != NPY_NAT: + result[i] = dst_hours[i] + elif is_dst: + if ambiguous_array[i]: + result[i] = left + else: + result[i] = right + elif fill: + result[i] = NPY_NAT + else: + stamp = _render_tstamp(val) + raise pytz.AmbiguousTimeError( + f"Cannot infer dst time from {stamp}, try using the " + f"'ambiguous' argument") + elif left != NPY_NAT: + result[i] = left + elif right != NPY_NAT: + result[i] = right + else: + # Handle nonexistent times + if shift_forward or shift_backward or shift_delta != 0: + # Shift the nonexistent time to the closest existing time + remaining_mins = val % HOURS_NS + if 
shift_delta != 0: + # Validate that we don't relocalize on another nonexistent + # time + if -1 < shift_delta + remaining_mins < HOURS_NS: + raise ValueError( + f"The provided timedelta will relocalize on a " + f"nonexistent time: {nonexistent}" + ) + new_local = val + shift_delta + elif shift_forward: + new_local = val + (HOURS_NS - remaining_mins) + else: + # Subtract 1 since the beginning hour is _inclusive_ of + # nonexistent times + new_local = val - remaining_mins - 1 + delta_idx = trans.searchsorted(new_local, side='right') + # Shift the delta_idx by if the UTC offset of + # the target tz is greater than 0 and we're moving forward + # or vice versa + first_delta = deltas[0] + if (shift_forward or shift_delta > 0) and first_delta > 0: + delta_idx_offset = 1 + elif (shift_backward or shift_delta < 0) and first_delta < 0: + delta_idx_offset = 1 + else: + delta_idx_offset = 0 + delta_idx = delta_idx - delta_idx_offset + result[i] = new_local - deltas[delta_idx] + elif fill_nonexist: + result[i] = NPY_NAT + else: + stamp = _render_tstamp(val) + raise pytz.NonExistentTimeError(stamp) + + return result + + +cdef inline Py_ssize_t bisect_right_i8(int64_t *data, + int64_t val, Py_ssize_t n): + cdef: + Py_ssize_t pivot, left = 0, right = n + + assert n >= 1 + + # edge cases + if val > data[n - 1]: + return n + + if val < data[0]: + return 0 + + while left < right: + pivot = left + (right - left) // 2 + + if data[pivot] <= val: + left = pivot + 1 + else: + right = pivot + + return left + + +cdef inline str _render_tstamp(int64_t val): + """ Helper function to render exception messages""" + from pandas._libs.tslibs.timestamps import Timestamp + return str(Timestamp(val)) + + +# ---------------------------------------------------------------------- +# Timezone Conversion + +cdef int64_t tz_convert_utc_to_tzlocal(int64_t utc_val, tzinfo tz): + """ + Parameters + ---------- + utc_val : int64_t + tz : tzinfo + + Returns + ------- + local_val : int64_t + """ + return 
_tz_convert_tzlocal_utc(utc_val, tz, to_utc=False) + + +cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): + """ + Convert the val (in i8) from timezone1 to timezone2 + + This is a single timezone version of tz_convert + + Parameters + ---------- + val : int64 + tz1 : string / timezone object + tz2 : string / timezone object + + Returns + ------- + converted: int64 + """ + cdef: + int64_t[:] deltas + Py_ssize_t pos + int64_t v, offset, utc_date + npy_datetimestruct dts + int64_t arr[1] + + # See GH#17734 We should always be converting either from UTC or to UTC + assert is_utc(tz1) or is_utc(tz2) + + if val == NPY_NAT: + return val + + # Convert to UTC + if is_tzlocal(tz1): + utc_date = _tz_convert_tzlocal_utc(val, tz1, to_utc=True) + elif not is_utc(get_timezone(tz1)): + arr[0] = val + utc_date = _tz_convert_dst(arr, tz1, to_utc=True)[0] + else: + utc_date = val + + if is_utc(get_timezone(tz2)): + return utc_date + elif is_tzlocal(tz2): + return _tz_convert_tzlocal_utc(utc_date, tz2, to_utc=False) + else: + # Convert UTC to other timezone + arr[0] = utc_date + # Note: at least with cython 0.28.3, doing a lookup `[0]` in the next + # line is sensitive to the declared return type of _tz_convert_dst; + # if it is declared as returning ndarray[int64_t], a compile-time error + # is raised. 
+ return _tz_convert_dst(arr, tz2, to_utc=False)[0] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def tz_convert(int64_t[:] vals, object tz1, object tz2): + """ + Convert the values (in i8) from timezone1 to timezone2 + + Parameters + ---------- + vals : int64 ndarray + tz1 : string / timezone object + tz2 : string / timezone object + + Returns + ------- + int64 ndarray of converted + """ + cdef: + int64_t[:] utc_dates, converted + + if len(vals) == 0: + return np.array([], dtype=np.int64) + + # Convert to UTC + utc_dates = _tz_convert_one_way(vals, tz1, to_utc=True) + converted = _tz_convert_one_way(utc_dates, tz2, to_utc=False) + return np.array(converted, dtype=np.int64) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz, bint to_utc): + """ + Convert the given values (in i8) either to UTC or from UTC. + + Parameters + ---------- + vals : int64 ndarray + tz : string / timezone object + to_utc : bint + + Returns + ------- + converted : ndarray[int64_t] + """ + cdef: + int64_t[:] converted, result + Py_ssize_t i, n = len(vals) + int64_t val + + if not is_utc(get_timezone(tz)): + converted = np.empty(n, dtype=np.int64) + if is_tzlocal(tz): + for i in range(n): + val = vals[i] + if val == NPY_NAT: + converted[i] = NPY_NAT + else: + converted[i] = _tz_convert_tzlocal_utc(val, tz, to_utc) + else: + converted = _tz_convert_dst(vals, tz, to_utc) + else: + converted = vals + + return converted + + +cdef int64_t _tz_convert_tzlocal_utc(int64_t val, tzinfo tz, bint to_utc=True): + """ + Convert the i8 representation of a datetime from a tzlocal timezone to + UTC, or vice-versa.
+ + Private, not intended for use outside of tslibs.conversion + + Parameters + ---------- + val : int64_t + tz : tzinfo + to_utc : bint + True if converting tzlocal _to_ UTC, False if going the other direction + + Returns + ------- + result : int64_t + """ + cdef: + npy_datetimestruct dts + int64_t delta + datetime dt + + dt64_to_dtstruct(val, &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us) + # get_utcoffset (tz.utcoffset under the hood) only makes sense if datetime + # is _wall time_, so if val is a UTC timestamp convert to wall time + if not to_utc: + dt = dt.replace(tzinfo=tzutc()) + dt = dt.astimezone(tz) + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + + if not to_utc: + return val + delta + return val - delta + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz, + bint to_utc=True): + """ + tz_convert for non-UTC non-tzlocal cases where we have to check + DST transitions pointwise. + + Parameters + ---------- + values : ndarray[int64_t] + tz : tzinfo + to_utc : bool + True if converting _to_ UTC, False if converting _from_ utc + + Returns + ------- + result : ndarray[int64_t] + """ + cdef: + Py_ssize_t n = len(values) + Py_ssize_t i + intp_t[:] pos + int64_t[:] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans + int64_t[:] deltas + int64_t v + bint tz_is_local + + tz_is_local = is_tzlocal(tz) + + if not tz_is_local: + # get_dst_info cannot extract offsets from tzlocal because its + # dependent on a datetime + trans, deltas, _ = get_dst_info(tz) + if not to_utc: + # We add `offset` below instead of subtracting it + deltas = -1 * np.array(deltas, dtype='i8') + + # Previously, this search was done pointwise to try and benefit + # from getting to skip searches for iNaTs. 
However, it seems call + # overhead dominates the search time so doing it once in bulk + # is substantially faster (GH#24603) + pos = trans.searchsorted(values, side='right') - 1 + + for i in range(n): + v = values[i] + if v == NPY_NAT: + result[i] = v + elif tz_is_local: + result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=to_utc) + else: + if pos[i] < 0: + raise ValueError('First time before start of DST info') + result[i] = v - deltas[pos[i]] + + return result diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd new file mode 100644 index 00000000..936532a8 --- /dev/null +++ b/pandas/_libs/tslibs/util.pxd @@ -0,0 +1,249 @@ + +from cpython.object cimport PyTypeObject + +cdef extern from *: + """ + PyObject* char_to_string(const char* data) { + return PyUnicode_FromString(data); + } + """ + object char_to_string(const char* data) + + +cdef extern from "Python.h": + # Note: importing extern-style allows us to declare these as nogil + # functions, whereas `from cpython cimport` does not. + bint PyUnicode_Check(object obj) nogil + bint PyBool_Check(object obj) nogil + bint PyFloat_Check(object obj) nogil + bint PyComplex_Check(object obj) nogil + bint PyObject_TypeCheck(object obj, PyTypeObject* type) nogil + + # Note that following functions can potentially raise an exception, + # thus they cannot be declared 'nogil'. Also PyUnicode_AsUTF8AndSize() can + # potentially allocate memory inside in unlikely case of when underlying + # unicode object was stored as non-utf8 and utf8 wasn't requested before. 
cdef inline bint is_integer_object(object obj) nogil:
    """
    Cython equivalent of

    `isinstance(obj, (int, long, np.integer)) and not isinstance(obj, bool)`

    Parameters
    ----------
    obj : object

    Returns
    -------
    is_integer : bool

    Notes
    -----
    np.timedelta64 objects are NOT counted as integers: anything accepted
    by `is_timedelta64_object` is rejected explicitly by the last clause of
    the return expression.
    """
    # Order matters for cost only: the cheap bool check runs first, then the
    # numpy integer-scalar check, then the timedelta64 exclusion.
    return (not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj)
            and not is_timedelta64_object(obj))
cdef inline bint is_nan(object val):
    """
    Report whether `val` is a Not-A-Number float or complex value.

    Covers float('NaN') and np.nan as well as complex values that do not
    compare equal to themselves.

    Parameters
    ----------
    val : object

    Returns
    -------
    is_nan : bool
    """
    cdef float64_t as_float

    if not is_float_object(val):
        # Only complex objects can still qualify; rely on NaN != NaN
        return is_complex_object(val) and val != val

    # Unbox once so the self-comparison is done on a C double
    as_float = val
    return as_float != as_float
cdef inline Py_ssize_t validate_indexer(ndarray arr, object loc) except -1:
    """
    Cast the given indexer `loc` to an integer. If it is negative, i.e. a
    python-style indexing-from-the-end indexer, translate it to a
    from-the-front indexer. Raise if this is not possible.

    Parameters
    ----------
    arr : ndarray
    loc : object
        Integer-like indexer. A float is accepted when it holds an integral
        value (e.g. 2.0) and is replaced by that integer before validation.

    Returns
    -------
    idx : Py_ssize_t
        Non-negative position satisfying 0 <= idx < arr.size.

    Raises
    ------
    IndexError
        If the (translated) position falls outside the array, or the array
        is empty.
    """
    cdef:
        Py_ssize_t idx, size
        # Same width as `idx`: a plain C `int` would overflow on conversion
        # for integral floats >= 2**31 before they could be range-checked.
        Py_ssize_t casted

    if is_float_object(loc):
        casted = int(loc)
        if casted == loc:
            # integral float: continue with the exact integer value
            loc = casted

    idx = loc
    size = cnp.PyArray_SIZE(arr)

    if idx < 0 and size > 0:
        # translate from-the-end indexing to from-the-front
        idx += size
    if idx >= size or size == 0 or idx < 0:
        raise IndexError('index out of bounds')

    return idx
cdef inline bint is_monotonic_start_end_bounds(ndarray[int64_t, ndim=1] start,
                                               ndarray[int64_t, ndim=1] end):
    """
    Return True when both window-bound arrays pass `is_monotonic`.

    Only the first element of each `is_monotonic(..., False)` result is
    consulted; `end` is not inspected when `start` already fails.
    """
    if not is_monotonic(start, False)[0]:
        return False
    return is_monotonic(end, False)[0]
def roll_count(ndarray[float64_t] values, ndarray[int64_t] start, ndarray[int64_t] end,
               int64_t minp):
    """
    Rolling count of non-NaN observations over variable windows.

    Parameters
    ----------
    values : ndarray[float64_t]
    start : ndarray[int64_t]
        Inclusive start position of each window.
    end : ndarray[int64_t]
        Exclusive end position of each window.
    minp : int64_t
        Minimum number of non-NaN observations required; windows with
        fewer yield NaN.

    Returns
    -------
    output : ndarray[float64_t]
        One entry per element of `values`.
    """
    cdef:
        float64_t val, count_x = 0.0
        int64_t s, e, N = len(values)
        Py_ssize_t i, j
        ndarray[float64_t] output
        bint is_monotonic_bounds

    # The incremental add/delete update below is only valid when both bound
    # arrays are monotonic; otherwise every window is counted from scratch,
    # mirroring the other *_variable kernels in this module.
    is_monotonic_bounds = is_monotonic_start_end_bounds(start, end)
    output = np.empty(N, dtype=float)

    with nogil:

        for i in range(0, N):
            s = start[i]
            e = end[i]

            if i == 0 or not is_monotonic_bounds:

                # setup: count the whole window
                count_x = 0.0
                for j in range(s, e):
                    val = values[j]
                    if notnan(val):
                        count_x += 1.0

            else:

                # calculate deletes: positions that left the window
                for j in range(start[i - 1], s):
                    val = values[j]
                    if notnan(val):
                        count_x -= 1.0

                # calculate adds: positions that entered the window
                for j in range(end[i - 1], e):
                    val = values[j]
                    if notnan(val):
                        count_x += 1.0

            if count_x >= minp:
                output[i] = count_x
            else:
                output[i] = NaN

    return output
cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs,
                                Py_ssize_t neg_ct, float64_t sum_x) nogil:
    """
    Turn the running sum of a window into its mean.

    Returns NaN when fewer than `minp` observations are present. The mean
    is forced to 0 when its sign contradicts the inputs: negative although
    no negative value was seen, or positive although every value was
    negative.
    """
    cdef:
        float64_t mean

    if nobs < minp:
        return NaN

    mean = sum_x / nobs
    if neg_ct == 0 and mean < 0:
        # all positive
        return 0
    if neg_ct == nobs and mean > 0:
        # all negative
        return 0
    return mean
def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
                    ndarray[int64_t] end, int64_t minp, int64_t win):
    """
    Rolling mean over a fixed-size window.

    Parameters
    ----------
    values : ndarray[float64_t]
    start, end : ndarray[int64_t]
        Not used by this fixed-window kernel.
    minp : int64_t
        Minimum number of non-NaN observations required for a result.
    win : int64_t
        Window length.

    Returns
    -------
    output : ndarray[float64_t]
    """
    cdef:
        float64_t val, prev_x, sum_x = 0
        Py_ssize_t nobs = 0, i, neg_ct = 0, N = len(values)
        Py_ssize_t range_endpoint
        ndarray[float64_t] output

    output = np.empty(N, dtype=float)

    # Number of leading positions that can never reach `minp` observations.
    # Clamp to [0, N]: bounds checking and wraparound are disabled for this
    # module, so `minp <= 0` (start index -1) or `minp - 1 > N` would
    # otherwise read outside `values`.
    range_endpoint = minp - 1 if minp > 1 else 0
    if range_endpoint > N:
        range_endpoint = N

    with nogil:
        for i in range(range_endpoint):
            val = values[i]
            add_mean(val, &nobs, &sum_x, &neg_ct)
            output[i] = NaN

        for i in range(range_endpoint, N):
            val = values[i]
            add_mean(val, &nobs, &sum_x, &neg_ct)

            if i > win - 1:
                # the element `win` positions back has left the window
                prev_x = values[i - win]
                remove_mean(prev_x, &nobs, &sum_x, &neg_ct)

            output[i] = calc_mean(minp, nobs, neg_ct, sum_x)

    return output
cdef inline float64_t calc_var(int64_t minp, int ddof, float64_t nobs,
                               float64_t ssqdm_x) nogil:
    """
    Convert the running sum of squared deviations into a variance.

    Returns NaN unless there are at least `minp` observations and more
    observations than `ddof`. A single observation yields 0, and negative
    results are floored at 0.
    """
    cdef:
        float64_t variance

    # Variance is unchanged if no observation is added or removed
    if not ((nobs >= minp) and (nobs > ddof)):
        return NaN

    # pathological case
    if nobs == 1:
        return 0

    variance = ssqdm_x / (nobs - ddof)
    if variance < 0:
        return 0
    return variance
+ """ + cdef: + float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, + float64_t val, prev, delta, mean_x_old + int64_t s, e + Py_ssize_t i, j, N = len(values) + ndarray[float64_t] output + + output = np.empty(N, dtype=float) + + # Check for windows larger than array, addresses #7297 + win = min(win, N) + + with nogil: + + # Over the first window, observations can only be added, never + # removed + for i in range(win): + add_var(values[i], &nobs, &mean_x, &ssqdm_x) + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) + + # a part of Welford's method for the online variance-calculation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + + # After the first window, observations can both be added and + # removed + for i in range(win, N): + val = values[i] + prev = values[i - win] + + if notnan(val): + if prev == prev: + + # Adding one observation and removing another one + delta = val - prev + mean_x_old = mean_x + + mean_x += delta / nobs + ssqdm_x += ((nobs - 1) * val + + (nobs + 1) * prev + - 2 * nobs * mean_x_old) * delta / nobs + + else: + add_var(val, &nobs, &mean_x, &ssqdm_x) + elif prev == prev: + remove_var(prev, &nobs, &mean_x, &ssqdm_x) + + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) + + return output + + +def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int ddof=1): + """ + Numerically stable implementation using Welford's method. 
+ """ + cdef: + float64_t mean_x = 0, ssqdm_x = 0, nobs = 0, + float64_t val, prev, delta, mean_x_old + int64_t s, e + Py_ssize_t i, j, N = len(values) + ndarray[float64_t] output + bint is_monotonic_bounds + + is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) + output = np.empty(N, dtype=float) + + with nogil: + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added + # never removed + if i == 0 or not is_monotonic_bounds: + + for j in range(s, e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x) + + else: + + # After the first window, observations can both be added + # and removed + + # calculate adds + for j in range(end[i - 1], e): + add_var(values[j], &nobs, &mean_x, &ssqdm_x) + + # calculate deletes + for j in range(start[i - 1], s): + remove_var(values[j], &nobs, &mean_x, &ssqdm_x) + + output[i] = calc_var(minp, ddof, nobs, ssqdm_x) + + if not is_monotonic_bounds: + for j in range(s, e): + remove_var(values[j], &nobs, &mean_x, &ssqdm_x) + + return output + +# ---------------------------------------------------------------------- +# Rolling skewness + + +cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, R + + if nobs >= minp: + dnobs = nobs + A = x / dnobs + B = xx / dnobs - A * A + C = xxx / dnobs - A * A * A - 3 * A * B + + # #18044: with uniform distribution, floating issue will + # cause B != 0. and cause the result is a very + # large number. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. 
def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
                    ndarray[int64_t] end, int64_t minp, int64_t win):
    """
    Rolling skewness over a fixed-size window.

    Parameters
    ----------
    values : ndarray[float64_t]
    start, end : ndarray[int64_t]
        Not used by this fixed-window kernel.
    minp : int64_t
        Minimum number of non-NaN observations required for a result.
    win : int64_t
        Window length.

    Returns
    -------
    output : ndarray[float64_t]
    """
    cdef:
        float64_t val, prev
        float64_t x = 0, xx = 0, xxx = 0
        int64_t nobs = 0, i, N = len(values)
        int64_t range_endpoint
        ndarray[float64_t] output

    output = np.empty(N, dtype=float)

    # Leading positions that cannot satisfy `minp`. Clamp to [0, N] because
    # bounds checking and wraparound are disabled for this module: an
    # unguarded `minp - 1` would index values[-1] for minp <= 0 and run past
    # the end for minp - 1 > N.
    range_endpoint = minp - 1 if minp > 1 else 0
    if range_endpoint > N:
        range_endpoint = N

    with nogil:
        for i in range(range_endpoint):
            val = values[i]
            add_skew(val, &nobs, &x, &xx, &xxx)
            output[i] = NaN

        for i in range(range_endpoint, N):
            val = values[i]
            add_skew(val, &nobs, &x, &xx, &xxx)

            if i > win - 1:
                # the element `win` positions back has left the window
                prev = values[i - win]
                remove_skew(prev, &nobs, &x, &xx, &xxx)

            output[i] = calc_skew(minp, nobs, x, xx, xxx)

    return output
+ is_monotonic_bounds = is_monotonic_start_end_bounds(start, end) + output = np.empty(N, dtype=float) + + with nogil: + + for i in range(0, N): + + s = start[i] + e = end[i] + + # Over the first window, observations can only be added + # never removed + if i == 0 or not is_monotonic_bounds: + + for j in range(s, e): + val = values[j] + add_skew(val, &nobs, &x, &xx, &xxx) + + else: + + # After the first window, observations can both be added + # and removed + + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + add_skew(val, &nobs, &x, &xx, &xxx) + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + remove_skew(val, &nobs, &x, &xx, &xxx) + + output[i] = calc_skew(minp, nobs, x, xx, xxx) + + if not is_monotonic_bounds: + for j in range(s, e): + val = values[j] + remove_skew(val, &nobs, &x, &xx, &xxx) + + return output + +# ---------------------------------------------------------------------- +# Rolling kurtosis + + +cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx) nogil: + cdef: + float64_t result, dnobs + float64_t A, B, C, D, R, K + + if nobs >= minp: + dnobs = nobs + A = x / dnobs + R = A * A + B = xx / dnobs - R + R = R * A + C = xxx / dnobs - R - 3 * A * B + R = R * A + D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A + + # #18044: with uniform distribution, floating issue will + # cause B != 0. and cause the result is a very + # large number. + # + # in core/nanops.py nanskew/nankurt call the function + # _zero_out_fperr(m2) to fix floating error. + # if the variance is less than 1e-14, it could be + # treat as zero, here we follow the original + # skew/kurt behaviour to check B <= 1e-14 + if B <= 1e-14 or nobs < 4: + result = NaN + else: + K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2) + result = K / ((dnobs - 2.) 
def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
                    ndarray[int64_t] end, int64_t minp, int64_t win):
    """
    Rolling kurtosis over a fixed-size window.

    Parameters
    ----------
    values : ndarray[float64_t]
    start, end : ndarray[int64_t]
        Not used by this fixed-window kernel.
    minp : int64_t
        Minimum number of non-NaN observations required for a result.
    win : int64_t
        Window length.

    Returns
    -------
    output : ndarray[float64_t]
    """
    cdef:
        float64_t prev
        float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
        int64_t nobs = 0, i, N = len(values)
        int64_t range_endpoint
        ndarray[float64_t] output

    output = np.empty(N, dtype=float)

    # Leading positions that cannot satisfy `minp`. Clamp to [0, N] because
    # bounds checking and wraparound are disabled for this module: an
    # unguarded `minp - 1` would index values[-1] for minp <= 0 and run past
    # the end for minp - 1 > N.
    range_endpoint = minp - 1 if minp > 1 else 0
    if range_endpoint > N:
        range_endpoint = N

    with nogil:

        for i in range(range_endpoint):
            add_kurt(values[i], &nobs, &x, &xx, &xxx, &xxxx)
            output[i] = NaN

        for i in range(range_endpoint, N):
            add_kurt(values[i], &nobs, &x, &xx, &xxx, &xxxx)

            if i > win - 1:
                # the element `win` positions back has left the window
                prev = values[i - win]
                remove_kurt(prev, &nobs, &x, &xx, &xxx, &xxxx)

            output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)

    return output
def roll_median_c(ndarray[float64_t] values, ndarray[int64_t] start,
                  ndarray[int64_t] end, int64_t minp, int64_t win):
    """
    Rolling median backed by a skip list.

    Parameters
    ----------
    values : ndarray[float64_t]
    start : ndarray[int64_t]
        Inclusive start position of each window.
    end : ndarray[int64_t]
        Exclusive end position of each window.
    minp : int64_t
        Minimum number of non-NaN observations required; windows with
        fewer yield NaN.
    win : int64_t
        Requested window length. 0 short-circuits to an all-NaN result;
        otherwise it is replaced by the largest `end - start`.

    Returns
    -------
    output : ndarray[float64_t]

    Raises
    ------
    MemoryError
        If the skip list cannot be created or an insert fails.
    """
    cdef:
        float64_t val, res
        bint err = 0
        int ret = 0
        skiplist_t *sl
        Py_ssize_t i, j
        int64_t nobs = 0, N = len(values), s, e
        int midpoint
        ndarray[float64_t] output

    # we use the Fixed/Variable Indexer here as the
    # actual skiplist ops outweigh any window computation costs
    output = np.empty(N, dtype=float)

    if win == 0 or (end - start).max() == 0:
        output[:] = NaN
        return output
    win = (end - start).max()
    sl = skiplist_init(win)
    if sl == NULL:
        raise MemoryError("skiplist_init failed")

    with nogil:

        for i in range(0, N):
            s = start[i]
            e = end[i]

            if i == 0:

                # setup
                for j in range(s, e):
                    val = values[j]
                    if notnan(val):
                        nobs += 1
                        err = skiplist_insert(sl, val) != 1
                        if err:
                            break

            else:

                # calculate adds
                for j in range(end[i - 1], e):
                    val = values[j]
                    if notnan(val):
                        nobs += 1
                        err = skiplist_insert(sl, val) != 1
                        if err:
                            break

                # calculate deletes
                for j in range(start[i - 1], s):
                    val = values[j]
                    if notnan(val):
                        skiplist_remove(sl, val)
                        nobs -= 1

            if err:
                # Stop at the first failed insert. `err` is reassigned by
                # every subsequent insert, so continuing could clear the
                # flag and return results computed from a corrupt window.
                break

            if nobs >= minp:
                # integer midpoint of the sorted window
                midpoint = <int>(nobs // 2)
                if nobs % 2:
                    res = skiplist_get(sl, midpoint, &ret)
                else:
                    res = (skiplist_get(sl, midpoint, &ret) +
                           skiplist_get(sl, (midpoint - 1), &ret)) / 2
            else:
                res = NaN

            output[i] = res

    skiplist_destroy(sl)
    if err:
        raise MemoryError("skiplist_insert failed")
    return output
def roll_max_variable(ndarray[float64_t] values, ndarray[int64_t] start,
                      ndarray[int64_t] end, int64_t minp):
    """
    Moving max of a 1d float64 array over variable windows.

    Thin wrapper that forwards to `_roll_min_max_variable` with
    `is_max=1`.

    Parameters
    ----------
    values : np.ndarray[np.float64]
    start : np.ndarray[np.int64]
        Window start bounds, passed through unchanged.
    end : np.ndarray[np.int64]
        Window end bounds, passed through unchanged.
    minp : int
        if number of observations in window
        is below this, output a NaN

    Returns
    -------
    Whatever `_roll_min_max_variable` returns for these arguments.
    """
    return _roll_min_max_variable(values, start, end, minp, is_max=1)
cdef _roll_min_max_variable(ndarray[numeric] values,
                            ndarray[int64_t] starti,
                            ndarray[int64_t] endi,
                            int64_t minp,
                            bint is_max):
    """
    Shared kernel for the variable-window rolling min and max.

    Parameters
    ----------
    values : ndarray[numeric]
    starti : ndarray[int64_t]
        Start position of each window.
    endi : ndarray[int64_t]
        End position (exclusive) of each window.
    minp : int64_t
        Minimum observation count, forwarded to `calc_mm`.
    is_max : bint
        True to compute the maximum, False for the minimum.

    Returns
    -------
    output : ndarray[float64_t]
        One entry per element of `values`.

    Notes
    -----
    Two deques of positions are maintained: `Q` holds candidate extrema
    with the current extremum at the front, `W` holds every position still
    in the window so that `nobs` can be decremented as positions expire.
    NOTE(review): `endi[N-1]` and `starti[0]` are read unconditionally, so
    this looks like it assumes a non-empty input — confirm against callers.
    """
    cdef:
        numeric ai
        int64_t i, close_offset, curr_win_size
        Py_ssize_t nobs = 0, N = len(values)
        deque Q[int64_t]  # min/max always the front
        deque W[int64_t]  # track the whole window for nobs compute
        ndarray[float64_t, ndim=1] output

    output = np.empty(N, dtype=float)
    Q = deque[int64_t]()
    W = deque[int64_t]()

    with nogil:

        # This is using a modified version of the C++ code in this
        # SO post: http://bit.ly/2nOoHlY
        # The original impl didn't deal with variable window sizes
        # So the code was optimized for that

        # Prime both deques with the contents of the first window
        for i in range(starti[0], endi[0]):
            ai = init_mm(values[i], &nobs, is_max)

            # Discard previous entries if we find new min or max
            # (entries holding NaN, i.e. values[k] != values[k], are
            # discarded as well)
            if is_max:
                while not Q.empty() and ((ai >= values[Q.back()]) or
                                         values[Q.back()] != values[Q.back()]):
                    Q.pop_back()
            else:
                while not Q.empty() and ((ai <= values[Q.back()]) or
                                         values[Q.back()] != values[Q.back()]):
                    Q.pop_back()
            Q.push_back(i)
            W.push_back(i)

        # if right is open then the first window is empty
        close_offset = 0 if endi[0] > starti[0] else 1
        # first window's size
        curr_win_size = endi[0] - starti[0]

        for i in range(endi[0], endi[N-1]):
            # Emit the result for the window that ended just before
            # position i, then absorb position i into the state
            if not Q.empty() and curr_win_size > 0:
                output[i-1+close_offset] = calc_mm(
                    minp, nobs, values[Q.front()])
            else:
                output[i-1+close_offset] = NaN

            ai = init_mm(values[i], &nobs, is_max)

            # Discard previous entries if we find new min or max
            if is_max:
                while not Q.empty() and ((ai >= values[Q.back()]) or
                                         values[Q.back()] != values[Q.back()]):
                    Q.pop_back()
            else:
                while not Q.empty() and ((ai <= values[Q.back()]) or
                                         values[Q.back()] != values[Q.back()]):
                    Q.pop_back()

            # Maintain window/nobs retention
            curr_win_size = endi[i + close_offset] - starti[i + close_offset]
            # Drop positions that are no longer inside the current window
            while not Q.empty() and Q.front() <= i - curr_win_size:
                Q.pop_front()
            while not W.empty() and W.front() <= i - curr_win_size:
                remove_mm(values[W.front()], &nobs)
                W.pop_front()

            Q.push_back(i)
            W.push_back(i)

        # The loop above always writes one position behind; fill the last
        if not Q.empty() and curr_win_size > 0:
            output[N-1] = calc_mm(minp, nobs, values[Q.front()])
        else:
            output[N-1] = NaN

    return output
calc_mm(minp, nobs, minvalue[0]) + + for i in range(minp - 1): + if numeric in cython.floating: + output[i] = NaN + else: + output[i] = 0 + + free(ring) + free(death) + + return output + + +cdef enum InterpolationType: + LINEAR, + LOWER, + HIGHER, + NEAREST, + MIDPOINT + + +interpolation_types = { + 'linear': LINEAR, + 'lower': LOWER, + 'higher': HIGHER, + 'nearest': NEAREST, + 'midpoint': MIDPOINT, +} + + +def roll_quantile(ndarray[float64_t, cast=True] values, ndarray[int64_t] start, + ndarray[int64_t] end, int64_t minp, int64_t win, + float64_t quantile, str interpolation): + """ + O(N log(window)) implementation using skip list + """ + cdef: + float64_t val, prev, midpoint, idx_with_fraction + skiplist_t *skiplist + int64_t nobs = 0, i, j, s, e, N = len(values) + Py_ssize_t idx + ndarray[float64_t] output + float64_t vlow, vhigh + InterpolationType interpolation_type + int ret = 0 + + if quantile <= 0.0 or quantile >= 1.0: + raise ValueError(f"quantile value {quantile} not in [0, 1]") + + try: + interpolation_type = interpolation_types[interpolation] + except KeyError: + raise ValueError(f"Interpolation '{interpolation}' is not supported") + + # we use the Fixed/Variable Indexer here as the + # actual skiplist ops outweigh any window computation costs + output = np.empty(N, dtype=float) + + if win == 0 or (end - start).max() == 0: + output[:] = NaN + return output + win = (end - start).max() + skiplist = skiplist_init(win) + if skiplist == NULL: + raise MemoryError("skiplist_init failed") + + with nogil: + for i in range(0, N): + s = start[i] + e = end[i] + + if i == 0: + + # setup + for j in range(s, e): + val = values[j] + if notnan(val): + nobs += 1 + skiplist_insert(skiplist, val) + + else: + + # calculate adds + for j in range(end[i - 1], e): + val = values[j] + if notnan(val): + nobs += 1 + skiplist_insert(skiplist, val) + + # calculate deletes + for j in range(start[i - 1], s): + val = values[j] + if notnan(val): + skiplist_remove(skiplist, val) + nobs 
-= 1 + + if nobs >= minp: + if nobs == 1: + # Single value in skip list + output[i] = skiplist_get(skiplist, 0, &ret) + else: + idx_with_fraction = quantile * (nobs - 1) + idx = idx_with_fraction + + if idx_with_fraction == idx: + # no need to interpolate + output[i] = skiplist_get(skiplist, idx, &ret) + continue + + if interpolation_type == LINEAR: + vlow = skiplist_get(skiplist, idx, &ret) + vhigh = skiplist_get(skiplist, idx + 1, &ret) + output[i] = ((vlow + (vhigh - vlow) * + (idx_with_fraction - idx))) + elif interpolation_type == LOWER: + output[i] = skiplist_get(skiplist, idx, &ret) + elif interpolation_type == HIGHER: + output[i] = skiplist_get(skiplist, idx + 1, &ret) + elif interpolation_type == NEAREST: + # the same behaviour as round() + if idx_with_fraction - idx == 0.5: + if idx % 2 == 0: + output[i] = skiplist_get(skiplist, idx, &ret) + else: + output[i] = skiplist_get( + skiplist, idx + 1, &ret) + elif idx_with_fraction - idx < 0.5: + output[i] = skiplist_get(skiplist, idx, &ret) + else: + output[i] = skiplist_get(skiplist, idx + 1, &ret) + elif interpolation_type == MIDPOINT: + vlow = skiplist_get(skiplist, idx, &ret) + vhigh = skiplist_get(skiplist, idx + 1, &ret) + output[i] = (vlow + vhigh) / 2 + else: + output[i] = NaN + + skiplist_destroy(skiplist) + + return output + + +def roll_generic_fixed(object obj, + ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, int64_t win, + int offset, object func, bint raw, + object args, object kwargs): + cdef: + ndarray[float64_t] output, counts, bufarr + ndarray[float64_t, cast=True] arr + float64_t *buf + float64_t *oldbuf + int64_t nobs = 0, i, j, s, e, N = len(start) + + n = len(obj) + if n == 0: + return obj + + arr = np.asarray(obj) + + # ndarray input + if raw: + if not arr.flags.c_contiguous: + arr = arr.copy('C') + + counts = roll_sum_fixed(np.concatenate([np.isfinite(arr).astype(float), + np.array([0.] 
* offset)]), + start, end, minp, win)[offset:] + + output = np.empty(N, dtype=float) + + if not raw: + # series + for i in range(N): + if counts[i] >= minp: + sl = slice(int_max(i + offset - win + 1, 0), + int_min(i + offset + 1, N)) + output[i] = func(obj.iloc[sl], *args, **kwargs) + else: + output[i] = NaN + + else: + + # truncated windows at the beginning, through first full-length window + for i in range((int_min(win, N) - offset)): + if counts[i] >= minp: + output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs) + else: + output[i] = NaN + + # remaining full-length windows + buf = arr.data + bufarr = np.empty(win, dtype=float) + oldbuf = bufarr.data + for i in range((win - offset), (N - offset)): + buf = buf + 1 + bufarr.data = buf + if counts[i] >= minp: + output[i] = func(bufarr, *args, **kwargs) + else: + output[i] = NaN + bufarr.data = oldbuf + + # truncated windows at the end + for i in range(int_max(N - offset, 0), N): + if counts[i] >= minp: + output[i] = func(arr[int_max(i + offset - win + 1, 0): N], + *args, + **kwargs) + else: + output[i] = NaN + + return output + + +def roll_generic_variable(object obj, + ndarray[int64_t] start, ndarray[int64_t] end, + int64_t minp, + int offset, object func, bint raw, + object args, object kwargs): + cdef: + ndarray[float64_t] output, counts, bufarr + ndarray[float64_t, cast=True] arr + float64_t *buf + float64_t *oldbuf + int64_t nobs = 0, i, j, s, e, N = len(start) + + n = len(obj) + if n == 0: + return obj + + arr = np.asarray(obj) + + # ndarray input + if raw: + if not arr.flags.c_contiguous: + arr = arr.copy('C') + + counts = roll_sum_variable(np.concatenate([np.isfinite(arr).astype(float), + np.array([0.] 
* offset)]), + start, end, minp)[offset:] + + output = np.empty(N, dtype=float) + + if offset != 0: + raise ValueError("unable to roll_generic with a non-zero offset") + + for i in range(0, N): + s = start[i] + e = end[i] + + if counts[i] >= minp: + if raw: + output[i] = func(arr[s:e], *args, **kwargs) + else: + output[i] = func(obj.iloc[s:e], *args, **kwargs) + else: + output[i] = NaN + + return output + + +# ---------------------------------------------------------------------- +# Rolling sum and mean for weighted window + + +def roll_weighted_sum(float64_t[:] values, float64_t[:] weights, int minp): + return _roll_weighted_sum_mean(values, weights, minp, avg=0) + + +def roll_weighted_mean(float64_t[:] values, float64_t[:] weights, int minp): + return _roll_weighted_sum_mean(values, weights, minp, avg=1) + + +cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values, + float64_t[:] weights, + int minp, bint avg): + """ + Assume len(weights) << len(values) + """ + cdef: + float64_t[:] output, tot_wgt, counts + Py_ssize_t in_i, win_i, win_n, in_n + float64_t val_in, val_win, c, w + + in_n = len(values) + win_n = len(weights) + + output = np.zeros(in_n, dtype=np.float64) + counts = np.zeros(in_n, dtype=np.float64) + if avg: + tot_wgt = np.zeros(in_n, dtype=np.float64) + + if minp > win_n: + raise ValueError(f"min_periods (minp) must be <= " + f"window (win)") + elif minp > in_n: + minp = in_n + 1 + elif minp < 0: + raise ValueError('min_periods must be >= 0') + + minp = max(minp, 1) + + with nogil: + if avg: + for win_i in range(win_n): + val_win = weights[win_i] + if val_win != val_win: + continue + + for in_i in range(in_n - (win_n - win_i) + 1): + val_in = values[in_i] + if val_in == val_in: + output[in_i + (win_n - win_i) - 1] += val_in * val_win + counts[in_i + (win_n - win_i) - 1] += 1 + tot_wgt[in_i + (win_n - win_i) - 1] += val_win + + for in_i in range(in_n): + c = counts[in_i] + if c < minp: + output[in_i] = NaN + else: + w = tot_wgt[in_i] + if w 
== 0: + output[in_i] = NaN + else: + output[in_i] /= tot_wgt[in_i] + + else: + for win_i in range(win_n): + val_win = weights[win_i] + if val_win != val_win: + continue + + for in_i in range(in_n - (win_n - win_i) + 1): + val_in = values[in_i] + + if val_in == val_in: + output[in_i + (win_n - win_i) - 1] += val_in * val_win + counts[in_i + (win_n - win_i) - 1] += 1 + + for in_i in range(in_n): + c = counts[in_i] + if c < minp: + output[in_i] = NaN + + return np.asarray(output) + + +# ---------------------------------------------------------------------- +# Rolling var for weighted window + + +cdef inline float64_t calc_weighted_var(float64_t t, + float64_t sum_w, + Py_ssize_t win_n, + unsigned int ddof, + float64_t nobs, + int64_t minp) nogil: + """ + Calculate weighted variance for a window using West's method. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + win_n: Py_ssize_t + window size + ddof: unsigned int + delta degrees of freedom + nobs: float64_t + number of observations + minp: int64_t + minimum number of observations + + Returns + ------- + result : float64_t + weighted variance of the window + """ + + cdef: + float64_t result + + # Variance is unchanged if no observation is added or removed + if (nobs >= minp) and (nobs > ddof): + + # pathological case + if nobs == 1: + result = 0 + else: + result = t * win_n / ((win_n - ddof) * sum_w) + if result < 0: + result = 0 + else: + result = NaN + + return result + + +cdef inline void add_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: + """ + Update weighted mean, sum of weights and sum of weighted squared + differences to include value and weight pair in weighted variance + calculation using West's method. 
+ + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + val: float64_t + window values + w: float64_t + window weights + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + mean: float64_t + weighted mean + nobs: float64_t + number of observations + """ + + cdef: + float64_t temp, q, r + + if isnan(val): + return + + nobs[0] = nobs[0] + 1 + + q = val - mean[0] + temp = sum_w[0] + w + r = q * w / temp + + mean[0] = mean[0] + r + t[0] = t[0] + r * sum_w[0] * q + sum_w[0] = temp + + +cdef inline void remove_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: + """ + Update weighted mean, sum of weights and sum of weighted squared + differences to remove value and weight pair from weighted variance + calculation using West's method. + + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + val: float64_t + window values + w: float64_t + window weights + t: float64_t + sum of weighted squared differences + sum_w: float64_t + sum of weights + mean: float64_t + weighted mean + nobs: float64_t + number of observations + """ + + cdef: + float64_t temp, q, r + + if notnan(val): + nobs[0] = nobs[0] - 1 + + if nobs[0]: + q = val - mean[0] + temp = sum_w[0] - w + r = q * w / temp + + mean[0] = mean[0] - r + t[0] = t[0] - r * sum_w[0] * q + sum_w[0] = temp + + else: + t[0] = 0 + sum_w[0] = 0 + mean[0] = 0 + + +def roll_weighted_var(float64_t[:] values, float64_t[:] weights, + int64_t minp, unsigned int ddof): + """ + Calculates weighted rolling variance using West's online algorithm. 
+ + Paper: https://dl.acm.org/citation.cfm?id=359153 + + Parameters + ---------- + values: float64_t[:] + values to roll window over + weights: float64_t[:] + array of weights whose length is window size + minp: int64_t + minimum number of observations to calculate + variance of a window + ddof: unsigned int + the divisor used in variance calculations + is the window size - ddof + + Returns + ------- + output: float64_t[:] + weighted variances of windows + """ + + cdef: + float64_t t = 0, sum_w = 0, mean = 0, nobs = 0 + float64_t val, pre_val, w, pre_w + Py_ssize_t i, n, win_n + float64_t[:] output + + n = len(values) + win_n = len(weights) + output = np.empty(n, dtype=float) + + with nogil: + + for i in range(win_n): + add_weighted_var(values[i], weights[i], &t, + &sum_w, &mean, &nobs) + + output[i] = calc_weighted_var(t, sum_w, win_n, + ddof, nobs, minp) + + for i in range(win_n, n): + val = values[i] + pre_val = values[i - win_n] + + w = weights[i % win_n] + pre_w = weights[(i - win_n) % win_n] + + if notnan(val): + if pre_val == pre_val: + remove_weighted_var(pre_val, pre_w, &t, + &sum_w, &mean, &nobs) + + add_weighted_var(val, w, &t, &sum_w, &mean, &nobs) + + elif pre_val == pre_val: + remove_weighted_var(pre_val, pre_w, &t, + &sum_w, &mean, &nobs) + + output[i] = calc_weighted_var(t, sum_w, win_n, + ddof, nobs, minp) + + return output + + +# ---------------------------------------------------------------------- +# Exponentially weighted moving average + + +def ewma(float64_t[:] vals, float64_t com, int adjust, bint ignore_na, int minp): + """ + Compute exponentially-weighted moving average using center-of-mass. 
+ + Parameters + ---------- + vals : ndarray (float64 type) + com : float64 + adjust: int + ignore_na: bool + minp: int + + Returns + ------- + ndarray + """ + + cdef: + Py_ssize_t N = len(vals) + ndarray[float64_t] output = np.empty(N, dtype=float) + float64_t alpha, old_wt_factor, new_wt, weighted_avg, old_wt, cur + Py_ssize_t i, nobs + bint is_observation + + if N == 0: + return output + + minp = max(minp, 1) + + alpha = 1. / (1. + com) + old_wt_factor = 1. - alpha + new_wt = 1. if adjust else alpha + + weighted_avg = vals[0] + is_observation = (weighted_avg == weighted_avg) + nobs = int(is_observation) + output[0] = weighted_avg if (nobs >= minp) else NaN + old_wt = 1. + + with nogil: + for i in range(1, N): + cur = vals[i] + is_observation = (cur == cur) + nobs += is_observation + if weighted_avg == weighted_avg: + + if is_observation or (not ignore_na): + + old_wt *= old_wt_factor + if is_observation: + + # avoid numerical errors on constant series + if weighted_avg != cur: + weighted_avg = ((old_wt * weighted_avg) + + (new_wt * cur)) / (old_wt + new_wt) + if adjust: + old_wt += new_wt + else: + old_wt = 1. + elif is_observation: + weighted_avg = cur + + output[i] = weighted_avg if (nobs >= minp) else NaN + + return output + + +# ---------------------------------------------------------------------- +# Exponentially weighted moving covariance + + +def ewmcov(float64_t[:] input_x, float64_t[:] input_y, + float64_t com, int adjust, bint ignore_na, int minp, int bias): + """ + Compute exponentially-weighted moving variance using center-of-mass. 
+ + Parameters + ---------- + input_x : ndarray (float64 type) + input_y : ndarray (float64 type) + com : float64 + adjust: int + ignore_na: bool + minp: int + bias: int + + Returns + ------- + ndarray + """ + + cdef: + Py_ssize_t N = len(input_x) + float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov + float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y + float64_t numerator, denominator + Py_ssize_t i, nobs + ndarray[float64_t] output + bint is_observation + + if len(input_y) != N: + raise ValueError(f"arrays are of different lengths " + f"({N} and {len(input_y)})") + + output = np.empty(N, dtype=float) + if N == 0: + return output + + minp = max(minp, 1) + + alpha = 1. / (1. + com) + old_wt_factor = 1. - alpha + new_wt = 1. if adjust else alpha + + mean_x = input_x[0] + mean_y = input_y[0] + is_observation = ((mean_x == mean_x) and (mean_y == mean_y)) + nobs = int(is_observation) + if not is_observation: + mean_x = NaN + mean_y = NaN + output[0] = (0. if bias else NaN) if (nobs >= minp) else NaN + cov = 0. + sum_wt = 1. + sum_wt2 = 1. + old_wt = 1. 
+ + with nogil: + + for i in range(1, N): + cur_x = input_x[i] + cur_y = input_y[i] + is_observation = ((cur_x == cur_x) and (cur_y == cur_y)) + nobs += is_observation + if mean_x == mean_x: + if is_observation or (not ignore_na): + sum_wt *= old_wt_factor + sum_wt2 *= (old_wt_factor * old_wt_factor) + old_wt *= old_wt_factor + if is_observation: + old_mean_x = mean_x + old_mean_y = mean_y + + # avoid numerical errors on constant series + if mean_x != cur_x: + mean_x = ((old_wt * old_mean_x) + + (new_wt * cur_x)) / (old_wt + new_wt) + + # avoid numerical errors on constant series + if mean_y != cur_y: + mean_y = ((old_wt * old_mean_y) + + (new_wt * cur_y)) / (old_wt + new_wt) + cov = ((old_wt * (cov + ((old_mean_x - mean_x) * + (old_mean_y - mean_y)))) + + (new_wt * ((cur_x - mean_x) * + (cur_y - mean_y)))) / (old_wt + new_wt) + sum_wt += new_wt + sum_wt2 += (new_wt * new_wt) + old_wt += new_wt + if not adjust: + sum_wt /= old_wt + sum_wt2 /= (old_wt * old_wt) + old_wt = 1. + elif is_observation: + mean_x = cur_x + mean_y = cur_y + + if nobs >= minp: + if not bias: + numerator = sum_wt * sum_wt + denominator = numerator - sum_wt2 + if (denominator > 0.): + output[i] = ((numerator / denominator) * cov) + else: + output[i] = NaN + else: + output[i] = cov + else: + output[i] = NaN + + return output diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx new file mode 100644 index 00000000..8a1e7feb --- /dev/null +++ b/pandas/_libs/window/indexers.pyx @@ -0,0 +1,109 @@ +# cython: boundscheck=False, wraparound=False, cdivision=True + +import numpy as np +from numpy cimport ndarray, int64_t + +# Cython routines for window indexers + + +def calculate_variable_window_bounds( + int64_t num_values, + int64_t window_size, + object min_periods, # unused but here to match get_window_bounds signature + object center, # unused but here to match get_window_bounds signature + object closed, + const int64_t[:] index +): + """ + Calculate window boundaries 
for rolling windows from a time offset. + + Parameters + ---------- + num_values : int64 + total number of values + + window_size : int64 + window size calculated from the offset + + min_periods : object + ignored, exists for compatibility + + center : object + ignored, exists for compatibility + + closed : str + string of side of the window that should be closed + + index : ndarray[int64] + time series index to roll over + + Returns + ------- + (ndarray[int64], ndarray[int64]) + """ + cdef: + bint left_closed = False + bint right_closed = False + int index_growth_sign = 1 + ndarray[int64_t, ndim=1] start, end + int64_t start_bound, end_bound + Py_ssize_t i, j + + # if windows is variable, default is 'right', otherwise default is 'both' + if closed is None: + closed = 'right' if index is not None else 'both' + + if closed in ['right', 'both']: + right_closed = True + + if closed in ['left', 'both']: + left_closed = True + + if index[num_values - 1] < index[0]: + index_growth_sign = -1 + + start = np.empty(num_values, dtype='int64') + start.fill(-1) + end = np.empty(num_values, dtype='int64') + end.fill(-1) + + start[0] = 0 + + # right endpoint is closed + if right_closed: + end[0] = 1 + # right endpoint is open + else: + end[0] = 0 + + with nogil: + + # start is start of slice interval (including) + # end is end of slice interval (not including) + for i in range(1, num_values): + end_bound = index[i] + start_bound = index[i] - index_growth_sign * window_size + + # left endpoint is closed + if left_closed: + start_bound -= 1 + + # advance the start bound until we are + # within the constraint + start[i] = i + for j in range(start[i - 1], i): + if (index[j] - start_bound) * index_growth_sign > 0: + start[i] = j + break + + # end bound is previous end + # or current index + if (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: + end[i] = i + 1 + else: + end[i] = end[i - 1] + + # right endpoint is open + if not right_closed: + end[i] -= 1 + return start, end 
diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx new file mode 100644 index 00000000..e841ff78 --- /dev/null +++ b/pandas/_libs/writers.pyx @@ -0,0 +1,167 @@ +import cython +from cython import Py_ssize_t + +from cpython.bytes cimport PyBytes_GET_SIZE +from cpython.unicode cimport PyUnicode_GET_SIZE + +import numpy as np +from numpy cimport ndarray, uint8_t + + +ctypedef fused pandas_string: + str + bytes + + +@cython.boundscheck(False) +@cython.wraparound(False) +def write_csv_rows(list data, ndarray data_index, + Py_ssize_t nlevels, ndarray cols, object writer): + """ + Write the given data to the writer object, pre-allocating where possible + for performance improvements. + + Parameters + ---------- + data : list + data_index : ndarray + nlevels : int + cols : ndarray + writer : object + """ + # In crude testing, N>100 yields little marginal improvement + cdef: + Py_ssize_t i, j, k = len(data_index), N = 100, ncols = len(cols) + list rows + + # pre-allocate rows + rows = [[None] * (nlevels + ncols) for _ in range(N)] + + if nlevels == 1: + for j in range(k): + row = rows[j % N] + row[0] = data_index[j] + for i in range(ncols): + row[1 + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + elif nlevels > 1: + for j in range(k): + row = rows[j % N] + row[:nlevels] = list(data_index[j]) + for i in range(ncols): + row[nlevels + i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + else: + for j in range(k): + row = rows[j % N] + for i in range(ncols): + row[i] = data[i][j] + + if j >= N - 1 and j % N == N - 1: + writer.writerows(rows) + + if j >= 0 and (j < N - 1 or (j % N) != N - 1): + writer.writerows(rows[:((j + 1) % N)]) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def convert_json_to_lines(arr: object) -> str: + """ + replace comma separated json with line feeds, paying special attention + to quotes & brackets + """ + cdef: + Py_ssize_t i = 0, num_open_brackets_seen = 0, length 
+ bint in_quotes = 0, is_escaping = 0 + ndarray[uint8_t, ndim=1] narr + unsigned char val, newline, comma, left_bracket, right_bracket, quote + unsigned char backslash + + newline = ord('\n') + comma = ord(',') + left_bracket = ord('{') + right_bracket = ord('}') + quote = ord('"') + backslash = ord('\\') + + narr = np.frombuffer(arr.encode('utf-8'), dtype='u1').copy() + length = narr.shape[0] + for i in range(length): + val = narr[i] + if val == quote and i > 0 and not is_escaping: + in_quotes = ~in_quotes + if val == backslash or is_escaping: + is_escaping = ~is_escaping + if val == comma: # commas that should be \n + if num_open_brackets_seen == 0 and not in_quotes: + narr[i] = newline + elif val == left_bracket: + if not in_quotes: + num_open_brackets_seen += 1 + elif val == right_bracket: + if not in_quotes: + num_open_brackets_seen -= 1 + + return narr.tobytes().decode('utf-8') + + +# stata, pytables +@cython.boundscheck(False) +@cython.wraparound(False) +def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: + """ return the maximum size of elements in a 1-dim string array """ + cdef: + Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] + pandas_string val + + for i in range(length): + val = arr[i] + l = word_len(val) + + if l > m: + m = l + + return m + + +cpdef inline Py_ssize_t word_len(object val): + """ return the maximum length of a string or bytes value """ + cdef: + Py_ssize_t l = 0 + + if isinstance(val, str): + l = PyUnicode_GET_SIZE(val) + elif isinstance(val, bytes): + l = PyBytes_GET_SIZE(val) + + return l + +# ------------------------------------------------------------------ +# PyTables Helpers + + +@cython.boundscheck(False) +@cython.wraparound(False) +def string_array_replace_from_nan_rep( + ndarray[object, ndim=1] arr, object nan_rep, + object replace=None): + """ + Replace the values in the array with 'replacement' if + they are 'nan_rep'. Return the same array. 
+ """ + cdef: + Py_ssize_t length = len(arr), i = 0 + + if replace is None: + replace = np.nan + + for i in range(length): + if arr[i] == nan_rep: + arr[i] = replace + + return arr diff --git a/pandas/_testing.py b/pandas/_testing.py new file mode 100644 index 00000000..ca378e5c --- /dev/null +++ b/pandas/_testing.py @@ -0,0 +1,2755 @@ +import bz2 +from collections import Counter +from contextlib import contextmanager +from datetime import datetime +from functools import wraps +import gzip +import os +from shutil import rmtree +import string +import tempfile +from typing import Any, List, Optional, Union, cast +import warnings +import zipfile + +import numpy as np +from numpy.random import rand, randn + +from pandas._config.localization import ( # noqa:F401 + can_set_locale, + get_locales, + set_locale, +) + +import pandas._libs.testing as _testing +from pandas._typing import FilePathOrBuffer, FrameOrSeries +from pandas.compat import _get_lzma_file, _import_lzma + +from pandas.core.dtypes.common import ( + is_bool, + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_extension_array_dtype, + is_interval_dtype, + is_list_like, + is_number, + is_period_dtype, + is_sequence, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.missing import array_equivalent + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + RangeIndex, + Series, + bdate_range, +) +from pandas.core.algorithms import take_1d +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, + period_array, +) + +from pandas.io.common import urlopen +from pandas.io.formats.printing import pprint_thing + +lzma = _import_lzma() + +N = 30 +K = 4 +_RAISE_NETWORK_ERROR_DEFAULT = False + +# set testing_mode +_testing_mode_warnings = (DeprecationWarning, ResourceWarning) + + +def set_testing_mode(): + # set the 
def set_testing_mode():
    """
    Turn on "always" reporting for the testing-mode warning categories.

    Only has an effect when the ``PANDAS_TESTING_MODE`` environment variable
    contains the substring ``"deprecate"``.
    """
    testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" in testing_mode:
        # warnings.simplefilter is documented to take a single Warning
        # subclass, so register one filter per category rather than handing
        # it the whole tuple.
        for category in _testing_mode_warnings:
            warnings.simplefilter("always", category)


def reset_testing_mode():
    """
    Silence the testing-mode warning categories again.

    Only has an effect when the ``PANDAS_TESTING_MODE`` environment variable
    contains the substring ``"deprecate"``.
    """
    testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
    if "deprecate" in testing_mode:
        # One filter per category, mirroring set_testing_mode.
        for category in _testing_mode_warnings:
            warnings.simplefilter("ignore", category)


# Apply the testing-mode filters as soon as the module is imported.
set_testing_mode()


def reset_display_options():
    """
    Reset the display options for printing and representing objects.
    """
    pd.reset_option("^display.", silent=True)


def round_trip_pickle(
    obj: Any, path: Optional[FilePathOrBuffer] = None
) -> FrameOrSeries:
    """
    Pickle an object and then read it again.

    Parameters
    ----------
    obj : any object
        The object to pickle and then re-read.
    path : str, path object or file-like object, default None
        The path where the pickled object is written and then read.
        When None, a randomly named ``__<10 chars>__.pickle`` file is used.

    Returns
    -------
    pandas object
        The object obtained by reading the pickle back.
    """
    _path = path
    if _path is None:
        _path = f"__{rands(10)}__.pickle"
    # ensure_clean yields the path to use and cleans it up on exit.
    with ensure_clean(_path) as temp_path:
        pd.to_pickle(obj, temp_path)
        return pd.read_pickle(temp_path)
def round_trip_pathlib(writer, reader, path: Optional[str] = None):
    """
    Write an object to file specified by a pathlib.Path and read it back.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv )
    reader : callable
        IO reading function (e.g. pd.read_csv )
    path : str, default None
        The path where the object is written and then read.
        Defaults to ``"___pathlib___"``.

    Returns
    -------
    pandas object
        The object produced by ``reader`` after the round trip.
    """
    import pytest

    Path = pytest.importorskip("pathlib").Path
    if path is None:
        path = "___pathlib___"
    with ensure_clean(path) as temp_path:
        writer(Path(temp_path))
        obj = reader(Path(temp_path))
    return obj


def round_trip_localpath(writer, reader, path: Optional[str] = None):
    """
    Write an object to file specified by a py.path LocalPath and read it back.

    Parameters
    ----------
    writer : callable bound to pandas object
        IO writing function (e.g. DataFrame.to_csv )
    reader : callable
        IO reading function (e.g. pd.read_csv )
    path : str, default None
        The path where the object is written and then read.
        Defaults to ``"___localpath___"``.

    Returns
    -------
    pandas object
        The object produced by ``reader`` after the round trip.
    """
    import pytest

    LocalPath = pytest.importorskip("py.path").local
    if path is None:
        path = "___localpath___"
    with ensure_clean(path) as temp_path:
        writer(LocalPath(temp_path))
        obj = reader(LocalPath(temp_path))
    return obj


@contextmanager
def decompress_file(path, compression):
    """
    Context manager that opens a (possibly compressed) file for binary reading.

    Parameters
    ----------
    path : str
        The path where the file is read from.

    compression : {'gzip', 'bz2', 'zip', 'xz', None}
        Name of the decompression to use

    Yields
    ------
    file object
        Closed automatically when the ``with`` block exits.

    Raises
    ------
    ValueError
        If ``compression`` is not recognised, or a ZIP archive does not
        contain exactly one member.
    """
    if compression is None:
        f = open(path, "rb")
    elif compression == "gzip":
        f = gzip.open(path, "rb")
    elif compression == "bz2":
        f = bz2.BZ2File(path, "rb")
    elif compression == "xz":
        f = _get_lzma_file(lzma)(path, "rb")
    elif compression == "zip":
        zip_file = zipfile.ZipFile(path)
        zip_names = zip_file.namelist()
        if len(zip_names) != 1:
            # Release the archive handle before bailing out; otherwise the
            # underlying file stays open until garbage collection.
            zip_file.close()
            raise ValueError(f"ZIP file {path} error. Only one file per ZIP.")
        f = zip_file.open(zip_names[0])
    else:
        raise ValueError(f"Unrecognized compression type: {compression}")

    try:
        yield f
    finally:
        f.close()
        if compression == "zip":
            zip_file.close()


def write_to_compressed(compression, path, data, dest="test"):
    """
    Write data to a compressed file.

    Parameters
    ----------
    compression : {'gzip', 'bz2', 'zip', 'xz'}
        The compression type to use.
    path : str
        The file path to write the data.
    data : bytes
        The data to write. The gzip/bz2/xz targets are opened in binary
        mode ("wb"); for ZIP the payload is handed to ``ZipFile.writestr``.
    dest : str, default "test"
        The destination file (for ZIP only)

    Raises
    ------
    ValueError : An invalid compression value was passed in.
    """
    if compression == "zip":
        compress_method = zipfile.ZipFile
    elif compression == "gzip":
        compress_method = gzip.GzipFile
    elif compression == "bz2":
        compress_method = bz2.BZ2File
    elif compression == "xz":
        # Resolved lazily so a missing lzma only matters when xz is requested.
        compress_method = _get_lzma_file(lzma)
    else:
        raise ValueError(f"Unrecognized compression type: {compression}")

    if compression == "zip":
        # ZIP archives store named members, so the member name comes first.
        mode = "w"
        args = (dest, data)
        method = "writestr"
    else:
        mode = "wb"
        args = (data,)
        method = "write"

    with compress_method(path, mode=mode) as f:
        getattr(f, method)(*args)
5 digits (False) or 3 digits (True) + after decimal points are compared. If int, then specify the number + of digits to compare. + + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + """ + if isinstance(left, pd.Index): + assert_index_equal( + left, + right, + check_exact=False, + exact=check_dtype, + check_less_precise=check_less_precise, + **kwargs, + ) + + elif isinstance(left, pd.Series): + assert_series_equal( + left, + right, + check_exact=False, + check_dtype=check_dtype, + check_less_precise=check_less_precise, + **kwargs, + ) + + elif isinstance(left, pd.DataFrame): + assert_frame_equal( + left, + right, + check_exact=False, + check_dtype=check_dtype, + check_less_precise=check_less_precise, + **kwargs, + ) + + else: + # Other sequences. + if check_dtype: + if is_number(left) and is_number(right): + # Do not compare numeric classes, like np.float64 and float. + pass + elif is_bool(left) and is_bool(right): + # Do not compare bool classes, like np.bool_ and bool. + pass + else: + if isinstance(left, np.ndarray) or isinstance(right, np.ndarray): + obj = "numpy array" + else: + obj = "Input" + assert_class_equal(left, right, obj=obj) + _testing.assert_almost_equal( + left, + right, + check_dtype=check_dtype, + check_less_precise=check_less_precise, + **kwargs, + ) + + +def _check_isinstance(left, right, cls): + """ + Helper method for our assert_* methods that ensures that + the two objects being compared have the right type before + proceeding with the comparison. + + Parameters + ---------- + left : The first object being compared. + right : The second object being compared. + cls : The class type to check against. 
+ + Raises + ------ + AssertionError : Either `left` or `right` is not an instance of `cls`. + """ + cls_name = cls.__name__ + + if not isinstance(left, cls): + raise AssertionError( + f"{cls_name} Expected type {cls}, found {type(left)} instead" + ) + if not isinstance(right, cls): + raise AssertionError( + f"{cls_name} Expected type {cls}, found {type(right)} instead" + ) + + +def assert_dict_equal(left, right, compare_keys: bool = True): + + _check_isinstance(left, right, dict) + _testing.assert_dict_equal(left, right, compare_keys=compare_keys) + + +def randbool(size=(), p: float = 0.5): + return rand(*size) <= p + + +RANDS_CHARS = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1)) +RANDU_CHARS = np.array( + list("".join(map(chr, range(1488, 1488 + 26))) + string.digits), + dtype=(np.unicode_, 1), +) + + +def rands_array(nchars, size, dtype="O"): + """ + Generate an array of byte strings. + """ + retval = ( + np.random.choice(RANDS_CHARS, size=nchars * np.prod(size)) + .view((np.str_, nchars)) + .reshape(size) + ) + if dtype is None: + return retval + else: + return retval.astype(dtype) + + +def randu_array(nchars, size, dtype="O"): + """ + Generate an array of unicode strings. + """ + retval = ( + np.random.choice(RANDU_CHARS, size=nchars * np.prod(size)) + .view((np.unicode_, nchars)) + .reshape(size) + ) + if dtype is None: + return retval + else: + return retval.astype(dtype) + + +def rands(nchars): + """ + Generate one random byte string. + + See `rands_array` if you want to create an array of random strings. + + """ + return "".join(np.random.choice(RANDS_CHARS, nchars)) + + +def randu(nchars): + """ + Generate one random unicode string. + + See `randu_array` if you want to create an array of random unicode strings. 
+ + """ + return "".join(np.random.choice(RANDU_CHARS, nchars)) + + +def close(fignum=None): + from matplotlib.pyplot import get_fignums, close as _close + + if fignum is None: + for fignum in get_fignums(): + _close(fignum) + else: + _close(fignum) + + +# ----------------------------------------------------------------------------- +# contextmanager to ensure the file cleanup + + +@contextmanager +def ensure_clean(filename=None, return_filelike=False): + """ + Gets a temporary path and agrees to remove on close. + + Parameters + ---------- + filename : str (optional) + if None, creates a temporary file which is then removed when out of + scope. if passed, creates temporary file with filename as ending. + return_filelike : bool (default False) + if True, returns a file-like which is *always* cleaned. Necessary for + savefig and other functions which want to append extensions. + """ + filename = filename or "" + fd = None + + if return_filelike: + f = tempfile.TemporaryFile(suffix=filename) + try: + yield f + finally: + f.close() + else: + # don't generate tempfile if using a path with directory specified + if len(os.path.dirname(filename)): + raise ValueError("Can't pass a qualified name to ensure_clean()") + + try: + fd, filename = tempfile.mkstemp(suffix=filename) + except UnicodeEncodeError: + import pytest + + pytest.skip("no unicode file names on this system") + + try: + yield filename + finally: + try: + os.close(fd) + except OSError: + print(f"Couldn't close file descriptor: {fd} (file: {filename})") + try: + if os.path.exists(filename): + os.remove(filename) + except OSError as e: + print(f"Exception on removing file: {e}") + + +@contextmanager +def ensure_clean_dir(): + """ + Get a temporary directory path and agrees to remove on close. 
+ + Yields + ------ + Temporary directory path + """ + directory_name = tempfile.mkdtemp(suffix="") + try: + yield directory_name + finally: + try: + rmtree(directory_name) + except OSError: + pass + + +@contextmanager +def ensure_safe_environment_variables(): + """ + Get a context manager to safely set environment variables + + All changes will be undone on close, hence environment variables set + within this contextmanager will neither persist nor change global state. + """ + saved_environ = dict(os.environ) + try: + yield + finally: + os.environ.clear() + os.environ.update(saved_environ) + + +# ----------------------------------------------------------------------------- +# Comparators + + +def equalContents(arr1, arr2) -> bool: + """ + Checks if the set of unique elements of arr1 and arr2 are equivalent. + """ + return frozenset(arr1) == frozenset(arr2) + + +def assert_index_equal( + left: Index, + right: Index, + exact: Union[bool, str] = "equiv", + check_names: bool = True, + check_less_precise: Union[bool, int] = False, + check_exact: bool = True, + check_categorical: bool = True, + obj: str = "Index", +) -> None: + """ + Check that left and right Index are equal. + + Parameters + ---------- + left : Index + right : Index + exact : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. + check_names : bool, default True + Whether to check the names attribute. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + check_exact : bool, default True + Whether to compare number exactly. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. 
+ obj : str, default 'Index' + Specify object name being compared, internally used to show appropriate + assertion message. + """ + __tracebackhide__ = True + + def _check_types(l, r, obj="Index"): + if exact: + assert_class_equal(l, r, exact=exact, obj=obj) + + # Skip exact dtype checking when `check_categorical` is False + if check_categorical: + assert_attr_equal("dtype", l, r, obj=obj) + + # allow string-like to have different inferred_types + if l.inferred_type in ("string", "unicode"): + assert r.inferred_type in ("string", "unicode") + else: + assert_attr_equal("inferred_type", l, r, obj=obj) + + def _get_ilevel_values(index, level): + # accept level number only + unique = index.levels[level] + level_codes = index.codes[level] + filled = take_1d(unique._values, level_codes, fill_value=unique._na_value) + values = unique._shallow_copy(filled, name=index.names[level]) + return values + + # instance validation + _check_isinstance(left, right, Index) + + # class / dtype comparison + _check_types(left, right, obj=obj) + + # level comparison + if left.nlevels != right.nlevels: + msg1 = f"{obj} levels are different" + msg2 = f"{left.nlevels}, {left}" + msg3 = f"{right.nlevels}, {right}" + raise_assert_detail(obj, msg1, msg2, msg3) + + # length comparison + if len(left) != len(right): + msg1 = f"{obj} length are different" + msg2 = f"{len(left)}, {left}" + msg3 = f"{len(right)}, {right}" + raise_assert_detail(obj, msg1, msg2, msg3) + + # MultiIndex special comparison for little-friendly error messages + if left.nlevels > 1: + left = cast(MultiIndex, left) + right = cast(MultiIndex, right) + + for level in range(left.nlevels): + # cannot use get_level_values here because it can change dtype + llevel = _get_ilevel_values(left, level) + rlevel = _get_ilevel_values(right, level) + + lobj = f"MultiIndex level [{level}]" + assert_index_equal( + llevel, + rlevel, + exact=exact, + check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, + 
obj=lobj, + ) + # get_level_values may change dtype + _check_types(left.levels[level], right.levels[level], obj=obj) + + # skip exact index checking when `check_categorical` is False + if check_exact and check_categorical: + if not left.equals(right): + diff = np.sum((left.values != right.values).astype(int)) * 100.0 / len(left) + msg = f"{obj} values are different ({np.round(diff, 5)} %)" + raise_assert_detail(obj, msg, left, right) + else: + _testing.assert_almost_equal( + left.values, + right.values, + check_less_precise=check_less_precise, + check_dtype=exact, + obj=obj, + lobj=left, + robj=right, + ) + + # metadata comparison + if check_names: + assert_attr_equal("names", left, right, obj=obj) + if isinstance(left, pd.PeriodIndex) or isinstance(right, pd.PeriodIndex): + assert_attr_equal("freq", left, right, obj=obj) + if isinstance(left, pd.IntervalIndex) or isinstance(right, pd.IntervalIndex): + assert_interval_array_equal(left.values, right.values) + + if check_categorical: + if is_categorical_dtype(left) or is_categorical_dtype(right): + assert_categorical_equal(left.values, right.values, obj=f"{obj} category") + + +def assert_class_equal(left, right, exact: Union[bool, str] = True, obj="Input"): + """ + Checks classes are equal. 
+ """ + __tracebackhide__ = True + + def repr_class(x): + if isinstance(x, Index): + # return Index as it is to include values in the error message + return x + + try: + return type(x).__name__ + except AttributeError: + return repr(type(x)) + + if exact == "equiv": + if type(left) != type(right): + # allow equivalence of Int64Index/RangeIndex + types = {type(left).__name__, type(right).__name__} + if len(types - {"Int64Index", "RangeIndex"}): + msg = f"{obj} classes are not equivalent" + raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) + elif exact: + if type(left) != type(right): + msg = f"{obj} classes are different" + raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) + + +def assert_attr_equal(attr, left, right, obj="Attributes"): + """checks attributes are equal. Both objects must have attribute. + + Parameters + ---------- + attr : str + Attribute name being compared. + left : object + right : object + obj : str, default 'Attributes' + Specify object name being compared, internally used to show appropriate + assertion message + """ + __tracebackhide__ = True + + left_attr = getattr(left, attr) + right_attr = getattr(right, attr) + + if left_attr is right_attr: + return True + elif ( + is_number(left_attr) + and np.isnan(left_attr) + and is_number(right_attr) + and np.isnan(right_attr) + ): + # np.nan + return True + + try: + result = left_attr == right_attr + except TypeError: + # datetimetz on rhs may raise TypeError + result = False + if not isinstance(result, bool): + result = result.all() + + if result: + return True + else: + msg = f'Attribute "{attr}" are different' + raise_assert_detail(obj, msg, left_attr, right_attr) + + +def assert_is_valid_plot_return_object(objs): + import matplotlib.pyplot as plt + + if isinstance(objs, (pd.Series, np.ndarray)): + for el in objs.ravel(): + msg = ( + "one of 'objs' is not a matplotlib Axes instance, " + f"type encountered {repr(type(el).__name__)}" + ) + assert isinstance(el, 
(plt.Axes, dict)), msg + else: + msg = ( + "objs is neither an ndarray of Artist instances nor a single " + "ArtistArtist instance, tuple, or dict, 'objs' is a " + f"{repr(type(objs).__name__)}" + ) + assert isinstance(objs, (plt.Artist, tuple, dict)), msg + + +def isiterable(obj): + return hasattr(obj, "__iter__") + + +def assert_is_sorted(seq): + """Assert that the sequence is sorted.""" + if isinstance(seq, (Index, Series)): + seq = seq.values + # sorting does not change precisions + assert_numpy_array_equal(seq, np.sort(np.array(seq))) + + +def assert_categorical_equal( + left, right, check_dtype=True, check_category_order=True, obj="Categorical" +): + """Test that Categoricals are equivalent. + + Parameters + ---------- + left : Categorical + right : Categorical + check_dtype : bool, default True + Check that integer dtype of the codes are the same + check_category_order : bool, default True + Whether the order of the categories should be compared, which + implies identical integer codes. If False, only the resulting + values are compared. The ordered attribute is + checked regardless. + obj : str, default 'Categorical' + Specify object name being compared, internally used to show appropriate + assertion message + """ + _check_isinstance(left, right, Categorical) + + if check_category_order: + assert_index_equal(left.categories, right.categories, obj=f"{obj}.categories") + assert_numpy_array_equal( + left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes", + ) + else: + assert_index_equal( + left.categories.sort_values(), + right.categories.sort_values(), + obj=f"{obj}.categories", + ) + assert_index_equal( + left.categories.take(left.codes), + right.categories.take(right.codes), + obj=f"{obj}.values", + ) + + assert_attr_equal("ordered", left, right, obj=obj) + + +def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray"): + """Test that two IntervalArrays are equivalent. 
+ + Parameters + ---------- + left, right : IntervalArray + The IntervalArrays to compare. + exact : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substituted for + Int64Index as well. + obj : str, default 'IntervalArray' + Specify object name being compared, internally used to show appropriate + assertion message + """ + _check_isinstance(left, right, IntervalArray) + + assert_index_equal(left.left, right.left, exact=exact, obj=f"{obj}.left") + assert_index_equal(left.right, right.right, exact=exact, obj=f"{obj}.left") + assert_attr_equal("closed", left, right, obj=obj) + + +def assert_period_array_equal(left, right, obj="PeriodArray"): + _check_isinstance(left, right, PeriodArray) + + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}.values") + assert_attr_equal("freq", left, right, obj=obj) + + +def assert_datetime_array_equal(left, right, obj="DatetimeArray"): + __tracebackhide__ = True + _check_isinstance(left, right, DatetimeArray) + + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + assert_attr_equal("freq", left, right, obj=obj) + assert_attr_equal("tz", left, right, obj=obj) + + +def assert_timedelta_array_equal(left, right, obj="TimedeltaArray"): + __tracebackhide__ = True + _check_isinstance(left, right, TimedeltaArray) + assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") + assert_attr_equal("freq", left, right, obj=obj) + + +def raise_assert_detail(obj, message, left, right, diff=None): + __tracebackhide__ = True + + if isinstance(left, np.ndarray): + left = pprint_thing(left) + elif is_categorical_dtype(left): + left = repr(left) + + if isinstance(right, np.ndarray): + right = pprint_thing(right) + elif is_categorical_dtype(right): + right = repr(right) + + msg = f"""{obj} are different + +{message} +[left]: {left} +[right]: {right}""" + + if diff is not None: + msg += f"\n[diff]: {diff}" + + raise 
AssertionError(msg) + + +def assert_numpy_array_equal( + left, + right, + strict_nan=False, + check_dtype=True, + err_msg=None, + check_same=None, + obj="numpy array", +): + """ + Check that 'np.ndarray' is equivalent. + + Parameters + ---------- + left, right : numpy.ndarray or iterable + The two arrays to be compared. + strict_nan : bool, default False + If True, consider NaN and None to be different. + check_dtype : bool, default True + Check dtype if both a and b are np.ndarray. + err_msg : str, default None + If provided, used as assertion message. + check_same : None|'copy'|'same', default None + Ensure left and right refer/do not refer to the same memory area. + obj : str, default 'numpy array' + Specify object name being compared, internally used to show appropriate + assertion message. + """ + __tracebackhide__ = True + + # instance validation + # Show a detailed error message when classes are different + assert_class_equal(left, right, obj=obj) + # both classes must be an np.ndarray + _check_isinstance(left, right, np.ndarray) + + def _get_base(obj): + return obj.base if getattr(obj, "base", None) is not None else obj + + left_base = _get_base(left) + right_base = _get_base(right) + + if check_same == "same": + if left_base is not right_base: + raise AssertionError(f"{repr(left_base)} is not {repr(right_base)}") + elif check_same == "copy": + if left_base is right_base: + raise AssertionError(f"{repr(left_base)} is {repr(right_base)}") + + def _raise(left, right, err_msg): + if err_msg is None: + if left.shape != right.shape: + raise_assert_detail( + obj, f"{obj} shapes are different", left.shape, right.shape, + ) + + diff = 0 + for l, r in zip(left, right): + # count up differences + if not array_equivalent(l, r, strict_nan=strict_nan): + diff += 1 + + diff = diff * 100.0 / left.size + msg = f"{obj} values are different ({np.round(diff, 5)} %)" + raise_assert_detail(obj, msg, left, right) + + raise AssertionError(err_msg) + + # compare shape and values + 
if not array_equivalent(left, right, strict_nan=strict_nan): + _raise(left, right, err_msg) + + if check_dtype: + if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): + assert_attr_equal("dtype", left, right, obj=obj) + + +def assert_extension_array_equal( + left, right, check_dtype=True, check_less_precise=False, check_exact=False +): + """Check that left and right ExtensionArrays are equal. + + Parameters + ---------- + left, right : ExtensionArray + The two arrays to compare + check_dtype : bool, default True + Whether to check if the ExtensionArray dtypes are identical. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + check_exact : bool, default False + Whether to compare number exactly. + + Notes + ----- + Missing values are checked separately from valid values. + A mask of missing values is computed for each and checked to match. + The remaining all-valid values are cast to object dtype and checked. 
+ """ + assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" + assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" + if check_dtype: + assert_attr_equal("dtype", left, right, obj="ExtensionArray") + + if hasattr(left, "asi8") and type(right) == type(left): + # Avoid slow object-dtype comparisons + assert_numpy_array_equal(left.asi8, right.asi8) + return + + left_na = np.asarray(left.isna()) + right_na = np.asarray(right.isna()) + assert_numpy_array_equal(left_na, right_na, obj="ExtensionArray NA mask") + + left_valid = np.asarray(left[~left_na].astype(object)) + right_valid = np.asarray(right[~right_na].astype(object)) + if check_exact: + assert_numpy_array_equal(left_valid, right_valid, obj="ExtensionArray") + else: + _testing.assert_almost_equal( + left_valid, + right_valid, + check_dtype=check_dtype, + check_less_precise=check_less_precise, + obj="ExtensionArray", + ) + + +# This could be refactored to use the NDFrame.equals method +def assert_series_equal( + left, + right, + check_dtype=True, + check_index_type="equiv", + check_series_type=True, + check_less_precise=False, + check_names=True, + check_exact=False, + check_datetimelike_compat=False, + check_categorical=True, + check_category_order=True, + obj="Series", +): + """ + Check that left and right Series are equal. + + Parameters + ---------- + left : Series + right : Series + check_dtype : bool, default True + Whether to check the Series dtype is identical. + check_index_type : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. + check_series_type : bool, default True + Whether to check the Series class is identical. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. 
+ + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + check_names : bool, default True + Whether to check the Series and Index names attribute. + check_exact : bool, default False + Whether to compare number exactly. + check_datetimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_category_order : bool, default True + Whether to compare category order of internal Categoricals + + .. versionadded:: 1.0.2 + obj : str, default 'Series' + Specify object name being compared, internally used to show appropriate + assertion message. + """ + __tracebackhide__ = True + + # instance validation + _check_isinstance(left, right, Series) + + if check_series_type: + # ToDo: There are some tests using rhs is sparse + # lhs is dense. Should use assert_class_equal in future + assert isinstance(left, type(right)) + # assert_class_equal(left, right, obj=obj) + + # length comparison + if len(left) != len(right): + msg1 = f"{len(left)}, {left.index}" + msg2 = f"{len(right)}, {right.index}" + raise_assert_detail(obj, "Series length are different", msg1, msg2) + + # index comparison + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_categorical=check_categorical, + obj=f"{obj}.index", + ) + + if check_dtype: + # We want to skip exact dtype checking when `check_categorical` + # is False. 
We'll still raise if only one is a `Categorical`, + # regardless of `check_categorical` + if ( + is_categorical_dtype(left) + and is_categorical_dtype(right) + and not check_categorical + ): + pass + else: + assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") + + if check_exact: + assert_numpy_array_equal( + left._internal_get_values(), + right._internal_get_values(), + check_dtype=check_dtype, + obj=str(obj), + ) + elif check_datetimelike_compat: + # we want to check only if we have compat dtypes + # e.g. integer and M|m are NOT compat, but we can simply check + # the values in that case + if needs_i8_conversion(left) or needs_i8_conversion(right): + + # datetimelike may have different objects (e.g. datetime.datetime + # vs Timestamp) but will compare equal + if not Index(left.values).equals(Index(right.values)): + msg = ( + f"[datetimelike_compat=True] {left.values} " + f"is not equal to {right.values}." + ) + raise AssertionError(msg) + else: + assert_numpy_array_equal( + left._internal_get_values(), + right._internal_get_values(), + check_dtype=check_dtype, + ) + elif is_interval_dtype(left) or is_interval_dtype(right): + assert_interval_array_equal(left.array, right.array) + elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype): + # .values is an ndarray, but ._values is the ExtensionArray. 
+ # TODO: Use .array + assert is_extension_array_dtype(right.dtype) + assert_extension_array_equal(left._values, right._values) + elif ( + is_extension_array_dtype(left) + and not is_categorical_dtype(left) + and is_extension_array_dtype(right) + and not is_categorical_dtype(right) + ): + assert_extension_array_equal(left.array, right.array) + else: + _testing.assert_almost_equal( + left._internal_get_values(), + right._internal_get_values(), + check_less_precise=check_less_precise, + check_dtype=check_dtype, + obj=str(obj), + ) + + # metadata comparison + if check_names: + assert_attr_equal("name", left, right, obj=obj) + + if check_categorical: + if is_categorical_dtype(left) or is_categorical_dtype(right): + assert_categorical_equal( + left.values, + right.values, + obj=f"{obj} category", + check_category_order=check_category_order, + ) + + +# This could be refactored to use the NDFrame.equals method +def assert_frame_equal( + left, + right, + check_dtype=True, + check_index_type="equiv", + check_column_type="equiv", + check_frame_type=True, + check_less_precise=False, + check_names=True, + by_blocks=False, + check_exact=False, + check_datetimelike_compat=False, + check_categorical=True, + check_like=False, + obj="DataFrame", +): + """ + Check that left and right DataFrame are equal. + + This function is intended to compare two DataFrames and output any + differences. Is is mostly intended for use in unit tests. + Additional parameters allow varying the strictness of the + equality checks performed. + + Parameters + ---------- + left : DataFrame + First DataFrame to compare. + right : DataFrame + Second DataFrame to compare. + check_dtype : bool, default True + Whether to check the DataFrame dtype is identical. + check_index_type : bool or {'equiv'}, default 'equiv' + Whether to check the Index class, dtype and inferred_type + are identical. 
+ check_column_type : bool or {'equiv'}, default 'equiv' + Whether to check the columns class, dtype and inferred_type + are identical. Is passed as the ``exact`` argument of + :func:`assert_index_equal`. + check_frame_type : bool, default True + Whether to check the DataFrame class is identical. + check_less_precise : bool or int, default False + Specify comparison precision. Only used when check_exact is False. + 5 digits (False) or 3 digits (True) after decimal points are compared. + If int, then specify the digits to compare. + + When comparing two numbers, if the first number has magnitude less + than 1e-5, we compare the two numbers directly and check whether + they are equivalent within the specified precision. Otherwise, we + compare the **ratio** of the second number to the first number and + check whether it is equivalent to 1 within the specified precision. + check_names : bool, default True + Whether to check that the `names` attribute for both the `index` + and `column` attributes of the DataFrame is identical. + by_blocks : bool, default False + Specify how to compare internal data. If False, compare by columns. + If True, compare by blocks. + check_exact : bool, default False + Whether to compare number exactly. + check_datetimelike_compat : bool, default False + Compare datetime-like which is comparable ignoring dtype. + check_categorical : bool, default True + Whether to compare internal Categorical exactly. + check_like : bool, default False + If True, ignore the order of index & columns. + Note: index labels must match their respective rows + (same as in columns) - same labels must be with the same data. + obj : str, default 'DataFrame' + Specify object name being compared, internally used to show appropriate + assertion message. + + See Also + -------- + assert_series_equal : Equivalent method for asserting Series equality. + DataFrame.equals : Check DataFrame equality. 
+ + Examples + -------- + This example shows comparing two DataFrames that are equal + but with columns of differing dtypes. + + >>> from pandas._testing import assert_frame_equal + >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) + >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) + + df1 equals itself. + + >>> assert_frame_equal(df1, df1) + + df1 differs from df2 as column 'b' is of a different type. + + >>> assert_frame_equal(df1, df2) + Traceback (most recent call last): + ... + AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different + + Attribute "dtype" are different + [left]: int64 + [right]: float64 + + Ignore differing dtypes in columns with check_dtype. + + >>> assert_frame_equal(df1, df2, check_dtype=False) + """ + __tracebackhide__ = True + + # instance validation + _check_isinstance(left, right, DataFrame) + + if check_frame_type: + assert isinstance(left, type(right)) + # assert_class_equal(left, right, obj=obj) + + # shape comparison + if left.shape != right.shape: + raise_assert_detail( + obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}", + ) + + if check_like: + left, right = left.reindex_like(right), right + + # index comparison + assert_index_equal( + left.index, + right.index, + exact=check_index_type, + check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_categorical=check_categorical, + obj=f"{obj}.index", + ) + + # column comparison + assert_index_equal( + left.columns, + right.columns, + exact=check_column_type, + check_names=check_names, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_categorical=check_categorical, + obj=f"{obj}.columns", + ) + + # compare by blocks + if by_blocks: + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() + for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): + assert dtype in lblocks + assert dtype in rblocks + assert_frame_equal( + 
lblocks[dtype], rblocks[dtype], check_dtype=check_dtype, obj=obj + ) + + # compare by columns + else: + for i, col in enumerate(left.columns): + assert col in right + lcol = left.iloc[:, i] + rcol = right.iloc[:, i] + assert_series_equal( + lcol, + rcol, + check_dtype=check_dtype, + check_index_type=check_index_type, + check_less_precise=check_less_precise, + check_exact=check_exact, + check_names=check_names, + check_datetimelike_compat=check_datetimelike_compat, + check_categorical=check_categorical, + obj=f'{obj}.iloc[:, {i}] (column name="{col}")', + ) + + +def assert_equal(left, right, **kwargs): + """ + Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. + + Parameters + ---------- + left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray + The two items to be compared. + **kwargs + All keyword arguments are passed through to the underlying assert method. + """ + __tracebackhide__ = True + + if isinstance(left, pd.Index): + assert_index_equal(left, right, **kwargs) + elif isinstance(left, pd.Series): + assert_series_equal(left, right, **kwargs) + elif isinstance(left, pd.DataFrame): + assert_frame_equal(left, right, **kwargs) + elif isinstance(left, IntervalArray): + assert_interval_array_equal(left, right, **kwargs) + elif isinstance(left, PeriodArray): + assert_period_array_equal(left, right, **kwargs) + elif isinstance(left, DatetimeArray): + assert_datetime_array_equal(left, right, **kwargs) + elif isinstance(left, TimedeltaArray): + assert_timedelta_array_equal(left, right, **kwargs) + elif isinstance(left, ExtensionArray): + assert_extension_array_equal(left, right, **kwargs) + elif isinstance(left, np.ndarray): + assert_numpy_array_equal(left, right, **kwargs) + elif isinstance(left, str): + assert kwargs == {} + assert left == right + else: + raise NotImplementedError(type(left)) + + +def box_expected(expected, box_cls, transpose=True): + """ + Helper function to wrap the expected output of a test in a given 
def assert_sp_array_equal(
    left,
    right,
    check_dtype=True,
    check_kind=True,
    check_fill_value=True,
    consolidate_block_indices=False,
):
    """
    Check that the left and right SparseArray are equal.

    Parameters
    ----------
    left : SparseArray
    right : SparseArray
    check_dtype : bool, default True
        Whether to check the data dtype is identical.
    check_kind : bool, default True
        Whether to check the kind of the sparse index. When False, both
        sparse indexes are converted to block indexes before comparison.
    check_fill_value : bool, default True
        Whether to check that left.fill_value matches right.fill_value.
    consolidate_block_indices : bool, default False
        Whether to consolidate contiguous blocks for sparse arrays with
        a BlockIndex. Some operations, e.g. concat, will end up with
        block indices that could be consolidated. Setting this to true will
        create a new BlockIndex for that array, with consolidated
        block indices.

    Raises
    ------
    AssertionError
        If any of the enabled checks fails.
    """
    _check_isinstance(left, right, pd.arrays.SparseArray)

    assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype)

    # SparseIndex comparison
    assert isinstance(left.sp_index, pd._libs.sparse.SparseIndex)
    assert isinstance(right.sp_index, pd._libs.sparse.SparseIndex)

    if not check_kind:
        left_index = left.sp_index.to_block_index()
        right_index = right.sp_index.to_block_index()
    else:
        left_index = left.sp_index
        right_index = right.sp_index

    if consolidate_block_indices and left.kind == "block":
        # Round-trip through an int index so adjacent blocks are merged.
        # we'll probably remove this hack...
        left_index = left_index.to_int_index().to_block_index()
        right_index = right_index.to_int_index().to_block_index()

    if not left_index.equals(right_index):
        raise_assert_detail(
            "SparseArray.index", "index are not equal", left_index, right_index
        )

    if check_fill_value:
        assert_attr_equal("fill_value", left, right)
    if check_dtype:
        assert_attr_equal("dtype", left, right)
    assert_numpy_array_equal(left.to_dense(), right.to_dense(), check_dtype=check_dtype)
def makeBoolIndex(k=10, name=None):
    """
    Make a length-k Index of booleans.

    Parameters
    ----------
    k : int, default 10
        Number of entries. Values of 0 or less give an empty Index.
    name : object, optional
        Name for the resulting Index.

    Returns
    -------
    Index
        ``[True]`` for ``k == 1``; otherwise ``[False, True]`` followed by
        ``k - 2`` ``False`` entries.
    """
    if k == 1:
        values = [True]
    else:
        values = [False, True] + [False] * (k - 2)
    # Truncate so that k <= 0 yields an empty index rather than the
    # two-element [False, True] seed.
    return Index(values[: max(k, 0)], name=name)
**kwargs): + return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs) + + +def makePeriodIndex(k=10, name=None, **kwargs): + dt = datetime(2000, 1, 1) + dr = pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) + return dr + + +def makeMultiIndex(k=10, names=None, **kwargs): + return MultiIndex.from_product((("foo", "bar"), (1, 2)), names=names, **kwargs) + + +_names = [ + "Alice", + "Bob", + "Charlie", + "Dan", + "Edith", + "Frank", + "George", + "Hannah", + "Ingrid", + "Jerry", + "Kevin", + "Laura", + "Michael", + "Norbert", + "Oliver", + "Patricia", + "Quinn", + "Ray", + "Sarah", + "Tim", + "Ursula", + "Victor", + "Wendy", + "Xavier", + "Yvonne", + "Zelda", +] + + +def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None): + """ + Make a DataFrame with a DatetimeIndex + + Parameters + ---------- + start : str or Timestamp, default "2000-01-01" + The start of the index. Passed to date_range with `freq`. + end : str or Timestamp, default "2000-12-31" + The end of the index. Passed to date_range with `freq`. + freq : str or Freq + The frequency to use for the DatetimeIndex + seed : int, optional + The random state seed. + + * name : object dtype with string names + * id : int dtype with + * x, y : float dtype + + Examples + -------- + >>> _make_timeseries() + id name x y + timestamp + 2000-01-01 982 Frank 0.031261 0.986727 + 2000-01-02 1025 Edith -0.086358 -0.032920 + 2000-01-03 982 Edith 0.473177 0.298654 + 2000-01-04 1009 Sarah 0.534344 -0.750377 + 2000-01-05 963 Zelda -0.271573 0.054424 + ... ... ... ... ... 
+ 2000-12-27 980 Ingrid -0.132333 -0.422195 + 2000-12-28 972 Frank -0.376007 -0.298687 + 2000-12-29 1009 Ursula -0.865047 -0.503133 + 2000-12-30 1000 Hannah -0.063757 -0.507336 + 2000-12-31 972 Tim -0.869120 0.531685 + """ + index = pd.date_range(start=start, end=end, freq=freq, name="timestamp") + n = len(index) + state = np.random.RandomState(seed) + columns = { + "name": state.choice(_names, size=n), + "id": state.poisson(1000, size=n), + "x": state.rand(n) * 2 - 1, + "y": state.rand(n) * 2 - 1, + } + df = pd.DataFrame(columns, index=index, columns=sorted(columns)) + if df.index[-1] == end: + df = df.iloc[:-1] + return df + + +def all_index_generator(k=10): + """Generator which can be iterated over to get instances of all the various + index classes. + + Parameters + ---------- + k: length of each of the index instances + """ + all_make_index_funcs = [ + makeIntIndex, + makeFloatIndex, + makeStringIndex, + makeUnicodeIndex, + makeDateIndex, + makePeriodIndex, + makeTimedeltaIndex, + makeBoolIndex, + makeRangeIndex, + makeIntervalIndex, + makeCategoricalIndex, + ] + for make_index_func in all_make_index_funcs: + yield make_index_func(k=k) + + +def index_subclass_makers_generator(): + make_index_funcs = [ + makeDateIndex, + makePeriodIndex, + makeTimedeltaIndex, + makeRangeIndex, + makeIntervalIndex, + makeCategoricalIndex, + makeMultiIndex, + ] + for make_index_func in make_index_funcs: + yield make_index_func + + +def all_timeseries_index_generator(k=10): + """Generator which can be iterated over to get instances of all the classes + which represent time-series. 
def makeCustomIndex(
    nentries, nlevels, prefix="#", names=False, ndupe_l=None, idx_type=None
):
    """
    Create an Index or MultiIndex with given dimensions, levels, names, etc.

    Parameters
    ----------
    nentries : int
        Number of entries in the index.
    nlevels : int
        Number of levels (> 1 produces a MultiIndex).
    prefix : str, default "#"
        A string prefix for the generated labels and default names.
    names : bool, str or list of str, default False
        If True, default names (``prefix + str(level)``) are used; if False
        or None, no names are used; if a list is given, the name of each
        level is taken from the list. A single string is accepted when
        ``nlevels == 1``.
    ndupe_l : list of int, optional
        The number of rows for which the label is repeated at the
        corresponding level. You can specify just the first few levels; the
        rest use the default of 1. ``len(ndupe_l) <= nlevels``. The passed
        list is not modified.
    idx_type : {"i", "f", "s", "u", "dt", "p", "td"}, optional
        If not None, ``nlevels`` must be 1.
        "i"/"f" creates an integer/float index,
        "s"/"u" creates a string/unicode index,
        "dt" creates a datetime index,
        "p" creates a period index,
        "td" creates a timedelta index.
        If unspecified, string labels are generated.

    Returns
    -------
    Index or MultiIndex
    """
    import re

    if ndupe_l is None:
        ndupe_l = [1] * nlevels
    assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels
    # Compare lengths by value (not identity) and allow the documented
    # single-string form for a one-level index.
    assert (
        names is None
        or names is False
        or names is True
        or (isinstance(names, str) and nlevels == 1)
        or len(names) == nlevels
    )
    assert idx_type is None or (
        idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1
    )

    if names is True:
        # build default names
        names = [prefix + str(i) for i in range(nlevels)]
    if names is False:
        # pass None to index constructor for no name
        names = None

    # make singleton case uniform
    if isinstance(names, str) and nlevels == 1:
        names = [names]

    # specific 1D index type requested?
    if idx_type is not None:
        idx_func = dict(
            i=makeIntIndex,
            f=makeFloatIndex,
            s=makeStringIndex,
            u=makeUnicodeIndex,
            dt=makeDateIndex,
            td=makeTimedeltaIndex,
            p=makePeriodIndex,
        ).get(idx_type)
        if idx_func is None:
            raise ValueError(
                f"{repr(idx_type)} is not a legal value for `idx_type`, "
                "use 'i'/'f'/'s'/'u'/'dt'/'p'/'td'."
            )
        idx = idx_func(nentries)
        # but we need to fill in the name
        if names:
            idx.name = names[0]
        return idx

    # Pad a copy with the default multiplicity so the caller's list is
    # left untouched.
    ndupe_l = list(ndupe_l) + [1] * (nlevels - len(ndupe_l))
    assert len(ndupe_l) == nlevels

    assert all(x > 0 for x in ndupe_l)

    def keyfunc(x):
        # Sort labels by the integers embedded in them (level, group)
        # instead of lexicographically, so "g10" sorts after "g9".
        numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
        return [int(num) for num in numeric_tuple]

    tuples = []
    for i in range(nlevels):
        # build a list of lists to create the index from
        div_factor = nentries // ndupe_l[i] + 1
        cnt = Counter()
        for j in range(div_factor):
            label = f"{prefix}_l{i}_g{j}"
            cnt[label] = ndupe_l[i]
        # Counter.elements() repeats each label by its count
        result = sorted(cnt.elements(), key=keyfunc)[:nentries]
        tuples.append(result)

    tuples = list(zip(*tuples))

    # convert tuples to index
    if nentries == 1:
        # we have a single level of tuples, i.e. a regular Index
        name = None if names is None else names[0]
        index = Index(tuples[0], name=name)
    elif nlevels == 1:
        name = None if names is None else names[0]
        index = Index([x[0] for x in tuples], name=name)
    else:
        index = MultiIndex.from_tuples(tuples, names=names)
    return index
+ c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex + r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex + data_gen_f - a function f(row,col) which return the data value + at that position, the default generator used yields values of the form + "RxCy" based on position. + c_ndupe_l, r_ndupe_l - list of integers, determines the number + of duplicates for each label at a given level of the corresponding + index. The default `None` value produces a multiplicity of 1 across + all levels, i.e. a unique index. Will accept a partial list of length + N < idx_nlevels, for just the first N levels. If ndupe doesn't divide + nrows/ncol, the last label might have lower multiplicity. + dtype - passed to the DataFrame constructor as is, in case you wish to + have more control in conjunction with a custom `data_gen_f` + r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". + If idx_type is not None, `idx_nlevels` must be 1. + "i"/"f" creates an integer/float index, + "s"/"u" creates a string/unicode index + "dt" create a datetime index. + "td" create a timedelta index. + + if unspecified, string labels will be generated. + + Examples: + + # 5 row, 3 columns, default names on both, single index on both axis + >> makeCustomDataframe(5,3) + + # make the data a random int between 1 and 100 + >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100)) + + # 2-level multiindex on rows with each label duplicated + # twice on first level, default names on both axis, single + # index on both axis + >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2]) + + # DatetimeIndex on row, index with unicode labels on columns + # no names on either axis + >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False, + r_idx_type="dt",c_idx_type="u") + + # 4-level multindex on rows with names provided, 2-level multindex + # on columns with default labels and default names. 
+ >> a=makeCustomDataframe(5,3,r_idx_nlevels=4, + r_idx_names=["FEE","FI","FO","FAM"], + c_idx_nlevels=2) + + >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) + """ + + assert c_idx_nlevels > 0 + assert r_idx_nlevels > 0 + assert r_idx_type is None or ( + r_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and r_idx_nlevels == 1 + ) + assert c_idx_type is None or ( + c_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and c_idx_nlevels == 1 + ) + + columns = makeCustomIndex( + ncols, + nlevels=c_idx_nlevels, + prefix="C", + names=c_idx_names, + ndupe_l=c_ndupe_l, + idx_type=c_idx_type, + ) + index = makeCustomIndex( + nrows, + nlevels=r_idx_nlevels, + prefix="R", + names=r_idx_names, + ndupe_l=r_ndupe_l, + idx_type=r_idx_type, + ) + + # by default, generate data based on location + if data_gen_f is None: + data_gen_f = lambda r, c: f"R{r}C{c}" + + data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] + + return DataFrame(data, index, columns, dtype=dtype) + + +def _create_missing_idx(nrows, ncols, density, random_state=None): + if random_state is None: + random_state = np.random + else: + random_state = np.random.RandomState(random_state) + + # below is cribbed from scipy.sparse + size = int(np.round((1 - density) * nrows * ncols)) + # generate a few more to ensure unique values + min_rows = 5 + fac = 1.02 + extra_size = min(size + min_rows, fac * size) + + def _gen_unique_rand(rng, _extra_size): + ind = rng.rand(int(_extra_size)) + return np.unique(np.floor(ind * nrows * ncols))[:size] + + ind = _gen_unique_rand(random_state, extra_size) + while ind.size < size: + extra_size *= 1.05 + ind = _gen_unique_rand(random_state, extra_size) + + j = np.floor(ind * 1.0 / nrows).astype(int) + i = (ind - j * nrows).astype(int) + return i.tolist(), j.tolist() + + +def makeMissingCustomDataframe( + nrows, + ncols, + density=0.9, + random_state=None, + c_idx_names=True, + r_idx_names=True, + c_idx_nlevels=1, + r_idx_nlevels=1, + data_gen_f=None, + c_ndupe_l=None, 
+ r_ndupe_l=None, + dtype=None, + c_idx_type=None, + r_idx_type=None, +): + """ + Parameters + ---------- + Density : float, optional + Float in (0, 1) that gives the percentage of non-missing numbers in + the DataFrame. + random_state : {np.random.RandomState, int}, optional + Random number generator or random seed. + + See makeCustomDataframe for descriptions of the rest of the parameters. + """ + df = makeCustomDataframe( + nrows, + ncols, + c_idx_names=c_idx_names, + r_idx_names=r_idx_names, + c_idx_nlevels=c_idx_nlevels, + r_idx_nlevels=r_idx_nlevels, + data_gen_f=data_gen_f, + c_ndupe_l=c_ndupe_l, + r_ndupe_l=r_ndupe_l, + dtype=dtype, + c_idx_type=c_idx_type, + r_idx_type=r_idx_type, + ) + + i, j = _create_missing_idx(nrows, ncols, density, random_state) + df.values[i, j] = np.nan + return df + + +def makeMissingDataframe(density=0.9, random_state=None): + df = makeDataFrame() + i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) + df.values[i, j] = np.nan + return df + + +def optional_args(decorator): + """allows a decorator to take optional positional and keyword arguments. + Assumes that taking a single, callable, positional argument means that + it is decorating a function, i.e. 
something like this:: + + @my_decorator + def function(): pass + + Calls decorator with decorator(f, *args, **kwargs)""" + + @wraps(decorator) + def wrapper(*args, **kwargs): + def dec(f): + return decorator(f, *args, **kwargs) + + is_decorating = not kwargs and len(args) == 1 and callable(args[0]) + if is_decorating: + f = args[0] + args = [] + return dec(f) + else: + return dec + + return wrapper + + +# skip tests on exceptions with this message +_network_error_messages = ( + # 'urlopen error timed out', + # 'timeout: timed out', + # 'socket.timeout: timed out', + "timed out", + "Server Hangup", + "HTTP Error 503: Service Unavailable", + "502: Proxy Error", + "HTTP Error 502: internal error", + "HTTP Error 502", + "HTTP Error 503", + "HTTP Error 403", + "HTTP Error 400", + "Temporary failure in name resolution", + "Name or service not known", + "Connection refused", + "certificate verify", +) + +# or this e.errno/e.reason.errno +_network_errno_vals = ( + 101, # Network is unreachable + 111, # Connection refused + 110, # Connection timed out + 104, # Connection reset Error + 54, # Connection reset by peer + 60, # urllib.error.URLError: [Errno 60] Connection timed out +) + +# Both of the above shouldn't mask real issues such as 404's +# or refused connections (changed DNS). +# But some tests (test_data yahoo) contact incredibly flakey +# servers. + +# and conditionally raise on exception types in _get_default_network_errors + + +def _get_default_network_errors(): + # Lazy import for http.client because it imports many things from the stdlib + import http.client + + return (IOError, http.client.HTTPException, TimeoutError) + + +def can_connect(url, error_classes=None): + """Try to connect to the given url. 
@optional_args
def network(
    t,
    url="http://www.google.com",
    raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT,
    check_before_test=False,
    error_classes=None,
    skip_errnos=_network_errno_vals,
    _skip_on_messages=_network_error_messages,
):
    """
    Label a test as requiring network connection and, if an error is
    encountered, only raise if it does not find a network connection.

    This assumes an added contract to your test: you must assert that,
    under normal conditions, your test will ONLY fail if it does not have
    network connectivity.

    You can call this in 3 ways: as a standard decorator, with keyword
    arguments, or with a positional argument that is the url to check.

    Parameters
    ----------
    t : callable
        The test requiring network connectivity.
    url : path
        The url to test via ``pandas.io.common.urlopen`` to check
        for connectivity. Defaults to 'http://www.google.com'.
    raise_on_error : bool
        If True, never catches errors.
    check_before_test : bool
        If True, checks connectivity before running the test case.
    error_classes : tuple or Exception
        error classes to ignore. If not in ``error_classes``, raises the error.
        Defaults to the classes returned by ``_get_default_network_errors``.
        Be careful about changing the error classes here.
    skip_errnos : iterable of int
        Any exception that has .errno or .reason.errno set to one
        of these values will be skipped with an appropriate
        message.
    _skip_on_messages: iterable of string
        any exception e for which one of the strings is
        a substring of str(e) will be skipped with an appropriate
        message. Intended to suppress errors where an errno isn't available.

    Notes
    -----
    * ``raise_on_error`` supersedes ``check_before_test``

    Returns
    -------
    t : callable
        The decorated test ``t``, with checks for connectivity errors.

    Example
    -------

    Tests decorated with @network will fail if it's possible to make a network
    connection to another URL (defaults to google.com)::

      >>> from pandas._testing import network
      >>> from pandas.io.common import urlopen
      >>> @network
      ... def test_network():
      ...     with urlopen("rabbit://bonanza.com"):
      ...         pass
      Traceback
         ...
      URLError: <urlopen error unknown url type: rabit>

      You can specify alternative URLs::

        >>> @network("http://www.yahoo.com")
        ... def test_something_with_yahoo():
        ...    raise IOError("Failure Message")
        >>> test_something_with_yahoo()
        Traceback (most recent call last):
            ...
        IOError: Failure Message

    If you set check_before_test, it will check the url first and not run the
    test on failure::

        >>> @network("failing://url.blaher", check_before_test=True)
        ... def test_something():
        ...     print("I ran!")
        ...     raise ValueError("Failure")
        >>> test_something()
        Traceback (most recent call last):
            ...

    Errors not related to networking will always be raised.
    """
    from pytest import skip

    if error_classes is None:
        error_classes = _get_default_network_errors()

    t.network = True

    @wraps(t)
    def wrapper(*args, **kwargs):
        if check_before_test and not raise_on_error:
            if not can_connect(url, error_classes):
                skip()
        try:
            return t(*args, **kwargs)
        except Exception as err:
            errno = getattr(err, "errno", None)
            # URLError-style exceptions wrap the underlying OSError in
            # ``reason``; fall back to that errno when the outer one is unset.
            if not errno and hasattr(err, "reason"):
                errno = getattr(err.reason, "errno", None)

            if errno in skip_errnos:
                skip(f"Skipping test due to known errno and error {err}")

            e_str = str(err)

            if any(m.lower() in e_str.lower() for m in _skip_on_messages):
                skip(
                    f"Skipping test because exception message is known and error {err}"
                )

            if not isinstance(err, error_classes):
                raise

            if raise_on_error or can_connect(url, error_classes):
                raise
            else:
                skip(f"Skipping test due to lack of connectivity and error {err}")

    return wrapper
+ Valid values are: + + * "error" - turns matching warnings into exceptions + * "ignore" - discard the warning + * "always" - always emit a warning + * "default" - print the warning the first time it is generated + from each location + * "module" - print the warning the first time it is generated + from each module + * "once" - print the warning the first time it is generated + + clear : str, default None + If not ``None`` then remove any previously raised warnings from + the ``__warningsregistry__`` to ensure that no warning messages are + suppressed by this context manager. If ``None`` is specified, + the ``__warningsregistry__`` keeps track of which warnings have been + shown, and does not show them again. + check_stacklevel : bool, default True + If True, displays the line that called the function containing + the warning to show were the function is called. Otherwise, the + line that implements the function is displayed. + raise_on_extra_warnings : bool, default True + Whether extra warnings not of the type `expected_warning` should + cause the test to fail. + + Examples + -------- + >>> import warnings + >>> with assert_produces_warning(): + ... warnings.warn(UserWarning()) + ... + >>> with assert_produces_warning(False): + ... warnings.warn(RuntimeWarning()) + ... + Traceback (most recent call last): + ... + AssertionError: Caused unexpected warning(s): ['RuntimeWarning']. + >>> with assert_produces_warning(UserWarning): + ... warnings.warn(RuntimeWarning()) + Traceback (most recent call last): + ... + AssertionError: Did not see expected warning of class 'UserWarning'. + + ..warn:: This is *not* thread-safe. 
+ """ + __tracebackhide__ = True + + with warnings.catch_warnings(record=True) as w: + + if clear is not None: + # make sure that we are clearing these warnings + # if they have happened before + # to guarantee that we will catch them + if not is_list_like(clear): + clear = [clear] + for m in clear: + try: + m.__warningregistry__.clear() + except AttributeError: + # module may not have __warningregistry__ + pass + + saw_warning = False + warnings.simplefilter(filter_level) + yield w + extra_warnings = [] + + for actual_warning in w: + if expected_warning and issubclass( + actual_warning.category, expected_warning + ): + saw_warning = True + + if check_stacklevel and issubclass( + actual_warning.category, (FutureWarning, DeprecationWarning) + ): + from inspect import getframeinfo, stack + + caller = getframeinfo(stack()[2][0]) + msg = ( + "Warning not set with correct stacklevel. " + f"File where warning is raised: {actual_warning.filename} != " + f"{caller.filename}. Warning message: {actual_warning.message}" + ) + assert actual_warning.filename == caller.filename, msg + else: + extra_warnings.append( + ( + actual_warning.category.__name__, + actual_warning.message, + actual_warning.filename, + actual_warning.lineno, + ) + ) + if expected_warning: + msg = ( + f"Did not see expected warning of class " + f"{repr(expected_warning.__name__)}" + ) + assert saw_warning, msg + if raise_on_extra_warnings and extra_warnings: + raise AssertionError( + f"Caused unexpected warning(s): {repr(extra_warnings)}" + ) + + +class RNGContext: + """ + Context manager to set the numpy random number generator speed. Returns + to the original value upon exiting the context manager. 
@contextmanager
def with_csv_dialect(name, **kwargs):
    """
    Context manager to temporarily register a CSV dialect for parsing CSV.

    The dialect is unregistered on exit, including when the managed block
    raises.

    Parameters
    ----------
    name : str
        The name of the dialect.
    kwargs : mapping
        The parameters for the dialect.

    Raises
    ------
    ValueError : the name of the dialect conflicts with a builtin one.

    See Also
    --------
    csv : Python's CSV library.
    """
    import csv

    _BUILTIN_DIALECTS = {"excel", "excel-tab", "unix"}

    if name in _BUILTIN_DIALECTS:
        raise ValueError("Cannot override builtin dialect.")

    csv.register_dialect(name, **kwargs)
    try:
        yield
    finally:
        # Always undo the registration so a failing block cannot leak the
        # dialect into later code.
        csv.unregister_dialect(name)
@contextmanager
def set_timezone(tz: str):
    """
    Context manager for temporarily setting a timezone.

    The ``TZ`` environment variable is set to `tz` for the duration of the
    ``with`` block and put back to its previous state (including "unset")
    afterwards.

    Parameters
    ----------
    tz : str
        A string representing a valid timezone.

    Examples
    --------

    >>> from datetime import datetime
    >>> from dateutil.tz import tzlocal
    >>> tzlocal().tzname(datetime.now())
    'IST'

    >>> with set_timezone('US/Eastern'):
    ...     tzlocal().tzname(datetime.now())
    ...
    'EDT'
    """

    import os
    import time

    def _apply(zone):
        # None means "TZ was not set": remove the variable if present.
        if zone is None:
            os.environ.pop("TZ", None)
            return
        os.environ["TZ"] = zone
        time.tzset()

    previous = os.environ.get("TZ")
    _apply(tz)
    try:
        yield
    finally:
        _apply(previous)
+ """ + sep = os.linesep + expected = sep.join(rows_list) + sep + return expected diff --git a/pandas/_typing.py b/pandas/_typing.py new file mode 100644 index 00000000..171b76b4 --- /dev/null +++ b/pandas/_typing.py @@ -0,0 +1,73 @@ +from pathlib import Path +from typing import ( + IO, + TYPE_CHECKING, + Any, + AnyStr, + Callable, + Collection, + Dict, + Hashable, + List, + Mapping, + Optional, + TypeVar, + Union, +) + +import numpy as np + +# To prevent import cycles place any internal imports in the branch below +# and use a string literal forward reference to it in subsequent types +# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles +if TYPE_CHECKING: + from pandas._libs import Period, Timedelta, Timestamp # noqa: F401 + from pandas.core.arrays.base import ExtensionArray # noqa: F401 + from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401 + from pandas.core.indexes.base import Index # noqa: F401 + from pandas.core.generic import NDFrame # noqa: F401 + from pandas import Interval # noqa: F401 + from pandas.core.series import Series # noqa: F401 + from pandas.core.frame import DataFrame # noqa: F401 + +# array-like + +AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray) +ArrayLike = TypeVar("ArrayLike", "ExtensionArray", np.ndarray) + +# scalars + +PythonScalar = Union[str, int, float, bool] +DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta") +PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"] +Scalar = Union[PythonScalar, PandasScalar] + +# other + +Dtype = Union[str, np.dtype, "ExtensionDtype"] +FilePathOrBuffer = Union[str, Path, IO[AnyStr]] + +# FrameOrSeriesUnion means either a DataFrame or a Series. E.g. +# `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series +# is passed in, either a Series or DataFrame is returned, and if a DataFrame is passed +# in, either a DataFrame or a Series is returned. 
+FrameOrSeriesUnion = Union["DataFrame", "Series"] + +# FrameOrSeries is stricter and ensures that the same subclass of NDFrame always is +# used. E.g. `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a +# Series is passed into a function, a Series is always returned and if a DataFrame is +# passed in, a DataFrame is always returned. +FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame") + +Axis = Union[str, int] +Label = Optional[Hashable] +Level = Union[Label, int] +Ordered = Optional[bool] +JSONSerializable = Union[PythonScalar, List, Dict] +Axes = Collection + +# For functions like rename that convert one label to another +Renamer = Union[Mapping[Label, Any], Callable[[Label], Label]] + +# to maintain type information across generic functions and parametrization +T = TypeVar("T") diff --git a/pandas/_version.py b/pandas/_version.py new file mode 100644 index 00000000..771aec8d --- /dev/null +++ b/pandas/_version.py @@ -0,0 +1,479 @@ +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.15 (https://github.com/warner/python-versioneer) + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
class VersioneerConfig:
    """Plain attribute container for the versioneer settings."""


def get_config():
    """Return a VersioneerConfig populated with this project's settings."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    cfg = VersioneerConfig()
    cfg.VCS = "git"
    cfg.style = "pep440"
    cfg.tag_prefix = "v"
    cfg.parentdir_prefix = "pandas-"
    cfg.versionfile_source = "pandas/_version.py"
    cfg.verbose = False
    return cfg


class NotThisMethod(Exception):
    """Raised when a version-discovery method does not apply."""


# Registry of handlers, keyed by VCS name and then by method name.
HANDLERS: Dict[str, Dict[str, Callable]] = {}


def register_vcs_handler(vcs: str, method: str) -> Callable:  # decorator
    """Return a decorator that stores the function in HANDLERS[vcs][method]."""

    def decorate(f: Callable) -> Callable:
        HANDLERS.setdefault(vcs, {})[method] = f
        return f

    return decorate


def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
    """
    Run the first launchable executable from `commands` with `args`.

    Parameters
    ----------
    commands : list of str
        Candidate executable names, tried in order.
    args : list of str
        Arguments appended to the executable.
    cwd : str, optional
        Working directory for the child process.
    verbose : bool, default False
        Print a diagnostic when the command cannot be run or fails.
    hide_stderr : bool, default False
        Capture the child's stderr instead of letting it through.

    Returns
    -------
    str or None
        The stripped, decoded stdout, or None when no candidate could be
        started or the process exited with a non-zero return code.
    """
    assert isinstance(commands, list)
    p = None
    for c in commands:
        try:
            dispcmd = str([c] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            p = subprocess.Popen(
                [c] + args,
                cwd=cwd,
                stdout=subprocess.PIPE,
                stderr=(subprocess.PIPE if hide_stderr else None),
            )
            break
        except OSError as e:
            # A missing executable just means "try the next candidate".
            if e.errno == errno.ENOENT:
                continue
            if verbose:
                print(f"unable to run {dispcmd}")
                print(e)
            return None
    else:
        if verbose:
            print(f"unable to find command, tried {commands}")
        return None
    stdout = p.communicate()[0].strip().decode()
    if p.returncode != 0:
        if verbose:
            print(f"unable to run {dispcmd} (error)")
        return None
    return stdout


def versions_from_parentdir(parentdir_prefix, root, verbose):
    """
    Derive the version from the name of the directory `root`.

    Raises
    ------
    NotThisMethod
        If the basename of `root` does not start with `parentdir_prefix`.
    """
    # Source tarballs conventionally unpack into a directory that includes
    # both the project name and a version string.
    dirname = os.path.basename(root)
    if not dirname.startswith(parentdir_prefix):
        if verbose:
            print(
                f"guessing rootdir is '{root}', but '{dirname}' "
                f"doesn't start with prefix '{parentdir_prefix}'"
            )
        raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
    return {
        "version": dirname[len(parentdir_prefix) :],
        "full-revisionid": None,
        "dirty": False,
        "error": None,
    }


@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """
    Extract the ``git_refnames`` / ``git_full`` values from a version file.

    Returns a dict with the keys "refnames" and/or "full" for the
    assignments that were found; an unreadable file yields an empty dict.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    keywords = {}
    try:
        # The with-block closes the file even if reading fails part-way.
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                if stripped.startswith("git_refnames ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["refnames"] = mo.group(1)
                if stripped.startswith("git_full ="):
                    mo = re.search(r'=\s*"(.*)"', line)
                    if mo:
                        keywords["full"] = mo.group(1)
    except OSError:
        pass
    return keywords


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """
    Compute the version dict from expanded git-archive keywords.

    Raises
    ------
    NotThisMethod
        If `keywords` is empty or the keywords were never expanded.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r"\d", r)}
        if verbose:
            print(f"discarding '{','.join(refs - tags)}', no digits")
    if verbose:
        print(f"likely tags: {','.join(sorted(tags))}")
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix) :]
            if verbose:
                print(f"picking {r}")
            return {
                "version": r,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False,
                "error": None,
            }
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {
        "version": "0+unknown",
        "full-revisionid": keywords["full"].strip(),
        "dirty": False,
        "error": "no suitable tags",
    }
+ + if not os.path.exists(os.path.join(root, ".git")): + if verbose: + print(f"no .git in {root}") + raise NotThisMethod("no .git directory") + + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + # if there is a tag, this yields TAG-NUM-gHEX[-dirty] + # if there are no tags, this yields HEX[-dirty] (no NUM) + describe_out = run_command( + GITS, ["describe", "--tags", "--dirty", "--always", "--long"], cwd=root + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? 
def plus_or_dot(pieces):
    """Return "." if the closest tag already contains a "+", else "+"."""
    # "closest-tag" may be present but None (no tags); treat that as empty
    # instead of raising TypeError on `"+" in None`.
    if "+" in (pieces.get("closest-tag") or ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Render `pieces` as TAG[+DISTANCE.gHEX[.dirty]]."""
    # now build up version string, with post-release "local version
    # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    # exceptions:
    # 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += f"{pieces['distance']:d}.g{pieces['short']}"
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = f"0+untagged.{pieces['distance']:d}.g{pieces['short']}"
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """Render `pieces` as TAG[.post.devDISTANCE]; the dirty flag is ignored."""
    # TAG[.post.devDISTANCE] . No -dirty

    # exceptions:
    # 1: no tags. 0.post.devDISTANCE

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += f".post.dev{pieces['distance']:d}"
    else:
        # exception #1
        rendered = f"0.post.dev{pieces['distance']:d}"
    return rendered


def render_pep440_post(pieces):
    """Render `pieces` as TAG[.postDISTANCE[.dev0]+gHEX]."""
    # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that
    # .dev0 sorts backwards (a dirty tree will appear "older" than the
    # corresponding clean one), but you shouldn't be releasing software with
    # -dirty anyways.

    # exceptions:
    # 1: no tags. 0.postDISTANCE[.dev0]

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += f".post{pieces['distance']:d}"
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += f"g{pieces['short']}"
    else:
        # exception #1
        # The segment must be spelled ".post"; the previous "0.pos<N>" did
        # not match the documented 0.postDISTANCE form.
        rendered = f"0.post{pieces['distance']:d}"
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += f"+g{pieces['short']}"
    return rendered


def render_pep440_old(pieces):
    """Render `pieces` as TAG[.postDISTANCE[.dev0]]."""
    # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty.

    # exceptions:
    # 1: no tags. 0.postDISTANCE[.dev0]

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += f".post{pieces['distance']:d}"
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = f"0.post{pieces['distance']:d}"
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """Render `pieces` as TAG[-DISTANCE-gHEX][-dirty]."""
    # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty
    # --always'

    # exceptions:
    # 1: no tags. HEX[-dirty] (note: no 'g' prefix)

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += f"-{pieces['distance']:d}-g{pieces['short']}"
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render_git_describe_long(pieces):
    """Render `pieces` as TAG-DISTANCE-gHEX[-dirty]."""
    # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty
    # --always -long'. The distance/hash is unconditional.

    # exceptions:
    # 1: no tags. HEX[-dirty] (note: no 'g' prefix)

    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        rendered += f"-{pieces['distance']:d}-g{pieces['short']}"
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """
    Render `pieces` into a version dict using the named `style`.

    Parameters
    ----------
    pieces : dict
        Must contain "error"; when that is falsy it must also contain the
        keys read by the selected renderer, plus "long" and "dirty".
    style : str
        One of "pep440", "pep440-pre", "pep440-post", "pep440-old",
        "git-describe", "git-describe-long". A falsy value or "default"
        selects "pep440".

    Returns
    -------
    dict
        With the keys "version", "full-revisionid", "dirty" and "error".

    Raises
    ------
    ValueError
        If `style` is not one of the known styles.
    """
    if pieces["error"]:
        return {
            "version": "unknown",
            "full-revisionid": pieces.get("long"),
            "dirty": None,
            "error": pieces["error"],
        }

    if not style or style == "default":
        style = "pep440"  # the default

    if style == "pep440":
        rendered = render_pep440(pieces)
    elif style == "pep440-pre":
        rendered = render_pep440_pre(pieces)
    elif style == "pep440-post":
        rendered = render_pep440_post(pieces)
    elif style == "pep440-old":
        rendered = render_pep440_old(pieces)
    elif style == "git-describe":
        rendered = render_git_describe(pieces)
    elif style == "git-describe-long":
        rendered = render_git_describe_long(pieces)
    else:
        raise ValueError(f"unknown style '{style}'")

    return {
        "version": rendered,
        "full-revisionid": pieces["long"],
        "dirty": pieces["dirty"],
        "error": None,
    }
+ + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for i in cfg.versionfile_source.split("/"): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + } diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py new file mode 100644 index 00000000..bebbb38b --- /dev/null +++ b/pandas/api/__init__.py @@ -0,0 +1,2 @@ +""" public toolkit API """ +from pandas.api import extensions, indexers, types # noqa diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py new file mode 100644 index 00000000..3019dd0e --- /dev/null +++ b/pandas/api/extensions/__init__.py @@ -0,0 +1,27 @@ +""" +Public API for extending pandas objects. 
+""" + +from pandas._libs.lib import no_default + +from pandas.core.dtypes.dtypes import ExtensionDtype, register_extension_dtype + +from pandas.core.accessor import ( + register_dataframe_accessor, + register_index_accessor, + register_series_accessor, +) +from pandas.core.algorithms import take +from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin + +__all__ = [ + "no_default", + "ExtensionDtype", + "register_extension_dtype", + "register_dataframe_accessor", + "register_index_accessor", + "register_series_accessor", + "take", + "ExtensionArray", + "ExtensionScalarOpsMixin", +] diff --git a/pandas/api/indexers/__init__.py b/pandas/api/indexers/__init__.py new file mode 100644 index 00000000..826297e6 --- /dev/null +++ b/pandas/api/indexers/__init__.py @@ -0,0 +1,8 @@ +""" +Public API for Rolling Window Indexers. +""" + +from pandas.core.indexers import check_array_indexer +from pandas.core.window.indexers import BaseIndexer + +__all__ = ["check_array_indexer", "BaseIndexer"] diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py new file mode 100644 index 00000000..3495b493 --- /dev/null +++ b/pandas/api/types/__init__.py @@ -0,0 +1,23 @@ +""" +Public toolkit API. +""" + +from pandas._libs.lib import infer_dtype + +from pandas.core.dtypes.api import * # noqa: F403, F401 +from pandas.core.dtypes.concat import union_categoricals +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) + +__all__ = [ + "infer_dtype", + "union_categoricals", + "CategoricalDtype", + "DatetimeTZDtype", + "IntervalDtype", + "PeriodDtype", +] diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py new file mode 100644 index 00000000..61832a8b --- /dev/null +++ b/pandas/arrays/__init__.py @@ -0,0 +1,30 @@ +""" +All of pandas' ExtensionArrays. + +See :ref:`extending.extension-types` for more. 
+""" +from pandas.core.arrays import ( + BooleanArray, + Categorical, + DatetimeArray, + IntegerArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + StringArray, + TimedeltaArray, +) + +__all__ = [ + "BooleanArray", + "Categorical", + "DatetimeArray", + "IntegerArray", + "IntervalArray", + "PandasArray", + "PeriodArray", + "SparseArray", + "StringArray", + "TimedeltaArray", +] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py new file mode 100644 index 00000000..60cfecd5 --- /dev/null +++ b/pandas/compat/__init__.py @@ -0,0 +1,140 @@ +""" +compat +====== + +Cross-compatible functions for different versions of Python. + +Other items: +* platform checker +""" +import platform +import struct +import sys +import warnings + +PY37 = sys.version_info >= (3, 7) +PY38 = sys.version_info >= (3, 8) +PYPY = platform.python_implementation() == "PyPy" + + +# ---------------------------------------------------------------------------- +# functions largely based / taken from the six module + +# Much of the code in this module comes from Benjamin Peterson's six library. +# The license for this library can be found in LICENSES/SIX and the code can be +# found at https://bitbucket.org/gutworth/six + + +def set_function_name(f, name, cls): + """ + Bind the name/qualname attributes of the function. + """ + f.__name__ = name + f.__qualname__ = f"{cls.__name__}.{name}" + f.__module__ = cls.__module__ + return f + + +# https://github.com/pandas-dev/pandas/pull/9123 +def is_platform_little_endian() -> bool: + """ + Checking if the running platform is little endian. + + Returns + ------- + bool + True if the running platform is little endian. + """ + return sys.byteorder == "little" + + +def is_platform_windows() -> bool: + """ + Checking if the running platform is windows. + + Returns + ------- + bool + True if the running platform is windows. 
def is_platform_linux() -> bool:
    """
    Checking if the running platform is linux.

    Returns
    -------
    bool
        True if the running platform is linux.
    """
    # Python 3.3+ reports "linux" (no major-version suffix), so comparing
    # against "linux2" was never true; startswith covers both spellings.
    return sys.platform.startswith("linux")


def is_platform_mac() -> bool:
    """
    Checking if the running platform is mac.

    Returns
    -------
    bool
        True if the running platform is mac.
    """
    return sys.platform == "darwin"


def is_platform_32bit() -> bool:
    """
    Checking if the running platform is 32-bit.

    Returns
    -------
    bool
        True if the running platform is 32-bit.
    """
    # "P" is the struct format for a native pointer; its size in bits
    # tells whether this interpreter is a 64-bit build.
    return struct.calcsize("P") * 8 < 64


def _import_lzma():
    """
    Importing the `lzma` module.

    Returns
    -------
    module or None
        The `lzma` module, or None when it cannot be imported.

    Warns
    -----
    When the `lzma` module is not available.
    """
    try:
        import lzma

        return lzma
    except ImportError:
        msg = (
            "Could not import the lzma module. "
            "Your installed Python is incomplete. "
            "Attempting to use lzma compression will result in a RuntimeError."
        )
        warnings.warn(msg)
        return None


def _get_lzma_file(lzma):
    """
    Importing the `LZMAFile` class from the `lzma` module.

    Returns
    -------
    class
        The `LZMAFile` class from the `lzma` module.

    Raises
    ------
    RuntimeError
        If the `lzma` module was not imported correctly, or didn't exist.
    """
    if lzma is None:
        raise RuntimeError(
            "lzma module not available. "
            "A Python re-install with the proper dependencies, "
            "might be required to solve this issue."
        )
    return lzma.LZMAFile
# Minimum supported version of each optional dependency, keyed by the name
# handed to ``import_optional_dependency``.
VERSIONS = {
    "bs4": "4.6.0",
    "bottleneck": "1.2.1",
    "fastparquet": "0.3.2",
    "gcsfs": "0.2.2",
    "lxml.etree": "3.8.0",
    "matplotlib": "2.2.2",
    "numexpr": "2.6.2",
    "odfpy": "1.3.0",
    "openpyxl": "2.5.7",
    "pandas_gbq": "0.8.0",
    "pyarrow": "0.13.0",
    "pytables": "3.4.2",
    "pytest": "5.0.1",
    "pyxlsb": "1.0.6",
    "s3fs": "0.3.0",
    "scipy": "0.19.0",
    "sqlalchemy": "1.1.4",
    "tables": "3.4.2",
    "tabulate": "0.8.3",
    "xarray": "0.8.2",
    "xlrd": "1.1.0",
    "xlwt": "1.2.0",
    "xlsxwriter": "0.9.8",
    "numba": "0.46.0",
}


def _get_version(module: types.ModuleType) -> str:
    """Return the module's version string, raising ImportError if absent."""
    # xlrd uses a capitalized attribute name, hence the second candidate.
    for attr in ("__version__", "__VERSION__"):
        found = getattr(module, attr, None)
        if found is not None:
            return found
    raise ImportError(f"Can't determine version for {module.__name__}")


def import_optional_dependency(
    name: str, extra: str = "", raise_on_missing: bool = True, on_version: str = "raise"
):
    """
    Import an optional dependency.

    By default, if a dependency is missing an ImportError with a nice
    message will be raised. If a dependency is present, but too old,
    we raise.

    Parameters
    ----------
    name : str
        The module name. This should be top-level only, so that the
        version may be checked.
    extra : str
        Additional text to include in the ImportError message.
    raise_on_missing : bool, default True
        Whether to raise if the optional dependency is not found.
        When False and the module is not present, None is returned.
    on_version : str {'raise', 'warn', 'ignore'}
        What to do when a dependency's version is too old.

        * raise : Raise an ImportError
        * warn : Warn that the version is too old. Returns None
        * ignore: Return the module, even if the version is too old.
          It's expected that users validate the version locally when
          using ``on_version="ignore"`` (see. ``io/html.py``)

    Returns
    -------
    maybe_module : Optional[ModuleType]
        The imported module, when found and the version is correct.
        None is returned when the package is not found and `raise_on_missing`
        is False, or when the package's version is too old and `on_version`
        is ``'warn'``.
    """
    try:
        module = importlib.import_module(name)
    except ImportError:
        if not raise_on_missing:
            return None
        missing = (
            f"Missing optional dependency '{name}'. {extra} "
            f"Use pip or conda to install {name}."
        )
        raise ImportError(missing) from None

    required = VERSIONS.get(name)
    if not required:
        # No minimum is recorded for this module, so nothing to check.
        return module

    installed = _get_version(module)
    if not distutils.version.LooseVersion(installed) < required:
        return module

    assert on_version in {"warn", "raise", "ignore"}
    too_old = (
        f"Pandas requires version '{required}' or newer of '{name}' "
        f"(version '{installed}' currently installed)."
    )
    if on_version == "warn":
        warnings.warn(too_old, UserWarning)
        return None
    if on_version == "raise":
        raise ImportError(too_old)

    # on_version == "ignore": hand back the outdated module as-is.
    return module
# Matches a trailing "+0000" / "-0000" UTC offset.
_tz_regex = re.compile("[+-]0000$")


def tz_replacer(s):
    """
    Strip a trailing "Z" or "+0000"/"-0000" suffix from a string.

    Non-string inputs, and strings without such a suffix, are returned
    unchanged.
    """
    if not isinstance(s, str):
        return s
    if s.endswith("Z"):
        return s[:-1]
    if _tz_regex.search(s):
        # The matched offset is always five characters long.
        return s[:-5]
    return s


def np_datetime64_compat(s, *args, **kwargs):
    """
    provide compat for construction of strings to numpy datetime64's with
    tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
    warning, when need to pass '2015-01-01 09:00:00'
    """
    return np.datetime64(tz_replacer(s), *args, **kwargs)


def np_array_datetime64_compat(arr, *args, **kwargs):
    """
    provide compat for construction of an array of strings to a
    np.array(..., dtype=np.datetime64(..))
    tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
    warning, when need to pass '2015-01-01 09:00:00'
    """
    # is_list_like
    list_like = hasattr(arr, "__iter__") and not isinstance(arr, (str, bytes))
    cleaned = [tz_replacer(item) for item in arr] if list_like else tz_replacer(arr)
    return np.array(cleaned, *args, **kwargs)
+""" +from collections import OrderedDict +from distutils.version import LooseVersion +from typing import Any, Dict, Optional, Union + +from numpy import __version__ as _np_version, ndarray + +from pandas._libs.lib import is_bool, is_integer +from pandas.errors import UnsupportedFunctionCall +from pandas.util._validators import ( + validate_args, + validate_args_and_kwargs, + validate_kwargs, +) + + +class CompatValidator: + def __init__(self, defaults, fname=None, method=None, max_fname_arg_count=None): + self.fname = fname + self.method = method + self.defaults = defaults + self.max_fname_arg_count = max_fname_arg_count + + def __call__(self, args, kwargs, fname=None, max_fname_arg_count=None, method=None): + if args or kwargs: + fname = self.fname if fname is None else fname + max_fname_arg_count = ( + self.max_fname_arg_count + if max_fname_arg_count is None + else max_fname_arg_count + ) + method = self.method if method is None else method + + if method == "args": + validate_args(fname, args, max_fname_arg_count, self.defaults) + elif method == "kwargs": + validate_kwargs(fname, kwargs, self.defaults) + elif method == "both": + validate_args_and_kwargs( + fname, args, kwargs, max_fname_arg_count, self.defaults + ) + else: + raise ValueError(f"invalid validation method '{method}'") + + +ARGMINMAX_DEFAULTS = dict(out=None) +validate_argmin = CompatValidator( + ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1 +) +validate_argmax = CompatValidator( + ARGMINMAX_DEFAULTS, fname="argmax", method="both", max_fname_arg_count=1 +) + + +def process_skipna(skipna, args): + if isinstance(skipna, ndarray) or skipna is None: + args = (skipna,) + args + skipna = True + + return skipna, args + + +def validate_argmin_with_skipna(skipna, args, kwargs): + """ + If 'Series.argmin' is called via the 'numpy' library, + the third parameter in its signature is 'out', which + takes either an ndarray or 'None', so check if the + 'skipna' parameter is either an 
def validate_argmax_with_skipna(skipna, args, kwargs):
    """
    Validate a 'Series.argmax' call that may have come through numpy.

    When numpy dispatches to this method, the slot pandas names 'skipna'
    holds numpy's 'out' argument (an ndarray or None) rather than a boolean.
    ``process_skipna`` moves such a value to the front of ``args`` and
    substitutes a boolean before the remaining arguments are validated.

    Returns
    -------
    The 'skipna' value produced by ``process_skipna``.
    """
    resolved_skipna, positional = process_skipna(skipna, args)
    validate_argmax(positional, kwargs)
    return resolved_skipna
# Annotated like the neighbouring ``*_DEFAULTS`` constants; the previous
# trailing ``# type Dict[str, Any]`` lacked the colon and was not a type comment.
CLIP_DEFAULTS: Dict[str, Any] = dict(out=None)
validate_clip = CompatValidator(
    CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
)


def validate_clip_with_axis(axis, args, kwargs):
    """
    If 'NDFrame.clip' is called via the numpy library, the third
    parameter in its signature is 'out', which can take an ndarray,
    so check if the 'axis' parameter is an instance of ndarray, since
    'axis' itself should either be an integer or None.

    Parameters
    ----------
    axis : object
        Value received in the 'axis' position.
    args : tuple
        Extra positional arguments to validate.
    kwargs : dict
        Extra keyword arguments to validate.

    Returns
    -------
    ``axis`` unchanged, or None when an ndarray was received in its place.
    """
    if isinstance(axis, ndarray):
        # An ndarray here is really numpy's positional 'out': hand it to the
        # validator as the first extra positional argument instead.
        args = (axis,) + args
        axis = None

    validate_clip(args, kwargs)
    return axis
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1 +) +validate_max = CompatValidator( + MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1 +) + +RESHAPE_DEFAULTS: Dict[str, str] = dict(order="C") +validate_reshape = CompatValidator( + RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1 +) + +REPEAT_DEFAULTS: Dict[str, Any] = dict(axis=None) +validate_repeat = CompatValidator( + REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1 +) + +ROUND_DEFAULTS: Dict[str, Any] = dict(out=None) +validate_round = CompatValidator( + ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1 +) + +SORT_DEFAULTS: "OrderedDict[str, Optional[Union[int, str]]]" = OrderedDict() +SORT_DEFAULTS["axis"] = -1 +SORT_DEFAULTS["kind"] = "quicksort" +SORT_DEFAULTS["order"] = None +validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs") + +STAT_FUNC_DEFAULTS: "OrderedDict[str, Optional[Any]]" = OrderedDict() +STAT_FUNC_DEFAULTS["dtype"] = None +STAT_FUNC_DEFAULTS["out"] = None + +PROD_DEFAULTS = SUM_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +SUM_DEFAULTS["keepdims"] = False +SUM_DEFAULTS["initial"] = None + +MEDIAN_DEFAULTS = STAT_FUNC_DEFAULTS.copy() +MEDIAN_DEFAULTS["overwrite_input"] = False +MEDIAN_DEFAULTS["keepdims"] = False + +STAT_FUNC_DEFAULTS["keepdims"] = False + +validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs") +validate_sum = CompatValidator( + SUM_DEFAULTS, fname="sum", method="both", max_fname_arg_count=1 +) +validate_prod = CompatValidator( + PROD_DEFAULTS, fname="prod", method="both", max_fname_arg_count=1 +) +validate_mean = CompatValidator( + STAT_FUNC_DEFAULTS, fname="mean", method="both", max_fname_arg_count=1 +) +validate_median = CompatValidator( + MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1 +) + +STAT_DDOF_FUNC_DEFAULTS: "OrderedDict[str, Optional[bool]]" = OrderedDict() +STAT_DDOF_FUNC_DEFAULTS["dtype"] = None 
def _raise_if_numpy_window_call(msg, args, kwargs):
    """
    Raise ``UnsupportedFunctionCall(msg)`` if any positional argument was
    passed or if 'axis', 'dtype' or 'out' appears among the keyword arguments.
    """
    if len(args) > 0 or any(key in kwargs for key in ("axis", "dtype", "out")):
        raise UnsupportedFunctionCall(msg)


def validate_window_func(name, args, kwargs):
    """
    Reject numpy-style arguments passed to the window method ``name``.
    """
    _raise_if_numpy_window_call(
        f"numpy operations are not valid with window objects. "
        f"Use .{name}() directly instead ",
        args,
        kwargs,
    )


def validate_rolling_func(name, args, kwargs):
    """
    Reject numpy-style arguments passed to the rolling method ``name``.
    """
    _raise_if_numpy_window_call(
        f"numpy operations are not valid with window objects. "
        f"Use .rolling(...).{name}() instead ",
        args,
        kwargs,
    )


def validate_expanding_func(name, args, kwargs):
    """
    Reject numpy-style arguments passed to the expanding method ``name``.
    """
    _raise_if_numpy_window_call(
        f"numpy operations are not valid with window objects. "
        f"Use .expanding(...).{name}() instead ",
        args,
        kwargs,
    )


def validate_groupby_func(name, args, kwargs, allowed=None):
    """
    Ensure no unexpected extra arguments reached a groupby method.

    'args' must be empty, and 'kwargs' may only contain keys listed in
    'allowed', because every genuine parameter is explicitly listed in
    the function signature.

    Raises
    ------
    UnsupportedFunctionCall
        If any positional argument or non-allowed keyword was passed.
    """
    permitted = [] if allowed is None else allowed
    unexpected = set(kwargs).difference(set(permitted))

    if len(args) + len(unexpected) == 0:
        return

    raise UnsupportedFunctionCall(
        f"numpy operations are not valid with "
        f"groupby. Use .groupby(...).{name}() "
        f"instead"
    )


RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")


def validate_resampler_func(method, args, kwargs):
    """
    Ensure 'args' and 'kwargs' are empty for a resampler method.

    Raises
    ------
    UnsupportedFunctionCall
        If extra arguments were passed and ``method`` is one of
        ``RESAMPLER_NUMPY_OPS``.
    TypeError
        If extra arguments were passed for any other ``method``.
    """
    if len(args) + len(kwargs) == 0:
        return

    if method not in RESAMPLER_NUMPY_OPS:
        raise TypeError("too many arguments passed in")

    raise UnsupportedFunctionCall(
        f"numpy operations are not "
        f"valid with resample. Use "
        f".resample(...).{method}() instead"
    )


def validate_minmax_axis(axis):
    """
    Ensure that the axis argument passed to min, max, argmin, or argmax is
    zero or None, as otherwise it will be incorrectly ignored.

    Parameters
    ----------
    axis : int or None

    Raises
    ------
    ValueError
    """
    if axis is None:
        return

    ndim = 1  # hard-coded for Index
    if axis >= ndim or axis < -ndim:
        raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})")
def load_reduce(self):
    """
    Apply the callable below the top of the stack to the popped argument tuple.

    The top of ``self.stack`` is popped and used as the arguments; the new top
    is called with them and then replaced by the result.

    If the call raises a ``TypeError`` whose message contains
    "_reconstruct: First argument must be a sub-type of ndarray", the first
    argument is treated as a class and a bare instance created with
    ``object.__new__`` is stored instead. Any other ``TypeError`` is
    re-raised, as is the original one if that fallback itself raises
    ``TypeError``.
    """
    stack = self.stack
    args = stack.pop()
    func = stack[-1]

    try:
        stack[-1] = func(*args)
        return
    except TypeError as err:

        # If we have a deprecated function,
        # try to replace and try again.

        msg = "_reconstruct: First argument must be a sub-type of ndarray"

        if msg in str(err):
            try:
                cls = args[0]
                stack[-1] = object.__new__(cls)
                return
            except TypeError:
                # Fall through to re-raise the original reconstruction error.
                pass

        raise
# Maps the (module, name) pair recorded in an old pickle to the (module, name)
# pair that should be looked up instead.  ``Unpickler.find_class`` consults
# this map exactly once per lookup, so every value must be a final location:
# a value that is itself a key of this map will NOT be redirected again.
_class_locations_map = {
    ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"),
    # 15477
    ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"),
    ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"),
    ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"),
    # 10890
    ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"),
    # SparseSeries is loaded through the _LoadSparseSeries shim defined in
    # this module (see the SparseSeries entries below), so point there directly.
    ("pandas.sparse.series", "SparseTimeSeries"): (
        "pandas.compat.pickle_compat",
        "_LoadSparseSeries",
    ),
    # 12588, extensions moving
    ("pandas._sparse", "BlockIndex"): ("pandas._libs.sparse", "BlockIndex"),
    ("pandas.tslib", "Timestamp"): ("pandas._libs.tslib", "Timestamp"),
    # 18543 moving period
    ("pandas._period", "Period"): ("pandas._libs.tslibs.period", "Period"),
    ("pandas._libs.period", "Period"): ("pandas._libs.tslibs.period", "Period"),
    # 18014 moved __nat_unpickle from _libs.tslib-->_libs.tslibs.nattype
    ("pandas.tslib", "__nat_unpickle"): (
        "pandas._libs.tslibs.nattype",
        "__nat_unpickle",
    ),
    ("pandas._libs.tslib", "__nat_unpickle"): (
        "pandas._libs.tslibs.nattype",
        "__nat_unpickle",
    ),
    # 15998 top-level dirs moving
    ("pandas.sparse.array", "SparseArray"): (
        "pandas.core.arrays.sparse",
        "SparseArray",
    ),
    ("pandas.sparse.series", "SparseSeries"): (
        "pandas.compat.pickle_compat",
        "_LoadSparseSeries",
    ),
    # _LoadSparseFrame is defined in this module, not in pandas.core.sparse.frame
    ("pandas.sparse.frame", "SparseDataFrame"): (
        "pandas.compat.pickle_compat",
        "_LoadSparseFrame",
    ),
    ("pandas.indexes.base", "_new_Index"): ("pandas.core.indexes.base", "_new_Index"),
    ("pandas.indexes.base", "Index"): ("pandas.core.indexes.base", "Index"),
    ("pandas.indexes.numeric", "Int64Index"): (
        "pandas.core.indexes.numeric",
        "Int64Index",
    ),
    ("pandas.indexes.range", "RangeIndex"): ("pandas.core.indexes.range", "RangeIndex"),
    ("pandas.indexes.multi", "MultiIndex"): ("pandas.core.indexes.multi", "MultiIndex"),
    ("pandas.tseries.index", "_new_DatetimeIndex"): (
        "pandas.core.indexes.datetimes",
        "_new_DatetimeIndex",
    ),
    ("pandas.tseries.index", "DatetimeIndex"): (
        "pandas.core.indexes.datetimes",
        "DatetimeIndex",
    ),
    ("pandas.tseries.period", "PeriodIndex"): (
        "pandas.core.indexes.period",
        "PeriodIndex",
    ),
    # 19269, arrays moving
    ("pandas.core.categorical", "Categorical"): ("pandas.core.arrays", "Categorical"),
    # 19939, add timedeltaindex, float64index compat from 15998 move
    ("pandas.tseries.tdi", "TimedeltaIndex"): (
        "pandas.core.indexes.timedeltas",
        "TimedeltaIndex",
    ),
    ("pandas.indexes.numeric", "Float64Index"): (
        "pandas.core.indexes.numeric",
        "Float64Index",
    ),
    ("pandas.core.sparse.series", "SparseSeries"): (
        "pandas.compat.pickle_compat",
        "_LoadSparseSeries",
    ),
    ("pandas.core.sparse.frame", "SparseDataFrame"): (
        "pandas.compat.pickle_compat",
        "_LoadSparseFrame",
    ),
}


# our Unpickler sub-class to override methods and some dispatcher
# functions for compat and uses a non-public class of the pickle module.
def load_newobj(self):
    """
    Build an instance from the class and argument tuple on top of the stack.

    The argument tuple is popped and the class beneath it is replaced by the
    new instance. ``Index`` subclasses are created with ``object.__new__``,
    bypassing their own ``__new__`` and ignoring the arguments; every other
    class is created with ``cls.__new__(cls, *args)``.
    """
    stack = self.stack
    ctor_args = stack.pop()
    klass = stack[-1]

    # compat
    stack[-1] = (
        object.__new__(klass)
        if issubclass(klass, Index)
        else klass.__new__(klass, *ctor_args)
    )
def pytest_addoption(parser):
    """
    Register the boolean command line flags used by this conftest.

    Each flag is added with ``action="store_true"``.
    """
    flags_and_help = (
        ("--skip-slow", "skip slow tests"),
        ("--skip-network", "skip network tests"),
        ("--skip-db", "skip db tests"),
        ("--run-high-memory", "run high memory tests"),
        ("--only-slow", "run only slow tests"),
        ("--strict-data-files", "Fail if a test is skipped for missing data file."),
    )
    for flag, description in flags_and_help:
        parser.addoption(flag, action="store_true", help=description)
+@pytest.fixture(autouse=True) +def configure_tests(): + pd.set_option("chained_assignment", "raise") + + +# For running doctests: make np and pd names available + + +@pytest.fixture(autouse=True) +def add_imports(doctest_namespace): + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + + +@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) +def spmatrix(request): + from scipy import sparse + + return getattr(sparse, request.param + "_matrix") + + +@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: f"axis {repr(x)}") +def axis(request): + """ + Fixture for returning the axis numbers of a DataFrame. + """ + return request.param + + +axis_frame = axis + + +@pytest.fixture(params=[0, "index"], ids=lambda x: f"axis {repr(x)}") +def axis_series(request): + """ + Fixture for returning the axis numbers of a Series. + """ + return request.param + + +@pytest.fixture +def ip(): + """ + Get an instance of IPython.InteractiveShell. + + Will raise a skip if IPython is not installed. + """ + + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.interactiveshell import InteractiveShell + + return InteractiveShell() + + +@pytest.fixture(params=[True, False, None]) +def observed(request): + """ + Pass in the observed keyword to groupby for [True, False] + This indicates whether categoricals should return values for + values which are not in the grouper [False / None], or only values which + appear in the grouper [True]. [None] is supported for future compatibility + if we decide to change the default (and would need to warn if this + parameter is not passed). + """ + return request.param + + +@pytest.fixture(params=[True, False, None]) +def ordered_fixture(request): + """ + Boolean 'ordered' parameter for Categorical. 
def _get_cython_table_params(ndframe, func_names_and_expected):
    """
    Combine frame, functions from SelectionMixin._cython_table
    keys and expected result.

    Parameters
    ----------
    ndframe : DataFrame or Series
    func_names_and_expected : Sequence of two items
        The first item is a name of a NDFrame method ('sum', 'prod') etc.
        The second item is the expected return value.

    Returns
    -------
    list
        List of three-item tuples (ndframe, function, expected result). For
        each input pair the method name itself comes first, followed by every
        ``_cython_table`` key whose mapped name equals that method name.
    """
    params = []
    for method_name, expected in func_names_and_expected:
        params.append((ndframe, method_name, expected))
        for func, mapped_name in _cython_table:
            if mapped_name == method_name:
                params.append((ndframe, func, expected))
    return params
# ``np.float`` was only an alias of the builtin ``float``; NumPy deprecated the
# alias and later removed it, so spell the builtin directly.  The parameter
# list (and therefore the generated test ids) is unchanged.
@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), float("NaN")])
def nulls_fixture(request):
    """
    Fixture for each null type in pandas.
    """
    return request.param


nulls_fixture2 = nulls_fixture  # Generate cartesian product of nulls_fixture
["datetime64[ns]", "M8[ns]"] +TIMEDELTA64_DTYPES = ["timedelta64[ns]", "m8[ns]"] + +BOOL_DTYPES = [bool, "bool"] +BYTES_DTYPES = [bytes, "bytes"] +OBJECT_DTYPES = [object, "object"] + +ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES +ALL_NUMPY_DTYPES = ( + ALL_REAL_DTYPES + + COMPLEX_DTYPES + + STRING_DTYPES + + DATETIME64_DTYPES + + TIMEDELTA64_DTYPES + + BOOL_DTYPES + + OBJECT_DTYPES + + BYTES_DTYPES +) + + +@pytest.fixture(params=STRING_DTYPES) +def string_dtype(request): + """ + Parametrized fixture for string dtypes. + + * str + * 'str' + * 'U' + """ + return request.param + + +@pytest.fixture(params=BYTES_DTYPES) +def bytes_dtype(request): + """ + Parametrized fixture for bytes dtypes. + + * bytes + * 'bytes' + """ + return request.param + + +@pytest.fixture(params=OBJECT_DTYPES) +def object_dtype(request): + """ + Parametrized fixture for object dtypes. + + * object + * 'object' + """ + return request.param + + +@pytest.fixture(params=DATETIME64_DTYPES) +def datetime64_dtype(request): + """ + Parametrized fixture for datetime64 dtypes. + + * 'datetime64[ns]' + * 'M8[ns]' + """ + return request.param + + +@pytest.fixture(params=TIMEDELTA64_DTYPES) +def timedelta64_dtype(request): + """ + Parametrized fixture for timedelta64 dtypes. + + * 'timedelta64[ns]' + * 'm8[ns]' + """ + return request.param + + +@pytest.fixture(params=FLOAT_DTYPES) +def float_dtype(request): + """ + Parameterized fixture for float dtypes. + + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=COMPLEX_DTYPES) +def complex_dtype(request): + """ + Parameterized fixture for complex dtypes. + + * complex + * 'complex64' + * 'complex128' + """ + return request.param + + +@pytest.fixture(params=SIGNED_INT_DTYPES) +def sint_dtype(request): + """ + Parameterized fixture for signed integer dtypes. 
+ + * int + * 'int8' + * 'int16' + * 'int32' + * 'int64' + """ + return request.param + + +@pytest.fixture(params=UNSIGNED_INT_DTYPES) +def uint_dtype(request): + """ + Parameterized fixture for unsigned integer dtypes. + + * 'uint8' + * 'uint16' + * 'uint32' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=ALL_INT_DTYPES) +def any_int_dtype(request): + """ + Parameterized fixture for any integer dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + """ + return request.param + + +@pytest.fixture(params=ALL_EA_INT_DTYPES) +def any_nullable_int_dtype(request): + """ + Parameterized fixture for any nullable integer dtype. + + * 'UInt8' + * 'Int8' + * 'UInt16' + * 'Int16' + * 'UInt32' + * 'Int32' + * 'UInt64' + * 'Int64' + """ + + return request.param + + +@pytest.fixture(params=ALL_REAL_DTYPES) +def any_real_dtype(request): + """ + Parameterized fixture for any (purely) real numeric dtype. + + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + """ + return request.param + + +@pytest.fixture(params=ALL_NUMPY_DTYPES) +def any_numpy_dtype(request): + """ + Parameterized fixture for all numpy dtypes. 
+ + * bool + * 'bool' + * int + * 'int8' + * 'uint8' + * 'int16' + * 'uint16' + * 'int32' + * 'uint32' + * 'int64' + * 'uint64' + * float + * 'float32' + * 'float64' + * complex + * 'complex64' + * 'complex128' + * str + * 'str' + * 'U' + * bytes + * 'bytes' + * 'datetime64[ns]' + * 'M8[ns]' + * 'timedelta64[ns]' + * 'm8[ns]' + * object + * 'object' + """ + return request.param + + +# categoricals are handled separately +_any_skipna_inferred_dtype = [ + ("string", ["a", np.nan, "c"]), + ("string", ["a", pd.NA, "c"]), + ("bytes", [b"a", np.nan, b"c"]), + ("empty", [np.nan, np.nan, np.nan]), + ("empty", []), + ("mixed-integer", ["a", np.nan, 2]), + ("mixed", ["a", np.nan, 2.0]), + ("floating", [1.0, np.nan, 2.0]), + ("integer", [1, np.nan, 2]), + ("mixed-integer-float", [1, np.nan, 2.0]), + ("decimal", [Decimal(1), np.nan, Decimal(2)]), + ("boolean", [True, np.nan, False]), + ("boolean", [True, pd.NA, False]), + ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), + ("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]), + ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), + # The following two dtypes are commented out due to GH 23554 + # ('complex', [1 + 1j, np.nan, 2 + 2j]), + # ('timedelta64', [np.timedelta64(1, 'D'), + # np.nan, np.timedelta64(2, 'D')]), + ("timedelta", [timedelta(1), np.nan, timedelta(2)]), + ("time", [time(1), np.nan, time(2)]), + ("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]), + ("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]), +] +ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id + + +@pytest.fixture(params=_any_skipna_inferred_dtype, ids=ids) +def any_skipna_inferred_dtype(request): + """ + Fixture for all inferred dtypes from _libs.lib.infer_dtype + + The covered (inferred) types are: + * 'string' + * 'empty' + * 'bytes' + * 'mixed' + * 'mixed-integer' + * 'mixed-integer-float' + * 'floating' + * 'integer' + * 'decimal' + * 'boolean' + * 
'datetime64' + * 'datetime' + * 'date' + * 'timedelta' + * 'time' + * 'period' + * 'interval' + + Returns + ------- + inferred_dtype : str + The string for the inferred dtype from _libs.lib.infer_dtype + values : np.ndarray + An array of object dtype that will be inferred to have + `inferred_dtype` + + Examples + -------- + >>> import pandas._libs.lib as lib + >>> + >>> def test_something(any_skipna_inferred_dtype): + ... inferred_dtype, values = any_skipna_inferred_dtype + ... # will pass + ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype + """ + inferred_dtype, values = request.param + values = np.array(values, dtype=object) # object dtype to avoid casting + + # correctness of inference tested in tests/dtypes/test_inference.py + return inferred_dtype, values + + +@pytest.fixture( + params=[ + getattr(pd.offsets, o) + for o in pd.offsets.__all__ + if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) + ] +) +def tick_classes(request): + """ + Fixture for Tick based datetime offsets available for a time series. 
+ """ + return request.param + + +# ---------------------------------------------------------------- +# Global setup for tests using Hypothesis + + +# Registering these strategies makes them globally available via st.from_type, +# which is use for offsets in tests/tseries/offsets/test_offsets_properties.py +for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) + ) + +for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-5, 5), + normalize=st.booleans(), + month=st.integers(min_value=1, max_value=12), + ), + ) + +for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): + cls = getattr(pd.tseries.offsets, name) + st.register_type_strategy( + cls, + st.builds( + cls, + n=st.integers(-24, 24), + normalize=st.booleans(), + startingMonth=st.integers(min_value=1, max_value=12), + ), + ) + + +@pytest.fixture +def float_frame(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + + A B C D + P7GACiRnxd -0.465578 -0.361863 0.886172 -0.053465 + qZKh6afn8n -0.466693 -0.373773 0.266873 1.673901 + tkp0r6Qble 0.148691 -0.059051 0.174817 1.598433 + wP70WOCtv8 0.133045 -0.581994 -0.992240 0.261651 + M2AeYQMnCz -1.207959 -0.185775 0.588206 0.563938 + QEPzyGDYDo -0.381843 -0.758281 0.502575 -0.565053 + r78Jwns6dn -0.653707 0.883127 0.682199 0.206159 + ... ... ... ... ... 
+ IHEGx9NO0T -0.277360 0.113021 -1.018314 0.196316 + lPMj8K27FA -1.313667 -0.604776 -1.305618 -0.863999 + qa66YMWQa5 1.110525 0.475310 -0.747865 0.032121 + yOa0ATsmcE -0.431457 0.067094 0.096567 -0.264962 + 65znX3uRNG 1.528446 0.160416 -0.109635 -0.032987 + eCOBvKqf3e 0.235281 1.622222 0.781255 0.392871 + xSucinXxuV -1.263557 0.252799 -0.552247 0.400426 + + [30 rows x 4 columns] + """ + return DataFrame(tm.getSeriesData()) + + +@pytest.fixture(params=[pd.Index, pd.Series], ids=["index", "series"]) +def index_or_series(request): + """ + Fixture to parametrize over Index and Series, made necessary by a mypy + bug, giving an error: + + List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" + + See GH#29725 + """ + return request.param + + +@pytest.fixture +def dict_subclass(): + """ + Fixture for a dictionary subclass. + """ + + class TestSubDict(dict): + def __init__(self, *args, **kwargs): + dict.__init__(self, *args, **kwargs) + + return TestSubDict + + +@pytest.fixture +def non_mapping_dict_subclass(): + """ + Fixture for a non-mapping dictionary subclass. + """ + + class TestNonDictMapping(abc.Mapping): + def __init__(self, underlying_dict): + self._data = underlying_dict + + def __getitem__(self, key): + return self._data.__getitem__(key) + + def __iter__(self): + return self._data.__iter__() + + def __len__(self): + return self._data.__len__() + + return TestNonDictMapping diff --git a/pandas/core/__init__.py b/pandas/core/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py new file mode 100644 index 00000000..3f1c7b1c --- /dev/null +++ b/pandas/core/accessor.py @@ -0,0 +1,316 @@ +""" + +accessor.py contains base classes for implementing accessor properties +that can be mixed into or pinned onto other pandas classes. 
+ +""" +from typing import FrozenSet, Set +import warnings + +from pandas.util._decorators import Appender + + +class DirNamesMixin: + _accessors: Set[str] = set() + _deprecations: FrozenSet[str] = frozenset() + + def _dir_deletions(self): + """ + Delete unwanted __dir__ for this object. + """ + return self._accessors | self._deprecations + + def _dir_additions(self): + """ + Add additional __dir__ for this object. + """ + rv = set() + for accessor in self._accessors: + try: + getattr(self, accessor) + rv.add(accessor) + except AttributeError: + pass + return rv + + def __dir__(self): + """ + Provide method name lookup and completion. + + Notes + ----- + Only provide 'public' methods. + """ + rv = set(dir(type(self))) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) + + +class PandasDelegate: + """ + Abstract base class for delegating methods/properties. + """ + + def _delegate_property_get(self, name, *args, **kwargs): + raise TypeError(f"You cannot access the property {name}") + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise TypeError(f"The property {name} cannot be set") + + def _delegate_method(self, name, *args, **kwargs): + raise TypeError(f"You cannot call method {name}") + + @classmethod + def _add_delegate_accessors( + cls, delegate, accessors, typ: str, overwrite: bool = False + ): + """ + Add accessors to cls from the delegate class. + + Parameters + ---------- + cls + Class to add the methods/properties to. + delegate + Class to get methods/properties and doc-strings. + accessors : list of str + List of accessors to add. + typ : {'property', 'method'} + overwrite : bool, default False + Overwrite the method/property in the target class if it exists. 
+ """ + + def _create_delegator_property(name): + def _getter(self): + return self._delegate_property_get(name) + + def _setter(self, new_values): + return self._delegate_property_set(name, new_values) + + _getter.__name__ = name + _setter.__name__ = name + + return property( + fget=_getter, fset=_setter, doc=getattr(delegate, name).__doc__ + ) + + def _create_delegator_method(name): + def f(self, *args, **kwargs): + return self._delegate_method(name, *args, **kwargs) + + f.__name__ = name + f.__doc__ = getattr(delegate, name).__doc__ + + return f + + for name in accessors: + + if typ == "property": + f = _create_delegator_property(name) + else: + f = _create_delegator_method(name) + + # don't overwrite existing methods/properties + if overwrite or not hasattr(cls, name): + setattr(cls, name, f) + + +def delegate_names(delegate, accessors, typ: str, overwrite: bool = False): + """ + Add delegated names to a class using a class decorator. This provides + an alternative usage to directly calling `_add_delegate_accessors` + below a class definition. + + Parameters + ---------- + delegate : object + The class to get methods/properties & doc-strings. + accessors : Sequence[str] + List of accessor to add. + typ : {'property', 'method'} + overwrite : bool, default False + Overwrite the method/property in the target class if it exists. + + Returns + ------- + callable + A class decorator. + + Examples + -------- + @delegate_names(Categorical, ["categories", "ordered"], "property") + class CategoricalAccessor(PandasDelegate): + [...] + """ + + def add_delegate_accessors(cls): + cls._add_delegate_accessors(delegate, accessors, typ, overwrite=overwrite) + return cls + + return add_delegate_accessors + + +# Ported with modifications from xarray +# https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py +# 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors +# 2. 
We use a UserWarning instead of a custom Warning + + +class CachedAccessor: + """ + Custom property-like object. + + A descriptor for caching accessors. + + Parameters + ---------- + name : str + Namespace that will be accessed under, e.g. ``df.foo``. + accessor : cls + Class with the extension methods. + + Notes + ----- + For accessor, The class's __init__ method assumes that one of + ``Series``, ``DataFrame`` or ``Index`` as the + single argument ``data``. + """ + + def __init__(self, name: str, accessor) -> None: + self._name = name + self._accessor = accessor + + def __get__(self, obj, cls): + if obj is None: + # we're accessing the attribute of the class, i.e., Dataset.geo + return self._accessor + accessor_obj = self._accessor(obj) + # Replace the property with the accessor object. Inspired by: + # http://www.pydanny.com/cached-property.html + # We need to use object.__setattr__ because we overwrite __setattr__ on + # NDFrame + object.__setattr__(obj, self._name, accessor_obj) + return accessor_obj + + +def _register_accessor(name, cls): + def decorator(accessor): + if hasattr(cls, name): + warnings.warn( + f"registration of accessor {repr(accessor)} under name " + f"{repr(name)} for type {repr(cls)} is overriding a preexisting" + f"attribute with the same name.", + UserWarning, + stacklevel=2, + ) + setattr(cls, name, CachedAccessor(name, accessor)) + cls._accessors.add(name) + return accessor + + return decorator + + +_doc = """ +Register a custom accessor on %(klass)s objects. + +Parameters +---------- +name : str + Name under which the accessor should be registered. A warning is issued + if this name conflicts with a preexisting attribute. + +Returns +------- +callable + A class decorator. + +See Also +-------- +%(others)s + +Notes +----- +When accessed, your accessor will be initialized with the pandas object +the user is interacting with. So the signature must be + +.. code-block:: python + + def __init__(self, pandas_object): # noqa: E999 + ... 
+ +For consistency with pandas methods, you should raise an ``AttributeError`` +if the data passed to your accessor has an incorrect dtype. + +>>> pd.Series(['a', 'b']).dt +Traceback (most recent call last): +... +AttributeError: Can only use .dt accessor with datetimelike values + +Examples +-------- + +In your library code:: + + import pandas as pd + + @pd.api.extensions.register_dataframe_accessor("geo") + class GeoAccessor: + def __init__(self, pandas_obj): + self._obj = pandas_obj + + @property + def center(self): + # return the geographic center point of this DataFrame + lat = self._obj.latitude + lon = self._obj.longitude + return (float(lon.mean()), float(lat.mean())) + + def plot(self): + # plot this array's data on a map, e.g., using Cartopy + pass + +Back in an interactive IPython session: + + >>> ds = pd.DataFrame({'longitude': np.linspace(0, 10), + ... 'latitude': np.linspace(0, 20)}) + >>> ds.geo.center + (5.0, 10.0) + >>> ds.geo.plot() + # plots data on a map +""" + + +@Appender( + _doc + % dict( + klass="DataFrame", others=("register_series_accessor, register_index_accessor") + ) +) +def register_dataframe_accessor(name): + from pandas import DataFrame + + return _register_accessor(name, DataFrame) + + +@Appender( + _doc + % dict( + klass="Series", others=("register_dataframe_accessor, register_index_accessor") + ) +) +def register_series_accessor(name): + from pandas import Series + + return _register_accessor(name, Series) + + +@Appender( + _doc + % dict( + klass="Index", others=("register_dataframe_accessor, register_series_accessor") + ) +) +def register_index_accessor(name): + from pandas import Index + + return _register_accessor(name, Index) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py new file mode 100644 index 00000000..c6080628 --- /dev/null +++ b/pandas/core/algorithms.py @@ -0,0 +1,2067 @@ +""" +Generic data algorithms. 
This module is experimental at the moment and not +intended for public consumption +""" +import operator +from textwrap import dedent +from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union +from warnings import catch_warnings, simplefilter, warn + +import numpy as np + +from pandas._libs import Timestamp, algos, hashtable as htable, lib +from pandas._libs.tslib import iNaT +from pandas.util._decorators import Appender, Substitution + +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + infer_dtype_from_array, + maybe_promote, +) +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_object, + ensure_platform_int, + ensure_uint64, + is_array_like, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_extension_array_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_period_dtype, + is_scalar, + is_signed_integer_dtype, + is_timedelta64_dtype, + is_unsigned_integer_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.missing import isna, na_value_for_dtype + +import pandas.core.common as com +from pandas.core.construction import array, extract_array +from pandas.core.indexers import validate_indices + +if TYPE_CHECKING: + from pandas import Series + +_shared_docs: Dict[str, str] = {} + + +# --------------- # +# dtype access # +# --------------- # +def _ensure_data(values, dtype=None): + """ + routine to ensure that our data is of the correct + input dtype for lower-level routines + + This will coerce: + - ints -> int64 + - uint -> uint64 + - bool -> uint64 (TODO this should be uint8) + - datetimelike -> i8 + - datetime64tz -> i8 (in local tz) + - categorical -> codes + + Parameters + ---------- + values : array-like + dtype : pandas_dtype, optional + coerce to this 
dtype + + Returns + ------- + values : ndarray + pandas_dtype : str or dtype + """ + + # we check some simple dtypes first + if is_object_dtype(dtype): + return ensure_object(np.asarray(values)), "object" + elif is_object_dtype(values) and dtype is None: + return ensure_object(np.asarray(values)), "object" + + try: + if is_bool_dtype(values) or is_bool_dtype(dtype): + # we are actually coercing to uint64 + # until our algos support uint8 directly (see TODO) + return np.asarray(values).astype("uint64"), "bool" + elif is_signed_integer_dtype(values) or is_signed_integer_dtype(dtype): + return ensure_int64(values), "int64" + elif is_unsigned_integer_dtype(values) or is_unsigned_integer_dtype(dtype): + return ensure_uint64(values), "uint64" + elif is_float_dtype(values) or is_float_dtype(dtype): + return ensure_float64(values), "float64" + elif is_complex_dtype(values) or is_complex_dtype(dtype): + + # ignore the fact that we are casting to float + # which discards complex parts + with catch_warnings(): + simplefilter("ignore", np.ComplexWarning) + values = ensure_float64(values) + return values, "float64" + + except (TypeError, ValueError, OverflowError): + # if we are trying to coerce to a dtype + # and it is incompat this will fall through to here + return ensure_object(values), "object" + + # datetimelike + if ( + needs_i8_conversion(values) + or is_period_dtype(dtype) + or is_datetime64_any_dtype(dtype) + or is_timedelta64_dtype(dtype) + ): + if is_period_dtype(values) or is_period_dtype(dtype): + from pandas import PeriodIndex + + values = PeriodIndex(values) + dtype = values.dtype + elif is_timedelta64_dtype(values) or is_timedelta64_dtype(dtype): + from pandas import TimedeltaIndex + + values = TimedeltaIndex(values) + dtype = values.dtype + else: + # Datetime + if values.ndim > 1 and is_datetime64_ns_dtype(values): + # Avoid calling the DatetimeIndex constructor as it is 1D only + # Note: this is reached by DataFrame.rank calls GH#27027 + asi8 = 
values.view("i8") + dtype = values.dtype + return asi8, dtype + + from pandas import DatetimeIndex + + values = DatetimeIndex(values) + dtype = values.dtype + + return values.asi8, dtype + + elif is_categorical_dtype(values) and ( + is_categorical_dtype(dtype) or dtype is None + ): + values = getattr(values, "values", values) + values = values.codes + dtype = "category" + + # we are actually coercing to int64 + # until our algos support int* directly (not all do) + values = ensure_int64(values) + + return values, dtype + + # we have failed, return object + values = np.asarray(values, dtype=np.object) + return ensure_object(values), "object" + + +def _reconstruct_data(values, dtype, original): + """ + reverse of _ensure_data + + Parameters + ---------- + values : ndarray + dtype : pandas_dtype + original : ndarray-like + + Returns + ------- + Index for extension types, otherwise ndarray casted to dtype + """ + + if is_extension_array_dtype(dtype): + values = dtype.construct_array_type()._from_sequence(values) + elif is_bool_dtype(dtype): + values = values.astype(dtype, copy=False) + + # we only support object dtypes bool Index + if isinstance(original, ABCIndexClass): + values = values.astype(object, copy=False) + elif dtype is not None: + if is_datetime64_dtype(dtype): + dtype = "datetime64[ns]" + elif is_timedelta64_dtype(dtype): + dtype = "timedelta64[ns]" + + values = values.astype(dtype, copy=False) + + return values + + +def _ensure_arraylike(values): + """ + ensure that we are arraylike if not already + """ + if not is_array_like(values): + inferred = lib.infer_dtype(values, skipna=False) + if inferred in ["mixed", "string", "unicode"]: + if isinstance(values, tuple): + values = list(values) + values = construct_1d_object_array_from_listlike(values) + else: + values = np.asarray(values) + return values + + +_hashtables = { + "float64": htable.Float64HashTable, + "uint64": htable.UInt64HashTable, + "int64": htable.Int64HashTable, + "string": 
htable.StringHashTable, + "object": htable.PyObjectHashTable, +} + + +def _get_hashtable_algo(values): + """ + Parameters + ---------- + values : arraylike + + Returns + ------- + htable : HashTable subclass + values : ndarray + """ + values, _ = _ensure_data(values) + + ndtype = _check_object_for_strings(values) + htable = _hashtables[ndtype] + return htable, values + + +def _get_values_for_rank(values): + if is_categorical_dtype(values): + values = values._values_for_rank() + + values, _ = _ensure_data(values) + return values + + +def _get_data_algo(values): + values = _get_values_for_rank(values) + + ndtype = _check_object_for_strings(values) + htable = _hashtables.get(ndtype, _hashtables["object"]) + + return htable, values + + +def _check_object_for_strings(values) -> str: + """ + Check if we can use string hashtable instead of object hashtable. + + Parameters + ---------- + values : ndarray + ndtype : str + + Returns + ------- + str + """ + ndtype = values.dtype.name + if ndtype == "object": + + # it's cheaper to use a String Hash Table than Object; we infer + # including nulls because that is the only difference between + # StringHashTable and ObjectHashtable + if lib.infer_dtype(values, skipna=False) in ["string"]: + ndtype = "string" + return ndtype + + +# --------------- # +# top-level algos # +# --------------- # + + +def unique(values): + """ + Hash table-based unique. Uniques are returned in order + of appearance. This does NOT sort. + + Significantly faster than numpy.unique. Includes NA values. + + Parameters + ---------- + values : 1d array-like + + Returns + ------- + numpy.ndarray or ExtensionArray + + The return can be: + + * Index : when the input is an Index + * Categorical : when the input is a Categorical dtype + * ndarray : when the input is a Series/ndarray + + Return numpy.ndarray or ExtensionArray. 
+ + See Also + -------- + Index.unique + Series.unique + + Examples + -------- + >>> pd.unique(pd.Series([2, 1, 3, 3])) + array([2, 1, 3]) + + >>> pd.unique(pd.Series([2] + [1] * 5)) + array([2, 1]) + + >>> pd.unique(pd.Series([pd.Timestamp('20160101'), + ... pd.Timestamp('20160101')])) + array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + + >>> pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ... pd.Timestamp('20160101', tz='US/Eastern')])) + array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], + dtype=object) + + >>> pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + ... pd.Timestamp('20160101', tz='US/Eastern')])) + DatetimeIndex(['2016-01-01 00:00:00-05:00'], + ... dtype='datetime64[ns, US/Eastern]', freq=None) + + >>> pd.unique(list('baabc')) + array(['b', 'a', 'c'], dtype=object) + + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.unique(pd.Series(pd.Categorical(list('baabc')))) + [b, a, c] + Categories (3, object): [b, a, c] + + >>> pd.unique(pd.Series(pd.Categorical(list('baabc'), + ... categories=list('abc')))) + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering. + + >>> pd.unique(pd.Series(pd.Categorical(list('baabc'), + ... categories=list('abc'), + ... ordered=True))) + [b, a, c] + Categories (3, object): [a < b < c] + + An array of tuples + + >>> pd.unique([('a', 'b'), ('b', 'a'), ('a', 'c'), ('b', 'a')]) + array([('a', 'b'), ('b', 'a'), ('a', 'c')], dtype=object) + """ + + values = _ensure_arraylike(values) + + if is_extension_array_dtype(values): + # Dispatch to extension dtype's unique. 
+ return values.unique() + + original = values + htable, values = _get_hashtable_algo(values) + + table = htable(len(values)) + uniques = table.unique(values) + uniques = _reconstruct_data(uniques, original.dtype, original) + return uniques + + +unique1d = unique + + +def isin(comps, values) -> np.ndarray: + """ + Compute the isin boolean array. + + Parameters + ---------- + comps : array-like + values : array-like + + Returns + ------- + ndarray[bool] + Same length as `comps`. + """ + if not is_list_like(comps): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(comps).__name__}]" + ) + if not is_list_like(values): + raise TypeError( + "only list-like objects are allowed to be passed " + f"to isin(), you passed a [{type(values).__name__}]" + ) + + if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): + values = construct_1d_object_array_from_listlike(list(values)) + + if is_categorical_dtype(comps): + # TODO(extension) + # handle categoricals + return comps._values.isin(values) + + comps = com.values_from_object(comps) + + comps, dtype = _ensure_data(comps) + values, _ = _ensure_data(values, dtype=dtype) + + # faster for larger cases to use np.in1d + f = htable.ismember_object + + # GH16012 + # Ensure np.in1d doesn't get object types or it *may* throw an exception + if len(comps) > 1_000_000 and not is_object_dtype(comps): + f = np.in1d + elif is_integer_dtype(comps): + try: + values = values.astype("int64", copy=False) + comps = comps.astype("int64", copy=False) + f = htable.ismember_int64 + except (TypeError, ValueError, OverflowError): + values = values.astype(object) + comps = comps.astype(object) + + elif is_float_dtype(comps): + try: + values = values.astype("float64", copy=False) + comps = comps.astype("float64", copy=False) + f = htable.ismember_float64 + except (TypeError, ValueError): + values = values.astype(object) + comps = comps.astype(object) + + return f(comps, values) + + +def 
_factorize_array( + values, na_sentinel: int = -1, size_hint=None, na_value=None +) -> Tuple[np.ndarray, np.ndarray]: + """ + Factorize an array-like to codes and uniques. + + This doesn't do any coercion of types or unboxing before factorization. + + Parameters + ---------- + values : ndarray + na_sentinel : int, default -1 + size_hint : int, optional + Passsed through to the hashtable's 'get_labels' method + na_value : object, optional + A value in `values` to consider missing. Note: only use this + parameter when you know that you don't have any values pandas would + consider missing in the array (NaN for float data, iNaT for + datetimes, etc.). + + Returns + ------- + codes : ndarray + uniques : ndarray + """ + hash_klass, values = _get_data_algo(values) + + table = hash_klass(size_hint or len(values)) + uniques, codes = table.factorize(values, na_sentinel=na_sentinel, na_value=na_value) + + codes = ensure_platform_int(codes) + return codes, uniques + + +_shared_docs[ + "factorize" +] = """ + Encode the object as an enumerated type or categorical variable. + + This method is useful for obtaining a numeric representation of an + array when all that matters is identifying distinct values. `factorize` + is available as both a top-level function :func:`pandas.factorize`, + and as a method :meth:`Series.factorize` and :meth:`Index.factorize`. + + Parameters + ---------- + %(values)s%(sort)s + na_sentinel : int, default -1 + Value to mark "not found". + %(size_hint)s\ + + Returns + ------- + codes : ndarray + An integer ndarray that's an indexer into `uniques`. + ``uniques.take(codes)`` will have the same values as `values`. + uniques : ndarray, Index, or Categorical + The unique valid values. When `values` is Categorical, `uniques` + is a Categorical. When `values` is some other pandas object, an + `Index` is returned. Otherwise, a 1-D ndarray is returned. + + .. note :: + + Even if there's a missing value in `values`, `uniques` will + *not* contain an entry for it. 
+ + See Also + -------- + cut : Discretize continuous-valued array. + unique : Find the unique value in an array. + + Examples + -------- + These examples all show factorize as a top-level method like + ``pd.factorize(values)``. The results are identical for methods like + :meth:`Series.factorize`. + + >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b']) + >>> codes + array([0, 0, 1, 2, 0]) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + With ``sort=True``, the `uniques` will be sorted, and `codes` will be + shuffled so that the relationship is the maintained. + + >>> codes, uniques = pd.factorize(['b', 'b', 'a', 'c', 'b'], sort=True) + >>> codes + array([1, 1, 0, 2, 1]) + >>> uniques + array(['a', 'b', 'c'], dtype=object) + + Missing values are indicated in `codes` with `na_sentinel` + (``-1`` by default). Note that missing values are never + included in `uniques`. + + >>> codes, uniques = pd.factorize(['b', None, 'a', 'c', 'b']) + >>> codes + array([ 0, -1, 1, 2, 0]) + >>> uniques + array(['b', 'a', 'c'], dtype=object) + + Thus far, we've only factorized lists (which are internally coerced to + NumPy arrays). When factorizing pandas objects, the type of `uniques` + will differ. For Categoricals, a `Categorical` is returned. + + >>> cat = pd.Categorical(['a', 'a', 'c'], categories=['a', 'b', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]) + >>> uniques + [a, c] + Categories (3, object): [a, b, c] + + Notice that ``'b'`` is in ``uniques.categories``, despite not being + present in ``cat.values``. + + For all other pandas objects, an Index of the appropriate type is + returned. + + >>> cat = pd.Series(['a', 'a', 'c']) + >>> codes, uniques = pd.factorize(cat) + >>> codes + array([0, 0, 1]) + >>> uniques + Index(['a', 'c'], dtype='object') + """ + + +@Substitution( + values=dedent( + """\ + values : sequence + A 1-D sequence. Sequences that aren't pandas objects are + coerced to ndarrays before factorization. 
+ """ + ), + sort=dedent( + """\ + sort : bool, default False + Sort `uniques` and shuffle `codes` to maintain the + relationship. + """ + ), + size_hint=dedent( + """\ + size_hint : int, optional + Hint to the hashtable sizer. + """ + ), +) +@Appender(_shared_docs["factorize"]) +def factorize( + values, sort: bool = False, na_sentinel: int = -1, size_hint: Optional[int] = None +) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]: + # Implementation notes: This method is responsible for 3 things + # 1.) coercing data to array-like (ndarray, Index, extension array) + # 2.) factorizing codes and uniques + # 3.) Maybe boxing the uniques in an Index + # + # Step 2 is dispatched to extension types (like Categorical). They are + # responsible only for factorization. All data coercion, sorting and boxing + # should happen here. + + values = _ensure_arraylike(values) + original = values + + if is_extension_array_dtype(values): + values = extract_array(values) + codes, uniques = values.factorize(na_sentinel=na_sentinel) + dtype = original.dtype + else: + values, dtype = _ensure_data(values) + + if original.dtype.kind in ["m", "M"]: + na_value = na_value_for_dtype(original.dtype) + else: + na_value = None + + codes, uniques = _factorize_array( + values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value + ) + + if sort and len(uniques) > 0: + uniques, codes = safe_sort( + uniques, codes, na_sentinel=na_sentinel, assume_unique=True, verify=False + ) + + uniques = _reconstruct_data(uniques, dtype, original) + + # return original tenor + if isinstance(original, ABCIndexClass): + uniques = original._shallow_copy(uniques, name=None) + elif isinstance(original, ABCSeries): + from pandas import Index + + uniques = Index(uniques) + + return codes, uniques + + +def value_counts( + values, + sort: bool = True, + ascending: bool = False, + normalize: bool = False, + bins=None, + dropna: bool = True, +) -> "Series": + """ + Compute a histogram of the counts of non-null 
values. + + Parameters + ---------- + values : ndarray (1-d) + sort : bool, default True + Sort by values + ascending : bool, default False + Sort in ascending order + normalize: bool, default False + If True then compute a relative histogram + bins : integer, optional + Rather than count values, group them into half-open bins, + convenience for pd.cut, only works with numeric data + dropna : bool, default True + Don't include counts of NaN + + Returns + ------- + Series + """ + from pandas.core.series import Series + + name = getattr(values, "name", None) + + if bins is not None: + from pandas.core.reshape.tile import cut + + values = Series(values) + try: + ii = cut(values, bins, include_lowest=True) + except TypeError: + raise TypeError("bins argument only works with numeric data.") + + # count, remove nulls (from the index), and but the bins + result = ii.value_counts(dropna=dropna) + result = result[result.index.notna()] + result.index = result.index.astype("interval") + result = result.sort_index() + + # if we are dropna and we have NO values + if dropna and (result.values == 0).all(): + result = result.iloc[0:0] + + # normalizing is by len of all (regardless of dropna) + counts = np.array([len(ii)]) + + else: + + if is_extension_array_dtype(values): + + # handle Categorical and sparse, + result = Series(values)._values.value_counts(dropna=dropna) + result.name = name + counts = result.values + + else: + keys, counts = _value_counts_arraylike(values, dropna) + + result = Series(counts, index=keys, name=name) + + if sort: + result = result.sort_values(ascending=ascending) + + if normalize: + result = result / float(counts.sum()) + + return result + + +def _value_counts_arraylike(values, dropna: bool): + """ + Parameters + ---------- + values : arraylike + dropna : bool + + Returns + ------- + uniques : np.ndarray or ExtensionArray + counts : np.ndarray + """ + values = _ensure_arraylike(values) + original = values + values, _ = _ensure_data(values) + ndtype = 
values.dtype.name + + if needs_i8_conversion(original.dtype): + # datetime, timedelta, or period + + keys, counts = htable.value_count_int64(values, dropna) + + if dropna: + msk = keys != iNaT + keys, counts = keys[msk], counts[msk] + + else: + # ndarray like + + # TODO: handle uint8 + f = getattr(htable, f"value_count_{ndtype}") + keys, counts = f(values, dropna) + + mask = isna(values) + if not dropna and mask.any(): + if not isna(keys).any(): + keys = np.insert(keys, 0, np.NaN) + counts = np.insert(counts, 0, mask.sum()) + + keys = _reconstruct_data(keys, original.dtype, original) + + return keys, counts + + +def duplicated(values, keep="first") -> np.ndarray: + """ + Return boolean ndarray denoting duplicate values. + + Parameters + ---------- + values : ndarray-like + Array over which to check for duplicate values. + keep : {'first', 'last', False}, default 'first' + - ``first`` : Mark duplicates as ``True`` except for the first + occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last + occurrence. + - False : Mark all duplicates as ``True``. + + Returns + ------- + duplicated : ndarray + """ + + values, _ = _ensure_data(values) + ndtype = values.dtype.name + f = getattr(htable, f"duplicated_{ndtype}") + return f(values, keep=keep) + + +def mode(values, dropna: bool = True) -> "Series": + """ + Returns the mode(s) of an array. + + Parameters + ---------- + values : array-like + Array over which to check for duplicate values. + dropna : boolean, default True + Don't consider counts of NaN/NaT. + + .. 
versionadded:: 0.24.0 + + Returns + ------- + mode : Series + """ + from pandas import Series + + values = _ensure_arraylike(values) + original = values + + # categorical is a fast-path + if is_categorical_dtype(values): + if isinstance(values, Series): + return Series(values.values.mode(dropna=dropna), name=values.name) + return values.mode(dropna=dropna) + + if dropna and needs_i8_conversion(values.dtype): + mask = values.isnull() + values = values[~mask] + + values, _ = _ensure_data(values) + ndtype = values.dtype.name + + f = getattr(htable, f"mode_{ndtype}") + result = f(values, dropna=dropna) + try: + result = np.sort(result) + except TypeError as err: + warn(f"Unable to sort modes: {err}") + + result = _reconstruct_data(result, original.dtype, original) + return Series(result) + + +def rank( + values, + axis: int = 0, + method: str = "average", + na_option: str = "keep", + ascending: bool = True, + pct: bool = False, +): + """ + Rank the values along a given axis. + + Parameters + ---------- + values : array-like + Array whose values will be ranked. The number of dimensions in this + array must not exceed 2. + axis : int, default 0 + Axis over which to perform rankings. + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + The method by which tiebreaks are broken during the ranking. + na_option : {'keep', 'top'}, default 'keep' + The method by which NaNs are placed in the ranking. + - ``keep``: rank each NaN value with a NaN ranking + - ``top``: replace each NaN with either +/- inf so that they + there are ranked at the top + ascending : boolean, default True + Whether or not the elements should be ranked in ascending order. + pct : boolean, default False + Whether or not to the display the returned rankings in integer form + (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1). 
+ """ + if values.ndim == 1: + values = _get_values_for_rank(values) + ranks = algos.rank_1d( + values, + ties_method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + elif values.ndim == 2: + values = _get_values_for_rank(values) + ranks = algos.rank_2d( + values, + axis=axis, + ties_method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + else: + raise TypeError("Array with ndim > 2 are not supported.") + + return ranks + + +def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None): + """ + Perform array addition that checks for underflow and overflow. + + Performs the addition of an int64 array and an int64 integer (or array) + but checks that they do not result in overflow first. For elements that + are indicated to be NaN, whether or not there is overflow for that element + is automatically ignored. + + Parameters + ---------- + arr : array addend. + b : array or scalar addend. + arr_mask : boolean array or None + array indicating which elements to exclude from checking + b_mask : boolean array or boolean or None + array or scalar indicating which element(s) to exclude from checking + + Returns + ------- + sum : An array for elements x + b for each element x in arr if b is + a scalar or an array for elements x + y for each element pair + (x, y) in (arr, b). + + Raises + ------ + OverflowError if any x + y exceeds the maximum or minimum int64 value. + """ + # For performance reasons, we broadcast 'b' to the new array 'b2' + # so that it has the same size as 'arr'. + b2 = np.broadcast_to(b, arr.shape) + if b_mask is not None: + # We do the same broadcasting for b_mask as well. + b2_mask = np.broadcast_to(b_mask, arr.shape) + else: + b2_mask = None + + # For elements that are NaN, regardless of their value, we should + # ignore whether they overflow or not when doing the checked add. 
+ if arr_mask is not None and b2_mask is not None: + not_nan = np.logical_not(arr_mask | b2_mask) + elif arr_mask is not None: + not_nan = np.logical_not(arr_mask) + elif b_mask is not None: + not_nan = np.logical_not(b2_mask) + else: + not_nan = np.empty(arr.shape, dtype=bool) + not_nan.fill(True) + + # gh-14324: For each element in 'arr' and its corresponding element + # in 'b2', we check the sign of the element in 'b2'. If it is positive, + # we then check whether its sum with the element in 'arr' exceeds + # np.iinfo(np.int64).max. If so, we have an overflow error. If it + # it is negative, we then check whether its sum with the element in + # 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow + # error as well. + mask1 = b2 > 0 + mask2 = b2 < 0 + + if not mask1.any(): + to_raise = ((np.iinfo(np.int64).min - b2 > arr) & not_nan).any() + elif not mask2.any(): + to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any() + else: + to_raise = ( + ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any() + or ( + (np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2] + ).any() + ) + + if to_raise: + raise OverflowError("Overflow in int64 addition") + return arr + b + + +def quantile(x, q, interpolation_method="fraction"): + """ + Compute sample quantile or quantiles of the input array. For example, q=0.5 + computes the median. + + The `interpolation_method` parameter supports three values, namely + `fraction` (default), `lower` and `higher`. Interpolation is done only, + if the desired quantile lies between two data points `i` and `j`. For + `fraction`, the result is an interpolated value between `i` and `j`; + for `lower`, the result is `i`, for `higher` the result is `j`. + + Parameters + ---------- + x : ndarray + Values from which to extract score. + q : scalar or array + Percentile at which to extract score. 
+ interpolation_method : {'fraction', 'lower', 'higher'}, optional + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + - fraction: `i + (j - i)*fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + -lower: `i`. + - higher: `j`. + + Returns + ------- + score : float + Score at percentile. + + Examples + -------- + >>> from scipy import stats + >>> a = np.arange(100) + >>> stats.scoreatpercentile(a, 50) + 49.5 + + """ + x = np.asarray(x) + mask = isna(x) + + x = x[~mask] + + values = np.sort(x) + + def _interpolate(a, b, fraction): + """ + Returns the point at the given fraction between a and b, where + 'fraction' must be between 0 and 1. + """ + return a + (b - a) * fraction + + def _get_score(at): + if len(values) == 0: + return np.nan + + idx = at * (len(values) - 1) + if idx % 1 == 0: + score = values[int(idx)] + else: + if interpolation_method == "fraction": + score = _interpolate(values[int(idx)], values[int(idx) + 1], idx % 1) + elif interpolation_method == "lower": + score = values[np.floor(idx)] + elif interpolation_method == "higher": + score = values[np.ceil(idx)] + else: + raise ValueError( + "interpolation_method can only be 'fraction' " + ", 'lower' or 'higher'" + ) + + return score + + if is_scalar(q): + return _get_score(q) + else: + q = np.asarray(q, np.float64) + result = [_get_score(x) for x in q] + result = np.array(result, dtype=np.float64) + return result + + +# --------------- # +# select n # +# --------------- # + + +class SelectN: + def __init__(self, obj, n: int, keep: str): + self.obj = obj + self.n = n + self.keep = keep + + if self.keep not in ("first", "last", "all"): + raise ValueError('keep must be either "first", "last" or "all"') + + def nlargest(self): + return self.compute("nlargest") + + def nsmallest(self): + return self.compute("nsmallest") + + @staticmethod + def is_valid_dtype_n_method(dtype) -> 
bool: + """ + Helper function to determine if dtype is valid for + nsmallest/nlargest methods + """ + return ( + is_numeric_dtype(dtype) and not is_complex_dtype(dtype) + ) or needs_i8_conversion(dtype) + + +class SelectNSeries(SelectN): + """ + Implement n largest/smallest for Series + + Parameters + ---------- + obj : Series + n : int + keep : {'first', 'last'}, default 'first' + + Returns + ------- + nordered : Series + """ + + def compute(self, method): + + n = self.n + dtype = self.obj.dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError(f"Cannot use method '{method}' with dtype {dtype}") + + if n <= 0: + return self.obj[[]] + + dropped = self.obj.dropna() + + # slow method + if n >= len(self.obj): + reverse_it = self.keep == "last" or method == "nlargest" + ascending = method == "nsmallest" + slc = np.s_[::-1] if reverse_it else np.s_[:] + return dropped[slc].sort_values(ascending=ascending).head(n) + + # fast method + arr, pandas_dtype = _ensure_data(dropped.values) + if method == "nlargest": + arr = -arr + if is_integer_dtype(pandas_dtype): + # GH 21426: ensure reverse ordering at boundaries + arr -= 1 + + elif is_bool_dtype(pandas_dtype): + # GH 26154: ensure False is smaller than True + arr = 1 - (-arr) + + if self.keep == "last": + arr = arr[::-1] + + narr = len(arr) + n = min(n, narr) + + kth_val = algos.kth_smallest(arr.copy(), n - 1) + (ns,) = np.nonzero(arr <= kth_val) + inds = ns[arr[ns].argsort(kind="mergesort")] + + if self.keep != "all": + inds = inds[:n] + + if self.keep == "last": + # reverse indices + inds = narr - 1 - inds + + return dropped.iloc[inds] + + +class SelectNFrame(SelectN): + """ + Implement n largest/smallest for DataFrame + + Parameters + ---------- + obj : DataFrame + n : int + keep : {'first', 'last'}, default 'first' + columns : list or str + + Returns + ------- + nordered : DataFrame + """ + + def __init__(self, obj, n: int, keep: str, columns): + super().__init__(obj, n, keep) + if not is_list_like(columns) 
or isinstance(columns, tuple): + columns = [columns] + columns = list(columns) + self.columns = columns + + def compute(self, method): + + from pandas import Int64Index + + n = self.n + frame = self.obj + columns = self.columns + + for column in columns: + dtype = frame[column].dtype + if not self.is_valid_dtype_n_method(dtype): + raise TypeError( + f"Column {repr(column)} has dtype {dtype}, " + f"cannot use method {repr(method)} with this dtype" + ) + + def get_indexer(current_indexer, other_indexer): + """ + Helper function to concat `current_indexer` and `other_indexer` + depending on `method` + """ + if method == "nsmallest": + return current_indexer.append(other_indexer) + else: + return other_indexer.append(current_indexer) + + # Below we save and reset the index in case index contains duplicates + original_index = frame.index + cur_frame = frame = frame.reset_index(drop=True) + cur_n = n + indexer = Int64Index([]) + + for i, column in enumerate(columns): + # For each column we apply method to cur_frame[column]. + # If it's the last column or if we have the number of + # results desired we are done. + # Otherwise there are duplicates of the largest/smallest + # value and we need to look at the rest of the columns + # to determine which of the rows with the largest/smallest + # value in the column to keep. + series = cur_frame[column] + is_last_column = len(columns) - 1 == i + values = getattr(series, method)( + cur_n, keep=self.keep if is_last_column else "all" + ) + + if is_last_column or len(values) <= cur_n: + indexer = get_indexer(indexer, values.index) + break + + # Now find all values which are equal to + # the (nsmallest: largest)/(nlarrgest: smallest) + # from our series. + border_value = values == values[values.index[-1]] + + # Some of these values are among the top-n + # some aren't. 
+ unsafe_values = values[border_value] + + # These values are definitely among the top-n + safe_values = values[~border_value] + indexer = get_indexer(indexer, safe_values.index) + + # Go on and separate the unsafe_values on the remaining + # columns. + cur_frame = cur_frame.loc[unsafe_values.index] + cur_n = n - len(indexer) + + frame = frame.take(indexer) + + # Restore the index on frame + frame.index = original_index.take(indexer) + + # If there is only one column, the frame is already sorted. + if len(columns) == 1: + return frame + + ascending = method == "nsmallest" + + return frame.sort_values(columns, ascending=ascending, kind="mergesort") + + +# ---- # +# take # +# ---- # + + +def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None): + def wrapper(arr, indexer, out, fill_value=np.nan): + if arr_dtype is not None: + arr = arr.view(arr_dtype) + if out_dtype is not None: + out = out.view(out_dtype) + if fill_wrap is not None: + fill_value = fill_wrap(fill_value) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +def _convert_wrapper(f, conv_dtype): + def wrapper(arr, indexer, out, fill_value=np.nan): + arr = arr.astype(conv_dtype) + f(arr, indexer, out, fill_value=fill_value) + + return wrapper + + +def _take_2d_multi_object(arr, indexer, out, fill_value, mask_info): + # this is not ideal, performance-wise, but it's better than raising + # an exception (best to optimize in Cython to avoid getting here) + row_idx, col_idx = indexer + if mask_info is not None: + (row_mask, col_mask), (row_needs, col_needs) = mask_info + else: + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + if fill_value is not None: + if row_needs: + out[row_mask, :] = fill_value + if col_needs: + out[:, col_mask] = fill_value + for i in range(len(row_idx)): + u_ = row_idx[i] + for j in range(len(col_idx)): + v = col_idx[j] + out[i, j] = arr[u_, v] + + +def _take_nd_object(arr, indexer, out, axis: 
int, fill_value, mask_info): + if mask_info is not None: + mask, needs_masking = mask_info + else: + mask = indexer == -1 + needs_masking = mask.any() + if arr.dtype != out.dtype: + arr = arr.astype(out.dtype) + if arr.shape[axis] > 0: + arr.take(ensure_platform_int(indexer), axis=axis, out=out) + if needs_masking: + outindexer = [slice(None)] * arr.ndim + outindexer[axis] = mask + out[tuple(outindexer)] = fill_value + + +_take_1d_dict = { + ("int8", "int8"): algos.take_1d_int8_int8, + ("int8", "int32"): algos.take_1d_int8_int32, + ("int8", "int64"): algos.take_1d_int8_int64, + ("int8", "float64"): algos.take_1d_int8_float64, + ("int16", "int16"): algos.take_1d_int16_int16, + ("int16", "int32"): algos.take_1d_int16_int32, + ("int16", "int64"): algos.take_1d_int16_int64, + ("int16", "float64"): algos.take_1d_int16_float64, + ("int32", "int32"): algos.take_1d_int32_int32, + ("int32", "int64"): algos.take_1d_int32_int64, + ("int32", "float64"): algos.take_1d_int32_float64, + ("int64", "int64"): algos.take_1d_int64_int64, + ("int64", "float64"): algos.take_1d_int64_float64, + ("float32", "float32"): algos.take_1d_float32_float32, + ("float32", "float64"): algos.take_1d_float32_float64, + ("float64", "float64"): algos.take_1d_float64_float64, + ("object", "object"): algos.take_1d_object_object, + ("bool", "bool"): _view_wrapper(algos.take_1d_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(algos.take_1d_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_1d_int64_int64, np.int64, np.int64, np.int64 + ), +} + +_take_2d_axis0_dict = { + ("int8", "int8"): algos.take_2d_axis0_int8_int8, + ("int8", "int32"): algos.take_2d_axis0_int8_int32, + ("int8", "int64"): algos.take_2d_axis0_int8_int64, + ("int8", "float64"): algos.take_2d_axis0_int8_float64, + ("int16", "int16"): algos.take_2d_axis0_int16_int16, + ("int16", "int32"): algos.take_2d_axis0_int16_int32, + ("int16", "int64"): algos.take_2d_axis0_int16_int64, + 
("int16", "float64"): algos.take_2d_axis0_int16_float64, + ("int32", "int32"): algos.take_2d_axis0_int32_int32, + ("int32", "int64"): algos.take_2d_axis0_int32_int64, + ("int32", "float64"): algos.take_2d_axis0_int32_float64, + ("int64", "int64"): algos.take_2d_axis0_int64_int64, + ("int64", "float64"): algos.take_2d_axis0_int64_float64, + ("float32", "float32"): algos.take_2d_axis0_float32_float32, + ("float32", "float64"): algos.take_2d_axis0_float32_float64, + ("float64", "float64"): algos.take_2d_axis0_float64_float64, + ("object", "object"): algos.take_2d_axis0_object_object, + ("bool", "bool"): _view_wrapper(algos.take_2d_axis0_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(algos.take_2d_axis0_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + +_take_2d_axis1_dict = { + ("int8", "int8"): algos.take_2d_axis1_int8_int8, + ("int8", "int32"): algos.take_2d_axis1_int8_int32, + ("int8", "int64"): algos.take_2d_axis1_int8_int64, + ("int8", "float64"): algos.take_2d_axis1_int8_float64, + ("int16", "int16"): algos.take_2d_axis1_int16_int16, + ("int16", "int32"): algos.take_2d_axis1_int16_int32, + ("int16", "int64"): algos.take_2d_axis1_int16_int64, + ("int16", "float64"): algos.take_2d_axis1_int16_float64, + ("int32", "int32"): algos.take_2d_axis1_int32_int32, + ("int32", "int64"): algos.take_2d_axis1_int32_int64, + ("int32", "float64"): algos.take_2d_axis1_int32_float64, + ("int64", "int64"): algos.take_2d_axis1_int64_int64, + ("int64", "float64"): algos.take_2d_axis1_int64_float64, + ("float32", "float32"): algos.take_2d_axis1_float32_float32, + ("float32", "float64"): algos.take_2d_axis1_float32_float64, + ("float64", "float64"): algos.take_2d_axis1_float64_float64, + ("object", "object"): algos.take_2d_axis1_object_object, + ("bool", "bool"): _view_wrapper(algos.take_2d_axis1_bool_bool, np.uint8, np.uint8), + ("bool", 
"object"): _view_wrapper(algos.take_2d_axis1_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + +_take_2d_multi_dict = { + ("int8", "int8"): algos.take_2d_multi_int8_int8, + ("int8", "int32"): algos.take_2d_multi_int8_int32, + ("int8", "int64"): algos.take_2d_multi_int8_int64, + ("int8", "float64"): algos.take_2d_multi_int8_float64, + ("int16", "int16"): algos.take_2d_multi_int16_int16, + ("int16", "int32"): algos.take_2d_multi_int16_int32, + ("int16", "int64"): algos.take_2d_multi_int16_int64, + ("int16", "float64"): algos.take_2d_multi_int16_float64, + ("int32", "int32"): algos.take_2d_multi_int32_int32, + ("int32", "int64"): algos.take_2d_multi_int32_int64, + ("int32", "float64"): algos.take_2d_multi_int32_float64, + ("int64", "int64"): algos.take_2d_multi_int64_int64, + ("int64", "float64"): algos.take_2d_multi_int64_float64, + ("float32", "float32"): algos.take_2d_multi_float32_float32, + ("float32", "float64"): algos.take_2d_multi_float32_float64, + ("float64", "float64"): algos.take_2d_multi_float64_float64, + ("object", "object"): algos.take_2d_multi_object_object, + ("bool", "bool"): _view_wrapper(algos.take_2d_multi_bool_bool, np.uint8, np.uint8), + ("bool", "object"): _view_wrapper(algos.take_2d_multi_bool_object, np.uint8, None), + ("datetime64[ns]", "datetime64[ns]"): _view_wrapper( + algos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64 + ), +} + + +def _get_take_nd_function( + ndim: int, arr_dtype, out_dtype, axis: int = 0, mask_info=None +): + if ndim <= 2: + tup = (arr_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + return func + + tup = (out_dtype.name, out_dtype.name) + if ndim == 1: + func = _take_1d_dict.get(tup, None) + elif 
ndim == 2: + if axis == 0: + func = _take_2d_axis0_dict.get(tup, None) + else: + func = _take_2d_axis1_dict.get(tup, None) + if func is not None: + func = _convert_wrapper(func, out_dtype) + return func + + def func2(arr, indexer, out, fill_value=np.nan): + indexer = ensure_int64(indexer) + _take_nd_object( + arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info + ) + + return func2 + + +def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None): + """ + Take elements from an array. + + .. versionadded:: 0.23.0 + + Parameters + ---------- + arr : sequence + Non array-likes (sequences without a dtype) are coerced + to an ndarray. + indices : sequence of integers + Indices to be taken. + axis : int, default 0 + The axis over which to select values. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type (``self.dtype.na_value``) is used. + + For multi-dimensional `arr`, each *element* is filled with + `fill_value`. + + Returns + ------- + ndarray or ExtensionArray + Same type as the input. + + Raises + ------ + IndexError + When `indices` is out of bounds for the array. + ValueError + When the indexer contains negative values other than ``-1`` + and `allow_fill` is True. + + Notes + ----- + When `allow_fill` is False, `indices` may be whatever dimensionality + is accepted by NumPy for `arr`. + + When `allow_fill` is True, `indices` should be 1-D. 
+ + See Also + -------- + numpy.take + + Examples + -------- + >>> from pandas.api.extensions import take + + With the default ``allow_fill=False``, negative numbers indicate + positional indices from the right. + + >>> take(np.array([10, 20, 30]), [0, 0, -1]) + array([10, 10, 30]) + + Setting ``allow_fill=True`` will place `fill_value` in those positions. + + >>> take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True) + array([10., 10., nan]) + + >>> take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True, + ... fill_value=-10) + array([ 10, 10, -10]) + """ + if not is_array_like(arr): + arr = np.asarray(arr) + + indices = np.asarray(indices, dtype=np.intp) + + if allow_fill: + # Pandas style, -1 means NA + validate_indices(indices, arr.shape[axis]) + result = take_1d( + arr, indices, axis=axis, allow_fill=True, fill_value=fill_value + ) + else: + # NumPy style + result = arr.take(indices, axis=axis) + return result + + +def take_nd( + arr, indexer, axis: int = 0, out=None, fill_value=np.nan, allow_fill: bool = True +): + """ + Specialized Cython take which sets NaN values in one pass + + This dispatches to ``take`` defined on ExtensionArrays. It does not + currently dispatch to ``SparseArray.take`` for sparse ``arr``. + + Parameters + ---------- + arr : array-like + Input array. + indexer : ndarray + 1-D array of indices to take, subarrays corresponding to -1 value + indices are filed with fill_value + axis : int, default 0 + Axis to take from + out : ndarray or None, default None + Optional output array, must be appropriate type to hold input and + fill_value together, if indexer has any -1 value entries; call + maybe_promote to determine this type for any fill_value + fill_value : any, default np.nan + Fill value to replace -1 values with + allow_fill : boolean, default True + If False, indexer is assumed to contain no -1 values so no filling + will be done. This short-circuits computation of a mask. 
Result is + undefined if allow_fill == False and -1 is present in indexer. + + Returns + ------- + subarray : array-like + May be the same type as the input, or cast to an ndarray. + """ + mask_info = None + + if is_extension_array_dtype(arr): + return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) + + arr = extract_array(arr) + arr = np.asarray(arr) + + if indexer is None: + indexer = np.arange(arr.shape[axis], dtype=np.int64) + dtype, fill_value = arr.dtype, arr.dtype.type() + else: + indexer = ensure_int64(indexer, copy=False) + if not allow_fill: + dtype, fill_value = arr.dtype, arr.dtype.type() + mask_info = None, False + else: + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype and (out is None or out.dtype != dtype): + # check if promotion is actually required based on indexer + mask = indexer == -1 + needs_masking = mask.any() + mask_info = mask, needs_masking + if needs_masking: + if out is not None and out.dtype != dtype: + raise TypeError("Incompatible type for fill_value") + else: + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + flip_order = False + if arr.ndim == 2: + if arr.flags.f_contiguous: + flip_order = True + + if flip_order: + arr = arr.T + axis = arr.ndim - axis - 1 + if out is not None: + out = out.T + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + if out is None: + out_shape_ = list(arr.shape) + out_shape_[axis] = len(indexer) + out_shape = tuple(out_shape_) + if arr.flags.f_contiguous and axis == arr.ndim - 1: + # minor tweak that can make an order-of-magnitude difference + # for dataframes initialized directly from 2-d ndarrays + # (s.t. 
df.values is c-contiguous and df._data.blocks[0] is its + # f-contiguous transpose) + out = np.empty(out_shape, dtype=dtype, order="F") + else: + out = np.empty(out_shape, dtype=dtype) + + func = _get_take_nd_function( + arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info + ) + func(arr, indexer, out, fill_value) + + if flip_order: + out = out.T + return out + + +take_1d = take_nd + + +def take_2d_multi(arr, indexer, fill_value=np.nan): + """ + Specialized Cython take which sets NaN values in one pass. + """ + # This is only called from one place in DataFrame._reindex_multi, + # so we know indexer is well-behaved. + assert indexer is not None + assert indexer[0] is not None + assert indexer[1] is not None + + row_idx, col_idx = indexer + + row_idx = ensure_int64(row_idx) + col_idx = ensure_int64(col_idx) + indexer = row_idx, col_idx + mask_info = None + + # check for promotion based on types only (do this first because + # it's faster than computing a mask) + dtype, fill_value = maybe_promote(arr.dtype, fill_value) + if dtype != arr.dtype: + # check if promotion is actually required based on indexer + row_mask = row_idx == -1 + col_mask = col_idx == -1 + row_needs = row_mask.any() + col_needs = col_mask.any() + mask_info = (row_mask, col_mask), (row_needs, col_needs) + + if not (row_needs or col_needs): + # if not, then depromote, set fill_value to dummy + # (it won't be used but we don't want the cython code + # to crash when trying to cast it to dtype) + dtype, fill_value = arr.dtype, arr.dtype.type() + + # at this point, it's guaranteed that dtype can hold both the arr values + # and the fill_value + out_shape = len(row_idx), len(col_idx) + out = np.empty(out_shape, dtype=dtype) + + func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) + if func is None and arr.dtype != out.dtype: + func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None) + if func is not None: + func = _convert_wrapper(func, out.dtype) + if func is 
None: + + def func(arr, indexer, out, fill_value=np.nan): + _take_2d_multi_object( + arr, indexer, out, fill_value=fill_value, mask_info=mask_info + ) + + func(arr, indexer, out=out, fill_value=fill_value) + return out + + +# ------------ # +# searchsorted # +# ------------ # + + +def searchsorted(arr, value, side="left", sorter=None): + """ + Find indices where elements should be inserted to maintain order. + + .. versionadded:: 0.25.0 + + Find the indices into a sorted array `arr` (a) such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `arr` would be preserved. + + Assuming that `arr` is sorted: + + ====== ================================ + `side` returned index `i` satisfies + ====== ================================ + left ``arr[i-1] < value <= self[i]`` + right ``arr[i-1] <= value < self[i]`` + ====== ================================ + + Parameters + ---------- + arr: array-like + Input array. If `sorter` is None, then it must be sorted in + ascending order, otherwise `sorter` must be an array of indices + that sort it. + value : array_like + Values to insert into `arr`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array_like, optional + Optional array of integer indices that sort array a into ascending + order. They are typically the result of argsort. + + Returns + ------- + array of ints + Array of insertion points with the same shape as `value`. + + See Also + -------- + numpy.searchsorted : Similar method from NumPy. 
+ """ + if sorter is not None: + sorter = ensure_platform_int(sorter) + + if ( + isinstance(arr, np.ndarray) + and is_integer_dtype(arr) + and (is_integer(value) or is_integer_dtype(value)) + ): + # if `arr` and `value` have different dtypes, `arr` would be + # recast by numpy, causing a slow search. + # Before searching below, we therefore try to give `value` the + # same dtype as `arr`, while guarding against integer overflows. + iinfo = np.iinfo(arr.dtype.type) + value_arr = np.array([value]) if is_scalar(value) else np.array(value) + if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): + # value within bounds, so no overflow, so can convert value dtype + # to dtype of arr + dtype = arr.dtype + else: + dtype = value_arr.dtype + + if is_scalar(value): + value = dtype.type(value) + else: + value = array(value, dtype=dtype) + elif not ( + is_object_dtype(arr) or is_numeric_dtype(arr) or is_categorical_dtype(arr) + ): + # E.g. if `arr` is an array with dtype='datetime64[ns]' + # and `value` is a pd.Timestamp, we may need to convert value + value_ser = array([value]) if is_scalar(value) else array(value) + value = value_ser[0] if is_scalar(value) else value_ser + if isinstance(value, Timestamp) and value.tzinfo is None: + value = value.to_datetime64() + + result = arr.searchsorted(value, side=side, sorter=sorter) + return result + + +# ---- # +# diff # +# ---- # + +_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} + + +def diff(arr, n: int, axis: int = 0, stacklevel=3): + """ + difference of n between self, + analogous to s-s.shift(n) + + Parameters + ---------- + arr : ndarray + n : int + number of periods + axis : int + axis to shift on + stacklevel : int + The stacklevel for the lost dtype warning. 
+ + Returns + ------- + shifted + """ + from pandas.core.arrays import PandasDtype + + n = int(n) + na = np.nan + dtype = arr.dtype + + if dtype.kind == "b": + op = operator.xor + else: + op = operator.sub + + if isinstance(dtype, PandasDtype): + # PandasArray cannot necessarily hold shifted versions of itself. + arr = np.asarray(arr) + dtype = arr.dtype + + if is_extension_array_dtype(dtype): + if hasattr(arr, f"__{op.__name__}__"): + return op(arr, arr.shift(n)) + else: + warn( + "dtype lost in 'diff()'. In the future this will raise a " + "TypeError. Convert to a suitable dtype prior to calling 'diff'.", + FutureWarning, + stacklevel=stacklevel, + ) + arr = np.asarray(arr) + dtype = arr.dtype + + is_timedelta = False + is_bool = False + if needs_i8_conversion(arr): + dtype = np.float64 + arr = arr.view("i8") + na = iNaT + is_timedelta = True + + elif is_bool_dtype(dtype): + dtype = np.object_ + is_bool = True + + elif is_integer_dtype(dtype): + dtype = np.float64 + + dtype = np.dtype(dtype) + out_arr = np.empty(arr.shape, dtype=dtype) + + na_indexer = [slice(None)] * arr.ndim + na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None) + out_arr[tuple(na_indexer)] = na + + if arr.ndim == 2 and arr.dtype.name in _diff_special: + # TODO: can diff_2d dtype specialization troubles be fixed by defining + # out_arr inside diff_2d? + algos.diff_2d(arr, out_arr, n, axis) + else: + # To keep mypy happy, _res_indexer is a list while res_indexer is + # a tuple, ditto for lag_indexer. 
+ _res_indexer = [slice(None)] * arr.ndim + _res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) + res_indexer = tuple(_res_indexer) + + _lag_indexer = [slice(None)] * arr.ndim + _lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) + lag_indexer = tuple(_lag_indexer) + + # need to make sure that we account for na for datelike/timedelta + # we don't actually want to subtract these i8 numbers + if is_timedelta: + res = arr[res_indexer] + lag = arr[lag_indexer] + + mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na) + if mask.any(): + res = res.copy() + res[mask] = 0 + lag = lag.copy() + lag[mask] = 0 + + result = res - lag + result[mask] = na + out_arr[res_indexer] = result + elif is_bool: + out_arr[res_indexer] = arr[res_indexer] ^ arr[lag_indexer] + else: + out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] + + if is_timedelta: + out_arr = out_arr.astype("int64").view("timedelta64[ns]") + + return out_arr + + +# -------------------------------------------------------------------- +# Helper functions + +# Note: safe_sort is in algorithms.py instead of sorting.py because it is +# low-dependency, is used in this module, and used private methods from +# this module. +def safe_sort( + values, + codes=None, + na_sentinel: int = -1, + assume_unique: bool = False, + verify: bool = True, +) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: + """ + Sort ``values`` and reorder corresponding ``codes``. + + ``values`` should be unique if ``codes`` is not None. + Safe for use with mixed types (int, str), orders ints before strs. + + Parameters + ---------- + values : list-like + Sequence; must be unique if ``codes`` is not None. + codes : list_like, optional + Indices to ``values``. All out of bound indices are treated as + "not found" and will be masked with ``na_sentinel``. + na_sentinel : int, default -1 + Value in ``codes`` to mark "not found". + Ignored when ``codes`` is None. 
+ assume_unique : bool, default False + When True, ``values`` are assumed to be unique, which can speed up + the calculation. Ignored when ``codes`` is None. + verify : bool, default True + Check if codes are out of bound for the values and put out of bound + codes equal to na_sentinel. If ``verify=False``, it is assumed there + are no out of bound codes. Ignored when ``codes`` is None. + + .. versionadded:: 0.25.0 + + Returns + ------- + ordered : ndarray + Sorted ``values`` + new_codes : ndarray + Reordered ``codes``; returned when ``codes`` is not None. + + Raises + ------ + TypeError + * If ``values`` is not list-like or if ``codes`` is neither None + nor list-like + * If ``values`` cannot be sorted + ValueError + * If ``codes`` is not None and ``values`` contain duplicates. + """ + if not is_list_like(values): + raise TypeError( + "Only list-like objects are allowed to be passed to safe_sort as values" + ) + + if not isinstance(values, np.ndarray) and not is_extension_array_dtype(values): + # don't convert to string types + dtype, _ = infer_dtype_from_array(values) + values = np.asarray(values, dtype=dtype) + + def sort_mixed(values): + # order ints before strings, safe in py3 + str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) + nums = np.sort(values[~str_pos]) + strs = np.sort(values[str_pos]) + return np.concatenate([nums, np.asarray(strs, dtype=object)]) + + sorter = None + if ( + not is_extension_array_dtype(values) + and lib.infer_dtype(values, skipna=False) == "mixed-integer" + ): + # unorderable in py3 if mixed str/int + ordered = sort_mixed(values) + else: + try: + sorter = values.argsort() + ordered = values.take(sorter) + except TypeError: + # try this anyway + ordered = sort_mixed(values) + + # codes: + + if codes is None: + return ordered + + if not is_list_like(codes): + raise TypeError( + "Only list-like objects or None are allowed to " + "be passed to safe_sort as codes" + ) + codes = ensure_platform_int(np.asarray(codes)) + 
+ from pandas import Index + + if not assume_unique and not Index(values).is_unique: + raise ValueError("values should be unique if codes is not None") + + if sorter is None: + # mixed types + hash_klass, values = _get_data_algo(values) + t = hash_klass(len(values)) + t.map_locations(values) + sorter = ensure_platform_int(t.lookup(ordered)) + + if na_sentinel == -1: + # take_1d is faster, but only works for na_sentinels of -1 + order2 = sorter.argsort() + new_codes = take_1d(order2, codes, fill_value=-1) + if verify: + mask = (codes < -len(values)) | (codes >= len(values)) + else: + mask = None + else: + reverse_indexer = np.empty(len(sorter), dtype=np.int_) + reverse_indexer.put(sorter, np.arange(len(sorter))) + # Out of bound indices will be masked with `na_sentinel` next, so we + # may deal with them here without performance loss using `mode='wrap'` + new_codes = reverse_indexer.take(codes, mode="wrap") + + mask = codes == na_sentinel + if verify: + mask = mask | (codes < -len(values)) | (codes >= len(values)) + + if mask is not None: + np.putmask(new_codes, mask, na_sentinel) + + return ordered, ensure_platform_int(new_codes) diff --git a/pandas/core/api.py b/pandas/core/api.py new file mode 100644 index 00000000..b0b65f9d --- /dev/null +++ b/pandas/core/api.py @@ -0,0 +1,57 @@ +# flake8: noqa + +from pandas._libs import NaT, Period, Timedelta, Timestamp +from pandas._libs.missing import NA + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import isna, isnull, notna, notnull + +from pandas.core.algorithms import factorize, unique, value_counts +from pandas.core.arrays import Categorical +from pandas.core.arrays.boolean import BooleanDtype +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) +from pandas.core.arrays.string_ import StringDtype +from 
pandas.core.construction import array +from pandas.core.groupby import Grouper, NamedAgg +from pandas.core.indexes.api import ( + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, + UInt64Index, +) +from pandas.core.indexes.datetimes import bdate_range, date_range +from pandas.core.indexes.interval import Interval, interval_range +from pandas.core.indexes.period import period_range +from pandas.core.indexes.timedeltas import timedelta_range +from pandas.core.indexing import IndexSlice +from pandas.core.series import Series +from pandas.core.tools.datetimes import to_datetime +from pandas.core.tools.numeric import to_numeric +from pandas.core.tools.timedeltas import to_timedelta + +from pandas.io.formats.format import set_eng_float_format +from pandas.tseries.offsets import DateOffset + +# DataFrame needs to be imported after NamedAgg to avoid a circular import +from pandas.core.frame import DataFrame # isort:skip diff --git a/pandas/core/apply.py b/pandas/core/apply.py new file mode 100644 index 00000000..a496afee --- /dev/null +++ b/pandas/core/apply.py @@ -0,0 +1,466 @@ +import abc +import inspect +from typing import TYPE_CHECKING, Any, Dict, Iterator, Tuple, Type, Union + +import numpy as np + +from pandas._libs import reduction as libreduction +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.common import ( + is_dict_like, + is_extension_array_dtype, + is_list_like, + is_sequence, +) +from pandas.core.dtypes.generic import ABCSeries + +from pandas.core.construction import create_series_with_explicit_dtype + +if TYPE_CHECKING: + from pandas import DataFrame, Series, Index + +ResType = Dict[int, Any] + + +def frame_apply( + obj: "DataFrame", + func, + axis=0, + raw: bool = False, + result_type=None, + ignore_failures: bool = False, + args=None, + kwds=None, +): + """ construct and return a row or column based frame apply object """ + 
+ axis = obj._get_axis_number(axis) + klass: Type[FrameApply] + if axis == 0: + klass = FrameRowApply + elif axis == 1: + klass = FrameColumnApply + + return klass( + obj, + func, + raw=raw, + result_type=result_type, + ignore_failures=ignore_failures, + args=args, + kwds=kwds, + ) + + +class FrameApply(metaclass=abc.ABCMeta): + + # --------------------------------------------------------------- + # Abstract Methods + axis: int + + @property + @abc.abstractmethod + def result_index(self) -> "Index": + pass + + @property + @abc.abstractmethod + def result_columns(self) -> "Index": + pass + + @property + @abc.abstractmethod + def series_generator(self) -> Iterator["Series"]: + pass + + @abc.abstractmethod + def wrap_results_for_axis( + self, results: ResType, res_index: "Index" + ) -> Union["Series", "DataFrame"]: + pass + + # --------------------------------------------------------------- + + def __init__( + self, + obj: "DataFrame", + func, + raw: bool, + result_type, + ignore_failures: bool, + args, + kwds, + ): + self.obj = obj + self.raw = raw + self.ignore_failures = ignore_failures + self.args = args or () + self.kwds = kwds or {} + + if result_type not in [None, "reduce", "broadcast", "expand"]: + raise ValueError( + "invalid value for result_type, must be one " + "of {None, 'reduce', 'broadcast', 'expand'}" + ) + + self.result_type = result_type + + # curry if needed + if (kwds or args) and not isinstance(func, (np.ufunc, str)): + + def f(x): + return func(x, *args, **kwds) + + else: + f = func + + self.f = f + + @property + def res_columns(self) -> "Index": + return self.result_columns + + @property + def columns(self) -> "Index": + return self.obj.columns + + @property + def index(self) -> "Index": + return self.obj.index + + @cache_readonly + def values(self): + return self.obj.values + + @cache_readonly + def dtypes(self) -> "Series": + return self.obj.dtypes + + @property + def agg_axis(self) -> "Index": + return self.obj._get_agg_axis(self.axis) + + 
def get_result(self): + """ compute the results """ + + # dispatch to agg + if is_list_like(self.f) or is_dict_like(self.f): + return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds) + + # all empty + if len(self.columns) == 0 and len(self.index) == 0: + return self.apply_empty_result() + + # string dispatch + if isinstance(self.f, str): + # Support for `frame.transform('method')` + # Some methods (shift, etc.) require the axis argument, others + # don't, so inspect and insert if necessary. + func = getattr(self.obj, self.f) + sig = inspect.getfullargspec(func) + if "axis" in sig.args: + self.kwds["axis"] = self.axis + return func(*self.args, **self.kwds) + + # ufunc + elif isinstance(self.f, np.ufunc): + with np.errstate(all="ignore"): + results = self.obj._data.apply("apply", func=self.f) + return self.obj._constructor( + data=results, index=self.index, columns=self.columns, copy=False + ) + + # broadcasting + if self.result_type == "broadcast": + return self.apply_broadcast(self.obj) + + # one axis empty + elif not all(self.obj.shape): + return self.apply_empty_result() + + # raw + elif self.raw and not self.obj._is_mixed_type: + return self.apply_raw() + + return self.apply_standard() + + def apply_empty_result(self): + """ + we have an empty result; at least 1 axis is 0 + + we will try to apply the function to an empty + series in order to see if this is a reduction function + """ + + # we are not asked to reduce or infer reduction + # so just return a copy of the existing object + if self.result_type not in ["reduce", None]: + return self.obj.copy() + + # we may need to infer + should_reduce = self.result_type == "reduce" + + from pandas import Series + + if not should_reduce: + try: + r = self.f(Series([], dtype=np.float64)) + except Exception: + pass + else: + should_reduce = not isinstance(r, Series) + + if should_reduce: + if len(self.agg_axis): + r = self.f(Series([], dtype=np.float64)) + else: + r = np.nan + + return 
self.obj._constructor_sliced(r, index=self.agg_axis) + else: + return self.obj.copy() + + def apply_raw(self): + """ apply to the values as a numpy array """ + try: + result = libreduction.compute_reduction(self.values, self.f, axis=self.axis) + except ValueError as err: + if "Function does not reduce" not in str(err): + # catch only ValueError raised intentionally in libreduction + raise + # We expect np.apply_along_axis to give a two-dimensional result, or + # also raise. + result = np.apply_along_axis(self.f, self.axis, self.values) + + # TODO: mixed type case + if result.ndim == 2: + return self.obj._constructor(result, index=self.index, columns=self.columns) + else: + return self.obj._constructor_sliced(result, index=self.agg_axis) + + def apply_broadcast(self, target: "DataFrame") -> "DataFrame": + result_values = np.empty_like(target.values) + + # axis which we want to compare compliance + result_compare = target.shape[0] + + for i, col in enumerate(target.columns): + res = self.f(target[col]) + ares = np.asarray(res).ndim + + # must be a scalar or 1d + if ares > 1: + raise ValueError("too many dims to broadcast") + elif ares == 1: + + # must match return dim + if result_compare != len(res): + raise ValueError("cannot broadcast result") + + result_values[:, i] = res + + # we *always* preserve the original index / columns + result = self.obj._constructor( + result_values, index=target.index, columns=target.columns + ) + return result + + def apply_standard(self): + + # try to reduce first (by default) + # this only matters if the reduction in values is of different dtype + # e.g. 
if we want to apply to a SparseFrame, then can't directly reduce + + # we cannot reduce using non-numpy dtypes, + # as demonstrated in gh-12244 + if ( + self.result_type in ["reduce", None] + and not self.dtypes.apply(is_extension_array_dtype).any() + # Disallow complex_internals since libreduction shortcut raises a TypeError + and not self.agg_axis._has_complex_internals + ): + + values = self.values + index = self.obj._get_axis(self.axis) + labels = self.agg_axis + empty_arr = np.empty(len(index), dtype=values.dtype) + + # Preserve subclass for e.g. test_subclassed_apply + dummy = self.obj._constructor_sliced( + empty_arr, index=index, dtype=values.dtype + ) + + try: + result = libreduction.compute_reduction( + values, self.f, axis=self.axis, dummy=dummy, labels=labels + ) + except ValueError as err: + if "Function does not reduce" not in str(err): + # catch only ValueError raised intentionally in libreduction + raise + except TypeError: + # e.g. test_apply_ignore_failures we just ignore + if not self.ignore_failures: + raise + except ZeroDivisionError: + # reached via numexpr; fall back to python implementation + pass + else: + return self.obj._constructor_sliced(result, index=labels) + + # compute the result using the series generator + results, res_index = self.apply_series_generator() + + # wrap results + return self.wrap_results(results, res_index) + + def apply_series_generator(self) -> Tuple[ResType, "Index"]: + series_gen = self.series_generator + res_index = self.result_index + + keys = [] + results = {} + if self.ignore_failures: + successes = [] + for i, v in enumerate(series_gen): + try: + results[i] = self.f(v) + except Exception: + pass + else: + keys.append(v.name) + successes.append(i) + + # so will work with MultiIndex + if len(successes) < len(res_index): + res_index = res_index.take(successes) + + else: + for i, v in enumerate(series_gen): + results[i] = self.f(v) + keys.append(v.name) + + return results, res_index + + def wrap_results( + self, 
results: ResType, res_index: "Index" + ) -> Union["Series", "DataFrame"]: + from pandas import Series + + # see if we can infer the results + if len(results) > 0 and 0 in results and is_sequence(results[0]): + + return self.wrap_results_for_axis(results, res_index) + + # dict of scalars + + # the default dtype of an empty Series will be `object`, but this + # code can be hit by df.mean() where the result should have dtype + # float64 even if it's an empty Series. + constructor_sliced = self.obj._constructor_sliced + if constructor_sliced is Series: + result = create_series_with_explicit_dtype( + results, dtype_if_empty=np.float64 + ) + else: + result = constructor_sliced(results) + result.index = res_index + + return result + + +class FrameRowApply(FrameApply): + axis = 0 + + def apply_broadcast(self, target: "DataFrame") -> "DataFrame": + return super().apply_broadcast(target) + + @property + def series_generator(self): + return (self.obj._ixs(i, axis=1) for i in range(len(self.columns))) + + @property + def result_index(self) -> "Index": + return self.columns + + @property + def result_columns(self) -> "Index": + return self.index + + def wrap_results_for_axis( + self, results: ResType, res_index: "Index" + ) -> "DataFrame": + """ return the results for the rows """ + + result = self.obj._constructor(data=results) + + if not isinstance(results[0], ABCSeries): + if len(result.index) == len(self.res_columns): + result.index = self.res_columns + + if len(result.columns) == len(res_index): + result.columns = res_index + + return result + + +class FrameColumnApply(FrameApply): + axis = 1 + + def apply_broadcast(self, target: "DataFrame") -> "DataFrame": + result = super().apply_broadcast(target.T) + return result.T + + @property + def series_generator(self): + constructor = self.obj._constructor_sliced + return ( + constructor(arr, index=self.columns, name=name) + for i, (arr, name) in enumerate(zip(self.values, self.index)) + ) + + @property + def result_index(self) 
-> "Index": + return self.index + + @property + def result_columns(self) -> "Index": + return self.columns + + def wrap_results_for_axis( + self, results: ResType, res_index: "Index" + ) -> Union["Series", "DataFrame"]: + """ return the results for the columns """ + result: Union["Series", "DataFrame"] + + # we have requested to expand + if self.result_type == "expand": + result = self.infer_to_same_shape(results, res_index) + + # we have a non-series and don't want inference + elif not isinstance(results[0], ABCSeries): + from pandas import Series + + result = Series(results) + result.index = res_index + + # we may want to infer results + else: + result = self.infer_to_same_shape(results, res_index) + + return result + + def infer_to_same_shape(self, results: ResType, res_index: "Index") -> "DataFrame": + """ infer the results to the same shape as the input object """ + + result = self.obj._constructor(data=results) + result = result.T + + # set the index + result.index = res_index + + # infer dtypes + result = result.infer_objects() + + return result diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py new file mode 100644 index 00000000..bf346992 --- /dev/null +++ b/pandas/core/arrays/__init__.py @@ -0,0 +1,36 @@ +from pandas.core.arrays.base import ( + ExtensionArray, + ExtensionOpsMixin, + ExtensionScalarOpsMixin, + try_cast_to_ea, +) +from pandas.core.arrays.boolean import BooleanArray +from pandas.core.arrays.categorical import Categorical +from pandas.core.arrays.datetimes import DatetimeArray +from pandas.core.arrays.integer import IntegerArray, integer_array +from pandas.core.arrays.interval import IntervalArray +from pandas.core.arrays.numpy_ import PandasArray, PandasDtype +from pandas.core.arrays.period import PeriodArray, period_array +from pandas.core.arrays.sparse import SparseArray +from pandas.core.arrays.string_ import StringArray +from pandas.core.arrays.timedeltas import TimedeltaArray + +__all__ = [ + "ExtensionArray", 
+ "ExtensionOpsMixin", + "ExtensionScalarOpsMixin", + "try_cast_to_ea", + "BooleanArray", + "Categorical", + "DatetimeArray", + "IntegerArray", + "integer_array", + "IntervalArray", + "PandasArray", + "PandasDtype", + "PeriodArray", + "period_array", + "SparseArray", + "StringArray", + "TimedeltaArray", +] diff --git a/pandas/core/arrays/_arrow_utils.py b/pandas/core/arrays/_arrow_utils.py new file mode 100644 index 00000000..e0d33beb --- /dev/null +++ b/pandas/core/arrays/_arrow_utils.py @@ -0,0 +1,124 @@ +from distutils.version import LooseVersion +import json + +import numpy as np +import pyarrow + +from pandas.core.arrays.interval import _VALID_CLOSED + +_pyarrow_version_ge_015 = LooseVersion(pyarrow.__version__) >= LooseVersion("0.15") + + +def pyarrow_array_to_numpy_and_mask(arr, dtype): + """ + Convert a primitive pyarrow.Array to a numpy array and boolean mask based + on the buffers of the Array. + + Parameters + ---------- + arr : pyarrow.Array + dtype : numpy.dtype + + Returns + ------- + (data, mask) + Tuple of two numpy arrays with the raw data (with specified dtype) and + a boolean mask (validity mask, so False means missing) + """ + buflist = arr.buffers() + data = np.frombuffer(buflist[1], dtype=dtype)[arr.offset : arr.offset + len(arr)] + bitmask = buflist[0] + if bitmask is not None: + mask = pyarrow.BooleanArray.from_buffers( + pyarrow.bool_(), len(arr), [None, bitmask] + ) + mask = np.asarray(mask) + else: + mask = np.ones(len(arr), dtype=bool) + return data, mask + + +if _pyarrow_version_ge_015: + # the pyarrow extension types are only available for pyarrow 0.15+ + + class ArrowPeriodType(pyarrow.ExtensionType): + def __init__(self, freq): + # attributes need to be set first before calling + # super init (as that calls serialize) + self._freq = freq + pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period") + + @property + def freq(self): + return self._freq + + def __arrow_ext_serialize__(self): + metadata = {"freq": self.freq} + 
return json.dumps(metadata).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + metadata = json.loads(serialized.decode()) + return ArrowPeriodType(metadata["freq"]) + + def __eq__(self, other): + if isinstance(other, pyarrow.BaseExtensionType): + return type(self) == type(other) and self.freq == other.freq + else: + return NotImplemented + + def __hash__(self): + return hash((str(self), self.freq)) + + # register the type with a dummy instance + _period_type = ArrowPeriodType("D") + pyarrow.register_extension_type(_period_type) + + class ArrowIntervalType(pyarrow.ExtensionType): + def __init__(self, subtype, closed): + # attributes need to be set first before calling + # super init (as that calls serialize) + assert closed in _VALID_CLOSED + self._closed = closed + if not isinstance(subtype, pyarrow.DataType): + subtype = pyarrow.type_for_alias(str(subtype)) + self._subtype = subtype + + storage_type = pyarrow.struct([("left", subtype), ("right", subtype)]) + pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval") + + @property + def subtype(self): + return self._subtype + + @property + def closed(self): + return self._closed + + def __arrow_ext_serialize__(self): + metadata = {"subtype": str(self.subtype), "closed": self.closed} + return json.dumps(metadata).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + metadata = json.loads(serialized.decode()) + subtype = pyarrow.type_for_alias(metadata["subtype"]) + closed = metadata["closed"] + return ArrowIntervalType(subtype, closed) + + def __eq__(self, other): + if isinstance(other, pyarrow.BaseExtensionType): + return ( + type(self) == type(other) + and self.subtype == other.subtype + and self.closed == other.closed + ) + else: + return NotImplemented + + def __hash__(self): + return hash((str(self), str(self.subtype), self.closed)) + + # register the type with a dummy instance + _interval_type = 
ArrowIntervalType(pyarrow.int64(), "left") + pyarrow.register_extension_type(_interval_type) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py new file mode 100644 index 00000000..20e4cf70 --- /dev/null +++ b/pandas/core/arrays/_ranges.py @@ -0,0 +1,190 @@ +""" +Helper functions to generate range-like data for DatetimeArray +(and possibly TimedeltaArray/PeriodArray) +""" + +from typing import Tuple + +import numpy as np + +from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp + +from pandas.tseries.offsets import DateOffset, Tick, generate_range + + +def generate_regular_range( + start: Timestamp, end: Timestamp, periods: int, freq: DateOffset +) -> Tuple[np.ndarray, str]: + """ + Generate a range of dates with the spans between dates described by + the given `freq` DateOffset. + + Parameters + ---------- + start : Timestamp or None + first point of produced date range + end : Timestamp or None + last point of produced date range + periods : int + number of periods in produced date range + freq : DateOffset + describes space between dates in produced date range + + Returns + ------- + ndarray[np.int64] representing nanosecond unix timestamps + """ + if isinstance(freq, Tick): + stride = freq.nanos + if periods is None: + b = Timestamp(start).value + # cannot just use e = Timestamp(end) + 1 because arange breaks when + # stride is too large, see GH10887 + e = b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1 + # end.tz == start.tz by this point due to _generate implementation + tz = start.tz + elif start is not None: + b = Timestamp(start).value + e = _generate_range_overflow_safe(b, periods, stride, side="start") + tz = start.tz + elif end is not None: + e = Timestamp(end).value + stride + b = _generate_range_overflow_safe(e, periods, stride, side="end") + tz = end.tz + else: + raise ValueError( + "at least 'start' or 'end' should be specified " + "if a 'period' is given." 
+ ) + + with np.errstate(over="raise"): + # If the range is sufficiently large, np.arange may overflow + # and incorrectly return an empty array if not caught. + try: + values = np.arange(b, e, stride, dtype=np.int64) + except FloatingPointError: + xdr = [b] + while xdr[-1] != e: + xdr.append(xdr[-1] + stride) + values = np.array(xdr[:-1], dtype=np.int64) + + else: + tz = None + # start and end should have the same timezone by this point + if start is not None: + tz = start.tz + elif end is not None: + tz = end.tz + + xdr = generate_range(start=start, end=end, periods=periods, offset=freq) + + values = np.array([x.value for x in xdr], dtype=np.int64) + + return values, tz + + +def _generate_range_overflow_safe( + endpoint: int, periods: int, stride: int, side: str = "start" +) -> int: + """ + Calculate the second endpoint for passing to np.arange, checking + to avoid an integer overflow. Catch OverflowError and re-raise + as OutOfBoundsDatetime. + + Parameters + ---------- + endpoint : int + nanosecond timestamp of the known endpoint of the desired range + periods : int + number of periods in the desired range + stride : int + nanoseconds between periods in the desired range + side : {'start', 'end'} + which end of the range `endpoint` refers to + + Returns + ------- + other_end : int + + Raises + ------ + OutOfBoundsDatetime + """ + # GH#14187 raise instead of incorrectly wrapping around + assert side in ["start", "end"] + + i64max = np.uint64(np.iinfo(np.int64).max) + msg = f"Cannot generate range with {side}={endpoint} and periods={periods}" + + with np.errstate(over="raise"): + # if periods * strides cannot be multiplied within the *uint64* bounds, + # we cannot salvage the operation by recursing, so raise + try: + addend = np.uint64(periods) * np.uint64(np.abs(stride)) + except FloatingPointError: + raise OutOfBoundsDatetime(msg) + + if np.abs(addend) <= i64max: + # relatively easy case without casting concerns + return 
_generate_range_overflow_safe_signed(endpoint, periods, stride, side) + + elif (endpoint > 0 and side == "start" and stride > 0) or ( + endpoint < 0 and side == "end" and stride > 0 + ): + # no chance of not-overflowing + raise OutOfBoundsDatetime(msg) + + elif side == "end" and endpoint > i64max and endpoint - stride <= i64max: + # in _generate_regular_range we added `stride` thereby overflowing + # the bounds. Adjust to fix this. + return _generate_range_overflow_safe( + endpoint - stride, periods - 1, stride, side + ) + + # split into smaller pieces + mid_periods = periods // 2 + remaining = periods - mid_periods + assert 0 < remaining < periods, (remaining, periods, endpoint, stride) + + midpoint = _generate_range_overflow_safe(endpoint, mid_periods, stride, side) + return _generate_range_overflow_safe(midpoint, remaining, stride, side) + + +def _generate_range_overflow_safe_signed( + endpoint: int, periods: int, stride: int, side: str +) -> int: + """ + A special case for _generate_range_overflow_safe where `periods * stride` + can be calculated without overflowing int64 bounds. + """ + assert side in ["start", "end"] + if side == "end": + stride *= -1 + + with np.errstate(over="raise"): + addend = np.int64(periods) * np.int64(stride) + try: + # easy case with no overflows + return np.int64(endpoint) + addend + except (FloatingPointError, OverflowError): + # with endpoint negative and addend positive we risk + # FloatingPointError; with reversed signed we risk OverflowError + pass + + # if stride and endpoint had opposite signs, then endpoint + addend + # should never overflow. 
so they must have the same signs + assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0) + + if stride > 0: + # watch out for very special case in which we just slightly + # exceed implementation bounds, but when passing the result to + # np.arange will get a result slightly within the bounds + result = np.uint64(endpoint) + np.uint64(addend) + i64max = np.uint64(np.iinfo(np.int64).max) + assert result > i64max + if result <= i64max + np.uint64(stride): + return result + + raise OutOfBoundsDatetime( + f"Cannot generate range with {side}={endpoint} and periods={periods}" + ) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py new file mode 100644 index 00000000..4ab2a25d --- /dev/null +++ b/pandas/core/arrays/base.py @@ -0,0 +1,1246 @@ +"""An interface for extending pandas with custom arrays. + +.. warning:: + + This is an experimental API and subject to breaking changes + without warning. +""" +import operator +from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union + +import numpy as np + +from pandas._libs import lib +from pandas._typing import ArrayLike +from pandas.compat import set_function_name +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender, Substitution +from pandas.util._validators import validate_fillna_kwargs + +from pandas.core.dtypes.common import is_array_like, is_list_like +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ABCExtensionArray, ABCIndexClass, ABCSeries +from pandas.core.dtypes.missing import isna + +from pandas.core import ops +from pandas.core.algorithms import _factorize_array, unique +from pandas.core.missing import backfill_1d, pad_1d +from pandas.core.sorting import nargsort + +_extension_array_shared_docs: Dict[str, str] = dict() + + +def try_cast_to_ea(cls_or_instance, obj, dtype=None): + """ + Call to `_from_sequence` that returns the object 
unchanged on Exception. + + Parameters + ---------- + cls_or_instance : ExtensionArray subclass or instance + obj : arraylike + Values to pass to cls._from_sequence + dtype : ExtensionDtype, optional + + Returns + ------- + ExtensionArray or obj + """ + try: + result = cls_or_instance._from_sequence(obj, dtype=dtype) + except Exception: + # We can't predict what downstream EA constructors may raise + result = obj + return result + + +class ExtensionArray: + """ + Abstract base class for custom 1-D array types. + + pandas will recognize instances of this class as proper arrays + with a custom type and will not attempt to coerce them to objects. They + may be stored directly inside a :class:`DataFrame` or :class:`Series`. + + .. versionadded:: 0.23.0 + + Attributes + ---------- + dtype + nbytes + ndim + shape + + Methods + ------- + argsort + astype + copy + dropna + factorize + fillna + isna + ravel + repeat + searchsorted + shift + take + unique + view + _concat_same_type + _formatter + _from_factorized + _from_sequence + _from_sequence_of_strings + _ndarray_values + _reduce + _values_for_argsort + _values_for_factorize + + Notes + ----- + The interface includes the following abstract methods that must be + implemented by subclasses: + + * _from_sequence + * _from_factorized + * __getitem__ + * __len__ + * dtype + * nbytes + * isna + * take + * copy + * _concat_same_type + + A default repr displaying the type, (truncated) data, length, + and dtype is provided. It can be customized or replaced by + by overriding: + + * __repr__ : A default repr for the ExtensionArray. + * _formatter : Print scalars inside a Series or DataFrame. + + Some methods require casting the ExtensionArray to an ndarray of Python + objects with ``self.astype(object)``, which may be expensive. 
When + performance is a concern, we highly recommend overriding the following + methods: + + * fillna + * dropna + * unique + * factorize / _values_for_factorize + * argsort / _values_for_argsort + * searchsorted + + The remaining methods implemented on this class should be performant, + as they only compose abstract methods. Still, a more efficient + implementation may be available, and these methods can be overridden. + + One can implement methods to handle array reductions. + + * _reduce + + One can implement methods to handle parsing from strings that will be used + in methods such as ``pandas.io.parsers.read_csv``. + + * _from_sequence_of_strings + + This class does not inherit from 'abc.ABCMeta' for performance reasons. + Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + + ExtensionArrays are limited to 1 dimension. + + They may be backed by none, one, or many NumPy arrays. For example, + ``pandas.Categorical`` is an extension array backed by two arrays, + one for codes and one for categories. An array of IPv6 addresses may + be backed by a NumPy structured array with two fields, one for the + lower 64 bits and one for the upper 64 bits. Or they may be backed + by some other storage type, like Python lists. Pandas makes no + assumptions on how the data are stored, just that it can be converted + to a NumPy array. + The ExtensionArray interface does not impose any rules on how this data + is stored. However, currently, the backing data cannot be stored in + attributes called ``.values`` or ``._values`` to ensure full compatibility + with pandas internals. But other names such as ``.data``, ``._data``, + ``._items``, ... can be freely used. + + If implementing NumPy's ``__array_ufunc__`` interface, pandas expects + that + + 1. You defer by returning ``NotImplemented`` when any Series are present + in `inputs`. 
Pandas will extract the arrays and call the ufunc again. + 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. + Pandas inspect this to determine whether the ufunc is valid for the + types present. + + See :ref:`extending.extension.ufunc` for more. + """ + + # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. + # Don't override this. + _typ = "extension" + + # ------------------------------------------------------------------------ + # Constructors + # ------------------------------------------------------------------------ + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + """ + Construct a new ExtensionArray from a sequence of scalars. + + Parameters + ---------- + scalars : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type``. + dtype : dtype, optional + Construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. + copy : bool, default False + If True, copy the underlying data. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + """Construct a new ExtensionArray from a sequence of strings. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + strings : Sequence + Each element will be an instance of the scalar type for this + array, ``cls.dtype.type``. + dtype : dtype, optional + Construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. + copy : bool, default False + If True, copy the underlying data. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + @classmethod + def _from_factorized(cls, values, original): + """ + Reconstruct an ExtensionArray after factorization. + + Parameters + ---------- + values : ndarray + An integer ndarray with the factorized values. 
+ original : ExtensionArray + The original ExtensionArray that factorize was called on. + + See Also + -------- + factorize + ExtensionArray.factorize + """ + raise AbstractMethodError(cls) + + # ------------------------------------------------------------------------ + # Must be a Sequence + # ------------------------------------------------------------------------ + + def __getitem__(self, item): + # type (Any) -> Any + """ + Select a subset of self. + + Parameters + ---------- + item : int, slice, or ndarray + * int: The position in 'self' to get. + + * slice: A slice object, where 'start', 'stop', and 'step' are + integers or None + + * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' + + Returns + ------- + item : scalar or ExtensionArray + + Notes + ----- + For scalar ``item``, return a scalar value suitable for the array's + type. This should be an instance of ``self.dtype.type``. + + For slice ``key``, return an instance of ``ExtensionArray``, even + if the slice is length 0 or 1. + + For a boolean mask, return an instance of ``ExtensionArray``, filtered + to the values where ``item`` is True. + """ + raise AbstractMethodError(self) + + def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None: + """ + Set one or more values inplace. + + This method is not required to satisfy the pandas extension array + interface. + + Parameters + ---------- + key : int, ndarray, or slice + When called from, e.g. ``Series.__setitem__``, ``key`` will be + one of + + * scalar int + * ndarray of integers. + * boolean ndarray + * slice object + + value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object + value or values to be set of ``key``. + + Returns + ------- + None + """ + # Some notes to the ExtensionArray implementor who may have ended up + # here. 
While this method is not required for the interface, if you + # *do* choose to implement __setitem__, then some semantics should be + # observed: + # + # * Setting multiple values : ExtensionArrays should support setting + # multiple values at once, 'key' will be a sequence of integers and + # 'value' will be a same-length sequence. + # + # * Broadcasting : For a sequence 'key' and a scalar 'value', + # each position in 'key' should be set to 'value'. + # + # * Coercion : Most users will expect basic coercion to work. For + # example, a string like '2018-01-01' is coerced to a datetime + # when setting on a datetime64ns array. In general, if the + # __init__ method coerces that value, then so should __setitem__ + # Note, also, that Series/DataFrame.where internally use __setitem__ + # on a copy of the data. + raise NotImplementedError(f"{type(self)} does not implement __setitem__.") + + def __len__(self) -> int: + """ + Length of this array + + Returns + ------- + length : int + """ + raise AbstractMethodError(self) + + def __iter__(self): + """ + Iterate over elements of the array. + """ + # This needs to be implemented so that pandas recognizes extension + # arrays as list-like. The default implementation makes successive + # calls to ``__getitem__``, which may be slower than necessary. + for i in range(len(self)): + yield self[i] + + def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default): + """ + Convert to a NumPy ndarray. + + .. versionadded:: 1.0.0 + + This is similar to :meth:`numpy.asarray`, but may provide additional control + over how the conversion is done. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + Whether to ensure that the returned value is not a view on + another array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that + a copy is made, even if not strictly necessary. 
+ na_value : Any, optional + The value to use for missing values. The default value depends + on `dtype` and the type of the array. + + Returns + ------- + numpy.ndarray + """ + result = np.asarray(self, dtype=dtype) + if copy or na_value is not lib.no_default: + result = result.copy() + if na_value is not lib.no_default: + result[self.isna()] = na_value + return result + + # ------------------------------------------------------------------------ + # Required attributes + # ------------------------------------------------------------------------ + + @property + def dtype(self) -> ExtensionDtype: + """ + An instance of 'ExtensionDtype'. + """ + raise AbstractMethodError(self) + + @property + def shape(self) -> Tuple[int, ...]: + """ + Return a tuple of the array dimensions. + """ + return (len(self),) + + @property + def size(self) -> int: + """ + The number of elements in the array. + """ + return np.prod(self.shape) + + @property + def ndim(self) -> int: + """ + Extension Arrays are only allowed to be 1-dimensional. + """ + return 1 + + @property + def nbytes(self) -> int: + """ + The number of bytes needed to store this object in memory. + """ + # If this is expensive to compute, return an approximate lower bound + # on the number of bytes needed. + raise AbstractMethodError(self) + + # ------------------------------------------------------------------------ + # Additional Methods + # ------------------------------------------------------------------------ + + def astype(self, dtype, copy=True): + """ + Cast to a NumPy array with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ndarray + NumPy ndarray with 'dtype' for its dtype. 
+ """ + return np.array(self, dtype=dtype, copy=copy) + + def isna(self) -> ArrayLike: + """ + A 1-D array indicating if each value is missing. + + Returns + ------- + na_values : Union[np.ndarray, ExtensionArray] + In most cases, this should return a NumPy ndarray. For + exceptional cases like ``SparseArray``, where returning + an ndarray would be expensive, an ExtensionArray may be + returned. + + Notes + ----- + If returning an ExtensionArray, then + + * ``na_values._is_boolean`` should be True + * `na_values` should implement :func:`ExtensionArray._reduce` + * ``na_values.any`` and ``na_values.all`` should be implemented + """ + raise AbstractMethodError(self) + + def _values_for_argsort(self) -> np.ndarray: + """ + Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + + See Also + -------- + ExtensionArray.argsort + """ + # Note: this is used in `ExtensionArray.argsort`. + return np.array(self) + + def argsort( + self, ascending: bool = True, kind: str = "quicksort", *args, **kwargs + ) -> np.ndarray: + """ + Return the indices that would sort this array. + + Parameters + ---------- + ascending : bool, default True + Whether the indices should result in an ascending + or descending sort. + kind : {'quicksort', 'mergesort', 'heapsort'}, optional + Sorting algorithm. + *args, **kwargs: + passed through to :func:`numpy.argsort`. + + Returns + ------- + ndarray + Array of indices that sort ``self``. If NaN values are contained, + NaN values are placed at the end. + + See Also + -------- + numpy.argsort : Sorting implementation used internally. + """ + # Implementor note: You have two places to override the behavior of + # argsort. + # 1. _values_for_argsort : construct the values passed to np.argsort + # 2. argsort : total control over sorting. 
+ ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) + + result = nargsort(self, kind=kind, ascending=ascending, na_position="last") + return result + + def fillna(self, value=None, method=None, limit=None): + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, array-like + If a scalar value is passed it is used to fill all missing values. + Alternatively, an array-like 'value' can be given. It's expected + that the array-like have the same length as 'self'. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap. + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + Returns + ------- + ExtensionArray + With NA/NaN filled. + """ + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + + if is_array_like(value): + if len(value) != len(self): + raise ValueError( + f"Length of 'value' does not match. Got ({len(value)}) " + f"expected {len(self)}" + ) + value = value[mask] + + if mask.any(): + if method is not None: + func = pad_1d if method == "pad" else backfill_1d + new_values = func(self.astype(object), limit=limit, mask=mask) + new_values = self._from_sequence(new_values, dtype=self.dtype) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values + + def dropna(self): + """ + Return ExtensionArray without NA values. 
+ + Returns + ------- + valid : ExtensionArray + """ + return self[~self.isna()] + + def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray: + """ + Shift values by desired number. + + Newly introduced missing values are filled with + ``self.dtype.na_value``. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. + + fill_value : object, optional + The scalar value to use for newly introduced missing values. + The default is ``self.dtype.na_value``. + + .. versionadded:: 0.24.0 + + Returns + ------- + ExtensionArray + Shifted. + + Notes + ----- + If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is + returned. + + If ``periods > len(self)``, then an array of size + len(self) is returned, with all values filled with + ``self.dtype.na_value``. + """ + # Note: this implementation assumes that `self.dtype.na_value` can be + # stored in an instance of your ExtensionArray with `self.dtype`. + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + empty = self._from_sequence( + [fill_value] * min(abs(periods), len(self)), dtype=self.dtype + ) + if periods > 0: + a = empty + b = self[:-periods] + else: + a = self[abs(periods) :] + b = empty + return self._concat_same_type([a, b]) + + def unique(self): + """ + Compute the ExtensionArray of unique values. + + Returns + ------- + uniques : ExtensionArray + """ + uniques = unique(self.astype(object)) + return self._from_sequence(uniques, dtype=self.dtype) + + def searchsorted(self, value, side="left", sorter=None): + """ + Find indices where elements should be inserted to maintain order. + + .. versionadded:: 0.24.0 + + Find the indices into a sorted array `self` (a) such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. 
+ + Assuming that `self` is sorted: + + ====== ================================ + `side` returned index `i` satisfies + ====== ================================ + left ``self[i-1] < value <= self[i]`` + right ``self[i-1] <= value < self[i]`` + ====== ================================ + + Parameters + ---------- + value : array_like + Values to insert into `self`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array_like, optional + Optional array of integer indices that sort array a into ascending + order. They are typically the result of argsort. + + Returns + ------- + array of ints + Array of insertion points with the same shape as `value`. + + See Also + -------- + numpy.searchsorted : Similar method from NumPy. + """ + # Note: the base tests provided by pandas only test the basics. + # We do not test + # 1. Values outside the range of the `data_for_sorting` fixture + # 2. Values between the values in the `data_for_sorting` fixture + # 3. Missing values. + arr = self.astype(object) + return arr.searchsorted(value, side=side, sorter=sorter) + + def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: + """ + Return an array and missing value suitable for factorization. + + Returns + ------- + values : ndarray + + An array suitable for factorization. This should maintain order + and be a supported dtype (Float64, Int64, UInt64, String, Object). + By default, the extension array is cast to object dtype. + na_value : object + The value in `values` to consider missing. This will be treated + as NA in the factorization routines, so it will be coded as + `na_sentinel` and not included in `uniques`. By default, + ``np.nan`` is used. + + Notes + ----- + The values returned by this method are also used in + :func:`pandas.util.hash_pandas_object`. 
+ """ + return self.astype(object), np.nan + + def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ABCExtensionArray]: + """ + Encode the extension array as an enumerated type. + + Parameters + ---------- + na_sentinel : int, default -1 + Value to use in the `codes` array to indicate missing values. + + Returns + ------- + codes : ndarray + An integer NumPy array that's an indexer into the original + ExtensionArray. + uniques : ExtensionArray + An ExtensionArray containing the unique values of `self`. + + .. note:: + + uniques will *not* contain an entry for the NA value of + the ExtensionArray if there are any missing values present + in `self`. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + + Notes + ----- + :meth:`pandas.factorize` offers a `sort` keyword as well. + """ + # Implementer note: There are two ways to override the behavior of + # pandas.factorize + # 1. _values_for_factorize and _from_factorize. + # Specify the values passed to pandas' internal factorization + # routines, and how to convert from those values back to the + # original ExtensionArray. + # 2. ExtensionArray.factorize. + # Complete control over factorization. + arr, na_value = self._values_for_factorize() + + codes, uniques = _factorize_array( + arr, na_sentinel=na_sentinel, na_value=na_value + ) + + uniques = self._from_factorized(uniques, self) + return codes, uniques + + _extension_array_shared_docs[ + "repeat" + ] = """ + Repeat elements of a %(klass)s. + + Returns a new %(klass)s where each element of the current %(klass)s + is repeated consecutively a given number of times. + + Parameters + ---------- + repeats : int or array of ints + The number of repetitions for each element. This should be a + non-negative integer. Repeating 0 times will return an empty + %(klass)s. + axis : None + Must be ``None``. Has no effect but is accepted for compatibility + with numpy. 
+ + Returns + ------- + repeated_array : %(klass)s + Newly created %(klass)s with repeated elements. + + See Also + -------- + Series.repeat : Equivalent function for Series. + Index.repeat : Equivalent function for Index. + numpy.repeat : Similar method for :class:`numpy.ndarray`. + ExtensionArray.take : Take arbitrary positions. + + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + [a, b, c] + Categories (3, object): [a, b, c] + >>> cat.repeat(2) + [a, a, b, b, c, c] + Categories (3, object): [a, b, c] + >>> cat.repeat([1, 2, 3]) + [a, b, b, c, c, c] + Categories (3, object): [a, b, c] + """ + + @Substitution(klass="ExtensionArray") + @Appender(_extension_array_shared_docs["repeat"]) + def repeat(self, repeats, axis=None): + nv.validate_repeat(tuple(), dict(axis=axis)) + ind = np.arange(len(self)).repeat(repeats) + return self.take(ind) + + # ------------------------------------------------------------------------ + # Indexing methods + # ------------------------------------------------------------------------ + + def take( + self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None + ) -> ABCExtensionArray: + """ + Take elements from an array. + + Parameters + ---------- + indices : sequence of int + Indices to be taken. + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: ``-1`` in `indices` indicates + missing values. These values are set to `fill_value`. Any + other negative values raise a ``ValueError``. + + fill_value : any, optional + Fill value to use for NA-indices when `allow_fill` is True. + This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. 
+ + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + Returns + ------- + ExtensionArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + + See Also + -------- + numpy.take + api.extensions.take + + Notes + ----- + ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, + ``iloc``, when `indices` is a sequence of values. Additionally, + it's called by :meth:`Series.reindex`, or any other method + that causes realignment, with a `fill_value`. + + Examples + -------- + Here's an example implementation, which relies on casting the + extension array to object dtype. This uses the helper method + :func:`pandas.api.extensions.take`. + + .. code-block:: python + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + # If the ExtensionArray is backed by an ndarray, then + # just pass that here instead of coercing to object. + data = self.astype(object) + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + # fill value should always be translated from the scalar + # type for the array, to the physical storage type for + # the data, before passing to take. + + result = take(data, indices, fill_value=fill_value, + allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + """ + # Implementer note: The `fill_value` parameter should be a user-facing + # value, an instance of self.dtype.type. When passed `fill_value=None`, + # the default of `self.dtype.na_value` should be used. + # This may differ from the physical storage type your ExtensionArray + # uses. 
In this case, your implementation is responsible for casting + # the user-facing type to the storage type, before using + # pandas.api.extensions.take + raise AbstractMethodError(self) + + def copy(self) -> ABCExtensionArray: + """ + Return a copy of the array. + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(self) + + def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]: + """ + Return a view on the array. + + Parameters + ---------- + dtype : str, np.dtype, or ExtensionDtype, optional + Default None. + + Returns + ------- + ExtensionArray + A view of the :class:`ExtensionArray`. + """ + # NB: + # - This must return a *new* object referencing the same data, not self. + # - The only case that *must* be implemented is with dtype=None, + # giving a view with the same dtype as self. + if dtype is not None: + raise NotImplementedError(dtype) + return self[:] + + # ------------------------------------------------------------------------ + # Printing + # ------------------------------------------------------------------------ + + def __repr__(self) -> str: + from pandas.io.formats.printing import format_object_summary + + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + data = format_object_summary( + self, self._formatter(), indent_for_name=False + ).rstrip(", \n") + class_name = f"<{type(self).__name__}>\n" + return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" + + def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]: + """Formatting function for scalar values. + + This is used in the default '__repr__'. The returned formatting + function receives instances of your scalar type. 
+ + Parameters + ---------- + boxed : bool, default False + An indicator for whether or not your array is being printed + within a Series, DataFrame, or Index (True), or just by + itself (False). This may be useful if you want scalar values + to appear differently within a Series versus on its own (e.g. + quoted or not). + + Returns + ------- + Callable[[Any], str] + A callable that gets instances of the scalar type and + returns a string. By default, :func:`repr` is used + when ``boxed=False`` and :func:`str` is used when + ``boxed=True``. + """ + if boxed: + return str + return repr + + # ------------------------------------------------------------------------ + # Reshaping + # ------------------------------------------------------------------------ + + def ravel(self, order="C") -> ABCExtensionArray: + """ + Return a flattened view on this array. + + Parameters + ---------- + order : {None, 'C', 'F', 'A', 'K'}, default 'C' + + Returns + ------- + ExtensionArray + + Notes + ----- + - Because ExtensionArrays are 1D-only, this is a no-op. + - The "order" argument is ignored; it is accepted for compatibility with NumPy. + """ + return self + + @classmethod + def _concat_same_type( + cls, to_concat: Sequence[ABCExtensionArray] + ) -> ABCExtensionArray: + """ + Concatenate multiple arrays. + + Parameters + ---------- + to_concat : sequence of this type + + Returns + ------- + ExtensionArray + """ + raise AbstractMethodError(cls) + + # The _can_hold_na attribute is set to True so that pandas internals + # will use the ExtensionDtype.na_value as the NA value in operations + # such as take(), reindex(), shift(), etc. In addition, those results + # will then be of the ExtensionArray subclass rather than an array + # of objects + _can_hold_na = True + + @property + def _ndarray_values(self) -> np.ndarray: + """ + Internal pandas method for lossy conversion to a NumPy ndarray. + + This method is not part of the pandas interface. 
+ + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. + + Returns + ------- + array : ndarray + """ + return np.array(self) + + def _reduce(self, name, skipna=True, **kwargs): + """ + Return a scalar result of performing the reduction operation. + + Parameters + ---------- + name : str + Name of the function, supported values are: + { any, all, min, max, sum, mean, median, prod, + std, var, sem, kurt, skew }. + skipna : bool, default True + If True, skip NaN values. + **kwargs + Additional keyword arguments passed to the reduction function. + Currently, `ddof` is the only supported kwarg. + + Returns + ------- + scalar + + Raises + ------ + TypeError : subclass does not define reductions + """ + raise TypeError(f"cannot perform {name} with type {self.dtype}") + + +class ExtensionOpsMixin: + """ + A base class for linking the operators to their dunder names. + + .. note:: + + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. 
+ """ + + @classmethod + def _add_arithmetic_ops(cls): + cls.__add__ = cls._create_arithmetic_method(operator.add) + cls.__radd__ = cls._create_arithmetic_method(ops.radd) + cls.__sub__ = cls._create_arithmetic_method(operator.sub) + cls.__rsub__ = cls._create_arithmetic_method(ops.rsub) + cls.__mul__ = cls._create_arithmetic_method(operator.mul) + cls.__rmul__ = cls._create_arithmetic_method(ops.rmul) + cls.__pow__ = cls._create_arithmetic_method(operator.pow) + cls.__rpow__ = cls._create_arithmetic_method(ops.rpow) + cls.__mod__ = cls._create_arithmetic_method(operator.mod) + cls.__rmod__ = cls._create_arithmetic_method(ops.rmod) + cls.__floordiv__ = cls._create_arithmetic_method(operator.floordiv) + cls.__rfloordiv__ = cls._create_arithmetic_method(ops.rfloordiv) + cls.__truediv__ = cls._create_arithmetic_method(operator.truediv) + cls.__rtruediv__ = cls._create_arithmetic_method(ops.rtruediv) + cls.__divmod__ = cls._create_arithmetic_method(divmod) + cls.__rdivmod__ = cls._create_arithmetic_method(ops.rdivmod) + + @classmethod + def _add_comparison_ops(cls): + cls.__eq__ = cls._create_comparison_method(operator.eq) + cls.__ne__ = cls._create_comparison_method(operator.ne) + cls.__lt__ = cls._create_comparison_method(operator.lt) + cls.__gt__ = cls._create_comparison_method(operator.gt) + cls.__le__ = cls._create_comparison_method(operator.le) + cls.__ge__ = cls._create_comparison_method(operator.ge) + + @classmethod + def _add_logical_ops(cls): + cls.__and__ = cls._create_logical_method(operator.and_) + cls.__rand__ = cls._create_logical_method(ops.rand_) + cls.__or__ = cls._create_logical_method(operator.or_) + cls.__ror__ = cls._create_logical_method(ops.ror_) + cls.__xor__ = cls._create_logical_method(operator.xor) + cls.__rxor__ = cls._create_logical_method(ops.rxor) + + +class ExtensionScalarOpsMixin(ExtensionOpsMixin): + """ + A mixin for defining ops on an ExtensionArray. 
@classmethod
def _create_method(cls, op, coerce_to_dtype=True):
    """
    Build an operator method that applies ``op`` element by element.

    The returned function is meant to be bound on an ExtensionArray
    subclass; it dispatches to the operator already defined on the
    individual scalars stored in the array.

    Parameters
    ----------
    op : function
        An operator that takes arguments op(a, b)
    coerce_to_dtype : bool, default True
        Whether to try to convert the element-wise results back into the
        ExtensionArray type of ``self``. When that is not possible an
        ndarray is returned instead.

    Returns
    -------
    Callable[[Any, Any], Union[ndarray, ExtensionArray]]
        A method that can be bound to a class. It receives ``self`` and
        ``other`` and returns an ExtensionArray or an ndarray (a pair of
        them for ``divmod`` / ``rdivmod``). ``NotImplemented`` is returned
        when ``other`` is a Series or Index.

    Examples
    --------
    Given an ExtensionArray subclass called MyExtensionArray, use

    >>> __add__ = cls._create_method(operator.add)

    in the class definition of MyExtensionArray to create the operator
    for addition, based on the operator implementation of the underlying
    elements of the ExtensionArray.
    """

    def _binop(self, other):
        if isinstance(other, (ABCSeries, ABCIndexClass)):
            # rely on pandas to unbox and dispatch to us
            return NotImplemented

        # Array-likes are paired positionally with ``self``; anything else
        # is assumed to be a single object and repeated for every element.
        if isinstance(other, ExtensionArray) or is_list_like(other):
            right = other
        else:
            right = [other] * len(self)

        # If the operator is not defined for the underlying objects,
        # a TypeError should be raised
        results = [
            op(left_item, right_item) for left_item, right_item in zip(self, right)
        ]

        def _maybe_convert(arr):
            if not coerce_to_dtype:
                return np.asarray(arr)
            # https://github.com/pandas-dev/pandas/issues/22850
            # try_cast_to_ea is expected to hand back something that is not
            # an instance of our type when the cast fails; fall back to an
            # ndarray in that case.
            converted = try_cast_to_ea(self, arr)
            if isinstance(converted, type(self)):
                return converted
            return np.asarray(arr)

        if op.__name__ not in {"divmod", "rdivmod"}:
            return _maybe_convert(results)

        # divmod-style ops yield (quotient, remainder) pairs per element;
        # transpose them into two separate sequences.
        quotients, remainders = zip(*results)
        return _maybe_convert(quotients), _maybe_convert(remainders)

    op_name = ops._get_op_name(op, True)
    return set_function_name(_binop, op_name, cls)
def coerce_to_array(values, mask=None, copy: bool = False):
    """
    Coerce the input values array to numpy arrays with a mask.

    Parameters
    ----------
    values : 1D list-like
        Boolean-like data: booleans, or numbers that are exactly 0 or 1,
        optionally containing missing values.
    mask : bool 1D array, optional
        Extra positions to flag as missing; combined (OR) with the missing
        values detected in `values`.
    copy : bool, default False
        if True, copy the input

    Returns
    -------
    tuple of (values, mask)
        Two boolean ndarrays: the data and the missing-value mask.

    Raises
    ------
    TypeError
        If `values` holds anything that is not bool-like.
    ValueError
        If `values` or `mask` is not one-dimensional, or a mask is passed
        together with a BooleanArray.
    """
    if isinstance(values, BooleanArray):
        if mask is not None:
            raise ValueError("cannot pass mask for BooleanArray input")
        values, mask = values._data, values._mask
        if copy:
            values = values.copy()
            mask = mask.copy()
        return values, mask

    mask_values = None
    if isinstance(values, np.ndarray) and values.dtype == np.bool_:
        if copy:
            values = values.copy()
    elif isinstance(values, np.ndarray) and is_numeric_dtype(values.dtype):
        mask_values = isna(values)

        # Allocate with the input's full shape (not just its length) so
        # that multi-dimensional input reaches the explicit 1D check below
        # instead of failing inside the boolean indexing.
        values_bool = np.zeros(values.shape, dtype=bool)
        values_bool[~mask_values] = values[~mask_values].astype(bool)

        # round-trip check: only exact 0/1 survive the cast unchanged
        if not np.all(
            values_bool[~mask_values].astype(values.dtype) == values[~mask_values]
        ):
            raise TypeError("Need to pass bool-like values")

        values = values_bool
    else:
        values_object = np.asarray(values, dtype=object)

        inferred_dtype = lib.infer_dtype(values_object, skipna=True)
        integer_like = ("floating", "integer", "mixed-integer-float")
        if inferred_dtype not in ("boolean", "empty") + integer_like:
            raise TypeError("Need to pass bool-like values")

        mask_values = isna(values_object)
        # same reasoning as above: keep the shape of the input
        values = np.zeros(values_object.shape, dtype=bool)
        values[~mask_values] = values_object[~mask_values].astype(bool)

        # if the values were integer-like, validate it were actually 0/1's
        if inferred_dtype in integer_like:
            if not np.all(
                values[~mask_values].astype(float)
                == values_object[~mask_values].astype(float)
            ):
                raise TypeError("Need to pass bool-like values")

    if mask is None and mask_values is None:
        mask = np.zeros(values.shape, dtype=bool)
    elif mask is None:
        mask = mask_values
    else:
        if isinstance(mask, np.ndarray) and mask.dtype == np.bool_:
            if mask_values is not None:
                # `|` already allocates a new array, no explicit copy needed
                mask = mask | mask_values
            else:
                if copy:
                    mask = mask.copy()
        else:
            mask = np.array(mask, dtype=bool)
            if mask_values is not None:
                mask = mask | mask_values

    if not values.ndim == 1:
        raise ValueError("values must be a 1D list-like")
    if not mask.ndim == 1:
        raise ValueError("mask must be a 1D list-like")

    return values, mask
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """
    Apply a NumPy ufunc to this array.

    For BooleanArray inputs the ufunc is applied to ``._data`` and the
    combined mask of all BooleanArray inputs is applied to the result.

    Parameters
    ----------
    ufunc : numpy.ufunc
        The ufunc being called.
    method : str
        Name of the ufunc method being invoked (e.g. ``"__call__"``).
    *inputs
        Positional inputs of the ufunc call.
    **kwargs
        Keyword arguments of the ufunc call (``out`` is inspected).

    Returns
    -------
    BooleanArray, ndarray, tuple of those, or NotImplemented
        Boolean results are wrapped in a BooleanArray; other results are
        returned as ndarrays with ``np.nan`` written at masked positions.
        Ufuncs with several outputs return a tuple with one entry per
        output. ``NotImplemented`` is returned when an input or output is
        of a type this class does not handle.

    Raises
    ------
    NotImplementedError
        For ``method == "reduce"``.
    """
    if method == "reduce":
        # Not clear how to handle missing values in reductions. Raise.
        raise NotImplementedError("The 'reduce' method is not supported.")
    out = kwargs.get("out", ())

    for x in inputs + out:
        if not isinstance(x, self._HANDLED_TYPES + (BooleanArray,)):
            return NotImplemented

    # for binary ops, use our custom dunder methods
    result = ops.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    # A position is missing in the result if it is missing in any input.
    mask = np.zeros(len(self), dtype=bool)
    inputs2 = []
    for x in inputs:
        if isinstance(x, BooleanArray):
            mask |= x._mask
            inputs2.append(x._data)
        else:
            inputs2.append(x)

    def reconstruct(x):
        # we don't worry about scalar `x` here, since we
        # raise for reduce up above.

        if is_bool_dtype(x.dtype):
            # each output gets its own copy of the mask
            m = mask.copy()
            return BooleanArray(x, m)
        else:
            x[mask] = np.nan
            return x

    result = getattr(ufunc, method)(*inputs2, **kwargs)
    if isinstance(result, tuple):
        # Multi-output ufuncs: the reconstructed outputs must be returned,
        # otherwise the caller receives None.
        return tuple(reconstruct(x) for x in result)
    else:
        return reconstruct(result)
def _values_for_argsort(self) -> np.ndarray:
    """
    Return values for sorting.

    Returns
    -------
    ndarray
        The transformed values should maintain the ordering between values
        within the array.

    See Also
    --------
    ExtensionArray.argsort
    """
    # NOTE(review): ``_data`` is validated as a bool ndarray in __init__, so
    # the -1 written here is presumably stored as True rather than as a
    # distinct sentinel -- confirm that callers order NA via the mask.
    data = self._data.copy()
    data[self._mask] = -1
    return data


def any(self, skipna: bool = True, **kwargs):
    """
    Return whether any element is True.

    Returns False unless there is at least one element that is True.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be False, as for an empty array.
        If `skipna` is False, the result will still be True if there is
        at least one element that is True, otherwise NA will be returned
        if there are NA's present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.any : Numpy version of this method.
    BooleanArray.all : Return whether all elements are True.

    Examples
    --------

    The result indicates whether any element is True (and by default
    skips NAs):

    >>> pd.array([True, False, True]).any()
    True
    >>> pd.array([True, False, pd.NA]).any()
    True
    >>> pd.array([False, False, pd.NA]).any()
    False
    >>> pd.array([], dtype="boolean").any()
    False
    >>> pd.array([pd.NA], dtype="boolean").any()
    False

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, False, pd.NA]).any(skipna=False)
    True
    >>> pd.array([False, False, pd.NA]).any(skipna=False)
    <NA>
    """
    kwargs.pop("axis", None)
    nv.validate_any((), kwargs)

    # Treat missing entries as False: they cannot make the result True.
    values = self._data.copy()
    np.putmask(values, self._mask, False)
    result = values.any()

    # A True result is decided regardless of the missing entries, and
    # without any missing entry (which includes the empty array) there is
    # nothing unknown. Only otherwise is the answer NA.
    if skipna or result or not self._mask.any():
        return result
    return self.dtype.na_value


def all(self, skipna: bool = True, **kwargs):
    """
    Return whether all elements are True.

    Returns True unless there is at least one element that is False.
    By default, NAs are skipped. If ``skipna=False`` is specified and
    missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
    is used as for logical operations.

    Parameters
    ----------
    skipna : bool, default True
        Exclude NA values. If the entire array is NA and `skipna` is
        True, then the result will be True, as for an empty array.
        If `skipna` is False, the result will still be False if there is
        at least one element that is False, otherwise NA will be returned
        if there are NA's present.
    **kwargs : any, default None
        Additional keywords have no effect but might be accepted for
        compatibility with NumPy.

    Returns
    -------
    bool or :attr:`pandas.NA`

    See Also
    --------
    numpy.all : Numpy version of this method.
    BooleanArray.any : Return whether any element is True.

    Examples
    --------

    The result indicates whether all elements are True (and by default
    skips NAs):

    >>> pd.array([True, True, pd.NA]).all()
    True
    >>> pd.array([True, False, pd.NA]).all()
    False
    >>> pd.array([], dtype="boolean").all()
    True
    >>> pd.array([pd.NA], dtype="boolean").all()
    True

    With ``skipna=False``, the result can be NA if this is logically
    required (whether ``pd.NA`` is True or False influences the result):

    >>> pd.array([True, True, pd.NA]).all(skipna=False)
    <NA>
    >>> pd.array([True, False, pd.NA]).all(skipna=False)
    False
    """
    kwargs.pop("axis", None)
    nv.validate_all((), kwargs)

    # Treat missing entries as True: they cannot make the result False.
    values = self._data.copy()
    np.putmask(values, self._mask, True)
    result = values.all()

    # A False result is decided regardless of the missing entries, and
    # without any missing entry (which includes the empty array) there is
    # nothing unknown. Only otherwise is the answer NA.
    if skipna or not result or not self._mask.any():
        return result
    return self.dtype.na_value
@classmethod
def _create_comparison_method(cls, op):
    """
    Build the comparison dunder method for ``op``.

    Parameters
    ----------
    op : function
        A comparison operator taking ``op(a, b)``.

    Returns
    -------
    function
        A method named ``__<op name>__`` that compares the underlying
        boolean data and returns a BooleanArray whose mask is the union of
        both operands' masks. Comparing with ``pandas.NA`` yields an
        all-missing result.
    """

    def cmp_method(self, other):
        from pandas.arrays import IntegerArray

        if isinstance(
            other, (ABCDataFrame, ABCSeries, ABCIndexClass, IntegerArray)
        ):
            # Rely on pandas to unbox and dispatch to us.
            return NotImplemented

        other = lib.item_from_zerodim(other)
        mask = None

        if isinstance(other, BooleanArray):
            other, mask = other._data, other._mask

        elif is_list_like(other):
            other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError(
                    "can only perform ops with 1-d structures"
                )
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        if other is libmissing.NA:
            # numpy does not handle pd.NA well as "other" scalar (it returns
            # a scalar False instead of an array)
            result = np.zeros_like(self._data)
            mask = np.ones_like(self._data)
        else:
            # numpy will show a DeprecationWarning on invalid elementwise
            # comparisons, this will raise in the future
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                with np.errstate(all="ignore"):
                    result = op(self._data, other)

            # nans propagate
            if mask is None:
                mask = self._mask.copy()
            else:
                mask = self._mask | mask

        return BooleanArray(result, mask, copy=False)

    # Dunder names carry both the leading and the trailing double
    # underscore, as in the logical and arithmetic method factories.
    name = f"__{op.__name__}__"
    return set_function_name(cmp_method, name, cls)
@classmethod
def _create_arithmetic_method(cls, op):
    """
    Build the arithmetic dunder method for ``op``.

    Parameters
    ----------
    op : function
        An arithmetic operator taking ``op(a, b)``.

    Returns
    -------
    function
        A method named ``__<op name>__``. It applies ``op`` to the
        underlying data and passes the result through
        ``_maybe_mask_result`` together with the union of both operands'
        masks. For ``divmod`` and ``rdivmod`` a 2-tuple is returned.
    """
    op_name = op.__name__

    def boolean_arithmetic_method(self, other):

        if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
            # Rely on pandas to unbox and dispatch to us.
            return NotImplemented

        other = lib.item_from_zerodim(other)
        mask = None

        if isinstance(other, BooleanArray):
            other, mask = other._data, other._mask

        elif is_list_like(other):
            other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError(
                    "can only perform ops with 1-d structures"
                )
            if len(self) != len(other):
                raise ValueError("Lengths must match")

        # nans propagate
        if mask is None:
            # Copy so the result never shares its mask with ``self``;
            # writing into the result must not alter this array.
            mask = self._mask.copy()
        else:
            mask = self._mask | mask

        with np.errstate(all="ignore"):
            result = op(self._data, other)

        # divmod and its reflected variant both return a tuple
        if op_name in {"divmod", "rdivmod"}:
            div, mod = result
            prefix = "r" if op_name == "rdivmod" else ""
            return (
                self._maybe_mask_result(div, mask, other, prefix + "floordiv"),
                # separate mask object per output
                self._maybe_mask_result(mod, mask.copy(), other, prefix + "mod"),
            )

        return self._maybe_mask_result(result, mask, other, op_name)

    name = f"__{op_name}__"
    return set_function_name(boolean_arithmetic_method, name, cls)
b/pandas/core/arrays/categorical.py @@ -0,0 +1,2712 @@ +import operator +from shutil import get_terminal_size +from typing import Dict, Hashable, List, Type, Union, cast +from warnings import warn + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import algos as libalgos, hashtable as htable +from pandas._typing import ArrayLike, Dtype, Ordered, Scalar +from pandas.compat.numpy import function as nv +from pandas.util._decorators import ( + Appender, + Substitution, + cache_readonly, + deprecate_kwarg, +) +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs + +from pandas.core.dtypes.cast import coerce_indexer_dtype, maybe_infer_to_datetimelike +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_object, + ensure_platform_int, + is_categorical_dtype, + is_datetime64_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_integer_dtype, + is_iterator, + is_list_like, + is_object_dtype, + is_scalar, + is_sequence, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries +from pandas.core.dtypes.inference import is_hashable +from pandas.core.dtypes.missing import isna, notna + +from pandas.core import ops +from pandas.core.accessor import PandasDelegate, delegate_names +import pandas.core.algorithms as algorithms +from pandas.core.algorithms import _get_data_algo, factorize, take, take_1d, unique1d +from pandas.core.arrays.base import ( + ExtensionArray, + _extension_array_shared_docs, + try_cast_to_ea, +) +from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs +import pandas.core.common as com +from pandas.core.construction import array, extract_array, sanitize_array +from pandas.core.indexers import check_array_indexer, deprecate_ndim_indexing +from pandas.core.missing import interpolate_2d +from pandas.core.ops.common import 
unpack_zerodim_and_defer +from pandas.core.sorting import nargsort + +from pandas.io.formats import console + + +def _cat_compare_op(op): + opname = f"__{op.__name__}__" + + @unpack_zerodim_and_defer(opname) + def func(self, other): + if is_list_like(other) and len(other) != len(self): + # TODO: Could this fail if the categories are listlike objects? + raise ValueError("Lengths must match.") + + if not self.ordered: + if opname in ["__lt__", "__gt__", "__le__", "__ge__"]: + raise TypeError( + "Unordered Categoricals can only compare equality or not" + ) + if isinstance(other, Categorical): + # Two Categoricals can only be be compared if the categories are + # the same (maybe up to ordering, depending on ordered) + + msg = "Categoricals can only be compared if 'categories' are the same." + if len(self.categories) != len(other.categories): + raise TypeError(msg + " Categories are different lengths") + elif self.ordered and not (self.categories == other.categories).all(): + raise TypeError(msg) + elif not set(self.categories) == set(other.categories): + raise TypeError(msg) + + if not (self.ordered == other.ordered): + raise TypeError( + "Categoricals can only be compared if 'ordered' is the same" + ) + if not self.ordered and not self.categories.equals(other.categories): + # both unordered and different order + other_codes = _get_codes_for_values(other, self.categories) + else: + other_codes = other._codes + + f = getattr(self._codes, opname) + ret = f(other_codes) + mask = (self._codes == -1) | (other_codes == -1) + if mask.any(): + # In other series, the leads to False, so do that here too + ret[mask] = False + return ret + + if is_scalar(other): + if other in self.categories: + i = self.categories.get_loc(other) + ret = getattr(self._codes, opname)(i) + + if opname not in {"__eq__", "__ge__", "__gt__"}: + # check for NaN needed if we are not equal or larger + mask = self._codes == -1 + ret[mask] = False + return ret + else: + if opname == "__eq__": + return 
def contains(cat, key, container):
    """
    Helper for membership check for ``key`` in ``cat``.

    This is a helper for :meth:`Categorical.__contains__` and
    :meth:`CategoricalIndex.__contains__`.

    Returns True if ``key`` is in ``cat.categories`` and the
    location of ``key`` in ``categories`` is in ``container``.

    Parameters
    ----------
    cat : :class:`Categorical` or :class:`CategoricalIndex`
    key : a hashable object
        The key to check membership for.
    container : Container (e.g. list-like or mapping)
        The container to check for membership in.

    Returns
    -------
    is_in : bool
        True if ``key`` is in ``cat.categories`` and location of
        ``key`` in ``categories`` is in ``container``, else False.

    Raises
    ------
    TypeError
        If ``key`` is not hashable.

    Notes
    -----
    This method does not check for NaN values. Do that separately
    before calling this method.
    """
    # Unhashable keys are rejected up front, before any lookup.
    hash(key)

    # A key that cannot be located in the categories cannot be present
    # in the container either.
    try:
        position = cat.categories.get_loc(key)
    except (KeyError, TypeError):
        return False

    # ``position`` is the location of key in categories, but also the
    # *value* to look for in container. So `key` may be in categories
    # yet still not in `container`. Example ('b' in categories, but not
    # in values):
    # 'b' in Categorical(['a'], categories=['a', 'b'])  # False
    if not is_scalar(position):
        # if categories is an IntervalIndex, the location is an array.
        return any(loc_ in container for loc_ in position)
    return position in container
+ dtype : CategoricalDtype + An instance of ``CategoricalDtype`` to use for this categorical. + + .. versionadded:: 0.21.0 + + Attributes + ---------- + categories : Index + The categories of this categorical + codes : ndarray + The codes (integer positions, which point to the categories) of this + categorical, read only. + ordered : bool + Whether or not this Categorical is ordered. + dtype : CategoricalDtype + The instance of ``CategoricalDtype`` storing the ``categories`` + and ``ordered``. + + .. versionadded:: 0.21.0 + + Methods + ------- + from_codes + __array__ + + Raises + ------ + ValueError + If the categories do not validate. + TypeError + If an explicit ``ordered=True`` is given but no `categories` and the + `values` are not sortable. + + See Also + -------- + CategoricalDtype : Type for categorical data. + CategoricalIndex : An Index with an underlying ``Categorical``. + + Notes + ----- + See the `user guide + `_ + for more. + + Examples + -------- + >>> pd.Categorical([1, 2, 3, 1, 2, 3]) + [1, 2, 3, 1, 2, 3] + Categories (3, int64): [1, 2, 3] + + >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + [a, b, c, a, b, c] + Categories (3, object): [a, b, c] + + Ordered `Categoricals` can be sorted according to the custom order + of the categories and can have a min and max value. + + >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True, + ... 
categories=['c', 'b', 'a']) + >>> c + [a, b, c, a, b, c] + Categories (3, object): [c < b < a] + >>> c.min() + 'c' + """ + + # For comparisons, so that numpy uses our implementation if the compare + # ops, which raise + __array_priority__ = 1000 + _dtype = CategoricalDtype(ordered=False) + # tolist is not actually deprecated, just suppressed in the __dir__ + _deprecations = PandasObject._deprecations | frozenset(["tolist"]) + _typ = "categorical" + + def __init__( + self, values, categories=None, ordered=None, dtype=None, fastpath=False + ): + + dtype = CategoricalDtype._from_values_or_dtype( + values, categories, ordered, dtype + ) + # At this point, dtype is always a CategoricalDtype, but + # we may have dtype.categories be None, and we need to + # infer categories in a factorization step further below + + if fastpath: + self._codes = coerce_indexer_dtype(values, dtype.categories) + self._dtype = self._dtype.update_dtype(dtype) + return + + # null_mask indicates missing values we want to exclude from inference. + # This means: only missing values in list-likes (not arrays/ndframes). 
+ null_mask = np.array(False) + + # sanitize input + if is_categorical_dtype(values): + if dtype.categories is None: + dtype = CategoricalDtype(values.categories, dtype.ordered) + elif not isinstance(values, (ABCIndexClass, ABCSeries)): + # sanitize_array coerces np.nan to a string under certain versions + # of numpy + values = maybe_infer_to_datetimelike(values, convert_dates=True) + if not isinstance(values, np.ndarray): + values = _convert_to_list_like(values) + + # By convention, empty lists result in object dtype: + if len(values) == 0: + sanitize_dtype = "object" + else: + sanitize_dtype = None + null_mask = isna(values) + if null_mask.any(): + values = [values[idx] for idx in np.where(~null_mask)[0]] + values = sanitize_array(values, None, dtype=sanitize_dtype) + + if dtype.categories is None: + try: + codes, categories = factorize(values, sort=True) + except TypeError: + codes, categories = factorize(values, sort=False) + if dtype.ordered: + # raise, as we don't have a sortable data structure and so + # the user should give us one by specifying categories + raise TypeError( + "'values' is not ordered, please " + "explicitly specify the categories order " + "by passing in a categories argument." 
+ ) + except ValueError: + + # FIXME + raise NotImplementedError( + "> 1 ndim Categorical are not supported at this time" + ) + + # we're inferring from values + dtype = CategoricalDtype(categories, dtype.ordered) + + elif is_categorical_dtype(values): + old_codes = ( + values._values.codes if isinstance(values, ABCSeries) else values.codes + ) + codes = _recode_for_categories( + old_codes, values.dtype.categories, dtype.categories + ) + + else: + codes = _get_codes_for_values(values, dtype.categories) + + if null_mask.any(): + # Reinsert -1 placeholders for previously removed missing values + full_codes = -np.ones(null_mask.shape, dtype=codes.dtype) + full_codes[~null_mask] = codes + codes = full_codes + + self._dtype = self._dtype.update_dtype(dtype) + self._codes = coerce_indexer_dtype(codes, dtype.categories) + + @property + def categories(self): + """ + The categories of this categorical. + + Setting assigns new values to each category (effectively a rename of + each individual category). + + The assigned value has to be a list-like object. All items must be + unique and the number of items in the new categories must be the same + as the number of items in the old categories. + + Assigning to `categories` is an inplace operation! + + Raises + ------ + ValueError + If the new categories do not validate as categories or if the + number of new categories is unequal to the number of old categories + + See Also + -------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + """ + return self.dtype.categories + + @categories.setter + def categories(self, categories): + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if self.dtype.categories is not None and len(self.dtype.categories) != len( + new_dtype.categories + ): + raise ValueError( + "new categories need to have the same number of " + "items as the old categories!"
+ ) + self._dtype = new_dtype + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self.dtype.ordered + + @property + def dtype(self) -> CategoricalDtype: + """ + The :class:`~pandas.api.types.CategoricalDtype` for this instance. + """ + return self._dtype + + @property + def _ndarray_values(self) -> np.ndarray: + return self.codes + + @property + def _constructor(self) -> Type["Categorical"]: + return Categorical + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return Categorical(scalars, dtype=dtype) + + def _formatter(self, boxed=False): + # Defer to CategoricalFormatter's formatter. + return None + + def copy(self) -> "Categorical": + """ + Copy constructor. + """ + return self._constructor( + values=self._codes.copy(), dtype=self.dtype, fastpath=True + ) + + def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike: + """ + Coerce this type to another dtype + + Parameters + ---------- + dtype : numpy dtype or pandas type + copy : bool, default True + By default, astype always returns a newly allocated object. + If copy is set to False and dtype is categorical, the original + object is returned. + """ + if is_categorical_dtype(dtype): + dtype = cast(Union[str, CategoricalDtype], dtype) + + # GH 10696/18593 + dtype = self.dtype.update_dtype(dtype) + self = self.copy() if copy else self + if dtype == self.dtype: + return self + return self._set_dtype(dtype) + if is_extension_array_dtype(dtype): + return array(self, dtype=dtype, copy=copy) # type: ignore # GH 28770 + if is_integer_dtype(dtype) and self.isna().any(): + raise ValueError("Cannot convert float NaN to integer") + return np.array(self, dtype=dtype, copy=copy) + + @cache_readonly + def size(self) -> int: + """ + Return the len of myself. 
+ """ + return self._codes.size + + @cache_readonly + def itemsize(self) -> int: + """ + return the size of a single category + """ + return self.categories.itemsize + + def tolist(self) -> List[Scalar]: + """ + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + """ + return list(self) + + to_list = tolist + + @classmethod + def _from_inferred_categories( + cls, inferred_categories, inferred_codes, dtype, true_values=None + ): + """ + Construct a Categorical from inferred values. + + For inferred categories (`dtype` is None) the categories are sorted. + For explicit `dtype`, the `inferred_categories` are cast to the + appropriate type. + + Parameters + ---------- + inferred_categories : Index + inferred_codes : Index + dtype : CategoricalDtype or 'category' + true_values : list, optional + If none are provided, the default ones are + "True", "TRUE", and "true." + + Returns + ------- + Categorical + """ + from pandas import Index, to_numeric, to_datetime, to_timedelta + + cats = Index(inferred_categories) + known_categories = ( + isinstance(dtype, CategoricalDtype) and dtype.categories is not None + ) + + if known_categories: + # Convert to a specialized type with `dtype` if specified. + if dtype.categories.is_numeric(): + cats = to_numeric(inferred_categories, errors="coerce") + elif is_datetime64_dtype(dtype.categories): + cats = to_datetime(inferred_categories, errors="coerce") + elif is_timedelta64_dtype(dtype.categories): + cats = to_timedelta(inferred_categories, errors="coerce") + elif dtype.categories.is_boolean(): + if true_values is None: + true_values = ["True", "TRUE", "true"] + + cats = cats.isin(true_values) + + if known_categories: + # Recode from observation order to dtype.categories order. 
+ categories = dtype.categories + codes = _recode_for_categories(inferred_codes, cats, categories) + elif not cats.is_monotonic_increasing: + # Sort categories and recode for unknown categories. + unsorted = cats.copy() + categories = cats.sort_values() + + codes = _recode_for_categories(inferred_codes, unsorted, categories) + dtype = CategoricalDtype(categories, ordered=False) + else: + dtype = CategoricalDtype(cats, ordered=False) + codes = inferred_codes + + return cls(codes, dtype=dtype, fastpath=True) + + @classmethod + def from_codes(cls, codes, categories=None, ordered=None, dtype=None): + """ + Make a Categorical type from codes and categories or dtype. + + This constructor is useful if you already have codes and + categories/dtype and so do not need the (computation intensive) + factorization step, which is usually done on the constructor. + + If your data does not follow this convention, please use the normal + constructor. + + Parameters + ---------- + codes : array-like of int + An integer array, where each integer points to a category in + categories or dtype.categories, or else is -1 for NaN. + categories : index-like, optional + The categories for the categorical. Items need to be unique. + If the categories are not given here, then they must be provided + in `dtype`. + ordered : bool, optional + Whether or not this categorical is treated as an ordered + categorical. If not given here or in `dtype`, the resulting + categorical will be unordered. + dtype : CategoricalDtype or "category", optional + If :class:`CategoricalDtype`, cannot be used together with + `categories` or `ordered`. + + .. versionadded:: 0.24.0 + + When `dtype` is provided, neither `categories` nor `ordered` + should be provided. 
+ + Returns + ------- + Categorical + + Examples + -------- + >>> dtype = pd.CategoricalDtype(['a', 'b'], ordered=True) + >>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype) + [a, b, a, b] + Categories (2, object): [a < b] + """ + dtype = CategoricalDtype._from_values_or_dtype( + categories=categories, ordered=ordered, dtype=dtype + ) + if dtype.categories is None: + msg = ( + "The categories must be provided in 'categories' or " + "'dtype'. Both were None." + ) + raise ValueError(msg) + + if is_extension_array_dtype(codes) and is_integer_dtype(codes): + # Avoid the implicit conversion of Int to object + if isna(codes).any(): + raise ValueError("codes cannot contain NA values") + codes = codes.to_numpy(dtype=np.int64) + else: + codes = np.asarray(codes) + if len(codes) and not is_integer_dtype(codes): + raise ValueError("codes need to be array-like integers") + + if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1): + raise ValueError("codes need to be between -1 and len(categories)-1") + + return cls(codes, dtype=dtype, fastpath=True) + + def _get_codes(self): + """ + Get the codes. + + Returns + ------- + codes : integer array view + A non writable view of the `codes` array. 
+ """ + v = self._codes.view() + v.flags.writeable = False + return v + + def _set_codes(self, codes): + """ + Not settable by the user directly + """ + raise ValueError("cannot set Categorical codes directly") + + codes = property(fget=_get_codes, fset=_set_codes, doc=_codes_doc) + + def _set_categories(self, categories, fastpath=False): + """ + Sets new categories inplace + + Parameters + ---------- + fastpath : bool, default False + Don't perform validation of the categories for uniqueness or nulls + + Examples + -------- + >>> c = pd.Categorical(['a', 'b']) + >>> c + [a, b] + Categories (2, object): [a, b] + + >>> c._set_categories(pd.Index(['a', 'c'])) + >>> c + [a, c] + Categories (2, object): [a, c] + """ + + if fastpath: + new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered) + else: + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if ( + not fastpath + and self.dtype.categories is not None + and len(new_dtype.categories) != len(self.dtype.categories) + ): + raise ValueError( + "new categories need to have the same number of " + "items than the old categories!" + ) + + self._dtype = new_dtype + + def _set_dtype(self, dtype: CategoricalDtype) -> "Categorical": + """ + Internal method for directly updating the CategoricalDtype + + Parameters + ---------- + dtype : CategoricalDtype + + Notes + ----- + We don't do any validation here. It's assumed that the dtype is + a (valid) instance of `CategoricalDtype`. + """ + codes = _recode_for_categories(self.codes, self.categories, dtype.categories) + return type(self)(codes, dtype=dtype, fastpath=True) + + def set_ordered(self, value, inplace=False): + """ + Set the ordered attribute to the boolean value. + + Parameters + ---------- + value : bool + Set whether this categorical is ordered (True) or not (False). + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to the value. 
+ """ + inplace = validate_bool_kwarg(inplace, "inplace") + new_dtype = CategoricalDtype(self.categories, ordered=value) + cat = self if inplace else self.copy() + cat._dtype = new_dtype + if not inplace: + return cat + + def as_ordered(self, inplace=False): + """ + Set the Categorical to be ordered. + + Parameters + ---------- + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to True. + + Returns + ------- + Categorical + Ordered Categorical. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + return self.set_ordered(True, inplace=inplace) + + def as_unordered(self, inplace=False): + """ + Set the Categorical to be unordered. + + Parameters + ---------- + inplace : bool, default False + Whether or not to set the ordered attribute in-place or return + a copy of this categorical with ordered set to False. + + Returns + ------- + Categorical + Unordered Categorical. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + return self.set_ordered(False, inplace=inplace) + + def set_categories(self, new_categories, ordered=None, rename=False, inplace=False): + """ + Set the categories to the specified new_categories. + + `new_categories` can include new categories (which will result in + unused categories) or remove old categories (which results in values + set to NaN). If `rename==True`, the categories will simple be renamed + (less or more items than in old categories will result in values set to + NaN or in unused categories respectively). + + This method can be used to perform more than one action of adding, + removing, and reordering simultaneously and is therefore faster than + performing the individual steps via the more specialised methods. 
+ + On the other hand this method does not do checks (e.g., whether the + old categories are included in the new categories on a reorder), which + can result in surprising changes, for example when using special string + dtypes, which do not consider an S1 string equal to a single char + python string. + + Parameters + ---------- + new_categories : Index-like + The categories in new order. + ordered : bool, optional + Whether or not the categorical is treated as an ordered categorical. + If not given, do not change the ordered information. + rename : bool, default False + Whether or not the new_categories should be considered as a rename + of the old categories or as reordered categories. + inplace : bool, default False + Whether or not to reorder the categories in-place or return a copy + of this categorical with reordered categories. + + Returns + ------- + Categorical with reordered categories or None if inplace. + + Raises + ------ + ValueError + If new_categories does not validate as categories + + See Also + -------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if ordered is None: + ordered = self.dtype.ordered + new_dtype = CategoricalDtype(new_categories, ordered=ordered) + + cat = self if inplace else self.copy() + if rename: + if cat.dtype.categories is not None and len(new_dtype.categories) < len( + cat.dtype.categories + ): + # remove all _codes which are larger and set to -1/NaN + cat._codes[cat._codes >= len(new_dtype.categories)] = -1 + else: + codes = _recode_for_categories( + cat.codes, cat.categories, new_dtype.categories + ) + cat._codes = codes + cat._dtype = new_dtype + + if not inplace: + return cat + + def rename_categories(self, new_categories, inplace=False): + """ + Rename categories.
+ + Parameters + ---------- + new_categories : list-like, dict-like or callable + + New categories which will replace old categories. + + * list-like: all items must be unique and the number of items in + the new categories must match the existing number of categories. + + * dict-like: specifies a mapping from + old categories to new. Categories not contained in the mapping + are passed through and extra categories in the mapping are + ignored. + + .. versionadded:: 0.21.0. + + * callable : a callable that is called on all items in the old + categories and whose return values comprise the new categories. + + .. versionadded:: 0.23.0. + + inplace : bool, default False + Whether or not to rename the categories inplace or return a copy of + this categorical with renamed categories. + + Returns + ------- + cat : Categorical or None + With ``inplace=False``, the new categorical is returned. + With ``inplace=True``, there is no return value. + + Raises + ------ + ValueError + If new categories are list-like and do not have the same number of + items as the current categories or do not validate as categories + + See Also + -------- + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + + Examples + -------- + >>> c = pd.Categorical(['a', 'a', 'b']) + >>> c.rename_categories([0, 1]) + [0, 0, 1] + Categories (2, int64): [0, 1] + + For dict-like ``new_categories``, extra keys are ignored and + categories not in the dictionary are passed through + + >>> c.rename_categories({'a': 'A', 'c': 'C'}) + [A, A, b] + Categories (2, object): [A, b] + + You may also provide a callable to create the new categories + + >>> c.rename_categories(lambda x: x.upper()) + [A, A, B] + Categories (2, object): [A, B] + """ + inplace = validate_bool_kwarg(inplace, "inplace") + cat = self if inplace else self.copy() + + if is_dict_like(new_categories): + cat.categories = [new_categories.get(item, item) for item in cat.categories] + elif
callable(new_categories): + cat.categories = [new_categories(item) for item in cat.categories] + else: + cat.categories = new_categories + if not inplace: + return cat + + def reorder_categories(self, new_categories, ordered=None, inplace=False): + """ + Reorder categories as specified in new_categories. + + `new_categories` needs to include all old categories and no new category + items. + + Parameters + ---------- + new_categories : Index-like + The categories in new order. + ordered : bool, optional + Whether or not the categorical is treated as an ordered categorical. + If not given, do not change the ordered information. + inplace : bool, default False + Whether or not to reorder the categories inplace or return a copy of + this categorical with reordered categories. + + Returns + ------- + cat : Categorical with reordered categories or None if inplace. + + Raises + ------ + ValueError + If the new categories do not contain all old category items or + contain any new ones + + See Also + -------- + rename_categories + add_categories + remove_categories + remove_unused_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if set(self.dtype.categories) != set(new_categories): + raise ValueError( + "items in new_categories are not the same as in old categories" + ) + return self.set_categories(new_categories, ordered=ordered, inplace=inplace) + + def add_categories(self, new_categories, inplace=False): + """ + Add new categories. + + `new_categories` will be included at the last/highest place in the + categories and will be unused directly after this call. + + Parameters + ---------- + new_categories : category or list-like of category + The new categories to be included. + inplace : bool, default False + Whether or not to add the categories inplace or return a copy of + this categorical with added categories. + + Returns + ------- + cat : Categorical with new categories added or None if inplace.
+ + Raises + ------ + ValueError + If the new categories include old categories or do not validate as + categories + + See Also + -------- + rename_categories + reorder_categories + remove_categories + remove_unused_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if not is_list_like(new_categories): + new_categories = [new_categories] + already_included = set(new_categories) & set(self.dtype.categories) + if len(already_included) != 0: + raise ValueError( + f"new categories must not include old categories: {already_included}" + ) + new_categories = list(self.dtype.categories) + list(new_categories) + new_dtype = CategoricalDtype(new_categories, self.ordered) + + cat = self if inplace else self.copy() + cat._dtype = new_dtype + cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories) + if not inplace: + return cat + + def remove_categories(self, removals, inplace=False): + """ + Remove the specified categories. + + `removals` must be included in the old categories. Values which were in + the removed categories will be set to NaN + + Parameters + ---------- + removals : category or list of categories + The categories which should be removed. + inplace : bool, default False + Whether or not to remove the categories inplace or return a copy of + this categorical with removed categories. + + Returns + ------- + cat : Categorical with removed categories or None if inplace. 
+ + Raises + ------ + ValueError + If the removals are not contained in the categories + + See Also + -------- + rename_categories + reorder_categories + add_categories + remove_unused_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if not is_list_like(removals): + removals = [removals] + + removal_set = set(removals) + not_included = removal_set - set(self.dtype.categories) + new_categories = [c for c in self.dtype.categories if c not in removal_set] + + # GH 10156 + if any(isna(removals)): + not_included = {x for x in not_included if notna(x)} + new_categories = [x for x in new_categories if notna(x)] + + if len(not_included) != 0: + raise ValueError(f"removals must all be in old categories: {not_included}") + + return self.set_categories( + new_categories, ordered=self.ordered, rename=False, inplace=inplace + ) + + def remove_unused_categories(self, inplace=False): + """ + Remove categories which are not used. + + Parameters + ---------- + inplace : bool, default False + Whether or not to drop unused categories inplace or return a copy of + this categorical with unused categories dropped. + + Returns + ------- + cat : Categorical with unused categories dropped or None if inplace. + + See Also + -------- + rename_categories + reorder_categories + add_categories + remove_categories + set_categories + """ + inplace = validate_bool_kwarg(inplace, "inplace") + cat = self if inplace else self.copy() + idx, inv = np.unique(cat._codes, return_inverse=True) + + if idx.size != 0 and idx[0] == -1: # na sentinel + idx, inv = idx[1:], inv - 1 + + new_categories = cat.dtype.categories.take(idx) + new_dtype = CategoricalDtype._from_fastpath( + new_categories, ordered=self.ordered + ) + cat._dtype = new_dtype + cat._codes = coerce_indexer_dtype(inv, new_dtype.categories) + + if not inplace: + return cat + + def map(self, mapper): + """ + Map categories using input correspondence (dict, Series, or function). 
+ + Maps the categories to new categories. If the mapping correspondence is + one-to-one the result is a :class:`~pandas.Categorical` which has the + same order property as the original, otherwise a :class:`~pandas.Index` + is returned. NaN values are unaffected. + + If a `dict` or :class:`~pandas.Series` is used any unmapped category is + mapped to `NaN`. Note that if this happens an :class:`~pandas.Index` + will be returned. + + Parameters + ---------- + mapper : function, dict, or Series + Mapping correspondence. + + Returns + ------- + pandas.Categorical or pandas.Index + Mapped categorical. + + See Also + -------- + CategoricalIndex.map : Apply a mapping correspondence on a + :class:`~pandas.CategoricalIndex`. + Index.map : Apply a mapping correspondence on an + :class:`~pandas.Index`. + Series.map : Apply a mapping correspondence on a + :class:`~pandas.Series`. + Series.apply : Apply more complex functions on a + :class:`~pandas.Series`. + + Examples + -------- + >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat + [a, b, c] + Categories (3, object): [a, b, c] + >>> cat.map(lambda x: x.upper()) + [A, B, C] + Categories (3, object): [A, B, C] + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) + [first, second, third] + Categories (3, object): [first, second, third] + + If the mapping is one-to-one the ordering of the categories is + preserved: + + >>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True) + >>> cat + [a, b, c] + Categories (3, object): [a < b < c] + >>> cat.map({'a': 3, 'b': 2, 'c': 1}) + [3, 2, 1] + Categories (3, int64): [3 < 2 < 1] + + If the mapping is not one-to-one an :class:`~pandas.Index` is returned: + + >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}) + Index(['first', 'second', 'first'], dtype='object') + + If a `dict` is used, all unmapped categories are mapped to `NaN` and + the result is an :class:`~pandas.Index`: + + >>> cat.map({'a': 'first', 'b': 'second'}) + Index(['first', 'second', nan], dtype='object') + """ 
+ new_categories = self.categories.map(mapper) + try: + return self.from_codes( + self._codes.copy(), categories=new_categories, ordered=self.ordered + ) + except ValueError: + # NA values are represented in self._codes with -1 + # np.take causes NA values to take final element in new_categories + if np.any(self._codes == -1): + new_categories = new_categories.insert(len(new_categories), np.nan) + return np.take(new_categories, self._codes) + + __eq__ = _cat_compare_op(operator.eq) + __ne__ = _cat_compare_op(operator.ne) + __lt__ = _cat_compare_op(operator.lt) + __gt__ = _cat_compare_op(operator.gt) + __le__ = _cat_compare_op(operator.le) + __ge__ = _cat_compare_op(operator.ge) + + # for Series/ndarray like compat + @property + def shape(self): + """ + Shape of the Categorical. + + For internal compatibility with numpy arrays. + + Returns + ------- + shape : tuple + """ + + return tuple([len(self._codes)]) + + def shift(self, periods, fill_value=None): + """ + Shift Categorical by desired number of periods. + + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative + fill_value : object, optional + The scalar value to use for newly introduced missing values. + + .. versionadded:: 0.24.0 + + Returns + ------- + shifted : Categorical + """ + # since categoricals always have ndim == 1, an axis parameter + # doesn't make any sense here. 
+ codes = self.codes + if codes.ndim > 1: + raise NotImplementedError("Categorical with ndim > 1.") + if np.prod(codes.shape) and (periods != 0): + codes = np.roll(codes, ensure_platform_int(periods), axis=0) + if isna(fill_value): + fill_value = -1 + elif fill_value in self.categories: + fill_value = self.categories.get_loc(fill_value) + else: + raise ValueError( + f"'fill_value={fill_value}' is not present " + "in this Categorical's categories" + ) + if periods > 0: + codes[:periods] = fill_value + else: + codes[periods:] = fill_value + + return self.from_codes(codes, dtype=self.dtype) + + def __array__(self, dtype=None) -> np.ndarray: + """ + The numpy array interface. + + Returns + ------- + numpy.ndarray + A numpy array of either the specified dtype or, + if dtype==None (default), the same dtype as + categorical.categories.dtype. + """ + ret = take_1d(self.categories.values, self._codes) + if dtype and not is_dtype_equal(dtype, self.categories.dtype): + return np.asarray(ret, dtype) + if is_extension_array_dtype(ret): + # When we're a Categorical[ExtensionArray], like Interval, + # we need to ensure __array__ gets all the way to an + # ndarray.
+ ret = np.asarray(ret) + return ret + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + # for all other cases, raise for now (similarly as what happens in + # Series.__array_prepare__) + raise TypeError( + f"Object with dtype {self.dtype} cannot perform " + f"the numpy op {ufunc.__name__}" + ) + + def __setstate__(self, state): + """Necessary for making this object picklable""" + if not isinstance(state, dict): + raise Exception("invalid pickle state") + + # compat with pre 0.21.0 CategoricalDtype change + if "_dtype" not in state: + state["_dtype"] = CategoricalDtype(state["_categories"], state["_ordered"]) + + for k, v in state.items(): + setattr(self, k, v) + + @property + def T(self): + """ + Return transposed numpy array. + """ + return self + + @property + def nbytes(self): + return self._codes.nbytes + self.dtype.categories.values.nbytes + + def memory_usage(self, deep=False): + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep) + + @Substitution(klass="Categorical") + @Appender(_shared_docs["searchsorted"]) + def searchsorted(self, value, side="left", sorter=None): + # searchsorted is very performance sensitive. By converting codes + # to same dtype as self.codes, we get much faster performance. 
+ if is_scalar(value): + codes = self.categories.get_loc(value) + codes = self.codes.dtype.type(codes) + else: + locs = [self.categories.get_loc(x) for x in value] + codes = np.array(locs, dtype=self.codes.dtype) + return self.codes.searchsorted(codes, side=side, sorter=sorter) + + def isna(self): + """ + Detect missing values + + Missing values (-1 in .codes) are detected. + + Returns + ------- + a boolean array of whether my values are null + + See Also + -------- + isna : Top-level isna. + isnull : Alias of isna. + Categorical.notna : Boolean inverse of Categorical.isna. + + """ + + ret = self._codes == -1 + return ret + + isnull = isna + + def notna(self): + """ + Inverse of isna + + Both missing values (-1 in .codes) and NA as a category are detected as + null. + + Returns + ------- + a boolean array of whether my values are not null + + See Also + -------- + notna : Top-level notna. + notnull : Alias of notna. + Categorical.isna : Boolean inverse of Categorical.notna. + + """ + return ~self.isna() + + notnull = notna + + def put(self, *args, **kwargs): + """ + Replace specific elements in the Categorical with given values. + """ + raise NotImplementedError(("'put' is not yet implemented for Categorical")) + + def dropna(self): + """ + Return the Categorical without null values. + + Missing values (-1 in .codes) are detected. + + Returns + ------- + valid : Categorical + """ + result = self[self.notna()] + + return result + + def value_counts(self, dropna=True): + """ + Return a Series containing counts of each category. + + Every category will have an entry, even those with a count of 0. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN. 
+ + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + from pandas import Series, CategoricalIndex + + code, cat = self._codes, self.categories + ncat, mask = len(cat), 0 <= code + ix, clean = np.arange(ncat), mask.all() + + if dropna or clean: + obs = code if clean else code[mask] + count = np.bincount(obs, minlength=ncat or 0) + else: + count = np.bincount(np.where(mask, code, ncat)) + ix = np.append(ix, -1) + + ix = self._constructor(ix, dtype=self.dtype, fastpath=True) + + return Series(count, index=CategoricalIndex(ix), dtype="int64") + + def _internal_get_values(self): + """ + Return the values. + + For internal compatibility with pandas formatting. + + Returns + ------- + np.ndarray or Index + A numpy array of the same dtype as categorical.categories.dtype or + Index if datetime / periods. + """ + # if we are a datetime and period index, return Index to keep metadata + if needs_i8_conversion(self.categories): + return self.categories.take(self._codes, fill_value=np.nan) + elif is_integer_dtype(self.categories) and -1 in self._codes: + return self.categories.astype("object").take(self._codes, fill_value=np.nan) + return np.array(self) + + def check_for_ordered(self, op): + """ assert that we are ordered """ + if not self.ordered: + raise TypeError( + f"Categorical is not ordered for operation {op}\n" + "you can use .as_ordered() to change the " + "Categorical to an ordered one\n" + ) + + def _values_for_argsort(self): + return self._codes.copy() + + def argsort(self, ascending=True, kind="quicksort", *args, **kwargs): + """ + Return the indices that would sort the Categorical. + + .. versionchanged:: 0.25.0 + + Changed to sort missing values at the end. + + Parameters + ---------- + ascending : bool, default True + Whether the indices should result in an ascending + or descending sort. + kind : {'quicksort', 'mergesort', 'heapsort'}, optional + Sorting algorithm. 
+ *args, **kwargs: + passed through to :func:`numpy.argsort`. + + Returns + ------- + numpy.array + + See Also + -------- + numpy.ndarray.argsort + + Notes + ----- + While an ordering is applied to the category values, arg-sorting + in this context refers more to organizing and grouping together + based on matching category values. Thus, this function can be + called on an unordered Categorical instance unlike the functions + 'Categorical.min' and 'Categorical.max'. + + Examples + -------- + >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort() + array([2, 0, 1, 3]) + + >>> cat = pd.Categorical(['b', 'b', 'a', 'c'], + ... categories=['c', 'b', 'a'], + ... ordered=True) + >>> cat.argsort() + array([3, 0, 1, 2]) + + Missing values are placed at the end + + >>> cat = pd.Categorical([2, None, 1]) + >>> cat.argsort() + array([2, 0, 1]) + """ + return super().argsort(ascending=ascending, kind=kind, *args, **kwargs) + + def sort_values(self, inplace=False, ascending=True, na_position="last"): + """ + Sort the Categorical by category value returning a new + Categorical by default. + + While an ordering is applied to the category values, sorting in this + context refers more to organizing and grouping together based on + matching category values. Thus, this function can be called on an + unordered Categorical instance unlike the functions 'Categorical.min' + and 'Categorical.max'. + + Parameters + ---------- + inplace : bool, default False + Do operation in place. + ascending : bool, default True + Order ascending. Passing False orders descending. The + ordering parameter provides the method by which the + category values are organized. 
+ na_position : {'first', 'last'} (optional, default='last') + 'first' puts NaNs at the beginning + 'last' puts NaNs at the end + + Returns + ------- + Categorical or None + + See Also + -------- + Categorical.sort + Series.sort_values + + Examples + -------- + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + >>> c + [1, 2, 2, 1, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values() + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> c.sort_values(ascending=False) + [5, 2, 2, 1, 1] + Categories (3, int64): [1, 2, 5] + + Inplace sorting can be done as well: + + >>> c.sort_values(inplace=True) + >>> c + [1, 1, 2, 2, 5] + Categories (3, int64): [1, 2, 5] + >>> + >>> c = pd.Categorical([1, 2, 2, 1, 5]) + + 'sort_values' behaviour with NaNs. Note that 'na_position' + is independent of the 'ascending' parameter: + + >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5]) + >>> c + [NaN, 2.0, 2.0, NaN, 5.0] + Categories (2, int64): [2, 5] + >>> c.sort_values() + [2.0, 2.0, 5.0, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False) + [5.0, 2.0, 2.0, NaN, NaN] + Categories (2, int64): [2, 5] + >>> c.sort_values(na_position='first') + [NaN, NaN, 2.0, 2.0, 5.0] + Categories (2, int64): [2, 5] + >>> c.sort_values(ascending=False, na_position='first') + [NaN, NaN, 5.0, 2.0, 2.0] + Categories (2, int64): [2, 5] + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if na_position not in ["last", "first"]: + raise ValueError(f"invalid na_position: {repr(na_position)}") + + sorted_idx = nargsort(self, ascending=ascending, na_position=na_position) + + if inplace: + self._codes = self._codes[sorted_idx] + else: + return self._constructor( + values=self._codes[sorted_idx], dtype=self.dtype, fastpath=True + ) + + def _values_for_rank(self): + """ + For correctly ranking ordered categorical data. See GH#15420 + + Ordered categorical data should be ranked on the basis of + codes with -1 translated to NaN. 
+ + Returns + ------- + numpy.array + + """ + from pandas import Series + + if self.ordered: + values = self.codes + mask = values == -1 + if mask.any(): + values = values.astype("float64") + values[mask] = np.nan + elif self.categories.is_numeric(): + values = np.array(self) + else: + # reorder the categories (so rank can use the float codes) + # instead of passing an object array to rank + values = np.array( + self.rename_categories(Series(self.categories).rank().values) + ) + return values + + def view(self, dtype=None): + if dtype is not None: + raise NotImplementedError(dtype) + return self._constructor(values=self._codes, dtype=self.dtype, fastpath=True) + + def to_dense(self): + """ + Return my 'dense' representation + + For internal compatibility with numpy arrays. + + Returns + ------- + dense : array + """ + return np.asarray(self) + + def fillna(self, value=None, method=None, limit=None): + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + value : scalar, dict, Series + If a scalar value is passed it is used to fill all missing values. + Alternatively, a Series or dict can be used to fill in different + values for each index. The value should not be a list. The + value(s) passed should either be in the categories or should be + NaN. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + limit : int, default None + (Not implemented yet for Categorical!) + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. 
+ + Returns + ------- + filled : Categorical with NA/NaN filled + """ + value, method = validate_fillna_kwargs( + value, method, validate_scalar_dict_value=False + ) + + if value is None: + value = np.nan + if limit is not None: + raise NotImplementedError( + "specifying a limit for fillna has not been implemented yet" + ) + + codes = self._codes + + # pad / bfill + if method is not None: + + values = self.to_dense().reshape(-1, len(self)) + values = interpolate_2d(values, method, 0, None, value).astype( + self.categories.dtype + )[0] + codes = _get_codes_for_values(values, self.categories) + + else: + + # If value is a dict or a Series (a dict value has already + # been converted to a Series) + if isinstance(value, ABCSeries): + if not value[~value.isin(self.categories)].isna().all(): + raise ValueError("fill value must be in categories") + + values_codes = _get_codes_for_values(value, self.categories) + indexer = np.where(codes == -1) + codes[indexer] = values_codes[indexer] + + # If value is not a dict or Series it should be a scalar + elif is_hashable(value): + if not isna(value) and value not in self.categories: + raise ValueError("fill value must be in categories") + + mask = codes == -1 + if mask.any(): + codes = codes.copy() + if isna(value): + codes[mask] = -1 + else: + codes[mask] = self.categories.get_loc(value) + + else: + raise TypeError( + f"'value' parameter must be a scalar, dict " + f"or Series, but you passed a {type(value).__name__}" + ) + + return self._constructor(codes, dtype=self.dtype, fastpath=True) + + def take(self, indexer, allow_fill: bool = False, fill_value=None): + """ + Take elements from the Categorical. + + Parameters + ---------- + indexer : sequence of int + The indices in `self` to take. The meaning of negative values in + `indexer` depends on the value of `allow_fill`. + allow_fill : bool, default False + How to handle negative values in `indexer`. 
+ + * False: negative values in `indices` indicate positional indices + from the right. This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate missing values + (the default). These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + .. versionchanged:: 1.0.0 + + Default value changed from ``True`` to ``False``. + + fill_value : object + The value to use for `indices` that are missing (-1), when + ``allow_fill=True``. This should be the category, i.e. a value + in ``self.categories``, not a code. + + Returns + ------- + Categorical + This Categorical will have the same categories and ordered as + `self`. + + See Also + -------- + Series.take : Similar method for Series. + numpy.ndarray.take : Similar method for NumPy arrays. + + Examples + -------- + >>> cat = pd.Categorical(['a', 'a', 'b']) + >>> cat + [a, a, b] + Categories (2, object): [a, b] + + Specify ``allow_fill==False`` to have negative indices mean indexing + from the right. + + >>> cat.take([0, -1, -2], allow_fill=False) + [a, b, a] + Categories (2, object): [a, b] + + With ``allow_fill=True``, indices equal to ``-1`` mean "missing" + values that should be filled with the `fill_value`, which is + ``np.nan`` by default. + + >>> cat.take([0, -1, -1], allow_fill=True) + [a, NaN, NaN] + Categories (2, object): [a, b] + + The fill value can be specified. + + >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a') + [a, a, a] + Categories (3, object): [a, b] + + Specifying a fill value that's not in ``self.categories`` + will raise a ``TypeError``. + """ + indexer = np.asarray(indexer, dtype=np.intp) + + dtype = self.dtype + + if isna(fill_value): + fill_value = -1 + elif allow_fill: + # convert user-provided `fill_value` to codes + if fill_value in self.categories: + fill_value = self.categories.get_loc(fill_value) + else: + msg = ( + f"'fill_value' ('{fill_value}') is not in this " + "Categorical's categories." 
+ ) + raise TypeError(msg) + + codes = take(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value) + result = type(self).from_codes(codes, dtype=dtype) + return result + + def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): + # GH#27745 deprecate alias that other EAs dont have + warn( + "Categorical.take_nd is deprecated, use Categorical.take instead", + FutureWarning, + stacklevel=2, + ) + return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value) + + def __len__(self) -> int: + """ + The length of this Categorical. + """ + return len(self._codes) + + def __iter__(self): + """ + Returns an Iterator over the values of this Categorical. + """ + return iter(self._internal_get_values().tolist()) + + def __contains__(self, key) -> bool: + """ + Returns True if `key` is in this Categorical. + """ + # if key is a NaN, check if any NaN is in self. + if is_scalar(key) and isna(key): + return self.isna().any() + + return contains(self, key, container=self._codes) + + def _tidy_repr(self, max_vals=10, footer=True) -> str: + """ a short repr displaying only max_vals and an optional (but default + footer) + """ + num = max_vals // 2 + head = self[:num]._get_repr(length=False, footer=False) + tail = self[-(max_vals - num) :]._get_repr(length=False, footer=False) + + result = f"{head[:-1]}, ..., {tail[1:]}" + if footer: + result = f"{result}\n{self._repr_footer()}" + + return str(result) + + def _repr_categories(self): + """ + return the base repr for the categories + """ + max_categories = ( + 10 + if get_option("display.max_categories") == 0 + else get_option("display.max_categories") + ) + from pandas.io.formats import format as fmt + + if len(self.categories) > max_categories: + num = max_categories // 2 + head = fmt.format_array(self.categories[:num], None) + tail = fmt.format_array(self.categories[-num:], None) + category_strs = head + ["..."] + tail + else: + category_strs = fmt.format_array(self.categories, None) + + # Strip 
all leading spaces, which format_array adds for columns... + category_strs = [x.strip() for x in category_strs] + return category_strs + + def _repr_categories_info(self) -> str: + """ + Returns a string representation of the footer. + """ + + category_strs = self._repr_categories() + dtype = str(self.categories.dtype) + levheader = f"Categories ({len(self.categories)}, {dtype}): " + width, height = get_terminal_size() + max_width = get_option("display.width") or width + if console.in_ipython_frontend(): + # 0 = no breaks + max_width = 0 + levstring = "" + start = True + cur_col_len = len(levheader) # header + sep_len, sep = (3, " < ") if self.ordered else (2, ", ") + linesep = sep.rstrip() + "\n" # remove whitespace + for val in category_strs: + if max_width != 0 and cur_col_len + sep_len + len(val) > max_width: + levstring += linesep + (" " * (len(levheader) + 1)) + cur_col_len = len(levheader) + 1 # header + a whitespace + elif not start: + levstring += sep + cur_col_len += len(val) + levstring += val + start = False + # replace to simple save space by + return levheader + "[" + levstring.replace(" < ... < ", " ... ") + "]" + + def _repr_footer(self) -> str: + info = self._repr_categories_info() + return f"Length: {len(self)}\n{info}" + + def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str: + from pandas.io.formats import format as fmt + + formatter = fmt.CategoricalFormatter( + self, length=length, na_rep=na_rep, footer=footer + ) + result = formatter.to_string() + return str(result) + + def __repr__(self) -> str: + """ + String representation. 
+ """ + _maxlen = 10 + if len(self._codes) > _maxlen: + result = self._tidy_repr(_maxlen) + elif len(self._codes) > 0: + result = self._get_repr(length=len(self) > _maxlen) + else: + msg = self._get_repr(length=False, footer=True).replace("\n", ", ") + result = f"[], {msg}" + + return result + + def _maybe_coerce_indexer(self, indexer): + """ + return an indexer coerced to the codes dtype + """ + if isinstance(indexer, np.ndarray) and indexer.dtype.kind == "i": + indexer = indexer.astype(self._codes.dtype) + return indexer + + def __getitem__(self, key): + """ + Return an item. + """ + if isinstance(key, (int, np.integer)): + i = self._codes[key] + if i == -1: + return np.nan + else: + return self.categories[i] + + key = check_array_indexer(self, key) + + result = self._codes[key] + if result.ndim > 1: + deprecate_ndim_indexing(result) + return result + return self._constructor(result, dtype=self.dtype, fastpath=True) + + def __setitem__(self, key, value): + """ + Item assignment. + + Raises + ------ + ValueError + If (one or more) Value is not in categories or if a assigned + `Categorical` does not have the same categories + """ + value = extract_array(value, extract_numpy=True) + + # require identical categories set + if isinstance(value, Categorical): + if not is_dtype_equal(self, value): + raise ValueError( + "Cannot set a Categorical with another, " + "without identical categories" + ) + if not self.categories.equals(value.categories): + new_codes = _recode_for_categories( + value.codes, value.categories, self.categories + ) + value = Categorical.from_codes(new_codes, dtype=self.dtype) + + rvalue = value if is_list_like(value) else [value] + + from pandas import Index + + to_add = Index(rvalue).difference(self.categories) + + # no assignments of values not in categories, but it's always ok to set + # something to np.nan + if len(to_add) and not isna(to_add).all(): + raise ValueError( + "Cannot setitem on a Categorical with a new " + "category, set the 
categories first" + ) + + # set by position + if isinstance(key, (int, np.integer)): + pass + + # tuple of indexers (dataframe) + elif isinstance(key, tuple): + # only allow 1 dimensional slicing, but can + # in a 2-d case be passd (slice(None),....) + if len(key) == 2: + if not com.is_null_slice(key[0]): + raise AssertionError("invalid slicing for a 1-ndim categorical") + key = key[1] + elif len(key) == 1: + key = key[0] + else: + raise AssertionError("invalid slicing for a 1-ndim categorical") + + # slicing in Series or Categorical + elif isinstance(key, slice): + pass + + # else: array of True/False in Series or Categorical + + lindexer = self.categories.get_indexer(rvalue) + lindexer = self._maybe_coerce_indexer(lindexer) + + key = check_array_indexer(self, key) + self._codes[key] = lindexer + + def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: + """ + Compute the inverse of a categorical, returning + a dict of categories -> indexers. + + *This is an internal function* + + Returns + ------- + dict of categories -> indexers + + Examples + -------- + >>> c = pd.Categorical(list('aabca')) + >>> c + [a, a, b, c, a] + Categories (3, object): [a, b, c] + >>> c.categories + Index(['a', 'b', 'c'], dtype='object') + >>> c.codes + array([0, 0, 1, 2, 0], dtype=int8) + >>> c._reverse_indexer() + {'a': array([0, 1, 4]), 'b': array([2]), 'c': array([3])} + + """ + categories = self.categories + r, counts = libalgos.groupsort_indexer( + self.codes.astype("int64"), categories.size + ) + counts = counts.cumsum() + _result = (r[start:end] for start, end in zip(counts, counts[1:])) + result = dict(zip(categories, _result)) + return result + + # reduction ops # + def _reduce(self, name, axis=0, **kwargs): + func = getattr(self, name, None) + if func is None: + raise TypeError(f"Categorical cannot perform the operation {name}") + return func(**kwargs) + + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def min(self, skipna=True, **kwargs): + """ + The 
minimum value of the object. + + Only ordered `Categoricals` have a minimum! + + .. versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + min : the minimum of this `Categorical` + """ + nv.validate_min((), kwargs) + self.check_for_ordered("min") + + if not len(self._codes): + return self.dtype.na_value + + good = self._codes != -1 + if not good.all(): + if skipna and good.any(): + pointer = self._codes[good].min() + else: + return np.nan + else: + pointer = self._codes.min() + return self.categories[pointer] + + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def max(self, skipna=True, **kwargs): + """ + The maximum value of the object. + + Only ordered `Categoricals` have a maximum! + + .. versionchanged:: 1.0.0 + + Returns an NA value on empty arrays + + Raises + ------ + TypeError + If the `Categorical` is not `ordered`. + + Returns + ------- + max : the maximum of this `Categorical` + """ + nv.validate_max((), kwargs) + self.check_for_ordered("max") + + if not len(self._codes): + return self.dtype.na_value + + good = self._codes != -1 + if not good.all(): + if skipna and good.any(): + pointer = self._codes[good].max() + else: + return np.nan + else: + pointer = self._codes.max() + return self.categories[pointer] + + def mode(self, dropna=True): + """ + Returns the mode(s) of the Categorical. + + Always returns `Categorical` even if only one value. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NaN/NaT. + + .. 
versionadded:: 0.24.0 + + Returns + ------- + modes : `Categorical` (sorted) + """ + codes = self._codes + if dropna: + good = self._codes != -1 + codes = self._codes[good] + codes = sorted(htable.mode_int64(ensure_int64(codes), dropna)) + return self._constructor(values=codes, dtype=self.dtype, fastpath=True) + + def unique(self): + """ + Return the ``Categorical`` which ``categories`` and ``codes`` are + unique. Unused categories are NOT returned. + + - unordered category: values and categories are sorted by appearance + order. + - ordered category: values are sorted by appearance order, categories + keeps existing order. + + Returns + ------- + unique values : ``Categorical`` + + Examples + -------- + An unordered Categorical will return categories in the + order of appearance. + + >>> pd.Categorical(list('baabc')) + [b, a, c] + Categories (3, object): [b, a, c] + + >>> pd.Categorical(list('baabc'), categories=list('abc')) + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering. + + >>> pd.Categorical(list('baabc'), + ... categories=list('abc'), + ... ordered=True) + [b, a, c] + Categories (3, object): [a < b < c] + + See Also + -------- + unique + CategoricalIndex.unique + Series.unique + + """ + + # unlike np.unique, unique1d does not sort + unique_codes = unique1d(self.codes) + cat = self.copy() + + # keep nan in codes + cat._codes = unique_codes + + # exclude nan from indexer for categories + take_codes = unique_codes[unique_codes != -1] + if self.ordered: + take_codes = np.sort(take_codes) + return cat.set_categories(cat.categories.take(take_codes)) + + def _values_for_factorize(self): + codes = self.codes.astype("int64") + return codes, -1 + + @classmethod + def _from_factorized(cls, uniques, original): + return original._constructor( + original.categories.take(uniques), dtype=original.dtype + ) + + def equals(self, other): + """ + Returns True if categorical arrays are equal. 
+ + Parameters + ---------- + other : `Categorical` + + Returns + ------- + bool + """ + if self.is_dtype_equal(other): + if self.categories.equals(other.categories): + # fastpath to avoid re-coding + other_codes = other._codes + else: + other_codes = _recode_for_categories( + other.codes, other.categories, self.categories + ) + return np.array_equal(self._codes, other_codes) + return False + + def is_dtype_equal(self, other): + """ + Returns True if categoricals are the same dtype + same categories, and same ordered + + Parameters + ---------- + other : Categorical + + Returns + ------- + bool + """ + + try: + return hash(self.dtype) == hash(other.dtype) + except (AttributeError, TypeError): + return False + + def describe(self): + """ + Describes this Categorical + + Returns + ------- + description: `DataFrame` + A dataframe with frequency and counts by category. + """ + counts = self.value_counts(dropna=False) + freqs = counts / float(counts.sum()) + + from pandas.core.reshape.concat import concat + + result = concat([counts, freqs], axis=1) + result.columns = ["counts", "freqs"] + result.index.name = "categories" + + return result + + @Substitution(klass="Categorical") + @Appender(_extension_array_shared_docs["repeat"]) + def repeat(self, repeats, axis=None): + nv.validate_repeat(tuple(), dict(axis=axis)) + codes = self._codes.repeat(repeats) + return self._constructor(values=codes, dtype=self.dtype, fastpath=True) + + # Implement the ExtensionArray interface + @property + def _can_hold_na(self): + return True + + @classmethod + def _concat_same_type(self, to_concat): + from pandas.core.dtypes.concat import concat_categorical + + return concat_categorical(to_concat) + + def isin(self, values): + """ + Check whether `values` are contained in Categorical. + + Return a boolean NumPy Array showing whether each element in + the Categorical matches an element in the passed sequence of + `values` exactly. 
+ + Parameters + ---------- + values : set or list-like + The sequence of values to test. Passing in a single string will + raise a ``TypeError``. Instead, turn a single string into a + list of one element. + + Returns + ------- + isin : numpy.ndarray (bool dtype) + + Raises + ------ + TypeError + * If `values` is not a set or list-like + + See Also + -------- + pandas.Series.isin : Equivalent method on Series. + + Examples + -------- + + >>> s = pd.Categorical(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo']) + >>> s.isin(['cow', 'lama']) + array([ True, True, True, False, True, False]) + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + array([ True, False, True, False, True, False]) + """ + if not is_list_like(values): + values_type = type(values).__name__ + raise TypeError( + "only list-like objects are allowed to be passed" + f" to isin(), you passed a [{values_type}]" + ) + values = sanitize_array(values, None, None) + null_mask = np.asarray(isna(values)) + code_values = self.categories.get_indexer(values) + code_values = code_values[null_mask | (code_values >= 0)] + return algorithms.isin(self.codes, code_values) + + def replace(self, to_replace, value, inplace: bool = False): + """ + Replaces all instances of one value with another + + Parameters + ---------- + to_replace: object + The value to be replaced + + value: object + The value to replace it with + + inplace: bool + Whether the operation is done in-place + + Returns + ------- + None if inplace is True, otherwise the new Categorical after replacement + + + Examples + -------- + >>> s = pd.Categorical([1, 2, 1, 3]) + >>> s.replace(1, 3) + [3, 3, 2, 3] + Categories (2, int64): [2, 3] + """ + inplace = validate_bool_kwarg(inplace, "inplace") + cat = self if inplace else self.copy() + + # build a dict of (to replace -> value) pairs + if is_list_like(to_replace): + # if to_replace is list-like and value is scalar + 
replace_dict = {replace_value: value for replace_value in to_replace} + else: + # if both to_replace and value are scalar + replace_dict = {to_replace: value} + + # other cases, like if both to_replace and value are list-like or if + # to_replace is a dict, are handled separately in NDFrame + for replace_value, new_value in replace_dict.items(): + if new_value == replace_value: + continue + if replace_value in cat.categories: + if isna(new_value): + cat.remove_categories(replace_value, inplace=True) + continue + categories = cat.categories.tolist() + index = categories.index(replace_value) + if new_value in cat.categories: + value_index = categories.index(new_value) + cat._codes[cat._codes == index] = value_index + cat.remove_categories(replace_value, inplace=True) + else: + categories[index] = new_value + cat.rename_categories(categories, inplace=True) + if not inplace: + return cat + + +# The Series.cat accessor + + +@delegate_names( + delegate=Categorical, accessors=["categories", "ordered"], typ="property" +) +@delegate_names( + delegate=Categorical, + accessors=[ + "rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered", + ], + typ="method", +) +class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): + """ + Accessor object for categorical properties of the Series values. + + Be aware that assigning to `categories` is a inplace operation, while all + methods return new categorical data per default (but can be called with + `inplace=True`). 
+ + Parameters + ---------- + data : Series or CategoricalIndex + + Examples + -------- + >>> s.cat.categories + >>> s.cat.categories = list('abc') + >>> s.cat.rename_categories(list('cab')) + >>> s.cat.reorder_categories(list('cab')) + >>> s.cat.add_categories(['d','e']) + >>> s.cat.remove_categories(['d']) + >>> s.cat.remove_unused_categories() + >>> s.cat.set_categories(list('abcde')) + >>> s.cat.as_ordered() + >>> s.cat.as_unordered() + """ + + _deprecations = PandasObject._deprecations | frozenset( + ["categorical", "index", "name"] + ) + + def __init__(self, data): + self._validate(data) + self._parent = data.values + self._index = data.index + self._name = data.name + self._freeze() + + @staticmethod + def _validate(data): + if not is_categorical_dtype(data.dtype): + raise AttributeError("Can only use .cat accessor with a 'category' dtype") + + def _delegate_property_get(self, name): + return getattr(self._parent, name) + + def _delegate_property_set(self, name, new_values): + return setattr(self._parent, name, new_values) + + @property + def codes(self): + """ + Return Series of codes as well as the index. + """ + from pandas import Series + + return Series(self._parent.codes, index=self._index) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + + method = getattr(self._parent, name) + res = method(*args, **kwargs) + if res is not None: + return Series(res, index=self._index, name=self._name) + + +# utility routines + + +def _get_codes_for_values(values, categories): + """ + utility routine to turn values into codes given the specified categories + """ + dtype_equal = is_dtype_equal(values.dtype, categories.dtype) + + if dtype_equal: + # To prevent erroneous dtype coercion in _get_data_algo, retrieve + # the underlying numpy array. 
gh-22702 + values = getattr(values, "_ndarray_values", values) + categories = getattr(categories, "_ndarray_values", categories) + elif is_extension_array_dtype(categories.dtype) and is_object_dtype(values): + # Support inferring the correct extension dtype from an array of + # scalar objects. e.g. + # Categorical(array[Period, Period], categories=PeriodIndex(...)) + cls = categories.dtype.construct_array_type() + values = try_cast_to_ea(cls, values) + if not isinstance(values, cls): + # exception raised in _from_sequence + values = ensure_object(values) + categories = ensure_object(categories) + else: + values = ensure_object(values) + categories = ensure_object(categories) + + hash_klass, vals = _get_data_algo(values) + _, cats = _get_data_algo(categories) + t = hash_klass(len(cats)) + t.map_locations(cats) + return coerce_indexer_dtype(t.lookup(vals), cats) + + +def _recode_for_categories(codes: np.ndarray, old_categories, new_categories): + """ + Convert a set of codes for to a new set of categories + + Parameters + ---------- + codes : np.ndarray + old_categories, new_categories : Index + + Returns + ------- + new_codes : np.ndarray[np.int64] + + Examples + -------- + >>> old_cat = pd.Index(['b', 'a', 'c']) + >>> new_cat = pd.Index(['a', 'b']) + >>> codes = np.array([0, 1, 1, 2]) + >>> _recode_for_categories(codes, old_cat, new_cat) + array([ 1, 0, 0, -1]) + """ + if len(old_categories) == 0: + # All null anyway, so just retain the nulls + return codes.copy() + elif new_categories.equals(old_categories): + # Same categories, so no need to actually recode + return codes.copy() + indexer = coerce_indexer_dtype( + new_categories.get_indexer(old_categories), new_categories + ) + new_codes = take_1d(indexer, codes.copy(), fill_value=-1) + return new_codes + + +def _convert_to_list_like(list_like): + if hasattr(list_like, "dtype"): + return list_like + if isinstance(list_like, list): + return list_like + if is_sequence(list_like) or isinstance(list_like, tuple) or 
is_iterator(list_like): + return list(list_like) + elif is_scalar(list_like): + return [list_like] + else: + # TODO: is this reached? + return [list_like] + + +def factorize_from_iterable(values): + """ + Factorize an input `values` into `categories` and `codes`. Preserves + categorical dtype in `categories`. + + *This is an internal function* + + Parameters + ---------- + values : list-like + + Returns + ------- + codes : ndarray + categories : Index + If `values` has a categorical dtype, then `categories` is + a CategoricalIndex keeping the categories and order of `values`. + """ + if not is_list_like(values): + raise TypeError("Input must be list-like") + + if is_categorical_dtype(values): + values = extract_array(values) + # The Categorical we want to build has the same categories + # as values but its codes are by def [0, ..., len(n_categories) - 1] + cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype) + categories = Categorical.from_codes(cat_codes, dtype=values.dtype) + codes = values.codes + else: + # The value of ordered is irrelevant since we don't use cat as such, + # but only the resulting categories, the order of which is independent + # from ordered. Set ordered to False as default. See GH #15457 + cat = Categorical(values, ordered=False) + categories = cat.categories + codes = cat.codes + return codes, categories + + +def factorize_from_iterables(iterables): + """ + A higher-level wrapper over `factorize_from_iterable`. + + *This is an internal function* + + Parameters + ---------- + iterables : list-like of list-likes + + Returns + ------- + codes_list : list of ndarrays + categories_list : list of Indexes + + Notes + ----- + See `factorize_from_iterable` for more info. + """ + if len(iterables) == 0: + # For consistency, it should return a list of 2 lists. 
+ return [[], []] + return map(list, zip(*(factorize_from_iterable(it) for it in iterables))) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py new file mode 100644 index 00000000..3d39d851 --- /dev/null +++ b/pandas/core/arrays/datetimelike.py @@ -0,0 +1,1714 @@ +from datetime import datetime, timedelta +import operator +from typing import Any, Sequence, Type, Union, cast +import warnings + +import numpy as np + +from pandas._libs import NaT, NaTType, Timestamp, algos, iNaT, lib +from pandas._libs.tslibs.c_timestamp import integer_op_not_supported +from pandas._libs.tslibs.period import DIFFERENT_FREQ, IncompatibleFrequency, Period +from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds +from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64 +from pandas._typing import DatetimeLikeScalar +from pandas.compat import set_function_name +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError, NullFrequencyError, PerformanceWarning +from pandas.util._decorators import Appender, Substitution +from pandas.util._validators import validate_fillna_kwargs + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_period_dtype, + is_string_dtype, + is_timedelta64_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.inference import is_array_like +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna + +from pandas.core import missing, nanops, ops +from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts +from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin +import pandas.core.common as com +from pandas.core.indexers import 
def _datetimelike_array_cmp(cls, op):
    """
    Wrap comparison operations to convert Timestamp/Timedelta/Period-like to
    boxed scalars/arrays.

    Parameters
    ----------
    cls : type
        Class the generated method is named for (passed through to
        ``set_function_name``).
    op : callable
        Comparison operator; its ``__name__`` is used to build the dunder
        name (e.g. ``__eq__``).

    Returns
    -------
    callable
        The comparison method, renamed to the dunder name for `cls`.
    """
    opname = f"__{op.__name__}__"
    # Positions involving NaT are filled with this value: True only for
    # ``__ne__``, False for every other comparison.
    nat_result = opname == "__ne__"

    @unpack_zerodim_and_defer(opname)
    def wrapper(self, other):

        if isinstance(other, str):
            try:
                # GH#18435 strings get a pass from tzawareness compat
                other = self._scalar_from_string(other)
            except ValueError:
                # failed to parse as Timestamp/Timedelta/Period
                return invalid_comparison(self, other, op)

        if isinstance(other, self._recognized_scalars) or other is NaT:
            # Scalar path: box, validate, then compare on the i8 values.
            other = self._scalar_type(other)
            self._check_compatible_with(other)

            other_i8 = self._unbox_scalar(other)

            result = op(self.view("i8"), other_i8)
            if isna(other):
                # Comparing against NaT: every position gets nat_result.
                result.fill(nat_result)

        elif not is_list_like(other):
            return invalid_comparison(self, other, op)

        elif len(other) != len(self):
            raise ValueError("Lengths must match")

        else:
            # Array-like path.
            if isinstance(other, list):
                # TODO: could use pd.Index to do inference?
                other = np.array(other)

            if not isinstance(other, (np.ndarray, type(self))):
                return invalid_comparison(self, other, op)

            if is_object_dtype(other):
                # We have to use comp_method_OBJECT_ARRAY instead of numpy
                #  comparison otherwise it would fail to raise when
                #  comparing tz-aware and tz-naive
                with np.errstate(all="ignore"):
                    result = ops.comp_method_OBJECT_ARRAY(
                        op, self.astype(object), other
                    )
                o_mask = isna(other)

            elif not type(self)._is_recognized_dtype(other.dtype):
                return invalid_comparison(self, other, op)

            else:
                # For PeriodDType this casting is unnecessary
                other = type(self)._from_sequence(other)
                self._check_compatible_with(other)

                result = op(self.view("i8"), other.view("i8"))
                o_mask = other._isnan

            # Missing entries in `other` take the NaT result.
            if o_mask.any():
                result[o_mask] = nat_result

        # Missing entries in `self` take the NaT result (scalar and array
        # paths alike).
        if self._hasnans:
            result[self._isnan] = nat_result

        return result

    return set_function_name(wrapper, opname, cls)
def _check_compatible_with(
    self, other: Union[Period, Timestamp, Timedelta, NaTType], setitem: bool = False
) -> None:
    """
    Verify that `self` and `other` are compatible.

    * DatetimeArray verifies that the timezones (if any) match
    * PeriodArray verifies that the freq matches
    * Timedelta has no verification

    In each case, NaT is considered compatible.

    This base implementation is abstract: it always raises
    ``AbstractMethodError``.

    Parameters
    ----------
    other
    setitem : bool, default False
        For __setitem__ we may have stricter compatibility restrictions than
        for comparisons.

    Raises
    ------
    Exception
    """
    raise AbstractMethodError(self)
periods=3, freq='s') + >>> rng.strftime('%%B %%d, %%Y, %%r') + Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', + 'March 10, 2018, 09:00:02 AM'], + dtype='object') + """ + result = self._format_native_types(date_format=date_format, na_rep=np.nan) + return result.astype(object) + + +class TimelikeOps: + """ + Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. + """ + + _round_doc = """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + + .. versionadded:: 0.24.0 + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. 
def _round(self, freq, mode, ambiguous, nonexistent):
    """
    Round the values to `freq` using rounding `mode`.

    Timezone-aware data is rounded in local (naive) time and then
    re-localized with the given `ambiguous` / `nonexistent` handling;
    everything else is rounded directly on the int64 values.
    """
    if not is_datetime64tz_dtype(self):
        rounded_i8 = round_nsint64(self.view("i8"), mode, freq)
        # put NaT back wherever self holds missing values
        masked = self._maybe_mask_results(rounded_i8, fill_value=NaT)
        return self._simple_new(masked, dtype=self.dtype)

    # round the local times: drop the tz, round, then convert back to aware
    wall_times = self.tz_localize(None)
    rounded = wall_times._round(freq, mode, ambiguous, nonexistent)
    return rounded.tz_localize(
        self.tz, ambiguous=ambiguous, nonexistent=nonexistent
    )
def reshape(self, *args, **kwargs):
    """
    Return a new array of the same class wrapping the reshaped data.

    All arguments are forwarded to ``self._data.reshape``. Only the reshaped
    values and ``self.dtype`` reach the constructor, so any freq is dropped.
    """
    cls = type(self)
    return cls(self._data.reshape(*args, **kwargs), dtype=self.dtype)
+ """ + # do not cache or you'll create a memory leak + return self._data.view("i8") + + @property + def _ndarray_values(self): + return self._data + + # ---------------------------------------------------------------- + # Rendering Methods + + def _format_native_types(self, na_rep="NaT", date_format=None): + """ + Helper method for astype when converting to strings. + + Returns + ------- + ndarray[str] + """ + raise AbstractMethodError(self) + + def _formatter(self, boxed=False): + # TODO: Remove Datetime & DatetimeTZ formatters. + return "'{}'".format + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + @property + def nbytes(self): + return self._data.nbytes + + def __array__(self, dtype=None) -> np.ndarray: + # used for Timedelta/DatetimeArray, overwritten by PeriodArray + if is_object_dtype(dtype): + return np.array(list(self), dtype=object) + return self._data + + @property + def size(self) -> int: + """The number of elements in this array.""" + return np.prod(self.shape) + + def __len__(self) -> int: + return len(self._data) + + def __getitem__(self, key): + """ + This getitem defers to the underlying array, which by-definition can + only handle list-likes, slices, and integer scalars + """ + + is_int = lib.is_integer(key) + if lib.is_scalar(key) and not is_int: + raise IndexError( + "only integers, slices (`:`), ellipsis (`...`), " + "numpy.newaxis (`None`) and integer or boolean " + "arrays are valid indices" + ) + + getitem = self._data.__getitem__ + if is_int: + val = getitem(key) + if lib.is_scalar(val): + # i.e. 
self.ndim == 1 + return self._box_func(val) + return type(self)(val, dtype=self.dtype) + + if com.is_bool_indexer(key): + # first convert to boolean, because check_array_indexer doesn't + # allow object dtype + if is_object_dtype(key): + key = np.asarray(key, dtype=bool) + + key = check_array_indexer(self, key) + if key.all(): + key = slice(0, None, None) + else: + key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + elif isinstance(key, list) and len(key) == 1 and isinstance(key[0], slice): + # see https://github.com/pandas-dev/pandas/issues/31299, need to allow + # this for now (would otherwise raise in check_array_indexer) + pass + else: + key = check_array_indexer(self, key) + + is_period = is_period_dtype(self) + if is_period: + freq = self.freq + else: + freq = None + if isinstance(key, slice): + if self.freq is not None and key.step is not None: + freq = key.step * self.freq + else: + freq = self.freq + elif key is Ellipsis: + # GH#21282 indexing with Ellipsis is similar to a full slice, + # should preserve `freq` attribute + freq = self.freq + + result = getitem(key) + if result.ndim > 1: + # To support MPL which performs slicing with 2 dim + # even though it only has 1 dim by definition + return result + + return self._simple_new(result, dtype=self.dtype, freq=freq) + + def __setitem__( + self, + key: Union[int, Sequence[int], Sequence[bool], slice], + value: Union[NaTType, Any, Sequence[Any]], + ) -> None: + # I'm fudging the types a bit here. "Any" above really depends + # on type(self). For PeriodArray, it's Period (or stuff coercible + # to a period in from_sequence). For DatetimeArray, it's Timestamp... + # I don't know if mypy can do that, possibly with Generics. 
+ # https://mypy.readthedocs.io/en/latest/generics.html + if lib.is_scalar(value) and not isna(value): + value = com.maybe_box_datetimelike(value) + + if is_list_like(value): + is_slice = isinstance(key, slice) + + if lib.is_scalar(key): + raise ValueError("setting an array element with a sequence.") + + if not is_slice: + key = cast(Sequence, key) + if len(key) != len(value) and not com.is_bool_indexer(key): + msg = ( + f"shape mismatch: value array of length '{len(key)}' " + "does not match indexing result of length " + f"'{len(value)}'." + ) + raise ValueError(msg) + elif not len(key): + return + + value = type(self)._from_sequence(value, dtype=self.dtype) + self._check_compatible_with(value, setitem=True) + value = value.asi8 + elif isinstance(value, self._scalar_type): + self._check_compatible_with(value, setitem=True) + value = self._unbox_scalar(value) + elif is_valid_nat_for_dtype(value, self.dtype): + value = iNaT + else: + msg = ( + f"'value' should be a '{self._scalar_type.__name__}', 'NaT', " + f"or array of those. Got '{type(value).__name__}' instead." + ) + raise TypeError(msg) + + key = check_array_indexer(self, key) + self._data[key] = value + self._maybe_clear_freq() + + def _maybe_clear_freq(self): + # inplace operations like __setitem__ may invalidate the freq of + # DatetimeArray and TimedeltaArray + pass + + def astype(self, dtype, copy=True): + # Some notes on cases we don't have to handle here in the base class: + # 1. PeriodArray.astype handles period -> period + # 2. DatetimeArray.astype handles conversion between tz. + # 3. DatetimeArray.astype handles datetime -> period + from pandas import Categorical + + dtype = pandas_dtype(dtype) + + if is_object_dtype(dtype): + return self._box_values(self.asi8) + elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): + return self._format_native_types() + elif is_integer_dtype(dtype): + # we deliberately ignore int32 vs. int64 here. 
def view(self, dtype=None):
    """
    Return a view of the data.

    With no `dtype`, or with the very same dtype object as ``self.dtype``,
    a new instance of this class wrapping the same ``_data`` is returned.
    Any other `dtype` is handed to ``self._data.view``.
    """
    keeps_dtype = dtype is None or dtype is self.dtype
    if not keeps_dtype:
        return self._data.view(dtype=dtype)

    cls = type(self)
    return cls(self._data, dtype=self.dtype)
def copy(self):
    """
    Return a new array of the same class built from a copy of the int64
    values, carrying over ``dtype`` and ``freq``.
    """
    cls = type(self)
    return cls._simple_new(self.asi8.copy(), dtype=self.dtype, freq=self.freq)
new_values.T + axis = new_values.ndim - axis - 1 + + new_values = np.roll(new_values, periods, axis=axis) + + axis_indexer = [slice(None)] * self.ndim + if periods > 0: + axis_indexer[axis] = slice(None, periods) + else: + axis_indexer[axis] = slice(periods, None) + new_values[tuple(axis_indexer)] = fill_value + + # restore original order + if f_ordered: + new_values = new_values.T + + return type(self)._simple_new(new_values, dtype=self.dtype) + + # ------------------------------------------------------------------ + # Additional array methods + # These are not part of the EA API, but we implement them because + # pandas assumes they're there. + + def searchsorted(self, value, side="left", sorter=None): + """ + Find indices where elements should be inserted to maintain order. + + Find the indices into a sorted array `self` such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. + + Parameters + ---------- + value : array_like + Values to insert into `self`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array_like, optional + Optional array of integer indices that sort `self` into ascending + order. They are typically the result of ``np.argsort``. + + Returns + ------- + indices : array of ints + Array of insertion points with the same shape as `value`. 
+ """ + if isinstance(value, str): + value = self._scalar_from_string(value) + + if not (isinstance(value, (self._scalar_type, type(self))) or isna(value)): + raise ValueError(f"Unexpected type for 'value': {type(value)}") + + self._check_compatible_with(value) + if isinstance(value, type(self)): + value = value.asi8 + else: + value = self._unbox_scalar(value) + + return self.asi8.searchsorted(value, side=side, sorter=sorter) + + def repeat(self, repeats, *args, **kwargs): + """ + Repeat elements of an array. + + See Also + -------- + numpy.ndarray.repeat + """ + nv.validate_repeat(args, kwargs) + values = self._data.repeat(repeats) + return type(self)(values.view("i8"), dtype=self.dtype) + + def value_counts(self, dropna=False): + """ + Return a Series containing counts of unique values. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaT values. + + Returns + ------- + Series + """ + from pandas import Series, Index + + if dropna: + values = self[~self.isna()]._data + else: + values = self._data + + cls = type(self) + + result = value_counts(values, sort=False, dropna=dropna) + index = Index( + cls(result.index.view("i8"), dtype=self.dtype), name=result.index.name + ) + return Series(result.values, index=index, name=result.name) + + def map(self, mapper): + # TODO(GH-23179): Add ExtensionArray.map + # Need to figure out if we want ExtensionArray.map first. + # If so, then we can refactor IndexOpsMixin._map_values to + # a standalone function and call from here.. + # Else, just rewrite _map_infer_values to do the right thing. 
+ from pandas import Index + + return Index(self).map(mapper).array + + # ------------------------------------------------------------------ + # Null Handling + + def isna(self): + return self._isnan + + @property # NB: override with cache_readonly in immutable subclasses + def _isnan(self): + """ + return if each value is nan + """ + return self.asi8 == iNaT + + @property # NB: override with cache_readonly in immutable subclasses + def _hasnans(self): + """ + return if I have any nans; enables various perf speedups + """ + return bool(self._isnan.any()) + + def _maybe_mask_results(self, result, fill_value=iNaT, convert=None): + """ + Parameters + ---------- + result : a ndarray + fill_value : object, default iNaT + convert : str, dtype or None + + Returns + ------- + result : ndarray with values replace by the fill_value + + mask the result if needed, convert to the provided dtype if its not + None + + This is an internal routine. + """ + + if self._hasnans: + if convert: + result = result.astype(convert) + if fill_value is None: + fill_value = np.nan + result[self._isnan] = fill_value + return result + + def fillna(self, value=None, method=None, limit=None): + # TODO(GH-20300): remove this + # Just overriding to ensure that we avoid an astype(object). + # Either 20300 or a `_values_for_fillna` would avoid this duplication. + if isinstance(value, ABCSeries): + value = value.array + + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + + if is_array_like(value): + if len(value) != len(self): + raise ValueError( + f"Length of 'value' does not match. Got ({len(value)}) " + f" expected {len(self)}" + ) + value = value[mask] + + if mask.any(): + if method is not None: + if method == "pad": + func = missing.pad_1d + else: + func = missing.backfill_1d + + values = self._data + if not is_period_dtype(self): + # For PeriodArray self._data is i8, which gets copied + # by `func`. 
@property  # NB: override with cache_readonly in immutable subclasses
def inferred_freq(self):
    """
    Tries to return a string representing a frequency guess,
    generated by infer_freq.  Returns None if it can't autodetect the
    frequency.
    """
    # no inference is attempted unless the data is 1-dimensional
    if self.ndim != 1:
        return None
    try:
        return frequencies.infer_freq(self)
    except ValueError:
        # a ValueError from infer_freq is reported as "no frequency"
        return None
+ raise ValueError( + f"Inferred frequency {inferred} from passed values " + f"does not conform to passed frequency {freq.freqstr}" + ) + + # monotonicity/uniqueness properties are called via frequencies.infer_freq, + # see GH#23789 + + @property + def _is_monotonic_increasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[0] + + @property + def _is_monotonic_decreasing(self): + return algos.is_monotonic(self.asi8, timelike=True)[1] + + @property + def _is_unique(self): + return len(unique1d(self.asi8)) == len(self) + + # ------------------------------------------------------------------ + # Arithmetic Methods + _create_comparison_method = classmethod(_datetimelike_array_cmp) + + # pow is invalid for all three subclasses; TimedeltaArray will override + # the multiplication and division ops + __pow__ = make_invalid_op("__pow__") + __rpow__ = make_invalid_op("__rpow__") + __mul__ = make_invalid_op("__mul__") + __rmul__ = make_invalid_op("__rmul__") + __truediv__ = make_invalid_op("__truediv__") + __rtruediv__ = make_invalid_op("__rtruediv__") + __floordiv__ = make_invalid_op("__floordiv__") + __rfloordiv__ = make_invalid_op("__rfloordiv__") + __mod__ = make_invalid_op("__mod__") + __rmod__ = make_invalid_op("__rmod__") + __divmod__ = make_invalid_op("__divmod__") + __rdivmod__ = make_invalid_op("__rdivmod__") + + def _add_datetimelike_scalar(self, other): + # Overridden by TimedeltaArray + raise TypeError(f"cannot add {type(self).__name__} and {type(other).__name__}") + + _add_datetime_arraylike = _add_datetimelike_scalar + + def _sub_datetimelike_scalar(self, other): + # Overridden by DatetimeArray + assert other is not NaT + raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}") + + _sub_datetime_arraylike = _sub_datetimelike_scalar + + def _sub_period(self, other): + # Overridden by PeriodArray + raise TypeError(f"cannot subtract Period from a {type(self).__name__}") + + def _add_offset(self, offset): + raise 
AbstractMethodError(self) + + def _add_delta(self, other): + """ + Add a timedelta-like, Tick or TimedeltaIndex-like object + to self, yielding an int64 numpy array + + Parameters + ---------- + delta : {timedelta, np.timedelta64, Tick, + TimedeltaIndex, ndarray[timedelta64]} + + Returns + ------- + result : ndarray[int64] + + Notes + ----- + The result's name is set outside of _add_delta by the calling + method (__add__ or __sub__), if necessary (i.e. for Indexes). + """ + if isinstance(other, (Tick, timedelta, np.timedelta64)): + new_values = self._add_timedeltalike_scalar(other) + elif is_timedelta64_dtype(other): + # ndarray[timedelta64] or TimedeltaArray/index + new_values = self._add_delta_tdi(other) + + return new_values + + def _add_timedeltalike_scalar(self, other): + """ + Add a delta of a timedeltalike + return the i8 result view + """ + if isna(other): + # i.e np.timedelta64("NaT"), not recognized by delta_to_nanoseconds + new_values = np.empty(self.shape, dtype="i8") + new_values[:] = iNaT + return new_values + + inc = delta_to_nanoseconds(other) + new_values = checked_add_with_arr(self.asi8, inc, arr_mask=self._isnan).view( + "i8" + ) + new_values = self._maybe_mask_results(new_values) + return new_values.view("i8") + + def _add_delta_tdi(self, other): + """ + Add a delta of a TimedeltaIndex + return the i8 result view + """ + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + + if isinstance(other, np.ndarray): + # ndarray[timedelta64]; wrap in TimedeltaIndex for op + from pandas.core.arrays import TimedeltaArray + + other = TimedeltaArray._from_sequence(other) + + self_i8 = self.asi8 + other_i8 = other.asi8 + new_values = checked_add_with_arr( + self_i8, other_i8, arr_mask=self._isnan, b_mask=other._isnan + ) + if self._hasnans or other._hasnans: + mask = (self._isnan) | (other._isnan) + new_values[mask] = iNaT + return new_values.view("i8") + + def _add_nat(self): + """ + Add pd.NaT to self + """ + if 
is_period_dtype(self): + raise TypeError( + f"Cannot add {type(self).__name__} and {type(NaT).__name__}" + ) + + # GH#19124 pd.NaT is treated like a timedelta for both timedelta + # and datetime dtypes + result = np.zeros(self.shape, dtype=np.int64) + result.fill(iNaT) + return type(self)(result, dtype=self.dtype, freq=None) + + def _sub_nat(self): + """ + Subtract pd.NaT from self + """ + # GH#19124 Timedelta - datetime is not in general well-defined. + # We make an exception for pd.NaT, which in this case quacks + # like a timedelta. + # For datetime64 dtypes by convention we treat NaT as a datetime, so + # this subtraction returns a timedelta64 dtype. + # For period dtype, timedelta64 is a close-enough return dtype. + result = np.zeros(self.shape, dtype=np.int64) + result.fill(iNaT) + return result.view("timedelta64[ns]") + + def _sub_period_array(self, other): + """ + Subtract a Period Array/Index from self. This is only valid if self + is itself a Period Array/Index, raises otherwise. Both objects must + have the same frequency. + + Parameters + ---------- + other : PeriodIndex or PeriodArray + + Returns + ------- + result : np.ndarray[object] + Array of DateOffset objects; nulls represented by NaT. 
+ """ + if not is_period_dtype(self): + raise TypeError( + f"cannot subtract {other.dtype}-dtype from {type(self).__name__}" + ) + + if self.freq != other.freq: + msg = DIFFERENT_FREQ.format( + cls=type(self).__name__, own_freq=self.freqstr, other_freq=other.freqstr + ) + raise IncompatibleFrequency(msg) + + new_values = checked_add_with_arr( + self.asi8, -other.asi8, arr_mask=self._isnan, b_mask=other._isnan + ) + + new_values = np.array([self.freq.base * x for x in new_values]) + if self._hasnans or other._hasnans: + mask = (self._isnan) | (other._isnan) + new_values[mask] = NaT + return new_values + + def _addsub_object_array(self, other: np.ndarray, op): + """ + Add or subtract array-like of DateOffset objects + + Parameters + ---------- + other : np.ndarray[object] + op : {operator.add, operator.sub} + + Returns + ------- + result : same class as self + """ + assert op in [operator.add, operator.sub] + if len(other) == 1: + return op(self, other[0]) + + warnings.warn( + "Adding/subtracting array of DateOffsets to " + f"{type(self).__name__} not vectorized", + PerformanceWarning, + ) + + # For EA self.astype('O') returns a numpy array, not an Index + left = self.astype("O") + + res_values = op(left, np.array(other)) + kwargs = {} + if not is_period_dtype(self): + kwargs["freq"] = "infer" + try: + res = type(self)._from_sequence(res_values, **kwargs) + except ValueError: + # e.g. we've passed a Timestamp to TimedeltaArray + res = res_values + return res + + def _time_shift(self, periods, freq=None): + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or str + Frequency increment to shift by. 
@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
    """
    Add `other` to self, dispatching on the type/dtype of `other`.

    Scalars are tested first (NaT, timedelta-likes, non-Tick DateOffset,
    datetime-likes, integers), then array-likes by dtype (timedelta64,
    object, datetime64 / datetime64tz, integer).  Anything else returns
    NotImplemented.

    Raises
    ------
    Whatever ``integer_op_not_supported(self)`` constructs, when `other`
    is an integer or integer-dtype array and self is not period-dtype.

    Notes
    -----
    The branch order matters: the integer check must follow the
    np.timedelta64 check.  A timedelta64-dtype ndarray result is wrapped
    in TimedeltaArray before being returned; the datetime64 array branch
    returns directly and skips that wrapping step.
    """

    # scalar others
    if other is NaT:
        result = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_delta(other)
    elif isinstance(other, DateOffset):
        # specifically _not_ a Tick
        result = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._add_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not is_period_dtype(self):
            raise integer_op_not_supported(self)
        # for period dtype an integer means "shift by that many periods"
        result = self._time_shift(other)

    # array-like others
    elif is_timedelta64_dtype(other):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_delta(other)
    elif is_object_dtype(other):
        # e.g. Array/Index of DateOffset objects
        result = self._addsub_object_array(other, operator.add)
    elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
        # DatetimeIndex, ndarray[datetime64]
        return self._add_datetime_arraylike(other)
    elif is_integer_dtype(other):
        if not is_period_dtype(self):
            raise integer_op_not_supported(self)
        result = self._addsub_int_array(other, operator.add)
    else:
        # Includes Categorical, other ExtensionArrays
        # For PeriodDtype, if self is a TimedeltaArray and other is a
        # PeriodArray with a timedelta-like (i.e. Tick) freq, this
        # operation is valid. Defer to the PeriodArray implementation.
        # In remaining cases, this will end up raising TypeError.
        return NotImplemented

    if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
        # local import of TimedeltaArray
        # NOTE(review): presumably to avoid a circular import - confirm
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray(result)
    return result
Array/Index of DateOffset objects + result = self._addsub_object_array(other, operator.sub) + elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): + # DatetimeIndex, ndarray[datetime64] + result = self._sub_datetime_arraylike(other) + elif is_period_dtype(other): + # PeriodIndex + result = self._sub_period_array(other) + elif is_integer_dtype(other): + if not is_period_dtype(self): + raise integer_op_not_supported(self) + result = self._addsub_int_array(other, operator.sub) + else: + # Includes ExtensionArrays, float_dtype + return NotImplemented + + if is_timedelta64_dtype(result) and isinstance(result, np.ndarray): + from pandas.core.arrays import TimedeltaArray + + return TimedeltaArray(result) + return result + + def __rsub__(self, other): + if is_datetime64_any_dtype(other) and is_timedelta64_dtype(self.dtype): + # ndarray[datetime64] cannot be subtracted from self, so + # we need to wrap in DatetimeArray/Index and flip the operation + if lib.is_scalar(other): + # i.e. np.datetime64 object + return Timestamp(other) - self + if not isinstance(other, DatetimeLikeArrayMixin): + # Avoid down-casting DatetimeIndex + from pandas.core.arrays import DatetimeArray + + other = DatetimeArray(other) + return other - self + elif ( + is_datetime64_any_dtype(self.dtype) + and hasattr(other, "dtype") + and not is_datetime64_any_dtype(other.dtype) + ): + # GH#19959 datetime - datetime is well-defined as timedelta, + # but any other type - datetime is not well-defined. + raise TypeError( + f"cannot subtract {type(self).__name__} from {type(other).__name__}" + ) + elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other): + # TODO: Can we simplify/generalize these cases at all? 
def min(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return the minimum value of the Array or minimum along
    an axis.

    `axis`, `args` and `kwargs` are only passed to the numpy-compat
    validators; the reduction itself runs over all the i8 values.

    Returns
    -------
    scalar
        The boxed minimum, or NaT when the reduction yields a null.

    See Also
    --------
    numpy.ndarray.min
    Index.min : Return the minimum value in an Index.
    Series.min : Return the minimum value in a Series.
    """
    nv.validate_min(args, kwargs)
    nv.validate_minmax_axis(axis)

    i8_values = self.asi8
    na_mask = self.isna()
    smallest = nanops.nanmin(i8_values, skipna=skipna, mask=na_mask)

    # Period._from_ordinal does not handle np.nan gracefully, so a null
    # result is mapped to NaT instead of being boxed.
    return NaT if isna(smallest) else self._box_func(smallest)
def mean(self, skipna=True):
    """
    Return the mean value of the Array.

    .. versionadded:: 0.25.0

    Parameters
    ----------
    skipna : bool, default True
        Whether to ignore any NaT elements.

    Returns
    -------
    scalar
        Timestamp or Timedelta; NaT when there is nothing to average or
        when ``skipna=False`` and a NaT is present.

    Raises
    ------
    TypeError
        If self has period dtype.

    See Also
    --------
    numpy.ndarray.mean : Returns the average of array elements along a given axis.
    Series.mean : Return the mean value in a Series.

    Notes
    -----
    mean is only defined for Datetime and Timedelta dtypes, not for Period.
    """
    if is_period_dtype(self):
        # See discussion in GH#24757
        raise TypeError(
            f"mean is not implemented for {type(self).__name__} since the "
            "meaning is ambiguous. An alternative is "
            "obj.to_timestamp(how='start').mean()"
        )

    na_mask = self.isna()
    if skipna:
        valid = self[~na_mask]
    else:
        if na_mask.any():
            return NaT
        valid = self

    if len(valid) == 0:
        # short-circuit: nothing left to average
        return NaT

    # No NA reaches nanmean, so the result needs no NA handling.
    avg = nanops.nanmean(valid.view("i8"), skipna=skipna)
    return self._box_func(avg)
def validate_endpoints(closed):
    """
    Translate the `closed` argument into a pair of endpoint flags.

    Parameters
    ----------
    closed : {None, "left", "right"}

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        # None means both endpoints are included
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True
    raise ValueError("Closed has to be either 'left', 'right' or None")
Make a dummy variable `freq_infer` to + signify the case where the given freq is "infer" and set freq to None + to avoid comparison trouble later on. + + Parameters + ---------- + freq : {DateOffset, None, str} + + Returns + ------- + freq : {DateOffset, None} + freq_infer : bool + """ + freq_infer = False + if not isinstance(freq, DateOffset): + # if a passed freq is None, don't infer automatically + if freq != "infer": + freq = frequencies.to_offset(freq) + else: + freq_infer = True + freq = None + return freq, freq_infer diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py new file mode 100644 index 00000000..b19bd224 --- /dev/null +++ b/pandas/core/arrays/datetimes.py @@ -0,0 +1,2156 @@ +from datetime import datetime, time, timedelta +from typing import Union +import warnings + +import numpy as np +from pytz import utc + +from pandas._libs import lib, tslib +from pandas._libs.tslibs import ( + NaT, + Timestamp, + ccalendar, + conversion, + fields, + iNaT, + normalize_date, + resolution as libresolution, + timezones, + tzconversion, +) +from pandas.errors import PerformanceWarning + +from pandas.core.dtypes.common import ( + _INT64_DTYPE, + _NS_DTYPE, + is_categorical_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_object_dtype, + is_period_dtype, + is_string_dtype, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.generic import ABCIndexClass, ABCPandasArray, ABCSeries +from pandas.core.dtypes.missing import isna + +from pandas.core.algorithms import checked_add_with_arr +from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays._ranges import generate_regular_range +import pandas.core.common as com + +from pandas.tseries.frequencies import get_period_alias, to_offset +from pandas.tseries.offsets import Day, Tick + 
def tz_to_dtype(tz):
    """
    Return a datetime64[ns] dtype appropriate for the given timezone.

    Parameters
    ----------
    tz : tzinfo or None

    Returns
    -------
    np.dtype or DatetimeTZDtype
        The module's ``_NS_DTYPE`` when `tz` is None, otherwise a
        ``DatetimeTZDtype`` built from `tz`.
    """
    return _NS_DTYPE if tz is None else DatetimeTZDtype(tz=tz)
def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
    """
    Build a DatetimeArray from already-datetime64[ns] (or int64) data.

    Parameters
    ----------
    values : Series, Index, DatetimeArray, ndarray
        Series/Index inputs are unwrapped via ``._values``.  For a
        DatetimeArray input, `dtype` and `freq` are taken from it when
        not otherwise given.
    dtype : numpy.dtype or DatetimeTZDtype
    freq : str or Offset, optional
        ``"infer"`` is rejected here.
    copy : bool, default False
        Whether to copy the underlying ndarray.

    Raises
    ------
    TypeError
        If `values` is a tz-aware DatetimeArray whose timezone does not
        match the timezone of `dtype`.
    ValueError
        If `values` is not (or does not wrap) an ndarray, has an
        unsupported number of dimensions or a dtype other than
        int64 / datetime64[ns], or if ``freq == "infer"``.
    """
    if isinstance(values, (ABCSeries, ABCIndexClass)):
        values = values._values

    # remembered so the frequency check at the end can be skipped when
    # the input already carried a freq
    inferred_freq = getattr(values, "_freq", None)

    if isinstance(values, type(self)):
        # validation
        dtz = getattr(dtype, "tz", None)
        if dtz and values.tz is None:
            dtype = DatetimeTZDtype(tz=dtype.tz)
        elif dtz and values.tz:
            if not timezones.tz_compare(dtz, values.tz):
                msg = (
                    "Timezone of the array and 'dtype' do not match. "
                    f"'{dtz}' != '{values.tz}'"
                )
                raise TypeError(msg)
        elif values.tz:
            dtype = values.dtype

        if freq is None:
            freq = values.freq
        values = values._data

    if not isinstance(values, np.ndarray):
        # message fix: a comma was missing after "DatetimeArray"
        msg = (
            f"Unexpected type '{type(values).__name__}'. 'values' must be "
            "a DatetimeArray, ndarray, or Series or Index containing one of those."
        )
        raise ValueError(msg)
    if values.ndim not in [1, 2]:
        # NOTE(review): 2-D input is accepted although the message only
        # mentions 1-D - presumably 2-D is internal-only; confirm.
        raise ValueError("Only 1-dimensional input arrays are supported.")

    if values.dtype == "i8":
        # for compat with datetime/timedelta/period shared methods,
        # we can sometimes get here with int64 values.  These represent
        # nanosecond UTC (or tz-naive) unix timestamps
        values = values.view(_NS_DTYPE)

    if values.dtype != _NS_DTYPE:
        msg = (
            "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'."
            f" Got {values.dtype} instead."
        )
        raise ValueError(msg)

    dtype = _validate_dt64_dtype(dtype)

    if freq == "infer":
        msg = (
            "Frequency inference not allowed in DatetimeArray.__init__. "
            "Use 'pd.array()' instead."
        )
        raise ValueError(msg)

    if copy:
        values = values.copy()
    if freq:
        freq = to_offset(freq)
    if getattr(dtype, "tz", None):
        # https://github.com/pandas-dev/pandas/issues/18595
        # Ensure that we have a standard timezone for pytz objects.
        # Without this, things like adding an array of timedeltas and
        # a tz-aware Timestamp (with a tz specific to its datetime) will
        # be incorrect(ish?) for the array as a whole
        dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))

    self._data = values
    self._dtype = dtype
    self._freq = freq

    if inferred_freq is None and freq is not None:
        # only validate a freq that did not come from the input itself
        type(self)._validate_frequency(self, freq)
@classmethod
def _generate_range(
    cls,
    start,
    end,
    periods,
    freq,
    tz=None,
    normalize=False,
    ambiguous="raise",
    nonexistent="raise",
    closed=None,
):
    """
    Build an array of regularly or linearly spaced datetimes.

    Exactly three of `start`, `end`, `periods` and `freq` must be given.
    With a `freq` the values come from ``generate_regular_range``;
    without one they are linearly spaced between `start` and `end`.

    Parameters
    ----------
    start, end : datetime-like or None
        Converted to Timestamp when not None.
    periods : int, float or None
        Validated via ``dtl.validate_periods``.
    freq : str, DateOffset or None
        Passed through ``to_offset``.
    tz : timezone or None
        Combined with the endpoints by ``_infer_tz_from_endpoints``.
    normalize : bool, default False
        Forwarded to ``_maybe_normalize_endpoints``.
    ambiguous, nonexistent : default "raise"
        Forwarded to the localization helpers.
    closed : {None, "left", "right"}
        Which endpoints to keep; see ``dtl.validate_endpoints``.

    Raises
    ------
    ValueError
        On an invalid combination of start/end/periods/freq, on `closed`
        given without any endpoint, or if an endpoint is NaT.
    """

    periods = dtl.validate_periods(periods)
    if freq is None and any(x is None for x in [periods, start, end]):
        raise ValueError("Must provide freq argument if no data is supplied")

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, "
            "and freq, exactly three must be specified"
        )
    freq = to_offset(freq)

    if start is not None:
        start = Timestamp(start)

    if end is not None:
        end = Timestamp(end)

    if start is None and end is None:
        if closed is not None:
            raise ValueError(
                "Closed has to be None if not both of start and end are defined"
            )
    if start is NaT or end is NaT:
        raise ValueError("Neither `start` nor `end` can be NaT")

    left_closed, right_closed = dtl.validate_endpoints(closed)

    start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)

    tz = _infer_tz_from_endpoints(start, end, tz)

    if tz is not None:
        # Localize the start and end arguments
        start = _maybe_localize_point(
            start,
            getattr(start, "tz", None),
            start,
            freq,
            tz,
            ambiguous,
            nonexistent,
        )
        end = _maybe_localize_point(
            end, getattr(end, "tz", None), end, freq, tz, ambiguous, nonexistent
        )
    if freq is not None:
        # We break Day arithmetic (fixed 24 hour) here and opt for
        # Day to mean calendar day (23/24/25 hour). Therefore, strip
        # tz info from start and end to avoid DST arithmetic
        if isinstance(freq, Day):
            if start is not None:
                start = start.tz_localize(None)
            if end is not None:
                end = end.tz_localize(None)
        # TODO: consider re-implementing _cached_range; GH#17914
        values, _tz = generate_regular_range(start, end, periods, freq)
        index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz))

        if tz is not None and index.tz is None:
            # the generated values are tz-naive: localize them to `tz`
            arr = conversion.tz_localize_to_utc(
                index.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent
            )

            index = cls(arr)

            # index is localized datetime64 array -> have to convert
            # start/end as well to compare
            if start is not None:
                start = start.tz_localize(tz).asm8
            if end is not None:
                end = end.tz_localize(tz).asm8
    else:
        # Create a linearly spaced date_range in local time
        # Nanosecond-granularity timestamps aren't always correctly
        # representable with doubles, so we limit the range that we
        # pass to np.linspace as much as possible
        arr = (
            np.linspace(0, end.value - start.value, periods, dtype="int64")
            + start.value
        )
        dtype = tz_to_dtype(tz)
        index = cls._simple_new(
            arr.astype("M8[ns]", copy=False), freq=None, dtype=dtype
        )

    # drop an endpoint that is excluded by `closed`
    if not left_closed and len(index) and index[0] == start:
        index = index[1:]
    if not right_closed and len(index) and index[-1] == end:
        index = index[:-1]

    dtype = tz_to_dtype(tz)
    return cls._simple_new(index.asi8, freq=freq, dtype=dtype)
self._assert_tzawareness_compat(other) + if setitem: + # Stricter check for setitem vs comparison methods + if not timezones.tz_compare(self.tz, other.tz): + raise ValueError(f"Timezones don't match. '{self.tz} != {other.tz}'") + + def _maybe_clear_freq(self): + self._freq = None + + # ----------------------------------------------------------------- + # Descriptive Properties + + @property + def _box_func(self): + return lambda x: Timestamp(x, freq=self.freq, tz=self.tz) + + @property + def dtype(self) -> Union[np.dtype, DatetimeTZDtype]: + """ + The dtype for the DatetimeArray. + + .. warning:: + + A future version of pandas will change dtype to never be a + ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will + always be an instance of an ``ExtensionDtype`` subclass. + + Returns + ------- + numpy.dtype or DatetimeTZDtype + If the values are tz-naive, then ``np.dtype('datetime64[ns]')`` + is returned. + + If the values are tz-aware, then the ``DatetimeTZDtype`` + is returned. + """ + return self._dtype + + @property + def tz(self): + """ + Return timezone, if any. + + Returns + ------- + datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None + Returns None when the array is tz-naive. + """ + # GH 18595 + return getattr(self.dtype, "tz", None) + + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError( + "Cannot directly set timezone. 
Use tz_localize() " + "or tz_convert() as appropriate" + ) + + @property + def tzinfo(self): + """ + Alias for tz attribute + """ + return self.tz + + @property # NB: override with cache_readonly in immutable subclasses + def _timezone(self): + """ + Comparable timezone both for pytz / dateutil + """ + return timezones.get_timezone(self.tzinfo) + + @property # NB: override with cache_readonly in immutable subclasses + def is_normalized(self): + """ + Returns True if all of the dates are at midnight ("no time") + """ + return conversion.is_date_array_normalized(self.asi8, self.tz) + + @property # NB: override with cache_readonly in immutable subclasses + def _resolution(self): + return libresolution.resolution(self.asi8, self.tz) + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def __array__(self, dtype=None) -> np.ndarray: + if dtype is None and self.tz: + # The default for tz-aware is object, to preserve tz info + dtype = object + + return super().__array__(dtype=dtype) + + def __iter__(self): + """ + Return an iterator over the boxed values + + Yields + ------ + tstamp : Timestamp + """ + + # convert in chunks of 10k for efficiency + data = self.asi8 + length = len(self) + chunksize = 10000 + chunks = int(length / chunksize) + 1 + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, length) + converted = tslib.ints_to_pydatetime( + data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp" + ) + for v in converted: + yield v + + def astype(self, dtype, copy=True): + # We handle + # --> datetime + # --> period + # DatetimeLikeArrayMixin Super handles the rest. 
+ dtype = pandas_dtype(dtype) + + if is_datetime64_ns_dtype(dtype) and not is_dtype_equal(dtype, self.dtype): + # GH#18951: datetime64_ns dtype but not equal means different tz + new_tz = getattr(dtype, "tz", None) + if getattr(self.dtype, "tz", None) is None: + return self.tz_localize(new_tz) + result = self.tz_convert(new_tz) + if copy: + result = result.copy() + if new_tz is None: + # Do we want .astype('datetime64[ns]') to be an ndarray. + # The astype in Block._astype expects this to return an + # ndarray, but we could maybe work around it there. + result = result._data + return result + elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype, dtype): + if copy: + return self.copy() + return self + elif is_period_dtype(dtype): + return self.to_period(freq=dtype.freq) + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy) + + # ----------------------------------------------------------------- + # Rendering Methods + + def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + from pandas.io.formats.format import _get_format_datetime64_from_values + + fmt = _get_format_datetime64_from_values(self, date_format) + + return tslib.format_array_from_datetime( + self.asi8, tz=self.tz, format=fmt, na_rep=na_rep + ) + + # ----------------------------------------------------------------- + # Comparison Methods + + def _has_same_tz(self, other): + zzone = self._timezone + + # vzone shouldn't be None if value is non-datetime like + if isinstance(other, np.datetime64): + # convert to Timestamp as np.datetime64 doesn't have tz attr + other = Timestamp(other) + vzone = timezones.get_timezone(getattr(other, "tzinfo", "__no_tz__")) + return zzone == vzone + + def _assert_tzawareness_compat(self, other): + # adapted from _Timestamp._assert_tzawareness_compat + other_tz = getattr(other, "tzinfo", None) + if is_datetime64tz_dtype(other): + # Get tzinfo from Series dtype + other_tz = other.dtype.tz + if other is NaT: + # pd.NaT quacks both aware 
and naive + pass + elif self.tz is None: + if other_tz is not None: + raise TypeError( + "Cannot compare tz-naive and tz-aware datetime-like objects." + ) + elif other_tz is None: + raise TypeError( + "Cannot compare tz-naive and tz-aware datetime-like objects" + ) + + # ----------------------------------------------------------------- + # Arithmetic Methods + + def _sub_datetime_arraylike(self, other): + """subtract DatetimeArray/Index or ndarray[datetime64]""" + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + + if isinstance(other, np.ndarray): + assert is_datetime64_dtype(other) + other = type(self)(other) + + if not self._has_same_tz(other): + # require tz compat + raise TypeError( + f"{type(self).__name__} subtraction must have the same " + "timezones or no timezones" + ) + + self_i8 = self.asi8 + other_i8 = other.asi8 + arr_mask = self._isnan | other._isnan + new_values = checked_add_with_arr(self_i8, -other_i8, arr_mask=arr_mask) + if self._hasnans or other._hasnans: + new_values[arr_mask] = iNaT + return new_values.view("timedelta64[ns]") + + def _add_offset(self, offset): + if self.ndim == 2: + return self.ravel()._add_offset(offset).reshape(self.shape) + + assert not isinstance(offset, Tick) + try: + if self.tz is not None: + values = self.tz_localize(None) + else: + values = self + result = offset.apply_index(values).tz_localize(self.tz) + + except NotImplementedError: + warnings.warn( + "Non-vectorized DateOffset being applied to Series or DatetimeIndex", + PerformanceWarning, + ) + result = self.astype("O") + offset + if not len(self): + # GH#30336 _from_sequence won't be able to infer self.tz + return type(self)._from_sequence(result).tz_localize(self.tz) + + return type(self)._from_sequence(result, freq="infer") + + def _sub_datetimelike_scalar(self, other): + # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]] + assert isinstance(other, (datetime, np.datetime64)) + assert other is not NaT + 
other = Timestamp(other) + if other is NaT: + return self - NaT + + if not self._has_same_tz(other): + # require tz compat + raise TypeError( + "Timestamp subtraction must have the same timezones or no timezones" + ) + + i8 = self.asi8 + result = checked_add_with_arr(i8, -other.value, arr_mask=self._isnan) + result = self._maybe_mask_results(result) + return result.view("timedelta64[ns]") + + def _add_delta(self, delta): + """ + Add a timedelta-like, Tick, or TimedeltaIndex-like object + to self, yielding a new DatetimeArray + + Parameters + ---------- + delta : {timedelta, np.timedelta64, Tick, + TimedeltaIndex, ndarray[timedelta64]} + + Returns + ------- + result : DatetimeArray + """ + new_values = super()._add_delta(delta) + return type(self)._from_sequence(new_values, tz=self.tz, freq="infer") + + # ----------------------------------------------------------------- + # Timezone Conversion and Localization Methods + + def _local_timestamps(self): + """ + Convert to an i8 (unix-like nanosecond timestamp) representation + while keeping the local timezone and not using UTC. + This is used to calculate time-of-day information as if the timestamps + were timezone-naive. + """ + return tzconversion.tz_convert(self.asi8, utc, self.tz) + + def tz_convert(self, tz): + """ + Convert tz-aware Datetime Array/Index from one time zone to another. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time. Corresponding timestamps would be converted + to this time zone of the Datetime Array/Index. A `tz` of None will + convert to UTC and remove the timezone information. + + Returns + ------- + Array or Index + + Raises + ------ + TypeError + If Datetime Array/Index is tz-naive. + + See Also + -------- + DatetimeIndex.tz : A timezone that has a variable offset from UTC. + DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a + given time zone, or remove timezone from a tz-aware DatetimeIndex.
+ + Examples + -------- + With the `tz` parameter, we can change the DatetimeIndex + to other time zones: + + >>> dti = pd.date_range(start='2014-08-01 09:00', + ... freq='H', periods=3, tz='Europe/Berlin') + + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='H') + + >>> dti.tz_convert('US/Central') + DatetimeIndex(['2014-08-01 02:00:00-05:00', + '2014-08-01 03:00:00-05:00', + '2014-08-01 04:00:00-05:00'], + dtype='datetime64[ns, US/Central]', freq='H') + + With the ``tz=None``, we can remove the timezone (after converting + to UTC if necessary): + + >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H', + ... periods=3, tz='Europe/Berlin') + + >>> dti + DatetimeIndex(['2014-08-01 09:00:00+02:00', + '2014-08-01 10:00:00+02:00', + '2014-08-01 11:00:00+02:00'], + dtype='datetime64[ns, Europe/Berlin]', freq='H') + + >>> dti.tz_convert(None) + DatetimeIndex(['2014-08-01 07:00:00', + '2014-08-01 08:00:00', + '2014-08-01 09:00:00'], + dtype='datetime64[ns]', freq='H') + """ + tz = timezones.maybe_get_tz(tz) + + if self.tz is None: + # tz naive, use tz_localize + raise TypeError( + "Cannot convert tz-naive timestamps, use tz_localize to localize" + ) + + # No conversion since timestamps are all UTC to begin with + dtype = tz_to_dtype(tz) + return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) + + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): + """ + Localize tz-naive Datetime Array/Index to tz-aware + Datetime Array/Index. + + This method takes a time zone (tz) naive Datetime Array/Index object + and makes this time zone aware. It does not move the time to another + time zone. + Time zone localization helps to switch from time zone aware to time + zone unaware objects. + + Parameters + ---------- + tz : str, pytz.timezone, dateutil.tz.tzfile or None + Time zone to convert timestamps to. 
Passing ``None`` will + remove the time zone information preserving local time. + ambiguous : 'infer', 'NaT', bool array, default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + + nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \ +default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise a NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Returns + ------- + Same type as self + Array/Index converted to the specified time zone. + + Raises + ------ + TypeError + If the Datetime Array/Index is tz-aware and tz is not None. + + See Also + -------- + DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from + one time zone to another.
+ + Examples + -------- + >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3) + >>> tz_naive + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + + Localize DatetimeIndex in US/Eastern time zone: + + >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern') + >>> tz_aware + DatetimeIndex(['2018-03-01 09:00:00-05:00', + '2018-03-02 09:00:00-05:00', + '2018-03-03 09:00:00-05:00'], + dtype='datetime64[ns, US/Eastern]', freq='D') + + With the ``tz=None``, we can remove the time zone information + while keeping the local time (not converted to UTC): + + >>> tz_aware.tz_localize(None) + DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', + '2018-03-03 09:00:00'], + dtype='datetime64[ns]', freq='D') + + Be careful with DST changes. When there is sequential data, pandas can + infer the DST time: + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.dt.tz_localize('CET', ambiguous='infer') + 0 2018-10-28 01:30:00+02:00 + 1 2018-10-28 02:00:00+02:00 + 2 2018-10-28 02:30:00+02:00 + 3 2018-10-28 02:00:00+01:00 + 4 2018-10-28 02:30:00+01:00 + 5 2018-10-28 03:00:00+01:00 + 6 2018-10-28 03:30:00+01:00 + dtype: datetime64[ns, CET] + + In some cases, inferring the DST is impossible. In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... 
'2018-10-28 03:46:00'])) + >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False])) + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, Europe/Warsaw] + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backwards'`. + + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 0 2015-03-29 03:00:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, 'Europe/Warsaw'] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 0 2015-03-29 01:59:59.999999999+01:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, 'Europe/Warsaw'] + >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 0 2015-03-29 03:30:00+02:00 + 1 2015-03-29 03:30:00+02:00 + dtype: datetime64[ns, 'Europe/Warsaw'] + """ + nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta + ): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" + ) + + if self.tz is not None: + if tz is None: + new_dates = tzconversion.tz_convert(self.asi8, timezones.UTC, self.tz) + else: + raise TypeError("Already tz-aware, use tz_convert to convert.") + else: + tz = timezones.maybe_get_tz(tz) + # Convert to UTC + + new_dates = conversion.tz_localize_to_utc( + self.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent + ) + new_dates = new_dates.view(_NS_DTYPE) + dtype = tz_to_dtype(tz) + return self._simple_new(new_dates, dtype=dtype, freq=self.freq) + + # ---------------------------------------------------------------- + # Conversion Methods - Vectorized analogues of Timestamp methods + + def to_pydatetime(self): + """ + Return Datetime 
Array/Index as object ndarray of datetime.datetime + objects. + + Returns + ------- + datetimes : ndarray + """ + return tslib.ints_to_pydatetime(self.asi8, tz=self.tz) + + def normalize(self): + """ + Convert times to midnight. + + The time component of the date-time is converted to midnight i.e. + 00:00:00. This is useful in cases, when the time does not matter. + Length is unaltered. The timezones are unaffected. + + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on Datetime Array/Index. + + Returns + ------- + DatetimeArray, DatetimeIndex or Series + The same type as the original data. Series will have the same + name and index. DatetimeIndex will have the same name. + + See Also + -------- + floor : Floor the datetimes to the specified freq. + ceil : Ceil the datetimes to the specified freq. + round : Round the datetimes to the specified freq. + + Examples + -------- + >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H', + ... periods=3, tz='Asia/Calcutta') + >>> idx + DatetimeIndex(['2014-08-01 10:00:00+05:30', + '2014-08-01 11:00:00+05:30', + '2014-08-01 12:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq='H') + >>> idx.normalize() + DatetimeIndex(['2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30', + '2014-08-01 00:00:00+05:30'], + dtype='datetime64[ns, Asia/Calcutta]', freq=None) + """ + if self.tz is None or timezones.is_utc(self.tz): + not_null = ~self.isna() + DAY_NS = ccalendar.DAY_SECONDS * 1_000_000_000 + new_values = self.asi8.copy() + adjustment = new_values[not_null] % DAY_NS + new_values[not_null] = new_values[not_null] - adjustment + else: + new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) + return type(self)._from_sequence(new_values, freq="infer").tz_localize(self.tz) + + def to_period(self, freq=None): + """ + Cast to PeriodArray/Index at a particular frequency. + + Converts DatetimeArray/Index to PeriodArray/Index. 
+ + Parameters + ---------- + freq : str or Offset, optional + One of pandas' :ref:`offset strings <timeseries.offset_aliases>` + or an Offset object. Will be inferred by default. + + Returns + ------- + PeriodArray/Index + + Raises + ------ + ValueError + When converting a DatetimeArray/Index with non-regular values, + so that a frequency cannot be inferred. + + See Also + -------- + PeriodIndex: Immutable ndarray holding ordinal values. + DatetimeIndex.to_pydatetime: Return DatetimeIndex as object. + + Examples + -------- + >>> df = pd.DataFrame({"y": [1, 2, 3]}, + ... index=pd.to_datetime(["2000-03-31 00:00:00", + ... "2000-05-31 00:00:00", + ... "2000-08-31 00:00:00"])) + >>> df.index.to_period("M") + PeriodIndex(['2000-03', '2000-05', '2000-08'], + dtype='period[M]', freq='M') + + Infer the daily frequency + + >>> idx = pd.date_range("2017-01-01", periods=2) + >>> idx.to_period() + PeriodIndex(['2017-01-01', '2017-01-02'], + dtype='period[D]', freq='D') + """ + from pandas.core.arrays import PeriodArray + + if self.tz is not None: + warnings.warn( + "Converting to PeriodArray/Index representation " + "will drop timezone information.", + UserWarning, + ) + + if freq is None: + freq = self.freqstr or self.inferred_freq + + if freq is None: + raise ValueError( + "You must pass a freq argument as current index has none." + ) + + freq = get_period_alias(freq) + + return PeriodArray._from_datetime64(self._data, freq, tz=self.tz) + + def to_perioddelta(self, freq): + """ + Calculate TimedeltaArray of difference between index + values and index converted to PeriodArray at specified + freq. Used for vectorized offsets.
+ + Parameters + ---------- + freq : Period frequency + + Returns + ------- + TimedeltaArray/Index + """ + # TODO: consider privatizing (discussion in GH#23113) + from pandas.core.arrays.timedeltas import TimedeltaArray + + i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8 + m8delta = i8delta.view("m8[ns]") + return TimedeltaArray(m8delta) + + # ----------------------------------------------------------------- + # Properties - Vectorized Timestamp Properties/Methods + + def month_name(self, locale=None): + """ + Return the month names of the DateTimeIndex with specified locale. + + .. versionadded:: 0.23.0 + + Parameters + ---------- + locale : str, optional + Locale determining the language in which to return the month name. + Default is English locale. + + Returns + ------- + Index + Index of month names. + + Examples + -------- + >>> idx = pd.date_range(start='2018-01', freq='M', periods=3) + >>> idx + DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], + dtype='datetime64[ns]', freq='M') + >>> idx.month_name() + Index(['January', 'February', 'March'], dtype='object') + """ + if self.tz is not None and not timezones.is_utc(self.tz): + values = self._local_timestamps() + else: + values = self.asi8 + + result = fields.get_date_name_field(values, "month_name", locale=locale) + result = self._maybe_mask_results(result, fill_value=None) + return result + + def day_name(self, locale=None): + """ + Return the day names of the DateTimeIndex with specified locale. + + .. versionadded:: 0.23.0 + + Parameters + ---------- + locale : str, optional + Locale determining the language in which to return the day name. + Default is English locale. + + Returns + ------- + Index + Index of day names. 
+ + Examples + -------- + >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3) + >>> idx + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], + dtype='datetime64[ns]', freq='D') + >>> idx.day_name() + Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object') + """ + if self.tz is not None and not timezones.is_utc(self.tz): + values = self._local_timestamps() + else: + values = self.asi8 + + result = fields.get_date_name_field(values, "day_name", locale=locale) + result = self._maybe_mask_results(result, fill_value=None) + return result + + @property + def time(self): + """ + Returns numpy array of datetime.time. The time part of the Timestamps. + """ + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + if self.tz is not None and not timezones.is_utc(self.tz): + timestamps = self._local_timestamps() + else: + timestamps = self.asi8 + + return tslib.ints_to_pydatetime(timestamps, box="time") + + @property + def timetz(self): + """ + Returns numpy array of datetime.time also containing timezone + information. The time part of the Timestamps. + """ + return tslib.ints_to_pydatetime(self.asi8, self.tz, box="time") + + @property + def date(self): + """ + Returns numpy array of python datetime.date objects (namely, the date + part of Timestamps without timezone information). + """ + # If the Timestamps have a timezone that is not UTC, + # convert them into their i8 representation while + # keeping their timezone and not using UTC + if self.tz is not None and not timezones.is_utc(self.tz): + timestamps = self._local_timestamps() + else: + timestamps = self.asi8 + + return tslib.ints_to_pydatetime(timestamps, box="date") + + year = _field_accessor( + "year", + "Y", + """ + The year of the datetime. + """, + ) + month = _field_accessor( + "month", + "M", + """ + The month as January=1, December=12. 
+ """, + ) + day = _field_accessor( + "day", + "D", + """ + The day of the datetime. + """, + ) + hour = _field_accessor( + "hour", + "h", + """ + The hours of the datetime. + """, + ) + minute = _field_accessor( + "minute", + "m", + """ + The minutes of the datetime. + """, + ) + second = _field_accessor( + "second", + "s", + """ + The seconds of the datetime. + """, + ) + microsecond = _field_accessor( + "microsecond", + "us", + """ + The microseconds of the datetime. + """, + ) + nanosecond = _field_accessor( + "nanosecond", + "ns", + """ + The nanoseconds of the datetime. + """, + ) + weekofyear = _field_accessor( + "weekofyear", + "woy", + """ + The week ordinal of the year. + """, + ) + week = weekofyear + _dayofweek_doc = """ + The day of the week with Monday=0, Sunday=6. + + Return the day of the week. It is assumed the week starts on + Monday, which is denoted by 0 and ends on Sunday which is denoted + by 6. This method is available on both Series with datetime + values (using the `dt` accessor) and DatetimeIndex. + + Returns + ------- + Series or Index + Containing integers indicating the day number. + + See Also + -------- + Series.dt.dayofweek : Alias. + Series.dt.weekday : Alias. + Series.dt.day_name : Returns the name of the day of the week. + + Examples + -------- + >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series() + >>> s.dt.dayofweek + 2016-12-31 5 + 2017-01-01 6 + 2017-01-02 0 + 2017-01-03 1 + 2017-01-04 2 + 2017-01-05 3 + 2017-01-06 4 + 2017-01-07 5 + 2017-01-08 6 + Freq: D, dtype: int64 + """ + dayofweek = _field_accessor("dayofweek", "dow", _dayofweek_doc) + weekday = dayofweek + + dayofyear = _field_accessor( + "dayofyear", + "doy", + """ + The ordinal day of the year. + """, + ) + quarter = _field_accessor( + "quarter", + "q", + """ + The quarter of the date. + """, + ) + days_in_month = _field_accessor( + "days_in_month", + "dim", + """ + The number of days in the month.
+ """, + ) + daysinmonth = days_in_month + _is_month_doc = """ + Indicates whether the date is the {first_or_last} day of the month. + + Returns + ------- + Series or array + For Series, returns a Series with boolean values. + For DatetimeIndex, returns a boolean array. + + See Also + -------- + is_month_start : Return a boolean indicating whether the date + is the first day of the month. + is_month_end : Return a boolean indicating whether the date + is the last day of the month. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> s = pd.Series(pd.date_range("2018-02-27", periods=3)) + >>> s + 0 2018-02-27 + 1 2018-02-28 + 2 2018-03-01 + dtype: datetime64[ns] + >>> s.dt.is_month_start + 0 False + 1 False + 2 True + dtype: bool + >>> s.dt.is_month_end + 0 False + 1 True + 2 False + dtype: bool + + >>> idx = pd.date_range("2018-02-27", periods=3) + >>> idx.is_month_start + array([False, False, True]) + >>> idx.is_month_end + array([False, True, False]) + """ + is_month_start = _field_accessor( + "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first") + ) + + is_month_end = _field_accessor( + "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last") + ) + + is_quarter_start = _field_accessor( + "is_quarter_start", + "is_quarter_start", + """ + Indicator for whether the date is the first day of a quarter. + + Returns + ------- + is_quarter_start : Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + quarter : Return the quarter of the date. + is_quarter_end : Similar property for indicating the quarter end. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex.
+ + >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30", + ... periods=4)}) + >>> df.assign(quarter=df.dates.dt.quarter, + ... is_quarter_start=df.dates.dt.is_quarter_start) + dates quarter is_quarter_start + 0 2017-03-30 1 False + 1 2017-03-31 1 False + 2 2017-04-01 2 True + 3 2017-04-02 2 False + + >>> idx = pd.date_range('2017-03-30', periods=4) + >>> idx + DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_quarter_start + array([False, False, True, False]) + """, + ) + is_quarter_end = _field_accessor( + "is_quarter_end", + "is_quarter_end", + """ + Indicator for whether the date is the last day of a quarter. + + Returns + ------- + is_quarter_end : Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + quarter : Return the quarter of the date. + is_quarter_start : Similar property indicating the quarter start. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30", + ... periods=4)}) + >>> df.assign(quarter=df.dates.dt.quarter, + ... is_quarter_end=df.dates.dt.is_quarter_end) + dates quarter is_quarter_end + 0 2017-03-30 1 False + 1 2017-03-31 1 True + 2 2017-04-01 2 False + 3 2017-04-02 2 False + + >>> idx = pd.date_range('2017-03-30', periods=4) + >>> idx + DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_quarter_end + array([False, True, False, False]) + """, + ) + is_year_start = _field_accessor( + "is_year_start", + "is_year_start", + """ + Indicate whether the date is the first day of a year. + + Returns + ------- + Series or DatetimeIndex + The same type as the original data with boolean values. 
Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + is_year_end : Similar property indicating the last day of the year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3)) + >>> dates + 0 2017-12-30 + 1 2017-12-31 + 2 2018-01-01 + dtype: datetime64[ns] + + >>> dates.dt.is_year_start + 0 False + 1 False + 2 True + dtype: bool + + >>> idx = pd.date_range("2017-12-30", periods=3) + >>> idx + DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_year_start + array([False, False, True]) + """, + ) + is_year_end = _field_accessor( + "is_year_end", + "is_year_end", + """ + Indicate whether the date is the last day of the year. + + Returns + ------- + Series or DatetimeIndex + The same type as the original data with boolean values. Series will + have the same name and index. DatetimeIndex will have the same + name. + + See Also + -------- + is_year_start : Similar property indicating the start of the year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3)) + >>> dates + 0 2017-12-30 + 1 2017-12-31 + 2 2018-01-01 + dtype: datetime64[ns] + + >>> dates.dt.is_year_end + 0 False + 1 True + 2 False + dtype: bool + + >>> idx = pd.date_range("2017-12-30", periods=3) + >>> idx + DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + >>> idx.is_year_end + array([False, True, False]) + """, + ) + is_leap_year = _field_accessor( + "is_leap_year", + "is_leap_year", + """ + Boolean indicator if the date belongs to a leap year. 
+ + A leap year is a year, which has 366 days (instead of 365) including + 29th of February as an intercalary day. + Leap years are years which are multiples of four with the exception + of years divisible by 100 but not by 400. + + Returns + ------- + Series or ndarray + Booleans indicating if dates belong to a leap year. + + Examples + -------- + This method is available on Series with datetime values under + the ``.dt`` accessor, and directly on DatetimeIndex. + + >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y") + >>> idx + DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'], + dtype='datetime64[ns]', freq='A-DEC') + >>> idx.is_leap_year + array([ True, False, False]) + + >>> dates_series = pd.Series(idx) + >>> dates_series + 0 2012-12-31 + 1 2013-12-31 + 2 2014-12-31 + dtype: datetime64[ns] + >>> dates_series.dt.is_leap_year + 0 True + 1 False + 2 False + dtype: bool + """, + ) + + def to_julian_date(self): + """ + Convert Datetime Array to float64 ndarray of Julian Dates. + 0 Julian date is noon January 1, 4713 BC.
+ http://en.wikipedia.org/wiki/Julian_day + """ + + # http://mysite.verizon.net/aesir_research/date/jdalg2.htm + year = np.asarray(self.year) + month = np.asarray(self.month) + day = np.asarray(self.day) + testarr = month < 3 + year[testarr] -= 1 + month[testarr] += 12 + return ( + day + + np.fix((153 * month - 457) / 5) + + 365 * year + + np.floor(year / 4) + - np.floor(year / 100) + + np.floor(year / 400) + + 1_721_118.5 + + ( + self.hour + + self.minute / 60.0 + + self.second / 3600.0 + + self.microsecond / 3600.0 / 1e6 + + self.nanosecond / 3600.0 / 1e9 + ) + / 24.0 + ) + + +# ------------------------------------------------------------------- +# Constructor Helpers + + +def sequence_to_dt64ns( + data, + dtype=None, + copy=False, + tz=None, + dayfirst=False, + yearfirst=False, + ambiguous="raise", +): + """ + Parameters + ---------- + data : list-like + dtype : dtype, str, or None, default None + copy : bool, default False + tz : tzinfo, str, or None, default None + dayfirst : bool, default False + yearfirst : bool, default False + ambiguous : str, bool, or arraylike, default 'raise' + See pandas._libs.tslibs.conversion.tz_localize_to_utc. + + Returns + ------- + result : numpy.ndarray + The sequence converted to a numpy array with dtype ``datetime64[ns]``. + tz : tzinfo or None + Either the user-provided tzinfo or one inferred from the data. + inferred_freq : Tick or None + The inferred frequency of the sequence. + + Raises + ------ + TypeError : PeriodDType data is passed + """ + + inferred_freq = None + + dtype = _validate_dt64_dtype(dtype) + + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. generator + data = list(data) + data = np.asarray(data) + copy = False + elif isinstance(data, ABCSeries): + data = data._values + if isinstance(data, ABCPandasArray): + data = data.to_numpy() + + if hasattr(data, "freq"): + # i.e. 
DatetimeArray/Index + inferred_freq = data.freq + + # if dtype has an embedded tz, capture it + tz = validate_tz_from_dtype(dtype, tz) + + if isinstance(data, ABCIndexClass): + if data.nlevels > 1: + # Without this check, data._data below is None + raise TypeError("Cannot create a DatetimeArray from a MultiIndex.") + data = data._data + + # By this point we are assured to have either a numpy array or Index + data, copy = maybe_convert_dtype(data, copy) + + if is_object_dtype(data) or is_string_dtype(data): + # TODO: We do not have tests specific to string-dtypes, + # also complex or categorical or other extension + copy = False + if lib.infer_dtype(data, skipna=False) == "integer": + data = data.astype(np.int64) + else: + # data comes back here as either i8 to denote UTC timestamps + # or M8[ns] to denote wall times + data, inferred_tz = objects_to_datetime64ns( + data, dayfirst=dayfirst, yearfirst=yearfirst + ) + tz = maybe_infer_tz(tz, inferred_tz) + + # `data` may have originally been a Categorical[datetime64[ns, tz]], + # so we need to handle these types. + if is_datetime64tz_dtype(data): + # DatetimeArray -> ndarray + tz = maybe_infer_tz(tz, data.tz) + result = data._data + + elif is_datetime64_dtype(data): + # tz-naive DatetimeArray or ndarray[datetime64] + data = getattr(data, "_data", data) + if data.dtype != _NS_DTYPE: + data = conversion.ensure_datetime64ns(data) + + if tz is not None: + # Convert tz-naive to UTC + tz = timezones.maybe_get_tz(tz) + data = conversion.tz_localize_to_utc( + data.view("i8"), tz, ambiguous=ambiguous + ) + data = data.view(_NS_DTYPE) + + assert data.dtype == _NS_DTYPE, data.dtype + result = data + + else: + # must be integer dtype otherwise + # assume this data are epoch timestamps + if tz: + tz = timezones.maybe_get_tz(tz) + + if data.dtype != _INT64_DTYPE: + data = data.astype(np.int64, copy=False) + result = data.view(_NS_DTYPE) + + if copy: + # TODO: should this be deepcopy? 
+ result = result.copy() + + assert isinstance(result, np.ndarray), type(result) + assert result.dtype == "M8[ns]", result.dtype + + # We have to call this again after possibly inferring a tz above + validate_tz_from_dtype(dtype, tz) + + return result, tz, inferred_freq + + +def objects_to_datetime64ns( + data, + dayfirst, + yearfirst, + utc=False, + errors="raise", + require_iso8601=False, + allow_object=False, +): + """ + Convert data to array of timestamps. + + Parameters + ---------- + data : np.ndarray[object] + dayfirst : bool + yearfirst : bool + utc : bool, default False + Whether to convert timezone-aware timestamps to UTC. + errors : {'raise', 'ignore', 'coerce'} + allow_object : bool + Whether to return an object-dtype ndarray instead of raising if the + data contains more than one timezone. + + Returns + ------- + result : ndarray + np.int64 dtype if returned values represent UTC timestamps + np.datetime64[ns] if returned values represent wall times + object if mixed timezones + inferred_tz : tzinfo or None + + Raises + ------ + ValueError : if data cannot be converted to datetimes + """ + assert errors in ["raise", "ignore", "coerce"] + + # if str-dtype, convert + data = np.array(data, copy=False, dtype=np.object_) + + try: + result, tz_parsed = tslib.array_to_datetime( + data, + errors=errors, + utc=utc, + dayfirst=dayfirst, + yearfirst=yearfirst, + require_iso8601=require_iso8601, + ) + except ValueError as e: + try: + values, tz_parsed = conversion.datetime_to_datetime64(data) + # If tzaware, these values represent unix timestamps, so we + # return them as i8 to distinguish from wall times + return values.view("i8"), tz_parsed + except (ValueError, TypeError): + raise e + + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + # Return i8 values to denote unix timestamps + return result.view("i8"), tz_parsed + elif is_datetime64_dtype(result): + # returning M8[ns] denotes wall-times; since tz is None + 
# the distinction is a thin one + return result, tz_parsed + elif is_object_dtype(result): + # GH#23675 when called via `pd.to_datetime`, returning an object-dtype + # array is allowed. When called via `pd.DatetimeIndex`, we can + # only accept datetime64 dtype, so raise TypeError if object-dtype + # is returned, as that indicates the values can be recognized as + # datetimes but they have conflicting timezones/awareness + if allow_object: + return result, tz_parsed + raise TypeError(result) + else: # pragma: no cover + # GH#23675 this TypeError should never be hit, whereas the TypeError + # in the object-dtype branch above is reachable. + raise TypeError(result) + + +def maybe_convert_dtype(data, copy): + """ + Convert data based on dtype conventions, issuing deprecation warnings + or errors where appropriate. + + Parameters + ---------- + data : np.ndarray or pd.Index + copy : bool + + Returns + ------- + data : np.ndarray or pd.Index + copy : bool + + Raises + ------ + TypeError : PeriodDType data is passed + """ + if is_float_dtype(data): + # Note: we must cast to datetime64[ns] here in order to treat these + # as wall-times instead of UTC timestamps. + data = data.astype(_NS_DTYPE) + copy = False + # TODO: deprecate this behavior to instead treat symmetrically + # with integer dtypes. See discussion in GH#23675 + + elif is_timedelta64_dtype(data): + # GH#29794 enforcing deprecation introduced in GH#23539 + raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]") + elif is_period_dtype(data): + # Note: without explicitly raising here, PeriodIndex + # test_setops.test_join_does_not_recur fails + raise TypeError( + "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead" + ) + + elif is_categorical_dtype(data): + # GH#18664 preserve tz in going DTI->Categorical->DTI + # TODO: cases where we need to do another pass through this func, + # e.g. 
the categories are timedelta64s + data = data.categories.take(data.codes, fill_value=NaT)._values + copy = False + + elif is_extension_array_dtype(data) and not is_datetime64tz_dtype(data): + # Includes categorical + # TODO: We have no tests for these + data = np.array(data, dtype=np.object_) + copy = False + + return data, copy + + +# ------------------------------------------------------------------- +# Validation and Inference + + +def maybe_infer_tz(tz, inferred_tz): + """ + If a timezone is inferred from data, check that it is compatible with + the user-provided timezone, if any. + + Parameters + ---------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if both timezones are present but do not match + """ + if tz is None: + tz = inferred_tz + elif inferred_tz is None: + pass + elif not timezones.tz_compare(tz, inferred_tz): + raise TypeError( + f"data is already tz-aware {inferred_tz}, unable to " + f"set specified tz: {tz}" + ) + return tz + + +def _validate_dt64_dtype(dtype): + """ + Check that a dtype, if passed, represents either a numpy datetime64[ns] + dtype or a pandas DatetimeTZDtype. + + Parameters + ---------- + dtype : object + + Returns + ------- + dtype : None, numpy.dtype, or DatetimeTZDtype + + Raises + ------ + ValueError : invalid dtype + + Notes + ----- + Unlike validate_tz_from_dtype, this does _not_ allow non-existent + tz errors to go through + """ + if dtype is not None: + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("M8")): + # no precision, disallowed GH#24806 + msg = ( + "Passing in 'datetime64' dtype with no precision is not allowed. " + "Please pass in 'datetime64[ns]' instead." + ) + raise ValueError(msg) + + if (isinstance(dtype, np.dtype) and dtype != _NS_DTYPE) or not isinstance( + dtype, (np.dtype, DatetimeTZDtype) + ): + raise ValueError( + f"Unexpected value for 'dtype': '{dtype}'. 
" + "Must be 'datetime64[ns]' or DatetimeTZDtype'." + ) + return dtype + + +def validate_tz_from_dtype(dtype, tz): + """ + If the given dtype is a DatetimeTZDtype, extract the implied + tzinfo object from it and check that it does not conflict with the given + tz. + + Parameters + ---------- + dtype : dtype, str + tz : None, tzinfo + + Returns + ------- + tz : consensus tzinfo + + Raises + ------ + ValueError : on tzinfo mismatch + """ + if dtype is not None: + if isinstance(dtype, str): + try: + dtype = DatetimeTZDtype.construct_from_string(dtype) + except TypeError: + # Things like `datetime64[ns]`, which is OK for the + # constructors, but also nonsense, which should be validated + # but not by us. We *do* allow non-existent tz errors to + # go through + pass + dtz = getattr(dtype, "tz", None) + if dtz is not None: + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError("cannot supply both a tz and a dtype with a tz") + tz = dtz + + if tz is not None and is_datetime64_dtype(dtype): + # We also need to check for the case where the user passed a + # tz-naive dtype (i.e. datetime64[ns]) + if tz is not None and not timezones.tz_compare(tz, dtz): + raise ValueError( + "cannot supply both a tz and a " + "timezone-naive dtype (i.e. datetime64[ns])" + ) + + return tz + + +def _infer_tz_from_endpoints(start, end, tz): + """ + If a timezone is not explicitly given via `tz`, see if one can + be inferred from the `start` and `end` endpoints. If more than one + of these inputs provides a timezone, require that they all agree. 
+ + Parameters + ---------- + start : Timestamp + end : Timestamp + tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + + Raises + ------ + TypeError : if start and end timezones do not agree + """ + try: + inferred_tz = timezones.infer_tzinfo(start, end) + except AssertionError: + # infer_tzinfo raises AssertionError if passed mismatched timezones + raise TypeError( + "Start and end cannot both be tz-aware with different timezones" + ) + + inferred_tz = timezones.maybe_get_tz(inferred_tz) + tz = timezones.maybe_get_tz(tz) + + if tz is not None and inferred_tz is not None: + if not timezones.tz_compare(inferred_tz, tz): + raise AssertionError("Inferred time zone not equal to passed time zone") + + elif inferred_tz is not None: + tz = inferred_tz + + return tz + + +def _maybe_normalize_endpoints(start, end, normalize): + _normalized = True + + if start is not None: + if normalize: + start = normalize_date(start) + _normalized = True + else: + _normalized = _normalized and start.time() == _midnight + + if end is not None: + if normalize: + end = normalize_date(end) + _normalized = True + else: + _normalized = _normalized and end.time() == _midnight + + return start, end, _normalized + + +def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent): + """ + Localize a start or end Timestamp to the timezone of the corresponding + start or end Timestamp + + Parameters + ---------- + ts : start or end Timestamp to potentially localize + is_none : argument that should be None + is_not_none : argument that should not be None + freq : Tick, DateOffset, or None + tz : str, timezone object or None + ambiguous: str, localization behavior for ambiguous times + nonexistent: str, localization behavior for nonexistent times + + Returns + ------- + ts : Timestamp + """ + # Make sure start and end are timezone localized if: + # 1) freq = a Timedelta-like frequency (Tick) + # 2) freq = None i.e. 
generating a linspaced range + if is_none is None and is_not_none is not None: + # Note: We can't ambiguous='infer' a singular ambiguous time; however, + # we have historically defaulted ambiguous=False + ambiguous = ambiguous if ambiguous != "infer" else False + localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None} + if isinstance(freq, Tick) or freq is None: + localize_args["tz"] = tz + ts = ts.tz_localize(**localize_args) + return ts diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py new file mode 100644 index 00000000..96fdd8ee --- /dev/null +++ b/pandas/core/arrays/integer.py @@ -0,0 +1,811 @@ +import numbers +from typing import Any, Tuple, Type +import warnings + +import numpy as np + +from pandas._libs import lib, missing as libmissing +from pandas.compat import set_function_name +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.missing import isna + +from pandas.core import nanops, ops +from pandas.core.indexers import check_array_indexer +from pandas.core.ops import invalid_comparison +from pandas.core.ops.common import unpack_zerodim_and_defer +from pandas.core.tools.numeric import to_numeric + +from .masked import BaseMaskedArray + + +class _IntegerDtype(ExtensionDtype): + """ + An ExtensionDtype to hold a single size & kind of integer dtype. + + These specific implementations are subclasses of the non-public + _IntegerDtype. For example we have Int8Dtype to represent signed int 8s. + + The attributes name & type are set when these subclasses are created. 
+ """ + + name: str + base = None + type: Type + na_value = libmissing.NA + + def __repr__(self) -> str: + sign = "U" if self.is_unsigned_integer else "" + return f"{sign}Int{8 * self.itemsize}Dtype()" + + @cache_readonly + def is_signed_integer(self): + return self.kind == "i" + + @cache_readonly + def is_unsigned_integer(self): + return self.kind == "u" + + @property + def _is_numeric(self): + return True + + @cache_readonly + def numpy_dtype(self): + """ Return an instance of our numpy dtype """ + return np.dtype(self.type) + + @cache_readonly + def kind(self): + return self.numpy_dtype.kind + + @cache_readonly + def itemsize(self): + """ Return the number of bytes in this dtype """ + return self.numpy_dtype.itemsize + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return IntegerArray + + def __from_arrow__(self, array): + """Construct IntegerArray from passed pyarrow Array/ChunkedArray""" + import pyarrow + from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask + + pyarrow_type = pyarrow.from_numpy_dtype(self.type) + if not array.type.equals(pyarrow_type): + array = array.cast(pyarrow_type) + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type) + int_arr = IntegerArray(data.copy(), ~mask, copy=False) + results.append(int_arr) + + return IntegerArray._concat_same_type(results) + + +def integer_array(values, dtype=None, copy=False): + """ + Infer and return an integer array of the values. 
def safe_cast(values, dtype, copy):
    """
    Safely cast the values to the dtype if they
    are equivalent, meaning floats must be equivalent to the
    ints.

    Parameters
    ----------
    values : np.ndarray
    dtype : dtype
        Target dtype, anything accepted by ``ndarray.astype``.
    copy : bool
        Forwarded to ``ndarray.astype``.

    Returns
    -------
    np.ndarray
        ``values`` cast to ``dtype``.

    Raises
    ------
    TypeError
        If numpy refuses a "safe" cast and the unchecked cast does not
        compare equal to the input element-wise. The numpy error is attached
        as ``__cause__``.
    """
    try:
        return values.astype(dtype, casting="safe", copy=copy)
    except TypeError as err:
        # numpy rejects e.g. float -> int under "safe" casting. Accept the
        # cast anyway when it is lossless for these particular values.
        casted = values.astype(dtype, copy=copy)
        if (casted == values).all():
            return casted

        # Chain explicitly so the traceback reads as a deliberate
        # translation of numpy's error rather than a failure inside the
        # handler.
        raise TypeError(
            f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
        ) from err
is_object_dtype(values): + inferred_type = lib.infer_dtype(values, skipna=True) + if inferred_type == "empty": + values = np.empty(len(values)) + values.fill(np.nan) + elif inferred_type not in [ + "floating", + "integer", + "mixed-integer", + "integer-na", + "mixed-integer-float", + ]: + raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") + + elif is_bool_dtype(values) and is_integer_dtype(dtype): + values = np.array(values, dtype=int, copy=copy) + + elif not (is_integer_dtype(values) or is_float_dtype(values)): + raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") + + if mask is None: + mask = isna(values) + else: + assert len(mask) == len(values) + + if not values.ndim == 1: + raise TypeError("values must be a 1D list-like") + if not mask.ndim == 1: + raise TypeError("mask must be a 1D list-like") + + # infer dtype if needed + if dtype is None: + dtype = np.dtype("int64") + else: + dtype = dtype.type + + # if we are float, let's make sure that we can + # safely cast + + # we copy as need to coerce here + if mask.any(): + values = values.copy() + values[mask] = 1 + values = safe_cast(values, dtype, copy=False) + else: + values = safe_cast(values, dtype, copy=False) + + return values, mask + + +class IntegerArray(BaseMaskedArray): + """ + Array of integer (optional missing) values. + + .. versionadded:: 0.24.0 + + .. versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as the missing value rather + than :attr:`numpy.nan`. + + .. warning:: + + IntegerArray is currently experimental, and its API or internal + implementation may change without warning. + + We represent an IntegerArray with 2 numpy arrays: + + - data: contains a numpy integer array of the appropriate dtype + - mask: a boolean array holding a mask on the data, True is missing + + To construct an IntegerArray from generic array-like input, use + :func:`pandas.array` with one of the integer dtypes (see examples). + + See :ref:`integer_na` for more. 
+ + Parameters + ---------- + values : numpy.ndarray + A 1-d integer-dtype array. + mask : numpy.ndarray + A 1-d boolean-dtype array indicating missing values. + copy : bool, default False + Whether to copy the `values` and `mask`. + + Attributes + ---------- + None + + Methods + ------- + None + + Returns + ------- + IntegerArray + + Examples + -------- + Create an IntegerArray with :func:`pandas.array`. + + >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype()) + >>> int_array + + [1, , 3] + Length: 3, dtype: Int32 + + String aliases for the dtypes are also available. They are capitalized. + + >>> pd.array([1, None, 3], dtype='Int32') + + [1, , 3] + Length: 3, dtype: Int32 + + >>> pd.array([1, None, 3], dtype='UInt16') + + [1, , 3] + Length: 3, dtype: UInt16 + """ + + # The value used to fill '_data' to avoid upcasting + _internal_fill_value = 1 + + @cache_readonly + def dtype(self): + return _dtypes[str(self._data.dtype)] + + def __init__(self, values, mask, copy=False): + if not (isinstance(values, np.ndarray) and is_integer_dtype(values.dtype)): + raise TypeError( + "values should be integer numpy array. Use " + "the 'integer_array' function instead" + ) + if not (isinstance(mask, np.ndarray) and is_bool_dtype(mask.dtype)): + raise TypeError( + "mask should be boolean numpy array. 
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """
    Apply a NumPy ufunc to this IntegerArray.

    For IntegerArray inputs the ufunc is applied to ``._data`` and the
    combined mask of all IntegerArray inputs is applied to the result.

    Parameters
    ----------
    ufunc : numpy.ufunc
    method : str
        Name of the ufunc method being invoked (e.g. "__call__").
    *inputs
        Operands of the ufunc.
    **kwargs
        Forwarded to the ufunc; ``out`` entries are type-checked as well.

    Returns
    -------
    IntegerArray, ndarray, tuple of those, or NotImplemented
        Integer-dtype outputs are wrapped in IntegerArray with a copy of
        the combined mask; other outputs get NaN written at masked
        positions. Multi-output ufuncs yield a tuple with one such entry
        per output. NotImplemented is returned when an operand is not of a
        handled type.

    Raises
    ------
    NotImplementedError
        If ``method`` is "reduce".
    """
    if method == "reduce":
        # Not clear how to handle missing values in reductions. Raise.
        raise NotImplementedError("The 'reduce' method is not supported.")
    out = kwargs.get("out", ())

    handled = self._HANDLED_TYPES + (IntegerArray,)
    for x in inputs + out:
        if not isinstance(x, handled):
            return NotImplemented

    # for binary ops, use our custom dunder methods
    result = ops.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    # Union of the masks of every IntegerArray operand; the raw data of
    # those operands is what the ufunc actually sees.
    mask = np.zeros(len(self), dtype=bool)
    inputs2 = []
    for x in inputs:
        if isinstance(x, IntegerArray):
            mask |= x._mask
            inputs2.append(x._data)
        else:
            inputs2.append(x)

    def reconstruct(x):
        # we don't worry about scalar `x` here, since we
        # raise for reduce up above.
        if is_integer_dtype(x.dtype):
            # Each wrapped output gets its own copy of the mask.
            return IntegerArray(x, mask.copy())
        x[mask] = np.nan
        return x

    result = getattr(ufunc, method)(*inputs2, **kwargs)
    if isinstance(result, tuple):
        # Multi-output ufuncs hand back a tuple; every member is rewrapped
        # and the tuple itself must be returned to the caller.
        return tuple(reconstruct(x) for x in result)
    return reconstruct(result)
+ + Raises + ------ + TypeError + if incompatible type with an IntegerDtype, equivalent of same_kind + casting + """ + from pandas.core.arrays.boolean import BooleanArray, BooleanDtype + + dtype = pandas_dtype(dtype) + + # if we are astyping to an existing IntegerDtype we can fastpath + if isinstance(dtype, _IntegerDtype): + result = self._data.astype(dtype.numpy_dtype, copy=False) + return type(self)(result, mask=self._mask, copy=False) + elif isinstance(dtype, BooleanDtype): + result = self._data.astype("bool", copy=False) + return BooleanArray(result, mask=self._mask, copy=False) + + # coerce + if is_float_dtype(dtype): + # In astype, we consider dtype=float to also mean na_value=np.nan + kwargs = dict(na_value=np.nan) + else: + kwargs = {} + + data = self.to_numpy(dtype=dtype, **kwargs) + return astype_nansafe(data, dtype, copy=False) + + @property + def _ndarray_values(self) -> np.ndarray: + """Internal pandas method for lossy conversion to a NumPy ndarray. + + This method is not part of the pandas interface. + + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. + """ + return self._data + + def _values_for_factorize(self) -> Tuple[np.ndarray, Any]: + # TODO: https://github.com/pandas-dev/pandas/issues/30037 + # use masked algorithms, rather than object-dtype / np.nan. + return self.to_numpy(na_value=np.nan), np.nan + + def _values_for_argsort(self) -> np.ndarray: + """Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. 
+ + See Also + -------- + ExtensionArray.argsort + """ + data = self._data.copy() + data[self._mask] = data.min() - 1 + return data + + @classmethod + def _create_comparison_method(cls, op): + op_name = op.__name__ + + @unpack_zerodim_and_defer(op.__name__) + def cmp_method(self, other): + from pandas.arrays import BooleanArray + + mask = None + + if isinstance(other, (BooleanArray, IntegerArray)): + other, mask = other._data, other._mask + + elif is_list_like(other): + other = np.asarray(other) + if other.ndim > 1: + raise NotImplementedError( + "can only perform ops with 1-d structures" + ) + if len(self) != len(other): + raise ValueError("Lengths must match to compare") + + if other is libmissing.NA: + # numpy does not handle pd.NA well as "other" scalar (it returns + # a scalar False instead of an array) + # This may be fixed by NA.__array_ufunc__. Revisit this check + # once that's implemented. + result = np.zeros(self._data.shape, dtype="bool") + mask = np.ones(self._data.shape, dtype="bool") + else: + with warnings.catch_warnings(): + # numpy may show a FutureWarning: + # elementwise comparison failed; returning scalar instead, + # but in the future will perform elementwise comparison + # before returning NotImplemented. We fall back to the correct + # behavior today, so that should be fine to ignore. 
+ warnings.filterwarnings("ignore", "elementwise", FutureWarning) + with np.errstate(all="ignore"): + method = getattr(self._data, f"__{op_name}__") + result = method(other) + + if result is NotImplemented: + result = invalid_comparison(self._data, other, op) + + # nans propagate + if mask is None: + mask = self._mask.copy() + else: + mask = self._mask | mask + + return BooleanArray(result, mask) + + name = f"__{op.__name__}__" + return set_function_name(cmp_method, name, cls) + + def _reduce(self, name, skipna=True, **kwargs): + data = self._data + mask = self._mask + + # coerce to a nan-aware float if needed + # (we explicitly use NaN within reductions) + if self._hasna: + data = self.to_numpy("float64", na_value=np.nan) + + op = getattr(nanops, "nan" + name) + result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs) + + if np.isnan(result): + return libmissing.NA + + # if we have a boolean op, don't coerce + if name in ["any", "all"]: + pass + + # if we have a preservable numeric op, + # provide coercion back to an integer type if possible + elif name in ["sum", "min", "max", "prod"]: + int_result = int(result) + if int_result == result: + result = int_result + + return result + + def _maybe_mask_result(self, result, mask, other, op_name): + """ + Parameters + ---------- + result : array-like + mask : array-like bool + other : scalar or array-like + op_name : str + """ + + # if we have a float operand we are by-definition + # a float result + # or our op is a divide + if (is_float_dtype(other) or is_float(other)) or ( + op_name in ["rtruediv", "truediv"] + ): + result[mask] = np.nan + return result + + return type(self)(result, mask, copy=False) + + @classmethod + def _create_arithmetic_method(cls, op): + op_name = op.__name__ + + @unpack_zerodim_and_defer(op.__name__) + def integer_arithmetic_method(self, other): + + omask = None + + if getattr(other, "ndim", 0) > 1: + raise NotImplementedError("can only perform ops with 1-d structures") + + if 
isinstance(other, IntegerArray): + other, omask = other._data, other._mask + + elif is_list_like(other): + other = np.asarray(other) + if other.ndim > 1: + raise NotImplementedError( + "can only perform ops with 1-d structures" + ) + if len(self) != len(other): + raise ValueError("Lengths must match") + if not (is_float_dtype(other) or is_integer_dtype(other)): + raise TypeError("can only perform ops with numeric values") + + else: + if not (is_float(other) or is_integer(other) or other is libmissing.NA): + raise TypeError("can only perform ops with numeric values") + + if omask is None: + mask = self._mask.copy() + if other is libmissing.NA: + mask |= True + else: + mask = self._mask | omask + + if op_name == "pow": + # 1 ** x is 1. + mask = np.where((self._data == 1) & ~self._mask, False, mask) + # x ** 0 is 1. + if omask is not None: + mask = np.where((other == 0) & ~omask, False, mask) + elif other is not libmissing.NA: + mask = np.where(other == 0, False, mask) + + elif op_name == "rpow": + # 1 ** x is 1. + if omask is not None: + mask = np.where((other == 1) & ~omask, False, mask) + elif other is not libmissing.NA: + mask = np.where(other == 1, False, mask) + # x ** 0 is 1. + mask = np.where((self._data == 0) & ~self._mask, False, mask) + + if other is libmissing.NA: + result = np.ones_like(self._data) + else: + with np.errstate(all="ignore"): + result = op(self._data, other) + + # divmod returns a tuple + if op_name == "divmod": + div, mod = result + return ( + self._maybe_mask_result(div, mask, other, "floordiv"), + self._maybe_mask_result(mod, mask, other, "mod"), + ) + + return self._maybe_mask_result(result, mask, other, op_name) + + name = f"__{op.__name__}__" + return set_function_name(integer_arithmetic_method, name, cls) + + +IntegerArray._add_arithmetic_ops() +IntegerArray._add_comparison_ops() + + +_dtype_docstring = """ +An ExtensionDtype for {dtype} integer data. + +.. 
versionchanged:: 1.0.0 + + Now uses :attr:`pandas.NA` as its missing value, + rather than :attr:`numpy.nan`. + +Attributes +---------- +None + +Methods +------- +None +""" + +# create the Dtype +Int8Dtype = register_extension_dtype( + type( + "Int8Dtype", + (_IntegerDtype,), + { + "type": np.int8, + "name": "Int8", + "__doc__": _dtype_docstring.format(dtype="int8"), + }, + ) +) + +Int16Dtype = register_extension_dtype( + type( + "Int16Dtype", + (_IntegerDtype,), + { + "type": np.int16, + "name": "Int16", + "__doc__": _dtype_docstring.format(dtype="int16"), + }, + ) +) + +Int32Dtype = register_extension_dtype( + type( + "Int32Dtype", + (_IntegerDtype,), + { + "type": np.int32, + "name": "Int32", + "__doc__": _dtype_docstring.format(dtype="int32"), + }, + ) +) + +Int64Dtype = register_extension_dtype( + type( + "Int64Dtype", + (_IntegerDtype,), + { + "type": np.int64, + "name": "Int64", + "__doc__": _dtype_docstring.format(dtype="int64"), + }, + ) +) + +UInt8Dtype = register_extension_dtype( + type( + "UInt8Dtype", + (_IntegerDtype,), + { + "type": np.uint8, + "name": "UInt8", + "__doc__": _dtype_docstring.format(dtype="uint8"), + }, + ) +) + +UInt16Dtype = register_extension_dtype( + type( + "UInt16Dtype", + (_IntegerDtype,), + { + "type": np.uint16, + "name": "UInt16", + "__doc__": _dtype_docstring.format(dtype="uint16"), + }, + ) +) + +UInt32Dtype = register_extension_dtype( + type( + "UInt32Dtype", + (_IntegerDtype,), + { + "type": np.uint32, + "name": "UInt32", + "__doc__": _dtype_docstring.format(dtype="uint32"), + }, + ) +) + +UInt64Dtype = register_extension_dtype( + type( + "UInt64Dtype", + (_IntegerDtype,), + { + "type": np.uint64, + "name": "UInt64", + "__doc__": _dtype_docstring.format(dtype="uint64"), + }, + ) +) + +_dtypes = { + "int8": Int8Dtype(), + "int16": Int16Dtype(), + "int32": Int32Dtype(), + "int64": Int64Dtype(), + "uint8": UInt8Dtype(), + "uint16": UInt16Dtype(), + "uint32": UInt32Dtype(), + "uint64": UInt64Dtype(), +} diff --git 
a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py new file mode 100644 index 00000000..cc41ac1d --- /dev/null +++ b/pandas/core/arrays/interval.py @@ -0,0 +1,1361 @@ +from operator import le, lt +import textwrap + +import numpy as np + +from pandas._config import get_option + +from pandas._libs.interval import Interval, IntervalMixin, intervals_to_interval_bounds +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender + +from pandas.core.dtypes.cast import maybe_convert_platform +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64_any_dtype, + is_float_dtype, + is_integer_dtype, + is_interval, + is_interval_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import IntervalDtype +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, + ABCExtensionArray, + ABCIndexClass, + ABCInterval, + ABCIntervalIndex, + ABCPeriodIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna, notna + +from pandas.core.algorithms import take, value_counts +from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs +from pandas.core.arrays.categorical import Categorical +import pandas.core.common as com +from pandas.core.construction import array +from pandas.core.indexers import check_array_indexer +from pandas.core.indexes.base import ensure_index + +_VALID_CLOSED = {"left", "right", "both", "neither"} +_interval_shared_docs = {} + +_shared_docs_kwargs = dict( + klass="IntervalArray", qualname="arrays.IntervalArray", name="" +) + + +_interval_shared_docs[ + "class" +] = """ +%(summary)s + +.. versionadded:: %(versionadded)s + +Parameters +---------- +data : array-like (1-dimensional) + Array-like containing Interval objects from which to build the + %(klass)s. 
+closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both or + neither. +dtype : dtype or None, default None + If None, dtype will be inferred. + + .. versionadded:: 0.23.0 +copy : bool, default False + Copy the input data. +%(name)s\ +verify_integrity : bool, default True + Verify that the %(klass)s is valid. + +Attributes +---------- +left +right +closed +mid +length +is_empty +is_non_overlapping_monotonic +%(extra_attributes)s\ + +Methods +------- +from_arrays +from_tuples +from_breaks +contains +overlaps +set_closed +to_tuples +%(extra_methods)s\ + +See Also +-------- +Index : The base pandas Index type. +Interval : A bounded slice-like interval; the elements of an %(klass)s. +interval_range : Function to create a fixed frequency IntervalIndex. +cut : Bin values into discrete Intervals. +qcut : Bin values into equal-sized Intervals based on rank or sample quantiles. + +Notes +----- +See the `user guide +`_ +for more. + +%(examples)s\ +""" + + +@Appender( + _interval_shared_docs["class"] + % dict( + klass="IntervalArray", + summary="Pandas array for interval data that are closed on the same side.", + versionadded="0.24.0", + name="", + extra_attributes="", + extra_methods="", + examples=textwrap.dedent( + """\ + Examples + -------- + A new ``IntervalArray`` can be constructed directly from an array-like of + ``Interval`` objects: + + >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]) + + [(0, 1], (1, 5]] + Length: 2, closed: right, dtype: interval[int64] + + It may also be constructed using one of the constructor + methods: :meth:`IntervalArray.from_arrays`, + :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`. 
+ """ + ), + ) +) +class IntervalArray(IntervalMixin, ExtensionArray): + ndim = 1 + can_hold_na = True + _na_value = _fill_value = np.nan + + def __new__(cls, data, closed=None, dtype=None, copy=False, verify_integrity=True): + + if isinstance(data, ABCSeries) and is_interval_dtype(data): + data = data.values + + if isinstance(data, (cls, ABCIntervalIndex)): + left = data.left + right = data.right + closed = closed or data.closed + else: + + # don't allow scalars + if is_scalar(data): + msg = ( + f"{cls.__name__}(...) must be called with a collection " + f"of some kind, {data} was passed" + ) + raise TypeError(msg) + + # might need to convert empty or purely na data + data = maybe_convert_platform_interval(data) + left, right, infer_closed = intervals_to_interval_bounds( + data, validate_closed=closed is None + ) + closed = closed or infer_closed + + return cls._simple_new( + left, + right, + closed, + copy=copy, + dtype=dtype, + verify_integrity=verify_integrity, + ) + + @classmethod + def _simple_new( + cls, left, right, closed=None, copy=False, dtype=None, verify_integrity=True + ): + result = IntervalMixin.__new__(cls) + + closed = closed or "right" + left = ensure_index(left, copy=copy) + right = ensure_index(right, copy=copy) + + if dtype is not None: + # GH 19262: dtype must be an IntervalDtype to override inferred + dtype = pandas_dtype(dtype) + if not is_interval_dtype(dtype): + msg = f"dtype must be an IntervalDtype, got {dtype}" + raise TypeError(msg) + elif dtype.subtype is not None: + left = left.astype(dtype.subtype) + right = right.astype(dtype.subtype) + + # coerce dtypes to match if needed + if is_float_dtype(left) and is_integer_dtype(right): + right = right.astype(left.dtype) + elif is_float_dtype(right) and is_integer_dtype(left): + left = left.astype(right.dtype) + + if type(left) != type(right): + msg = ( + f"must not have differing left [{type(left).__name__}] and " + f"right [{type(right).__name__}] types" + ) + raise ValueError(msg) + elif 
is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalArray" + ) + raise TypeError(msg) + elif isinstance(left, ABCPeriodIndex): + msg = "Period dtypes are not supported, use a PeriodIndex instead" + raise ValueError(msg) + elif isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz): + msg = ( + "left and right must have the same time zone, got " + f"'{left.tz}' and '{right.tz}'" + ) + raise ValueError(msg) + + result._left = left + result._right = right + result._closed = closed + if verify_integrity: + result._validate() + return result + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars, dtype=dtype, copy=copy) + + @classmethod + def _from_factorized(cls, values, original): + if len(values) == 0: + # An empty array returns object-dtype here. We can't create + # a new IA from an (empty) object-dtype array, so turn it into the + # correct dtype. + values = values.astype(original.dtype.subtype) + return cls(values, closed=original.closed) + + _interval_shared_docs["from_breaks"] = textwrap.dedent( + """ + Construct an %(klass)s from an array of splits. + + Parameters + ---------- + breaks : array-like (1-dimensional) + Left and right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + Copy the data. + dtype : dtype or None, default None + If None, dtype will be inferred. + + .. versionadded:: 0.23.0 + + Returns + ------- + %(klass)s + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct from a left and right array. + %(klass)s.from_tuples : Construct from a sequence of tuples. 
+ + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_breaks"] + % dict( + klass="IntervalArray", + examples=textwrap.dedent( + """\ + Examples + -------- + >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3]) + + [(0, 1], (1, 2], (2, 3]] + Length: 3, closed: right, dtype: interval[int64] + """ + ), + ) + ) + def from_breaks(cls, breaks, closed="right", copy=False, dtype=None): + breaks = maybe_convert_platform_interval(breaks) + + return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype) + + _interval_shared_docs["from_arrays"] = textwrap.dedent( + """ + Construct from two arrays defining the left and right bounds. + + Parameters + ---------- + left : array-like (1-dimensional) + Left bounds for each interval. + right : array-like (1-dimensional) + Right bounds for each interval. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + Copy the data. + dtype : dtype, optional + If None, dtype will be inferred. + + .. versionadded:: 0.23.0 + + Returns + ------- + %(klass)s + + Raises + ------ + ValueError + When a value is missing in only one of `left` or `right`. + When a value in `left` is greater than the corresponding value + in `right`. + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_breaks : Construct an %(klass)s from an array of + splits. + %(klass)s.from_tuples : Construct an %(klass)s from an + array-like of tuples. + + Notes + ----- + Each element of `left` must be less than or equal to the `right` + element at the same position. If an element is missing, it must be + missing in both `left` and `right`. A TypeError is raised when + using an unsupported type for `left` or `right`. At the moment, + 'category', 'object', and 'string' subtypes are not supported. 
+ + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_arrays"] + % dict( + klass="IntervalArray", + examples=textwrap.dedent( + """\ + >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3]) + + [(0, 1], (1, 2], (2, 3]] + Length: 3, closed: right, dtype: interval[int64] + """ + ), + ) + ) + def from_arrays(cls, left, right, closed="right", copy=False, dtype=None): + left = maybe_convert_platform_interval(left) + right = maybe_convert_platform_interval(right) + + return cls._simple_new( + left, right, closed, copy=copy, dtype=dtype, verify_integrity=True + ) + + _interval_shared_docs["from_tuples"] = textwrap.dedent( + """ + Construct an %(klass)s from an array-like of tuples. + + Parameters + ---------- + data : array-like (1-dimensional) + Array of tuples. + closed : {'left', 'right', 'both', 'neither'}, default 'right' + Whether the intervals are closed on the left-side, right-side, both + or neither. + copy : bool, default False + By-default copy the data, this is compat only and ignored. + dtype : dtype or None, default None + If None, dtype will be inferred. + + .. versionadded:: 0.23.0 + + Returns + ------- + %(klass)s + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex. + %(klass)s.from_arrays : Construct an %(klass)s from a left and + right array. + %(klass)s.from_breaks : Construct an %(klass)s from an array of + splits. 
+ + %(examples)s\ + """ + ) + + @classmethod + @Appender( + _interval_shared_docs["from_tuples"] + % dict( + klass="IntervalArray", + examples=textwrap.dedent( + """\ + Examples + -------- + >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]) + + [(0, 1], (1, 2]] + Length: 2, closed: right, dtype: interval[int64] + """ + ), + ) + ) + def from_tuples(cls, data, closed="right", copy=False, dtype=None): + if len(data): + left, right = [], [] + else: + # ensure that empty data keeps input dtype + left = right = data + + for d in data: + if isna(d): + lhs = rhs = np.nan + else: + name = cls.__name__ + try: + # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...] + lhs, rhs = d + except ValueError: + msg = f"{name}.from_tuples requires tuples of length 2, got {d}" + raise ValueError(msg) + except TypeError: + msg = f"{name}.from_tuples received an invalid item, {d}" + raise TypeError(msg) + left.append(lhs) + right.append(rhs) + + return cls.from_arrays(left, right, closed, copy=False, dtype=dtype) + + def _validate(self): + """Verify that the IntervalArray is valid. 
+ + Checks that + + * closed is valid + * left and right match lengths + * left and right have the same missing values + * left is always below right + """ + if self.closed not in _VALID_CLOSED: + msg = f"invalid option for 'closed': {self.closed}" + raise ValueError(msg) + if len(self.left) != len(self.right): + msg = "left and right must have the same length" + raise ValueError(msg) + left_mask = notna(self.left) + right_mask = notna(self.right) + if not (left_mask == right_mask).all(): + msg = ( + "missing values must be missing in the same " + "location both left and right sides" + ) + raise ValueError(msg) + if not (self.left[left_mask] <= self.right[left_mask]).all(): + msg = "left side of interval must be <= right side" + raise ValueError(msg) + + # --------- + # Interface + # --------- + def __iter__(self): + return iter(np.asarray(self)) + + def __len__(self) -> int: + return len(self.left) + + def __getitem__(self, value): + value = check_array_indexer(self, value) + left = self.left[value] + right = self.right[value] + + # scalar + if not isinstance(left, ABCIndexClass): + if is_scalar(left) and isna(left): + return self._fill_value + if np.ndim(left) > 1: + # GH#30588 multi-dimensional indexer disallowed + raise ValueError("multi-dimensional indexing not allowed") + return Interval(left, right, self.closed) + + return self._shallow_copy(left, right) + + def __setitem__(self, key, value): + # na value: need special casing to set directly on numpy arrays + needs_float_conversion = False + if is_scalar(value) and isna(value): + if is_integer_dtype(self.dtype.subtype): + # can't set NaN on a numpy integer array + needs_float_conversion = True + elif is_datetime64_any_dtype(self.dtype.subtype): + # need proper NaT to set directly on the numpy array + value = np.datetime64("NaT") + elif is_timedelta64_dtype(self.dtype.subtype): + # need proper NaT to set directly on the numpy array + value = np.timedelta64("NaT") + value_left, value_right = value, value + + # 
    def __eq__(self, other):
        """
        Compare elementwise against ``other`` and return a boolean ndarray.

        A position is True only when ``other`` supplies an interval there
        with the same ``closed`` side and the same left and right endpoints.
        Non-interval scalars and non-interval, non-object dtypes match
        nothing.

        Raises
        ------
        ValueError
            If ``other`` is list-like and its length differs from ``self``.
        """
        # ensure pandas array for list-like and eliminate non-interval scalars
        if is_list_like(other):
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")
            # NOTE(review): `array` is defined outside this block; presumably
            # the pandas array constructor -- confirm against the imports.
            other = array(other)
        elif not isinstance(other, Interval):
            # non-interval scalar -> no matches
            return np.zeros(len(self), dtype=bool)

        # determine the dtype of the elements we want to compare
        if isinstance(other, Interval):
            other_dtype = "interval"
        elif not is_categorical_dtype(other):
            other_dtype = other.dtype
        else:
            # for categorical defer to categories for dtype
            other_dtype = other.categories.dtype

            # extract intervals if we have interval categories with matching closed
            if is_interval_dtype(other_dtype):
                if self.closed != other.categories.closed:
                    return np.zeros(len(self), dtype=bool)
                # Expand the codes into one interval per element so the
                # endpoint comparison below is positional.
                other = other.categories.take(other.codes)

        # interval-like -> need same closed and matching endpoints
        if is_interval_dtype(other_dtype):
            if self.closed != other.closed:
                return np.zeros(len(self), dtype=bool)
            return (self.left == other.left) & (self.right == other.right)

        # non-interval/non-object dtype -> no matches
        if not is_object_dtype(other_dtype):
            return np.zeros(len(self), dtype=bool)

        # object dtype -> iteratively check for intervals
        result = np.zeros(len(self), dtype=bool)
        for i, obj in enumerate(other):
            # need object to be an Interval with same closed and endpoints
            if (
                isinstance(obj, Interval)
                and self.closed == obj.closed
                and self.left[i] == obj.left
                and self.right[i] == obj.right
            ):
                result[i] = True

        return result
+ + Returns + ------- + filled : IntervalArray with NA/NaN filled + """ + if method is not None: + raise TypeError("Filling by method is not supported for IntervalArray.") + if limit is not None: + raise TypeError("limit is not supported for IntervalArray.") + + if not isinstance(value, ABCInterval): + msg = ( + "'IntervalArray.fillna' only supports filling with a " + f"scalar 'pandas.Interval'. Got a '{type(value).__name__}' instead." + ) + raise TypeError(msg) + + value = getattr(value, "_values", value) + self._check_closed_matches(value, name="value") + + left = self.left.fillna(value=value.left) + right = self.right.fillna(value=value.right) + return self._shallow_copy(left, right) + + @property + def dtype(self): + return IntervalDtype(self.left.dtype) + + def astype(self, dtype, copy=True): + """ + Cast to an ExtensionArray or NumPy array with dtype 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ExtensionArray or ndarray + ExtensionArray or NumPy ndarray with 'dtype' for its dtype. + """ + dtype = pandas_dtype(dtype) + if is_interval_dtype(dtype): + if dtype == self.dtype: + return self.copy() if copy else self + + # need to cast to different subtype + try: + new_left = self.left.astype(dtype.subtype) + new_right = self.right.astype(dtype.subtype) + except TypeError: + msg = ( + f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" + ) + raise TypeError(msg) + return self._shallow_copy(new_left, new_right) + elif is_categorical_dtype(dtype): + return Categorical(np.asarray(self)) + # TODO: This try/except will be repeated. 
+ try: + return np.asarray(self).astype(dtype, copy=copy) + except (TypeError, ValueError): + msg = f"Cannot cast {type(self).__name__} to dtype {dtype}" + raise TypeError(msg) + + @classmethod + def _concat_same_type(cls, to_concat): + """ + Concatenate multiple IntervalArray + + Parameters + ---------- + to_concat : sequence of IntervalArray + + Returns + ------- + IntervalArray + """ + closed = {interval.closed for interval in to_concat} + if len(closed) != 1: + raise ValueError("Intervals must all be closed on the same side.") + closed = closed.pop() + + left = np.concatenate([interval.left for interval in to_concat]) + right = np.concatenate([interval.right for interval in to_concat]) + return cls._simple_new(left, right, closed=closed, copy=False) + + def _shallow_copy(self, left=None, right=None, closed=None): + """ + Return a new IntervalArray with the replacement attributes + + Parameters + ---------- + left : array-like + Values to be used for the left-side of the the intervals. + If None, the existing left and right values will be used. + + right : array-like + Values to be used for the right-side of the the intervals. + If None and left is IntervalArray-like, the left and right + of the IntervalArray-like will be used. + + closed : {'left', 'right', 'both', 'neither'}, optional + Whether the intervals are closed on the left-side, right-side, both + or neither. If None, the existing closed will be used. + """ + if left is None: + + # no values passed + left, right = self.left, self.right + + elif right is None: + + # only single value passed, could be an IntervalArray + # or array of Intervals + if not isinstance(left, (type(self), ABCIntervalIndex)): + left = type(self)(left) + + left, right = left.left, left.right + else: + + # both left and right are values + pass + + closed = closed or self.closed + return self._simple_new(left, right, closed=closed, verify_integrity=False) + + def copy(self): + """ + Return a copy of the array. 
+ + Returns + ------- + IntervalArray + """ + left = self.left.copy(deep=True) + right = self.right.copy(deep=True) + closed = self.closed + # TODO: Could skip verify_integrity here. + return type(self).from_arrays(left, right, closed=closed) + + def isna(self): + return isna(self.left) + + @property + def nbytes(self) -> int: + return self.left.nbytes + self.right.nbytes + + @property + def size(self) -> int: + # Avoid materializing self.values + return self.left.size + + def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray: + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + # ExtensionArray.shift doesn't work for two reasons + # 1. IntervalArray.dtype.na_value may not be correct for the dtype. + # 2. IntervalArray._from_sequence only accepts NaN for missing values, + # not other values like NaT + + empty_len = min(abs(periods), len(self)) + if isna(fill_value): + fill_value = self.left._na_value + empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1)) + else: + empty = self._from_sequence([fill_value] * empty_len) + + if periods > 0: + a = empty + b = self[:-periods] + else: + a = self[abs(periods) :] + b = empty + return self._concat_same_type([a, b]) + + def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs): + """ + Take elements from the IntervalArray. + + Parameters + ---------- + indices : sequence of integers + Indices to be taken. + + allow_fill : bool, default False + How to handle negative values in `indices`. + + * False: negative values in `indices` indicate positional indices + from the right (the default). This is similar to + :func:`numpy.take`. + + * True: negative values in `indices` indicate + missing values. These values are set to `fill_value`. Any other + other negative values raise a ``ValueError``. + + fill_value : Interval or NA, optional + Fill value to use for NA-indices when `allow_fill` is True. 
+ This may be ``None``, in which case the default NA value for + the type, ``self.dtype.na_value``, is used. + + For many ExtensionArrays, there will be two representations of + `fill_value`: a user-facing "boxed" scalar, and a low-level + physical NA value. `fill_value` should be the user-facing version, + and the implementation should handle translating that to the + physical version for processing the take if necessary. + + axis : any, default None + Present for compat with IntervalIndex; does nothing. + + Returns + ------- + IntervalArray + + Raises + ------ + IndexError + When the indices are out of bounds for the array. + ValueError + When `indices` contains negative values other than ``-1`` + and `allow_fill` is True. + """ + nv.validate_take(tuple(), kwargs) + + fill_left = fill_right = fill_value + if allow_fill: + if fill_value is None: + fill_left = fill_right = self.left._na_value + elif is_interval(fill_value): + self._check_closed_matches(fill_value, name="fill_value") + fill_left, fill_right = fill_value.left, fill_value.right + elif not is_scalar(fill_value) and notna(fill_value): + msg = ( + "'IntervalArray.fillna' only supports filling with a " + "'scalar pandas.Interval or NA'. " + f"Got a '{type(fill_value).__name__}' instead." + ) + raise ValueError(msg) + + left_take = take( + self.left, indices, allow_fill=allow_fill, fill_value=fill_left + ) + right_take = take( + self.right, indices, allow_fill=allow_fill, fill_value=fill_right + ) + + return self._shallow_copy(left_take, right_take) + + def value_counts(self, dropna=True): + """ + Returns a Series containing counts of each interval. + + Parameters + ---------- + dropna : bool, default True + Don't include counts of NaN. + + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + """ + # TODO: implement this is a non-naive way! 
+ return value_counts(np.asarray(self), dropna=dropna) + + # Formatting + + def _format_data(self): + + # TODO: integrate with categorical and make generic + # name argument is unused here; just for compat with base / categorical + n = len(self) + max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10) + + formatter = str + + if n == 0: + summary = "[]" + elif n == 1: + first = formatter(self[0]) + summary = f"[{first}]" + elif n == 2: + first = formatter(self[0]) + last = formatter(self[-1]) + summary = f"[{first}, {last}]" + else: + + if n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in self[:n]] + tail = [formatter(x) for x in self[-n:]] + head_str = ", ".join(head) + tail_str = ", ".join(tail) + summary = f"[{head_str} ... {tail_str}]" + else: + tail = [formatter(x) for x in self] + tail_str = ", ".join(tail) + summary = f"[{tail_str}]" + + return summary + + def __repr__(self) -> str: + # the short repr has no trailing newline, while the truncated + # repr does. So we include a newline in our template, and strip + # any trailing newlines from format_object_summary + data = self._format_data() + class_name = f"<{type(self).__name__}>\n" + + template = ( + f"{class_name}" + f"{data}\n" + f"Length: {len(self)}, closed: {self.closed}, dtype: {self.dtype}" + ) + return template + + def _format_space(self): + space = " " * (len(type(self).__name__) + 1) + return f"\n{space}" + + @property + def left(self): + """ + Return the left endpoints of each Interval in the IntervalArray as + an Index. + """ + return self._left + + @property + def right(self): + """ + Return the right endpoints of each Interval in the IntervalArray as + an Index. + """ + return self._right + + @property + def closed(self): + """ + Whether the intervals are closed on the left-side, right-side, both or + neither. 
+ """ + return self._closed + + _interval_shared_docs["set_closed"] = textwrap.dedent( + """ + Return an %(klass)s identical to the current one, but closed on the + specified side. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + closed : {'left', 'right', 'both', 'neither'} + Whether the intervals are closed on the left-side, right-side, both + or neither. + + Returns + ------- + new_index : %(klass)s + + %(examples)s\ + """ + ) + + @Appender( + _interval_shared_docs["set_closed"] + % dict( + klass="IntervalArray", + examples=textwrap.dedent( + """\ + Examples + -------- + >>> index = pd.arrays.IntervalArray.from_breaks(range(4)) + >>> index + + [(0, 1], (1, 2], (2, 3]] + Length: 3, closed: right, dtype: interval[int64] + >>> index.set_closed('both') + + [[0, 1], [1, 2], [2, 3]] + Length: 3, closed: both, dtype: interval[int64] + """ + ), + ) + ) + def set_closed(self, closed): + if closed not in _VALID_CLOSED: + msg = f"invalid option for 'closed': {closed}" + raise ValueError(msg) + + return self._shallow_copy(closed=closed) + + @property + def length(self): + """ + Return an Index with entries denoting the length of each Interval in + the IntervalArray. + """ + try: + return self.right - self.left + except TypeError: + # length not defined for some types, e.g. string + msg = ( + "IntervalArray contains Intervals without defined length, " + "e.g. Intervals with string endpoints" + ) + raise TypeError(msg) + + @property + def mid(self): + """ + Return the midpoint of each Interval in the IntervalArray as an Index. + """ + try: + return 0.5 * (self.left + self.right) + except TypeError: + # datetime safe version + return self.left + 0.5 * self.length + + _interval_shared_docs[ + "is_non_overlapping_monotonic" + ] = """ + Return True if the %(klass)s is non-overlapping (no Intervals share + points) and is either monotonic increasing or monotonic decreasing, + else False. 
+ """ + # https://github.com/python/mypy/issues/1362 + # Mypy does not support decorated properties + @property # type: ignore + @Appender( + _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs + ) + def is_non_overlapping_monotonic(self): + # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... ) + # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...) + # we already require left <= right + + # strict inequality for closed == 'both'; equality implies overlapping + # at a point when both sides of intervals are included + if self.closed == "both": + return bool( + (self.right[:-1] < self.left[1:]).all() + or (self.left[:-1] > self.right[1:]).all() + ) + + # non-strict inequality when closed != 'both'; at least one side is + # not included in the intervals, so equality does not imply overlapping + return bool( + (self.right[:-1] <= self.left[1:]).all() + or (self.left[:-1] >= self.right[1:]).all() + ) + + # Conversion + def __array__(self, dtype=None) -> np.ndarray: + """ + Return the IntervalArray's data as a numpy array of Interval + objects (with dtype='object') + """ + left = self.left + right = self.right + mask = self.isna() + closed = self._closed + + result = np.empty(len(left), dtype=object) + for i in range(len(left)): + if mask[i]: + result[i] = np.nan + else: + result[i] = Interval(left[i], right[i], closed) + return result + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. 
+ """ + import pyarrow + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + try: + subtype = pyarrow.from_numpy_dtype(self.dtype.subtype) + except TypeError: + raise TypeError( + "Conversion to arrow with subtype '{}' " + "is not supported".format(self.dtype.subtype) + ) + interval_type = ArrowIntervalType(subtype, self.closed) + storage_array = pyarrow.StructArray.from_arrays( + [ + pyarrow.array(self.left, type=subtype, from_pandas=True), + pyarrow.array(self.right, type=subtype, from_pandas=True), + ], + names=["left", "right"], + ) + mask = self.isna() + if mask.any(): + # if there are missing values, set validity bitmap also on the array level + null_bitmap = pyarrow.array(~mask).buffers()[1] + storage_array = pyarrow.StructArray.from_buffers( + storage_array.type, + len(storage_array), + [null_bitmap], + children=[storage_array.field(0), storage_array.field(1)], + ) + + if type is not None: + if type.equals(interval_type.storage_type): + return storage_array + elif isinstance(type, ArrowIntervalType): + # ensure we have the same subtype and closed attributes + if not type.equals(interval_type): + raise TypeError( + "Not supported to convert IntervalArray to type with " + "different 'subtype' ({0} vs {1}) and 'closed' ({2} vs {3}) " + "attributes".format( + self.dtype.subtype, type.subtype, self.closed, type.closed + ) + ) + else: + raise TypeError( + "Not supported to convert IntervalArray to '{0}' type".format(type) + ) + + return pyarrow.ExtensionArray.from_storage(interval_type, storage_array) + + _interval_shared_docs[ + "to_tuples" + ] = """ + Return an %(return_type)s of tuples of the form (left, right). + + Parameters + ---------- + na_tuple : boolean, default True + Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA + value itself if False, ``nan``. + + .. 
versionadded:: 0.23.0 + + Returns + ------- + tuples: %(return_type)s + %(examples)s\ + """ + + @Appender( + _interval_shared_docs["to_tuples"] % dict(return_type="ndarray", examples="") + ) + def to_tuples(self, na_tuple=True): + tuples = com.asarray_tuplesafe(zip(self.left, self.right)) + if not na_tuple: + # GH 18756 + tuples = np.where(~self.isna(), tuples, np.nan) + return tuples + + @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs) + def repeat(self, repeats, axis=None): + nv.validate_repeat(tuple(), dict(axis=axis)) + left_repeat = self.left.repeat(repeats) + right_repeat = self.right.repeat(repeats) + return self._shallow_copy(left=left_repeat, right=right_repeat) + + _interval_shared_docs["contains"] = textwrap.dedent( + """ + Check elementwise if the Intervals contain the value. + + Return a boolean mask whether the value is contained in the Intervals + of the %(klass)s. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + other : scalar + The value to check whether it is contained in the Intervals. + + Returns + ------- + boolean array + + See Also + -------- + Interval.contains : Check whether Interval object contains value. + %(klass)s.overlaps : Check if an Interval overlaps the values in the + %(klass)s. 
+ + Examples + -------- + %(examples)s + >>> intervals.contains(0.5) + array([ True, False, False]) + """ + ) + + @Appender( + _interval_shared_docs["contains"] + % dict( + klass="IntervalArray", + examples=textwrap.dedent( + """\ + >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)]) + >>> intervals + + [(0, 1], (1, 3], (2, 4]] + Length: 3, closed: right, dtype: interval[int64] + """ + ), + ) + ) + def contains(self, other): + if isinstance(other, Interval): + raise NotImplementedError("contains not implemented for two intervals") + + return (self.left < other if self.open_left else self.left <= other) & ( + other < self.right if self.open_right else other <= self.right + ) + + _interval_shared_docs["overlaps"] = textwrap.dedent( + """ + Check elementwise if an Interval overlaps the values in the %(klass)s. + + Two intervals overlap if they share a common point, including closed + endpoints. Intervals that only have an open endpoint in common do not + overlap. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + other : %(klass)s + Interval to check against for an overlap. + + Returns + ------- + ndarray + Boolean array positionally indicating where an overlap occurs. + + See Also + -------- + Interval.overlaps : Check whether two Interval objects overlap. 
def maybe_convert_platform_interval(values):
    """
    Platform-convert ``values`` with IntervalArray-specific special cases.

    Wraps maybe_convert_platform, changing the result for inputs whose
    default conversion would be unusable for IntervalArray: an empty list
    or tuple becomes an empty int64 array instead of an object-dtype one.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
    """
    is_empty_builtin = isinstance(values, (list, tuple)) and not len(values)
    if is_empty_builtin:
        # GH 19016
        # empty lists/tuples get object dtype by default, but this is
        # prohibited for IntervalArray, so coerce to integer instead
        return np.array([], dtype=np.int64)

    if is_categorical_dtype(values):
        values = np.asarray(values)

    return maybe_convert_platform(values)
    def to_numpy(
        self, dtype=None, copy=False, na_value: "Scalar" = lib.no_default,
    ):
        """
        Convert to a NumPy Array.

        By default converts to an object-dtype NumPy array. Specify the `dtype` and
        `na_value` keywords to customize the conversion.

        Parameters
        ----------
        dtype : dtype, default object
            The numpy dtype to convert to.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            the array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary. This is typically
            only possible when no missing values are present and `dtype`
            is the equivalent numpy dtype.
        na_value : scalar, optional
            Scalar missing value indicator to use in numpy array. Defaults
            to the native missing value indicator of this array (pd.NA).

        Returns
        -------
        numpy.ndarray

        Raises
        ------
        ValueError
            If missing values are present, `dtype` is neither an object nor
            a string dtype, and `na_value` is left as the default NA.

        Examples
        --------
        An object-dtype is the default result

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a.to_numpy()
        array([True, False, NA], dtype=object)

        When no missing values are present, an equivalent dtype can be used.

        >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
        array([ True, False])
        >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
        array([1, 2])

        However, requesting such dtype will raise a ValueError if
        missing values are present and the default missing value :attr:`NA`
        is used.

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a

        [True, False, NA]
        Length: 3, dtype: boolean

        >>> a.to_numpy(dtype="bool")
        Traceback (most recent call last):
        ...
        ValueError: cannot convert to 'bool'-dtype NumPy array with missing values. Specify an appropriate 'na_value' for this dtype.

        Specify a valid `na_value` instead

        >>> a.to_numpy(dtype="bool", na_value=False)
        array([ True, False, False])
        """
        # Resolve the defaults: NA as the missing-value marker, object dtype.
        if na_value is lib.no_default:
            na_value = libmissing.NA
        if dtype is None:
            dtype = object
        if self._hasna:
            # NA is only allowed as the marker for object or string dtypes.
            if (
                not (is_object_dtype(dtype) or is_string_dtype(dtype))
                and na_value is libmissing.NA
            ):
                raise ValueError(
                    f"cannot convert to '{dtype}'-dtype NumPy array "
                    "with missing values. Specify an appropriate 'na_value' "
                    "for this dtype."
                )
            # don't pass copy to astype -> always need a copy since we are mutating
            data = self._data.astype(dtype)
            data[self._mask] = na_value
        else:
            data = self._data.astype(dtype, copy=copy)
        return data
def take(self, indexer, allow_fill=False, fill_value=None):
    """
    Take elements from the data and the mask, returning a new array.

    Parameters
    ----------
    indexer : sequence of int
        Positions to take. Compared against -1 to find the slots that
        receive `fill_value` when `allow_fill` is True.
    allow_fill : bool, default False
        Forwarded to the module-level ``take`` helper for both the data
        and the mask.
    fill_value : scalar, optional
        Value written into the filled slots. When it is NA the filled
        slots stay masked instead.

    Returns
    -------
    Same type as ``self``, built from the taken data and mask.
    """
    # Take the data with a dtype-compatible placeholder when the requested
    # fill value is NA, so the underlying ndarray is not upcast.
    if isna(fill_value):
        data_fill = self._internal_fill_value
    else:
        data_fill = fill_value

    taken_data = take(
        self._data, indexer, fill_value=data_fill, allow_fill=allow_fill
    )
    taken_mask = take(self._mask, indexer, fill_value=True, allow_fill=allow_fill)

    # When filling with a real value, only the slots selected by -1 are
    # filled; pre-existing missing values keep their mask.
    # TODO(jreback) what if we have a non-na float as a fill value?
    if allow_fill and notna(fill_value):
        filled_here = np.asarray(indexer) == -1
        taken_data[filled_here] = fill_value
        taken_mask = taken_mask ^ filled_here

    return type(self)(taken_data, taken_mask, copy=False)
class PandasDtype(ExtensionDtype):
    """
    A Pandas ExtensionDtype for NumPy dtypes.

    .. versionadded:: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    dtype : numpy.dtype
        Anything ``numpy.dtype`` accepts; it is normalised on construction.
    """

    _metadata = ("_dtype",)

    def __init__(self, dtype):
        np_dtype = np.dtype(dtype)
        self._dtype = np_dtype
        self._type = np_dtype.type

    def __repr__(self) -> str:
        return f"PandasDtype({self.name!r})"

    @property
    def numpy_dtype(self):
        """The NumPy dtype this PandasDtype wraps."""
        return self._dtype

    @property
    def name(self):
        """The name of the wrapped NumPy dtype."""
        return self._dtype.name

    @property
    def type(self):
        """The scalar type of the wrapped NumPy dtype."""
        return self._type

    @property
    def kind(self):
        """The one-character kind code of the wrapped NumPy dtype."""
        return self._dtype.kind

    @property
    def itemsize(self):
        """The element size of this data-type object."""
        return self._dtype.itemsize

    @property
    def _is_numeric(self):
        # exclude object, str, unicode, void.
        return self.kind in {"b", "i", "u", "f", "c"}

    @property
    def _is_boolean(self):
        return self.kind == "b"

    @classmethod
    def construct_from_string(cls, string):
        """
        Build a PandasDtype from anything ``numpy.dtype`` can parse.

        Raises
        ------
        TypeError
            If the construction raises TypeError; the original error is
            chained as the cause.
        """
        try:
            built = cls(np.dtype(string))
        except TypeError as err:
            message = f"Cannot construct a 'PandasDtype' from '{string}'"
            raise TypeError(message) from err
        return built

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return PandasArray
We check for + # that _typ to ensure that that users don't unnecessarily use EAs inside + # pandas internals, which turns off things like block consolidation. + _typ = "npy_extension" + __array_priority__ = 1000 + _ndarray: np.ndarray + + # ------------------------------------------------------------------------ + # Constructors + + def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False): + if isinstance(values, type(self)): + values = values._ndarray + if not isinstance(values, np.ndarray): + raise ValueError( + f"'values' must be a NumPy array, not {type(values).__name__}" + ) + + if values.ndim != 1: + raise ValueError("PandasArray must be 1-dimensional.") + + if copy: + values = values.copy() + + self._ndarray = values + self._dtype = PandasDtype(values.dtype) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + if isinstance(dtype, PandasDtype): + dtype = dtype._dtype + + result = np.asarray(scalars, dtype=dtype) + if copy and result is scalars: + result = result.copy() + return cls(result) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values) + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate(to_concat)) + + # ------------------------------------------------------------------------ + # Data + + @property + def dtype(self): + return self._dtype + + # ------------------------------------------------------------------------ + # NumPy Array Interface + + def __array__(self, dtype=None) -> np.ndarray: + return np.asarray(self._ndarray, dtype=dtype) + + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # Lightly modified version of + # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\ + # numpy.lib.mixins.NDArrayOperatorsMixin.html + # The primary modification is not boxing scalar return values + # in PandasArray, since pandas' ExtensionArrays are 1-d. 
def fillna(self, value=None, method=None, limit=None):
    """
    Fill missing values, using either `value` or a fill `method`.

    Parameters
    ----------
    value : scalar or array-like, optional
        Replacement for missing entries. An array-like must have the same
        length as ``self``; only its entries at the missing positions
        are used.
    method : str, optional
        Fill method. ``"pad"`` selects ``pad_1d``; any other non-None
        value (after validation) selects ``backfill_1d``.
    limit : int, optional
        Forwarded to the fill function when `method` is used.

    Returns
    -------
    Same type as ``self``
        Always a new object; a plain copy when nothing is missing.

    Raises
    ------
    ValueError
        If `value` is array-like and its length differs from ``len(self)``.
    """
    # TODO(_values_for_fillna): remove this
    value, method = validate_fillna_kwargs(value, method)

    mask = self.isna()

    if is_array_like(value):
        if len(value) != len(self):
            raise ValueError(
                f"Length of 'value' does not match. Got ({len(value)}) "
                f"expected {len(self)}"
            )
        # keep only the replacements that line up with missing slots
        value = value[mask]

    if not mask.any():
        return self.copy()

    if method is not None:
        func = pad_1d if method == "pad" else backfill_1d
        new_values = func(self._ndarray, limit=limit, mask=mask)
        return self._from_sequence(new_values, dtype=self.dtype)

    # fill with value
    new_values = self.copy()
    new_values[mask] = value
    return new_values
def mean(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
    """
    Return the mean of the wrapped ndarray via ``nanops.nanmean``.

    `dtype`, `out` and `keepdims` are only passed to
    ``nv.validate_mean``; they are not forwarded to the computation.
    """
    numpy_compat_kwargs = {"dtype": dtype, "out": out, "keepdims": keepdims}
    nv.validate_mean((), numpy_compat_kwargs)

    values = self._ndarray
    return nanops.nanmean(values, axis=axis, skipna=skipna)
def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default):
    """
    Return the wrapped data as a NumPy array.

    Parameters
    ----------
    dtype : dtype, optional
        Passed to ``np.asarray``.
    copy : bool, default False
        Copy the result when the conversion returned the wrapped
        ndarray itself.
    na_value : scalar, optional
        When given, written into the positions where ``self.isna()`` is
        True. The wrapped ndarray is copied first if the conversion
        returned it unchanged, so it is never modified.

    Returns
    -------
    numpy.ndarray
    """
    fill_requested = na_value is not lib.no_default

    converted = np.asarray(self._ndarray, dtype=dtype)
    shares_storage = converted is self._ndarray
    if shares_storage and (copy or fill_requested):
        converted = converted.copy()

    if fill_requested:
        converted[self.isna()] = na_value

    return converted
+PandasArray._add_comparison_ops() diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py new file mode 100644 index 00000000..8b49c218 --- /dev/null +++ b/pandas/core/arrays/period.py @@ -0,0 +1,1054 @@ +from datetime import timedelta +import operator +from typing import Any, Callable, List, Optional, Sequence, Union + +import numpy as np + +from pandas._libs.tslibs import ( + NaT, + NaTType, + frequencies as libfrequencies, + iNaT, + period as libperiod, +) +from pandas._libs.tslibs.fields import isleapyear_arr +from pandas._libs.tslibs.period import ( + DIFFERENT_FREQ, + IncompatibleFrequency, + Period, + get_period_field_arr, + period_asfreq_arr, +) +from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.common import ( + _TD_DTYPE, + ensure_object, + is_datetime64_dtype, + is_float_dtype, + is_period_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.dtypes.generic import ( + ABCIndexClass, + ABCPeriodArray, + ABCPeriodIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna, notna + +import pandas.core.algorithms as algos +from pandas.core.arrays import datetimelike as dtl +import pandas.core.common as com + +from pandas.tseries import frequencies +from pandas.tseries.offsets import DateOffset, Tick, _delta_to_tick + + +def _field_accessor(name, alias, docstring=None): + def f(self): + base, mult = libfrequencies.get_freq_code(self.freq) + result = get_period_field_arr(alias, self.asi8, base) + return result + + f.__name__ = name + f.__doc__ = docstring + return property(f) + + +class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps): + """ + Pandas ExtensionArray for storing Period data. + + Users should use :func:`period_array` to create new instances. + + Parameters + ---------- + values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex] + The data to store. 
These should be arrays that can be directly + converted to ordinals without inference or copy (PeriodArray, + ndarray[int64]), or a box around such an array (Series[period], + PeriodIndex). + freq : str or DateOffset + The `freq` to use for the array. Mostly applicable when `values` + is an ndarray of integers, when `freq` is required. When `values` + is a PeriodArray (or box around), it's checked that ``values.freq`` + matches `freq`. + dtype : PeriodDtype, optional + A PeriodDtype instance from which to extract a `freq`. If both + `freq` and `dtype` are specified, then the frequencies must match. + copy : bool, default False + Whether to copy the ordinals before storing. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + period_array : Create a new PeriodArray. + PeriodIndex : Immutable Index for period data. + + Notes + ----- + There are two components to a PeriodArray + + - ordinals : integer ndarray + - freq : pd.tseries.offsets.Offset + + The values are physically stored as a 1-D ndarray of integers. These are + called "ordinals" and represent some kind of offset from a base. + + The `freq` indicates the span covered by each element of the array. + All elements in the PeriodArray have the same `freq`. 
def __init__(self, values, freq=None, dtype=None, copy=False):
    """
    Store int64 ordinals together with a PeriodDtype built from `freq`.

    Parameters
    ----------
    values : PeriodArray, Series, PeriodIndex or ndarray of int
        A Series or PeriodIndex is unwrapped through ``_values``; an
        instance of this class contributes both its ordinals and its freq.
    freq : str or DateOffset, optional
        Reconciled with `dtype` by ``validate_dtype_freq``.
    dtype : PeriodDtype, optional
    copy : bool, default False
        Passed to ``np.array`` when building the int64 ordinals.

    Raises
    ------
    TypeError
        If `values` is a Series whose ``_values`` is not an instance of
        this class.
    ValueError
        If no frequency is available after unwrapping `values`.
    """
    freq = validate_dtype_freq(dtype, freq)
    if freq is not None:
        freq = Period._maybe_convert_freq(freq)

    # peel off a Series / PeriodIndex wrapper
    if isinstance(values, ABCSeries):
        values = values._values
        if not isinstance(values, type(self)):
            raise TypeError("Incorrect dtype")
    elif isinstance(values, ABCPeriodIndex):
        values = values._values

    if isinstance(values, type(self)):
        source = values
        if freq is not None and freq != source.freq:
            raise raise_on_incompatible(source, freq)
        values = source._data
        freq = source.freq

    ordinals = np.array(values, dtype="int64", copy=copy)
    self._data = ordinals

    if freq is None:
        raise ValueError("freq is not specified and cannot be inferred")
    self._dtype = PeriodDtype(freq)
freq) + if copy: + scalars = scalars.copy() + return scalars + + periods = np.asarray(scalars, dtype=object) + if copy: + periods = periods.copy() + + freq = freq or libperiod.extract_freq(periods) + ordinals = libperiod.extract_ordinals(periods, freq) + return cls(ordinals, freq=freq) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + return cls._from_sequence(strings, dtype, copy) + + @classmethod + def _from_datetime64(cls, data, freq, tz=None): + """ + Construct a PeriodArray from a datetime64 array + + Parameters + ---------- + data : ndarray[datetime64[ns], datetime64[ns, tz]] + freq : str or Tick + tz : tzinfo, optional + + Returns + ------- + PeriodArray[freq] + """ + data, freq = dt64arr_to_periodarr(data, freq, tz) + return cls(data, freq=freq) + + @classmethod + def _generate_range(cls, start, end, periods, freq, fields): + periods = dtl.validate_periods(periods) + + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + field_count = len(fields) + if start is not None or end is not None: + if field_count > 0: + raise ValueError( + "Can either instantiate from fields or endpoints, but not both" + ) + subarr, freq = _get_ordinal_range(start, end, periods, freq) + elif field_count > 0: + subarr, freq = _range_from_fields(freq=freq, **fields) + else: + raise ValueError("Not enough parameters to construct Period range") + + return subarr, freq + + # ----------------------------------------------------------------- + # DatetimeLike Interface + + def _unbox_scalar(self, value: Union[Period, NaTType]) -> int: + if value is NaT: + return value.value + elif isinstance(value, self._scalar_type): + if not isna(value): + self._check_compatible_with(value) + return value.ordinal + else: + raise ValueError(f"'value' should be a Period. 
Got '{value}' instead.") + + def _scalar_from_string(self, value: str) -> Period: + return Period(value, freq=self.freq) + + def _check_compatible_with(self, other, setitem: bool = False): + if other is NaT: + return + if self.freqstr != other.freqstr: + raise raise_on_incompatible(self, other) + + # -------------------------------------------------------------------- + # Data / Attributes + + @cache_readonly + def dtype(self): + return self._dtype + + # error: Read-only property cannot override read-write property [misc] + @property # type: ignore + def freq(self): + """ + Return the frequency object for this PeriodArray. + """ + return self.dtype.freq + + def __array__(self, dtype=None) -> np.ndarray: + # overriding DatetimelikeArray + return np.array(list(self), dtype=object) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. + """ + import pyarrow + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + if type is not None: + if pyarrow.types.is_integer(type): + return pyarrow.array(self._data, mask=self.isna(), type=type) + elif isinstance(type, ArrowPeriodType): + # ensure we have the same freq + if self.freqstr != type.freq: + raise TypeError( + "Not supported to convert PeriodArray to array with different" + " 'freq' ({0} vs {1})".format(self.freqstr, type.freq) + ) + else: + raise TypeError( + "Not supported to convert PeriodArray to '{0}' type".format(type) + ) + + period_type = ArrowPeriodType(self.freqstr) + storage_array = pyarrow.array(self._data, mask=self.isna(), type="int64") + return pyarrow.ExtensionArray.from_storage(period_type, storage_array) + + # -------------------------------------------------------------------- + # Vectorized analogues of Period properties + + year = _field_accessor( + "year", + 0, + """ + The year of the period. + """, + ) + month = _field_accessor( + "month", + 3, + """ + The month as January=1, December=12. 
@property
def is_leap_year(self):
    """
    Logical indicating if the date belongs to a leap year.
    """
    years = np.asarray(self.year)
    return isleapyear_arr(years)
+ + Returns + ------- + DatetimeArray/Index + """ + from pandas.core.arrays import DatetimeArray + + how = libperiod._validate_end_alias(how) + + end = how == "E" + if end: + if freq == "B": + # roll forward to ensure we land on B date + adjust = Timedelta(1, "D") - Timedelta(1, "ns") + return self.to_timestamp(how="start") + adjust + else: + adjust = Timedelta(1, "ns") + return (self + self.freq).to_timestamp(how="start") - adjust + + if freq is None: + base, mult = libfrequencies.get_freq_code(self.freq) + freq = libfrequencies.get_to_timestamp_base(base) + else: + freq = Period._maybe_convert_freq(freq) + + base, mult = libfrequencies.get_freq_code(freq) + new_data = self.asfreq(freq, how=how) + + new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) + return DatetimeArray._from_sequence(new_data, freq="infer") + + # -------------------------------------------------------------------- + # Array-like / EA-Interface Methods + + def _values_for_argsort(self): + return self._data + + # -------------------------------------------------------------------- + + def _time_shift(self, periods, freq=None): + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or str + Frequency increment to shift by. + """ + if freq is not None: + raise TypeError( + "`freq` argument is not supported for " + f"{type(self).__name__}._time_shift" + ) + values = self.asi8 + periods * self.freq.n + if self._hasnans: + values[self._isnan] = iNaT + return type(self)(values, freq=self.freq) + + @property + def _box_func(self): + return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) + + def asfreq(self, freq=None, how="E"): + """ + Convert the Period Array/Index to the specified frequency `freq`. 
+ + Parameters + ---------- + freq : str + A frequency. + how : str {'E', 'S'} + Whether the elements should be aligned to the end + or start within pa period. + + * 'E', 'END', or 'FINISH' for end, + * 'S', 'START', or 'BEGIN' for start. + + January 31st ('END') vs. January 1st ('START') for example. + + Returns + ------- + Period Array/Index + Constructed with the new frequency. + + Examples + -------- + >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A') + >>> pidx + PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'], + dtype='period[A-DEC]', freq='A-DEC') + + >>> pidx.asfreq('M') + PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12', + '2015-12'], dtype='period[M]', freq='M') + + >>> pidx.asfreq('M', how='S') + PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01', + '2015-01'], dtype='period[M]', freq='M') + """ + how = libperiod._validate_end_alias(how) + + freq = Period._maybe_convert_freq(freq) + + base1, mult1 = libfrequencies.get_freq_code(self.freq) + base2, mult2 = libfrequencies.get_freq_code(freq) + + asi8 = self.asi8 + # mult1 can't be negative or 0 + end = how == "E" + if end: + ordinal = asi8 + mult1 - 1 + else: + ordinal = asi8 + + new_data = period_asfreq_arr(ordinal, base1, base2, end) + + if self._hasnans: + new_data[self._isnan] = iNaT + + return type(self)(new_data, freq=freq) + + # ------------------------------------------------------------------ + # Rendering Methods + + def _formatter(self, boxed=False): + if boxed: + return str + return "'{}'".format + + def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + """ + actually format my specific types + """ + values = self.astype(object) + + if date_format: + formatter = lambda dt: dt.strftime(date_format) + else: + formatter = lambda dt: str(dt) + + if self._hasnans: + mask = self._isnan + values[mask] = na_rep + imask = ~mask + values[imask] = np.array([formatter(dt) for dt in values[imask]]) + else: + values = 
np.array([formatter(dt) for dt in values]) + return values + + # ------------------------------------------------------------------ + + def astype(self, dtype, copy=True): + # We handle Period[T] -> Period[U] + # Our parent handles everything else. + dtype = pandas_dtype(dtype) + + if is_period_dtype(dtype): + return self.asfreq(dtype.freq) + return super().astype(dtype, copy=copy) + + # ------------------------------------------------------------------ + # Arithmetic Methods + + def _sub_datelike(self, other): + assert other is not NaT + return NotImplemented + + def _sub_period(self, other): + # If the operation is well-defined, we return an object-Index + # of DateOffsets. Null entries are filled with pd.NaT + self._check_compatible_with(other) + asi8 = self.asi8 + new_data = asi8 - other.ordinal + new_data = np.array([self.freq * x for x in new_data]) + + if self._hasnans: + new_data[self._isnan] = NaT + + return new_data + + def _addsub_int_array( + self, other: np.ndarray, op: Callable[[Any, Any], Any], + ) -> "PeriodArray": + """ + Add or subtract array of integers; equivalent to applying + `_time_shift` pointwise. + + Parameters + ---------- + other : np.ndarray[integer-dtype] + op : {operator.add, operator.sub} + + Returns + ------- + result : PeriodArray + """ + + assert op in [operator.add, operator.sub] + if op is operator.sub: + other = -other + res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan) + res_values = res_values.view("i8") + res_values[self._isnan] = iNaT + return type(self)(res_values, freq=self.freq) + + def _add_offset(self, other): + assert not isinstance(other, Tick) + base = libfrequencies.get_base_alias(other.rule_code) + if base != self.freq.rule_code: + raise raise_on_incompatible(self, other) + + # Note: when calling parent class's _add_timedeltalike_scalar, + # it will call delta_to_nanoseconds(delta). Because delta here + # is an integer, delta_to_nanoseconds will return it unchanged. 
+ result = super()._add_timedeltalike_scalar(other.n) + return type(self)(result, freq=self.freq) + + def _add_timedeltalike_scalar(self, other): + """ + Parameters + ---------- + other : timedelta, Tick, np.timedelta64 + + Returns + ------- + result : ndarray[int64] + """ + assert isinstance(self.freq, Tick) # checked by calling function + assert isinstance(other, (timedelta, np.timedelta64, Tick)) + + if notna(other): + # special handling for np.timedelta64("NaT"), avoid calling + # _check_timedeltalike_freq_compat as that would raise TypeError + other = self._check_timedeltalike_freq_compat(other) + + # Note: when calling parent class's _add_timedeltalike_scalar, + # it will call delta_to_nanoseconds(delta). Because delta here + # is an integer, delta_to_nanoseconds will return it unchanged. + ordinals = super()._add_timedeltalike_scalar(other) + return ordinals + + def _add_delta_tdi(self, other): + """ + Parameters + ---------- + other : TimedeltaArray or ndarray[timedelta64] + + Returns + ------- + result : ndarray[int64] + """ + assert isinstance(self.freq, Tick) # checked by calling function + + if not np.all(isna(other)): + delta = self._check_timedeltalike_freq_compat(other) + else: + # all-NaT TimedeltaIndex is equivalent to a single scalar td64 NaT + return self + np.timedelta64("NaT") + + return self._addsub_int_array(delta, operator.add).asi8 + + def _add_delta(self, other): + """ + Add a timedelta-like, Tick, or TimedeltaIndex-like object + to self, yielding a new PeriodArray + + Parameters + ---------- + other : {timedelta, np.timedelta64, Tick, + TimedeltaIndex, ndarray[timedelta64]} + + Returns + ------- + result : PeriodArray + """ + if not isinstance(self.freq, Tick): + # We cannot add timedelta-like to non-tick PeriodArray + raise raise_on_incompatible(self, other) + + new_ordinals = super()._add_delta(other) + return type(self)(new_ordinals, freq=self.freq) + + def _check_timedeltalike_freq_compat(self, other): + """ + Arithmetic operations 
with timedelta-like scalars or array `other` + are only valid if `other` is an integer multiple of `self.freq`. + If the operation is valid, find that integer multiple. Otherwise, + raise because the operation is invalid. + + Parameters + ---------- + other : timedelta, np.timedelta64, Tick, + ndarray[timedelta64], TimedeltaArray, TimedeltaIndex + + Returns + ------- + multiple : int or ndarray[int64] + + Raises + ------ + IncompatibleFrequency + """ + assert isinstance(self.freq, Tick) # checked by calling function + own_offset = frequencies.to_offset(self.freq.rule_code) + base_nanos = delta_to_nanoseconds(own_offset) + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + nanos = delta_to_nanoseconds(other) + + elif isinstance(other, np.ndarray): + # numpy timedelta64 array; all entries must be compatible + assert other.dtype.kind == "m" + if other.dtype != _TD_DTYPE: + # i.e. non-nano unit + # TODO: disallow unit-less timedelta64 + other = other.astype(_TD_DTYPE) + nanos = other.view("i8") + else: + # TimedeltaArray/Index + nanos = other.asi8 + + if np.all(nanos % base_nanos == 0): + # nanos being added is an integer multiple of the + # base-frequency to self.freq + delta = nanos // base_nanos + # delta is the integer (or integer-array) number of periods + # by which will be added to self. + return delta + + raise raise_on_incompatible(self, other) + + +def raise_on_incompatible(left, right): + """ + Helper function to render a consistent error message when raising + IncompatibleFrequency. + + Parameters + ---------- + left : PeriodArray + right : None, DateOffset, Period, ndarray, or timedelta-like + + Returns + ------- + IncompatibleFrequency + Exception to be raised by the caller. 
+ """ + # GH#24283 error message format depends on whether right is scalar + if isinstance(right, np.ndarray) or right is None: + other_freq = None + elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, DateOffset)): + other_freq = right.freqstr + else: + other_freq = _delta_to_tick(Timedelta(right)).freqstr + + msg = DIFFERENT_FREQ.format( + cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq + ) + return IncompatibleFrequency(msg) + + +# ------------------------------------------------------------------- +# Constructor Helpers + + +def period_array( + data: Sequence[Optional[Period]], + freq: Optional[Union[str, Tick]] = None, + copy: bool = False, +) -> PeriodArray: + """ + Construct a new PeriodArray from a sequence of Period scalars. + + Parameters + ---------- + data : Sequence of Period objects + A sequence of Period objects. These are required to all have + the same ``freq.`` Missing values can be indicated by ``None`` + or ``pandas.NaT``. + freq : str, Tick, or Offset + The frequency of every element of the array. This can be specified + to avoid inferring the `freq` from `data`. + copy : bool, default False + Whether to ensure a copy of the data is made. + + Returns + ------- + PeriodArray + + See Also + -------- + PeriodArray + pandas.PeriodIndex + + Examples + -------- + >>> period_array([pd.Period('2017', freq='A'), + ... pd.Period('2018', freq='A')]) + + ['2017', '2018'] + Length: 2, dtype: period[A-DEC] + + >>> period_array([pd.Period('2017', freq='A'), + ... pd.Period('2018', freq='A'), + ... 
pd.NaT]) + <PeriodArray> + ['2017', '2018', 'NaT'] + Length: 3, dtype: period[A-DEC] + + Integers that look like years are handled + + >>> period_array([2000, 2001, 2002], freq='D') + <PeriodArray> + ['2000-01-01', '2001-01-01', '2002-01-01'] + Length: 3, dtype: period[D] + + Datetime-like strings may also be passed + + >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') + <PeriodArray> + ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] + Length: 4, dtype: period[Q-DEC] + """ + if is_datetime64_dtype(data): + return PeriodArray._from_datetime64(data, freq) + if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)): + return PeriodArray(data, freq) + + # other iterable of some kind + if not isinstance(data, (np.ndarray, list, tuple)): + data = list(data) + + data = np.asarray(data) + + dtype: Optional[PeriodDtype] + if freq: + dtype = PeriodDtype(freq) + else: + dtype = None + + if is_float_dtype(data) and len(data) > 0: + raise TypeError("PeriodIndex does not allow floating point in construction") + + data = ensure_object(data) + + return PeriodArray._from_sequence(data, dtype=dtype) + + +def validate_dtype_freq(dtype, freq): + """ + If both a dtype and a freq are available, ensure they match. If only + dtype is available, extract the implied freq. + + Parameters + ---------- + dtype : dtype + freq : DateOffset or None + + Returns + ------- + freq : DateOffset + + Raises + ------ + ValueError : non-period dtype + IncompatibleFrequency : mismatch between dtype and freq + """ + if freq is not None: + freq = frequencies.to_offset(freq) + + if dtype is not None: + dtype = pandas_dtype(dtype) + if not is_period_dtype(dtype): + raise ValueError("dtype must be PeriodDtype") + if freq is None: + freq = dtype.freq + elif freq != dtype.freq: + raise IncompatibleFrequency("specified freq and dtype are different") + return freq + + +def dt64arr_to_periodarr(data, freq, tz=None): + """ + Convert a datetime-like array to Period ordinals. 
+ + Parameters + ---------- + data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]] + freq : Optional[Union[str, Tick]] + Must match the `freq` on the `data` if `data` is a DatetimeIndex + or Series. + tz : Optional[tzinfo] + + Returns + ------- + ordinals : ndarray[int] + freq : Tick + The frequency extracted from the Series or DatetimeIndex if that's + used. + + """ + if data.dtype != np.dtype("M8[ns]"): + raise ValueError(f"Wrong dtype: {data.dtype}") + + if freq is None: + if isinstance(data, ABCIndexClass): + data, freq = data._values, data.freq + elif isinstance(data, ABCSeries): + data, freq = data._values, data.dt.freq + + freq = Period._maybe_convert_freq(freq) + + if isinstance(data, (ABCIndexClass, ABCSeries)): + data = data._values + + base, mult = libfrequencies.get_freq_code(freq) + return libperiod.dt64arr_to_periodarr(data.view("i8"), base, tz), freq + + +def _get_ordinal_range(start, end, periods, freq, mult=1): + if com.count_not_none(start, end, periods) != 2: + raise ValueError( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + + if freq is not None: + _, mult = libfrequencies.get_freq_code(freq) + + if start is not None: + start = Period(start, freq) + if end is not None: + end = Period(end, freq) + + is_start_per = isinstance(start, Period) + is_end_per = isinstance(end, Period) + + if is_start_per and is_end_per and start.freq != end.freq: + raise ValueError("start and end must have same freq") + if start is NaT or end is NaT: + raise ValueError("start and end must not be NaT") + + if freq is None: + if is_start_per: + freq = start.freq + elif is_end_per: + freq = end.freq + else: # pragma: no cover + raise ValueError("Could not infer freq from start/end") + + if periods is not None: + periods = periods * mult + if start is None: + data = np.arange( + end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64 + ) + else: + data = np.arange( + start.ordinal, 
start.ordinal + periods, mult, dtype=np.int64 + ) + else: + data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64) + + return data, freq + + +def _range_from_fields( + year=None, + month=None, + quarter=None, + day=None, + hour=None, + minute=None, + second=None, + freq=None, +): + """ + Compute period ordinals from date/time fields. + + `hour`, `minute` and `second` default to 0 and `day` to 1. If + `quarter` is given, ordinals are built from (year, quarter) pairs + using a quarterly `freq` ("Q" when `freq` is None); otherwise they + are built from (year, month, day, hour, minute, second). + + Returns + ------- + ordinals : ndarray[int64] + freq + The frequency that was used. + + Raises + ------ + AssertionError + If `quarter` is given and `freq` is not a quarterly frequency. + """ + if hour is None: + hour = 0 + if minute is None: + minute = 0 + if second is None: + second = 0 + if day is None: + day = 1 + + ordinals = [] + + if quarter is not None: + if freq is None: + freq = "Q" + base = libfrequencies.FreqGroup.FR_QTR + else: + base, mult = libfrequencies.get_freq_code(freq) + if base != libfrequencies.FreqGroup.FR_QTR: + raise AssertionError("base must equal FR_QTR") + + year, quarter = _make_field_arrays(year, quarter) + for y, q in zip(year, quarter): + y, m = libperiod.quarter_to_myear(y, q, freq) + val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) + ordinals.append(val) + else: + base, mult = libfrequencies.get_freq_code(freq) + arrays = _make_field_arrays(year, month, day, hour, minute, second) + for y, mth, d, h, mn, s in zip(*arrays): + ordinals.append(libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)) + + return np.array(ordinals, dtype=np.int64), freq + + +def _make_field_arrays(*fields): + """ + Convert `fields` to arrays of one common length. + + List, ndarray and Series fields must all have the same length; any + other field is repeated to that length with ``np.repeat``. + + Raises + ------ + ValueError + If the list-like fields differ in length. + """ + length = None + for x in fields: + if isinstance(x, (list, np.ndarray, ABCSeries)): + if length is not None and len(x) != length: + raise ValueError("Mismatched Period array lengths") + elif length is None: + length = len(x) + + arrays = [ + np.asarray(x) + if isinstance(x, (np.ndarray, list, ABCSeries)) + else np.repeat(x, length) + for x in fields + ] + + return arrays diff --git a/pandas/core/arrays/sparse/__init__.py b/pandas/core/arrays/sparse/__init__.py new file mode 100644 index 00000000..e928db49 --- /dev/null +++ b/pandas/core/arrays/sparse/__init__.py @@ -0,0 +1,10 @@ +# flake8: noqa: F401 + +from pandas.core.arrays.sparse.accessor import SparseAccessor, SparseFrameAccessor +from pandas.core.arrays.sparse.array import ( + 
BlockIndex, + IntIndex, + SparseArray, + _make_index, +) +from pandas.core.arrays.sparse.dtype import SparseDtype diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py new file mode 100644 index 00000000..92c05f44 --- /dev/null +++ b/pandas/core/arrays/sparse/accessor.py @@ -0,0 +1,328 @@ +"""Sparse accessor""" + +import numpy as np + +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.cast import find_common_type + +from pandas.core.accessor import PandasDelegate, delegate_names +from pandas.core.arrays.sparse.array import SparseArray +from pandas.core.arrays.sparse.dtype import SparseDtype + + +class BaseAccessor: + """ + Base class for the ``.sparse`` accessors. + + Stores the wrapped object as ``_parent`` and validates it on + construction; subclasses must implement ``_validate``. + """ + + _validation_msg = "Can only use the '.sparse' accessor with Sparse data." + + def __init__(self, data=None): + self._parent = data + self._validate(data) + + def _validate(self, data): + raise NotImplementedError + + +@delegate_names( + SparseArray, ["npoints", "density", "fill_value", "sp_values"], typ="property" +) +class SparseAccessor(BaseAccessor, PandasDelegate): + """ + Accessor for sparse data, with conversions from and to other sparse + matrix data types. + """ + + def _validate(self, data): + if not isinstance(data.dtype, SparseDtype): + raise AttributeError(self._validation_msg) + + def _delegate_property_get(self, name, *args, **kwargs): + return getattr(self._parent.array, name) + + def _delegate_method(self, name, *args, **kwargs): + if name == "from_coo": + return self.from_coo(*args, **kwargs) + elif name == "to_coo": + return self.to_coo(*args, **kwargs) + else: + raise ValueError + + @classmethod + def from_coo(cls, A, dense_index=False): + """ + Create a Series with sparse values from a scipy.sparse.coo_matrix. + + Parameters + ---------- + A : scipy.sparse.coo_matrix + dense_index : bool, default False + If False (default), the SparseSeries index consists of only the + coords of the non-null entries of the original coo_matrix. 
+ If True, the SparseSeries index consists of the full sorted + (row, col) coordinates of the coo_matrix. + + Returns + ------- + s : Series + A Series with sparse values. + + Examples + -------- + >>> from scipy import sparse + >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), + shape=(3, 4)) + >>> A + <3x4 sparse matrix of type '' + with 3 stored elements in COOrdinate format> + >>> A.todense() + matrix([[ 0., 0., 1., 2.], + [ 3., 0., 0., 0.], + [ 0., 0., 0., 0.]]) + >>> ss = pd.Series.sparse.from_coo(A) + >>> ss + 0 2 1 + 3 2 + 1 0 3 + dtype: float64 + BlockIndex + Block locations: array([0], dtype=int32) + Block lengths: array([3], dtype=int32) + """ + from pandas.core.arrays.sparse.scipy_sparse import _coo_to_sparse_series + from pandas import Series + + result = _coo_to_sparse_series(A, dense_index=dense_index) + result = Series(result.array, index=result.index, copy=False) + + return result + + def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ + Create a scipy.sparse.coo_matrix from a Series with MultiIndex. + + Use row_levels and column_levels to determine the row and column + coordinates respectively. row_levels and column_levels are the names + (labels) or numbers of the levels. {row_levels, column_levels} must be + a partition of the MultiIndex level names (or numbers). + + Parameters + ---------- + row_levels : tuple/list + column_levels : tuple/list + sort_labels : bool, default False + Sort the row and column labels before forming the sparse matrix. 
+ + Returns + ------- + y : scipy.sparse.coo_matrix + rows : list (row labels) + columns : list (column labels) + + Examples + -------- + >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + >>> s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) + >>> ss = s.astype("Sparse") + >>> A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B'], + ... column_levels=['C', 'D'], + ... sort_labels=True) + >>> A + <3x4 sparse matrix of type '' + with 3 stored elements in COOrdinate format> + >>> A.todense() + matrix([[ 0., 0., 1., 3.], + [ 3., 0., 0., 0.], + [ 0., 0., 0., 0.]]) + >>> rows + [(1, 1), (1, 2), (2, 1)] + >>> columns + [('a', 0), ('a', 1), ('b', 0), ('b', 1)] + """ + from pandas.core.arrays.sparse.scipy_sparse import _sparse_series_to_coo + + A, rows, columns = _sparse_series_to_coo( + self._parent, row_levels, column_levels, sort_labels=sort_labels + ) + return A, rows, columns + + def to_dense(self): + """ + Convert a Series from sparse values to dense. + + .. versionadded:: 0.25.0 + + Returns + ------- + Series: + A Series with the same values, stored as a dense array. + + Examples + -------- + >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0])) + >>> series + 0 0 + 1 1 + 2 0 + dtype: Sparse[int64, 0] + + >>> series.sparse.to_dense() + 0 0 + 1 1 + 2 0 + dtype: int64 + """ + from pandas import Series + + return Series( + self._parent.array.to_dense(), + index=self._parent.index, + name=self._parent.name, + ) + + +class SparseFrameAccessor(BaseAccessor, PandasDelegate): + """ + DataFrame accessor for sparse data. + + .. 
versionadded:: 0.25.0 + """ + + def _validate(self, data): + dtypes = data.dtypes + if not all(isinstance(t, SparseDtype) for t in dtypes): + raise AttributeError(self._validation_msg) + + @classmethod + def from_spmatrix(cls, data, index=None, columns=None): + """ + Create a new DataFrame from a scipy sparse matrix. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + data : scipy.sparse.spmatrix + Must be convertible to csc format. + index, columns : Index, optional + Row and column labels to use for the resulting DataFrame. + Defaults to a RangeIndex. + + Returns + ------- + DataFrame + Each column of the DataFrame is stored as a + :class:`arrays.SparseArray`. + + Examples + -------- + >>> import scipy.sparse + >>> mat = scipy.sparse.eye(3) + >>> pd.DataFrame.sparse.from_spmatrix(mat) + 0 1 2 + 0 1.0 0.0 0.0 + 1 0.0 1.0 0.0 + 2 0.0 0.0 1.0 + """ + from pandas import DataFrame + + data = data.tocsc() + index, columns = cls._prep_index(data, index, columns) + sparrays = [SparseArray.from_spmatrix(data[:, i]) for i in range(data.shape[1])] + data = dict(enumerate(sparrays)) + result = DataFrame(data, index=index) + result.columns = columns + return result + + def to_dense(self): + """ + Convert a DataFrame with sparse values to dense. + + .. versionadded:: 0.25.0 + + Returns + ------- + DataFrame + A DataFrame with the same values stored as dense arrays. + + Examples + -------- + >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])}) + >>> df.sparse.to_dense() + A + 0 0 + 1 1 + 2 0 + """ + from pandas import DataFrame + + data = {k: v.array.to_dense() for k, v in self._parent.items()} + return DataFrame(data, index=self._parent.index, columns=self._parent.columns) + + def to_coo(self): + """ + Return the contents of the frame as a sparse SciPy COO matrix. + + .. versionadded:: 0.25.0 + + Returns + ------- + coo_matrix : scipy.sparse.spmatrix + If the caller is heterogeneous and contains booleans or objects, + the result will be of dtype=object. 
See Notes. + + Notes + ----- + The dtype will be the lowest-common-denominator type (implicit + upcasting); that is to say if the dtypes (even of numeric types) + are mixed, the one that accommodates all will be chosen. + + e.g. If the dtypes are float16 and float32, dtype will be upcast to + float32. By numpy.find_common_type convention, mixing int64 + and uint64 will result in a float64 dtype. + """ + import_optional_dependency("scipy") + from scipy.sparse import coo_matrix + + dtype = find_common_type(self._parent.dtypes) + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + + cols, rows, datas = [], [], [] + for col, name in enumerate(self._parent): + s = self._parent[name] + row = s.array.sp_index.to_int_index().indices + cols.append(np.repeat(col, len(row))) + rows.append(row) + datas.append(s.array.sp_values.astype(dtype, copy=False)) + + cols = np.concatenate(cols) + rows = np.concatenate(rows) + datas = np.concatenate(datas) + return coo_matrix((datas, (rows, cols)), shape=self._parent.shape) + + @property + def density(self) -> float: + """ + Ratio of non-sparse points to total (dense) data points. + + Computed as the unweighted mean of the per-column densities. + """ + return np.mean([column.array.density for _, column in self._parent.items()]) + + @staticmethod + def _prep_index(data, index, columns): + """ + Return `index` and `columns` for `data`, substituting a default + index for each one that is None and checking that both lengths + match ``data.shape``. + + Raises + ------ + ValueError + If the length of `index` or `columns` does not match `data`. + """ + import pandas.core.indexes.base as ibase + + N, K = data.shape + if index is None: + index = ibase.default_index(N) + if columns is None: + columns = ibase.default_index(K) + + if len(columns) != K: + raise ValueError(f"Column length mismatch: {len(columns)} vs. {K}") + if len(index) != N: + raise ValueError(f"Index length mismatch: {len(index)} vs. 
{N}") + return index, columns diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py new file mode 100644 index 00000000..b476a019 --- /dev/null +++ b/pandas/core/arrays/sparse/array.py @@ -0,0 +1,1552 @@ +""" +SparseArray data structure +""" +from collections import abc +import numbers +import operator +from typing import Any, Callable +import warnings + +import numpy as np + +from pandas._libs import index as libindex, lib +import pandas._libs.sparse as splib +from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex +from pandas._libs.tslibs import NaT +import pandas.compat as compat +from pandas.compat.numpy import function as nv +from pandas.errors import PerformanceWarning + +from pandas.core.dtypes.cast import ( + astype_nansafe, + construct_1d_arraylike_from_scalar, + find_common_type, + infer_dtype_from_scalar, +) +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_datetime64_any_dtype, + is_dtype_equal, + is_integer, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCSparseArray +from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna + +import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin +from pandas.core.arrays.sparse.dtype import SparseDtype +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.construction import sanitize_array +from pandas.core.indexers import check_array_indexer +from pandas.core.missing import interpolate_2d +import pandas.core.ops as ops +from pandas.core.ops.common import unpack_zerodim_and_defer + +import pandas.io.formats.printing as printing + +# ---------------------------------------------------------------------------- +# Array + + +_sparray_doc_kwargs = dict(klass="SparseArray") + + +def _get_fill(arr: ABCSparseArray) -> np.ndarray: + """ + Create a 0-dim ndarray 
containing the fill value + + Parameters + ---------- + arr : SparseArray + + Returns + ------- + fill_value : ndarray + 0-dim ndarray with just the fill value. + + Notes + ----- + The fill value is coerced to the array's subtype if possible. + An int64 SparseArray can have NaN as fill_value if there is nothing + missing; when the coercion raises ValueError the fill value is + returned with the dtype NumPy infers for it. + """ + try: + return np.asarray(arr.fill_value, dtype=arr.dtype.subtype) + except ValueError: + return np.asarray(arr.fill_value) + + +def _sparse_array_op( + left: ABCSparseArray, right: ABCSparseArray, op: Callable, name: str +) -> Any: + """ + Perform a binary operation between two arrays. + + Parameters + ---------- + left : SparseArray + right : SparseArray + op : Callable + The binary operation to perform + name : str + Name of the callable. + + Returns + ------- + SparseArray + """ + if name.startswith("__"): + # For lookups in _libs.sparse we need non-dunder op name + name = name[2:-2] + + # dtype used to find corresponding sparse method + ltype = left.dtype.subtype + rtype = right.dtype.subtype + + if not is_dtype_equal(ltype, rtype): + subtype = find_common_type([ltype, rtype]) + ltype = SparseDtype(subtype, left.fill_value) + rtype = SparseDtype(subtype, right.fill_value) + + # TODO(GH-23092): pass copy=False. 
Need to fix astype_nansafe + left = left.astype(ltype) + right = right.astype(rtype) + dtype = ltype.subtype + else: + dtype = ltype + + # dtype the result must have + result_dtype = None + + if left.sp_index.ngaps == 0 or right.sp_index.ngaps == 0: + with np.errstate(all="ignore"): + result = op(left.to_dense(), right.to_dense()) + fill = op(_get_fill(left), _get_fill(right)) + + if left.sp_index.ngaps == 0: + index = left.sp_index + else: + index = right.sp_index + elif left.sp_index.equals(right.sp_index): + with np.errstate(all="ignore"): + result = op(left.sp_values, right.sp_values) + fill = op(_get_fill(left), _get_fill(right)) + index = left.sp_index + else: + if name[0] == "r": + left, right = right, left + name = name[1:] + + if name in ("and", "or", "xor") and dtype == "bool": + opname = f"sparse_{name}_uint8" + # to make template simple, cast here + left_sp_values = left.sp_values.view(np.uint8) + right_sp_values = right.sp_values.view(np.uint8) + # builtin bool: the np.bool alias is deprecated/removed in NumPy + result_dtype = bool + else: + opname = f"sparse_{name}_{dtype}" + left_sp_values = left.sp_values + right_sp_values = right.sp_values + + sparse_op = getattr(splib, opname) + + with np.errstate(all="ignore"): + result, index, fill = sparse_op( + left_sp_values, + left.sp_index, + left.fill_value, + right_sp_values, + right.sp_index, + right.fill_value, + ) + + if result_dtype is None: + result_dtype = result.dtype + + return _wrap_result(name, result, index, fill, dtype=result_dtype) + + +def _wrap_result(name, data, sparse_index, fill_value, dtype=None): + """ + Wrap the result of a binary op in a SparseArray with the correct dtype. + + Comparison ops (eq, ne, lt, gt, le, ge) always produce a boolean + array; for boolean results the fill value is converted to a Python + bool. + """ + if name.startswith("__"): + # e.g. 
__eq__ --> eq + name = name[2:-2] + + if name in ("eq", "ne", "lt", "gt", "le", "ge"): + # builtin bool: the np.bool alias is deprecated/removed in NumPy + dtype = bool + + fill_value = lib.item_from_zerodim(fill_value) + + if is_bool_dtype(dtype): + # fill_value may be np.bool_ + fill_value = bool(fill_value) + return SparseArray( + data, sparse_index=sparse_index, fill_value=fill_value, dtype=dtype + ) + + +class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): + """ + An ExtensionArray for storing sparse data. + + .. versionchanged:: 0.24.0 + + Implements the ExtensionArray interface. + + Parameters + ---------- + data : array-like + A dense array of values to store in the SparseArray. This may contain + `fill_value`. + sparse_index : SparseIndex, optional + index : Index + fill_value : scalar, optional + Elements in `data` that are `fill_value` are not stored in the + SparseArray. For memory savings, this should be the most common value + in `data`. By default, `fill_value` depends on the dtype of `data`: + + =========== ========== + data.dtype na_value + =========== ========== + float ``np.nan`` + int ``0`` + bool False + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + =========== ========== + + The fill value is potentially specified in three ways. In order of + precedence, these are + + 1. The `fill_value` argument + 2. ``dtype.fill_value`` if `fill_value` is None and `dtype` is + a ``SparseDtype`` + 3. ``data.dtype.fill_value`` if `fill_value` is None and `dtype` + is not a ``SparseDtype`` and `data` is a ``SparseArray``. + + kind : {'integer', 'block'}, default 'integer' + The type of storage for sparse locations. + + * 'block': Stores a `block` and `block_length` for each + contiguous *span* of sparse values. This is best when + sparse data tends to be clumped together, with large + regions of ``fill-value`` values between sparse values. + * 'integer': uses an integer to store the location of + each sparse value. + + dtype : np.dtype or SparseDtype, optional + The dtype to use for the SparseArray. 
For numpy dtypes, this + determines the dtype of ``self.sp_values``. For SparseDtype, + this determines ``self.sp_values`` and ``self.fill_value``. + copy : bool, default False + Whether to explicitly copy the incoming `data` array. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + _pandas_ftype = "sparse" + _subtyp = "sparse_array" # register ABCSparseArray + _deprecations = PandasObject._deprecations | frozenset(["get_values"]) + _sparse_index: SparseIndex + + def __init__( + self, + data, + sparse_index=None, + index=None, + fill_value=None, + kind="integer", + dtype=None, + copy=False, + ): + + if fill_value is None and isinstance(dtype, SparseDtype): + fill_value = dtype.fill_value + + if isinstance(data, type(self)): + # disable normal inference on dtype, sparse_index, & fill_value + if sparse_index is None: + sparse_index = data.sp_index + if fill_value is None: + fill_value = data.fill_value + if dtype is None: + dtype = data.dtype + # TODO: make kind=None, and use data.kind? 
+ data = data.sp_values + + # Handle use-provided dtype + if isinstance(dtype, str): + # Two options: dtype='int', regular numpy dtype + # or dtype='Sparse[int]', a sparse dtype + try: + dtype = SparseDtype.construct_from_string(dtype) + except TypeError: + dtype = pandas_dtype(dtype) + + if isinstance(dtype, SparseDtype): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + if index is not None and not is_scalar(data): + raise Exception("must only pass scalars with an index ") + + if is_scalar(data): + if index is not None: + if data is None: + data = np.nan + + if index is not None: + npoints = len(index) + elif sparse_index is None: + npoints = 1 + else: + npoints = sparse_index.length + + dtype = infer_dtype_from_scalar(data)[0] + data = construct_1d_arraylike_from_scalar(data, npoints, dtype) + + if dtype is not None: + dtype = pandas_dtype(dtype) + + # TODO: disentangle the fill_value dtype inference from + # dtype inference + if data is None: + # XXX: What should the empty dtype be? Object or float? + data = np.array([], dtype=dtype) + + if not is_array_like(data): + try: + # probably shared code in sanitize_series + + data = sanitize_array(data, index=None) + except ValueError: + # NumPy may raise a ValueError on data like [1, []] + # we retry with object dtype here. + if dtype is None: + dtype = object + data = np.atleast_1d(np.asarray(data, dtype=dtype)) + else: + raise + + if copy: + # TODO: avoid double copy when dtype forces cast. 
+ data = data.copy() + + if fill_value is None: + fill_value_dtype = data.dtype if dtype is None else dtype + if fill_value_dtype is None: + fill_value = np.nan + else: + fill_value = na_value_for_dtype(fill_value_dtype) + + if isinstance(data, type(self)) and sparse_index is None: + sparse_index = data._sparse_index + sparse_values = np.asarray(data.sp_values, dtype=dtype) + elif sparse_index is None: + sparse_values, sparse_index, fill_value = make_sparse( + data, kind=kind, fill_value=fill_value, dtype=dtype + ) + else: + sparse_values = np.asarray(data, dtype=dtype) + if len(sparse_values) != sparse_index.npoints: + raise AssertionError( + f"Non array-like type {type(sparse_values)} must " + "have the same length as the index" + ) + self._sparse_index = sparse_index + self._sparse_values = sparse_values + self._dtype = SparseDtype(sparse_values.dtype, fill_value) + + @classmethod + def _simple_new( + cls, sparse_array: np.ndarray, sparse_index: SparseIndex, dtype: SparseDtype + ) -> "SparseArray": + new = cls([]) + new._sparse_index = sparse_index + new._sparse_values = sparse_array + new._dtype = dtype + return new + + @classmethod + def from_spmatrix(cls, data): + """ + Create a SparseArray from a scipy.sparse matrix. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + data : scipy.sparse.sp_matrix + This should be a SciPy sparse matrix where the size + of the second dimension is 1. In other words, a + sparse matrix with a single column. + + Returns + ------- + SparseArray + + Examples + -------- + >>> import scipy.sparse + >>> mat = scipy.sparse.coo_matrix((4, 1)) + >>> pd.arrays.SparseArray.from_spmatrix(mat) + [0.0, 0.0, 0.0, 0.0] + Fill: 0.0 + IntIndex + Indices: array([], dtype=int32) + """ + length, ncol = data.shape + + if ncol != 1: + raise ValueError(f"'data' must have a single column, not '{ncol}'") + + # our sparse index classes require that the positions be strictly + # increasing. So we need to sort loc, and arr accordingly. 
+ arr = data.data + idx, _ = data.nonzero() + loc = np.argsort(idx) + arr = arr.take(loc) + idx.sort() + + zero = np.array(0, dtype=arr.dtype).item() + dtype = SparseDtype(arr.dtype, zero) + index = IntIndex(length, idx) + + return cls._simple_new(arr, index, dtype) + + def __array__(self, dtype=None, copy=True) -> np.ndarray: + fill_value = self.fill_value + + if self.sp_index.ngaps == 0: + # Compat for na dtype and int values. + return self.sp_values + if dtype is None: + # Can NumPy represent this type? + # If not, `np.result_type` will raise. We catch that + # and return object. + if is_datetime64_any_dtype(self.sp_values.dtype): + # However, we *do* special-case the common case of + # a datetime64 with pandas NaT. + if fill_value is NaT: + # Can't put pd.NaT in a datetime64[ns] + fill_value = np.datetime64("NaT") + try: + dtype = np.result_type(self.sp_values.dtype, type(fill_value)) + except TypeError: + dtype = object + + out = np.full(self.shape, fill_value, dtype=dtype) + out[self.sp_index.to_int_index().indices] = self.sp_values + return out + + def __setitem__(self, key, value): + # I suppose we could allow setting of non-fill_value elements. + # TODO(SparseArray.__setitem__): remove special cases in + # ExtensionBlock.where + msg = "SparseArray does not support item assignment via setitem" + raise TypeError(msg) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars, dtype=dtype) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values, dtype=original.dtype) + + # ------------------------------------------------------------------------ + # Data + # ------------------------------------------------------------------------ + @property + def sp_index(self): + """ + The SparseIndex containing the location of non- ``fill_value`` points. + """ + return self._sparse_index + + @property + def sp_values(self): + """ + An ndarray containing the non- ``fill_value`` values. 
+ + Examples + -------- + >>> s = SparseArray([0, 0, 1, 0, 2], fill_value=0) + >>> s.sp_values + array([1, 2]) + """ + return self._sparse_values + + @property + def dtype(self): + return self._dtype + + @property + def fill_value(self): + """ + Elements in `data` that are `fill_value` are not stored. + + For memory savings, this should be the most common value in the array. + """ + return self.dtype.fill_value + + @fill_value.setter + def fill_value(self, value): + self._dtype = SparseDtype(self.dtype.subtype, value) + + @property + def kind(self) -> str: + """ + The kind of sparse index for this array. One of {'integer', 'block'}. + """ + if isinstance(self.sp_index, IntIndex): + return "integer" + else: + return "block" + + @property + def _valid_sp_values(self): + sp_vals = self.sp_values + mask = notna(sp_vals) + return sp_vals[mask] + + def __len__(self) -> int: + return self.sp_index.length + + @property + def _null_fill_value(self): + return self._dtype._is_na_fill_value + + def _fill_value_matches(self, fill_value): + if self._null_fill_value: + return isna(fill_value) + else: + return self.fill_value == fill_value + + @property + def nbytes(self) -> int: + return self.sp_values.nbytes + self.sp_index.nbytes + + @property + def density(self): + """ + The percent of non- ``fill_value`` points, as decimal. + + Examples + -------- + >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0) + >>> s.density + 0.6 + """ + r = float(self.sp_index.npoints) / float(self.sp_index.length) + return r + + @property + def npoints(self) -> int: + """ + The number of non- ``fill_value`` points. + + Examples + -------- + >>> s = SparseArray([0, 0, 1, 1, 1], fill_value=0) + >>> s.npoints + 3 + """ + return self.sp_index.npoints + + def isna(self): + # If null fill value, we want SparseDtype[bool, true] + # to preserve the same memory usage. 
+ dtype = SparseDtype(bool, self._null_fill_value) + return type(self)._simple_new(isna(self.sp_values), self.sp_index, dtype) + + def fillna(self, value=None, method=None, limit=None): + """ + Fill missing values with `value`. + + Parameters + ---------- + value : scalar, optional + method : str, optional + + .. warning:: + + Using 'method' will result in high memory use, + as all `fill_value` methods will be converted to + an in-memory ndarray + + limit : int, optional + + Returns + ------- + SparseArray + + Notes + ----- + When `value` is specified, the result's ``fill_value`` depends on + ``self.fill_value``. The goal is to maintain low-memory use. + + If ``self.fill_value`` is NA, the result dtype will be + ``SparseDtype(self.dtype, fill_value=value)``. This will preserve + amount of memory used before and after filling. + + When ``self.fill_value`` is not NA, the result dtype will be + ``self.dtype``. Again, this preserves the amount of memory used. + """ + if (method is None and value is None) or ( + method is not None and value is not None + ): + raise ValueError("Must specify one of 'method' or 'value'.") + + elif method is not None: + msg = "fillna with 'method' requires high memory usage." + warnings.warn(msg, PerformanceWarning) + filled = interpolate_2d(np.asarray(self), method=method, limit=limit) + return type(self)(filled, fill_value=self.fill_value) + + else: + new_values = np.where(isna(self.sp_values), value, self.sp_values) + + if self._null_fill_value: + # This is essentially just updating the dtype. 
+ new_dtype = SparseDtype(self.dtype.subtype, fill_value=value) + else: + new_dtype = self.dtype + + return self._simple_new(new_values, self._sparse_index, new_dtype) + + def shift(self, periods=1, fill_value=None): + + if not len(self) or periods == 0: + return self.copy() + + if isna(fill_value): + fill_value = self.dtype.na_value + + subtype = np.result_type(fill_value, self.dtype.subtype) + + if subtype != self.dtype.subtype: + # just coerce up front + arr = self.astype(SparseDtype(subtype, self.fill_value)) + else: + arr = self + + empty = self._from_sequence( + [fill_value] * min(abs(periods), len(self)), dtype=arr.dtype + ) + + if periods > 0: + a = empty + b = arr[:-periods] + else: + a = arr[abs(periods) :] + b = empty + return arr._concat_same_type([a, b]) + + def _first_fill_value_loc(self): + """ + Get the location of the first missing value. + + Returns + ------- + int + """ + if len(self) == 0 or self.sp_index.npoints == len(self): + return -1 + + indices = self.sp_index.to_int_index().indices + if not len(indices) or indices[0] > 0: + return 0 + + diff = indices[1:] - indices[:-1] + return np.searchsorted(diff, 2) + 1 + + def unique(self): + uniques = list(algos.unique(self.sp_values)) + fill_loc = self._first_fill_value_loc() + if fill_loc >= 0: + uniques.insert(fill_loc, self.fill_value) + return type(self)._from_sequence(uniques, dtype=self.dtype) + + def _values_for_factorize(self): + # Still override this for hash_pandas_object + return np.asarray(self), self.fill_value + + def factorize(self, na_sentinel=-1): + # Currently, ExtensionArray.factorize -> Tuple[ndarray, EA] + # The sparsity on this is backwards from what Sparse would want. Want + # ExtensionArray.factorize -> Tuple[EA, EA] + # Given that we have to return a dense array of codes, why bother + # implementing an efficient factorize? 
+ codes, uniques = algos.factorize(np.asarray(self), na_sentinel=na_sentinel) + uniques = SparseArray(uniques, dtype=self.dtype) + return codes, uniques + + def value_counts(self, dropna=True): + """ + Returns a Series containing counts of unique values. + + Parameters + ---------- + dropna : boolean, default True + Don't include counts of NaN, even if NaN is in sp_values. + + Returns + ------- + counts : Series + """ + from pandas import Index, Series + + keys, counts = algos._value_counts_arraylike(self.sp_values, dropna=dropna) + fcounts = self.sp_index.ngaps + if fcounts > 0: + if self._null_fill_value and dropna: + pass + else: + if self._null_fill_value: + mask = isna(keys) + else: + mask = keys == self.fill_value + + if mask.any(): + counts[mask] += fcounts + else: + keys = np.insert(keys, 0, self.fill_value) + counts = np.insert(counts, 0, fcounts) + + if not isinstance(keys, ABCIndexClass): + keys = Index(keys) + result = Series(counts, index=keys) + return result + + # -------- + # Indexing + # -------- + + def __getitem__(self, key): + # avoid mypy issues when importing at the top-level + from pandas.core.indexing import check_bool_indexer + + if isinstance(key, tuple): + if len(key) > 1: + raise IndexError("too many indices for array.") + key = key[0] + + if is_integer(key): + return self._get_val_at(key) + elif isinstance(key, tuple): + data_slice = self.to_dense()[key] + elif isinstance(key, slice): + # special case to preserve dtypes + if key == slice(None): + return self.copy() + # TODO: this logic is surely elsewhere + # TODO: this could be more efficient + indices = np.arange(len(self), dtype=np.int32)[key] + return self.take(indices) + else: + # TODO: I think we can avoid densifying when masking a + # boolean SparseArray with another. Need to look at the + # key's fill_value for True / False, and then do an intersection + # on the indicies of the sp_values. 
+ if isinstance(key, SparseArray): + if is_bool_dtype(key): + key = key.to_dense() + else: + key = np.asarray(key) + + key = check_array_indexer(self, key) + + if com.is_bool_indexer(key): + key = check_bool_indexer(self, key) + + return self.take(np.arange(len(key), dtype=np.int32)[key]) + elif hasattr(key, "__len__"): + return self.take(key) + else: + raise ValueError(f"Cannot slice with '{key}'") + + return type(self)(data_slice, kind=self.kind) + + def _get_val_at(self, loc): + n = len(self) + if loc < 0: + loc += n + + if loc >= n or loc < 0: + raise IndexError("Out of bounds access") + + sp_loc = self.sp_index.lookup(loc) + if sp_loc == -1: + return self.fill_value + else: + return libindex.get_value_at(self.sp_values, sp_loc) + + def take(self, indices, allow_fill=False, fill_value=None): + if is_scalar(indices): + raise ValueError(f"'indices' must be an array, not a scalar '{indices}'.") + indices = np.asarray(indices, dtype=np.int32) + + if indices.size == 0: + result = [] + kwargs = {"dtype": self.dtype} + elif allow_fill: + result = self._take_with_fill(indices, fill_value=fill_value) + kwargs = {} + else: + result = self._take_without_fill(indices) + kwargs = {"dtype": self.dtype} + + return type(self)(result, fill_value=self.fill_value, kind=self.kind, **kwargs) + + def _take_with_fill(self, indices, fill_value=None): + if fill_value is None: + fill_value = self.dtype.na_value + + if indices.min() < -1: + raise ValueError( + "Invalid value in 'indices'. Must be between -1 " + "and the length of the array." + ) + + if indices.max() >= len(self): + raise IndexError("out of bounds value in 'indices'.") + + if len(self) == 0: + # Empty... 
Allow taking only if all empty + if (indices == -1).all(): + dtype = np.result_type(self.sp_values, type(fill_value)) + taken = np.empty_like(indices, dtype=dtype) + taken.fill(fill_value) + return taken + else: + raise IndexError("cannot do a non-empty take from an empty axes.") + + sp_indexer = self.sp_index.lookup_array(indices) + + if self.sp_index.npoints == 0: + # Avoid taking from the empty self.sp_values + taken = np.full( + sp_indexer.shape, + fill_value=fill_value, + dtype=np.result_type(type(fill_value)), + ) + else: + taken = self.sp_values.take(sp_indexer) + + # sp_indexer may be -1 for two reasons + # 1.) we took for an index of -1 (new) + # 2.) we took a value that was self.fill_value (old) + new_fill_indices = indices == -1 + old_fill_indices = (sp_indexer == -1) & ~new_fill_indices + + # Fill in two steps. + # Old fill values + # New fill values + # potentially coercing to a new dtype at each stage. + + m0 = sp_indexer[old_fill_indices] < 0 + m1 = sp_indexer[new_fill_indices] < 0 + + result_type = taken.dtype + + if m0.any(): + result_type = np.result_type(result_type, type(self.fill_value)) + taken = taken.astype(result_type) + taken[old_fill_indices] = self.fill_value + + if m1.any(): + result_type = np.result_type(result_type, type(fill_value)) + taken = taken.astype(result_type) + taken[new_fill_indices] = fill_value + + return taken + + def _take_without_fill(self, indices): + to_shift = indices < 0 + indices = indices.copy() + + n = len(self) + + if (indices.max() >= n) or (indices.min() < -n): + if n == 0: + raise IndexError("cannot do a non-empty take from an empty axes.") + else: + raise IndexError("out of bounds value in 'indices'.") + + if to_shift.any(): + indices[to_shift] += n + + if self.sp_index.npoints == 0: + # edge case in take... 
+ # I think just return + out = np.full( + indices.shape, + self.fill_value, + dtype=np.result_type(type(self.fill_value)), + ) + arr, sp_index, fill_value = make_sparse(out, fill_value=self.fill_value) + return type(self)(arr, sparse_index=sp_index, fill_value=fill_value) + + sp_indexer = self.sp_index.lookup_array(indices) + taken = self.sp_values.take(sp_indexer) + fillable = sp_indexer < 0 + + if fillable.any(): + # TODO: may need to coerce array to fill value + result_type = np.result_type(taken, type(self.fill_value)) + taken = taken.astype(result_type) + taken[fillable] = self.fill_value + + return taken + + def searchsorted(self, v, side="left", sorter=None): + msg = "searchsorted requires high memory usage." + warnings.warn(msg, PerformanceWarning, stacklevel=2) + if not is_scalar(v): + v = np.asarray(v) + v = np.asarray(v) + return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter) + + def copy(self): + values = self.sp_values.copy() + return self._simple_new(values, self.sp_index, self.dtype) + + @classmethod + def _concat_same_type(cls, to_concat): + fill_values = [x.fill_value for x in to_concat] + + fill_value = fill_values[0] + + # np.nan isn't a singleton, so we may end up with multiple + # NaNs here, so we ignore tha all NA case too. + if not (len(set(fill_values)) == 1 or isna(fill_values).all()): + warnings.warn( + "Concatenating sparse arrays with multiple fill " + f"values: '{fill_values}'. 
Picking the first and " + "converting the rest.", + PerformanceWarning, + stacklevel=6, + ) + keep = to_concat[0] + to_concat2 = [keep] + + for arr in to_concat[1:]: + to_concat2.append(cls(np.asarray(arr), fill_value=fill_value)) + + to_concat = to_concat2 + + values = [] + length = 0 + + if to_concat: + sp_kind = to_concat[0].kind + else: + sp_kind = "integer" + + if sp_kind == "integer": + indices = [] + + for arr in to_concat: + idx = arr.sp_index.to_int_index().indices.copy() + idx += length # TODO: wraparound + length += arr.sp_index.length + + values.append(arr.sp_values) + indices.append(idx) + + data = np.concatenate(values) + indices = np.concatenate(indices) + sp_index = IntIndex(length, indices) + + else: + # when concatenating block indices, we don't claim that you'll + # get an identical index as concating the values and then + # creating a new index. We don't want to spend the time trying + # to merge blocks across arrays in `to_concat`, so the resulting + # BlockIndex may have more blocs. + blengths = [] + blocs = [] + + for arr in to_concat: + idx = arr.sp_index.to_block_index() + + values.append(arr.sp_values) + blocs.append(idx.blocs.copy() + length) + blengths.append(idx.blengths) + length += arr.sp_index.length + + data = np.concatenate(values) + blocs = np.concatenate(blocs) + blengths = np.concatenate(blengths) + + sp_index = BlockIndex(length, blocs, blengths) + + return cls(data, sparse_index=sp_index, fill_value=fill_value) + + def astype(self, dtype=None, copy=True): + """ + Change the dtype of a SparseArray. + + The output will always be a SparseArray. To convert to a dense + ndarray with a certain dtype, use :meth:`numpy.asarray`. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + For SparseDtype, this changes the dtype of + ``self.sp_values`` and the ``self.fill_value``. + + For other dtypes, this only changes the dtype of + ``self.sp_values``. 
+ + copy : bool, default True + Whether to ensure a copy is made, even if not necessary. + + Returns + ------- + SparseArray + + Examples + -------- + >>> arr = SparseArray([0, 0, 1, 2]) + >>> arr + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + + >>> arr.astype(np.dtype('int32')) + [0, 0, 1, 2] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + + Using a NumPy dtype with a different kind (e.g. float) will coerce + just ``self.sp_values``. + + >>> arr.astype(np.dtype('float64')) + ... # doctest: +NORMALIZE_WHITESPACE + [0, 0, 1.0, 2.0] + Fill: 0 + IntIndex + Indices: array([2, 3], dtype=int32) + + Use a SparseDtype if you wish to be change the fill value as well. + + >>> arr.astype(SparseDtype("float64", fill_value=np.nan)) + ... # doctest: +NORMALIZE_WHITESPACE + [nan, nan, 1.0, 2.0] + Fill: nan + IntIndex + Indices: array([2, 3], dtype=int32) + """ + dtype = self.dtype.update_dtype(dtype) + subtype = dtype._subtype_with_str + sp_values = astype_nansafe(self.sp_values, subtype, copy=copy) + if sp_values is self.sp_values and copy: + sp_values = sp_values.copy() + + return self._simple_new(sp_values, self.sp_index, dtype) + + def map(self, mapper): + """ + Map categories using input correspondence (dict, Series, or function). + + Parameters + ---------- + mapper : dict, Series, callable + The correspondence from old values to new. + + Returns + ------- + SparseArray + The output array will have the same density as the input. 
+ The output fill value will be the result of applying the + mapping to ``self.fill_value`` + + Examples + -------- + >>> arr = pd.arrays.SparseArray([0, 1, 2]) + >>> arr.apply(lambda x: x + 10) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + + >>> arr.apply({0: 10, 1: 11, 2: 12}) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + + >>> arr.apply(pd.Series([10, 11, 12], index=[0, 1, 2])) + [10, 11, 12] + Fill: 10 + IntIndex + Indices: array([1, 2], dtype=int32) + """ + # this is used in apply. + # We get hit since we're an "is_extension_type" but regular extension + # types are not hit. This may be worth adding to the interface. + if isinstance(mapper, ABCSeries): + mapper = mapper.to_dict() + + if isinstance(mapper, abc.Mapping): + fill_value = mapper.get(self.fill_value, self.fill_value) + sp_values = [mapper.get(x, None) for x in self.sp_values] + else: + fill_value = mapper(self.fill_value) + sp_values = [mapper(x) for x in self.sp_values] + + return type(self)(sp_values, sparse_index=self.sp_index, fill_value=fill_value) + + def to_dense(self): + """ + Convert SparseArray to a NumPy array. 
+ + Returns + ------- + arr : NumPy array + """ + return np.asarray(self, dtype=self.sp_values.dtype) + + _internal_get_values = to_dense + + # ------------------------------------------------------------------------ + # IO + # ------------------------------------------------------------------------ + def __setstate__(self, state): + """Necessary for making this object picklable""" + if isinstance(state, tuple): + # Compat for pandas < 0.24.0 + nd_state, (fill_value, sp_index) = state + sparse_values = np.array([]) + sparse_values.__setstate__(nd_state) + + self._sparse_values = sparse_values + self._sparse_index = sp_index + self._dtype = SparseDtype(sparse_values.dtype, fill_value) + else: + self.__dict__.update(state) + + def nonzero(self): + if self.fill_value == 0: + return (self.sp_index.to_int_index().indices,) + else: + return (self.sp_index.to_int_index().indices[self.sp_values != 0],) + + # ------------------------------------------------------------------------ + # Reductions + # ------------------------------------------------------------------------ + + def _reduce(self, name, skipna=True, **kwargs): + method = getattr(self, name, None) + + if method is None: + raise TypeError(f"cannot perform {name} with type {self.dtype}") + + if skipna: + arr = self + else: + arr = self.dropna() + + # we don't support these kwargs. + # They should only be present when called via pandas, so do it here. 
+ # instead of in `any` / `all` (which will raise if they're present, + # thanks to nv.validate + kwargs.pop("filter_type", None) + kwargs.pop("numeric_only", None) + kwargs.pop("op", None) + return getattr(arr, name)(**kwargs) + + def all(self, axis=None, *args, **kwargs): + """ + Tests whether all elements evaluate True + + Returns + ------- + all : bool + + See Also + -------- + numpy.all + """ + nv.validate_all(args, kwargs) + + values = self.sp_values + + if len(values) != len(self) and not np.all(self.fill_value): + return False + + return values.all() + + def any(self, axis=0, *args, **kwargs): + """ + Tests whether at least one of elements evaluate True + + Returns + ------- + any : bool + + See Also + -------- + numpy.any + """ + nv.validate_any(args, kwargs) + + values = self.sp_values + + if len(values) != len(self) and np.any(self.fill_value): + return True + + return values.any().item() + + def sum(self, axis=0, *args, **kwargs): + """ + Sum of non-NA/null values + + Returns + ------- + sum : float + """ + nv.validate_sum(args, kwargs) + valid_vals = self._valid_sp_values + sp_sum = valid_vals.sum() + if self._null_fill_value: + return sp_sum + else: + nsparse = self.sp_index.ngaps + return sp_sum + self.fill_value * nsparse + + def cumsum(self, axis=0, *args, **kwargs): + """ + Cumulative sum of non-NA/null values. + + When performing the cumulative summation, any non-NA/null values will + be skipped. The resulting SparseArray will preserve the locations of + NaN values, but the fill value will be `np.nan` regardless. + + Parameters + ---------- + axis : int or None + Axis over which to perform the cumulative summation. If None, + perform cumulative summation over flattened array. + + Returns + ------- + cumsum : SparseArray + """ + nv.validate_cumsum(args, kwargs) + + if axis is not None and axis >= self.ndim: # Mimic ndarray behaviour. 
+ raise ValueError(f"axis(={axis}) out of bounds") + + if not self._null_fill_value: + return SparseArray(self.to_dense()).cumsum() + + return SparseArray( + self.sp_values.cumsum(), + sparse_index=self.sp_index, + fill_value=self.fill_value, + ) + + def mean(self, axis=0, *args, **kwargs): + """ + Mean of non-NA/null values + + Returns + ------- + mean : float + """ + nv.validate_mean(args, kwargs) + valid_vals = self._valid_sp_values + sp_sum = valid_vals.sum() + ct = len(valid_vals) + + if self._null_fill_value: + return sp_sum / ct + else: + nsparse = self.sp_index.ngaps + return (sp_sum + self.fill_value * nsparse) / (ct + nsparse) + + def transpose(self, *axes): + """ + Returns the SparseArray. + """ + return self + + @property + def T(self): + """ + Returns the SparseArray. + """ + return self + + # ------------------------------------------------------------------------ + # Ufuncs + # ------------------------------------------------------------------------ + + _HANDLED_TYPES = (np.ndarray, numbers.Number) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + out = kwargs.get("out", ()) + + for x in inputs + out: + if not isinstance(x, self._HANDLED_TYPES + (SparseArray,)): + return NotImplemented + + # for binary ops, use our custom dunder methods + result = ops.maybe_dispatch_ufunc_to_dunder_op( + self, ufunc, method, *inputs, **kwargs + ) + if result is not NotImplemented: + return result + + if len(inputs) == 1: + # No alignment necessary. + sp_values = getattr(ufunc, method)(self.sp_values, **kwargs) + fill_value = getattr(ufunc, method)(self.fill_value, **kwargs) + + if isinstance(sp_values, tuple): + # multiple outputs. e.g. modf + arrays = tuple( + self._simple_new( + sp_value, self.sp_index, SparseDtype(sp_value.dtype, fv) + ) + for sp_value, fv in zip(sp_values, fill_value) + ) + return arrays + elif is_scalar(sp_values): + # e.g. 
reductions + return sp_values + + return self._simple_new( + sp_values, self.sp_index, SparseDtype(sp_values.dtype, fill_value) + ) + + result = getattr(ufunc, method)(*[np.asarray(x) for x in inputs], **kwargs) + if out: + if len(out) == 1: + out = out[0] + return out + + if type(result) is tuple: + return tuple(type(self)(x) for x in result) + elif method == "at": + # no return value + return None + else: + return type(self)(result) + + def __abs__(self): + return np.abs(self) + + # ------------------------------------------------------------------------ + # Ops + # ------------------------------------------------------------------------ + + @classmethod + def _create_unary_method(cls, op) -> Callable[["SparseArray"], "SparseArray"]: + def sparse_unary_method(self) -> "SparseArray": + fill_value = op(np.array(self.fill_value)).item() + values = op(self.sp_values) + dtype = SparseDtype(values.dtype, fill_value) + return cls._simple_new(values, self.sp_index, dtype) + + name = f"__{op.__name__}__" + return compat.set_function_name(sparse_unary_method, name, cls) + + @classmethod + def _create_arithmetic_method(cls, op): + op_name = op.__name__ + + @unpack_zerodim_and_defer(op_name) + def sparse_arithmetic_method(self, other): + + if isinstance(other, SparseArray): + return _sparse_array_op(self, other, op, op_name) + + elif is_scalar(other): + with np.errstate(all="ignore"): + fill = op(_get_fill(self), np.asarray(other)) + result = op(self.sp_values, other) + + if op_name == "divmod": + left, right = result + lfill, rfill = fill + return ( + _wrap_result(op_name, left, self.sp_index, lfill), + _wrap_result(op_name, right, self.sp_index, rfill), + ) + + return _wrap_result(op_name, result, self.sp_index, fill) + + else: + other = np.asarray(other) + with np.errstate(all="ignore"): + # TODO: look into _wrap_result + if len(self) != len(other): + raise AssertionError( + (f"length mismatch: {len(self)} vs. 
{len(other)}") + ) + if not isinstance(other, SparseArray): + dtype = getattr(other, "dtype", None) + other = SparseArray( + other, fill_value=self.fill_value, dtype=dtype + ) + return _sparse_array_op(self, other, op, op_name) + + name = f"__{op.__name__}__" + return compat.set_function_name(sparse_arithmetic_method, name, cls) + + @classmethod + def _create_comparison_method(cls, op): + op_name = op.__name__ + if op_name in {"and_", "or_"}: + op_name = op_name[:-1] + + @unpack_zerodim_and_defer(op_name) + def cmp_method(self, other): + + if not is_scalar(other) and not isinstance(other, type(self)): + # convert list-like to ndarray + other = np.asarray(other) + + if isinstance(other, np.ndarray): + # TODO: make this more flexible than just ndarray... + if len(self) != len(other): + raise AssertionError( + f"length mismatch: {len(self)} vs. {len(other)}" + ) + other = SparseArray(other, fill_value=self.fill_value) + + if isinstance(other, SparseArray): + return _sparse_array_op(self, other, op, op_name) + else: + with np.errstate(all="ignore"): + fill_value = op(self.fill_value, other) + result = op(self.sp_values, other) + + return type(self)( + result, + sparse_index=self.sp_index, + fill_value=fill_value, + dtype=np.bool_, + ) + + name = f"__{op.__name__}__" + return compat.set_function_name(cmp_method, name, cls) + + @classmethod + def _add_unary_ops(cls): + cls.__pos__ = cls._create_unary_method(operator.pos) + cls.__neg__ = cls._create_unary_method(operator.neg) + cls.__invert__ = cls._create_unary_method(operator.invert) + + @classmethod + def _add_comparison_ops(cls): + cls.__and__ = cls._create_comparison_method(operator.and_) + cls.__or__ = cls._create_comparison_method(operator.or_) + cls.__xor__ = cls._create_arithmetic_method(operator.xor) + super()._add_comparison_ops() + + # ---------- + # Formatting + # ----------- + def __repr__(self) -> str: + pp_str = printing.pprint_thing(self) + pp_fill = printing.pprint_thing(self.fill_value) + pp_index = 
printing.pprint_thing(self.sp_index) + return f"{pp_str}\nFill: {pp_fill}\n{pp_index}" + + def _formatter(self, boxed=False): + # Defer to the formatter from the GenericArrayFormatter calling us. + # This will infer the correct formatter from the dtype of the values. + return None + + +SparseArray._add_arithmetic_ops() +SparseArray._add_comparison_ops() +SparseArray._add_unary_ops() + + +def make_sparse(arr, kind="block", fill_value=None, dtype=None, copy=False): + """ + Convert ndarray to sparse format + + Parameters + ---------- + arr : ndarray + kind : {'block', 'integer'} + fill_value : NaN or another value + dtype : np.dtype, optional + copy : bool, default False + + Returns + ------- + (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar) + """ + + arr = com.values_from_object(arr) + + if arr.ndim > 1: + raise TypeError("expected dimension <= 1 data") + + if fill_value is None: + fill_value = na_value_for_dtype(arr.dtype) + + if isna(fill_value): + mask = notna(arr) + else: + # cast to object comparison to be safe + if is_string_dtype(arr): + arr = arr.astype(object) + + if is_object_dtype(arr.dtype): + # element-wise equality check method in numpy doesn't treat + # each element type, eg. 0, 0.0, and False are treated as + # same. So we have to check the both of its type and value. 
+ mask = splib.make_mask_object_ndarray(arr, fill_value) + else: + mask = arr != fill_value + + length = len(arr) + if length != len(mask): + # the arr is a SparseArray + indices = mask.sp_index.indices + else: + indices = mask.nonzero()[0].astype(np.int32) + + index = _make_index(length, indices, kind) + sparsified_values = arr[mask] + if dtype is not None: + sparsified_values = astype_nansafe(sparsified_values, dtype=dtype) + # TODO: copy + return sparsified_values, index, fill_value + + +def _make_index(length, indices, kind): + + if kind == "block" or isinstance(kind, BlockIndex): + locs, lens = splib.get_blocks(indices) + index = BlockIndex(length, locs, lens) + elif kind == "integer" or isinstance(kind, IntIndex): + index = IntIndex(length, indices) + else: # pragma: no cover + raise ValueError("must be block or integer type") + return index diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py new file mode 100644 index 00000000..6f15681c --- /dev/null +++ b/pandas/core/arrays/sparse/dtype.py @@ -0,0 +1,348 @@ +"""Sparse Dtype""" + +import re +from typing import Any, Tuple + +import numpy as np + +from pandas._typing import Dtype + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.missing import isna, na_value_for_dtype + + +@register_extension_dtype +class SparseDtype(ExtensionDtype): + """ + Dtype for data stored in :class:`SparseArray`. + + This dtype implements the pandas ExtensionDtype interface. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 + The dtype of the underlying array storing the non-fill value values. 
+ fill_value : scalar, optional + The scalar value not stored in the SparseArray. By default, this + depends on `dtype`. + + =========== ========== + dtype na_value + =========== ========== + float ``np.nan`` + int ``0`` + bool ``False`` + datetime64 ``pd.NaT`` + timedelta64 ``pd.NaT`` + =========== ========== + + The default value may be overridden by specifying a `fill_value`. + + Attributes + ---------- + None + + Methods + ------- + None + """ + + # We include `_is_na_fill_value` in the metadata to avoid hash collisions + # between SparseDtype(float, 0.0) and SparseDtype(float, nan). + # Without is_na_fill_value in the comparison, those would be equal since + # hash(nan) is (sometimes?) 0. + _metadata = ("_dtype", "_fill_value", "_is_na_fill_value") + + def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None): + + if isinstance(dtype, type(self)): + if fill_value is None: + fill_value = dtype.fill_value + dtype = dtype.subtype + + dtype = pandas_dtype(dtype) + if is_string_dtype(dtype): + dtype = np.dtype("object") + + if fill_value is None: + fill_value = na_value_for_dtype(dtype) + + if not is_scalar(fill_value): + raise ValueError(f"fill_value must be a scalar. Got {fill_value} instead") + self._dtype = dtype + self._fill_value = fill_value + + def __hash__(self): + # Python3 doesn't inherit __hash__ when a base class overrides + # __eq__, so we explicitly do it here. + return super().__hash__() + + def __eq__(self, other: Any) -> bool: + # We have to override __eq__ to handle NA values in _metadata. + # The base class does simple == checks, which fail for NA. 
+ if isinstance(other, str): + try: + other = self.construct_from_string(other) + except TypeError: + return False + + if isinstance(other, type(self)): + subtype = self.subtype == other.subtype + if self._is_na_fill_value: + # this case is complicated by two things: + # SparseDtype(float, float(nan)) == SparseDtype(float, np.nan) + # SparseDtype(float, np.nan) != SparseDtype(float, pd.NaT) + # i.e. we want to treat any floating-point NaN as equal, but + # not a floating-point NaN and a datetime NaT. + fill_value = ( + other._is_na_fill_value + and isinstance(self.fill_value, type(other.fill_value)) + or isinstance(other.fill_value, type(self.fill_value)) + ) + else: + fill_value = self.fill_value == other.fill_value + + return subtype and fill_value + return False + + @property + def fill_value(self): + """ + The fill value of the array. + + Converting the SparseArray to a dense ndarray will fill the + array with this value. + + .. warning:: + + It's possible to end up with a SparseArray that has ``fill_value`` + values in ``sp_values``. This can occur, for example, when setting + ``SparseArray.fill_value`` directly. + """ + return self._fill_value + + @property + def _is_na_fill_value(self): + return isna(self.fill_value) + + @property + def _is_numeric(self): + return not is_object_dtype(self.subtype) + + @property + def _is_boolean(self): + return is_bool_dtype(self.subtype) + + @property + def kind(self): + """ + The sparse kind. Either 'integer', or 'block'. + """ + return self.subtype.kind + + @property + def type(self): + return self.subtype.type + + @property + def subtype(self): + return self._dtype + + @property + def name(self): + return f"Sparse[{self.subtype.name}, {self.fill_value}]" + + def __repr__(self) -> str: + return self.name + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + from pandas.core.arrays.sparse.array import SparseArray + + return SparseArray + + @classmethod + def construct_from_string(cls, string): + """ + Construct a SparseDtype from a string form. + + Parameters + ---------- + string : str + Can take the following forms. + + string dtype + ================ ============================ + 'int' SparseDtype[np.int64, 0] + 'Sparse' SparseDtype[np.float64, nan] + 'Sparse[int]' SparseDtype[np.int64, 0] + 'Sparse[int, 0]' SparseDtype[np.int64, 0] + ================ ============================ + + It is not possible to specify non-default fill values + with a string. An argument like ``'Sparse[int, 1]'`` + will raise a ``TypeError`` because the default fill value + for integers is 0. + + Returns + ------- + SparseDtype + """ + msg = f"Cannot construct a 'SparseDtype' from '{string}'" + if string.startswith("Sparse"): + try: + sub_type, has_fill_value = cls._parse_subtype(string) + except ValueError: + raise TypeError(msg) + else: + result = SparseDtype(sub_type) + msg = ( + f"Cannot construct a 'SparseDtype' from '{string}'.\n\nIt " + "looks like the fill_value in the string is not " + "the default for the dtype. Non-default fill_values " + "are not supported. Use the 'SparseDtype()' " + "constructor instead." + ) + if has_fill_value and str(result) != string: + raise TypeError(msg) + return result + else: + raise TypeError(msg) + + @staticmethod + def _parse_subtype(dtype: str) -> Tuple[str, bool]: + """ + Parse a string to get the subtype + + Parameters + ---------- + dtype : str + A string like + + * Sparse[subtype] + * Sparse[subtype, fill_value] + + Returns + ------- + subtype : str + + Raises + ------ + ValueError + When the subtype cannot be extracted. 
+ """ + xpr = re.compile(r"Sparse\[(?P[^,]*)(, )?(?P.*?)?\]$") + m = xpr.match(dtype) + has_fill_value = False + if m: + subtype = m.groupdict()["subtype"] + has_fill_value = bool(m.groupdict()["fill_value"]) + elif dtype == "Sparse": + subtype = "float64" + else: + raise ValueError(f"Cannot parse {dtype}") + return subtype, has_fill_value + + @classmethod + def is_dtype(cls, dtype): + dtype = getattr(dtype, "dtype", dtype) + if isinstance(dtype, str) and dtype.startswith("Sparse"): + sub_type, _ = cls._parse_subtype(dtype) + dtype = np.dtype(sub_type) + elif isinstance(dtype, cls): + return True + return isinstance(dtype, np.dtype) or dtype == "Sparse" + + def update_dtype(self, dtype): + """ + Convert the SparseDtype to a new dtype. + + This takes care of converting the ``fill_value``. + + Parameters + ---------- + dtype : Union[str, numpy.dtype, SparseDtype] + The new dtype to use. + + * For a SparseDtype, it is simply returned + * For a NumPy dtype (or str), the current fill value + is converted to the new dtype, and a SparseDtype + with `dtype` and the new fill value is returned. + + Returns + ------- + SparseDtype + A new SparseDtype with the correct `dtype` and fill value + for that `dtype`. + + Raises + ------ + ValueError + When the current fill value cannot be converted to the + new `dtype` (e.g. trying to convert ``np.nan`` to an + integer dtype). + + + Examples + -------- + >>> SparseDtype(int, 0).update_dtype(float) + Sparse[float64, 0.0] + + >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) + Sparse[float64, nan] + """ + cls = type(self) + dtype = pandas_dtype(dtype) + + if not isinstance(dtype, cls): + fill_value = astype_nansafe(np.array(self.fill_value), dtype).item() + dtype = cls(dtype, fill_value=fill_value) + + return dtype + + @property + def _subtype_with_str(self): + """ + Whether the SparseDtype's subtype should be considered ``str``. + + Typically, pandas will store string data in an object-dtype array. 
+ When converting values to a dtype, e.g. in ``.astype``, we need to + be more specific, we need the actual underlying type. + + Returns + ------- + + >>> SparseDtype(int, 1)._subtype_with_str + dtype('int64') + + >>> SparseDtype(object, 1)._subtype_with_str + dtype('O') + + >>> dtype = SparseDtype(str, '') + >>> dtype.subtype + dtype('O') + + >>> dtype._subtype_with_str + str + """ + if isinstance(self.fill_value, str): + return type(self.fill_value) + return self.subtype diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py new file mode 100644 index 00000000..88d63071 --- /dev/null +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -0,0 +1,146 @@ +""" +Interaction with scipy.sparse matrices. + +Currently only includes to_coo helpers. +""" +from pandas.core.indexes.api import Index, MultiIndex +from pandas.core.series import Series + + +def _check_is_partition(parts, whole): + whole = set(whole) + parts = [set(x) for x in parts] + if set.intersection(*parts) != set(): + raise ValueError("Is not a partition because intersection is not null.") + if set.union(*parts) != whole: + raise ValueError("Is not a partition because union is not the whole.") + + +def _to_ijv(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ For arbitrary (MultiIndexed) SparseSeries return + (v, i, j, ilabels, jlabels) where (v, (i, j)) is suitable for + passing to scipy.sparse.coo constructor. """ + # index and column levels must be a partition of the index + _check_is_partition([row_levels, column_levels], range(ss.index.nlevels)) + + # from the SparseSeries: get the labels and data for non-null entries + values = ss._data.internal_values()._valid_sp_values + + nonnull_labels = ss.dropna() + + def get_indexers(levels): + """ Return sparse coords and dense labels for subset levels """ + + # TODO: how to do this better? 
cleanly slice nonnull_labels given the + # coord + values_ilabels = [tuple(x[i] for i in levels) for x in nonnull_labels.index] + if len(levels) == 1: + values_ilabels = [x[0] for x in values_ilabels] + + # # performance issues with groupby ################################### + # TODO: these two lines can replace the code below but + # groupby is too slow (in some cases at least) + # labels_to_i = ss.groupby(level=levels, sort=sort_labels).first() + # labels_to_i[:] = np.arange(labels_to_i.shape[0]) + + def _get_label_to_i_dict(labels, sort_labels=False): + """ Return dict of unique labels to number. + Optionally sort by label. + """ + labels = Index(map(tuple, labels)).unique().tolist() # squish + if sort_labels: + labels = sorted(labels) + return {k: i for i, k in enumerate(labels)} + + def _get_index_subset_to_coord_dict(index, subset, sort_labels=False): + ilabels = list(zip(*[index._get_level_values(i) for i in subset])) + labels_to_i = _get_label_to_i_dict(ilabels, sort_labels=sort_labels) + labels_to_i = Series(labels_to_i) + if len(subset) > 1: + labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) + labels_to_i.index.names = [index.names[i] for i in subset] + else: + labels_to_i.index = Index(x[0] for x in labels_to_i.index) + labels_to_i.index.name = index.names[subset[0]] + + labels_to_i.name = "value" + return labels_to_i + + labels_to_i = _get_index_subset_to_coord_dict( + ss.index, levels, sort_labels=sort_labels + ) + # ##################################################################### + # ##################################################################### + + i_coord = labels_to_i[values_ilabels].tolist() + i_labels = labels_to_i.index.tolist() + + return i_coord, i_labels + + i_coord, i_labels = get_indexers(row_levels) + j_coord, j_labels = get_indexers(column_levels) + + return values, i_coord, j_coord, i_labels, j_labels + + +def _sparse_series_to_coo(ss, row_levels=(0,), column_levels=(1,), sort_labels=False): + """ + Convert a 
SparseSeries to a scipy.sparse.coo_matrix using index + levels row_levels, column_levels as the row and column + labels respectively. Returns the sparse_matrix, row and column labels. + """ + + import scipy.sparse + + if ss.index.nlevels < 2: + raise ValueError("to_coo requires MultiIndex with nlevels > 2") + if not ss.index.is_unique: + raise ValueError( + "Duplicate index entries are not allowed in to_coo transformation." + ) + + # to keep things simple, only rely on integer indexing (not labels) + row_levels = [ss.index._get_level_number(x) for x in row_levels] + column_levels = [ss.index._get_level_number(x) for x in column_levels] + + v, i, j, rows, columns = _to_ijv( + ss, row_levels=row_levels, column_levels=column_levels, sort_labels=sort_labels + ) + sparse_matrix = scipy.sparse.coo_matrix( + (v, (i, j)), shape=(len(rows), len(columns)) + ) + return sparse_matrix, rows, columns + + +def _coo_to_sparse_series(A, dense_index: bool = False): + """ + Convert a scipy.sparse.coo_matrix to a SparseSeries. + + Parameters + ---------- + A : scipy.sparse.coo.coo_matrix + dense_index : bool, default False + + Returns + ------- + Series + + Raises + ------ + TypeError if A is not a coo_matrix + """ + from pandas import SparseDtype + + try: + s = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + except AttributeError: + raise TypeError(f"Expected coo_matrix. Got {type(A).__name__} instead.") + s = s.sort_index() + s = s.astype(SparseDtype(s.dtype)) + if dense_index: + # is there a better constructor method to use here? 
+ i = range(A.shape[0]) + j = range(A.shape[1]) + ind = MultiIndex.from_product([i, j]) + s = s.reindex(ind) + return s diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py new file mode 100644 index 00000000..b53484e1 --- /dev/null +++ b/pandas/core/arrays/string_.py @@ -0,0 +1,323 @@ +import operator +from typing import Type + +import numpy as np + +from pandas._libs import lib, missing as libmissing + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.common import pandas_dtype +from pandas.core.dtypes.dtypes import register_extension_dtype +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +from pandas.core.dtypes.inference import is_array_like + +from pandas import compat +from pandas.core import ops +from pandas.core.arrays import PandasArray +from pandas.core.construction import extract_array +from pandas.core.indexers import check_array_indexer +from pandas.core.missing import isna + + +@register_extension_dtype +class StringDtype(ExtensionDtype): + """ + Extension dtype for string data. + + .. versionadded:: 1.0.0 + + .. warning:: + + StringDtype is considered experimental. The implementation and + parts of the API may change without warning. + + In particular, StringDtype.na_value may change to no longer be + ``numpy.nan``. 
+ + Attributes + ---------- + None + + Methods + ------- + None + + Examples + -------- + >>> pd.StringDtype() + StringDtype + """ + + name = "string" + + #: StringDtype.na_value uses pandas.NA + na_value = libmissing.NA + + @property + def type(self) -> Type: + return str + + @classmethod + def construct_array_type(cls) -> "Type[StringArray]": + return StringArray + + def __repr__(self) -> str: + return "StringDtype" + + def __from_arrow__(self, array): + """Construct StringArray from passed pyarrow Array/ChunkedArray""" + import pyarrow + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + # pyarrow.ChunkedArray + chunks = array.chunks + + results = [] + for arr in chunks: + # using _from_sequence to ensure None is converted to NA + str_arr = StringArray._from_sequence(np.array(arr)) + results.append(str_arr) + + return StringArray._concat_same_type(results) + + +class StringArray(PandasArray): + """ + Extension array for string data. + + .. versionadded:: 1.0.0 + + .. warning:: + + StringArray is considered experimental. The implementation and + parts of the API may change without warning. + + Parameters + ---------- + values : array-like + The array of data. + + .. warning:: + + Currently, this expects an object-dtype ndarray + where the elements are Python strings or :attr:`pandas.NA`. + This may change without warning in the future. Use + :meth:`pandas.array` with ``dtype="string"`` for a stable way of + creating a `StringArray` from any sequence. + + copy : bool, default False + Whether to copy the array of data. + + Attributes + ---------- + None + + Methods + ------- + None + + See Also + -------- + array + The recommended function for creating a StringArray. + Series.str + The string methods are available on Series backed by + a StringArray. + + Notes + ----- + StringArray returns a BooleanArray for comparison methods. 

    Examples
    --------
    >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string")
    <StringArray>
    ['This is', 'some text', <NA>, 'data.']
    Length: 4, dtype: string

    Unlike ``object`` dtype arrays, ``StringArray`` doesn't allow non-string
    values.

    >>> pd.array(['1', 1], dtype="string")
    Traceback (most recent call last):
    ...
    ValueError: StringArray requires a sequence of strings or pandas.NA

    For comparison methods, this returns a :class:`pandas.BooleanArray`

    >>> pd.array(["a", None, "c"], dtype="string") == "a"
    <BooleanArray>
    [True, <NA>, False]
    Length: 3, dtype: boolean
    """

    # undo the PandasArray hack
    _typ = "extension"

    def __init__(self, values, copy=False):
        # Unwrap Series/Index-like containers to the underlying array.
        values = extract_array(values)
        # Values that already are a StringArray are not validated again.
        skip_validation = isinstance(values, type(self))

        super().__init__(values, copy=copy)
        # Replace the dtype set by the parent constructor with StringDtype.
        self._dtype = StringDtype()
        if not skip_validation:
            self._validate()

    def _validate(self):
        """Validate that we only store NA or strings."""
        # An empty array skips the element check but not the dtype check.
        if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
            raise ValueError("StringArray requires a sequence of strings or pandas.NA")
        if self._ndarray.dtype != "object":
            raise ValueError(
                "StringArray requires a sequence of strings or pandas.NA. Got "
                f"'{self._ndarray.dtype}' dtype instead."
            )

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        # Only the "string" dtype is accepted when a dtype is given.
        if dtype:
            assert dtype == "string"

        result = np.asarray(scalars, dtype="object")
        # np.asarray returns its input unchanged when no conversion is
        # needed; copy explicitly in that case if the caller asked for one.
        if copy and result is scalars:
            result = result.copy()

        # Standardize all missing-like values to NA
        # TODO: it would be nice to do this in _validate / lib.is_string_array
        # We are already doing a scan over the values there.
+ na_values = isna(result) + if na_values.any(): + if result is scalars: + # force a copy now, if we haven't already + result = result.copy() + result[na_values] = StringDtype.na_value + + return cls(result) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + return cls._from_sequence(strings, dtype=dtype, copy=copy) + + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. + """ + import pyarrow as pa + + if type is None: + type = pa.string() + + values = self._ndarray.copy() + values[self.isna()] = None + return pa.array(values, type=type, from_pandas=True) + + def _values_for_factorize(self): + arr = self._ndarray.copy() + mask = self.isna() + arr[mask] = -1 + return arr, -1 + + def __setitem__(self, key, value): + value = extract_array(value, extract_numpy=True) + if isinstance(value, type(self)): + # extract_array doesn't extract PandasArray subclasses + value = value._ndarray + + key = check_array_indexer(self, key) + scalar_key = lib.is_scalar(key) + scalar_value = lib.is_scalar(value) + if scalar_key and not scalar_value: + raise ValueError("setting an array element with a sequence.") + + # validate new items + if scalar_value: + if isna(value): + value = StringDtype.na_value + elif not isinstance(value, str): + raise ValueError( + f"Cannot set non-string value '{value}' into a StringArray." 
                )
        else:
            # Array-like value: coerce to an object array if needed, then
            # require every element to be a string (missing values allowed).
            if not is_array_like(value):
                value = np.asarray(value, dtype=object)
            if len(value) and not lib.is_string_array(value, skipna=True):
                raise ValueError("Must provide strings.")

        super().__setitem__(key, value)

    def fillna(self, value=None, method=None, limit=None):
        # Delegates to the parent implementation unchanged.
        # TODO: validate dtype
        return super().fillna(value, method, limit)

    def astype(self, dtype, copy=True):
        # Casting to StringDtype is a no-op apart from the optional copy;
        # every other target dtype is handled by the parent class.
        dtype = pandas_dtype(dtype)
        if isinstance(dtype, StringDtype):
            if copy:
                return self.copy()
            return self
        return super().astype(dtype, copy)

    def _reduce(self, name, skipna=True, **kwargs):
        # No reduction is supported; every reduction name raises.
        raise TypeError(f"Cannot perform reduction '{name}' with string dtype")

    def value_counts(self, dropna=False):
        # Count on the underlying ndarray and cast the counts to "Int64".
        from pandas import value_counts

        return value_counts(self._ndarray, dropna=dropna).astype("Int64")

    # Override parent because we have different return types.
    @classmethod
    def _create_arithmetic_method(cls, op):
        # Note: this handles both arithmetic and comparison methods.
+ def method(self, other): + from pandas.arrays import BooleanArray + + assert op.__name__ in ops.ARITHMETIC_BINOPS | ops.COMPARISON_BINOPS + + if isinstance(other, (ABCIndexClass, ABCSeries, ABCDataFrame)): + return NotImplemented + + elif isinstance(other, cls): + other = other._ndarray + + mask = isna(self) | isna(other) + valid = ~mask + + if not lib.is_scalar(other): + if len(other) != len(self): + # prevent improper broadcasting when other is 2D + raise ValueError( + f"Lengths of operands do not match: {len(self)} != {len(other)}" + ) + + other = np.asarray(other) + other = other[valid] + + if op.__name__ in ops.ARITHMETIC_BINOPS: + result = np.empty_like(self._ndarray, dtype="object") + result[mask] = StringDtype.na_value + result[valid] = op(self._ndarray[valid], other) + return StringArray(result) + else: + # logical + result = np.zeros(len(self._ndarray), dtype="bool") + result[valid] = op(self._ndarray[valid], other) + return BooleanArray(result, mask) + + return compat.set_function_name(method, f"__{op.__name__}__", cls) + + @classmethod + def _add_arithmetic_ops(cls): + cls.__add__ = cls._create_arithmetic_method(operator.add) + cls.__radd__ = cls._create_arithmetic_method(ops.radd) + + cls.__mul__ = cls._create_arithmetic_method(operator.mul) + cls.__rmul__ = cls._create_arithmetic_method(ops.rmul) + + _create_comparison_method = _create_arithmetic_method + + +StringArray._add_arithmetic_ops() +StringArray._add_comparison_ops() diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py new file mode 100644 index 00000000..c34d14f1 --- /dev/null +++ b/pandas/core/arrays/timedeltas.py @@ -0,0 +1,1090 @@ +from datetime import timedelta +from typing import List + +import numpy as np + +from pandas._libs import lib, tslibs +from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT +from pandas._libs.tslibs.fields import get_timedelta_field +from pandas._libs.tslibs.timedeltas import ( + array_to_timedelta64, + 
parse_timedelta_unit, + precision_from_unit, +) +from pandas.compat.numpy import function as nv + +from pandas.core.dtypes.common import ( + _NS_DTYPE, + _TD_DTYPE, + is_dtype_equal, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndexClass, + ABCSeries, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import nanops +from pandas.core.algorithms import checked_add_with_arr +from pandas.core.arrays import datetimelike as dtl +import pandas.core.common as com + +from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import Tick + +_BAD_DTYPE = "dtype {dtype} cannot be converted to timedelta64[ns]" + + +def _is_convertible_to_td(key): + return isinstance(key, (Tick, timedelta, np.timedelta64, str)) + + +def _field_accessor(name, alias, docstring=None): + def f(self): + values = self.asi8 + result = get_timedelta_field(values, alias) + if self._hasnans: + result = self._maybe_mask_results( + result, fill_value=None, convert="float64" + ) + + return result + + f.__name__ = name + f.__doc__ = f"\n{docstring}\n" + return property(f) + + +class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): + """ + Pandas ExtensionArray for timedelta data. + + .. versionadded:: 0.24.0 + + .. warning:: + + TimedeltaArray is currently experimental, and its API may change + without warning. In particular, :attr:`TimedeltaArray.dtype` is + expected to change to be an instance of an ``ExtensionDtype`` + subclass. + + Parameters + ---------- + values : array-like + The timedelta data. + + dtype : numpy.dtype + Currently, only ``numpy.dtype("timedelta64[ns]")`` is accepted. + freq : Offset, optional + copy : bool, default False + Whether to copy the underlying array of data. 
+ + Attributes + ---------- + None + + Methods + ------- + None + """ + + _typ = "timedeltaarray" + _scalar_type = Timedelta + _recognized_scalars = (timedelta, np.timedelta64, Tick) + _is_recognized_dtype = is_timedelta64_dtype + + __array_priority__ = 1000 + # define my properties & methods for delegation + _other_ops: List[str] = [] + _bool_ops: List[str] = [] + _object_ops = ["freq"] + _field_ops = ["days", "seconds", "microseconds", "nanoseconds"] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _datetimelike_methods = [ + "to_pytimedelta", + "total_seconds", + "round", + "floor", + "ceil", + ] + + # Note: ndim must be defined to ensure NaT.__richcmp(TimedeltaArray) + # operates pointwise. + + @property + def _box_func(self): + return lambda x: Timedelta(x, unit="ns") + + @property + def dtype(self): + """ + The dtype for the TimedeltaArray. + + .. warning:: + + A future version of pandas will change dtype to be an instance + of a :class:`pandas.api.extensions.ExtensionDtype` subclass, + not a ``numpy.dtype``. + + Returns + ------- + numpy.dtype + """ + return _TD_DTYPE + + # ---------------------------------------------------------------- + # Constructors + + def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): + if isinstance(values, (ABCSeries, ABCIndexClass)): + values = values._values + + inferred_freq = getattr(values, "_freq", None) + + if isinstance(values, type(self)): + if freq is None: + freq = values.freq + elif freq and values.freq: + freq = to_offset(freq) + freq, _ = dtl.validate_inferred_freq(freq, values.freq, False) + values = values._data + + if not isinstance(values, np.ndarray): + msg = ( + f"Unexpected type '{type(values).__name__}'. 'values' must be a " + "TimedeltaArray ndarray, or Series or Index containing one of those." 
+ ) + raise ValueError(msg) + if values.ndim not in [1, 2]: + raise ValueError("Only 1-dimensional input arrays are supported.") + + if values.dtype == "i8": + # for compat with datetime/timedelta/period shared methods, + # we can sometimes get here with int64 values. These represent + # nanosecond UTC (or tz-naive) unix timestamps + values = values.view(_TD_DTYPE) + + _validate_td64_dtype(values.dtype) + dtype = _validate_td64_dtype(dtype) + + if freq == "infer": + msg = ( + "Frequency inference not allowed in TimedeltaArray.__init__. " + "Use 'pd.array()' instead." + ) + raise ValueError(msg) + + if copy: + values = values.copy() + if freq: + freq = to_offset(freq) + + self._data = values + self._dtype = dtype + self._freq = freq + + if inferred_freq is None and freq is not None: + type(self)._validate_frequency(self, freq) + + @classmethod + def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): + assert dtype == _TD_DTYPE, dtype + assert isinstance(values, np.ndarray), type(values) + + result = object.__new__(cls) + result._data = values.view(_TD_DTYPE) + result._freq = to_offset(freq) + result._dtype = _TD_DTYPE + return result + + @classmethod + def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False, freq=None, unit=None): + if dtype: + _validate_td64_dtype(dtype) + freq, freq_infer = dtl.maybe_infer_freq(freq) + + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) + freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) + + result = cls._simple_new(data, freq=freq) + + if inferred_freq is None and freq is not None: + # this condition precludes `freq_infer` + cls._validate_frequency(result, freq) + + elif freq_infer: + # Set _freq directly to bypass duplicative _validate_frequency + # check. 
+ result._freq = to_offset(result.inferred_freq) + + return result + + @classmethod + def _generate_range(cls, start, end, periods, freq, closed=None): + + periods = dtl.validate_periods(periods) + if freq is None and any(x is None for x in [periods, start, end]): + raise ValueError("Must provide freq argument if no data is supplied") + + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the four parameters: start, end, periods, " + "and freq, exactly three must be specified" + ) + + if start is not None: + start = Timedelta(start) + + if end is not None: + end = Timedelta(end) + + if start is None and end is None: + if closed is not None: + raise ValueError( + "Closed has to be None if not both of startand end are defined" + ) + + left_closed, right_closed = dtl.validate_endpoints(closed) + + if freq is not None: + index = _generate_regular_range(start, end, periods, freq) + else: + index = np.linspace(start.value, end.value, periods).astype("i8") + + if not left_closed: + index = index[1:] + if not right_closed: + index = index[:-1] + + return cls._simple_new(index, freq=freq) + + # ---------------------------------------------------------------- + # DatetimeLike Interface + + def _unbox_scalar(self, value): + if not isinstance(value, self._scalar_type) and value is not NaT: + raise ValueError("'value' should be a Timedelta.") + self._check_compatible_with(value) + return value.value + + def _scalar_from_string(self, value): + return Timedelta(value) + + def _check_compatible_with(self, other, setitem: bool = False): + # we don't have anything to validate. 
+ pass + + def _maybe_clear_freq(self): + self._freq = None + + # ---------------------------------------------------------------- + # Array-Like / EA-Interface Methods + + def astype(self, dtype, copy=True): + # We handle + # --> timedelta64[ns] + # --> timedelta64 + # DatetimeLikeArrayMixin super call handles other cases + dtype = pandas_dtype(dtype) + + if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype): + # by pandas convention, converting to non-nano timedelta64 + # returns an int64-dtyped array with ints representing multiples + # of the desired timedelta unit. This is essentially division + if self._hasnans: + # avoid double-copying + result = self._data.astype(dtype, copy=False) + values = self._maybe_mask_results( + result, fill_value=None, convert="float64" + ) + return values + result = self._data.astype(dtype, copy=copy) + return result.astype("i8") + elif is_timedelta64_ns_dtype(dtype): + if copy: + return self.copy() + return self + return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy) + + # ---------------------------------------------------------------- + # Reductions + + def sum( + self, + axis=None, + dtype=None, + out=None, + keepdims: bool = False, + initial=None, + skipna: bool = True, + min_count: int = 0, + ): + nv.validate_sum( + (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial) + ) + if not len(self): + return NaT + if not skipna and self._hasnans: + return NaT + + result = nanops.nansum( + self._data, axis=axis, skipna=skipna, min_count=min_count + ) + return Timedelta(result) + + def std( + self, + axis=None, + dtype=None, + out=None, + ddof: int = 1, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_stat_ddof_func( + (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std" + ) + if not len(self): + return NaT + if not skipna and self._hasnans: + return NaT + + result = nanops.nanstd(self._data, axis=axis, skipna=skipna, ddof=ddof) + return Timedelta(result) + + def 
median( + self, + axis=None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, + ): + nv.validate_median( + (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims) + ) + return nanops.nanmedian(self._data, axis=axis, skipna=skipna) + + # ---------------------------------------------------------------- + # Rendering Methods + + def _formatter(self, boxed=False): + from pandas.io.formats.format import _get_format_timedelta64 + + return _get_format_timedelta64(self, box=True) + + def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + from pandas.io.formats.format import _get_format_timedelta64 + + formatter = _get_format_timedelta64(self._data, na_rep) + return np.array([formatter(x) for x in self._data]) + + # ---------------------------------------------------------------- + # Arithmetic Methods + + def _add_offset(self, other): + assert not isinstance(other, Tick) + raise TypeError( + f"cannot add the type {type(other).__name__} to a {type(self).__name__}" + ) + + def _add_delta(self, delta): + """ + Add a timedelta-like, Tick, or TimedeltaIndex-like object + to self, yielding a new TimedeltaArray. + + Parameters + ---------- + other : {timedelta, np.timedelta64, Tick, + TimedeltaIndex, ndarray[timedelta64]} + + Returns + ------- + result : TimedeltaArray + """ + new_values = super()._add_delta(delta) + return type(self)._from_sequence(new_values, freq="infer") + + def _add_datetime_arraylike(self, other): + """ + Add DatetimeArray/Index or ndarray[datetime64] to TimedeltaArray. 
+ """ + if isinstance(other, np.ndarray): + # At this point we have already checked that dtype is datetime64 + from pandas.core.arrays import DatetimeArray + + other = DatetimeArray(other) + + # defer to implementation in DatetimeArray + return other + self + + def _add_datetimelike_scalar(self, other): + # adding a timedeltaindex to a datetimelike + from pandas.core.arrays import DatetimeArray + + assert other is not NaT + other = Timestamp(other) + if other is NaT: + # In this case we specifically interpret NaT as a datetime, not + # the timedelta interpretation we would get by returning self + NaT + result = self.asi8.view("m8[ms]") + NaT.to_datetime64() + return DatetimeArray(result) + + i8 = self.asi8 + result = checked_add_with_arr(i8, other.value, arr_mask=self._isnan) + result = self._maybe_mask_results(result) + dtype = DatetimeTZDtype(tz=other.tz) if other.tz else _NS_DTYPE + return DatetimeArray(result, dtype=dtype, freq=self.freq) + + def _addsub_object_array(self, other, op): + # Add or subtract Array-like of objects + try: + # TimedeltaIndex can only operate with a subset of DateOffset + # subclasses. 
Incompatible classes will raise AttributeError, + # which we re-raise as TypeError + return super()._addsub_object_array(other, op) + except AttributeError: + raise TypeError( + f"Cannot add/subtract non-tick DateOffset to {type(self).__name__}" + ) + + def __mul__(self, other): + other = lib.item_from_zerodim(other) + + if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)): + return NotImplemented + + if is_scalar(other): + # numpy will accept float and int, raise TypeError for others + result = self._data * other + freq = None + if self.freq is not None and not isna(other): + freq = self.freq * other + return type(self)(result, freq=freq) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + if len(other) != len(self) and not is_timedelta64_dtype(other): + # Exclude timedelta64 here so we correctly raise TypeError + # for that instead of ValueError + raise ValueError("Cannot multiply with unequal lengths") + + if is_object_dtype(other.dtype): + # this multiplication will succeed only if all elements of other + # are int or float scalars, so we will end up with + # timedelta64[ns]-dtyped result + result = [self[n] * other[n] for n in range(len(self))] + result = np.array(result) + return type(self)(result) + + # numpy will accept float or int dtype, raise TypeError for others + result = self._data * other + return type(self)(result) + + __rmul__ = __mul__ + + def __truediv__(self, other): + # timedelta / X is well-defined for timedelta-like or numeric X + other = lib.item_from_zerodim(other) + + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + if other is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return self._data / other + + elif lib.is_scalar(other): + # assume it 
is numeric + result = self._data / other + freq = None + if self.freq is not None: + # Tick division is not implemented, so operate on Timedelta + freq = self.freq.delta / other + return type(self)(result, freq=freq) + + if not hasattr(other, "dtype"): + # e.g. list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + # let numpy handle it + return self._data / other + + elif is_object_dtype(other.dtype): + # Note: we do not do type inference on the result, so either + # an object array or numeric-dtyped (if numpy does inference) + # will be returned. GH#23829 + result = [self[n] / other[n] for n in range(len(self))] + result = np.array(result) + return result + + else: + result = self._data / other + return type(self)(result) + + def __rtruediv__(self, other): + # X / timedelta is defined only for timedelta-like X + other = lib.item_from_zerodim(other) + + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + if other is NaT: + # specifically timedelta64-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # otherwise, dispatch to Timedelta implementation + return other / self._data + + elif lib.is_scalar(other): + raise TypeError( + f"Cannot divide {type(other).__name__} by {type(self).__name__}" + ) + + if not hasattr(other, "dtype"): + # e.g. list, tuple + other = np.array(other) + + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + # let numpy handle it + return other / self._data + + elif is_object_dtype(other.dtype): + # Note: unlike in __truediv__, we do not _need_ to do type + # inference on the result. It does not raise, a numeric array + # is returned. 
GH#23829 + result = [other[n] / self[n] for n in range(len(self))] + return np.array(result) + + else: + raise TypeError( + f"Cannot divide {other.dtype} data by {type(self).__name__}" + ) + + def __floordiv__(self, other): + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + other = lib.item_from_zerodim(other) + if is_scalar(other): + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + if other is NaT: + # treat this specifically as timedelta-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # dispatch to Timedelta implementation + result = other.__rfloordiv__(self._data) + return result + + # at this point we should only have numeric scalars; anything + # else will raise + result = self.asi8 // other + result[self._isnan] = iNaT + freq = None + if self.freq is not None: + # Note: freq gets division, not floor-division + freq = self.freq / other + return type(self)(result.view("m8[ns]"), freq=freq) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + other = type(self)(other) + + # numpy timedelta64 does not natively support floordiv, so operate + # on the i8 values + result = self.asi8 // other.asi8 + mask = self._isnan | other._isnan + if mask.any(): + result = result.astype(np.int64) + result[mask] = np.nan + return result + + elif is_object_dtype(other.dtype): + result = [self[n] // other[n] for n in range(len(self))] + result = np.array(result) + if lib.infer_dtype(result, skipna=False) == "timedelta": + result, _ = sequence_to_td64ns(result) + return type(self)(result) + return result + + elif is_integer_dtype(other.dtype) or is_float_dtype(other.dtype): + result = self._data // other + return type(self)(result) + + else: + dtype = getattr(other, "dtype", type(other).__name__) + 
raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}") + + def __rfloordiv__(self, other): + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + other = lib.item_from_zerodim(other) + if is_scalar(other): + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + if other is NaT: + # treat this specifically as timedelta-NaT + result = np.empty(self.shape, dtype=np.float64) + result.fill(np.nan) + return result + + # dispatch to Timedelta implementation + result = other.__floordiv__(self._data) + return result + + raise TypeError( + f"Cannot divide {type(other).__name__} by {type(self).__name__}" + ) + + if not hasattr(other, "dtype"): + # list, tuple + other = np.array(other) + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): + other = type(self)(other) + + # numpy timedelta64 does not natively support floordiv, so operate + # on the i8 values + result = other.asi8 // self.asi8 + mask = self._isnan | other._isnan + if mask.any(): + result = result.astype(np.int64) + result[mask] = np.nan + return result + + elif is_object_dtype(other.dtype): + result = [other[n] // self[n] for n in range(len(self))] + result = np.array(result) + return result + + else: + dtype = getattr(other, "dtype", type(other).__name__) + raise TypeError(f"Cannot divide {dtype} by {type(self).__name__}") + + def __mod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + other = lib.item_from_zerodim(other) + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + return self - (self // other) * other + + def __rmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + 
+ other = lib.item_from_zerodim(other) + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + return other - (other // self) * self + + def __divmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + other = lib.item_from_zerodim(other) + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + + res1 = self // other + res2 = self - res1 * other + return res1, res2 + + def __rdivmod__(self, other): + # Note: This is a naive implementation, can likely be optimized + if isinstance(other, (ABCSeries, ABCDataFrame, ABCIndexClass)): + return NotImplemented + + other = lib.item_from_zerodim(other) + if isinstance(other, (timedelta, np.timedelta64, Tick)): + other = Timedelta(other) + + res1 = other // self + res2 = other - res1 * self + return res1, res2 + + def __neg__(self): + if self.freq is not None: + return type(self)(-self._data, freq=-self.freq) + return type(self)(-self._data) + + def __pos__(self): + return type(self)(self._data, freq=self.freq) + + def __abs__(self): + # Note: freq is not preserved + return type(self)(np.abs(self._data)) + + # ---------------------------------------------------------------- + # Conversion Methods - Vectorized analogues of Timedelta methods + + def total_seconds(self): + """ + Return total duration of each element expressed in seconds. + + This method is available directly on TimedeltaArray, TimedeltaIndex + and on Series containing timedelta values under the ``.dt`` namespace. + + Returns + ------- + seconds : [ndarray, Float64Index, Series] + When the calling object is a TimedeltaArray, the return type + is ndarray. When the calling object is a TimedeltaIndex, + the return type is a Float64Index. When the calling object + is a Series, the return type is Series of type `float64` whose + index is the same as the original. 
+ + See Also + -------- + datetime.timedelta.total_seconds : Standard library version + of this method. + TimedeltaIndex.components : Return a DataFrame with components of + each Timedelta. + + Examples + -------- + **Series** + + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d')) + >>> s + 0 0 days + 1 1 days + 2 2 days + 3 3 days + 4 4 days + dtype: timedelta64[ns] + + >>> s.dt.total_seconds() + 0 0.0 + 1 86400.0 + 2 172800.0 + 3 259200.0 + 4 345600.0 + dtype: float64 + + **TimedeltaIndex** + + >>> idx = pd.to_timedelta(np.arange(5), unit='d') + >>> idx + TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq=None) + + >>> idx.total_seconds() + Float64Index([0.0, 86400.0, 172800.0, 259200.00000000003, 345600.0], + dtype='float64') + """ + return self._maybe_mask_results(1e-9 * self.asi8, fill_value=None) + + def to_pytimedelta(self): + """ + Return Timedelta Array/Index as object ndarray of datetime.timedelta + objects. + + Returns + ------- + datetimes : ndarray + """ + return tslibs.ints_to_pytimedelta(self.asi8) + + days = _field_accessor("days", "days", "Number of days for each element.") + seconds = _field_accessor( + "seconds", + "seconds", + "Number of seconds (>= 0 and less than 1 day) for each element.", + ) + microseconds = _field_accessor( + "microseconds", + "microseconds", + "Number of microseconds (>= 0 and less than 1 second) for each element.", + ) + nanoseconds = _field_accessor( + "nanoseconds", + "nanoseconds", + "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.", + ) + + @property + def components(self): + """ + Return a dataframe of the components (days, hours, minutes, + seconds, milliseconds, microseconds, nanoseconds) of the Timedeltas. 
+ + Returns + ------- + a DataFrame + """ + from pandas import DataFrame + + columns = [ + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + "nanoseconds", + ] + hasnans = self._hasnans + if hasnans: + + def f(x): + if isna(x): + return [np.nan] * len(columns) + return x.components + + else: + + def f(x): + return x.components + + result = DataFrame([f(x) for x in self], columns=columns) + if not hasnans: + result = result.astype("int64") + return result + + +# --------------------------------------------------------------------- +# Constructor Helpers + + +def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): + """ + Parameters + ---------- + array : list-like + copy : bool, default False + unit : str, default "ns" + The timedelta unit to treat integers as multiples of. + errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. + + Returns + ------- + converted : numpy.ndarray + The sequence converted to a numpy array with dtype ``timedelta64[ns]``. + inferred_freq : Tick or None + The inferred frequency of the sequence. + + Raises + ------ + ValueError : Data cannot be converted to timedelta64[ns]. + + Notes + ----- + Unlike `pandas.to_timedelta`, if setting ``errors=ignore`` will not cause + errors to be ignored; they are caught and subsequently ignored at a + higher level. + """ + inferred_freq = None + unit = parse_timedelta_unit(unit) + + # Unwrap whatever we have into a np.ndarray + if not hasattr(data, "dtype"): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. 
generator + data = list(data) + data = np.array(data, copy=False) + elif isinstance(data, ABCSeries): + data = data._values + elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArray)): + inferred_freq = data.freq + data = data._data + + # Convert whatever we have into timedelta64[ns] dtype + if is_object_dtype(data.dtype) or is_string_dtype(data.dtype): + # no need to make a copy, need to convert if string-dtyped + data = objects_to_td64ns(data, unit=unit, errors=errors) + copy = False + + elif is_integer_dtype(data.dtype): + # treat as multiples of the given unit + data, copy_made = ints_to_td64ns(data, unit=unit) + copy = copy and not copy_made + + elif is_float_dtype(data.dtype): + # cast the unit, multiply base/frace separately + # to avoid precision issues from float -> int + mask = np.isnan(data) + m, p = precision_from_unit(unit) + base = data.astype(np.int64) + frac = data - base + if p: + frac = np.round(frac, p) + data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]") + data[mask] = iNaT + copy = False + + elif is_timedelta64_dtype(data.dtype): + if data.dtype != _TD_DTYPE: + # non-nano unit + # TODO: watch out for overflows + data = data.astype(_TD_DTYPE) + copy = False + + else: + # This includes datetime64-dtype, see GH#23539, GH#29794 + raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]") + + data = np.array(data, copy=copy) + + assert data.dtype == "m8[ns]", data + return data, inferred_freq + + +def ints_to_td64ns(data, unit="ns"): + """ + Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating + the integers as multiples of the given timedelta unit. + + Parameters + ---------- + data : numpy.ndarray with integer-dtype + unit : str, default "ns" + The timedelta unit to treat integers as multiples of. 
+ + Returns + ------- + numpy.ndarray : timedelta64[ns] array converted from data + bool : whether a copy was made + """ + copy_made = False + unit = unit if unit is not None else "ns" + + if data.dtype != np.int64: + # converting to int64 makes a copy, so we can avoid + # re-copying later + data = data.astype(np.int64) + copy_made = True + + if unit != "ns": + dtype_str = f"timedelta64[{unit}]" + data = data.view(dtype_str) + + # TODO: watch out for overflows when converting from lower-resolution + data = data.astype("timedelta64[ns]") + # the astype conversion makes a copy, so we can avoid re-copying later + copy_made = True + + else: + data = data.view("timedelta64[ns]") + + return data, copy_made + + +def objects_to_td64ns(data, unit="ns", errors="raise"): + """ + Convert a object-dtyped or string-dtyped array into an + timedelta64[ns]-dtyped array. + + Parameters + ---------- + data : ndarray or Index + unit : str, default "ns" + The timedelta unit to treat integers as multiples of. + errors : {"raise", "coerce", "ignore"}, default "raise" + How to handle elements that cannot be converted to timedelta64[ns]. + See ``pandas.to_timedelta`` for details. + + Returns + ------- + numpy.ndarray : timedelta64[ns] array converted from data + + Raises + ------ + ValueError : Data cannot be converted to timedelta64[ns]. + + Notes + ----- + Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause + errors to be ignored; they are caught and subsequently ignored at a + higher level. + """ + # coerce Index to np.ndarray, converting string-dtype if necessary + values = np.array(data, dtype=np.object_, copy=False) + + result = array_to_timedelta64(values, unit=unit, errors=errors) + return result.view("timedelta64[ns]") + + +def _validate_td64_dtype(dtype): + dtype = pandas_dtype(dtype) + if is_dtype_equal(dtype, np.dtype("timedelta64")): + # no precision disallowed GH#24806 + msg = ( + "Passing in 'timedelta' dtype with no precision is not allowed. 
" + "Please pass in 'timedelta64[ns]' instead." + ) + raise ValueError(msg) + + if not is_dtype_equal(dtype, _TD_DTYPE): + raise ValueError(_BAD_DTYPE.format(dtype=dtype)) + + return dtype + + +def _generate_regular_range(start, end, periods, offset): + stride = offset.nanos + if periods is None: + b = Timedelta(start).value + e = Timedelta(end).value + e += stride - e % stride + elif start is not None: + b = Timedelta(start).value + e = b + periods * stride + elif end is not None: + e = Timedelta(end).value + stride + b = e - periods * stride + else: + raise ValueError( + "at least 'start' or 'end' should be specified if a 'period' is given." + ) + + data = np.arange(b, e, stride, dtype=np.int64) + return data diff --git a/pandas/core/base.py b/pandas/core/base.py new file mode 100644 index 00000000..2709601e --- /dev/null +++ b/pandas/core/base.py @@ -0,0 +1,1498 @@ +""" +Base and utility classes for pandas objects. +""" +import builtins +import textwrap +from typing import Dict, FrozenSet, List, Optional + +import numpy as np + +import pandas._libs.lib as lib +from pandas.compat import PYPY +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender, Substitution, cache_readonly +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import is_nested_object +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_dict_like, + is_extension_array_dtype, + is_list_like, + is_object_dtype, + is_scalar, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +from pandas.core.dtypes.missing import isna + +from pandas.core import algorithms, common as com +from pandas.core.accessor import DirNamesMixin +from pandas.core.algorithms import duplicated, unique1d, value_counts +from pandas.core.arrays import ExtensionArray +from pandas.core.construction import create_series_with_explicit_dtype 
+import pandas.core.nanops as nanops + +_shared_docs: Dict[str, str] = dict() +_indexops_doc_kwargs = dict( + klass="IndexOpsMixin", + inplace="", + unique="IndexOpsMixin", + duplicated="IndexOpsMixin", +) + + +class PandasObject(DirNamesMixin): + """baseclass for various pandas objects""" + + @property + def _constructor(self): + """class constructor (for this class it's just `__class__`""" + return type(self) + + def __repr__(self) -> str: + """ + Return a string representation for a particular object. + """ + # Should be overwritten by base classes + return object.__repr__(self) + + def _reset_cache(self, key=None): + """ + Reset cached properties. If ``key`` is passed, only clears that key. + """ + if getattr(self, "_cache", None) is None: + return + if key is None: + self._cache.clear() + else: + self._cache.pop(key, None) + + def __sizeof__(self): + """ + Generates the total memory usage for an object that returns + either a value or Series of values + """ + if hasattr(self, "memory_usage"): + mem = self.memory_usage(deep=True) + if not is_scalar(mem): + mem = mem.sum() + return int(mem) + + # no memory_usage attribute, so fall back to + # object's 'sizeof' + return super().__sizeof__() + + +class NoNewAttributesMixin: + """Mixin which prevents adding new attributes. + + Prevents additional attributes via xxx.attribute = "something" after a + call to `self.__freeze()`. Mainly used to prevent the user from using + wrong attributes on an accessor (`Series.cat/.str/.dt`). + + If you really want to add a new attribute at a later time, you need to use + `object.__setattr__(self, key, value)`. + """ + + def _freeze(self): + """Prevents setting additional attributes""" + object.__setattr__(self, "__frozen", True) + + # prevent adding any attribute via s.xxx.new_attribute = ... + def __setattr__(self, key, value): + # _cache is used by a decorator + # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key) + # because + # 1.) 
getattr is false for attributes that raise errors + # 2.) cls.__dict__ doesn't traverse into base classes + if getattr(self, "__frozen", False) and not ( + key == "_cache" + or key in type(self).__dict__ + or getattr(self, key, None) is not None + ): + raise AttributeError(f"You cannot add any new attribute '{key}'") + object.__setattr__(self, key, value) + + +class GroupByError(Exception): + pass + + +class DataError(GroupByError): + pass + + +class SpecificationError(GroupByError): + pass + + +class SelectionMixin: + """ + mixin implementing the selection & aggregation interface on a group-like + object sub-classes need to define: obj, exclusions + """ + + _selection = None + _internal_names = ["_cache", "__setstate__"] + _internal_names_set = set(_internal_names) + + _builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min} + + _cython_table = { + builtins.sum: "sum", + builtins.max: "max", + builtins.min: "min", + np.all: "all", + np.any: "any", + np.sum: "sum", + np.nansum: "sum", + np.mean: "mean", + np.nanmean: "mean", + np.prod: "prod", + np.nanprod: "prod", + np.std: "std", + np.nanstd: "std", + np.var: "var", + np.nanvar: "var", + np.median: "median", + np.nanmedian: "median", + np.max: "max", + np.nanmax: "max", + np.min: "min", + np.nanmin: "min", + np.cumprod: "cumprod", + np.nancumprod: "cumprod", + np.cumsum: "cumsum", + np.nancumsum: "cumsum", + } + + @property + def _selection_name(self): + """ + return a name for myself; this would ideally be called + the 'name' property, but we cannot conflict with the + Series.name property which can be set + """ + if self._selection is None: + return None # 'result' + else: + return self._selection + + @property + def _selection_list(self): + if not isinstance( + self._selection, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray) + ): + return [self._selection] + return self._selection + + @cache_readonly + def _selected_obj(self): + + if self._selection is None or 
isinstance(self.obj, ABCSeries): + return self.obj + else: + return self.obj[self._selection] + + @cache_readonly + def ndim(self) -> int: + return self._selected_obj.ndim + + @cache_readonly + def _obj_with_exclusions(self): + if self._selection is not None and isinstance(self.obj, ABCDataFrame): + return self.obj.reindex(columns=self._selection_list) + + if len(self.exclusions) > 0: + return self.obj.drop(self.exclusions, axis=1) + else: + return self.obj + + def __getitem__(self, key): + if self._selection is not None: + raise IndexError(f"Column(s) {self._selection} already selected") + + if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): + if len(self.obj.columns.intersection(key)) != len(key): + bad_keys = list(set(key).difference(self.obj.columns)) + raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}") + return self._gotitem(list(key), ndim=2) + + elif not getattr(self, "as_index", False): + if key not in self.obj.columns: + raise KeyError(f"Column not found: {key}") + return self._gotitem(key, ndim=2) + + else: + if key not in self.obj: + raise KeyError(f"Column not found: {key}") + return self._gotitem(key, ndim=1) + + def _gotitem(self, key, ndim, subset=None): + """ + sub-classes to define + return a sliced object + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + + """ + raise AbstractMethodError(self) + + def aggregate(self, func, *args, **kwargs): + raise AbstractMethodError(self) + + agg = aggregate + + def _try_aggregate_string_function(self, arg: str, *args, **kwargs): + """ + if arg is a string, then try to operate on it: + - try to find a function (or attribute) on ourselves + - try to find a numpy function + - raise + + """ + assert isinstance(arg, str) + + f = getattr(self, arg, None) + if f is not None: + if callable(f): + return f(*args, **kwargs) + + # people may try to aggregate on a non-callable attribute + 
# but don't let them think they can pass args to it + assert len(args) == 0 + assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0 + return f + + f = getattr(np, arg, None) + if f is not None: + if hasattr(self, "__array__"): + # in particular exclude Window + return f(self, *args, **kwargs) + + raise AttributeError( + f"'{arg}' is not a valid function for '{type(self).__name__}' object" + ) + + def _aggregate(self, arg, *args, **kwargs): + """ + provide an implementation for the aggregators + + Parameters + ---------- + arg : string, dict, function + *args : args to pass on to the function + **kwargs : kwargs to pass on to the function + + Returns + ------- + tuple of result, how + + Notes + ----- + how can be a string describe the required post-processing, or + None if not required + """ + is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) + + _axis = kwargs.pop("_axis", None) + if _axis is None: + _axis = getattr(self, "axis", 0) + + if isinstance(arg, str): + return self._try_aggregate_string_function(arg, *args, **kwargs), None + + if isinstance(arg, dict): + # aggregate based on the passed dict + if _axis != 0: # pragma: no cover + raise ValueError("Can only pass dict with axis=0") + + obj = self._selected_obj + + # if we have a dict of any non-scalars + # eg. 
{'A' : ['mean']}, normalize all to + # be list-likes + if any(is_aggregator(x) for x in arg.values()): + new_arg = {} + for k, v in arg.items(): + if not isinstance(v, (tuple, list, dict)): + new_arg[k] = [v] + else: + new_arg[k] = v + + # the keys must be in the columns + # for ndim=2, or renamers for ndim=1 + + # ok for now, but deprecated + # {'A': { 'ra': 'mean' }} + # {'A': { 'ra': ['mean'] }} + # {'ra': ['mean']} + + # not ok + # {'ra' : { 'A' : 'mean' }} + if isinstance(v, dict): + raise SpecificationError("nested renamer is not supported") + elif isinstance(obj, ABCSeries): + raise SpecificationError("nested renamer is not supported") + elif isinstance(obj, ABCDataFrame) and k not in obj.columns: + raise KeyError(f"Column '{k}' does not exist!") + + arg = new_arg + + else: + # deprecation of renaming keys + # GH 15931 + keys = list(arg.keys()) + if isinstance(obj, ABCDataFrame) and len( + obj.columns.intersection(keys) + ) != len(keys): + raise SpecificationError("nested renamer is not supported") + + from pandas.core.reshape.concat import concat + + def _agg_1dim(name, how, subset=None): + """ + aggregate a 1-dim with how + """ + colg = self._gotitem(name, ndim=1, subset=subset) + if colg.ndim != 1: + raise SpecificationError( + "nested dictionary is ambiguous in aggregation" + ) + return colg.aggregate(how) + + def _agg_2dim(name, how): + """ + aggregate a 2-dim with how + """ + colg = self._gotitem(self._selection, ndim=2, subset=obj) + return colg.aggregate(how) + + def _agg(arg, func): + """ + run the aggregations over the arg with func + return a dict + """ + result = {} + for fname, agg_how in arg.items(): + result[fname] = func(fname, agg_how) + return result + + # set the final keys + keys = list(arg.keys()) + result = {} + + if self._selection is not None: + + sl = set(self._selection_list) + + # we are a Series like object, + # but may have multiple aggregations + if len(sl) == 1: + + result = _agg( + arg, lambda fname, agg_how: 
_agg_1dim(self._selection, agg_how) + ) + + # we are selecting the same set as we are aggregating + elif not len(sl - set(keys)): + + result = _agg(arg, _agg_1dim) + + # we are a DataFrame, with possibly multiple aggregations + else: + + result = _agg(arg, _agg_2dim) + + # no selection + else: + + try: + result = _agg(arg, _agg_1dim) + except SpecificationError: + + # we are aggregating expecting all 1d-returns + # but we have 2d + result = _agg(arg, _agg_2dim) + + # combine results + + def is_any_series() -> bool: + # return a boolean if we have *any* nested series + return any(isinstance(r, ABCSeries) for r in result.values()) + + def is_any_frame() -> bool: + # return a boolean if we have *any* nested series + return any(isinstance(r, ABCDataFrame) for r in result.values()) + + if isinstance(result, list): + return concat(result, keys=keys, axis=1, sort=True), True + + elif is_any_frame(): + # we have a dict of DataFrames + # return a MI DataFrame + + return concat([result[k] for k in keys], keys=keys, axis=1), True + + elif isinstance(self, ABCSeries) and is_any_series(): + + # we have a dict of Series + # return a MI Series + try: + result = concat(result) + except TypeError: + # we want to give a nice error here if + # we have non-same sized objects, so + # we don't automatically broadcast + + raise ValueError( + "cannot perform both aggregation " + "and transformation operations " + "simultaneously" + ) + + return result, True + + # fall thru + from pandas import DataFrame, Series + + try: + result = DataFrame(result) + except ValueError: + + # we have a dict of scalars + result = Series(result, name=getattr(self, "name", None)) + + return result, True + elif is_list_like(arg): + # we require a list, but not an 'str' + return self._aggregate_multiple_funcs(arg, _axis=_axis), None + else: + result = None + + f = self._get_cython_func(arg) + if f and not args and not kwargs: + return getattr(self, f)(), None + + # caller can react + return result, True + + def 
_aggregate_multiple_funcs(self, arg, _axis): + from pandas.core.reshape.concat import concat + + if _axis != 0: + raise NotImplementedError("axis other than 0 is not supported") + + if self._selected_obj.ndim == 1: + obj = self._selected_obj + else: + obj = self._obj_with_exclusions + + results = [] + keys = [] + + # degenerate case + if obj.ndim == 1: + for a in arg: + colg = self._gotitem(obj.name, ndim=1, subset=obj) + try: + new_res = colg.aggregate(a) + + except TypeError: + pass + else: + results.append(new_res) + + # make sure we find a good name + name = com.get_callable_name(a) or a + keys.append(name) + + # multiples + else: + for index, col in enumerate(obj): + colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, index]) + try: + new_res = colg.aggregate(arg) + except (TypeError, DataError): + pass + except ValueError as err: + # cannot aggregate + if "Must produce aggregated value" in str(err): + # raised directly in _aggregate_named + pass + elif "no results" in str(err): + # raised direcly in _aggregate_multiple_funcs + pass + else: + raise + else: + results.append(new_res) + keys.append(col) + + # if we are empty + if not len(results): + raise ValueError("no results") + + try: + return concat(results, keys=keys, axis=1, sort=False) + except TypeError: + + # we are concatting non-NDFrame objects, + # e.g. 
a list of scalars + + from pandas import Series + + result = Series(results, index=keys, name=self.name) + if is_nested_object(result): + raise ValueError("cannot combine transform and aggregation operations") + return result + + def _get_cython_func(self, arg: str) -> Optional[str]: + """ + if we define an internal function for this argument, return it + """ + return self._cython_table.get(arg) + + def _is_builtin_func(self, arg): + """ + if we define an builtin function for this argument, return it, + otherwise return the arg + """ + return self._builtin_table.get(arg, arg) + + +class ShallowMixin: + _attributes: List[str] = [] + + def _shallow_copy(self, obj=None, **kwargs): + """ + return a new object with the replacement attributes + """ + if obj is None: + obj = self._selected_obj.copy() + + if isinstance(obj, self._constructor): + obj = obj.obj + for attr in self._attributes: + if attr not in kwargs: + kwargs[attr] = getattr(self, attr) + return self._constructor(obj, **kwargs) + + +class IndexOpsMixin: + """ + Common ops mixin to support a unified interface / docs for Series / Index + """ + + # ndarray compatibility + __array_priority__ = 1000 + _deprecations: FrozenSet[str] = frozenset( + ["tolist"] # tolist is not deprecated, just suppressed in the __dir__ + ) + + def transpose(self, *args, **kwargs): + """ + Return the transpose, which is by definition self. + + Returns + ------- + %(klass)s + """ + nv.validate_transpose(args, kwargs) + return self + + T = property( + transpose, + doc=""" + Return the transpose, which is by definition self. + """, + ) + + @property + def shape(self): + """ + Return a tuple of the shape of the underlying data. + """ + return self._values.shape + + @property + def ndim(self) -> int: + """ + Number of dimensions of the underlying data, by definition 1. + """ + return 1 + + def item(self): + """ + Return the first element of the underlying data as a python scalar. 
@property
def array(self) -> ExtensionArray:
    """
    The ExtensionArray of the data backing this Series or Index.

    .. versionadded:: 0.24.0

    Returns
    -------
    ExtensionArray
        An ExtensionArray of the values stored within. For extension
        types, this is the actual array. For NumPy native types, this
        is a thin (no copy) wrapper around :class:`numpy.ndarray`.

        ``.array`` differs from ``.values``, which may require converting
        the data to a different form.

    See Also
    --------
    Index.to_numpy : Similar method that always returns a NumPy array.
    Series.to_numpy : Similar method that always returns a NumPy array.

    Notes
    -----
    This table lays out the different array types for each extension
    dtype within pandas.

    ================== =============================
    dtype              array type
    ================== =============================
    category           Categorical
    period             PeriodArray
    interval           IntervalArray
    IntegerNA          IntegerArray
    string             StringArray
    boolean            BooleanArray
    datetime64[ns, tz] DatetimeArray
    ================== =============================

    For any 3rd-party extension types, the array type will be an
    ExtensionArray.

    For all remaining dtypes ``.array`` will be a
    :class:`arrays.PandasArray` wrapping the actual ndarray
    stored within. If you absolutely need a NumPy array (possibly with
    copying / coercing data), then use :meth:`Series.to_numpy` instead.

    Examples
    --------

    For regular NumPy types like int and float, a PandasArray
    is returned.

    >>> pd.Series([1, 2, 3]).array
    <PandasArray>
    [1, 2, 3]
    Length: 3, dtype: int64

    For extension types, like Categorical, the actual ExtensionArray
    is returned

    >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
    >>> ser.array
    [a, b, a]
    Categories (2, object): [a, b]
    """
    # Abstract in this mixin: the class mixing it in must supply the
    # concrete implementation.
    raise AbstractMethodError(self)
When `self` contains an ExtensionArray, the + dtype may be different. For example, for a category-dtype Series, + ``to_numpy()`` will return a NumPy array and the categorical dtype + will be lost. + + For NumPy dtypes, this will be a reference to the actual data stored + in this Series or Index (assuming ``copy=False``). Modifying the result + in place will modify the data stored in the Series or Index (not that + we recommend doing that). + + For extension types, ``to_numpy()`` *may* require copying data and + coercing the result to a NumPy type (possibly object), which may be + expensive. When you need a no-copy reference to the underlying data, + :attr:`Series.array` should be used instead. + + This table lays out the different dtypes and default return types of + ``to_numpy()`` for various dtypes within pandas. + + ================== ================================ + dtype array type + ================== ================================ + category[T] ndarray[T] (same dtype as input) + period ndarray[object] (Periods) + interval ndarray[object] (Intervals) + IntegerNA ndarray[object] + datetime64[ns] datetime64[ns] + datetime64[ns, tz] ndarray[object] (Timestamps) + ================== ================================ + + Examples + -------- + >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser.to_numpy() + array(['a', 'b', 'a'], dtype=object) + + Specify the `dtype` to control how datetime-aware data is represented. + Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp` + objects, each with the correct ``tz``. + + >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser.to_numpy(dtype=object) + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')], + dtype=object) + + Or ``dtype='datetime64[ns]'`` to return an ndarray of native + datetime64 values. The values are converted to UTC and the timezone + info is dropped. 
@property
def empty(self):
    """
    Whether the object holds no elements.

    Returns
    -------
    bool
        True when ``self.size`` is falsy (i.e. zero), False otherwise.
    """
    has_elements = bool(self.size)
    return not has_elements
def argmin(self, axis=None, skipna=True, *args, **kwargs):
    """
    Return a ndarray of the minimum argument indexer.

    Parameters
    ----------
    axis : {None}
        Dummy argument for consistency with Series.
    skipna : bool, default True
    *args, **kwargs
        Accepted only for compatibility with NumPy's calling convention;
        they are validated and otherwise ignored.

    Returns
    -------
    numpy.ndarray
        Result of ``nanops.nanargmin`` on the underlying values
        (NOTE(review): for one-dimensional data this is presumably a single
        integer position - confirm against ``nanops.nanargmin``).

    See Also
    --------
    numpy.ndarray.argmin
    """
    nv.validate_minmax_axis(axis)
    # Use the argmin validator (the argmax one was called here by mistake,
    # which mislabels validation errors) and keep its return value: when a
    # NumPy-style caller passes ``out`` positionally it lands in the
    # ``skipna`` slot, and the validator hands back the corrected flag.
    skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
    return nanops.nanargmin(self._values, skipna=skipna)
+ + Parameters + ---------- + mapper : function, dict, or Series + The input correspondence object + na_action : {None, 'ignore'} + If 'ignore', propagate NA values, without passing them to the + mapping function + + Returns + ------- + Union[Index, MultiIndex], inferred + The output of the mapping function applied to the index. + If the function returns a tuple with more than one element + a MultiIndex will be returned. + + """ + + # we can fastpath dict/Series to an efficient map + # as we know that we are not going to have to yield + # python types + if is_dict_like(mapper): + if isinstance(mapper, dict) and hasattr(mapper, "__missing__"): + # If a dictionary subclass defines a default value method, + # convert mapper to a lookup function (GH #15999). + dict_with_default = mapper + mapper = lambda x: dict_with_default[x] + else: + # Dictionary does not have a default. Thus it's safe to + # convert to an Series for efficiency. + # we specify the keys here to handle the + # possibility that they are tuples + + # The return value of mapping with an empty mapper is + # expected to be pd.Series(np.nan, ...). 
As np.nan is + # of dtype float64 the return value of this method should + # be float64 as well + mapper = create_series_with_explicit_dtype( + mapper, dtype_if_empty=np.float64 + ) + + if isinstance(mapper, ABCSeries): + # Since values were input this means we came from either + # a dict or a series and mapper should be an index + if is_categorical_dtype(self._values): + # use the built in categorical series mapper which saves + # time by mapping the categories instead of all values + return self._values.map(mapper) + if is_extension_array_dtype(self.dtype): + values = self._values + else: + values = self.values + + indexer = mapper.index.get_indexer(values) + new_values = algorithms.take_1d(mapper._values, indexer) + + return new_values + + # we must convert to python types + if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"): + # GH#23179 some EAs do not have `map` + values = self._values + if na_action is not None: + raise NotImplementedError + map_f = lambda values, f: values.map(f) + else: + values = self.astype(object) + values = getattr(values, "values", values) + if na_action == "ignore": + + def map_f(values, f): + return lib.map_infer_mask(values, f, isna(values).view(np.uint8)) + + else: + map_f = lib.map_infer + + # mapper is a function + new_values = map_f(values, mapper) + + return new_values + + def value_counts( + self, normalize=False, sort=True, ascending=False, bins=None, dropna=True + ): + """ + Return a Series containing counts of unique values. + + The resulting object will be in descending order so that the + first element is the most frequently-occurring element. + Excludes NA values by default. + + Parameters + ---------- + normalize : bool, default False + If True then the object returned will contain the relative + frequencies of the unique values. + sort : bool, default True + Sort by frequencies. + ascending : bool, default False + Sort in ascending order. 
def nunique(self, dropna=True):
    """
    Count the distinct elements of the object.

    NA values are left out of the count unless ``dropna`` is False.

    Parameters
    ----------
    dropna : bool, default True
        Don't include NaN in the count.

    Returns
    -------
    int

    See Also
    --------
    DataFrame.nunique: Method nunique for DataFrame.
    Series.count: Count non-NA/null observations in the Series.

    Examples
    --------
    >>> s = pd.Series([1, 3, 5, 7, 7])
    >>> s
    0    1
    1    3
    2    5
    3    7
    4    7
    dtype: int64

    >>> s.nunique()
    4
    """
    distinct = self.unique()
    count = len(distinct)
    if not dropna:
        return count
    # A single slot is discounted when any distinct value is NA.
    return count - 1 if isna(distinct).any() else count
+ """ + ), + ) + @Appender(algorithms._shared_docs["factorize"]) + def factorize(self, sort=False, na_sentinel=-1): + return algorithms.factorize(self, sort=sort, na_sentinel=na_sentinel) + + _shared_docs[ + "searchsorted" + ] = """ + Find indices where elements should be inserted to maintain order. + + Find the indices into a sorted %(klass)s `self` such that, if the + corresponding elements in `value` were inserted before the indices, + the order of `self` would be preserved. + + .. note:: + + The %(klass)s *must* be monotonically sorted, otherwise + wrong locations will likely be returned. Pandas does *not* + check this for you. + + Parameters + ---------- + value : array_like + Values to insert into `self`. + side : {'left', 'right'}, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable + index, return either 0 or N (where N is the length of `self`). + sorter : 1-D array_like, optional + Optional array of integer indices that sort `self` into ascending + order. They are typically the result of ``np.argsort``. + + Returns + ------- + int or array of int + A scalar or array of insertion points with the + same shape as `value`. + + .. versionchanged:: 0.24.0 + If `value` is a scalar, an int is now always returned. + Previously, scalar inputs returned an 1-item array for + :class:`Series` and :class:`Categorical`. + + See Also + -------- + sort_values + numpy.searchsorted + + Notes + ----- + Binary search is used to find the required insertion points. 
def duplicated(self, keep="first"):
    """
    Flag the elements that duplicate another element.

    Parameters
    ----------
    keep : default "first"
        Forwarded unchanged to the module-level ``duplicated`` helper, which
        decides which occurrence of a repeated value is left unflagged.

    Returns
    -------
    numpy.ndarray or same type as caller
        For index-like objects, the boolean mask itself (all False when the
        index is already unique). For anything else, the mask wrapped by
        ``self._constructor`` on ``self.index`` and finalized from ``self``.
    """
    if isinstance(self, ABCIndexClass):
        if self.is_unique:
            # Fast path: nothing can be duplicated. The builtin ``bool`` is
            # used because the ``np.bool`` alias is deprecated since
            # NumPy 1.20 and removed in NumPy 1.24.
            return np.zeros(len(self), dtype=bool)
        return duplicated(self, keep=keep)
    else:
        return self._constructor(
            duplicated(self, keep=keep), index=self.index
        ).__finalize__(self)
+Misc tools for implementing data structures + +Note: pandas.core.common is *not* part of the public API. +""" + +import collections +from collections import abc +from datetime import datetime, timedelta +from functools import partial +import inspect +from typing import Any, Collection, Iterable, Union + +import numpy as np + +from pandas._libs import lib, tslibs +from pandas._typing import T + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_extension_array_dtype, + is_integer, +) +from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.inference import _iterable_not_string +from pandas.core.dtypes.missing import isna, isnull, notnull # noqa + + +class SettingWithCopyError(ValueError): + pass + + +class SettingWithCopyWarning(Warning): + pass + + +def flatten(l): + """ + Flatten an arbitrarily nested sequence. + + Parameters + ---------- + l : sequence + The non string sequence to flatten + + Notes + ----- + This doesn't consider strings sequences. 
def consensus_name_attr(objs):
    """
    Return the ``name`` shared by all objects, or None when they disagree.

    Parameters
    ----------
    objs : sequence
        Non-empty, sliceable sequence of objects exposing ``.name``.

    Returns
    -------
    object or None
        The first object's name if every later name compares equal to it,
        otherwise None.
    """
    agreed = objs[0].name
    for other in objs[1:]:
        try:
            mismatch = bool(other.name != agreed)
        except ValueError:
            # The comparison (or its truth value) is not well defined;
            # treat that the same as a disagreement.
            mismatch = True
        if mismatch:
            agreed = None
    return agreed
def cast_scalar_indexer(val):
    """
    To avoid numpy DeprecationWarnings, cast float to integer where valid.

    Parameters
    ----------
    val : scalar

    Returns
    -------
    outval : scalar
        ``int(val)`` when ``val`` is a float holding an integral value;
        otherwise ``val`` unchanged. NaN and infinities are returned as-is.
    """
    # assumes lib.is_scalar(val)
    # ``float.is_integer`` is False for NaN/inf, whereas the former
    # ``val == int(val)`` check raised ValueError/OverflowError on them.
    if lib.is_float(val) and float(val).is_integer():
        return int(val)
    return val
def try_sort(iterable):
    """
    Return the items as a sorted list when they can be ordered.

    Parameters
    ----------
    iterable : iterable
        Consumed once into a list.

    Returns
    -------
    list
        A sorted list, or - when sorting raises TypeError - the items in
        their original order.
    """
    items = list(iterable)
    try:
        ordered = sorted(items)
    except TypeError:
        # Items are not mutually comparable; fall back to input order.
        ordered = items
    return ordered
def get_callable_name(obj):
    """
    Best-effort name for a callable.

    Parameters
    ----------
    obj : object

    Returns
    -------
    str or None
        ``obj.__name__`` when present; for a ``functools.partial`` without
        one, the name resolved from the wrapped ``func``; for any other
        object exposing ``__call__``, its class name. None when none of
        these apply (most likely ``obj`` is not callable) - None rather than
        an empty string, so "no name" stays distinguishable from a name
        of ''.
    """
    if hasattr(obj, "__name__"):
        # typical case has name
        name = obj.__name__
    elif isinstance(obj, partial):
        # partials carry no name of their own; look through to the target
        name = get_callable_name(obj.func)
    elif hasattr(obj, "__call__"):
        # fall back to class name
        name = type(obj).__name__
    else:
        name = None
    return name
def random_state(state=None):
    """
    Normalize a ``random_state`` argument into a random number source.

    Parameters
    ----------
    state : int, np.random.RandomState, None.
        An int is used as the seed of a new np.random.RandomState.
        An np.random.RandomState object is returned as is.
        `None` yields np.random.
        Anything else raises an informative ValueError.
        Default None.

    Returns
    -------
    np.random.RandomState
        (or the ``np.random`` module itself when `state` is None)

    Raises
    ------
    ValueError
        If `state` is none of the accepted kinds.
    """
    if state is None:
        return np.random
    if isinstance(state, np.random.RandomState):
        return state
    if is_integer(state):
        return np.random.RandomState(state)
    raise ValueError(
        "random_state must be an integer, a numpy RandomState, or None"
    )
def get_rename_function(mapper):
    """
    Turn ``mapper`` into a function that maps names/labels.

    Parameters
    ----------
    mapper : Mapping, Series or callable
        A mapping or Series is wrapped in a lookup that leaves labels it
        does not contain untouched; anything else is returned as is.

    Returns
    -------
    callable
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        return mapper

    def f(x):
        # labels the mapper does not contain pass through untouched
        return mapper[x] if x in mapper else x

    return f
Optional[Dict[str, int]]]: + + typ: Union[partial, Type[FrameOrSeries]] + axes: Optional[Dict[str, int]] = None + + if isinstance(term.value, np.ndarray): + typ = partial(np.asanyarray, dtype=term.value.dtype) + else: + typ = type(term.value) + if hasattr(term.value, "axes"): + axes = _zip_axes_from_type(typ, term.value.axes) + + return typ, axes + + +def _zip_axes_from_type( + typ: Type[FrameOrSeries], new_axes: Sequence[int] +) -> Dict[str, int]: + axes = {name: new_axes[i] for i, name in typ._AXIS_NAMES.items()} + return axes + + +def _any_pandas_objects(terms) -> bool: + """ + Check a sequence of terms for instances of PandasObject. + """ + return any(isinstance(term.value, PandasObject) for term in terms) + + +def _filter_special_cases(f): + @wraps(f) + def wrapper(terms): + # single unary operand + if len(terms) == 1: + return _align_core_single_unary_op(terms[0]) + + term_values = (term.value for term in terms) + + # we don't have any pandas objects + if not _any_pandas_objects(terms): + return result_type_many(*term_values), None + + return f(terms) + + return wrapper + + +@_filter_special_cases +def _align_core(terms): + term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")] + term_dims = [terms[i].value.ndim for i in term_index] + + from pandas import Series + + ndims = Series(dict(zip(term_index, term_dims))) + + # initial axes are the axes of the largest-axis'd term + biggest = terms[ndims.idxmax()].value + typ = biggest._constructor + axes = biggest.axes + naxes = len(axes) + gt_than_one_axis = naxes > 1 + + for value in (terms[i].value for i in term_index): + is_series = isinstance(value, ABCSeries) + is_series_and_gt_one_axis = is_series and gt_than_one_axis + + for axis, items in enumerate(value.axes): + if is_series_and_gt_one_axis: + ax, itm = naxes - 1, value.index + else: + ax, itm = axis, items + + if not axes[ax].is_(itm): + axes[ax] = axes[ax].join(itm, how="outer") + + for i, ndim in ndims.items(): + for axis, items 
in zip(range(ndim), axes): + ti = terms[i].value + + if hasattr(ti, "reindex"): + transpose = isinstance(ti, ABCSeries) and naxes > 1 + reindexer = axes[naxes - 1] if transpose else items + + term_axis_size = len(ti.axes[axis]) + reindexer_size = len(reindexer) + + ordm = np.log10(max(1, abs(reindexer_size - term_axis_size))) + if ordm >= 1 and reindexer_size >= 10000: + w = ( + f"Alignment difference on axis {axis} is larger " + f"than an order of magnitude on term {repr(terms[i].name)}, " + f"by more than {ordm:.4g}; performance may suffer" + ) + warnings.warn(w, category=PerformanceWarning, stacklevel=6) + + f = partial(ti.reindex, reindexer, axis=axis, copy=False) + + terms[i].update(f()) + + terms[i].update(terms[i].value.values) + + return typ, _zip_axes_from_type(typ, axes) + + +def align_terms(terms): + """ + Align a set of terms. + """ + try: + # flatten the parse tree (a nested list, really) + terms = list(com.flatten(terms)) + except TypeError: + # can't iterate so it must just be a constant or single variable + if isinstance(terms.value, (ABCSeries, ABCDataFrame)): + typ = type(terms.value) + return typ, _zip_axes_from_type(typ, terms.value.axes) + return np.result_type(terms.type), None + + # if all resolved variables are numeric scalars + if all(term.is_scalar for term in terms): + return result_type_many(*(term.value for term in terms)).type, None + + # perform the main alignment + typ, axes = _align_core(terms) + return typ, axes + + +def reconstruct_object(typ, obj, axes, dtype): + """ + Reconstruct an object given its type, raw value, and possibly empty + (None) axes. + + Parameters + ---------- + typ : object + A type + obj : object + The value to use in the type constructor + axes : dict + The axes to use to construct the resulting pandas object + + Returns + ------- + ret : typ + An object of type ``typ`` with the value `obj` and possible axes + `axes`. 
+ """ + try: + typ = typ.type + except AttributeError: + pass + + res_t = np.result_type(obj.dtype, dtype) + + if not isinstance(typ, partial) and issubclass(typ, PandasObject): + return typ(obj, dtype=res_t, **axes) + + # special case for pathological things like ~True/~False + if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_: + ret_value = res_t.type(obj) + else: + ret_value = typ(obj).astype(res_t) + # The condition is to distinguish 0-dim array (returned in case of + # scalar) and 1 element array + # e.g. np.array(0) and np.array([0]) + if len(obj.shape) == 1 and len(obj) == 1: + if not isinstance(ret_value, np.ndarray): + ret_value = np.array([ret_value]).astype(res_t) + + return ret_value diff --git a/pandas/core/computation/api.py b/pandas/core/computation/api.py new file mode 100644 index 00000000..31e8a487 --- /dev/null +++ b/pandas/core/computation/api.py @@ -0,0 +1,3 @@ +# flake8: noqa + +from pandas.core.computation.eval import eval diff --git a/pandas/core/computation/check.py b/pandas/core/computation/check.py new file mode 100644 index 00000000..4d205909 --- /dev/null +++ b/pandas/core/computation/check.py @@ -0,0 +1,10 @@ +from pandas.compat._optional import import_optional_dependency + +ne = import_optional_dependency("numexpr", raise_on_missing=False, on_version="warn") +_NUMEXPR_INSTALLED = ne is not None +if _NUMEXPR_INSTALLED: + _NUMEXPR_VERSION = ne.__version__ +else: + _NUMEXPR_VERSION = None + +__all__ = ["_NUMEXPR_INSTALLED", "_NUMEXPR_VERSION"] diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py new file mode 100644 index 00000000..19a8898a --- /dev/null +++ b/pandas/core/computation/common.py @@ -0,0 +1,30 @@ +from functools import reduce + +import numpy as np + +from pandas._config import get_option + + +def _ensure_decoded(s): + """ + If we have bytes, decode them to unicode. 
+ """ + if isinstance(s, (np.bytes_, bytes)): + s = s.decode(get_option("display.encoding")) + return s + + +def result_type_many(*arrays_and_dtypes): + """ + Wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32) + argument limit. + """ + try: + return np.result_type(*arrays_and_dtypes) + except ValueError: + # we have > NPY_MAXARGS terms in our expression + return reduce(np.result_type, arrays_and_dtypes) + + +class NameResolutionError(NameError): + pass diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py new file mode 100644 index 00000000..9c5388fa --- /dev/null +++ b/pandas/core/computation/engines.py @@ -0,0 +1,136 @@ +""" +Engine classes for :func:`~pandas.eval` +""" + +import abc +from typing import Dict, Type + +from pandas.core.computation.align import align_terms, reconstruct_object +from pandas.core.computation.ops import _mathops, _reductions + +import pandas.io.formats.printing as printing + +_ne_builtins = frozenset(_mathops + _reductions) + + +class NumExprClobberingError(NameError): + pass + + +def _check_ne_builtin_clash(expr): + """ + Attempt to prevent foot-shooting in a helpful way. + + Parameters + ---------- + terms : Term + Terms can contain + """ + names = expr.names + overlap = names & _ne_builtins + + if overlap: + s = ", ".join(repr(x) for x in overlap) + raise NumExprClobberingError( + f'Variables in expression "{expr}" overlap with builtins: ({s})' + ) + + +class AbstractEngine(metaclass=abc.ABCMeta): + """Object serving as a base class for all engines.""" + + has_neg_frac = False + + def __init__(self, expr): + self.expr = expr + self.aligned_axes = None + self.result_type = None + + def convert(self) -> str: + """ + Convert an expression for evaluation. + + Defaults to return the expression as a string. + """ + return printing.pprint_thing(self.expr) + + def evaluate(self) -> object: + """ + Run the engine on the expression. 
+ + This method performs alignment which is necessary no matter what engine + is being used, thus its implementation is in the base class. + + Returns + ------- + object + The result of the passed expression. + """ + if not self._is_aligned: + self.result_type, self.aligned_axes = align_terms(self.expr.terms) + + # make sure no names in resolvers and locals/globals clash + res = self._evaluate() + return reconstruct_object( + self.result_type, res, self.aligned_axes, self.expr.terms.return_type + ) + + @property + def _is_aligned(self) -> bool: + return self.aligned_axes is not None and self.result_type is not None + + @abc.abstractmethod + def _evaluate(self): + """ + Return an evaluated expression. + + Parameters + ---------- + env : Scope + The local and global environment in which to evaluate an + expression. + + Notes + ----- + Must be implemented by subclasses. + """ + pass + + +class NumExprEngine(AbstractEngine): + """NumExpr engine class""" + + has_neg_frac = True + + def _evaluate(self): + import numexpr as ne + + # convert the expression to a valid numexpr expression + s = self.convert() + + env = self.expr.env + scope = env.full_scope + _check_ne_builtin_clash(self.expr) + return ne.evaluate(s, local_dict=scope) + + +class PythonEngine(AbstractEngine): + """ + Evaluate an expression in Python space. + + Mostly for testing purposes. + """ + + has_neg_frac = False + + def evaluate(self): + return self.expr() + + def _evaluate(self) -> None: + pass + + +_engines: Dict[str, Type[AbstractEngine]] = { + "numexpr": NumExprEngine, + "python": PythonEngine, +} diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py new file mode 100644 index 00000000..51892b8c --- /dev/null +++ b/pandas/core/computation/eval.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python + +""" +Top level ``eval`` module. 
+""" + +import tokenize +from typing import Optional +import warnings + +from pandas._libs.lib import no_default +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.computation.engines import _engines +from pandas.core.computation.expr import Expr, _parsers +from pandas.core.computation.parsing import tokenize_string +from pandas.core.computation.scope import ensure_scope + +from pandas.io.formats.printing import pprint_thing + + +def _check_engine(engine: Optional[str]) -> str: + """ + Make sure a valid engine is passed. + + Parameters + ---------- + engine : str + + Raises + ------ + KeyError + * If an invalid engine is passed + ImportError + * If numexpr was requested but doesn't exist + + Returns + ------- + string engine + """ + from pandas.core.computation.check import _NUMEXPR_INSTALLED + + if engine is None: + if _NUMEXPR_INSTALLED: + engine = "numexpr" + else: + engine = "python" + + if engine not in _engines: + valid = list(_engines.keys()) + raise KeyError( + f"Invalid engine {repr(engine)} passed, valid engines are {valid}" + ) + + # TODO: validate this in a more general way (thinking of future engines + # that won't necessarily be import-able) + # Could potentially be done on engine instantiation + if engine == "numexpr": + if not _NUMEXPR_INSTALLED: + raise ImportError( + "'numexpr' is not installed or an " + "unsupported version. Cannot use " + "engine='numexpr' for query/eval " + "if 'numexpr' is not installed" + ) + + return engine + + +def _check_parser(parser: str): + """ + Make sure a valid parser is passed. 
+ + Parameters + ---------- + parser : str + + Raises + ------ + KeyError + * If an invalid parser is passed + """ + + if parser not in _parsers: + raise KeyError( + f"Invalid parser {repr(parser)} passed, " + f"valid parsers are {_parsers.keys()}" + ) + + +def _check_resolvers(resolvers): + if resolvers is not None: + for resolver in resolvers: + if not hasattr(resolver, "__getitem__"): + name = type(resolver).__name__ + raise TypeError( + f"Resolver of type {repr(name)} does not " + f"implement the __getitem__ method" + ) + + +def _check_expression(expr): + """ + Make sure an expression is not an empty string + + Parameters + ---------- + expr : object + An object that can be converted to a string + + Raises + ------ + ValueError + * If expr is an empty string + """ + if not expr: + raise ValueError("expr cannot be an empty string") + + +def _convert_expression(expr) -> str: + """ + Convert an object to an expression. + + This function converts an object to an expression (a unicode string) and + checks to make sure it isn't empty after conversion. This is used to + convert operators to their string representation for recursive calls to + :func:`~pandas.eval`. + + Parameters + ---------- + expr : object + The object to be converted to a string. + + Returns + ------- + str + The string representation of an object. + + Raises + ------ + ValueError + * If the expression is empty. 
+ """ + s = pprint_thing(expr) + _check_expression(s) + return s + + +def _check_for_locals(expr: str, stack_level: int, parser: str): + + at_top_of_stack = stack_level == 0 + not_pandas_parser = parser != "pandas" + + if not_pandas_parser: + msg = "The '@' prefix is only supported by the pandas parser" + elif at_top_of_stack: + msg = ( + "The '@' prefix is not allowed in " + "top-level eval calls, \nplease refer to " + "your variables by name without the '@' " + "prefix" + ) + + if at_top_of_stack or not_pandas_parser: + for toknum, tokval in tokenize_string(expr): + if toknum == tokenize.OP and tokval == "@": + raise SyntaxError(msg) + + +def eval( + expr, + parser="pandas", + engine: Optional[str] = None, + truediv=no_default, + local_dict=None, + global_dict=None, + resolvers=(), + level=0, + target=None, + inplace=False, +): + """ + Evaluate a Python expression as a string using various backends. + + The following arithmetic operations are supported: ``+``, ``-``, ``*``, + ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following + boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not). + Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`, + :keyword:`or`, and :keyword:`not` with the same semantics as the + corresponding bitwise operators. :class:`~pandas.Series` and + :class:`~pandas.DataFrame` objects are supported and behave as they would + with plain ol' Python evaluation. + + Parameters + ---------- + expr : str + The expression to evaluate. This string cannot contain any Python + `statements + `__, + only Python `expressions + `__. + parser : {'pandas', 'python'}, default 'pandas' + The parser to use to construct the syntax tree from the expression. The + default of ``'pandas'`` parses code slightly different than standard + Python. Alternatively, you can parse an expression using the + ``'python'`` parser to retain strict Python semantics. 
See the + :ref:`enhancing performance ` documentation for + more details. + engine : {'python', 'numexpr'}, default 'numexpr' + + The engine used to evaluate the expression. Supported engines are + + - None : tries to use ``numexpr``, falls back to ``python`` + - ``'numexpr'``: This default engine evaluates pandas objects using + numexpr for large speed ups in complex expressions + with large frames. + - ``'python'``: Performs operations as if you had ``eval``'d in top + level python. This engine is generally not that useful. + + More backends may be available in the future. + + truediv : bool, optional + Whether to use true division, like in Python >= 3. + deprecated:: 1.0.0 + + local_dict : dict or None, optional + A dictionary of local variables, taken from locals() by default. + global_dict : dict or None, optional + A dictionary of global variables, taken from globals() by default. + resolvers : list of dict-like or None, optional + A list of objects implementing the ``__getitem__`` special method that + you can use to inject an additional collection of namespaces to use for + variable lookup. For example, this is used in the + :meth:`~DataFrame.query` method to inject the + ``DataFrame.index`` and ``DataFrame.columns`` + variables that refer to their respective :class:`~pandas.DataFrame` + instance attributes. + level : int, optional + The number of prior stack frames to traverse and add to the current + scope. Most users will **not** need to change this parameter. + target : object, optional, default None + This is the target object for assignment. It is used when there is + variable assignment in the expression. If so, then `target` must + support item assignment with string keys, and if a copy is being + returned, it must also support `.copy()`. + inplace : bool, default False + If `target` is provided, and the expression mutates `target`, whether + to modify `target` inplace. Otherwise, return a copy of `target` with + the mutation. 
+ + Returns + ------- + ndarray, numeric scalar, DataFrame, Series + + Raises + ------ + ValueError + There are many instances where such an error can be raised: + + - `target=None`, but the expression is multiline. + - The expression is multiline, but not all them have item assignment. + An example of such an arrangement is this: + + a = b + 1 + a + 2 + + Here, there are expressions on different lines, making it multiline, + but the last line has no variable assigned to the output of `a + 2`. + - `inplace=True`, but the expression is missing item assignment. + - Item assignment is provided, but the `target` does not support + string item assignment. + - Item assignment is provided and `inplace=False`, but the `target` + does not support the `.copy()` method + + See Also + -------- + DataFrame.query + DataFrame.eval + + Notes + ----- + The ``dtype`` of any objects involved in an arithmetic ``%`` operation are + recursively cast to ``float64``. + + See the :ref:`enhancing performance ` documentation for + more details. 
+ """ + + inplace = validate_bool_kwarg(inplace, "inplace") + + if truediv is not no_default: + warnings.warn( + "The `truediv` parameter in pd.eval is deprecated and will be " + "removed in a future version.", + FutureWarning, + stacklevel=2, + ) + + if isinstance(expr, str): + _check_expression(expr) + exprs = [e.strip() for e in expr.splitlines() if e.strip() != ""] + else: + exprs = [expr] + multi_line = len(exprs) > 1 + + if multi_line and target is None: + raise ValueError( + "multi-line expressions are only valid in the " + "context of data, use DataFrame.eval" + ) + engine = _check_engine(engine) + _check_parser(parser) + _check_resolvers(resolvers) + + ret = None + first_expr = True + target_modified = False + + for expr in exprs: + expr = _convert_expression(expr) + _check_for_locals(expr, level, parser) + + # get our (possibly passed-in) scope + env = ensure_scope( + level + 1, + global_dict=global_dict, + local_dict=local_dict, + resolvers=resolvers, + target=target, + ) + + parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) + + # construct the engine and evaluate the parsed expression + eng = _engines[engine] + eng_inst = eng(parsed_expr) + ret = eng_inst.evaluate() + + if parsed_expr.assigner is None: + if multi_line: + raise ValueError( + "Multi-line expressions are only valid " + "if all expressions contain an assignment" + ) + elif inplace: + raise ValueError("Cannot operate inplace if there is no assignment") + + # assign if needed + assigner = parsed_expr.assigner + if env.target is not None and assigner is not None: + target_modified = True + + # if returning a copy, copy only on the first assignment + if not inplace and first_expr: + try: + target = env.target.copy() + except AttributeError: + raise ValueError("Cannot return a copy of the target") + else: + target = env.target + + # TypeError is most commonly raised (e.g. int, list), but you + # get IndexError if you try to do this assignment on np.ndarray. 
+ # we will ignore numpy warnings here; e.g. if trying + # to use a non-numeric indexer + try: + with warnings.catch_warnings(record=True): + # TODO: Filter the warnings we actually care about here. + target[assigner] = ret + except (TypeError, IndexError): + raise ValueError("Cannot assign expression output to target") + + if not resolvers: + resolvers = ({assigner: ret},) + else: + # existing resolver needs updated to handle + # case of mutating existing column in copy + for resolver in resolvers: + if assigner in resolver: + resolver[assigner] = ret + break + else: + resolvers += ({assigner: ret},) + + ret = None + first_expr = False + + # We want to exclude `inplace=None` as being False. + if inplace is False: + return target if target_modified else ret diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py new file mode 100644 index 00000000..1350587b --- /dev/null +++ b/pandas/core/computation/expr.py @@ -0,0 +1,791 @@ +""":func:`~pandas.eval` parsers +""" + +import ast +from functools import partial, reduce +from keyword import iskeyword +import tokenize +from typing import Optional, Type + +import numpy as np + +import pandas.core.common as com +from pandas.core.computation.ops import ( + _LOCAL_TAG, + BinOp, + Constant, + Div, + FuncNode, + Op, + Term, + UnaryOp, + UndefinedVariableError, + _arith_ops_syms, + _bool_ops_syms, + _cmp_ops_syms, + _mathops, + _reductions, + _unary_ops_syms, + is_term, +) +from pandas.core.computation.parsing import clean_backtick_quoted_toks, tokenize_string +from pandas.core.computation.scope import Scope + +import pandas.io.formats.printing as printing + + +def _rewrite_assign(tok): + """Rewrite the assignment operator for PyTables expressions that use ``=`` + as a substitute for ``==``. 
+ + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + t : tuple of int, str + Either the input or token or the replacement values + """ + toknum, tokval = tok + return toknum, "==" if tokval == "=" else tokval + + +def _replace_booleans(tok): + """Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise + precedence is changed to boolean precedence. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + t : tuple of int, str + Either the input or token or the replacement values + """ + toknum, tokval = tok + if toknum == tokenize.OP: + if tokval == "&": + return tokenize.NAME, "and" + elif tokval == "|": + return tokenize.NAME, "or" + return toknum, tokval + return toknum, tokval + + +def _replace_locals(tok): + """Replace local variables with a syntactically valid name. + + Parameters + ---------- + tok : tuple of int, str + ints correspond to the all caps constants in the tokenize module + + Returns + ------- + t : tuple of int, str + Either the input or token or the replacement values + + Notes + ----- + This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as + ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_`` + is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it. 
+ """ + toknum, tokval = tok + if toknum == tokenize.OP and tokval == "@": + return tokenize.OP, _LOCAL_TAG + return toknum, tokval + + +def _compose2(f, g): + """Compose 2 callables""" + return lambda *args, **kwargs: f(g(*args, **kwargs)) + + +def _compose(*funcs): + """Compose 2 or more callables""" + assert len(funcs) > 1, "At least 2 callables must be passed to compose" + return reduce(_compose2, funcs) + + +def _preparse( + source: str, + f=_compose( + _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks + ), +): + """Compose a collection of tokenization functions + + Parameters + ---------- + source : str + A Python source code string + f : callable + This takes a tuple of (toknum, tokval) as its argument and returns a + tuple with the same structure but possibly different elements. Defaults + to the composition of ``_rewrite_assign``, ``_replace_booleans``, and + ``_replace_locals``. + + Returns + ------- + s : str + Valid Python source code + + Notes + ----- + The `f` parameter can be any callable that takes *and* returns input of the + form ``(toknum, tokval)``, where ``toknum`` is one of the constants from + the ``tokenize`` module and ``tokval`` is a string. 
+ """ + assert callable(f), "f must be callable" + return tokenize.untokenize((f(x) for x in tokenize_string(source))) + + +def _is_type(t): + """Factory for a type checking function of type ``t`` or tuple of types.""" + return lambda x: isinstance(x.value, t) + + +_is_list = _is_type(list) +_is_str = _is_type(str) + + +# partition all AST nodes +_all_nodes = frozenset( + filter( + lambda x: isinstance(x, type) and issubclass(x, ast.AST), + (getattr(ast, node) for node in dir(ast)), + ) +) + + +def _filter_nodes(superclass, all_nodes=_all_nodes): + """Filter out AST nodes that are subclasses of ``superclass``.""" + node_names = (node.__name__ for node in all_nodes if issubclass(node, superclass)) + return frozenset(node_names) + + +_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes)) +_mod_nodes = _filter_nodes(ast.mod) +_stmt_nodes = _filter_nodes(ast.stmt) +_expr_nodes = _filter_nodes(ast.expr) +_expr_context_nodes = _filter_nodes(ast.expr_context) +_slice_nodes = _filter_nodes(ast.slice) +_boolop_nodes = _filter_nodes(ast.boolop) +_operator_nodes = _filter_nodes(ast.operator) +_unary_op_nodes = _filter_nodes(ast.unaryop) +_cmp_op_nodes = _filter_nodes(ast.cmpop) +_comprehension_nodes = _filter_nodes(ast.comprehension) +_handler_nodes = _filter_nodes(ast.excepthandler) +_arguments_nodes = _filter_nodes(ast.arguments) +_keyword_nodes = _filter_nodes(ast.keyword) +_alias_nodes = _filter_nodes(ast.alias) + + +# nodes that we don't support directly but are needed for parsing +_hacked_nodes = frozenset(["Assign", "Module", "Expr"]) + + +_unsupported_expr_nodes = frozenset( + [ + "Yield", + "GeneratorExp", + "IfExp", + "DictComp", + "SetComp", + "Repr", + "Lambda", + "Set", + "AST", + "Is", + "IsNot", + ] +) + +# these nodes are low priority or won't ever be supported (e.g., AST) +_unsupported_nodes = ( + _stmt_nodes + | _mod_nodes + | _handler_nodes + | _arguments_nodes + | _keyword_nodes + | _alias_nodes + | _expr_context_nodes + | 
_unsupported_expr_nodes +) - _hacked_nodes + +# we're adding a different assignment in some cases to be equality comparison +# and we don't want `stmt` and friends in their so get only the class whose +# names are capitalized +_base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes +intersection = _unsupported_nodes & _base_supported_nodes +_msg = f"cannot both support and not support {intersection}" +assert not intersection, _msg + + +def _node_not_implemented(node_name, cls): + """Return a function that raises a NotImplementedError with a passed node + name. + """ + + def f(self, *args, **kwargs): + raise NotImplementedError(f"{repr(node_name)} nodes are not implemented") + + return f + + +def disallow(nodes): + """Decorator to disallow certain nodes from parsing. Raises a + NotImplementedError instead. + + Returns + ------- + disallowed : callable + """ + + def disallowed(cls): + cls.unsupported_nodes = () + for node in nodes: + new_method = _node_not_implemented(node, cls) + name = f"visit_{node}" + cls.unsupported_nodes += (name,) + setattr(cls, name, new_method) + return cls + + return disallowed + + +def _op_maker(op_class, op_symbol): + """Return a function to create an op class with its symbol already passed. + + Returns + ------- + f : callable + """ + + def f(self, node, *args, **kwargs): + """Return a partial function with an Op subclass with an operator + already passed. 
+ + Returns + ------- + f : callable + """ + return partial(op_class, op_symbol, *args, **kwargs) + + return f + + +_op_classes = {"binary": BinOp, "unary": UnaryOp} + + +def add_ops(op_classes): + """Decorator to add default implementation of ops.""" + + def f(cls): + for op_attr_name, op_class in op_classes.items(): + ops = getattr(cls, f"{op_attr_name}_ops") + ops_map = getattr(cls, f"{op_attr_name}_op_nodes_map") + for op in ops: + op_node = ops_map[op] + if op_node is not None: + made_op = _op_maker(op_class, op) + setattr(cls, f"visit_{op_node}", made_op) + return cls + + return f + + +@disallow(_unsupported_nodes) +@add_ops(_op_classes) +class BaseExprVisitor(ast.NodeVisitor): + """ + Custom ast walker. Parsers of other engines should subclass this class + if necessary. + + Parameters + ---------- + env : Scope + engine : str + parser : str + preparser : callable + """ + + const_type: Type[Term] = Constant + term_type = Term + + binary_ops = _cmp_ops_syms + _bool_ops_syms + _arith_ops_syms + binary_op_nodes = ( + "Gt", + "Lt", + "GtE", + "LtE", + "Eq", + "NotEq", + "In", + "NotIn", + "BitAnd", + "BitOr", + "And", + "Or", + "Add", + "Sub", + "Mult", + None, + "Pow", + "FloorDiv", + "Mod", + ) + binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes)) + + unary_ops = _unary_ops_syms + unary_op_nodes = "UAdd", "USub", "Invert", "Not" + unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes)) + + rewrite_map = { + ast.Eq: ast.In, + ast.NotEq: ast.NotIn, + ast.In: ast.In, + ast.NotIn: ast.NotIn, + } + + def __init__(self, env, engine, parser, preparser=_preparse): + self.env = env + self.engine = engine + self.parser = parser + self.preparser = preparser + self.assigner = None + + def visit(self, node, **kwargs): + if isinstance(node, str): + clean = self.preparser(node) + try: + node = ast.fix_missing_locations(ast.parse(clean)) + except SyntaxError as e: + if any(iskeyword(x) for x in clean.split()): + e.msg = "Python keyword not valid identifier in 
numexpr query" + raise e + + method = "visit_" + type(node).__name__ + visitor = getattr(self, method) + return visitor(node, **kwargs) + + def visit_Module(self, node, **kwargs): + if len(node.body) != 1: + raise SyntaxError("only a single expression is allowed") + expr = node.body[0] + return self.visit(expr, **kwargs) + + def visit_Expr(self, node, **kwargs): + return self.visit(node.value, **kwargs) + + def _rewrite_membership_op(self, node, left, right): + # the kind of the operator (is actually an instance) + op_instance = node.op + op_type = type(op_instance) + + # must be two terms and the comparison operator must be ==/!=/in/not in + if is_term(left) and is_term(right) and op_type in self.rewrite_map: + + left_list, right_list = map(_is_list, (left, right)) + left_str, right_str = map(_is_str, (left, right)) + + # if there are any strings or lists in the expression + if left_list or right_list or left_str or right_str: + op_instance = self.rewrite_map[op_type]() + + # pop the string variable out of locals and replace it with a list + # of one string, kind of a hack + if right_str: + name = self.env.add_tmp([right.value]) + right = self.term_type(name, self.env) + + if left_str: + name = self.env.add_tmp([left.value]) + left = self.term_type(name, self.env) + + op = self.visit(op_instance) + return op, op_instance, left, right + + def _maybe_transform_eq_ne(self, node, left=None, right=None): + if left is None: + left = self.visit(node.left, side="left") + if right is None: + right = self.visit(node.right, side="right") + op, op_class, left, right = self._rewrite_membership_op(node, left, right) + return op, op_class, left, right + + def _maybe_downcast_constants(self, left, right): + f32 = np.dtype(np.float32) + if ( + left.is_scalar + and hasattr(left, "value") + and not right.is_scalar + and right.return_type == f32 + ): + # right is a float32 array, left is a scalar + name = self.env.add_tmp(np.float32(left.value)) + left = self.term_type(name, 
self.env) + if ( + right.is_scalar + and hasattr(right, "value") + and not left.is_scalar + and left.return_type == f32 + ): + # left is a float32 array, right is a scalar + name = self.env.add_tmp(np.float32(right.value)) + right = self.term_type(name, self.env) + + return left, right + + def _maybe_eval(self, binop, eval_in_python): + # eval `in` and `not in` (for now) in "partial" python space + # things that can be evaluated in "eval" space will be turned into + # temporary variables. for example, + # [1,2] in a + 2 * b + # in that case a + 2 * b will be evaluated using numexpr, and the "in" + # call will be evaluated using isin (in python space) + return binop.evaluate( + self.env, self.engine, self.parser, self.term_type, eval_in_python + ) + + def _maybe_evaluate_binop( + self, + op, + op_class, + lhs, + rhs, + eval_in_python=("in", "not in"), + maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="), + ): + res = op(lhs, rhs) + + if res.has_invalid_return_type: + raise TypeError( + f"unsupported operand type(s) for {res.op}:" + f" '{lhs.type}' and '{rhs.type}'" + ) + + if self.engine != "pytables": + if ( + res.op in _cmp_ops_syms + and getattr(lhs, "is_datetime", False) + or getattr(rhs, "is_datetime", False) + ): + # all date ops must be done in python bc numexpr doesn't work + # well with NaT + return self._maybe_eval(res, self.binary_ops) + + if res.op in eval_in_python: + # "in"/"not in" ops are always evaluated in python + return self._maybe_eval(res, eval_in_python) + elif self.engine != "pytables": + if ( + getattr(lhs, "return_type", None) == object + or getattr(rhs, "return_type", None) == object + ): + # evaluate "==" and "!=" in python if either of our operands + # has an object return type + return self._maybe_eval(res, eval_in_python + maybe_eval_in_python) + return res + + def visit_BinOp(self, node, **kwargs): + op, op_class, left, right = self._maybe_transform_eq_ne(node) + left, right = self._maybe_downcast_constants(left, right) + return 
def visit_Constant(self, node, **kwargs):
    """
    Wrap the literal held by an ``ast.Constant`` node in ``self.const_type``.

    The payload is read from ``node.value``. The previous ``node.n`` spelling
    is only a deprecated alias on ``ast.Constant`` and is not available on
    newer Python versions.

    Parameters
    ----------
    node : ast.Constant
    **kwargs
        Accepted for visitor-signature compatibility; not used.

    Returns
    -------
    The object produced by ``self.const_type(node.value, self.env)``.
    """
    return self.const_type(node.value, self.env)
def visit_Attribute(self, node, **kwargs):
    """
    Resolve an attribute access such as ``df.index`` to a term.

    Parameters
    ----------
    node : ast.Attribute
    **kwargs
        Accepted for visitor-signature compatibility; not used.

    Returns
    -------
    A ``self.term_type`` wrapping the attribute value (stored as a temporary
    in ``self.env``), or the resolved base object itself when the attribute
    lookup fails and the base name equals the attribute name.

    Raises
    ------
    ValueError
        If the node is not in a ``Load`` context, or the attribute cannot be
        resolved by either path above.
    """
    attr = node.attr
    value = node.value

    ctx = node.ctx
    if isinstance(ctx, ast.Load):
        # resolve the value
        resolved = self.visit(value).value
        try:
            v = getattr(resolved, attr)
            name = self.env.add_tmp(v)
            return self.term_type(name, self.env)
        except AttributeError:
            # something like datetime.datetime where scope is overridden
            if isinstance(value, ast.Name) and value.id == attr:
                return resolved

    # ``ctx`` is an *instance* of ast.Load/ast.Store/ast.Del, so the class name
    # has to come from its type; ``ctx.__name__`` would raise AttributeError
    # and hide the intended ValueError.
    raise ValueError(f"Invalid Attribute context {type(ctx).__name__}")
def visit_Compare(self, node, **kwargs):
    """
    Visit a comparison, expanding chained comparisons into an ``and`` of pairs.

    A single comparison ``a CMP b`` is re-expressed as an ``ast.BinOp`` and
    visited. A chain ``a CMP b CMP c`` is split into the pairwise comparisons
    ``a CMP b`` and ``b CMP c``; each is visited, and the visited results are
    combined under an ``ast.BoolOp`` with ``ast.And`` which is visited in turn.
    Every operator is first passed through ``self.translate_In``.
    """
    operators = node.ops
    comparators = node.comparators

    # base case: we have something like a CMP b
    if len(comparators) == 1:
        return self.visit(
            ast.BinOp(
                op=self.translate_In(operators[0]),
                left=node.left,
                right=comparators[0],
            )
        )

    # recursive case: we have a chained comparison, a CMP b CMP c, etc.
    # the left operand of each pair is the right operand of the previous one
    left_operands = [node.left, *comparators[:-1]]
    visited_pairs = [
        self.visit(
            ast.Compare(
                comparators=[rhs], left=lhs, ops=[self.translate_In(operator_)]
            )
        )
        for lhs, operator_, rhs in zip(left_operands, operators, comparators)
    ]
    return self.visit(ast.BoolOp(op=ast.And(), values=visited_pairs))
class Expr:
    """
    Object encapsulating an expression.

    Parameters
    ----------
    expr : str
    engine : str, optional, default 'numexpr'
    parser : str, optional, default 'pandas'
    env : Scope, optional, default None
        When omitted (or falsy) a new ``Scope(level=level + 1)`` is created.
    level : int, optional, default 0
        Only used when a new ``Scope`` has to be created.
    """

    env: Scope
    engine: str
    parser: str

    def __init__(
        self,
        expr,
        engine: str = "numexpr",
        parser: str = "pandas",
        env: Optional[Scope] = None,
        level: int = 0,
    ):
        self.expr = expr
        self.env = env or Scope(level=level + 1)
        self.engine = engine
        self.parser = parser
        # the visitor class is looked up by parser name
        self._visitor = _parsers[parser](self.env, self.engine, self.parser)
        self.terms = self.parse()

    @property
    def assigner(self):
        """Name recorded by the visitor for an assignment, or None."""
        return getattr(self._visitor, "assigner", None)

    def __call__(self):
        """Evaluate the parsed terms against this expression's scope."""
        return self.terms(self.env)

    def __repr__(self) -> str:
        return printing.pprint_thing(self.terms)

    def __len__(self) -> int:
        """Length of the raw expression (``len(self.expr)``)."""
        return len(self.expr)

    def parse(self):
        """Parse an expression"""
        return self._visitor.visit(self.expr)

    @property
    def names(self):
        """Get the names in an expression"""
        if is_term(self.terms):
            return frozenset([self.terms.name])
        return frozenset(term.name for term in com.flatten(self.terms))
def _evaluate_standard(op, op_str, a, b):
    """
    Evaluate ``op(a, b)`` directly in Python, bypassing numexpr.

    ``op_str`` is accepted so the signature matches the numexpr evaluator but
    is not read here. When test mode is on, a ``False`` ("numexpr not used")
    result is reported through ``_store_test_result``.
    """
    if _TEST_MODE:
        _store_test_result(False)
    # silence numpy floating-point warnings for the duration of the call
    with np.errstate(all="ignore"):
        outcome = op(a, b)
    return outcome
not len(dtypes) or _ALLOWED_DTYPES[dtype_check] >= dtypes: + return True + + return False + + +def _evaluate_numexpr(op, op_str, a, b): + result = None + + if _can_use_numexpr(op, op_str, a, b, "evaluate"): + is_reversed = op.__name__.strip("_").startswith("r") + if is_reversed: + # we were originally called by a reversed op method + a, b = b, a + + a_value = getattr(a, "values", a) + b_value = getattr(b, "values", b) + + result = ne.evaluate( + f"a_value {op_str} b_value", + local_dict={"a_value": a_value, "b_value": b_value}, + casting="safe", + ) + + if _TEST_MODE: + _store_test_result(result is not None) + + if result is None: + result = _evaluate_standard(op, op_str, a, b) + + return result + + +def _where_standard(cond, a, b): + return np.where( + values_from_object(cond), values_from_object(a), values_from_object(b) + ) + + +def _where_numexpr(cond, a, b): + result = None + + if _can_use_numexpr(None, "where", a, b, "where"): + cond_value = getattr(cond, "values", cond) + a_value = getattr(a, "values", a) + b_value = getattr(b, "values", b) + + result = ne.evaluate( + "where(cond_value, a_value, b_value)", + local_dict={ + "cond_value": cond_value, + "a_value": a_value, + "b_value": b_value, + }, + casting="safe", + ) + + if result is None: + result = _where_standard(cond, a, b) + + return result + + +# turn myself on +set_use_numexpr(get_option("compute.use_numexpr")) + + +def _has_bool_dtype(x): + if isinstance(x, ABCDataFrame): + return "bool" in x.dtypes + try: + return x.dtype == bool + except AttributeError: + return isinstance(x, (bool, np.bool_)) + + +def _bool_arith_check( + op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None +): + if unsupported is None: + unsupported = {"+": "|", "*": "&", "-": "^"} + + if _has_bool_dtype(a) and _has_bool_dtype(b): + if op_str in unsupported: + warnings.warn( + f"evaluating in Python space because the {repr(op_str)} " + f"operator is not supported by numexpr for " + f"the bool dtype, use 
{repr(unsupported[op_str])} instead" + ) + return False + + if op_str in not_allowed: + raise NotImplementedError( + f"operator {repr(op_str)} not implemented for bool dtypes" + ) + return True + + +def evaluate(op, op_str, a, b, use_numexpr=True): + """ + Evaluate and return the expression of the op on a and b. + + Parameters + ---------- + op : the actual operand + op_str : str + The string version of the op. + a : left operand + b : right operand + use_numexpr : bool, default True + Whether to try to use numexpr. + """ + + use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) + if use_numexpr: + return _evaluate(op, op_str, a, b) + return _evaluate_standard(op, op_str, a, b) + + +def where(cond, a, b, use_numexpr=True): + """ + Evaluate the where condition cond on a and b. + + Parameters + ---------- + cond : np.ndarray[bool] + a : return if cond is True + b : return if cond is False + use_numexpr : bool, default True + Whether to try to use numexpr. + """ + + if use_numexpr: + return _where(cond, a, b) + return _where_standard(cond, a, b) + + +def set_test_mode(v=True): + """ + Keeps track of whether numexpr was used. Stores an additional ``True`` + for every successful use of evaluate with numexpr since the last + ``get_test_result`` + """ + global _TEST_MODE, _TEST_RESULT + _TEST_MODE = v + _TEST_RESULT = [] + + +def _store_test_result(used_numexpr): + global _TEST_RESULT + if used_numexpr: + _TEST_RESULT.append(used_numexpr) + + +def get_test_result(): + """get test result and reset test_results""" + global _TEST_RESULT + res = _TEST_RESULT + _TEST_RESULT = [] + return res diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py new file mode 100644 index 00000000..cb166ba6 --- /dev/null +++ b/pandas/core/computation/ops.py @@ -0,0 +1,601 @@ +"""Operator classes for eval. 
class UndefinedVariableError(NameError):
    """
    NameError subclass for local variables.

    The message reads ``local variable '<name>' is not defined`` when
    ``is_local`` is true and ``name '<name>' is not defined`` otherwise.
    """

    def __init__(self, name, is_local: bool):
        prefix = "local variable" if is_local else "name"
        super().__init__(f"{prefix} {name!r} is not defined")
@property
def is_datetime(self) -> bool:
    """
    Whether this term's type is a ``datetime`` or ``np.datetime64`` subclass.

    ``self.type`` may be an object exposing a ``.type`` attribute (in which
    case that attribute is the class tested) or a plain class (tested as-is).
    """
    candidate = self.type
    try:
        candidate = candidate.type
    except AttributeError:
        # no ``.type`` attribute: ``self.type`` itself is the class to test
        pass
    return issubclass(candidate, (datetime, np.datetime64))
+_bool_op_map = {"not": "~", "and": "&", "or": "|"} + + +class Op: + """ + Hold an operator of arbitrary arity. + """ + + op: str + + def __init__(self, op: str, operands, *args, **kwargs): + self.op = _bool_op_map.get(op, op) + self.operands = operands + self.encoding = kwargs.get("encoding", None) + + def __iter__(self): + return iter(self.operands) + + def __repr__(self) -> str: + """ + Print a generic n-ary operator and its operands using infix notation. + """ + # recurse over the operands + parened = (f"({pprint_thing(opr)})" for opr in self.operands) + return pprint_thing(f" {self.op} ".join(parened)) + + @property + def return_type(self): + # clobber types to bool if the op is a boolean operator + if self.op in (_cmp_ops_syms + _bool_ops_syms): + return np.bool_ + return result_type_many(*(term.type for term in com.flatten(self))) + + @property + def has_invalid_return_type(self) -> bool: + types = self.operand_types + obj_dtype_set = frozenset([np.dtype("object")]) + return self.return_type == object and types - obj_dtype_set + + @property + def operand_types(self): + return frozenset(term.type for term in com.flatten(self)) + + @property + def is_scalar(self) -> bool: + return all(operand.is_scalar for operand in self.operands) + + @property + def is_datetime(self) -> bool: + try: + t = self.return_type.type + except AttributeError: + t = self.return_type + + return issubclass(t, (datetime, np.datetime64)) + + +def _in(x, y): + """Compute the vectorized membership of ``x in y`` if possible, otherwise + use Python. + """ + try: + return x.isin(y) + except AttributeError: + if is_list_like(x): + try: + return y.isin(x) + except AttributeError: + pass + return x in y + + +def _not_in(x, y): + """Compute the vectorized membership of ``x not in y`` if possible, + otherwise use Python. 
+ """ + try: + return ~x.isin(y) + except AttributeError: + if is_list_like(x): + try: + return ~y.isin(x) + except AttributeError: + pass + return x not in y + + +_cmp_ops_syms = (">", "<", ">=", "<=", "==", "!=", "in", "not in") +_cmp_ops_funcs = ( + operator.gt, + operator.lt, + operator.ge, + operator.le, + operator.eq, + operator.ne, + _in, + _not_in, +) +_cmp_ops_dict = dict(zip(_cmp_ops_syms, _cmp_ops_funcs)) + +_bool_ops_syms = ("&", "|", "and", "or") +_bool_ops_funcs = (operator.and_, operator.or_, operator.and_, operator.or_) +_bool_ops_dict = dict(zip(_bool_ops_syms, _bool_ops_funcs)) + +_arith_ops_syms = ("+", "-", "*", "/", "**", "//", "%") +_arith_ops_funcs = ( + operator.add, + operator.sub, + operator.mul, + operator.truediv, + operator.pow, + operator.floordiv, + operator.mod, +) +_arith_ops_dict = dict(zip(_arith_ops_syms, _arith_ops_funcs)) + +_special_case_arith_ops_syms = ("**", "//", "%") +_special_case_arith_ops_funcs = (operator.pow, operator.floordiv, operator.mod) +_special_case_arith_ops_dict = dict( + zip(_special_case_arith_ops_syms, _special_case_arith_ops_funcs) +) + +_binary_ops_dict = {} + +for d in (_cmp_ops_dict, _bool_ops_dict, _arith_ops_dict): + _binary_ops_dict.update(d) + + +def _cast_inplace(terms, acceptable_dtypes, dtype): + """ + Cast an expression inplace. + + Parameters + ---------- + terms : Op + The expression that should cast. + acceptable_dtypes : list of acceptable numpy.dtype + Will not cast if term's dtype in this list. + dtype : str or numpy.dtype + The dtype to cast to. + """ + dt = np.dtype(dtype) + for term in terms: + if term.type in acceptable_dtypes: + continue + + try: + new_value = term.value.astype(dt) + except AttributeError: + new_value = dt.type(term.value) + term.update(new_value) + + +def is_term(obj) -> bool: + return isinstance(obj, Term) + + +class BinOp(Op): + """ + Hold a binary operator and its operands. 
def evaluate(self, env, engine: str, parser, term_type, eval_in_python):
    """
    Evaluate a binary operation *before* being passed to the engine.

    Parameters
    ----------
    env : Scope
    engine : str
    parser : str
    term_type : type
    eval_in_python : list

    Returns
    -------
    term_type
        The "pre-evaluated" expression as an instance of ``term_type``
    """
    if engine == "python":
        outcome = self(env)
    else:
        # recurse over the left/right nodes with identical settings
        shared = dict(
            engine=engine,
            parser=parser,
            term_type=term_type,
            eval_in_python=eval_in_python,
        )
        lhs_term = self.lhs.evaluate(env, **shared)
        rhs_term = self.rhs.evaluate(env, **shared)

        # base cases
        if self.op in eval_in_python:
            outcome = self.func(lhs_term.value, rhs_term.value)
        else:
            from pandas.core.computation.eval import eval as _pd_eval

            outcome = _pd_eval(self, local_dict=env, engine=engine, parser=parser)

    # store the result as a temporary and hand back a term that refers to it
    return term_type(env.add_tmp(outcome), env=env)
def isnumeric(dtype) -> bool:
    """Return True when ``np.dtype(dtype)`` has a scalar type under ``np.number``."""
    scalar_type = np.dtype(dtype).type
    return issubclass(scalar_type, np.number)
def __init__(self, lhs, rhs, **kwargs):
    """
    Build a ``/`` operation and cast its terms for true division.

    Parameters
    ----------
    lhs, rhs : Term or Op
        The Terms or Ops in the ``/`` expression.

    Raises
    ------
    TypeError
        If either operand's ``return_type`` is not numeric.
    """
    super().__init__("/", lhs, rhs, **kwargs)

    if not isnumeric(lhs.return_type) or not isnumeric(rhs.return_type):
        raise TypeError(
            f"unsupported operand type(s) for {self.op}: "
            f"'{lhs.return_type}' and '{rhs.return_type}'"
        )

    # do not upcast float32s to float64 un-necessarily
    # ``np.float64`` is spelled out because the ``np.float_`` alias no longer
    # exists in NumPy 2.0; on older NumPy the two names are the same type.
    acceptable_dtypes = [np.float32, np.float64]
    _cast_inplace(com.flatten(self), acceptable_dtypes, np.float64)
def create_valid_python_identifier(name: str) -> str:
    """
    Create valid Python identifiers from any string.

    A name that already is an identifier (and not a keyword) is returned
    unchanged. Otherwise every special character is replaced by a descriptive
    placeholder and the result is prefixed with ``BACKTICK_QUOTED_STRING_``.

    Raises
    ------
    SyntaxError
        If the converted name is still not a valid Python identifier.
        This can happen if there is a hashtag in the name, as the tokenizer
        will then terminate and not find the backtick.
        It also happens for characters outside the range (U+0001..U+007F).
    """
    already_usable = name.isidentifier() and not iskeyword(name)
    if already_usable:
        return name

    # Map each special character to its replacement string.
    # EXACT_TOKEN_TYPES contains the operator characters;
    # token.tok_name supplies a readable description for each of them.
    replacements = {
        **{
            symbol: f"_{token.tok_name[kind]}_"
            # The ignore here is because of a bug in mypy that is resolved in 0.740
            for symbol, kind in tokenize.EXACT_TOKEN_TYPES.items()  # type: ignore
        },
        " ": "_",
        "?": "_QUESTIONMARK_",
        "!": "_EXCLAMATIONMARK_",
        "$": "_DOLLARSIGN_",
        "€": "_EUROSIGN_",
        # Including quotes works, but there are exceptions.
        "'": "_SINGLEQUOTE_",
        '"': "_DOUBLEQUOTE_",
        # Currently not possible. Terminates parser and won't find backtick.
        # "#": "_HASH_",
    }

    cleaned = "BACKTICK_QUOTED_STRING_" + "".join(
        replacements.get(character, character) for character in name
    )

    if cleaned.isidentifier():
        return cleaned
    raise SyntaxError(f"Could not convert '{cleaned}' to a valid Python identifier.")
def tokenize_backtick_quoted_string(
    token_generator: Iterator[tokenize.TokenInfo], source: str, string_start: int
) -> Tuple[int, str]:
    """
    Creates a token from a backtick quoted string.

    Moves the token_generator forwards till right after the next backtick.

    Parameters
    ----------
    token_generator : Iterator[tokenize.TokenInfo]
        The generator that yields the tokens of the source string (Tuple[int, str]).
        The generator is at the first token after the backtick (`)

    source : str
        The Python source code string.

    string_start : int
        This is the start of backtick quoted string inside the source string.

    Returns
    -------
    tok: Tuple[int, str]
        The token that represents the backtick quoted string.
        The integer is equal to BACKTICK_QUOTED_STRING (100).

    Raises
    ------
    SyntaxError
        If the generator is exhausted before a closing backtick is found.
    """
    for _, tokval, start, _, _ in token_generator:
        if tokval == "`":
            string_end = start[1]
            break
    else:
        # Without this branch a missing closing backtick only surfaced as an
        # incidental UnboundLocalError on ``string_end``.
        raise SyntaxError(
            f"Failed to parse backticks in '{source}': no closing backtick found."
        )

    return BACKTICK_QUOTED_STRING, source[string_start:string_end]
class PyTablesScope(_scope.Scope):
    """
    Scope that additionally carries the ``queryables`` mapping.

    Parameters
    ----------
    level : int
        Scope level; it is incremented by one before being handed to the
        parent constructor.
    global_dict : dict or None, optional
        Forwarded to the parent constructor.
    local_dict : dict or None, optional
        Forwarded to the parent constructor.
    queryables : dict or None, optional
        Mapping of queryable names; a falsy value is replaced by a new
        empty dict.
    """

    __slots__ = ("queryables",)

    queryables: Dict[str, Any]

    def __init__(
        self,
        level: int,
        global_dict=None,
        local_dict=None,
        queryables: Optional[Dict[str, Any]] = None,
    ):
        super().__init__(level + 1, global_dict=global_dict, local_dict=local_dict)
        # None (or an empty mapping) becomes a fresh dict owned by this scope
        self.queryables = queryables if queryables else dict()
class BinOp(ops.BinOp):
    """
    Binary operation node of a PyTables ``where`` expression.

    Parameters
    ----------
    op : str
        The operator symbol.
    lhs, rhs
        Operands; passed on to the parent constructor.
    queryables : dict
        Mapping of queryable field name to its column description (or None).
    encoding
        Encoding used when turning values into strings; may be None.
    """

    # Upper bound on the number of values compared against this constant by
    # the ``evaluate`` implementations of the subclasses.
    _max_selectors = 31

    op: str
    queryables: Dict[str, Any]

    def __init__(self, op: str, lhs, rhs, queryables: Dict[str, Any], encoding):
        super().__init__(op, lhs, rhs)
        self.queryables = queryables
        self.encoding = encoding
        self.condition = None

    def _disallow_scalar_only_bool_ops(self):
        # deliberately does nothing
        pass

    def prune(self, klass):
        """
        Collapse this node into an instance of ``klass`` (or a joint variant).

        Term operands contribute their ``value``; non-term operands are
        pruned recursively first. Returns whatever the combination of both
        sides yields, which may be None.
        """

        def pr(left, right):
            """ create and return a new specialized BinOp from myself """
            if left is None:
                return right
            elif right is None:
                return left

            k = klass
            if isinstance(left, ConditionBinOp):
                if isinstance(right, ConditionBinOp):
                    k = JointConditionBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            elif isinstance(left, FilterBinOp):
                if isinstance(right, FilterBinOp):
                    k = JointFilterBinOp
                elif isinstance(left, k):
                    return left
                elif isinstance(right, k):
                    return right

            return k(
                self.op, left, right, queryables=self.queryables, encoding=self.encoding
            ).evaluate()

        left, right = self.lhs, self.rhs

        # the left side is always resolved before the right side
        left_res = left.value if is_term(left) else left.prune(klass)
        right_res = right.value if is_term(right) else right.prune(klass)

        return pr(left_res, right_res)

    def conform(self, rhs):
        """ return rhs as a list-like, flattening ndarrays to one dimension """
        if not is_list_like(rhs):
            rhs = [rhs]
        if isinstance(rhs, np.ndarray):
            rhs = rhs.ravel()
        return rhs

    @property
    def is_valid(self) -> bool:
        """ return True if this is a valid field """
        return self.lhs in self.queryables

    @property
    def is_in_table(self) -> bool:
        """ return True if this is a valid column name for generation (e.g. an
        actual column in the table) """
        return self.queryables.get(self.lhs) is not None

    @property
    def kind(self):
        """ the kind of my field """
        return getattr(self.queryables.get(self.lhs), "kind", None)

    @property
    def meta(self):
        """ the meta of my field """
        return getattr(self.queryables.get(self.lhs), "meta", None)

    @property
    def metadata(self):
        """ the metadata of my field """
        return getattr(self.queryables.get(self.lhs), "metadata", None)

    def generate(self, v) -> str:
        """ create and return the op string for this TermValue """
        val = v.tostring(self.encoding)
        return f"({self.lhs} {self.op} {val})"

    def convert_value(self, v) -> "TermValue":
        """
        Convert the value of a term into something accepted by pytables.

        The conversion is chosen from the ``kind`` and ``meta`` of the field
        on the left-hand side.

        Parameters
        ----------
        v : object
            The raw right-hand side value.

        Returns
        -------
        TermValue

        Raises
        ------
        TypeError
            If ``v`` is not a string and no branch matches the field kind.
        """

        def stringify(value):
            if self.encoding is not None:
                encoder = partial(pprint_thing_encoded, encoding=self.encoding)
            else:
                encoder = pprint_thing
            return encoder(value)

        kind = _ensure_decoded(self.kind)
        meta = _ensure_decoded(self.meta)
        if kind == "datetime64" or kind == "datetime":
            if isinstance(v, (int, float)):
                v = stringify(v)
            v = _ensure_decoded(v)
            v = Timestamp(v)
            if v.tz is not None:
                v = v.tz_convert("UTC")
            return TermValue(v, v.value, kind)
        elif kind == "timedelta64" or kind == "timedelta":
            v = Timedelta(v, unit="s").value
            return TermValue(int(v), v, kind)
        elif meta == "category":
            metadata = com.values_from_object(self.metadata)

            # searchsorted returns an insertion point even for values that
            # are not present, so membership has to be checked first;
            # -1 marks a value that is not one of the categories
            if v not in metadata:
                result = -1
            else:
                result = metadata.searchsorted(v, side="left")
            return TermValue(result, result, "integer")
        elif kind == "integer":
            if isinstance(v, (int, np.integer)):
                # exact: going through float loses precision above 2**53
                v = int(v)
            else:
                v = int(float(v))
            return TermValue(v, v, kind)
        elif kind == "float":
            v = float(v)
            return TermValue(v, v, kind)
        elif kind == "bool":
            if isinstance(v, str):
                v = v.strip().lower() not in [
                    "false",
                    "f",
                    "no",
                    "n",
                    "none",
                    "0",
                    "[]",
                    "{}",
                    "",
                ]
            else:
                v = bool(v)
            return TermValue(v, v, kind)
        elif isinstance(v, str):
            # string quoting
            return TermValue(v, stringify(v), "string")
        else:
            raise TypeError(f"Cannot compare {v} of type {type(v)} to {kind} column")

    def convert_values(self):
        # deliberately does nothing
        pass
def generate_filter_op(self, invert: bool = False):
    """
    Build the callable that applies this filter to an axis.

    Parameters
    ----------
    invert : bool, default False
        Flip the sense of the operator.

    Returns
    -------
    callable
        ``f(axis, vals)`` returning ``axis.isin(vals)``, negated when the
        operator is "!=" (and not inverted) or "==" (and inverted).
    """
    if self.op == "!=":
        negate = not invert
    elif self.op == "==":
        negate = bool(invert)
    else:
        negate = False

    if negate:
        return lambda axis, vals: ~axis.isin(vals)
    return lambda axis, vals: axis.isin(vals)
class UnaryOp(ops.UnaryOp):
    """Unary operation of a PyTables expression; only "~" can be pruned."""

    def prune(self, klass):
        """
        Prune the operand and invert the result.

        Parameters
        ----------
        klass : type
            The BinOp flavour being pruned for; ``ConditionBinOp`` subclasses
            look at the operand's ``condition``, ``FilterBinOp`` subclasses
            at its ``filter``.

        Returns
        -------
        The inverted pruned operand, or None when there is nothing to invert.

        Raises
        ------
        NotImplementedError
            If the operator is anything other than "~".
        """
        if self.op != "~":
            raise NotImplementedError("UnaryOp only support invert type ops")

        pruned = self.operand.prune(klass)
        if pruned is None:
            return None

        # pick the attribute that tells whether the operand holds anything
        if issubclass(klass, ConditionBinOp):
            payload = pruned.condition
        elif issubclass(klass, FilterBinOp):
            payload = pruned.filter
        else:
            return None

        if payload is None:
            return None
        return pruned.invert()
def _validate_where(w):
    """
    Check that a where clause has an accepted type.

    Parameters
    ----------
    w : str, PyTablesExpr, or list-like
        The where clause to check.

    Returns
    -------
    The object that was passed in, unchanged.

    Raises
    ------
    TypeError
        If ``w`` is neither a string, a PyTablesExpr, nor list-like
        (e.g. a dict).
    """
    if isinstance(w, (PyTablesExpr, str)) or is_list_like(w):
        return w

    raise TypeError(
        "where must be passed as a string, PyTablesExpr, "
        "or list-like of PyTablesExpr"
    )
def __init__(
    self,
    where,
    queryables: Optional[Dict[str, Any]] = None,
    encoding=None,
    scope_level: int = 0,
):
    """
    Build the expression string and, when possible, parse it.

    Parameters
    ----------
    where : str, PyTablesExpr, or list/tuple of those
        The where clause. Items of a list or tuple are wrapped in
        parentheses and joined with " & ".
    queryables : dict or None, optional
        When given and the resulting expression is a string, the expression
        is parsed right away.
    encoding : optional
        Stored on the instance and forwarded to the visitor.
    scope_level : int, default 0
        Incremented by one and handed to ``PyTablesScope``.
    """
    where = _validate_where(where)

    self.encoding = encoding
    self.condition = None
    self.filter = None
    self.terms = None
    self._visitor = None

    # capture the environment if needed
    local_dict = DeepChainMap()

    if isinstance(where, PyTablesExpr):
        # reuse both the scope and the expression of the existing object
        local_dict = where.env.scope
        expression = where.expr
    elif isinstance(where, (list, tuple)):
        where = list(where)
        for position, item in enumerate(where):
            if isinstance(item, PyTablesExpr):
                local_dict = item.env.scope
                continue
            where[position] = _validate_where(item)
        expression = " & ".join(f"({item})" for item in com.flatten(where))
    else:
        expression = where

    self.expr = expression
    self.env = PyTablesScope(scope_level + 1, local_dict=local_dict)

    if queryables is None or not isinstance(self.expr, str):
        return

    self.env.queryables.update(queryables)
    self._visitor = PyTablesExprVisitor(
        self.env,
        queryables=queryables,
        parser="pytables",
        engine="pytables",
        encoding=encoding,
    )
    self.terms = self.parse()
class TermValue:
    """
    Hold a term value that is used to construct a condition/filter.

    Parameters
    ----------
    value : object
        The value as supplied.
    converted : object
        The value in the form that is rendered by :meth:`tostring`.
    kind : str
        Name of the kind of the value, e.g. "string" or "float".
    """

    def __init__(self, value, converted, kind: str):
        assert isinstance(kind, str), kind
        self.value = value
        self.converted = converted
        self.kind = kind

    def tostring(self, encoding) -> str:
        """
        Render the converted value as text.

        Floats are rendered with ``repr``; strings are wrapped in double
        quotes when no encoding is given; everything else goes through
        ``str``.
        """
        converted = self.converted

        if self.kind == "float":
            # repr() is used so that the float round-trips
            return repr(converted)

        if self.kind == "string" and encoding is None:
            return f'"{converted}"'

        return str(converted)
def _get_pretty_string(obj) -> str:
    """
    Pretty-print an object into a string.

    Parameters
    ----------
    obj : object
        Object to pretty print.

    Returns
    -------
    str
        Exactly what ``pprint.pprint`` writes for ``obj``.
    """
    with StringIO() as buffer:
        pprint.pprint(obj, stream=buffer)
        return buffer.getvalue()
def __repr__(self) -> str:
    """Show the class name with the pretty-printed scope and resolver keys."""
    class_name = type(self).__name__
    scope_keys = _get_pretty_string(list(self.scope.keys()))
    res_keys = _get_pretty_string(list(self.resolvers.keys()))
    return f"{class_name}(scope={scope_keys}, resolvers={res_keys})"
def swapkey(self, old_key: str, new_key: str, new_value=None):
    """
    Replace a variable name, with a potentially new value.

    The resolvers (if any), the scope maps and finally the temporaries are
    searched in that order; ``new_key`` is set in the first mapping that
    contains ``old_key``. Nothing happens when no mapping contains it.

    Parameters
    ----------
    old_key : str
        Current variable name to replace
    new_key : str
        New variable name to replace `old_key` with
    new_value : object
        Value to be replaced along with the possible renaming
    """
    # Always work on a new list: appending the temporaries to
    # ``self.scope.maps`` itself would grow the scope chain on every call.
    if self.has_resolvers:
        maps = self.resolvers.maps + self.scope.maps
    else:
        maps = list(self.scope.maps)

    maps.append(self.temps)

    for mapping in maps:
        if old_key in mapping:
            mapping[new_key] = new_value
            return
def add_tmp(self, value) -> str:
    """
    Store a value as a temporary variable of this scope.

    Parameters
    ----------
    value : object
        An arbitrary object to be assigned to a temporary variable.

    Returns
    -------
    str
        The name under which the value was stored. It is made of the type
        name of the value, the current number of temporaries and the hex id
        of this scope.
    """
    type_name = type(value).__name__
    count = self.ntemps
    scope_id = _raw_hex_id(self)
    name = f"{type_name}_{count}_{scope_id}"

    # the generated name must be new, and must be present afterwards
    assert name not in self.temps
    self.temps[name] = value
    assert name in self.temps

    return name
def use_bottleneck_cb(key):
    """Pass the current value of option ``key`` to ``nanops.set_use_bottleneck``."""
    # imported here rather than at module level, as in the rest of this file
    from pandas.core import nanops

    enabled = cf.get_option(key)
    nanops.set_use_bottleneck(enabled)
This is + only a suggestion +""" + +pc_colspace_doc = """ +: int + Default space for DataFrame columns. +""" + +pc_max_rows_doc = """ +: int + If max_rows is exceeded, switch to truncate view. Depending on + `large_repr`, objects are either centrally truncated or printed as + a summary view. 'None' value means unlimited. + + In case python/IPython is running in a terminal and `large_repr` + equals 'truncate' this can be set to 0 and pandas will auto-detect + the height of the terminal and print a truncated object which fits + the screen height. The IPython notebook, IPython qtconsole, or + IDLE do not run in a terminal and hence it is not possible to do + correct auto-detection. +""" + +pc_min_rows_doc = """ +: int + The numbers of rows to show in a truncated view (when `max_rows` is + exceeded). Ignored when `max_rows` is set to None or 0. When set to + None, follows the value of `max_rows`. +""" + +pc_max_cols_doc = """ +: int + If max_cols is exceeded, switch to truncate view. Depending on + `large_repr`, objects are either centrally truncated or printed as + a summary view. 'None' value means unlimited. + + In case python/IPython is running in a terminal and `large_repr` + equals 'truncate' this can be set to 0 and pandas will auto-detect + the width of the terminal and print a truncated object which fits + the screen width. The IPython notebook, IPython qtconsole, or IDLE + do not run in a terminal and hence it is not possible to do + correct auto-detection. +""" + +pc_max_categories_doc = """ +: int + This sets the maximum number of categories pandas should output when + printing out a `Categorical` or a Series of dtype "category". +""" + +pc_max_info_cols_doc = """ +: int + max_info_columns is used in DataFrame.info method to decide if + per column information will be printed. +""" + +pc_nb_repr_h_doc = """ +: boolean + When True, IPython notebook will use html representation for + pandas objects (if it is available). 
pc_pprint_nest_depth = """
: int
    Controls the number of nested levels to process when pretty-printing
"""

pc_multi_sparse_doc = """
: boolean
    "sparsify" MultiIndex display (don't display repeated
    elements in outer levels within groups)
"""

float_format_doc = """
: callable
    The callable should accept a floating point number and return
    a string with the desired format of the number. This is used
    in some places like SeriesFormatter.
    See formats.format.EngFormatter for an example.
"""

max_colwidth_doc = """
: int or None
    The maximum width in characters of a column in the repr of
    a pandas data structure. When the column overflows, a "..."
    placeholder is embedded in the output. A 'None' value means unlimited.
"""

colheader_justify_doc = """
: 'left'/'right'
    Controls the justification of column headers. used by DataFrameFormatter.
"""

pc_expand_repr_doc = """
: boolean
    Whether to print out the full DataFrame repr for wide DataFrames across
    multiple lines, `max_columns` is still respected, but the output will
    wrap-around across multiple "pages" if its width exceeds `display.width`.
"""

pc_show_dimensions_doc = """
: boolean or 'truncate'
    Whether to print out dimensions at the end of DataFrame repr.
    If 'truncate' is specified, only print out the dimensions if the
    frame is truncated (e.g. not display all rows and/or columns)
"""

pc_east_asian_width_doc = """
: boolean
    Whether to use the Unicode East Asian Width to calculate the display text
    width.
    Enabling this may affect to the performance (default: False)
"""

pc_ambiguous_as_wide_doc = """
: boolean
    Whether to handle Unicode characters belong to Ambiguous as Wide (width=2)
    (default: False)
"""

pc_latex_repr_doc = """
: boolean
    Whether to produce a latex DataFrame representation for jupyter
    environments that support it.
    (default: False)
"""

pc_table_schema_doc = """
: boolean
    Whether to publish a Table Schema representation for frontends
    that support it.
    (default: False)
"""

pc_html_border_doc = """
: int
    A ``border=value`` attribute is inserted in the ``<table>`` tag
    for the DataFrame HTML repr.
"""

pc_html_use_mathjax_doc = """\
: boolean
    When True, Jupyter notebook will process table contents using MathJax,
    rendering mathematical expressions enclosed by the dollar symbol.
    (default: True)
"""

pc_width_doc = """
: int
    Width of the display in characters. In case python/IPython is running in
    a terminal this can be set to None and pandas will correctly auto-detect
    the width.
    Note that the IPython notebook, IPython qtconsole, or IDLE do not run in a
    terminal and hence it is not possible to correctly detect the width.
"""

pc_chop_threshold_doc = """
: float or None
    if set to a float value, all float values smaller than the given threshold
    will be displayed as exactly 0 by repr and friends.
"""

pc_max_seq_items = """
: int or None
    when pretty-printing a long sequence, no more than `max_seq_items`
    will be printed. If items are omitted, they will be denoted by the
    addition of "..." to the resulting string.

    If set to None, the number of items to be printed is unlimited.
"""

pc_max_info_rows_doc = """
: int or None
    df.info() will usually show null-counts for each column.
    For large frames this can be quite slow. max_info_rows and max_info_cols
    limit this null check only to frames with smaller dimensions than
    specified.
"""

pc_large_repr_doc = """
: 'truncate'/'info'
    For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can
    show a truncated table (the default from 0.13), or switch to the view from
    df.info() (the behaviour in earlier versions of pandas).
"""

pc_memory_usage_doc = """
: bool, string or None
    This specifies if the memory usage of a DataFrame should be displayed when
    df.info() is called. Valid values True,False,'deep'
"""

pc_latex_escape = """
: bool
    This specifies if the to_latex method of a Dataframe uses escapes special
    characters.
    Valid values: False,True
"""

pc_latex_longtable = """
: bool
    This specifies if the to_latex method of a Dataframe uses the longtable
    format.
    Valid values: False,True
"""

pc_latex_multicolumn = """
: bool
    This specifies if the to_latex method of a Dataframe uses multicolumns
    to pretty-print MultiIndex columns.
    Valid values: False,True
"""

pc_latex_multicolumn_format = """
: string
    This specifies the format for multicolumn headers.
    Can be surrounded with '|'.
    Valid values: 'l', 'c', 'r', 'p{<width>}'
"""

pc_latex_multirow = """
: bool
    This specifies if the to_latex method of a Dataframe uses multirows
    to pretty-print MultiIndex rows.
    Valid values: False,True
"""


def table_schema_cb(key):
    """
    Callback registered for ``display.html.table_schema``.

    Reads the current value of the option named by `key` and hands it to
    ``_enable_data_resource_formatter``.
    """
    # Imported inside the function: importing at module level here risks
    # circular imports.
    from pandas.io.formats.printing import _enable_data_resource_formatter

    _enable_data_resource_formatter(cf.get_option(key))


def is_terminal() -> bool:
    """
    Detect if Python is running in a terminal.

    Returns True if Python is running in a terminal or False if not.
    """
    try:
        # error: Name 'get_ipython' is not defined
        ip = get_ipython()  # type: ignore
    except NameError:  # assume standard Python interpreter in a terminal
        return True
    else:
        if hasattr(ip, "kernel"):  # IPython as a Jupyter kernel
            return False
        else:  # IPython in a terminal
            return True


with cf.config_prefix("display"):
    cf.register_option("precision", 6, pc_precision_doc, validator=is_nonnegative_int)
    cf.register_option(
        "float_format",
        None,
        float_format_doc,
        validator=is_one_of_factory([None, is_callable]),
    )
    cf.register_option("column_space", 12, validator=is_int)
    cf.register_option(
        "max_info_rows",
        1690785,
        pc_max_info_rows_doc,
        validator=is_instance_factory((int, type(None))),
    )
    cf.register_option("max_rows", 60, pc_max_rows_doc, validator=is_nonnegative_int)
    cf.register_option(
        "min_rows",
        10,
        pc_min_rows_doc,
        validator=is_instance_factory([type(None), int]),
    )
    cf.register_option("max_categories", 8, pc_max_categories_doc, validator=is_int)

    def _deprecate_negative_int_max_colwidth(key):
        # Negative widths are still accepted by the validator below, so the
        # deprecation is signalled from the option's change callback instead.
        value = cf.get_option(key)
        if value is not None and value < 0:
            warnings.warn(
                "Passing a negative integer is deprecated in version 1.0 and "
                "will not be supported in future version. Instead, use None "
                "to not limit the column width.",
                FutureWarning,
                stacklevel=4,
            )

    cf.register_option(
        # FIXME: change `validator=is_nonnegative_int`
        # in version 1.2
        "max_colwidth",
        50,
        max_colwidth_doc,
        validator=is_instance_factory([type(None), int]),
        cb=_deprecate_negative_int_max_colwidth,
    )
    if is_terminal():
        max_cols = 0  # automatically determine optimal number of columns
    else:
        max_cols = 20  # cannot determine optimal number of columns
    cf.register_option(
        "max_columns", max_cols, pc_max_cols_doc, validator=is_nonnegative_int
    )
    cf.register_option(
        "large_repr",
        "truncate",
        pc_large_repr_doc,
        validator=is_one_of_factory(["truncate", "info"]),
    )
    cf.register_option("max_info_columns", 100, pc_max_info_cols_doc, validator=is_int)
    cf.register_option(
        "colheader_justify", "right", colheader_justify_doc, validator=is_text
    )
    cf.register_option("notebook_repr_html", True, pc_nb_repr_h_doc, validator=is_bool)
    cf.register_option("pprint_nest_depth", 3, pc_pprint_nest_depth, validator=is_int)
    cf.register_option("multi_sparse", True, pc_multi_sparse_doc, validator=is_bool)
    cf.register_option("expand_frame_repr", True, pc_expand_repr_doc)
    cf.register_option(
        "show_dimensions",
        "truncate",
        pc_show_dimensions_doc,
        validator=is_one_of_factory([True, False, "truncate"]),
    )
    cf.register_option("chop_threshold", None, pc_chop_threshold_doc)
    cf.register_option("max_seq_items", 100, pc_max_seq_items)
    cf.register_option(
        "width", 80, pc_width_doc, validator=is_instance_factory([type(None), int])
    )
    cf.register_option(
        "memory_usage",
        True,
        pc_memory_usage_doc,
        validator=is_one_of_factory([None, True, False, "deep"]),
    )
    cf.register_option(
        "unicode.east_asian_width", False, pc_east_asian_width_doc, validator=is_bool
    )
    # Each unicode option gets its own description; this one was previously
    # registered with the east_asian_width text.
    cf.register_option(
        "unicode.ambiguous_as_wide",
        False,
        pc_ambiguous_as_wide_doc,
        validator=is_bool,
    )
    cf.register_option("latex.repr", False, pc_latex_repr_doc, validator=is_bool)
    cf.register_option("latex.escape", True, pc_latex_escape, validator=is_bool)
    cf.register_option("latex.longtable", False, pc_latex_longtable, validator=is_bool)
    cf.register_option(
        "latex.multicolumn", True, pc_latex_multicolumn, validator=is_bool
    )
    # Describe the *format* option with the format text, not with the
    # description of the boolean latex.multicolumn switch.
    cf.register_option(
        "latex.multicolumn_format",
        "l",
        pc_latex_multicolumn_format,
        validator=is_text,
    )
    cf.register_option("latex.multirow", False, pc_latex_multirow, validator=is_bool)
    cf.register_option(
        "html.table_schema",
        False,
        pc_table_schema_doc,
        validator=is_bool,
        cb=table_schema_cb,
    )
    cf.register_option("html.border", 1, pc_html_border_doc, validator=is_int)
    cf.register_option(
        "html.use_mathjax", True, pc_html_use_mathjax_doc, validator=is_bool
    )

tc_sim_interactive_doc = """
: boolean
    Whether to simulate interactive mode for purposes of testing
"""

with cf.config_prefix("mode"):
    cf.register_option("sim_interactive", False, tc_sim_interactive_doc)

use_inf_as_null_doc = """
: boolean
    use_inf_as_null had been deprecated and will be removed in a future
    version. Use `use_inf_as_na` instead.
"""

use_inf_as_na_doc = """
: boolean
    True means treat None, NaN, INF, -INF as NA (old way),
    False means None and NaN are null, but INF, -INF are not NA
    (new way).
"""

# We don't want to start importing everything at the global context level
# or we'll hit circular deps.
def use_inf_as_na_cb(key):
    """
    Callback shared by ``mode.use_inf_as_na`` and ``mode.use_inf_as_null``.

    Forwards the option key to ``_use_inf_as_na``; the import is local to
    avoid circular imports at module load.
    """
    from pandas.core.dtypes.missing import _use_inf_as_na

    _use_inf_as_na(key)


with cf.config_prefix("mode"):
    cf.register_option("use_inf_as_na", False, use_inf_as_na_doc, cb=use_inf_as_na_cb)
    cf.register_option(
        "use_inf_as_null", False, use_inf_as_null_doc, cb=use_inf_as_na_cb
    )

cf.deprecate_option(
    "mode.use_inf_as_null", msg=use_inf_as_null_doc, rkey="mode.use_inf_as_na"
)


# user warnings
chained_assignment = """
: string
    Raise an exception, warn, or no action if trying to use chained assignment,
    The default is warn
"""

with cf.config_prefix("mode"):
    cf.register_option(
        "chained_assignment",
        "warn",
        chained_assignment,
        validator=is_one_of_factory([None, "warn", "raise"]),
    )


# Set up the io.excel specific reader configuration.
reader_engine_doc = """
: string
    The default Excel reader engine for '{ext}' files. Available options:
    auto, {others}.
"""

_xls_options = ["xlrd"]
_xlsm_options = ["xlrd", "openpyxl"]
_xlsx_options = ["xlrd", "openpyxl"]
_ods_options = ["odf"]
_xlsb_options = ["pyxlsb"]

# One "reader" option per extension, registered in the same order as before.
for _ext, _engines in (
    ("xls", _xls_options),
    ("xlsm", _xlsm_options),
    ("xlsx", _xlsx_options),
    ("ods", _ods_options),
    ("xlsb", _xlsb_options),
):
    with cf.config_prefix("io.excel." + _ext):
        cf.register_option(
            "reader",
            "auto",
            reader_engine_doc.format(ext=_ext, others=", ".join(_engines)),
            validator=str,
        )

# Set up the io.excel specific writer configuration.
writer_engine_doc = """
: string
    The default Excel writer engine for '{ext}' files. Available options:
    auto, {others}.
"""

# The same module-level names are rebound to the writer engines, as before.
_xls_options = ["xlwt"]
_xlsm_options = ["openpyxl"]
_xlsx_options = ["openpyxl", "xlsxwriter"]

for _ext, _engines in (
    ("xls", _xls_options),
    ("xlsm", _xlsm_options),
    ("xlsx", _xlsx_options),
):
    with cf.config_prefix("io.excel." + _ext):
        cf.register_option(
            "writer",
            "auto",
            writer_engine_doc.format(ext=_ext, others=", ".join(_engines)),
            validator=str,
        )

# Do not leave the loop variables behind as module attributes.
del _ext, _engines


# Set up the io.parquet specific configuration.
parquet_engine_doc = """
: string
    The default parquet reader/writer engine. Available options:
    'auto', 'pyarrow', 'fastparquet', the default is 'auto'
"""

with cf.config_prefix("io.parquet"):
    cf.register_option(
        "engine",
        "auto",
        parquet_engine_doc,
        validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]),
    )

# --------
# Plotting
# --------

plotting_backend_doc = """
: str
    The plotting backend to use. The default value is "matplotlib", the
    backend provided with pandas. Other backends can be specified by
    providing the name of the module that implements the backend.
"""


def register_plotting_backend_cb(key):
    """
    Validator for ``plotting.backend``.

    Despite the parameter name, the argument is compared against the backend
    name "matplotlib"; any other value is passed to ``_get_plot_backend``.
    """
    if key == "matplotlib":
        # We defer matplotlib validation, since it's the default
        return
    from pandas.plotting._core import _get_plot_backend

    _get_plot_backend(key)


with cf.config_prefix("plotting"):
    cf.register_option(
        "backend",
        defval="matplotlib",
        doc=plotting_backend_doc,
        validator=register_plotting_backend_cb,
    )


register_converter_doc = """
: bool or 'auto'.
    Whether to register converters with matplotlib's units registry for
    dates, times, datetimes, and Periods. Toggling to False will remove
    the converters, restoring any converters that pandas overwrote.
"""


def register_converter_cb(key):
    """
    Callback for ``plotting.matplotlib.register_converters``.

    Registers the matplotlib converters when the option's current value is
    truthy and deregisters them otherwise.
    """
    from pandas.plotting import register_matplotlib_converters
    from pandas.plotting import deregister_matplotlib_converters

    if cf.get_option(key):
        register_matplotlib_converters()
    else:
        deregister_matplotlib_converters()


with cf.config_prefix("plotting.matplotlib"):
    cf.register_option(
        "register_converters",
        "auto",
        register_converter_doc,
        validator=is_one_of_factory(["auto", True, False]),
        cb=register_converter_cb,
    )
"""
Constructor functions intended to be shared by pd.array, Series.__init__,
and Index.__new__.

These should not depend on core.internals.
"""
from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast

import numpy as np
import numpy.ma as ma

from pandas._libs import lib
from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime
from pandas._typing import ArrayLike, Dtype

from pandas.core.dtypes.cast import (
    construct_1d_arraylike_from_scalar,
    construct_1d_ndarray_preserving_na,
    construct_1d_object_array_from_listlike,
    infer_dtype_from_scalar,
    maybe_cast_to_datetime,
    maybe_cast_to_integer_array,
    maybe_castable,
    maybe_convert_platform,
    maybe_upcast,
)
from pandas.core.dtypes.common import (
    is_categorical_dtype,
    is_datetime64_ns_dtype,
    is_extension_array_dtype,
    is_float_dtype,
    is_integer_dtype,
    is_iterator,
    is_list_like,
    is_object_dtype,
    is_timedelta64_ns_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, registry
from pandas.core.dtypes.generic import (
    ABCExtensionArray,
    ABCIndexClass,
    ABCPandasArray,
    ABCSeries,
)
from pandas.core.dtypes.missing import isna

import pandas.core.common as com

if TYPE_CHECKING:
    from pandas.core.series import Series  # noqa: F401
    from pandas.core.indexes.api import Index  # noqa: F401


def array(
    data: Sequence[object],
    dtype: Optional[Union[str, np.dtype, ExtensionDtype]] = None,
    copy: bool = True,
) -> ABCExtensionArray:
    """
    Create an array.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =====================================
        Scalar Type                    Array Type
        ============================== =====================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`str`                   :class:`pandas.arrays.StringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =====================================

        For all other cases, NumPy's usual inference rules will be used.

        .. versionchanged:: 1.0.0

           Pandas infers nullable-integer dtype for integer data,
           string dtype for string data, and nullable-boolean dtype
           for boolean data.

    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.PandasArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.PandasArray` backed by a NumPy
    array.

    >>> pd.array(['a', 'b'], dtype=str)
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, Pandas has arrays that mostly overlap with NumPy

      * :class:`arrays.DatetimeArray`
      * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``PandasArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['01:00:00', '02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas infers for.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, NaN]
    Length: 3, dtype: Int64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', nan, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    [a, b, a]
    Categories (2, object): [a, b]

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    [a, b, a]
    Categories (3, object): [a < b < c]

    If pandas does not infer a dedicated extension type a
    :class:`arrays.PandasArray` is returned.

    >>> pd.array([1.1, 2.2])
    <PandasArray>
    [1.1, 2.2]
    Length: 2, dtype: float64

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
    as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <PandasArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
      ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    from pandas.core.arrays import (
        period_array,
        BooleanArray,
        IntegerArray,
        IntervalArray,
        PandasArray,
        DatetimeArray,
        TimedeltaArray,
        StringArray,
    )

    if lib.is_scalar(data):
        msg = f"Cannot pass scalar '{data}' to 'pandas.array'."
        raise ValueError(msg)

    if dtype is None and isinstance(
        data, (ABCSeries, ABCIndexClass, ABCExtensionArray)
    ):
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    # this returns None for not-found dtypes.
    if isinstance(dtype, str):
        dtype = registry.find(dtype) or dtype

    if is_extension_array_dtype(dtype):
        cls = cast(ExtensionDtype, dtype).construct_array_type()
        return cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        inferred_dtype = lib.infer_dtype(data, skipna=True)
        if inferred_dtype == "period":
            try:
                return period_array(data, copy=copy)
            except IncompatibleFrequency:
                # We may have a mixture of frequencies.
                # We choose to return an ndarray, rather than raising.
                pass
        elif inferred_dtype == "interval":
            try:
                return IntervalArray(data, copy=copy)
            except ValueError:
                # We may have a mixture of `closed` here.
                # We choose to return an ndarray, rather than raising.
                pass

        elif inferred_dtype.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to PandasArray
                pass

        elif inferred_dtype.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "string":
            return StringArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "integer":
            return IntegerArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "boolean":
            return BooleanArray._from_sequence(data, copy=copy)

    # Pandas overrides NumPy for
    #   1. datetime64[ns]
    #   2. timedelta64[ns]
    # so that a DatetimeArray is returned.
    if is_datetime64_ns_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    elif is_timedelta64_ns_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    result = PandasArray._from_sequence(data, dtype=dtype, copy=copy)
    return result
def extract_array(obj, extract_numpy=False):
    """
    Extract the ndarray or ExtensionArray from a Series or Index.

    For all other types, `obj` is just returned as is.

    Parameters
    ----------
    obj : object
        For Series / Index, the underlying ExtensionArray is unboxed.
        For Numpy-backed ExtensionArrays, the ndarray is extracted.

    extract_numpy : bool, default False
        Whether to extract the ndarray from a PandasArray

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    [a, b, c]
    Categories (3, object): [a, b, c]

    Other objects like lists, arrays, and DataFrames are just passed through.

    >>> extract_array([1, 2, 3])
    [1, 2, 3]

    For an ndarray-backed Series / Index a PandasArray is returned.

    >>> extract_array(pd.Series([1, 2, 3]))
    <PandasArray>
    [1, 2, 3]
    Length: 3, dtype: int64

    To extract all the way down to the ndarray, pass ``extract_numpy=True``.

    >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
    array([1, 2, 3])
    """
    if isinstance(obj, (ABCIndexClass, ABCSeries)):
        obj = obj.array

    if extract_numpy and isinstance(obj, ABCPandasArray):
        obj = obj.to_numpy()

    return obj


def sanitize_array(
    data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False
):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.

    Parameters
    ----------
    data : object
        Input values: MaskedArray, ndarray, ExtensionArray, list, tuple,
        range, or anything `_try_cast` accepts.
    index : object or None
        Only its length is used, to broadcast a scalar or a 1-element
        result to ``len(index)``.
    dtype : dtype-like, optional
        Passed through ``pandas_dtype`` when given.
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to `_try_cast`.

    Returns
    -------
    ndarray or ExtensionArray
        A Python scalar (``subarr.item()``) is returned instead when the
        result is 0-dimensional and `index` is None.

    Raises
    ------
    ValueError
        If `data` is an ndarray with more than one dimension.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                if copy:
                    subarr = data.copy()
                else:
                    subarr = np.array(data, copy=False)
        else:
            # we will try to copy by definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, "ndim", 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype
                )

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            # ValueError (a subclass of the bare Exception raised before)
            # names the failure precisely while staying catchable by
            # existing ``except Exception`` handlers.
            raise ValueError("Data must be 1-dimensional")
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)):
        # This is to prevent mixed-type Series getting all casted to
        # NumPy string type, e.g. NaN --> '-1#IND'.
        if issubclass(subarr.dtype.type, str):
            # GH#16605
            # If not empty convert the data to dtype
            # GH#19853: If data is a scalar, subarr has already the result
            if not lib.is_scalar(data):
                if not np.all(isna(data)):
                    data = np.array(data, dtype=dtype, copy=False)
                subarr = np.array(data, dtype=object, copy=copy)

        if is_object_dtype(subarr.dtype) and not is_object_dtype(dtype):
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr


def _try_cast(
    arr,
    dtype: Optional[Union[np.dtype, "ExtensionDtype"]],
    copy: bool,
    raise_cast_failure: bool,
):
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray, list, tuple, iterator (catchall)
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.
    """
    # perf shortcut as this is the most common case
    if isinstance(arr, np.ndarray):
        if maybe_castable(arr) and not copy and dtype is None:
            return arr

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            # Called for validation only: the returned array is not used,
            # the actual conversion happens below.
            maybe_cast_to_integer_array(arr, dtype)

        subarr = maybe_cast_to_datetime(arr, dtype)
        # Take care in creating object arrays (but iterators are not
        # supported):
        if is_object_dtype(dtype) and (
            is_list_like(subarr)
            and not (is_iterator(subarr) or isinstance(subarr, np.ndarray))
        ):
            subarr = construct_1d_object_array_from_listlike(subarr)
        elif not is_extension_array_dtype(subarr):
            subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy)
    except OutOfBoundsDatetime:
        # in case of out of bound datetime64 -> always raise
        raise
    except (ValueError, TypeError):
        if is_categorical_dtype(dtype):
            # We *do* allow casting to categorical, since we know
            # that Categorical is the only array type for 'category'.
            dtype = cast(CategoricalDtype, dtype)
            subarr = dtype.construct_array_type()(
                arr, dtype.categories, ordered=dtype.ordered
            )
        elif is_extension_array_dtype(dtype):
            # create an extension array from its dtype
            dtype = cast(ExtensionDtype, dtype)
            array_type = dtype.construct_array_type()._from_sequence
            subarr = array_type(arr, dtype=dtype, copy=copy)
        elif dtype is not None and raise_cast_failure:
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr


def is_empty_data(data: Any) -> bool:
    """
    Utility to check if a Series is instantiated with empty data,
    which does not contain dtype information.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series.

    Returns
    -------
    bool
        True for None and for an empty list-like that has no ``dtype``
        attribute; False otherwise.
    """
    if data is None:
        return True
    # An object with its own dtype (e.g. an empty ndarray) still carries
    # dtype information, and a non-list-like scalar is never "empty".
    if not is_list_like(data) or hasattr(data, "dtype"):
        return False
    return not data


def create_series_with_explicit_dtype(
    data: Any = None,
    index: Optional[Union[ArrayLike, "Index"]] = None,
    dtype: Optional[Dtype] = None,
    name: Optional[str] = None,
    copy: bool = False,
    fastpath: bool = False,
    dtype_if_empty: Dtype = object,
) -> "Series":
    """
    Helper to pass an explicit dtype when instantiating an empty Series.

    This silences a DeprecationWarning described in GitHub-17261.

    Parameters
    ----------
    data : Mirrored from Series.__init__
    index : Mirrored from Series.__init__
    dtype : Mirrored from Series.__init__
    name : Mirrored from Series.__init__
    copy : Mirrored from Series.__init__
    fastpath : Mirrored from Series.__init__
    dtype_if_empty : str, numpy.dtype, or ExtensionDtype
        This dtype will be passed explicitly if an empty Series will
        be instantiated.

    Returns
    -------
    Series
    """
    from pandas.core.series import Series

    # Only substitute the fallback when the caller gave no dtype at all.
    if is_empty_data(data) and dtype is None:
        dtype = dtype_if_empty
    return Series(
        data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
    )
+ + Returns + ------- + Series + """ + from pandas.core.series import Series + + if is_empty_data(data) and dtype is None: + dtype = dtype_if_empty + return Series( + data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath + ) diff --git a/pandas/core/dtypes/__init__.py b/pandas/core/dtypes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py new file mode 100644 index 00000000..051affd0 --- /dev/null +++ b/pandas/core/dtypes/api.py @@ -0,0 +1,45 @@ +# flake8: noqa + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool, + is_bool_dtype, + is_categorical, + is_categorical_dtype, + is_complex, + is_complex_dtype, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_extension_type, + is_file_like, + is_float, + is_float_dtype, + is_hashable, + is_int64_dtype, + is_integer, + is_integer_dtype, + is_interval, + is_interval_dtype, + is_iterator, + is_list_like, + is_named_tuple, + is_number, + is_numeric_dtype, + is_object_dtype, + is_period_dtype, + is_re, + is_re_compilable, + is_scalar, + is_signed_integer_dtype, + is_sparse, + is_string_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py new file mode 100644 index 00000000..1b4e7062 --- /dev/null +++ b/pandas/core/dtypes/base.py @@ -0,0 +1,317 @@ +"""Extend pandas with custom array types""" +from typing import Any, List, Optional, Tuple, Type + +import numpy as np + +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries + + +class ExtensionDtype: + """ + A custom data type, to be paired with an ExtensionArray. + + .. 
versionadded:: 0.23.0 + + See Also + -------- + extensions.register_extension_dtype + extensions.ExtensionArray + + Notes + ----- + The interface includes the following abstract methods that must + be implemented by subclasses: + + * type + * name + * construct_from_string + + The following attributes influence the behavior of the dtype in + pandas operations + + * _is_numeric + * _is_boolean + + Optionally one can override construct_array_type for construction + with the name of this dtype via the Registry. See + :meth:`extensions.register_extension_dtype`. + + * construct_array_type + + The `na_value` class attribute can be used to set the default NA value + for this type. :attr:`numpy.nan` is used by default. + + ExtensionDtypes are required to be hashable. The base class provides + a default implementation, which relies on the ``_metadata`` class + attribute. ``_metadata`` should be a tuple containing the strings + that define your data type. For example, with ``PeriodDtype`` that's + the ``freq`` attribute. + + **If you have a parametrized dtype you should set the ``_metadata`` + class property**. + + Ideally, the attributes in ``_metadata`` will match the + parameters to your ``ExtensionDtype.__init__`` (if any). If any of + the attributes in ``_metadata`` don't implement the standard + ``__eq__`` or ``__hash__``, the default implementations here will not + work. + + .. versionchanged:: 0.24.0 + + Added ``_metadata``, ``__hash__``, and changed the default definition + of ``__eq__``. + + For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method + can be implemented: this method receives a pyarrow Array or ChunkedArray + as only argument and is expected to return the appropriate pandas + ExtensionArray for this dtype and the passed values:: + + class ExtensionDtype: + + def __from_arrow__( + self, array: pyarrow.Array/ChunkedArray + ) -> ExtensionArray: + ... + + This class does not inherit from 'abc.ABCMeta' for performance reasons. 
+ Methods and properties required by the interface raise + ``pandas.errors.AbstractMethodError`` and no ``register`` method is + provided for registering virtual subclasses. + """ + + _metadata: Tuple[str, ...] = () + + def __str__(self) -> str: + return self.name + + def __eq__(self, other: Any) -> bool: + """ + Check whether 'other' is equal to self. + + By default, 'other' is considered equal if either + + * it's a string matching 'self.name'. + * it's an instance of this type and all of the + the attributes in ``self._metadata`` are equal between + `self` and `other`. + + Parameters + ---------- + other : Any + + Returns + ------- + bool + """ + if isinstance(other, str): + try: + other = self.construct_from_string(other) + except TypeError: + return False + if isinstance(other, type(self)): + return all( + getattr(self, attr) == getattr(other, attr) for attr in self._metadata + ) + return False + + def __hash__(self) -> int: + return hash(tuple(getattr(self, attr) for attr in self._metadata)) + + def __ne__(self, other) -> bool: + return not self.__eq__(other) + + @property + def na_value(self): + """ + Default NA value to use for this type. + + This is used in e.g. ExtensionArray.take. This should be the + user-facing "boxed" version of the NA value, not the physical NA value + for storage. e.g. for JSONArray, this is an empty dictionary. + """ + return np.nan + + @property + def type(self) -> Type: + """ + The scalar type for the array, e.g. ``int`` + + It's expected ``ExtensionArray[item]`` returns an instance + of ``ExtensionDtype.type`` for scalar ``item``, assuming + that value is valid (not NA). NA values do not need to be + instances of `type`. 
+ """ + raise AbstractMethodError(self) + + @property + def kind(self) -> str: + """ + A character code (one of 'biufcmMOSUV'), default 'O' + + This should match the NumPy dtype used when the array is + converted to an ndarray, which is probably 'O' for object if + the extension type cannot be represented as a built-in NumPy + type. + + See Also + -------- + numpy.dtype.kind + """ + return "O" + + @property + def name(self) -> str: + """ + A string identifying the data type. + + Will be used for display in, e.g. ``Series.dtype`` + """ + raise AbstractMethodError(self) + + @property + def names(self) -> Optional[List[str]]: + """ + Ordered list of field names, or None if there are no fields. + + This is for compatibility with NumPy arrays, and may be removed in the + future. + """ + return None + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + raise NotImplementedError + + @classmethod + def construct_from_string(cls, string: str): + r""" + Construct this type from a string. + + This is useful mainly for data types that accept parameters. + For example, a period dtype accepts a frequency parameter that + can be set as ``period[H]`` (where H means hourly frequency). + + By default, in the abstract class, just the name of the type is + expected. But subclasses can overwrite this method to accept + parameters. + + Parameters + ---------- + string : str + The name of the type, for example ``category``. + + Returns + ------- + ExtensionDtype + Instance of the dtype. + + Raises + ------ + TypeError + If a class cannot be constructed from this 'string'. + + Examples + -------- + For extension dtypes with arguments the following may be an + adequate implementation. + + >>> @classmethod + ... def construct_from_string(cls, string): + ... pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$") + ... match = pattern.match(string) + ... if match: + ... return cls(**match.groupdict()) + ...
else: + ... raise TypeError(f"Cannot construct a '{cls.__name__}' from + ... " "'{string}'") + """ + if not isinstance(string, str): + raise TypeError(f"Expects a string, got {type(string).__name__}") + + # error: Non-overlapping equality check (left operand type: "str", right + # operand type: "Callable[[ExtensionDtype], str]") [comparison-overlap] + assert isinstance(cls.name, str), (cls, type(cls.name)) + if string != cls.name: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + return cls() + + @classmethod + def is_dtype(cls, dtype) -> bool: + """ + Check if we match 'dtype'. + + Parameters + ---------- + dtype : object + The object to check. + + Returns + ------- + is_dtype : bool + + Notes + ----- + The default implementation is True if + + 1. ``cls.construct_from_string(dtype)`` is an instance + of ``cls``. + 2. ``dtype`` is an object and is an instance of ``cls`` + 3. ``dtype`` has a ``dtype`` attribute, and any of the above + conditions is true for ``dtype.dtype``. + """ + dtype = getattr(dtype, "dtype", dtype) + + if isinstance(dtype, (ABCSeries, ABCIndexClass, ABCDataFrame, np.dtype)): + # https://github.com/pandas-dev/pandas/issues/22960 + # avoid passing data to `construct_from_string`. This could + # cause a FutureWarning from numpy about failing elementwise + # comparison from, e.g., comparing DataFrame == 'category'. + return False + elif dtype is None: + return False + elif isinstance(dtype, cls): + return True + if isinstance(dtype, str): + try: + return cls.construct_from_string(dtype) is not None + except TypeError: + return False + return False + + @property + def _is_numeric(self) -> bool: + """ + Whether columns with this dtype should be considered numeric. + + By default ExtensionDtypes are assumed to be non-numeric. + They'll be excluded from operations that exclude non-numeric + columns, like (groupby) reductions, plotting, etc. 
+ """ + return False + + @property + def _is_boolean(self) -> bool: + """ + Whether this dtype should be considered boolean. + + By default, ExtensionDtypes are assumed to be non-boolean. + Setting this to True will affect the behavior of several places, + e.g. + + * is_bool + * boolean indexing + + Returns + ------- + bool + """ + return False diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py new file mode 100644 index 00000000..d4b60ec1 --- /dev/null +++ b/pandas/core/dtypes/cast.py @@ -0,0 +1,1592 @@ +""" routines for casting """ + +from datetime import datetime, timedelta + +import numpy as np + +from pandas._libs import lib, tslib, tslibs +from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT +from pandas._libs.tslibs.timezones import tz_compare +from pandas._typing import Dtype +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.common import ( + _INT64_DTYPE, + _NS_DTYPE, + _POSSIBLY_CAST_DTYPES, + _TD_DTYPE, + ensure_int8, + ensure_int16, + ensure_int32, + ensure_int64, + ensure_object, + ensure_str, + is_bool, + is_bool_dtype, + is_complex, + is_complex_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + is_unsigned_integer_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeArray, + ABCDatetimeIndex, + ABCPeriodArray, + ABCPeriodIndex, + ABCSeries, +) +from pandas.core.dtypes.inference import is_list_like +from pandas.core.dtypes.missing import isna, notna + +_int8_max = np.iinfo(np.int8).max +_int16_max = np.iinfo(np.int16).max +_int32_max =
np.iinfo(np.int32).max +_int64_max = np.iinfo(np.int64).max + + +def maybe_convert_platform(values): + """ try to do platform conversion, allow ndarray or list here """ + + if isinstance(values, (list, tuple, range)): + values = construct_1d_object_array_from_listlike(values) + if getattr(values, "dtype", None) == np.object_: + if hasattr(values, "_values"): + values = values._values + values = lib.maybe_convert_objects(values) + + return values + + +def is_nested_object(obj) -> bool: + """ + return a boolean if we have a nested object, e.g. a Series with 1 or + more Series elements + + This may not necessarily be performant. + + """ + + if isinstance(obj, ABCSeries) and is_object_dtype(obj): + + if any(isinstance(v, ABCSeries) for v in obj.values): + return True + + return False + + +def maybe_downcast_to_dtype(result, dtype): + """ try to cast to the specified dtype (e.g. convert back to bool/int + or could be an astype of float64->float32) + """ + do_round = False + + if is_scalar(result): + return result + elif isinstance(result, ABCDataFrame): + # occurs in pivot_table doctest + return result + + if isinstance(dtype, str): + if dtype == "infer": + inferred_type = lib.infer_dtype(ensure_object(result.ravel()), skipna=False) + if inferred_type == "boolean": + dtype = "bool" + elif inferred_type == "integer": + dtype = "int64" + elif inferred_type == "datetime64": + dtype = "datetime64[ns]" + elif inferred_type == "timedelta64": + dtype = "timedelta64[ns]" + + # try to upcast here + elif inferred_type == "floating": + dtype = "int64" + if issubclass(result.dtype.type, np.number): + do_round = True + + else: + dtype = "object" + + dtype = np.dtype(dtype) + + converted = maybe_downcast_numeric(result, dtype, do_round) + if converted is not result: + return converted + + # a datetimelike + # GH12821, iNaT is cast to float + if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]: + if hasattr(dtype, "tz"): + # not a numpy dtype + if dtype.tz: + # convert
to datetime and change timezone + from pandas import to_datetime + + result = to_datetime(result).tz_localize("utc") + result = result.tz_convert(dtype.tz) + else: + result = result.astype(dtype) + + elif dtype.type is Period: + # TODO(DatetimeArray): merge with previous elif + from pandas.core.arrays import PeriodArray + + try: + return PeriodArray(result, freq=dtype.freq) + except TypeError: + # e.g. TypeError: int() argument must be a string, a + # bytes-like object or a number, not 'Period + pass + + return result + + +def maybe_downcast_numeric(result, dtype, do_round: bool = False): + """ + Subset of maybe_downcast_to_dtype restricted to numeric dtypes. + + Parameters + ---------- + result : ndarray or ExtensionArray + dtype : np.dtype or ExtensionDtype + do_round : bool + + Returns + ------- + ndarray or ExtensionArray + """ + if not isinstance(dtype, np.dtype): + # e.g. SparseDtype has no itemsize attr + return result + + if isinstance(result, list): + # reached via groupoby.agg _ohlc; really this should be handled + # earlier + result = np.array(result) + + def trans(x): + if do_round: + return x.round() + return x + + if dtype.kind == result.dtype.kind: + # don't allow upcasts here (except if empty) + if result.dtype.itemsize <= dtype.itemsize and result.size: + return result + + if is_bool_dtype(dtype) or is_integer_dtype(dtype): + + if not result.size: + # if we don't have any elements, just astype it + return trans(result).astype(dtype) + + # do a test on the first element, if it fails then we are done + r = result.ravel() + arr = np.array([r[0]]) + + if isna(arr).any(): + # if we have any nulls, then we are done + return result + + elif not isinstance(r[0], (np.integer, np.floating, np.bool, int, float, bool)): + # a comparable, e.g. 
a Decimal may slip in here + return result + + if ( + issubclass(result.dtype.type, (np.object_, np.number)) + and notna(result).all() + ): + new_result = trans(result).astype(dtype) + if new_result.dtype.kind == "O" or result.dtype.kind == "O": + # np.allclose may raise TypeError on object-dtype + if (new_result == result).all(): + return new_result + else: + if np.allclose(new_result, result, rtol=0): + return new_result + + elif ( + issubclass(dtype.type, np.floating) + and not is_bool_dtype(result.dtype) + and not is_string_dtype(result.dtype) + ): + return result.astype(dtype) + + return result + + +def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray, other): + """ + A safe version of putmask that potentially upcasts the result. + The result is replaced with the first N elements of other, + where N is the number of True values in mask. + If the length of other is shorter than N, other will be repeated. + + Parameters + ---------- + result : ndarray + The destination array. This will be mutated in-place if no upcasting is + necessary. + mask : boolean ndarray + other : scalar + The source value. + + Returns + ------- + result : ndarray + changed : bool + Set to true if the result array was upcasted. + + Examples + -------- + >>> result, _ = maybe_upcast_putmask(np.arange(1,6), + np.array([False, True, False, True, True]), np.arange(21,23)) + >>> result + array([1, 21, 3, 22, 21]) + """ + + if not isinstance(result, np.ndarray): + raise ValueError("The result input must be a ndarray.") + if not is_scalar(other): + # We _could_ support non-scalar other, but until we have a compelling + # use case, we assume away the possibility. 
+ raise ValueError("other must be a scalar") + + if mask.any(): + # Two conversions for date-like dtypes that can't be done automatically + # in np.place: + # NaN -> NaT + # integer or integer array -> date-like array + if result.dtype.kind in ["m", "M"]: + if is_scalar(other): + if isna(other): + other = result.dtype.type("nat") + elif is_integer(other): + other = np.array(other, dtype=result.dtype) + elif is_integer_dtype(other): + other = np.array(other, dtype=result.dtype) + + def changeit(): + + # try to directly set by expanding our array to full + # length of the boolean + try: + om = other[mask] + except (IndexError, TypeError): + # IndexError occurs in test_upcast when we have a boolean + # mask of the wrong shape + # TypeError occurs in test_upcast when `other` is a bool + pass + else: + om_at = om.astype(result.dtype) + if (om == om_at).all(): + new_result = result.values.copy() + new_result[mask] = om_at + result[:] = new_result + return result, False + + # we are forced to change the dtype of the result as the input + # isn't compatible + r, _ = maybe_upcast(result, fill_value=other, copy=True) + np.place(r, mask, other) + + return r, True + + # we want to decide whether place will work + # if we have nans in the False portion of our mask then we need to + # upcast (possibly), otherwise we DON't want to upcast (e.g. if we + # have values, say integers, in the success portion then it's ok to not + # upcast) + new_dtype, _ = maybe_promote(result.dtype, other) + if new_dtype != result.dtype: + + # we have a scalar or len 0 ndarray + # and its nan and we are changing some values + if is_scalar(other) or (isinstance(other, np.ndarray) and other.ndim < 1): + if isna(other): + return changeit() + + # we have an ndarray and the masking has nans in it + else: + + if isna(other).any(): + return changeit() + + try: + np.place(result, mask, other) + except TypeError: + # e.g. 
int-dtype result and float-dtype other + return changeit() + + return result, False + + +def maybe_promote(dtype, fill_value=np.nan): + """ + Find the minimal dtype that can hold both the given dtype and fill_value. + + Parameters + ---------- + dtype : np.dtype or ExtensionDtype + fill_value : scalar, default np.nan + + Returns + ------- + dtype + Upcasted from dtype argument if necessary. + fill_value + Upcasted from fill_value argument if necessary. + """ + if not is_scalar(fill_value) and not is_object_dtype(dtype): + # with object dtype there is nothing to promote, and the user can + # pass pretty much any weird fill_value they like + raise ValueError("fill_value must be a scalar") + + # if we passed an array here, determine the fill value by dtype + if isinstance(fill_value, np.ndarray): + if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): + fill_value = fill_value.dtype.type("NaT", "ns") + else: + + # we need to change to object type as our + # fill_value is of object type + if fill_value.dtype == np.object_: + dtype = np.dtype(np.object_) + fill_value = np.nan + + if dtype == np.object_ or dtype.kind in ["U", "S"]: + # We treat string-like dtypes as object, and _always_ fill + # with np.nan + fill_value = np.nan + dtype = np.dtype(np.object_) + + # returns tuple of (dtype, fill_value) + if issubclass(dtype.type, np.datetime64): + if isinstance(fill_value, datetime) and fill_value.tzinfo is not None: + # Trying to insert tzaware into tznaive, have to cast to object + dtype = np.dtype(np.object_) + elif is_integer(fill_value) or (is_float(fill_value) and not isna(fill_value)): + dtype = np.dtype(np.object_) + else: + try: + fill_value = tslibs.Timestamp(fill_value).to_datetime64() + except (TypeError, ValueError): + dtype = np.dtype(np.object_) + elif issubclass(dtype.type, np.timedelta64): + if ( + is_integer(fill_value) + or (is_float(fill_value) and not np.isnan(fill_value)) + or isinstance(fill_value, str) + ): + # TODO: What about str 
that can be a timedelta? + dtype = np.dtype(np.object_) + else: + try: + fv = tslibs.Timedelta(fill_value) + except ValueError: + dtype = np.dtype(np.object_) + else: + if fv is NaT: + # NaT has no `to_timedelta64` method + fill_value = np.timedelta64("NaT", "ns") + else: + fill_value = fv.to_timedelta64() + elif is_datetime64tz_dtype(dtype): + if isna(fill_value): + fill_value = NaT + elif not isinstance(fill_value, datetime): + dtype = np.dtype(np.object_) + elif fill_value.tzinfo is None: + dtype = np.dtype(np.object_) + elif not tz_compare(fill_value.tzinfo, dtype.tz): + # TODO: sure we want to cast here? + dtype = np.dtype(np.object_) + + elif is_extension_array_dtype(dtype) and isna(fill_value): + fill_value = dtype.na_value + + elif is_float(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, np.integer): + dtype = np.dtype(np.float64) + + elif dtype.kind == "f": + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # e.g. 
mst is np.float64 and dtype is np.float32 + dtype = mst + + elif dtype.kind == "c": + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + + elif is_bool(fill_value): + if not issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif is_integer(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, np.integer): + if not np.can_cast(fill_value, dtype): + # upcast to prevent overflow + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + if dtype.kind == "f": + # Case where we disagree with numpy + dtype = np.dtype(np.object_) + + elif is_complex(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.dtype(np.object_) + + elif issubclass(dtype.type, (np.integer, np.floating)): + mst = np.min_scalar_type(fill_value) + dtype = np.promote_types(dtype, mst) + + elif dtype.kind == "c": + mst = np.min_scalar_type(fill_value) + if mst > dtype: + # e.g. mst is np.complex128 and dtype is np.complex64 + dtype = mst + + elif fill_value is None: + if is_float_dtype(dtype) or is_complex_dtype(dtype): + fill_value = np.nan + elif is_integer_dtype(dtype): + dtype = np.float64 + fill_value = np.nan + elif is_datetime_or_timedelta_dtype(dtype): + fill_value = dtype.type("NaT", "ns") + else: + dtype = np.dtype(np.object_) + fill_value = np.nan + else: + dtype = np.dtype(np.object_) + + # in case we have a string that looked like a number + if is_extension_array_dtype(dtype): + pass + elif issubclass(np.dtype(dtype).type, (bytes, str)): + dtype = np.dtype(np.object_) + + fill_value = _ensure_dtype_type(fill_value, dtype) + return dtype, fill_value + + +def _ensure_dtype_type(value, dtype): + """ + Ensure that the given value is an instance of the given dtype. + + e.g. if out dtype is np.complex64, we should have an instance of that + as opposed to a python complex object. 
+ + Parameters + ---------- + value : object + dtype : np.dtype or ExtensionDtype + + Returns + ------- + object + """ + + # Start with exceptions in which we do _not_ cast to numpy types + if is_extension_array_dtype(dtype): + return value + elif dtype == np.object_: + return value + elif isna(value): + # e.g. keep np.nan rather than try to cast to np.float32(np.nan) + return value + + return dtype.type(value) + + +def infer_dtype_from(val, pandas_dtype: bool = False): + """ + Interpret the dtype from a scalar or array. + + Parameters + ---------- + val : object + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar/array belongs to pandas extension types is inferred as + object + """ + if is_scalar(val): + return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype) + return infer_dtype_from_array(val, pandas_dtype=pandas_dtype) + + +def infer_dtype_from_scalar(val, pandas_dtype: bool = False): + """ + Interpret the dtype from a scalar. + + Parameters + ---------- + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, scalar belongs to pandas extension types is inferred as + object + """ + + dtype = np.dtype(np.object_) + + # a 1-element ndarray + if isinstance(val, np.ndarray): + msg = "invalid ndarray passed to infer_dtype_from_scalar" + if val.ndim != 0: + raise ValueError(msg) + + dtype = val.dtype + val = val.item() + + elif isinstance(val, str): + + # If we create an empty array using a string to infer + # the dtype, NumPy will only allocate one character per entry + # so this is kind of bad. Alternately we could use np.repeat + # instead of np.empty (but then you still don't want things + # coming out as np.str_! 
+ + dtype = np.dtype(np.object_) + + elif isinstance(val, (np.datetime64, datetime)): + val = tslibs.Timestamp(val) + if val is tslibs.NaT or val.tz is None: + dtype = np.dtype("M8[ns]") + else: + if pandas_dtype: + dtype = DatetimeTZDtype(unit="ns", tz=val.tz) + else: + # return datetimetz as object + return np.dtype(np.object_), val + val = val.value + + elif isinstance(val, (np.timedelta64, timedelta)): + val = tslibs.Timedelta(val).value + dtype = np.dtype("m8[ns]") + + elif is_bool(val): + dtype = np.dtype(np.bool_) + + elif is_integer(val): + if isinstance(val, np.integer): + dtype = np.dtype(type(val)) + else: + dtype = np.dtype(np.int64) + + elif is_float(val): + if isinstance(val, np.floating): + dtype = np.dtype(type(val)) + else: + dtype = np.dtype(np.float64) + + elif is_complex(val): + dtype = np.dtype(np.complex_) + + elif pandas_dtype: + if lib.is_period(val): + dtype = PeriodDtype(freq=val.freq) + val = val.ordinal + elif lib.is_interval(val): + subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0] + dtype = IntervalDtype(subtype=subtype) + + return dtype, val + + +def infer_dtype_from_array(arr, pandas_dtype: bool = False): + """ + Infer the dtype from an array. + + Parameters + ---------- + arr : array + pandas_dtype : bool, default False + whether to infer dtype including pandas extension types. + If False, array belongs to pandas extension types + is inferred as object + + Returns + ------- + tuple (numpy-compat/pandas-compat dtype, array) + + Notes + ----- + if pandas_dtype=False. these infer to numpy dtypes + exactly with the exception that mixed / object dtypes + are not coerced by stringifying or conversion + + if pandas_dtype=True. datetime64tz-aware/categorical + types will retain their character.
+ + Examples + -------- + >>> np.asarray([1, '1']) + array(['1', '1'], dtype='<U21') + + >>> infer_dtype_from_array([1, '1']) + (numpy.object_, [1, '1']) + """ + + if isinstance(arr, np.ndarray): + return arr.dtype, arr + + if not is_list_like(arr): + arr = [arr] + + if pandas_dtype and is_extension_array_dtype(arr): + return arr.dtype, arr + + elif isinstance(arr, ABCSeries): + return arr.dtype, np.asarray(arr) + + # don't force numpy coerce with nan's + inferred = lib.infer_dtype(arr, skipna=False) + if inferred in ["string", "bytes", "unicode", "mixed", "mixed-integer"]: + return (np.object_, arr) + + arr = np.asarray(arr) + return arr.dtype, arr + + +def maybe_infer_dtype_type(element): + """ + Try to infer an object's dtype, for use in arithmetic ops. + + Uses `element.dtype` if that's available. + Objects implementing the iterator protocol are cast to a NumPy array, + and from there the array's type is used. + + Parameters + ---------- + element : object + Possibly has a `.dtype` attribute, and possibly the iterator + protocol. + + Returns + ------- + tipo : type + + Examples + -------- + >>> from collections import namedtuple + >>> Foo = namedtuple("Foo", "dtype") + >>> maybe_infer_dtype_type(Foo(np.dtype("i8"))) + numpy.int64 + """ + tipo = None + if hasattr(element, "dtype"): + tipo = element.dtype + elif is_list_like(element): + element = np.asarray(element) + tipo = element.dtype + return tipo + + +def maybe_upcast(values, fill_value=np.nan, dtype=None, copy: bool = False): + """ + Provide explicit type promotion and coercion. + + Parameters + ---------- + values : ndarray or ExtensionArray + The array that we want to maybe upcast. + fill_value : what we want to fill with + dtype : if None, then use the dtype of the values, else coerce to this type + copy : bool, default False + If True always make a copy even if no upcast is required.
+ """ + if not is_scalar(fill_value) and not is_object_dtype(values.dtype): + # We allow arbitrary fill values for object dtype + raise ValueError("fill_value must be a scalar") + + if is_extension_array_dtype(values): + if copy: + values = values.copy() + else: + if dtype is None: + dtype = values.dtype + new_dtype, fill_value = maybe_promote(dtype, fill_value) + if new_dtype != values.dtype: + values = values.astype(new_dtype) + elif copy: + values = values.copy() + + return values, fill_value + + +def invalidate_string_dtypes(dtype_set): + """Change string like dtypes to object for + ``DataFrame.select_dtypes()``. + """ + non_string_dtypes = dtype_set - {np.dtype("S").type, np.dtype(" 1 and coerce: + raise ValueError( + "Only one of 'datetime', 'numeric' or " + "'timedelta' can be True when when coerce=True." + ) + + if not is_object_dtype(values.dtype): + # If not object, do not attempt conversion + values = values.copy() if copy else values + return values + + # If 1 flag is coerce, ensure 2 others are False + if coerce: + # Immediate return if coerce + if datetime: + from pandas import to_datetime + + return to_datetime(values, errors="coerce").to_numpy() + elif timedelta: + from pandas import to_timedelta + + return to_timedelta(values, errors="coerce").to_numpy() + elif numeric: + from pandas import to_numeric + + return to_numeric(values, errors="coerce") + + # Soft conversions + if datetime: + # GH 20380, when datetime is beyond year 2262, hence outside + # bound of nanosecond-resolution 64-bit integers. 
+ try: + values = lib.maybe_convert_objects(values, convert_datetime=True) + except OutOfBoundsDatetime: + pass + + if timedelta and is_object_dtype(values.dtype): + # Object check to ensure only run if previous did not convert + values = lib.maybe_convert_objects(values, convert_timedelta=True) + + if numeric and is_object_dtype(values.dtype): + try: + converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) + except (ValueError, TypeError): + pass + else: + # If all NaNs, then do not-alter + values = converted if not isna(converted).all() else values + values = values.copy() if copy else values + + return values + + +def convert_dtypes( + input_array, + convert_string: bool = True, + convert_integer: bool = True, + convert_boolean: bool = True, +) -> Dtype: + """ + Convert objects to best possible type, and optionally, + to types supporting ``pd.NA``. + + Parameters + ---------- + input_array : ExtensionArray or PandasArray + convert_string : bool, default True + Whether object dtypes should be converted to ``StringDtype()``. + convert_integer : bool, default True + Whether, if possible, conversion can be done to integer extension types. + convert_boolean : bool, defaults True + Whether object dtypes should be converted to ``BooleanDtypes()``. + + Returns + ------- + dtype + new dtype + """ + is_extension = is_extension_array_dtype(input_array.dtype) + if (convert_string or convert_integer or convert_boolean) and not is_extension: + try: + inferred_dtype = lib.infer_dtype(input_array) + except ValueError: + # Required to catch due to Period. 
Can remove once GH 23553 is fixed + inferred_dtype = input_array.dtype + + if not convert_string and is_string_dtype(inferred_dtype): + inferred_dtype = input_array.dtype + + if convert_integer: + target_int_dtype = "Int64" + + if is_integer_dtype(input_array.dtype): + from pandas.core.arrays.integer import _dtypes + + inferred_dtype = _dtypes.get(input_array.dtype.name, target_int_dtype) + if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( + input_array.dtype + ): + inferred_dtype = target_int_dtype + + else: + if is_integer_dtype(inferred_dtype): + inferred_dtype = input_array.dtype + + if convert_boolean: + if is_bool_dtype(input_array.dtype): + inferred_dtype = "boolean" + else: + if isinstance(inferred_dtype, str) and inferred_dtype == "boolean": + inferred_dtype = input_array.dtype + + else: + inferred_dtype = input_array.dtype + + return inferred_dtype + + +def maybe_castable(arr) -> bool: + # return False to force a non-fastpath + + # check datetime64[ns]/timedelta64[ns] are valid + # otherwise try to coerce + kind = arr.dtype.kind + if kind == "M": + return is_datetime64_ns_dtype(arr.dtype) + elif kind == "m": + return is_timedelta64_ns_dtype(arr.dtype) + + return arr.dtype.name not in _POSSIBLY_CAST_DTYPES + + +def maybe_infer_to_datetimelike(value, convert_dates: bool = False): + """ + we might have a array (or single object) that is datetime like, + and no dtype is passed don't change the value unless we find a + datetime/timedelta set + + this is pretty strict in that a datetime/timedelta is REQUIRED + in addition to possible nulls/string likes + + Parameters + ---------- + value : np.array / Series / Index / list-like + convert_dates : bool, default False + if True try really hard to convert dates (such as datetime.date), other + leave inferred dtype 'date' alone + + """ + + # TODO: why not timedelta? 
+ if isinstance( + value, (ABCDatetimeIndex, ABCPeriodIndex, ABCDatetimeArray, ABCPeriodArray) + ): + return value + elif isinstance(value, ABCSeries): + if isinstance(value._values, ABCDatetimeIndex): + return value._values + + v = value + + if not is_list_like(v): + v = [v] + v = np.array(v, copy=False) + + # we only care about object dtypes + if not is_object_dtype(v): + return value + + shape = v.shape + if not v.ndim == 1: + v = v.ravel() + + if not len(v): + return value + + def try_datetime(v): + # safe coerce to datetime64 + try: + # GH19671 + v = tslib.array_to_datetime(v, require_iso8601=True, errors="raise")[0] + except ValueError: + + # we might have a sequence of the same-datetimes with tz's + # if so coerce to a DatetimeIndex; if they are not the same, + # then these stay as object dtype, xref GH19671 + from pandas._libs.tslibs import conversion + from pandas import DatetimeIndex + + try: + + values, tz = conversion.datetime_to_datetime64(v) + return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz) + except (ValueError, TypeError): + pass + + except Exception: + pass + + return v.reshape(shape) + + def try_timedelta(v): + # safe coerce to timedelta64 + + # will try first with a string & object conversion + from pandas import to_timedelta + + try: + return to_timedelta(v)._ndarray_values.reshape(shape) + except ValueError: + return v.reshape(shape) + + inferred_type = lib.infer_datetimelike_array(ensure_object(v)) + + if inferred_type == "date" and convert_dates: + value = try_datetime(v) + elif inferred_type == "datetime": + value = try_datetime(v) + elif inferred_type == "timedelta": + value = try_timedelta(v) + elif inferred_type == "nat": + + # if all NaT, return as datetime + if isna(v).all(): + value = try_datetime(v) + else: + + # We have at least a NaT and a string + # try timedelta first to avoid spurious datetime conversions + # e.g. 
'00:00:01' is a timedelta but technically is also a datetime + value = try_timedelta(v) + if lib.infer_dtype(value, skipna=False) in ["mixed"]: + # cannot skip missing values, as NaT implies that the string + # is actually a datetime + value = try_datetime(v) + + return value + + +def maybe_cast_to_datetime(value, dtype, errors: str = "raise"): + """ try to cast the array/value to a datetimelike dtype, converting float + nan to iNaT + """ + from pandas.core.tools.timedeltas import to_timedelta + from pandas.core.tools.datetimes import to_datetime + + if dtype is not None: + if isinstance(dtype, str): + dtype = np.dtype(dtype) + + is_datetime64 = is_datetime64_dtype(dtype) + is_datetime64tz = is_datetime64tz_dtype(dtype) + is_timedelta64 = is_timedelta64_dtype(dtype) + + if is_datetime64 or is_datetime64tz or is_timedelta64: + + # Force the dtype if needed. + msg = ( + f"The '{dtype.name}' dtype has no unit. " + f"Please pass in '{dtype.name}[ns]' instead." + ) + + if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): + + # pandas supports dtype whose granularity is less than [ns] + # e.g., [ps], [fs], [as] + if dtype <= np.dtype("M8[ns]"): + if dtype.name == "datetime64": + raise ValueError(msg) + dtype = _NS_DTYPE + else: + raise TypeError(f"cannot convert datetimelike to dtype [{dtype}]") + elif is_datetime64tz: + + # our NaT doesn't support tz's + # this will coerce to DatetimeIndex with + # a matching dtype below + if is_scalar(value) and isna(value): + value = [value] + + elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): + + # pandas supports dtype whose granularity is less than [ns] + # e.g., [ps], [fs], [as] + if dtype <= np.dtype("m8[ns]"): + if dtype.name == "timedelta64": + raise ValueError(msg) + dtype = _TD_DTYPE + else: + raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]") + + if is_scalar(value): + if value == iNaT or isna(value): + value = iNaT + else: + value = np.array(value, copy=False) + + # have a scalar 
array-like (e.g. NaT) + if value.ndim == 0: + value = iNaT + + # we have an array of datetime or timedeltas & nulls + elif np.prod(value.shape) or not is_dtype_equal(value.dtype, dtype): + try: + if is_datetime64: + value = to_datetime(value, errors=errors) + # GH 25843: Remove tz information since the dtype + # didn't specify one + if value.tz is not None: + value = value.tz_localize(None) + value = value._values + elif is_datetime64tz: + # The string check can be removed once issue #13712 + # is solved. String data that is passed with a + # datetime64tz is assumed to be naive which should + # be localized to the timezone. + is_dt_string = is_string_dtype(value) + value = to_datetime(value, errors=errors).array + if is_dt_string: + # Strings here are naive, so directly localize + value = value.tz_localize(dtype.tz) + else: + # Numeric values are UTC at this point, + # so localize and convert + value = value.tz_localize("UTC").tz_convert(dtype.tz) + elif is_timedelta64: + value = to_timedelta(value, errors=errors)._values + except OutOfBoundsDatetime: + raise + except (AttributeError, ValueError, TypeError): + pass + + # coerce datetimelike to object + elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype): + if is_object_dtype(dtype): + if value.dtype != _NS_DTYPE: + value = value.astype(_NS_DTYPE) + ints = np.asarray(value).view("i8") + return tslib.ints_to_pydatetime(ints) + + # we have a non-castable dtype that was passed + raise TypeError(f"Cannot cast datetime64 to {dtype}") + + else: + + is_array = isinstance(value, np.ndarray) + + # catch a datetime/timedelta that is not of ns variety + # and no coercion specified + if is_array and value.dtype.kind in ["M", "m"]: + dtype = value.dtype + + if dtype.kind == "M" and dtype != _NS_DTYPE: + value = tslibs.conversion.ensure_datetime64ns(value) + + elif dtype.kind == "m" and dtype != _TD_DTYPE: + value = to_timedelta(value) + + # only do this if we have an array and the dtype of the array is not + # 
def find_common_type(types):
    """
    Find a common data type among the given dtypes.

    Parameters
    ----------
    types : list of dtypes

    Returns
    -------
    pandas extension or numpy dtype

    Raises
    ------
    ValueError
        If ``types`` is empty.

    See Also
    --------
    numpy.find_common_type
    """
    if len(types) == 0:
        raise ValueError("no types given")

    first = types[0]

    # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
    # => object
    if all(is_dtype_equal(first, t) for t in types[1:]):
        return first

    # Use the builtin ``object`` rather than the ``np.object`` alias: the
    # alias was deprecated in NumPy 1.20 and removed in 1.24, and on older
    # versions it is the very same object, so callers see no difference.
    if any(isinstance(t, ExtensionDtype) for t in types):
        return object

    # take lowest unit
    if all(is_datetime64_dtype(t) for t in types):
        return np.dtype("datetime64[ns]")
    if all(is_timedelta64_dtype(t) for t in types):
        return np.dtype("timedelta64[ns]")

    # don't mix bool / int or float or complex
    # this is different from numpy, which casts bool with float/int as int
    has_bools = any(is_bool_dtype(t) for t in types)
    if has_bools:
        for t in types:
            if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
                return object

    return np.find_common_type(types, [])
def construct_1d_arraylike_from_scalar(value, length: int, dtype):
    """
    Build a 1-D array (NumPy or pandas extension type) of ``length``
    elements, each equal to ``value``.

    Parameters
    ----------
    value : scalar value
    length : int
    dtype : pandas_dtype / np.dtype

    Returns
    -------
    np.ndarray / pandas type of length, filled with value
    """
    # Extension dtypes know how to build their own array type.
    if is_extension_array_dtype(dtype):
        array_type = dtype.construct_array_type()
        return array_type._from_sequence([value] * length, dtype=dtype)

    if not isinstance(dtype, (np.dtype, type(np.dtype))):
        dtype = dtype.dtype

    if length and is_integer_dtype(dtype) and isna(value):
        # an integer array cannot hold NaN, so fall back to float64
        dtype = np.dtype("float64")
    elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
        # store strings in an object array so that numpy takes the
        # string as a single scalar value
        dtype = object
        if not isna(value):
            value = ensure_str(value)

    filled = np.empty(length, dtype=dtype)
    filled.fill(value)
    return filled
def construct_1d_ndarray_preserving_na(values, dtype=None, copy: bool = False):
    """
    Construct a new ndarray, coercing `values` to `dtype`, preserving NA.

    Parameters
    ----------
    values : Sequence
    dtype : numpy.dtype, optional
    copy : bool, default False
        Note that copies may still be made with ``copy=False`` if casting
        is required.

    Returns
    -------
    arr : ndarray[dtype]

    Examples
    --------
    >>> np.array([1.0, 2.0, None], dtype='str')
    array(['1.0', '2.0', 'None'], dtype='<U4')

    >>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype=np.dtype('str'))
    array(['1.0', '2.0', None], dtype=object)
    """
    if copy:
        subarr = np.array(values, dtype=dtype, copy=True)
    else:
        # ``np.asarray`` copies only when it has to, which is the contract
        # documented above.  ``np.array(..., copy=False)`` behaves the same
        # on NumPy 1.x but raises under NumPy 2 when a copy is unavoidable.
        subarr = np.asarray(values, dtype=dtype)

    if dtype is not None and dtype.kind in ("U", "S"):
        # GH-21083
        # We can't just return np.array(subarr, dtype='str') since
        # NumPy will convert the non-string objects into strings,
        # including NA values. So we have to go
        # string -> object -> update NA, which requires an
        # additional pass over the data.
        na_values = isna(values)
        subarr2 = subarr.astype(object)
        subarr2[na_values] = np.asarray(values, dtype=object)[na_values]
        subarr = subarr2

    return subarr
+ copy: bool, default False + Whether to make a copy of the array before returning. + + Returns + ------- + int_arr : ndarray + An array of integer or unsigned integer dtype + + Raises + ------ + OverflowError : the dtype is incompatible with the data + ValueError : loss of precision has occurred during casting + + Examples + -------- + If you try to coerce negative values to unsigned integers, it raises: + + >>> Series([-1], dtype="uint64") + Traceback (most recent call last): + ... + OverflowError: Trying to coerce negative values to unsigned integers + + Also, if you try to coerce float values to integers, it raises: + + >>> Series([1, 2, 3.5], dtype="int64") + Traceback (most recent call last): + ... + ValueError: Trying to coerce float values to integers + """ + + try: + if not hasattr(arr, "astype"): + casted = np.array(arr, dtype=dtype, copy=copy) + else: + casted = arr.astype(dtype, copy=copy) + except OverflowError: + raise OverflowError( + "The elements provided in the data cannot all be " + f"casted to the dtype {dtype}" + ) + + if np.array_equal(arr, casted): + return casted + + # We do this casting to allow for proper + # data and dtype checking. + # + # We didn't do this earlier because NumPy + # doesn't handle `uint64` correctly. 
+ arr = np.asarray(arr) + + if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): + raise OverflowError("Trying to coerce negative values to unsigned integers") + + if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)): + raise ValueError("Trying to coerce float values to integers") diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py new file mode 100644 index 00000000..5a007f28 --- /dev/null +++ b/pandas/core/dtypes/common.py @@ -0,0 +1,1891 @@ +""" common type operations """ +from typing import Any, Callable, Union +import warnings + +import numpy as np + +from pandas._libs import algos, lib +from pandas._libs.tslibs import conversion +from pandas._typing import ArrayLike + +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + ExtensionDtype, + IntervalDtype, + PeriodDtype, + registry, +) +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCDatetimeIndex, + ABCIndexClass, + ABCPeriodArray, + ABCPeriodIndex, + ABCSeries, +) +from pandas.core.dtypes.inference import ( # noqa:F401 + is_array_like, + is_bool, + is_complex, + is_decimal, + is_dict_like, + is_file_like, + is_float, + is_hashable, + is_integer, + is_interval, + is_iterator, + is_list_like, + is_named_tuple, + is_nested_list_like, + is_number, + is_re, + is_re_compilable, + is_scalar, + is_sequence, +) + +_POSSIBLY_CAST_DTYPES = { + np.dtype(t).name + for t in [ + "O", + "int8", + "uint8", + "int16", + "uint16", + "int32", + "uint32", + "int64", + "uint64", + ] +} + +_NS_DTYPE = conversion.NS_DTYPE +_TD_DTYPE = conversion.TD_DTYPE +_INT64_DTYPE = np.dtype(np.int64) + +# oh the troubles to reduce import time +_is_scipy_sparse = None + +ensure_float64 = algos.ensure_float64 +ensure_float32 = algos.ensure_float32 + +_ensure_datetime64ns = conversion.ensure_datetime64ns +_ensure_timedelta64ns = conversion.ensure_timedelta64ns + + +def ensure_float(arr): + """ + Ensure that an array object has a float dtype if possible. 
def ensure_str(value: Union[bytes, Any]) -> str:
    """
    Return ``value`` as a ``str``.

    ``str`` instances are returned unchanged, ``bytes`` are decoded as
    UTF-8, and any other object is passed through ``str()``.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)
def ensure_python_int(value: Union[int, np.integer]) -> int:
    """
    Ensure that a value is a python int.

    Parameters
    ----------
    value: int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one
        without changing its value.
    """
    if not is_scalar(value):
        raise TypeError(f"Value needs to be a scalar value, was type {type(value)}")
    msg = "Wrong type {} for value {}"
    try:
        new_value = int(value)
        # The comparison stays inside the ``try`` so that a comparison
        # which itself raises is still reported as a TypeError.
        is_lossless = bool(new_value == value)
    except (TypeError, ValueError) as err:
        raise TypeError(msg.format(type(value), value)) from err
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``, which would let lossy conversions (e.g. 1.5 -> 1)
    # slip through silently.
    if not is_lossless:
        raise TypeError(msg.format(type(value), value))
    return new_value
def is_sparse(arr) -> bool:
    """
    Check whether an array-like is a pandas sparse array.

    The check looks at ``arr.dtype`` when that attribute exists and at
    ``arr`` itself otherwise, and reports whether it is a pandas
    ``SparseDtype`` instance. Other kinds of sparse containers (for
    example scipy sparse matrices) therefore return False.

    Parameters
    ----------
    arr : array-like
        Array-like to check.

    Returns
    -------
    bool
        Whether or not the array-like is a pandas sparse array.

    Examples
    --------
    >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0]))
    True
    >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0])))
    True
    >>> is_sparse(np.array([0, 0, 1, 0]))
    False
    >>> is_sparse(pd.Series([0, 1, 0, 0]))
    False

    >>> from scipy.sparse import bsr_matrix
    >>> is_sparse(bsr_matrix([0, 1, 0, 0]))
    False
    """
    # imported lazily, as in the rest of this module
    from pandas.core.arrays.sparse import SparseDtype

    try:
        candidate = arr.dtype
    except AttributeError:
        # no ``dtype`` attribute: treat the argument itself as the dtype
        candidate = arr
    return isinstance(candidate, SparseDtype)
def is_datetime64_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the datetime64 dtype.

    The decision is delegated to ``_is_dtype_type`` with a predicate that
    accepts subclasses of ``np.datetime64``.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> is_datetime64_dtype(object)
    False
    >>> is_datetime64_dtype(np.datetime64)
    True
    >>> is_datetime64_dtype(np.array([], dtype=int))
    False
    >>> is_datetime64_dtype(np.array([], dtype=np.datetime64))
    True
    >>> is_datetime64_dtype([1, 2, 3])
    False
    """
    is_datetime64_type = classes(np.datetime64)
    return _is_dtype_type(arr_or_dtype, is_datetime64_type)
def is_timedelta64_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the timedelta64 dtype.

    The decision is delegated to ``_is_dtype_type`` with a predicate that
    accepts subclasses of ``np.timedelta64``.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the timedelta64 dtype.

    Examples
    --------
    >>> is_timedelta64_dtype(object)
    False
    >>> is_timedelta64_dtype(np.timedelta64)
    True
    >>> is_timedelta64_dtype([1, 2, 3])
    False
    >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> is_timedelta64_dtype('0 days')
    False
    """
    is_timedelta64_type = classes(np.timedelta64)
    return _is_dtype_type(arr_or_dtype, is_timedelta64_type)
def is_interval_dtype(arr_or_dtype) -> bool:
    """
    Check whether an array-like or dtype is of the Interval dtype.

    ``None`` is rejected up front; everything else is handed to
    ``IntervalDtype.is_dtype``.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array-like or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Interval dtype.

    Examples
    --------
    >>> is_interval_dtype(object)
    False
    >>> is_interval_dtype(IntervalDtype())
    True
    >>> is_interval_dtype([1, 2, 3])
    False
    >>>
    >>> interval = pd.Interval(1, 2, closed="right")
    >>> is_interval_dtype(interval)
    False
    >>> is_interval_dtype(pd.IntervalIndex([interval]))
    True
    """
    # TODO: Consider making Interval an instance of IntervalDtype
    return arr_or_dtype is not None and IntervalDtype.is_dtype(arr_or_dtype)
def is_string_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of the string dtype.

    A dtype qualifies when its ``kind`` is one of ``"O"``, ``"S"`` or
    ``"U"`` and it is neither a period nor an interval dtype.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the string dtype.

    Examples
    --------
    >>> is_string_dtype(str)
    True
    >>> is_string_dtype(object)
    True
    >>> is_string_dtype(int)
    False
    >>>
    >>> is_string_dtype(np.array(['a', 'b']))
    True
    >>> is_string_dtype(pd.Series([1, 2]))
    False
    """

    # TODO: gh-15585: consider making the checks stricter.
    def _is_object_kind_non_string(dtype) -> bool:
        # Period and interval dtypes have kind "O" but are not string
        # dtypes, so they must be excluded explicitly.
        return any(check(dtype) for check in (is_period_dtype, is_interval_dtype))

    def _has_string_kind(dtype) -> bool:
        if dtype.kind not in ("O", "S", "U"):
            return False
        return not _is_object_kind_non_string(dtype)

    return _is_dtype(arr_or_dtype, _has_string_kind)
def is_datetime_arraylike(arr) -> bool:
    """
    Check whether an array-like is a datetime array-like or DatetimeIndex.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean
        Whether or not the array-like is a datetime array-like or
        DatetimeIndex.

    Examples
    --------
    >>> is_datetime_arraylike([1, 2, 3])
    False
    >>> is_datetime_arraylike(pd.Index([1, 2, 3]))
    False
    >>> is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3]))
    True
    """
    if isinstance(arr, ABCDatetimeIndex):
        return True

    if isinstance(arr, (np.ndarray, ABCSeries)):
        # ndarrays / Series only count when they are object dtype and
        # their values are inferred as datetimes.
        if not is_object_dtype(arr.dtype):
            return False
        return lib.infer_dtype(arr, skipna=False) == "datetime"

    # anything else: rely on an ``inferred_type`` attribute, if present
    inferred = getattr(arr, "inferred_type", None)
    return inferred == "datetime"
def is_any_int_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of an integer dtype.

    In this function, timedelta64 instances are also considered "any-integer"
    type objects and will return True.

    This function is internal and should not be exposed in the public API.

    .. versionchanged:: 0.24.0

       The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
       as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an integer dtype.

    Examples
    --------
    >>> is_any_int_dtype(str)
    False
    >>> is_any_int_dtype(int)
    True
    >>> is_any_int_dtype(float)
    False
    >>> is_any_int_dtype(np.uint64)
    True
    >>> is_any_int_dtype(np.datetime64)
    False
    >>> is_any_int_dtype(np.timedelta64)
    True
    >>> is_any_int_dtype(np.array(['a', 'b']))
    False
    >>> is_any_int_dtype(pd.Series([1, 2]))
    True
    >>> is_any_int_dtype(np.array([], dtype=np.timedelta64))
    True
    >>> is_any_int_dtype(pd.Index([1, 2.]))  # float
    False
    """
    # integers and timedelta64 are both accepted here
    is_int_or_timedelta = classes(np.integer, np.timedelta64)
    return _is_dtype_type(arr_or_dtype, is_int_or_timedelta)
def is_signed_integer_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a signed integer dtype.

    Unlike in `is_any_int_dtype`, timedelta64 instances will return False.

    .. versionchanged:: 0.24.0

       The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
       as integer by this function.

    Parameters
    ----------
    arr_or_dtype : array-like
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a signed integer dtype
        and not an instance of timedelta64.

    Examples
    --------
    >>> is_signed_integer_dtype(str)
    False
    >>> is_signed_integer_dtype(int)
    True
    >>> is_signed_integer_dtype(float)
    False
    >>> is_signed_integer_dtype(np.uint64)  # unsigned
    False
    >>> is_signed_integer_dtype('int8')
    True
    >>> is_signed_integer_dtype('Int8')
    True
    >>> is_signed_integer_dtype(pd.Int8Dtype)
    True
    >>> is_signed_integer_dtype(np.datetime64)
    False
    >>> is_signed_integer_dtype(np.timedelta64)
    False
    >>> is_signed_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_signed_integer_dtype(pd.Series([1, 2]))
    True
    >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_signed_integer_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned
    False
    """
    # signed integers only; datetimelike types are filtered out by the
    # predicate factory
    is_signed_non_datetimelike = classes_and_not_datetimelike(np.signedinteger)
    return _is_dtype_type(arr_or_dtype, is_signed_non_datetimelike)
+ + Examples + -------- + >>> is_unsigned_integer_dtype(str) + False + >>> is_unsigned_integer_dtype(int) # signed + False + >>> is_unsigned_integer_dtype(float) + False + >>> is_unsigned_integer_dtype(np.uint64) + True + >>> is_unsigned_integer_dtype('uint8') + True + >>> is_unsigned_integer_dtype('UInt8') + True + >>> is_unsigned_integer_dtype(pd.UInt8Dtype) + True + >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) + False + >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed + False + >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float + False + >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) + True + """ + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger) + ) + + +def is_int64_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the int64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the int64 dtype. + + Notes + ----- + Depending on system architecture, the return value of `is_int64_dtype( + int)` will be True if the OS uses 64-bit integers and False if the OS + uses 32-bit integers. 
+ + Examples + -------- + >>> is_int64_dtype(str) + False + >>> is_int64_dtype(np.int32) + False + >>> is_int64_dtype(np.int64) + True + >>> is_int64_dtype('int8') + False + >>> is_int64_dtype('Int8') + False + >>> is_int64_dtype(pd.Int64Dtype) + True + >>> is_int64_dtype(float) + False + >>> is_int64_dtype(np.uint64) # unsigned + False + >>> is_int64_dtype(np.array(['a', 'b'])) + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.int64)) + True + >>> is_int64_dtype(pd.Index([1, 2.])) # float + False + >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned + False + """ + + return _is_dtype_type(arr_or_dtype, classes(np.int64)) + + +def is_datetime64_any_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the datetime64 dtype. + + Examples + -------- + >>> is_datetime64_any_dtype(str) + False + >>> is_datetime64_any_dtype(int) + False + >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive + True + >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_any_dtype(np.array([1, 2])) + False + >>> is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + True + >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) + True + """ + + if arr_or_dtype is None: + return False + return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) + + +def is_datetime64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the datetime64[ns] dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the datetime64[ns] dtype. 
+ + Examples + -------- + >>> is_datetime64_ns_dtype(str) + False + >>> is_datetime64_ns_dtype(int) + False + >>> is_datetime64_ns_dtype(np.datetime64) # no unit + False + >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + True + >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) + False + >>> is_datetime64_ns_dtype(np.array([1, 2])) + False + >>> is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) # no unit + False + >>> is_datetime64_ns_dtype(np.array([], + dtype="datetime64[ps]")) # wrong unit + False + >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], + dtype=np.datetime64)) # has 'ns' unit + True + """ + + if arr_or_dtype is None: + return False + try: + tipo = _get_dtype(arr_or_dtype) + except TypeError: + if is_datetime64tz_dtype(arr_or_dtype): + tipo = _get_dtype(arr_or_dtype.dtype) + else: + return False + return tipo == _NS_DTYPE or getattr(tipo, "base", None) == _NS_DTYPE + + +def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of the timedelta64[ns] dtype. + + This is a very specific dtype, so generic ones like `np.timedelta64` + will return False if passed into this function. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the timedelta64[ns] dtype. + + Examples + -------- + >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + False + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + True + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + False + """ + return _is_dtype(arr_or_dtype, lambda dtype: dtype == _TD_DTYPE) + + +def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of + a timedelta64 or datetime64 dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. 
+ + Returns + ------- + boolean + Whether or not the array or dtype is of a timedelta64, + or datetime64 dtype. + + Examples + -------- + >>> is_datetime_or_timedelta_dtype(str) + False + >>> is_datetime_or_timedelta_dtype(int) + False + >>> is_datetime_or_timedelta_dtype(np.datetime64) + True + >>> is_datetime_or_timedelta_dtype(np.timedelta64) + True + >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b'])) + False + >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + False + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + True + >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + True + """ + + return _is_dtype_type(arr_or_dtype, classes(np.datetime64, np.timedelta64)) + + +def _is_unorderable_exception(e: TypeError) -> bool: + """ + Check if the exception raised is an unorderable exception. + + Parameters + ---------- + e : Exception or sub-class + The exception object to check. + + Returns + ------- + bool + Whether or not the exception raised is an unorderable exception. + """ + return "'>' not supported between instances of" in str(e) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_numeric_v_string_like(a, b): + """ + Check if we are comparing a string-like object to a numeric ndarray. + NumPy doesn't like to compare such objects, especially numeric arrays + and scalar string-likes. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a string-like object to a numeric array. 
+ + Examples + -------- + >>> is_numeric_v_string_like(1, 1) + False + >>> is_numeric_v_string_like("foo", "foo") + False + >>> is_numeric_v_string_like(1, "foo") # non-array numeric + False + >>> is_numeric_v_string_like(np.array([1]), "foo") + True + >>> is_numeric_v_string_like("foo", np.array([1])) # symmetric check + True + >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + True + >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + True + >>> is_numeric_v_string_like(np.array([1]), np.array([2])) + False + >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + False + """ + + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + is_a_numeric_array = is_a_array and is_numeric_dtype(a) + is_b_numeric_array = is_b_array and is_numeric_dtype(b) + is_a_string_array = is_a_array and is_string_like_dtype(a) + is_b_string_array = is_b_array and is_string_like_dtype(b) + + is_a_scalar_string_like = not is_a_array and isinstance(a, str) + is_b_scalar_string_like = not is_b_array and isinstance(b, str) + + return ( + (is_a_numeric_array and is_b_scalar_string_like) + or (is_b_numeric_array and is_a_scalar_string_like) + or (is_a_numeric_array and is_b_string_array) + or (is_b_numeric_array and is_a_string_array) + ) + + +# This exists to silence numpy deprecation warnings, see GH#29553 +def is_datetimelike_v_numeric(a, b): + """ + Check if we are comparing a datetime-like object to a numeric object. + By "numeric," we mean an object that is either of an int or float dtype. + + Parameters + ---------- + a : array-like, scalar + The first object to check. + b : array-like, scalar + The second object to check. + + Returns + ------- + boolean + Whether we return a comparing a datetime-like to a numeric object. 
+ + Examples + -------- + >>> dt = np.datetime64(pd.datetime(2017, 1, 1)) + >>> + >>> is_datetimelike_v_numeric(1, 1) + False + >>> is_datetimelike_v_numeric(dt, dt) + False + >>> is_datetimelike_v_numeric(1, dt) + True + >>> is_datetimelike_v_numeric(dt, 1) # symmetric check + True + >>> is_datetimelike_v_numeric(np.array([dt]), 1) + True + >>> is_datetimelike_v_numeric(np.array([1]), dt) + True + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + True + >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) + False + >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + False + """ + + if not hasattr(a, "dtype"): + a = np.asarray(a) + if not hasattr(b, "dtype"): + b = np.asarray(b) + + def is_numeric(x): + """ + Check if an object has a numeric dtype (i.e. integer or float). + """ + return is_integer_dtype(x) or is_float_dtype(x) + + return (needs_i8_conversion(a) and is_numeric(b)) or ( + needs_i8_conversion(b) and is_numeric(a) + ) + + +def needs_i8_conversion(arr_or_dtype) -> bool: + """ + Check whether the array or dtype should be converted to int64. + + An array-like or dtype "needs" such a conversion if the array-like + or dtype is of a datetime-like dtype + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype should be converted to int64. 
+ + Examples + -------- + >>> needs_i8_conversion(str) + False + >>> needs_i8_conversion(np.int64) + False + >>> needs_i8_conversion(np.datetime64) + True + >>> needs_i8_conversion(np.array(['a', 'b'])) + False + >>> needs_i8_conversion(pd.Series([1, 2])) + False + >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + True + >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + """ + + if arr_or_dtype is None: + return False + return ( + is_datetime_or_timedelta_dtype(arr_or_dtype) + or is_datetime64tz_dtype(arr_or_dtype) + or is_period_dtype(arr_or_dtype) + ) + + +def is_numeric_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a numeric dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a numeric dtype. + + Examples + -------- + >>> is_numeric_dtype(str) + False + >>> is_numeric_dtype(int) + True + >>> is_numeric_dtype(float) + True + >>> is_numeric_dtype(np.uint64) + True + >>> is_numeric_dtype(np.datetime64) + False + >>> is_numeric_dtype(np.timedelta64) + False + >>> is_numeric_dtype(np.array(['a', 'b'])) + False + >>> is_numeric_dtype(pd.Series([1, 2])) + True + >>> is_numeric_dtype(pd.Index([1, 2.])) + True + >>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) + False + """ + + return _is_dtype_type( + arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_) + ) + + +def is_string_like_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a string-like dtype. + + Unlike `is_string_dtype`, the object dtype is excluded because it + is a mixed dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of the string dtype. 
+ + Examples + -------- + >>> is_string_like_dtype(str) + True + >>> is_string_like_dtype(object) + False + >>> is_string_like_dtype(np.array(['a', 'b'])) + True + >>> is_string_like_dtype(pd.Series([1, 2])) + False + """ + + return _is_dtype(arr_or_dtype, lambda dtype: dtype.kind in ("S", "U")) + + +def is_float_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a float dtype. + + This function is internal and should not be exposed in the public API. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a float dtype. + + Examples + -------- + >>> is_float_dtype(str) + False + >>> is_float_dtype(int) + False + >>> is_float_dtype(float) + True + >>> is_float_dtype(np.array(['a', 'b'])) + False + >>> is_float_dtype(pd.Series([1, 2])) + False + >>> is_float_dtype(pd.Index([1, 2.])) + True + """ + return _is_dtype_type(arr_or_dtype, classes(np.floating)) + + +def is_bool_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a boolean dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a boolean dtype. + + Notes + ----- + An ExtensionArray is considered boolean when the ``_is_boolean`` + attribute is set to True. 
+ + Examples + -------- + >>> is_bool_dtype(str) + False + >>> is_bool_dtype(int) + False + >>> is_bool_dtype(bool) + True + >>> is_bool_dtype(np.bool) + True + >>> is_bool_dtype(np.array(['a', 'b'])) + False + >>> is_bool_dtype(pd.Series([1, 2])) + False + >>> is_bool_dtype(np.array([True, False])) + True + >>> is_bool_dtype(pd.Categorical([True, False])) + True + >>> is_bool_dtype(pd.arrays.SparseArray([True, False])) + True + """ + if arr_or_dtype is None: + return False + try: + dtype = _get_dtype(arr_or_dtype) + except TypeError: + return False + + if isinstance(arr_or_dtype, CategoricalDtype): + arr_or_dtype = arr_or_dtype.categories + # now we use the special definition for Index + + if isinstance(arr_or_dtype, ABCIndexClass): + + # TODO(jreback) + # we don't have a boolean Index class + # so its object, we need to infer to + # guess this + return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean" + elif is_extension_array_dtype(arr_or_dtype): + dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype) + return dtype._is_boolean + + return issubclass(dtype.type, np.bool_) + + +def is_extension_type(arr) -> bool: + """ + Check whether an array-like is of a pandas extension class instance. + + .. deprecated:: 1.0.0 + Use ``is_extension_array_dtype`` instead. + + Extension classes include categoricals, pandas sparse objects (i.e. + classes represented within the pandas library and not ones external + to it like scipy sparse matrices), and datetime-like arrays. + + Parameters + ---------- + arr : array-like + The array-like to check. + + Returns + ------- + boolean + Whether or not the array-like is of a pandas extension class instance. 
+ + Examples + -------- + >>> is_extension_type([1, 2, 3]) + False + >>> is_extension_type(np.array([1, 2, 3])) + False + >>> + >>> cat = pd.Categorical([1, 2, 3]) + >>> + >>> is_extension_type(cat) + True + >>> is_extension_type(pd.Series(cat)) + True + >>> is_extension_type(pd.arrays.SparseArray([1, 2, 3])) + True + >>> from scipy.sparse import bsr_matrix + >>> is_extension_type(bsr_matrix([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3])) + False + >>> is_extension_type(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern")) + True + >>> + >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern") + >>> s = pd.Series([], dtype=dtype) + >>> is_extension_type(s) + True + """ + warnings.warn( + "'is_extension_type' is deprecated and will be removed in a future " + "version. Use 'is_extension_array_dtype' instead.", + FutureWarning, + stacklevel=2, + ) + + if is_categorical(arr): + return True + elif is_sparse(arr): + return True + elif is_datetime64tz_dtype(arr): + return True + return False + + +def is_extension_array_dtype(arr_or_dtype) -> bool: + """ + Check if an object is a pandas extension array type. + + See the :ref:`User Guide <extending.extension-types>` for more. + + Parameters + ---------- + arr_or_dtype : object + For array-like input, the ``.dtype`` attribute will + be extracted. + + Returns + ------- + bool + Whether the `arr_or_dtype` is an extension array type. + + Notes + ----- + This checks whether an object implements the pandas extension + array interface. In pandas, this includes: + + * Categorical + * Sparse + * Interval + * Period + * DatetimeArray + * TimedeltaArray + + Third-party libraries may implement arrays or types satisfying + this interface as well. 
+ + Examples + -------- + >>> from pandas.api.types import is_extension_array_dtype + >>> arr = pd.Categorical(['a', 'b']) + >>> is_extension_array_dtype(arr) + True + >>> is_extension_array_dtype(arr.dtype) + True + + >>> arr = np.array(['a', 'b']) + >>> is_extension_array_dtype(arr.dtype) + False + """ + dtype = getattr(arr_or_dtype, "dtype", arr_or_dtype) + return isinstance(dtype, ExtensionDtype) or registry.find(dtype) is not None + + +def is_complex_dtype(arr_or_dtype) -> bool: + """ + Check whether the provided array or dtype is of a complex dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array or dtype to check. + + Returns + ------- + boolean + Whether or not the array or dtype is of a complex dtype. + + Examples + -------- + >>> is_complex_dtype(str) + False + >>> is_complex_dtype(int) + False + >>> is_complex_dtype(np.complex) + True + >>> is_complex_dtype(np.array(['a', 'b'])) + False + >>> is_complex_dtype(pd.Series([1, 2])) + False + >>> is_complex_dtype(np.array([1 + 1j, 5])) + True + """ + + return _is_dtype_type(arr_or_dtype, classes(np.complexfloating)) + + +def _is_dtype(arr_or_dtype, condition) -> bool: + """ + Return a boolean if the condition is satisfied for the arr_or_dtype. + + Parameters + ---------- + arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[np.dtype, ExtensionDtype]] + + Returns + ------- + bool + + """ + + if arr_or_dtype is None: + return False + try: + dtype = _get_dtype(arr_or_dtype) + except (TypeError, ValueError, UnicodeEncodeError): + return False + return condition(dtype) + + +def _get_dtype(arr_or_dtype): + """ + Get the dtype instance associated with an array + or dtype object. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype object whose dtype we want to extract. 
+ + Returns + ------- + obj_dtype : The extract dtype instance from the + passed in array or dtype object. + + Raises + ------ + TypeError : The passed in object is None. + """ + + if arr_or_dtype is None: + raise TypeError("Cannot deduce dtype from null object") + + # fastpath + elif isinstance(arr_or_dtype, np.dtype): + return arr_or_dtype + elif isinstance(arr_or_dtype, type): + return np.dtype(arr_or_dtype) + + # if we have an array-like + elif hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype + + return pandas_dtype(arr_or_dtype) + + +def _is_dtype_type(arr_or_dtype, condition) -> bool: + """ + Return a boolean if the condition is satisfied for the arr_or_dtype. + + Parameters + ---------- + arr_or_dtype : array-like + The array-like or dtype object whose dtype we want to extract. + condition : callable[Union[np.dtype, ExtensionDtypeType]] + + Returns + ------- + bool : if the condition is satisfied for the arr_or_dtype + """ + + if arr_or_dtype is None: + return condition(type(None)) + + # fastpath + if isinstance(arr_or_dtype, np.dtype): + return condition(arr_or_dtype.type) + elif isinstance(arr_or_dtype, type): + if issubclass(arr_or_dtype, ExtensionDtype): + arr_or_dtype = arr_or_dtype.type + return condition(np.dtype(arr_or_dtype).type) + + # if we have an array-like + if hasattr(arr_or_dtype, "dtype"): + arr_or_dtype = arr_or_dtype.dtype + + # we are not possibly a dtype + elif is_list_like(arr_or_dtype): + return condition(type(None)) + + try: + tipo = pandas_dtype(arr_or_dtype).type + except (TypeError, ValueError, UnicodeEncodeError): + if is_scalar(arr_or_dtype): + return condition(type(None)) + + return False + + return condition(tipo) + + +def infer_dtype_from_object(dtype): + """ + Get a numpy dtype.type-style object for a dtype object. + + This methods also includes handling of the datetime64[ns] and + datetime64[ns, TZ] objects. + + If no dtype can be found, we return ``object``. 
+ + Parameters + ---------- + dtype : dtype, type + The dtype object whose numpy dtype.type-style + object we want to extract. + + Returns + ------- + dtype_object : The extracted numpy dtype.type-style object. + """ + + if isinstance(dtype, type) and issubclass(dtype, np.generic): + # Type object from a dtype + return dtype + elif isinstance(dtype, (np.dtype, ExtensionDtype)): + # dtype object + try: + _validate_date_like_dtype(dtype) + except TypeError: + # Should still pass if we don't have a date-like + pass + return dtype.type + + try: + dtype = pandas_dtype(dtype) + except TypeError: + pass + + if is_extension_array_dtype(dtype): + return dtype.type + elif isinstance(dtype, str): + + # TODO(jreback) + # should deprecate these + if dtype in ["datetimetz", "datetime64tz"]: + return DatetimeTZDtype.type + elif dtype in ["period"]: + raise NotImplementedError + + if dtype == "datetime" or dtype == "timedelta": + dtype += "64" + try: + return infer_dtype_from_object(getattr(np, dtype)) + except (AttributeError, TypeError): + # Handles cases like _get_dtype(int) i.e., + # Python objects that are valid dtypes + # (unlike user-defined types, in general) + # + # TypeError handles the float16 type code of 'e' + # further handle internal types + pass + + return infer_dtype_from_object(np.dtype(dtype)) + + +def _validate_date_like_dtype(dtype) -> None: + """ + Check whether the dtype is a date-like dtype. Raises an error if invalid. + + Parameters + ---------- + dtype : dtype, type + The dtype to check. + + Raises + ------ + TypeError : The dtype could not be casted to a date-like dtype. + ValueError : The dtype is an illegal date-like dtype (e.g. 
the + frequency provided is too specific) + """ + + try: + typ = np.datetime_data(dtype)[0] + except ValueError as e: + raise TypeError(e) + if typ != "generic" and typ != "ns": + raise ValueError( + f"{repr(dtype.name)} is too specific of a frequency, " + f"try passing {repr(dtype.type.__name__)}" + ) + + +def pandas_dtype(dtype): + """ + Convert input into a pandas only dtype object or a numpy dtype object. + + Parameters + ---------- + dtype : object to be converted + + Returns + ------- + np.dtype or a pandas dtype + + Raises + ------ + TypeError if not a dtype + """ + # short-circuit + if isinstance(dtype, np.ndarray): + return dtype.dtype + elif isinstance(dtype, (np.dtype, ExtensionDtype)): + return dtype + + # registered extension types + result = registry.find(dtype) + if result is not None: + return result + + # try a numpy dtype + # raise a consistent TypeError if failed + try: + npdtype = np.dtype(dtype) + except SyntaxError: + # np.dtype uses `eval` which can raise SyntaxError + raise TypeError(f"data type '{dtype}' not understood") + + # Any invalid dtype (such as pd.Timestamp) should raise an error. + # np.dtype(invalid_type).kind = 0 for such objects. However, this will + # also catch some valid dtypes such as object, np.object_ and 'object' + # which we safeguard against by catching them earlier and returning + # np.dtype(valid_dtype) before this condition is evaluated. 
+ if is_hashable(dtype) and dtype in [object, np.object_, "object", "O"]: + # check hashability to avoid errors/DeprecationWarning when we get + # here and `dtype` is an array + return npdtype + elif npdtype.kind == "O": + raise TypeError(f"dtype '{dtype}' not understood") + + return npdtype diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py new file mode 100644 index 00000000..cd4b5af4 --- /dev/null +++ b/pandas/core/dtypes/concat.py @@ -0,0 +1,481 @@ +""" +Utility functions related to concat +""" + +import numpy as np + +from pandas._libs import tslib, tslibs + +from pandas.core.dtypes.common import ( + _NS_DTYPE, + _TD_DTYPE, + is_bool_dtype, + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_object_dtype, + is_sparse, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ( + ABCCategoricalIndex, + ABCDatetimeArray, + ABCIndexClass, + ABCRangeIndex, + ABCSeries, +) + + +def get_dtype_kinds(l): + """ + Parameters + ---------- + l : list of arrays + + Returns + ------- + a set of kinds that exist in this list of arrays + """ + + typs = set() + for arr in l: + + dtype = arr.dtype + if is_categorical_dtype(dtype): + typ = "category" + elif is_sparse(arr): + typ = "sparse" + elif isinstance(arr, ABCRangeIndex): + typ = "range" + elif is_datetime64tz_dtype(arr): + # if to_concat contains different tz, + # the result must be object dtype + typ = str(arr.dtype) + elif is_datetime64_dtype(dtype): + typ = "datetime" + elif is_timedelta64_dtype(dtype): + typ = "timedelta" + elif is_object_dtype(dtype): + typ = "object" + elif is_bool_dtype(dtype): + typ = "bool" + elif is_extension_array_dtype(dtype): + typ = str(arr.dtype) + else: + typ = dtype.kind + typs.add(typ) + return typs + + +def concat_compat(to_concat, axis: int = 0): + """ + provide concatenation of an array of arrays each of which is a single + 'normalized' dtypes (in that for example, if it's object, 
then it is a + non-datetimelike and provide a combined dtype for the resulting array that + preserves the overall dtype if possible) + + Parameters + ---------- + to_concat : array of arrays + axis : axis to provide concatenation + + Returns + ------- + a single array, preserving the combined dtypes + """ + + # filter empty arrays + # 1-d dtypes always are included here + def is_nonempty(x) -> bool: + if x.ndim <= axis: + return True + return x.shape[axis] > 0 + + # If all arrays are empty, there's nothing to convert, just short-cut to + # the concatenation, #3121. + # + # Creating an empty array directly is tempting, but the winnings would be + # marginal given that it would still require shape & dtype calculation and + # np.concatenate which has them both implemented is compiled. + + typs = get_dtype_kinds(to_concat) + _contains_datetime = any(typ.startswith("datetime") for typ in typs) + _contains_period = any(typ.startswith("period") for typ in typs) + + if "category" in typs: + # this must be prior to concat_datetime, + # to support Categorical + datetime-like + return concat_categorical(to_concat, axis=axis) + + elif _contains_datetime or "timedelta" in typs or _contains_period: + return concat_datetime(to_concat, axis=axis, typs=typs) + + # these are mandated to handle empties as well + elif "sparse" in typs: + return _concat_sparse(to_concat, axis=axis, typs=typs) + + all_empty = all(not is_nonempty(x) for x in to_concat) + if any(is_extension_array_dtype(x) for x in to_concat) and axis == 1: + to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat] + + if all_empty: + # we have all empties, but may need to coerce the result dtype to + # object if we have non-numeric type operands (numpy would otherwise + # cast this to float) + typs = get_dtype_kinds(to_concat) + if len(typs) != 1: + + if not len(typs - {"i", "u", "f"}) or not len(typs - {"bool", "i", "u"}): + # let numpy coerce + pass + else: + # coerce to object + to_concat = 
[x.astype("object") for x in to_concat] + + return np.concatenate(to_concat, axis=axis) + + +def concat_categorical(to_concat, axis: int = 0): + """Concatenate an object/categorical array of arrays, each of which is a + single dtype + + Parameters + ---------- + to_concat : array of arrays + axis : int + Axis to provide concatenation in the current implementation this is + always 0, e.g. we only have 1D categoricals + + Returns + ------- + Categorical + A single array, preserving the combined dtypes + """ + + # we could have object blocks and categoricals here + # if we only have a single categoricals then combine everything + # else its a non-compat categorical + categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)] + + # validate the categories + if len(categoricals) != len(to_concat): + pass + else: + # when all categories are identical + first = to_concat[0] + if all(first.is_dtype_equal(other) for other in to_concat[1:]): + return union_categoricals(categoricals) + + # extract the categoricals & coerce to object if needed + to_concat = [ + x._internal_get_values() + if is_categorical_dtype(x.dtype) + else np.asarray(x).ravel() + if not is_datetime64tz_dtype(x) + else np.asarray(x.astype(object)) + for x in to_concat + ] + result = concat_compat(to_concat) + if axis == 1: + result = result.reshape(1, len(result)) + return result + + +def union_categoricals( + to_union, sort_categories: bool = False, ignore_order: bool = False +): + """ + Combine list-like of Categorical-like, unioning categories. + + All categories must have the same dtype. + + Parameters + ---------- + to_union : list-like + Categorical, CategoricalIndex, or Series with dtype='category'. + sort_categories : bool, default False + If true, resulting categories will be lexsorted, otherwise + they will be ordered as they appear in the data. + ignore_order : bool, default False + If true, the ordered attribute of the Categoricals will be ignored. 
+ Results in an unordered categorical. + + Returns + ------- + Categorical + + Raises + ------ + TypeError + - all inputs do not have the same dtype + - all inputs do not have the same ordered property + - all inputs are ordered and their categories are not identical + - sort_categories=True and Categoricals are ordered + ValueError + Empty list of categoricals passed + + Notes + ----- + + To learn more about categories, see `link + <https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html#unioning>`__ + + Examples + -------- + + >>> from pandas.api.types import union_categoricals + + If you want to combine categoricals that do not necessarily have + the same categories, `union_categoricals` will combine a list-like + of categoricals. The new categories will be the union of the + categories being combined. + + >>> a = pd.Categorical(["b", "c"]) + >>> b = pd.Categorical(["a", "b"]) + >>> union_categoricals([a, b]) + [b, c, a, b] + Categories (3, object): [b, c, a] + + By default, the resulting categories will be ordered as they appear + in the `categories` of the data. If you want the categories to be + lexsorted, use `sort_categories=True` argument. + + >>> union_categoricals([a, b], sort_categories=True) + [b, c, a, b] + Categories (3, object): [a, b, c] + + `union_categoricals` also works with the case of combining two + categoricals of the same categories and order information (e.g. what + you could also `append` for). + + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "a"], ordered=True) + >>> union_categoricals([a, b]) + [a, b, a, b, a] + Categories (2, object): [a < b] + + Raises `TypeError` because the categories are ordered and not identical. 
+ + >>> a = pd.Categorical(["a", "b"], ordered=True) + >>> b = pd.Categorical(["a", "b", "c"], ordered=True) + >>> union_categoricals([a, b]) + TypeError: to union ordered Categoricals, all categories must be the same + + New in version 0.20.0 + + Ordered categoricals with different categories or orderings can be + combined by using the `ignore_order=True` argument. + + >>> a = pd.Categorical(["a", "b", "c"], ordered=True) + >>> b = pd.Categorical(["c", "b", "a"], ordered=True) + >>> union_categoricals([a, b], ignore_order=True) + [a, b, c, c, b, a] + Categories (3, object): [a, b, c] + + `union_categoricals` also works with a `CategoricalIndex`, or `Series` + containing categorical data, but note that the resulting array will + always be a plain `Categorical` + + >>> a = pd.Series(["b", "c"], dtype='category') + >>> b = pd.Series(["a", "b"], dtype='category') + >>> union_categoricals([a, b]) + [b, c, a, b] + Categories (3, object): [b, c, a] + """ + from pandas import Index, Categorical + from pandas.core.arrays.categorical import _recode_for_categories + + if len(to_union) == 0: + raise ValueError("No Categoricals to union") + + def _maybe_unwrap(x): + if isinstance(x, (ABCCategoricalIndex, ABCSeries)): + return x.values + elif isinstance(x, Categorical): + return x + else: + raise TypeError("all components to combine must be Categorical") + + to_union = [_maybe_unwrap(x) for x in to_union] + first = to_union[0] + + if not all( + is_dtype_equal(other.categories.dtype, first.categories.dtype) + for other in to_union[1:] + ): + raise TypeError("dtype of categories must be the same") + + ordered = False + if all(first.is_dtype_equal(other) for other in to_union[1:]): + # identical categories - fastpath + categories = first.categories + ordered = first.ordered + + if all(first.categories.equals(other.categories) for other in to_union[1:]): + new_codes = np.concatenate([c.codes for c in to_union]) + else: + codes = [first.codes] + [ + 
_recode_for_categories(other.codes, other.categories, first.categories) + for other in to_union[1:] + ] + new_codes = np.concatenate(codes) + + if sort_categories and not ignore_order and ordered: + raise TypeError("Cannot use sort_categories=True with ordered Categoricals") + + if sort_categories and not categories.is_monotonic_increasing: + categories = categories.sort_values() + indexer = categories.get_indexer(first.categories) + + from pandas.core.algorithms import take_1d + + new_codes = take_1d(indexer, new_codes, fill_value=-1) + elif ignore_order or all(not c.ordered for c in to_union): + # different categories - union and recode + cats = first.categories.append([c.categories for c in to_union[1:]]) + categories = Index(cats.unique()) + if sort_categories: + categories = categories.sort_values() + + new_codes = [ + _recode_for_categories(c.codes, c.categories, categories) for c in to_union + ] + new_codes = np.concatenate(new_codes) + else: + # ordered - to show a proper error message + if all(c.ordered for c in to_union): + msg = "to union ordered Categoricals, all categories must be the same" + raise TypeError(msg) + else: + raise TypeError("Categorical.ordered must be the same") + + if ignore_order: + ordered = False + + return Categorical(new_codes, categories=categories, ordered=ordered, fastpath=True) + + +def _concatenate_2d(to_concat, axis: int): + # coerce to 2d if needed & concatenate + if axis == 1: + to_concat = [np.atleast_2d(x) for x in to_concat] + return np.concatenate(to_concat, axis=axis) + + +def concat_datetime(to_concat, axis=0, typs=None): + """ + provide concatenation of an datetimelike array of arrays each of which is a + single M8[ns], datetimet64[ns, tz] or m8[ns] dtype + + Parameters + ---------- + to_concat : array of arrays + axis : axis to provide concatenation + typs : set of to_concat dtypes + + Returns + ------- + a single array, preserving the combined dtypes + """ + + if typs is None: + typs = get_dtype_kinds(to_concat) + 
+ # multiple types, need to coerce to object + if len(typs) != 1: + return _concatenate_2d( + [_convert_datetimelike_to_object(x) for x in to_concat], axis=axis + ) + + # must be single dtype + if any(typ.startswith("datetime") for typ in typs): + + if "datetime" in typs: + to_concat = [x.astype(np.int64, copy=False) for x in to_concat] + return _concatenate_2d(to_concat, axis=axis).view(_NS_DTYPE) + else: + # when to_concat has different tz, len(typs) > 1. + # thus no need to care + return _concat_datetimetz(to_concat) + + elif "timedelta" in typs: + return _concatenate_2d([x.view(np.int64) for x in to_concat], axis=axis).view( + _TD_DTYPE + ) + + elif any(typ.startswith("period") for typ in typs): + assert len(typs) == 1 + cls = to_concat[0] + new_values = cls._concat_same_type(to_concat) + return new_values + + +def _convert_datetimelike_to_object(x): + # coerce datetimelike array to object dtype + + # if dtype is of datetimetz or timezone + if x.dtype.kind == _NS_DTYPE.kind: + if getattr(x, "tz", None) is not None: + x = np.asarray(x.astype(object)) + else: + shape = x.shape + x = tslib.ints_to_pydatetime(x.view(np.int64).ravel(), box="timestamp") + x = x.reshape(shape) + + elif x.dtype == _TD_DTYPE: + shape = x.shape + x = tslibs.ints_to_pytimedelta(x.view(np.int64).ravel(), box=True) + x = x.reshape(shape) + + return x + + +def _concat_datetimetz(to_concat, name=None): + """ + concat DatetimeIndex with the same tz + all inputs must be DatetimeIndex + it is used in DatetimeIndex.append also + """ + # Right now, internals will pass a List[DatetimeArray] here + # for reductions like quantile. I would like to disentangle + # all this before we get here. 
+ sample = to_concat[0] + + if isinstance(sample, ABCIndexClass): + return sample._concat_same_dtype(to_concat, name=name) + elif isinstance(sample, ABCDatetimeArray): + return sample._concat_same_type(to_concat) + + +def _concat_sparse(to_concat, axis=0, typs=None): + """ + provide concatenation of an sparse/dense array of arrays each of which is a + single dtype + + Parameters + ---------- + to_concat : array of arrays + axis : axis to provide concatenation + typs : set of to_concat dtypes + + Returns + ------- + a single array, preserving the combined dtypes + """ + + from pandas.core.arrays import SparseArray + + fill_values = [x.fill_value for x in to_concat if isinstance(x, SparseArray)] + fill_value = fill_values[0] + + # TODO: Fix join unit generation so we aren't passed this. + to_concat = [ + x + if isinstance(x, SparseArray) + else SparseArray(x.squeeze(), fill_value=fill_value) + for x in to_concat + ] + + return SparseArray._concat_same_type(to_concat) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py new file mode 100644 index 00000000..466ed815 --- /dev/null +++ b/pandas/core/dtypes/dtypes.py @@ -0,0 +1,1161 @@ +""" define extension dtypes """ +import re +from typing import Any, Dict, List, MutableMapping, Optional, Tuple, Type, Union, cast + +import numpy as np +import pytz + +from pandas._libs.interval import Interval +from pandas._libs.tslibs import NaT, Period, Timestamp, timezones +from pandas._typing import Ordered + +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.generic import ABCCategoricalIndex, ABCDateOffset, ABCIndexClass +from pandas.core.dtypes.inference import is_bool, is_list_like + +str_type = str + + +def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: + """ + Register an ExtensionType with pandas as class decorator. + + .. versionadded:: 0.24.0 + + This enables operations like ``.astype(name)`` for the name + of the ExtensionDtype. 
+ + Returns + ------- + callable + A class decorator. + + Examples + -------- + >>> from pandas.api.extensions import register_extension_dtype + >>> from pandas.api.extensions import ExtensionDtype + >>> @register_extension_dtype + ... class MyExtensionDtype(ExtensionDtype): + ... pass + """ + registry.register(cls) + return cls + + +class Registry: + """ + Registry for dtype inference. + + The registry allows one to map a string repr of a extension + dtype to an extension dtype. The string alias can be used in several + places, including + + * Series and Index constructors + * :meth:`pandas.array` + * :meth:`pandas.Series.astype` + + Multiple extension types can be registered. + These are tried in order. + """ + + def __init__(self): + self.dtypes: List[Type[ExtensionDtype]] = [] + + def register(self, dtype: Type[ExtensionDtype]) -> None: + """ + Parameters + ---------- + dtype : ExtensionDtype + """ + if not issubclass(dtype, ExtensionDtype): + raise ValueError("can only register pandas extension dtypes") + + self.dtypes.append(dtype) + + def find( + self, dtype: Union[Type[ExtensionDtype], str] + ) -> Optional[Type[ExtensionDtype]]: + """ + Parameters + ---------- + dtype : Type[ExtensionDtype] or str + + Returns + ------- + return the first matching dtype, otherwise return None + """ + if not isinstance(dtype, str): + dtype_type = dtype + if not isinstance(dtype, type): + dtype_type = type(dtype) + if issubclass(dtype_type, ExtensionDtype): + return dtype + + return None + + for dtype_type in self.dtypes: + try: + return dtype_type.construct_from_string(dtype) + except TypeError: + pass + + return None + + +registry = Registry() + + +class PandasExtensionDtype(ExtensionDtype): + """ + A np.dtype duck-typed class, suitable for holding a custom dtype. 
+ + THIS IS NOT A REAL NUMPY DTYPE + """ + + type: Any + kind: Any + # The Any type annotations above are here only because mypy seems to have a + # problem dealing with with multiple inheritance from PandasExtensionDtype + # and ExtensionDtype's @properties in the subclasses below. The kind and + # type variables in those subclasses are explicitly typed below. + subdtype = None + str: Optional[str_type] = None + num = 100 + shape: Tuple[int, ...] = tuple() + itemsize = 8 + base = None + isbuiltin = 0 + isnative = 0 + _cache: Dict[str_type, "PandasExtensionDtype"] = {} + + def __str__(self) -> str_type: + """ + Return a string representation for a particular Object + """ + return self.name + + def __repr__(self) -> str_type: + """ + Return a string representation for a particular object. + """ + return str(self) + + def __hash__(self) -> int: + raise NotImplementedError("sub-classes should implement an __hash__ method") + + def __getstate__(self) -> Dict[str_type, Any]: + # pickle support; we don't want to pickle the cache + return {k: getattr(self, k, None) for k in self._metadata} + + @classmethod + def reset_cache(cls) -> None: + """ clear the cache """ + cls._cache = {} + + +class CategoricalDtypeType(type): + """ + the type of CategoricalDtype, this metaclass determines subclass ability + """ + + pass + + +@register_extension_dtype +class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): + """ + Type for categorical data with the categories and orderedness. + + .. versionchanged:: 0.21.0 + + Parameters + ---------- + categories : sequence, optional + Must be unique, and must not contain any nulls. + ordered : bool or None, default False + Whether or not this categorical is treated as a ordered categorical. + None can be used to maintain the ordered value of existing categoricals when + used in operations that combine categoricals, e.g. astype, and will resolve to + False if there is no existing ordered to maintain. 
+ + Attributes + ---------- + categories + ordered + + Methods + ------- + None + + See Also + -------- + Categorical + + Notes + ----- + This class is useful for specifying the type of a ``Categorical`` + independent of the values. See :ref:`categorical.categoricaldtype` + for more. + + Examples + -------- + >>> t = pd.CategoricalDtype(categories=['b', 'a'], ordered=True) + >>> pd.Series(['a', 'b', 'a', 'c'], dtype=t) + 0 a + 1 b + 2 a + 3 NaN + dtype: category + Categories (2, object): [b < a] + """ + + # TODO: Document public vs. private API + name = "category" + type: Type[CategoricalDtypeType] = CategoricalDtypeType + kind: str_type = "O" + str = "|O08" + base = np.dtype("O") + _metadata = ("categories", "ordered") + _cache: Dict[str_type, PandasExtensionDtype] = {} + + def __init__(self, categories=None, ordered: Ordered = False): + self._finalize(categories, ordered, fastpath=False) + + @classmethod + def _from_fastpath( + cls, categories=None, ordered: Optional[bool] = None + ) -> "CategoricalDtype": + self = cls.__new__(cls) + self._finalize(categories, ordered, fastpath=True) + return self + + @classmethod + def _from_categorical_dtype( + cls, dtype: "CategoricalDtype", categories=None, ordered: Ordered = None + ) -> "CategoricalDtype": + if categories is ordered is None: + return dtype + if categories is None: + categories = dtype.categories + if ordered is None: + ordered = dtype.ordered + return cls(categories, ordered) + + @classmethod + def _from_values_or_dtype( + cls, + values=None, + categories=None, + ordered: Optional[bool] = None, + dtype: Optional["CategoricalDtype"] = None, + ) -> "CategoricalDtype": + """ + Construct dtype from the input parameters used in :class:`Categorical`. + + This constructor method specifically does not do the factorization + step, if that is needed to find the categories. This constructor may + therefore return ``CategoricalDtype(categories=None, ordered=None)``, + which may not be useful. 
Additional steps may therefore have to be + taken to create the final dtype. + + The return dtype is specified from the inputs in this prioritized + order: + 1. if dtype is a CategoricalDtype, return dtype + 2. if dtype is the string 'category', create a CategoricalDtype from + the supplied categories and ordered parameters, and return that. + 3. if values is a categorical, use value.dtype, but override it with + categories and ordered if either/both of those are not None. + 4. if dtype is None and values is not a categorical, construct the + dtype from categories and ordered, even if either of those is None. + + Parameters + ---------- + values : list-like, optional + The list-like must be 1-dimensional. + categories : list-like, optional + Categories for the CategoricalDtype. + ordered : bool, optional + Designating if the categories are ordered. + dtype : CategoricalDtype or the string "category", optional + If ``CategoricalDtype``, cannot be used together with + `categories` or `ordered`. + + Returns + ------- + CategoricalDtype + + Examples + -------- + >>> CategoricalDtype._from_values_or_dtype() + CategoricalDtype(categories=None, ordered=None) + >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'], + ... ordered=True) + CategoricalDtype(categories=['a', 'b'], ordered=True) + >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True) + >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False) + >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True) + >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True, + ... dtype=dtype2) + ValueError: Cannot specify `categories` or `ordered` together with + `dtype`. 
+ + The supplied dtype takes precedence over values' dtype: + + >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2) + CategoricalDtype(['x', 'y'], ordered=False) + """ + from pandas.core.dtypes.common import is_categorical + + if dtype is not None: + # The dtype argument takes precedence over values.dtype (if any) + if isinstance(dtype, str): + if dtype == "category": + dtype = CategoricalDtype(categories, ordered) + else: + raise ValueError(f"Unknown dtype {repr(dtype)}") + elif categories is not None or ordered is not None: + raise ValueError( + "Cannot specify `categories` or `ordered` together with `dtype`." + ) + elif is_categorical(values): + # If no "dtype" was passed, use the one from "values", but honor + # the "ordered" and "categories" arguments + dtype = values.dtype._from_categorical_dtype( + values.dtype, categories, ordered + ) + else: + # If dtype=None and values is not categorical, create a new dtype. + # Note: This could potentially have categories=None and + # ordered=None. + dtype = CategoricalDtype(categories, ordered) + + return dtype + + @classmethod + def construct_from_string(cls, string: str_type) -> "CategoricalDtype": + """ + Construct a CategoricalDtype from a string. + + Parameters + ---------- + string : str + Must be the string "category" in order to be successfully constructed. + + Returns + ------- + CategoricalDtype + Instance of the dtype. + + Raises + ------ + TypeError + If a CategoricalDtype cannot be constructed from the input. 
+ """ + if not isinstance(string, str): + raise TypeError(f"Expects a string, got {type(string)}") + if string != cls.name: + raise TypeError(f"Cannot construct a 'CategoricalDtype' from '{string}'") + + # need ordered=None to ensure that operations specifying dtype="category" don't + # override the ordered value for existing categoricals + return cls(ordered=None) + + def _finalize(self, categories, ordered: Ordered, fastpath: bool = False) -> None: + + if ordered is not None: + self.validate_ordered(ordered) + + if categories is not None: + categories = self.validate_categories(categories, fastpath=fastpath) + + self._categories = categories + self._ordered = ordered + + def __setstate__(self, state: MutableMapping[str_type, Any]) -> None: + # for pickle compat. __get_state__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._categories = state.pop("categories", None) + self._ordered = state.pop("ordered", False) + + def __hash__(self) -> int: + # _hash_categories returns a uint64, so use the negative + # space for when we have unknown categories to avoid a conflict + if self.categories is None: + if self.ordered: + return -1 + else: + return -2 + # We *do* want to include the real self.ordered here + return int(self._hash_categories(self.categories, self.ordered)) + + def __eq__(self, other: Any) -> bool: + """ + Rules for CDT equality: + 1) Any CDT is equal to the string 'category' + 2) Any CDT is equal to itself + 3) Any CDT is equal to a CDT with categories=None regardless of ordered + 4) A CDT with ordered=True is only equal to another CDT with + ordered=True and identical categories in the same order + 5) A CDT with ordered={False, None} is only equal to another CDT with + ordered={False, None} and identical categories, but same order is + not required. There is no distinction between False/None. 
+ 6) Any other comparison returns False + """ + if isinstance(other, str): + return other == self.name + elif other is self: + return True + elif not (hasattr(other, "ordered") and hasattr(other, "categories")): + return False + elif self.categories is None or other.categories is None: + # We're forced into a suboptimal corner thanks to math and + # backwards compatibility. We require that `CDT(...) == 'category'` + # for all CDTs **including** `CDT(None, ...)`. Therefore, *all* + # CDT(., .) = CDT(None, False) and *all* + # CDT(., .) = CDT(None, True). + return True + elif self.ordered or other.ordered: + # At least one has ordered=True; equal if both have ordered=True + # and the same values for categories in the same order. + return (self.ordered == other.ordered) and self.categories.equals( + other.categories + ) + else: + # Neither has ordered=True; equal if both have the same categories, + # but same order is not necessary. There is no distinction between + # ordered=False and ordered=None: CDT(., False) and CDT(., None) + # will be equal if they have the same categories. + if ( + self.categories.dtype == other.categories.dtype + and self.categories.equals(other.categories) + ): + # Check and see if they happen to be identical categories + return True + return hash(self) == hash(other) + + def __repr__(self) -> str_type: + tpl = "CategoricalDtype(categories={data}ordered={ordered})" + if self.categories is None: + data = "None, " + else: + data = self.categories._format_data(name=type(self).__name__) + return tpl.format(data=data, ordered=self.ordered) + + @staticmethod + def _hash_categories(categories, ordered: Ordered = True) -> int: + from pandas.core.util.hashing import ( + hash_array, + _combine_hash_arrays, + hash_tuples, + ) + from pandas.core.dtypes.common import is_datetime64tz_dtype, _NS_DTYPE + + if len(categories) and isinstance(categories[0], tuple): + # assumes if any individual category is a tuple, then all our. 
ATM + # I don't really want to support just some of the categories being + # tuples. + categories = list(categories) # breaks if a np.array of categories + cat_array = hash_tuples(categories) + else: + if categories.dtype == "O": + if len({type(x) for x in categories}) != 1: + # TODO: hash_array doesn't handle mixed types. It casts + # everything to a str first, which means we treat + # {'1', '2'} the same as {'1', 2} + # find a better solution + hashed = hash((tuple(categories), ordered)) + return hashed + + if is_datetime64tz_dtype(categories.dtype): + # Avoid future warning. + categories = categories.astype(_NS_DTYPE) + + cat_array = hash_array(np.asarray(categories), categorize=False) + if ordered: + cat_array = np.vstack( + [cat_array, np.arange(len(cat_array), dtype=cat_array.dtype)] + ) + else: + cat_array = [cat_array] + hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) + return np.bitwise_xor.reduce(hashed) + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas import Categorical + + return Categorical + + @staticmethod + def validate_ordered(ordered: Ordered) -> None: + """ + Validates that we have a valid ordered parameter. If + it is not a boolean, a TypeError will be raised. + + Parameters + ---------- + ordered : object + The parameter to be verified. + + Raises + ------ + TypeError + If 'ordered' is not a boolean. 
+ """ + if not is_bool(ordered): + raise TypeError("'ordered' must either be 'True' or 'False'") + + @staticmethod + def validate_categories(categories, fastpath: bool = False): + """ + Validates that we have good categories + + Parameters + ---------- + categories : array-like + fastpath : bool + Whether to skip nan and uniqueness checks + + Returns + ------- + categories : Index + """ + from pandas.core.indexes.base import Index + + if not fastpath and not is_list_like(categories): + raise TypeError( + f"Parameter 'categories' must be list-like, was {repr(categories)}" + ) + elif not isinstance(categories, ABCIndexClass): + categories = Index(categories, tupleize_cols=False) + + if not fastpath: + + if categories.hasnans: + raise ValueError("Categorial categories cannot be null") + + if not categories.is_unique: + raise ValueError("Categorical categories must be unique") + + if isinstance(categories, ABCCategoricalIndex): + categories = categories.categories + + return categories + + def update_dtype( + self, dtype: Union[str_type, "CategoricalDtype"] + ) -> "CategoricalDtype": + """ + Returns a CategoricalDtype with categories and ordered taken from dtype + if specified, otherwise falling back to self if unspecified + + Parameters + ---------- + dtype : CategoricalDtype + + Returns + ------- + new_dtype : CategoricalDtype + """ + if isinstance(dtype, str) and dtype == "category": + # dtype='category' should not change anything + return self + elif not self.is_dtype(dtype): + raise ValueError( + f"a CategoricalDtype must be passed to perform an update, " + f"got {repr(dtype)}" + ) + else: + # from here on, dtype is a CategoricalDtype + dtype = cast(CategoricalDtype, dtype) + + # update categories/ordered unless they've been explicitly passed as None + new_categories = ( + dtype.categories if dtype.categories is not None else self.categories + ) + new_ordered = dtype.ordered if dtype.ordered is not None else self.ordered + + return CategoricalDtype(new_categories, 
new_ordered) + + @property + def categories(self): + """ + An ``Index`` containing the unique categories allowed. + """ + return self._categories + + @property + def ordered(self) -> Ordered: + """ + Whether the categories have an ordered relationship. + """ + return self._ordered + + @property + def _is_boolean(self) -> bool: + from pandas.core.dtypes.common import is_bool_dtype + + return is_bool_dtype(self.categories) + + +@register_extension_dtype +class DatetimeTZDtype(PandasExtensionDtype): + """ + An ExtensionDtype for timezone-aware datetime data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + unit : str, default "ns" + The precision of the datetime data. Currently limited + to ``"ns"``. + tz : str, int, or datetime.tzinfo + The timezone. + + Attributes + ---------- + unit + tz + + Methods + ------- + None + + Raises + ------ + pytz.UnknownTimeZoneError + When the requested timezone cannot be found. + + Examples + -------- + >>> pd.DatetimeTZDtype(tz='UTC') + datetime64[ns, UTC] + + >>> pd.DatetimeTZDtype(tz='dateutil/US/Central') + datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] + """ + + type: Type[Timestamp] = Timestamp + kind: str_type = "M" + str = "|M8[ns]" + num = 101 + base = np.dtype("M8[ns]") + na_value = NaT + _metadata = ("unit", "tz") + _match = re.compile(r"(datetime64|M8)\[(?P.+), (?P.+)\]") + _cache: Dict[str_type, PandasExtensionDtype] = {} + + def __init__(self, unit="ns", tz=None): + if isinstance(unit, DatetimeTZDtype): + unit, tz = unit.unit, unit.tz + + if unit != "ns": + if isinstance(unit, str) and tz is None: + # maybe a string like datetime64[ns, tz], which we support for + # now. + result = type(self).construct_from_string(unit) + unit = result.unit + tz = result.tz + msg = ( + f"Passing a dtype alias like 'datetime64[ns, {tz}]' " + "to DatetimeTZDtype is no longer supported. Use " + "'DatetimeTZDtype.construct_from_string()' instead." 
+ ) + raise ValueError(msg) + else: + raise ValueError("DatetimeTZDtype only supports ns units") + + if tz: + tz = timezones.maybe_get_tz(tz) + tz = timezones.tz_standardize(tz) + elif tz is not None: + raise pytz.UnknownTimeZoneError(tz) + if tz is None: + raise TypeError("A 'tz' is required.") + + self._unit = unit + self._tz = tz + + @property + def unit(self): + """ + The precision of the datetime data. + """ + return self._unit + + @property + def tz(self): + """ + The timezone. + """ + return self._tz + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays import DatetimeArray + + return DatetimeArray + + @classmethod + def construct_from_string(cls, string: str_type): + """ + Construct a DatetimeTZDtype from a string. + + Parameters + ---------- + string : str + The string alias for this DatetimeTZDtype. + Should be formatted like ``datetime64[ns, ]``, + where ```` is the timezone name. + + Examples + -------- + >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') + datetime64[ns, UTC] + """ + if isinstance(string, str): + msg = f"Cannot construct a 'DatetimeTZDtype' from '{string}'" + match = cls._match.match(string) + if match: + d = match.groupdict() + try: + return cls(unit=d["unit"], tz=d["tz"]) + except (KeyError, TypeError, ValueError) as err: + # KeyError if maybe_get_tz tries and fails to get a + # pytz timezone (actually pytz.UnknownTimeZoneError). + # TypeError if we pass a nonsense tz; + # ValueError if we pass a unit other than "ns" + raise TypeError(msg) from err + raise TypeError(msg) + + raise TypeError("Cannot construct a 'DatetimeTZDtype'") + + def __str__(self) -> str_type: + return f"datetime64[{self.unit}, {self.tz}]" + + @property + def name(self) -> str_type: + """A string representation of the dtype.""" + return str(self) + + def __hash__(self) -> int: + # make myself hashable + # TODO: update this. 
+ return hash(str(self)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + return other == self.name + + return ( + isinstance(other, DatetimeTZDtype) + and self.unit == other.unit + and str(self.tz) == str(other.tz) + ) + + def __setstate__(self, state): + # for pickle compat. __get_state__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._tz = state["tz"] + self._unit = state["unit"] + + +@register_extension_dtype +class PeriodDtype(PandasExtensionDtype): + """ + An ExtensionDtype for Period data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + freq : str or DateOffset + The frequency of this PeriodDtype. + + Attributes + ---------- + freq + + Methods + ------- + None + + Examples + -------- + >>> pd.PeriodDtype(freq='D') + period[D] + + >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) + period[M] + """ + + type: Type[Period] = Period + kind: str_type = "O" + str = "|O08" + base = np.dtype("O") + num = 102 + _metadata = ("freq",) + _match = re.compile(r"(P|p)eriod\[(?P.+)\]") + _cache: Dict[str_type, PandasExtensionDtype] = {} + + def __new__(cls, freq=None): + """ + Parameters + ---------- + freq : frequency + """ + + if isinstance(freq, PeriodDtype): + return freq + + elif freq is None: + # empty constructor for pickle compat + u = object.__new__(cls) + u._freq = None + return u + + if not isinstance(freq, ABCDateOffset): + freq = cls._parse_dtype_strict(freq) + + try: + return cls._cache[freq.freqstr] + except KeyError: + u = object.__new__(cls) + u._freq = freq + cls._cache[freq.freqstr] = u + return u + + @property + def freq(self): + """ + The frequency object of this PeriodDtype. 
+ """ + return self._freq + + @classmethod + def _parse_dtype_strict(cls, freq): + if isinstance(freq, str): + if freq.startswith("period[") or freq.startswith("Period["): + m = cls._match.search(freq) + if m is not None: + freq = m.group("freq") + from pandas.tseries.frequencies import to_offset + + freq = to_offset(freq) + if freq is not None: + return freq + + raise ValueError("could not construct PeriodDtype") + + @classmethod + def construct_from_string(cls, string): + """ + Strict construction from a string, raise a TypeError if not + possible + """ + if ( + isinstance(string, str) + and (string.startswith("period[") or string.startswith("Period[")) + or isinstance(string, ABCDateOffset) + ): + # do not parse string like U as period[U] + # avoid tuple to be regarded as freq + try: + return cls(freq=string) + except ValueError: + pass + if isinstance(string, str): + msg = f"Cannot construct a 'PeriodDtype' from '{string}'" + else: + msg = f"'construct_from_string' expects a string, got {type(string)}" + raise TypeError(msg) + + def __str__(self) -> str_type: + return self.name + + @property + def name(self) -> str_type: + return f"period[{self.freq.freqstr}]" + + @property + def na_value(self): + return NaT + + def __hash__(self) -> int: + # make myself hashable + return hash(str(self)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + return other == self.name or other == self.name.title() + + return isinstance(other, PeriodDtype) and self.freq == other.freq + + def __setstate__(self, state): + # for pickle compat. 
__get_state__ is defined in the + # PandasExtensionDtype superclass and uses the public properties to + # pickle -> need to set the settable private ones here (see GH26067) + self._freq = state["freq"] + + @classmethod + def is_dtype(cls, dtype) -> bool: + """ + Return a boolean if we if the passed type is an actual dtype that we + can match (via string or type) + """ + + if isinstance(dtype, str): + # PeriodDtype can be instantiated from freq string like "U", + # but doesn't regard freq str like "U" as dtype. + if dtype.startswith("period[") or dtype.startswith("Period["): + try: + if cls._parse_dtype_strict(dtype) is not None: + return True + else: + return False + except ValueError: + return False + else: + return False + return super().is_dtype(dtype) + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + from pandas.core.arrays import PeriodArray + + return PeriodArray + + def __from_arrow__(self, array): + """Construct PeriodArray from pyarrow Array/ChunkedArray.""" + import pyarrow + from pandas.core.arrays import PeriodArray + from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask + + if isinstance(array, pyarrow.Array): + chunks = [array] + else: + chunks = array.chunks + + results = [] + for arr in chunks: + data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype="int64") + parr = PeriodArray(data.copy(), freq=self.freq, copy=False) + parr[~mask] = NaT + results.append(parr) + + return PeriodArray._concat_same_type(results) + + +@register_extension_dtype +class IntervalDtype(PandasExtensionDtype): + """ + An ExtensionDtype for Interval data. + + **This is not an actual numpy dtype**, but a duck type. + + Parameters + ---------- + subtype : str, np.dtype + The dtype of the Interval bounds. 
+ + Attributes + ---------- + subtype + + Methods + ------- + None + + Examples + -------- + >>> pd.IntervalDtype(subtype='int64') + interval[int64] + """ + + name = "interval" + kind: str_type = "O" + str = "|O08" + base = np.dtype("O") + num = 103 + _metadata = ("subtype",) + _match = re.compile(r"(I|i)nterval\[(?P.+)\]") + _cache: Dict[str_type, PandasExtensionDtype] = {} + + def __new__(cls, subtype=None): + from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_string_dtype, + pandas_dtype, + ) + + if isinstance(subtype, IntervalDtype): + return subtype + elif subtype is None: + # we are called as an empty constructor + # generally for pickle compat + u = object.__new__(cls) + u._subtype = None + return u + elif isinstance(subtype, str) and subtype.lower() == "interval": + subtype = None + else: + if isinstance(subtype, str): + m = cls._match.search(subtype) + if m is not None: + subtype = m.group("subtype") + + try: + subtype = pandas_dtype(subtype) + except TypeError: + raise TypeError("could not construct IntervalDtype") + + if is_categorical_dtype(subtype) or is_string_dtype(subtype): + # GH 19016 + msg = ( + "category, object, and string subtypes are not supported " + "for IntervalDtype" + ) + raise TypeError(msg) + + try: + return cls._cache[str(subtype)] + except KeyError: + u = object.__new__(cls) + u._subtype = subtype + cls._cache[str(subtype)] = u + return u + + @property + def subtype(self): + """ + The dtype of the Interval bounds. + """ + return self._subtype + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + from pandas.core.arrays import IntervalArray + + return IntervalArray + + @classmethod + def construct_from_string(cls, string): + """ + attempt to construct this type from a string, raise a TypeError + if its not possible + """ + if not isinstance(string, str): + raise TypeError(f"a string needs to be passed, got type {type(string)}") + + if string.lower() == "interval" or cls._match.search(string) is not None: + return cls(string) + + msg = ( + f"Cannot construct a 'IntervalDtype' from '{string}'.\n\n" + "Incorrectly formatted string passed to constructor. " + "Valid formats include Interval or Interval[dtype] " + "where dtype is numeric, datetime, or timedelta" + ) + raise TypeError(msg) + + @property + def type(self): + return Interval + + def __str__(self) -> str_type: + if self.subtype is None: + return "interval" + return f"interval[{self.subtype}]" + + def __hash__(self) -> int: + # make myself hashable + return hash(str(self)) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + return other.lower() in (self.name.lower(), str(self).lower()) + elif not isinstance(other, IntervalDtype): + return False + elif self.subtype is None or other.subtype is None: + # None should match any subtype + return True + else: + from pandas.core.dtypes.common import is_dtype_equal + + return is_dtype_equal(self.subtype, other.subtype) + + def __setstate__(self, state): + # for pickle compat. 
# define abstract base classes to enable isinstance type checking on our
# objects
def create_pandas_abc_type(name, attr, comp):
    """
    Create a class whose isinstance/issubclass checks are attribute based.

    An object (or class) is accepted when the value of its ``attr``
    attribute is one of the entries in ``comp``.

    Parameters
    ----------
    name : str
        Name of the generated class.
    attr : str
        Attribute to read from the object being checked.
    comp : str or iterable of str
        Accepted attribute values. A single string is treated as one
        accepted value.

    Returns
    -------
    type
        A class built from a metaclass that overrides
        ``__instancecheck__`` and ``__subclasscheck__``.
    """
    # ``("categorical")`` is a plain string, not a 1-tuple. Left as a string,
    # the ``in`` test below would do substring matching and accept values
    # such as "" or "cat". Normalise to a tuple so matching is always exact.
    if isinstance(comp, str):
        comp = (comp,)
    else:
        comp = tuple(comp)

    # https://github.com/python/mypy/issues/1006
    # error: 'classmethod' used with a non-method
    @classmethod  # type: ignore
    def _check(cls, inst) -> bool:
        # Objects lacking ``attr`` yield the literal "_typ".
        return getattr(inst, attr, "_typ") in comp

    dct = dict(__instancecheck__=_check, __subclasscheck__=_check)
    meta = type("ABCBase", (type,), dct)
    return meta(name, tuple(), dct)
# Each ABC below accepts objects whose ``_typ`` (or ``_subtyp``) attribute
# equals one of the listed strings. The last argument must be a tuple:
# a one-element tuple needs its trailing comma, otherwise it is just a
# parenthesised string and membership degrades to substring matching.
ABCIndex = create_pandas_abc_type("ABCIndex", "_typ", ("index",))
ABCInt64Index = create_pandas_abc_type("ABCInt64Index", "_typ", ("int64index",))
ABCUInt64Index = create_pandas_abc_type("ABCUInt64Index", "_typ", ("uint64index",))
ABCRangeIndex = create_pandas_abc_type("ABCRangeIndex", "_typ", ("rangeindex",))
ABCFloat64Index = create_pandas_abc_type("ABCFloat64Index", "_typ", ("float64index",))
ABCMultiIndex = create_pandas_abc_type("ABCMultiIndex", "_typ", ("multiindex",))
ABCDatetimeIndex = create_pandas_abc_type(
    "ABCDatetimeIndex", "_typ", ("datetimeindex",)
)
ABCTimedeltaIndex = create_pandas_abc_type(
    "ABCTimedeltaIndex", "_typ", ("timedeltaindex",)
)
ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",))
ABCCategoricalIndex = create_pandas_abc_type(
    "ABCCategoricalIndex", "_typ", ("categoricalindex",)
)
ABCIntervalIndex = create_pandas_abc_type(
    "ABCIntervalIndex", "_typ", ("intervalindex",)
)
# Union of every index flavour listed above.
ABCIndexClass = create_pandas_abc_type(
    "ABCIndexClass",
    "_typ",
    (
        "index",
        "int64index",
        "rangeindex",
        "float64index",
        "uint64index",
        "multiindex",
        "datetimeindex",
        "timedeltaindex",
        "periodindex",
        "categoricalindex",
        "intervalindex",
    ),
)

ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))

ABCSparseArray = create_pandas_abc_type(
    "ABCSparseArray", "_subtyp", ("sparse_array", "sparse_series")
)
# The next three previously passed a bare string (missing trailing comma).
ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", ("categorical",))
ABCDatetimeArray = create_pandas_abc_type(
    "ABCDatetimeArray", "_typ", ("datetimearray",)
)
ABCTimedeltaArray = create_pandas_abc_type(
    "ABCTimedeltaArray", "_typ", ("timedeltaarray",)
)
ABCPeriodArray = create_pandas_abc_type("ABCPeriodArray", "_typ", ("periodarray",))
ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period",))
ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ", ("dateoffset",))
def is_number(obj) -> bool:
    """
    Tell whether `obj` is a numeric scalar.

    An object counts as a number when it is an instance of
    ``numbers.Number`` or of ``numpy.number``.

    Parameters
    ----------
    obj : any type
        The object to test.

    Returns
    -------
    bool
        True if `obj` is a number, False otherwise.

    See Also
    --------
    api.types.is_integer: Checks a subgroup of numbers.

    Examples
    --------
    >>> pd.api.types.is_number(1)
    True
    >>> pd.api.types.is_number(7.15)
    True

    Booleans pass because ``bool`` is a subclass of ``int``.

    >>> pd.api.types.is_number(False)
    True

    Strings never pass, even when they look numeric.

    >>> pd.api.types.is_number("foo")
    False
    >>> pd.api.types.is_number("5")
    False
    """
    numeric_types = (Number, np.number)
    return isinstance(obj, numeric_types)
def is_iterator(obj) -> bool:
    """
    Check if the object is an iterator.

    An iterator defines both ``__iter__`` and ``__next__``. Containers such
    as lists, tuples and strings are iterable but are not iterators
    themselves, so they return False here. Calling ``iter`` on them
    yields an object that does qualify.

    Parameters
    ----------
    obj : The object to check

    Returns
    -------
    is_iter : bool
        Whether `obj` is an iterator.

    Examples
    --------
    >>> is_iterator(iter([1, 2, 3]))
    True
    >>> is_iterator((x for x in []))
    True
    >>> is_iterator([1, 2, 3])
    False
    >>> is_iterator(datetime(2017, 1, 1))
    False
    >>> is_iterator("foo")
    False
    >>> is_iterator(1)
    False
    """
    return hasattr(obj, "__iter__") and hasattr(obj, "__next__")
def is_re_compilable(obj) -> bool:
    """
    Check if the object can be compiled into a regex pattern instance.

    Parameters
    ----------
    obj : The object to check

    Returns
    -------
    is_regex_compilable : bool
        Whether `obj` can be compiled as a regex pattern. Objects of an
        unsupported type and strings that are not valid patterns both
        give False.

    Examples
    --------
    >>> is_re_compilable(".*")
    True
    >>> is_re_compilable(1)
    False
    >>> is_re_compilable("(")
    False
    """
    try:
        re.compile(obj)
    except (TypeError, re.error):
        # TypeError: obj is not a str/bytes/compiled pattern.
        # re.error: obj is a string but not a syntactically valid pattern;
        # that also means "cannot be compiled", so answer False, not raise.
        return False
    else:
        return True
def is_dict_like(obj) -> bool:
    """
    Tell whether `obj` behaves like a dictionary.

    The object must expose ``__getitem__``, ``keys`` and ``__contains__``
    and must be an instance rather than a class.

    Parameters
    ----------
    obj : The object to check

    Returns
    -------
    is_dict_like : bool
        Whether `obj` has dict-like properties.

    Examples
    --------
    >>> is_dict_like({1: 2})
    True
    >>> is_dict_like([1, 2, 3])
    False
    >>> is_dict_like(dict)
    False
    >>> is_dict_like(dict())
    True
    """
    required = ("__getitem__", "keys", "__contains__")
    for attr in required:
        if not hasattr(obj, attr):
            return False
    # [GH 25196] exclude classes
    return not isinstance(obj, type)
def is_sequence(obj) -> bool:
    """
    Tell whether `obj` is a sized, iterable, non-string object.

    `obj` qualifies when both ``iter(obj)`` and ``len(obj)`` succeed and it
    is neither ``str`` nor ``bytes``.

    Parameters
    ----------
    obj : The object to check

    Returns
    -------
    is_sequence : bool
        Whether `obj` is a sequence of objects.

    Examples
    --------
    >>> l = [1, 2, 3]
    >>>
    >>> is_sequence(l)
    True
    >>> is_sequence(iter(l))
    False
    """
    try:
        iter(obj)  # Can iterate over it.
        len(obj)  # Has a length associated with it.
    except (TypeError, AttributeError):
        return False

    if isinstance(obj, (str, bytes)):
        return False
    return True
array-like object. + + This function takes a scalar or array-like object and indicates + whether values are missing (``NaN`` in numeric arrays, ``None`` or ``NaN`` + in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : scalar or array-like + Object to check for null or missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is missing. + + See Also + -------- + notna : Boolean inverse of pandas.isna. + Series.isna : Detect missing values in a Series. + DataFrame.isna : Detect missing values in a DataFrame. + Index.isna : Detect missing values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.isna('dog') + False + + >>> pd.isna(pd.NA) + True + + >>> pd.isna(np.nan) + True + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.isna(array) + array([[False, True, False], + [False, False, True]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, + ... "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[ns]', freq=None) + >>> pd.isna(index) + array([False, False, True, False]) + + For Series and DataFrame, the same type is returned, containing booleans. 
def _isna_new(obj):
    """
    Detect missing values in `obj`, dispatching on its type.

    The checks run in this order: scalar, MultiIndex (rejected), class
    object, array-like pandas/NumPy containers, other ``ABCGeneric``
    objects, plain lists, anything exposing ``__array__``. Any other
    object is missing only when it is ``None``.

    Raises
    ------
    NotImplementedError
        If `obj` is a MultiIndex.
    """
    if is_scalar(obj):
        return libmissing.checknull(obj)

    # hack (for now) because MI registers as ndarray
    if isinstance(obj, ABCMultiIndex):
        raise NotImplementedError("isna is not defined for MultiIndex")

    if isinstance(obj, type):
        return False

    arraylike_types = (
        ABCSeries,
        np.ndarray,
        ABCIndexClass,
        ABCExtensionArray,
        ABCDatetimeArray,
        ABCTimedeltaArray,
    )
    if isinstance(obj, arraylike_types):
        return _isna_ndarraylike(obj)

    if isinstance(obj, ABCGeneric):
        return obj._constructor(obj._data.isna(func=isna))

    if isinstance(obj, list):
        # Lists are inspected element-wise as an object array.
        return _isna_ndarraylike(np.asarray(obj, dtype=object))

    if hasattr(obj, "__array__"):
        return _isna_ndarraylike(np.asarray(obj))

    return obj is None
def _use_inf_as_na(key):
    """
    Option change callback for na/inf behaviour.

    Rebinds the module-level ``_isna`` to ``_isna_old`` or ``_isna_new``
    according to the current value of the option named by `key`.

    Parameters
    ----------
    key : str
        Name of the option, looked up with ``get_option``. A truthy value
        selects ``_isna_old`` (None, NaN, INF, -INF are null); a falsy
        value selects ``_isna_new`` (INF, -INF are not null).

    Notes
    -----
    This approach to setting global module values is discussed and
    approved here:

    * https://stackoverflow.com/questions/4859217/
      programmatically-creating-variables-in-python/4859312#4859312
    """
    chosen = _isna_old if get_option(key) else _isna_new
    globals()["_isna"] = chosen
def _isna_ndarraylike_old(obj):
    """
    Array-like missing-value detection for the "old" mode.

    In this mode the extension-array and numeric branches also flag
    positive and negative infinity. Returns a boolean mask, re-wrapped
    as a Series when `obj` is a Series.
    """
    is_extension = is_extension_array_dtype(obj)

    # Use the ``values`` attribute when the object has one.
    values = getattr(obj, "values", obj)
    dtype = values.dtype

    if is_extension:
        # For Index/Series work on the backing ``_values`` object,
        # otherwise on the object itself.
        if isinstance(obj, (ABCIndexClass, ABCSeries)):
            values = obj._values
        else:
            values = obj
        # Missing per the array's own isna(), or equal to -inf / +inf.
        result = values.isna() | (values == -np.inf) | (values == np.inf)
    elif is_string_dtype(dtype):
        # Working around NumPy ticket 1542
        shape = values.shape

        if is_string_like_dtype(dtype):
            # Nothing is flagged as missing in this case.
            result = np.zeros(values.shape, dtype=bool)
        else:
            # Element-wise check on the flattened values, then restore shape.
            # NOTE(review): presumably isnaobj_old also treats inf as
            # missing, like the rest of this path; confirm in libmissing.
            result = np.empty(shape, dtype=bool)
            vec = libmissing.isnaobj_old(values.ravel())
            result[:] = vec.reshape(shape)

    elif is_datetime64_dtype(dtype):
        # this is the NaT pattern
        result = values.view("i8") == iNaT
    else:
        # Everything that is not finite (NaN, +inf, -inf) counts as missing.
        result = ~np.isfinite(values)

    # box
    if isinstance(obj, ABCSeries):
        result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)

    return result
+ + This function takes a scalar or array-like object and indicates + whether values are valid (not missing, which is ``NaN`` in numeric + arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike). + + Parameters + ---------- + obj : array-like or object value + Object to check for *not* null or *non*-missing values. + + Returns + ------- + bool or array-like of bool + For scalar input, returns a scalar boolean. + For array input, returns an array of boolean indicating whether each + corresponding element is valid. + + See Also + -------- + isna : Boolean inverse of pandas.notna. + Series.notna : Detect valid values in a Series. + DataFrame.notna : Detect valid values in a DataFrame. + Index.notna : Detect valid values in an Index. + + Examples + -------- + Scalar arguments (including strings) result in a scalar boolean. + + >>> pd.notna('dog') + True + + >>> pd.notna(pd.NA) + False + + >>> pd.notna(np.nan) + False + + ndarrays result in an ndarray of booleans. + + >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]]) + >>> array + array([[ 1., nan, 3.], + [ 4., 5., nan]]) + >>> pd.notna(array) + array([[ True, False, True], + [ True, True, False]]) + + For indexes, an ndarray of booleans is returned. + + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, + ... "2017-07-08"]) + >>> index + DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], + dtype='datetime64[ns]', freq=None) + >>> pd.notna(index) + array([ True, True, False, True]) + + For Series and DataFrame, the same type is returned, containing booleans. 
def array_equivalent(left, right, strict_nan: bool = False) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations. False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
        Both inputs are passed through ``np.asarray`` first.
    strict_nan : bool, default False
        If True, consider NaN and None to be different.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """

    left, right = np.asarray(left), np.asarray(right)

    # shape compat
    if left.shape != right.shape:
        return False

    # Object arrays can contain None, NaN and NaT.
    # String dtypes must also take this path for NumPy 1.7.1 compat.
    if is_string_dtype(left) or is_string_dtype(right):

        if not strict_nan:
            # isna considers NaN and None to be equivalent.
            return lib.array_equivalent_object(
                ensure_object(left.ravel()), ensure_object(right.ravel())
            )

        # Strict mode: compare pairwise along the first axis so that each
        # kind of missing marker only matches the same kind.
        for left_value, right_value in zip(left, right):
            if left_value is NaT and right_value is not NaT:
                return False

            elif left_value is libmissing.NA and right_value is not libmissing.NA:
                return False

            elif isinstance(left_value, float) and np.isnan(left_value):
                # A float NaN on the left only matches a float NaN on the right.
                if not isinstance(right_value, float) or not np.isnan(right_value):
                    return False
            else:
                try:
                    if np.any(np.asarray(left_value != right_value)):
                        return False
                except TypeError as err:
                    # Two specific comparison failures mean "not equal";
                    # any other TypeError is re-raised.
                    if "Cannot compare tz-naive" in str(err):
                        # tzawareness compat failure, see GH#28507
                        return False
                    elif "boolean value of NA is ambiguous" in str(err):
                        return False
                    raise
        return True

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left) or is_complex_dtype(left):

        # empty
        if not (np.prod(left.shape) and np.prod(right.shape)):
            return True
        # Equal element-wise, or missing in both at the same position.
        return ((left == right) | (isna(left) & isna(right))).all()

    elif is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    elif needs_i8_conversion(left) or needs_i8_conversion(right):
        # datetime64, timedelta64, Period
        if not is_dtype_equal(left.dtype, right.dtype):
            return False

        # Compare the i8 views; execution falls through to the final
        # np.array_equal below.
        left = left.view("i8")
        right = right.view("i8")

    # if we have structured dtypes, compare first
    if left.dtype.type is np.void or right.dtype.type is np.void:
        if left.dtype != right.dtype:
            return False

    return np.array_equal(left, right)
def na_value_for_dtype(dtype, compat: bool = True):
    """
    Return the missing-value marker appropriate for `dtype`.

    Parameters
    ----------
    dtype : string / dtype
        Normalised with ``pandas_dtype`` before inspection.
    compat : bool, default True
        Only affects integer dtypes: when True the value is ``0``,
        otherwise ``np.nan``.

    Returns
    -------
    scalar
        ``dtype.na_value`` for extension dtypes; ``NaT`` for datetime64,
        tz-aware datetime64, timedelta64 and period dtypes; ``np.nan`` for
        floats; ``0`` or ``np.nan`` for integers (see `compat`); ``False``
        for booleans; ``np.nan`` for everything else.

    Examples
    --------
    >>> na_value_for_dtype(np.dtype('int64'))
    0
    >>> na_value_for_dtype(np.dtype('int64'), compat=False)
    nan
    >>> na_value_for_dtype(np.dtype('float64'))
    nan
    >>> na_value_for_dtype(np.dtype('bool'))
    False
    >>> na_value_for_dtype(np.dtype('datetime64[ns]'))
    NaT
    """
    dtype = pandas_dtype(dtype)

    if is_extension_array_dtype(dtype):
        return dtype.na_value

    datetimelike_checks = (
        is_datetime64_dtype,
        is_datetime64tz_dtype,
        is_timedelta64_dtype,
        is_period_dtype,
    )
    # any() stops at the first check that succeeds, evaluated in the
    # order the checks are listed.
    if any(check(dtype) for check in datetimelike_checks):
        return NaT

    if is_float_dtype(dtype):
        return np.nan

    if is_integer_dtype(dtype):
        return 0 if compat else np.nan

    if is_bool_dtype(dtype):
        return False

    return np.nan
def is_valid_nat_for_dtype(obj, dtype) -> bool:
    """
    isna check that excludes NA scalars of an incompatible type.

    Parameters
    ----------
    obj : object
    dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype

    Returns
    -------
    bool
        False when `obj` is not a scalar or is not missing. Otherwise True
        unless `obj` is a ``np.timedelta64`` (for kind ``"M"``), a
        ``np.datetime64`` (for kind ``"m"``), or either of those (for any
        other dtype).
    """
    if not (lib.is_scalar(obj) and isna(obj)):
        return False

    kind = dtype.kind
    if kind == "M":
        excluded = np.timedelta64
    elif kind == "m":
        excluded = np.datetime64
    else:
        # must be PeriodDType
        excluded = (np.datetime64, np.timedelta64)

    return not isinstance(obj, excluded)
validate_axis_style_args, + validate_bool_kwarg, + validate_percentile, +) + +from pandas.core.dtypes.cast import ( + cast_scalar_to_array, + coerce_to_dtypes, + find_common_type, + infer_dtype_from_scalar, + invalidate_string_dtypes, + maybe_cast_to_datetime, + maybe_convert_platform, + maybe_downcast_to_dtype, + maybe_infer_to_datetimelike, + maybe_upcast, + maybe_upcast_putmask, +) +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_platform_int, + infer_dtype_from_object, + is_bool_dtype, + is_dict_like, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_hashable, + is_integer, + is_integer_dtype, + is_iterator, + is_list_like, + is_named_tuple, + is_object_dtype, + is_scalar, + is_sequence, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndexClass, + ABCMultiIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna, notna + +from pandas.core import algorithms, common as com, nanops, ops +from pandas.core.accessor import CachedAccessor +from pandas.core.arrays import Categorical, ExtensionArray +from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray +from pandas.core.arrays.sparse import SparseFrameAccessor +from pandas.core.generic import NDFrame, _shared_docs +from pandas.core.groupby import generic as groupby_generic +from pandas.core.indexes import base as ibase +from pandas.core.indexes.api import Index, ensure_index, ensure_index_from_sequences +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.multi import maybe_droplevels +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable +from pandas.core.internals import BlockManager +from pandas.core.internals.construction import ( + arrays_to_mgr, + get_names_from_index, + init_dict, + init_ndarray, + masked_rec_array_to_mgr, + reorder_arrays, + sanitize_index, + 
to_arrays, +) +from pandas.core.ops.missing import dispatch_fill_zeros +from pandas.core.series import Series + +from pandas.io.common import get_filepath_or_buffer +from pandas.io.formats import console, format as fmt +from pandas.io.formats.printing import pprint_thing +import pandas.plotting + +if TYPE_CHECKING: + from pandas.io.formats.style import Styler + +# --------------------------------------------------------------------- +# Docstring templates + +_shared_doc_kwargs = dict( + axes="index, columns", + klass="DataFrame", + axes_single_arg="{0 or 'index', 1 or 'columns'}", + axis="""axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row.""", + optional_by=""" + by : str or list of str + Name or list of names to sort by. + + - if `axis` is 0 or `'index'` then `by` may contain index + levels and/or column labels. + - if `axis` is 1 or `'columns'` then `by` may contain column + levels and/or index labels. + + .. versionchanged:: 0.23.0 + + Allow specifying index or column level names.""", + versionadded_to_excel="", + optional_labels="""labels : array-like, optional + New labels / index to conform the axis specified by 'axis' to.""", + optional_axis="""axis : int or str, optional + Axis to target. Can be either the axis name ('index', 'columns') + or number (0, 1).""", +) + +_numeric_only_doc = """numeric_only : boolean, default None + Include only float, int, boolean data. If None, will attempt to use + everything, then use only numeric data +""" + +_merge_doc = """ +Merge DataFrame or named Series objects with a database-style join. + +The join is done on columns or indexes. If joining columns on +columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes +on indexes or indexes on a column or columns, the index will be passed on. + +Parameters +----------%s +right : DataFrame or named Series + Object to merge with. 
+how : {'left', 'right', 'outer', 'inner'}, default 'inner' + Type of merge to be performed. + + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order. + * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order. + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically. + * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys. +on : label or list + Column or index level names to join on. These must be found in both + DataFrames. If `on` is None and not merging on indexes then this defaults + to the intersection of the columns in both DataFrames. +left_on : label or list, or array-like + Column or index level names to join on in the left DataFrame. Can also + be an array or list of arrays of the length of the left DataFrame. + These arrays are treated as if they are columns. +right_on : label or list, or array-like + Column or index level names to join on in the right DataFrame. Can also + be an array or list of arrays of the length of the right DataFrame. + These arrays are treated as if they are columns. +left_index : bool, default False + Use the index from the left DataFrame as the join key(s). If it is a + MultiIndex, the number of keys in the other DataFrame (either the index + or a number of columns) must match the number of levels. +right_index : bool, default False + Use the index from the right DataFrame as the join key. Same caveats as + left_index. +sort : bool, default False + Sort the join keys lexicographically in the result DataFrame. If False, + the order of the join keys depends on the join type (how keyword). +suffixes : tuple of (str, str), default ('_x', '_y') + Suffix to apply to overlapping column names in the left and right + side, respectively. To raise an exception on overlapping columns use + (False, False). 
+copy : bool, default True + If False, avoid copy if possible. +indicator : bool or str, default False + If True, adds a column to output DataFrame called "_merge" with + information on the source of each row. + If string, column with information on source of each row will be added to + output DataFrame, and column will be named value of string. + Information column is Categorical-type and takes on a value of "left_only" + for observations whose merge key only appears in 'left' DataFrame, + "right_only" for observations whose merge key only appears in 'right' + DataFrame, and "both" if the observation's merge key is found in both. + +validate : str, optional + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": check if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": check if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": check if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + + .. versionadded:: 0.21.0 + +Returns +------- +DataFrame + A DataFrame of the two merged objects. + +See Also +-------- +merge_ordered : Merge with optional filling/interpolation. +merge_asof : Merge on nearest keys. +DataFrame.join : Similar method using indices. + +Notes +----- +Support for specifying index levels as the `on`, `left_on`, and +`right_on` parameters was added in version 0.23.0 +Support for merging named Series objects was added in version 0.24.0 + +Examples +-------- + +>>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], +... 'value': [1, 2, 3, 5]}) +>>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], +... 'value': [5, 6, 7, 8]}) +>>> df1 + lkey value +0 foo 1 +1 bar 2 +2 baz 3 +3 foo 5 +>>> df2 + rkey value +0 foo 5 +1 bar 6 +2 baz 7 +3 foo 8 + +Merge df1 and df2 on the lkey and rkey columns. The value columns have +the default suffixes, _x and _y, appended. 
+ +>>> df1.merge(df2, left_on='lkey', right_on='rkey') + lkey value_x rkey value_y +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 + +Merge DataFrames df1 and df2 with specified left and right suffixes +appended to any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', +... suffixes=('_left', '_right')) + lkey value_left rkey value_right +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 + +Merge DataFrames df1 and df2, but raise an exception if the DataFrames have +any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) +Traceback (most recent call last): +... +ValueError: columns overlap but no suffix specified: + Index(['value'], dtype='object') +""" + + +# ----------------------------------------------------------------------- +# DataFrame class + + +class DataFrame(NDFrame): + """ + Two-dimensional, size-mutable, potentially heterogeneous tabular data. + + Data structure also contains labeled axes (rows and columns). + Arithmetic operations align on both row and column labels. Can be + thought of as a dict-like container for Series objects. The primary + pandas data structure. + + Parameters + ---------- + data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame + Dict can contain Series, arrays, constants, or list-like objects. + + .. versionchanged:: 0.23.0 + If data is a dict, column order follows insertion-order for + Python 3.6 and later. + + .. versionchanged:: 0.25.0 + If data is a list of dicts, column order follows insertion-order + for Python 3.6 and later. + + index : Index or array-like + Index to use for resulting frame. Will default to RangeIndex if + no indexing information part of input data and no index provided. + columns : Index or array-like + Column labels to use for resulting frame. Will default to + RangeIndex (0, 1, 2, ..., n) if no column labels are provided. 
+ dtype : dtype, default None + Data type to force. Only a single dtype is allowed. If None, infer. + copy : bool, default False + Copy data from inputs. Only affects DataFrame / 2d ndarray input. + + See Also + -------- + DataFrame.from_records : Constructor from tuples, also record arrays. + DataFrame.from_dict : From dicts of Series, arrays, or dicts. + read_csv + read_table + read_clipboard + + Examples + -------- + Constructing DataFrame from a dictionary. + + >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> df = pd.DataFrame(data=d) + >>> df + col1 col2 + 0 1 3 + 1 2 4 + + Notice that the inferred dtype is int64. + + >>> df.dtypes + col1 int64 + col2 int64 + dtype: object + + To enforce a single dtype: + + >>> df = pd.DataFrame(data=d, dtype=np.int8) + >>> df.dtypes + col1 int8 + col2 int8 + dtype: object + + Constructing DataFrame from numpy ndarray: + + >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + ... columns=['a', 'b', 'c']) + >>> df2 + a b c + 0 1 2 3 + 1 4 5 6 + 2 7 8 9 + """ + + _typ = "dataframe" + + @property + def _constructor(self) -> Type["DataFrame"]: + return DataFrame + + _constructor_sliced: Type[Series] = Series + _deprecations: FrozenSet[str] = NDFrame._deprecations | frozenset([]) + _accessors: Set[str] = {"sparse"} + + @property + def _constructor_expanddim(self): + raise NotImplementedError("Not supported for DataFrames!") + + # ---------------------------------------------------------------------- + # Constructors + + def __init__( + self, + data=None, + index: Optional[Axes] = None, + columns: Optional[Axes] = None, + dtype: Optional[Dtype] = None, + copy: bool = False, + ): + if data is None: + data = {} + if dtype is not None: + dtype = self._validate_dtype(dtype) + + if isinstance(data, DataFrame): + data = data._data + + if isinstance(data, BlockManager): + mgr = self._init_mgr( + data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy + ) + elif isinstance(data, dict): + mgr = init_dict(data, 
index, columns, dtype=dtype) + elif isinstance(data, ma.MaskedArray): + import numpy.ma.mrecords as mrecords + + # masked recarray + if isinstance(data, mrecords.MaskedRecords): + mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy) + + # a masked array + else: + mask = ma.getmaskarray(data) + if mask.any(): + data, fill_value = maybe_upcast(data, copy=True) + data.soften_mask() # set hardmask False if it was True + data[mask] = fill_value + else: + data = data.copy() + mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + + elif isinstance(data, (np.ndarray, Series, Index)): + if data.dtype.names: + data_columns = list(data.dtype.names) + data = {k: data[k] for k in data_columns} + if columns is None: + columns = data_columns + mgr = init_dict(data, index, columns, dtype=dtype) + elif getattr(data, "name", None) is not None: + mgr = init_dict({data.name: data}, index, columns, dtype=dtype) + else: + mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + + # For data is list-like, or Iterable (will consume into list) + elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)): + if not isinstance(data, (abc.Sequence, ExtensionArray)): + data = list(data) + if len(data) > 0: + if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1: + if is_named_tuple(data[0]) and columns is None: + columns = data[0]._fields + arrays, columns = to_arrays(data, columns, dtype=dtype) + columns = ensure_index(columns) + + # set the index + if index is None: + if isinstance(data[0], Series): + index = get_names_from_index(data) + elif isinstance(data[0], Categorical): + index = ibase.default_index(len(data[0])) + else: + index = ibase.default_index(len(data)) + + mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) + else: + mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + else: + mgr = init_dict({}, index, columns, dtype=dtype) + else: + try: + arr = np.array(data, dtype=dtype, copy=copy) + 
except (ValueError, TypeError) as e: + exc = TypeError( + "DataFrame constructor called with " + f"incompatible data and dtype: {e}" + ) + raise exc from e + + if arr.ndim == 0 and index is not None and columns is not None: + values = cast_scalar_to_array( + (len(index), len(columns)), data, dtype=dtype + ) + mgr = init_ndarray( + values, index, columns, dtype=values.dtype, copy=False + ) + else: + raise ValueError("DataFrame constructor not properly called!") + + NDFrame.__init__(self, mgr, fastpath=True) + + # ---------------------------------------------------------------------- + + @property + def axes(self) -> List[Index]: + """ + Return a list representing the axes of the DataFrame. + + It has the row axis labels and column axis labels as the only members. + They are returned in that order. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.axes + [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], + dtype='object')] + """ + return [self.index, self.columns] + + @property + def shape(self) -> Tuple[int, int]: + """ + Return a tuple representing the dimensionality of the DataFrame. + + See Also + -------- + ndarray.shape + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.shape + (2, 2) + + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4], + ... 'col3': [5, 6]}) + >>> df.shape + (2, 3) + """ + return len(self.index), len(self.columns) + + @property + def _is_homogeneous_type(self) -> bool: + """ + Whether all the columns in a DataFrame have the same type. + + Returns + ------- + bool + + See Also + -------- + Index._is_homogeneous_type : Whether the object has a single + dtype. + MultiIndex._is_homogeneous_type : Whether all the levels of a + MultiIndex have the same dtype. 
+ + Examples + -------- + >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous_type + True + >>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous_type + False + + Items with the same type but different sizes are considered + different types. + + >>> DataFrame({ + ... "A": np.array([1, 2], dtype=np.int32), + ... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type + False + """ + if self._data.any_extension_types: + return len({block.dtype for block in self._data.blocks}) == 1 + else: + return not self._data.is_mixed_type + + # ---------------------------------------------------------------------- + # Rendering Methods + + def _repr_fits_vertical_(self) -> bool: + """ + Check length against max_rows. + """ + max_rows = get_option("display.max_rows") + return len(self) <= max_rows + + def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: + """ + Check if full repr fits in horizontal boundaries imposed by the display + options width and max_columns. + + In case off non-interactive session, no boundaries apply. + + `ignore_width` is here so ipnb+HTML output can behave the way + users expect. display.max_columns remains in effect. 
+ GH3541, GH3573 + """ + width, height = console.get_console_size() + max_columns = get_option("display.max_columns") + nb_columns = len(self.columns) + + # exceed max columns + if (max_columns and nb_columns > max_columns) or ( + (not ignore_width) and width and nb_columns > (width // 2) + ): + return False + + # used by repr_html under IPython notebook or scripts ignore terminal + # dims + if ignore_width or not console.in_interactive_session(): + return True + + if get_option("display.width") is not None or console.in_ipython_frontend(): + # check at least the column row for excessive width + max_rows = 1 + else: + max_rows = get_option("display.max_rows") + + # when auto-detecting, so width=None and not in ipython front end + # check whether repr fits horizontal by actually checking + # the width of the rendered repr + buf = StringIO() + + # only care about the stuff we'll actually print out + # and to_string on entire frame may be expensive + d = self + + if not (max_rows is None): # unlimited rows + # min of two, where one may be None + d = d.iloc[: min(max_rows, len(d))] + else: + return True + + d.to_string(buf=buf) + value = buf.getvalue() + repr_width = max(len(l) for l in value.split("\n")) + + return repr_width < width + + def _info_repr(self) -> bool: + """ + True if the repr should show the info view. + """ + info_repr_option = get_option("display.large_repr") == "info" + return info_repr_option and not ( + self._repr_fits_horizontal_() and self._repr_fits_vertical_() + ) + + def __repr__(self) -> str: + """ + Return a string representation for a particular DataFrame. 
+ """ + buf = StringIO("") + if self._info_repr(): + self.info(buf=buf) + return buf.getvalue() + + max_rows = get_option("display.max_rows") + min_rows = get_option("display.min_rows") + max_cols = get_option("display.max_columns") + max_colwidth = get_option("display.max_colwidth") + show_dimensions = get_option("display.show_dimensions") + if get_option("display.expand_frame_repr"): + width, _ = console.get_console_size() + else: + width = None + self.to_string( + buf=buf, + max_rows=max_rows, + min_rows=min_rows, + max_cols=max_cols, + line_width=width, + max_colwidth=max_colwidth, + show_dimensions=show_dimensions, + ) + + return buf.getvalue() + + def _repr_html_(self) -> Optional[str]: + """ + Return a html representation for a particular DataFrame. + + Mainly for IPython notebook. + """ + if self._info_repr(): + buf = StringIO("") + self.info(buf=buf) + # need to escape the , should be the first line. + val = buf.getvalue().replace("<", r"<", 1) + val = val.replace(">", r">", 1) + return "
    " + val + "
    " + + if get_option("display.notebook_repr_html"): + max_rows = get_option("display.max_rows") + min_rows = get_option("display.min_rows") + max_cols = get_option("display.max_columns") + show_dimensions = get_option("display.show_dimensions") + + formatter = fmt.DataFrameFormatter( + self, + columns=None, + col_space=None, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + justify=None, + index_names=True, + header=True, + index=True, + bold_rows=True, + escape=True, + max_rows=max_rows, + min_rows=min_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=".", + table_id=None, + render_links=False, + ) + return formatter.to_html(notebook=True) + else: + return None + + @Substitution( + header_type="bool or sequence", + header="Write out the column names. If a list of strings " + "is given, it is assumed to be aliases for the " + "column names", + col_space_type="int", + col_space="The minimum width of each column", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + def to_string( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + columns: Optional[Sequence[str]] = None, + col_space: Optional[int] = None, + header: Union[bool, Sequence[str]] = True, + index: bool = True, + na_rep: str = "NaN", + formatters: Optional[fmt.formatters_type] = None, + float_format: Optional[fmt.float_format_type] = None, + sparsify: Optional[bool] = None, + index_names: bool = True, + justify: Optional[str] = None, + max_rows: Optional[int] = None, + min_rows: Optional[int] = None, + max_cols: Optional[int] = None, + show_dimensions: bool = False, + decimal: str = ".", + line_width: Optional[int] = None, + max_colwidth: Optional[int] = None, + encoding: Optional[str] = None, + ) -> Optional[str]: + """ + Render a DataFrame to a console-friendly tabular output. + %(shared_params)s + line_width : int, optional + Width to wrap a line in characters. 
+ max_colwidth : int, optional + Max width to truncate each column in characters. By default, no limit. + + .. versionadded:: 1.0.0 + encoding : str, default "utf-8" + Set character encoding. + + .. versionadded:: 1.0 + %(returns)s + See Also + -------- + to_html : Convert DataFrame to HTML. + + Examples + -------- + >>> d = {'col1': [1, 2, 3], 'col2': [4, 5, 6]} + >>> df = pd.DataFrame(d) + >>> print(df.to_string()) + col1 col2 + 0 1 4 + 1 2 5 + 2 3 6 + """ + + from pandas import option_context + + with option_context("display.max_colwidth", max_colwidth): + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + justify=justify, + index_names=index_names, + header=header, + index=index, + min_rows=min_rows, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + line_width=line_width, + ) + return formatter.to_string(buf=buf, encoding=encoding) + + # ---------------------------------------------------------------------- + + @property + def style(self) -> "Styler": + """ + Returns a Styler object. + + Contains methods for building a styled HTML representation of the DataFrame. + a styled HTML representation fo the DataFrame. + + See Also + -------- + io.formats.style.Styler + """ + from pandas.io.formats.style import Styler + + return Styler(self) + + _shared_docs[ + "items" + ] = r""" + Iterate over (column name, Series) pairs. + + Iterates over the DataFrame columns, returning a tuple with + the column name and the content as a Series. + + Yields + ------ + label : object + The column names for the DataFrame being iterated over. + content : Series + The column entries belonging to each label, as a Series. + + See Also + -------- + DataFrame.iterrows : Iterate over DataFrame rows as + (index, Series) pairs. + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples + of the values. 
+ + Examples + -------- + >>> df = pd.DataFrame({'species': ['bear', 'bear', 'marsupial'], + ... 'population': [1864, 22000, 80000]}, + ... index=['panda', 'polar', 'koala']) + >>> df + species population + panda bear 1864 + polar bear 22000 + koala marsupial 80000 + >>> for label, content in df.items(): + ... print('label:', label) + ... print('content:', content, sep='\n') + ... + label: species + content: + panda bear + polar bear + koala marsupial + Name: species, dtype: object + label: population + content: + panda 1864 + polar 22000 + koala 80000 + Name: population, dtype: int64 + """ + + @Appender(_shared_docs["items"]) + def items(self) -> Iterable[Tuple[Optional[Hashable], Series]]: + if self.columns.is_unique and hasattr(self, "_item_cache"): + for k in self.columns: + yield k, self._get_item_cache(k) + else: + for i, k in enumerate(self.columns): + yield k, self._ixs(i, axis=1) + + @Appender(_shared_docs["items"]) + def iteritems(self) -> Iterable[Tuple[Optional[Hashable], Series]]: + yield from self.items() + + def iterrows(self) -> Iterable[Tuple[Optional[Hashable], Series]]: + """ + Iterate over DataFrame rows as (index, Series) pairs. + + Yields + ------ + index : label or tuple of label + The index of the row. A tuple for a `MultiIndex`. + data : Series + The data of the row as a Series. + + it : generator + A generator that iterates over the rows of the frame. + + See Also + -------- + DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values. + DataFrame.items : Iterate over (column name, Series) pairs. + + Notes + ----- + + 1. Because ``iterrows`` returns a Series for each row, + it does **not** preserve dtypes across the rows (dtypes are + preserved across columns for DataFrames). 
For example, + + >>> df = pd.DataFrame([[1, 1.5]], columns=['int', 'float']) + >>> row = next(df.iterrows())[1] + >>> row + int 1.0 + float 1.5 + Name: 0, dtype: float64 + >>> print(row['int'].dtype) + float64 + >>> print(df['int'].dtype) + int64 + + To preserve dtypes while iterating over the rows, it is better + to use :meth:`itertuples` which returns namedtuples of the values + and which is generally faster than ``iterrows``. + + 2. You should **never modify** something you are iterating over. + This is not guaranteed to work in all cases. Depending on the + data types, the iterator returns a copy and not a view, and writing + to it will have no effect. + """ + columns = self.columns + klass = self._constructor_sliced + for k, v in zip(self.index, self.values): + s = klass(v, index=columns, name=k) + yield k, s + + def itertuples(self, index=True, name="Pandas"): + """ + Iterate over DataFrame rows as namedtuples. + + Parameters + ---------- + index : bool, default True + If True, return the index as the first element of the tuple. + name : str or None, default "Pandas" + The name of the returned namedtuples or None to return regular + tuples. + + Returns + ------- + iterator + An object to iterate over namedtuples for each row in the + DataFrame with the first field possibly being the index and + following fields being the column values. + + See Also + -------- + DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) + pairs. + DataFrame.items : Iterate over (column name, Series) pairs. + + Notes + ----- + The column names will be renamed to positional names if they are + invalid Python identifiers, repeated, or start with an underscore. + On python versions < 3.7 regular tuples are returned for DataFrames + with a large number of columns (>254). + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]}, + ... 
index=['dog', 'hawk']) + >>> df + num_legs num_wings + dog 4 0 + hawk 2 2 + >>> for row in df.itertuples(): + ... print(row) + ... + Pandas(Index='dog', num_legs=4, num_wings=0) + Pandas(Index='hawk', num_legs=2, num_wings=2) + + By setting the `index` parameter to False we can remove the index + as the first element of the tuple: + + >>> for row in df.itertuples(index=False): + ... print(row) + ... + Pandas(num_legs=4, num_wings=0) + Pandas(num_legs=2, num_wings=2) + + With the `name` parameter set we set a custom name for the yielded + namedtuples: + + >>> for row in df.itertuples(name='Animal'): + ... print(row) + ... + Animal(Index='dog', num_legs=4, num_wings=0) + Animal(Index='hawk', num_legs=2, num_wings=2) + """ + arrays = [] + fields = list(self.columns) + if index: + arrays.append(self.index) + fields.insert(0, "Index") + + # use integer indexing because of possible duplicate column names + arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) + + # Python versions before 3.7 support at most 255 arguments to constructors + can_return_named_tuples = PY37 or len(self.columns) + index < 255 + if name is not None and can_return_named_tuples: + itertuple = collections.namedtuple(name, fields, rename=True) + return map(itertuple._make, zip(*arrays)) + + # fallback to regular tuples + return zip(*arrays) + + def __len__(self) -> int: + """ + Returns length of info axis, but here we use the index. + """ + return len(self.index) + + def dot(self, other): + """ + Compute the matrix multiplication between the DataFrame and other. + + This method computes the matrix product between the DataFrame and the + values of an other Series, DataFrame or a numpy array. + + It can also be called using ``self @ other`` in Python >= 3.5. + + Parameters + ---------- + other : Series, DataFrame or array-like + The other object to compute the matrix product with. 
+ + Returns + ------- + Series or DataFrame + If other is a Series, return the matrix product between self and + other as a Serie. If other is a DataFrame or a numpy.array, return + the matrix product of self and other in a DataFrame of a np.array. + + See Also + -------- + Series.dot: Similar method for Series. + + Notes + ----- + The dimensions of DataFrame and other must be compatible in order to + compute the matrix multiplication. In addition, the column names of + DataFrame and the index of other must contain the same values, as they + will be aligned prior to the multiplication. + + The dot method for Series computes the inner product, instead of the + matrix product here. + + Examples + -------- + Here we multiply a DataFrame with a Series. + + >>> df = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + >>> s = pd.Series([1, 1, 2, 1]) + >>> df.dot(s) + 0 -4 + 1 5 + dtype: int64 + + Here we multiply a DataFrame with another DataFrame. + + >>> other = pd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> df.dot(other) + 0 1 + 0 1 4 + 1 2 2 + + Note that the dot method give the same result as @ + + >>> df @ other + 0 1 + 0 1 4 + 1 2 2 + + The dot method works also if other is an np.array. + + >>> arr = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + >>> df.dot(arr) + 0 1 + 0 1 4 + 1 2 2 + + Note how shuffling of the objects does not change the result. 
+ + >>> s2 = s.reindex([1, 0, 2, 3]) + >>> df.dot(s2) + 0 -4 + 1 5 + dtype: int64 + """ + if isinstance(other, (Series, DataFrame)): + common = self.columns.union(other.index) + if len(common) > len(self.columns) or len(common) > len(other.index): + raise ValueError("matrices are not aligned") + + left = self.reindex(columns=common, copy=False) + right = other.reindex(index=common, copy=False) + lvals = left.values + rvals = right.values + else: + left = self + lvals = self.values + rvals = np.asarray(other) + if lvals.shape[1] != rvals.shape[0]: + raise ValueError( + f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" + ) + + if isinstance(other, DataFrame): + return self._constructor( + np.dot(lvals, rvals), index=left.index, columns=other.columns + ) + elif isinstance(other, Series): + return Series(np.dot(lvals, rvals), index=left.index) + elif isinstance(rvals, (np.ndarray, Index)): + result = np.dot(lvals, rvals) + if result.ndim == 2: + return self._constructor(result, index=left.index) + else: + return Series(result, index=left.index) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + def __matmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(other) + + def __rmatmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.T.dot(np.transpose(other)).T + + # ---------------------------------------------------------------------- + # IO methods (to / from other formats) + + @classmethod + def from_dict(cls, data, orient="columns", dtype=None, columns=None) -> "DataFrame": + """ + Construct DataFrame from dict of array-like or dicts. + + Creates DataFrame object from dictionary by columns or by index + allowing dtype specification. + + Parameters + ---------- + data : dict + Of the form {field : array-like} or {field : dict}. 
+ orient : {'columns', 'index'}, default 'columns' + The "orientation" of the data. If the keys of the passed dict + should be the columns of the resulting DataFrame, pass 'columns' + (default). Otherwise if the keys should be rows, pass 'index'. + dtype : dtype, default None + Data type to force, otherwise infer. + columns : list, default None + Column labels to use when ``orient='index'``. Raises a ValueError + if used with ``orient='columns'``. + + .. versionadded:: 0.23.0 + + Returns + ------- + DataFrame + + See Also + -------- + DataFrame.from_records : DataFrame from ndarray (structured + dtype), list of tuples, dict, or DataFrame. + DataFrame : DataFrame object creation using constructor. + + Examples + -------- + By default the keys of the dict become the DataFrame columns: + + >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']} + >>> pd.DataFrame.from_dict(data) + col_1 col_2 + 0 3 a + 1 2 b + 2 1 c + 3 0 d + + Specify ``orient='index'`` to create the DataFrame using dictionary + keys as rows: + + >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']} + >>> pd.DataFrame.from_dict(data, orient='index') + 0 1 2 3 + row_1 3 2 1 0 + row_2 a b c d + + When using the 'index' orientation, the column names can be + specified manually: + + >>> pd.DataFrame.from_dict(data, orient='index', + ... 
columns=['A', 'B', 'C', 'D']) + A B C D + row_1 3 2 1 0 + row_2 a b c d + """ + index = None + orient = orient.lower() + if orient == "index": + if len(data) > 0: + # TODO speed up Series case + if isinstance(list(data.values())[0], (Series, dict)): + data = _from_nested_dict(data) + else: + data, index = list(data.values()), list(data.keys()) + elif orient == "columns": + if columns is not None: + raise ValueError("cannot use columns parameter with orient='columns'") + else: # pragma: no cover + raise ValueError("only recognize index or columns for orient") + + return cls(data, index=index, columns=columns, dtype=dtype) + + def to_numpy(self, dtype=None, copy=False) -> np.ndarray: + """ + Convert the DataFrame to a NumPy array. + + .. versionadded:: 0.24.0 + + By default, the dtype of the returned array will be the common NumPy + dtype of all types in the DataFrame. For example, if the dtypes are + ``float16`` and ``float32``, the results dtype will be ``float32``. + This may require copying data and coercing values, which may be + expensive. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to pass to :meth:`numpy.asarray`. + copy : bool, default False + Whether to ensure that the returned value is a not a view on + another array. Note that ``copy=False`` does not *ensure* that + ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that + a copy is made, even if not strictly necessary. + + Returns + ------- + numpy.ndarray + + See Also + -------- + Series.to_numpy : Similar method for Series. + + Examples + -------- + >>> pd.DataFrame({"A": [1, 2], "B": [3, 4]}).to_numpy() + array([[1, 3], + [2, 4]]) + + With heterogeneous data, the lowest common type will have to + be used. + + >>> df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]}) + >>> df.to_numpy() + array([[1. , 3. ], + [2. , 4.5]]) + + For a mix of numeric and non-numeric types, the output array will + have object dtype. 
+ + >>> df['C'] = pd.date_range('2000', periods=2) + >>> df.to_numpy() + array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], + [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) + """ + result = np.array(self.values, dtype=dtype, copy=copy) + return result + + def to_dict(self, orient="dict", into=dict): + """ + Convert the DataFrame to a dictionary. + + The type of the key-value pairs can be customized with the parameters + (see below). + + Parameters + ---------- + orient : str {'dict', 'list', 'series', 'split', 'records', 'index'} + Determines the type of the values of the dictionary. + + - 'dict' (default) : dict like {column -> {index -> value}} + - 'list' : dict like {column -> [values]} + - 'series' : dict like {column -> Series(values)} + - 'split' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values]} + - 'records' : list like + [{column -> value}, ... , {column -> value}] + - 'index' : dict like {index -> {column -> value}} + + Abbreviations are allowed. `s` indicates `series` and `sp` + indicates `split`. + + into : class, default dict + The collections.abc.Mapping subclass used for all Mappings + in the return value. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + .. versionadded:: 0.21.0 + + Returns + ------- + dict, list or collections.abc.Mapping + Return a collections.abc.Mapping object representing the DataFrame. + The resulting transformation depends on the `orient` parameter. + + See Also + -------- + DataFrame.from_dict: Create a DataFrame from a dictionary. + DataFrame.to_json: Convert a DataFrame to JSON format. + + Examples + -------- + >>> df = pd.DataFrame({'col1': [1, 2], + ... 'col2': [0.5, 0.75]}, + ... 
index=['row1', 'row2']) + >>> df + col1 col2 + row1 1 0.50 + row2 2 0.75 + >>> df.to_dict() + {'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}} + + You can specify the return orientation. + + >>> df.to_dict('series') + {'col1': row1 1 + row2 2 + Name: col1, dtype: int64, + 'col2': row1 0.50 + row2 0.75 + Name: col2, dtype: float64} + + >>> df.to_dict('split') + {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], + 'data': [[1, 0.5], [2, 0.75]]} + + >>> df.to_dict('records') + [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}] + + >>> df.to_dict('index') + {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}} + + You can also specify the mapping type. + + >>> from collections import OrderedDict, defaultdict + >>> df.to_dict(into=OrderedDict) + OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])), + ('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))]) + + If you want a `defaultdict`, you need to initialize it: + + >>> dd = defaultdict(list) + >>> df.to_dict('records', into=dd) + [defaultdict(, {'col1': 1, 'col2': 0.5}), + defaultdict(, {'col1': 2, 'col2': 0.75})] + """ + if not self.columns.is_unique: + warnings.warn( + "DataFrame columns are not unique, some columns will be omitted.", + UserWarning, + stacklevel=2, + ) + # GH16122 + into_c = com.standardize_mapping(into) + if orient.lower().startswith("d"): + return into_c((k, v.to_dict(into)) for k, v in self.items()) + elif orient.lower().startswith("l"): + return into_c((k, v.tolist()) for k, v in self.items()) + elif orient.lower().startswith("sp"): + return into_c( + ( + ("index", self.index.tolist()), + ("columns", self.columns.tolist()), + ( + "data", + [ + list(map(com.maybe_box_datetimelike, t)) + for t in self.itertuples(index=False, name=None) + ], + ), + ) + ) + elif orient.lower().startswith("s"): + return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items()) + elif orient.lower().startswith("r"): + columns = self.columns.tolist() + 
rows = ( + dict(zip(columns, row)) + for row in self.itertuples(index=False, name=None) + ) + return [ + into_c((k, com.maybe_box_datetimelike(v)) for k, v in row.items()) + for row in rows + ] + elif orient.lower().startswith("i"): + if not self.index.is_unique: + raise ValueError("DataFrame index must be unique for orient='index'.") + return into_c( + (t[0], dict(zip(self.columns, t[1:]))) + for t in self.itertuples(name=None) + ) + else: + raise ValueError(f"orient '{orient}' not understood") + + def to_gbq( + self, + destination_table, + project_id=None, + chunksize=None, + reauth=False, + if_exists="fail", + auth_local_webserver=False, + table_schema=None, + location=None, + progress_bar=True, + credentials=None, + ) -> None: + """ + Write a DataFrame to a Google BigQuery table. + + This function requires the `pandas-gbq package + `__. + + See the `How to authenticate with Google BigQuery + `__ + guide for authentication instructions. + + Parameters + ---------- + destination_table : str + Name of table to be written, in the form ``dataset.tablename``. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. + chunksize : int, optional + Number of rows to be inserted in each chunk from the dataframe. + Set to ``None`` to load the whole dataframe at once. + reauth : bool, default False + Force Google BigQuery to re-authenticate the user. This is useful + if multiple accounts are used. + if_exists : str, default 'fail' + Behavior when the destination table exists. Value can be one of: + + ``'fail'`` + If table exists raise pandas_gbq.gbq.TableCreationError. + ``'replace'`` + If table exists, drop it, recreate it, and insert data. + ``'append'`` + If table exists, insert data. Create if does not exist. + auth_local_webserver : bool, default False + Use the `local webserver flow`_ instead of the `console flow`_ + when getting user credentials. + + .. 
_local webserver flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. _console flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. + table_schema : list of dicts, optional + List of BigQuery table fields to which according DataFrame + columns conform to, e.g. ``[{'name': 'col1', 'type': + 'STRING'},...]``. If schema is not provided, it will be + generated according to dtypes of DataFrame columns. See + BigQuery API documentation on available names of a field. + + *New in version 0.3.1 of pandas-gbq*. + location : str, optional + Location where the load job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of the + target dataset. + + *New in version 0.5.0 of pandas-gbq*. + progress_bar : bool, default True + Use the library `tqdm` to show the progress bar for the upload, + chunk by chunk. + + *New in version 0.5.0 of pandas-gbq*. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to + override default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service + Account :class:`google.oauth2.service_account.Credentials` + directly. + + *New in version 0.8.0 of pandas-gbq*. + + .. versionadded:: 0.24.0 + + See Also + -------- + pandas_gbq.to_gbq : This function in the pandas-gbq library. + read_gbq : Read a DataFrame from Google BigQuery. 
+ """ + from pandas.io import gbq + + gbq.to_gbq( + self, + destination_table, + project_id=project_id, + chunksize=chunksize, + reauth=reauth, + if_exists=if_exists, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, + location=location, + progress_bar=progress_bar, + credentials=credentials, + ) + + @classmethod + def from_records( + cls, + data, + index=None, + exclude=None, + columns=None, + coerce_float=False, + nrows=None, + ) -> "DataFrame": + """ + Convert structured or record ndarray to DataFrame. + + Parameters + ---------- + data : ndarray (structured dtype), list of tuples, dict, or DataFrame + index : str, list of fields, array-like + Field of array to use as the index, alternately a specific set of + input labels to use. + exclude : sequence, default None + Columns or fields to exclude. + columns : sequence, default None + Column names to use. If the passed data do not have names + associated with them, this argument provides names for the + columns. Otherwise this argument indicates the order of the columns + in the result (any names not found in the data will become all-NA + columns). + coerce_float : bool, default False + Attempt to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + nrows : int, default None + Number of rows to read if data is an iterator. 
+ + Returns + ------- + DataFrame + """ + + # Make a copy of the input columns so we can modify it + if columns is not None: + columns = ensure_index(columns) + + if is_iterator(data): + if nrows == 0: + return cls() + + try: + first_row = next(data) + except StopIteration: + return cls(index=index, columns=columns) + + dtype = None + if hasattr(first_row, "dtype") and first_row.dtype.names: + dtype = first_row.dtype + + values = [first_row] + + if nrows is None: + values += data + else: + values.extend(itertools.islice(data, nrows - 1)) + + if dtype is not None: + data = np.array(values, dtype=dtype) + else: + data = values + + if isinstance(data, dict): + if columns is None: + columns = arr_columns = ensure_index(sorted(data)) + arrays = [data[k] for k in columns] + else: + arrays = [] + arr_columns = [] + for k, v in data.items(): + if k in columns: + arr_columns.append(k) + arrays.append(v) + + arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns) + + elif isinstance(data, (np.ndarray, DataFrame)): + arrays, columns = to_arrays(data, columns) + if columns is not None: + columns = ensure_index(columns) + arr_columns = columns + else: + arrays, arr_columns = to_arrays(data, columns, coerce_float=coerce_float) + + arr_columns = ensure_index(arr_columns) + if columns is not None: + columns = ensure_index(columns) + else: + columns = arr_columns + + if exclude is None: + exclude = set() + else: + exclude = set(exclude) + + result_index = None + if index is not None: + if isinstance(index, str) or not hasattr(index, "__iter__"): + i = columns.get_loc(index) + exclude.add(index) + if len(arrays) > 0: + result_index = Index(arrays[i], name=index) + else: + result_index = Index([], name=index) + else: + try: + index_data = [arrays[arr_columns.get_loc(field)] for field in index] + except (KeyError, TypeError): + # raised by get_loc, see GH#29258 + result_index = index + else: + result_index = ensure_index_from_sequences(index_data, names=index) + 
exclude.update(index) + + if any(exclude): + arr_exclude = [x for x in exclude if x in arr_columns] + to_remove = [arr_columns.get_loc(col) for col in arr_exclude] + arrays = [v for i, v in enumerate(arrays) if i not in to_remove] + + arr_columns = arr_columns.drop(arr_exclude) + columns = columns.drop(exclude) + + mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns) + + return cls(mgr) + + def to_records( + self, index=True, column_dtypes=None, index_dtypes=None + ) -> np.recarray: + """ + Convert DataFrame to a NumPy record array. + + Index will be included as the first field of the record array if + requested. + + Parameters + ---------- + index : bool, default True + Include index in resulting record array, stored in 'index' + field or using the index label, if set. + column_dtypes : str, type, dict, default None + .. versionadded:: 0.24.0 + + If a string or type, the data type to store all columns. If + a dictionary, a mapping of column names and indices (zero-indexed) + to specific data types. + index_dtypes : str, type, dict, default None + .. versionadded:: 0.24.0 + + If a string or type, the data type to store all index levels. If + a dictionary, a mapping of index level names and indices + (zero-indexed) to specific data types. + + This mapping is applied only if `index=True`. + + Returns + ------- + numpy.recarray + NumPy ndarray with the DataFrame labels as fields and each row + of the DataFrame as entries. + + See Also + -------- + DataFrame.from_records: Convert structured or record ndarray + to DataFrame. + numpy.recarray: An ndarray that allows field access using + attributes, analogous to typed columns in a + spreadsheet. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2], 'B': [0.5, 0.75]}, + ... 
index=['a', 'b']) + >>> df + A B + a 1 0.50 + b 2 0.75 + >>> df.to_records() + rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)], + dtype=[('index', 'O'), ('A', '>> df.index = df.index.rename("I") + >>> df.to_records() + rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)], + dtype=[('I', 'O'), ('A', '>> df.to_records(index=False) + rec.array([(1, 0.5 ), (2, 0.75)], + dtype=[('A', '>> df.to_records(column_dtypes={"A": "int32"}) + rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)], + dtype=[('I', 'O'), ('A', '>> df.to_records(index_dtypes=">> index_dtypes = f">> df.to_records(index_dtypes=index_dtypes) + rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)], + dtype=[('I', 'S1'), ('A', ' "DataFrame": + mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) + return cls(mgr) + + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_stata( + self, + path, + convert_dates=None, + write_index=True, + byteorder=None, + time_stamp=None, + data_label=None, + variable_labels=None, + version=114, + convert_strl=None, + ): + """ + Export DataFrame object to Stata dta format. + + Writes the DataFrame to a Stata dataset file. + "dta" files contain a Stata dataset. + + Parameters + ---------- + path : str, buffer or path object + String, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() function. If using a buffer + then the buffer will not be automatically closed after the file + data has been written. + + .. versionchanged:: 1.0.0 + + Previously this was "fname" + + convert_dates : dict + Dictionary mapping columns containing datetime types to stata + internal format to use when writing the dates. Options are 'tc', + 'td', 'tm', 'tw', 'th', 'tq', 'ty'. Column can be either an integer + or a name. Datetime columns that do not have a conversion type + specified will be converted to 'tc'. Raises NotImplementedError if + a datetime column has timezone information. + write_index : bool + Write the index to Stata dataset. 
+ byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder`. + time_stamp : datetime + A datetime to use as file creation date. Default is the current + time. + data_label : str, optional + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as + values. Each label must be 80 characters or smaller. + version : {114, 117, 118, 119, None}, default 114 + Version to use in the output dta file. Set to None to let pandas + decide between 118 or 119 formats depending on the number of + columns in the frame. Version 114 can be read by Stata 10 and + later. Version 117 can be read by Stata 13 or later. Version 118 + is supported in Stata 14 and later. Version 119 is supported in + Stata 15 and later. Version 114 limits string variables to 244 + characters or fewer while versions 117 and later allow strings + with lengths up to 2,000,000 characters. Versions 118 and 119 + support Unicode characters, and version 119 supports more than + 32,767 variables. + + .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + + Added support for formats 118 and 119. + + convert_strl : list, optional + List of column names to convert to string columns to Stata StrL + format. Only available if version is 117. Storing strings in the + StrL format can produce smaller dta files if strings have more than + 8 characters and values are repeated. + + .. versionadded:: 0.23.0 + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + * Column dtype is not representable in Stata + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + See Also + -------- + read_stata : Import Stata data files. + io.stata.StataWriter : Low-level writer for Stata data files. 
+ io.stata.StataWriter117 : Low-level writer for version 117 files. + + Examples + -------- + >>> df = pd.DataFrame({'animal': ['falcon', 'parrot', 'falcon', + ... 'parrot'], + ... 'speed': [350, 18, 361, 15]}) + >>> df.to_stata('animals.dta') # doctest: +SKIP + """ + if version not in (114, 117, 118, 119, None): + raise ValueError("Only formats 114, 117, 118 and 119 are supported.") + if version == 114: + if convert_strl is not None: + raise ValueError("strl is not supported in format 114") + from pandas.io.stata import StataWriter as statawriter + elif version == 117: + from pandas.io.stata import StataWriter117 as statawriter + else: # versions 118 and 119 + from pandas.io.stata import StataWriterUTF8 as statawriter + + kwargs = {} + if version is None or version >= 117: + # strl conversion is only supported >= 117 + kwargs["convert_strl"] = convert_strl + if version is None or version >= 118: + # Specifying the version is only supported for UTF8 (118 or 119) + kwargs["version"] = version + + writer = statawriter( + path, + self, + convert_dates=convert_dates, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + write_index=write_index, + variable_labels=variable_labels, + **kwargs, + ) + writer.write_file() + + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_feather(self, path) -> None: + """ + Write out the binary feather-format for DataFrames. + + Parameters + ---------- + path : str + String file path. + """ + from pandas.io.feather_format import to_feather + + to_feather(self, path) + + @Appender( + """ + Examples + -------- + >>> df = pd.DataFrame( + ... data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]} + ... 
) + >>> print(df.to_markdown()) + | | animal_1 | animal_2 | + |---:|:-----------|:-----------| + | 0 | elk | dog | + | 1 | pig | quetzal | + """ + ) + @Substitution(klass="DataFrame") + @Appender(_shared_docs["to_markdown"]) + def to_markdown( + self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs + ) -> Optional[str]: + kwargs.setdefault("headers", "keys") + kwargs.setdefault("tablefmt", "pipe") + tabulate = import_optional_dependency("tabulate") + result = tabulate.tabulate(self, **kwargs) + if buf is None: + return result + buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode) + assert buf is not None # Help mypy. + buf.writelines(result) + return None + + @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + def to_parquet( + self, + path, + engine="auto", + compression="snappy", + index=None, + partition_cols=None, + **kwargs, + ) -> None: + """ + Write a DataFrame to the binary parquet format. + + .. versionadded:: 0.21.0 + + This function writes the dataframe as a `parquet file + `_. You can choose different parquet + backends, and have the option of compression. See + :ref:`the user guide ` for more details. + + Parameters + ---------- + path : str + File path or Root Directory path. Will be used as Root Directory + path while writing a partitioned dataset. + + .. versionchanged:: 1.0.0 + + Previously this was "fname" + + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. + If ``False``, they will not be written to the file. 
+ If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + + .. versionadded:: 0.24.0 + + partition_cols : list, optional, default None + Column names by which to partition the dataset. + Columns are partitioned in the order they are given. + + .. versionadded:: 0.24.0 + + **kwargs + Additional arguments passed to the parquet library. See + :ref:`pandas io ` for more details. + + See Also + -------- + read_parquet : Read a parquet file. + DataFrame.to_csv : Write a csv file. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_hdf : Write to hdf. + + Notes + ----- + This function requires either the `fastparquet + `_ or `pyarrow + `_ library. + + Examples + -------- + >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]}) + >>> df.to_parquet('df.parquet.gzip', + ... compression='gzip') # doctest: +SKIP + >>> pd.read_parquet('df.parquet.gzip') # doctest: +SKIP + col1 col2 + 0 1 3 + 1 2 4 + """ + from pandas.io.parquet import to_parquet + + to_parquet( + self, + path, + engine, + compression=compression, + index=index, + partition_cols=partition_cols, + **kwargs, + ) + + @Substitution( + header_type="bool", + header="Whether to print column labels, default True", + col_space_type="str or int", + col_space="The minimum width of each column in CSS length " + "units. An int is assumed to be px units.\n\n" + " .. 
versionadded:: 0.25.0\n" + " Ability to use str", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + def to_html( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + max_cols=None, + show_dimensions=False, + decimal=".", + bold_rows=True, + classes=None, + escape=True, + notebook=False, + border=None, + table_id=None, + render_links=False, + encoding=None, + ): + """ + Render a DataFrame as an HTML table. + %(shared_params)s + bold_rows : bool, default True + Make the row labels bold in the output. + classes : str or list or tuple, default None + CSS class(es) to apply to the resulting html table. + escape : bool, default True + Convert the characters <, >, and & to HTML-safe sequences. + notebook : {True, False}, default False + Whether the generated HTML is for IPython Notebook. + border : int + A ``border=border`` attribute is included in the opening + `
    ` tag. Default ``pd.options.display.html.border``. + encoding : str, default "utf-8" + Set character encoding. + + .. versionadded:: 1.0 + + table_id : str, optional + A css id is included in the opening `
    ` tag if specified. + + .. versionadded:: 0.23.0 + + render_links : bool, default False + Convert URLs to HTML links. + + .. versionadded:: 0.24.0 + %(returns)s + See Also + -------- + to_string : Convert DataFrame to a string. + """ + + if justify is not None and justify not in fmt._VALID_JUSTIFY_PARAMETERS: + raise ValueError("Invalid value for justify parameter") + + formatter = fmt.DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + justify=justify, + index_names=index_names, + header=header, + index=index, + bold_rows=bold_rows, + escape=escape, + max_rows=max_rows, + max_cols=max_cols, + show_dimensions=show_dimensions, + decimal=decimal, + table_id=table_id, + render_links=render_links, + ) + # TODO: a generic formatter wld b in DataFrameFormatter + return formatter.to_html( + buf=buf, + classes=classes, + notebook=notebook, + border=border, + encoding=encoding, + ) + + # ---------------------------------------------------------------------- + + def info( + self, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None + ) -> None: + """ + Print a concise summary of a DataFrame. + + This method prints information about a DataFrame including + the index dtype and column dtypes, non-null values and memory usage. + + Parameters + ---------- + verbose : bool, optional + Whether to print the full summary. By default, the setting in + ``pandas.options.display.max_info_columns`` is followed. + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output. + max_cols : int, optional + When to switch from the verbose to the truncated output. If the + DataFrame has more than `max_cols` columns, the truncated output + is used. By default, the setting in + ``pandas.options.display.max_info_columns`` is used. 
+ memory_usage : bool, str, optional + Specifies whether total memory usage of the DataFrame + elements (including the index) should be displayed. By default, + this follows the ``pandas.options.display.memory_usage`` setting. + + True always show memory usage. False never shows memory usage. + A value of 'deep' is equivalent to "True with deep introspection". + Memory usage is shown in human-readable units (base-2 + representation). Without deep introspection a memory estimation is + made based in column dtype and number of rows assuming values + consume the same memory amount for corresponding dtypes. With deep + memory introspection, a real memory usage calculation is performed + at the cost of computational resources. + null_counts : bool, optional + Whether to show the non-null counts. By default, this is shown + only if the frame is smaller than + ``pandas.options.display.max_info_rows`` and + ``pandas.options.display.max_info_columns``. A value of True always + shows the counts, and False never shows the counts. + + Returns + ------- + None + This method prints a summary of a DataFrame and returns None. + + See Also + -------- + DataFrame.describe: Generate descriptive statistics of DataFrame + columns. + DataFrame.memory_usage: Memory usage of DataFrame columns. + + Examples + -------- + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] + >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, + ... 
"float_col": float_values}) + >>> df + int_col text_col float_col + 0 1 alpha 0.00 + 1 2 beta 0.25 + 2 3 gamma 0.50 + 3 4 delta 0.75 + 4 5 epsilon 1.00 + + Prints information of all columns: + + >>> df.info(verbose=True) + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Prints a summary of columns count and its dtypes but not per column + information: + + >>> df.info(verbose=False) + + RangeIndex: 5 entries, 0 to 4 + Columns: 3 entries, int_col to float_col + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Pipe output of DataFrame.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> df.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", + ... encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big DataFrames and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) + >>> df = pd.DataFrame({ + ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), + ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) + ... 
}) + >>> df.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 22.9+ MB + + >>> df.info(memory_usage='deep') + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 188.8 MB + """ + + if buf is None: # pragma: no cover + buf = sys.stdout + + lines = [] + + lines.append(str(type(self))) + lines.append(self.index._summary()) + + if len(self.columns) == 0: + lines.append(f"Empty {type(self).__name__}") + fmt.buffer_put_lines(buf, lines) + return + + cols = self.columns + col_count = len(self.columns) + + # hack + if max_cols is None: + max_cols = get_option("display.max_info_columns", len(self.columns) + 1) + + max_rows = get_option("display.max_info_rows", len(self) + 1) + + if null_counts is None: + show_counts = (col_count <= max_cols) and (len(self) < max_rows) + else: + show_counts = null_counts + exceeds_info_cols = col_count > max_cols + + def _verbose_repr(): + lines.append(f"Data columns (total {len(self.columns)} columns):") + + id_head = " # " + column_head = "Column" + col_space = 2 + + max_col = max(len(pprint_thing(k)) for k in cols) + len_column = len(pprint_thing(column_head)) + space = max(max_col, len_column) + col_space + + max_id = len(pprint_thing(col_count)) + len_id = len(pprint_thing(id_head)) + space_num = max(max_id, len_id) + col_space + counts = None + + header = _put_str(id_head, space_num) + _put_str(column_head, space) + if show_counts: + counts = self.count() + if len(cols) != len(counts): # pragma: no cover + raise AssertionError( + f"Columns must equal counts 
({len(cols)} != {len(counts)})" + ) + count_header = "Non-Null Count" + len_count = len(count_header) + non_null = " non-null" + max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null) + space_count = max(len_count, max_count) + col_space + count_temp = "{count}" + non_null + else: + count_header = "" + space_count = len(count_header) + len_count = space_count + count_temp = "{count}" + + dtype_header = "Dtype" + len_dtype = len(dtype_header) + max_dtypes = max(len(pprint_thing(k)) for k in self.dtypes) + space_dtype = max(len_dtype, max_dtypes) + header += _put_str(count_header, space_count) + _put_str( + dtype_header, space_dtype + ) + + lines.append(header) + lines.append( + _put_str("-" * len_id, space_num) + + _put_str("-" * len_column, space) + + _put_str("-" * len_count, space_count) + + _put_str("-" * len_dtype, space_dtype) + ) + + for i, col in enumerate(self.columns): + dtype = self.dtypes.iloc[i] + col = pprint_thing(col) + + line_no = _put_str(" {num}".format(num=i), space_num) + count = "" + if show_counts: + count = counts.iloc[i] + + lines.append( + line_no + + _put_str(col, space) + + _put_str(count_temp.format(count=count), space_count) + + _put_str(dtype, space_dtype) + ) + + def _non_verbose_repr(): + lines.append(self.columns._summary(name="Columns")) + + def _sizeof_fmt(num, size_qualifier): + # returns size in human readable format + for x in ["bytes", "KB", "MB", "GB", "TB"]: + if num < 1024.0: + return f"{num:3.1f}{size_qualifier} {x}" + num /= 1024.0 + return f"{num:3.1f}{size_qualifier} PB" + + if verbose: + _verbose_repr() + elif verbose is False: # specifically set to False, not nesc None + _non_verbose_repr() + else: + if exceeds_info_cols: + _non_verbose_repr() + else: + _verbose_repr() + + counts = self._data.get_dtype_counts() + dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] + lines.append(f"dtypes: {', '.join(dtypes)}") + + if memory_usage is None: + memory_usage = get_option("display.memory_usage") + 
if memory_usage: + # append memory usage of df to display + size_qualifier = "" + if memory_usage == "deep": + deep = True + else: + # size_qualifier is just a best effort; not guaranteed to catch + # all cases (e.g., it misses categorical data even with object + # categories) + deep = False + if "object" in counts or self.index._is_memory_usage_qualified(): + size_qualifier = "+" + mem_usage = self.memory_usage(index=True, deep=deep).sum() + lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") + fmt.buffer_put_lines(buf, lines) + + def memory_usage(self, index=True, deep=False) -> Series: + """ + Return the memory usage of each column in bytes. + + The memory usage can optionally include the contribution of + the index and elements of `object` dtype. + + This value is displayed in `DataFrame.info` by default. This can be + suppressed by setting ``pandas.options.display.memory_usage`` to False. + + Parameters + ---------- + index : bool, default True + Specifies whether to include the memory usage of the DataFrame's + index in returned Series. If ``index=True``, the memory usage of + the index is the first item in the output. + deep : bool, default False + If True, introspect the data deeply by interrogating + `object` dtypes for system-level memory consumption, and include + it in the returned values. + + Returns + ------- + Series + A Series whose index is the original column names and whose values + is the memory usage of each column in bytes. + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of an + ndarray. + Series.memory_usage : Bytes consumed by a Series. + Categorical : Memory-efficient array for string values with + many repeated values. + DataFrame.info : Concise summary of a DataFrame. + + Examples + -------- + >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool'] + >>> data = dict([(t, np.ones(shape=5000).astype(t)) + ... 
for t in dtypes]) + >>> df = pd.DataFrame(data) + >>> df.head() + int64 float64 complex128 object bool + 0 1 1.0 1.000000+0.000000j 1 True + 1 1 1.0 1.000000+0.000000j 1 True + 2 1 1.0 1.000000+0.000000j 1 True + 3 1 1.0 1.000000+0.000000j 1 True + 4 1 1.0 1.000000+0.000000j 1 True + + >>> df.memory_usage() + Index 128 + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + >>> df.memory_usage(index=False) + int64 40000 + float64 40000 + complex128 80000 + object 40000 + bool 5000 + dtype: int64 + + The memory footprint of `object` dtype columns is ignored by default: + + >>> df.memory_usage(deep=True) + Index 128 + int64 40000 + float64 40000 + complex128 80000 + object 160000 + bool 5000 + dtype: int64 + + Use a Categorical for efficient storage of an object-dtype column with + many repeated values. + + >>> df['object'].astype('category').memory_usage(deep=True) + 5216 + """ + result = Series( + [c.memory_usage(index=False, deep=deep) for col, c in self.items()], + index=self.columns, + ) + if index: + result = Series(self.index.memory_usage(deep=deep), index=["Index"]).append( + result + ) + return result + + def transpose(self, *args, copy: bool = False) -> "DataFrame": + """ + Transpose index and columns. + + Reflect the DataFrame over its main diagonal by writing rows as columns + and vice-versa. The property :attr:`.T` is an accessor to the method + :meth:`transpose`. + + Parameters + ---------- + *args : tuple, optional + Accepted for compatibility with NumPy. + copy : bool, default False + Whether to copy the data after transposing, even for DataFrames + with a single dtype. + + Note that a copy is always required for mixed dtype DataFrames, + or for DataFrames with any extension types. + + Returns + ------- + DataFrame + The transposed DataFrame. + + See Also + -------- + numpy.transpose : Permute the dimensions of a given array. 
+ + Notes + ----- + Transposing a DataFrame with mixed dtypes will result in a homogeneous + DataFrame with the `object` dtype. In such a case, a copy of the data + is always made. + + Examples + -------- + **Square DataFrame with homogeneous dtype** + + >>> d1 = {'col1': [1, 2], 'col2': [3, 4]} + >>> df1 = pd.DataFrame(data=d1) + >>> df1 + col1 col2 + 0 1 3 + 1 2 4 + + >>> df1_transposed = df1.T # or df1.transpose() + >>> df1_transposed + 0 1 + col1 1 2 + col2 3 4 + + When the dtype is homogeneous in the original DataFrame, we get a + transposed DataFrame with the same dtype: + + >>> df1.dtypes + col1 int64 + col2 int64 + dtype: object + >>> df1_transposed.dtypes + 0 int64 + 1 int64 + dtype: object + + **Non-square DataFrame with mixed dtypes** + + >>> d2 = {'name': ['Alice', 'Bob'], + ... 'score': [9.5, 8], + ... 'employed': [False, True], + ... 'kids': [0, 0]} + >>> df2 = pd.DataFrame(data=d2) + >>> df2 + name score employed kids + 0 Alice 9.5 False 0 + 1 Bob 8.0 True 0 + + >>> df2_transposed = df2.T # or df2.transpose() + >>> df2_transposed + 0 1 + name Alice Bob + score 9.5 8 + employed False True + kids 0 0 + + When the DataFrame has mixed dtypes, we get a transposed DataFrame with + the `object` dtype: + + >>> df2.dtypes + name object + score float64 + employed bool + kids int64 + dtype: object + >>> df2_transposed.dtypes + 0 object + 1 object + dtype: object + """ + nv.validate_transpose(args, dict()) + # construct the args + + dtypes = list(self.dtypes) + if self._is_homogeneous_type and dtypes and is_extension_array_dtype(dtypes[0]): + # We have EAs with the same dtype. We can preserve that dtype in transpose. 
+ dtype = dtypes[0] + arr_type = dtype.construct_array_type() + values = self.values + + new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] + result = self._constructor( + dict(zip(self.index, new_values)), index=self.columns + ) + + else: + new_values = self.values.T + if copy: + new_values = new_values.copy() + result = self._constructor( + new_values, index=self.columns, columns=self.index + ) + + return result.__finalize__(self) + + T = property(transpose) + + # ---------------------------------------------------------------------- + # Indexing Methods + + def _ixs(self, i: int, axis: int = 0): + """ + Parameters + ---------- + i : int + axis : int + + Notes + ----- + If slice passed, the resulting data will be a view. + """ + # irow + if axis == 0: + new_values = self._data.fast_xs(i) + + # if we are a copy, mark as such + copy = isinstance(new_values, np.ndarray) and new_values.base is None + result = self._constructor_sliced( + new_values, + index=self.columns, + name=self.index[i], + dtype=new_values.dtype, + ) + result._set_is_copy(self, copy=copy) + return result + + # icol + else: + label = self.columns[i] + + # if the values returned are not the same length + # as the index (iow a not found value), iget returns + # a 0-len ndarray. 
This is effectively catching + # a numpy error (as numpy should really raise) + values = self._data.iget(i) + + if len(self.index) and not len(values): + values = np.array([np.nan] * len(self.index), dtype=object) + result = self._box_col_values(values, label) + + # this is a cached value, mark it so + result._set_as_cached(label, self) + + return result + + def __getitem__(self, key): + key = lib.item_from_zerodim(key) + key = com.apply_if_callable(key, self) + + if is_hashable(key): + # shortcut if the key is in columns + if self.columns.is_unique and key in self.columns: + if self.columns.nlevels > 1: + return self._getitem_multilevel(key) + return self._get_item_cache(key) + + # Do we have a slicer (on rows)? + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing + return self._slice(indexer, axis=0) + + # Do we have a (boolean) DataFrame? + if isinstance(key, DataFrame): + return self.where(key) + + # Do we have a (boolean) 1d indexer? + if com.is_bool_indexer(key): + return self._getitem_bool_array(key) + + # We are left with two options: a single key, and a collection of keys, + # We interpret tuples as collections only for non-MultiIndex + is_single_key = isinstance(key, tuple) or not is_list_like(key) + + if is_single_key: + if self.columns.nlevels > 1: + return self._getitem_multilevel(key) + indexer = self.columns.get_loc(key) + if is_integer(indexer): + indexer = [indexer] + else: + if is_iterator(key): + key = list(key) + indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] + + # take() does not accept boolean indexers + if getattr(indexer, "dtype", None) == bool: + indexer = np.where(indexer)[0] + + data = self._take_with_is_copy(indexer, axis=1) + + if is_single_key: + # What does looking for a single key in a non-unique index return? + # The behavior is inconsistent. 
It returns a Series, except when + # - the key itself is repeated (test on data.shape, #9519), or + # - we have a MultiIndex on columns (test on self.columns, #21309) + if data.shape[1] == 1 and not isinstance(self.columns, ABCMultiIndex): + data = data[key] + + return data + + def _getitem_bool_array(self, key): + # also raises Exception if object array with NA values + # warning here just in case -- previously __setitem__ was + # reindexing but __getitem__ was not; it seems more reasonable to + # go with the __setitem__ behavior since that is more consistent + # with all other indexing behavior + if isinstance(key, Series) and not key.index.equals(self.index): + warnings.warn( + "Boolean Series key will be reindexed to match DataFrame index.", + UserWarning, + stacklevel=3, + ) + elif len(key) != len(self.index): + raise ValueError( + f"Item wrong length {len(key)} instead of {len(self.index)}." + ) + + # check_bool_indexer will throw exception if Series key cannot + # be reindexed to match DataFrame rows + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + return self._take_with_is_copy(indexer, axis=0) + + def _getitem_multilevel(self, key): + # self.columns is a MultiIndex + loc = self.columns.get_loc(key) + if isinstance(loc, (slice, Series, np.ndarray, Index)): + new_columns = self.columns[loc] + result_columns = maybe_droplevels(new_columns, key) + if self._is_mixed_type: + result = self.reindex(columns=new_columns) + result.columns = result_columns + else: + new_values = self.values[:, loc] + result = self._constructor( + new_values, index=self.index, columns=result_columns + ) + result = result.__finalize__(self) + + # If there is only one column being returned, and its name is + # either an empty string, or a tuple with an empty string as its + # first element, then treat the empty string as a placeholder + # and return the column as if the user had provided that empty + # string in the key. 
If the result is a Series, exclude the + # implied empty string from its name. + if len(result.columns) == 1: + top = result.columns[0] + if isinstance(top, tuple): + top = top[0] + if top == "": + result = result[""] + if isinstance(result, Series): + result = self._constructor_sliced( + result, index=self.index, name=key + ) + + result._set_is_copy(self) + return result + else: + return self._get_item_cache(key) + + def _get_value(self, index, col, takeable: bool = False): + """ + Quickly retrieve single value at passed column and index. + + Parameters + ---------- + index : row label + col : column label + takeable : interpret the index/col as indexers, default False + + Returns + ------- + scalar + """ + if takeable: + series = self._iget_item_cache(col) + return com.maybe_box_datetimelike(series._values[index]) + + series = self._get_item_cache(col) + engine = self.index._engine + + try: + return engine.get_value(series._values, index) + except KeyError: + # GH 20629 + if self.index.nlevels > 1: + # partial indexing forbidden + raise + except (TypeError, ValueError): + pass + + # we cannot handle direct indexing + # use positional + col = self.columns.get_loc(col) + index = self.index.get_loc(index) + return self._get_value(index, col, takeable=True) + + def __setitem__(self, key, value): + key = com.apply_if_callable(key, self) + + # see if we can slice the rows + indexer = convert_to_index_sliceable(self, key) + if indexer is not None: + # either we have a slice or we have a string that can be converted + # to a slice for partial-string date indexing + return self._setitem_slice(indexer, value) + + if isinstance(key, DataFrame) or getattr(key, "ndim", None) == 2: + self._setitem_frame(key, value) + elif isinstance(key, (Series, np.ndarray, list, Index)): + self._setitem_array(key, value) + else: + # set column + self._set_item(key, value) + + def _setitem_slice(self, key, value): + # NB: we can't just use self.loc[key] = value because that + # operates on 
labels and we need to operate positional for + # backwards-compat, xref GH#31469 + self._check_setitem_copy() + self.loc._setitem_with_indexer(key, value) + + def _setitem_array(self, key, value): + # also raises Exception if object array with NA values + if com.is_bool_indexer(key): + if len(key) != len(self.index): + raise ValueError( + f"Item wrong length {len(key)} instead of {len(self.index)}!" + ) + key = check_bool_indexer(self.index, key) + indexer = key.nonzero()[0] + self._check_setitem_copy() + self.loc._setitem_with_indexer(indexer, value) + else: + if isinstance(value, DataFrame): + if len(value.columns) != len(key): + raise ValueError("Columns must be same length as key") + for k1, k2 in zip(key, value.columns): + self[k1] = value[k2] + else: + indexer = self.loc._get_listlike_indexer( + key, axis=1, raise_missing=False + )[1] + self._check_setitem_copy() + self.loc._setitem_with_indexer((slice(None), indexer), value) + + def _setitem_frame(self, key, value): + # support boolean setting with DataFrame input, e.g. + # df[df > df2] = 0 + if isinstance(key, np.ndarray): + if key.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + key = self._constructor(key, **self._construct_axes_dict()) + + if key.values.size and not is_bool_dtype(key.values): + raise TypeError( + "Must pass DataFrame or 2-d ndarray with boolean values only" + ) + + self._check_inplace_setting(value) + self._check_setitem_copy() + self._where(-key, value, inplace=True) + + def _set_item(self, key, value): + """ + Add series to DataFrame in specified column. + + If series is a numpy-array (not a Series/TimeSeries), it must be the + same length as the DataFrames index or an error will be thrown. + + Series/TimeSeries will be conformed to the DataFrames index to + ensure homogeneity. 
+ """ + + self._ensure_valid_index(value) + value = self._sanitize_column(key, value) + NDFrame._set_item(self, key, value) + + # check if we are modifying a copy + # try to set first as we want an invalid + # value exception to occur first + if len(self): + self._check_setitem_copy() + + def _set_value(self, index, col, value, takeable: bool = False): + """ + Put single value at passed column and index. + + Parameters + ---------- + index : row label + col : column label + value : scalar + takeable : interpret the index/col as indexers, default False + + Returns + ------- + DataFrame + If label pair is contained, will be reference to calling DataFrame, + otherwise a new object. + """ + try: + if takeable is True: + series = self._iget_item_cache(col) + return series._set_value(index, value, takeable=True) + + series = self._get_item_cache(col) + engine = self.index._engine + engine.set_value(series._values, index, value) + return self + except (KeyError, TypeError): + + # set using a non-recursive method & reset the cache + if takeable: + self.iloc[index, col] = value + else: + self.loc[index, col] = value + self._item_cache.pop(col, None) + + return self + + def _ensure_valid_index(self, value): + """ + Ensure that if we don't have an index, that we can create one from the + passed value. 
+ """ + # GH5632, make sure that we are a Series convertible + if not len(self.index) and is_list_like(value) and len(value): + try: + value = Series(value) + except (ValueError, NotImplementedError, TypeError): + raise ValueError( + "Cannot set a frame with no defined index " + "and a value that cannot be converted to a " + "Series" + ) + + self._data = self._data.reindex_axis( + value.index.copy(), axis=1, fill_value=np.nan + ) + + def _box_item_values(self, key, values): + items = self.columns[self.columns.get_loc(key)] + if values.ndim == 2: + return self._constructor(values.T, columns=items, index=self.index) + else: + return self._box_col_values(values, items) + + def _box_col_values(self, values, items): + """ + Provide boxed values for a column. + """ + klass = self._constructor_sliced + return klass(values, index=self.index, name=items, fastpath=True) + + # ---------------------------------------------------------------------- + # Unsorted + + def query(self, expr, inplace=False, **kwargs): + """ + Query the columns of a DataFrame with a boolean expression. + + Parameters + ---------- + expr : str + The query string to evaluate. + + You can refer to variables + in the environment by prefixing them with an '@' character like + ``@a + b``. + + You can refer to column names that contain spaces or operators by + surrounding them in backticks. This way you can also escape + names that start with a digit, or those that are a Python keyword. + Basically when it is not valid Python identifier. See notes down + for more details. + + For example, if one of your columns is called ``a a`` and you want + to sum it with ``b``, your query should be ```a a` + b``. + + .. versionadded:: 0.25.0 + Backtick quoting introduced. + + .. versionadded:: 1.0.0 + Expanding functionality of backtick quoting for more than only spaces. + + inplace : bool + Whether the query should modify the data in place or return + a modified copy. 
+ **kwargs + See the documentation for :func:`eval` for complete details + on the keyword arguments accepted by :meth:`DataFrame.query`. + + Returns + ------- + DataFrame + DataFrame resulting from the provided query expression. + + See Also + -------- + eval : Evaluate a string describing operations on + DataFrame columns. + DataFrame.eval : Evaluate a string describing operations on + DataFrame columns. + + Notes + ----- + The result of the evaluation of this expression is first passed to + :attr:`DataFrame.loc` and if that fails because of a + multidimensional key (e.g., a DataFrame) then the result will be passed + to :meth:`DataFrame.__getitem__`. + + This method uses the top-level :func:`eval` function to + evaluate the passed query. + + The :meth:`~pandas.DataFrame.query` method uses a slightly + modified Python syntax by default. For example, the ``&`` and ``|`` + (bitwise) operators have the precedence of their boolean cousins, + :keyword:`and` and :keyword:`or`. This *is* syntactically valid Python, + however the semantics are different. + + You can change the semantics of the expression by passing the keyword + argument ``parser='python'``. This enforces the same semantics as + evaluation in Python space. Likewise, you can pass ``engine='python'`` + to evaluate an expression using Python itself as a backend. This is not + recommended as it is inefficient compared to using ``numexpr`` as the + engine. + + The :attr:`DataFrame.index` and + :attr:`DataFrame.columns` attributes of the + :class:`~pandas.DataFrame` instance are placed in the query namespace + by default, which allows you to treat both the index and columns of the + frame as a column in the frame. + The identifier ``index`` is used for the frame index; you can also + use the name of the index to identify it in a query. Please note that + Python keywords may not be used as identifiers. + + For further details and examples see the ``query`` documentation in + :ref:`indexing <indexing.query>`.
+ + *Backtick quoted variables* + + Backtick quoted variables are parsed as literal Python code and + are converted internally to a Python valid identifier. + This can lead to the following problems. + + During parsing a number of disallowed characters inside the backtick + quoted string are replaced by strings that are allowed as a Python identifier. + These characters include all operators in Python, the space character, the + question mark, the exclamation mark, the dollar sign, and the euro sign. + For other characters that fall outside the ASCII range (U+0001..U+007F) + and those that are not further specified in PEP 3131, + the query parser will raise an error. + This excludes whitespace different than the space character, + but also the hashtag (as it is used for comments) and the backtick + itself (backtick can also not be escaped). + + In a special case, quotes that make a pair around a backtick can + confuse the parser. + For example, ```it's` > `that's``` will raise an error, + as it forms a quoted string (``'s > `that'``) with a backtick inside. + + See also the Python documentation about lexical analysis + (https://docs.python.org/3/reference/lexical_analysis.html) + in combination with the source code in :mod:`pandas.core.computation.parsing`. + + Examples + -------- + >>> df = pd.DataFrame({'A': range(1, 6), + ... 'B': range(10, 0, -2), + ... 'C C': range(10, 5, -1)}) + >>> df + A B C C + 0 1 10 10 + 1 2 8 9 + 2 3 6 8 + 3 4 4 7 + 4 5 2 6 + >>> df.query('A > B') + A B C C + 4 5 2 6 + + The previous expression is equivalent to + + >>> df[df.A > df.B] + A B C C + 4 5 2 6 + + For columns with spaces in their name, you can use backtick quoting. 
+ + >>> df.query('B == `C C`') + A B C C + 0 1 10 10 + + The previous expression is equivalent to + + >>> df[df.B == df['C C']] + A B C C + 0 1 10 10 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if not isinstance(expr, str): + msg = f"expr must be a string to be evaluated, {type(expr)} given" + raise ValueError(msg) + kwargs["level"] = kwargs.pop("level", 0) + 1 + kwargs["target"] = None + res = self.eval(expr, **kwargs) + + try: + new_data = self.loc[res] + except ValueError: + # when res is multi-dimensional loc raises, but this is sometimes a + # valid query + new_data = self[res] + + if inplace: + self._update_inplace(new_data) + else: + return new_data + + def eval(self, expr, inplace=False, **kwargs): + """ + Evaluate a string describing operations on DataFrame columns. + + Operates on columns only, not specific rows or elements. This allows + `eval` to run arbitrary code, which can make you vulnerable to code + injection if you pass user input to this function. + + Parameters + ---------- + expr : str + The expression string to evaluate. + inplace : bool, default False + If the expression contains an assignment, whether to perform the + operation inplace and mutate the existing DataFrame. Otherwise, + a new DataFrame is returned. + **kwargs + See the documentation for :func:`eval` for complete details + on the keyword arguments accepted by + :meth:`~pandas.DataFrame.query`. + + Returns + ------- + ndarray, scalar, or pandas object + The result of the evaluation. + + See Also + -------- + DataFrame.query : Evaluates a boolean expression to query the columns + of a frame. + DataFrame.assign : Can evaluate an expression or function to create new + values for a column. + eval : Evaluate a Python expression as a string using various + backends. + + Notes + ----- + For more details see the API documentation for :func:`~eval`. + For detailed examples see :ref:`enhancing performance with eval + <enhancingperf.eval>`.
+ + Examples + -------- + >>> df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + >>> df.eval('A + B') + 0 11 + 1 10 + 2 9 + 3 8 + 4 7 + dtype: int64 + + Assignment is allowed though by default the original DataFrame is not + modified. + + >>> df.eval('C = A + B') + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + >>> df + A B + 0 1 10 + 1 2 8 + 2 3 6 + 3 4 4 + 4 5 2 + + Use ``inplace=True`` to modify the original DataFrame. + + >>> df.eval('C = A + B', inplace=True) + >>> df + A B C + 0 1 10 11 + 1 2 8 10 + 2 3 6 9 + 3 4 4 8 + 4 5 2 7 + """ + from pandas.core.computation.eval import eval as _eval + + inplace = validate_bool_kwarg(inplace, "inplace") + resolvers = kwargs.pop("resolvers", None) + kwargs["level"] = kwargs.pop("level", 0) + 1 + if resolvers is None: + index_resolvers = self._get_index_resolvers() + column_resolvers = self._get_cleaned_column_resolvers() + resolvers = column_resolvers, index_resolvers + if "target" not in kwargs: + kwargs["target"] = self + kwargs["resolvers"] = kwargs.get("resolvers", ()) + tuple(resolvers) + + return _eval(expr, inplace=inplace, **kwargs) + + def select_dtypes(self, include=None, exclude=None) -> "DataFrame": + """ + Return a subset of the DataFrame's columns based on the column dtypes. + + Parameters + ---------- + include, exclude : scalar or list-like + A selection of dtypes or strings to be included/excluded. At least + one of these parameters must be supplied. + + Returns + ------- + DataFrame + The subset of the frame including the dtypes in ``include`` and + excluding the dtypes in ``exclude``. + + Raises + ------ + ValueError + * If both of ``include`` and ``exclude`` are empty + * If ``include`` and ``exclude`` have overlapping elements + * If any kind of string dtype is passed in. 
+ + Notes + ----- + * To select all *numeric* types, use ``np.number`` or ``'number'`` + * To select strings you must use the ``object`` dtype, but note that + this will return *all* object dtype columns + * See the `numpy dtype hierarchy + <https://numpy.org/doc/stable/reference/arrays.scalars.html>`__ + * To select datetimes, use ``np.datetime64``, ``'datetime'`` or + ``'datetime64'`` + * To select timedeltas, use ``np.timedelta64``, ``'timedelta'`` or + ``'timedelta64'`` + * To select Pandas categorical dtypes, use ``'category'`` + * To select Pandas datetimetz dtypes, use ``'datetimetz'`` (new in + 0.20.0) or ``'datetime64[ns, tz]'`` + + Examples + -------- + >>> df = pd.DataFrame({'a': [1, 2] * 3, + ... 'b': [True, False] * 3, + ... 'c': [1.0, 2.0] * 3}) + >>> df + a b c + 0 1 True 1.0 + 1 2 False 2.0 + 2 1 True 1.0 + 3 2 False 2.0 + 4 1 True 1.0 + 5 2 False 2.0 + + >>> df.select_dtypes(include='bool') + b + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + + >>> df.select_dtypes(include=['float64']) + c + 0 1.0 + 1 2.0 + 2 1.0 + 3 2.0 + 4 1.0 + 5 2.0 + + >>> df.select_dtypes(exclude=['int']) + b c + 0 True 1.0 + 1 False 2.0 + 2 True 1.0 + 3 False 2.0 + 4 True 1.0 + 5 False 2.0 + """ + + if not is_list_like(include): + include = (include,) if include is not None else () + if not is_list_like(exclude): + exclude = (exclude,) if exclude is not None else () + + selection = (frozenset(include), frozenset(exclude)) + + if not any(selection): + raise ValueError("at least one of include or exclude must be nonempty") + + # convert the myriad valid dtypes object to a single representation + include = frozenset(infer_dtype_from_object(x) for x in include) + exclude = frozenset(infer_dtype_from_object(x) for x in exclude) + for dtypes in (include, exclude): + invalidate_string_dtypes(dtypes) + + # can't both include AND exclude!
+ if not include.isdisjoint(exclude): + raise ValueError(f"include and exclude overlap on {(include & exclude)}") + + # We raise when both include and exclude are empty + # Hence, we can just shrink the columns we want to keep + keep_these = np.full(self.shape[1], True) + + def extract_unique_dtypes_from_dtypes_set( + dtypes_set: FrozenSet[Dtype], unique_dtypes: np.ndarray + ) -> List[Dtype]: + extracted_dtypes = [ + unique_dtype + for unique_dtype in unique_dtypes + if issubclass(unique_dtype.type, tuple(dtypes_set)) # type: ignore + ] + return extracted_dtypes + + unique_dtypes = self.dtypes.unique() + + if include: + included_dtypes = extract_unique_dtypes_from_dtypes_set( + include, unique_dtypes + ) + keep_these &= self.dtypes.isin(included_dtypes) + + if exclude: + excluded_dtypes = extract_unique_dtypes_from_dtypes_set( + exclude, unique_dtypes + ) + keep_these &= ~self.dtypes.isin(excluded_dtypes) + + return self.iloc[:, keep_these.values] + + def insert(self, loc, column, value, allow_duplicates=False) -> None: + """ + Insert column into DataFrame at specified location. + + Raises a ValueError if `column` is already contained in the DataFrame, + unless `allow_duplicates` is set to True. + + Parameters + ---------- + loc : int + Insertion index. Must verify 0 <= loc <= len(columns). + column : str, number, or hashable object + Label of the inserted column. + value : int, Series, or array-like + allow_duplicates : bool, optional + """ + self._ensure_valid_index(value) + value = self._sanitize_column(column, value, broadcast=False) + self._data.insert(loc, column, value, allow_duplicates=allow_duplicates) + + def assign(self, **kwargs) -> "DataFrame": + r""" + Assign new columns to a DataFrame. + + Returns a new object with all original columns in addition to new ones. + Existing columns that are re-assigned will be overwritten. + + Parameters + ---------- + **kwargs : dict of {str: callable or Series} + The column names are keywords. 
If the values are + callable, they are computed on the DataFrame and + assigned to the new columns. The callable must not + change input DataFrame (though pandas doesn't check it). + If the values are not callable, (e.g. a Series, scalar, or array), + they are simply assigned. + + Returns + ------- + DataFrame + A new DataFrame with the new columns in addition to + all the existing columns. + + Notes + ----- + Assigning multiple columns within the same ``assign`` is possible. + Later items in '\*\*kwargs' may refer to newly created or modified + columns in 'df'; items are computed and assigned into 'df' in order. + + .. versionchanged:: 0.23.0 + + Keyword argument order is maintained. + + Examples + -------- + >>> df = pd.DataFrame({'temp_c': [17.0, 25.0]}, + ... index=['Portland', 'Berkeley']) + >>> df + temp_c + Portland 17.0 + Berkeley 25.0 + + Where the value is a callable, evaluated on `df`: + + >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + Alternatively, the same behavior can be achieved by directly + referencing an existing Series or sequence: + + >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) + temp_c temp_f + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + You can create multiple columns within the same assign where one + of the columns depends on another one defined within the same assign: + + >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, + ... temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) + temp_c temp_f temp_k + Portland 17.0 62.6 290.15 + Berkeley 25.0 77.0 298.15 + """ + data = self.copy() + + for k, v in kwargs.items(): + data[k] = com.apply_if_callable(v, data) + return data + + def _sanitize_column(self, key, value, broadcast=True): + """ + Ensures new columns (which go into the BlockManager as new blocks) are + always copied and converted into an array. 
+ + Parameters + ---------- + key : object + value : scalar, Series, or array-like + broadcast : bool, default True + If ``key`` matches multiple duplicate column names in the + DataFrame, this parameter indicates whether ``value`` should be + tiled so that the returned array contains a (duplicated) column for + each occurrence of the key. If False, ``value`` will not be tiled. + + Returns + ------- + numpy.ndarray + """ + + def reindexer(value): + # reindex if necessary + + if value.index.equals(self.index) or not len(self.index): + value = value._values.copy() + else: + + # GH 4107 + try: + value = value.reindex(self.index)._values + except ValueError as err: + # raised in MultiIndex.from_tuples, see test_insert_error_msmgs + if not value.index.is_unique: + # duplicate axis + raise err + + # other + raise TypeError( + "incompatible index of inserted column with frame index" + ) + return value + + if isinstance(value, Series): + value = reindexer(value) + + elif isinstance(value, DataFrame): + # align right-hand-side columns if self.columns + # is multi-index and self[key] is a sub-frame + if isinstance(self.columns, ABCMultiIndex) and key in self.columns: + loc = self.columns.get_loc(key) + if isinstance(loc, (slice, Series, np.ndarray, Index)): + cols = maybe_droplevels(self.columns[loc], key) + if len(cols) and not cols.equals(value.columns): + value = value.reindex(cols, axis=1) + # now align rows + value = reindexer(value).T + + elif isinstance(value, ExtensionArray): + # Explicitly copy here, instead of in sanitize_index, + # as sanitize_index won't copy an EA, even with copy=True + value = value.copy() + value = sanitize_index(value, self.index, copy=False) + + elif isinstance(value, Index) or is_sequence(value): + + # turn me into an ndarray + value = sanitize_index(value, self.index, copy=False) + if not isinstance(value, (np.ndarray, Index)): + if isinstance(value, list) and len(value) > 0: + value = maybe_convert_platform(value) + else: + value = 
com.asarray_tuplesafe(value) + elif value.ndim == 2: + value = value.copy().T + elif isinstance(value, Index): + value = value.copy(deep=True) + else: + value = value.copy() + + # possibly infer to datetimelike + if is_object_dtype(value.dtype): + value = maybe_infer_to_datetimelike(value) + + else: + # cast ignores pandas dtypes. so save the dtype first + infer_dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) + + # upcast + value = cast_scalar_to_array(len(self.index), value) + value = maybe_cast_to_datetime(value, infer_dtype) + + # return internal types directly + if is_extension_array_dtype(value): + return value + + # broadcast across multiple columns if necessary + if broadcast and key in self.columns and value.ndim == 1: + if not self.columns.is_unique or isinstance(self.columns, ABCMultiIndex): + existing_piece = self[key] + if isinstance(existing_piece, DataFrame): + value = np.tile(value, (len(existing_piece.columns), 1)) + + return np.atleast_2d(np.asarray(value)) + + @property + def _series(self): + return { + item: Series(self._data.iget(idx), index=self.index, name=item) + for idx, item in enumerate(self.columns) + } + + def lookup(self, row_labels, col_labels) -> np.ndarray: + """ + Label-based "fancy indexing" function for DataFrame. + + Given equal-length arrays of row and column labels, return an + array of the values corresponding to each (row, col) pair. + + Parameters + ---------- + row_labels : sequence + The row labels to use for lookup. + col_labels : sequence + The column labels to use for lookup. 
+ + Returns + ------- + numpy.ndarray + + Examples + -------- + values : ndarray + The found values + """ + n = len(row_labels) + if n != len(col_labels): + raise ValueError("Row labels must have same size as column labels") + + thresh = 1000 + if not self._is_mixed_type or n > thresh: + values = self.values + ridx = self.index.get_indexer(row_labels) + cidx = self.columns.get_indexer(col_labels) + if (ridx == -1).any(): + raise KeyError("One or more row labels was not found") + if (cidx == -1).any(): + raise KeyError("One or more column labels was not found") + flat_index = ridx * len(self.columns) + cidx + result = values.flat[flat_index] + else: + result = np.empty(n, dtype="O") + for i, (r, c) in enumerate(zip(row_labels, col_labels)): + result[i] = self._get_value(r, c) + + if is_object_dtype(result): + result = lib.maybe_convert_objects(result) + + return result + + # ---------------------------------------------------------------------- + # Reindexing and alignment + + def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy): + frame = self + + columns = axes["columns"] + if columns is not None: + frame = frame._reindex_columns( + columns, method, copy, level, fill_value, limit, tolerance + ) + + index = axes["index"] + if index is not None: + frame = frame._reindex_index( + index, method, copy, level, fill_value, limit, tolerance + ) + + return frame + + def _reindex_index( + self, + new_index, + method, + copy, + level, + fill_value=np.nan, + limit=None, + tolerance=None, + ): + new_index, indexer = self.index.reindex( + new_index, method=method, level=level, limit=limit, tolerance=tolerance + ) + return self._reindex_with_indexers( + {0: [new_index, indexer]}, + copy=copy, + fill_value=fill_value, + allow_dups=False, + ) + + def _reindex_columns( + self, + new_columns, + method, + copy, + level, + fill_value=None, + limit=None, + tolerance=None, + ): + new_columns, indexer = self.columns.reindex( + new_columns, method=method, 
level=level, limit=limit, tolerance=tolerance + ) + return self._reindex_with_indexers( + {1: [new_columns, indexer]}, + copy=copy, + fill_value=fill_value, + allow_dups=False, + ) + + def _reindex_multi(self, axes, copy, fill_value) -> "DataFrame": + """ + We are guaranteed non-Nones in the axes. + """ + + new_index, row_indexer = self.index.reindex(axes["index"]) + new_columns, col_indexer = self.columns.reindex(axes["columns"]) + + if row_indexer is not None and col_indexer is not None: + indexer = row_indexer, col_indexer + new_values = algorithms.take_2d_multi( + self.values, indexer, fill_value=fill_value + ) + return self._constructor(new_values, index=new_index, columns=new_columns) + else: + return self._reindex_with_indexers( + {0: [new_index, row_indexer], 1: [new_columns, col_indexer]}, + copy=copy, + fill_value=fill_value, + ) + + @Appender(_shared_docs["align"] % _shared_doc_kwargs) + def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + broadcast_axis=None, + ) -> "DataFrame": + return super().align( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + broadcast_axis=broadcast_axis, + ) + + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.reindex.__doc__) + @rewrite_axis_style_signature( + "labels", + [ + ("method", None), + ("copy", True), + ("level", None), + ("fill_value", np.nan), + ("limit", None), + ("tolerance", None), + ], + ) + def reindex(self, *args, **kwargs) -> "DataFrame": + axes = validate_axis_style_args(self, args, kwargs, "labels", "reindex") + kwargs.update(axes) + # Pop these, since the values are in `kwargs` under different names + kwargs.pop("axis", None) + kwargs.pop("labels", None) + return super().reindex(**kwargs) + + def drop( + self, + labels=None, + axis=0, + index=None, + columns=None, + level=None, + inplace=False, + 
errors="raise", + ): + """ + Drop specified labels from rows or columns. + + Remove rows or columns by specifying label names and corresponding + axis, or by specifying directly index or column names. When using a + multi-index, labels on different levels can be removed by specifying + the level. + + Parameters + ---------- + labels : single label or list-like + Index or column labels to drop. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Whether to drop labels from the index (0 or 'index') or + columns (1 or 'columns'). + index : single label or list-like + Alternative to specifying axis (``labels, axis=0`` + is equivalent to ``index=labels``). + + .. versionadded:: 0.21.0 + columns : single label or list-like + Alternative to specifying axis (``labels, axis=1`` + is equivalent to ``columns=labels``). + + .. versionadded:: 0.21.0 + level : int or level name, optional + For MultiIndex, level from which the labels will be removed. + inplace : bool, default False + If True, do operation inplace and return None. + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and only existing labels are + dropped. + + Returns + ------- + DataFrame + DataFrame without the removed index or column labels. + + Raises + ------ + KeyError + If any of the labels is not found in the selected axis. + + See Also + -------- + DataFrame.loc : Label-location based indexer for selection by label. + DataFrame.dropna : Return DataFrame with labels on given axis omitted + where (all or any) data are missing. + DataFrame.drop_duplicates : Return DataFrame with duplicate rows + removed, optionally only considering certain columns. + Series.drop : Return Series with specified index labels removed. + + Examples + -------- + >>> df = pd.DataFrame(np.arange(12).reshape(3, 4), + ... 
columns=['A', 'B', 'C', 'D']) + >>> df + A B C D + 0 0 1 2 3 + 1 4 5 6 7 + 2 8 9 10 11 + + Drop columns + + >>> df.drop(['B', 'C'], axis=1) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + >>> df.drop(columns=['B', 'C']) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + Drop a row by index + + >>> df.drop([0, 1]) + A B C D + 2 8 9 10 11 + + Drop columns and/or rows of MultiIndex DataFrame + + >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + ... ['speed', 'weight', 'length']], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + >>> df = pd.DataFrame(index=midx, columns=['big', 'small'], + ... data=[[45, 30], [200, 100], [1.5, 1], [30, 20], + ... [250, 150], [1.5, 0.8], [320, 250], + ... [1, 0.8], [0.3, 0.2]]) + >>> df + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + length 1.5 1.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + length 1.5 0.8 + falcon speed 320.0 250.0 + weight 1.0 0.8 + length 0.3 0.2 + + >>> df.drop(index='cow', columns='small') + big + lama speed 45.0 + weight 200.0 + length 1.5 + falcon speed 320.0 + weight 1.0 + length 0.3 + + >>> df.drop(index='length', level=1) + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + falcon speed 320.0 250.0 + weight 1.0 0.8 + """ + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + @rewrite_axis_style_signature( + "mapper", + [("copy", True), ("inplace", False), ("level", None), ("errors", "ignore")], + ) + def rename( + self, + mapper: Optional[Renamer] = None, + *, + index: Optional[Renamer] = None, + columns: Optional[Renamer] = None, + axis: Optional[Axis] = None, + copy: bool = True, + inplace: bool = False, + level: Optional[Level] = None, + errors: str = "ignore", + ) -> Optional["DataFrame"]: + + """ + Alter axes labels. + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. 
Extra labels listed don't throw an + error. + + See the :ref:`user guide ` for more. + + Parameters + ---------- + mapper : dict-like or function + Dict-like or functions transformations to apply to + that axis' values. Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` and + ``columns``. + index : dict-like or function + Alternative to specifying axis (``mapper, axis=0`` + is equivalent to ``index=mapper``). + columns : dict-like or function + Alternative to specifying axis (``mapper, axis=1`` + is equivalent to ``columns=mapper``). + axis : int or str + Axis to target with ``mapper``. Can be either the axis name + ('index', 'columns') or number (0, 1). The default is 'index'. + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Whether to return a new DataFrame. If True then value of copy is + ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. + errors : {'ignore', 'raise'}, default 'ignore' + If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, + or `columns` contains labels that are not present in the Index + being transformed. + If 'ignore', existing keys will be renamed and extra keys will be + ignored. + + Returns + ------- + DataFrame + DataFrame with the renamed axis labels. + + Raises + ------ + KeyError + If any of the labels is not found in the selected axis and + "errors='raise'". + + See Also + -------- + DataFrame.rename_axis : Set the name of the axis. + + Examples + -------- + + ``DataFrame.rename`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. 
+ + Rename columns using a mapping: + + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename(columns={"A": "a", "B": "c"}) + a c + 0 1 4 + 1 2 5 + 2 3 6 + + Rename index using a mapping: + + >>> df.rename(index={0: "x", 1: "y", 2: "z"}) + A B + x 1 4 + y 2 5 + z 3 6 + + Cast index labels to a different type: + + >>> df.index + RangeIndex(start=0, stop=3, step=1) + >>> df.rename(index=str).index + Index(['0', '1', '2'], dtype='object') + + >>> df.rename(columns={"A": "a", "B": "b", "C": "c"}, errors="raise") + Traceback (most recent call last): + KeyError: ['C'] not found in axis + + Using axis-style parameters + + >>> df.rename(str.lower, axis='columns') + a b + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename({1: 2, 2: 4}, axis='index') + A B + 0 1 4 + 2 2 5 + 4 3 6 + """ + return super().rename( + mapper=mapper, + index=index, + columns=columns, + axis=axis, + copy=copy, + inplace=inplace, + level=level, + errors=errors, + ) + + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.fillna.__doc__) + def fillna( + self, + value=None, + method=None, + axis=None, + inplace=False, + limit=None, + downcast=None, + ) -> Optional["DataFrame"]: + return super().fillna( + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + @Appender(_shared_docs["replace"] % _shared_doc_kwargs) + def replace( + self, + to_replace=None, + value=None, + inplace=False, + limit=None, + regex=False, + method="pad", + ): + return super().replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, + method=method, + ) + + @Appender(_shared_docs["shift"] % _shared_doc_kwargs) + def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> "DataFrame": + return super().shift( + periods=periods, freq=freq, axis=axis, fill_value=fill_value + ) + + def set_index( + self, keys, drop=True, append=False, inplace=False, verify_integrity=False + ): + """ + Set the DataFrame index using existing 
columns. + + Set the DataFrame index (row labels) using one or more existing + columns or arrays (of the correct length). The index can replace the + existing index or expand on it. + + Parameters + ---------- + keys : label or array-like or list of labels/arrays + This parameter can be either a single column key, a single array of + the same length as the calling DataFrame, or a list containing an + arbitrary combination of column keys and arrays. Here, "array" + encompasses :class:`Series`, :class:`Index`, ``np.ndarray``, and + instances of :class:`~collections.abc.Iterator`. + drop : bool, default True + Delete columns to be used as the new index. + append : bool, default False + Whether to append columns to existing index. + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). + verify_integrity : bool, default False + Check the new index for duplicates. Otherwise defer the check until + necessary. Setting to False will improve the performance of this + method. + + Returns + ------- + DataFrame + Changed row labels. + + See Also + -------- + DataFrame.reset_index : Opposite of set_index. + DataFrame.reindex : Change to new indices or expand indices. + DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + >>> df = pd.DataFrame({'month': [1, 4, 7, 10], + ... 'year': [2012, 2014, 2013, 2014], + ... 
'sale': [55, 40, 84, 31]}) + >>> df + month year sale + 0 1 2012 55 + 1 4 2014 40 + 2 7 2013 84 + 3 10 2014 31 + + Set the index to become the 'month' column: + + >>> df.set_index('month') + year sale + month + 1 2012 55 + 4 2014 40 + 7 2013 84 + 10 2014 31 + + Create a MultiIndex using columns 'year' and 'month': + + >>> df.set_index(['year', 'month']) + sale + year month + 2012 1 55 + 2014 4 40 + 2013 7 84 + 2014 10 31 + + Create a MultiIndex using an Index and a column: + + >>> df.set_index([pd.Index([1, 2, 3, 4]), 'year']) + month sale + year + 1 2012 1 55 + 2 2014 4 40 + 3 2013 7 84 + 4 2014 10 31 + + Create a MultiIndex using two Series: + + >>> s = pd.Series([1, 2, 3, 4]) + >>> df.set_index([s, s**2]) + month year sale + 1 1 1 2012 55 + 2 4 4 2014 40 + 3 9 7 2013 84 + 4 16 10 2014 31 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if not isinstance(keys, list): + keys = [keys] + + err_msg = ( + 'The parameter "keys" may be a column key, one-dimensional ' + "array, or a list containing only valid column keys and " + "one-dimensional arrays." + ) + + missing: List[Optional[Hashable]] = [] + for col in keys: + if isinstance( + col, (ABCIndexClass, ABCSeries, np.ndarray, list, abc.Iterator) + ): + # arrays are fine as long as they are one-dimensional + # iterators get converted to list below + if getattr(col, "ndim", 1) != 1: + raise ValueError(err_msg) + else: + # everything else gets tried as a key; see GH 24969 + try: + found = col in self.columns + except TypeError: + raise TypeError(f"{err_msg}. 
Received column of type {type(col)}") + else: + if not found: + missing.append(col) + + if missing: + raise KeyError(f"None of {missing} are in the columns") + + if inplace: + frame = self + else: + frame = self.copy() + + arrays = [] + names = [] + if append: + names = list(self.index.names) + if isinstance(self.index, ABCMultiIndex): + for i in range(self.index.nlevels): + arrays.append(self.index._get_level_values(i)) + else: + arrays.append(self.index) + + to_remove: List[Optional[Hashable]] = [] + for col in keys: + if isinstance(col, ABCMultiIndex): + for n in range(col.nlevels): + arrays.append(col._get_level_values(n)) + names.extend(col.names) + elif isinstance(col, (ABCIndexClass, ABCSeries)): + # if Index then not MultiIndex (treated above) + arrays.append(col) + names.append(col.name) + elif isinstance(col, (list, np.ndarray)): + arrays.append(col) + names.append(None) + elif isinstance(col, abc.Iterator): + arrays.append(list(col)) + names.append(None) + # from here, col can only be a column label + else: + arrays.append(frame[col]._values) + names.append(col) + if drop: + to_remove.append(col) + + if len(arrays[-1]) != len(self): + # check newest element against length of calling frame, since + # ensure_index_from_sequences would not raise for append=False. 
+ raise ValueError( + f"Length mismatch: Expected {len(self)} rows, " + f"received array of length {len(arrays[-1])}" + ) + + index = ensure_index_from_sequences(arrays, names) + + if verify_integrity and not index.is_unique: + duplicates = index[index.duplicated()].unique() + raise ValueError(f"Index has duplicate keys: {duplicates}") + + # use set to handle duplicate column names gracefully in case of drop + for c in set(to_remove): + del frame[c] + + # clear up memory usage + index._cleanup() + + frame.index = index + + if not inplace: + return frame + + def reset_index( + self, + level: Optional[Union[Hashable, Sequence[Hashable]]] = None, + drop: bool = False, + inplace: bool = False, + col_level: Hashable = 0, + col_fill: Optional[Hashable] = "", + ) -> Optional["DataFrame"]: + """ + Reset the index, or a level of it. + + Reset the index of the DataFrame, and use the default one instead. + If the DataFrame has a MultiIndex, this method can remove one or more + levels. + + Parameters + ---------- + level : int, str, tuple, or list, default None + Only remove the given levels from the index. Removes all levels by + default. + drop : bool, default False + Do not try to insert index into dataframe columns. This resets + the index to the default integer index. + inplace : bool, default False + Modify the DataFrame in place (do not create a new object). + col_level : int or str, default 0 + If the columns have multiple levels, determines which level the + labels are inserted into. By default it is inserted into the first + level. + col_fill : object, default '' + If the columns have multiple levels, determines how the other + levels are named. If None then the index name is repeated. + + Returns + ------- + DataFrame or None + DataFrame with the new index or None if ``inplace=True``. + + See Also + -------- + DataFrame.set_index : Opposite of reset_index. + DataFrame.reindex : Change to new indices or expand indices. 
+ DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + >>> df = pd.DataFrame([('bird', 389.0), + ... ('bird', 24.0), + ... ('mammal', 80.5), + ... ('mammal', np.nan)], + ... index=['falcon', 'parrot', 'lion', 'monkey'], + ... columns=('class', 'max_speed')) + >>> df + class max_speed + falcon bird 389.0 + parrot bird 24.0 + lion mammal 80.5 + monkey mammal NaN + + When we reset the index, the old index is added as a column, and a + new sequential index is used: + + >>> df.reset_index() + index class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + We can use the `drop` parameter to avoid the old index being added as + a column: + + >>> df.reset_index(drop=True) + class max_speed + 0 bird 389.0 + 1 bird 24.0 + 2 mammal 80.5 + 3 mammal NaN + + You can also use `reset_index` with `MultiIndex`. + + >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), + ... ('bird', 'parrot'), + ... ('mammal', 'lion'), + ... ('mammal', 'monkey')], + ... names=['class', 'name']) + >>> columns = pd.MultiIndex.from_tuples([('speed', 'max'), + ... ('species', 'type')]) + >>> df = pd.DataFrame([(389.0, 'fly'), + ... ( 24.0, 'fly'), + ... ( 80.5, 'run'), + ... (np.nan, 'jump')], + ... index=index, + ... columns=columns) + >>> df + speed species + max type + class name + bird falcon 389.0 fly + parrot 24.0 fly + mammal lion 80.5 run + monkey NaN jump + + If the index has multiple levels, we can reset a subset of them: + + >>> df.reset_index(level='class') + class speed species + max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + If we are not dropping the index, by default, it is placed in the top + level. 
We can place it in another level: + + >>> df.reset_index(level='class', col_level=1) + speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + When the index is inserted under another level, we can specify under + which one with the parameter `col_fill`: + + >>> df.reset_index(level='class', col_level=1, col_fill='species') + species speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + + If we specify a nonexistent level for `col_fill`, it is created: + + >>> df.reset_index(level='class', col_level=1, col_fill='genus') + genus speed species + class max type + name + falcon bird 389.0 fly + parrot bird 24.0 fly + lion mammal 80.5 run + monkey mammal NaN jump + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if inplace: + new_obj = self + else: + new_obj = self.copy() + + def _maybe_casted_values(index, labels=None): + values = index._values + if not isinstance(index, (PeriodIndex, DatetimeIndex)): + if values.dtype == np.object_: + values = lib.maybe_convert_objects(values) + + # if we have the labels, extract the values with a mask + if labels is not None: + mask = labels == -1 + + # we can have situations where the whole mask is -1, + # meaning there is nothing found in labels, so make all nan's + if mask.all(): + values = np.empty(len(mask)) + values.fill(np.nan) + else: + values = values.take(labels) + + # TODO(https://github.com/pandas-dev/pandas/issues/24206) + # Push this into maybe_upcast_putmask? + # We can't pass EAs there right now. Looks a bit + # complicated. + # So we unbox the ndarray_values, op, re-box. 
+ values_type = type(values) + values_dtype = values.dtype + + if issubclass(values_type, DatetimeLikeArray): + values = values._data + + if mask.any(): + values, _ = maybe_upcast_putmask(values, mask, np.nan) + + if issubclass(values_type, DatetimeLikeArray): + values = values_type(values, dtype=values_dtype) + + return values + + new_index = ibase.default_index(len(new_obj)) + if level is not None: + if not isinstance(level, (tuple, list)): + level = [level] + level = [self.index._get_level_number(lev) for lev in level] + if len(level) < self.index.nlevels: + new_index = self.index.droplevel(level) + + if not drop: + to_insert: Iterable[Tuple[Any, Optional[Any]]] + if isinstance(self.index, ABCMultiIndex): + names = [ + (n if n is not None else f"level_{i}") + for i, n in enumerate(self.index.names) + ] + to_insert = zip(self.index.levels, self.index.codes) + else: + default = "index" if "index" not in self else "level_0" + names = [default] if self.index.name is None else [self.index.name] + to_insert = ((self.index, None),) + + multi_col = isinstance(self.columns, ABCMultiIndex) + for i, (lev, lab) in reversed(list(enumerate(to_insert))): + if not (level is None or i in level): + continue + name = names[i] + if multi_col: + col_name = list(name) if isinstance(name, tuple) else [name] + if col_fill is None: + if len(col_name) not in (1, self.columns.nlevels): + raise ValueError( + "col_fill=None is incompatible " + f"with incomplete column name {name}" + ) + col_fill = col_name[0] + + lev_num = self.columns._get_level_number(col_level) + name_lst = [col_fill] * lev_num + col_name + missing = self.columns.nlevels - len(name_lst) + name_lst += [col_fill] * missing + name = tuple(name_lst) + # to ndarray and maybe infer different dtype + level_values = _maybe_casted_values(lev, lab) + new_obj.insert(0, name, level_values) + + new_obj.index = new_index + if not inplace: + return new_obj + + return None + + # 
---------------------------------------------------------------------- + # Reindex-based selection methods + + @Appender(_shared_docs["isna"] % _shared_doc_kwargs) + def isna(self) -> "DataFrame": + return super().isna() + + @Appender(_shared_docs["isna"] % _shared_doc_kwargs) + def isnull(self) -> "DataFrame": + return super().isnull() + + @Appender(_shared_docs["notna"] % _shared_doc_kwargs) + def notna(self) -> "DataFrame": + return super().notna() + + @Appender(_shared_docs["notna"] % _shared_doc_kwargs) + def notnull(self) -> "DataFrame": + return super().notnull() + + def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): + """ + Remove missing values. + + See the :ref:`User Guide ` for more on which values are + considered missing, and how to work with missing data. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + Determine if rows or columns which contain missing values are + removed. + + * 0, or 'index' : Drop rows which contain missing values. + * 1, or 'columns' : Drop columns which contain missing value. + + .. versionchanged:: 1.0.0 + + Pass tuple or list to drop on multiple axes. + Only a single axis is allowed. + + how : {'any', 'all'}, default 'any' + Determine if row or column is removed from DataFrame, when we have + at least one NA or all NA. + + * 'any' : If any NA values are present, drop that row or column. + * 'all' : If all values are NA, drop that row or column. + + thresh : int, optional + Require that many non-NA values. + subset : array-like, optional + Labels along other axis to consider, e.g. if you are dropping rows + these would be a list of columns to include. + inplace : bool, default False + If True, do operation inplace and return None. + + Returns + ------- + DataFrame + DataFrame with NA entries dropped from it. + + See Also + -------- + DataFrame.isna: Indicate missing values. + DataFrame.notna : Indicate existing (non-missing) values. 
+ DataFrame.fillna : Replace missing values. + Series.dropna : Drop missing values. + Index.dropna : Drop missing indices. + + Examples + -------- + >>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'], + ... "toy": [np.nan, 'Batmobile', 'Bullwhip'], + ... "born": [pd.NaT, pd.Timestamp("1940-04-25"), + ... pd.NaT]}) + >>> df + name toy born + 0 Alfred NaN NaT + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Drop the rows where at least one element is missing. + + >>> df.dropna() + name toy born + 1 Batman Batmobile 1940-04-25 + + Drop the columns where at least one element is missing. + + >>> df.dropna(axis='columns') + name + 0 Alfred + 1 Batman + 2 Catwoman + + Drop the rows where all elements are missing. + + >>> df.dropna(how='all') + name toy born + 0 Alfred NaN NaT + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Keep only the rows with at least 2 non-NA values. + + >>> df.dropna(thresh=2) + name toy born + 1 Batman Batmobile 1940-04-25 + 2 Catwoman Bullwhip NaT + + Define in which columns to look for missing values. + + >>> df.dropna(subset=['name', 'born']) + name toy born + 1 Batman Batmobile 1940-04-25 + + Keep the DataFrame with valid entries in the same variable. 
+ + >>> df.dropna(inplace=True) + >>> df + name toy born + 1 Batman Batmobile 1940-04-25 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + if isinstance(axis, (tuple, list)): + # GH20987 + raise TypeError("supplying multiple axes to axis is no longer supported.") + + axis = self._get_axis_number(axis) + agg_axis = 1 - axis + + agg_obj = self + if subset is not None: + ax = self._get_axis(agg_axis) + indices = ax.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + agg_obj = self.take(indices, axis=agg_axis) + + count = agg_obj.count(axis=agg_axis) + + if thresh is not None: + mask = count >= thresh + elif how == "any": + mask = count == len(agg_obj._get_axis(agg_axis)) + elif how == "all": + mask = count > 0 + else: + if how is not None: + raise ValueError(f"invalid how option: {how}") + else: + raise TypeError("must specify how or thresh") + + result = self.loc(axis=axis)[mask] + + if inplace: + self._update_inplace(result) + else: + return result + + def drop_duplicates( + self, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, + keep: Union[str, bool] = "first", + inplace: bool = False, + ignore_index: bool = False, + ) -> Optional["DataFrame"]: + """ + Return DataFrame with duplicate rows removed. + + Considering certain columns is optional. Indexes, including time indexes + are ignored. + + Parameters + ---------- + subset : column label or sequence of labels, optional + Only consider certain columns for identifying duplicates, by + default use all of the columns. + keep : {'first', 'last', False}, default 'first' + Determines which duplicates (if any) to keep. + - ``first`` : Drop duplicates except for the first occurrence. + - ``last`` : Drop duplicates except for the last occurrence. + - False : Drop all duplicates. + inplace : bool, default False + Whether to drop duplicates in place or to return a copy. 
+ ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + Returns + ------- + DataFrame + DataFrame with duplicates removed or None if ``inplace=True``. + """ + if self.empty: + return self.copy() + + inplace = validate_bool_kwarg(inplace, "inplace") + duplicated = self.duplicated(subset, keep=keep) + + if inplace: + (inds,) = (-duplicated)._ndarray_values.nonzero() + new_data = self._data.take(inds) + + if ignore_index: + new_data.axes[1] = ibase.default_index(len(inds)) + self._update_inplace(new_data) + else: + result = self[-duplicated] + + if ignore_index: + result.index = ibase.default_index(len(result)) + return result + + return None + + def duplicated( + self, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = None, + keep: Union[str, bool] = "first", + ) -> "Series": + """ + Return boolean Series denoting duplicate rows. + + Considering certain columns is optional. + + Parameters + ---------- + subset : column label or sequence of labels, optional + Only consider certain columns for identifying duplicates, by + default use all of the columns. + keep : {'first', 'last', False}, default 'first' + Determines which duplicates (if any) to mark. + + - ``first`` : Mark duplicates as ``True`` except for the first occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last occurrence. + - False : Mark all duplicates as ``True``. 
+ + Returns + ------- + Series + """ + from pandas.core.sorting import get_group_index + from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT + + if self.empty: + return Series(dtype=bool) + + def f(vals): + labels, shape = algorithms.factorize( + vals, size_hint=min(len(self), _SIZE_HINT_LIMIT) + ) + return labels.astype("i8", copy=False), len(shape) + + if subset is None: + subset = self.columns + elif ( + not np.iterable(subset) + or isinstance(subset, str) + or isinstance(subset, tuple) + and subset in self.columns + ): + subset = (subset,) + + # needed for mypy since can't narrow types using np.iterable + subset = cast(Iterable, subset) + + # Verify all columns in subset exist in the queried dataframe + # Otherwise, raise a KeyError, same as if you try to __getitem__ with a + # key that doesn't exist. + diff = Index(subset).difference(self.columns) + if not diff.empty: + raise KeyError(diff) + + vals = (col.values for name, col in self.items() if name in subset) + labels, shape = map(list, zip(*map(f, vals))) + + ids = get_group_index(labels, shape, sort=False, xnull=False) + return Series(duplicated_int64(ids, keep), index=self.index) + + # ---------------------------------------------------------------------- + # Sorting + + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.sort_values.__doc__) + def sort_values( + self, + by, + axis=0, + ascending=True, + inplace=False, + kind="quicksort", + na_position="last", + ignore_index=False, + ): + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + + if not isinstance(by, list): + by = [by] + if is_sequence(ascending) and len(by) != len(ascending): + raise ValueError( + f"Length of ascending ({len(ascending)}) != length of by ({len(by)})" + ) + if len(by) > 1: + from pandas.core.sorting import lexsort_indexer + + keys = [self._get_label_or_level_values(x, axis=axis) for x in by] + indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position) + 
@Substitution(**_shared_doc_kwargs)
@Appender(NDFrame.sort_index.__doc__)
def sort_index(
    self,
    axis=0,
    level=None,
    ascending=True,
    inplace=False,
    kind="quicksort",
    na_position="last",
    sort_remaining=True,
    ignore_index: bool = False,
):
    # The user-facing docstring is attached by the decorators above.
    #
    # Sort the frame by the labels of ``axis``.  Returns a new DataFrame, or
    # ``None`` when ``inplace`` is true.

    # TODO: this can be combined with Series.sort_index impl as
    # almost identical

    inplace = validate_bool_kwarg(inplace, "inplace")

    axis = self._get_axis_number(axis)
    labels = self._get_axis(axis)

    # make sure that the axis is lexsorted to start
    # if not we need to reconstruct to get the correct indexer
    labels = labels._sort_levels_monotonic()
    if level is not None:
        # Only the indexer is needed; the sorted axis itself is rebuilt by
        # the ``take`` below.
        _, indexer = labels.sortlevel(
            level, ascending=ascending, sort_remaining=sort_remaining
        )

    elif isinstance(labels, ABCMultiIndex):
        from pandas.core.sorting import lexsort_indexer

        indexer = lexsort_indexer(
            labels._get_codes_for_sorting(),
            orders=ascending,
            na_position=na_position,
        )
    else:
        from pandas.core.sorting import nargsort

        # Check monotonic-ness before sort an index
        # GH11080
        if (ascending and labels.is_monotonic_increasing) or (
            not ascending and labels.is_monotonic_decreasing
        ):
            # Nothing to reorder.  ``ignore_index`` must still be honoured
            # here, otherwise the result depends on whether the input
            # happened to be sorted already.  This mirrors the relabelling
            # done on the main path below.
            result = self if inplace else self.copy()
            if ignore_index:
                result.index = ibase.default_index(len(result))
            return None if inplace else result

        indexer = nargsort(
            labels, kind=kind, ascending=ascending, na_position=na_position
        )

    baxis = self._get_block_manager_axis(axis)
    new_data = self._data.take(indexer, axis=baxis, verify=False)

    # reconstruct axis if needed
    new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic()

    if ignore_index:
        # Size the replacement labels from the axis being replaced rather
        # than from ``indexer``: the two lengths only coincide when sorting
        # along axis 0.
        new_data.axes[1] = ibase.default_index(len(new_data.axes[1]))

    if inplace:
        return self._update_inplace(new_data)
    else:
        return self._constructor(new_data).__finalize__(self)
def nsmallest(self, n, columns, keep="first") -> "DataFrame":
    """
    Return the first `n` rows ordered by `columns` in ascending order.

    Return the first `n` rows with the smallest values in `columns`, in
    ascending order. The columns that are not specified are returned as
    well, but not used for ordering.

    This method is equivalent to
    ``df.sort_values(columns, ascending=True).head(n)``, but more
    performant.

    Parameters
    ----------
    n : int
        Number of items to retrieve.
    columns : list or str
        Column name or names to order by.
    keep : {'first', 'last', 'all'}, default 'first'
        Where there are duplicate values:

        - ``first`` : take the first occurrence.
        - ``last`` : take the last occurrence.
        - ``all`` : do not drop any duplicates, even if it means
          selecting more than `n` items.

        .. versionadded:: 0.24.0

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
        descending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Examples
    --------
    >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000,
    ...                                   434000, 434000, 337000, 11300,
    ...                                   11300, 11300],
    ...                    'GDP': [1937894, 2583560 , 12011, 4520, 12128,
    ...                            17036, 182, 38, 311],
    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
    ...                                "IS", "NR", "TV", "AI"]},
    ...                   index=["Italy", "France", "Malta",
    ...                          "Maldives", "Brunei", "Iceland",
    ...                          "Nauru", "Tuvalu", "Anguilla"])
    >>> df
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru          11300      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    In the following example, we will use ``nsmallest`` to select the
    three rows having the smallest values in column "population".

    >>> df.nsmallest(3, 'population')
              population  GDP alpha-2
    Nauru          11300  182      NR
    Tuvalu         11300   38      TV
    Anguilla       11300  311      AI

    When using ``keep='last'``, ties are resolved in reverse order:

    >>> df.nsmallest(3, 'population', keep='last')
              population  GDP alpha-2
    Anguilla       11300  311      AI
    Tuvalu         11300   38      TV
    Nauru          11300  182      NR

    When using ``keep='all'``, all duplicate items are maintained:

    >>> df.nsmallest(3, 'population', keep='all')
              population  GDP alpha-2
    Nauru          11300  182      NR
    Tuvalu         11300   38      TV
    Anguilla       11300  311      AI

    To order by the smallest values in column "population" and then
    "GDP", we can specify multiple columns like in the next example.

    >>> df.nsmallest(3, ['population', 'GDP'])
              population  GDP alpha-2
    Tuvalu         11300   38      TV
    Nauru          11300  182      NR
    Anguilla       11300  311      AI
    """
    return algorithms.SelectNFrame(
        self, n=n, keep=keep, columns=columns
    ).nsmallest()
def _combine_frame(self, other, func, fill_value=None, level=None):
    """
    Apply the binary operation `func` between `self` and `other`.

    The caller guarantees ``self._indexed_same(other)``. `level` is
    accepted for signature compatibility but is not used here.
    """
    if fill_value is not None:

        def _arith_op(left, right):
            # for the mixed_type case where we iterate over columns,
            # _arith_op(left, right) is equivalent to
            # left._binop(right, func, fill_value=fill_value)
            left, right = ops.fill_binop(left, right, fill_value)
            return func(left, right)

    else:
        # _arith_op may be called once per column, so skip the wrapper
        # entirely when there is nothing to fill.
        _arith_op = func

    if not ops.should_series_dispatch(self, other, func):
        # Operate on the whole value arrays at once.
        with np.errstate(all="ignore"):
            res_values = _arith_op(self.values, other.values)
        return dispatch_fill_zeros(func, self.values, other.values, res_values)

    # Otherwise go column by column.
    return ops.dispatch_to_series(self, other, _arith_op)
def combine(
    self, other: "DataFrame", func, fill_value=None, overwrite=True
) -> "DataFrame":
    """
    Perform column-wise combine with another DataFrame.

    Combines a DataFrame with `other` DataFrame using `func`
    to element-wise combine columns. The row and column indexes of the
    resulting DataFrame will be the union of the two.

    Parameters
    ----------
    other : DataFrame
        The DataFrame to merge column-wise.
    func : function
        Function that takes two series as inputs and return a Series or a
        scalar. Used to merge the two dataframes column by columns.
    fill_value : scalar value, default None
        The value to fill NaNs with prior to passing any column to the
        merge func.
    overwrite : bool, default True
        If True, columns in `self` that do not exist in `other` will be
        overwritten with NaNs.

    Returns
    -------
    DataFrame
        Combination of the provided DataFrames.

    See Also
    --------
    DataFrame.combine_first : Combine two DataFrame objects and default to
        non-null values in frame calling the method.

    Examples
    --------
    Combine using a simple function that chooses the smaller column.

    >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})
    >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2
    >>> df1.combine(df2, take_smaller)
       A  B
    0  0  3
    1  0  3

    Example using a true element-wise combine function.

    >>> df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 4]})
    >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> df1.combine(df2, np.minimum)
       A  B
    0  1  2
    1  0  3

    Using `fill_value` fills Nones prior to passing the column to the
    merge function.

    >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
    >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]})
    >>> df1.combine(df2, take_smaller, fill_value=-5)
       A    B
    0  0 -5.0
    1  0  4.0

    However, if the same element in both dataframes is None, that None
    is preserved

    >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]})
    >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]})
    >>> df1.combine(df2, take_smaller, fill_value=-5)
       A    B
    0  0 -5.0
    1  0  3.0

    Example that demonstrates the use of `overwrite` and behavior when
    the axis differ between the dataframes.

    >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]})
    >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2])
    >>> df1.combine(df2, take_smaller)
         A    B     C
    0  NaN  NaN   NaN
    1  NaN  3.0 -10.0
    2  NaN  3.0   1.0

    >>> df1.combine(df2, take_smaller, overwrite=False)
         A    B     C
    0  0.0  NaN   NaN
    1  0.0  3.0 -10.0
    2  NaN  3.0   1.0

    Demonstrating the preference of the passed in dataframe.

    >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2])
    >>> df2.combine(df1, take_smaller)
         A    B   C
    0  0.0  NaN NaN
    1  0.0  3.0 NaN
    2  NaN  3.0 NaN

    >>> df2.combine(df1, take_smaller, overwrite=False)
         A    B    C
    0  0.0  NaN  NaN
    1  0.0  3.0  1.0
    2  NaN  3.0  1.0
    """
    # Length of ``other`` before alignment, used by the empty-self shortcut.
    n_other_rows = len(other.index)

    this, other = self.align(other, copy=False)
    new_index = this.index

    # Shortcuts: one side empty and alignment did not extend the other.
    if other.empty and len(new_index) == len(self.index):
        return self.copy()

    if self.empty and len(other) == n_other_rows:
        return other.copy()

    # sorts if possible
    new_columns = this.columns.union(other.columns)
    combined = {}
    for col in new_columns:
        left = this[col]
        right = other[col]

        left_dtype = left.dtype
        right_dtype = right.dtype

        left_na = isna(left)
        right_na = isna(right)

        # don't overwrite columns unnecessarily
        # DO propagate if this column is not in the intersection
        if not overwrite and right_na.all():
            combined[col] = left.copy()
            continue

        if fill_value is not None:
            # Fill on copies so neither aligned frame is mutated.
            left = left.copy()
            right = right.copy()
            left[left_na] = fill_value
            right[right_na] = fill_value

        if col in self.columns:
            # if we have different dtypes, possibly promote
            common = find_common_type([left_dtype, right_dtype])
            if not is_dtype_equal(left_dtype, common):
                left = left.astype(common)
            if not is_dtype_equal(right_dtype, common):
                right = right.astype(common)
        else:
            # The column exists only in ``other``; the left side is an
            # all-NaN column created by alignment.  Try to give it the
            # other side's dtype.
            try:
                left = left.astype(right_dtype, copy=False)
            except ValueError:
                # e.g. the target dtype is an integer type
                pass

        values = func(left, right)
        combined[col] = maybe_downcast_to_dtype(values, left_dtype)

    return self._constructor(combined, index=new_index, columns=new_columns)
def update(
    self, other, join="left", overwrite=True, filter_func=None, errors="ignore"
) -> None:
    """
    Modify in place using non-NA values from another DataFrame.

    Aligns on indices. There is no return value.

    Parameters
    ----------
    other : DataFrame, or object coercible into a DataFrame
        Should have at least one matching index/column label
        with the original DataFrame. If a Series is passed,
        its name attribute must be set, and that will be
        used as the column name to align with the original DataFrame.
    join : {'left'}, default 'left'
        Only left join is implemented, keeping the index and columns of the
        original object.
    overwrite : bool, default True
        How to handle non-NA values for overlapping keys:

        * True: overwrite original DataFrame's values
          with values from `other`.
        * False: only update values that are NA in
          the original DataFrame.

        Ignored when `filter_func` is given.

    filter_func : callable(1d-array) -> bool 1d-array, optional
        Can choose to replace values other than NA. Return True for values
        that should be updated. When given, the ``errors='raise'`` overlap
        check is not performed.
    errors : {'raise', 'ignore'}, default 'ignore'
        If 'raise', will raise a ValueError if the DataFrame and `other`
        both contain non-NA data in the same place.

        .. versionchanged:: 0.24.0
           Changed from `raise_conflict=False|True`
           to `errors='ignore'|'raise'`.

    Returns
    -------
    None : method directly changes calling object

    Raises
    ------
    ValueError
        * When `errors='raise'` and there's overlapping non-NA data.
        * When `errors` is not either `'ignore'` or `'raise'`
    NotImplementedError
        * If `join != 'left'`

    See Also
    --------
    dict.update : Similar method for dictionaries.
    DataFrame.merge : For column(s)-on-columns(s) operations.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400, 500, 600]})
    >>> new_df = pd.DataFrame({'B': [4, 5, 6],
    ...                        'C': [7, 8, 9]})
    >>> df.update(new_df)
    >>> df
       A  B
    0  1  4
    1  2  5
    2  3  6

    The DataFrame's length does not increase as a result of the update,
    only values at matching index/column labels are updated.

    >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
    >>> df.update(new_df)
    >>> df
       A  B
    0  a  d
    1  b  e
    2  c  f

    For Series, its name attribute must be set.

    >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_column = pd.Series(['d', 'e'], name='B', index=[0, 2])
    >>> df.update(new_column)
    >>> df
       A  B
    0  a  d
    1  b  y
    2  c  e
    >>> df = pd.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = pd.DataFrame({'B': ['d', 'e']}, index=[1, 2])
    >>> df.update(new_df)
    >>> df
       A  B
    0  a  x
    1  b  d
    2  c  e

    If `other` contains NaNs the corresponding values are not updated
    in the original dataframe.

    >>> df = pd.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400, 500, 600]})
    >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]})
    >>> df.update(new_df)
    >>> df
       A      B
    0  1    4.0
    1  2  500.0
    2  3    6.0
    """
    import pandas.core.computation.expressions as expressions

    # TODO: Support other joins
    if join != "left":  # pragma: no cover
        raise NotImplementedError("Only left join is supported")
    if errors not in ("ignore", "raise"):
        raise ValueError("The parameter errors must be either 'ignore' or 'raise'")

    if not isinstance(other, DataFrame):
        other = DataFrame(other)

    # Left join: conform ``other`` to this frame's index and columns.
    other = other.reindex_like(self)

    for col in self.columns:
        this = self[col]._values
        that = other[col]._values

        # ``mask`` marks the positions that keep their current value.
        if filter_func is not None:
            with np.errstate(all="ignore"):
                mask = ~filter_func(this) | isna(that)
        else:
            if errors == "raise":
                other_has_data = notna(that)
                self_has_data = notna(this)
                if (other_has_data & self_has_data).any():
                    raise ValueError("Data overlaps.")

            if overwrite:
                mask = isna(that)
            else:
                mask = notna(this)

        # don't overwrite columns unnecessarily
        if mask.all():
            continue

        self[col] = expressions.where(mask, this, that)
'Max Speed': [380., 370., 24., 26.]}) +>>> df + Animal Max Speed +0 Falcon 380.0 +1 Falcon 370.0 +2 Parrot 24.0 +3 Parrot 26.0 +>>> df.groupby(['Animal']).mean() + Max Speed +Animal +Falcon 375.0 +Parrot 25.0 + +**Hierarchical Indexes** + +We can groupby different levels of a hierarchical index +using the `level` parameter: + +>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], +... ['Captive', 'Wild', 'Captive', 'Wild']] +>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) +>>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]}, +... index=index) +>>> df + Max Speed +Animal Type +Falcon Captive 390.0 + Wild 350.0 +Parrot Captive 30.0 + Wild 20.0 +>>> df.groupby(level=0).mean() + Max Speed +Animal +Falcon 370.0 +Parrot 25.0 +>>> df.groupby(level="Type").mean() + Max Speed +Type +Captive 210.0 +Wild 185.0 +""" + ) + @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) + def groupby( + self, + by=None, + axis=0, + level=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + ) -> "groupby_generic.DataFrameGroupBy": + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + axis = self._get_axis_number(axis) + + return groupby_generic.DataFrameGroupBy( + obj=self, + keys=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + ) + + _shared_docs[ + "pivot" + ] = """ + Return reshaped DataFrame organized by given index / column values. + + Reshape data (produce a "pivot" table) based on column values. Uses + unique values from specified `index` / `columns` to form axes of the + resulting DataFrame. This function does not support data + aggregation, multiple values will result in a MultiIndex in the + columns. See the :ref:`User Guide ` for more on reshaping. 
+ + Parameters + ----------%s + index : str or object, optional + Column to use to make new frame's index. If None, uses + existing index. + columns : str or object + Column to use to make new frame's columns. + values : str, object or a list of the previous, optional + Column(s) to use for populating new frame's values. If not + specified, all remaining columns will be used and the result will + have hierarchically indexed columns. + + .. versionchanged:: 0.23.0 + Also accept list of column names. + + Returns + ------- + DataFrame + Returns reshaped DataFrame. + + Raises + ------ + ValueError: + When there are any `index`, `columns` combinations with multiple + values. `DataFrame.pivot_table` when you need to aggregate. + + See Also + -------- + DataFrame.pivot_table : Generalization of pivot that can handle + duplicate values for one index/column pair. + DataFrame.unstack : Pivot based on the index values instead of a + column. + + Notes + ----- + For finer-tuned control, see hierarchical indexing documentation along + with the related stack/unstack methods. + + Examples + -------- + >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', + ... 'two'], + ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + ... 'baz': [1, 2, 3, 4, 5, 6], + ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + >>> df + foo bar baz zoo + 0 one A 1 x + 1 one B 2 y + 2 one C 3 z + 3 two A 4 q + 4 two B 5 w + 5 two C 6 t + + >>> df.pivot(index='foo', columns='bar', values='baz') + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index='foo', columns='bar')['baz'] + bar A B C + foo + one 1 2 3 + two 4 5 6 + + >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) + baz zoo + bar A B C A B C + foo + one 1 2 3 x y z + two 4 5 6 q w t + + A ValueError is raised if there are any duplicates. + + >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], + ... "bar": ['A', 'A', 'B', 'C'], + ... 
"baz": [1, 2, 3, 4]}) + >>> df + foo bar baz + 0 one A 1 + 1 one A 2 + 2 two B 3 + 3 two C 4 + + Notice that the first two rows are the same for our `index` + and `columns` arguments. + + >>> df.pivot(index='foo', columns='bar', values='baz') + Traceback (most recent call last): + ... + ValueError: Index contains duplicate entries, cannot reshape + """ + + @Substitution("") + @Appender(_shared_docs["pivot"]) + def pivot(self, index=None, columns=None, values=None) -> "DataFrame": + from pandas.core.reshape.pivot import pivot + + return pivot(self, index=index, columns=columns, values=values) + + _shared_docs[ + "pivot_table" + ] = """ + Create a spreadsheet-style pivot table as a DataFrame. + + The levels in the pivot table will be stored in MultiIndex objects + (hierarchical indexes) on the index and columns of the result DataFrame. + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. + aggfunc : function, list of functions, dict, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + If dict is passed, the key is column to aggregate and value + is function or list of functions. + fill_value : scalar, default None + Value to replace missing values with. 
+ margins : bool, default False + Add all row / columns (e.g. for subtotal / grand totals). + dropna : bool, default True + Do not include columns whose entries are all NaN. + margins_name : str, default 'All' + Name of the row / column that will contain the totals + when margins is True. + observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionchanged:: 0.25.0 + + Returns + ------- + DataFrame + An Excel style pivot table. + + See Also + -------- + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + ... "E": [2, 4, 5, 5, 6, 6, 8, 9, 9]}) + >>> df + A B C D E + 0 foo one small 1 2 + 1 foo one large 2 4 + 2 foo one large 2 5 + 3 foo two small 3 5 + 4 foo two small 3 6 + 5 bar one large 4 6 + 6 bar one small 5 8 + 7 bar two small 6 9 + 8 bar two large 7 9 + + This first example aggregates values by taking the sum. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + We can also fill missing values using the `fill_value` parameter. + + >>> table = pd.pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum, fill_value=0) + >>> table + C large small + A B + bar one 4 5 + two 7 6 + foo one 4 1 + two 0 6 + + The next example aggregates by taking the mean across multiple columns. + + >>> table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'], + ... 
@Substitution("")
@Appender(_shared_docs["pivot_table"])
def pivot_table(
    self,
    values=None,
    index=None,
    columns=None,
    aggfunc="mean",
    fill_value=None,
    margins=False,
    dropna=True,
    margins_name="All",
    observed=False,
) -> "DataFrame":
    # The user-facing docstring is attached by the decorators above; this
    # method only forwards its arguments to the reshape implementation.
    # Imported locally, as in the rest of this class's reshape wrappers.
    from pandas.core.reshape.pivot import pivot_table as _pivot_table

    options = dict(
        values=values,
        index=index,
        columns=columns,
        aggfunc=aggfunc,
        fill_value=fill_value,
        margins=margins,
        dropna=dropna,
        margins_name=margins_name,
        observed=observed,
    )
    return _pivot_table(self, **options)
+ dropna : bool, default True + Whether to drop rows in the resulting Frame/Series with + missing values. Stacking a column level onto the index + axis can create combinations of index and column values + that are missing from the original dataframe. See Examples + section. + + Returns + ------- + DataFrame or Series + Stacked dataframe or series. + + See Also + -------- + DataFrame.unstack : Unstack prescribed level(s) from index axis + onto column axis. + DataFrame.pivot : Reshape dataframe from long format to wide + format. + DataFrame.pivot_table : Create a spreadsheet-style pivot table + as a DataFrame. + + Notes + ----- + The function is named by analogy with a collection of books + being reorganized from being side by side on a horizontal + position (the columns of the dataframe) to being stacked + vertically on top of each other (in the index of the + dataframe). + + Examples + -------- + **Single level columns** + + >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], + ... index=['cat', 'dog'], + ... columns=['weight', 'height']) + + Stacking a dataframe with a single level column axis returns a Series: + + >>> df_single_level_cols + weight height + cat 0 1 + dog 2 3 + >>> df_single_level_cols.stack() + cat weight 0 + height 1 + dog weight 2 + height 3 + dtype: int64 + + **Multi level columns: simple case** + + >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('weight', 'pounds')]) + >>> df_multi_level_cols1 = pd.DataFrame([[1, 2], [2, 4]], + ... index=['cat', 'dog'], + ... columns=multicol1) + + Stacking a dataframe with a multi-level column axis: + + >>> df_multi_level_cols1 + weight + kg pounds + cat 1 2 + dog 2 4 + >>> df_multi_level_cols1.stack() + weight + cat kg 1 + pounds 2 + dog kg 2 + pounds 4 + + **Missing values** + + >>> multicol2 = pd.MultiIndex.from_tuples([('weight', 'kg'), + ... ('height', 'm')]) + >>> df_multi_level_cols2 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], + ... index=['cat', 'dog'], + ... 
columns=multicol2) + + It is common to have missing values when stacking a dataframe + with multi-level columns, as the stacked dataframe typically + has more values than the original dataframe. Missing values + are filled with NaNs: + + >>> df_multi_level_cols2 + weight height + kg m + cat 1.0 2.0 + dog 3.0 4.0 + >>> df_multi_level_cols2.stack() + height weight + cat kg NaN 1.0 + m 2.0 NaN + dog kg NaN 3.0 + m 4.0 NaN + + **Prescribing the level(s) to be stacked** + + The first parameter controls which level or levels are stacked: + + >>> df_multi_level_cols2.stack(0) + kg m + cat height NaN 2.0 + weight 1.0 NaN + dog height NaN 4.0 + weight 3.0 NaN + >>> df_multi_level_cols2.stack([0, 1]) + cat height m 2.0 + weight kg 1.0 + dog height m 4.0 + weight kg 3.0 + dtype: float64 + + **Dropping missing values** + + >>> df_multi_level_cols3 = pd.DataFrame([[None, 1.0], [2.0, 3.0]], + ... index=['cat', 'dog'], + ... columns=multicol2) + + Note that rows where all values are missing are dropped by + default but this behaviour can be controlled via the dropna + keyword parameter: + + >>> df_multi_level_cols3 + weight height + kg m + cat NaN 1.0 + dog 2.0 3.0 + >>> df_multi_level_cols3.stack(dropna=False) + height weight + cat kg NaN NaN + m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN + >>> df_multi_level_cols3.stack(dropna=True) + height weight + cat m 1.0 NaN + dog kg NaN 2.0 + m 3.0 NaN + """ + from pandas.core.reshape.reshape import stack, stack_multiple + + if isinstance(level, (tuple, list)): + return stack_multiple(self, level, dropna=dropna) + else: + return stack(self, level, dropna=dropna) + + def explode(self, column: Union[str, Tuple]) -> "DataFrame": + """ + Transform each element of a list-like to a row, replicating index values. + + .. versionadded:: 0.25.0 + + Parameters + ---------- + column : str or tuple + Column to explode. + + Returns + ------- + DataFrame + Exploded lists to rows of the subset columns; + index will be duplicated for these rows. 
+ + Raises + ------ + ValueError : + if columns of the frame are not unique. + + See Also + -------- + DataFrame.unstack : Pivot a level of the (necessarily hierarchical) + index labels. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. + Series.explode : Explode a DataFrame from list-like columns to long format. + + Notes + ----- + This routine will explode list-likes including lists, tuples, + Series, and np.ndarray. The result dtype of the subset rows will + be object. Scalars will be returned unchanged. Empty list-likes will + result in a np.nan for that row. + + Examples + -------- + >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1}) + >>> df + A B + 0 [1, 2, 3] 1 + 1 foo 1 + 2 [] 1 + 3 [3, 4] 1 + + >>> df.explode('A') + A B + 0 1 1 + 0 2 1 + 0 3 1 + 1 foo 1 + 2 NaN 1 + 3 3 1 + 3 4 1 + """ + + if not (is_scalar(column) or isinstance(column, tuple)): + raise ValueError("column must be a scalar") + if not self.columns.is_unique: + raise ValueError("columns must be unique") + + df = self.reset_index(drop=True) + # TODO: use overload to refine return type of reset_index + assert df is not None # needed for mypy + result = df[column].explode() + result = df.drop([column], axis=1).join(result) + result.index = self.index.take(result.index) + result = result.reindex(columns=self.columns, copy=False) + + return result + + def unstack(self, level=-1, fill_value=None): + """ + Pivot a level of the (necessarily hierarchical) index labels. + + Returns a DataFrame having a new level of column labels whose inner-most level + consists of the pivoted index labels. + + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). + + The level involved will automatically get sorted. + + Parameters + ---------- + level : int, str, or list of these, default -1 (last level) + Level(s) of index to unstack, can pass level name. 
+ fill_value : int, str or dict + Replace NaN with this value if the unstack produces missing values. + + Returns + ------- + Series or DataFrame + + See Also + -------- + DataFrame.pivot : Pivot a table based on column values. + DataFrame.stack : Pivot a level of the column labels (inverse operation + from `unstack`). + + Examples + -------- + >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), + ... ('two', 'a'), ('two', 'b')]) + >>> s = pd.Series(np.arange(1.0, 5.0), index=index) + >>> s + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 + + >>> s.unstack(level=-1) + a b + one 1.0 2.0 + two 3.0 4.0 + + >>> s.unstack(level=0) + one two + a 1.0 3.0 + b 2.0 4.0 + + >>> df = s.unstack(level=0) + >>> df.unstack() + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 + """ + from pandas.core.reshape.reshape import unstack + + return unstack(self, level, fill_value) + + _shared_docs[ + "melt" + ] = """ + Unpivot a DataFrame from wide to long format, optionally leaving identifiers set. + + This function is useful to massage a DataFrame into a format where one + or more columns are identifier variables (`id_vars`), while all other + columns, considered measured variables (`value_vars`), are "unpivoted" to + the row axis, leaving just two non-identifier columns, 'variable' and + 'value'. + %(versionadded)s + Parameters + ---------- + id_vars : tuple, list, or ndarray, optional + Column(s) to use as identifier variables. + value_vars : tuple, list, or ndarray, optional + Column(s) to unpivot. If not specified, uses all columns that + are not set as `id_vars`. + var_name : scalar + Name to use for the 'variable' column. If None it uses + ``frame.columns.name`` or 'variable'. + value_name : scalar, default 'value' + Name to use for the 'value' column. + col_level : int or str, optional + If columns are a MultiIndex then use this level to melt. + + Returns + ------- + DataFrame + Unpivoted DataFrame. 
+ + See Also + -------- + %(other)s + pivot_table + DataFrame.pivot + Series.explode + + Examples + -------- + >>> df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'}, + ... 'B': {0: 1, 1: 3, 2: 5}, + ... 'C': {0: 2, 1: 4, 2: 6}}) + >>> df + A B C + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)sid_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=['A'], value_vars=['B', 'C']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + 3 a C 2 + 4 b C 4 + 5 c C 6 + + The names of 'variable' and 'value' columns can be customized: + + >>> %(caller)sid_vars=['A'], value_vars=['B'], + ... var_name='myVarname', value_name='myValname') + A myVarname myValname + 0 a B 1 + 1 b B 3 + 2 c B 5 + + If you have multi-index columns: + + >>> df.columns = [list('ABC'), list('DEF')] + >>> df + A B C + D E F + 0 a 1 2 + 1 b 3 4 + 2 c 5 6 + + >>> %(caller)scol_level=0, id_vars=['A'], value_vars=['B']) + A variable value + 0 a B 1 + 1 b B 3 + 2 c B 5 + + >>> %(caller)sid_vars=[('A', 'D')], value_vars=[('B', 'E')]) + (A, D) variable_0 variable_1 value + 0 a B E 1 + 1 b B E 3 + 2 c B E 5 + """ + + @Appender( + _shared_docs["melt"] + % dict( + caller="df.melt(", versionadded=".. versionadded:: 0.20.0\n", other="melt" + ) + ) + def melt( + self, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level=None, + ) -> "DataFrame": + from pandas.core.reshape.melt import melt + + return melt( + self, + id_vars=id_vars, + value_vars=value_vars, + var_name=var_name, + value_name=value_name, + col_level=col_level, + ) + + # ---------------------------------------------------------------------- + # Time series-related + + def diff(self, periods=1, axis=0) -> "DataFrame": + """ + First discrete difference of element. + + Calculates the difference of a DataFrame element compared with another + element in the DataFrame (default is the element in the same column + of the previous row). 
+ + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Take difference over rows (0) or columns (1). + + Returns + ------- + DataFrame + + See Also + -------- + Series.diff: First discrete difference for a Series. + DataFrame.pct_change: Percent change over given number of periods. + DataFrame.shift: Shift index by desired number of periods with an + optional time freq. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. + + Examples + -------- + Difference with previous row + + >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], + ... 'b': [1, 1, 2, 3, 5, 8], + ... 'c': [1, 4, 9, 16, 25, 36]}) + >>> df + a b c + 0 1 1 1 + 1 2 1 4 + 2 3 2 9 + 3 4 3 16 + 4 5 5 25 + 5 6 8 36 + + >>> df.diff() + a b c + 0 NaN NaN NaN + 1 1.0 0.0 3.0 + 2 1.0 1.0 5.0 + 3 1.0 1.0 7.0 + 4 1.0 2.0 9.0 + 5 1.0 3.0 11.0 + + Difference with previous column + + >>> df.diff(axis=1) + a b c + 0 NaN 0.0 0.0 + 1 NaN -1.0 3.0 + 2 NaN -1.0 7.0 + 3 NaN -1.0 13.0 + 4 NaN 0.0 20.0 + 5 NaN 2.0 28.0 + + Difference with 3rd previous row + + >>> df.diff(periods=3) + a b c + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 NaN NaN NaN + 3 3.0 2.0 15.0 + 4 3.0 4.0 21.0 + 5 3.0 6.0 27.0 + + Difference with following row + + >>> df.diff(periods=-1) + a b c + 0 -1.0 0.0 -3.0 + 1 -1.0 -1.0 -5.0 + 2 -1.0 -1.0 -7.0 + 3 -1.0 -2.0 -9.0 + 4 -1.0 -3.0 -11.0 + 5 NaN NaN NaN + """ + bm_axis = self._get_block_manager_axis(axis) + new_data = self._data.diff(n=periods, axis=bm_axis) + return self._constructor(new_data) + + # ---------------------------------------------------------------------- + # Function application + + def _gotitem( + self, + key: Union[str, List[str]], + ndim: int, + subset: Optional[Union[Series, ABCDataFrame]] = None, + ) -> Union[Series, ABCDataFrame]: + """ + Sub-classes to define. Return a sliced object. 
+ + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + if subset is None: + subset = self + elif subset.ndim == 1: # is Series + return subset + + # TODO: _shallow_copy(subset)? + return subset[key] + + _agg_summary_and_see_also_doc = dedent( + """ + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + See Also + -------- + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. + core.groupby.GroupBy : Perform operations over groups. + core.resample.Resampler : Perform operations over resampled bins. + core.window.Rolling : Perform operations over rolling window. + core.window.Expanding : Perform operations over expanding window. + core.window.EWM : Perform operation over exponential weighted + window. + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> df = pd.DataFrame([[1, 2, 3], + ... [4, 5, 6], + ... [7, 8, 9], + ... [np.nan, np.nan, np.nan]], + ... columns=['A', 'B', 'C']) + + Aggregate these functions over the rows. + + >>> df.agg(['sum', 'min']) + A B C + sum 12.0 15.0 18.0 + min 1.0 2.0 3.0 + + Different aggregations per column. + + >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + A B + max NaN 8.0 + min 1.0 2.0 + sum 12.0 NaN + + Aggregate over the columns. + + >>> df.agg("mean", axis="columns") + 0 2.0 + 1 5.0 + 2 8.0 + 3 NaN + dtype: float64 + """ + ) + + @Substitution( + see_also=_agg_summary_and_see_also_doc, + examples=_agg_examples_doc, + versionadded="\n.. 
versionadded:: 0.20.0\n", + **_shared_doc_kwargs, + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, func, axis=0, *args, **kwargs): + axis = self._get_axis_number(axis) + + result = None + try: + result, how = self._aggregate(func, axis=axis, *args, **kwargs) + except TypeError: + pass + if result is None: + return self.apply(func, axis=axis, args=args, **kwargs) + return result + + def _aggregate(self, arg, axis=0, *args, **kwargs): + if axis == 1: + # NDFrame.aggregate returns a tuple, and we need to transpose + # only result + result, how = self.T._aggregate(arg, *args, **kwargs) + result = result.T if result is not None else result + return result, how + return super()._aggregate(arg, *args, **kwargs) + + agg = aggregate + + @Appender(_shared_docs["transform"] % _shared_doc_kwargs) + def transform(self, func, axis=0, *args, **kwargs) -> "DataFrame": + axis = self._get_axis_number(axis) + if axis == 1: + return self.T.transform(func, *args, **kwargs).T + return super().transform(func, *args, **kwargs) + + def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds): + """ + Apply a function along an axis of the DataFrame. + + Objects passed to the function are Series objects whose index is + either the DataFrame's index (``axis=0``) or the DataFrame's columns + (``axis=1``). By default (``result_type=None``), the final return type + is inferred from the return type of the applied function. Otherwise, + it depends on the `result_type` argument. + + Parameters + ---------- + func : function + Function to apply to each column or row. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis along which the function is applied: + + * 0 or 'index': apply function to each column. + * 1 or 'columns': apply function to each row. + + raw : bool, default False + Determines if row or column is passed as a Series or ndarray object: + + * ``False`` : passes each row or column as a Series to the + function. 
+ * ``True`` : the passed function will receive ndarray objects + instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + + result_type : {'expand', 'reduce', 'broadcast', None}, default None + These only act when ``axis=1`` (columns): + + * 'expand' : list-like results will be turned into columns. + * 'reduce' : returns a Series if possible rather than expanding + list-like results. This is the opposite of 'expand'. + * 'broadcast' : results will be broadcast to the original shape + of the DataFrame, the original index and columns will be + retained. + + The default behaviour (None) depends on the return value of the + applied function: list-like results will be returned as a Series + of those. However if the apply function returns a Series these + are expanded to columns. + + .. versionadded:: 0.23.0 + + args : tuple + Positional arguments to pass to `func` in addition to the + array/series. + **kwds + Additional keyword arguments to pass as keyword arguments to + `func`. + + Returns + ------- + Series or DataFrame + Result of applying ``func`` along the given axis of the + DataFrame. + + See Also + -------- + DataFrame.applymap: For elementwise operations. + DataFrame.aggregate: Only perform aggregating type operations. + DataFrame.transform: Only perform transforming type operations.
+ + Examples + -------- + + >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B']) + >>> df + A B + 0 4 9 + 1 4 9 + 2 4 9 + + Using a numpy universal function (in this case the same as + ``np.sqrt(df)``): + + >>> df.apply(np.sqrt) + A B + 0 2.0 3.0 + 1 2.0 3.0 + 2 2.0 3.0 + + Using a reducing function on either axis + + >>> df.apply(np.sum, axis=0) + A 12 + B 27 + dtype: int64 + + >>> df.apply(np.sum, axis=1) + 0 13 + 1 13 + 2 13 + dtype: int64 + + Returning a list-like will result in a Series + + >>> df.apply(lambda x: [1, 2], axis=1) + 0 [1, 2] + 1 [1, 2] + 2 [1, 2] + dtype: object + + Passing result_type='expand' will expand list-like results + to columns of a Dataframe + + >>> df.apply(lambda x: [1, 2], axis=1, result_type='expand') + 0 1 + 0 1 2 + 1 1 2 + 2 1 2 + + Returning a Series inside the function is similar to passing + ``result_type='expand'``. The resulting column names + will be the Series index. + + >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1) + foo bar + 0 1 2 + 1 1 2 + 2 1 2 + + Passing ``result_type='broadcast'`` will ensure the same shape + result, whether list-like or scalar is returned by the function, + and broadcast it along the axis. The resulting column names will + be the originals. + + >>> df.apply(lambda x: [1, 2], axis=1, result_type='broadcast') + A B + 0 1 2 + 1 1 2 + 2 1 2 + """ + from pandas.core.apply import frame_apply + + op = frame_apply( + self, + func=func, + axis=axis, + raw=raw, + result_type=result_type, + args=args, + kwds=kwds, + ) + return op.get_result() + + def applymap(self, func) -> "DataFrame": + """ + Apply a function to a Dataframe elementwise. + + This method applies a function that accepts and returns a scalar + to every element of a DataFrame. + + Parameters + ---------- + func : callable + Python function, returns a single value from a single value. + + Returns + ------- + DataFrame + Transformed DataFrame. 
+ + See Also + -------- + DataFrame.apply : Apply a function along input axis of DataFrame. + + Notes + ----- + In the current implementation applymap calls `func` twice on the + first column/row to decide whether it can take a fast or slow + code path. This can lead to unexpected behavior if `func` has + side-effects, as they will take effect twice for the first + column/row. + + Examples + -------- + >>> df = pd.DataFrame([[1, 2.12], [3.356, 4.567]]) + >>> df + 0 1 + 0 1.000 2.120 + 1 3.356 4.567 + + >>> df.applymap(lambda x: len(str(x))) + 0 1 + 0 3 4 + 1 5 5 + + Note that a vectorized version of `func` often exists, which will + be much faster. You could square each number elementwise. + + >>> df.applymap(lambda x: x**2) + 0 1 + 0 1.000000 4.494400 + 1 11.262736 20.857489 + + But it's better to avoid applymap in that case. + + >>> df ** 2 + 0 1 + 0 1.000000 4.494400 + 1 11.262736 20.857489 + """ + + # if we have a dtype == 'M8[ns]', provide boxed values + def infer(x): + if x.empty: + return lib.map_infer(x, func) + return lib.map_infer(x.astype(object).values, func) + + return self.apply(infer) + + # ---------------------------------------------------------------------- + # Merging / joining methods + + def append( + self, other, ignore_index=False, verify_integrity=False, sort=False + ) -> "DataFrame": + """ + Append rows of `other` to the end of caller, returning a new object. + + Columns in `other` that are not in the caller are added as new columns. + + Parameters + ---------- + other : DataFrame or Series/dict-like object, or list of these + The data to append. + ignore_index : bool, default False + If True, do not use the index labels. + verify_integrity : bool, default False + If True, raise ValueError on creating index with duplicates. + sort : bool, default False + Sort columns if the columns of `self` and `other` are not aligned. + + .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + + Changed to not sort by default. 
+ + Returns + ------- + DataFrame + + See Also + -------- + concat : General function to concatenate DataFrame or Series objects. + + Notes + ----- + If a list of dict/series is passed and the keys are all contained in + the DataFrame's columns, the order of the columns in the resulting + DataFrame will be unchanged. + + Iteratively appending rows to a DataFrame can be more computationally + intensive than a single concatenate. A better solution is to append + those rows to a list and then concatenate the list with the original + DataFrame all at once. + + Examples + -------- + + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB')) + >>> df + A B + 0 1 2 + 1 3 4 + >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB')) + >>> df.append(df2) + A B + 0 1 2 + 1 3 4 + 0 5 6 + 1 7 8 + + With `ignore_index` set to True: + + >>> df.append(df2, ignore_index=True) + A B + 0 1 2 + 1 3 4 + 2 5 6 + 3 7 8 + + The following, while not recommended methods for generating DataFrames, + show two ways to generate a DataFrame from multiple data sources. + + Less efficient: + + >>> df = pd.DataFrame(columns=['A']) + >>> for i in range(5): + ... df = df.append({'A': i}, ignore_index=True) + >>> df + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + + More efficient: + + >>> pd.concat([pd.DataFrame([i], columns=['A']) for i in range(5)], + ...
ignore_index=True) + A + 0 0 + 1 1 + 2 2 + 3 3 + 4 4 + """ + if isinstance(other, (Series, dict)): + if isinstance(other, dict): + other = Series(other) + if other.name is None and not ignore_index: + raise TypeError( + "Can only append a Series if ignore_index=True " + "or if the Series has a name" + ) + + index = Index([other.name], name=self.index.name) + idx_diff = other.index.difference(self.columns) + try: + combined_columns = self.columns.append(idx_diff) + except TypeError: + combined_columns = self.columns.astype(object).append(idx_diff) + other = ( + other.reindex(combined_columns, copy=False) + .to_frame() + .T.infer_objects() + .rename_axis(index.names, copy=False) + ) + if not self.columns.equals(combined_columns): + self = self.reindex(columns=combined_columns) + elif isinstance(other, list): + if not other: + pass + elif not isinstance(other[0], DataFrame): + other = DataFrame(other) + if (self.columns.get_indexer(other.columns) >= 0).all(): + other = other.reindex(columns=self.columns) + + from pandas.core.reshape.concat import concat + + if isinstance(other, (list, tuple)): + to_concat = [self, *other] + else: + to_concat = [self, other] + return concat( + to_concat, + ignore_index=ignore_index, + verify_integrity=verify_integrity, + sort=sort, + ) + + def join( + self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False + ) -> "DataFrame": + """ + Join columns of another DataFrame. + + Join columns with `other` DataFrame either on index or on a key + column. Efficiently join multiple DataFrame objects by index at once by + passing a list. + + Parameters + ---------- + other : DataFrame, Series, or list of DataFrame + Index should be similar to one of the columns in this one. If a + Series is passed, its name attribute must be set, and that will be + used as the column name in the resulting joined DataFrame. 
+ on : str, list of str, or array-like, optional + Column or index level name(s) in the caller to join on the index + in `other`, otherwise joins index-on-index. If multiple + values given, the `other` DataFrame must have a MultiIndex. Can + pass an array as the join key if it is not already contained in + the calling DataFrame. Like an Excel VLOOKUP operation. + how : {'left', 'right', 'outer', 'inner'}, default 'left' + How to handle the operation of the two objects. + + * left: use calling frame's index (or column if on is specified) + * right: use `other`'s index. + * outer: form union of calling frame's index (or column if on is + specified) with `other`'s index, and sort it + lexicographically. + * inner: form intersection of calling frame's index (or column if + on is specified) with `other`'s index, preserving the order + of the calling's one. + lsuffix : str, default '' + Suffix to use from left frame's overlapping columns. + rsuffix : str, default '' + Suffix to use from right frame's overlapping columns. + sort : bool, default False + Order result DataFrame lexicographically by the join key. If False, + the order of the join key depends on the join type (how keyword). + + Returns + ------- + DataFrame + A dataframe containing columns from both the caller and `other`. + + See Also + -------- + DataFrame.merge : For column(s)-on-column(s) operations. + + Notes + ----- + Parameters `on`, `lsuffix`, and `rsuffix` are not supported when + passing a list of `DataFrame` objects. + + Support for specifying index levels as the `on` parameter was added + in version 0.23.0. + + Examples + -------- + >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], + ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + + >>> df + key A + 0 K0 A0 + 1 K1 A1 + 2 K2 A2 + 3 K3 A3 + 4 K4 A4 + 5 K5 A5 + + >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'], + ...
'B': ['B0', 'B1', 'B2']}) + + >>> other + key B + 0 K0 B0 + 1 K1 B1 + 2 K2 B2 + + Join DataFrames using their indexes. + + >>> df.join(other, lsuffix='_caller', rsuffix='_other') + key_caller A key_other B + 0 K0 A0 K0 B0 + 1 K1 A1 K1 B1 + 2 K2 A2 K2 B2 + 3 K3 A3 NaN NaN + 4 K4 A4 NaN NaN + 5 K5 A5 NaN NaN + + If we want to join using the key columns, we need to set key to be + the index in both `df` and `other`. The joined DataFrame will have + key as its index. + + >>> df.set_index('key').join(other.set_index('key')) + A B + key + K0 A0 B0 + K1 A1 B1 + K2 A2 B2 + K3 A3 NaN + K4 A4 NaN + K5 A5 NaN + + Another option to join using the key columns is to use the `on` + parameter. DataFrame.join always uses `other`'s index but we can use + any column in `df`. This method preserves the original DataFrame's + index in the result. + + >>> df.join(other.set_index('key'), on='key') + key A B + 0 K0 A0 B0 + 1 K1 A1 B1 + 2 K2 A2 B2 + 3 K3 A3 NaN + 4 K4 A4 NaN + 5 K5 A5 NaN + """ + return self._join_compat( + other, on=on, how=how, lsuffix=lsuffix, rsuffix=rsuffix, sort=sort + ) + + def _join_compat( + self, other, on=None, how="left", lsuffix="", rsuffix="", sort=False + ): + from pandas.core.reshape.merge import merge + from pandas.core.reshape.concat import concat + + if isinstance(other, Series): + if other.name is None: + raise ValueError("Other Series must have a name") + other = DataFrame({other.name: other}) + + if isinstance(other, DataFrame): + return merge( + self, + other, + left_on=on, + how=how, + left_index=on is None, + right_index=True, + suffixes=(lsuffix, rsuffix), + sort=sort, + ) + else: + if on is not None: + raise ValueError( + "Joining multiple DataFrames only supported for joining on index" + ) + + frames = [self] + list(other) + + can_concat = all(df.index.is_unique for df in frames) + + # join indexes only using concat + if can_concat: + if how == "left": + res = concat( + frames, axis=1, join="outer", verify_integrity=True, sort=sort + ) + return 
res.reindex(self.index, copy=False) + else: + return concat( + frames, axis=1, join=how, verify_integrity=True, sort=sort + ) + + joined = frames[0] + + for frame in frames[1:]: + joined = merge( + joined, frame, how=how, left_index=True, right_index=True + ) + + return joined + + @Substitution("") + @Appender(_merge_doc, indents=2) + def merge( + self, + right, + how="inner", + on=None, + left_on=None, + right_on=None, + left_index=False, + right_index=False, + sort=False, + suffixes=("_x", "_y"), + copy=True, + indicator=False, + validate=None, + ) -> "DataFrame": + from pandas.core.reshape.merge import merge + + return merge( + self, + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + sort=sort, + suffixes=suffixes, + copy=copy, + indicator=indicator, + validate=validate, + ) + + def round(self, decimals=0, *args, **kwargs) -> "DataFrame": + """ + Round a DataFrame to a variable number of decimal places. + + Parameters + ---------- + decimals : int, dict, Series + Number of decimal places to round each column to. If an int is + given, round each column to the same number of places. + Otherwise dict and Series round to variable numbers of places. + Column names should be in the keys if `decimals` is a + dict-like, or in the index if `decimals` is a Series. Any + columns not included in `decimals` will be left as is. Elements + of `decimals` which are not columns of the input will be + ignored. + *args + Additional keywords have no effect but might be accepted for + compatibility with numpy. + **kwargs + Additional keywords have no effect but might be accepted for + compatibility with numpy. + + Returns + ------- + DataFrame + A DataFrame with the affected columns rounded to the specified + number of decimal places. + + See Also + -------- + numpy.around : Round a numpy array to the given number of decimals. + Series.round : Round a Series to the given number of decimals. 
+ + Examples + -------- + >>> df = pd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)], + ... columns=['dogs', 'cats']) + >>> df + dogs cats + 0 0.21 0.32 + 1 0.01 0.67 + 2 0.66 0.03 + 3 0.21 0.18 + + By providing an integer each column is rounded to the same number + of decimal places + + >>> df.round(1) + dogs cats + 0 0.2 0.3 + 1 0.0 0.7 + 2 0.7 0.0 + 3 0.2 0.2 + + With a dict, the number of places for specific columns can be + specified with the column names as key and the number of decimal + places as value + + >>> df.round({'dogs': 1, 'cats': 0}) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + + Using a Series, the number of places for specific columns can be + specified with the column names as index and the number of + decimal places as value + + >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) + >>> df.round(decimals) + dogs cats + 0 0.2 0.0 + 1 0.0 1.0 + 2 0.7 0.0 + 3 0.2 0.0 + """ + from pandas.core.reshape.concat import concat + + def _dict_round(df, decimals): + for col, vals in df.items(): + try: + yield _series_round(vals, decimals[col]) + except KeyError: + yield vals + + def _series_round(s, decimals): + if is_integer_dtype(s) or is_float_dtype(s): + return s.round(decimals) + return s + + nv.validate_round(args, kwargs) + + if isinstance(decimals, (dict, Series)): + if isinstance(decimals, Series): + if not decimals.index.is_unique: + raise ValueError("Index of decimals must be unique") + new_cols = list(_dict_round(self, decimals)) + elif is_integer(decimals): + # Dispatch to Series.round + new_cols = [_series_round(v, decimals) for _, v in self.items()] + else: + raise TypeError("decimals must be an integer, a dict-like or a Series") + + if len(new_cols) > 0: + return self._constructor( + concat(new_cols, axis=1), index=self.index, columns=self.columns + ) + else: + return self + + # ---------------------------------------------------------------------- + # Statistical methods, etc. 
+ + def corr(self, method="pearson", min_periods=1) -> "DataFrame": + """ + Compute pairwise correlation of columns, excluding NA/null values. + + Parameters + ---------- + method : {'pearson', 'kendall', 'spearman'} or callable + Method of correlation: + + * pearson : standard correlation coefficient + * kendall : Kendall Tau correlation coefficient + * spearman : Spearman rank correlation + * callable: callable with input two 1d ndarrays + and returning a float. Note that the returned matrix from corr + will have 1 along the diagonals and will be symmetric + regardless of the callable's behavior. + + .. versionadded:: 0.24.0 + + min_periods : int, optional + Minimum number of observations required per pair of columns + to have a valid result. Currently only available for Pearson + and Spearman correlation. + + Returns + ------- + DataFrame + Correlation matrix. + + See Also + -------- + DataFrame.corrwith + Series.corr + + Examples + -------- + >>> def histogram_intersection(a, b): + ... v = np.minimum(a, b).sum().round(decimals=1) + ... return v + >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], + ... 
def cov(self, min_periods=None) -> "DataFrame":
    """
    Compute pairwise covariance of columns, excluding NA/null values.

    Compute the pairwise covariance among the series of a DataFrame.
    The returned data frame is the `covariance matrix
    <https://en.wikipedia.org/wiki/Covariance_matrix>`__ of the columns
    of the DataFrame.

    Both NA and null values are automatically excluded from the
    calculation. (See the note below about bias from missing values.)
    A threshold can be set for the minimum number of
    observations for each value created. Comparisons with observations
    below this threshold will be returned as ``NaN``.

    This method is generally used for the analysis of time series data to
    understand the relationship between different measures
    across time.

    Parameters
    ----------
    min_periods : int, optional
        Minimum number of observations required per pair of columns
        to have a valid result.

    Returns
    -------
    DataFrame
        The covariance matrix of the series of the DataFrame.

    See Also
    --------
    Series.cov : Compute covariance with another Series.
    core.window.EWM.cov: Exponential weighted sample covariance.
    core.window.Expanding.cov : Expanding sample covariance.
    core.window.Rolling.cov : Rolling sample covariance.

    Notes
    -----
    Returns the covariance matrix of the DataFrame's time series.
    The covariance is normalized by N-1.

    For DataFrames that have Series that are missing data (assuming that
    data is `missing at random
    <https://en.wikipedia.org/wiki/Missing_data#Missing_at_random>`__)
    the returned covariance matrix will be an unbiased estimate
    of the variance and covariance between the member Series.

    However, for many applications this estimate may not be acceptable
    because the estimate covariance matrix is not guaranteed to be positive
    semi-definite. This could lead to estimate correlations having
    absolute values which are greater than one, and/or a non-invertible
    covariance matrix. See `Estimation of covariance matrices
    <https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices>`__
    for more details.

    Examples
    --------
    >>> df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)],
    ...                   columns=['dogs', 'cats'])
    >>> df.cov()
              dogs      cats
    dogs  0.666667 -1.000000
    cats -1.000000  1.666667

    >>> np.random.seed(42)
    >>> df = pd.DataFrame(np.random.randn(1000, 5),
    ...                   columns=['a', 'b', 'c', 'd', 'e'])
    >>> df.cov()
              a         b         c         d         e
    a  0.998438 -0.020161  0.059277 -0.008943  0.014144
    b -0.020161  1.059352 -0.008543 -0.024738  0.009826
    c  0.059277 -0.008543  1.010670 -0.001486 -0.000271
    d -0.008943 -0.024738 -0.001486  0.921297 -0.013692
    e  0.014144  0.009826 -0.000271 -0.013692  0.977795

    **Minimum number of periods**

    This method also supports an optional ``min_periods`` keyword
    that specifies the required minimum number of non-NA observations for
    each column pair in order to have a valid result:

    >>> np.random.seed(42)
    >>> df = pd.DataFrame(np.random.randn(20, 3),
    ...                   columns=['a', 'b', 'c'])
    >>> df.loc[df.index[:5], 'a'] = np.nan
    >>> df.loc[df.index[5:10], 'b'] = np.nan
    >>> df.cov(min_periods=12)
              a         b         c
    a  0.316741       NaN -0.150812
    b       NaN  1.248003  0.191417
    c -0.150812  0.191417  0.895202
    """
    numeric_df = self._get_numeric_data()
    labels = numeric_df.columns
    row_labels = labels.copy()
    values = numeric_df.values

    if not notna(values).all():
        # Missing observations present: use the NaN-aware pairwise routine.
        result = libalgos.nancorr(ensure_float64(values), cov=True, minp=min_periods)
    elif min_periods is not None and min_periods > len(values):
        # Complete data, but fewer rows than requested: every entry is NaN.
        n_cols = values.shape[1]
        result = np.full((n_cols, n_cols), np.nan)
    else:
        # np.cov returns a 0-d array for a single column, so force the
        # result back to a square (n_columns, n_columns) matrix.
        side = len(labels)
        result = np.cov(values.T).reshape((side, side))

    return self._constructor(result, index=row_labels, columns=labels)
def count(self, axis=0, level=None, numeric_only=False):
    """
    Count non-NA cells for each column or row.

    The values `None`, `NaN`, `NaT`, and optionally `numpy.inf` (depending
    on `pandas.options.mode.use_inf_as_na`) are considered NA.

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        If 0 or 'index' counts are generated for each column.
        If 1 or 'columns' counts are generated for each **row**.
    level : int or str, optional
        If the axis is a `MultiIndex` (hierarchical), count along a
        particular `level`, collapsing into a `DataFrame`.
        A `str` specifies the level name.
    numeric_only : bool, default False
        Include only `float`, `int` or `boolean` data.

    Returns
    -------
    Series or DataFrame
        For each column/row the number of non-NA/null entries.
        If `level` is specified returns a `DataFrame`.

    See Also
    --------
    Series.count: Number of non-NA elements in a Series.
    DataFrame.shape: Number of DataFrame rows and columns (including NA
        elements).
    DataFrame.isna: Boolean same-sized DataFrame showing places of NA
        elements.

    Examples
    --------
    Constructing DataFrame from a dictionary:

    >>> df = pd.DataFrame({"Person":
    ...                    ["John", "Myla", "Lewis", "John", "Myla"],
    ...                    "Age": [24., np.nan, 21., 33, 26],
    ...                    "Single": [False, True, True, True, False]})
    >>> df
       Person   Age  Single
    0    John  24.0   False
    1    Myla   NaN    True
    2   Lewis  21.0    True
    3    John  33.0    True
    4    Myla  26.0   False

    Notice the uncounted NA values:

    >>> df.count()
    Person    5
    Age       4
    Single    5
    dtype: int64

    Counts for each **row**:

    >>> df.count(axis='columns')
    0    3
    1    2
    2    3
    3    3
    4    3
    dtype: int64

    Counts for one level of a `MultiIndex`:

    >>> df.set_index(["Person", "Single"]).count(level="Person")
            Age
    Person
    John      2
    Lewis     1
    Myla      1
    """
    axis = self._get_axis_number(axis)
    if level is not None:
        # Per-level counting is handled by a dedicated helper.
        return self._count_level(level, axis=axis, numeric_only=numeric_only)

    frame = self._get_numeric_data() if numeric_only else self

    # GH #423
    if len(frame._get_axis(axis)) == 0:
        # Nothing to count along this axis: every label gets zero.
        return Series(0, index=frame._get_agg_axis(axis)).astype("int64")

    # the or any_extension_types is really only hit for single-
    # column frames with an extension array
    keep_summed_series = frame._is_mixed_type or frame._data.any_extension_types
    non_null = notna(frame).sum(axis=axis)
    if keep_summed_series:
        result = non_null
    else:
        # GH13407
        result = Series(non_null.values, index=frame._get_agg_axis(axis))

    return result.astype("int64")
def _reduce(
    self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
):
    """
    Apply the reduction ``op`` to this frame along ``axis``.

    Parameters
    ----------
    op : callable
        Reduction invoked as ``op(values, axis=..., skipna=..., **kwds)``.
    name : str
        Reduction name; forwarded to ``ExtensionArray._reduce`` for
        extension-array blocks.
    axis : {0, 1, None}, default 0
        Axis to reduce over. ``None`` is only special-cased together with
        ``filter_type="bool"``; otherwise it is resolved through
        ``_get_axis_number``.
    skipna : bool, default True
        Forwarded to ``op``.
    numeric_only : bool or None, default None
        ``None`` tries the whole frame first and falls back to a filtered
        subset if ``op`` raises ``TypeError``; ``True`` restricts the data
        via ``filter_type`` up front; ``False`` uses the frame as is.
    filter_type : {None, "numeric", "bool"}, default None
        Which subset ``_get_data`` selects when filtering is needed.
    **kwds
        Extra keyword arguments forwarded to ``op``.

    Returns
    -------
    Series or scalar
        A Series indexed by the aggregation-axis labels, except when
        ``axis is None and filter_type == "bool"``, in which case the raw
        result of ``op`` is returned unwrapped.
    """
    if axis is None and filter_type == "bool":
        # Full reduction to a single value: no labels and no Series wrapper.
        labels = None
        constructor = None
    else:
        # TODO: Make other agg func handle axis=None properly
        axis = self._get_axis_number(axis)
        labels = self._get_agg_axis(axis)
        constructor = self._constructor

    def f(x):
        # Closes over ``axis``/``skipna``/``kwds`` of the enclosing call.
        return op(x, axis=axis, skipna=skipna, **kwds)

    def _get_data(axis_matters):
        # Select the sub-frame the reduction should see, per ``filter_type``.
        if filter_type is None or filter_type == "numeric":
            data = self._get_numeric_data()
        elif filter_type == "bool":
            if axis_matters:
                # GH#25101, GH#24434
                data = self._get_bool_data() if axis == 0 else self
            else:
                data = self._get_bool_data()
        else:  # pragma: no cover
            msg = (
                f"Generating numeric_only data with filter_type {filter_type} "
                "not supported."
            )
            raise NotImplementedError(msg)
        return data

    if numeric_only is not None and axis in [0, 1]:
        # Explicit numeric_only: reduce block by block through the manager.
        df = self
        if numeric_only is True:
            df = _get_data(axis_matters=True)
        if axis == 1:
            # Reducing over columns is done as a reduction over the rows of
            # the transpose, so only the axis=0 case remains below.
            df = df.T
            axis = 0

        out_dtype = "bool" if filter_type == "bool" else None

        def blk_func(values):
            if isinstance(values, ExtensionArray):
                return values._reduce(name, skipna=skipna, **kwds)
            else:
                return op(values, axis=1, skipna=skipna, **kwds)

        # After possibly _get_data and transposing, we are now in the
        # simple case where we can use BlockManager._reduce
        res = df._data.reduce(blk_func)
        assert isinstance(res, dict)
        if len(res):
            # Keys are expected to be the contiguous positions 0..len(res)-1.
            assert len(res) == max(list(res.keys())) + 1, res.keys()
        out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
        out.index = df.columns
        return out

    if numeric_only is None:
        values = self.values
        try:
            result = f(values)

            if filter_type == "bool" and is_object_dtype(values) and axis is None:
                # work around https://github.com/numpy/numpy/issues/10489
                # TODO: combine with hasattr(result, 'dtype') further down
                # hard since we don't have `values` down there.
                result = np.bool_(result)
        except TypeError:
            # e.g. in nanops trying to convert strs to float

            # try by-column first
            if filter_type is None and axis == 0:
                # this can end up with a non-reduction
                # but not always. if the types are mixed
                # with datelike then need to make sure a series

                # we only end up here if we have not specified
                # numeric_only and yet we have tried a
                # column-by-column reduction, where we have mixed type.
                # So let's just do what we can
                from pandas.core.apply import frame_apply

                opa = frame_apply(
                    self, func=f, result_type="expand", ignore_failures=True
                )
                result = opa.get_result()
                if result.ndim == self.ndim:
                    # Still 2-D: keep only the first row as the result.
                    result = result.iloc[0]
                return result

            # TODO: why doesnt axis matter here?
            data = _get_data(axis_matters=False)
            with np.errstate(all="ignore"):
                result = f(data.values)
            # The filtered frame may have fewer labels than the original.
            labels = data._get_agg_axis(axis)
    else:
        # Reached when numeric_only is set but axis is not 0 or 1.
        if numeric_only:
            data = _get_data(axis_matters=True)

            values = data.values
            labels = data._get_agg_axis(axis)
        else:
            values = self.values
        result = f(values)

    if hasattr(result, "dtype") and is_object_dtype(result.dtype):
        # Try to turn an object-dtype result back into a concrete dtype.
        try:
            if filter_type is None or filter_type == "numeric":
                result = result.astype(np.float64)
            elif filter_type == "bool" and notna(result).all():
                result = result.astype(np.bool_)
        except (ValueError, TypeError):

            # try to coerce to the original dtypes item by item if we can
            if axis == 0:
                result = coerce_to_dtypes(result, self.dtypes)

    if constructor is not None:
        result = Series(result, index=labels)
    return result
def _get_agg_axis(self, axis_num):
    """
    Return the labels that index the result of reducing along ``axis_num``.

    Reducing along axis 0 leaves one value per column, so the columns are
    returned; reducing along axis 1 leaves one value per row, so the index
    is returned.

    Raises
    ------
    ValueError
        If ``axis_num`` is neither 0 nor 1.
    """
    if axis_num == 0:
        return self.columns
    if axis_num == 1:
        return self.index
    raise ValueError(f"Axis must be 0 or 1 (got {repr(axis_num)})")
+ + The mode of a set of values is the value that appears most often. + It can be multiple values. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to iterate over while searching for the mode: + + * 0 or 'index' : get mode of each column + * 1 or 'columns' : get mode of each row. + + numeric_only : bool, default False + If True, only apply to numeric columns. + dropna : bool, default True + Don't consider counts of NaN/NaT. + + .. versionadded:: 0.24.0 + + Returns + ------- + DataFrame + The modes of each column or row. + + See Also + -------- + Series.mode : Return the highest frequency value in a Series. + Series.value_counts : Return the counts of values in a Series. + + Examples + -------- + >>> df = pd.DataFrame([('bird', 2, 2), + ... ('mammal', 4, np.nan), + ... ('arthropod', 8, 0), + ... ('bird', 2, np.nan)], + ... index=('falcon', 'horse', 'spider', 'ostrich'), + ... columns=('species', 'legs', 'wings')) + >>> df + species legs wings + falcon bird 2 2.0 + horse mammal 4 NaN + spider arthropod 8 0.0 + ostrich bird 2 NaN + + By default, missing values are not considered, and the mode of wings + are both 0 and 2. The second row of species and legs contains ``NaN``, + because they have only one mode, but the DataFrame has two rows. + + >>> df.mode() + species legs wings + 0 bird 2.0 0.0 + 1 NaN NaN 2.0 + + Setting ``dropna=False`` ``NaN`` values are considered and they can be + the mode (like for wings). + + >>> df.mode(dropna=False) + species legs wings + 0 bird 2 NaN + + Setting ``numeric_only=True``, only the mode of numeric columns is + computed, and columns of other types are ignored. 
def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
    """
    Return values at the given quantile over requested axis.

    Parameters
    ----------
    q : float or array-like, default 0.5 (50% quantile)
        Value between 0 <= q <= 1, the quantile(s) to compute.
    axis : {0, 1, 'index', 'columns'} (default 0)
        Equals 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
    numeric_only : bool, default True
        If False, the quantile of datetime and timedelta data will be
        computed as well.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        This optional parameter specifies the interpolation method to use,
        when the desired quantile lies between two data points `i` and `j`:

        * linear: `i + (j - i) * fraction`, where `fraction` is the
          fractional part of the index surrounded by `i` and `j`.
        * lower: `i`.
        * higher: `j`.
        * nearest: `i` or `j` whichever is nearest.
        * midpoint: (`i` + `j`) / 2.

    Returns
    -------
    Series or DataFrame

        If ``q`` is an array, a DataFrame will be returned where the
          index is ``q``, the columns are the columns of self, and the
          values are the quantiles.
        If ``q`` is a float, a Series will be returned where the
          index is the columns of self and the values are the quantiles.

    See Also
    --------
    core.window.Rolling.quantile: Rolling quantile.
    numpy.percentile: Numpy function to compute the percentile.

    Examples
    --------
    >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
    ...                   columns=['a', 'b'])
    >>> df.quantile(.1)
    a    1.3
    b    3.7
    Name: 0.1, dtype: float64
    >>> df.quantile([.1, .5])
           a     b
    0.1  1.3   3.7
    0.5  2.5  55.0

    Specifying `numeric_only=False` will also compute the quantile of
    datetime and timedelta data.

    >>> df = pd.DataFrame({'A': [1, 2],
    ...                    'B': [pd.Timestamp('2010'),
    ...                          pd.Timestamp('2011')],
    ...                    'C': [pd.Timedelta('1 days'),
    ...                          pd.Timedelta('2 days')]})
    >>> df.quantile(0.5, numeric_only=False)
    A                    1.5
    B    2010-07-02 12:00:00
    C        1 days 12:00:00
    Name: 0.5, dtype: object
    """
    validate_percentile(q)

    data = self
    if numeric_only:
        data = self._get_numeric_data()
    axis = self._get_axis_number(axis)
    is_transposed = axis == 1

    if is_transposed:
        # Row-wise quantiles are computed on the transpose and flipped back.
        data = data.T

    if len(data.columns) == 0:
        # GH#23925 _get_numeric_data may have dropped all columns
        empty_cols = Index([], name=self.columns.name)
        if is_list_like(q):
            return self._constructor([], index=q, columns=empty_cols)
        return self._constructor_sliced(
            [], index=empty_cols, name=q, dtype=np.float64
        )

    raw = data._data.quantile(
        qs=q, axis=1, interpolation=interpolation, transposed=is_transposed
    )

    # A 2-D result is wrapped as a frame; otherwise as a Series named ``q``.
    if raw.ndim == 2:
        wrapped = self._constructor(raw)
    else:
        wrapped = self._constructor_sliced(raw, name=q)

    return wrapped.T if is_transposed else wrapped
def to_period(self, freq=None, axis=0, copy=True) -> "DataFrame":
    """
    Convert DataFrame from DatetimeIndex to PeriodIndex.

    Convert DataFrame from DatetimeIndex to PeriodIndex with desired
    frequency (inferred from index if not passed).

    Parameters
    ----------
    freq : str, optional
        Frequency of the PeriodIndex. Passed through to the axis'
        ``to_period``; inferred from the index if not passed.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to convert (the index by default).
    copy : bool, default True
        If False then underlying input data is not copied.

    Returns
    -------
    DataFrame with PeriodIndex
    """
    mgr = self._data
    if copy:
        mgr = mgr.copy()

    axis = self._get_axis_number(axis)
    if axis not in (0, 1):  # pragma: no cover
        raise AssertionError(f"Axis must be 0 or 1. Got {axis}")

    # The manager is addressed with the opposite axis number from the
    # frame: the index is manager axis 1, the columns are manager axis 0.
    labels = self.index if axis == 0 else self.columns
    mgr.set_axis(1 - axis, labels.to_period(freq=freq))

    return self._constructor(mgr)
+ + See Also + -------- + DataFrame.eq: Equality test for DataFrame. + Series.isin: Equivalent method on Series. + Series.str.contains: Test if pattern or regex is contained within a + string of a Series or Index. + + Examples + -------- + + >>> df = pd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, + ... index=['falcon', 'dog']) + >>> df + num_legs num_wings + falcon 2 2 + dog 4 0 + + When ``values`` is a list check whether every value in the DataFrame + is present in the list (which animals have 0 or 2 legs or wings) + + >>> df.isin([0, 2]) + num_legs num_wings + falcon True True + dog False True + + When ``values`` is a dict, we can pass values to check for each + column separately: + + >>> df.isin({'num_wings': [0, 3]}) + num_legs num_wings + falcon False False + dog False True + + When ``values`` is a Series or DataFrame the index and column must + match. Note that 'falcon' does not match based on the number of legs + in df2. + + >>> other = pd.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]}, + ... 
def _from_nested_dict(data):
    """
    Swap the two levels of a nested mapping.

    Turns ``{index: {column: value}}`` into ``{column: {index: value}}``.
    Outer keys of the result appear in the order their column is first
    seen while walking ``data``.
    """
    # TODO: this should be seriously cythonized
    by_column = {}
    for row_label, row in data.items():
        for col_label, value in row.items():
            by_column.setdefault(col_label, {})[row_label] = value
    return by_column
def _put_str(s, space):
    """
    Render ``s`` as text that fits a column ``space`` characters wide.

    The string form of ``s`` is cut to at most ``space`` characters and
    then padded on the right with blanks up to ``space``.
    """
    clipped = str(s)[:space]
    return clipped.ljust(space)
def _single_replace(self, to_replace, method, inplace, limit):
    """
    Replace values in a Series using a fill method.

    Used by ``replace`` when no replacement value is given: positions
    matching ``to_replace`` are treated as missing and filled with the
    fill function selected by ``method``.

    Returns ``None`` when ``inplace`` is true, otherwise the new Series.

    Raises
    ------
    TypeError
        If ``self`` is not 1-dimensional.
    """
    if self.ndim != 1:
        raise TypeError(
            f"cannot replace {to_replace} with method {method} on a "
            f"{type(self).__name__}"
        )

    # Remember the dtype before filling so a dtype change can be detected.
    dtype_before = self.dtype
    target = self.copy() if not inplace else self
    filler = missing.get_fill_func(method)

    to_fill = missing.mask_missing(target.values, to_replace)
    filled = filler(target.values, limit=limit, mask=to_fill)

    if filled.dtype == dtype_before and inplace:
        # Nothing further to do; presumably the fill already operated on
        # self's own values -- TODO confirm against missing.get_fill_func.
        return

    replaced = pd.Series(filled, index=self.index, dtype=self.dtype).__finalize__(
        self
    )

    if not inplace:
        return replaced

    self._update_inplace(replaced._data)
    return
def __init__(
    self,
    data: BlockManager,
    axes: Optional[List[Index]] = None,
    copy: bool = False,
    dtype: Optional[Dtype] = None,
    attrs: Optional[Mapping[Optional[Hashable], Any]] = None,
    fastpath: bool = False,
):
    """
    Store ``data`` and initialise the per-object bookkeeping attributes.

    Unless ``fastpath`` is true, ``data`` is first cast to ``dtype`` (or
    copied when ``copy`` is true and no ``dtype`` is given) and then
    reindexed against each entry of ``axes``. ``attrs`` is stored as a
    fresh ``dict``.
    """
    if not fastpath:
        # A dtype cast takes precedence over a plain copy.
        if dtype is not None:
            data = data.astype(dtype)
        elif copy:
            data = data.copy()

        if axes is not None:
            for axis_number, labels in enumerate(axes):
                data = data.reindex_axis(labels, axis=axis_number)

    attrs = {} if attrs is None else dict(attrs)

    # Written through object.__setattr__ rather than plain assignment, so
    # any __setattr__ defined on the class is not invoked.
    initial_state = (
        ("_is_copy", None),
        ("_data", data),
        ("_item_cache", {}),
        ("_attrs", attrs),
    )
    for attr_name, attr_value in initial_state:
        object.__setattr__(self, attr_name, attr_value)
mgr.blocks[0].values.dtype != dtype: + mgr = mgr.astype(dtype=dtype) + return mgr + + # ---------------------------------------------------------------------- + + @property + def attrs(self) -> Dict[Optional[Hashable], Any]: + """ + Dictionary of global attributes on this object. + + .. warning:: + + attrs is experimental and may change without warning. + """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[Optional[Hashable], Any]) -> None: + self._attrs = dict(value) + + def _validate_dtype(self, dtype): + """ validate the passed dtype """ + + if dtype is not None: + dtype = pandas_dtype(dtype) + + # a compound dtype + if dtype.kind == "V": + raise NotImplementedError( + "compound dtypes are not implemented" + f" in the {type(self).__name__} constructor" + ) + + return dtype + + # ---------------------------------------------------------------------- + # Construction + + @property + def _constructor(self: FrameOrSeries) -> Type[FrameOrSeries]: + """Used when a manipulation result has the same dimensions as the + original. + """ + raise AbstractMethodError(self) + + @property + def _constructor_sliced(self): + """Used when a manipulation result has one lower dimension(s) as the + original, such as DataFrame single columns slicing. 
+ """ + raise AbstractMethodError(self) + + @property + def _constructor_expanddim(self): + """Used when a manipulation result has one higher dimension as the + original, such as Series.to_frame() + """ + raise NotImplementedError + + # ---------------------------------------------------------------------- + # Axis + _AXIS_ALIASES = {"rows": 0} + _AXIS_IALIASES = {0: "rows"} + _stat_axis_number = 0 + _stat_axis_name = "index" + _ix = None + _AXIS_ORDERS: List[str] + _AXIS_NUMBERS: Dict[str, int] + _AXIS_NAMES: Dict[int, str] + _AXIS_REVERSED: bool + _info_axis_number: int + _info_axis_name: str + _AXIS_LEN: int + + @classmethod + def _setup_axes(cls, axes: List[str], docs: Dict[str, str]) -> None: + """ + Provide axes setup for the major PandasObjects. + + Parameters + ---------- + axes : the names of the axes in order (lowest to highest) + docs : docstrings for the axis properties + """ + info_axis = len(axes) - 1 + axes_are_reversed = len(axes) > 1 + + cls._AXIS_ORDERS = axes + cls._AXIS_NUMBERS = {a: i for i, a in enumerate(axes)} + cls._AXIS_LEN = len(axes) + cls._AXIS_NAMES = dict(enumerate(axes)) + cls._AXIS_REVERSED = axes_are_reversed + + cls._info_axis_number = info_axis + cls._info_axis_name = axes[info_axis] + + # setup the actual axis + def set_axis(a, i): + setattr(cls, a, properties.AxisProperty(i, docs.get(a, a))) + cls._internal_names_set.add(a) + + if axes_are_reversed: + for i, a in cls._AXIS_NAMES.items(): + set_axis(a, 1 - i) + else: + for i, a in cls._AXIS_NAMES.items(): + set_axis(a, i) + + def _construct_axes_dict(self, axes=None, **kwargs): + """Return an axes dictionary for myself.""" + d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)} + d.update(kwargs) + return d + + @staticmethod + def _construct_axes_dict_from(self, axes, **kwargs): + """Return an axes dictionary for the passed axes.""" + d = {a: ax for a, ax in zip(self._AXIS_ORDERS, axes)} + d.update(kwargs) + return d + + def _construct_axes_from_arguments( + self, 
args, kwargs, require_all: bool = False, sentinel=None + ): + """Construct and returns axes if supplied in args/kwargs. + + If require_all, raise if all axis arguments are not supplied + return a tuple of (axes, kwargs). + + sentinel specifies the default parameter when an axis is not + supplied; useful to distinguish when a user explicitly passes None + in scenarios where None has special meaning. + """ + + # construct the args + args = list(args) + for a in self._AXIS_ORDERS: + + # look for a argument by position + if a not in kwargs: + try: + kwargs[a] = args.pop(0) + except IndexError: + if require_all: + raise TypeError("not enough/duplicate arguments specified!") + + axes = {a: kwargs.pop(a, sentinel) for a in self._AXIS_ORDERS} + return axes, kwargs + + @classmethod + def _from_axes(cls: Type[FrameOrSeries], data, axes, **kwargs) -> FrameOrSeries: + # for construction from BlockManager + if isinstance(data, BlockManager): + return cls(data, **kwargs) + else: + if cls._AXIS_REVERSED: + axes = axes[::-1] + d = cls._construct_axes_dict_from(cls, axes, copy=False) + d.update(kwargs) + return cls(data, **d) + + @classmethod + def _get_axis_number(cls, axis): + axis = cls._AXIS_ALIASES.get(axis, axis) + if is_integer(axis): + if axis in cls._AXIS_NAMES: + return axis + else: + try: + return cls._AXIS_NUMBERS[axis] + except KeyError: + pass + raise ValueError(f"No axis named {axis} for object type {cls}") + + @classmethod + def _get_axis_name(cls, axis): + axis = cls._AXIS_ALIASES.get(axis, axis) + if isinstance(axis, str): + if axis in cls._AXIS_NUMBERS: + return axis + else: + try: + return cls._AXIS_NAMES[axis] + except KeyError: + pass + raise ValueError(f"No axis named {axis} for object type {cls}") + + def _get_axis(self, axis): + name = self._get_axis_name(axis) + return getattr(self, name) + + @classmethod + def _get_block_manager_axis(cls, axis): + """Map the axis to the block_manager axis.""" + axis = cls._get_axis_number(axis) + if cls._AXIS_REVERSED: + 
m = cls._AXIS_LEN - 1 + return m - axis + return axis + + def _get_axis_resolvers(self, axis: str) -> Dict[str, ABCSeries]: + """ + Build the name -> values resolvers for one axis. + + Each level of the axis is stored as a Series of its level values, + indexed by the axis itself. Named levels are keyed by their name; + unnamed levels are keyed by ``<prefix>level_<i>``, where ``<prefix>`` is + the first letter of ``axis``. The axis itself is stored under ``axis``. + """ + # index or columns + axis_index = getattr(self, axis) + d = dict() + prefix = axis[0] + + for i, name in enumerate(axis_index.names): + if name is not None: + key = level = name + else: + # prefix with 'i' or 'c' depending on the input axis + # e.g., you must do ilevel_0 for the 0th level of an unnamed + # MultiIndex + key = f"{prefix}level_{i}" + level = i + + level_values = axis_index.get_level_values(level) + s = level_values.to_series() + s.index = axis_index + d[key] = s + + # put the index/columns itself in the dict + if isinstance(axis_index, MultiIndex): + dindex = axis_index + else: + dindex = axis_index.to_series() + + d[axis] = dindex + return d + + def _get_index_resolvers(self) -> Dict[str, ABCSeries]: + """ + Merge the axis resolvers of every axis in ``_AXIS_ORDERS``. + + Integer keys are dropped; all other keys are passed through + ``clean_column_name``. + """ + from pandas.core.computation.parsing import clean_column_name + + d: Dict[str, ABCSeries] = {} + for axis_name in self._AXIS_ORDERS: + d.update(self._get_axis_resolvers(axis_name)) + + return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)} + + def _get_cleaned_column_resolvers(self) -> Dict[str, ABCSeries]: + """ + Return the special character free column resolvers of a dataframe. + + Column names with special characters are 'cleaned up' so that they can + be referred to by backtick quoting. + Used in :meth:`DataFrame.eval`. 
+ """ + from pandas.core.computation.parsing import clean_column_name + + if isinstance(self, ABCSeries): + return {clean_column_name(self.name): self} + + return { + clean_column_name(k): v for k, v in self.items() if not isinstance(k, int) + } + + @property + def _info_axis(self): + """Return the axis named by ``_info_axis_name``.""" + return getattr(self, self._info_axis_name) + + @property + def _stat_axis(self): + """Return the axis named by ``_stat_axis_name``.""" + return getattr(self, self._stat_axis_name) + + @property + def shape(self) -> Tuple[int, ...]: + """ + Return a tuple of axis dimensions. + """ + return tuple(len(self._get_axis(a)) for a in self._AXIS_ORDERS) + + @property + def axes(self) -> List[Index]: + """ + Return index label(s) of the internal NDFrame, in ``_AXIS_ORDERS`` order. + """ + # we do it this way because if we have reversed axes, then + # the block manager shows them reversed + return [self._get_axis(a) for a in self._AXIS_ORDERS] + + @property + def ndim(self) -> int: + """ + Return an int representing the number of axes / array dimensions. + + Return 1 if Series. Otherwise return 2 if DataFrame. + + See Also + -------- + ndarray.ndim : Number of array dimensions. + + Examples + -------- + >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s.ndim + 1 + + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.ndim + 2 + """ + return self._data.ndim + + @property + def size(self): + """ + Return an int representing the number of elements in this object. + + Return the number of rows if Series. Otherwise return the number of + rows times number of columns if DataFrame. + + See Also + -------- + ndarray.size : Number of elements in the array. 
+ + Examples + -------- + >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s.size + 3 + + >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df.size + 4 + """ + return np.prod(self.shape) + + @property + def _selected_obj(self: FrameOrSeries) -> FrameOrSeries: + """ internal compat with SelectionMixin """ + return self + + @property + def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries: + """ internal compat with SelectionMixin """ + return self + + def set_axis(self, labels, axis=0, inplace=False): + """ + Assign desired index to given axis. + + Indexes for column or row labels can be changed by assigning + a list-like or Index. + + .. versionchanged:: 0.21.0 + + The signature is now `labels` and `axis`, consistent with + the rest of pandas API. Previously, the `axis` and `labels` + arguments were respectively the first and second positional + arguments. + + Parameters + ---------- + labels : list-like, Index + The values for the new index. + + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to update. The value 0 identifies the rows, and 1 + identifies the columns. + + inplace : bool, default False + Whether to return a new %(klass)s instance. + + Returns + ------- + renamed : %(klass)s or None + An object of same type as caller if inplace=False, None otherwise. + + See Also + -------- + DataFrame.rename_axis : Alter the name of the index or columns. + + Examples + -------- + **Series** + + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + + >>> s.set_axis(['a', 'b', 'c'], axis=0) + a 1 + b 2 + c 3 + dtype: int64 + + **DataFrame** + + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + Change the row labels. + + >>> df.set_axis(['a', 'b', 'c'], axis='index') + A B + a 1 4 + b 2 5 + c 3 6 + + Change the column labels. + + >>> df.set_axis(['I', 'II'], axis='columns') + I II + 0 1 4 + 1 2 5 + 2 3 6 + + Now, update the labels inplace. 
+ + >>> df.set_axis(['i', 'ii'], axis='columns', inplace=True) + >>> df + i ii + 0 1 4 + 1 2 5 + 2 3 6 + """ + if inplace: + setattr(self, self._get_axis_name(axis), labels) + else: + obj = self.copy() + obj.set_axis(labels, axis=axis, inplace=True) + return obj + + def _set_axis(self, axis, labels) -> None: + self._data.set_axis(axis, labels) + self._clear_item_cache() + + def swapaxes(self: FrameOrSeries, axis1, axis2, copy=True) -> FrameOrSeries: + """ + Interchange axes and swap values axes appropriately. + + Returns + ------- + y : same as input + """ + i = self._get_axis_number(axis1) + j = self._get_axis_number(axis2) + + if i == j: + if copy: + return self.copy() + return self + + mapping = {i: j, j: i} + + new_axes = (self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)) + new_values = self.values.swapaxes(i, j) + if copy: + new_values = new_values.copy() + + return self._constructor(new_values, *new_axes).__finalize__(self) + + def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: + """ + Return DataFrame with requested index / column level(s) removed. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + level : int, str, or list-like + If a string is given, must be the name of a level + If list-like, elements must be names or positional indexes + of levels. + + axis : {0 or 'index', 1 or 'columns'}, default 0 + + Returns + ------- + DataFrame + DataFrame with requested index / column level(s) removed. + + Examples + -------- + >>> df = pd.DataFrame([ + ... [1, 2, 3, 4], + ... [5, 6, 7, 8], + ... [9, 10, 11, 12] + ... ]).set_index([0, 1]).rename_axis(['a', 'b']) + + >>> df.columns = pd.MultiIndex.from_tuples([ + ... ('c', 'e'), ('d', 'f') + ... 
], names=['level_1', 'level_2']) + + >>> df + level_1 c d + level_2 e f + a b + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + + >>> df.droplevel('a') + level_1 c d + level_2 e f + b + 2 3 4 + 6 7 8 + 10 11 12 + + >>> df.droplevel('level_2', axis=1) + level_1 c d + a b + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + """ + labels = self._get_axis(axis) + new_labels = labels.droplevel(level) + result = self.set_axis(new_labels, axis=axis, inplace=False) + return result + + def pop(self: FrameOrSeries, item) -> FrameOrSeries: + """ + Return item and drop from frame. Raise KeyError if not found. + + Parameters + ---------- + item : str + Label of column to be popped. + + Returns + ------- + Series + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', np.nan)], + ... columns=('name', 'class', 'max_speed')) + >>> df + name class max_speed + 0 falcon bird 389.0 + 1 parrot bird 24.0 + 2 lion mammal 80.5 + 3 monkey mammal NaN + + >>> df.pop('class') + 0 bird + 1 bird + 2 mammal + 3 mammal + Name: class, dtype: object + + >>> df + name max_speed + 0 falcon 389.0 + 1 parrot 24.0 + 2 lion 80.5 + 3 monkey NaN + """ + result = self[item] + del self[item] + # best effort: a popped value without ``_reset_cacher`` is returned as-is + try: + result._reset_cacher() + except AttributeError: + pass + + return result + + def squeeze(self, axis=None): + """ + Squeeze 1 dimensional axis objects into scalars. + + Series or DataFrames with a single element are squeezed to a scalar. + DataFrames with a single column or a single row are squeezed to a + Series. Otherwise the object is unchanged. + + This method is most useful when you don't know if your + object is a Series or DataFrame, but you do know it has just a single + column. In that case you can safely call `squeeze` to ensure you have a + Series. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns', None}, default None + A specific axis to squeeze. By default, all length-1 axes are + squeezed. 
+ + Returns + ------- + DataFrame, Series, or scalar + The projection after squeezing `axis` or all the axes. + + See Also + -------- + Series.iloc : Integer-location based indexing for selecting scalars. + DataFrame.iloc : Integer-location based indexing for selecting Series. + Series.to_frame : Inverse of DataFrame.squeeze for a + single-column DataFrame. + + Examples + -------- + >>> primes = pd.Series([2, 3, 5, 7]) + + Slicing might produce a Series with a single value: + + >>> even_primes = primes[primes % 2 == 0] + >>> even_primes + 0 2 + dtype: int64 + + >>> even_primes.squeeze() + 2 + + Squeezing objects with more than one value in every axis does nothing: + + >>> odd_primes = primes[primes % 2 == 1] + >>> odd_primes + 1 3 + 2 5 + 3 7 + dtype: int64 + + >>> odd_primes.squeeze() + 1 3 + 2 5 + 3 7 + dtype: int64 + + Squeezing is even more effective when used with DataFrames. + + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) + >>> df + a b + 0 1 2 + 1 3 4 + + Slicing a single column will produce a DataFrame with the columns + having only one value: + + >>> df_a = df[['a']] + >>> df_a + a + 0 1 + 1 3 + + So the columns can be squeezed down, resulting in a Series: + + >>> df_a.squeeze('columns') + 0 1 + 1 3 + Name: a, dtype: int64 + + Slicing a single row from a single column will produce a single + scalar DataFrame: + + >>> df_0a = df.loc[df.index < 1, ['a']] + >>> df_0a + a + 0 1 + + Squeezing the rows produces a single scalar Series: + + >>> df_0a.squeeze('rows') + a 1 + Name: 0, dtype: int64 + + Squeezing all axes will project directly into a scalar: + + >>> df_0a.squeeze() + 1 + """ + axis = self._AXIS_NAMES if axis is None else (self._get_axis_number(axis),) + return self.iloc[ + tuple( + 0 if i in axis and len(a) == 1 else slice(None) + for i, a in enumerate(self.axes) + ) + ] + + def swaplevel(self: FrameOrSeries, i=-2, j=-1, axis=0) -> FrameOrSeries: + """ + Swap levels i and j in a MultiIndex on a particular axis + + Parameters + 
---------- + i, j : int, str (can be mixed) + Level of index to be swapped. Can pass level name as string. + + Returns + ------- + swapped : same type as caller (new object) + """ + axis = self._get_axis_number(axis) + result = self.copy() + labels = result._data.axes[axis] + result._data.set_axis(axis, labels.swaplevel(i, j)) + return result + + # ---------------------------------------------------------------------- + # Rename + + def rename( + self: FrameOrSeries, + mapper: Optional[Renamer] = None, + *, + index: Optional[Renamer] = None, + columns: Optional[Renamer] = None, + axis: Optional[Axis] = None, + copy: bool = True, + inplace: bool = False, + level: Optional[Level] = None, + errors: str = "ignore", + ) -> Optional[FrameOrSeries]: + """ + Alter axes input function or functions. Function / dict values must be + unique (1-to-1). Labels not contained in a dict / Series will be left + as-is. Extra labels listed don't throw an error. Alternatively, change + ``Series.name`` with a scalar value (Series only). + + Parameters + ---------- + %(axes)s : scalar, list-like, dict-like or function, optional + Scalar or list-like will alter the ``Series.name`` attribute, + and raise on DataFrame. + dict-like or functions are transformations to apply to + that axis' values + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Whether to return a new %(klass)s. If True then value of copy is + ignored. + level : int or level name, default None + In case of a MultiIndex, only rename labels in the specified + level. + errors : {'ignore', 'raise'}, default 'ignore' + If 'raise', raise a `KeyError` when a dict-like `mapper`, `index`, + or `columns` contains labels that are not present in the Index + being transformed. + If 'ignore', existing keys will be renamed and extra keys will be + ignored. 
+ + Returns + ------- + renamed : %(klass)s (new object) + + Raises + ------ + KeyError + If any of the labels is not found in the selected axis and + "errors='raise'". + + See Also + -------- + NDFrame.rename_axis + + Examples + -------- + + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> s.rename("my_name") # scalar, changes Series.name + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: int64 + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 + >>> s.rename({1: 3, 2: 5}) # mapping, changes labels + 0 1 + 3 2 + 5 3 + dtype: int64 + + Since ``DataFrame`` doesn't have a ``.name`` attribute, + only mapping-type arguments are allowed. + + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + >>> df.rename(2) + Traceback (most recent call last): + ... + TypeError: 'int' object is not callable + + ``DataFrame.rename`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + >>> df.rename(index=str, columns={"A": "a", "B": "c"}) + a c + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename(index=str, columns={"A": "a", "C": "c"}) + a B + 0 1 4 + 1 2 5 + 2 3 6 + + Using axis-style parameters + + >>> df.rename(str.lower, axis='columns') + a b + 0 1 4 + 1 2 5 + 2 3 6 + + >>> df.rename({1: 2, 2: 4}, axis='index') + A B + 0 1 4 + 2 2 5 + 4 3 6 + + See the :ref:`user guide <basics.rename>` for more. 
+ """ + if mapper is None and index is None and columns is None: + raise TypeError("must pass an index to rename") + + if index is not None or columns is not None: + if axis is not None: + raise TypeError( + "Cannot specify both 'axis' and any of 'index' or 'columns'" + ) + elif mapper is not None: + raise TypeError( + "Cannot specify both 'mapper' and any of 'index' or 'columns'" + ) + else: + # use the mapper argument + if axis and self._get_axis_number(axis) == 1: + columns = mapper + else: + index = mapper + + result = self if inplace else self.copy(deep=copy) + + for axis_no, replacements in enumerate((index, columns)): + if replacements is None: + continue + + ax = self._get_axis(axis_no) + baxis = self._get_block_manager_axis(axis_no) + f = com.get_rename_function(replacements) + + if level is not None: + level = ax._get_level_number(level) + + # GH 13473 + if not callable(replacements): + indexer = ax.get_indexer_for(replacements) + if errors == "raise" and len(indexer[indexer == -1]): + missing_labels = [ + label + for index, label in enumerate(replacements) + if indexer[index] == -1 + ] + raise KeyError(f"{missing_labels} not found in axis") + + result._data = result._data.rename_axis( + f, axis=baxis, copy=copy, level=level + ) + result._clear_item_cache() + + if inplace: + self._update_inplace(result._data) + return None + else: + return result.__finalize__(self) + + @rewrite_axis_style_signature("mapper", [("copy", True), ("inplace", False)]) + def rename_axis(self, mapper=lib.no_default, **kwargs): + """ + Set the name of the axis for the index or columns. + + Parameters + ---------- + mapper : scalar, list-like, optional + Value to set the axis name attribute. + index, columns : scalar, list-like, dict-like or function, optional + A scalar, list-like, dict-like or functions transformations to + apply to that axis' values. + + Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index`` + and/or ``columns``. + + .. 
versionchanged:: 0.24.0 + + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to rename. + copy : bool, default True + Also copy underlying data. + inplace : bool, default False + Modifies the object directly, instead of creating a new Series + or DataFrame. + + Returns + ------- + Series, DataFrame, or None + The same type as the caller or None if `inplace` is True. + + See Also + -------- + Series.rename : Alter Series index labels or name. + DataFrame.rename : Alter DataFrame index labels or name. + Index.rename : Set new names on index. + + Notes + ----- + ``DataFrame.rename_axis`` supports two calling conventions + + * ``(index=index_mapper, columns=columns_mapper, ...)`` + * ``(mapper, axis={'index', 'columns'}, ...)`` + + The first calling convention will only modify the names of + the index and/or the names of the Index object that is the columns. + In this case, the parameter ``copy`` is ignored. + + The second calling convention will modify the names of + the corresponding index if mapper is a list or a scalar. + However, if mapper is dict-like or a function, a ``ValueError`` is + raised; use ``rename`` to alter the axis *labels* with a mapper. + + We *highly* recommend using keyword arguments to clarify your + intent. + + Examples + -------- + **Series** + + >>> s = pd.Series(["dog", "cat", "monkey"]) + >>> s + 0 dog + 1 cat + 2 monkey + dtype: object + >>> s.rename_axis("animal") + animal + 0 dog + 1 cat + 2 monkey + dtype: object + + **DataFrame** + + >>> df = pd.DataFrame({"num_legs": [4, 4, 2], + ... "num_arms": [0, 0, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs num_arms + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("animal") + >>> df + num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + >>> df = df.rename_axis("limbs", axis="columns") + >>> df + limbs num_legs num_arms + animal + dog 4 0 + cat 4 0 + monkey 2 2 + + **MultiIndex** + + >>> df.index = pd.MultiIndex.from_product([['mammal'], + ... 
['dog', 'cat', 'monkey']], + ... names=['type', 'name']) + >>> df + limbs num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(index={'type': 'class'}) + limbs num_legs num_arms + class name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + + >>> df.rename_axis(columns=str.upper) + LIMBS num_legs num_arms + type name + mammal dog 4 0 + cat 4 0 + monkey 2 2 + """ + axes, kwargs = self._construct_axes_from_arguments( + (), kwargs, sentinel=lib.no_default + ) + copy = kwargs.pop("copy", True) + inplace = kwargs.pop("inplace", False) + axis = kwargs.pop("axis", 0) + if axis is not None: + axis = self._get_axis_number(axis) + + if kwargs: + raise TypeError( + "rename_axis() got an unexpected keyword " + f'argument "{list(kwargs.keys())[0]}"' + ) + + inplace = validate_bool_kwarg(inplace, "inplace") + + if mapper is not lib.no_default: + # Use v0.23 behavior if a scalar or list + non_mapper = is_scalar(mapper) or ( + is_list_like(mapper) and not is_dict_like(mapper) + ) + if non_mapper: + return self._set_axis_name(mapper, axis=axis, inplace=inplace) + else: + raise ValueError("Use `.rename` to alter labels with a mapper.") + else: + # Use new behavior. Means that index and/or columns + # is specified + result = self if inplace else self.copy(deep=copy) + + for axis in range(self._AXIS_LEN): + v = axes.get(self._AXIS_NAMES[axis]) + if v is lib.no_default: + continue + non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) + if non_mapper: + newnames = v + else: + f = com.get_rename_function(v) + curnames = self._get_axis(axis).names + newnames = [f(name) for name in curnames] + result._set_axis_name(newnames, axis=axis, inplace=True) + if not inplace: + return result + + def _set_axis_name(self, name, axis=0, inplace=False): + """ + Set the name(s) of the axis. + + Parameters + ---------- + name : str or list of str + Name(s) to set. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to set the label. 
The value 0 or 'index' specifies index, + and the value 1 or 'columns' specifies columns. + inplace : bool, default False + If `True`, do operation inplace and return None. + + .. versionadded:: 0.21.0 + + Returns + ------- + Series, DataFrame, or None + The same type as the caller or `None` if `inplace` is `True`. + + See Also + -------- + DataFrame.rename : Alter the axis labels of :class:`DataFrame`. + Series.rename : Alter the index labels or set the index name + of :class:`Series`. + Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`. + + Examples + -------- + >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, + ... ["dog", "cat", "monkey"]) + >>> df + num_legs + dog 4 + cat 4 + monkey 2 + >>> df._set_axis_name("animal") + num_legs + animal + dog 4 + cat 4 + monkey 2 + >>> df.index = pd.MultiIndex.from_product( + ... [["mammal"], ['dog', 'cat', 'monkey']]) + >>> df._set_axis_name(["type", "name"]) + num_legs + type name + mammal dog 4 + cat 4 + monkey 2 + """ + axis = self._get_axis_number(axis) + idx = self._get_axis(axis).set_names(name) + + inplace = validate_bool_kwarg(inplace, "inplace") + renamed = self if inplace else self.copy() + renamed.set_axis(idx, axis=axis, inplace=True) + if not inplace: + return renamed + + # ---------------------------------------------------------------------- + # Comparison Methods + + def _indexed_same(self, other) -> bool: + """Return True if every axis in ``_AXIS_ORDERS`` equals that of ``other``.""" + return all( + self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS + ) + + def equals(self, other): + """ + Test whether two objects contain the same elements. + + This function allows two Series or DataFrames to be compared against + each other to see if they have the same shape and elements. NaNs in + the same location are considered equal. The column headers do not + need to have the same type, but the elements within the columns must + be the same dtype. 
+ + Parameters + ---------- + other : Series or DataFrame + The other Series or DataFrame to be compared with the first. + + Returns + ------- + bool + True if all elements are the same in both objects, False + otherwise. + + See Also + -------- + Series.eq : Compare two Series objects of the same length + and return a Series where each element is True if the element + in each Series is equal, False otherwise. + DataFrame.eq : Compare two DataFrame objects of the same shape and + return a DataFrame where each element is True if the respective + element in each DataFrame is equal, False otherwise. + testing.assert_series_equal : Raises an AssertionError if left and + right are not equal. Provides an easy interface to ignore + inequality in dtypes, indexes and precision among others. + testing.assert_frame_equal : Like assert_series_equal, but targets + DataFrames. + numpy.array_equal : Return True if two arrays have the same shape + and elements, False otherwise. + + Notes + ----- + This function requires that the elements have the same dtype as their + respective elements in the other Series or DataFrame. However, the + column labels do not need to have the same type, as long as they are + still considered equal. + + Examples + -------- + >>> df = pd.DataFrame({1: [10], 2: [20]}) + >>> df + 1 2 + 0 10 20 + + DataFrames df and exactly_equal have the same types and values for + their elements and column labels, which will return True. + + >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) + >>> exactly_equal + 1 2 + 0 10 20 + >>> df.equals(exactly_equal) + True + + DataFrames df and different_column_type have the same element + types and values, but have different types for the column labels, + which will still return True. 
+ + >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) + >>> different_column_type + 1.0 2.0 + 0 10 20 + >>> df.equals(different_column_type) + True + + DataFrames df and different_data_type have different types for the + same values for their elements, and will return False even though + their column labels are the same values and types. + + >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) + >>> different_data_type + 1 2 + 0 10.0 20.0 + >>> df.equals(different_data_type) + False + """ + if not isinstance(other, self._constructor): + return False + return self._data.equals(other._data) + + # ------------------------------------------------------------------------- + # Unary Methods + + def __neg__(self): + values = com.values_from_object(self) + if is_bool_dtype(values): + arr = operator.inv(values) + elif ( + is_numeric_dtype(values) + or is_timedelta64_dtype(values) + or is_object_dtype(values) + ): + arr = operator.neg(values) + else: + raise TypeError(f"Unary negative expects numeric dtype, not {values.dtype}") + return self.__array_wrap__(arr) + + def __pos__(self): + values = com.values_from_object(self) + if is_bool_dtype(values) or is_period_arraylike(values): + arr = values + elif ( + is_numeric_dtype(values) + or is_timedelta64_dtype(values) + or is_object_dtype(values) + ): + arr = operator.pos(values) + else: + raise TypeError(f"Unary plus expects numeric dtype, not {values.dtype}") + return self.__array_wrap__(arr) + + def __invert__(self): + if not self.size: + # inv fails with 0 len + return self + + new_data = self._data.apply(operator.invert) + result = self._constructor(new_data).__finalize__(self) + return result + + def __nonzero__(self): + raise ValueError( + f"The truth value of a {type(self).__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ) + + __bool__ = __nonzero__ + + def bool(self): + """ + Return the bool of a single element PandasObject. 
+ + This must be a boolean scalar value, either True or False. Raise a + ValueError if the PandasObject does not have exactly 1 element, or that + element is not boolean + + Returns + ------- + bool + Same single boolean value converted to bool type. + """ + v = self.squeeze() + if isinstance(v, (bool, np.bool_)): + return bool(v) + elif is_scalar(v): + raise ValueError( + "bool cannot act on a non-boolean single element " + f"{type(self).__name__}" + ) + + self.__nonzero__() + + def __abs__(self: FrameOrSeries) -> FrameOrSeries: + return self.abs() + + def __round__(self: FrameOrSeries, decimals: int = 0) -> FrameOrSeries: + return self.round(decimals) + + # ------------------------------------------------------------------------- + # Label or Level Combination Helpers + # + # A collection of helper methods for DataFrame/Series operations that + # accept a combination of column/index labels and levels. All such + # operations should utilize/extend these methods when possible so that we + # have consistent precedence and validation logic throughout the library. + + def _is_level_reference(self, key, axis=0): + """ + Test whether a key is a level reference for a given axis. + + To be considered a level reference, `key` must be a string that: + - (axis=0): Matches the name of an index level and does NOT match + a column label. + - (axis=1): Matches the name of a column level and does NOT match + an index label. + + Parameters + ---------- + key : str + Potential level name for the given axis + axis : int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + is_level : bool + """ + axis = self._get_axis_number(axis) + + return ( + key is not None + and is_hashable(key) + and key in self.axes[axis].names + and not self._is_label_reference(key, axis=axis) + ) + + def _is_label_reference(self, key, axis=0) -> bool_t: + """ + Test whether a key is a label reference for a given axis. 
+ + To be considered a label reference, `key` must be a string that: + - (axis=0): Matches a column label + - (axis=1): Matches an index label + + Parameters + ---------- + key: str + Potential label name + axis: int, default 0 + Axis perpendicular to the axis that labels are associated with + (0 means search for column labels, 1 means search for index labels) + + Returns + ------- + is_label: bool + """ + axis = self._get_axis_number(axis) + other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) + + return ( + key is not None + and is_hashable(key) + and any(key in self.axes[ax] for ax in other_axes) + ) + + def _is_label_or_level_reference(self, key: str, axis: int = 0) -> bool_t: + """ + Test whether a key is a label or level reference for a given axis. + + To be considered either a label or a level reference, `key` must be a + string that: + - (axis=0): Matches a column label or an index level + - (axis=1): Matches an index label or a column level + + Parameters + ---------- + key: str + Potential label or level name + axis: int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + is_label_or_level: bool + """ + return self._is_level_reference(key, axis=axis) or self._is_label_reference( + key, axis=axis + ) + + def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None: + """ + Check whether `key` is ambiguous. + + By ambiguous, we mean that it matches both a level of the input + `axis` and a label of the other axis. + + Parameters + ---------- + key: str or object + Label or level name. + axis: int, default 0 + Axis that levels are associated with (0 for index, 1 for columns). 
+ + Raises + ------ + ValueError: `key` is ambiguous + """ + axis = self._get_axis_number(axis) + other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis) + + if ( + key is not None + and is_hashable(key) + and key in self.axes[axis].names + and any(key in self.axes[ax] for ax in other_axes) + ): + + # Build an informative and grammatical warning + level_article, level_type = ( + ("an", "index") if axis == 0 else ("a", "column") + ) + + label_article, label_type = ( + ("a", "column") if axis == 0 else ("an", "index") + ) + + msg = ( + f"'{key}' is both {level_article} {level_type} level and " + f"{label_article} {label_type} label, which is ambiguous." + ) + raise ValueError(msg) + + def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: + """ + Return a 1-D array of values associated with `key`, a label or level + from the given `axis`. + + Retrieval logic: + - (axis=0): Return column values if `key` matches a column label. + Otherwise return index level values if `key` matches an index + level. + - (axis=1): Return row values if `key` matches an index label. + Otherwise return column level values if 'key' matches a column + level + + Parameters + ---------- + key: str + Label or level name. + axis: int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + values: np.ndarray + + Raises + ------ + KeyError + if `key` matches neither a label nor a level + ValueError + if `key` matches multiple labels + FutureWarning + if `key` is ambiguous. 
This will become an ambiguity error in a + future version + """ + axis = self._get_axis_number(axis) + other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] + + if self._is_label_reference(key, axis=axis): + self._check_label_or_level_ambiguity(key, axis=axis) + values = self.xs(key, axis=other_axes[0])._values + elif self._is_level_reference(key, axis=axis): + values = self.axes[axis].get_level_values(key)._values + else: + raise KeyError(key) + + # Check for duplicates + if values.ndim > 1: + + if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex): + multi_message = ( + "\n" + "For a multi-index, the label must be a " + "tuple with elements corresponding to " + "each level." + ) + else: + multi_message = "" + + label_axis_name = "column" if axis == 0 else "index" + raise ValueError( + ( + f"The {label_axis_name} label '{key}' " + f"is not unique.{multi_message}" + ) + ) + + return values + + def _drop_labels_or_levels(self, keys, axis: int = 0): + """ + Drop labels and/or levels for the given `axis`. + + For each key in `keys`: + - (axis=0): If key matches a column label then drop the column. + Otherwise if key matches an index level then drop the level. + - (axis=1): If key matches an index label then drop the row. + Otherwise if key matches a column level then drop the level. 
+ + Parameters + ---------- + keys: str or list of str + labels or levels to drop + axis: int, default 0 + Axis that levels are associated with (0 for index, 1 for columns) + + Returns + ------- + dropped: DataFrame + + Raises + ------ + ValueError + if any `keys` match neither a label nor a level + """ + axis = self._get_axis_number(axis) + + # Validate keys + keys = com.maybe_make_list(keys) + invalid_keys = [ + k for k in keys if not self._is_label_or_level_reference(k, axis=axis) + ] + + if invalid_keys: + raise ValueError( + ( + "The following keys are not valid labels or " + f"levels for axis {axis}: {invalid_keys}" + ) + ) + + # Compute levels and labels to drop + levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)] + + labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)] + + # Perform copy upfront and then use inplace operations below. + # This ensures that we always perform exactly one copy. + # ``copy`` and/or ``inplace`` options could be added in the future. 
+ dropped = self.copy() + + if axis == 0: + # Handle dropping index levels + if levels_to_drop: + dropped.reset_index(levels_to_drop, drop=True, inplace=True) + + # Handle dropping columns labels + if labels_to_drop: + dropped.drop(labels_to_drop, axis=1, inplace=True) + else: + # Handle dropping column levels + if levels_to_drop: + if isinstance(dropped.columns, MultiIndex): + # Drop the specified levels from the MultiIndex + dropped.columns = dropped.columns.droplevel(levels_to_drop) + else: + # Drop the last level of Index by replacing with + # a RangeIndex + dropped.columns = RangeIndex(dropped.columns.size) + + # Handle dropping index labels + if labels_to_drop: + dropped.drop(labels_to_drop, axis=0, inplace=True) + + return dropped + + # ---------------------------------------------------------------------- + # Iteration + + def __hash__(self): + raise TypeError( + f"{repr(type(self).__name__)} objects are mutable, " + f"thus they cannot be hashed" + ) + + def __iter__(self): + """ + Iterate over info axis. + + Returns + ------- + iterator + Info axis as iterator. + """ + return iter(self._info_axis) + + # can we get a better explanation of this? + def keys(self): + """ + Get the 'info axis' (see Indexing for more). + + This is index for Series, columns for DataFrame. + + Returns + ------- + Index + Info axis. + """ + return self._info_axis + + def items(self): + """Iterate over (label, values) on info axis + + This is index for Series and columns for DataFrame. + + Returns + ------- + Generator + """ + for h in self._info_axis: + yield h, self[h] + + @Appender(items.__doc__) + def iteritems(self): + return self.items() + + def __len__(self) -> int: + """Returns length of info axis""" + return len(self._info_axis) + + def __contains__(self, key) -> bool_t: + """True if the key is in the info axis""" + return key in self._info_axis + + @property + def empty(self) -> bool_t: + """ + Indicator whether DataFrame is empty. 
+ + True if DataFrame is entirely empty (no items), meaning any of the + axes are of length 0. + + Returns + ------- + bool + If DataFrame is empty, return True, if not return False. + + See Also + -------- + Series.dropna + DataFrame.dropna + + Notes + ----- + If DataFrame contains only NaNs, it is still not considered empty. See + the example below. + + Examples + -------- + An example of an actual empty DataFrame. Notice the index is empty: + + >>> df_empty = pd.DataFrame({'A' : []}) + >>> df_empty + Empty DataFrame + Columns: [A] + Index: [] + >>> df_empty.empty + True + + If we only have NaNs in our DataFrame, it is not considered empty! We + will need to drop the NaNs to make the DataFrame empty: + + >>> df = pd.DataFrame({'A' : [np.nan]}) + >>> df + A + 0 NaN + >>> df.empty + False + >>> df.dropna().empty + True + """ + return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS) + + # ---------------------------------------------------------------------- + # Array Interface + + # This is also set in IndexOpsMixin + # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented + __array_priority__ = 1000 + + def __array__(self, dtype=None) -> np.ndarray: + return com.values_from_object(self) + + def __array_wrap__(self, result, context=None): + result = lib.item_from_zerodim(result) + if is_scalar(result): + # e.g. 
we get here with np.ptp(series) + # ptp also requires the item_from_zerodim + return result + d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False) + return self._constructor(result, **d).__finalize__(self) + + # ideally we would define this to avoid the getattr checks, but + # is slower + # @property + # def __array_interface__(self): + # """ provide numpy array interface method """ + # values = self.values + # return dict(typestr=values.dtype.str,shape=values.shape,data=values) + + # ---------------------------------------------------------------------- + # Picklability + + def __getstate__(self) -> Dict[str, Any]: + meta = {k: getattr(self, k, None) for k in self._metadata} + return dict( + _data=self._data, + _typ=self._typ, + _metadata=self._metadata, + attrs=self.attrs, + **meta, + ) + + def __setstate__(self, state): + + if isinstance(state, BlockManager): + self._data = state + elif isinstance(state, dict): + typ = state.get("_typ") + if typ is not None: + attrs = state.get("_attrs", {}) + object.__setattr__(self, "_attrs", attrs) + + # set in the order of internal names + # to avoid definitional recursion + # e.g. say fill_value needing _data to be + # defined + meta = set(self._internal_names + self._metadata) + for k in list(meta): + if k in state: + v = state[k] + object.__setattr__(self, k, v) + + for k, v in state.items(): + if k not in meta: + object.__setattr__(self, k, v) + + else: + self._unpickle_series_compat(state) + elif len(state) == 2: + self._unpickle_series_compat(state) + + self._item_cache = {} + + # ---------------------------------------------------------------------- + # Rendering Methods + + def __repr__(self) -> str: + # string representation based upon iterating over self + # (since, by definition, `PandasContainers` are iterable) + prepr = f"[{','.join(map(pprint_thing, self))}]" + return f"{type(self).__name__}({prepr})" + + def _repr_latex_(self): + """ + Returns a LaTeX representation for a particular object. 
+ Mainly for use with nbconvert (jupyter notebook conversion to pdf). + """ + if config.get_option("display.latex.repr"): + return self.to_latex() + else: + return None + + def _repr_data_resource_(self): + """ + Not a real Jupyter special repr method, but we use the same + naming convention. + """ + if config.get_option("display.html.table_schema"): + data = self.head(config.get_option("display.max_rows")) + payload = json.loads( + data.to_json(orient="table"), object_pairs_hook=collections.OrderedDict + ) + return payload + + # ---------------------------------------------------------------------- + # I/O Methods + + _shared_docs[ + "to_markdown" + ] = """ + Print %(klass)s in Markdown-friendly format. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output. + mode : str, optional + Mode in which file is opened. + **kwargs + These parameters will be passed to `tabulate`. + + Returns + ------- + str + %(klass)s in Markdown-friendly format. + """ + + _shared_docs[ + "to_excel" + ] = """ + Write %(klass)s to an Excel sheet. + + To write a single %(klass)s to an Excel .xlsx file it is only necessary to + specify a target file name. To write to multiple sheets it is necessary to + create an `ExcelWriter` object with a target file name, and specify a sheet + in the file to write to. + + Multiple sheets may be written to by specifying unique `sheet_name`. + With all data written to the file it is necessary to save the changes. + Note that creating an `ExcelWriter` object with a file name that already + exists will result in the contents of the existing file being erased. + + Parameters + ---------- + excel_writer : str or ExcelWriter object + File path or existing ExcelWriter. + sheet_name : str, default 'Sheet1' + Name of sheet which will contain DataFrame. 
+ na_rep : str, default '' + Missing data representation. + float_format : str, optional + Format string for floating point numbers. For example + ``float_format="%%.2f"`` will format 0.1234 to 0.12. + columns : sequence or list of str, optional + Columns to write. + header : bool or list of str, default True + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + index_label : str or sequence, optional + Column label for index column(s) if desired. If not specified, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. + startrow : int, default 0 + Upper left cell row to dump data frame. + startcol : int, default 0 + Upper left cell column to dump data frame. + engine : str, optional + Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this + via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and + ``io.excel.xlsm.writer``. + merge_cells : bool, default True + Write MultiIndex and Hierarchical Rows as merged cells. + encoding : str, optional + Encoding of the resulting excel file. Only necessary for xlwt, + other writers support unicode natively. + inf_rep : str, default 'inf' + Representation for infinity (there is no native representation for + infinity in Excel). + verbose : bool, default True + Display more information in the error logs. + freeze_panes : tuple of int (length 2), optional + Specifies the one-based bottommost row and rightmost column that + is to be frozen. + + See Also + -------- + to_csv : Write DataFrame to a comma-separated values (csv) file. + ExcelWriter : Class for writing DataFrame objects into excel sheets. + read_excel : Read an Excel file into a pandas DataFrame. + read_csv : Read a comma-separated values (csv) file into DataFrame.
+ + Notes + ----- + For compatibility with :meth:`~DataFrame.to_csv`, + to_excel serializes lists and dicts to strings before writing. + + Once a workbook has been saved it is not possible to write further data + without rewriting the whole workbook. + + Examples + -------- + + Create, write to and save a workbook: + + >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + >>> df1.to_excel("output.xlsx") # doctest: +SKIP + + To specify the sheet name: + + >>> df1.to_excel("output.xlsx", + ... sheet_name='Sheet_name_1') # doctest: +SKIP + + If you wish to write to more than one sheet in the workbook, it is + necessary to specify an ExcelWriter object: + + >>> df2 = df1.copy() + >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP + ... df1.to_excel(writer, sheet_name='Sheet_name_1') + ... df2.to_excel(writer, sheet_name='Sheet_name_2') + + ExcelWriter can also be used to append to an existing Excel file: + + >>> with pd.ExcelWriter('output.xlsx', + ... mode='a') as writer: # doctest: +SKIP + ...
df.to_excel(writer, sheet_name='Sheet_name_3') + + To set the library that is used to write the Excel file, + you can pass the `engine` keyword (the default engine is + automatically chosen depending on the file extension): + + >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP + """ + + @Appender(_shared_docs["to_excel"] % dict(klass="object")) + def to_excel( + self, + excel_writer, + sheet_name="Sheet1", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + engine=None, + merge_cells=True, + encoding=None, + inf_rep="inf", + verbose=True, + freeze_panes=None, + ) -> None: + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + from pandas.io.formats.excel import ExcelFormatter + + formatter = ExcelFormatter( + df, + na_rep=na_rep, + cols=columns, + header=header, + float_format=float_format, + index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep, + ) + formatter.write( + excel_writer, + sheet_name=sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + engine=engine, + ) + + def to_json( + self, + path_or_buf: Optional[FilePathOrBuffer] = None, + orient: Optional[str] = None, + date_format: Optional[str] = None, + double_precision: int = 10, + force_ascii: bool_t = True, + date_unit: str = "ms", + default_handler: Optional[Callable[[Any], JSONSerializable]] = None, + lines: bool_t = False, + compression: Optional[str] = "infer", + index: bool_t = True, + indent: Optional[int] = None, + ) -> Optional[str]: + """ + Convert the object to a JSON string. + + Note NaN's and None will be converted to null and datetime objects + will be converted to UNIX timestamps. + + Parameters + ---------- + path_or_buf : str or file handle, optional + File path or object. If not specified, the result is returned as + a string. + orient : str + Indication of expected JSON string format. 
+ + * Series: + + - default is 'index' + - allowed values are: {'split','records','index','table'}. + + * DataFrame: + + - default is 'columns' + - allowed values are: {'split', 'records', 'index', 'columns', + 'values', 'table'}. + + * The format of the JSON string: + + - 'split' : dict like {'index' -> [index], 'columns' -> [columns], + 'data' -> [values]} + - 'records' : list like [{column -> value}, ... , {column -> value}] + - 'index' : dict like {index -> {column -> value}} + - 'columns' : dict like {column -> {index -> value}} + - 'values' : just the values array + - 'table' : dict like {'schema': {schema}, 'data': {data}} + + Describing the data, where data component is like ``orient='records'``. + + .. versionchanged:: 0.20.0 + + date_format : {None, 'epoch', 'iso'} + Type of date conversion. 'epoch' = epoch milliseconds, + 'iso' = ISO8601. The default depends on the `orient`. For + ``orient='table'``, the default is 'iso'. For all other orients, + the default is 'epoch'. + double_precision : int, default 10 + The number of decimal places to use when encoding + floating point values. + force_ascii : bool, default True + Force encoded string to be ASCII. + date_unit : str, default 'ms' (milliseconds) + The time unit to encode to, governs timestamp and ISO8601 + precision. One of 's', 'ms', 'us', 'ns' for second, millisecond, + microsecond, and nanosecond respectively. + default_handler : callable, default None + Handler to call if object cannot otherwise be converted to a + suitable format for JSON. Should receive a single argument which is + the object to convert and return a serialisable object. + lines : bool, default False + If 'orient' is 'records' write out line delimited json format. Will + throw ValueError if incorrect 'orient' since others are not list + like. + + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} + + A string representing the compression to use in the output file, + only used when the first argument is a filename. 
By default, the + compression is inferred from the filename. + + .. versionadded:: 0.21.0 + .. versionchanged:: 0.24.0 + 'infer' option added and set to default + index : bool, default True + Whether to include the index values in the JSON string. Not + including the index (``index=False``) is only supported when + orient is 'split' or 'table'. + + .. versionadded:: 0.23.0 + + indent : int, optional + Length of whitespace used to indent each record. + + .. versionadded:: 1.0.0 + + Returns + ------- + None or str + If path_or_buf is None, returns the resulting json format as a + string. Otherwise returns None. + + See Also + -------- + read_json + + Notes + ----- + The behavior of ``indent=0`` varies from the stdlib, which does not + indent the output but does insert newlines. Currently, ``indent=0`` + and the default ``indent=None`` are equivalent in pandas, though this + may change in a future release. + + Examples + -------- + + >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... columns=['col 1', 'col 2']) + >>> df.to_json(orient='split') + '{"columns":["col 1","col 2"], + "index":["row 1","row 2"], + "data":[["a","b"],["c","d"]]}' + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. 
+ + >>> df.to_json(orient='records') + '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> df.to_json(orient='index') + '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' + + Encoding/decoding a Dataframe using ``'columns'`` formatted JSON: + + >>> df.to_json(orient='columns') + '{"col 1":{"row 1":"a","row 2":"c"},"col 2":{"row 1":"b","row 2":"d"}}' + + Encoding/decoding a Dataframe using ``'values'`` formatted JSON: + + >>> df.to_json(orient='values') + '[["a","b"],["c","d"]]' + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '{"schema": {"fields": [{"name": "index", "type": "string"}, + {"name": "col 1", "type": "string"}, + {"name": "col 2", "type": "string"}], + "primaryKey": "index", + "pandas_version": "0.20.0"}, + "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, + {"index": "row 2", "col 1": "c", "col 2": "d"}]}' + """ + + from pandas.io import json + + if date_format is None and orient == "table": + date_format = "iso" + elif date_format is None: + date_format = "epoch" + + config.is_nonnegative_int(indent) + indent = indent or 0 + + return json.to_json( + path_or_buf=path_or_buf, + obj=self, + orient=orient, + date_format=date_format, + double_precision=double_precision, + force_ascii=force_ascii, + date_unit=date_unit, + default_handler=default_handler, + lines=lines, + compression=compression, + index=index, + indent=indent, + ) + + def to_hdf( + self, + path_or_buf, + key: str, + mode: str = "a", + complevel: Optional[int] = None, + complib: Optional[str] = None, + append: bool_t = False, + format: Optional[str] = None, + index: bool_t = True, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + nan_rep=None, + dropna: Optional[bool_t] = None, + data_columns: Optional[List[str]] = None, + errors: str = "strict", + encoding: str = "UTF-8", + ) -> None: + """ + Write the contained data to an HDF5 file using HDFStore. 
+ + Hierarchical Data Format (HDF) is self-describing, allowing an + application to interpret the structure and contents of a file with + no outside information. One HDF file can hold a mix of related objects + which can be accessed as a group or as individual objects. + + In order to add another DataFrame or Series to an existing HDF file + please use append mode and a different key. + + For more information see the :ref:`user guide <io.hdf5>`. + + Parameters + ---------- + path_or_buf : str or pandas.HDFStore + File path or HDFStore object. + key : str + Identifier for the group in the store. + mode : {'a', 'w', 'r+'}, default 'a' + Mode to open file: + + - 'w': write, a new file is created (an existing file with + the same name would be deleted). + - 'a': append, an existing file is opened for reading and + writing, and if the file does not exist it is created. + - 'r+': similar to 'a', but the file must already exist. + complevel : {0-9}, optional + Specifies a compression level for data. + A value of 0 disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. + append : bool, default False + For Table formats, append the input data to the existing. + format : {'fixed', 'table', None}, default 'fixed' + Possible values: + + - 'fixed': Fixed format. Fast writing/reading. Not-appendable, + nor searchable. + - 'table': Table format. Write as a PyTables Table structure + which may perform worse but allow more flexible operations + like searching / selecting subsets of the data.
+ - If None, pd.get_option('io.hdf.default_format') is checked, + followed by fallback to "fixed". + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + encoding : str, default "UTF-8" + min_itemsize : dict or int, optional + Map column names to minimum string sizes for columns. + nan_rep : Any, optional + How to represent null values as str. + Not allowed with append=True. + data_columns : list of columns or True, optional + List of columns to create as indexed data columns for on-disk + queries, or True to use all columns. By default only the axes + of the object are indexed. See :ref:`io.hdf5-query-data-columns`. + Applicable only to format='table'. + + See Also + -------- + read_hdf : Read from HDF file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + DataFrame.to_sql : Write to a sql table. + DataFrame.to_feather : Write out feather-format for DataFrames. + DataFrame.to_csv : Write out to a csv file. + + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + ...
index=['a', 'b', 'c']) + >>> df.to_hdf('data.h5', key='df', mode='w') + + We can add another object to the same file: + + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.to_hdf('data.h5', key='s') + + Reading from HDF file: + + >>> pd.read_hdf('data.h5', 'df') + A B + a 1 4 + b 2 5 + c 3 6 + >>> pd.read_hdf('data.h5', 's') + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + Deleting file with data: + + >>> import os + >>> os.remove('data.h5') + """ + from pandas.io import pytables + + pytables.to_hdf( + path_or_buf, + key, + self, + mode=mode, + complevel=complevel, + complib=complib, + append=append, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + dropna=dropna, + data_columns=data_columns, + errors=errors, + encoding=encoding, + ) + + def to_sql( + self, + name: str, + con, + schema=None, + if_exists: str = "fail", + index: bool_t = True, + index_label=None, + chunksize=None, + dtype=None, + method=None, + ) -> None: + """ + Write records stored in a DataFrame to a SQL database. + + Databases supported by SQLAlchemy [1]_ are supported. Tables can be + newly created, appended to, or overwritten. + + Parameters + ---------- + name : str + Name of SQL table. + con : sqlalchemy.engine.Engine or sqlite3.Connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. Legacy support is provided for sqlite3.Connection objects. The user + is responsible for engine disposal and connection closure for the SQLAlchemy + connectable See `here \ + `_ + + schema : str, optional + Specify the schema (if database flavor supports this). If None, use + default schema. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + How to behave if the table already exists. + + * fail: Raise a ValueError. + * replace: Drop the table before inserting new values. + * append: Insert new values to the existing table. + + index : bool, default True + Write DataFrame index as a column. Uses `index_label` as the column + name in the table. 
+ index_label : str or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + chunksize : int, optional + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 legacy mode. If a + scalar is provided, it will be applied to all columns. + method : {None, 'multi', callable}, optional + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method <io.sql.method>`. + + .. versionadded:: 0.24.0 + + Raises + ------ + ValueError + When the table already exists and `if_exists` is 'fail' (the + default). + + See Also + -------- + read_sql : Read a DataFrame from a table. + + Notes + ----- + Timezone aware datetime columns will be written as + ``Timestamp with timezone`` type with SQLAlchemy if supported by the + database. Otherwise, the datetimes will be stored as timezone unaware + timestamps local to the original timezone. + + .. versionadded:: 0.24.0 + + References + ---------- + .. [1] http://docs.sqlalchemy.org + .. [2] https://www.python.org/dev/peps/pep-0249/ + + Examples + -------- + + Create an in-memory SQLite database. + + >>> from sqlalchemy import create_engine + >>> engine = create_engine('sqlite://', echo=False) + + Create a table from scratch with 3 rows.
+ + >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) + >>> df + name + 0 User 1 + 1 User 2 + 2 User 3 + + >>> df.to_sql('users', con=engine) + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] + + >>> df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) + >>> df1.to_sql('users', con=engine, if_exists='append') + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), + (0, 'User 4'), (1, 'User 5')] + + Overwrite the table with just ``df1``. + + >>> df1.to_sql('users', con=engine, if_exists='replace', + ... index_label='id') + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 4'), (1, 'User 5')] + + Specify the dtype (especially useful for integers with missing values). + Notice that while pandas is forced to store the data as floating point, + the database supports nullable integers. When fetching the data with + Python, we get back integer scalars. + + >>> df = pd.DataFrame({"A": [1, None, 2]}) + >>> df + A + 0 1.0 + 1 NaN + 2 2.0 + + >>> from sqlalchemy.types import Integer + >>> df.to_sql('integers', con=engine, index=False, + ... dtype={"A": Integer()}) + + >>> engine.execute("SELECT * FROM integers").fetchall() + [(1,), (None,), (2,)] + """ + from pandas.io import sql + + sql.to_sql( + self, + name, + con, + schema=schema, + if_exists=if_exists, + index=index, + index_label=index_label, + chunksize=chunksize, + dtype=dtype, + method=method, + ) + + def to_pickle( + self, + path, + compression: Optional[str] = "infer", + protocol: int = pickle.HIGHEST_PROTOCOL, + ) -> None: + """ + Pickle (serialize) object to file. + + Parameters + ---------- + path : str + File path where the pickled object will be stored. + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, \ + default 'infer' + A string representing the compression to use in the output file. By + default, infers from the file extension in specified path. 
+ protocol : int + Int which indicates which protocol should be used by the pickler, + default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible + values are 0, 1, 2, 3, 4. A negative value for the protocol + parameter is equivalent to setting its value to HIGHEST_PROTOCOL. + + .. [1] https://docs.python.org/3/library/pickle.html. + .. versionadded:: 0.21.0. + + See Also + -------- + read_pickle : Load pickled pandas object (or any object) from file. + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_sql : Write DataFrame to a SQL database. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Examples + -------- + >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)}) + >>> original_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> original_df.to_pickle("./dummy.pkl") + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") + >>> unpickled_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + + >>> import os + >>> os.remove("./dummy.pkl") + """ + from pandas.io.pickle import to_pickle + + to_pickle(self, path, compression=compression, protocol=protocol) + + def to_clipboard( + self, excel: bool_t = True, sep: Optional[str] = None, **kwargs + ) -> None: + r""" + Copy object to the system clipboard. + + Write a text representation of object to the system clipboard. + This can be pasted into Excel, for example. + + Parameters + ---------- + excel : bool, default True + Produce output in a csv format for easy pasting into excel. + + - True, use the provided separator for csv pasting. + - False, write a string representation of the object to the clipboard. + + sep : str, default ``'\t'`` + Field delimiter. + **kwargs + These parameters will be passed to DataFrame.to_csv. + + See Also + -------- + DataFrame.to_csv : Write a DataFrame to a comma-separated values + (csv) file. + read_clipboard : Read text from clipboard and pass to read_table. + + Notes + ----- + Requirements for your platform. 
+ + - Linux : `xclip`, or `xsel` (with `PyQt4` modules) + - Windows : none + - OS X : none + + Examples + -------- + Copy the contents of a DataFrame to the clipboard. + + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) + >>> df.to_clipboard(sep=',') + ... # Wrote the following to the system clipboard: + ... # ,A,B,C + ... # 0,1,2,3 + ... # 1,4,5,6 + + We can omit the the index by passing the keyword `index` and setting + it to false. + + >>> df.to_clipboard(sep=',', index=False) + ... # Wrote the following to the system clipboard: + ... # A,B,C + ... # 1,2,3 + ... # 4,5,6 + """ + from pandas.io import clipboards + + clipboards.to_clipboard(self, excel=excel, sep=sep, **kwargs) + + def to_xarray(self): + """ + Return an xarray object from the pandas object. + + Returns + ------- + xarray.DataArray or xarray.Dataset + Data in the pandas structure converted to Dataset if the object is + a DataFrame, or a DataArray if the object is a Series. + + See Also + -------- + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Notes + ----- + See the `xarray docs `__ + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), + ... ('parrot', 'bird', 24.0, 2), + ... ('lion', 'mammal', 80.5, 4), + ... ('monkey', 'mammal', np.nan, 4)], + ... columns=['name', 'class', 'max_speed', + ... 'num_legs']) + >>> df + name class max_speed num_legs + 0 falcon bird 389.0 2 + 1 parrot bird 24.0 2 + 2 lion mammal 80.5 4 + 3 monkey mammal NaN 4 + + >>> df.to_xarray() + + Dimensions: (index: 4) + Coordinates: + * index (index) int64 0 1 2 3 + Data variables: + name (index) object 'falcon' 'parrot' 'lion' 'monkey' + class (index) object 'bird' 'bird' 'mammal' 'mammal' + max_speed (index) float64 389.0 24.0 80.5 nan + num_legs (index) int64 2 2 4 4 + + >>> df['max_speed'].to_xarray() + + array([389. , 24. 
, 80.5, nan]) + Coordinates: + * index (index) int64 0 1 2 3 + + >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', + ... '2018-01-02', '2018-01-02']) + >>> df_multiindex = pd.DataFrame({'date': dates, + ... 'animal': ['falcon', 'parrot', + ... 'falcon', 'parrot'], + ... 'speed': [350, 18, 361, 15]}) + >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) + + >>> df_multiindex + speed + date animal + 2018-01-01 falcon 350 + parrot 18 + 2018-01-02 falcon 361 + parrot 15 + + >>> df_multiindex.to_xarray() + + Dimensions: (animal: 2, date: 2) + Coordinates: + * date (date) datetime64[ns] 2018-01-01 2018-01-02 + * animal (animal) object 'falcon' 'parrot' + Data variables: + speed (date, animal) int64 350 18 361 15 + """ + xarray = import_optional_dependency("xarray") + + if self.ndim == 1: + return xarray.DataArray.from_series(self) + else: + return xarray.Dataset.from_dataframe(self) + + @Substitution(returns=fmt.return_docstring) + def to_latex( + self, + buf=None, + columns=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + bold_rows=False, + column_format=None, + longtable=None, + escape=None, + encoding=None, + decimal=".", + multicolumn=None, + multicolumn_format=None, + multirow=None, + caption=None, + label=None, + ): + r""" + Render object to a LaTeX tabular, longtable, or nested table/tabular. + + Requires ``\usepackage{booktabs}``. The output can be copy/pasted + into a main LaTeX document or read from an external file + with ``\input{table.tex}``. + + .. versionchanged:: 0.20.2 + Added to Series. + + .. versionchanged:: 1.0.0 + Added caption and label arguments. + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : list of label, optional + The subset of columns to write. Writes all columns by default. 
+ col_space : int, optional + The minimum width of each column. + header : bool or list of str, default True + Write out the column names. If a list of strings is given, + it is assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + na_rep : str, default 'NaN' + Missing data representation. + formatters : list of functions or dict of {str: function}, optional + Formatter functions to apply to columns' elements by position or + name. The result of each function must be a unicode string. + List must be of length equal to the number of columns. + float_format : one-parameter function or str, optional, default None + Formatter for floating point numbers. For example + ``float_format="%%.2f"`` and ``float_format="{:0.2f}".format`` will + both result in 0.1234 being formatted as 0.12. + sparsify : bool, optional + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. By default, the value will be + read from the config module. + index_names : bool, default True + Prints the names of the indexes. + bold_rows : bool, default False + Make the row labels bold in the output. + column_format : str, optional + The columns format as specified in `LaTeX table format + `__ e.g. 'rcl' for 3 + columns. By default, 'l' will be used for all columns except + columns of numbers, which default to 'r'. + longtable : bool, optional + By default, the value will be read from the pandas config + module. Use a longtable environment instead of tabular. Requires + adding a \usepackage{longtable} to your LaTeX preamble. + escape : bool, optional + By default, the value will be read from the pandas config + module. When set to False prevents from escaping latex special + characters in column names. + encoding : str, optional + A string representing the encoding to use in the output file, + defaults to 'utf-8'. + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. 
+ multicolumn : bool, default True + Use \multicolumn to enhance MultiIndex columns. + The default will be read from the config module. + multicolumn_format : str, default 'l' + The alignment for multicolumns, similar to `column_format` + The default will be read from the config module. + multirow : bool, default False + Use \multirow to enhance MultiIndex rows. Requires adding a + \usepackage{multirow} to your LaTeX preamble. Will print + centered labels (instead of top-aligned) across the contained + rows, separating groups via clines. The default will be read + from the pandas config module. + caption : str, optional + The LaTeX caption to be placed inside ``\caption{}`` in the output. + + .. versionadded:: 1.0.0 + + label : str, optional + The LaTeX label to be placed inside ``\label{}`` in the output. + This is used with ``\ref{}`` in the main ``.tex`` file. + + .. versionadded:: 1.0.0 + %(returns)s + See Also + -------- + DataFrame.to_string : Render a DataFrame to a console-friendly + tabular output. + DataFrame.to_html : Render a DataFrame as an HTML table. + + Examples + -------- + >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], + ... 'mask': ['red', 'purple'], + ... 
'weapon': ['sai', 'bo staff']}) + >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE + \begin{tabular}{lll} + \toprule + name & mask & weapon \\ + \midrule + Raphael & red & sai \\ + Donatello & purple & bo staff \\ + \bottomrule + \end{tabular} + """ + # Get defaults from the pandas config + if self.ndim == 1: + self = self.to_frame() + if longtable is None: + longtable = config.get_option("display.latex.longtable") + if escape is None: + escape = config.get_option("display.latex.escape") + if multicolumn is None: + multicolumn = config.get_option("display.latex.multicolumn") + if multicolumn_format is None: + multicolumn_format = config.get_option("display.latex.multicolumn_format") + if multirow is None: + multirow = config.get_option("display.latex.multirow") + + formatter = DataFrameFormatter( + self, + columns=columns, + col_space=col_space, + na_rep=na_rep, + header=header, + index=index, + formatters=formatters, + float_format=float_format, + bold_rows=bold_rows, + sparsify=sparsify, + index_names=index_names, + escape=escape, + decimal=decimal, + ) + return formatter.to_latex( + buf=buf, + column_format=column_format, + longtable=longtable, + encoding=encoding, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow, + caption=caption, + label=label, + ) + + def to_csv( + self, + path_or_buf: Optional[FilePathOrBuffer] = None, + sep: str = ",", + na_rep: str = "", + float_format: Optional[str] = None, + columns: Optional[Sequence[Optional[Hashable]]] = None, + header: Union[bool_t, List[str]] = True, + index: bool_t = True, + index_label: Optional[Union[bool_t, str, Sequence[Optional[Hashable]]]] = None, + mode: str = "w", + encoding: Optional[str] = None, + compression: Optional[Union[str, Mapping[str, str]]] = "infer", + quoting: Optional[int] = None, + quotechar: str = '"', + line_terminator: Optional[str] = None, + chunksize: Optional[int] = None, + date_format: Optional[str] = None, + doublequote: 
bool_t = True, + escapechar: Optional[str] = None, + decimal: Optional[str] = ".", + ) -> Optional[str]: + r""" + Write object to a comma-separated values (csv) file. + + .. versionchanged:: 0.24.0 + The order of arguments for Series was changed. + + Parameters + ---------- + path_or_buf : str or file handle, default None + File path or object, if None is provided the result is returned as + a string. If a file object is passed it should be opened with + `newline=''`, disabling universal newlines. + + .. versionchanged:: 0.24.0 + + Was previously named "path" for Series. + + sep : str, default ',' + String of length 1. Field delimiter for the output file. + na_rep : str, default '' + Missing data representation. + float_format : str, default None + Format string for floating point numbers. + columns : sequence, optional + Columns to write. + header : bool or list of str, default True + Write out the column names. If a list of strings is given it is + assumed to be aliases for the column names. + + .. versionchanged:: 0.24.0 + + Previously defaulted to False for Series. + + index : bool, default True + Write row names (index). + index_label : str or sequence, or False, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the object uses MultiIndex. If + False do not print fields for index names. Use index_label=False + for easier importing in R. + mode : str + Python write mode, default 'w'. + encoding : str, optional + A string representing the encoding to use in the output file, + defaults to 'utf-8'. + compression : str or dict, default 'infer' + If str, represents compression mode. If dict, value at 'method' is + the compression mode. Compression mode may be any of the following + possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. 
If + compression mode is 'infer' and `path_or_buf` is path-like, then + detect compression mode from the following extensions: '.gz', + '.bz2', '.zip' or '.xz'. (otherwise no compression). If dict given + and mode is 'zip' or inferred as 'zip', other entries passed as + additional compression options. + + .. versionchanged:: 1.0.0 + + May now be a dict with key 'method' as compression mode + and other entries as additional compression options if + compression mode is 'zip'. + + quoting : optional constant from csv module + Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` + then floats are converted to strings and thus csv.QUOTE_NONNUMERIC + will treat them as non-numeric. + quotechar : str, default '\"' + String of length 1. Character used to quote fields. + line_terminator : str, optional + The newline character or character sequence to use in the output + file. Defaults to `os.linesep`, which depends on the OS in which + this method is called ('\n' for linux, '\r\n' for Windows, i.e.). + + .. versionchanged:: 0.24.0 + chunksize : int or None + Rows to write at a time. + date_format : str, default None + Format string for datetime objects. + doublequote : bool, default True + Control quoting of `quotechar` inside a field. + escapechar : str, default None + String of length 1. Character used to escape `sep` and `quotechar` + when appropriate. + decimal : str, default '.' + Character recognized as decimal separator. E.g. use ',' for + European data. + + Returns + ------- + None or str + If path_or_buf is None, returns the resulting csv format as a + string. Otherwise returns None. + + See Also + -------- + read_csv : Load a CSV file into a DataFrame. + to_excel : Write DataFrame to an Excel file. + + Examples + -------- + >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], + ... 'mask': ['red', 'purple'], + ... 
'weapon': ['sai', 'bo staff']}) + >>> df.to_csv(index=False) + 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' + + Create 'out.zip' containing 'out.csv' + + >>> compression_opts = dict(method='zip', + ... archive_name='out.csv') # doctest: +SKIP + >>> df.to_csv('out.zip', index=False, + ... compression=compression_opts) # doctest: +SKIP + """ + + df = self if isinstance(self, ABCDataFrame) else self.to_frame() + + from pandas.io.formats.csvs import CSVFormatter + + formatter = CSVFormatter( + df, + path_or_buf, + line_terminator=line_terminator, + sep=sep, + encoding=encoding, + compression=compression, + quoting=quoting, + na_rep=na_rep, + float_format=float_format, + cols=columns, + header=header, + index=index, + index_label=index_label, + mode=mode, + chunksize=chunksize, + quotechar=quotechar, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + decimal=decimal, + ) + formatter.save() + + if path_or_buf is None: + return formatter.path_or_buf.getvalue() + + return None + + # ---------------------------------------------------------------------- + # Fancy Indexing + + @classmethod + def _create_indexer(cls, name: str, indexer) -> None: + """Create an indexer like _name in the class. + + Kept for compatibility with geopandas. To be removed in the future. See GH27258 + """ + if getattr(cls, name, None) is None: + _indexer = functools.partial(indexer, name) + setattr(cls, name, property(_indexer, doc=indexer.__doc__)) + + # ---------------------------------------------------------------------- + # Lookup Caching + + def _set_as_cached(self, item, cacher) -> None: + """Set the _cacher attribute on the calling object with a weakref to + cacher. 
+ """ + self._cacher = (item, weakref.ref(cacher)) + + def _reset_cacher(self) -> None: + """Reset the cacher.""" + if hasattr(self, "_cacher"): + del self._cacher + + def _maybe_cache_changed(self, item, value) -> None: + """The object has called back to us saying maybe it has changed. + """ + self._data.set(item, value) + + @property + def _is_cached(self) -> bool_t: + """Return boolean indicating if self is cached or not.""" + return getattr(self, "_cacher", None) is not None + + def _get_cacher(self): + """return my cacher or None""" + cacher = getattr(self, "_cacher", None) + if cacher is not None: + cacher = cacher[1]() + return cacher + + def _maybe_update_cacher( + self, clear: bool_t = False, verify_is_copy: bool_t = True + ) -> None: + """ + See if we need to update our parent cacher if clear, then clear our + cache. + + Parameters + ---------- + clear : bool, default False + Clear the item cache. + verify_is_copy : bool, default True + Provide is_copy checks. + """ + + cacher = getattr(self, "_cacher", None) + if cacher is not None: + ref = cacher[1]() + + # we are trying to reference a dead referant, hence + # a copy + if ref is None: + del self._cacher + else: + # Note: we need to call ref._maybe_cache_changed even in the + # case where it will raise. (Uh, not clear why) + try: + ref._maybe_cache_changed(cacher[0], self) + except AssertionError: + # ref._data.setitem can raise + # AssertionError because of shape mismatch + pass + + if verify_is_copy: + self._check_setitem_copy(stacklevel=5, t="referant") + + if clear: + self._clear_item_cache() + + def _clear_item_cache(self) -> None: + self._item_cache.clear() + + # ---------------------------------------------------------------------- + # Indexing Methods + + def take( + self: FrameOrSeries, indices, axis=0, is_copy: Optional[bool_t] = None, **kwargs + ) -> FrameOrSeries: + """ + Return the elements in the given *positional* indices along an axis. 
+ + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. + + Parameters + ---------- + indices : array-like + An array of ints indicating which positions to take. + axis : {0 or 'index', 1 or 'columns', None}, default 0 + The axis on which to select elements. ``0`` means that we are + selecting rows, ``1`` means that we are selecting columns. + is_copy : bool + Before pandas 1.0, ``is_copy=False`` can be specified to ensure + that the return value is an actual copy. Starting with pandas 1.0, + ``take`` always returns a copy, and the keyword is therefore + deprecated. + + .. deprecated:: 1.0.0 + **kwargs + For compatibility with :meth:`numpy.take`. Has no effect on the + output. + + Returns + ------- + taken : same type as caller + An array-like containing the elements taken from the object. + + See Also + -------- + DataFrame.loc : Select a subset of a DataFrame by labels. + DataFrame.iloc : Select a subset of a DataFrame by positions. + numpy.take : Take elements from an array along an axis. + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ... ('parrot', 'bird', 24.0), + ... ('lion', 'mammal', 80.5), + ... ('monkey', 'mammal', np.nan)], + ... columns=['name', 'class', 'max_speed'], + ... index=[0, 2, 3, 1]) + >>> df + name class max_speed + 0 falcon bird 389.0 + 2 parrot bird 24.0 + 3 lion mammal 80.5 + 1 monkey mammal NaN + + Take elements at positions 0 and 3 along the axis 0 (default). + + Note how the actual indices selected (0 and 1) do not correspond to + our selected indices 0 and 3. That's because we are selecting the 0th + and 3rd rows, not rows whose indices equal 0 and 3. + + >>> df.take([0, 3]) + name class max_speed + 0 falcon bird 389.0 + 1 monkey mammal NaN + + Take elements at indices 1 and 2 along the axis 1 (column selection). 
+ + >>> df.take([1, 2], axis=1) + class max_speed + 0 bird 389.0 + 2 bird 24.0 + 3 mammal 80.5 + 1 mammal NaN + + We may take elements using negative integers for positive indices, + starting from the end of the object, just like with Python lists. + + >>> df.take([-1, -2]) + name class max_speed + 1 monkey mammal NaN + 3 lion mammal 80.5 + """ + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this.", + FutureWarning, + stacklevel=2, + ) + + nv.validate_take(tuple(), kwargs) + + self._consolidate_inplace() + + new_data = self._data.take( + indices, axis=self._get_block_manager_axis(axis), verify=True + ) + return self._constructor(new_data).__finalize__(self) + + def _take_with_is_copy( + self: FrameOrSeries, indices, axis=0, **kwargs + ) -> FrameOrSeries: + """ + Internal version of the `take` method that sets the `_is_copy` + attribute to keep track of the parent dataframe (using in indexing + for the SettingWithCopyWarning). + + See the docstring of `take` for full explanation of the parameters. + """ + result = self.take(indices=indices, axis=axis, **kwargs) + # Maybe set copy if we didn't actually change the index. + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + return result + + def xs(self, key, axis=0, level=None, drop_level: bool_t = True): + """ + Return cross-section from the Series/DataFrame. + + This method takes a `key` argument to select data at a particular + level of a MultiIndex. + + Parameters + ---------- + key : label or tuple of label + Label contained in the index, or partially in a MultiIndex. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis to retrieve cross-section on. + level : object, defaults to first n levels (n=1 or len(key)) + In case of a key partially contained in a MultiIndex, indicate + which levels are used. Levels can be referred by label or position. 
+ drop_level : bool, default True + If False, returns object with same levels as self. + + Returns + ------- + Series or DataFrame + Cross-section from the original Series or DataFrame + corresponding to the selected index levels. + + See Also + -------- + DataFrame.loc : Access a group of rows and columns + by label(s) or a boolean array. + DataFrame.iloc : Purely integer-location based indexing + for selection by position. + + Notes + ----- + `xs` can not be used to set values. + + MultiIndex Slicers is a generic way to get/set values on + any level or levels. + It is a superset of `xs` functionality, see + :ref:`MultiIndex Slicers `. + + Examples + -------- + >>> d = {'num_legs': [4, 4, 2, 2], + ... 'num_wings': [0, 0, 2, 2], + ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], + ... 'animal': ['cat', 'dog', 'bat', 'penguin'], + ... 'locomotion': ['walks', 'walks', 'flies', 'walks']} + >>> df = pd.DataFrame(data=d) + >>> df = df.set_index(['class', 'animal', 'locomotion']) + >>> df + num_legs num_wings + class animal locomotion + mammal cat walks 4 0 + dog walks 4 0 + bat flies 2 2 + bird penguin walks 2 2 + + Get values at specified index + + >>> df.xs('mammal') + num_legs num_wings + animal locomotion + cat walks 4 0 + dog walks 4 0 + bat flies 2 2 + + Get values at several indexes + + >>> df.xs(('mammal', 'dog')) + num_legs num_wings + locomotion + walks 4 0 + + Get values at specified index and level + + >>> df.xs('cat', level=1) + num_legs num_wings + class locomotion + mammal walks 4 0 + + Get values at several indexes and levels + + >>> df.xs(('bird', 'walks'), + ... 
level=[0, 'locomotion']) + num_legs num_wings + animal + penguin 2 2 + + Get values at specified column and axis + + >>> df.xs('num_wings', axis=1) + class animal locomotion + mammal cat walks 0 + dog walks 0 + bat flies 2 + bird penguin walks 2 + Name: num_wings, dtype: int64 + """ + axis = self._get_axis_number(axis) + labels = self._get_axis(axis) + if level is not None: + loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) + + # create the tuple of the indexer + _indexer = [slice(None)] * self.ndim + _indexer[axis] = loc + indexer = tuple(_indexer) + + result = self.iloc[indexer] + setattr(result, result._get_axis_name(axis), new_ax) + return result + + if axis == 1: + return self[key] + + self._consolidate_inplace() + + index = self.index + if isinstance(index, MultiIndex): + loc, new_index = self.index.get_loc_level(key, drop_level=drop_level) + else: + loc = self.index.get_loc(key) + + if isinstance(loc, np.ndarray): + if loc.dtype == np.bool_: + (inds,) = loc.nonzero() + return self._take_with_is_copy(inds, axis=axis) + else: + return self._take_with_is_copy(loc, axis=axis) + + if not is_scalar(loc): + new_index = self.index[loc] + + if is_scalar(loc): + new_values = self._data.fast_xs(loc) + + # may need to box a datelike-scalar + # + # if we encounter an array-like and we only have 1 dim + # that means that their are list/ndarrays inside the Series! 
+ # so just return them (GH 6394) + if not is_list_like(new_values) or self.ndim == 1: + return com.maybe_box_datetimelike(new_values) + + result = self._constructor_sliced( + new_values, + index=self.columns, + name=self.index[loc], + dtype=new_values.dtype, + ) + + else: + result = self.iloc[loc] + result.index = new_index + + # this could be a view + # but only in a single-dtyped view sliceable case + result._set_is_copy(self, copy=not result._is_view) + return result + + _xs: Callable = xs + + def __getitem__(self, item): + raise AbstractMethodError(self) + + def _get_item_cache(self, item): + """Return the cached item, item represents a label indexer.""" + cache = self._item_cache + res = cache.get(item) + if res is None: + values = self._data.get(item) + res = self._box_item_values(item, values) + cache[item] = res + res._set_as_cached(item, self) + + # for a chain + res._is_copy = self._is_copy + return res + + def _iget_item_cache(self, item): + """Return the cached item, item represents a positional indexer.""" + ax = self._info_axis + if ax.is_unique: + lower = self._get_item_cache(ax[item]) + else: + lower = self._take_with_is_copy(item, axis=self._info_axis_number) + return lower + + def _box_item_values(self, key, values): + raise AbstractMethodError(self) + + def _slice(self: FrameOrSeries, slobj: slice, axis=0, kind=None) -> FrameOrSeries: + """ + Construct a slice of this container. + + kind parameter is maintained for compatibility with Series slicing. 
+ """ + axis = self._get_block_manager_axis(axis) + result = self._constructor(self._data.get_slice(slobj, axis=axis)) + result = result.__finalize__(self) + + # this could be a view + # but only in a single-dtyped view sliceable case + is_copy = axis != 0 or result._is_view + result._set_is_copy(self, copy=is_copy) + return result + + def _set_item(self, key, value) -> None: + self._data.set(key, value) + self._clear_item_cache() + + def _set_is_copy(self, ref=None, copy: bool_t = True) -> None: + if not copy: + self._is_copy = None + else: + if ref is not None: + self._is_copy = weakref.ref(ref) + else: + self._is_copy = None + + def _check_is_chained_assignment_possible(self) -> bool_t: + """ + Check if we are a view, have a cacher, and are of mixed type. + If so, then force a setitem_copy check. + + Should be called just near setting a value + + Will return a boolean if it we are a view and are cached, but a + single-dtype meaning that the cacher should be updated following + setting. + """ + if self._is_view and self._is_cached: + ref = self._get_cacher() + if ref is not None and ref._is_mixed_type: + self._check_setitem_copy(stacklevel=4, t="referant", force=True) + return True + elif self._is_copy: + self._check_setitem_copy(stacklevel=4, t="referant") + return False + + def _check_setitem_copy(self, stacklevel=4, t="setting", force=False): + """ + + Parameters + ---------- + stacklevel : int, default 4 + the level to show of the stack when the error is output + t : str, the type of setting error + force : bool, default False + If True, then force showing an error. + + validate if we are doing a setitem on a chained copy. + + If you call this function, be sure to set the stacklevel such that the + user will see the error *at the level of setting* + + It is technically possible to figure out that we are setting on + a copy even WITH a multi-dtyped pandas object. In other words, some + blocks may be views while other are not. 
Currently _is_view will ALWAYS + return False for multi-blocks to avoid having to handle this case. + + df = DataFrame(np.arange(0,9), columns=['count']) + df['group'] = 'b' + + # This technically need not raise SettingWithCopy if both are view + # (which is not # generally guaranteed but is usually True. However, + # this is in general not a good practice and we recommend using .loc. + df.iloc[0:5]['group'] = 'a' + + """ + + # return early if the check is not needed + if not (force or self._is_copy): + return + + value = config.get_option("mode.chained_assignment") + if value is None: + return + + # see if the copy is not actually referred; if so, then dissolve + # the copy weakref + if self._is_copy is not None and not isinstance(self._is_copy, str): + r = self._is_copy() + if not gc.get_referents(r) or r.shape == self.shape: + self._is_copy = None + return + + # a custom message + if isinstance(self._is_copy, str): + t = self._is_copy + + elif t == "referant": + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame\n\n" + "See the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + else: + t = ( + "\n" + "A value is trying to be set on a copy of a slice from a " + "DataFrame.\n" + "Try using .loc[row_indexer,col_indexer] = value " + "instead\n\nSee the caveats in the documentation: " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/" + "indexing.html#returning-a-view-versus-a-copy" + ) + + if value == "raise": + raise com.SettingWithCopyError(t) + elif value == "warn": + warnings.warn(t, com.SettingWithCopyWarning, stacklevel=stacklevel) + + def __delitem__(self, key) -> None: + """ + Delete item + """ + deleted = False + + maybe_shortcut = False + if self.ndim == 2 and isinstance(self.columns, MultiIndex): + try: + maybe_shortcut = key not in self.columns._engine + except TypeError: + pass + + if maybe_shortcut: + # Allow 
shorthand to delete all columns whose first len(key) + # elements match key: + if not isinstance(key, tuple): + key = (key,) + for col in self.columns: + if isinstance(col, tuple) and col[: len(key)] == key: + del self[col] + deleted = True + if not deleted: + # If the above loop ran and didn't delete anything because + # there was no match, this call should raise the appropriate + # exception: + self._data.delete(key) + + # delete from the caches + try: + del self._item_cache[key] + except KeyError: + pass + + # ---------------------------------------------------------------------- + # Unsorted + + def get(self, key, default=None): + """ + Get item from object for given key (ex: DataFrame column). + + Returns default value if not found. + + Parameters + ---------- + key : object + + Returns + ------- + value : same type as items contained in object + """ + try: + return self[key] + except (KeyError, ValueError, IndexError): + return default + + @property + def _is_view(self): + """Return boolean indicating if self is view of another array """ + return self._data.is_view + + def reindex_like( + self: FrameOrSeries, + other, + method: Optional[str] = None, + copy: bool_t = True, + limit=None, + tolerance=None, + ) -> FrameOrSeries: + """ + Return an object with matching indices as other object. + + Conform the object to the same index on all axes. Optional + filling logic, placing NaN in locations having no value + in the previous index. A new object is produced unless the + new index is equivalent to the current one and copy=False. + + Parameters + ---------- + other : Object of the same data type + Its row and column indices are used to define the new indices + of this object. + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. 
+ + * None (default): don't fill gaps + * pad / ffill: propagate last valid observation forward to next + valid + * backfill / bfill: use next valid observation to fill gap + * nearest: use nearest valid observations to fill gap. + + copy : bool, default True + Return a new object, even if the passed indexes are the same. + limit : int, default None + Maximum number of consecutive labels to fill for inexact matches. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + .. versionadded:: 0.21.0 (list-like tolerance) + + Returns + ------- + Series or DataFrame + Same type as caller, but with changed indices on each axis. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex : Change to new indices or expand indices. + + Notes + ----- + Same as calling + ``.reindex(index=other.index, columns=other.columns,...)``. + + Examples + -------- + >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], + ... [31, 87.8, 'high'], + ... [22, 71.6, 'medium'], + ... [35, 95, 'medium']], + ... columns=['temp_celsius', 'temp_fahrenheit', + ... 'windspeed'], + ... index=pd.date_range(start='2014-02-12', + ... end='2014-02-15', freq='D')) + + >>> df1 + temp_celsius temp_fahrenheit windspeed + 2014-02-12 24.3 75.7 high + 2014-02-13 31.0 87.8 high + 2014-02-14 22.0 71.6 medium + 2014-02-15 35.0 95.0 medium + + >>> df2 = pd.DataFrame([[28, 'low'], + ... [30, 'low'], + ... [35.1, 'medium']], + ... columns=['temp_celsius', 'windspeed'], + ...
index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', + ... '2014-02-15'])) + + >>> df2 + temp_celsius windspeed + 2014-02-12 28.0 low + 2014-02-13 30.0 low + 2014-02-15 35.1 medium + + >>> df2.reindex_like(df1) + temp_celsius temp_fahrenheit windspeed + 2014-02-12 28.0 NaN low + 2014-02-13 30.0 NaN low + 2014-02-14 NaN NaN NaN + 2014-02-15 35.1 NaN medium + """ + d = other._construct_axes_dict( + axes=self._AXIS_ORDERS, + method=method, + copy=copy, + limit=limit, + tolerance=tolerance, + ) + + return self.reindex(**d) + + def drop( + self, + labels=None, + axis=0, + index=None, + columns=None, + level=None, + inplace: bool_t = False, + errors: str = "raise", + ): + + inplace = validate_bool_kwarg(inplace, "inplace") + + if labels is not None: + if index is not None or columns is not None: + raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") + axis_name = self._get_axis_name(axis) + axes = {axis_name: labels} + elif index is not None or columns is not None: + axes, _ = self._construct_axes_from_arguments((index, columns), {}) + else: + raise ValueError( + "Need to specify at least one of 'labels', 'index' or 'columns'" + ) + + obj = self + + for axis, labels in axes.items(): + if labels is not None: + obj = obj._drop_axis(labels, axis, level=level, errors=errors) + + if inplace: + self._update_inplace(obj) + else: + return obj + + def _drop_axis( + self: FrameOrSeries, labels, axis, level=None, errors: str = "raise" + ) -> FrameOrSeries: + """ + Drop labels from specified axis. Used in the ``drop`` method + internally. + + Parameters + ---------- + labels : single label or list-like + axis : int or axis name + level : int or level name, default None + For MultiIndex + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. 
+ + """ + axis = self._get_axis_number(axis) + axis_name = self._get_axis_name(axis) + axis = self._get_axis(axis) + + if axis.is_unique: + if level is not None: + if not isinstance(axis, MultiIndex): + raise AssertionError("axis must be a MultiIndex") + new_axis = axis.drop(labels, level=level, errors=errors) + else: + new_axis = axis.drop(labels, errors=errors) + result = self.reindex(**{axis_name: new_axis}) + + # Case for non-unique axis + else: + labels = ensure_object(com.index_labels_to_array(labels)) + if level is not None: + if not isinstance(axis, MultiIndex): + raise AssertionError("axis must be a MultiIndex") + indexer = ~axis.get_level_values(level).isin(labels) + + # GH 18561 MultiIndex.drop should raise if label is absent + if errors == "raise" and indexer.all(): + raise KeyError(f"{labels} not found in axis") + else: + indexer = ~axis.isin(labels) + # Check if label doesn't exist along axis + labels_missing = (axis.get_indexer_for(labels) == -1).any() + if errors == "raise" and labels_missing: + raise KeyError(f"{labels} not found in axis") + + slicer = [slice(None)] * self.ndim + slicer[self._get_axis_number(axis_name)] = indexer + + result = self.loc[tuple(slicer)] + + return result + + def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: + """ + Replace self internals with result. + + Parameters + ---------- + verify_is_copy : bool, default True + Provide is_copy checks. + """ + # NOTE: This does *not* call __finalize__ and that's an explicit + # decision that we may revisit in the future. + + self._reset_cache() + self._clear_item_cache() + self._data = getattr(result, "_data", result) + self._maybe_update_cacher(verify_is_copy=verify_is_copy) + + def add_prefix(self: FrameOrSeries, prefix: str) -> FrameOrSeries: + """ + Prefix labels with string `prefix`. + + For Series, the row labels are prefixed. + For DataFrame, the column labels are prefixed. 
+ + Parameters + ---------- + prefix : str + The string to add before each label. + + Returns + ------- + Series or DataFrame + New Series or DataFrame with updated labels. + + See Also + -------- + Series.add_suffix: Suffix row labels with string `suffix`. + DataFrame.add_suffix: Suffix column labels with string `suffix`. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.add_prefix('item_') + item_0 1 + item_1 2 + item_2 3 + item_3 4 + dtype: int64 + + >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df + A B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + + >>> df.add_prefix('col_') + col_A col_B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + """ + f = functools.partial("{prefix}{}".format, prefix=prefix) + + mapper = {self._info_axis_name: f} + return self.rename(**mapper) # type: ignore + + def add_suffix(self: FrameOrSeries, suffix: str) -> FrameOrSeries: + """ + Suffix labels with string `suffix`. + + For Series, the row labels are suffixed. + For DataFrame, the column labels are suffixed. + + Parameters + ---------- + suffix : str + The string to add after each label. + + Returns + ------- + Series or DataFrame + New Series or DataFrame with updated labels. + + See Also + -------- + Series.add_prefix: Prefix row labels with string `prefix`. + DataFrame.add_prefix: Prefix column labels with string `prefix`. 
+ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.add_suffix('_item') + 0_item 1 + 1_item 2 + 2_item 3 + 3_item 4 + dtype: int64 + + >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df + A B + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + + >>> df.add_suffix('_col') + A_col B_col + 0 1 3 + 1 2 4 + 2 3 5 + 3 4 6 + """ + f = functools.partial("{}{suffix}".format, suffix=suffix) + + mapper = {self._info_axis_name: f} + return self.rename(**mapper) # type: ignore + + def sort_values( + self, + by=None, + axis=0, + ascending=True, + inplace: bool_t = False, + kind: str = "quicksort", + na_position: str = "last", + ignore_index: bool_t = False, + ): + """ + Sort by the values along either axis. + + Parameters + ----------%(optional_by)s + axis : %(axes_single_arg)s, default 0 + Axis to be sorted. + ascending : bool or list of bool, default True + Sort ascending vs. descending. Specify list for multiple sort + orders. If this is a list of bools, must match the length of + the by. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See also ndarray.np.sort for more + information. `mergesort` is the only stable algorithm. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + Puts NaNs at the beginning if `first`; `last` puts NaNs at the + end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + Returns + ------- + sorted_obj : DataFrame or None + DataFrame with sorted values if inplace=False, None otherwise. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], + ... 'col2': [2, 1, 9, 8, 7, 4], + ... 'col3': [0, 1, 9, 4, 2, 3], + ... 
}) + >>> df + col1 col2 col3 + 0 A 2 0 + 1 A 1 1 + 2 B 9 9 + 3 NaN 8 4 + 4 D 7 2 + 5 C 4 3 + + Sort by col1 + + >>> df.sort_values(by=['col1']) + col1 col2 col3 + 0 A 2 0 + 1 A 1 1 + 2 B 9 9 + 5 C 4 3 + 4 D 7 2 + 3 NaN 8 4 + + Sort by multiple columns + + >>> df.sort_values(by=['col1', 'col2']) + col1 col2 col3 + 1 A 1 1 + 0 A 2 0 + 2 B 9 9 + 5 C 4 3 + 4 D 7 2 + 3 NaN 8 4 + + Sort Descending + + >>> df.sort_values(by='col1', ascending=False) + col1 col2 col3 + 4 D 7 2 + 5 C 4 3 + 2 B 9 9 + 0 A 2 0 + 1 A 1 1 + 3 NaN 8 4 + + Putting NAs first + + >>> df.sort_values(by='col1', ascending=False, na_position='first') + col1 col2 col3 + 3 NaN 8 4 + 4 D 7 2 + 5 C 4 3 + 2 B 9 9 + 0 A 2 0 + 1 A 1 1 + """ + raise AbstractMethodError(self) + + def sort_index( + self, + axis=0, + level=None, + ascending: bool_t = True, + inplace: bool_t = False, + kind: str = "quicksort", + na_position: str = "last", + sort_remaining: bool_t = True, + ignore_index: bool_t = False, + ): + """ + Sort object by labels (along an axis). + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis along which to sort. The value 0 identifies the rows, + and 1 identifies the columns. + level : int or level name or list of ints or list of level names + If not None, sort on values in specified index level(s). + ascending : bool, default True + Sort ascending vs. descending. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See also ndarray.np.sort for more + information. `mergesort` is the only stable algorithm. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + Puts NaNs at the beginning if `first`; `last` puts NaNs at the end. + Not implemented for MultiIndex. 
+ sort_remaining : bool, default True + If True and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + Returns + ------- + sorted_obj : DataFrame or None + DataFrame with sorted index if inplace=False, None otherwise. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + axis_name = self._get_axis_name(axis) + labels = self._get_axis(axis) + + if level is not None: + raise NotImplementedError("level is not implemented") + if inplace: + raise NotImplementedError("inplace is not implemented") + + sort_index = labels.argsort() + if not ascending: + sort_index = sort_index[::-1] + + new_axis = labels.take(sort_index) + return self.reindex(**{axis_name: new_axis}) + + def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries: + """ + Conform %(klass)s to new index with optional filling logic. + + Places NA/NaN in locations having no value in the previous index. A new object + is produced unless the new index is equivalent to the current one and + ``copy=False``. + + Parameters + ---------- + %(optional_labels)s + %(axes)s : array-like, optional + New labels / index to conform to, should be specified using + keywords. Preferably an Index object to avoid duplicating data. + %(optional_axis)s + method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: Propagate last valid observation forward to next + valid. + * backfill / bfill: Use next valid observation to fill gap. + * nearest: Use nearest valid observations to fill gap. 
+ + copy : bool, default True + Return a new object, even if the passed indexes are the same. + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : scalar, default np.NaN + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value. + limit : int, default None + Maximum number of consecutive elements to forward or backward fill. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations must + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + .. versionadded:: 0.21.0 (list-like tolerance) + + Returns + ------- + %(klass)s with changed index. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. + + Examples + -------- + + ``DataFrame.reindex`` supports two calling conventions + + * ``(index=index_labels, columns=column_labels, ...)`` + * ``(labels, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + Create a dataframe with some fictional data. + + >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] + >>> df = pd.DataFrame({'http_status': [200, 200, 404, 404, 301], + ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}, + ... index=index) + >>> df + http_status response_time + Firefox 200 0.04 + Chrome 200 0.02 + Safari 404 0.07 + IE10 404 0.08 + Konqueror 301 1.00 + + Create a new index and reindex the dataframe.
By default + values in the new index that do not have corresponding + records in the dataframe are assigned ``NaN``. + + >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', + ... 'Chrome'] + >>> df.reindex(new_index) + http_status response_time + Safari 404.0 0.07 + Iceweasel NaN NaN + Comodo Dragon NaN NaN + IE10 404.0 0.08 + Chrome 200.0 0.02 + + We can fill in the missing values by passing a value to + the keyword ``fill_value``. Because the index is not monotonically + increasing or decreasing, we cannot use arguments to the keyword + ``method`` to fill the ``NaN`` values. + + >>> df.reindex(new_index, fill_value=0) + http_status response_time + Safari 404 0.07 + Iceweasel 0 0.00 + Comodo Dragon 0 0.00 + IE10 404 0.08 + Chrome 200 0.02 + + >>> df.reindex(new_index, fill_value='missing') + http_status response_time + Safari 404 0.07 + Iceweasel missing missing + Comodo Dragon missing missing + IE10 404 0.08 + Chrome 200 0.02 + + We can also reindex the columns. + + >>> df.reindex(columns=['http_status', 'user_agent']) + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + Or we can use "axis-style" keyword arguments + + >>> df.reindex(['http_status', 'user_agent'], axis="columns") + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + To further illustrate the filling functionality in + ``reindex``, we will create a dataframe with a + monotonically increasing index (for example, a sequence + of dates). + + >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') + >>> df2 = pd.DataFrame({"prices": [100, 101, np.nan, 100, 89, 88]}, + ... index=date_index) + >>> df2 + prices + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + + Suppose we decide to expand the dataframe to cover a wider + date range. 
+ + >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') + >>> df2.reindex(date_index2) + prices + 2009-12-29 NaN + 2009-12-30 NaN + 2009-12-31 NaN + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + The index entries that did not have a value in the original data frame + (for example, '2009-12-29') are by default filled with ``NaN``. + If desired, we can fill in the missing values using one of several + options. + + For example, to back-propagate the last valid value to fill the ``NaN`` + values, pass ``bfill`` as an argument to the ``method`` keyword. + + >>> df2.reindex(date_index2, method='bfill') + prices + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + Please note that the ``NaN`` value present in the original dataframe + (at index value 2010-01-03) will not be filled by any of the + value propagation schemes. This is because filling while reindexing + does not look at dataframe values, but only compares the original and + desired indexes. If you do want to fill in the ``NaN`` values present + in the original dataframe, use the ``fillna()`` method. + + See the :ref:`user guide <basics.reindexing>` for more.
+ """ + # TODO: Decide if we care about having different examples for different + # kinds + + # construct the args + axes, kwargs = self._construct_axes_from_arguments(args, kwargs) + method = missing.clean_reindex_fill_method(kwargs.pop("method", None)) + level = kwargs.pop("level", None) + copy = kwargs.pop("copy", True) + limit = kwargs.pop("limit", None) + tolerance = kwargs.pop("tolerance", None) + fill_value = kwargs.pop("fill_value", None) + + # Series.reindex doesn't use / need the axis kwarg + # We pop and ignore it here, to make writing Series/Frame generic code + # easier + kwargs.pop("axis", None) + + if kwargs: + raise TypeError( + "reindex() got an unexpected keyword " + f'argument "{list(kwargs.keys())[0]}"' + ) + + self._consolidate_inplace() + + # if all axes that are requested to reindex are equal, then only copy + # if indicated must have index names equal here as well as values + if all( + self._get_axis(axis).identical(ax) + for axis, ax in axes.items() + if ax is not None + ): + if copy: + return self.copy() + return self + + # check if we are a multi reindex + if self._needs_reindex_multi(axes, method, level): + return self._reindex_multi(axes, copy, fill_value) + + # perform the reindex on the axes + return self._reindex_axes( + axes, level, limit, tolerance, method, fill_value, copy + ).__finalize__(self) + + def _reindex_axes( + self: FrameOrSeries, axes, level, limit, tolerance, method, fill_value, copy + ) -> FrameOrSeries: + """Perform the reindex for all the axes.""" + obj = self + for a in self._AXIS_ORDERS: + labels = axes[a] + if labels is None: + continue + + ax = self._get_axis(a) + new_index, indexer = ax.reindex( + labels, level=level, limit=limit, tolerance=tolerance, method=method + ) + + axis = self._get_axis_number(a) + obj = obj._reindex_with_indexers( + {axis: [new_index, indexer]}, + fill_value=fill_value, + copy=copy, + allow_dups=False, + ) + + return obj + + def _needs_reindex_multi(self, axes, method, level) -> 
bool_t: + """Check if we do need a multi reindex.""" + return ( + (com.count_not_none(*axes.values()) == self._AXIS_LEN) + and method is None + and level is None + and not self._is_mixed_type + ) + + def _reindex_multi(self, axes, copy, fill_value): + raise AbstractMethodError(self) + + def _reindex_with_indexers( + self: FrameOrSeries, + reindexers, + fill_value=None, + copy: bool_t = False, + allow_dups: bool_t = False, + ) -> FrameOrSeries: + """allow_dups indicates an internal call here """ + + # reindex doing multiple operations on different axes if indicated + new_data = self._data + for axis in sorted(reindexers.keys()): + index, indexer = reindexers[axis] + baxis = self._get_block_manager_axis(axis) + + if index is None: + continue + + index = ensure_index(index) + if indexer is not None: + indexer = ensure_int64(indexer) + + # TODO: speed up on homogeneous DataFrame objects + new_data = new_data.reindex_indexer( + index, + indexer, + axis=baxis, + fill_value=fill_value, + allow_dups=allow_dups, + copy=copy, + ) + + if copy and new_data is self._data: + new_data = new_data.copy() + + return self._constructor(new_data).__finalize__(self) + + def filter( + self: FrameOrSeries, + items=None, + like: Optional[str] = None, + regex: Optional[str] = None, + axis=None, + ) -> FrameOrSeries: + """ + Subset the dataframe rows or columns according to the specified index labels. + + Note that this routine does not filter a dataframe on its + contents. The filter is applied to the labels of the index. + + Parameters + ---------- + items : list-like + Keep labels from axis which are in items. + like : str + Keep labels from axis for which "like in label == True". + regex : str (regular expression) + Keep labels from axis for which re.search(regex, label) == True. + axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + The axis to filter on, expressed either as an index (int) + or axis name (str). 
By default this is the info axis, + 'index' for Series, 'columns' for DataFrame. + + Returns + ------- + same type as input object + + See Also + -------- + DataFrame.loc + + Notes + ----- + The ``items``, ``like``, and ``regex`` parameters are + enforced to be mutually exclusive. + + ``axis`` defaults to the info axis that is used when indexing + with ``[]``. + + Examples + -------- + >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), + ... index=['mouse', 'rabbit'], + ... columns=['one', 'two', 'three']) + + >>> # select columns by name + >>> df.filter(items=['one', 'three']) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select columns by regular expression + >>> df.filter(regex='e$', axis=1) + one three + mouse 1 3 + rabbit 4 6 + + >>> # select rows containing 'bbi' + >>> df.filter(like='bbi', axis=0) + one two three + rabbit 4 5 6 + """ + nkw = com.count_not_none(items, like, regex) + if nkw > 1: + raise TypeError( + "Keyword arguments `items`, `like`, or `regex` " + "are mutually exclusive" + ) + + if axis is None: + axis = self._info_axis_name + labels = self._get_axis(axis) + + if items is not None: + name = self._get_axis_name(axis) + return self.reindex(**{name: [r for r in items if r in labels]}) + elif like: + + def f(x): + return like in ensure_str(x) + + values = labels.map(f) + return self.loc(axis=axis)[values] + elif regex: + + def f(x): + return matcher.search(ensure_str(x)) is not None + + matcher = re.compile(regex) + values = labels.map(f) + return self.loc(axis=axis)[values] + else: + raise TypeError("Must pass either `items`, `like`, or `regex`") + + def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: + """ + Return the first `n` rows. + + This function returns the first `n` rows for the object based + on position. It is useful for quickly testing if your object + has the right type of data in it. + + For negative values of `n`, this function returns all rows except + the last `n` rows, equivalent to ``df[:-n]``. 
+ + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + same type as caller + The first `n` rows of the caller object. + + See Also + -------- + DataFrame.tail: Returns the last `n` rows. + + Examples + -------- + >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', + ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the first 5 lines + + >>> df.head() + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + + Viewing the first `n` lines (three in this case) + + >>> df.head(3) + animal + 0 alligator + 1 bee + 2 falcon + + For negative values of `n` + + >>> df.head(-3) + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + """ + + return self.iloc[:n] + + def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries: + """ + Return the last `n` rows. + + This function returns last `n` rows from the object based on + position. It is useful for quickly verifying data, for example, + after sorting or appending rows. + + For negative values of `n`, this function returns all rows except + the first `n` rows, equivalent to ``df[n:]``. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + type of caller + The last `n` rows of the caller object. + + See Also + -------- + DataFrame.head : The first `n` rows of the caller object. + + Examples + -------- + >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', + ... 
'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df + animal + 0 alligator + 1 bee + 2 falcon + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the last 5 lines + + >>> df.tail() + animal + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + + Viewing the last `n` lines (three in this case) + + >>> df.tail(3) + animal + 6 shark + 7 whale + 8 zebra + + For negative values of `n` + + >>> df.tail(-3) + animal + 3 lion + 4 monkey + 5 parrot + 6 shark + 7 whale + 8 zebra + """ + + if n == 0: + return self.iloc[0:0] + return self.iloc[-n:] + + def sample( + self: FrameOrSeries, + n=None, + frac=None, + replace=False, + weights=None, + random_state=None, + axis=None, + ) -> FrameOrSeries: + """ + Return a random sample of items from an axis of object. + + You can use `random_state` for reproducibility. + + Parameters + ---------- + n : int, optional + Number of items from axis to return. Cannot be used with `frac`. + Default = 1 if `frac` = None. + frac : float, optional + Fraction of axis items to return. Cannot be used with `n`. + replace : bool, default False + Allow or disallow sampling of the same row more than once. + weights : str or ndarray-like, optional + Default 'None' results in equal probability weighting. + If passed a Series, will align with target object on index. Index + values in weights not found in sampled object will be ignored and + index values in sampled object not in weights will be assigned + weights of zero. + If called on a DataFrame, will accept the name of a column + when axis = 0. + Unless weights are a Series, weights must be same length as axis + being sampled. + If weights do not sum to 1, they will be normalized to sum to 1. + Missing values in the weights column will be treated as zero. + Infinite values not allowed. + random_state : int or numpy.random.RandomState, optional + Seed for the random number generator (if int), or numpy RandomState + object. 
+ axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + Axis to sample. Accepts axis number or name. Default is stat axis + for given data type (0 for Series and DataFrames). + + Returns + ------- + Series or DataFrame + A new object of same type as caller containing `n` items randomly + sampled from the caller object. + + See Also + -------- + numpy.random.choice: Generates a random sample from a given 1-D numpy + array. + + Notes + ----- + If `frac` > 1, `replace` should be set to `True`. + + Examples + -------- + >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0], + ... 'num_wings': [2, 0, 0, 0], + ... 'num_specimen_seen': [10, 2, 1, 8]}, + ... index=['falcon', 'dog', 'spider', 'fish']) + >>> df + num_legs num_wings num_specimen_seen + falcon 2 2 10 + dog 4 0 2 + spider 8 0 1 + fish 0 0 8 + + Extract 3 random elements from the ``Series`` ``df['num_legs']``: + Note that we use `random_state` to ensure the reproducibility of + the examples. + + >>> df['num_legs'].sample(n=3, random_state=1) + fish 0 + spider 8 + falcon 2 + Name: num_legs, dtype: int64 + + A random 50% sample of the ``DataFrame`` with replacement: + + >>> df.sample(frac=0.5, replace=True, random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + fish 0 0 8 + + An upsample sample of the ``DataFrame`` with replacement: + Note that `replace` parameter has to be `True` for `frac` parameter > 1. + + >>> df.sample(frac=2, replace=True, random_state=1) + num_legs num_wings num_specimen_seen + dog 4 0 2 + fish 0 0 8 + falcon 2 2 10 + falcon 2 2 10 + fish 0 0 8 + dog 4 0 2 + fish 0 0 8 + dog 4 0 2 + + Using a DataFrame column as weights. Rows with larger value in the + `num_specimen_seen` column are more likely to be sampled.
+ + >>> df.sample(n=2, weights='num_specimen_seen', random_state=1) + num_legs num_wings num_specimen_seen + falcon 2 2 10 + fish 0 0 8 + """ + + if axis is None: + axis = self._stat_axis_number + + axis = self._get_axis_number(axis) + axis_length = self.shape[axis] + + # Process random_state argument + rs = com.random_state(random_state) + + # Check weights for compliance + if weights is not None: + + # If a series, align with frame + if isinstance(weights, ABCSeries): + weights = weights.reindex(self.axes[axis]) + + # Strings acceptable if a dataframe and axis = 0 + if isinstance(weights, str): + if isinstance(self, ABCDataFrame): + if axis == 0: + try: + weights = self[weights] + except KeyError: + raise KeyError( + "String passed to weights not a valid column" + ) + else: + raise ValueError( + "Strings can only be passed to " + "weights when sampling from rows on " + "a DataFrame" + ) + else: + raise ValueError( + "Strings cannot be passed as weights " + "when sampling from a Series." + ) + + weights = pd.Series(weights, dtype="float64") + + if len(weights) != axis_length: + raise ValueError( + "Weights and axis to be sampled must be of same length" + ) + + if (weights == np.inf).any() or (weights == -np.inf).any(): + raise ValueError("weight vector may not include `inf` values") + + if (weights < 0).any(): + raise ValueError("weight vector many not include negative values") + + # If has nan, set to zero. + weights = weights.fillna(0) + + # Renormalize if don't sum to 1 + if weights.sum() != 1: + if weights.sum() != 0: + weights = weights / weights.sum() + else: + raise ValueError("Invalid weights: weights sum to zero") + + weights = weights.values + + # If no frac or n, default to n=1. + if n is None and frac is None: + n = 1 + elif frac is not None and frac > 1 and not replace: + raise ValueError( + "Replace has to be set to `True` when " + "upsampling the population `frac` > 1." 
+ ) + elif n is not None and frac is None and n % 1 != 0: + raise ValueError("Only integers accepted as `n` values") + elif n is None and frac is not None: + n = int(round(frac * axis_length)) + elif n is not None and frac is not None: + raise ValueError("Please enter a value for `frac` OR `n`, not both") + + # Check for negative sizes + if n < 0: + raise ValueError( + "A negative number of rows requested. Please provide positive value." + ) + + locs = rs.choice(axis_length, size=n, replace=replace, p=weights) + return self.take(locs, axis=axis) + + _shared_docs[ + "pipe" + ] = r""" + Apply func(self, \*args, \*\*kwargs). + + Parameters + ---------- + func : function + Function to apply to the %(klass)s. + ``args``, and ``kwargs`` are passed into ``func``. + Alternatively a ``(callable, data_keyword)`` tuple where + ``data_keyword`` is a string indicating the keyword of + ``callable`` that expects the %(klass)s. + args : iterable, optional + Positional arguments passed into ``func``. + kwargs : mapping, optional + A dictionary of keyword arguments passed into ``func``. + + Returns + ------- + object : the return type of ``func``. + + See Also + -------- + DataFrame.apply + DataFrame.applymap + Series.map + + Notes + ----- + + Use ``.pipe`` when chaining together functions that expect + Series, DataFrames or GroupBy objects. Instead of writing + + >>> f(g(h(df), arg1=a), arg2=b, arg3=c) + + You can write + + >>> (df.pipe(h) + ... .pipe(g, arg1=a) + ... .pipe(f, arg2=b, arg3=c) + ... ) + + If you have a function that takes the data as (say) the second + argument, pass a tuple indicating which keyword expects the + data. For example, suppose ``f`` takes its data as ``arg2``: + + >>> (df.pipe(h) + ... .pipe(g, arg1=a) + ... .pipe((f, 'arg2'), arg1=a, arg3=c) + ... 
# The user-facing docstring is attached by ``Appender`` from the shared
# "pipe" template (formatted with ``_shared_doc_kwargs``), so no inline
# docstring is written on the function itself.
@Appender(_shared_docs["pipe"] % _shared_doc_kwargs)
def pipe(self, func, *args, **kwargs):
    # Thin wrapper: hand ``self`` plus all arguments, unchanged, to the
    # ``com.pipe`` helper and return whatever it returns.
    return com.pipe(self, func, *args, **kwargs)
def __finalize__(
    self: FrameOrSeries, other, method=None, **kwargs
) -> FrameOrSeries:
    """
    Copy metadata from ``other`` onto this object and return this object.

    Parameters
    ----------
    other : object
        The object to take metadata from. Nothing is copied unless it is
        an ``NDFrame``.
    method : optional
        A passed method name; accepted so that different propagation
        actions could be taken based on it. Not used by this
        implementation.
    **kwargs
        Accepted for interface compatibility; not used by this
        implementation.

    Returns
    -------
    same type as caller
        ``self``, after any propagation has been applied.
    """
    # Guard clause: only NDFrame sources carry metadata we know how to copy.
    if not isinstance(other, NDFrame):
        return self

    # Entries of ``other.attrs`` overwrite same-named entries on ``self``.
    for key in other.attrs:
        self.attrs[key] = other.attrs[key]

    # For subclasses using _metadata: every declared field is set, falling
    # back to None when ``other`` does not define it.
    for field in self._metadata:
        inherited = getattr(other, field, None)
        object.__setattr__(self, field, inherited)

    return self
def __setattr__(self, name: str, value) -> None:
    """
    Set an attribute, falling back to item assignment on the info axis.

    After regular attribute access, try setting the name as an item.
    This allows simpler access to columns for interactive use.

    Parameters
    ----------
    name : str
        Attribute name being assigned.
    value : object
        Value to assign.
    """

    # first try regular attribute access via __getattribute__, so that
    # e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
    # the same attribute.

    try:
        object.__getattribute__(self, name)
        return object.__setattr__(self, name, value)
    except AttributeError:
        pass

    # if this fails, go on to more involved attribute setting
    # (note that this matches __getattr__, above).
    if name in self._internal_names_set:
        object.__setattr__(self, name, value)
    elif name in self._metadata:
        object.__setattr__(self, name, value)
    else:
        try:
            # Regular lookup already failed above, so this goes through
            # the ``__getattr__`` fallback.
            existing = getattr(self, name)
            if isinstance(existing, Index):
                # An Index-valued name is set as a plain attribute rather
                # than through item assignment.
                object.__setattr__(self, name, value)
            elif name in self._info_axis:
                # Name is a label on the info axis: route to item assignment.
                self[name] = value
            else:
                object.__setattr__(self, name, value)
        except (AttributeError, TypeError):
            # The name could not be resolved: it becomes a brand-new plain
            # attribute. Warn when a list-like is assigned on a DataFrame,
            # since that does not create a column.
            if isinstance(self, ABCDataFrame) and (is_list_like(value)):
                warnings.warn(
                    "Pandas doesn't allow columns to be "
                    "created via a new attribute name - see "
                    "https://pandas.pydata.org/pandas-docs/"
                    "stable/indexing.html#attribute-access",
                    stacklevel=2,
                )
            object.__setattr__(self, name, value)
+ """ + additions = { + c + for c in self._info_axis.unique(level=0)[:100] + if isinstance(c, str) and c.isidentifier() + } + return super()._dir_additions().union(additions) + + # ---------------------------------------------------------------------- + # Consolidation of internals + + def _protect_consolidate(self, f): + """Consolidate _data -- if the blocks have changed, then clear the + cache + """ + blocks_before = len(self._data.blocks) + result = f() + if len(self._data.blocks) != blocks_before: + self._clear_item_cache() + return result + + def _consolidate_inplace(self) -> None: + """Consolidate data in place and return None""" + + def f(): + self._data = self._data.consolidate() + + self._protect_consolidate(f) + + def _consolidate(self, inplace: bool_t = False): + """ + Compute NDFrame with "consolidated" internals (data of each dtype + grouped together in a single ndarray). + + Parameters + ---------- + inplace : bool, default False + If False return new object, otherwise modify existing object. 
def _check_inplace_setting(self, value) -> bool_t:
    """
    Check whether in-place setting is allowed with this type of value.

    Returns
    -------
    bool
        Always True when the check passes.

    Raises
    ------
    TypeError
        If the object is mixed-type but not numeric mixed-type, and
        ``value`` is not a float NaN.
    """
    # Non-mixed objects, and mixed objects that are entirely numeric,
    # accept any value.
    if not self._is_mixed_type:
        return True
    if self._is_numeric_mixed_type:
        return True

    # allow an actual np.nan thru
    value_is_nan = is_float(value) and np.isnan(value)
    if not value_is_nan:
        raise TypeError(
            "Cannot do inplace boolean setting on "
            "mixed-types with a non np.nan value"
        )

    return True
@property
def _get_values(self) -> np.ndarray:
    """Return ``self.values``; kept as a compatibility alias."""
    # compat
    return self.values
def astype(
    self: FrameOrSeries, dtype, copy: bool_t = True, errors: str = "raise"
) -> FrameOrSeries:
    """
    Cast a pandas object to a specified dtype ``dtype``.

    Parameters
    ----------
    dtype : data type, or dict of column name -> data type
        Use a numpy.dtype or Python type to cast entire pandas object to
        the same type. Alternatively, use {col: dtype, ...}, where col is a
        column label and dtype is a numpy.dtype or Python type to cast one
        or more of the DataFrame's columns to column-specific types.
    copy : bool, default True
        Return a copy when ``copy=True`` (be very careful setting
        ``copy=False`` as changes to values then may propagate to other
        pandas objects).
    errors : {'raise', 'ignore'}, default 'raise'
        Control raising of exceptions on invalid data for provided dtype.

        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object.

    Returns
    -------
    casted : same type as caller

    Raises
    ------
    KeyError
        If a dict-like ``dtype`` uses a key that is not the Series name
        (for a Series) or not a column label (for a DataFrame).

    See Also
    --------
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    to_numeric : Convert argument to a numeric type.
    numpy.ndarray.astype : Cast a numpy array to a specified type.

    Examples
    --------
    Create a DataFrame:

    >>> d = {'col1': [1, 2], 'col2': [3, 4]}
    >>> df = pd.DataFrame(data=d)
    >>> df.dtypes
    col1    int64
    col2    int64
    dtype: object

    Cast all columns to int32:

    >>> df.astype('int32').dtypes
    col1    int32
    col2    int32
    dtype: object

    Cast col1 to int32 using a dictionary:

    >>> df.astype({'col1': 'int32'}).dtypes
    col1    int32
    col2    int64
    dtype: object

    Create a series:

    >>> ser = pd.Series([1, 2], dtype='int32')
    >>> ser
    0    1
    1    2
    dtype: int32
    >>> ser.astype('int64')
    0    1
    1    2
    dtype: int64

    Convert to categorical type:

    >>> ser.astype('category')
    0    1
    1    2
    dtype: category
    Categories (2, int64): [1, 2]

    Convert to ordered categorical type with custom ordering:

    >>> cat_dtype = pd.api.types.CategoricalDtype(
    ...     categories=[2, 1], ordered=True)
    >>> ser.astype(cat_dtype)
    0    1
    1    2
    dtype: category
    Categories (2, int64): [2 < 1]

    Note that using ``copy=False`` and changing data on a new
    pandas object may propagate changes:

    >>> s1 = pd.Series([1, 2])
    >>> s2 = s1.astype('int64', copy=False)
    >>> s2[0] = 10
    >>> s1  # note that s1[0] has changed too
    0    10
    1     2
    dtype: int64
    """
    if is_dict_like(dtype):
        if self.ndim == 1:  # i.e. Series
            if len(dtype) > 1 or self.name not in dtype:
                raise KeyError(
                    "Only the Series name can be used for "
                    "the key in Series dtype mappings."
                )
            new_type = dtype[self.name]
            return self.astype(new_type, copy, errors)

        # ``.keys()`` is deliberate: a dict-like ``dtype`` may be a Series,
        # and iterating a Series directly yields its values, not its labels.
        for col_name in dtype.keys():
            if col_name not in self:
                raise KeyError(
                    "Only a column name can be used for the "
                    "key in a dtype mappings argument."
                )
        results = []
        for col_name, col in self.items():
            if col_name in dtype:
                results.append(
                    col.astype(dtype=dtype[col_name], copy=copy, errors=errors)
                )
            else:
                results.append(col.copy() if copy else col)

    elif is_extension_array_dtype(dtype) and self.ndim > 1:
        # GH 18099/22869: columnwise conversion to extension dtype
        # GH 24704: use iloc to handle duplicate column names
        results = [
            self.iloc[:, i].astype(dtype, copy=copy)
            for i in range(len(self.columns))
        ]

    else:
        # else, only a single dtype is given
        new_data = self._data.astype(dtype=dtype, copy=copy, errors=errors)
        return self._constructor(new_data).__finalize__(self)

    # A frame without columns produces no per-column results, and
    # ``pd.concat`` raises "No objects to concatenate" on an empty list.
    # There is nothing to cast, so hand back a (shallow if copy=False) copy.
    if not results:
        return self.copy(deep=copy)

    # GH 19920: retain column metadata after concat
    result = pd.concat(results, axis=1, copy=False)
    result.columns = self.columns
    # Propagate metadata the same way the single-dtype branch does.
    return result.__finalize__(self)
+ With ``deep=False`` neither the indices nor the data are copied. + + Returns + ------- + copy : Series or DataFrame + Object type matches caller. + + Notes + ----- + When ``deep=True``, data is copied but actual Python objects + will not be copied recursively, only the reference to the object. + This is in contrast to `copy.deepcopy` in the Standard Library, + which recursively copies object data (see examples below). + + While ``Index`` objects are copied when ``deep=True``, the underlying + numpy array is not copied for performance reasons. Since ``Index`` is + immutable, the underlying data can be safely shared and a copy + is not needed. + + Examples + -------- + >>> s = pd.Series([1, 2], index=["a", "b"]) + >>> s + a 1 + b 2 + dtype: int64 + + >>> s_copy = s.copy() + >>> s_copy + a 1 + b 2 + dtype: int64 + + **Shallow copy versus default (deep) copy:** + + >>> s = pd.Series([1, 2], index=["a", "b"]) + >>> deep = s.copy() + >>> shallow = s.copy(deep=False) + + Shallow copy shares data and index with original. + + >>> s is shallow + False + >>> s.values is shallow.values and s.index is shallow.index + True + + Deep copy has own copy of data and index. + + >>> s is deep + False + >>> s.values is deep.values or s.index is deep.index + False + + Updates to the data shared by shallow copy and original is reflected + in both; deep copy remains unchanged. + + >>> s[0] = 3 + >>> shallow[1] = 4 + >>> s + a 3 + b 4 + dtype: int64 + >>> shallow + a 3 + b 4 + dtype: int64 + >>> deep + a 1 + b 2 + dtype: int64 + + Note that when copying an object containing Python objects, a deep copy + will copy the data, but will not do so recursively. Updating a nested + data object will be reflected in the deep copy. 
def __deepcopy__(self: FrameOrSeries, memo=None) -> FrameOrSeries:
    """
    Return ``self.copy(deep=True)``.

    Parameters
    ----------
    memo, default None
        Standard signature. Unused

    Returns
    -------
    same type as caller
        The result of ``self.copy(deep=True)``.
    """
    return self.copy(deep=True)
def convert_dtypes(
    self: FrameOrSeries,
    infer_objects: bool_t = True,
    convert_string: bool_t = True,
    convert_integer: bool_t = True,
    convert_boolean: bool_t = True,
) -> FrameOrSeries:
    """
    Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    infer_objects : bool, default True
        Whether object dtypes should be converted to the best possible types.
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, default True
        Whether object dtypes should be converted to ``BooleanDtype()``.

    Returns
    -------
    Series or DataFrame
        Copy of input object with new dtype.

    See Also
    --------
    infer_objects : Infer dtypes of objects.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    to_numeric : Convert argument to a numeric type.

    Notes
    -----

    By default, ``convert_dtypes`` will attempt to convert a Series (or each
    Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
    ``convert_string``, ``convert_integer``, and ``convert_boolean``, it is
    possible to turn off individual conversions to ``StringDtype``, the integer
    extension types or ``BooleanDtype``, respectively.

    For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
    rules as during normal Series/DataFrame construction.  Then, if possible,
    convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer extension
    type, otherwise leave as ``object``.

    If the dtype is integer, convert to an appropriate integer extension type.

    If the dtype is numeric, and consists of all integers, convert to an
    appropriate integer extension type.

    In the future, as new dtypes are added that support ``pd.NA``, the results
    of this method will change to support those new dtypes.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
    ...         "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
    ...         "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
    ...         "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
    ...         "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
    ...         "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
    ...     }
    ... )

    Start with a DataFrame with default dtypes.

    >>> df
       a  b      c    d     e      f
    0  1  x   True    h  10.0    NaN
    1  2  y  False    i   NaN  100.5
    2  3  z    NaN  NaN  20.0  200.0

    >>> df.dtypes
    a      int32
    b     object
    c     object
    d     object
    e    float64
    f    float64
    dtype: object

    Convert the DataFrame to use best possible dtypes.

    >>> dfn = df.convert_dtypes()
    >>> dfn
       a  b      c     d     e      f
    0  1  x   True     h    10    NaN
    1  2  y  False     i  <NA>  100.5
    2  3  z   <NA>  <NA>    20  200.0

    >>> dfn.dtypes
    a      Int32
    b     string
    c    boolean
    d     string
    e      Int64
    f    float64
    dtype: object

    Start with a Series of strings and missing data represented by ``np.nan``.

    >>> s = pd.Series(["a", "b", np.nan])
    >>> s
    0      a
    1      b
    2    NaN
    dtype: object

    Obtain a Series with dtype ``StringDtype``.

    >>> s.convert_dtypes()
    0       a
    1       b
    2    <NA>
    dtype: string
    """
    if self.ndim == 1:
        return self._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )

    # Convert column by column; the column label itself is not needed.
    results = [
        col._convert_dtypes(
            infer_objects, convert_string, convert_integer, convert_boolean
        )
        for _, col in self.items()
    ]

    # A frame without columns yields no results, and ``pd.concat`` raises
    # "No objects to concatenate" on an empty list. There is nothing to
    # convert, so return a copy as the Returns section promises.
    if not results:
        return self.copy()

    return pd.concat(results, axis=1, copy=False)
Values not + in the dict/Series/DataFrame will not be filled. This value cannot + be a list. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use next valid observation to fill gap. + axis : %(axes_single_arg)s + Axis along which to fill missing values. + inplace : bool, default False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a + DataFrame). + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + downcast : dict, default is None + A dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate + equal type (e.g. float64 to int64 if possible). + + Returns + ------- + %(klass)s or None + Object with missing values filled or None if ``inplace=True``. + + See Also + -------- + interpolate : Fill NaN values using interpolation. + reindex : Conform object to new index. + asfreq : Convert TimeSeries to specified frequency. + + Examples + -------- + >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], + ... [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, 5], + ... [np.nan, 3, np.nan, 4]], + ... columns=list('ABCD')) + >>> df + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 NaN NaN NaN 5 + 3 NaN 3.0 NaN 4 + + Replace all NaN elements with 0s. + + >>> df.fillna(0) + A B C D + 0 0.0 2.0 0.0 0 + 1 3.0 4.0 0.0 1 + 2 0.0 0.0 0.0 5 + 3 0.0 3.0 0.0 4 + + We can also propagate non-null values forward or backward. 
+ + >>> df.fillna(method='ffill') + A B C D + 0 NaN 2.0 NaN 0 + 1 3.0 4.0 NaN 1 + 2 3.0 4.0 NaN 5 + 3 3.0 3.0 NaN 4 + + Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1, + 2, and 3 respectively. + + >>> values = {'A': 0, 'B': 1, 'C': 2, 'D': 3} + >>> df.fillna(value=values) + A B C D + 0 0.0 2.0 2.0 0 + 1 3.0 4.0 2.0 1 + 2 0.0 1.0 2.0 5 + 3 0.0 3.0 2.0 4 + + Only replace the first NaN element. + + >>> df.fillna(value=values, limit=1) + A B C D + 0 0.0 2.0 2.0 0 + 1 3.0 4.0 NaN 1 + 2 NaN 1.0 NaN 5 + 3 NaN 3.0 NaN 4 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + value, method = validate_fillna_kwargs(value, method) + + self._consolidate_inplace() + + # set the default here, so functions examining the signaure + # can detect if something was set (e.g. in groupby) (GH9221) + if axis is None: + axis = 0 + axis = self._get_axis_number(axis) + + if value is None: + + if self._is_mixed_type and axis == 1: + if inplace: + raise NotImplementedError() + result = self.T.fillna(method=method, limit=limit).T + + # need to downcast here because of all of the transposes + result._data = result._data.downcast() + + return result + + new_data = self._data.interpolate( + method=method, + axis=axis, + limit=limit, + inplace=inplace, + coerce=True, + downcast=downcast, + ) + else: + if len(self._get_axis(axis)) == 0: + return self + + if self.ndim == 1: + if isinstance(value, (dict, ABCSeries)): + value = create_series_with_explicit_dtype( + value, dtype_if_empty=object + ) + elif not is_list_like(value): + pass + else: + raise TypeError( + '"value" parameter must be a scalar, dict ' + "or Series, but you passed a " + f'"{type(value).__name__}"' + ) + + new_data = self._data.fillna( + value=value, limit=limit, inplace=inplace, downcast=downcast + ) + + elif isinstance(value, (dict, ABCSeries)): + if axis == 1: + raise NotImplementedError( + "Currently only can fill " + "with dict/Series column " + "by column" + ) + + result = self if inplace else 
def ffill(
    self: FrameOrSeries,
    axis=None,
    inplace: bool_t = False,
    limit=None,
    downcast=None,
) -> Optional[FrameOrSeries]:
    """
    Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.

    Parameters
    ----------
    axis, inplace, limit, downcast
        Passed through unchanged to ``fillna``.

    Returns
    -------
    Series or DataFrame, or None
        Object with missing values filled or None if ``inplace=True``.
    """
    return self.fillna(
        method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
    )

def bfill(
    self: FrameOrSeries,
    axis=None,
    inplace: bool_t = False,
    limit=None,
    downcast=None,
) -> Optional[FrameOrSeries]:
    """
    Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.

    Parameters
    ----------
    axis, inplace, limit, downcast
        Passed through unchanged to ``fillna``.

    Returns
    -------
    Series or DataFrame, or None
        Object with missing values filled or None if ``inplace=True``.
    """
    return self.fillna(
        method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
    )
+ + * numeric, str or regex: + + - numeric: numeric values equal to `to_replace` will be + replaced with `value` + - str: string exactly matching `to_replace` will be replaced + with `value` + - regex: regexs matching `to_replace` will be replaced with + `value` + + * list of str, regex, or numeric: + + - First, if `to_replace` and `value` are both lists, they + **must** be the same length. + - Second, if ``regex=True`` then all of the strings in **both** + lists will be interpreted as regexs otherwise they will match + directly. This doesn't matter much for `value` since there + are only a few possible substitution regexes you can use. + - str, regex and numeric rules apply as above. + + * dict: + + - Dicts can be used to specify different replacement values + for different existing values. For example, + ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and + 'y' with 'z'. To use a dict in this way the `value` + parameter should be `None`. + - For a DataFrame a dict can specify that different values + should be replaced in different columns. For example, + ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a' + and the value 'z' in column 'b' and replaces these values + with whatever is specified in `value`. The `value` parameter + should not be ``None`` in this case. You can treat this as a + special case of passing two lists except that you are + specifying the column to search in. + - For a DataFrame nested dictionaries, e.g., + ``{'a': {'b': np.nan}}``, are read as follows: look in column + 'a' for the value 'b' and replace it with NaN. The `value` + parameter should be ``None`` to use a nested dict in this + way. You can nest regular expressions as well. Note that + column names (the top-level dictionary keys in a nested + dictionary) **cannot** be regular expressions. + + * None: + + - This means that the `regex` argument must be a string, + compiled regular expression, or list, dict, ndarray or + Series of such elements. 
If `value` is also ``None`` then + this **must** be a nested dictionary or Series. + + See the examples section for examples of each of these. + value : scalar, dict, list, str, regex, default None + Value to replace any values matching `to_replace` with. + For a DataFrame a dict of values can be used to specify which + value to use for each column (columns not in the dict will not be + filled). Regular expressions, strings and lists or dicts of such + objects are also allowed. + inplace : bool, default False + If True, in place. Note: this will modify any + other views on this object (e.g. a column from a DataFrame). + Returns the caller if this is True. + limit : int, default None + Maximum size gap to forward or backward fill. + regex : bool or same types as `to_replace`, default False + Whether to interpret `to_replace` and/or `value` as regular + expressions. If this is ``True`` then `to_replace` *must* be a + string. Alternatively, this could be a regular expression or a + list, dict, or array of regular expressions in which case + `to_replace` must be ``None``. + method : {'pad', 'ffill', 'bfill', `None`} + The method to use when for replacement, when `to_replace` is a + scalar, list or tuple and `value` is ``None``. + + .. versionchanged:: 0.23.0 + Added to DataFrame. + + Returns + ------- + %(klass)s + Object after replacement. + + Raises + ------ + AssertionError + * If `regex` is not a ``bool`` and `to_replace` is not + ``None``. + TypeError + * If `to_replace` is a ``dict`` and `value` is not a ``list``, + ``dict``, ``ndarray``, or ``Series`` + * If `to_replace` is ``None`` and `regex` is not compilable + into a regular expression or is a list, dict, ndarray, or + Series. + * When replacing multiple ``bool`` or ``datetime64`` objects and + the arguments to `to_replace` does not match the type of the + value being replaced + ValueError + * If a ``list`` or an ``ndarray`` is passed to `to_replace` and + `value` but they are not the same length. 
+ + See Also + -------- + %(klass)s.fillna : Fill NA values. + %(klass)s.where : Replace values based on boolean condition. + Series.str.replace : Simple string replacement. + + Notes + ----- + * Regex substitution is performed under the hood with ``re.sub``. The + rules for substitution for ``re.sub`` are the same. + * Regular expressions will only substitute on strings, meaning you + cannot provide, for example, a regular expression matching floating + point numbers and expect the columns in your frame that have a + numeric dtype to be matched. However, if those floating point + numbers *are* strings, then you can do this. + * This method has *a lot* of options. You are encouraged to experiment + and play with this method to gain intuition about how it works. + * When dict is used as the `to_replace` value, it is like + key(s) in the dict are the to_replace part and + value(s) in the dict are the value parameter. + + Examples + -------- + + **Scalar `to_replace` and `value`** + + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.replace(0, 5) + 0 5 + 1 1 + 2 2 + 3 3 + 4 4 + dtype: int64 + + >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4], + ... 'B': [5, 6, 7, 8, 9], + ... 
'C': ['a', 'b', 'c', 'd', 'e']}) + >>> df.replace(0, 5) + A B C + 0 5 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + **List-like `to_replace`** + + >>> df.replace([0, 1, 2, 3], 4) + A B C + 0 4 5 a + 1 4 6 b + 2 4 7 c + 3 4 8 d + 4 4 9 e + + >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1]) + A B C + 0 4 5 a + 1 3 6 b + 2 2 7 c + 3 1 8 d + 4 4 9 e + + >>> s.replace([1, 2], method='bfill') + 0 0 + 1 3 + 2 3 + 3 3 + 4 4 + dtype: int64 + + **dict-like `to_replace`** + + >>> df.replace({0: 10, 1: 100}) + A B C + 0 10 5 a + 1 100 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({'A': 0, 'B': 5}, 100) + A B C + 0 100 100 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 4 9 e + + >>> df.replace({'A': {0: 100, 4: 400}}) + A B C + 0 100 5 a + 1 1 6 b + 2 2 7 c + 3 3 8 d + 4 400 9 e + + **Regular expression `to_replace`** + + >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'], + ... 'B': ['abc', 'bar', 'xyz']}) + >>> df.replace(to_replace=r'^ba.$', value='new', regex=True) + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True) + A B + 0 new abc + 1 foo bar + 2 bait xyz + + >>> df.replace(regex=r'^ba.$', value='new') + A B + 0 new abc + 1 foo new + 2 bait xyz + + >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'}) + A B + 0 new abc + 1 xyz new + 2 bait xyz + + >>> df.replace(regex=[r'^ba.$', 'foo'], value='new') + A B + 0 new abc + 1 new new + 2 bait xyz + + Note that when replacing multiple ``bool`` or ``datetime64`` objects, + the data types in the `to_replace` parameter must match the data + type of the value being replaced: + + >>> df = pd.DataFrame({'A': [True, False, True], + ... 'B': [False, True, False]}) + >>> df.replace({'a string': 'new value', True: False}) # raises + Traceback (most recent call last): + ... + TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str' + + This raises a ``TypeError`` because one of the ``dict`` keys is not of + the correct type for replacement. 
+ + Compare the behavior of ``s.replace({'a': None})`` and + ``s.replace('a', None)`` to understand the peculiarities + of the `to_replace` parameter: + + >>> s = pd.Series([10, 'a', 'a', 'b', 'a']) + + When one uses a dict as the `to_replace` value, it is like the + value(s) in the dict are equal to the `value` parameter. + ``s.replace({'a': None})`` is equivalent to + ``s.replace(to_replace={'a': None}, value=None, method=None)``: + + >>> s.replace({'a': None}) + 0 10 + 1 None + 2 None + 3 b + 4 None + dtype: object + + When ``value=None`` and `to_replace` is a scalar, list or + tuple, `replace` uses the method parameter (default 'pad') to do the + replacement. So this is why the 'a' values are being replaced by 10 + in rows 1 and 2 and 'b' in row 4 in this case. + The command ``s.replace('a', None)`` is actually equivalent to + ``s.replace(to_replace='a', value=None, method='pad')``: + + >>> s.replace('a', None) + 0 10 + 1 10 + 2 10 + 3 b + 4 b + dtype: object + """ + + @Appender(_shared_docs["replace"] % _shared_doc_kwargs) + def replace( + self, + to_replace=None, + value=None, + inplace=False, + limit=None, + regex=False, + method="pad", + ): + inplace = validate_bool_kwarg(inplace, "inplace") + if not is_bool(regex) and to_replace is not None: + raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") + + self._consolidate_inplace() + + if value is None: + # passing a single value that is scalar like + # when value is None (GH5319), for compat + if not is_dict_like(to_replace) and not is_dict_like(regex): + to_replace = [to_replace] + + if isinstance(to_replace, (tuple, list)): + if isinstance(self, ABCDataFrame): + return self.apply( + _single_replace, args=(to_replace, method, inplace, limit) + ) + return _single_replace(self, to_replace, method, inplace, limit) + + if not is_dict_like(to_replace): + if not is_dict_like(regex): + raise TypeError( + 'If "to_replace" and "value" are both None ' + 'and "to_replace" is not a list, then ' + 
"regex must be a mapping" + ) + to_replace = regex + regex = True + + items = list(to_replace.items()) + keys, values = zip(*items) if items else ([], []) + + are_mappings = [is_dict_like(v) for v in values] + + if any(are_mappings): + if not all(are_mappings): + raise TypeError( + "If a nested mapping is passed, all values " + "of the top level mapping must be mappings" + ) + # passed a nested dict/Series + to_rep_dict = {} + value_dict = {} + + for k, v in items: + keys, values = list(zip(*v.items())) or ([], []) + + to_rep_dict[k] = list(keys) + value_dict[k] = list(values) + + to_replace, value = to_rep_dict, value_dict + else: + to_replace, value = keys, values + + return self.replace( + to_replace, value, inplace=inplace, limit=limit, regex=regex + ) + else: + + # need a non-zero len on all axes + if not self.size: + return self + + new_data = self._data + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} + res = self if inplace else self.copy() + for c, src in to_replace.items(): + if c in value and c in self: + # object conversion is handled in + # series.replace which is called recursively + res[c] = res[c].replace( + to_replace=src, + value=value[c], + inplace=False, + regex=regex, + ) + return None if inplace else res + + # {'A': NA} -> 0 + elif not is_list_like(value): + keys = [(k, src) for k, src in to_replace.items() if k in self] + keys_len = len(keys) - 1 + for i, (k, src) in enumerate(keys): + convert = i == keys_len + new_data = new_data.replace( + to_replace=src, + value=value, + filter=[k], + inplace=inplace, + regex=regex, + convert=convert, + ) + else: + raise TypeError("value argument must be scalar, dict, or Series") + + elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] + if is_list_like(value): + if len(to_replace) != len(value): + raise ValueError( + f"Replacement lists must match in length. 
" + f"Expecting {len(to_replace)} got {len(value)} " + ) + + new_data = self._data.replace_list( + src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex, + ) + + else: # [NA, ''] -> 0 + new_data = self._data.replace( + to_replace=to_replace, value=value, inplace=inplace, regex=regex + ) + elif to_replace is None: + if not ( + is_re_compilable(regex) + or is_list_like(regex) + or is_dict_like(regex) + ): + raise TypeError( + f"'regex' must be a string or a compiled regular expression " + f"or a list or dict of strings or regular expressions, " + f"you passed a {repr(type(regex).__name__)}" + ) + return self.replace( + regex, value, inplace=inplace, limit=limit, regex=True + ) + else: + + # dest iterable dict-like + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} + new_data = self._data + + for k, v in value.items(): + if k in self: + new_data = new_data.replace( + to_replace=to_replace, + value=v, + filter=[k], + inplace=inplace, + regex=regex, + ) + + elif not is_list_like(value): # NA -> 0 + new_data = self._data.replace( + to_replace=to_replace, value=value, inplace=inplace, regex=regex + ) + else: + raise TypeError( + f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}' + ) + + if inplace: + self._update_inplace(new_data) + else: + return self._constructor(new_data).__finalize__(self) + + _shared_docs[ + "interpolate" + ] = """ + Please note that only ``method='linear'`` is supported for + DataFrame/Series with a MultiIndex. + + Parameters + ---------- + method : str, default 'linear' + Interpolation technique to use. One of: + + * 'linear': Ignore the index and treat the values as equally + spaced. This is the only method supported on MultiIndexes. + * 'time': Works on daily and higher resolution data to interpolate + given length of interval. + * 'index', 'values': use the actual numerical values of the index. + * 'pad': Fill in NaNs using existing values. 
+ * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'spline', + 'barycentric', 'polynomial': Passed to + `scipy.interpolate.interp1d`. These methods use the numerical + values of the index. Both 'polynomial' and 'spline' require that + you also specify an `order` (int), e.g. + ``df.interpolate(method='polynomial', order=5)``. + * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima': + Wrappers around the SciPy interpolation methods of similar + names. See `Notes`. + * 'from_derivatives': Refers to + `scipy.interpolate.BPoly.from_derivatives` which + replaces 'piecewise_polynomial' interpolation method in + scipy 0.18. + axis : {0 or 'index', 1 or 'columns', None}, default None + Axis to interpolate along. + limit : int, optional + Maximum number of consecutive NaNs to fill. Must be greater than + 0. + inplace : bool, default False + Update the data in place if possible. + limit_direction : {'forward', 'backward', 'both'}, default 'forward' + If limit is specified, consecutive NaNs will be filled in this + direction. + limit_area : {`None`, 'inside', 'outside'}, default None + If limit is specified, consecutive NaNs will be filled with this + restriction. + + * ``None``: No fill restriction. + * 'inside': Only fill NaNs surrounded by valid values + (interpolate). + * 'outside': Only fill NaNs outside valid values (extrapolate). + + .. versionadded:: 0.23.0 + + downcast : optional, 'infer' or None, defaults to None + Downcast dtypes if possible. + **kwargs + Keyword arguments to pass on to the interpolating function. + + Returns + ------- + Series or DataFrame + Returns the same object type as the caller, interpolated at + some or all ``NaN`` values. + + See Also + -------- + fillna : Fill missing values using different methods. + scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials + (Akima interpolator). + scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the + Bernstein basis. 
+ scipy.interpolate.interp1d : Interpolate a 1-D function. + scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh + interpolator). + scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic + interpolation. + scipy.interpolate.CubicSpline : Cubic spline data interpolator. + + Notes + ----- + The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima' + methods are wrappers around the respective SciPy implementations of + similar names. These use the actual numerical values of the index. + For more information on their behavior, see the + `SciPy documentation + `__ + and `SciPy tutorial + `__. + + Examples + -------- + Filling in ``NaN`` in a :class:`~pandas.Series` via linear + interpolation. + + >>> s = pd.Series([0, 1, np.nan, 3]) + >>> s + 0 0.0 + 1 1.0 + 2 NaN + 3 3.0 + dtype: float64 + >>> s.interpolate() + 0 0.0 + 1 1.0 + 2 2.0 + 3 3.0 + dtype: float64 + + Filling in ``NaN`` in a Series by padding, but filling at most two + consecutive ``NaN`` at a time. + + >>> s = pd.Series([np.nan, "single_one", np.nan, + ... "fill_two_more", np.nan, np.nan, np.nan, + ... 4.71, np.nan]) + >>> s + 0 NaN + 1 single_one + 2 NaN + 3 fill_two_more + 4 NaN + 5 NaN + 6 NaN + 7 4.71 + 8 NaN + dtype: object + >>> s.interpolate(method='pad', limit=2) + 0 NaN + 1 single_one + 2 single_one + 3 fill_two_more + 4 fill_two_more + 5 fill_two_more + 6 NaN + 7 4.71 + 8 4.71 + dtype: object + + Filling in ``NaN`` in a Series via polynomial interpolation or splines: + Both 'polynomial' and 'spline' methods require that you also specify + an ``order`` (int). + + >>> s = pd.Series([0, 2, np.nan, 8]) + >>> s.interpolate(method='polynomial', order=2) + 0 0.000000 + 1 2.000000 + 2 4.666667 + 3 8.000000 + dtype: float64 + + Fill the DataFrame forward (that is, going down) along each column + using linear interpolation. + + Note how the last entry in column 'a' is interpolated differently, + because there is no entry after it to use for interpolation. 
+ Note how the first entry in column 'b' remains ``NaN``, because there + is no entry before it to use for interpolation. + + >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), + ... (np.nan, 2.0, np.nan, np.nan), + ... (2.0, 3.0, np.nan, 9.0), + ... (np.nan, 4.0, -4.0, 16.0)], + ... columns=list('abcd')) + >>> df + a b c d + 0 0.0 NaN -1.0 1.0 + 1 NaN 2.0 NaN NaN + 2 2.0 3.0 NaN 9.0 + 3 NaN 4.0 -4.0 16.0 + >>> df.interpolate(method='linear', limit_direction='forward', axis=0) + a b c d + 0 0.0 NaN -1.0 1.0 + 1 1.0 2.0 -2.0 5.0 + 2 2.0 3.0 -3.0 9.0 + 3 2.0 4.0 -4.0 16.0 + + Using polynomial interpolation. + + >>> df['d'].interpolate(method='polynomial', order=2) + 0 1.0 + 1 4.0 + 2 9.0 + 3 16.0 + Name: d, dtype: float64 + """ + + @Appender(_shared_docs["interpolate"] % _shared_doc_kwargs) + def interpolate( + self, + method="linear", + axis=0, + limit=None, + inplace=False, + limit_direction="forward", + limit_area=None, + downcast=None, + **kwargs, + ): + """ + Interpolate values according to different methods. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + axis = self._get_axis_number(axis) + + if axis == 0: + ax = self._info_axis_name + _maybe_transposed_self = self + elif axis == 1: + _maybe_transposed_self = self.T + ax = 1 + + ax = _maybe_transposed_self._get_axis_number(ax) + + if _maybe_transposed_self.ndim == 2: + alt_ax = 1 - ax + else: + alt_ax = ax + + if isinstance(_maybe_transposed_self.index, MultiIndex) and method != "linear": + raise ValueError( + "Only `method=linear` interpolation is supported on MultiIndexes." + ) + + if _maybe_transposed_self._data.get_dtype_counts().get("object") == len( + _maybe_transposed_self.T + ): + raise TypeError( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." 
+ ) + + # create/use the index + if method == "linear": + # prior default + index = np.arange(len(_maybe_transposed_self._get_axis(alt_ax))) + else: + index = _maybe_transposed_self._get_axis(alt_ax) + methods = {"index", "values", "nearest", "time"} + is_numeric_or_datetime = ( + is_numeric_dtype(index) + or is_datetime64_any_dtype(index) + or is_timedelta64_dtype(index) + ) + if method not in methods and not is_numeric_or_datetime: + raise ValueError( + "Index column must be numeric or datetime type when " + f"using {method} method other than linear. " + "Try setting a numeric or datetime index column before " + "interpolating." + ) + + if isna(index).any(): + raise NotImplementedError( + "Interpolation with NaNs in the index " + "has not been implemented. Try filling " + "those NaNs before interpolating." + ) + data = _maybe_transposed_self._data + new_data = data.interpolate( + method=method, + axis=ax, + index=index, + values=_maybe_transposed_self, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + inplace=inplace, + downcast=downcast, + **kwargs, + ) + + if inplace: + if axis == 1: + new_data = self._constructor(new_data).T._data + self._update_inplace(new_data) + else: + res = self._constructor(new_data).__finalize__(self) + if axis == 1: + res = res.T + return res + + # ---------------------------------------------------------------------- + # Timeseries methods Methods + + def asof(self, where, subset=None): + """ + Return the last row(s) without any NaNs before `where`. + + The last row (for each element in `where`, if list) without any + NaN is taken. + In case of a :class:`~pandas.DataFrame`, the last row without NaN + considering only the subset of columns (if not `None`) + + If there is no good value, NaN is returned for a Series or + a Series of NaN values for a DataFrame + + Parameters + ---------- + where : date or array-like of dates + Date(s) before which the last row(s) are returned. 
+ subset : str or array-like of str, default `None` + For DataFrame, if not `None`, only use these columns to + check for NaNs. + + Returns + ------- + scalar, Series, or DataFrame + + The return can be: + + * scalar : when `self` is a Series and `where` is a scalar + * Series: when `self` is a Series and `where` is an array-like, + or when `self` is a DataFrame and `where` is a scalar + * DataFrame : when `self` is a DataFrame and `where` is an + array-like + + Return scalar, Series, or DataFrame. + + See Also + -------- + merge_asof : Perform an asof merge. Similar to left join. + + Notes + ----- + Dates are assumed to be sorted. Raises if this is not the case. + + Examples + -------- + A Series and a scalar `where`. + + >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40]) + >>> s + 10 1.0 + 20 2.0 + 30 NaN + 40 4.0 + dtype: float64 + + >>> s.asof(20) + 2.0 + + For a sequence `where`, a Series is returned. The first value is + NaN, because the first element of `where` is before the first + index value. + + >>> s.asof([5, 20]) + 5 NaN + 20 2.0 + dtype: float64 + + Missing values are not considered. The following is ``2.0``, not + NaN, even though NaN is at the index location for ``30``. + + >>> s.asof(30) + 2.0 + + Take all columns into consideration + + >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50], + ... 'b': [None, None, None, None, 500]}, + ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', + ... '2018-02-27 09:02:00', + ... '2018-02-27 09:03:00', + ... '2018-02-27 09:04:00', + ... '2018-02-27 09:05:00'])) + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30'])) + a b + 2018-02-27 09:03:30 NaN NaN + 2018-02-27 09:04:30 NaN NaN + + Take a single column into consideration + + >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', + ... '2018-02-27 09:04:30']), + ... 
subset=['a']) + a b + 2018-02-27 09:03:30 30.0 NaN + 2018-02-27 09:04:30 40.0 NaN + """ + if isinstance(where, str): + where = Timestamp(where) + + if not self.index.is_monotonic: + raise ValueError("asof requires a sorted index") + + is_series = isinstance(self, ABCSeries) + if is_series: + if subset is not None: + raise ValueError("subset is not valid for Series") + else: + if subset is None: + subset = self.columns + if not is_list_like(subset): + subset = [subset] + + is_list = is_list_like(where) + if not is_list: + start = self.index[0] + if isinstance(self.index, PeriodIndex): + where = Period(where, freq=self.index.freq) + + if where < start: + if not is_series: + from pandas import Series + + return Series(index=self.columns, name=where, dtype=np.float64) + return np.nan + + # It's always much faster to use a *while* loop here for + # Series than pre-computing all the NAs. However a + # *while* loop is extremely expensive for DataFrame + # so we later pre-compute all the NAs and use the same + # code path whether *where* is a scalar or list. 
+ # See PR: https://github.com/pandas-dev/pandas/pull/14476 + if is_series: + loc = self.index.searchsorted(where, side="right") + if loc > 0: + loc -= 1 + + values = self._values + while loc > 0 and isna(values[loc]): + loc -= 1 + return values[loc] + + if not isinstance(where, Index): + where = Index(where) if is_list else Index([where]) + + nulls = self.isna() if is_series else self[subset].isna().any(1) + if nulls.all(): + if is_series: + return self._constructor(np.nan, index=where, name=self.name) + elif is_list: + from pandas import DataFrame + + return DataFrame(np.nan, index=where, columns=self.columns) + else: + from pandas import Series + + return Series(np.nan, index=self.columns, name=where[0]) + + locs = self.index.asof_locs(where, ~(nulls.values)) + + # mask the missing + missing = locs == -1 + data = self.take(locs) + data.index = where + data.loc[missing] = np.nan + return data if is_list else data.iloc[-1] + + # ---------------------------------------------------------------------- + # Action Methods + + _shared_docs[ + "isna" + ] = """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + + Returns + ------- + %(klass)s + Mask of bool values for each element in %(klass)s that + indicates whether an element is not an NA value. + + See Also + -------- + %(klass)s.isnull : Alias of isna. + %(klass)s.notna : Boolean inverse of isna. + %(klass)s.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame({'age': [5, 6, np.NaN], + ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'), + ... pd.Timestamp('1940-04-25')], + ... 
'name': ['Alfred', 'Batman', ''], + ... 'toy': [None, 'Batmobile', 'Joker']}) + >>> df + age born name toy + 0 5.0 NaT Alfred None + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.NaN]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool + """ + + @Appender(_shared_docs["isna"] % _shared_doc_kwargs) + def isna(self: FrameOrSeries) -> FrameOrSeries: + return isna(self).__finalize__(self) + + @Appender(_shared_docs["isna"] % _shared_doc_kwargs) + def isnull(self: FrameOrSeries) -> FrameOrSeries: + return isna(self).__finalize__(self) + + _shared_docs[ + "notna" + ] = """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + %(klass)s + Mask of bool values for each element in %(klass)s that + indicates whether an element is not an NA value. + + See Also + -------- + %(klass)s.notnull : Alias of notna. + %(klass)s.isna : Boolean inverse of notna. + %(klass)s.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame({'age': [5, 6, np.NaN], + ... 'born': [pd.NaT, pd.Timestamp('1939-05-27'), + ... pd.Timestamp('1940-04-25')], + ... 'name': ['Alfred', 'Batman', ''], + ... 
'toy': [None, 'Batmobile', 'Joker']}) + >>> df + age born name toy + 0 5.0 NaT Alfred None + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.NaN]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ + + @Appender(_shared_docs["notna"] % _shared_doc_kwargs) + def notna(self: FrameOrSeries) -> FrameOrSeries: + return notna(self).__finalize__(self) + + @Appender(_shared_docs["notna"] % _shared_doc_kwargs) + def notnull(self: FrameOrSeries) -> FrameOrSeries: + return notna(self).__finalize__(self) + + def _clip_with_scalar(self, lower, upper, inplace: bool_t = False): + if (lower is not None and np.any(isna(lower))) or ( + upper is not None and np.any(isna(upper)) + ): + raise ValueError("Cannot use an NA value as a clip threshold") + + result = self + mask = isna(self.values) + + with np.errstate(all="ignore"): + if upper is not None: + subset = self.to_numpy() <= upper + result = result.where(subset, upper, axis=None, inplace=False) + if lower is not None: + subset = self.to_numpy() >= lower + result = result.where(subset, lower, axis=None, inplace=False) + + if np.any(mask): + result[mask] = np.nan + + if inplace: + self._update_inplace(result) + else: + return result + + def _clip_with_one_bound(self, threshold, method, axis, inplace): + + if axis is not None: + axis = self._get_axis_number(axis) + + # method is self.le for upper bound and self.ge for lower bound + if is_scalar(threshold) and is_number(threshold): + if method.__name__ == "le": + return self._clip_with_scalar(None, threshold, inplace=inplace) + return self._clip_with_scalar(threshold, None, inplace=inplace) + + subset = method(threshold, axis=axis) | isna(self) + + # GH #15390 + # In order for where method to work, the threshold 
must + # be transformed to NDFrame from other array like structure. + if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold): + if isinstance(self, ABCSeries): + threshold = self._constructor(threshold, index=self.index) + else: + threshold = _align_method_FRAME(self, threshold, axis) + return self.where(subset, threshold, axis=axis, inplace=inplace) + + def clip( + self: FrameOrSeries, + lower=None, + upper=None, + axis=None, + inplace: bool_t = False, + *args, + **kwargs, + ) -> FrameOrSeries: + """ + Trim values at input threshold(s). + + Assigns values outside boundary to boundary values. Thresholds + can be singular values or array like, and in the latter case + the clipping is performed element-wise in the specified axis. + + Parameters + ---------- + lower : float or array_like, default None + Minimum threshold value. All values below this + threshold will be set to it. + upper : float or array_like, default None + Maximum threshold value. All values above this + threshold will be set to it. + axis : int or str axis name, optional + Align object with lower and upper along the given axis. + inplace : bool, default False + Whether to perform the operation in place on the data. + + .. versionadded:: 0.21.0 + *args, **kwargs + Additional keywords have no effect but might be accepted + for compatibility with numpy. + + Returns + ------- + Series or DataFrame + Same type as calling object with the values outside the + clip boundaries replaced. 
+ + Examples + -------- + >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} + >>> df = pd.DataFrame(data) + >>> df + col_0 col_1 + 0 9 -2 + 1 -3 -7 + 2 0 6 + 3 -1 8 + 4 5 -5 + + Clips per column using lower and upper thresholds: + + >>> df.clip(-4, 6) + col_0 col_1 + 0 6 -2 + 1 -3 -4 + 2 0 6 + 3 -1 6 + 4 5 -4 + + Clips using specific lower and upper thresholds per column element: + + >>> t = pd.Series([2, -4, -1, 6, 3]) + >>> t + 0 2 + 1 -4 + 2 -1 + 3 6 + 4 3 + dtype: int64 + + >>> df.clip(t, t + 4, axis=0) + col_0 col_1 + 0 6 2 + 1 -3 -4 + 2 0 3 + 3 6 8 + 4 5 3 + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + axis = nv.validate_clip_with_axis(axis, args, kwargs) + if axis is not None: + axis = self._get_axis_number(axis) + + # GH 17276 + # numpy doesn't like NaN as a clip value + # so ignore + # GH 19992 + # numpy doesn't drop a list-like bound containing NaN + if not is_list_like(lower) and np.any(isna(lower)): + lower = None + if not is_list_like(upper) and np.any(isna(upper)): + upper = None + + # GH 2747 (arguments were reversed) + if lower is not None and upper is not None: + if is_scalar(lower) and is_scalar(upper): + lower, upper = min(lower, upper), max(lower, upper) + + # fast-path for scalars + if (lower is None or (is_scalar(lower) and is_number(lower))) and ( + upper is None or (is_scalar(upper) and is_number(upper)) + ): + return self._clip_with_scalar(lower, upper, inplace=inplace) + + result = self + if lower is not None: + result = result._clip_with_one_bound( + lower, method=self.ge, axis=axis, inplace=inplace + ) + if upper is not None: + if inplace: + result = self + result = result._clip_with_one_bound( + upper, method=self.le, axis=axis, inplace=inplace + ) + + return result + + _shared_docs[ + "groupby" + ] = """ + Group %(klass)s using a mapper or by a Series of columns. + + A groupby operation involves some combination of splitting the + object, applying a function, and combining the results. 
This can be + used to group large amounts of data and compute operations on these + groups. + + Parameters + ---------- + by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. + If ``by`` is a function, it's called on each value of the object's + index. If a dict or Series is passed, the Series or dict VALUES + will be used to determine the groups (the Series' values are first + aligned; see ``.align()`` method). If an ndarray is passed, the + values are used as-is determine the groups. A label or list of + labels may be passed to group by the columns in ``self``. Notice + that a tuple is interpreted as a (single) key. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). + level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. + as_index : bool, default True + For aggregated output, return object with group labels as the + index. Only relevant for DataFrame input. as_index=False is + effectively "SQL-style" grouped output. + sort : bool, default True + Sort group keys. Get better performance by turning this off. + Note this does not influence the order of observations within each + group. Groupby preserves the order of rows within each group. + group_keys : bool, default True + When calling apply, add group keys to index to identify pieces. + squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + observed : bool, default False + This only applies if any of the groupers are Categoricals. + If True: only show observed values for categorical groupers. + If False: show all values for categorical groupers. + + .. versionadded:: 0.23.0 + + Returns + ------- + %(klass)sGroupBy + Returns a groupby object that contains information about the groups. 
+ + See Also + -------- + resample : Convenience method for frequency conversion and resampling + of time series. + + Notes + ----- + See the `user guide + `_ for more. + """ + + def asfreq( + self: FrameOrSeries, + freq, + method=None, + how: Optional[str] = None, + normalize: bool_t = False, + fill_value=None, + ) -> FrameOrSeries: + """ + Convert TimeSeries to specified frequency. + + Optionally provide filling method to pad/backfill missing values. + + Returns the original data conformed to a new index with the specified + frequency. ``resample`` is more appropriate if an operation, such as + summarization, is necessary to represent the data at the new frequency. + + Parameters + ---------- + freq : DateOffset or str + method : {'backfill'/'bfill', 'pad'/'ffill'}, default None + Method to use for filling holes in reindexed Series (note this + does not fill NaNs that already were present): + + * 'pad' / 'ffill': propagate last valid observation forward to next + valid + * 'backfill' / 'bfill': use NEXT valid observation to fill. + how : {'start', 'end'}, default end + For PeriodIndex only (see PeriodIndex.asfreq). + normalize : bool, default False + Whether to reset output index to midnight. + fill_value : scalar, optional + Value to use for missing values, applied during upsampling (note + this does not fill NaNs that already were present). + + Returns + ------- + converted : same type as caller + + See Also + -------- + reindex + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + + Start by creating a series with 4 one minute timestamps. + + >>> index = pd.date_range('1/1/2000', periods=4, freq='T') + >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) + >>> df = pd.DataFrame({'s':series}) + >>> df + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:03:00 3.0 + + Upsample the series into 30 second bins. 
+ + >>> df.asfreq(freq='30S') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 NaN + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``fill value``. + + >>> df.asfreq(freq='30S', fill_value=9.0) + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 9.0 + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 9.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 9.0 + 2000-01-01 00:03:00 3.0 + + Upsample again, providing a ``method``. + + >>> df.asfreq(freq='30S', method='bfill') + s + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 NaN + 2000-01-01 00:01:30 2.0 + 2000-01-01 00:02:00 2.0 + 2000-01-01 00:02:30 3.0 + 2000-01-01 00:03:00 3.0 + """ + from pandas.core.resample import asfreq + + return asfreq( + self, + freq, + method=method, + how=how, + normalize=normalize, + fill_value=fill_value, + ) + + def at_time( + self: FrameOrSeries, time, asof: bool_t = False, axis=None + ) -> FrameOrSeries: + """ + Select values at particular time of day (e.g. 9:30AM). + + Parameters + ---------- + time : datetime.time or str + axis : {0 or 'index', 1 or 'columns'}, default 0 + + .. versionadded:: 0.24.0 + + Returns + ------- + Series or DataFrame + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + between_time : Select values between particular times of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. + DatetimeIndex.indexer_at_time : Get just the index locations for + values at particular time of the day. 
+ + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='12H') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 00:00:00 1 + 2018-04-09 12:00:00 2 + 2018-04-10 00:00:00 3 + 2018-04-10 12:00:00 4 + + >>> ts.at_time('12:00') + A + 2018-04-09 12:00:00 2 + 2018-04-10 12:00:00 4 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) + try: + indexer = index.indexer_at_time(time, asof=asof) + except AttributeError: + raise TypeError("Index must be DatetimeIndex") + + return self._take_with_is_copy(indexer, axis=axis) + + def between_time( + self: FrameOrSeries, + start_time, + end_time, + include_start: bool_t = True, + include_end: bool_t = True, + axis=None, + ) -> FrameOrSeries: + """ + Select values between particular times of the day (e.g., 9:00-9:30 AM). + + By setting ``start_time`` to be later than ``end_time``, + you can get the times that are *not* between the two times. + + Parameters + ---------- + start_time : datetime.time or str + end_time : datetime.time or str + include_start : bool, default True + include_end : bool, default True + axis : {0 or 'index', 1 or 'columns'}, default 0 + + .. versionadded:: 0.24.0 + + Returns + ------- + Series or DataFrame + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + at_time : Select values at a particular time of the day. + first : Select initial periods of time series based on a date offset. + last : Select final periods of time series based on a date offset. + DatetimeIndex.indexer_between_time : Get just the index locations for + values between particular times of the day. 
+ + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 00:00:00 1 + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + 2018-04-12 01:00:00 4 + + >>> ts.between_time('0:15', '0:45') + A + 2018-04-10 00:20:00 2 + 2018-04-11 00:40:00 3 + + You get the times that are *not* between two times by setting + ``start_time`` later than ``end_time``: + + >>> ts.between_time('0:45', '0:15') + A + 2018-04-09 00:00:00 1 + 2018-04-12 01:00:00 4 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + + index = self._get_axis(axis) + try: + indexer = index.indexer_between_time( + start_time, + end_time, + include_start=include_start, + include_end=include_end, + ) + except AttributeError: + raise TypeError("Index must be DatetimeIndex") + + return self._take_with_is_copy(indexer, axis=axis) + + def resample( + self, + rule, + axis=0, + closed: Optional[str] = None, + label: Optional[str] = None, + convention: str = "start", + kind: Optional[str] = None, + loffset=None, + base: int = 0, + on=None, + level=None, + ): + """ + Resample time-series data. + + Convenience method for frequency conversion and resampling of time + series. Object must have a datetime-like index (`DatetimeIndex`, + `PeriodIndex`, or `TimedeltaIndex`), or pass datetime-like values + to the `on` or `level` keyword. + + Parameters + ---------- + rule : DateOffset, Timedelta or str + The offset string or object representing target conversion. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Which axis to use for up- or down-sampling. For `Series` this + will default to 0, i.e. along the rows. Must be + `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. + closed : {'right', 'left'}, default None + Which side of bin interval is closed. 
The default is 'left' + for all frequency offsets except for 'M', 'A', 'Q', 'BM', + 'BA', 'BQ', and 'W' which all have a default of 'right'. + label : {'right', 'left'}, default None + Which bin edge label to label bucket with. The default is 'left' + for all frequency offsets except for 'M', 'A', 'Q', 'BM', + 'BA', 'BQ', and 'W' which all have a default of 'right'. + convention : {'start', 'end', 's', 'e'}, default 'start' + For `PeriodIndex` only, controls whether to use the start or + end of `rule`. + kind : {'timestamp', 'period'}, optional, default None + Pass 'timestamp' to convert the resulting index to a + `DatetimeIndex` or 'period' to convert it to a `PeriodIndex`. + By default the input representation is retained. + loffset : timedelta, default None + Adjust the resampled time labels. + base : int, default 0 + For frequencies that evenly subdivide 1 day, the "origin" of the + aggregated intervals. For example, for '5min' frequency, base could + range from 0 through 4. Defaults to 0. + on : str, optional + For a DataFrame, column to use instead of index for resampling. + Column must be datetime-like. + + level : str or int, optional + For a MultiIndex, level (name or number) to use for + resampling. `level` must be datetime-like. + + Returns + ------- + Resampler object + + See Also + -------- + groupby : Group by mapping, function, label, or list of labels. + Series.resample : Resample a Series. + DataFrame.resample : Resample a DataFrame. + + Notes + ----- + See the `user guide + <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`_ + for more. + + To learn more about the offset strings, please see `this link + <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__. + + Examples + -------- + + Start by creating a series with 9 one minute timestamps.
+ + >>> index = pd.date_range('1/1/2000', periods=9, freq='T') + >>> series = pd.Series(range(9), index=index) + >>> series + 2000-01-01 00:00:00 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:02:00 2 + 2000-01-01 00:03:00 3 + 2000-01-01 00:04:00 4 + 2000-01-01 00:05:00 5 + 2000-01-01 00:06:00 6 + 2000-01-01 00:07:00 7 + 2000-01-01 00:08:00 8 + Freq: T, dtype: int64 + + Downsample the series into 3 minute bins and sum the values + of the timestamps falling into a bin. + + >>> series.resample('3T').sum() + 2000-01-01 00:00:00 3 + 2000-01-01 00:03:00 12 + 2000-01-01 00:06:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3 minute bins as above, but label each + bin using the right edge instead of the left. Please note that the + value in the bucket used as the label is not included in the bucket, + which it labels. For example, in the original series the + bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed + value in the resampled bucket with the label ``2000-01-01 00:03:00`` + does not include 3 (if it did, the summed value would be 6, not 3). + To include this value close the right side of the bin interval as + illustrated in the example below this one. + + >>> series.resample('3T', label='right').sum() + 2000-01-01 00:03:00 3 + 2000-01-01 00:06:00 12 + 2000-01-01 00:09:00 21 + Freq: 3T, dtype: int64 + + Downsample the series into 3 minute bins as above, but close the right + side of the bin interval. + + >>> series.resample('3T', label='right', closed='right').sum() + 2000-01-01 00:00:00 0 + 2000-01-01 00:03:00 6 + 2000-01-01 00:06:00 15 + 2000-01-01 00:09:00 15 + Freq: 3T, dtype: int64 + + Upsample the series into 30 second bins. 
+ + >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows + 2000-01-01 00:00:00 0.0 + 2000-01-01 00:00:30 NaN + 2000-01-01 00:01:00 1.0 + 2000-01-01 00:01:30 NaN + 2000-01-01 00:02:00 2.0 + Freq: 30S, dtype: float64 + + Upsample the series into 30 second bins and fill the ``NaN`` + values using the ``pad`` method. + + >>> series.resample('30S').pad()[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 0 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 1 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Upsample the series into 30 second bins and fill the + ``NaN`` values using the ``bfill`` method. + + >>> series.resample('30S').bfill()[0:5] + 2000-01-01 00:00:00 0 + 2000-01-01 00:00:30 1 + 2000-01-01 00:01:00 1 + 2000-01-01 00:01:30 2 + 2000-01-01 00:02:00 2 + Freq: 30S, dtype: int64 + + Pass a custom function via ``apply`` + + >>> def custom_resampler(array_like): + ... return np.sum(array_like) + 5 + ... + >>> series.resample('3T').apply(custom_resampler) + 2000-01-01 00:00:00 8 + 2000-01-01 00:03:00 17 + 2000-01-01 00:06:00 26 + Freq: 3T, dtype: int64 + + For a Series with a PeriodIndex, the keyword `convention` can be + used to control whether to use the start or end of `rule`. + + Resample a year by quarter using 'start' `convention`. Values are + assigned to the first quarter of the period. + + >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01', + ... freq='A', + ... periods=2)) + >>> s + 2012 1 + 2013 2 + Freq: A-DEC, dtype: int64 + >>> s.resample('Q', convention='start').asfreq() + 2012Q1 1.0 + 2012Q2 NaN + 2012Q3 NaN + 2012Q4 NaN + 2013Q1 2.0 + 2013Q2 NaN + 2013Q3 NaN + 2013Q4 NaN + Freq: Q-DEC, dtype: float64 + + Resample quarters by month using 'end' `convention`. Values are + assigned to the last month of the period. + + >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01', + ... freq='Q', + ... 
periods=4)) + >>> q + 2018Q1 1 + 2018Q2 2 + 2018Q3 3 + 2018Q4 4 + Freq: Q-DEC, dtype: int64 + >>> q.resample('M', convention='end').asfreq() + 2018-03 1.0 + 2018-04 NaN + 2018-05 NaN + 2018-06 2.0 + 2018-07 NaN + 2018-08 NaN + 2018-09 3.0 + 2018-10 NaN + 2018-11 NaN + 2018-12 4.0 + Freq: M, dtype: float64 + + For DataFrame objects, the keyword `on` can be used to specify the + column instead of the index for resampling. + + >>> d = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) + >>> df = pd.DataFrame(d) + >>> df['week_starting'] = pd.date_range('01/01/2018', + ... periods=8, + ... freq='W') + >>> df + price volume week_starting + 0 10 50 2018-01-07 + 1 11 60 2018-01-14 + 2 9 40 2018-01-21 + 3 13 100 2018-01-28 + 4 14 50 2018-02-04 + 5 18 100 2018-02-11 + 6 17 40 2018-02-18 + 7 19 50 2018-02-25 + >>> df.resample('M', on='week_starting').mean() + price volume + week_starting + 2018-01-31 10.75 62.5 + 2018-02-28 17.00 60.0 + + For a DataFrame with MultiIndex, the keyword `level` can be used to + specify on which level the resampling needs to take place. + + >>> days = pd.date_range('1/1/2000', periods=4, freq='D') + >>> d2 = dict({'price': [10, 11, 9, 13, 14, 18, 17, 19], + ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}) + >>> df2 = pd.DataFrame(d2, + ... index=pd.MultiIndex.from_product([days, + ... ['morning', + ... 'afternoon']] + ... 
)) + >>> df2 + price volume + 2000-01-01 morning 10 50 + afternoon 11 60 + 2000-01-02 morning 9 40 + afternoon 13 100 + 2000-01-03 morning 14 50 + afternoon 18 100 + 2000-01-04 morning 17 40 + afternoon 19 50 + >>> df2.resample('D', level=0).sum() + price volume + 2000-01-01 21 110 + 2000-01-02 22 140 + 2000-01-03 32 150 + 2000-01-04 36 90 + """ + + from pandas.core.resample import resample + + axis = self._get_axis_number(axis) + return resample( + self, + freq=rule, + label=label, + closed=closed, + axis=axis, + kind=kind, + loffset=loffset, + convention=convention, + base=base, + key=on, + level=level, + ) + + def first(self: FrameOrSeries, offset) -> FrameOrSeries: + """ + Method to subset initial periods of time series data based on a date offset. + + Parameters + ---------- + offset : str, DateOffset, dateutil.relativedelta + + Returns + ------- + subset : same type as caller + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + last : Select final periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') + >>> ts = pd.DataFrame({'A': [1,2,3,4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the first 3 days: + + >>> ts.first('3D') + A + 2018-04-09 1 + 2018-04-11 2 + + Notice the data for 3 first calender days were returned, not the first + 3 days observed in the dataset, and therefore data for 2018-04-13 was + not returned. + """ + if not isinstance(self.index, DatetimeIndex): + raise TypeError("'first' only supports a DatetimeIndex index") + + if len(self.index) == 0: + return self + + offset = to_offset(offset) + end_date = end = self.index[0] + offset + + # Tick-like, e.g. 
3 weeks + if not offset.is_anchored() and hasattr(offset, "_inc"): + if end_date in self.index: + end = self.index.searchsorted(end_date, side="left") + return self.iloc[:end] + + return self.loc[:end] + + def last(self: FrameOrSeries, offset) -> FrameOrSeries: + """ + Method to subset final periods of time series data based on a date offset. + + Parameters + ---------- + offset : str, DateOffset, dateutil.relativedelta + + Returns + ------- + subset : same type as caller + + Raises + ------ + TypeError + If the index is not a :class:`DatetimeIndex` + + See Also + -------- + first : Select initial periods of time series based on a date offset. + at_time : Select values at a particular time of the day. + between_time : Select values between particular times of the day. + + Examples + -------- + >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') + >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> ts + A + 2018-04-09 1 + 2018-04-11 2 + 2018-04-13 3 + 2018-04-15 4 + + Get the rows for the last 3 days: + + >>> ts.last('3D') + A + 2018-04-13 3 + 2018-04-15 4 + + Notice the data for 3 last calender days were returned, not the last + 3 observed days in the dataset, and therefore data for 2018-04-11 was + not returned. + """ + if not isinstance(self.index, DatetimeIndex): + raise TypeError("'last' only supports a DatetimeIndex index") + + if len(self.index) == 0: + return self + + offset = to_offset(offset) + + start_date = self.index[-1] - offset + start = self.index.searchsorted(start_date, side="right") + return self.iloc[start:] + + def rank( + self: FrameOrSeries, + axis=0, + method: str = "average", + numeric_only: Optional[bool_t] = None, + na_option: str = "keep", + ascending: bool_t = True, + pct: bool_t = False, + ) -> FrameOrSeries: + """ + Compute numerical data ranks (1 through n) along axis. + + By default, equal values are assigned a rank that is the average of the + ranks of those values. 
+ + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + Index to direct ranking. + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + How to rank the group of records that have the same value (i.e. ties): + + * average: average rank of the group + * min: lowest rank in the group + * max: highest rank in the group + * first: ranks assigned in order they appear in the array + * dense: like 'min', but rank always increases by 1 between groups. + + numeric_only : bool, optional + For DataFrame objects, rank only numeric columns if set to True. + na_option : {'keep', 'top', 'bottom'}, default 'keep' + How to rank NaN values: + + * keep: assign NaN rank to NaN values + * top: assign smallest rank to NaN values if ascending + * bottom: assign highest rank to NaN values if ascending. + + ascending : bool, default True + Whether or not the elements should be ranked in ascending order. + pct : bool, default False + Whether or not to display the returned rankings in percentile + form. + + Returns + ------- + same type as caller + Return a Series or DataFrame with data ranks as values. + + See Also + -------- + core.groupby.GroupBy.rank : Rank of values within each group. + + Examples + -------- + + >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', + ... 'spider', 'snake'], + ... 'Number_legs': [4, 2, 4, 8, np.nan]}) + >>> df + Animal Number_legs + 0 cat 4.0 + 1 penguin 2.0 + 2 dog 4.0 + 3 spider 8.0 + 4 snake NaN + + The following example shows how the method behaves with the above + parameters: + + * default_rank: this is the default behaviour obtained without using + any parameter. + * max_rank: setting ``method = 'max'`` the records that have the + same values are ranked using the highest rank (e.g.: since 'cat' + and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.) 
+ * NA_bottom: choosing ``na_option = 'bottom'``, if there are records + with NaN values they are placed at the bottom of the ranking. + * pct_rank: when setting ``pct = True``, the ranking is expressed as + percentile rank. + + >>> df['default_rank'] = df['Number_legs'].rank() + >>> df['max_rank'] = df['Number_legs'].rank(method='max') + >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') + >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) + >>> df + Animal Number_legs default_rank max_rank NA_bottom pct_rank + 0 cat 4.0 2.5 3.0 2.5 0.625 + 1 penguin 2.0 1.0 1.0 1.0 0.250 + 2 dog 4.0 2.5 3.0 2.5 0.625 + 3 spider 8.0 4.0 4.0 4.0 1.000 + 4 snake NaN NaN NaN 5.0 NaN + """ + axis = self._get_axis_number(axis) + + if na_option not in {"keep", "top", "bottom"}: + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + raise ValueError(msg) + + def ranker(data): + ranks = algos.rank( + data.values, + axis=axis, + method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + ) + ranks = self._constructor(ranks, **data._construct_axes_dict()) + return ranks.__finalize__(self) + + # if numeric_only is None, and we can't get anything, we try with + # numeric_only=True + if numeric_only is None: + try: + return ranker(self) + except TypeError: + numeric_only = True + + if numeric_only: + data = self._get_numeric_data() + else: + data = self + + return ranker(data) + + _shared_docs[ + "align" + ] = """ + Align two objects on their axes with the specified join method. + + Join method is specified for each axis Index. + + Parameters + ---------- + other : DataFrame or Series + join : {'outer', 'inner', 'left', 'right'}, default 'outer' + axis : allowed axis of the other object, default None + Align on index (0), columns (1), or both (None). + level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + copy : bool, default True + Always returns new objects. 
If copy=False and no reindexing is + required then original objects are returned. + fill_value : scalar, default np.NaN + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series: + + - pad / ffill: propagate last valid observation forward to next valid. + - backfill / bfill: use NEXT valid observation to fill gap. + + limit : int, default None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + fill_axis : %(axes_single_arg)s, default 0 + Filling axis, method and limit. + broadcast_axis : %(axes_single_arg)s, default None + Broadcast values along this axis, if aligning two objects of + different dimensions. + + Returns + ------- + (left, right) : (%(klass)s, type of other) + Aligned objects. 
+ """ + + @Appender(_shared_docs["align"] % _shared_doc_kwargs) + def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + broadcast_axis=None, + ): + method = missing.clean_fill_method(method) + + if broadcast_axis == 1 and self.ndim != other.ndim: + if isinstance(self, ABCSeries): + # this means other is a DataFrame, and we need to broadcast + # self + cons = self._constructor_expanddim + df = cons( + {c: self for c in other.columns}, **other._construct_axes_dict() + ) + return df._align_frame( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + elif isinstance(other, ABCSeries): + # this means self is a DataFrame, and we need to broadcast + # other + cons = other._constructor_expanddim + df = cons( + {c: other for c in self.columns}, **self._construct_axes_dict() + ) + return self._align_frame( + df, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + + if axis is not None: + axis = self._get_axis_number(axis) + if isinstance(other, ABCDataFrame): + return self._align_frame( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + elif isinstance(other, ABCSeries): + return self._align_series( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + ) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + def _align_frame( + self, + other, + join="outer", + axis=None, + level=None, + copy: bool_t = True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + ): + # defaults + join_index, join_columns = None, None + ilidx, iridx = None, None + clidx, cridx = None, None + + 
is_series = isinstance(self, ABCSeries) + + if axis is None or axis == 0: + if not self.index.equals(other.index): + join_index, ilidx, iridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) + + if axis is None or axis == 1: + if not is_series and not self.columns.equals(other.columns): + join_columns, clidx, cridx = self.columns.join( + other.columns, how=join, level=level, return_indexers=True + ) + + if is_series: + reindexers = {0: [join_index, ilidx]} + else: + reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]} + + left = self._reindex_with_indexers( + reindexers, copy=copy, fill_value=fill_value, allow_dups=True + ) + # other must be always DataFrame + right = other._reindex_with_indexers( + {0: [join_index, iridx], 1: [join_columns, cridx]}, + copy=copy, + fill_value=fill_value, + allow_dups=True, + ) + + if method is not None: + _left = left.fillna(method=method, axis=fill_axis, limit=limit) + assert _left is not None # needed for mypy + left = _left + right = right.fillna(method=method, axis=fill_axis, limit=limit) + + # if DatetimeIndex have different tz, convert to UTC + if is_datetime64tz_dtype(left.index): + if left.index.tz != right.index.tz: + if join_index is not None: + left.index = join_index + right.index = join_index + + return left.__finalize__(self), right.__finalize__(other) + + def _align_series( + self, + other, + join="outer", + axis=None, + level=None, + copy: bool_t = True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + ): + + is_series = isinstance(self, ABCSeries) + + # series/series compat, other must always be a Series + if is_series: + if axis: + raise ValueError("cannot align series to a series other than axis 0") + + # equal + if self.index.equals(other.index): + join_index, lidx, ridx = None, None, None + else: + join_index, lidx, ridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) + + left = self._reindex_indexer(join_index, lidx, 
copy) + right = other._reindex_indexer(join_index, ridx, copy) + + else: + # one has > 1 ndim + fdata = self._data + if axis == 0: + join_index = self.index + lidx, ridx = None, None + if not self.index.equals(other.index): + join_index, lidx, ridx = self.index.join( + other.index, how=join, level=level, return_indexers=True + ) + + if lidx is not None: + fdata = fdata.reindex_indexer(join_index, lidx, axis=1) + + elif axis == 1: + join_index = self.columns + lidx, ridx = None, None + if not self.columns.equals(other.index): + join_index, lidx, ridx = self.columns.join( + other.index, how=join, level=level, return_indexers=True + ) + + if lidx is not None: + fdata = fdata.reindex_indexer(join_index, lidx, axis=0) + else: + raise ValueError("Must specify axis=0 or 1") + + if copy and fdata is self._data: + fdata = fdata.copy() + + left = self._constructor(fdata) + + if ridx is None: + right = other + else: + right = other.reindex(join_index, level=level) + + # fill + fill_na = notna(fill_value) or (method is not None) + if fill_na: + left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) + right = right.fillna(fill_value, method=method, limit=limit) + + # if DatetimeIndex have different tz, convert to UTC + if is_series or (not is_series and axis == 0): + if is_datetime64tz_dtype(left.index): + if left.index.tz != right.index.tz: + if join_index is not None: + left.index = join_index + right.index = join_index + + return left.__finalize__(self), right.__finalize__(other) + + def _where( + self, + cond, + other=np.nan, + inplace=False, + axis=None, + level=None, + errors="raise", + try_cast=False, + ): + """ + Equivalent to public method `where`, except that `other` is not + applied as a function even if callable. Used in __setitem__. 
+ """ + inplace = validate_bool_kwarg(inplace, "inplace") + + # align the cond to same shape as myself + cond = com.apply_if_callable(cond, self) + if isinstance(cond, NDFrame): + cond, _ = cond.align(self, join="right", broadcast_axis=1) + else: + if not hasattr(cond, "shape"): + cond = np.asanyarray(cond) + if cond.shape != self.shape: + raise ValueError("Array conditional must be same shape as self") + cond = self._constructor(cond, **self._construct_axes_dict()) + + # make sure we are boolean + fill_value = bool(inplace) + cond = cond.fillna(fill_value) + + msg = "Boolean array expected for the condition, not {dtype}" + + if not isinstance(cond, ABCDataFrame): + # This is a single-dimensional object. + if not is_bool_dtype(cond): + raise ValueError(msg.format(dtype=cond.dtype)) + elif not cond.empty: + for dt in cond.dtypes: + if not is_bool_dtype(dt): + raise ValueError(msg.format(dtype=dt)) + + cond = -cond if inplace else cond + + # try to align with other + try_quick = True + if hasattr(other, "align"): + + # align with me + if other.ndim <= self.ndim: + + _, other = self.align( + other, join="left", axis=axis, level=level, fill_value=np.nan + ) + + # if we are NOT aligned, raise as we cannot where index + if axis is None and not all( + other._get_axis(i).equals(ax) for i, ax in enumerate(self.axes) + ): + raise InvalidIndexError + + # slice me out of the other + else: + raise NotImplementedError( + "cannot align with a higher dimensional NDFrame" + ) + + if isinstance(other, np.ndarray): + + if other.shape != self.shape: + + if self.ndim == 1: + + icond = cond.values + + # GH 2745 / GH 4192 + # treat like a scalar + if len(other) == 1: + other = np.array(other[0]) + + # GH 3235 + # match True cond to other + elif len(cond[icond]) == len(other): + + # try to not change dtype at first (if try_quick) + if try_quick: + new_other = com.values_from_object(self) + new_other = new_other.copy() + new_other[icond] = other + other = new_other + + else: + raise 
ValueError( + "Length of replacements must equal series length" + ) + + else: + raise ValueError( + "other must be the same shape as self when an ndarray" + ) + + # we are the same shape, so create an actual object for alignment + else: + other = self._constructor(other, **self._construct_axes_dict()) + + if axis is None: + axis = 0 + + if self.ndim == getattr(other, "ndim", 0): + align = True + else: + align = self._get_axis_number(axis) == 1 + + block_axis = self._get_block_manager_axis(axis) + + if inplace: + # we may have different type blocks come out of putmask, so + # reconstruct the block manager + + self._check_inplace_setting(other) + new_data = self._data.putmask( + mask=cond, + new=other, + align=align, + inplace=True, + axis=block_axis, + transpose=self._AXIS_REVERSED, + ) + self._update_inplace(new_data) + + else: + new_data = self._data.where( + other=other, + cond=cond, + align=align, + errors=errors, + try_cast=try_cast, + axis=block_axis, + ) + + return self._constructor(new_data).__finalize__(self) + + _shared_docs[ + "where" + ] = """ + Replace values where the condition is %(cond_rev)s. + + Parameters + ---------- + cond : bool %(klass)s, array-like, or callable + Where `cond` is %(cond)s, keep the original value. Where + %(cond_rev)s, replace with corresponding value from `other`. + If `cond` is callable, it is computed on the %(klass)s and + should return boolean %(klass)s or array. The callable must + not change input %(klass)s (though pandas doesn't check it). + other : scalar, %(klass)s, or callable + Entries where `cond` is %(cond_rev)s are replaced with + corresponding value from `other`. + If other is callable, it is computed on the %(klass)s and + should return scalar or %(klass)s. The callable must not + change input %(klass)s (though pandas doesn't check it). + inplace : bool, default False + Whether to perform the operation in place on the data. + axis : int, default None + Alignment axis if needed. 
+ level : int, default None + Alignment level if needed. + errors : str, {'raise', 'ignore'}, default 'raise' + Note that currently this parameter won't affect + the results and will always coerce to a suitable dtype. + + - 'raise' : allow exceptions to be raised. + - 'ignore' : suppress exceptions. On error return original object. + + try_cast : bool, default False + Try to cast the result back to the input type (if possible). + + Returns + ------- + Same type as caller + + See Also + -------- + :func:`DataFrame.%(name_other)s` : Return an object of same shape as + self. + + Notes + ----- + The %(name)s method is an application of the if-then idiom. For each + element in the calling DataFrame, if ``cond`` is ``%(cond)s`` the + element is used; otherwise the corresponding element from the DataFrame + ``other`` is used. + + The signature for :func:`DataFrame.where` differs from + :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to + ``np.where(m, df1, df2)``. + + For further details and examples see the ``%(name)s`` documentation in + :ref:`indexing `. 
+ + Examples + -------- + >>> s = pd.Series(range(5)) + >>> s.where(s > 0) + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + 4 4.0 + dtype: float64 + + >>> s.mask(s > 0) + 0 0.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + >>> s.where(s > 1, 10) + 0 10 + 1 10 + 2 2 + 3 3 + 4 4 + dtype: int64 + + >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) + >>> df + A B + 0 0 1 + 1 2 3 + 2 4 5 + 3 6 7 + 4 8 9 + >>> m = df %% 3 == 0 + >>> df.where(m, -df) + A B + 0 0 -1 + 1 -2 3 + 2 -4 -5 + 3 6 -7 + 4 -8 9 + >>> df.where(m, -df) == np.where(m, df, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + >>> df.where(m, -df) == df.mask(~m, -df) + A B + 0 True True + 1 True True + 2 True True + 3 True True + 4 True True + """ + + @Appender( + _shared_docs["where"] + % dict( + _shared_doc_kwargs, + cond="True", + cond_rev="False", + name="where", + name_other="mask", + ) + ) + def where( + self, + cond, + other=np.nan, + inplace=False, + axis=None, + level=None, + errors="raise", + try_cast=False, + ): + + other = com.apply_if_callable(other, self) + return self._where( + cond, other, inplace, axis, level, errors=errors, try_cast=try_cast + ) + + @Appender( + _shared_docs["where"] + % dict( + _shared_doc_kwargs, + cond="False", + cond_rev="True", + name="mask", + name_other="where", + ) + ) + def mask( + self, + cond, + other=np.nan, + inplace=False, + axis=None, + level=None, + errors="raise", + try_cast=False, + ): + + inplace = validate_bool_kwarg(inplace, "inplace") + cond = com.apply_if_callable(cond, self) + + # see gh-21891 + if not hasattr(cond, "__invert__"): + cond = np.array(cond) + + return self.where( + ~cond, + other=other, + inplace=inplace, + axis=axis, + level=level, + try_cast=try_cast, + errors=errors, + ) + + _shared_docs[ + "shift" + ] = """ + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. 
+ If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. + + Parameters + ---------- + periods : int + Number of periods to shift. Can be positive or negative. + freq : DateOffset, tseries.offsets, timedelta, or str, optional + Offset to use from the tseries module or time rule (e.g. 'EOM'). + If `freq` is specified then the index values are shifted but the + data is not realigned. That is, use `freq` if you would like to + extend the index when shifting and preserve the original data. + axis : {0 or 'index', 1 or 'columns', None}, default None + Shift direction. + fill_value : object, optional + The scalar value to use for newly introduced missing values. + the default depends on the dtype of `self`. + For numeric data, ``np.nan`` is used. + For datetime, timedelta, or period data, etc. :attr:`NaT` is used. + For extension dtypes, ``self.dtype.na_value`` is used. + + .. versionchanged:: 0.24.0 + + Returns + ------- + %(klass)s + Copy of input object, shifted. + + See Also + -------- + Index.shift : Shift values of Index. + DatetimeIndex.shift : Shift values of DatetimeIndex. + PeriodIndex.shift : Shift values of PeriodIndex. + tshift : Shift the time index, using the index's frequency if + available. + + Examples + -------- + >>> df = pd.DataFrame({'Col1': [10, 20, 15, 30, 45], + ... 'Col2': [13, 23, 18, 33, 48], + ... 
'Col3': [17, 27, 22, 37, 52]}) + + >>> df.shift(periods=3) + Col1 Col2 Col3 + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 NaN NaN NaN + 3 10.0 13.0 17.0 + 4 20.0 23.0 27.0 + + >>> df.shift(periods=1, axis='columns') + Col1 Col2 Col3 + 0 NaN 10.0 13.0 + 1 NaN 20.0 23.0 + 2 NaN 15.0 18.0 + 3 NaN 30.0 33.0 + 4 NaN 45.0 48.0 + + >>> df.shift(periods=3, fill_value=0) + Col1 Col2 Col3 + 0 0 0 0 + 1 0 0 0 + 2 0 0 0 + 3 10 13 17 + 4 20 23 27 + """ + + @Appender(_shared_docs["shift"] % _shared_doc_kwargs) + def shift( + self: FrameOrSeries, periods=1, freq=None, axis=0, fill_value=None + ) -> FrameOrSeries: + if periods == 0: + return self.copy() + + block_axis = self._get_block_manager_axis(axis) + if freq is None: + new_data = self._data.shift( + periods=periods, axis=block_axis, fill_value=fill_value + ) + else: + return self.tshift(periods, freq) + + return self._constructor(new_data).__finalize__(self) + + def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: + """ + Equivalent to `shift` without copying data. + + The shifted data will not include the dropped periods and the + shifted axis will be smaller than the original. + + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative. + + Returns + ------- + shifted : same type as caller + + Notes + ----- + While the `slice_shift` is faster than `shift`, you may pay for it + later during alignment. + """ + if periods == 0: + return self + + if periods > 0: + vslicer = slice(None, -periods) + islicer = slice(periods, None) + else: + vslicer = slice(-periods, None) + islicer = slice(None, periods) + + new_obj = self._slice(vslicer, axis=axis) + shifted_axis = self._get_axis(axis)[islicer] + new_obj.set_axis(shifted_axis, axis=axis, inplace=True) + + return new_obj.__finalize__(self) + + def tshift( + self: FrameOrSeries, periods: int = 1, freq=None, axis=0 + ) -> FrameOrSeries: + """ + Shift the time index, using the index's frequency if available. 
+ + Parameters + ---------- + periods : int + Number of periods to move, can be positive or negative. + freq : DateOffset, timedelta, or str, default None + Increment to use from the tseries module + or time rule expressed as a string (e.g. 'EOM'). + axis : {0 or ‘index’, 1 or ‘columns’, None}, default 0 + Corresponds to the axis that contains the Index. + + Returns + ------- + shifted : Series/DataFrame + + Notes + ----- + If freq is not specified then tries to use the freq or inferred_freq + attributes of the index. If neither of those attributes exist, a + ValueError is thrown + """ + + index = self._get_axis(axis) + if freq is None: + freq = getattr(index, "freq", None) + + if freq is None: + freq = getattr(index, "inferred_freq", None) + + if freq is None: + msg = "Freq was not given and was not set in the index" + raise ValueError(msg) + + if periods == 0: + return self + + if isinstance(freq, str): + freq = to_offset(freq) + + block_axis = self._get_block_manager_axis(axis) + if isinstance(index, PeriodIndex): + orig_freq = to_offset(index.freq) + if freq == orig_freq: + new_data = self._data.copy() + new_data.axes[block_axis] = index.shift(periods) + elif orig_freq is not None: + msg = ( + f"Given freq {freq.rule_code} does not match" + f" PeriodIndex freq {orig_freq.rule_code}" + ) + raise ValueError(msg) + else: + new_data = self._data.copy() + new_data.axes[block_axis] = index.shift(periods, freq) + + return self._constructor(new_data).__finalize__(self) + + def truncate( + self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True + ) -> FrameOrSeries: + """ + Truncate a Series or DataFrame before and after some index value. + + This is a useful shorthand for boolean indexing based on index + values above or below certain thresholds. + + Parameters + ---------- + before : date, str, int + Truncate all rows before this index value. + after : date, str, int + Truncate all rows after this index value. 
+ axis : {0 or 'index', 1 or 'columns'}, optional + Axis to truncate. Truncates the index (rows) by default. + copy : bool, default is True, + Return a copy of the truncated section. + + Returns + ------- + type of caller + The truncated Series or DataFrame. + + See Also + -------- + DataFrame.loc : Select a subset of a DataFrame by label. + DataFrame.iloc : Select a subset of a DataFrame by position. + + Notes + ----- + If the index being truncated contains only datetime values, + `before` and `after` may be specified as strings instead of + Timestamps. + + Examples + -------- + >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], + ... 'B': ['f', 'g', 'h', 'i', 'j'], + ... 'C': ['k', 'l', 'm', 'n', 'o']}, + ... index=[1, 2, 3, 4, 5]) + >>> df + A B C + 1 a f k + 2 b g l + 3 c h m + 4 d i n + 5 e j o + + >>> df.truncate(before=2, after=4) + A B C + 2 b g l + 3 c h m + 4 d i n + + The columns of a DataFrame can be truncated. + + >>> df.truncate(before="A", after="B", axis="columns") + A B + 1 a f + 2 b g + 3 c h + 4 d i + 5 e j + + For Series, only rows can be truncated. + + >>> df['A'].truncate(before=2, after=4) + 2 b + 3 c + 4 d + Name: A, dtype: object + + The index values in ``truncate`` can be datetimes or string + dates. + + >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') + >>> df = pd.DataFrame(index=dates, data={'A': 1}) + >>> df.tail() + A + 2016-01-31 23:59:56 1 + 2016-01-31 23:59:57 1 + 2016-01-31 23:59:58 1 + 2016-01-31 23:59:59 1 + 2016-02-01 00:00:00 1 + + >>> df.truncate(before=pd.Timestamp('2016-01-05'), + ... after=pd.Timestamp('2016-01-10')).tail() + A + 2016-01-09 23:59:56 1 + 2016-01-09 23:59:57 1 + 2016-01-09 23:59:58 1 + 2016-01-09 23:59:59 1 + 2016-01-10 00:00:00 1 + + Because the index is a DatetimeIndex containing only dates, we can + specify `before` and `after` as strings. They will be coerced to + Timestamps before truncation. 
+ + >>> df.truncate('2016-01-05', '2016-01-10').tail() + A + 2016-01-09 23:59:56 1 + 2016-01-09 23:59:57 1 + 2016-01-09 23:59:58 1 + 2016-01-09 23:59:59 1 + 2016-01-10 00:00:00 1 + + Note that ``truncate`` assumes a 0 value for any unspecified time + component (midnight). This differs from partial string slicing, which + returns any partially matching dates. + + >>> df.loc['2016-01-05':'2016-01-10', :].tail() + A + 2016-01-10 23:59:55 1 + 2016-01-10 23:59:56 1 + 2016-01-10 23:59:57 1 + 2016-01-10 23:59:58 1 + 2016-01-10 23:59:59 1 + """ + if axis is None: + axis = self._stat_axis_number + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + # GH 17935 + # Check that index is sorted + if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing: + raise ValueError("truncate requires a sorted index") + + # if we have a date index, convert to dates, otherwise + # treat like a slice + if ax.is_all_dates: + from pandas.core.tools.datetimes import to_datetime + + before = to_datetime(before) + after = to_datetime(after) + + if before is not None and after is not None: + if before > after: + raise ValueError(f"Truncate: {after} must be after {before}") + + slicer = [slice(None, None)] * self._AXIS_LEN + slicer[axis] = slice(before, after) + result = self.loc[tuple(slicer)] + + if isinstance(ax, MultiIndex): + setattr(result, self._get_axis_name(axis), ax.truncate(before, after)) + + if copy: + result = result.copy() + + return result + + def tz_convert( + self: FrameOrSeries, tz, axis=0, level=None, copy: bool_t = True + ) -> FrameOrSeries: + """ + Convert tz-aware axis to target time zone. + + Parameters + ---------- + tz : str or tzinfo object + axis : the axis to convert + level : int, str, default None + If axis is a MultiIndex, convert a specific level. Otherwise + must be None. + copy : bool, default True + Also make a copy of the underlying data. + + Returns + ------- + %(klass)s + Object with time zone converted axis. 
+ + Raises + ------ + TypeError + If the axis is tz-naive. + """ + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + def _tz_convert(ax, tz): + if not hasattr(ax, "tz_convert"): + if len(ax) > 0: + ax_name = self._get_axis_name(axis) + raise TypeError( + f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" + ) + else: + ax = DatetimeIndex([], tz=tz) + else: + ax = ax.tz_convert(tz) + return ax + + # if a level is given it must be a MultiIndex level or + # equivalent to the axis name + if isinstance(ax, MultiIndex): + level = ax._get_level_number(level) + new_level = _tz_convert(ax.levels[level], tz) + ax = ax.set_levels(new_level, level=level) + else: + if level not in (None, 0, ax.name): + raise ValueError(f"The level {level} is not valid") + ax = _tz_convert(ax, tz) + + result = self._constructor(self._data, copy=copy) + result = result.set_axis(ax, axis=axis, inplace=False) + return result.__finalize__(self) + + def tz_localize( + self: FrameOrSeries, + tz, + axis=0, + level=None, + copy: bool_t = True, + ambiguous="raise", + nonexistent: str = "raise", + ) -> FrameOrSeries: + """ + Localize tz-naive index of a Series or DataFrame to target time zone. + + This operation localizes the Index. To localize the values in a + timezone-naive Series, use :meth:`Series.dt.tz_localize`. + + Parameters + ---------- + tz : str or tzinfo + axis : the axis to localize + level : int, str, default None + If axis ia a MultiIndex, localize a specific level. Otherwise + must be None. + copy : bool, default True + Also make a copy of the underlying data. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. + For example in Central European Time (UTC+01), when going from + 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at + 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the + `ambiguous` parameter dictates how ambiguous times should be + handled. 
+ + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times. + nonexistent : str, default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. Valid values are: + + - 'shift_forward' will shift the nonexistent time forward to the + closest existing time + - 'shift_backward' will shift the nonexistent time backward to the + closest existing time + - 'NaT' will return NaT where there are nonexistent times + - timedelta objects will shift nonexistent times by the timedelta + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times. + + .. versionadded:: 0.24.0 + + Returns + ------- + Series or DataFrame + Same type as the input. + + Raises + ------ + TypeError + If the TimeSeries is tz-aware and tz is not None. + + Examples + -------- + + Localize local times: + + >>> s = pd.Series([1], + ... index=pd.DatetimeIndex(['2018-09-15 01:30:00'])) + >>> s.tz_localize('CET') + 2018-09-15 01:30:00+02:00 1 + dtype: int64 + + Be careful with DST changes. When there is sequential data, pandas + can infer the DST time: + + >>> s = pd.Series(range(7), + ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 02:00:00', + ... '2018-10-28 02:30:00', + ... '2018-10-28 03:00:00', + ... '2018-10-28 03:30:00'])) + >>> s.tz_localize('CET', ambiguous='infer') + 2018-10-28 01:30:00+02:00 0 + 2018-10-28 02:00:00+02:00 1 + 2018-10-28 02:30:00+02:00 2 + 2018-10-28 02:00:00+01:00 3 + 2018-10-28 02:30:00+01:00 4 + 2018-10-28 03:00:00+01:00 5 + 2018-10-28 03:30:00+01:00 6 + dtype: int64 + + In some cases, inferring the DST is impossible. 
In such cases, you can + pass an ndarray to the ambiguous parameter to set the DST explicitly + + >>> s = pd.Series(range(3), + ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', + ... '2018-10-28 02:36:00', + ... '2018-10-28 03:46:00'])) + >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) + 2018-10-28 01:20:00+02:00 0 + 2018-10-28 02:36:00+02:00 1 + 2018-10-28 03:46:00+01:00 2 + dtype: int64 + + If the DST transition causes nonexistent times, you can shift these + dates forward or backwards with a timedelta object or `'shift_forward'` + or `'shift_backwards'`. + >>> s = pd.Series(range(2), + ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', + ... '2015-03-29 03:30:00'])) + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + 2015-03-29 03:00:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + 2015-03-29 01:59:59.999999999+01:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H')) + 2015-03-29 03:30:00+02:00 0 + 2015-03-29 03:30:00+02:00 1 + dtype: int64 + """ + nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") + if nonexistent not in nonexistent_options and not isinstance( + nonexistent, timedelta + ): + raise ValueError( + "The nonexistent argument must be one of 'raise', " + "'NaT', 'shift_forward', 'shift_backward' or " + "a timedelta object" + ) + + axis = self._get_axis_number(axis) + ax = self._get_axis(axis) + + def _tz_localize(ax, tz, ambiguous, nonexistent): + if not hasattr(ax, "tz_localize"): + if len(ax) > 0: + ax_name = self._get_axis_name(axis) + raise TypeError( + f"{ax_name} is not a valid DatetimeIndex or PeriodIndex" + ) + else: + ax = DatetimeIndex([], tz=tz) + else: + ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent) + return ax + + # if a level is given it must be a MultiIndex level or + # equivalent to the axis name + if isinstance(ax, 
MultiIndex): + level = ax._get_level_number(level) + new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent) + ax = ax.set_levels(new_level, level=level) + else: + if level not in (None, 0, ax.name): + raise ValueError(f"The level {level} is not valid") + ax = _tz_localize(ax, tz, ambiguous, nonexistent) + + result = self._constructor(self._data, copy=copy) + result = result.set_axis(ax, axis=axis, inplace=False) + return result.__finalize__(self) + + # ---------------------------------------------------------------------- + # Numeric Methods + def abs(self: FrameOrSeries) -> FrameOrSeries: + """ + Return a Series/DataFrame with absolute numeric value of each element. + + This function only applies to elements that are all numeric. + + Returns + ------- + abs + Series/DataFrame containing the absolute value of each element. + + See Also + -------- + numpy.absolute : Calculate the absolute value element-wise. + + Notes + ----- + For ``complex`` inputs, ``1.2 + 1j``, the absolute value is + :math:`\\sqrt{ a^2 + b^2 }`. + + Examples + -------- + Absolute numeric values in a Series. + + >>> s = pd.Series([-1.10, 2, -3.33, 4]) + >>> s.abs() + 0 1.10 + 1 2.00 + 2 3.33 + 3 4.00 + dtype: float64 + + Absolute numeric values in a Series with complex numbers. + + >>> s = pd.Series([1.2 + 1j]) + >>> s.abs() + 0 1.56205 + dtype: float64 + + Absolute numeric values in a Series with a Timedelta element. + + >>> s = pd.Series([pd.Timedelta('1 days')]) + >>> s.abs() + 0 1 days + dtype: timedelta64[ns] + + Select rows with data closest to certain value using argsort (from + `StackOverflow `__). + + >>> df = pd.DataFrame({ + ... 'a': [4, 5, 6, 7], + ... 'b': [10, 20, 30, 40], + ... 'c': [100, 50, -30, -50] + ... 
}) + >>> df + a b c + 0 4 10 100 + 1 5 20 50 + 2 6 30 -30 + 3 7 40 -50 + >>> df.loc[(df.c - 43).abs().argsort()] + a b c + 1 5 20 50 + 0 4 10 100 + 2 6 30 -30 + 3 7 40 -50 + """ + return np.abs(self) + + def describe( + self: FrameOrSeries, percentiles=None, include=None, exclude=None + ) -> FrameOrSeries: + """ + Generate descriptive statistics. + + Descriptive statistics include those that summarize the central + tendency, dispersion and shape of a + dataset's distribution, excluding ``NaN`` values. + + Analyzes both numeric and object series, as well + as ``DataFrame`` column sets of mixed data types. The output + will vary depending on what is provided. Refer to the notes + below for more detail. + + Parameters + ---------- + percentiles : list-like of numbers, optional + The percentiles to include in the output. All should + fall between 0 and 1. The default is + ``[.25, .5, .75]``, which returns the 25th, 50th, and + 75th percentiles. + include : 'all', list-like of dtypes or None (default), optional + A white list of data types to include in the result. Ignored + for ``Series``. Here are the options: + + - 'all' : All columns of the input will be included in the output. + - A list-like of dtypes : Limits the results to the + provided data types. + To limit the result to numeric types submit + ``numpy.number``. To limit it instead to object columns submit + the ``numpy.object`` data type. Strings + can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To + select pandas categorical columns, use ``'category'`` + - None (default) : The result will include all numeric columns. + exclude : list-like of dtypes or None (default), optional, + A black list of data types to omit from the result. Ignored + for ``Series``. Here are the options: + + - A list-like of dtypes : Excludes the provided data types + from the result. To exclude numeric types submit + ``numpy.number``. 
To exclude object columns submit the data + type ``numpy.object``. Strings can also be used in the style of + ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To + exclude pandas categorical columns, use ``'category'`` + - None (default) : The result will exclude nothing. + + Returns + ------- + Series or DataFrame + Summary statistics of the Series or Dataframe provided. + + See Also + -------- + DataFrame.count: Count number of non-NA/null observations. + DataFrame.max: Maximum of the values in the object. + DataFrame.min: Minimum of the values in the object. + DataFrame.mean: Mean of the values. + DataFrame.std: Standard deviation of the observations. + DataFrame.select_dtypes: Subset of a DataFrame including/excluding + columns based on their dtype. + + Notes + ----- + For numeric data, the result's index will include ``count``, + ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and + upper percentiles. By default the lower percentile is ``25`` and the + upper percentile is ``75``. The ``50`` percentile is the + same as the median. + + For object data (e.g. strings or timestamps), the result's index + will include ``count``, ``unique``, ``top``, and ``freq``. The ``top`` + is the most common value. The ``freq`` is the most common value's + frequency. Timestamps also include the ``first`` and ``last`` items. + + If multiple object values have the highest count, then the + ``count`` and ``top`` results will be arbitrarily chosen from + among those with the highest count. + + For mixed data types provided via a ``DataFrame``, the default is to + return only an analysis of numeric columns. If the dataframe consists + only of object and categorical data without any numeric columns, the + default is to return an analysis of both the object and categorical + columns. If ``include='all'`` is provided as an option, the result + will include a union of attributes of each type. 
+ + The `include` and `exclude` parameters can be used to limit + which columns in a ``DataFrame`` are analyzed for the output. + The parameters are ignored when analyzing a ``Series``. + + Examples + -------- + Describing a numeric ``Series``. + + >>> s = pd.Series([1, 2, 3]) + >>> s.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + dtype: float64 + + Describing a categorical ``Series``. + + >>> s = pd.Series(['a', 'a', 'b', 'c']) + >>> s.describe() + count 4 + unique 3 + top a + freq 2 + dtype: object + + Describing a timestamp ``Series``. + + >>> s = pd.Series([ + ... np.datetime64("2000-01-01"), + ... np.datetime64("2010-01-01"), + ... np.datetime64("2010-01-01") + ... ]) + >>> s.describe() + count 3 + unique 2 + top 2010-01-01 00:00:00 + freq 2 + first 2000-01-01 00:00:00 + last 2010-01-01 00:00:00 + dtype: object + + Describing a ``DataFrame``. By default only numeric fields + are returned. + + >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']), + ... 'numeric': [1, 2, 3], + ... 'object': ['a', 'b', 'c'] + ... }) + >>> df.describe() + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Describing all columns of a ``DataFrame`` regardless of data type. + + >>> df.describe(include='all') + categorical numeric object + count 3 3.0 3 + unique 3 NaN 3 + top f NaN c + freq 1 NaN 1 + mean NaN 2.0 NaN + std NaN 1.0 NaN + min NaN 1.0 NaN + 25% NaN 1.5 NaN + 50% NaN 2.0 NaN + 75% NaN 2.5 NaN + max NaN 3.0 NaN + + Describing a column from a ``DataFrame`` by accessing it as + an attribute. + + >>> df.numeric.describe() + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + Name: numeric, dtype: float64 + + Including only numeric columns in a ``DataFrame`` description. 
+ + >>> df.describe(include=[np.number]) + numeric + count 3.0 + mean 2.0 + std 1.0 + min 1.0 + 25% 1.5 + 50% 2.0 + 75% 2.5 + max 3.0 + + Including only string columns in a ``DataFrame`` description. + + >>> df.describe(include=[np.object]) + object + count 3 + unique 3 + top c + freq 1 + + Including only categorical columns from a ``DataFrame`` description. + + >>> df.describe(include=['category']) + categorical + count 3 + unique 3 + top f + freq 1 + + Excluding numeric columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[np.number]) + categorical object + count 3 3 + unique 3 3 + top f c + freq 1 1 + + Excluding object columns from a ``DataFrame`` description. + + >>> df.describe(exclude=[np.object]) + categorical numeric + count 3 3.0 + unique 3 NaN + top f NaN + freq 1 NaN + mean NaN 2.0 + std NaN 1.0 + min NaN 1.0 + 25% NaN 1.5 + 50% NaN 2.0 + 75% NaN 2.5 + max NaN 3.0 + """ + if self.ndim == 2 and self.columns.size == 0: + raise ValueError("Cannot describe a DataFrame without columns") + + if percentiles is not None: + # explicit conversion of `percentiles` to list + percentiles = list(percentiles) + + # get them all to be in [0, 1] + validate_percentile(percentiles) + + # median should always be included + if 0.5 not in percentiles: + percentiles.append(0.5) + percentiles = np.asarray(percentiles) + else: + percentiles = np.array([0.25, 0.5, 0.75]) + + # sort and check for duplicates + unique_pcts = np.unique(percentiles) + if len(unique_pcts) < len(percentiles): + raise ValueError("percentiles cannot contain duplicates") + percentiles = unique_pcts + + formatted_percentiles = format_percentiles(percentiles) + + def describe_numeric_1d(series): + stat_index = ( + ["count", "mean", "std", "min"] + formatted_percentiles + ["max"] + ) + d = ( + [series.count(), series.mean(), series.std(), series.min()] + + series.quantile(percentiles).tolist() + + [series.max()] + ) + return pd.Series(d, index=stat_index, name=series.name) + + def 
describe_categorical_1d(data): + names = ["count", "unique"] + objcounts = data.value_counts() + count_unique = len(objcounts[objcounts != 0]) + result = [data.count(), count_unique] + dtype = None + if result[1] > 0: + top, freq = objcounts.index[0], objcounts.iloc[0] + + if is_datetime64_any_dtype(data): + tz = data.dt.tz + asint = data.dropna().values.view("i8") + top = Timestamp(top) + if top.tzinfo is not None and tz is not None: + # Don't tz_localize(None) if key is already tz-aware + top = top.tz_convert(tz) + else: + top = top.tz_localize(tz) + names += ["top", "freq", "first", "last"] + result += [ + top, + freq, + Timestamp(asint.min(), tz=tz), + Timestamp(asint.max(), tz=tz), + ] + else: + names += ["top", "freq"] + result += [top, freq] + + # If the DataFrame is empty, set 'top' and 'freq' to None + # to maintain output shape consistency + else: + names += ["top", "freq"] + result += [np.nan, np.nan] + dtype = "object" + + return pd.Series(result, index=names, name=data.name, dtype=dtype) + + def describe_1d(data): + if is_bool_dtype(data): + return describe_categorical_1d(data) + elif is_numeric_dtype(data): + return describe_numeric_1d(data) + elif is_timedelta64_dtype(data): + return describe_numeric_1d(data) + else: + return describe_categorical_1d(data) + + if self.ndim == 1: + return describe_1d(self) + elif (include is None) and (exclude is None): + # when some numerics are found, keep only numerics + data = self.select_dtypes(include=[np.number]) + if len(data.columns) == 0: + data = self + elif include == "all": + if exclude is not None: + msg = "exclude must be None when include is 'all'" + raise ValueError(msg) + data = self + else: + data = self.select_dtypes(include=include, exclude=exclude) + + ldesc = [describe_1d(s) for _, s in data.items()] + # set a convenient order for rows + names: List[Optional[Hashable]] = [] + ldesc_indexes = sorted((x.index for x in ldesc), key=len) + for idxnames in ldesc_indexes: + for name in idxnames: + if 
name not in names: + names.append(name) + + d = pd.concat([x.reindex(names, copy=False) for x in ldesc], axis=1, sort=False) + d.columns = data.columns.copy() + return d + + _shared_docs[ + "pct_change" + ] = """ + Percentage change between the current and a prior element. + + Computes the percentage change from the immediately previous row by + default. This is useful in comparing the percentage of change in a time + series of elements. + + Parameters + ---------- + periods : int, default 1 + Periods to shift for forming percent change. + fill_method : str, default 'pad' + How to handle NAs before computing percent changes. + limit : int, default None + The number of consecutive NAs to fill before stopping. + freq : DateOffset, timedelta, or str, optional + Increment to use from time series API (e.g. 'M' or BDay()). + **kwargs + Additional keyword arguments are passed into + `DataFrame.shift` or `Series.shift`. + + Returns + ------- + chg : Series or DataFrame + The same type as the calling object. + + See Also + -------- + Series.diff : Compute the difference of two elements in a Series. + DataFrame.diff : Compute the difference of two elements in a DataFrame. + Series.shift : Shift the index by some number of periods. + DataFrame.shift : Shift the index by some number of periods. + + Examples + -------- + **Series** + + >>> s = pd.Series([90, 91, 85]) + >>> s + 0 90 + 1 91 + 2 85 + dtype: int64 + + >>> s.pct_change() + 0 NaN + 1 0.011111 + 2 -0.065934 + dtype: float64 + + >>> s.pct_change(periods=2) + 0 NaN + 1 NaN + 2 -0.055556 + dtype: float64 + + See the percentage change in a Series where filling NAs with last + valid observation forward to next valid. 
+ + >>> s = pd.Series([90, 91, None, 85]) + >>> s + 0 90.0 + 1 91.0 + 2 NaN + 3 85.0 + dtype: float64 + + >>> s.pct_change(fill_method='ffill') + 0 NaN + 1 0.011111 + 2 0.000000 + 3 -0.065934 + dtype: float64 + + **DataFrame** + + Percentage change in French franc, Deutsche Mark, and Italian lira from + 1980-01-01 to 1980-03-01. + + >>> df = pd.DataFrame({ + ... 'FR': [4.0405, 4.0963, 4.3149], + ... 'GR': [1.7246, 1.7482, 1.8519], + ... 'IT': [804.74, 810.01, 860.13]}, + ... index=['1980-01-01', '1980-02-01', '1980-03-01']) + >>> df + FR GR IT + 1980-01-01 4.0405 1.7246 804.74 + 1980-02-01 4.0963 1.7482 810.01 + 1980-03-01 4.3149 1.8519 860.13 + + >>> df.pct_change() + FR GR IT + 1980-01-01 NaN NaN NaN + 1980-02-01 0.013810 0.013684 0.006549 + 1980-03-01 0.053365 0.059318 0.061876 + + Percentage of change in GOOG and APPL stock volume. Shows computing + the percentage change between columns. + + >>> df = pd.DataFrame({ + ... '2016': [1769950, 30586265], + ... '2015': [1500923, 40912316], + ... '2014': [1371819, 41403351]}, + ... index=['GOOG', 'APPL']) + >>> df + 2016 2015 2014 + GOOG 1769950 1500923 1371819 + APPL 30586265 40912316 41403351 + + >>> df.pct_change(axis='columns') + 2016 2015 2014 + GOOG NaN -0.151997 -0.086016 + APPL NaN 0.337604 0.012002 + """ + + @Appender(_shared_docs["pct_change"] % _shared_doc_kwargs) + def pct_change( + self: FrameOrSeries, + periods=1, + fill_method="pad", + limit=None, + freq=None, + **kwargs, + ) -> FrameOrSeries: + # TODO: Not sure if above is correct - need someone to confirm. 
+ axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name)) + if fill_method is None: + data = self + else: + _data = self.fillna(method=fill_method, axis=axis, limit=limit) + assert _data is not None # needed for mypy + data = _data + + rs = data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1 + if freq is not None: + # Shift method is implemented differently when freq is not None + # We want to restore the original index + rs = rs.loc[~rs.index.duplicated()] + rs = rs.reindex_like(data) + return rs + + def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): + if axis is None: + raise ValueError("Must specify 'axis' when aggregating by level.") + grouped = self.groupby(level=level, axis=axis, sort=False) + if hasattr(grouped, name) and skipna: + return getattr(grouped, name)(**kwargs) + axis = self._get_axis_number(axis) + method = getattr(type(self), name) + applyf = lambda x: method(x, axis=axis, skipna=skipna, **kwargs) + return grouped.aggregate(applyf) + + @classmethod + def _add_numeric_operations(cls): + """ + Add the operations to the cls; evaluate the doc strings again + """ + + axis_descr, name, name2 = _doc_parms(cls) + + cls.any = _make_logical_function( + cls, + "any", + name, + name2, + axis_descr, + _any_desc, + nanops.nanany, + _any_see_also, + _any_examples, + empty_value=False, + ) + cls.all = _make_logical_function( + cls, + "all", + name, + name2, + axis_descr, + _all_desc, + nanops.nanall, + _all_see_also, + _all_examples, + empty_value=True, + ) + + @Substitution( + desc="Return the mean absolute deviation of the values " + "for the requested axis.", + name1=name, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also="", + examples="", + ) + @Appender(_num_doc) + def mad(self, axis=None, skipna=None, level=None): + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level("mad", axis=axis, level=level, 
skipna=skipna) + + data = self._get_numeric_data() + if axis == 0: + demeaned = data - data.mean(axis=0) + else: + demeaned = data.sub(data.mean(axis=1), axis=0) + return np.abs(demeaned).mean(axis=axis, skipna=skipna) + + cls.mad = mad + + cls.sem = _make_stat_function_ddof( + cls, + "sem", + name, + name2, + axis_descr, + "Return unbiased standard error of the mean over requested " + "axis.\n\nNormalized by N-1 by default. This can be changed " + "using the ddof argument", + nanops.nansem, + ) + cls.var = _make_stat_function_ddof( + cls, + "var", + name, + name2, + axis_descr, + "Return unbiased variance over requested axis.\n\nNormalized by " + "N-1 by default. This can be changed using the ddof argument", + nanops.nanvar, + ) + cls.std = _make_stat_function_ddof( + cls, + "std", + name, + name2, + axis_descr, + "Return sample standard deviation over requested axis." + "\n\nNormalized by N-1 by default. This can be changed using the " + "ddof argument", + nanops.nanstd, + ) + + cls.cummin = _make_cum_function( + cls, + "cummin", + name, + name2, + axis_descr, + "minimum", + np.minimum.accumulate, + "min", + np.inf, + np.nan, + _cummin_examples, + ) + cls.cumsum = _make_cum_function( + cls, + "cumsum", + name, + name2, + axis_descr, + "sum", + np.cumsum, + "sum", + 0.0, + np.nan, + _cumsum_examples, + ) + cls.cumprod = _make_cum_function( + cls, + "cumprod", + name, + name2, + axis_descr, + "product", + np.cumprod, + "prod", + 1.0, + np.nan, + _cumprod_examples, + ) + cls.cummax = _make_cum_function( + cls, + "cummax", + name, + name2, + axis_descr, + "maximum", + np.maximum.accumulate, + "max", + -np.inf, + np.nan, + _cummax_examples, + ) + + cls.sum = _make_min_count_stat_function( + cls, + "sum", + name, + name2, + axis_descr, + """Return the sum of the values for the requested axis.\n + This is equivalent to the method ``numpy.sum``.""", + nanops.nansum, + _stat_func_see_also, + _sum_examples, + ) + cls.mean = _make_stat_function( + cls, + "mean", + name, + 
name2, + axis_descr, + "Return the mean of the values for the requested axis.", + nanops.nanmean, + ) + cls.skew = _make_stat_function( + cls, + "skew", + name, + name2, + axis_descr, + "Return unbiased skew over requested axis.\n\nNormalized by N-1.", + nanops.nanskew, + ) + cls.kurt = _make_stat_function( + cls, + "kurt", + name, + name2, + axis_descr, + "Return unbiased kurtosis over requested axis.\n\n" + "Kurtosis obtained using Fisher's definition of\n" + "kurtosis (kurtosis of normal == 0.0). Normalized " + "by N-1.", + nanops.nankurt, + ) + cls.kurtosis = cls.kurt + cls.prod = _make_min_count_stat_function( + cls, + "prod", + name, + name2, + axis_descr, + "Return the product of the values for the requested axis.", + nanops.nanprod, + examples=_prod_examples, + ) + cls.product = cls.prod + cls.median = _make_stat_function( + cls, + "median", + name, + name2, + axis_descr, + "Return the median of the values for the requested axis.", + nanops.nanmedian, + ) + cls.max = _make_stat_function( + cls, + "max", + name, + name2, + axis_descr, + """Return the maximum of the values for the requested axis.\n + If you want the *index* of the maximum, use ``idxmax``. This is + the equivalent of the ``numpy.ndarray`` method ``argmax``.""", + nanops.nanmax, + _stat_func_see_also, + _max_examples, + ) + cls.min = _make_stat_function( + cls, + "min", + name, + name2, + axis_descr, + """Return the minimum of the values for the requested axis.\n + If you want the *index* of the minimum, use ``idxmin``. This is + the equivalent of the ``numpy.ndarray`` method ``argmin``.""", + nanops.nanmin, + _stat_func_see_also, + _min_examples, + ) + + @classmethod + def _add_series_or_dataframe_operations(cls): + """ + Add the series or dataframe only operations to the cls; evaluate + the doc strings again. 
+ """ + + from pandas.core.window import EWM, Expanding, Rolling, Window + + @Appender(Rolling.__doc__) + def rolling( + self, + window, + min_periods=None, + center=False, + win_type=None, + on=None, + axis=0, + closed=None, + ): + axis = self._get_axis_number(axis) + + if win_type is not None: + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + ) + + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + ) + + cls.rolling = rolling + + @Appender(Expanding.__doc__) + def expanding(self, min_periods=1, center=False, axis=0): + axis = self._get_axis_number(axis) + return Expanding(self, min_periods=min_periods, center=center, axis=axis) + + cls.expanding = expanding + + @Appender(EWM.__doc__) + def ewm( + self, + com=None, + span=None, + halflife=None, + alpha=None, + min_periods=0, + adjust=True, + ignore_na=False, + axis=0, + ): + axis = self._get_axis_number(axis) + return EWM( + self, + com=com, + span=span, + halflife=halflife, + alpha=alpha, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na, + axis=axis, + ) + + cls.ewm = ewm + + @Appender(_shared_docs["transform"] % dict(axis="", **_shared_doc_kwargs)) + def transform(self, func, *args, **kwargs): + result = self.agg(func, *args, **kwargs) + if is_scalar(result) or len(result) != len(self): + raise ValueError("transforms cannot produce aggregated results") + + return result + + # ---------------------------------------------------------------------- + # Misc methods + + _shared_docs[ + "valid_index" + ] = """ + Return index for %(position)s non-NA/null value. + + Returns + ------- + scalar : type of index + + Notes + ----- + If all elements are non-NA/null, returns None. + Also returns None for empty %(klass)s. 
+ """ + + def _find_valid_index(self, how: str): + """ + Retrieves the index of the first valid value. + + Parameters + ---------- + how : {'first', 'last'} + Use this parameter to change between the first or last valid index. + + Returns + ------- + idx_first_valid : type of index + """ + + idxpos = find_valid_index(self._values, how) + if idxpos is None: + return None + return self.index[idxpos] + + @Appender( + _shared_docs["valid_index"] % {"position": "first", "klass": "Series/DataFrame"} + ) + def first_valid_index(self): + return self._find_valid_index("first") + + @Appender( + _shared_docs["valid_index"] % {"position": "last", "klass": "Series/DataFrame"} + ) + def last_valid_index(self): + return self._find_valid_index("last") + + +def _doc_parms(cls): + """Return a tuple of the doc parms.""" + axis_descr = ( + f"{{{', '.join(f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS))}}}" + ) + name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar" + name2 = cls.__name__ + return axis_descr, name, name2 + + +_num_doc = """ +%(desc)s + +Parameters +---------- +axis : %(axis_descr)s + Axis for the function to be applied on. +skipna : bool, default True + Exclude NA/null values when computing the result. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. +%(min_count)s\ +**kwargs + Additional keyword arguments to be passed to the function. + +Returns +------- +%(name1)s or %(name2)s (if level specified)\ +%(see_also)s\ +%(examples)s +""" + +_num_ddof_doc = """ +%(desc)s + +Parameters +---------- +axis : %(axis_descr)s +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. 
+level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. +numeric_only : bool, default None + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. Not implemented for Series. + +Returns +------- +%(name1)s or %(name2)s (if level specified)\n""" + +_bool_doc = """ +%(desc)s + +Parameters +---------- +axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. + + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + +bool_only : bool, default None + Include only boolean columns. If None, will attempt to use everything, + then use only boolean data. Not implemented for Series. +skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be %(empty_value)s, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. +level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a %(name1)s. +**kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + +Returns +------- +%(name1)s or %(name2)s + If level is specified, then, %(name2)s is returned; otherwise, %(name1)s + is returned. + +%(see_also)s +%(examples)s""" + +_all_desc = """\ +Return whether all elements are True, potentially over an axis. 
+ +Returns True unless there at least one element within a series or +along a Dataframe axis that is False or equivalent (e.g. zero or +empty).""" + +_all_examples = """\ +Examples +-------- +**Series** + +>>> pd.Series([True, True]).all() +True +>>> pd.Series([True, False]).all() +False +>>> pd.Series([]).all() +True +>>> pd.Series([np.nan]).all() +True +>>> pd.Series([np.nan]).all(skipna=False) +True + +**DataFrames** + +Create a dataframe from a dictionary. + +>>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]}) +>>> df + col1 col2 +0 True True +1 True False + +Default behaviour checks if column-wise values all return True. + +>>> df.all() +col1 True +col2 False +dtype: bool + +Specify ``axis='columns'`` to check if row-wise values all return True. + +>>> df.all(axis='columns') +0 True +1 False +dtype: bool + +Or ``axis=None`` for whether every value is True. + +>>> df.all(axis=None) +False +""" + +_all_see_also = """\ +See Also +-------- +Series.all : Return True if all elements are True. +DataFrame.any : Return True if one (or more) elements are True. +""" + +_cnum_doc = """ +Return cumulative %(desc)s over a DataFrame or Series axis. + +Returns a DataFrame or Series of the same size containing the cumulative +%(desc)s. + +Parameters +---------- +axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. +skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. +*args, **kwargs : + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + +Returns +------- +%(name1)s or %(name2)s + +See Also +-------- +core.window.Expanding.%(accum_func_name)s : Similar functionality + but ignores ``NaN`` values. +%(name2)s.%(accum_func_name)s : Return the %(desc)s over + %(name2)s axis. +%(name2)s.cummax : Return cumulative maximum over %(name2)s axis. 
+%(name2)s.cummin : Return cumulative minimum over %(name2)s axis. +%(name2)s.cumsum : Return cumulative sum over %(name2)s axis. +%(name2)s.cumprod : Return cumulative product over %(name2)s axis. + +%(examples)s""" + +_cummin_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cummin() +0 2.0 +1 NaN +2 2.0 +3 -1.0 +4 -1.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cummin(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the minimum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cummin() + A B +0 2.0 1.0 +1 2.0 NaN +2 1.0 0.0 + +To iterate over columns and find the minimum in each row, +use ``axis=1`` + +>>> df.cummin(axis=1) + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 +""" + +_cumsum_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cumsum() +0 2.0 +1 NaN +2 7.0 +3 6.0 +4 6.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cumsum(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the sum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 
+ +>>> df.cumsum() + A B +0 2.0 1.0 +1 5.0 NaN +2 6.0 1.0 + +To iterate over columns and find the sum in each row, +use ``axis=1`` + +>>> df.cumsum(axis=1) + A B +0 2.0 3.0 +1 3.0 NaN +2 1.0 1.0 +""" + +_cumprod_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cumprod() +0 2.0 +1 NaN +2 10.0 +3 -10.0 +4 -0.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cumprod(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the product +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + +>>> df.cumprod() + A B +0 2.0 1.0 +1 6.0 NaN +2 6.0 0.0 + +To iterate over columns and find the product in each row, +use ``axis=1`` + +>>> df.cumprod(axis=1) + A B +0 2.0 2.0 +1 3.0 NaN +2 1.0 0.0 +""" + +_cummax_examples = """\ +Examples +-------- +**Series** + +>>> s = pd.Series([2, np.nan, 5, -1, 0]) +>>> s +0 2.0 +1 NaN +2 5.0 +3 -1.0 +4 0.0 +dtype: float64 + +By default, NA values are ignored. + +>>> s.cummax() +0 2.0 +1 NaN +2 5.0 +3 5.0 +4 5.0 +dtype: float64 + +To include NA values in the operation, use ``skipna=False`` + +>>> s.cummax(skipna=False) +0 2.0 +1 NaN +2 NaN +3 NaN +4 NaN +dtype: float64 + +**DataFrame** + +>>> df = pd.DataFrame([[2.0, 1.0], +... [3.0, np.nan], +... [1.0, 0.0]], +... columns=list('AB')) +>>> df + A B +0 2.0 1.0 +1 3.0 NaN +2 1.0 0.0 + +By default, iterates over rows and finds the maximum +in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 
+ +>>> df.cummax() + A B +0 2.0 1.0 +1 3.0 NaN +2 3.0 1.0 + +To iterate over columns and find the maximum in each row, +use ``axis=1`` + +>>> df.cummax(axis=1) + A B +0 2.0 2.0 +1 3.0 NaN +2 1.0 1.0 +""" + +_any_see_also = """\ +See Also +-------- +numpy.any : Numpy version of this method. +Series.any : Return whether any element is True. +Series.all : Return whether all elements are True. +DataFrame.any : Return whether any element is True over requested axis. +DataFrame.all : Return whether all elements are True over requested axis. +""" + +_any_desc = """\ +Return whether any element is True, potentially over an axis. + +Returns False unless there at least one element within a series or +along a Dataframe axis that is True or equivalent (e.g. non-zero or +non-empty).""" + +_any_examples = """\ +Examples +-------- +**Series** + +For Series input, the output is a scalar indicating whether any element +is True. + +>>> pd.Series([False, False]).any() +False +>>> pd.Series([True, False]).any() +True +>>> pd.Series([]).any() +False +>>> pd.Series([np.nan]).any() +False +>>> pd.Series([np.nan]).any(skipna=False) +True + +**DataFrame** + +Whether each column contains at least one True element (the default). + +>>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) +>>> df + A B C +0 1 0 0 +1 2 2 0 + +>>> df.any() +A True +B True +C False +dtype: bool + +Aggregating over the columns. + +>>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) +>>> df + A B +0 True 1 +1 False 2 + +>>> df.any(axis='columns') +0 True +1 True +dtype: bool + +>>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) +>>> df + A B +0 True 1 +1 False 0 + +>>> df.any(axis='columns') +0 True +1 False +dtype: bool + +Aggregating over the entire DataFrame with ``axis=None``. + +>>> df.any(axis=None) +True + +`any` for an empty DataFrame is an empty Series. 
+ +>>> pd.DataFrame([]).any() +Series([], dtype: bool) +""" + +_shared_docs[ + "stat_func_example" +] = """ + +Examples +-------- +>>> idx = pd.MultiIndex.from_arrays([ +... ['warm', 'warm', 'cold', 'cold'], +... ['dog', 'falcon', 'fish', 'spider']], +... names=['blooded', 'animal']) +>>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx) +>>> s +blooded animal +warm dog 4 + falcon 2 +cold fish 0 + spider 8 +Name: legs, dtype: int64 + +>>> s.{stat_func}() +{default_output} + +{verb} using level names, as well as indices. + +>>> s.{stat_func}(level='blooded') +blooded +warm {level_output_0} +cold {level_output_1} +Name: legs, dtype: int64 + +>>> s.{stat_func}(level=0) +blooded +warm {level_output_0} +cold {level_output_1} +Name: legs, dtype: int64""" + +_sum_examples = _shared_docs["stat_func_example"].format( + stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8 +) + +_sum_examples += """ + +By default, the sum of an empty or all-NA Series is ``0``. + +>>> pd.Series([]).sum() # min_count=0 is the default +0.0 + +This can be controlled with the ``min_count`` parameter. For example, if +you'd like the sum of an empty series to be NaN, pass ``min_count=1``. + +>>> pd.Series([]).sum(min_count=1) +nan + +Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and +empty series identically. + +>>> pd.Series([np.nan]).sum() +0.0 + +>>> pd.Series([np.nan]).sum(min_count=1) +nan""" + +_max_examples = _shared_docs["stat_func_example"].format( + stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8 +) + +_min_examples = _shared_docs["stat_func_example"].format( + stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0 +) + +_stat_func_see_also = """ + +See Also +-------- +Series.sum : Return the sum. +Series.min : Return the minimum. +Series.max : Return the maximum. +Series.idxmin : Return the index of the minimum. +Series.idxmax : Return the index of the maximum. 
+DataFrame.sum : Return the sum over the requested axis. +DataFrame.min : Return the minimum over the requested axis. +DataFrame.max : Return the maximum over the requested axis. +DataFrame.idxmin : Return the index of the minimum over the requested axis. +DataFrame.idxmax : Return the index of the maximum over the requested axis.""" + +_prod_examples = """ + +Examples +-------- +By default, the product of an empty or all-NA Series is ``1`` + +>>> pd.Series([]).prod() +1.0 + +This can be controlled with the ``min_count`` parameter + +>>> pd.Series([]).prod(min_count=1) +nan + +Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and +empty series identically. + +>>> pd.Series([np.nan]).prod() +1.0 + +>>> pd.Series([np.nan]).prod(min_count=1) +nan""" + +_min_count_stub = """\ +min_count : int, default 0 + The required number of valid values to perform the operation. If fewer than + ``min_count`` non-NA values are present the result will be NA. + + .. versionadded:: 0.22.0 + + Added with the default being 0. This means the sum of an all-NA + or empty Series is 0, and the product of an all-NA or empty + Series is 1. 
+""" + + +def _make_min_count_stat_function( + cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = "" +): + @Substitution( + desc=desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count=_min_count_stub, + see_also=see_also, + examples=examples, + ) + @Appender(_num_doc) + def stat_func( + self, + axis=None, + skipna=None, + level=None, + numeric_only=None, + min_count=0, + **kwargs, + ): + if name == "sum": + nv.validate_sum(tuple(), kwargs) + elif name == "prod": + nv.validate_prod(tuple(), kwargs) + else: + nv.validate_stat_func(tuple(), kwargs, fname=name) + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, min_count=min_count + ) + return self._reduce( + f, + name, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + min_count=min_count, + ) + + return set_function_name(stat_func, name, cls) + + +def _make_stat_function( + cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = "" +): + @Substitution( + desc=desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + min_count="", + see_also=see_also, + examples=examples, + ) + @Appender(_num_doc) + def stat_func( + self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs + ): + if name == "median": + nv.validate_median(tuple(), kwargs) + else: + nv.validate_stat_func(tuple(), kwargs, fname=name) + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) + return self._reduce( + f, name, axis=axis, skipna=skipna, numeric_only=numeric_only + ) + + return set_function_name(stat_func, name, cls) + + +def _make_stat_function_ddof(cls, name, name1, name2, axis_descr, desc, f): + @Substitution(desc=desc, name1=name1, name2=name2, axis_descr=axis_descr) + 
@Appender(_num_ddof_doc) + def stat_func( + self, axis=None, skipna=None, level=None, ddof=1, numeric_only=None, **kwargs + ): + nv.validate_stat_ddof_func(tuple(), kwargs, fname=name) + if skipna is None: + skipna = True + if axis is None: + axis = self._stat_axis_number + if level is not None: + return self._agg_by_level( + name, axis=axis, level=level, skipna=skipna, ddof=ddof + ) + return self._reduce( + f, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof + ) + + return set_function_name(stat_func, name, cls) + + +def _make_cum_function( + cls, + name, + name1, + name2, + axis_descr, + desc, + accum_func, + accum_func_name, + mask_a, + mask_b, + examples, +): + @Substitution( + desc=desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + accum_func_name=accum_func_name, + examples=examples, + ) + @Appender(_cnum_doc) + def cum_func(self, axis=None, skipna=True, *args, **kwargs): + skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name) + if axis is None: + axis = self._stat_axis_number + else: + axis = self._get_axis_number(axis) + + if axis == 1: + return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T + + def na_accum_func(blk_values): + # We will be applying this function to block values + if blk_values.dtype.kind in ["m", "M"]: + # GH#30460, GH#29058 + # numpy 1.18 started sorting NaTs at the end instead of beginning, + # so we need to work around to maintain backwards-consistency. 
+ orig_dtype = blk_values.dtype + + # We need to define mask before masking NaTs + mask = isna(blk_values) + + if accum_func == np.minimum.accumulate: + # Note: the accum_func comparison fails as an "is" comparison + y = blk_values.view("i8") + y[mask] = np.iinfo(np.int64).max + changed = True + else: + y = blk_values + changed = False + + result = accum_func(y.view("i8"), axis) + if skipna: + np.putmask(result, mask, iNaT) + elif accum_func == np.minimum.accumulate: + # Restore NaTs that we masked previously + nz = (~np.asarray(mask)).nonzero()[0] + if len(nz): + # everything up to the first non-na entry stays NaT + result[: nz[0]] = iNaT + + if changed: + # restore NaT elements + y[mask] = iNaT # TODO: could try/finally for this? + + if isinstance(blk_values, np.ndarray): + result = result.view(orig_dtype) + else: + # DatetimeArray + result = type(blk_values)._from_sequence(result, dtype=orig_dtype) + + elif skipna and not issubclass( + blk_values.dtype.type, (np.integer, np.bool_) + ): + vals = blk_values.copy().T + mask = isna(vals) + np.putmask(vals, mask, mask_a) + result = accum_func(vals, axis) + np.putmask(result, mask, mask_b) + else: + result = accum_func(blk_values.T, axis) + + # transpose back for ndarray, not for EA + return result.T if hasattr(result, "T") else result + + result = self._data.apply(na_accum_func) + + d = self._construct_axes_dict() + d["copy"] = False + return self._constructor(result, **d).__finalize__(self) + + return set_function_name(cum_func, name, cls) + + +def _make_logical_function( + cls, name, name1, name2, axis_descr, desc, f, see_also, examples, empty_value +): + @Substitution( + desc=desc, + name1=name1, + name2=name2, + axis_descr=axis_descr, + see_also=see_also, + examples=examples, + empty_value=empty_value, + ) + @Appender(_bool_doc) + def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs): + nv.validate_logical_func(tuple(), kwargs, fname=name) + if level is not None: + if bool_only is not 
None: + raise NotImplementedError( + "Option bool_only is not implemented with option level." + ) + return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) + return self._reduce( + f, + name, + axis=axis, + skipna=skipna, + numeric_only=bool_only, + filter_type="bool", + ) + + return set_function_name(logical_func, name, cls) diff --git a/pandas/core/groupby/__init__.py b/pandas/core/groupby/__init__.py new file mode 100644 index 00000000..0c5d2658 --- /dev/null +++ b/pandas/core/groupby/__init__.py @@ -0,0 +1,11 @@ +from pandas.core.groupby.generic import DataFrameGroupBy, NamedAgg, SeriesGroupBy +from pandas.core.groupby.groupby import GroupBy +from pandas.core.groupby.grouper import Grouper + +__all__ = [ + "DataFrameGroupBy", + "NamedAgg", + "SeriesGroupBy", + "GroupBy", + "Grouper", +] diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py new file mode 100644 index 00000000..700d8d50 --- /dev/null +++ b/pandas/core/groupby/base.py @@ -0,0 +1,188 @@ +""" +Provide basic components for groupby. These definitions +hold the whitelist of methods that are exposed on the +SeriesGroupBy and the DataFrameGroupBy objects. +""" +import collections + +from pandas.core.dtypes.common import is_list_like, is_scalar + +OutputKey = collections.namedtuple("OutputKey", ["label", "position"]) + + +class GroupByMixin: + """ + Provide the groupby facilities to the mixed object. + """ + + def _gotitem(self, key, ndim, subset=None): + """ + Sub-classes to define. Return a sliced object. 
+ + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + # create a new object to prevent aliasing + if subset is None: + subset = self.obj + + # we need to make a shallow copy of ourselves + # with the same groupby + kwargs = {attr: getattr(self, attr) for attr in self._attributes} + + # Try to select from a DataFrame, falling back to a Series + try: + groupby = self._groupby[key] + except IndexError: + groupby = self._groupby + + self = type(self)(subset, groupby=groupby, parent=self, **kwargs) + self._reset_cache() + if subset.ndim == 2: + if is_scalar(key) and key in subset or is_list_like(key): + self._selection = key + return self + + +# special case to prevent duplicate plots when catching exceptions when +# forwarding methods from NDFrames +plotting_methods = frozenset(["plot", "hist"]) + +common_apply_whitelist = ( + frozenset( + [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "corr", + "cov", + "diff", + ] + ) + | plotting_methods +) + +series_apply_whitelist = ( + ( + common_apply_whitelist + | { + "nlargest", + "nsmallest", + "is_monotonic_increasing", + "is_monotonic_decreasing", + } + ) +) | frozenset(["dtype", "unique"]) + +dataframe_apply_whitelist = common_apply_whitelist | frozenset(["dtypes", "corrwith"]) + +# cythonized transformations or canned "agg+broadcast", which do not +# require postprocessing of the result by transform. +cythonized_kernels = frozenset(["cumprod", "cumsum", "shift", "cummin", "cummax"]) + +cython_cast_blacklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"]) + +# List of aggregation/reduction functions. 
+# These map each group to a single numeric value +reduction_kernels = frozenset( + [ + "all", + "any", + "count", + "first", + "idxmax", + "idxmin", + "last", + "mad", + "max", + "mean", + "median", + "min", + "ngroup", + "nth", + "nunique", + "prod", + # as long as `quantile`'s signature accepts only + # a single quantile value, it's a reduction. + # GH#27526 might change that. + "quantile", + "sem", + "size", + "skew", + "std", + "sum", + "var", + ] +) + +# List of transformation functions. +# a transformation is a function that, for each group, +# produces a result that has the same shape as the group. +transformation_kernels = frozenset( + [ + "backfill", + "bfill", + "corrwith", + "cumcount", + "cummax", + "cummin", + "cumprod", + "cumsum", + "diff", + "ffill", + "fillna", + "pad", + "pct_change", + "rank", + "shift", + "tshift", + ] +) + +# these are all the public methods on Grouper which don't belong +# in either of the above lists +groupby_other_methods = frozenset( + [ + "agg", + "aggregate", + "apply", + "boxplot", + # corr and cov return ngroups*ncolumns rows, so they + # are neither a transformation nor a reduction + "corr", + "cov", + "describe", + "dtypes", + "expanding", + "filter", + "get_group", + "groups", + "head", + "hist", + "indices", + "ndim", + "ngroups", + "ohlc", + "pipe", + "plot", + "resample", + "rolling", + "tail", + "take", + "transform", + ] +) +# Valid values of `name` for `groupby.transform(name)` +# NOTE: do NOT edit this directly. New additions should be inserted +# into the appropriate list above. 
def recode_for_groupby(c: Categorical, sort: bool, observed: bool):
    """
    Prepare a Categorical so that it can be used as a groupby key.

    Three cases are handled:

    * ``observed=True``: build a new Categorical restricted to the
      categories that actually occur in the data.
    * ``observed=False, sort=True``: ``c`` is used unchanged.
    * ``observed=False, sort=False``: the categories are reordered to the
      order returned by ``c.unique()``, followed by the categories that do
      not occur in the data, so that the categorical index of the GroupBy
      result follows order of appearance (GH-8868).

    Parameters
    ----------
    c : Categorical
    sort : bool
        The value of the sort parameter groupby was called with.
    observed : bool
        Account only for the observed values.

    Returns
    -------
    tuple
        ``(recoded, original)``. ``recoded`` is the Categorical to group on.
        ``original`` is ``c`` itself when ``observed`` is True and ``None``
        otherwise.
    """
    if not observed:
        if sort:
            # Already sorted according to c.categories; all is fine
            return c, None

        # sort=False should order groups in as-encountered order (GH-8868)
        encountered = c.unique()

        # But for groupby to work, all categories should be present,
        # including those missing from the data (GH-13179), which .unique()
        # above dropped
        absent = c.categories[~c.categories.isin(encountered.categories)]
        complete = encountered.add_categories(absent)
        return c.reorder_categories(complete.categories), None

    # observed=True: we only care about the codes present in the data
    present_codes = unique1d(c.codes)
    kept_codes = present_codes[present_codes != -1]
    if c.ordered:
        kept_codes = np.sort(kept_codes)

    # recode against the reduced set of categories
    kept_categories = c.categories.take(kept_codes)
    recoded = _recode_for_categories(c.codes, c.categories, kept_categories)

    # hand back the reduced categorical together with the original
    reduced_dtype = CategoricalDtype(kept_categories, ordered=c.ordered)
    return Categorical(recoded, dtype=reduced_dtype, fastpath=True), c
def recode_from_groupby(c: Categorical, sort: bool, ci):
    """
    Restore the full set of categories of ``c`` on a groupby result index.

    Parameters
    ----------
    c : Categorical
        Supplies the complete set of categories.
    sort : bool
        The value of the sort parameter groupby was called with.
    ci : CategoricalIndex
        The codes / categories to recode.

    Returns
    -------
    CategoricalIndex
        With ``sort=True`` the categories of ``ci`` are set to
        ``c.categories``. Otherwise the categories of ``c`` that ``ci``
        lacks are appended after the existing ones.
    """
    if not sort:
        # we are not sorting, so add unobserved to the end
        unobserved = c.categories[~c.categories.isin(ci.categories)]
        return ci.add_categories(unobserved)

    # we re-order to the original category orderings
    return ci.set_categories(c.categories)
def generate_property(name: str, klass: Type[FrameOrSeries]):
    """
    Build a read-only property that dispatches ``name`` to ``klass``.

    Accessing the property on a GroupBy instance returns
    ``self._make_wrapper(name)``. The getter is given the name of the
    attribute and the docstring of ``klass.<name>`` (an empty string when
    that attribute has no docstring).

    Parameters
    ----------
    name : str
    klass : {DataFrame, Series}

    Returns
    -------
    property
    """
    # Looked up eagerly so that a name missing on klass fails right here.
    inherited_doc = getattr(klass, name).__doc__

    def prop(self):
        return self._make_wrapper(name)

    # Metadata must be in place before wrapping: property() copies the
    # getter's docstring when it is constructed.
    prop.__name__ = name
    prop.__doc__ = inherited_doc if inherited_doc else ""
    return property(prop)
def pin_whitelisted_properties(klass: Type[FrameOrSeries], whitelist: FrozenSet[str]):
    """
    Make a class decorator that pins dispatching properties onto a GroupBy.

    Parameters
    ----------
    klass : DataFrame or Series class
        Class where the members are defined.
    whitelist : frozenset[str]
        Names of klass members to expose on the decorated class.

    Returns
    -------
    class decorator

    Notes
    -----
    A name the decorated class already has (defined on it or inherited) is
    left alone, so explicit definitions are never overridden.
    """

    def pinner(cls):
        # Evaluated lazily, so each hasattr check sees earlier setattr calls.
        not_yet_defined = (name for name in whitelist if not hasattr(cls, name))
        for name in not_yet_defined:
            setattr(cls, name, generate_property(name, klass))
        return cls

    return pinner
@Substitution(
    see_also=_agg_see_also_doc,
    examples=_agg_examples_doc,
    versionadded="",
    klass="Series",
    axis="",
)
@Appender(_shared_docs["aggregate"])
def aggregate(self, func=None, *args, **kwargs):
    # NOTE: the user-facing docstring is assembled by the decorators above,
    # so this method is documented with comments only.
    #
    # Dispatch on the kind of ``func``:
    #   * None      -> named aggregation; ``kwargs`` maps output name -> func
    #   * str       -> call the method of that name on this GroupBy
    #   * iterable  -> several aggregations, one output column each
    #   * otherwise -> a single callable

    relabeling = func is None
    columns = None
    no_arg_message = "Must provide 'func' or named aggregation **kwargs."
    if relabeling:
        columns = list(kwargs)
        func = [kwargs[col] for col in columns]
        # kwargs have been consumed as the aggregation spec
        kwargs = {}
        if not columns:
            raise TypeError(no_arg_message)

    if isinstance(func, str):
        return getattr(self, func)(*args, **kwargs)

    elif isinstance(func, abc.Iterable):
        # Catch instances of lists / tuples
        # but not the class list / tuple itself.
        func = _maybe_mangle_lambdas(func)
        ret = self._aggregate_multiple_funcs(func)
        if relabeling:
            ret.columns = columns
    else:
        cyfunc = self._get_cython_func(func)
        if cyfunc and not args and not kwargs:
            return getattr(self, cyfunc)()

        if self.grouper.nkeys > 1:
            return self._python_agg_general(func, *args, **kwargs)

        try:
            return self._python_agg_general(func, *args, **kwargs)
        except (ValueError, KeyError):
            # TODO: KeyError is raised in _python_agg_general,
            #  see test_groupby.test_basic
            result = self._aggregate_named(func, *args, **kwargs)

        index = Index(sorted(result), name=self.grouper.names[0])
        ret = create_series_with_explicit_dtype(
            result, index=index, dtype_if_empty=object
        )

    if not self.as_index:  # pragma: no cover
        # Report through the warnings machinery instead of writing to
        # stdout, so callers can filter, capture or escalate it.
        warnings.warn("Warning, ignoring as_index=True", stacklevel=2)

    if isinstance(ret, dict):
        from pandas import concat

        ret = concat(ret, axis=1)
    return ret


agg = aggregate
have not shown a higher level one + # GH 15931 + if isinstance(self._selected_obj, Series): + raise SpecificationError("nested renamer is not supported") + + columns = list(arg.keys()) + arg = arg.items() + elif any(isinstance(x, (tuple, list)) for x in arg): + arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] + + # indicated column order + columns = next(zip(*arg)) + else: + # list of functions / function names + columns = [] + for f in arg: + columns.append(com.get_callable_name(f) or f) + + arg = zip(columns, arg) + + results = {} + for name, func in arg: + obj = self + + # reset the cache so that we + # only include the named selection + if name in self._selected_obj: + obj = copy.copy(obj) + obj._reset_cache() + obj._selection = name + results[name] = obj.aggregate(func) + + if any(isinstance(x, DataFrame) for x in results.values()): + # let higher level handle + return results + + return DataFrame(results, columns=columns) + + def _wrap_series_output( + self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index + ) -> Union[Series, DataFrame]: + """ + Wraps the output of a SeriesGroupBy operation into the expected result. + + Parameters + ---------- + output : Mapping[base.OutputKey, Union[Series, np.ndarray]] + Data to wrap. + index : pd.Index + Index to apply to the output. + + Returns + ------- + Series or DataFrame + + Notes + ----- + In the vast majority of cases output and columns will only contain one + element. The exception is operations that expand dimensions, like ohlc. 
+ """ + indexed_output = {key.position: val for key, val in output.items()} + columns = Index(key.label for key in output) + + result: Union[Series, DataFrame] + if len(output) > 1: + result = DataFrame(indexed_output, index=index) + result.columns = columns + else: + result = Series(indexed_output[0], index=index, name=columns[0]) + + return result + + def _wrap_aggregated_output( + self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + ) -> Union[Series, DataFrame]: + """ + Wraps the output of a SeriesGroupBy aggregation into the expected result. + + Parameters + ---------- + output : Mapping[base.OutputKey, Union[Series, np.ndarray]] + Data to wrap. + + Returns + ------- + Series or DataFrame + + Notes + ----- + In the vast majority of cases output will only contain one element. + The exception is operations that expand dimensions, like ohlc. + """ + result = self._wrap_series_output( + output=output, index=self.grouper.result_index + ) + return self._reindex_output(result)._convert(datetime=True) + + def _wrap_transformed_output( + self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]] + ) -> Series: + """ + Wraps the output of a SeriesGroupBy aggregation into the expected result. + + Parameters + ---------- + output : dict[base.OutputKey, Union[Series, np.ndarray]] + Dict with a sole key of 0 and a value of the result values. + + Returns + ------- + Series + + Notes + ----- + output should always contain one element. It is specified as a dict + for consistency with DataFrame methods and _wrap_aggregated_output. 
+ """ + assert len(output) == 1 + result = self._wrap_series_output(output=output, index=self.obj.index) + + # No transformations increase the ndim of the result + assert isinstance(result, Series) + return result + + def _wrap_applied_output(self, keys, values, not_indexed_same=False): + if len(keys) == 0: + # GH #6265 + return Series([], name=self._selection_name, index=keys, dtype=np.float64) + + def _get_index() -> Index: + if self.grouper.nkeys > 1: + index = MultiIndex.from_tuples(keys, names=self.grouper.names) + else: + index = Index(keys, name=self.grouper.names[0]) + return index + + if isinstance(values[0], dict): + # GH #823 #24880 + index = _get_index() + result = self._reindex_output(DataFrame(values, index=index)) + # if self.observed is False, + # keep all-NaN rows created while re-indexing + result = result.stack(dropna=self.observed) + result.name = self._selection_name + return result + + if isinstance(values[0], Series): + return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) + elif isinstance(values[0], DataFrame): + # possible that Series -> DataFrame by applied function + return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) + else: + # GH #6265 #24880 + result = Series(data=values, index=_get_index(), name=self._selection_name) + return self._reindex_output(result) + + def _aggregate_named(self, func, *args, **kwargs): + result = {} + + for name, group in self: + group.name = name + output = func(group, *args, **kwargs) + if isinstance(output, (Series, Index, np.ndarray)): + raise ValueError("Must produce aggregated value") + result[name] = output + + return result + + @Substitution(klass="Series", selected="A.") + @Appender(_transform_template) + def transform(self, func, *args, **kwargs): + func = self._get_cython_func(func) or func + + if not isinstance(func, str): + return self._transform_general(func, *args, **kwargs) + + elif func not in base.transform_kernel_whitelist: + msg = f"'{func}' 
def _transform_fast(self, result, func_nm: str) -> Series:
    """
    Fast version of transform, only applicable to builtin/cythonizable
    functions.

    Parameters
    ----------
    result : Series
        Per-group result of the reduction named ``func_nm``.
    func_nm : str
        Name of the reduction; decides whether the broadcast values are
        cast back via ``self._try_cast``.

    Returns
    -------
    Series
        ``result`` values taken at each row's group id, carrying the index
        and name of ``self.obj``.
    """
    # Only the group ids are needed from group_info.
    ids, _, _ = self.grouper.group_info
    # Named ``should_cast`` so it does not shadow ``typing.cast``.
    should_cast = self._transform_should_cast(func_nm)
    out = algorithms.take_1d(result._values, ids)
    if should_cast:
        out = self._try_cast(out, self.obj)
    return Series(out, index=self.obj.index, name=self.obj.name)
def filter(self, func, dropna=True, *args, **kwargs):
    """
    Return a copy of a Series excluding elements from groups that
    do not satisfy the boolean criterion specified by func.

    Parameters
    ----------
    func : function or str
        To apply to each group. Should return True or False. A string is
        looked up as a method name on each group.
    dropna : bool, default True
        Drop groups that do not pass the filter. If False, groups that
        evaluate False are filled with NaNs.
    *args, **kwargs
        Passed on to ``func``.

    Returns
    -------
    filtered : Series

    Raises
    ------
    TypeError
        If evaluating the criterion raises ValueError or TypeError, e.g.
        because ``func`` did not return a scalar boolean. The original
        exception is attached as ``__cause__``.

    Examples
    --------
    >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
    ...                           'foo', 'bar'],
    ...                    'B' : [1, 2, 3, 4, 5, 6],
    ...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
    >>> grouped = df.groupby('A')
    >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.)
    1    2
    3    4
    5    6
    Name: B, dtype: int64
    """
    if isinstance(func, str):
        wrapper = lambda x: getattr(x, func)(*args, **kwargs)
    else:
        wrapper = lambda x: func(x, *args, **kwargs)

    # Interpret np.nan as False.
    # ``wrapper`` already closes over args/kwargs and accepts only the
    # group, so nothing else may be forwarded to it.
    def true_and_notna(x) -> bool:
        b = wrapper(x)
        return b and notna(b)

    try:
        indices = [
            self._get_index(name) for name, group in self if true_and_notna(group)
        ]
    except (ValueError, TypeError) as err:
        # Chain explicitly so the underlying failure stays visible.
        raise TypeError("the filter must return a boolean result") from err

    filtered = self._apply_filter(indices, dropna)
    return filtered
def nunique(self, dropna: bool = True) -> Series:
    """
    Return number of unique elements in the group.

    Parameters
    ----------
    dropna : bool, default True
        Whether entries whose factorized code is -1 are left out of the
        count (presumably these are the missing values -- confirm against
        ``algorithms.factorize``).

    Returns
    -------
    Series
        Number of unique values within each group.
    """
    ids, _, _ = self.grouper.group_info

    val = self.obj._internal_get_values()

    # Replace values by integer codes, then order rows by group id first
    # and by code within each group (lexsort uses the last key as primary).
    codes, _ = algorithms.factorize(val, sort=False)
    sorter = np.lexsort((codes, ids))
    codes = codes[sorter]
    ids = ids[sorter]

    # group boundaries are where group ids change
    # unique observations are where sorted values change
    idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
    inc = np.r_[1, codes[1:] != codes[:-1]]

    # 1st item of each group is a new unique observation
    mask = codes == -1
    if dropna:
        # order matters: the mask is applied last, so a code of -1 at a
        # group start is not counted
        inc[idx] = 1
        inc[mask] = 0
    else:
        # a run of consecutive -1 codes counts once; the group start is
        # forced to 1 afterwards so every group's first row is counted
        inc[mask & np.r_[False, mask[:-1]]] = 0
        inc[idx] = 1

    # sum the increments between consecutive group boundaries
    out = np.add.reduceat(inc, idx).astype("int64", copy=False)
    if len(ids):
        # NaN/NaT group exists if the head of ids is -1,
        # so remove it from res and exclude its index from idx
        if ids[0] == -1:
            res = out[1:]
            idx = idx[np.flatnonzero(idx)]
        else:
            res = out
    else:
        res = out[1:]
    ri = self.grouper.result_index

    # we might have duplications among the bins
    if len(res) != len(ri):
        # scatter the counts into a zero-filled array sized like the
        # result index, positioned by group id
        res, out = np.zeros(len(ri), dtype=out.dtype), res
        res[ids[idx]] = out

    result = Series(res, index=ri, name=self._selection_name)
    return self._reindex_output(result, fill_value=0)
def value_counts(
    self, normalize=False, sort=True, ascending=False, bins=None, dropna=True
):
    """
    Count occurrences of each value within each group.

    Parameters
    ----------
    normalize : bool, default False
        If True, each count is divided by a per-group total.
    sort : bool, default True
        If True, order the counts within each group; the direction is
        given by ``ascending``.
    ascending : bool, default False
        Sort direction used when ``sort`` is True.
    bins : optional
        When not None, values are binned with ``cut`` before counting.
        A non-iterable ``bins`` is delegated per group to
        ``Series.value_counts``.
    dropna : bool, default True
        If True, entries whose value code is -1 are removed from the
        result.

    Returns
    -------
    Series
        Counts indexed by a MultiIndex made of the grouping levels
        followed by a level for the counted values (or bins).
    """

    from pandas.core.reshape.tile import cut
    from pandas.core.reshape.merge import _get_join_indexers

    if bins is not None and not np.iterable(bins):
        # scalar bins cannot be done at top level
        # in a backward compatible way
        # NOTE(review): dropna is not forwarded on this path -- confirm
        # that is intended.
        return self.apply(
            Series.value_counts,
            normalize=normalize,
            sort=sort,
            ascending=ascending,
            bins=bins,
        )

    ids, _, _ = self.grouper.group_info
    val = self.obj._internal_get_values()

    # groupby removes null keys from groupings
    mask = ids != -1
    ids, val = ids[mask], val[mask]

    # lab: per-row label of the value; lev: the distinct labels;
    # llab: helper returning integer codes for a slice/mask of lab
    if bins is None:
        lab, lev = algorithms.factorize(val, sort=True)
        llab = lambda lab, inc: lab[inc]
    else:

        # lab is a Categorical with categories an IntervalIndex
        lab = cut(Series(val), bins, include_lowest=True)
        lev = lab.cat.categories
        lab = lev.take(lab.cat.codes)
        llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]

    # order rows by group id, then by label within the group
    if is_interval_dtype(lab):
        # TODO: should we do this inside II?
        sorter = np.lexsort((lab.left, lab.right, ids))
    else:
        sorter = np.lexsort((lab, ids))

    ids, lab = ids[sorter], lab[sorter]

    # group boundaries are where group ids change
    idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]

    # new values are where sorted labels change
    lchanges = llab(lab, slice(1, None)) != llab(lab, slice(None, -1))
    inc = np.r_[True, lchanges]
    inc[idx] = True  # group boundaries are also new values
    out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts

    # num. of times each group should be repeated
    rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))

    # multi-index components
    codes = self.grouper.reconstructed_codes
    codes = [rep(level_codes) for level_codes in codes] + [llab(lab, inc)]
    levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
    names = self.grouper.names + [self._selection_name]

    if dropna:
        # from here on ``mask`` marks result rows whose value code is not -1
        mask = codes[-1] != -1
        if mask.all():
            # nothing to drop; skip the dropna-specific handling below
            dropna = False
        else:
            out, codes = out[mask], [level_codes[mask] for level_codes in codes]

    if normalize:
        out = out.astype("float")
        # d: number of rows in each group
        d = np.diff(np.r_[idx, len(ids)])
        if dropna:
            # subtract the rows labelled -1 from their group's size
            m = ids[lab == -1]
            np.add.at(d, m, -1)
            acc = rep(d)[mask]
        else:
            acc = rep(d)
        out /= acc

    if sort and bins is None:
        # order by count within each group (``cat`` is the group id of
        # every result row)
        cat = ids[inc][mask] if dropna else ids[inc]
        sorter = np.lexsort((out if ascending else -out, cat))
        out, codes[-1] = out[sorter], codes[-1][sorter]

    if bins is None:
        mi = MultiIndex(
            levels=levels, codes=codes, names=names, verify_integrity=False
        )

        if is_integer_dtype(out):
            out = ensure_int64(out)
        return Series(out, index=mi, name=self._selection_name)

    # for compat. with libgroupby.value_counts need to ensure every
    # bin is present at every index level, null filled with zeros
    # diff marks the result rows at which any grouping level code changes
    diff = np.zeros(len(out), dtype="bool")
    for level_codes in codes[:-1]:
        diff |= np.r_[True, level_codes[1:] != level_codes[:-1]]

    ncat, nbin = diff.sum(), len(levels[-1])

    # left: every (group, bin) combination; right: the combinations that
    # actually occurred
    left = [np.repeat(np.arange(ncat), nbin), np.tile(np.arange(nbin), ncat)]

    right = [diff.cumsum() - 1, codes[-1]]

    _, idx = _get_join_indexers(left, right, sort=False, how="left")
    # combinations without a match get a count of 0
    out = np.where(idx != -1, out[idx], 0)

    if sort:
        sorter = np.lexsort((out if ascending else -out, left[0]))
        out, left[-1] = out[sorter], left[-1][sorter]

    # build the multi-index w/ full levels
    def build_codes(lev_codes: np.ndarray) -> np.ndarray:
        return np.repeat(lev_codes[diff], nbin)

    codes = [build_codes(lev_codes) for lev_codes in codes[:-1]]
    codes.append(left[-1])

    mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)

    if is_integer_dtype(out):
        out = ensure_int64(out)
    return Series(out, index=mi, name=self._selection_name)
def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
    """
    Calculate pct_change of each value to previous entry in group.

    Parameters
    ----------
    periods : int, default 1
        Number of positions to shift by when forming the ratio.
    fill_method : str or None, default "pad"
        Name of the GroupBy method used to fill the data first. ``None``
        is handled as "pad" with ``limit=0``.
    limit : int, optional
        Passed to the fill method.
    freq : optional
        When truthy, the computation is delegated per group to
        ``pct_change`` of the group itself.

    Returns
    -------
    Series
    """
    # TODO: Remove this conditional when #23918 is fixed
    if freq:
        return self.apply(
            lambda x: x.pct_change(
                periods=periods, fill_method=fill_method, limit=limit, freq=freq
            )
        )

    if fill_method is None:  # GH30463
        fill_method, limit = "pad", 0

    fill = getattr(self, fill_method)
    filled = fill(limit=limit)
    # regroup the filled values by the same group codes before shifting
    shifted = filled.groupby(self.grouper.codes).shift(periods=periods, freq=freq)

    return (filled / shifted) - 1
+ + >>> df.groupby('A').agg('min') + B C + A + 1 1 0.227877 + 2 3 -0.562860 + + Multiple aggregations + + >>> df.groupby('A').agg(['min', 'max']) + B C + min max min max + A + 1 1 2 0.227877 0.362838 + 2 3 4 -0.562860 1.267767 + + Select a column for aggregation + + >>> df.groupby('A').B.agg(['min', 'max']) + min max + A + 1 1 2 + 2 3 4 + + Different aggregations per column + + >>> df.groupby('A').agg({'B': ['min', 'max'], 'C': 'sum'}) + B C + min max sum + A + 1 1 2 0.590716 + 2 3 4 0.704907 + + To control the output names with different aggregations per column, + pandas supports "named aggregation" + + >>> df.groupby("A").agg( + ... b_min=pd.NamedAgg(column="B", aggfunc="min"), + ... c_sum=pd.NamedAgg(column="C", aggfunc="sum")) + b_min c_sum + A + 1 1 -1.956929 + 2 3 -0.322183 + + - The keywords are the *output* column names + - The values are tuples whose first element is the column to select + and the second element is the aggregation to apply to that column. + Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields + ``['column', 'aggfunc']`` to make it clearer what the arguments are. + As usual, the aggregation can be a callable or a string alias. + + See :ref:`groupby.aggregate.named` for more. 
+ """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="DataFrame", + axis="", + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, func=None, *args, **kwargs): + + relabeling = func is None and _is_multi_agg_with_relabel(**kwargs) + if relabeling: + func, columns, order = _normalize_keyword_aggregation(kwargs) + + kwargs = {} + elif isinstance(func, list) and len(func) > len(set(func)): + + # GH 28426 will raise error if duplicated function names are used and + # there is no reassigned name + raise SpecificationError( + "Function names must be unique if there is no new column " + "names assigned" + ) + elif func is None: + # nicer error message + raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).") + + func = _maybe_mangle_lambdas(func) + + result, how = self._aggregate(func, *args, **kwargs) + if how is None: + return result + + if result is None: + + # grouper specific aggregations + if self.grouper.nkeys > 1: + return self._python_agg_general(func, *args, **kwargs) + elif args or kwargs: + result = self._aggregate_frame(func, *args, **kwargs) + + elif self.axis == 1: + # _aggregate_multiple_funcs does not allow self.axis == 1 + result = self._aggregate_frame(func) + + else: + + # try to treat as if we are passing a list + try: + result = self._aggregate_multiple_funcs([func], _axis=self.axis) + except ValueError as err: + if "no results" not in str(err): + # raised directly by _aggregate_multiple_funcs + raise + result = self._aggregate_frame(func) + else: + # select everything except for the last level, which is the one + # containing the name of the function(s), see GH 32040 + result.columns = result.columns.rename( + [self._selected_obj.columns.name] * result.columns.nlevels + ).droplevel(-1) + + if not self.as_index: + self._insert_inaxis_grouper_inplace(result) + result.index = np.arange(len(result)) + + if relabeling: + + # used reordered index of columns + result 
= result.iloc[:, order] + result.columns = columns + + return result._convert(datetime=True) + + agg = aggregate + + def _iterate_slices(self) -> Iterable[Series]: + obj = self._selected_obj + if self.axis == 1: + obj = obj.T + + if isinstance(obj, Series) and obj.name not in self.exclusions: + # Occurs when doing DataFrameGroupBy(...)["X"] + yield obj + else: + for label, values in obj.items(): + if label in self.exclusions: + continue + + yield values + + def _cython_agg_general( + self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 + ) -> DataFrame: + agg_blocks, agg_items = self._cython_agg_blocks( + how, alt=alt, numeric_only=numeric_only, min_count=min_count + ) + return self._wrap_agged_blocks(agg_blocks, items=agg_items) + + def _cython_agg_blocks( + self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 + ) -> "Tuple[List[Block], Index]": + # TODO: the actual managing of mgr_locs is a PITA + # here, it should happen via BlockManager.combine + + data: BlockManager = self._get_data_to_aggregate() + + if numeric_only: + data = data.get_numeric_data(copy=False) + + agg_blocks: List[Block] = [] + new_items: List[np.ndarray] = [] + deleted_items: List[np.ndarray] = [] + # Some object-dtype blocks might be split into List[Block[T], Block[U]] + split_items: List[np.ndarray] = [] + split_frames: List[DataFrame] = [] + + no_result = object() + for block in data.blocks: + # Avoid inheriting result from earlier in the loop + result = no_result + locs = block.mgr_locs.as_array + try: + result, _ = self.grouper.aggregate( + block.values, how, axis=1, min_count=min_count + ) + except NotImplementedError: + # generally if we have numeric_only=False + # and non-applicable functions + # try to python agg + + if alt is None: + # we cannot perform the operation + # in an alternate way, exclude the block + assert how == "ohlc" + deleted_items.append(locs) + continue + + # call our grouper again with only this block + obj = 
self.obj[data.items[locs]] + if obj.shape[1] == 1: + # Avoid call to self.values that can occur in DataFrame + # reductions; see GH#28949 + obj = obj.iloc[:, 0] + + s = get_groupby(obj, self.grouper) + try: + result = s.aggregate(lambda x: alt(x, axis=self.axis)) + except TypeError: + # we may have an exception in trying to aggregate + # continue and exclude the block + deleted_items.append(locs) + continue + else: + result = cast(DataFrame, result) + # unwrap DataFrame to get array + if len(result._data.blocks) != 1: + # We've split an object block! Everything we've assumed + # about a single block input returning a single block output + # is a lie. To keep the code-path for the typical non-split case + # clean, we choose to clean up this mess later on. + split_items.append(locs) + split_frames.append(result) + continue + + assert len(result._data.blocks) == 1 + result = result._data.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + + assert not isinstance(result, DataFrame) + + if result is not no_result: + # see if we can cast the block back to the original dtype + result = maybe_downcast_numeric(result, block.dtype) + + if block.is_extension and isinstance(result, np.ndarray): + # e.g. block.values was an IntegerArray + # (1, N) case can occur if block.values was Categorical + # and result is ndarray[object] + assert result.ndim == 1 or result.shape[0] == 1 + try: + # Cast back if feasible + result = type(block.values)._from_sequence( + result.ravel(), dtype=block.values.dtype + ) + except (ValueError, TypeError): + # reshape to be valid for non-Extension Block + result = result.reshape(1, -1) + + agg_block: Block = block.make_block(result) + + new_items.append(locs) + agg_blocks.append(agg_block) + + if not (agg_blocks or split_frames): + raise DataError("No numeric types to aggregate") + + if split_items: + # Clean up the mess left over from split blocks. 
def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
    """
    Apply ``func`` to each group and wrap the per-group results.

    When ``self.axis`` differs from the info axis of the object being
    aggregated, groups come from iterating ``self``; otherwise each group
    is fetched with ``self.get_group`` for every key in ``self.indices``.

    Parameters
    ----------
    func : callable
        Called as ``func(group, *args, **kwargs)``.

    Returns
    -------
    DataFrame
        Result of ``self._wrap_frame_output``.

    Raises
    ------
    AssertionError
        If the grouper does not have exactly one key.
    """
    if self.grouper.nkeys != 1:
        raise AssertionError("Number of keys must be 1")

    axis = self.axis
    obj = self._obj_with_exclusions

    if axis != obj._info_axis_number:
        groups = iter(self)
    else:
        groups = ((name, self.get_group(name, obj=obj)) for name in self.indices)

    result: Dict[Union[int, str], Union[NDFrame, np.ndarray]] = {
        name: func(data, *args, **kwargs) for name, data in groups
    }
    return self._wrap_frame_output(result, obj)
test_apply_with_mutated_index + raise + # otherwise we get here from an AttributeError in _make_wrapper + cannot_agg.append(item) + continue + + else: + if cast: + result[item] = self._try_cast(result[item], data) + + result_columns = obj.columns + if cannot_agg: + result_columns = result_columns.drop(cannot_agg) + + return DataFrame(result, columns=result_columns) + + def _wrap_applied_output(self, keys, values, not_indexed_same=False): + if len(keys) == 0: + return DataFrame(index=keys) + + key_names = self.grouper.names + + # GH12824. + def first_not_none(values): + try: + return next(com.not_none(*values)) + except StopIteration: + return None + + v = first_not_none(values) + + if v is None: + # GH9684. If all values are None, then this will throw an error. + # We'd prefer it return an empty dataframe. + return DataFrame() + elif isinstance(v, DataFrame): + return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) + elif self.grouper.groupings is not None: + if len(self.grouper.groupings) > 1: + key_index = self.grouper.result_index + + else: + ping = self.grouper.groupings[0] + if len(keys) == ping.ngroups: + key_index = ping.group_index + key_index.name = key_names[0] + + key_lookup = Index(keys) + indexer = key_lookup.get_indexer(key_index) + + # reorder the values + values = [values[i] for i in indexer] + else: + + key_index = Index(keys, name=key_names[0]) + + # don't use the key indexer + if not self.as_index: + key_index = None + + # make Nones an empty object + v = first_not_none(values) + if v is None: + return DataFrame() + elif isinstance(v, NDFrame): + + # this is to silence a DeprecationWarning + # TODO: Remove when default dtype of empty Series is object + kwargs = v._construct_axes_dict() + if v._constructor is Series: + backup = create_series_with_explicit_dtype( + **kwargs, dtype_if_empty=object + ) + else: + backup = v._constructor(**kwargs) + + values = [x if (x is not None) else backup for x in values] + + v = values[0] + 
+ if isinstance(v, (np.ndarray, Index, Series)): + if isinstance(v, Series): + applied_index = self._selected_obj._get_axis(self.axis) + all_indexed_same = all_indexes_same([x.index for x in values]) + singular_series = len(values) == 1 and applied_index.nlevels == 1 + + # GH3596 + # provide a reduction (Frame -> Series) if groups are + # unique + if self.squeeze: + # assign the name to this series + if singular_series: + values[0].name = keys[0] + + # GH2893 + # we have series in the values array, we want to + # produce a series: + # if any of the sub-series are not indexed the same + # OR we don't have a multi-index and we have only a + # single values + return self._concat_objects( + keys, values, not_indexed_same=not_indexed_same + ) + + # still a series + # path added as of GH 5545 + elif all_indexed_same: + from pandas.core.reshape.concat import concat + + return concat(values) + + if not all_indexed_same: + # GH 8467 + return self._concat_objects(keys, values, not_indexed_same=True) + + if self.axis == 0 and isinstance(v, ABCSeries): + # GH6124 if the list of Series have a consistent name, + # then propagate that name to the result. + index = v.index.copy() + if index.name is None: + # Only propagate the series name to the result + # if all series have a consistent name. If the + # series do not have a consistent name, do + # nothing. 
+ names = {v.name for v in values} + if len(names) == 1: + index.name = list(names)[0] + + # normally use vstack as its faster than concat + # and if we have mi-columns + if ( + isinstance(v.index, MultiIndex) + or key_index is None + or isinstance(key_index, MultiIndex) + ): + stacked_values = np.vstack([np.asarray(v) for v in values]) + result = DataFrame( + stacked_values, index=key_index, columns=index + ) + else: + # GH5788 instead of stacking; concat gets the + # dtypes correct + from pandas.core.reshape.concat import concat + + result = concat( + values, + keys=key_index, + names=key_index.names, + axis=self.axis, + ).unstack() + result.columns = index + elif isinstance(v, ABCSeries): + stacked_values = np.vstack([np.asarray(v) for v in values]) + result = DataFrame( + stacked_values.T, index=v.index, columns=key_index + ) + else: + # GH#1738: values is list of arrays of unequal lengths + # fall through to the outer else clause + # TODO: sure this is right? we used to do this + # after raising AttributeError above + return Series(values, index=key_index, name=self._selection_name) + + # if we have date/time like in the original, then coerce dates + # as we are stacking can easily have object dtypes here + so = self._selected_obj + if so.ndim == 2 and so.dtypes.apply(needs_i8_conversion).any(): + result = _recast_datetimelike_result(result) + else: + result = result._convert(datetime=True) + + return self._reindex_output(result) + + # values are not series or array-like but scalars + else: + # only coerce dates if we find at least 1 datetime + should_coerce = any(isinstance(x, Timestamp) for x in values) + # self._selection_name not passed through to Series as the + # result should not take the name of original selection + # of columns + return Series(values, index=key_index)._convert( + datetime=True, coerce=should_coerce + ) + + else: + # Handle cases like BinGrouper + return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) + + def 
_transform_general(self, func, *args, **kwargs): + from pandas.core.reshape.concat import concat + + applied = [] + obj = self._obj_with_exclusions + gen = self.grouper.get_iterator(obj, axis=self.axis) + fast_path, slow_path = self._define_paths(func, *args, **kwargs) + + path = None + for name, group in gen: + object.__setattr__(group, "name", name) + + if path is None: + # Try slow path and fast path. + try: + path, res = self._choose_path(fast_path, slow_path, group) + except TypeError: + return self._transform_item_by_item(obj, fast_path) + except ValueError: + msg = "transform must return a scalar value for each group" + raise ValueError(msg) + else: + res = path(group) + + if isinstance(res, Series): + + # we need to broadcast across the + # other dimension; this will preserve dtypes + # GH14457 + if not np.prod(group.shape): + continue + elif res.index.is_(obj.index): + r = concat([res] * len(group.columns), axis=1) + r.columns = group.columns + r.index = group.index + else: + r = DataFrame( + np.concatenate([res.values] * len(group.index)).reshape( + group.shape + ), + columns=group.columns, + index=group.index, + ) + + applied.append(r) + else: + applied.append(res) + + concat_index = obj.columns if self.axis == 0 else obj.index + other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 + concatenated = concat(applied, axis=self.axis, verify_integrity=False) + concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) + return self._set_result_index_ordered(concatenated) + + @Substitution(klass="DataFrame", selected="") + @Appender(_transform_template) + def transform(self, func, *args, **kwargs): + + # optimized transforms + func = self._get_cython_func(func) or func + + if not isinstance(func, str): + return self._transform_general(func, *args, **kwargs) + + elif func not in base.transform_kernel_whitelist: + msg = f"'{func}' is not a valid function name for transform(name)" + raise ValueError(msg) + elif func in 
def _transform_fast(self, result: DataFrame, func_nm: str) -> DataFrame:
    """
    Fast transform path for aggregations.

    Each column of the aggregated ``result`` is expanded with a take on
    the group ids, optionally cast back against the matching original
    column, and the columns are reassembled on the original index.
    """
    # if there were groups with no observations (Categorical only?)
    # try casting data to original dtype
    should_cast = self._transform_should_cast(func_nm)
    obj = self._obj_with_exclusions
    group_ids, _, _ = self.grouper.group_info

    def broadcast(position):
        # reshape one aggregated column to the size of the original frame
        values = algorithms.take_1d(result.iloc[:, position].values, group_ids)
        # TODO: we have no test cases that get here with EA dtypes;
        #  try_cast may not be needed if EAs never get here
        if should_cast:
            values = self._try_cast(values, obj.iloc[:, position])
        return values

    output = [broadcast(position) for position in range(len(result.columns))]
    return DataFrame._from_arrays(output, columns=result.columns, index=obj.index)


def _define_paths(self, func, *args, **kwargs):
    """
    Build the two candidate callables used to transform a group.

    Returns
    -------
    fast_path : callable
        Calls ``func`` (or the method named ``func``) on the whole group.
    slow_path : callable
        Applies the same call to each slice via ``group.apply`` along
        ``self.axis``.
    """
    if isinstance(func, str):

        def invoke(target):
            return getattr(target, func)(*args, **kwargs)

    else:

        def invoke(target):
            return func(target, *args, **kwargs)

    def fast_path(group):
        return invoke(group)

    def slow_path(group):
        return group.apply(lambda x: invoke(x), axis=self.axis)

    return fast_path, slow_path
def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
    """
    Transform ``obj`` one column at a time, dropping columns that fail.

    Parameters
    ----------
    obj : DataFrame
        Frame whose column labels are looked up on ``self``.
    wrapper : callable
        Passed to ``self[col].transform`` for every column.

    Returns
    -------
    DataFrame
        The columns that transformed without a ``TypeError``, on the
        index of ``obj``.

    Raises
    ------
    TypeError
        If no column could be transformed.
    """
    transformed = {}
    kept_positions = []
    for position, label in enumerate(obj):
        try:
            column_result = self[label].transform(wrapper)
        except TypeError:
            # e.g. trying to call nanmean with string values
            continue
        transformed[label] = column_result
        kept_positions.append(position)

    if not transformed:
        raise TypeError("Transform function invalid for data types")

    columns = obj.columns
    if len(transformed) < len(columns):
        columns = columns.take(kept_positions)

    return DataFrame(transformed, index=obj.index, columns=columns)
def __getitem__(self, key):
    """
    Select by ``key``, warning when several keys arrive as a bare tuple.

    A tuple with more than one element triggers a ``FutureWarning``
    (per GH 23566); the lookup itself is always delegated to the parent
    class.
    """
    # a 1-tuple becomes a SeriesGroupBy and is valid syntax, so it
    # does not warn
    multiple_keys = isinstance(key, tuple) and len(key) > 1
    if multiple_keys:
        message = (
            "Indexing with multiple keys (implicitly converted to a tuple "
            "of keys) will be deprecated, use a list instead."
        )
        warnings.warn(message, FutureWarning, stacklevel=2)
    return super().__getitem__(key)
def _wrap_frame_output(self, result, obj) -> DataFrame:
    """
    Build a DataFrame from per-group results keyed by group label.

    The first grouper level supplies the group labels. For ``axis == 0``
    the frame is built on ``obj.columns`` and transposed, so groups end
    up on the rows; otherwise it is built on ``obj.index`` with groups as
    columns.
    """
    group_labels = self.grouper.levels[0]

    if self.axis != 0:
        return DataFrame(result, index=obj.index, columns=group_labels)
    return DataFrame(result, index=obj.columns, columns=group_labels).T


def _get_data_to_aggregate(self) -> BlockManager:
    """
    Return the ``_data`` of the object with exclusions applied,
    transposing the object first when ``self.axis == 1``.
    """
    obj = self._obj_with_exclusions
    source = obj.T if self.axis == 1 else obj
    return source._data


def _insert_inaxis_grouper_inplace(self, result):
    """
    Insert the in-axis grouping levels as leading columns of ``result``.

    ``result`` is modified in place; nothing is returned.
    """
    grouper = self.grouper
    names = grouper.names
    levels = grouper.get_group_levels()
    in_axis_flags = [grp.in_axis for grp in grouper.groupings]

    # walk in reverse so we can always insert at loc 0
    for name, lev, in_axis in zip(
        reversed(names), reversed(levels), reversed(in_axis_flags)
    ):
        if in_axis:
            result.insert(0, name, lev)


def _wrap_aggregated_output(
    self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]]
) -> DataFrame:
    """
    Wraps the output of DataFrameGroupBy aggregations into the expected result.

    Parameters
    ----------
    output : Mapping[base.OutputKey, Union[Series, np.ndarray]]
        Data to wrap.

    Returns
    -------
    DataFrame
    """
    by_position = {key.position: val for key, val in output.items()}
    labels = Index(key.label for key in output)

    # build on positions first, then attach the real labels
    result = DataFrame(by_position)
    result.columns = labels

    if self.as_index:
        result.index = self.grouper.result_index
    else:
        self._insert_inaxis_grouper_inplace(result)
        result = result._consolidate()

    if self.axis == 1:
        result = result.T

    reindexed = self._reindex_output(result)
    return reindexed._convert(datetime=True)
def _wrap_agged_blocks(self, blocks: "Sequence[Block]", items: Index) -> DataFrame:
    """
    Assemble aggregated blocks into a DataFrame.

    With ``as_index`` the grouper's result index is used; otherwise a
    positional index sized from the first block is used and the in-axis
    grouping columns are inserted. The frame is transposed for
    ``axis == 1`` before being reindexed and converted.
    """
    keep_group_index = self.as_index
    if keep_group_index:
        index = self.grouper.result_index
    else:
        index = np.arange(blocks[0].values.shape[-1])

    result = DataFrame(BlockManager(blocks, axes=[items, index]))

    if not keep_group_index:
        self._insert_inaxis_grouper_inplace(result)
        result = result._consolidate()

    if self.axis == 1:
        result = result.T

    reindexed = self._reindex_output(result)
    return reindexed._convert(datetime=True)


def _iterate_column_groupbys(self):
    """
    Yield ``(column name, SeriesGroupBy)`` for each selected column,
    taking the columns by position and sharing this object's grouper and
    exclusions.
    """
    for position, colname in enumerate(self._selected_obj.columns):
        column = self._selected_obj.iloc[:, position]
        yield colname, SeriesGroupBy(
            column,
            selection=colname,
            grouper=self.grouper,
            exclusions=self.exclusions,
        )


def _apply_to_column_groupbys(self, func):
    """
    Apply ``func`` to every per-column groupby and concatenate the
    results side by side, keyed by the selected object's columns.
    """
    from pandas.core.reshape.concat import concat

    pieces = (func(col_groupby) for _, col_groupby in self._iterate_column_groupbys())
    return concat(pieces, keys=self._selected_obj.columns, axis=1)
+ """ + data = self._get_data_to_aggregate() + ids, _, ngroups = self.grouper.group_info + mask = ids != -1 + + vals = ( + (mask & ~_isna_ndarraylike(np.atleast_2d(blk.get_values()))) + for blk in data.blocks + ) + locs = (blk.mgr_locs for blk in data.blocks) + + counted = ( + lib.count_level_2d(x, labels=ids, max_bin=ngroups, axis=1) for x in vals + ) + blocks = [make_block(val, placement=loc) for val, loc in zip(counted, locs)] + + return self._wrap_agged_blocks(blocks, items=data.items) + + def nunique(self, dropna: bool = True): + """ + Return DataFrame with number of distinct observations per group for + each column. + + Parameters + ---------- + dropna : bool, default True + Don't include NaN in the counts. + + Returns + ------- + nunique: DataFrame + + Examples + -------- + >>> df = pd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam', + ... 'ham', 'ham'], + ... 'value1': [1, 5, 5, 2, 5, 5], + ... 'value2': list('abbaxy')}) + >>> df + id value1 value2 + 0 spam 1 a + 1 egg 5 b + 2 egg 5 b + 3 spam 2 a + 4 ham 5 x + 5 ham 5 y + + >>> df.groupby('id').nunique() + id value1 value2 + id + egg 1 1 1 + ham 1 1 2 + spam 1 2 1 + + Check for rows with the same id but conflicting values: + + >>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any()) + id value1 value2 + 0 spam 1 a + 3 spam 2 a + 4 ham 5 x + 5 ham 5 y + """ + + obj = self._selected_obj + + def groupby_series(obj, col=None): + return SeriesGroupBy(obj, selection=col, grouper=self.grouper).nunique( + dropna=dropna + ) + + if isinstance(obj, Series): + results = groupby_series(obj) + else: + # TODO: this is duplicative of how GroupBy naturally works + # Try to consolidate with normal wrapping functions + from pandas.core.reshape.concat import concat + + axis_number = obj._get_axis_number(self.axis) + other_axis = int(not axis_number) + if axis_number == 0: + iter_func = obj.items + else: + iter_func = obj.iterrows + + results = [groupby_series(content, label) for label, content in iter_func()] + results = 
def _is_multi_agg_with_relabel(**kwargs) -> bool:
    """
    Check whether kwargs passed to .agg look like multi-agg with relabeling.

    Parameters
    ----------
    **kwargs : dict

    Returns
    -------
    bool
        True only when at least one keyword is given and every value is a
        2-tuple.

    Examples
    --------
    >>> _is_multi_agg_with_relabel(a='max')
    False
    >>> _is_multi_agg_with_relabel(a_max=('a', 'max'),
    ...                            a_min=('a', 'min'))
    True
    >>> _is_multi_agg_with_relabel()
    False
    """
    return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and (
        len(kwargs) > 0
    )


def _normalize_keyword_aggregation(kwargs):
    """
    Normalize user-provided "named aggregation" kwargs.

    Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
    to the old ``Dict[str, List[scalar]]``.

    Parameters
    ----------
    kwargs : dict
        Must be non-empty; each value is a ``(column, aggfunc)`` pair.

    Returns
    -------
    aggspec : defaultdict
        Maps each input column to the list of aggfuncs requested for it.
    columns : tuple
        The user-provided keys (output names), in the order given.
    col_idx_order : np.ndarray
        For every requested output, its position in the column-grouped
        order produced by ``aggspec``.

    Examples
    --------
    >>> _normalize_keyword_aggregation({'output': ('input', 'sum')})
    (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]))
    """
    # Normalize the aggregation functions as Mapping[column, List[func]],
    # process normally, then fixup the names.
    # TODO: aggspec type: typing.Dict[str, List[AggScalar]]
    # May be hitting https://github.com/python/mypy/issues/5958
    # saying it doesn't have an attribute __name__
    aggspec = defaultdict(list)
    order = []
    columns, pairs = list(zip(*kwargs.items()))

    for name, (column, aggfunc) in zip(columns, pairs):
        aggspec[column].append(aggfunc)
        order.append((column, com.get_callable_name(aggfunc) or aggfunc))

    # uniquify aggfunc name if duplicated in order list
    uniquified_order = _make_unique(order)

    # GH 25719: grouping by column in aggspec changes the order of the
    # assigned columns, so build the uniquified aggspec order as well and
    # compare the two by position
    aggspec_order = [
        (column, com.get_callable_name(aggfunc) or aggfunc)
        for column, aggfuncs in aggspec.items()
        for aggfunc in aggfuncs
    ]
    uniquified_aggspec = _make_unique(aggspec_order)

    # get the new indices of columns by comparison
    col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
    return aggspec, columns, col_idx_order


def _make_unique(seq):
    """
    Uniquify aggfunc name of the pairs in the order list.

    A pair that occurs more than once gets ``_<k>`` appended to its second
    element, where ``k`` counts the earlier occurrences of that pair.

    Examples
    --------
    >>> _make_unique([('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')])
    [('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
    """
    return [
        (pair[0], "_".join([pair[1], str(seq[:i].count(pair))]))
        if seq.count(pair) > 1
        else pair
        for i, pair in enumerate(seq)
    ]


# TODO: Can't use, because mypy doesn't like us setting __name__
#   error: "partial[Any]" has no attribute "__name__"
# the type is:
#   typing.Sequence[Callable[..., ScalarResult]]
#     -> typing.Sequence[Callable[..., ScalarResult]]:


def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
    """
    Possibly mangle a list of aggfuncs.

    Parameters
    ----------
    aggfuncs : Sequence

    Returns
    -------
    mangled: list-like
        A new AggSpec sequence, where lambdas have been converted
        to have unique names.

    Notes
    -----
    If just one aggfunc is passed, the name will not be mangled.
    """
    if len(aggfuncs) <= 1:
        # don't mangle for .agg([lambda x: .])
        return aggfuncs
    i = 0
    mangled_aggfuncs = []
    for aggfunc in aggfuncs:
        if com.get_callable_name(aggfunc) == "<lambda>":
            # wrap in a partial so the new name does not leak onto the
            # caller's own lambda object
            aggfunc = partial(aggfunc)
            aggfunc.__name__ = f"<lambda_{i}>"
            i += 1
        mangled_aggfuncs.append(aggfunc)

    return mangled_aggfuncs


def _maybe_mangle_lambdas(agg_spec: Any) -> Any:
    """
    Make new lambdas with unique names.

    Parameters
    ----------
    agg_spec : Any
        An argument to GroupBy.agg.
        Non-dict-like, non-list-like `agg_spec` are passed through as is.
        For dict-like `agg_spec` a new spec of the same type is returned
        with name-mangled lambdas; a list-like `agg_spec` is mangled
        directly.

    Returns
    -------
    mangled : Any

    Examples
    --------
    >>> _maybe_mangle_lambdas('sum')
    'sum'

    >>> [f.__name__ for f in _maybe_mangle_lambdas([lambda: 1, lambda: 2])]
    ['<lambda_0>', '<lambda_1>']
    """
    is_dict = is_dict_like(agg_spec)
    if not (is_dict or is_list_like(agg_spec)):
        return agg_spec

    if not is_dict:
        return _managle_lambda_list(agg_spec)

    # only dict-likes need a fresh container of the same type
    mangled_aggspec = type(agg_spec)()  # dict or OrderedDict
    for key, aggfuncs in agg_spec.items():
        if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
            aggfuncs = _managle_lambda_list(aggfuncs)
        mangled_aggspec[key] = aggfuncs

    return mangled_aggspec
+ + Parameters + ---------- + result : DataFrame + + Returns + ------- + DataFrame + + Notes + ----- + - Assumes Groupby._selected_obj has ndim==2 and at least one + datetimelike column + """ + result = result.copy() + + obj_cols = [ + idx + for idx in range(len(result.columns)) + if is_object_dtype(result.dtypes.iloc[idx]) + ] + + # See GH#26285 + for n in obj_cols: + converted = maybe_convert_objects( + result.iloc[:, n].values, convert_numeric=False + ) + + result.iloc[:, n] = converted + return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py new file mode 100644 index 00000000..478239b1 --- /dev/null +++ b/pandas/core/groupby/groupby.py @@ -0,0 +1,2571 @@ +""" +Provide the groupby split-apply-combine paradigm. Define the GroupBy +class providing the base-class of operations. + +The SeriesGroupBy and DataFrameGroupBy sub-class +(defined in pandas.core.groupby.generic) +expose these user-facing objects to provide specific functionality. +""" + +from contextlib import contextmanager +import datetime +from functools import partial, wraps +import inspect +import re +import types +from typing import ( + Callable, + Dict, + FrozenSet, + Hashable, + Iterable, + List, + Mapping, + Optional, + Tuple, + Type, + Union, +) + +import numpy as np + +from pandas._config.config import option_context + +from pandas._libs import Timestamp +import pandas._libs.groupby as libgroupby +from pandas._typing import FrameOrSeries, Scalar +from pandas.compat import set_function_name +from pandas.compat.numpy import function as nv +from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender, Substitution, cache_readonly + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.common import ( + ensure_float, + is_categorical_dtype, + is_datetime64_dtype, + is_extension_array_dtype, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.missing 
import isna, notna + +from pandas.core import nanops +import pandas.core.algorithms as algorithms +from pandas.core.arrays import Categorical, DatetimeArray, try_cast_to_ea +from pandas.core.base import DataError, PandasObject, SelectionMixin +import pandas.core.common as com +from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame +from pandas.core.groupby import base, ops +from pandas.core.indexes.api import CategoricalIndex, Index, MultiIndex +from pandas.core.series import Series +from pandas.core.sorting import get_group_index_sorter + +_common_see_also = """ + See Also + -------- + Series.%(name)s + DataFrame.%(name)s +""" + +_apply_docs = dict( + template=""" + Apply function `func` group-wise and combine the results together. + + The function passed to `apply` must take a {input} as its first + argument and return a DataFrame, Series or scalar. `apply` will + then take care of combining the results back together into a single + dataframe or series. `apply` is therefore a highly flexible + grouping method. + + While `apply` is a very flexible method, its downside is that + using it can be quite a bit slower than using more specific methods + like `agg` or `transform`. Pandas offers a wide range of method that will + be much faster than using `apply` for their specific purposes, so try to + use them before reaching for `apply`. + + Parameters + ---------- + func : callable + A callable that takes a {input} as its first argument, and + returns a dataframe, a series or a scalar. In addition the + callable may take positional and keyword arguments. + args, kwargs : tuple and dict + Optional positional and keyword arguments to pass to `func`. + + Returns + ------- + applied : Series or DataFrame + + See Also + -------- + pipe : Apply function to the full GroupBy object instead of to each + group. + aggregate : Apply aggregate function to the GroupBy object. + transform : Apply function column-by-column to the GroupBy object. 
+ Series.apply : Apply a function to a Series. + DataFrame.apply : Apply a function to each row or column of a DataFrame. + """, + dataframe_examples=""" + >>> df = pd.DataFrame({'A': 'a a b'.split(), + 'B': [1,2,3], + 'C': [4,6, 5]}) + >>> g = df.groupby('A') + + Notice that ``g`` has two groups, ``a`` and ``b``. + Calling `apply` in various ways, we can get different grouping results: + + Example 1: below the function passed to `apply` takes a DataFrame as + its argument and returns a DataFrame. `apply` combines the result for + each group together into a new DataFrame: + + >>> g[['B', 'C']].apply(lambda x: x / x.sum()) + B C + 0 0.333333 0.4 + 1 0.666667 0.6 + 2 1.000000 1.0 + + Example 2: The function passed to `apply` takes a DataFrame as + its argument and returns a Series. `apply` combines the result for + each group together into a new DataFrame: + + >>> g[['B', 'C']].apply(lambda x: x.max() - x.min()) + B C + A + a 1 2 + b 0 0 + + Example 3: The function passed to `apply` takes a DataFrame as + its argument and returns a scalar. `apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g.apply(lambda x: x.C.max() - x.B.min()) + A + a 5 + b 2 + dtype: int64 + """, + series_examples=""" + >>> s = pd.Series([0, 1, 2], index='a a b'.split()) + >>> g = s.groupby(s.index) + + From ``s`` above we can see that ``g`` has two groups, ``a`` and ``b``. + Calling `apply` in various ways, we can get different grouping results: + + Example 1: The function passed to `apply` takes a Series as + its argument and returns a Series. `apply` combines the result for + each group together into a new Series: + + >>> g.apply(lambda x: x*2 if x.name == 'b' else x/2) + 0 0.0 + 1 0.5 + 2 4.0 + dtype: float64 + + Example 2: The function passed to `apply` takes a Series as + its argument and returns a scalar. 
`apply` combines the result for + each group together into a Series, including setting the index as + appropriate: + + >>> g.apply(lambda x: x.max() - x.min()) + a 1 + b 0 + dtype: int64 + + Notes + ----- + In the current implementation `apply` calls `func` twice on the + first group to decide whether it can take a fast or slow code + path. This can lead to unexpected behavior if `func` has + side-effects, as they will take effect twice for the first + group. + + Examples + -------- + {examples} + """, +) + +_pipe_template = """ +Apply a function `func` with arguments to this %(klass)s object and return +the function's result. + +%(versionadded)s + +Use `.pipe` when you want to improve readability by chaining together +functions that expect Series, DataFrames, GroupBy or Resampler objects. +Instead of writing + +>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) + +You can write + +>>> (df.groupby('group') +... .pipe(f) +... .pipe(g, arg1=a) +... .pipe(h, arg2=b, arg3=c)) + +which is much more readable. + +Parameters +---------- +func : callable or tuple of (callable, string) + Function to apply to this %(klass)s object or, alternatively, + a `(callable, data_keyword)` tuple where `data_keyword` is a + string indicating the keyword of `callable` that expects the + %(klass)s object. +args : iterable, optional + Positional arguments passed into `func`. +kwargs : dict, optional + A dictionary of keyword arguments passed into `func`. + +Returns +------- +object : the return type of `func`. + +See Also +-------- +Series.pipe : Apply a function with arguments to a series. +DataFrame.pipe: Apply a function with arguments to a dataframe. +apply : Apply function to each group instead of to the + full %(klass)s object. 
+ +Notes +----- +See more `here +`_ + +Examples +-------- +%(examples)s +""" + +_transform_template = """ +Call function producing a like-indexed %(klass)s on each group and +return a %(klass)s having the same indexes as the original object +filled with the transformed values + +Parameters +---------- +f : function + Function to apply to each group + +Returns +------- +%(klass)s + +See Also +-------- +aggregate, transform + +Notes +----- +Each group is endowed the attribute 'name' in case you need to know +which group you are working on. + +The current implementation imposes three requirements on f: + +* f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, if `f` returns a scalar it will be broadcast to have the + same shape as the input subframe. +* if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. +* f must not mutate groups. Mutation is not supported and may + produce unexpected results. + +Examples +-------- + +# Same shape +>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', +... 'foo', 'bar'], +... 'B' : ['one', 'one', 'two', 'three', +... 'two', 'two'], +... 'C' : [1, 5, 5, 2, 5, 5], +... 'D' : [2.0, 5., 8., 1., 2., 9.]}) +>>> grouped = df.groupby('A') +>>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D +0 -1.154701 -0.577350 +1 0.577350 0.000000 +2 0.577350 1.154701 +3 -1.154701 -1.000000 +4 0.577350 -0.577350 +5 0.577350 1.000000 + +# Broadcastable +>>> grouped.transform(lambda x: x.max() - x.min()) + C D +0 4 6.0 +1 3 8.0 +2 4 6.0 +3 3 8.0 +4 4 6.0 +5 3 8.0 +""" + + +class GroupByPlot(PandasObject): + """ + Class implementing the .plot attribute for groupby objects. 
+ """ + + def __init__(self, groupby): + self._groupby = groupby + + def __call__(self, *args, **kwargs): + def f(self): + return self.plot(*args, **kwargs) + + f.__name__ = "plot" + return self._groupby.apply(f) + + def __getattr__(self, name: str): + def attr(*args, **kwargs): + def f(self): + return getattr(self.plot, name)(*args, **kwargs) + + return self._groupby.apply(f) + + return attr + + +@contextmanager +def _group_selection_context(groupby): + """ + Set / reset the _group_selection_context. + """ + groupby._set_group_selection() + yield groupby + groupby._reset_group_selection() + + +_KeysArgType = Union[ + Hashable, + List[Hashable], + Callable[[Hashable], Hashable], + List[Callable[[Hashable], Hashable]], + Mapping[Hashable, Hashable], +] + + +class _GroupBy(PandasObject, SelectionMixin): + _group_selection = None + _apply_whitelist: FrozenSet[str] = frozenset() + + def __init__( + self, + obj: NDFrame, + keys: Optional[_KeysArgType] = None, + axis: int = 0, + level=None, + grouper: "Optional[ops.BaseGrouper]" = None, + exclusions=None, + selection=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, + ): + + self._selection = selection + + assert isinstance(obj, NDFrame), type(obj) + obj._consolidate_inplace() + + self.level = level + + if not as_index: + if not isinstance(obj, DataFrame): + raise TypeError("as_index=False only valid with DataFrame") + if axis != 0: + raise ValueError("as_index=False only valid for axis=0") + + self.as_index = as_index + self.keys = keys + self.sort = sort + self.group_keys = group_keys + self.squeeze = squeeze + self.observed = observed + self.mutated = mutated + + if grouper is None: + from pandas.core.groupby.grouper import get_grouper + + grouper, exclusions, obj = get_grouper( + obj, + keys, + axis=axis, + level=level, + sort=sort, + observed=observed, + mutated=self.mutated, + ) + + self.obj = obj + self.axis = 
obj._get_axis_number(axis) + self.grouper = grouper + self.exclusions = set(exclusions) if exclusions else set() + + def __len__(self) -> int: + return len(self.groups) + + def __repr__(self) -> str: + # TODO: Better repr for GroupBy object + return object.__repr__(self) + + def _assure_grouper(self): + """ + We create the grouper on instantiation sub-classes may have a + different policy. + """ + pass + + @property + def groups(self): + """ + Dict {group name -> group labels}. + """ + self._assure_grouper() + return self.grouper.groups + + @property + def ngroups(self): + self._assure_grouper() + return self.grouper.ngroups + + @property + def indices(self): + """ + Dict {group name -> group indices}. + """ + self._assure_grouper() + return self.grouper.indices + + def _get_indices(self, names): + """ + Safe get multiple indices, translate keys for + datelike to underlying repr. + """ + + def get_converter(s): + # possibly convert to the actual key types + # in the indices, could be a Timestamp or a np.datetime64 + if isinstance(s, datetime.datetime): + return lambda key: Timestamp(key) + elif isinstance(s, np.datetime64): + return lambda key: Timestamp(key).asm8 + else: + return lambda key: key + + if len(names) == 0: + return [] + + if len(self.indices) > 0: + index_sample = next(iter(self.indices)) + else: + index_sample = None # Dummy sample + + name_sample = names[0] + if isinstance(index_sample, tuple): + if not isinstance(name_sample, tuple): + msg = "must supply a tuple to get_group with multiple grouping keys" + raise ValueError(msg) + if not len(name_sample) == len(index_sample): + try: + # If the original grouper was a tuple + return [self.indices[name] for name in names] + except KeyError: + # turns out it wasn't a tuple + msg = ( + "must supply a same-length tuple to get_group " + "with multiple grouping keys" + ) + raise ValueError(msg) + + converters = [get_converter(s) for s in index_sample] + names = (tuple(f(n) for f, n in zip(converters, name)) 
for name in names) + + else: + converter = get_converter(index_sample) + names = (converter(name) for name in names) + + return [self.indices.get(name, []) for name in names] + + def _get_index(self, name): + """ + Safe get index, translate keys for datelike to underlying repr. + """ + return self._get_indices([name])[0] + + @cache_readonly + def _selected_obj(self): + # Note: _selected_obj is always just `self.obj` for SeriesGroupBy + + if self._selection is None or isinstance(self.obj, Series): + if self._group_selection is not None: + return self.obj[self._group_selection] + return self.obj + else: + return self.obj[self._selection] + + def _reset_group_selection(self): + """ + Clear group based selection. + + Used for methods needing to return info on each group regardless of + whether a group selection was previously set. + """ + if self._group_selection is not None: + # GH12839 clear cached selection too when changing group selection + self._group_selection = None + self._reset_cache("_selected_obj") + + def _set_group_selection(self): + """ + Create group based selection. + + Used when selection is not passed directly but instead via a grouper. 
+ + NOTE: this should be paired with a call to _reset_group_selection + """ + grp = self.grouper + if not ( + self.as_index + and getattr(grp, "groupings", None) is not None + and self.obj.ndim > 1 + and self._group_selection is None + ): + return + + ax = self.obj._info_axis + groupers = [g.name for g in grp.groupings if g.level is None and g.in_axis] + + if len(groupers): + # GH12839 clear selected obj cache when group selection changes + self._group_selection = ax.difference(Index(groupers), sort=False).tolist() + self._reset_cache("_selected_obj") + + def _set_result_index_ordered(self, result): + # set the result index on the passed values object and + # return the new object, xref 8046 + + # the values/counts are repeated according to the group index + # shortcut if we have an already ordered grouper + if not self.grouper.is_monotonic: + index = Index(np.concatenate(self._get_indices(self.grouper.result_index))) + result.set_axis(index, axis=self.axis, inplace=True) + result = result.sort_index(axis=self.axis) + + result.set_axis(self.obj._get_axis(self.axis), axis=self.axis, inplace=True) + return result + + def _dir_additions(self): + return self.obj._dir_additions() | self._apply_whitelist + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{attr}'" + ) + + @Substitution( + klass="GroupBy", + versionadded=".. 
versionadded:: 0.21.0", + examples="""\ +>>> df = pd.DataFrame({'A': 'a b a b'.split(), 'B': [1, 2, 3, 4]}) +>>> df + A B +0 a 1 +1 b 2 +2 a 3 +3 b 4 + +To get the difference between each groups maximum and minimum value in one +pass, you can do + +>>> df.groupby('A').pipe(lambda x: x.max() - x.min()) + B +A +a 2 +b 2""", + ) + @Appender(_pipe_template) + def pipe(self, func, *args, **kwargs): + return com.pipe(self, func, *args, **kwargs) + + plot = property(GroupByPlot) + + def _make_wrapper(self, name): + assert name in self._apply_whitelist + + self._set_group_selection() + + # need to setup the selection + # as are not passed directly but in the grouper + f = getattr(self._selected_obj, name) + if not isinstance(f, types.MethodType): + return self.apply(lambda self: getattr(self, name)) + + f = getattr(type(self._selected_obj), name) + sig = inspect.signature(f) + + def wrapper(*args, **kwargs): + # a little trickery for aggregation functions that need an axis + # argument + if "axis" in sig.parameters: + if kwargs.get("axis", None) is None: + kwargs["axis"] = self.axis + + def curried(x): + return f(x, *args, **kwargs) + + # preserve the name so we can detect it when calling plot methods, + # to avoid duplicates + curried.__name__ = name + + # special case otherwise extra plots are created when catching the + # exception below + if name in base.plotting_methods: + return self.apply(curried) + + try: + return self.apply(curried) + except TypeError as err: + if not re.search( + "reduction operation '.*' not allowed for this dtype", str(err) + ): + # We don't have a cython implementation + # TODO: is the above comment accurate? 
def get_group(self, name, obj=None):
    """
    Construct DataFrame from group with provided name.

    Parameters
    ----------
    name : object
        The name of the group to get as a DataFrame.
    obj : DataFrame, default None
        The DataFrame to take the DataFrame out of. If
        it is None, the object groupby was called on will
        be used.

    Returns
    -------
    group : same type as obj

    Raises
    ------
    KeyError
        If no positions are found for ``name``.
    """
    source = self._selected_obj if obj is None else obj

    positions = self._get_index(name)
    if len(positions) == 0:
        raise KeyError(name)

    return source._take_with_is_copy(positions, axis=self.axis)
+ f = getattr(nanops, "nan" + func) + + else: + raise ValueError( + "func must be a callable if args or kwargs are supplied" + ) + else: + f = func + + # ignore SettingWithCopy here in case the user mutates + with option_context("mode.chained_assignment", None): + try: + result = self._python_apply_general(f) + except TypeError: + # gh-20949 + # try again, with .apply acting as a filtering + # operation, by excluding the grouping column + # This would normally not be triggered + # except if the udf is trying an operation that + # fails on *some* columns, e.g. a numeric operation + # on a string grouper column + + with _group_selection_context(self): + return self._python_apply_general(f) + + return result + + def _python_apply_general(self, f): + keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis) + + return self._wrap_applied_output( + keys, values, not_indexed_same=mutated or self.mutated + ) + + def _iterate_slices(self) -> Iterable[Series]: + raise AbstractMethodError(self) + + def transform(self, func, *args, **kwargs): + raise AbstractMethodError(self) + + def _cumcount_array(self, ascending: bool = True): + """ + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. 
+ + Notes + ----- + this is currently implementing sort=False + (though the default is sort=True) for groupby in general + """ + ids, _, ngroups = self.grouper.group_info + sorter = get_group_index_sorter(ids, ngroups) + ids, count = ids[sorter], len(ids) + + if count == 0: + return np.empty(0, dtype=np.int64) + + run = np.r_[True, ids[:-1] != ids[1:]] + rep = np.diff(np.r_[np.nonzero(run)[0], count]) + out = (~run).cumsum() + + if ascending: + out -= np.repeat(out[run], rep) + else: + out = np.repeat(out[np.r_[run[1:], True]], rep) - out + + rev = np.empty(count, dtype=np.intp) + rev[sorter] = np.arange(count, dtype=np.intp) + return out[rev].astype(np.int64, copy=False) + + def _try_cast(self, result, obj, numeric_only: bool = False): + """ + Try to cast the result to our obj original type, + we may have roundtripped through object in the mean-time. + + If numeric_only is True, then only try to cast numerics + and not datetimelikes. + + """ + if obj.ndim > 1: + dtype = obj._values.dtype + else: + dtype = obj.dtype + + if not is_scalar(result): + if ( + is_extension_array_dtype(dtype) + and not is_categorical_dtype(dtype) + and dtype.kind != "M" + ): + # We have to special case categorical so as not to upcast + # things like counts back to categorical + cls = dtype.construct_array_type() + result = try_cast_to_ea(cls, result, dtype=dtype) + + elif numeric_only and is_numeric_dtype(dtype) or not numeric_only: + result = maybe_downcast_to_dtype(result, dtype) + + return result + + def _transform_should_cast(self, func_nm: str) -> bool: + """ + Parameters + ---------- + func_nm: str + The name of the aggregation function being performed + + Returns + ------- + bool + Whether transform should attempt to cast the result of aggregation + """ + return (self.size().fillna(0) > 0).any() and ( + func_nm not in base.cython_cast_blacklist + ) + + def _cython_transform(self, how: str, numeric_only: bool = True, **kwargs): + output: Dict[base.OutputKey, np.ndarray] = {} + for 
idx, obj in enumerate(self._iterate_slices()): + name = obj.name + is_numeric = is_numeric_dtype(obj.dtype) + if numeric_only and not is_numeric: + continue + + try: + result, _ = self.grouper.transform(obj.values, how, **kwargs) + except NotImplementedError: + continue + + if self._transform_should_cast(how): + result = self._try_cast(result, obj) + + key = base.OutputKey(label=name, position=idx) + output[key] = result + + if len(output) == 0: + raise DataError("No numeric types to aggregate") + + return self._wrap_transformed_output(output) + + def _wrap_aggregated_output(self, output: Mapping[base.OutputKey, np.ndarray]): + raise AbstractMethodError(self) + + def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]): + raise AbstractMethodError(self) + + def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False): + raise AbstractMethodError(self) + + def _cython_agg_general( + self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 + ): + output: Dict[base.OutputKey, Union[np.ndarray, DatetimeArray]] = {} + # Ideally we would be able to enumerate self._iterate_slices and use + # the index from enumeration as the key of output, but ohlc in particular + # returns a (n x 4) array. Output requires 1D ndarrays as values, so we + # need to slice that up into 1D arrays + idx = 0 + for obj in self._iterate_slices(): + name = obj.name + is_numeric = is_numeric_dtype(obj.dtype) + if numeric_only and not is_numeric: + continue + + result, agg_names = self.grouper.aggregate( + obj._values, how, min_count=min_count + ) + + if agg_names: + # e.g. 
def _python_agg_general(self, func, *args, **kwargs):
    """
    Aggregate every slice of the object with a Python-level function.

    ``func`` is first passed through ``self._is_builtin_func`` and then
    applied, with ``*args`` and ``**kwargs``, to each slice yielded by
    ``self._iterate_slices()`` via ``self.grouper.agg_series``. Slices for
    which that call raises ``TypeError`` are skipped. If nothing could be
    aggregated the call falls back to ``self._python_apply_general``.

    Returns
    -------
    Whatever ``self._wrap_aggregated_output`` (or the
    ``_python_apply_general`` fallback) returns.
    """
    func = self._is_builtin_func(func)
    f = lambda x: func(x, *args, **kwargs)

    # iterate through "columns" ex exclusions to populate output dict
    output: Dict[base.OutputKey, np.ndarray] = {}

    for idx, obj in enumerate(self._iterate_slices()):
        name = obj.name
        if self.grouper.ngroups == 0:
            # agg_series below assumes ngroups > 0
            continue

        try:
            # if this function is invalid for this dtype, we will ignore it.
            result, counts = self.grouper.agg_series(obj, f)
        except TypeError:
            continue

        assert result is not None
        key = base.OutputKey(label=name, position=idx)
        output[key] = self._try_cast(result, obj, numeric_only=True)

    if len(output) == 0:
        # nothing aggregated (no groups, or every slice raised TypeError)
        return self._python_apply_general(f)

    if self.grouper._filter_empty_groups:

        # NOTE: ``counts`` is the value from the last successful
        # agg_series call in the loop above
        mask = counts.ravel() > 0
        for key, result in output.items():

            # since we are masking, make sure that we have a float object
            values = result
            if is_numeric_dtype(values.dtype):
                values = ensure_float(values)

            # only existing keys are reassigned, so the dict size does not
            # change while it is being iterated
            output[key] = self._try_cast(values[mask], result)

    return self._wrap_aggregated_output(output)
+ result = concat(values, axis=self.axis) + ax = self._selected_obj._get_axis(self.axis) + + if isinstance(result, Series): + result = result.reindex(ax) + else: + + # this is a very unfortunate situation + # we have a multi-index that is NOT lexsorted + # and we have a result which is duplicated + # we can't reindex, so we resort to this + # GH 14776 + if isinstance(ax, MultiIndex) and not ax.is_unique: + indexer = algorithms.unique1d( + result.index.get_indexer_for(ax.values) + ) + result = result.take(indexer, axis=self.axis) + else: + result = result.reindex(ax, axis=self.axis) + + elif self.group_keys: + + values = reset_identity(values) + if self.as_index: + + # possible MI return case + group_keys = keys + group_levels = self.grouper.levels + group_names = self.grouper.names + + result = concat( + values, + axis=self.axis, + keys=group_keys, + levels=group_levels, + names=group_names, + sort=False, + ) + else: + + # GH5610, returns a MI, with the first level being a + # range index + keys = list(range(len(values))) + result = concat(values, axis=self.axis, keys=keys) + else: + values = reset_identity(values) + result = concat(values, axis=self.axis) + + if isinstance(result, Series) and self._selection_name is not None: + + result.name = self._selection_name + + return result + + def _apply_filter(self, indices, dropna): + if len(indices) == 0: + indices = np.array([], dtype="int64") + else: + indices = np.sort(np.concatenate(indices)) + if dropna: + filtered = self._selected_obj.take(indices, axis=self.axis) + else: + mask = np.empty(len(self._selected_obj.index), dtype=bool) + mask.fill(False) + mask[indices.astype(int)] = True + # mask fails to broadcast when passed to where; broadcast manually. + mask = np.tile(mask, list(self._selected_obj.shape[1:]) + [1]).T + filtered = self._selected_obj.where(mask) # Fill with NaNs. + return filtered + + +class GroupBy(_GroupBy): + """ + Class for grouping and aggregating relational data. 
+ + See aggregate, transform, and apply functions on this object. + + It's easiest to use obj.groupby(...) to use GroupBy, but you can also do: + + :: + + grouped = groupby(obj, ...) + + Parameters + ---------- + obj : pandas object + axis : int, default 0 + level : int, default None + Level of MultiIndex + groupings : list of Grouping objects + Most users should ignore this + exclusions : array-like, optional + List of columns to exclude + name : str + Most users should ignore this + + Returns + ------- + **Attributes** + groups : dict + {group name -> group labels} + len(grouped) : int + Number of groups + + Notes + ----- + After grouping, see aggregate, apply, and transform functions. Here are + some other brief notes about usage. When grouping by multiple groups, the + result index will be a MultiIndex (hierarchical) by default. + + Iteration produces (key, group) tuples, i.e. chunking the data by group. So + you can write code like: + + :: + + grouped = obj.groupby(keys, axis=axis) + for key, group in grouped: + # do something with the data + + Function calls on GroupBy, if not specially implemented, "dispatch" to the + grouped data. So if you group a DataFrame and wish to invoke the std() + method on each group, you can simply do: + + :: + + df.groupby(mapper).std() + + rather than + + :: + + df.groupby(mapper).aggregate(np.std) + + You can pass arguments to these "wrapped" functions, too. + + See the online documentation for full exposition on these topics and much + more + """ + + def _bool_agg(self, val_test, skipna): + """ + Shared func to call any / all Cython GroupBy implementations. 
+ """ + + def objs_to_bool(vals: np.ndarray) -> Tuple[np.ndarray, Type]: + if is_object_dtype(vals): + vals = np.array([bool(x) for x in vals]) + else: + vals = vals.astype(np.bool) + + return vals.view(np.uint8), np.bool + + def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray: + return result.astype(inference, copy=False) + + return self._get_cythonized_result( + "group_any_all", + aggregate=True, + cython_dtype=np.dtype(np.uint8), + needs_values=True, + needs_mask=True, + pre_processing=objs_to_bool, + post_processing=result_to_bool, + val_test=val_test, + skipna=skipna, + ) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def any(self, skipna: bool = True): + """ + Return True if any value in the group is truthful, else False. + + Parameters + ---------- + skipna : bool, default True + Flag to ignore nan values during truth testing. + + Returns + ------- + bool + """ + return self._bool_agg("any", skipna) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def all(self, skipna: bool = True): + """ + Return True if all values in the group are truthful, else False. + + Parameters + ---------- + skipna : bool, default True + Flag to ignore nan values during truth testing. + + Returns + ------- + bool + """ + return self._bool_agg("all", skipna) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def count(self): + """ + Compute count of group, excluding missing values. + + Returns + ------- + Series or DataFrame + Count of values within each group. + """ + + # defined here for API doc + raise NotImplementedError + + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def mean(self, *args, **kwargs): + """ + Compute mean of groups, excluding missing values. + + Returns + ------- + pandas.Series or pandas.DataFrame + %(see_also)s + Examples + -------- + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + ... 'B': [np.nan, 2, 3, 4, 5], + ... 
'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) + + Groupby one column and return the mean of the remaining columns in + each group. + + >>> df.groupby('A').mean() + B C + A + 1 3.0 1.333333 + 2 4.0 1.500000 + + Groupby two columns and return the mean of the remaining column. + + >>> df.groupby(['A', 'B']).mean() + C + A B + 1 2.0 2 + 4.0 1 + 2 3.0 1 + 5.0 2 + + Groupby one column and return the mean of only particular column in + the group. + + >>> df.groupby('A')['B'].mean() + A + 1 3.0 + 2 4.0 + Name: B, dtype: float64 + """ + nv.validate_groupby_func("mean", args, kwargs, ["numeric_only"]) + return self._cython_agg_general( + "mean", alt=lambda x, axis: Series(x).mean(**kwargs), **kwargs + ) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def median(self, **kwargs): + """ + Compute median of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex + + Returns + ------- + Series or DataFrame + Median of values within each group. + """ + return self._cython_agg_general( + "median", + alt=lambda x, axis: Series(x).median(axis=axis, **kwargs), + **kwargs, + ) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def std(self, ddof: int = 1, *args, **kwargs): + """ + Compute standard deviation of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + Returns + ------- + Series or DataFrame + Standard deviation of values within each group. + """ + + # TODO: implement at Cython level? + nv.validate_groupby_func("std", args, kwargs) + return np.sqrt(self.var(ddof=ddof, **kwargs)) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def var(self, ddof: int = 1, *args, **kwargs): + """ + Compute variance of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. 
+ + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + Returns + ------- + Series or DataFrame + Variance of values within each group. + """ + nv.validate_groupby_func("var", args, kwargs) + if ddof == 1: + return self._cython_agg_general( + "var", alt=lambda x, axis: Series(x).var(ddof=ddof, **kwargs), **kwargs + ) + else: + f = lambda x: x.var(ddof=ddof, **kwargs) + with _group_selection_context(self): + return self._python_agg_general(f) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def sem(self, ddof: int = 1): + """ + Compute standard error of the mean of groups, excluding missing values. + + For multiple groupings, the result index will be a MultiIndex. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + Returns + ------- + Series or DataFrame + Standard error of the mean of values within each group. + """ + return self.std(ddof=ddof) / np.sqrt(self.count()) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def size(self): + """ + Compute group sizes. + + Returns + ------- + Series + Number of rows in each group. + """ + result = self.grouper.size() + + if isinstance(self.obj, Series): + result.name = self.obj.name + return self._reindex_output(result, fill_value=0) + + @classmethod + def _add_numeric_operations(cls): + """ + Add numeric operations to the GroupBy generically. + """ + + def groupby_function( + name: str, + alias: str, + npfunc, + numeric_only: bool = True, + min_count: int = -1, + ): + + _local_template = """ + Compute %(f)s of group values. + + Returns + ------- + Series or DataFrame + Computed %(f)s of values within each group. 
+ """ + + @Substitution(name="groupby", f=name) + @Appender(_common_see_also) + @Appender(_local_template) + def f(self, **kwargs): + if "numeric_only" not in kwargs: + kwargs["numeric_only"] = numeric_only + if "min_count" not in kwargs: + kwargs["min_count"] = min_count + + self._set_group_selection() + + # try a cython aggregation if we can + try: + return self._cython_agg_general(alias, alt=npfunc, **kwargs) + except DataError: + pass + except NotImplementedError as err: + if "function is not implemented for this dtype" in str( + err + ) or "category dtype not supported" in str(err): + # raised in _get_cython_function, in some cases can + # be trimmed by implementing cython funcs for more dtypes + pass + else: + raise + + # apply a non-cython aggregation + result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) + return result + + set_function_name(f, name, cls) + + return f + + def first_compat(x, axis=0): + def first(x): + x = x.to_numpy() + + x = x[notna(x)] + if len(x) == 0: + return np.nan + return x[0] + + if isinstance(x, DataFrame): + return x.apply(first, axis=axis) + else: + return first(x) + + def last_compat(x, axis=0): + def last(x): + x = x.to_numpy() + x = x[notna(x)] + if len(x) == 0: + return np.nan + return x[-1] + + if isinstance(x, DataFrame): + return x.apply(last, axis=axis) + else: + return last(x) + + cls.sum = groupby_function("sum", "add", np.sum, min_count=0) + cls.prod = groupby_function("prod", "prod", np.prod, min_count=0) + cls.min = groupby_function("min", "min", np.min, numeric_only=False) + cls.max = groupby_function("max", "max", np.max, numeric_only=False) + cls.first = groupby_function("first", "first", first_compat, numeric_only=False) + cls.last = groupby_function("last", "last", last_compat, numeric_only=False) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def ohlc(self) -> DataFrame: + """ + Compute sum of values, excluding missing values. 
+ + For multiple groupings, the result index will be a MultiIndex + + Returns + ------- + DataFrame + Open, high, low and close values within each group. + """ + + return self._apply_to_column_groupbys(lambda x: x._cython_agg_general("ohlc")) + + @Appender(DataFrame.describe.__doc__) + def describe(self, **kwargs): + with _group_selection_context(self): + result = self.apply(lambda x: x.describe(**kwargs)) + if self.axis == 1: + return result.T + return result.unstack() + + def resample(self, rule, *args, **kwargs): + """ + Provide resampling when using a TimeGrouper. + + Given a grouper, the function resamples it according to a string + "string" -> "frequency". + + See the :ref:`frequency aliases ` + documentation for more details. + + Parameters + ---------- + rule : str or DateOffset + The offset string or object representing target grouper conversion. + *args, **kwargs + Possible arguments are `how`, `fill_method`, `limit`, `kind` and + `on`, and other arguments of `TimeGrouper`. + + Returns + ------- + Grouper + Return a new grouper with our resampler appended. + + See Also + -------- + Grouper : Specify a frequency to resample with when + grouping by a key. + DatetimeIndex.resample : Frequency conversion and resampling of + time series. + + Examples + -------- + >>> idx = pd.date_range('1/1/2000', periods=4, freq='T') + >>> df = pd.DataFrame(data=4 * [range(2)], + ... index=idx, + ... columns=['a', 'b']) + >>> df.iloc[2, 0] = 5 + >>> df + a b + 2000-01-01 00:00:00 0 1 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:02:00 5 1 + 2000-01-01 00:03:00 0 1 + + Downsample the DataFrame into 3 minute bins and sum the values of + the timestamps falling into a bin. + + >>> df.groupby('a').resample('3T').sum() + a b + a + 0 2000-01-01 00:00:00 0 2 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:00:00 5 1 + + Upsample the series into 30 second bins. 
+ + >>> df.groupby('a').resample('30S').sum() + a b + a + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:00:30 0 0 + 2000-01-01 00:01:00 0 1 + 2000-01-01 00:01:30 0 0 + 2000-01-01 00:02:00 0 0 + 2000-01-01 00:02:30 0 0 + 2000-01-01 00:03:00 0 1 + 5 2000-01-01 00:02:00 5 1 + + Resample by month. Values are assigned to the month of the period. + + >>> df.groupby('a').resample('M').sum() + a b + a + 0 2000-01-31 0 3 + 5 2000-01-31 5 1 + + Downsample the series into 3 minute bins as above, but close the right + side of the bin interval. + + >>> df.groupby('a').resample('3T', closed='right').sum() + a b + a + 0 1999-12-31 23:57:00 0 1 + 2000-01-01 00:00:00 0 2 + 5 2000-01-01 00:00:00 5 1 + + Downsample the series into 3 minute bins and close the right side of + the bin interval, but label each bin using the right edge instead of + the left. + + >>> df.groupby('a').resample('3T', closed='right', label='right').sum() + a b + a + 0 2000-01-01 00:00:00 0 1 + 2000-01-01 00:03:00 0 2 + 5 2000-01-01 00:03:00 5 1 + + Add an offset of twenty seconds. + + >>> df.groupby('a').resample('3T', loffset='20s').sum() + a b + a + 0 2000-01-01 00:00:20 0 2 + 2000-01-01 00:03:20 0 1 + 5 2000-01-01 00:00:20 5 1 + """ + from pandas.core.resample import get_resampler_for_grouping + + return get_resampler_for_grouping(self, rule, *args, **kwargs) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def rolling(self, *args, **kwargs): + """ + Return a rolling grouper, providing rolling functionality per group. + """ + from pandas.core.window import RollingGroupby + + return RollingGroupby(self, *args, **kwargs) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def expanding(self, *args, **kwargs): + """ + Return an expanding grouper, providing expanding + functionality per group. 
+ """ + from pandas.core.window import ExpandingGroupby + + return ExpandingGroupby(self, *args, **kwargs) + + def _fill(self, direction, limit=None): + """ + Shared function for `pad` and `backfill` to call Cython method. + + Parameters + ---------- + direction : {'ffill', 'bfill'} + Direction passed to underlying Cython function. `bfill` will cause + values to be filled backwards. `ffill` and any other values will + default to a forward fill + limit : int, default None + Maximum number of consecutive values to fill. If `None`, this + method will convert to -1 prior to passing to Cython + + Returns + ------- + `Series` or `DataFrame` with filled values + + See Also + -------- + pad + backfill + """ + # Need int value for Cython + if limit is None: + limit = -1 + + return self._get_cythonized_result( + "group_fillna_indexer", + needs_mask=True, + cython_dtype=np.dtype(np.int64), + result_is_index=True, + direction=direction, + limit=limit, + ) + + @Substitution(name="groupby") + def pad(self, limit=None): + """ + Forward fill the values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + Object with missing values filled. + + See Also + -------- + Series.pad + DataFrame.pad + Series.fillna + DataFrame.fillna + """ + return self._fill("ffill", limit=limit) + + ffill = pad + + @Substitution(name="groupby") + def backfill(self, limit=None): + """ + Backward fill the values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series or DataFrame + Object with missing values filled. 
+ + See Also + -------- + Series.backfill + DataFrame.backfill + Series.fillna + DataFrame.fillna + """ + return self._fill("bfill", limit=limit) + + bfill = backfill + + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFrame: + """ + Take the nth row from each group if n is an int, or a subset of rows + if n is a list of ints. + + If dropna, will take the nth non-null row, dropna is either + 'all' or 'any'; this is equivalent to calling dropna(how=dropna) + before the groupby. + + Parameters + ---------- + n : int or list of ints + A single nth value for the row or a list of nth values. + dropna : None or str, optional + Apply the specified dropna operation before counting which row is + the nth row. Needs to be None, 'any' or 'all'. + + Returns + ------- + Series or DataFrame + N-th value within each group. + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], + ... 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) + >>> g = df.groupby('A') + >>> g.nth(0) + B + A + 1 NaN + 2 3.0 + >>> g.nth(1) + B + A + 1 2.0 + 2 5.0 + >>> g.nth(-1) + B + A + 1 4.0 + 2 5.0 + >>> g.nth([0, 1]) + B + A + 1 NaN + 1 2.0 + 2 3.0 + 2 5.0 + + Specifying `dropna` allows count ignoring ``NaN`` + + >>> g.nth(0, dropna='any') + B + A + 1 2.0 + 2 3.0 + + NaNs denote group exhausted when using dropna + + >>> g.nth(3, dropna='any') + B + A + 1 NaN + 2 NaN + + Specifying `as_index=False` in `groupby` keeps the original index. 
+ + >>> df.groupby('A', as_index=False).nth(1) + A B + 1 1 2.0 + 4 2 5.0 + """ + + valid_containers = (set, list, tuple) + if not isinstance(n, (valid_containers, int)): + raise TypeError("n needs to be an int or a list/set/tuple of ints") + + if not dropna: + + if isinstance(n, int): + nth_values = [n] + elif isinstance(n, valid_containers): + nth_values = list(set(n)) + + nth_array = np.array(nth_values, dtype=np.intp) + self._set_group_selection() + + mask_left = np.in1d(self._cumcount_array(), nth_array) + mask_right = np.in1d(self._cumcount_array(ascending=False) + 1, -nth_array) + mask = mask_left | mask_right + + ids, _, _ = self.grouper.group_info + + # Drop NA values in grouping + mask = mask & (ids != -1) + + out = self._selected_obj[mask] + if not self.as_index: + return out + + result_index = self.grouper.result_index + out.index = result_index[ids[mask]] + + if not self.observed and isinstance(result_index, CategoricalIndex): + out = out.reindex(result_index) + + out = self._reindex_output(out) + return out.sort_index() if self.sort else out + + # dropna is truthy + if isinstance(n, valid_containers): + raise ValueError("dropna option with a list of nth values is not supported") + + if dropna not in ["any", "all"]: + # Note: when agg-ing picker doesn't raise this, just returns NaN + raise ValueError( + "For a DataFrame groupby, dropna must be " + "either None, 'any' or 'all', " + f"(was passed {dropna})." + ) + + # old behaviour, but with all and any support for DataFrames. + # modified in GH 7559 to have better perf + max_len = n if n >= 0 else -1 - n + dropped = self.obj.dropna(how=dropna, axis=self.axis) + + # get a new grouper for our dropped obj + if self.keys is None and self.level is None: + + # we don't have the grouper info available + # (e.g. 
we have selected out + # a column that is not in the current object) + axis = self.grouper.axis + grouper = axis[axis.isin(dropped.index)] + + else: + + # create a grouper with the original parameters, but on dropped + # object + from pandas.core.groupby.grouper import get_grouper + + grouper, _, _ = get_grouper( + dropped, + key=self.keys, + axis=self.axis, + level=self.level, + sort=self.sort, + mutated=self.mutated, + ) + + grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) + sizes, result = grb.size(), grb.nth(n) + mask = (sizes < max_len).values + + # set the results which don't meet the criteria + if len(result) and mask.any(): + result.loc[mask] = np.nan + + # reset/reindex to the original groups + if len(self.obj) == len(dropped) or len(result) == len( + self.grouper.result_index + ): + result.index = self.grouper.result_index + else: + result = result.reindex(self.grouper.result_index) + + return result + + def quantile(self, q=0.5, interpolation: str = "linear"): + """ + Return group values at the given quantile, a la numpy.percentile. + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + Value(s) between 0 and 1 providing the quantile(s) to compute. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + Method to use when the desired quantile falls between two points. + + Returns + ------- + Series or DataFrame + Return type determined by caller of GroupBy object. + + See Also + -------- + Series.quantile : Similar method for Series. + DataFrame.quantile : Similar method for DataFrame. + numpy.percentile : NumPy method to compute qth percentile. + + Examples + -------- + >>> df = pd.DataFrame([ + ... ['a', 1], ['a', 2], ['a', 3], + ... ['b', 1], ['b', 3], ['b', 5] + ... 
], columns=['key', 'val']) + >>> df.groupby('key').quantile() + val + key + a 2.0 + b 3.0 + """ + from pandas import concat + + def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: + if is_object_dtype(vals): + raise TypeError( + "'quantile' cannot be performed against 'object' dtypes!" + ) + + inference = None + if is_integer_dtype(vals): + inference = np.int64 + elif is_datetime64_dtype(vals): + inference = "datetime64[ns]" + vals = vals.astype(np.float) + + return vals, inference + + def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: + if inference: + # Check for edge case + if not ( + is_integer_dtype(inference) + and interpolation in {"linear", "midpoint"} + ): + vals = vals.astype(inference) + + return vals + + if is_scalar(q): + return self._get_cythonized_result( + "group_quantile", + aggregate=True, + needs_values=True, + needs_mask=True, + cython_dtype=np.dtype(np.float64), + pre_processing=pre_processor, + post_processing=post_processor, + q=q, + interpolation=interpolation, + ) + else: + results = [ + self._get_cythonized_result( + "group_quantile", + aggregate=True, + needs_values=True, + needs_mask=True, + cython_dtype=np.dtype(np.float64), + pre_processing=pre_processor, + post_processing=post_processor, + q=qi, + interpolation=interpolation, + ) + for qi in q + ] + result = concat(results, axis=0, keys=q) + # fix levels to place quantiles on the inside + # TODO(GH-10710): Ideally, we could write this as + # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] + # but this hits https://github.com/pandas-dev/pandas/issues/10710 + # which doesn't reorder the list-like `q` on the inner level. 
+ order = list(range(1, result.index.nlevels)) + [0] + + # temporarily saves the index names + index_names = np.array(result.index.names) + + # set index names to positions to avoid confusion + result.index.names = np.arange(len(index_names)) + + # place quantiles on the inside + result = result.reorder_levels(order) + + # restore the index names in order + result.index.names = index_names[order] + + # reorder rows to keep things sorted + indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() + return result.take(indices) + + @Substitution(name="groupby") + def ngroup(self, ascending: bool = True): + """ + Number each group from 0 to the number of groups - 1. + + This is the enumerative complement of cumcount. Note that the + numbers given to the groups match the order in which the groups + would be seen when iterating over the groupby object, not the + order they are first observed. + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from number of group - 1 to 0. + + Returns + ------- + Series + Unique numbers for each group. + + See Also + -------- + .cumcount : Number the rows in each group. + + Examples + -------- + + >>> df = pd.DataFrame({"A": list("aaabba")}) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').ngroup() + 0 0 + 1 0 + 2 0 + 3 1 + 4 1 + 5 0 + dtype: int64 + >>> df.groupby('A').ngroup(ascending=False) + 0 1 + 1 1 + 2 1 + 3 0 + 4 0 + 5 1 + dtype: int64 + >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup() + 0 0 + 1 0 + 2 1 + 3 3 + 4 2 + 5 0 + dtype: int64 + """ + + with _group_selection_context(self): + index = self._selected_obj.index + result = Series(self.grouper.group_info[0], index) + if not ascending: + result = self.ngroups - 1 - result + return result + + @Substitution(name="groupby") + def cumcount(self, ascending: bool = True): + """ + Number each item in each group from 0 to the length of that group - 1. 
+ + Essentially this is equivalent to + + >>> self.apply(lambda x: pd.Series(np.arange(len(x)), x.index)) + + Parameters + ---------- + ascending : bool, default True + If False, number in reverse, from length of group - 1 to 0. + + Returns + ------- + Series + Sequence number of each element within each group. + + See Also + -------- + .ngroup : Number the groups themselves. + + Examples + -------- + + >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], + ... columns=['A']) + >>> df + A + 0 a + 1 a + 2 a + 3 b + 4 b + 5 a + >>> df.groupby('A').cumcount() + 0 0 + 1 1 + 2 2 + 3 0 + 4 1 + 5 3 + dtype: int64 + >>> df.groupby('A').cumcount(ascending=False) + 0 3 + 1 2 + 2 1 + 3 1 + 4 0 + 5 0 + dtype: int64 + """ + + with _group_selection_context(self): + index = self._selected_obj.index + cumcounts = self._cumcount_array(ascending=ascending) + return Series(cumcounts, index) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def rank( + self, + method: str = "average", + ascending: bool = True, + na_option: str = "keep", + pct: bool = False, + axis: int = 0, + ): + """ + Provide the rank of values within each group. + + Parameters + ---------- + method : {'average', 'min', 'max', 'first', 'dense'}, default 'average' + * average: average rank of group. + * min: lowest rank in group. + * max: highest rank in group. + * first: ranks assigned in order they appear in the array. + * dense: like 'min', but rank always increases by 1 between groups. + ascending : bool, default True + False for ranks by high (1) to low (N). + na_option : {'keep', 'top', 'bottom'}, default 'keep' + * keep: leave NA values where they are. + * top: smallest rank if ascending. + * bottom: smallest rank if descending. + pct : bool, default False + Compute percentage rank of data within each group. + axis : int, default 0 + The axis of the object over which to compute the rank. 
+ + Returns + ------- + DataFrame with ranking of values within each group + """ + if na_option not in {"keep", "top", "bottom"}: + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + raise ValueError(msg) + return self._cython_transform( + "rank", + numeric_only=False, + ties_method=method, + ascending=ascending, + na_option=na_option, + pct=pct, + axis=axis, + ) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def cumprod(self, axis=0, *args, **kwargs): + """ + Cumulative product for each group. + + Returns + ------- + Series or DataFrame + """ + nv.validate_groupby_func("cumprod", args, kwargs, ["numeric_only", "skipna"]) + if axis != 0: + return self.apply(lambda x: x.cumprod(axis=axis, **kwargs)) + + return self._cython_transform("cumprod", **kwargs) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def cumsum(self, axis=0, *args, **kwargs): + """ + Cumulative sum for each group. + + Returns + ------- + Series or DataFrame + """ + nv.validate_groupby_func("cumsum", args, kwargs, ["numeric_only", "skipna"]) + if axis != 0: + return self.apply(lambda x: x.cumsum(axis=axis, **kwargs)) + + return self._cython_transform("cumsum", **kwargs) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def cummin(self, axis=0, **kwargs): + """ + Cumulative min for each group. + + Returns + ------- + Series or DataFrame + """ + if axis != 0: + return self.apply(lambda x: np.minimum.accumulate(x, axis)) + + return self._cython_transform("cummin", numeric_only=False) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def cummax(self, axis=0, **kwargs): + """ + Cumulative max for each group. 
+ + Returns + ------- + Series or DataFrame + """ + if axis != 0: + return self.apply(lambda x: np.maximum.accumulate(x, axis)) + + return self._cython_transform("cummax", numeric_only=False) + + def _get_cythonized_result( + self, + how: str, + cython_dtype: np.dtype, + aggregate: bool = False, + needs_values: bool = False, + needs_mask: bool = False, + needs_ngroups: bool = False, + result_is_index: bool = False, + pre_processing=None, + post_processing=None, + **kwargs, + ): + """ + Get result for Cythonized functions. + + Parameters + ---------- + how : str, Cythonized function name to be called + cython_dtype : np.dtype + Type of the array that will be modified by the Cython call. + aggregate : bool, default False + Whether the result should be aggregated to match the number of + groups + needs_values : bool, default False + Whether the values should be a part of the Cython call + signature + needs_mask : bool, default False + Whether boolean mask needs to be part of the Cython call + signature + needs_ngroups : bool, default False + Whether number of groups is part of the Cython call signature + result_is_index : bool, default False + Whether the result of the Cython operation is an index of + values to be retrieved, instead of the actual values themselves + pre_processing : function, default None + Function to be applied to `values` prior to passing to Cython. + Function should return a tuple where the first element is the + values to be passed to Cython and the second element is an optional + type which the values should be converted to after being returned + by the Cython operation. Raises if `needs_values` is False. + post_processing : function, default None + Function to be applied to result of Cython function. Should accept + an array of values as the first argument and type inferences as its + second argument, i.e. the signature should be + (ndarray, Type). 
+ **kwargs : dict + Extra arguments to be passed back to Cython funcs + + Returns + ------- + `Series` or `DataFrame` with filled values + """ + if result_is_index and aggregate: + raise ValueError("'result_is_index' and 'aggregate' cannot both be True!") + if post_processing: + if not callable(pre_processing): + raise ValueError("'post_processing' must be a callable!") + if pre_processing: + if not callable(pre_processing): + raise ValueError("'pre_processing' must be a callable!") + if not needs_values: + raise ValueError( + "Cannot use 'pre_processing' without specifying 'needs_values'!" + ) + + grouper = self.grouper + + labels, _, ngroups = grouper.group_info + output: Dict[base.OutputKey, np.ndarray] = {} + base_func = getattr(libgroupby, how) + + for idx, obj in enumerate(self._iterate_slices()): + name = obj.name + values = obj._data._values + + if aggregate: + result_sz = ngroups + else: + result_sz = len(values) + + result = np.zeros(result_sz, dtype=cython_dtype) + func = partial(base_func, result, labels) + inferences = None + + if needs_values: + vals = values + if pre_processing: + vals, inferences = pre_processing(vals) + func = partial(func, vals) + + if needs_mask: + mask = isna(values).view(np.uint8) + func = partial(func, mask) + + if needs_ngroups: + func = partial(func, ngroups) + + func(**kwargs) # Call func to modify indexer values in place + + if result_is_index: + result = algorithms.take_nd(values, result) + + if post_processing: + result = post_processing(result, inferences) + + key = base.OutputKey(label=name, position=idx) + output[key] = result + + if aggregate: + return self._wrap_aggregated_output(output) + else: + return self._wrap_transformed_output(output) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def shift(self, periods=1, freq=None, axis=0, fill_value=None): + """ + Shift each group by periods observations. + + Parameters + ---------- + periods : int, default 1 + Number of periods to shift. 
+ freq : frequency string + axis : axis to shift, default 0 + fill_value : optional + + .. versionadded:: 0.24.0 + + Returns + ------- + Series or DataFrame + Object shifted within each group. + """ + + if freq is not None or axis != 0 or not isna(fill_value): + return self.apply(lambda x: x.shift(periods, freq, axis, fill_value)) + + return self._get_cythonized_result( + "group_shift_indexer", + cython_dtype=np.dtype(np.int64), + needs_ngroups=True, + result_is_index=True, + periods=periods, + ) + + @Substitution(name="groupby") + @Appender(_common_see_also) + def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0): + """ + Calculate pct_change of each value to previous entry in group. + + Returns + ------- + Series or DataFrame + Percentage changes within each group. + """ + if freq is not None or axis != 0: + return self.apply( + lambda x: x.pct_change( + periods=periods, + fill_method=fill_method, + limit=limit, + freq=freq, + axis=axis, + ) + ) + if fill_method is None: # GH30463 + fill_method = "pad" + limit = 0 + filled = getattr(self, fill_method)(limit=limit) + fill_grp = filled.groupby(self.grouper.codes) + shifted = fill_grp.shift(periods=periods, freq=freq) + return (filled / shifted) - 1 + + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def head(self, n=5): + """ + Return first n rows of each group. + + Similar to ``.apply(lambda x: x.head(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). + + Does not work for negative values of `n`. + + Returns + ------- + Series or DataFrame + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], + ... 
columns=['A', 'B']) + >>> df.groupby('A').head(1) + A B + 0 1 2 + 2 5 6 + >>> df.groupby('A').head(-1) + Empty DataFrame + Columns: [A, B] + Index: [] + """ + self._reset_group_selection() + mask = self._cumcount_array() < n + return self._selected_obj[mask] + + @Substitution(name="groupby") + @Substitution(see_also=_common_see_also) + def tail(self, n=5): + """ + Return last n rows of each group. + + Similar to ``.apply(lambda x: x.tail(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). + + Does not work for negative values of `n`. + + Returns + ------- + Series or DataFrame + %(see_also)s + Examples + -------- + + >>> df = pd.DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], + ... columns=['A', 'B']) + >>> df.groupby('A').tail(1) + A B + 1 a 2 + 3 b 2 + >>> df.groupby('A').tail(-1) + Empty DataFrame + Columns: [A, B] + Index: [] + """ + self._reset_group_selection() + mask = self._cumcount_array(ascending=False) < n + return self._selected_obj[mask] + + def _reindex_output( + self, output: FrameOrSeries, fill_value: Scalar = np.NaN + ) -> FrameOrSeries: + """ + If we have categorical groupers, then we might want to make sure that + we have a fully re-indexed output to the levels. This means expanding + the output space to accommodate all values in the cartesian product of + our groups, regardless of whether they were observed in the data or + not. This will expand the output space if there are missing groups. + + The method returns early without modifying the input if the number of + groupings is less than 2, self.observed == True or none of the groupers + are categorical. + + Parameters + ---------- + output : Series or DataFrame + Object resulting from grouping and applying an operation. + fill_value : scalar, default np.NaN + Value to use for unobserved categories if self.observed is False. 
+ + Returns + ------- + Series or DataFrame + Object (potentially) re-indexed to include all possible groups. + """ + groupings = self.grouper.groupings + if groupings is None: + return output + elif len(groupings) == 1: + return output + + # if we only care about the observed values + # we are done + elif self.observed: + return output + + # reindexing only applies to a Categorical grouper + elif not any( + isinstance(ping.grouper, (Categorical, CategoricalIndex)) + for ping in groupings + ): + return output + + levels_list = [ping.group_index for ping in groupings] + index, _ = MultiIndex.from_product( + levels_list, names=self.grouper.names + ).sortlevel() + + if self.as_index: + d = { + self.obj._get_axis_name(self.axis): index, + "copy": False, + "fill_value": fill_value, + } + return output.reindex(**d) + + # GH 13204 + # Here, the categorical in-axis groupers, which need to be fully + # expanded, are columns in `output`. An idea is to do: + # output = output.set_index(self.grouper.names) + # .reindex(index).reset_index() + # but special care has to be taken because of possible not-in-axis + # groupers. + # So, we manually select and drop the in-axis grouper columns, + # reindex `output`, and then reset the in-axis grouper columns. 
+ + # Select in-axis groupers + in_axis_grps = ( + (i, ping.name) for (i, ping) in enumerate(groupings) if ping.in_axis + ) + g_nums, g_names = zip(*in_axis_grps) + + output = output.drop(labels=list(g_names), axis=1) + + # Set a temp index and reindex (possibly expanding) + output = output.set_index(self.grouper.result_index).reindex( + index, copy=False, fill_value=fill_value + ) + + # Reset in-axis grouper columns + # (using level numbers `g_nums` because level names may not be unique) + output = output.reset_index(level=g_nums) + + return output.reset_index(drop=True) + + +GroupBy._add_numeric_operations() + + +@Appender(GroupBy.__doc__) +def get_groupby( + obj: NDFrame, + by: Optional[_KeysArgType] = None, + axis: int = 0, + level=None, + grouper: "Optional[ops.BaseGrouper]" = None, + exclusions=None, + selection=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + mutated: bool = False, +) -> GroupBy: + + klass: Type[GroupBy] + if isinstance(obj, Series): + from pandas.core.groupby.generic import SeriesGroupBy + + klass = SeriesGroupBy + elif isinstance(obj, DataFrame): + from pandas.core.groupby.generic import DataFrameGroupBy + + klass = DataFrameGroupBy + else: + raise TypeError(f"invalid type: {obj}") + + return klass( + obj=obj, + keys=by, + axis=axis, + level=level, + grouper=grouper, + exclusions=exclusions, + selection=selection, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + mutated=mutated, + ) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py new file mode 100644 index 00000000..48d3ea20 --- /dev/null +++ b/pandas/core/groupby/grouper.py @@ -0,0 +1,660 @@ +""" +Provide user facing operators for doing the split part of the +split-apply-combine paradigm. 
class Grouper:
    """
    Declarative description of how an object should be grouped.

    A Grouper points either at a column of the target (``key``) or at a
    level of one of its axes (``level``, together with ``axis``).

    When ``axis`` and/or ``level`` are supplied both to ``Grouper`` and to
    ``groupby``, the values given to the ``Grouper`` take precedence.

    Parameters
    ----------
    key : str, defaults to None
        Groupby key, which selects the grouping column of the target.
    level : name/number, defaults to None
        The level for the target index.
    freq : str / frequency object, defaults to None
        Group by the given frequency when the selected target (via key or
        level) is datetime-like. For the available frequencies see `here
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_.
    axis : str, int, defaults to 0
        Number/name of the axis.
    sort : bool, default to False
        Whether to sort the resulting labels.
    closed : {'left' or 'right'}
        Closed end of interval. Only when `freq` parameter is passed.
    label : {'left' or 'right'}
        Interval boundary to use for labeling.
        Only when `freq` parameter is passed.
    convention : {'start', 'end', 'e', 's'}
        If grouper is PeriodIndex and `freq` parameter is passed.
    base : int, default 0
        Only when `freq` parameter is passed.
    loffset : str, DateOffset, timedelta object
        Only when `freq` parameter is passed.

    Returns
    -------
    A specification for a groupby instruction

    Examples
    --------

    Syntactic sugar for ``df.groupby('A')``

    >>> df.groupby(Grouper(key='A'))

    Specify a resample operation on the column 'date'

    >>> df.groupby(Grouper(key='date', freq='60s'))

    Specify a resample operation on the level 'date' on the columns axis
    with a frequency of 60s

    >>> df.groupby(Grouper(level='date', freq='60s', axis=1))
    """

    # attributes shown by __repr__, in display order
    _attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort")

    def __new__(cls, *args, **kwargs):
        # Only a ``freq`` passed by keyword is inspected here; when present
        # the instance is created as a TimeGrouper instead.
        wants_freq = kwargs.get("freq") is not None
        if wants_freq:
            from pandas.core.resample import TimeGrouper

            cls = TimeGrouper
        return super().__new__(cls)

    def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
        # user-facing specification
        self.key, self.level, self.freq = key, level, freq
        self.axis, self.sort = axis, sort

        # state filled in later by _set_grouper / _get_grouper
        self.grouper = None
        self.obj = None
        self.indexer = None
        self.binner = None
        self._grouper = None

    @property
    def ax(self):
        """Alias for the current ``grouper`` attribute."""
        return self.grouper

    def _get_grouper(self, obj, validate: bool = True):
        """
        Resolve this specification against ``obj``.

        Parameters
        ----------
        obj : the subject object
        validate : boolean, default True
            if True, validate the grouper

        Returns
        -------
        a tuple of binner, grouper, obj (possibly sorted)
        """
        self._set_grouper(obj)
        resolved, _, grouped_obj = get_grouper(
            self.obj,
            [self.key],
            axis=self.axis,
            level=self.level,
            sort=self.sort,
            validate=validate,
        )
        self.grouper = resolved
        self.obj = grouped_obj
        return self.binner, self.grouper, self.obj

    def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
        """
        Work out the index-like to group ``obj`` by and store it.

        Sets ``self.obj`` and ``self.grouper`` (and ``self.indexer`` when a
        sort is performed) and returns the new ``self.grouper``.

        Parameters
        ----------
        obj : Series or DataFrame
        sort : bool, default False
            whether the resulting grouper should be sorted
        """
        assert obj is not None

        key, level = self.key, self.level
        if key is not None and level is not None:
            raise ValueError("The Grouper cannot specify both a key and a level!")

        # remember the first value of self.grouper before it is overwritten
        if self._grouper is None:
            self._grouper = self.grouper

        if key is not None:
            # key must name an item of the info axis, unless the grouper is
            # already bound under that name and we are handed a Series
            bound_to_key = getattr(self.grouper, "name", None) == key
            if bound_to_key and isinstance(obj, ABCSeries):
                axis_index = self._grouper.take(obj.index)
            elif key not in obj._info_axis:
                raise KeyError(f"The grouper name {key} is not found")
            else:
                axis_index = Index(obj[key], name=key)
        else:
            axis_index = obj._get_axis(self.axis)
            if level is not None:
                # a level must be a MultiIndex level, or else refer to the
                # flat axis itself (0 or the axis name)
                if isinstance(axis_index, MultiIndex):
                    level_number = axis_index._get_level_number(level)
                    axis_index = Index(
                        axis_index._get_level_values(level_number),
                        name=axis_index.names[level_number],
                    )
                elif level not in (0, axis_index.name):
                    raise ValueError(f"The level {level} is not valid")

        if (self.sort or sort) and not axis_index.is_monotonic:
            # mergesort is stable, which first / last / nth depend on
            order = axis_index.argsort(kind="mergesort")
            self.indexer = order
            axis_index = axis_index.take(order)
            obj = obj.take(order, axis=self.axis)

        self.obj = obj
        self.grouper = axis_index
        return self.grouper

    @property
    def groups(self):
        """The ``groups`` attribute of the current grouper."""
        return self.grouper.groups

    def __repr__(self) -> str:
        # list only the attributes that are not None
        shown = []
        for attr_name in self._attributes:
            value = getattr(self, attr_name)
            if value is not None:
                shown.append(f"{attr_name}={value!r}")
        return f"{type(self).__name__}({', '.join(shown)})"
    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: Optional[FrameOrSeries] = None,
        name=None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
    ):
        """
        Resolve ``grouper`` into the values used to group ``index``.

        The raw ``grouper`` is first normalised by ``_convert_grouper``; the
        branches below then dispatch on ``level`` and on the type of the
        normalised grouper. ``_codes`` and ``_group_index`` are only assigned
        here in the ``level`` and categorical branches.

        Parameters
        ----------
        index : Index
            The axis being grouped.
        grouper : optional
            The grouping key in any of the forms handled below.
        obj : Series or DataFrame, optional
            Used for the ``obj[name]`` lookup when no grouper is given, and
            passed on when ``grouper`` is a ``Grouper``.
        name : optional
            Name of the grouping. When None it may be filled from a
            Series/Index grouper, from the level name, or from the result
            index of a resolved ``Grouper``.
        level : int or level name, optional
        sort : bool, default True
        observed : bool, default False
            Only consulted in the categorical branch.
        in_axis : bool, default False
            Stored as given.

        Raises
        ------
        AssertionError
            If a non-integer ``level`` is not in ``index.names``, or if
            mapping the grouper over the index gives a result whose length
            differs from the index.
        ValueError
            If the grouper reports an ``ndim`` other than 1 (and from
            ``_convert_grouper`` on a length mismatch).
        """
        self.name = name
        self.level = level
        self.grouper = _convert_grouper(index, grouper)
        self.all_grouper = None
        self.index = index
        self.sort = sort
        self.obj = obj
        self.observed = observed
        self.in_axis = in_axis

        # right place for this?
        # (checks the grouper as passed in, not the converted self.grouper)
        if isinstance(grouper, (Series, Index)) and name is None:
            self.name = grouper.name

        if isinstance(grouper, MultiIndex):
            self.grouper = grouper.values

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        if level is not None:
            if not isinstance(level, int):
                # translate a level name into its position
                if level not in index.names:
                    raise AssertionError(f"Level {level} not in index")
                level = index.names.index(level)

            if self.name is None:
                self.name = index.names[level]

            # the index supplies grouper, codes and group index in one call
            (
                self.grouper,
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(self.grouper, level)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get codes
        elif isinstance(self.grouper, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
            if self.name is None:
                self.name = grouper.result_index.name
            # read obj back from the Grouper before self.grouper is replaced
            self.obj = self.grouper.obj
            self.grouper = grouper._get_grouper()

        else:
            # no grouper given: fall back to the column/item called ``name``
            if self.grouper is None and self.name is not None and self.obj is not None:
                self.grouper = self.obj[self.name]

            elif isinstance(self.grouper, (list, tuple)):
                self.grouper = com.asarray_tuplesafe(self.grouper)

            # a passed Categorical
            elif is_categorical_dtype(self.grouper):

                self.grouper, self.all_grouper = recode_for_groupby(
                    self.grouper, self.sort, observed
                )
                categories = self.grouper.categories

                # we make a CategoricalIndex out of the cat grouper
                # preserving the categories / ordered attributes
                self._codes = self.grouper.codes
                if observed:
                    # keep only the codes that occur, dropping the -1 code
                    codes = algorithms.unique1d(self.grouper.codes)
                    codes = codes[codes != -1]
                    if sort or self.grouper.ordered:
                        codes = np.sort(codes)
                else:
                    # one group per category, whether it occurs or not
                    codes = np.arange(len(categories))

                self._group_index = CategoricalIndex(
                    Categorical.from_codes(
                        codes=codes, categories=categories, ordered=self.grouper.ordered
                    ),
                    name=self.name,
                )

            # we are done
            # (a Grouping passed as grouper is unwrapped to its own grouper)
            if isinstance(self.grouper, Grouping):
                self.grouper = self.grouper.grouper

            # no level passed
            elif not isinstance(
                self.grouper, (Series, Index, ExtensionArray, np.ndarray)
            ):
                # anything else is mapped over the index
                if getattr(self.grouper, "ndim", 1) != 1:
                    t = self.name or str(type(self.grouper))
                    raise ValueError(f"Grouper for '{t}' not 1-dimensional")
                self.grouper = self.index.map(self.grouper)
                if not (
                    hasattr(self.grouper, "__len__")
                    and len(self.grouper) == len(self.index)
                ):
                    grper = pprint_thing(self.grouper)
                    errmsg = (
                        "Grouper result violates len(labels) == "
                        f"len(data)\nresult: {grper}"
                    )
                    self.grouper = None  # Try for sanity
                    raise AssertionError(errmsg)

        # if we have a date/time-like grouper, make sure that we have
        # Timestamps like
        if getattr(self.grouper, "dtype", None) is not None:
            if is_datetime64_dtype(self.grouper):
                self.grouper = self.grouper.astype("datetime64[ns]")
            elif is_timedelta64_dtype(self.grouper):

                self.grouper = self.grouper.astype("timedelta64[ns]")
+ This may be composed of multiple Grouping objects, indicating + multiple groupers + + Groupers are ultimately index mappings. They can originate as: + index mappings, keys to columns, functions, or Groupers + + Groupers enable local references to axis,level,sort, while + the passed in axis, level, and sort are 'global'. + + This routine tries to figure out what the passing in references + are and then creates a Grouping for each one, combined into + a BaseGrouper. + + If observed & we have a categorical grouper, only show the observed + values. + + If validate, then check for key/level overlaps. + + """ + group_axis = obj._get_axis(axis) + + # validate that the passed single level is compatible with the passed + # axis of the object + if level is not None: + # TODO: These if-block and else-block are almost same. + # MultiIndex instance check is removable, but it seems that there are + # some processes only for non-MultiIndex in else-block, + # eg. `obj.index.name != level`. We have to consider carefully whether + # these are applicable for MultiIndex. Even if these are applicable, + # we need to check if it makes no side effect to subsequent processes + # on the outside of this condition. 
+ # (GH 17621) + if isinstance(group_axis, MultiIndex): + if is_list_like(level) and len(level) == 1: + level = level[0] + + if key is None and is_scalar(level): + # Get the level values from group_axis + key = group_axis.get_level_values(level) + level = None + + else: + # allow level to be a length-one list-like object + # (e.g., level=[0]) + # GH 13901 + if is_list_like(level): + nlevels = len(level) + if nlevels == 1: + level = level[0] + elif nlevels == 0: + raise ValueError("No group keys passed!") + else: + raise ValueError("multiple levels only valid with MultiIndex") + + if isinstance(level, str): + if obj._get_axis(axis).name != level: + raise ValueError( + f"level name {level} is not the name " + f"of the {obj._get_axis_name(axis)}" + ) + elif level > 0 or level < -1: + raise ValueError("level > 0 or level < -1 only valid with MultiIndex") + + # NOTE: `group_axis` and `group_axis.get_level_values(level)` + # are same in this section. + level = None + key = group_axis + + # a passed-in Grouper, directly convert + if isinstance(key, Grouper): + binner, grouper, obj = key._get_grouper(obj, validate=False) + if key.key is None: + return grouper, [], obj + else: + return grouper, [key.key], obj + + # already have a BaseGrouper, just return it + elif isinstance(key, ops.BaseGrouper): + return key, [], obj + + if not isinstance(key, list): + keys = [key] + match_axis_length = False + else: + keys = key + match_axis_length = len(keys) == len(group_axis) + + # what are we after, exactly? + any_callable = any(callable(g) or isinstance(g, dict) for g in keys) + any_groupers = any(isinstance(g, Grouper) for g in keys) + any_arraylike = any( + isinstance(g, (list, tuple, Series, Index, np.ndarray)) for g in keys + ) + + # is this an index replacement? 
+ if ( + not any_callable + and not any_arraylike + and not any_groupers + and match_axis_length + and level is None + ): + if isinstance(obj, DataFrame): + all_in_columns_index = all( + g in obj.columns or g in obj.index.names for g in keys + ) + else: + assert isinstance(obj, Series) + all_in_columns_index = all(g in obj.index.names for g in keys) + + if not all_in_columns_index: + keys = [com.asarray_tuplesafe(keys)] + + if isinstance(level, (tuple, list)): + if key is None: + keys = [None] * len(level) + levels = level + else: + levels = [level] * len(keys) + + groupings: List[Grouping] = [] + exclusions: List[Hashable] = [] + + # if the actual grouper should be obj[key] + def is_in_axis(key) -> bool: + if not _is_label_like(key): + items = obj._data.items + try: + items.get_loc(key) + except (KeyError, TypeError): + # TypeError shows up here if we pass e.g. Int64Index + return False + + return True + + # if the grouper is obj[name] + def is_in_obj(gpr) -> bool: + if not hasattr(gpr, "name"): + return False + try: + return gpr is obj[gpr.name] + except (KeyError, IndexError, ValueError): + # TODO: ValueError: Given date string not likely a datetime. + # should be KeyError? 
+ return False + + for i, (gpr, level) in enumerate(zip(keys, levels)): + + if is_in_obj(gpr): # df.groupby(df['name']) + in_axis, name = True, gpr.name + exclusions.append(name) + + elif is_in_axis(gpr): # df.groupby('name') + if gpr in obj: + if validate: + obj._check_label_or_level_ambiguity(gpr, axis=axis) + in_axis, name, gpr = True, gpr, obj[gpr] + exclusions.append(name) + elif obj._is_level_reference(gpr, axis=axis): + in_axis, name, level, gpr = False, None, gpr, None + else: + raise KeyError(gpr) + elif isinstance(gpr, Grouper) and gpr.key is not None: + # Add key to exclusions + exclusions.append(gpr.key) + in_axis, name = False, None + else: + in_axis, name = False, None + + if is_categorical_dtype(gpr) and len(gpr) != obj.shape[axis]: + raise ValueError( + f"Length of grouper ({len(gpr)}) and axis ({obj.shape[axis]}) " + "must be same length" + ) + + # create the Grouping + # allow us to passing the actual Grouping as the gpr + ping = ( + Grouping( + group_axis, + gpr, + obj=obj, + name=name, + level=level, + sort=sort, + observed=observed, + in_axis=in_axis, + ) + if not isinstance(gpr, Grouping) + else gpr + ) + + groupings.append(ping) + + if len(groupings) == 0 and len(obj): + raise ValueError("No group keys passed!") + elif len(groupings) == 0: + groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) + + # create the internals grouper + grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated) + return grouper, exclusions, obj + + +def _is_label_like(val) -> bool: + return isinstance(val, (str, tuple)) or (val is not None and is_scalar(val)) + + +def _convert_grouper(axis: Index, grouper): + if isinstance(grouper, dict): + return grouper.get + elif isinstance(grouper, Series): + if grouper.index.equals(axis): + return grouper._values + else: + return grouper.reindex(axis)._values + elif isinstance(grouper, (list, Series, Index, np.ndarray)): + if len(grouper) != len(axis): + raise ValueError("Grouper and 
axis must be same length") + return grouper + else: + return grouper diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py new file mode 100644 index 00000000..92ce80ff --- /dev/null +++ b/pandas/core/groupby/ops.py @@ -0,0 +1,947 @@ +""" +Provide classes to perform the groupby aggregate operations. + +These are not exposed to the user and provide implementations of the grouping +operations, primarily in cython. These classes (BaseGrouper and BinGrouper) +are contained *in* the SeriesGroupBy and DataFrameGroupBy objects. +""" + +import collections +from typing import List, Optional, Sequence, Tuple, Type + +import numpy as np + +from pandas._libs import NaT, iNaT, lib +import pandas._libs.groupby as libgroupby +import pandas._libs.reduction as libreduction +from pandas._typing import FrameOrSeries +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_int_or_float, + ensure_platform_int, + is_bool_dtype, + is_categorical_dtype, + is_complex_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_extension_array_dtype, + is_integer_dtype, + is_numeric_dtype, + is_period_dtype, + is_sparse, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.missing import _maybe_fill, isna + +import pandas.core.algorithms as algorithms +from pandas.core.base import SelectionMixin +import pandas.core.common as com +from pandas.core.frame import DataFrame +from pandas.core.generic import NDFrame +from pandas.core.groupby import base, grouper +from pandas.core.indexes.api import Index, MultiIndex, ensure_index +from pandas.core.series import Series +from pandas.core.sorting import ( + compress_group_index, + decons_obs_group_ids, + get_flattened_iterator, + get_group_index, + get_group_index_sorter, + get_indexer_dict, +) + + +class BaseGrouper: + """ + This is an internal Grouper class, which actually holds + 
the generated groups + + Parameters + ---------- + axis : Index + groupings : Sequence[Grouping] + all the grouping instances to handle in this grouper + for example for grouper list to groupby, need to pass the list + sort : bool, default True + whether this grouper will give sorted result or not + group_keys : bool, default True + mutated : bool, default False + indexer : intp array, optional + the indexer created by Grouper + some groupers (TimeGrouper) will sort its axis and its + group_info is also sorted, so need the indexer to reorder + + """ + + def __init__( + self, + axis: Index, + groupings: "Sequence[grouper.Grouping]", + sort: bool = True, + group_keys: bool = True, + mutated: bool = False, + indexer: Optional[np.ndarray] = None, + ): + assert isinstance(axis, Index), axis + + self._filter_empty_groups = self.compressed = len(groupings) != 1 + self.axis = axis + self._groupings: List[grouper.Grouping] = list(groupings) + self.sort = sort + self.group_keys = group_keys + self.mutated = mutated + self.indexer = indexer + + @property + def groupings(self) -> List["grouper.Grouping"]: + return self._groupings + + @property + def shape(self): + return tuple(ping.ngroups for ping in self.groupings) + + def __iter__(self): + return iter(self.indices) + + @property + def nkeys(self) -> int: + return len(self.groupings) + + def get_iterator(self, data: FrameOrSeries, axis: int = 0): + """ + Groupby iterator + + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group + """ + splitter = self._get_splitter(data, axis=axis) + keys = self._get_group_keys() + for key, (i, group) in zip(keys, splitter): + yield key, group + + def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter": + comp_ids, _, ngroups = self.group_info + return get_splitter(data, comp_ids, ngroups, axis=axis) + + def _get_grouper(self): + """ + We are a grouper as part of another's groupings. 
    def apply(self, f, data: FrameOrSeries, axis: int = 0):
        """
        Call ``f`` on every group of ``data``.

        ``splitter.fast_apply`` is tried first when its preconditions (see
        the ``elif`` below) hold; otherwise, or when the fast path did not
        produce one result per group, ``f`` is called on each group in a
        Python loop.

        Parameters
        ----------
        f : callable
            Called with one group at a time.
        data : Series or DataFrame
        axis : int, default 0

        Returns
        -------
        tuple of (group_keys, result_values, mutated)
            ``mutated`` starts as ``self.mutated``, may be replaced by the
            value ``fast_apply`` returns, and is set to True when a
            slow-path result is not indexed like its group.
        """
        mutated = self.mutated
        splitter = self._get_splitter(data, axis=axis)
        group_keys = self._get_group_keys()
        result_values = None

        sdata: FrameOrSeries = splitter._get_sorted_data()
        if sdata.ndim == 2 and np.any(sdata.dtypes.apply(is_extension_array_dtype)):
            # calling splitter.fast_apply will raise TypeError via apply_frame_axis0
            # if we pass EA instead of ndarray
            # TODO: can we have a workaround for EAs backed by ndarray?
            pass

        elif (
            com.get_callable_name(f) not in base.plotting_methods
            and isinstance(splitter, FrameSplitter)
            and axis == 0
            # fast_apply/libreduction doesn't allow non-numpy backed indexes
            and not sdata.index._has_complex_internals
        ):
            try:
                result_values, mutated = splitter.fast_apply(f, group_keys)

            except libreduction.InvalidApply as err:
                # This Exception is raised if `f` triggers an exception
                # but it is preferable to raise the exception in Python.
                if "Let this error raise above us" not in str(err):
                    # TODO: can we infer anything about whether this is
                    # worth-retrying in pure-python?
                    raise

            else:
                # If the fast apply path could be used we can return here.
                # Otherwise we need to fall back to the slow implementation.
                if len(result_values) == len(group_keys):
                    return group_keys, result_values, mutated

        for key, (i, group) in zip(group_keys, splitter):
            # object.__setattr__ is called directly, so any __setattr__
            # defined on the group's class is not used for "name"
            object.__setattr__(group, "name", key)

            # result_values is None if fast apply path wasn't taken
            # or fast apply aborted with an unexpected exception.
            # In either case, initialize the result list and perform
            # the slow iteration.
            if result_values is None:
                result_values = []

            # If result_values is not None we're in the case that the
            # fast apply loop was broken prematurely but we have
            # already the result for the first group which we can reuse.
            elif i == 0:
                continue

            # group might be modified
            # (so its axes are captured before f runs)
            group_axes = group.axes
            res = f(group)
            if not _is_indexed_like(res, group_axes):
                mutated = True
            result_values.append(res)

        return group_keys, result_values, mutated
+ """ + ids, _, ngroup = self.group_info + ids = ensure_platform_int(ids) + if ngroup: + out = np.bincount(ids[ids != -1], minlength=ngroup) + else: + out = [] + return Series(out, index=self.result_index, dtype="int64") + + @cache_readonly + def groups(self): + """ dict {group name -> group labels} """ + if len(self.groupings) == 1: + return self.groupings[0].groups + else: + to_groupby = zip(*(ping.grouper for ping in self.groupings)) + to_groupby = Index(to_groupby) + return self.axis.groupby(to_groupby) + + @cache_readonly + def is_monotonic(self) -> bool: + # return if my group orderings are monotonic + return Index(self.group_info[0]).is_monotonic + + @cache_readonly + def group_info(self): + comp_ids, obs_group_ids = self._get_compressed_codes() + + ngroups = len(obs_group_ids) + comp_ids = ensure_int64(comp_ids) + return comp_ids, obs_group_ids, ngroups + + @cache_readonly + def codes_info(self) -> np.ndarray: + # return the codes of items in original grouped axis + codes, _, _ = self.group_info + if self.indexer is not None: + sorter = np.lexsort((codes, self.indexer)) + codes = codes[sorter] + return codes + + def _get_compressed_codes(self) -> Tuple[np.ndarray, np.ndarray]: + all_codes = self.codes + if len(all_codes) > 1: + group_index = get_group_index(all_codes, self.shape, sort=True, xnull=True) + return compress_group_index(group_index, sort=self.sort) + + ping = self.groupings[0] + return ping.codes, np.arange(len(ping.group_index)) + + @cache_readonly + def ngroups(self) -> int: + return len(self.result_index) + + @property + def reconstructed_codes(self) -> List[np.ndarray]: + codes = self.codes + comp_ids, obs_ids, _ = self.group_info + return decons_obs_group_ids(comp_ids, obs_ids, self.shape, codes, xnull=True) + + @cache_readonly + def result_index(self) -> Index: + if not self.compressed and len(self.groupings) == 1: + return self.groupings[0].result_index.rename(self.names[0]) + + codes = self.reconstructed_codes + levels = 
[ping.result_index for ping in self.groupings] + result = MultiIndex( + levels=levels, codes=codes, verify_integrity=False, names=self.names + ) + return result + + def get_group_levels(self): + if not self.compressed and len(self.groupings) == 1: + return [self.groupings[0].result_index] + + name_list = [] + for ping, codes in zip(self.groupings, self.reconstructed_codes): + codes = ensure_platform_int(codes) + levels = ping.result_index.take(codes) + + name_list.append(levels) + + return name_list + + # ------------------------------------------------------------ + # Aggregation functions + + _cython_functions = { + "aggregate": { + "add": "group_add", + "prod": "group_prod", + "min": "group_min", + "max": "group_max", + "mean": "group_mean", + "median": "group_median", + "var": "group_var", + "first": "group_nth", + "last": "group_last", + "ohlc": "group_ohlc", + }, + "transform": { + "cumprod": "group_cumprod", + "cumsum": "group_cumsum", + "cummin": "group_cummin", + "cummax": "group_cummax", + "rank": "group_rank", + }, + } + + _cython_arity = {"ohlc": 4} # OHLC + + _name_functions = {"ohlc": ["open", "high", "low", "close"]} + + def _is_builtin_func(self, arg): + """ + if we define an builtin function for this argument, return it, + otherwise return the arg + """ + return SelectionMixin._builtin_table.get(arg, arg) + + def _get_cython_function(self, kind: str, how: str, values, is_numeric: bool): + + dtype_str = values.dtype.name + ftype = self._cython_functions[kind][how] + + # see if there is a fused-type version of function + # only valid for numeric + f = getattr(libgroupby, ftype, None) + if f is not None and is_numeric: + return f + + # otherwise find dtype-specific version, falling back to object + for dt in [dtype_str, "object"]: + f2 = getattr(libgroupby, f"{ftype}_{dt}", None) + if f2 is not None: + return f2 + + if hasattr(f, "__signatures__"): + # inspect what fused types are implemented + if dtype_str == "object" and "object" not in 
    def _get_cython_func_and_vals(
        self, kind: str, how: str, values: np.ndarray, is_numeric: bool
    ):
        """
        Find the appropriate cython function, casting if necessary.

        The lookup is first tried with ``values`` as given. If that raises
        NotImplementedError and ``is_numeric`` is True, ``values`` is cast
        (to float64, or to complex when float64 casting fails on
        complex data) and the lookup is repeated once.

        Parameters
        ----------
        kind : str
        how : str
        values : np.ndarray
        is_numeric : bool

        Returns
        -------
        func : callable
        values : np.ndarray
            Possibly cast, see above.

        Raises
        ------
        NotImplementedError
            If no function is found, including after the cast.
        TypeError
            If the float64 cast fails and the values are not inferred to be
            complex.
        """
        try:
            func = self._get_cython_function(kind, how, values, is_numeric)
        except NotImplementedError:
            if is_numeric:
                try:
                    values = ensure_float64(values)
                except TypeError:
                    # float64 cast failed: only complex data gets a second
                    # chance, anything else re-raises the TypeError
                    if lib.infer_dtype(values, skipna=False) == "complex":
                        values = values.astype(complex)
                    else:
                        raise
                # retry the lookup with the cast values
                func = self._get_cython_function(kind, how, values, is_numeric)
            else:
                raise
        return func, values
adding datetimes + + # categoricals are only 1d, so we + # are not setup for dim transforming + if is_categorical_dtype(values) or is_sparse(values): + raise NotImplementedError(f"{values.dtype} dtype not supported") + elif is_datetime64_any_dtype(values): + if how in ["add", "prod", "cumsum", "cumprod"]: + raise NotImplementedError( + f"datetime64 type does not support {how} operations" + ) + elif is_timedelta64_dtype(values): + if how in ["prod", "cumprod"]: + raise NotImplementedError( + f"timedelta64 type does not support {how} operations" + ) + + if is_datetime64tz_dtype(values.dtype): + # Cast to naive; we'll cast back at the end of the function + # TODO: possible need to reshape? kludge can be avoided when + # 2D EA is allowed. + values = values.view("M8[ns]") + + is_datetimelike = needs_i8_conversion(values.dtype) + is_numeric = is_numeric_dtype(values.dtype) + + if is_datetimelike: + values = values.view("int64") + is_numeric = True + elif is_bool_dtype(values.dtype): + values = ensure_float64(values) + elif is_integer_dtype(values): + # we use iNaT for the missing value on ints + # so pre-convert to guard this condition + if (values == iNaT).any(): + values = ensure_float64(values) + else: + values = ensure_int_or_float(values) + elif is_numeric and not is_complex_dtype(values): + values = ensure_float64(values) + else: + values = values.astype(object) + + arity = self._cython_arity.get(how, 1) + + vdim = values.ndim + swapped = False + if vdim == 1: + values = values[:, None] + out_shape = (self.ngroups, arity) + else: + if axis > 0: + swapped = True + assert axis == 1, axis + values = values.T + if arity > 1: + raise NotImplementedError( + "arity of more than 1 is not supported for the 'how' argument" + ) + out_shape = (self.ngroups,) + values.shape[1:] + + func, values = self._get_cython_func_and_vals(kind, how, values, is_numeric) + + if how == "rank": + out_dtype = "float" + else: + if is_numeric: + out_dtype = 
f"{values.dtype.kind}{values.dtype.itemsize}" + else: + out_dtype = "object" + + codes, _, _ = self.group_info + + if kind == "aggregate": + result = _maybe_fill( + np.empty(out_shape, dtype=out_dtype), fill_value=np.nan + ) + counts = np.zeros(self.ngroups, dtype=np.int64) + result = self._aggregate( + result, counts, values, codes, func, is_datetimelike, min_count + ) + elif kind == "transform": + result = _maybe_fill( + np.empty_like(values, dtype=out_dtype), fill_value=np.nan + ) + + # TODO: min_count + result = self._transform( + result, values, codes, func, is_datetimelike, **kwargs + ) + + if is_integer_dtype(result) and not is_datetimelike: + mask = result == iNaT + if mask.any(): + result = result.astype("float64") + result[mask] = np.nan + elif ( + how == "add" + and is_integer_dtype(orig_values.dtype) + and is_extension_array_dtype(orig_values.dtype) + ): + # We need this to ensure that Series[Int64Dtype].resample().sum() + # remains int64 dtype. + # Two options for avoiding this special case + # 1. mask-aware ops and avoid casting to float with NaN above + # 2. 
specify the result dtype when calling this method + result = result.astype("int64") + + if kind == "aggregate" and self._filter_empty_groups and not counts.all(): + assert result.ndim != 2 + result = result[counts > 0] + + if vdim == 1 and arity == 1: + result = result[:, 0] + + names: Optional[List[str]] = self._name_functions.get(how, None) + + if swapped: + result = result.swapaxes(0, axis) + + if is_datetime64tz_dtype(orig_values.dtype) or is_period_dtype( + orig_values.dtype + ): + # We need to use the constructors directly for these dtypes + # since numpy won't recognize them + # https://github.com/pandas-dev/pandas/issues/31471 + result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype) + elif is_datetimelike and kind == "aggregate": + result = result.astype(orig_values.dtype) + + return result, names + + def aggregate( + self, values, how: str, axis: int = 0, min_count: int = -1 + ) -> Tuple[np.ndarray, Optional[List[str]]]: + return self._cython_operation( + "aggregate", values, how, axis, min_count=min_count + ) + + def transform(self, values, how: str, axis: int = 0, **kwargs): + return self._cython_operation("transform", values, how, axis, **kwargs) + + def _aggregate( + self, + result, + counts, + values, + comp_ids, + agg_func, + is_datetimelike: bool, + min_count: int = -1, + ): + if agg_func is libgroupby.group_nth: + # different signature from the others + # TODO: should we be using min_count instead of hard-coding it? 
+ agg_func(result, counts, values, comp_ids, rank=1, min_count=-1) + else: + agg_func(result, counts, values, comp_ids, min_count) + + return result + + def _transform( + self, result, values, comp_ids, transform_func, is_datetimelike: bool, **kwargs + ): + + comp_ids, _, ngroups = self.group_info + transform_func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs) + + return result + + def agg_series(self, obj: Series, func): + # Caller is responsible for checking ngroups != 0 + assert self.ngroups != 0 + + if len(obj) == 0: + # SeriesGrouper would raise if we were to call _aggregate_series_fast + return self._aggregate_series_pure_python(obj, func) + + elif is_extension_array_dtype(obj.dtype): + # _aggregate_series_fast would raise TypeError when + # calling libreduction.Slider + # In the datetime64tz case it would incorrectly cast to tz-naive + # TODO: can we get a performant workaround for EAs backed by ndarray? + return self._aggregate_series_pure_python(obj, func) + + elif obj.index._has_complex_internals: + # Pre-empt TypeError in _aggregate_series_fast + return self._aggregate_series_pure_python(obj, func) + + try: + return self._aggregate_series_fast(obj, func) + except ValueError as err: + if "Function does not reduce" in str(err): + # raised in libreduction + pass + else: + raise + return self._aggregate_series_pure_python(obj, func) + + def _aggregate_series_fast(self, obj: Series, func): + # At this point we have already checked that + # - obj.index is not a MultiIndex + # - obj is backed by an ndarray, not ExtensionArray + # - len(obj) > 0 + # - ngroups != 0 + func = self._is_builtin_func(func) + + group_index, _, ngroups = self.group_info + + # avoids object / Series creation overhead + dummy = obj._get_values(slice(None, 0)) + indexer = get_group_index_sorter(group_index, ngroups) + obj = obj.take(indexer) + group_index = algorithms.take_nd(group_index, indexer, allow_fill=False) + grouper = libreduction.SeriesGrouper(obj, func, 
group_index, ngroups, dummy) + result, counts = grouper.get_result() + return result, counts + + def _aggregate_series_pure_python(self, obj: Series, func): + + group_index, _, ngroups = self.group_info + + counts = np.zeros(ngroups, dtype=int) + result = None + + splitter = get_splitter(obj, group_index, ngroups, axis=0) + + for label, group in splitter: + res = func(group) + if result is None: + if isinstance(res, (Series, Index, np.ndarray)): + if len(res) == 1: + # e.g. test_agg_lambda_with_timezone lambda e: e.head(1) + # FIXME: are we potentially losing import res.index info? + res = res.item() + else: + raise ValueError("Function does not reduce") + result = np.empty(ngroups, dtype="O") + + counts[label] = group.shape[0] + result[label] = res + + assert result is not None + result = lib.maybe_convert_objects(result, try_float=0) + # TODO: try_cast back to EA? + + return result, counts + + +class BinGrouper(BaseGrouper): + """ + This is an internal Grouper class + + Parameters + ---------- + bins : the split index of binlabels to group the item of axis + binlabels : the label list + filter_empty : boolean, default False + mutated : boolean, default False + indexer : a intp array + + Examples + -------- + bins: [2, 4, 6, 8, 10] + binlabels: DatetimeIndex(['2005-01-01', '2005-01-03', + '2005-01-05', '2005-01-07', '2005-01-09'], + dtype='datetime64[ns]', freq='2D') + + the group_info, which contains the label of each item in grouped + axis, the index of label in label list, group number, is + + (array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 1, 2, 3, 4]), 5) + + means that, the grouped axis has 10 items, can be grouped into 5 + labels, the first and second items belong to the first label, the + third and forth items belong to the second label, and so on + + """ + + def __init__( + self, + bins, + binlabels, + filter_empty: bool = False, + mutated: bool = False, + indexer=None, + ): + self.bins = ensure_int64(bins) + self.binlabels = ensure_index(binlabels) + 
self._filter_empty_groups = filter_empty + self.mutated = mutated + self.indexer = indexer + + # These lengths must match, otherwise we could call agg_series + # with empty self.bins, which would raise in libreduction. + assert len(self.binlabels) == len(self.bins) + + @cache_readonly + def groups(self): + """ dict {group name -> group labels} """ + + # this is mainly for compat + # GH 3881 + result = { + key: value + for key, value in zip(self.binlabels, self.bins) + if key is not NaT + } + return result + + @property + def nkeys(self) -> int: + return 1 + + def _get_grouper(self): + """ + We are a grouper as part of another's groupings. + + We have a specific method of grouping, so cannot + convert to a Index for our grouper. + """ + return self + + def get_iterator(self, data: FrameOrSeries, axis: int = 0): + """ + Groupby iterator + + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group + """ + slicer = lambda start, edge: data._slice(slice(start, edge), axis=axis) + length = len(data.axes[axis]) + + start = 0 + for edge, label in zip(self.bins, self.binlabels): + if label is not NaT: + yield label, slicer(start, edge) + start = edge + + if start < length: + yield self.binlabels[-1], slicer(start, None) + + @cache_readonly + def indices(self): + indices = collections.defaultdict(list) + + i = 0 + for label, bin in zip(self.binlabels, self.bins): + if i < bin: + if label is not NaT: + indices[label] = list(range(i, bin)) + i = bin + return indices + + @cache_readonly + def group_info(self): + ngroups = self.ngroups + obs_group_ids = np.arange(ngroups) + rep = np.diff(np.r_[0, self.bins]) + + rep = ensure_platform_int(rep) + if ngroups == len(self.bins): + comp_ids = np.repeat(np.arange(ngroups), rep) + else: + comp_ids = np.repeat(np.r_[-1, np.arange(ngroups)], rep) + + return ( + comp_ids.astype("int64", copy=False), + obs_group_ids.astype("int64", copy=False), + ngroups, + ) + + @cache_readonly + def 
reconstructed_codes(self) -> List[np.ndarray]: + # get unique result indices, and prepend 0 as groupby starts from the first + return [np.r_[0, np.flatnonzero(self.bins[1:] != self.bins[:-1]) + 1]] + + @cache_readonly + def result_index(self): + if len(self.binlabels) != 0 and isna(self.binlabels[0]): + return self.binlabels[1:] + + return self.binlabels + + @property + def levels(self): + return [self.binlabels] + + @property + def names(self): + return [self.binlabels.name] + + @property + def groupings(self) -> "List[grouper.Grouping]": + return [ + grouper.Grouping(lvl, lvl, in_axis=False, level=None, name=name) + for lvl, name in zip(self.levels, self.names) + ] + + def agg_series(self, obj: Series, func): + # Caller is responsible for checking ngroups != 0 + assert self.ngroups != 0 + assert len(self.bins) > 0 # otherwise we'd get IndexError in get_result + + if is_extension_array_dtype(obj.dtype): + # pre-empt SeriesBinGrouper from raising TypeError + return self._aggregate_series_pure_python(obj, func) + + dummy = obj[:0] + grouper = libreduction.SeriesBinGrouper(obj, func, self.bins, dummy) + return grouper.get_result() + + +def _is_indexed_like(obj, axes) -> bool: + if isinstance(obj, Series): + if len(axes) > 1: + return False + return obj.index.equals(axes[0]) + elif isinstance(obj, DataFrame): + return obj.index.equals(axes[0]) + + return False + + +# ---------------------------------------------------------------------- +# Splitting / application + + +class DataSplitter: + def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0): + self.data = data + self.labels = ensure_int64(labels) + self.ngroups = ngroups + + self.axis = axis + assert isinstance(axis, int), axis + + @cache_readonly + def slabels(self): + # Sorted labels + return algorithms.take_nd(self.labels, self.sort_idx, allow_fill=False) + + @cache_readonly + def sort_idx(self): + # Counting sort indexer + return get_group_index_sorter(self.labels, self.ngroups) + + def 
__iter__(self): + sdata = self._get_sorted_data() + + if self.ngroups == 0: + # we are inside a generator, rather than raise StopIteration + # we merely return signal the end + return + + starts, ends = lib.generate_slices(self.slabels, self.ngroups) + + for i, (start, end) in enumerate(zip(starts, ends)): + yield i, self._chop(sdata, slice(start, end)) + + def _get_sorted_data(self) -> FrameOrSeries: + return self.data.take(self.sort_idx, axis=self.axis) + + def _chop(self, sdata, slice_obj: slice) -> NDFrame: + raise AbstractMethodError(self) + + +class SeriesSplitter(DataSplitter): + def _chop(self, sdata: Series, slice_obj: slice) -> Series: + return sdata._get_values(slice_obj) + + +class FrameSplitter(DataSplitter): + def fast_apply(self, f, names): + # must return keys::list, values::list, mutated::bool + starts, ends = lib.generate_slices(self.slabels, self.ngroups) + + sdata = self._get_sorted_data() + return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) + + def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: + if self.axis == 0: + return sdata.iloc[slice_obj] + else: + return sdata._slice(slice_obj, axis=1) + + +def get_splitter(data: FrameOrSeries, *args, **kwargs) -> DataSplitter: + if isinstance(data, Series): + klass: Type[DataSplitter] = SeriesSplitter + else: + # i.e. 
DataFrame + klass = FrameSplitter + + return klass(data, *args, **kwargs) diff --git a/pandas/core/index.py b/pandas/core/index.py new file mode 100644 index 00000000..8cff53d7 --- /dev/null +++ b/pandas/core/index.py @@ -0,0 +1,31 @@ +import warnings + +from pandas.core.indexes.api import ( # noqa:F401 + CategoricalIndex, + DatetimeIndex, + Float64Index, + Index, + Int64Index, + IntervalIndex, + InvalidIndexError, + MultiIndex, + NaT, + NumericIndex, + PeriodIndex, + RangeIndex, + TimedeltaIndex, + UInt64Index, + _new_Index, + ensure_index, + ensure_index_from_sequences, + get_objs_combined_axis, +) +from pandas.core.indexes.multi import _sparsify # noqa:F401 + +# GH#30193 +warnings.warn( + "pandas.core.index is deprecated and will be removed in a future version. " + "The public classes are available in the top-level namespace.", + FutureWarning, + stacklevel=2, +) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py new file mode 100644 index 00000000..e9bdc99c --- /dev/null +++ b/pandas/core/indexers.py @@ -0,0 +1,414 @@ +""" +Low-dependency indexing utilities. +""" +import warnings + +import numpy as np + +from pandas._typing import Any, AnyArrayLike + +from pandas.core.dtypes.common import ( + is_array_like, + is_bool_dtype, + is_extension_array_dtype, + is_integer_dtype, + is_list_like, +) +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries + +# ----------------------------------------------------------- +# Indexer Identification + + +def is_list_like_indexer(key) -> bool: + """ + Check if we have a list-like indexer that is *not* a NamedTuple. + + Parameters + ---------- + key : object + + Returns + ------- + bool + """ + # allow a list_like, but exclude NamedTuples which can be indexers + return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) + + +def is_scalar_indexer(indexer, arr_value) -> bool: + """ + Return True if we are all scalar indexers. 
+ + Returns + ------- + bool + """ + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) + return False + + +def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool: + """ + Check if we have an empty indexer. + + Parameters + ---------- + indexer : object + arr_value : np.ndarray + + Returns + ------- + bool + """ + if is_list_like(indexer) and not len(indexer): + return True + if arr_value.ndim == 1: + if not isinstance(indexer, tuple): + indexer = tuple([indexer]) + return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) + return False + + +# ----------------------------------------------------------- +# Indexer Validation + + +def check_setitem_lengths(indexer, value, values) -> None: + """ + Validate that value and indexer are the same length. + + An special-case is allowed for when the indexer is a boolean array + and the number of true values equals the length of ``value``. In + this case, no exception is raised. + + Parameters + ---------- + indexer : sequence + Key for the setitem. + value : array-like + Value for the setitem. + values : array-like + Values being set into. + + Returns + ------- + None + + Raises + ------ + ValueError + When the indexer is an ndarray or list and the lengths don't match. 
+ """ + # boolean with truth values == len of the value is ok too + if isinstance(indexer, (np.ndarray, list)): + if is_list_like(value) and len(indexer) != len(value): + if not ( + isinstance(indexer, np.ndarray) + and indexer.dtype == np.bool_ + and len(indexer[indexer]) == len(value) + ): + raise ValueError( + "cannot set using a list-like indexer " + "with a different length than the value" + ) + + elif isinstance(indexer, slice): + # slice + if is_list_like(value) and len(values): + if len(value) != length_of_indexer(indexer, values): + raise ValueError( + "cannot set using a slice indexer with a " + "different length than the value" + ) + + +def validate_indices(indices: np.ndarray, n: int) -> None: + """ + Perform bounds-checking for an indexer. + + -1 is allowed for indicating missing values. + + Parameters + ---------- + indices : ndarray + n : int + Length of the array being indexed. + + Raises + ------ + ValueError + + Examples + -------- + >>> validate_indices([1, 2], 3) + # OK + >>> validate_indices([1, -2], 3) + ValueError + >>> validate_indices([1, 2, 3], 3) + IndexError + >>> validate_indices([-1, -1], 0) + # OK + >>> validate_indices([0, 1], 0) + IndexError + """ + if len(indices): + min_idx = indices.min() + if min_idx < -1: + msg = f"'indices' contains values less than allowed ({min_idx} < -1)" + raise ValueError(msg) + + max_idx = indices.max() + if max_idx >= n: + raise IndexError("indices are out-of-bounds") + + +# ----------------------------------------------------------- +# Indexer Conversion + + +def maybe_convert_indices(indices, n: int): + """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + Array of indices that we are to convert. + n : int + Number of elements in the array that we are indexing. 
+ + Returns + ------- + array-like + An array-like of positive indices that correspond to the ones + that were passed in initially to this function. + + Raises + ------ + IndexError + One of the converted indices either exceeded the number of, + elements (specified by `n`), or was still negative. + """ + if isinstance(indices, list): + indices = np.array(indices) + if len(indices) == 0: + # If `indices` is empty, np.array will return a float, + # and will cause indexing errors. + return np.empty(0, dtype=np.intp) + + mask = indices < 0 + if mask.any(): + indices = indices.copy() + indices[mask] += n + + mask = (indices >= n) | (indices < 0) + if mask.any(): + raise IndexError("indices are out-of-bounds") + return indices + + +# ----------------------------------------------------------- +# Unsorted + + +def length_of_indexer(indexer, target=None) -> int: + """ + Return the length of a single non-tuple indexer which could be a slice. + + Returns + ------- + int + """ + if target is not None and isinstance(indexer, slice): + target_len = len(target) + start = indexer.start + stop = indexer.stop + step = indexer.step + if start is None: + start = 0 + elif start < 0: + start += target_len + if stop is None or stop > target_len: + stop = target_len + elif stop < 0: + stop += target_len + if step is None: + step = 1 + elif step < 0: + start, stop = stop + 1, start + 1 + step = -step + return (stop - start + step - 1) // step + elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)): + return len(indexer) + elif not is_list_like_indexer(indexer): + return 1 + raise AssertionError("cannot find the length of the indexer") + + +def deprecate_ndim_indexing(result): + """ + Helper function to raise the deprecation warning for multi-dimensional + indexing on 1D Series/Index. + + GH#27125 indexer like idx[:, None] expands dim, but we cannot do that + and keep an index, so we currently return ndarray, which is deprecated + (Deprecation GH#30588). 
+ """ + if np.ndim(result) > 1: + warnings.warn( + "Support for multi-dimensional indexing (e.g. `index[:, None]`) " + "on an Index is deprecated and will be removed in a future " + "version. Convert to a numpy array before indexing instead.", + DeprecationWarning, + stacklevel=3, + ) + + +# ----------------------------------------------------------- +# Public indexer validation + + +def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: + """ + Check if `indexer` is a valid array indexer for `array`. + + For a boolean mask, `array` and `indexer` are checked to have the same + length. The dtype is validated, and if it is an integer or boolean + ExtensionArray, it is checked if there are missing values present, and + it is converted to the appropriate numpy array. Other dtypes will raise + an error. + + Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed + through as is. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + array : array-like + The array that is being indexed (only used for the length). + indexer : array-like or list-like + The array-like that's used to index. List-like input that is not yet + a numpy array or an ExtensionArray is converted to one. Other input + types are passed through as is + + Returns + ------- + numpy.ndarray + The validated indexer as a numpy array that can be used to index. + + Raises + ------ + IndexError + When the lengths don't match. + ValueError + When `indexer` cannot be converted to a numpy ndarray to index + (e.g. presence of missing values). + + See Also + -------- + api.types.is_bool_dtype : Check if `key` is of boolean dtype. + + Examples + -------- + When checking a boolean mask, a boolean ndarray is returned when the + arguments are all valid. + + >>> mask = pd.array([True, False]) + >>> arr = pd.array([1, 2]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + An IndexError is raised when the lengths don't match. 
+ + >>> mask = pd.array([True, False, True]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + Traceback (most recent call last): + ... + IndexError: Boolean index has wrong length: 3 instead of 2. + + NA values in a boolean array are treated as False. + + >>> mask = pd.array([True, pd.NA]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + A numpy boolean mask will get passed through (if the length is correct): + + >>> mask = np.array([True, False]) + >>> pd.api.indexers.check_array_indexer(arr, mask) + array([ True, False]) + + Similarly for integer indexers, an integer ndarray is returned when it is + a valid indexer, otherwise an error is (for integer indexers, a matching + length is not required): + + >>> indexer = pd.array([0, 2], dtype="Int64") + >>> arr = pd.array([1, 2, 3]) + >>> pd.api.indexers.check_array_indexer(arr, indexer) + array([0, 2]) + + >>> indexer = pd.array([0, pd.NA], dtype="Int64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) + Traceback (most recent call last): + ... + ValueError: Cannot index with an integer indexer containing NA values + + For non-integer/boolean dtypes, an appropriate error is raised: + + >>> indexer = np.array([0., 2.], dtype="float64") + >>> pd.api.indexers.check_array_indexer(arr, indexer) + Traceback (most recent call last): + ... 
+ IndexError: arrays used as indices must be of integer or boolean type + """ + from pandas.core.construction import array as pd_array + + # whathever is not an array-like is returned as-is (possible valid array + # indexers that are not array-like: integer, slice, Ellipsis, None) + # In this context, tuples are not considered as array-like, as they have + # a specific meaning in indexing (multi-dimensional indexing) + if is_list_like(indexer): + if isinstance(indexer, tuple): + return indexer + else: + return indexer + + # convert list-likes to array + if not is_array_like(indexer): + indexer = pd_array(indexer) + if len(indexer) == 0: + # empty list is converted to float array by pd.array + indexer = np.array([], dtype=np.intp) + + dtype = indexer.dtype + if is_bool_dtype(dtype): + if is_extension_array_dtype(dtype): + indexer = indexer.to_numpy(dtype=bool, na_value=False) + else: + indexer = np.asarray(indexer, dtype=bool) + + # GH26658 + if len(indexer) != len(array): + raise IndexError( + f"Boolean index has wrong length: " + f"{len(indexer)} instead of {len(array)}" + ) + elif is_integer_dtype(dtype): + try: + indexer = np.asarray(indexer, dtype=np.intp) + except ValueError: + raise ValueError( + "Cannot index with an integer indexer containing NA values" + ) + else: + raise IndexError("arrays used as indices must be of integer or boolean type") + + return indexer diff --git a/pandas/core/indexes/__init__.py b/pandas/core/indexes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py new file mode 100644 index 00000000..db774a03 --- /dev/null +++ b/pandas/core/indexes/accessors.py @@ -0,0 +1,338 @@ +""" +datetimelike delegation +""" +import numpy as np + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_datetime_arraylike, + is_integer_dtype, + is_list_like, + is_period_arraylike, + is_timedelta64_dtype, +) 
+from pandas.core.dtypes.generic import ABCSeries + +from pandas.core.accessor import PandasDelegate, delegate_names +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray +from pandas.core.base import NoNewAttributesMixin, PandasObject +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + + +class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): + def __init__(self, data, orig): + if not isinstance(data, ABCSeries): + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + self._parent = data + self.orig = orig + self.name = getattr(data, "name", None) + self._freeze() + + def _get_values(self): + data = self._parent + if is_datetime64_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) + + elif is_datetime64tz_dtype(data.dtype): + return DatetimeIndex(data, copy=False, name=self.name) + + elif is_timedelta64_dtype(data.dtype): + return TimedeltaIndex(data, copy=False, name=self.name) + + else: + if is_period_arraylike(data): + # TODO: use to_period_array + return PeriodArray(data, copy=False) + if is_datetime_arraylike(data): + return DatetimeIndex(data, copy=False, name=self.name) + + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + def _delegate_property_get(self, name): + from pandas import Series + + values = self._get_values() + + result = getattr(values, name) + + # maybe need to upcast (ints) + if isinstance(result, np.ndarray): + if is_integer_dtype(result): + result = result.astype("int64") + elif not is_list_like(result): + return result + + result = np.asarray(result) + + if self.orig is not None: + index = self.orig.index + else: + index = self._parent.index + # return the result as a Series, which is by definition a copy + result = Series(result, index=index, name=self.name) + + # setting this object will show a 
SettingWithCopyWarning/Error + result._is_copy = ( + "modifications to a property of a datetimelike " + "object are not supported and are discarded. " + "Change values on the original." + ) + + return result + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise ValueError( + "modifications to a property of a datetimelike object are not supported. " + "Change values on the original." + ) + + def _delegate_method(self, name, *args, **kwargs): + from pandas import Series + + values = self._get_values() + + method = getattr(values, name) + result = method(*args, **kwargs) + + if not is_list_like(result): + return result + + result = Series(result, index=self._parent.index, name=self.name) + + # setting this object will show a SettingWithCopyWarning/Error + result._is_copy = ( + "modifications to a method of a datetimelike " + "object are not supported and are discarded. " + "Change values on the original." + ) + + return result + + +@delegate_names( + delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_methods, typ="method" +) +class DatetimeProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Examples + -------- + >>> s.dt.hour + >>> s.dt.second + >>> s.dt.quarter + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + """ + + def to_pydatetime(self): + """ + Return the data as an array of native Python datetime objects. + + Timezone information is retained if present. + + .. warning:: + + Python's datetime uses microsecond resolution, which is lower than + pandas (nanosecond). The values are truncated. + + Returns + ------- + numpy.ndarray + Object dtype array containing native Python datetime objects. + + See Also + -------- + datetime.datetime : Standard library value for a datetime. 
+ + Examples + -------- + >>> s = pd.Series(pd.date_range('20180310', periods=2)) + >>> s + 0 2018-03-10 + 1 2018-03-11 + dtype: datetime64[ns] + + >>> s.dt.to_pydatetime() + array([datetime.datetime(2018, 3, 10, 0, 0), + datetime.datetime(2018, 3, 11, 0, 0)], dtype=object) + + pandas' nanosecond precision is truncated to microseconds. + + >>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns')) + >>> s + 0 2018-03-10 00:00:00.000000000 + 1 2018-03-10 00:00:00.000000001 + dtype: datetime64[ns] + + >>> s.dt.to_pydatetime() + array([datetime.datetime(2018, 3, 10, 0, 0), + datetime.datetime(2018, 3, 10, 0, 0)], dtype=object) + """ + return self._get_values().to_pydatetime() + + @property + def freq(self): + return self._get_values().inferred_freq + + +@delegate_names( + delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=TimedeltaArray, + accessors=TimedeltaArray._datetimelike_methods, + typ="method", +) +class TimedeltaProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Examples + -------- + >>> s.dt.hours + >>> s.dt.seconds + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + """ + + def to_pytimedelta(self): + """ + Return an array of native `datetime.timedelta` objects. + + Python's standard `datetime` library uses a different representation + timedelta's. This method converts a Series of pandas Timedeltas + to `datetime.timedelta` format with the same length as the original + Series. + + Returns + ------- + numpy.ndarray + Array of 1D containing data with `datetime.timedelta` type. 
+ + See Also + -------- + datetime.timedelta + + Examples + -------- + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d')) + >>> s + 0 0 days + 1 1 days + 2 2 days + 3 3 days + 4 4 days + dtype: timedelta64[ns] + + >>> s.dt.to_pytimedelta() + array([datetime.timedelta(0), datetime.timedelta(1), + datetime.timedelta(2), datetime.timedelta(3), + datetime.timedelta(4)], dtype=object) + """ + return self._get_values().to_pytimedelta() + + @property + def components(self): + """ + Return a Dataframe of the components of the Timedeltas. + + Returns + ------- + DataFrame + + Examples + -------- + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s')) + >>> s + 0 00:00:00 + 1 00:00:01 + 2 00:00:02 + 3 00:00:03 + 4 00:00:04 + dtype: timedelta64[ns] + >>> s.dt.components + days hours minutes seconds milliseconds microseconds nanoseconds + 0 0 0 0 0 0 0 0 + 1 0 0 0 1 0 0 0 + 2 0 0 0 2 0 0 0 + 3 0 0 0 3 0 0 0 + 4 0 0 0 4 0 0 0 + """ # noqa: E501 + return self._get_values().components.set_index(self._parent.index) + + @property + def freq(self): + return self._get_values().inferred_freq + + +@delegate_names( + delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property" +) +@delegate_names( + delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method" +) +class PeriodProperties(Properties): + """ + Accessor object for datetimelike properties of the Series values. + + Examples + -------- + >>> s.dt.hour + >>> s.dt.second + >>> s.dt.quarter + + Returns a Series indexed like the original Series. + Raises TypeError if the Series does not contain datetimelike values. + """ + + +class CombinedDatetimelikeProperties( + DatetimeProperties, TimedeltaProperties, PeriodProperties +): + def __new__(cls, data): + # CombinedDatetimelikeProperties isn't really instantiated. Instead + # we need to choose which parent (datetime or timedelta) is + # appropriate. Since we're checking the dtypes anyway, we'll just + # do all the validation here. 
+ from pandas import Series + + if not isinstance(data, ABCSeries): + raise TypeError( + f"cannot convert an object of type {type(data)} to a datetimelike index" + ) + + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = Series( + orig.array, + name=orig.name, + copy=False, + dtype=orig.values.categories.dtype, + ) + + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(data, orig) + elif is_period_arraylike(data): + return PeriodProperties(data, orig) + elif is_datetime_arraylike(data): + return DatetimeProperties(data, orig) + + raise AttributeError("Can only use .dt accessor with datetimelike values") diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py new file mode 100644 index 00000000..4072d06b --- /dev/null +++ b/pandas/core/indexes/api.py @@ -0,0 +1,301 @@ +import textwrap +from typing import List, Set + +from pandas._libs import NaT, lib + +import pandas.core.common as com +from pandas.core.indexes.base import ( + Index, + InvalidIndexError, + _new_Index, + ensure_index, + ensure_index_from_sequences, +) +from pandas.core.indexes.category import CategoricalIndex +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.interval import IntervalIndex +from pandas.core.indexes.multi import MultiIndex +from pandas.core.indexes.numeric import ( + Float64Index, + Int64Index, + NumericIndex, + UInt64Index, +) +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.range import RangeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + +_sort_msg = textwrap.dedent( + """\ +Sorting because non-concatenation axis is not aligned. A future version +of pandas will change to not sort by default. + +To accept the future behavior, pass 'sort=False'. 
def _get_distinct_objs(objs: List[Index]) -> List[Index]:
    """
    Drop repeated references to the same object from "objs".

    Two entries count as duplicates only when they are the very same object
    (same ``id``); equal-but-separate objects are both kept. The first
    occurrence of each object wins and relative order is preserved.
    """
    seen: Set[int] = set()
    distinct = []
    for candidate in objs:
        key = id(candidate)
        if key in seen:
            continue
        seen.add(key)
        distinct.append(candidate)
    return distinct
def union_indexes(indexes, sort=True) -> Index:
    """
    Return the union of indexes.

    The behavior of sort and names is not consistent.

    Parameters
    ----------
    indexes : list of Index or list objects
    sort : bool, default True
        Whether the result index should come out sorted or not.

    Returns
    -------
    Index

    Raises
    ------
    AssertionError
        If ``indexes`` is empty.
    """
    if len(indexes) == 0:
        raise AssertionError("Must have at least 1 Index to union")
    if len(indexes) == 1:
        result = indexes[0]
        if isinstance(result, list):
            # Honor ``sort`` for a lone list as well; it used to be sorted
            # unconditionally, unlike the multi-list path below.
            result = Index(sorted(result)) if sort else Index(result)
        return result

    indexes, kind = _sanitize_and_check(indexes)

    def _unique_indices(inds) -> Index:
        """
        Convert indexes to lists and concatenate them, removing duplicates.

        The final dtype is inferred.

        Parameters
        ----------
        inds : list of Index or list objects

        Returns
        -------
        Index
        """

        def conv(i):
            if isinstance(i, Index):
                i = i.tolist()
            return i

        return Index(lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))

    if kind == "special":
        result = indexes[0]

        if hasattr(result, "union_many"):
            # DatetimeIndex
            return result.union_many(indexes[1:])
        else:
            for other in indexes[1:]:
                result = result.union(other)
            return result
    elif kind == "array":
        index = indexes[0]
        for other in indexes[1:]:
            if not index.equals(other):
                return _unique_indices(indexes)

        # every index is equal: reuse the first one, adjusting only its name
        name = get_consensus_names(indexes)[0]
        if name != index.name:
            index = index._shallow_copy(name=name)
        return index
    else:  # kind='list'
        return _unique_indices(indexes)
def all_indexes_same(indexes):
    """
    Determine if all indexes contain the same elements.

    Parameters
    ----------
    indexes : iterable of Index objects
        Any iterable is accepted (list, tuple, generator, ...); it is
        traversed at most once.

    Returns
    -------
    bool
        True if all indexes contain the same elements, False otherwise.
        An empty iterable is vacuously True.
    """
    itr = iter(indexes)
    try:
        first = next(itr)
    except StopIteration:
        # nothing to compare
        return True
    # ``all`` stops at the first mismatch, like the explicit loop it replaces
    return all(first.equals(index) for index in itr)
is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_hashable, + is_integer, + is_integer_dtype, + is_interval_dtype, + is_iterator, + is_list_like, + is_object_dtype, + is_period_dtype, + is_scalar, + is_signed_integer_dtype, + is_timedelta64_dtype, + is_unsigned_integer_dtype, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.generic import ( + ABCCategorical, + ABCDataFrame, + ABCDatetimeArray, + ABCDatetimeIndex, + ABCIndexClass, + ABCIntervalIndex, + ABCMultiIndex, + ABCPandasArray, + ABCPeriodIndex, + ABCSeries, + ABCTimedeltaIndex, +) +from pandas.core.dtypes.missing import array_equivalent, isna + +from pandas.core import ops +from pandas.core.accessor import CachedAccessor +import pandas.core.algorithms as algos +from pandas.core.arrays import ExtensionArray +from pandas.core.base import IndexOpsMixin, PandasObject +import pandas.core.common as com +from pandas.core.construction import extract_array +from pandas.core.indexers import deprecate_ndim_indexing, maybe_convert_indices +from pandas.core.indexes.frozen import FrozenList +import pandas.core.missing as missing +from pandas.core.ops import get_op_result_name +from pandas.core.ops.invalid import make_invalid_op +from pandas.core.strings import StringMethods + +from pandas.io.formats.printing import ( + default_pprint, + format_object_attrs, + format_object_summary, + pprint_thing, +) + +__all__ = ["Index"] + +_unsortable_types = frozenset(("mixed", "mixed-integer")) + +_index_doc_kwargs = dict( + klass="Index", + inplace="", + target_klass="Index", + raises_section="", + unique="Index", + duplicated="np.ndarray", +) +_index_shared_docs = dict() + + +def _make_comparison_op(op, cls): + def cmp_method(self, other): + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): + if other.ndim > 0 and len(self) != len(other): + raise ValueError("Lengths must match to compare") + + if 
def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.

    Parameters
    ----------
    cls : type
        The Index (sub)class to rebuild.
    d : dict
        Keyword arguments for ``cls.__new__``. It is not modified.

    Returns
    -------
    The object produced by ``cls.__new__(cls, **d)``, or by
    ``_new_PeriodIndex`` when ``cls`` is a PeriodIndex subclass.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes".
            # Rename on a shallow copy so the caller's dict is left untouched.
            d = dict(d)
            d["codes"] = d.pop("labels")

    return cls.__new__(cls, **d)
+ DatetimeIndex, TimedeltaIndex, PeriodIndex + Int64Index, UInt64Index, Float64Index + + Notes + ----- + An Index instance can **only** contain hashable objects + + Examples + -------- + >>> pd.Index([1, 2, 3]) + Int64Index([1, 2, 3], dtype='int64') + + >>> pd.Index(list('abc')) + Index(['a', 'b', 'c'], dtype='object') + """ + + # tolist is not actually deprecated, just suppressed in the __dir__ + _deprecations: FrozenSet[str] = ( + PandasObject._deprecations + | IndexOpsMixin._deprecations + | frozenset(["contains", "set_value"]) + ) + + # To hand over control to subclasses + _join_precedence = 1 + + # Cython methods; see github.com/cython/cython/issues/2647 + # for why we need to wrap these instead of making them class attributes + # Moreover, cython will choose the appropriate-dtyped sub-function + # given the dtypes of the passed arguments + def _left_indexer_unique(self, left, right): + return libjoin.left_join_indexer_unique(left, right) + + def _left_indexer(self, left, right): + return libjoin.left_join_indexer(left, right) + + def _inner_indexer(self, left, right): + return libjoin.inner_join_indexer(left, right) + + def _outer_indexer(self, left, right): + return libjoin.outer_join_indexer(left, right) + + _typ = "index" + _data: Union[ExtensionArray, np.ndarray] + _id = None + _name: Optional[Hashable] = None + # MultiIndex.levels previously allowed setting the index name. We + # don't allow this anymore, and raise if it happens rather than + # failing silently. + _no_setting_name: bool = False + _comparables = ["name"] + _attributes = ["name"] + _is_numeric_dtype = False + _can_hold_na = True + + # would we like our indexing holder to defer to us + _defer_to_indexing = False + + # prioritize current class for _shallow_copy_with_infer, + # used to infer integers as datetime-likes + _infer_as_myclass = False + + _engine_type = libindex.ObjectEngine + # whether we support partial string indexing. 
def __new__(
    cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs,
) -> "Index":
    """
    Build an Index, dispatching to a specialised subclass where one applies.

    The branches below are tried in order: range, categorical, interval,
    datetime64 / ``tz`` keyword, timedelta64, period, other extension
    dtypes, array-likes (ndarray / Index / Series), objects exposing
    ``__array__``, and finally generic iterables (optionally turned into a
    MultiIndex when every element is a tuple).

    Raises
    ------
    TypeError
        If unexpected keyword arguments remain on the array-like path.
        For ``None`` or scalar ``data``, whatever ``cls._scalar_data_error``
        returns is raised.
    ValueError
        If the array-like data has more than one dimension.
    """

    from pandas.core.indexes.range import RangeIndex

    name = maybe_extract_name(name, data, cls)

    if isinstance(data, ABCPandasArray):
        # ensure users don't accidentally put a PandasArray in an index.
        data = data.to_numpy()

    # range
    if isinstance(data, RangeIndex):
        return RangeIndex(start=data, copy=copy, dtype=dtype, name=name)
    elif isinstance(data, range):
        return RangeIndex.from_range(data, dtype=dtype, name=name)

    # categorical
    elif is_categorical_dtype(data) or is_categorical_dtype(dtype):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        from pandas.core.indexes.category import CategoricalIndex

        return CategoricalIndex(data, dtype=dtype, copy=copy, name=name, **kwargs)

    # interval
    elif is_interval_dtype(data) or is_interval_dtype(dtype):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        from pandas.core.indexes.interval import IntervalIndex

        closed = kwargs.pop("closed", None)
        if is_dtype_equal(_o_dtype, dtype):
            return IntervalIndex(
                data, name=name, copy=copy, closed=closed, **kwargs
            ).astype(object)
        return IntervalIndex(
            data, dtype=dtype, name=name, copy=copy, closed=closed, **kwargs
        )

    elif (
        is_datetime64_any_dtype(data)
        or is_datetime64_any_dtype(dtype)
        or "tz" in kwargs
    ):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        from pandas import DatetimeIndex

        if is_dtype_equal(_o_dtype, dtype):
            # GH#23524 passing `dtype=object` to DatetimeIndex is invalid,
            # will raise in the where `data` is already tz-aware.  So
            # we leave it out of this step and cast to object-dtype after
            # the DatetimeIndex construction.
            # Note we can pass copy=False because the .astype below
            # will always make a copy
            return DatetimeIndex(data, copy=False, name=name, **kwargs).astype(
                object
            )
        else:
            return DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs)

    elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        from pandas import TimedeltaIndex

        if is_dtype_equal(_o_dtype, dtype):
            # Note we can pass copy=False because the .astype below
            # will always make a copy
            return TimedeltaIndex(data, copy=False, name=name, **kwargs).astype(
                object
            )
        else:
            return TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, **kwargs)

    elif is_period_dtype(data) or is_period_dtype(dtype):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        from pandas import PeriodIndex

        if is_dtype_equal(_o_dtype, dtype):
            return PeriodIndex(data, copy=False, name=name, **kwargs).astype(object)
        return PeriodIndex(data, dtype=dtype, copy=copy, name=name, **kwargs)

    # extension dtype
    elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
        if not (dtype is None or is_object_dtype(dtype)):
            # coerce to the provided dtype
            ea_cls = dtype.construct_array_type()
            data = ea_cls._from_sequence(data, dtype=dtype, copy=False)
        else:
            data = np.asarray(data, dtype=object)

        # coerce to the object dtype
        data = data.astype(object)
        return Index(data, dtype=object, copy=copy, name=name, **kwargs)

    # index-like
    elif isinstance(data, (np.ndarray, Index, ABCSeries)):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
        from pandas.core.indexes.numeric import (
            Float64Index,
            Int64Index,
            UInt64Index,
        )

        if dtype is not None:
            # we need to avoid having numpy coerce
            # things that look like ints/floats to ints unless
            # they are actually ints, e.g. '0' and 0.0
            # should not be coerced
            # GH 11836
            data = _maybe_cast_with_dtype(data, dtype, copy)
            dtype = data.dtype  # TODO: maybe not for object?

        # maybe coerce to a sub-class
        if is_signed_integer_dtype(data.dtype):
            return Int64Index(data, copy=copy, dtype=dtype, name=name)
        elif is_unsigned_integer_dtype(data.dtype):
            return UInt64Index(data, copy=copy, dtype=dtype, name=name)
        elif is_float_dtype(data.dtype):
            return Float64Index(data, copy=copy, dtype=dtype, name=name)
        # np.bool_ is NumPy's boolean scalar type; the old ``np.bool`` alias
        # of the builtin was deprecated in NumPy 1.20 and later removed.
        elif issubclass(data.dtype.type, np.bool_) or is_bool_dtype(data):
            subarr = data.astype("object")
        else:
            subarr = com.asarray_tuplesafe(data, dtype=object)

        # asarray_tuplesafe does not always copy underlying data,
        # so need to make sure that this happens
        if copy:
            subarr = subarr.copy()

        if dtype is None:
            new_data, new_dtype = _maybe_cast_data_without_dtype(subarr)
            if new_dtype is not None:
                return cls(
                    new_data, dtype=new_dtype, copy=False, name=name, **kwargs
                )

        if kwargs:
            raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
        if subarr.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")
        return cls._simple_new(subarr, name, **kwargs)

    elif hasattr(data, "__array__"):
        return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
    elif data is None or is_scalar(data):
        raise cls._scalar_data_error(data)
    else:
        if tupleize_cols and is_list_like(data):
            # GH21470: convert iterable to list before determining if empty
            if is_iterator(data):
                data = list(data)

            if data and all(isinstance(e, tuple) for e in data):
                # we must be all tuples, otherwise don't construct
                # 10697
                from pandas.core.indexes.multi import MultiIndex

                return MultiIndex.from_tuples(
                    data, names=name or kwargs.get("names")
                )
        # other iterable of some kind
        subarr = com.asarray_tuplesafe(data, dtype=object)
        return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
@Appender(_index_shared_docs["_shallow_copy"])
def _shallow_copy(self, values=None, **kwargs):
    values = self.values if values is None else values

    # Start from this index's own attributes; explicit kwargs take precedence.
    attributes = self._get_attributes_dict()
    for key, value in kwargs.items():
        attributes[key] = value

    # Empty values carry no dtype information, so pass ours along unless the
    # caller chose one.
    if len(values) == 0 and "dtype" not in kwargs:
        attributes["dtype"] = self.dtype

    # _simple_new expects an the type of self._data
    values = getattr(values, "_values", values)
    if isinstance(values, ABCDatetimeArray):
        # `self.values` returns `self` for tz-aware, so we need to unwrap
        # more specifically
        values = values.asi8

    return self._simple_new(values, **attributes)
def is_(self, other) -> bool:
    """
    Identity-style check that also holds across views of the same data.

    Note: this is *not* the same as ``Index.identical()``, which checks
    that metadata is also the same.

    Parameters
    ----------
    other : object
        other object to compare against.

    Returns
    -------
    True if both have same underlying data, False otherwise : bool
    """
    own_id = self._id
    if own_id is None:
        # an index without an identity token matches nothing
        return False
    # Ellipsis is a sentinel that can never be a real ``_id``
    return own_id is getattr(other, "_id", Ellipsis)
def view(self, cls=None):
    """
    Return a view of this index.

    When ``cls`` is None or has a ``_typ`` attribute, the result is a
    shallow copy of this index; otherwise ``cls`` is handed to the
    underlying data's own ``view``. A result that is an Index shares this
    index's ``_id``.
    """
    # we need to see if we are subclassing an
    # index type here
    wants_index = cls is None or hasattr(cls, "_typ")
    result = self._shallow_copy() if wants_index else self._data.view(cls)

    if isinstance(result, Index):
        result._id = self._id
    return result
@Appender(_index_shared_docs["astype"])
def astype(self, dtype, copy=True):
    # Same dtype: nothing to cast, only honor ``copy``.
    if is_dtype_equal(self.dtype, dtype):
        return self.copy() if copy else self

    elif is_categorical_dtype(dtype):
        from pandas.core.indexes.category import CategoricalIndex

        return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy)

    elif is_extension_array_dtype(dtype):
        # Pass the name along like the other branches do; it used to be
        # dropped on this path.
        return Index(np.asarray(self), name=self.name, dtype=dtype, copy=copy)

    try:
        casted = self.values.astype(dtype, copy=copy)
    except (TypeError, ValueError) as err:
        # Chain the original error so the underlying cause stays visible.
        raise TypeError(
            f"Cannot cast {type(self).__name__} to dtype {dtype}"
        ) from err
    return Index(casted, name=self.name, dtype=dtype)
def _assert_take_fillable(
    self, values, indices, allow_fill=True, fill_value=None, na_value=np.nan
):
    """
    Take ``indices`` from ``values``, filling ``-1`` slots when requested.

    Filling only happens when ``allow_fill`` is set and ``fill_value`` is
    not None; the filled slots receive ``na_value``. Otherwise this is a
    plain positional ``values.take(indices)``.
    """
    indices = ensure_platform_int(indices)

    # only fill if we are passing a non-None fill_value
    wants_fill = allow_fill and fill_value is not None
    if not wants_fill:
        return values.take(indices)

    # -1 marks a slot to fill; anything below that is invalid
    if (indices < -1).any():
        raise ValueError(
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
    return algos.take(values, indices, allow_fill=allow_fill, fill_value=na_value)
@Appender(_index_shared_docs["copy"])
def copy(self, name=None, deep=False, dtype=None, **kwargs):
    # ``deep`` copies the underlying data; otherwise the new index shares it.
    if deep:
        new_index = self._shallow_copy(self._data.copy())
    else:
        new_index = self._shallow_copy()

    names = kwargs.get("names")
    names = self._validate_names(name=name, names=names, deep=deep)
    new_index = new_index.set_names(names)

    # Compare against None rather than relying on truthiness: a dtype object
    # passed by the caller may evaluate falsy (a plain, non-structured
    # ``np.dtype`` has length 0), which would silently skip the cast.
    if dtype is not None:
        new_index = new_index.astype(dtype)
    return new_index
def _format_data(self, name=None):
    """
    Return the formatted data as a unicode string.

    Values are justified unless the inferred type is ``"string"`` or
    ``"unicode"``, or the index is categorical with object-dtype
    categories.
    """
    kind = self.inferred_type
    if kind in ("string", "unicode"):
        justify = False
    elif kind == "categorical":
        # categorical data is justified only when its categories are
        # not object dtype
        justify = not is_object_dtype(self.categories)
    else:
        justify = True

    return format_object_summary(
        self, self._formatter_func, is_justify=justify, name=name
    )
def to_native_types(self, slicer=None, **kwargs):
    """
    Format specified values of `self` and return them.

    Parameters
    ----------
    slicer : int, array-like
        An indexer into `self` that selects the values to format.
        When None, all of `self` is formatted.
    kwargs : dict
        Formatting options, forwarded unchanged to
        ``_format_native_types`` of the selected values, e.g.:

        1) na_rep : str
            The value that serves as a placeholder for NULL values
        2) quoting : bool or None
            Whether or not there are quoted values in `self`
        3) date_format : str
            The format used to represent date-like values.

    Returns
    -------
    numpy.ndarray
        Formatted values.
    """
    selected = self if slicer is None else self[slicer]
    return selected._format_native_types(**kwargs)
def _summary(self, name=None):
    """
    Return a summarized representation.

    Parameters
    ----------
    name : str
        Name to use in the summary representation; defaults to the
        class name of `self`.

    Returns
    -------
    String with a summarized representation of the index
    """

    def _render(item):
        # str also has ``.format`` but must be shown verbatim
        if isinstance(item, str) or not hasattr(item, "format"):
            return item
        return item.format()

    count = len(self)
    span = f", {_render(self[0])} to {_render(self[-1])}" if count > 0 else ""
    label = type(self).__name__ if name is None else name
    return f"{label}: {count} entries{span}"
def to_frame(self, index=True, name=None):
    """
    Create a DataFrame with a column containing the Index.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original Index.

    name : object, default None
        The passed name should substitute for the index name (if it has
        one). When both are None the column is labelled ``0``.

    Returns
    -------
    DataFrame
        DataFrame containing the original Index data.

    See Also
    --------
    Index.to_series : Convert an Index to a Series.
    Series.to_frame : Convert Series to DataFrame.

    Examples
    --------
    >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
    >>> idx.to_frame()
           animal
    animal
    Ant       Ant
    Bear     Bear
    Cow       Cow

    By default, the original Index is reused. To enforce a new Index:

    >>> idx.to_frame(index=False)
      animal
    0    Ant
    1   Bear
    2    Cow

    To override the name of the resulting column, specify `name`:

    >>> idx.to_frame(index=False, name='zoo')
        zoo
    0   Ant
    1  Bear
    2   Cow
    """

    from pandas import DataFrame

    if name is None:
        # Fall back to 0 only when the index has no name at all; a falsy
        # but valid name (e.g. "" or False) must be kept as the label.
        name = 0 if self.name is None else self.name
    result = DataFrame({name: self._values.copy()})

    if index:
        result.index = self
    return result
def _validate_names(self, name=None, names=None, deep=False):
    """
    Reconcile the singular ``name`` (general Index) with the plural
    ``names`` (MultiIndex) and return the names to use.

    With neither given, the current ``self.names`` is returned
    (deep-copied when ``deep`` is true). A non-list-like ``name`` is
    wrapped in a one-element list.

    Raises
    ------
    TypeError
        If both ``name`` and ``names`` are given, or ``names`` is not
        list-like.
    """
    from copy import deepcopy

    has_name = name is not None
    has_names = names is not None

    if has_name and has_names:
        raise TypeError("Can only provide one of `names` and `name`")

    if has_names:
        if is_list_like(names):
            return names
        raise TypeError("Must pass list-like as `names`.")

    if has_name:
        return name if is_list_like(name) else [name]

    current = self.names
    return deepcopy(current) if deep else current
Otherwise level must be None. + inplace : bool, default False + Modifies the object directly, instead of creating a new Index or + MultiIndex. + + Returns + ------- + Index + The same type as the caller or None if inplace is True. + + See Also + -------- + Index.rename : Able to set new names without level. + + Examples + -------- + >>> idx = pd.Index([1, 2, 3, 4]) + >>> idx + Int64Index([1, 2, 3, 4], dtype='int64') + >>> idx.set_names('quarter') + Int64Index([1, 2, 3, 4], dtype='int64', name='quarter') + + >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], + ... [2018, 2019]]) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + ) + >>> idx.set_names(['kind', 'year'], inplace=True) + >>> idx + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['kind', 'year']) + >>> idx.set_names('species', level=0) + MultiIndex([('python', 2018), + ('python', 2019), + ( 'cobra', 2018), + ( 'cobra', 2019)], + names=['species', 'year']) + """ + + if level is not None and not isinstance(self, ABCMultiIndex): + raise ValueError("Level must be None for non-MultiIndex") + + if level is not None and not is_list_like(level) and is_list_like(names): + raise TypeError("Names must be a string when a single level is provided.") + + if not is_list_like(names) and level is None and self.nlevels > 1: + raise TypeError("Must pass list-like as `names`.") + + if not is_list_like(names): + names = [names] + if level is not None and not is_list_like(level): + level = [level] + + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._set_names(names, level=level) + if not inplace: + return idx + + def rename(self, name, inplace=False): + """ + Alter Index or MultiIndex name. + + Able to set new names without level. Defaults to returning new index. + Length of names must match number of levels in MultiIndex. 
def _validate_index_level(self, level):
    """
    Validate index level.

    A single-level Index has nothing to look up, but the same checks as
    for MultiIndex are applied: an integer level must be 0 or -1, and
    any other value must equal the index name.

    Raises
    ------
    IndexError
        If ``level`` is an int other than 0 or -1.
    KeyError
        If ``level`` is not an int and differs from ``self.name``.
    """
    if not isinstance(level, int):
        if level != self.name:
            raise KeyError(
                f"Requested level ({level}) does not match index name ({self.name})"
            )
        return

    if level > 0:
        raise IndexError(
            f"Too many levels: Index has only 1 level, not {level + 1}"
        )
    if level < -1:
        raise IndexError(
            "Too many levels: Index has only 1 level, "
            f"{level} is not a valid level number"
        )
+ + If resulting index has only 1 level left, the result will be + of Index type, not MultiIndex. + + .. versionadded:: 0.23.1 (support for non-MultiIndex) + + Parameters + ---------- + level : int, str, or list-like, default 0 + If a string is given, must be the name of a level + If list-like, elements must be names or indexes of levels. + + Returns + ------- + Index or MultiIndex + """ + if not isinstance(level, (tuple, list)): + level = [level] + + levnums = sorted(self._get_level_number(lev) for lev in level)[::-1] + + if len(level) == 0: + return self + if len(level) >= self.nlevels: + raise ValueError( + f"Cannot remove {len(level)} levels from an index with {self.nlevels} " + "levels: at least one level must be left." + ) + # The two checks above guarantee that here self is a MultiIndex + + new_levels = list(self.levels) + new_codes = list(self.codes) + new_names = list(self.names) + + for i in levnums: + new_levels.pop(i) + new_codes.pop(i) + new_names.pop(i) + + if len(new_levels) == 1: + + # set nan if needed + mask = new_codes[0] == -1 + result = new_levels[0].take(new_codes[0]) + if mask.any(): + result = result.putmask(mask, np.nan) + + result._name = new_names[0] + return result + else: + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex( + levels=new_levels, + codes=new_codes, + names=new_names, + verify_integrity=False, + ) + + _index_shared_docs[ + "_get_grouper_for_level" + ] = """ + Get index grouper corresponding to an index level + + Parameters + ---------- + mapper: Group mapping function or None + Function mapping index values to groups + level : int or None + Index level + + Returns + ------- + grouper : Index + Index of values to group on. + labels : ndarray of int or None + Array of locations in level_index. + uniques : Index or None + Index of unique values for level. 
@property
def is_monotonic_increasing(self) -> bool:
    """
    Return if the index is monotonic increasing (only equal or
    increasing) values.

    The answer is read from the index engine.

    Examples
    --------
    >>> Index([1, 2, 3]).is_monotonic_increasing
    True
    >>> Index([1, 2, 2]).is_monotonic_increasing
    True
    >>> Index([1, 3, 2]).is_monotonic_increasing
    False
    """
    return self._engine.is_monotonic_increasing
def is_floating(self) -> bool:
    """
    Return True when the inferred type is ``"floating"``,
    ``"mixed-integer-float"`` or ``"integer-na"``.
    """
    float_like = ("floating", "mixed-integer-float", "integer-na")
    return self.inferred_type in float_like
@cache_readonly
def _isnan(self):
    """
    Return if each value is NaN.

    For an index that cannot hold NA, the result is an all-False boolean
    array of the same length.
    """
    if not self._can_hold_na:
        # shouldn't reach to this condition by checking hasnans beforehand
        return np.zeros(len(self), dtype=np.bool_)
    return isna(self)
+ Index.dropna : Omit entries with missing values. + isna : Top-level isna. + Series.isna : Detect missing values in Series object. + + Examples + -------- + Show which entries in a pandas.Index are NA. The result is an + array. + + >>> idx = pd.Index([5.2, 6.0, np.NaN]) + >>> idx + Float64Index([5.2, 6.0, nan], dtype='float64') + >>> idx.isna() + array([False, False, True], dtype=bool) + + Empty strings are not considered NA values. None is considered an NA + value. + + >>> idx = pd.Index(['black', '', 'red', None]) + >>> idx + Index(['black', '', 'red', None], dtype='object') + >>> idx.isna() + array([False, False, False, True], dtype=bool) + + For datetimes, `NaT` (Not a Time) is considered as an NA value. + + >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'), + ... pd.Timestamp(''), None, pd.NaT]) + >>> idx + DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], + dtype='datetime64[ns]', freq=None) + >>> idx.isna() + array([False, True, True, True], dtype=bool) + """ + return self._isnan + + isnull = isna + + def notna(self): + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to ``True``. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values + (unless you set ``pandas.options.mode.use_inf_as_na = True``). + NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False`` + values. + + Returns + ------- + numpy.ndarray + Boolean array to indicate which entries are not NA. + + See Also + -------- + Index.notnull : Alias of notna. + Index.isna: Inverse of notna. + notna : Top-level notna. + + Examples + -------- + Show which entries in an Index are not NA. The result is an + array. + + >>> idx = pd.Index([5.2, 6.0, np.NaN]) + >>> idx + Float64Index([5.2, 6.0, nan], dtype='float64') + >>> idx.notna() + array([ True, True, False]) + + Empty strings are not considered NA values. None is considered a NA + value. 
_index_shared_docs[
    "fillna"
] = """
    Fill NA/NaN values with the specified value.

    Parameters
    ----------
    value : scalar
        Scalar value to use to fill holes (e.g. 0).
        This value cannot be list-like.
    downcast : dict, default is None
        a dict of item->dtype of what to downcast if possible,
        or the string 'infer' which will try to downcast to an appropriate
        equal type (e.g. float64 to int64 if possible).

    Returns
    -------
    filled : Index
    """


@Appender(_index_shared_docs["fillna"])
def fillna(self, value=None, downcast=None):
    self._assert_can_do_op(value)
    if self.hasnans:
        result = self.putmask(self._isnan, value)
        if downcast is None:
            # only the name is carried over to the rebuilt Index
            return Index(result, name=self.name)
        # Previously this fell through and returned an *unfilled* copy,
        # silently discarding ``result``; fail loudly instead.
        raise NotImplementedError(
            f"{type(self).__name__}.fillna does not support 'downcast' "
            "argument values other than 'None'."
        )
    # nothing to fill
    return self._shallow_copy()
def drop_duplicates(self, keep="first"):
    """
    Return Index with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.

    Returns
    -------
    deduplicated : Index

    See Also
    --------
    Series.drop_duplicates : Equivalent method on Series.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Index.duplicated : Related method on Index, indicating duplicate
        Index values.

    Examples
    --------
    Generate a pandas.Index with duplicate values.

    >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])

    The `keep` parameter controls which duplicate values are removed.
    The value 'first' keeps the first occurrence for each
    set of duplicated entries. The default value of keep is 'first'.

    >>> idx.drop_duplicates(keep='first')
    Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')

    The value 'last' keeps the last occurrence for each set of duplicated
    entries.

    >>> idx.drop_duplicates(keep='last')
    Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')

    The value ``False`` discards all sets of duplicated entries.

    >>> idx.drop_duplicates(keep=False)
    Index(['cow', 'beetle', 'hippo'], dtype='object')
    """
    # pure delegation to the parent class implementation
    return super().drop_duplicates(keep=keep)
Either all duplicates, all except the first, or all except the + last occurrence of duplicates can be indicated. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + The value or values in a set of duplicates to mark as missing. + + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. + - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. + + Returns + ------- + numpy.ndarray + + See Also + -------- + Series.duplicated : Equivalent method on pandas.Series. + DataFrame.duplicated : Equivalent method on pandas.DataFrame. + Index.drop_duplicates : Remove duplicate values from Index. + + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set to False and all others to True: + + >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> idx.duplicated() + array([False, False, True, False, True]) + + which is equivalent to + + >>> idx.duplicated(keep='first') + array([False, False, True, False, True]) + + By using 'last', the last occurrence of each set of duplicated values + is set on False and all others on True: + + >>> idx.duplicated(keep='last') + array([ True, False, True, False, False]) + + By setting keep on ``False``, all duplicates are True: + + >>> idx.duplicated(keep=False) + array([ True, False, True, False, True]) + """ + return super().duplicated(keep=keep) + + def _get_unique_index(self, dropna=False): + """ + Returns an index containing unique values. + + Parameters + ---------- + dropna : bool + If True, NaN values are dropped. 
+ + Returns + ------- + uniques : index + """ + if self.is_unique and not dropna: + return self + + values = self.values + + if not self.is_unique: + values = self.unique() + + if dropna: + try: + if self.hasnans: + values = values[~isna(values)] + except NotImplementedError: + pass + + return self._shallow_copy(values) + + # -------------------------------------------------------------------- + # Arithmetic & Logical Methods + + def __add__(self, other): + if isinstance(other, (ABCSeries, ABCDataFrame)): + return NotImplemented + from pandas import Series + + return Index(Series(self) + other) + + def __radd__(self, other): + from pandas import Series + + return Index(other + Series(self)) + + def __iadd__(self, other): + # alias for __add__ + return self + other + + def __sub__(self, other): + return Index(np.array(self) - other) + + def __rsub__(self, other): + # wrap Series to ensure we pin name correctly + from pandas import Series + + return Index(other - Series(self)) + + def __and__(self, other): + return self.intersection(other) + + def __or__(self, other): + return self.union(other) + + def __xor__(self, other): + return self.symmetric_difference(other) + + def __nonzero__(self): + raise ValueError( + f"The truth value of a {type(self).__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." + ) + + __bool__ = __nonzero__ + + # -------------------------------------------------------------------- + # Set Operation Methods + + def _get_reconciled_name_object(self, other): + """ + If the result of a set operation will be self, + return self, unless the name changes, in which + case make a shallow copy of self. + """ + name = get_op_result_name(self, other) + if self.name != name: + return self._shallow_copy(name=name) + return self + + def _union_incompatible_dtypes(self, other, sort): + """ + Casts this and other index to object dtype to allow the formation + of a union between incompatible types. 
def _validate_sort_keyword(self, sort):
    """
    Check that ``sort`` is one of the values accepted by the set
    operations (``None`` or ``False``).

    Raises
    ------
    ValueError
        If ``sort`` compares equal to neither ``None`` nor ``False``.
    """
    if sort in (None, False):
        return
    raise ValueError(
        "The 'sort' keyword only takes the values of "
        f"None or False; {sort} was passed."
    )
+ + Returns + ------- + union : Index + + Examples + -------- + + Union matching dtypes + + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.union(idx2) + Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + + Union mismatched dtypes + + >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) + >>> idx2 = pd.Index([1, 2, 3, 4]) + >>> idx1.union(idx2) + Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + + if not self._is_compatible_with_other(other): + return self._union_incompatible_dtypes(other, sort=sort) + + return self._union(other, sort=sort) + + def _union(self, other, sort): + """ + Specific union logic should go here. In subclasses, union behavior + should be overwritten here rather than in `self.union`. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + Returns + ------- + Index + """ + + if not len(other) or self.equals(other): + return self._get_reconciled_name_object(other) + + if not len(self): + return other._get_reconciled_name_object(self) + + # TODO(EA): setops-refactor, clean all this up + if is_datetime64tz_dtype(self): + lvals = self._ndarray_values + else: + lvals = self._values + if is_datetime64tz_dtype(other): + rvals = other._ndarray_values + else: + rvals = other._values + + if sort is None and self.is_monotonic and other.is_monotonic: + try: + result = self._outer_indexer(lvals, rvals)[0] + except TypeError: + # incomparable objects + result = list(lvals) + + # worth making this faster? 
a very unusual case + value_set = set(lvals) + result.extend([x for x in rvals if x not in value_set]) + else: + # find indexes of things in "other" that are not in "self" + if self.is_unique: + indexer = self.get_indexer(other) + indexer = (indexer == -1).nonzero()[0] + else: + indexer = algos.unique1d(self.get_indexer_non_unique(other)[1]) + + if len(indexer) > 0: + other_diff = algos.take_nd(rvals, indexer, allow_fill=False) + result = concat_compat((lvals, other_diff)) + + else: + result = lvals + + if sort is None: + try: + result = algos.safe_sort(result) + except TypeError as err: + warnings.warn( + f"{err}, sort order is undefined for incomparable objects", + RuntimeWarning, + stacklevel=3, + ) + + # for subclasses + return self._wrap_setop_result(other, result) + + def _wrap_setop_result(self, other, result): + return self._constructor(result, name=get_op_result_name(self, other)) + + _index_shared_docs[ + "intersection" + ] = """ + Form the intersection of two Index objects. + + This returns a new Index with elements common to the index and `other`. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Whether to sort the resulting index. + + * False : do not sort the result. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. + + .. versionadded:: 0.24.0 + + .. versionchanged:: 0.24.1 + + Changed the default from ``True`` to ``False``, to match + the behaviour of 0.23.4 and earlier. 
+ + Returns + ------- + intersection : Index + + Examples + -------- + + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.intersection(idx2) + Int64Index([3, 4], dtype='int64') + """ + + # TODO: standardize return type of non-union setops type(self vs other) + @Appender(_index_shared_docs["intersection"]) + def intersection(self, other, sort=False): + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other = ensure_index(other) + + if self.equals(other): + return self._get_reconciled_name_object(other) + + if not is_dtype_equal(self.dtype, other.dtype): + this = self.astype("O") + other = other.astype("O") + return this.intersection(other, sort=sort) + + # TODO(EA): setops-refactor, clean all this up + lvals = self._values + rvals = other._values + + if self.is_monotonic and other.is_monotonic: + try: + result = self._inner_indexer(lvals, rvals)[0] + return self._wrap_setop_result(other, result) + except TypeError: + pass + + try: + indexer = Index(rvals).get_indexer(lvals) + indexer = indexer.take((indexer != -1).nonzero()[0]) + except (InvalidIndexError, IncompatibleFrequency): + # InvalidIndexError raised by get_indexer if non-unique + # IncompatibleFrequency raised by PeriodIndex.get_indexer + indexer = algos.unique1d(Index(rvals).get_indexer_non_unique(lvals)[0]) + indexer = indexer[indexer != -1] + + taken = other.take(indexer) + res_name = get_op_result_name(self, other) + + if sort is None: + taken = algos.safe_sort(taken.values) + return self._shallow_copy(taken, name=res_name) + + taken.name = res_name + return taken + + def difference(self, other, sort=None): + """ + Return a new Index with elements from the index that are not in + `other`. + + This is the set difference of two Index objects. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default None + Whether to sort the resulting index. 
By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. + + .. versionadded:: 0.24.0 + + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). + + Returns + ------- + difference : Index + + Examples + -------- + + >>> idx1 = pd.Index([2, 1, 3, 4]) + >>> idx2 = pd.Index([3, 4, 5, 6]) + >>> idx1.difference(idx2) + Int64Index([1, 2], dtype='int64') + >>> idx1.difference(idx2, sort=False) + Int64Index([2, 1], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + + if self.equals(other): + # pass an empty np.ndarray with the appropriate dtype + return self._shallow_copy(self._data[:0]) + + other, result_name = self._convert_can_do_setop(other) + + this = self._get_unique_index() + + indexer = this.get_indexer(other) + indexer = indexer.take((indexer != -1).nonzero()[0]) + + label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) + the_diff = this.values.take(label_diff) + if sort is None: + try: + the_diff = algos.safe_sort(the_diff) + except TypeError: + pass + + return this._shallow_copy(the_diff, name=result_name) + + def symmetric_difference(self, other, result_name=None, sort=None): + """ + Compute the symmetric difference of two Index objects. + + Parameters + ---------- + other : Index or array-like + result_name : str + sort : False or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. + + .. versionadded:: 0.24.0 + + .. 
versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). + + Returns + ------- + symmetric_difference : Index + + Notes + ----- + ``symmetric_difference`` contains elements that appear in either + ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by + ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates + dropped. + + Examples + -------- + >>> idx1 = pd.Index([1, 2, 3, 4]) + >>> idx2 = pd.Index([2, 3, 4, 5]) + >>> idx1.symmetric_difference(idx2) + Int64Index([1, 5], dtype='int64') + + You can also use the ``^`` operator: + + >>> idx1 ^ idx2 + Int64Index([1, 5], dtype='int64') + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_name_update = self._convert_can_do_setop(other) + if result_name is None: + result_name = result_name_update + + this = self._get_unique_index() + other = other._get_unique_index() + indexer = this.get_indexer(other) + + # {this} minus {other} + common_indexer = indexer.take((indexer != -1).nonzero()[0]) + left_indexer = np.setdiff1d( + np.arange(this.size), common_indexer, assume_unique=True + ) + left_diff = this._values.take(left_indexer) + + # {other} minus {this} + right_indexer = (indexer == -1).nonzero()[0] + right_diff = other._values.take(right_indexer) + + the_diff = concat_compat([left_diff, right_diff]) + if sort is None: + try: + the_diff = algos.safe_sort(the_diff) + except TypeError: + pass + + attribs = self._get_attributes_dict() + attribs["name"] = result_name + if "freq" in attribs: + attribs["freq"] = None + return self._shallow_copy_with_infer(the_diff, **attribs) + + def _assert_can_do_setop(self, other): + if not is_list_like(other): + raise TypeError("Input must be Index or array-like") + return True + + def _convert_can_do_setop(self, other): + if not isinstance(other, Index): + other = Index(other, name=self.name) + result_name = self.name + else: + result_name = get_op_result_name(self, 
other) + return other, result_name + + # -------------------------------------------------------------------- + # Indexing Methods + + _index_shared_docs[ + "get_loc" + ] = """ + Get integer location, slice or boolean mask for requested label. + + Parameters + ---------- + key : label + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + tolerance : int or float, optional + Maximum distance from index value for inexact matches. The value of + the index at the matching location most satisfy the equation + ``abs(index[loc] - key) <= tolerance``. + + .. versionadded:: 0.21.0 (list-like tolerance) + + Returns + ------- + loc : int if unique index, slice if monotonic index, else mask + + Examples + -------- + >>> unique_index = pd.Index(list('abc')) + >>> unique_index.get_loc('b') + 1 + + >>> monotonic_index = pd.Index(list('abbc')) + >>> monotonic_index.get_loc('b') + slice(1, 3, None) + + >>> non_monotonic_index = pd.Index(list('abcb')) + >>> non_monotonic_index.get_loc('b') + array([False, True, False, True], dtype=bool) + """ + + @Appender(_index_shared_docs["get_loc"]) + def get_loc(self, key, method=None, tolerance=None): + if method is None: + if tolerance is not None: + raise ValueError( + "tolerance argument only valid if using pad, " + "backfill or nearest lookups" + ) + try: + return self._engine.get_loc(key) + except KeyError: + return self._engine.get_loc(self._maybe_cast_indexer(key)) + indexer = self.get_indexer([key], method=method, tolerance=tolerance) + if indexer.ndim > 1 or indexer.size > 1: + raise TypeError("get_loc requires scalar valued input") + loc = indexer.item() + if loc == -1: + raise KeyError(key) + return loc + + _index_shared_docs[ + 
"get_indexer" + ] = """ + Compute indexer and mask for new index given the current index. The + indexer should be then used as an input to ndarray.take to align the + current data to the new index. + + Parameters + ---------- + target : %(target_klass)s + method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional + * default: exact matches only. + * pad / ffill: find the PREVIOUS index value if no exact match. + * backfill / bfill: use NEXT index value if no exact match + * nearest: use the NEAREST index value if no exact match. Tied + distances are broken by preferring the larger index value. + limit : int, optional + Maximum number of consecutive labels in ``target`` to match for + inexact matches. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations most + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + .. versionadded:: 0.21.0 (list-like tolerance) + + Returns + ------- + indexer : ndarray of int + Integers from 0 to n - 1 indicating that the index at these + positions matches the corresponding target values. Missing values + in the target are marked by -1. + %(raises_section)s + Examples + -------- + >>> index = pd.Index(['c', 'a', 'b']) + >>> index.get_indexer(['a', 'b', 'x']) + array([ 1, 2, -1]) + + Notice that the return value is an array of locations in ``index`` + and ``x`` is marked by -1, as it is not in ``index``. 
+ """ + + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) + def get_indexer(self, target, method=None, limit=None, tolerance=None): + method = missing.clean_reindex_fill_method(method) + target = ensure_index(target) + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance, target) + + # Treat boolean labels passed to a numeric index as not found. Without + # this fix False and True would be treated as 0 and 1 respectively. + # (GH #16877) + if target.is_boolean() and self.is_numeric(): + return ensure_platform_int(np.repeat(-1, target.size)) + + pself, ptarget = self._maybe_promote(target) + if pself is not self or ptarget is not target: + return pself.get_indexer( + ptarget, method=method, limit=limit, tolerance=tolerance + ) + + if not is_dtype_equal(self.dtype, target.dtype): + this = self.astype(object) + target = target.astype(object) + return this.get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + + if not self.is_unique: + raise InvalidIndexError( + "Reindexing only valid with uniquely valued Index objects" + ) + + if method == "pad" or method == "backfill": + indexer = self._get_fill_indexer(target, method, limit, tolerance) + elif method == "nearest": + indexer = self._get_nearest_indexer(target, limit, tolerance) + else: + if tolerance is not None: + raise ValueError( + "tolerance argument only valid if doing pad, " + "backfill or nearest reindexing" + ) + if limit is not None: + raise ValueError( + "limit argument only valid if doing pad, " + "backfill or nearest reindexing" + ) + + indexer = self._engine.get_indexer(target._ndarray_values) + + return ensure_platform_int(indexer) + + def _convert_tolerance(self, tolerance, target): + # override this method on subclasses + tolerance = np.asarray(tolerance) + if target.size != tolerance.size and tolerance.size > 1: + raise ValueError("list-like tolerance size must match target index size") + return tolerance + + def _get_fill_indexer(self, 
target, method, limit=None, tolerance=None): + if self.is_monotonic_increasing and target.is_monotonic_increasing: + method = ( + self._engine.get_pad_indexer + if method == "pad" + else self._engine.get_backfill_indexer + ) + indexer = method(target._ndarray_values, limit) + else: + indexer = self._get_fill_indexer_searchsorted(target, method, limit) + if tolerance is not None: + indexer = self._filter_indexer_tolerance( + target._ndarray_values, indexer, tolerance + ) + return indexer + + def _get_fill_indexer_searchsorted(self, target, method, limit=None): + """ + Fallback pad/backfill get_indexer that works for monotonic decreasing + indexes and non-monotonic targets. + """ + if limit is not None: + raise ValueError( + f"limit argument for {repr(method)} method only well-defined " + "if index and target are monotonic" + ) + + side = "left" if method == "pad" else "right" + + # find exact matches first (this simplifies the algorithm) + indexer = self.get_indexer(target) + nonexact = indexer == -1 + indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side) + if side == "left": + # searchsorted returns "indices into a sorted array such that, + # if the corresponding elements in v were inserted before the + # indices, the order of a would be preserved". + # Thus, we need to subtract 1 to find values to the left. + indexer[nonexact] -= 1 + # This also mapped not found values (values of 0 from + # np.searchsorted) to -1, which conveniently is also our + # sentinel for missing values + else: + # Mark indices to the right of the largest value as not found + indexer[indexer == len(self)] = -1 + return indexer + + def _get_nearest_indexer(self, target, limit, tolerance): + """ + Get the indexer for the nearest index labels; requires an index with + values that can be subtracted from each other (e.g., not strings or + tuples). 
+ """ + left_indexer = self.get_indexer(target, "pad", limit=limit) + right_indexer = self.get_indexer(target, "backfill", limit=limit) + + target_values = target._values + left_distances = np.abs(self._values[left_indexer] - target_values) + right_distances = np.abs(self._values[right_indexer] - target_values) + + op = operator.lt if self.is_monotonic_increasing else operator.le + indexer = np.where( + op(left_distances, right_distances) | (right_indexer == -1), + left_indexer, + right_indexer, + ) + if tolerance is not None: + indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance) + return indexer + + def _filter_indexer_tolerance(self, target, indexer, tolerance): + distance = abs(self._values[indexer] - target) + indexer = np.where(distance <= tolerance, indexer, -1) + return indexer + + # -------------------------------------------------------------------- + # Indexer Conversion Methods + + _index_shared_docs[ + "_convert_scalar_indexer" + ] = """ + Convert a scalar indexer. + + Parameters + ---------- + key : label of the slice bound + kind : {'ix', 'loc', 'getitem', 'iloc'} or None + """ + + @Appender(_index_shared_docs["_convert_scalar_indexer"]) + def _convert_scalar_indexer(self, key, kind=None): + assert kind in ["ix", "loc", "getitem", "iloc", None] + + if kind == "iloc": + return self._validate_indexer("positional", key, kind) + + if len(self) and not isinstance(self, ABCMultiIndex): + + # we can raise here if we are definitive that this + # is positional indexing (eg. 
.ix on with a float) + # or label indexing if we are using a type able + # to be represented in the index + + if kind in ["getitem", "ix"] and is_float(key): + if not self.is_floating(): + return self._invalid_indexer("label", key) + + elif kind in ["loc"] and is_float(key): + + # we want to raise KeyError on string/mixed here + # technically we *could* raise a TypeError + # on anything but mixed though + if self.inferred_type not in [ + "floating", + "mixed-integer-float", + "integer-na", + "string", + "unicode", + "mixed", + ]: + self._invalid_indexer("label", key) + + elif kind in ["loc"] and is_integer(key): + if not self.holds_integer(): + self._invalid_indexer("label", key) + + return key + + _index_shared_docs[ + "_convert_slice_indexer" + ] = """ + Convert a slice indexer. + + By definition, these are labels unless 'iloc' is passed in. + Floats are not allowed as the start, step, or stop of the slice. + + Parameters + ---------- + key : label of the slice bound + kind : {'ix', 'loc', 'getitem', 'iloc'} or None + """ + + @Appender(_index_shared_docs["_convert_slice_indexer"]) + def _convert_slice_indexer(self, key: slice, kind=None): + assert kind in ["ix", "loc", "getitem", "iloc", None] + + # validate iloc + if kind == "iloc": + return slice( + self._validate_indexer("slice", key.start, kind), + self._validate_indexer("slice", key.stop, kind), + self._validate_indexer("slice", key.step, kind), + ) + + # potentially cast the bounds to integers + start, stop, step = key.start, key.stop, key.step + + # figure out if this is a positional indexer + def is_int(v): + return v is None or is_integer(v) + + is_null_slicer = start is None and stop is None + is_index_slice = is_int(start) and is_int(stop) + is_positional = is_index_slice and not ( + self.is_integer() or self.is_categorical() + ) + + if kind == "getitem": + """ + called from the getitem slicers, validate that we are in fact + integers + """ + if self.is_integer() or is_index_slice: + return slice( + 
self._validate_indexer("slice", key.start, kind), + self._validate_indexer("slice", key.stop, kind), + self._validate_indexer("slice", key.step, kind), + ) + + # convert the slice to an indexer here + + # if we are mixed and have integers + try: + if is_positional and self.is_mixed(): + # Validate start & stop + if start is not None: + self.get_loc(start) + if stop is not None: + self.get_loc(stop) + is_positional = False + except KeyError: + if self.inferred_type in ["mixed-integer-float", "integer-na"]: + raise + + if is_null_slicer: + indexer = key + elif is_positional: + indexer = key + else: + indexer = self.slice_indexer(start, stop, step, kind=kind) + + return indexer + + def _convert_listlike_indexer(self, keyarr, kind=None): + """ + Parameters + ---------- + keyarr : list-like + Indexer to convert. + + Returns + ------- + indexer : numpy.ndarray or None + Return an ndarray or None if cannot convert. + keyarr : numpy.ndarray + Return tuple-safe keys. + """ + if isinstance(keyarr, Index): + keyarr = self._convert_index_indexer(keyarr) + else: + keyarr = self._convert_arr_indexer(keyarr) + + indexer = self._convert_list_indexer(keyarr, kind=kind) + return indexer, keyarr + + _index_shared_docs[ + "_convert_arr_indexer" + ] = """ + Convert an array-like indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : array-like + Indexer to convert. + + Returns + ------- + converted_keyarr : array-like + """ + + @Appender(_index_shared_docs["_convert_arr_indexer"]) + def _convert_arr_indexer(self, keyarr): + keyarr = com.asarray_tuplesafe(keyarr) + return keyarr + + _index_shared_docs[ + "_convert_index_indexer" + ] = """ + Convert an Index indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : Index (or sub-class) + Indexer to convert. 
+ + Returns + ------- + converted_keyarr : Index (or sub-class) + """ + + @Appender(_index_shared_docs["_convert_index_indexer"]) + def _convert_index_indexer(self, keyarr): + return keyarr + + _index_shared_docs[ + "_convert_list_indexer" + ] = """ + Convert a list-like indexer to the appropriate dtype. + + Parameters + ---------- + keyarr : Index (or sub-class) + Indexer to convert. + kind : iloc, ix, loc, optional + + Returns + ------- + positional indexer or None + """ + + @Appender(_index_shared_docs["_convert_list_indexer"]) + def _convert_list_indexer(self, keyarr, kind=None): + if ( + kind in [None, "iloc", "ix"] + and is_integer_dtype(keyarr) + and not self.is_floating() + and not isinstance(keyarr, ABCPeriodIndex) + ): + + if self.inferred_type == "mixed-integer": + indexer = self.get_indexer(keyarr) + if (indexer >= 0).all(): + return indexer + # missing values are flagged as -1 by get_indexer and negative + # indices are already converted to positive indices in the + # above if-statement, so the negative flags are changed to + # values outside the range of indices so as to trigger an + # IndexError in maybe_convert_indices + indexer[indexer < 0] = len(self) + + return maybe_convert_indices(indexer, len(self)) + + elif not self.inferred_type == "integer": + keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr) + return keyarr + + return None + + def _invalid_indexer(self, form, key): + """ + Consistent invalid indexer message. + """ + raise TypeError( + f"cannot do {form} indexing on {type(self)} with these " + f"indexers [{key}] of {type(key)}" + ) + + # -------------------------------------------------------------------- + # Reindex Methods + + def _can_reindex(self, indexer): + """ + Check if we are allowing reindexing with this particular indexer. 
+ + Parameters + ---------- + indexer : an integer indexer + + Raises + ------ + ValueError if its a duplicate axis + """ + + # trying to reindex on an axis with duplicates + if not self.is_unique and len(indexer): + raise ValueError("cannot reindex from a duplicate axis") + + def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + """ + Create index with target's values (move/add/delete values + as necessary). + + Parameters + ---------- + target : an iterable + + Returns + ------- + new_index : pd.Index + Resulting index. + indexer : np.ndarray or None + Indices of output values in original index. + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). + preserve_names = not hasattr(target, "name") + + # GH7774: preserve dtype/tz if target is empty and not an Index. + target = _ensure_has_len(target) # target may be an iterator + + if not isinstance(target, Index) and len(target) == 0: + attrs = self._get_attributes_dict() + attrs.pop("freq", None) # don't preserve freq + values = self._data[:0] # appropriately-dtyped empty array + target = self._simple_new(values, dtype=self.dtype, **attrs) + else: + target = ensure_index(target) + + if level is not None: + if method is not None: + raise TypeError("Fill method not supported if level passed") + _, indexer, _ = self._join_level( + target, level, how="right", return_indexers=True + ) + else: + if self.equals(target): + indexer = None + else: + # check is_overlapping for IntervalIndex compat + if self.is_unique and not getattr(self, "is_overlapping", False): + indexer = self.get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + else: + if method is not None or limit is not None: + raise ValueError( + "cannot reindex a non-unique index " + "with a method or limit" + ) + indexer, missing = self.get_indexer_non_unique(target) + + if preserve_names and target.nlevels == 1 and target.name != self.name: + target = 
target.copy() + target.name = self.name + + return target, indexer + + def _reindex_non_unique(self, target): + """ + Create a new index with target's values (move/add/delete values as + necessary) use with non-unique Index and a possibly non-unique target. + + Parameters + ---------- + target : an iterable + + Returns + ------- + new_index : pd.Index + Resulting index. + indexer : np.ndarray or None + Indices of output values in original index. + + """ + + target = ensure_index(target) + indexer, missing = self.get_indexer_non_unique(target) + check = indexer != -1 + new_labels = self.take(indexer[check]) + new_indexer = None + + if len(missing): + length = np.arange(len(indexer)) + + missing = ensure_platform_int(missing) + missing_labels = target.take(missing) + missing_indexer = ensure_int64(length[~check]) + cur_labels = self.take(indexer[check]).values + cur_indexer = ensure_int64(length[check]) + + new_labels = np.empty(tuple([len(indexer)]), dtype=object) + new_labels[cur_indexer] = cur_labels + new_labels[missing_indexer] = missing_labels + + # a unique indexer + if target.is_unique: + + # see GH5553, make sure we use the right indexer + new_indexer = np.arange(len(indexer)) + new_indexer[cur_indexer] = np.arange(len(cur_labels)) + new_indexer[missing_indexer] = -1 + + # we have a non_unique selector, need to use the original + # indexer here + else: + + # need to retake to have the same size as the indexer + indexer[~check] = -1 + + # reset the new indexer to account for the new size + new_indexer = np.arange(len(self.take(indexer))) + new_indexer[~check] = -1 + + new_index = self._shallow_copy_with_infer(new_labels) + return new_index, indexer, new_indexer + + # -------------------------------------------------------------------- + # Join Methods + + _index_shared_docs[ + "join" + ] = """ + Compute join_index and indexers to conform data + structures to the new index. 
+ + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : bool, default False + sort : bool, default False + Sort the join keys lexicographically in the result Index. If False, + the order of the join keys depends on the join type (how keyword). + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + + @Appender(_index_shared_docs["join"]) + def join(self, other, how="left", level=None, return_indexers=False, sort=False): + self_is_mi = isinstance(self, ABCMultiIndex) + other_is_mi = isinstance(other, ABCMultiIndex) + + # try to figure out the join level + # GH3662 + if level is None and (self_is_mi or other_is_mi): + + # have the same levels/names so a simple join + if self.names == other.names: + pass + else: + return self._join_multi(other, how=how, return_indexers=return_indexers) + + # join on the level + if level is not None and (self_is_mi or other_is_mi): + return self._join_level( + other, level, how=how, return_indexers=return_indexers + ) + + other = ensure_index(other) + + if len(other) == 0 and how in ("left", "outer"): + join_index = self._shallow_copy() + if return_indexers: + rindexer = np.repeat(-1, len(join_index)) + return join_index, None, rindexer + else: + return join_index + + if len(self) == 0 and how in ("right", "outer"): + join_index = other._shallow_copy() + if return_indexers: + lindexer = np.repeat(-1, len(join_index)) + return join_index, lindexer, None + else: + return join_index + + if self._join_precedence < other._join_precedence: + how = {"right": "left", "left": "right"}.get(how, how) + result = other.join( + self, how=how, level=level, return_indexers=return_indexers + ) + if return_indexers: + x, y, z = result + result = x, z, y + return result + + if not is_dtype_equal(self.dtype, other.dtype): + this = self.astype("O") + other = other.astype("O") + return this.join(other, how=how, return_indexers=return_indexers) + + 
_validate_join_method(how) + + if not self.is_unique and not other.is_unique: + return self._join_non_unique( + other, how=how, return_indexers=return_indexers + ) + elif not self.is_unique or not other.is_unique: + if self.is_monotonic and other.is_monotonic: + return self._join_monotonic( + other, how=how, return_indexers=return_indexers + ) + else: + return self._join_non_unique( + other, how=how, return_indexers=return_indexers + ) + elif self.is_monotonic and other.is_monotonic: + try: + return self._join_monotonic( + other, how=how, return_indexers=return_indexers + ) + except TypeError: + pass + + if how == "left": + join_index = self + elif how == "right": + join_index = other + elif how == "inner": + # TODO: sort=False here for backwards compat. It may + # be better to use the sort parameter passed into join + join_index = self.intersection(other, sort=False) + elif how == "outer": + # TODO: sort=True here for backwards compat. It may + # be better to use the sort parameter passed into join + join_index = self.union(other) + + if sort: + join_index = join_index.sort_values() + + if return_indexers: + if join_index is self: + lindexer = None + else: + lindexer = self.get_indexer(join_index) + if join_index is other: + rindexer = None + else: + rindexer = other.get_indexer(join_index) + return join_index, lindexer, rindexer + else: + return join_index + + def _join_multi(self, other, how, return_indexers=True): + from pandas.core.indexes.multi import MultiIndex + from pandas.core.reshape.merge import _restore_dropped_levels_multijoin + + # figure out join names + self_names = set(com.not_none(*self.names)) + other_names = set(com.not_none(*other.names)) + overlap = self_names & other_names + + # need at least 1 in common + if not overlap: + raise ValueError("cannot join with no overlapping index names") + + self_is_mi = isinstance(self, MultiIndex) + other_is_mi = isinstance(other, MultiIndex) + + if self_is_mi and other_is_mi: + + # Drop the non-matching 
levels from left and right respectively + ldrop_names = list(self_names - overlap) + rdrop_names = list(other_names - overlap) + + # if only the order differs + if not len(ldrop_names + rdrop_names): + self_jnlevels = self + other_jnlevels = other.reorder_levels(self.names) + else: + self_jnlevels = self.droplevel(ldrop_names) + other_jnlevels = other.droplevel(rdrop_names) + + # Join left and right + # Join on same leveled multi-index frames is supported + join_idx, lidx, ridx = self_jnlevels.join( + other_jnlevels, how, return_indexers=True + ) + + # Restore the dropped levels + # Returned index level order is + # common levels, ldrop_names, rdrop_names + dropped_names = ldrop_names + rdrop_names + + levels, codes, names = _restore_dropped_levels_multijoin( + self, other, dropped_names, join_idx, lidx, ridx + ) + + # Re-create the multi-index + multi_join_idx = MultiIndex( + levels=levels, codes=codes, names=names, verify_integrity=False + ) + + multi_join_idx = multi_join_idx.remove_unused_levels() + + return multi_join_idx, lidx, ridx + + jl = list(overlap)[0] + + # Case where only one index is multi + # make the indices into mi's that match + flip_order = False + if self_is_mi: + self, other = other, self + flip_order = True + # flip if join method is right or left + how = {"right": "left", "left": "right"}.get(how, how) + + level = other.names.index(jl) + result = self._join_level( + other, level, how=how, return_indexers=return_indexers + ) + + if flip_order: + if isinstance(result, tuple): + return result[0], result[2], result[1] + return result + + def _join_non_unique(self, other, how="left", return_indexers=False): + from pandas.core.reshape.merge import _get_join_indexers + + left_idx, right_idx = _get_join_indexers( + [self._ndarray_values], [other._ndarray_values], how=how, sort=True + ) + + left_idx = ensure_platform_int(left_idx) + right_idx = ensure_platform_int(right_idx) + + join_index = np.asarray(self._ndarray_values.take(left_idx)) + mask = 
def _join_level(
    self, other, level, how="left", return_indexers=False, keep_order=True
):
    """
    Join a MultiIndex with a flat Index on one level of the MultiIndex.

    The join method *only* affects the level of the resulting
    MultiIndex. Otherwise it just exactly aligns the Index data to the
    labels of the level in the MultiIndex.

    If ``keep_order == True``, the order of the data indexed by the
    MultiIndex will not be changed; otherwise, it will tie out
    with `other`.

    Parameters
    ----------
    other : Index
        The other operand. Exactly one of ``self`` and ``other`` may be a
        MultiIndex.
    level : int or str
        Level of the MultiIndex to join on; resolved through
        ``_get_level_number``.
    how : str, default 'left'
        Join method. 'left' and 'right' are swapped when ``self`` is the
        flat index, because the MultiIndex is always treated as the left
        operand internally.
    return_indexers : bool, default False
        Whether to also return the left and right indexers.
    keep_order : bool, default True
        Whether to preserve the order of the MultiIndex entries.

    Returns
    -------
    join_index or (join_index, left_indexer, right_indexer)
        Indexers are passed through ``ensure_platform_int`` unless they
        are None.

    Raises
    ------
    TypeError
        If both ``self`` and ``other`` are MultiIndex.
    NotImplementedError
        If the flat index is not unique.
    """
    from pandas.core.indexes.multi import MultiIndex

    def _get_leaf_sorter(labels):
        """
        Returns sorter for the inner most level while preserving the
        order of higher levels.
        """
        # nothing to sort
        if labels[0].size == 0:
            return np.empty(0, dtype="int64")

        # single level: a plain group sort on its codes
        if len(labels) == 1:
            lab = ensure_int64(labels[0])
            sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max())
            return sorter

        # find indexers of beginning of each set of
        # same-key labels w.r.t all but last level
        tic = labels[0][:-1] != labels[0][1:]
        for lab in labels[1:-1]:
            tic |= lab[:-1] != lab[1:]

        # positions where a new run of identical outer keys begins, with
        # sentinels at both ends
        starts = np.hstack(([True], tic, [True])).nonzero()[0]
        lab = ensure_int64(labels[-1])
        return lib.get_level_sorter(lab, ensure_int64(starts))

    if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
        raise TypeError("Join on level between two MultiIndex objects is ambiguous")

    left, right = self, other

    # From here on `left` is the MultiIndex and `right` the flat index.
    flip_order = not isinstance(self, MultiIndex)
    if flip_order:
        left, right = right, left
        how = {"right": "left", "left": "right"}.get(how, how)

    level = left._get_level_number(level)
    old_level = left.levels[level]

    if not right.is_unique:
        raise NotImplementedError(
            "Index._join_level on non-unique index is not implemented"
        )

    # Join the values of the chosen level against the flat index.
    new_level, left_lev_indexer, right_lev_indexer = old_level.join(
        right, how=how, return_indexers=True
    )

    if left_lev_indexer is None:
        # No left level indexer was returned: the existing level is reused.
        if keep_order or len(left) == 0:
            left_indexer = None
            join_index = left
        else:  # sort the leaves
            left_indexer = _get_leaf_sorter(left.codes[: level + 1])
            join_index = left[left_indexer]

    else:
        # The level changed: translate the old codes into codes for
        # `new_level` (-1 where an old label is absent from the new level).
        left_lev_indexer = ensure_int64(left_lev_indexer)
        rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))

        new_lev_codes = algos.take_nd(
            rev_indexer, left.codes[level], allow_fill=False
        )

        new_codes = list(left.codes)
        new_codes[level] = new_lev_codes

        new_levels = list(left.levels)
        new_levels[level] = new_level

        if keep_order:  # just drop missing values. o.w. keep order
            left_indexer = np.arange(len(left), dtype=np.intp)
            mask = new_lev_codes != -1
            if not mask.all():
                new_codes = [lab[mask] for lab in new_codes]
                left_indexer = left_indexer[mask]

        else:  # tie out the order with other
            if level == 0:  # outer most level, take the fast route
                ngroups = 1 + new_lev_codes.max()
                left_indexer, counts = libalgos.groupsort_indexer(
                    new_lev_codes, ngroups
                )

                # missing values are placed first; drop them!
                left_indexer = left_indexer[counts[0] :]
                new_codes = [lab[left_indexer] for lab in new_codes]

            else:  # sort the leaves
                mask = new_lev_codes != -1
                mask_all = mask.all()
                if not mask_all:
                    new_codes = [lab[mask] for lab in new_codes]

                left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                new_codes = [lab[left_indexer] for lab in new_codes]

                # left_indexers are w.r.t masked frame.
                # reverse to original frame!
                if not mask_all:
                    left_indexer = mask.nonzero()[0][left_indexer]

        join_index = MultiIndex(
            levels=new_levels,
            codes=new_codes,
            names=left.names,
            verify_integrity=False,
        )

    # Position in `right` of every entry of the joined index.
    if right_lev_indexer is not None:
        right_indexer = algos.take_nd(
            right_lev_indexer, join_index.codes[level], allow_fill=False
        )
    else:
        right_indexer = join_index.codes[level]

    # Report indexers in the caller's (self, other) order.
    if flip_order:
        left_indexer, right_indexer = right_indexer, left_indexer

    if return_indexers:
        left_indexer = (
            None if left_indexer is None else ensure_platform_int(left_indexer)
        )
        right_indexer = (
            None if right_indexer is None else ensure_platform_int(right_indexer)
        )
        return join_index, left_indexer, right_indexer
    else:
        return join_index
@cache_readonly
@Appender(IndexOpsMixin.array.__doc__)  # type: ignore
def array(self) -> ExtensionArray:
    # NOTE: no docstring here on purpose; the Appender decorator supplies
    # the documentation from IndexOpsMixin.array.
    data = self._data
    if not isinstance(data, np.ndarray):
        # already an extension array: hand it back untouched
        return data

    # plain ndarrays are wrapped so callers always get an ExtensionArray
    from pandas.core.arrays.numpy_ import PandasArray

    return PandasArray(data)
+ + index | values | _values | _ndarray_values | + ----------------- | --------------- | ------------- | --------------- | + Index | ndarray | ndarray | ndarray | + CategoricalIndex | Categorical | Categorical | ndarray[int] | + DatetimeIndex | ndarray[M8ns] | ndarray[M8ns] | ndarray[M8ns] | + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | ndarray[M8ns] | + PeriodIndex | ndarray[object] | PeriodArray | ndarray[int] | + IntervalIndex | IntervalArray | IntervalArray | ndarray[object] | + + See Also + -------- + values + _ndarray_values + """ + return self._data + + def _internal_get_values(self): + """ + Return `Index` data as an `numpy.ndarray`. + + Returns + ------- + numpy.ndarray + A one-dimensional numpy array of the `Index` values. + + See Also + -------- + Index.values : The attribute that _internal_get_values wraps. + + Examples + -------- + Getting the `Index` values of a `DataFrame`: + + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + ... index=['a', 'b', 'c'], columns=['A', 'B', 'C']) + >>> df + A B C + a 1 2 3 + b 4 5 6 + c 7 8 9 + >>> df.index._internal_get_values() + array(['a', 'b', 'c'], dtype=object) + + Standalone `Index` values: + + >>> idx = pd.Index(['1', '2', '3']) + >>> idx._internal_get_values() + array(['1', '2', '3'], dtype=object) + + `MultiIndex` arrays also have only one dimension: + + >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], ['a', 'b', 'c']], + ... 
@Appender(_index_shared_docs["where"])
def where(self, cond, other=None):
    # NOTE: documentation comes from _index_shared_docs["where"] via the
    # Appender decorator, so no docstring is defined here.

    # No replacement given: fall back to this index's NA marker.
    replacement = self._na_value if other is None else other

    result_dtype = self.dtype
    source = self.values

    if is_bool(replacement) or is_bool_dtype(replacement):
        # bools force casting to object; let the dtype be re-inferred
        source = source.astype(object)
        result_dtype = None

    merged = np.where(cond, source, replacement)

    if self._is_numeric_dtype and np.any(isna(merged)):
        # We can't coerce to the numeric dtype of "self" (unless
        # it's float) if there are NaN values in our output.
        result_dtype = None

    return self._shallow_copy_with_infer(merged, dtype=result_dtype)
def _assert_can_do_op(self, value):
    """
    Raise TypeError unless ``value`` is a scalar.

    Parameters
    ----------
    value : object
        Candidate operand; accepted when ``is_scalar(value)`` is true.

    Raises
    ------
    TypeError
        If ``value`` is not a scalar. The message names the type that
        was passed.
    """
    if is_scalar(value):
        return

    passed_type = type(value).__name__
    raise TypeError(f"'value' must be a scalar, passed: {passed_type}")
def __getitem__(self, key):
    """
    Index into the underlying data, re-wrapping 1-D results as an Index.

    Scalar keys return the element itself. Slices and other indexers
    return a new index built with ``_shallow_copy``. Lists and Series are
    accepted as boolean indexers and are converted to a boolean ndarray
    first.

    If the result has more than one dimension it is returned as-is,
    without re-wrapping, after ``deprecate_ndim_indexing`` has been
    called on it.
    """
    # __getslice__ is intentionally not overloaded: there is no custom
    # logic to put there.
    fetch = self._data.__getitem__
    rewrap = self._shallow_copy

    if is_scalar(key):
        return fetch(com.cast_scalar_indexer(key))

    if isinstance(key, slice):
        # Kept apart from the generic path below so that basic slicing
        # stays cheap.
        return rewrap(fetch(key))

    if com.is_bool_indexer(key):
        key = np.asarray(key, dtype=bool)

    picked = fetch(com.values_from_object(key))

    if is_scalar(picked):
        return picked

    if np.ndim(picked) > 1:
        deprecate_ndim_indexing(picked)
        return picked

    return rewrap(picked)
def append(self, other):
    """
    Append a collection of Index objects together.

    Parameters
    ----------
    other : Index or list/tuple of indices
        A list or tuple is expanded; anything else is treated as a
        single item to append.

    Returns
    -------
    appended : Index
        The name is kept only when every piece, including ``self``,
        carries the same name; otherwise it is None.

    Raises
    ------
    TypeError
        If any of the pieces is not an Index.
    """
    extra = list(other) if isinstance(other, (list, tuple)) else [other]
    pieces = [self, *extra]

    if not all(isinstance(piece, Index) for piece in pieces):
        raise TypeError("all inputs must be Index")

    distinct_names = {piece.name for piece in pieces}
    name = self.name if len(distinct_names) <= 1 else None

    return self._concat(pieces, name)
def identical(self, other) -> bool:
    """
    Stricter variant of ``equals`` that also compares attributes and type.

    Returns
    -------
    bool
        True only if ``self.equals(other)`` holds, every attribute named
        in ``self._comparables`` compares equal on both objects (missing
        attributes are treated as None), and both objects have exactly
        the same type.
    """

    def _attr_matches(attr_name):
        mine = getattr(self, attr_name, None)
        theirs = getattr(other, attr_name, None)
        return mine == theirs

    return (
        self.equals(other)
        and all(map(_attr_matches, self._comparables))
        and type(self) == type(other)
    )
def asof_locs(self, where, mask):
    """
    Locate, for every entry of ``where``, the last non-masked position
    in this index whose value does not exceed it.

    Entries of ``where`` that are smaller than the first non-masked
    value of this index get -1.

    Parameters
    ----------
    where : Index
        Labels to look up; only ``where.values`` is used.
    mask : array-like
        Boolean array selecting the positions of this index that take
        part in the search (True means the position is used).

    Returns
    -------
    numpy.ndarray
        One position into this index per entry of ``where``, or -1.
    """
    own_values = self.values
    targets = where.values

    # Right-sided insertion point among the usable values; stepping one
    # back gives the last usable value <= target (clamped at 0).
    insert_at = own_values[mask].searchsorted(targets, side="right")
    locs = np.where(insert_at > 0, insert_at - 1, 0)

    # Translate positions within the masked values back to positions in
    # the full index.
    usable_positions = np.arange(len(self))[mask]
    result = usable_positions.take(locs)

    # A clamped 0 is ambiguous: flag the targets that really precede the
    # first usable value.
    first_usable = mask.argmax()
    precedes_all = (locs == 0) & (targets < own_values[first_usable])
    result[precedes_all] = -1

    return result
def sort(self, *args, **kwargs):
    """
    Always raise: in-place sorting is not supported.

    Use sort_values instead.

    Raises
    ------
    TypeError
        Unconditionally, whatever arguments are passed.
    """
    message = "cannot sort an Index object in-place, use sort_values instead"
    raise TypeError(message)
def argsort(self, *args, **kwargs):
    """
    Return the integer indices that would sort the index.

    The sort runs on ``self.asi8`` when that is not None, and on
    ``np.array(self)`` otherwise.

    Parameters
    ----------
    *args
        Passed to `numpy.ndarray.argsort`.
    **kwargs
        Passed to `numpy.ndarray.argsort`.

    Returns
    -------
    numpy.ndarray
        Integer indices that would sort the index if used as
        an indexer.

    See Also
    --------
    numpy.argsort : Similar method for NumPy arrays.
    Index.sort_values : Return sorted copy of Index.

    Examples
    --------
    >>> idx = pd.Index(['b', 'a', 'd', 'c'])
    >>> idx
    Index(['b', 'a', 'd', 'c'], dtype='object')

    >>> order = idx.argsort()
    >>> order
    array([1, 0, 3, 2])

    >>> idx[order]
    Index(['a', 'b', 'c', 'd'], dtype='object')
    """
    sortable = self.asi8
    sortable = np.array(self) if sortable is None else sortable
    return sortable.argsort(*args, **kwargs)
def set_value(self, arr, key, value):
    """
    Fast assignment of a value into a 1-dimensional ndarray.

    .. deprecated:: 1.0

    Emits a FutureWarning, then delegates to the index engine's
    ``set_value`` with ``arr`` and ``key`` passed through
    ``com.values_from_object``.

    Notes
    -----
    Only use this if you know what you're doing.
    """
    deprecation_message = (
        "The 'set_value' method is deprecated, and "
        "will be removed in a future version."
    )
    # stacklevel=2 attributes the warning to the caller of set_value
    warnings.warn(deprecation_message, FutureWarning, stacklevel=2)

    engine_set = self._engine.set_value
    raw_arr = com.values_from_object(arr)
    raw_key = com.values_from_object(key)
    engine_set(raw_arr, raw_key, value)
def get_indexer_for(self, target, **kwargs):
    """
    Return an indexer for ``target`` whether or not this index is unique.

    Dispatches to ``get_indexer`` when ``self.is_unique`` is true and to
    ``get_indexer_non_unique`` otherwise. In the latter case only the
    indexer is returned and the second element of the result is
    discarded. ``kwargs`` are forwarded unchanged to whichever method is
    called.

    Returns
    -------
    numpy.ndarray
        List of indices.
    """
    if not self.is_unique:
        indexer, _missing = self.get_indexer_non_unique(target, **kwargs)
        return indexer

    return self.get_indexer(target, **kwargs)
def map(self, mapper, na_action=None):
    """
    Map values using input correspondence (a dict, Series, or function).

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.

    Returns
    -------
    applied : Union[Index, MultiIndex], inferred
        The output of the mapping function applied to the index.
        If the first mapped value is a tuple, a MultiIndex is built from
        the mapped values. An empty result keeps the dtype of ``self``.
    """
    from pandas.core.indexes.multi import MultiIndex

    mapped = super()._map_values(mapper, na_action=na_action)
    attrs = self._get_attributes_dict()
    has_values = bool(mapped.size)

    if has_values and isinstance(mapped[0], tuple):
        # Tuples become a MultiIndex. Level names come from self if it is
        # already a MultiIndex, else self's name repeated once per tuple
        # element, else no names at all.
        own_name = attrs.get("name")
        if isinstance(self, MultiIndex):
            level_names = self.names
        elif own_name:
            level_names = [own_name] * len(mapped[0])
        else:
            level_names = None
        return MultiIndex.from_tuples(mapped, names=level_names)

    attrs["copy"] = False
    if not has_values:
        # empty
        attrs["dtype"] = self.dtype

    return Index(mapped, **attrs)
+ level : str or int, optional + Name or position of the index level to use (if the index is a + `MultiIndex`). + + Returns + ------- + is_contained : ndarray + NumPy array of boolean values. + + See Also + -------- + Series.isin : Same for Series. + DataFrame.isin : Same method for DataFrames. + + Notes + ----- + In the case of `MultiIndex` you must either specify `values` as a + list-like object containing tuples that are the same length as the + number of levels, or specify `level`. Otherwise it will raise a + ``ValueError``. + + If `level` is specified: + + - if it is the name of one *and only one* index level, use that level; + - otherwise it should be a number indicating level position. + + Examples + -------- + >>> idx = pd.Index([1,2,3]) + >>> idx + Int64Index([1, 2, 3], dtype='int64') + + Check whether each index value in a list of values. + >>> idx.isin([1, 4]) + array([ True, False, False]) + + >>> midx = pd.MultiIndex.from_arrays([[1,2,3], + ... ['red', 'blue', 'green']], + ... names=('number', 'color')) + >>> midx + MultiIndex(levels=[[1, 2, 3], ['blue', 'green', 'red']], + codes=[[0, 1, 2], [2, 0, 1]], + names=['number', 'color']) + + Check whether the strings in the 'color' level of the MultiIndex + are in a list of colors. + + >>> midx.isin(['red', 'orange', 'yellow'], level='color') + array([ True, False, False]) + + To check across the levels of a MultiIndex, pass a list of tuples: + + >>> midx.isin([(1, 'red'), (3, 'red')]) + array([ True, False, False]) + + For a DatetimeIndex, string values in `values` are converted to + Timestamps. 
+ + >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13'] + >>> dti = pd.to_datetime(dates) + >>> dti + DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'], + dtype='datetime64[ns]', freq=None) + + >>> dti.isin(['2000-03-11']) + array([ True, False, False]) + """ + if level is not None: + self._validate_index_level(level) + return algos.isin(self, values) + + def _get_string_slice(self, key, use_lhs=True, use_rhs=True): + # this is for partial string indexing, + # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex + raise NotImplementedError + + def slice_indexer(self, start=None, end=None, step=None, kind=None): + """ + For an ordered or unique index, compute the slice indexer for input + labels and step. + + Parameters + ---------- + start : label, default None + If None, defaults to the beginning. + end : label, default None + If None, defaults to the end. + step : int, default None + kind : str, default None + + Returns + ------- + indexer : slice + + Raises + ------ + KeyError : If key does not exist, or key is not unique and index is + not ordered. + + Notes + ----- + This function assumes that the data is sorted, so use at your own peril + + Examples + -------- + This is a method on all index types. For example you can do: + + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_indexer(start='b', end='c') + slice(1, 3) + + >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) + >>> idx.slice_indexer(start='b', end=('c', 'g')) + slice(1, 3) + """ + start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) + + # return a slice + if not is_scalar(start_slice): + raise AssertionError("Start slice bound is non-scalar") + if not is_scalar(end_slice): + raise AssertionError("End slice bound is non-scalar") + + return slice(start_slice, end_slice, step) + + def _maybe_cast_indexer(self, key): + """ + If we have a float key and are not a floating index, then try to cast + to an int if equivalent. 
+ """ + + if is_float(key) and not self.is_floating(): + try: + ckey = int(key) + if ckey == key: + key = ckey + except (OverflowError, ValueError, TypeError): + pass + return key + + def _validate_indexer(self, form, key, kind): + """ + If we are positional indexer, validate that we have appropriate + typed bounds must be an integer. + """ + assert kind in ["ix", "loc", "getitem", "iloc"] + + if key is None: + pass + elif is_integer(key): + pass + elif kind in ["iloc", "getitem"]: + self._invalid_indexer(form, key) + return key + + _index_shared_docs[ + "_maybe_cast_slice_bound" + ] = """ + This function should be overloaded in subclasses that allow non-trivial + casting on label-slice bounds, e.g. datetime-like indices allowing + strings containing formatted datetimes. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'ix', 'loc', 'getitem'} + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + """ + + @Appender(_index_shared_docs["_maybe_cast_slice_bound"]) + def _maybe_cast_slice_bound(self, label, side, kind): + assert kind in ["ix", "loc", "getitem", None] + + # We are a plain index here (sub-class override this method if they + # wish to have special treatment for floats/ints, e.g. 
Float64Index and + # datetimelike Indexes + # reject them + if is_float(label): + if not (kind in ["ix"] and (self.holds_integer() or self.is_floating())): + self._invalid_indexer("slice", label) + + # we are trying to find integer bounds on a non-integer based index + # this is rejected (generally .loc gets you here) + elif is_integer(label): + self._invalid_indexer("slice", label) + + return label + + def _searchsorted_monotonic(self, label, side="left"): + if self.is_monotonic_increasing: + return self.searchsorted(label, side=side) + elif self.is_monotonic_decreasing: + # np.searchsorted expects ascending sort order, have to reverse + # everything for it to work (element ordering, search side and + # resulting value). + pos = self[::-1].searchsorted( + label, side="right" if side == "left" else "left" + ) + return len(self) - pos + + raise ValueError("index must be monotonic increasing or decreasing") + + def get_slice_bound(self, label, side, kind): + """ + Calculate slice bound that corresponds to given label. + + Returns leftmost (one-past-the-rightmost if ``side=='right'``) position + of given label. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'ix', 'loc', 'getitem'} + + Returns + ------- + int + Index of label. + """ + assert kind in ["ix", "loc", "getitem", None] + + if side not in ("left", "right"): + raise ValueError( + f"Invalid value for side kwarg, must be either" + f" 'left' or 'right': {side}" + ) + + original_label = label + + # For datetime indices label may be a string that has to be converted + # to datetime boundary according to its resolution. 
+ label = self._maybe_cast_slice_bound(label, side, kind) + + # we need to look up the label + try: + slc = self.get_loc(label) + except KeyError as err: + try: + return self._searchsorted_monotonic(label, side) + except ValueError: + # raise the original KeyError + raise err + + if isinstance(slc, np.ndarray): + # get_loc may return a boolean array or an array of indices, which + # is OK as long as they are representable by a slice. + if is_bool_dtype(slc): + slc = lib.maybe_booleans_to_slice(slc.view("u1")) + else: + slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self)) + if isinstance(slc, np.ndarray): + raise KeyError( + f"Cannot get {side} slice bound for non-unique " + f"label: {repr(original_label)}" + ) + + if isinstance(slc, slice): + if side == "left": + return slc.start + else: + return slc.stop + else: + if side == "right": + return slc + 1 + else: + return slc + + def slice_locs(self, start=None, end=None, step=None, kind=None): + """ + Compute slice locations for input labels. + + Parameters + ---------- + start : label, default None + If None, defaults to the beginning. + end : label, default None + If None, defaults to the end. + step : int, defaults None + If None, defaults to 1. + kind : {'ix', 'loc', 'getitem'} or None + + Returns + ------- + start, end : int + + See Also + -------- + Index.get_loc : Get location for a single label. + + Notes + ----- + This method only works if the index is monotonic or unique. + + Examples + -------- + >>> idx = pd.Index(list('abcd')) + >>> idx.slice_locs(start='b', end='c') + (1, 3) + """ + inc = step is None or step >= 0 + + if not inc: + # If it's a reverse slice, temporarily swap bounds. 
+ start, end = end, start + + # GH 16785: If start and end happen to be date strings with UTC offsets + # attempt to parse and check that the offsets are the same + if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)): + try: + ts_start = Timestamp(start) + ts_end = Timestamp(end) + except (ValueError, TypeError): + pass + else: + if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): + raise ValueError("Both dates must have the same UTC offset") + + start_slice = None + if start is not None: + start_slice = self.get_slice_bound(start, "left", kind) + if start_slice is None: + start_slice = 0 + + end_slice = None + if end is not None: + end_slice = self.get_slice_bound(end, "right", kind) + if end_slice is None: + end_slice = len(self) + + if not inc: + # Bounds at this moment are swapped, swap them back and shift by 1. + # + # slice_locs('B', 'A', step=-1): s='B', e='A' + # + # s='A' e='B' + # AFTER SWAP: | | + # v ------------------> V + # ----------------------------------- + # | | |A|A|A|A| | | | | |B|B| | | | | + # ----------------------------------- + # ^ <------------------ ^ + # SHOULD BE: | | + # end=s-1 start=e-1 + # + end_slice, start_slice = start_slice - 1, end_slice - 1 + + # i == -1 triggers ``len(self) + i`` selection that points to the + # last element, not before-the-first one, subtracting len(self) + # compensates that. + if end_slice == -1: + end_slice -= len(self) + if start_slice == -1: + start_slice -= len(self) + + return start_slice, end_slice + + def delete(self, loc): + """ + Make new Index with passed location(-s) deleted. + + Returns + ------- + new_index : Index + """ + return self._shallow_copy(np.delete(self._data, loc)) + + def insert(self, loc, item): + """ + Make new Index inserting new item at location. + + Follows Python list.append semantics for negative values. 
+ + Parameters + ---------- + loc : int + item : object + + Returns + ------- + new_index : Index + """ + _self = np.asarray(self) + item = self._coerce_scalar_to_index(item)._ndarray_values + idx = np.concatenate((_self[:loc], item, _self[loc:])) + return self._shallow_copy_with_infer(idx) + + def drop(self, labels, errors="raise"): + """ + Make new Index with passed list of labels deleted. + + Parameters + ---------- + labels : array-like + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. + + Returns + ------- + dropped : Index + + Raises + ------ + KeyError + If not all of the labels are found in the selected axis + """ + arr_dtype = "object" if self.dtype == "object" else None + labels = com.index_labels_to_array(labels, dtype=arr_dtype) + indexer = self.get_indexer(labels) + mask = indexer == -1 + if mask.any(): + if errors != "ignore": + raise KeyError(f"{labels[mask]} not found in axis") + indexer = indexer[~mask] + return self.delete(indexer) + + # -------------------------------------------------------------------- + # Generated Arithmetic, Comparison, and Unary Methods + + @classmethod + def _add_comparison_methods(cls): + """ + Add in comparison methods. + """ + cls.__eq__ = _make_comparison_op(operator.eq, cls) + cls.__ne__ = _make_comparison_op(operator.ne, cls) + cls.__lt__ = _make_comparison_op(operator.lt, cls) + cls.__gt__ = _make_comparison_op(operator.gt, cls) + cls.__le__ = _make_comparison_op(operator.le, cls) + cls.__ge__ = _make_comparison_op(operator.ge, cls) + + @classmethod + def _add_numeric_methods_add_sub_disabled(cls): + """ + Add in the numeric add/sub methods to disable. 
+ """ + cls.__add__ = make_invalid_op("__add__") + cls.__radd__ = make_invalid_op("__radd__") + cls.__iadd__ = make_invalid_op("__iadd__") + cls.__sub__ = make_invalid_op("__sub__") + cls.__rsub__ = make_invalid_op("__rsub__") + cls.__isub__ = make_invalid_op("__isub__") + + @classmethod + def _add_numeric_methods_disabled(cls): + """ + Add in numeric methods to disable other than add/sub. + """ + cls.__pow__ = make_invalid_op("__pow__") + cls.__rpow__ = make_invalid_op("__rpow__") + cls.__mul__ = make_invalid_op("__mul__") + cls.__rmul__ = make_invalid_op("__rmul__") + cls.__floordiv__ = make_invalid_op("__floordiv__") + cls.__rfloordiv__ = make_invalid_op("__rfloordiv__") + cls.__truediv__ = make_invalid_op("__truediv__") + cls.__rtruediv__ = make_invalid_op("__rtruediv__") + cls.__mod__ = make_invalid_op("__mod__") + cls.__divmod__ = make_invalid_op("__divmod__") + cls.__neg__ = make_invalid_op("__neg__") + cls.__pos__ = make_invalid_op("__pos__") + cls.__abs__ = make_invalid_op("__abs__") + cls.__inv__ = make_invalid_op("__inv__") + + @classmethod + def _add_numeric_methods_binary(cls): + """ + Add in numeric methods. + """ + cls.__add__ = _make_arithmetic_op(operator.add, cls) + cls.__radd__ = _make_arithmetic_op(ops.radd, cls) + cls.__sub__ = _make_arithmetic_op(operator.sub, cls) + cls.__rsub__ = _make_arithmetic_op(ops.rsub, cls) + cls.__rpow__ = _make_arithmetic_op(ops.rpow, cls) + cls.__pow__ = _make_arithmetic_op(operator.pow, cls) + + cls.__truediv__ = _make_arithmetic_op(operator.truediv, cls) + cls.__rtruediv__ = _make_arithmetic_op(ops.rtruediv, cls) + + # TODO: rmod? rdivmod? 
+ cls.__mod__ = _make_arithmetic_op(operator.mod, cls) + cls.__floordiv__ = _make_arithmetic_op(operator.floordiv, cls) + cls.__rfloordiv__ = _make_arithmetic_op(ops.rfloordiv, cls) + cls.__divmod__ = _make_arithmetic_op(divmod, cls) + cls.__mul__ = _make_arithmetic_op(operator.mul, cls) + cls.__rmul__ = _make_arithmetic_op(ops.rmul, cls) + + @classmethod + def _add_numeric_methods_unary(cls): + """ + Add in numeric unary methods. + """ + + def _make_evaluate_unary(op, opstr): + def _evaluate_numeric_unary(self): + + attrs = self._get_attributes_dict() + return Index(op(self.values), **attrs) + + _evaluate_numeric_unary.__name__ = opstr + return _evaluate_numeric_unary + + cls.__neg__ = _make_evaluate_unary(operator.neg, "__neg__") + cls.__pos__ = _make_evaluate_unary(operator.pos, "__pos__") + cls.__abs__ = _make_evaluate_unary(np.abs, "__abs__") + cls.__inv__ = _make_evaluate_unary(lambda x: -x, "__inv__") + + @classmethod + def _add_numeric_methods(cls): + cls._add_numeric_methods_unary() + cls._add_numeric_methods_binary() + + @classmethod + def _add_logical_methods(cls): + """ + Add in logical methods. + """ + _doc = """ + %(desc)s + + Parameters + ---------- + *args + These parameters will be passed to numpy.%(outname)s. + **kwargs + These parameters will be passed to numpy.%(outname)s. + + Returns + ------- + %(outname)s : bool or array_like (if axis is specified) + A single element array_like may be converted to bool.""" + + _index_shared_docs["index_all"] = dedent( + """ + + See Also + -------- + Index.any : Return whether any element in an Index is True. + Series.any : Return whether any element in a Series is True. + Series.all : Return whether all elements in a Series are True. + + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity + evaluate to True because these are not equal to zero. + + Examples + -------- + **all** + + True, because nonzero integers are considered True. 
+ + >>> pd.Index([1, 2, 3]).all() + True + + False, because ``0`` is considered False. + + >>> pd.Index([0, 1, 2]).all() + False + + **any** + + True, because ``1`` is considered True. + + >>> pd.Index([0, 0, 1]).any() + True + + False, because ``0`` is considered False. + + >>> pd.Index([0, 0, 0]).any() + False + """ + ) + + _index_shared_docs["index_any"] = dedent( + """ + + See Also + -------- + Index.all : Return whether all elements are True. + Series.all : Return whether all elements are True. + + Notes + ----- + Not a Number (NaN), positive infinity and negative infinity + evaluate to True because these are not equal to zero. + + Examples + -------- + >>> index = pd.Index([0, 1, 2]) + >>> index.any() + True + + >>> index = pd.Index([0, 0, 0]) + >>> index.any() + False + """ + ) + + def _make_logical_function(name, desc, f): + @Substitution(outname=name, desc=desc) + @Appender(_index_shared_docs["index_" + name]) + @Appender(_doc) + def logical_func(self, *args, **kwargs): + result = f(self.values) + if ( + isinstance(result, (np.ndarray, ABCSeries, Index)) + and result.ndim == 0 + ): + # return NumPy type + return result.dtype.type(result.item()) + else: # pragma: no cover + return result + + logical_func.__name__ = name + return logical_func + + cls.all = _make_logical_function( + "all", "Return whether all elements are True.", np.all + ) + cls.any = _make_logical_function( + "any", "Return whether any element is True.", np.any + ) + + @classmethod + def _add_logical_methods_disabled(cls): + """ + Add in logical methods to disable. + """ + cls.all = make_invalid_op("all") + cls.any = make_invalid_op("any") + + @property + def shape(self): + """ + Return a tuple of the shape of the underlying data. 
+ """ + # not using "(len(self), )" to return "correct" shape if the values + # consists of a >1 D array (see GH-27775) + # overridden in MultiIndex.shape to avoid materializing the values + return self._values.shape + + +Index._add_numeric_methods_disabled() +Index._add_logical_methods() +Index._add_comparison_methods() + + +def ensure_index_from_sequences(sequences, names=None): + """ + Construct an index from sequences of data. + + A single sequence returns an Index. Many sequences returns a + MultiIndex. + + Parameters + ---------- + sequences : sequence of sequences + names : sequence of str + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> ensure_index_from_sequences([[1, 2, 3]], names=['name']) + Int64Index([1, 2, 3], dtype='int64', name='name') + + >>> ensure_index_from_sequences([['a', 'a'], ['a', 'b']], + names=['L1', 'L2']) + MultiIndex([('a', 'a'), + ('a', 'b')], + names=['L1', 'L2']) + + See Also + -------- + ensure_index + """ + from pandas.core.indexes.multi import MultiIndex + + if len(sequences) == 1: + if names is not None: + names = names[0] + return Index(sequences[0], name=names) + else: + return MultiIndex.from_arrays(sequences, names=names) + + +def ensure_index(index_like, copy=False): + """ + Ensure that we have an index from some index-like object. 
+ + Parameters + ---------- + index : sequence + An Index or other sequence + copy : bool + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> ensure_index(['a', 'b']) + Index(['a', 'b'], dtype='object') + + >>> ensure_index([('a', 'a'), ('b', 'c')]) + Index([('a', 'a'), ('b', 'c')], dtype='object') + + >>> ensure_index([['a', 'a'], ['b', 'c']]) + MultiIndex([('a', 'b'), + ('a', 'c')], + dtype='object') + ) + + See Also + -------- + ensure_index_from_sequences + """ + if isinstance(index_like, Index): + if copy: + index_like = index_like.copy() + return index_like + if hasattr(index_like, "name"): + return Index(index_like, name=index_like.name, copy=copy) + + if is_iterator(index_like): + index_like = list(index_like) + + # must check for exactly list here because of strict type + # check in clean_index_list + if isinstance(index_like, list): + if type(index_like) != list: + index_like = list(index_like) + + converted, all_arrays = lib.clean_index_list(index_like) + + if len(converted) > 0 and all_arrays: + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex.from_arrays(converted) + else: + index_like = converted + else: + # clean_index_list does the equivalent of copying + # so only need to do this if not list instance + if copy: + from copy import copy + + index_like = copy(index_like) + + return Index(index_like) + + +def _ensure_has_len(seq): + """ + If seq is an iterator, put its values into a list. + """ + try: + len(seq) + except TypeError: + return list(seq) + else: + return seq + + +def _trim_front(strings): + """ + Trims zeros and decimal points. 
+ """ + trimmed = strings + while len(strings) > 0 and all(x[0] == " " for x in trimmed): + trimmed = [x[1:] for x in trimmed] + return trimmed + + +def _validate_join_method(method): + if method not in ["left", "right", "inner", "outer"]: + raise ValueError(f"do not recognize join method {method}") + + +def default_index(n): + from pandas.core.indexes.range import RangeIndex + + return RangeIndex(0, n, name=None) + + +def maybe_extract_name(name, obj, cls) -> Optional[Hashable]: + """ + If no name is passed, then extract it from data, validating hashability. + """ + if name is None and isinstance(obj, (Index, ABCSeries)): + # Note we don't just check for "name" attribute since that would + # pick up e.g. dtype.name + name = obj.name + + # GH#29069 + if not is_hashable(name): + raise TypeError(f"{cls.__name__}.name must be a hashable type") + + return name + + +def _maybe_cast_with_dtype(data: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray: + """ + If a dtype is passed, cast to the closest matching dtype that is supported + by Index. + + Parameters + ---------- + data : np.ndarray + dtype : np.dtype + copy : bool + + Returns + ------- + np.ndarray + """ + # we need to avoid having numpy coerce + # things that look like ints/floats to ints unless + # they are actually ints, e.g. '0' and 0.0 + # should not be coerced + # GH 11836 + if is_integer_dtype(dtype): + inferred = lib.infer_dtype(data, skipna=False) + if inferred == "integer": + data = maybe_cast_to_integer_array(data, dtype, copy=copy) + elif inferred in ["floating", "mixed-integer-float"]: + if isna(data).any(): + raise ValueError("cannot convert float NaN to integer") + + if inferred == "mixed-integer-float": + data = maybe_cast_to_integer_array(data, dtype) + + # If we are actually all equal to integers, + # then coerce to integer. 
+ try: + data = _try_convert_to_int_array(data, copy, dtype) + except ValueError: + data = np.array(data, dtype=np.float64, copy=copy) + + elif inferred == "string": + pass + else: + data = data.astype(dtype) + elif is_float_dtype(dtype): + inferred = lib.infer_dtype(data, skipna=False) + if inferred == "string": + pass + else: + data = data.astype(dtype) + else: + data = np.array(data, dtype=dtype, copy=copy) + + return data + + +def _maybe_cast_data_without_dtype(subarr): + """ + If we have an arraylike input but no passed dtype, try to infer + a supported dtype. + + Parameters + ---------- + subarr : np.ndarray, Index, or Series + + Returns + ------- + converted : np.ndarray or ExtensionArray + dtype : np.dtype or ExtensionDtype + """ + # Runtime import needed bc IntervalArray imports Index + from pandas.core.arrays import ( + IntervalArray, + PeriodArray, + DatetimeArray, + TimedeltaArray, + ) + + inferred = lib.infer_dtype(subarr, skipna=False) + + if inferred == "integer": + try: + data = _try_convert_to_int_array(subarr, False, None) + return data, data.dtype + except ValueError: + pass + + return subarr, object + + elif inferred in ["floating", "mixed-integer-float", "integer-na"]: + # TODO: Returns IntegerArray for integer-na case in the future + return subarr, np.float64 + + elif inferred == "interval": + try: + data = IntervalArray._from_sequence(subarr, copy=False) + return data, data.dtype + except ValueError: + # GH27172: mixed closed Intervals --> object dtype + pass + elif inferred == "boolean": + # don't support boolean explicitly ATM + pass + elif inferred != "string": + if inferred.startswith("datetime"): + try: + data = DatetimeArray._from_sequence(subarr, copy=False) + return data, data.dtype + except (ValueError, OutOfBoundsDatetime): + # GH 27011 + # If we have mixed timezones, just send it + # down the base constructor + pass + + elif inferred.startswith("timedelta"): + data = TimedeltaArray._from_sequence(subarr, copy=False) + return data, 
data.dtype + elif inferred == "period": + try: + data = PeriodArray._from_sequence(subarr) + return data, data.dtype + except IncompatibleFrequency: + pass + + return subarr, subarr.dtype + + +def _try_convert_to_int_array( + data: np.ndarray, copy: bool, dtype: np.dtype +) -> np.ndarray: + """ + Attempt to convert an array of data into an integer array. + + Parameters + ---------- + data : The data to convert. + copy : bool + Whether to copy the data or not. + dtype : np.dtype + + Returns + ------- + int_array : data converted to either an ndarray[int64] or ndarray[uint64] + + Raises + ------ + ValueError if the conversion was not successful. + """ + + if not is_unsigned_integer_dtype(dtype): + # skip int64 conversion attempt if uint-like dtype is passed, as + # this could return Int64Index when UInt64Index is what's desired + try: + res = data.astype("i8", copy=False) + if (res == data).all(): + return res # TODO: might still need to copy + except (OverflowError, TypeError, ValueError): + pass + + # Conversion to int64 failed (possibly due to overflow) or was skipped, + # so let's try now with uint64. 
+ try: + res = data.astype("u8", copy=False) + if (res == data).all(): + return res # TODO: might still need to copy + except (OverflowError, TypeError, ValueError): + pass + + raise ValueError diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py new file mode 100644 index 00000000..51201367 --- /dev/null +++ b/pandas/core/indexes/category.py @@ -0,0 +1,884 @@ +from typing import Any, List +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import index as libindex +from pandas._libs.hashtable import duplicated_int64 +from pandas._typing import AnyArrayLike +from pandas.util._decorators import Appender, cache_readonly + +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_categorical_dtype, + is_interval_dtype, + is_list_like, + is_scalar, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.generic import ABCCategorical, ABCSeries +from pandas.core.dtypes.missing import isna + +from pandas.core import accessor +from pandas.core.algorithms import take_1d +from pandas.core.arrays.categorical import Categorical, _recode_for_categories, contains +import pandas.core.common as com +import pandas.core.indexes.base as ibase +from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name +from pandas.core.indexes.extension import ExtensionIndex, inherit_names +import pandas.core.missing as missing +from pandas.core.ops import get_op_result_name + +_index_doc_kwargs = dict(ibase._index_doc_kwargs) +_index_doc_kwargs.update(dict(target_klass="CategoricalIndex")) + + +@inherit_names( + [ + "argsort", + "_internal_get_values", + "tolist", + "codes", + "categories", + "ordered", + "_reverse_indexer", + "searchsorted", + "is_dtype_equal", + "min", + "max", + ], + Categorical, +) +@accessor.delegate_names( + delegate=Categorical, + accessors=[ + "rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + 
"remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered", + ], + typ="method", + overwrite=True, +) +class CategoricalIndex(ExtensionIndex, accessor.PandasDelegate): + """ + Index based on an underlying :class:`Categorical`. + + CategoricalIndex, like Categorical, can only take on a limited, + and usually fixed, number of possible values (`categories`). Also, + like Categorical, it might have an order, but numerical operations + (additions, divisions, ...) are not possible. + + Parameters + ---------- + data : array-like (1-dimensional) + The values of the categorical. If `categories` are given, values not in + `categories` will be replaced with NaN. + categories : index-like, optional + The categories for the categorical. Items need to be unique. + If the categories are not given here (and also not in `dtype`), they + will be inferred from the `data`. + ordered : bool, optional + Whether or not this categorical is treated as an ordered + categorical. If not given here or in `dtype`, the resulting + categorical will be unordered. + dtype : CategoricalDtype or "category", optional + If :class:`CategoricalDtype`, cannot be used together with + `categories` or `ordered`. + + .. versionadded:: 0.21.0 + copy : bool, default False + Make a copy of input ndarray. + name : object, optional + Name to be stored in the index. + + Attributes + ---------- + codes + categories + ordered + + Methods + ------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + as_ordered + as_unordered + map + + Raises + ------ + ValueError + If the categories do not validate. + TypeError + If an explicit ``ordered=True`` is given but no `categories` and the + `values` are not sortable. + + See Also + -------- + Index : The base pandas Index type. + Categorical : A categorical array. + CategoricalDtype : Type for categorical data. + + Notes + ----- + See the `user guide + `_ + for more. 
+ + Examples + -------- + >>> pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c']) + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') # noqa + + ``CategoricalIndex`` can also be instantiated from a ``Categorical``: + + >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + >>> pd.CategoricalIndex(c) + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') # noqa + + Ordered ``CategoricalIndex`` can have a min and max value. + + >>> ci = pd.CategoricalIndex(['a','b','c','a','b','c'], ordered=True, + ... categories=['c', 'b', 'a']) + >>> ci + CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], categories=['c', 'b', 'a'], ordered=True, dtype='category') # noqa + >>> ci.min() + 'c' + """ + + _typ = "categoricalindex" + + _raw_inherit = { + "argsort", + "_internal_get_values", + "tolist", + "codes", + "categories", + "ordered", + "_reverse_indexer", + "searchsorted", + } + + codes: np.ndarray + categories: Index + + @property + def _engine_type(self): + # self.codes can have dtype int8, int16, int32 or int64, so we need + # to return the corresponding engine type (libindex.Int8Engine, etc.). 
+ return { + np.int8: libindex.Int8Engine, + np.int16: libindex.Int16Engine, + np.int32: libindex.Int32Engine, + np.int64: libindex.Int64Engine, + }[self.codes.dtype.type] + + _attributes = ["name"] + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None + ): + + dtype = CategoricalDtype._from_values_or_dtype(data, categories, ordered, dtype) + + name = maybe_extract_name(name, data, cls) + + if not is_categorical_dtype(data): + # don't allow scalars + # if data is None, then categories must be provided + if is_scalar(data): + if data is not None or categories is None: + raise cls._scalar_data_error(data) + data = [] + + data = cls._create_categorical(data, dtype=dtype) + + data = data.copy() if copy else data + + return cls._simple_new(data, name=name) + + def _create_from_codes(self, codes, dtype=None, name=None): + """ + *this is an internal non-public method* + + create the correct categorical from codes + + Parameters + ---------- + codes : new codes + dtype: CategoricalDtype, defaults to existing + name : optional name attribute, defaults to existing + + Returns + ------- + CategoricalIndex + """ + + if dtype is None: + dtype = self.dtype + if name is None: + name = self.name + cat = Categorical.from_codes(codes, dtype=dtype) + return CategoricalIndex(cat, name=name) + + @classmethod + def _create_categorical(cls, data, dtype=None): + """ + *this is an internal non-public method* + + create the correct categorical from data and the properties + + Parameters + ---------- + data : data for new Categorical + dtype : CategoricalDtype, defaults to existing + + Returns + ------- + Categorical + """ + if isinstance(data, (cls, ABCSeries)) and is_categorical_dtype(data): + data = data.values + + if not isinstance(data, ABCCategorical): + return Categorical(data, dtype=dtype) + + if isinstance(dtype, CategoricalDtype) and dtype != 
data.dtype: + # we want to silently ignore dtype='category' + data = data._set_dtype(dtype) + return data + + @classmethod + def _simple_new(cls, values, name=None, dtype=None): + result = object.__new__(cls) + + values = cls._create_categorical(values, dtype=dtype) + result._data = values + result.name = name + + result._reset_identity() + result._no_setting_name = False + return result + + # -------------------------------------------------------------------- + + @Appender(_index_shared_docs["_shallow_copy"]) + def _shallow_copy(self, values=None, dtype=None, **kwargs): + if dtype is None: + dtype = self.dtype + return super()._shallow_copy(values=values, dtype=dtype, **kwargs) + + def _is_dtype_compat(self, other) -> bool: + """ + *this is an internal non-public method* + + provide a comparison between the dtype of self and other (coercing if + needed) + + Raises + ------ + TypeError if the dtypes are not compatible + """ + if is_categorical_dtype(other): + if isinstance(other, CategoricalIndex): + other = other._values + if not other.is_dtype_equal(self): + raise TypeError( + "categories must match existing categories when appending" + ) + else: + values = other + if not is_list_like(values): + values = [values] + other = CategoricalIndex(self._create_categorical(other, dtype=self.dtype)) + if not other.isin(values).all(): + raise TypeError( + "cannot append a non-category item to a CategoricalIndex" + ) + + return other + + def equals(self, other): + """ + Determine if two CategoricalIndex objects contain the same elements. + + Returns + ------- + bool + If two CategoricalIndex objects have equal elements True, + otherwise False. 
def _format_attrs(self):
    """
    Return a list of tuples of the (attr,formatted_value)
    """
    # An option value of 0 falls back to showing 10 categories.
    max_categories = get_option("display.max_categories")
    if max_categories == 0:
        max_categories = 10

    attrs = [
        (
            "categories",
            ibase.default_pprint(self.categories, max_seq_items=max_categories),
        ),
        ("ordered", self.ordered),
    ]
    if self.name is not None:
        attrs.append(("name", ibase.default_pprint(self.name)))
    attrs.append(("dtype", f"'{self.dtype.name}'"))

    # "length" is only reported when the index is longer than the display
    # limit; a falsy limit is replaced by the length, so nothing is added.
    n_items = len(self)
    seq_limit = get_option("display.max_seq_items") or n_items
    if n_items > seq_limit:
        attrs.append(("length", n_items))
    return attrs
@Appender(_index_shared_docs["astype"])
def astype(self, dtype, copy=True):
    # No docstring here on purpose: Appender supplies the shared one.
    if is_interval_dtype(dtype):
        from pandas import IntervalIndex

        return IntervalIndex(np.array(self))

    if is_categorical_dtype(dtype):
        # GH 18630
        dtype = self.dtype.update_dtype(dtype)
        if dtype == self.dtype:
            # Nothing to convert; only honour the `copy` flag.
            if copy:
                return self.copy()
            return self

    return Index.astype(self, dtype=dtype, copy=copy)
@Appender(_index_shared_docs["index_unique"] % _index_doc_kwargs)
def unique(self, level=None):
    # No docstring here on purpose: Appender supplies the shared one.
    if level is not None:
        self._validate_index_level(level)
    uniques = self.values.unique()
    # CategoricalIndex._shallow_copy falls back to self.dtype when no dtype
    # is given, so pass the dtype of the unique values explicitly.
    return self._shallow_copy(uniques, dtype=uniques.dtype)
def get_value(self, series: AnyArrayLike, key: Any):
    """
    Fast lookup of value from 1-dimensional ndarray. Only use this if you
    know what you're doing

    Parameters
    ----------
    series : Series, ExtensionArray, Index, or ndarray
        1-dimensional array to take values from
    key : scalar
        The value of this index at the position of the desired value,
        otherwise the positional index of the desired value

    Returns
    -------
    Any
        The element of the series at the position indicated by the key
    """
    try:
        # First try to treat `key` as a label of this index.
        label = self._convert_scalar_indexer(
            com.values_from_object(key), kind="getitem"
        )
        position = self.get_loc(label)
        return series.take([position])[0]
    except (KeyError, TypeError):
        # Not usable as a label: fall through to the parent implementation.
        pass

    # we might be a positional indexer
    return super().get_value(series, key)
+ if other is None: + other = self._na_value + values = np.where(cond, self.values, other) + cat = Categorical(values, dtype=self.dtype) + return self._shallow_copy(cat, **self._get_attributes_dict()) + + def reindex(self, target, method=None, level=None, limit=None, tolerance=None): + """ + Create index with target's values (move/add/delete values as necessary) + + Returns + ------- + new_index : pd.Index + Resulting index + indexer : np.ndarray or None + Indices of output values in original index + + """ + if method is not None: + raise NotImplementedError( + "argument method is not implemented for CategoricalIndex.reindex" + ) + if level is not None: + raise NotImplementedError( + "argument level is not implemented for CategoricalIndex.reindex" + ) + if limit is not None: + raise NotImplementedError( + "argument limit is not implemented for CategoricalIndex.reindex" + ) + + target = ibase.ensure_index(target) + + missing: List[int] + if self.equals(target): + indexer = None + missing = [] + else: + indexer, missing = self.get_indexer_non_unique(np.array(target)) + + if len(self.codes) and indexer is not None: + new_target = self.take(indexer) + else: + new_target = target + + # filling in missing if needed + if len(missing): + cats = self.categories.get_indexer(target) + + if (cats == -1).any(): + # coerce to a regular index here! + result = Index(np.array(self), name=self.name) + new_target, indexer, _ = result._reindex_non_unique(np.array(target)) + else: + + codes = new_target.codes.copy() + codes[indexer == -1] = cats[missing] + new_target = self._create_from_codes(codes) + + # we always want to return an Index type here + # to be consistent with .reindex for other index types (e.g. 
def _reindex_non_unique(self, target):
    """
    Reindex against `target` when self is non-unique (which a
    CategoricalIndex almost always is).

    Parameters
    ----------
    target : list-like
        Labels to conform to.

    Returns
    -------
    new_target : index returned by ``self.reindex``, re-wrapped through
        ``self._shallow_copy`` when every target label is one of my
        categories
    indexer : np.ndarray or None
        The indexer returned by ``self.reindex``.
    new_indexer : np.ndarray or None
        ``arange(len(indexer))`` with -1 at the missing positions, or None
        when nothing is missing.
    """
    new_target, indexer = self.reindex(target)
    new_indexer = None

    # ``reindex`` returns ``indexer=None`` when self equals target; comparing
    # None with -1 yields a plain bool, which has no ``.any()``.
    if indexer is not None:
        check = indexer == -1
        if check.any():
            # one output position per requested label
            new_indexer = np.arange(len(indexer))
            new_indexer[check] = -1

    cats = self.categories.get_indexer(target)
    if not (cats == -1).any():
        # .reindex returns normal Index. Revert to CategoricalIndex if
        # all targets are included in my categories
        new_target = self._shallow_copy(new_target)

    return new_target, indexer, new_indexer
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(self, target):
    # No docstring here on purpose: Appender supplies the shared one.
    target = ibase.ensure_index(target)

    if not isinstance(target, CategoricalIndex):
        codes = self.categories.get_indexer(target)
    elif target.categories is self.categories:
        # Indexing on codes is more efficient if categories are the same:
        codes = target.codes
    else:
        # Different categories: translate the underlying values into codes
        # of my own categories.
        codes = self.categories.get_indexer(target.values)

    indexer, missing = self._engine.get_indexer_non_unique(codes)
    return ensure_platform_int(indexer), missing
def take_nd(self, *args, **kwargs):
    """Alias for `take`"""
    # Deprecated entry point: warn, then forward every argument to `take`.
    # stacklevel=2 attributes the warning to the caller of take_nd.
    message = (
        "CategoricalIndex.take_nd is deprecated, use CategoricalIndex.take instead"
    )
    warnings.warn(message, FutureWarning, stacklevel=2)
    return self.take(*args, **kwargs)
def insert(self, loc, item):
    """
    Make new Index inserting new item at location.

    A negative `loc` is interpreted the way slicing interprets it, because
    the codes are split as ``codes[:loc]`` and ``codes[loc:]``.

    Parameters
    ----------
    loc : int
    item : object
        Must be an existing category, or a scalar missing value.

    Returns
    -------
    new_index : Index

    Raises
    ------
    TypeError if the item is not in the categories

    """
    code = self.categories.get_indexer([item])
    # A code of -1 means "not a category"; that is only accepted for a
    # scalar missing value, which is stored as code -1.
    if (code == -1) and not (is_scalar(item) and isna(item)):
        raise TypeError(
            "cannot insert an item into a CategoricalIndex "
            "that is not already an existing category"
        )

    existing = self.codes
    new_codes = np.concatenate((existing[:loc], code, existing[loc:]))
    return self._create_from_codes(new_codes)
def _join_i8_wrapper(joinf, with_indexers: bool = True):
    """
    Create the join wrapper methods.

    Parameters
    ----------
    joinf : callable
        Join function called as ``joinf(left, right)`` on the i8 views.
    with_indexers : bool, default True
        Whether `joinf` returns ``(join_index, left_indexer, right_indexer)``
        rather than a single result.

    Returns
    -------
    staticmethod
        Wrapper taking ``(left, right)``.
    """
    # Containers that are re-viewed as int64 before being handed to `joinf`.
    viewable = (np.ndarray, ABCIndex, ABCSeries, DatetimeLikeArrayMixin)

    @staticmethod  # type: ignore
    def wrapper(left, right):
        if isinstance(left, viewable):
            left = left.view("i8")
        if isinstance(right, viewable):
            right = right.view("i8")

        results = joinf(left, right)
        if not with_indexers:
            return results

        # dtype should be timedelta64[ns] for TimedeltaIndex
        #  and datetime64[ns] for DatetimeIndex
        # NOTE(review): `left` may already have been rebound to its i8 view
        # above, in which case this reads the dtype of that view rather than
        # the original dtype -- confirm this is intended.
        dtype = left.dtype.base

        join_index, left_indexer, right_indexer = results
        return join_index.view(dtype), left_indexer, right_indexer

    return wrapper
@Appender(_index_shared_docs["contains"] % _index_doc_kwargs)
def __contains__(self, key) -> bool:
    # No docstring here on purpose: Appender supplies the shared one.
    try:
        res = self.get_loc(key)
        # get_loc may give a scalar position, a slice, or a list-like
        # (non-empty means found). Coerce to a real bool so that a direct
        # call never leaks ``len(res)`` as an int.
        return bool(
            is_scalar(res)
            or isinstance(res, slice)
            or (is_list_like(res) and len(res))
        )
    except (KeyError, TypeError, ValueError):
        # Any lookup failure means the key is not contained.
        return False
def _convert_tolerance(self, tolerance, target):
    """
    Convert `tolerance` to a numpy array via ``to_timedelta``.

    Parameters
    ----------
    tolerance : scalar or list-like
        Anything accepted by ``to_timedelta``.
    target : object with a ``size`` attribute
        The target the tolerance applies to.

    Returns
    -------
    np.ndarray

    Raises
    ------
    ValueError
        If `tolerance` has more than one element and its size differs from
        ``target.size``.
    """
    as_timedelta = to_timedelta(tolerance)
    converted = np.asarray(as_timedelta.to_numpy())

    # A single tolerance is accepted for any target size.
    sizes_differ = target.size != converted.size
    if sizes_differ and converted.size > 1:
        raise ValueError("list-like tolerance size must match target index size")
    return converted
def argmax(self, axis=None, skipna=True, *args, **kwargs):
    """
    Returns the indices of the maximum values along an axis.

    See `numpy.ndarray.argmax` for more information on the
    `axis` parameter.

    Returns -1 when every value is missing, or when there are missing
    values and `skipna` is False.

    See Also
    --------
    numpy.ndarray.argmax
    """
    nv.validate_argmax(args, kwargs)
    nv.validate_minmax_axis(axis)

    i8 = self.asi8
    if self.hasnans:
        mask = self._isnan
        if mask.all() or not skipna:
            return -1
        # Work on a copy so the index's own data is never modified.
        i8 = i8.copy()
        # Missing slots must never win the comparison, so give them the
        # smallest int64 (mirroring argmin, which uses the largest). Filling
        # with 0 would beat every valid negative value.
        i8[mask] = np.iinfo("int64").min
    return i8.argmax()
+ + Parameters + ---------- + key : label of the slice bound + kind : {'ix', 'loc', 'getitem', 'iloc'} or None + """ + + assert kind in ["ix", "loc", "getitem", "iloc", None] + + # we don't allow integer/float indexing for loc + # we don't allow float indexing for ix/getitem + if is_scalar(key): + is_int = is_integer(key) + is_flt = is_float(key) + if kind in ["loc"] and (is_int or is_flt): + self._invalid_indexer("index", key) + elif kind in ["ix", "getitem"] and is_flt: + self._invalid_indexer("index", key) + + return super()._convert_scalar_indexer(key, kind=kind) + + __add__ = make_wrapped_arith_op("__add__") + __radd__ = make_wrapped_arith_op("__radd__") + __sub__ = make_wrapped_arith_op("__sub__") + __rsub__ = make_wrapped_arith_op("__rsub__") + __pow__ = make_wrapped_arith_op("__pow__") + __rpow__ = make_wrapped_arith_op("__rpow__") + __mul__ = make_wrapped_arith_op("__mul__") + __rmul__ = make_wrapped_arith_op("__rmul__") + __floordiv__ = make_wrapped_arith_op("__floordiv__") + __rfloordiv__ = make_wrapped_arith_op("__rfloordiv__") + __mod__ = make_wrapped_arith_op("__mod__") + __rmod__ = make_wrapped_arith_op("__rmod__") + __divmod__ = make_wrapped_arith_op("__divmod__") + __rdivmod__ = make_wrapped_arith_op("__rdivmod__") + __truediv__ = make_wrapped_arith_op("__truediv__") + __rtruediv__ = make_wrapped_arith_op("__rtruediv__") + + def isin(self, values, level=None): + """ + Compute boolean array of whether each index value is found in the + passed set of values. 
def _summary(self, name=None):
    """
    Return a summarized representation.

    Parameters
    ----------
    name : str
        Name to use in the summary representation; defaults to the class
        name of self.

    Returns
    -------
    str
        Summarized representation of the index.
    """
    formatter = self._formatter_func
    # Only a non-empty index has a first/last entry to show.
    index_summary = (
        f", {formatter(self[0])} to {formatter(self[-1])}" if len(self) > 0 else ""
    )

    name = type(self).__name__ if name is None else name
    summary = f"{name}: {len(self)} entries{index_summary}"
    if self.freq:
        summary = summary + f"\nFreq: {self.freqstr}"

    # display as values, not quoted
    return summary.replace("'", "")
+ """ + attribs = self._get_attributes_dict() + attribs["name"] = name + # do not pass tz to set because tzlocal cannot be hashed + if len({str(x.dtype) for x in to_concat}) != 1: + raise ValueError("to_concat must have the same tz") + + new_data = type(self._values)._concat_same_type(to_concat).asi8 + + # GH 3232: If the concat result is evenly spaced, we can retain the + # original frequency + is_diff_evenly_spaced = len(unique_deltas(new_data)) == 1 + if not is_period_dtype(self) and not is_diff_evenly_spaced: + # reset freq + attribs["freq"] = None + + return self._simple_new(new_data, **attribs) + + def shift(self, periods=1, freq=None): + """ + Shift index by desired number of time frequency increments. + + This method is for shifting the values of datetime-like indexes + by a specified time increment a given number of times. + + Parameters + ---------- + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + + .. versionchanged:: 0.24.0 + + freq : pandas.DateOffset, pandas.Timedelta or string, optional + Frequency increment to shift by. + If None, the index is shifted by its own `freq` attribute. + Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. + + Returns + ------- + pandas.DatetimeIndex + Shifted index. + + See Also + -------- + Index.shift : Shift values of Index. + PeriodIndex.shift : Shift values of PeriodIndex. 
def delete(self, loc):
    """
    Make a new index with the passed location(s) deleted.

    ``freq`` is carried over for period dtype, when a single first or last
    position is removed, or when `loc` is (or reduces to) a step-1 slice
    that touches the start or the end; otherwise it is reset to None.

    Parameters
    ----------
    loc : int, slice or list-like of ints

    Returns
    -------
    Same type as self (built through ``self._shallow_copy``).
    """
    new_i8s = np.delete(self.asi8, loc)
    n = len(self)

    if is_period_dtype(self):
        keep_freq = True
    elif is_integer(loc):
        # only dropping an endpoint leaves the spacing intact
        keep_freq = loc in (0, -n, -1, n - 1)
    else:
        if is_list_like(loc):
            loc = lib.maybe_indices_to_slice(ensure_int64(np.array(loc)), n)
        keep_freq = (
            isinstance(loc, slice)
            and loc.step in (1, None)
            and (loc.start in (0, None) or loc.stop in (n, None))
        )

    freq = self.freq if keep_freq else None
    return self._shallow_copy(new_i8s, freq=freq)
def intersection(self, other, sort=False):
    """
    Specialized intersection for DatetimeIndex/TimedeltaIndex.

    May be much faster than Index.intersection

    Parameters
    ----------
    other : Same type as self or array-like
    sort : False or None, default False
        Sort the resulting index if possible.

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default to ``False`` to match the behaviour
           from before 0.24.0.

        .. versionchanged:: 0.25.0

           The `sort` keyword is added

    Returns
    -------
    y : Index or same type as self
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)

    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if len(self) == 0:
        return self.copy()
    if len(other) == 0:
        return other.copy()

    if not isinstance(other, type(self)):
        result = Index.intersection(self, other, sort=sort)
        if isinstance(result, type(self)):
            if result.freq is None:
                result._set_freq("infer")
        return result

    elif (
        other.freq is None
        or self.freq is None
        or other.freq != self.freq
        or not other.freq.is_anchored()
        or (not self.is_monotonic or not other.is_monotonic)
    ):
        result = Index.intersection(self, other, sort=sort)

        # Invalidate the freq of `result`, which may not be correct at
        # this point, depending on the values.

        result._set_freq(None)
        result = self._shallow_copy(
            result._data, name=result.name, dtype=result.dtype, freq=None
        )
        if result.freq is None:
            result._set_freq("infer")
        return result

    # Fast path: both operands are monotonic and share the same anchored
    # freq, so the intersection is a contiguous slice of one of them.

    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # after sorting, the intersection always starts with the right index
    # and ends with the index of which the last elements is smallest
    end = min(left[-1], right[-1])
    start = right[0]

    if end < start:
        # Disjoint ranges.  Pass dtype and freq explicitly: a bare
        # ``type(self)(data=[])`` would drop the timezone (and freq) that
        # both operands share and hand back a tz-naive empty index.
        return type(self)(data=[], dtype=self.dtype, freq=self.freq)
    else:
        lslice = slice(*left.slice_locs(start, end))
        left_chunk = left.values[lslice]
        return self._shallow_copy(left_chunk)
right = self, other + else: + left, right = other, self + + right_start = right[0] + left_end = left[-1] + + # Only need to "adjoin", not overlap + try: + return (right_start == left_end + freq) or right_start in left + except ValueError: + # if we are comparing a freq that does not propagate timezones + # this will raise + return False + + def _fast_union(self, other, sort=None): + if len(other) == 0: + return self.view(type(self)) + + if len(self) == 0: + return other.view(type(self)) + + # to make our life easier, "sort" the two ranges + if self[0] <= other[0]: + left, right = self, other + elif sort is False: + # TDIs are not in the "correct" order and we don't want + # to sort but want to remove overlaps + left, right = self, other + left_start = left[0] + loc = right.searchsorted(left_start, side="left") + right_chunk = right.values[:loc] + dates = concat_compat((left.values, right_chunk)) + return self._shallow_copy(dates) + else: + left, right = other, self + + left_end = left[-1] + right_end = right[-1] + + # concatenate + if left_end < right_end: + loc = right.searchsorted(left_end, side="right") + right_chunk = right.values[loc:] + dates = concat_compat((left.values, right_chunk)) + return self._shallow_copy(dates) + else: + return left + + def _union(self, other, sort): + if not len(other) or self.equals(other) or not len(self): + return super()._union(other, sort=sort) + + # We are called by `union`, which is responsible for this validation + assert isinstance(other, type(self)) + + this, other = self._maybe_utc_convert(other) + + if this._can_fast_union(other): + return this._fast_union(other, sort=sort) + else: + result = Index._union(this, other, sort=sort) + if isinstance(result, type(self)): + assert result._data.dtype == this.dtype + if result.freq is None: + result._set_freq("infer") + return result + + # -------------------------------------------------------------------- + # Join Methods + _join_precedence = 10 + + _inner_indexer = 
_join_i8_wrapper(libjoin.inner_join_indexer) + _outer_indexer = _join_i8_wrapper(libjoin.outer_join_indexer) + _left_indexer = _join_i8_wrapper(libjoin.left_join_indexer) + _left_indexer_unique = _join_i8_wrapper( + libjoin.left_join_indexer_unique, with_indexers=False + ) + + def join( + self, other, how: str = "left", level=None, return_indexers=False, sort=False + ): + """ + See Index.join + """ + if self._is_convertible_to_index_for_join(other): + try: + other = type(self)(other) + except (TypeError, ValueError): + pass + + this, other = self._maybe_utc_convert(other) + return Index.join( + this, + other, + how=how, + level=level, + return_indexers=return_indexers, + sort=sort, + ) + + def _maybe_utc_convert(self, other): + this = self + if not hasattr(self, "tz"): + return this, other + + if isinstance(other, type(self)): + if self.tz is not None: + if other.tz is None: + raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + elif other.tz is not None: + raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") + + if not timezones.tz_compare(self.tz, other.tz): + this = self.tz_convert("UTC") + other = other.tz_convert("UTC") + return this, other + + @classmethod + def _is_convertible_to_index_for_join(cls, other: Index) -> bool: + """ + return a boolean whether I can attempt conversion to a + DatetimeIndex/TimedeltaIndex + """ + if isinstance(other, cls): + return False + elif len(other) > 0 and other.inferred_type not in ( + "floating", + "mixed-integer", + "integer", + "integer-na", + "mixed-integer-float", + "mixed", + ): + return True + return False + + def _wrap_joined_index(self, joined: np.ndarray, other): + assert other.dtype == self.dtype, (other.dtype, self.dtype) + name = get_op_result_name(self, other) + + freq = self.freq if self._can_fast_union(other) else None + new_data = type(self._data)._simple_new( # type: ignore + joined, dtype=self.dtype, freq=freq + ) + + return type(self)._simple_new(new_data, name=name) + + 
class DatetimelikeDelegateMixin(PandasDelegate):
    """
    Delegation mechanism, specific for Datetime, Timedelta, and Period types.

    Functionality is delegated from the Index class to an Array class. A
    few things can be customized

    * _delegated_methods, delegated_properties : List
        The list of property / method names being delegated.
    * raw_methods : Set
        The set of methods whose results should *not* be
        boxed in an index, after being returned from the array
    * raw_properties : Set
        The set of properties whose results should *not* be
        boxed in an index, after being returned from the array
    """

    # raw_methods : dispatch methods that shouldn't be boxed in an Index
    _raw_methods: Set[str] = set()
    # raw_properties : dispatch properties that shouldn't be boxed in an Index
    _raw_properties: Set[str] = set()
    _data: ExtensionArray

    def _delegate_property_get(self, name, *args, **kwargs):
        # read the attribute from the backing array; box unless marked raw
        value = getattr(self._data, name)
        if name in self._raw_properties:
            return value
        return Index(value, name=self.name)

    def _delegate_property_set(self, name, value, *args, **kwargs):
        # writes always go straight to the backing array
        setattr(self._data, name, value)

    def _delegate_method(self, name, *args, **kwargs):
        # call the array's method; box the result unless marked raw
        bound = getattr(self._data, name)
        outcome = bound(*args, **kwargs)
        if name in self._raw_methods:
            return outcome
        return Index(outcome, name=self.name)
import _NS_DTYPE, is_float, is_integer, is_scalar +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna + +from pandas.core.accessor import delegate_names +from pandas.core.arrays.datetimes import ( + DatetimeArray, + tz_to_dtype, + validate_tz_from_dtype, +) +from pandas.core.base import _shared_docs +import pandas.core.common as com +from pandas.core.indexes.base import Index, maybe_extract_name +from pandas.core.indexes.datetimelike import ( + DatetimelikeDelegateMixin, + DatetimeTimedeltaMixin, +) +from pandas.core.indexes.extension import inherit_names +from pandas.core.ops import get_op_result_name +import pandas.core.tools.datetimes as tools + +from pandas.tseries.frequencies import Resolution, to_offset +from pandas.tseries.offsets import Nano, prefix_mapping + + +def _new_DatetimeIndex(cls, d): + """ + This is called upon unpickling, rather than the default which doesn't + have arguments and breaks __new__ + """ + if "data" in d and not isinstance(d["data"], DatetimeIndex): + # Avoid need to verify integrity by calling simple_new directly + data = d.pop("data") + result = cls._simple_new(data, **d) + else: + with warnings.catch_warnings(): + # TODO: If we knew what was going in to **d, we might be able to + # go through _simple_new instead + warnings.simplefilter("ignore") + result = cls.__new__(cls, **d) + + return result + + +class DatetimeDelegateMixin(DatetimelikeDelegateMixin): + # Most attrs are dispatched via datetimelike_{ops,methods} + # Some are "raw" methods, the result is not not re-boxed in an Index + # We also have a few "extra" attrs, which may or may not be raw, + # which we we dont' want to expose in the .dt accessor. 
+ _extra_methods = ["to_period", "to_perioddelta", "to_julian_date", "strftime"] + _extra_raw_methods = [ + "to_pydatetime", + "_local_timestamps", + "_has_same_tz", + "_format_native_types", + "__iter__", + ] + _extra_raw_properties = ["_box_func", "tz", "tzinfo", "dtype"] + _delegated_properties = DatetimeArray._datetimelike_ops + _extra_raw_properties + _delegated_methods = ( + DatetimeArray._datetimelike_methods + _extra_methods + _extra_raw_methods + ) + _raw_properties = ( + {"date", "time", "timetz"} + | set(DatetimeArray._bool_ops) + | set(_extra_raw_properties) + ) + _raw_methods = set(_extra_raw_methods) + + +@inherit_names(["_timezone", "is_normalized", "_resolution"], DatetimeArray, cache=True) +@inherit_names( + [ + "_bool_ops", + "_object_ops", + "_field_ops", + "_datetimelike_ops", + "_datetimelike_methods", + ], + DatetimeArray, +) +@delegate_names( + DatetimeArray, DatetimeDelegateMixin._delegated_properties, typ="property" +) +@delegate_names( + DatetimeArray, + DatetimeDelegateMixin._delegated_methods, + typ="method", + overwrite=True, +) +class DatetimeIndex(DatetimeTimedeltaMixin, DatetimeDelegateMixin): + """ + Immutable ndarray of datetime64 data, represented internally as int64, and + which can be boxed to Timestamp objects that are subclasses of datetime and + carry metadata such as frequency information. + + Parameters + ---------- + data : array-like (1-dimensional), optional + Optional datetime-like data to construct index with. + copy : bool + Make a copy of input ndarray. + freq : str or pandas offset object, optional + One of pandas date offset strings or corresponding objects. The string + 'infer' can be passed in order to set the frequency of the index as the + inferred frequency upon creation. + tz : pytz.timezone or dateutil.tz.tzfile + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + When clocks moved backward due to DST, ambiguous times may arise. 
+ For example in Central European Time (UTC+01), when going from 03:00 + DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC + and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter + dictates how ambiguous times should be handled. + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False signifies a + non-DST time (note that this flag is only applicable for ambiguous + times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous times. + name : object + Name to be stored in the index. + dayfirst : bool, default False + If True, parse dates in `data` with the day first order. + yearfirst : bool, default False + If True parse dates in `data` with the year first order. + + Attributes + ---------- + year + month + day + hour + minute + second + microsecond + nanosecond + date + time + timetz + dayofyear + weekofyear + week + dayofweek + weekday + quarter + tz + freq + freqstr + is_month_start + is_month_end + is_quarter_start + is_quarter_end + is_year_start + is_year_end + is_leap_year + inferred_freq + + Methods + ------- + normalize + strftime + snap + tz_convert + tz_localize + round + floor + ceil + to_period + to_perioddelta + to_pydatetime + to_series + to_frame + month_name + day_name + mean + + See Also + -------- + Index : The base pandas Index type. + TimedeltaIndex : Index of timedelta64 data. + PeriodIndex : Index of Period data. + to_datetime : Convert argument to datetime. + date_range : Create a fixed-frequency DatetimeIndex. + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. 
@classmethod
def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
    """
    Build an index around ``values`` without the full constructor checks.

    The values must already be dtype-compatible.  A DatetimeArray input is
    passed through the DatetimeArray constructor (with the requested
    freq/dtype) and its tz, freq and underlying data are then used; a
    DatetimeIndex input is unwrapped to its backing array.
    """
    if isinstance(values, DatetimeArray):
        if tz:
            tz = validate_tz_from_dtype(dtype, tz)
            array_dtype = DatetimeTZDtype(tz=tz)
        elif dtype is None:
            array_dtype = _NS_DTYPE
        else:
            array_dtype = dtype

        checked = DatetimeArray(values, freq=freq, dtype=array_dtype)
        tz = checked.tz
        freq = checked.freq
        values = checked._data

    # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
    if isinstance(values, DatetimeIndex):
        values = values._data

    final_dtype = tz_to_dtype(tz)
    backing = DatetimeArray._simple_new(values, freq=freq, dtype=final_dtype)
    assert isinstance(backing, DatetimeArray)

    index = object.__new__(cls)
    index._data = backing
    index.name = name
    index._no_setting_name = False
    # For groupby perf. See note in indexes/base about _index_data
    index._index_data = backing._data
    index._reset_identity()
    return index
def _get_time_micros(self):
    """
    Return ``fields.get_time_micros`` of the index's i8 values.

    For a timezone other than UTC the array's local timestamps are used
    instead of the stored i8 values.
    """
    i8_values = self.asi8
    tz = self.tz
    needs_local_view = tz is not None and not timezones.is_utc(tz)
    if needs_local_view:
        i8_values = self._data._local_timestamps()
    return fields.get_time_micros(i8_values)
+ name : str, optional + Name of resulting Series. If None, defaults to name of original + index. + + Returns + ------- + Series + """ + from pandas import Series + + if index is None: + index = self._shallow_copy() + if name is None: + name = self.name + + if keep_tz is not lib.no_default: + if keep_tz: + warnings.warn( + "The 'keep_tz' keyword in DatetimeIndex.to_series " + "is deprecated and will be removed in a future version. " + "You can stop passing 'keep_tz' to silence this warning.", + FutureWarning, + stacklevel=2, + ) + else: + warnings.warn( + "Specifying 'keep_tz=False' is deprecated and this " + "option will be removed in a future release. If " + "you want to remove the timezone information, you " + "can do 'idx.tz_convert(None)' before calling " + "'to_series'.", + FutureWarning, + stacklevel=2, + ) + else: + keep_tz = True + + if keep_tz and self.tz is not None: + # preserve the tz & copy + values = self.copy(deep=True) + else: + values = self.values.copy() + + return Series(values, index=index, name=name) + + def snap(self, freq="S"): + """ + Snap time stamps to nearest occurring frequency. + + Returns + ------- + DatetimeIndex + """ + # Superdumb, punting on any optimizing + freq = to_offset(freq) + + snapped = np.empty(len(self), dtype=_NS_DTYPE) + + for i, v in enumerate(self): + s = v + if not freq.is_on_offset(s): + t0 = freq.rollback(s) + t1 = freq.rollforward(s) + if abs(s - t0) < abs(t1 - s): + s = t0 + else: + s = t1 + snapped[i] = s + + # we know it conforms; skip check + return DatetimeIndex._simple_new(snapped, name=self.name, tz=self.tz, freq=freq) + + def _parsed_string_to_bounds(self, reso, parsed): + """ + Calculate datetime bounds for parsed time string and its resolution. + + Parameters + ---------- + reso : Resolution + Resolution provided by parsed string. + parsed : datetime + Datetime from parsed string. 
+ + Returns + ------- + lower, upper: pd.Timestamp + + """ + valid_resos = { + "year", + "month", + "quarter", + "day", + "hour", + "minute", + "second", + "minute", + "second", + "microsecond", + } + if reso not in valid_resos: + raise KeyError + if reso == "year": + start = Timestamp(parsed.year, 1, 1) + end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999) + elif reso == "month": + d = ccalendar.get_days_in_month(parsed.year, parsed.month) + start = Timestamp(parsed.year, parsed.month, 1) + end = Timestamp(parsed.year, parsed.month, d, 23, 59, 59, 999999) + elif reso == "quarter": + qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead + d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month + start = Timestamp(parsed.year, parsed.month, 1) + end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999) + elif reso == "day": + start = Timestamp(parsed.year, parsed.month, parsed.day) + end = start + timedelta(days=1) - Nano(1) + elif reso == "hour": + start = Timestamp(parsed.year, parsed.month, parsed.day, parsed.hour) + end = start + timedelta(hours=1) - Nano(1) + elif reso == "minute": + start = Timestamp( + parsed.year, parsed.month, parsed.day, parsed.hour, parsed.minute + ) + end = start + timedelta(minutes=1) - Nano(1) + elif reso == "second": + start = Timestamp( + parsed.year, + parsed.month, + parsed.day, + parsed.hour, + parsed.minute, + parsed.second, + ) + end = start + timedelta(seconds=1) - Nano(1) + elif reso == "microsecond": + start = Timestamp( + parsed.year, + parsed.month, + parsed.day, + parsed.hour, + parsed.minute, + parsed.second, + parsed.microsecond, + ) + end = start + timedelta(microseconds=1) - Nano(1) + # GH 24076 + # If an incoming date string contained a UTC offset, need to localize + # the parsed date to this offset first before aligning with the index's + # timezone + if parsed.tzinfo is not None: + if self.tz is None: + raise ValueError( + "The index must be timezone aware when indexing " + "with a date string 
with a UTC offset" + ) + start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz) + end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz) + elif self.tz is not None: + start = start.tz_localize(self.tz) + end = end.tz_localize(self.tz) + return start, end + + def _partial_date_slice( + self, reso: str, parsed, use_lhs: bool = True, use_rhs: bool = True + ): + """ + Parameters + ---------- + reso : str + use_lhs : bool, default True + use_rhs : bool, default True + """ + is_monotonic = self.is_monotonic + if ( + is_monotonic + and reso in ["day", "hour", "minute", "second"] + and self._resolution >= Resolution.get_reso(reso) + ): + # These resolution/monotonicity validations came from GH3931, + # GH3452 and GH2369. + + # See also GH14826 + raise KeyError + + if reso == "microsecond": + # _partial_date_slice doesn't allow microsecond resolution, but + # _parsed_string_to_bounds allows it. + raise KeyError + + t1, t2 = self._parsed_string_to_bounds(reso, parsed) + stamps = self.asi8 + + if is_monotonic: + + # we are out of range + if len(stamps) and ( + (use_lhs and t1.value < stamps[0] and t2.value < stamps[0]) + or ((use_rhs and t1.value > stamps[-1] and t2.value > stamps[-1])) + ): + raise KeyError + + # a monotonic (sorted) series can be sliced + left = stamps.searchsorted(t1.value, side="left") if use_lhs else None + right = stamps.searchsorted(t2.value, side="right") if use_rhs else None + + return slice(left, right) + + lhs_mask = (stamps >= t1.value) if use_lhs else True + rhs_mask = (stamps <= t2.value) if use_rhs else True + + # try to find a the dates + return (lhs_mask & rhs_mask).nonzero()[0] + + def _maybe_promote(self, other): + if other.inferred_type == "date": + other = DatetimeIndex(other) + return self, other + + def get_value(self, series, key): + """ + Fast lookup of value from 1-dimensional ndarray. 
def get_value_maybe_box(self, series, key):
    """
    Look ``key`` up in ``series`` through the index engine and box the hit.

    ``key`` is coerced to a Timestamp first.  For a tz-aware index a naive
    key is localized to the index's timezone and an aware key is converted
    to it.
    """
    tz = self.tz
    if tz is None:
        stamp = key if isinstance(key, Timestamp) else Timestamp(key)
    else:
        # needed to localize naive datetimes
        stamp = Timestamp(key)
        if stamp.tzinfo is None:
            stamp = stamp.tz_localize(tz)
        else:
            stamp = stamp.tz_convert(tz)

    engine_lookup = self._engine.get_value
    found = engine_lookup(com.values_from_object(series), stamp, tz=tz)
    return com.maybe_box(self, found, series, stamp)
tolerance) + except (KeyError, ValueError, TypeError): + try: + return self._get_string_slice(key) + except (TypeError, KeyError, ValueError, OverflowError): + pass + + try: + stamp = Timestamp(key) + if stamp.tzinfo is not None and self.tz is not None: + stamp = stamp.tz_convert(self.tz) + else: + stamp = stamp.tz_localize(self.tz) + return Index.get_loc(self, stamp, method, tolerance) + except KeyError: + raise KeyError(key) + except ValueError as e: + # list-like tolerance size must match target index size + if "list-like" in str(e): + raise e + raise KeyError(key) + + def _maybe_cast_slice_bound(self, label, side, kind): + """ + If label is a string, cast it to datetime according to resolution. + + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'ix', 'loc', 'getitem'} + + Returns + ------- + label : object + + Notes + ----- + Value of `side` parameter should be validated in caller. + """ + assert kind in ["ix", "loc", "getitem", None] + + if is_float(label) or isinstance(label, time) or is_integer(label): + self._invalid_indexer("slice", label) + + if isinstance(label, str): + freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) + _, parsed, reso = parsing.parse_time_string(label, freq) + lower, upper = self._parsed_string_to_bounds(reso, parsed) + # lower, upper form the half-open interval: + # [parsed, parsed + 1 freq) + # because label may be passed to searchsorted + # the bounds need swapped if index is reverse sorted and has a + # length > 1 (is_monotonic_decreasing gives True for empty + # and length 1 index) + if self._is_strictly_monotonic_decreasing and len(self) > 1: + return upper if side == "left" else lower + return lower if side == "left" else upper + else: + return label + + def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True): + freq = getattr(self, "freqstr", getattr(self, "inferred_freq", None)) + _, parsed, reso = parsing.parse_time_string(key, freq) + loc = 
def slice_indexer(self, start=None, end=None, step=None, kind=None):
    """
    Return indexer for specified label slice.
    Index.slice_indexer, customized to handle time slicing.

    In addition to functionality provided by Index.slice_indexer, does the
    following:

    - if both `start` and `end` are instances of `datetime.time`, it
      invokes `indexer_between_time`
    - if `start` and `end` are both either string or None perform
      value-based selection in non-monotonic cases.

    """
    # For historical reasons DatetimeIndex supports slices between two
    # instances of datetime.time as if it were applying a slice mask to
    # an array of (self.hour, self.minute, self.seconds, self.microsecond).
    start_is_time = isinstance(start, time)
    end_is_time = isinstance(end, time)

    if start_is_time and end_is_time:
        if step is not None and step != 1:
            raise ValueError("Must have step size of 1 with time slices")
        return self.indexer_between_time(start, end)

    if start_is_time or end_is_time:
        raise KeyError("Cannot mix time and non-time slice keys")

    def _at_midnight(bound):
        # plain dates are treated as datetimes at midnight
        if isinstance(bound, date) and not isinstance(bound, datetime):
            return datetime.combine(bound, time(0, 0))
        return bound

    # Pandas supports slicing with dates, treated as datetimes at midnight.
    # https://github.com/pandas-dev/pandas/issues/31501
    start = _at_midnight(start)
    end = _at_midnight(end)

    try:
        return Index.slice_indexer(self, start, end, step, kind=kind)
    except KeyError:
        # For historical reasons DatetimeIndex by default supports
        # value-based partial (aka string) slices on non-monotonic arrays,
        # let's try that.
        start_ok = start is None or isinstance(start, str)
        end_ok = end is None or isinstance(end, str)
        if not (start_ok and end_ok):
            raise

        mask = True
        if start is not None:
            lower_bound = self._maybe_cast_slice_bound(start, "left", kind)
            mask = lower_bound <= self

        if end is not None:
            upper_bound = self._maybe_cast_slice_bound(end, "right", kind)
            mask = (self <= upper_bound) & mask

        positions = mask.nonzero()[0][::step]
        if len(positions) == len(self):
            return slice(None)
        return positions
+ + Returns + ------- + new_index : Index + """ + if isinstance(item, self._data._recognized_scalars): + item = self._data._scalar_type(item) + elif is_valid_nat_for_dtype(item, self.dtype): + # GH 18295 + item = self._na_value + elif is_scalar(item) and isna(item): + # i.e. timedeltat64("NaT") + raise TypeError( + f"cannot insert {type(self).__name__} with incompatible label" + ) + + freq = None + if isinstance(item, self._data._scalar_type) or item is NaT: + self._data._check_compatible_with(item, setitem=True) + + # check freq can be preserved on edge cases + if self.size and self.freq is not None: + if item is NaT: + pass + elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: + freq = self.freq + elif (loc == len(self)) and item - self.freq == self[-1]: + freq = self.freq + item = item.asm8 + + try: + new_i8s = np.concatenate( + (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8) + ) + return self._shallow_copy(new_i8s, freq=freq) + except (AttributeError, TypeError): + + # fall back to object index + if isinstance(item, str): + return self.astype(object).insert(loc, item) + raise TypeError( + f"cannot insert {type(self).__name__} with incompatible label" + ) + + def indexer_at_time(self, time, asof=False): + """ + Return index locations of index values at particular time of day + (e.g. 9:30AM). + + Parameters + ---------- + time : datetime.time or str + datetime.time or string in appropriate format ("%H:%M", "%H%M", + "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", + "%I%M%S%p"). 
def indexer_between_time(
    self, start_time, end_time, include_start=True, include_end=True
):
    """
    Return index locations of values between particular times of day
    (e.g., 9:00-9:30AM).

    Parameters
    ----------
    start_time, end_time : datetime.time, str
        datetime.time or string in appropriate format ("%H:%M", "%H%M",
        "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
        "%I%M%S%p").
    include_start : bool, default True
        Whether values equal to `start_time` are selected.
    include_end : bool, default True
        Whether values equal to `end_time` are selected.

    Returns
    -------
    values_between_time : array of integers

    See Also
    --------
    indexer_at_time, DataFrame.between_time
    """
    start_time = tools.to_time(start_time)
    end_time = tools.to_time(end_time)
    time_micros = self._get_time_micros()
    lower = _time_to_micros(start_time)
    upper = _time_to_micros(end_time)

    # An inclusive bound compares with <=, an exclusive one with <.
    lower_cmp = operator.le if include_start else operator.lt
    upper_cmp = operator.le if include_end else operator.lt

    after_start = lower_cmp(lower, time_micros)
    before_end = upper_cmp(time_micros, upper)

    # start > end means the window wraps past midnight, so a value
    # qualifies when it satisfies either side rather than both.
    if start_time <= end_time:
        mask = operator.and_(after_start, before_end)
    else:
        mask = operator.or_(after_start, before_end)

    return mask.nonzero()[0]
freq=None, + tz=None, + normalize=False, + name=None, + closed=None, + **kwargs, +) -> DatetimeIndex: + """ + Return a fixed frequency DatetimeIndex. + + Parameters + ---------- + start : str or datetime-like, optional + Left bound for generating dates. + end : str or datetime-like, optional + Right bound for generating dates. + periods : int, optional + Number of periods to generate. + freq : str or DateOffset, default 'D' + Frequency strings can have multiples, e.g. '5H'. See + :ref:`here ` for a list of + frequency aliases. + tz : str or tzinfo, optional + Time zone name for returning localized DatetimeIndex, for example + 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is + timezone-naive. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + name : str, default None + Name of the resulting DatetimeIndex. + closed : {None, 'left', 'right'}, optional + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None, the default). + **kwargs + For compatibility. Has no effect on the result. + + Returns + ------- + rng : DatetimeIndex + + See Also + -------- + DatetimeIndex : An immutable container for datetimes. + timedelta_range : Return a fixed frequency TimedeltaIndex. + period_range : Return a fixed frequency PeriodIndex. + interval_range : Return a fixed frequency IntervalIndex. + + Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``DatetimeIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + **Specifying the values** + + The next four examples generate the same `DatetimeIndex`, but vary + the combination of `start`, `end` and `periods`. 
+ + Specify `start` and `end`, with the default daily frequency. + + >>> pd.date_range(start='1/1/2018', end='1/08/2018') + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], + dtype='datetime64[ns]', freq='D') + + Specify `start` and `periods`, the number of periods (days). + + >>> pd.date_range(start='1/1/2018', periods=8) + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', + '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], + dtype='datetime64[ns]', freq='D') + + Specify `end` and `periods`, the number of periods (days). + + >>> pd.date_range(end='1/1/2018', periods=8) + DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28', + '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'], + dtype='datetime64[ns]', freq='D') + + Specify `start`, `end`, and `periods`; the frequency is generated + automatically (linearly spaced). + + >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) + DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', + '2018-04-27 00:00:00'], + dtype='datetime64[ns]', freq=None) + + **Other Parameters** + + Changed the `freq` (frequency) to ``'M'`` (month end frequency). + + >>> pd.date_range(start='1/1/2018', periods=5, freq='M') + DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', + '2018-05-31'], + dtype='datetime64[ns]', freq='M') + + Multiples are allowed + + >>> pd.date_range(start='1/1/2018', periods=5, freq='3M') + DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', + '2019-01-31'], + dtype='datetime64[ns]', freq='3M') + + `freq` can also be specified as an Offset object. + + >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) + DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', + '2019-01-31'], + dtype='datetime64[ns]', freq='3M') + + Specify `tz` to set the timezone. 
def bdate_range(
    start=None,
    end=None,
    periods=None,
    freq="B",
    tz=None,
    normalize=True,
    name=None,
    weekmask=None,
    holidays=None,
    closed=None,
    **kwargs,
) -> DatetimeIndex:
    """
    Return a fixed frequency DatetimeIndex, with business day as the default
    frequency.

    Parameters
    ----------
    start : str or datetime-like, default None
        Left bound for generating dates.
    end : str or datetime-like, default None
        Right bound for generating dates.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, default 'B' (business daily)
        Frequency strings can have multiples, e.g. '5H'.
    tz : str or None
        Time zone name for returning localized DatetimeIndex, for example
        Asia/Beijing.
    normalize : bool, default True
        Normalize start/end dates to midnight before generating date range.
    name : str, default None
        Name of the resulting DatetimeIndex.
    weekmask : str or None, default None
        Weekmask of valid business days, passed to ``numpy.busdaycalendar``,
        only used when custom frequency strings are passed.  The default
        value None is equivalent to 'Mon Tue Wed Thu Fri'.

        .. versionadded:: 0.21.0

    holidays : list-like or None, default None
        Dates to exclude from the set of valid business days, passed to
        ``numpy.busdaycalendar``, only used when custom frequency strings
        are passed.

        .. versionadded:: 0.21.0

    closed : str, default None
        Make the interval closed with respect to the given frequency to
        the 'left', 'right', or both sides (None).
    **kwargs
        For compatibility. Has no effect on the result.

    Returns
    -------
    DatetimeIndex

    Raises
    ------
    TypeError
        If `freq` is None.
    ValueError
        If `freq` is a string starting with "C" that cannot be resolved to
        a custom offset, or if `holidays`/`weekmask` are given together
        with a frequency that is not such a custom frequency string.

    Notes
    -----
    Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified.  Specifying ``freq`` is a requirement
    for ``bdate_range``.  Use ``date_range`` if specifying ``freq`` is not
    desired.

    To learn more about the frequency strings, please see
    :ref:`this link <timeseries.offset_aliases>`.

    Examples
    --------
    Note how the two weekend days are skipped in the result.

    >>> pd.bdate_range(start='1/1/2018', end='1/08/2018')
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08'],
              dtype='datetime64[ns]', freq='B')
    """
    if freq is None:
        msg = "freq must be specified for bdate_range; use date_range instead"
        raise TypeError(msg)

    if isinstance(freq, str) and freq.startswith("C"):
        # Custom business frequencies are built from the alias together
        # with the calendar description (holidays / weekmask).
        try:
            weekmask = weekmask or "Mon Tue Wed Thu Fri"
            freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask)
        except (KeyError, TypeError) as err:
            # `freq` is still the original string here because the
            # assignment above did not complete.
            msg = f"invalid custom frequency string: {freq}"
            raise ValueError(msg) from err
    elif holidays or weekmask:
        msg = (
            "a custom frequency string is required when holidays or "
            f"weekmask are passed, got frequency {freq}"
        )
        raise ValueError(msg)

    return date_range(
        start=start,
        end=end,
        periods=periods,
        freq=freq,
        tz=tz,
        normalize=normalize,
        name=name,
        closed=closed,
        **kwargs,
    )
def inherit_from_data(name: str, delegate, cache: bool = False, wrap: bool = False):
    """
    Make an alias for a method of the underlying ExtensionArray.

    Parameters
    ----------
    name : str
        Name of an attribute the class should inherit from its EA parent.
    delegate : class
        Class on which `name` is looked up.
    cache : bool, default False
        Whether to convert wrapped properties into cache_readonly
    wrap : bool, default False
        Whether to wrap the inherited result in an Index.

    Returns
    -------
    attribute, method, property, or cache_readonly
    """
    attr = getattr(delegate, name)

    def _maybe_wrap(owner, value):
        # Shared by the property getter and the method wrapper: results of
        # the array's own type are re-boxed via ``_simple_new``, anything
        # else goes through the ``Index`` constructor.
        if not wrap:
            return value
        if isinstance(value, type(owner._data)):
            return type(owner)._simple_new(value, name=owner.name)
        return Index(value, name=owner.name)

    if isinstance(attr, property):
        if cache:

            def cached(self):
                return getattr(self._data, name)

            cached.__name__ = name
            cached.__doc__ = attr.__doc__
            return cache_readonly(cached)

        def fget(self):
            return _maybe_wrap(self, getattr(self._data, name))

        def fset(self, value):
            setattr(self._data, name, value)

        fget.__name__ = name
        fget.__doc__ = attr.__doc__
        return property(fget, fset)

    if not callable(attr):
        # just a normal attribute, no wrapping
        return attr

    def method(self, *args, **kwargs):
        return _maybe_wrap(self, attr(self._data, *args, **kwargs))

    method.__name__ = name
    method.__doc__ = attr.__doc__
    return method
+ # See https://github.com/pandas-dev/pandas/issues/31109 + return NotImplemented + meth = getattr(self._data, opname) + result = meth(_maybe_unwrap_index(other)) + return _wrap_arithmetic_op(self, other, result) + + method.__name__ = opname + return method + + +def _wrap_arithmetic_op(self, other, result): + if result is NotImplemented: + return NotImplemented + + if isinstance(result, tuple): + # divmod, rdivmod + assert len(result) == 2 + return ( + _wrap_arithmetic_op(self, other, result[0]), + _wrap_arithmetic_op(self, other, result[1]), + ) + + if not isinstance(result, Index): + # Index.__new__ will choose appropriate subclass for dtype + result = Index(result) + + res_name = get_op_result_name(self, other) + result.name = res_name + return result + + +def _maybe_unwrap_index(obj): + """ + If operating against another Index object, we need to unwrap the underlying + data before deferring to the DatetimeArray/TimedeltaArray/PeriodArray + implementation, otherwise we will incorrectly return NotImplemented. + + Parameters + ---------- + obj : object + + Returns + ------- + unwrapped object + """ + if isinstance(obj, Index): + return obj._data + return obj + + +class ExtensionIndex(Index): + """ + Index subclass for indexes backed by ExtensionArray. 
+ """ + + _data: ExtensionArray + + __eq__ = _make_wrapped_comparison_op("__eq__") + __ne__ = _make_wrapped_comparison_op("__ne__") + __lt__ = _make_wrapped_comparison_op("__lt__") + __gt__ = _make_wrapped_comparison_op("__gt__") + __le__ = _make_wrapped_comparison_op("__le__") + __ge__ = _make_wrapped_comparison_op("__ge__") + + def __getitem__(self, key): + result = self._data[key] + if isinstance(result, type(self._data)): + return type(self)(result, name=self.name) + + # Includes cases where we get a 2D ndarray back for MPL compat + deprecate_ndim_indexing(result) + return result + + def __iter__(self): + return self._data.__iter__() + + @property + def _ndarray_values(self) -> np.ndarray: + return self._data._ndarray_values + + @Appender(Index.dropna.__doc__) + def dropna(self, how="any"): + if how not in ("any", "all"): + raise ValueError(f"invalid how option: {how}") + + if self.hasnans: + return self._shallow_copy(self._data[~self._isnan]) + return self._shallow_copy() + + def repeat(self, repeats, axis=None): + nv.validate_repeat(tuple(), dict(axis=axis)) + result = self._data.repeat(repeats, axis=axis) + return self._shallow_copy(result) + + @Appender(Index.take.__doc__) + def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): + nv.validate_take(tuple(), kwargs) + indices = ensure_platform_int(indices) + + taken = self._assert_take_fillable( + self._data, + indices, + allow_fill=allow_fill, + fill_value=fill_value, + na_value=self._na_value, + ) + return type(self)(taken, name=self.name) + + def unique(self, level=None): + if level is not None: + self._validate_index_level(level) + + result = self._data.unique() + return self._shallow_copy(result) + + def _get_unique_index(self, dropna=False): + if self.is_unique and not dropna: + return self + + result = self._data.unique() + if dropna and self.hasnans: + result = result[~result.isna()] + return self._shallow_copy(result) + + @Appender(Index.map.__doc__) + def map(self, mapper, 
class FrozenList(PandasObject, list):
    """
    List subclass whose in-place mutators are disabled.

    Item assignment, deletion and every mutating list method raise
    ``TypeError``.  The object hashes like the tuple of its elements, so
    it can be used for lookups, and it compares equal to tuples and lists
    holding the same elements.
    """

    # Side note: This has to be of type list.  Otherwise,
    # it messes up PyTables type checks.

    def union(self, other) -> "FrozenList":
        """
        Returns a FrozenList with other concatenated to the end of self.

        Parameters
        ----------
        other : array-like
            The array-like whose elements we are concatenating.

        Returns
        -------
        FrozenList
            The elements of self followed by the elements of other.
        """
        if isinstance(other, tuple):
            other = list(other)
        return type(self)(super().__add__(other))

    def difference(self, other) -> "FrozenList":
        """
        Returns a FrozenList with elements from other removed from self.

        Parameters
        ----------
        other : array-like
            The array-like whose elements we are removing from self.

        Returns
        -------
        FrozenList
            The collection difference between self and other.
        """
        other = set(other)
        temp = [x for x in self if x not in other]
        return type(self)(temp)

    # TODO: Consider deprecating these in favor of `union` (xref gh-15506)
    __add__ = __iadd__ = union

    def __getitem__(self, n):
        # Slices keep the frozen type; scalar access returns the element.
        if isinstance(n, slice):
            return type(self)(super().__getitem__(n))
        return super().__getitem__(n)

    def __radd__(self, other):
        if isinstance(other, tuple):
            other = list(other)
        return type(self)(other + list(self))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, (tuple, FrozenList)):
            other = list(other)
        return super().__eq__(other)

    def __ne__(self, other: Any) -> bool:
        # list.__ne__ does not go through the overridden __eq__, so without
        # this a tuple with the same elements would be both == and !=.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    # Not a real special method; kept so the attribute stays available.
    __req__ = __eq__

    def __mul__(self, other):
        return type(self)(super().__mul__(other))

    __imul__ = __mul__

    def __reduce__(self):
        return type(self), (list(self),)

    def __hash__(self):
        return hash(tuple(self))

    def _disabled(self, *args, **kwargs):
        """
        This method will not function because object is immutable.
        """
        raise TypeError(f"'{type(self).__name__}' does not support mutable operations.")

    def __str__(self) -> str:
        return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n"))

    def __repr__(self) -> str:
        return f"{type(self).__name__}({str(self)})"

    __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled
    pop = append = extend = remove = sort = insert = _disabled
    # clear() and reverse() also mutate in place and must be blocked too.
    clear = reverse = _disabled
pandas.core.indexes.datetimes import DatetimeIndex, date_range +from pandas.core.indexes.extension import ExtensionIndex, inherit_names +from pandas.core.indexes.multi import MultiIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range +from pandas.core.ops import get_op_result_name + +from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import DateOffset + +_VALID_CLOSED = {"left", "right", "both", "neither"} +_index_doc_kwargs = dict(ibase._index_doc_kwargs) + +_index_doc_kwargs.update( + dict( + klass="IntervalIndex", + qualname="IntervalIndex", + target_klass="IntervalIndex or list of Intervals", + name=textwrap.dedent( + """\ + name : object, optional + Name to be stored in the index. + """ + ), + ) +) + + +def _get_next_label(label): + dtype = getattr(label, "dtype", type(label)) + if isinstance(label, (Timestamp, Timedelta)): + dtype = "datetime64" + if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): + return label + np.timedelta64(1, "ns") + elif is_integer_dtype(dtype): + return label + 1 + elif is_float_dtype(dtype): + return np.nextafter(label, np.infty) + else: + raise TypeError(f"cannot determine next label for type {repr(type(label))}") + + +def _get_prev_label(label): + dtype = getattr(label, "dtype", type(label)) + if isinstance(label, (Timestamp, Timedelta)): + dtype = "datetime64" + if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): + return label - np.timedelta64(1, "ns") + elif is_integer_dtype(dtype): + return label - 1 + elif is_float_dtype(dtype): + return np.nextafter(label, -np.infty) + else: + raise TypeError(f"cannot determine next label for type {repr(type(label))}") + + +def _new_IntervalIndex(cls, d): + """ + This is called upon unpickling, rather than the default which doesn't have + arguments and breaks __new__. 
class SetopCheck:
    """
    Decorator for the set operations of IntervalIndex that performs the
    type checks before the decorated operation runs.

    When `other` is not an IntervalIndex the operation named `op_name` is
    carried out on an object-dtype copy of the caller instead of calling
    the decorated function.
    """

    def __init__(self, op_name):
        self.op_name = op_name

    def __call__(self, setop):
        def func(intvidx_self, other, sort=False):
            intvidx_self._assert_can_do_setop(other)
            other = ensure_index(other)

            if not isinstance(other, IntervalIndex):
                # Fall back to the generic object-dtype implementation.
                as_object = intvidx_self.astype(object)
                result = getattr(as_object, self.op_name)(other)
                if self.op_name in ("difference",):
                    result = result.astype(intvidx_self.dtype)
                return result

            if intvidx_self.closed != other.closed:
                raise ValueError(
                    "can only do set operations between two IntervalIndex "
                    "objects that are closed on the same side"
                )

            # GH 19016: ensure set op will not return a prohibited dtype
            common_subtype = find_common_type(
                [intvidx_self.dtype.subtype, other.dtype.subtype]
            )
            if is_object_dtype(common_subtype):
                raise TypeError(
                    f"can only do {self.op_name} between two IntervalIndex "
                    "objects that have compatible dtypes"
                )

            return setop(intvidx_self, other, sort)

        return func
+ + See further examples in the doc strings of ``interval_range`` and the + mentioned constructor methods. + """ + ), + ) +) +@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) +@inherit_names( + [ + "__len__", + "__array__", + "overlaps", + "contains", + "size", + "dtype", + "left", + "right", + "length", + ], + IntervalArray, +) +@inherit_names( + ["is_non_overlapping_monotonic", "mid", "_ndarray_values", "closed"], + IntervalArray, + cache=True, +) +class IntervalIndex(IntervalMixin, ExtensionIndex): + _typ = "intervalindex" + _comparables = ["name"] + _attributes = ["name", "closed"] + + # we would like our indexing holder to defer to us + _defer_to_indexing = True + + # Immutable, so we are able to cache computations like isna in '_mask' + _mask = None + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data, + closed=None, + dtype=None, + copy: bool = False, + name=None, + verify_integrity: bool = True, + ): + + name = maybe_extract_name(name, data, cls) + + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray( + data, + closed=closed, + copy=copy, + dtype=dtype, + verify_integrity=verify_integrity, + ) + + return cls._simple_new(array, name) + + @classmethod + def _simple_new(cls, array, name, closed=None): + """ + Construct from an IntervalArray + + Parameters + ---------- + array : IntervalArray + name : str + Attached as result.name + closed : Any + Ignored. 
+ """ + result = IntervalMixin.__new__(cls) + result._data = array + result.name = name + result._no_setting_name = False + result._reset_identity() + return result + + @classmethod + @Appender( + _interval_shared_docs["from_breaks"] + % dict( + klass="IntervalIndex", + examples=textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + closed='right', + dtype='interval[int64]') + """ + ), + ) + ) + def from_breaks( + cls, breaks, closed: str = "right", name=None, copy: bool = False, dtype=None + ): + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray.from_breaks( + breaks, closed=closed, copy=copy, dtype=dtype + ) + return cls._simple_new(array, name=name) + + @classmethod + @Appender( + _interval_shared_docs["from_arrays"] + % dict( + klass="IntervalIndex", + examples=textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]], + closed='right', + dtype='interval[int64]') + """ + ), + ) + ) + def from_arrays( + cls, + left, + right, + closed: str = "right", + name=None, + copy: bool = False, + dtype=None, + ): + with rewrite_exception("IntervalArray", cls.__name__): + array = IntervalArray.from_arrays( + left, right, closed, copy=copy, dtype=dtype + ) + return cls._simple_new(array, name=name) + + @classmethod + @Appender( + _interval_shared_docs["from_tuples"] + % dict( + klass="IntervalIndex", + examples=textwrap.dedent( + """\ + Examples + -------- + >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) + IntervalIndex([(0, 1], (1, 2]], + closed='right', + dtype='interval[int64]') + """ + ), + ) + ) + def from_tuples( + cls, data, closed: str = "right", name=None, copy: bool = False, dtype=None + ): + with rewrite_exception("IntervalArray", cls.__name__): + arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) + return cls._simple_new(arr, name=name) + 
+ # -------------------------------------------------------------------- + + @Appender(_index_shared_docs["_shallow_copy"]) + def _shallow_copy(self, left=None, right=None, **kwargs): + result = self._data._shallow_copy(left=left, right=right) + attributes = self._get_attributes_dict() + attributes.update(kwargs) + return self._simple_new(result, **attributes) + + @cache_readonly + def _isnan(self): + """ + Return a mask indicating if each value is NA. + """ + if self._mask is None: + self._mask = isna(self.left) + return self._mask + + @cache_readonly + def _engine(self): + left = self._maybe_convert_i8(self.left) + right = self._maybe_convert_i8(self.right) + return IntervalTree(left, right, closed=self.closed) + + def __contains__(self, key) -> bool: + """ + return a boolean if this key is IN the index + We *only* accept an Interval + + Parameters + ---------- + key : Interval + + Returns + ------- + bool + """ + if not isinstance(key, Interval): + return False + + try: + self.get_loc(key) + return True + except KeyError: + return False + + @cache_readonly + def _multiindex(self): + return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) + + @cache_readonly + def values(self): + """ + Return the IntervalIndex's data as an IntervalArray. 
@cache_readonly
def is_unique(self):
    """
    Return True if the IntervalIndex contains unique elements, else False.
    """
    # two or more missing intervals count as duplicates of each other
    if self.isna().sum() > 1:
        return False

    lefts, rights = self.left, self.right

    # an interval is identified by its (left, right) pair, so when either
    # side never repeats no pair can repeat
    if lefts.is_unique or rights.is_unique:
        return True

    # only positions whose left endpoint is repeated can hold a repeated
    # pair, so the pairwise check is restricted to those positions
    visited = set()
    for pos in np.where(lefts.duplicated(keep=False))[0]:
        endpoints = (lefts[pos], rights[pos])
        if endpoints in visited:
            return False
        visited.add(endpoints)

    return True
def _maybe_convert_i8(self, key):
    """
    Translate ``key`` into its i8 representation when that is required.

    Used as a preprocessing step before IntervalTree queries
    (``self._engine``), which expect numeric data.

    Parameters
    ----------
    key : scalar or list-like
        The key that should maybe be converted to i8.

    Returns
    -------
    scalar or list-like
        The key exactly as passed in when no conversion is needed; otherwise
        the i8 value for a scalar, an Index built from ``asi8`` for a
        list-like, or an Interval / IntervalIndex rebuilt from converted
        endpoints for interval-like keys.

    Raises
    ------
    ValueError
        If the dtype of the converted key differs from this index' subtype.
    """
    as_passed = key
    if is_list_like(key):
        key = ensure_index(key)

    if not self._needs_i8_conversion(key):
        # hand back the caller's own object, not the Index built above
        return as_passed

    key_is_scalar = is_scalar(key)

    if is_interval_dtype(key) or isinstance(key, Interval):
        # interval-like: convert each side on its own, then rebuild
        i8_left = self._maybe_convert_i8(key.left)
        i8_right = self._maybe_convert_i8(key.right)
        if key_is_scalar:
            return Interval(i8_left, i8_right, closed=self.closed)
        return IntervalIndex.from_arrays(i8_left, i8_right, closed=self.closed)

    if key_is_scalar:
        # Timestamp/Timedelta
        key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
    else:
        # DatetimeIndex/TimedeltaIndex
        key_dtype = key.dtype
        key_i8 = Index(key.asi8)
        if key.hasnans:
            # turn NaT's i8 value into np.nan so it is not treated as a
            # valid value, which could cause errors (e.g. is_overlapping)
            key_i8 = key_i8.where(~key._isnan)

    # the key must agree with the IntervalIndex subtype
    subtype = self.dtype.subtype
    if is_dtype_equal(subtype, key_dtype):
        return key_i8

    raise ValueError(
        f"Cannot index an IntervalIndex of subtype {subtype} with "
        f"values of dtype {key_dtype}"
    )
def get_loc(
    self, key: Any, method: Optional[str] = None, tolerance=None
) -> Union[int, slice, np.ndarray]:
    """
    Get integer location, slice or boolean mask for requested label.

    Parameters
    ----------
    key : label
    method : {None}, optional
        * default: matches where the label is within an interval only.
    tolerance : optional
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    int if unique index, slice if monotonic index, else mask

    Raises
    ------
    KeyError
        If ``key`` is list-like, is an Interval closed on a different side
        than the index, cannot be compared with the endpoints, or matches
        no interval.

    Examples
    --------
    >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
    >>> index = pd.IntervalIndex([i1, i2])
    >>> index.get_loc(1)
    0

    You can also supply a point inside an interval.

    >>> index.get_loc(1.5)
    1

    If a label is in several intervals, you get the locations of all the
    relevant intervals.

    >>> i3 = pd.Interval(0, 2)
    >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
    >>> overlapping_index.get_loc(0.5)
    array([ True, False,  True])

    Only exact matches will be returned if an interval is provided.

    >>> index.get_loc(pd.Interval(0, 1))
    0
    """
    self._check_method(method)

    # list-like are invalid labels for II but in some cases may work, e.g
    # single element array of comparable type, so guard against them early
    if is_list_like(key):
        raise KeyError(key)

    if isinstance(key, Interval):
        # an Interval key only matches an identical interval
        if self.closed != key.closed:
            raise KeyError(key)
        mask = (self.left == key.left) & (self.right == key.right)
    else:
        # assume scalar: it matches every interval that contains it, with
        # the comparison strictness on each side decided by closedness
        op_left = le if self.closed_left else lt
        op_right = le if self.closed_right else lt
        try:
            mask = op_left(self.left, key) & op_right(key, self.right)
        except TypeError as err:
            # scalar is not comparable to II subtype --> invalid label;
            # chain the TypeError so the real reason stays in the traceback
            raise KeyError(key) from err

    matches = mask.sum()
    if matches == 0:
        raise KeyError(key)
    elif matches == 1:
        return mask.argmax()
    # several hits: a slice when they are contiguous, else the boolean mask
    return lib.maybe_booleans_to_slice(mask.view("u1"))
+ """ + ) + }, + ) + ) + @Appender(_index_shared_docs["get_indexer"]) + def get_indexer( + self, + target: AnyArrayLike, + method: Optional[str] = None, + limit: Optional[int] = None, + tolerance: Optional[Any] = None, + ) -> np.ndarray: + + self._check_method(method) + + if self.is_overlapping: + raise InvalidIndexError( + "cannot handle overlapping indices; " + "use IntervalIndex.get_indexer_non_unique" + ) + + target_as_index = ensure_index(target) + + if isinstance(target_as_index, IntervalIndex): + # equal indexes -> 1:1 positional match + if self.equals(target_as_index): + return np.arange(len(self), dtype="intp") + + # different closed or incompatible subtype -> no matches + common_subtype = find_common_type( + [self.dtype.subtype, target_as_index.dtype.subtype] + ) + if self.closed != target_as_index.closed or is_object_dtype(common_subtype): + return np.repeat(np.intp(-1), len(target_as_index)) + + # non-overlapping -> at most one match per interval in target_as_index + # want exact matches -> need both left/right to match, so defer to + # left/right get_indexer, compare elementwise, equality -> match + left_indexer = self.left.get_indexer(target_as_index.left) + right_indexer = self.right.get_indexer(target_as_index.right) + indexer = np.where(left_indexer == right_indexer, left_indexer, -1) + elif is_categorical(target_as_index): + # get an indexer for unique categories then propagate to codes via take_1d + categories_indexer = self.get_indexer(target_as_index.categories) + indexer = take_1d(categories_indexer, target_as_index.codes, fill_value=-1) + elif not is_object_dtype(target_as_index): + # homogeneous scalar index: use IntervalTree + target_as_index = self._maybe_convert_i8(target_as_index) + indexer = self._engine.get_indexer(target_as_index.values) + else: + # heterogeneous scalar index: defer elementwise to get_loc + # (non-overlapping so get_loc guarantees scalar of KeyError) + indexer = [] + for key in target_as_index: + try: + loc = 
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(
    self, target: AnyArrayLike
) -> Tuple[np.ndarray, np.ndarray]:
    # NOTE: no docstring here on purpose -- ``Appender`` supplies ``__doc__``.
    target_as_index = ensure_index(target)

    # check that target_as_index IntervalIndex is compatible
    if isinstance(target_as_index, IntervalIndex):
        common_subtype = find_common_type(
            [self.dtype.subtype, target_as_index.dtype.subtype]
        )
        if self.closed != target_as_index.closed or is_object_dtype(common_subtype):
            # different closed or incompatible subtype -> no matches.
            # Built as intp so this early return has the same dtype as the
            # ensure_platform_int results returned below.
            return (
                np.repeat(np.intp(-1), len(target_as_index)),
                np.arange(len(target_as_index), dtype="intp"),
            )

    if is_object_dtype(target_as_index) or isinstance(
        target_as_index, IntervalIndex
    ):
        # target_as_index might contain intervals: defer elementwise to get_loc
        indexer, missing = [], []
        for i, key in enumerate(target_as_index):
            try:
                locs = self.get_loc(key)
            except KeyError:
                missing.append(i)
                locs = np.array([-1], dtype="intp")
            else:
                if isinstance(locs, slice):
                    locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
                elif isinstance(locs, np.ndarray) and locs.dtype == bool:
                    # get_loc returns a boolean mask for non-contiguous
                    # matches; turn it into positions, otherwise the
                    # True/False values would be concatenated as 1/0
                    locs = np.where(locs)[0]
                else:
                    locs = np.array(locs, ndmin=1)
            indexer.append(locs)

        if indexer:
            indexer = np.concatenate(indexer)
        else:
            # np.concatenate raises on an empty sequence (empty target)
            indexer = np.array([], dtype="intp")
    else:
        target_as_index = self._maybe_convert_i8(target_as_index)
        indexer, missing = self._engine.get_indexer_non_unique(
            target_as_index.values
        )

    return ensure_platform_int(indexer), ensure_platform_int(missing)
@Appender(_index_shared_docs["get_value"] % _index_doc_kwargs)
def get_value(self, series: ABCSeries, key: Any) -> Any:
    # NOTE: no docstring here on purpose -- ``Appender`` supplies ``__doc__``.
    # Every branch below resolves ``key`` to something positional that is
    # then handed to ``series.iloc``.

    if com.is_bool_indexer(key):
        # boolean masks are already positional
        loc = key
    elif is_list_like(key):
        if self.is_overlapping:
            loc, missing = self.get_indexer_non_unique(key)
            if len(missing):
                raise KeyError(key)
        else:
            loc = self.get_indexer(key)
            # get_indexer marks labels that are not found with -1; passing
            # that to iloc would silently select the *last* row, so treat
            # it as a missing key like the overlapping branch does
            if (loc == -1).any():
                raise KeyError(key)
    elif isinstance(key, slice):
        if not (key.step is None or key.step == 1):
            raise ValueError("cannot support not-default step in a slice")
        loc = self._convert_slice_indexer(key, kind="getitem")
    else:
        loc = self.get_loc(key)
    return series.iloc[loc]
def _concat_same_dtype(self, to_concat, name):
    """
    Concatenate after checking that the pieces agree on ``.closed``.

    Zero-length indexes are left out of the comparison; the non-empty
    pieces must share exactly one ``closed`` value, otherwise ValueError
    is raised.
    """
    closed_sides = {piece.closed for piece in to_concat if len(piece)}
    if len(closed_sides) != 1:
        raise ValueError(
            "can only append two IntervalIndex objects "
            "that are closed on the same side"
        )
    return super()._concat_same_dtype(to_concat, name)
def equals(self, other) -> bool:
    """
    Determines if two IntervalIndex objects contain the same elements.

    Parameters
    ----------
    other : object
        Anything; objects that are not an IntervalIndex are compared only
        if they have interval dtype, in which case they are first wrapped
        in an Index.

    Returns
    -------
    bool
        True when the left endpoints, the right endpoints and ``closed``
        all agree.
    """
    if self.is_(other):
        return True

    # if we can coerce to an II
    # then we can compare
    if not isinstance(other, IntervalIndex):
        if not is_interval_dtype(other):
            return False
        # This used to read ``Index(getattr(other, ".values", other))``;
        # an attribute named ".values" can never exist, so the getattr
        # always fell back to ``other``.  Wrap ``other`` directly.
        other = Index(other)

    return (
        self.left.equals(other.left)
        and self.right.equals(other.right)
        and self.closed == other.closed
    )
def _setop(op_name: str, sort=None):
    # Factory for the set operations defined below (union, difference,
    # symmetric_difference): the operation named ``op_name`` is run on the
    # (left, right) MultiIndex views of both operands and the resulting
    # tuples are turned back into an index of the caller's type.
    @SetopCheck(op_name=op_name)
    def func(self, other, sort=sort):
        multi_op = getattr(self._multiindex, op_name)
        combined = multi_op(other._multiindex, sort=sort)
        result_name = get_op_result_name(self, other)

        tuples = combined.values
        if combined.empty:
            # GH 19101: ensure empty results have correct dtype
            tuples = tuples.astype(self.dtype.subtype)

        return type(self).from_tuples(tuples, closed=self.closed, name=result_name)

    return func
def interval_range(
    start=None, end=None, periods=None, freq=None, name=None, closed="right"
):
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    Raises
    ------
    ValueError
        If the number of specified parameters among ``start``, ``end``,
        ``periods`` and ``freq`` is not exactly three, if ``start`` or
        ``end`` is not numeric or datetime-like, or if a non-numeric
        ``freq`` cannot be converted to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and
        ``freq`` are not type compatible.

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = com.maybe_box_datetimelike(start)
    end = com.maybe_box_datetimelike(end)
    # whichever bound was given decides numeric vs datetime-like handling
    endpoint = start if start is not None else end

    # fill in the default freq only when something else is missing, so the
    # "exactly three" check below still sees three values
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            # chain explicitly so the parser's own message is kept as the
            # cause of the error raised here
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, "int64")
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Engine that manages a MultiIndex by mapping each combination of labels
    to a single positive integer.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Collapse combination(s) of uint64 codes into one uint64 each, in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Move each level's codes into that level's own bit range, using the
        # pre-calculated offsets.  The shift is done in place, so the
        # caller's array is modified.
        codes <<= self.offsets

        # After shifting, the significant bits of the levels are disjoint,
        # so OR-ing them (sum would give the same value) composes a single
        # positive integer.  A single key is reduced over its only axis,
        # multiple keys are reduced per row.
        reduce_axis = 0 if codes.ndim == 1 else 1
        return np.bitwise_or.reduce(codes, axis=reduce_axis)
+ """ + + _base = libindex.ObjectEngine + + def _codes_to_ints(self, codes): + """ + Transform combination(s) of uint64 in one Python integer (each), in a + strictly monotonic way (i.e. respecting the lexicographic order of + integer combinations): see BaseMultiIndexCodesEngine documentation. + + Parameters + ---------- + codes : 1- or 2-dimensional array of dtype uint64 + Combinations of integers (one per row) + + Returns + ------- + int, or 1-dimensional array of dtype object + Integer(s) representing one combination (each). + """ + + # Shift the representation of each level by the pre-calculated number + # of bits. Since this can overflow uint64, first make sure we are + # working with Python integers: + codes = codes.astype("object") << self.offsets + + # Now sum and OR are in fact interchangeable. This is a simple + # composition of the (disjunct) significant bits of each level (i.e. + # each column in "codes") in a single positive integer (per row): + if codes.ndim == 1: + # Single key + return np.bitwise_or.reduce(codes) + + # Multiple keys + return np.bitwise_or.reduce(codes, axis=1) + + +class MultiIndex(Index): + """ + A multi-level, or hierarchical, index object for pandas objects. + + Parameters + ---------- + levels : sequence of arrays + The unique labels for each level. + codes : sequence of arrays + Integers for each level designating which label at each location. + + .. versionadded:: 0.24.0 + sortorder : optional int + Level of sortedness (must be lexicographically sorted by that + level). + names : optional sequence of objects + Names for each of the index levels. (name is accepted for compat). + copy : bool, default False + Copy the meta-data. + verify_integrity : bool, default True + Check that the levels/codes are consistent and valid. 
+ + Attributes + ---------- + names + levels + codes + nlevels + levshape + + Methods + ------- + from_arrays + from_tuples + from_product + from_frame + set_levels + set_codes + to_frame + to_flat_index + is_lexsorted + sortlevel + droplevel + swaplevel + reorder_levels + remove_unused_levels + get_locs + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_product : Create a MultiIndex from the cartesian product + of iterables. + MultiIndex.from_tuples : Convert list of tuples to a MultiIndex. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + Index : The base pandas Index type. + + Notes + ----- + See the `user guide + `_ + for more. + + Examples + -------- + A new ``MultiIndex`` is typically constructed using one of the helper + methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product` + and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``): + + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + + See further examples for how to construct a MultiIndex in the doc strings + of the mentioned helper methods. 
+ """ + + _deprecations = Index._deprecations | frozenset() + + # initialize to zero-length tuples to make everything work + _typ = "multiindex" + _names = FrozenList() + _levels = FrozenList() + _codes = FrozenList() + _comparables = ["names"] + rename = Index.set_names + + # -------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + levels=None, + codes=None, + sortorder=None, + names=None, + dtype=None, + copy=False, + name=None, + verify_integrity: bool = True, + _set_identity: bool = True, + ): + + # compat with Index + if name is not None: + names = name + if levels is None or codes is None: + raise TypeError("Must pass both levels and codes") + if len(levels) != len(codes): + raise ValueError("Length of levels and codes must be the same.") + if len(levels) == 0: + raise ValueError("Must pass non-zero number of levels/codes") + + result = object.__new__(MultiIndex) + + # we've already validated levels and codes, so shortcut here + result._set_levels(levels, copy=copy, validate=False) + result._set_codes(codes, copy=copy, validate=False) + + result._names = [None] * len(levels) + if names is not None: + # handles name validation + result._set_names(names) + + if sortorder is not None: + result.sortorder = int(sortorder) + else: + result.sortorder = sortorder + + if verify_integrity: + new_codes = result._verify_integrity() + result._codes = new_codes + + if _set_identity: + result._reset_identity() + + return result + + def _validate_codes(self, level: List, code: List): + """ + Reassign code values as -1 if their corresponding levels are NaN. + + Parameters + ---------- + code : list + Code to reassign. + level : list + Level to check for missing values (NaN, NaT, None). + + Returns + ------- + new code where code value = -1 if it corresponds + to a level with missing values (NaN, NaT, None). 
+ """ + null_mask = isna(level) + if np.any(null_mask): + code = np.where(null_mask[code], -1, code) + return code + + def _verify_integrity( + self, codes: Optional[List] = None, levels: Optional[List] = None + ): + """ + Parameters + ---------- + codes : optional list + Codes to check for validity. Defaults to current codes. + levels : optional list + Levels to check for validity. Defaults to current levels. + + Raises + ------ + ValueError + If length of levels and codes don't match, if the codes for any + level would exceed level bounds, or there are any duplicate levels. + + Returns + ------- + new codes where code value = -1 if it corresponds to a + NaN level. + """ + # NOTE: Currently does not check, among other things, that cached + # nlevels matches nor that sortorder matches actually sortorder. + codes = codes or self.codes + levels = levels or self.levels + + if len(levels) != len(codes): + raise ValueError( + "Length of levels and codes must match. NOTE: " + "this index is in an inconsistent state." + ) + codes_length = len(codes[0]) + for i, (level, level_codes) in enumerate(zip(levels, codes)): + if len(level_codes) != codes_length: + raise ValueError( + f"Unequal code lengths: {[len(code_) for code_ in codes]}" + ) + if len(level_codes) and level_codes.max() >= len(level): + raise ValueError( + f"On level {i}, code max ({level_codes.max()}) >= length of " + f"level ({len(level)}). 
NOTE: this index is in an " + "inconsistent state" + ) + if len(level_codes) and level_codes.min() < -1: + raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1") + if not level.is_unique: + raise ValueError( + f"Level values must be unique: {list(level)} on level {i}" + ) + if self.sortorder is not None: + if self.sortorder > self._lexsort_depth(): + raise ValueError( + "Value for sortorder must be inferior or equal to actual " + f"lexsort_depth: sortorder {self.sortorder} " + f"with lexsort_depth {self._lexsort_depth()}" + ) + + codes = [ + self._validate_codes(level, code) for level, code in zip(levels, codes) + ] + new_codes = FrozenList(codes) + return new_codes + + @classmethod + def from_arrays(cls, arrays, sortorder=None, names=lib.no_default): + """ + Convert arrays to MultiIndex. + + Parameters + ---------- + arrays : list / sequence of array-likes + Each array-like gives one level's value for each data point. + len(arrays) is the number of levels. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + """ + error_msg = "Input must be a list / sequence of array-likes." 
+ if not is_list_like(arrays): + raise TypeError(error_msg) + elif is_iterator(arrays): + arrays = list(arrays) + + # Check if elements of array are list-like + for array in arrays: + if not is_list_like(array): + raise TypeError(error_msg) + + # Check if lengths of all arrays are equal or not, + # raise ValueError, if not + for i in range(1, len(arrays)): + if len(arrays[i]) != len(arrays[i - 1]): + raise ValueError("all arrays must be same length") + + codes, levels = factorize_from_iterables(arrays) + if names is lib.no_default: + names = [getattr(arr, "name", None) for arr in arrays] + + return MultiIndex( + levels=levels, + codes=codes, + sortorder=sortorder, + names=names, + verify_integrity=False, + ) + + @classmethod + def from_tuples(cls, tuples, sortorder=None, names=None): + """ + Convert list of tuples to MultiIndex. + + Parameters + ---------- + tuples : list / sequence of tuple-likes + Each tuple is the index of one row/column. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> tuples = [(1, 'red'), (1, 'blue'), + ... 
(2, 'red'), (2, 'blue')] + >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + """ + if not is_list_like(tuples): + raise TypeError("Input must be a list / sequence of tuple-likes.") + elif is_iterator(tuples): + tuples = list(tuples) + + if len(tuples) == 0: + if names is None: + raise TypeError("Cannot infer number of levels from empty list") + arrays = [[]] * len(names) + elif isinstance(tuples, (np.ndarray, Index)): + if isinstance(tuples, Index): + tuples = tuples._values + + arrays = list(lib.tuples_to_object_array(tuples).T) + elif isinstance(tuples, list): + arrays = list(lib.to_object_array_tuples(tuples).T) + else: + arrays = zip(*tuples) + + return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) + + @classmethod + def from_product(cls, iterables, sortorder=None, names=lib.no_default): + """ + Make a MultiIndex from the cartesian product of multiple iterables. + + Parameters + ---------- + iterables : list / sequence of iterables + Each iterable has unique labels for each level of the index. + sortorder : int or None + Level of sortedness (must be lexicographically sorted by that + level). + names : list / sequence of str, optional + Names for the levels in the index. + + .. versionchanged:: 1.0.0 + + If not explicitly provided, names will be inferred from the + elements of iterables if an element has a name attribute + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> numbers = [0, 1, 2] + >>> colors = ['green', 'purple'] + >>> pd.MultiIndex.from_product([numbers, colors], + ... 
names=['number', 'color']) + MultiIndex([(0, 'green'), + (0, 'purple'), + (1, 'green'), + (1, 'purple'), + (2, 'green'), + (2, 'purple')], + names=['number', 'color']) + """ + from pandas.core.reshape.util import cartesian_product + + if not is_list_like(iterables): + raise TypeError("Input must be a list / sequence of iterables.") + elif is_iterator(iterables): + iterables = list(iterables) + + codes, levels = factorize_from_iterables(iterables) + if names is lib.no_default: + names = [getattr(it, "name", None) for it in iterables] + + codes = cartesian_product(codes) + return MultiIndex(levels, codes, sortorder=sortorder, names=names) + + @classmethod + def from_frame(cls, df, sortorder=None, names=None): + """ + Make a MultiIndex from a DataFrame. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + df : DataFrame + DataFrame to be converted to MultiIndex. + sortorder : int, optional + Level of sortedness (must be lexicographically sorted by that + level). + names : list-like, optional + If no names are provided, use the column names, or tuple of column + names if the columns is a MultiIndex. If a sequence, overwrite + names with the given sequence. + + Returns + ------- + MultiIndex + The MultiIndex representation of the given DataFrame. + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex. + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + + Examples + -------- + >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], + ... ['NJ', 'Temp'], ['NJ', 'Precip']], + ... 
columns=['a', 'b']) + >>> df + a b + 0 HI Temp + 1 HI Precip + 2 NJ Temp + 3 NJ Precip + + >>> pd.MultiIndex.from_frame(df) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['a', 'b']) + + Using explicit names, instead of the column names + + >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['state', 'observation']) + """ + if not isinstance(df, ABCDataFrame): + raise TypeError("Input must be a DataFrame") + + column_names, columns = zip(*df.items()) + names = column_names if names is None else names + return cls.from_arrays(columns, sortorder=sortorder, names=names) + + # -------------------------------------------------------------------- + + @property + def _values(self): + # We override here, since our parent uses _data, which we don't use. + return self.values + + @property + def shape(self): + """ + Return a tuple of the shape of the underlying data. + """ + # overriding the base Index.shape definition to avoid materializing + # the values (GH-27384, GH-27775) + return (len(self),) + + @property + def array(self): + """ + Raises a ValueError for `MultiIndex` because there's no single + array backing a MultiIndex. + + Raises + ------ + ValueError + """ + raise ValueError( + "MultiIndex has no single backing array. Use " + "'MultiIndex.to_numpy()' to get a NumPy array of tuples." 
+ ) + + # -------------------------------------------------------------------- + # Levels Methods + + @cache_readonly + def levels(self): + # Use cache_readonly to ensure that self.get_locs doesn't repeatedly + # create new IndexEngine + # https://github.com/pandas-dev/pandas/issues/31648 + result = [ + x._shallow_copy(name=name) for x, name in zip(self._levels, self._names) + ] + for level in result: + # disallow midx.levels[0].name = "foo" + level._no_setting_name = True + return FrozenList(result) + + def _set_levels( + self, levels, level=None, copy=False, validate=True, verify_integrity=False + ): + # This is NOT part of the levels property because it should be + # externally not allowed to set levels. User beware if you change + # _levels directly + if validate: + if len(levels) == 0: + raise ValueError("Must set non-zero number of levels.") + if level is None and len(levels) != self.nlevels: + raise ValueError("Length of levels must match number of levels.") + if level is not None and len(levels) != len(level): + raise ValueError("Length of levels must match length of level.") + + if level is None: + new_levels = FrozenList( + ensure_index(lev, copy=copy)._shallow_copy() for lev in levels + ) + else: + level_numbers = [self._get_level_number(lev) for lev in level] + new_levels = list(self._levels) + for lev_num, lev in zip(level_numbers, levels): + new_levels[lev_num] = ensure_index(lev, copy=copy)._shallow_copy() + new_levels = FrozenList(new_levels) + + if verify_integrity: + new_codes = self._verify_integrity(levels=new_levels) + self._codes = new_codes + + names = self.names + self._levels = new_levels + if any(names): + self._set_names(names) + + self._tuples = None + self._reset_cache() + + def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): + """ + Set new levels on MultiIndex. Defaults to returning new index. + + Parameters + ---------- + levels : sequence or list of sequence + New level(s) to apply. 
+ level : int, level name, or sequence of int/level names (default None) + Level(s) to set (None for all levels). + inplace : bool + If True, mutates in place. + verify_integrity : bool, default True + If True, checks that levels and codes are compatible. + + Returns + ------- + new index (of same type and class...etc) + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), + (2, 'one'), (2, 'two'), + (3, 'one'), (3, 'two')], + names=['foo', 'bar']) + >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2), + ('c', 1), + ('c', 2)], + names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b', 'c'], level=0) + MultiIndex([('a', 'one'), + ('a', 'two'), + ('b', 'one'), + ('b', 'two'), + ('c', 'one'), + ('c', 'two')], + names=['foo', 'bar']) + >>> idx.set_levels(['a', 'b'], level='bar') + MultiIndex([(1, 'a'), + (1, 'b'), + (2, 'a'), + (2, 'b'), + (3, 'a'), + (3, 'b')], + names=['foo', 'bar']) + + If any of the levels passed to ``set_levels()`` exceeds the + existing length, all of the values from that argument will + be stored in the MultiIndex levels, though the values will + be truncated in the MultiIndex output. 
+ + >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) + MultiIndex([('a', 1), + ('a', 2), + ('b', 1), + ('b', 2)], + names=['foo', 'bar']) + >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels + FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) + """ + if is_list_like(levels) and not isinstance(levels, Index): + levels = list(levels) + + if level is not None and not is_list_like(level): + if not is_list_like(levels): + raise TypeError("Levels must be list-like") + if is_list_like(levels[0]): + raise TypeError("Levels must be list-like") + level = [level] + levels = [levels] + elif level is None or is_list_like(level): + if not is_list_like(levels) or not is_list_like(levels[0]): + raise TypeError("Levels must be list of lists-like") + + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._reset_identity() + idx._set_levels( + levels, level=level, validate=True, verify_integrity=verify_integrity + ) + if not inplace: + return idx + + @property + def codes(self): + return self._codes + + def _set_codes( + self, codes, level=None, copy=False, validate=True, verify_integrity=False + ): + if validate: + if level is None and len(codes) != self.nlevels: + raise ValueError("Length of codes must match number of levels") + if level is not None and len(codes) != len(level): + raise ValueError("Length of codes must match length of levels.") + + if level is None: + new_codes = FrozenList( + _coerce_indexer_frozen(level_codes, lev, copy=copy).view() + for lev, level_codes in zip(self._levels, codes) + ) + else: + level_numbers = [self._get_level_number(lev) for lev in level] + new_codes = list(self._codes) + for lev_num, level_codes in zip(level_numbers, codes): + lev = self.levels[lev_num] + new_codes[lev_num] = _coerce_indexer_frozen(level_codes, lev, copy=copy) + new_codes = FrozenList(new_codes) + + if verify_integrity: + new_codes = self._verify_integrity(codes=new_codes) + + self._codes = new_codes + + self._tuples = None + 
self._reset_cache() + + def set_codes(self, codes, level=None, inplace=False, verify_integrity=True): + """ + Set new codes on MultiIndex. Defaults to returning + new index. + + .. versionadded:: 0.24.0 + + New name for deprecated method `set_labels`. + + Parameters + ---------- + codes : sequence or list of sequence + New codes to apply. + level : int, level name, or sequence of int/level names (default None) + Level(s) to set (None for all levels). + inplace : bool + If True, mutates in place. + verify_integrity : bool (default True) + If True, checks that levels and codes are compatible. + + Returns + ------- + new index (of same type and class...etc) + + Examples + -------- + >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), + (1, 'two'), + (2, 'one'), + (2, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([1, 0, 1, 0], level=0) + MultiIndex([(2, 'one'), + (1, 'two'), + (2, 'one'), + (1, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([0, 0, 1, 1], level='bar') + MultiIndex([(1, 'one'), + (1, 'one'), + (2, 'two'), + (2, 'two')], + names=['foo', 'bar']) + >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) + MultiIndex([(2, 'one'), + (1, 'one'), + (2, 'two'), + (1, 'two')], + names=['foo', 'bar']) + """ + if level is not None and not is_list_like(level): + if not is_list_like(codes): + raise TypeError("Codes must be list-like") + if is_list_like(codes[0]): + raise TypeError("Codes must be list-like") + level = [level] + codes = [codes] + elif level is None or is_list_like(level): + if not is_list_like(codes) or not is_list_like(codes[0]): + raise TypeError("Codes must be list of lists-like") + + if inplace: + idx = self + else: + idx = self._shallow_copy() + idx._reset_identity() + idx._set_codes(codes, level=level, verify_integrity=verify_integrity) + if not inplace: + return idx + + def copy( + self, + 
names=None, + dtype=None, + levels=None, + codes=None, + deep=False, + _set_identity=False, + **kwargs, + ): + """ + Make a copy of this object. Names, dtype, levels and codes can be + passed and will be set on new copy. + + Parameters + ---------- + names : sequence, optional + dtype : numpy dtype or pandas type, optional + levels : sequence, optional + codes : sequence, optional + + Returns + ------- + copy : MultiIndex + + Notes + ----- + In most cases, there should be no functional difference from using + ``deep``, but if ``deep`` is passed it will attempt to deepcopy. + This could be potentially expensive on large MultiIndex objects. + """ + name = kwargs.get("name") + names = self._validate_names(name=name, names=names, deep=deep) + if "labels" in kwargs: + raise TypeError("'labels' argument has been removed; use 'codes' instead") + if deep: + from copy import deepcopy + + if levels is None: + levels = deepcopy(self.levels) + if codes is None: + codes = deepcopy(self.codes) + else: + if levels is None: + levels = self.levels + if codes is None: + codes = self.codes + return MultiIndex( + levels=levels, + codes=codes, + names=names, + sortorder=self.sortorder, + verify_integrity=False, + _set_identity=_set_identity, + ) + + def __array__(self, dtype=None) -> np.ndarray: + """ the array interface, return my values """ + return self.values + + def view(self, cls=None): + """ this is defined as a copy with the same identity """ + result = self.copy() + result._id = self._id + return result + + def _shallow_copy_with_infer(self, values, **kwargs): + # On equal MultiIndexes the difference is empty. 
+ # Therefore, an empty MultiIndex is returned GH13490 + if len(values) == 0: + return MultiIndex( + levels=[[] for _ in range(self.nlevels)], + codes=[[] for _ in range(self.nlevels)], + **kwargs, + ) + return self._shallow_copy(values, **kwargs) + + @Appender(_index_shared_docs["contains"] % _index_doc_kwargs) + def __contains__(self, key) -> bool: + hash(key) + try: + self.get_loc(key) + return True + except (LookupError, TypeError, ValueError): + return False + + @Appender(_index_shared_docs["_shallow_copy"]) + def _shallow_copy(self, values=None, **kwargs): + if values is not None: + names = kwargs.pop("names", kwargs.pop("name", self.names)) + # discards freq + kwargs.pop("freq", None) + return MultiIndex.from_tuples(values, names=names, **kwargs) + return self.copy(**kwargs) + + @cache_readonly + def dtype(self) -> np.dtype: + return np.dtype("O") + + def _is_memory_usage_qualified(self) -> bool: + """ return a boolean if we need a qualified .info display """ + + def f(l): + return "mixed" in l or "string" in l or "unicode" in l + + return any(f(l) for l in self._inferred_type_levels) + + @Appender(Index.memory_usage.__doc__) + def memory_usage(self, deep: bool = False) -> int: + # we are overwriting our base class to avoid + # computing .values here which could materialize + # a tuple representation unnecessarily + return self._nbytes(deep) + + @cache_readonly + def nbytes(self) -> int: + """ return the number of bytes in the underlying data """ + return self._nbytes(False) + + def _nbytes(self, deep: bool = False) -> int: + """ + return the number of bytes in the underlying data + deeply introspect the level data if deep=True + + include the engine hashtable + + *this is in internal routine* + + """ + + # for implementations with no useful getsizeof (PyPy) + objsize = 24 + + level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels) + label_nbytes = sum(i.nbytes for i in self.codes) + names_nbytes = sum(getsizeof(i, objsize) for i in self.names) + 
result = level_nbytes + label_nbytes + names_nbytes + + # include our engine hashtable + result += self._engine.sizeof(deep=deep) + return result + + # -------------------------------------------------------------------- + # Rendering Methods + def _formatter_func(self, tup): + """ + Formats each item in tup according to its level's formatter function. + """ + formatter_funcs = [level._formatter_func for level in self.levels] + return tuple(func(val) for func, val in zip(formatter_funcs, tup)) + + def _format_data(self, name=None): + """ + Return the formatted data as a unicode string + """ + return format_object_summary( + self, self._formatter_func, name=name, line_break_each_value=True + ) + + def _format_attrs(self): + """ + Return a list of tuples of the (attr,formatted_value). + """ + return format_object_attrs(self, include_dtype=False) + + def _format_native_types(self, na_rep="nan", **kwargs): + new_levels = [] + new_codes = [] + + # go through the levels and format them + for level, level_codes in zip(self.levels, self.codes): + level = level._format_native_types(na_rep=na_rep, **kwargs) + # add nan values, if there are any + mask = level_codes == -1 + if mask.any(): + nan_index = len(level) + level = np.append(level, na_rep) + assert not level_codes.flags.writeable # i.e. 
copy is needed + level_codes = level_codes.copy() # make writeable + level_codes[mask] = nan_index + new_levels.append(level) + new_codes.append(level_codes) + + if len(new_levels) == 1: + # a single-level multi-index + return Index(new_levels[0].take(new_codes[0]))._format_native_types() + else: + # reconstruct the multi-index + mi = MultiIndex( + levels=new_levels, + codes=new_codes, + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + return mi.values + + def format( + self, + space=2, + sparsify=None, + adjoin=True, + names=False, + na_rep=None, + formatter=None, + ): + if len(self) == 0: + return [] + + stringified_levels = [] + for lev, level_codes in zip(self.levels, self.codes): + na = na_rep if na_rep is not None else _get_na_rep(lev.dtype.type) + + if len(lev) > 0: + + formatted = lev.take(level_codes).format(formatter=formatter) + + # we have some NA + mask = level_codes == -1 + if mask.any(): + formatted = np.array(formatted, dtype=object) + formatted[mask] = na + formatted = formatted.tolist() + + else: + # weird all NA case + formatted = [ + pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n")) + for x in algos.take_1d(lev._values, level_codes) + ] + stringified_levels.append(formatted) + + result_levels = [] + for lev, name in zip(stringified_levels, self.names): + level = [] + + if names: + level.append( + pprint_thing(name, escape_chars=("\t", "\r", "\n")) + if name is not None + else "" + ) + + level.extend(np.array(lev, dtype=object)) + result_levels.append(level) + + if sparsify is None: + sparsify = get_option("display.multi_sparse") + + if sparsify: + sentinel = "" + # GH3547 + # use value of sparsify as sentinel, unless it's an obvious + # "Truthy" value + if sparsify not in [True, 1]: + sentinel = sparsify + # little bit of a kludge job for #1217 + result_levels = _sparsify( + result_levels, start=int(names), sentinel=sentinel + ) + + if adjoin: + from pandas.io.formats.format import _get_adjustment + 
+ adj = _get_adjustment() + return adj.adjoin(space, *result_levels).split("\n") + else: + return result_levels + + # -------------------------------------------------------------------- + + def __len__(self) -> int: + return len(self.codes[0]) + + def _get_names(self): + return FrozenList(self._names) + + def _set_names(self, names, level=None, validate=True): + """ + Set new names on index. Each name has to be a hashable type. + + Parameters + ---------- + values : str or sequence + name(s) to set + level : int, level name, or sequence of int/level names (default None) + If the index is a MultiIndex (hierarchical), level(s) to set (None + for all levels). Otherwise level must be None + validate : boolean, default True + validate that the names match level lengths + + Raises + ------ + TypeError if each name is not hashable. + + Notes + ----- + sets names on levels. WARNING: mutates! + + Note that you generally want to set this *after* changing levels, so + that it only acts on copies + """ + # GH 15110 + # Don't allow a single string for names in a MultiIndex + if names is not None and not is_list_like(names): + raise ValueError("Names should be list-like for a MultiIndex") + names = list(names) + + if validate: + if level is not None and len(names) != len(level): + raise ValueError("Length of names must match length of level.") + if level is None and len(names) != self.nlevels: + raise ValueError( + "Length of names must match number of levels in MultiIndex." + ) + + if level is None: + level = range(self.nlevels) + else: + level = [self._get_level_number(lev) for lev in level] + + # set the name + for lev, name in zip(level, names): + if name is not None: + # GH 20527 + # All items in 'names' need to be hashable: + if not is_hashable(name): + raise TypeError( + f"{type(self).__name__}.name must be a hashable type" + ) + self._names[lev] = name + + # If .levels has been accessed, the names in our cache will be stale. 
+ self._reset_cache() + + names = property( + fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n""" + ) + + @Appender(_index_shared_docs["_get_grouper_for_level"]) + def _get_grouper_for_level(self, mapper, level): + indexer = self.codes[level] + level_index = self.levels[level] + + if mapper is not None: + # Handle group mapping function and return + level_values = self.levels[level].take(indexer) + grouper = level_values.map(mapper) + return grouper, None, None + + codes, uniques = algos.factorize(indexer, sort=True) + + if len(uniques) > 0 and uniques[0] == -1: + # Handle NAs + mask = indexer != -1 + ok_codes, uniques = algos.factorize(indexer[mask], sort=True) + + codes = np.empty(len(indexer), dtype=indexer.dtype) + codes[mask] = ok_codes + codes[~mask] = -1 + + if len(uniques) < len(level_index): + # Remove unobserved levels from level_index + level_index = level_index.take(uniques) + else: + # break references back to us so that setting the name + # on the output of a groupby doesn't reflect back here. 
+ level_index = level_index.copy() + + if level_index._can_hold_na: + grouper = level_index.take(codes, fill_value=True) + else: + grouper = level_index.take(codes) + + return grouper, codes, level_index + + @property + def _constructor(self): + return MultiIndex.from_tuples + + @cache_readonly + def inferred_type(self) -> str: + return "mixed" + + def _get_level_number(self, level) -> int: + count = self.names.count(level) + if (count > 1) and not is_integer(level): + raise ValueError( + f"The name {level} occurs multiple times, use a level number" + ) + try: + level = self.names.index(level) + except ValueError: + if not is_integer(level): + raise KeyError(f"Level {level} not found") + elif level < 0: + level += self.nlevels + if level < 0: + orig_level = level - self.nlevels + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels," + f" {orig_level} is not a valid level number" + ) + # Note: levels are zero-based + elif level >= self.nlevels: + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels, " + f"not {level + 1}" + ) + return level + + _tuples = None + + @cache_readonly + def _engine(self): + # Calculate the number of bits needed to represent labels in each + # level, as log2 of their sizes (including -1 for NaN): + sizes = np.ceil(np.log2([len(l) + 1 for l in self.levels])) + + # Sum bit counts, starting from the _right_.... + lev_bits = np.cumsum(sizes[::-1])[::-1] + + # ... in order to obtain offsets such that sorting the combination of + # shifted codes (one for each level, resulting in a unique integer) is + # equivalent to sorting lexicographically the codes themselves. 
Notice + # that each level needs to be shifted by the number of bits needed to + # represent the _previous_ ones: + offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64") + + # Check the total number of bits needed for our representation: + if lev_bits[0] > 64: + # The levels would overflow a 64 bit uint - use Python integers: + return MultiIndexPyIntEngine(self.levels, self.codes, offsets) + return MultiIndexUIntEngine(self.levels, self.codes, offsets) + + @property + def values(self): + if self._tuples is not None: + return self._tuples + + values = [] + + for i in range(self.nlevels): + vals = self._get_level_values(i) + if is_categorical_dtype(vals): + vals = vals._internal_get_values() + if isinstance(vals.dtype, ExtensionDtype) or hasattr(vals, "_box_values"): + vals = vals.astype(object) + vals = np.array(vals, copy=False) + values.append(vals) + + self._tuples = lib.fast_zip(values) + return self._tuples + + @property + def _has_complex_internals(self): + # used to avoid libreduction code paths, which raise or require conversion + return True + + @cache_readonly + def is_monotonic_increasing(self) -> bool: + """ + return if the index is monotonic increasing (only equal or + increasing) values. + """ + + if all(x.is_monotonic for x in self.levels): + # If each level is sorted, we can operate on the codes directly. GH27495 + return libalgos.is_lexsorted( + [x.astype("int64", copy=False) for x in self.codes] + ) + + # reversed() because lexsort() wants the most significant key last. + values = [ + self._get_level_values(i).values for i in reversed(range(len(self.levels))) + ] + try: + sort_order = np.lexsort(values) + return Index(sort_order).is_monotonic + except TypeError: + + # we have mixed types and np.lexsort is not happy + return Index(self.values).is_monotonic + + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + """ + return if the index is monotonic decreasing (only equal or + decreasing) values. 
def _hashed_indexing_key(self, key):
    """
    Validate ``key`` and return its hash.

    *this is internal for use for the cython routines*

    Parameters
    ----------
    key : string or tuple
        A non-tuple key is passed straight to ``hash_tuples``; a tuple key
        must contain exactly one entry per level.

    Returns
    -------
    object
        The value returned by ``hash_tuples`` (non-tuple key) or
        ``hash_tuple`` (tuple key).

    Raises
    ------
    KeyError
        If ``key`` is a tuple whose length differs from ``self.nlevels``.
        The offending key is attached to the exception.

    Notes
    -----
    Entries that belong to a level flagged in ``self._have_mixed_levels``
    are converted with ``str`` before hashing.
    """
    if not isinstance(key, tuple):
        return hash_tuples(key)

    if len(key) != self.nlevels:
        # carry the key so the failure is diagnosable by the caller
        raise KeyError(key)

    key = tuple(
        str(k) if stringify and not isinstance(k, str) else k
        for k, stringify in zip(key, self._have_mixed_levels)
    )
    return hash_tuple(key)
def get_value(self, series, key):
    """
    Look up ``key`` in ``series`` using this MultiIndex to resolve labels.

    Lookups are attempted in this order:

    1. ``self._engine.get_value`` on the unboxed series values and key;
    2. on ``KeyError``, a label lookup through ``self.get_loc`` (which may
       be partial), building a new object with ``series._constructor``;
    3. still failing, ``libindex.get_value_at`` on the unboxed values;
    4. on ``TypeError`` from step 1, datetime-like and string keys are
       retried through ``get_loc``, first as given and then wrapped in
       ``Timestamp``.

    Parameters
    ----------
    series : Series-like
        Must provide ``_values``, ``_constructor``, ``name`` and
        ``__finalize__``.
    key : label, tuple of labels, or datetime-like

    Returns
    -------
    object
        The result of whichever lookup succeeded.

    Raises
    ------
    KeyError
        If the key cannot be found.
    IndexError
        Propagated unchanged from ``libindex.get_value_at``.
    InvalidIndexError
        If ``key`` is an iterator, or no lookup strategy applies.
    """
    # Label-based
    s = com.values_from_object(series)
    k = com.values_from_object(key)

    def _try_mi(k):
        # TODO: what if a level contains tuples??
        loc = self.get_loc(k)
        new_values = series._values[loc]
        new_index = self[loc]
        new_index = maybe_droplevels(new_index, k)
        # Propagate metadata from the source series; passing the index
        # here (as before) made __finalize__ a no-op.
        return series._constructor(
            new_values, index=new_index, name=series.name
        ).__finalize__(series)

    try:
        return self._engine.get_value(s, k)
    except KeyError as e1:
        try:
            return _try_mi(key)
        except KeyError:
            pass

        try:
            return libindex.get_value_at(s, k)
        except IndexError:
            raise
        except TypeError:
            # generator/iterator-like
            if is_iterator(key):
                raise InvalidIndexError(key)
            else:
                raise e1
        except Exception:  # pragma: no cover
            raise e1
    except TypeError:

        # a Timestamp will raise a TypeError in a multi-index
        # rather than a KeyError, try it here
        # note that a string that 'looks' like a Timestamp will raise
        # a KeyError! (GH5725)
        if isinstance(key, (datetime.datetime, np.datetime64, str)):
            try:
                return _try_mi(key)
            except KeyError:
                raise
            except (IndexError, ValueError, TypeError):
                pass

            try:
                return _try_mi(Timestamp(key))
            except (
                KeyError,
                TypeError,
                IndexError,
                ValueError,
                tslibs.OutOfBoundsDatetime,
            ):
                pass

        raise InvalidIndexError(key)
+ + Examples + -------- + + Create a MultiIndex: + + >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) + >>> mi.names = ['level_1', 'level_2'] + + Get level values by supplying level as either integer or name: + + >>> mi.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object', name='level_1') + >>> mi.get_level_values('level_2') + Index(['d', 'e', 'f'], dtype='object', name='level_2') + """ + level = self._get_level_number(level) + values = self._get_level_values(level) + return values + + @Appender(_index_shared_docs["index_unique"] % _index_doc_kwargs) + def unique(self, level=None): + + if level is None: + return super().unique() + else: + level = self._get_level_number(level) + return self._get_level_values(level=level, unique=True) + + def _to_safe_for_reshape(self): + """ convert to object if we are a categorical """ + return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) + + def to_frame(self, index=True, name=None): + """ + Create a DataFrame with the levels of the MultiIndex as columns. + + Column ordering is determined by the DataFrame constructor with data as + a dict. + + .. versionadded:: 0.24.0 + + Parameters + ---------- + index : bool, default True + Set the index of the returned DataFrame as the original MultiIndex. + + name : list / sequence of strings, optional + The passed names should substitute index level names. + + Returns + ------- + DataFrame : a DataFrame containing the original MultiIndex data. + + See Also + -------- + DataFrame + """ + + from pandas import DataFrame + + if name is not None: + if not is_list_like(name): + raise TypeError("'name' must be a list / sequence of column names.") + + if len(name) != len(self.levels): + raise ValueError( + "'name' should have same length as number of levels on index." 
def _lexsort_depth(self) -> int:
    """
    Count how many leading levels of the MultiIndex are lexically sorted.

    Candidate depths are tried from ``self.nlevels`` downwards and the
    first depth whose leading codes pass ``libalgos.is_lexsorted`` wins.

    Returns
    -------
    int
        The largest such depth, or 0 when not even the first level's
        codes are sorted.
    """
    as_int64 = [ensure_int64(level_codes) for level_codes in self.codes]
    depth = self.nlevels
    while depth > 0:
        if libalgos.is_lexsorted(as_int64[:depth]):
            return depth
        depth -= 1
    return 0
This does not actually make the entire MultiIndex + monotonic, JUST the levels. + + The resulting MultiIndex will have the same outward + appearance, meaning the same .values and ordering. It will also + be .equals() to the original. + + Returns + ------- + MultiIndex + + Examples + -------- + + >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], + ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi + MultiIndex([('a', 'bb'), + ('a', 'aa'), + ('b', 'bb'), + ('b', 'aa')], + ) + + >>> mi.sort_values() + MultiIndex([('a', 'aa'), + ('a', 'bb'), + ('b', 'aa'), + ('b', 'bb')], + ) + """ + + if self.is_lexsorted() and self.is_monotonic: + return self + + new_levels = [] + new_codes = [] + + for lev, level_codes in zip(self.levels, self.codes): + + if not lev.is_monotonic: + try: + # indexer to reorder the levels + indexer = lev.argsort() + except TypeError: + pass + else: + lev = lev.take(indexer) + + # indexer to reorder the level codes + indexer = ensure_int64(indexer) + ri = lib.get_reverse_indexer(indexer, len(indexer)) + level_codes = algos.take_1d(ri, level_codes) + + new_levels.append(lev) + new_codes.append(level_codes) + + return MultiIndex( + new_levels, + new_codes, + names=self.names, + sortorder=self.sortorder, + verify_integrity=False, + ) + + def remove_unused_levels(self): + """ + Create a new MultiIndex from the current that removes + unused levels, meaning that they are not expressed in the labels. + + The resulting MultiIndex will have the same outward + appearance, meaning the same .values and ordering. It will also + be .equals() to the original. 
+ + Returns + ------- + MultiIndex + + Examples + -------- + >>> mi = pd.MultiIndex.from_product([range(2), list('ab')]) + >>> mi + MultiIndex([(0, 'a'), + (0, 'b'), + (1, 'a'), + (1, 'b')], + ) + + >>> mi[2:] + MultiIndex([(1, 'a'), + (1, 'b')], + ) + + The 0 from the first level is not represented + and can be removed + + >>> mi2 = mi[2:].remove_unused_levels() + >>> mi2.levels + FrozenList([[1], ['a', 'b']]) + """ + + new_levels = [] + new_codes = [] + + changed = False + for lev, level_codes in zip(self.levels, self.codes): + + # Since few levels are typically unused, bincount() is more + # efficient than unique() - however it only accepts positive values + # (and drops order): + uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1 + has_na = int(len(uniques) and (uniques[0] == -1)) + + if len(uniques) != len(lev) + has_na: + # We have unused levels + changed = True + + # Recalculate uniques, now preserving order. + # Can easily be cythonized by exploiting the already existing + # "uniques" and stop parsing "level_codes" when all items + # are found: + uniques = algos.unique(level_codes) + if has_na: + na_idx = np.where(uniques == -1)[0] + # Just ensure that -1 is in first position: + uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]] + + # codes get mapped from uniques to 0:len(uniques) + # -1 (if present) is mapped to last position + code_mapping = np.zeros(len(lev) + has_na) + # ... and reassigned value -1: + code_mapping[uniques] = np.arange(len(uniques)) - has_na + + level_codes = code_mapping[level_codes] + + # new levels are simple + lev = lev.take(uniques[has_na:]) + + new_levels.append(lev) + new_codes.append(level_codes) + + result = self.view() + + if changed: + result._reset_identity() + result._set_levels(new_levels, validate=False) + result._set_codes(new_codes, validate=False) + + return result + + @property + def nlevels(self) -> int: + """ + Integer number of levels in this MultiIndex. 
def __setstate__(self, state):
    """
    Restore the index from pickled ``state``.

    Parameters
    ----------
    state : dict or tuple
        Either a mapping with the keys ``"levels"``, ``"codes"``,
        ``"sortorder"`` and ``"names"``, or a 2-tuple whose second item is
        the 4-tuple ``(levels, codes, sortorder, names)``.

    Raises
    ------
    TypeError
        If ``state`` is neither a dict nor a tuple.
    """
    if isinstance(state, dict):
        levels = state.get("levels")
        codes = state.get("codes")
        sortorder = state.get("sortorder")
        names = state.get("names")

    elif isinstance(state, tuple):
        # the first item of the tuple is not used here
        _, own_state = state
        levels, codes, sortorder, names = own_state

    else:
        # previously fell through and failed on unbound local names
        raise TypeError(
            f"cannot restore MultiIndex from state of type {type(state).__name__}"
        )

    self._set_levels([Index(x) for x in levels], validate=False)
    # codes are installed once so _verify_integrity can inspect them, then
    # replaced by whatever it returns
    self._set_codes(codes)
    new_codes = self._verify_integrity()
    self._set_codes(new_codes)
    self._set_names(names)
    self.sortorder = sortorder
    self._reset_identity()
def _assert_take_fillable(
    self, values, indices, allow_fill=True, fill_value=None, na_value=None
):
    """
    Internal method to handle NA filling of take.

    Parameters
    ----------
    values : sequence of ndarray
        One codes array per level; each is taken with ``indices``.
        ``take`` passes ``self.codes`` here.
    indices : ndarray of int
        Positions to take. ``-1`` marks a missing entry when filling is
        active.
    allow_fill : bool, default True
    fill_value : object, default None
        Filling only happens when ``allow_fill`` is true and this is not
        None; the value itself is not written anywhere.
    na_value : object, default None
        Code written at the positions where ``indices == -1``.

    Returns
    -------
    list of ndarray
        The taken (and possibly filled) codes, one array per input array.

    Raises
    ------
    ValueError
        If filling is active and any index is smaller than -1.
    """
    # only fill if we are passing a non-None fill_value
    fill = allow_fill and fill_value is not None
    if fill and (indices < -1).any():
        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        raise ValueError(msg)

    # honour the ``values`` argument instead of silently using self.codes
    taken = [lab.take(indices) for lab in values]

    if fill:
        mask = indices == -1
        if mask.any():
            # ``take`` returned fresh arrays, so writing in place is safe
            for label_values in taken:
                label_values[mask] = na_value
            taken = [np.asarray(label_values) for label_values in taken]
    return taken
def drop(self, codes, level=None, errors="raise"):
    """
    Make new MultiIndex with passed list of codes deleted

    Parameters
    ----------
    codes : array-like
        Must be a list of tuples
    level : int or level name, default None
        When given, the work is delegated to ``_drop_from_level``.
    errors : str, default 'raise'
        With ``'ignore'``, labels that ``get_loc`` cannot find are
        skipped; any other value lets the ``KeyError`` propagate.

    Returns
    -------
    dropped : MultiIndex
        The result of ``self.delete`` on the collected positions.
    """
    if level is not None:
        return self._drop_from_level(codes, level, errors)

    if not isinstance(codes, (np.ndarray, Index)):
        try:
            codes = com.index_labels_to_array(codes, dtype=object)
        except ValueError:
            pass

    inds = []
    for level_codes in codes:
        try:
            loc = self.get_loc(level_codes)
        except KeyError:
            if errors != "ignore":
                raise
            continue

        # get_loc returns either an integer, a slice, or a boolean
        # mask
        if isinstance(loc, (int, np.integer)):
            inds.append(loc)
        elif isinstance(loc, slice):
            # respect a non-unit step instead of dropping the whole span
            step = loc.step if loc.step is not None else 1
            inds.extend(range(loc.start, loc.stop, step))
        elif com.is_bool_indexer(loc):
            if self.lexsort_depth == 0:
                warnings.warn(
                    "dropping on a non-lexsorted multi-index "
                    "without a level parameter may impact performance.",
                    PerformanceWarning,
                    stacklevel=3,
                )
            loc = loc.nonzero()[0]
            inds.extend(loc)
        else:
            msg = f"unsupported indexer of type {type(loc)}"
            raise AssertionError(msg)

    return self.delete(inds)
def swaplevel(self, i=-2, j=-1):
    """
    Exchange the positions of level ``i`` and level ``j``.

    Only the level order changes; the rows keep their order.

    Parameters
    ----------
    i : int, str, default -2
        First level to swap, given by position or by name.
    j : int, str, default -1
        Second level to swap, given by position or by name. ``i`` and
        ``j`` need not be of the same kind.

    Returns
    -------
    MultiIndex
        A new MultiIndex.

    See Also
    --------
    Series.swaplevel : Swap levels i and j in a MultiIndex.
    DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
        particular axis.

    Examples
    --------
    >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi.swaplevel(0, 1)
    MultiIndex([('bb', 'a'),
                ('aa', 'a'),
                ('bb', 'b'),
                ('aa', 'b')],
               )
    """
    first = self._get_level_number(i)
    second = self._get_level_number(j)

    def _exchanged(sequence):
        # copy, then swap the two resolved positions
        items = list(sequence)
        items[first], items[second] = items[second], items[first]
        return items

    return MultiIndex(
        levels=_exchanged(self.levels),
        codes=_exchanged(self.codes),
        names=_exchanged(self.names),
        verify_integrity=False,
    )
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
    """
    Sort MultiIndex at the requested level. The result will respect the
    original ordering of the associated factor at that level.

    Parameters
    ----------
    level : list-like, int or str, default 0
        If a string is given, must be a name of the level.
        If list-like must be names or ints of levels.
    ascending : bool, default True
        False to sort in descending order.
        Can also be a list to specify a directed ordering; it must then
        have one entry per requested level.
    sort_remaining : bool, default True
        Also sort by the levels not named in ``level``, after the
        requested ones. Not consulted when ``ascending`` is a list.

    Returns
    -------
    sorted_index : pd.MultiIndex
        Resulting index.
    indexer : np.ndarray
        Indices of output values in original index.

    Raises
    ------
    ValueError
        If ``ascending`` is a list whose length differs from the number of
        requested levels.
    """
    if isinstance(level, (str, int)):
        level = [level]
    level = [self._get_level_number(lev) for lev in level]
    sortorder = None

    # we have a directed ordering via ascending
    if isinstance(ascending, list):
        if not len(level) == len(ascending):
            raise ValueError("level must have same length as ascending")

        indexer = lexsort_indexer(
            [self.codes[lev] for lev in level], orders=ascending
        )

    # level ordering
    else:

        codes = list(self.codes)
        shape = list(self.levshape)

        # partition codes and shape
        primary = tuple(codes[lev] for lev in level)
        primshp = tuple(shape[lev] for lev in level)

        # Reverse sorted to retain the order of
        # smaller indices that needs to be removed
        for lev in sorted(level, reverse=True):
            codes.pop(lev)
            shape.pop(lev)

        if sort_remaining:
            # append the remaining levels once; repeating the primary keys
            # cannot change the ordering and only enlarges the key space
            primary += tuple(codes)
            primshp += tuple(shape)
        else:
            sortorder = level[0]

        indexer = indexer_from_factorized(primary, primshp, compress=False)

        if not ascending:
            indexer = indexer[::-1]

    indexer = ensure_platform_int(indexer)
    new_codes = [level_codes.take(indexer) for level_codes in self.codes]

    new_index = MultiIndex(
        codes=new_codes,
        levels=self.levels,
        names=self.names,
        sortorder=sortorder,
        verify_integrity=False,
    )

    return new_index, indexer
+ + Returns + ------- + tuple (indexer, keyarr) + indexer is an ndarray or None if cannot convert + keyarr are tuple-safe keys + """ + indexer, keyarr = super()._convert_listlike_indexer(keyarr, kind=kind) + + # are we indexing a specific level + if indexer is None and len(keyarr) and not isinstance(keyarr[0], tuple): + level = 0 + _, indexer = self.reindex(keyarr, level=level) + + # take all + if indexer is None: + indexer = np.arange(len(self)) + + check = self.levels[0].get_indexer(keyarr) + mask = check == -1 + if mask.any(): + raise KeyError(f"{keyarr[mask]} not in index") + + return indexer, keyarr + + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) + def get_indexer(self, target, method=None, limit=None, tolerance=None): + method = missing.clean_reindex_fill_method(method) + target = ensure_index(target) + + # empty indexer + if is_list_like(target) and not len(target): + return ensure_platform_int(np.array([])) + + if not isinstance(target, MultiIndex): + try: + target = MultiIndex.from_tuples(target) + except (TypeError, ValueError): + + # let's instead try with a straight Index + if method is None: + return Index(self.values).get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + + if not self.is_unique: + raise ValueError("Reindexing only valid with uniquely valued Index objects") + + if method == "pad" or method == "backfill": + if tolerance is not None: + raise NotImplementedError( + "tolerance not implemented yet for MultiIndex" + ) + indexer = self._engine.get_indexer(target, method, limit) + elif method == "nearest": + raise NotImplementedError( + "method='nearest' not implemented yet " + "for MultiIndex; see GitHub issue 9365" + ) + else: + indexer = self._engine.get_indexer(target) + + return ensure_platform_int(indexer) + + @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) + def get_indexer_non_unique(self, target): + return super().get_indexer_non_unique(target) + + def 
reindex(self, target, method=None, level=None, limit=None, tolerance=None): + """ + Create index with target's values (move/add/delete values as necessary) + + Returns + ------- + new_index : pd.MultiIndex + Resulting index + indexer : np.ndarray or None + Indices of output values in original index. + + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). + preserve_names = not hasattr(target, "names") + + if level is not None: + if method is not None: + raise TypeError("Fill method not supported if level passed") + + # GH7774: preserve dtype/tz if target is empty and not an Index. + # target may be an iterator + target = ibase._ensure_has_len(target) + if len(target) == 0 and not isinstance(target, Index): + idx = self.levels[level] + attrs = idx._get_attributes_dict() + attrs.pop("freq", None) # don't preserve freq + target = type(idx)._simple_new(np.empty(0, dtype=idx.dtype), **attrs) + else: + target = ensure_index(target) + target, indexer, _ = self._join_level( + target, level, how="right", return_indexers=True, keep_order=False + ) + else: + target = ensure_index(target) + if self.equals(target): + indexer = None + else: + if self.is_unique: + indexer = self.get_indexer( + target, method=method, limit=limit, tolerance=tolerance + ) + else: + raise ValueError("cannot handle a non-unique multi-index!") + + if not isinstance(target, MultiIndex): + if indexer is None: + target = self + elif (indexer >= 0).all(): + target = self.take(indexer) + else: + # hopefully? + target = MultiIndex.from_tuples(target) + + if ( + preserve_names + and target.nlevels == self.nlevels + and target.names != self.names + ): + target = target.copy(deep=False) + target.names = self.names + + return target, indexer + + def get_slice_bound( + self, label: Union[Hashable, Sequence[Hashable]], side: str, kind: str + ) -> int: + """ + For an ordered MultiIndex, compute slice bound + that corresponds to given label. 
+ + Returns leftmost (one-past-the-rightmost if `side=='right') position + of given label. + + Parameters + ---------- + label : object or tuple of objects + side : {'left', 'right'} + kind : {'loc', 'getitem'} + + Returns + ------- + int + Index of label. + + Notes + ----- + This method only works if level 0 index of the MultiIndex is lexsorted. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')]) + + Get the locations from the leftmost 'b' in the first level + until the end of the multiindex: + + >>> mi.get_slice_bound('b', side="left", kind="loc") + 1 + + Like above, but if you get the locations from the rightmost + 'b' in the first level and 'f' in the second level: + + >>> mi.get_slice_bound(('b','f'), side="right", kind="loc") + 3 + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + """ + + if not isinstance(label, tuple): + label = (label,) + return self._partial_tup_index(label, side=side) + + def slice_locs(self, start=None, end=None, step=None, kind=None): + """ + For an ordered MultiIndex, compute the slice locations for input + labels. + + The input labels can be tuples representing partial levels, e.g. for a + MultiIndex with 3 levels, you can pass a single value (corresponding to + the first level), or a 1-, 2-, or 3-tuple. + + Parameters + ---------- + start : label or tuple, default None + If None, defaults to the beginning + end : label or tuple + If None, defaults to the end + step : int or None + Slice step + kind : string, optional, defaults None + + Returns + ------- + (start, end) : (int, int) + + Notes + ----- + This method only works if the MultiIndex is properly lexsorted. So, + if only the first 2 levels of a 3-level MultiIndex are lexsorted, + you can only pass two levels to ``.slice_locs``. 
def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
    """
    Locate ``key`` within one level, mapping NA keys to -1.

    Parameters
    ----------
    level_index : Index
        The level to search.
    key : label
        Label to locate.

    Returns
    -------
    loc : int
        -1 when ``key`` is a scalar NA value; otherwise whatever
        ``level_index.get_loc(key)`` returns.

    See Also
    --------
    Index.get_loc : The get_loc method for (single-level) index.
    """
    key_is_missing = is_scalar(key) and isna(key)
    if not key_is_missing:
        return level_index.get_loc(key)
    return -1
+ + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + + >>> mi.get_loc('b') + slice(1, 3, None) + + >>> mi.get_loc(('b', 'e')) + 1 + """ + if method is not None: + raise NotImplementedError( + "only the default get_loc method is " + "currently supported for MultiIndex" + ) + + def _maybe_to_slice(loc): + """convert integer indexer to boolean mask or slice if possible""" + if not isinstance(loc, np.ndarray) or loc.dtype != "int64": + return loc + + loc = lib.maybe_indices_to_slice(loc, len(self)) + if isinstance(loc, slice): + return loc + + mask = np.empty(len(self), dtype="bool") + mask.fill(False) + mask[loc] = True + return mask + + if not isinstance(key, (tuple, list)): + # not including list here breaks some indexing, xref #30892 + loc = self._get_level_indexer(key, level=0) + return _maybe_to_slice(loc) + + keylen = len(key) + if self.nlevels < keylen: + raise KeyError( + f"Key length ({keylen}) exceeds index depth ({self.nlevels})" + ) + + if keylen == self.nlevels and self.is_unique: + return self._engine.get_loc(key) + + # -- partial selection or non-unique index + # break the key into 2 parts based on the lexsort_depth of the index; + # the first part returns a continuous slice of the index; the 2nd part + # needs linear search within the slice + i = self.lexsort_depth + lead_key, follow_key = key[:i], key[i:] + start, stop = ( + self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self)) + ) + + if start == stop: + raise KeyError(key) + + if not follow_key: + return slice(start, stop) + + warnings.warn( + "indexing past lexsort depth may impact performance.", + PerformanceWarning, + stacklevel=10, + ) + + loc = np.arange(start, stop, dtype="int64") + + for i, k in enumerate(follow_key, len(lead_key)): + mask = self.codes[i][loc] == self._get_loc_single_level_index( + self.levels[i], k + ) + if not mask.all(): + loc = loc[mask] + if not len(loc): + raise KeyError(key) + + return _maybe_to_slice(loc) if len(loc) 
!= stop - start else slice(start, stop) + + def get_loc_level(self, key, level=0, drop_level: bool = True): + """ + Get both the location for the requested label(s) and the + resulting sliced index. + + Parameters + ---------- + key : label or sequence of labels + level : int/level name or list thereof, optional + drop_level : bool, default True + If ``False``, the resulting index will not drop any level. + + Returns + ------- + loc : A 2-tuple where the elements are: + Element 0: int, slice object or boolean array + Element 1: The resulting sliced multiindex/index. If the key + contains all levels, this will be ``None``. + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.get_locs : Get location for a label/slice/list/mask or a + sequence of such. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')], + ... names=['A', 'B']) + + >>> mi.get_loc_level('b') + (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) + + >>> mi.get_loc_level('e', level='B') + (array([False, True, False], dtype=bool), + Index(['b'], dtype='object', name='A')) + + >>> mi.get_loc_level(['b', 'e']) + (1, None) + """ + + # different name to distinguish from maybe_droplevels + def maybe_mi_droplevels(indexer, levels, drop_level: bool): + if not drop_level: + return self[indexer] + # kludgearound + orig_index = new_index = self[indexer] + levels = [self._get_level_number(i) for i in levels] + for i in sorted(levels, reverse=True): + try: + new_index = new_index.droplevel(i) + except ValueError: + + # no dropping here + return orig_index + return new_index + + if isinstance(level, (tuple, list)): + if len(key) != len(level): + raise AssertionError( + "Key for location must have same length as number of levels" + ) + result = None + for lev, k in zip(level, key): + loc, new_index = self.get_loc_level(k, level=lev) + if isinstance(loc, slice): + mask = np.zeros(len(self), dtype=bool) + mask[loc] = 
True + loc = mask + + result = loc if result is None else result & loc + + return result, maybe_mi_droplevels(result, level, drop_level) + + level = self._get_level_number(level) + + # kludge for #1796 + if isinstance(key, list): + key = tuple(key) + + if isinstance(key, tuple) and level == 0: + + try: + if key in self.levels[0]: + indexer = self._get_level_indexer(key, level=level) + new_index = maybe_mi_droplevels(indexer, [0], drop_level) + return indexer, new_index + except TypeError: + pass + + if not any(isinstance(k, slice) for k in key): + + # partial selection + # optionally get indexer to avoid re-calculation + def partial_selection(key, indexer=None): + if indexer is None: + indexer = self.get_loc(key) + ilevels = [ + i for i in range(len(key)) if key[i] != slice(None, None) + ] + return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) + + if len(key) == self.nlevels and self.is_unique: + # Complete key in unique index -> standard get_loc + try: + return (self._engine.get_loc(key), None) + except KeyError as e: + raise KeyError(key) from e + else: + return partial_selection(key) + else: + indexer = None + for i, k in enumerate(key): + if not isinstance(k, slice): + k = self._get_level_indexer(k, level=i) + if isinstance(k, slice): + # everything + if k.start == 0 and k.stop == len(self): + k = slice(None, None) + else: + k_index = k + + if isinstance(k, slice): + if k == slice(None, None): + continue + else: + raise TypeError(key) + + if indexer is None: + indexer = k_index + else: # pragma: no cover + indexer &= k_index + if indexer is None: + indexer = slice(None, None) + ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] + return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level) + else: + indexer = self._get_level_indexer(key, level=level) + return indexer, maybe_mi_droplevels(indexer, [level], drop_level) + + def _get_level_indexer(self, key, level=0, indexer=None): + # return an indexer, boolean array or a slice 
showing where the key is + # in the totality of values + # if the indexer is provided, then use this + + level_index = self.levels[level] + level_codes = self.codes[level] + + def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): + # given the inputs and the codes/indexer, compute an indexer set + # if we have a provided indexer, then this need not consider + # the entire labels set + + r = np.arange(start, stop, step) + if indexer is not None and len(indexer) != len(codes): + + # we have an indexer which maps the locations in the labels + # that we have already selected (and is not an indexer for the + # entire set) otherwise this is wasteful so we only need to + # examine locations that are in this set the only magic here is + # that the result are the mappings to the set that we have + # selected + from pandas import Series + + mapper = Series(indexer) + indexer = codes.take(ensure_platform_int(indexer)) + result = Series(Index(indexer).isin(r).nonzero()[0]) + m = result.map(mapper)._ndarray_values + + else: + m = np.zeros(len(codes), dtype=bool) + m[np.in1d(codes, r, assume_unique=Index(codes).is_unique)] = True + + return m + + if isinstance(key, slice): + # handle a slice, returning a slice if we can + # otherwise a boolean indexer + + try: + if key.start is not None: + start = level_index.get_loc(key.start) + else: + start = 0 + if key.stop is not None: + stop = level_index.get_loc(key.stop) + else: + stop = len(level_index) - 1 + step = key.step + except KeyError: + + # we have a partial slice (like looking up a partial date + # string) + start = stop = level_index.slice_indexer( + key.start, key.stop, key.step, kind="loc" + ) + step = start.step + + if isinstance(start, slice) or isinstance(stop, slice): + # we have a slice for start and/or stop + # a partial date slicer on a DatetimeIndex generates a slice + # note that the stop ALREADY includes the stopped point (if + # it was a string sliced) + start = getattr(start, "start", 
start) + stop = getattr(stop, "stop", stop) + return convert_indexer(start, stop, step) + + elif level > 0 or self.lexsort_depth == 0 or step is not None: + # need to have like semantics here to right + # searching as when we are using a slice + # so include the stop+1 (so we include stop) + return convert_indexer(start, stop + 1, step) + else: + # sorted, so can return slice object -> view + i = level_codes.searchsorted(start, side="left") + j = level_codes.searchsorted(stop, side="right") + return slice(i, j, step) + + else: + + code = self._get_loc_single_level_index(level_index, key) + + if level > 0 or self.lexsort_depth == 0: + # Desired level is not sorted + locs = np.array(level_codes == code, dtype=bool, copy=False) + if not locs.any(): + # The label is present in self.levels[level] but unused: + raise KeyError(key) + return locs + + i = level_codes.searchsorted(code, side="left") + j = level_codes.searchsorted(code, side="right") + if i == j: + # The label is present in self.levels[level] but unused: + raise KeyError(key) + return slice(i, j) + + def get_locs(self, seq): + """ + Get location for a sequence of labels. + + Parameters + ---------- + seq : label, slice, list, mask or a sequence of such + You should use one of the above for each level. + If a level should not be used, set it to ``slice(None)``. + + Returns + ------- + numpy.ndarray + NumPy array of integers suitable for passing to iloc. + + See Also + -------- + MultiIndex.get_loc : Get location for a label or a tuple of labels. + MultiIndex.slice_locs : Get slice location given start label(s) and + end label(s). 
+ + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + + >>> mi.get_locs('b') # doctest: +SKIP + array([1, 2], dtype=int64) + + >>> mi.get_locs([slice(None), ['e', 'f']]) # doctest: +SKIP + array([1, 2], dtype=int64) + + >>> mi.get_locs([[True, False, True], slice('e', 'f')]) # doctest: +SKIP + array([2], dtype=int64) + """ + from pandas.core.indexes.numeric import Int64Index + + # must be lexsorted to at least as many levels + true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] + if true_slices and true_slices[-1] >= self.lexsort_depth: + raise UnsortedIndexError( + "MultiIndex slicing requires the index to be lexsorted: slicing " + f"on levels {true_slices}, lexsort depth {self.lexsort_depth}" + ) + # indexer + # this is the list of all values that we want to select + n = len(self) + indexer = None + + def _convert_to_indexer(r): + # return an indexer + if isinstance(r, slice): + m = np.zeros(n, dtype=bool) + m[r] = True + r = m.nonzero()[0] + elif com.is_bool_indexer(r): + if len(r) != n: + raise ValueError( + "cannot index with a boolean indexer " + "that is not the same length as the " + "index" + ) + r = r.nonzero()[0] + return Int64Index(r) + + def _update_indexer(idxr, indexer=indexer): + if indexer is None: + indexer = Index(np.arange(n)) + if idxr is None: + return indexer + return indexer & idxr + + for i, k in enumerate(seq): + + if com.is_bool_indexer(k): + # a boolean indexer, must be the same length! 
def truncate(self, before=None, after=None):
    """
    Slice index between two labels / tuples, return new MultiIndex.

    Parameters
    ----------
    before : label or tuple, can be partial. Default None
        None defaults to start.
    after : label or tuple, can be partial. Default None
        None defaults to end.

    Returns
    -------
    truncated : MultiIndex
        Has the same level names as the index it was cut from.

    Raises
    ------
    ValueError
        If both bounds are truthy and ``after < before``.
    """
    # The ordering check only runs when both bounds are truthy, so ``None``
    # (and falsy labels such as 0) skip it.
    if after and before and after < before:
        raise ValueError("after < before")

    # i, j: positions inside level 0; left, right: positions in the full index.
    i, j = self.levels[0].slice_locs(before, after)
    left, right = self.slice_locs(before, after)

    new_levels = list(self.levels)
    new_levels[0] = new_levels[0][i:j]

    new_codes = [level_codes[left:right] for level_codes in self.codes]
    # Level 0 now starts at its old position ``i``; re-base its codes.
    new_codes[0] = new_codes[0] - i

    # Pass the names through explicitly: building the result without them
    # silently dropped the level names of the truncated index.
    return MultiIndex(
        levels=new_levels,
        codes=new_codes,
        names=self.names,
        verify_integrity=False,
    )
def equal_levels(self, other):
    """
    Return True if the levels of both MultiIndex objects are the same.

    The number of levels is compared first; the levels are then compared
    pairwise with ``Index.equals``.
    """
    if self.nlevels != other.nlevels:
        return False

    level_pairs = zip(self.levels, other.levels)
    return all(mine.equals(theirs) for mine, theirs in level_pairs)
versionchanged:: 0.24.1 + + Changed the default from ``True`` to ``False``, to match + behaviour from before 0.24.0 + + Returns + ------- + Index + """ + self._validate_sort_keyword(sort) + self._assert_can_do_setop(other) + other, result_names = self._convert_can_do_setop(other) + + if self.equals(other): + return self + + self_tuples = self._ndarray_values + other_tuples = other._ndarray_values + uniq_tuples = set(self_tuples) & set(other_tuples) + + if sort is None: + uniq_tuples = sorted(uniq_tuples) + + if len(uniq_tuples) == 0: + return MultiIndex( + levels=self.levels, + codes=[[]] * self.nlevels, + names=result_names, + verify_integrity=False, + ) + else: + return MultiIndex.from_arrays( + zip(*uniq_tuples), sortorder=0, names=result_names + ) + + def difference(self, other, sort=None): + """ + Compute set difference of two MultiIndex objects + + Parameters + ---------- + other : MultiIndex + sort : False or None, default None + Sort the resulting MultiIndex if possible + + .. versionadded:: 0.24.0 + + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). 
def _convert_can_do_setop(self, other):
    """
    Normalise the right-hand operand of a set operation.

    Parameters
    ----------
    other : MultiIndex or list-like of tuples
        Objects without a ``names`` attribute are converted to a MultiIndex
        (an empty one with ``self.nlevels`` levels when ``other`` is empty).

    Returns
    -------
    other : MultiIndex
        The operand, converted if necessary.
    result_names : FrozenList or None
        ``self.names`` when ``other`` had no ``names`` attribute or its names
        equal ``self.names``; otherwise None.

    Raises
    ------
    TypeError
        If ``other`` cannot be converted with ``MultiIndex.from_tuples``.
    """
    result_names = self.names

    if not hasattr(other, "names"):
        if len(other) == 0:
            other = MultiIndex(
                levels=[[]] * self.nlevels,
                codes=[[]] * self.nlevels,
                verify_integrity=False,
            )
        else:
            msg = "other must be a MultiIndex or a list of tuples"
            try:
                other = MultiIndex.from_tuples(other)
            except TypeError as err:
                # Chain the cause so the underlying conversion failure stays
                # visible in the traceback.
                raise TypeError(msg) from err
    else:
        result_names = self.names if self.names == other.names else None
    return other, result_names
def delete(self, loc):
    """
    Make new index with passed location deleted.

    Parameters
    ----------
    loc : position(s) forwarded to ``numpy.delete``

    Returns
    -------
    new_index : MultiIndex
        Shares the levels and names of the original; only the codes shrink.
    """
    trimmed_codes = []
    for codes_of_level in self.codes:
        # Drop the same position(s) from every level's code array.
        trimmed_codes.append(np.delete(codes_of_level, loc))

    return MultiIndex(
        levels=self.levels,
        codes=trimmed_codes,
        names=self.names,
        verify_integrity=False,
    )
def maybe_droplevels(index, key):
    """
    Attempt to drop level or levels from the given index.

    One leading level is dropped for a scalar key, and one per element for a
    tuple key.

    Parameters
    ----------
    index : Index
    key : scalar or tuple

    Returns
    -------
    Index
        The index with leading levels removed. For a scalar key the input is
        returned as-is if the drop raises ValueError; for a tuple key the
        *original* index is returned if any drop raises ValueError.
    """
    if not isinstance(key, tuple):
        # Scalar key: a single, best-effort drop.
        try:
            return index.droplevel(0)
        except ValueError:
            return index

    trimmed = index
    for _ in key:
        try:
            trimmed = trimmed.droplevel(0)
        except ValueError:
            # we have dropped too much, so back out
            return index
    return trimmed
+ """ + array_like = coerce_indexer_dtype(array_like, categories) + if copy: + array_like = array_like.copy() + array_like.flags.writeable = False + return array_like diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py new file mode 100644 index 00000000..b9b44284 --- /dev/null +++ b/pandas/core/indexes/numeric.py @@ -0,0 +1,531 @@ +import numpy as np + +from pandas._libs import index as libindex, lib +from pandas._typing import Dtype +from pandas.util._decorators import Appender, cache_readonly + +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + is_bool, + is_bool_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer_dtype, + is_scalar, + is_signed_integer_dtype, + is_unsigned_integer_dtype, + needs_i8_conversion, + pandas_dtype, +) +from pandas.core.dtypes.generic import ( + ABCFloat64Index, + ABCInt64Index, + ABCRangeIndex, + ABCSeries, + ABCUInt64Index, +) +from pandas.core.dtypes.missing import isna + +from pandas.core import algorithms +import pandas.core.common as com +from pandas.core.indexes.base import ( + Index, + InvalidIndexError, + _index_shared_docs, + maybe_extract_name, +) +from pandas.core.ops import get_op_result_name + +_num_index_shared_docs = dict() + + +class NumericIndex(Index): + """ + Provide numeric type operations. + + This is an abstract class. 
@classmethod
def _validate_dtype(cls, dtype: Dtype) -> None:
    """
    Check that ``dtype`` is acceptable for this index class.

    The predicate and its description are looked up by ``cls._typ``;
    ``None`` is always accepted.

    Raises
    ------
    ValueError
        If ``dtype`` is given and fails the predicate for this class.
    """
    if dtype is None:
        return None

    signed_rule = (is_signed_integer_dtype, "signed integer")
    rules_by_typ = {
        "int64index": signed_rule,
        "uint64index": (is_unsigned_integer_dtype, "unsigned integer"),
        "float64index": (is_float_dtype, "float"),
        "rangeindex": signed_rule,
    }

    dtype_is_ok, expected = rules_by_typ[cls._typ]
    if dtype_is_ok(dtype):
        return None
    raise ValueError(
        f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
    )
def _convert_tolerance(self, tolerance, target):
    """
    Coerce ``tolerance`` to an ndarray and validate it against ``target``.

    Returns
    -------
    numpy.ndarray
        ``np.asarray(tolerance)``.

    Raises
    ------
    ValueError
        If a multi-element tolerance differs in size from ``target``, or if
        the tolerance is not of a numeric dtype.
    """
    tolerance = np.asarray(tolerance)

    if target.size != tolerance.size and tolerance.size > 1:
        raise ValueError("list-like tolerance size must match target index size")

    if np.issubdtype(tolerance.dtype, np.number):
        return tolerance

    # Non-numeric: the message depends on scalar vs. list-like input.
    if tolerance.ndim > 0:
        raise ValueError(
            f"tolerance argument for {type(self).__name__} must contain "
            "numeric elements if it is list type"
        )
    raise ValueError(
        f"tolerance argument for {type(self).__name__} must be numeric "
        f"if it is a scalar: {repr(tolerance)}"
    )
class IntegerIndex(NumericIndex):
    """
    This is an abstract class for Int64Index, UInt64Index.
    """

    def __contains__(self, key) -> bool:
        """
        Membership test that rejects floats with a fractional part.

        Unhashable keys raise ``TypeError`` from the initial ``hash`` call;
        errors raised by the lookup itself are reported as "not contained".
        """
        hash(key)
        try:
            has_fraction = is_float(key) and int(key) != key
            found = False if has_fraction else key in self._engine
        except (OverflowError, TypeError, ValueError):
            found = False
        return found
def _is_compatible_with_other(self, other):
    """
    Report whether ``other`` is compatible with this index.

    True-ish when the parent implementation says so, or when both ``self``
    and ``other`` pass the Int64Index / Float64Index / RangeIndex ABC check
    below.
    """
    # NOTE(review): the isinstance check is given ``type(obj)`` (the class),
    # not the instance. Presumably the ABC helpers match on a class-level
    # attribute so this still works; confirm against their definition.
    return super()._is_compatible_with_other(other) or all(
        isinstance(type(obj), (ABCInt64Index, ABCFloat64Index, ABCRangeIndex))
        for obj in [self, other]
    )
@Appender(_index_shared_docs["astype"])
def astype(self, dtype, copy=True):
    # No docstring here on purpose: ``Appender`` supplies the shared one.
    dtype = pandas_dtype(dtype)
    if needs_i8_conversion(dtype):
        # Datetime-like targets are rejected outright for float data.
        raise TypeError(
            f"Cannot convert Float64Index to dtype {dtype}; integer "
            "values are required for conversion"
        )
    elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
        # TODO(jreback); this can change once we have an EA Index type
        # GH 13149
        arr = astype_nansafe(self.values, dtype=dtype)
        # Carry the name over: constructing the result without it silently
        # dropped the index name on float -> int conversion.
        return Int64Index(arr, name=self.name)
    return super().astype(dtype, copy=copy)
def _format_native_types(
    self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
):
    """
    Render the index values as an array of strings.

    The formatting is delegated to ``FloatArrayFormatter`` with
    ``fixed_width`` switched off. Additional keyword arguments are accepted
    but not used.
    """
    from pandas.io.formats.format import FloatArrayFormatter

    options = dict(
        na_rep=na_rep,
        float_format=float_format,
        decimal=decimal,
        quoting=quoting,
        fixed_width=False,
    )
    return FloatArrayFormatter(self.values, **options).get_result_as_array()
@Appender(_index_shared_docs["get_loc"])
def get_loc(self, key, method=None, tolerance=None):
    # Keys for which ``np.isnan`` is true everywhere, or which ``is_bool``
    # accepts, are answered from ``self._nan_idxs``. Every other key is
    # forwarded to the parent implementation.
    try:
        if np.all(np.isnan(key)) or is_bool(key):
            nan_idxs = self._nan_idxs
            try:
                # .item() only succeeds on a one-element array, so a single
                # NaN position is returned as a scalar
                # (assumes _nan_idxs is a NumPy array -- TODO confirm)
                return nan_idxs.item()
            except ValueError:
                # no NaN positions: the key is not present
                if not len(nan_idxs):
                    raise KeyError(key)
                # more than one NaN position: return all of them
                return nan_idxs
    except (TypeError, NotImplementedError):
        # the NaN test could not be evaluated for this key; fall through to
        # the regular lookup below
        pass
    return super().get_loc(key, method=method, tolerance=tolerance)
def _new_PeriodIndex(cls, **d):
    """
    Rebuild a PeriodIndex from keyword state when unpickling (GH13277).

    ``d`` must contain ``"data"``. When that data has dtype ``int64`` it is
    wrapped in a ``PeriodArray`` (using the optional ``"freq"`` entry) and
    passed to ``cls._simple_new``; otherwise ``cls`` itself is called with
    the data and the remaining keywords.
    """
    data = d.pop("data")
    if not (data.dtype == "int64"):
        return cls(data, **d)

    period_values = PeriodArray(data, freq=d.pop("freq", None))
    return cls._simple_new(period_values, **d)
def __new__(
    cls,
    data=None,
    ordinal=None,
    freq=None,
    tz=None,
    dtype=None,
    copy=False,
    name=None,
    **fields,
):
    """
    Build a PeriodIndex from period-like data, ordinals, or date fields.

    Parameters
    ----------
    data : array-like, optional
        Period-like data; takes precedence over ``ordinal``.
    ordinal : array-like of int, optional
        Converted to an int64 array; only consulted when ``data`` is None.
    freq : str or period object, optional
    tz : object, optional
        Not referenced in the body of this method.
    dtype : str or PeriodDtype, optional
        Checked together with ``freq`` by ``validate_dtype_freq`` when
        ``data`` or ``ordinal`` is given.
    copy : bool, default False
        If True, the resulting array is copied before the index is created.
    name : object, optional
    **fields
        Only ``year``, ``month``, ``day``, ``quarter``, ``hour``,
        ``minute`` and ``second`` are accepted.

    Raises
    ------
    TypeError
        If ``fields`` contains a key outside the accepted set.
    """

    valid_field_set = {
        "year",
        "month",
        "day",
        "quarter",
        "hour",
        "minute",
        "second",
    }

    # mimic the error Python raises for an unknown keyword argument
    if not set(fields).issubset(valid_field_set):
        argument = list(set(fields) - valid_field_set)[0]
        raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

    name = maybe_extract_name(name, data, cls)

    if data is None and ordinal is None:
        # range-based.
        data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
        # PeriodArray._generate range does validation that fields is
        # empty when really using the range-based constructor.
        freq = freq2

        data = PeriodArray(data, freq=freq)
    else:
        freq = validate_dtype_freq(dtype, freq)

        # PeriodIndex allow PeriodIndex(period_index, freq=different)
        # Let's not encourage that kind of behavior in PeriodArray.

        if freq and isinstance(data, cls) and data.freq != freq:
            # TODO: We can do some of these with no-copy / coercion?
            # e.g. D -> 2D seems to be OK
            data = data.asfreq(freq)

        if data is None and ordinal is not None:
            # we strangely ignore `ordinal` if data is passed.
            ordinal = np.asarray(ordinal, dtype=np.int64)
            data = PeriodArray(ordinal, freq)
        else:
            # don't pass copy here, since we copy later.
            data = period_array(data=data, freq=freq)

    if copy:
        data = data.copy()

    return cls._simple_new(data, name=name)
def _shallow_copy(self, values=None, **kwargs):
    """
    Create a new index carrying this index's attributes around ``values``.

    ``values`` may be None (reuse ``self._data``), an index of the same
    type, a ``PeriodArray`` or an int64 ndarray. Anything else raises
    ``TypeError``. A ``freq`` keyword is validated against ``self.dtype``.
    """
    # TODO: simplify, figure out type of values
    payload = self._data if values is None else values
    if isinstance(payload, type(self)):
        payload = payload._data

    if not isinstance(payload, PeriodArray):
        is_i8_ndarray = isinstance(payload, np.ndarray) and payload.dtype == "i8"
        if not is_i8_ndarray:
            # GH#30713 this should never be reached
            raise TypeError(type(payload), getattr(payload, "dtype", None))
        payload = PeriodArray(payload, freq=self.freq)

    # We don't allow changing `freq` in _shallow_copy.
    validate_dtype_freq(self.dtype, kwargs.get("freq"))

    attrs = self._get_attributes_dict()
    attrs.update(kwargs)
    if "dtype" not in kwargs and not len(payload):
        attrs["dtype"] = self.dtype
    return self._simple_new(payload, **attrs)
def _maybe_convert_timedelta(self, other):
    """
    Express timedelta-like input as an integer multiple of ``self.freq``.

    Parameters
    ----------
    other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

    Returns
    -------
    converted : int, np.ndarray[int64]

    Raises
    ------
    IncompatibleFrequency : if the input cannot be written as a multiple
        of self.freq. Note IncompatibleFrequency subclasses ValueError.
    """
    timedelta_like = (timedelta, np.timedelta64, Tick, np.ndarray)

    if isinstance(other, timedelta_like):
        own_offset = frequencies.to_offset(self.freq.rule_code)
        if isinstance(own_offset, Tick):
            # _check_timedeltalike_freq_compat will raise if incompatible
            return self._data._check_timedeltalike_freq_compat(other)
        # a non-Tick freq cannot absorb a timedelta
        raise raise_on_incompatible(self, None)

    if isinstance(other, DateOffset):
        if libfrequencies.get_base_alias(other.rule_code) == self.freq.rule_code:
            return other.n
        raise raise_on_incompatible(self, other)

    if is_integer(other):
        # integer is passed to .shift via
        # _add_datetimelike_methods basically
        # but ufunc may pass integer to _add_delta
        return other

    # raise when input doesn't have freq
    raise raise_on_incompatible(self, None)
def __array__(self, dtype=None) -> np.ndarray:
    """
    Return the index as a NumPy array.

    An integer ``dtype`` yields ``self.asi8``. Any other dtype, including
    None, yields the values of the object-dtype conversion.
    """
    wants_ordinals = is_integer_dtype(dtype)
    return self.asi8 if wants_ordinals else self.astype(object).values
def asof_locs(self, where, mask):
    """
    For each entry of ``where``, find the last unmasked position at or before it.

    Parameters
    ----------
    where : array of timestamps
        A ``DatetimeIndex`` is first converted to a ``PeriodIndex`` with
        this index's freq.
    mask : array of booleans where data is not NA

    Returns
    -------
    numpy.ndarray
        Positions into ``self``; -1 marks entries that fall before the
        value at the first True position of ``mask``.
    """
    targets = where
    if isinstance(targets, DatetimeIndex):
        targets = PeriodIndex(targets.values, freq=self.freq)

    own_ords = self._ndarray_values
    target_ords = targets._ndarray_values

    # right-side insertion point minus one is the last value <= target
    insert_at = own_ords[mask].searchsorted(target_ords, side="right")
    locs = np.where(insert_at > 0, insert_at - 1, 0)
    result = np.arange(len(self))[mask].take(locs)

    first_valid = mask.argmax()
    before_first = (locs == 0) & (target_ords < own_ords[first_valid])
    result[before_first] = -1

    return result
def get_value(self, series, key):
    """
    Fast lookup of value from 1-dimensional ndarray. Only use this if you
    know what you're doing

    The parent lookup is tried first. If it raises KeyError or IndexError,
    a string key is parsed as a date: a key coarser than the index freq
    selects a slice of ``series``, a key at the index freq selects a single
    ordinal, and a finer key raises KeyError. Non-string keys are converted
    to a Period with the index freq.
    """
    s = com.values_from_object(series)
    try:
        value = super().get_value(s, key)
    except (KeyError, IndexError):
        if isinstance(key, str):
            asdt, parsed, reso = parse_time_string(key, self.freq)
            # frequency-group codes of the parsed key and of this index
            grp = resolution.Resolution.get_freq_group(reso)
            freqn = resolution.get_freq_group(self.freq)

            vals = self._ndarray_values

            # if our data is higher resolution than requested key, slice
            if grp < freqn:
                iv = Period(asdt, freq=(grp, 1))
                # first and last ordinal (at our freq) covered by the key
                ord1 = iv.asfreq(self.freq, how="S").ordinal
                ord2 = iv.asfreq(self.freq, how="E").ordinal

                # key lies entirely outside the first/last stored value
                if ord2 < vals[0] or ord1 > vals[-1]:
                    raise KeyError(key)

                pos = np.searchsorted(self._ndarray_values, [ord1, ord2])
                key = slice(pos[0], pos[1] + 1)
                return series[key]
            elif grp == freqn:
                key = Period(asdt, freq=self.freq).ordinal
                return com.maybe_box(
                    self, self._int64index.get_value(s, key), series, key
                )
            else:
                # key is finer than the index freq
                raise KeyError(key)

        period = Period(key, self.freq)
        # a missing period is looked up by its ``value``, others by ordinal
        key = period.value if isna(period) else period.ordinal
        return com.maybe_box(self, self._int64index.get_value(s, key), series, key)
    else:
        return com.maybe_box(self, value, series, key)
def get_loc(self, key, method=None, tolerance=None):
    """
    Get integer location for requested label

    The engine is queried with ``key`` as given. ``method`` and
    ``tolerance`` are only used by the fallback lookup that runs when the
    engine raises KeyError.

    Returns
    -------
    loc : int

    Raises
    ------
    KeyError
        If the key is a missing integer, cannot be parsed or converted to
        a Period, or is not found by the fallback lookup.
    """
    try:
        return self._engine.get_loc(key)
    except KeyError:
        # integer keys are not reinterpreted as dates
        if is_integer(key):
            raise

        try:
            asdt, parsed, reso = parse_time_string(key, self.freq)
            key = asdt
        except TypeError:
            # key could not be handled by the parser; keep it unchanged
            pass
        except DateParseError:
            # A string with invalid format
            raise KeyError(f"Cannot interpret '{key}' as period")

        try:
            key = Period(key, freq=self.freq)
        except ValueError:
            # we cannot construct the Period
            # as we have an invalid type
            raise KeyError(key)

        try:
            # NaT is looked up through the iNaT sentinel
            ordinal = iNaT if key is NaT else key.ordinal
            if tolerance is not None:
                tolerance = self._convert_tolerance(tolerance, np.asarray(key))
            return self._int64index.get_loc(ordinal, method, tolerance)

        except KeyError:
            # report the Period key rather than the raw ordinal
            raise KeyError(key)
def _parsed_string_to_bounds(self, reso, parsed):
    """
    Return the first and last Period (at ``self.freq``) covered by ``parsed``.

    Parameters
    ----------
    reso : str
        One of "year", "month", "quarter", "day", "hour", "minute", "second".
    parsed : datetime-like
        Source of the year/month/... components.

    Raises
    ------
    KeyError
        If ``reso`` is not one of the names above.
    """
    # (resolution name, components read from ``parsed``, Period freq alias)
    plain_resolutions = (
        ("year", ("year",), "A"),
        ("month", ("year", "month"), "M"),
        ("day", ("year", "month", "day"), "D"),
        ("hour", ("year", "month", "day", "hour"), "H"),
        ("minute", ("year", "month", "day", "hour", "minute"), "T"),
        ("second", ("year", "month", "day", "hour", "minute", "second"), "S"),
    )

    if reso == "quarter":
        # quarters are derived from the month, not read directly
        quarter = (parsed.month - 1) // 3 + 1
        span = Period(year=parsed.year, quarter=quarter, freq="Q-DEC")
    else:
        for label, attrs, alias in plain_resolutions:
            if reso == label:
                components = {attr: getattr(parsed, attr) for attr in attrs}
                span = Period(freq=alias, **components)
                break
        else:
            raise KeyError(reso)

    return (span.asfreq(self.freq, how="start"), span.asfreq(self.freq, how="end"))
def insert(self, loc, item):
    """
    Return a new index with ``item`` placed at position ``loc``.

    A Period with this index's freq is spliced in by ordinal. Any other
    item is inserted into the object-dtype conversion of the index.
    """
    if isinstance(item, Period) and not (self.freq != item.freq):
        head = self[:loc].asi8
        tail = self[loc:].asi8
        ordinals = np.concatenate((head, np.array([item.ordinal]), tail))
        return self._shallow_copy(ordinals)

    return self.astype(object).insert(loc, item)
def difference(self, other, sort=None):
    """
    Return the entries of this index that are not in ``other``.

    An equal ``other`` gives an empty index of this dtype. An object-dtype
    ``other`` is handled through object conversion. An ``other`` of any
    different dtype leaves this index unchanged. Otherwise the difference
    is computed on the int64 ordinals.
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    result_name = get_op_result_name(self, other)
    other = ensure_index(other)

    if self.equals(other):
        # pass an empty PeriodArray with the appropriate dtype
        return self._shallow_copy(self._data[:0])

    if is_object_dtype(other):
        as_objects = self.astype(object)
        return as_objects.difference(other).astype(self.dtype)

    if not is_dtype_equal(self.dtype, other.dtype):
        return self

    own_ordinals = Int64Index._simple_new(self.asi8)
    other_ordinals = Int64Index._simple_new(other.asi8)
    remaining = own_ordinals.difference(other_ordinals, sort=sort)

    return self._shallow_copy(
        np.asarray(remaining, dtype=np.int64), name=result_name
    )
def period_range(
    start=None, end=None, periods=None, freq=None, name=None
) -> PeriodIndex:
    """
    Return a fixed frequency PeriodIndex.

    The day (calendar) is the default frequency.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound for generating periods.
    end : str or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. By default the freq is taken from `start` or `end`
        if those are Period objects. Otherwise, the default is ``"D"`` for
        daily frequency.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Raises
    ------
    ValueError
        If the number of non-None arguments among ``start``, ``end`` and
        ``periods`` is not exactly two.

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------

    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05',
                 '2017-06', '2017-07', '2017-08', '2017-09', '2017-10',
                 '2017-11', '2017-12', '2018-01'],
                dtype='period[M]', freq='M')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]', freq='M')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )
    # fall back to daily frequency only when neither bound is a Period
    if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
        freq = "D"

    data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
    data = PeriodArray(data, freq=freq)
    return PeriodIndex(data, name=name)
def __new__(
    cls, start=None, stop=None, step=None, dtype=None, copy=False, name=None,
):
    """
    Build a RangeIndex from ``start``/``stop``/``step`` or from another RangeIndex.

    With only ``start`` given, it is interpreted as ``stop`` and the range
    begins at 0. ``copy`` is accepted but not used here.

    Raises
    ------
    TypeError
        If ``start``, ``stop`` and ``step`` are all None.
    ValueError
        If ``step`` is zero.
    """
    cls._validate_dtype(dtype)
    name = maybe_extract_name(name, start, cls)

    # RangeIndex
    if isinstance(start, RangeIndex):
        return cls._simple_new(start._range, dtype=dtype, name=name)

    # validate the arguments
    if com.all_none(start, stop, step):
        raise TypeError("RangeIndex(...) must be called with integers")

    first = 0 if start is None else ensure_python_int(start)
    if stop is None:
        # a lone positional argument is the end of the range
        first, last = 0, first
    else:
        last = ensure_python_int(stop)

    stride = 1 if step is None else ensure_python_int(step)
    if stride == 0:
        raise ValueError("Step must not be zero")

    return cls._simple_new(range(first, last, stride), dtype=dtype, name=name)
+ + Returns + ------- + RangeIndex + """ + if not isinstance(data, range): + raise TypeError( + f"{cls.__name__}(...) must be called with object coercible to a " + f"range, {repr(data)} was passed" + ) + + cls._validate_dtype(dtype) + return cls._simple_new(data, dtype=dtype, name=name) + + @classmethod + def _simple_new(cls, values, name=None, dtype=None): + result = object.__new__(cls) + + # handle passed None, non-integers + if values is None: + # empty + values = range(0, 0, 1) + elif not isinstance(values, range): + return Index(values, dtype=dtype, name=name) + + result._range = values + result.name = name + + result._reset_identity() + return result + + # -------------------------------------------------------------------- + + @cache_readonly + def _constructor(self): + """ return the class to use for construction """ + return Int64Index + + @property + def _data(self): + """ + An int array that for performance reasons is created only when needed. + + The constructed array is saved in ``_cached_data``. This allows us to + check if the array has been created without accessing ``_data`` and + triggering the construction. 
+ """ + if self._cached_data is None: + self._cached_data = np.arange( + self.start, self.stop, self.step, dtype=np.int64 + ) + return self._cached_data + + @cache_readonly + def _int64index(self): + return Int64Index._simple_new(self._data, name=self.name) + + def _get_data_as_items(self): + """ return a list of tuples of start, stop, step """ + rng = self._range + return [("start", rng.start), ("stop", rng.stop), ("step", rng.step)] + + def __reduce__(self): + d = self._get_attributes_dict() + d.update(dict(self._get_data_as_items())) + return ibase._new_Index, (type(self), d), None + + # -------------------------------------------------------------------- + # Rendering Methods + + def _format_attrs(self): + """ + Return a list of tuples of the (attr, formatted_value) + """ + attrs = self._get_data_as_items() + if self.name is not None: + attrs.append(("name", ibase.default_pprint(self.name))) + return attrs + + def _format_data(self, name=None): + # we are formatting thru the attributes + return None + + def _format_with_header(self, header, na_rep="NaN", **kwargs): + return header + list(map(pprint_thing, self._range)) + + # -------------------------------------------------------------------- + _deprecation_message = ( + "RangeIndex.{} is deprecated and will be " + "removed in a future version. Use RangeIndex.{} " + "instead" + ) + + @cache_readonly + def start(self): + """ + The value of the `start` parameter (``0`` if this was not supplied). + """ + # GH 25710 + return self._range.start + + @property + def _start(self): + """ + The value of the `start` parameter (``0`` if this was not supplied). + + .. deprecated:: 0.25.0 + Use ``start`` instead. + """ + warnings.warn( + self._deprecation_message.format("_start", "start"), + FutureWarning, + stacklevel=2, + ) + return self.start + + @cache_readonly + def stop(self): + """ + The value of the `stop` parameter. 
+ """ + return self._range.stop + + @property + def _stop(self): + """ + The value of the `stop` parameter. + + .. deprecated:: 0.25.0 + Use ``stop`` instead. + """ + # GH 25710 + warnings.warn( + self._deprecation_message.format("_stop", "stop"), + FutureWarning, + stacklevel=2, + ) + return self.stop + + @cache_readonly + def step(self): + """ + The value of the `step` parameter (``1`` if this was not supplied). + """ + # GH 25710 + return self._range.step + + @property + def _step(self): + """ + The value of the `step` parameter (``1`` if this was not supplied). + + .. deprecated:: 0.25.0 + Use ``step`` instead. + """ + # GH 25710 + warnings.warn( + self._deprecation_message.format("_step", "step"), + FutureWarning, + stacklevel=2, + ) + return self.step + + @cache_readonly + def nbytes(self) -> int: + """ + Return the number of bytes in the underlying data. + """ + rng = self._range + return getsizeof(rng) + sum( + getsizeof(getattr(rng, attr_name)) + for attr_name in ["start", "stop", "step"] + ) + + def memory_usage(self, deep: bool = False) -> int: + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self.nbytes + + @property + def dtype(self) -> np.dtype: + return np.dtype(np.int64) + + @property + def is_unique(self) -> bool: + """ return if the index has unique values """ + return True + + @cache_readonly + def is_monotonic_increasing(self) -> bool: + return self._range.step > 0 or len(self) <= 1 + + @cache_readonly + def is_monotonic_decreasing(self) -> bool: + return self._range.step < 0 or len(self) <= 1 + + @property + def has_duplicates(self) -> bool: + return False + + def __contains__(self, key: Union[int, 
np.integer]) -> bool: + hash(key) + try: + key = ensure_python_int(key) + except TypeError: + return False + return key in self._range + + @Appender(_index_shared_docs["get_loc"]) + def get_loc(self, key, method=None, tolerance=None): + if is_integer(key) and method is None and tolerance is None: + new_key = int(key) + try: + return self._range.index(new_key) + except ValueError: + raise KeyError(key) + return super().get_loc(key, method=method, tolerance=tolerance) + + @Appender(_index_shared_docs["get_indexer"]) + def get_indexer(self, target, method=None, limit=None, tolerance=None): + if com.any_not_none(method, tolerance, limit) or not is_list_like(target): + return super().get_indexer( + target, method=method, tolerance=tolerance, limit=limit + ) + + if self.step > 0: + start, stop, step = self.start, self.stop, self.step + else: + # GH 28678: work on reversed range for simplicity + reverse = self._range[::-1] + start, stop, step = reverse.start, reverse.stop, reverse.step + + target_array = np.asarray(target) + if not (is_integer_dtype(target_array) and target_array.ndim == 1): + # checks/conversions/roundings are delegated to general method + return super().get_indexer(target, method=method, tolerance=tolerance) + + locs = target_array - start + valid = (locs % step == 0) & (locs >= 0) & (target_array < stop) + locs[~valid] = -1 + locs[valid] = locs[valid] / step + + if step != self.step: + # We reversed this range: transform to original locs + locs[valid] = len(self) - 1 - locs[valid] + return ensure_platform_int(locs) + + def tolist(self): + return list(self._range) + + @Appender(_index_shared_docs["_shallow_copy"]) + def _shallow_copy(self, values=None, **kwargs): + if values is None: + name = kwargs.get("name", self.name) + return self._simple_new(self._range, name=name) + else: + kwargs.setdefault("name", self.name) + return self._int64index._shallow_copy(values, **kwargs) + + @Appender(ibase._index_shared_docs["copy"]) + def copy(self, name=None, 
deep=False, dtype=None, **kwargs): + self._validate_dtype(dtype) + if name is None: + name = self.name + return self.from_range(self._range, name=name) + + def _minmax(self, meth): + no_steps = len(self) - 1 + if no_steps == -1: + return np.nan + elif (meth == "min" and self.step > 0) or (meth == "max" and self.step < 0): + return self.start + + return self.start + self.step * no_steps + + def min(self, axis=None, skipna=True, *args, **kwargs): + """The minimum value of the RangeIndex""" + nv.validate_minmax_axis(axis) + nv.validate_min(args, kwargs) + return self._minmax("min") + + def max(self, axis=None, skipna=True, *args, **kwargs): + """The maximum value of the RangeIndex""" + nv.validate_minmax_axis(axis) + nv.validate_max(args, kwargs) + return self._minmax("max") + + def argsort(self, *args, **kwargs): + """ + Returns the indices that would sort the index and its + underlying data. + + Returns + ------- + argsorted : numpy array + + See Also + -------- + numpy.ndarray.argsort + """ + nv.validate_argsort(args, kwargs) + + if self._range.step > 0: + return np.arange(len(self)) + else: + return np.arange(len(self) - 1, -1, -1) + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if isinstance(other, RangeIndex): + return self._range == other._range + return super().equals(other) + + def intersection(self, other, sort=False): + """ + Form the intersection of two Index objects. + + Parameters + ---------- + other : Index or array-like + sort : False or None, default False + Sort the resulting index if possible + + .. versionadded:: 0.24.0 + + .. versionchanged:: 0.24.1 + + Changed the default to ``False`` to match the behaviour + from before 0.24.0. 
+ + Returns + ------- + intersection : Index + """ + self._validate_sort_keyword(sort) + + if self.equals(other): + return self._get_reconciled_name_object(other) + + if not isinstance(other, RangeIndex): + return super().intersection(other, sort=sort) + + if not len(self) or not len(other): + return self._simple_new(None) + + first = self._range[::-1] if self.step < 0 else self._range + second = other._range[::-1] if other.step < 0 else other._range + + # check whether intervals intersect + # deals with in- and decreasing ranges + int_low = max(first.start, second.start) + int_high = min(first.stop, second.stop) + if int_high <= int_low: + return self._simple_new(None) + + # Method hint: linear Diophantine equation + # solve intersection problem + # performance hint: for identical step sizes, could use + # cheaper alternative + gcd, s, t = self._extended_gcd(first.step, second.step) + + # check whether element sets intersect + if (first.start - second.start) % gcd: + return self._simple_new(None) + + # calculate parameters for the RangeIndex describing the + # intersection disregarding the lower bounds + tmp_start = first.start + (second.start - first.start) * first.step // gcd * s + new_step = first.step * second.step // gcd + new_range = range(tmp_start, int_high, new_step) + new_index = self._simple_new(new_range) + + # adjust index to limiting interval + new_start = new_index._min_fitting_element(int_low) + new_range = range(new_start, new_index.stop, new_index.step) + new_index = self._simple_new(new_range) + + if (self.step < 0 and other.step < 0) is not (new_index.step < 0): + new_index = new_index[::-1] + if sort is None: + new_index = new_index.sort_values() + return new_index + + def _min_fitting_element(self, lower_limit): + """Returns the smallest element greater than or equal to the limit""" + no_steps = -(-(lower_limit - self.start) // abs(self.step)) + return self.start + abs(self.step) * no_steps + + def _max_fitting_element(self, upper_limit): + 
"""Returns the largest element smaller than or equal to the limit""" + no_steps = (upper_limit - self.start) // abs(self.step) + return self.start + abs(self.step) * no_steps + + def _extended_gcd(self, a, b): + """ + Extended Euclidean algorithms to solve Bezout's identity: + a*x + b*y = gcd(x, y) + Finds one particular solution for x, y: s, t + Returns: gcd, s, t + """ + s, old_s = 0, 1 + t, old_t = 1, 0 + r, old_r = b, a + while r: + quotient = old_r // r + old_r, r = r, old_r - quotient * r + old_s, s = s, old_s - quotient * s + old_t, t = t, old_t - quotient * t + return old_r, old_s, old_t + + def _union(self, other, sort): + """ + Form the union of two Index objects and sorts if possible + + Parameters + ---------- + other : Index or array-like + + sort : False or None, default None + Whether to sort resulting index. ``sort=None`` returns a + monotonically increasing ``RangeIndex`` if possible or a sorted + ``Int64Index`` if not. ``sort=False`` always returns an + unsorted ``Int64Index`` + + .. 
versionadded:: 0.25.0 + + Returns + ------- + union : Index + """ + if not len(other) or self.equals(other) or not len(self): + return super()._union(other, sort=sort) + + if isinstance(other, RangeIndex) and sort is None: + start_s, step_s = self.start, self.step + end_s = self.start + self.step * (len(self) - 1) + start_o, step_o = other.start, other.step + end_o = other.start + other.step * (len(other) - 1) + if self.step < 0: + start_s, step_s, end_s = end_s, -step_s, start_s + if other.step < 0: + start_o, step_o, end_o = end_o, -step_o, start_o + if len(self) == 1 and len(other) == 1: + step_s = step_o = abs(self.start - other.start) + elif len(self) == 1: + step_s = step_o + elif len(other) == 1: + step_o = step_s + start_r = min(start_s, start_o) + end_r = max(end_s, end_o) + if step_o == step_s: + if ( + (start_s - start_o) % step_s == 0 + and (start_s - end_o) <= step_s + and (start_o - end_s) <= step_s + ): + return type(self)(start_r, end_r + step_s, step_s) + if ( + (step_s % 2 == 0) + and (abs(start_s - start_o) <= step_s / 2) + and (abs(end_s - end_o) <= step_s / 2) + ): + return type(self)(start_r, end_r + step_s / 2, step_s / 2) + elif step_o % step_s == 0: + if ( + (start_o - start_s) % step_s == 0 + and (start_o + step_s >= start_s) + and (end_o - step_s <= end_s) + ): + return type(self)(start_r, end_r + step_s, step_s) + elif step_s % step_o == 0: + if ( + (start_s - start_o) % step_o == 0 + and (start_s + step_o >= start_o) + and (end_s - step_o <= end_o) + ): + return type(self)(start_r, end_r + step_o, step_o) + return self._int64index._union(other, sort=sort) + + @Appender(_index_shared_docs["join"]) + def join(self, other, how="left", level=None, return_indexers=False, sort=False): + if how == "outer" and self is not other: + # note: could return RangeIndex in more circumstances + return self._int64index.join(other, how, level, return_indexers, sort) + + return super().join(other, how, level, return_indexers, sort) + + def 
_concat_same_dtype(self, indexes, name): + """ + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + start = step = next_ = None + + # Filter the empty indexes + non_empty_indexes = [obj for obj in indexes if len(obj)] + + for obj in non_empty_indexes: + rng: range = obj._range + + if start is None: + # This is set by the first non-empty index + start = rng.start + if step is None and len(rng) > 1: + step = rng.step + elif step is None: + # First non-empty index had only one element + if rng.start == start: + result = Int64Index(np.concatenate([x._values for x in indexes])) + return result.rename(name) + + step = rng.start - start + + non_consecutive = (step != rng.step and len(rng) > 1) or ( + next_ is not None and rng.start != next_ + ) + if non_consecutive: + result = Int64Index(np.concatenate([x._values for x in indexes])) + return result.rename(name) + + if step is not None: + next_ = rng[-1] + step + + if non_empty_indexes: + # Get the stop value from "next" or alternatively + # from the last non-empty index + stop = non_empty_indexes[-1].stop if next_ is None else next_ + return RangeIndex(start, stop, step).rename(name) + + # Here all "indexes" had 0 length, i.e. were empty. + # In this case return an empty range index. + return RangeIndex(0, 0).rename(name) + + def __len__(self) -> int: + """ + return the length of the RangeIndex + """ + return len(self._range) + + @property + def size(self) -> int: + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. 
+ """ + if isinstance(key, slice): + new_range = self._range[key] + return self._simple_new(new_range, name=self.name) + elif is_integer(key): + new_key = int(key) + try: + return self._range[new_key] + except IndexError: + raise IndexError( + f"index {key} is out of bounds for axis 0 with size {len(self)}" + ) + elif is_scalar(key): + raise IndexError( + "only integers, slices (`:`), " + "ellipsis (`...`), numpy.newaxis (`None`) " + "and integer or boolean " + "arrays are valid indices" + ) + # fall back to Int64Index + return super().__getitem__(key) + + @unpack_zerodim_and_defer("__floordiv__") + def __floordiv__(self, other): + + if is_integer(other) and other != 0: + if len(self) == 0 or self.start % other == 0 and self.step % other == 0: + start = self.start // other + step = self.step // other + stop = start + len(self) * step + new_range = range(start, stop, step or 1) + return self._simple_new(new_range, name=self.name) + if len(self) == 1: + start = self.start // other + new_range = range(start, start + 1, 1) + return self._simple_new(new_range, name=self.name) + return self._int64index // other + + def all(self) -> bool: + return 0 not in self._range + + def any(self) -> bool: + return any(self._range) + + @classmethod + def _add_numeric_methods_binary(cls): + """ add in numeric methods, specialized to RangeIndex """ + + def _make_evaluate_binop(op, step=False): + """ + Parameters + ---------- + op : callable that accepts 2 parms + perform the binary op + step : callable, optional, default to False + op to apply to the step parm if not None + if False, use the existing step + """ + + @unpack_zerodim_and_defer(op.__name__) + def _evaluate_numeric_binop(self, other): + if isinstance(other, ABCTimedeltaIndex): + # Defer to TimedeltaIndex implementation + return NotImplemented + elif isinstance(other, (timedelta, np.timedelta64)): + # GH#19333 is_integer evaluated True on timedelta64, + # so we need to catch these explicitly + return op(self._int64index, 
other) + elif is_timedelta64_dtype(other): + # Must be an np.ndarray; GH#22390 + return op(self._int64index, other) + + other = extract_array(other, extract_numpy=True) + attrs = self._get_attributes_dict() + + left, right = self, other + + try: + # apply if we have an override + if step: + with np.errstate(all="ignore"): + rstep = step(left.step, right) + + # we don't have a representable op + # so return a base index + if not is_integer(rstep) or not rstep: + raise ValueError + + else: + rstep = left.step + + with np.errstate(all="ignore"): + rstart = op(left.start, right) + rstop = op(left.stop, right) + + result = type(self)(rstart, rstop, rstep, **attrs) + + # for compat with numpy / Int64Index + # even if we can represent as a RangeIndex, return + # as a Float64Index if we have float-like descriptors + if not all(is_integer(x) for x in [rstart, rstop, rstep]): + result = result.astype("float64") + + return result + + except (ValueError, TypeError, ZeroDivisionError): + # Defer to Int64Index implementation + return op(self._int64index, other) + # TODO: Do attrs get handled reliably? 
+ + name = f"__{op.__name__}__" + return compat.set_function_name(_evaluate_numeric_binop, name, cls) + + cls.__add__ = _make_evaluate_binop(operator.add) + cls.__radd__ = _make_evaluate_binop(ops.radd) + cls.__sub__ = _make_evaluate_binop(operator.sub) + cls.__rsub__ = _make_evaluate_binop(ops.rsub) + cls.__mul__ = _make_evaluate_binop(operator.mul, step=operator.mul) + cls.__rmul__ = _make_evaluate_binop(ops.rmul, step=ops.rmul) + cls.__truediv__ = _make_evaluate_binop(operator.truediv, step=operator.truediv) + cls.__rtruediv__ = _make_evaluate_binop(ops.rtruediv, step=ops.rtruediv) + + +RangeIndex._add_numeric_methods() diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py new file mode 100644 index 00000000..c78020fb --- /dev/null +++ b/pandas/core/indexes/timedeltas.py @@ -0,0 +1,509 @@ +""" implement the TimedeltaIndex """ +from datetime import datetime + +import numpy as np + +from pandas._libs import NaT, Timedelta, index as libindex +from pandas.util._decorators import Appender, Substitution + +from pandas.core.dtypes.common import ( + _TD_DTYPE, + is_float, + is_integer, + is_list_like, + is_scalar, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + pandas_dtype, +) +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna + +from pandas.core.accessor import delegate_names +from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.timedeltas import TimedeltaArray, _is_convertible_to_td +from pandas.core.base import _shared_docs +import pandas.core.common as com +from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name +from pandas.core.indexes.datetimelike import ( + DatetimeIndexOpsMixin, + DatetimelikeDelegateMixin, + DatetimeTimedeltaMixin, +) +from pandas.core.indexes.extension import inherit_names + +from pandas.tseries.frequencies import to_offset + + +class TimedeltaDelegateMixin(DatetimelikeDelegateMixin): + # Most attrs are dispatched via 
datetimelike_{ops,methods} + # Some are "raw" methods, the result is not re-boxed in an Index + # We also have a few "extra" attrs, which may or may not be raw, + # which we don't want to expose in the .dt accessor. + _raw_properties = {"components", "_box_func"} + _raw_methods = {"to_pytimedelta", "sum", "std", "median", "_format_native_types"} + + _delegated_properties = TimedeltaArray._datetimelike_ops + list(_raw_properties) + _delegated_methods = TimedeltaArray._datetimelike_methods + list(_raw_methods) + + +@inherit_names( + ["_box_values", "__neg__", "__pos__", "__abs__"], TimedeltaArray, wrap=True +) +@inherit_names( + [ + "_bool_ops", + "_object_ops", + "_field_ops", + "_datetimelike_ops", + "_datetimelike_methods", + "_other_ops", + ], + TimedeltaArray, +) +@delegate_names( + TimedeltaArray, TimedeltaDelegateMixin._delegated_properties, typ="property" +) +@delegate_names( + TimedeltaArray, + TimedeltaDelegateMixin._delegated_methods, + typ="method", + overwrite=True, +) +class TimedeltaIndex( + DatetimeTimedeltaMixin, dtl.TimelikeOps, TimedeltaDelegateMixin, +): + """ + Immutable ndarray of timedelta64 data, represented internally as int64, and + which can be boxed to timedelta objects. + + Parameters + ---------- + data : array-like (1-dimensional), optional + Optional timedelta-like data to construct index with. + unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, optional + Which is an integer/float number. + freq : str or pandas offset object, optional + One of pandas date offset strings or corresponding objects. The string + 'infer' can be passed in order to set the frequency of the index as the + inferred frequency upon creation. + copy : bool + Make a copy of input ndarray. + name : object + Name to be stored in the index. 
+ + Attributes + ---------- + days + seconds + microseconds + nanoseconds + components + inferred_freq + + Methods + ------- + to_pytimedelta + to_series + round + floor + ceil + to_frame + mean + + See Also + -------- + Index : The base pandas Index type. + Timedelta : Represents a duration between two dates or times. + DatetimeIndex : Index of datetime64 data. + PeriodIndex : Index of Period data. + timedelta_range : Create a fixed-frequency TimedeltaIndex. + + Notes + ----- + To learn more about the frequency strings, please see `this link + `__. + """ + + _typ = "timedeltaindex" + + _engine_type = libindex.TimedeltaEngine + + _comparables = ["name", "freq"] + _attributes = ["name", "freq"] + _is_numeric_dtype = True + _infer_as_myclass = True + + # ------------------------------------------------------------------- + # Constructors + + def __new__( + cls, + data=None, + unit=None, + freq=None, + closed=None, + dtype=_TD_DTYPE, + copy=False, + name=None, + ): + name = maybe_extract_name(name, data, cls) + + if is_scalar(data): + raise TypeError( + f"{cls.__name__}() must be called with a " + f"collection of some kind, {repr(data)} was passed" + ) + + if unit in {"Y", "y", "M"}: + raise ValueError( + "Units 'M' and 'Y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." 
+ ) + + if isinstance(data, TimedeltaArray): + if copy: + data = data.copy() + return cls._simple_new(data, name=name, freq=freq) + + if isinstance(data, TimedeltaIndex) and freq is None and name is None: + if copy: + return data.copy() + else: + return data._shallow_copy() + + # - Cases checked above all return/raise before reaching here - # + + tdarr = TimedeltaArray._from_sequence( + data, freq=freq, unit=unit, dtype=dtype, copy=copy + ) + return cls._simple_new(tdarr._data, freq=tdarr.freq, name=name) + + @classmethod + def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): + # `dtype` is passed by _shallow_copy in corner cases, should always + # be timedelta64[ns] if present + if not isinstance(values, TimedeltaArray): + values = TimedeltaArray._simple_new(values, dtype=dtype, freq=freq) + else: + if freq is None: + freq = values.freq + assert isinstance(values, TimedeltaArray), type(values) + assert dtype == _TD_DTYPE, dtype + assert values.dtype == "m8[ns]", values.dtype + + tdarr = TimedeltaArray._simple_new(values._data, freq=freq) + result = object.__new__(cls) + result._data = tdarr + result._name = name + # For groupby perf. 
See note in indexes/base about _index_data + result._index_data = tdarr._data + + result._reset_identity() + return result + + # ------------------------------------------------------------------- + # Rendering Methods + + @property + def _formatter_func(self): + from pandas.io.formats.format import _get_format_timedelta64 + + return _get_format_timedelta64(self, box=True) + + # ------------------------------------------------------------------- + + @Appender(_index_shared_docs["astype"]) + def astype(self, dtype, copy=True): + dtype = pandas_dtype(dtype) + if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype): + # Have to repeat the check for 'timedelta64' (not ns) dtype + # so that we can return a numeric index, since pandas will return + # a TimedeltaIndex when dtype='timedelta' + result = self._data.astype(dtype, copy=copy) + if self.hasnans: + return Index(result, name=self.name) + return Index(result.astype("i8"), name=self.name) + return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy) + + def _maybe_promote(self, other): + if other.inferred_type == "timedelta": + other = TimedeltaIndex(other) + return self, other + + def get_value(self, series, key): + """ + Fast lookup of value from 1-dimensional ndarray. 
Only use this if you + know what you're doing + """ + + if _is_convertible_to_td(key): + key = Timedelta(key) + return self.get_value_maybe_box(series, key) + + try: + value = Index.get_value(self, series, key) + except KeyError: + try: + loc = self._get_string_slice(key) + return series[loc] + except (TypeError, ValueError, KeyError): + pass + + try: + return self.get_value_maybe_box(series, key) + except (TypeError, ValueError, KeyError): + raise KeyError(key) + else: + return com.maybe_box(self, value, series, key) + + def get_value_maybe_box(self, series, key: Timedelta): + values = self._engine.get_value(com.values_from_object(series), key) + return com.maybe_box(self, values, series, key) + + def get_loc(self, key, method=None, tolerance=None): + """ + Get integer location for requested label + + Returns + ------- + loc : int + """ + if is_list_like(key) or (isinstance(key, datetime) and key is not NaT): + # GH#20464 datetime check here is to ensure we don't allow + # datetime objects to be incorrectly treated as timedelta + # objects; NaT is a special case because it plays a double role + # as Not-A-Timedelta + raise TypeError + + if isna(key): + key = NaT + + if tolerance is not None: + # try converting tolerance now, so errors don't get swallowed by + # the try/except clauses below + tolerance = self._convert_tolerance(tolerance, np.asarray(key)) + + if _is_convertible_to_td(key) or key is NaT: + key = Timedelta(key) + return Index.get_loc(self, key, method, tolerance) + + try: + return Index.get_loc(self, key, method, tolerance) + except (KeyError, ValueError, TypeError): + try: + return self._get_string_slice(key) + except (TypeError, KeyError, ValueError): + pass + + try: + stamp = Timedelta(key) + return Index.get_loc(self, stamp, method, tolerance) + except (KeyError, ValueError): + raise KeyError(key) + + def _maybe_cast_slice_bound(self, label, side, kind): + """ + If label is a string, cast it to timedelta according to resolution. 
+ + Parameters + ---------- + label : object + side : {'left', 'right'} + kind : {'ix', 'loc', 'getitem'} + + Returns + ------- + label : object + """ + assert kind in ["ix", "loc", "getitem", None] + + if isinstance(label, str): + parsed = Timedelta(label) + lbound = parsed.round(parsed.resolution_string) + if side == "left": + return lbound + else: + return lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns") + elif is_integer(label) or is_float(label): + self._invalid_indexer("slice", label) + + return label + + def _get_string_slice(self, key): + if is_integer(key) or is_float(key) or key is NaT: + self._invalid_indexer("slice", key) + loc = self._partial_td_slice(key) + return loc + + def _partial_td_slice(self, key): + + # given a key, try to figure out a location for a partial slice + if not isinstance(key, str): + return key + + raise NotImplementedError + + @Substitution(klass="TimedeltaIndex") + @Appender(_shared_docs["searchsorted"]) + def searchsorted(self, value, side="left", sorter=None): + if isinstance(value, (np.ndarray, Index)): + if not type(self._data)._is_recognized_dtype(value): + raise TypeError( + "searchsorted requires compatible dtype or scalar, " + f"not {type(value).__name__}" + ) + value = type(self._data)(value) + self._data._check_compatible_with(value) + + elif isinstance(value, self._data._recognized_scalars): + self._data._check_compatible_with(value) + value = self._data._scalar_type(value) + + elif not isinstance(value, TimedeltaArray): + raise TypeError( + "searchsorted requires compatible dtype or scalar, " + f"not {type(value).__name__}" + ) + + return self._data.searchsorted(value, side=side, sorter=sorter) + + def is_type_compatible(self, typ) -> bool: + return typ == self.inferred_type or typ == "timedelta" + + @property + def inferred_type(self) -> str: + return "timedelta64" + + def insert(self, loc, item): + """ + Make new Index inserting new item at location + + Parameters + ---------- + loc : int + item : 
object + If not either a Python datetime or a numpy integer-like, returned + Index dtype will be object rather than datetime. + + Returns + ------- + new_index : Index + """ + # try to convert if possible + if isinstance(item, self._data._recognized_scalars): + item = self._data._scalar_type(item) + elif is_valid_nat_for_dtype(item, self.dtype): + # GH 18295 + item = self._na_value + elif is_scalar(item) and isna(item): + # i.e. datetime64("NaT") + raise TypeError( + f"cannot insert {type(self).__name__} with incompatible label" + ) + + freq = None + if isinstance(item, self._data._scalar_type) or item is NaT: + self._data._check_compatible_with(item, setitem=True) + + # check freq can be preserved on edge cases + if self.size and self.freq is not None: + if item is NaT: + pass + elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]: + freq = self.freq + elif (loc == len(self)) and item - self.freq == self[-1]: + freq = self.freq + item = item.asm8 + + try: + new_i8s = np.concatenate( + (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8) + ) + return self._shallow_copy(new_i8s, freq=freq) + except (AttributeError, TypeError): + + # fall back to object index + if isinstance(item, str): + return self.astype(object).insert(loc, item) + raise TypeError( + f"cannot insert {type(self).__name__} with incompatible label" + ) + + +TimedeltaIndex._add_logical_methods_disabled() + + +def timedelta_range( + start=None, end=None, periods=None, freq=None, name=None, closed=None +) -> TimedeltaIndex: + """ + Return a fixed frequency TimedeltaIndex, with day as the default + frequency. + + Parameters + ---------- + start : str or timedelta-like, default None + Left bound for generating timedeltas. + end : str or timedelta-like, default None + Right bound for generating timedeltas. + periods : int, default None + Number of periods to generate. + freq : str or DateOffset, default 'D' + Frequency strings can have multiples, e.g. '5H'. 
+ name : str, default None + Name of the resulting TimedeltaIndex. + closed : str, default None + Make the interval closed with respect to the given frequency to + the 'left', 'right', or both sides (None). + + Returns + ------- + rng : TimedeltaIndex + + Notes + ----- + Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, + exactly three must be specified. If ``freq`` is omitted, the resulting + ``TimedeltaIndex`` will have ``periods`` linearly spaced elements between + ``start`` and ``end`` (closed on both sides). + + To learn more about the frequency strings, please see `this link + `__. + + Examples + -------- + + >>> pd.timedelta_range(start='1 day', periods=4) + TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``closed`` parameter specifies which endpoint is included. The default + behavior is to include both endpoints. + + >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + TimedeltaIndex(['2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``freq`` parameter specifies the frequency of the TimedeltaIndex. + Only fixed frequencies can be passed, non-fixed frequencies such as + 'M' (month end) will raise. + + >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H') + TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', + '1 days 18:00:00', '2 days 00:00:00'], + dtype='timedelta64[ns]', freq='6H') + + Specify ``start``, ``end``, and ``periods``; the frequency is generated + automatically (linearly spaced). 
+ + >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) + TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', + '5 days 00:00:00'], + dtype='timedelta64[ns]', freq=None) + """ + if freq is None and com.any_none(periods, start, end): + freq = "D" + + freq, freq_infer = dtl.maybe_infer_freq(freq) + tdarr = TimedeltaArray._generate_range(start, end, periods, freq, closed=closed) + return TimedeltaIndex._simple_new(tdarr._data, freq=tdarr.freq, name=name) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py new file mode 100755 index 00000000..7a67280f --- /dev/null +++ b/pandas/core/indexing.py @@ -0,0 +1,2489 @@ +from typing import Hashable, List, Tuple, Union + +import numpy as np + +from pandas._libs.indexing import _NDFrameIndexerBase +from pandas._libs.lib import item_from_zerodim +from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import ( + is_float, + is_integer, + is_iterator, + is_list_like, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_sequence, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries +from pandas.core.dtypes.missing import _infer_fill_value, isna + +import pandas.core.common as com +from pandas.core.indexers import ( + check_array_indexer, + is_list_like_indexer, + length_of_indexer, +) +from pandas.core.indexes.api import Index, InvalidIndexError + +# "null slice" +_NS = slice(None, None) + + +# the public IndexSlicerMaker +class _IndexSlice: + """ + Create an object to more easily perform multi-index slicing. + + See Also + -------- + MultiIndex.remove_unused_levels : New MultiIndex with no unused levels. + + Notes + ----- + See :ref:`Defined Levels ` + for further info on slicing a MultiIndex. 
+ + Examples + -------- + + >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']]) + >>> columns = ['foo', 'bar'] + >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))), + index=midx, columns=columns) + + Using the default slice command: + + >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + + Using the IndexSlice class for a more intuitive command: + + >>> idx = pd.IndexSlice + >>> dfmi.loc[idx[:, 'B0':'B1'], :] + foo bar + A0 B0 0 1 + B1 2 3 + A1 B0 8 9 + B1 10 11 + """ + + def __getitem__(self, arg): + return arg + + +IndexSlice = _IndexSlice() + + +class IndexingError(Exception): + pass + + +class IndexingMixin: + """Mixin for adding .loc/.iloc/.at/.iat to Datafames and Series. + """ + + @property + def iloc(self) -> "_iLocIndexer": + """ + Purely integer-location based indexing for selection by position. + + ``.iloc[]`` is primarily integer position based (from ``0`` to + ``length-1`` of the axis), but may also be used with a boolean + array. + + Allowed inputs are: + + - An integer, e.g. ``5``. + - A list or array of integers, e.g. ``[4, 3, 0]``. + - A slice object with ints, e.g. ``1:7``. + - A boolean array. + - A ``callable`` function with one argument (the calling Series or + DataFrame) and that returns valid output for indexing (one of the above). + This is useful in method chains, when you don't have a reference to the + calling object, but would like to base your selection on some value. + + ``.iloc`` will raise ``IndexError`` if a requested indexer is + out-of-bounds, except *slice* indexers which allow out-of-bounds + indexing (this conforms with python/numpy *slice* semantics). + + See more at :ref:`Selection by Position `. + + See Also + -------- + DataFrame.iat : Fast integer location scalar accessor. + DataFrame.loc : Purely label-location based indexer for selection by label. 
+ Series.iloc : Purely integer-location based indexing for + selection by position. + + Examples + -------- + + >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, + ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, + ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] + >>> df = pd.DataFrame(mydict) + >>> df + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + **Indexing just the rows** + + With a scalar integer. + + >>> type(df.iloc[0]) + + >>> df.iloc[0] + a 1 + b 2 + c 3 + d 4 + Name: 0, dtype: int64 + + With a list of integers. + + >>> df.iloc[[0]] + a b c d + 0 1 2 3 4 + >>> type(df.iloc[[0]]) + + + >>> df.iloc[[0, 1]] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + + With a `slice` object. + + >>> df.iloc[:3] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + + With a boolean mask the same length as the index. + + >>> df.iloc[[True, False, True]] + a b c d + 0 1 2 3 4 + 2 1000 2000 3000 4000 + + With a callable, useful in method chains. The `x` passed + to the ``lambda`` is the DataFrame being sliced. This selects + the rows whose index label even. + + >>> df.iloc[lambda x: x.index % 2 == 0] + a b c d + 0 1 2 3 4 + 2 1000 2000 3000 4000 + + **Indexing both axes** + + You can mix the indexer types for the index and columns. Use ``:`` to + select the entire axis. + + With scalar integers. + + >>> df.iloc[0, 1] + 2 + + With lists of integers. + + >>> df.iloc[[0, 2], [1, 3]] + b d + 0 2 4 + 2 2000 4000 + + With `slice` objects. + + >>> df.iloc[1:3, 0:3] + a b c + 1 100 200 300 + 2 1000 2000 3000 + + With a boolean array whose length matches the columns. + + >>> df.iloc[:, [True, False, True, False]] + a c + 0 1 3 + 1 100 300 + 2 1000 3000 + + With a callable function that expects the Series or DataFrame. 
+ + >>> df.iloc[:, lambda df: [0, 2]] + a c + 0 1 3 + 1 100 300 + 2 1000 3000 + """ + return _iLocIndexer("iloc", self) + + @property + def loc(self) -> "_LocIndexer": + """ + Access a group of rows and columns by label(s) or a boolean array. + + ``.loc[]`` is primarily label based, but may also be used with a + boolean array. + + Allowed inputs are: + + - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is + interpreted as a *label* of the index, and **never** as an + integer position along the index). + - A list or array of labels, e.g. ``['a', 'b', 'c']``. + - A slice object with labels, e.g. ``'a':'f'``. + + .. warning:: Note that contrary to usual python slices, **both** the + start and the stop are included + + - A boolean array of the same length as the axis being sliced, + e.g. ``[True, False, True]``. + - A ``callable`` function with one argument (the calling Series or + DataFrame) and that returns valid output for indexing (one of the above) + + See more at :ref:`Selection by Label ` + + Raises + ------ + KeyError + If any items are not found. + + See Also + -------- + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.iloc : Access group of rows and columns by integer position(s). + DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the + Series/DataFrame. + Series.loc : Access group of values using labels. + + Examples + -------- + **Getting values** + + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=['cobra', 'viper', 'sidewinder'], + ... columns=['max_speed', 'shield']) + >>> df + max_speed shield + cobra 1 2 + viper 4 5 + sidewinder 7 8 + + Single label. Note this returns the row as a Series. + + >>> df.loc['viper'] + max_speed 4 + shield 5 + Name: viper, dtype: int64 + + List of labels. Note using ``[[]]`` returns a DataFrame. 
+ + >>> df.loc[['viper', 'sidewinder']] + max_speed shield + viper 4 5 + sidewinder 7 8 + + Single label for row and column + + >>> df.loc['cobra', 'shield'] + 2 + + Slice with labels for row and single label for column. As mentioned + above, note that both the start and stop of the slice are included. + + >>> df.loc['cobra':'viper', 'max_speed'] + cobra 1 + viper 4 + Name: max_speed, dtype: int64 + + Boolean list with the same length as the row axis + + >>> df.loc[[False, False, True]] + max_speed shield + sidewinder 7 8 + + Conditional that returns a boolean Series + + >>> df.loc[df['shield'] > 6] + max_speed shield + sidewinder 7 8 + + Conditional that returns a boolean Series with column labels specified + + >>> df.loc[df['shield'] > 6, ['max_speed']] + max_speed + sidewinder 7 + + Callable that returns a boolean Series + + >>> df.loc[lambda df: df['shield'] == 8] + max_speed shield + sidewinder 7 8 + + **Setting values** + + Set value for all items matching the list of labels + + >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50 + >>> df + max_speed shield + cobra 1 2 + viper 4 50 + sidewinder 7 50 + + Set value for an entire row + + >>> df.loc['cobra'] = 10 + >>> df + max_speed shield + cobra 10 10 + viper 4 50 + sidewinder 7 50 + + Set value for an entire column + + >>> df.loc[:, 'max_speed'] = 30 + >>> df + max_speed shield + cobra 30 10 + viper 30 50 + sidewinder 30 50 + + Set value for rows matching callable condition + + >>> df.loc[df['shield'] > 35] = 0 + >>> df + max_speed shield + cobra 30 10 + viper 0 0 + sidewinder 0 0 + + **Getting values on a DataFrame with an index that has integer labels** + + Another example using integers for the index + + >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], + ... index=[7, 8, 9], columns=['max_speed', 'shield']) + >>> df + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 + + Slice with integer labels for rows. As mentioned above, note that both + the start and stop of the slice are included. 
+ + >>> df.loc[7:9] + max_speed shield + 7 1 2 + 8 4 5 + 9 7 8 + + **Getting values with a MultiIndex** + + A number of examples using a DataFrame with a MultiIndex + + >>> tuples = [ + ... ('cobra', 'mark i'), ('cobra', 'mark ii'), + ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'), + ... ('viper', 'mark ii'), ('viper', 'mark iii') + ... ] + >>> index = pd.MultiIndex.from_tuples(tuples) + >>> values = [[12, 2], [0, 4], [10, 20], + ... [1, 4], [7, 1], [16, 36]] + >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index) + >>> df + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 + + Single label. Note this returns a DataFrame with a single index. + + >>> df.loc['cobra'] + max_speed shield + mark i 12 2 + mark ii 0 4 + + Single index tuple. Note this returns a Series. + + >>> df.loc[('cobra', 'mark ii')] + max_speed 0 + shield 4 + Name: (cobra, mark ii), dtype: int64 + + Single label for row and column. Similar to passing in a tuple, this + returns a Series. + + >>> df.loc['cobra', 'mark i'] + max_speed 12 + shield 2 + Name: (cobra, mark i), dtype: int64 + + Single tuple. Note using ``[[]]`` returns a DataFrame. 
+ + >>> df.loc[[('cobra', 'mark ii')]] + max_speed shield + cobra mark ii 0 4 + + Single tuple for the index with a single label for the column + + >>> df.loc[('cobra', 'mark i'), 'shield'] + 2 + + Slice from index tuple to single label + + >>> df.loc[('cobra', 'mark i'):'viper'] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + mark iii 16 36 + + Slice from index tuple to index tuple + + >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')] + max_speed shield + cobra mark i 12 2 + mark ii 0 4 + sidewinder mark i 10 20 + mark ii 1 4 + viper mark ii 7 1 + """ + return _LocIndexer("loc", self) + + @property + def at(self) -> "_AtIndexer": + """ + Access a single value for a row/column label pair. + + Similar to ``loc``, in that both provide label-based lookups. Use + ``at`` if you only need to get or set a single value in a DataFrame + or Series. + + Raises + ------ + KeyError + If 'label' does not exist in DataFrame. + + See Also + -------- + DataFrame.iat : Access a single value for a row/column pair by integer + position. + DataFrame.loc : Access a group of rows and columns by label(s). + Series.at : Access a single value using a label. + + Examples + -------- + >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... index=[4, 5, 6], columns=['A', 'B', 'C']) + >>> df + A B C + 4 0 2 3 + 5 0 4 1 + 6 10 20 30 + + Get value at specified row/column pair + + >>> df.at[4, 'B'] + 2 + + Set value at specified row/column pair + + >>> df.at[4, 'B'] = 10 + >>> df.at[4, 'B'] + 10 + + Get value within a Series + + >>> df.loc[5].at['B'] + 4 + """ + return _AtIndexer("at", self) + + @property + def iat(self) -> "_iAtIndexer": + """ + Access a single value for a row/column pair by integer position. + + Similar to ``iloc``, in that both provide integer-based lookups. Use + ``iat`` if you only need to get or set a single value in a DataFrame + or Series. 
+ + Raises + ------ + IndexError + When integer position is out of bounds. + + See Also + -------- + DataFrame.at : Access a single value for a row/column label pair. + DataFrame.loc : Access a group of rows and columns by label(s). + DataFrame.iloc : Access a group of rows and columns by integer position(s). + + Examples + -------- + >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... columns=['A', 'B', 'C']) + >>> df + A B C + 0 0 2 3 + 1 0 4 1 + 2 10 20 30 + + Get value at specified row/column pair + + >>> df.iat[1, 2] + 1 + + Set value at specified row/column pair + + >>> df.iat[1, 2] = 10 + >>> df.iat[1, 2] + 10 + + Get value within a series + + >>> df.loc[0].iat[1] + 2 + """ + return _iAtIndexer("iat", self) + + +class _NDFrameIndexer(_NDFrameIndexerBase): + _valid_types: str + axis = None + + def __call__(self, axis=None): + # we need to return a copy of ourselves + new_self = type(self)(self.name, self.obj) + + if axis is not None: + axis = self.obj._get_axis_number(axis) + new_self.axis = axis + return new_self + + # TODO: remove once geopandas no longer needs this + def __getitem__(self, key): + # Used in ix and downstream in geopandas _CoordinateIndexer + if type(key) is tuple: + # Note: we check the type exactly instead of with isinstance + # because NamedTuple is checked separately. + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + try: + values = self.obj._get_value(*key) + except (KeyError, TypeError, InvalidIndexError, AttributeError): + # TypeError occurs here if the key has non-hashable entries, + # generally slice or list. + # TODO(ix): most/all of the TypeError cases here are for ix, + # so this check can be removed once ix is removed. 
+ # The InvalidIndexError is only catched for compatibility + # with geopandas, see + # https://github.com/pandas-dev/pandas/issues/27258 + # TODO: The AttributeError is for IntervalIndex which + # incorrectly implements get_value, see + # https://github.com/pandas-dev/pandas/issues/27865 + pass + else: + if is_scalar(values): + return values + + return self._getitem_tuple(key) + else: + # we by definition only have the 0th axis + axis = self.axis or 0 + + key = com.apply_if_callable(key, self.obj) + return self._getitem_axis(key, axis=axis) + + def _get_label(self, label, axis: int): + if self.ndim == 1: + # for perf reasons we want to try _xs first + # as its basically direct indexing + # but will fail when the index is not present + # see GH5667 + return self.obj._xs(label, axis=axis) + elif isinstance(label, tuple) and isinstance(label[axis], slice): + raise IndexingError("no slices here, handle elsewhere") + + return self.obj._xs(label, axis=axis) + + def _get_loc(self, key: int, axis: int): + return self.obj._ixs(key, axis=axis) + + def _slice(self, obj, axis: int, kind=None): + return self.obj._slice(obj, axis=axis, kind=kind) + + def _get_setitem_indexer(self, key): + if self.axis is not None: + return self._convert_tuple(key) + + ax = self.obj._get_axis(0) + + if isinstance(ax, ABCMultiIndex) and self.name != "iloc": + try: + return ax.get_loc(key) + except (TypeError, KeyError, InvalidIndexError): + # TypeError e.g. 
passed a bool + pass + + if isinstance(key, tuple): + try: + return self._convert_tuple(key) + except IndexingError: + pass + + if isinstance(key, range): + return list(key) + + axis = self.axis or 0 + try: + return self._convert_to_indexer(key, axis=axis) + except TypeError as e: + + # invalid indexer type vs 'other' indexing errors + if "cannot do" in str(e): + raise + raise IndexingError(key) + + def __setitem__(self, key, value): + if isinstance(key, tuple): + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + else: + key = com.apply_if_callable(key, self.obj) + indexer = self._get_setitem_indexer(key) + self._setitem_with_indexer(indexer, value) + + def _validate_key(self, key, axis: int): + """ + Ensure that key is valid for current indexer. + + Parameters + ---------- + key : scalar, slice or list-like + Key requested. + axis : int + Dimension on which the indexing is being made. + + Raises + ------ + TypeError + If the key (or some element of it) has wrong type. + IndexError + If the key (or some element of it) is out of bounds. + KeyError + If the key was not found. + """ + raise AbstractMethodError(self) + + def _has_valid_tuple(self, key: Tuple): + """ + Check the key for valid keys across my indexer. 
+ """ + for i, k in enumerate(key): + if i >= self.ndim: + raise IndexingError("Too many indexers") + try: + self._validate_key(k, i) + except ValueError: + raise ValueError( + "Location based indexing can only have " + f"[{self._valid_types}] types" + ) + + def _is_nested_tuple_indexer(self, tup: Tuple) -> bool: + """ + Returns + ------- + bool + """ + if any(isinstance(ax, ABCMultiIndex) for ax in self.obj.axes): + return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) + return False + + def _convert_tuple(self, key): + keyidx = [] + if self.axis is not None: + axis = self.obj._get_axis_number(self.axis) + for i in range(self.ndim): + if i == axis: + keyidx.append(self._convert_to_indexer(key, axis=axis)) + else: + keyidx.append(slice(None)) + else: + for i, k in enumerate(key): + if i >= self.ndim: + raise IndexingError("Too many indexers") + idx = self._convert_to_indexer(k, axis=i) + keyidx.append(idx) + return tuple(keyidx) + + def _convert_scalar_indexer(self, key, axis: int): + # if we are accessing via lowered dim, use the last dim + ax = self.obj._get_axis(min(axis, self.ndim - 1)) + # a scalar + return ax._convert_scalar_indexer(key, kind=self.name) + + def _convert_slice_indexer(self, key: slice, axis: int): + # if we are accessing via lowered dim, use the last dim + ax = self.obj._get_axis(min(axis, self.ndim - 1)) + return ax._convert_slice_indexer(key, kind=self.name) + + def _has_valid_setitem_indexer(self, indexer) -> bool: + return True + + def _has_valid_positional_setitem_indexer(self, indexer) -> bool: + """ + Validate that a positional indexer cannot enlarge its target + will raise if needed, does not modify the indexer externally. 
+ + Returns + ------- + bool + """ + if isinstance(indexer, dict): + raise IndexError(f"{self.name} cannot enlarge its target object") + else: + if not isinstance(indexer, tuple): + indexer = _tuplify(self.ndim, indexer) + for ax, i in zip(self.obj.axes, indexer): + if isinstance(i, slice): + # should check the stop slice? + pass + elif is_list_like_indexer(i): + # should check the elements? + pass + elif is_integer(i): + if i >= len(ax): + raise IndexError( + f"{self.name} cannot enlarge its target object" + ) + elif isinstance(i, dict): + raise IndexError(f"{self.name} cannot enlarge its target object") + + return True + + def _setitem_with_indexer(self, indexer, value): + self._has_valid_setitem_indexer(indexer) + + # also has the side effect of consolidating in-place + from pandas import Series + + info_axis = self.obj._info_axis_number + + # maybe partial set + take_split_path = self.obj._is_mixed_type + + # if there is only one block/type, still have to take split path + # unless the block is one-dimensional or it can hold the value + if not take_split_path and self.obj._data.blocks: + (blk,) = self.obj._data.blocks + if 1 < blk.ndim: # in case of dict, keys are indices + val = list(value.values()) if isinstance(value, dict) else value + take_split_path = not blk._can_hold_element(val) + + # if we have any multi-indexes that have non-trivial slices + # (not null slices) then we must take the split path, xref + # GH 10360, GH 27841 + if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): + for i, ax in zip(indexer, self.obj.axes): + if isinstance(ax, ABCMultiIndex) and not ( + is_integer(i) or com.is_null_slice(i) + ): + take_split_path = True + break + + if isinstance(indexer, tuple): + nindexer = [] + for i, idx in enumerate(indexer): + if isinstance(idx, dict): + + # reindex the axis to the new value + # and set inplace + key, _ = convert_missing_indexer(idx) + + # if this is the items axes, then take the main missing + # path first + # this 
correctly sets the dtype and avoids cache issues + # essentially this separates out the block that is needed + # to possibly be modified + if self.ndim > 1 and i == self.obj._info_axis_number: + + # add the new item, and set the value + # must have all defined axes if we have a scalar + # or a list-like on the non-info axes if we have a + # list-like + len_non_info_axes = ( + len(_ax) for _i, _ax in enumerate(self.obj.axes) if _i != i + ) + if any(not l for l in len_non_info_axes): + if not is_list_like_indexer(value): + raise ValueError( + "cannot set a frame with no " + "defined index and a scalar" + ) + self.obj[key] = value + return self.obj + + # add a new item with the dtype setup + self.obj[key] = _infer_fill_value(value) + + new_indexer = convert_from_missing_indexer_tuple( + indexer, self.obj.axes + ) + self._setitem_with_indexer(new_indexer, value) + + return self.obj + + # reindex the axis + # make sure to clear the cache because we are + # just replacing the block manager here + # so the object is the same + index = self.obj._get_axis(i) + labels = index.insert(len(index), key) + self.obj._data = self.obj.reindex(labels, axis=i)._data + self.obj._maybe_update_cacher(clear=True) + self.obj._is_copy = None + + nindexer.append(labels.get_loc(key)) + + else: + nindexer.append(idx) + + indexer = tuple(nindexer) + else: + + indexer, missing = convert_missing_indexer(indexer) + + if missing: + return self._setitem_with_indexer_missing(indexer, value) + + # set + item_labels = self.obj._get_axis(info_axis) + + # align and set the values + if take_split_path: + # Above we only set take_split_path to True for 2D cases + assert self.ndim == 2 + assert info_axis == 1 + + if not isinstance(indexer, tuple): + indexer = _tuplify(self.ndim, indexer) + + if isinstance(value, ABCSeries): + value = self._align_series(indexer, value) + + info_idx = indexer[info_axis] + if is_integer(info_idx): + info_idx = [info_idx] + labels = item_labels[info_idx] + + # if we have a 
partial multiindex, then need to adjust the plane + # indexer here + if len(labels) == 1 and isinstance( + self.obj[labels[0]].axes[0], ABCMultiIndex + ): + item = labels[0] + obj = self.obj[item] + index = obj.index + idx = indexer[:info_axis][0] + + plane_indexer = tuple([idx]) + indexer[info_axis + 1 :] + lplane_indexer = length_of_indexer(plane_indexer[0], index) + + # require that we are setting the right number of values that + # we are indexing + if ( + is_list_like_indexer(value) + and np.iterable(value) + and lplane_indexer != len(value) + ): + + if len(obj[idx]) != len(value): + raise ValueError( + "cannot set using a multi-index " + "selection indexer with a different " + "length than the value" + ) + + # make sure we have an ndarray + value = getattr(value, "values", value).ravel() + + # we can directly set the series here + # as we select a slice indexer on the mi + if isinstance(idx, slice): + idx = index._convert_slice_indexer(idx) + obj._consolidate_inplace() + obj = obj.copy() + obj._data = obj._data.setitem(indexer=tuple([idx]), value=value) + self.obj[item] = obj + return + + # non-mi + else: + plane_indexer = indexer[:info_axis] + indexer[info_axis + 1 :] + plane_axis = self.obj.axes[:info_axis][0] + lplane_indexer = length_of_indexer(plane_indexer[0], plane_axis) + + def setter(item, v): + s = self.obj[item] + pi = plane_indexer[0] if lplane_indexer == 1 else plane_indexer + + # perform the equivalent of a setitem on the info axis + # as we have a null slice or a slice with full bounds + # which means essentially reassign to the columns of a + # multi-dim object + # GH6149 (null slice), GH10408 (full bounds) + if isinstance(pi, tuple) and all( + com.is_null_slice(idx) or com.is_full_slice(idx, len(self.obj)) + for idx in pi + ): + s = v + else: + # set the item, possibly having a dtype change + s._consolidate_inplace() + s = s.copy() + s._data = s._data.setitem(indexer=pi, value=v) + s._maybe_update_cacher(clear=True) + + # reset the sliced 
object if unique + self.obj[item] = s + + # we need an iterable, with a ndim of at least 1 + # eg. don't pass through np.array(0) + if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: + + # we have an equal len Frame + if isinstance(value, ABCDataFrame): + sub_indexer = list(indexer) + multiindex_indexer = isinstance(labels, ABCMultiIndex) + + for item in labels: + if item in value: + sub_indexer[info_axis] = item + v = self._align_series( + tuple(sub_indexer), value[item], multiindex_indexer + ) + else: + v = np.nan + + setter(item, v) + + # we have an equal len ndarray/convertible to our labels + # hasattr first, to avoid coercing to ndarray without reason. + # But we may be relying on the ndarray coercion to check ndim. + # Why not just convert to an ndarray earlier on if needed? + elif np.ndim(value) == 2: + + # note that this coerces the dtype if we are mixed + # GH 7551 + value = np.array(value, dtype=object) + if len(labels) != value.shape[1]: + raise ValueError( + "Must have equal len keys and value " + "when setting with an ndarray" + ) + + for i, item in enumerate(labels): + + # setting with a list, recoerces + setter(item, value[:, i].tolist()) + + # we have an equal len list/ndarray + elif _can_do_equal_len( + labels, value, plane_indexer, lplane_indexer, self.obj + ): + setter(labels[0], value) + + # per label values + else: + + if len(labels) != len(value): + raise ValueError( + "Must have equal len keys and value " + "when setting with an iterable" + ) + + for item, v in zip(labels, value): + setter(item, v) + else: + + # scalar + for item in labels: + setter(item, value) + + else: + if isinstance(indexer, tuple): + indexer = maybe_convert_ix(*indexer) + + # if we are setting on the info axis ONLY + # set using those methods to avoid block-splitting + # logic here + if ( + len(indexer) > info_axis + and is_integer(indexer[info_axis]) + and all( + com.is_null_slice(idx) + for i, idx in enumerate(indexer) + if i != info_axis + ) + and 
item_labels.is_unique + ): + self.obj[item_labels[indexer[info_axis]]] = value + return + + if isinstance(value, (ABCSeries, dict)): + # TODO(EA): ExtensionBlock.setitem this causes issues with + # setting for extensionarrays that store dicts. Need to decide + # if it's worth supporting that. + value = self._align_series(indexer, Series(value)) + + elif isinstance(value, ABCDataFrame): + value = self._align_frame(indexer, value) + + # check for chained assignment + self.obj._check_is_chained_assignment_possible() + + # actually do the set + self.obj._consolidate_inplace() + self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) + self.obj._maybe_update_cacher(clear=True) + + def _setitem_with_indexer_missing(self, indexer, value): + """ + Insert new row(s) or column(s) into the Series or DataFrame. + """ + from pandas import Series + + # reindex the axis to the new value + # and set inplace + if self.ndim == 1: + index = self.obj.index + new_index = index.insert(len(index), indexer) + + # we have a coerced indexer, e.g. a float + # that matches in an Int64Index, so + # we will not create a duplicate index, rather + # index to that element + # e.g. 
0.0 -> 0 + # GH#12246 + if index.is_unique: + new_indexer = index.get_indexer([new_index[-1]]) + if (new_indexer != -1).any(): + return self._setitem_with_indexer(new_indexer, value) + + # this preserves dtype of the value + new_values = Series([value])._values + if len(self.obj._values): + # GH#22717 handle casting compatibility that np.concatenate + # does incorrectly + new_values = concat_compat([self.obj._values, new_values]) + self.obj._data = self.obj._constructor( + new_values, index=new_index, name=self.obj.name + )._data + self.obj._maybe_update_cacher(clear=True) + return self.obj + + elif self.ndim == 2: + + if not len(self.obj.columns): + # no columns and scalar + raise ValueError("cannot set a frame with no defined columns") + + if isinstance(value, ABCSeries): + # append a Series + value = value.reindex(index=self.obj.columns, copy=True) + value.name = indexer + + else: + # a list-list + if is_list_like_indexer(value): + # must have conforming columns + if len(value) != len(self.obj.columns): + raise ValueError("cannot set a row with mismatched columns") + + value = Series(value, index=self.obj.columns, name=indexer) + + self.obj._data = self.obj.append(value)._data + self.obj._maybe_update_cacher(clear=True) + return self.obj + + def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer: bool = False): + """ + Parameters + ---------- + indexer : tuple, slice, scalar + Indexer used to get the locations that will be set to `ser`. + ser : pd.Series + Values to assign to the locations specified by `indexer`. + multiindex_indexer : boolean, optional + Defaults to False. Should be set to True if `indexer` was from + a `pd.MultiIndex`, to avoid unnecessary broadcasting. 
+ + Returns + ------- + `np.array` of `ser` broadcast to the appropriate shape for assignment + to the locations selected by `indexer` + """ + if isinstance(indexer, (slice, np.ndarray, list, Index)): + indexer = tuple([indexer]) + + if isinstance(indexer, tuple): + + # flatten np.ndarray indexers + def ravel(i): + return i.ravel() if isinstance(i, np.ndarray) else i + + indexer = tuple(map(ravel, indexer)) + + aligners = [not com.is_null_slice(idx) for idx in indexer] + sum_aligners = sum(aligners) + single_aligner = sum_aligners == 1 + is_frame = self.ndim == 2 + obj = self.obj + + # are we a single alignable value on a non-primary + # dim (e.g. panel: 1,2, or frame: 0) ? + # hence need to align to a single axis dimension + # rather that find all valid dims + + # frame + if is_frame: + single_aligner = single_aligner and aligners[0] + + # we have a frame, with multiple indexers on both axes; and a + # series, so need to broadcast (see GH5206) + if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer): + ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values + + # single indexer + if len(indexer) > 1 and not multiindex_indexer: + len_indexer = len(indexer[1]) + ser = np.tile(ser, len_indexer).reshape(len_indexer, -1).T + + return ser + + for i, idx in enumerate(indexer): + ax = obj.axes[i] + + # multiple aligners (or null slices) + if is_sequence(idx) or isinstance(idx, slice): + if single_aligner and com.is_null_slice(idx): + continue + new_ix = ax[idx] + if not is_list_like_indexer(new_ix): + new_ix = Index([new_ix]) + else: + new_ix = Index(new_ix) + if ser.index.equals(new_ix) or not len(new_ix): + return ser._values.copy() + + return ser.reindex(new_ix)._values + + # 2 dims + elif single_aligner: + + # reindex along index + ax = self.obj.axes[1] + if ser.index.equals(ax) or not len(ax): + return ser._values.copy() + return ser.reindex(ax)._values + + elif is_scalar(indexer): + ax = self.obj._get_axis(1) + + if ser.index.equals(ax): + 
return ser._values.copy() + + return ser.reindex(ax)._values + + raise ValueError("Incompatible indexer with Series") + + def _align_frame(self, indexer, df: ABCDataFrame): + is_frame = self.ndim == 2 + + if isinstance(indexer, tuple): + + idx, cols = None, None + sindexers = [] + for i, ix in enumerate(indexer): + ax = self.obj.axes[i] + if is_sequence(ix) or isinstance(ix, slice): + if isinstance(ix, np.ndarray): + ix = ix.ravel() + if idx is None: + idx = ax[ix] + elif cols is None: + cols = ax[ix] + else: + break + else: + sindexers.append(i) + + if idx is not None and cols is not None: + + if df.index.equals(idx) and df.columns.equals(cols): + val = df.copy()._values + else: + val = df.reindex(idx, columns=cols)._values + return val + + elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame: + ax = self.obj.index[indexer] + if df.index.equals(ax): + val = df.copy()._values + else: + + # we have a multi-index and are trying to align + # with a particular, level GH3738 + if ( + isinstance(ax, ABCMultiIndex) + and isinstance(df.index, ABCMultiIndex) + and ax.nlevels != df.index.nlevels + ): + raise TypeError( + "cannot align on a multi-index with out " + "specifying the join levels" + ) + + val = df.reindex(index=ax)._values + return val + + raise ValueError("Incompatible indexer with DataFrame") + + def _getitem_tuple(self, tup: Tuple): + try: + return self._getitem_lowerdim(tup) + except IndexingError: + pass + + # no multi-index, so validate all of the indexers + self._has_valid_tuple(tup) + + # ugly hack for GH #836 + if self._multi_take_opportunity(tup): + return self._multi_take(tup) + + # no shortcut needed + retval = self.obj + for i, key in enumerate(tup): + if com.is_null_slice(key): + continue + + retval = getattr(retval, self.name)._getitem_axis(key, axis=i) + + return retval + + def _multi_take_opportunity(self, tup: Tuple) -> bool: + """ + Check whether there is the possibility to use ``_multi_take``. 
+ + Currently the limit is that all axes being indexed, must be indexed with + list-likes. + + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis. + + Returns + ------- + bool + Whether the current indexing, + can be passed through `_multi_take`. + """ + if not all(is_list_like_indexer(x) for x in tup): + return False + + # just too complicated + if any(com.is_bool_indexer(x) for x in tup): + return False + + return True + + def _multi_take(self, tup: Tuple): + """ + Create the indexers for the passed tuple of keys, and + executes the take operation. This allows the take operation to be + executed all at once, rather than once for each dimension. + Improving efficiency. + + Parameters + ---------- + tup : tuple + Tuple of indexers, one per axis. + + Returns + ------- + values: same type as the object being indexed + """ + # GH 836 + o = self.obj + d = { + axis: self._get_listlike_indexer(key, axis) + for (key, axis) in zip(tup, o._AXIS_ORDERS) + } + return o._reindex_with_indexers(d, copy=True, allow_dups=True) + + def _convert_for_reindex(self, key, axis: int): + return key + + def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): + # we have an axis0 multi-index, handle or raise + axis = self.axis or 0 + try: + # fast path for series or for tup devoid of slices + return self._get_label(tup, axis=axis) + except TypeError: + # slices are unhashable + pass + except KeyError as ek: + # raise KeyError if number of indexers match + # else IndexingError will be raised + if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim: + raise ek + + return None + + def _getitem_lowerdim(self, tup: Tuple): + + # we can directly get the axis result since the axis is specified + if self.axis is not None: + axis = self.obj._get_axis_number(self.axis) + return self._getitem_axis(tup, axis=axis) + + # we may have a nested tuples indexer here + if self._is_nested_tuple_indexer(tup): + return self._getitem_nested_tuple(tup) + + # we maybe be using a tuple 
to represent multiple dimensions here + ax0 = self.obj._get_axis(0) + # ...but iloc should handle the tuple as simple integer-location + # instead of checking it as multiindex representation (GH 13797) + if isinstance(ax0, ABCMultiIndex) and self.name != "iloc": + result = self._handle_lowerdim_multi_index_axis0(tup) + if result is not None: + return result + + if len(tup) > self.ndim: + raise IndexingError("Too many indexers. handle elsewhere") + + for i, key in enumerate(tup): + if is_label_like(key) or isinstance(key, tuple): + section = self._getitem_axis(key, axis=i) + + # we have yielded a scalar ? + if not is_list_like_indexer(section): + return section + + elif section.ndim == self.ndim: + # we're in the middle of slicing through a MultiIndex + # revise the key wrt to `section` by inserting an _NS + new_key = tup[:i] + (_NS,) + tup[i + 1 :] + + else: + new_key = tup[:i] + tup[i + 1 :] + + # unfortunately need an odious kludge here because of + # DataFrame transposing convention + if ( + isinstance(section, ABCDataFrame) + and i > 0 + and len(new_key) == 2 + ): + a, b = new_key + new_key = b, a + + if len(new_key) == 1: + new_key = new_key[0] + + # Slices should return views, but calling iloc/loc with a null + # slice returns a new object. 
+ if com.is_null_slice(new_key): + return section + # This is an elided recursive call to iloc/loc/etc' + return getattr(section, self.name)[new_key] + + raise IndexingError("not applicable") + + def _getitem_nested_tuple(self, tup: Tuple): + # we have a nested tuple so have at least 1 multi-index level + # we should be able to match up the dimensionality here + + # we have too many indexers for our dim, but have at least 1 + # multi-index dimension, try to see if we have something like + # a tuple passed to a series with a multi-index + if len(tup) > self.ndim: + result = self._handle_lowerdim_multi_index_axis0(tup) + if result is not None: + return result + + # this is a series with a multi-index specified a tuple of + # selectors + axis = self.axis or 0 + return self._getitem_axis(tup, axis=axis) + + # handle the multi-axis by taking sections and reducing + # this is iterative + obj = self.obj + axis = 0 + for i, key in enumerate(tup): + + if com.is_null_slice(key): + axis += 1 + continue + + current_ndim = obj.ndim + obj = getattr(obj, self.name)._getitem_axis(key, axis=axis) + axis += 1 + + # if we have a scalar, we are done + if is_scalar(obj) or not hasattr(obj, "ndim"): + break + + # has the dim of the obj changed? 
+ # GH 7199 + if obj.ndim < current_ndim: + axis -= 1 + + return obj + + # TODO: remove once geopandas no longer needs __getitem__ + def _getitem_axis(self, key, axis: int): + if is_iterator(key): + key = list(key) + self._validate_key(key, axis) + + labels = self.obj._get_axis(axis) + if isinstance(key, slice): + return self._get_slice_axis(key, axis=axis) + elif is_list_like_indexer(key) and not ( + isinstance(key, tuple) and isinstance(labels, ABCMultiIndex) + ): + + if hasattr(key, "ndim") and key.ndim > 1: + raise ValueError("Cannot index with multidimensional key") + + return self._getitem_iterable(key, axis=axis) + else: + + # maybe coerce a float scalar to integer + key = labels._maybe_cast_indexer(key) + + if is_integer(key): + if axis == 0 and isinstance(labels, ABCMultiIndex): + try: + return self._get_label(key, axis=axis) + except (KeyError, TypeError): + if self.obj.index.levels[0].is_integer(): + raise + + # this is the fallback! (for a non-float, non-integer index) + if not labels.is_floating() and not labels.is_integer(): + return self._get_loc(key, axis=axis) + + return self._get_label(key, axis=axis) + + def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False): + """ + Transform a list-like of keys into a new index and an indexer. + + Parameters + ---------- + key : list-like + Targeted labels. + axis: int + Dimension on which the indexing is being made. + raise_missing: bool, default False + Whether to raise a KeyError if some labels were not found. + Will be removed in the future, and then this method will always behave as + if ``raise_missing=True``. + + Raises + ------ + KeyError + If at least one key was requested but none was found, and + raise_missing=True. + + Returns + ------- + keyarr: Index + New index (coinciding with 'key' if the axis is unique). + values : array-like + Indexer for the return object, -1 denotes keys not found. 
+ """ + o = self.obj + ax = o._get_axis(axis) + + # Have the index compute an indexer or return None + # if it cannot handle: + indexer, keyarr = ax._convert_listlike_indexer(key, kind=self.name) + # We only act on all found values: + if indexer is not None and (indexer != -1).all(): + self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing) + return ax[indexer], indexer + + if ax.is_unique and not getattr(ax, "is_overlapping", False): + # If we are trying to get actual keys from empty Series, we + # patiently wait for a KeyError later on - otherwise, convert + if len(ax) or not len(key): + key = self._convert_for_reindex(key, axis) + indexer = ax.get_indexer_for(key) + keyarr = ax.reindex(keyarr)[0] + else: + keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr) + + self._validate_read_indexer( + keyarr, indexer, o._get_axis_number(axis), raise_missing=raise_missing + ) + return keyarr, indexer + + def _getitem_iterable(self, key, axis: int): + """ + Index current object with an an iterable key. + + The iterable key can be a boolean indexer or a collection of keys. + + Parameters + ---------- + key : iterable + Targeted labels or boolean indexer. + axis: int + Dimension on which the indexing is being made. + + Raises + ------ + KeyError + If no key was found. Will change in the future to raise if not all + keys were found. + IndexingError + If the boolean indexer is unalignable with the object being + indexed. + + Returns + ------- + scalar, DataFrame, or Series: indexed value(s). 
+ """ + # caller is responsible for ensuring non-None axis + self._validate_key(key, axis) + + labels = self.obj._get_axis(axis) + + if com.is_bool_indexer(key): + # A boolean indexer + key = check_bool_indexer(labels, key) + (inds,) = key.nonzero() + return self.obj._take_with_is_copy(inds, axis=axis) + else: + # A collection of keys + keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False) + return self.obj._reindex_with_indexers( + {axis: [keyarr, indexer]}, copy=True, allow_dups=True + ) + + def _validate_read_indexer( + self, key, indexer, axis: int, raise_missing: bool = False + ): + """ + Check that indexer can be used to return a result. + + e.g. at least one element was found, + unless the list of keys was actually empty. + + Parameters + ---------- + key : list-like + Targeted labels (only used to show correct error message). + indexer: array-like of booleans + Indices corresponding to the key, + (with -1 indicating not found). + axis: int + Dimension on which the indexing is being made. + raise_missing: bool + Whether to raise a KeyError if some labels are not found. Will be + removed in the future, and then this method will always behave as + if raise_missing=True. + + Raises + ------ + KeyError + If at least one key was requested but none was found, and + raise_missing=True. + """ + ax = self.obj._get_axis(axis) + + if len(key) == 0: + return + + # Count missing values: + missing = (indexer < 0).sum() + + if missing: + if missing == len(indexer): + axis_name = self.obj._get_axis_name(axis) + raise KeyError(f"None of [{key}] are in the [{axis_name}]") + + # We (temporarily) allow for some missing keys with .loc, except in + # some cases (e.g. 
setting) in which "raise_missing" will be False + if not (self.name == "loc" and not raise_missing): + not_found = list(set(key) - set(ax)) + raise KeyError(f"{not_found} not in index") + + # we skip the warning on Categorical/Interval + # as this check is actually done (check for + # non-missing values), but a bit later in the + # code, so we want to avoid warning & then + # just raising + if not (ax.is_categorical() or ax.is_interval()): + raise KeyError( + "Passing list-likes to .loc or [] with any missing labels " + "is no longer supported, see " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 + ) + + def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): + """ + Convert indexing key into something we can use to do actual fancy + indexing on a ndarray. + + Examples + ix[:5] -> slice(0, 5) + ix[[1,2,3]] -> [1,2,3] + ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz) + + Going by Zen of Python? + 'In the face of ambiguity, refuse the temptation to guess.' + raise AmbiguousIndexError with integer labels? 
+ - No, prefer label-based indexing + """ + labels = self.obj._get_axis(axis) + + if isinstance(obj, slice): + return self._convert_slice_indexer(obj, axis) + + # try to find out correct indexer, if not type correct raise + try: + obj = self._convert_scalar_indexer(obj, axis) + except TypeError: + # but we will allow setting + pass + + # see if we are positional in nature + is_int_index = labels.is_integer() + is_int_positional = is_integer(obj) and not is_int_index + + # if we are a label return me + try: + return labels.get_loc(obj) + except LookupError: + if isinstance(obj, tuple) and isinstance(labels, ABCMultiIndex): + if len(obj) == labels.nlevels: + return {"key": obj} + raise + except TypeError: + pass + except ValueError: + if not is_int_positional: + raise + + # a positional + if is_int_positional: + + # if we are setting and its not a valid location + # its an insert which fails by definition + + if self.name == "loc": + # always valid + return {"key": obj} + + if obj >= self.obj.shape[axis] and not isinstance(labels, ABCMultiIndex): + # a positional + raise ValueError("cannot set by positional indexing with enlargement") + + return obj + + if is_nested_tuple(obj, labels): + return labels.get_locs(obj) + + elif is_list_like_indexer(obj): + + if com.is_bool_indexer(obj): + obj = check_bool_indexer(labels, obj) + (inds,) = obj.nonzero() + return inds + else: + # When setting, missing keys are not allowed, even with .loc: + return self._get_listlike_indexer(obj, axis, raise_missing=True)[1] + else: + try: + return labels.get_loc(obj) + except LookupError: + # allow a not found key only if we are a setter + if not is_list_like_indexer(obj): + return {"key": obj} + raise + + def _get_slice_axis(self, slice_obj: slice, axis: int): + # caller is responsible for ensuring non-None axis + obj = self.obj + + if not need_slice(slice_obj): + return obj.copy(deep=False) + + indexer = self._convert_slice_indexer(slice_obj, axis) + return self._slice(indexer, axis=axis, 
kind="iloc") + + +class _LocationIndexer(_NDFrameIndexer): + def __getitem__(self, key): + if type(key) is tuple: + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + if self._is_scalar_access(key): + try: + return self._getitem_scalar(key) + except (KeyError, IndexError, AttributeError): + pass + return self._getitem_tuple(key) + else: + # we by definition only have the 0th axis + axis = self.axis or 0 + + maybe_callable = com.apply_if_callable(key, self.obj) + return self._getitem_axis(maybe_callable, axis=axis) + + def _is_scalar_access(self, key: Tuple): + raise NotImplementedError() + + def _getitem_scalar(self, key): + raise NotImplementedError() + + def _getitem_axis(self, key, axis: int): + raise NotImplementedError() + + def _getbool_axis(self, key, axis: int): + # caller is responsible for ensuring non-None axis + labels = self.obj._get_axis(axis) + key = check_bool_indexer(labels, key) + inds = key.nonzero()[0] + return self.obj._take_with_is_copy(inds, axis=axis) + + def _get_slice_axis(self, slice_obj: slice, axis: int): + """ + This is pretty simple as we just have to deal with labels. + """ + # caller is responsible for ensuring non-None axis + obj = self.obj + if not need_slice(slice_obj): + return obj.copy(deep=False) + + labels = obj._get_axis(axis) + indexer = labels.slice_indexer( + slice_obj.start, slice_obj.stop, slice_obj.step, kind=self.name + ) + + if isinstance(indexer, slice): + return self._slice(indexer, axis=axis, kind="iloc") + else: + # DatetimeIndex overrides Index.slice_indexer and may + # return a DatetimeIndex instead of a slice object. + return self.obj._take_with_is_copy(indexer, axis=axis) + + +@Appender(IndexingMixin.loc.__doc__) +class _LocIndexer(_LocationIndexer): + _valid_types = ( + "labels (MUST BE IN THE INDEX), slices of labels (BOTH " + "endpoints included! 
Can be slices of integers if the " + "index is integers), listlike of labels, boolean" + ) + + @Appender(_NDFrameIndexer._validate_key.__doc__) + def _validate_key(self, key, axis: int): + + # valid for a collection of labels (we check their presence later) + # slice of labels (where start-end in labels) + # slice of integers (only if in the labels) + # boolean + + if isinstance(key, slice): + return + + if com.is_bool_indexer(key): + return + + if not is_list_like_indexer(key): + self._convert_scalar_indexer(key, axis) + + def _is_scalar_access(self, key: Tuple) -> bool: + """ + Returns + ------- + bool + """ + # this is a shortcut accessor to both .loc and .iloc + # that provide the equivalent access of .at and .iat + # a) avoid getting things via sections and (to minimize dtype changes) + # b) provide a performant path + if len(key) != self.ndim: + return False + + for i, k in enumerate(key): + if not is_scalar(k): + return False + + ax = self.obj.axes[i] + if isinstance(ax, ABCMultiIndex): + return False + + if isinstance(k, str) and ax._supports_partial_string_indexing: + # partial string indexing, df.loc['2000', 'A'] + # should not be considered scalar + return False + + if not ax.is_unique: + return False + + return True + + def _getitem_scalar(self, key): + # a fast-path to scalar access + # if not, raise + values = self.obj._get_value(*key) + return values + + def _get_partial_string_timestamp_match_key(self, key, labels): + """ + Translate any partial string timestamp matches in key, returning the + new key. + + (GH 10331) + """ + if isinstance(labels, ABCMultiIndex): + if ( + isinstance(key, str) + and labels.levels[0]._supports_partial_string_indexing + ): + # Convert key '2016-01-01' to + # ('2016-01-01'[, slice(None, None, None)]+) + key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) + + if isinstance(key, tuple): + # Convert (..., '2016-01-01', ...) in tuple to + # (..., slice('2016-01-01', '2016-01-01', None), ...) 
+ new_key = [] + for i, component in enumerate(key): + if ( + isinstance(component, str) + and labels.levels[i]._supports_partial_string_indexing + ): + new_key.append(slice(component, component, None)) + else: + new_key.append(component) + key = tuple(new_key) + + return key + + def _getitem_axis(self, key, axis: int): + key = item_from_zerodim(key) + if is_iterator(key): + key = list(key) + + labels = self.obj._get_axis(axis) + key = self._get_partial_string_timestamp_match_key(key, labels) + + if isinstance(key, slice): + self._validate_key(key, axis) + return self._get_slice_axis(key, axis=axis) + elif com.is_bool_indexer(key): + return self._getbool_axis(key, axis=axis) + elif is_list_like_indexer(key): + + # convert various list-like indexers + # to a list of keys + # we will use the *values* of the object + # and NOT the index if its a PandasObject + if isinstance(labels, ABCMultiIndex): + + if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: + # Series, or 0,1 ndim ndarray + # GH 14730 + key = list(key) + elif isinstance(key, ABCDataFrame): + # GH 15438 + raise NotImplementedError( + "Indexing a MultiIndex with a " + "DataFrame key is not " + "implemented" + ) + elif hasattr(key, "ndim") and key.ndim > 1: + raise NotImplementedError( + "Indexing a MultiIndex with a " + "multidimensional key is not " + "implemented" + ) + + if ( + not isinstance(key, tuple) + and len(key) + and not isinstance(key[0], tuple) + ): + key = tuple([key]) + + # an iterable multi-selection + if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)): + + if hasattr(key, "ndim") and key.ndim > 1: + raise ValueError("Cannot index with multidimensional key") + + return self._getitem_iterable(key, axis=axis) + + # nested tuple slicing + if is_nested_tuple(key, labels): + locs = labels.get_locs(key) + indexer = [slice(None)] * self.ndim + indexer[axis] = locs + return self.obj.iloc[tuple(indexer)] + + # fall thru to straight lookup + self._validate_key(key, axis) + 
return self._get_label(key, axis=axis) + + +@Appender(IndexingMixin.iloc.__doc__) +class _iLocIndexer(_LocationIndexer): + _valid_types = ( + "integer, integer slice (START point is INCLUDED, END " + "point is EXCLUDED), listlike of integers, boolean array" + ) + _get_slice_axis = _NDFrameIndexer._get_slice_axis + + def _validate_key(self, key, axis: int): + if com.is_bool_indexer(key): + if hasattr(key, "index") and isinstance(key.index, Index): + if key.index.inferred_type == "integer": + raise NotImplementedError( + "iLocation based boolean " + "indexing on an integer type " + "is not available" + ) + raise ValueError( + "iLocation based boolean indexing cannot use " + "an indexable as a mask" + ) + return + + if isinstance(key, slice): + return + elif is_integer(key): + self._validate_integer(key, axis) + elif isinstance(key, tuple): + # a tuple should already have been caught by this point + # so don't treat a tuple as a valid indexer + raise IndexingError("Too many indexers") + elif is_list_like_indexer(key): + arr = np.array(key) + len_axis = len(self.obj._get_axis(axis)) + + # check that the key has a numeric dtype + if not is_numeric_dtype(arr.dtype): + raise IndexError(f".iloc requires numeric indexers, got {arr}") + + # check that the key does not exceed the maximum size of the index + if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis): + raise IndexError("positional indexers are out-of-bounds") + else: + raise ValueError(f"Can only index by location with a [{self._valid_types}]") + + def _has_valid_setitem_indexer(self, indexer): + self._has_valid_positional_setitem_indexer(indexer) + + def _is_scalar_access(self, key: Tuple) -> bool: + """ + Returns + ------- + bool + """ + # this is a shortcut accessor to both .loc and .iloc + # that provide the equivalent access of .at and .iat + # a) avoid getting things via sections and (to minimize dtype changes) + # b) provide a performant path + if len(key) != self.ndim: + return False + + for i, k 
in enumerate(key): + if not is_integer(k): + return False + + ax = self.obj.axes[i] + if not ax.is_unique: + return False + + return True + + def _getitem_scalar(self, key): + # a fast-path to scalar access + # if not, raise + values = self.obj._get_value(*key, takeable=True) + return values + + def _validate_integer(self, key: int, axis: int) -> None: + """ + Check that 'key' is a valid position in the desired axis. + + Parameters + ---------- + key : int + Requested position. + axis : int + Desired axis. + + Raises + ------ + IndexError + If 'key' is not a valid position in axis 'axis'. + """ + len_axis = len(self.obj._get_axis(axis)) + if key >= len_axis or key < -len_axis: + raise IndexError("single positional indexer is out-of-bounds") + + def _getitem_tuple(self, tup: Tuple): + + self._has_valid_tuple(tup) + try: + return self._getitem_lowerdim(tup) + except IndexingError: + pass + + retval = self.obj + axis = 0 + for i, key in enumerate(tup): + if com.is_null_slice(key): + axis += 1 + continue + + retval = getattr(retval, self.name)._getitem_axis(key, axis=axis) + + # if the dim was reduced, then pass a lower-dim the next time + if retval.ndim < self.ndim: + # TODO: this is never reached in tests; can we confirm that + # it is impossible? + axis -= 1 + + # try to get for the next axis + axis += 1 + + return retval + + def _get_list_axis(self, key, axis: int): + """ + Return Series values by list or array of integers. + + Parameters + ---------- + key : list-like positional indexer + axis : int + + Returns + ------- + Series object + + Notes + ----- + `axis` can only be zero. 
+ """ + try: + return self.obj._take_with_is_copy(key, axis=axis) + except IndexError: + # re-raise with different error message + raise IndexError("positional indexers are out-of-bounds") + + def _getitem_axis(self, key, axis: int): + if isinstance(key, slice): + return self._get_slice_axis(key, axis=axis) + + if isinstance(key, list): + key = np.asarray(key) + + if com.is_bool_indexer(key): + self._validate_key(key, axis) + return self._getbool_axis(key, axis=axis) + + # a list of integers + elif is_list_like_indexer(key): + return self._get_list_axis(key, axis=axis) + + # a single integer + else: + key = item_from_zerodim(key) + if not is_integer(key): + raise TypeError("Cannot index by location index with a non-integer key") + + # validate the location + self._validate_integer(key, axis) + + return self._get_loc(key, axis=axis) + + # raise_missing is included for compat with the parent class signature + def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): + """ + Much simpler as we only have to deal with our valid types. + """ + # make need to convert a float key + if isinstance(obj, slice): + return self._convert_slice_indexer(obj, axis) + + elif is_float(obj): + return self._convert_scalar_indexer(obj, axis) + + try: + self._validate_key(obj, axis) + return obj + except ValueError: + raise ValueError(f"Can only index by location with a [{self._valid_types}]") + + +class _ScalarAccessIndexer(_NDFrameIndexerBase): + """ + Access scalars quickly. + """ + + def _convert_key(self, key, is_setter: bool = False): + raise AbstractMethodError(self) + + def __getitem__(self, key): + if not isinstance(key, tuple): + + # we could have a convertible item here (e.g. 
Timestamp) + if not is_list_like_indexer(key): + key = tuple([key]) + else: + raise ValueError("Invalid call for scalar access (getting)!") + + key = self._convert_key(key) + return self.obj._get_value(*key, takeable=self._takeable) + + def __setitem__(self, key, value): + if isinstance(key, tuple): + key = tuple(com.apply_if_callable(x, self.obj) for x in key) + else: + # scalar callable may return tuple + key = com.apply_if_callable(key, self.obj) + + if not isinstance(key, tuple): + key = _tuplify(self.ndim, key) + if len(key) != self.ndim: + raise ValueError("Not enough indexers for scalar access (setting)!") + key = list(self._convert_key(key, is_setter=True)) + key.append(value) + self.obj._set_value(*key, takeable=self._takeable) + + +@Appender(IndexingMixin.at.__doc__) +class _AtIndexer(_ScalarAccessIndexer): + _takeable = False + + def _convert_key(self, key, is_setter: bool = False): + """ + Require they keys to be the same type as the index. (so we don't + fallback) + """ + # allow arbitrary setting + if is_setter: + return list(key) + + for ax, i in zip(self.obj.axes, key): + if ax.is_integer(): + if not is_integer(i): + raise ValueError( + "At based indexing on an integer index " + "can only have integer indexers" + ) + else: + if is_integer(i) and not ax.holds_integer(): + raise ValueError( + "At based indexing on an non-integer " + "index can only have non-integer " + "indexers" + ) + return key + + +@Appender(IndexingMixin.iat.__doc__) +class _iAtIndexer(_ScalarAccessIndexer): + _takeable = True + + def _convert_key(self, key, is_setter: bool = False): + """ + Require integer args. 
(and convert to label arguments) + """ + for a, i in zip(self.obj.axes, key): + if not is_integer(i): + raise ValueError("iAt based indexing can only have integer indexers") + return key + + +def _tuplify(ndim: int, loc: Hashable) -> Tuple[Union[Hashable, slice], ...]: + """ + Given an indexer for the first dimension, create an equivalent tuple + for indexing over all dimensions. + + Parameters + ---------- + ndim : int + loc : object + + Returns + ------- + tuple + """ + _tup: List[Union[Hashable, slice]] + _tup = [slice(None, None) for _ in range(ndim)] + _tup[0] = loc + return tuple(_tup) + + +def convert_to_index_sliceable(obj, key): + """ + If we are index sliceable, then return my slicer, otherwise return None. + """ + idx = obj.index + if isinstance(key, slice): + return idx._convert_slice_indexer(key, kind="getitem") + + elif isinstance(key, str): + + # we are an actual column + if key in obj._data.items: + return None + + # We might have a datetimelike string that we can translate to a + # slice here via partial string indexing + if idx._supports_partial_string_indexing: + try: + return idx._get_string_slice(key) + except (KeyError, ValueError, NotImplementedError): + return None + + return None + + +def check_bool_indexer(index: Index, key) -> np.ndarray: + """ + Check if key is a valid boolean indexer for an object with such index and + perform reindexing or conversion if needed. + + This function assumes that is_bool_indexer(key) == True. + + Parameters + ---------- + index : Index + Index of the object on which the indexing is done. + key : list-like + Boolean indexer to check. + + Returns + ------- + np.array + Resulting key. + + Raises + ------ + IndexError + If the key does not have the same length as index. + IndexingError + If the index of the key is unalignable to index. 
+ """ + result = key + if isinstance(key, ABCSeries) and not key.index.equals(index): + result = result.reindex(index) + mask = isna(result._values) + if mask.any(): + raise IndexingError( + "Unalignable boolean Series provided as " + "indexer (index of the boolean Series and of " + "the indexed object do not match)." + ) + result = result.astype(bool)._values + elif is_object_dtype(key): + # key might be object-dtype bool, check_array_indexer needs bool array + result = np.asarray(result, dtype=bool) + result = check_array_indexer(index, result) + else: + result = check_array_indexer(index, result) + + return result + + +def convert_missing_indexer(indexer): + """ + Reverse convert a missing indexer, which is a dict + return the scalar indexer and a boolean indicating if we converted + """ + if isinstance(indexer, dict): + + # a missing key (but not a tuple indexer) + indexer = indexer["key"] + + if isinstance(indexer, bool): + raise KeyError("cannot use a single bool to index into setitem") + return indexer, True + + return indexer, False + + +def convert_from_missing_indexer_tuple(indexer, axes): + """ + Create a filtered indexer that doesn't have any missing indexers. + """ + + def get_indexer(_i, _idx): + return axes[_i].get_loc(_idx["key"]) if isinstance(_idx, dict) else _idx + + return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer)) + + +def maybe_convert_ix(*args): + """ + We likely want to take the cross-product. 
+ """ + ixify = True + for arg in args: + if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)): + ixify = False + + if ixify: + return np.ix_(*args) + else: + return args + + +def is_nested_tuple(tup, labels) -> bool: + """ + Returns + ------- + bool + """ + # check for a compatible nested tuple and multiindexes among the axes + if not isinstance(tup, tuple): + return False + + for i, k in enumerate(tup): + + if is_list_like(k) or isinstance(k, slice): + return isinstance(labels, ABCMultiIndex) + + return False + + +def is_label_like(key) -> bool: + """ + Returns + ------- + bool + """ + # select a label or row + return not isinstance(key, slice) and not is_list_like_indexer(key) + + +def need_slice(obj) -> bool: + """ + Returns + ------- + bool + """ + return ( + obj.start is not None + or obj.stop is not None + or (obj.step is not None and obj.step != 1) + ) + + +def _non_reducing_slice(slice_): + """ + Ensurse that a slice doesn't reduce to a Series or Scalar. + + Any user-paseed `subset` should have this called on it + to make sure we're always working with DataFrames. + """ + # default to column slice, like DataFrame + # ['A', 'B'] -> IndexSlices[:, ['A', 'B']] + kinds = (ABCSeries, np.ndarray, Index, list, str) + if isinstance(slice_, kinds): + slice_ = IndexSlice[:, slice_] + + def pred(part) -> bool: + """ + Returns + ------- + bool + True if slice does *not* reduce, + False if `part` is a tuple. + """ + # true when slice does *not* reduce, False when part is a tuple, + # i.e. 
MultiIndex slice + return (isinstance(part, slice) or is_list_like(part)) and not isinstance( + part, tuple + ) + + if not is_list_like(slice_): + if not isinstance(slice_, slice): + # a 1-d slice, like df.loc[1] + slice_ = [[slice_]] + else: + # slice(a, b, c) + slice_ = [slice_] # to tuplize later + else: + slice_ = [part if pred(part) else [part] for part in slice_] + return tuple(slice_) + + +def _maybe_numeric_slice(df, slice_, include_bool=False): + """ + Want nice defaults for background_gradient that don't break + with non-numeric data. But if slice_ is passed go with that. + """ + if slice_ is None: + dtypes = [np.number] + if include_bool: + dtypes.append(bool) + slice_ = IndexSlice[:, df.select_dtypes(include=dtypes).columns] + return slice_ + + +def _can_do_equal_len(labels, value, plane_indexer, lplane_indexer, obj) -> bool: + """ + Returns + ------- + bool + True if we have an equal len settable. + """ + if not len(labels) == 1 or not np.iterable(value) or is_scalar(plane_indexer[0]): + return False + + item = labels[0] + index = obj[item].index + + values_len = len(value) + # equal len list/ndarray + if len(index) == values_len: + return True + elif lplane_indexer == values_len: + return True + + return False diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py new file mode 100644 index 00000000..37a34055 --- /dev/null +++ b/pandas/core/internals/__init__.py @@ -0,0 +1,47 @@ +from pandas.core.internals.blocks import ( # io.pytables, io.packers + Block, + BoolBlock, + CategoricalBlock, + ComplexBlock, + DatetimeBlock, + DatetimeTZBlock, + ExtensionBlock, + FloatBlock, + IntBlock, + ObjectBlock, + TimeDeltaBlock, + _block_shape, + _safe_reshape, + make_block, +) +from pandas.core.internals.managers import ( + BlockManager, + SingleBlockManager, + _transform_index, + concatenate_block_managers, + create_block_manager_from_arrays, + create_block_manager_from_blocks, +) + +__all__ = [ + "Block", + "BoolBlock", + 
"CategoricalBlock", + "ComplexBlock", + "DatetimeBlock", + "DatetimeTZBlock", + "ExtensionBlock", + "FloatBlock", + "IntBlock", + "ObjectBlock", + "TimeDeltaBlock", + "_safe_reshape", + "make_block", + "_block_shape", + "BlockManager", + "SingleBlockManager", + "_transform_index", + "concatenate_block_managers", + "create_block_manager_from_arrays", + "create_block_manager_from_blocks", +] diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py new file mode 100644 index 00000000..317d3c30 --- /dev/null +++ b/pandas/core/internals/blocks.py @@ -0,0 +1,3219 @@ +from datetime import datetime, timedelta +import functools +import inspect +import re +from typing import Any, List +import warnings + +import numpy as np + +from pandas._libs import NaT, Timestamp, algos as libalgos, lib, tslib, writers +from pandas._libs.index import convert_scalar +import pandas._libs.internals as libinternals +from pandas._libs.tslibs import Timedelta, conversion +from pandas._libs.tslibs.timezones import tz_compare +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import ( + astype_nansafe, + find_common_type, + infer_dtype_from, + infer_dtype_from_scalar, + maybe_downcast_numeric, + maybe_downcast_to_dtype, + maybe_infer_dtype_type, + maybe_promote, + maybe_upcast, + soft_convert_objects, +) +from pandas.core.dtypes.common import ( + _NS_DTYPE, + _TD_DTYPE, + ensure_platform_int, + is_bool_dtype, + is_categorical, + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_interval_dtype, + is_list_like, + is_object_dtype, + is_period_dtype, + is_re, + is_re_compilable, + is_sparse, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.concat import concat_categorical, concat_datetime +from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype +from pandas.core.dtypes.generic import ( + 
ABCDataFrame, + ABCExtensionArray, + ABCPandasArray, + ABCSeries, +) +from pandas.core.dtypes.missing import ( + _isna_compat, + array_equivalent, + is_valid_nat_for_dtype, + isna, +) + +import pandas.core.algorithms as algos +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + ExtensionArray, + PandasArray, + PandasDtype, + TimedeltaArray, +) +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.construction import extract_array +from pandas.core.indexers import ( + check_setitem_lengths, + is_empty_indexer, + is_scalar_indexer, +) +import pandas.core.missing as missing +from pandas.core.nanops import nanpercentile + +from pandas.io.formats.printing import pprint_thing + + +class Block(PandasObject): + """ + Canonical n-dimensional unit of homogeneous dtype contained in a pandas + data structure + + Index-ignorant; let the container take care of that + """ + + __slots__ = ["_mgr_locs", "values", "ndim"] + is_numeric = False + is_float = False + is_integer = False + is_complex = False + is_datetime = False + is_datetimetz = False + is_timedelta = False + is_bool = False + is_object = False + is_categorical = False + is_extension = False + _can_hold_na = False + _can_consolidate = True + _verify_integrity = True + _validate_ndim = True + _ftype = "dense" + _concatenator = staticmethod(np.concatenate) + + def __init__(self, values, placement, ndim=None): + self.ndim = self._check_ndim(values, ndim) + self.mgr_locs = placement + self.values = values + + if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values): + raise ValueError( + f"Wrong number of items passed {len(self.values)}, " + f"placement implies {len(self.mgr_locs)}" + ) + + def _check_ndim(self, values, ndim): + """ + ndim inference and validation. + + Infers ndim from 'values' if not provided to __init__. + Validates that values.ndim and ndim are consistent if and only if + the class variable '_validate_ndim' is True. 
+ + Parameters + ---------- + values : array-like + ndim : int or None + + Returns + ------- + ndim : int + + Raises + ------ + ValueError : the number of dimensions do not match + """ + if ndim is None: + ndim = values.ndim + + if self._validate_ndim and values.ndim != ndim: + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim != ndim [{values.ndim} != {ndim}]" + ) + return ndim + + @property + def _holder(self): + """The array-like that can hold the underlying values. + + None for 'Block', overridden by subclasses that don't + use an ndarray. + """ + return None + + @property + def _consolidate_key(self): + return (self._can_consolidate, self.dtype.name) + + @property + def _is_single_block(self): + return self.ndim == 1 + + @property + def is_view(self): + """ return a boolean if I am possibly a view """ + return self.values.base is not None + + @property + def is_datelike(self): + """ return True if I am a non-datelike """ + return self.is_datetime or self.is_timedelta + + def is_categorical_astype(self, dtype): + """ + validate that we have a astypeable to categorical, + returns a boolean if we are a categorical + """ + if dtype is Categorical or dtype is CategoricalDtype: + # this is a pd.Categorical, but is not + # a valid type for astypeing + raise TypeError(f"invalid type {dtype} for astype") + + elif is_categorical_dtype(dtype): + return True + + return False + + def external_values(self, dtype=None): + """ + The array that Series.values returns (public attribute). + + This has some historical constraints, and is overridden in block + subclasses to return the correct array (e.g. period returns + object ndarray and datetimetz a datetime64[ns] ndarray instead of + proper extension array). 
+ """ + return self.values + + def internal_values(self, dtype=None): + """ return an internal format, currently just the ndarray + this should be the pure internal API format + """ + return self.values + + def array_values(self) -> ExtensionArray: + """ + The array that Series.array returns. Always an ExtensionArray. + """ + return PandasArray(self.values) + + def get_values(self, dtype=None): + """ + return an internal format, currently just the ndarray + this is often overridden to handle to_dense like operations + """ + if is_object_dtype(dtype): + return self.values.astype(object) + return self.values + + def get_block_values(self, dtype=None): + """ + This is used in the JSON C code + """ + return self.get_values(dtype=dtype) + + def to_dense(self): + return self.values.view() + + @property + def fill_value(self): + return np.nan + + @property + def mgr_locs(self): + return self._mgr_locs + + @mgr_locs.setter + def mgr_locs(self, new_mgr_locs): + if not isinstance(new_mgr_locs, libinternals.BlockPlacement): + new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs) + + self._mgr_locs = new_mgr_locs + + @property + def array_dtype(self): + """ the dtype to return if I want to construct this block as an + array + """ + return self.dtype + + def make_block(self, values, placement=None) -> "Block": + """ + Create a new block, with type inference propagate any values that are + not specified + """ + if placement is None: + placement = self.mgr_locs + + return make_block(values, placement=placement, ndim=self.ndim) + + def make_block_same_class(self, values, placement=None, ndim=None): + """ Wrap given values in a block of same type as self. 
""" + if placement is None: + placement = self.mgr_locs + if ndim is None: + ndim = self.ndim + return make_block(values, placement=placement, ndim=ndim, klass=type(self)) + + def __repr__(self) -> str: + # don't want to print out all of the items here + name = type(self).__name__ + if self._is_single_block: + + result = f"{name}: {len(self)} dtype: {self.dtype}" + + else: + + shape = " x ".join(pprint_thing(s) for s in self.shape) + result = ( + f"{name}: {pprint_thing(self.mgr_locs.indexer)}, " + f"{shape}, dtype: {self.dtype}" + ) + + return result + + def __len__(self) -> int: + return len(self.values) + + def __getstate__(self): + return self.mgr_locs.indexer, self.values + + def __setstate__(self, state): + self.mgr_locs = libinternals.BlockPlacement(state[0]) + self.values = state[1] + self.ndim = self.values.ndim + + def _slice(self, slicer): + """ return a slice of my values """ + return self.values[slicer] + + def getitem_block(self, slicer, new_mgr_locs=None): + """ + Perform __getitem__-like, return result as block. + + As of now, only supports slices that preserve dimensionality. + """ + if new_mgr_locs is None: + if isinstance(slicer, tuple): + axis0_slicer = slicer[0] + else: + axis0_slicer = slicer + new_mgr_locs = self.mgr_locs[axis0_slicer] + + new_values = self._slice(slicer) + + if self._validate_ndim and new_values.ndim != self.ndim: + raise ValueError("Only same dim slicing is allowed") + + return self.make_block_same_class(new_values, new_mgr_locs) + + @property + def shape(self): + return self.values.shape + + @property + def dtype(self): + return self.values.dtype + + @property + def ftype(self): + if getattr(self.values, "_pandas_ftype", False): + dtype = self.dtype.subtype + else: + dtype = self.dtype + return f"{dtype}:{self._ftype}" + + def merge(self, other): + return _merge_blocks([self, other]) + + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. 
+ """ + values = self._concatenator( + [blk.values for blk in to_concat], axis=self.ndim - 1 + ) + return self.make_block_same_class( + values, placement=placement or slice(0, len(values), 1) + ) + + def iget(self, i): + return self.values[i] + + def set(self, locs, values): + """ + Modify Block in-place with new item value + + Returns + ------- + None + """ + self.values[locs] = values + + def delete(self, loc): + """ + Delete given loc(-s) from block in-place. + """ + self.values = np.delete(self.values, loc, 0) + self.mgr_locs = self.mgr_locs.delete(loc) + + def apply(self, func, **kwargs): + """ apply the function to my values; return a block if we are not + one + """ + with np.errstate(all="ignore"): + result = func(self.values, **kwargs) + + if is_extension_array_dtype(result) and result.ndim > 1: + # if we get a 2D ExtensionArray, we need to split it into 1D pieces + nbs = [] + for i, loc in enumerate(self.mgr_locs): + vals = result[i] + nv = _block_shape(vals, ndim=self.ndim) + block = self.make_block(values=nv, placement=[loc]) + nbs.append(block) + return nbs + + if not isinstance(result, Block): + result = self.make_block(values=_block_shape(result, ndim=self.ndim)) + + return result + + def fillna(self, value, limit=None, inplace=False, downcast=None): + """ fillna on the block with the value. 
If we fail, then convert to + ObjectBlock and try again + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + mask = isna(self.values) + if limit is not None: + limit = libalgos._validate_limit(None, limit=limit) + mask[mask.cumsum(self.ndim - 1) > limit] = False + + if not self._can_hold_na: + if inplace: + return self + else: + return self.copy() + + if self._can_hold_element(value): + # equivalent: _try_coerce_args(value) would not raise + blocks = self.putmask(mask, value, inplace=inplace) + return self._maybe_downcast(blocks, downcast) + + # we can't process the value, but nothing to do + if not mask.any(): + return self if inplace else self.copy() + + # operate column-by-column + def f(mask, val, idx): + block = self.coerce_to_target_dtype(value) + + # slice out our block + if idx is not None: + # i.e. self.ndim == 2 + block = block.getitem_block(slice(idx, idx + 1)) + return block.fillna(value, limit=limit, inplace=inplace, downcast=None) + + return self.split_and_operate(None, f, inplace) + + def split_and_operate(self, mask, f, inplace: bool): + """ + split the block per-column, and apply the callable f + per-column, return a new block for each. Handle + masking which will not change a block unless needed. + + Parameters + ---------- + mask : 2-d boolean mask + f : callable accepting (1d-mask, 1d values, indexer) + inplace : boolean + + Returns + ------- + list of blocks + """ + + if mask is None: + mask = np.broadcast_to(True, shape=self.shape) + + new_values = self.values + + def make_a_block(nv, ref_loc): + if isinstance(nv, list): + assert len(nv) == 1, nv + assert isinstance(nv[0], Block) + block = nv[0] + else: + # Put back the dimension that was taken from it and make + # a block out of the result. 
+ nv = _block_shape(nv, ndim=self.ndim) + block = self.make_block(values=nv, placement=ref_loc) + return block + + # ndim == 1 + if self.ndim == 1: + if mask.any(): + nv = f(mask, new_values, None) + else: + nv = new_values if inplace else new_values.copy() + block = make_a_block(nv, self.mgr_locs) + return [block] + + # ndim > 1 + new_blocks = [] + for i, ref_loc in enumerate(self.mgr_locs): + m = mask[i] + v = new_values[i] + + # need a new block + if m.any(): + nv = f(m, v, i) + else: + nv = v if inplace else v.copy() + + block = make_a_block(nv, [ref_loc]) + new_blocks.append(block) + + return new_blocks + + def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]: + + # no need to downcast our float + # unless indicated + if downcast is None and ( + self.is_float or self.is_timedelta or self.is_datetime + ): + return blocks + + return _extend_blocks([b.downcast(downcast) for b in blocks]) + + def downcast(self, dtypes=None): + """ try to downcast each item to the dict of dtypes if present """ + + # turn it off completely + if dtypes is False: + return self + + values = self.values + + # single block handling + if self._is_single_block: + + # try to cast all non-floats here + if dtypes is None: + dtypes = "infer" + + nv = maybe_downcast_to_dtype(values, dtypes) + return self.make_block(nv) + + # ndim > 1 + if dtypes is None: + return self + + if not (dtypes == "infer" or isinstance(dtypes, dict)): + raise ValueError( + "downcast must have a dictionary or 'infer' as its argument" + ) + elif dtypes != "infer": + raise AssertionError("dtypes as dict is not supported yet") + + # operate column-by-column + # this is expensive as it splits the blocks items-by-item + def f(mask, val, idx): + val = maybe_downcast_to_dtype(val, dtype="infer") + return val + + return self.split_and_operate(None, f, False) + + def astype(self, dtype, copy: bool = False, errors: str = "raise"): + """ + Coerce to the new dtype. 
+ + Parameters + ---------- + dtype : str, dtype convertible + copy : bool, default False + copy if indicated + errors : str, {'raise', 'ignore'}, default 'ignore' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + Returns + ------- + Block + """ + errors_legal_values = ("raise", "ignore") + + if errors not in errors_legal_values: + invalid_arg = ( + "Expected value of kwarg 'errors' to be one of " + f"{list(errors_legal_values)}. Supplied value is '{errors}'" + ) + raise ValueError(invalid_arg) + + if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype): + msg = ( + f"Expected an instance of {dtype.__name__}, " + "but got the class instead. Try instantiating 'dtype'." + ) + raise TypeError(msg) + + # may need to convert to categorical + if self.is_categorical_astype(dtype): + + if is_categorical_dtype(self.values): + # GH 10696/18593: update an existing categorical efficiently + return self.make_block(self.values.astype(dtype, copy=copy)) + + return self.make_block(Categorical(self.values, dtype=dtype)) + + dtype = pandas_dtype(dtype) + + # astype processing + if is_dtype_equal(self.dtype, dtype): + if copy: + return self.copy() + return self + + # force the copy here + if self.is_extension: + # TODO: Should we try/except this astype? + values = self.values.astype(dtype) + else: + if issubclass(dtype.type, str): + + # use native type formatting for datetime/tz/timedelta + if self.is_datelike: + values = self.to_native_types() + + # astype formatting + else: + values = self.get_values() + + else: + values = self.get_values(dtype=dtype) + + # _astype_nansafe works fine with 1-d only + vals1d = values.ravel() + try: + values = astype_nansafe(vals1d, dtype, copy=True) + except (ValueError, TypeError): + # e.g. 
astype_nansafe can fail on object-dtype of strings + # trying to convert to float + if errors == "raise": + raise + newb = self.copy() if copy else self + return newb + + # TODO(extension) + # should we make this attribute? + if isinstance(values, np.ndarray): + values = values.reshape(self.shape) + + newb = make_block(values, placement=self.mgr_locs, ndim=self.ndim) + + if newb.is_numeric and self.is_numeric: + if newb.shape != self.shape: + raise TypeError( + f"cannot set astype for copy = [{copy}] for dtype " + f"({self.dtype.name} [{self.shape}]) to different shape " + f"({newb.dtype.name} [{newb.shape}])" + ) + return newb + + def convert( + self, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + coerce: bool = False, + ): + """ attempt to coerce any object types to better types return a copy + of the block (if copy = True) by definition we are not an ObjectBlock + here! + """ + + return self.copy() if copy else self + + def _can_hold_element(self, element: Any) -> bool: + """ require the same dtype as ourselves """ + dtype = self.values.dtype.type + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, dtype) + return isinstance(element, dtype) + + def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): + """ convert to our native types format, slicing if desired """ + values = self.get_values() + + if slicer is not None: + values = values[:, slicer] + mask = isna(values) + itemsize = writers.word_len(na_rep) + + if not self.is_object and not quoting and itemsize: + values = values.astype(str) + if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: + # enlarge for the na_rep + values = values.astype(f"= 1: + if self.ndim - 1 == new.ndim and axis == 1: + new = np.repeat(new, new_values.shape[-1]).reshape(self.shape) + new = new.astype(new_values.dtype) + + # we require exact matches between the len of the + # values we are setting (or is 
compat). np.putmask + # doesn't check this and will simply truncate / pad + # the output, but we want sane error messages + # + # TODO: this prob needs some better checking + # for 2D cases + if ( + is_list_like(new) + and np.any(mask[mask]) + and getattr(new, "ndim", 1) == 1 + ): + if mask[mask].shape[-1] == len(new): + # GH 30567 + # If length of ``new`` is less than the length of ``new_values``, + # `np.putmask` would first repeat the ``new`` array and then + # assign the masked values hence produces incorrect result. + # `np.place` on the other hand uses the ``new`` values at it is + # to place in the masked locations of ``new_values`` + np.place(new_values, mask, new) + elif mask.shape[-1] == len(new) or len(new) == 1: + np.putmask(new_values, mask, new) + else: + raise ValueError("cannot assign mismatch length to masked array") + else: + np.putmask(new_values, mask, new) + + # maybe upcast me + elif mask.any(): + if transpose: + mask = mask.T + if isinstance(new, np.ndarray): + new = new.T + axis = new_values.ndim - axis - 1 + + # Pseudo-broadcast + if getattr(new, "ndim", 0) >= 1: + if self.ndim - 1 == new.ndim: + new_shape = list(new.shape) + new_shape.insert(axis, 1) + new = new.reshape(tuple(new_shape)) + + # operate column-by-column + def f(mask, val, idx): + + if idx is None: + # ndim==1 case. 
+ n = new + else: + + if isinstance(new, np.ndarray): + n = np.squeeze(new[idx % new.shape[0]]) + else: + n = np.array(new) + + # type of the new block + dtype, _ = maybe_promote(n.dtype) + + # we need to explicitly astype here to make a copy + n = n.astype(dtype) + + nv = _putmask_smart(val, mask, n) + return nv + + new_blocks = self.split_and_operate(mask, f, inplace) + return new_blocks + + if inplace: + return [self] + + if transpose: + new_values = new_values.T + + return [self.make_block(new_values)] + + def coerce_to_target_dtype(self, other): + """ + coerce the current block to a dtype compat for other + we will return a block, possibly object, and not raise + + we can also safely try to coerce to the same dtype + and will receive the same block + """ + + # if we cannot then coerce to object + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + + if is_dtype_equal(self.dtype, dtype): + return self + + if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype): + # we don't upcast to bool + return self.astype(object) + + elif (self.is_float or self.is_complex) and ( + is_integer_dtype(dtype) or is_float_dtype(dtype) + ): + # don't coerce float/complex to int + return self + + elif ( + self.is_datetime + or is_datetime64_dtype(dtype) + or is_datetime64tz_dtype(dtype) + ): + + # not a datetime + if not ( + (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)) + and self.is_datetime + ): + return self.astype(object) + + # don't upcast timezone with different timezone or no timezone + mytz = getattr(self.dtype, "tz", None) + othertz = getattr(dtype, "tz", None) + + if not tz_compare(mytz, othertz): + return self.astype(object) + + raise AssertionError( + f"possible recursion in coerce_to_target_dtype: {self} {other}" + ) + + elif self.is_timedelta or is_timedelta64_dtype(dtype): + + # not a timedelta + if not (is_timedelta64_dtype(dtype) and self.is_timedelta): + return self.astype(object) + + raise AssertionError( + f"possible recursion in 
coerce_to_target_dtype: {self} {other}" + ) + + try: + return self.astype(dtype) + except (ValueError, TypeError, OverflowError): + return self.astype(object) + + def interpolate( + self, + method="pad", + axis=0, + index=None, + values=None, + inplace=False, + limit=None, + limit_direction="forward", + limit_area=None, + fill_value=None, + coerce=False, + downcast=None, + **kwargs, + ): + + inplace = validate_bool_kwarg(inplace, "inplace") + + def check_int_bool(self, inplace): + # Only FloatBlocks will contain NaNs. + # timedelta subclasses IntBlock + if (self.is_bool or self.is_integer) and not self.is_timedelta: + if inplace: + return self + else: + return self.copy() + + # a fill na type method + try: + m = missing.clean_fill_method(method) + except ValueError: + m = None + + if m is not None: + r = check_int_bool(self, inplace) + if r is not None: + return r + return self._interpolate_with_fill( + method=m, + axis=axis, + inplace=inplace, + limit=limit, + fill_value=fill_value, + coerce=coerce, + downcast=downcast, + ) + # validate the interp method + m = missing.clean_interp_method(method, **kwargs) + + r = check_int_bool(self, inplace) + if r is not None: + return r + return self._interpolate( + method=m, + index=index, + values=values, + axis=axis, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + inplace=inplace, + downcast=downcast, + **kwargs, + ) + + def _interpolate_with_fill( + self, + method="pad", + axis=0, + inplace=False, + limit=None, + fill_value=None, + coerce=False, + downcast=None, + ): + """ fillna but using the interpolate machinery """ + + inplace = validate_bool_kwarg(inplace, "inplace") + + # if we are coercing, then don't force the conversion + # if the block can't hold the type + if coerce: + if not self._can_hold_na: + if inplace: + return [self] + else: + return [self.copy()] + + values = self.values if inplace else self.values.copy() + + # We only get here for non-ExtensionBlock + 
fill_value = convert_scalar(self.values, fill_value) + + values = missing.interpolate_2d( + values, + method=method, + axis=axis, + limit=limit, + fill_value=fill_value, + dtype=self.dtype, + ) + + blocks = [self.make_block_same_class(values, ndim=self.ndim)] + return self._maybe_downcast(blocks, downcast) + + def _interpolate( + self, + method=None, + index=None, + values=None, + fill_value=None, + axis=0, + limit=None, + limit_direction="forward", + limit_area=None, + inplace=False, + downcast=None, + **kwargs, + ): + """ interpolate using scipy wrappers """ + + inplace = validate_bool_kwarg(inplace, "inplace") + data = self.values if inplace else self.values.copy() + + # only deal with floats + if not self.is_float: + if not self.is_integer: + return self + data = data.astype(np.float64) + + if fill_value is None: + fill_value = self.fill_value + + if method in ("krogh", "piecewise_polynomial", "pchip"): + if not index.is_monotonic: + raise ValueError( + f"{method} interpolation requires that the index be monotonic." + ) + # process 1-d slices in the axis direction + + def func(x): + + # process a 1-d slice, returning it + # should the axis argument be handled below in apply_along_axis? + # i.e. 
not an arg to missing.interpolate_1d + return missing.interpolate_1d( + index, + x, + method=method, + limit=limit, + limit_direction=limit_direction, + limit_area=limit_area, + fill_value=fill_value, + bounds_error=False, + **kwargs, + ) + + # interp each column independently + interp_values = np.apply_along_axis(func, axis, data) + + blocks = [self.make_block_same_class(interp_values)] + return self._maybe_downcast(blocks, downcast) + + def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): + """ + Take values according to indexer and return them as a block.bb + + """ + + # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock + # so need to preserve types + # sparse is treated like an ndarray, but needs .get_values() shaping + + values = self.values + + if fill_tuple is None: + fill_value = self.fill_value + allow_fill = False + else: + fill_value = fill_tuple[0] + allow_fill = True + + new_values = algos.take_nd( + values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value + ) + + # Called from three places in managers, all of which satisfy + # this assertion + assert not (axis == 0 and new_mgr_locs is None) + if new_mgr_locs is None: + new_mgr_locs = self.mgr_locs + + if not is_dtype_equal(new_values.dtype, self.dtype): + return self.make_block(new_values, new_mgr_locs) + else: + return self.make_block_same_class(new_values, new_mgr_locs) + + def diff(self, n: int, axis: int = 1) -> List["Block"]: + """ return block for the diff of the values """ + new_values = algos.diff(self.values, n, axis=axis, stacklevel=7) + # We use block_shape for ExtensionBlock subclasses, which may call here + # via a super. + new_values = _block_shape(new_values, ndim=self.ndim) + return [self.make_block(values=new_values)] + + def shift(self, periods, axis=0, fill_value=None): + """ shift the block by periods, possibly upcast """ + + # convert integer to float if necessary. 
need to do a lot more than + # that, handle boolean etc also + new_values, fill_value = maybe_upcast(self.values, fill_value) + + # make sure array sent to np.roll is c_contiguous + f_ordered = new_values.flags.f_contiguous + if f_ordered: + new_values = new_values.T + axis = new_values.ndim - axis - 1 + + if np.prod(new_values.shape): + new_values = np.roll(new_values, ensure_platform_int(periods), axis=axis) + + axis_indexer = [slice(None)] * self.ndim + if periods > 0: + axis_indexer[axis] = slice(None, periods) + else: + axis_indexer[axis] = slice(periods, None) + new_values[tuple(axis_indexer)] = fill_value + + # restore original order + if f_ordered: + new_values = new_values.T + + return [self.make_block(new_values)] + + def where( + self, + other, + cond, + align=True, + errors="raise", + try_cast: bool = False, + axis: int = 0, + ) -> List["Block"]: + """ + evaluate the block; return result block(s) from the result + + Parameters + ---------- + other : a ndarray/object + cond : the condition to respect + align : boolean, perform alignment on other/cond + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. 
On error return original object + axis : int + + Returns + ------- + a new block(s), the result of the func + """ + import pandas.core.computation.expressions as expressions + + assert errors in ["raise", "ignore"] + transpose = self.ndim == 2 + + values = self.values + orig_other = other + if transpose: + values = values.T + + other = getattr(other, "_values", getattr(other, "values", other)) + cond = getattr(cond, "values", cond) + + # If the default broadcasting would go in the wrong direction, then + # explicitly reshape other instead + if getattr(other, "ndim", 0) >= 1: + if values.ndim - 1 == other.ndim and axis == 1: + other = other.reshape(tuple(other.shape + (1,))) + elif transpose and values.ndim == self.ndim - 1: + cond = cond.T + + if not hasattr(cond, "shape"): + raise ValueError("where must have a condition that is ndarray like") + + # our where function + def func(cond, values, other): + + if not ( + (self.is_integer or self.is_bool) + and lib.is_float(other) + and np.isnan(other) + ): + # np.where will cast integer array to floats in this case + if not self._can_hold_element(other): + raise TypeError + if lib.is_scalar(other) and isinstance(values, np.ndarray): + other = convert_scalar(values, other) + + fastres = expressions.where(cond, values, other) + return fastres + + if cond.ravel().all(): + result = values + else: + # see if we can operate on the entire block, or need item-by-item + # or if we are a single block (ndim == 1) + try: + result = func(cond, values, other) + except TypeError: + + # we cannot coerce, return a compat dtype + # we are explicitly ignoring errors + block = self.coerce_to_target_dtype(other) + blocks = block.where( + orig_other, + cond, + align=align, + errors=errors, + try_cast=try_cast, + axis=axis, + ) + return self._maybe_downcast(blocks, "infer") + + if self._can_hold_na or self.ndim == 1: + + if transpose: + result = result.T + + return [self.make_block(result)] + + # might need to separate out blocks + axis = 
cond.ndim - 1 + cond = cond.swapaxes(axis, 0) + mask = np.array([cond[i].all() for i in range(cond.shape[0])], dtype=bool) + + result_blocks = [] + for m in [mask, ~mask]: + if m.any(): + taken = result.take(m.nonzero()[0], axis=axis) + r = maybe_downcast_numeric(taken, self.dtype) + nb = self.make_block(r.T, placement=self.mgr_locs[m]) + result_blocks.append(nb) + + return result_blocks + + def equals(self, other) -> bool: + if self.dtype != other.dtype or self.shape != other.shape: + return False + return array_equivalent(self.values, other.values) + + def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): + """Return a list of unstacked blocks of self + + Parameters + ---------- + unstacker_func : callable + Partially applied unstacker. + new_columns : Index + All columns of the unstacked BlockManager. + n_rows : int + Only used in ExtensionBlock._unstack + fill_value : int + Only used in ExtensionBlock._unstack + + Returns + ------- + blocks : list of Block + New blocks of unstacked values. + mask : array_like of bool + The mask of columns of `blocks` we should keep. 
+ """ + unstacker = unstacker_func(self.values.T) + new_items = unstacker.get_new_columns() + new_placement = new_columns.get_indexer(new_items) + new_values, mask = unstacker.get_new_values() + + mask = mask.any(0) + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + blocks = [make_block(new_values, placement=new_placement)] + return blocks, mask + + def quantile(self, qs, interpolation="linear", axis=0): + """ + compute the quantiles of the + + Parameters + ---------- + qs: a scalar or list of the quantiles to be computed + interpolation: type of interpolation, default 'linear' + axis: axis to compute, default 0 + + Returns + ------- + Block + """ + # We should always have ndim == 2 because Series dispatches to DataFrame + assert self.ndim == 2 + + values = self.get_values() + + is_empty = values.shape[axis] == 0 + orig_scalar = not is_list_like(qs) + if orig_scalar: + # make list-like, unpack later + qs = [qs] + + if is_empty: + # create the array of na_values + # 2d len(values) * len(qs) + result = np.repeat( + np.array([self.fill_value] * len(qs)), len(values) + ).reshape(len(values), len(qs)) + else: + # asarray needed for Sparse, see GH#24600 + mask = np.asarray(isna(values)) + result = nanpercentile( + values, + np.array(qs) * 100, + axis=axis, + na_value=self.fill_value, + mask=mask, + ndim=values.ndim, + interpolation=interpolation, + ) + + result = np.array(result, copy=False) + result = result.T + + if orig_scalar and not lib.is_scalar(result): + # result could be scalar in case with is_empty and self.ndim == 1 + assert result.shape[-1] == 1, result.shape + result = result[..., 0] + result = lib.item_from_zerodim(result) + + ndim = np.ndim(result) + return make_block(result, placement=np.arange(len(result)), ndim=ndim) + + def _replace_coerce( + self, to_replace, value, inplace=True, regex=False, convert=False, mask=None + ): + """ + Replace value corresponding to the given boolean array with another + value. 
+ + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + regex : bool, default False + If true, perform regular expression substitution. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicate corresponding element is ignored. + + Returns + ------- + A new block if there is anything to replace or the original block. + """ + + if mask.any(): + if not regex: + self = self.coerce_to_target_dtype(value) + return self.putmask(mask, value, inplace=inplace) + else: + return self._replace_single( + to_replace, + value, + inplace=inplace, + regex=regex, + convert=convert, + mask=mask, + ) + return self + + +class NonConsolidatableMixIn: + """ hold methods for the nonconsolidatable blocks """ + + _can_consolidate = False + _verify_integrity = False + _validate_ndim = False + + def __init__(self, values, placement, ndim=None): + """Initialize a non-consolidatable block. + + 'ndim' may be inferred from 'placement'. + + This will call continue to call __init__ for the other base + classes mixed in with this Mixin. 
+ """ + # Placement must be converted to BlockPlacement so that we can check + # its length + if not isinstance(placement, libinternals.BlockPlacement): + placement = libinternals.BlockPlacement(placement) + + # Maybe infer ndim from placement + if ndim is None: + if len(placement) != 1: + ndim = 1 + else: + ndim = 2 + super().__init__(values, placement, ndim=ndim) + + @property + def shape(self): + if self.ndim == 1: + return ((len(self.values)),) + return (len(self.mgr_locs), len(self.values)) + + def iget(self, col): + + if self.ndim == 2 and isinstance(col, tuple): + col, loc = col + if not com.is_null_slice(col) and col != 0: + raise IndexError(f"{self} only contains one item") + elif isinstance(col, slice): + if col != slice(None): + raise NotImplementedError(col) + return self.values[[loc]] + return self.values[loc] + else: + if col != 0: + raise IndexError(f"{self} only contains one item") + return self.values + + def should_store(self, value): + return isinstance(value, self._holder) + + def set(self, locs, values, check=False): + assert locs.tolist() == [0] + self.values = values + + def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False): + """ + putmask the data to the block; we must be a single block and not + generate other blocks + + return the resulting block + + Parameters + ---------- + mask : the condition to respect + new : a ndarray/object + align : boolean, perform alignment on other/cond, default is True + inplace : perform inplace modification, default is False + + Returns + ------- + a new block, the result of the putmask + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + # use block's copy logic. 
+ # .values may be an Index which does shallow copy by default + new_values = self.values if inplace else self.copy().values + + if isinstance(new, np.ndarray) and len(new) == len(mask): + new = new[mask] + + mask = _safe_reshape(mask, new_values.shape) + + new_values[mask] = new + return [self.make_block(values=new_values)] + + def _get_unstack_items(self, unstacker, new_columns): + """ + Get the placement, values, and mask for a Block unstack. + + This is shared between ObjectBlock and ExtensionBlock. They + differ in that ObjectBlock passes the values, while ExtensionBlock + passes the dummy ndarray of positions to be used by a take + later. + + Parameters + ---------- + unstacker : pandas.core.reshape.reshape._Unstacker + new_columns : Index + All columns of the unstacked BlockManager. + + Returns + ------- + new_placement : ndarray[int] + The placement of the new columns in `new_columns`. + new_values : Union[ndarray, ExtensionArray] + The first return value from _Unstacker.get_new_values. + mask : ndarray[bool] + The second return value from _Unstacker.get_new_values. + """ + # shared with ExtensionBlock + new_items = unstacker.get_new_columns() + new_placement = new_columns.get_indexer(new_items) + new_values, mask = unstacker.get_new_values() + + mask = mask.any(0) + return new_placement, new_values, mask + + +class ExtensionBlock(NonConsolidatableMixIn, Block): + """Block for holding extension types. + + Notes + ----- + This holds all 3rd-party extension array types. It's also the immediate + parent class for our internal extension types' blocks, CategoricalBlock. + + ExtensionArrays are limited to 1-D. + """ + + is_extension = True + + def __init__(self, values, placement, ndim=None): + values = self._maybe_coerce_values(values) + super().__init__(values, placement, ndim) + + def _maybe_coerce_values(self, values): + """ + Unbox to an extension array. + + This will unbox an ExtensionArray stored in an Index or Series. + ExtensionArrays pass through. 
No dtype coercion is done. + + Parameters + ---------- + values : Index, Series, ExtensionArray + + Returns + ------- + ExtensionArray + """ + return extract_array(values) + + @property + def _holder(self): + # For extension blocks, the holder is values-dependent. + return type(self.values) + + @property + def fill_value(self): + # Used in reindex_indexer + return self.values.dtype.na_value + + @property + def _can_hold_na(self): + # The default ExtensionArray._can_hold_na is True + return self._holder._can_hold_na + + @property + def is_view(self): + """Extension arrays are never treated as views.""" + return False + + @property + def is_numeric(self): + return self.values.dtype._is_numeric + + def setitem(self, indexer, value): + """Set the value inplace, returning a same-typed block. + + This differs from Block.setitem by not allowing setitem to change + the dtype of the Block. + + Parameters + ---------- + indexer : tuple, list-like, array-like, slice + The subset of self.values to set + value : object + The value being set + + Returns + ------- + Block + + Notes + ----- + `indexer` is a direct slice/positional indexer. `value` must + be a compatible shape. + """ + if isinstance(indexer, tuple): + # we are always 1-D + indexer = indexer[0] + + check_setitem_lengths(indexer, value, self.values) + self.values[indexer] = value + return self + + def get_values(self, dtype=None): + # ExtensionArrays must be iterable, so this works. 
+ values = np.asarray(self.values) + if values.ndim == self.ndim - 1: + values = values.reshape((1,) + values.shape) + return values + + def array_values(self) -> ExtensionArray: + return self.values + + def to_dense(self): + return np.asarray(self.values) + + def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): + """override to use ExtensionArray astype for the conversion""" + values = self.values + if slicer is not None: + values = values[slicer] + mask = isna(values) + + values = np.asarray(values.astype(object)) + values[mask] = na_rep + + # we are expected to return a 2-d ndarray + return values.reshape(1, len(values)) + + def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): + """ + Take values according to indexer and return them as a block. + """ + if fill_tuple is None: + fill_value = None + else: + fill_value = fill_tuple[0] + + # axis doesn't matter; we are really a single-dim object + # but are passed the axis depending on the calling routing + # if its REALLY axis 0, then this will be a reindex and not a take + new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True) + + # Called from three places in managers, all of which satisfy + # this assertion + assert not (self.ndim == 1 and new_mgr_locs is None) + if new_mgr_locs is None: + new_mgr_locs = self.mgr_locs + + return self.make_block_same_class(new_values, new_mgr_locs) + + def _can_hold_element(self, element: Any) -> bool: + # XXX: We may need to think about pushing this onto the array. + # We're doing the same as CategoricalBlock here. 
+ return True + + def _slice(self, slicer): + """ return a slice of my values """ + + # slice the category + # return same dims as we currently have + + if isinstance(slicer, tuple) and len(slicer) == 2: + if not com.is_null_slice(slicer[0]): + raise AssertionError("invalid slicing for a 1-ndim categorical") + slicer = slicer[1] + + return self.values[slicer] + + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. + """ + values = self._holder._concat_same_type([blk.values for blk in to_concat]) + placement = placement or slice(0, len(values), 1) + return self.make_block_same_class(values, ndim=self.ndim, placement=placement) + + def fillna(self, value, limit=None, inplace=False, downcast=None): + values = self.values if inplace else self.values.copy() + values = values.fillna(value=value, limit=limit) + return [ + self.make_block_same_class( + values=values, placement=self.mgr_locs, ndim=self.ndim + ) + ] + + def interpolate( + self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs + ): + + values = self.values if inplace else self.values.copy() + return self.make_block_same_class( + values=values.fillna(value=fill_value, method=method, limit=limit), + placement=self.mgr_locs, + ) + + def diff(self, n: int, axis: int = 1) -> List["Block"]: + if axis == 1: + # we are by definition 1D. + axis = 0 + return super().diff(n, axis) + + def shift( + self, periods: int, axis: int = 0, fill_value: Any = None, + ) -> List["ExtensionBlock"]: + """ + Shift the block by `periods`. + + Dispatches to underlying ExtensionArray and re-boxes in an + ExtensionBlock. 
+ """ + return [ + self.make_block_same_class( + self.values.shift(periods=periods, fill_value=fill_value), + placement=self.mgr_locs, + ndim=self.ndim, + ) + ] + + def where( + self, + other, + cond, + align=True, + errors="raise", + try_cast: bool = False, + axis: int = 0, + ) -> List["Block"]: + if isinstance(other, ABCDataFrame): + # ExtensionArrays are 1-D, so if we get here then + # `other` should be a DataFrame with a single column. + assert other.shape[1] == 1 + other = other.iloc[:, 0] + + other = extract_array(other, extract_numpy=True) + + if isinstance(cond, ABCDataFrame): + assert cond.shape[1] == 1 + cond = cond.iloc[:, 0] + + cond = extract_array(cond, extract_numpy=True) + + if lib.is_scalar(other) and isna(other): + # The default `other` for Series / Frame is np.nan + # we want to replace that with the correct NA value + # for the type + other = self.dtype.na_value + + if is_sparse(self.values): + # TODO(SparseArray.__setitem__): remove this if condition + # We need to re-infer the type of the data after doing the + # where, for cases where the subtypes don't match + dtype = None + else: + dtype = self.dtype + + result = self.values.copy() + icond = ~cond + if lib.is_scalar(other): + set_other = other + else: + set_other = other[icond] + try: + result[icond] = set_other + except (NotImplementedError, TypeError): + # NotImplementedError for class not implementing `__setitem__` + # TypeError for SparseArray, which implements just to raise + # a TypeError + result = self._holder._from_sequence( + np.where(cond, self.values, other), dtype=dtype + ) + + return [self.make_block_same_class(result, placement=self.mgr_locs)] + + @property + def _ftype(self): + return getattr(self.values, "_pandas_ftype", Block._ftype) + + def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): + # ExtensionArray-safe unstack. + # We override ObjectBlock._unstack, which unstacks directly on the + # values of the array. 
For EA-backed blocks, this would require + # converting to a 2-D ndarray of objects. + # Instead, we unstack an ndarray of integer positions, followed by + # a `take` on the actual values. + dummy_arr = np.arange(n_rows) + dummy_unstacker = functools.partial(unstacker_func, fill_value=-1) + unstacker = dummy_unstacker(dummy_arr) + + new_placement, new_values, mask = self._get_unstack_items( + unstacker, new_columns + ) + + blocks = [ + self.make_block_same_class( + self.values.take(indices, allow_fill=True, fill_value=fill_value), + [place], + ) + for indices, place in zip(new_values.T, new_placement) + ] + return blocks, mask + + +class ObjectValuesExtensionBlock(ExtensionBlock): + """ + Block providing backwards-compatibility for `.values`. + + Used by PeriodArray and IntervalArray to ensure that + Series[T].values is an ndarray of objects. + """ + + def external_values(self, dtype=None): + return self.values.astype(object) + + +class NumericBlock(Block): + __slots__ = () + is_numeric = True + _can_hold_na = True + + +class FloatOrComplexBlock(NumericBlock): + __slots__ = () + + def equals(self, other) -> bool: + if self.dtype != other.dtype or self.shape != other.shape: + return False + left, right = self.values, other.values + return ((left == right) | (np.isnan(left) & np.isnan(right))).all() + + +class FloatBlock(FloatOrComplexBlock): + __slots__ = () + is_float = True + + def _can_hold_element(self, element: Any) -> bool: + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, (np.floating, np.integer)) and not issubclass( + tipo.type, (np.datetime64, np.timedelta64) + ) + return isinstance( + element, (float, int, np.floating, np.int_) + ) and not isinstance( + element, + (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64), + ) + + def to_native_types( + self, + slicer=None, + na_rep="", + float_format=None, + decimal=".", + quoting=None, + **kwargs, + ): + """ convert to our native types format, 
slicing if desired """ + + values = self.values + if slicer is not None: + values = values[:, slicer] + + # see gh-13418: no special formatting is desired at the + # output (important for appropriate 'quoting' behaviour), + # so do not pass it through the FloatArrayFormatter + if float_format is None and decimal == ".": + mask = isna(values) + + if not quoting: + values = values.astype(str) + else: + values = np.array(values, dtype="object") + + values[mask] = na_rep + return values + + from pandas.io.formats.format import FloatArrayFormatter + + formatter = FloatArrayFormatter( + values, + na_rep=na_rep, + float_format=float_format, + decimal=decimal, + quoting=quoting, + fixed_width=False, + ) + return formatter.get_result_as_array() + + def should_store(self, value): + # when inserting a column should not coerce integers to floats + # unnecessarily + return issubclass(value.dtype.type, np.floating) and value.dtype == self.dtype + + +class ComplexBlock(FloatOrComplexBlock): + __slots__ = () + is_complex = True + + def _can_hold_element(self, element: Any) -> bool: + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, (np.floating, np.integer, np.complexfloating)) + return isinstance( + element, (float, int, complex, np.float_, np.int_) + ) and not isinstance(element, (bool, np.bool_)) + + def should_store(self, value): + return issubclass(value.dtype.type, np.complexfloating) + + +class IntBlock(NumericBlock): + __slots__ = () + is_integer = True + _can_hold_na = False + + def _can_hold_element(self, element: Any) -> bool: + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return ( + issubclass(tipo.type, np.integer) + and not issubclass(tipo.type, (np.datetime64, np.timedelta64)) + and self.dtype.itemsize >= tipo.itemsize + ) + return is_integer(element) + + def should_store(self, value): + return is_integer_dtype(value) and value.dtype == self.dtype + + +class DatetimeLikeBlockMixin: + """Mixin class for 
DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" + + @property + def _holder(self): + return DatetimeArray + + @property + def fill_value(self): + return np.datetime64("NaT", "ns") + + def get_values(self, dtype=None): + """ + return object dtype as boxed values, such as Timestamps/Timedelta + """ + if is_object_dtype(dtype): + values = self.values.ravel() + result = self._holder(values).astype(object) + return result.reshape(self.values.shape) + return self.values + + def iget(self, key): + # GH#31649 we need to wrap scalars in Timestamp/Timedelta + # TODO(EA2D): this can be removed if we ever have 2D EA + result = super().iget(key) + if isinstance(result, np.datetime64): + result = Timestamp(result) + elif isinstance(result, np.timedelta64): + result = Timedelta(result) + return result + + def shift(self, periods, axis=0, fill_value=None): + # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs + values = self.array_values() + new_values = values.shift(periods, fill_value=fill_value, axis=axis) + return self.make_block_same_class(new_values) + + +class DatetimeBlock(DatetimeLikeBlockMixin, Block): + __slots__ = () + is_datetime = True + + def __init__(self, values, placement, ndim=None): + values = self._maybe_coerce_values(values) + super().__init__(values, placement=placement, ndim=ndim) + + @property + def _can_hold_na(self): + return True + + def _maybe_coerce_values(self, values): + """ + Input validation for values passed to __init__. Ensure that + we have datetime64ns, coercing if necessary. + + Parameters + ---------- + values : array-like + Must be convertible to datetime64 + + Returns + ------- + values : ndarray[datetime64ns] + + Overridden by DatetimeTZBlock. 
+ """ + if values.dtype != _NS_DTYPE: + values = conversion.ensure_datetime64ns(values) + + if isinstance(values, DatetimeArray): + values = values._data + + assert isinstance(values, np.ndarray), type(values) + return values + + def astype(self, dtype, copy: bool = False, errors: str = "raise"): + """ + these automatically copy, so copy=True has no effect + raise on an except if raise == True + """ + dtype = pandas_dtype(dtype) + + # if we are passed a datetime64[ns, tz] + if is_datetime64tz_dtype(dtype): + values = self.values + if copy: + # this should be the only copy + values = values.copy() + if getattr(values, "tz", None) is None: + values = DatetimeArray(values).tz_localize("UTC") + values = values.tz_convert(dtype.tz) + return self.make_block(values) + + # delegate + return super().astype(dtype=dtype, copy=copy, errors=errors) + + def _can_hold_element(self, element: Any) -> bool: + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + if self.is_datetimetz: + # require exact match, since non-nano does not exist + return is_dtype_equal(tipo, self.dtype) or is_valid_nat_for_dtype( + element, self.dtype + ) + + # GH#27419 if we get a non-nano datetime64 object + return is_datetime64_dtype(tipo) + elif element is NaT: + return True + elif isinstance(element, datetime): + if self.is_datetimetz: + return tz_compare(element.tzinfo, self.dtype.tz) + return element.tzinfo is None + + return is_valid_nat_for_dtype(element, self.dtype) + + def to_native_types( + self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs + ): + """ convert to our native types format, slicing if desired """ + + values = self.values + i8values = self.values.view("i8") + + if slicer is not None: + values = values[..., slicer] + i8values = i8values[..., slicer] + + from pandas.io.formats.format import _get_format_datetime64_from_values + + fmt = _get_format_datetime64_from_values(values, date_format) + + result = tslib.format_array_from_datetime( + 
i8values.ravel(), + tz=getattr(self.values, "tz", None), + format=fmt, + na_rep=na_rep, + ).reshape(i8values.shape) + return np.atleast_2d(result) + + def should_store(self, value): + return ( + issubclass(value.dtype.type, np.datetime64) + and not is_datetime64tz_dtype(value) + and not is_extension_array_dtype(value) + ) + + def set(self, locs, values): + """ + Modify Block in-place with new item value + + Returns + ------- + None + """ + values = conversion.ensure_datetime64ns(values, copy=False) + + self.values[locs] = values + + def external_values(self): + return np.asarray(self.values.astype("datetime64[ns]", copy=False)) + + def array_values(self) -> ExtensionArray: + return DatetimeArray._simple_new(self.values) + + +class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): + """ implement a datetime64 block with a tz attribute """ + + __slots__ = () + is_datetimetz = True + is_extension = True + + _can_hold_element = DatetimeBlock._can_hold_element + to_native_types = DatetimeBlock.to_native_types + fill_value = np.datetime64("NaT", "ns") + + @property + def _holder(self): + return DatetimeArray + + def _maybe_coerce_values(self, values): + """Input validation for values passed to __init__. Ensure that + we have datetime64TZ, coercing if necessary. + + Parameters + ---------- + values : array-like + Must be convertible to datetime64 + + Returns + ------- + values : DatetimeArray + """ + if not isinstance(values, self._holder): + values = self._holder(values) + + if values.tz is None: + raise ValueError("cannot create a DatetimeTZBlock without a tz") + + return values + + @property + def is_view(self): + """ return a boolean if I am possibly a view """ + # check the ndarray values of the DatetimeIndex values + return self.values._data.base is not None + + def get_values(self, dtype=None): + """ + Returns an ndarray of values. + + Parameters + ---------- + dtype : np.dtype + Only `object`-like dtypes are respected here (not sure + why). 
+ + Returns + ------- + values : ndarray + When ``dtype=object``, then and object-dtype ndarray of + boxed values is returned. Otherwise, an M8[ns] ndarray + is returned. + + DatetimeArray is always 1-d. ``get_values`` will reshape + the return value to be the same dimensionality as the + block. + """ + values = self.values + if is_object_dtype(dtype): + values = values.astype(object) + + values = np.asarray(values) + + if self.ndim == 2: + # Ensure that our shape is correct for DataFrame. + # ExtensionArrays are always 1-D, even in a DataFrame when + # the analogous NumPy-backed column would be a 2-D ndarray. + values = values.reshape(1, -1) + return values + + def to_dense(self): + # we request M8[ns] dtype here, even though it discards tzinfo, + # as lots of code (e.g. anything using values_from_object) + # expects that behavior. + return np.asarray(self.values, dtype=_NS_DTYPE) + + def _slice(self, slicer): + """ return a slice of my values """ + if isinstance(slicer, tuple): + col, loc = slicer + if not com.is_null_slice(col) and col != 0: + raise IndexError(f"{self} only contains one item") + return self.values[loc] + return self.values[slicer] + + def diff(self, n: int, axis: int = 0) -> List["Block"]: + """ + 1st discrete difference. + + Parameters + ---------- + n : int + Number of periods to diff. + axis : int, default 0 + Axis to diff upon. + + Returns + ------- + A list with a new TimeDeltaBlock. + + Notes + ----- + The arguments here are mimicking shift so they are called correctly + by apply. 
+ """ + if axis == 0: + # Cannot currently calculate diff across multiple blocks since this + # function is invoked via apply + raise NotImplementedError + new_values = (self.values - self.shift(n, axis=axis)[0].values).asi8 + + # Reshape the new_values like how algos.diff does for timedelta data + new_values = new_values.reshape(1, len(new_values)) + new_values = new_values.astype("timedelta64[ns]") + return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)] + + def concat_same_type(self, to_concat, placement=None): + # need to handle concat([tz1, tz2]) here, since DatetimeArray + # only handles cases where all the tzs are the same. + # Instead of placing the condition here, it could also go into the + # is_uniform_join_units check, but I'm not sure what is better. + if len({x.dtype for x in to_concat}) > 1: + values = concat_datetime([x.values for x in to_concat]) + placement = placement or slice(0, len(values), 1) + + if self.ndim > 1: + values = np.atleast_2d(values) + return ObjectBlock(values, ndim=self.ndim, placement=placement) + return super().concat_same_type(to_concat, placement) + + def fillna(self, value, limit=None, inplace=False, downcast=None): + # We support filling a DatetimeTZ with a `value` whose timezone + # is different by coercing to object. + if self._can_hold_element(value): + return super().fillna(value, limit, inplace, downcast) + + # different timezones, or a non-tz + return self.astype(object).fillna( + value, limit=limit, inplace=inplace, downcast=downcast + ) + + def setitem(self, indexer, value): + # https://github.com/pandas-dev/pandas/issues/24020 + # Need a dedicated setitem until #24020 (type promotion in setitem + # for extension arrays) is designed and implemented. 
+ if self._can_hold_element(value) or ( + isinstance(indexer, np.ndarray) and indexer.size == 0 + ): + return super().setitem(indexer, value) + + obj_vals = self.values.astype(object) + newb = make_block( + obj_vals, placement=self.mgr_locs, klass=ObjectBlock, ndim=self.ndim + ) + return newb.setitem(indexer, value) + + def equals(self, other) -> bool: + # override for significant performance improvement + if self.dtype != other.dtype or self.shape != other.shape: + return False + return (self.values.view("i8") == other.values.view("i8")).all() + + def quantile(self, qs, interpolation="linear", axis=0): + naive = self.values.view("M8[ns]") + + # kludge for 2D block with 1D values + naive = naive.reshape(self.shape) + + blk = self.make_block(naive) + res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis) + + # ravel is kludge for 2D block with 1D values, assumes column-like + aware = self._holder(res_blk.values.ravel(), dtype=self.dtype) + return self.make_block_same_class(aware, ndim=res_blk.ndim) + + +class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): + __slots__ = () + is_timedelta = True + _can_hold_na = True + is_numeric = False + fill_value = np.timedelta64("NaT", "ns") + + def __init__(self, values, placement, ndim=None): + if values.dtype != _TD_DTYPE: + values = conversion.ensure_timedelta64ns(values) + if isinstance(values, TimedeltaArray): + values = values._data + assert isinstance(values, np.ndarray), type(values) + super().__init__(values, placement=placement, ndim=ndim) + + @property + def _holder(self): + return TimedeltaArray + + def _can_hold_element(self, element: Any) -> bool: + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, np.timedelta64) + elif element is NaT: + return True + elif isinstance(element, (timedelta, np.timedelta64)): + return True + return is_valid_nat_for_dtype(element, self.dtype) + + def fillna(self, value, **kwargs): + + # allow filling with integers to be + # 
interpreted as nanoseconds + if is_integer(value): + # Deprecation GH#24694, GH#19233 + raise TypeError( + "Passing integers to fillna for timedelta64[ns] dtype is no " + "longer supported. To obtain the old behavior, pass " + "`pd.Timedelta(seconds=n)` instead." + ) + return super().fillna(value, **kwargs) + + def should_store(self, value): + return issubclass( + value.dtype.type, np.timedelta64 + ) and not is_extension_array_dtype(value) + + def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs): + """ convert to our native types format, slicing if desired """ + + values = self.values + if slicer is not None: + values = values[:, slicer] + mask = isna(values) + + rvalues = np.empty(values.shape, dtype=object) + if na_rep is None: + na_rep = "NaT" + rvalues[mask] = na_rep + imask = (~mask).ravel() + + # FIXME: + # should use the formats.format.Timedelta64Formatter here + # to figure what format to pass to the Timedelta + # e.g. to not show the decimals say + rvalues.flat[imask] = np.array( + [Timedelta(val)._repr_base(format="all") for val in values.ravel()[imask]], + dtype=object, + ) + return rvalues + + def external_values(self, dtype=None): + return np.asarray(self.values.astype("timedelta64[ns]", copy=False)) + + def array_values(self) -> ExtensionArray: + return TimedeltaArray._simple_new(self.values) + + +class BoolBlock(NumericBlock): + __slots__ = () + is_bool = True + _can_hold_na = False + + def _can_hold_element(self, element: Any) -> bool: + tipo = maybe_infer_dtype_type(element) + if tipo is not None: + return issubclass(tipo.type, np.bool_) + return isinstance(element, (bool, np.bool_)) + + def should_store(self, value): + return issubclass(value.dtype.type, np.bool_) and not is_extension_array_dtype( + value + ) + + def replace( + self, to_replace, value, inplace=False, filter=None, regex=False, convert=True + ): + inplace = validate_bool_kwarg(inplace, "inplace") + to_replace_values = np.atleast_1d(to_replace) + if not 
np.can_cast(to_replace_values, bool): + return self + return super().replace( + to_replace, + value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert, + ) + + +class ObjectBlock(Block): + __slots__ = () + is_object = True + _can_hold_na = True + + def __init__(self, values, placement=None, ndim=2): + if issubclass(values.dtype.type, str): + values = np.array(values, dtype=object) + + super().__init__(values, ndim=ndim, placement=placement) + + @property + def is_bool(self): + """ we can be a bool if we have only bool values but are of type + object + """ + return lib.is_bool_array(self.values.ravel()) + + def convert( + self, + copy: bool = True, + datetime: bool = True, + numeric: bool = True, + timedelta: bool = True, + coerce: bool = False, + ): + """ attempt to coerce any object types to better types return a copy of + the block (if copy = True) by definition we ARE an ObjectBlock!!!!! + + can return multiple blocks! + """ + + # operate column-by-column + def f(mask, val, idx): + shape = val.shape + values = soft_convert_objects( + val.ravel(), + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + coerce=coerce, + copy=copy, + ) + if isinstance(values, np.ndarray): + # TODO: allow EA once reshape is supported + values = values.reshape(shape) + + values = _block_shape(values, ndim=self.ndim) + return values + + if self.ndim == 2: + blocks = self.split_and_operate(None, f, False) + else: + values = f(None, self.values.ravel(), None) + blocks = [make_block(values, ndim=self.ndim, placement=self.mgr_locs)] + + return blocks + + def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]: + + if downcast is not None: + return blocks + + # split and convert the blocks + return _extend_blocks([b.convert(datetime=True, numeric=False) for b in blocks]) + + def _can_hold_element(self, element: Any) -> bool: + return True + + def should_store(self, value): + return not ( + issubclass( + value.dtype.type, + (np.integer, 
np.floating, np.complexfloating, np.datetime64, np.bool_), + ) + or is_extension_array_dtype(value) + ) + + def replace( + self, to_replace, value, inplace=False, filter=None, regex=False, convert=True + ): + to_rep_is_list = is_list_like(to_replace) + value_is_list = is_list_like(value) + both_lists = to_rep_is_list and value_is_list + either_list = to_rep_is_list or value_is_list + + result_blocks = [] + blocks = [self] + + if not either_list and is_re(to_replace): + return self._replace_single( + to_replace, + value, + inplace=inplace, + filter=filter, + regex=True, + convert=convert, + ) + elif not (either_list or regex): + return super().replace( + to_replace, + value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert, + ) + elif both_lists: + for to_rep, v in zip(to_replace, value): + result_blocks = [] + for b in blocks: + result = b._replace_single( + to_rep, + v, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert, + ) + result_blocks = _extend_blocks(result, result_blocks) + blocks = result_blocks + return result_blocks + + elif to_rep_is_list and regex: + for to_rep in to_replace: + result_blocks = [] + for b in blocks: + result = b._replace_single( + to_rep, + value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert, + ) + result_blocks = _extend_blocks(result, result_blocks) + blocks = result_blocks + return result_blocks + + return self._replace_single( + to_replace, + value, + inplace=inplace, + filter=filter, + convert=convert, + regex=regex, + ) + + def _replace_single( + self, + to_replace, + value, + inplace=False, + filter=None, + regex=False, + convert=True, + mask=None, + ): + """ + Replace elements by the given value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. 
+ filter : list, optional + regex : bool, default False + If true, perform regular expression substitution. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicate corresponding element is ignored. + + Returns + ------- + a new block, the result after replacing + """ + inplace = validate_bool_kwarg(inplace, "inplace") + + # to_replace is regex compilable + to_rep_re = regex and is_re_compilable(to_replace) + + # regex is regex compilable + regex_re = is_re_compilable(regex) + + # only one will survive + if to_rep_re and regex_re: + raise AssertionError( + "only one of to_replace and regex can be regex compilable" + ) + + # if regex was passed as something that can be a regex (rather than a + # boolean) + if regex_re: + to_replace = regex + + regex = regex_re or to_rep_re + + # try to get the pattern attribute (compiled re) or it's a string + if is_re(to_replace): + pattern = to_replace.pattern + else: + pattern = to_replace + + # if the pattern is not empty and to_replace is either a string or a + # regex + if regex and pattern: + rx = re.compile(to_replace) + else: + # if the thing to replace is not a string or compiled regex call + # the superclass method -> to_replace is some kind of object + return super().replace( + to_replace, value, inplace=inplace, filter=filter, regex=regex + ) + + new_values = self.values if inplace else self.values.copy() + + # deal with replacing values with objects (strings) that match but + # whose replacement is not a string (numeric, nan, object) + if isna(value) or not isinstance(value, str): + + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return value if rx.search(s) is not None else s + else: + return s + + else: + # value is guaranteed to be a string here, s can be either a string + # or null if it's null it gets returned + def re_replacer(s): + if is_re(rx) and isinstance(s, str): + return rx.sub(value, s) + else: + return s 
+ + f = np.vectorize(re_replacer, otypes=[self.dtype]) + + if filter is None: + filt = slice(None) + else: + filt = self.mgr_locs.isin(filter).nonzero()[0] + + if mask is None: + new_values[filt] = f(new_values[filt]) + else: + new_values[filt][mask] = f(new_values[filt][mask]) + + # convert + block = self.make_block(new_values) + if convert: + block = block.convert(numeric=False) + return block + + def _replace_coerce( + self, to_replace, value, inplace=True, regex=False, convert=False, mask=None + ): + """ + Replace value corresponding to the given boolean array with another + value. + + Parameters + ---------- + to_replace : object or pattern + Scalar to replace or regular expression to match. + value : object + Replacement object. + inplace : bool, default False + Perform inplace modification. + regex : bool, default False + If true, perform regular expression substitution. + convert : bool, default True + If true, try to coerce any object types to better types. + mask : array-like of bool, optional + True indicate corresponding element is ignored. + + Returns + ------- + A new block if there is anything to replace or the original block. 
+ """ + if mask.any(): + block = super()._replace_coerce( + to_replace=to_replace, + value=value, + inplace=inplace, + regex=regex, + convert=convert, + mask=mask, + ) + if convert: + block = [b.convert(numeric=False, copy=True) for b in block] + return block + if convert: + return [self.convert(numeric=False, copy=True)] + return self + + +class CategoricalBlock(ExtensionBlock): + __slots__ = () + is_categorical = True + _verify_integrity = True + _can_hold_na = True + _concatenator = staticmethod(concat_categorical) + + def __init__(self, values, placement, ndim=None): + # coerce to categorical if we can + values = extract_array(values) + assert isinstance(values, Categorical), type(values) + super().__init__(values, placement=placement, ndim=ndim) + + @property + def _holder(self): + return Categorical + + @property + def array_dtype(self): + """ the dtype to return if I want to construct this block as an + array + """ + return np.object_ + + def to_dense(self): + # Categorical.get_values returns a DatetimeIndex for datetime + # categories, so we can't simply use `np.asarray(self.values)` like + # other types. + return self.values._internal_get_values() + + def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs): + """ convert to our native types format, slicing if desired """ + + values = self.values + if slicer is not None: + # Categorical is always one dimension + values = values[slicer] + mask = isna(values) + values = np.array(values, dtype="object") + values[mask] = na_rep + + # we are expected to return a 2-d ndarray + return values.reshape(1, len(values)) + + def concat_same_type(self, to_concat, placement=None): + """ + Concatenate list of single blocks of the same type. + + Note that this CategoricalBlock._concat_same_type *may* not + return a CategoricalBlock. When the categories in `to_concat` + differ, this will return an object ndarray. + + If / when we decide we don't like that behavior: + + 1. 
Change Categorical._concat_same_type to use union_categoricals + 2. Delete this method. + """ + values = self._concatenator( + [blk.values for blk in to_concat], axis=self.ndim - 1 + ) + # not using self.make_block_same_class as values can be object dtype + return make_block( + values, placement=placement or slice(0, len(values), 1), ndim=self.ndim + ) + + def replace( + self, + to_replace, + value, + inplace: bool = False, + filter=None, + regex: bool = False, + convert: bool = True, + ): + inplace = validate_bool_kwarg(inplace, "inplace") + result = self if inplace else self.copy() + if filter is None: # replace was called on a series + result.values.replace(to_replace, value, inplace=True) + if convert: + return result.convert(numeric=False, copy=not inplace) + else: + return result + else: # replace was called on a DataFrame + if not isna(value): + result.values.add_categories(value, inplace=True) + return super(CategoricalBlock, result).replace( + to_replace, value, inplace, filter, regex, convert + ) + + +# ----------------------------------------------------------------- +# Constructor Helpers + + +def get_block_type(values, dtype=None): + """ + Find the appropriate Block subclass to use for the given values and dtype. 
+ + Parameters + ---------- + values : ndarray-like + dtype : numpy or pandas dtype + + Returns + ------- + cls : class, subclass of Block + """ + dtype = dtype or values.dtype + vtype = dtype.type + + if is_sparse(dtype): + # Need this first(ish) so that Sparse[datetime] is sparse + cls = ExtensionBlock + elif is_categorical(values): + cls = CategoricalBlock + elif issubclass(vtype, np.datetime64): + assert not is_datetime64tz_dtype(values) + cls = DatetimeBlock + elif is_datetime64tz_dtype(values): + cls = DatetimeTZBlock + elif is_interval_dtype(dtype) or is_period_dtype(dtype): + cls = ObjectValuesExtensionBlock + elif is_extension_array_dtype(values): + cls = ExtensionBlock + elif issubclass(vtype, np.floating): + cls = FloatBlock + elif issubclass(vtype, np.timedelta64): + assert issubclass(vtype, np.integer) + cls = TimeDeltaBlock + elif issubclass(vtype, np.complexfloating): + cls = ComplexBlock + elif issubclass(vtype, np.integer): + cls = IntBlock + elif dtype == np.bool_: + cls = BoolBlock + else: + cls = ObjectBlock + return cls + + +def make_block(values, placement, klass=None, ndim=None, dtype=None): + # Ensure that we don't allow PandasArray / PandasDtype in internals. + # For now, blocks should be backed by ndarrays when possible. + if isinstance(values, ABCPandasArray): + values = values.to_numpy() + if ndim and ndim > 1: + values = np.atleast_2d(values) + + if isinstance(dtype, PandasDtype): + dtype = dtype.numpy_dtype + + if klass is None: + dtype = dtype or values.dtype + klass = get_block_type(values, dtype) + + elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values): + # TODO: This is no longer hit internally; does it need to be retained + # for e.g. pyarrow? 
+ values = DatetimeArray._simple_new(values, dtype=dtype) + + return klass(values, ndim=ndim, placement=placement) + + +# ----------------------------------------------------------------- + + +def _extend_blocks(result, blocks=None): + """ return a new extended blocks, given the result """ + from pandas.core.internals import BlockManager + + if blocks is None: + blocks = [] + if isinstance(result, list): + for r in result: + if isinstance(r, list): + blocks.extend(r) + else: + blocks.append(r) + elif isinstance(result, BlockManager): + blocks.extend(result.blocks) + else: + blocks.append(result) + return blocks + + +def _block_shape(values, ndim=1, shape=None): + """ guarantee the shape of the values to be at least 1 d """ + if values.ndim < ndim: + if shape is None: + shape = values.shape + if not is_extension_array_dtype(values): + # TODO: https://github.com/pandas-dev/pandas/issues/23023 + # block.shape is incorrect for "2D" ExtensionArrays + # We can't, and don't need to, reshape. + values = values.reshape(tuple((1,) + shape)) + return values + + +def _merge_blocks(blocks, dtype=None, _can_consolidate=True): + + if len(blocks) == 1: + return blocks[0] + + if _can_consolidate: + + if dtype is None: + if len({b.dtype for b in blocks}) != 1: + raise AssertionError("_merge_blocks are invalid!") + + # FIXME: optimization potential in case all mgrs contain slices and + # combination of those slices is a slice, too. + new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) + new_values = np.vstack([b.values for b in blocks]) + + argsort = np.argsort(new_mgr_locs) + new_values = new_values[argsort] + new_mgr_locs = new_mgr_locs[argsort] + + return make_block(new_values, placement=new_mgr_locs) + + # no merge + return blocks + + +def _safe_reshape(arr, new_shape): + """ + If possible, reshape `arr` to have shape `new_shape`, + with a couple of exceptions (see gh-13012): + + 1) If `arr` is a ExtensionArray or Index, `arr` will be + returned as is. 
+ 2) If `arr` is a Series, the `_values` attribute will + be reshaped and returned. + + Parameters + ---------- + arr : array-like, object to be reshaped + new_shape : int or tuple of ints, the new shape + """ + if isinstance(arr, ABCSeries): + arr = arr._values + if not isinstance(arr, ABCExtensionArray): + arr = arr.reshape(new_shape) + return arr + + +def _putmask_smart(v, mask, n): + """ + Return a new ndarray, try to preserve dtype if possible. + + Parameters + ---------- + v : `values`, updated in-place (array like) + mask : np.ndarray + Applies to both sides (array like). + n : `new values` either scalar or an array like aligned with `values` + + Returns + ------- + values : ndarray with updated values + this *may* be a copy of the original + + See Also + -------- + ndarray.putmask + """ + + # we cannot use np.asarray() here as we cannot have conversions + # that numpy does when numeric are mixed with strings + + # n should be the length of the mask or a scalar here + if not is_list_like(n): + n = np.repeat(n, len(mask)) + + # see if we are only masking values that if putted + # will work in the current dtype + try: + nn = n[mask] + except TypeError: + # TypeError: only integer scalar arrays can be converted to a scalar index + pass + else: + # make sure that we have a nullable type + # if we have nulls + if not _isna_compat(v, nn[0]): + pass + elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)): + # only compare integers/floats + pass + elif not (is_float_dtype(v.dtype) or is_integer_dtype(v.dtype)): + # only compare integers/floats + pass + else: + + # we ignore ComplexWarning here + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", np.ComplexWarning) + nn_at = nn.astype(v.dtype) + + comp = nn == nn_at + if is_list_like(comp) and comp.all(): + nv = v.copy() + nv[mask] = nn_at + return nv + + n = np.asarray(n) + + def _putmask_preserve(nv, n): + try: + nv[mask] = n[mask] + except (IndexError, ValueError): + nv[mask] 
= n + return nv + + # preserves dtype if possible + if v.dtype.kind == n.dtype.kind: + return _putmask_preserve(v, n) + + # change the dtype if needed + dtype, _ = maybe_promote(n.dtype) + + if is_extension_array_dtype(v.dtype) and is_object_dtype(dtype): + v = v._internal_get_values(dtype) + else: + v = v.astype(dtype) + + return _putmask_preserve(v, n) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py new file mode 100644 index 00000000..c75373b8 --- /dev/null +++ b/pandas/core/internals/concat.py @@ -0,0 +1,493 @@ +# TODO: Needs a better name; too many modules are already called "concat" +from collections import defaultdict +import copy + +import numpy as np + +from pandas._libs import internals as libinternals, tslibs +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + _get_dtype, + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_extension_array_dtype, + is_float_dtype, + is_numeric_dtype, + is_sparse, + is_timedelta64_dtype, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.missing import isna + +import pandas.core.algorithms as algos + + +def get_mgr_concatenation_plan(mgr, indexers): + """ + Construct concatenation plan for given block manager and indexers. + + Parameters + ---------- + mgr : BlockManager + indexers : dict of {axis: indexer} + + Returns + ------- + plan : list of (BlockPlacement, JoinUnit) tuples + + """ + # Calculate post-reindex shape , save for item axis which will be separate + # for each block anyway. 
+ mgr_shape = list(mgr.shape) + for ax, indexer in indexers.items(): + mgr_shape[ax] = len(indexer) + mgr_shape = tuple(mgr_shape) + + if 0 in indexers: + ax0_indexer = indexers.pop(0) + blknos = algos.take_1d(mgr._blknos, ax0_indexer, fill_value=-1) + blklocs = algos.take_1d(mgr._blklocs, ax0_indexer, fill_value=-1) + else: + + if mgr._is_single_block: + blk = mgr.blocks[0] + return [(blk.mgr_locs, JoinUnit(blk, mgr_shape, indexers))] + + ax0_indexer = None + blknos = mgr._blknos + blklocs = mgr._blklocs + + plan = [] + for blkno, placements in libinternals.get_blkno_placements(blknos, group=False): + + assert placements.is_slice_like + + join_unit_indexers = indexers.copy() + + shape = list(mgr_shape) + shape[0] = len(placements) + shape = tuple(shape) + + if blkno == -1: + unit = JoinUnit(None, shape) + else: + blk = mgr.blocks[blkno] + ax0_blk_indexer = blklocs[placements.indexer] + + unit_no_ax0_reindexing = ( + len(placements) == len(blk.mgr_locs) + and + # Fastpath detection of join unit not + # needing to reindex its block: no ax0 + # reindexing took place and block + # placement was sequential before. + ( + ( + ax0_indexer is None + and blk.mgr_locs.is_slice_like + and blk.mgr_locs.as_slice.step == 1 + ) + or + # Slow-ish detection: all indexer locs + # are sequential (and length match is + # checked above). + (np.diff(ax0_blk_indexer) == 1).all() + ) + ) + + # Omit indexer if no item reindexing is required. + if unit_no_ax0_reindexing: + join_unit_indexers.pop(0, None) + else: + join_unit_indexers[0] = ax0_blk_indexer + + unit = JoinUnit(blk, shape, join_unit_indexers) + + plan.append((placements, unit)) + + return plan + + +class JoinUnit: + def __init__(self, block, shape, indexers=None): + # Passing shape explicitly is required for cases when block is None. 
+ if indexers is None: + indexers = {} + self.block = block + self.indexers = indexers + self.shape = shape + + def __repr__(self) -> str: + return f"{type(self).__name__}({repr(self.block)}, {self.indexers})" + + @cache_readonly + def needs_filling(self): + for indexer in self.indexers.values(): + # FIXME: cache results of indexer == -1 checks. + if (indexer == -1).any(): + return True + + return False + + @cache_readonly + def dtype(self): + if self.block is None: + raise AssertionError("Block is None, no dtype") + + if not self.needs_filling: + return self.block.dtype + else: + return _get_dtype(maybe_promote(self.block.dtype, self.block.fill_value)[0]) + + @cache_readonly + def is_na(self): + if self.block is None: + return True + + if not self.block._can_hold_na: + return False + + # Usually it's enough to check but a small fraction of values to see if + # a block is NOT null, chunks should help in such cases. 1000 value + # was chosen rather arbitrarily. + values = self.block.values + if self.block.is_categorical: + values_flat = values.categories + elif is_sparse(self.block.values.dtype): + return False + elif self.block.is_extension: + values_flat = values + else: + values_flat = values.ravel(order="K") + total_len = values_flat.shape[0] + chunk_len = max(total_len // 40, 1000) + for i in range(0, total_len, chunk_len): + if not isna(values_flat[i : i + chunk_len]).all(): + return False + + return True + + def get_reindexed_values(self, empty_dtype, upcasted_na): + if upcasted_na is None: + # No upcasting is necessary + fill_value = self.block.fill_value + values = self.block.get_values() + else: + fill_value = upcasted_na + + if self.is_na: + if getattr(self.block, "is_object", False): + # we want to avoid filling with np.nan if we are + # using None; we already know that we are all + # nulls + values = self.block.values.ravel(order="K") + if len(values) and values[0] is None: + fill_value = None + + if getattr(self.block, "is_datetimetz", False) or 
is_datetime64tz_dtype( + empty_dtype + ): + if self.block is None: + array = empty_dtype.construct_array_type() + return array( + np.full(self.shape[1], fill_value.value), dtype=empty_dtype + ) + elif getattr(self.block, "is_categorical", False): + pass + elif getattr(self.block, "is_extension", False): + pass + else: + missing_arr = np.empty(self.shape, dtype=empty_dtype) + missing_arr.fill(fill_value) + return missing_arr + + if not self.indexers: + if not self.block._can_consolidate: + # preserve these for validation in concat_compat + return self.block.values + + if self.block.is_bool and not self.block.is_categorical: + # External code requested filling/upcasting, bool values must + # be upcasted to object to avoid being upcasted to numeric. + values = self.block.astype(np.object_).values + elif self.block.is_extension: + values = self.block.values + else: + # No dtype upcasting is done here, it will be performed during + # concatenation itself. + values = self.block.get_values() + + if not self.indexers: + # If there's no indexing to be done, we want to signal outside + # code that this array must be copied explicitly. This is done + # by returning a view and checking `retval.base`. + values = values.view() + + else: + for ax, indexer in self.indexers.items(): + values = algos.take_nd(values, indexer, axis=ax, fill_value=fill_value) + + return values + + +def concatenate_join_units(join_units, concat_axis, copy): + """ + Concatenate values from several join units along selected axis. + """ + if concat_axis == 0 and len(join_units) > 1: + # Concatenating join units along ax0 is handled in _merge_blocks. + raise AssertionError("Concatenating join units along axis0") + + empty_dtype, upcasted_na = _get_empty_dtype_and_na(join_units) + + to_concat = [ + ju.get_reindexed_values(empty_dtype=empty_dtype, upcasted_na=upcasted_na) + for ju in join_units + ] + + if len(to_concat) == 1: + # Only one block, nothing to concatenate. 
+ concat_values = to_concat[0] + if copy: + if isinstance(concat_values, np.ndarray): + # non-reindexed (=not yet copied) arrays are made into a view + # in JoinUnit.get_reindexed_values + if concat_values.base is not None: + concat_values = concat_values.copy() + else: + concat_values = concat_values.copy() + else: + concat_values = concat_compat(to_concat, axis=concat_axis) + + return concat_values + + +def _get_empty_dtype_and_na(join_units): + """ + Return dtype and N/A values to use when concatenating specified units. + + Returned N/A value may be None which means there was no casting involved. + + Returns + ------- + dtype + na + """ + if len(join_units) == 1: + blk = join_units[0].block + if blk is None: + return np.float64, np.nan + + if _is_uniform_reindex(join_units): + # FIXME: integrate property + empty_dtype = join_units[0].block.dtype + upcasted_na = join_units[0].block.fill_value + return empty_dtype, upcasted_na + + has_none_blocks = False + dtypes = [None] * len(join_units) + for i, unit in enumerate(join_units): + if unit.block is None: + has_none_blocks = True + else: + dtypes[i] = unit.dtype + + upcast_classes = defaultdict(list) + null_upcast_classes = defaultdict(list) + for dtype, unit in zip(dtypes, join_units): + if dtype is None: + continue + + if is_categorical_dtype(dtype): + upcast_cls = "category" + elif is_datetime64tz_dtype(dtype): + upcast_cls = "datetimetz" + elif issubclass(dtype.type, np.bool_): + upcast_cls = "bool" + elif issubclass(dtype.type, np.object_): + upcast_cls = "object" + elif is_datetime64_dtype(dtype): + upcast_cls = "datetime" + elif is_timedelta64_dtype(dtype): + upcast_cls = "timedelta" + elif is_sparse(dtype): + upcast_cls = dtype.subtype.name + elif is_extension_array_dtype(dtype): + upcast_cls = "object" + elif is_float_dtype(dtype) or is_numeric_dtype(dtype): + upcast_cls = dtype.name + else: + upcast_cls = "float" + + # Null blocks should not influence upcast class selection, unless there + # are only null 
blocks, when same upcasting rules must be applied to + # null upcast classes. + if unit.is_na: + null_upcast_classes[upcast_cls].append(dtype) + else: + upcast_classes[upcast_cls].append(dtype) + + if not upcast_classes: + upcast_classes = null_upcast_classes + + # TODO: de-duplicate with maybe_promote? + # create the result + if "object" in upcast_classes: + return np.dtype(np.object_), np.nan + elif "bool" in upcast_classes: + if has_none_blocks: + return np.dtype(np.object_), np.nan + else: + return np.dtype(np.bool_), None + elif "category" in upcast_classes: + return np.dtype(np.object_), np.nan + elif "datetimetz" in upcast_classes: + # GH-25014. We use NaT instead of iNaT, since this eventually + # ends up in DatetimeArray.take, which does not allow iNaT. + dtype = upcast_classes["datetimetz"] + return dtype[0], tslibs.NaT + elif "datetime" in upcast_classes: + return np.dtype("M8[ns]"), np.datetime64("NaT", "ns") + elif "timedelta" in upcast_classes: + return np.dtype("m8[ns]"), np.timedelta64("NaT", "ns") + else: # pragma + try: + g = np.find_common_type(upcast_classes, []) + except TypeError: + # At least one is an ExtensionArray + return np.dtype(np.object_), np.nan + else: + if is_float_dtype(g): + return g, g.type(np.nan) + elif is_numeric_dtype(g): + if has_none_blocks: + return np.float64, np.nan + else: + return g, None + + msg = "invalid dtype determination in get_concat_dtype" + raise AssertionError(msg) + + +def is_uniform_join_units(join_units): + """ + Check if the join units consist of blocks of uniform type that can + be concatenated using Block.concat_same_type instead of the generic + concatenate_join_units (which uses `concat_compat`). + + """ + return ( + # all blocks need to have the same type + all(type(ju.block) is type(join_units[0].block) for ju in join_units) + and # noqa + # no blocks that would get missing values (can lead to type upcasts) + # unless we're an extension dtype. 
+ all(not ju.is_na or ju.block.is_extension for ju in join_units) + and + # no blocks with indexers (as then the dimensions do not fit) + all(not ju.indexers for ju in join_units) + and + # only use this path when there is something to concatenate + len(join_units) > 1 + ) + + +def _is_uniform_reindex(join_units) -> bool: + return ( + # TODO: should this be ju.block._can_hold_na? + all(ju.block and ju.block.is_extension for ju in join_units) + and len({ju.block.dtype.name for ju in join_units}) == 1 + ) + + +def _trim_join_unit(join_unit, length): + """ + Reduce join_unit's shape along item axis to length. + + Extra items that didn't fit are returned as a separate block. + """ + + if 0 not in join_unit.indexers: + extra_indexers = join_unit.indexers + + if join_unit.block is None: + extra_block = None + else: + extra_block = join_unit.block.getitem_block(slice(length, None)) + join_unit.block = join_unit.block.getitem_block(slice(length)) + else: + extra_block = join_unit.block + + extra_indexers = copy.copy(join_unit.indexers) + extra_indexers[0] = extra_indexers[0][length:] + join_unit.indexers[0] = join_unit.indexers[0][:length] + + extra_shape = (join_unit.shape[0] - length,) + join_unit.shape[1:] + join_unit.shape = (length,) + join_unit.shape[1:] + + return JoinUnit(block=extra_block, indexers=extra_indexers, shape=extra_shape) + + +def combine_concat_plans(plans, concat_axis): + """ + Combine multiple concatenation plans into one. + + existing_plan is updated in-place. 
+ """ + if len(plans) == 1: + for p in plans[0]: + yield p[0], [p[1]] + + elif concat_axis == 0: + offset = 0 + for plan in plans: + last_plc = None + + for plc, unit in plan: + yield plc.add(offset), [unit] + last_plc = plc + + if last_plc is not None: + offset += last_plc.as_slice.stop + + else: + num_ended = [0] + + def _next_or_none(seq): + retval = next(seq, None) + if retval is None: + num_ended[0] += 1 + return retval + + plans = list(map(iter, plans)) + next_items = list(map(_next_or_none, plans)) + + while num_ended[0] != len(next_items): + if num_ended[0] > 0: + raise ValueError("Plan shapes are not aligned") + + placements, units = zip(*next_items) + + lengths = list(map(len, placements)) + min_len, max_len = min(lengths), max(lengths) + + if min_len == max_len: + yield placements[0], units + next_items[:] = map(_next_or_none, plans) + else: + yielded_placement = None + yielded_units = [None] * len(next_items) + for i, (plc, unit) in enumerate(next_items): + yielded_units[i] = unit + if len(plc) > min_len: + # _trim_join_unit updates unit in place, so only + # placement needs to be sliced to skip min_len. + next_items[i] = (plc[min_len:], _trim_join_unit(unit, min_len)) + else: + yielded_placement = plc + next_items[i] = _next_or_none(plans[i]) + + yield yielded_placement, yielded_units diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py new file mode 100644 index 00000000..0856f653 --- /dev/null +++ b/pandas/core/internals/construction.py @@ -0,0 +1,626 @@ +""" +Functions for preparing various inputs passed to the DataFrame or Series +constructors before passing them to a BlockManager. 
+""" +from collections import abc + +import numpy as np +import numpy.ma as ma + +from pandas._libs import lib + +from pandas.core.dtypes.cast import ( + construct_1d_arraylike_from_scalar, + maybe_cast_to_datetime, + maybe_convert_platform, + maybe_infer_to_datetimelike, + maybe_upcast, +) +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCIndexClass, + ABCPeriodIndex, + ABCSeries, + ABCTimedeltaIndex, +) + +from pandas.core import algorithms, common as com +from pandas.core.arrays import Categorical +from pandas.core.construction import sanitize_array +from pandas.core.indexes import base as ibase +from pandas.core.indexes.api import ( + Index, + ensure_index, + get_objs_combined_axis, + union_indexes, +) +from pandas.core.internals import ( + create_block_manager_from_arrays, + create_block_manager_from_blocks, +) + +# --------------------------------------------------------------------- +# BlockManager Interface + + +def arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): + """ + Segregate Series based on type and coerce into matrices. + + Needs to handle a lot of exceptional cases. + """ + # figure out the index, if necessary + if index is None: + index = extract_index(arrays) + else: + index = ensure_index(index) + + # don't force copy because getting jammed in an ndarray anyway + arrays = _homogenize(arrays, index, dtype) + + # from BlockManager perspective + axes = [ensure_index(columns), index] + + return create_block_manager_from_arrays(arrays, arr_names, axes) + + +def masked_rec_array_to_mgr(data, index, columns, dtype, copy): + """ + Extract from a masked rec array and create the manager. 
+ """ + + # essentially process a record array then fill it + fill_value = data.fill_value + fdata = ma.getdata(data) + if index is None: + index = get_names_from_index(fdata) + if index is None: + index = ibase.default_index(len(data)) + index = ensure_index(index) + + if columns is not None: + columns = ensure_index(columns) + arrays, arr_columns = to_arrays(fdata, columns) + + # fill if needed + new_arrays = [] + for fv, arr, col in zip(fill_value, arrays, arr_columns): + # TODO: numpy docs suggest fv must be scalar, but could it be + # non-scalar for object dtype? + assert lib.is_scalar(fv), fv + mask = ma.getmaskarray(data[col]) + if mask.any(): + arr, fv = maybe_upcast(arr, fill_value=fv, copy=True) + arr[mask] = fv + new_arrays.append(arr) + + # create the manager + arrays, arr_columns = reorder_arrays(new_arrays, arr_columns, columns) + if columns is None: + columns = arr_columns + + mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype) + + if copy: + mgr = mgr.copy() + return mgr + + +# --------------------------------------------------------------------- +# DataFrame Constructor Interface + + +def init_ndarray(values, index, columns, dtype=None, copy=False): + # input must be a ndarray, list, Series, index + + if isinstance(values, ABCSeries): + if columns is None: + if values.name is not None: + columns = [values.name] + if index is None: + index = values.index + else: + values = values.reindex(index) + + # zero len case (GH #2234) + if not len(values) and columns is not None and len(columns): + values = np.empty((0, 1), dtype=object) + + # we could have a categorical type passed or coerced to 'category' + # recast this to an arrays_to_mgr + if is_categorical_dtype(getattr(values, "dtype", None)) or is_categorical_dtype( + dtype + ): + + if not hasattr(values, "dtype"): + values = prep_ndarray(values, copy=copy) + values = values.ravel() + elif copy: + values = values.copy() + + index, columns = _get_axes(len(values), 1, index, columns) + 
return arrays_to_mgr([values], columns, index, columns, dtype=dtype) + elif is_extension_array_dtype(values) or is_extension_array_dtype(dtype): + # GH#19157 + + if isinstance(values, np.ndarray) and values.ndim > 1: + # GH#12513 a EA dtype passed with a 2D array, split into + # multiple EAs that view the values + values = [values[:, n] for n in range(values.shape[1])] + else: + values = [values] + + if columns is None: + columns = list(range(len(values))) + return arrays_to_mgr(values, columns, index, columns, dtype=dtype) + + # by definition an array here + # the dtypes will be coerced to a single dtype + values = prep_ndarray(values, copy=copy) + + if dtype is not None: + if not is_dtype_equal(values.dtype, dtype): + try: + values = values.astype(dtype) + except Exception as orig: + # e.g. ValueError when trying to cast object dtype to float64 + raise ValueError( + f"failed to cast to '{dtype}' (Exception was: {orig})" + ) from orig + + index, columns = _get_axes(*values.shape, index=index, columns=columns) + values = values.T + + # if we don't have a dtype specified, then try to convert objects + # on the entire block; this is to convert if we have datetimelike's + # embedded in an object type + if dtype is None and is_object_dtype(values): + + if values.ndim == 2 and values.shape[0] != 1: + # transpose and separate blocks + + dvals_list = [maybe_infer_to_datetimelike(row) for row in values] + for n in range(len(dvals_list)): + if isinstance(dvals_list[n], np.ndarray): + dvals_list[n] = dvals_list[n].reshape(1, -1) + + from pandas.core.internals.blocks import make_block + + # TODO: What about re-joining object columns? 
+ block_values = [ + make_block(dvals_list[n], placement=[n]) for n in range(len(dvals_list)) + ] + + else: + datelike_vals = maybe_infer_to_datetimelike(values) + block_values = [datelike_vals] + else: + block_values = [values] + + return create_block_manager_from_blocks(block_values, [columns, index]) + + +def init_dict(data, index, columns, dtype=None): + """ + Segregate Series based on type and coerce into matrices. + Needs to handle a lot of exceptional cases. + """ + if columns is not None: + from pandas.core.series import Series + + arrays = Series(data, index=columns, dtype=object) + data_names = arrays.index + + missing = arrays.isna() + if index is None: + # GH10856 + # raise ValueError if only scalars in dict + index = extract_index(arrays[~missing]) + else: + index = ensure_index(index) + + # no obvious "empty" int column + if missing.any() and not is_integer_dtype(dtype): + if dtype is None or np.issubdtype(dtype, np.flexible): + # GH#1783 + nan_dtype = np.dtype(object) + else: + nan_dtype = dtype + val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) + arrays.loc[missing] = [val] * missing.sum() + + else: + keys = list(data.keys()) + columns = data_names = Index(keys) + arrays = (com.maybe_iterable_to_list(data[k]) for k in keys) + # GH#24096 need copy to be deep for datetime64tz case + # TODO: See if we can avoid these copies + arrays = [ + arr if not isinstance(arr, ABCIndexClass) else arr._data for arr in arrays + ] + arrays = [ + arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays + ] + return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) + + +# --------------------------------------------------------------------- + + +def prep_ndarray(values, copy=True) -> np.ndarray: + if not isinstance(values, (np.ndarray, ABCSeries, Index)): + if len(values) == 0: + return np.empty((0, 0), dtype=object) + elif isinstance(values, range): + arr = np.arange(values.start, values.stop, values.step, 
dtype="int64") + return arr[..., np.newaxis] + + def convert(v): + return maybe_convert_platform(v) + + # we could have a 1-dim or 2-dim list here + # this is equiv of np.asarray, but does object conversion + # and platform dtype preservation + try: + if is_list_like(values[0]) or hasattr(values[0], "len"): + values = np.array([convert(v) for v in values]) + elif isinstance(values[0], np.ndarray) and values[0].ndim == 0: + # GH#21861 + values = np.array([convert(v) for v in values]) + else: + values = convert(values) + except (ValueError, TypeError): + values = convert(values) + + else: + + # drop subclass info, do not copy data + values = np.asarray(values) + if copy: + values = values.copy() + + if values.ndim == 1: + values = values.reshape((values.shape[0], 1)) + elif values.ndim != 2: + raise ValueError("Must pass 2-d input") + + return values + + +def _homogenize(data, index, dtype=None): + oindex = None + homogenized = [] + + for val in data: + if isinstance(val, ABCSeries): + if dtype is not None: + val = val.astype(dtype) + if val.index is not index: + # Forces alignment. 
No need to copy data since we + # are putting it into an ndarray later + val = val.reindex(index, copy=False) + else: + if isinstance(val, dict): + if oindex is None: + oindex = index.astype("O") + + if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)): + val = com.dict_compat(val) + else: + val = dict(val) + val = lib.fast_multiget(val, oindex.values, default=np.nan) + val = sanitize_array( + val, index, dtype=dtype, copy=False, raise_cast_failure=False + ) + + homogenized.append(val) + + return homogenized + + +def extract_index(data): + index = None + if len(data) == 0: + index = Index([]) + elif len(data) > 0: + raw_lengths = [] + indexes = [] + + have_raw_arrays = False + have_series = False + have_dicts = False + + for val in data: + if isinstance(val, ABCSeries): + have_series = True + indexes.append(val.index) + elif isinstance(val, dict): + have_dicts = True + indexes.append(list(val.keys())) + elif is_list_like(val) and getattr(val, "ndim", 1) == 1: + have_raw_arrays = True + raw_lengths.append(len(val)) + + if not indexes and not raw_lengths: + raise ValueError("If using all scalar values, you must pass an index") + + if have_series: + index = union_indexes(indexes) + elif have_dicts: + index = union_indexes(indexes, sort=False) + + if have_raw_arrays: + lengths = list(set(raw_lengths)) + if len(lengths) > 1: + raise ValueError("arrays must all be same length") + + if have_dicts: + raise ValueError( + "Mixing dicts with non-Series may lead to ambiguous ordering." 
+ ) + + if have_series: + if lengths[0] != len(index): + msg = ( + f"array length {lengths[0]} does not match index " + f"length {len(index)}" + ) + raise ValueError(msg) + else: + index = ibase.default_index(lengths[0]) + + return ensure_index(index) + + +def reorder_arrays(arrays, arr_columns, columns): + # reorder according to the columns + if ( + columns is not None + and len(columns) + and arr_columns is not None + and len(arr_columns) + ): + indexer = ensure_index(arr_columns).get_indexer(columns) + arr_columns = ensure_index([arr_columns[i] for i in indexer]) + arrays = [arrays[i] for i in indexer] + return arrays, arr_columns + + +def get_names_from_index(data): + has_some_name = any(getattr(s, "name", None) is not None for s in data) + if not has_some_name: + return ibase.default_index(len(data)) + + index = list(range(len(data))) + count = 0 + for i, s in enumerate(data): + n = getattr(s, "name", None) + if n is not None: + index[i] = n + else: + index[i] = f"Unnamed {count}" + count += 1 + + return index + + +def _get_axes(N, K, index, columns): + # helper to create the axes as indexes + # return axes or defaults + + if index is None: + index = ibase.default_index(N) + else: + index = ensure_index(index) + + if columns is None: + columns = ibase.default_index(K) + else: + columns = ensure_index(columns) + return index, columns + + +# --------------------------------------------------------------------- +# Conversion of Inputs to Arrays + + +def to_arrays(data, columns, coerce_float=False, dtype=None): + """ + Return list of arrays, columns. 
+ """ + if isinstance(data, ABCDataFrame): + if columns is not None: + arrays = [ + data._ixs(i, axis=1).values + for i, col in enumerate(data.columns) + if col in columns + ] + else: + columns = data.columns + arrays = [data._ixs(i, axis=1).values for i in range(len(columns))] + + return arrays, columns + + if not len(data): + if isinstance(data, np.ndarray): + columns = data.dtype.names + if columns is not None: + return [[]] * len(columns), columns + return [], [] # columns if columns is not None else [] + if isinstance(data[0], (list, tuple)): + return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) + elif isinstance(data[0], abc.Mapping): + return _list_of_dict_to_arrays( + data, columns, coerce_float=coerce_float, dtype=dtype + ) + elif isinstance(data[0], ABCSeries): + return _list_of_series_to_arrays( + data, columns, coerce_float=coerce_float, dtype=dtype + ) + elif isinstance(data[0], Categorical): + if columns is None: + columns = ibase.default_index(len(data)) + return data, columns + elif ( + isinstance(data, (np.ndarray, ABCSeries, Index)) + and data.dtype.names is not None + ): + + columns = list(data.dtype.names) + arrays = [data[k] for k in columns] + return arrays, columns + else: + # last ditch effort + data = [tuple(x) for x in data] + return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) + + +def _list_to_arrays(data, columns, coerce_float=False, dtype=None): + if len(data) > 0 and isinstance(data[0], tuple): + content = list(lib.to_object_array_tuples(data).T) + else: + # list of lists + content = list(lib.to_object_array(data).T) + # gh-26429 do not raise user-facing AssertionError + try: + result = _convert_object_array( + content, columns, dtype=dtype, coerce_float=coerce_float + ) + except AssertionError as e: + raise ValueError(e) from e + return result + + +def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): + if columns is None: + # We know pass_data is non-empty 
because data[0] is a Series + pass_data = [x for x in data if isinstance(x, (ABCSeries, ABCDataFrame))] + columns = get_objs_combined_axis(pass_data, sort=False) + + indexer_cache = {} + + aligned_values = [] + for s in data: + index = getattr(s, "index", None) + if index is None: + index = ibase.default_index(len(s)) + + if id(index) in indexer_cache: + indexer = indexer_cache[id(index)] + else: + indexer = indexer_cache[id(index)] = index.get_indexer(columns) + + values = com.values_from_object(s) + aligned_values.append(algorithms.take_1d(values, indexer)) + + values = np.vstack(aligned_values) + + if values.dtype == np.object_: + content = list(values.T) + return _convert_object_array( + content, columns, dtype=dtype, coerce_float=coerce_float + ) + else: + return values.T, columns + + +def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): + """Convert list of dicts to numpy arrays + + if `columns` is not passed, column names are inferred from the records + - for OrderedDict and dicts, the column names match + the key insertion-order from the first record to the last. + - For other kinds of dict-likes, the keys are lexically sorted. 
+ + Parameters + ---------- + data : iterable + collection of records (OrderedDict, dict) + columns: iterables or None + coerce_float : bool + dtype : np.dtype + + Returns + ------- + tuple + arrays, columns + """ + + if columns is None: + gen = (list(x.keys()) for x in data) + sort = not any(isinstance(d, dict) for d in data) + columns = lib.fast_unique_multiple_list_gen(gen, sort=sort) + + # assure that they are of the base dict class and not of derived + # classes + data = [(type(d) is dict) and d or dict(d) for d in data] + + content = list(lib.dicts_to_array(data, list(columns)).T) + return _convert_object_array( + content, columns, dtype=dtype, coerce_float=coerce_float + ) + + +def _convert_object_array(content, columns, coerce_float=False, dtype=None): + if columns is None: + columns = ibase.default_index(len(content)) + else: + if len(columns) != len(content): # pragma: no cover + # caller's responsibility to check for this... + raise AssertionError( + f"{len(columns)} columns passed, passed data had " + f"{len(content)} columns" + ) + + # provide soft conversion of object dtypes + def convert(arr): + if dtype != object and dtype != np.object: + arr = lib.maybe_convert_objects(arr, try_float=coerce_float) + arr = maybe_cast_to_datetime(arr, dtype) + return arr + + arrays = [convert(arr) for arr in content] + + return arrays, columns + + +# --------------------------------------------------------------------- +# Series-Based + + +def sanitize_index(data, index, copy=False): + """ + Sanitize an index type to return an ndarray of the underlying, pass + through a non-Index. 
+ """ + + if index is None: + return data + + if len(data) != len(index): + raise ValueError("Length of values does not match length of index") + + if isinstance(data, ABCIndexClass) and not copy: + pass + elif isinstance(data, (ABCPeriodIndex, ABCDatetimeIndex)): + data = data._values + if copy: + data = data.copy() + + elif isinstance(data, np.ndarray): + + # coerce datetimelike types + if data.dtype.kind in ["M", "m"]: + data = sanitize_array(data, index, copy=copy) + + return data diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py new file mode 100644 index 00000000..67afd069 --- /dev/null +++ b/pandas/core/internals/managers.py @@ -0,0 +1,2027 @@ +from collections import defaultdict +from functools import partial +import itertools +import operator +import re +from typing import List, Optional, Sequence, Tuple, Union + +import numpy as np + +from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib +from pandas.util._validators import validate_bool_kwarg + +from pandas.core.dtypes.cast import ( + find_common_type, + infer_dtype_from_scalar, + maybe_convert_objects, + maybe_promote, +) +from pandas.core.dtypes.common import ( + _NS_DTYPE, + is_datetimelike_v_numeric, + is_extension_array_dtype, + is_list_like, + is_numeric_v_string_like, + is_scalar, + is_sparse, +) +from pandas.core.dtypes.concat import concat_compat +from pandas.core.dtypes.dtypes import ExtensionDtype +from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries +from pandas.core.dtypes.missing import isna + +import pandas.core.algorithms as algos +from pandas.core.base import PandasObject +from pandas.core.indexers import maybe_convert_indices +from pandas.core.indexes.api import Index, MultiIndex, ensure_index +from pandas.core.internals.blocks import ( + Block, + CategoricalBlock, + DatetimeTZBlock, + ExtensionBlock, + ObjectValuesExtensionBlock, + _extend_blocks, + _merge_blocks, + _safe_reshape, + get_block_type, + make_block, +) 
+from pandas.core.internals.concat import ( # all for concatenate_block_managers + combine_concat_plans, + concatenate_join_units, + get_mgr_concatenation_plan, + is_uniform_join_units, +) + +from pandas.io.formats.printing import pprint_thing + +# TODO: flexible with index=None and/or items=None + + +class BlockManager(PandasObject): + """ + Core internal data structure to implement DataFrame, Series, etc. + + Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a + lightweight blocked set of labeled data to be manipulated by the DataFrame + public API class + + Attributes + ---------- + shape + ndim + axes + values + items + + Methods + ------- + set_axis(axis, new_labels) + copy(deep=True) + + get_dtype_counts + get_dtypes + + apply(func, axes, block_filter_fn) + + get_bool_data + get_numeric_data + + get_slice(slice_like, axis) + get(label) + iget(loc) + + take(indexer, axis) + reindex_axis(new_labels, axis) + reindex_indexer(new_labels, indexer, axis) + + delete(label) + insert(loc, label, value) + set(label, value) + + Parameters + ---------- + + + Notes + ----- + This is *not* a public API class + """ + + __slots__ = [ + "axes", + "blocks", + "_ndim", + "_shape", + "_known_consolidated", + "_is_consolidated", + "_blknos", + "_blklocs", + ] + + def __init__( + self, + blocks: Sequence[Block], + axes: Sequence[Index], + do_integrity_check: bool = True, + ): + self.axes = [ensure_index(ax) for ax in axes] + self.blocks: Tuple[Block, ...] 
= tuple(blocks) + + for block in blocks: + if self.ndim != block.ndim: + raise AssertionError( + f"Number of Block dimensions ({block.ndim}) must equal " + f"number of axes ({self.ndim})" + ) + + if do_integrity_check: + self._verify_integrity() + + self._consolidate_check() + + self._rebuild_blknos_and_blklocs() + + def make_empty(self, axes=None): + """ return an empty BlockManager with the items axis of len 0 """ + if axes is None: + axes = [ensure_index([])] + [ensure_index(a) for a in self.axes[1:]] + + # preserve dtype if possible + if self.ndim == 1: + blocks = np.array([], dtype=self.array_dtype) + else: + blocks = [] + return type(self)(blocks, axes) + + def __nonzero__(self): + return True + + # Python3 compat + __bool__ = __nonzero__ + + @property + def shape(self): + return tuple(len(ax) for ax in self.axes) + + @property + def ndim(self) -> int: + return len(self.axes) + + def set_axis(self, axis, new_labels): + new_labels = ensure_index(new_labels) + old_len = len(self.axes[axis]) + new_len = len(new_labels) + + if new_len != old_len: + raise ValueError( + f"Length mismatch: Expected axis has {old_len} elements, new " + f"values have {new_len} elements" + ) + + self.axes[axis] = new_labels + + def rename_axis(self, mapper, axis, copy=True, level=None): + """ + Rename one of axes. + + Parameters + ---------- + mapper : unary callable + axis : int + copy : boolean, default True + level : int, default None + """ + obj = self.copy(deep=copy) + obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level)) + return obj + + @property + def _is_single_block(self): + if self.ndim == 1: + return True + + if len(self.blocks) != 1: + return False + + blk = self.blocks[0] + return blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice == slice( + 0, len(self), 1 + ) + + def _rebuild_blknos_and_blklocs(self): + """ + Update mgr._blknos / mgr._blklocs. 
+ """ + new_blknos = np.empty(self.shape[0], dtype=np.int64) + new_blklocs = np.empty(self.shape[0], dtype=np.int64) + new_blknos.fill(-1) + new_blklocs.fill(-1) + + for blkno, blk in enumerate(self.blocks): + rl = blk.mgr_locs + new_blknos[rl.indexer] = blkno + new_blklocs[rl.indexer] = np.arange(len(rl)) + + if (new_blknos == -1).any(): + raise AssertionError("Gaps in blk ref_locs") + + self._blknos = new_blknos + self._blklocs = new_blklocs + + @property + def items(self): + return self.axes[0] + + def _get_counts(self, f): + """ return a dict of the counts of the function in BlockManager """ + self._consolidate_inplace() + counts = dict() + for b in self.blocks: + v = f(b) + counts[v] = counts.get(v, 0) + b.shape[0] + return counts + + def get_dtype_counts(self): + return self._get_counts(lambda b: b.dtype.name) + + def get_dtypes(self): + dtypes = np.array([blk.dtype for blk in self.blocks]) + return algos.take_1d(dtypes, self._blknos, allow_fill=False) + + def __getstate__(self): + block_values = [b.values for b in self.blocks] + block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks] + axes_array = list(self.axes) + + extra_state = { + "0.14.1": { + "axes": axes_array, + "blocks": [ + dict(values=b.values, mgr_locs=b.mgr_locs.indexer) + for b in self.blocks + ], + } + } + + # First three elements of the state are to maintain forward + # compatibility with 0.13.1. 
+ return axes_array, block_values, block_items, extra_state + + def __setstate__(self, state): + def unpickle_block(values, mgr_locs): + return make_block(values, placement=mgr_locs) + + if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]: + state = state[3]["0.14.1"] + self.axes = [ensure_index(ax) for ax in state["axes"]] + self.blocks = tuple( + unpickle_block(b["values"], b["mgr_locs"]) for b in state["blocks"] + ) + else: + # discard anything after 3rd, support beta pickling format for a + # little while longer + ax_arrays, bvalues, bitems = state[:3] + + self.axes = [ensure_index(ax) for ax in ax_arrays] + + if len(bitems) == 1 and self.axes[0].equals(bitems[0]): + # This is a workaround for pre-0.14.1 pickles that didn't + # support unpickling multi-block frames/panels with non-unique + # columns/items, because given a manager with items ["a", "b", + # "a"] there's no way of knowing which block's "a" is where. + # + # Single-block case can be supported under the assumption that + # block items corresponded to manager items 1-to-1. 
+ all_mgr_locs = [slice(0, len(bitems[0]))] + else: + all_mgr_locs = [ + self.axes[0].get_indexer(blk_items) for blk_items in bitems + ] + + self.blocks = tuple( + unpickle_block(values, mgr_locs) + for values, mgr_locs in zip(bvalues, all_mgr_locs) + ) + + self._post_setstate() + + def _post_setstate(self): + self._is_consolidated = False + self._known_consolidated = False + self._rebuild_blknos_and_blklocs() + + def __len__(self) -> int: + return len(self.items) + + def __repr__(self) -> str: + output = type(self).__name__ + for i, ax in enumerate(self.axes): + if i == 0: + output += f"\nItems: {ax}" + else: + output += f"\nAxis {i}: {ax}" + + for block in self.blocks: + output += f"\n{pprint_thing(block)}" + return output + + def _verify_integrity(self): + mgr_shape = self.shape + tot_items = sum(len(x.mgr_locs) for x in self.blocks) + for block in self.blocks: + if block._verify_integrity and block.shape[1:] != mgr_shape[1:]: + construction_error(tot_items, block.shape[1:], self.axes) + if len(self.items) != tot_items: + raise AssertionError( + "Number of manager items must equal union of " + f"block items\n# manager items: {len(self.items)}, # " + f"tot_items: {tot_items}" + ) + + def reduce(self, func, *args, **kwargs): + # If 2D, we assume that we're operating column-wise + if self.ndim == 1: + # we'll be returning a scalar + blk = self.blocks[0] + return func(blk.values, *args, **kwargs) + + res = {} + for blk in self.blocks: + bres = func(blk.values, *args, **kwargs) + + if np.ndim(bres) == 0: + # EA + assert blk.shape[0] == 1 + new_res = zip(blk.mgr_locs.as_array, [bres]) + else: + assert bres.ndim == 1, bres.shape + assert blk.shape[0] == len(bres), (blk.shape, bres.shape, args, kwargs) + new_res = zip(blk.mgr_locs.as_array, bres) + + nr = dict(new_res) + assert not any(key in res for key in nr) + res.update(nr) + + return res + + def apply(self, f, filter=None, **kwargs): + """ + Iterate over the blocks, collect and create a new BlockManager. 
+ + Parameters + ---------- + f : str or callable + Name of the Block method to apply. + filter : list, if supplied, only call the block if the filter is in + the block + + Returns + ------- + BlockManager + """ + + result_blocks = [] + + # filter kwarg is used in replace-* family of methods + if filter is not None: + filter_locs = set(self.items.get_indexer_for(filter)) + if len(filter_locs) == len(self.items): + # All items are included, as if there were no filtering + filter = None + else: + kwargs["filter"] = filter_locs + + self._consolidate_inplace() + + if f == "where": + align_copy = True + if kwargs.get("align", True): + align_keys = ["other", "cond"] + else: + align_keys = ["cond"] + elif f == "putmask": + align_copy = False + if kwargs.get("align", True): + align_keys = ["new", "mask"] + else: + align_keys = ["mask"] + elif f == "fillna": + # fillna internally does putmask, maybe it's better to do this + # at mgr, not block level? + align_copy = False + align_keys = ["value"] + else: + align_keys = [] + + # TODO(EA): may interfere with ExtensionBlock.setitem for blocks + # with a .values attribute. 
+ aligned_args = { + k: kwargs[k] + for k in align_keys + if not isinstance(kwargs[k], ABCExtensionArray) + and hasattr(kwargs[k], "values") + } + + for b in self.blocks: + if filter is not None: + if not b.mgr_locs.isin(filter_locs).any(): + result_blocks.append(b) + continue + + if aligned_args: + b_items = self.items[b.mgr_locs.indexer] + + for k, obj in aligned_args.items(): + axis = obj._info_axis_number + kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy) + + if callable(f): + applied = b.apply(f, **kwargs) + else: + applied = getattr(b, f)(**kwargs) + result_blocks = _extend_blocks(applied, result_blocks) + + if len(result_blocks) == 0: + return self.make_empty(self.axes) + bm = type(self)(result_blocks, self.axes, do_integrity_check=False) + return bm + + def quantile( + self, + axis=0, + consolidate=True, + transposed=False, + interpolation="linear", + qs=None, + numeric_only=None, + ): + """ + Iterate over blocks applying quantile reduction. + This routine is intended for reduction type operations and + will do inference on the generated blocks. + + Parameters + ---------- + axis: reduction axis, default 0 + consolidate: boolean, default True. 
Join together blocks having same + dtype + transposed: boolean, default False + we are holding transposed data + interpolation : type of interpolation, default 'linear' + qs : a scalar or list of the quantiles to be computed + numeric_only : ignored + + Returns + ------- + Block Manager (new object) + """ + + # Series dispatches to DataFrame for quantile, which allows us to + # simplify some of the code here and in the blocks + assert self.ndim >= 2 + + if consolidate: + self._consolidate_inplace() + + def get_axe(block, qs, axes): + # Because Series dispatches to DataFrame, we will always have + # block.ndim == 2 + from pandas import Float64Index + + if is_list_like(qs): + ax = Float64Index(qs) + else: + ax = axes[0] + return ax + + axes, blocks = [], [] + for b in self.blocks: + block = b.quantile(axis=axis, qs=qs, interpolation=interpolation) + + axe = get_axe(b, qs, axes=self.axes) + + axes.append(axe) + blocks.append(block) + + # note that some DatetimeTZ, Categorical are always ndim==1 + ndim = {b.ndim for b in blocks} + assert 0 not in ndim, ndim + + if 2 in ndim: + + new_axes = list(self.axes) + + # multiple blocks that are reduced + if len(blocks) > 1: + new_axes[1] = axes[0] + + # reset the placement to the original + for b, sb in zip(blocks, self.blocks): + b.mgr_locs = sb.mgr_locs + + else: + new_axes[axis] = Index(np.concatenate([ax.values for ax in axes])) + + if transposed: + new_axes = new_axes[::-1] + blocks = [ + b.make_block(b.values.T, placement=np.arange(b.shape[1])) + for b in blocks + ] + + return type(self)(blocks, new_axes) + + # single block, i.e. 
ndim == {1} + values = concat_compat([b.values for b in blocks]) + + # compute the orderings of our original data + if len(self.blocks) > 1: + + indexer = np.empty(len(self.axes[0]), dtype=np.intp) + i = 0 + for b in self.blocks: + for j in b.mgr_locs: + indexer[j] = i + i = i + 1 + + values = values.take(indexer) + + return SingleBlockManager( + [make_block(values, ndim=1, placement=np.arange(len(values)))], axes[0] + ) + + def isna(self, func): + return self.apply("apply", func=func) + + def where(self, **kwargs): + return self.apply("where", **kwargs) + + def setitem(self, **kwargs): + return self.apply("setitem", **kwargs) + + def putmask(self, **kwargs): + return self.apply("putmask", **kwargs) + + def diff(self, **kwargs): + return self.apply("diff", **kwargs) + + def interpolate(self, **kwargs): + return self.apply("interpolate", **kwargs) + + def shift(self, **kwargs): + return self.apply("shift", **kwargs) + + def fillna(self, **kwargs): + return self.apply("fillna", **kwargs) + + def downcast(self, **kwargs): + return self.apply("downcast", **kwargs) + + def astype(self, dtype, copy: bool = False, errors: str = "raise"): + return self.apply("astype", dtype=dtype, copy=copy, errors=errors) + + def convert(self, **kwargs): + return self.apply("convert", **kwargs) + + def replace(self, value, **kwargs): + assert np.ndim(value) == 0, value + return self.apply("replace", value=value, **kwargs) + + def replace_list(self, src_list, dest_list, inplace=False, regex=False): + """ do a list replace """ + + inplace = validate_bool_kwarg(inplace, "inplace") + + # figure out our mask a-priori to avoid repeated replacements + values = self.as_array() + + def comp(s, regex=False): + """ + Generate a bool array by perform an equality check, or perform + an element-wise regular expression matching + """ + if isna(s): + return isna(values) + if isinstance(s, (Timedelta, Timestamp)) and getattr(s, "tz", None) is None: + + return _compare_or_regex_search( + 
maybe_convert_objects(values), s.asm8, regex + ) + return _compare_or_regex_search(values, s, regex) + + masks = [comp(s, regex) for i, s in enumerate(src_list)] + + result_blocks = [] + src_len = len(src_list) - 1 + for blk in self.blocks: + + # its possible to get multiple result blocks here + # replace ALWAYS will return a list + rb = [blk if inplace else blk.copy()] + for i, (s, d) in enumerate(zip(src_list, dest_list)): + # TODO: assert/validate that `d` is always a scalar? + new_rb = [] + for b in rb: + m = masks[i][b.mgr_locs.indexer] + convert = i == src_len + result = b._replace_coerce( + mask=m, + to_replace=s, + value=d, + inplace=inplace, + convert=convert, + regex=regex, + ) + if m.any() or convert: + new_rb = _extend_blocks(result, new_rb) + else: + new_rb.append(b) + rb = new_rb + result_blocks.extend(rb) + + bm = type(self)(result_blocks, self.axes) + bm._consolidate_inplace() + return bm + + def is_consolidated(self): + """ + Return True if more than one block with the same dtype + """ + if not self._known_consolidated: + self._consolidate_check() + return self._is_consolidated + + def _consolidate_check(self): + ftypes = [blk.ftype for blk in self.blocks] + self._is_consolidated = len(ftypes) == len(set(ftypes)) + self._known_consolidated = True + + @property + def is_mixed_type(self): + # Warning, consolidation needs to get checked upstairs + self._consolidate_inplace() + return len(self.blocks) > 1 + + @property + def is_numeric_mixed_type(self): + # Warning, consolidation needs to get checked upstairs + self._consolidate_inplace() + return all(block.is_numeric for block in self.blocks) + + @property + def is_datelike_mixed_type(self): + # Warning, consolidation needs to get checked upstairs + self._consolidate_inplace() + return any(block.is_datelike for block in self.blocks) + + @property + def any_extension_types(self): + """Whether any of the blocks in this manager are extension blocks""" + return any(block.is_extension for block in 
self.blocks) + + @property + def is_view(self): + """ return a boolean if we are a single block and are a view """ + if len(self.blocks) == 1: + return self.blocks[0].is_view + + # It is technically possible to figure out which blocks are views + # e.g. [ b.values.base is not None for b in self.blocks ] + # but then we have the case of possibly some blocks being a view + # and some blocks not. setting in theory is possible on the non-view + # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit + # complicated + + return False + + def get_bool_data(self, copy=False): + """ + Parameters + ---------- + copy : boolean, default False + Whether to copy the blocks + """ + self._consolidate_inplace() + return self.combine([b for b in self.blocks if b.is_bool], copy) + + def get_numeric_data(self, copy=False): + """ + Parameters + ---------- + copy : boolean, default False + Whether to copy the blocks + """ + self._consolidate_inplace() + return self.combine([b for b in self.blocks if b.is_numeric], copy) + + def combine(self, blocks, copy=True): + """ return a new manager with the blocks """ + if len(blocks) == 0: + return self.make_empty() + + # FIXME: optimization potential + indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) + inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) + + new_blocks = [] + for b in blocks: + b = b.copy(deep=copy) + b.mgr_locs = algos.take_1d( + inv_indexer, b.mgr_locs.as_array, axis=0, allow_fill=False + ) + new_blocks.append(b) + + axes = list(self.axes) + axes[0] = self.items.take(indexer) + + return type(self)(new_blocks, axes, do_integrity_check=False) + + def get_slice(self, slobj: slice, axis: int = 0): + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + if axis == 0: + new_blocks = self._slice_take_blocks_ax0(slobj) + else: + _slicer = [slice(None)] * (axis + 1) + _slicer[axis] = slobj + slicer = tuple(_slicer) + new_blocks = [blk.getitem_block(slicer) for 
blk in self.blocks] + + new_axes = list(self.axes) + new_axes[axis] = new_axes[axis][slobj] + + bm = type(self)(new_blocks, new_axes, do_integrity_check=False) + bm._consolidate_inplace() + return bm + + def __contains__(self, item) -> bool: + return item in self.items + + @property + def nblocks(self) -> int: + return len(self.blocks) + + def copy(self, deep=True): + """ + Make deep or shallow copy of BlockManager + + Parameters + ---------- + deep : bool or string, default True + If False, return shallow copy (do not copy data) + If 'all', copy data and a deep copy of the index + + Returns + ------- + BlockManager + """ + # this preserves the notion of view copying of axes + if deep: + # hit in e.g. tests.io.json.test_pandas + + def copy_func(ax): + if deep == "all": + return ax.copy(deep=True) + else: + return ax.view() + + new_axes = [copy_func(ax) for ax in self.axes] + else: + new_axes = list(self.axes) + + res = self.apply("copy", deep=deep) + res.axes = new_axes + return res + + def as_array(self, transpose=False, items=None): + """Convert the blockmanager data into an numpy array. + + Parameters + ---------- + transpose : boolean, default False + If True, transpose the return array + items : list of strings or None + Names of block items that will be included in the returned + array. ``None`` means that all block items will be used + + Returns + ------- + arr : ndarray + """ + if len(self.blocks) == 0: + arr = np.empty(self.shape, dtype=float) + return arr.transpose() if transpose else arr + + if items is not None: + mgr = self.reindex_axis(items, axis=0) + else: + mgr = self + + if self._is_single_block and mgr.blocks[0].is_datetimetz: + # TODO(Block.get_values): Make DatetimeTZBlock.get_values + # always be object dtype. 
Some callers seem to want the + # DatetimeArray (previously DTI) + arr = mgr.blocks[0].get_values(dtype=object) + elif self._is_single_block or not self.is_mixed_type: + arr = np.asarray(mgr.blocks[0].get_values()) + else: + arr = mgr._interleave() + + return arr.transpose() if transpose else arr + + def _interleave(self): + """ + Return ndarray from blocks with specified item order + Items must be contained in the blocks + """ + dtype = _interleaved_dtype(self.blocks) + + # TODO: https://github.com/pandas-dev/pandas/issues/22791 + # Give EAs some input on what happens here. Sparse needs this. + if is_sparse(dtype): + dtype = dtype.subtype + elif is_extension_array_dtype(dtype): + dtype = "object" + + result = np.empty(self.shape, dtype=dtype) + + itemmask = np.zeros(self.shape[0]) + + for blk in self.blocks: + rl = blk.mgr_locs + result[rl.indexer] = blk.get_values(dtype) + itemmask[rl.indexer] = 1 + + if not itemmask.all(): + raise AssertionError("Some items were not contained in blocks") + + return result + + def to_dict(self, copy=True): + """ + Return a dict of str(dtype) -> BlockManager + + Parameters + ---------- + copy : boolean, default True + + Returns + ------- + values : a dict of dtype -> BlockManager + + Notes + ----- + This consolidates based on str(dtype) + """ + self._consolidate_inplace() + + bd = {} + for b in self.blocks: + bd.setdefault(str(b.dtype), []).append(b) + + return {dtype: self.combine(blocks, copy=copy) for dtype, blocks in bd.items()} + + def fast_xs(self, loc): + """ + get a cross sectional for a given location in the + items ; handle dups + + return the result, is *could* be a view in the case of a + single block + """ + if len(self.blocks) == 1: + return self.blocks[0].iget((slice(None), loc)) + + items = self.items + + # non-unique (GH4726) + if not items.is_unique: + result = self._interleave() + if self.ndim == 2: + result = result.T + return result[loc] + + # unique + dtype = _interleaved_dtype(self.blocks) + + n = len(items) 
+ if is_extension_array_dtype(dtype): + # we'll eventually construct an ExtensionArray. + result = np.empty(n, dtype=object) + else: + result = np.empty(n, dtype=dtype) + + for blk in self.blocks: + # Such assignment may incorrectly coerce NaT to None + # result[blk.mgr_locs] = blk._slice((slice(None), loc)) + for i, rl in enumerate(blk.mgr_locs): + result[rl] = blk.iget((i, loc)) + + if is_extension_array_dtype(dtype): + result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) + + return result + + def consolidate(self): + """ + Join together blocks having same dtype + + Returns + ------- + y : BlockManager + """ + if self.is_consolidated(): + return self + + bm = type(self)(self.blocks, self.axes) + bm._is_consolidated = False + bm._consolidate_inplace() + return bm + + def _consolidate_inplace(self): + if not self.is_consolidated(): + self.blocks = tuple(_consolidate(self.blocks)) + self._is_consolidated = True + self._known_consolidated = True + self._rebuild_blknos_and_blklocs() + + def get(self, item): + """ + Return values for selected item (ndarray or BlockManager). 
+ """ + if self.items.is_unique: + + if not isna(item): + loc = self.items.get_loc(item) + else: + indexer = np.arange(len(self.items))[isna(self.items)] + + # allow a single nan location indexer + if not is_scalar(indexer): + if len(indexer) == 1: + loc = indexer.item() + else: + raise ValueError("cannot label index with a null key") + + return self.iget(loc) + else: + + if isna(item): + raise TypeError("cannot label index with a null key") + + indexer = self.items.get_indexer_for([item]) + return self.reindex_indexer( + new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True + ) + + def iget(self, i): + """ + Return the data as a SingleBlockManager if possible + + Otherwise return as a ndarray + """ + block = self.blocks[self._blknos[i]] + values = block.iget(self._blklocs[i]) + + # shortcut for select a single-dim from a 2-dim BM + return SingleBlockManager( + [ + block.make_block_same_class( + values, placement=slice(0, len(values)), ndim=1 + ) + ], + self.axes[1], + ) + + def delete(self, item): + """ + Delete selected item (items if non-unique) in-place. 
+ """ + indexer = self.items.get_loc(item) + + is_deleted = np.zeros(self.shape[0], dtype=np.bool_) + is_deleted[indexer] = True + ref_loc_offset = -is_deleted.cumsum() + + is_blk_deleted = [False] * len(self.blocks) + + if isinstance(indexer, int): + affected_start = indexer + else: + affected_start = is_deleted.nonzero()[0][0] + + for blkno, _ in _fast_count_smallints(self._blknos[affected_start:]): + blk = self.blocks[blkno] + bml = blk.mgr_locs + blk_del = is_deleted[bml.indexer].nonzero()[0] + + if len(blk_del) == len(bml): + is_blk_deleted[blkno] = True + continue + elif len(blk_del) != 0: + blk.delete(blk_del) + bml = blk.mgr_locs + + blk.mgr_locs = bml.add(ref_loc_offset[bml.indexer]) + + # FIXME: use Index.delete as soon as it uses fastpath=True + self.axes[0] = self.items[~is_deleted] + self.blocks = tuple( + b for blkno, b in enumerate(self.blocks) if not is_blk_deleted[blkno] + ) + self._shape = None + self._rebuild_blknos_and_blklocs() + + def set(self, item, value): + """ + Set new item in-place. Does not consolidate. 
Adds new Block if not + contained in the current set of items + """ + # FIXME: refactor, clearly separate broadcasting & zip-like assignment + # can prob also fix the various if tests for sparse/categorical + + value_is_extension_type = is_extension_array_dtype(value) + + # categorical/sparse/datetimetz + if value_is_extension_type: + + def value_getitem(placement): + return value + + else: + if value.ndim == self.ndim - 1: + value = _safe_reshape(value, (1,) + value.shape) + + def value_getitem(placement): + return value + + else: + + def value_getitem(placement): + return value[placement.indexer] + + if value.shape[1:] != self.shape[1:]: + raise AssertionError( + "Shape of new values must be compatible with manager shape" + ) + + try: + loc = self.items.get_loc(item) + except KeyError: + # This item wasn't present, just insert at end + self.insert(len(self.items), item, value) + return + + if isinstance(loc, int): + loc = [loc] + + blknos = self._blknos[loc] + blklocs = self._blklocs[loc].copy() + + unfit_mgr_locs = [] + unfit_val_locs = [] + removed_blknos = [] + for blkno, val_locs in libinternals.get_blkno_placements(blknos, group=True): + blk = self.blocks[blkno] + blk_locs = blklocs[val_locs.indexer] + if blk.should_store(value): + blk.set(blk_locs, value_getitem(val_locs)) + else: + unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs]) + unfit_val_locs.append(val_locs) + + # If all block items are unfit, schedule the block for removal. 
+ if len(val_locs) == len(blk.mgr_locs): + removed_blknos.append(blkno) + else: + self._blklocs[blk.mgr_locs.indexer] = -1 + blk.delete(blk_locs) + self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk)) + + if len(removed_blknos): + # Remove blocks & update blknos accordingly + is_deleted = np.zeros(self.nblocks, dtype=np.bool_) + is_deleted[removed_blknos] = True + + new_blknos = np.empty(self.nblocks, dtype=np.int64) + new_blknos.fill(-1) + new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos)) + self._blknos = algos.take_1d( + new_blknos, self._blknos, axis=0, allow_fill=False + ) + self.blocks = tuple( + blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos) + ) + + if unfit_val_locs: + unfit_mgr_locs = np.concatenate(unfit_mgr_locs) + unfit_count = len(unfit_mgr_locs) + + new_blocks = [] + if value_is_extension_type: + # This code (ab-)uses the fact that sparse blocks contain only + # one item. + new_blocks.extend( + make_block( + values=value.copy(), + ndim=self.ndim, + placement=slice(mgr_loc, mgr_loc + 1), + ) + for mgr_loc in unfit_mgr_locs + ) + + self._blknos[unfit_mgr_locs] = np.arange(unfit_count) + len(self.blocks) + self._blklocs[unfit_mgr_locs] = 0 + + else: + # unfit_val_locs contains BlockPlacement objects + unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:]) + + new_blocks.append( + make_block( + values=value_getitem(unfit_val_items), + ndim=self.ndim, + placement=unfit_mgr_locs, + ) + ) + + self._blknos[unfit_mgr_locs] = len(self.blocks) + self._blklocs[unfit_mgr_locs] = np.arange(unfit_count) + + self.blocks += tuple(new_blocks) + + # Newly created block's dtype may already be present. + self._known_consolidated = False + + def insert(self, loc: int, item, value, allow_duplicates: bool = False): + """ + Insert item at selected position. 
+ + Parameters + ---------- + loc : int + item : hashable + value : array_like + allow_duplicates: bool + If False, trying to insert non-unique item will raise + + """ + if not allow_duplicates and item in self.items: + # Should this be a different kind of error?? + raise ValueError(f"cannot insert {item}, already exists") + + if not isinstance(loc, int): + raise TypeError("loc must be int") + + # insert to the axis; this could possibly raise a TypeError + new_axis = self.items.insert(loc, item) + + block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1)) + + for blkno, count in _fast_count_smallints(self._blknos[loc:]): + blk = self.blocks[blkno] + if count == len(blk.mgr_locs): + blk.mgr_locs = blk.mgr_locs.add(1) + else: + new_mgr_locs = blk.mgr_locs.as_array.copy() + new_mgr_locs[new_mgr_locs >= loc] += 1 + blk.mgr_locs = new_mgr_locs + + if loc == self._blklocs.shape[0]: + # np.append is a lot faster, let's use it if we can. + self._blklocs = np.append(self._blklocs, 0) + self._blknos = np.append(self._blknos, len(self.blocks)) + else: + self._blklocs = np.insert(self._blklocs, loc, 0) + self._blknos = np.insert(self._blknos, loc, len(self.blocks)) + + self.axes[0] = new_axis + self.blocks += (block,) + self._shape = None + + self._known_consolidated = False + + if len(self.blocks) > 100: + self._consolidate_inplace() + + def reindex_axis( + self, new_index, axis, method=None, limit=None, fill_value=None, copy=True + ): + """ + Conform block manager to new index. 
+ """ + new_index = ensure_index(new_index) + new_index, indexer = self.axes[axis].reindex( + new_index, method=method, limit=limit + ) + + return self.reindex_indexer( + new_index, indexer, axis=axis, fill_value=fill_value, copy=copy + ) + + def reindex_indexer( + self, new_axis, indexer, axis, fill_value=None, allow_dups=False, copy=True + ): + """ + Parameters + ---------- + new_axis : Index + indexer : ndarray of int64 or None + axis : int + fill_value : object + allow_dups : bool + + pandas-indexer with -1's only. + """ + if indexer is None: + if new_axis is self.axes[axis] and not copy: + return self + + result = self.copy(deep=copy) + result.axes = list(self.axes) + result.axes[axis] = new_axis + return result + + self._consolidate_inplace() + + # some axes don't allow reindexing with dups + if not allow_dups: + self.axes[axis]._can_reindex(indexer) + + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + if axis == 0: + new_blocks = self._slice_take_blocks_ax0(indexer, fill_tuple=(fill_value,)) + else: + new_blocks = [ + blk.take_nd( + indexer, + axis=axis, + fill_tuple=( + fill_value if fill_value is not None else blk.fill_value, + ), + ) + for blk in self.blocks + ] + + new_axes = list(self.axes) + new_axes[axis] = new_axis + return type(self)(new_blocks, new_axes) + + def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): + """ + Slice/take blocks along axis=0. 
+ + Overloaded for SingleBlock + + Returns + ------- + new_blocks : list of Block + """ + + allow_fill = fill_tuple is not None + + sl_type, slobj, sllen = _preprocess_slice_or_indexer( + slice_or_indexer, self.shape[0], allow_fill=allow_fill + ) + + if self._is_single_block: + blk = self.blocks[0] + + if sl_type in ("slice", "mask"): + return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] + elif not allow_fill or self.ndim == 1: + if allow_fill and fill_tuple[0] is None: + _, fill_value = maybe_promote(blk.dtype) + fill_tuple = (fill_value,) + + return [ + blk.take_nd( + slobj, + axis=0, + new_mgr_locs=slice(0, sllen), + fill_tuple=fill_tuple, + ) + ] + + if sl_type in ("slice", "mask"): + blknos = self._blknos[slobj] + blklocs = self._blklocs[slobj] + else: + blknos = algos.take_1d( + self._blknos, slobj, fill_value=-1, allow_fill=allow_fill + ) + blklocs = algos.take_1d( + self._blklocs, slobj, fill_value=-1, allow_fill=allow_fill + ) + + # When filling blknos, make sure blknos is updated before appending to + # blocks list, that way new blkno is exactly len(blocks). + # + # FIXME: mgr_groupby_blknos must return mgr_locs in ascending order, + # pytables serialization will break otherwise. + blocks = [] + for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=True): + if blkno == -1: + # If we've got here, fill_tuple was not None. + fill_value = fill_tuple[0] + + blocks.append( + self._make_na_block(placement=mgr_locs, fill_value=fill_value) + ) + else: + blk = self.blocks[blkno] + + # Otherwise, slicing along items axis is necessary. + if not blk._can_consolidate: + # A non-consolidatable block, it's easy, because there's + # only one item and each mgr loc is a copy of that single + # item. 
+ for mgr_loc in mgr_locs: + newblk = blk.copy(deep=True) + newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1) + blocks.append(newblk) + + else: + blocks.append( + blk.take_nd( + blklocs[mgr_locs.indexer], + axis=0, + new_mgr_locs=mgr_locs, + fill_tuple=None, + ) + ) + + return blocks + + def _make_na_block(self, placement, fill_value=None): + # TODO: infer dtypes other than float64 from fill_value + + if fill_value is None: + fill_value = np.nan + block_shape = list(self.shape) + block_shape[0] = len(placement) + + dtype, fill_value = infer_dtype_from_scalar(fill_value) + block_values = np.empty(block_shape, dtype=dtype) + block_values.fill(fill_value) + return make_block(block_values, placement=placement) + + def take(self, indexer, axis=1, verify=True, convert=True): + """ + Take items along any axis. + """ + self._consolidate_inplace() + indexer = ( + np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") + if isinstance(indexer, slice) + else np.asanyarray(indexer, dtype="int64") + ) + + n = self.shape[axis] + if convert: + indexer = maybe_convert_indices(indexer, n) + + if verify: + if ((indexer == -1) | (indexer >= n)).any(): + raise Exception("Indices must be nonzero and less than the axis length") + + new_labels = self.axes[axis].take(indexer) + return self.reindex_indexer( + new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True + ) + + def equals(self, other): + self_axes, other_axes = self.axes, other.axes + if len(self_axes) != len(other_axes): + return False + if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): + return False + self._consolidate_inplace() + other._consolidate_inplace() + if len(self.blocks) != len(other.blocks): + return False + + # canonicalize block order, using a tuple combining the mgr_locs + # then type name because there might be unconsolidated + # blocks (say, Categorical) which can only be distinguished by + # the iteration order + def canonicalize(block): + return 
(block.mgr_locs.as_array.tolist(), block.dtype.name) + + self_blocks = sorted(self.blocks, key=canonicalize) + other_blocks = sorted(other.blocks, key=canonicalize) + return all( + block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks) + ) + + def unstack(self, unstacker_func, fill_value): + """Return a blockmanager with all blocks unstacked. + + Parameters + ---------- + unstacker_func : callable + A (partially-applied) ``pd.core.reshape._Unstacker`` class. + fill_value : Any + fill_value for newly introduced missing values. + + Returns + ------- + unstacked : BlockManager + """ + n_rows = self.shape[-1] + dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items) + new_columns = dummy.get_new_columns() + new_index = dummy.get_new_index() + new_blocks = [] + columns_mask = [] + + for blk in self.blocks: + blocks, mask = blk._unstack( + partial(unstacker_func, value_columns=self.items[blk.mgr_locs.indexer]), + new_columns, + n_rows, + fill_value, + ) + + new_blocks.extend(blocks) + columns_mask.extend(mask) + + new_columns = new_columns[columns_mask] + + bm = BlockManager(new_blocks, [new_columns, new_index]) + return bm + + +class SingleBlockManager(BlockManager): + """ manage a single block with """ + + ndim = 1 + _is_consolidated = True + _known_consolidated = True + __slots__ = () + + def __init__( + self, + block: Block, + axis: Union[Index, List[Index]], + do_integrity_check: bool = False, + fastpath: bool = False, + ): + if isinstance(axis, list): + if len(axis) != 1: + raise ValueError( + "cannot create SingleBlockManager with more than 1 axis" + ) + axis = axis[0] + + # passed from constructor, single block, single axis + if fastpath: + self.axes = [axis] + if isinstance(block, list): + + # empty block + if len(block) == 0: + block = [np.array([])] + elif len(block) != 1: + raise ValueError( + "Cannot create SingleBlockManager with more than 1 block" + ) + block = block[0] + else: + self.axes = [ensure_index(axis)] + + # create 
the block here + if isinstance(block, list): + + # provide consolidation to the interleaved_dtype + if len(block) > 1: + dtype = _interleaved_dtype(block) + block = [b.astype(dtype) for b in block] + block = _consolidate(block) + + if len(block) != 1: + raise ValueError( + "Cannot create SingleBlockManager with more than 1 block" + ) + block = block[0] + + if not isinstance(block, Block): + block = make_block(block, placement=slice(0, len(axis)), ndim=1) + + self.blocks = tuple([block]) + + def _post_setstate(self): + pass + + @property + def _block(self): + return self.blocks[0] + + @property + def _values(self): + return self._block.values + + @property + def _blknos(self): + """ compat with BlockManager """ + return None + + @property + def _blklocs(self): + """ compat with BlockManager """ + return None + + def get_slice(self, slobj, axis=0): + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True,) + + @property + def index(self): + return self.axes[0] + + @property + def dtype(self): + return self._block.dtype + + @property + def array_dtype(self): + return self._block.array_dtype + + def get_dtype_counts(self): + return {self.dtype.name: 1} + + def get_dtypes(self): + return np.array([self._block.dtype]) + + def external_values(self): + return self._block.external_values() + + def internal_values(self): + return self._block.internal_values() + + def get_values(self): + """ return a dense type view """ + return np.array(self._block.to_dense(), copy=False) + + @property + def _can_hold_na(self): + return self._block._can_hold_na + + def is_consolidated(self): + return True + + def _consolidate_check(self): + pass + + def _consolidate_inplace(self): + pass + + def delete(self, item): + """ + Delete single item from SingleBlockManager. + + Ensures that self.blocks doesn't become empty. 
+ """ + loc = self.items.get_loc(item) + self._block.delete(loc) + self.axes[0] = self.axes[0].delete(loc) + + def fast_xs(self, loc): + """ + fast path for getting a cross-section + return a view of the data + """ + return self._block.values[loc] + + def concat(self, to_concat, new_axis): + """ + Concatenate a list of SingleBlockManagers into a single + SingleBlockManager. + + Used for pd.concat of Series objects with axis=0. + + Parameters + ---------- + to_concat : list of SingleBlockManagers + new_axis : Index of the result + + Returns + ------- + SingleBlockManager + + """ + non_empties = [x for x in to_concat if len(x) > 0] + + # check if all series are of the same block type: + if len(non_empties) > 0: + blocks = [obj.blocks[0] for obj in non_empties] + if len({b.dtype for b in blocks}) == 1: + new_block = blocks[0].concat_same_type(blocks) + else: + values = [x.values for x in blocks] + values = concat_compat(values) + new_block = make_block(values, placement=slice(0, len(values), 1)) + else: + values = [x._block.values for x in to_concat] + values = concat_compat(values) + new_block = make_block(values, placement=slice(0, len(values), 1)) + + mgr = SingleBlockManager(new_block, new_axis) + return mgr + + +# -------------------------------------------------------------------- +# Constructor Helpers + + +def create_block_manager_from_blocks(blocks, axes): + try: + if len(blocks) == 1 and not isinstance(blocks[0], Block): + # if blocks[0] is of length 0, return empty blocks + if not len(blocks[0]): + blocks = [] + else: + # It's OK if a single block is passed as values, its placement + # is basically "all items", but if there're many, don't bother + # converting, it's an error anyway. 
+ blocks = [ + make_block(values=blocks[0], placement=slice(0, len(axes[0]))) + ] + + mgr = BlockManager(blocks, axes) + mgr._consolidate_inplace() + return mgr + + except ValueError as e: + blocks = [getattr(b, "values", b) for b in blocks] + tot_items = sum(b.shape[0] for b in blocks) + construction_error(tot_items, blocks[0].shape[1:], axes, e) + + +def create_block_manager_from_arrays(arrays, names, axes): + + try: + blocks = form_blocks(arrays, names, axes) + mgr = BlockManager(blocks, axes) + mgr._consolidate_inplace() + return mgr + except ValueError as e: + construction_error(len(arrays), arrays[0].shape, axes, e) + + +def construction_error(tot_items, block_shape, axes, e=None): + """ raise a helpful message about our construction """ + passed = tuple(map(int, [tot_items] + list(block_shape))) + # Correcting the user facing error message during dataframe construction + if len(passed) <= 2: + passed = passed[::-1] + + implied = tuple(len(ax) for ax in axes) + # Correcting the user facing error message during dataframe construction + if len(implied) <= 2: + implied = implied[::-1] + + if passed == implied and e is not None: + raise e + if block_shape[0] == 0: + raise ValueError("Empty data passed with indices specified.") + raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}") + + +# ----------------------------------------------------------------------- + + +def form_blocks(arrays, names, axes): + # put "leftover" items in float bucket, where else? + # generalize? 
+ items_dict = defaultdict(list) + extra_locs = [] + + names_idx = ensure_index(names) + if names_idx.equals(axes[0]): + names_indexer = np.arange(len(names_idx)) + else: + assert names_idx.intersection(axes[0]).is_unique + names_indexer = names_idx.get_indexer_for(axes[0]) + + for i, name_idx in enumerate(names_indexer): + if name_idx == -1: + extra_locs.append(i) + continue + + k = names[name_idx] + v = arrays[name_idx] + + block_type = get_block_type(v) + items_dict[block_type.__name__].append((i, k, v)) + + blocks = [] + if len(items_dict["FloatBlock"]): + float_blocks = _multi_blockify(items_dict["FloatBlock"]) + blocks.extend(float_blocks) + + if len(items_dict["ComplexBlock"]): + complex_blocks = _multi_blockify(items_dict["ComplexBlock"]) + blocks.extend(complex_blocks) + + if len(items_dict["TimeDeltaBlock"]): + timedelta_blocks = _multi_blockify(items_dict["TimeDeltaBlock"]) + blocks.extend(timedelta_blocks) + + if len(items_dict["IntBlock"]): + int_blocks = _multi_blockify(items_dict["IntBlock"]) + blocks.extend(int_blocks) + + if len(items_dict["DatetimeBlock"]): + datetime_blocks = _simple_blockify(items_dict["DatetimeBlock"], _NS_DTYPE) + blocks.extend(datetime_blocks) + + if len(items_dict["DatetimeTZBlock"]): + dttz_blocks = [ + make_block(array, klass=DatetimeTZBlock, placement=[i]) + for i, _, array in items_dict["DatetimeTZBlock"] + ] + blocks.extend(dttz_blocks) + + if len(items_dict["BoolBlock"]): + bool_blocks = _simple_blockify(items_dict["BoolBlock"], np.bool_) + blocks.extend(bool_blocks) + + if len(items_dict["ObjectBlock"]) > 0: + object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_) + blocks.extend(object_blocks) + + if len(items_dict["CategoricalBlock"]) > 0: + cat_blocks = [ + make_block(array, klass=CategoricalBlock, placement=[i]) + for i, _, array in items_dict["CategoricalBlock"] + ] + blocks.extend(cat_blocks) + + if len(items_dict["ExtensionBlock"]): + + external_blocks = [ + make_block(array, 
klass=ExtensionBlock, placement=[i]) + for i, _, array in items_dict["ExtensionBlock"] + ] + + blocks.extend(external_blocks) + + if len(items_dict["ObjectValuesExtensionBlock"]): + external_blocks = [ + make_block(array, klass=ObjectValuesExtensionBlock, placement=[i]) + for i, _, array in items_dict["ObjectValuesExtensionBlock"] + ] + + blocks.extend(external_blocks) + + if len(extra_locs): + shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) + + # empty items -> dtype object + block_values = np.empty(shape, dtype=object) + block_values.fill(np.nan) + + na_block = make_block(block_values, placement=extra_locs) + blocks.append(na_block) + + return blocks + + +def _simple_blockify(tuples, dtype): + """ return a single array of a block that has a single dtype; if dtype is + not None, coerce to this dtype + """ + values, placement = _stack_arrays(tuples, dtype) + + # TODO: CHECK DTYPE? + if dtype is not None and values.dtype != dtype: # pragma: no cover + values = values.astype(dtype) + + block = make_block(values, placement=placement) + return [block] + + +def _multi_blockify(tuples, dtype=None): + """ return an array of blocks that potentially have different dtypes """ + + # group by dtype + grouper = itertools.groupby(tuples, lambda x: x[2].dtype) + + new_blocks = [] + for dtype, tup_block in grouper: + + values, placement = _stack_arrays(list(tup_block), dtype) + + block = make_block(values, placement=placement) + new_blocks.append(block) + + return new_blocks + + +def _stack_arrays(tuples, dtype): + + # fml + def _asarray_compat(x): + if isinstance(x, ABCSeries): + return x._values + else: + return np.asarray(x) + + def _shape_compat(x): + if isinstance(x, ABCSeries): + return (len(x),) + else: + return x.shape + + placement, names, arrays = zip(*tuples) + + first = arrays[0] + shape = (len(arrays),) + _shape_compat(first) + + stacked = np.empty(shape, dtype=dtype) + for i, arr in enumerate(arrays): + stacked[i] = _asarray_compat(arr) + + return 
stacked, placement + + +def _interleaved_dtype( + blocks: List[Block], +) -> Optional[Union[np.dtype, ExtensionDtype]]: + """Find the common dtype for `blocks`. + + Parameters + ---------- + blocks : List[Block] + + Returns + ------- + dtype : Optional[Union[np.dtype, ExtensionDtype]] + None is returned when `blocks` is empty. + """ + if not len(blocks): + return None + + return find_common_type([b.dtype for b in blocks]) + + +def _consolidate(blocks): + """ + Merge blocks having same dtype, exclude non-consolidating blocks + """ + + # sort by _can_consolidate, dtype + gkey = lambda x: x._consolidate_key + grouper = itertools.groupby(sorted(blocks, key=gkey), gkey) + + new_blocks = [] + for (_can_consolidate, dtype), group_blocks in grouper: + merged_blocks = _merge_blocks( + list(group_blocks), dtype=dtype, _can_consolidate=_can_consolidate + ) + new_blocks = _extend_blocks(merged_blocks, new_blocks) + return new_blocks + + +def _compare_or_regex_search(a, b, regex=False): + """ + Compare two array_like inputs of the same shape or two scalar values + + Calls operator.eq or re.search, depending on regex argument. If regex is + True, perform an element-wise regex matching. 
+ + Parameters + ---------- + a : array_like or scalar + b : array_like or scalar + regex : bool, default False + + Returns + ------- + mask : array_like of bool + """ + if not regex: + op = lambda x: operator.eq(x, b) + else: + op = np.vectorize( + lambda x: bool(re.search(b, x)) if isinstance(x, str) else False + ) + + is_a_array = isinstance(a, np.ndarray) + is_b_array = isinstance(b, np.ndarray) + + if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b): + # GH#29553 avoid deprecation warnings from numpy + result = False + else: + result = op(a) + + if is_scalar(result) and (is_a_array or is_b_array): + type_names = [type(a).__name__, type(b).__name__] + + if is_a_array: + type_names[0] = f"ndarray(dtype={a.dtype})" + + if is_b_array: + type_names[1] = f"ndarray(dtype={b.dtype})" + + raise TypeError( + f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" + ) + return result + + +def _transform_index(index, func, level=None): + """ + Apply function to all values found in index. + + This includes transforming multiindex entries separately. + Only apply function to one level of the MultiIndex if level is specified. 
+ + """ + if isinstance(index, MultiIndex): + if level is not None: + items = [ + tuple(func(y) if i == level else y for i, y in enumerate(x)) + for x in index + ] + else: + items = [tuple(func(y) for y in x) for x in index] + return MultiIndex.from_tuples(items, names=index.names) + else: + items = [func(x) for x in index] + return Index(items, name=index.name, tupleize_cols=False) + + +def _fast_count_smallints(arr): + """Faster version of set(arr) for sequences of small numbers.""" + counts = np.bincount(arr.astype(np.int_)) + nz = counts.nonzero()[0] + return np.c_[nz, counts[nz]] + + +def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): + if isinstance(slice_or_indexer, slice): + return ( + "slice", + slice_or_indexer, + libinternals.slice_len(slice_or_indexer, length), + ) + elif ( + isinstance(slice_or_indexer, np.ndarray) and slice_or_indexer.dtype == np.bool_ + ): + return "mask", slice_or_indexer, slice_or_indexer.sum() + else: + indexer = np.asanyarray(slice_or_indexer, dtype=np.int64) + if not allow_fill: + indexer = maybe_convert_indices(indexer, length) + return "fancy", indexer, len(indexer) + + +def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy): + """ + Concatenate block managers into one. 
+ + Parameters + ---------- + mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples + axes : list of Index + concat_axis : int + copy : bool + + """ + concat_plans = [ + get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers + ] + concat_plan = combine_concat_plans(concat_plans, concat_axis) + blocks = [] + + for placement, join_units in concat_plan: + + if len(join_units) == 1 and not join_units[0].indexers: + b = join_units[0].block + values = b.values + if copy: + values = values.copy() + else: + values = values.view() + b = b.make_block_same_class(values, placement=placement) + elif is_uniform_join_units(join_units): + b = join_units[0].block.concat_same_type( + [ju.block for ju in join_units], placement=placement + ) + else: + b = make_block( + concatenate_join_units(join_units, concat_axis, copy=copy), + placement=placement, + ) + blocks.append(b) + + return BlockManager(blocks, axes) diff --git a/pandas/core/missing.py b/pandas/core/missing.py new file mode 100644 index 00000000..b30a7a24 --- /dev/null +++ b/pandas/core/missing.py @@ -0,0 +1,684 @@ +""" +Routines for filling missing data. 
+""" + +import numpy as np + +from pandas._libs import algos, lib +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.cast import infer_dtype_from_array +from pandas.core.dtypes.common import ( + ensure_float64, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_numeric_v_string_like, + is_scalar, + is_timedelta64_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.missing import isna + + +def mask_missing(arr, values_to_mask): + """ + Return a masking array of same size/shape as arr + with entries equaling any member of values_to_mask set to True + """ + dtype, values_to_mask = infer_dtype_from_array(values_to_mask) + + try: + values_to_mask = np.array(values_to_mask, dtype=dtype) + + except Exception: + values_to_mask = np.array(values_to_mask, dtype=object) + + na_mask = isna(values_to_mask) + nonna = values_to_mask[~na_mask] + + mask = None + for x in nonna: + if mask is None: + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + mask = False + else: + mask = arr == x + + # if x is a string and arr is not, then we get False and we must + # expand the mask to size arr.shape + if is_scalar(mask): + mask = np.zeros(arr.shape, dtype=bool) + else: + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + mask |= False + else: + mask |= arr == x + + if na_mask.any(): + if mask is None: + mask = isna(arr) + else: + mask |= isna(arr) + + # GH 21977 + if mask is None: + mask = np.zeros(arr.shape, dtype=bool) + + return mask + + +def clean_fill_method(method, allow_nearest=False): + # asfreq is compat for resampling + if method in [None, "asfreq"]: + return None + + if isinstance(method, str): + method = method.lower() + if method == "ffill": + method = "pad" + elif method == "bfill": + method = "backfill" + + valid_methods = ["pad", "backfill"] + expecting = "pad (ffill) or backfill (bfill)" + if allow_nearest: + 
valid_methods.append("nearest") + expecting = "pad (ffill), backfill (bfill) or nearest" + if method not in valid_methods: + raise ValueError(f"Invalid fill method. Expecting {expecting}. Got {method}") + return method + + +def clean_interp_method(method, **kwargs): + order = kwargs.get("order") + valid = [ + "linear", + "time", + "index", + "values", + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "polynomial", + "krogh", + "piecewise_polynomial", + "pchip", + "akima", + "spline", + "from_derivatives", + ] + if method in ("spline", "polynomial") and order is None: + raise ValueError("You must specify the order of the spline or polynomial.") + if method not in valid: + raise ValueError(f"method must be one of {valid}. Got '{method}' instead.") + + return method + + +def find_valid_index(values, how: str): + """ + Retrieves the index of the first valid value. + + Parameters + ---------- + values : ndarray or ExtensionArray + how : {'first', 'last'} + Use this parameter to change between the first or last valid index. + + Returns + ------- + int or None + """ + assert how in ["first", "last"] + + if len(values) == 0: # early stop + return None + + is_valid = ~isna(values) + + if values.ndim == 2: + is_valid = is_valid.any(1) # reduce axis 1 + + if how == "first": + idxpos = is_valid[::].argmax() + + if how == "last": + idxpos = len(values) - 1 - is_valid[::-1].argmax() + + chk_notna = is_valid[idxpos] + + if not chk_notna: + return None + return idxpos + + +def interpolate_1d( + xvalues, + yvalues, + method="linear", + limit=None, + limit_direction="forward", + limit_area=None, + fill_value=None, + bounds_error=False, + order=None, + **kwargs, +): + """ + Logic for the 1-d interpolation. The result should be 1-d, inputs + xvalues and yvalues will each be 1-d arrays of the same length. + + Bounds_error is currently hardcoded to False since non-scipy ones don't + take it as an argument. 
+ """ + # Treat the original, non-scipy methods first. + + invalid = isna(yvalues) + valid = ~invalid + + if not valid.any(): + # have to call np.asarray(xvalues) since xvalues could be an Index + # which can't be mutated + result = np.empty_like(np.asarray(xvalues), dtype=np.float64) + result.fill(np.nan) + return result + + if valid.all(): + return yvalues + + if method == "time": + if not getattr(xvalues, "is_all_dates", None): + # if not issubclass(xvalues.dtype.type, np.datetime64): + raise ValueError( + "time-weighted interpolation only works " + "on Series or DataFrames with a " + "DatetimeIndex" + ) + method = "values" + + valid_limit_directions = ["forward", "backward", "both"] + limit_direction = limit_direction.lower() + if limit_direction not in valid_limit_directions: + raise ValueError( + "Invalid limit_direction: expecting one of " + f"{valid_limit_directions}, got '{limit_direction}'." + ) + + if limit_area is not None: + valid_limit_areas = ["inside", "outside"] + limit_area = limit_area.lower() + if limit_area not in valid_limit_areas: + raise ValueError( + f"Invalid limit_area: expecting one of {valid_limit_areas}, got " + f"{limit_area}." + ) + + # default limit is unlimited GH #16282 + limit = algos._validate_limit(nobs=None, limit=limit) + + # These are sets of index pointers to invalid values... i.e. {0, 1, etc... + all_nans = set(np.flatnonzero(invalid)) + start_nans = set(range(find_valid_index(yvalues, "first"))) + end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) + mid_nans = all_nans - start_nans - end_nans + + # Like the sets above, preserve_nans contains indices of invalid values, + # but in this case, it is the final set of indices that need to be + # preserved as NaN after the interpolation. + + # For example if limit_direction='forward' then preserve_nans will + # contain indices of NaNs at the beginning of the series, and NaNs that + # are more than'limit' away from the prior non-NaN. 
+ + # set preserve_nans based on direction using _interp_limit + if limit_direction == "forward": + preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) + elif limit_direction == "backward": + preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) + else: + # both directions... just use _interp_limit + preserve_nans = set(_interp_limit(invalid, limit, limit)) + + # if limit_area is set, add either mid or outside indices + # to preserve_nans GH #16284 + if limit_area == "inside": + # preserve NaNs on the outside + preserve_nans |= start_nans | end_nans + elif limit_area == "outside": + # preserve NaNs on the inside + preserve_nans |= mid_nans + + # sort preserve_nans and covert to list + preserve_nans = sorted(preserve_nans) + + xvalues = getattr(xvalues, "values", xvalues) + yvalues = getattr(yvalues, "values", yvalues) + result = yvalues.copy() + + if method in ["linear", "time", "index", "values"]: + if method in ("values", "index"): + inds = np.asarray(xvalues) + # hack for DatetimeIndex, #1646 + if needs_i8_conversion(inds.dtype.type): + inds = inds.view(np.int64) + if inds.dtype == np.object_: + inds = lib.maybe_convert_objects(inds) + else: + inds = xvalues + # np.interp requires sorted X values, #21037 + indexer = np.argsort(inds[valid]) + result[invalid] = np.interp( + inds[invalid], inds[valid][indexer], yvalues[valid][indexer] + ) + result[preserve_nans] = np.nan + return result + + sp_methods = [ + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "barycentric", + "krogh", + "spline", + "polynomial", + "from_derivatives", + "piecewise_polynomial", + "pchip", + "akima", + ] + + if method in sp_methods: + inds = np.asarray(xvalues) + # hack for DatetimeIndex, #1646 + if issubclass(inds.dtype.type, np.datetime64): + inds = inds.view(np.int64) + result[invalid] = _interpolate_scipy_wrapper( + inds[valid], + yvalues[valid], + inds[invalid], + method=method, + fill_value=fill_value, + bounds_error=bounds_error, + 
order=order, + **kwargs, + ) + result[preserve_nans] = np.nan + return result + + +def _interpolate_scipy_wrapper( + x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs +): + """ + Passed off to scipy.interpolate.interp1d. method is scipy's kind. + Returns an array interpolated at new_x. Add any new methods to + the list in _clean_interp_method. + """ + extra = f"{method} interpolation requires SciPy." + import_optional_dependency("scipy", extra=extra) + from scipy import interpolate + + new_x = np.asarray(new_x) + + # ignores some kwargs that could be passed along. + alt_methods = { + "barycentric": interpolate.barycentric_interpolate, + "krogh": interpolate.krogh_interpolate, + "from_derivatives": _from_derivatives, + "piecewise_polynomial": _from_derivatives, + } + + if getattr(x, "is_all_dates", False): + # GH 5975, scipy.interp1d can't handle datetime64s + x, new_x = x._values.astype("i8"), new_x.astype("i8") + + if method == "pchip": + try: + alt_methods["pchip"] = interpolate.pchip_interpolate + except AttributeError: + raise ImportError( + "Your version of Scipy does not support PCHIP interpolation." 
+ ) + elif method == "akima": + alt_methods["akima"] = _akima_interpolate + + interp1d_methods = [ + "nearest", + "zero", + "slinear", + "quadratic", + "cubic", + "polynomial", + ] + if method in interp1d_methods: + if method == "polynomial": + method = order + terp = interpolate.interp1d( + x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error + ) + new_y = terp(new_x) + elif method == "spline": + # GH #10633, #24014 + if isna(order) or (order <= 0): + raise ValueError( + f"order needs to be specified and greater than 0; got order: {order}" + ) + terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) + new_y = terp(new_x) + else: + # GH 7295: need to be able to write for some reason + # in some circumstances: check all three + if not x.flags.writeable: + x = x.copy() + if not y.flags.writeable: + y = y.copy() + if not new_x.flags.writeable: + new_x = new_x.copy() + method = alt_methods[method] + new_y = method(x, y, new_x, **kwargs) + return new_y + + +def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): + """ + Convenience function for interpolate.BPoly.from_derivatives. + + Construct a piecewise polynomial in the Bernstein basis, compatible + with the specified values and derivatives at breakpoints. + + Parameters + ---------- + xi : array_like + sorted 1D array of x-coordinates + yi : array_like or list of array-likes + yi[i][j] is the j-th derivative known at xi[i] + order: None or int or array_like of ints. Default: None. + Specifies the degree of local polynomials. If not None, some + derivatives are ignored. + der : int or list + How many derivatives to extract; None for all potentially nonzero + derivatives (that is a number equal to the number of points), or a + list of derivatives to extract. This numberincludes the function + value as 0th derivative. + extrapolate : bool, optional + Whether to extrapolate to ouf-of-bounds points based on first and last + intervals, or to return NaNs. Default: True. 
+ + See Also + -------- + scipy.interpolate.BPoly.from_derivatives + + Returns + ------- + y : scalar or array_like + The result, of length R or length M or M by R. + """ + from scipy import interpolate + + # return the method for compat with scipy version & backwards compat + method = interpolate.BPoly.from_derivatives + m = method(xi, yi.reshape(-1, 1), orders=order, extrapolate=extrapolate) + + return m(x) + + +def _akima_interpolate(xi, yi, x, der=0, axis=0): + """ + Convenience function for akima interpolation. + xi and yi are arrays of values used to approximate some function f, + with ``yi = f(xi)``. + + See `Akima1DInterpolator` for details. + + Parameters + ---------- + xi : array_like + A sorted list of x-coordinates, of length N. + yi : array_like + A 1-D array of real values. `yi`'s length along the interpolation + axis must be equal to the length of `xi`. If N-D array, use axis + parameter to select correct axis. + x : scalar or array_like + Of length M. + der : int or list, optional + How many derivatives to extract; None for all potentially + nonzero derivatives (that is a number equal to the number + of points), or a list of derivatives to extract. This number + includes the function value as 0th derivative. + axis : int, optional + Axis in the yi array corresponding to the x-coordinate values. + + See Also + -------- + scipy.interpolate.Akima1DInterpolator + + Returns + ------- + y : scalar or array_like + The result, of length R or length M or M by R, + + """ + from scipy import interpolate + + P = interpolate.Akima1DInterpolator(xi, yi, axis=axis) + + if der == 0: + return P(x) + elif interpolate._isscalar(der): + return P(x, der=der) + else: + return [P(x, nu) for nu in der] + + +def interpolate_2d( + values, method="pad", axis=0, limit=None, fill_value=None, dtype=None +): + """ + Perform an actual interpolation of values, values will be make 2-d if + needed fills inplace, returns the result. 
+ """ + orig_values = values + + transf = (lambda x: x) if axis == 0 else (lambda x: x.T) + + # reshape a 1 dim if needed + ndim = values.ndim + if values.ndim == 1: + if axis != 0: # pragma: no cover + raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") + values = values.reshape(tuple((1,) + values.shape)) + + if fill_value is None: + mask = None + else: # todo create faster fill func without masking + mask = mask_missing(transf(values), fill_value) + + method = clean_fill_method(method) + if method == "pad": + values = transf(pad_2d(transf(values), limit=limit, mask=mask, dtype=dtype)) + else: + values = transf( + backfill_2d(transf(values), limit=limit, mask=mask, dtype=dtype) + ) + + # reshape back + if ndim == 1: + values = values[0] + + if orig_values.dtype.kind == "M": + # convert float back to datetime64 + values = values.astype(orig_values.dtype) + + return values + + +def _cast_values_for_fillna(values, dtype): + """ + Cast values to a dtype that algos.pad and algos.backfill can handle. + """ + # TODO: for int-dtypes we make a copy, but for everything else this + # alters the values in-place. Is this intentional? 
+ + if ( + is_datetime64_dtype(dtype) + or is_datetime64tz_dtype(dtype) + or is_timedelta64_dtype(dtype) + ): + values = values.view(np.int64) + + elif is_integer_dtype(values): + # NB: this check needs to come after the datetime64 check above + values = ensure_float64(values) + + return values + + +def _fillna_prep(values, mask=None, dtype=None): + # boilerplate for pad_1d, backfill_1d, pad_2d, backfill_2d + if dtype is None: + dtype = values.dtype + + if mask is None: + # This needs to occur before datetime/timedeltas are cast to int64 + mask = isna(values) + + values = _cast_values_for_fillna(values, dtype) + + mask = mask.view(np.uint8) + return values, mask + + +def pad_1d(values, limit=None, mask=None, dtype=None): + values, mask = _fillna_prep(values, mask, dtype) + algos.pad_inplace(values, mask, limit=limit) + return values + + +def backfill_1d(values, limit=None, mask=None, dtype=None): + values, mask = _fillna_prep(values, mask, dtype) + algos.backfill_inplace(values, mask, limit=limit) + return values + + +def pad_2d(values, limit=None, mask=None, dtype=None): + values, mask = _fillna_prep(values, mask, dtype) + + if np.all(values.shape): + algos.pad_2d_inplace(values, mask, limit=limit) + else: + # for test coverage + pass + return values + + +def backfill_2d(values, limit=None, mask=None, dtype=None): + values, mask = _fillna_prep(values, mask, dtype) + + if np.all(values.shape): + algos.backfill_2d_inplace(values, mask, limit=limit) + else: + # for test coverage + pass + return values + + +_fill_methods = {"pad": pad_1d, "backfill": backfill_1d} + + +def get_fill_func(method): + method = clean_fill_method(method) + return _fill_methods[method] + + +def clean_reindex_fill_method(method): + return clean_fill_method(method, allow_nearest=True) + + +def _interp_limit(invalid, fw_limit, bw_limit): + """ + Get indexers of values that won't be filled + because they exceed the limits. 
+ + Parameters + ---------- + invalid : boolean ndarray + fw_limit : int or None + forward limit to index + bw_limit : int or None + backward limit to index + + Returns + ------- + set of indexers + + Notes + ----- + This is equivalent to the more readable, but slower + + .. code-block:: python + + def _interp_limit(invalid, fw_limit, bw_limit): + for x in np.where(invalid)[0]: + if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + yield x + """ + # handle forward first; the backward direction is the same except + # 1. operate on the reversed array + # 2. subtract the returned indices from N - 1 + N = len(invalid) + f_idx = set() + b_idx = set() + + def inner(invalid, limit): + limit = min(limit, N) + windowed = _rolling_window(invalid, limit + 1).all(1) + idx = set(np.where(windowed)[0] + limit) | set( + np.where((~invalid[: limit + 1]).cumsum() == 0)[0] + ) + return idx + + if fw_limit is not None: + + if fw_limit == 0: + f_idx = set(np.where(invalid)[0]) + else: + f_idx = inner(invalid, fw_limit) + + if bw_limit is not None: + + if bw_limit == 0: + # then we don't even need to care about backwards + # just use forwards + return f_idx + else: + b_idx = list(inner(invalid[::-1], bw_limit)) + b_idx = set(N - 1 - np.asarray(b_idx)) + if fw_limit == 0: + return b_idx + + return f_idx & b_idx + + +def _rolling_window(a, window): + """ + [True, True, False, True, False], 2 -> + + [ + [True, True], + [True, False], + [False, True], + [True, False], + ] + """ + # https://stackoverflow.com/a/6811241 + shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) + strides = a.strides + (a.strides[-1],) + return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py new file mode 100644 index 00000000..6b03e76a --- /dev/null +++ b/pandas/core/nanops.py @@ -0,0 +1,1424 @@ +import functools +import itertools +import operator +from typing import Any, Optional, Tuple, Union + +import numpy as np + +from 
pandas._config import get_option + +from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask +from pandas.core.dtypes.common import ( + _get_dtype, + is_any_int_dtype, + is_bool_dtype, + is_complex, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_datetime_or_timedelta_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_numeric_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna + +bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") +_BOTTLENECK_INSTALLED = bn is not None +_USE_BOTTLENECK = False + + +def set_use_bottleneck(v=True): + # set/unset to use bottleneck + global _USE_BOTTLENECK + if _BOTTLENECK_INSTALLED: + _USE_BOTTLENECK = v + + +set_use_bottleneck(get_option("compute.use_bottleneck")) + + +class disallow: + def __init__(self, *dtypes): + super().__init__() + self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) + + def check(self, obj) -> bool: + return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) + + def __call__(self, f): + @functools.wraps(f) + def _f(*args, **kwargs): + obj_iter = itertools.chain(args, kwargs.values()) + if any(self.check(obj) for obj in obj_iter): + f_name = f.__name__.replace("nan", "") + raise TypeError( + f"reduction operation '{f_name}' not allowed for this dtype" + ) + try: + with np.errstate(invalid="ignore"): + return f(*args, **kwargs) + except ValueError as e: + # we want to transform an object array + # ValueError message to the more typical TypeError + # e.g. 
this is normally a disallowed function on + # object arrays that contain strings + if is_object_dtype(args[0]): + raise TypeError(e) + raise + + return _f + + +class bottleneck_switch: + def __init__(self, name=None, **kwargs): + self.name = name + self.kwargs = kwargs + + def __call__(self, alt): + bn_name = self.name or alt.__name__ + + try: + bn_func = getattr(bn, bn_name) + except (AttributeError, NameError): # pragma: no cover + bn_func = None + + @functools.wraps(alt) + def f(values, axis=None, skipna=True, **kwds): + if len(self.kwargs) > 0: + for k, v in self.kwargs.items(): + if k not in kwds: + kwds[k] = v + + if values.size == 0 and kwds.get("min_count") is None: + # We are empty, returning NA for our type + # Only applies for the default `min_count` of None + # since that affects how empty arrays are handled. + # TODO(GH-18976) update all the nanops methods to + # correctly handle empty inputs and remove this check. + # It *may* just be `var` + return _na_for_min_count(values, axis) + + if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, bn_name): + if kwds.get("mask", None) is None: + # `mask` is not recognised by bottleneck, would raise + # TypeError if called + kwds.pop("mask", None) + result = bn_func(values, axis=axis, **kwds) + + # prefer to treat inf/-inf as NA, but must compute the func + # twice :( + if _has_infs(result): + result = alt(values, axis=axis, skipna=skipna, **kwds) + else: + result = alt(values, axis=axis, skipna=skipna, **kwds) + else: + result = alt(values, axis=axis, skipna=skipna, **kwds) + + return result + + return f + + +def _bn_ok_dtype(dt, name: str) -> bool: + # Bottleneck chokes on datetime64 + if not is_object_dtype(dt) and not ( + is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt) + ): + + # GH 15507 + # bottleneck does not properly upcast during the sum + # so can overflow + + # GH 9422 + # further we also want to preserve NaN when all elements + # are NaN, unlinke bottleneck/numpy which consider 
this + # to be 0 + if name in ["nansum", "nanprod"]: + return False + + return True + return False + + +def _has_infs(result) -> bool: + if isinstance(result, np.ndarray): + if result.dtype == "f8": + return lib.has_infs_f8(result.ravel()) + elif result.dtype == "f4": + return lib.has_infs_f4(result.ravel()) + try: + return np.isinf(result).any() + except (TypeError, NotImplementedError): + # if it doesn't support infs, then it can't have infs + return False + + +def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): + """ return the correct fill value for the dtype of the values """ + if fill_value is not None: + return fill_value + if _na_ok_dtype(dtype): + if fill_value_typ is None: + return np.nan + else: + if fill_value_typ == "+inf": + return np.inf + else: + return -np.inf + else: + if fill_value_typ is None: + return iNaT + else: + if fill_value_typ == "+inf": + # need the max int here + return _int64_max + else: + return iNaT + + +def _maybe_get_mask( + values: np.ndarray, skipna: bool, mask: Optional[np.ndarray] +) -> Optional[np.ndarray]: + """ + Compute a mask if and only if necessary. + + This function will compute a mask iff it is necessary. Otherwise, + return the provided mask (potentially None) when a mask does not need to be + computed. + + A mask is never necessary if the values array is of boolean or integer + dtypes, as these are incapable of storing NaNs. If passing a NaN-capable + dtype that is interpretable as either boolean or integer data (eg, + timedelta64), a mask must be provided. + + If the skipna parameter is False, a new mask will not be computed. + + The mask is computed using isna() by default. Setting invert=True selects + notna() as the masking function. 
+ + Parameters + ---------- + values : ndarray + input array to potentially compute mask for + skipna : bool + boolean for whether NaNs should be skipped + mask : Optional[ndarray] + nan-mask if known + + Returns + ------- + Optional[np.ndarray] + """ + + if mask is None: + if is_bool_dtype(values.dtype) or is_integer_dtype(values.dtype): + # Boolean data cannot contain nulls, so signal via mask being None + return None + + if skipna: + mask = isna(values) + + return mask + + +def _get_values( + values: np.ndarray, + skipna: bool, + fill_value: Any = None, + fill_value_typ: Optional[str] = None, + mask: Optional[np.ndarray] = None, +) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]: + """ + Utility to get the values view, mask, dtype, dtype_max, and fill_value. + + If both mask and fill_value/fill_value_typ are not None and skipna is True, + the values array will be copied. + + For input arrays of boolean or integer dtypes, copies will only occur if a + precomputed mask, a fill_value/fill_value_typ, and skipna=True are + provided. + + Parameters + ---------- + values : ndarray + input array to potentially compute mask for + skipna : bool + boolean for whether NaNs should be skipped + fill_value : Any + value to fill NaNs with + fill_value_typ : str + Set to '+inf' or '-inf' to handle dtype-specific infinities + mask : Optional[np.ndarray] + nan-mask if known + + Returns + ------- + values : ndarray + Potential copy of input value array + mask : Optional[ndarray[bool]] + Mask for values, if deemed necessary to compute + dtype : dtype + dtype for values + dtype_max : dtype + platform independent dtype + fill_value : Any + fill value used + """ + + # In _get_values is only called from within nanops, and in all cases + # with scalar fill_value. 
This guarantee is important for the + # maybe_upcast_putmask call below + assert is_scalar(fill_value) + + mask = _maybe_get_mask(values, skipna, mask) + + if is_datetime64tz_dtype(values): + # lib.values_from_object returns M8[ns] dtype instead of tz-aware, + # so this case must be handled separately from the rest + dtype = values.dtype + values = getattr(values, "_values", values) + else: + values = lib.values_from_object(values) + dtype = values.dtype + + if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values): + # changing timedelta64/datetime64 to int64 needs to happen after + # finding `mask` above + values = getattr(values, "asi8", values) + values = values.view(np.int64) + + dtype_ok = _na_ok_dtype(dtype) + + # get our fill value (in case we need to provide an alternative + # dtype for it) + fill_value = _get_fill_value( + dtype, fill_value=fill_value, fill_value_typ=fill_value_typ + ) + + copy = (mask is not None) and (fill_value is not None) + + if skipna and copy: + values = values.copy() + if dtype_ok: + np.putmask(values, mask, fill_value) + + # promote if needed + else: + values, _ = maybe_upcast_putmask(values, mask, fill_value) + + # return a platform independent precision dtype + dtype_max = dtype + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + dtype_max = np.int64 + elif is_float_dtype(dtype): + dtype_max = np.float64 + + return values, mask, dtype, dtype_max, fill_value + + +def _na_ok_dtype(dtype): + # TODO: what about datetime64tz? PeriodDtype? 
+ return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64)) + + +def _wrap_results(result, dtype, fill_value=None): + """ wrap our results if needed """ + + if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + if fill_value is None: + # GH#24293 + fill_value = iNaT + if not isinstance(result, np.ndarray): + tz = getattr(dtype, "tz", None) + assert not isna(fill_value), "Expected non-null fill_value" + if result == fill_value: + result = np.nan + result = Timestamp(result, tz=tz) + else: + result = result.view(dtype) + elif is_timedelta64_dtype(dtype): + if not isinstance(result, np.ndarray): + if result == fill_value: + result = np.nan + + # raise if we have a timedelta64[ns] which is too large + if np.fabs(result) > _int64_max: + raise ValueError("overflow in timedelta operation") + + result = Timedelta(result, unit="ns") + else: + result = result.astype("m8[ns]").view(dtype) + + return result + + +def _na_for_min_count(values, axis: Optional[int]): + """ + Return the missing value for `values`. + + Parameters + ---------- + values : ndarray + axis : int or None + axis for the reduction, required if values.ndim > 1. + + Returns + ------- + result : scalar or ndarray + For 1-D values, returns a scalar of the correct missing type. + For 2-D values, returns a 1-D array where each element is missing. + """ + # we either return np.nan or pd.NaT + if is_numeric_dtype(values): + values = values.astype("float64") + fill_value = na_value_for_dtype(values.dtype) + + if values.ndim == 1: + return fill_value + else: + assert axis is not None # assertion to make mypy happy + result_shape = values.shape[:axis] + values.shape[axis + 1 :] + result = np.empty(result_shape, dtype=values.dtype) + result.fill(fill_value) + return result + + +def nanany(values, axis=None, skipna: bool = True, mask=None): + """ + Check if any elements along an axis evaluate to True. 
+ + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2]) + >>> nanops.nanany(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([np.nan]) + >>> nanops.nanany(s) + False + """ + values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask) + return values.any(axis) + + +def nanall(values, axis=None, skipna: bool = True, mask=None): + """ + Check if all elements along an axis evaluate to True. + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanall(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 0]) + >>> nanops.nanall(s) + False + """ + values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask) + return values.all(axis) + + +@disallow("M8") +def nansum(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Sum the elements along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray[dtype] + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nansum(s) + 3.0 + """ + values, mask, dtype, dtype_max, _ = _get_values( + values, skipna, fill_value=0, mask=mask + ) + dtype_sum = dtype_max + if is_float_dtype(dtype): + dtype_sum = dtype + elif is_timedelta64_dtype(dtype): + dtype_sum = np.float64 + the_sum = values.sum(axis, dtype=dtype_sum) + the_sum = 
_maybe_null_out(the_sum, axis, mask, values.shape, min_count=min_count) + + return _wrap_results(the_sum, dtype) + + +@disallow("M8", DatetimeTZDtype) +@bottleneck_switch() +def nanmean(values, axis=None, skipna=True, mask=None): + """ + Compute the mean of the element along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanmean(s) + 1.5 + """ + values, mask, dtype, dtype_max, _ = _get_values( + values, skipna, fill_value=0, mask=mask + ) + dtype_sum = dtype_max + dtype_count = np.float64 + if ( + is_integer_dtype(dtype) + or is_timedelta64_dtype(dtype) + or is_datetime64_dtype(dtype) + or is_datetime64tz_dtype(dtype) + ): + dtype_sum = np.float64 + elif is_float_dtype(dtype): + dtype_sum = dtype + dtype_count = dtype + count = _get_counts(values.shape, mask, axis, dtype=dtype_count) + the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_sum)) + + if axis is not None and getattr(the_sum, "ndim", False): + with np.errstate(all="ignore"): + # suppress division by zero warnings + the_mean = the_sum / count + ct_mask = count == 0 + if ct_mask.any(): + the_mean[ct_mask] = np.nan + else: + the_mean = the_sum / count if count > 0 else np.nan + + return _wrap_results(the_mean, dtype) + + +@disallow("M8") +@bottleneck_switch() +def nanmedian(values, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 2]) + >>> nanops.nanmedian(s) + 2.0 + """ + + def get_median(x): + mask = notna(x) + if not skipna and not mask.all(): + return np.nan + return np.nanmedian(x[mask]) + + values, mask, dtype, dtype_max, _ = _get_values(values, skipna, mask=mask) + if not is_float_dtype(values): + values = values.astype("f8") + if mask is not None: + values[mask] = np.nan + + if axis is None: + values = values.ravel() + + notempty = values.size + + # an array from a frame + if values.ndim > 1: + + # there's a non-empty array to apply over otherwise numpy raises + if notempty: + if not skipna: + return _wrap_results( + np.apply_along_axis(get_median, axis, values), dtype + ) + + # fastpath for the skipna case + return _wrap_results(np.nanmedian(values, axis), dtype) + + # must return the correct shape, but median is not defined for the + # empty set so return nans of shape "everything but the passed axis" + # since "axis" is where the reduction would occur if we had a nonempty + # array + shp = np.array(values.shape) + dims = np.arange(values.ndim) + ret = np.empty(shp[dims != axis]) + ret.fill(np.nan) + return _wrap_results(ret, dtype) + + # otherwise return a scalar value + return _wrap_results(get_median(values) if notempty else np.nan, dtype) + + +def _get_counts_nanvar( + value_counts: Tuple[int], + mask: Optional[np.ndarray], + axis: Optional[int], + ddof: int, + dtype=float, +) -> Tuple[Union[int, np.ndarray], Union[int, np.ndarray]]: + """ Get the count of non-null values along an axis, accounting + for degrees of freedom. 
+ + Parameters + ---------- + value_counts : Tuple[int] + shape tuple from values ndarray, used if mask is None + mask : Optional[ndarray[bool]] + locations in values that should be considered missing + axis : Optional[int] + axis to count along + ddof : int + degrees of freedom + dtype : type, optional + type to use for count + + Returns + ------- + count : scalar or array + d : scalar or array + """ + dtype = _get_dtype(dtype) + count = _get_counts(value_counts, mask, axis, dtype=dtype) + d = count - dtype.type(ddof) + + # always return NaN, never inf + if is_scalar(count): + if count <= ddof: + count = np.nan + d = np.nan + else: + mask2: np.ndarray = count <= ddof + if mask2.any(): + np.putmask(d, mask2, np.nan) + np.putmask(count, mask2, np.nan) + return count, d + + +@disallow("M8") +@bottleneck_switch(ddof=1) +def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard deviation along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanstd(s) + 1.0 + """ + orig_dtype = values.dtype + values, mask, dtype, dtype_max, fill_value = _get_values(values, skipna, mask=mask) + + result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, mask=mask)) + return _wrap_results(result, orig_dtype) + + +@disallow("M8", "m8") +@bottleneck_switch(ddof=1) +def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the variance along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanvar(s) + 1.0 + """ + values = lib.values_from_object(values) + dtype = values.dtype + mask = _maybe_get_mask(values, skipna, mask) + if is_any_int_dtype(values): + values = values.astype("f8") + if mask is not None: + values[mask] = np.nan + + if is_float_dtype(values): + count, d = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) + else: + count, d = _get_counts_nanvar(values.shape, mask, axis, ddof) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + # xref GH10242 + # Compute variance via two-pass algorithm, which is stable against + # cancellation errors and relatively accurate for small numbers of + # observations. 
+ # + # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count + if axis is not None: + avg = np.expand_dims(avg, axis) + sqr = _ensure_numeric((avg - values) ** 2) + if mask is not None: + np.putmask(sqr, mask, 0) + result = sqr.sum(axis=axis, dtype=np.float64) / d + + # Return variance as np.float64 (the datatype used in the accumulator), + # unless we were dealing with a float array, in which case use the same + # precision as the original values array. + if is_float_dtype(dtype): + result = result.astype(dtype) + return _wrap_results(result, values.dtype) + + +@disallow("M8", "m8") +def nansem(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard error in the mean along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nansem(s) + 0.5773502691896258 + """ + + # This checks if non-numeric-like data is passed with numeric_only=False + # and raises a TypeError otherwise + nanvar(values, axis, skipna, ddof=ddof, mask=mask) + + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + + count, _ = _get_counts_nanvar(values.shape, mask, axis, ddof, values.dtype) + var = nanvar(values, axis, skipna, ddof=ddof) + + return np.sqrt(var) / np.sqrt(count) + + +def _nanminmax(meth, fill_value_typ): + @bottleneck_switch(name="nan" + meth) + def reduction(values, axis=None, skipna=True, mask=None): + + values, mask, dtype, dtype_max, fill_value = _get_values( + values, skipna, fill_value_typ=fill_value_typ, mask=mask + ) + + if (axis is not None and values.shape[axis] == 0) or values.size == 0: + try: + result = getattr(values, meth)(axis, dtype=dtype_max) + result.fill(np.nan) + except (AttributeError, TypeError, ValueError): + result = np.nan + else: + result = getattr(values, meth)(axis) + + result = _wrap_results(result, dtype, fill_value) + return _maybe_null_out(result, axis, mask, values.shape) + + return reduction + + +nanmin = _nanminmax("min", fill_value_typ="+inf") +nanmax = _nanminmax("max", fill_value_typ="-inf") + + +@disallow("O") +def nanargmax(values, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : int + The index of max value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmax(s) + 4 + """ + values, mask, dtype, _, _ = _get_values( + values, True, fill_value_typ="-inf", mask=mask + ) + result = values.argmax(axis) 
+ result = _maybe_arg_null_out(result, axis, mask, skipna) + return result + + +@disallow("O") +def nanargmin(values, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : int + The index of min value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmin(s) + 0 + """ + values, mask, dtype, _, _ = _get_values( + values, True, fill_value_typ="+inf", mask=mask + ) + result = values.argmin(axis) + result = _maybe_arg_null_out(result, axis, mask, skipna) + return result + + +@disallow("M8", "m8") +def nanskew(values, axis=None, skipna=True, mask=None): + """ Compute the sample skewness. + + The statistic computed here is the adjusted Fisher-Pearson standardized + moment coefficient G1. The algorithm computes this coefficient directly + from the second and third central moment. + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 2]) + >>> nanops.nanskew(s) + 1.7320508075688787 + """ + values = lib.values_from_object(values) + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + count = _get_counts(values.shape, mask, axis) + else: + count = _get_counts(values.shape, mask, axis, dtype=values.dtype) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + mean = values.sum(axis, dtype=np.float64) / count + if axis is not None: + mean = np.expand_dims(mean, axis) + + adjusted = values - mean + if skipna and mask is not None: + np.putmask(adjusted, mask, 0) + adjusted2 = adjusted ** 2 + adjusted3 = adjusted2 * adjusted + m2 = adjusted2.sum(axis, dtype=np.float64) + m3 = adjusted3.sum(axis, dtype=np.float64) + + # floating point error + # + # #18044 in _libs/windows.pyx calc_skew follow this behavior + # to fix the fperr to treat m2 <1e-14 as zero + m2 = _zero_out_fperr(m2) + m3 = _zero_out_fperr(m3) + + with np.errstate(invalid="ignore", divide="ignore"): + result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5) + + dtype = values.dtype + if is_float_dtype(dtype): + result = result.astype(dtype) + + if isinstance(result, np.ndarray): + result = np.where(m2 == 0, 0, result) + result[count < 3] = np.nan + return result + else: + result = 0 if m2 == 0 else result + if count < 3: + return np.nan + return result + + +@disallow("M8", "m8") +def nankurt(values, axis=None, skipna=True, mask=None): + """ + Compute the sample excess kurtosis + + The statistic computed here is the adjusted Fisher-Pearson standardized + moment coefficient G2, computed directly from the second and fourth + central moment. 
+ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 3, 2]) + >>> nanops.nankurt(s) + -1.2892561983471076 + """ + values = lib.values_from_object(values) + mask = _maybe_get_mask(values, skipna, mask) + if not is_float_dtype(values.dtype): + values = values.astype("f8") + count = _get_counts(values.shape, mask, axis) + else: + count = _get_counts(values.shape, mask, axis, dtype=values.dtype) + + if skipna and mask is not None: + values = values.copy() + np.putmask(values, mask, 0) + + mean = values.sum(axis, dtype=np.float64) / count + if axis is not None: + mean = np.expand_dims(mean, axis) + + adjusted = values - mean + if skipna and mask is not None: + np.putmask(adjusted, mask, 0) + adjusted2 = adjusted ** 2 + adjusted4 = adjusted2 ** 2 + m2 = adjusted2.sum(axis, dtype=np.float64) + m4 = adjusted4.sum(axis, dtype=np.float64) + + with np.errstate(invalid="ignore", divide="ignore"): + adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) + numer = count * (count + 1) * (count - 1) * m4 + denom = (count - 2) * (count - 3) * m2 ** 2 + + # floating point error + # + # #18044 in _libs/windows.pyx calc_kurt follow this behavior + # to fix the fperr to treat denom <1e-14 as zero + numer = _zero_out_fperr(numer) + denom = _zero_out_fperr(denom) + + if not isinstance(denom, np.ndarray): + # if ``denom`` is a scalar, check these corner cases first before + # doing division + if count < 4: + return np.nan + if denom == 0: + return 0 + + with np.errstate(invalid="ignore", divide="ignore"): + result = numer / denom - adj + + dtype = values.dtype + if is_float_dtype(dtype): + result = result.astype(dtype) + + if isinstance(result, np.ndarray): + 
result = np.where(denom == 0, 0, result) + result[count < 4] = np.nan + + return result + + +@disallow("M8", "m8") +def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Parameters + ---------- + values : ndarray[dtype] + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan]) + >>> nanops.nanprod(s) + 6.0 + + Returns + ------- + The product of all elements on a given axis. ( NaNs are treated as 1) + """ + mask = _maybe_get_mask(values, skipna, mask) + + if skipna and mask is not None: + values = values.copy() + values[mask] = 1 + result = values.prod(axis) + return _maybe_null_out(result, axis, mask, values.shape, min_count=min_count) + + +def _maybe_arg_null_out( + result: np.ndarray, axis: Optional[int], mask: Optional[np.ndarray], skipna: bool +) -> Union[np.ndarray, int]: + # helper function for nanargmin/nanargmax + if mask is None: + return result + + if axis is None or not getattr(result, "ndim", False): + if skipna: + if mask.all(): + result = -1 + else: + if mask.any(): + result = -1 + else: + if skipna: + na_mask = mask.all(axis) + else: + na_mask = mask.any(axis) + if na_mask.any(): + result[na_mask] = -1 + return result + + +def _get_counts( + values_shape: Tuple[int], + mask: Optional[np.ndarray], + axis: Optional[int], + dtype=float, +) -> Union[int, np.ndarray]: + """ Get the count of non-null values along an axis + + Parameters + ---------- + values_shape : Tuple[int] + shape tuple from values ndarray, used if mask is None + mask : Optional[ndarray[bool]] + locations in values that should be considered missing + axis : Optional[int] + axis to count along + dtype : type, optional + type to use for count + + Returns + ------- + count : scalar or array + """ + dtype = _get_dtype(dtype) + if axis is None: + if mask 
is not None: + n = mask.size - mask.sum() + else: + n = np.prod(values_shape) + return dtype.type(n) + + if mask is not None: + count = mask.shape[axis] - mask.sum(axis) + else: + count = values_shape[axis] + + if is_scalar(count): + return dtype.type(count) + try: + return count.astype(dtype) + except AttributeError: + return np.array(count, dtype=dtype) + + +def _maybe_null_out( + result: np.ndarray, + axis: Optional[int], + mask: Optional[np.ndarray], + shape: Tuple, + min_count: int = 1, +) -> np.ndarray: + if mask is not None and axis is not None and getattr(result, "ndim", False): + null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0 + if np.any(null_mask): + if is_numeric_dtype(result): + if np.iscomplexobj(result): + result = result.astype("c16") + else: + result = result.astype("f8") + result[null_mask] = np.nan + else: + # GH12941, use None to auto cast null + result[null_mask] = None + elif result is not NaT: + if mask is not None: + null_mask = mask.size - mask.sum() + else: + null_mask = np.prod(shape) + if null_mask < min_count: + result = np.nan + + return result + + +def _zero_out_fperr(arg): + # #18044 reference this behavior to fix rolling skew/kurt issue + if isinstance(arg, np.ndarray): + with np.errstate(invalid="ignore"): + return np.where(np.abs(arg) < 1e-14, 0, arg) + else: + return arg.dtype.type(0) if np.abs(arg) < 1e-14 else arg + + +@disallow("M8", "m8") +def nancorr(a, b, method="pearson", min_periods=None): + """ + a, b: ndarrays + """ + if len(a) != len(b): + raise AssertionError("Operands to nancorr must have same size") + + if min_periods is None: + min_periods = 1 + + valid = notna(a) & notna(b) + if not valid.all(): + a = a[valid] + b = b[valid] + + if len(a) < min_periods: + return np.nan + + f = get_corr_func(method) + return f(a, b) + + +def get_corr_func(method): + if method in ["kendall", "spearman"]: + from scipy.stats import kendalltau, spearmanr + elif method in ["pearson"]: + pass + elif callable(method): + 
return method + else: + raise ValueError( + f"Unkown method '{method}', expected one of 'kendall', 'spearman'" + ) + + def _pearson(a, b): + return np.corrcoef(a, b)[0, 1] + + def _kendall(a, b): + # kendalltau returns a tuple of the tau statistic and pvalue + rs = kendalltau(a, b) + return rs[0] + + def _spearman(a, b): + return spearmanr(a, b)[0] + + _cor_methods = {"pearson": _pearson, "kendall": _kendall, "spearman": _spearman} + return _cor_methods[method] + + +@disallow("M8", "m8") +def nancov(a, b, min_periods=None): + if len(a) != len(b): + raise AssertionError("Operands to nancov must have same size") + + if min_periods is None: + min_periods = 1 + + valid = notna(a) & notna(b) + if not valid.all(): + a = a[valid] + b = b[valid] + + if len(a) < min_periods: + return np.nan + + return np.cov(a, b)[0, 1] + + +def _ensure_numeric(x): + if isinstance(x, np.ndarray): + if is_integer_dtype(x) or is_bool_dtype(x): + x = x.astype(np.float64) + elif is_object_dtype(x): + try: + x = x.astype(np.complex128) + except (TypeError, ValueError): + x = x.astype(np.float64) + else: + if not np.any(np.imag(x)): + x = x.real + elif not (is_float(x) or is_integer(x) or is_complex(x)): + try: + x = float(x) + except ValueError: + # e.g. "1+1j" or "foo" + try: + x = complex(x) + except ValueError: + # e.g. 
"foo" + raise TypeError(f"Could not convert {x} to numeric") + return x + + +# NA-friendly array comparisons + + +def make_nancomp(op): + def f(x, y): + xmask = isna(x) + ymask = isna(y) + mask = xmask | ymask + + with np.errstate(all="ignore"): + result = op(x, y) + + if mask.any(): + if is_bool_dtype(result): + result = result.astype("O") + np.putmask(result, mask, np.nan) + + return result + + return f + + +nangt = make_nancomp(operator.gt) +nange = make_nancomp(operator.ge) +nanlt = make_nancomp(operator.lt) +nanle = make_nancomp(operator.le) +naneq = make_nancomp(operator.eq) +nanne = make_nancomp(operator.ne) + + +def _nanpercentile_1d(values, mask, q, na_value, interpolation): + """ + Wrapper for np.percentile that skips missing values, specialized to + 1-dimensional case. + + Parameters + ---------- + values : array over which to find quantiles + mask : ndarray[bool] + locations in values that should be considered missing + q : scalar or array of quantile indices to find + na_value : scalar + value to return for empty or all-null values + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + # mask is Union[ExtensionArray, ndarray] + values = values[~mask] + + if len(values) == 0: + if lib.is_scalar(q): + return na_value + else: + return np.array([na_value] * len(q), dtype=values.dtype) + + return np.percentile(values, q, interpolation=interpolation) + + +def nanpercentile(values, q, axis, na_value, mask, ndim, interpolation): + """ + Wrapper for np.percentile that skips missing values. 
+ + Parameters + ---------- + values : array over which to find quantiles + q : scalar or array of quantile indices to find + axis : {0, 1} + na_value : scalar + value to return for empty or all-null values + mask : ndarray[bool] + locations in values that should be considered missing + ndim : {1, 2} + interpolation : str + + Returns + ------- + quantiles : scalar or array + """ + if values.dtype.kind in ["m", "M"]: + # need to cast to integer to avoid rounding errors in numpy + result = nanpercentile( + values.view("i8"), q, axis, na_value.view("i8"), mask, ndim, interpolation + ) + + # Note: we have to do `astype` and not view because in general we + # have float result at this point, not i8 + return result.astype(values.dtype) + + if not lib.is_scalar(mask) and mask.any(): + if ndim == 1: + return _nanpercentile_1d( + values, mask, q, na_value, interpolation=interpolation + ) + else: + # for nonconsolidatable blocks mask is 1D, but values 2D + if mask.ndim < values.ndim: + mask = mask.reshape(values.shape) + if axis == 0: + values = values.T + mask = mask.T + result = [ + _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation) + for (val, m) in zip(list(values), list(mask)) + ] + result = np.array(result, dtype=values.dtype, copy=False).T + return result + else: + return np.percentile(values, q, axis=axis, interpolation=interpolation) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py new file mode 100644 index 00000000..42a0bacb --- /dev/null +++ b/pandas/core/ops/__init__.py @@ -0,0 +1,857 @@ +""" +Arithmetic operations for PandasObjects + +This is not a public API. 
+""" +import datetime +import operator +from typing import TYPE_CHECKING, Set, Tuple, Union + +import numpy as np + +from pandas._libs import Timedelta, Timestamp, lib +from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401 +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCExtensionArray, + ABCIndexClass, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.construction import extract_array +from pandas.core.ops.array_ops import ( + arithmetic_op, + comparison_op, + define_na_arithmetic_op, + get_array_op, + logical_op, +) +from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 +from pandas.core.ops.common import unpack_zerodim_and_defer +from pandas.core.ops.dispatch import should_series_dispatch +from pandas.core.ops.docstrings import ( + _arith_doc_FRAME, + _flex_comp_doc_FRAME, + _make_flex_doc, + _op_descriptions, +) +from pandas.core.ops.invalid import invalid_comparison # noqa:F401 +from pandas.core.ops.mask_ops import kleene_and, kleene_or, kleene_xor # noqa: F401 +from pandas.core.ops.methods import ( # noqa:F401 + add_flex_arithmetic_methods, + add_special_arithmetic_methods, +) +from pandas.core.ops.roperator import ( # noqa:F401 + radd, + rand_, + rdiv, + rdivmod, + rfloordiv, + rmod, + rmul, + ror_, + rpow, + rsub, + rtruediv, + rxor, +) + +if TYPE_CHECKING: + from pandas import DataFrame # noqa:F401 + +# ----------------------------------------------------------------------------- +# constants +ARITHMETIC_BINOPS: Set[str] = { + "add", + "sub", + "mul", + "pow", + "mod", + "floordiv", + "truediv", + "divmod", + "radd", + "rsub", + "rmul", + "rpow", + "rmod", + "rfloordiv", + "rtruediv", + "rdivmod", +} + + +COMPARISON_BINOPS: Set[str] = { + "eq", + "ne", + "lt", + "gt", + "le", + "ge", +} + +# 
----------------------------------------------------------------------------- +# Ops Wrapping Utilities + + +def get_op_result_name(left, right): + """ + Find the appropriate name to pin to an operation result. This result + should always be either an Index or a Series. + + Parameters + ---------- + left : {Series, Index} + right : object + + Returns + ------- + name : object + Usually a string + """ + # `left` is always a Series when called from within ops + if isinstance(right, (ABCSeries, ABCIndexClass)): + name = _maybe_match_name(left, right) + else: + name = left.name + return name + + +def _maybe_match_name(a, b): + """ + Try to find a name to attach to the result of an operation between + a and b. If only one of these has a `name` attribute, return that + name. Otherwise return a consensus name if they match or None if + they have different names. + + Parameters + ---------- + a : object + b : object + + Returns + ------- + name : str or None + + See Also + -------- + pandas.core.common.consensus_name_attr + """ + a_has = hasattr(a, "name") + b_has = hasattr(b, "name") + if a_has and b_has: + if a.name == b.name: + return a.name + else: + # TODO: what if they both have np.nan for their names? + return None + elif a_has: + return a.name + elif b_has: + return b.name + return None + + +def maybe_upcast_for_op(obj, shape: Tuple[int, ...]): + """ + Cast non-pandas objects to pandas types to unify behavior of arithmetic + and comparison operations. + + Parameters + ---------- + obj: object + shape : tuple[int] + + Returns + ------- + out : object + + Notes + ----- + Be careful to call this *after* determining the `name` attribute to be + attached to the result of the arithmetic operation. 
+ """ + from pandas.core.arrays import DatetimeArray, TimedeltaArray + + if type(obj) is datetime.timedelta: + # GH#22390 cast up to Timedelta to rely on Timedelta + # implementation; otherwise operation against numeric-dtype + # raises TypeError + return Timedelta(obj) + elif isinstance(obj, np.datetime64): + # GH#28080 numpy casts integer-dtype to datetime64 when doing + # array[int] + datetime64, which we do not allow + if isna(obj): + # Avoid possible ambiguities with pd.NaT + obj = obj.astype("datetime64[ns]") + right = np.broadcast_to(obj, shape) + return DatetimeArray(right) + + return Timestamp(obj) + + elif isinstance(obj, np.timedelta64): + if isna(obj): + # wrapping timedelta64("NaT") in Timedelta returns NaT, + # which would incorrectly be treated as a datetime-NaT, so + # we broadcast and wrap in a TimedeltaArray + obj = obj.astype("timedelta64[ns]") + right = np.broadcast_to(obj, shape) + return TimedeltaArray(right) + + # In particular non-nanosecond timedelta64 needs to be cast to + # nanoseconds, or else we get undesired behavior like + # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') + return Timedelta(obj) + + elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj.dtype): + # GH#22390 Unfortunately we need to special-case right-hand + # timedelta64 dtypes because numpy casts integer dtypes to + # timedelta64 when operating with timedelta64 + return TimedeltaArray._from_sequence(obj) + return obj + + +# ----------------------------------------------------------------------------- + + +def _get_frame_op_default_axis(name): + """ + Only DataFrame cares about default_axis, specifically: + special methods have default_axis=None and flex methods + have default_axis='columns'. + + Parameters + ---------- + name : str + + Returns + ------- + default_axis: str or None + """ + if name.replace("__r", "__") in ["__and__", "__or__", "__xor__"]: + # bool methods + return "columns" + elif name.startswith("__"): + # __add__, __mul__, ... 
+ return None + else: + # add, mul, ... + return "columns" + + +def _get_opstr(op): + """ + Find the operation string, if any, to pass to numexpr for this + operation. + + Parameters + ---------- + op : binary operator + + Returns + ------- + op_str : string or None + """ + + return { + operator.add: "+", + radd: "+", + operator.mul: "*", + rmul: "*", + operator.sub: "-", + rsub: "-", + operator.truediv: "/", + rtruediv: "/", + operator.floordiv: "//", + rfloordiv: "//", + operator.mod: None, # TODO: Why None for mod but '%' for rmod? + rmod: "%", + operator.pow: "**", + rpow: "**", + operator.eq: "==", + operator.ne: "!=", + operator.le: "<=", + operator.lt: "<", + operator.ge: ">=", + operator.gt: ">", + operator.and_: "&", + rand_: "&", + operator.or_: "|", + ror_: "|", + operator.xor: "^", + rxor: "^", + divmod: None, + rdivmod: None, + }[op] + + +def _get_op_name(op, special): + """ + Find the name to attach to this method according to conventions + for special and non-special methods. + + Parameters + ---------- + op : binary operator + special : bool + + Returns + ------- + op_name : str + """ + opname = op.__name__.strip("_") + if special: + opname = f"__{opname}__" + return opname + + +# ----------------------------------------------------------------------------- +# Masking NA values and fallbacks for operations numpy does not support + + +def fill_binop(left, right, fill_value): + """ + If a non-None fill_value is given, replace null entries in left and right + with this value, but only in positions where _one_ of left/right is null, + not both. + + Parameters + ---------- + left : array-like + right : array-like + fill_value : object + + Returns + ------- + left : array-like + right : array-like + + Notes + ----- + Makes copies if fill_value is not None + """ + # TODO: can we make a no-copy implementation? 
+ if fill_value is not None: + left_mask = isna(left) + right_mask = isna(right) + left = left.copy() + right = right.copy() + + # one but not both + mask = left_mask ^ right_mask + left[left_mask & mask] = fill_value + right[right_mask & mask] = fill_value + return left, right + + +# ----------------------------------------------------------------------------- +# Dispatch logic + + +def dispatch_to_series(left, right, func, str_rep=None, axis=None): + """ + Evaluate the frame operation func(left, right) by evaluating + column-by-column, dispatching to the Series implementation. + + Parameters + ---------- + left : DataFrame + right : scalar or DataFrame + func : arithmetic or comparison operator + str_rep : str or None, default None + axis : {None, 0, 1, "index", "columns"} + + Returns + ------- + DataFrame + """ + # Note: we use iloc to access columns for compat with cases + # with non-unique columns. + import pandas.core.computation.expressions as expressions + + right = lib.item_from_zerodim(right) + if lib.is_scalar(right) or np.ndim(right) == 0: + + # Get the appropriate array-op to apply to each block's values. + array_op = get_array_op(func, str_rep=str_rep) + bm = left._data.apply(array_op, right=right) + return type(left)(bm) + + elif isinstance(right, ABCDataFrame): + assert right._indexed_same(left) + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} + + elif isinstance(right, ABCSeries) and axis == "columns": + # We only get here if called via _combine_series_frame, + # in which case we specifically want to operate row-by-row + assert right.index.equals(left.columns) + + if right.dtype == "timedelta64[ns]": + # ensure we treat NaT values as the correct dtype + # Note: we do not do this unconditionally as it may be lossy or + # expensive for EA dtypes. 
+ right = np.asarray(right) + + def column_op(a, b): + return {i: func(a.iloc[:, i], b[i]) for i in range(len(a.columns))} + + else: + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[i]) for i in range(len(a.columns))} + + elif isinstance(right, ABCSeries): + assert right.index.equals(left.index) # Handle other cases later + + def column_op(a, b): + return {i: func(a.iloc[:, i], b) for i in range(len(a.columns))} + + else: + # Remaining cases have less-obvious dispatch rules + raise NotImplementedError(right) + + new_data = expressions.evaluate(column_op, str_rep, left, right) + return new_data + + +# ----------------------------------------------------------------------------- +# Series + + +def _align_method_SERIES(left, right, align_asobject=False): + """ align lhs and rhs Series """ + + # ToDo: Different from _align_method_FRAME, list, tuple and ndarray + # are not coerced here + # because Series has inconsistencies described in #13637 + + if isinstance(right, ABCSeries): + # avoid repeated alignment + if not left.index.equals(right.index): + + if align_asobject: + # to keep original value's dtype for bool ops + left = left.astype(object) + right = right.astype(object) + + left, right = left.align(right, copy=False) + + return left, right + + +def _construct_result( + left: ABCSeries, + result: Union[np.ndarray, ABCExtensionArray], + index: ABCIndexClass, + name, +): + """ + Construct an appropriately-labelled Series from the result of an op. + + Parameters + ---------- + left : Series + result : ndarray or ExtensionArray + index : Index + name : object + + Returns + ------- + Series + In the case of __divmod__ or __rdivmod__, a 2-tuple of Series. 
+ """ + if isinstance(result, tuple): + # produced by divmod or rdivmod + return ( + _construct_result(left, result[0], index=index, name=name), + _construct_result(left, result[1], index=index, name=name), + ) + + # We do not pass dtype to ensure that the Series constructor + # does inference in the case where `result` has object-dtype. + out = left._constructor(result, index=index) + out = out.__finalize__(left) + + # Set the result's name after __finalize__ is called because __finalize__ + # would set it back to self.name + out.name = name + return out + + +def _arith_method_SERIES(cls, op, special): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + """ + str_rep = _get_opstr(op) + op_name = _get_op_name(op, special) + + @unpack_zerodim_and_defer(op_name) + def wrapper(left, right): + + left, right = _align_method_SERIES(left, right) + res_name = get_op_result_name(left, right) + + lvalues = extract_array(left, extract_numpy=True) + rvalues = extract_array(right, extract_numpy=True) + result = arithmetic_op(lvalues, rvalues, op, str_rep) + + return _construct_result(left, result, index=left.index, name=res_name) + + wrapper.__name__ = op_name + return wrapper + + +def _comp_method_SERIES(cls, op, special): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. 
+ """ + op_name = _get_op_name(op, special) + + @unpack_zerodim_and_defer(op_name) + def wrapper(self, other): + + res_name = get_op_result_name(self, other) + + if isinstance(other, ABCSeries) and not self._indexed_same(other): + raise ValueError("Can only compare identically-labeled Series objects") + + lvalues = extract_array(self, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True) + + res_values = comparison_op(lvalues, rvalues, op) + + return _construct_result(self, res_values, index=self.index, name=res_name) + + wrapper.__name__ = op_name + return wrapper + + +def _bool_method_SERIES(cls, op, special): + """ + Wrapper function for Series arithmetic operations, to avoid + code duplication. + """ + op_name = _get_op_name(op, special) + + @unpack_zerodim_and_defer(op_name) + def wrapper(self, other): + self, other = _align_method_SERIES(self, other, align_asobject=True) + res_name = get_op_result_name(self, other) + + lvalues = extract_array(self, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True) + + res_values = logical_op(lvalues, rvalues, op) + return _construct_result(self, res_values, index=self.index, name=res_name) + + wrapper.__name__ = op_name + return wrapper + + +def _flex_method_SERIES(cls, op, special): + name = _get_op_name(op, special) + doc = _make_flex_doc(name, "series") + + @Appender(doc) + def flex_wrapper(self, other, level=None, fill_value=None, axis=0): + # validate axis + if axis is not None: + self._get_axis_number(axis) + + if isinstance(other, ABCSeries): + return self._binop(other, op, level=level, fill_value=fill_value) + elif isinstance(other, (np.ndarray, list, tuple)): + if len(other) != len(self): + raise ValueError("Lengths must be equal") + other = self._constructor(other, self.index) + return self._binop(other, op, level=level, fill_value=fill_value) + else: + if fill_value is not None: + self = self.fillna(fill_value) + + return op(self, other) + + flex_wrapper.__name__ = name + 
return flex_wrapper + + +# ----------------------------------------------------------------------------- +# DataFrame + + +def _combine_series_frame(self, other, func, fill_value=None, axis=None, level=None): + """ + Apply binary operator `func` to self, other using alignment and fill + conventions determined by the fill_value, axis, and level kwargs. + + Parameters + ---------- + self : DataFrame + other : Series + func : binary operator + fill_value : object, default None + axis : {0, 1, 'columns', 'index', None}, default None + level : int or None, default None + + Returns + ------- + result : DataFrame + """ + if fill_value is not None: + raise NotImplementedError(f"fill_value {fill_value} not supported.") + + if axis is None: + # default axis is columns + axis = 1 + + axis = self._get_axis_number(axis) + left, right = self.align(other, join="outer", axis=axis, level=level, copy=False) + if axis == 0: + new_data = left._combine_match_index(right, func) + else: + new_data = dispatch_to_series(left, right, func, axis="columns") + + return left._construct_result(new_data) + + +def _align_method_FRAME(left, right, axis): + """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ + + def to_series(right): + msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}" + if axis is not None and left._get_axis_name(axis) == "index": + if len(left.index) != len(right): + raise ValueError( + msg.format(req_len=len(left.index), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.index) + else: + if len(left.columns) != len(right): + raise ValueError( + msg.format(req_len=len(left.columns), given_len=len(right)) + ) + right = left._constructor_sliced(right, index=left.columns) + return right + + if isinstance(right, np.ndarray): + + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + if right.shape == left.shape: + right = left._constructor(right, index=left.index, columns=left.columns) 
+ + elif right.shape[0] == left.shape[0] and right.shape[1] == 1: + # Broadcast across columns + right = np.broadcast_to(right, left.shape) + right = left._constructor(right, index=left.index, columns=left.columns) + + elif right.shape[1] == left.shape[1] and right.shape[0] == 1: + # Broadcast along rows + right = to_series(right[0, :]) + + else: + raise ValueError( + "Unable to coerce to DataFrame, shape " + f"must be {left.shape}: given {right.shape}" + ) + + elif right.ndim > 2: + raise ValueError( + "Unable to coerce to Series/DataFrame, dim " + f"must be <= 2: {right.shape}" + ) + + elif is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)): + # GH17901 + right = to_series(right) + + return right + + +def _should_reindex_frame_op( + left: "DataFrame", right, op, axis, default_axis: int, fill_value, level +) -> bool: + """ + Check if this is an operation between DataFrames that will need to reindex. + """ + assert isinstance(left, ABCDataFrame) + + if op is operator.pow or op is rpow: + # GH#32685 pow has special semantics for operating with null values + return False + + if not isinstance(right, ABCDataFrame): + return False + + if fill_value is None and level is None and axis is default_axis: + # TODO: any other cases we should handle here? + cols = left.columns.intersection(right.columns) + if not (cols.equals(left.columns) and cols.equals(right.columns)): + return True + + return False + + +def _frame_arith_method_with_reindex( + left: "DataFrame", right: "DataFrame", op +) -> "DataFrame": + """ + For DataFrame-with-DataFrame operations that require reindexing, + operate only on shared columns, then reindex. 
+ + Parameters + ---------- + left : DataFrame + right : DataFrame + op : binary operator + + Returns + ------- + DataFrame + """ + # GH#31623, only operate on shared columns + cols = left.columns.intersection(right.columns) + + new_left = left[cols] + new_right = right[cols] + result = op(new_left, new_right) + + # Do the join on the columns instead of using _align_method_FRAME + # to avoid constructing two potentially large/sparse DataFrames + join_columns, _, _ = left.columns.join( + right.columns, how="outer", level=None, return_indexers=True + ) + return result.reindex(join_columns, axis=1) + + +def _arith_method_FRAME(cls, op, special): + str_rep = _get_opstr(op) + op_name = _get_op_name(op, special) + default_axis = _get_frame_op_default_axis(op_name) + + na_op = define_na_arithmetic_op(op, str_rep) + is_logical = str_rep in ["&", "|", "^"] + + if op_name in _op_descriptions: + # i.e. include "add" but not "__add__" + doc = _make_flex_doc(op_name, "dataframe") + else: + doc = _arith_doc_FRAME % op_name + + @Appender(doc) + def f(self, other, axis=default_axis, level=None, fill_value=None): + + if _should_reindex_frame_op( + self, other, op, axis, default_axis, fill_value, level + ): + return _frame_arith_method_with_reindex(self, other, op) + + other = _align_method_FRAME(self, other, axis) + + if isinstance(other, ABCDataFrame): + # Another DataFrame + pass_op = op if should_series_dispatch(self, other, op) else na_op + pass_op = pass_op if not is_logical else op + + left, right = self.align(other, join="outer", level=level, copy=False) + new_data = left._combine_frame(right, pass_op, fill_value) + return left._construct_result(new_data) + + elif isinstance(other, ABCSeries): + # For these values of `axis`, we end up dispatching to Series op, + # so do not want the masked op. 
+ pass_op = op if axis in [0, "columns", None] else na_op + pass_op = pass_op if not is_logical else op + return _combine_series_frame( + self, other, pass_op, fill_value=fill_value, axis=axis, level=level + ) + else: + # in this case we always have `np.ndim(other) == 0` + if fill_value is not None: + self = self.fillna(fill_value) + + new_data = dispatch_to_series(self, other, op, str_rep) + return self._construct_result(new_data) + + f.__name__ = op_name + + return f + + +def _flex_comp_method_FRAME(cls, op, special): + str_rep = _get_opstr(op) + op_name = _get_op_name(op, special) + default_axis = _get_frame_op_default_axis(op_name) + + doc = _flex_comp_doc_FRAME.format( + op_name=op_name, desc=_op_descriptions[op_name]["desc"] + ) + + @Appender(doc) + def f(self, other, axis=default_axis, level=None): + + other = _align_method_FRAME(self, other, axis) + + if isinstance(other, ABCDataFrame): + # Another DataFrame + if not self._indexed_same(other): + self, other = self.align(other, "outer", level=level, copy=False) + new_data = dispatch_to_series(self, other, op, str_rep) + return self._construct_result(new_data) + + elif isinstance(other, ABCSeries): + return _combine_series_frame( + self, other, op, fill_value=None, axis=axis, level=level + ) + else: + # in this case we always have `np.ndim(other) == 0` + new_data = dispatch_to_series(self, other, op) + return self._construct_result(new_data) + + f.__name__ = op_name + + return f + + +def _comp_method_FRAME(cls, op, special): + str_rep = _get_opstr(op) + op_name = _get_op_name(op, special) + + @Appender(f"Wrapper for comparison method {op_name}") + def f(self, other): + + other = _align_method_FRAME(self, other, axis=None) + + if isinstance(other, ABCDataFrame): + # Another DataFrame + if not self._indexed_same(other): + raise ValueError( + "Can only compare identically-labeled DataFrame objects" + ) + new_data = dispatch_to_series(self, other, op, str_rep) + return self._construct_result(new_data) + + elif 
isinstance(other, ABCSeries): + return _combine_series_frame( + self, other, op, fill_value=None, axis=None, level=None + ) + else: + + # straight boolean comparisons we want to allow all columns + # (regardless of dtype to pass thru) See #4537 for discussion. + new_data = dispatch_to_series(self, other, op) + return self._construct_result(new_data) + + f.__name__ = op_name + + return f diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py new file mode 100644 index 00000000..56519846 --- /dev/null +++ b/pandas/core/ops/array_ops.py @@ -0,0 +1,393 @@ +""" +Functions for arithmetic and comparison operations on NumPy arrays and +ExtensionArrays. +""" +from functools import partial +import operator +from typing import Any, Optional, Union + +import numpy as np + +from pandas._libs import Timedelta, Timestamp, lib, ops as libops + +from pandas.core.dtypes.cast import ( + construct_1d_object_array_from_listlike, + find_common_type, + maybe_upcast_putmask, +) +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, +) +from pandas.core.dtypes.generic import ( + ABCDatetimeArray, + ABCExtensionArray, + ABCIndex, + ABCIndexClass, + ABCSeries, + ABCTimedeltaArray, +) +from pandas.core.dtypes.missing import isna, notna + +from pandas.core.ops import missing +from pandas.core.ops.dispatch import dispatch_to_extension_op, should_extension_dispatch +from pandas.core.ops.invalid import invalid_comparison +from pandas.core.ops.roperator import rpow + + +def comp_method_OBJECT_ARRAY(op, x, y): + if isinstance(y, list): + y = construct_1d_object_array_from_listlike(y) + + # TODO: Should the checks below be ABCIndexClass? + if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): + # TODO: should this be ABCIndexClass?? 
+ if not is_object_dtype(y.dtype): + y = y.astype(np.object_) + + if isinstance(y, (ABCSeries, ABCIndex)): + y = y.values + + result = libops.vec_compare(x.ravel(), y, op) + else: + result = libops.scalar_compare(x.ravel(), y, op) + return result.reshape(x.shape) + + +def masked_arith_op(x, y, op): + """ + If the given arithmetic operation fails, attempt it again on + only the non-null elements of the input array(s). + + Parameters + ---------- + x : np.ndarray + y : np.ndarray, Series, Index + op : binary operator + """ + # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes + # the logic valid for both Series and DataFrame ops. + xrav = x.ravel() + assert isinstance(x, np.ndarray), type(x) + if isinstance(y, np.ndarray): + dtype = find_common_type([x.dtype, y.dtype]) + result = np.empty(x.size, dtype=dtype) + + # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex + # we would get int64 dtype, see GH#19956 + yrav = y.ravel() + mask = notna(xrav) & notna(yrav) + + if yrav.shape != mask.shape: + # FIXME: GH#5284, GH#5035, GH#19448 + # Without specifically raising here we get mismatched + # errors in Py3 (TypeError) vs Py2 (ValueError) + # Note: Only = an issue in DataFrame case + raise ValueError("Cannot broadcast operands together.") + + if mask.any(): + with np.errstate(all="ignore"): + result[mask] = op(xrav[mask], yrav[mask]) + + else: + if not is_scalar(y): + raise TypeError(type(y)) + + # mask is only meaningful for x + result = np.empty(x.size, dtype=x.dtype) + mask = notna(xrav) + + # 1 ** np.nan is 1. So we have to unmask those. 
+ if op is pow: + mask = np.where(x == 1, False, mask) + elif op is rpow: + mask = np.where(y == 1, False, mask) + + if mask.any(): + with np.errstate(all="ignore"): + result[mask] = op(xrav[mask], y) + + result, _ = maybe_upcast_putmask(result, ~mask, np.nan) + result = result.reshape(x.shape) # 2D compat + return result + + +def define_na_arithmetic_op(op, str_rep: str): + def na_op(x, y): + return na_arithmetic_op(x, y, op, str_rep) + + return na_op + + +def na_arithmetic_op(left, right, op, str_rep: str): + """ + Return the result of evaluating op on the passed in values. + + If native types are not compatible, try coersion to object dtype. + + Parameters + ---------- + left : np.ndarray + right : np.ndarray or scalar + str_rep : str or None + + Returns + ------- + array-like + + Raises + ------ + TypeError : invalid operation + """ + import pandas.core.computation.expressions as expressions + + try: + result = expressions.evaluate(op, str_rep, left, right) + except TypeError: + result = masked_arith_op(left, right, op) + + return missing.dispatch_fill_zeros(op, left, right, result) + + +def arithmetic_op( + left: Union[np.ndarray, ABCExtensionArray], right: Any, op, str_rep: str +): + """ + Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ... + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame or Index. Series is *not* excluded. + op : {operator.add, operator.sub, ...} + Or one of the reversed variants from roperator. + str_rep : str + + Returns + ------- + ndarrray or ExtensionArray + Or a 2-tuple of these in the case of divmod or rdivmod. + """ + + from pandas.core.ops import maybe_upcast_for_op + + # NB: We assume that extract_array has already been called + # on `left` and `right`. 
+ lvalues = left + rvalues = right + + rvalues = maybe_upcast_for_op(rvalues, lvalues.shape) + + if should_extension_dispatch(left, rvalues) or isinstance( + rvalues, (ABCTimedeltaArray, ABCDatetimeArray, Timestamp, Timedelta) + ): + # TimedeltaArray, DatetimeArray, and Timestamp are included here + # because they have `freq` attribute which is handled correctly + # by dispatch_to_extension_op. + # Timedelta is included because numexpr will fail on it, see GH#31457 + res_values = dispatch_to_extension_op(op, lvalues, rvalues) + + else: + with np.errstate(all="ignore"): + res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep) + + return res_values + + +def comparison_op( + left: Union[np.ndarray, ABCExtensionArray], right: Any, op +) -> Union[np.ndarray, ABCExtensionArray]: + """ + Evaluate a comparison operation `=`, `!=`, `>=`, `>`, `<=`, or `<`. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. + op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le} + + Returns + ------- + ndarrray or ExtensionArray + """ + + # NB: We assume extract_array has already been called on left and right + lvalues = left + rvalues = right + + rvalues = lib.item_from_zerodim(rvalues) + if isinstance(rvalues, list): + # TODO: same for tuples? + rvalues = np.asarray(rvalues) + + if isinstance(rvalues, (np.ndarray, ABCExtensionArray, ABCIndexClass)): + # TODO: make this treatment consistent across ops and classes. + # We are not catching all listlikes here (e.g. frozenset, tuple) + # The ambiguous case is object-dtype. 
See GH#27803 + if len(lvalues) != len(rvalues): + raise ValueError("Lengths must match to compare") + + if should_extension_dispatch(lvalues, rvalues): + res_values = dispatch_to_extension_op(op, lvalues, rvalues) + + elif is_scalar(rvalues) and isna(rvalues): + # numpy does not like comparisons vs None + if op is operator.ne: + res_values = np.ones(lvalues.shape, dtype=bool) + else: + res_values = np.zeros(lvalues.shape, dtype=bool) + + elif is_object_dtype(lvalues.dtype): + res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues) + + else: + op_name = f"__{op.__name__}__" + method = getattr(lvalues, op_name) + with np.errstate(all="ignore"): + res_values = method(rvalues) + + if res_values is NotImplemented: + res_values = invalid_comparison(lvalues, rvalues, op) + if is_scalar(res_values): + typ = type(rvalues) + raise TypeError(f"Could not compare {typ} type with Series") + + return res_values + + +def na_logical_op(x: np.ndarray, y, op): + try: + # For exposition, write: + # yarr = isinstance(y, np.ndarray) + # yint = is_integer(y) or (yarr and y.dtype.kind == "i") + # ybool = is_bool(y) or (yarr and y.dtype.kind == "b") + # xint = x.dtype.kind == "i" + # xbool = x.dtype.kind == "b" + # Then Cases where this goes through without raising include: + # (xint or xbool) and (yint or bool) + result = op(x, y) + except TypeError: + if isinstance(y, np.ndarray): + # bool-bool dtype operations should be OK, should not get here + assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)) + x = ensure_object(x) + y = ensure_object(y) + result = libops.vec_binop(x, y, op) + else: + # let null fall thru + assert lib.is_scalar(y) + if not isna(y): + y = bool(y) + try: + result = libops.scalar_binop(x, y, op) + except ( + TypeError, + ValueError, + AttributeError, + OverflowError, + NotImplementedError, + ): + typ = type(y).__name__ + raise TypeError( + f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array " + f"and scalar of type [{typ}]" + ) + + return 
result + + +def logical_op( + left: Union[np.ndarray, ABCExtensionArray], right: Any, op +) -> Union[np.ndarray, ABCExtensionArray]: + """ + Evaluate a logical operation `|`, `&`, or `^`. + + Parameters + ---------- + left : np.ndarray or ExtensionArray + right : object + Cannot be a DataFrame, Series, or Index. + op : {operator.and_, operator.or_, operator.xor} + Or one of the reversed variants from roperator. + + Returns + ------- + ndarrray or ExtensionArray + """ + + fill_int = lambda x: x + + def fill_bool(x, left=None): + # if `left` is specifically not-boolean, we do not cast to bool + if x.dtype.kind in ["c", "f", "O"]: + # dtypes that can hold NA + mask = isna(x) + if mask.any(): + x = x.astype(object) + x[mask] = False + + if left is None or is_bool_dtype(left.dtype): + x = x.astype(bool) + return x + + is_self_int_dtype = is_integer_dtype(left.dtype) + + right = lib.item_from_zerodim(right) + if is_list_like(right) and not hasattr(right, "dtype"): + # e.g. list, tuple + right = construct_1d_object_array_from_listlike(right) + + # NB: We assume extract_array has already been called on left and right + lvalues = left + rvalues = right + + if should_extension_dispatch(lvalues, rvalues): + res_values = dispatch_to_extension_op(op, lvalues, rvalues) + + else: + if isinstance(rvalues, np.ndarray): + is_other_int_dtype = is_integer_dtype(rvalues.dtype) + rvalues = rvalues if is_other_int_dtype else fill_bool(rvalues, lvalues) + + else: + # i.e. scalar + is_other_int_dtype = lib.is_integer(rvalues) + + # For int vs int `^`, `|`, `&` are bitwise operators and return + # integer dtypes. Otherwise these are boolean ops + filler = fill_int if is_self_int_dtype and is_other_int_dtype else fill_bool + + res_values = na_logical_op(lvalues, rvalues, op) + res_values = filler(res_values) # type: ignore + + return res_values + + +def get_array_op(op, str_rep: Optional[str] = None): + """ + Return a binary array operation corresponding to the given operator op. 
+ + Parameters + ---------- + op : function + Binary operator from operator or roperator module. + str_rep : str or None, default None + str_rep to pass to arithmetic_op + + Returns + ------- + function + """ + op_name = op.__name__.strip("_") + if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: + return partial(comparison_op, op=op) + elif op_name in {"and", "or", "xor", "rand", "ror", "rxor"}: + return partial(logical_op, op=op) + else: + return partial(arithmetic_op, op=op, str_rep=str_rep) diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py new file mode 100644 index 00000000..f4b16cf4 --- /dev/null +++ b/pandas/core/ops/common.py @@ -0,0 +1,66 @@ +""" +Boilerplate functions used in defining binary operations. +""" +from functools import wraps + +from pandas._libs.lib import item_from_zerodim + +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries + + +def unpack_zerodim_and_defer(name: str): + """ + Boilerplate for pandas conventions in arithmetic and comparison methods. + + Parameters + ---------- + name : str + + Returns + ------- + decorator + """ + + def wrapper(method): + return _unpack_zerodim_and_defer(method, name) + + return wrapper + + +def _unpack_zerodim_and_defer(method, name: str): + """ + Boilerplate for pandas conventions in arithmetic and comparison methods. + + Ensure method returns NotImplemented when operating against "senior" + classes. Ensure zero-dimensional ndarrays are always unpacked. 
+ + Parameters + ---------- + method : binary method + name : str + + Returns + ------- + method + """ + + is_cmp = name.strip("__") in {"eq", "ne", "lt", "le", "gt", "ge"} + + @wraps(method) + def new_method(self, other): + + if is_cmp and isinstance(self, ABCIndexClass) and isinstance(other, ABCSeries): + # For comparison ops, Index does *not* defer to Series + pass + else: + for cls in [ABCDataFrame, ABCSeries, ABCIndexClass]: + if isinstance(self, cls): + break + if isinstance(other, cls): + return NotImplemented + + other = item_from_zerodim(other) + + return method(self, other) + + return new_method diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py new file mode 100644 index 00000000..61a3032c --- /dev/null +++ b/pandas/core/ops/dispatch.py @@ -0,0 +1,126 @@ +""" +Functions for defining unary operations. +""" +from typing import Any, Union + +import numpy as np + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_extension_array_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries + +from pandas.core.construction import array + + +def should_extension_dispatch(left: ABCSeries, right: Any) -> bool: + """ + Identify cases where Series operation should use dispatch_to_extension_op. + + Parameters + ---------- + left : Series + right : object + + Returns + ------- + bool + """ + if ( + is_extension_array_dtype(left.dtype) + or is_datetime64_dtype(left.dtype) + or is_timedelta64_dtype(left.dtype) + ): + return True + + if not is_scalar(right) and is_extension_array_dtype(right): + # GH#22378 disallow scalar to exclude e.g. "category", "Int64" + return True + + return False + + +def should_series_dispatch(left, right, op): + """ + Identify cases where a DataFrame operation should dispatch to its + Series counterpart. 
+ + Parameters + ---------- + left : DataFrame + right : DataFrame or Series + op : binary operator + + Returns + ------- + override : bool + """ + if left._is_mixed_type or right._is_mixed_type: + return True + + if op.__name__.strip("_") in ["and", "or", "xor", "rand", "ror", "rxor"]: + # TODO: GH references for what this fixes + # Note: this check must come before the check for nonempty columns. + return True + + if right.ndim == 1: + # operating with Series, short-circuit checks that would fail + # with AttributeError. + return False + + if not len(left.columns) or not len(right.columns): + # ensure obj.dtypes[0] exists for each obj + return False + + ldtype = left.dtypes.iloc[0] + rdtype = right.dtypes.iloc[0] + + if (is_timedelta64_dtype(ldtype) and is_integer_dtype(rdtype)) or ( + is_timedelta64_dtype(rdtype) and is_integer_dtype(ldtype) + ): + # numpy integer dtypes as timedelta64 dtypes in this scenario + return True + + if is_datetime64_dtype(ldtype) and is_object_dtype(rdtype): + # in particular case where right is an array of DateOffsets + return True + + return False + + +def dispatch_to_extension_op( + op, left: Union[ABCExtensionArray, np.ndarray], right: Any, +): + """ + Assume that left or right is a Series backed by an ExtensionArray, + apply the operator defined by op. + + Parameters + ---------- + op : binary operator + left : ExtensionArray or np.ndarray + right : object + + Returns + ------- + ExtensionArray or np.ndarray + 2-tuple of these if op is divmod or rdivmod + """ + # NB: left and right should already be unboxed, so neither should be + # a Series or Index. + + if left.dtype.kind in "mM" and isinstance(left, np.ndarray): + # We need to cast datetime64 and timedelta64 ndarrays to + # DatetimeArray/TimedeltaArray. But we avoid wrapping others in + # PandasArray as that behaves poorly with e.g. IntegerArray. 
+ left = array(left) + + # The op calls will raise TypeError if the op is not defined + # on the ExtensionArray + res_values = op(left, right) + return res_values diff --git a/pandas/core/ops/docstrings.py b/pandas/core/ops/docstrings.py new file mode 100644 index 00000000..e3db65f1 --- /dev/null +++ b/pandas/core/ops/docstrings.py @@ -0,0 +1,675 @@ +""" +Templating for ops docstrings +""" +from typing import Dict, Optional + + +def _make_flex_doc(op_name, typ): + """ + Make the appropriate substitutions for the given operation and class-typ + into either _flex_doc_SERIES or _flex_doc_FRAME to return the docstring + to attach to a generated method. + + Parameters + ---------- + op_name : str {'__add__', '__sub__', ... '__eq__', '__ne__', ...} + typ : str {series, 'dataframe']} + + Returns + ------- + doc : str + """ + op_name = op_name.replace("__", "") + op_desc = _op_descriptions[op_name] + + if op_name.startswith("r"): + equiv = "other " + op_desc["op"] + " " + typ + else: + equiv = typ + " " + op_desc["op"] + " other" + + if typ == "series": + base_doc = _flex_doc_SERIES + doc_no_examples = base_doc.format( + desc=op_desc["desc"], + op_name=op_name, + equiv=equiv, + reverse=op_desc["reverse"], + ) + if op_desc["series_examples"]: + doc = doc_no_examples + op_desc["series_examples"] + else: + doc = doc_no_examples + elif typ == "dataframe": + base_doc = _flex_doc_FRAME + doc = base_doc.format( + desc=op_desc["desc"], + op_name=op_name, + equiv=equiv, + reverse=op_desc["reverse"], + ) + else: + raise AssertionError("Invalid typ argument.") + return doc + + +_add_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.add(b, fill_value=0) +a 2.0 +b 1.0 +c 1.0 +d 1.0 +e NaN +dtype: float64 +""" + +_sub_example_SERIES = """ +Examples 
+-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.subtract(b, fill_value=0) +a 0.0 +b 1.0 +c 1.0 +d -1.0 +e NaN +dtype: float64 +""" + +_mul_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.multiply(b, fill_value=0) +a 1.0 +b 0.0 +c 0.0 +d 0.0 +e NaN +dtype: float64 +""" + +_div_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.divide(b, fill_value=0) +a 1.0 +b inf +c inf +d 0.0 +e NaN +dtype: float64 +""" + +_floordiv_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.floordiv(b, fill_value=0) +a 1.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" + +_mod_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN +dtype: float64 +>>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) +>>> b +a 1.0 +b NaN +d 1.0 +e NaN +dtype: float64 +>>> a.mod(b, fill_value=0) +a 0.0 +b NaN +c NaN +d 0.0 +e NaN +dtype: float64 +""" +_pow_example_SERIES = """ +Examples +-------- +>>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) +>>> a +a 1.0 +b 1.0 +c 1.0 +d NaN 
# Metadata used to build the docstrings of the flex binary operators, keyed
# by method name.  Every entry stores the operator symbol ("op"), a short
# description ("desc"), the name of the reflected method ("reverse", None for
# comparisons) and the Series doctest block ("series_examples").  A few
# entries additionally carry an explicit "df_examples" slot.
_op_descriptions: Dict[str, Dict[str, Optional[str]]] = {
    # Arithmetic Operators
    "add": {
        "op": "+",
        "desc": "Addition",
        "reverse": "radd",
        "series_examples": _add_example_SERIES,
    },
    "sub": {
        "op": "-",
        "desc": "Subtraction",
        "reverse": "rsub",
        "series_examples": _sub_example_SERIES,
    },
    "mul": {
        "op": "*",
        "desc": "Multiplication",
        "reverse": "rmul",
        "series_examples": _mul_example_SERIES,
        "df_examples": None,
    },
    "mod": {
        "op": "%",
        "desc": "Modulo",
        "reverse": "rmod",
        "series_examples": _mod_example_SERIES,
    },
    "pow": {
        "op": "**",
        "desc": "Exponential power",
        "reverse": "rpow",
        "series_examples": _pow_example_SERIES,
        "df_examples": None,
    },
    "truediv": {
        "op": "/",
        "desc": "Floating division",
        "reverse": "rtruediv",
        "series_examples": _div_example_SERIES,
        "df_examples": None,
    },
    "floordiv": {
        "op": "//",
        "desc": "Integer division",
        "reverse": "rfloordiv",
        "series_examples": _floordiv_example_SERIES,
        "df_examples": None,
    },
    "divmod": {
        "op": "divmod",
        "desc": "Integer division and modulo",
        "reverse": "rdivmod",
        "series_examples": None,
        "df_examples": None,
    },
    # Comparison Operators
    "eq": {
        "op": "==",
        "desc": "Equal to",
        "reverse": None,
        "series_examples": None,
    },
    "ne": {
        "op": "!=",
        "desc": "Not equal to",
        "reverse": None,
        "series_examples": None,
    },
    "lt": {
        "op": "<",
        "desc": "Less than",
        "reverse": None,
        "series_examples": None,
    },
    "le": {
        "op": "<=",
        "desc": "Less than or equal to",
        "reverse": None,
        "series_examples": None,
    },
    "gt": {
        "op": ">",
        "desc": "Greater than",
        "reverse": None,
        "series_examples": None,
    },
    "ge": {
        "op": ">=",
        "desc": "Greater than or equal to",
        "reverse": None,
        "series_examples": None,
    },
}

# Register the reflected variants (radd, rsub, ...).  Each one is a shallow
# copy of its forward entry whose "reverse" slot points back at the forward
# name.  The key list is snapshotted first because the dict grows in the loop.
_op_names = list(_op_descriptions)
for key in _op_names:
    reverse_op = _op_descriptions[key]["reverse"]
    if reverse_op is None:
        continue
    _op_descriptions[reverse_op] = {**_op_descriptions[key], "reverse": key}
+ +Equivalent to ``{equiv}``, but with support to substitute a fill_value +for missing data in one of the inputs. With reverse version, `{reverse}`. + +Among flexible wrappers (`add`, `sub`, `mul`, `div`, `mod`, `pow`) to +arithmetic operators: `+`, `-`, `*`, `/`, `//`, `%`, `**`. + +Parameters +---------- +other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}} + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). For Series input, axis to match Series index on. +level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. +fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + +Returns +------- +DataFrame + Result of the arithmetic operation. + +See Also +-------- +DataFrame.add : Add DataFrames. +DataFrame.sub : Subtract DataFrames. +DataFrame.mul : Multiply DataFrames. +DataFrame.div : Divide DataFrames (float division). +DataFrame.truediv : Divide DataFrames (float division). +DataFrame.floordiv : Divide DataFrames (integer division). +DataFrame.mod : Calculate modulo (remainder after division). +DataFrame.pow : Calculate exponential power. + +Notes +----- +Mismatched indices will be unioned together. + +Examples +-------- +>>> df = pd.DataFrame({{'angles': [0, 3, 4], +... 'degrees': [360, 180, 360]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> df + angles degrees +circle 0 360 +triangle 3 180 +rectangle 4 360 + +Add a scalar with operator version which return the same +results. 
+ +>>> df + 1 + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +>>> df.add(1) + angles degrees +circle 1 361 +triangle 4 181 +rectangle 5 361 + +Divide by constant with reverse version. + +>>> df.div(10) + angles degrees +circle 0.0 36.0 +triangle 0.3 18.0 +rectangle 0.4 36.0 + +>>> df.rdiv(10) + angles degrees +circle inf 0.027778 +triangle 3.333333 0.055556 +rectangle 2.500000 0.027778 + +Subtract a list and Series by axis with operator version. + +>>> df - [1, 2] + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub([1, 2], axis='columns') + angles degrees +circle -1 358 +triangle 2 178 +rectangle 3 358 + +>>> df.sub(pd.Series([1, 1, 1], index=['circle', 'triangle', 'rectangle']), +... axis='index') + angles degrees +circle -1 359 +triangle 2 179 +rectangle 3 359 + +Multiply a DataFrame of different shape with operator version. + +>>> other = pd.DataFrame({{'angles': [0, 3, 4]}}, +... index=['circle', 'triangle', 'rectangle']) +>>> other + angles +circle 0 +triangle 3 +rectangle 4 + +>>> df * other + angles degrees +circle 0 NaN +triangle 9 NaN +rectangle 16 NaN + +>>> df.mul(other, fill_value=0) + angles degrees +circle 0 0.0 +triangle 9 0.0 +rectangle 16 0.0 + +Divide by a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'angles': [0, 3, 4, 4, 5, 6], +... 'degrees': [360, 180, 360, 360, 540, 720]}}, +... index=[['A', 'A', 'A', 'B', 'B', 'B'], +... ['circle', 'triangle', 'rectangle', +... 'square', 'pentagon', 'hexagon']]) +>>> df_multindex + angles degrees +A circle 0 360 + triangle 3 180 + rectangle 4 360 +B square 4 360 + pentagon 5 540 + hexagon 6 720 + +>>> df.div(df_multindex, level=1, fill_value=0) + angles degrees +A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 +B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 +""" + +_flex_comp_doc_FRAME = """ +Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`). 
+ +Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison +operators. + +Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis +(rows or columns) and level for comparison. + +Parameters +---------- +other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. +axis : {{0 or 'index', 1 or 'columns'}}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). +level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + +Returns +------- +DataFrame of bool + Result of the comparison. + +See Also +-------- +DataFrame.eq : Compare DataFrames for equality elementwise. +DataFrame.ne : Compare DataFrames for inequality elementwise. +DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. +DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. +DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. +DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + +Notes +----- +Mismatched indices will be unioned together. +`NaN` values are considered different (i.e. `NaN` != `NaN`). + +Examples +-------- +>>> df = pd.DataFrame({{'cost': [250, 150, 100], +... 'revenue': [100, 250, 300]}}, +... 
index=['A', 'B', 'C']) +>>> df + cost revenue +A 250 100 +B 150 250 +C 100 300 + +Comparison with a scalar, using either the operator or method: + +>>> df == 100 + cost revenue +A False True +B False False +C True False + +>>> df.eq(100) + cost revenue +A False True +B False False +C True False + +When `other` is a :class:`Series`, the columns of a DataFrame are aligned +with the index of `other` and broadcast: + +>>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue +A True True +B True False +C False True + +Use the method to control the broadcast axis: + +>>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis='index') + cost revenue +A True False +B True True +C True True +D True True + +When comparing to an arbitrary sequence, the number of columns must +match the number of elements in `other`: + +>>> df == [250, 100] + cost revenue +A True True +B False False +C False False + +Use the method to control the axis: + +>>> df.eq([250, 250, 100], axis='index') + cost revenue +A True False +B False True +C True False + +Compare to a DataFrame of different shape. + +>>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}}, +... index=['A', 'B', 'C', 'D']) +>>> other + revenue +A 300 +B 250 +C 100 +D 150 + +>>> df.gt(other) + cost revenue +A False False +B False False +C False True +D False False + +Compare to a MultiIndex by level. + +>>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220], +... 'revenue': [100, 250, 300, 200, 175, 225]}}, +... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'], +... 
def invalid_comparison(left, right, op):
    """
    Build the result of comparing operands whose types cannot be compared.

    Follows the Python 3 conventions for mismatched types:

    - ``==`` gives an all-False array
    - ``!=`` gives an all-True array
    - any other operator raises TypeError

    Parameters
    ----------
    left : array-like
        Only its ``shape`` and ``dtype`` are used.
    right : scalar, array-like
        Only its type name is used, for the error message.
    op : operator.{eq, ne, lt, le, gt, ge}

    Returns
    -------
    numpy.ndarray of bool
        Array with the shape of `left`.

    Raises
    ------
    TypeError
        When `op` is neither ``operator.eq`` nor ``operator.ne``.
    """
    if op is operator.eq:
        return np.zeros(left.shape, dtype=bool)
    if op is operator.ne:
        return np.ones(left.shape, dtype=bool)

    # Ordering comparisons have no sensible answer for mismatched types.
    typ = type(right).__name__
    raise TypeError(f"Invalid comparison between dtype={left.dtype} and {typ}")


def make_invalid_op(name: str):
    """
    Create a binary method that unconditionally raises TypeError.

    Parameters
    ----------
    name : str
        Operation name; used in the error message and as the ``__name__``
        of the returned function.

    Returns
    -------
    invalid_op : function
    """

    def invalid_op(self, other=None):
        raise TypeError(
            f"cannot perform {name} with this index type: {type(self).__name__}"
        )

    invalid_op.__name__ = name
    return invalid_op
def kleene_or(
    left: Union[bool, libmissing.NAType, np.ndarray],
    right: Union[bool, libmissing.NAType, np.ndarray],
    left_mask: Optional[np.ndarray],
    right_mask: Optional[np.ndarray],
):
    """
    Boolean ``or`` using Kleene logic.

    Values are NA where we have ``NA | NA`` or ``NA | False``.
    ``NA | True`` is considered True.

    Parameters
    ----------
    left, right : ndarray, NA, or bool
        The values of the array.
    left_mask, right_mask : ndarray, optional
        The masks. Only one of these may be None, which implies that
        the associated `left` or `right` value is a scalar.

    Returns
    -------
    result, mask: ndarray[bool]
        The result of the logical or, and the new mask.

    Raises
    ------
    ValueError
        If the scalar operand is a floating NaN.
    """
    # To reduce the number of cases, we ensure that `left` & `left_mask`
    # always come from an array, not a scalar.  This is safe because
    # A | B == B | A
    if left_mask is None:
        return kleene_or(right, left, right_mask, left_mask)

    assert isinstance(left, np.ndarray)

    raise_for_nan(right, method="or")

    # The scalar branch below relies on identity checks against ``True``;
    # a numpy boolean scalar is a different object, so unbox it first.
    if isinstance(right, np.bool_):
        right = bool(right)

    if right is libmissing.NA:
        result = left.copy()
    else:
        result = left | right

    if right_mask is not None:
        # output is unknown where (False | NA), (NA | False), (NA | NA)
        left_false = ~(left | left_mask)
        right_false = ~(right | right_mask)
        mask = (
            (left_false & right_mask)
            | (right_false & left_mask)
            | (left_mask & right_mask)
        )
    else:
        if right is True:
            # anything | True is True, so nothing stays missing
            mask = np.zeros_like(left_mask)
        elif right is libmissing.NA:
            # only a known True survives; False | NA and NA | NA are NA
            mask = (~left & ~left_mask) | left_mask
        else:
            # False: the result is whatever `left` was, missing or not
            mask = left_mask.copy()

    return result, mask


def kleene_xor(
    left: Union[bool, libmissing.NAType, np.ndarray],
    right: Union[bool, libmissing.NAType, np.ndarray],
    left_mask: Optional[np.ndarray],
    right_mask: Optional[np.ndarray],
):
    """
    Boolean ``xor`` using Kleene logic.

    This is the same as ``or``, with the following adjustments

    * True, True -> False
    * True, NA   -> NA

    Parameters
    ----------
    left, right : ndarray, NA, or bool
        The values of the array.
    left_mask, right_mask : ndarray, optional
        The masks. Only one of these may be None, which implies that
        the associated `left` or `right` value is a scalar.

    Returns
    -------
    result, mask: ndarray[bool]
        The result of the logical xor, and the new mask.

    Raises
    ------
    ValueError
        If the scalar operand is a floating NaN.
    """
    # xor is commutative, so normalise to "array on the left".
    if left_mask is None:
        return kleene_xor(right, left, right_mask, left_mask)

    raise_for_nan(right, method="xor")
    if right is libmissing.NA:
        result = np.zeros_like(left)
    else:
        result = left ^ right

    if right_mask is None:
        if right is libmissing.NA:
            # anything ^ NA is NA
            mask = np.ones_like(left_mask)
        else:
            mask = left_mask.copy()
    else:
        # missing on either side makes the result missing
        mask = left_mask | right_mask

    return result, mask


def kleene_and(
    left: Union[bool, libmissing.NAType, np.ndarray],
    right: Union[bool, libmissing.NAType, np.ndarray],
    left_mask: Optional[np.ndarray],
    right_mask: Optional[np.ndarray],
):
    """
    Boolean ``and`` using Kleene logic.

    Values are ``NA`` for ``NA & NA`` or ``True & NA``.

    Parameters
    ----------
    left, right : ndarray, NA, or bool
        The values of the array.
    left_mask, right_mask : ndarray, optional
        The masks. Only one of these may be None, which implies that
        the associated `left` or `right` value is a scalar.

    Returns
    -------
    result, mask: ndarray[bool]
        The result of the logical and, and the new mask.

    Raises
    ------
    ValueError
        If the scalar operand is a floating NaN.
    """
    # To reduce the number of cases, we ensure that `left` & `left_mask`
    # always come from an array, not a scalar.  This is safe because
    # A & B == B & A
    if left_mask is None:
        return kleene_and(right, left, right_mask, left_mask)

    assert isinstance(left, np.ndarray)
    raise_for_nan(right, method="and")

    # The scalar branch below relies on identity checks against ``False``;
    # a numpy boolean scalar is a different object, so unbox it first.
    if isinstance(right, np.bool_):
        right = bool(right)

    if right is libmissing.NA:
        result = np.zeros_like(left)
    else:
        result = left & right

    if right_mask is None:
        # Scalar `right`
        if right is libmissing.NA:
            # only a known False survives; True & NA and NA & NA are NA
            mask = (left & ~left_mask) | left_mask

        else:
            mask = left_mask.copy()
            if right is False:
                # unmask everything
                mask[:] = False
    else:
        # unmask where either left or right is False
        left_false = ~(left | left_mask)
        right_false = ~(right | right_mask)
        mask = (left_mask & ~right_false) | (right_mask & ~left_false)

    return result, mask


def raise_for_nan(value, method):
    """
    Reject a floating NaN operand of a Kleene logical operation.

    Parameters
    ----------
    value : object
        The operand to check.
    method : str
        Name of the logical operation, used in the error message.

    Raises
    ------
    ValueError
        If `value` is a float and is NaN.
    """
    if lib.is_float(value) and np.isnan(value):
        raise ValueError(f"Cannot perform logical '{method}' with floating NaN")
def _get_method_wrappers(cls):
    """
    Find the appropriate operation-wrappers to use when defining flex/special
    arithmetic, boolean, and comparison operations with the given class.

    Parameters
    ----------
    cls : class
        A Series or DataFrame subclass.

    Returns
    -------
    arith_flex : function
    comp_flex : function
    arith_special : function
    comp_special : function
    bool_special : function

    Raises
    ------
    TypeError
        If `cls` is neither a Series nor a DataFrame subclass.
    """
    # TODO: make these non-runtime imports once the relevant functions
    #  are no longer in __init__
    from pandas.core.ops import (
        _arith_method_FRAME,
        _arith_method_SERIES,
        _bool_method_SERIES,
        _comp_method_FRAME,
        _comp_method_SERIES,
        _flex_comp_method_FRAME,
        _flex_method_SERIES,
    )

    if issubclass(cls, ABCSeries):
        # Just Series; one flex factory serves arithmetic and comparisons
        arith_flex = _flex_method_SERIES
        comp_flex = _flex_method_SERIES
        arith_special = _arith_method_SERIES
        comp_special = _comp_method_SERIES
        bool_special = _bool_method_SERIES
    elif issubclass(cls, ABCDataFrame):
        # DataFrame reuses the arithmetic factory for the logical dunders
        arith_flex = _arith_method_FRAME
        comp_flex = _flex_comp_method_FRAME
        arith_special = _arith_method_FRAME
        comp_special = _comp_method_FRAME
        bool_special = _arith_method_FRAME
    else:
        # Previously this fell through to the return statement and failed
        # with an UnboundLocalError; fail with an explicit message instead.
        raise TypeError(
            f"no operator wrappers are defined for {cls.__name__}; "
            "expected a Series or DataFrame subclass"
        )
    return arith_flex, comp_flex, arith_special, comp_special, bool_special


def add_special_arithmetic_methods(cls):
    """
    Adds the full suite of special arithmetic methods (``__add__``,
    ``__sub__``, etc.) to the class.

    In-place variants (``__iadd__``, ``__iand__``, ...) are derived from the
    corresponding non-in-place dunder methods.

    Parameters
    ----------
    cls : class
        special methods will be defined and pinned to this class
    """
    _, _, arith_method, comp_method, bool_method = _get_method_wrappers(cls)
    new_methods = _create_methods(
        cls, arith_method, comp_method, bool_method, special=True
    )
    # inplace operators (I feel like these should get passed an `inplace=True`
    # or just be removed

    def _wrap_inplace_method(method):
        """
        return an inplace wrapper for this method
        """

        def f(self, other):
            result = method(self, other)

            # this makes sure that we are aligned like the input
            # we are updating inplace so we want to ignore is_copy
            self._update_inplace(
                result.reindex_like(self, copy=False)._data, verify_is_copy=False
            )

            return self

        # "__add__" -> "add" -> "__iadd__"; str.strip treats its argument as
        # a set of characters, so a single "_" is all that is needed.
        name = method.__name__.strip("_")
        f.__name__ = f"__i{name}__"
        return f

    inplace_ops = (
        # arithmetic
        "add",
        "sub",
        "mul",
        "truediv",
        "floordiv",
        "mod",
        "pow",
        # logical
        "and",
        "or",
        "xor",
    )
    for op_name in inplace_ops:
        new_methods[f"__i{op_name}__"] = _wrap_inplace_method(
            new_methods[f"__{op_name}__"]
        )

    _add_methods(cls, new_methods=new_methods)


def add_flex_arithmetic_methods(cls):
    """
    Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``)
    to the class.

    The long-form aliases ``multiply``, ``subtract`` and ``divide`` are added
    as well.

    Parameters
    ----------
    cls : class
        flex methods will be defined and pinned to this class
    """
    flex_arith_method, flex_comp_method, _, _, _ = _get_method_wrappers(cls)
    new_methods = _create_methods(
        cls, flex_arith_method, flex_comp_method, bool_method=None, special=False
    )
    new_methods.update(
        dict(
            multiply=new_methods["mul"],
            subtract=new_methods["sub"],
            divide=new_methods["div"],
        )
    )
    # opt out of bool flex methods for now
    assert not any(kname in new_methods for kname in ("ror_", "rxor", "rand_"))

    _add_methods(cls, new_methods=new_methods)


def _create_methods(cls, arith_method, comp_method, bool_method, special):
    """
    Build the mapping of operator-method names to generated methods.

    Parameters
    ----------
    cls : class
        Class the methods are generated for.
    arith_method, comp_method : function
        Factories called as ``factory(cls, op, special)``.
    bool_method : function or None
        Factory for the logical operators; they are skipped when this is
        falsy.
    special : bool
        If True the keys are dunder names (``"__add__"``), otherwise plain
        names (``"add"``).

    Returns
    -------
    dict
        Method name -> generated method.
    """
    # creates actual methods based upon arithmetic, comp and bool method
    # constructors.

    have_divmod = issubclass(cls, ABCSeries)
    # divmod is available for Series

    new_methods = dict(
        add=arith_method(cls, operator.add, special),
        radd=arith_method(cls, radd, special),
        sub=arith_method(cls, operator.sub, special),
        mul=arith_method(cls, operator.mul, special),
        truediv=arith_method(cls, operator.truediv, special),
        floordiv=arith_method(cls, operator.floordiv, special),
        # Causes a floating point exception in the tests when numexpr enabled,
        # so for now no speedup
        mod=arith_method(cls, operator.mod, special),
        pow=arith_method(cls, operator.pow, special),
        # not entirely sure why this is necessary, but previously was included
        # so it's here to maintain compatibility
        rmul=arith_method(cls, rmul, special),
        rsub=arith_method(cls, rsub, special),
        rtruediv=arith_method(cls, rtruediv, special),
        rfloordiv=arith_method(cls, rfloordiv, special),
        rpow=arith_method(cls, rpow, special),
        rmod=arith_method(cls, rmod, special),
    )
    # "div"/"rdiv" are aliases of true division
    new_methods["div"] = new_methods["truediv"]
    new_methods["rdiv"] = new_methods["rtruediv"]
    if have_divmod:
        # divmod doesn't have an op that is supported by numexpr
        new_methods["divmod"] = arith_method(cls, divmod, special)
        new_methods["rdivmod"] = arith_method(cls, rdivmod, special)

    new_methods.update(
        dict(
            eq=comp_method(cls, operator.eq, special),
            ne=comp_method(cls, operator.ne, special),
            lt=comp_method(cls, operator.lt, special),
            gt=comp_method(cls, operator.gt, special),
            le=comp_method(cls, operator.le, special),
            ge=comp_method(cls, operator.ge, special),
        )
    )

    if bool_method:
        new_methods.update(
            dict(
                and_=bool_method(cls, operator.and_, special),
                or_=bool_method(cls, operator.or_, special),
                # For some reason ``^`` wasn't used in original.
                xor=bool_method(cls, operator.xor, special),
                rand_=bool_method(cls, rand_, special),
                ror_=bool_method(cls, ror_, special),
                rxor=bool_method(cls, rxor, special),
            )
        )

    if special:
        # "add" -> "__add__", "and_" -> "__and__", "rand_" -> "__rand__"
        new_methods = {f"__{k.strip('_')}__": v for k, v in new_methods.items()}
    return new_methods


def _add_methods(cls, new_methods):
    """
    Pin every method in `new_methods` onto `cls`.

    Parameters
    ----------
    cls : class
    new_methods : dict
        Method name -> method.
    """
    for name, method in new_methods.items():
        # For most methods, if we find that the class already has a method
        # of the same name, it is OK to over-write it.  The exception is
        # inplace methods (__iadd__, __isub__, ...) for SparseArray, which
        # retain the np.ndarray versions.
        force = not (issubclass(cls, ABCSparseArray) and name.startswith("__i"))
        if force or name not in cls.__dict__:
            setattr(cls, name, method)
def fill_zeros(result, x, y):
    """
    Replace entries of a non-float `result` with NaN where `y` is an
    integer zero.

    `result` is returned untouched when it already has a float dtype, when
    `y` is neither a scalar nor an object exposing ``dtype``/``type``, when
    `y` is not of integer dtype, or when `y` contains no zeros.  Otherwise
    a float64 array of the same shape is returned.

    Parameters
    ----------
    result : ndarray
    x : object
        Unused by this function.
    y : scalar or array-like

    Returns
    -------
    ndarray
    """
    if is_float_dtype(result.dtype):
        return result

    y_has_dtype = hasattr(y, "dtype") or hasattr(y, "type")
    y_is_scalar = is_scalar(y)
    if not (y_has_dtype or y_is_scalar):
        return result

    if y_is_scalar:
        y = np.array(y)

    if not is_integer_dtype(y.dtype):
        return result

    zero_divisor = y == 0
    if not zero_divisor.any():
        return result

    # GH#7325, mask and nans must be broadcastable (also: GH#9308)
    # Raveling and then reshaping makes np.putmask faster
    mask = (zero_divisor & ~np.isnan(result)).ravel()

    original_shape = result.shape
    flat = result.astype("float64", copy=False).ravel()
    np.putmask(flat, mask, np.nan)
    return flat.reshape(original_shape)


def mask_zero_div_zero(x, y, result):
    """
    Set results of 0 // 0 to np.nan and of nonzero // 0 to +/-inf,
    regardless of the dtypes of the numerator or the denominator.

    Parameters
    ----------
    x : ndarray
    y : ndarray
    result : ndarray

    Returns
    -------
    filled_result : ndarray

    Examples
    --------
    >>> x = np.array([1, 0, -1], dtype=np.int64)
    >>> y = 0       # int 0; numpy behavior is different with float
    >>> result = x // y
    >>> result      # raw numpy result does not fill division by zero
    array([0, 0, 0])
    >>> mask_zero_div_zero(x, y, result)
    array([ inf,  nan, -inf])
    """
    if not isinstance(result, np.ndarray):
        # FIXME: SparseArray would raise TypeError with np.putmask
        return result

    if is_scalar(y):
        y = np.array(y)

    zmask = y == 0

    if isinstance(zmask, bool):
        # FIXME: numpy did not evaluate pointwise, seen in docs build
        return result

    if not zmask.any():
        return result

    original_shape = result.shape

    # Flip sign if necessary for -0.0
    zneg_mask = zmask & np.signbit(y)
    zpos_mask = zmask & ~zneg_mask

    nan_mask = (zmask & (x == 0)).ravel()
    with np.errstate(invalid="ignore"):
        neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
        posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()

    needs_fill = nan_mask.any() or neginf_mask.any() or posinf_mask.any()
    if needs_fill:
        # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
        flat = result.astype("float64", copy=False).ravel()

        np.putmask(flat, nan_mask, np.nan)
        np.putmask(flat, posinf_mask, np.inf)
        np.putmask(flat, neginf_mask, -np.inf)

        result = flat.reshape(original_shape)

    return result


def dispatch_fill_zeros(op, left, right, result):
    """
    Apply the division-by-zero fix-up that matches `op`.

    Floor division goes through ``mask_zero_div_zero``, modulo goes through
    ``fill_zeros``, and divmod applies one to each half of the result.  For
    reflected ops the operands are swapped before the helper is called.
    Any other op returns `result` unchanged.

    Parameters
    ----------
    op : function (operator.floordiv, operator.mod, divmod, ...)
    left : object (np.ndarray for non-reversed ops)
    right : object (np.ndarray for reversed ops)
    result : ndarray

    Returns
    -------
    result : np.ndarray

    Notes
    -----
    For divmod and rdivmod, the `result` parameter and returned `result`
    is a 2-tuple of ndarray objects.
    """
    if op is divmod:
        return (
            mask_zero_div_zero(left, right, result[0]),
            fill_zeros(result[1], left, right),
        )
    if op is rdivmod:
        return (
            mask_zero_div_zero(right, left, result[0]),
            fill_zeros(result[1], right, left),
        )
    # Note: no need to do this for truediv/rtruediv; in py3 numpy behaves
    # the way we want.
    if op is operator.floordiv:
        return mask_zero_div_zero(left, right, result)
    if op is rfloordiv:
        return mask_zero_div_zero(right, left, result)
    if op is operator.mod:
        return fill_zeros(result, left, right)
    if op is rmod:
        return fill_zeros(result, right, left)
    return result
def radd(left, right):
    """Reflected addition: ``right + left``."""
    return operator.add(right, left)


def rsub(left, right):
    """Reflected subtraction: ``right - left``."""
    return operator.sub(right, left)


def rmul(left, right):
    """Reflected multiplication: ``right * left``."""
    return operator.mul(right, left)


def rdiv(left, right):
    """Reflected division: ``right / left``."""
    return operator.truediv(right, left)


def rtruediv(left, right):
    """Reflected true division: ``right / left``."""
    return operator.truediv(right, left)


def rfloordiv(left, right):
    """Reflected floor division: ``right // left``."""
    return operator.floordiv(right, left)


def rmod(left, right):
    """
    Reflected modulo: ``right % left``.

    Raises
    ------
    TypeError
        If `right` is a str, because ``%`` would then mean string
        formatting rather than modulo.
    """
    if not isinstance(right, str):
        return operator.mod(right, left)

    typ = type(left).__name__
    raise TypeError(f"{typ} cannot perform the operation mod")


def rdivmod(left, right):
    """Reflected divmod: ``divmod(right, left)``."""
    return divmod(right, left)


def rpow(left, right):
    """Reflected power: ``right ** left``."""
    return operator.pow(right, left)


def rand_(left, right):
    """Reflected and: ``right & left``."""
    return right & left


def ror_(left, right):
    """Reflected or: ``right | left``."""
    return right | left


def rxor(left, right):
    """Reflected xor: ``right ^ left``."""
    return right ^ left
_pipe_template, get_groupby +from pandas.core.groupby.grouper import Grouper +from pandas.core.groupby.ops import BinGrouper +from pandas.core.indexes.datetimes import DatetimeIndex, date_range +from pandas.core.indexes.period import PeriodIndex, period_range +from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range + +from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import DateOffset, Day, Nano, Tick + +_shared_docs_kwargs: Dict[str, str] = dict() + + +class Resampler(_GroupBy, ShallowMixin): + """ + Class for resampling datetimelike data, a groupby-like operation. + See aggregate, transform, and apply functions on this object. + + It's easiest to use obj.resample(...) to use Resampler. + + Parameters + ---------- + obj : pandas object + groupby : a TimeGrouper object + axis : int, default 0 + kind : str or None + 'period', 'timestamp' to override default index treatment + + Returns + ------- + a Resampler of the appropriate type + + Notes + ----- + After resampling, see aggregate, apply, and transform functions. + """ + + # to the groupby descriptor + _attributes = [ + "freq", + "axis", + "closed", + "label", + "convention", + "loffset", + "base", + "kind", + ] + + def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs): + self.groupby = groupby + self.keys = None + self.sort = True + self.axis = axis + self.kind = kind + self.squeeze = False + self.group_keys = True + self.as_index = True + self.exclusions = set() + self.binner = None + self.grouper = None + + if self.groupby is not None: + self.groupby._set_grouper(self._convert_obj(obj), sort=True) + + def __str__(self) -> str: + """ + Provide a nice str repr of our rolling object. 
+ """ + attrs = ( + f"{k}={getattr(self.groupby, k)}" + for k in self._attributes + if getattr(self.groupby, k, None) is not None + ) + return f"{type(self).__name__} [{', '.join(attrs)}]" + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self._attributes: + return getattr(self.groupby, attr) + if attr in self.obj: + return self[attr] + + return object.__getattribute__(self, attr) + + def __iter__(self): + """ + Resampler iterator. + + Returns + ------- + Generator yielding sequence of (name, subsetted object) + for each group. + + See Also + -------- + GroupBy.__iter__ + """ + self._set_binner() + return super().__iter__() + + @property + def obj(self): + return self.groupby.obj + + @property + def ax(self): + return self.groupby.ax + + @property + def _typ(self) -> str: + """ + Masquerade for compat as a Series or a DataFrame. + """ + if isinstance(self._selected_obj, ABCSeries): + return "series" + return "dataframe" + + @property + def _from_selection(self) -> bool: + """ + Is the resampling from a DataFrame column or MultiIndex level. + """ + # upsampling and PeriodIndex resampling do not work + # with selection, this state used to catch and raise an error + return self.groupby is not None and ( + self.groupby.key is not None or self.groupby.level is not None + ) + + def _convert_obj(self, obj): + """ + Provide any conversions for the object in order to correctly handle. + + Parameters + ---------- + obj : the object to be resampled + + Returns + ------- + obj : converted object + """ + obj = obj._consolidate() + return obj + + def _get_binner_for_time(self): + raise AbstractMethodError(self) + + def _set_binner(self): + """ + Setup our binners. 
+ + Cache these as we are an immutable object + """ + if self.binner is None: + self.binner, self.grouper = self._get_binner() + + def _get_binner(self): + """ + Create the BinGrouper, assume that self.set_grouper(obj) + has already been called. + """ + + binner, bins, binlabels = self._get_binner_for_time() + assert len(bins) == len(binlabels) + bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer) + return binner, bin_grouper + + def _assure_grouper(self): + """ + Make sure that we are creating our binner & grouper. + """ + self._set_binner() + + @Substitution( + klass="Resampler", + versionadded=".. versionadded:: 0.23.0", + examples=""" + >>> df = pd.DataFrame({'A': [1, 2, 3, 4]}, + ... index=pd.date_range('2012-08-02', periods=4)) + >>> df + A + 2012-08-02 1 + 2012-08-03 2 + 2012-08-04 3 + 2012-08-05 4 + + To get the difference between each 2-day period's maximum and minimum + value in one pass, you can do + + >>> df.resample('2D').pipe(lambda x: x.max() - x.min()) + A + 2012-08-02 1 + 2012-08-04 1""", + ) + @Appender(_pipe_template) + def pipe(self, func, *args, **kwargs): + return super().pipe(func, *args, **kwargs) + + _agg_see_also_doc = dedent( + """ + See Also + -------- + DataFrame.groupby.aggregate + DataFrame.resample.transform + DataFrame.aggregate + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1,2,3,4,5], + index=pd.date_range('20130101', periods=5,freq='s')) + 2013-01-01 00:00:00 1 + 2013-01-01 00:00:01 2 + 2013-01-01 00:00:02 3 + 2013-01-01 00:00:03 4 + 2013-01-01 00:00:04 5 + Freq: S, dtype: int64 + + >>> r = s.resample('2s') + DatetimeIndexResampler [freq=<2 * Seconds>, axis=0, closed=left, + label=left, convention=start, base=0] + + >>> r.agg(np.sum) + 2013-01-01 00:00:00 3 + 2013-01-01 00:00:02 7 + 2013-01-01 00:00:04 5 + Freq: 2S, dtype: int64 + + >>> r.agg(['sum','mean','max']) + sum mean max + 2013-01-01 00:00:00 3 1.5 2 + 2013-01-01 00:00:02 7 3.5 4 + 2013-01-01 00:00:04 5 5.0 5 
+ + >>> r.agg({'result' : lambda x: x.mean() / x.std(), + 'total' : np.sum}) + total result + 2013-01-01 00:00:00 3 2.121320 + 2013-01-01 00:00:02 7 4.949747 + 2013-01-01 00:00:04 5 NaN + """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="DataFrame", + axis="", + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, func, *args, **kwargs): + + self._set_binner() + result, how = self._aggregate(func, *args, **kwargs) + if result is None: + how = func + grouper = None + result = self._groupby_and_aggregate(how, grouper, *args, **kwargs) + + result = self._apply_loffset(result) + return result + + agg = aggregate + apply = aggregate + + def transform(self, arg, *args, **kwargs): + """ + Call function producing a like-indexed Series on each group and return + a Series with the transformed values. + + Parameters + ---------- + arg : function + To apply to each group. Should return a Series with the same index. + + Returns + ------- + transformed : Series + + Examples + -------- + >>> resampled.transform(lambda x: (x - x.mean()) / x.std()) + """ + return self._selected_obj.groupby(self.groupby).transform(arg, *args, **kwargs) + + def _downsample(self, f): + raise AbstractMethodError(self) + + def _upsample(self, f, limit=None, fill_value=None): + raise AbstractMethodError(self) + + def _gotitem(self, key, ndim: int, subset=None): + """ + Sub-classes to define. Return a sliced object. 
+ + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + self._set_binner() + grouper = self.grouper + if subset is None: + subset = self.obj + grouped = get_groupby(subset, by=None, grouper=grouper, axis=self.axis) + + # try the key selection + try: + return grouped[key] + except KeyError: + return grouped + + def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): + """ + Re-evaluate the obj with a groupby aggregation. + """ + + if grouper is None: + self._set_binner() + grouper = self.grouper + + obj = self._selected_obj + + grouped = get_groupby(obj, by=None, grouper=grouper, axis=self.axis) + + try: + if isinstance(obj, ABCDataFrame) and callable(how): + # Check if the function is reducing or not. + result = grouped._aggregate_item_by_item(how, *args, **kwargs) + else: + result = grouped.aggregate(how, *args, **kwargs) + except DataError: + # we have a non-reducing function; try to evaluate + result = grouped.apply(how, *args, **kwargs) + except ValueError as err: + if "Must produce aggregated value" in str(err): + # raised in _aggregate_named + pass + elif "len(index) != len(labels)" in str(err): + # raised in libgroupby validation + pass + elif "No objects to concatenate" in str(err): + # raised in concat call + # In tests this is reached via either + # _apply_to_column_groupbys (ohlc) or DataFrameGroupBy.nunique + pass + else: + raise + + # we have a non-reducing function + # try to evaluate + result = grouped.apply(how, *args, **kwargs) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _apply_loffset(self, result): + """ + If loffset is set, offset the result index. + + This is NOT an idempotent routine, it will be applied + exactly once to the result. 
+ + Parameters + ---------- + result : Series or DataFrame + the result of resample + """ + + needs_offset = ( + isinstance(self.loffset, (DateOffset, timedelta, np.timedelta64)) + and isinstance(result.index, DatetimeIndex) + and len(result.index) > 0 + ) + + if needs_offset: + result.index = result.index + self.loffset + + self.loffset = None + return result + + def _get_resampler_for_grouping(self, groupby, **kwargs): + """ + Return the correct class for resampling with groupby. + """ + return self._resampler_for_grouping(self, groupby=groupby, **kwargs) + + def _wrap_result(self, result): + """ + Potentially wrap any results. + """ + if isinstance(result, ABCSeries) and self._selection is not None: + result.name = self._selection + + if isinstance(result, ABCSeries) and result.empty: + obj = self.obj + if isinstance(obj.index, PeriodIndex): + result.index = obj.index.asfreq(self.freq) + else: + result.index = obj.index._shallow_copy(freq=self.freq) + result.name = getattr(obj, "name", None) + + return result + + def pad(self, limit=None): + """ + Forward fill the values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + An upsampled Series. + + See Also + -------- + Series.fillna + DataFrame.fillna + """ + return self._upsample("pad", limit=limit) + + ffill = pad + + def nearest(self, limit=None): + """ + Resample by using the nearest value. + + When resampling data, missing values may appear (e.g., when the + resampling frequency is higher than the original frequency). + The `nearest` method will replace ``NaN`` values that appeared in + the resampled data with the value from the nearest member of the + sequence, based on the index value. + Missing values that existed in the original data will not be modified. + If `limit` is given, fill only this many values in each direction for + each of the original values. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. 
+ + .. versionadded:: 0.21.0 + + Returns + ------- + Series or DataFrame + An upsampled Series or DataFrame with ``NaN`` values filled with + their nearest value. + + See Also + -------- + backfill : Backward fill the new missing values in the resampled data. + pad : Forward fill ``NaN`` values. + + Examples + -------- + >>> s = pd.Series([1, 2], + ... index=pd.date_range('20180101', + ... periods=2, + ... freq='1h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + Freq: H, dtype: int64 + + >>> s.resample('15min').nearest() + 2018-01-01 00:00:00 1 + 2018-01-01 00:15:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 00:45:00 2 + 2018-01-01 01:00:00 2 + Freq: 15T, dtype: int64 + + Limit the number of upsampled values imputed by the nearest: + + >>> s.resample('15min').nearest(limit=1) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + Freq: 15T, dtype: float64 + """ + return self._upsample("nearest", limit=limit) + + def backfill(self, limit=None): + """ + Backward fill the new missing values in the resampled data. + + In statistics, imputation is the process of replacing missing data with + substituted values [1]_. When resampling data, missing values may + appear (e.g., when the resampling frequency is higher than the original + frequency). The backward fill will replace NaN values that appeared in + the resampled data with the next value in the original sequence. + Missing values that existed in the original data will not be modified. + + Parameters + ---------- + limit : int, optional + Limit of how many values to fill. + + Returns + ------- + Series, DataFrame + An upsampled Series or DataFrame with backward filled NaN values. + + See Also + -------- + bfill : Alias of backfill. + fillna : Fill NaN values using the specified method, which can be + 'backfill'. + nearest : Fill NaN values with nearest neighbor starting from center. + pad : Forward fill NaN values. 
+ Series.fillna : Fill NaN values in the Series using the + specified method, which can be 'backfill'. + DataFrame.fillna : Fill NaN values in the DataFrame using the + specified method, which can be 'backfill'. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics) + + Examples + -------- + + Resampling a Series: + + >>> s = pd.Series([1, 2, 3], + ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 02:00:00 3 + Freq: H, dtype: int64 + + >>> s.resample('30min').backfill() + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('15min').backfill(limit=2) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 NaN + 2018-01-01 00:30:00 2.0 + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:15:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 01:45:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 15T, dtype: float64 + + Resampling a DataFrame that has missing values: + + >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, + ... index=pd.date_range('20180101', periods=3, + ... 
freq='h')) + >>> df + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('30min').backfill() + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 00:30:00 NaN 3 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 01:30:00 6.0 5 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('15min').backfill(limit=2) + a b + 2018-01-01 00:00:00 2.0 1.0 + 2018-01-01 00:15:00 NaN NaN + 2018-01-01 00:30:00 NaN 3.0 + 2018-01-01 00:45:00 NaN 3.0 + 2018-01-01 01:00:00 NaN 3.0 + 2018-01-01 01:15:00 NaN NaN + 2018-01-01 01:30:00 6.0 5.0 + 2018-01-01 01:45:00 6.0 5.0 + 2018-01-01 02:00:00 6.0 5.0 + """ + return self._upsample("backfill", limit=limit) + + bfill = backfill + + def fillna(self, method, limit=None): + """ + Fill missing values introduced by upsampling. + + In statistics, imputation is the process of replacing missing data with + substituted values [1]_. When resampling data, missing values may + appear (e.g., when the resampling frequency is higher than the original + frequency). + + Missing values that existed in the original data will + not be modified. + + Parameters + ---------- + method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'} + Method to use for filling holes in resampled data + + * 'pad' or 'ffill': use previous valid observation to fill gap + (forward fill). + * 'backfill' or 'bfill': use next valid observation to fill gap. + * 'nearest': use nearest valid observation to fill gap. + + limit : int, optional + Limit of how many consecutive missing values to fill. + + Returns + ------- + Series or DataFrame + An upsampled Series or DataFrame with missing values filled. + + See Also + -------- + backfill : Backward fill NaN values in the resampled data. + pad : Forward fill NaN values in the resampled data. + nearest : Fill NaN values in the resampled data + with nearest neighbor starting from center. + interpolate : Fill NaN values using interpolation. 
+ Series.fillna : Fill NaN values in the Series using the + specified method, which can be 'bfill' and 'ffill'. + DataFrame.fillna : Fill NaN values in the DataFrame using the + specified method, which can be 'bfill' and 'ffill'. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics) + + Examples + -------- + Resampling a Series: + + >>> s = pd.Series([1, 2, 3], + ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> s + 2018-01-01 00:00:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 02:00:00 3 + Freq: H, dtype: int64 + + Without filling the missing values you get: + + >>> s.resample("30min").asfreq() + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:30:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> s.resample('30min').fillna("backfill") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('15min').fillna("backfill", limit=2) + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:15:00 NaN + 2018-01-01 00:30:00 2.0 + 2018-01-01 00:45:00 2.0 + 2018-01-01 01:00:00 2.0 + 2018-01-01 01:15:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 01:45:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 15T, dtype: float64 + + >>> s.resample('30min').fillna("pad") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 1 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 2 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + >>> s.resample('30min').fillna("nearest") + 2018-01-01 00:00:00 1 + 2018-01-01 00:30:00 2 + 2018-01-01 01:00:00 2 + 2018-01-01 01:30:00 3 + 2018-01-01 02:00:00 3 + Freq: 30T, dtype: int64 + + Missing values present before the upsampling are not affected. + + >>> sm = pd.Series([1, None, 3], + ... 
index=pd.date_range('20180101', periods=3, freq='h')) + >>> sm + 2018-01-01 00:00:00 1.0 + 2018-01-01 01:00:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: H, dtype: float64 + + >>> sm.resample('30min').fillna('backfill') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> sm.resample('30min').fillna('pad') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 1.0 + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 NaN + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + >>> sm.resample('30min').fillna('nearest') + 2018-01-01 00:00:00 1.0 + 2018-01-01 00:30:00 NaN + 2018-01-01 01:00:00 NaN + 2018-01-01 01:30:00 3.0 + 2018-01-01 02:00:00 3.0 + Freq: 30T, dtype: float64 + + DataFrame resampling is done column-wise. All the same options are + available. + + >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, + ... index=pd.date_range('20180101', periods=3, + ... freq='h')) + >>> df + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 02:00:00 6.0 5 + + >>> df.resample('30min').fillna("bfill") + a b + 2018-01-01 00:00:00 2.0 1 + 2018-01-01 00:30:00 NaN 3 + 2018-01-01 01:00:00 NaN 3 + 2018-01-01 01:30:00 6.0 5 + 2018-01-01 02:00:00 6.0 5 + """ + return self._upsample(method, limit=limit) + + @Appender(_shared_docs["interpolate"] % _shared_docs_kwargs) + def interpolate( + self, + method="linear", + axis=0, + limit=None, + inplace=False, + limit_direction="forward", + limit_area=None, + downcast=None, + **kwargs, + ): + """ + Interpolate values according to different methods. + """ + result = self._upsample(None) + return result.interpolate( + method=method, + axis=axis, + limit=limit, + inplace=inplace, + limit_direction=limit_direction, + limit_area=limit_area, + downcast=downcast, + **kwargs, + ) + + def asfreq(self, fill_value=None): + """ + Return the values at the new freq, essentially a reindex. 
+ + Parameters + ---------- + fill_value : scalar, optional + Value to use for missing values, applied during upsampling (note + this does not fill NaNs that already were present). + + Returns + ------- + DataFrame or Series + Values at the specified freq. + + See Also + -------- + Series.asfreq + DataFrame.asfreq + """ + return self._upsample("asfreq", fill_value=fill_value) + + def std(self, ddof=1, *args, **kwargs): + """ + Compute standard deviation of groups, excluding missing values. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + Returns + ------- + DataFrame or Series + Standard deviation of values within each group. + """ + nv.validate_resampler_func("std", args, kwargs) + return self._downsample("std", ddof=ddof) + + def var(self, ddof=1, *args, **kwargs): + """ + Compute variance of groups, excluding missing values. + + Parameters + ---------- + ddof : int, default 1 + Degrees of freedom. + + Returns + ------- + DataFrame or Series + Variance of values within each group. + """ + nv.validate_resampler_func("var", args, kwargs) + return self._downsample("var", ddof=ddof) + + @Appender(GroupBy.size.__doc__) + def size(self): + result = self._downsample("size") + if not len(self.ax): + from pandas import Series + + if self._selected_obj.ndim == 1: + name = self._selected_obj.name + else: + name = None + result = Series([], index=result.index, dtype="int64", name=name) + return result + + @Appender(GroupBy.count.__doc__) + def count(self): + result = self._downsample("count") + if not len(self.ax): + if self._selected_obj.ndim == 1: + result = type(self._selected_obj)( + [], index=result.index, dtype="int64", name=self._selected_obj.name + ) + else: + from pandas import DataFrame + + result = DataFrame( + [], index=result.index, columns=result.columns, dtype="int64" + ) + + return result + + def quantile(self, q=0.5, **kwargs): + """ + Return value at the given quantile. + + .. 
versionadded:: 0.24.0 + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + + Returns + ------- + DataFrame or Series + Quantile of values within each group. + + See Also + -------- + Series.quantile + DataFrame.quantile + DataFrameGroupBy.quantile + """ + return self._downsample("quantile", q=q, **kwargs) + + +# downsample methods +for method in ["sum", "prod"]: + + def f(self, _method=method, min_count=0, *args, **kwargs): + nv.validate_resampler_func(_method, args, kwargs) + return self._downsample(_method, min_count=min_count) + + f.__doc__ = getattr(GroupBy, method).__doc__ + setattr(Resampler, method, f) + + +# downsample methods +for method in ["min", "max", "first", "last", "mean", "sem", "median", "ohlc"]: + + def g(self, _method=method, *args, **kwargs): + nv.validate_resampler_func(_method, args, kwargs) + return self._downsample(_method) + + g.__doc__ = getattr(GroupBy, method).__doc__ + setattr(Resampler, method, g) + + +# series only methods +for method in ["nunique"]: + + def h(self, _method=method): + return self._downsample(_method) + + h.__doc__ = getattr(SeriesGroupBy, method).__doc__ + setattr(Resampler, method, h) + + +class _GroupByMixin(GroupByMixin): + """ + Provide the groupby facilities. + """ + + def __init__(self, obj, *args, **kwargs): + + parent = kwargs.pop("parent", None) + groupby = kwargs.pop("groupby", None) + if parent is None: + parent = obj + + # initialize our GroupByMixin object with + # the resampler attributes + for attr in self._attributes: + setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) + + super().__init__(None) + self._groupby = groupby + self._groupby.mutated = True + self._groupby.grouper.mutated = True + self.groupby = copy.copy(parent.groupby) + + @no_type_check + def _apply(self, f, grouper=None, *args, **kwargs): + """ + Dispatch to _upsample; we are stripping all of the _upsample kwargs and + performing the original function call on the grouped object. 
+ """ + + def func(x): + x = self._shallow_copy(x, groupby=self.groupby) + + if isinstance(f, str): + return getattr(x, f)(**kwargs) + + return x.apply(f, *args, **kwargs) + + result = self._groupby.apply(func) + return self._wrap_result(result) + + _upsample = _apply + _downsample = _apply + _groupby_and_aggregate = _apply + + +class DatetimeIndexResampler(Resampler): + @property + def _resampler_for_grouping(self): + return DatetimeIndexResamplerGroupby + + def _get_binner_for_time(self): + + # this is how we are actually creating the bins + if self.kind == "period": + return self.groupby._get_time_period_bins(self.ax) + return self.groupby._get_time_bins(self.ax) + + def _downsample(self, how, **kwargs): + """ + Downsample the cython defined function. + + Parameters + ---------- + how : string / cython mapped function + **kwargs : kw args passed to how function + """ + self._set_binner() + how = self._get_cython_func(how) or how + ax = self.ax + obj = self._selected_obj + + if not len(ax): + # reset to the new freq + obj = obj.copy() + obj.index._set_freq(self.freq) + return obj + + # do we have a regular frequency + if ax.freq is not None or ax.inferred_freq is not None: + + if len(self.grouper.binlabels) > len(ax) and how is None: + + # let's do an asfreq + return self.asfreq() + + # we are downsampling + # we want to call the actual grouper method here + result = obj.groupby(self.grouper, axis=self.axis).aggregate(how, **kwargs) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _adjust_binner_for_upsample(self, binner): + """ + Adjust our binner when upsampling. 
+ + The range of a new index should not be outside specified range + """ + if self.closed == "right": + binner = binner[1:] + else: + binner = binner[:-1] + return binner + + def _upsample(self, method, limit=None, fill_value=None): + """ + Parameters + ---------- + method : string {'backfill', 'bfill', 'pad', + 'ffill', 'asfreq'} method for upsampling + limit : int, default None + Maximum size gap to fill when reindexing + fill_value : scalar, default None + Value to use for missing values + + See Also + -------- + .fillna + + """ + self._set_binner() + if self.axis: + raise AssertionError("axis must be 0") + if self._from_selection: + raise ValueError( + "Upsampling from level= or on= selection " + "is not supported, use .set_index(...) " + "to explicitly set index to datetime-like" + ) + + ax = self.ax + obj = self._selected_obj + binner = self.binner + res_index = self._adjust_binner_for_upsample(binner) + + # if we have the same frequency as our axis, then we are equal sampling + if limit is None and to_offset(ax.inferred_freq) == self.freq: + result = obj.copy() + result.index = res_index + else: + result = obj.reindex( + res_index, method=method, limit=limit, fill_value=fill_value + ) + + result = self._apply_loffset(result) + return self._wrap_result(result) + + def _wrap_result(self, result): + result = super()._wrap_result(result) + + # we may have a different kind that we were asked originally + # convert if needed + if self.kind == "period" and not isinstance(result.index, PeriodIndex): + result.index = result.index.to_period(self.freq) + return result + + +class DatetimeIndexResamplerGroupby(_GroupByMixin, DatetimeIndexResampler): + """ + Provides a resample of a groupby implementation + """ + + @property + def _constructor(self): + return DatetimeIndexResampler + + +class PeriodIndexResampler(DatetimeIndexResampler): + @property + def _resampler_for_grouping(self): + return PeriodIndexResamplerGroupby + + def _get_binner_for_time(self): + if self.kind 
== "timestamp": + return super()._get_binner_for_time() + return self.groupby._get_period_bins(self.ax) + + def _convert_obj(self, obj): + obj = super()._convert_obj(obj) + + if self._from_selection: + # see GH 14008, GH 12871 + msg = ( + "Resampling from level= or on= selection " + "with a PeriodIndex is not currently supported, " + "use .set_index(...) to explicitly set index" + ) + raise NotImplementedError(msg) + + if self.loffset is not None: + # Cannot apply loffset/timedelta to PeriodIndex -> convert to + # timestamps + self.kind = "timestamp" + + # convert to timestamp + if self.kind == "timestamp": + obj = obj.to_timestamp(how=self.convention) + + return obj + + def _downsample(self, how, **kwargs): + """ + Downsample the cython defined function. + + Parameters + ---------- + how : string / cython mapped function + **kwargs : kw args passed to how function + """ + + # we may need to actually resample as if we are timestamps + if self.kind == "timestamp": + return super()._downsample(how, **kwargs) + + how = self._get_cython_func(how) or how + ax = self.ax + + if is_subperiod(ax.freq, self.freq): + # Downsampling + return self._groupby_and_aggregate(how, grouper=self.grouper, **kwargs) + elif is_superperiod(ax.freq, self.freq): + if how == "ohlc": + # GH #13083 + # upsampling to subperiods is handled as an asfreq, which works + # for pure aggregating/reducing methods + # OHLC reduces along the time dimension, but creates multiple + # values for each period -> handle by _groupby_and_aggregate() + return self._groupby_and_aggregate(how, grouper=self.grouper) + return self.asfreq() + elif ax.freq == self.freq: + return self.asfreq() + + raise IncompatibleFrequency( + f"Frequency {ax.freq} cannot be resampled to {self.freq}, " + "as they are not sub or super periods" + ) + + def _upsample(self, method, limit=None, fill_value=None): + """ + Parameters + ---------- + method : string {'backfill', 'bfill', 'pad', 'ffill'} + Method for upsampling. 
+ limit : int, default None + Maximum size gap to fill when reindexing. + fill_value : scalar, default None + Value to use for missing values. + + See Also + -------- + .fillna + + """ + + # we may need to actually resample as if we are timestamps + if self.kind == "timestamp": + return super()._upsample(method, limit=limit, fill_value=fill_value) + + self._set_binner() + ax = self.ax + obj = self.obj + new_index = self.binner + + # Start vs. end of period + memb = ax.asfreq(self.freq, how=self.convention) + + # Get the fill indexer + indexer = memb.get_indexer(new_index, method=method, limit=limit) + return self._wrap_result( + _take_new_index(obj, indexer, new_index, axis=self.axis) + ) + + +class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler): + """ + Provides a resample of a groupby implementation. + """ + + @property + def _constructor(self): + return PeriodIndexResampler + + +class TimedeltaIndexResampler(DatetimeIndexResampler): + @property + def _resampler_for_grouping(self): + return TimedeltaIndexResamplerGroupby + + def _get_binner_for_time(self): + return self.groupby._get_time_delta_bins(self.ax) + + def _adjust_binner_for_upsample(self, binner): + """ + Adjust our binner when upsampling. + + The range of a new index is allowed to be greater than original range + so we don't need to change the length of a binner, GH 13022 + """ + return binner + + +class TimedeltaIndexResamplerGroupby(_GroupByMixin, TimedeltaIndexResampler): + """ + Provides a resample of a groupby implementation. + """ + + @property + def _constructor(self): + return TimedeltaIndexResampler + + +def resample(obj, kind=None, **kwds): + """ + Create a TimeGrouper and return our resampler. 
+ """ + tg = TimeGrouper(**kwds) + return tg._get_resampler(obj, kind=kind) + + +resample.__doc__ = Resampler.__doc__ + + +def get_resampler_for_grouping( + groupby, rule, how=None, fill_method=None, limit=None, kind=None, **kwargs +): + """ + Return our appropriate resampler when grouping as well. + """ + + # .resample uses 'on' similar to how .groupby uses 'key' + kwargs["key"] = kwargs.pop("on", None) + + tg = TimeGrouper(freq=rule, **kwargs) + resampler = tg._get_resampler(groupby.obj, kind=kind) + return resampler._get_resampler_for_grouping(groupby=groupby) + + +class TimeGrouper(Grouper): + """ + Custom groupby class for time-interval grouping. + + Parameters + ---------- + freq : pandas date offset or offset alias for identifying bin edges + closed : closed end of interval; 'left' or 'right' + label : interval boundary to use for labeling; 'left' or 'right' + convention : {'start', 'end', 'e', 's'} + If axis is PeriodIndex + """ + + _attributes = Grouper._attributes + ( + "closed", + "label", + "how", + "loffset", + "kind", + "convention", + "base", + ) + + def __init__( + self, + freq="Min", + closed=None, + label=None, + how="mean", + axis=0, + fill_method=None, + limit=None, + loffset=None, + kind=None, + convention=None, + base=0, + **kwargs, + ): + # Check for correctness of the keyword arguments which would + # otherwise silently use the default if misspelled + if label not in {None, "left", "right"}: + raise ValueError(f"Unsupported value {label} for `label`") + if closed not in {None, "left", "right"}: + raise ValueError(f"Unsupported value {closed} for `closed`") + if convention not in {None, "start", "end", "e", "s"}: + raise ValueError(f"Unsupported value {convention} for `convention`") + + freq = to_offset(freq) + + end_types = {"M", "A", "Q", "BM", "BA", "BQ", "W"} + rule = freq.rule_code + if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types): + if closed is None: + closed = "right" + if label is None: + label = "right" 
+ else: + if closed is None: + closed = "left" + if label is None: + label = "left" + + self.closed = closed + self.label = label + self.kind = kind + + self.convention = convention or "E" + self.convention = self.convention.lower() + + if isinstance(loffset, str): + loffset = to_offset(loffset) + self.loffset = loffset + + self.how = how + self.fill_method = fill_method + self.limit = limit + self.base = base + + # always sort time groupers + kwargs["sort"] = True + + super().__init__(freq=freq, axis=axis, **kwargs) + + def _get_resampler(self, obj, kind=None): + """ + Return my resampler or raise if we have an invalid axis. + + Parameters + ---------- + obj : input object + kind : string, optional + 'period','timestamp','timedelta' are valid + + Returns + ------- + a Resampler + + Raises + ------ + TypeError if incompatible axis + + """ + self._set_grouper(obj) + + ax = self.ax + if isinstance(ax, DatetimeIndex): + return DatetimeIndexResampler(obj, groupby=self, kind=kind, axis=self.axis) + elif isinstance(ax, PeriodIndex) or kind == "period": + return PeriodIndexResampler(obj, groupby=self, kind=kind, axis=self.axis) + elif isinstance(ax, TimedeltaIndex): + return TimedeltaIndexResampler(obj, groupby=self, axis=self.axis) + + raise TypeError( + "Only valid with DatetimeIndex, " + "TimedeltaIndex or PeriodIndex, " + f"but got an instance of '{type(ax).__name__}'" + ) + + def _get_grouper(self, obj, validate: bool = True): + # create the resampler and return our binner + r = self._get_resampler(obj) + r._set_binner() + return r.binner, r.grouper, r.obj + + def _get_time_bins(self, ax): + if not isinstance(ax, DatetimeIndex): + raise TypeError( + "axis must be a DatetimeIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + if len(ax) == 0: + binner = labels = DatetimeIndex(data=[], freq=self.freq, name=ax.name) + return binner, [], labels + + first, last = _get_timestamp_range_edges( + ax.min(), ax.max(), self.freq, closed=self.closed, base=self.base + 
) + # GH #12037 + # use first/last directly instead of call replace() on them + # because replace() will swallow the nanosecond part + # thus last bin maybe slightly before the end if the end contains + # nanosecond part and lead to `Values falls after last bin` error + binner = labels = date_range( + freq=self.freq, + start=first, + end=last, + tz=ax.tz, + name=ax.name, + ambiguous="infer", + nonexistent="shift_forward", + ) + + ax_values = ax.asi8 + binner, bin_edges = self._adjust_bin_edges(binner, ax_values) + + # general version, knowing nothing about relative frequencies + bins = lib.generate_bins_dt64( + ax_values, bin_edges, self.closed, hasnans=ax.hasnans + ) + + if self.closed == "right": + labels = binner + if self.label == "right": + labels = labels[1:] + elif self.label == "right": + labels = labels[1:] + + if ax.hasnans: + binner = binner.insert(0, NaT) + labels = labels.insert(0, NaT) + + # if we end up with more labels than bins + # adjust the labels + # GH4076 + if len(bins) < len(labels): + labels = labels[: len(bins)] + + return binner, bins, labels + + def _adjust_bin_edges(self, binner, ax_values): + # Some hacks for > daily data, see #1471, #1458, #1483 + + if self.freq != "D" and is_superperiod(self.freq, "D"): + if self.closed == "right": + # GH 21459, GH 9119: Adjust the bins relative to the wall time + bin_edges = binner.tz_localize(None) + bin_edges = bin_edges + timedelta(1) - Nano(1) + bin_edges = bin_edges.tz_localize(binner.tz).asi8 + else: + bin_edges = binner.asi8 + + # intraday values on last day + if bin_edges[-2] > ax_values.max(): + bin_edges = bin_edges[:-1] + binner = binner[:-1] + else: + bin_edges = binner.asi8 + return binner, bin_edges + + def _get_time_delta_bins(self, ax): + if not isinstance(ax, TimedeltaIndex): + raise TypeError( + "axis must be a TimedeltaIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + if not len(ax): + binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name) + return 
binner, [], labels + + start, end = ax.min(), ax.max() + labels = binner = timedelta_range( + start=start, end=end, freq=self.freq, name=ax.name + ) + + end_stamps = labels + self.freq + bins = ax.searchsorted(end_stamps, side="left") + + # Addresses GH #10530 + if self.base > 0: + labels += type(self.freq)(self.base) + + return binner, bins, labels + + def _get_time_period_bins(self, ax): + if not isinstance(ax, DatetimeIndex): + raise TypeError( + "axis must be a DatetimeIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + freq = self.freq + + if not len(ax): + binner = labels = PeriodIndex(data=[], freq=freq, name=ax.name) + return binner, [], labels + + labels = binner = period_range(start=ax[0], end=ax[-1], freq=freq, name=ax.name) + + end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp() + if ax.tzinfo: + end_stamps = end_stamps.tz_localize(ax.tzinfo) + bins = ax.searchsorted(end_stamps, side="left") + + return binner, bins, labels + + def _get_period_bins(self, ax): + if not isinstance(ax, PeriodIndex): + raise TypeError( + "axis must be a PeriodIndex, but got " + f"an instance of {type(ax).__name__}" + ) + + memb = ax.asfreq(self.freq, how=self.convention) + + # NaT handling as in pandas._lib.lib.generate_bins_dt64() + nat_count = 0 + if memb.hasnans: + nat_count = np.sum(memb._isnan) + memb = memb[~memb._isnan] + + # if index contains no valid (non-NaT) values, return empty index + if not len(memb): + binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name) + return binner, [], labels + + freq_mult = self.freq.n + + start = ax.min().asfreq(self.freq, how=self.convention) + end = ax.max().asfreq(self.freq, how="end") + bin_shift = 0 + + # GH 23882 + if self.base: + # get base adjusted bin edge labels + p_start, end = _get_period_range_edges( + start, end, self.freq, closed=self.closed, base=self.base + ) + + # Get offset for bin edge (not label edge) adjustment + start_offset = Period(start, self.freq) - Period(p_start, 
self.freq) + bin_shift = start_offset.n % freq_mult + start = p_start + + labels = binner = period_range( + start=start, end=end, freq=self.freq, name=ax.name + ) + + i8 = memb.asi8 + + # when upsampling to subperiods, we need to generate enough bins + expected_bins_count = len(binner) * freq_mult + i8_extend = expected_bins_count - (i8[-1] - i8[0]) + rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult) + rng += freq_mult + # adjust bin edge indexes to account for base + rng -= bin_shift + + # Wrap in PeriodArray for PeriodArray.searchsorted + prng = type(memb._data)(rng, dtype=memb.dtype) + bins = memb.searchsorted(prng, side="left") + + if nat_count > 0: + # NaT handling as in pandas._lib.lib.generate_bins_dt64() + # shift bins by the number of NaT + bins += nat_count + bins = np.insert(bins, 0, nat_count) + binner = binner.insert(0, NaT) + labels = labels.insert(0, NaT) + + return binner, bins, labels + + +def _take_new_index(obj, indexer, new_index, axis=0): + + if isinstance(obj, ABCSeries): + new_values = algos.take_1d(obj.values, indexer) + return obj._constructor(new_values, index=new_index, name=obj.name) + elif isinstance(obj, ABCDataFrame): + if axis == 1: + raise NotImplementedError("axis 1 is not supported") + return obj._constructor( + obj._data.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) + ) + else: + raise ValueError("'obj' should be either a Series or a DataFrame") + + +def _get_timestamp_range_edges(first, last, offset, closed="left", base=0): + """ + Adjust the `first` Timestamp to the preceding Timestamp that resides on + the provided offset. Adjust the `last` Timestamp to the following + Timestamp that resides on the provided offset. Input Timestamps that + already reside on the offset will be adjusted depending on the type of + offset and the `closed` parameter. + + Parameters + ---------- + first : pd.Timestamp + The beginning Timestamp of the range to be adjusted. 
+ last : pd.Timestamp + The ending Timestamp of the range to be adjusted. + offset : pd.DateOffset + The dateoffset to which the Timestamps will be adjusted. + closed : {'right', 'left'}, default None + Which side of bin interval is closed. + base : int, default 0 + The "origin" of the adjusted Timestamps. + + Returns + ------- + A tuple of length 2, containing the adjusted pd.Timestamp objects. + """ + if isinstance(offset, Tick): + if isinstance(offset, Day): + # _adjust_dates_anchored assumes 'D' means 24H, but first/last + # might contain a DST transition (23H, 24H, or 25H). + # So "pretend" the dates are naive when adjusting the endpoints + tz = first.tz + first = first.tz_localize(None) + last = last.tz_localize(None) + + first, last = _adjust_dates_anchored( + first, last, offset, closed=closed, base=base + ) + if isinstance(offset, Day): + first = first.tz_localize(tz) + last = last.tz_localize(tz) + return first, last + + else: + first = first.normalize() + last = last.normalize() + + if closed == "left": + first = Timestamp(offset.rollback(first)) + else: + first = Timestamp(first - offset) + + last = Timestamp(last + offset) + + return first, last + + +def _get_period_range_edges(first, last, offset, closed="left", base=0): + """ + Adjust the provided `first` and `last` Periods to the respective Period of + the given offset that encompasses them. + + Parameters + ---------- + first : pd.Period + The beginning Period of the range to be adjusted. + last : pd.Period + The ending Period of the range to be adjusted. + offset : pd.DateOffset + The dateoffset to which the Periods will be adjusted. + closed : {'right', 'left'}, default None + Which side of bin interval is closed. + base : int, default 0 + The "origin" of the adjusted Periods. + + Returns + ------- + A tuple of length 2, containing the adjusted pd.Period objects. 
+ """ + if not all(isinstance(obj, Period) for obj in [first, last]): + raise TypeError("'first' and 'last' must be instances of type Period") + + # GH 23882 + first = first.to_timestamp() + last = last.to_timestamp() + adjust_first = not offset.is_on_offset(first) + adjust_last = offset.is_on_offset(last) + + first, last = _get_timestamp_range_edges( + first, last, offset, closed=closed, base=base + ) + + first = (first + adjust_first * offset).to_period(offset) + last = (last - adjust_last * offset).to_period(offset) + return first, last + + +def _adjust_dates_anchored(first, last, offset, closed="right", base=0): + # First and last offsets should be calculated from the start day to fix an + # error cause by resampling across multiple days when a one day period is + # not a multiple of the frequency. + # + # See https://github.com/pandas-dev/pandas/issues/8683 + + # GH 10117 & GH 19375. If first and last contain timezone information, + # Perform the calculation in UTC in order to avoid localizing on an + # Ambiguous or Nonexistent time. 
def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None):
    """
    Utility frequency conversion method for Series/DataFrame.

    Parameters
    ----------
    obj : Series or DataFrame
    freq : frequency to convert the index to
    method : optional
        Fill method forwarded to ``obj.reindex``; not supported when the
        index is a PeriodIndex.
    how : optional
        Forwarded to ``PeriodIndex.asfreq``; "E" is used when None. Only
        read for a PeriodIndex.
    normalize : bool, default False
        Normalize the resulting index. Only applied on the non-empty,
        non-period path.
    fill_value : optional
        Forwarded to ``obj.reindex``.

    Returns
    -------
    A new object of the same kind as ``obj`` with the converted index.

    Raises
    ------
    NotImplementedError
        If ``method`` is given for an object indexed by a PeriodIndex.
    """
    index = obj.index

    if isinstance(index, PeriodIndex):
        if method is not None:
            raise NotImplementedError("'method' argument is not supported")
        converted = obj.copy()
        converted.index = index.asfreq(freq, how="E" if how is None else how)
        return converted

    if len(index) == 0:
        # Nothing to reindex; only attach the requested frequency.
        converted = obj.copy()
        converted.index = index._shallow_copy(freq=to_offset(freq))
        return converted

    target = date_range(index[0], index[-1], freq=freq)
    target.name = index.name
    converted = obj.reindex(target, method=method, fill_value=fill_value)
    if normalize:
        converted.index = converted.index.normalize()
    return converted
get_objs_combined_axis, +) +import pandas.core.indexes.base as ibase +from pandas.core.internals import concatenate_block_managers + +# --------------------------------------------------------------------- +# Concatenate DataFrame objects + + +@overload +def concat( + objs: Union[Iterable["DataFrame"], Mapping[Optional[Hashable], "DataFrame"]], + axis=0, + join: str = "outer", + ignore_index: bool = False, + keys=None, + levels=None, + names=None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, +) -> "DataFrame": + ... + + +@overload +def concat( + objs: Union[ + Iterable[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion] + ], + axis=0, + join: str = "outer", + ignore_index: bool = False, + keys=None, + levels=None, + names=None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, +) -> FrameOrSeriesUnion: + ... + + +def concat( + objs: Union[ + Iterable[FrameOrSeriesUnion], Mapping[Optional[Hashable], FrameOrSeriesUnion] + ], + axis=0, + join="outer", + ignore_index: bool = False, + keys=None, + levels=None, + names=None, + verify_integrity: bool = False, + sort: bool = False, + copy: bool = True, +) -> FrameOrSeriesUnion: + """ + Concatenate pandas objects along a particular axis with optional set logic + along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series or DataFrame objects + If a dict is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised. + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along. 
+ join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis (or axes). + ignore_index : bool, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level. + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys. + names : list, default None + Names for the levels in the resulting hierarchical index. + verify_integrity : bool, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation. + sort : bool, default False + Sort non-concatenation axis if it is not already aligned when `join` + is 'outer'. + This has no effect when ``join='inner'``, which already preserves + the order of the non-concatenation axis. + + .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + + Changed to not sort by default. + + copy : bool, default True + If False, do not copy data unnecessarily. + + Returns + ------- + object, type of objs + When concatenating all ``Series`` along the index (axis=0), a + ``Series`` is returned. When ``objs`` contains at least one + ``DataFrame``, a ``DataFrame`` is returned. When concatenating along + the columns (axis=1), a ``DataFrame`` is returned. + + See Also + -------- + Series.append : Concatenate Series. + DataFrame.append : Concatenate DataFrames. + DataFrame.join : Join DataFrames using indexes. + DataFrame.merge : Merge DataFrames by indexes or columns. 
+ + Notes + ----- + The keys, levels, and names arguments are all optional. + + A walkthrough of how this method fits in with other tools for combining + pandas objects can be found `here + `__. + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a + 1 b + 0 c + 1 d + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2']) + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Label the index keys you create with the ``names`` option. + + >>> pd.concat([s1, s2], keys=['s1', 's2'], + ... names=['Series name', 'Row ID']) + Series name Row ID + s1 0 a + 1 b + s2 0 c + 1 d + dtype: object + + Combine two ``DataFrame`` objects with identical columns. + + >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], + ... columns=['letter', 'number']) + >>> df1 + letter number + 0 a 1 + 1 b 2 + >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], + ... columns=['letter', 'number']) + >>> df2 + letter number + 0 c 3 + 1 d 4 + >>> pd.concat([df1, df2]) + letter number + 0 a 1 + 1 b 2 + 0 c 3 + 1 d 4 + + Combine ``DataFrame`` objects with overlapping columns + and return everything. Columns outside the intersection will + be filled with ``NaN`` values. + + >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], + ... columns=['letter', 'number', 'animal']) + >>> df3 + letter number animal + 0 c 3 cat + 1 d 4 dog + >>> pd.concat([df1, df3], sort=False) + letter number animal + 0 a 1 NaN + 1 b 2 NaN + 0 c 3 cat + 1 d 4 dog + + Combine ``DataFrame`` objects with overlapping columns + and return only those that are shared by passing ``inner`` to + the ``join`` keyword argument. 
def __init__(
    self,
    objs,
    axis=0,
    join: str = "outer",
    keys=None,
    levels=None,
    names=None,
    ignore_index: bool = False,
    verify_integrity: bool = False,
    copy: bool = True,
    sort=False,
):
    """
    Validate and normalise the inputs of a concatenation.

    Parameters
    ----------
    objs : iterable or mapping of Series / DataFrame
        For a mapping, its keys become ``keys`` (unless ``keys`` is given)
        and the values are selected in ``keys`` order. ``None`` entries
        are dropped.
    axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort
        Same meaning as the corresponding arguments of ``concat``.

    Raises
    ------
    TypeError
        If ``objs`` is itself a pandas object or a string, or contains
        something that is neither a Series nor a DataFrame.
    ValueError
        If ``join`` is not "inner"/"outer", if there is nothing to
        concatenate, or if mixed-dimensional inputs differ by more than
        one dimension.
    AssertionError
        If the resolved axis is out of range for the sample object.
    """
    # Local import: keeps this block self-contained.
    from collections import abc

    if isinstance(objs, (NDFrame, str)):
        raise TypeError(
            "first argument must be an iterable of pandas "
            "objects, you passed an object of type "
            f'"{type(objs).__name__}"'
        )

    if join == "outer":
        self.intersect = False
    elif join == "inner":
        self.intersect = True
    else:  # pragma: no cover
        raise ValueError(
            "Only can inner (intersect) or outer (union) join the other axis"
        )

    # Any Mapping (not only dict) is keyed input; iterating a non-dict
    # mapping with list() would silently yield its keys instead.
    if isinstance(objs, abc.Mapping):
        if keys is None:
            keys = list(objs.keys())
        objs = [objs[k] for k in keys]
    else:
        objs = list(objs)

    if len(objs) == 0:
        raise ValueError("No objects to concatenate")

    if keys is None:
        objs = list(com.not_none(*objs))
    else:
        # #1649
        # drop None objects together with their keys so both stay aligned
        clean_keys = []
        clean_objs = []
        for k, v in zip(keys, objs):
            if v is None:
                continue
            clean_keys.append(k)
            clean_objs.append(v)
        objs = clean_objs
        name = getattr(keys, "name", None)
        keys = Index(clean_keys, name=name)

    if len(objs) == 0:
        raise ValueError("All objects passed were None")

    # consolidate data & figure out what our result ndim is going to be
    ndims = set()
    for obj in objs:
        if not isinstance(obj, (Series, DataFrame)):
            msg = (
                f"cannot concatenate object of type '{type(obj)}'; "
                "only Series and DataFrame objs are valid"
            )
            raise TypeError(msg)

        # consolidate
        obj._consolidate(inplace=True)
        ndims.add(obj.ndim)

    # get the sample
    # want the highest ndim that we have, and must be non-empty
    # unless all objs are empty
    sample = None
    if len(ndims) > 1:
        max_ndim = max(ndims)
        for obj in objs:
            if obj.ndim == max_ndim and np.sum(obj.shape):
                sample = obj
                break

    else:
        # filter out the empties if we have not multi-index possibilities
        # note to keep empty Series as it affect to result columns / name
        non_empties = [
            obj for obj in objs if sum(obj.shape) > 0 or isinstance(obj, Series)
        ]

        if len(non_empties) and (
            keys is None and names is None and levels is None and not self.intersect
        ):
            objs = non_empties
            sample = objs[0]

    if sample is None:
        sample = objs[0]
    self.objs = objs

    # Standardize axis parameter to int
    if isinstance(sample, Series):
        axis = DataFrame._get_axis_number(axis)
    else:
        axis = sample._get_axis_number(axis)

    # Need to flip BlockManager axis in the DataFrame special case
    self._is_frame = isinstance(sample, DataFrame)
    if self._is_frame:
        axis = 1 if axis == 0 else 0

    self._is_series = isinstance(sample, Series)
    if not 0 <= axis <= sample.ndim:
        raise AssertionError(
            f"axis must be between 0 and {sample.ndim}, input was {axis}"
        )

    # if we have mixed ndims, then convert to highest ndim
    # creating column numbers as needed
    if len(ndims) > 1:
        current_column = 0
        max_ndim = sample.ndim
        self.objs, objs = [], self.objs
        for obj in objs:

            ndim = obj.ndim
            if ndim == max_ndim:
                pass

            elif ndim != max_ndim - 1:
                raise ValueError(
                    "cannot concatenate unaligned mixed "
                    "dimensional NDFrame objects"
                )

            else:
                name = getattr(obj, "name", None)
                if ignore_index or name is None:
                    name = current_column
                    current_column += 1

                # doing a row-wise concatenation so need everything
                # to line up
                if self._is_frame and axis == 1:
                    name = 0
                obj = sample._constructor({name: obj})

            self.objs.append(obj)

    # note: this is the BlockManager axis (since DataFrame is transposed)
    self.axis = axis
    self.keys = keys
    self.names = names or getattr(keys, "names", None)
    self.levels = levels
    self.sort = sort

    self.ignore_index = ignore_index
    self.verify_integrity = verify_integrity
    self.copy = copy

    self.new_axes = self._get_new_axes()
indexers)) + + new_data = concatenate_block_managers( + mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy + ) + if not self.copy: + new_data._consolidate_inplace() + + cons = self.objs[0]._constructor + return cons._from_axes(new_data, self.new_axes).__finalize__( + self, method="concat" + ) + + def _get_result_dim(self) -> int: + if self._is_series and self.axis == 1: + return 2 + else: + return self.objs[0].ndim + + def _get_new_axes(self) -> List[Index]: + ndim = self._get_result_dim() + return [ + self._get_concat_axis() if i == self.axis else self._get_comb_axis(i) + for i in range(ndim) + ] + + def _get_comb_axis(self, i: int) -> Index: + data_axis = self.objs[0]._get_block_manager_axis(i) + return get_objs_combined_axis( + self.objs, axis=data_axis, intersect=self.intersect, sort=self.sort + ) + + def _get_concat_axis(self) -> Index: + """ + Return index to be used along concatenation axis. + """ + if self._is_series: + if self.axis == 0: + indexes = [x.index for x in self.objs] + elif self.ignore_index: + idx = ibase.default_index(len(self.objs)) + return idx + elif self.keys is None: + names: List[Optional[Hashable]] = [None] * len(self.objs) + num = 0 + has_names = False + for i, x in enumerate(self.objs): + if not isinstance(x, Series): + raise TypeError( + f"Cannot concatenate type 'Series' with " + f"object of type '{type(x).__name__}'" + ) + if x.name is not None: + names[i] = x.name + has_names = True + else: + names[i] = num + num += 1 + if has_names: + return Index(names) + else: + return ibase.default_index(len(self.objs)) + else: + return ensure_index(self.keys).set_names(self.names) + else: + indexes = [x._data.axes[self.axis] for x in self.objs] + + if self.ignore_index: + idx = ibase.default_index(sum(len(i) for i in indexes)) + return idx + + if self.keys is None: + concat_axis = _concat_indexes(indexes) + else: + concat_axis = _make_concat_multiindex( + indexes, self.keys, self.levels, self.names + ) + + 
def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiIndex:
    """
    Build the MultiIndex for a keyed concatenation.

    The outer level(s) come from ``keys`` (one key per entry of
    ``indexes``); the inner level(s) come from the concatenated indexes.

    Parameters
    ----------
    indexes : list of Index
        The indexes being concatenated, one per key.
    keys : sequence
        One key per index; tuples produce several outer levels.
    levels : list of sequences, optional
        Explicit outer levels; derived from ``keys`` when None.
    names : list, optional
        Names for the resulting levels.

    Returns
    -------
    MultiIndex

    Raises
    ------
    ValueError
        If a key is not present in its level.
    AssertionError
        If names must be inferred and the indexes do not all have the
        same number of levels.
    """
    if (levels is None and isinstance(keys[0], tuple)) or (
        levels is not None and len(levels) > 1
    ):
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            _, levels = factorize_from_iterables(zipped)
        else:
            levels = [ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [ensure_index(keys)]
        else:
            levels = [ensure_index(x) for x in levels]

    if not all_indexes_same(indexes):
        codes_list = []

        # things are potentially different sizes, so compute the exact codes
        # for each level and pass those to MultiIndex.from_arrays

        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError as err:
                    # keep the failed lookup as the explicit cause
                    raise ValueError(
                        f"Key {key!s} not in level {level!s}"
                    ) from err

                to_concat.append(np.repeat(i, len(index)))
            codes_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            codes_list.extend(concat_index.codes)
        else:
            codes, categories = factorize_from_iterable(concat_index)
            levels.append(categories)
            codes_list.append(codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if len({idx.nlevels for idx in indexes}) != 1:
                raise AssertionError(
                    "Cannot concat indices that do "
                    "not have the same number of levels"
                )

            # also copies
            names = names + get_consensus_names(indexes)

        return MultiIndex(
            levels=levels, codes=codes_list, names=names, verify_integrity=False
        )

    # Fast path: every index is the same, so the inner codes can be tiled
    # and the outer codes repeated instead of computed per piece.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct codes
    new_codes = []

    # do something a bit more speedy

    for hlevel, level in zip(zipped, levels):
        hlevel = ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError(
                f"Values not found in passed level: {hlevel[mask]!s}"
            )

        new_codes.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
    else:
        new_levels.append(new_index)
        new_codes.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )
pandas.core.reshape.concat import concat +from pandas.core.tools.numeric import to_numeric + + +@Appender( + _shared_docs["melt"] + % dict(caller="pd.melt(df, ", versionadded="", other="DataFrame.melt") +) +def melt( + frame: DataFrame, + id_vars=None, + value_vars=None, + var_name=None, + value_name="value", + col_level=None, +) -> DataFrame: + # TODO: what about the existing index? + # If multiindex, gather names of columns on all level for checking presence + # of `id_vars` and `value_vars` + if isinstance(frame.columns, ABCMultiIndex): + cols = [x for c in frame.columns for x in c] + else: + cols = list(frame.columns) + + if id_vars is not None: + if not is_list_like(id_vars): + id_vars = [id_vars] + elif isinstance(frame.columns, ABCMultiIndex) and not isinstance(id_vars, list): + raise ValueError( + "id_vars must be a list of tuples when columns are a MultiIndex" + ) + else: + # Check that `id_vars` are in frame + id_vars = list(id_vars) + missing = Index(com.flatten(id_vars)).difference(cols) + if not missing.empty: + raise KeyError( + "The following 'id_vars' are not present " + "in the DataFrame: {missing}" + "".format(missing=list(missing)) + ) + else: + id_vars = [] + + if value_vars is not None: + if not is_list_like(value_vars): + value_vars = [value_vars] + elif isinstance(frame.columns, ABCMultiIndex) and not isinstance( + value_vars, list + ): + raise ValueError( + "value_vars must be a list of tuples when columns are a MultiIndex" + ) + else: + value_vars = list(value_vars) + # Check that `value_vars` are in frame + missing = Index(com.flatten(value_vars)).difference(cols) + if not missing.empty: + raise KeyError( + "The following 'value_vars' are not present in " + "the DataFrame: {missing}" + "".format(missing=list(missing)) + ) + frame = frame.loc[:, id_vars + value_vars] + else: + frame = frame.copy() + + if col_level is not None: # allow list or other? 
@deprecate_kwarg(old_arg_name="label", new_arg_name=None)
def lreshape(data: DataFrame, groups, dropna: bool = True, label=None) -> DataFrame:
    """
    Reshape wide-format data to long. Generalized inverse of DataFrame.pivot.

    Each entry of `groups` names a new column and the existing columns that
    are stacked, in order, to form it. All remaining columns are repeated
    once per stacked column.

    Parameters
    ----------
    data : DataFrame
        The wide-format DataFrame.
    groups : dict
        {new_name : list_of_columns}. Every list must have the same length.
    dropna : bool, default True
        Drop the rows in which any of the new columns is missing.
    label : None
        Not used by the implementation; marked deprecated.

    Returns
    -------
    DataFrame
        Reshaped DataFrame: the untouched columns first, then the new ones.

    Raises
    ------
    ValueError
        If the column lists in `groups` differ in length.

    Examples
    --------
    >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
    ...                      'team': ['Red Sox', 'Yankees'],
    ...                      'year1': [2007, 2007], 'year2': [2008, 2008]})
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
          team  year   hr
    0  Red Sox  2007  514
    1  Yankees  2007  573
    2  Red Sox  2008  545
    3  Yankees  2008  526
    """
    if isinstance(groups, dict):
        keys = list(groups.keys())
        values = list(groups.values())
    else:
        keys, values = zip(*groups)

    all_cols = list(set.union(*[set(x) for x in values]))
    id_cols = list(data.columns.difference(all_cols))

    K = len(values[0])

    if any(len(seq) != K for seq in values):
        raise ValueError("All column lists must be same length")

    # Stack each group of source columns end to end under its new name.
    mdata = {
        target: concat_compat([data[col].values for col in names])
        for target, names in zip(keys, values)
    }
    pivot_cols = list(keys)

    # Repeat the untouched columns once per stacked source column.
    for col in id_cols:
        mdata[col] = np.tile(data[col].values, K)

    if dropna:
        mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
        for c in pivot_cols:
            mask &= notna(mdata[c])
        if not mask.all():
            mdata = {k: v[mask] for k, v in mdata.items()}

    return data._constructor(mdata, columns=id_cols + pivot_cols)
+ + Parameters + ---------- + df : DataFrame + The wide-format DataFrame. + stubnames : str or list-like + The stub name(s). The wide format variables are assumed to + start with the stub names. + i : str or list-like + Column(s) to use as id variable(s). + j : str + The name of the sub-observation variable. What you wish to name your + suffix in the long format. + sep : str, default "" + A character indicating the separation of the variable names + in the wide format, to be stripped from the names in the long format. + For example, if your column names are A-suffix1, A-suffix2, you + can strip the hyphen by specifying `sep='-'`. + suffix : str, default '\\d+' + A regular expression capturing the wanted suffixes. '\\d+' captures + numeric suffixes. Suffixes with no numbers could be specified with the + negated character class '\\D+'. You can also further disambiguate + suffixes, for example, if your wide variables are of the form + A-one, B-two,.., and you have an unrelated column A-rating, you can + ignore the last one by specifying `suffix='(!?one|two)'`. + + .. versionchanged:: 0.23.0 + When all suffixes are numeric, they are cast to int64/float64. + + Returns + ------- + DataFrame + A DataFrame that contains each stub name as a variable, with new index + (i, j). + + Notes + ----- + All extra variables are left untouched. This simply uses + `pandas.melt` under the hood, but is hard-coded to "do the right thing" + in a typical case. + + Examples + -------- + >>> np.random.seed(123) + >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, + ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, + ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, + ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, + ... "X" : dict(zip(range(3), np.random.randn(3))) + ... }) + >>> df["id"] = df.index + >>> df + A1970 A1980 B1970 B1980 X id + 0 a d 2.5 3.2 -1.085631 0 + 1 b e 1.2 1.3 0.997345 1 + 2 c f 0.7 0.1 0.282978 2 + >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") + ... 
# doctest: +NORMALIZE_WHITESPACE + X A B + id year + 0 1970 -1.085631 a 2.5 + 1 1970 0.997345 b 1.2 + 2 1970 0.282978 c 0.7 + 0 1980 -1.085631 d 3.2 + 1 1980 0.997345 e 1.3 + 2 1980 0.282978 f 0.1 + + With multiple id columns + + >>> df = pd.DataFrame({ + ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + ... }) + >>> df + famid birth ht1 ht2 + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') + >>> l + ... # doctest: +NORMALIZE_WHITESPACE + ht + famid birth age + 1 1 1 2.8 + 2 3.4 + 2 1 2.9 + 2 3.8 + 3 1 2.2 + 2 2.9 + 2 1 1 2.0 + 2 3.2 + 2 1 1.8 + 2 2.8 + 3 1 1.9 + 2 2.4 + 3 1 1 2.2 + 2 3.3 + 2 1 2.3 + 2 3.4 + 3 1 2.1 + 2 2.9 + + Going from long back to wide just takes some creative use of `unstack` + + >>> w = l.unstack() + >>> w.columns = w.columns.map('{0[0]}{0[1]}'.format) + >>> w.reset_index() + famid birth ht1 ht2 + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + + Less wieldy column names are also handled + + >>> np.random.seed(0) + >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3), + ... 'A(weekly)-2011': np.random.rand(3), + ... 'B(weekly)-2010': np.random.rand(3), + ... 'B(weekly)-2011': np.random.rand(3), + ... 'X' : np.random.randint(3, size=3)}) + >>> df['id'] = df.index + >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id + 0 0.548814 0.544883 0.437587 0.383442 0 0 + 1 0.715189 0.423655 0.891773 0.791725 1 1 + 2 0.602763 0.645894 0.963663 0.528895 1 2 + + >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id', + ... j='year', sep='-') + ... 
# doctest: +NORMALIZE_WHITESPACE + X A(weekly) B(weekly) + id year + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 + + If we have many columns, we could also use a regex to find our + stubnames and pass that list on to wide_to_long + + >>> stubnames = sorted( + ... set([match[0] for match in df.columns.str.findall( + ... r'[A-B]\(.*\)').values if match != []]) + ... ) + >>> list(stubnames) + ['A(weekly)', 'B(weekly)'] + + All of the above examples have integers as suffixes. It is possible to + have non-integers as suffixes. + + >>> df = pd.DataFrame({ + ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... 'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + ... }) + >>> df + famid birth ht_one ht_two + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + + >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age', + ... sep='_', suffix='\w+') + >>> l + ... 
# doctest: +NORMALIZE_WHITESPACE + ht + famid birth age + 1 1 one 2.8 + two 3.4 + 2 one 2.9 + two 3.8 + 3 one 2.2 + two 2.9 + 2 1 one 2.0 + two 3.2 + 2 one 1.8 + two 2.8 + 3 one 1.9 + two 2.4 + 3 1 one 2.2 + two 3.3 + 2 one 2.3 + two 3.4 + 3 one 2.1 + two 2.9 + """ + + def get_var_names(df, stub: str, sep: str, suffix: str) -> List[str]: + regex = r"^{stub}{sep}{suffix}$".format( + stub=re.escape(stub), sep=re.escape(sep), suffix=suffix + ) + pattern = re.compile(regex) + return [col for col in df.columns if pattern.match(col)] + + def melt_stub(df, stub: str, i, j, value_vars, sep: str): + newdf = melt( + df, + id_vars=i, + value_vars=value_vars, + value_name=stub.rstrip(sep), + var_name=j, + ) + newdf[j] = Categorical(newdf[j]) + newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "") + + # GH17627 Cast numerics suffixes to int/float + newdf[j] = to_numeric(newdf[j], errors="ignore") + + return newdf.set_index(i + [j]) + + if not is_list_like(stubnames): + stubnames = [stubnames] + else: + stubnames = list(stubnames) + + if any(col in stubnames for col in df.columns): + raise ValueError("stubname can't be identical to a column name") + + if not is_list_like(i): + i = [i] + else: + i = list(i) + + if df[i].duplicated().any(): + raise ValueError("the id variables need to uniquely identify each row") + + value_vars = [get_var_names(df, stub, sep, suffix) for stub in stubnames] + + value_vars_flattened = [e for sublist in value_vars for e in sublist] + id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened)) + + _melted = [melt_stub(df, s, i, j, v, sep) for s, v in zip(stubnames, value_vars)] + melted = _melted[0].join(_melted[1:], how="outer") + + if len(i) == 1: + new = df[id_vars].set_index(i).join(melted) + return new + + new = df[id_vars].merge(melted.reset_index(), on=i).set_index(i + [j]) + + return new diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py new file mode 100644 index 00000000..5f92e4a8 --- /dev/null +++ 
b/pandas/core/reshape/merge.py @@ -0,0 +1,2052 @@ +""" +SQL-style merge routines +""" + +import copy +import datetime +from functools import partial +import string +from typing import TYPE_CHECKING, Optional, Tuple, Union +import warnings + +import numpy as np + +from pandas._libs import Timedelta, hashtable as libhashtable, lib +import pandas._libs.join as libjoin +from pandas._typing import FrameOrSeries +from pandas.errors import MergeError +from pandas.util._decorators import Appender, Substitution + +from pandas.core.dtypes.common import ( + ensure_float64, + ensure_int64, + ensure_object, + is_array_like, + is_bool, + is_bool_dtype, + is_categorical_dtype, + is_datetime64tz_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_number, + is_numeric_dtype, + is_object_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.missing import isna, na_value_for_dtype + +from pandas import Categorical, Index, MultiIndex +from pandas.core import groupby +import pandas.core.algorithms as algos +from pandas.core.arrays.categorical import _recode_for_categories +import pandas.core.common as com +from pandas.core.frame import _merge_doc +from pandas.core.internals import _transform_index, concatenate_block_managers +from pandas.core.sorting import is_int64_overflow_possible + +if TYPE_CHECKING: + from pandas import DataFrame, Series # noqa:F401 + + +@Substitution("\nleft : DataFrame") +@Appender(_merge_doc, indents=0) +def merge( + left, + right, + how: str = "inner", + on=None, + left_on=None, + right_on=None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes=("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate=None, +) -> "DataFrame": + op = _MergeOperation( + left, + right, + how=how, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + 
right_index=right_index, + sort=sort, + suffixes=suffixes, + copy=copy, + indicator=indicator, + validate=validate, + ) + return op.get_result() + + +if __debug__: + merge.__doc__ = _merge_doc % "\nleft : DataFrame" + + +def _groupby_and_merge( + by, on, left, right: "DataFrame", _merge_pieces, check_duplicates: bool = True +): + """ + groupby & merge; we are always performing a left-by type operation + + Parameters + ---------- + by: field to group + on: duplicates field + left: left frame + right: right frame + _merge_pieces: function for merging + check_duplicates: bool, default True + should we check & clean duplicates + """ + + pieces = [] + if not isinstance(by, (list, tuple)): + by = [by] + + lby = left.groupby(by, sort=False) + rby: Optional[groupby.DataFrameGroupBy] = None + + # if we can groupby the rhs + # then we can get vastly better perf + + # we will check & remove duplicates if indicated + if check_duplicates: + if on is None: + on = [] + elif not isinstance(on, (list, tuple)): + on = [on] + + if right.duplicated(by + on).any(): + _right = right.drop_duplicates(by + on, keep="last") + # TODO: use overload to refine return type of drop_duplicates + assert _right is not None # needed for mypy + right = _right + try: + rby = right.groupby(by, sort=False) + except KeyError: + pass + + for key, lhs in lby: + + if rby is None: + rhs = right + else: + try: + rhs = right.take(rby.indices[key]) + except KeyError: + # key doesn't exist in left + lcols = lhs.columns.tolist() + cols = lcols + [r for r in right.columns if r not in set(lcols)] + merged = lhs.reindex(columns=cols) + merged.index = range(len(merged)) + pieces.append(merged) + continue + + merged = _merge_pieces(lhs, rhs) + + # make sure join keys are in the merged + # TODO, should _merge_pieces do this? 
+ for k in by: + try: + if k in merged: + merged[k] = key + except KeyError: + pass + + pieces.append(merged) + + # preserve the original order + # if we have a missing piece this can be reset + from pandas.core.reshape.concat import concat + + result = concat(pieces, ignore_index=True) + result = result.reindex(columns=pieces[0].columns, copy=False) + return result, lby + + +def merge_ordered( + left, + right, + on=None, + left_on=None, + right_on=None, + left_by=None, + right_by=None, + fill_method=None, + suffixes=("_x", "_y"), + how: str = "outer", +) -> "DataFrame": + """ + Perform merge with optional filling/interpolation. + + Designed for ordered data like time series data. Optionally + perform group-wise merge (see examples). + + Parameters + ---------- + left : DataFrame + right : DataFrame + on : label or list + Field names to join on. Must be found in both DataFrames. + left_on : label or list, or array-like + Field names to join on in left DataFrame. Can be a vector or list of + vectors of the length of the DataFrame to use a particular vector as + the join key instead of columns. + right_on : label or list, or array-like + Field names to join on in right DataFrame or vector/list of vectors per + left_on docs. + left_by : column name or list of column names + Group left DataFrame by group columns and merge piece by piece with + right DataFrame. + right_by : column name or list of column names + Group right DataFrame by group columns and merge piece by piece with + left DataFrame. + fill_method : {'ffill', None}, default None + Interpolation method for data. + suffixes : Sequence, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. + + .. 
versionchanged:: 0.25.0 + how : {'left', 'right', 'outer', 'inner'}, default 'outer' + * left: use only keys from left frame (SQL: left outer join) + * right: use only keys from right frame (SQL: right outer join) + * outer: use union of keys from both frames (SQL: full outer join) + * inner: use intersection of keys from both frames (SQL: inner join). + + Returns + ------- + DataFrame + The merged DataFrame output type will the be same as + 'left', if it is a subclass of DataFrame. + + See Also + -------- + merge + merge_asof + + Examples + -------- + >>> A + key lvalue group + 0 a 1 a + 1 c 2 a + 2 e 3 a + 3 a 1 b + 4 c 2 b + 5 e 3 b + + >>> B + Key rvalue + 0 b 1 + 1 c 2 + 2 d 3 + + >>> merge_ordered(A, B, fill_method='ffill', left_by='group') + group key lvalue rvalue + 0 a a 1 NaN + 1 a b 1 1.0 + 2 a c 2 2.0 + 3 a d 2 3.0 + 4 a e 3 3.0 + 5 b a 1 NaN + 6 b b 1 1.0 + 7 b c 2 2.0 + 8 b d 2 3.0 + 9 b e 3 3.0 + """ + + def _merger(x, y): + # perform the ordered merge operation + op = _OrderedMerge( + x, + y, + on=on, + left_on=left_on, + right_on=right_on, + suffixes=suffixes, + fill_method=fill_method, + how=how, + ) + return op.get_result() + + if left_by is not None and right_by is not None: + raise ValueError("Can only group either left or right frames") + elif left_by is not None: + result, _ = _groupby_and_merge( + left_by, on, left, right, lambda x, y: _merger(x, y), check_duplicates=False + ) + elif right_by is not None: + result, _ = _groupby_and_merge( + right_by, + on, + right, + left, + lambda x, y: _merger(y, x), + check_duplicates=False, + ) + else: + result = _merger(left, right) + return result + + +def merge_asof( + left, + right, + on=None, + left_on=None, + right_on=None, + left_index: bool = False, + right_index: bool = False, + by=None, + left_by=None, + right_by=None, + suffixes=("_x", "_y"), + tolerance=None, + allow_exact_matches: bool = True, + direction: str = "backward", +) -> "DataFrame": + """ + Perform an asof merge. 
This is similar to a left-join except that we + match on nearest key rather than equal keys. + + Both DataFrames must be sorted by the key. + + For each row in the left DataFrame: + + - A "backward" search selects the last row in the right DataFrame whose + 'on' key is less than or equal to the left's key. + + - A "forward" search selects the first row in the right DataFrame whose + 'on' key is greater than or equal to the left's key. + + - A "nearest" search selects the row in the right DataFrame whose 'on' + key is closest in absolute distance to the left's key. + + The default is "backward" and is compatible in versions below 0.20.0. + The direction parameter was added in version 0.20.0 and introduces + "forward" and "nearest". + + Optionally match on equivalent keys with 'by' before searching with 'on'. + + Parameters + ---------- + left : DataFrame + right : DataFrame + on : label + Field name to join on. Must be found in both DataFrames. + The data MUST be ordered. Furthermore this must be a numeric column, + such as datetimelike, integer, or float. On or left_on/right_on + must be given. + left_on : label + Field name to join on in left DataFrame. + right_on : label + Field name to join on in right DataFrame. + left_index : bool + Use the index of the left DataFrame as the join key. + right_index : bool + Use the index of the right DataFrame as the join key. + by : column name or list of column names + Match on these columns before performing merge operation. + left_by : column name + Field names to match on in the left DataFrame. + right_by : column name + Field names to match on in the right DataFrame. + suffixes : 2-length sequence (tuple, list, ...) + Suffix to apply to overlapping column names in the left and right + side, respectively. + tolerance : int or Timedelta, optional, default None + Select asof tolerance within this range; must be compatible + with the merge index. 
+ allow_exact_matches : bool, default True + + - If True, allow matching with the same 'on' value + (i.e. less-than-or-equal-to / greater-than-or-equal-to) + - If False, don't match the same 'on' value + (i.e., strictly less-than / strictly greater-than). + + direction : 'backward' (default), 'forward', or 'nearest' + Whether to search for prior, subsequent, or closest matches. + + Returns + ------- + merged : DataFrame + + See Also + -------- + merge + merge_ordered + + Examples + -------- + >>> left = pd.DataFrame({'a': [1, 5, 10], 'left_val': ['a', 'b', 'c']}) + >>> left + a left_val + 0 1 a + 1 5 b + 2 10 c + + >>> right = pd.DataFrame({'a': [1, 2, 3, 6, 7], + ... 'right_val': [1, 2, 3, 6, 7]}) + >>> right + a right_val + 0 1 1 + 1 2 2 + 2 3 3 + 3 6 6 + 4 7 7 + + >>> pd.merge_asof(left, right, on='a') + a left_val right_val + 0 1 a 1 + 1 5 b 3 + 2 10 c 7 + + >>> pd.merge_asof(left, right, on='a', allow_exact_matches=False) + a left_val right_val + 0 1 a NaN + 1 5 b 3.0 + 2 10 c 7.0 + + >>> pd.merge_asof(left, right, on='a', direction='forward') + a left_val right_val + 0 1 a 1.0 + 1 5 b 6.0 + 2 10 c NaN + + >>> pd.merge_asof(left, right, on='a', direction='nearest') + a left_val right_val + 0 1 a 1 + 1 5 b 6 + 2 10 c 7 + + We can use indexed DataFrames as well. + + >>> left = pd.DataFrame({'left_val': ['a', 'b', 'c']}, index=[1, 5, 10]) + >>> left + left_val + 1 a + 5 b + 10 c + + >>> right = pd.DataFrame({'right_val': [1, 2, 3, 6, 7]}, + ... 
index=[1, 2, 3, 6, 7]) + >>> right + right_val + 1 1 + 2 2 + 3 3 + 6 6 + 7 7 + + >>> pd.merge_asof(left, right, left_index=True, right_index=True) + left_val right_val + 1 a 1 + 5 b 3 + 10 c 7 + + Here is a real-world times-series example + + >>> quotes + time ticker bid ask + 0 2016-05-25 13:30:00.023 GOOG 720.50 720.93 + 1 2016-05-25 13:30:00.023 MSFT 51.95 51.96 + 2 2016-05-25 13:30:00.030 MSFT 51.97 51.98 + 3 2016-05-25 13:30:00.041 MSFT 51.99 52.00 + 4 2016-05-25 13:30:00.048 GOOG 720.50 720.93 + 5 2016-05-25 13:30:00.049 AAPL 97.99 98.01 + 6 2016-05-25 13:30:00.072 GOOG 720.50 720.88 + 7 2016-05-25 13:30:00.075 MSFT 52.01 52.03 + + >>> trades + time ticker price quantity + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 + + By default we are taking the asof of the quotes + + >>> pd.merge_asof(trades, quotes, + ... on='time', + ... by='ticker') + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 2ms between the quote time and the trade time + + >>> pd.merge_asof(trades, quotes, + ... on='time', + ... by='ticker', + ... tolerance=pd.Timedelta('2ms')) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 51.95 51.96 + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 NaN NaN + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 720.50 720.93 + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 720.50 720.93 + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + + We only asof within 10ms between the quote time and the trade time + and we exclude exact matches on time. 
However *prior* data will + propagate forward + + >>> pd.merge_asof(trades, quotes, + ... on='time', + ... by='ticker', + ... tolerance=pd.Timedelta('10ms'), + ... allow_exact_matches=False) + time ticker price quantity bid ask + 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN + 1 2016-05-25 13:30:00.038 MSFT 51.95 155 51.97 51.98 + 2 2016-05-25 13:30:00.048 GOOG 720.77 100 NaN NaN + 3 2016-05-25 13:30:00.048 GOOG 720.92 100 NaN NaN + 4 2016-05-25 13:30:00.048 AAPL 98.00 100 NaN NaN + """ + op = _AsOfMerge( + left, + right, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + by=by, + left_by=left_by, + right_by=right_by, + suffixes=suffixes, + how="asof", + tolerance=tolerance, + allow_exact_matches=allow_exact_matches, + direction=direction, + ) + return op.get_result() + + +# TODO: transformations?? +# TODO: only copy DataFrames when modification necessary +class _MergeOperation: + """ + Perform a database (SQL) merge operation between two DataFrame or Series + objects using either columns as keys or their row indexes + """ + + _merge_type = "merge" + + def __init__( + self, + left: Union["Series", "DataFrame"], + right: Union["Series", "DataFrame"], + how: str = "inner", + on=None, + left_on=None, + right_on=None, + axis=1, + left_index: bool = False, + right_index: bool = False, + sort: bool = True, + suffixes=("_x", "_y"), + copy: bool = True, + indicator: bool = False, + validate=None, + ): + _left = _validate_operand(left) + _right = _validate_operand(right) + self.left = self.orig_left = _left + self.right = self.orig_right = _right + self.how = how + self.axis = axis + + self.on = com.maybe_make_list(on) + self.left_on = com.maybe_make_list(left_on) + self.right_on = com.maybe_make_list(right_on) + + self.copy = copy + self.suffixes = suffixes + self.sort = sort + + self.left_index = left_index + self.right_index = right_index + + self.indicator = indicator + + self.indicator_name: Optional[str] + if 
isinstance(self.indicator, str): + self.indicator_name = self.indicator + elif isinstance(self.indicator, bool): + self.indicator_name = "_merge" if self.indicator else None + else: + raise ValueError( + "indicator option can only accept boolean or string arguments" + ) + + if not is_bool(left_index): + raise ValueError( + "left_index parameter must be of type bool, not " + "{left_index}".format(left_index=type(left_index)) + ) + if not is_bool(right_index): + raise ValueError( + "right_index parameter must be of type bool, not " + "{right_index}".format(right_index=type(right_index)) + ) + + # warn user when merging between different levels + if _left.columns.nlevels != _right.columns.nlevels: + msg = ( + "merging between different levels can give an unintended " + "result ({left} levels on the left, {right} on the right)" + ).format(left=_left.columns.nlevels, right=_right.columns.nlevels) + warnings.warn(msg, UserWarning) + + self._validate_specification() + + # note this function has side effects + ( + self.left_join_keys, + self.right_join_keys, + self.join_names, + ) = self._get_merge_keys() + + # validate the merge keys dtypes. We may need to coerce + # to avoid incompat dtypes + self._maybe_coerce_merge_keys() + + # If argument passed to validate, + # check if columns specified as unique + # are in fact unique. 
+ if validate is not None: + self._validate(validate) + + def get_result(self): + if self.indicator: + self.left, self.right = self._indicator_pre_merge(self.left, self.right) + + join_index, left_indexer, right_indexer = self._get_join_info() + + ldata, rdata = self.left._data, self.right._data + lsuf, rsuf = self.suffixes + + llabels, rlabels = _items_overlap_with_suffix( + ldata.items, lsuf, rdata.items, rsuf + ) + + lindexers = {1: left_indexer} if left_indexer is not None else {} + rindexers = {1: right_indexer} if right_indexer is not None else {} + + result_data = concatenate_block_managers( + [(ldata, lindexers), (rdata, rindexers)], + axes=[llabels.append(rlabels), join_index], + concat_axis=0, + copy=self.copy, + ) + + typ = self.left._constructor + result = typ(result_data).__finalize__(self, method=self._merge_type) + + if self.indicator: + result = self._indicator_post_merge(result) + + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + self._maybe_restore_index_levels(result) + + return result + + def _indicator_pre_merge( + self, left: "DataFrame", right: "DataFrame" + ) -> Tuple["DataFrame", "DataFrame"]: + + columns = left.columns.union(right.columns) + + for i in ["_left_indicator", "_right_indicator"]: + if i in columns: + raise ValueError( + "Cannot use `indicator=True` option when " + "data contains a column named {name}".format(name=i) + ) + if self.indicator_name in columns: + raise ValueError( + "Cannot use name of an existing column for indicator column" + ) + + left = left.copy() + right = right.copy() + + left["_left_indicator"] = 1 + left["_left_indicator"] = left["_left_indicator"].astype("int8") + + right["_right_indicator"] = 2 + right["_right_indicator"] = right["_right_indicator"].astype("int8") + + return left, right + + def _indicator_post_merge(self, result): + + result["_left_indicator"] = result["_left_indicator"].fillna(0) + result["_right_indicator"] = result["_right_indicator"].fillna(0) + + 
result[self.indicator_name] = Categorical( + (result["_left_indicator"] + result["_right_indicator"]), + categories=[1, 2, 3], + ) + result[self.indicator_name] = result[self.indicator_name].cat.rename_categories( + ["left_only", "right_only", "both"] + ) + + result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1) + return result + + def _maybe_restore_index_levels(self, result): + """ + Restore index levels specified as `on` parameters + + Here we check for cases where `self.left_on` and `self.right_on` pairs + each reference an index level in their respective DataFrames. The + joined columns corresponding to these pairs are then restored to the + index of `result`. + + **Note:** This method has side effects. It modifies `result` in-place + + Parameters + ---------- + result: DataFrame + merge result + + Returns + ------- + None + """ + names_to_restore = [] + for name, left_key, right_key in zip( + self.join_names, self.left_on, self.right_on + ): + if ( + self.orig_left._is_level_reference(left_key) + and self.orig_right._is_level_reference(right_key) + and name not in result.index.names + ): + + names_to_restore.append(name) + + if names_to_restore: + result.set_index(names_to_restore, inplace=True) + + def _maybe_add_join_keys(self, result, left_indexer, right_indexer): + + left_has_missing = None + right_has_missing = None + + keys = zip(self.join_names, self.left_on, self.right_on) + for i, (name, lname, rname) in enumerate(keys): + if not _should_fill(lname, rname): + continue + + take_left, take_right = None, None + + if name in result: + + if left_indexer is not None and right_indexer is not None: + if name in self.left: + + if left_has_missing is None: + left_has_missing = (left_indexer == -1).any() + + if left_has_missing: + take_right = self.right_join_keys[i] + + if not is_dtype_equal( + result[name].dtype, self.left[name].dtype + ): + take_left = self.left[name]._values + + elif name in self.right: + + if right_has_missing is 
None: + right_has_missing = (right_indexer == -1).any() + + if right_has_missing: + take_left = self.left_join_keys[i] + + if not is_dtype_equal( + result[name].dtype, self.right[name].dtype + ): + take_right = self.right[name]._values + + elif left_indexer is not None and is_array_like(self.left_join_keys[i]): + take_left = self.left_join_keys[i] + take_right = self.right_join_keys[i] + + if take_left is not None or take_right is not None: + + if take_left is None: + lvals = result[name]._values + else: + lfill = na_value_for_dtype(take_left.dtype) + lvals = algos.take_1d(take_left, left_indexer, fill_value=lfill) + + if take_right is None: + rvals = result[name]._values + else: + rfill = na_value_for_dtype(take_right.dtype) + rvals = algos.take_1d(take_right, right_indexer, fill_value=rfill) + + # if we have an all missing left_indexer + # make sure to just use the right values + mask = left_indexer == -1 + if mask.all(): + key_col = rvals + else: + key_col = Index(lvals).where(~mask, rvals) + + if result._is_label_reference(name): + result[name] = key_col + elif result._is_level_reference(name): + if isinstance(result.index, MultiIndex): + key_col.name = name + idx_list = [ + result.index.get_level_values(level_name) + if level_name != name + else key_col + for level_name in result.index.names + ] + + result.set_index(idx_list, inplace=True) + else: + result.index = Index(key_col, name=name) + else: + result.insert(i, name or "key_{i}".format(i=i), key_col) + + def _get_join_indexers(self): + """ return the join indexers """ + return _get_join_indexers( + self.left_join_keys, self.right_join_keys, sort=self.sort, how=self.how + ) + + def _get_join_info(self): + left_ax = self.left._data.axes[self.axis] + right_ax = self.right._data.axes[self.axis] + + if self.left_index and self.right_index and self.how != "asof": + join_index, left_indexer, right_indexer = left_ax.join( + right_ax, how=self.how, return_indexers=True, sort=self.sort + ) + elif self.right_index 
and self.how == "left": + join_index, left_indexer, right_indexer = _left_join_on_index( + left_ax, right_ax, self.left_join_keys, sort=self.sort + ) + + elif self.left_index and self.how == "right": + join_index, right_indexer, left_indexer = _left_join_on_index( + right_ax, left_ax, self.right_join_keys, sort=self.sort + ) + else: + (left_indexer, right_indexer) = self._get_join_indexers() + + if self.right_index: + if len(self.left) > 0: + join_index = self._create_join_index( + self.left.index, + self.right.index, + left_indexer, + right_indexer, + how="right", + ) + else: + join_index = self.right.index.take(right_indexer) + left_indexer = np.array([-1] * len(join_index)) + elif self.left_index: + if len(self.right) > 0: + join_index = self._create_join_index( + self.right.index, + self.left.index, + right_indexer, + left_indexer, + how="left", + ) + else: + join_index = self.left.index.take(left_indexer) + right_indexer = np.array([-1] * len(join_index)) + else: + join_index = Index(np.arange(len(left_indexer))) + + if len(join_index) == 0: + join_index = join_index.astype(object) + return join_index, left_indexer, right_indexer + + def _create_join_index( + self, + index: Index, + other_index: Index, + indexer, + other_indexer, + how: str = "left", + ): + """ + Create a join index by rearranging one index to match another + + Parameters + ---------- + index: Index being rearranged + other_index: Index used to supply values not found in index + indexer: how to rearrange index + how: replacement is only necessary if indexer based on other_index + + Returns + ------- + join_index + """ + if self.how in (how, "outer") and not isinstance(other_index, MultiIndex): + # if final index requires values in other_index but not target + # index, indexer may hold missing (-1) values, causing Index.take + # to take the final value in target index. So, we set the last + # element to be the desired fill value. 
We do not use allow_fill + # and fill_value because it throws a ValueError on integer indices + mask = indexer == -1 + if np.any(mask): + fill_value = na_value_for_dtype(index.dtype, compat=False) + index = index.append(Index([fill_value])) + return index.take(indexer) + + def _get_merge_keys(self): + """ + Note: has side effects (copy/delete key columns) + + Parameters + ---------- + left + right + on + + Returns + ------- + left_keys, right_keys + """ + left_keys = [] + right_keys = [] + join_names = [] + right_drop = [] + left_drop = [] + + left, right = self.left, self.right + + is_lkey = lambda x: is_array_like(x) and len(x) == len(left) + is_rkey = lambda x: is_array_like(x) and len(x) == len(right) + + # Note that pd.merge_asof() has separate 'on' and 'by' parameters. A + # user could, for example, request 'left_index' and 'left_by'. In a + # regular pd.merge(), users cannot specify both 'left_index' and + # 'left_on'. (Instead, users have a MultiIndex). That means the + # self.left_on in this function is always empty in a pd.merge(), but + # a pd.merge_asof(left_index=True, left_by=...) will result in a + # self.left_on array with a None in the middle of it. This requires + # a work-around as designated in the code below. + # See _validate_specification() for where this happens. + + # ugh, spaghetti re #733 + if _any(self.left_on) and _any(self.right_on): + for lk, rk in zip(self.left_on, self.right_on): + if is_lkey(lk): + left_keys.append(lk) + if is_rkey(rk): + right_keys.append(rk) + join_names.append(None) # what to do? 
+ else: + if rk is not None: + right_keys.append(right._get_label_or_level_values(rk)) + join_names.append(rk) + else: + # work-around for merge_asof(right_index=True) + right_keys.append(right.index) + join_names.append(right.index.name) + else: + if not is_rkey(rk): + if rk is not None: + right_keys.append(right._get_label_or_level_values(rk)) + else: + # work-around for merge_asof(right_index=True) + right_keys.append(right.index) + if lk is not None and lk == rk: + # avoid key upcast in corner case (length-0) + if len(left) > 0: + right_drop.append(rk) + else: + left_drop.append(lk) + else: + right_keys.append(rk) + if lk is not None: + left_keys.append(left._get_label_or_level_values(lk)) + join_names.append(lk) + else: + # work-around for merge_asof(left_index=True) + left_keys.append(left.index) + join_names.append(left.index.name) + elif _any(self.left_on): + for k in self.left_on: + if is_lkey(k): + left_keys.append(k) + join_names.append(None) + else: + left_keys.append(left._get_label_or_level_values(k)) + join_names.append(k) + if isinstance(self.right.index, MultiIndex): + right_keys = [ + lev._values.take(lev_codes) + for lev, lev_codes in zip( + self.right.index.levels, self.right.index.codes + ) + ] + else: + right_keys = [self.right.index._values] + elif _any(self.right_on): + for k in self.right_on: + if is_rkey(k): + right_keys.append(k) + join_names.append(None) + else: + right_keys.append(right._get_label_or_level_values(k)) + join_names.append(k) + if isinstance(self.left.index, MultiIndex): + left_keys = [ + lev._values.take(lev_codes) + for lev, lev_codes in zip( + self.left.index.levels, self.left.index.codes + ) + ] + else: + left_keys = [self.left.index._values] + + if left_drop: + self.left = self.left._drop_labels_or_levels(left_drop) + + if right_drop: + self.right = self.right._drop_labels_or_levels(right_drop) + + return left_keys, right_keys, join_names + + def _maybe_coerce_merge_keys(self): + # we have valid mergees but we may 
have to further + # coerce these if they are originally incompatible types + # + # for example if these are categorical, but are not dtype_equal + # or if we have object and integer dtypes + + for lk, rk, name in zip( + self.left_join_keys, self.right_join_keys, self.join_names + ): + if (len(lk) and not len(rk)) or (not len(lk) and len(rk)): + continue + + lk_is_cat = is_categorical_dtype(lk) + rk_is_cat = is_categorical_dtype(rk) + lk_is_object = is_object_dtype(lk) + rk_is_object = is_object_dtype(rk) + + # if either left or right is a categorical + # then the must match exactly in categories & ordered + if lk_is_cat and rk_is_cat: + if lk.is_dtype_equal(rk): + continue + + elif lk_is_cat or rk_is_cat: + pass + + elif is_dtype_equal(lk.dtype, rk.dtype): + continue + + msg = ( + "You are trying to merge on {lk_dtype} and " + "{rk_dtype} columns. If you wish to proceed " + "you should use pd.concat".format(lk_dtype=lk.dtype, rk_dtype=rk.dtype) + ) + + # if we are numeric, then allow differing + # kinds to proceed, eg. 
int64 and int8, int and float + # further if we are object, but we infer to + # the same, then proceed + if is_numeric_dtype(lk) and is_numeric_dtype(rk): + if lk.dtype.kind == rk.dtype.kind: + continue + + # check whether ints and floats + elif is_integer_dtype(rk) and is_float_dtype(lk): + if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int " + "representation", + UserWarning, + ) + continue + + elif is_float_dtype(rk) and is_integer_dtype(lk): + if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int " + "representation", + UserWarning, + ) + continue + + # let's infer and see if we are ok + elif lib.infer_dtype(lk, skipna=False) == lib.infer_dtype( + rk, skipna=False + ): + continue + + # Check if we are trying to merge on obviously + # incompatible dtypes GH 9780, GH 15800 + + # bool values are coerced to object + elif (lk_is_object and is_bool_dtype(rk)) or ( + is_bool_dtype(lk) and rk_is_object + ): + pass + + # object values are allowed to be merged + elif (lk_is_object and is_numeric_dtype(rk)) or ( + is_numeric_dtype(lk) and rk_is_object + ): + inferred_left = lib.infer_dtype(lk, skipna=False) + inferred_right = lib.infer_dtype(rk, skipna=False) + bool_types = ["integer", "mixed-integer", "boolean", "empty"] + string_types = ["string", "unicode", "mixed", "bytes", "empty"] + + # inferred bool + if inferred_left in bool_types and inferred_right in bool_types: + pass + + # unless we are merging non-string-like with string-like + elif ( + inferred_left in string_types and inferred_right not in string_types + ) or ( + inferred_right in string_types and inferred_left not in string_types + ): + raise ValueError(msg) + + # datetimelikes must match exactly + elif needs_i8_conversion(lk) and not needs_i8_conversion(rk): 
+ raise ValueError(msg) + elif not needs_i8_conversion(lk) and needs_i8_conversion(rk): + raise ValueError(msg) + elif is_datetime64tz_dtype(lk) and not is_datetime64tz_dtype(rk): + raise ValueError(msg) + elif not is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): + raise ValueError(msg) + + elif lk_is_object and rk_is_object: + continue + + # Houston, we have a problem! + # let's coerce to object if the dtypes aren't + # categorical, otherwise coerce to the category + # dtype. If we coerced categories to object, + # then we would lose type information on some + # columns, and end up trying to merge + # incompatible dtypes. See GH 16900. + if name in self.left.columns: + typ = lk.categories.dtype if lk_is_cat else object + self.left = self.left.assign(**{name: self.left[name].astype(typ)}) + if name in self.right.columns: + typ = rk.categories.dtype if rk_is_cat else object + self.right = self.right.assign(**{name: self.right[name].astype(typ)}) + + def _validate_specification(self): + # Hm, any way to make this logic less complicated?? + if self.on is None and self.left_on is None and self.right_on is None: + + if self.left_index and self.right_index: + self.left_on, self.right_on = (), () + elif self.left_index: + if self.right_on is None: + raise MergeError("Must pass right_on or right_index=True") + elif self.right_index: + if self.left_on is None: + raise MergeError("Must pass left_on or left_index=True") + else: + # use the common columns + common_cols = self.left.columns.intersection(self.right.columns) + if len(common_cols) == 0: + raise MergeError( + "No common columns to perform merge on. 
" + "Merge options: left_on={lon}, right_on={ron}, " + "left_index={lidx}, right_index={ridx}".format( + lon=self.left_on, + ron=self.right_on, + lidx=self.left_index, + ridx=self.right_index, + ) + ) + if not common_cols.is_unique: + raise MergeError(f"Data columns not unique: {repr(common_cols)}") + self.left_on = self.right_on = common_cols + elif self.on is not None: + if self.left_on is not None or self.right_on is not None: + raise MergeError( + 'Can only pass argument "on" OR "left_on" ' + 'and "right_on", not a combination of both.' + ) + self.left_on = self.right_on = self.on + elif self.left_on is not None: + n = len(self.left_on) + if self.right_index: + if len(self.left_on) != self.right.index.nlevels: + raise ValueError( + "len(left_on) must equal the number " + 'of levels in the index of "right"' + ) + self.right_on = [None] * n + elif self.right_on is not None: + n = len(self.right_on) + if self.left_index: + if len(self.right_on) != self.left.index.nlevels: + raise ValueError( + "len(right_on) must equal the number " + 'of levels in the index of "left"' + ) + self.left_on = [None] * n + if len(self.right_on) != len(self.left_on): + raise ValueError("len(right_on) must equal len(left_on)") + + def _validate(self, validate: str): + + # Check uniqueness of each + if self.left_index: + left_unique = self.orig_left.index.is_unique + else: + left_unique = MultiIndex.from_arrays(self.left_join_keys).is_unique + + if self.right_index: + right_unique = self.orig_right.index.is_unique + else: + right_unique = MultiIndex.from_arrays(self.right_join_keys).is_unique + + # Check data integrity + if validate in ["one_to_one", "1:1"]: + if not left_unique and not right_unique: + raise MergeError( + "Merge keys are not unique in either left " + "or right dataset; not a one-to-one merge" + ) + elif not left_unique: + raise MergeError( + "Merge keys are not unique in left dataset; " + "not a one-to-one merge" + ) + elif not right_unique: + raise MergeError( + "Merge 
keys are not unique in right dataset; " + "not a one-to-one merge" + ) + + elif validate in ["one_to_many", "1:m"]: + if not left_unique: + raise MergeError( + "Merge keys are not unique in left dataset; " + "not a one-to-many merge" + ) + + elif validate in ["many_to_one", "m:1"]: + if not right_unique: + raise MergeError( + "Merge keys are not unique in right dataset; " + "not a many-to-one merge" + ) + + elif validate in ["many_to_many", "m:m"]: + pass + + else: + raise ValueError("Not a valid argument for validate") + + +def _get_join_indexers( + left_keys, right_keys, sort: bool = False, how: str = "inner", **kwargs +): + """ + + Parameters + ---------- + left_keys: ndarray, Index, Series + right_keys: ndarray, Index, Series + sort: bool, default False + how: string {'inner', 'outer', 'left', 'right'}, default 'inner' + + Returns + ------- + tuple of (left_indexer, right_indexer) + indexers into the left_keys, right_keys + + """ + assert len(left_keys) == len( + right_keys + ), "left_key and right_keys must be the same length" + + # get left & right join labels and num. of levels at each location + mapped = ( + _factorize_keys(left_keys[n], right_keys[n], sort=sort) + for n in range(len(left_keys)) + ) + zipped = zip(*mapped) + llab, rlab, shape = [list(x) for x in zipped] + + # get flat i8 keys from label lists + lkey, rkey = _get_join_keys(llab, rlab, shape, sort) + + # factorize keys to a dense i8 space + # `count` is the num. 
of unique keys + # set(lkey) | set(rkey) == range(count) + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) + + # preserve left frame order if how == 'left' and sort == False + kwargs = copy.copy(kwargs) + if how == "left": + kwargs["sort"] = sort + join_func = _join_functions[how] + + return join_func(lkey, rkey, count, **kwargs) + + +def _restore_dropped_levels_multijoin( + left: MultiIndex, + right: MultiIndex, + dropped_level_names, + join_index, + lindexer, + rindexer, +): + """ + *this is an internal non-public method* + + Returns the levels, labels and names of a multi-index to multi-index join. + Depending on the type of join, this method restores the appropriate + dropped levels of the joined multi-index. + The method relies on lidx, rindexer which hold the index positions of + left and right, where a join was feasible + + Parameters + ---------- + left : MultiIndex + left index + right : MultiIndex + right index + dropped_level_names : str array + list of non-common level names + join_index : MultiIndex + the index of the join between the + common levels of left and right + lindexer : intp array + left indexer + rindexer : intp array + right indexer + + Returns + ------- + levels : list of Index + levels of combined multiindexes + labels : intp array + labels of combined multiindexes + names : str array + names of combined multiindexes + + """ + + def _convert_to_mulitindex(index) -> MultiIndex: + if isinstance(index, MultiIndex): + return index + else: + return MultiIndex.from_arrays([index.values], names=[index.name]) + + # For multi-multi joins with one overlapping level, + # the returned index if of type Index + # Assure that join_index is of type MultiIndex + # so that dropped levels can be appended + join_index = _convert_to_mulitindex(join_index) + + join_levels = join_index.levels + join_codes = join_index.codes + join_names = join_index.names + + # lindexer and rindexer hold the indexes where the join occurred + # for left and right 
respectively. If left/right is None then + # the join occurred on all indices of left/right + if lindexer is None: + lindexer = range(left.size) + + if rindexer is None: + rindexer = range(right.size) + + # Iterate through the levels that must be restored + for dropped_level_name in dropped_level_names: + if dropped_level_name in left.names: + idx = left + indexer = lindexer + else: + idx = right + indexer = rindexer + + # The index of the level name to be restored + name_idx = idx.names.index(dropped_level_name) + + restore_levels = idx.levels[name_idx] + # Inject -1 in the codes list where a join was not possible + # IOW indexer[i]=-1 + codes = idx.codes[name_idx] + restore_codes = algos.take_nd(codes, indexer, fill_value=-1) + + join_levels = join_levels + [restore_levels] + join_codes = join_codes + [restore_codes] + join_names = join_names + [dropped_level_name] + + return join_levels, join_codes, join_names + + +class _OrderedMerge(_MergeOperation): + _merge_type = "ordered_merge" + + def __init__( + self, + left, + right, + on=None, + left_on=None, + right_on=None, + left_index: bool = False, + right_index: bool = False, + axis=1, + suffixes=("_x", "_y"), + copy: bool = True, + fill_method=None, + how: str = "outer", + ): + + self.fill_method = fill_method + _MergeOperation.__init__( + self, + left, + right, + on=on, + left_on=left_on, + left_index=left_index, + right_index=right_index, + right_on=right_on, + axis=axis, + how=how, + suffixes=suffixes, + sort=True, # factorize sorts + ) + + def get_result(self): + join_index, left_indexer, right_indexer = self._get_join_info() + + # this is a bit kludgy + ldata, rdata = self.left._data, self.right._data + lsuf, rsuf = self.suffixes + + llabels, rlabels = _items_overlap_with_suffix( + ldata.items, lsuf, rdata.items, rsuf + ) + + if self.fill_method == "ffill": + left_join_indexer = libjoin.ffill_indexer(left_indexer) + right_join_indexer = libjoin.ffill_indexer(right_indexer) + else: + left_join_indexer = 
left_indexer + right_join_indexer = right_indexer + + lindexers = {1: left_join_indexer} if left_join_indexer is not None else {} + rindexers = {1: right_join_indexer} if right_join_indexer is not None else {} + + result_data = concatenate_block_managers( + [(ldata, lindexers), (rdata, rindexers)], + axes=[llabels.append(rlabels), join_index], + concat_axis=0, + copy=self.copy, + ) + + typ = self.left._constructor + result = typ(result_data).__finalize__(self, method=self._merge_type) + + self._maybe_add_join_keys(result, left_indexer, right_indexer) + + return result + + +def _asof_function(direction: str): + name = "asof_join_{dir}".format(dir=direction) + return getattr(libjoin, name, None) + + +def _asof_by_function(direction: str): + name = "asof_join_{dir}_on_X_by_Y".format(dir=direction) + return getattr(libjoin, name, None) + + +_type_casters = { + "int64_t": ensure_int64, + "double": ensure_float64, + "object": ensure_object, +} + + +def _get_cython_type_upcast(dtype): + """ Upcast a dtype to 'int64_t', 'double', or 'object' """ + if is_integer_dtype(dtype): + return "int64_t" + elif is_float_dtype(dtype): + return "double" + else: + return "object" + + +class _AsOfMerge(_OrderedMerge): + _merge_type = "asof_merge" + + def __init__( + self, + left, + right, + on=None, + left_on=None, + right_on=None, + left_index: bool = False, + right_index: bool = False, + by=None, + left_by=None, + right_by=None, + axis=1, + suffixes=("_x", "_y"), + copy: bool = True, + fill_method=None, + how: str = "asof", + tolerance=None, + allow_exact_matches: bool = True, + direction: str = "backward", + ): + + self.by = by + self.left_by = left_by + self.right_by = right_by + self.tolerance = tolerance + self.allow_exact_matches = allow_exact_matches + self.direction = direction + + _OrderedMerge.__init__( + self, + left, + right, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + axis=axis, + how=how, + suffixes=suffixes, + 
fill_method=fill_method, + ) + + def _validate_specification(self): + super()._validate_specification() + + # we only allow on to be a single item for on + if len(self.left_on) != 1 and not self.left_index: + raise MergeError("can only asof on a key for left") + + if len(self.right_on) != 1 and not self.right_index: + raise MergeError("can only asof on a key for right") + + if self.left_index and isinstance(self.left.index, MultiIndex): + raise MergeError("left can only have one index") + + if self.right_index and isinstance(self.right.index, MultiIndex): + raise MergeError("right can only have one index") + + # set 'by' columns + if self.by is not None: + if self.left_by is not None or self.right_by is not None: + raise MergeError("Can only pass by OR left_by and right_by") + self.left_by = self.right_by = self.by + if self.left_by is None and self.right_by is not None: + raise MergeError("missing left_by") + if self.left_by is not None and self.right_by is None: + raise MergeError("missing right_by") + + # add 'by' to our key-list so we can have it in the + # output as a key + if self.left_by is not None: + if not is_list_like(self.left_by): + self.left_by = [self.left_by] + if not is_list_like(self.right_by): + self.right_by = [self.right_by] + + if len(self.left_by) != len(self.right_by): + raise MergeError("left_by and right_by must be same length") + + self.left_on = self.left_by + list(self.left_on) + self.right_on = self.right_by + list(self.right_on) + + # check 'direction' is valid + if self.direction not in ["backward", "forward", "nearest"]: + raise MergeError( + "direction invalid: {direction}".format(direction=self.direction) + ) + + @property + def _asof_key(self): + """ This is our asof key, the 'on' """ + return self.left_on[-1] + + def _get_merge_keys(self): + + # note this function has side effects + (left_join_keys, right_join_keys, join_names) = super()._get_merge_keys() + + # validate index types are the same + for i, (lk, rk) in 
enumerate(zip(left_join_keys, right_join_keys)): + if not is_dtype_equal(lk.dtype, rk.dtype): + if is_categorical_dtype(lk.dtype) and is_categorical_dtype(rk.dtype): + # The generic error message is confusing for categoricals. + # + # In this function, the join keys include both the original + # ones of the merge_asof() call, and also the keys passed + # to its by= argument. Unordered but equal categories + # are not supported for the former, but will fail + # later with a ValueError, so we don't *need* to check + # for them here. + msg = ( + "incompatible merge keys [{i}] {lkdtype} and " + "{rkdtype}, both sides category, but not equal ones".format( + i=i, lkdtype=repr(lk.dtype), rkdtype=repr(rk.dtype) + ) + ) + else: + msg = ( + "incompatible merge keys [{i}] {lkdtype} and " + "{rkdtype}, must be the same type".format( + i=i, lkdtype=repr(lk.dtype), rkdtype=repr(rk.dtype) + ) + ) + raise MergeError(msg) + + # validate tolerance; datetime.timedelta or Timedelta if we have a DTI + if self.tolerance is not None: + + if self.left_index: + lt = self.left.index + else: + lt = left_join_keys[-1] + + msg = ( + "incompatible tolerance {tolerance}, must be compat " + "with type {lkdtype}".format( + tolerance=type(self.tolerance), lkdtype=repr(lt.dtype) + ) + ) + + if needs_i8_conversion(lt): + if not isinstance(self.tolerance, datetime.timedelta): + raise MergeError(msg) + if self.tolerance < Timedelta(0): + raise MergeError("tolerance must be positive") + + elif is_integer_dtype(lt): + if not is_integer(self.tolerance): + raise MergeError(msg) + if self.tolerance < 0: + raise MergeError("tolerance must be positive") + + elif is_float_dtype(lt): + if not is_number(self.tolerance): + raise MergeError(msg) + if self.tolerance < 0: + raise MergeError("tolerance must be positive") + + else: + raise MergeError("key must be integer, timestamp or float") + + # validate allow_exact_matches + if not is_bool(self.allow_exact_matches): + msg = "allow_exact_matches must be boolean, 
passed {passed}" + raise MergeError(msg.format(passed=self.allow_exact_matches)) + + return left_join_keys, right_join_keys, join_names + + def _get_join_indexers(self): + """ return the join indexers """ + + def flip(xs): + """ unlike np.transpose, this returns an array of tuples """ + xs = [ + x if not is_extension_array_dtype(x) else x._ndarray_values for x in xs + ] + labels = list(string.ascii_lowercase[: len(xs)]) + dtypes = [x.dtype for x in xs] + labeled_dtypes = list(zip(labels, dtypes)) + return np.array(list(zip(*xs)), labeled_dtypes) + + # values to compare + left_values = ( + self.left.index.values if self.left_index else self.left_join_keys[-1] + ) + right_values = ( + self.right.index.values if self.right_index else self.right_join_keys[-1] + ) + tolerance = self.tolerance + + # we require sortedness and non-null values in the join keys + msg_sorted = "{side} keys must be sorted" + msg_missings = "Merge keys contain null values on {side} side" + + if not Index(left_values).is_monotonic: + if isna(left_values).any(): + raise ValueError(msg_missings.format(side="left")) + else: + raise ValueError(msg_sorted.format(side="left")) + + if not Index(right_values).is_monotonic: + if isna(right_values).any(): + raise ValueError(msg_missings.format(side="right")) + else: + raise ValueError(msg_sorted.format(side="right")) + + # initial type conversion as needed + if needs_i8_conversion(left_values): + left_values = left_values.view("i8") + right_values = right_values.view("i8") + if tolerance is not None: + tolerance = Timedelta(tolerance) + tolerance = tolerance.value + + # a "by" parameter requires special handling + if self.left_by is not None: + # remove 'on' parameter from values if one existed + if self.left_index and self.right_index: + left_by_values = self.left_join_keys + right_by_values = self.right_join_keys + else: + left_by_values = self.left_join_keys[0:-1] + right_by_values = self.right_join_keys[0:-1] + + # get tuple representation of values 
if more than one + if len(left_by_values) == 1: + left_by_values = left_by_values[0] + right_by_values = right_by_values[0] + else: + left_by_values = flip(left_by_values) + right_by_values = flip(right_by_values) + + # upcast 'by' parameter because HashTable is limited + by_type = _get_cython_type_upcast(left_by_values.dtype) + by_type_caster = _type_casters[by_type] + left_by_values = by_type_caster(left_by_values) + right_by_values = by_type_caster(right_by_values) + + # choose appropriate function by type + func = _asof_by_function(self.direction) + return func( + left_values, + right_values, + left_by_values, + right_by_values, + self.allow_exact_matches, + tolerance, + ) + else: + # choose appropriate function by type + func = _asof_function(self.direction) + return func(left_values, right_values, self.allow_exact_matches, tolerance) + + +def _get_multiindex_indexer(join_keys, index: MultiIndex, sort: bool): + + # left & right join labels and num. of levels at each location + mapped = ( + _factorize_keys(index.levels[n], join_keys[n], sort=sort) + for n in range(index.nlevels) + ) + zipped = zip(*mapped) + rcodes, lcodes, shape = [list(x) for x in zipped] + if sort: + rcodes = list(map(np.take, rcodes, index.codes)) + else: + i8copy = lambda a: a.astype("i8", subok=False, copy=True) + rcodes = list(map(i8copy, index.codes)) + + # fix right labels if there were any nulls + for i in range(len(join_keys)): + mask = index.codes[i] == -1 + if mask.any(): + # check if there already was any nulls at this location + # if there was, it is factorized to `shape[i] - 1` + a = join_keys[i][lcodes[i] == shape[i] - 1] + if a.size == 0 or not a[0] != a[0]: + shape[i] += 1 + + rcodes[i][mask] = shape[i] - 1 + + # get flat i8 join keys + lkey, rkey = _get_join_keys(lcodes, rcodes, shape, sort) + + # factorize keys to a dense i8 space + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) + + return libjoin.left_outer_join(lkey, rkey, count, sort=sort) + + +def 
_get_single_indexer(join_key, index, sort: bool = False): + left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) + + left_indexer, right_indexer = libjoin.left_outer_join( + ensure_int64(left_key), ensure_int64(right_key), count, sort=sort + ) + + return left_indexer, right_indexer + + +def _left_join_on_index(left_ax: Index, right_ax: Index, join_keys, sort: bool = False): + if len(join_keys) > 1: + if not ( + (isinstance(right_ax, MultiIndex) and len(join_keys) == right_ax.nlevels) + ): + raise AssertionError( + "If more than one join key is given then " + "'right_ax' must be a MultiIndex and the " + "number of join keys must be the number of " + "levels in right_ax" + ) + + left_indexer, right_indexer = _get_multiindex_indexer( + join_keys, right_ax, sort=sort + ) + else: + jkey = join_keys[0] + + left_indexer, right_indexer = _get_single_indexer(jkey, right_ax, sort=sort) + + if sort or len(left_ax) != len(left_indexer): + # if asked to sort or there are 1-to-many matches + join_index = left_ax.take(left_indexer) + return join_index, left_indexer, right_indexer + + # left frame preserves order & length of its index + return left_ax, None, right_indexer + + +def _right_outer_join(x, y, max_groups): + right_indexer, left_indexer = libjoin.left_outer_join(y, x, max_groups) + return left_indexer, right_indexer + + +_join_functions = { + "inner": libjoin.inner_join, + "left": libjoin.left_outer_join, + "right": _right_outer_join, + "outer": libjoin.full_outer_join, +} + + +def _factorize_keys(lk, rk, sort=True): + # Some pre-processing for non-ndarray lk / rk + if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): + lk = getattr(lk, "_values", lk)._data + rk = getattr(rk, "_values", rk)._data + + elif ( + is_categorical_dtype(lk) and is_categorical_dtype(rk) and lk.is_dtype_equal(rk) + ): + if lk.categories.equals(rk.categories): + # if we exactly match in categories, allow us to factorize on codes + rk = rk.codes + else: + # Same 
categories in different orders -> recode + rk = _recode_for_categories(rk.codes, rk.categories, lk.categories) + + lk = ensure_int64(lk.codes) + rk = ensure_int64(rk) + + elif ( + is_extension_array_dtype(lk.dtype) + and is_extension_array_dtype(rk.dtype) + and lk.dtype == rk.dtype + ): + lk, _ = lk._values_for_factorize() + rk, _ = rk._values_for_factorize() + + if is_integer_dtype(lk) and is_integer_dtype(rk): + # GH#23917 TODO: needs tests for case where lk is integer-dtype + # and rk is datetime-dtype + klass = libhashtable.Int64Factorizer + lk = ensure_int64(com.values_from_object(lk)) + rk = ensure_int64(com.values_from_object(rk)) + elif issubclass(lk.dtype.type, (np.timedelta64, np.datetime64)) and issubclass( + rk.dtype.type, (np.timedelta64, np.datetime64) + ): + # GH#23917 TODO: Needs tests for non-matching dtypes + klass = libhashtable.Int64Factorizer + lk = ensure_int64(com.values_from_object(lk)) + rk = ensure_int64(com.values_from_object(rk)) + else: + klass = libhashtable.Factorizer + lk = ensure_object(lk) + rk = ensure_object(rk) + + rizer = klass(max(len(lk), len(rk))) + + llab = rizer.factorize(lk) + rlab = rizer.factorize(rk) + + count = rizer.get_count() + + if sort: + uniques = rizer.uniques.to_array() + llab, rlab = _sort_labels(uniques, llab, rlab) + + # NA group + lmask = llab == -1 + lany = lmask.any() + rmask = rlab == -1 + rany = rmask.any() + + if lany or rany: + if lany: + np.putmask(llab, lmask, count) + if rany: + np.putmask(rlab, rmask, count) + count += 1 + + return llab, rlab, count + + +def _sort_labels(uniques: np.ndarray, left, right): + if not isinstance(uniques, np.ndarray): + # tuplesafe + uniques = Index(uniques).values + + llength = len(left) + labels = np.concatenate([left, right]) + + _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1) + new_labels = ensure_int64(new_labels) + new_left, new_right = new_labels[:llength], new_labels[llength:] + + return new_left, new_right + + +def _get_join_keys(llab, rlab, 
shape, sort: bool): + + # how many levels can be done without overflow + pred = lambda i: not is_int64_overflow_possible(shape[:i]) + nlev = next(filter(pred, range(len(shape), 0, -1))) + + # get keys for the first `nlev` levels + stride = np.prod(shape[1:nlev], dtype="i8") + lkey = stride * llab[0].astype("i8", subok=False, copy=False) + rkey = stride * rlab[0].astype("i8", subok=False, copy=False) + + for i in range(1, nlev): + with np.errstate(divide="ignore"): + stride //= shape[i] + lkey += llab[i] * stride + rkey += rlab[i] * stride + + if nlev == len(shape): # all done! + return lkey, rkey + + # densify current keys to avoid overflow + lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort) + + llab = [lkey] + llab[nlev:] + rlab = [rkey] + rlab[nlev:] + shape = [count] + shape[nlev:] + + return _get_join_keys(llab, rlab, shape, sort) + + +def _should_fill(lname, rname) -> bool: + if not isinstance(lname, str) or not isinstance(rname, str): + return True + return lname == rname + + +def _any(x) -> bool: + return x is not None and com.any_not_none(*x) + + +def _validate_operand(obj: FrameOrSeries) -> "DataFrame": + if isinstance(obj, ABCDataFrame): + return obj + elif isinstance(obj, ABCSeries): + if obj.name is None: + raise ValueError("Cannot merge a Series without a name") + else: + return obj.to_frame() + else: + raise TypeError( + "Can only merge Series or DataFrame objects, " + "a {obj} was passed".format(obj=type(obj)) + ) + + +def _items_overlap_with_suffix(left: Index, lsuffix, right: Index, rsuffix): + """ + If two indices overlap, add suffixes to overlapping entries. + + If corresponding suffix is empty, the entry is simply converted to string. 
+ + """ + to_rename = left.intersection(right) + if len(to_rename) == 0: + return left, right + + if not lsuffix and not rsuffix: + raise ValueError( + "columns overlap but no suffix specified: " + "{rename}".format(rename=to_rename) + ) + + def renamer(x, suffix): + """ + Rename the left and right indices. + + If there is overlap, and suffix is not None, add + suffix, otherwise, leave it as-is. + + Parameters + ---------- + x : original column name + suffix : str or None + + Returns + ------- + x : renamed column name + """ + if x in to_rename and suffix is not None: + return "{x}{suffix}".format(x=x, suffix=suffix) + return x + + lrenamer = partial(renamer, suffix=lsuffix) + rrenamer = partial(renamer, suffix=rsuffix) + + return (_transform_index(left, lrenamer), _transform_index(right, rrenamer)) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py new file mode 100644 index 00000000..d91bf3e2 --- /dev/null +++ b/pandas/core/reshape/pivot.py @@ -0,0 +1,707 @@ +from typing import TYPE_CHECKING, Callable, Dict, List, Tuple, Union + +import numpy as np + +from pandas.util._decorators import Appender, Substitution + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype +from pandas.core.dtypes.common import is_integer_dtype, is_list_like, is_scalar +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + +import pandas.core.common as com +from pandas.core.frame import _shared_docs +from pandas.core.groupby import Grouper +from pandas.core.indexes.api import Index, MultiIndex, get_objs_combined_axis +from pandas.core.reshape.concat import concat +from pandas.core.reshape.util import cartesian_product +from pandas.core.series import Series + +if TYPE_CHECKING: + from pandas import DataFrame + + +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. 
TODO: Fix this dependency +@Substitution("\ndata : DataFrame") +@Appender(_shared_docs["pivot_table"], indents=1) +def pivot_table( + data, + values=None, + index=None, + columns=None, + aggfunc="mean", + fill_value=None, + margins=False, + dropna=True, + margins_name="All", + observed=False, +) -> "DataFrame": + index = _convert_by(index) + columns = _convert_by(columns) + + if isinstance(aggfunc, list): + pieces: List[DataFrame] = [] + keys = [] + for func in aggfunc: + table = pivot_table( + data, + values=values, + index=index, + columns=columns, + fill_value=fill_value, + aggfunc=func, + margins=margins, + dropna=dropna, + margins_name=margins_name, + observed=observed, + ) + pieces.append(table) + keys.append(getattr(func, "__name__", func)) + + return concat(pieces, keys=keys, axis=1) + + keys = index + columns + + values_passed = values is not None + if values_passed: + if is_list_like(values): + values_multi = True + values = list(values) + else: + values_multi = False + values = [values] + + # GH14938 Make sure value labels are in data + for i in values: + if i not in data: + raise KeyError(i) + + to_filter = [] + for x in keys + values: + if isinstance(x, Grouper): + x = x.key + try: + if x in data: + to_filter.append(x) + except TypeError: + pass + if len(to_filter) < len(data.columns): + data = data[to_filter] + + else: + values = data.columns + for key in keys: + try: + values = values.drop(key) + except (TypeError, ValueError, KeyError): + pass + values = list(values) + + grouped = data.groupby(keys, observed=observed) + agged = grouped.agg(aggfunc) + if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): + agged = agged.dropna(how="all") + + # gh-21133 + # we want to down cast if + # the original values are ints + # as we grouped with a NaN value + # and then dropped, coercing to floats + for v in values: + if ( + v in data + and is_integer_dtype(data[v]) + and v in agged + and not is_integer_dtype(agged[v]) + ): + agged[v] = 
maybe_downcast_to_dtype(agged[v], data[v].dtype) + + table = agged + if table.index.nlevels > 1: + # Related GH #17123 + # If index_names are integers, determine whether the integers refer + # to the level position or name. + index_names = agged.index.names[: len(index)] + to_unstack = [] + for i in range(len(index), len(keys)): + name = agged.index.names[i] + if name is None or name in index_names: + to_unstack.append(i) + else: + to_unstack.append(name) + table = agged.unstack(to_unstack) + + if not dropna: + if table.index.nlevels > 1: + m = MultiIndex.from_arrays( + cartesian_product(table.index.levels), names=table.index.names + ) + table = table.reindex(m, axis=0) + + if table.columns.nlevels > 1: + m = MultiIndex.from_arrays( + cartesian_product(table.columns.levels), names=table.columns.names + ) + table = table.reindex(m, axis=1) + + if isinstance(table, ABCDataFrame): + table = table.sort_index(axis=1) + + if fill_value is not None: + _table = table.fillna(fill_value, downcast="infer") + assert _table is not None # needed for mypy + table = _table + + if margins: + if dropna: + data = data[data.notna().all(axis=1)] + table = _add_margins( + table, + data, + values, + rows=index, + cols=columns, + aggfunc=aggfunc, + observed=dropna, + margins_name=margins_name, + fill_value=fill_value, + ) + + # discard the top level + if ( + values_passed + and not values_multi + and not table.empty + and (table.columns.nlevels > 1) + ): + table = table[values[0]] + + if len(index) == 0 and len(columns) > 0: + table = table.T + + # GH 15193 Make sure empty columns are removed if dropna=True + if isinstance(table, ABCDataFrame) and dropna: + table = table.dropna(how="all", axis=1) + + return table + + +def _add_margins( + table: Union["Series", "DataFrame"], + data, + values, + rows, + cols, + aggfunc, + observed=None, + margins_name: str = "All", + fill_value=None, +): + if not isinstance(margins_name, str): + raise ValueError("margins_name argument must be a string") + + 
msg = 'Conflicting name "{name}" in margins'.format(name=margins_name) + for level in table.index.names: + if margins_name in table.index.get_level_values(level): + raise ValueError(msg) + + grand_margin = _compute_grand_margin(data, values, aggfunc, margins_name) + + if table.ndim == 2: + # i.e. DataFramae + for level in table.columns.names[1:]: + if margins_name in table.columns.get_level_values(level): + raise ValueError(msg) + + key: Union[str, Tuple[str, ...]] + if len(rows) > 1: + key = (margins_name,) + ("",) * (len(rows) - 1) + else: + key = margins_name + + if not values and isinstance(table, ABCSeries): + # If there are no values and the table is a series, then there is only + # one column in the data. Compute grand margin and return it. + return table.append(Series({key: grand_margin[margins_name]})) + + elif values: + marginal_result_set = _generate_marginal_results( + table, + data, + values, + rows, + cols, + aggfunc, + observed, + grand_margin, + margins_name, + ) + if not isinstance(marginal_result_set, tuple): + return marginal_result_set + result, margin_keys, row_margin = marginal_result_set + else: + # no values, and table is a DataFrame + assert isinstance(table, ABCDataFrame) + marginal_result_set = _generate_marginal_results_without_values( + table, data, rows, cols, aggfunc, observed, margins_name + ) + if not isinstance(marginal_result_set, tuple): + return marginal_result_set + result, margin_keys, row_margin = marginal_result_set + + row_margin = row_margin.reindex(result.columns, fill_value=fill_value) + # populate grand margin + for k in margin_keys: + if isinstance(k, str): + row_margin[k] = grand_margin[k] + else: + row_margin[k] = grand_margin[k[0]] + + from pandas import DataFrame + + margin_dummy = DataFrame(row_margin, columns=[key]).T + + row_names = result.index.names + try: + # check the result column and leave floats + for dtype in set(result.dtypes): + cols = result.select_dtypes([dtype]).columns + margin_dummy[cols] = 
margin_dummy[cols].apply( + maybe_downcast_to_dtype, args=(dtype,) + ) + result = result.append(margin_dummy) + except TypeError: + + # we cannot reshape, so coerce the axis + result.index = result.index._to_safe_for_reshape() + result = result.append(margin_dummy) + result.index.names = row_names + + return result + + +def _compute_grand_margin(data, values, aggfunc, margins_name: str = "All"): + + if values: + grand_margin = {} + for k, v in data[values].items(): + try: + if isinstance(aggfunc, str): + grand_margin[k] = getattr(v, aggfunc)() + elif isinstance(aggfunc, dict): + if isinstance(aggfunc[k], str): + grand_margin[k] = getattr(v, aggfunc[k])() + else: + grand_margin[k] = aggfunc[k](v) + else: + grand_margin[k] = aggfunc(v) + except TypeError: + pass + return grand_margin + else: + return {margins_name: aggfunc(data.index)} + + +def _generate_marginal_results( + table, + data, + values, + rows, + cols, + aggfunc, + observed, + grand_margin, + margins_name: str = "All", +): + if len(cols) > 0: + # need to "interleave" the margins + table_pieces = [] + margin_keys = [] + + def _all_key(key): + return (key, margins_name) + ("",) * (len(cols) - 1) + + if len(rows) > 0: + margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc) + cat_axis = 1 + + for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): + all_key = _all_key(key) + + # we are going to mutate this, so need to copy! 
+ piece = piece.copy() + try: + piece[all_key] = margin[key] + except TypeError: + + # we cannot reshape, so coerce the axis + piece.set_axis( + piece._get_axis(cat_axis)._to_safe_for_reshape(), + axis=cat_axis, + inplace=True, + ) + piece[all_key] = margin[key] + + table_pieces.append(piece) + margin_keys.append(all_key) + else: + margin = grand_margin + cat_axis = 0 + for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): + all_key = _all_key(key) + table_pieces.append(piece) + table_pieces.append(Series(margin[key], index=[all_key])) + margin_keys.append(all_key) + + result = concat(table_pieces, axis=cat_axis) + + if len(rows) == 0: + return result + else: + result = table + margin_keys = table.columns + + if len(cols) > 0: + row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc) + row_margin = row_margin.stack() + + # slight hack + new_order = [len(cols)] + list(range(len(cols))) + row_margin.index = row_margin.index.reorder_levels(new_order) + else: + row_margin = Series(np.nan, index=result.columns) + + return result, margin_keys, row_margin + + +def _generate_marginal_results_without_values( + table: "DataFrame", data, rows, cols, aggfunc, observed, margins_name: str = "All" +): + if len(cols) > 0: + # need to "interleave" the margins + margin_keys = [] + + def _all_key(): + if len(cols) == 1: + return margins_name + return (margins_name,) + ("",) * (len(cols) - 1) + + if len(rows) > 0: + margin = data[rows].groupby(rows, observed=observed).apply(aggfunc) + all_key = _all_key() + table[all_key] = margin + result = table + margin_keys.append(all_key) + + else: + margin = data.groupby(level=0, axis=0, observed=observed).apply(aggfunc) + all_key = _all_key() + table[all_key] = margin + result = table + margin_keys.append(all_key) + return result + else: + result = table + margin_keys = table.columns + + if len(cols): + row_margin = data[cols].groupby(cols, observed=observed).apply(aggfunc) + else: + row_margin = 
Series(np.nan, index=result.columns) + + return result, margin_keys, row_margin + + +def _convert_by(by): + if by is None: + by = [] + elif ( + is_scalar(by) + or isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) + or hasattr(by, "__call__") + ): + by = [by] + else: + by = list(by) + return by + + +@Substitution("\ndata : DataFrame") +@Appender(_shared_docs["pivot"], indents=1) +def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFrame": + if values is None: + cols = [columns] if index is None else [index, columns] + append = index is None + indexed = data.set_index(cols, append=append) + else: + if index is None: + index = data.index + else: + index = data[index] + index = MultiIndex.from_arrays([index, data[columns]]) + + if is_list_like(values) and not isinstance(values, tuple): + # Exclude tuple because it is seen as a single column name + indexed = data._constructor( + data[values].values, index=index, columns=values + ) + else: + indexed = data._constructor_sliced(data[values].values, index=index) + return indexed.unstack(columns) + + +def crosstab( + index, + columns, + values=None, + rownames=None, + colnames=None, + aggfunc=None, + margins=False, + margins_name: str = "All", + dropna: bool = True, + normalize=False, +) -> "DataFrame": + """ + Compute a simple cross tabulation of two (or more) factors. By default + computes a frequency table of the factors unless an array of values and an + aggregation function are passed. + + Parameters + ---------- + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. + columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + values : array-like, optional + Array of values to aggregate according to the factors. + Requires `aggfunc` be specified. + rownames : sequence, default None + If passed, must match number of row arrays passed. 
+ colnames : sequence, default None + If passed, must match number of column arrays passed. + aggfunc : function, optional + If specified, requires `values` be specified as well. + margins : bool, default False + Add row/column margins (subtotals). + margins_name : str, default 'All' + Name of the row/column that will contain the totals + when margins is True. + + .. versionadded:: 0.21.0 + + dropna : bool, default True + Do not include columns whose entries are all NaN. + normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False + Normalize by dividing all values by the sum of values. + + - If passed 'all' or `True`, will normalize over all values. + - If passed 'index' will normalize over each row. + - If passed 'columns' will normalize over each column. + - If margins is `True`, will also normalize margin values. + + Returns + ------- + DataFrame + Cross tabulation of the data. + + See Also + -------- + DataFrame.pivot : Reshape data based on column values. + pivot_table : Create a pivot table as a DataFrame. + + Notes + ----- + Any Series passed will have their name attributes used unless row or column + names for the cross-tabulation are specified. + + Any input passed containing Categorical data will have **all** of its + categories included in the cross-tabulation, even if the actual data does + not contain any instances of a particular category. + + In the event that there aren't overlapping indexes an empty DataFrame will + be returned. + + Examples + -------- + >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", + ... "bar", "bar", "foo", "foo", "foo"], dtype=object) + >>> b = np.array(["one", "one", "one", "two", "one", "one", + ... "one", "two", "two", "two", "one"], dtype=object) + >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", + ... "shiny", "dull", "shiny", "shiny", "shiny"], + ... 
dtype=object) + >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) + b one two + c dull shiny dull shiny + a + bar 1 2 1 0 + foo 2 2 1 2 + + Here 'c' and 'f' are not represented in the data and will not be + shown in the output because dropna is True by default. Set + dropna=False to preserve categories with no data. + + >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) + >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) + >>> pd.crosstab(foo, bar) + col_0 d e + row_0 + a 1 0 + b 0 1 + >>> pd.crosstab(foo, bar, dropna=False) + col_0 d e f + row_0 + a 1 0 0 + b 0 1 0 + c 0 0 0 + """ + + index = com.maybe_make_list(index) + columns = com.maybe_make_list(columns) + + rownames = _get_names(index, rownames, prefix="row") + colnames = _get_names(columns, colnames, prefix="col") + + common_idx = None + pass_objs = [x for x in index + columns if isinstance(x, (ABCSeries, ABCDataFrame))] + if pass_objs: + common_idx = get_objs_combined_axis(pass_objs, intersect=True, sort=False) + + data: Dict = {} + data.update(zip(rownames, index)) + data.update(zip(colnames, columns)) + + if values is None and aggfunc is not None: + raise ValueError("aggfunc cannot be used without values.") + + if values is not None and aggfunc is None: + raise ValueError("values cannot be used without an aggfunc.") + + from pandas import DataFrame + + df = DataFrame(data, index=common_idx) + if values is None: + df["__dummy__"] = 0 + kwargs = {"aggfunc": len, "fill_value": 0} + else: + df["__dummy__"] = values + kwargs = {"aggfunc": aggfunc} + + table = df.pivot_table( + "__dummy__", + index=rownames, + columns=colnames, + margins=margins, + margins_name=margins_name, + dropna=dropna, + **kwargs, + ) + + # Post-process + if normalize is not False: + table = _normalize( + table, normalize=normalize, margins=margins, margins_name=margins_name + ) + + return table + + +def _normalize(table, normalize, margins: bool, margins_name="All"): + + if not 
isinstance(normalize, (bool, str)): + axis_subs = {0: "index", 1: "columns"} + try: + normalize = axis_subs[normalize] + except KeyError: + raise ValueError("Not a valid normalize argument") + + if margins is False: + + # Actual Normalizations + normalizers: Dict[Union[bool, str], Callable] = { + "all": lambda x: x / x.sum(axis=1).sum(axis=0), + "columns": lambda x: x / x.sum(), + "index": lambda x: x.div(x.sum(axis=1), axis=0), + } + + normalizers[True] = normalizers["all"] + + try: + f = normalizers[normalize] + except KeyError: + raise ValueError("Not a valid normalize argument") + + table = f(table) + table = table.fillna(0) + + elif margins is True: + # keep index and column of pivoted table + table_index = table.index + table_columns = table.columns + + # check if margin name is in (for MI cases) or equal to last + # index/column and save the column and index margin + if (margins_name not in table.iloc[-1, :].name) | ( + margins_name != table.iloc[:, -1].name + ): + raise ValueError( + "{mname} not in pivoted DataFrame".format(mname=margins_name) + ) + column_margin = table.iloc[:-1, -1] + index_margin = table.iloc[-1, :-1] + + # keep the core table + table = table.iloc[:-1, :-1] + + # Normalize core + table = _normalize(table, normalize=normalize, margins=False) + + # Fix Margins + if normalize == "columns": + column_margin = column_margin / column_margin.sum() + table = concat([table, column_margin], axis=1) + table = table.fillna(0) + table.columns = table_columns + + elif normalize == "index": + index_margin = index_margin / index_margin.sum() + table = table.append(index_margin) + table = table.fillna(0) + table.index = table_index + + elif normalize == "all" or normalize is True: + column_margin = column_margin / column_margin.sum() + index_margin = index_margin / index_margin.sum() + index_margin.loc[margins_name] = 1 + table = concat([table, column_margin], axis=1) + table = table.append(index_margin) + + table = table.fillna(0) + table.index = 
table_index + table.columns = table_columns + + else: + raise ValueError("Not a valid normalize argument") + + else: + raise ValueError("Not a valid margins argument") + + return table + + +def _get_names(arrs, names, prefix: str = "row"): + if names is None: + names = [] + for i, arr in enumerate(arrs): + if isinstance(arr, ABCSeries) and arr.name is not None: + names.append(arr.name) + else: + names.append("{prefix}_{i}".format(prefix=prefix, i=i)) + else: + if len(names) != len(arrs): + raise AssertionError("arrays and names must have the same length") + if not isinstance(names, list): + names = list(names) + + return names diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py new file mode 100644 index 00000000..97f416e3 --- /dev/null +++ b/pandas/core/reshape/reshape.py @@ -0,0 +1,1080 @@ +from functools import partial +import itertools +from typing import List + +import numpy as np + +import pandas._libs.algos as libalgos +import pandas._libs.reshape as libreshape +from pandas._libs.sparse import IntIndex + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + ensure_platform_int, + is_bool_dtype, + is_extension_array_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_object_dtype, + needs_i8_conversion, +) +from pandas.core.dtypes.missing import notna + +import pandas.core.algorithms as algos +from pandas.core.arrays import SparseArray +from pandas.core.arrays.categorical import factorize_from_iterable +from pandas.core.construction import extract_array +from pandas.core.frame import DataFrame +from pandas.core.indexes.api import Index, MultiIndex +from pandas.core.series import Series +from pandas.core.sorting import ( + compress_group_index, + decons_obs_group_ids, + get_compressed_ids, + get_group_index, +) + + +class _Unstacker: + """ + Helper class to unstack data / pivot with multi-level index + + Parameters + ---------- + values : ndarray + Values of DataFrame to "Unstack" + 
index : object + Pandas ``Index`` + level : int or str, default last level + Level to "unstack". Accepts a name for the level. + value_columns : Index, optional + Pandas ``Index`` or ``MultiIndex`` object if unstacking a DataFrame + fill_value : scalar, optional + Default value to fill in missing values if subgroups do not have the + same set of labels. By default, missing values will be replaced with + the default fill value for that data type, NaN for float, NaT for + datetimelike, etc. For integer types, by default data will converted to + float and missing values will be set to NaN. + constructor : object + Pandas ``DataFrame`` or subclass used to create unstacked + response. If None, DataFrame will be used. + + Examples + -------- + >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), + ... ('two', 'a'), ('two', 'b')]) + >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index) + >>> s + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 + + >>> s.unstack(level=-1) + a b + one 1 2 + two 3 4 + + >>> s.unstack(level=0) + one two + a 1 3 + b 2 4 + + Returns + ------- + unstacked : DataFrame + """ + + def __init__( + self, + values: np.ndarray, + index, + level=-1, + value_columns=None, + fill_value=None, + constructor=None, + ): + + if values.ndim == 1: + values = values[:, np.newaxis] + self.values = values + self.value_columns = value_columns + self.fill_value = fill_value + + if constructor is None: + constructor = DataFrame + self.constructor = constructor + + if value_columns is None and values.shape[1] != 1: # pragma: no cover + raise ValueError("must pass column labels for multi-column data") + + self.index = index.remove_unused_levels() + + self.level = self.index._get_level_number(level) + + # when index includes `nan`, need to lift levels/strides by 1 + self.lift = 1 if -1 in self.index.codes[self.level] else 0 + + self.new_index_levels = list(self.index.levels) + self.new_index_names = list(self.index.names) + + self.removed_name = 
self.new_index_names.pop(self.level) + self.removed_level = self.new_index_levels.pop(self.level) + self.removed_level_full = index.levels[self.level] + + # Bug fix GH 20601 + # If the data frame is too big, the number of unique index combination + # will cause int32 overflow on windows environments. + # We want to check and raise an error before this happens + num_rows = np.max([index_level.size for index_level in self.new_index_levels]) + num_columns = self.removed_level.size + + # GH20601: This forces an overflow if the number of cells is too high. + num_cells = np.multiply(num_rows, num_columns, dtype=np.int32) + + if num_rows > 0 and num_columns > 0 and num_cells <= 0: + raise ValueError("Unstacked DataFrame is too big, causing int32 overflow") + + self._make_sorted_values_labels() + self._make_selectors() + + def _make_sorted_values_labels(self): + v = self.level + + codes = list(self.index.codes) + levs = list(self.index.levels) + to_sort = codes[:v] + codes[v + 1 :] + [codes[v]] + sizes = [len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]] + + comp_index, obs_ids = get_compressed_ids(to_sort, sizes) + ngroups = len(obs_ids) + + indexer = libalgos.groupsort_indexer(comp_index, ngroups)[0] + indexer = ensure_platform_int(indexer) + + self.sorted_values = algos.take_nd(self.values, indexer, axis=0) + self.sorted_labels = [l.take(indexer) for l in to_sort] + + def _make_selectors(self): + new_levels = self.new_index_levels + + # make the mask + remaining_labels = self.sorted_labels[:-1] + level_sizes = [len(x) for x in new_levels] + + comp_index, obs_ids = get_compressed_ids(remaining_labels, level_sizes) + ngroups = len(obs_ids) + + comp_index = ensure_platform_int(comp_index) + stride = self.index.levshape[self.level] + self.lift + self.full_shape = ngroups, stride + + selector = self.sorted_labels[-1] + stride * comp_index + self.lift + mask = np.zeros(np.prod(self.full_shape), dtype=bool) + mask.put(selector, True) + + if mask.sum() < len(self.index): + 
raise ValueError("Index contains duplicate entries, cannot reshape") + + self.group_index = comp_index + self.mask = mask + self.unique_groups = obs_ids + self.compressor = comp_index.searchsorted(np.arange(ngroups)) + + def get_result(self): + values, _ = self.get_new_values() + columns = self.get_new_columns() + index = self.get_new_index() + + return self.constructor(values, index=index, columns=columns) + + def get_new_values(self): + values = self.values + + # place the values + length, width = self.full_shape + stride = values.shape[1] + result_width = width * stride + result_shape = (length, result_width) + mask = self.mask + mask_all = mask.all() + + # we can simply reshape if we don't have a mask + if mask_all and len(values): + new_values = ( + self.sorted_values.reshape(length, width, stride) + .swapaxes(1, 2) + .reshape(result_shape) + ) + new_mask = np.ones(result_shape, dtype=bool) + return new_values, new_mask + + # if our mask is all True, then we can use our existing dtype + if mask_all: + dtype = values.dtype + new_values = np.empty(result_shape, dtype=dtype) + else: + dtype, fill_value = maybe_promote(values.dtype, self.fill_value) + new_values = np.empty(result_shape, dtype=dtype) + new_values.fill(fill_value) + + new_mask = np.zeros(result_shape, dtype=bool) + + name = np.dtype(dtype).name + sorted_values = self.sorted_values + + # we need to convert to a basic dtype + # and possibly coerce an input to our output dtype + # e.g. 
ints -> floats + if needs_i8_conversion(values): + sorted_values = sorted_values.view("i8") + new_values = new_values.view("i8") + elif is_bool_dtype(values): + sorted_values = sorted_values.astype("object") + new_values = new_values.astype("object") + else: + sorted_values = sorted_values.astype(name, copy=False) + + # fill in our values & mask + libreshape.unstack( + sorted_values, + mask.view("u1"), + stride, + length, + width, + new_values, + new_mask.view("u1"), + ) + + # reconstruct dtype if needed + if needs_i8_conversion(values): + new_values = new_values.view(values.dtype) + + return new_values, new_mask + + def get_new_columns(self): + if self.value_columns is None: + if self.lift == 0: + return self.removed_level._shallow_copy(name=self.removed_name) + + lev = self.removed_level.insert(0, item=self.removed_level._na_value) + return lev.rename(self.removed_name) + + stride = len(self.removed_level) + self.lift + width = len(self.value_columns) + propagator = np.repeat(np.arange(width), stride) + if isinstance(self.value_columns, MultiIndex): + new_levels = self.value_columns.levels + (self.removed_level_full,) + new_names = self.value_columns.names + (self.removed_name,) + + new_codes = [lab.take(propagator) for lab in self.value_columns.codes] + else: + new_levels = [self.value_columns, self.removed_level_full] + new_names = [self.value_columns.name, self.removed_name] + new_codes = [propagator] + + # The two indices differ only if the unstacked level had unused items: + if len(self.removed_level_full) != len(self.removed_level): + # In this case, we remap the new codes to the original level: + repeater = self.removed_level_full.get_indexer(self.removed_level) + if self.lift: + repeater = np.insert(repeater, 0, -1) + else: + # Otherwise, we just use each level item exactly once: + repeater = np.arange(stride) - self.lift + + # The entire level is then just a repetition of the single chunk: + new_codes.append(np.tile(repeater, width)) + return MultiIndex( 
+ levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + def get_new_index(self): + result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]] + + # construct the new index + if len(self.new_index_levels) == 1: + level, level_codes = self.new_index_levels[0], result_codes[0] + if (level_codes == -1).any(): + level = level.insert(len(level), level._na_value) + return level.take(level_codes).rename(self.new_index_names[0]) + + return MultiIndex( + levels=self.new_index_levels, + codes=result_codes, + names=self.new_index_names, + verify_integrity=False, + ) + + +def _unstack_multiple(data, clocs, fill_value=None): + if len(clocs) == 0: + return data + + # NOTE: This doesn't deal with hierarchical columns yet + + index = data.index + + clocs = [index._get_level_number(i) for i in clocs] + + rlocs = [i for i in range(index.nlevels) if i not in clocs] + + clevels = [index.levels[i] for i in clocs] + ccodes = [index.codes[i] for i in clocs] + cnames = [index.names[i] for i in clocs] + rlevels = [index.levels[i] for i in rlocs] + rcodes = [index.codes[i] for i in rlocs] + rnames = [index.names[i] for i in rlocs] + + shape = [len(x) for x in clevels] + group_index = get_group_index(ccodes, shape, sort=False, xnull=False) + + comp_ids, obs_ids = compress_group_index(group_index, sort=False) + recons_codes = decons_obs_group_ids(comp_ids, obs_ids, shape, ccodes, xnull=False) + + if rlocs == []: + # Everything is in clocs, so the dummy df has a regular index + dummy_index = Index(obs_ids, name="__placeholder__") + else: + dummy_index = MultiIndex( + levels=rlevels + [obs_ids], + codes=rcodes + [comp_ids], + names=rnames + ["__placeholder__"], + verify_integrity=False, + ) + + if isinstance(data, Series): + dummy = data.copy() + dummy.index = dummy_index + + unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) + new_levels = clevels + new_names = cnames + new_codes = recons_codes + else: + if 
isinstance(data.columns, MultiIndex): + result = data + for i in range(len(clocs)): + val = clocs[i] + result = result.unstack(val, fill_value=fill_value) + clocs = [v if i > v else v - 1 for v in clocs] + + return result + + dummy = data.copy() + dummy.index = dummy_index + + unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) + if isinstance(unstacked, Series): + unstcols = unstacked.index + else: + unstcols = unstacked.columns + new_levels = [unstcols.levels[0]] + clevels + new_names = [data.columns.name] + cnames + + new_codes = [unstcols.codes[0]] + for rec in recons_codes: + new_codes.append(rec.take(unstcols.codes[-1])) + + new_columns = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + if isinstance(unstacked, Series): + unstacked.index = new_columns + else: + unstacked.columns = new_columns + + return unstacked + + +def unstack(obj, level, fill_value=None): + if isinstance(level, (tuple, list)): + if len(level) != 1: + # _unstack_multiple only handles MultiIndexes, + # and isn't needed for a single level + return _unstack_multiple(obj, level, fill_value=fill_value) + else: + level = level[0] + + # Prioritize integer interpretation (GH #21677): + if not is_integer(level) and not level == "__placeholder__": + level = obj.index._get_level_number(level) + + if isinstance(obj, DataFrame): + if isinstance(obj.index, MultiIndex): + return _unstack_frame(obj, level, fill_value=fill_value) + else: + return obj.T.stack(dropna=False) + else: + if is_extension_array_dtype(obj.dtype): + return _unstack_extension_series(obj, level, fill_value) + unstacker = _Unstacker( + obj.values, + obj.index, + level=level, + fill_value=fill_value, + constructor=obj._constructor_expanddim, + ) + return unstacker.get_result() + + +def _unstack_frame(obj, level, fill_value=None): + if obj._is_mixed_type: + unstacker = partial( + _Unstacker, index=obj.index, level=level, fill_value=fill_value + ) + blocks = 
obj._data.unstack(unstacker, fill_value=fill_value) + return obj._constructor(blocks) + else: + unstacker = _Unstacker( + obj.values, + obj.index, + level=level, + value_columns=obj.columns, + fill_value=fill_value, + constructor=obj._constructor, + ) + return unstacker.get_result() + + +def _unstack_extension_series(series, level, fill_value): + """ + Unstack an ExtensionArray-backed Series. + + The ExtensionDtype is preserved. + + Parameters + ---------- + series : Series + A Series with an ExtensionArray for values + level : Any + The level name or number. + fill_value : Any + The user-level (not physical storage) fill value to use for + missing values introduced by the reshape. Passed to + ``series.values.take``. + + Returns + ------- + DataFrame + Each column of the DataFrame will have the same dtype as + the input Series. + """ + # Implementation note: the basic idea is to + # 1. Do a regular unstack on a dummy array of integers + # 2. Followup with a columnwise take. + # We use the dummy take to discover newly-created missing values + # introduced by the reshape. + from pandas.core.reshape.concat import concat + + dummy_arr = np.arange(len(series)) + # fill_value=-1, since we will do a series.values.take later + result = _Unstacker( + dummy_arr, series.index, level=level, fill_value=-1 + ).get_result() + + out = [] + values = extract_array(series, extract_numpy=False) + + for col, indices in result.items(): + out.append( + Series( + values.take(indices.values, allow_fill=True, fill_value=fill_value), + name=col, + index=result.index, + ) + ) + return concat(out, axis="columns", copy=False, keys=result.columns) + + +def stack(frame, level=-1, dropna=True): + """ + Convert DataFrame to Series with multi-level Index. 
Columns become the + second level of the resulting hierarchical index + + Returns + ------- + stacked : Series + """ + + def factorize(index): + if index.is_unique: + return index, np.arange(len(index)) + codes, categories = factorize_from_iterable(index) + return categories, codes + + N, K = frame.shape + + # Will also convert negative level numbers and check if out of bounds. + level_num = frame.columns._get_level_number(level) + + if isinstance(frame.columns, MultiIndex): + return _stack_multi_columns(frame, level_num=level_num, dropna=dropna) + elif isinstance(frame.index, MultiIndex): + new_levels = list(frame.index.levels) + new_codes = [lab.repeat(K) for lab in frame.index.codes] + + clev, clab = factorize(frame.columns) + new_levels.append(clev) + new_codes.append(np.tile(clab, N).ravel()) + + new_names = list(frame.index.names) + new_names.append(frame.columns.name) + new_index = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + else: + levels, (ilab, clab) = zip(*map(factorize, (frame.index, frame.columns))) + codes = ilab.repeat(K), np.tile(clab, N).ravel() + new_index = MultiIndex( + levels=levels, + codes=codes, + names=[frame.index.name, frame.columns.name], + verify_integrity=False, + ) + + if frame._is_homogeneous_type: + # For homogeneous EAs, frame.values will coerce to object. So + # we concatenate instead. 
+ dtypes = list(frame.dtypes.values) + dtype = dtypes[0] + + if is_extension_array_dtype(dtype): + arr = dtype.construct_array_type() + new_values = arr._concat_same_type( + [col._values for _, col in frame.items()] + ) + new_values = _reorder_for_extension_array_stack(new_values, N, K) + else: + # homogeneous, non-EA + new_values = frame.values.ravel() + + else: + # non-homogeneous + new_values = frame.values.ravel() + + if dropna: + mask = notna(new_values) + new_values = new_values[mask] + new_index = new_index[mask] + + return frame._constructor_sliced(new_values, index=new_index) + + +def stack_multiple(frame, level, dropna=True): + # If all passed levels match up to column names, no + # ambiguity about what to do + if all(lev in frame.columns.names for lev in level): + result = frame + for lev in level: + result = stack(result, lev, dropna=dropna) + + # Otherwise, level numbers may change as each successive level is stacked + elif all(isinstance(lev, int) for lev in level): + # As each stack is done, the level numbers decrease, so we need + # to account for that when level is a sequence of ints + result = frame + # _get_level_number() checks level numbers are in range and converts + # negative numbers to positive + level = [frame.columns._get_level_number(lev) for lev in level] + + # Can't iterate directly through level as we might need to change + # values as we go + for index in range(len(level)): + lev = level[index] + result = stack(result, lev, dropna=dropna) + # Decrement all level numbers greater than current, as these + # have now shifted down by one + updated_level = [] + for other in level: + if other > lev: + updated_level.append(other - 1) + else: + updated_level.append(other) + level = updated_level + + else: + raise ValueError( + "level should contain all level names or all level " + "numbers, not a mixture of the two." 
+ ) + + return result + + +def _stack_multi_columns(frame, level_num=-1, dropna=True): + def _convert_level_number(level_num, columns): + """ + Logic for converting the level number to something we can safely pass + to swaplevel: + + We generally want to convert the level number into a level name, except + when columns do not have names, in which case we must leave as a level + number + """ + if level_num in columns.names: + return columns.names[level_num] + else: + if columns.names[level_num] is None: + return level_num + else: + return columns.names[level_num] + + this = frame.copy() + + # this makes life much simpler + if level_num != frame.columns.nlevels - 1: + # roll levels to put selected level at end + roll_columns = this.columns + for i in range(level_num, frame.columns.nlevels - 1): + # Need to check if the ints conflict with level names + lev1 = _convert_level_number(i, roll_columns) + lev2 = _convert_level_number(i + 1, roll_columns) + roll_columns = roll_columns.swaplevel(lev1, lev2) + this.columns = roll_columns + + if not this.columns.is_lexsorted(): + # Workaround the edge case where 0 is one of the column names, + # which interferes with trying to sort based on the first + # level + level_to_sort = _convert_level_number(0, this.columns) + this = this.sort_index(level=level_to_sort, axis=1) + + # tuple list excluding level for grouping columns + if len(frame.columns.levels) > 2: + tuples = list( + zip( + *[ + lev.take(level_codes) + for lev, level_codes in zip( + this.columns.levels[:-1], this.columns.codes[:-1] + ) + ] + ) + ) + unique_groups = [key for key, _ in itertools.groupby(tuples)] + new_names = this.columns.names[:-1] + new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) + else: + new_columns = this.columns.levels[0]._shallow_copy(name=this.columns.names[0]) + unique_groups = new_columns + + # time to ravel the values + new_data = {} + level_vals = this.columns.levels[-1] + level_codes = sorted(set(this.columns.codes[-1])) 
+ level_vals_used = level_vals[level_codes] + levsize = len(level_codes) + drop_cols = [] + for key in unique_groups: + try: + loc = this.columns.get_loc(key) + except KeyError: + drop_cols.append(key) + continue + + # can make more efficient? + # we almost always return a slice + # but if unsorted can get a boolean + # indexer + if not isinstance(loc, slice): + slice_len = len(loc) + else: + slice_len = loc.stop - loc.start + + if slice_len != levsize: + chunk = this.loc[:, this.columns[loc]] + chunk.columns = level_vals.take(chunk.columns.codes[-1]) + value_slice = chunk.reindex(columns=level_vals_used).values + else: + if frame._is_homogeneous_type and is_extension_array_dtype( + frame.dtypes.iloc[0] + ): + dtype = this[this.columns[loc]].dtypes.iloc[0] + subset = this[this.columns[loc]] + + value_slice = dtype.construct_array_type()._concat_same_type( + [x._values for _, x in subset.items()] + ) + N, K = this.shape + idx = np.arange(N * K).reshape(K, N).T.ravel() + value_slice = value_slice.take(idx) + + elif frame._is_mixed_type: + value_slice = this[this.columns[loc]].values + else: + value_slice = this.values[:, loc] + + if value_slice.ndim > 1: + # i.e. not extension + value_slice = value_slice.ravel() + + new_data[key] = value_slice + + if len(drop_cols) > 0: + new_columns = new_columns.difference(drop_cols) + + N = len(this) + + if isinstance(this.index, MultiIndex): + new_levels = list(this.index.levels) + new_names = list(this.index.names) + new_codes = [lab.repeat(levsize) for lab in this.index.codes] + else: + old_codes, old_levels = factorize_from_iterable(this.index) + new_levels = [old_levels] + new_codes = [old_codes.repeat(levsize)] + new_names = [this.index.name] # something better? 
+ + new_levels.append(level_vals) + new_codes.append(np.tile(level_codes, N)) + new_names.append(frame.columns.names[level_num]) + + new_index = MultiIndex( + levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False + ) + + result = frame._constructor(new_data, index=new_index, columns=new_columns) + + # more efficient way to go about this? can do the whole masking biz but + # will only save a small amount of time... + if dropna: + result = result.dropna(axis=0, how="all") + + return result + + +def get_dummies( + data, + prefix=None, + prefix_sep="_", + dummy_na=False, + columns=None, + sparse=False, + drop_first=False, + dtype=None, +) -> "DataFrame": + """ + Convert categorical variable into dummy/indicator variables. + + Parameters + ---------- + data : array-like, Series, or DataFrame + Data of which to get dummy indicators. + prefix : str, list of str, or dict of str, default None + String to append DataFrame column names. + Pass a list with length equal to the number of columns + when calling get_dummies on a DataFrame. Alternatively, `prefix` + can be a dictionary mapping column names to prefixes. + prefix_sep : str, default '_' + If appending prefix, separator/delimiter to use. Or pass a + list or dictionary as with `prefix`. + dummy_na : bool, default False + Add a column to indicate NaNs, if False NaNs are ignored. + columns : list-like, default None + Column names in the DataFrame to be encoded. + If `columns` is None then all the columns with + `object` or `category` dtype will be converted. + sparse : bool, default False + Whether the dummy-encoded columns should be backed by + a :class:`SparseArray` (True) or a regular NumPy array (False). + drop_first : bool, default False + Whether to get k-1 dummies out of k categorical levels by removing the + first level. + dtype : dtype, default np.uint8 + Data type for new columns. Only a single dtype is allowed. + + .. 
versionadded:: 0.23.0 + + Returns + ------- + DataFrame + Dummy-coded data. + + See Also + -------- + Series.str.get_dummies : Convert Series to dummy codes. + + Examples + -------- + >>> s = pd.Series(list('abca')) + + >>> pd.get_dummies(s) + a b c + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + 3 1 0 0 + + >>> s1 = ['a', 'b', np.nan] + + >>> pd.get_dummies(s1) + a b + 0 1 0 + 1 0 1 + 2 0 0 + + >>> pd.get_dummies(s1, dummy_na=True) + a b NaN + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + + >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], + ... 'C': [1, 2, 3]}) + + >>> pd.get_dummies(df, prefix=['col1', 'col2']) + C col1_a col1_b col2_a col2_b col2_c + 0 1 1 0 0 1 0 + 1 2 0 1 1 0 0 + 2 3 1 0 0 0 1 + + >>> pd.get_dummies(pd.Series(list('abcaa'))) + a b c + 0 1 0 0 + 1 0 1 0 + 2 0 0 1 + 3 1 0 0 + 4 1 0 0 + + >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) + b c + 0 0 0 + 1 1 0 + 2 0 1 + 3 0 0 + 4 0 0 + + >>> pd.get_dummies(pd.Series(list('abc')), dtype=float) + a b c + 0 1.0 0.0 0.0 + 1 0.0 1.0 0.0 + 2 0.0 0.0 1.0 + """ + from pandas.core.reshape.concat import concat + + dtypes_to_encode = ["object", "category"] + + if isinstance(data, DataFrame): + # determine columns being encoded + if columns is None: + data_to_encode = data.select_dtypes(include=dtypes_to_encode) + elif not is_list_like(columns): + raise TypeError("Input must be a list-like for parameter `columns`") + else: + data_to_encode = data[columns] + + # validate prefixes and separator to avoid silently dropping cols + def check_len(item, name): + len_msg = ( + "Length of '{name}' ({len_item}) did not match the " + "length of the columns being encoded ({len_enc})." 
+ ) + + if is_list_like(item): + if not len(item) == data_to_encode.shape[1]: + len_msg = len_msg.format( + name=name, len_item=len(item), len_enc=data_to_encode.shape[1] + ) + raise ValueError(len_msg) + + check_len(prefix, "prefix") + check_len(prefix_sep, "prefix_sep") + + if isinstance(prefix, str): + prefix = itertools.cycle([prefix]) + if isinstance(prefix, dict): + prefix = [prefix[col] for col in data_to_encode.columns] + + if prefix is None: + prefix = data_to_encode.columns + + # validate separators + if isinstance(prefix_sep, str): + prefix_sep = itertools.cycle([prefix_sep]) + elif isinstance(prefix_sep, dict): + prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] + + if data_to_encode.shape == data.shape: + # Encoding the entire df, do not prepend any dropped columns + with_dummies: List[DataFrame] = [] + elif columns is not None: + # Encoding only cols specified in columns. Get all cols not in + # columns to prepend to result. + with_dummies = [data.drop(columns, axis=1)] + else: + # Encoding only object and category dtype columns. Get remaining + # columns to prepend to result. 
+ with_dummies = [data.select_dtypes(exclude=dtypes_to_encode)] + + for (col, pre, sep) in zip(data_to_encode.items(), prefix, prefix_sep): + # col is (column_name, column), use just column data here + dummy = _get_dummies_1d( + col[1], + prefix=pre, + prefix_sep=sep, + dummy_na=dummy_na, + sparse=sparse, + drop_first=drop_first, + dtype=dtype, + ) + with_dummies.append(dummy) + result = concat(with_dummies, axis=1) + else: + result = _get_dummies_1d( + data, + prefix, + prefix_sep, + dummy_na, + sparse=sparse, + drop_first=drop_first, + dtype=dtype, + ) + return result + + +def _get_dummies_1d( + data, + prefix, + prefix_sep="_", + dummy_na=False, + sparse=False, + drop_first=False, + dtype=None, +): + from pandas.core.reshape.concat import concat + + # Series avoids inconsistent NaN handling + codes, levels = factorize_from_iterable(Series(data)) + + if dtype is None: + dtype = np.uint8 + dtype = np.dtype(dtype) + + if is_object_dtype(dtype): + raise ValueError("dtype=object is not a valid dtype for get_dummies") + + def get_empty_frame(data) -> DataFrame: + if isinstance(data, Series): + index = data.index + else: + index = np.arange(len(data)) + return DataFrame(index=index) + + # if all NaN + if not dummy_na and len(levels) == 0: + return get_empty_frame(data) + + codes = codes.copy() + if dummy_na: + codes[codes == -1] = len(levels) + levels = np.append(levels, np.nan) + + # if dummy_na, we just fake a nan level. 
drop_first will drop it again + if drop_first and len(levels) == 1: + return get_empty_frame(data) + + number_of_cols = len(levels) + + if prefix is None: + dummy_cols = levels + else: + + # PY2 embedded unicode, gh-22084 + def _make_col_name(prefix, prefix_sep, level) -> str: + fstr = "{prefix}{prefix_sep}{level}" + return fstr.format(prefix=prefix, prefix_sep=prefix_sep, level=level) + + dummy_cols = [_make_col_name(prefix, prefix_sep, level) for level in levels] + + if isinstance(data, Series): + index = data.index + else: + index = None + + if sparse: + + if is_integer_dtype(dtype): + fill_value = 0 + elif dtype == bool: + fill_value = False + else: + fill_value = 0.0 + + sparse_series = [] + N = len(data) + sp_indices = [[] for _ in range(len(dummy_cols))] + mask = codes != -1 + codes = codes[mask] + n_idx = np.arange(N)[mask] + + for ndx, code in zip(n_idx, codes): + sp_indices[code].append(ndx) + + if drop_first: + # remove first categorical level to avoid perfect collinearity + # GH12042 + sp_indices = sp_indices[1:] + dummy_cols = dummy_cols[1:] + for col, ixs in zip(dummy_cols, sp_indices): + sarr = SparseArray( + np.ones(len(ixs), dtype=dtype), + sparse_index=IntIndex(N, ixs), + fill_value=fill_value, + dtype=dtype, + ) + sparse_series.append(Series(data=sarr, index=index, name=col)) + + out = concat(sparse_series, axis=1, copy=False) + return out + + else: + dummy_mat = np.eye(number_of_cols, dtype=dtype).take(codes, axis=0) + + if not dummy_na: + # reset NaN GH4446 + dummy_mat[codes == -1] = 0 + + if drop_first: + # remove first GH12042 + dummy_mat = dummy_mat[:, 1:] + dummy_cols = dummy_cols[1:] + return DataFrame(dummy_mat, index=index, columns=dummy_cols) + + +def _reorder_for_extension_array_stack(arr, n_rows: int, n_columns: int): + """ + Re-orders the values when stacking multiple extension-arrays. + + The indirect stacking method used for EAs requires a followup + take to get the order correct. 
+ + Parameters + ---------- + arr : ExtensionArray + n_rows, n_columns : int + The number of rows and columns in the original DataFrame. + + Returns + ------- + taken : ExtensionArray + The original `arr` with elements re-ordered appropriately + + Examples + -------- + >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f']) + >>> _reorder_for_extension_array_stack(arr, 2, 3) + array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='>> _reorder_for_extension_array_stack(arr, 3, 2) + array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3) + ... # doctest: +ELLIPSIS + [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... + Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ... + + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True) + ... # doctest: +ELLIPSIS + ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ... + Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ... + array([0.994, 3. , 5. , 7. ])) + + Discovers the same bins, but assign them specific labels. Notice that + the returned Categorical's categories are `labels` and is ordered. + + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), + ... 3, labels=["bad", "medium", "good"]) + [bad, good, medium, medium, good, bad] + Categories (3, object): [bad < medium < good] + + ``labels=False`` implies you just want the bins back. + + >>> pd.cut([0, 1, 1, 2], bins=4, labels=False) + array([0, 1, 1, 3]) + + Passing a Series as an input returns a Series with categorical dtype: + + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), + ... index=['a', 'b', 'c', 'd', 'e']) + >>> pd.cut(s, 3) + ... # doctest: +ELLIPSIS + a (1.992, 4.667] + b (1.992, 4.667] + c (4.667, 7.333] + d (7.333, 10.0] + e (7.333, 10.0] + dtype: category + Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ... + + Passing a Series as an input returns a Series with mapping value. + It is used to map numerically to intervals based on bins. 
+ + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), + ... index=['a', 'b', 'c', 'd', 'e']) + >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) + ... # doctest: +ELLIPSIS + (a 0.0 + b 1.0 + c 2.0 + d 3.0 + e 4.0 + dtype: float64, array([0, 2, 4, 6, 8])) + + Use `drop` optional when bins is not unique + + >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, + ... right=False, duplicates='drop') + ... # doctest: +ELLIPSIS + (a 0.0 + b 1.0 + c 2.0 + d 3.0 + e 3.0 + dtype: float64, array([0, 2, 4, 6, 8])) + + Passing an IntervalIndex for `bins` results in those categories exactly. + Notice that values not covered by the IntervalIndex are set to NaN. 0 + is to the left of the first bin (which is closed on the right), and 1.5 + falls between two bins. + + >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]) + >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins) + [NaN, (0, 1], NaN, (2, 3], (4, 5]] + Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]] + """ + # NOTE: this binning code is changed a bit from histogram for var(x) == 0 + + original = x + x = _preprocess_for_cut(x) + x, dtype = _coerce_to_type(x) + + if not np.iterable(bins): + if is_scalar(bins) and bins < 1: + raise ValueError("`bins` should be a positive integer.") + + try: # for array-like + sz = x.size + except AttributeError: + x = np.asarray(x) + sz = x.size + + if sz == 0: + raise ValueError("Cannot cut empty array") + + rng = (nanops.nanmin(x), nanops.nanmax(x)) + mn, mx = [mi + 0.0 for mi in rng] + + if np.isinf(mn) or np.isinf(mx): + # GH 24314 + raise ValueError( + "cannot specify integer `bins` when input data contains infinity" + ) + elif mn == mx: # adjust end points before binning + mn -= 0.001 * abs(mn) if mn != 0 else 0.001 + mx += 0.001 * abs(mx) if mx != 0 else 0.001 + bins = np.linspace(mn, mx, bins + 1, endpoint=True) + else: # adjust end points after binning + bins = np.linspace(mn, mx, bins + 1, endpoint=True) + adj = (mx - mn) * 0.001 # 0.1% 
of the range + if right: + bins[0] -= adj + else: + bins[-1] += adj + + elif isinstance(bins, IntervalIndex): + if bins.is_overlapping: + raise ValueError("Overlapping IntervalIndex is not accepted.") + + else: + if is_datetime64tz_dtype(bins): + bins = np.asarray(bins, dtype=_NS_DTYPE) + else: + bins = np.asarray(bins) + bins = _convert_bin_to_numeric_type(bins, dtype) + + # GH 26045: cast to float64 to avoid an overflow + if (np.diff(bins.astype("float64")) < 0).any(): + raise ValueError("bins must increase monotonically.") + + fac, bins = _bins_to_cuts( + x, + bins, + right=right, + labels=labels, + precision=precision, + include_lowest=include_lowest, + dtype=dtype, + duplicates=duplicates, + ) + + return _postprocess_for_cut(fac, bins, retbins, dtype, original) + + +def qcut( + x, + q, + labels=None, + retbins: bool = False, + precision: int = 3, + duplicates: str = "raise", +): + """ + Quantile-based discretization function. + + Discretize variable into equal-sized buckets based on rank or based + on sample quantiles. For example 1000 values for 10 quantiles would + produce a Categorical object indicating quantile membership for each data point. + + Parameters + ---------- + x : 1d ndarray or Series + q : int or list-like of int + Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately + array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles. + labels : array or False, default None + Used as labels for the resulting bins. Must be of the same length as + the resulting bins. If False, return only integer indicators of the + bins. If True, raises an error. + retbins : bool, optional + Whether to return the (bins, labels) or not. Can be useful if bins + is given as a scalar. + precision : int, optional + The precision at which to store and display the bins labels. + duplicates : {default 'raise', 'drop'}, optional + If bin edges are not unique, raise ValueError or drop non-uniques. 
+ + Returns + ------- + out : Categorical or Series or array of integers if labels is False + The return type (Categorical or Series) depends on the input: a Series + of type category if input is a Series else Categorical. Bins are + represented as categories when categorical data is returned. + bins : ndarray of floats + Returned only if `retbins` is True. + + Notes + ----- + Out of bounds values will be NA in the resulting Categorical object + + Examples + -------- + >>> pd.qcut(range(5), 4) + ... # doctest: +ELLIPSIS + [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]] + Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ... + + >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"]) + ... # doctest: +SKIP + [good, good, medium, bad, bad] + Categories (3, object): [good < medium < bad] + + >>> pd.qcut(range(5), 4, labels=False) + array([0, 0, 1, 2, 3]) + """ + original = x + x = _preprocess_for_cut(x) + x, dtype = _coerce_to_type(x) + + if is_integer(q): + quantiles = np.linspace(0, 1, q + 1) + else: + quantiles = q + bins = algos.quantile(x, quantiles) + fac, bins = _bins_to_cuts( + x, + bins, + labels=labels, + precision=precision, + include_lowest=True, + dtype=dtype, + duplicates=duplicates, + ) + + return _postprocess_for_cut(fac, bins, retbins, dtype, original) + + +def _bins_to_cuts( + x, + bins, + right: bool = True, + labels=None, + precision: int = 3, + include_lowest: bool = False, + dtype=None, + duplicates: str = "raise", +): + + if duplicates not in ["raise", "drop"]: + raise ValueError( + "invalid value for 'duplicates' parameter, " + "valid options are: raise, drop" + ) + + if isinstance(bins, IntervalIndex): + # we have a fast-path here + ids = bins.get_indexer(x) + result = Categorical.from_codes(ids, categories=bins, ordered=True) + return result, bins + + unique_bins = algos.unique(bins) + if len(unique_bins) < len(bins) and len(bins) != 2: + if duplicates == "raise": + raise ValueError( + f"Bin edges must be 
unique: {repr(bins)}.\n" + f"You can drop duplicate edges by setting the 'duplicates' kwarg" + ) + else: + bins = unique_bins + + side = "left" if right else "right" + ids = ensure_int64(bins.searchsorted(x, side=side)) + + if include_lowest: + ids[x == bins[0]] = 1 + + na_mask = isna(x) | (ids == len(bins)) | (ids == 0) + has_nas = na_mask.any() + + if labels is not False: + if not (labels is None or is_list_like(labels)): + raise ValueError( + "Bin labels must either be False, None or passed in as a " + "list-like argument" + ) + + elif labels is None: + labels = _format_labels( + bins, precision, right=right, include_lowest=include_lowest, dtype=dtype + ) + + else: + if len(labels) != len(bins) - 1: + raise ValueError( + "Bin labels must be one fewer than the number of bin edges" + ) + + if not is_categorical_dtype(labels): + labels = Categorical(labels, categories=labels, ordered=True) + + np.putmask(ids, na_mask, 0) + result = algos.take_nd(labels, ids - 1) + + else: + result = ids - 1 + if has_nas: + result = result.astype(np.float64) + np.putmask(result, na_mask, np.nan) + + return result, bins + + +def _coerce_to_type(x): + """ + if the passed data is of datetime/timedelta, bool or nullable int type, + this method converts it to numeric so that cut or qcut method can + handle it + """ + dtype = None + + if is_datetime64tz_dtype(x): + dtype = x.dtype + elif is_datetime64_dtype(x): + x = to_datetime(x) + dtype = np.dtype("datetime64[ns]") + elif is_timedelta64_dtype(x): + x = to_timedelta(x) + dtype = np.dtype("timedelta64[ns]") + elif is_bool_dtype(x): + # GH 20303 + x = x.astype(np.int64) + # To support cut and qcut for IntegerArray we convert to float dtype. + # Will properly support in the future. 
+ # https://github.com/pandas-dev/pandas/pull/31290 + # https://github.com/pandas-dev/pandas/issues/31389 + elif is_extension_array_dtype(x) and is_integer_dtype(x): + x = x.to_numpy(dtype=np.float64, na_value=np.nan) + + if dtype is not None: + # GH 19768: force NaT to NaN during integer conversion + x = np.where(x.notna(), x.view(np.int64), np.nan) + + return x, dtype + + +def _convert_bin_to_numeric_type(bins, dtype): + """ + if the passed bin is of datetime/timedelta type, + this method converts it to integer + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Raises + ------ + ValueError if bins are not of a compat dtype to dtype + """ + bins_dtype = infer_dtype(bins, skipna=False) + if is_timedelta64_dtype(dtype): + if bins_dtype in ["timedelta", "timedelta64"]: + bins = to_timedelta(bins).view(np.int64) + else: + raise ValueError("bins must be of timedelta64 dtype") + elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + if bins_dtype in ["datetime", "datetime64"]: + bins = to_datetime(bins).view(np.int64) + else: + raise ValueError("bins must be of datetime64 dtype") + + return bins + + +def _convert_bin_to_datelike_type(bins, dtype): + """ + Convert bins to a DatetimeIndex or TimedeltaIndex if the original dtype is + datelike + + Parameters + ---------- + bins : list-like of bins + dtype : dtype of data + + Returns + ------- + bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is + datelike + """ + if is_datetime64tz_dtype(dtype): + bins = to_datetime(bins.astype(np.int64), utc=True).tz_convert(dtype.tz) + elif is_datetime_or_timedelta_dtype(dtype): + bins = Index(bins.astype(np.int64), dtype=dtype) + return bins + + +def _format_labels( + bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None +): + """ based on the dtype, return our labels """ + + closed = "right" if right else "left" + + if is_datetime64tz_dtype(dtype): + formatter = lambda x: Timestamp(x, 
tz=dtype.tz) + adjust = lambda x: x - Timedelta("1ns") + elif is_datetime64_dtype(dtype): + formatter = Timestamp + adjust = lambda x: x - Timedelta("1ns") + elif is_timedelta64_dtype(dtype): + formatter = Timedelta + adjust = lambda x: x - Timedelta("1ns") + else: + precision = _infer_precision(precision, bins) + formatter = lambda x: _round_frac(x, precision) + adjust = lambda x: x - 10 ** (-precision) + + breaks = [formatter(b) for b in bins] + if right and include_lowest: + # adjust lhs of first interval by precision to account for being right closed + breaks[0] = adjust(breaks[0]) + + return IntervalIndex.from_breaks(breaks, closed=closed) + + +def _preprocess_for_cut(x): + """ + handles preprocessing for cut where we convert passed + input to array, strip the index information and store it + separately + """ + + # Check that the passed array is a Pandas or Numpy object + # We don't want to strip away a Pandas data-type here (e.g. datetimetz) + ndim = getattr(x, "ndim", None) + if ndim is None: + x = np.asarray(x) + if x.ndim != 1: + raise ValueError("Input array must be 1 dimensional") + + return x + + +def _postprocess_for_cut(fac, bins, retbins: bool, dtype, original): + """ + handles post processing for the cut method where + we combine the index information if the originally passed + datatype was a series + """ + if isinstance(original, ABCSeries): + fac = original._constructor(fac, index=original.index, name=original.name) + + if not retbins: + return fac + + bins = _convert_bin_to_datelike_type(bins, dtype) + + return fac, bins + + +def _round_frac(x, precision: int): + """ + Round the fractional part of the given number + """ + if not np.isfinite(x) or x == 0: + return x + else: + frac, whole = np.modf(x) + if whole == 0: + digits = -int(np.floor(np.log10(abs(frac)))) - 1 + precision + else: + digits = precision + return np.around(x, digits) + + +def _infer_precision(base_precision: int, bins) -> int: + """Infer an appropriate precision for _round_frac 
+ """ + for precision in range(base_precision, 20): + levels = [_round_frac(b, precision) for b in bins] + if algos.unique(levels).size == bins.size: + return precision + return base_precision # default diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py new file mode 100644 index 00000000..d8652c9b --- /dev/null +++ b/pandas/core/reshape/util.py @@ -0,0 +1,59 @@ +import numpy as np + +from pandas.core.dtypes.common import is_list_like + +import pandas.core.common as com + + +def cartesian_product(X): + """ + Numpy version of itertools.product. + Sometimes faster (for large inputs)... + + Parameters + ---------- + X : list-like of list-likes + + Returns + ------- + product : list of ndarrays + + Examples + -------- + >>> cartesian_product([list('ABC'), [1, 2]]) + [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='|S1'), + array([1, 2, 1, 2, 1, 2])] + + See Also + -------- + itertools.product : Cartesian product of input iterables. Equivalent to + nested for-loops. + """ + msg = "Input must be a list-like of list-likes" + if not is_list_like(X): + raise TypeError(msg) + for x in X: + if not is_list_like(x): + raise TypeError(msg) + + if len(X) == 0: + return [] + + lenX = np.fromiter((len(x) for x in X), dtype=np.intp) + cumprodX = np.cumproduct(lenX) + + a = np.roll(cumprodX, 1) + a[0] = 1 + + if cumprodX[-1] != 0: + b = cumprodX[-1] / cumprodX + else: + # if any factor is empty, the cartesian product is empty + b = np.zeros_like(cumprodX) + + return [ + np.tile( + np.repeat(np.asarray(com.values_from_object(x)), b[i]), np.product(a[i]) + ) + for i, x in enumerate(X) + ] diff --git a/pandas/core/series.py b/pandas/core/series.py new file mode 100644 index 00000000..02381951 --- /dev/null +++ b/pandas/core/series.py @@ -0,0 +1,4576 @@ +""" +Data structure for 1-dimensional cross-sectional and time series data +""" +from io import StringIO +from shutil import get_terminal_size +from textwrap import dedent +from typing import IO, Any, Callable, Hashable, 
List, Optional +import warnings + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import index as libindex, lib, reshape, tslibs +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, Substitution +from pandas.util._validators import validate_bool_kwarg, validate_percentile + +from pandas.core.dtypes.cast import convert_dtypes +from pandas.core.dtypes.common import ( + _is_unorderable_exception, + ensure_platform_int, + is_bool, + is_categorical_dtype, + is_datetime64_dtype, + is_dict_like, + is_extension_array_dtype, + is_integer, + is_iterator, + is_list_like, + is_object_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDatetimeIndex, + ABCSeries, + ABCSparseArray, +) +from pandas.core.dtypes.inference import is_hashable +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, + notna, + remove_na_arraylike, +) + +import pandas as pd +from pandas.core import algorithms, base, generic, nanops, ops +from pandas.core.accessor import CachedAccessor +from pandas.core.arrays import ExtensionArray, try_cast_to_ea +from pandas.core.arrays.categorical import Categorical, CategoricalAccessor +from pandas.core.arrays.sparse import SparseAccessor +import pandas.core.common as com +from pandas.core.construction import ( + create_series_with_explicit_dtype, + extract_array, + is_empty_data, + sanitize_array, +) +from pandas.core.groupby import generic as groupby_generic +from pandas.core.indexers import maybe_convert_indices +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties +from pandas.core.indexes.api import ( + Float64Index, + Index, + InvalidIndexError, + MultiIndex, + ensure_index, +) +import pandas.core.indexes.base as ibase +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex +from 
pandas.core.indexing import check_bool_indexer +from pandas.core.internals import SingleBlockManager +from pandas.core.strings import StringMethods +from pandas.core.tools.datetimes import to_datetime + +import pandas.io.formats.format as fmt +import pandas.plotting + +__all__ = ["Series"] + +_shared_doc_kwargs = dict( + axes="index", + klass="Series", + axes_single_arg="{0 or 'index'}", + axis="""axis : {0 or 'index'} + Parameter needed for compatibility with DataFrame.""", + inplace="""inplace : boolean, default False + If True, performs operation inplace and returns None.""", + unique="np.ndarray", + duplicated="Series", + optional_by="", + optional_mapper="", + optional_labels="", + optional_axis="", + versionadded_to_excel="\n .. versionadded:: 0.20.0\n", +) + + +def _coerce_method(converter): + """ + Install the scalar coercion methods. + """ + + def wrapper(self): + if len(self) == 1: + return converter(self.iloc[0]) + raise TypeError(f"cannot convert the series to {converter}") + + wrapper.__name__ = f"__{converter.__name__}__" + return wrapper + + +# ---------------------------------------------------------------------- +# Series class + + +class Series(base.IndexOpsMixin, generic.NDFrame): + """ + One-dimensional ndarray with axis labels (including time series). + + Labels need not be unique but must be a hashable type. The object + supports both integer- and label-based indexing and provides a host of + methods for performing operations involving the index. Statistical + methods from ndarray have been overridden to automatically exclude + missing data (currently represented as NaN). + + Operations between Series (+, -, /, *, **) align values based on their + associated index values-- they need not be the same length. The result + index will be the sorted union of the two indexes. + + Parameters + ---------- + data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. + + .. 
def __init__(
    self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
):
    """
    Normalise ``data``/``index`` into a SingleBlockManager and initialise
    the Series from it.

    Parameters
    ----------
    data : object, optional
        Input values. ``None`` is treated as an empty dict.
    index : object, optional
        Labels; passed through ``ensure_index`` on the non-fastpath route.
    dtype : object, optional
        Requested dtype; validated with ``self._validate_dtype``.
    name : object, optional
        Series name; may be extracted from ``data`` by
        ``ibase.maybe_extract_name``.
    copy : bool, default False
        Whether to copy the input data.
    fastpath : bool, default False
        Internal short-circuit: wrap ``data`` in a SingleBlockManager
        (unless it already is one) and skip all validation below.

    Raises
    ------
    NotImplementedError
        If ``data`` is a MultiIndex.
    ValueError
        If ``data`` is an ndarray with a compound dtype, or the lengths
        of ``data`` and ``index`` differ.
    TypeError
        If ``data`` is a ``set`` or ``frozenset``.
    AssertionError
        If ``data`` is a SingleBlockManager and either ``index`` does not
        equal its index or ``copy`` is true.
    """

    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()
        if index is None:
            index = data.index

    else:

        name = ibase.maybe_extract_name(name, data, type(self))

        if is_empty_data(data) and dtype is None:
            # gh-17261
            warnings.warn(
                "The default dtype for empty Series will be 'object' instead "
                "of 'float64' in a future version. Specify a dtype explicitly "
                "to silence this warning.",
                DeprecationWarning,
                stacklevel=2,
            )
            # uncomment the line below when removing the DeprecationWarning
            # dtype = np.dtype(object)

        if index is not None:
            index = ensure_index(index)

        if data is None:
            data = {}
        if dtype is not None:
            dtype = self._validate_dtype(dtype)

        # Dispatch on the kind of input. Several branches reset ``copy``
        # and/or ``dtype`` because they have already applied them.
        if isinstance(data, MultiIndex):
            raise NotImplementedError(
                "initializing a Series from a MultiIndex is not supported"
            )
        elif isinstance(data, Index):

            if dtype is not None:
                # astype copies
                data = data.astype(dtype)
            else:
                # need to copy to avoid aliasing issues
                data = data._values.copy()
                # NOTE(review): ``data`` was just rebound to the copied
                # ``_values``, so this Index check looks unreachable -- confirm.
                if isinstance(data, ABCDatetimeIndex) and data.tz is not None:
                    # GH#24096 need copy to be deep for datetime64tz case
                    # TODO: See if we can avoid these copies
                    data = data._values.copy(deep=True)
            # Both paths above already produced a copy.
            copy = False

        elif isinstance(data, np.ndarray):
            if len(data.dtype):
                # GH#13296 we are dealing with a compound dtype, which
                # should be treated as 2D
                raise ValueError(
                    "Cannot construct a Series from an ndarray with "
                    "compound dtype. Use DataFrame instead."
                )
            pass
        elif isinstance(data, ABCSeries):
            if index is None:
                index = data.index
            else:
                data = data.reindex(index, copy=copy)
            # Continue with the Series' manager rather than the Series itself.
            data = data._data
        elif is_dict_like(data):
            data, index = self._init_dict(data, index, dtype)
            # _init_dict received dtype and returns a manager, so neither
            # dtype nor copy is applied again below.
            dtype = None
            copy = False
        elif isinstance(data, SingleBlockManager):
            if index is None:
                index = data.index
            elif not data.index.equals(index) or copy:
                # GH#19275 SingleBlockManager input should only be called
                # internally
                raise AssertionError(
                    "Cannot pass both SingleBlockManager "
                    "`data` argument and a different "
                    "`index` argument. `copy` must be False."
                )

        elif is_extension_array_dtype(data):
            pass
        elif isinstance(data, (set, frozenset)):
            raise TypeError(f"'{type(data).__name__}' type is unordered")
        elif isinstance(data, ABCSparseArray):
            # handle sparse passed here (and force conversion)
            data = data.to_dense()
        else:
            data = com.maybe_iterable_to_list(data)

        if index is None:
            # No index given: wrap a non-list-like value in a list and
            # build the default index of matching length.
            if not is_list_like(data):
                data = [data]
            index = ibase.default_index(len(data))
        elif is_list_like(data):

            # a scalar numpy array is list-like but doesn't
            # have a proper length
            try:
                if len(index) != len(data):
                    raise ValueError(
                        f"Length of passed values is {len(data)}, "
                        f"index implies {len(index)}."
                    )
            except TypeError:
                pass

        # create/copy the manager
        if isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype=dtype, errors="ignore", copy=copy)
            elif copy:
                data = data.copy()
        else:
            data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)

            data = SingleBlockManager(data, index, fastpath=True)

    generic.NDFrame.__init__(self, data, fastpath=True)
    self.name = name
    self._set_axis(0, index, fastpath=True)
Just use broadcasting a scalar + # instead of reindexing. + values = na_value_for_dtype(dtype) + keys = index + else: + keys, values = [], [] + + # Input is now list-like, so rely on "standard" construction: + + # TODO: passing np.float64 to not break anything yet. See GH-17261 + s = create_series_with_explicit_dtype( + values, index=keys, dtype=dtype, dtype_if_empty=np.float64 + ) + + # Now we just make sure the order is respected, if any + if data and index is not None: + s = s.reindex(index, copy=False) + return s._data, s.index + + # ---------------------------------------------------------------------- + + @property + def _constructor(self): + return Series + + @property + def _constructor_expanddim(self): + from pandas.core.frame import DataFrame + + return DataFrame + + # types + @property + def _can_hold_na(self): + return self._data._can_hold_na + + _index = None + + def _set_axis(self, axis, labels, fastpath=False): + """ + Override generic, we want to set the _typ here. + """ + + if not fastpath: + labels = ensure_index(labels) + + is_all_dates = labels.is_all_dates + if is_all_dates: + if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + try: + labels = DatetimeIndex(labels) + # need to set here because we changed the index + if fastpath: + self._data.set_axis(axis, labels) + except (tslibs.OutOfBoundsDatetime, ValueError): + # labels may exceeds datetime bounds, + # or not be a DatetimeIndex + pass + + self._set_subtyp(is_all_dates) + + object.__setattr__(self, "_index", labels) + if not fastpath: + self._data.set_axis(axis, labels) + + def _set_subtyp(self, is_all_dates): + if is_all_dates: + object.__setattr__(self, "_subtyp", "time_series") + else: + object.__setattr__(self, "_subtyp", "series") + + def _update_inplace(self, result, **kwargs): + # we want to call the generic version and not the IndexOpsMixin + return generic.NDFrame._update_inplace(self, result, **kwargs) + + # ndarray compatibility + @property + def 
@property
def _values(self):
    """
    Return the internal repr of this data (defined by Block.internal_values).
    These are the values as stored in the Block (ndarray or ExtensionArray
    depending on the Block class).

    Differs from the public ``.values`` for certain data types, because of
    historical backwards compatibility of the public attribute (e.g. period
    returns object ndarray and datetimetz a datetime64[ns] ndarray for
    ``.values`` while it returns an ExtensionArray for ``._values`` in those
    cases).

    Differs from ``.array`` in that this still returns the numpy array if
    the Block is backed by a numpy array, while ``.array`` ensures to always
    return an ExtensionArray.

    Differs from ``._ndarray_values``, as that ensures to always return a
    numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
    the Series was backed by an ExtensionArray).

    Overview:

    dtype       | values        | _values       | array         | _ndarray_values |
    ----------- | ------------- | ------------- | ------------- | --------------- |
    Numeric     | ndarray       | ndarray       | PandasArray   | ndarray         |
    Category    | Categorical   | Categorical   | Categorical   | ndarray[int]    |
    dt64[ns]    | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns]   |
    dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns]   |
    Period      | ndarray[obj]  | PeriodArray   | PeriodArray   | ndarray[int]    |
    Nullable    | EA            | EA            | EA            | ndarray         |

    """
    # Delegates to the block manager held in ``_data``.
    return self._data.internal_values()
def __array_ufunc__(
    self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
):
    """
    Apply a NumPy ufunc to this Series, aligning Series inputs first.

    Steps, in order: try the custom dunder operators; defer (return
    ``NotImplemented``) to inputs that outrank us; align all Series
    inputs on the union of their indexes; run the ufunc on the
    extracted arrays; wrap array results back into a Series.

    Parameters
    ----------
    ufunc : callable
        The ufunc being applied.
    method : str
        Name of the ufunc method to call (looked up with ``getattr``).
    *inputs, **kwargs
        Forwarded to the ufunc method.

    Returns
    -------
    Series, scalar, ndarray, tuple, None or NotImplemented
        ``None`` for ``method == "at"``; a tuple when the ufunc returns
        several outputs; results with more than one dimension are
        returned unwrapped.

    Raises
    ------
    NotImplementedError
        For ``method == "outer"`` when the result has more than one
        dimension.
    """
    # TODO: handle DataFrame
    cls = type(self)

    # for binary ops, use our custom dunder methods
    result = ops.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)

    for item in inputs:
        # Defer when an input declares a higher __array_priority__ ...
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        # ... or brings its own __array_ufunc__ and is not a handled type.
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    # Names are collected before alignment; they decide the result name below.
    names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
    types = tuple(type(x) for x in inputs)
    # TODO: dataframe
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.
        index = alignable[0].index
        for s in alignable[1:]:
            index |= s.index
        inputs = tuple(
            x.reindex(index) if issubclass(t, Series) else x
            for x, t in zip(inputs, types)
        )
    else:
        index = self.index

    # Run the ufunc on the extracted arrays, not on the Series objects.
    inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
    result = getattr(ufunc, method)(*inputs, **kwargs)

    # Keep a name only when every named input agrees on it.
    name: Optional[Hashable]
    if len(set(names)) == 1:
        name = names[0]
    else:
        name = None

    def construct_return(result):
        # Scalars and >1-dimensional results are passed through unwrapped;
        # anything else is rebuilt as a Series on the aligned index.
        if lib.is_scalar(result):
            return result
        elif result.ndim > 1:
            # e.g. np.subtract.outer
            if method == "outer":
                # GH#27198
                raise NotImplementedError
            return result
        return self._constructor(result, index=index, name=name, copy=False)

    if type(result) is tuple:
        # multiple return values
        return tuple(construct_return(x) for x in result)
    elif method == "at":
        # no return value
        return None
    else:
        return construct_return(result)
Rather, it is invoked by + :func:`numpy.array` and :func:`numpy.asarray`. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to use for the resulting NumPy array. By default, + the dtype is inferred from the data. + + Returns + ------- + numpy.ndarray + The values in the series converted to a :class:`numpy.ndarary` + with the specified `dtype`. + + See Also + -------- + array : Create a new array from data. + Series.array : Zero-copy view to the array backing the Series. + Series.to_numpy : Series method for similar behavior. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3]) + >>> np.asarray(ser) + array([1, 2, 3]) + + For timezone-aware data, the timezones may be retained with + ``dtype='object'`` + + >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> np.asarray(tzser, dtype="object") + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')], + dtype=object) + + Or the values may be localized to UTC and the tzinfo discarded with + ``dtype='datetime64[ns]'`` + + >>> np.asarray(tzser, dtype="datetime64[ns]") # doctest: +ELLIPSIS + array(['1999-12-31T23:00:00.000000000', ...], + dtype='datetime64[ns]') + """ + return np.asarray(self.array, dtype) + + # ---------------------------------------------------------------------- + # Unary Methods + + # coercion + __float__ = _coerce_method(float) + __long__ = _coerce_method(int) + __int__ = _coerce_method(int) + + # ---------------------------------------------------------------------- + + def _unpickle_series_compat(self, state): + if isinstance(state, dict): + self._data = state["_data"] + self.name = state["name"] + self.index = self._data.index + + elif isinstance(state, tuple): + + # < 0.12 series pickle + + nd_state, own_state = state + + # recreate the ndarray + data = np.empty(nd_state[1], dtype=nd_state[2]) + np.ndarray.__setstate__(data, nd_state) + + # backwards compat + index, name = 
own_state[0], None + if len(own_state) > 1: + name = own_state[1] + + # recreate + self._data = SingleBlockManager(data, index, fastpath=True) + self._index = index + self.name = name + + else: + raise Exception(f"cannot unpickle legacy formats -> [{state}]") + + # indexers + @property + def axes(self): + """ + Return a list of the row axis labels. + """ + return [self.index] + + # ---------------------------------------------------------------------- + # Indexing Methods + + @Appender(generic.NDFrame.take.__doc__) + def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series": + if is_copy is not None: + warnings.warn( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this.", + FutureWarning, + stacklevel=2, + ) + nv.validate_take(tuple(), kwargs) + + indices = ensure_platform_int(indices) + new_index = self.index.take(indices) + + if is_categorical_dtype(self): + # https://github.com/pandas-dev/pandas/issues/20664 + # TODO: remove when the default Categorical.take behavior changes + indices = maybe_convert_indices(indices, len(self._get_axis(axis))) + kwargs = {"allow_fill": False} + else: + kwargs = {} + new_values = self._values.take(indices, **kwargs) + + return self._constructor( + new_values, index=new_index, fastpath=True + ).__finalize__(self) + + def _take_with_is_copy(self, indices, axis=0, **kwargs): + """ + Internal version of the `take` method that sets the `_is_copy` + attribute to keep track of the parent dataframe (using in indexing + for the SettingWithCopyWarning). For Series this does the same + as the public take (it never sets `_is_copy`). + + See the docstring of `take` for full explanation of the parameters. + """ + return self.take(indices=indices, axis=axis, **kwargs) + + def _ixs(self, i: int, axis: int = 0): + """ + Return the i-th value or values in the Series by location. 
def __getitem__(self, key):
    """
    Look up ``key``: first through the index's ``get_value``, then through
    the slower ``_get_with`` dispatch.

    A callable ``key`` is first applied to the Series. ``Ellipsis``
    returns the Series itself. If the direct lookup raises ``KeyError``
    or ``ValueError`` and the key is neither a tuple on a MultiIndex nor
    a boolean indexer, the key is coerced with
    ``index._convert_scalar_indexer``; the lookup is retried only when
    coercion changed the key's type, otherwise the error is re-raised.
    """
    key = com.apply_if_callable(key, self)
    try:
        result = self.index.get_value(self, key)

        if not is_scalar(result):
            if is_list_like(result) and not isinstance(result, Series):

                # we need to box if loc of the key isn't scalar here
                # otherwise have inline ndarray/lists
                try:
                    if not is_scalar(self.index.get_loc(key)):
                        result = self._constructor(
                            result, index=[key] * len(result), dtype=self.dtype
                        ).__finalize__(self)
                except KeyError:
                    pass
        return result
    except InvalidIndexError:
        # Not a valid direct lookup; continue with the dispatch below.
        pass
    except (KeyError, ValueError):
        if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
            # kludge
            pass
        elif key is Ellipsis:
            return self
        elif com.is_bool_indexer(key):
            pass
        else:

            # we can try to coerce the indexer (or this will raise)
            new_key = self.index._convert_scalar_indexer(key, kind="getitem")
            # Retry only if coercion produced a different type of key;
            # otherwise re-raise the original lookup error.
            if type(new_key) != type(key):
                return self.__getitem__(new_key)
            raise

    # Materialise iterators so the key can be inspected more than once.
    if is_iterator(key):
        key = list(key)

    if com.is_bool_indexer(key):
        key = check_bool_indexer(self.index, key)

    return self._get_with(key)
def _get_values_tuple(self, key):
    """
    Resolve a tuple key.

    A tuple containing ``None`` is passed to ``_get_values`` with the
    "Support for multi-dim" DeprecationWarning silenced (matplotlib
    workaround). Any other tuple is only valid on a MultiIndex, where it
    selects via ``get_loc_level``.

    Raises
    ------
    ValueError
        If the key has no ``None`` and the index is not a MultiIndex.
    """
    # mpl hackaround
    contains_none = com.any_none(*key)
    if contains_none:
        # suppress warning from slicing the index with a 2d indexer.
        # eventually we'll want Series itself to warn.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                action="ignore",
                message="Support for multi-dim",
                category=DeprecationWarning,
            )
            return self._get_values(key)

    if isinstance(self.index, MultiIndex):
        # If key is contained, would have returned by now
        locs, level_index = self.index.get_loc_level(key)
        subset = self._constructor(self._values[locs], index=level_index)
        return subset.__finalize__(self)

    raise ValueError("Can only tuple-index with a MultiIndex")
def __setitem__(self, key, value):
    """
    Assign ``value`` at ``key``.

    The index engine is tried first (``_set_with_engine``); the except
    clauses below implement the fallbacks. A callable ``key`` is first
    applied to the Series.

    Raises
    ------
    ValueError
        If the engine raises ``TypeError`` for a tuple key while the
        index is not a MultiIndex.
    IndexError
        If the engine's ``TypeError`` is classified as an unorderable
        exception by ``_is_unorderable_exception``.
    """
    key = com.apply_if_callable(key, self)
    # Evaluated before any mutation; decides whether the cacher is
    # refreshed at the end.
    cacher_needs_updating = self._check_is_chained_assignment_possible()

    try:
        self._set_with_engine(key, value)
    except com.SettingWithCopyError:
        raise
    except (KeyError, ValueError):
        values = self._values
        # An integer key on an index not inferred as "integer" is
        # treated as a position.
        if is_integer(key) and not self.index.inferred_type == "integer":
            values[key] = value
        elif key is Ellipsis:
            self[:] = value
        else:
            self.loc[key] = value

    except TypeError as e:
        if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
            raise ValueError("Can only tuple-index with a MultiIndex")

        # python 3 type errors should be raised
        if _is_unorderable_exception(e):
            raise IndexError(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)
            try:
                # Returns directly, so the cacher update below is
                # skipped on this path.
                self._where(~key, value, inplace=True)
                return
            except InvalidIndexError:
                pass

        self._set_with(key, value)

    if cacher_needs_updating:
        self._maybe_update_cacher()
def _set_labels(self, key, value):
    """
    Set ``value`` at the positions of the given index labels.

    Parameters
    ----------
    key : list-like of labels
        Converted with ``com.asarray_tuplesafe`` and located with
        ``self.index.get_indexer``.
    value : object
        Value(s) forwarded to ``_set_values``.

    Raises
    ------
    ValueError
        If any label is reported as missing (position ``-1``).
    """
    labels = com.asarray_tuplesafe(key)
    positions = self.index.get_indexer(labels)

    # get_indexer marks labels it cannot find with -1
    missing = positions == -1
    if not missing.any():
        self._set_values(positions, value)
        return

    raise ValueError(f"{labels[missing]} not contained in the index")
def reset_index(self, level=None, drop=False, name=None, inplace=False):
    """
    Generate a new DataFrame or Series with the index reset.

    This is useful when the index needs to be treated as a column, or
    when the index is meaningless and needs to be reset to the default
    before another operation.

    Parameters
    ----------
    level : int, str, tuple, or list, optional
        For a Series with a MultiIndex, only remove the specified levels
        from the index. Removes all levels by default.
    drop : bool, default False
        Just reset the index, without inserting it as a column in
        the new DataFrame.
    name : object, optional
        The name to use for the column containing the original Series
        values. Uses ``self.name`` by default. When `drop` is True this
        argument is ignored, except that with ``inplace=True`` a name
        other than None is set as the Series name.
    inplace : bool, default False
        Modify the Series in place (do not create a new object).

    Returns
    -------
    Series or DataFrame
        When `drop` is False (the default), a DataFrame is returned.
        The newly created columns will come first in the DataFrame,
        followed by the original Series values.
        When `drop` is True, a `Series` is returned.
        In either case, if ``inplace=True``, no value is returned.

    Raises
    ------
    TypeError
        If ``inplace=True`` while ``drop`` is False, since a Series
        cannot be turned into a DataFrame in place.

    See Also
    --------
    DataFrame.reset_index: Analogous function for DataFrame.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4], name='foo',
    ...               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

    Generate a DataFrame with default index.

    >>> s.reset_index()
      idx  foo
    0   a    1
    1   b    2
    2   c    3
    3   d    4

    To specify the name of the new column use `name`.

    >>> s.reset_index(name='values')
      idx  values
    0   a       1
    1   b       2
    2   c       3
    3   d       4

    To generate a new Series with the default set `drop` to True.

    >>> s.reset_index(drop=True)
    0    1
    1    2
    2    3
    3    4
    Name: foo, dtype: int64

    To update the Series in place, without generating a new one
    set `inplace` to True. Note that it also requires ``drop=True``.

    >>> s.reset_index(inplace=True, drop=True)
    >>> s
    0    1
    1    2
    2    3
    3    4
    Name: foo, dtype: int64

    The `level` parameter is interesting for Series with a multi-level
    index.

    >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
    ...           np.array(['one', 'two', 'one', 'two'])]
    >>> s2 = pd.Series(
    ...     range(4), name='foo',
    ...     index=pd.MultiIndex.from_arrays(arrays,
    ...                                     names=['a', 'b']))

    To remove a specific level from the Index, use `level`.

    >>> s2.reset_index(level='a')
           a  foo
    b
    one  bar    0
    two  bar    1
    one  baz    2
    two  baz    3

    If `level` is not set, all levels are removed from the Index.

    >>> s2.reset_index()
         a    b  foo
    0  bar  one    0
    1  bar  two    1
    2  baz  one    2
    3  baz  two    3
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    if drop:
        new_index = ibase.default_index(len(self))
        if level is not None:
            if not isinstance(level, (tuple, list)):
                level = [level]
            level = [self.index._get_level_number(lev) for lev in level]
            # Dropping only some of the levels keeps the remaining ones
            # as the index; dropping all of them keeps the default index.
            if len(level) < self.index.nlevels:
                new_index = self.index.droplevel(level)

        if inplace:
            self.index = new_index
            # set name if it was passed, otherwise, keep the previous name.
            # Compare against None rather than truthiness so that falsy
            # names such as 0 or "" are honoured.
            if name is not None:
                self.name = name
        else:
            return self._constructor(
                self._values.copy(), index=new_index
            ).__finalize__(self)
    elif inplace:
        raise TypeError(
            "Cannot reset_index inplace on a Series to create a DataFrame"
        )
    else:
        df = self.to_frame(name)
        return df.reset_index(level=level, drop=drop)
def to_string(
    self,
    buf=None,
    na_rep="NaN",
    float_format=None,
    header=True,
    index=True,
    length=False,
    dtype=False,
    name=False,
    max_rows=None,
    min_rows=None,
):
    """
    Render a string representation of the Series.

    Parameters
    ----------
    buf : StringIO-like, optional
        Buffer to write to. An object without a ``write`` method is
        treated as a path and opened for writing.
    na_rep : str, optional
        String representation of NaN to use, default 'NaN'.
    float_format : one-parameter function, optional
        Formatter function to apply to columns' elements if they are
        floats, default None.
    header : bool, default True
        Add the Series header (index name).
    index : bool, optional
        Add index (row) labels, default True.
    length : bool, default False
        Add the Series length.
    dtype : bool, default False
        Add the Series dtype.
    name : bool, default False
        Add the Series name if not None.
    max_rows : int, optional
        Maximum number of rows to show before truncating. If None, show
        all.
    min_rows : int, optional
        The number of rows to display in a truncated repr (when number
        of rows is above `max_rows`).

    Returns
    -------
    str or None
        String representation of Series if ``buf=None``, otherwise None.
    """
    formatter_options = dict(
        name=name,
        length=length,
        header=header,
        index=index,
        dtype=dtype,
        na_rep=na_rep,
        float_format=float_format,
        min_rows=min_rows,
        max_rows=max_rows,
    )
    rendered = fmt.SeriesFormatter(self, **formatter_options).to_string()

    # catch contract violations
    if not isinstance(rendered, str):
        raise AssertionError(
            "result must be of type str, type"
            f" of result is {repr(type(rendered).__name__)}"
        )

    if buf is None:
        return rendered

    try:
        buf.write(rendered)
    except AttributeError:
        # no ``write`` method: fall back to treating ``buf`` as a path
        with open(buf, "w") as f:
            f.write(rendered)
print(f"Index : {index}, Value : {value}") + Index : 0, Value : A + Index : 1, Value : B + Index : 2, Value : C + """ + return zip(iter(self.index), iter(self)) + + @Appender(items.__doc__) + def iteritems(self): + return self.items() + + # ---------------------------------------------------------------------- + # Misc public methods + + def keys(self): + """ + Return alias for index. + + Returns + ------- + Index + Index of the Series. + """ + return self.index + + def to_dict(self, into=dict): + """ + Convert Series to {label -> value} dict or dict-like object. + + Parameters + ---------- + into : class, default dict + The collections.abc.Mapping subclass to use as the return + object. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + .. versionadded:: 0.21.0 + + Returns + ------- + collections.abc.Mapping + Key-value representation of Series. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.to_dict() + {0: 1, 1: 2, 2: 3, 3: 4} + >>> from collections import OrderedDict, defaultdict + >>> s.to_dict(OrderedDict) + OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) + >>> dd = defaultdict(list) + >>> s.to_dict(dd) + defaultdict(, {0: 1, 1: 2, 2: 3, 3: 4}) + """ + # GH16122 + into_c = com.standardize_mapping(into) + return into_c(self.items()) + + def to_frame(self, name=None): + """ + Convert Series to DataFrame. + + Parameters + ---------- + name : object, default None + The passed name should substitute for the series name (if it has + one). + + Returns + ------- + DataFrame + DataFrame representation of Series. + + Examples + -------- + >>> s = pd.Series(["a", "b", "c"], + ... name="vals") + >>> s.to_frame() + vals + 0 a + 1 b + 2 c + """ + if name is None: + df = self._constructor_expanddim(self) + else: + df = self._constructor_expanddim({name: self}) + + return df + + def _set_name(self, name, inplace=False): + """ + Set the Series name. 
+ + Parameters + ---------- + name : str + inplace : bool + Whether to modify `self` directly or return a copy. + """ + inplace = validate_bool_kwarg(inplace, "inplace") + ser = self if inplace else self.copy() + ser.name = name + return ser + + @Appender( + """ +Examples +-------- +>>> ser = pd.Series([390., 350., 30., 20.], +... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed") +>>> ser +Falcon 390.0 +Falcon 350.0 +Parrot 30.0 +Parrot 20.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(["a", "b", "a", "b"]).mean() +a 210.0 +b 185.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level=0).mean() +Falcon 370.0 +Parrot 25.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(ser > 100).mean() +Max Speed +False 25.0 +True 370.0 +Name: Max Speed, dtype: float64 + +**Grouping by Indexes** + +We can groupby different levels of a hierarchical index +using the `level` parameter: + +>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], +... ['Captive', 'Wild', 'Captive', 'Wild']] +>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) +>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed") +>>> ser +Animal Type +Falcon Captive 390.0 + Wild 350.0 +Parrot Captive 30.0 + Wild 20.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level=0).mean() +Animal +Falcon 370.0 +Parrot 25.0 +Name: Max Speed, dtype: float64 +>>> ser.groupby(level="Type").mean() +Type +Captive 210.0 +Wild 185.0 +Name: Max Speed, dtype: float64 +""" + ) + @Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs) + def groupby( + self, + by=None, + axis=0, + level=None, + as_index: bool = True, + sort: bool = True, + group_keys: bool = True, + squeeze: bool = False, + observed: bool = False, + ) -> "groupby_generic.SeriesGroupBy": + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + axis = self._get_axis_number(axis) + + return groupby_generic.SeriesGroupBy( + obj=self, + keys=by, + axis=axis, + 
level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze, + observed=observed, + ) + + # ---------------------------------------------------------------------- + # Statistics, overridden ndarray methods + + # TODO: integrate bottleneck + + def count(self, level=None): + """ + Return number of non-NA/null observations in the Series. + + Parameters + ---------- + level : int or level name, default None + If the axis is a MultiIndex (hierarchical), count along a + particular level, collapsing into a smaller Series. + + Returns + ------- + int or Series (if level specified) + Number of non-null values in the Series. + + Examples + -------- + >>> s = pd.Series([0.0, 1.0, np.nan]) + >>> s.count() + 2 + """ + if level is None: + return notna(self.array).sum() + + if isinstance(level, str): + level = self.index._get_level_number(level) + + lev = self.index.levels[level] + level_codes = np.array(self.index.codes[level], subok=False, copy=True) + + mask = level_codes == -1 + if mask.any(): + level_codes[mask] = cnt = len(lev) + lev = lev.insert(cnt, lev._na_value) + + obs = level_codes[notna(self.values)] + out = np.bincount(obs, minlength=len(lev) or None) + return self._constructor(out, index=lev, dtype="int64").__finalize__(self) + + def mode(self, dropna=True): + """ + Return the mode(s) of the dataset. + + Always returns Series even if only one value is returned. + + Parameters + ---------- + dropna : bool, default True + Don't consider counts of NaN/NaT. + + .. versionadded:: 0.24.0 + + Returns + ------- + Series + Modes of the Series in sorted order. + """ + # TODO: Add option for bins like value_counts() + return algorithms.mode(self, dropna=dropna) + + def unique(self): + """ + Return unique values of Series object. + + Uniques are returned in order of appearance. Hash table-based unique, + therefore does NOT sort. + + Returns + ------- + ndarray or ExtensionArray + The unique values returned as a NumPy array. See Notes. 
+ + See Also + -------- + unique : Top-level unique method for any 1-d array-like object. + Index.unique : Return Index with unique values from an Index object. + + Notes + ----- + Returns the unique values as a NumPy array. In case of an + extension-array backed Series, a new + :class:`~api.extensions.ExtensionArray` of that type with just + the unique values is returned. This includes + + * Categorical + * Period + * Datetime with Timezone + * Interval + * Sparse + * IntegerNA + + See Examples section. + + Examples + -------- + >>> pd.Series([2, 1, 3, 3], name='A').unique() + array([2, 1, 3]) + + >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() + array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]') + + >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern') + ... for _ in range(3)]).unique() + + ['2016-01-01 00:00:00-05:00'] + Length: 1, dtype: datetime64[ns, US/Eastern] + + An unordered Categorical will return categories in the order of + appearance. + + >>> pd.Series(pd.Categorical(list('baabc'))).unique() + [b, a, c] + Categories (3, object): [b, a, c] + + An ordered Categorical preserves the category ordering. + + >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), + ... ordered=True)).unique() + [b, a, c] + Categories (3, object): [a < b < c] + """ + result = super().unique() + return result + + def drop_duplicates(self, keep="first", inplace=False): + """ + Return Series with duplicate values removed. + + Parameters + ---------- + keep : {'first', 'last', ``False``}, default 'first' + Method to handle dropping duplicates: + + - 'first' : Drop duplicates except for the first occurrence. + - 'last' : Drop duplicates except for the last occurrence. + - ``False`` : Drop all duplicates. + + inplace : bool, default ``False`` + If ``True``, performs operation inplace and returns None. + + Returns + ------- + Series + Series with duplicates dropped. 
+ + See Also + -------- + Index.drop_duplicates : Equivalent method on Index. + DataFrame.drop_duplicates : Equivalent method on DataFrame. + Series.duplicated : Related method on Series, indicating duplicate + Series values. + + Examples + -------- + Generate a Series with duplicated entries. + + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'], + ... name='animal') + >>> s + 0 lama + 1 cow + 2 lama + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + With the 'keep' parameter, the selection behaviour of duplicated values + can be changed. The value 'first' keeps the first occurrence for each + set of duplicated entries. The default value of keep is 'first'. + + >>> s.drop_duplicates() + 0 lama + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + + The value 'last' for parameter 'keep' keeps the last occurrence for + each set of duplicated entries. + + >>> s.drop_duplicates(keep='last') + 1 cow + 3 beetle + 4 lama + 5 hippo + Name: animal, dtype: object + + The value ``False`` for parameter 'keep' discards all sets of + duplicated entries. Setting the value of 'inplace' to ``True`` performs + the operation inplace and returns ``None``. + + >>> s.drop_duplicates(keep=False, inplace=True) + >>> s + 1 cow + 3 beetle + 5 hippo + Name: animal, dtype: object + """ + return super().drop_duplicates(keep=keep, inplace=inplace) + + def duplicated(self, keep="first"): + """ + Indicate duplicate Series values. + + Duplicated values are indicated as ``True`` values in the resulting + Series. Either all duplicates, all except the first or all except the + last occurrence of duplicates can be indicated. + + Parameters + ---------- + keep : {'first', 'last', False}, default 'first' + Method to handle dropping duplicates: + + - 'first' : Mark duplicates as ``True`` except for the first + occurrence. + - 'last' : Mark duplicates as ``True`` except for the last + occurrence. + - ``False`` : Mark all duplicates as ``True``. 
+ + Returns + ------- + Series + Series indicating whether each value has occurred in the + preceding values. + + See Also + -------- + Index.duplicated : Equivalent method on pandas.Index. + DataFrame.duplicated : Equivalent method on pandas.DataFrame. + Series.drop_duplicates : Remove duplicate values from Series. + + Examples + -------- + By default, for each set of duplicated values, the first occurrence is + set on False and all others on True: + + >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> animals.duplicated() + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: bool + + which is equivalent to + + >>> animals.duplicated(keep='first') + 0 False + 1 False + 2 True + 3 False + 4 True + dtype: bool + + By using 'last', the last occurrence of each set of duplicated values + is set on False and all others on True: + + >>> animals.duplicated(keep='last') + 0 True + 1 False + 2 True + 3 False + 4 False + dtype: bool + + By setting keep on ``False``, all duplicates are True: + + >>> animals.duplicated(keep=False) + 0 True + 1 False + 2 True + 3 False + 4 True + dtype: bool + """ + return super().duplicated(keep=keep) + + def idxmin(self, axis=0, skipna=True, *args, **kwargs): + """ + Return the row label of the minimum value. + + If multiple values equal the minimum, the first row label with that + value is returned. + + Parameters + ---------- + axis : int, default 0 + For compatibility with DataFrame.idxmin. Redundant for application + on Series. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Index + Label of the minimum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmin : Return indices of the minimum values + along the given axis. 
+ DataFrame.idxmin : Return index of first occurrence of minimum + over requested axis. + Series.idxmax : Return index *label* of the first occurrence + of maximum of values. + + Notes + ----- + This method is the Series version of ``ndarray.argmin``. This method + returns the label of the minimum, while ``ndarray.argmin`` returns + the position. To get the position, use ``series.values.argmin()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 1], + ... index=['A', 'B', 'C', 'D']) + >>> s + A 1.0 + B NaN + C 4.0 + D 1.0 + dtype: float64 + + >>> s.idxmin() + 'A' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. + + >>> s.idxmin(skipna=False) + nan + """ + skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs) + i = nanops.nanargmin(com.values_from_object(self), skipna=skipna) + if i == -1: + return np.nan + return self.index[i] + + def idxmax(self, axis=0, skipna=True, *args, **kwargs): + """ + Return the row label of the maximum value. + + If multiple values equal the maximum, the first row label with that + value is returned. + + Parameters + ---------- + axis : int, default 0 + For compatibility with DataFrame.idxmax. Redundant for application + on Series. + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + *args, **kwargs + Additional arguments and keywords have no effect but might be + accepted for compatibility with NumPy. + + Returns + ------- + Index + Label of the maximum value. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmax : Return indices of the maximum values + along the given axis. + DataFrame.idxmax : Return index of first occurrence of maximum + over requested axis. + Series.idxmin : Return index *label* of the first occurrence + of minimum of values. + + Notes + ----- + This method is the Series version of ``ndarray.argmax``. 
This method + returns the label of the maximum, while ``ndarray.argmax`` returns + the position. To get the position, use ``series.values.argmax()``. + + Examples + -------- + >>> s = pd.Series(data=[1, None, 4, 3, 4], + ... index=['A', 'B', 'C', 'D', 'E']) + >>> s + A 1.0 + B NaN + C 4.0 + D 3.0 + E 4.0 + dtype: float64 + + >>> s.idxmax() + 'C' + + If `skipna` is False and there is an NA value in the data, + the function returns ``nan``. + + >>> s.idxmax(skipna=False) + nan + """ + skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) + i = nanops.nanargmax(com.values_from_object(self), skipna=skipna) + if i == -1: + return np.nan + return self.index[i] + + def round(self, decimals=0, *args, **kwargs): + """ + Round each value in a Series to the given number of decimals. + + Parameters + ---------- + decimals : int, default 0 + Number of decimal places to round to. If decimals is negative, + it specifies the number of positions to the left of the decimal point. + + Returns + ------- + Series + Rounded values of the Series. + + See Also + -------- + numpy.around : Round values of an np.array. + DataFrame.round : Round values of a DataFrame. + + Examples + -------- + >>> s = pd.Series([0.1, 1.3, 2.7]) + >>> s.round() + 0 0.0 + 1 1.0 + 2 3.0 + dtype: float64 + """ + nv.validate_round(args, kwargs) + result = com.values_from_object(self).round(decimals) + result = self._constructor(result, index=self.index).__finalize__(self) + + return result + + def quantile(self, q=0.5, interpolation="linear"): + """ + Return value at the given quantile. + + Parameters + ---------- + q : float or array-like, default 0.5 (50% quantile) + The quantile(s) to compute, which can lie in range: 0 <= q <= 1. 
+ interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + + Returns + ------- + float or Series + If ``q`` is an array, a Series will be returned where the + index is ``q`` and the values are the quantiles, otherwise + a float will be returned. + + See Also + -------- + core.window.Rolling.quantile + numpy.percentile + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.quantile(.5) + 2.5 + >>> s.quantile([.25, .5, .75]) + 0.25 1.75 + 0.50 2.50 + 0.75 3.25 + dtype: float64 + """ + + validate_percentile(q) + + # We dispatch to DataFrame so that core.internals only has to worry + # about 2D cases. + df = self.to_frame() + + result = df.quantile(q=q, interpolation=interpolation, numeric_only=False) + if result.ndim == 2: + result = result.iloc[:, 0] + + if is_list_like(q): + result.name = self.name + return self._constructor(result, index=Float64Index(q), name=self.name) + else: + # scalar + return result.iloc[0] + + def corr(self, other, method="pearson", min_periods=None): + """ + Compute correlation with `other` Series, excluding missing values. + + Parameters + ---------- + other : Series + Series with which to compute the correlation. + method : {'pearson', 'kendall', 'spearman'} or callable + Method used to compute correlation: + + - pearson : Standard correlation coefficient + - kendall : Kendall Tau correlation coefficient + - spearman : Spearman rank correlation + - callable: Callable with input two 1d ndarrays and returning a float. + + .. 
versionadded:: 0.24.0 + Note that the returned matrix from corr will have 1 along the + diagonals and will be symmetric regardless of the callable's + behavior. + min_periods : int, optional + Minimum number of observations needed to have a valid result. + + Returns + ------- + float + Correlation with other. + + Examples + -------- + >>> def histogram_intersection(a, b): + ... v = np.minimum(a, b).sum().round(decimals=1) + ... return v + >>> s1 = pd.Series([.2, .0, .6, .2]) + >>> s2 = pd.Series([.3, .6, .0, .1]) + >>> s1.corr(s2, method=histogram_intersection) + 0.3 + """ + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: + return np.nan + + if method in ["pearson", "spearman", "kendall"] or callable(method): + return nanops.nancorr( + this.values, other.values, method=method, min_periods=min_periods + ) + + raise ValueError( + "method must be either 'pearson', " + "'spearman', 'kendall', or a callable, " + f"'{method}' was supplied" + ) + + def cov(self, other, min_periods=None): + """ + Compute covariance with Series, excluding missing values. + + Parameters + ---------- + other : Series + Series with which to compute the covariance. + min_periods : int, optional + Minimum number of observations needed to have a valid result. + + Returns + ------- + float + Covariance between Series and other normalized by N-1 + (unbiased estimator). + + Examples + -------- + >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035]) + >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198]) + >>> s1.cov(s2) + -0.01685762652715874 + """ + this, other = self.align(other, join="inner", copy=False) + if len(this) == 0: + return np.nan + return nanops.nancov(this.values, other.values, min_periods=min_periods) + + def diff(self, periods=1): + """ + First discrete difference of element. + + Calculates the difference of a Series element compared with another + element in the Series (default is element in previous row). 
+ + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + + Returns + ------- + Series + First differences of the Series. + + See Also + -------- + Series.pct_change: Percent change over given number of periods. + Series.shift: Shift index by desired number of periods with an + optional time freq. + DataFrame.diff: First discrete difference of object. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. + + Examples + -------- + Difference with previous row + + >>> s = pd.Series([1, 1, 2, 3, 5, 8]) + >>> s.diff() + 0 NaN + 1 0.0 + 2 1.0 + 3 1.0 + 4 2.0 + 5 3.0 + dtype: float64 + + Difference with 3rd previous row + + >>> s.diff(periods=3) + 0 NaN + 1 NaN + 2 NaN + 3 2.0 + 4 4.0 + 5 6.0 + dtype: float64 + + Difference with following row + + >>> s.diff(periods=-1) + 0 0.0 + 1 -1.0 + 2 -1.0 + 3 -2.0 + 4 -3.0 + 5 NaN + dtype: float64 + """ + result = algorithms.diff(self.array, periods) + return self._constructor(result, index=self.index).__finalize__(self) + + def autocorr(self, lag=1): + """ + Compute the lag-N autocorrelation. + + This method computes the Pearson correlation between + the Series and its shifted self. + + Parameters + ---------- + lag : int, default 1 + Number of lags to apply before performing autocorrelation. + + Returns + ------- + float + The Pearson correlation between self and self.shift(lag). + + See Also + -------- + Series.corr : Compute the correlation between two Series. + Series.shift : Shift index by desired number of periods. + DataFrame.corr : Compute pairwise correlation of columns. + DataFrame.corrwith : Compute pairwise correlation between rows or + columns of two DataFrame objects. + + Notes + ----- + If the Pearson correlation is not well defined return 'NaN'. + + Examples + -------- + >>> s = pd.Series([0.25, 0.5, 0.2, -0.05]) + >>> s.autocorr() # doctest: +ELLIPSIS + 0.10355... 
+ >>> s.autocorr(lag=2) # doctest: +ELLIPSIS + -0.99999... + + If the Pearson correlation is not well defined, then 'NaN' is returned. + + >>> s = pd.Series([1, 0, 0, 0]) + >>> s.autocorr() + nan + """ + return self.corr(self.shift(lag)) + + def dot(self, other): + """ + Compute the dot product between the Series and the columns of other. + + This method computes the dot product between the Series and another + one, or the Series and each columns of a DataFrame, or the Series and + each columns of an array. + + It can also be called using `self @ other` in Python >= 3.5. + + Parameters + ---------- + other : Series, DataFrame or array-like + The other object to compute the dot product with its columns. + + Returns + ------- + scalar, Series or numpy.ndarray + Return the dot product of the Series and other if other is a + Series, the Series of the dot product of Series and each rows of + other if other is a DataFrame or a numpy.ndarray between the Series + and each columns of the numpy array. + + See Also + -------- + DataFrame.dot: Compute the matrix product with the DataFrame. + Series.mul: Multiplication of series and other, element-wise. + + Notes + ----- + The Series and other has to share the same index if other is a Series + or a DataFrame. 
+ + Examples + -------- + >>> s = pd.Series([0, 1, 2, 3]) + >>> other = pd.Series([-1, 2, -3, 4]) + >>> s.dot(other) + 8 + >>> s @ other + 8 + >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]]) + >>> s.dot(df) + 0 24 + 1 14 + dtype: int64 + >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]]) + >>> s.dot(arr) + array([24, 14]) + """ + if isinstance(other, (Series, ABCDataFrame)): + common = self.index.union(other.index) + if len(common) > len(self.index) or len(common) > len(other.index): + raise ValueError("matrices are not aligned") + + left = self.reindex(index=common, copy=False) + right = other.reindex(index=common, copy=False) + lvals = left.values + rvals = right.values + else: + lvals = self.values + rvals = np.asarray(other) + if lvals.shape[0] != rvals.shape[0]: + raise Exception( + f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}" + ) + + if isinstance(other, ABCDataFrame): + return self._constructor( + np.dot(lvals, rvals), index=other.columns + ).__finalize__(self) + elif isinstance(other, Series): + return np.dot(lvals, rvals) + elif isinstance(rvals, np.ndarray): + return np.dot(lvals, rvals) + else: # pragma: no cover + raise TypeError(f"unsupported type: {type(other)}") + + def __matmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(other) + + def __rmatmul__(self, other): + """ + Matrix multiplication using binary `@` operator in Python>=3.5. + """ + return self.dot(np.transpose(other)) + + @Substitution(klass="Series") + @Appender(base._shared_docs["searchsorted"]) + def searchsorted(self, value, side="left", sorter=None): + return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) + + # ------------------------------------------------------------------- + # Combination + + def append(self, to_append, ignore_index=False, verify_integrity=False): + """ + Concatenate two or more Series. 
+ + Parameters + ---------- + to_append : Series or list/tuple of Series + Series to append with self. + ignore_index : bool, default False + If True, do not use the index labels. + verify_integrity : bool, default False + If True, raise Exception on creating index with duplicates. + + Returns + ------- + Series + Concatenated Series. + + See Also + -------- + concat : General function to concatenate DataFrame or Series objects. + + Notes + ----- + Iteratively appending to a Series can be more computationally intensive + than a single concatenate. A better solution is to append values to a + list and then concatenate the list with the original Series all at + once. + + Examples + -------- + >>> s1 = pd.Series([1, 2, 3]) + >>> s2 = pd.Series([4, 5, 6]) + >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5]) + >>> s1.append(s2) + 0 1 + 1 2 + 2 3 + 0 4 + 1 5 + 2 6 + dtype: int64 + + >>> s1.append(s3) + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + 5 6 + dtype: int64 + + With `ignore_index` set to True: + + >>> s1.append(s2, ignore_index=True) + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + 5 6 + dtype: int64 + + With `verify_integrity` set to True: + + >>> s1.append(s2, verify_integrity=True) + Traceback (most recent call last): + ... + ValueError: Indexes have overlapping values: [0, 1, 2] + """ + from pandas.core.reshape.concat import concat + + if isinstance(to_append, (list, tuple)): + to_concat = [self] + to_concat.extend(to_append) + else: + to_concat = [self, to_append] + return concat( + to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity + ) + + def _binop(self, other, func, level=None, fill_value=None): + """ + Perform generic binary operation with optional fill value. + + Parameters + ---------- + other : Series + func : binary operator + fill_value : float or object + Value to substitute for NA/null values. If both Series are NA in a + location, the result will be NA regardless of the passed fill value. 
+ level : int or level name, default None + Broadcast across a level, matching Index values on the + passed MultiIndex level. + + Returns + ------- + Series + """ + + if not isinstance(other, Series): + raise AssertionError("Other operand must be Series") + + new_index = self.index + this = self + + if not self.index.equals(other.index): + this, other = self.align(other, level=level, join="outer", copy=False) + new_index = this.index + + this_vals, other_vals = ops.fill_binop(this.values, other.values, fill_value) + + with np.errstate(all="ignore"): + result = func(this_vals, other_vals) + + name = ops.get_op_result_name(self, other) + ret = ops._construct_result(self, result, new_index, name) + return ret + + def combine(self, other, func, fill_value=None): + """ + Combine the Series with a Series or scalar according to `func`. + + Combine the Series and `other` using `func` to perform elementwise + selection for combined Series. + `fill_value` is assumed when value is missing at some index + from one of the two objects being combined. + + Parameters + ---------- + other : Series or scalar + The value(s) to be combined with the `Series`. + func : function + Function that takes two scalars as inputs and returns an element. + fill_value : scalar, optional + The value to assume when an index is missing from + one Series or the other. The default specifies to use the + appropriate NaN value for the underlying dtype of the Series. + + Returns + ------- + Series + The result of combining the Series with the other object. + + See Also + -------- + Series.combine_first : Combine Series values, choosing the calling + Series' values first. + + Examples + -------- + Consider 2 Datasets ``s1`` and ``s2`` containing + highest clocked speeds of different birds. 
+ + >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0}) + >>> s1 + falcon 330.0 + eagle 160.0 + dtype: float64 + >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0}) + >>> s2 + falcon 345.0 + eagle 200.0 + duck 30.0 + dtype: float64 + + Now, to combine the two datasets and view the highest speeds + of the birds across the two datasets + + >>> s1.combine(s2, max) + duck NaN + eagle 200.0 + falcon 345.0 + dtype: float64 + + In the previous example, the resulting value for duck is missing, + because the maximum of a NaN and a float is a NaN. + So, in the example, we set ``fill_value=0``, + so the maximum value returned will be the value from some dataset. + + >>> s1.combine(s2, max, fill_value=0) + duck 30.0 + eagle 200.0 + falcon 345.0 + dtype: float64 + """ + if fill_value is None: + fill_value = na_value_for_dtype(self.dtype, compat=False) + + if isinstance(other, Series): + # If other is a Series, result is based on union of Series, + # so do this element by element + new_index = self.index.union(other.index) + new_name = ops.get_op_result_name(self, other) + new_values = [] + for idx in new_index: + lv = self.get(idx, fill_value) + rv = other.get(idx, fill_value) + with np.errstate(all="ignore"): + new_values.append(func(lv, rv)) + else: + # Assume that other is a scalar, so apply the function for + # each element in the Series + new_index = self.index + with np.errstate(all="ignore"): + new_values = [func(lv, other) for lv in self._values] + new_name = self.name + + if is_categorical_dtype(self.values): + pass + elif is_extension_array_dtype(self.values): + # The function can return something of any type, so check + # if the type is compatible with the calling EA. + new_values = try_cast_to_ea(self._values, new_values) + return self._constructor(new_values, index=new_index, name=new_name) + + def combine_first(self, other): + """ + Combine Series values, choosing the calling Series's values first. 
+ + Parameters + ---------- + other : Series + The value(s) to be combined with the `Series`. + + Returns + ------- + Series + The result of combining the Series with the other object. + + See Also + -------- + Series.combine : Perform elementwise operation on two Series + using a given function. + + Notes + ----- + Result index will be the union of the two indexes. + + Examples + -------- + >>> s1 = pd.Series([1, np.nan]) + >>> s2 = pd.Series([3, 4]) + >>> s1.combine_first(s2) + 0 1.0 + 1 4.0 + dtype: float64 + """ + new_index = self.index.union(other.index) + this = self.reindex(new_index, copy=False) + other = other.reindex(new_index, copy=False) + if this.dtype.kind == "M" and other.dtype.kind != "M": + other = to_datetime(other) + + return this.where(notna(this), other) + + def update(self, other): + """ + Modify Series in place using non-NA values from passed + Series. Aligns on index. + + Parameters + ---------- + other : Series + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + >>> s = pd.Series(['a', 'b', 'c']) + >>> s.update(pd.Series(['d', 'e'], index=[0, 2])) + >>> s + 0 d + 1 b + 2 e + dtype: object + + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, 5, 6, 7, 8])) + >>> s + 0 4 + 1 5 + 2 6 + dtype: int64 + + If ``other`` contains NaNs the corresponding values are not updated + in the original Series. + + >>> s = pd.Series([1, 2, 3]) + >>> s.update(pd.Series([4, np.nan, 6])) + >>> s + 0 4 + 1 2 + 2 6 + dtype: int64 + """ + other = other.reindex_like(self) + mask = notna(other) + + self._data = self._data.putmask(mask=mask, new=other, inplace=True) + self._maybe_update_cacher() + + # ---------------------------------------------------------------------- + # Reindexing, sorting + + def sort_values( + self, + axis=0, + ascending=True, + inplace=False, + kind="quicksort", + na_position="last", + ignore_index=False, + ): + """ + Sort by the values. 
+ + Sort a Series in ascending or descending order by some + criterion. + + Parameters + ---------- + axis : {0 or 'index'}, default 0 + Axis to direct sorting. The value 'index' is accepted for + compatibility with DataFrame.sort_values. + ascending : bool, default True + If True, sort values in ascending order, otherwise descending. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort' or 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. 'mergesort' is the only stable algorithm. + na_position : {'first' or 'last'}, default 'last' + Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at + the end. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + .. versionadded:: 1.0.0 + + Returns + ------- + Series + Series ordered by values. + + See Also + -------- + Series.sort_index : Sort by the Series indices. + DataFrame.sort_values : Sort DataFrame by the values along either axis. + DataFrame.sort_index : Sort DataFrame by indices. 
+ + Examples + -------- + >>> s = pd.Series([np.nan, 1, 3, 10, 5]) + >>> s + 0 NaN + 1 1.0 + 2 3.0 + 3 10.0 + 4 5.0 + dtype: float64 + + Sort values ascending order (default behaviour) + + >>> s.sort_values(ascending=True) + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + 0 NaN + dtype: float64 + + Sort values descending order + + >>> s.sort_values(ascending=False) + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 NaN + dtype: float64 + + Sort values inplace + + >>> s.sort_values(ascending=False, inplace=True) + >>> s + 3 10.0 + 4 5.0 + 2 3.0 + 1 1.0 + 0 NaN + dtype: float64 + + Sort values putting NAs first + + >>> s.sort_values(na_position='first') + 0 NaN + 1 1.0 + 2 3.0 + 4 5.0 + 3 10.0 + dtype: float64 + + Sort a series of strings + + >>> s = pd.Series(['z', 'b', 'd', 'a', 'c']) + >>> s + 0 z + 1 b + 2 d + 3 a + 4 c + dtype: object + + >>> s.sort_values() + 3 a + 1 b + 4 c + 2 d + 0 z + dtype: object + """ + inplace = validate_bool_kwarg(inplace, "inplace") + # Validate the axis parameter + self._get_axis_number(axis) + + # GH 5856/5853 + if inplace and self._is_cached: + raise ValueError( + "This Series is a view of some other array, to " + "sort in-place you must create a copy" + ) + + def _try_kind_sort(arr): + # easier to ask forgiveness than permission + try: + # if kind==mergesort, it can fail for object dtype + return arr.argsort(kind=kind) + except TypeError: + # stable sort not available for object dtype + # uses the argsort default quicksort + return arr.argsort(kind="quicksort") + + arr = self._values + sorted_index = np.empty(len(self), dtype=np.int32) + + bad = isna(arr) + + good = ~bad + idx = ibase.default_index(len(self)) + + argsorted = _try_kind_sort(arr[good]) + + if is_list_like(ascending): + if len(ascending) != 1: + raise ValueError( + f"Length of ascending ({len(ascending)}) must be 1 for Series" + ) + ascending = ascending[0] + + if not is_bool(ascending): + raise ValueError("ascending must be boolean") + + if not ascending: + argsorted = argsorted[::-1] + + if 
na_position == "last": + n = good.sum() + sorted_index[:n] = idx[good][argsorted] + sorted_index[n:] = idx[bad] + elif na_position == "first": + n = bad.sum() + sorted_index[n:] = idx[good][argsorted] + sorted_index[:n] = idx[bad] + else: + raise ValueError(f"invalid na_position: {na_position}") + + result = self._constructor(arr[sorted_index], index=self.index[sorted_index]) + + if ignore_index: + result.index = ibase.default_index(len(sorted_index)) + + if inplace: + self._update_inplace(result) + else: + return result.__finalize__(self) + + def sort_index( + self, + axis=0, + level=None, + ascending=True, + inplace=False, + kind="quicksort", + na_position="last", + sort_remaining=True, + ignore_index: bool = False, + ): + """ + Sort Series by index labels. + + Returns a new Series sorted by label if `inplace` argument is + ``False``, otherwise updates the original series and returns None. + + Parameters + ---------- + axis : int, default 0 + Axis to direct sorting. This can only be 0 for Series. + level : int, optional + If not None, sort on values in specified index level(s). + ascending : bool, default True + Sort ascending vs. descending. + inplace : bool, default False + If True, perform operation in-place. + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See also :func:`numpy.sort` for more + information. 'mergesort' is the only stable algorithm. For + DataFrames, this option is only applied when sorting on a single + column or label. + na_position : {'first', 'last'}, default 'last' + If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end. + Not implemented for MultiIndex. + sort_remaining : bool, default True + If True and sorting by level and index is multilevel, sort by other + levels too (in order) after sorting by specified level. + ignore_index : bool, default False + If True, the resulting axis will be labeled 0, 1, …, n - 1. + + ..
versionadded:: 1.0.0 + + Returns + ------- + Series + The original Series sorted by the labels. + + See Also + -------- + DataFrame.sort_index: Sort DataFrame by the index. + DataFrame.sort_values: Sort DataFrame by the value. + Series.sort_values : Sort Series by the value. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) + >>> s.sort_index() + 1 c + 2 b + 3 a + 4 d + dtype: object + + Sort Descending + + >>> s.sort_index(ascending=False) + 4 d + 3 a + 2 b + 1 c + dtype: object + + Sort Inplace + + >>> s.sort_index(inplace=True) + >>> s + 1 c + 2 b + 3 a + 4 d + dtype: object + + By default NaNs are put at the end, but use `na_position` to place + them at the beginning + + >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan]) + >>> s.sort_index(na_position='first') + NaN d + 1.0 c + 2.0 b + 3.0 a + dtype: object + + Specify index level to sort + + >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', + ... 'baz', 'baz', 'bar', 'bar']), + ... np.array(['two', 'one', 'two', 'one', + ... 
'two', 'one', 'two', 'one'])] + >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) + >>> s.sort_index(level=1) + bar one 8 + baz one 6 + foo one 4 + qux one 2 + bar two 7 + baz two 5 + foo two 3 + qux two 1 + dtype: int64 + + Does not sort by remaining levels when sorting by levels + + >>> s.sort_index(level=1, sort_remaining=False) + qux one 2 + foo one 4 + baz one 6 + bar one 8 + qux two 1 + foo two 3 + baz two 5 + bar two 7 + dtype: int64 + """ + # TODO: this can be combined with DataFrame.sort_index impl as + # almost identical + inplace = validate_bool_kwarg(inplace, "inplace") + # Validate the axis parameter + self._get_axis_number(axis) + index = self.index + + if level is not None: + new_index, indexer = index.sortlevel( + level, ascending=ascending, sort_remaining=sort_remaining + ) + elif isinstance(index, MultiIndex): + from pandas.core.sorting import lexsort_indexer + + labels = index._sort_levels_monotonic() + indexer = lexsort_indexer( + labels._get_codes_for_sorting(), + orders=ascending, + na_position=na_position, + ) + else: + from pandas.core.sorting import nargsort + + # Check monotonic-ness before sort an index + # GH11080 + if (ascending and index.is_monotonic_increasing) or ( + not ascending and index.is_monotonic_decreasing + ): + if inplace: + return + else: + return self.copy() + + indexer = nargsort( + index, kind=kind, ascending=ascending, na_position=na_position + ) + + indexer = ensure_platform_int(indexer) + new_index = index.take(indexer) + new_index = new_index._sort_levels_monotonic() + + new_values = self._values.take(indexer) + result = self._constructor(new_values, index=new_index) + + if ignore_index: + result.index = ibase.default_index(len(result)) + + if inplace: + self._update_inplace(result) + else: + return result.__finalize__(self) + + def argsort(self, axis=0, kind="quicksort", order=None): + """ + Override ndarray.argsort. 
Argsorts the value, omitting NA/null values, + and places the result in the same locations as the non-NA values. + + Parameters + ---------- + axis : {0 or "index"} + Has no effect but is accepted for compatibility with numpy. + kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort' + Choice of sorting algorithm. See np.sort for more + information. 'mergesort' is the only stable algorithm. + order : None + Has no effect but is accepted for compatibility with numpy. + + Returns + ------- + Series + Positions of values within the sort order with -1 indicating + nan values. + + See Also + -------- + numpy.ndarray.argsort + """ + values = self._values + mask = isna(values) + + if mask.any(): + result = Series(-1, index=self.index, name=self.name, dtype="int64") + notmask = ~mask + result[notmask] = np.argsort(values[notmask], kind=kind) + return self._constructor(result, index=self.index).__finalize__(self) + else: + return self._constructor( + np.argsort(values, kind=kind), index=self.index, dtype="int64" + ).__finalize__(self) + + def nlargest(self, n=5, keep="first"): + """ + Return the largest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many descending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` largest values in the Series, sorted in decreasing order. + + See Also + -------- + Series.nsmallest: Get the `n` smallest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. 
+ + Notes + ----- + Faster than ``.sort_values(ascending=False).head(n)`` for small `n` + relative to the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Monserat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Monserat 5200 + dtype: int64 + + The `n` largest elements where ``n=5`` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3``. Default `keep` value is 'first' + so Malta will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` and keeping the last duplicates. + Brunei will be kept since it is the last with value 434000 based on + the index order. + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has five elements due to the three duplicates. + + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + """ + return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() + + def nsmallest(self, n=5, keep="first"): + """ + Return the smallest `n` elements. + + Parameters + ---------- + n : int, default 5 + Return this many ascending sorted values. 
+ keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + + - ``first`` : return the first `n` occurrences in order + of appearance. + - ``last`` : return the last `n` occurrences in reverse + order of appearance. + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. + + Returns + ------- + Series + The `n` smallest values in the Series, sorted in increasing order. + + See Also + -------- + Series.nlargest: Get the `n` largest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. + + Notes + ----- + Faster than ``.sort_values().head(n)`` for small `n` relative to + the size of the ``Series`` object. + + Examples + -------- + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Brunei": 434000, "Malta": 434000, + ... "Maldives": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Monserat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Brunei 434000 + Malta 434000 + Maldives 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Monserat 5200 + dtype: int64 + + The `n` smallest elements where ``n=5`` by default. + + >>> s.nsmallest() + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337000 + dtype: int64 + + The `n` smallest elements where ``n=3``. Default `keep` value is + 'first' so Nauru and Tuvalu will be kept. + + >>> s.nsmallest(3) + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` and keeping the last + duplicates. Anguilla and Tuvalu will be kept since they are the last + with value 11300 based on the index order. + + >>> s.nsmallest(3, keep='last') + Monserat 5200 + Anguilla 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` with all duplicates kept. 
Note + that the returned Series has four elements due to the three duplicates. + + >>> s.nsmallest(3, keep='all') + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: int64 + """ + return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() + + def swaplevel(self, i=-2, j=-1, copy=True): + """ + Swap levels i and j in a :class:`MultiIndex`. + + Default is to swap the two innermost levels of the index. + + Parameters + ---------- + i, j : int, str + Level of the indices to be swapped. Can pass level name as string. + copy : bool, default True + Whether to copy underlying data. + + Returns + ------- + Series + Series with levels swapped in MultiIndex. + """ + new_index = self.index.swaplevel(i, j) + return self._constructor(self._values, index=new_index, copy=copy).__finalize__( + self + ) + + def reorder_levels(self, order): + """ + Rearrange index levels using input order. + + May not drop or duplicate levels. + + Parameters + ---------- + order : list of int representing new level order + Reference level by number or key. + + Returns + ------- + type of caller (new object) + """ + if not isinstance(self.index, MultiIndex): # pragma: no cover + raise Exception("Can only reorder levels on a hierarchical axis.") + + result = self.copy() + result.index = result.index.reorder_levels(order) + return result + + def explode(self) -> "Series": + """ + Transform each element of a list-like to a row, replicating the + index values. + + .. versionadded:: 0.25.0 + + Returns + ------- + Series + Exploded lists to rows; index will be duplicated for these rows. + + See Also + -------- + Series.str.split : Split string values on specified separator. + Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex + to produce DataFrame. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. + DataFrame.explode : Explode a DataFrame from list-like + columns to long format. 
+ + Notes + ----- + This routine will explode list-likes including lists, tuples, + Series, and np.ndarray. The result dtype of the subset rows will + be object. Scalars will be returned unchanged. Empty list-likes will + result in a np.nan for that row. + + Examples + -------- + >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]]) + >>> s + 0 [1, 2, 3] + 1 foo + 2 [] + 3 [3, 4] + dtype: object + + >>> s.explode() + 0 1 + 0 2 + 0 3 + 1 foo + 2 NaN + 3 3 + 3 4 + dtype: object + """ + if not len(self) or not is_object_dtype(self): + return self.copy() + + values, counts = reshape.explode(np.asarray(self.array)) + + result = Series(values, index=self.index.repeat(counts), name=self.name) + return result + + def unstack(self, level=-1, fill_value=None): + """ + Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame. + The level involved will automatically get sorted. + + Parameters + ---------- + level : int, str, or list of these, default last level + Level(s) to unstack, can pass level name. + fill_value : scalar value, default None + Value to use when replacing NaN values. + + Returns + ------- + DataFrame + Unstacked Series. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.MultiIndex.from_product([['one', 'two'], + ... ['a', 'b']])) + >>> s + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 + + >>> s.unstack(level=-1) + a b + one 1 2 + two 3 4 + + >>> s.unstack(level=0) + one two + a 1 3 + b 2 4 + """ + from pandas.core.reshape.reshape import unstack + + return unstack(self, level, fill_value) + + # ---------------------------------------------------------------------- + # function application + + def map(self, arg, na_action=None): + """ + Map values of Series according to input correspondence. + + Used for substituting each value in a Series with another value, + that may be derived from a function, a ``dict`` or + a :class:`Series`. 
+ + Parameters + ---------- + arg : function, collections.abc.Mapping subclass or Series + Mapping correspondence. + na_action : {None, 'ignore'}, default None + If 'ignore', propagate NaN values, without passing them to the + mapping correspondence. + + Returns + ------- + Series + Same index as caller. + + See Also + -------- + Series.apply : For applying more complex functions on a Series. + DataFrame.apply : Apply a function row-/column-wise. + DataFrame.applymap : Apply a function elementwise on a whole DataFrame. + + Notes + ----- + When ``arg`` is a dictionary, values in Series that are not in the + dictionary (as keys) are converted to ``NaN``. However, if the + dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e. + provides a method for default values), then this default is used + rather than ``NaN``. + + Examples + -------- + >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit']) + >>> s + 0 cat + 1 dog + 2 NaN + 3 rabbit + dtype: object + + ``map`` accepts a ``dict`` or a ``Series``. Values that are not found + in the ``dict`` are converted to ``NaN``, unless the dict has a default + value (e.g. ``defaultdict``): + + >>> s.map({'cat': 'kitten', 'dog': 'puppy'}) + 0 kitten + 1 puppy + 2 NaN + 3 NaN + dtype: object + + It also accepts a function: + + >>> s.map('I am a {}'.format) + 0 I am a cat + 1 I am a dog + 2 I am a nan + 3 I am a rabbit + dtype: object + + To avoid applying the function to missing values (and keep them as + ``NaN``) ``na_action='ignore'`` can be used: + + >>> s.map('I am a {}'.format, na_action='ignore') + 0 I am a cat + 1 I am a dog + 2 NaN + 3 I am a rabbit + dtype: object + """ + new_values = super()._map_values(arg, na_action=na_action) + return self._constructor(new_values, index=self.index).__finalize__(self) + + def _gotitem(self, key, ndim, subset=None): + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : string / list of selections + ndim : 1,2 + Requested ndim of result. 
+ subset : object, default None + Subset to act on. + """ + return self + + _agg_see_also_doc = dedent( + """ + See Also + -------- + Series.apply : Invoke function on a Series. + Series.transform : Transform function producing a Series with like indexes. + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + dtype: int64 + + >>> s.agg('min') + 1 + + >>> s.agg(['min', 'max']) + min 1 + max 4 + dtype: int64 + """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="\n.. versionadded:: 0.20.0\n", + **_shared_doc_kwargs, + ) + @Appender(generic._shared_docs["aggregate"]) + def aggregate(self, func, axis=0, *args, **kwargs): + # Validate the axis parameter + self._get_axis_number(axis) + result, how = self._aggregate(func, *args, **kwargs) + if result is None: + + # we can be called from an inner function which + # passes this meta-data + kwargs.pop("_axis", None) + kwargs.pop("_level", None) + + # try a regular apply, this evaluates lambdas + # row-by-row; however if the lambda is expected a Series + # expression, e.g.: lambda x: x-x.quantile(0.25) + # this will fail, so we can try a vectorized evaluation + + # we cannot FIRST try the vectorized evaluation, because + # then .agg and .apply would have different semantics if the + # operation is actually defined on the Series, e.g. str + try: + result = self.apply(func, *args, **kwargs) + except (ValueError, AttributeError, TypeError): + result = func(self, *args, **kwargs) + + return result + + agg = aggregate + + @Appender(generic._shared_docs["transform"] % _shared_doc_kwargs) + def transform(self, func, axis=0, *args, **kwargs): + # Validate the axis parameter + self._get_axis_number(axis) + return super().transform(func, *args, **kwargs) + + def apply(self, func, convert_dtype=True, args=(), **kwds): + """ + Invoke function on values of Series. 
+ + Can be ufunc (a NumPy function that applies to the entire Series) + or a Python function that only works on single values. + + Parameters + ---------- + func : function + Python function or NumPy ufunc to apply. + convert_dtype : bool, default True + Try to find better dtype for elementwise function results. If + False, leave as dtype=object. + args : tuple + Positional arguments passed to func after the series value. + **kwds + Additional keyword arguments passed to func. + + Returns + ------- + Series or DataFrame + If func returns a Series object the result will be a DataFrame. + + See Also + -------- + Series.map: For element-wise operations. + Series.agg: Only perform aggregating type operations. + Series.transform: Only perform transforming type operations. + + Examples + -------- + Create a series with typical summer temperatures for each city. + + >>> s = pd.Series([20, 21, 12], + ... index=['London', 'New York', 'Helsinki']) + >>> s + London 20 + New York 21 + Helsinki 12 + dtype: int64 + + Square the values by defining a function and passing it as an + argument to ``apply()``. + + >>> def square(x): + ... return x ** 2 + >>> s.apply(square) + London 400 + New York 441 + Helsinki 144 + dtype: int64 + + Square the values by passing an anonymous function as an + argument to ``apply()``. + + >>> s.apply(lambda x: x ** 2) + London 400 + New York 441 + Helsinki 144 + dtype: int64 + + Define a custom function that needs additional positional + arguments and pass these additional arguments using the + ``args`` keyword. + + >>> def subtract_custom_value(x, custom_value): + ... return x - custom_value + + >>> s.apply(subtract_custom_value, args=(5,)) + London 15 + New York 16 + Helsinki 7 + dtype: int64 + + Define a custom function that takes keyword arguments + and pass these arguments to ``apply``. + + >>> def add_custom_values(x, **kwargs): + ... for month in kwargs: + ... x += kwargs[month] + ... 
return x + + >>> s.apply(add_custom_values, june=30, july=20, august=25) + London 95 + New York 96 + Helsinki 87 + dtype: int64 + + Use a function from the Numpy library. + + >>> s.apply(np.log) + London 2.995732 + New York 3.044522 + Helsinki 2.484907 + dtype: float64 + """ + if len(self) == 0: + return self._constructor(dtype=self.dtype, index=self.index).__finalize__( + self + ) + + # dispatch to agg + if isinstance(func, (list, dict)): + return self.aggregate(func, *args, **kwds) + + # if we are a string, try to dispatch + if isinstance(func, str): + return self._try_aggregate_string_function(func, *args, **kwds) + + # handle ufuncs and lambdas + if kwds or args and not isinstance(func, np.ufunc): + + def f(x): + return func(x, *args, **kwds) + + else: + f = func + + with np.errstate(all="ignore"): + if isinstance(f, np.ufunc): + return f(self) + + # row-wise access + if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"): + # GH#23179 some EAs do not have `map` + mapped = self._values.map(f) + else: + values = self.astype(object).values + mapped = lib.map_infer(values, f, convert=convert_dtype) + + if len(mapped) and isinstance(mapped[0], Series): + # GH 25959 use pd.array instead of tolist + # so extension arrays can be used + return self._constructor_expanddim(pd.array(mapped), index=self.index) + else: + return self._constructor(mapped, index=self.index).__finalize__(self) + + def _reduce( + self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds + ): + """ + Perform a reduction operation. + + If we have an ndarray as a value, then simply perform the operation, + otherwise delegate to the object. 
+ """ + delegate = self._values + + if axis is not None: + self._get_axis_number(axis) + + if isinstance(delegate, Categorical): + return delegate._reduce(name, skipna=skipna, **kwds) + elif isinstance(delegate, ExtensionArray): + # dispatch to ExtensionArray interface + return delegate._reduce(name, skipna=skipna, **kwds) + elif is_datetime64_dtype(delegate): + # use DatetimeIndex implementation to handle skipna correctly + delegate = DatetimeIndex(delegate) + elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name): + # use TimedeltaIndex to handle skipna correctly + # TODO: remove hasattr check after TimedeltaIndex has `std` method + delegate = TimedeltaIndex(delegate) + + # dispatch to numpy arrays + elif isinstance(delegate, np.ndarray): + if numeric_only: + raise NotImplementedError( + f"Series.{name} does not implement numeric_only." + ) + with np.errstate(all="ignore"): + return op(delegate, skipna=skipna, **kwds) + + # TODO(EA) dispatch to Index + # remove once all internals extension types are + # moved to ExtensionArrays + return delegate._reduce( + op=op, + name=name, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + filter_type=filter_type, + **kwds, + ) + + def _reindex_indexer(self, new_index, indexer, copy): + if indexer is None: + if copy: + return self.copy() + return self + + new_values = algorithms.take_1d( + self._values, indexer, allow_fill=True, fill_value=None + ) + return self._constructor(new_values, index=new_index) + + def _needs_reindex_multi(self, axes, method, level): + """ + Check if we do need a multi reindex; this is for compat with + higher dims. 
+ """ + return False + + @Appender(generic._shared_docs["align"] % _shared_doc_kwargs) + def align( + self, + other, + join="outer", + axis=None, + level=None, + copy=True, + fill_value=None, + method=None, + limit=None, + fill_axis=0, + broadcast_axis=None, + ): + return super().align( + other, + join=join, + axis=axis, + level=level, + copy=copy, + fill_value=fill_value, + method=method, + limit=limit, + fill_axis=fill_axis, + broadcast_axis=broadcast_axis, + ) + + def rename( + self, + index=None, + *, + axis=None, + copy=True, + inplace=False, + level=None, + errors="ignore", + ): + """ + Alter Series index labels or name. + + Function / dict values must be unique (1-to-1). Labels not contained in + a dict / Series will be left as-is. Extra labels listed don't throw an + error. + + Alternatively, change ``Series.name`` with a scalar value. + + See the :ref:`user guide <basics.rename>` for more. + + Parameters + ---------- + axis : {0 or "index"} + Unused. Accepted for compatibility with DataFrame method only. + index : scalar, hashable sequence, dict-like or function, optional + Functions or dict-like are transformations to apply to + the index. + Scalar or hashable sequence-like will alter the ``Series.name`` + attribute. + + **kwargs + Additional keyword arguments passed to the function. Only the + "inplace" keyword is used. + + Returns + ------- + Series + Series with index labels or name altered. + + See Also + -------- + DataFrame.rename : Corresponding DataFrame method. + Series.rename_axis : Set the name of the axis.
+ + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s + 0 1 + 1 2 + 2 3 + dtype: int64 + >>> s.rename("my_name") # scalar, changes Series.name + 0 1 + 1 2 + 2 3 + Name: my_name, dtype: int64 + >>> s.rename(lambda x: x ** 2) # function, changes labels + 0 1 + 1 2 + 4 3 + dtype: int64 + >>> s.rename({1: 3, 2: 5}) # mapping, changes labels + 0 1 + 3 2 + 5 3 + dtype: int64 + """ + if callable(index) or is_dict_like(index): + return super().rename( + index, copy=copy, inplace=inplace, level=level, errors=errors + ) + else: + return self._set_name(index, inplace=inplace) + + @Substitution(**_shared_doc_kwargs) + @Appender(generic.NDFrame.reindex.__doc__) + def reindex(self, index=None, **kwargs): + return super().reindex(index=index, **kwargs) + + def drop( + self, + labels=None, + axis=0, + index=None, + columns=None, + level=None, + inplace=False, + errors="raise", + ): + """ + Return Series with specified index labels removed. + + Remove elements of a Series based on specifying the index labels. + When using a multi-index, labels on different levels can be removed + by specifying the level. + + Parameters + ---------- + labels : single label or list-like + Index labels to drop. + axis : 0, default 0 + Redundant for application on Series. + index : single label or list-like + Redundant for application on Series, but 'index' can be used instead + of 'labels'. + + .. versionadded:: 0.21.0 + columns : single label or list-like + No change is made to the Series; use 'index' or 'labels' instead. + + .. versionadded:: 0.21.0 + level : int or level name, optional + For MultiIndex, level for which the labels will be removed. + inplace : bool, default False + If True, do operation inplace and return None. + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and only existing labels are dropped. + + Returns + ------- + Series + Series with specified index labels removed. 
+ + Raises + ------ + KeyError + If none of the labels are found in the index. + + See Also + -------- + Series.reindex : Return only specified index labels of Series. + Series.dropna : Return series without null values. + Series.drop_duplicates : Return Series with duplicate values removed. + DataFrame.drop : Drop specified labels from rows or columns. + + Examples + -------- + >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C']) + >>> s + A 0 + B 1 + C 2 + dtype: int64 + + Drop labels B and C + + >>> s.drop(labels=['B', 'C']) + A 0 + dtype: int64 + + Drop 2nd level label in MultiIndex Series + + >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'], + ... ['speed', 'weight', 'length']], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], + ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) + >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], + ... index=midx) + >>> s + lama speed 45.0 + weight 200.0 + length 1.2 + cow speed 30.0 + weight 250.0 + length 1.5 + falcon speed 320.0 + weight 1.0 + length 0.3 + dtype: float64 + + >>> s.drop(labels='weight', level=1) + lama speed 45.0 + length 1.2 + cow speed 30.0 + length 1.5 + falcon speed 320.0 + length 0.3 + dtype: float64 + """ + return super().drop( + labels=labels, + axis=axis, + index=index, + columns=columns, + level=level, + inplace=inplace, + errors=errors, + ) + + @Substitution(**_shared_doc_kwargs) + @Appender(generic.NDFrame.fillna.__doc__) + def fillna( + self, + value=None, + method=None, + axis=None, + inplace=False, + limit=None, + downcast=None, + ) -> Optional["Series"]: + return super().fillna( + value=value, + method=method, + axis=axis, + inplace=inplace, + limit=limit, + downcast=downcast, + ) + + @Appender(generic._shared_docs["replace"] % _shared_doc_kwargs) + def replace( + self, + to_replace=None, + value=None, + inplace=False, + limit=None, + regex=False, + method="pad", + ): + return super().replace( + to_replace=to_replace, + value=value, + inplace=inplace, + limit=limit, + regex=regex, +
method=method, + ) + + @Appender(generic._shared_docs["shift"] % _shared_doc_kwargs) + def shift(self, periods=1, freq=None, axis=0, fill_value=None): + return super().shift( + periods=periods, freq=freq, axis=axis, fill_value=fill_value + ) + + def memory_usage(self, index=True, deep=False): + """ + Return the memory usage of the Series. + + The memory usage can optionally include the contribution of + the index and of elements of `object` dtype. + + Parameters + ---------- + index : bool, default True + Specifies whether to include the memory usage of the Series index. + deep : bool, default False + If True, introspect the data deeply by interrogating + `object` dtypes for system-level memory consumption, and include + it in the returned value. + + Returns + ------- + int + Bytes of memory consumed. + + See Also + -------- + numpy.ndarray.nbytes : Total bytes consumed by the elements of the + array. + DataFrame.memory_usage : Bytes consumed by a DataFrame. + + Examples + -------- + >>> s = pd.Series(range(3)) + >>> s.memory_usage() + 152 + + Not including the index gives the size of the rest of the data, which + is necessarily smaller: + + >>> s.memory_usage(index=False) + 24 + + The memory footprint of `object` values is ignored by default: + + >>> s = pd.Series(["a", "b"]) + >>> s.values + array(['a', 'b'], dtype=object) + >>> s.memory_usage() + 144 + >>> s.memory_usage(deep=True) + 260 + """ + v = super().memory_usage(deep=deep) + if index: + v += self.index.memory_usage(deep=deep) + return v + + def isin(self, values): + """ + Check whether `values` are contained in Series. + + Return a boolean Series showing whether each element in the Series + matches an element in the passed sequence of `values` exactly. + + Parameters + ---------- + values : set or list-like + The sequence of values to test. Passing in a single string will + raise a ``TypeError``. Instead, turn a single string into a + list of one element. 
+ + Returns + ------- + Series + Series of booleans indicating if each element is in values. + + Raises + ------ + TypeError + * If `values` is a string + + See Also + -------- + DataFrame.isin : Equivalent method on DataFrame. + + Examples + -------- + >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', + ... 'hippo'], name='animal') + >>> s.isin(['cow', 'lama']) + 0 True + 1 True + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + + Passing a single string as ``s.isin('lama')`` will raise an error. Use + a list of one element instead: + + >>> s.isin(['lama']) + 0 True + 1 False + 2 True + 3 False + 4 True + 5 False + Name: animal, dtype: bool + """ + result = algorithms.isin(self, values) + return self._constructor(result, index=self.index).__finalize__(self) + + def between(self, left, right, inclusive=True): + """ + Return boolean Series equivalent to left <= series <= right. + + This function returns a boolean vector containing `True` wherever the + corresponding Series element is between the boundary values `left` and + `right`. NA values are treated as `False`. + + Parameters + ---------- + left : scalar or list-like + Left boundary. + right : scalar or list-like + Right boundary. + inclusive : bool, default True + Include boundaries. + + Returns + ------- + Series + Series representing whether each element is between left and + right (inclusive). + + See Also + -------- + Series.gt : Greater than of series and other. + Series.lt : Less than of series and other. 
+ + Notes + ----- + This function is equivalent to ``(left <= ser) & (ser <= right)`` + + Examples + -------- + >>> s = pd.Series([2, 0, 4, 8, np.nan]) + + Boundary values are included by default: + + >>> s.between(1, 4) + 0 True + 1 False + 2 True + 3 False + 4 False + dtype: bool + + With `inclusive` set to ``False`` boundary values are excluded: + + >>> s.between(1, 4, inclusive=False) + 0 True + 1 False + 2 False + 3 False + 4 False + dtype: bool + + `left` and `right` can be any scalar value: + + >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve']) + >>> s.between('Anna', 'Daniel') + 0 False + 1 True + 2 True + 3 False + dtype: bool + """ + if inclusive: + lmask = self >= left + rmask = self <= right + else: + lmask = self > left + rmask = self < right + + return lmask & rmask + + # ---------------------------------------------------------------------- + # Convert to types that support pd.NA + + def _convert_dtypes( + self: ABCSeries, + infer_objects: bool = True, + convert_string: bool = True, + convert_integer: bool = True, + convert_boolean: bool = True, + ) -> "Series": + input_series = self + if infer_objects: + input_series = input_series.infer_objects() + if is_object_dtype(input_series): + input_series = input_series.copy() + + if convert_string or convert_integer or convert_boolean: + inferred_dtype = convert_dtypes( + input_series._values, convert_string, convert_integer, convert_boolean + ) + try: + result = input_series.astype(inferred_dtype) + except TypeError: + result = input_series.copy() + else: + result = input_series.copy() + return result + + @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs) + def isna(self): + return super().isna() + + @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs) + def isnull(self): + return super().isnull() + + @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs) + def notna(self): + return super().notna() + + @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs) + def 
notnull(self): + return super().notnull() + + def dropna(self, axis=0, inplace=False, how=None): + """ + Return a new Series with missing values removed. + + See the :ref:`User Guide ` for more on which values are + considered missing, and how to work with missing data. + + Parameters + ---------- + axis : {0 or 'index'}, default 0 + There is only one axis to drop values from. + inplace : bool, default False + If True, do operation inplace and return None. + how : str, optional + Not in use. Kept for compatibility. + + Returns + ------- + Series + Series with NA entries dropped from it. + + See Also + -------- + Series.isna: Indicate missing values. + Series.notna : Indicate existing (non-missing) values. + Series.fillna : Replace missing values. + DataFrame.dropna : Drop rows or columns which contain NA values. + Index.dropna : Drop missing indices. + + Examples + -------- + >>> ser = pd.Series([1., 2., np.nan]) + >>> ser + 0 1.0 + 1 2.0 + 2 NaN + dtype: float64 + + Drop NA values from a Series. + + >>> ser.dropna() + 0 1.0 + 1 2.0 + dtype: float64 + + Keep the Series with valid entries in the same variable. + + >>> ser.dropna(inplace=True) + >>> ser + 0 1.0 + 1 2.0 + dtype: float64 + + Empty strings are not considered NA values. ``None`` is considered an + NA value. 
+ + >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay']) + >>> ser + 0 NaN + 1 2 + 2 NaT + 3 + 4 None + 5 I stay + dtype: object + >>> ser.dropna() + 1 2 + 3 + 5 I stay + dtype: object + """ + inplace = validate_bool_kwarg(inplace, "inplace") + # Validate the axis parameter + self._get_axis_number(axis or 0) + + if self._can_hold_na: + result = remove_na_arraylike(self) + if inplace: + self._update_inplace(result) + else: + return result + else: + if inplace: + # do nothing + pass + else: + return self.copy() + + # ---------------------------------------------------------------------- + # Time series-oriented methods + + def to_timestamp(self, freq=None, how="start", copy=True): + """ + Cast to DatetimeIndex of Timestamps, at *beginning* of period. + + Parameters + ---------- + freq : str, default frequency of PeriodIndex + Desired frequency. + how : {'s', 'e', 'start', 'end'} + Convention for converting period to timestamp; start of period + vs. end. + copy : bool, default True + Whether or not to return a copy. + + Returns + ------- + Series with DatetimeIndex + """ + new_values = self._values + if copy: + new_values = new_values.copy() + + new_index = self.index.to_timestamp(freq=freq, how=how) + return self._constructor(new_values, index=new_index).__finalize__(self) + + def to_period(self, freq=None, copy=True): + """ + Convert Series from DatetimeIndex to PeriodIndex with desired + frequency (inferred from index if not passed). + + Parameters + ---------- + freq : str, default None + Frequency associated with the PeriodIndex. + copy : bool, default True + Whether or not to return a copy. + + Returns + ------- + Series + Series with index converted to PeriodIndex. 
+ """ + new_values = self._values + if copy: + new_values = new_values.copy() + + new_index = self.index.to_period(freq=freq) + return self._constructor(new_values, index=new_index).__finalize__(self) + + # ---------------------------------------------------------------------- + # Accessor Methods + # ---------------------------------------------------------------------- + str = CachedAccessor("str", StringMethods) + dt = CachedAccessor("dt", CombinedDatetimelikeProperties) + cat = CachedAccessor("cat", CategoricalAccessor) + plot = CachedAccessor("plot", pandas.plotting.PlotAccessor) + sparse = CachedAccessor("sparse", SparseAccessor) + + # ---------------------------------------------------------------------- + # Add plotting methods to Series + hist = pandas.plotting.hist_series + + +Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."}) +Series._add_numeric_operations() +Series._add_series_or_dataframe_operations() + +# Add arithmetic! +ops.add_flex_arithmetic_methods(Series) +ops.add_special_arithmetic_methods(Series) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py new file mode 100644 index 00000000..51c154aa --- /dev/null +++ b/pandas/core/sorting.py @@ -0,0 +1,411 @@ +""" miscellaneous sorting / groupby utilities """ +import numpy as np + +from pandas._libs import algos, hashtable, lib +from pandas._libs.hashtable import unique_label_indices + +from pandas.core.dtypes.common import ( + ensure_int64, + ensure_platform_int, + is_categorical_dtype, + is_extension_array_dtype, +) +from pandas.core.dtypes.missing import isna + +import pandas.core.algorithms as algorithms +from pandas.core.construction import extract_array + +_INT64_MAX = np.iinfo(np.int64).max + + +def get_group_index(labels, shape, sort: bool, xnull: bool): + """ + For the particular label_list, gets the offsets into the hypothetical list + representing the totally ordered cartesian product of all possible label + combinations, *as long as* this 
space fits within int64 bounds; + otherwise, though group indices identify unique combinations of + labels, they cannot be deconstructed. + - If `sort`, rank of returned ids preserve lexical ranks of labels. + i.e. returned id's can be used to do lexical sort on labels; + - If `xnull` nulls (-1 labels) are passed through. + + Parameters + ---------- + labels : sequence of arrays + Integers identifying levels at each location + shape : sequence of ints + Number of unique levels at each location + sort : bool + If the ranks of returned ids should match lexical ranks of labels + xnull : bool + If true nulls are excluded. i.e. -1 values in the labels are + passed through. + + Returns + ------- + An array of type int64 where two elements are equal if their corresponding + labels are equal at all location. + + Notes + ----- + The length of `labels` and `shape` must be identical. + """ + + def _int64_cut_off(shape) -> int: + acc = 1 + for i, mul in enumerate(shape): + acc *= int(mul) + if not acc < _INT64_MAX: + return i + return len(shape) + + def maybe_lift(lab, size): + # promote nan values (assigned -1 label in lab array) + # so that all output values are non-negative + return (lab + 1, size + 1) if (lab == -1).any() else (lab, size) + + labels = map(ensure_int64, labels) + if not xnull: + labels, shape = map(list, zip(*map(maybe_lift, labels, shape))) + + labels = list(labels) + shape = list(shape) + + # Iteratively process all the labels in chunks sized so less + # than _INT64_MAX unique int ids will be required for each chunk + while True: + # how many levels can be done without overflow: + nlev = _int64_cut_off(shape) + + # compute flat ids for the first `nlev` levels + stride = np.prod(shape[1:nlev], dtype="i8") + out = stride * labels[0].astype("i8", subok=False, copy=False) + + for i in range(1, nlev): + if shape[i] == 0: + stride = 0 + else: + stride //= shape[i] + out += labels[i] * stride + + if xnull: # exclude nulls + mask = labels[0] == -1 + for lab in 
labels[1:nlev]: + mask |= lab == -1 + out[mask] = -1 + + if nlev == len(shape): # all levels done! + break + + # compress what has been done so far in order to avoid overflow + # to retain lexical ranks, obs_ids should be sorted + comp_ids, obs_ids = compress_group_index(out, sort=sort) + + labels = [comp_ids] + labels[nlev:] + shape = [len(obs_ids)] + shape[nlev:] + + return out + + +def get_compressed_ids(labels, sizes): + """ + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). + + Parameters + ---------- + labels : list of label arrays + sizes : list of size of the levels + + Returns + ------- + tuple of (comp_ids, obs_group_ids) + """ + ids = get_group_index(labels, sizes, sort=True, xnull=False) + return compress_group_index(ids, sort=True) + + +def is_int64_overflow_possible(shape) -> bool: + the_prod = 1 + for x in shape: + the_prod *= int(x) + + return the_prod >= _INT64_MAX + + +def decons_group_index(comp_labels, shape): + # reconstruct labels + if is_int64_overflow_possible(shape): + # at some point group indices are factorized, + # and may not be deconstructed here! wrong path! + raise ValueError("cannot deconstruct factorized group indices!") + + label_list = [] + factor = 1 + y = 0 + x = comp_labels + for i in reversed(range(len(shape))): + labels = (x - y) % (factor * shape[i]) // factor + np.putmask(labels, comp_labels < 0, -1) + label_list.append(labels) + y = labels * factor + factor *= shape[i] + return label_list[::-1] + + +def decons_obs_group_ids(comp_ids, obs_ids, shape, labels, xnull: bool): + """ + Reconstruct labels from observed group ids. + + Parameters + ---------- + xnull : bool + If nulls are excluded; i.e. -1 labels are passed through. 
+ """ + if not xnull: + lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8") + shape = np.asarray(shape, dtype="i8") + lift + + if not is_int64_overflow_possible(shape): + # obs ids are deconstructable! take the fast route! + out = decons_group_index(obs_ids, shape) + return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)] + + i = unique_label_indices(comp_ids) + i8copy = lambda a: a.astype("i8", subok=False, copy=True) + return [i8copy(lab[i]) for lab in labels] + + +def indexer_from_factorized(labels, shape, compress: bool = True): + ids = get_group_index(labels, shape, sort=True, xnull=False) + + if not compress: + ngroups = (ids.size and ids.max()) + 1 + else: + ids, obs = compress_group_index(ids, sort=True) + ngroups = len(obs) + + return get_group_index_sorter(ids, ngroups) + + +def lexsort_indexer(keys, orders=None, na_position: str = "last"): + """ + Parameters + ---------- + na_position : {'first', 'last'}, default 'last' + """ + from pandas.core.arrays import Categorical + + labels = [] + shape = [] + if isinstance(orders, bool): + orders = [orders] * len(keys) + elif orders is None: + orders = [True] * len(keys) + + for key, order in zip(keys, orders): + + # we are already a Categorical + if is_categorical_dtype(key): + cat = key + + # create the Categorical + else: + cat = Categorical(key, ordered=True) + + if na_position not in ["last", "first"]: + raise ValueError(f"invalid na_position: {na_position}") + + n = len(cat.categories) + codes = cat.codes.copy() + + mask = cat.codes == -1 + if order: # ascending + if na_position == "last": + codes = np.where(mask, n, codes) + elif na_position == "first": + codes += 1 + else: # not order means descending + if na_position == "last": + codes = np.where(mask, n, n - codes - 1) + elif na_position == "first": + codes = np.where(mask, 0, n - codes) + if mask.any(): + n += 1 + + shape.append(n) + labels.append(codes) + + return indexer_from_factorized(labels, shape) + + +def 
nargsort( + items, kind: str = "quicksort", ascending: bool = True, na_position: str = "last" +): + """ + Intended to be a drop-in replacement for np.argsort which handles NaNs. + + Adds ascending and na_position parameters. + + (GH #6399, #5231) + + Parameters + ---------- + kind : str, default 'quicksort' + ascending : bool, default True + na_position : {'first', 'last'}, default 'last' + """ + items = extract_array(items) + mask = np.asarray(isna(items)) + + if is_extension_array_dtype(items): + items = items._values_for_argsort() + else: + items = np.asanyarray(items) + + idx = np.arange(len(items)) + non_nans = items[~mask] + non_nan_idx = idx[~mask] + nan_idx = np.nonzero(mask)[0] + if not ascending: + non_nans = non_nans[::-1] + non_nan_idx = non_nan_idx[::-1] + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + if not ascending: + indexer = indexer[::-1] + # Finally, place the NaNs at the end or the beginning according to + # na_position + if na_position == "last": + indexer = np.concatenate([indexer, nan_idx]) + elif na_position == "first": + indexer = np.concatenate([nan_idx, indexer]) + else: + raise ValueError(f"invalid na_position: {na_position}") + return indexer + + +class _KeyMapper: + """ + Map compressed group id -> key tuple. 
+ """ + + def __init__(self, comp_ids, ngroups: int, levels, labels): + self.levels = levels + self.labels = labels + self.comp_ids = comp_ids.astype(np.int64) + + self.k = len(labels) + self.tables = [hashtable.Int64HashTable(ngroups) for _ in range(self.k)] + + self._populate_tables() + + def _populate_tables(self): + for labs, table in zip(self.labels, self.tables): + table.map(self.comp_ids, labs.astype(np.int64)) + + def get_key(self, comp_id): + return tuple( + level[table.get_item(comp_id)] + for table, level in zip(self.tables, self.levels) + ) + + +def get_flattened_iterator(comp_ids, ngroups, levels, labels): + # provide "flattened" iterator for multi-group setting + mapper = _KeyMapper(comp_ids, ngroups, levels, labels) + return [mapper.get_key(i) for i in range(ngroups)] + + +def get_indexer_dict(label_list, keys): + """ + Returns + ------- + dict + Labels mapped to indexers. + """ + shape = [len(x) for x in keys] + + group_index = get_group_index(label_list, shape, sort=True, xnull=True) + ngroups = ( + ((group_index.size and group_index.max()) + 1) + if is_int64_overflow_possible(shape) + else np.prod(shape, dtype="i8") + ) + + sorter = get_group_index_sorter(group_index, ngroups) + + sorted_labels = [lab.take(sorter) for lab in label_list] + group_index = group_index.take(sorter) + + return lib.indices_fast(sorter, group_index, keys, sorted_labels) + + +# ---------------------------------------------------------------------- +# sorting levels...cleverly? + + +def get_group_index_sorter(group_index, ngroups: int): + """ + algos.groupsort_indexer implements `counting sort` and it is at least + O(ngroups), where + ngroups = prod(shape) + shape = map(len, keys) + that is, linear in the number of combinations (cartesian product) of unique + values of groupby keys. This can be huge when doing multi-key groupby. 
+ np.argsort(kind='mergesort') is O(count x log(count)) where count is the + length of the data-frame; + Both algorithms are `stable` sort and that is necessary for correctness of + groupby operations. e.g. consider: + df.groupby(key)[col].transform('first') + """ + count = len(group_index) + alpha = 0.0 # taking complexities literally; there may be + beta = 1.0 # some room for fine-tuning these parameters + do_groupsort = count > 0 and ((alpha + beta * ngroups) < (count * np.log(count))) + if do_groupsort: + sorter, _ = algos.groupsort_indexer(ensure_int64(group_index), ngroups) + return ensure_platform_int(sorter) + else: + return group_index.argsort(kind="mergesort") + + +def compress_group_index(group_index, sort: bool = True): + """ + Group_index is offsets into cartesian product of all possible labels. This + space can be huge, so this function compresses it, by computing offsets + (comp_ids) into the list of unique labels (obs_group_ids). + """ + + size_hint = min(len(group_index), hashtable._SIZE_HINT_LIMIT) + table = hashtable.Int64HashTable(size_hint) + + group_index = ensure_int64(group_index) + + # note, group labels come out ascending (ie, 1,2,3 etc) + comp_ids, obs_group_ids = table.get_labels_groupby(group_index) + + if sort and len(obs_group_ids) > 0: + obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) + + return comp_ids, obs_group_ids + + +def _reorder_by_uniques(uniques, labels): + # sorter is index where elements ought to go + sorter = uniques.argsort() + + # reverse_indexer is where elements came from + reverse_indexer = np.empty(len(sorter), dtype=np.int64) + reverse_indexer.put(sorter, np.arange(len(sorter))) + + mask = labels < 0 + + # move labels to right locations (ie, unsort ascending labels) + labels = algorithms.take_nd(reverse_indexer, labels, allow_fill=False) + np.putmask(labels, mask, -1) + + # sort observed ids + uniques = algorithms.take_nd(uniques, sorter, allow_fill=False) + + return uniques, labels diff 
--git a/pandas/core/sparse/__init__.py b/pandas/core/sparse/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/core/sparse/api.py b/pandas/core/sparse/api.py new file mode 100644 index 00000000..e7bf94cd --- /dev/null +++ b/pandas/core/sparse/api.py @@ -0,0 +1,3 @@ +from pandas.core.arrays.sparse import SparseArray, SparseDtype + +__all__ = ["SparseArray", "SparseDtype"] diff --git a/pandas/core/strings.py b/pandas/core/strings.py new file mode 100644 index 00000000..9ef066d5 --- /dev/null +++ b/pandas/core/strings.py @@ -0,0 +1,3568 @@ +import codecs +from functools import wraps +import re +import textwrap +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Type, Union +import warnings + +import numpy as np + +import pandas._libs.lib as lib +import pandas._libs.missing as libmissing +import pandas._libs.ops as libops +from pandas._typing import ArrayLike, Dtype +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import ( + ensure_object, + is_bool_dtype, + is_categorical_dtype, + is_extension_array_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_re, + is_scalar, + is_string_dtype, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndexClass, + ABCMultiIndex, + ABCSeries, +) +from pandas.core.dtypes.missing import isna + +from pandas.core.algorithms import take_1d +from pandas.core.base import NoNewAttributesMixin +import pandas.core.common as com +from pandas.core.construction import extract_array + +if TYPE_CHECKING: + from pandas.arrays import StringArray + +_cpython_optimized_encoders = ( + "utf-8", + "utf8", + "latin-1", + "latin1", + "iso-8859-1", + "mbcs", + "ascii", +) +_cpython_optimized_decoders = _cpython_optimized_encoders + ("utf-16", "utf-32") + +_shared_docs: Dict[str, str] = dict() + + +def cat_core(list_of_columns: List, sep: str): + """ + Auxiliary function for :meth:`str.cat` + + Parameters + ---------- + list_of_columns : list of 
numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. + """ + if sep == "": + # no need to interleave sep if it is empty + arr_of_cols = np.asarray(list_of_columns, dtype=object) + return np.sum(arr_of_cols, axis=0) + list_with_sep = [sep] * (2 * len(list_of_columns) - 1) + list_with_sep[::2] = list_of_columns + arr_with_sep = np.asarray(list_with_sep, dtype=object) + return np.sum(arr_with_sep, axis=0) + + +def cat_safe(list_of_columns: List, sep: str): + """ + Auxiliary function for :meth:`str.cat`. + + Same signature as cat_core, but handles TypeErrors in concatenation, which + happen if the arrays in list_of columns have the wrong dtypes or content. + + Parameters + ---------- + list_of_columns : list of numpy arrays + List of arrays to be concatenated with sep; + these arrays may not contain NaNs! + sep : string + The separator string for concatenating the columns. + + Returns + ------- + nd.array + The concatenation of list_of_columns with sep. + """ + try: + result = cat_core(list_of_columns, sep) + except TypeError: + # if there are any non-string values (wrong dtype or hidden behind + # object dtype), np.sum will fail; catch and return with better message + for column in list_of_columns: + dtype = lib.infer_dtype(column, skipna=True) + if dtype not in ["string", "empty"]: + raise TypeError( + "Concatenation requires list-likes containing only " + "strings (or missing values). 
Offending values found in " + f"column {dtype}" + ) from None + return result + + +def _na_map(f, arr, na_result=None, dtype=object): + if is_extension_array_dtype(arr.dtype): + if na_result is None: + na_result = libmissing.NA + # just StringDtype + arr = extract_array(arr) + return _map_stringarray(f, arr, na_value=na_result, dtype=dtype) + if na_result is None: + na_result = np.nan + return _map_object(f, arr, na_mask=True, na_value=na_result, dtype=dtype) + + +def _map_stringarray( + func: Callable[[str], Any], arr: "StringArray", na_value: Any, dtype: Dtype +) -> ArrayLike: + """ + Map a callable over valid elements of a StringArrray. + + Parameters + ---------- + func : Callable[[str], Any] + Apply to each valid element. + arr : StringArray + na_value : Any + The value to use for missing values. By default, this is + the original value (NA). + dtype : Dtype + The result dtype to use. Specifying this avoids an intermediate + object-dtype allocation. + + Returns + ------- + ArrayLike + An ExtensionArray for integer or string dtypes, otherwise + an ndarray. + + """ + from pandas.arrays import IntegerArray, StringArray, BooleanArray + + mask = isna(arr) + + assert isinstance(arr, StringArray) + arr = np.asarray(arr) + + if is_integer_dtype(dtype) or is_bool_dtype(dtype): + constructor: Union[Type[IntegerArray], Type[BooleanArray]] + if is_integer_dtype(dtype): + constructor = IntegerArray + else: + constructor = BooleanArray + + na_value_is_na = isna(na_value) + if na_value_is_na: + na_value = 1 + result = lib.map_infer_mask( + arr, + func, + mask.view("uint8"), + convert=False, + na_value=na_value, + dtype=np.dtype(dtype), + ) + + if not na_value_is_na: + mask[:] = False + + return constructor(result, mask) + + elif is_string_dtype(dtype) and not is_object_dtype(dtype): + # i.e. 
StringDtype + result = lib.map_infer_mask( + arr, func, mask.view("uint8"), convert=False, na_value=na_value + ) + return StringArray(result) + else: + # This is when the result type is object. We reach this when + # -> We know the result type is truly object (e.g. .encode returns bytes + # or .findall returns a list). + # -> We don't know the result type. E.g. `.get` can return anything. + return lib.map_infer_mask(arr, func, mask.view("uint8")) + + +def _map_object(f, arr, na_mask=False, na_value=np.nan, dtype=object): + if not len(arr): + return np.ndarray(0, dtype=dtype) + + if isinstance(arr, ABCSeries): + arr = arr.values + if not isinstance(arr, np.ndarray): + arr = np.asarray(arr, dtype=object) + if na_mask: + mask = isna(arr) + convert = not np.all(mask) + try: + result = lib.map_infer_mask(arr, f, mask.view(np.uint8), convert) + except (TypeError, AttributeError) as e: + # Reraise the exception if callable `f` got wrong number of args. + # The user may want to be warned by this, instead of getting NaN + p_err = ( + r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " + r"(?(3)required )positional arguments?" + ) + + if len(e.args) >= 1 and re.search(p_err, e.args[0]): + # FIXME: this should be totally avoidable + raise e + + def g(x): + try: + return f(x) + except (TypeError, AttributeError): + return na_value + + return _map_object(g, arr, dtype=dtype) + if na_value is not np.nan: + np.putmask(result, mask, na_value) + if result.dtype == object: + result = lib.maybe_convert_objects(result) + return result + else: + return lib.map_infer(arr, f) + + +def str_count(arr, pat, flags=0): + """ + Count occurrences of pattern in each string of the Series/Index. + + This function is used to count the number of times a particular regex + pattern is repeated in each of the string elements of the + :class:`~pandas.Series`. + + Parameters + ---------- + pat : str + Valid regular expression. + flags : int, default 0, meaning no flags + Flags for the `re` module. 
For a complete list, `see here + `_. + **kwargs + For compatibility with other string methods. Not used. + + Returns + ------- + Series or Index + Same type as the calling object containing the integer counts. + + See Also + -------- + re : Standard library module for regular expressions. + str.count : Standard library version, without regular expression support. + + Notes + ----- + Some characters need to be escaped when passing in `pat`. + eg. ``'$'`` has a special meaning in regex and must be escaped when + finding this literal character. + + Examples + -------- + >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) + >>> s.str.count('a') + 0 0.0 + 1 0.0 + 2 2.0 + 3 2.0 + 4 NaN + 5 0.0 + 6 1.0 + dtype: float64 + + Escape ``'$'`` to find the literal dollar sign. + + >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) + >>> s.str.count('\\$') + 0 1 + 1 0 + 2 1 + 3 2 + 4 2 + 5 0 + dtype: int64 + + This is also available on Index + + >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') + Int64Index([0, 0, 2, 1], dtype='int64') + """ + regex = re.compile(pat, flags=flags) + f = lambda x: len(regex.findall(x)) + return _na_map(f, arr, dtype="int64") + + +def str_contains(arr, pat, case=True, flags=0, na=np.nan, regex=True): + """ + Test if pattern or regex is contained within a string of a Series or Index. + + Return boolean Series or Index based on whether a given pattern or regex is + contained within a string of a Series or Index. + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. + flags : int, default 0 (no flags) + Flags to pass through to the re module, e.g. re.IGNORECASE. + na : default NaN + Fill value for missing values. + regex : bool, default True + If True, assumes the pat is a regular expression. + + If False, treats the pat as a literal string. 
+ + Returns + ------- + Series or Index of boolean values + A Series or Index of boolean values indicating whether the + given pattern is contained within the string of each element + of the Series or Index. + + See Also + -------- + match : Analogous, but stricter, relying on re.match instead of re.search. + Series.str.startswith : Test if the start of each string element matches a + pattern. + Series.str.endswith : Same as startswith, but tests the end of string. + + Examples + -------- + + Returning a Series of booleans using only a literal pattern. + + >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.NaN]) + >>> s1.str.contains('og', regex=False) + 0 False + 1 True + 2 False + 3 False + 4 NaN + dtype: object + + Returning an Index of booleans using only a literal pattern. + + >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.NaN]) + >>> ind.str.contains('23', regex=False) + Index([False, False, False, True, nan], dtype='object') + + Specifying case sensitivity using `case`. + + >>> s1.str.contains('oG', case=True, regex=True) + 0 False + 1 False + 2 False + 3 False + 4 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN` replaces NaN values + with `False`. If Series or Index does not contain NaN values + the resultant dtype will be `bool`, otherwise, an `object` dtype. + + >>> s1.str.contains('og', na=False, regex=True) + 0 False + 1 True + 2 False + 3 False + 4 False + dtype: bool + + Returning 'house' or 'dog' when either expression occurs in a string. + + >>> s1.str.contains('house|dog', regex=True) + 0 False + 1 True + 2 True + 3 False + 4 NaN + dtype: object + + Ignoring case sensitivity using `flags` with regex. + + >>> import re + >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True) + 0 False + 1 False + 2 True + 3 False + 4 NaN + dtype: object + + Returning any digit using regular expression. 
+ + >>> s1.str.contains('\\d', regex=True) + 0 False + 1 False + 2 False + 3 True + 4 NaN + dtype: object + + Ensure `pat` is a not a literal pattern when `regex` is set to True. + Note in the following example one might expect only `s2[1]` and `s2[3]` to + return `True`. However, '.0' as a regex matches any character + followed by a 0. + + >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) + >>> s2.str.contains('.0', regex=True) + 0 True + 1 True + 2 False + 3 True + 4 False + dtype: bool + """ + if regex: + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + if regex.groups > 0: + warnings.warn( + "This pattern has match groups. To actually get the " + "groups, use str.extract.", + UserWarning, + stacklevel=3, + ) + + f = lambda x: bool(regex.search(x)) + else: + if case: + f = lambda x: pat in x + else: + upper_pat = pat.upper() + f = lambda x: upper_pat in x + uppered = _na_map(lambda x: x.upper(), arr) + return _na_map(f, uppered, na, dtype=bool) + return _na_map(f, arr, na, dtype=bool) + + +def str_startswith(arr, pat, na=np.nan): + """ + Test if the start of each string element matches a pattern. + + Equivalent to :meth:`str.startswith`. + + Parameters + ---------- + pat : str + Character sequence. Regular expressions are not accepted. + na : object, default NaN + Object shown if element tested is not a string. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches + the start of each string element. + + See Also + -------- + str.startswith : Python standard library string method. + Series.str.endswith : Same as startswith, but tests the end of string. + Series.str.contains : Tests if string element contains a pattern. 
+ + Examples + -------- + >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) + >>> s + 0 bat + 1 Bear + 2 cat + 3 NaN + dtype: object + + >>> s.str.startswith('b') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN`. + + >>> s.str.startswith('b', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + f = lambda x: x.startswith(pat) + return _na_map(f, arr, na, dtype=bool) + + +def str_endswith(arr, pat, na=np.nan): + """ + Test if the end of each string element matches a pattern. + + Equivalent to :meth:`str.endswith`. + + Parameters + ---------- + pat : str + Character sequence. Regular expressions are not accepted. + na : object, default NaN + Object shown if element tested is not a string. + + Returns + ------- + Series or Index of bool + A Series of booleans indicating whether the given pattern matches + the end of each string element. + + See Also + -------- + str.endswith : Python standard library string method. + Series.str.startswith : Same as endswith, but tests the start of string. + Series.str.contains : Tests if string element contains a pattern. + + Examples + -------- + >>> s = pd.Series(['bat', 'bear', 'caT', np.nan]) + >>> s + 0 bat + 1 bear + 2 caT + 3 NaN + dtype: object + + >>> s.str.endswith('t') + 0 True + 1 False + 2 False + 3 NaN + dtype: object + + Specifying `na` to be `False` instead of `NaN`. + + >>> s.str.endswith('t', na=False) + 0 True + 1 False + 2 False + 3 False + dtype: bool + """ + f = lambda x: x.endswith(pat) + return _na_map(f, arr, na, dtype=bool) + + +def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): + r""" + Replace occurrences of pattern/regex in the Series/Index with + some other string. Equivalent to :meth:`str.replace` or + :func:`re.sub`. + + Parameters + ---------- + pat : str or compiled regex + String can be a character sequence or regular expression. + repl : str or callable + Replacement string or a callable. 
The callable is passed the regex + match object and must return a replacement string to be used. + See :func:`re.sub`. + n : int, default -1 (all) + Number of replacements to make from start. + case : bool, default None + Determines if replace is case sensitive: + + - If True, case sensitive (the default if `pat` is a string) + - Set to False for case insensitive + - Cannot be set if `pat` is a compiled regex. + + flags : int, default 0 (no flags) + Regex module flags, e.g. re.IGNORECASE. Cannot be set if `pat` is a compiled + regex. + regex : bool, default True + Determines if assumes the passed-in pattern is a regular expression: + + - If True, assumes the passed-in pattern is a regular expression. + - If False, treats the pattern as a literal string + - Cannot be set to False if `pat` is a compiled regex or `repl` is + a callable. + + .. versionadded:: 0.23.0 + + Returns + ------- + Series or Index of object + A copy of the object with all matching occurrences of `pat` replaced by + `repl`. + + Raises + ------ + ValueError + * if `regex` is False and `repl` is a callable or `pat` is a compiled + regex + * if `pat` is a compiled regex and `case` or `flags` is set + + Notes + ----- + When `pat` is a compiled regex, all flags should be included in the + compiled regex. Use of `case`, `flags`, or `regex=False` with a compiled + regex will raise an error. + + Examples + -------- + When `pat` is a string and `regex` is True (the default), the given `pat` + is compiled as a regex. When `repl` is a string, it replaces matching + regex patterns as with :meth:`re.sub`. 
NaN value(s) in the Series are + left as is: + + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True) + 0 bao + 1 baz + 2 NaN + dtype: object + + When `pat` is a string and `regex` is False, every `pat` is replaced with + `repl` as with :meth:`str.replace`: + + >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False) + 0 bao + 1 fuz + 2 NaN + dtype: object + + When `repl` is a callable, it is called on every `pat` using + :func:`re.sub`. The callable should expect one positional argument + (a regex object) and return a string. + + To get the idea: + + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr) + 0 <_sre.SRE_Match object; span=(0, 1), match='f'>oo + 1 <_sre.SRE_Match object; span=(0, 1), match='f'>uz + 2 NaN + dtype: object + + Reverse every lowercase alphabetic word: + + >>> repl = lambda m: m.group(0)[::-1] + >>> pd.Series(['foo 123', 'bar baz', np.nan]).str.replace(r'[a-z]+', repl) + 0 oof 123 + 1 rab zab + 2 NaN + dtype: object + + Using regex groups (extract second group and swap case): + + >>> pat = r"(?P\w+) (?P\w+) (?P\w+)" + >>> repl = lambda m: m.group('two').swapcase() + >>> pd.Series(['One Two Three', 'Foo Bar Baz']).str.replace(pat, repl) + 0 tWO + 1 bAR + dtype: object + + Using a compiled regex with flags + + >>> import re + >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) + >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar') + 0 foo + 1 bar + 2 NaN + dtype: object + """ + + # Check whether repl is valid (GH 13438, GH 15055) + if not (isinstance(repl, str) or callable(repl)): + raise TypeError("repl must be a string or callable") + + is_compiled_re = is_re(pat) + if regex: + if is_compiled_re: + if (case is not None) or (flags != 0): + raise ValueError( + "case and flags cannot be set when pat is a compiled regex" + ) + else: + # not a compiled regex + # set default case + if case is None: + case = True + + # add case flag, if provided + if case is False: + flags |= 
re.IGNORECASE + if is_compiled_re or len(pat) > 1 or flags or callable(repl): + n = n if n >= 0 else 0 + compiled = re.compile(pat, flags=flags) + f = lambda x: compiled.sub(repl=repl, string=x, count=n) + else: + f = lambda x: x.replace(pat, repl, n) + else: + if is_compiled_re: + raise ValueError( + "Cannot use a compiled regex as replacement pattern with regex=False" + ) + if callable(repl): + raise ValueError("Cannot use a callable replacement when regex=False") + f = lambda x: x.replace(pat, repl, n) + + return _na_map(f, arr, dtype=str) + + +def str_repeat(arr, repeats): + """ + Duplicate each string in the Series or Index. + + Parameters + ---------- + repeats : int or sequence of int + Same value for all (int) or different value per (sequence). + + Returns + ------- + Series or Index of object + Series or Index of repeated string objects specified by + input parameter repeats. + + Examples + -------- + >>> s = pd.Series(['a', 'b', 'c']) + >>> s + 0 a + 1 b + 2 c + dtype: object + + Single int repeats string in Series + + >>> s.str.repeat(repeats=2) + 0 aa + 1 bb + 2 cc + dtype: object + + Sequence of int repeats corresponding string in Series + + >>> s.str.repeat(repeats=[1, 2, 3]) + 0 a + 1 bb + 2 ccc + dtype: object + """ + if is_scalar(repeats): + + def scalar_rep(x): + try: + return bytes.__mul__(x, repeats) + except TypeError: + return str.__mul__(x, repeats) + + return _na_map(scalar_rep, arr, dtype=str) + else: + + def rep(x, r): + if x is libmissing.NA: + return x + try: + return bytes.__mul__(x, r) + except TypeError: + return str.__mul__(x, r) + + repeats = np.asarray(repeats, dtype=object) + result = libops.vec_binop(com.values_from_object(arr), repeats, rep) + return result + + +def str_match(arr, pat, case=True, flags=0, na=np.nan): + """ + Determine if each string matches a regular expression. + + Parameters + ---------- + pat : str + Character sequence or regular expression. + case : bool, default True + If True, case sensitive. 
+ flags : int, default 0 (no flags) + Regex module flags, e.g. re.IGNORECASE. + na : default NaN + Fill value for missing values. + + Returns + ------- + Series/array of boolean values + + See Also + -------- + contains : Analogous, but less strict, relying on re.search instead of + re.match. + extract : Extract matched groups. + """ + if not case: + flags |= re.IGNORECASE + + regex = re.compile(pat, flags=flags) + + dtype = bool + f = lambda x: bool(regex.match(x)) + + return _na_map(f, arr, na, dtype=dtype) + + +def _get_single_group_name(rx): + try: + return list(rx.groupindex.keys()).pop() + except IndexError: + return None + + +def _groups_or_na_fun(regex): + """Used in both extract_noexpand and extract_frame""" + if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + empty_row = [np.nan] * regex.groups + + def f(x): + if not isinstance(x, str): + return empty_row + m = regex.search(x) + if m: + return [np.nan if item is None else item for item in m.groups()] + else: + return empty_row + + return f + + +def _result_dtype(arr): + # workaround #27953 + # ideally we just pass `dtype=arr.dtype` unconditionally, but this fails + # when the list of values is empty. + if arr.dtype.name == "string": + return "string" + else: + return object + + +def _str_extract_noexpand(arr, pat, flags=0): + """ + Find groups in each string in the Series using passed regular + expression. This function is called from + str_extract(expand=False), and can return Series, DataFrame, or + Index. 
+ + """ + from pandas import DataFrame + + regex = re.compile(pat, flags=flags) + groups_or_na = _groups_or_na_fun(regex) + + if regex.groups == 1: + result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) + name = _get_single_group_name(regex) + else: + if isinstance(arr, ABCIndexClass): + raise ValueError("only one regex group is supported with Index") + name = None + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + if arr.empty: + result = DataFrame(columns=columns, dtype=object) + else: + dtype = _result_dtype(arr) + result = DataFrame( + [groups_or_na(val) for val in arr], + columns=columns, + index=arr.index, + dtype=dtype, + ) + return result, name + + +def _str_extract_frame(arr, pat, flags=0): + """ + For each subject string in the Series, extract groups from the + first match of regular expression pat. This function is called from + str_extract(expand=True), and always returns a DataFrame. + + """ + from pandas import DataFrame + + regex = re.compile(pat, flags=flags) + groups_or_na = _groups_or_na_fun(regex) + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + + if len(arr) == 0: + return DataFrame(columns=columns, dtype=object) + try: + result_index = arr.index + except AttributeError: + result_index = None + dtype = _result_dtype(arr) + return DataFrame( + [groups_or_na(val) for val in arr], + columns=columns, + index=result_index, + dtype=dtype, + ) + + +def str_extract(arr, pat, flags=0, expand=True): + r""" + Extract capture groups in the regex `pat` as columns in a DataFrame. + + For each subject string in the Series, extract groups from the + first match of regular expression `pat`. + + Parameters + ---------- + pat : str + Regular expression pattern with capturing groups. + flags : int, default 0 (no flags) + Flags from the ``re`` module, e.g. 
``re.IGNORECASE``, that + modify regular expression matching for things like case, + spaces, etc. For more details, see :mod:`re`. + expand : bool, default True + If True, return DataFrame with one column per capture group. + If False, return a Series/Index if there is one capture group + or DataFrame if there are multiple capture groups. + + Returns + ------- + DataFrame or Series or Index + A DataFrame with one row for each subject string, and one + column for each group. Any capture group names in regular + expression pat will be used for column names; otherwise + capture group numbers will be used. The dtype of each result + column is always object, even when no match is found. If + ``expand=False`` and pat has only one capture group, then + return a Series (if subject is a Series) or Index (if subject + is an Index). + + See Also + -------- + extractall : Returns all matches (not just the first match). + + Examples + -------- + A pattern with two groups will return a DataFrame with two columns. + Non-matches will be NaN. + + >>> s = pd.Series(['a1', 'b2', 'c3']) + >>> s.str.extract(r'([ab])(\d)') + 0 1 + 0 a 1 + 1 b 2 + 2 NaN NaN + + A pattern may contain optional groups. + + >>> s.str.extract(r'([ab])?(\d)') + 0 1 + 0 a 1 + 1 b 2 + 2 NaN 3 + + Named groups will become column names in the result. + + >>> s.str.extract(r'(?P[ab])(?P\d)') + letter digit + 0 a 1 + 1 b 2 + 2 NaN NaN + + A pattern with one group will return a DataFrame with one column + if expand=True. + + >>> s.str.extract(r'[ab](\d)', expand=True) + 0 + 0 1 + 1 2 + 2 NaN + + A pattern with one group will return a Series if expand=False. 
+ + >>> s.str.extract(r'[ab](\d)', expand=False) + 0 1 + 1 2 + 2 NaN + dtype: object + """ + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + if expand: + return _str_extract_frame(arr._orig, pat, flags=flags) + else: + result, name = _str_extract_noexpand(arr._parent, pat, flags=flags) + return arr._wrap_result(result, name=name, expand=expand) + + +def str_extractall(arr, pat, flags=0): + r""" + For each subject string in the Series, extract groups from all + matches of regular expression pat. When each subject string in the + Series has exactly one match, extractall(pat).xs(0, level='match') + is the same as extract(pat). + + Parameters + ---------- + pat : str + Regular expression pattern with capturing groups. + flags : int, default 0 (no flags) + A ``re`` module flag, for example ``re.IGNORECASE``. These allow + to modify regular expression matching for things like case, spaces, + etc. Multiple flags can be combined with the bitwise OR operator, + for example ``re.IGNORECASE | re.MULTILINE``. + + Returns + ------- + DataFrame + A ``DataFrame`` with one row for each match, and one column for each + group. Its rows have a ``MultiIndex`` with first levels that come from + the subject ``Series``. The last level is named 'match' and indexes the + matches in each item of the ``Series``. Any capture group names in + regular expression pat will be used for column names; otherwise capture + group numbers will be used. + + See Also + -------- + extract : Returns first match only (not all matches). + + Examples + -------- + A pattern with one group will return a DataFrame with one column. + Indices with no matches will not appear in the result. + + >>> s = pd.Series(["a1a2", "b1", "c1"], index=["A", "B", "C"]) + >>> s.str.extractall(r"[ab](\d)") + 0 + match + A 0 1 + 1 2 + B 0 1 + + Capture group names are used for column names of the result. 
+ + >>> s.str.extractall(r"[ab](?P\d)") + digit + match + A 0 1 + 1 2 + B 0 1 + + A pattern with two groups will return a DataFrame with two columns. + + >>> s.str.extractall(r"(?P[ab])(?P\d)") + letter digit + match + A 0 a 1 + 1 a 2 + B 0 b 1 + + Optional groups that do not match are NaN in the result. + + >>> s.str.extractall(r"(?P[ab])?(?P\d)") + letter digit + match + A 0 a 1 + 1 a 2 + B 0 b 1 + C 0 NaN 1 + """ + + regex = re.compile(pat, flags=flags) + # the regex must contain capture groups. + if regex.groups == 0: + raise ValueError("pattern contains no capture groups") + + if isinstance(arr, ABCIndexClass): + arr = arr.to_series().reset_index(drop=True) + + names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) + columns = [names.get(1 + i, i) for i in range(regex.groups)] + match_list = [] + index_list = [] + is_mi = arr.index.nlevels > 1 + + for subject_key, subject in arr.items(): + if isinstance(subject, str): + + if not is_mi: + subject_key = (subject_key,) + + for match_i, match_tuple in enumerate(regex.findall(subject)): + if isinstance(match_tuple, str): + match_tuple = (match_tuple,) + na_tuple = [np.NaN if group == "" else group for group in match_tuple] + match_list.append(na_tuple) + result_key = tuple(subject_key + (match_i,)) + index_list.append(result_key) + + from pandas import MultiIndex + + index = MultiIndex.from_tuples(index_list, names=arr.index.names + ["match"]) + dtype = _result_dtype(arr) + + result = arr._constructor_expanddim( + match_list, index=index, columns=columns, dtype=dtype + ) + return result + + +def str_get_dummies(arr, sep="|"): + """ + Split each string in the Series by sep and return a DataFrame + of dummy/indicator variables. + + Parameters + ---------- + sep : str, default "|" + String to split on. + + Returns + ------- + DataFrame + Dummy variables corresponding to values of the Series. + + See Also + -------- + get_dummies : Convert categorical variable into dummy/indicator + variables. 
+ + Examples + -------- + >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies() + a b c + 0 1 1 0 + 1 1 0 0 + 2 1 0 1 + + >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies() + a b c + 0 1 1 0 + 1 0 0 0 + 2 1 0 1 + """ + arr = arr.fillna("") + try: + arr = sep + arr + sep + except TypeError: + arr = sep + arr.astype(str) + sep + + tags = set() + for ts in arr.str.split(sep): + tags.update(ts) + tags = sorted(tags - {""}) + + dummies = np.empty((len(arr), len(tags)), dtype=np.int64) + + for i, t in enumerate(tags): + pat = sep + t + sep + dummies[:, i] = lib.map_infer(arr.to_numpy(), lambda x: pat in x) + return dummies, tags + + +def str_join(arr, sep): + """ + Join lists contained as elements in the Series/Index with passed delimiter. + + If the elements of a Series are lists themselves, join the content of these + lists using the delimiter passed to the function. + This function is an equivalent to :meth:`str.join`. + + Parameters + ---------- + sep : str + Delimiter to use between list entries. + + Returns + ------- + Series/Index: object + The list entries concatenated by intervening occurrences of the + delimiter. + + Raises + ------ + AttributeError + If the supplied Series contains neither strings nor lists. + + See Also + -------- + str.join : Standard library version of this method. + Series.str.split : Split strings around given separator/delimiter. + + Notes + ----- + If any of the list items is not a string object, the result of the join + will be `NaN`. + + Examples + -------- + Example with a list that contains non-string elements. + + >>> s = pd.Series([['lion', 'elephant', 'zebra'], + ... [1.1, 2.2, 3.3], + ... ['cat', np.nan, 'dog'], + ... ['cow', 4.5, 'goat'], + ... ['duck', ['swan', 'fish'], 'guppy']]) + >>> s + 0 [lion, elephant, zebra] + 1 [1.1, 2.2, 3.3] + 2 [cat, nan, dog] + 3 [cow, 4.5, goat] + 4 [duck, [swan, fish], guppy] + dtype: object + + Join all lists using a '-'. 
The lists containing object(s) of types other + than str will produce a NaN. + + >>> s.str.join('-') + 0 lion-elephant-zebra + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: object + """ + return _na_map(sep.join, arr, dtype=str) + + +def str_findall(arr, pat, flags=0): + """ + Find all occurrences of pattern or regular expression in the Series/Index. + + Equivalent to applying :func:`re.findall` to all the elements in the + Series/Index. + + Parameters + ---------- + pat : str + Pattern or regular expression. + flags : int, default 0 + Flags from ``re`` module, e.g. `re.IGNORECASE` (default is 0, which + means no flags). + + Returns + ------- + Series/Index of lists of strings + All non-overlapping matches of pattern or regular expression in each + string of this Series/Index. + + See Also + -------- + count : Count occurrences of pattern or regular expression in each string + of the Series/Index. + extractall : For each string in the Series, extract groups from all matches + of regular expression and return a DataFrame with one row for each + match and one column for each group. + re.findall : The equivalent ``re`` function to all non-overlapping matches + of pattern or regular expression in string, as a list of strings. + + Examples + -------- + + >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) + + The search for the pattern 'Monkey' returns one match: + + >>> s.str.findall('Monkey') + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + On the other hand, the search for the pattern 'MONKEY' doesn't return any + match: + + >>> s.str.findall('MONKEY') + 0 [] + 1 [] + 2 [] + dtype: object + + Flags can be added to the pattern or regular expression. 
For instance, + to find the pattern 'MONKEY' ignoring the case: + + >>> import re + >>> s.str.findall('MONKEY', flags=re.IGNORECASE) + 0 [] + 1 [Monkey] + 2 [] + dtype: object + + When the pattern matches more than one string in the Series, all matches + are returned: + + >>> s.str.findall('on') + 0 [on] + 1 [on] + 2 [] + dtype: object + + Regular expressions are supported too. For instance, the search for all the + strings ending with the word 'on' is shown next: + + >>> s.str.findall('on$') + 0 [on] + 1 [] + 2 [] + dtype: object + + If the pattern is found more than once in the same string, then a list of + multiple strings is returned: + + >>> s.str.findall('b') + 0 [] + 1 [] + 2 [b, b] + dtype: object + """ + regex = re.compile(pat, flags=flags) + return _na_map(regex.findall, arr) + + +def str_find(arr, sub, start=0, end=None, side="left"): + """ + Return indexes in each strings in the Series/Index where the + substring is fully contained between [start:end]. Return -1 on failure. + + Parameters + ---------- + sub : str + Substring being searched. + start : int + Left edge index. + end : int + Right edge index. + side : {'left', 'right'}, default 'left' + Specifies a starting side, equivalent to ``find`` or ``rfind``. + + Returns + ------- + Series or Index + Indexes where substring is found. 
+ """ + + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + if side == "left": + method = "find" + elif side == "right": + method = "rfind" + else: # pragma: no cover + raise ValueError("Invalid side") + + if end is None: + f = lambda x: getattr(x, method)(sub, start) + else: + f = lambda x: getattr(x, method)(sub, start, end) + + return _na_map(f, arr, dtype="int64") + + +def str_index(arr, sub, start=0, end=None, side="left"): + if not isinstance(sub, str): + msg = f"expected a string object, not {type(sub).__name__}" + raise TypeError(msg) + + if side == "left": + method = "index" + elif side == "right": + method = "rindex" + else: # pragma: no cover + raise ValueError("Invalid side") + + if end is None: + f = lambda x: getattr(x, method)(sub, start) + else: + f = lambda x: getattr(x, method)(sub, start, end) + + return _na_map(f, arr, dtype="int64") + + +def str_pad(arr, width, side="left", fillchar=" "): + """ + Pad strings in the Series/Index up to width. + + Parameters + ---------- + width : int + Minimum width of resulting string; additional characters will be filled + with character defined in `fillchar`. + side : {'left', 'right', 'both'}, default 'left' + Side from which to fill resulting string. + fillchar : str, default ' ' + Additional character for filling, default is whitespace. + + Returns + ------- + Series or Index of object + Returns Series or Index with minimum number of char in object. + + See Also + -------- + Series.str.rjust : Fills the left side of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='left')``. + Series.str.ljust : Fills the right side of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='right')``. + Series.str.center : Fills boths sides of strings with an arbitrary + character. Equivalent to ``Series.str.pad(side='both')``. + Series.str.zfill : Pad strings in the Series/Index by prepending '0' + character. 
Equivalent to ``Series.str.pad(side='left', fillchar='0')``. + + Examples + -------- + >>> s = pd.Series(["caribou", "tiger"]) + >>> s + 0 caribou + 1 tiger + dtype: object + + >>> s.str.pad(width=10) + 0 caribou + 1 tiger + dtype: object + + >>> s.str.pad(width=10, side='right', fillchar='-') + 0 caribou--- + 1 tiger----- + dtype: object + + >>> s.str.pad(width=10, side='both', fillchar='-') + 0 -caribou-- + 1 --tiger--- + dtype: object + """ + if not isinstance(fillchar, str): + msg = f"fillchar must be a character, not {type(fillchar).__name__}" + raise TypeError(msg) + + if len(fillchar) != 1: + raise TypeError("fillchar must be a character, not str") + + if not is_integer(width): + msg = f"width must be of integer type, not {type(width).__name__}" + raise TypeError(msg) + + if side == "left": + f = lambda x: x.rjust(width, fillchar) + elif side == "right": + f = lambda x: x.ljust(width, fillchar) + elif side == "both": + f = lambda x: x.center(width, fillchar) + else: # pragma: no cover + raise ValueError("Invalid side") + + return _na_map(f, arr, dtype=str) + + +def str_split(arr, pat=None, n=None): + + if pat is None: + if n is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + else: + if len(pat) == 1: + if n is None or n == 0: + n = -1 + f = lambda x: x.split(pat, n) + else: + if n is None or n == -1: + n = 0 + regex = re.compile(pat) + f = lambda x: regex.split(x, maxsplit=n) + res = _na_map(f, arr) + return res + + +def str_rsplit(arr, pat=None, n=None): + + if n is None or n == 0: + n = -1 + f = lambda x: x.rsplit(pat, n) + res = _na_map(f, arr) + return res + + +def str_slice(arr, start=None, stop=None, step=None): + """ + Slice substrings from each element in the Series or Index. + + Parameters + ---------- + start : int, optional + Start position for slice operation. + stop : int, optional + Stop position for slice operation. + step : int, optional + Step size for slice operation. 
+ + Returns + ------- + Series or Index of object + Series or Index from sliced substring from original string object. + + See Also + -------- + Series.str.slice_replace : Replace a slice with a string. + Series.str.get : Return element at position. + Equivalent to `Series.str.slice(start=i, stop=i+1)` with `i` + being the position. + + Examples + -------- + >>> s = pd.Series(["koala", "fox", "chameleon"]) + >>> s + 0 koala + 1 fox + 2 chameleon + dtype: object + + >>> s.str.slice(start=1) + 0 oala + 1 ox + 2 hameleon + dtype: object + + >>> s.str.slice(start=-1) + 0 a + 1 x + 2 n + dtype: object + + >>> s.str.slice(stop=2) + 0 ko + 1 fo + 2 ch + dtype: object + + >>> s.str.slice(step=2) + 0 kaa + 1 fx + 2 caeen + dtype: object + + >>> s.str.slice(start=0, stop=5, step=3) + 0 kl + 1 f + 2 cm + dtype: object + + Equivalent behaviour to: + + >>> s.str[0:5:3] + 0 kl + 1 f + 2 cm + dtype: object + """ + obj = slice(start, stop, step) + f = lambda x: x[obj] + return _na_map(f, arr, dtype=str) + + +def str_slice_replace(arr, start=None, stop=None, repl=None): + """ + Replace a positional slice of a string with another value. + + Parameters + ---------- + start : int, optional + Left index position to use for the slice. If not specified (None), + the slice is unbounded on the left, i.e. slice from the start + of the string. + stop : int, optional + Right index position to use for the slice. If not specified (None), + the slice is unbounded on the right, i.e. slice until the + end of the string. + repl : str, optional + String for replacement. If not specified (None), the sliced region + is replaced with an empty string. + + Returns + ------- + Series or Index + Same type as the original object. + + See Also + -------- + Series.str.slice : Just slicing without replacement. 
+ + Examples + -------- + >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde']) + >>> s + 0 a + 1 ab + 2 abc + 3 abdc + 4 abcde + dtype: object + + Specify just `start`, meaning replace `start` until the end of the + string with `repl`. + + >>> s.str.slice_replace(1, repl='X') + 0 aX + 1 aX + 2 aX + 3 aX + 4 aX + dtype: object + + Specify just `stop`, meaning the start of the string to `stop` is replaced + with `repl`, and the rest of the string is included. + + >>> s.str.slice_replace(stop=2, repl='X') + 0 X + 1 X + 2 Xc + 3 Xdc + 4 Xcde + dtype: object + + Specify `start` and `stop`, meaning the slice from `start` to `stop` is + replaced with `repl`. Everything before or after `start` and `stop` is + included as is. + + >>> s.str.slice_replace(start=1, stop=3, repl='X') + 0 aX + 1 aX + 2 aX + 3 aXc + 4 aXde + dtype: object + """ + if repl is None: + repl = "" + + def f(x): + if x[start:stop] == "": + local_stop = start + else: + local_stop = stop + y = "" + if start is not None: + y += x[:start] + y += repl + if stop is not None: + y += x[local_stop:] + return y + + return _na_map(f, arr, dtype=str) + + +def str_strip(arr, to_strip=None, side="both"): + """ + Strip whitespace (including newlines) from each string in the + Series/Index. + + Parameters + ---------- + to_strip : str or unicode + side : {'left', 'right', 'both'}, default 'both' + + Returns + ------- + Series or Index + """ + if side == "both": + f = lambda x: x.strip(to_strip) + elif side == "left": + f = lambda x: x.lstrip(to_strip) + elif side == "right": + f = lambda x: x.rstrip(to_strip) + else: # pragma: no cover + raise ValueError("Invalid side") + return _na_map(f, arr, dtype=str) + + +def str_wrap(arr, width, **kwargs): + r""" + Wrap long strings in the Series/Index to be formatted in + paragraphs with length less than a given width. + + This method has the same keyword parameters and defaults as + :class:`textwrap.TextWrapper`. + + Parameters + ---------- + width : int + Maximum line width. 
+ expand_tabs : bool, optional + If True, tab characters will be expanded to spaces (default: True). + replace_whitespace : bool, optional + If True, each whitespace character (as defined by string.whitespace) + remaining after tab expansion will be replaced by a single space + (default: True). + drop_whitespace : bool, optional + If True, whitespace that, after wrapping, happens to end up at the + beginning or end of a line is dropped (default: True). + break_long_words : bool, optional + If True, then words longer than width will be broken in order to ensure + that no lines are longer than width. If it is false, long words will + not be broken, and some lines may be longer than width (default: True). + break_on_hyphens : bool, optional + If True, wrapping will occur preferably on whitespace and right after + hyphens in compound words, as it is customary in English. If false, + only whitespaces will be considered as potentially good places for line + breaks, but you need to set break_long_words to false if you want truly + insecable words (default: True). + + Returns + ------- + Series or Index + + Notes + ----- + Internally, this method uses a :class:`textwrap.TextWrapper` instance with + default settings. To achieve behavior matching R's stringr library str_wrap + function, use the arguments: + + - expand_tabs = False + - replace_whitespace = True + - drop_whitespace = True + - break_long_words = False + - break_on_hyphens = False + + Examples + -------- + + >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) + >>> s.str.wrap(12) + 0 line to be\nwrapped + 1 another line\nto be\nwrapped + dtype: object + """ + kwargs["width"] = width + + tw = textwrap.TextWrapper(**kwargs) + + return _na_map(lambda s: "\n".join(tw.wrap(s)), arr, dtype=str) + + +def str_translate(arr, table): + """ + Map all characters in the string through the given mapping table. + Equivalent to standard :meth:`str.translate`. 
def str_get(arr, i):
    """
    Extract element from each component at specified position.

    Extract element from lists, tuples, or strings in each element in the
    Series/Index. Dict elements are looked up with ``dict.get(i)``; other
    elements are indexed with ``i`` when it lies within their bounds and
    produce NaN otherwise.

    Parameters
    ----------
    i : int
        Position of element to extract.

    Returns
    -------
    Series or Index

    Examples
    --------
    >>> s = pd.Series(["String",
    ...               (1, 2, 3),
    ...               ["a", "b", "c"],
    ...               123,
    ...               -456,
    ...               {1: "Hello", "2": "World"}])
    >>> s
    0                        String
    1                     (1, 2, 3)
    2                     [a, b, c]
    3                           123
    4                          -456
    5    {1: 'Hello', '2': 'World'}
    dtype: object

    >>> s.str.get(1)
    0        t
    1        2
    2        b
    3      NaN
    4      NaN
    5    Hello
    dtype: object

    >>> s.str.get(-1)
    0       g
    1       3
    2       c
    3     NaN
    4     NaN
    5    None
    dtype: object
    """

    def _pick(item):
        # dicts are treated as mappings, not as positional containers
        if isinstance(item, dict):
            return item.get(i)
        size = len(item)
        # accept both non-negative and negative in-range positions
        if size > i >= -size:
            return item[i]
        return np.nan

    return _na_map(_pick, arr)
def forbid_nonstring_types(forbidden, name=None):
    """
    Decorator to forbid specific types for a method of StringMethods.

    The decorated method first checks ``self._inferred_dtype``; when that
    inferred type is not allowed, a TypeError is raised instead of calling
    the method. The base set of types is
    ['string', 'empty', 'bytes', 'mixed', 'mixed-integer'], and every entry
    of `forbidden` is removed from it to obtain the allowed set.

    Parameters
    ----------
    forbidden : list-of-str or None
        List of forbidden non-string types, may be one or more of
        `['bytes', 'mixed', 'mixed-integer']`. None forbids nothing.
    name : str, default None
        Name of the method to use in the error message. By default, this is
        None, in which case the name from the method being wrapped will be
        copied. However, for working with further wrappers (like _pat_wrapper
        and _noarg_wrapper), it is necessary to specify the name.

    Returns
    -------
    func : wrapper
        The method to which the decorator is applied, with an added check that
        enforces the inferred type to not be in the list of forbidden types.

    Raises
    ------
    TypeError
        If the inferred type of the underlying data is in `forbidden`.
    """
    # None is shorthand for "no additional restrictions"
    blocked = [] if forbidden is None else forbidden
    every_type = {"string", "empty", "bytes", "mixed", "mixed-integer"}
    permitted = every_type - set(blocked)

    def _forbid_nonstring_types(func):
        # an explicit `name` wins over the wrapped function's own name
        method_name = name if name is not None else func.__name__

        @wraps(func)
        def wrapper(self, *args, **kwargs):
            inferred = self._inferred_dtype
            if inferred in permitted:
                return func(self, *args, **kwargs)
            raise TypeError(
                f"Cannot use .str.{method_name} with values of "
                f"inferred dtype '{inferred}'."
            )

        # `wraps` copied func.__name__; override it with the reported name
        wrapper.__name__ = method_name
        return wrapper

    return _forbid_nonstring_types
def copy(source):
    """
    Copy a docstring from another source function (if present).

    Parameters
    ----------
    source : object
        Object whose ``__doc__`` is used.

    Returns
    -------
    callable
        Decorator that assigns ``source.__doc__`` to its target when that
        docstring is truthy, and returns the target itself either way.
    """

    def do_copy(target):
        doc = source.__doc__
        # leave the target's own docstring alone when there is nothing to copy
        if not doc:
            return target
        target.__doc__ = doc
        return target

    return do_copy
def __getitem__(self, key):
    """
    Support ``obj.str[key]`` indexing.

    A ``slice`` key is forwarded to :meth:`slice` with its ``start``,
    ``stop`` and ``step``; any other key is forwarded to :meth:`get`.
    """
    if not isinstance(key, slice):
        return self.get(key)
    return self.slice(start=key.start, stop=key.stop, step=key.step)
+ if use_codes and self._is_categorical: + # if self._orig is a CategoricalIndex, there is no .cat-accessor + result = take_1d( + result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value + ) + + if not hasattr(result, "ndim") or not hasattr(result, "dtype"): + return result + assert result.ndim < 3 + + # We can be wrapping a string / object / categorical result, in which + # case we'll want to return the same dtype as the input. + # Or we can be wrapping a numeric output, in which case we don't want + # to return a StringArray. + if self._is_string and returns_string: + dtype = "string" + else: + dtype = None + + if expand is None: + # infer from ndim if expand is not specified + expand = result.ndim != 1 + + elif expand is True and not isinstance(self._orig, ABCIndexClass): + # required when expand=True is explicitly specified + # not needed when inferred + + def cons_row(x): + if is_list_like(x): + return x + else: + return [x] + + result = [cons_row(x) for x in result] + if result: + # propagate nan values to match longest sequence (GH 18450) + max_len = max(len(x) for x in result) + result = [ + x * max_len if len(x) == 0 or x[0] is np.nan else x for x in result + ] + + if not isinstance(expand, bool): + raise ValueError("expand must be True or False") + + if expand is False: + # if expand is False, result should have the same name + # as the original otherwise specified + if name is None: + name = getattr(result, "name", None) + if name is None: + # do not use logical or, _orig may be a DataFrame + # which has "name" column + name = self._orig.name + + # Wait until we are sure result is a Series or Index before + # checking attributes (GH 12180) + if isinstance(self._orig, ABCIndexClass): + # if result is a boolean np.array, return the np.array + # instead of wrapping it into a boolean Index (GH 8875) + if is_bool_dtype(result): + return result + + if expand: + result = list(result) + out = MultiIndex.from_tuples(result, names=name) + if 
def _get_series_list(self, others):
    """
    Auxiliary function for :meth:`str.cat`. Turn potentially mixed input
    into a list of Series (elements without an index must match the length
    of the calling Series/Index).

    Parameters
    ----------
    others : Series, DataFrame, np.ndarray, list-like or list-like of
        Objects that are either Series, Index or np.ndarray (1-dim).

    Returns
    -------
    list of Series
        Others transformed into list of Series.

    Raises
    ------
    TypeError
        If `others` is none of the supported inputs, or is a list-like
        that mixes list-like and non-list-like elements.
    """
    from pandas import Series, DataFrame

    # self._orig is either Series or Index
    idx = self._orig if isinstance(self._orig, ABCIndexClass) else self._orig.index

    # Generally speaking, all objects without an index inherit the index
    # `idx` of the calling Series/Index - i.e. must have matching length.
    # Objects with an index (i.e. Series/Index/DataFrame) keep their own.
    if isinstance(others, ABCSeries):
        return [others]
    elif isinstance(others, ABCIndexClass):
        return [Series(others.values, index=others)]
    elif isinstance(others, ABCDataFrame):
        return [others[x] for x in others]
    elif isinstance(others, np.ndarray) and others.ndim == 2:
        others = DataFrame(others, index=idx)
        return [others[x] for x in others]
    elif is_list_like(others, allow_sets=False):
        others = list(others)  # ensure iterators do not get read twice etc

        # in case of list-like `others`, all elements must be
        # either Series/Index/np.ndarray (1-dim)...
        if all(
            isinstance(x, (ABCSeries, ABCIndexClass))
            or (isinstance(x, np.ndarray) and x.ndim == 1)
            for x in others
        ):
            # Flatten in order; extend() keeps this linear, unlike
            # repeatedly popping from the front and re-building the list.
            los = []
            for element in others:
                los.extend(self._get_series_list(element))
            return los
        # ... or just strings
        elif all(not is_list_like(x) for x in others):
            return [Series(others, index=idx)]
    raise TypeError(
        "others must be Series, Index, DataFrame, np.ndarray "
        "or list-like (either containing only strings or "
        "containing only objects of type Series/Index/"
        "np.ndarray[1-dim])"
    )
+ - If `na_rep` is None, and `others` is not None, a row containing a + missing value in any of the columns (before concatenation) will + have a missing value in the result. + join : {'left', 'right', 'outer', 'inner'}, default 'left' + Determines the join-style between the calling Series/Index and any + Series/Index/DataFrame in `others` (objects without an index need + to match the length of the calling Series/Index). To disable + alignment, use `.values` on any Series/Index/DataFrame in `others`. + + .. versionadded:: 0.23.0 + .. versionchanged:: 1.0.0 + Changed default of `join` from None to `'left'`. + + Returns + ------- + str, Series or Index + If `others` is None, `str` is returned, otherwise a `Series/Index` + (same type as caller) of objects is returned. + + See Also + -------- + split : Split each string in the Series/Index. + join : Join lists contained as elements in the Series/Index. + + Examples + -------- + When not passing `others`, all values are concatenated into a single + string: + + >>> s = pd.Series(['a', 'b', np.nan, 'd']) + >>> s.str.cat(sep=' ') + 'a b d' + + By default, NA values in the Series are ignored. Using `na_rep`, they + can be given a representation: + + >>> s.str.cat(sep=' ', na_rep='?') + 'a b ? d' + + If `others` is specified, corresponding values are concatenated with + the separator. Result will be a Series of strings. + + >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',') + 0 a,A + 1 b,B + 2 NaN + 3 d,D + dtype: object + + Missing values will remain missing in the result, but can again be + represented using `na_rep` + + >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-') + 0 a,A + 1 b,B + 2 -,C + 3 d,D + dtype: object + + If `sep` is not specified, the values are concatenated without + separation. + + >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-') + 0 aA + 1 bB + 2 -C + 3 dD + dtype: object + + Series with different indexes can be aligned before concatenation. The + `join`-keyword works as in other methods. 
+ + >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) + >>> s.str.cat(t, join='left', na_rep='-') + 0 aa + 1 b- + 2 -c + 3 dd + dtype: object + >>> + >>> s.str.cat(t, join='outer', na_rep='-') + 0 aa + 1 b- + 2 -c + 3 dd + 4 -e + dtype: object + >>> + >>> s.str.cat(t, join='inner', na_rep='-') + 0 aa + 2 -c + 3 dd + dtype: object + >>> + >>> s.str.cat(t, join='right', na_rep='-') + 3 dd + 0 aa + 4 -e + 2 -c + dtype: object + + For more examples, see :ref:`here `. + """ + from pandas import Index, Series, concat + + if isinstance(others, str): + raise ValueError("Did you mean to supply a `sep` keyword?") + if sep is None: + sep = "" + + if isinstance(self._orig, ABCIndexClass): + data = Series(self._orig, index=self._orig) + else: # Series + data = self._orig + + # concatenate Series/Index with itself if no "others" + if others is None: + data = ensure_object(data) + na_mask = isna(data) + if na_rep is None and na_mask.any(): + data = data[~na_mask] + elif na_rep is not None and na_mask.any(): + data = np.where(na_mask, na_rep, data) + return sep.join(data) + + try: + # turn anything in "others" into lists of Series + others = self._get_series_list(others) + except ValueError: # do not catch TypeError raised by _get_series_list + raise ValueError( + "If `others` contains arrays or lists (or other " + "list-likes without an index), these must all be " + "of the same length as the calling Series/Index." 
+ ) + + # align if required + if any(not data.index.equals(x.index) for x in others): + # Need to add keys for uniqueness in case of duplicate columns + others = concat( + others, + axis=1, + join=(join if join == "inner" else "outer"), + keys=range(len(others)), + sort=False, + copy=False, + ) + data, others = data.align(others, join=join) + others = [others[x] for x in others] # again list of Series + + all_cols = [ensure_object(x) for x in [data] + others] + na_masks = np.array([isna(x) for x in all_cols]) + union_mask = np.logical_or.reduce(na_masks, axis=0) + + if na_rep is None and union_mask.any(): + # no na_rep means NaNs for all rows where any column has a NaN + # only necessary if there are actually any NaNs + result = np.empty(len(data), dtype=object) + np.putmask(result, union_mask, np.nan) + + not_masked = ~union_mask + result[not_masked] = cat_safe([x[not_masked] for x in all_cols], sep) + elif na_rep is not None and union_mask.any(): + # fill NaNs with na_rep in case there are actually any NaNs + all_cols = [ + np.where(nm, na_rep, col) for nm, col in zip(na_masks, all_cols) + ] + result = cat_safe(all_cols, sep) + else: + # no NaNs - can just concatenate + result = cat_safe(all_cols, sep) + + if isinstance(self._orig, ABCIndexClass): + # add dtype for case that result is all-NA + result = Index(result, dtype=object, name=self._orig.name) + else: # Series + if is_categorical_dtype(self._orig.dtype): + # We need to infer the new categories. + dtype = None + else: + dtype = self._orig.dtype + result = Series(result, dtype=dtype, index=data.index, name=self._orig.name) + return result + + _shared_docs[ + "str_split" + ] = r""" + Split strings around given separator/delimiter. + + Splits the string in the Series/Index from the %(side)s, + at the specified delimiter string. Equivalent to :meth:`str.%(method)s`. + + Parameters + ---------- + pat : str, optional + String or regular expression to split on. + If not specified, split on whitespace. 
+ n : int, default -1 (all) + Limit number of splits in output. + ``None``, 0 and -1 will be interpreted as return all splits. + expand : bool, default False + Expand the splitted strings into separate columns. + + * If ``True``, return DataFrame/MultiIndex expanding dimensionality. + * If ``False``, return Series/Index, containing lists of strings. + + Returns + ------- + Series, Index, DataFrame or MultiIndex + Type matches caller unless ``expand=True`` (see Notes). + + See Also + -------- + Series.str.split : Split strings around given separator/delimiter. + Series.str.rsplit : Splits string around given separator/delimiter, + starting from the right. + Series.str.join : Join lists contained as elements in the Series/Index + with passed delimiter. + str.split : Standard library version for split. + str.rsplit : Standard library version for rsplit. + + Notes + ----- + The handling of the `n` keyword depends on the number of found splits: + + - If found splits > `n`, make first `n` splits only + - If found splits <= `n`, make all splits + - If for a certain row the number of found splits < `n`, + append `None` for padding up to `n` if ``expand=True`` + + If using ``expand=True``, Series and Index callers return DataFrame and + MultiIndex objects, respectively. + + Examples + -------- + >>> s = pd.Series(["this is a regular sentence", + ... "https://docs.python.org/3/tutorial/index.html", + ... np.nan]) + 0 this is a regular sentence + 1 https://docs.python.org/3/tutorial/index.html + 2 NaN + dtype: object + + In the default setting, the string is split by whitespace. + + >>> s.str.split() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + Without the `n` parameter, the outputs of `rsplit` and `split` + are identical. 
+ + >>> s.str.rsplit() + 0 [this, is, a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `n` parameter can be used to limit the number of splits on the + delimiter. The outputs of `split` and `rsplit` are different. + + >>> s.str.split(n=2) + 0 [this, is, a regular sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + >>> s.str.rsplit(n=2) + 0 [this is a, regular, sentence] + 1 [https://docs.python.org/3/tutorial/index.html] + 2 NaN + dtype: object + + The `pat` parameter can be used to split by other characters. + + >>> s.str.split(pat = "/") + 0 [this is a regular sentence] + 1 [https:, , docs.python.org, 3, tutorial, index... + 2 NaN + dtype: object + + When using ``expand=True``, the split elements will expand out into + separate columns. If NaN is present, it is propagated throughout + the columns during the split. + + >>> s.str.split(expand=True) + 0 1 2 3 + 0 this is a regular + 1 https://docs.python.org/3/tutorial/index.html None None None + 2 NaN NaN NaN NaN \ + 4 + 0 sentence + 1 None + 2 NaN + + For slightly more complex use cases like splitting the html document name + from a url, a combination of parameter settings can be used. + + >>> s.str.rsplit("/", n=1, expand=True) + 0 1 + 0 this is a regular sentence None + 1 https://docs.python.org/3/tutorial index.html + 2 NaN NaN + + Remember to escape special characters when explicitly using regular + expressions. 
@Appender(_shared_docs["str_split"] % dict(side="beginning", method="split"))
@forbid_nonstring_types(["bytes"])
def split(self, pat=None, n=-1, expand=False):
    # The docstring is supplied by the Appender decorator above.
    # `expand` is passed both as the expand flag and as `returns_string`
    # when wrapping the result.
    pieces = str_split(self._parent, pat, n=n)
    return self._wrap_result(pieces, expand=expand, returns_string=expand)
@Appender(
    _shared_docs["str_partition"]
    % {
        "side": "first",
        "return": "3 elements containing the string itself, followed by two "
        "empty strings",
        "also": "rpartition : Split the string at the last occurrence of `sep`.",
    }
)
@forbid_nonstring_types(["bytes"])
def partition(self, sep=" ", expand=True):
    # The docstring is supplied by the Appender decorator above.
    def split_once(text):
        # str.partition yields (before, separator, after)
        return text.partition(sep)

    parts = _na_map(split_once, self._parent)
    return self._wrap_result(parts, expand=expand, returns_string=expand)
@copy(str_contains)
@forbid_nonstring_types(["bytes"])
def contains(self, pat, case=True, flags=0, na=np.nan, regex=True):
    # All matching options are forwarded untouched to the module-level
    # str_contains helper.
    options = dict(case=case, flags=flags, na=na, regex=regex)
    matches = str_contains(self._parent, pat, **options)
    # `na` is also used as the fill value when the result is wrapped.
    return self._wrap_result(matches, fill_value=na, returns_string=False)
@forbid_nonstring_types(["bytes"])
def zfill(self, width):
    """
    Pad strings in the Series/Index by prepending '0' characters.

    Strings in the Series/Index are padded with '0' characters on the
    left of the string to reach a total string length `width`. Strings
    in the Series/Index with length greater or equal to `width` are
    unchanged.

    Parameters
    ----------
    width : int
        Minimum length of resulting string; strings with length less
        than `width` will be prepended with '0' characters.

    Returns
    -------
    Series/Index of objects.

    See Also
    --------
    Series.str.rjust : Fills the left side of strings with an arbitrary
        character.
    Series.str.ljust : Fills the right side of strings with an arbitrary
        character.
    Series.str.pad : Fills the specified sides of strings with an arbitrary
        character.
    Series.str.center : Fills both sides of strings with an arbitrary
        character.

    Notes
    -----
    Differs from :meth:`str.zfill` which has special handling
    for '+'/'-' in the string.

    Examples
    --------
    >>> s = pd.Series(['-1', '1', '1000', 10, np.nan])
    >>> s
    0      -1
    1       1
    2    1000
    3      10
    4     NaN
    dtype: object

    Note that ``10`` and ``NaN`` are not strings, therefore they are
    converted to ``NaN``. The minus sign in ``'-1'`` is treated as a
    regular character and the zero is added to the left of it
    (:meth:`str.zfill` would have moved it to the left). ``1000``
    remains unchanged as it is longer than `width`.

    >>> s.str.zfill(3)
    0     0-1
    1     001
    2    1000
    3     NaN
    4     NaN
    dtype: object
    """
    # Implemented as a left pad with '0' rather than via str.zfill.
    padded = str_pad(self._parent, width, side="left", fillchar="0")
    return self._wrap_result(padded)
    @Appender(
        _shared_docs["str_strip"] % dict(side="left and right sides", method="strip")
    )
    @forbid_nonstring_types(["bytes"])
    def strip(self, to_strip=None):
        result = str_strip(self._parent, to_strip, side="both")
        return self._wrap_result(result)

    @Appender(_shared_docs["str_strip"] % dict(side="left side", method="lstrip"))
    @forbid_nonstring_types(["bytes"])
    def lstrip(self, to_strip=None):
        result = str_strip(self._parent, to_strip, side="left")
        return self._wrap_result(result)

    @Appender(_shared_docs["str_strip"] % dict(side="right side", method="rstrip"))
    @forbid_nonstring_types(["bytes"])
    def rstrip(self, to_strip=None):
        result = str_strip(self._parent, to_strip, side="right")
        return self._wrap_result(result)

    @copy(str_wrap)
    @forbid_nonstring_types(["bytes"])
    def wrap(self, width, **kwargs):
        # Extra keyword arguments are forwarded untouched to ``str_wrap``.
        result = str_wrap(self._parent, width, **kwargs)
        return self._wrap_result(result)

    @copy(str_get_dummies)
    @forbid_nonstring_types(["bytes"])
    def get_dummies(self, sep="|"):
        # we need to cast to Series of strings as only that has all
        # methods available for making the dummies...
        data = self._orig.astype(str) if self._is_categorical else self._parent
        result, name = str_get_dummies(data, sep)
        # ``use_codes`` is switched off for categorical input because the
        # dummies were computed from the string-cast original, not ``_parent``.
        return self._wrap_result(
            result,
            use_codes=(not self._is_categorical),
            name=name,
            expand=True,
            returns_string=False,
        )

    @copy(str_translate)
    @forbid_nonstring_types(["bytes"])
    def translate(self, table):
        result = str_translate(self._parent, table)
        return self._wrap_result(result)

    # Pattern-based methods built by ``_pat_wrapper``; none of them is flagged
    # as returning strings.
    count = _pat_wrapper(str_count, flags=True, name="count", returns_string=False)
    startswith = _pat_wrapper(
        str_startswith, na=True, name="startswith", returns_string=False
    )
    endswith = _pat_wrapper(
        str_endswith, na=True, name="endswith", returns_string=False
    )
    findall = _pat_wrapper(
        str_findall, flags=True, name="findall", returns_string=False
    )

    @copy(str_extract)
    @forbid_nonstring_types(["bytes"])
    def extract(self, pat, flags=0, expand=True):
        # NOTE: the accessor itself (``self``) is handed to ``str_extract``
        # and its return value is passed back without ``_wrap_result``.
        return str_extract(self, pat, flags=flags, expand=expand)

    @copy(str_extractall)
    @forbid_nonstring_types(["bytes"])
    def extractall(self, pat, flags=0):
        # Works on the original object (``_orig``) rather than ``_parent``.
        return str_extractall(self._orig, pat, flags=flags)
    @Appender(
        _shared_docs["find"]
        % dict(
            side="lowest",
            method="find",
            also="rfind : Return highest indexes in each strings.",
        )
    )
    @forbid_nonstring_types(["bytes"])
    def find(self, sub, start=0, end=None):
        # side="left" corresponds to the "lowest" index wording of the docs.
        result = str_find(self._parent, sub, start=start, end=end, side="left")
        return self._wrap_result(result, returns_string=False)

    @Appender(
        _shared_docs["find"]
        % dict(
            side="highest",
            method="rfind",
            also="find : Return lowest indexes in each strings.",
        )
    )
    @forbid_nonstring_types(["bytes"])
    def rfind(self, sub, start=0, end=None):
        # side="right" corresponds to the "highest" index wording of the docs.
        result = str_find(self._parent, sub, start=start, end=end, side="right")
        return self._wrap_result(result, returns_string=False)

    @forbid_nonstring_types(["bytes"])
    def normalize(self, form):
        """
        Return the Unicode normal form for the strings in the Series/Index.

        For more information on the forms, see
        :func:`unicodedata.normalize`.

        Parameters
        ----------
        form : {'NFC', 'NFKC', 'NFD', 'NFKD'}
            Unicode form.

        Returns
        -------
        normalized : Series/Index of objects
        """
        # Imported locally; only this method needs ``unicodedata``.
        import unicodedata

        f = lambda x: unicodedata.normalize(form, x)
        result = _na_map(f, self._parent, dtype=str)
        return self._wrap_result(result)
    @Appender(
        _shared_docs["index"]
        % dict(
            side="lowest",
            similar="find",
            method="index",
            also="rindex : Return highest indexes in each strings.",
        )
    )
    @forbid_nonstring_types(["bytes"])
    def index(self, sub, start=0, end=None):
        # Counterpart of ``find`` (side="left"); per the shared docstring a
        # missing substring raises ValueError instead of yielding -1.
        result = str_index(self._parent, sub, start=start, end=end, side="left")
        return self._wrap_result(result, returns_string=False)

    @Appender(
        _shared_docs["index"]
        % dict(
            side="highest",
            similar="rfind",
            method="rindex",
            also="index : Return lowest indexes in each strings.",
        )
    )
    @forbid_nonstring_types(["bytes"])
    def rindex(self, sub, start=0, end=None):
        # Counterpart of ``rfind`` (side="right"); same error contract as
        # ``index`` according to the shared docstring.
        result = str_index(self._parent, sub, start=start, end=end, side="right")
        return self._wrap_result(result, returns_string=False)
    # The ``len`` passed as first argument is looked up before this assignment
    # binds the class attribute, so it is the enclosing-scope ``len``
    # (presumably the builtin -- confirm the module does not rebind it).
    # ``forbidden_types=None`` is passed explicitly, unlike the other
    # ``_noarg_wrapper`` calls in this class, which do not pass it at all.
    len = _noarg_wrapper(
        len,
        docstring=_shared_docs["len"],
        forbidden_types=None,
        dtype="int64",
        returns_string=False,
    )
    # _doc_args holds dict of strings to use in substituting casemethod docs.
    # Each entry fills the %(type)s, %(method)s and %(version)s placeholders of
    # the shared "casemethods" docstring template.
    _doc_args: Dict[str, Dict[str, str]] = {}
    _doc_args["lower"] = dict(type="lowercase", method="lower", version="")
    _doc_args["upper"] = dict(type="uppercase", method="upper", version="")
    _doc_args["title"] = dict(type="titlecase", method="title", version="")
    _doc_args["capitalize"] = dict(
        type="be capitalized", method="capitalize", version=""
    )
    _doc_args["swapcase"] = dict(type="be swapcased", method="swapcase", version="")
    # Only ``casefold`` carries a version note in its rendered docstring.
    _doc_args["casefold"] = dict(
        type="be casefolded",
        method="casefold",
        version="\n .. versionadded:: 0.25.0\n",
    )
    # One wrapper per case method; every one is declared with dtype=str.
    lower = _noarg_wrapper(
        lambda x: x.lower(),
        name="lower",
        docstring=_shared_docs["casemethods"] % _doc_args["lower"],
        dtype=str,
    )
    upper = _noarg_wrapper(
        lambda x: x.upper(),
        name="upper",
        docstring=_shared_docs["casemethods"] % _doc_args["upper"],
        dtype=str,
    )
    title = _noarg_wrapper(
        lambda x: x.title(),
        name="title",
        docstring=_shared_docs["casemethods"] % _doc_args["title"],
        dtype=str,
    )
    capitalize = _noarg_wrapper(
        lambda x: x.capitalize(),
        name="capitalize",
        docstring=_shared_docs["casemethods"] % _doc_args["capitalize"],
        dtype=str,
    )
    swapcase = _noarg_wrapper(
        lambda x: x.swapcase(),
        name="swapcase",
        docstring=_shared_docs["casemethods"] % _doc_args["swapcase"],
        dtype=str,
    )
    casefold = _noarg_wrapper(
        lambda x: x.casefold(),
        name="casefold",
        docstring=_shared_docs["casemethods"] % _doc_args["casefold"],
        dtype=str,
    )
+ Series.str.istitle : Check whether all characters are titlecase. + + Examples + -------- + **Checks for Alphabetic and Numeric Characters** + + >>> s1 = pd.Series(['one', 'one1', '1', '']) + + >>> s1.str.isalpha() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + >>> s1.str.isnumeric() + 0 False + 1 False + 2 True + 3 False + dtype: bool + + >>> s1.str.isalnum() + 0 True + 1 True + 2 True + 3 False + dtype: bool + + Note that checks against characters mixed with any additional punctuation + or whitespace will evaluate to false for an alphanumeric check. + + >>> s2 = pd.Series(['A B', '1.5', '3,000']) + >>> s2.str.isalnum() + 0 False + 1 False + 2 False + dtype: bool + + **More Detailed Checks for Numeric Characters** + + There are several different but overlapping sets of numeric characters that + can be checked for. + + >>> s3 = pd.Series(['23', '³', '⅕', '']) + + The ``s3.str.isdecimal`` method checks for characters used to form numbers + in base 10. + + >>> s3.str.isdecimal() + 0 True + 1 False + 2 False + 3 False + dtype: bool + + The ``s.str.isdigit`` method is the same as ``s3.str.isdecimal`` but also + includes special digits, like superscripted and subscripted digits in + unicode. + + >>> s3.str.isdigit() + 0 True + 1 True + 2 False + 3 False + dtype: bool + + The ``s.str.isnumeric`` method is the same as ``s3.str.isdigit`` but also + includes other characters that can represent quantities such as unicode + fractions. 
    # Substitution values for the %(type)s and %(method)s placeholders of the
    # shared "ismethods" docstring template.
    _doc_args["isalnum"] = dict(type="alphanumeric", method="isalnum")
    _doc_args["isalpha"] = dict(type="alphabetic", method="isalpha")
    _doc_args["isdigit"] = dict(type="digits", method="isdigit")
    _doc_args["isspace"] = dict(type="whitespace", method="isspace")
    _doc_args["islower"] = dict(type="lowercase", method="islower")
    _doc_args["isupper"] = dict(type="uppercase", method="isupper")
    _doc_args["istitle"] = dict(type="titlecase", method="istitle")
    _doc_args["isnumeric"] = dict(type="numeric", method="isnumeric")
    _doc_args["isdecimal"] = dict(type="decimal", method="isdecimal")
    # force _noarg_wrapper return type with dtype=bool (GH 29624)
    isalnum = _noarg_wrapper(
        lambda x: x.isalnum(),
        name="isalnum",
        docstring=_shared_docs["ismethods"] % _doc_args["isalnum"],
        returns_string=False,
        dtype=bool,
    )
    isalpha = _noarg_wrapper(
        lambda x: x.isalpha(),
        name="isalpha",
        docstring=_shared_docs["ismethods"] % _doc_args["isalpha"],
        returns_string=False,
        dtype=bool,
    )
    isdigit = _noarg_wrapper(
        lambda x: x.isdigit(),
        name="isdigit",
        docstring=_shared_docs["ismethods"] % _doc_args["isdigit"],
        returns_string=False,
        dtype=bool,
    )
    isspace = _noarg_wrapper(
        lambda x: x.isspace(),
        name="isspace",
        docstring=_shared_docs["ismethods"] % _doc_args["isspace"],
        returns_string=False,
        dtype=bool,
    )
    islower = _noarg_wrapper(
        lambda x: x.islower(),
        name="islower",
        docstring=_shared_docs["ismethods"] % _doc_args["islower"],
        returns_string=False,
        dtype=bool,
    )
    isupper = _noarg_wrapper(
        lambda x: x.isupper(),
        name="isupper",
        docstring=_shared_docs["ismethods"] % _doc_args["isupper"],
        returns_string=False,
        dtype=bool,
    )
    istitle = _noarg_wrapper(
        lambda x: x.istitle(),
        name="istitle",
        docstring=_shared_docs["ismethods"] % _doc_args["istitle"],
        returns_string=False,
        dtype=bool,
    )
    isnumeric = _noarg_wrapper(
        lambda x: x.isnumeric(),
        name="isnumeric",
        docstring=_shared_docs["ismethods"] % _doc_args["isnumeric"],
        returns_string=False,
        dtype=bool,
    )
    isdecimal = _noarg_wrapper(
        lambda x: x.isdecimal(),
        name="isdecimal",
        docstring=_shared_docs["ismethods"] % _doc_args["isdecimal"],
        returns_string=False,
        dtype=bool,
    )

    @classmethod
    def _make_accessor(cls, data):
        # Validate first (``cls._validate`` is expected to reject unsupported
        # data -- confirm against its definition), then build the accessor.
        cls._validate(data)
        return cls(data)
def _guess_datetime_format_for_array(arr, **kwargs):
    """
    Guess a datetime format from the first non-null element of ``arr``.

    Parameters
    ----------
    arr : array-like
        Values to inspect; must support ``notna(arr).nonzero()`` and
        positional indexing.
    **kwargs
        Forwarded unchanged to ``_guess_datetime_format``.

    Returns
    -------
    Whatever ``_guess_datetime_format`` returns for the first non-null
    element, or None when there is no non-null element.
    """
    valid_positions = notna(arr).nonzero()[0]
    if not len(valid_positions):
        return None
    first_valid = arr[valid_positions[0]]
    return _guess_datetime_format(first_valid, **kwargs)


def should_cache(
    arg: ArrayConvertible, unique_share: float = 0.7, check_count: Optional[int] = None
) -> bool:
    """
    Decide whether building a cache of converted values is worthwhile.

    The first `check_count` elements of `arg` are sampled; caching is
    recommended unless the number of distinct sampled values exceeds
    ``check_count * unique_share``.

    Parameters
    ----------
    arg : listlike, tuple, 1-d array, Series
    unique_share : float, default 0.7
        Must satisfy ``0 < unique_share < 1``.
    check_count : int, optional
        Number of leading elements to sample; must satisfy
        ``0 <= check_count <= len(arg)``. A value of 0 disables caching.

    Returns
    -------
    do_caching : bool

    Raises
    ------
    AssertionError
        If `check_count` or `unique_share` is outside its bounds.

    Notes
    -----
    When `check_count` is not given: inputs of 50 elements or fewer are
    never cached; inputs of up to 5000 elements are sampled on their first
    ten percent; larger inputs are sampled on their first 500 elements.
    All constants were chosen empirically.
    """
    if check_count is None:
        size = len(arg)
        # in this case, the gain from caching is negligible
        if size <= 50:
            return False
        check_count = int(size * 0.1) if size <= 5000 else 500
    else:
        assert (
            0 <= check_count <= len(arg)
        ), "check_count must be in next bounds: [0; len(arg)]"
        if check_count == 0:
            return False

    assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)"

    sampled = set(islice(arg, check_count))
    too_many_uniques = len(sampled) > check_count * unique_share
    return not too_many_uniques
Can be empty + """ + from pandas import Series + + cache_array = Series(dtype=object) + + if cache: + # Perform a quicker unique check + if not should_cache(arg): + return cache_array + + unique_dates = unique(arg) + if len(unique_dates) < len(arg): + cache_dates = convert_listlike(unique_dates, format) + cache_array = Series(cache_dates, index=unique_dates) + return cache_array + + +def _box_as_indexlike( + dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None +) -> Union[ABCIndex, ABCDatetimeIndex]: + """ + Properly boxes the ndarray of datetimes to DatetimeIndex + if it is possible or to generic Index instead + + Parameters + ---------- + dt_array: 1-d array + Array of datetimes to be wrapped in an Index. + tz : object + None or 'utc' + name : string, default None + Name for a resulting index + + Returns + ------- + result : datetime of converted dates + - DatetimeIndex if convertible to sole datetime64 type + - general Index otherwise + """ + from pandas import DatetimeIndex, Index + + if is_datetime64_dtype(dt_array): + tz = "utc" if utc else None + return DatetimeIndex(dt_array, tz=tz, name=name) + return Index(dt_array, name=name) + + +def _convert_and_box_cache( + arg: DatetimeScalarOrArrayConvertible, + cache_array: ABCSeries, + name: Optional[str] = None, +) -> ABCIndexClass: + """ + Convert array of dates with a cache and wrap the result in an Index. + + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + cache_array : Series + Cache of converted, unique dates + name : string, default None + Name for a DatetimeIndex + + Returns + ------- + result : Index-like of converted dates + """ + from pandas import Series + + result = Series(arg).map(cache_array) + return _box_as_indexlike(result, utc=None, name=name) + + +def _return_parsed_timezone_results(result, timezones, tz, name): + """ + Return results from array_strptime if a %z or %Z directive was passed. 
def _convert_listlike_datetimes(
    arg,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : string or None
        strftime format to parse with. When None and `infer_datetime_format`
        is truthy a format is guessed from the data; a format recognised as
        ISO8601 is discarded in favour of the generic parsing path.
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both `format` and `unit` are given, or parsing fails and the
        failure is not absorbed by the `errors` handling below.
    TypeError
        If `arg` has more than one dimension.
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype,
        objects_to_datetime64ns,
    )

    # lists/tuples are normalised to an object ndarray up front
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            # drop the existing tz, then re-label the values as UTC
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # boxing failed; fall through and hand back ``arg`` unchanged
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        if isinstance(arg, IntegerArray):
            # Explicitly pass NaT mask to array_with_unit_to_datetime
            mask = arg.isna()
            arg = arg._ndarray_values
        else:
            mask = None

        result, tz_parsed = tslib.array_with_unit_to_datetime(
            arg, mask, unit, errors=errors
        )

        if errors == "ignore":
            from pandas import Index

            result = Index(result, name=name)
        else:
            result = DatetimeIndex(result, name=name)
        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            result = result.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return result
        if tz is not None:
            if result.tz is None:
                result = result.tz_localize(tz)
            else:
                result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # an explicitly supplied ISO format stays a hard requirement;
            # an inferred one does not
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    # NB: this ValueError is itself caught by the outer
                    # ``except ValueError as e`` handler below
                    raise ValueError("cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors
                    )
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, tz, name
                        )
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        # all-NaT result of the same shape as the input
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            # datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                # the original parsing error is the one surfaced to the caller
                raise e

    if result is None:
        # only reachable without a format, or after an inferred format failed
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
dayfirst=dayfirst, + yearfirst=yearfirst, + utc=utc, + errors=errors, + require_iso8601=require_iso8601, + allow_object=True, + ) + + if tz_parsed is not None: + # We can take a shortcut since the datetime64 numpy array + # is in UTC + return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) + + utc = tz == "utc" + return _box_as_indexlike(result, utc=utc, name=name) + + +def _adjust_to_origin(arg, origin, unit): + """ + Helper function for to_datetime. + Adjust input argument to the specified origin + + Parameters + ---------- + arg : list, tuple, ndarray, Series, Index + date to be adjusted + origin : 'julian' or Timestamp + origin offset for the arg + unit : string + passed unit from to_datetime, must be 'D' + + Returns + ------- + ndarray or scalar of adjusted date(s) + """ + if origin == "julian": + original = arg + j0 = Timestamp(0).to_julian_date() + if unit != "D": + raise ValueError("unit must be 'D' for origin='julian'") + try: + arg = arg - j0 + except TypeError: + raise ValueError("incompatible 'arg' type for given 'origin'='julian'") + + # preemptively check this for a nice range + j_max = Timestamp.max.to_julian_date() - j0 + j_min = Timestamp.min.to_julian_date() - j0 + if np.any(arg > j_max) or np.any(arg < j_min): + raise tslibs.OutOfBoundsDatetime( + f"{original} is Out of Bounds for origin='julian'" + ) + else: + # arg must be numeric + if not ( + (is_scalar(arg) and (is_integer(arg) or is_float(arg))) + or is_numeric_dtype(np.asarray(arg)) + ): + raise ValueError( + f"'{arg}' is not compatible with origin='{origin}'; " + "it must be numeric with a unit specified" + ) + + # we are going to offset back to unix / epoch time + try: + offset = Timestamp(origin) + except tslibs.OutOfBoundsDatetime: + raise tslibs.OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") + except ValueError: + raise ValueError(f"origin {origin} cannot be converted to a Timestamp") + + if offset.tz is not None: + raise ValueError(f"origin offset {offset} must be 
def to_datetime(
    arg,
    errors="raise",
    dayfirst=False,
    yearfirst=False,
    utc=None,
    format=None,
    exact=True,
    unit=None,
    infer_datetime_format=False,
    origin="unix",
    cache=True,
):
    """
    Convert argument to datetime.

    Parameters
    ----------
    arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like
        The object to convert to a datetime.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaT.
        - If 'ignore', then invalid parsing will return the input.
    dayfirst : bool, default False
        Specify a date parse order if `arg` is str or its list-likes.
        If True, parses dates with the day first, eg 10/11/12 is parsed as
        2012-11-10.
        Warning: dayfirst=True is not strict, but will prefer to parse
        with day first (this is a known bug, based on dateutil behavior).
    yearfirst : bool, default False
        Specify a date parse order if `arg` is str or its list-likes.

        - If True parses dates with the year first, eg 10/11/12 is parsed as
          2010-11-12.
        - If both dayfirst and yearfirst are True, yearfirst is preceded (same
          as dateutil).

        Warning: yearfirst=True is not strict, but will prefer to parse
        with year first (this is a known bug, based on dateutil behavior).
    utc : bool, default None
        Return UTC DatetimeIndex if True (converting any tz-aware
        datetime.datetime objects as well).
    format : str, default None
        The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
        all the way up to nanoseconds.
        See strftime documentation for more information on choices:
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
    exact : bool, True by default
        Behaves as:
        - If True, require an exact format match.
        - If False, allow the format to match anywhere in the target string.

    unit : str, default 'ns'
        The unit of the arg (D,s,ms,us,ns) denote the unit, which is an
        integer or float number. This will be based off the origin.
        Example, with unit='ms' and origin='unix' (the default), this
        would calculate the number of milliseconds to the unix epoch start.
        NOTE(review): the signature default is ``None``; the 'ns' default is
        presumably applied by the conversion helpers -- confirm.
    infer_datetime_format : bool, default False
        If True and no `format` is given, attempt to infer the format of the
        datetime strings, and if it can be inferred, switch to a faster
        method of parsing them. In some cases this can increase the parsing
        speed by ~5-10x.
    origin : scalar, default 'unix'
        Define the reference date. The numeric values would be parsed as number
        of units (defined by `unit`) since this reference date.

        - If 'unix' (or POSIX) time; origin is set to 1970-01-01.
        - If 'julian', unit must be 'D', and origin is set to beginning of
          Julian Calendar. Julian day number 0 is assigned to the day starting
          at noon on January 1, 4713 BC.
        - If Timestamp convertible, origin is set to Timestamp identified by
          origin.
    cache : bool, default True
        If True, use a cache of unique, converted dates to apply the datetime
        conversion. May produce significant speed-up when parsing duplicate
        date strings, especially ones with timezone offsets. The cache is only
        used when there are at least 50 values. The presence of out-of-bounds
        values will render the cache unusable and may slow down parsing.

        .. versionadded:: 0.23.0

        .. versionchanged:: 0.25.0
            - changed default value from False to True.

    Returns
    -------
    datetime
        If parsing succeeded.
        Return type depends on input:

        - list-like: DatetimeIndex
        - Series: Series of datetime64 dtype
        - scalar: Timestamp
        - None: None (a ``None`` argument is returned unchanged)

        In case when it is not possible to return designated types (e.g. when
        any element of input is before Timestamp.min or after Timestamp.max)
        return will have datetime.datetime type (or corresponding
        array/Series).

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_timedelta : Convert argument to timedelta.
    convert_dtypes : Convert dtypes.

    Examples
    --------
    Assembling a datetime from multiple columns of a DataFrame. The keys can be
    common abbreviations like ['year', 'month', 'day', 'minute', 'second',
    'ms', 'us', 'ns'] or plurals of the same

    >>> df = pd.DataFrame({'year': [2015, 2016],
    ...                    'month': [2, 3],
    ...                    'day': [4, 5]})
    >>> pd.to_datetime(df)
    0   2015-02-04
    1   2016-03-05
    dtype: datetime64[ns]

    If a date does not meet the timestamp limitations (see the "Timestamp
    limitations" section of the pandas time series user guide), passing
    errors='ignore' will return the original input instead of raising any
    exception.

    Passing errors='coerce' will force an out-of-bounds date to NaT,
    in addition to forcing non-dates (or non-parseable dates) to NaT.

    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
    datetime.datetime(1300, 1, 1, 0, 0)
    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
    NaT

    Passing infer_datetime_format=True can often speed up parsing
    if it is not exactly an ISO8601 format, but is in a regular format.

    >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
    >>> s.head()
    0    3/11/2000
    1    3/12/2000
    2    3/13/2000
    3    3/11/2000
    4    3/12/2000
    dtype: object

    >>> %timeit pd.to_datetime(s, infer_datetime_format=True)  # doctest: +SKIP
    100 loops, best of 3: 10.4 ms per loop

    >>> %timeit pd.to_datetime(s, infer_datetime_format=False)  # doctest: +SKIP
    1 loop, best of 3: 471 ms per loop

    Using a unix epoch time

    >>> pd.to_datetime(1490195805, unit='s')
    Timestamp('2017-03-22 15:16:45')
    >>> pd.to_datetime(1490195805433502912, unit='ns')
    Timestamp('2017-03-22 15:16:45.433502912')

    .. warning:: For float arg, precision rounding might happen. To prevent
        unexpected behavior use a fixed-width exact type.

    Using a non-unix epoch origin

    >>> pd.to_datetime([1, 2, 3], unit='D',
    ...                origin=pd.Timestamp('1960-01-01'))
    DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], \
dtype='datetime64[ns]', freq=None)
    """
    if arg is None:
        return None

    # Shift numeric input so that it is expressed relative to the unix epoch.
    if origin != "unix":
        arg = _adjust_to_origin(arg, origin, unit)

    # Any truthy ``utc`` selects UTC; None/False leave the result unlocalized.
    tz = "utc" if utc else None
    # Bind every option except the values and the format, which are supplied
    # per input type below.
    convert_listlike = partial(
        _convert_listlike_datetimes,
        tz=tz,
        unit=unit,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        errors=errors,
        exact=exact,
        infer_datetime_format=infer_datetime_format,
    )

    # Dispatch on the input type; the order of these checks matters.
    if isinstance(arg, Timestamp):
        # Already a Timestamp: only the timezone may need adjusting.
        result = arg
        if tz is not None:
            if arg.tz is not None:
                result = result.tz_convert(tz)
            else:
                result = result.tz_localize(tz)
    elif isinstance(arg, ABCSeries):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if not cache_array.empty:
            result = arg.map(cache_array)
        else:
            # No usable cache: convert the raw values and re-wrap them with
            # the original index and name.
            values = convert_listlike(arg._values, format)
            result = arg._constructor(values, index=arg.index, name=arg.name)
    elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
        # Column-wise input (year/month/day/...) is assembled separately;
        # ``format``, ``unit`` and ``cache`` are not passed on this path.
        result = _assemble_from_unit_mappings(arg, errors, tz)
    elif isinstance(arg, ABCIndexClass):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if not cache_array.empty:
            result = _convert_and_box_cache(arg, cache_array, name=arg.name)
        else:
            # Carry the index name through to the converted result.
            convert_listlike = partial(convert_listlike, name=arg.name)
            result = convert_listlike(arg, format)
    elif is_list_like(arg):
        try:
            cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        except tslibs.OutOfBoundsDatetime:
            # caching attempts to create a DatetimeIndex, which may raise
            # an OOB. If that's the desired behavior, then just reraise...
            if errors == "raise":
                raise
            # ... otherwise, continue without the cache.
            from pandas import Series

            cache_array = Series([], dtype=object)  # just an empty array
        if not cache_array.empty:
            result = _convert_and_box_cache(arg, cache_array)
        else:
            result = convert_listlike(arg, format)
    else:
        # Scalar: wrap in a length-1 array, convert, then unwrap.
        result = convert_listlike(np.array([arg]), format)[0]

    return result
def _attempt_YYYYMMDD(arg, errors):
    """
    Best-effort parse of values laid out as YYYYMMDD (``%Y%m%d``).

    ``arg`` arrives as an object-dtype array but may really hold ints,
    strings of digits, floats with NaN, or NaT-like strings. Three
    interpretations are tried in turn; the first that does not raise wins.

    Parameters
    ----------
    arg : passed value
    errors : 'raise','ignore','coerce'

    Returns
    -------
    The converted values from the first interpretation that succeeds, or
    None when every interpretation raises ValueError, OverflowError or
    TypeError.
    """

    def calc(carg):
        # Split the number into year / month / day and build the datetimes.
        as_objects = carg.astype(object)
        years = as_objects / 10000
        months = as_objects / 100 % 100
        days = as_objects % 100
        parsed = parsing.try_parse_year_month_day(years, months, days)
        return tslib.array_to_datetime(parsed, errors=errors)[0]

    def calc_with_mask(carg, mask):
        # Positions outside ``mask`` become NaT; the rest are parsed.
        out = np.empty(carg.shape, dtype="M8[ns]")
        out.view("i8")[~mask] = tslibs.iNaT

        selected = carg[mask].astype(np.float64).astype(np.int64)
        out[mask] = calc(selected).astype("M8[ns]")
        return out

    def from_intlike():
        # int-like values, or strings that are ints
        return calc(arg.astype(np.int64))

    def from_float_with_nan():
        # floats carrying actual np.nan
        as_float = arg.astype(np.float64)
        return calc_with_mask(as_float, notna(as_float))

    def from_nat_strings():
        # strings mixed with NaN-like markers
        keep = ~algorithms.isin(arg, list(tslib.nat_strings))
        return calc_with_mask(arg, keep)

    for strategy in (from_intlike, from_float_with_nan, from_nat_strings):
        try:
            return strategy()
        except (ValueError, OverflowError, TypeError):
            continue

    return None
+ + Parameters + ---------- + arg : string in time format, datetime.time, list, tuple, 1-d array, Series + format : str, default None + Format used to convert arg into a time object. If None, fixed formats + are used. + infer_time_format: bool, default False + Infer the time format based on the first non-NaN element. If all + strings are in the same format, this will speed up conversion. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as None + - If 'ignore', then invalid parsing will return the input + + Returns + ------- + datetime.time + """ + + def _convert_listlike(arg, format): + + if isinstance(arg, (list, tuple)): + arg = np.array(arg, dtype="O") + + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, datetime, list, tuple, 1-d array, or Series" + ) + + arg = ensure_object(arg) + + if infer_time_format and format is None: + format = _guess_time_format_for_array(arg) + + times = [] + if format is not None: + for element in arg: + try: + times.append(datetime.strptime(element, format).time()) + except (ValueError, TypeError): + if errors == "raise": + msg = ( + f"Cannot convert {element} to a time with given " + f"format {format}" + ) + raise ValueError(msg) + elif errors == "ignore": + return arg + else: + times.append(None) + else: + formats = _time_formats[:] + format_found = False + for element in arg: + time_object = None + for time_format in formats: + try: + time_object = datetime.strptime(element, time_format).time() + if not format_found: + # Put the found format in front + fmt = formats.pop(formats.index(time_format)) + formats.insert(0, fmt) + format_found = True + break + except (ValueError, TypeError): + continue + + if time_object is not None: + times.append(time_object) + elif errors == "raise": + raise ValueError(f"Cannot convert arg {arg} to a time") + elif errors == "ignore": + return arg + else: 
def to_numeric(arg, errors="raise", downcast=None):
    """
    Convert argument to a numeric type.

    The default return dtype is `float64` or `int64`
    depending on the data supplied. Use the `downcast` parameter
    to obtain other dtypes.

    Please note that precision loss may occur if really large numbers
    are passed in. Due to the internal limitations of `ndarray`, if
    numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
    or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
    passed in, it is very likely they will be converted to float so that
    they can be stored in an `ndarray`. These warnings apply similarly to
    `Series` since it internally leverages `ndarray`.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaN.
        - If 'ignore', then invalid parsing will return the input.
    downcast : {'integer', 'signed', 'unsigned', 'float'}, default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8); only
          attempted when the data is empty or its minimum is non-negative
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, a candidate dtype is only tried when its item size
        does not exceed that of the data's current dtype, so if none of
        the dtypes checked satisfy that specification, no downcasting
        will be performed on the data.

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input. Series if Series, otherwise ndarray.

    Raises
    ------
    ValueError
        If `downcast` or `errors` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    numpy.ndarray.astype : Cast a numpy array to a specified type.
    convert_dtypes : Convert dtypes.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, "integer", "signed", "unsigned", "float"):
        raise ValueError("invalid downcasting method provided")

    if errors not in ("ignore", "raise", "coerce"):
        raise ValueError("invalid error value specified")

    # Remember the container kind so the result can be re-wrapped at the end.
    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndexClass):
        is_index = True
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype="O")
    elif is_scalar(arg):
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        is_scalars = True
        values = np.array([arg], dtype="O")
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError("arg must be a list, tuple, 1-d array, or Series")
    else:
        values = arg

    if is_numeric_dtype(values):
        pass
    elif is_datetime_or_timedelta_dtype(values):
        values = values.astype(np.int64)
    else:
        values = ensure_object(values)
        coerce_numeric = errors not in ("ignore", "raise")
        try:
            values = lib.maybe_convert_numeric(
                values, set(), coerce_numeric=coerce_numeric
            )
        except (ValueError, TypeError):
            # With errors="ignore"/"coerce" the unconverted object values
            # are kept and returned as-is.
            if errors == "raise":
                raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ("integer", "signed"):
            typecodes = np.typecodes["Integer"]
        elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0):
            # np.min raises on zero-size input, so empty data is treated as
            # trivially non-negative instead of being reduced.
            typecodes = np.typecodes["UnsignedInteger"]
        elif downcast == "float":
            typecodes = np.typecodes["Float"]

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values
+ + Parameters + ---------- + arg : str, timedelta, list-like or Series + The data to be converted to timedelta. + unit : str, default 'ns' + Denotes the unit of the arg. Possible values: + ('Y', 'M', 'W', 'D', 'days', 'day', 'hours', hour', 'hr', + 'h', 'm', 'minute', 'min', 'minutes', 'T', 'S', 'seconds', + 'sec', 'second', 'ms', 'milliseconds', 'millisecond', + 'milli', 'millis', 'L', 'us', 'microseconds', 'microsecond', + 'micro', 'micros', 'U', 'ns', 'nanoseconds', 'nano', 'nanos', + 'nanosecond', 'N'). + + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaT. + - If 'ignore', then invalid parsing will return the input. + + Returns + ------- + timedelta64 or numpy.array of timedelta64 + Output type returned if parsing succeeded. + + See Also + -------- + DataFrame.astype : Cast argument to a specified dtype. + to_datetime : Convert argument to datetime. + convert_dtypes : Convert dtypes. 
+ + Examples + -------- + + Parsing a single string to a Timedelta: + + >>> pd.to_timedelta('1 days 06:05:01.00003') + Timedelta('1 days 06:05:01.000030') + >>> pd.to_timedelta('15.5us') + Timedelta('0 days 00:00:00.000015') + + Parsing a list or array of strings: + + >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan']) + TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015', NaT], + dtype='timedelta64[ns]', freq=None) + + Converting numbers by specifying the `unit` keyword argument: + + >>> pd.to_timedelta(np.arange(5), unit='s') + TimedeltaIndex(['00:00:00', '00:00:01', '00:00:02', + '00:00:03', '00:00:04'], + dtype='timedelta64[ns]', freq=None) + >>> pd.to_timedelta(np.arange(5), unit='d') + TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq=None) + """ + unit = parse_timedelta_unit(unit) + + if errors not in ("ignore", "raise", "coerce"): + raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'}") + + if unit in {"Y", "y", "M"}: + raise ValueError( + "Units 'M' and 'Y' are no longer supported, as they do not " + "represent unambiguous timedelta values durations." + ) + + if arg is None: + return arg + elif isinstance(arg, ABCSeries): + values = _convert_listlike(arg._values, unit=unit, errors=errors) + return arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, ABCIndexClass): + return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name) + elif isinstance(arg, np.ndarray) and arg.ndim == 0: + # extract array scalar and process below + arg = arg.item() + elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1: + return _convert_listlike(arg, unit=unit, errors=errors) + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, timedelta, list, tuple, 1-d array, or Series" + ) + + # ...so it must be a scalar value. Return scalar. 
def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"):
    """
    Build a Timedelta from the scalar ``r``, honouring ``errors``.

    A ValueError from the Timedelta constructor is re-raised when
    ``errors == "raise"``, answered with the untouched input when
    ``errors == "ignore"``, and answered with NaT otherwise (coerce).
    """
    try:
        return Timedelta(r, unit)
    except ValueError:
        if errors == "raise":
            raise
        # "ignore" hands the input back; anything else coerces to NaT
        return r if errors == "ignore" else NaT
def hash_pandas_object(
    obj,
    index: bool = True,
    encoding: str = "utf8",
    hash_key: Optional[str] = _default_hash_key,
    categorize: bool = True,
):
    """
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
        The object to hash.
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode. ``None`` selects the default key.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object

    Raises
    ------
    TypeError
        If `obj` is not an Index, Series or DataFrame.
    """
    from pandas import Series

    if hash_key is None:
        hash_key = _default_hash_key

    if isinstance(obj, ABCMultiIndex):
        # A MultiIndex is hashed row-wise as tuples; the result carries a
        # default index rather than the MultiIndex itself.
        return Series(hash_tuples(obj, encoding, hash_key), dtype="uint64", copy=False)

    elif isinstance(obj, ABCIndexClass):
        h = hash_array(obj.values, encoding, hash_key, categorize).astype(
            "uint64", copy=False
        )
        h = Series(h, index=obj, dtype="uint64", copy=False)

    elif isinstance(obj, ABCSeries):
        h = hash_array(obj.values, encoding, hash_key, categorize).astype(
            "uint64", copy=False
        )
        if index:
            # One-element generator so the index is only hashed when consumed.
            index_iter = (
                hash_pandas_object(
                    obj.index,
                    index=False,
                    encoding=encoding,
                    hash_key=hash_key,
                    categorize=categorize,
                ).values
                for _ in [None]
            )
            arrays = itertools.chain([h], index_iter)
            h = _combine_hash_arrays(arrays, 2)

        h = Series(h, index=obj.index, dtype="uint64", copy=False)

    elif isinstance(obj, ABCDataFrame):
        # Forward the caller's hashing options to every column so that a
        # custom encoding / hash_key / categorize setting is honoured here
        # exactly as it is for Series and Index input.
        hashes = (
            hash_array(series.values, encoding, hash_key, categorize)
            for _, series in obj.items()
        )
        num_items = len(obj.columns)
        if index:
            index_hash_generator = (
                hash_pandas_object(
                    obj.index,
                    index=False,
                    encoding=encoding,
                    hash_key=hash_key,
                    categorize=categorize,
                ).values  # noqa
                for _ in [None]
            )
            num_items += 1

            # keep `hashes` specifically a generator to keep mypy happy
            _hashes = itertools.chain(hashes, index_hash_generator)
            hashes = (x for x in _hashes)
        h = _combine_hash_arrays(hashes, num_items)

        h = Series(h, index=obj.index, dtype="uint64", copy=False)
    else:
        raise TypeError(f"Unexpected type for hashing {type(obj)}")
    return h
def hash_tuple(val, encoding: str = "utf8", hash_key: str = _default_hash_key):
    """
    Compute one combined hash for a single tuple.

    Every element is hashed on its own as a scalar, and the per-element
    hashes are then folded together into one value.

    Parameters
    ----------
    val : single tuple
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    hash

    """
    # lazily hash each element; each hash is a length-1 array
    per_element = (
        _hash_scalar(item, encoding=encoding, hash_key=hash_key) for item in val
    )
    combined = _combine_hash_arrays(per_element, len(val))

    # the combined array has a single entry: the hash of the whole tuple
    return combined[0]
def hash_array(
    vals,
    encoding: str = "utf8",
    hash_key: str = _default_hash_key,
    categorize: bool = True,
):
    """
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray, Categorical
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    1d uint64 numpy array of hash values, same length as the vals

    Raises
    ------
    TypeError
        If `vals` has no ``dtype`` attribute.
    """

    if not hasattr(vals, "dtype"):
        raise TypeError("must pass a ndarray-like")
    dtype = vals.dtype

    # For categoricals, we hash the categories, then remap the codes to the
    # hash values. (This check is above the complex check so that we don't ask
    # numpy if categorical is a subdtype of complex, as it will choke).
    if is_categorical_dtype(dtype):
        return _hash_categorical(vals, encoding, hash_key)
    elif is_extension_array_dtype(dtype):
        vals, _ = vals._values_for_factorize()
        dtype = vals.dtype

    # we'll be working with everything as 64-bit values, so handle this
    # 128-bit value early
    if np.issubdtype(dtype, np.complex128):
        # Hash the real and imaginary halves separately, with the caller's
        # options, and combine them.
        real_hash = hash_array(np.real(vals), encoding, hash_key, categorize)
        imag_hash = hash_array(np.imag(vals), encoding, hash_key, categorize)
        return real_hash + 23 * imag_hash

    # First, turn whatever array this is into unsigned 64-bit ints, if we can
    # manage it.
    elif dtype == bool:
        # Compare the dtype itself: a dtype object is never an *instance* of
        # the bool scalar type, so an isinstance() check can never match.
        vals = vals.astype("u8")
    elif issubclass(dtype.type, (np.datetime64, np.timedelta64)):
        vals = vals.view("i8").astype("u8", copy=False)
    elif issubclass(dtype.type, np.number) and dtype.itemsize <= 8:
        # Reinterpret the raw bits as an unsigned int of the same width,
        # then widen to 64 bits.
        vals = vals.view(f"u{vals.dtype.itemsize}").astype("u8")
    else:
        # With repeated values, its MUCH faster to categorize object dtypes,
        # then hash and rename categories. We allow skipping the categorization
        # when the values are known/likely to be unique.
        if categorize:
            from pandas import factorize, Categorical, Index

            codes, categories = factorize(vals, sort=False)
            cat = Categorical(codes, Index(categories), ordered=False, fastpath=True)
            return _hash_categorical(cat, encoding, hash_key)

        try:
            vals = hashing.hash_object_array(vals, hash_key, encoding)
        except TypeError:
            # we have mixed types
            vals = hashing.hash_object_array(
                vals.astype(str).astype(object), hash_key, encoding
            )

    # Then, redistribute these 64-bit ints within the space of 64-bit ints
    vals ^= vals >> 30
    vals *= np.uint64(0xBF58476D1CE4E5B9)
    vals ^= vals >> 27
    vals *= np.uint64(0x94D049BB133111EB)
    vals ^= vals >> 31
    return vals
+ + Parameters + ---------- + val : scalar + encoding : str, default "utf8" + hash_key : str, default _default_hash_key + + Returns + ------- + 1d uint64 numpy array of hash value, of length 1 + """ + + if isna(val): + # this is to be consistent with the _hash_categorical implementation + return np.array([np.iinfo(np.uint64).max], dtype="u8") + + if getattr(val, "tzinfo", None) is not None: + # for tz-aware datetimes, we need the underlying naive UTC value and + # not the tz aware object or pd extension type (as + # infer_dtype_from_scalar would do) + if not isinstance(val, Timestamp): + val = Timestamp(val) + val = val.tz_convert(None) + + dtype, val = infer_dtype_from_scalar(val) + vals = np.array([val], dtype=dtype) + + return hash_array(vals, hash_key=hash_key, encoding=encoding, categorize=False) diff --git a/pandas/core/window/__init__.py b/pandas/core/window/__init__.py new file mode 100644 index 00000000..dcf58a4c --- /dev/null +++ b/pandas/core/window/__init__.py @@ -0,0 +1,3 @@ +from pandas.core.window.ewm import EWM # noqa:F401 +from pandas.core.window.expanding import Expanding, ExpandingGroupby # noqa:F401 +from pandas.core.window.rolling import Rolling, RollingGroupby, Window # noqa:F401 diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py new file mode 100644 index 00000000..64ec0e68 --- /dev/null +++ b/pandas/core/window/common.py @@ -0,0 +1,326 @@ +"""Common utility functions for rolling operations""" +from collections import defaultdict +from typing import Callable, Optional +import warnings + +import numpy as np + +from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + +import pandas.core.common as com +from pandas.core.generic import _shared_docs +from pandas.core.groupby.base import GroupByMixin +from pandas.core.indexes.api import MultiIndex + +_shared_docs = dict(**_shared_docs) +_doc_template = """ + Returns + ------- + Series or DataFrame + Return type is 
determined by the caller. + + See Also + -------- + Series.%(name)s : Series %(name)s. + DataFrame.%(name)s : DataFrame %(name)s. +""" + + +def _dispatch(name: str, *args, **kwargs): + """ + Dispatch to apply. + """ + + def outer(self, *args, **kwargs): + def f(x): + x = self._shallow_copy(x, groupby=self._groupby) + return getattr(x, name)(*args, **kwargs) + + return self._groupby.apply(f) + + outer.__name__ = name + return outer + + +class WindowGroupByMixin(GroupByMixin): + """ + Provide the groupby facilities. + """ + + def __init__(self, obj, *args, **kwargs): + kwargs.pop("parent", None) + groupby = kwargs.pop("groupby", None) + if groupby is None: + groupby, obj = obj, obj.obj + self._groupby = groupby + self._groupby.mutated = True + self._groupby.grouper.mutated = True + super().__init__(obj, *args, **kwargs) + + count = _dispatch("count") + corr = _dispatch("corr", other=None, pairwise=None) + cov = _dispatch("cov", other=None, pairwise=None) + + def _apply( + self, + func: Callable, + center: bool, + require_min_periods: int = 0, + floor: int = 1, + is_weighted: bool = False, + name: Optional[str] = None, + use_numba_cache: bool = False, + **kwargs, + ): + """ + Dispatch to apply; we are stripping all of the _apply kwargs and + performing the original function call on the grouped object. + """ + kwargs.pop("floor", None) + + # TODO: can we de-duplicate with _dispatch? 
+ def f(x, name=name, *args): + x = self._shallow_copy(x) + + if isinstance(name, str): + return getattr(x, name)(*args, **kwargs) + + return x.apply(name, *args, **kwargs) + + return self._groupby.apply(f) + + +def _flex_binary_moment(arg1, arg2, f, pairwise=False): + + if not ( + isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame)) + and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame)) + ): + raise TypeError( + "arguments to moment function must be of type " + "np.ndarray/Series/DataFrame" + ) + + if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance( + arg2, (np.ndarray, ABCSeries) + ): + X, Y = prep_binary(arg1, arg2) + return f(X, Y) + + elif isinstance(arg1, ABCDataFrame): + from pandas import DataFrame + + def dataframe_from_int_dict(data, frame_template): + result = DataFrame(data, index=frame_template.index) + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] + return result + + results = {} + if isinstance(arg2, ABCDataFrame): + if pairwise is False: + if arg1 is arg2: + # special case in order to handle duplicate column names + for i, col in enumerate(arg1.columns): + results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) + return dataframe_from_int_dict(results, arg1) + else: + if not arg1.columns.is_unique: + raise ValueError("'arg1' columns are not unique") + if not arg2.columns.is_unique: + raise ValueError("'arg2' columns are not unique") + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + X, Y = arg1.align(arg2, join="outer") + X = X + 0 * Y + Y = Y + 0 * X + + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + return DataFrame(results, index=X.index, columns=res_columns) + elif pairwise is True: + results = defaultdict(dict) + for i, k1 in enumerate(arg1.columns): + for j, k2 in 
enumerate(arg2.columns): + if j < i and arg2 is arg1: + # Symmetric case + results[i][j] = results[j][i] + else: + results[i][j] = f( + *prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) + ) + + from pandas import concat + + result_index = arg1.index.union(arg2.index) + if len(result_index): + + # construct result frame + result = concat( + [ + concat( + [results[i][j] for j, c in enumerate(arg2.columns)], + ignore_index=True, + ) + for i, c in enumerate(arg1.columns) + ], + ignore_index=True, + axis=1, + ) + result.columns = arg1.columns + + # set the index and reorder + if arg2.columns.nlevels > 1: + result.index = MultiIndex.from_product( + arg2.columns.levels + [result_index] + ) + result = result.reorder_levels([2, 0, 1]).sort_index() + else: + result.index = MultiIndex.from_product( + [range(len(arg2.columns)), range(len(result_index))] + ) + result = result.swaplevel(1, 0).sort_index() + result.index = MultiIndex.from_product( + [result_index] + [arg2.columns] + ) + else: + + # empty result + result = DataFrame( + index=MultiIndex( + levels=[arg1.index, arg2.columns], codes=[[], []] + ), + columns=arg2.columns, + dtype="float64", + ) + + # reset our index names to arg1 names + # reset our column names to arg2 names + # careful not to mutate the original names + result.columns = result.columns.set_names(arg1.columns.names) + result.index = result.index.set_names( + result_index.names + arg2.columns.names + ) + + return result + + else: + raise ValueError("'pairwise' is not True/False") + else: + results = { + i: f(*prep_binary(arg1.iloc[:, i], arg2)) + for i, col in enumerate(arg1.columns) + } + return dataframe_from_int_dict(results, arg1) + + else: + return _flex_binary_moment(arg2, arg1, f) + + +def _get_center_of_mass(comass, span, halflife, alpha): + valid_count = com.count_not_none(comass, span, halflife, alpha) + if valid_count > 1: + raise ValueError("comass, span, halflife, and alpha are mutually exclusive") + + # Convert to center of mass; domain checks 
def calculate_min_periods(
    window: int,
    min_periods: Optional[int],
    num_values: int,
    required_min_periods: int,
    floor: int,
) -> int:
    """
    Calculate the final minimum periods value for rolling aggregations.

    Parameters
    ----------
    window : passed window value
    min_periods : passed min periods value; ``None`` means "use ``window``"
    num_values : total number of values
    required_min_periods : required min periods per aggregation function
    floor : lower bound applied to the returned value

    Returns
    -------
    min_periods : int

    Raises
    ------
    ValueError
        If ``min_periods`` is negative or larger than ``window``.
    """
    if min_periods is None:
        min_periods = window
    else:
        # Validate the caller's value *before* clamping it; otherwise a
        # negative value is hidden by ``max(required_min_periods, ...)``.
        if min_periods < 0:
            raise ValueError("min_periods must be >= 0")
        min_periods = max(required_min_periods, min_periods)

    if min_periods > window:
        raise ValueError(f"min_periods {min_periods} must be <= window {window}")
    elif min_periods > num_values:
        # More observations are required than exist: one past the number of
        # values is enough to express "never satisfied".
        min_periods = num_values + 1
    elif min_periods < 0:
        # Still reachable when min_periods defaulted to a negative window.
        raise ValueError("min_periods must be >= 0")
    return max(min_periods, floor)
+ X = arg1 + 0 * arg2 + Y = arg2 + 0 * arg1 + + return X, Y + + +def get_weighted_roll_func(cfunc: Callable) -> Callable: + def func(arg, window, min_periods=None): + if min_periods is None: + min_periods = len(window) + return cfunc(arg, window, min_periods) + + return func diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py new file mode 100644 index 00000000..37e3cd42 --- /dev/null +++ b/pandas/core/window/ewm.py @@ -0,0 +1,403 @@ +from textwrap import dedent + +import numpy as np + +import pandas._libs.window.aggregations as window_aggregations +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, Substitution + +from pandas.core.dtypes.generic import ABCDataFrame + +from pandas.core.base import DataError +from pandas.core.window.common import ( + _doc_template, + _get_center_of_mass, + _shared_docs, + zsqrt, +) +from pandas.core.window.rolling import _flex_binary_moment, _Rolling + +_bias_template = """ + Parameters + ---------- + bias : bool, default False + Use a standard estimation bias correction. + *args, **kwargs + Arguments and keyword arguments to be passed into func. +""" + + +class EWM(_Rolling): + r""" + Provide exponential weighted functions. + + Parameters + ---------- + com : float, optional + Specify decay in terms of center of mass, + :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`. + span : float, optional + Specify decay in terms of span, + :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`. + halflife : float, optional + Specify decay in terms of half-life, + :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{for} halflife > 0`. + alpha : float, optional + Specify smoothing factor :math:`\alpha` directly, + :math:`0 < \alpha \leq 1`. + min_periods : int, default 0 + Minimum number of observations in window required to have a value + (otherwise result is NA). 
+ adjust : bool, default True + Divide by decaying adjustment factor in beginning periods to account + for imbalance in relative weightings + (viewing EWMA as a moving average). + ignore_na : bool, default False + Ignore missing values when calculating weights; + specify True to reproduce pre-0.15.0 behavior. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. The value 0 identifies the rows, and 1 + identifies the columns. + + Returns + ------- + DataFrame + A Window sub-classed for the particular operation. + + See Also + -------- + rolling : Provides rolling window calculations. + expanding : Provides expanding transformations. + + Notes + ----- + Exactly one of center of mass, span, half-life, and alpha must be provided. + Allowed values and relationship between the parameters are specified in the + parameter descriptions above; see the link at the end of this section for + a detailed explanation. + + When adjust is True (default), weighted averages are calculated using + weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. + + When adjust is False, weighted averages are calculated recursively as: + weighted_average[0] = arg[0]; + weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. + + When ignore_na is False (default), weights are based on absolute positions. + For example, the weights of x and y used in calculating the final weighted + average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and + (1-alpha)**2 and alpha (if adjust is False). + + When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based + on relative positions. For example, the weights of x and y used in + calculating the final weighted average of [x, None, y] are 1-alpha and 1 + (if adjust is True), and 1-alpha and alpha (if adjust is False). 
+ + More details can be found at + https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows + + Examples + -------- + + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> df.ewm(com=0.5).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + """ + _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"] + + def __init__( + self, + obj, + com=None, + span=None, + halflife=None, + alpha=None, + min_periods=0, + adjust=True, + ignore_na=False, + axis=0, + ): + self.obj = obj + self.com = _get_center_of_mass(com, span, halflife, alpha) + self.min_periods = min_periods + self.adjust = adjust + self.ignore_na = ignore_na + self.axis = axis + self.on = None + + @property + def _constructor(self): + return EWM + + _agg_see_also_doc = dedent( + """ + See Also + -------- + pandas.DataFrame.rolling.aggregate + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.464856 0.569633 -0.490089 + 2 -0.207700 0.149687 -1.135379 + 3 -0.471677 -0.645305 -0.906555 + 4 -0.355635 -0.203033 -0.904111 + 5 1.076417 1.503943 -1.146293 + 6 -0.041654 1.925562 -0.588728 + 7 0.680292 0.132049 0.548693 + 8 0.067236 0.948257 0.163353 + 9 -0.286980 0.618493 -0.694496 + """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="Series/Dataframe", + axis="", + ) + @Appender(_shared_docs["aggregate"]) 
+ def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + def _apply(self, func, **kwargs): + """ + Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. + + Parameters + ---------- + func : str/callable to apply + + Returns + ------- + y : same type as input argument + """ + blocks, obj = self._create_blocks() + block_list = list(blocks) + + results = [] + exclude = [] + for i, b in enumerate(blocks): + try: + values = self._prep_values(b.values) + + except (TypeError, NotImplementedError): + if isinstance(obj, ABCDataFrame): + exclude.extend(b.columns) + del block_list[i] + continue + else: + raise DataError("No numeric types to aggregate") + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, str): + cfunc = getattr(window_aggregations, func, None) + if cfunc is None: + raise ValueError( + f"we do not support this function in window_aggregations.{func}" + ) + + def func(arg): + return cfunc( + arg, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + ) + + results.append(np.apply_along_axis(func, self.axis, values)) + + return self._wrap_results(results, block_list, obj, exclude) + + @Substitution(name="ewm") + @Appender(_doc_template) + def mean(self, *args, **kwargs): + """ + Exponential weighted moving average. + + Parameters + ---------- + *args, **kwargs + Arguments and keyword arguments to be passed into func. + """ + nv.validate_window_func("mean", args, kwargs) + return self._apply("ewma", **kwargs) + + @Substitution(name="ewm") + @Appender(_doc_template) + @Appender(_bias_template) + def std(self, bias=False, *args, **kwargs): + """ + Exponential weighted moving stddev. 
+ """ + nv.validate_window_func("std", args, kwargs) + return zsqrt(self.var(bias=bias, **kwargs)) + + vol = std + + @Substitution(name="ewm") + @Appender(_doc_template) + @Appender(_bias_template) + def var(self, bias=False, *args, **kwargs): + """ + Exponential weighted moving variance. + """ + nv.validate_window_func("var", args, kwargs) + + def f(arg): + return window_aggregations.ewmcov( + arg, + arg, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias), + ) + + return self._apply(f, **kwargs) + + @Substitution(name="ewm") + @Appender(_doc_template) + def cov(self, other=None, pairwise=None, bias=False, **kwargs): + """ + Exponential weighted sample covariance. + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndex DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + bias : bool, default False + Use a standard estimation bias correction. + **kwargs + Keyword arguments to be passed into func. 
+ """ + if other is None: + other = self._selected_obj + # only default unset + pairwise = True if pairwise is None else pairwise + other = self._shallow_copy(other) + + def _get_cov(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + cov = window_aggregations.ewmcov( + X._prep_values(), + Y._prep_values(), + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias), + ) + return X._wrap_result(cov) + + return _flex_binary_moment( + self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) + ) + + @Substitution(name="ewm") + @Appender(_doc_template) + def corr(self, other=None, pairwise=None, **kwargs): + """ + Exponential weighted sample correlation. + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndex DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + **kwargs + Keyword arguments to be passed into func. 
+ """ + if other is None: + other = self._selected_obj + # only default unset + pairwise = True if pairwise is None else pairwise + other = self._shallow_copy(other) + + def _get_corr(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + + def _cov(x, y): + return window_aggregations.ewmcov( + x, + y, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + 1, + ) + + x_values = X._prep_values() + y_values = Y._prep_values() + with np.errstate(all="ignore"): + cov = _cov(x_values, y_values) + x_var = _cov(x_values, x_values) + y_var = _cov(y_values, y_values) + corr = cov / zsqrt(x_var * y_var) + return X._wrap_result(corr) + + return _flex_binary_moment( + self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) + ) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py new file mode 100644 index 00000000..68c35143 --- /dev/null +++ b/pandas/core/window/expanding.py @@ -0,0 +1,259 @@ +from textwrap import dedent + +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, Substitution + +from pandas.core.window.common import WindowGroupByMixin, _doc_template, _shared_docs +from pandas.core.window.rolling import _Rolling_and_Expanding + + +class Expanding(_Rolling_and_Expanding): + """ + Provide expanding transformations. + + Parameters + ---------- + min_periods : int, default 1 + Minimum number of observations in window required to have a value + (otherwise result is NA). + center : bool, default False + Set the labels at the center of the window. + axis : int or str, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + See Also + -------- + rolling : Provides rolling window calculations. + ewm : Provides exponential weighted functions. + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. 
+ + Examples + -------- + + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> df.expanding(2).sum() + B + 0 NaN + 1 1.0 + 2 3.0 + 3 3.0 + 4 7.0 + """ + + _attributes = ["min_periods", "center", "axis"] + + def __init__(self, obj, min_periods=1, center=False, axis=0, **kwargs): + super().__init__(obj=obj, min_periods=min_periods, center=center, axis=axis) + + @property + def _constructor(self): + return Expanding + + def _get_window(self, other=None, **kwargs): + """ + Get the window length over which to perform some operation. + + Parameters + ---------- + other : object, default None + The other object that is involved in the operation. + Such an object is involved for operations like covariance. + + Returns + ------- + window : int + The window length. + """ + axis = self.obj._get_axis(self.axis) + length = len(axis) + (other is not None) * len(axis) + + other = self.min_periods or -1 + return max(length, other) + + _agg_see_also_doc = dedent( + """ + See Also + -------- + DataFrame.expanding.aggregate + DataFrame.rolling.aggregate + DataFrame.aggregate + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.464856 0.569633 -0.490089 + 2 -0.207700 0.149687 -1.135379 + 3 -0.471677 -0.645305 -0.906555 + 4 -0.355635 -0.203033 -0.904111 + 5 1.076417 1.503943 -1.146293 + 6 -0.041654 1.925562 -0.588728 + 7 0.680292 0.132049 0.548693 + 8 0.067236 0.948257 0.163353 + 9 -0.286980 0.618493 -0.694496 + """ + ) + + 
@Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="Series/Dataframe", + axis="", + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @Substitution(name="expanding") + @Appender(_shared_docs["count"]) + def count(self, **kwargs): + return super().count(**kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["apply"]) + def apply(self, func, raw=False, args=(), kwargs={}): + return super().apply(func, raw=raw, args=args, kwargs=kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["sum"]) + def sum(self, *args, **kwargs): + nv.validate_expanding_func("sum", args, kwargs) + return super().sum(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_doc_template) + @Appender(_shared_docs["max"]) + def max(self, *args, **kwargs): + nv.validate_expanding_func("max", args, kwargs) + return super().max(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["min"]) + def min(self, *args, **kwargs): + nv.validate_expanding_func("min", args, kwargs) + return super().min(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["mean"]) + def mean(self, *args, **kwargs): + nv.validate_expanding_func("mean", args, kwargs) + return super().mean(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["median"]) + def median(self, **kwargs): + return super().median(**kwargs) + + @Substitution(name="expanding", versionadded="") + @Appender(_shared_docs["std"]) + def std(self, ddof=1, *args, **kwargs): + nv.validate_expanding_func("std", args, kwargs) + return super().std(ddof=ddof, **kwargs) + + @Substitution(name="expanding", versionadded="") + @Appender(_shared_docs["var"]) + def var(self, ddof=1, *args, **kwargs): + nv.validate_expanding_func("var", args, kwargs) + return super().var(ddof=ddof, **kwargs) + + 
@Substitution(name="expanding") + @Appender(_doc_template) + @Appender(_shared_docs["skew"]) + def skew(self, **kwargs): + return super().skew(**kwargs) + + _agg_doc = dedent( + """ + Examples + -------- + + The example below will show an expanding calculation with a window size of + four matching the equivalent function call using `scipy.stats`. + + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}") + -1.200000 + >>> print(f"{scipy.stats.kurtosis(arr, bias=False):.6f}") + 4.999874 + >>> s = pd.Series(arr) + >>> s.expanding(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 4.999874 + dtype: float64 + """ + ) + + @Appender(_agg_doc) + @Substitution(name="expanding") + @Appender(_shared_docs["kurt"]) + def kurt(self, **kwargs): + return super().kurt(**kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["quantile"]) + def quantile(self, quantile, interpolation="linear", **kwargs): + return super().quantile( + quantile=quantile, interpolation=interpolation, **kwargs + ) + + @Substitution(name="expanding") + @Appender(_doc_template) + @Appender(_shared_docs["cov"]) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["corr"]) + def corr(self, other=None, pairwise=None, **kwargs): + return super().corr(other=other, pairwise=pairwise, **kwargs) + + +class ExpandingGroupby(WindowGroupByMixin, Expanding): + """ + Provide a expanding groupby implementation. 
+ """ + + @property + def _constructor(self): + return Expanding diff --git a/pandas/core/window/indexers.py b/pandas/core/window/indexers.py new file mode 100644 index 00000000..0fa24a0b --- /dev/null +++ b/pandas/core/window/indexers.py @@ -0,0 +1,122 @@ +"""Indexer objects for computing start/end window bounds for rolling operations""" +from typing import Optional, Tuple + +import numpy as np + +from pandas._libs.window.indexers import calculate_variable_window_bounds +from pandas.util._decorators import Appender + +get_window_bounds_doc = """ +Computes the bounds of a window. + +Parameters +---------- +num_values : int, default 0 + number of values that will be aggregated over +window_size : int, default 0 + the number of rows in a window +min_periods : int, default None + min_periods passed from the top level rolling API +center : bool, default None + center passed from the top level rolling API +closed : str, default None + closed passed from the top level rolling API +win_type : str, default None + win_type passed from the top level rolling API + +Returns +------- +A tuple of ndarray[int64]s, indicating the boundaries of each +window +""" + + +class BaseIndexer: + """Base class for window bounds calculations""" + + def __init__( + self, index_array: Optional[np.ndarray] = None, window_size: int = 0, **kwargs, + ): + """ + Parameters + ---------- + **kwargs : + keyword arguments that will be available when get_window_bounds is called + """ + self.index_array = index_array + self.window_size = window_size + # Set user defined kwargs as attributes that can be used in get_window_bounds + for key, value in kwargs.items(): + setattr(self, key, value) + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: Optional[int] = None, + center: Optional[bool] = None, + closed: Optional[str] = None, + ) -> Tuple[np.ndarray, np.ndarray]: + + raise NotImplementedError + + +class FixedWindowIndexer(BaseIndexer): + """Creates 
window boundaries that are of fixed length.""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: Optional[int] = None, + center: Optional[bool] = None, + closed: Optional[str] = None, + ) -> Tuple[np.ndarray, np.ndarray]: + + start_s = np.zeros(self.window_size, dtype="int64") + start_e = ( + np.arange(self.window_size, num_values, dtype="int64") + - self.window_size + + 1 + ) + start = np.concatenate([start_s, start_e])[:num_values] + + end_s = np.arange(self.window_size, dtype="int64") + 1 + end_e = start_e + self.window_size + end = np.concatenate([end_s, end_e])[:num_values] + return start, end + + +class VariableWindowIndexer(BaseIndexer): + """Creates window boundaries that are of variable length, namely for time series.""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: Optional[int] = None, + center: Optional[bool] = None, + closed: Optional[str] = None, + ) -> Tuple[np.ndarray, np.ndarray]: + + return calculate_variable_window_bounds( + num_values, self.window_size, min_periods, center, closed, self.index_array, + ) + + +class ExpandingIndexer(BaseIndexer): + """Calculate expanding window bounds, mimicking df.expanding()""" + + @Appender(get_window_bounds_doc) + def get_window_bounds( + self, + num_values: int = 0, + min_periods: Optional[int] = None, + center: Optional[bool] = None, + closed: Optional[str] = None, + ) -> Tuple[np.ndarray, np.ndarray]: + + return ( + np.zeros(num_values, dtype=np.int64), + np.arange(1, num_values + 1, dtype=np.int64), + ) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py new file mode 100644 index 00000000..d6f28c90 --- /dev/null +++ b/pandas/core/window/numba_.py @@ -0,0 +1,133 @@ +from distutils.version import LooseVersion +import types +from typing import Any, Callable, Dict, Optional, Tuple + +import numpy as np + +from pandas._typing import Scalar +from pandas.compat._optional 
import import_optional_dependency + + +def make_rolling_apply( + func: Callable[..., Scalar], + args: Tuple, + nogil: bool, + parallel: bool, + nopython: bool, +): + """ + Creates a JITted rolling apply function with a JITted version of + the user's function. + + Parameters + ---------- + func : function + function to be applied to each window and will be JITed + args : tuple + *args to be passed into the function + nogil : bool + nogil parameter from engine_kwargs for numba.jit + parallel : bool + parallel parameter from engine_kwargs for numba.jit + nopython : bool + nopython parameter from engine_kwargs for numba.jit + + Returns + ------- + Numba function + """ + numba = import_optional_dependency("numba") + + if parallel: + loop_range = numba.prange + else: + loop_range = range + + if LooseVersion(numba.__version__) >= LooseVersion("0.49.0"): + is_jitted = numba.extending.is_jitted(func) + else: + is_jitted = isinstance(func, numba.targets.registry.CPUDispatcher) + + if is_jitted: + # Don't jit a user passed jitted function + numba_func = func + else: + + @numba.generated_jit(nopython=nopython, nogil=nogil, parallel=parallel) + def numba_func(window, *_args): + if getattr(np, func.__name__, False) is func or isinstance( + func, types.BuiltinFunctionType + ): + jf = func + else: + jf = numba.jit(func, nopython=nopython, nogil=nogil) + + def impl(window, *_args): + return jf(window, *_args) + + return impl + + @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel) + def roll_apply( + values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int, + ) -> np.ndarray: + result = np.empty(len(begin)) + for i in loop_range(len(result)): + start = begin[i] + stop = end[i] + window = values[start:stop] + count_nan = np.sum(np.isnan(window)) + if len(window) - count_nan >= minimum_periods: + result[i] = numba_func(window, *args) + else: + result[i] = np.nan + return result + + return roll_apply + + +def generate_numba_apply_func( + args: Tuple, + 
kwargs: Dict[str, Any], + func: Callable[..., Scalar], + engine_kwargs: Optional[Dict[str, bool]], +): + """ + Generate a numba jitted apply function specified by values from engine_kwargs. + + 1. jit the user's function + 2. Return a rolling apply function with the jitted function inline + + Configurations specified in engine_kwargs apply to both the user's + function _AND_ the rolling apply function. + + Parameters + ---------- + args : tuple + *args to be passed into the function + kwargs : dict + **kwargs to be passed into the function + func : function + function to be applied to each window and will be JITed + engine_kwargs : dict + dictionary of arguments to be passed into numba.jit + + Returns + ------- + Numba function + """ + + if engine_kwargs is None: + engine_kwargs = {} + + nopython = engine_kwargs.get("nopython", True) + nogil = engine_kwargs.get("nogil", False) + parallel = engine_kwargs.get("parallel", False) + + if kwargs and nopython: + raise ValueError( + "numba does not support kwargs with nopython=True: " + "https://github.com/numba/numba/issues/2916" + ) + + return make_rolling_apply(func, args, nogil, parallel, nopython) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py new file mode 100644 index 00000000..a01a753e --- /dev/null +++ b/pandas/core/window/rolling.py @@ -0,0 +1,2123 @@ +""" +Provide a generic structure to support window functions, +similar to how we have a Groupby object. 
+""" +from datetime import timedelta +from functools import partial +import inspect +from textwrap import dedent +from typing import Callable, Dict, List, Optional, Set, Tuple, Union + +import numpy as np + +import pandas._libs.window.aggregations as window_aggregations +from pandas._typing import Axis, FrameOrSeries, Scalar +from pandas.compat._optional import import_optional_dependency +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, Substitution, cache_readonly + +from pandas.core.dtypes.common import ( + ensure_float64, + is_bool, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_scalar, + needs_i8_conversion, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCDateOffset, + ABCDatetimeIndex, + ABCPeriodIndex, + ABCSeries, + ABCTimedeltaIndex, +) + +from pandas.core.base import DataError, PandasObject, SelectionMixin, ShallowMixin +import pandas.core.common as com +from pandas.core.indexes.api import Index, ensure_index +from pandas.core.window.common import ( + WindowGroupByMixin, + _doc_template, + _flex_binary_moment, + _shared_docs, + calculate_center_offset, + calculate_min_periods, + get_weighted_roll_func, + zsqrt, +) +from pandas.core.window.indexers import ( + BaseIndexer, + FixedWindowIndexer, + VariableWindowIndexer, +) +from pandas.core.window.numba_ import generate_numba_apply_func + + +class _Window(PandasObject, ShallowMixin, SelectionMixin): + _attributes: List[str] = [ + "window", + "min_periods", + "center", + "win_type", + "axis", + "on", + "closed", + ] + exclusions: Set[str] = set() + + def __init__( + self, + obj, + window=None, + min_periods: Optional[int] = None, + center: Optional[bool] = False, + win_type: Optional[str] = None, + axis: Axis = 0, + on: Optional[Union[str, Index]] = None, + closed: Optional[str] = None, + **kwargs, + ): + + self.__dict__.update(kwargs) + self.obj = obj + self.on = on + self.closed = closed + self.window = window + 
self.min_periods = min_periods + self.center = center + self.win_type = win_type + self.win_freq = None + self.axis = obj._get_axis_number(axis) if axis is not None else None + self.validate() + self._numba_func_cache: Dict[Optional[str], Callable] = dict() + + @property + def _constructor(self): + return Window + + @property + def is_datetimelike(self) -> Optional[bool]: + return None + + @property + def _on(self): + return None + + @property + def is_freq_type(self) -> bool: + return self.win_type == "freq" + + def validate(self) -> None: + if self.center is not None and not is_bool(self.center): + raise ValueError("center must be a boolean") + if self.min_periods is not None and not is_integer(self.min_periods): + raise ValueError("min_periods must be an integer") + if self.closed is not None and self.closed not in [ + "right", + "both", + "left", + "neither", + ]: + raise ValueError("closed must be 'right', 'left', 'both' or 'neither'") + if not isinstance(self.obj, (ABCSeries, ABCDataFrame)): + raise TypeError(f"invalid type: {type(self)}") + if isinstance(self.window, BaseIndexer): + self._validate_get_window_bounds_signature(self.window) + + @staticmethod + def _validate_get_window_bounds_signature(window: BaseIndexer) -> None: + """ + Validate that the passed BaseIndexer subclass has + a get_window_bounds with the correct signature. + """ + get_window_bounds_signature = inspect.signature( + window.get_window_bounds + ).parameters.keys() + expected_signature = inspect.signature( + BaseIndexer().get_window_bounds + ).parameters.keys() + if get_window_bounds_signature != expected_signature: + raise ValueError( + f"{type(window).__name__} does not implement the correct signature for " + f"get_window_bounds" + ) + + def _create_blocks(self): + """ + Split data into blocks & return conformed data. 
+ """ + + obj = self._selected_obj + + # filter out the on from the object + if self.on is not None and not isinstance(self.on, Index): + if obj.ndim == 2: + obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) + blocks = obj._to_dict_of_blocks(copy=False).values() + + return blocks, obj + + def _gotitem(self, key, ndim, subset=None): + """ + Sub-classes to define. Return a sliced object. + + Parameters + ---------- + key : str / list of selections + ndim : 1,2 + requested ndim of result + subset : object, default None + subset to act on + """ + + # create a new object to prevent aliasing + if subset is None: + subset = self.obj + self = self._shallow_copy(subset) + self._reset_cache() + if subset.ndim == 2: + if is_scalar(key) and key in subset or is_list_like(key): + self._selection = key + return self + + def __getattr__(self, attr: str): + if attr in self._internal_names_set: + return object.__getattribute__(self, attr) + if attr in self.obj: + return self[attr] + + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{attr}'" + ) + + def _dir_additions(self): + return self.obj._dir_additions() + + def _get_win_type(self, kwargs: Dict): + """ + Exists for compatibility, overriden by subclass Window. + + Parameters + ---------- + kwargs : dict + ignored, exists for compatibility + + Returns + ------- + None + """ + return None + + def _get_window(self, other=None, win_type: Optional[str] = None) -> int: + """ + Return window length. + + Parameters + ---------- + other : + ignored, exists for compatibility + win_type : + ignored, exists for compatibility + + Returns + ------- + window : int + """ + if isinstance(self.window, BaseIndexer): + return self.min_periods or 0 + return self.window + + @property + def _window_type(self) -> str: + return type(self).__name__ + + def __repr__(self) -> str: + """ + Provide a nice str repr of our rolling object. 
+ """ + + attrs_list = ( + f"{attr_name}={getattr(self, attr_name)}" + for attr_name in self._attributes + if getattr(self, attr_name, None) is not None + ) + attrs = ",".join(attrs_list) + return f"{self._window_type} [{attrs}]" + + def __iter__(self): + url = "https://github.com/pandas-dev/pandas/issues/11704" + raise NotImplementedError(f"See issue #11704 {url}") + + def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: + """Convert input to numpy arrays for Cython routines""" + if values is None: + values = getattr(self._selected_obj, "values", self._selected_obj) + + # GH #12373 : rolling functions error on float32 data + # make sure the data is coerced to float64 + if is_float_dtype(values.dtype): + values = ensure_float64(values) + elif is_integer_dtype(values.dtype): + values = ensure_float64(values) + elif needs_i8_conversion(values.dtype): + raise NotImplementedError( + f"ops for {self._window_type} for this " + f"dtype {values.dtype} are not implemented" + ) + else: + try: + values = ensure_float64(values) + except (ValueError, TypeError): + raise TypeError(f"cannot handle this type -> {values.dtype}") + + # Convert inf to nan for C funcs + inf = np.isinf(values) + if inf.any(): + values = np.where(inf, np.nan, values) + + return values + + def _wrap_result(self, result, block=None, obj=None): + """ + Wrap a single result. + """ + + if obj is None: + obj = self._selected_obj + index = obj.index + + if isinstance(result, np.ndarray): + + if result.ndim == 1: + from pandas import Series + + return Series(result, index, name=obj.name) + + return type(obj)(result, index=index, columns=block.columns) + return result + + def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries: + """ + Wrap the results. 
+ + Parameters + ---------- + results : list of ndarrays + blocks : list of blocks + obj : conformed data (may be resampled) + exclude: list of columns to exclude, default to None + """ + + from pandas import Series, concat + + final = [] + for result, block in zip(results, blocks): + + result = self._wrap_result(result, block=block, obj=obj) + if result.ndim == 1: + return result + final.append(result) + + # if we have an 'on' column + # we want to put it back into the results + # in the same location + columns = self._selected_obj.columns + if self.on is not None and not self._on.equals(obj.index): + + name = self._on.name + final.append(Series(self._on, index=obj.index, name=name)) + + if self._selection is not None: + + selection = ensure_index(self._selection) + + # need to reorder to include original location of + # the on column (if its not already there) + if name not in selection: + columns = self.obj.columns + indexer = columns.get_indexer(selection.tolist() + [name]) + columns = columns.take(sorted(indexer)) + + # exclude nuisance columns so that they are not reindexed + if exclude is not None and exclude: + columns = [c for c in columns if c not in exclude] + + if not columns: + raise DataError("No numeric types to aggregate") + + if not len(final): + return obj.astype("float64") + return concat(final, axis=1).reindex(columns=columns, copy=False) + + def _center_window(self, result, window) -> np.ndarray: + """ + Center the result in the window. + """ + if self.axis > result.ndim - 1: + raise ValueError("Requested axis is larger then no. of argument dimensions") + + offset = calculate_center_offset(window) + if offset > 0: + lead_indexer = [slice(None)] * result.ndim + lead_indexer[self.axis] = slice(offset, None) + result = np.copy(result[tuple(lead_indexer)]) + return result + + def _get_roll_func(self, func_name: str) -> Callable: + """ + Wrap rolling function to check values passed. 
+ + Parameters + ---------- + func_name : str + Cython function used to calculate rolling statistics + + Returns + ------- + func : callable + """ + window_func = getattr(window_aggregations, func_name, None) + if window_func is None: + raise ValueError( + f"we do not support this function in window_aggregations.{func_name}" + ) + return window_func + + def _get_cython_func_type(self, func: str) -> Callable: + """ + Return a variable or fixed cython function type. + + Variable algorithms do not use window while fixed do. + """ + if self.is_freq_type or isinstance(self.window, BaseIndexer): + return self._get_roll_func(f"{func}_variable") + return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) + + def _get_window_indexer(self, window: int) -> BaseIndexer: + """ + Return an indexer class that will compute the window start and end bounds + """ + if isinstance(self.window, BaseIndexer): + return self.window + if self.is_freq_type: + return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) + return FixedWindowIndexer(window_size=window) + + def _apply( + self, + func: Callable, + center: bool, + require_min_periods: int = 0, + floor: int = 1, + is_weighted: bool = False, + name: Optional[str] = None, + use_numba_cache: bool = False, + **kwargs, + ): + """ + Rolling statistical measure using supplied function. + + Designed to be used with passed-in Cython array-based functions. + + Parameters + ---------- + func : callable function to apply + center : bool + require_min_periods : int + floor : int + is_weighted : bool + name : str, + compatibility with groupby.rolling + use_numba_cache : bool + whether to cache a numba compiled function. 
Only available for numba + enabled methods (so far only apply) + **kwargs + additional arguments for rolling function and window function + + Returns + ------- + y : type of input + """ + win_type = self._get_win_type(kwargs) + window = self._get_window(win_type=win_type) + + blocks, obj = self._create_blocks() + block_list = list(blocks) + window_indexer = self._get_window_indexer(window) + + results = [] + exclude: List[Scalar] = [] + for i, b in enumerate(blocks): + try: + values = self._prep_values(b.values) + + except (TypeError, NotImplementedError): + if isinstance(obj, ABCDataFrame): + exclude.extend(b.columns) + del block_list[i] + continue + else: + raise DataError("No numeric types to aggregate") + + if values.size == 0: + results.append(values.copy()) + continue + + # calculation function + offset = calculate_center_offset(window) if center else 0 + additional_nans = np.array([np.nan] * offset) + + if not is_weighted: + + def calc(x): + x = np.concatenate((x, additional_nans)) + if not isinstance(window, BaseIndexer): + min_periods = calculate_min_periods( + window, self.min_periods, len(x), require_min_periods, floor + ) + else: + min_periods = calculate_min_periods( + self.min_periods or 1, + self.min_periods, + len(x), + require_min_periods, + floor, + ) + start, end = window_indexer.get_window_bounds( + num_values=len(x), + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + ) + return func(x, start, end, min_periods) + + else: + + def calc(x): + x = np.concatenate((x, additional_nans)) + return func(x, window, self.min_periods) + + with np.errstate(all="ignore"): + if values.ndim > 1: + result = np.apply_along_axis(calc, self.axis, values) + else: + result = calc(values) + result = np.asarray(result) + + if use_numba_cache: + self._numba_func_cache[name] = func + + if center: + result = self._center_window(result, window) + + results.append(result) + + return self._wrap_results(results, block_list, obj, exclude) + + def 
aggregate(self, func, *args, **kwargs): + result, how = self._aggregate(func, *args, **kwargs) + if result is None: + return self.apply(func, raw=False, args=args, kwargs=kwargs) + return result + + agg = aggregate + + _shared_docs["sum"] = dedent( + """ + Calculate %(name)s sum of given DataFrame or Series. + + Parameters + ---------- + *args, **kwargs + For compatibility with other %(name)s methods. Has no effect + on the computed value. + + Returns + ------- + Series or DataFrame + Same type as the input, with the same index, containing the + %(name)s sum. + + See Also + -------- + Series.sum : Reducing sum for Series. + DataFrame.sum : Reducing sum for DataFrame. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4, 5]) + >>> s + 0 1 + 1 2 + 2 3 + 3 4 + 4 5 + dtype: int64 + + >>> s.rolling(3).sum() + 0 NaN + 1 NaN + 2 6.0 + 3 9.0 + 4 12.0 + dtype: float64 + + >>> s.expanding(3).sum() + 0 NaN + 1 NaN + 2 6.0 + 3 10.0 + 4 15.0 + dtype: float64 + + >>> s.rolling(3, center=True).sum() + 0 NaN + 1 6.0 + 2 9.0 + 3 12.0 + 4 NaN + dtype: float64 + + For DataFrame, each %(name)s sum is computed column-wise. + + >>> df = pd.DataFrame({"A": s, "B": s ** 2}) + >>> df + A B + 0 1 1 + 1 2 4 + 2 3 9 + 3 4 16 + 4 5 25 + + >>> df.rolling(3).sum() + A B + 0 NaN NaN + 1 NaN NaN + 2 6.0 14.0 + 3 9.0 29.0 + 4 12.0 50.0 + """ + ) + + _shared_docs["mean"] = dedent( + """ + Calculate the %(name)s mean of the values. + + Parameters + ---------- + *args + Under Review. + **kwargs + Under Review. + + Returns + ------- + Series or DataFrame + Returned object type is determined by the caller of the %(name)s + calculation. + + See Also + -------- + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.mean : Equivalent method for Series. + DataFrame.mean : Equivalent method for DataFrame. + + Examples + -------- + The below examples will show rolling mean calculations with window sizes of + two and three, respectively. 
+ + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).mean() + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + + >>> s.rolling(3).mean() + 0 NaN + 1 NaN + 2 2.0 + 3 3.0 + dtype: float64 + """ + ) + + _shared_docs["var"] = dedent( + """ + Calculate unbiased %(name)s variance. + %(versionadded)s + Normalized by N-1 by default. This can be changed using the `ddof` + argument. + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + *args, **kwargs + For NumPy compatibility. No additional arguments are used. + + Returns + ------- + Series or DataFrame + Returns the same object type as the caller of the %(name)s calculation. + + See Also + -------- + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.var : Equivalent method for Series. + DataFrame.var : Equivalent method for DataFrame. + numpy.var : Equivalent method for Numpy array. + + Notes + ----- + The default `ddof` of 1 used in :meth:`Series.var` is different than the + default `ddof` of 0 in :func:`numpy.var`. + + A minimum of 1 period is required for the rolling calculation. + + Examples + -------- + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).var() + 0 NaN + 1 NaN + 2 0.333333 + 3 1.000000 + 4 1.000000 + 5 1.333333 + 6 0.000000 + dtype: float64 + + >>> s.expanding(3).var() + 0 NaN + 1 NaN + 2 0.333333 + 3 0.916667 + 4 0.800000 + 5 0.700000 + 6 0.619048 + dtype: float64 + """ + ) + + _shared_docs["std"] = dedent( + """ + Calculate %(name)s standard deviation. + %(versionadded)s + Normalized by N-1 by default. This can be changed using the `ddof` + argument. + + Parameters + ---------- + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + *args, **kwargs + For NumPy compatibility. 
No additional arguments are used. + + Returns + ------- + Series or DataFrame + Returns the same object type as the caller of the %(name)s calculation. + + See Also + -------- + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.std : Equivalent method for Series. + DataFrame.std : Equivalent method for DataFrame. + numpy.std : Equivalent method for Numpy array. + + Notes + ----- + The default `ddof` of 1 used in Series.std is different than the default + `ddof` of 0 in numpy.std. + + A minimum of one period is required for the rolling calculation. + + Examples + -------- + >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) + >>> s.rolling(3).std() + 0 NaN + 1 NaN + 2 0.577350 + 3 1.000000 + 4 1.000000 + 5 1.154701 + 6 0.000000 + dtype: float64 + + >>> s.expanding(3).std() + 0 NaN + 1 NaN + 2 0.577350 + 3 0.957427 + 4 0.894427 + 5 0.836660 + 6 0.786796 + dtype: float64 + """ + ) + + +class Window(_Window): + """ + Provide rolling window calculations. + + Parameters + ---------- + window : int, offset, or BaseIndexer subclass + Size of the moving window. This is the number of observations used for + calculating the statistic. Each window will be a fixed size. + + If its an offset then this will be the time period of each window. Each + window will be a variable sized based on the observations included in + the time-period. This is only valid for datetimelike indexes. + + If a BaseIndexer subclass is passed, calculates the window boundaries + based on the defined ``get_window_bounds`` method. Additional rolling + keyword arguments, namely `min_periods`, `center`, and + `closed` will be passed to `get_window_bounds`. + min_periods : int, default None + Minimum number of observations in window required to have a value + (otherwise result is NA). For a window that is specified by an offset, + `min_periods` will default to 1. Otherwise, `min_periods` will default + to the size of the window. 
+ center : bool, default False + Set the labels at the center of the window. + win_type : str, default None + Provide a window type. If ``None``, all points are evenly weighted. + See the notes below for further information. + on : str, optional + For a DataFrame, a datetime-like column or MultiIndex level on which + to calculate the rolling window, rather than the DataFrame's index. + Provided integer column is ignored and excluded from result since + an integer index is not used to calculate the rolling window. + axis : int or str, default 0 + closed : str, default None + Make the interval closed on the 'right', 'left', 'both' or + 'neither' endpoints. + For offset-based windows, it defaults to 'right'. + For fixed windows, defaults to 'both'. Remaining cases not implemented + for fixed windows. + + Returns + ------- + a Window or Rolling sub-classed for the particular operation + + See Also + -------- + expanding : Provides expanding transformations. + ewm : Provides exponential weighted functions. + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + To learn more about the offsets & frequency strings, please see `this link + `__. + + The recognized win_types are: + + * ``boxcar`` + * ``triang`` + * ``blackman`` + * ``hamming`` + * ``bartlett`` + * ``parzen`` + * ``bohman`` + * ``blackmanharris`` + * ``nuttall`` + * ``barthann`` + * ``kaiser`` (needs beta) + * ``gaussian`` (needs std) + * ``general_gaussian`` (needs power, width) + * ``slepian`` (needs width) + * ``exponential`` (needs tau), center is set to None. + + If ``win_type=None`` all points are evenly weighted. To learn more about + different window types see `scipy.signal window functions + `__. 
+ + Examples + -------- + + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + Rolling sum with a window length of 2, using the 'triang' + window type. + + >>> df.rolling(2, win_type='triang').sum() + B + 0 NaN + 1 0.5 + 2 1.5 + 3 NaN + 4 NaN + + Rolling sum with a window length of 2, using the 'gaussian' + window type (note how we need to specify std). + + >>> df.rolling(2, win_type='gaussian').sum(std=3) + B + 0 NaN + 1 0.986207 + 2 2.958621 + 3 NaN + 4 NaN + + Rolling sum with a window length of 2, min_periods defaults + to the window length. + + >>> df.rolling(2).sum() + B + 0 NaN + 1 1.0 + 2 3.0 + 3 NaN + 4 NaN + + Same as above, but explicitly set the min_periods + + >>> df.rolling(2, min_periods=1).sum() + B + 0 0.0 + 1 1.0 + 2 3.0 + 3 2.0 + 4 4.0 + + A ragged (meaning not-a-regular frequency), time-indexed DataFrame + + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + ... index = [pd.Timestamp('20130101 09:00:00'), + ... pd.Timestamp('20130101 09:00:02'), + ... pd.Timestamp('20130101 09:00:03'), + ... pd.Timestamp('20130101 09:00:05'), + ... pd.Timestamp('20130101 09:00:06')]) + + >>> df + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 2.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + + Contrasting to an integer rolling window, this will roll a variable + length window corresponding to the time period. + The default for min_periods is 1. + + >>> df.rolling('2s').sum() + B + 2013-01-01 09:00:00 0.0 + 2013-01-01 09:00:02 1.0 + 2013-01-01 09:00:03 3.0 + 2013-01-01 09:00:05 NaN + 2013-01-01 09:00:06 4.0 + """ + + def validate(self): + super().validate() + + window = self.window + if isinstance(window, BaseIndexer): + raise NotImplementedError( + "BaseIndexer subclasses not implemented with win_types." 
+ ) + elif isinstance(window, (list, tuple, np.ndarray)): + pass + elif is_integer(window): + if window <= 0: + raise ValueError("window must be > 0 ") + import_optional_dependency( + "scipy", extra="Scipy is required to generate window weight." + ) + import scipy.signal as sig + + if not isinstance(self.win_type, str): + raise ValueError(f"Invalid win_type {self.win_type}") + if getattr(sig, self.win_type, None) is None: + raise ValueError(f"Invalid win_type {self.win_type}") + else: + raise ValueError(f"Invalid window {window}") + + def _get_win_type(self, kwargs: Dict) -> Union[str, Tuple]: + """ + Extract arguments for the window type, provide validation for it + and return the validated window type. + + Parameters + ---------- + kwargs : dict + + Returns + ------- + win_type : str, or tuple + """ + # the below may pop from kwargs + def _validate_win_type(win_type, kwargs): + arg_map = { + "kaiser": ["beta"], + "gaussian": ["std"], + "general_gaussian": ["power", "width"], + "slepian": ["width"], + "exponential": ["tau"], + } + + if win_type in arg_map: + win_args = _pop_args(win_type, arg_map[win_type], kwargs) + if win_type == "exponential": + # exponential window requires the first arg (center) + # to be set to None (necessary for symmetric window) + win_args.insert(0, None) + + return tuple([win_type] + win_args) + + return win_type + + def _pop_args(win_type, arg_names, kwargs): + all_args = [] + for n in arg_names: + if n not in kwargs: + raise ValueError(f"{win_type} window requires {n}") + all_args.append(kwargs.pop(n)) + return all_args + + return _validate_win_type(self.win_type, kwargs) + + def _get_window( + self, other=None, win_type: Optional[Union[str, Tuple]] = None + ) -> np.ndarray: + """ + Get the window, weights. 
+ + Parameters + ---------- + other : + ignored, exists for compatibility + win_type : str, or tuple + type of window to create + + Returns + ------- + window : ndarray + the window, weights + """ + + window = self.window + if isinstance(window, (list, tuple, np.ndarray)): + return com.asarray_tuplesafe(window).astype(float) + elif is_integer(window): + import scipy.signal as sig + + # GH #15662. `False` makes symmetric window, rather than periodic. + return sig.get_window(win_type, window, False).astype(float) + + _agg_see_also_doc = dedent( + """ + See Also + -------- + pandas.DataFrame.rolling.aggregate + pandas.DataFrame.aggregate + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.rolling(3, win_type='boxcar').agg('mean') + A B C + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 -0.885035 0.212600 -0.711689 + 3 -0.323928 -0.200122 -1.093408 + 4 -0.071445 -0.431533 -1.075833 + 5 0.504739 0.676083 -0.996353 + 6 0.358206 1.903256 -0.774200 + 7 0.906020 1.283573 0.085482 + 8 -0.096361 0.818139 0.472290 + 9 0.070889 0.134399 -0.031308 + """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="Series/DataFrame", + axis="", + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, func, *args, **kwargs): + result, how = self._aggregate(func, *args, **kwargs) + if result is None: + + # these must apply directly + result = func(self) + + return result + + agg = aggregate + + @Substitution(name="window") + @Appender(_shared_docs["sum"]) + def sum(self, *args, **kwargs): + 
nv.validate_window_func("sum", args, kwargs) + window_func = self._get_roll_func("roll_weighted_sum") + window_func = get_weighted_roll_func(window_func) + return self._apply( + window_func, center=self.center, is_weighted=True, name="sum", **kwargs + ) + + @Substitution(name="window") + @Appender(_shared_docs["mean"]) + def mean(self, *args, **kwargs): + nv.validate_window_func("mean", args, kwargs) + window_func = self._get_roll_func("roll_weighted_mean") + window_func = get_weighted_roll_func(window_func) + return self._apply( + window_func, center=self.center, is_weighted=True, name="mean", **kwargs + ) + + @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") + @Appender(_shared_docs["var"]) + def var(self, ddof=1, *args, **kwargs): + nv.validate_window_func("var", args, kwargs) + window_func = partial(self._get_roll_func("roll_weighted_var"), ddof=ddof) + window_func = get_weighted_roll_func(window_func) + kwargs.pop("name", None) + return self._apply( + window_func, center=self.center, is_weighted=True, name="var", **kwargs + ) + + @Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n") + @Appender(_shared_docs["std"]) + def std(self, ddof=1, *args, **kwargs): + nv.validate_window_func("std", args, kwargs) + return zsqrt(self.var(ddof=ddof, name="std", **kwargs)) + + +class _Rolling(_Window): + @property + def _constructor(self): + return Rolling + + +class _Rolling_and_Expanding(_Rolling): + + _shared_docs["count"] = dedent( + r""" + The %(name)s count of any non-NaN observations inside the window. + + Returns + ------- + Series or DataFrame + Returned object type is determined by the caller of the %(name)s + calculation. + + See Also + -------- + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + DataFrame.count : Count of the full DataFrame. 
+ + Examples + -------- + >>> s = pd.Series([2, 3, np.nan, 10]) + >>> s.rolling(2).count() + 0 1.0 + 1 2.0 + 2 1.0 + 3 1.0 + dtype: float64 + >>> s.rolling(3).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 2.0 + dtype: float64 + >>> s.rolling(4).count() + 0 1.0 + 1 2.0 + 2 2.0 + 3 3.0 + dtype: float64 + """ + ) + + def count(self): + + blocks, obj = self._create_blocks() + results = [] + for b in blocks: + result = b.notna().astype(int) + result = self._constructor( + result, + window=self._get_window(), + min_periods=self.min_periods or 0, + center=self.center, + axis=self.axis, + closed=self.closed, + ).sum() + results.append(result) + + return self._wrap_results(results, blocks, obj) + + _shared_docs["apply"] = dedent( + r""" + The %(name)s function's apply function. + + Parameters + ---------- + func : function + Must produce a single value from an ndarray input if ``raw=True`` + or a single value from a Series if ``raw=False``. Can also accept a + Numba JIT function with ``engine='numba'`` specified. + + .. versionchanged:: 1.0.0 + + raw : bool, default None + * ``False`` : passes each row or column as a Series to the + function. + * ``True`` : the passed function will receive ndarray + objects instead. + If you are just applying a NumPy reduction function this will + achieve much better performance. + engine : str, default 'cython' + * ``'cython'`` : Runs rolling apply through C-extensions from cython. + * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. + Only available when ``raw`` is set to ``True``. + + .. versionadded:: 1.0.0 + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. 
The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to both the ``func`` and the ``apply`` rolling aggregation. + + .. versionadded:: 1.0.0 + + args : tuple, default None + Positional arguments to be passed into func. + kwargs : dict, default None + Keyword arguments to be passed into func. + + Returns + ------- + Series or DataFrame + Return type is determined by the caller. + + See Also + -------- + Series.%(name)s : Series %(name)s. + DataFrame.%(name)s : DataFrame %(name)s. + + Notes + ----- + See :ref:`stats.rolling_apply` for extended documentation and performance + considerations for the Numba engine. + """ + ) + + def apply( + self, + func, + raw: bool = False, + engine: str = "cython", + engine_kwargs: Optional[Dict] = None, + args: Optional[Tuple] = None, + kwargs: Optional[Dict] = None, + ): + if args is None: + args = () + if kwargs is None: + kwargs = {} + kwargs.pop("_level", None) + kwargs.pop("floor", None) + window = self._get_window() + offset = calculate_center_offset(window) if self.center else 0 + if not is_bool(raw): + raise ValueError("raw parameter must be `True` or `False`") + + if engine == "cython": + if engine_kwargs is not None: + raise ValueError("cython engine does not accept engine_kwargs") + apply_func = self._generate_cython_apply_func( + args, kwargs, raw, offset, func + ) + elif engine == "numba": + if raw is False: + raise ValueError("raw must be `True` when using the numba engine") + if func in self._numba_func_cache: + # Return an already compiled version of roll_apply if available + apply_func = self._numba_func_cache[func] + else: + apply_func = generate_numba_apply_func( + args, kwargs, func, engine_kwargs + ) + else: + raise ValueError("engine must be either 'numba' or 'cython'") + + # TODO: Why do we always pass center=False? 
+ # name=func & raw=raw for WindowGroupByMixin._apply + return self._apply( + apply_func, + center=False, + floor=0, + name=func, + use_numba_cache=engine == "numba", + raw=raw, + args=args, + kwargs=kwargs, + ) + + def _generate_cython_apply_func(self, args, kwargs, raw, offset, func): + from pandas import Series + + window_func = partial( + self._get_cython_func_type("roll_generic"), + args=args, + kwargs=kwargs, + raw=raw, + offset=offset, + func=func, + ) + + def apply_func(values, begin, end, min_periods, raw=raw): + if not raw: + values = Series(values, index=self.obj.index) + return window_func(values, begin, end, min_periods) + + return apply_func + + def sum(self, *args, **kwargs): + nv.validate_window_func("sum", args, kwargs) + window_func = self._get_cython_func_type("roll_sum") + kwargs.pop("floor", None) + return self._apply( + window_func, center=self.center, floor=0, name="sum", **kwargs + ) + + _shared_docs["max"] = dedent( + """ + Calculate the %(name)s maximum. + + Parameters + ---------- + *args, **kwargs + Arguments and keyword arguments to be passed into func. + """ + ) + + def max(self, *args, **kwargs): + nv.validate_window_func("max", args, kwargs) + window_func = self._get_cython_func_type("roll_max") + return self._apply(window_func, center=self.center, name="max", **kwargs) + + _shared_docs["min"] = dedent( + """ + Calculate the %(name)s minimum. + + Parameters + ---------- + **kwargs + Under Review. + + Returns + ------- + Series or DataFrame + Returned object type is determined by the caller of the %(name)s + calculation. + + See Also + -------- + Series.%(name)s : Calling object with a Series. + DataFrame.%(name)s : Calling object with a DataFrame. + Series.min : Similar method for Series. + DataFrame.min : Similar method for DataFrame. + + Examples + -------- + Performing a rolling minimum with a window size of 3. 
+ + >>> s = pd.Series([4, 3, 5, 2, 6]) + >>> s.rolling(3).min() + 0 NaN + 1 NaN + 2 3.0 + 3 2.0 + 4 2.0 + dtype: float64 + """ + ) + + def min(self, *args, **kwargs): + nv.validate_window_func("min", args, kwargs) + window_func = self._get_cython_func_type("roll_min") + return self._apply(window_func, center=self.center, name="min", **kwargs) + + def mean(self, *args, **kwargs): + nv.validate_window_func("mean", args, kwargs) + window_func = self._get_cython_func_type("roll_mean") + return self._apply(window_func, center=self.center, name="mean", **kwargs) + + _shared_docs["median"] = dedent( + """ + Calculate the %(name)s median. + + Parameters + ---------- + **kwargs + For compatibility with other %(name)s methods. Has no effect + on the computed median. + + Returns + ------- + Series or DataFrame + Returned type is the same as the original object. + + See Also + -------- + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.median : Equivalent method for Series. + DataFrame.median : Equivalent method for DataFrame. + + Examples + -------- + Compute the rolling median of a series with a window size of 3. 
+ + >>> s = pd.Series([0, 1, 2, 3, 4]) + >>> s.rolling(3).median() + 0 NaN + 1 NaN + 2 1.0 + 3 2.0 + 4 3.0 + dtype: float64 + """ + ) + + def median(self, **kwargs): + window_func = self._get_roll_func("roll_median_c") + window_func = partial(window_func, win=self._get_window()) + return self._apply(window_func, center=self.center, name="median", **kwargs) + + def std(self, ddof=1, *args, **kwargs): + nv.validate_window_func("std", args, kwargs) + kwargs.pop("require_min_periods", None) + window_func = self._get_cython_func_type("roll_var") + + def zsqrt_func(values, begin, end, min_periods): + return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof)) + + # ddof passed again for compat with groupby.rolling + return self._apply( + zsqrt_func, + center=self.center, + require_min_periods=1, + name="std", + ddof=ddof, + **kwargs, + ) + + def var(self, ddof=1, *args, **kwargs): + nv.validate_window_func("var", args, kwargs) + kwargs.pop("require_min_periods", None) + window_func = partial(self._get_cython_func_type("roll_var"), ddof=ddof) + # ddof passed again for compat with groupby.rolling + return self._apply( + window_func, + center=self.center, + require_min_periods=1, + name="var", + ddof=ddof, + **kwargs, + ) + + _shared_docs[ + "skew" + ] = """ + Unbiased %(name)s skewness. + + Parameters + ---------- + **kwargs + Keyword arguments to be passed into func. + """ + + def skew(self, **kwargs): + window_func = self._get_cython_func_type("roll_skew") + kwargs.pop("require_min_periods", None) + return self._apply( + window_func, + center=self.center, + require_min_periods=3, + name="skew", + **kwargs, + ) + + _shared_docs["kurt"] = dedent( + """ + Calculate unbiased %(name)s kurtosis. + + This function uses Fisher's definition of kurtosis without bias. + + Parameters + ---------- + **kwargs + Under Review. + + Returns + ------- + Series or DataFrame + Returned object type is determined by the caller of the %(name)s + calculation. 
+ + See Also + -------- + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.kurt : Equivalent method for Series. + DataFrame.kurt : Equivalent method for DataFrame. + scipy.stats.skew : Third moment of a probability density. + scipy.stats.kurtosis : Reference SciPy method. + + Notes + ----- + A minimum of 4 periods is required for the %(name)s calculation. + """ + ) + + def kurt(self, **kwargs): + window_func = self._get_cython_func_type("roll_kurt") + kwargs.pop("require_min_periods", None) + return self._apply( + window_func, + center=self.center, + require_min_periods=4, + name="kurt", + **kwargs, + ) + + _shared_docs["quantile"] = dedent( + """ + Calculate the %(name)s quantile. + + Parameters + ---------- + quantile : float + Quantile to compute. 0 <= quantile <= 1. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + .. versionadded:: 0.23.0 + + This optional parameter specifies the interpolation method to use, + when the desired quantile lies between two data points `i` and `j`: + + * linear: `i + (j - i) * fraction`, where `fraction` is the + fractional part of the index surrounded by `i` and `j`. + * lower: `i`. + * higher: `j`. + * nearest: `i` or `j` whichever is nearest. + * midpoint: (`i` + `j`) / 2. + **kwargs + For compatibility with other %(name)s methods. Has no effect on + the result. + + Returns + ------- + Series or DataFrame + Returned object type is determined by the caller of the %(name)s + calculation. + + See Also + -------- + Series.quantile : Computes value at the given quantile over all data + in Series. + DataFrame.quantile : Computes values at the given quantile over + requested axis in DataFrame. 
+ + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4]) + >>> s.rolling(2).quantile(.4, interpolation='lower') + 0 NaN + 1 1.0 + 2 2.0 + 3 3.0 + dtype: float64 + + >>> s.rolling(2).quantile(.4, interpolation='midpoint') + 0 NaN + 1 1.5 + 2 2.5 + 3 3.5 + dtype: float64 + """ + ) + + def quantile(self, quantile, interpolation="linear", **kwargs): + if quantile == 1.0: + window_func = self._get_cython_func_type("roll_max") + elif quantile == 0.0: + window_func = self._get_cython_func_type("roll_min") + else: + window_func = partial( + self._get_roll_func("roll_quantile"), + win=self._get_window(), + quantile=quantile, + interpolation=interpolation, + ) + + # Pass through for groupby.rolling + kwargs["quantile"] = quantile + kwargs["interpolation"] = interpolation + return self._apply(window_func, center=self.center, name="quantile", **kwargs) + + _shared_docs[ + "cov" + ] = """ + Calculate the %(name)s sample covariance. + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndexed DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of elements. + **kwargs + Keyword arguments to be passed into func. 
+ """ + + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + if other is None: + other = self._selected_obj + # only default unset + pairwise = True if pairwise is None else pairwise + other = self._shallow_copy(other) + + # GH 16058: offset window + if self.is_freq_type: + window = self.win_freq + else: + window = self._get_window(other) + + def _get_cov(X, Y): + # GH #12373 : rolling functions error on float32 data + # to avoid potential overflow, cast the data to float64 + X = X.astype("float64") + Y = Y.astype("float64") + mean = lambda x: x.rolling( + window, self.min_periods, center=self.center + ).mean(**kwargs) + count = ( + (X + Y) + .rolling(window=window, min_periods=0, center=self.center) + .count(**kwargs) + ) + bias_adj = count / (count - ddof) + return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj + + return _flex_binary_moment( + self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) + ) + + _shared_docs["corr"] = dedent( + """ + Calculate %(name)s correlation. + + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self. + pairwise : bool, default None + Calculate pairwise combinations of columns within a + DataFrame. If `other` is not specified, defaults to `True`, + otherwise defaults to `False`. + Not relevant for :class:`~pandas.Series`. + **kwargs + Unused. + + Returns + ------- + Series or DataFrame + Returned object type is determined by the caller of the + %(name)s calculation. + + See Also + -------- + Series.%(name)s : Calling object with Series data. + DataFrame.%(name)s : Calling object with DataFrames. + Series.corr : Equivalent method for Series. + DataFrame.corr : Equivalent method for DataFrame. + %(name)s.cov : Similar method to calculate covariance. + numpy.corrcoef : NumPy Pearson's correlation calculation. 
+ + Notes + ----- + This function uses Pearson's definition of correlation + (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). + + When `other` is not specified, the output will be self correlation (e.g. + all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` + set to `True`. + + Function will return ``NaN`` for correlations of equal valued sequences; + this is the result of a 0/0 division error. + + When `pairwise` is set to `False`, only matching columns between `self` and + `other` will be used. + + When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame + with the original index on the first level, and the `other` DataFrame + columns on the second level. + + In the case of missing elements, only complete pairwise observations + will be used. + + Examples + -------- + The below example shows a rolling calculation with a window size of + four matching the equivalent function call using :meth:`numpy.corrcoef`. + + >>> v1 = [3, 3, 3, 5, 8] + >>> v2 = [3, 4, 4, 4, 8] + >>> # numpy returns a 2X2 array, the correlation coefficient + >>> # is the number at entry [0][1] + >>> print(f"{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}") + 0.333333 + >>> print(f"{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}") + 0.916949 + >>> s1 = pd.Series(v1) + >>> s2 = pd.Series(v2) + >>> s1.rolling(4).corr(s2) + 0 NaN + 1 NaN + 2 NaN + 3 0.333333 + 4 0.916949 + dtype: float64 + + The below example shows a similar rolling calculation on a + DataFrame using the pairwise option. + + >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\ + [46., 31.], [50., 36.]]) + >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7)) + [[1. 0.6263001] + [0.6263001 1. ]] + >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7)) + [[1. 0.5553681] + [0.5553681 1. 
]] + >>> df = pd.DataFrame(matrix, columns=['X','Y']) + >>> df + X Y + 0 51.0 35.0 + 1 49.0 30.0 + 2 47.0 32.0 + 3 46.0 31.0 + 4 50.0 36.0 + >>> df.rolling(4).corr(pairwise=True) + X Y + 0 X NaN NaN + Y NaN NaN + 1 X NaN NaN + Y NaN NaN + 2 X NaN NaN + Y NaN NaN + 3 X 1.000000 0.626300 + Y 0.626300 1.000000 + 4 X 1.000000 0.555368 + Y 0.555368 1.000000 + """ + ) + + def corr(self, other=None, pairwise=None, **kwargs): + if other is None: + other = self._selected_obj + # only default unset + pairwise = True if pairwise is None else pairwise + other = self._shallow_copy(other) + window = self._get_window(other) if not self.is_freq_type else self.win_freq + + def _get_corr(a, b): + a = a.rolling( + window=window, min_periods=self.min_periods, center=self.center + ) + b = b.rolling( + window=window, min_periods=self.min_periods, center=self.center + ) + + return a.cov(b, **kwargs) / (a.std(**kwargs) * b.std(**kwargs)) + + return _flex_binary_moment( + self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) + ) + + +class Rolling(_Rolling_and_Expanding): + @cache_readonly + def is_datetimelike(self) -> bool: + return isinstance( + self._on, (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex) + ) + + @cache_readonly + def _on(self) -> Index: + if self.on is None: + if self.axis == 0: + return self.obj.index + else: + # i.e. 
self.axis == 1 + return self.obj.columns + elif isinstance(self.on, Index): + return self.on + elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns: + return Index(self.obj[self.on]) + else: + raise ValueError( + f"invalid on specified as {self.on}, " + "must be a column (of DataFrame), an Index " + "or None" + ) + + def validate(self): + super().validate() + + # we allow rolling on a datetimelike index + if (self.obj.empty or self.is_datetimelike) and isinstance( + self.window, (str, ABCDateOffset, timedelta) + ): + + self._validate_monotonic() + freq = self._validate_freq() + + # we don't allow center + if self.center: + raise NotImplementedError( + "center is not implemented " + "for datetimelike and offset " + "based windows" + ) + + # this will raise ValueError on non-fixed freqs + self.win_freq = self.window + self.window = freq.nanos + self.win_type = "freq" + + # min_periods must be an integer + if self.min_periods is None: + self.min_periods = 1 + + elif isinstance(self.window, BaseIndexer): + # Passed BaseIndexer subclass should handle all other rolling kwargs + return + elif not is_integer(self.window): + raise ValueError("window must be an integer") + elif self.window < 0: + raise ValueError("window must be non-negative") + + if not self.is_datetimelike and self.closed is not None: + raise ValueError( + "closed only implemented for datetimelike and offset based windows" + ) + + def _validate_monotonic(self): + """ + Validate monotonic (increasing or decreasing). + """ + if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing): + formatted = self.on + if self.on is None: + formatted = "index" + raise ValueError(f"{formatted} must be monotonic") + + def _validate_freq(self): + """ + Validate & return window frequency. 
+ """ + from pandas.tseries.frequencies import to_offset + + try: + return to_offset(self.window) + except (TypeError, ValueError): + raise ValueError( + f"passed window {self.window} is not " + "compatible with a datetimelike " + "index" + ) + + _agg_see_also_doc = dedent( + """ + See Also + -------- + Series.rolling + DataFrame.rolling + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.rolling(3).sum() + A B C + 0 NaN NaN NaN + 1 NaN NaN NaN + 2 -2.655105 0.637799 -2.135068 + 3 -0.971785 -0.600366 -3.280224 + 4 -0.214334 -1.294599 -3.227500 + 5 1.514216 2.028250 -2.989060 + 6 1.074618 5.709767 -2.322600 + 7 2.718061 3.850718 0.256446 + 8 -0.289082 2.454418 1.416871 + 9 0.212668 0.403198 -0.093924 + + >>> df.rolling(3).agg({'A':'sum', 'B':'min'}) + A B + 0 NaN NaN + 1 NaN NaN + 2 -2.655105 -0.165272 + 3 -0.971785 -1.340923 + 4 -0.214334 -1.340923 + 5 1.514216 -1.340923 + 6 1.074618 0.211596 + 7 2.718061 -1.647453 + 8 -0.289082 -1.647453 + 9 0.212668 -1.647453 + """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="Series/Dataframe", + axis="", + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) + + agg = aggregate + + @Substitution(name="rolling") + @Appender(_shared_docs["count"]) + def count(self): + + # different impl for freq counting + if self.is_freq_type: + window_func = self._get_roll_func("roll_count") + return self._apply(window_func, center=self.center, 
name="count") + + return super().count() + + @Substitution(name="rolling") + @Appender(_shared_docs["apply"]) + def apply( + self, + func, + raw=False, + engine="cython", + engine_kwargs=None, + args=None, + kwargs=None, + ): + return super().apply( + func, + raw=raw, + engine=engine, + engine_kwargs=engine_kwargs, + args=args, + kwargs=kwargs, + ) + + @Substitution(name="rolling") + @Appender(_shared_docs["sum"]) + def sum(self, *args, **kwargs): + nv.validate_rolling_func("sum", args, kwargs) + return super().sum(*args, **kwargs) + + @Substitution(name="rolling") + @Appender(_doc_template) + @Appender(_shared_docs["max"]) + def max(self, *args, **kwargs): + nv.validate_rolling_func("max", args, kwargs) + return super().max(*args, **kwargs) + + @Substitution(name="rolling") + @Appender(_shared_docs["min"]) + def min(self, *args, **kwargs): + nv.validate_rolling_func("min", args, kwargs) + return super().min(*args, **kwargs) + + @Substitution(name="rolling") + @Appender(_shared_docs["mean"]) + def mean(self, *args, **kwargs): + nv.validate_rolling_func("mean", args, kwargs) + return super().mean(*args, **kwargs) + + @Substitution(name="rolling") + @Appender(_shared_docs["median"]) + def median(self, **kwargs): + return super().median(**kwargs) + + @Substitution(name="rolling", versionadded="") + @Appender(_shared_docs["std"]) + def std(self, ddof=1, *args, **kwargs): + nv.validate_rolling_func("std", args, kwargs) + return super().std(ddof=ddof, **kwargs) + + @Substitution(name="rolling", versionadded="") + @Appender(_shared_docs["var"]) + def var(self, ddof=1, *args, **kwargs): + nv.validate_rolling_func("var", args, kwargs) + return super().var(ddof=ddof, **kwargs) + + @Substitution(name="rolling") + @Appender(_doc_template) + @Appender(_shared_docs["skew"]) + def skew(self, **kwargs): + return super().skew(**kwargs) + + _agg_doc = dedent( + """ + Examples + -------- + + The example below will show a rolling calculation with a window size of + four matching the 
equivalent function call using `scipy.stats`. + + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> print(f"{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}") + -1.200000 + >>> print(f"{scipy.stats.kurtosis(arr[1:], bias=False):.6f}") + 3.999946 + >>> s = pd.Series(arr) + >>> s.rolling(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 3.999946 + dtype: float64 + """ + ) + + @Appender(_agg_doc) + @Substitution(name="rolling") + @Appender(_shared_docs["kurt"]) + def kurt(self, **kwargs): + return super().kurt(**kwargs) + + @Substitution(name="rolling") + @Appender(_shared_docs["quantile"]) + def quantile(self, quantile, interpolation="linear", **kwargs): + return super().quantile( + quantile=quantile, interpolation=interpolation, **kwargs + ) + + @Substitution(name="rolling") + @Appender(_doc_template) + @Appender(_shared_docs["cov"]) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + @Substitution(name="rolling") + @Appender(_shared_docs["corr"]) + def corr(self, other=None, pairwise=None, **kwargs): + return super().corr(other=other, pairwise=pairwise, **kwargs) + + +Rolling.__doc__ = Window.__doc__ + + +class RollingGroupby(WindowGroupByMixin, Rolling): + """ + Provide a rolling groupby implementation. + """ + + @property + def _constructor(self): + return Rolling + + def _gotitem(self, key, ndim, subset=None): + + # we are setting the index on the actual object + # here so our index is carried thru to the selected obj + # when we do the splitting for the groupby + if self.on is not None: + self._groupby.obj = self._groupby.obj.set_index(self._on) + self.on = None + return super()._gotitem(key, ndim, subset=subset) + + def _validate_monotonic(self): + """ + Validate that on is monotonic; + we don't care for groupby.rolling + because we have already validated at a higher + level. 
+ """ + pass diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py new file mode 100644 index 00000000..ebe9a3d5 --- /dev/null +++ b/pandas/errors/__init__.py @@ -0,0 +1,184 @@ +# flake8: noqa + +""" +Expose public exceptions & warnings +""" + +from pandas._libs.tslibs import NullFrequencyError, OutOfBoundsDatetime + + +class PerformanceWarning(Warning): + """ + Warning raised when there is a possible performance impact. + """ + + +class UnsupportedFunctionCall(ValueError): + """ + Exception raised when attempting to call a numpy function + on a pandas object, but that function is not supported by + the object e.g. ``np.cumsum(groupby_object)``. + """ + + +class UnsortedIndexError(KeyError): + """ + Error raised when attempting to get a slice of a MultiIndex, + and the index has not been lexsorted. Subclass of `KeyError`. + """ + + +class ParserError(ValueError): + """ + Exception that is raised by an error encountered in parsing file contents. + + This is a generic error raised for errors encountered when functions like + `read_csv` or `read_html` are parsing contents of a file. + + See Also + -------- + read_csv : Read CSV (comma-separated) file into a DataFrame. + read_html : Read HTML table into a DataFrame. + """ + + +class DtypeWarning(Warning): + """ + Warning raised when reading different dtypes in a column from a file. + + Raised for a dtype incompatibility. This can happen whenever `read_csv` + or `read_table` encounter non-uniform dtypes in a column(s) of a given + CSV file. + + See Also + -------- + read_csv : Read CSV (comma-separated) file into a DataFrame. + read_table : Read general delimited file into a DataFrame. + + Notes + ----- + This warning is issued when dealing with larger files because the dtype + checking happens per chunk read. + + Despite the warning, the CSV file is read with mixed types in a single + column which will be an object type. See the examples below to better + understand this issue. 
+ + Examples + -------- + This example creates and reads a large CSV file with a column that contains + `int` and `str`. + + >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 + + ... ['1'] * 100000), + ... 'b': ['b'] * 300000}) + >>> df.to_csv('test.csv', index=False) + >>> df2 = pd.read_csv('test.csv') + ... # DtypeWarning: Columns (0) have mixed types + + Important to notice that ``df2`` will contain both `str` and `int` for the + same input, '1'. + + >>> df2.iloc[262140, 0] + '1' + >>> type(df2.iloc[262140, 0]) + <class 'str'> + >>> df2.iloc[262150, 0] + 1 + >>> type(df2.iloc[262150, 0]) + <class 'int'> + + One way to solve this issue is using the `dtype` parameter in the + `read_csv` and `read_table` functions to make the conversion explicit: + + >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) + + No warning was issued. + + >>> import os + >>> os.remove('test.csv') + """ + + +class EmptyDataError(ValueError): + """ + Exception that is thrown in `pd.read_csv` (by both the C and + Python engines) when empty data or header is encountered. + """ + + +class ParserWarning(Warning): + """ + Warning raised when reading a file that doesn't use the default 'c' parser. + + Raised by `pd.read_csv` and `pd.read_table` when it is necessary to change + parsers, generally from the default 'c' parser to 'python'. + + It happens due to a lack of support or functionality for parsing a + particular attribute of a CSV file with the requested engine. + + Currently, 'c' unsupported options include the following parameters: + + 1. `sep` other than a single character (e.g. regex separators) + 2. `skipfooter` higher than 0 + 3. `sep=None` with `delim_whitespace=False` + + The warning can be avoided by adding `engine='python'` as a parameter in + `pd.read_csv` and `pd.read_table` methods. + + See Also + -------- + pd.read_csv : Read CSV (comma-separated) file into DataFrame. + pd.read_table : Read general delimited file into DataFrame. 
+ + Examples + -------- + Using a `sep` in `pd.read_csv` other than a single character: + + >>> import io + >>> csv = '''a;b;c + ... 1;1,8 + ... 1;2,1''' + >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP + ... # ParserWarning: Falling back to the 'python' engine... + + Adding `engine='python'` to `pd.read_csv` removes the Warning: + + >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python') + """ + + +class MergeError(ValueError): + """ + Error raised when problems arise during merging due to problems + with input data. Subclass of `ValueError`. + """ + + +class AccessorRegistrationWarning(Warning): + """ + Warning for attribute conflicts in accessor registration. + """ + + +class AbstractMethodError(NotImplementedError): + """ + Raise this error instead of NotImplementedError for abstract methods + while keeping compatibility with Python 2 and Python 3. + """ + + def __init__(self, class_instance, methodtype="method"): + types = {"method", "classmethod", "staticmethod", "property"} + if methodtype not in types: + raise ValueError( + f"methodtype must be one of {types}, got {methodtype} instead." 
+ ) + self.methodtype = methodtype + self.class_instance = class_instance + + def __str__(self) -> str: + if self.methodtype == "classmethod": + name = self.class_instance.__name__ + else: + name = type(self.class_instance).__name__ + return f"This {self.methodtype} must be defined in the concrete class {name}" diff --git a/pandas/io/__init__.py b/pandas/io/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/io/api.py b/pandas/io/api.py new file mode 100644 index 00000000..2d25ffe5 --- /dev/null +++ b/pandas/io/api.py @@ -0,0 +1,21 @@ +""" +Data IO api +""" + +# flake8: noqa + +from pandas.io.clipboards import read_clipboard +from pandas.io.excel import ExcelFile, ExcelWriter, read_excel +from pandas.io.feather_format import read_feather +from pandas.io.gbq import read_gbq +from pandas.io.html import read_html +from pandas.io.json import read_json +from pandas.io.orc import read_orc +from pandas.io.parquet import read_parquet +from pandas.io.parsers import read_csv, read_fwf, read_table +from pandas.io.pickle import read_pickle, to_pickle +from pandas.io.pytables import HDFStore, read_hdf +from pandas.io.sas import read_sas +from pandas.io.spss import read_spss +from pandas.io.sql import read_sql, read_sql_query, read_sql_table +from pandas.io.stata import read_stata diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py new file mode 100644 index 00000000..f808b7e7 --- /dev/null +++ b/pandas/io/clipboard/__init__.py @@ -0,0 +1,667 @@ +""" +Pyperclip + +A cross-platform clipboard module for Python, +with copy & paste functions for plain text. +By Al Sweigart al@inventwithpython.com +BSD License + +Usage: + import pyperclip + pyperclip.copy('The text to be copied to the clipboard.') + spam = pyperclip.paste() + + if not pyperclip.is_available(): + print("Copy functionality unavailable!") + +On Windows, no additional modules are needed. 
+On Mac, the pyobjc module is used, falling back to the pbcopy and pbpaste cli + commands. (These commands should come with OS X.). +On Linux, install xclip or xsel via package manager. For example, in Debian: + sudo apt-get install xclip + sudo apt-get install xsel + +Otherwise on Linux, you will need the PyQt5 modules installed. + +This module does not work with PyGObject yet. + +Cygwin is currently not supported. + +Security Note: This module runs programs with these names: + - which + - where + - pbcopy + - pbpaste + - xclip + - xsel + - klipper + - qdbus +A malicious user could rename or add programs with these names, tricking +Pyperclip into running them with whatever permissions the Python process has. + +""" +__version__ = "1.7.0" + +import contextlib +import ctypes +from ctypes import c_size_t, c_wchar, c_wchar_p, get_errno, sizeof +import os +import platform +import subprocess +import time +import warnings + +# `import PyQt4` sys.exit()s if DISPLAY is not in the environment. +# Thus, we need to detect the presence of $DISPLAY manually +# and not load PyQt4 if it is absent. +HAS_DISPLAY = os.getenv("DISPLAY", False) + +EXCEPT_MSG = """ + Pyperclip could not find a copy/paste mechanism for your system. + For more information, please visit + https://pyperclip.readthedocs.io/en/latest/introduction.html#not-implemented-error + """ + +ENCODING = "utf-8" + +# The "which" unix command finds where a command is. 
+if platform.system() == "Windows": + WHICH_CMD = "where" +else: + WHICH_CMD = "which" + + +def _executable_exists(name): + return ( + subprocess.call( + [WHICH_CMD, name], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + == 0 + ) + + +# Exceptions +class PyperclipException(RuntimeError): + pass + + +class PyperclipWindowsException(PyperclipException): + def __init__(self, message): + message += f" ({ctypes.WinError()})" + super().__init__(message) + + +def _stringifyText(text) -> str: + acceptedTypes = (str, int, float, bool) + if not isinstance(text, acceptedTypes): + raise PyperclipException( + f"only str, int, float, and bool values " + f"can be copied to the clipboard, not {type(text).__name__}" + ) + return str(text) + + +def init_osx_pbcopy_clipboard(): + def copy_osx_pbcopy(text): + text = _stringifyText(text) # Converts non-str values to str. + p = subprocess.Popen(["pbcopy", "w"], stdin=subprocess.PIPE, close_fds=True) + p.communicate(input=text.encode(ENCODING)) + + def paste_osx_pbcopy(): + p = subprocess.Popen(["pbpaste", "r"], stdout=subprocess.PIPE, close_fds=True) + stdout, stderr = p.communicate() + return stdout.decode(ENCODING) + + return copy_osx_pbcopy, paste_osx_pbcopy + + +def init_osx_pyobjc_clipboard(): + def copy_osx_pyobjc(text): + """Copy string argument to clipboard""" + text = _stringifyText(text) # Converts non-str values to str. 
def init_qt_clipboard():
    """
    Build the copy/paste pair backed by a Qt application's clipboard.

    ``QApplication`` is imported from qtpy when available, otherwise from
    PyQt5, otherwise from PyQt4, and is bound as a module-level global.

    Returns
    -------
    tuple of callables
        ``(copy_qt, paste_qt)``.
    """
    global QApplication
    # $DISPLAY should exist

    # Binding preference: qtpy first, then PyQt5, with PyQt4 as last resort.
    try:
        from qtpy.QtWidgets import QApplication
    except ImportError:
        try:
            from PyQt5.QtWidgets import QApplication
        except ImportError:
            from PyQt4.QtGui import QApplication

    # Reuse the application object if one already exists; otherwise make one.
    existing = QApplication.instance()
    qt_app = QApplication([]) if existing is None else existing

    def copy_qt(text):
        """Place ``text`` (converted to str) on the Qt clipboard."""
        qt_app.clipboard().setText(_stringifyText(text))

    def paste_qt() -> str:
        """Return the Qt clipboard's text as a str."""
        return str(qt_app.clipboard().text())

    return copy_qt, paste_qt
def init_klipper_clipboard():
    """
    Build the copy/paste pair that talks to KDE's Klipper through ``qdbus``.

    Returns
    -------
    tuple of callables
        ``(copy_klipper, paste_klipper)``.
    """

    def copy_klipper(text):
        """Pass ``text`` to Klipper's ``setClipboardContents`` via qdbus."""
        text = _stringifyText(text)  # Converts non-str values to str.
        p = subprocess.Popen(
            [
                "qdbus",
                "org.kde.klipper",
                "/klipper",
                "setClipboardContents",
                text.encode(ENCODING),
            ],
            stdin=subprocess.PIPE,
            close_fds=True,
        )
        p.communicate(input=None)

    def paste_klipper():
        """Return the output of Klipper's ``getClipboardContents`` as str."""
        p = subprocess.Popen(
            ["qdbus", "org.kde.klipper", "/klipper", "getClipboardContents"],
            stdout=subprocess.PIPE,
            close_fds=True,
        )
        stdout, _ = p.communicate()

        # Workaround for https://bugs.kde.org/show_bug.cgi?id=342874
        # TODO: https://github.com/asweigart/pyperclip/issues/43
        clipboardContents = stdout.decode(ENCODING)
        # Klipper appends a newline even to a blank clipboard, so exactly one
        # trailing newline is dropped. This is checked with a plain condition
        # rather than ``assert``: asserts vanish under ``python -O`` and would
        # otherwise turn empty qdbus output into an AssertionError.
        if clipboardContents.endswith("\n"):
            clipboardContents = clipboardContents[:-1]
        return clipboardContents

    return copy_klipper, paste_klipper
class CheckedCall:
    """
    Callable proxy that runs a wrapped function and checks for failure.

    Attribute assignments on the proxy (for example ``argtypes`` or
    ``restype``) are forwarded to the wrapped function instead of being
    stored on the proxy itself.
    """

    def __init__(self, f):
        # Go around our own __setattr__, which would otherwise forward this
        # assignment to ``f`` instead of storing it on the proxy.
        object.__setattr__(self, "f", f)

    def __call__(self, *args):
        """
        Call the wrapped function with ``args`` and return its result.

        Raises PyperclipWindowsException when the result is falsy and
        ``get_errno()`` is non-zero.
        """
        result = self.f(*args)
        if result:
            return result
        if get_errno():
            raise PyperclipWindowsException("Error calling " + self.f.__name__)
        return result

    def __setattr__(self, key, value):
        # Everything assigned after construction lands on the wrapped callable.
        setattr(self.f, key, value)
safeEmptyClipboard.restype = BOOL + + safeGetClipboardData = CheckedCall(windll.user32.GetClipboardData) + safeGetClipboardData.argtypes = [UINT] + safeGetClipboardData.restype = HANDLE + + safeSetClipboardData = CheckedCall(windll.user32.SetClipboardData) + safeSetClipboardData.argtypes = [UINT, HANDLE] + safeSetClipboardData.restype = HANDLE + + safeGlobalAlloc = CheckedCall(windll.kernel32.GlobalAlloc) + safeGlobalAlloc.argtypes = [UINT, c_size_t] + safeGlobalAlloc.restype = HGLOBAL + + safeGlobalLock = CheckedCall(windll.kernel32.GlobalLock) + safeGlobalLock.argtypes = [HGLOBAL] + safeGlobalLock.restype = LPVOID + + safeGlobalUnlock = CheckedCall(windll.kernel32.GlobalUnlock) + safeGlobalUnlock.argtypes = [HGLOBAL] + safeGlobalUnlock.restype = BOOL + + wcslen = CheckedCall(msvcrt.wcslen) + wcslen.argtypes = [c_wchar_p] + wcslen.restype = UINT + + GMEM_MOVEABLE = 0x0002 + CF_UNICODETEXT = 13 + + @contextlib.contextmanager + def window(): + """ + Context that provides a valid Windows hwnd. + """ + # we really just need the hwnd, so setting "STATIC" + # as predefined lpClass is just fine. + hwnd = safeCreateWindowExA( + 0, b"STATIC", None, 0, 0, 0, 0, 0, None, None, None, None + ) + try: + yield hwnd + finally: + safeDestroyWindow(hwnd) + + @contextlib.contextmanager + def clipboard(hwnd): + """ + Context manager that opens the clipboard and prevents + other applications from modifying the clipboard content. + """ + # We may not get the clipboard handle immediately because + # some other application is accessing it (?) + # We try for at least 500ms to get the clipboard. 
def init_wsl_clipboard():
    """
    Build the copy/paste pair used under the Windows Subsystem for Linux.

    Copying pipes the text to ``clip.exe``; pasting reads the output of
    ``powershell.exe -command Get-Clipboard``.

    Returns
    -------
    tuple of callables
        ``(copy_wsl, paste_wsl)``.
    """

    def copy_wsl(text):
        """Pipe ``text`` (converted to str, then encoded) into clip.exe."""
        text = _stringifyText(text)  # Converts non-str values to str.
        p = subprocess.Popen(["clip.exe"], stdin=subprocess.PIPE, close_fds=True)
        p.communicate(input=text.encode(ENCODING))

    def paste_wsl():
        """Return the decoded Get-Clipboard output without its final CRLF."""
        p = subprocess.Popen(
            ["powershell.exe", "-command", "Get-Clipboard"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            close_fds=True,
        )
        stdout, _ = p.communicate()
        # WSL appends "\r\n" to the contents. Only strip it when it is really
        # there: slicing off two bytes unconditionally would discard payload
        # (or split a multi-byte character) if the terminator were missing.
        if stdout.endswith(b"\r\n"):
            stdout = stdout[:-2]
        return stdout.decode(ENCODING)

    return copy_wsl, paste_wsl
def set_clipboard(clipboard):
    """
    Explicitly sets the clipboard mechanism. The "clipboard mechanism" is how
    the copy() and paste() functions interact with the operating system to
    implement the copy/paste feature. The clipboard parameter must be one of:
        - pbcopy
        - pyobjc (default on Mac OS X)
        - qt
        - xclip
        - xsel
        - klipper
        - windows (default on Windows)
        - no (this is what is set when no clipboard mechanism can be found)

    Parameters
    ----------
    clipboard : str
        Name of the mechanism; one of the keys listed above.

    Raises
    ------
    ValueError
        If ``clipboard`` is not one of the supported names.
    """
    global copy, paste

    clipboard_types = {
        "pbcopy": init_osx_pbcopy_clipboard,
        "pyobjc": init_osx_pyobjc_clipboard,
        "qt": init_qt_clipboard,  # TODO - split this into 'qtpy', 'pyqt4', and 'pyqt5'
        "xclip": init_xclip_clipboard,
        "xsel": init_xsel_clipboard,
        "klipper": init_klipper_clipboard,
        "windows": init_windows_clipboard,
        "no": init_no_clipboard,
    }

    if clipboard not in clipboard_types:
        allowed_clipboard_types = [repr(name) for name in clipboard_types]
        raise ValueError(
            f"Argument must be one of {', '.join(allowed_clipboard_types)}"
        )

    # Sets pyperclip's copy() and paste() functions:
    copy, paste = clipboard_types[clipboard]()
function is used for later calls. + + This allows users to import pyperclip without having determine_clipboard() + automatically run, which will automatically select a clipboard mechanism. + This could be a problem if it selects, say, the memory-heavy PyQt4 module + but the user was just going to immediately call set_clipboard() to use a + different clipboard mechanism. + + The lazy loading this stub function implements gives the user a chance to + call set_clipboard() to pick another clipboard mechanism. Or, if the user + simply calls copy() or paste() without calling set_clipboard() first, + will fall back on whatever clipboard mechanism that determine_clipboard() + automatically chooses. + """ + global copy, paste + copy, paste = determine_clipboard() + return copy(text) + + +def lazy_load_stub_paste(): + """ + A stub function for paste(), which will load the real paste() function when + called so that the real paste() function is used for later calls. + + This allows users to import pyperclip without having determine_clipboard() + automatically run, which will automatically select a clipboard mechanism. + This could be a problem if it selects, say, the memory-heavy PyQt4 module + but the user was just going to immediately call set_clipboard() to use a + different clipboard mechanism. + + The lazy loading this stub function implements gives the user a chance to + call set_clipboard() to pick another clipboard mechanism. Or, if the user + simply calls copy() or paste() without calling set_clipboard() first, + will fall back on whatever clipboard mechanism that determine_clipboard() + automatically chooses. 
def read_clipboard(sep=r"\s+", **kwargs):  # pragma: no cover
    r"""
    Read text from clipboard and pass to read_csv.

    Parameters
    ----------
    sep : str, default '\s+'
        A string or regex delimiter. The default of '\s+' denotes
        one or more whitespace characters.

    **kwargs
        See read_csv for the full argument list.

    Returns
    -------
    DataFrame
        A parsed DataFrame object.

    Raises
    ------
    NotImplementedError
        If an ``encoding`` other than utf-8 is requested.
    """
    encoding = kwargs.pop("encoding", "utf-8")

    # only utf-8 is valid for passed value because that's what clipboard
    # supports
    if encoding is not None and encoding.lower().replace("-", "") != "utf8":
        raise NotImplementedError("reading from clipboard only supports utf-8 encoding")

    from pandas.io.clipboard import clipboard_get
    from pandas.io.parsers import read_csv

    text = clipboard_get()

    # Try to decode (if needed, as "text" might already be a string here).
    # "encoding" was popped from kwargs above, so the local value is used;
    # looking it up in kwargs again would always yield None.
    try:
        text = text.decode(encoding or get_option("display.encoding"))
    except AttributeError:
        pass

    # Excel copies into clipboard with \t separation
    # inspect no more then the 10 first lines, if they
    # all contain an equal number (>0) of tabs, infer
    # that this came from excel and set 'sep' accordingly
    lines = text[:10000].split("\n")[:-1][:10]

    # Need to remove leading white space, since read_csv
    # accepts:
    #    a  b
    # 0  1  2
    # 1  3  4

    counts = {x.lstrip().count("\t") for x in lines}
    if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
        sep = "\t"

    # Edge case where sep is specified to be None, return to default
    if sep is None and kwargs.get("delim_whitespace") is None:
        sep = r"\s+"

    # Regex separator currently only works with python engine.
    # Default to python if separator is multi-character (regex).
    # sep can still be None here (delim_whitespace was supplied); in that
    # case there is nothing to measure and it is forwarded unchanged.
    if sep is not None and len(sep) > 1:
        engine = kwargs.get("engine")
        if engine is None:
            kwargs["engine"] = "python"
        elif engine == "c":
            warnings.warn(
                "read_clipboard with regex separator does not work "
                "properly with c engine"
            )

    return read_csv(StringIO(text), sep=sep, **kwargs)
+ if False, write a string representation of the object + to the clipboard + sep : optional, defaults to tab + other keywords are passed to to_csv + + Notes + ----- + Requirements for your platform + - Linux: xclip, or xsel (with PyQt4 modules) + - Windows: + - OS X: + """ + encoding = kwargs.pop("encoding", "utf-8") + + # testing if an invalid encoding is passed to clipboard + if encoding is not None and encoding.lower().replace("-", "") != "utf8": + raise ValueError("clipboard only supports utf-8 encoding") + + from pandas.io.clipboard import clipboard_set + + if excel is None: + excel = True + + if excel: + try: + if sep is None: + sep = "\t" + buf = StringIO() + + # clipboard_set (pyperclip) expects unicode + obj.to_csv(buf, sep=sep, encoding="utf-8", **kwargs) + text = buf.getvalue() + + clipboard_set(text) + return + except TypeError: + warnings.warn( + "to_clipboard in excel mode requires a single character separator." + ) + elif sep is not None: + warnings.warn("to_clipboard with excel=False ignores the sep argument") + + if isinstance(obj, ABCDataFrame): + # str(df) has various unhelpful defaults, like truncation + with option_context("display.max_colwidth", None): + objstr = obj.to_string(**kwargs) + else: + objstr = str(obj) + clipboard_set(objstr) diff --git a/pandas/io/common.py b/pandas/io/common.py new file mode 100644 index 00000000..96179659 --- /dev/null +++ b/pandas/io/common.py @@ -0,0 +1,528 @@ +"""Common IO api utilities""" + +import bz2 +from collections import abc +import gzip +from io import BufferedIOBase, BytesIO, RawIOBase +import mmap +import os +import pathlib +from typing import IO, Any, AnyStr, Dict, List, Mapping, Optional, Tuple, Union +from urllib.parse import ( # noqa + urlencode, + urljoin, + urlparse as parse_url, + uses_netloc, + uses_params, + uses_relative, +) +import zipfile + +from pandas._typing import FilePathOrBuffer +from pandas.compat import _get_lzma_file, _import_lzma +from pandas.errors import ( # noqa + 
def validate_header_arg(header) -> None:
    """
    Reject boolean values for the ``header`` argument.

    Parameters
    ----------
    header : object
        The value supplied for ``header``.

    Raises
    ------
    TypeError
        If ``header`` is a bool.
    """
    if not isinstance(header, bool):
        return

    message = (
        "Passing a bool to header is invalid. "
        "Use header=None for no header or "
        "header=int or list-like of ints to specify "
        "the row(s) making up the column names"
    )
    raise TypeError(message)
def is_s3_url(url) -> bool:
    """
    Check for an s3, s3n, or s3a url.

    Parameters
    ----------
    url : object
        Candidate value; anything that is not a str is rejected.

    Returns
    -------
    bool
        True if ``url`` is a str whose parsed scheme is 's3', 's3n' or 's3a'.
    """
    if isinstance(url, str):
        scheme = parse_url(url).scheme
        return scheme in ["s3", "s3n", "s3a"]
    return False
+ + Parameters + ---------- + filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path), + or buffer + compression : {{'gzip', 'bz2', 'zip', 'xz', None}}, optional + encoding : the encoding to use to decode bytes, default is 'utf-8' + mode : str, optional + + Returns + ------- + tuple of ({a filepath_ or buffer or S3File instance}, + encoding, str, + compression, str, + should_close, bool) + """ + filepath_or_buffer = stringify_path(filepath_or_buffer) + + if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer): + req = urlopen(filepath_or_buffer) + content_encoding = req.headers.get("Content-Encoding", None) + if content_encoding == "gzip": + # Override compression based on Content-Encoding header + compression = "gzip" + reader = BytesIO(req.read()) + req.close() + return reader, encoding, compression, True + + if is_s3_url(filepath_or_buffer): + from pandas.io import s3 + + return s3.get_filepath_or_buffer( + filepath_or_buffer, encoding=encoding, compression=compression, mode=mode + ) + + if is_gcs_url(filepath_or_buffer): + from pandas.io import gcs + + return gcs.get_filepath_or_buffer( + filepath_or_buffer, encoding=encoding, compression=compression, mode=mode + ) + + if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): + return _expand_user(filepath_or_buffer), None, compression, False + + if not is_file_like(filepath_or_buffer): + msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}" + raise ValueError(msg) + + return filepath_or_buffer, None, compression, False + + +def file_path_to_url(path: str) -> str: + """ + converts an absolute native path to a FILE URL. 
def get_compression_method(
    compression: Optional[Union[str, Mapping[str, str]]]
) -> Tuple[Optional[str], Dict[str, str]]:
    """
    Simplifies a compression argument to a compression method string and
    a mapping containing additional arguments.

    Parameters
    ----------
    compression : str, mapping or None
        If string (or None), it is returned as the compression method
        unchanged. If mapping, value at key 'method' specifies the
        compression method and the remaining items are the extra arguments.

    Returns
    -------
    tuple of ({compression method}, Optional[str]
              {compression arguments}, Dict[str, str])

    Raises
    ------
    ValueError on mapping missing 'method' key
    """
    if not isinstance(compression, Mapping):
        return compression, {}

    # Work on a copy so the caller's mapping is never mutated by the pop.
    compression_args = dict(compression)
    try:
        method = compression_args.pop("method")
    except KeyError as err:
        # Chain explicitly so the traceback shows the lookup that failed.
        raise ValueError("If mapping, compression must have key 'method'") from err
    return method, compression_args
+ + Returns + ------- + string or None + + Raises + ------ + ValueError on invalid compression specified. + """ + + # No compression has been explicitly specified + if compression is None: + return None + + # Infer compression + if compression == "infer": + # Convert all path types (e.g. pathlib.Path) to strings + filepath_or_buffer = stringify_path(filepath_or_buffer) + if not isinstance(filepath_or_buffer, str): + # Cannot infer compression of a buffer, assume no compression + return None + + # Infer compression from the filename/URL extension + for compression, extension in _compression_to_extension.items(): + if filepath_or_buffer.endswith(extension): + return compression + return None + + # Compression has been specified. Check that it's valid + if compression in _compression_to_extension: + return compression + + msg = f"Unrecognized compression type: {compression}" + valid = ["infer", None] + sorted(_compression_to_extension) + msg += f"\nValid compression types are {valid}" + raise ValueError(msg) + + +def get_handle( + path_or_buf, + mode: str, + encoding=None, + compression: Optional[Union[str, Mapping[str, Any]]] = None, + memory_map: bool = False, + is_text: bool = True, +): + """ + Get file handle for given path/buffer and mode. + + Parameters + ---------- + path_or_buf : str or file handle + File path or object. + mode : str + Mode to open path_or_buf with. + encoding : str or None + Encoding to use. + compression : str or dict, default None + If string, specifies compression mode. If dict, value at key 'method' + specifies compression mode. Compression mode must be one of {'infer', + 'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer' + and `filepath_or_buffer` is path-like, then detect compression from + the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise + no compression). If dict and compression mode is 'zip' or inferred as + 'zip', other entries passed as additional compression options. + + .. 
versionchanged:: 1.0.0 + + May now be a dict with key 'method' as compression mode + and other keys as compression options if compression + mode is 'zip'. + + memory_map : boolean, default False + See parsers._parser_params for more information. + is_text : boolean, default True + whether file/buffer is in text format (csv, json, etc.), or in binary + mode (pickle, etc.). + + Returns + ------- + f : file-like + A file-like object. + handles : list of file-like objects + A list of file-like object that were opened in this function. + """ + try: + from s3fs import S3File + + need_text_wrapping = (BufferedIOBase, RawIOBase, S3File) + except ImportError: + need_text_wrapping = (BufferedIOBase, RawIOBase) # type: ignore + + handles: List[IO] = list() + f = path_or_buf + + # Convert pathlib.Path/py.path.local or string + path_or_buf = stringify_path(path_or_buf) + is_path = isinstance(path_or_buf, str) + + compression, compression_args = get_compression_method(compression) + if is_path: + compression = infer_compression(path_or_buf, compression) + + if compression: + + # GZ Compression + if compression == "gzip": + if is_path: + f = gzip.open(path_or_buf, mode) + else: + f = gzip.GzipFile(fileobj=path_or_buf) + + # BZ Compression + elif compression == "bz2": + if is_path: + f = bz2.BZ2File(path_or_buf, mode) + else: + f = bz2.BZ2File(path_or_buf) + + # ZIP Compression + elif compression == "zip": + zf = _BytesZipFile(path_or_buf, mode, **compression_args) + # Ensure the container is closed as well. + handles.append(zf) + if zf.mode == "w": + f = zf + elif zf.mode == "r": + zip_names = zf.namelist() + if len(zip_names) == 1: + f = zf.open(zip_names.pop()) + elif len(zip_names) == 0: + raise ValueError(f"Zero files found in ZIP file {path_or_buf}") + else: + raise ValueError( + "Multiple files found in ZIP file." 
+ f" Only one file per ZIP: {zip_names}" + ) + + # XZ Compression + elif compression == "xz": + f = _get_lzma_file(lzma)(path_or_buf, mode) + + # Unrecognized Compression + else: + msg = f"Unrecognized compression type: {compression}" + raise ValueError(msg) + + handles.append(f) + + elif is_path: + if encoding: + # Encoding + f = open(path_or_buf, mode, encoding=encoding, newline="") + elif is_text: + # No explicit encoding + f = open(path_or_buf, mode, errors="replace", newline="") + else: + # Binary mode + f = open(path_or_buf, mode) + handles.append(f) + + # Convert BytesIO or file objects passed with an encoding + if is_text and (compression or isinstance(f, need_text_wrapping)): + from io import TextIOWrapper + + g = TextIOWrapper(f, encoding=encoding, newline="") + if not isinstance(f, (BufferedIOBase, RawIOBase)): + handles.append(g) + f = g + + if memory_map and hasattr(f, "fileno"): + try: + wrapped = _MMapWrapper(f) + f.close() + f = wrapped + except Exception: + # we catch any errors that may have occurred + # because that is consistent with the lower-level + # functionality of the C engine (pd.read_csv), so + # leave the file handler as is then + pass + + return f, handles + + +class _BytesZipFile(zipfile.ZipFile, BytesIO): # type: ignore + """ + Wrapper for standard library class ZipFile and allow the returned file-like + handle to accept byte strings via `write` method. + + BytesIO provides attributes of file-like object and ZipFile.writestr writes + bytes strings into a member of the archive. 
+ """ + + # GH 17778 + def __init__( + self, + file: FilePathOrBuffer, + mode: str, + archive_name: Optional[str] = None, + **kwargs, + ): + if mode in ["wb", "rb"]: + mode = mode.replace("b", "") + self.archive_name = archive_name + super().__init__(file, mode, zipfile.ZIP_DEFLATED, **kwargs) + + def write(self, data): + archive_name = self.filename + if self.archive_name is not None: + archive_name = self.archive_name + super().writestr(archive_name, data) + + @property + def closed(self): + return self.fp is None + + +class _MMapWrapper(abc.Iterator): + """ + Wrapper for the Python's mmap class so that it can be properly read in + by Python's csv.reader class. + + Parameters + ---------- + f : file object + File object to be mapped onto memory. Must support the 'fileno' + method or have an equivalent attribute + + """ + + def __init__(self, f: IO): + self.mmap = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + + def __getattr__(self, name: str): + return getattr(self.mmap, name) + + def __iter__(self) -> "_MMapWrapper": + return self + + def __next__(self) -> str: + newbytes = self.mmap.readline() + + # readline returns bytes, not str, but Python's CSV reader + # expects str, so convert the output to str before continuing + newline = newbytes.decode("utf-8") + + # mmap doesn't raise if reading past the allocated + # data but instead returns an empty string, so raise + # if that is returned + if newline == "": + raise StopIteration + return newline diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py new file mode 100644 index 00000000..7fdca2d6 --- /dev/null +++ b/pandas/io/date_converters.py @@ -0,0 +1,64 @@ +"""This module is designed for community supported date conversion functions""" +import numpy as np + +from pandas._libs.tslibs import parsing + + +def parse_date_time(date_col, time_col): + date_col = _maybe_cast(date_col) + time_col = _maybe_cast(time_col) + return parsing.try_parse_date_and_time(date_col, time_col) + + +def 
parse_date_fields(year_col, month_col, day_col): + year_col = _maybe_cast(year_col) + month_col = _maybe_cast(month_col) + day_col = _maybe_cast(day_col) + return parsing.try_parse_year_month_day(year_col, month_col, day_col) + + +def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_col): + year_col = _maybe_cast(year_col) + month_col = _maybe_cast(month_col) + day_col = _maybe_cast(day_col) + hour_col = _maybe_cast(hour_col) + minute_col = _maybe_cast(minute_col) + second_col = _maybe_cast(second_col) + return parsing.try_parse_datetime_components( + year_col, month_col, day_col, hour_col, minute_col, second_col + ) + + +def generic_parser(parse_func, *cols): + N = _check_columns(cols) + results = np.empty(N, dtype=object) + + for i in range(N): + args = [c[i] for c in cols] + results[i] = parse_func(*args) + + return results + + +def _maybe_cast(arr): + if not arr.dtype.type == np.object_: + arr = np.array(arr, dtype=object) + return arr + + +def _check_columns(cols): + if not len(cols): + raise AssertionError("There must be at least 1 column") + + head, tail = cols[0], cols[1:] + + N = len(head) + + for i, n in enumerate(map(len, tail)): + if n != N: + raise AssertionError( + f"All columns must have the same length: {N}; " + f"column {i} has length {n}" + ) + + return N diff --git a/pandas/io/excel/__init__.py b/pandas/io/excel/__init__.py new file mode 100644 index 00000000..455abaa7 --- /dev/null +++ b/pandas/io/excel/__init__.py @@ -0,0 +1,16 @@ +from pandas.io.excel._base import ExcelFile, ExcelWriter, read_excel +from pandas.io.excel._openpyxl import _OpenpyxlWriter +from pandas.io.excel._util import register_writer +from pandas.io.excel._xlsxwriter import _XlsxWriter +from pandas.io.excel._xlwt import _XlwtWriter + +__all__ = ["read_excel", "ExcelWriter", "ExcelFile"] + + +register_writer(_OpenpyxlWriter) + + +register_writer(_XlwtWriter) + + +register_writer(_XlsxWriter) diff --git a/pandas/io/excel/_base.py 
b/pandas/io/excel/_base.py new file mode 100644 index 00000000..a8ae3595 --- /dev/null +++ b/pandas/io/excel/_base.py @@ -0,0 +1,915 @@ +import abc +from datetime import date, datetime, timedelta +from io import BytesIO +import os +from textwrap import fill + +from pandas._config import config + +from pandas._libs.parsers import STR_NA_VALUES +from pandas.errors import EmptyDataError +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import is_bool, is_float, is_integer, is_list_like + +from pandas.core.frame import DataFrame + +from pandas.io.common import ( + get_filepath_or_buffer, + is_url, + stringify_path, + urlopen, + validate_header_arg, +) +from pandas.io.excel._util import ( + _fill_mi_header, + _get_default_writer, + _maybe_convert_usecols, + _pop_header_name, + get_writer, +) +from pandas.io.formats.printing import pprint_thing +from pandas.io.parsers import TextParser + +_read_excel_doc = ( + """ +Read an Excel file into a pandas DataFrame. + +Supports `xls`, `xlsx`, `xlsm`, `xlsb`, and `odf` file extensions +read from a local filesystem or URL. Supports an option to read +a single sheet or a list of sheets. + +Parameters +---------- +io : str, bytes, ExcelFile, xlrd.Book, path object, or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.xlsx``. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handler (e.g. via builtin ``open`` function) + or ``StringIO``. +sheet_name : str, int, list, or None, default 0 + Strings are used for sheet names. Integers are used in zero-indexed + sheet positions. Lists of strings/integers are used to request + multiple sheets. Specify None to get all sheets. 
+ + Available cases: + + * Defaults to ``0``: 1st sheet as a `DataFrame` + * ``1``: 2nd sheet as a `DataFrame` + * ``"Sheet1"``: Load sheet with name "Sheet1" + * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5" + as a dict of `DataFrame` + * None: All sheets. + +header : int, list of int, default 0 + Row (0-indexed) to use for the column labels of the parsed + DataFrame. If a list of integers is passed those row positions will + be combined into a ``MultiIndex``. Use None if there is no header. +names : array-like, default None + List of column names to use. If file contains no header row, + then you should explicitly pass header=None. +index_col : int, list of int, default None + Column (0-indexed) to use as the row labels of the DataFrame. + Pass None if there is no such column. If a list is passed, + those columns will be combined into a ``MultiIndex``. If a + subset of data is selected with ``usecols``, index_col + is based on the subset. +usecols : int, str, list-like, or callable default None + * If None, then parse all columns. + * If str, then indicates comma separated list of Excel column letters + and column ranges (e.g. "A:E" or "A,C,E:F"). Ranges are inclusive of + both sides. + * If list of int, then indicates list of column numbers to be parsed. + * If list of string, then indicates list of column names to be parsed. + + .. versionadded:: 0.24.0 + + * If callable, then evaluate each column name against it and parse the + column if the callable returns ``True``. + + Returns a subset of the columns according to behavior above. + + .. versionadded:: 0.24.0 + +squeeze : bool, default False + If the parsed data only contains one column then return a Series. +dtype : Type name or dict of column -> type, default None + Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} + Use `object` to preserve data as stored in Excel and not interpret dtype. 
+ If converters are specified, they will be applied INSTEAD + of dtype conversion. +engine : str, default None + If io is not a buffer or path, this must be set to identify io. + Acceptable values are None, "xlrd", "openpyxl" or "odf". +converters : dict, default None + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the Excel cell content, and return the transformed + content. +true_values : list, default None + Values to consider as True. +false_values : list, default None + Values to consider as False. +skiprows : list-like + Rows to skip at the beginning (0-indexed). +nrows : int, default None + Number of rows to parse. + + .. versionadded:: 0.23.0 + +na_values : scalar, str, list-like, or dict, default None + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted + as NaN: '""" + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. +keep_default_na : bool, default True + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. +na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). 
In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. +verbose : bool, default False + Indicate number of NA values placed in non-numeric columns. +parse_dates : bool, list-like, or dict, default False + The behavior is as follows: + + * bool. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call + result 'foo' + + If a column or index contains an unparseable date, the entire column or + index will be returned unaltered as an object data type. If you don`t want to + parse some cells as date just change their type in Excel to "Text". + For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``. + + Note: A fast-path exists for iso8601-formatted dates. +date_parser : function, optional + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. Pandas will try to call `date_parser` in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. +thousands : str, default None + Thousands separator for parsing string columns to numeric. Note that + this parameter is only necessary for columns stored as TEXT in Excel, + any numeric columns will automatically be parsed, regardless of display + format. +comment : str, default None + Comments out remainder of line. 
Pass a character or characters to this + argument to indicate comments in the input file. Any data between the + comment string and the end of the current line is ignored. +skipfooter : int, default 0 + Rows at the end to skip (0-indexed). +convert_float : bool, default True + Convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric + data will be read in as floats: Excel stores all numbers as floats + internally. +mangle_dupe_cols : bool, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. Passing in False will cause data to be overwritten if there + are duplicate names in the columns. +**kwds : optional + Optional keyword arguments can be passed to ``TextFileReader``. + +Returns +------- +DataFrame or dict of DataFrames + DataFrame from the passed in Excel file. See notes in sheet_name + argument for more information on when a dict of DataFrames is returned. + +See Also +-------- +to_excel : Write DataFrame to an Excel file. +to_csv : Write DataFrame to a comma-separated values (csv) file. +read_csv : Read a comma-separated values (csv) file into DataFrame. +read_fwf : Read a table of fixed-width formatted lines into DataFrame. + +Examples +-------- +The file can be read using the file name as string or an open file object: + +>>> pd.read_excel('tmp.xlsx', index_col=0) # doctest: +SKIP + Name Value +0 string1 1 +1 string2 2 +2 #Comment 3 + +>>> pd.read_excel(open('tmp.xlsx', 'rb'), +... sheet_name='Sheet3') # doctest: +SKIP + Unnamed: 0 Name Value +0 0 string1 1 +1 1 string2 2 +2 2 #Comment 3 + +Index and header can be specified via the `index_col` and `header` arguments + +>>> pd.read_excel('tmp.xlsx', index_col=None, header=None) # doctest: +SKIP + 0 1 2 +0 NaN Name Value +1 0.0 string1 1 +2 1.0 string2 2 +3 2.0 #Comment 3 + +Column types are inferred but can be explicitly specified + +>>> pd.read_excel('tmp.xlsx', index_col=0, +... 
dtype={'Name': str, 'Value': float}) # doctest: +SKIP + Name Value +0 string1 1.0 +1 string2 2.0 +2 #Comment 3.0 + +True, False, and NA values, and thousands separators have defaults, +but can be explicitly specified, too. Supply the values you would like +as strings or lists of strings! + +>>> pd.read_excel('tmp.xlsx', index_col=0, +... na_values=['string1', 'string2']) # doctest: +SKIP + Name Value +0 NaN 1 +1 NaN 2 +2 #Comment 3 + +Comment lines in the excel input file can be skipped using the `comment` kwarg + +>>> pd.read_excel('tmp.xlsx', index_col=0, comment='#') # doctest: +SKIP + Name Value +0 string1 1.0 +1 string2 2.0 +2 None NaN +""" +) + + +@Appender(_read_excel_doc) +def read_excel( + io, + sheet_name=0, + header=0, + names=None, + index_col=None, + usecols=None, + squeeze=False, + dtype=None, + engine=None, + converters=None, + true_values=None, + false_values=None, + skiprows=None, + nrows=None, + na_values=None, + keep_default_na=True, + verbose=False, + parse_dates=False, + date_parser=None, + thousands=None, + comment=None, + skipfooter=0, + convert_float=True, + mangle_dupe_cols=True, + **kwds, +): + + for arg in ("sheet", "sheetname", "parse_cols"): + if arg in kwds: + raise TypeError(f"read_excel() got an unexpected keyword argument `{arg}`") + + if not isinstance(io, ExcelFile): + io = ExcelFile(io, engine=engine) + elif engine and engine != io.engine: + raise ValueError( + "Engine should not be specified when passing " + "an ExcelFile - ExcelFile already has the engine set" + ) + + return io.parse( + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + dtype=dtype, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + keep_default_na=keep_default_na, + verbose=verbose, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + 
convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) + + +class _BaseExcelReader(metaclass=abc.ABCMeta): + def __init__(self, filepath_or_buffer): + # If filepath_or_buffer is a url, load the data into a BytesIO + if is_url(filepath_or_buffer): + filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) + elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): + filepath_or_buffer, _, _, _ = get_filepath_or_buffer(filepath_or_buffer) + + if isinstance(filepath_or_buffer, self._workbook_class): + self.book = filepath_or_buffer + elif hasattr(filepath_or_buffer, "read"): + # N.B. xlrd.Book has a read attribute too + filepath_or_buffer.seek(0) + self.book = self.load_workbook(filepath_or_buffer) + elif isinstance(filepath_or_buffer, str): + self.book = self.load_workbook(filepath_or_buffer) + elif isinstance(filepath_or_buffer, bytes): + self.book = self.load_workbook(BytesIO(filepath_or_buffer)) + else: + raise ValueError( + "Must explicitly set engine if not passing in buffer or path for io." 
+ ) + + @property + @abc.abstractmethod + def _workbook_class(self): + pass + + @abc.abstractmethod + def load_workbook(self, filepath_or_buffer): + pass + + def close(self): + pass + + @property + @abc.abstractmethod + def sheet_names(self): + pass + + @abc.abstractmethod + def get_sheet_by_name(self, name): + pass + + @abc.abstractmethod + def get_sheet_by_index(self, index): + pass + + @abc.abstractmethod + def get_sheet_data(self, sheet, convert_float): + pass + + def parse( + self, + sheet_name=0, + header=0, + names=None, + index_col=None, + usecols=None, + squeeze=False, + dtype=None, + true_values=None, + false_values=None, + skiprows=None, + nrows=None, + na_values=None, + verbose=False, + parse_dates=False, + date_parser=None, + thousands=None, + comment=None, + skipfooter=0, + convert_float=True, + mangle_dupe_cols=True, + **kwds, + ): + + validate_header_arg(header) + + ret_dict = False + + # Keep sheetname to maintain backwards compatibility. + if isinstance(sheet_name, list): + sheets = sheet_name + ret_dict = True + elif sheet_name is None: + sheets = self.sheet_names + ret_dict = True + else: + sheets = [sheet_name] + + # handle same-type duplicates. 
+ sheets = list(dict.fromkeys(sheets).keys()) + + output = {} + + for asheetname in sheets: + if verbose: + print(f"Reading sheet {asheetname}") + + if isinstance(asheetname, str): + sheet = self.get_sheet_by_name(asheetname) + else: # assume an integer if not a string + sheet = self.get_sheet_by_index(asheetname) + + data = self.get_sheet_data(sheet, convert_float) + usecols = _maybe_convert_usecols(usecols) + + if not data: + output[asheetname] = DataFrame() + continue + + if is_list_like(header) and len(header) == 1: + header = header[0] + + # forward fill and pull out names for MultiIndex column + header_names = None + if header is not None and is_list_like(header): + header_names = [] + control_row = [True] * len(data[0]) + + for row in header: + if is_integer(skiprows): + row += skiprows + + data[row], control_row = _fill_mi_header(data[row], control_row) + + if index_col is not None: + header_name, _ = _pop_header_name(data[row], index_col) + header_names.append(header_name) + + if is_list_like(index_col): + # Forward fill values for MultiIndex index. + if not is_list_like(header): + offset = 1 + header + else: + offset = 1 + max(header) + + # Check if we have an empty dataset + # before trying to collect data. 
+ if offset < len(data): + for col in index_col: + last = data[offset][col] + + for row in range(offset + 1, len(data)): + if data[row][col] == "" or data[row][col] is None: + data[row][col] = last + else: + last = data[row][col] + + has_index_names = is_list_like(header) and len(header) > 1 + + # GH 12292 : error when read one empty column from excel file + try: + parser = TextParser( + data, + names=names, + header=header, + index_col=index_col, + has_index_names=has_index_names, + squeeze=squeeze, + dtype=dtype, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + usecols=usecols, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) + + output[asheetname] = parser.read(nrows=nrows) + + if not squeeze or isinstance(output[asheetname], DataFrame): + if header_names: + output[asheetname].columns = output[ + asheetname + ].columns.set_names(header_names) + + except EmptyDataError: + # No Data, return an empty DataFrame + output[asheetname] = DataFrame() + + if ret_dict: + return output + else: + return output[asheetname] + + +class ExcelWriter(metaclass=abc.ABCMeta): + """ + Class for writing DataFrame objects into excel sheets. + + Default is to use xlwt for xls, openpyxl for xlsx. + See DataFrame.to_excel for typical usage. + + Parameters + ---------- + path : str + Path to xls or xlsx file. + engine : str (optional) + Engine to use for writing. If None, defaults to + ``io.excel..writer``. NOTE: can only be passed as a keyword + argument. + date_format : str, default None + Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). + datetime_format : str, default None + Format string for datetime objects written into Excel files. + (e.g. 'YYYY-MM-DD HH:MM:SS'). + mode : {'w', 'a'}, default 'w' + File mode to use (write or append). + + .. 
versionadded:: 0.24.0 + + Attributes + ---------- + None + + Methods + ------- + None + + Notes + ----- + None of the methods and properties are considered public. + + For compatibility with CSV writers, ExcelWriter serializes lists + and dicts to strings before writing. + + Examples + -------- + Default usage: + + >>> with ExcelWriter('path_to_file.xlsx') as writer: + ... df.to_excel(writer) + + To write to separate sheets in a single file: + + >>> with ExcelWriter('path_to_file.xlsx') as writer: + ... df1.to_excel(writer, sheet_name='Sheet1') + ... df2.to_excel(writer, sheet_name='Sheet2') + + You can set the date format or datetime format: + + >>> with ExcelWriter('path_to_file.xlsx', + date_format='YYYY-MM-DD', + datetime_format='YYYY-MM-DD HH:MM:SS') as writer: + ... df.to_excel(writer) + + You can also append to an existing Excel file: + + >>> with ExcelWriter('path_to_file.xlsx', mode='a') as writer: + ... df.to_excel(writer, sheet_name='Sheet3') + """ + + # Defining an ExcelWriter implementation (see abstract methods for more...) + + # - Mandatory + # - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)`` + # --> called to write additional DataFrames to disk + # - ``supported_extensions`` (tuple of supported extensions), used to + # check that engine supports the given extension. + # - ``engine`` - string that gives the engine name. Necessary to + # instantiate class directly and bypass ``ExcelWriterMeta`` engine + # lookup. + # - ``save(self)`` --> called to save file to disk + # - Mostly mandatory (i.e. should at least exist) + # - book, cur_sheet, path + + # - Optional: + # - ``__init__(self, path, engine=None, **kwargs)`` --> always called + # with path as first argument. + + # You also need to register the class with ``register_writer()``. + # Technically, ExcelWriter implementations don't need to subclass + # ExcelWriter. 
+ def __new__(cls, path, engine=None, **kwargs): + # only switch class if generic(ExcelWriter) + + if cls is ExcelWriter: + if engine is None or (isinstance(engine, str) and engine == "auto"): + if isinstance(path, str): + ext = os.path.splitext(path)[-1][1:] + else: + ext = "xlsx" + + try: + engine = config.get_option(f"io.excel.{ext}.writer") + if engine == "auto": + engine = _get_default_writer(ext) + except KeyError: + raise ValueError(f"No engine for filetype: '{ext}'") + cls = get_writer(engine) + + return object.__new__(cls) + + # declare external properties you can count on + book = None + curr_sheet = None + path = None + + @property + @abc.abstractmethod + def supported_extensions(self): + """Extensions that writer engine supports.""" + pass + + @property + @abc.abstractmethod + def engine(self): + """Name of engine.""" + pass + + @abc.abstractmethod + def write_cells( + self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + ): + """ + Write given formatted cells into Excel an excel sheet + + Parameters + ---------- + cells : generator + cell of formatted data to save to Excel sheet + sheet_name : str, default None + Name of Excel sheet, if None, then use self.cur_sheet + startrow : upper left cell row to dump data frame + startcol : upper left cell column to dump data frame + freeze_panes: int tuple of length 2 + contains the bottom-most row and right-most column to freeze + """ + pass + + @abc.abstractmethod + def save(self): + """ + Save workbook to disk. 
+ """ + pass + + def __init__( + self, + path, + engine=None, + date_format=None, + datetime_format=None, + mode="w", + **engine_kwargs, + ): + # validate that this engine can handle the extension + if isinstance(path, str): + ext = os.path.splitext(path)[-1] + else: + ext = "xls" if engine == "xlwt" else "xlsx" + + self.check_extension(ext) + + self.path = path + self.sheets = {} + self.cur_sheet = None + + if date_format is None: + self.date_format = "YYYY-MM-DD" + else: + self.date_format = date_format + if datetime_format is None: + self.datetime_format = "YYYY-MM-DD HH:MM:SS" + else: + self.datetime_format = datetime_format + + self.mode = mode + + def __fspath__(self): + return stringify_path(self.path) + + def _get_sheet_name(self, sheet_name): + if sheet_name is None: + sheet_name = self.cur_sheet + if sheet_name is None: # pragma: no cover + raise ValueError("Must pass explicit sheet_name or set cur_sheet property") + return sheet_name + + def _value_with_fmt(self, val): + """Convert numpy types to Python types for the Excel writers. + + Parameters + ---------- + val : object + Value to be written into cells + + Returns + ------- + Tuple with the first element being the converted value and the second + being an optional format + """ + fmt = None + + if is_integer(val): + val = int(val) + elif is_float(val): + val = float(val) + elif is_bool(val): + val = bool(val) + elif isinstance(val, datetime): + fmt = self.datetime_format + elif isinstance(val, date): + fmt = self.date_format + elif isinstance(val, timedelta): + val = val.total_seconds() / float(86400) + fmt = "0" + else: + val = str(val) + + return val, fmt + + @classmethod + def check_extension(cls, ext): + """checks that path's extension against the Writer's supported + extensions. 
If it isn't supported, raises UnsupportedFiletypeError.""" + if ext.startswith("."): + ext = ext[1:] + if not any(ext in extension for extension in cls.supported_extensions): + msg = "Invalid extension for engine" + f"'{pprint_thing(cls.engine)}': '{pprint_thing(ext)}'" + raise ValueError(msg) + else: + return True + + # Allow use as a contextmanager + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def close(self): + """synonym for save, to make it more file-like""" + return self.save() + + +class ExcelFile: + """ + Class for parsing tabular excel sheets into DataFrame objects. + Uses xlrd. See read_excel for more documentation + + Parameters + ---------- + io : str, path object (pathlib.Path or py._path.local.LocalPath), + a file-like object, xlrd workbook or openpypl workbook. + If a string or path object, expected to be a path to xls, xlsx or odf file. + engine : str, default None + If io is not a buffer or path, this must be set to identify io. + Acceptable values are None, ``xlrd``, ``openpyxl``, ``odf``, or ``pyxlsb``. + Note that ``odf`` reads tables out of OpenDocument formatted files. + """ + + from pandas.io.excel._odfreader import _ODFReader + from pandas.io.excel._openpyxl import _OpenpyxlReader + from pandas.io.excel._xlrd import _XlrdReader + from pandas.io.excel._pyxlsb import _PyxlsbReader + + _engines = { + "xlrd": _XlrdReader, + "openpyxl": _OpenpyxlReader, + "odf": _ODFReader, + "pyxlsb": _PyxlsbReader, + } + + def __init__(self, io, engine=None): + if engine is None: + engine = "xlrd" + if engine not in self._engines: + raise ValueError(f"Unknown engine: {engine}") + + self.engine = engine + # could be a str, ExcelFile, Book, etc. 
+ self.io = io + # Always a string + self._io = stringify_path(io) + + self._reader = self._engines[engine](self._io) + + def __fspath__(self): + return self._io + + def parse( + self, + sheet_name=0, + header=0, + names=None, + index_col=None, + usecols=None, + squeeze=False, + converters=None, + true_values=None, + false_values=None, + skiprows=None, + nrows=None, + na_values=None, + parse_dates=False, + date_parser=None, + thousands=None, + comment=None, + skipfooter=0, + convert_float=True, + mangle_dupe_cols=True, + **kwds, + ): + """ + Parse specified sheet(s) into a DataFrame. + + Equivalent to read_excel(ExcelFile, ...) See the read_excel + docstring for more info on accepted parameters. + + Returns + ------- + DataFrame or dict of DataFrames + DataFrame from the passed in Excel file. + """ + if "chunksize" in kwds: + raise NotImplementedError( + "chunksize keyword of read_excel is not implemented" + ) + + return self._reader.parse( + sheet_name=sheet_name, + header=header, + names=names, + index_col=index_col, + usecols=usecols, + squeeze=squeeze, + converters=converters, + true_values=true_values, + false_values=false_values, + skiprows=skiprows, + nrows=nrows, + na_values=na_values, + parse_dates=parse_dates, + date_parser=date_parser, + thousands=thousands, + comment=comment, + skipfooter=skipfooter, + convert_float=convert_float, + mangle_dupe_cols=mangle_dupe_cols, + **kwds, + ) + + @property + def book(self): + return self._reader.book + + @property + def sheet_names(self): + return self._reader.sheet_names + + def close(self): + """close io if necessary""" + self._reader.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def __del__(self): + # Ensure we don't leak file descriptors, but put in try/except in case + # attributes are already deleted + try: + self.close() + except AttributeError: + pass diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py new 
def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]:
    """
    Parse an ODF Table into a list of lists.

    Parameters
    ----------
    sheet : odf table element
        The table whose ``TableRow`` children are read.
    convert_float : bool
        Forwarded to ``_get_cell_value`` for every regular table cell.

    Returns
    -------
    list of list of scalars
        One inner list per (repeated) sheet row. Empty rows are only kept
        when a non-empty row follows them, and all rows are padded with
        ``self.empty_value`` to the length of the longest row. Every row
        is an independent list, so callers may modify one row in place
        without affecting any other.
    """
    from odf.table import CoveredTableCell, TableCell, TableRow

    covered_cell_name = CoveredTableCell().qname
    table_cell_name = TableCell().qname
    cell_names = {covered_cell_name, table_cell_name}

    sheet_rows = sheet.getElementsByType(TableRow)
    empty_rows = 0
    max_row_len = 0

    table: List[List[Scalar]] = []

    for sheet_row in sheet_rows:
        sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names]
        empty_cells = 0
        table_row: List[Scalar] = []

        for sheet_cell in sheet_cells:
            if sheet_cell.qname == table_cell_name:
                value = self._get_cell_value(sheet_cell, convert_float)
            else:
                # covered (merged-away) cells carry no content of their own
                value = self.empty_value

            column_repeat = self._get_column_repeat(sheet_cell)

            # Queue up empty values, writing only if content succeeds them
            if value == self.empty_value:
                empty_cells += column_repeat
            else:
                table_row.extend([self.empty_value] * empty_cells)
                empty_cells = 0
                table_row.extend([value] * column_repeat)

        max_row_len = max(max_row_len, len(table_row))

        row_repeat = self._get_row_repeat(sheet_row)
        if self._is_empty_row(sheet_row):
            empty_rows += row_repeat
        else:
            # Add the queued blank rows. Each one must be its own list:
            # ``[[x]] * n`` would put the *same* list in the table n times.
            table.extend([self.empty_value] for _ in range(empty_rows))
            empty_rows = 0
            # Likewise, a repeated row is copied rather than re-appended.
            table.extend(list(table_row) for _ in range(row_repeat))

    # Make our table square
    for row in table:
        if len(row) < max_row_len:
            row.extend([self.empty_value] * (max_row_len - len(row)))

    return table
+ """ + from odf.namespaces import TABLENS + + return int(row.attributes.get((TABLENS, "number-rows-repeated"), 1)) + + def _get_column_repeat(self, cell) -> int: + from odf.namespaces import TABLENS + + return int(cell.attributes.get((TABLENS, "number-columns-repeated"), 1)) + + def _is_empty_row(self, row) -> bool: + """Helper function to find empty rows + """ + for column in row.childNodes: + if len(column.childNodes) > 0: + return False + + return True + + def _get_cell_value(self, cell, convert_float: bool) -> Scalar: + from odf.namespaces import OFFICENS + + cell_type = cell.attributes.get((OFFICENS, "value-type")) + if cell_type == "boolean": + if str(cell) == "TRUE": + return True + return False + if cell_type is None: + return self.empty_value + elif cell_type == "float": + # GH5394 + cell_value = float(cell.attributes.get((OFFICENS, "value"))) + + if cell_value == 0.0: # NA handling + return str(cell) + + if convert_float: + val = int(cell_value) + if val == cell_value: + return val + return cell_value + elif cell_type == "percentage": + cell_value = cell.attributes.get((OFFICENS, "value")) + return float(cell_value) + elif cell_type == "string": + return str(cell) + elif cell_type == "currency": + cell_value = cell.attributes.get((OFFICENS, "value")) + return float(cell_value) + elif cell_type == "date": + cell_value = cell.attributes.get((OFFICENS, "date-value")) + return pd.to_datetime(cell_value) + elif cell_type == "time": + return pd.to_datetime(str(cell)).time() + else: + raise ValueError(f"Unrecognized type {cell_type}") diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py new file mode 100644 index 00000000..c4327316 --- /dev/null +++ b/pandas/io/excel/_openpyxl.py @@ -0,0 +1,542 @@ +from typing import List + +import numpy as np + +from pandas._typing import FilePathOrBuffer, Scalar +from pandas.compat._optional import import_optional_dependency + +from pandas.io.excel._base import ExcelWriter, _BaseExcelReader +from 
pandas.io.excel._util import _validate_freeze_panes + + +class _OpenpyxlWriter(ExcelWriter): + engine = "openpyxl" + supported_extensions = (".xlsx", ".xlsm") + + def __init__(self, path, engine=None, mode="w", **engine_kwargs): + # Use the openpyxl module as the Excel writer. + from openpyxl.workbook import Workbook + + super().__init__(path, mode=mode, **engine_kwargs) + + if self.mode == "a": # Load from existing workbook + from openpyxl import load_workbook + + book = load_workbook(self.path) + self.book = book + else: + # Create workbook object with default optimized_write=True. + self.book = Workbook() + + if self.book.worksheets: + try: + self.book.remove(self.book.worksheets[0]) + except AttributeError: + + # compat - for openpyxl <= 2.4 + self.book.remove_sheet(self.book.worksheets[0]) + + def save(self): + """ + Save workbook to disk. + """ + return self.book.save(self.path) + + @classmethod + def _convert_to_style(cls, style_dict): + """ + Converts a style_dict to an openpyxl style object. + + Parameters + ---------- + style_dict : style dictionary to convert + """ + + from openpyxl.style import Style + + xls_style = Style() + for key, value in style_dict.items(): + for nk, nv in value.items(): + if key == "borders": + ( + xls_style.borders.__getattribute__(nk).__setattr__( + "border_style", nv + ) + ) + else: + xls_style.__getattribute__(key).__setattr__(nk, nv) + + return xls_style + + @classmethod + def _convert_to_style_kwargs(cls, style_dict): + """ + Convert a style_dict to a set of kwargs suitable for initializing + or updating-on-copy an openpyxl v2 style object. + + Parameters + ---------- + style_dict : dict + A dict with zero or more of the following keys (or their synonyms). 
+ 'font' + 'fill' + 'border' ('borders') + 'alignment' + 'number_format' + 'protection' + + Returns + ------- + style_kwargs : dict + A dict with the same, normalized keys as ``style_dict`` but each + value has been replaced with a native openpyxl style object of the + appropriate class. + """ + + _style_key_map = {"borders": "border"} + + style_kwargs = {} + for k, v in style_dict.items(): + if k in _style_key_map: + k = _style_key_map[k] + _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) + new_v = _conv_to_x(v) + if new_v: + style_kwargs[k] = new_v + + return style_kwargs + + @classmethod + def _convert_to_color(cls, color_spec): + """ + Convert ``color_spec`` to an openpyxl v2 Color object. + + Parameters + ---------- + color_spec : str, dict + A 32-bit ARGB hex string, or a dict with zero or more of the + following keys. + 'rgb' + 'indexed' + 'auto' + 'theme' + 'tint' + 'index' + 'type' + + Returns + ------- + color : openpyxl.styles.Color + """ + + from openpyxl.styles import Color + + if isinstance(color_spec, str): + return Color(color_spec) + else: + return Color(**color_spec) + + @classmethod + def _convert_to_font(cls, font_dict): + """ + Convert ``font_dict`` to an openpyxl v2 Font object. + + Parameters + ---------- + font_dict : dict + A dict with zero or more of the following keys (or their synonyms). 
@classmethod
def _convert_to_fill(cls, fill_dict):
    """
    Convert ``fill_dict`` to an openpyxl v2 Fill object.

    Parameters
    ----------
    fill_dict : dict
        A dict with one or more of the following keys (or their synonyms),
            'fill_type' ('patternType', 'patterntype')
            'start_color' ('fgColor', 'fgcolor')
            'end_color' ('bgColor', 'bgcolor')
        or one or more of the following keys (or their synonyms).
            'type' ('fill_type')
            'degree'
            'left'
            'right'
            'top'
            'bottom'
            'stop'

    Returns
    -------
    fill : openpyxl.styles.Fill
        A ``PatternFill`` when the keys are acceptable to it, otherwise a
        ``GradientFill``.
    """

    from openpyxl.styles import GradientFill, PatternFill

    _pattern_fill_key_map = {
        "patternType": "fill_type",
        "patterntype": "fill_type",
        "fgColor": "start_color",
        "fgcolor": "start_color",
        "bgColor": "end_color",
        "bgcolor": "end_color",
    }

    _gradient_fill_key_map = {"fill_type": "type"}

    _color_keys = ("start_color", "end_color")

    def _as_color(spec):
        # Only raw specs (hex string or mapping) need converting; objects
        # that are already openpyxl colors/stops are passed through as-is.
        if isinstance(spec, str) or hasattr(spec, "keys"):
            return cls._convert_to_color(spec)
        return spec

    pfill_kwargs = {}
    gfill_kwargs = {}
    for k, v in fill_dict.items():
        pk = _pattern_fill_key_map.get(k)
        gk = _gradient_fill_key_map.get(k)

        # Colors are converted for the canonical key names as well as for
        # their synonyms (previously only the synonyms were converted).
        if pk in _color_keys or k in _color_keys:
            v = _as_color(v)
        elif k == "stop":
            # "stop" has no synonym, so it must be matched on the raw key.
            # Materialise a list: GradientFill needs a real sequence.
            v = [_as_color(stop) for stop in v]

        if pk:
            pfill_kwargs[pk] = v
        elif gk:
            # 'fill_type' is the canonical PatternFill name *and* a synonym
            # for GradientFill's 'type', so it must be offered to both.
            gfill_kwargs[gk] = v
            pfill_kwargs[k] = v
        else:
            pfill_kwargs[k] = v
            gfill_kwargs[k] = v

    # PatternFill rejects gradient-only keys with a TypeError, which is the
    # signal to build a GradientFill instead.
    try:
        return PatternFill(**pfill_kwargs)
    except TypeError:
        return GradientFill(**gfill_kwargs)
@classmethod
def _convert_to_protection(cls, protection_dict):
    """
    Convert ``protection_dict`` to an openpyxl v2 Protection object.

    Parameters
    ----------
    protection_dict : dict
        A dict with zero or more of the following keys.
            'locked'
            'hidden'

    Returns
    -------
    protection : openpyxl.styles.Protection
        Built by passing ``protection_dict`` as keyword arguments.
    """

    from openpyxl.styles import Protection

    return Protection(**protection_dict)
+ continue + xcell = wks.cell(column=col, row=row) + for k, v in style_kwargs.items(): + setattr(xcell, k, v) + + +class _OpenpyxlReader(_BaseExcelReader): + def __init__(self, filepath_or_buffer: FilePathOrBuffer) -> None: + """Reader using openpyxl engine. + + Parameters + ---------- + filepath_or_buffer : string, path object or Workbook + Object to be parsed. + """ + import_optional_dependency("openpyxl") + super().__init__(filepath_or_buffer) + + @property + def _workbook_class(self): + from openpyxl import Workbook + + return Workbook + + def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): + from openpyxl import load_workbook + + return load_workbook( + filepath_or_buffer, read_only=True, data_only=True, keep_links=False + ) + + def close(self): + # https://stackoverflow.com/questions/31416842/ + # openpyxl-does-not-close-excel-workbook-in-read-only-mode + self.book.close() + + @property + def sheet_names(self) -> List[str]: + return self.book.sheetnames + + def get_sheet_by_name(self, name: str): + return self.book[name] + + def get_sheet_by_index(self, index: int): + return self.book.worksheets[index] + + def _convert_cell(self, cell, convert_float: bool) -> Scalar: + + # TODO: replace with openpyxl constants + if cell.is_date: + return cell.value + elif cell.data_type == "e": + return np.nan + elif cell.data_type == "b": + return bool(cell.value) + elif cell.value is None: + return "" # compat with xlrd + elif cell.data_type == "n": + # GH5394 + if convert_float: + val = int(cell.value) + if val == cell.value: + return val + else: + return float(cell.value) + + return cell.value + + def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: + data: List[List[Scalar]] = [] + for row in sheet.rows: + data.append([self._convert_cell(cell, convert_float) for cell in row]) + + return data diff --git a/pandas/io/excel/_pyxlsb.py b/pandas/io/excel/_pyxlsb.py new file mode 100644 index 00000000..df6a3800 --- /dev/null +++ 
b/pandas/io/excel/_pyxlsb.py @@ -0,0 +1,68 @@ +from typing import List + +from pandas._typing import FilePathOrBuffer, Scalar +from pandas.compat._optional import import_optional_dependency + +from pandas.io.excel._base import _BaseExcelReader + + +class _PyxlsbReader(_BaseExcelReader): + def __init__(self, filepath_or_buffer: FilePathOrBuffer): + """Reader using pyxlsb engine. + + Parameters + __________ + filepath_or_buffer: string, path object, or Workbook + Object to be parsed. + """ + import_optional_dependency("pyxlsb") + # This will call load_workbook on the filepath or buffer + # And set the result to the book-attribute + super().__init__(filepath_or_buffer) + + @property + def _workbook_class(self): + from pyxlsb import Workbook + + return Workbook + + def load_workbook(self, filepath_or_buffer: FilePathOrBuffer): + from pyxlsb import open_workbook + + # Todo: hack in buffer capability + # This might need some modifications to the Pyxlsb library + # Actual work for opening it is in xlsbpackage.py, line 20-ish + + return open_workbook(filepath_or_buffer) + + @property + def sheet_names(self) -> List[str]: + return self.book.sheets + + def get_sheet_by_name(self, name: str): + return self.book.get_sheet(name) + + def get_sheet_by_index(self, index: int): + # pyxlsb sheets are indexed from 1 onwards + # There's a fix for this in the source, but the pypi package doesn't have it + return self.book.get_sheet(index + 1) + + def _convert_cell(self, cell, convert_float: bool) -> Scalar: + # Todo: there is no way to distinguish between floats and datetimes in pyxlsb + # This means that there is no way to read datetime types from an xlsb file yet + if cell.v is None: + return "" # Prevents non-named columns from not showing up as Unnamed: i + if isinstance(cell.v, float) and convert_float: + val = int(cell.v) + if val == cell.v: + return val + else: + return float(cell.v) + + return cell.v + + def get_sheet_data(self, sheet, convert_float: bool) -> 
List[List[Scalar]]: + return [ + [self._convert_cell(c, convert_float) for c in r] + for r in sheet.rows(sparse=False) + ] diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py new file mode 100644 index 00000000..a084be54 --- /dev/null +++ b/pandas/io/excel/_util.py @@ -0,0 +1,229 @@ +from pandas.compat._optional import import_optional_dependency + +from pandas.core.dtypes.common import is_integer, is_list_like + +_writers = {} + + +def register_writer(klass): + """ + Add engine to the excel writer registry.io.excel. + + You must use this method to integrate with ``to_excel``. + + Parameters + ---------- + klass : ExcelWriter + """ + if not callable(klass): + raise ValueError("Can only register callables as engines") + engine_name = klass.engine + _writers[engine_name] = klass + + +def _get_default_writer(ext): + """ + Return the default writer for the given extension. + + Parameters + ---------- + ext : str + The excel file extension for which to get the default engine. + + Returns + ------- + str + The default engine for the extension. + """ + _default_writers = {"xlsx": "openpyxl", "xlsm": "openpyxl", "xls": "xlwt"} + xlsxwriter = import_optional_dependency( + "xlsxwriter", raise_on_missing=False, on_version="warn" + ) + if xlsxwriter: + _default_writers["xlsx"] = "xlsxwriter" + return _default_writers[ext] + + +def get_writer(engine_name): + try: + return _writers[engine_name] + except KeyError: + raise ValueError(f"No Excel writer '{engine_name}'") + + +def _excel2num(x): + """ + Convert Excel column name like 'AB' to 0-based column index. + + Parameters + ---------- + x : str + The Excel column name to convert to a 0-based column index. + + Returns + ------- + num : int + The column index corresponding to the name. + + Raises + ------ + ValueError + Part of the Excel column name was invalid. 
+ """ + index = 0 + + for c in x.upper().strip(): + cp = ord(c) + + if cp < ord("A") or cp > ord("Z"): + raise ValueError(f"Invalid column name: {x}") + + index = index * 26 + cp - ord("A") + 1 + + return index - 1 + + +def _range2cols(areas): + """ + Convert comma separated list of column names and ranges to indices. + + Parameters + ---------- + areas : str + A string containing a sequence of column ranges (or areas). + + Returns + ------- + cols : list + A list of 0-based column indices. + + Examples + -------- + >>> _range2cols('A:E') + [0, 1, 2, 3, 4] + >>> _range2cols('A,C,Z:AB') + [0, 2, 25, 26, 27] + """ + cols = [] + + for rng in areas.split(","): + if ":" in rng: + rng = rng.split(":") + cols.extend(range(_excel2num(rng[0]), _excel2num(rng[1]) + 1)) + else: + cols.append(_excel2num(rng)) + + return cols + + +def _maybe_convert_usecols(usecols): + """ + Convert `usecols` into a compatible format for parsing in `parsers.py`. + + Parameters + ---------- + usecols : object + The use-columns object to potentially convert. + + Returns + ------- + converted : object + The compatible format of `usecols`. + """ + if usecols is None: + return usecols + + if is_integer(usecols): + raise ValueError( + "Passing an integer for `usecols` is no longer supported. " + "Please pass in a list of int from 0 to `usecols` " + "inclusive instead." 
+ ) + + if isinstance(usecols, str): + return _range2cols(usecols) + + return usecols + + +def _validate_freeze_panes(freeze_panes): + if freeze_panes is not None: + if len(freeze_panes) == 2 and all( + isinstance(item, int) for item in freeze_panes + ): + return True + + raise ValueError( + "freeze_panes must be of form (row, column) " + "where row and column are integers" + ) + + # freeze_panes wasn't specified, return False so it won't be applied + # to output sheet + return False + + +def _trim_excel_header(row): + # trim header row so auto-index inference works + # xlrd uses '' , openpyxl None + while len(row) > 0 and (row[0] == "" or row[0] is None): + row = row[1:] + return row + + +def _fill_mi_header(row, control_row): + """Forward fill blank entries in row but only inside the same parent index. + + Used for creating headers in Multiindex. + Parameters + ---------- + row : list + List of items in a single row. + control_row : list of bool + Helps to determine if particular column is in same parent index as the + previous value. Used to stop propagation of empty cells between + different indexes. + + Returns + ------- + Returns changed row and control_row + """ + last = row[0] + for i in range(1, len(row)): + if not control_row[i]: + last = row[i] + + if row[i] == "" or row[i] is None: + row[i] = last + else: + control_row[i] = False + last = row[i] + + return row, control_row + + +def _pop_header_name(row, index_col): + """ + Pop the header name for MultiIndex parsing. + + Parameters + ---------- + row : list + The data row to parse for the header name. + index_col : int, list + The index columns for our data. Assumed to be non-null. + + Returns + ------- + header_name : str + The extracted header name. + trimmed_row : list + The original data row with the header name removed. + """ + # Pop out header name and fill w/blank. 
def load_workbook(self, filepath_or_buffer):
    """
    Open the workbook with xlrd.

    Objects exposing a ``read`` attribute are read in full and their
    contents handed to xlrd as ``file_contents``; anything else is passed
    to ``xlrd.open_workbook`` unchanged.
    """
    from xlrd import open_workbook

    if hasattr(filepath_or_buffer, "read"):
        data = filepath_or_buffer.read()
        return open_workbook(file_contents=data)
    else:
        return open_workbook(filepath_or_buffer)
+ try: + cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904) + except OverflowError: + return cell_contents + + # Excel doesn't distinguish between dates and time, + # so we treat dates on the epoch as times only. + # Also, Excel supports 1900 and 1904 epochs. + year = (cell_contents.timetuple())[0:3] + if (not epoch1904 and year == (1899, 12, 31)) or ( + epoch1904 and year == (1904, 1, 1) + ): + cell_contents = time( + cell_contents.hour, + cell_contents.minute, + cell_contents.second, + cell_contents.microsecond, + ) + + elif cell_typ == XL_CELL_ERROR: + cell_contents = np.nan + elif cell_typ == XL_CELL_BOOLEAN: + cell_contents = bool(cell_contents) + elif convert_float and cell_typ == XL_CELL_NUMBER: + # GH5394 - Excel 'numbers' are always floats + # it's a minimal perf hit and less surprising + val = int(cell_contents) + if val == cell_contents: + cell_contents = val + return cell_contents + + data = [] + + for i in range(sheet.nrows): + row = [ + _parse_cell(value, typ) + for value, typ in zip(sheet.row_values(i), sheet.row_types(i)) + ] + data.append(row) + + return data diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py new file mode 100644 index 00000000..6d9ff9be --- /dev/null +++ b/pandas/io/excel/_xlsxwriter.py @@ -0,0 +1,237 @@ +import pandas._libs.json as json + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import _validate_freeze_panes + + +class _XlsxStyler: + # Map from openpyxl-oriented styles to flatter xlsxwriter representation + # Ordering necessary for both determinism and because some are keyed by + # prefixes of others. 
+ STYLE_MAPPING = { + "font": [ + (("name",), "font_name"), + (("sz",), "font_size"), + (("size",), "font_size"), + (("color", "rgb"), "font_color"), + (("color",), "font_color"), + (("b",), "bold"), + (("bold",), "bold"), + (("i",), "italic"), + (("italic",), "italic"), + (("u",), "underline"), + (("underline",), "underline"), + (("strike",), "font_strikeout"), + (("vertAlign",), "font_script"), + (("vertalign",), "font_script"), + ], + "number_format": [(("format_code",), "num_format"), ((), "num_format")], + "protection": [(("locked",), "locked"), (("hidden",), "hidden")], + "alignment": [ + (("horizontal",), "align"), + (("vertical",), "valign"), + (("text_rotation",), "rotation"), + (("wrap_text",), "text_wrap"), + (("indent",), "indent"), + (("shrink_to_fit",), "shrink"), + ], + "fill": [ + (("patternType",), "pattern"), + (("patterntype",), "pattern"), + (("fill_type",), "pattern"), + (("start_color", "rgb"), "fg_color"), + (("fgColor", "rgb"), "fg_color"), + (("fgcolor", "rgb"), "fg_color"), + (("start_color",), "fg_color"), + (("fgColor",), "fg_color"), + (("fgcolor",), "fg_color"), + (("end_color", "rgb"), "bg_color"), + (("bgColor", "rgb"), "bg_color"), + (("bgcolor", "rgb"), "bg_color"), + (("end_color",), "bg_color"), + (("bgColor",), "bg_color"), + (("bgcolor",), "bg_color"), + ], + "border": [ + (("color", "rgb"), "border_color"), + (("color",), "border_color"), + (("style",), "border"), + (("top", "color", "rgb"), "top_color"), + (("top", "color"), "top_color"), + (("top", "style"), "top"), + (("top",), "top"), + (("right", "color", "rgb"), "right_color"), + (("right", "color"), "right_color"), + (("right", "style"), "right"), + (("right",), "right"), + (("bottom", "color", "rgb"), "bottom_color"), + (("bottom", "color"), "bottom_color"), + (("bottom", "style"), "bottom"), + (("bottom",), "bottom"), + (("left", "color", "rgb"), "left_color"), + (("left", "color"), "left_color"), + (("left", "style"), "left"), + (("left",), "left"), + ], + } + + 
@classmethod + def convert(cls, style_dict, num_format_str=None): + """ + converts a style_dict to an xlsxwriter format dict + + Parameters + ---------- + style_dict : style dictionary to convert + num_format_str : optional number format string + """ + + # Create a XlsxWriter format object. + props = {} + + if num_format_str is not None: + props["num_format"] = num_format_str + + if style_dict is None: + return props + + if "borders" in style_dict: + style_dict = style_dict.copy() + style_dict["border"] = style_dict.pop("borders") + + for style_group_key, style_group in style_dict.items(): + for src, dst in cls.STYLE_MAPPING.get(style_group_key, []): + # src is a sequence of keys into a nested dict + # dst is a flat key + if dst in props: + continue + v = style_group + for k in src: + try: + v = v[k] + except (KeyError, TypeError): + break + else: + props[dst] = v + + if isinstance(props.get("pattern"), str): + # TODO: support other fill patterns + props["pattern"] = 0 if props["pattern"] == "none" else 1 + + for k in ["border", "top", "right", "bottom", "left"]: + if isinstance(props.get(k), str): + try: + props[k] = [ + "none", + "thin", + "medium", + "dashed", + "dotted", + "thick", + "double", + "hair", + "mediumDashed", + "dashDot", + "mediumDashDot", + "dashDotDot", + "mediumDashDotDot", + "slantDashDot", + ].index(props[k]) + except ValueError: + props[k] = 2 + + if isinstance(props.get("font_script"), str): + props["font_script"] = ["baseline", "superscript", "subscript"].index( + props["font_script"] + ) + + if isinstance(props.get("underline"), str): + props["underline"] = { + "none": 0, + "single": 1, + "double": 2, + "singleAccounting": 33, + "doubleAccounting": 34, + }[props["underline"]] + + return props + + +class _XlsxWriter(ExcelWriter): + engine = "xlsxwriter" + supported_extensions = (".xlsx",) + + def __init__( + self, + path, + engine=None, + date_format=None, + datetime_format=None, + mode="w", + **engine_kwargs, + ): + # Use the xlsxwriter 
def write_cells(
    self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None
):
    """
    Write the frame cells to a worksheet using xlsxwriter.

    Parameters
    ----------
    cells : iterable
        Cell objects exposing ``row``, ``col``, ``val``, ``style``,
        ``mergestart`` and ``mergeend``.
    sheet_name : str, optional
        Resolved through ``self._get_sheet_name``; the worksheet is created
        on first use and remembered in ``self.sheets``.
    startrow, startcol : int, default 0
        Offsets added to every cell position.
    freeze_panes : tuple, optional
        Unpacked into the worksheet's ``freeze_panes`` call when
        ``_validate_freeze_panes`` accepts it.
    """
    sheet_name = self._get_sheet_name(sheet_name)

    if sheet_name not in self.sheets:
        self.sheets[sheet_name] = self.book.add_worksheet(sheet_name)
    worksheet = self.sheets[sheet_name]

    if _validate_freeze_panes(freeze_panes):
        worksheet.freeze_panes(*freeze_panes)

    # One format object per distinct (style, number format) combination;
    # "null" is the JSON form of a missing style and needs no format.
    format_cache = {"null": None}

    for cell in cells:
        value, num_format = self._value_with_fmt(cell.val)

        cache_key = json.dumps(cell.style)
        if num_format:
            cache_key += num_format

        if cache_key not in format_cache:
            format_cache[cache_key] = self.book.add_format(
                _XlsxStyler.convert(cell.style, num_format)
            )
        cell_format = format_cache[cache_key]

        first_row = startrow + cell.row
        first_col = startcol + cell.col
        if cell.mergestart is None or cell.mergeend is None:
            worksheet.write(first_row, first_col, value, cell_format)
        else:
            worksheet.merge_range(
                first_row,
                first_col,
                startrow + cell.mergestart,
                startcol + cell.mergeend,
                value,
                cell_format,
            )
@classmethod
def _style_to_xlwt(
    cls, item, firstlevel: bool = True, field_sep=",", line_sep=";"
) -> str:
    """
    Recursively render a nested style dict as an xlwt easyxf style string.

    For example

        {"font": {"bold": True},
         "align": {"horiz": "center"}}

    is rendered with one ``"<group>: <fields>"`` entry per top-level key,
    entries joined by ``line_sep`` and the fields of each group
    (``"<name> <value>"``) joined by ``field_sep``.  Every rendered mapping
    is followed by a single space.

    Parameters
    ----------
    item : mapping or scalar
        Anything with an ``items`` attribute is treated as a mapping; any
        other value is rendered with ``str`` formatting.
    firstlevel : bool, default True
        True for the outermost mapping (group level), False for nested ones.
    field_sep : str, default ","
        Separator between the fields of one group.
    line_sep : str, default ";"
        Separator between groups.

    Returns
    -------
    str
    """
    if hasattr(item, "items"):
        # Pass both separators down: the nested level is where ``field_sep``
        # is actually used, so dropping it would silently ignore the caller's
        # choice.
        if firstlevel:
            parts = [
                f"{key}: {cls._style_to_xlwt(value, False, field_sep, line_sep)}"
                for key, value in item.items()
            ]
            return f"{line_sep.join(parts)} "
        parts = [
            f"{key} {cls._style_to_xlwt(value, False, field_sep, line_sep)}"
            for key, value in item.items()
        ]
        return f"{field_sep.join(parts)} "

    text = f"{item}"
    # Only a value that *is* True/False becomes on/off; a substring replace
    # would corrupt ordinary strings such as a font named "TrueType".
    if text == "True":
        return "on"
    if text == "False":
        return "off"
    return text
def read_feather(path, columns=None, use_threads: bool = True):
    """
    Load a feather-format object from the file path.

    Parameters
    ----------
    path : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.feather``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handler (e.g. via builtin ``open`` function)
        or ``StringIO``.
    columns : sequence, default None
        If not provided, all columns are read.

        .. versionadded:: 0.24.0
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.  The value is
        coerced with ``bool`` before being handed to pyarrow.

        .. versionadded:: 0.24.0

    Returns
    -------
    type of object stored in file
    """
    # checks that the optional dependency is available before importing it
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    source = stringify_path(path)
    options = {"columns": columns, "use_threads": bool(use_threads)}
    return feather.read_feather(source, **options)
class CSSWarning(UserWarning):
    """
    Warning issued when a piece of CSS cannot currently be parsed.
    """


def _side_expander(prop_fmt: str):
    """
    Create an ``expand_*`` method for a four-sided shorthand property.

    Parameters
    ----------
    prop_fmt : str
        Format string with one ``{:s}`` placeholder that receives the side
        name, e.g. ``"border-{:s}-color"``.

    Returns
    -------
    function
        Generator function ``expand(self, prop, value)`` yielding one
        ``(atomic property, token)`` pair per side.  A value with an
        unsupported number of tokens triggers a ``CSSWarning`` and yields
        nothing.
    """

    def expand(self, prop, value: str):
        parts = value.split()
        picks = self.SIDE_SHORTHANDS.get(len(parts))
        if picks is None:
            warnings.warn(
                f'Could not expand "{prop}: {value}"', CSSWarning,
            )
            return
        for side, position in zip(self.SIDES, picks):
            yield prop_fmt.format(side), parts[position]

    return expand


class CSSResolver:
    """
    A callable for parsing and resolving CSS to atomic properties.
    """

    # unit -> (next unit, multiplier); followed repeatedly until "pt".
    UNIT_RATIOS = {
        "rem": ("pt", 12),
        "ex": ("em", 0.5),
        # 'ch':
        "px": ("pt", 0.75),
        "pc": ("pt", 12),
        "in": ("pt", 72),
        "cm": ("in", 1 / 2.54),
        "mm": ("in", 1 / 25.4),
        "q": ("mm", 0.25),
        "!!default": ("em", 0),
    }

    FONT_SIZE_RATIOS = {
        **UNIT_RATIOS,
        "%": ("em", 0.01),
        "xx-small": ("rem", 0.5),
        "x-small": ("rem", 0.625),
        "small": ("rem", 0.8),
        "medium": ("rem", 1),
        "large": ("rem", 1.125),
        "x-large": ("rem", 1.5),
        "xx-large": ("rem", 2),
        "smaller": ("em", 1 / 1.2),
        "larger": ("em", 1.2),
        "!!default": ("em", 1),
    }

    MARGIN_RATIOS = {**UNIT_RATIOS, "none": ("pt", 0)}

    BORDER_WIDTH_RATIOS = {
        **UNIT_RATIOS,
        "none": ("pt", 0),
        "thick": ("px", 4),
        "medium": ("px", 2),
        "thin": ("px", 1),
        # Default: medium only if solid
    }

    # number of tokens in a shorthand -> token index used for each of SIDES
    SIDE_SHORTHANDS = {
        1: [0, 0, 0, 0],
        2: [0, 1, 0, 1],
        3: [0, 1, 2, 1],
        4: [0, 1, 2, 3],
    }
    SIDES = ("top", "right", "bottom", "left")

    def __call__(self, declarations_str, inherited=None):
        """
        Resolve the given declarations to atomic properties.

        Parameters
        ----------
        declarations_str : str
            A list of CSS declarations
        inherited : dict, optional
            Atomic properties indicating the inherited style context in which
            declarations_str is to be resolved. ``inherited`` should already
            be resolved, i.e. valid output of this method.

        Returns
        -------
        dict
            Atomic CSS 2.2 properties.

        Examples
        --------
        >>> resolve = CSSResolver()
        >>> inherited = {'font-family': 'serif', 'font-weight': 'bold'}
        >>> out = resolve('''
        ...               border-color: BLUE RED;
        ...               font-size: 1em;
        ...               font-size: 2em;
        ...               font-weight: normal;
        ...               font-weight: inherit;
        ...               ''', inherited)
        >>> sorted(out.items())  # doctest: +NORMALIZE_WHITESPACE
        [('border-bottom-color', 'blue'),
         ('border-left-color', 'red'),
         ('border-right-color', 'red'),
         ('border-top-color', 'blue'),
         ('font-family', 'serif'),
         ('font-size', '24pt'),
         ('font-weight', 'bold')]
        """
        props = dict(self.atomize(self.parse(declarations_str)))
        if inherited is None:
            inherited = {}

        # 1. resolve inherited, initial
        for name, value in inherited.items():
            props.setdefault(name, value)

        for name in list(props):
            resolved = props[name]
            if resolved == "inherit":
                resolved = inherited.get(name, "initial")
            if resolved is None or resolved == "initial":
                # we do not define a complete initial stylesheet
                del props[name]
            else:
                props[name] = resolved

        # 2. resolve relative font size
        font_size = None
        if props.get("font-size"):
            parent_pt = None
            if "font-size" in inherited:
                parent_size = inherited["font-size"]
                assert parent_size[-2:] == "pt"
                parent_pt = float(parent_size[:-2])
            props["font-size"] = self.size_to_pt(
                props["font-size"], parent_pt, conversions=self.FONT_SIZE_RATIOS
            )
            font_size = float(props["font-size"][:-2])

        # 3. TODO: resolve other font-relative units
        for side in self.SIDES:
            # TODO: support % for margins and padding
            sized = (
                (f"border-{side}-width", self.BORDER_WIDTH_RATIOS),
                (f"margin-{side}", self.MARGIN_RATIOS),
                (f"padding-{side}", self.MARGIN_RATIOS),
            )
            for name, ratios in sized:
                if name in props:
                    props[name] = self.size_to_pt(
                        props[name], em_pt=font_size, conversions=ratios
                    )

        return props

    def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS):
        """
        Convert a CSS size to a string in points, e.g. ``"12pt"``.

        Parameters
        ----------
        in_val : str
            A number followed by a unit, or a bare keyword such as "large"
            (treated as ``1`` of that unit).
        em_pt : float, optional
            Size of one ``em`` in points.  When None, ``em`` is treated as
            ``rem``.
        conversions : dict, default UNIT_RATIOS
            Maps a unit to ``(next unit, multiplier)``.

        Returns
        -------
        str
            Whole values are formatted without decimals, others with ``:f``
            formatting.  Input that cannot be handled triggers a
            ``CSSWarning`` and yields the conversion of ``"1!!default"``.
        """

        def _error():
            warnings.warn(f"Unhandled size: {repr(in_val)}", CSSWarning)
            return self.size_to_pt("1!!default", conversions=conversions)

        match = re.match(r"^(\S*?)([a-zA-Z%!].*)", in_val)
        if match is None:
            return _error()
        number, unit = match.groups()

        if number == "":
            # hack for 'large' etc.
            amount = 1
        else:
            try:
                amount = float(number)
            except ValueError:
                return _error()

        while unit != "pt":
            if unit == "em":
                if em_pt is None:
                    unit = "rem"
                else:
                    amount *= em_pt
                    unit = "pt"
                continue

            if unit not in conversions:
                return _error()
            unit, factor = conversions[unit]
            amount *= factor

        amount = round(amount, 5)
        whole = int(amount)
        return f"{whole:d}pt" if whole == amount else f"{amount:f}pt"

    def atomize(self, declarations):
        """
        Expand shorthand declarations into atomic ``(prop, value)`` pairs.

        A property is expanded when the instance has a matching
        ``expand_<prop>`` attribute (dashes replaced by underscores);
        anything else is passed through unchanged.
        """
        for prop, value in declarations:
            expander = getattr(self, "expand_" + prop.replace("-", "_"), None)
            if expander is None:
                yield prop, value
            else:
                yield from expander(prop, value)

    expand_border_color = _side_expander("border-{:s}-color")
    expand_border_style = _side_expander("border-{:s}-style")
    expand_border_width = _side_expander("border-{:s}-width")
    expand_margin = _side_expander("margin-{:s}")
    expand_padding = _side_expander("padding-{:s}")

    def parse(self, declarations_str: str):
        """
        Generates (prop, value) pairs from declarations.

        In a future version may generate parsed tokens from tinycss/tinycss2

        Parameters
        ----------
        declarations_str : str
        """
        for decl in declarations_str.split(";"):
            if not decl.strip():
                continue
            name, colon, value = decl.partition(":")
            if not colon:
                warnings.warn(
                    f"Ill-formatted attribute: expected a colon in {repr(decl)}",
                    CSSWarning,
                )
                continue
            # TODO: don't lowercase case sensitive parts of values (strings)
            yield name.strip().lower(), value.strip().lower()
+""" + +import csv as csvlib +from io import StringIO +import os +from typing import Hashable, List, Mapping, Optional, Sequence, Union +import warnings +from zipfile import ZipFile + +import numpy as np + +from pandas._libs import writers as libwriters +from pandas._typing import FilePathOrBuffer + +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, + ABCIndexClass, + ABCMultiIndex, + ABCPeriodIndex, +) +from pandas.core.dtypes.missing import notna + +from pandas.io.common import ( + get_compression_method, + get_filepath_or_buffer, + get_handle, + infer_compression, +) + + +class CSVFormatter: + def __init__( + self, + obj, + path_or_buf: Optional[FilePathOrBuffer[str]] = None, + sep: str = ",", + na_rep: str = "", + float_format: Optional[str] = None, + cols=None, + header: Union[bool, Sequence[Hashable]] = True, + index: bool = True, + index_label: Optional[Union[bool, Hashable, Sequence[Hashable]]] = None, + mode: str = "w", + encoding: Optional[str] = None, + compression: Union[str, Mapping[str, str], None] = "infer", + quoting: Optional[int] = None, + line_terminator="\n", + chunksize: Optional[int] = None, + quotechar='"', + date_format: Optional[str] = None, + doublequote: bool = True, + escapechar: Optional[str] = None, + decimal=".", + ): + self.obj = obj + + if path_or_buf is None: + path_or_buf = StringIO() + + # Extract compression mode as given, if dict + compression, self.compression_args = get_compression_method(compression) + + self.path_or_buf, _, _, self.should_close = get_filepath_or_buffer( + path_or_buf, encoding=encoding, compression=compression, mode=mode + ) + self.sep = sep + self.na_rep = na_rep + self.float_format = float_format + self.decimal = decimal + + self.header = header + self.index = index + self.index_label = index_label + self.mode = mode + if encoding is None: + encoding = "utf-8" + self.encoding = encoding + self.compression = infer_compression(self.path_or_buf, compression) + + if quoting is None: + quoting = 
def save(self) -> None:
    """
    Create the writer & save.

    Picks the output target (an in-memory buffer for zip output, the
    caller's file-like object, or a handle opened via ``get_handle``),
    builds the ``csv`` writer on it, writes everything through
    ``self._save`` and finally flushes/closes whatever this method opened.
    """
    # GH21227 internal compression is not used when file-like passed.
    if self.compression and hasattr(self.path_or_buf, "write"):
        warnings.warn(
            "compression has no effect when passing file-like object as input.",
            RuntimeWarning,
            stacklevel=2,
        )

    # when zip compression is called.
    is_zip = isinstance(self.path_or_buf, ZipFile) or (
        not hasattr(self.path_or_buf, "write") and self.compression == "zip"
    )

    # ``close`` records whether ``f``/``handles`` came from get_handle and
    # therefore have to be closed here; ``handles`` is only bound on the
    # paths that set ``close`` to True.
    if is_zip:
        # zipfile doesn't support writing string to archive. uses string
        # buffer to receive csv writing and dump into zip compression
        # file handle. GH21241, GH21118
        f = StringIO()
        close = False
    elif hasattr(self.path_or_buf, "write"):
        f = self.path_or_buf
        close = False
    else:
        f, handles = get_handle(
            self.path_or_buf,
            self.mode,
            encoding=self.encoding,
            compression=dict(self.compression_args, method=self.compression),
        )
        close = True

    try:
        # Note: self.encoding is irrelevant here
        self.writer = csvlib.writer(
            f,
            lineterminator=self.line_terminator,
            delimiter=self.sep,
            quoting=self.quoting,
            doublequote=self.doublequote,
            escapechar=self.escapechar,
            quotechar=self.quotechar,
        )

        self._save()

    finally:
        # Runs whether or not _save() raised, so a zip target receives
        # whatever was buffered up to that point.
        if is_zip:
            # GH17778 handles zip compression separately.
            buf = f.getvalue()
            if hasattr(self.path_or_buf, "write"):
                self.path_or_buf.write(buf)
            else:
                compression = dict(self.compression_args, method=self.compression)

                # rebinds ``f`` from the StringIO buffer to the real handle
                f, handles = get_handle(
                    self.path_or_buf,
                    self.mode,
                    encoding=self.encoding,
                    compression=compression,
                )
                f.write(buf)
                close = True
        if close:
            f.close()
            for _fh in handles:
                _fh.close()
        elif self.should_close:
            f.close()
def _save(self) -> None:
    """
    Write the header, then the data rows in blocks of ``self.chunksize``.

    Each block is delegated to ``self._save_chunk(start, stop)`` with a
    half-open row range; nothing is delegated for an empty index.
    """
    self._save_header()

    total = len(self.data_index)
    step = self.chunksize

    # write in chunksize bites
    for chunk_no in range(int(total / step) + 1):
        lo = chunk_no * step
        hi = min(lo + step, total)
        if lo >= hi:
            # the row count was an exact multiple of the step (or zero)
            break
        self._save_chunk(lo, hi)
class ExcelCell:
    """
    Plain record describing one cell to be written to a worksheet.

    ``row``/``col`` give the position, ``val`` the content and ``style`` an
    optional style description.  ``mergestart``/``mergeend`` are left as
    None unless the cell is the anchor of a merged range.
    """

    __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend")
    __slots__ = __fields__

    def __init__(
        self, row: int, col: int, val, style=None, mergestart=None, mergeend=None
    ):
        self.row, self.col, self.val = row, col, val
        self.style = style
        self.mergestart, self.mergeend = mergestart, mergeend
def build_xlstyle(self, props: Dict[str, str]) -> Dict[str, Dict[str, str]]:
    """
    Assemble the nested Excel style dict from resolved CSS properties.

    Each style group is produced by the matching ``build_*`` method.
    Entries whose value is None are dropped, and so is any (nested) dict
    that ends up empty.

    Parameters
    ----------
    props : dict
        Atomic CSS properties.

    Returns
    -------
    dict
    """
    # TODO: handle cell width and height: needs support in pandas.io.excel
    builders = (
        ("alignment", self.build_alignment),
        ("border", self.build_border),
        ("fill", self.build_fill),
        ("font", self.build_font),
        ("number_format", self.build_number_format),
    )
    style = {group: build(props) for group, build in builders}

    def prune(mapping) -> None:
        """Delete None values and emptied dicts, depth first, in place."""
        for key in list(mapping):
            value = mapping[key]
            if isinstance(value, dict):
                prune(value)
                drop = not value
            else:
                # falsy non-None values such as False are kept
                drop = value is None
            if drop:
                del mapping[key]

    prune(style)
    return style
def _border_style(self, style: Optional[str], width):
    """
    Map a CSS border style and width to an OpenXML border style name.

    Parameters
    ----------
    style : str or None
        CSS ``border-*-style`` value.
    width : str or None
        Border width as a string whose last two characters are the unit,
        e.g. ``"2pt"``; a missing width is treated as ``"2pt"``.

    Returns
    -------
    str or None
        One of "thin", "medium", "thick", "double", "dotted", "dashed",
        "mediumDashDotDot" or "mediumDashed"; None when no border applies
        or the style is not recognised.
    """
    if style is None and width is None:
        return None
    if style in ("none", "hidden"):
        return None

    points = float(("2pt" if width is None else width)[:-2])
    if points < 1e-5:
        return None
    if points < 1.3:
        weight = "thin"
    elif points < 2.8:
        weight = "medium"
    else:
        weight = "thick"

    if style in (None, "groove", "ridge", "inset", "outset"):
        # not handled
        style = "solid"

    is_light = weight in ("hair", "thin")
    if style == "double":
        return "double"
    if style == "solid":
        return weight
    if style == "dotted":
        return "dotted" if is_light else "mediumDashDotDot"
    if style == "dashed":
        return "dashed" if is_light else "mediumDashed"
    return None
props.get("font-size") + if size is not None: + assert size.endswith("pt") + size = float(size[:-2]) + + font_names_tmp = re.findall( + r"""(?x) + ( + "(?:[^"]|\\")+" + | + '(?:[^']|\\')+' + | + [^'",]+ + )(?=,|\s*$) + """, + props.get("font-family", ""), + ) + font_names = [] + for name in font_names_tmp: + if name[:1] == '"': + name = name[1:-1].replace('\\"', '"') + elif name[:1] == "'": + name = name[1:-1].replace("\\'", "'") + else: + name = name.strip() + if name: + font_names.append(name) + + family = None + for name in font_names: + if name == "serif": + family = 1 # roman + break + elif name == "sans-serif": + family = 2 # swiss + break + elif name == "cursive": + family = 4 # script + break + elif name == "fantasy": + family = 5 # decorative + break + + decoration = props.get("text-decoration") + if decoration is not None: + decoration = decoration.split() + else: + decoration = () + + return { + "name": font_names[0] if font_names else None, + "family": family, + "size": size, + "bold": self.BOLD_MAP.get(props.get("font-weight")), + "italic": self.ITALIC_MAP.get(props.get("font-style")), + "underline": ("single" if "underline" in decoration else None), + "strike": ("line-through" in decoration) or None, + "color": self.color_to_excel(props.get("color")), + # shadow if nonzero digit before shadow color + "shadow": ( + bool(re.search("^[^#(]*[1-9]", props["text-shadow"])) + if "text-shadow" in props + else None + ), + # FIXME: dont leave commented-out + # 'vertAlign':, + # 'charset': , + # 'scheme': , + # 'outline': , + # 'condense': , + } + + NAMED_COLORS = { + "maroon": "800000", + "brown": "A52A2A", + "red": "FF0000", + "pink": "FFC0CB", + "orange": "FFA500", + "yellow": "FFFF00", + "olive": "808000", + "green": "008000", + "purple": "800080", + "fuchsia": "FF00FF", + "lime": "00FF00", + "teal": "008080", + "aqua": "00FFFF", + "blue": "0000FF", + "navy": "000080", + "black": "000000", + "gray": "808080", + "grey": "808080", + "silver": "C0C0C0", + 
"white": "FFFFFF", + } + + def color_to_excel(self, val: Optional[str]): + if val is None: + return None + if val.startswith("#") and len(val) == 7: + return val[1:].upper() + if val.startswith("#") and len(val) == 4: + return (val[1] * 2 + val[2] * 2 + val[3] * 2).upper() + try: + return self.NAMED_COLORS[val] + except KeyError: + warnings.warn(f"Unhandled color format: {repr(val)}", CSSWarning) + + def build_number_format(self, props: Dict) -> Dict[str, Optional[str]]: + return {"format_code": props.get("number-format")} + + +class ExcelFormatter: + """ + Class for formatting a DataFrame to a list of ExcelCells, + + Parameters + ---------- + df : DataFrame or Styler + na_rep: na representation + float_format : string, default None + Format string for floating point numbers + cols : sequence, optional + Columns to write + header : boolean or list of string, default True + Write out column names. If a list of string is given it is + assumed to be aliases for the column names + index : boolean, default True + output row names (index) + index_label : string or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. A + sequence should be given if the DataFrame uses MultiIndex. + merge_cells : boolean, default False + Format MultiIndex and Hierarchical Rows as merged cells. + inf_rep : string, default `'inf'` + representation for np.inf values (which aren't representable in Excel) + A `'-'` sign will be added in front of -inf. + style_converter : callable, optional + This translates Styler styles (CSS) into ExcelWriter styles. + Defaults to ``CSSToExcelConverter()``. + It should have signature css_declarations string -> excel style. + This is only called for body cells. 
+ """ + + max_rows = 2 ** 20 + max_cols = 2 ** 14 + + def __init__( + self, + df, + na_rep: str = "", + float_format: Optional[str] = None, + cols: Optional[Sequence] = None, + header: Union[bool, List[str]] = True, + index: bool = True, + index_label: Union[str, Sequence, None] = None, + merge_cells: bool = False, + inf_rep: str = "inf", + style_converter: Optional[Callable] = None, + ): + self.rowcounter = 0 + self.na_rep = na_rep + if hasattr(df, "render"): + self.styler = df + df = df.data + if style_converter is None: + style_converter = CSSToExcelConverter() + self.style_converter = style_converter + else: + self.styler = None + self.df = df + if cols is not None: + + # all missing, raise + if not len(Index(cols) & df.columns): + raise KeyError("passes columns are not ALL present dataframe") + + if len(Index(cols) & df.columns) != len(cols): + # Deprecated in GH#17295, enforced in 1.0.0 + raise KeyError("Not all names specified in 'columns' are found") + + self.df = df.reindex(columns=cols) + + self.columns = self.df.columns + self.float_format = float_format + self.index = index + self.index_label = index_label + self.header = header + self.merge_cells = merge_cells + self.inf_rep = inf_rep + + @property + def header_style(self): + return { + "font": {"bold": True}, + "borders": { + "top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin", + }, + "alignment": {"horizontal": "center", "vertical": "top"}, + } + + def _format_value(self, val): + if is_scalar(val) and missing.isna(val): + val = self.na_rep + elif is_float(val): + if missing.isposinf_scalar(val): + val = self.inf_rep + elif missing.isneginf_scalar(val): + val = f"-{self.inf_rep}" + elif self.float_format is not None: + val = float(self.float_format % val) + if getattr(val, "tzinfo", None) is not None: + raise ValueError( + "Excel does not support datetimes with " + "timezones. Please ensure that datetimes " + "are timezone unaware before writing to Excel." 
+ ) + return val + + def _format_header_mi(self): + if self.columns.nlevels > 1: + if not self.index: + raise NotImplementedError( + "Writing to Excel with MultiIndex columns and no " + "index ('index'=False) is not yet implemented." + ) + + has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) + if not (has_aliases or self.header): + return + + columns = self.columns + level_strs = columns.format( + sparsify=self.merge_cells, adjoin=False, names=False + ) + level_lengths = get_level_lengths(level_strs) + coloffset = 0 + lnum = 0 + + if self.index and isinstance(self.df.index, ABCMultiIndex): + coloffset = len(self.df.index[0]) - 1 + + if self.merge_cells: + # Format multi-index as a merged cells. + for lnum in range(len(level_lengths)): + name = columns.names[lnum] + yield ExcelCell(lnum, coloffset, name, self.header_style) + + for lnum, (spans, levels, level_codes) in enumerate( + zip(level_lengths, columns.levels, columns.codes) + ): + values = levels.take(level_codes) + for i in spans: + if spans[i] > 1: + yield ExcelCell( + lnum, + coloffset + i + 1, + values[i], + self.header_style, + lnum, + coloffset + i + spans[i], + ) + else: + yield ExcelCell( + lnum, coloffset + i + 1, values[i], self.header_style + ) + else: + # Format in legacy format with dots to indicate levels. 
+ for i, values in enumerate(zip(*level_strs)): + v = ".".join(map(pprint_thing, values)) + yield ExcelCell(lnum, coloffset + i + 1, v, self.header_style) + + self.rowcounter = lnum + + def _format_header_regular(self): + has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) + if has_aliases or self.header: + coloffset = 0 + + if self.index: + coloffset = 1 + if isinstance(self.df.index, ABCMultiIndex): + coloffset = len(self.df.index[0]) + + colnames = self.columns + if has_aliases: + if len(self.header) != len(self.columns): + raise ValueError( + f"Writing {len(self.columns)} cols but got {len(self.header)} " + "aliases" + ) + else: + colnames = self.header + + for colindex, colname in enumerate(colnames): + yield ExcelCell( + self.rowcounter, colindex + coloffset, colname, self.header_style + ) + + def _format_header(self): + if isinstance(self.columns, ABCMultiIndex): + gen = self._format_header_mi() + else: + gen = self._format_header_regular() + + gen2 = () + if self.df.index.names: + row = [x if x is not None else "" for x in self.df.index.names] + [ + "" + ] * len(self.columns) + if reduce(lambda x, y: x and y, map(lambda x: x != "", row)): + gen2 = ( + ExcelCell(self.rowcounter, colindex, val, self.header_style) + for colindex, val in enumerate(row) + ) + self.rowcounter += 1 + return itertools.chain(gen, gen2) + + def _format_body(self): + if isinstance(self.df.index, ABCMultiIndex): + return self._format_hierarchical_rows() + else: + return self._format_regular_rows() + + def _format_regular_rows(self): + has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) + if has_aliases or self.header: + self.rowcounter += 1 + + # output index and index_label? 
+ if self.index: + # check aliases + # if list only take first as this is not a MultiIndex + if self.index_label and isinstance( + self.index_label, (list, tuple, np.ndarray, Index) + ): + index_label = self.index_label[0] + # if string good to go + elif self.index_label and isinstance(self.index_label, str): + index_label = self.index_label + else: + index_label = self.df.index.names[0] + + if isinstance(self.columns, ABCMultiIndex): + self.rowcounter += 1 + + if index_label and self.header is not False: + yield ExcelCell(self.rowcounter - 1, 0, index_label, self.header_style) + + # write index_values + index_values = self.df.index + if isinstance(self.df.index, ABCPeriodIndex): + index_values = self.df.index.to_timestamp() + + for idx, idxval in enumerate(index_values): + yield ExcelCell(self.rowcounter + idx, 0, idxval, self.header_style) + + coloffset = 1 + else: + coloffset = 0 + + for cell in self._generate_body(coloffset): + yield cell + + def _format_hierarchical_rows(self): + has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) + if has_aliases or self.header: + self.rowcounter += 1 + + gcolidx = 0 + + if self.index: + index_labels = self.df.index.names + # check for aliases + if self.index_label and isinstance( + self.index_label, (list, tuple, np.ndarray, Index) + ): + index_labels = self.index_label + + # MultiIndex columns require an extra row + # with index names (blank if None) for + # unambiguous round-trip, unless not merging, + # in which case the names all go on one row Issue #11328 + if isinstance(self.columns, ABCMultiIndex) and self.merge_cells: + self.rowcounter += 1 + + # if index labels are not empty go ahead and dump + if com.any_not_none(*index_labels) and self.header is not False: + + for cidx, name in enumerate(index_labels): + yield ExcelCell(self.rowcounter - 1, cidx, name, self.header_style) + + if self.merge_cells: + # Format hierarchical rows as merged cells. 
+ level_strs = self.df.index.format( + sparsify=True, adjoin=False, names=False + ) + level_lengths = get_level_lengths(level_strs) + + for spans, levels, level_codes in zip( + level_lengths, self.df.index.levels, self.df.index.codes + ): + + values = levels.take( + level_codes, allow_fill=levels._can_hold_na, fill_value=True + ) + + for i in spans: + if spans[i] > 1: + yield ExcelCell( + self.rowcounter + i, + gcolidx, + values[i], + self.header_style, + self.rowcounter + i + spans[i] - 1, + gcolidx, + ) + else: + yield ExcelCell( + self.rowcounter + i, + gcolidx, + values[i], + self.header_style, + ) + gcolidx += 1 + + else: + # Format hierarchical rows with non-merged values. + for indexcolvals in zip(*self.df.index): + for idx, indexcolval in enumerate(indexcolvals): + yield ExcelCell( + self.rowcounter + idx, + gcolidx, + indexcolval, + self.header_style, + ) + gcolidx += 1 + + for cell in self._generate_body(gcolidx): + yield cell + + def _generate_body(self, coloffset: int): + if self.styler is None: + styles = None + else: + styles = self.styler._compute().ctx + if not styles: + styles = None + xlstyle = None + + # Write the body of the frame data series by series. 
+ for colidx in range(len(self.columns)): + series = self.df.iloc[:, colidx] + for i, val in enumerate(series): + if styles is not None: + xlstyle = self.style_converter(";".join(styles[i, colidx])) + yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle) + + def get_formatted_cells(self): + for cell in itertools.chain(self._format_header(), self._format_body()): + cell.val = self._format_value(cell.val) + yield cell + + def write( + self, + writer, + sheet_name="Sheet1", + startrow=0, + startcol=0, + freeze_panes=None, + engine=None, + ): + """ + writer : string or ExcelWriter object + File path or existing ExcelWriter + sheet_name : string, default 'Sheet1' + Name of sheet which will contain DataFrame + startrow : + upper left cell row to dump data frame + startcol : + upper left cell column to dump data frame + freeze_panes : tuple of integer (length 2), default None + Specifies the one-based bottommost row and rightmost column that + is to be frozen + engine : string, default None + write engine to use if writer is a path - you can also set this + via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, + and ``io.excel.xlsm.writer``. + """ + from pandas.io.excel import ExcelWriter + + num_rows, num_cols = self.df.shape + if num_rows > self.max_rows or num_cols > self.max_cols: + raise ValueError( + f"This sheet is too large! 
Your sheet size is: {num_rows}, {num_cols} " + f"Max sheet size is: {self.max_rows}, {self.max_cols}" + ) + + if isinstance(writer, ExcelWriter): + need_save = False + else: + writer = ExcelWriter(stringify_path(writer), engine=engine) + need_save = True + + formatted_cells = self.get_formatted_cells() + writer.write_cells( + formatted_cells, + sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + ) + if need_save: + writer.save() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py new file mode 100644 index 00000000..9578bb9f --- /dev/null +++ b/pandas/io/formats/format.py @@ -0,0 +1,1992 @@ +""" +Internal module for formatting output data in csv, html, +and latex files. This module also applies to display formatting. +""" + +from contextlib import contextmanager +from datetime import tzinfo +import decimal +from functools import partial +from io import StringIO +import math +import re +from shutil import get_terminal_size +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Mapping, + Optional, + Sequence, + Tuple, + Type, + Union, + cast, +) +from unicodedata import east_asian_width + +import numpy as np + +from pandas._config.config import get_option, set_option + +from pandas._libs import lib +from pandas._libs.missing import NA +from pandas._libs.tslib import format_array_from_datetime +from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT +from pandas._libs.tslibs.nattype import NaTType +from pandas._typing import FilePathOrBuffer +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_complex_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_extension_array_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_list_like, + is_numeric_dtype, + is_scalar, + is_timedelta64_dtype, +) +from pandas.core.dtypes.generic import ( + ABCIndexClass, + ABCMultiIndex, + 
ABCSeries, + ABCSparseArray, +) +from pandas.core.dtypes.missing import isna, notna + +from pandas.core.arrays.datetimes import DatetimeArray +from pandas.core.arrays.timedeltas import TimedeltaArray +from pandas.core.base import PandasObject +import pandas.core.common as com +from pandas.core.indexes.api import Index, ensure_index +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + +from pandas.io.common import stringify_path +from pandas.io.formats.printing import adjoin, justify, pprint_thing + +if TYPE_CHECKING: + from pandas import Series, DataFrame, Categorical + +formatters_type = Union[ + List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] +] +float_format_type = Union[str, Callable, "EngFormatter"] + +common_docstring = """ + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : sequence, optional, default None + The subset of columns to write. Writes all columns by default. + col_space : %(col_space_type)s, optional + %(col_space)s. + header : %(header_type)s, optional + %(header)s. + index : bool, optional, default True + Whether to print index (row) labels. + na_rep : str, optional, default 'NaN' + String representation of NAN to use. + formatters : list, tuple or dict of one-param. functions, optional + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format : one-parameter function, optional, default None + Formatter function to apply to columns' elements if they are + floats. The result of this function must be a unicode string. + sparsify : bool, optional, default True + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. 
+ index_names : bool, optional, default True + Prints the names of the indexes. + justify : str, default None + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset. + max_rows : int, optional + Maximum number of rows to display in the console. + min_rows : int, optional + The number of rows to display in the console in a truncated repr + (when number of rows is above `max_rows`). + max_cols : int, optional + Maximum number of columns to display in the console. + show_dimensions : bool, default False + Display DataFrame dimensions (number of rows by number of columns). + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + """ + +_VALID_JUSTIFY_PARAMETERS = ( + "left", + "right", + "center", + "justify", + "justify-all", + "start", + "end", + "inherit", + "match-parent", + "initial", + "unset", +) + +return_docstring = """ + Returns + ------- + str or None + If buf is None, returns the result as a string. Otherwise returns + None. 
+ """ + + +class CategoricalFormatter: + def __init__( + self, + categorical: "Categorical", + buf: Optional[IO[str]] = None, + length: bool = True, + na_rep: str = "NaN", + footer: bool = True, + ): + self.categorical = categorical + self.buf = buf if buf is not None else StringIO("") + self.na_rep = na_rep + self.length = length + self.footer = footer + + def _get_footer(self) -> str: + footer = "" + + if self.length: + if footer: + footer += ", " + footer += "Length: {length}".format(length=len(self.categorical)) + + level_info = self.categorical._repr_categories_info() + + # Levels are added in a newline + if footer: + footer += "\n" + footer += level_info + + return str(footer) + + def _get_formatted_values(self) -> List[str]: + return format_array( + self.categorical._internal_get_values(), + None, + float_format=None, + na_rep=self.na_rep, + ) + + def to_string(self) -> str: + categorical = self.categorical + + if len(categorical) == 0: + if self.footer: + return self._get_footer() + else: + return "" + + fmt_values = self._get_formatted_values() + + fmt_values = ["{i}".format(i=i) for i in fmt_values] + fmt_values = [i.strip() for i in fmt_values] + values = ", ".join(fmt_values) + result = ["[" + values + "]"] + if self.footer: + footer = self._get_footer() + if footer: + result.append(footer) + + return str("\n".join(result)) + + +class SeriesFormatter: + def __init__( + self, + series: "Series", + buf: Optional[IO[str]] = None, + length: Union[bool, str] = True, + header: bool = True, + index: bool = True, + na_rep: str = "NaN", + name: bool = False, + float_format: Optional[str] = None, + dtype: bool = True, + max_rows: Optional[int] = None, + min_rows: Optional[int] = None, + ): + self.series = series + self.buf = buf if buf is not None else StringIO() + self.name = name + self.na_rep = na_rep + self.header = header + self.length = length + self.index = index + self.max_rows = max_rows + self.min_rows = min_rows + + if float_format is None: + 
float_format = get_option("display.float_format") + self.float_format = float_format + self.dtype = dtype + self.adj = _get_adjustment() + + self._chk_truncate() + + def _chk_truncate(self) -> None: + from pandas.core.reshape.concat import concat + + self.tr_row_num: Optional[int] + + min_rows = self.min_rows + max_rows = self.max_rows + # truncation determined by max_rows, actual truncated number of rows + # used below by min_rows + truncate_v = max_rows and (len(self.series) > max_rows) + series = self.series + if truncate_v: + max_rows = cast(int, max_rows) + if min_rows: + # if min_rows is set (not None or 0), set max_rows to minimum + # of both + max_rows = min(min_rows, max_rows) + if max_rows == 1: + row_num = max_rows + series = series.iloc[:max_rows] + else: + row_num = max_rows // 2 + series = concat((series.iloc[:row_num], series.iloc[-row_num:])) + self.tr_row_num = row_num + else: + self.tr_row_num = None + self.tr_series = series + self.truncate_v = truncate_v + + def _get_footer(self) -> str: + name = self.series.name + footer = "" + + if getattr(self.series.index, "freq", None) is not None: + footer += "Freq: {freq}".format(freq=self.series.index.freqstr) + + if self.name is not False and name is not None: + if footer: + footer += ", " + + series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n")) + footer += ( + ("Name: {sname}".format(sname=series_name)) if name is not None else "" + ) + + if self.length is True or (self.length == "truncate" and self.truncate_v): + if footer: + footer += ", " + footer += "Length: {length}".format(length=len(self.series)) + + if self.dtype is not False and self.dtype is not None: + name = getattr(self.tr_series.dtype, "name", None) + if name: + if footer: + footer += ", " + footer += "dtype: {typ}".format(typ=pprint_thing(name)) + + # level infos are added to the end and in a new line, like it is done + # for Categoricals + if is_categorical_dtype(self.tr_series.dtype): + level_info = 
self.tr_series._values._repr_categories_info() + if footer: + footer += "\n" + footer += level_info + + return str(footer) + + def _get_formatted_index(self) -> Tuple[List[str], bool]: + index = self.tr_series.index + is_multi = isinstance(index, ABCMultiIndex) + + if is_multi: + have_header = any(name for name in index.names) + fmt_index = index.format(names=True) + else: + have_header = index.name is not None + fmt_index = index.format(name=True) + return fmt_index, have_header + + def _get_formatted_values(self) -> List[str]: + return format_array( + self.tr_series._values, + None, + float_format=self.float_format, + na_rep=self.na_rep, + ) + + def to_string(self) -> str: + series = self.tr_series + footer = self._get_footer() + + if len(series) == 0: + return "{name}([], {footer})".format( + name=type(self.series).__name__, footer=footer + ) + + fmt_index, have_header = self._get_formatted_index() + fmt_values = self._get_formatted_values() + + if self.truncate_v: + n_header_rows = 0 + row_num = self.tr_row_num + row_num = cast(int, row_num) + width = self.adj.len(fmt_values[row_num - 1]) + if width > 3: + dot_str = "..." + else: + dot_str = ".." 
+ # Series uses mode=center because it has single value columns + # DataFrame uses mode=left + dot_str = self.adj.justify([dot_str], width, mode="center")[0] + fmt_values.insert(row_num + n_header_rows, dot_str) + fmt_index.insert(row_num + 1, "") + + if self.index: + result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values]) + else: + result = self.adj.adjoin(3, fmt_values) + + if self.header and have_header: + result = fmt_index[0] + "\n" + result + + if footer: + result += "\n" + footer + + return str("".join(result)) + + +class TextAdjustment: + def __init__(self): + self.encoding = get_option("display.encoding") + + def len(self, text: str) -> int: + return len(text) + + def justify(self, texts: Any, max_len: int, mode: str = "right") -> List[str]: + return justify(texts, max_len, mode=mode) + + def adjoin(self, space: int, *lists, **kwargs) -> str: + return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs) + + +class EastAsianTextAdjustment(TextAdjustment): + def __init__(self): + super().__init__() + if get_option("display.unicode.ambiguous_as_wide"): + self.ambiguous_width = 2 + else: + self.ambiguous_width = 1 + + # Definition of East Asian Width + # http://unicode.org/reports/tr11/ + # Ambiguous width can be changed by option + self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1} + + def len(self, text: str) -> int: + """ + Calculate display width considering unicode East Asian Width + """ + if not isinstance(text, str): + return len(text) + + return sum( + self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text + ) + + def justify( + self, texts: Iterable[str], max_len: int, mode: str = "right" + ) -> List[str]: + # re-calculate padding space per str considering East Asian Width + def _get_pad(t): + return max_len - self.len(t) + len(t) + + if mode == "left": + return [x.ljust(_get_pad(x)) for x in texts] + elif mode == "center": + return [x.center(_get_pad(x)) for x in texts] + else: + return 
[x.rjust(_get_pad(x)) for x in texts] + + +def _get_adjustment() -> TextAdjustment: + use_east_asian_width = get_option("display.unicode.east_asian_width") + if use_east_asian_width: + return EastAsianTextAdjustment() + else: + return TextAdjustment() + + +class TableFormatter: + + show_dimensions: Union[bool, str] + is_truncated: bool + formatters: formatters_type + columns: Index + + @property + def should_show_dimensions(self) -> bool: + return self.show_dimensions is True or ( + self.show_dimensions == "truncate" and self.is_truncated + ) + + def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]: + if isinstance(self.formatters, (list, tuple)): + if is_integer(i): + i = cast(int, i) + return self.formatters[i] + else: + return None + else: + if is_integer(i) and i not in self.columns: + i = self.columns[i] + return self.formatters.get(i, None) + + @contextmanager + def get_buffer( + self, buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None + ): + """ + Context manager to open, yield and close buffer for filenames or Path-like + objects, otherwise yield buf unchanged. + """ + if buf is not None: + buf = stringify_path(buf) + else: + buf = StringIO() + + if encoding is None: + encoding = "utf-8" + elif not isinstance(buf, str): + raise ValueError("buf is not a file name and encoding is specified.") + + if hasattr(buf, "write"): + yield buf + elif isinstance(buf, str): + with open(buf, "w", encoding=encoding, newline="") as f: + # GH#30034 open instead of codecs.open prevents a file leak + # if we have an invalid encoding argument. + # newline="" is needed to roundtrip correctly on + # windows test_to_latex_filename + yield f + else: + raise TypeError("buf is not a file name and it has no write method") + + def write_result(self, buf: IO[str]) -> None: + """ + Write the result of serialization to buf. 
+ """ + raise AbstractMethodError(self) + + def get_result( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, + ) -> Optional[str]: + """ + Perform serialization. Write to buf or return as string if buf is None. + """ + with self.get_buffer(buf, encoding=encoding) as f: + self.write_result(buf=f) + if buf is None: + return f.getvalue() + return None + + +class DataFrameFormatter(TableFormatter): + """ + Render a DataFrame + + self.to_string() : console-friendly tabular output + self.to_html() : html table + self.to_latex() : LaTeX tabular environment table + + """ + + __doc__ = __doc__ if __doc__ else "" + __doc__ += common_docstring + return_docstring + + def __init__( + self, + frame: "DataFrame", + columns: Optional[Sequence[str]] = None, + col_space: Optional[Union[str, int]] = None, + header: Union[bool, Sequence[str]] = True, + index: bool = True, + na_rep: str = "NaN", + formatters: Optional[formatters_type] = None, + justify: Optional[str] = None, + float_format: Optional[float_format_type] = None, + sparsify: Optional[bool] = None, + index_names: bool = True, + line_width: Optional[int] = None, + max_rows: Optional[int] = None, + min_rows: Optional[int] = None, + max_cols: Optional[int] = None, + show_dimensions: Union[bool, str] = False, + decimal: str = ".", + table_id: Optional[str] = None, + render_links: bool = False, + bold_rows: bool = False, + escape: bool = True, + ): + self.frame = frame + self.show_index_names = index_names + + if sparsify is None: + sparsify = get_option("display.multi_sparse") + + self.sparsify = sparsify + + self.float_format = float_format + if formatters is None: + self.formatters = {} + elif len(frame.columns) == len(formatters) or isinstance(formatters, dict): + self.formatters = formatters + else: + raise ValueError( + ( + "Formatters length({flen}) should match " + "DataFrame number of columns({dlen})" + ).format(flen=len(formatters), dlen=len(frame.columns)) + ) + self.na_rep = 
na_rep + self.decimal = decimal + self.col_space = col_space + self.header = header + self.index = index + self.line_width = line_width + self.max_rows = max_rows + self.min_rows = min_rows + self.max_cols = max_cols + self.max_rows_displayed = min(max_rows or len(self.frame), len(self.frame)) + self.show_dimensions = show_dimensions + self.table_id = table_id + self.render_links = render_links + + if justify is None: + self.justify = get_option("display.colheader_justify") + else: + self.justify = justify + + self.bold_rows = bold_rows + self.escape = escape + + if columns is not None: + self.columns = ensure_index(columns) + self.frame = self.frame[self.columns] + else: + self.columns = frame.columns + + self._chk_truncate() + self.adj = _get_adjustment() + + def _chk_truncate(self) -> None: + """ + Checks whether the frame should be truncated. If so, slices + the frame up. + """ + from pandas.core.reshape.concat import concat + + # Cut the data to the information actually printed + max_cols = self.max_cols + max_rows = self.max_rows + self.max_rows_adj: Optional[int] + max_rows_adj: Optional[int] + + if max_cols == 0 or max_rows == 0: # assume we are in the terminal + (w, h) = get_terminal_size() + self.w = w + self.h = h + if self.max_rows == 0: + dot_row = 1 + prompt_row = 1 + if self.show_dimensions: + show_dimension_rows = 3 + # assume we only get here if self.header is boolean. + # i.e. 
not to_latex() where self.header may be List[str] + self.header = cast(bool, self.header) + n_add_rows = self.header + dot_row + show_dimension_rows + prompt_row + # rows available to fill with actual data + max_rows_adj = self.h - n_add_rows + self.max_rows_adj = max_rows_adj + + # Format only rows and columns that could potentially fit the + # screen + if max_cols == 0 and len(self.frame.columns) > w: + max_cols = w + if max_rows == 0 and len(self.frame) > h: + max_rows = h + + if not hasattr(self, "max_rows_adj"): + if max_rows: + if (len(self.frame) > max_rows) and self.min_rows: + # if truncated, set max_rows showed to min_rows + max_rows = min(self.min_rows, max_rows) + self.max_rows_adj = max_rows + if not hasattr(self, "max_cols_adj"): + self.max_cols_adj = max_cols + + max_cols_adj = self.max_cols_adj + max_rows_adj = self.max_rows_adj + + truncate_h = max_cols_adj and (len(self.columns) > max_cols_adj) + truncate_v = max_rows_adj and (len(self.frame) > max_rows_adj) + + frame = self.frame + if truncate_h: + # cast here since if truncate_h is True, max_cols_adj is not None + max_cols_adj = cast(int, max_cols_adj) + if max_cols_adj == 0: + col_num = len(frame.columns) + elif max_cols_adj == 1: + max_cols = cast(int, max_cols) + frame = frame.iloc[:, :max_cols] + col_num = max_cols + else: + col_num = max_cols_adj // 2 + frame = concat( + (frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1 + ) + # truncate formatter + if isinstance(self.formatters, (list, tuple)): + truncate_fmt = self.formatters + self.formatters = [ + *truncate_fmt[:col_num], + *truncate_fmt[-col_num:], + ] + self.tr_col_num = col_num + if truncate_v: + # cast here since if truncate_v is True, max_rows_adj is not None + max_rows_adj = cast(int, max_rows_adj) + if max_rows_adj == 1: + row_num = max_rows + frame = frame.iloc[:max_rows, :] + else: + row_num = max_rows_adj // 2 + frame = concat((frame.iloc[:row_num, :], frame.iloc[-row_num:, :])) + self.tr_row_num = row_num + else: + 
self.tr_row_num = None + + self.tr_frame = frame + self.truncate_h = truncate_h + self.truncate_v = truncate_v + self.is_truncated = bool(self.truncate_h or self.truncate_v) + + def _to_str_columns(self) -> List[List[str]]: + """ + Render a DataFrame to a list of columns (as lists of strings). + """ + # this method is not used by to_html where self.col_space + # could be a string so safe to cast + self.col_space = cast(int, self.col_space) + + frame = self.tr_frame + # may include levels names also + + str_index = self._get_formatted_index(frame) + + if not is_list_like(self.header) and not self.header: + stringified = [] + for i, c in enumerate(frame): + fmt_values = self._format_col(i) + fmt_values = _make_fixed_width( + fmt_values, + self.justify, + minimum=(self.col_space or 0), + adj=self.adj, + ) + stringified.append(fmt_values) + else: + if is_list_like(self.header): + # cast here since can't be bool if is_list_like + self.header = cast(List[str], self.header) + if len(self.header) != len(self.columns): + raise ValueError( + ( + "Writing {ncols} cols but got {nalias} " + "aliases".format( + ncols=len(self.columns), nalias=len(self.header) + ) + ) + ) + str_columns = [[label] for label in self.header] + else: + str_columns = self._get_formatted_column_labels(frame) + + if self.show_row_idx_names: + for x in str_columns: + x.append("") + + stringified = [] + for i, c in enumerate(frame): + cheader = str_columns[i] + header_colwidth = max( + self.col_space or 0, *(self.adj.len(x) for x in cheader) + ) + fmt_values = self._format_col(i) + fmt_values = _make_fixed_width( + fmt_values, self.justify, minimum=header_colwidth, adj=self.adj + ) + + max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth) + cheader = self.adj.justify(cheader, max_len, mode=self.justify) + stringified.append(cheader + fmt_values) + + strcols = stringified + if self.index: + strcols.insert(0, str_index) + + # Add ... 
to signal truncated + truncate_h = self.truncate_h + truncate_v = self.truncate_v + + if truncate_h: + col_num = self.tr_col_num + strcols.insert(self.tr_col_num + 1, [" ..."] * (len(str_index))) + if truncate_v: + n_header_rows = len(str_index) - len(frame) + row_num = self.tr_row_num + # cast here since if truncate_v is True, self.tr_row_num is not None + row_num = cast(int, row_num) + for ix, col in enumerate(strcols): + # infer from above row + cwidth = self.adj.len(strcols[ix][row_num]) + is_dot_col = False + if truncate_h: + is_dot_col = ix == col_num + 1 + if cwidth > 3 or is_dot_col: + my_str = "..." + else: + my_str = ".." + + if ix == 0: + dot_mode = "left" + elif is_dot_col: + cwidth = 4 + dot_mode = "right" + else: + dot_mode = "right" + dot_str = self.adj.justify([my_str], cwidth, mode=dot_mode)[0] + strcols[ix].insert(row_num + n_header_rows, dot_str) + return strcols + + def write_result(self, buf: IO[str]) -> None: + """ + Render a DataFrame to a console-friendly tabular output. + """ + from pandas import Series + + frame = self.frame + + if len(frame.columns) == 0 or len(frame.index) == 0: + info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format( + name=type(self.frame).__name__, + col=pprint_thing(frame.columns), + idx=pprint_thing(frame.index), + ) + text = info_line + else: + + strcols = self._to_str_columns() + if self.line_width is None: # no need to wrap around just print + # the whole frame + text = self.adj.adjoin(1, *strcols) + elif ( + not isinstance(self.max_cols, int) or self.max_cols > 0 + ): # need to wrap around + text = self._join_multiline(*strcols) + else: # max_cols == 0. 
Try to fit frame to terminal + lines = self.adj.adjoin(1, *strcols).split("\n") + max_len = Series(lines).str.len().max() + # plus truncate dot col + dif = max_len - self.w + # '+ 1' to avoid too wide repr (GH PR #17023) + adj_dif = dif + 1 + col_lens = Series([Series(ele).apply(len).max() for ele in strcols]) + n_cols = len(col_lens) + counter = 0 + while adj_dif > 0 and n_cols > 1: + counter += 1 + mid = int(round(n_cols / 2.0)) + mid_ix = col_lens.index[mid] + col_len = col_lens[mid_ix] + # adjoin adds one + adj_dif -= col_len + 1 + col_lens = col_lens.drop(mid_ix) + n_cols = len(col_lens) + # subtract index column + max_cols_adj = n_cols - self.index + # GH-21180. Ensure that we print at least two. + max_cols_adj = max(max_cols_adj, 2) + self.max_cols_adj = max_cols_adj + + # Call again _chk_truncate to cut frame appropriately + # and then generate string representation + self._chk_truncate() + strcols = self._to_str_columns() + text = self.adj.adjoin(1, *strcols) + buf.writelines(text) + + if self.should_show_dimensions: + buf.write( + "\n\n[{nrows} rows x {ncols} columns]".format( + nrows=len(frame), ncols=len(frame.columns) + ) + ) + + def _join_multiline(self, *args) -> str: + lwidth = self.line_width + adjoin_width = 1 + strcols = list(args) + if self.index: + idx = strcols.pop(0) + lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width + + col_widths = [ + np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0 + for col in strcols + ] + + assert lwidth is not None + col_bins = _binify(col_widths, lwidth) + nbins = len(col_bins) + + if self.truncate_v: + # cast here since if truncate_v is True, max_rows_adj is not None + self.max_rows_adj = cast(int, self.max_rows_adj) + nrows = self.max_rows_adj + 1 + else: + nrows = len(self.frame) + + str_lst = [] + st = 0 + for i, ed in enumerate(col_bins): + row = strcols[st:ed] + if self.index: + row.insert(0, idx) + if nbins > 1: + if ed <= len(strcols) and i < nbins - 1: + 
row.append([" \\"] + [" "] * (nrows - 1)) + else: + row.append([" "] * nrows) + str_lst.append(self.adj.adjoin(adjoin_width, *row)) + st = ed + return "\n\n".join(str_lst) + + def to_string( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, + ) -> Optional[str]: + return self.get_result(buf=buf, encoding=encoding) + + def to_latex( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + column_format: Optional[str] = None, + longtable: bool = False, + encoding: Optional[str] = None, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, + ) -> Optional[str]: + """ + Render a DataFrame to a LaTeX tabular/longtable environment output. + """ + + from pandas.io.formats.latex import LatexFormatter + + return LatexFormatter( + self, + column_format=column_format, + longtable=longtable, + multicolumn=multicolumn, + multicolumn_format=multicolumn_format, + multirow=multirow, + caption=caption, + label=label, + ).get_result(buf=buf, encoding=encoding) + + def _format_col(self, i: int) -> List[str]: + frame = self.tr_frame + formatter = self._get_formatter(i) + return format_array( + frame.iloc[:, i]._values, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + space=self.col_space, + decimal=self.decimal, + ) + + def to_html( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, + classes: Optional[Union[str, List, Tuple]] = None, + notebook: bool = False, + border: Optional[int] = None, + ) -> Optional[str]: + """ + Render a DataFrame to a html table. + + Parameters + ---------- + classes : str or list-like + classes to include in the `class` attribute of the opening + ``
    `` tag, in addition to the default "dataframe". + notebook : {True, False}, optional, default False + Whether the generated HTML is for IPython Notebook. + border : int + A ``border=border`` attribute is included in the opening + ``
    `` tag. Default ``pd.options.display.html.border``. + """ + from pandas.io.formats.html import HTMLFormatter, NotebookFormatter + + Klass = NotebookFormatter if notebook else HTMLFormatter + return Klass(self, classes=classes, border=border).get_result( + buf=buf, encoding=encoding + ) + + def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: + from pandas.core.indexes.multi import _sparsify + + columns = frame.columns + + if isinstance(columns, ABCMultiIndex): + fmt_columns = columns.format(sparsify=False, adjoin=False) + fmt_columns = list(zip(*fmt_columns)) + dtypes = self.frame.dtypes._values + + # if we have a Float level, they don't use leading space at all + restrict_formatting = any(l.is_floating for l in columns.levels) + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + + def space_format(x, y): + if ( + y not in self.formatters + and need_leadsp[x] + and not restrict_formatting + ): + return " " + y + return y + + str_columns = list( + zip(*[[space_format(x, y) for y in x] for x in fmt_columns]) + ) + if self.sparsify and len(str_columns): + str_columns = _sparsify(str_columns) + + str_columns = [list(x) for x in zip(*str_columns)] + else: + fmt_columns = columns.format() + dtypes = self.frame.dtypes + need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) + str_columns = [ + [" " + x if not self._get_formatter(i) and need_leadsp[x] else x] + for i, (col, x) in enumerate(zip(columns, fmt_columns)) + ] + # self.str_columns = str_columns + return str_columns + + @property + def has_index_names(self) -> bool: + return _has_names(self.frame.index) + + @property + def has_column_names(self) -> bool: + return _has_names(self.frame.columns) + + @property + def show_row_idx_names(self) -> bool: + return all((self.has_index_names, self.index, self.show_index_names)) + + @property + def show_col_idx_names(self) -> bool: + return all((self.has_column_names, self.show_index_names, self.header)) + + def 
_get_formatted_index(self, frame: "DataFrame") -> List[str]: + # Note: this is only used by to_string() and to_latex(), not by + # to_html(). so safe to cast col_space here. + self.col_space = cast(int, self.col_space) + index = frame.index + columns = frame.columns + fmt = self._get_formatter("__index__") + + if isinstance(index, ABCMultiIndex): + fmt_index = index.format( + sparsify=self.sparsify, + adjoin=False, + names=self.show_row_idx_names, + formatter=fmt, + ) + else: + fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)] + + fmt_index = [ + tuple( + _make_fixed_width( + list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj + ) + ) + for x in fmt_index + ] + + adjoined = self.adj.adjoin(1, *fmt_index).split("\n") + + # empty space for columns + if self.show_col_idx_names: + col_header = ["{x}".format(x=x) for x in self._get_column_name_list()] + else: + col_header = [""] * columns.nlevels + + if self.header: + return col_header + adjoined + else: + return adjoined + + def _get_column_name_list(self) -> List[str]: + names: List[str] = [] + columns = self.frame.columns + if isinstance(columns, ABCMultiIndex): + names.extend("" if name is None else name for name in columns.names) + else: + names.append("" if columns.name is None else columns.name) + return names + + +# ---------------------------------------------------------------------- +# Array formatters + + +def format_array( + values: Any, + formatter: Optional[Callable], + float_format: Optional[float_format_type] = None, + na_rep: str = "NaN", + digits: Optional[int] = None, + space: Optional[Union[str, int]] = None, + justify: str = "right", + decimal: str = ".", + leading_space: Optional[bool] = None, +) -> List[str]: + """ + Format an array for printing. + + Parameters + ---------- + values + formatter + float_format + na_rep + digits + space + justify + decimal + leading_space : bool, optional + Whether the array should be formatted with a leading space. 
+ When an array as a column of a Series or DataFrame, we do want + the leading space to pad between columns. + + When formatting an Index subclass + (e.g. IntervalIndex._format_native_types), we don't want the + leading space since it should be left-aligned. + + Returns + ------- + List[str] + """ + + fmt_klass: Type[GenericArrayFormatter] + if is_datetime64_dtype(values.dtype): + fmt_klass = Datetime64Formatter + elif is_datetime64tz_dtype(values): + fmt_klass = Datetime64TZFormatter + elif is_timedelta64_dtype(values.dtype): + fmt_klass = Timedelta64Formatter + elif is_extension_array_dtype(values.dtype): + fmt_klass = ExtensionArrayFormatter + elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype): + fmt_klass = FloatArrayFormatter + elif is_integer_dtype(values.dtype): + fmt_klass = IntArrayFormatter + else: + fmt_klass = GenericArrayFormatter + + if space is None: + space = get_option("display.column_space") + + if float_format is None: + float_format = get_option("display.float_format") + + if digits is None: + digits = get_option("display.precision") + + fmt_obj = fmt_klass( + values, + digits=digits, + na_rep=na_rep, + float_format=float_format, + formatter=formatter, + space=space, + justify=justify, + decimal=decimal, + leading_space=leading_space, + ) + + return fmt_obj.get_result() + + +class GenericArrayFormatter: + def __init__( + self, + values: Any, + digits: int = 7, + formatter: Optional[Callable] = None, + na_rep: str = "NaN", + space: Union[str, int] = 12, + float_format: Optional[float_format_type] = None, + justify: str = "right", + decimal: str = ".", + quoting: Optional[int] = None, + fixed_width: bool = True, + leading_space: Optional[bool] = None, + ): + self.values = values + self.digits = digits + self.na_rep = na_rep + self.space = space + self.formatter = formatter + self.float_format = float_format + self.justify = justify + self.decimal = decimal + self.quoting = quoting + self.fixed_width = fixed_width + 
self.leading_space = leading_space + + def get_result(self) -> List[str]: + fmt_values = self._format_strings() + return _make_fixed_width(fmt_values, self.justify) + + def _format_strings(self) -> List[str]: + if self.float_format is None: + float_format = get_option("display.float_format") + if float_format is None: + fmt_str = "{{x: .{prec:d}g}}".format( + prec=get_option("display.precision") + ) + float_format = lambda x: fmt_str.format(x=x) + else: + float_format = self.float_format + + formatter = ( + self.formatter + if self.formatter is not None + else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n"))) + ) + + def _format(x): + if self.na_rep is not None and is_scalar(x) and isna(x): + try: + # try block for np.isnat specifically + # determine na_rep if x is None or NaT-like + if x is None: + return "None" + elif x is NA: + return str(NA) + elif x is NaT or np.isnat(x): + return "NaT" + except (TypeError, ValueError): + # np.isnat only handles datetime or timedelta objects + pass + return self.na_rep + elif isinstance(x, PandasObject): + return "{x}".format(x=x) + else: + # object dtype + return "{x}".format(x=formatter(x)) + + vals = self.values + if isinstance(vals, Index): + vals = vals._values + elif isinstance(vals, ABCSparseArray): + vals = vals.values + + is_float_type = lib.map_infer(vals, is_float) & notna(vals) + leading_space = self.leading_space + if leading_space is None: + leading_space = is_float_type.any() + + fmt_values = [] + for i, v in enumerate(vals): + if not is_float_type[i] and leading_space: + fmt_values.append(" {v}".format(v=_format(v))) + elif is_float_type[i]: + fmt_values.append(float_format(v)) + else: + if leading_space is False: + # False specifically, so that the default is + # to include a space if we get here. 
+ tpl = "{v}" + else: + tpl = " {v}" + fmt_values.append(tpl.format(v=_format(v))) + + return fmt_values + + +class FloatArrayFormatter(GenericArrayFormatter): + """ + + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # float_format is expected to be a string + # formatter should be used to pass a function + if self.float_format is not None and self.formatter is None: + # GH21625, GH22270 + self.fixed_width = False + if callable(self.float_format): + self.formatter = self.float_format + self.float_format = None + + def _value_formatter( + self, + float_format: Optional[float_format_type] = None, + threshold: Optional[Union[float, int]] = None, + ) -> Callable: + """Returns a function to be applied on each value to format it + """ + + # the float_format parameter supersedes self.float_format + if float_format is None: + float_format = self.float_format + + # we are going to compose different functions, to first convert to + # a string, then replace the decimal symbol, and finally chop according + # to the threshold + + # when there is no float_format, we use str instead of '%g' + # because str(0.0) = '0.0' while '%g' % 0.0 = '0' + if float_format: + + def base_formatter(v): + return float_format(value=v) if notna(v) else self.na_rep + + else: + + def base_formatter(v): + return str(v) if notna(v) else self.na_rep + + if self.decimal != ".": + + def decimal_formatter(v): + return base_formatter(v).replace(".", self.decimal, 1) + + else: + decimal_formatter = base_formatter + + if threshold is None: + return decimal_formatter + + def formatter(value): + if notna(value): + if abs(value) > threshold: + return decimal_formatter(value) + else: + return decimal_formatter(0.0) + else: + return self.na_rep + + return formatter + + def get_result_as_array(self) -> np.ndarray: + """ + Returns the float values converted into strings using + the parameters given at initialisation, as a numpy array + """ + + if self.formatter is not None: + 
return np.array([self.formatter(x) for x in self.values]) + + if self.fixed_width: + threshold = get_option("display.chop_threshold") + else: + threshold = None + + # if we have a fixed_width, we'll need to try different float_format + def format_values_with(float_format): + formatter = self._value_formatter(float_format, threshold) + + # default formatter leaves a space to the left when formatting + # floats, must be consistent for left-justifying NaNs (GH #25061) + if self.justify == "left": + na_rep = " " + self.na_rep + else: + na_rep = self.na_rep + + # separate the wheat from the chaff + values = self.values + is_complex = is_complex_dtype(values) + mask = isna(values) + if hasattr(values, "to_dense"): # sparse numpy ndarray + values = values.to_dense() + values = np.array(values, dtype="object") + values[mask] = na_rep + imask = (~mask).ravel() + values.flat[imask] = np.array( + [formatter(val) for val in values.ravel()[imask]] + ) + + if self.fixed_width: + if is_complex: + result = _trim_zeros_complex(values, na_rep) + else: + result = _trim_zeros_float(values, na_rep) + return np.asarray(result, dtype="object") + + return values + + # There is a special default string when we are fixed-width + # The default is otherwise to use str instead of a formatting string + float_format: Optional[float_format_type] + if self.float_format is None: + if self.fixed_width: + float_format = partial( + "{value: .{digits:d}f}".format, digits=self.digits + ) + else: + float_format = self.float_format + else: + float_format = lambda value: self.float_format % value + + formatted_values = format_values_with(float_format) + + if not self.fixed_width: + return formatted_values + + # we need do convert to engineering format if some values are too small + # and would appear as 0, or if some values are too big and take too + # much space + + if len(formatted_values) > 0: + maxlen = max(len(x) for x in formatted_values) + too_long = maxlen > self.digits + 6 + else: + too_long = 
False + + with np.errstate(invalid="ignore"): + abs_vals = np.abs(self.values) + # this is pretty arbitrary for now + # large values: more that 8 characters including decimal symbol + # and first digit, hence > 1e6 + has_large_values = (abs_vals > 1e6).any() + has_small_values = ( + (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0) + ).any() + + if has_small_values or (too_long and has_large_values): + float_format = partial("{value: .{digits:d}e}".format, digits=self.digits) + formatted_values = format_values_with(float_format) + + return formatted_values + + def _format_strings(self) -> List[str]: + # shortcut + if self.formatter is not None: + return [self.formatter(x) for x in self.values] + + return list(self.get_result_as_array()) + + +class IntArrayFormatter(GenericArrayFormatter): + def _format_strings(self) -> List[str]: + formatter = self.formatter or (lambda x: "{x: d}".format(x=x)) + fmt_values = [formatter(x) for x in self.values] + return fmt_values + + +class Datetime64Formatter(GenericArrayFormatter): + def __init__( + self, + values: Union[np.ndarray, "Series", DatetimeIndex, DatetimeArray], + nat_rep: str = "NaT", + date_format: None = None, + **kwargs, + ): + super().__init__(values, **kwargs) + self.nat_rep = nat_rep + self.date_format = date_format + + def _format_strings(self) -> List[str]: + """ we by definition have DO NOT have a TZ """ + + values = self.values + + if not isinstance(values, DatetimeIndex): + values = DatetimeIndex(values) + + if self.formatter is not None and callable(self.formatter): + return [self.formatter(x) for x in values] + + fmt_values = format_array_from_datetime( + values.asi8.ravel(), + format=_get_format_datetime64_from_values(values, self.date_format), + na_rep=self.nat_rep, + ).reshape(values.shape) + return fmt_values.tolist() + + +class ExtensionArrayFormatter(GenericArrayFormatter): + def _format_strings(self) -> List[str]: + values = self.values + if isinstance(values, (ABCIndexClass, ABCSeries)): + values 
= values._values + + formatter = values._formatter(boxed=True) + + if is_categorical_dtype(values.dtype): + # Categorical is special for now, so that we can preserve tzinfo + array = values._internal_get_values() + else: + array = np.asarray(values) + + fmt_values = format_array( + array, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + digits=self.digits, + space=self.space, + justify=self.justify, + leading_space=self.leading_space, + ) + return fmt_values + + +def format_percentiles( + percentiles: Union[ + np.ndarray, List[Union[int, float]], List[float], List[Union[str, float]] + ] +) -> List[str]: + """ + Outputs rounded and formatted percentiles. + + Parameters + ---------- + percentiles : list-like, containing floats from interval [0,1] + + Returns + ------- + formatted : list of strings + + Notes + ----- + Rounding precision is chosen so that: (1) if any two elements of + ``percentiles`` differ, they remain different after rounding + (2) no entry is *rounded* to 0% or 100%. + Any non-integer is always rounded to at least 1 decimal place. + + Examples + -------- + Keeps all entries different after rounding: + + >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) + ['1.999%', '2.001%', '50%', '66.667%', '99.99%'] + + No element is rounded to 0% or 100% (unless already equal to it). 
+ Duplicates are allowed: + + >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) + ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%'] + """ + + percentiles = np.asarray(percentiles) + + # It checks for np.NaN as well + with np.errstate(invalid="ignore"): + if ( + not is_numeric_dtype(percentiles) + or not np.all(percentiles >= 0) + or not np.all(percentiles <= 1) + ): + raise ValueError("percentiles should all be in the interval [0,1]") + + percentiles = 100 * percentiles + int_idx = np.isclose(percentiles.astype(int), percentiles) + + if np.all(int_idx): + out = percentiles.astype(int).astype(str) + return [i + "%" for i in out] + + unique_pcts = np.unique(percentiles) + to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None + to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None + + # Least precision that keeps percentiles unique after rounding + prec = -np.floor( + np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end))) + ).astype(int) + prec = max(1, prec) + out = np.empty_like(percentiles, dtype=object) + out[int_idx] = percentiles[int_idx].astype(int).astype(str) + out[~int_idx] = percentiles[~int_idx].round(prec).astype(str) + return [i + "%" for i in out] + + +def _is_dates_only( + values: Union[np.ndarray, DatetimeArray, Index, DatetimeIndex] +) -> bool: + # return a boolean if we are only dates (and don't have a timezone) + assert values.ndim == 1 + + values = DatetimeIndex(values) + if values.tz is not None: + return False + + values_int = values.asi8 + consider_values = values_int != iNaT + one_day_nanos = 86400 * 1e9 + even_days = ( + np.logical_and(consider_values, values_int % int(one_day_nanos) != 0).sum() == 0 + ) + if even_days: + return True + return False + + +def _format_datetime64( + x: Union[NaTType, Timestamp], tz: Optional[tzinfo] = None, nat_rep: str = "NaT" +) -> str: + if x is None or (is_scalar(x) and isna(x)): + return nat_rep + + if tz is not None or not isinstance(x, Timestamp): + if 
getattr(x, "tzinfo", None) is not None: + x = Timestamp(x).tz_convert(tz) + else: + x = Timestamp(x).tz_localize(tz) + + return str(x) + + +def _format_datetime64_dateonly( + x: Union[NaTType, Timestamp], nat_rep: str = "NaT", date_format: None = None +) -> str: + if x is None or (is_scalar(x) and isna(x)): + return nat_rep + + if not isinstance(x, Timestamp): + x = Timestamp(x) + + if date_format: + return x.strftime(date_format) + else: + return x._date_repr + + +def _get_format_datetime64( + is_dates_only: bool, nat_rep: str = "NaT", date_format: None = None +) -> Callable: + + if is_dates_only: + return lambda x, tz=None: _format_datetime64_dateonly( + x, nat_rep=nat_rep, date_format=date_format + ) + else: + return lambda x, tz=None: _format_datetime64(x, tz=tz, nat_rep=nat_rep) + + +def _get_format_datetime64_from_values( + values: Union[np.ndarray, DatetimeArray, DatetimeIndex], date_format: Optional[str] +) -> Optional[str]: + """ given values and a date_format, return a string format """ + + if isinstance(values, np.ndarray) and values.ndim > 1: + # We don't actually care about the order of values, and DatetimeIndex + # only accepts 1D values + values = values.ravel() + + is_dates_only = _is_dates_only(values) + if is_dates_only: + return date_format or "%Y-%m-%d" + return date_format + + +class Datetime64TZFormatter(Datetime64Formatter): + def _format_strings(self) -> List[str]: + """ we by definition have a TZ """ + + values = self.values.astype(object) + is_dates_only = _is_dates_only(values) + formatter = self.formatter or _get_format_datetime64( + is_dates_only, date_format=self.date_format + ) + fmt_values = [formatter(x) for x in values] + + return fmt_values + + +class Timedelta64Formatter(GenericArrayFormatter): + def __init__( + self, + values: Union[np.ndarray, TimedeltaIndex], + nat_rep: str = "NaT", + box: bool = False, + **kwargs, + ): + super().__init__(values, **kwargs) + self.nat_rep = nat_rep + self.box = box + + def _format_strings(self) 
-> List[str]: + formatter = self.formatter or _get_format_timedelta64( + self.values, nat_rep=self.nat_rep, box=self.box + ) + return [formatter(x) for x in self.values] + + +def _get_format_timedelta64( + values: Union[np.ndarray, TimedeltaIndex, TimedeltaArray], + nat_rep: str = "NaT", + box: bool = False, +) -> Callable: + """ + Return a formatter function for a range of timedeltas. + These will all have the same format argument + + If box, then show the return in quotes + """ + + values_int = values.astype(np.int64) + + consider_values = values_int != iNaT + + one_day_nanos = 86400 * 1e9 + even_days = ( + np.logical_and(consider_values, values_int % one_day_nanos != 0).sum() == 0 + ) + all_sub_day = ( + np.logical_and(consider_values, np.abs(values_int) >= one_day_nanos).sum() == 0 + ) + + if even_days: + format = None + elif all_sub_day: + format = "sub_day" + else: + format = "long" + + def _formatter(x): + if x is None or (is_scalar(x) and isna(x)): + return nat_rep + + if not isinstance(x, Timedelta): + x = Timedelta(x) + result = x._repr_base(format=format) + if box: + result = "'{res}'".format(res=result) + return result + + return _formatter + + +def _make_fixed_width( + strings: List[str], + justify: str = "right", + minimum: Optional[int] = None, + adj: Optional[TextAdjustment] = None, +) -> List[str]: + + if len(strings) == 0 or justify == "all": + return strings + + if adj is None: + adj = _get_adjustment() + + max_len = max(adj.len(x) for x in strings) + + if minimum is not None: + max_len = max(minimum, max_len) + + conf_max = get_option("display.max_colwidth") + if conf_max is not None and max_len > conf_max: + max_len = conf_max + + def just(x): + if conf_max is not None: + if (conf_max > 3) & (adj.len(x) > max_len): + x = x[: max_len - 3] + "..." 
+ return x + + strings = [just(x) for x in strings] + result = adj.justify(strings, max_len, mode=justify) + return result + + +def _trim_zeros_complex(str_complexes: np.ndarray, na_rep: str = "NaN") -> List[str]: + """ + Separates the real and imaginary parts from the complex number, and + executes the _trim_zeros_float method on each of those. + """ + return [ + "".join(_trim_zeros_float(re.split(r"([j+-])", x), na_rep)) + for x in str_complexes + ] + + +def _trim_zeros_float( + str_floats: Union[np.ndarray, List[str]], na_rep: str = "NaN" +) -> List[str]: + """ + Trims zeros, leaving just one before the decimal points if need be. + """ + trimmed = str_floats + + def _is_number(x): + return x != na_rep and not x.endswith("inf") + + def _cond(values): + finite = [x for x in values if _is_number(x)] + return ( + len(finite) > 0 + and all(x.endswith("0") for x in finite) + and not (any(("e" in x) or ("E" in x) for x in finite)) + ) + + while _cond(trimmed): + trimmed = [x[:-1] if _is_number(x) else x for x in trimmed] + + # leave one 0 after the decimal points if need be. + return [x + "0" if x.endswith(".") and _is_number(x) else x for x in trimmed] + + +def _has_names(index: Index) -> bool: + if isinstance(index, ABCMultiIndex): + return com.any_not_none(*index.names) + else: + return index.name is not None + + +class EngFormatter: + """ + Formats float values according to engineering format. 
+ + Based on matplotlib.ticker.EngFormatter + """ + + # The SI engineering prefixes + ENG_PREFIXES = { + -24: "y", + -21: "z", + -18: "a", + -15: "f", + -12: "p", + -9: "n", + -6: "u", + -3: "m", + 0: "", + 3: "k", + 6: "M", + 9: "G", + 12: "T", + 15: "P", + 18: "E", + 21: "Z", + 24: "Y", + } + + def __init__(self, accuracy: Optional[int] = None, use_eng_prefix: bool = False): + self.accuracy = accuracy + self.use_eng_prefix = use_eng_prefix + + def __call__(self, num: Union[int, float]) -> str: + """ Formats a number in engineering notation, appending a letter + representing the power of 1000 of the original number. Some examples: + + >>> format_eng(0) # for self.accuracy = 0 + ' 0' + + >>> format_eng(1000000) # for self.accuracy = 1, + # self.use_eng_prefix = True + ' 1.0M' + + >>> format_eng("-1e-6") # for self.accuracy = 2 + # self.use_eng_prefix = False + '-1.00E-06' + + @param num: the value to represent + @type num: either a numeric value or a string that can be converted to + a numeric value (as per decimal.Decimal constructor) + + @return: engineering formatted string + """ + dnum = decimal.Decimal(str(num)) + + if decimal.Decimal.is_nan(dnum): + return "NaN" + + if decimal.Decimal.is_infinite(dnum): + return "inf" + + sign = 1 + + if dnum < 0: # pragma: no cover + sign = -1 + dnum = -dnum + + if dnum != 0: + pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3)) + else: + pow10 = decimal.Decimal(0) + + pow10 = pow10.min(max(self.ENG_PREFIXES.keys())) + pow10 = pow10.max(min(self.ENG_PREFIXES.keys())) + int_pow10 = int(pow10) + + if self.use_eng_prefix: + prefix = self.ENG_PREFIXES[int_pow10] + else: + if int_pow10 < 0: + prefix = "E-{pow10:02d}".format(pow10=-int_pow10) + else: + prefix = "E+{pow10:02d}".format(pow10=int_pow10) + + mant = sign * dnum / (10 ** pow10) + + if self.accuracy is None: # pragma: no cover + format_str = "{mant: g}{prefix}" + else: + format_str = "{{mant: .{acc:d}f}}{{prefix}}".format(acc=self.accuracy) + + formatted = 
format_str.format(mant=mant, prefix=prefix) + + return formatted + + +def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None: + """ + Alter default behavior on how float is formatted in DataFrame. + Format float in engineering format. By accuracy, we mean the number of + decimal digits after the floating point. + + See also EngFormatter. + """ + + set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix)) + set_option("display.column_space", max(12, accuracy + 9)) + + +def _binify(cols: List[int], line_width: int) -> List[int]: + adjoin_width = 1 + bins = [] + curr_width = 0 + i_last_column = len(cols) - 1 + for i, w in enumerate(cols): + w_adjoined = w + adjoin_width + curr_width += w_adjoined + if i_last_column == i: + wrap = curr_width + 1 > line_width and i > 0 + else: + wrap = curr_width + 2 > line_width and i > 0 + if wrap: + bins.append(i) + curr_width = w_adjoined + + bins.append(len(cols)) + return bins + + +def get_level_lengths( + levels: Any, sentinel: Union[bool, object, str] = "" +) -> List[Dict[int, int]]: + """For each index in each level the function returns lengths of indexes. + + Parameters + ---------- + levels : list of lists + List of values on for level. + sentinel : string, optional + Value which states that no new index starts on there. + + Returns + ------- + Returns list of maps. For each level returns map of indexes (key is index + in row and value is length of index). + """ + if len(levels) == 0: + return [] + + control = [True] * len(levels[0]) + + result = [] + for level in levels: + last_index = 0 + + lengths = {} + for i, key in enumerate(level): + if control[i] and key == sentinel: + pass + else: + control[i] = False + lengths[last_index] = i - last_index + last_index = i + + lengths[last_index] = len(level) - last_index + + result.append(lengths) + + return result + + +def buffer_put_lines(buf: IO[str], lines: List[str]) -> None: + """ + Appends lines to a buffer. 
+ + Parameters + ---------- + buf + The buffer to write to + lines + The lines to append. + """ + if any(isinstance(x, str) for x in lines): + lines = [str(x) for x in lines] + buf.write("\n".join(lines)) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py new file mode 100644 index 00000000..b46b2f6c --- /dev/null +++ b/pandas/io/formats/html.py @@ -0,0 +1,614 @@ +""" +Module for formatting output data in HTML. +""" + +from textwrap import dedent +from typing import IO, Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union, cast + +from pandas._config import get_option + +from pandas._libs import lib + +from pandas.core.dtypes.generic import ABCMultiIndex + +from pandas import option_context + +from pandas.io.common import is_url +from pandas.io.formats.format import ( + DataFrameFormatter, + TableFormatter, + buffer_put_lines, + get_level_lengths, +) +from pandas.io.formats.printing import pprint_thing + + +class HTMLFormatter(TableFormatter): + """ + Internal class for formatting output data in html. + This class is intended for shared functionality between + DataFrame.to_html() and DataFrame._repr_html_(). + Any logic in common with other output formatting methods + should ideally be inherited from classes in format.py + and this class responsible for only producing html markup. 
+ """ + + indent_delta = 2 + + def __init__( + self, + formatter: DataFrameFormatter, + classes: Optional[Union[str, List[str], Tuple[str, ...]]] = None, + border: Optional[int] = None, + ) -> None: + self.fmt = formatter + self.classes = classes + + self.frame = self.fmt.frame + self.columns = self.fmt.tr_frame.columns + self.elements: List[str] = [] + self.bold_rows = self.fmt.bold_rows + self.escape = self.fmt.escape + self.show_dimensions = self.fmt.show_dimensions + if border is None: + border = cast(int, get_option("display.html.border")) + self.border = border + self.table_id = self.fmt.table_id + self.render_links = self.fmt.render_links + if isinstance(self.fmt.col_space, int): + self.fmt.col_space = "{colspace}px".format(colspace=self.fmt.col_space) + + @property + def show_row_idx_names(self) -> bool: + return self.fmt.show_row_idx_names + + @property + def show_col_idx_names(self) -> bool: + return self.fmt.show_col_idx_names + + @property + def row_levels(self) -> int: + if self.fmt.index: + # showing (row) index + return self.frame.index.nlevels + elif self.show_col_idx_names: + # see gh-22579 + # Column misalignment also occurs for + # a standard index when the columns index is named. + # If the row index is not displayed a column of + # blank cells need to be included before the DataFrame values. + return 1 + # not showing (row) index + return 0 + + def _get_columns_formatted_values(self) -> Iterable: + return self.columns + + # https://github.com/python/mypy/issues/1237 + @property + def is_truncated(self) -> bool: # type: ignore + return self.fmt.is_truncated + + @property + def ncols(self) -> int: + return len(self.fmt.tr_frame.columns) + + def write(self, s: Any, indent: int = 0) -> None: + rs = pprint_thing(s) + self.elements.append(" " * indent + rs) + + def write_th( + self, s: Any, header: bool = False, indent: int = 0, tags: Optional[str] = None + ) -> None: + """ + Method for writting a formatted . 
This will + cause min-width to be set if there is one. + indent : int, default 0 + The indentation level of the cell. + tags : str, default None + Tags to include in the cell. + + Returns + ------- + A written ", indent) + else: + self.write(''.format(align=align), indent) + indent += indent_delta + + for i, s in enumerate(line): + val_tag = tags.get(i, None) + if header or (self.bold_rows and i < nindex_levels): + self.write_th(s, indent=indent, header=header, tags=val_tag) + else: + self.write_td(s, indent, tags=val_tag) + + indent -= indent_delta + self.write("", indent) + + def render(self) -> List[str]: + self._write_table() + + if self.should_show_dimensions: + by = chr(215) # × + self.write( + "

    {rows} rows {by} {cols} columns

    ".format( + rows=len(self.frame), by=by, cols=len(self.frame.columns) + ) + ) + + return self.elements + + def write_result(self, buf: IO[str]) -> None: + buffer_put_lines(buf, self.render()) + + def _write_table(self, indent: int = 0) -> None: + _classes = ["dataframe"] # Default class. + use_mathjax = get_option("display.html.use_mathjax") + if not use_mathjax: + _classes.append("tex2jax_ignore") + if self.classes is not None: + if isinstance(self.classes, str): + self.classes = self.classes.split() + if not isinstance(self.classes, (list, tuple)): + raise TypeError( + "classes must be a string, list, or tuple, " + "not {typ}".format(typ=type(self.classes)) + ) + _classes.extend(self.classes) + + if self.table_id is None: + id_section = "" + else: + id_section = ' id="{table_id}"'.format(table_id=self.table_id) + + self.write( + '
    cell. + + If col_space is set on the formatter then that is used for + the value of min-width. + + Parameters + ---------- + s : object + The data to be written inside the cell. + header : bool, default False + Set to True if the is for use inside
    cell. + """ + if header and self.fmt.col_space is not None: + tags = tags or "" + tags += 'style="min-width: {colspace};"'.format(colspace=self.fmt.col_space) + + self._write_cell(s, kind="th", indent=indent, tags=tags) + + def write_td(self, s: Any, indent: int = 0, tags: Optional[str] = None) -> None: + self._write_cell(s, kind="td", indent=indent, tags=tags) + + def _write_cell( + self, s: Any, kind: str = "td", indent: int = 0, tags: Optional[str] = None + ) -> None: + if tags is not None: + start_tag = "<{kind} {tags}>".format(kind=kind, tags=tags) + else: + start_tag = "<{kind}>".format(kind=kind) + + if self.escape: + # escape & first to prevent double escaping of & + esc = {"&": r"&", "<": r"<", ">": r">"} + else: + esc = {} + + rs = pprint_thing(s, escape_chars=esc).strip() + + if self.render_links and is_url(rs): + rs_unescaped = pprint_thing(s, escape_chars={}).strip() + start_tag += ''.format(url=rs_unescaped) + end_a = "" + else: + end_a = "" + + self.write( + "{start}{rs}{end_a}".format( + start=start_tag, rs=rs, end_a=end_a, kind=kind + ), + indent, + ) + + def write_tr( + self, + line: Iterable, + indent: int = 0, + indent_delta: int = 0, + header: bool = False, + align: Optional[str] = None, + tags: Optional[Dict[int, str]] = None, + nindex_levels: int = 0, + ) -> None: + if tags is None: + tags = {} + + if align is None: + self.write("
    '.format( + border=self.border, cls=" ".join(_classes), id_section=id_section + ), + indent, + ) + + if self.fmt.header or self.show_row_idx_names: + self._write_header(indent + self.indent_delta) + + self._write_body(indent + self.indent_delta) + + self.write("
    ", indent) + + def _write_col_header(self, indent: int) -> None: + truncate_h = self.fmt.truncate_h + if isinstance(self.columns, ABCMultiIndex): + template = 'colspan="{span:d}" halign="left"' + + if self.fmt.sparsify: + # GH3547 + sentinel = lib.no_default + else: + sentinel = False + levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False) + level_lengths = get_level_lengths(levels, sentinel) + inner_lvl = len(level_lengths) - 1 + for lnum, (records, values) in enumerate(zip(level_lengths, levels)): + if truncate_h: + # modify the header lines + ins_col = self.fmt.tr_col_num + if self.fmt.sparsify: + recs_new = {} + # Increment tags after ... col. + for tag, span in list(records.items()): + if tag >= ins_col: + recs_new[tag + 1] = span + elif tag + span > ins_col: + recs_new[tag] = span + 1 + if lnum == inner_lvl: + values = ( + values[:ins_col] + ("...",) + values[ins_col:] + ) + else: + # sparse col headers do not receive a ... + values = ( + values[:ins_col] + + (values[ins_col - 1],) + + values[ins_col:] + ) + else: + recs_new[tag] = span + # if ins_col lies between tags, all col headers + # get ... + if tag + span == ins_col: + recs_new[ins_col] = 1 + values = values[:ins_col] + ("...",) + values[ins_col:] + records = recs_new + inner_lvl = len(level_lengths) - 1 + if lnum == inner_lvl: + records[ins_col] = 1 + else: + recs_new = {} + for tag, span in list(records.items()): + if tag >= ins_col: + recs_new[tag + 1] = span + else: + recs_new[tag] = span + recs_new[ins_col] = 1 + records = recs_new + values = values[:ins_col] + ["..."] + values[ins_col:] + + # see gh-22579 + # Column Offset Bug with to_html(index=False) with + # MultiIndex Columns and Index. + # Initially fill row with blank cells before column names. + # TODO: Refactor to remove code duplication with code + # block below for standard columns index. 
+ row = [""] * (self.row_levels - 1) + if self.fmt.index or self.show_col_idx_names: + # see gh-22747 + # If to_html(index_names=False) do not show columns + # index names. + # TODO: Refactor to use _get_column_name_list from + # DataFrameFormatter class and create a + # _get_formatted_column_labels function for code + # parity with DataFrameFormatter class. + if self.fmt.show_index_names: + name = self.columns.names[lnum] + row.append(pprint_thing(name or "")) + else: + row.append("") + + tags = {} + j = len(row) + for i, v in enumerate(values): + if i in records: + if records[i] > 1: + tags[j] = template.format(span=records[i]) + else: + continue + j += 1 + row.append(v) + self.write_tr(row, indent, self.indent_delta, tags=tags, header=True) + else: + # see gh-22579 + # Column misalignment also occurs for + # a standard index when the columns index is named. + # Initially fill row with blank cells before column names. + # TODO: Refactor to remove code duplication with code block + # above for columns MultiIndex. + row = [""] * (self.row_levels - 1) + if self.fmt.index or self.show_col_idx_names: + # see gh-22747 + # If to_html(index_names=False) do not show columns + # index names. + # TODO: Refactor to use _get_column_name_list from + # DataFrameFormatter class. 
+ if self.fmt.show_index_names: + row.append(self.columns.name or "") + else: + row.append("") + row.extend(self._get_columns_formatted_values()) + align = self.fmt.justify + + if truncate_h: + ins_col = self.row_levels + self.fmt.tr_col_num + row.insert(ins_col, "...") + + self.write_tr(row, indent, self.indent_delta, header=True, align=align) + + def _write_row_header(self, indent: int) -> None: + truncate_h = self.fmt.truncate_h + row = [x if x is not None else "" for x in self.frame.index.names] + [""] * ( + self.ncols + (1 if truncate_h else 0) + ) + self.write_tr(row, indent, self.indent_delta, header=True) + + def _write_header(self, indent: int) -> None: + self.write("", indent) + + if self.fmt.header: + self._write_col_header(indent + self.indent_delta) + + if self.show_row_idx_names: + self._write_row_header(indent + self.indent_delta) + + self.write("", indent) + + def _get_formatted_values(self) -> Dict[int, List[str]]: + with option_context("display.max_colwidth", None): + fmt_values = {i: self.fmt._format_col(i) for i in range(self.ncols)} + return fmt_values + + def _write_body(self, indent: int) -> None: + self.write("", indent) + fmt_values = self._get_formatted_values() + + # write values + if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): + self._write_hierarchical_rows(fmt_values, indent + self.indent_delta) + else: + self._write_regular_rows(fmt_values, indent + self.indent_delta) + + self.write("", indent) + + def _write_regular_rows( + self, fmt_values: Mapping[int, List[str]], indent: int + ) -> None: + truncate_h = self.fmt.truncate_h + truncate_v = self.fmt.truncate_v + + nrows = len(self.fmt.tr_frame) + + if self.fmt.index: + fmt = self.fmt._get_formatter("__index__") + if fmt is not None: + index_values = self.fmt.tr_frame.index.map(fmt) + else: + index_values = self.fmt.tr_frame.index.format() + + row: List[str] = [] + for i in range(nrows): + + if truncate_v and i == (self.fmt.tr_row_num): + str_sep_row = ["..."] * 
len(row) + self.write_tr( + str_sep_row, + indent, + self.indent_delta, + tags=None, + nindex_levels=self.row_levels, + ) + + row = [] + if self.fmt.index: + row.append(index_values[i]) + # see gh-22579 + # Column misalignment also occurs for + # a standard index when the columns index is named. + # Add blank cell before data cells. + elif self.show_col_idx_names: + row.append("") + row.extend(fmt_values[j][i] for j in range(self.ncols)) + + if truncate_h: + dot_col_ix = self.fmt.tr_col_num + self.row_levels + row.insert(dot_col_ix, "...") + self.write_tr( + row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels + ) + + def _write_hierarchical_rows( + self, fmt_values: Mapping[int, List[str]], indent: int + ) -> None: + template = 'rowspan="{span}" valign="top"' + + truncate_h = self.fmt.truncate_h + truncate_v = self.fmt.truncate_v + frame = self.fmt.tr_frame + nrows = len(frame) + + idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) + idx_values = list(zip(*idx_values)) + + if self.fmt.sparsify: + # GH3547 + sentinel = lib.no_default + levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False) + + level_lengths = get_level_lengths(levels, sentinel) + inner_lvl = len(level_lengths) - 1 + if truncate_v: + # Insert ... row and adjust idx_values and + # level_lengths to take this into account. + ins_row = self.fmt.tr_row_num + # cast here since if truncate_v is True, self.fmt.tr_row_num is not None + ins_row = cast(int, ins_row) + inserted = False + for lnum, records in enumerate(level_lengths): + rec_new = {} + for tag, span in list(records.items()): + if tag >= ins_row: + rec_new[tag + 1] = span + elif tag + span > ins_row: + rec_new[tag] = span + 1 + + # GH 14882 - Make sure insertion done once + if not inserted: + dot_row = list(idx_values[ins_row - 1]) + dot_row[-1] = "..." 
+ idx_values.insert(ins_row, tuple(dot_row)) + inserted = True + else: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." + idx_values[ins_row] = tuple(dot_row) + else: + rec_new[tag] = span + # If ins_row lies between tags, all cols idx cols + # receive ... + if tag + span == ins_row: + rec_new[ins_row] = 1 + if lnum == 0: + idx_values.insert( + ins_row, tuple(["..."] * len(level_lengths)) + ) + + # GH 14882 - Place ... in correct level + elif inserted: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." + idx_values[ins_row] = tuple(dot_row) + level_lengths[lnum] = rec_new + + level_lengths[inner_lvl][ins_row] = 1 + for ix_col in range(len(fmt_values)): + fmt_values[ix_col].insert(ins_row, "...") + nrows += 1 + + for i in range(nrows): + row = [] + tags = {} + + sparse_offset = 0 + j = 0 + for records, v in zip(level_lengths, idx_values[i]): + if i in records: + if records[i] > 1: + tags[j] = template.format(span=records[i]) + else: + sparse_offset += 1 + continue + + j += 1 + row.append(v) + + row.extend(fmt_values[j][i] for j in range(self.ncols)) + if truncate_h: + row.insert( + self.row_levels - sparse_offset + self.fmt.tr_col_num, "..." 
+ ) + self.write_tr( + row, + indent, + self.indent_delta, + tags=tags, + nindex_levels=len(levels) - sparse_offset, + ) + else: + row = [] + for i in range(len(frame)): + if truncate_v and i == (self.fmt.tr_row_num): + str_sep_row = ["..."] * len(row) + self.write_tr( + str_sep_row, + indent, + self.indent_delta, + tags=None, + nindex_levels=self.row_levels, + ) + + idx_values = list( + zip(*frame.index.format(sparsify=False, adjoin=False, names=False)) + ) + row = [] + row.extend(idx_values[i]) + row.extend(fmt_values[j][i] for j in range(self.ncols)) + if truncate_h: + row.insert(self.row_levels + self.fmt.tr_col_num, "...") + self.write_tr( + row, + indent, + self.indent_delta, + tags=None, + nindex_levels=frame.index.nlevels, + ) + + +class NotebookFormatter(HTMLFormatter): + """ + Internal class for formatting output data in html for display in Jupyter + Notebooks. This class is intended for functionality specific to + DataFrame._repr_html_() and DataFrame.to_html(notebook=True) + """ + + def _get_formatted_values(self) -> Dict[int, List[str]]: + return {i: self.fmt._format_col(i) for i in range(self.ncols)} + + def _get_columns_formatted_values(self) -> List[str]: + return self.columns.format() + + def write_style(self) -> None: + # We use the "scoped" attribute here so that the desired + # style properties for the data frame are not then applied + # throughout the entire notebook. 
+ template_first = """\ + """ + template_select = """\ + .dataframe %s { + %s: %s; + }""" + element_props = [ + ("tbody tr th:only-of-type", "vertical-align", "middle"), + ("tbody tr th", "vertical-align", "top"), + ] + if isinstance(self.columns, ABCMultiIndex): + element_props.append(("thead tr th", "text-align", "left")) + if self.show_row_idx_names: + element_props.append( + ("thead tr:last-of-type th", "text-align", "right") + ) + else: + element_props.append(("thead th", "text-align", "right")) + template_mid = "\n\n".join(map(lambda t: template_select % t, element_props)) + template = dedent("\n".join((template_first, template_mid, template_last))) + self.write(template) + + def render(self) -> List[str]: + self.write("
    ") + self.write_style() + super().render() + self.write("
    ") + return self.elements diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py new file mode 100644 index 00000000..008a9942 --- /dev/null +++ b/pandas/io/formats/latex.py @@ -0,0 +1,377 @@ +""" +Module for formatting output data in Latex. +""" +from typing import IO, List, Optional, Tuple + +import numpy as np + +from pandas.core.dtypes.generic import ABCMultiIndex + +from pandas.io.formats.format import DataFrameFormatter, TableFormatter + + +class LatexFormatter(TableFormatter): + """ + Used to render a DataFrame to a LaTeX tabular/longtable environment output. + + Parameters + ---------- + formatter : `DataFrameFormatter` + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' for 3 columns + longtable : boolean, default False + Use a longtable environment instead of tabular. + + See Also + -------- + HTMLFormatter + """ + + def __init__( + self, + formatter: DataFrameFormatter, + column_format: Optional[str] = None, + longtable: bool = False, + multicolumn: bool = False, + multicolumn_format: Optional[str] = None, + multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, + ): + self.fmt = formatter + self.frame = self.fmt.frame + self.bold_rows = self.fmt.bold_rows + self.column_format = column_format + self.longtable = longtable + self.multicolumn = multicolumn + self.multicolumn_format = multicolumn_format + self.multirow = multirow + self.caption = caption + self.label = label + self.escape = self.fmt.escape + + def write_result(self, buf: IO[str]) -> None: + """ + Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. 
+ """ + + # string representation of the columns + if len(self.frame.columns) == 0 or len(self.frame.index) == 0: + info_line = "Empty {name}\nColumns: {col}\nIndex: {idx}".format( + name=type(self.frame).__name__, + col=self.frame.columns, + idx=self.frame.index, + ) + strcols = [[info_line]] + else: + strcols = self.fmt._to_str_columns() + + def get_col_type(dtype): + if issubclass(dtype.type, np.number): + return "r" + else: + return "l" + + # reestablish the MultiIndex that has been joined by _to_str_column + if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): + out = self.frame.index.format( + adjoin=False, + sparsify=self.fmt.sparsify, + names=self.fmt.has_index_names, + na_rep=self.fmt.na_rep, + ) + + # index.format will sparsify repeated entries with empty strings + # so pad these with some empty space + def pad_empties(x): + for pad in reversed(x): + if pad: + break + return [x[0]] + [i if i else " " * len(pad) for i in x[1:]] + + out = (pad_empties(i) for i in out) + + # Add empty spaces for each column level + clevels = self.frame.columns.nlevels + out = [[" " * len(i[-1])] * clevels + i for i in out] + + # Add the column names to the last index column + cnames = self.frame.columns.names + if any(cnames): + new_names = [i if i else "{}" for i in cnames] + out[self.frame.index.nlevels - 1][:clevels] = new_names + + # Get rid of old multiindex column and add new ones + strcols = out + strcols[1:] + + if self.column_format is None: + dtypes = self.frame.dtypes._values + column_format = "".join(map(get_col_type, dtypes)) + if self.fmt.index: + index_format = "l" * self.frame.index.nlevels + column_format = index_format + column_format + elif not isinstance(self.column_format, str): # pragma: no cover + raise AssertionError( + "column_format must be str or unicode, " + "not {typ}".format(typ=type(column_format)) + ) + else: + column_format = self.column_format + + if self.longtable: + self._write_longtable_begin(buf, column_format) + else: + 
self._write_tabular_begin(buf, column_format) + + buf.write("\\toprule\n") + + ilevels = self.frame.index.nlevels + clevels = self.frame.columns.nlevels + nlevels = clevels + if self.fmt.has_index_names and self.fmt.show_index_names: + nlevels += 1 + strrows = list(zip(*strcols)) + self.clinebuf: List[List[int]] = [] + + for i, row in enumerate(strrows): + if i == nlevels and self.fmt.header: + buf.write("\\midrule\n") # End of header + if self.longtable: + buf.write("\\endhead\n") + buf.write("\\midrule\n") + buf.write( + "\\multicolumn{{{n}}}{{r}}{{{{Continued on next " + "page}}}} \\\\\n".format(n=len(row)) + ) + buf.write("\\midrule\n") + buf.write("\\endfoot\n\n") + buf.write("\\bottomrule\n") + buf.write("\\endlastfoot\n") + if self.escape: + # escape backslashes first + crow = [ + ( + x.replace("\\", "\\textbackslash ") + .replace("_", "\\_") + .replace("%", "\\%") + .replace("$", "\\$") + .replace("#", "\\#") + .replace("{", "\\{") + .replace("}", "\\}") + .replace("~", "\\textasciitilde ") + .replace("^", "\\textasciicircum ") + .replace("&", "\\&") + if (x and x != "{}") + else "{}" + ) + for x in row + ] + else: + crow = [x if x else "{}" for x in row] + if self.bold_rows and self.fmt.index: + # bold row labels + crow = [ + "\\textbf{{{x}}}".format(x=x) + if j < ilevels and x.strip() not in ["", "{}"] + else x + for j, x in enumerate(crow) + ] + if i < clevels and self.fmt.header and self.multicolumn: + # sum up columns to multicolumns + crow = self._format_multicolumn(crow, ilevels) + if i >= nlevels and self.fmt.index and self.multirow and ilevels > 1: + # sum up rows to multirows + crow = self._format_multirow(crow, ilevels, i, strrows) + buf.write(" & ".join(crow)) + buf.write(" \\\\\n") + if self.multirow and i < len(strrows) - 1: + self._print_cline(buf, i, len(strcols)) + + if self.longtable: + self._write_longtable_end(buf) + else: + self._write_tabular_end(buf) + + def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]: + r""" 
+ Combine columns belonging to a group to a single multicolumn entry + according to self.multicolumn_format + + e.g.: + a & & & b & c & + will become + \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c} + """ + row2 = list(row[:ilevels]) + ncol = 1 + coltext = "" + + def append_col(): + # write multicolumn if needed + if ncol > 1: + row2.append( + "\\multicolumn{{{ncol:d}}}{{{fmt:s}}}{{{txt:s}}}".format( + ncol=ncol, fmt=self.multicolumn_format, txt=coltext.strip() + ) + ) + # don't modify where not needed + else: + row2.append(coltext) + + for c in row[ilevels:]: + # if next col has text, write the previous + if c.strip(): + if coltext: + append_col() + coltext = c + ncol = 1 + # if not, add it to the previous multicolumn + else: + ncol += 1 + # write last column name + if coltext: + append_col() + return row2 + + def _format_multirow( + self, row: List[str], ilevels: int, i: int, rows: List[Tuple[str, ...]] + ) -> List[str]: + r""" + Check following rows, whether row should be a multirow + + e.g.: becomes: + a & 0 & \multirow{2}{*}{a} & 0 & + & 1 & & 1 & + b & 0 & \cline{1-2} + b & 0 & + """ + for j in range(ilevels): + if row[j].strip(): + nrow = 1 + for r in rows[i + 1 :]: + if not r[j].strip(): + nrow += 1 + else: + break + if nrow > 1: + # overwrite non-multirow entry + row[j] = "\\multirow{{{nrow:d}}}{{*}}{{{row:s}}}".format( + nrow=nrow, row=row[j].strip() + ) + # save when to end the current block with \cline + self.clinebuf.append([i + nrow - 1, j + 1]) + return row + + def _print_cline(self, buf: IO[str], i: int, icol: int) -> None: + """ + Print clines after multirow-blocks are finished. 
+ """ + for cl in self.clinebuf: + if cl[0] == i: + buf.write("\\cline{{{cl:d}-{icol:d}}}\n".format(cl=cl[1], icol=icol)) + # remove entries that have been written to buffer + self.clinebuf = [x for x in self.clinebuf if x[0] != i] + + def _write_tabular_begin(self, buf, column_format: str): + """ + Write the beginning of a tabular environment or + nested table/tabular environments including caption and label. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + column_format : str + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' + for 3 columns + """ + if self.caption is not None or self.label is not None: + # then write output in a nested table/tabular environment + if self.caption is None: + caption_ = "" + else: + caption_ = "\n\\caption{{{}}}".format(self.caption) + + if self.label is None: + label_ = "" + else: + label_ = "\n\\label{{{}}}".format(self.label) + + buf.write("\\begin{{table}}\n\\centering{}{}\n".format(caption_, label_)) + else: + # then write output only in a tabular environment + pass + + buf.write("\\begin{{tabular}}{{{fmt}}}\n".format(fmt=column_format)) + + def _write_tabular_end(self, buf): + """ + Write the end of a tabular environment or nested table/tabular + environment. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + + """ + buf.write("\\bottomrule\n") + buf.write("\\end{tabular}\n") + if self.caption is not None or self.label is not None: + buf.write("\\end{table}\n") + else: + pass + + def _write_longtable_begin(self, buf, column_format: str): + """ + Write the beginning of a longtable environment including caption and + label if provided by user. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. 
+ column_format : str + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' + for 3 columns + """ + buf.write("\\begin{{longtable}}{{{fmt}}}\n".format(fmt=column_format)) + + if self.caption is not None or self.label is not None: + if self.caption is None: + pass + else: + buf.write("\\caption{{{}}}".format(self.caption)) + + if self.label is None: + pass + else: + buf.write("\\label{{{}}}".format(self.label)) + + # a double-backslash is required at the end of the line + # as discussed here: + # https://tex.stackexchange.com/questions/219138 + buf.write("\\\\\n") + else: + pass + + @staticmethod + def _write_longtable_end(buf): + """ + Write the end of a longtable environment. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + + """ + buf.write("\\end{longtable}\n") diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py new file mode 100644 index 00000000..4b5b5e9a --- /dev/null +++ b/pandas/io/formats/printing.py @@ -0,0 +1,530 @@ +""" +Printing tools. +""" + +import sys +from typing import ( + Any, + Callable, + Iterable, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, +) + +from pandas._config import get_option + +from pandas.core.dtypes.inference import is_sequence + +EscapeChars = Union[Mapping[str, str], Iterable[str]] + + +def adjoin(space: int, *lists: List[str], **kwargs) -> str: + """ + Glues together two sets of strings using the amount of space requested. + The idea is to prettify. + + ---------- + space : int + number of spaces for padding + lists : str + list of str which being joined + strlen : callable + function used to calculate the length of each str. Needed for unicode + handling. + justfunc : callable + function used to justify str. Needed for unicode handling. 
+ """ + strlen = kwargs.pop("strlen", len) + justfunc = kwargs.pop("justfunc", justify) + + out_lines = [] + newLists = [] + lengths = [max(map(strlen, x)) + space for x in lists[:-1]] + # not the last one + lengths.append(max(map(len, lists[-1]))) + maxLen = max(map(len, lists)) + for i, lst in enumerate(lists): + nl = justfunc(lst, lengths[i], mode="left") + nl.extend([" " * lengths[i]] * (maxLen - len(lst))) + newLists.append(nl) + toJoin = zip(*newLists) + for lines in toJoin: + out_lines.append("".join(lines)) + return "\n".join(out_lines) + + +def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> List[str]: + """ + Perform ljust, center, rjust against string or list-like + """ + if mode == "left": + return [x.ljust(max_len) for x in texts] + elif mode == "center": + return [x.center(max_len) for x in texts] + else: + return [x.rjust(max_len) for x in texts] + + +# Unicode consolidation +# --------------------- +# +# pprinting utility functions for generating Unicode text or +# bytes(3.x)/str(2.x) representations of objects. +# Try to use these as much as possible rather then rolling your own. +# +# When to use +# ----------- +# +# 1) If you're writing code internal to pandas (no I/O directly involved), +# use pprint_thing(). +# +# It will always return unicode text which can handled by other +# parts of the package without breakage. +# +# 2) if you need to write something out to file, use +# pprint_thing_encoded(encoding). +# +# If no encoding is specified, it defaults to utf-8. Since encoding pure +# ascii with utf-8 is a no-op you can safely use the default utf-8 if you're +# working with straight ascii. + + +def _pprint_seq( + seq: Sequence, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds +) -> str: + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. 
+ + bounds length of printed sequence, depending on options + """ + if isinstance(seq, set): + fmt = "{{{body}}}" + else: + fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})" + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + s = iter(seq) + # handle sets, no slicing + r = [ + pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) + for i in range(min(nitems, len(seq))) + ] + body = ", ".join(r) + + if nitems < len(seq): + body += ", ..." + elif isinstance(seq, tuple) and len(seq) == 1: + body += "," + + return fmt.format(body=body) + + +def _pprint_dict( + seq: Mapping, _nest_lvl: int = 0, max_seq_items: Optional[int] = None, **kwds +) -> str: + """ + internal. pprinter for iterables. you should probably use pprint_thing() + rather then calling this directly. + """ + fmt = "{{{things}}}" + pairs = [] + + pfmt = "{key}: {val}" + + if max_seq_items is False: + nitems = len(seq) + else: + nitems = max_seq_items or get_option("max_seq_items") or len(seq) + + for k, v in list(seq.items())[:nitems]: + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), + ) + ) + + if nitems < len(seq): + return fmt.format(things=", ".join(pairs) + ", ...") + else: + return fmt.format(things=", ".join(pairs)) + + +def pprint_thing( + thing: Any, + _nest_lvl: int = 0, + escape_chars: Optional[EscapeChars] = None, + default_escapes: bool = False, + quote_strings: bool = False, + max_seq_items: Optional[int] = None, +) -> str: + """ + This function is the sanctioned way of converting objects + to a string representation and properly handles nested sequences. + + Parameters + ---------- + thing : anything to be formatted + _nest_lvl : internal use only. 
pprint_thing() is mutually-recursive + with pprint_sequence, this argument is used to keep track of the + current nesting level, and limit it. + escape_chars : list or dict, optional + Characters to escape. If a dict is passed the values are the + replacements + default_escapes : bool, default False + Whether the input escape characters replaces or adds to the defaults + max_seq_items : int or None, default None + Pass through to other pretty printers to limit sequence printing + + Returns + ------- + str + """ + + def as_escaped_string( + thing: Any, escape_chars: Optional[EscapeChars] = escape_chars + ) -> str: + translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} + if isinstance(escape_chars, dict): + if default_escapes: + translate.update(escape_chars) + else: + translate = escape_chars + escape_chars = list(escape_chars.keys()) + else: + escape_chars = escape_chars or tuple() + + result = str(thing) + for c in escape_chars: + result = result.replace(c, translate[c]) + return result + + if hasattr(thing, "__next__"): + return str(thing) + elif isinstance(thing, dict) and _nest_lvl < get_option( + "display.pprint_nest_depth" + ): + result = _pprint_dict( + thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items + ) + elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"): + result = _pprint_seq( + thing, + _nest_lvl, + escape_chars=escape_chars, + quote_strings=quote_strings, + max_seq_items=max_seq_items, + ) + elif isinstance(thing, str) and quote_strings: + result = "'{thing}'".format(thing=as_escaped_string(thing)) + else: + result = as_escaped_string(thing) + + return result + + +def pprint_thing_encoded( + object, encoding: str = "utf-8", errors: str = "replace" +) -> bytes: + value = pprint_thing(object) # get unicode representation of object + return value.encode(encoding, errors) + + +def _enable_data_resource_formatter(enable: bool) -> None: + if "IPython" not in sys.modules: + # definitely not in IPython + return + 
from IPython import get_ipython + + ip = get_ipython() + if ip is None: + # still not in IPython + return + + formatters = ip.display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + if enable: + if mimetype not in formatters: + # define tableschema formatter + from IPython.core.formatters import BaseFormatter + + class TableSchemaFormatter(BaseFormatter): + print_method = "_repr_data_resource_" + _return_type = (dict,) + + # register it: + formatters[mimetype] = TableSchemaFormatter() + # enable it if it's been disabled: + formatters[mimetype].enabled = True + else: + # unregister tableschema mime-type + if mimetype in formatters: + formatters[mimetype].enabled = False + + +default_pprint = lambda x, max_seq_items=None: pprint_thing( + x, escape_chars=("\t", "\r", "\n"), quote_strings=True, max_seq_items=max_seq_items +) + + +def format_object_summary( + obj, + formatter: Callable, + is_justify: bool = True, + name: Optional[str] = None, + indent_for_name: bool = True, + line_break_each_value: bool = False, +) -> str: + """ + Return the formatted obj as a unicode string + + Parameters + ---------- + obj : object + must be iterable and support __getitem__ + formatter : callable + string formatter for an element + is_justify : boolean + should justify the display + name : name, optional + defaults to the class name of the obj + indent_for_name : bool, default True + Whether subsequent lines should be be indented to + align with the name. + line_break_each_value : bool, default False + If True, inserts a line break for each value of ``obj``. + If False, only break lines when the a line of values gets wider + than the display width. + + .. 
versionadded:: 0.25.0 + + Returns + ------- + summary string + """ + from pandas.io.formats.console import get_console_size + from pandas.io.formats.format import _get_adjustment + + display_width, _ = get_console_size() + if display_width is None: + display_width = get_option("display.width") or 80 + if name is None: + name = type(obj).__name__ + + if indent_for_name: + name_len = len(name) + space1 = f'\n{(" " * (name_len + 1))}' + space2 = f'\n{(" " * (name_len + 2))}' + else: + space1 = "\n" + space2 = "\n " # space for the opening '[' + + n = len(obj) + if line_break_each_value: + # If we want to vertically align on each value of obj, we need to + # separate values by a line break and indent the values + sep = ",\n " + " " * len(name) + else: + sep = "," + max_seq_items = get_option("display.max_seq_items") or n + + # are we a truncated display + is_truncated = n > max_seq_items + + # adj can optionally handle unicode eastern asian width + adj = _get_adjustment() + + def _extend_line( + s: str, line: str, value: str, display_width: int, next_line_prefix: str + ) -> Tuple[str, str]: + + if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width: + s += line.rstrip() + line = next_line_prefix + line += value + return s, line + + def best_len(values: List[str]) -> int: + if values: + return max(adj.len(x) for x in values) + else: + return 0 + + close = ", " + + if n == 0: + summary = f"[]{close}" + elif n == 1 and not line_break_each_value: + first = formatter(obj[0]) + summary = f"[{first}]{close}" + elif n == 2 and not line_break_each_value: + first = formatter(obj[0]) + last = formatter(obj[-1]) + summary = f"[{first}, {last}]{close}" + else: + + if n > max_seq_items: + n = min(max_seq_items // 2, 10) + head = [formatter(x) for x in obj[:n]] + tail = [formatter(x) for x in obj[-n:]] + else: + head = [] + tail = [formatter(x) for x in obj] + + # adjust all values to max length if needed + if is_justify: + if line_break_each_value: + # Justify each 
string in the values of head and tail, so the + # strings will right align when head and tail are stacked + # vertically. + head, tail = _justify(head, tail) + elif is_truncated or not ( + len(", ".join(head)) < display_width + and len(", ".join(tail)) < display_width + ): + # Each string in head and tail should align with each other + max_length = max(best_len(head), best_len(tail)) + head = [x.rjust(max_length) for x in head] + tail = [x.rjust(max_length) for x in tail] + # If we are not truncated and we are only a single + # line, then don't justify + + if line_break_each_value: + # Now head and tail are of type List[Tuple[str]]. Below we + # convert them into List[str], so there will be one string per + # value. Also truncate items horizontally if wider than + # max_space + max_space = display_width - len(space2) + value = tail[0] + for max_items in reversed(range(1, len(value) + 1)): + pprinted_seq = _pprint_seq(value, max_seq_items=max_items) + if len(pprinted_seq) < max_space: + break + head = [_pprint_seq(x, max_seq_items=max_items) for x in head] + tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] + + summary = "" + line = space2 + + for max_items in range(len(head)): + word = head[max_items] + sep + " " + summary, line = _extend_line(summary, line, word, display_width, space2) + + if is_truncated: + # remove trailing space of last line + summary += line.rstrip() + space2 + "..." + line = space2 + + for max_items in range(len(tail) - 1): + word = tail[max_items] + sep + " " + summary, line = _extend_line(summary, line, word, display_width, space2) + + # last value: no sep added + 1 space of width used for trailing ',' + summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2) + summary += line + + # right now close is either '' or ', ' + # Now we want to include the ']', but not the maybe space. 
+ close = "]" + close.rstrip(" ") + summary += close + + if len(summary) > (display_width) or line_break_each_value: + summary += space1 + else: # one row + summary += " " + + # remove initial space + summary = "[" + summary[len(space2) :] + + return summary + + +def _justify( + head: List[Sequence[str]], tail: List[Sequence[str]] +) -> Tuple[List[Tuple[str, ...]], List[Tuple[str, ...]]]: + """ + Justify items in head and tail, so they are right-aligned when stacked. + + Parameters + ---------- + head : list-like of list-likes of strings + tail : list-like of list-likes of strings + + Returns + ------- + tuple of list of tuples of strings + Same as head and tail, but items are right aligned when stacked + vertically. + + Examples + -------- + >>> _justify([['a', 'b']], [['abc', 'abcd']]) + ([(' a', ' b')], [('abc', 'abcd')]) + """ + combined = head + tail + + # For each position for the sequences in ``combined``, + # find the length of the largest string. + max_length = [0] * len(combined[0]) + for inner_seq in combined: + length = [len(item) for item in inner_seq] + max_length = [max(x, y) for x, y in zip(max_length, length)] + + # justify each item in each list-like in head and tail using max_length + head = [ + tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head + ] + tail = [ + tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail + ] + # https://github.com/python/mypy/issues/4975 + # error: Incompatible return value type (got "Tuple[List[Sequence[str]], + # List[Sequence[str]]]", expected "Tuple[List[Tuple[str, ...]], + # List[Tuple[str, ...]]]") + return head, tail # type: ignore + + +def format_object_attrs( + obj: Sequence, include_dtype: bool = True +) -> List[Tuple[str, Union[str, int]]]: + """ + Return a list of tuples of the (attr, formatted_value) + for common attrs, including dtype, name, length + + Parameters + ---------- + obj : object + must be iterable + include_dtype : bool + If False, dtype 
won't be in the returned list + + Returns + ------- + list of 2-tuple + + """ + attrs: List[Tuple[str, Union[str, int]]] = [] + if hasattr(obj, "dtype") and include_dtype: + # error: "Sequence[Any]" has no attribute "dtype" + attrs.append(("dtype", f"'{obj.dtype}'")) # type: ignore + if getattr(obj, "name", None) is not None: + # error: "Sequence[Any]" has no attribute "name" + attrs.append(("name", default_pprint(obj.name))) # type: ignore + # error: "Sequence[Any]" has no attribute "names" + elif getattr(obj, "names", None) is not None and any(obj.names): # type: ignore + # error: "Sequence[Any]" has no attribute "names" + attrs.append(("names", default_pprint(obj.names))) # type: ignore + max_seq_items = get_option("display.max_seq_items") or len(obj) + if len(obj) > max_seq_items: + attrs.append(("length", len(obj))) + return attrs diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py new file mode 100644 index 00000000..85708755 --- /dev/null +++ b/pandas/io/formats/style.py @@ -0,0 +1,1528 @@ +""" +Module for applying conditional formatting to +DataFrames and Series. 
+""" + +from collections import defaultdict +from contextlib import contextmanager +import copy +from functools import partial +from itertools import product +from typing import Any, Callable, DefaultDict, Dict, List, Optional, Sequence, Tuple +from uuid import uuid1 + +import numpy as np + +from pandas._config import get_option + +from pandas._libs import lib +from pandas.compat._optional import import_optional_dependency +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import is_float + +import pandas as pd +from pandas.api.types import is_dict_like, is_list_like +import pandas.core.common as com +from pandas.core.generic import _shared_docs +from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice + +jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") + + +try: + import matplotlib.pyplot as plt + from matplotlib import colors + + has_mpl = True +except ImportError: + has_mpl = False + no_mpl_message = "{0} requires matplotlib." + + +@contextmanager +def _mpl(func): + if has_mpl: + yield plt, colors + else: + raise ImportError(no_mpl_message.format(func.__name__)) + + +class Styler: + """ + Helps style a DataFrame or Series according to the data with HTML and CSS. + + Parameters + ---------- + data : Series or DataFrame + Data to be styled - either a Series or DataFrame. + precision : int + Precision to round floats to, defaults to pd.options.display.precision. + table_styles : list-like, default None + List of {selector: (attr, value)} dicts; see Notes. + uuid : str, default None + A unique identifier to avoid CSS collisions; generated automatically. + caption : str, default None + Caption to attach to the table. + table_attributes : str, default None + Items that show up in the opening ```` tag + in addition to automatic (by default) id. + cell_ids : bool, default True + If True, each cell will have an ``id`` attribute in their HTML tag. 
+ The ``id`` takes the form ``T__row_col`` + where ```` is the unique identifier, ```` is the row + number and ```` is the column number. + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied + + .. versionadded:: 1.0.0 + + Attributes + ---------- + env : Jinja2 jinja2.Environment + template : Jinja2 Template + loader : Jinja2 Loader + + See Also + -------- + DataFrame.style : Return a Styler object containing methods for building + a styled HTML representation for the DataFrame. + + Notes + ----- + Most styling will be done by passing style functions into + ``Styler.apply`` or ``Styler.applymap``. Style functions should + return values with strings containing CSS ``'attr: value'`` that will + be applied to the indicated cells. + + If using in the Jupyter notebook, Styler has defined a ``_repr_html_`` + to automatically render itself. Otherwise call Styler.render to get + the generated HTML. + + CSS classes are attached to the generated HTML + + * Index and Column names include ``index_name`` and ``level`` + where `k` is its level in a MultiIndex + * Index label cells include + + * ``row_heading`` + * ``row`` where `n` is the numeric position of the row + * ``level`` where `k` is the level in a MultiIndex + + * Column label cells include + * ``col_heading`` + * ``col`` where `n` is the numeric position of the column + * ``evel`` where `k` is the level in a MultiIndex + + * Blank cells include ``blank`` + * Data cells include ``data`` + """ + + loader = jinja2.PackageLoader("pandas", "io/formats/templates") + env = jinja2.Environment(loader=loader, trim_blocks=True) + template = env.get_template("html.tpl") + + def __init__( + self, + data, + precision=None, + table_styles=None, + uuid=None, + caption=None, + table_attributes=None, + cell_ids=True, + na_rep: Optional[str] = None, + ): + self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list) + self._todo: List[Tuple[Callable, Tuple, 
Dict]] = [] + + if not isinstance(data, (pd.Series, pd.DataFrame)): + raise TypeError("``data`` must be a Series or DataFrame") + if data.ndim == 1: + data = data.to_frame() + if not data.index.is_unique or not data.columns.is_unique: + raise ValueError("style is not supported for non-unique indices.") + + self.data = data + self.index = data.index + self.columns = data.columns + + self.uuid = uuid + self.table_styles = table_styles + self.caption = caption + if precision is None: + precision = get_option("display.precision") + self.precision = precision + self.table_attributes = table_attributes + self.hidden_index = False + self.hidden_columns: Sequence[int] = [] + self.cell_ids = cell_ids + self.na_rep = na_rep + + # display_funcs maps (row, col) -> formatting function + + def default_display_func(x): + if self.na_rep is not None and pd.isna(x): + return self.na_rep + elif is_float(x): + display_format = f"{x:.{self.precision}f}" + return display_format + else: + return x + + self._display_funcs: DefaultDict[ + Tuple[int, int], Callable[[Any], str] + ] = defaultdict(lambda: default_display_func) + + def _repr_html_(self): + """ + Hooks into Jupyter notebook rich display system. + """ + return self.render() + + @Appender( + _shared_docs["to_excel"] + % dict( + axes="index, columns", + klass="Styler", + axes_single_arg="{0 or 'index', 1 or 'columns'}", + optional_by=""" + by : str or list of str + Name or list of names which refer to the axis items.""", + versionadded_to_excel="\n .. 
versionadded:: 0.20", + ) + ) + def to_excel( + self, + excel_writer, + sheet_name="Sheet1", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + startrow=0, + startcol=0, + engine=None, + merge_cells=True, + encoding=None, + inf_rep="inf", + verbose=True, + freeze_panes=None, + ): + + from pandas.io.formats.excel import ExcelFormatter + + formatter = ExcelFormatter( + self, + na_rep=na_rep, + cols=columns, + header=header, + float_format=float_format, + index=index, + index_label=index_label, + merge_cells=merge_cells, + inf_rep=inf_rep, + ) + formatter.write( + excel_writer, + sheet_name=sheet_name, + startrow=startrow, + startcol=startcol, + freeze_panes=freeze_panes, + engine=engine, + ) + + def _translate(self): + """ + Convert the DataFrame in `self.data` and the attrs from `_build_styles` + into a dictionary of {head, body, uuid, cellstyle}. + """ + table_styles = self.table_styles or [] + caption = self.caption + ctx = self.ctx + precision = self.precision + hidden_index = self.hidden_index + hidden_columns = self.hidden_columns + uuid = self.uuid or str(uuid1()).replace("-", "_") + ROW_HEADING_CLASS = "row_heading" + COL_HEADING_CLASS = "col_heading" + INDEX_NAME_CLASS = "index_name" + + DATA_CLASS = "data" + BLANK_CLASS = "blank" + BLANK_VALUE = "" + + def format_attr(pair): + return f"{pair['key']}={pair['value']}" + + # for sparsifying a MultiIndex + idx_lengths = _get_level_lengths(self.index) + col_lengths = _get_level_lengths(self.columns, hidden_columns) + + cell_context = dict() + + n_rlvls = self.data.index.nlevels + n_clvls = self.data.columns.nlevels + rlabels = self.data.index.tolist() + clabels = self.data.columns.tolist() + + if n_rlvls == 1: + rlabels = [[x] for x in rlabels] + if n_clvls == 1: + clabels = [[x] for x in clabels] + clabels = list(zip(*clabels)) + + cellstyle = [] + head = [] + + for r in range(n_clvls): + # Blank for Index columns... 
+ row_es = [ + { + "type": "th", + "value": BLANK_VALUE, + "display_value": BLANK_VALUE, + "is_visible": not hidden_index, + "class": " ".join([BLANK_CLASS]), + } + ] * (n_rlvls - 1) + + # ... except maybe the last for columns.names + name = self.data.columns.names[r] + cs = [ + BLANK_CLASS if name is None else INDEX_NAME_CLASS, + f"level{r}", + ] + name = BLANK_VALUE if name is None else name + row_es.append( + { + "type": "th", + "value": name, + "display_value": name, + "class": " ".join(cs), + "is_visible": not hidden_index, + } + ) + + if clabels: + for c, value in enumerate(clabels[r]): + cs = [ + COL_HEADING_CLASS, + f"level{r}", + f"col{c}", + ] + cs.extend( + cell_context.get("col_headings", {}).get(r, {}).get(c, []) + ) + es = { + "type": "th", + "value": value, + "display_value": value, + "class": " ".join(cs), + "is_visible": _is_visible(c, r, col_lengths), + } + colspan = col_lengths.get((r, c), 0) + if colspan > 1: + es["attributes"] = [ + format_attr({"key": "colspan", "value": colspan}) + ] + row_es.append(es) + head.append(row_es) + + if ( + self.data.index.names + and com.any_not_none(*self.data.index.names) + and not hidden_index + ): + index_header_row = [] + + for c, name in enumerate(self.data.index.names): + cs = [INDEX_NAME_CLASS, f"level{c}"] + name = "" if name is None else name + index_header_row.append( + {"type": "th", "value": name, "class": " ".join(cs)} + ) + + index_header_row.extend( + [{"type": "th", "value": BLANK_VALUE, "class": " ".join([BLANK_CLASS])}] + * (len(clabels[0]) - len(hidden_columns)) + ) + + head.append(index_header_row) + + body = [] + for r, idx in enumerate(self.data.index): + row_es = [] + for c, value in enumerate(rlabels[r]): + rid = [ + ROW_HEADING_CLASS, + f"level{c}", + f"row{r}", + ] + es = { + "type": "th", + "is_visible": (_is_visible(r, c, idx_lengths) and not hidden_index), + "value": value, + "display_value": value, + "id": "_".join(rid[1:]), + "class": " ".join(rid), + } + rowspan = 
idx_lengths.get((c, r), 0) + if rowspan > 1: + es["attributes"] = [ + format_attr({"key": "rowspan", "value": rowspan}) + ] + row_es.append(es) + + for c, col in enumerate(self.data.columns): + cs = [DATA_CLASS, f"row{r}", f"col{c}"] + cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) + formatter = self._display_funcs[(r, c)] + value = self.data.iloc[r, c] + row_dict = { + "type": "td", + "value": value, + "class": " ".join(cs), + "display_value": formatter(value), + "is_visible": (c not in hidden_columns), + } + # only add an id if the cell has a style + if self.cell_ids or not (len(ctx[r, c]) == 1 and ctx[r, c][0] == ""): + row_dict["id"] = "_".join(cs[1:]) + row_es.append(row_dict) + props = [] + for x in ctx[r, c]: + # have to handle empty styles like [''] + if x.count(":"): + props.append(x.split(":")) + else: + props.append(["", ""]) + cellstyle.append({"props": props, "selector": f"row{r}_col{c}"}) + body.append(row_es) + + table_attr = self.table_attributes + use_mathjax = get_option("display.html.use_mathjax") + if not use_mathjax: + table_attr = table_attr or "" + if 'class="' in table_attr: + table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ') + else: + table_attr += ' class="tex2jax_ignore"' + + return dict( + head=head, + cellstyle=cellstyle, + body=body, + uuid=uuid, + precision=precision, + table_styles=table_styles, + caption=caption, + table_attributes=table_attr, + ) + + def format(self, formatter, subset=None, na_rep: Optional[str] = None): + """ + Format the text display value of cells. + + Parameters + ---------- + formatter : str, callable, dict or None + If ``formatter`` is None, the default formatter is used + subset : IndexSlice + An argument to ``DataFrame.loc`` that restricts which elements + ``formatter`` is applied to. + na_rep : str, optional + Representation for missing values. + If ``na_rep`` is None, no special formatting is applied + + .. 
versionadded:: 1.0.0 + + Returns + ------- + self : Styler + + Notes + ----- + + ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where + ``a`` is one of + + - str: this will be wrapped in: ``a.format(x)`` + - callable: called with the value of an individual cell + + The default display value for numeric values is the "general" (``g``) + format with ``pd.options.display.precision`` precision. + + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) + >>> df.style.format("{:.2%}") + >>> df['c'] = ['a', 'b', 'c', 'd'] + >>> df.style.format({'c': str.upper}) + """ + if formatter is None: + assert self._display_funcs.default_factory is not None + formatter = self._display_funcs.default_factory() + + if subset is None: + row_locs = range(len(self.data)) + col_locs = range(len(self.data.columns)) + else: + subset = _non_reducing_slice(subset) + if len(subset) == 1: + subset = subset, self.data.columns + + sub_df = self.data.loc[subset] + row_locs = self.data.index.get_indexer_for(sub_df.index) + col_locs = self.data.columns.get_indexer_for(sub_df.columns) + + if is_dict_like(formatter): + for col, col_formatter in formatter.items(): + # formatter must be callable, so '{}' are converted to lambdas + col_formatter = _maybe_wrap_formatter(col_formatter, na_rep) + col_num = self.data.columns.get_indexer_for([col])[0] + + for row_num in row_locs: + self._display_funcs[(row_num, col_num)] = col_formatter + else: + # single scalar to format all cells with + formatter = _maybe_wrap_formatter(formatter, na_rep) + locs = product(*(row_locs, col_locs)) + for i, j in locs: + self._display_funcs[(i, j)] = formatter + return self + + def render(self, **kwargs): + """ + Render the built up styles to HTML. + + Parameters + ---------- + **kwargs + Any additional keyword arguments are passed + through to ``self.template.render``. + This is useful when you need to provide + additional variables for a custom template. 
+ + Returns + ------- + rendered : str + The rendered HTML. + + Notes + ----- + ``Styler`` objects have defined the ``_repr_html_`` method + which automatically calls ``self.render()`` when it's the + last item in a Notebook cell. When calling ``Styler.render()`` + directly, wrap the result in ``IPython.display.HTML`` to view + the rendered HTML in the notebook. + + Pandas uses the following keys in render. Arguments passed + in ``**kwargs`` take precedence, so think carefully if you want + to override them: + + * head + * cellstyle + * body + * uuid + * precision + * table_styles + * caption + * table_attributes + """ + self._compute() + # TODO: namespace all the pandas keys + d = self._translate() + # filter out empty styles, every cell will have a class + # but the list of props may just be [['', '']]. + # so we have the neested anys below + trimmed = [x for x in d["cellstyle"] if any(any(y) for y in x["props"])] + d["cellstyle"] = trimmed + d.update(kwargs) + return self.template.render(**d) + + def _update_ctx(self, attrs): + """ + Update the state of the Styler. + + Collects a mapping of {index_label: [': ']}. + + attrs : Series or DataFrame + should contain strings of ': ;: ' + Whitespace shouldn't matter and the final trailing ';' shouldn't + matter. + """ + for row_label, v in attrs.iterrows(): + for col_label, col in v.items(): + i = self.index.get_indexer([row_label])[0] + j = self.columns.get_indexer([col_label])[0] + for pair in col.rstrip(";").split(";"): + self.ctx[(i, j)].append(pair) + + def _copy(self, deepcopy=False): + styler = Styler( + self.data, + precision=self.precision, + caption=self.caption, + uuid=self.uuid, + table_styles=self.table_styles, + na_rep=self.na_rep, + ) + if deepcopy: + styler.ctx = copy.deepcopy(self.ctx) + styler._todo = copy.deepcopy(self._todo) + else: + styler.ctx = self.ctx + styler._todo = self._todo + return styler + + def __copy__(self): + """ + Deep copy by default. 
+ """ + return self._copy(deepcopy=False) + + def __deepcopy__(self, memo): + return self._copy(deepcopy=True) + + def clear(self): + """ + Reset the styler, removing any previously applied styles. + + Returns None. + """ + self.ctx.clear() + self._todo = [] + + def _compute(self): + """ + Execute the style functions built up in `self._todo`. + + Relies on the conventions that all style functions go through + .apply or .applymap. The append styles to apply as tuples of + + (application method, *args, **kwargs) + """ + r = self + for func, args, kwargs in self._todo: + r = func(self)(*args, **kwargs) + return r + + def _apply(self, func, axis=0, subset=None, **kwargs): + subset = slice(None) if subset is None else subset + subset = _non_reducing_slice(subset) + data = self.data.loc[subset] + if axis is not None: + result = data.apply(func, axis=axis, result_type="expand", **kwargs) + result.columns = data.columns + else: + result = func(data, **kwargs) + if not isinstance(result, pd.DataFrame): + raise TypeError( + f"Function {repr(func)} must return a DataFrame when " + f"passed to `Styler.apply` with axis=None" + ) + if not ( + result.index.equals(data.index) and result.columns.equals(data.columns) + ): + raise ValueError( + f"Result of {repr(func)} must have identical " + f"index and columns as the input" + ) + + result_shape = result.shape + expected_shape = self.data.loc[subset].shape + if result_shape != expected_shape: + raise ValueError( + f"Function {repr(func)} returned the wrong shape.\n" + f"Result has shape: {result.shape}\n" + f"Expected shape: {expected_shape}" + ) + self._update_ctx(result) + return self + + def apply(self, func, axis=0, subset=None, **kwargs): + """ + Apply a function column-wise, row-wise, or table-wise. + + Updates the HTML representation with the result. + + Parameters + ---------- + func : function + ``func`` should take a Series or DataFrame (depending + on ``axis``), and return an object with the same shape. 
+ Must return a DataFrame with identical index and + column labels when ``axis=None``. + axis : {0 or 'index', 1 or 'columns', None}, default 0 + Apply to each column (``axis=0`` or ``'index'``), to each row + (``axis=1`` or ``'columns'``), or to the entire DataFrame at once + with ``axis=None``. + subset : IndexSlice + A valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice. + **kwargs : dict + Pass along to ``func``. + + Returns + ------- + self : Styler + + Notes + ----- + The output shape of ``func`` should match the input, i.e. if + ``x`` is the input row, column, or table (depending on ``axis``), + then ``func(x).shape == x.shape`` should be true. + + This is similar to ``DataFrame.apply``, except that ``axis=None`` + applies the function to the entire DataFrame at once, + rather than column-wise or row-wise. + + Examples + -------- + >>> def highlight_max(x): + ... return ['background-color: yellow' if v == x.max() else '' + for v in x] + ... + >>> df = pd.DataFrame(np.random.randn(5, 2)) + >>> df.style.apply(highlight_max) + """ + self._todo.append( + (lambda instance: getattr(instance, "_apply"), (func, axis, subset), kwargs) + ) + return self + + def _applymap(self, func, subset=None, **kwargs): + func = partial(func, **kwargs) # applymap doesn't take kwargs? + if subset is None: + subset = pd.IndexSlice[:] + subset = _non_reducing_slice(subset) + result = self.data.loc[subset].applymap(func) + self._update_ctx(result) + return self + + def applymap(self, func, subset=None, **kwargs): + """ + Apply a function elementwise. + + Updates the HTML representation with the result. + + Parameters + ---------- + func : function + ``func`` should take a scalar and return a scalar. + subset : IndexSlice + A valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice. + **kwargs : dict + Pass along to ``func``. 
+ + Returns + ------- + self : Styler + + See Also + -------- + Styler.where + """ + self._todo.append( + (lambda instance: getattr(instance, "_applymap"), (func, subset), kwargs) + ) + return self + + def where(self, cond, value, other=None, subset=None, **kwargs): + """ + Apply a function elementwise. + + Updates the HTML representation with a style which is + selected in accordance with the return value of a function. + + .. versionadded:: 0.21.0 + + Parameters + ---------- + cond : callable + ``cond`` should take a scalar and return a boolean. + value : str + Applied when ``cond`` returns true. + other : str + Applied when ``cond`` returns false. + subset : IndexSlice + A valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice. + **kwargs : dict + Pass along to ``cond``. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap + """ + + if other is None: + other = "" + + return self.applymap( + lambda val: value if cond(val) else other, subset=subset, **kwargs + ) + + def set_precision(self, precision): + """ + Set the precision used to render. + + Parameters + ---------- + precision : int + + Returns + ------- + self : Styler + """ + self.precision = precision + return self + + def set_table_attributes(self, attributes): + """ + Set the table attributes. + + These are the items that show up in the opening ``
    `` tag + in addition to to automatic (by default) id. + + Parameters + ---------- + attributes : str + + Returns + ------- + self : Styler + + Examples + -------- + >>> df = pd.DataFrame(np.random.randn(10, 4)) + >>> df.style.set_table_attributes('class="pure-table"') + # ...
    ... + """ + self.table_attributes = attributes + return self + + def export(self): + """ + Export the styles to applied to the current Styler. + + Can be applied to a second style with ``Styler.use``. + + Returns + ------- + styles : list + + See Also + -------- + Styler.use + """ + return self._todo + + def use(self, styles): + """ + Set the styles on the current Styler. + + Possibly uses styles from ``Styler.export``. + + Parameters + ---------- + styles : list + List of style functions. + + Returns + ------- + self : Styler + + See Also + -------- + Styler.export + """ + self._todo.extend(styles) + return self + + def set_uuid(self, uuid): + """ + Set the uuid for a Styler. + + Parameters + ---------- + uuid : str + + Returns + ------- + self : Styler + """ + self.uuid = uuid + return self + + def set_caption(self, caption): + """ + Set the caption on a Styler. + + Parameters + ---------- + caption : str + + Returns + ------- + self : Styler + """ + self.caption = caption + return self + + def set_table_styles(self, table_styles): + """ + Set the table styles on a Styler. + + These are placed in a `` +{%- endblock style %} +{%- block before_table %}{% endblock before_table %} +{%- block table %} +
    +{%- block caption %} +{%- if caption -%} + +{%- endif -%} +{%- endblock caption %} +{%- block thead %} + + {%- block before_head_rows %}{% endblock %} + {%- for r in head %} + {%- block head_tr scoped %} + + {%- for c in r %} + {%- if c.is_visible != False %} + <{{ c.type }} class="{{c.class}}" {{ c.attributes|join(" ") }}>{{c.value}} + {%- endif %} + {%- endfor %} + + {%- endblock head_tr %} + {%- endfor %} + {%- block after_head_rows %}{% endblock %} + +{%- endblock thead %} +{%- block tbody %} + + {% block before_rows %}{% endblock before_rows %} + {% for r in body %} + {% block tr scoped %} + + {% for c in r %} + {% if c.is_visible != False %} + <{{ c.type }} {% if c.id is defined -%} id="T_{{ uuid }}{{ c.id }}" {%- endif %} class="{{ c.class }}" {{ c.attributes|join(" ") }}>{{ c.display_value }} + {% endif %} + {%- endfor %} + + {% endblock tr %} + {%- endfor %} + {%- block after_rows %}{%- endblock after_rows %} + +{%- endblock tbody %} +
    {{caption}}
    +{%- endblock table %} +{%- block after_table %}{% endblock after_table %} diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py new file mode 100644 index 00000000..69ebc470 --- /dev/null +++ b/pandas/io/gbq.py @@ -0,0 +1,220 @@ +""" Google BigQuery support """ +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +from pandas.compat._optional import import_optional_dependency + +if TYPE_CHECKING: + from pandas import DataFrame + + +def _try_import(): + # since pandas is a dependency of pandas-gbq + # we need to import on first use + msg = ( + "pandas-gbq is required to load data from Google BigQuery. " + "See the docs: https://pandas-gbq.readthedocs.io." + ) + pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) + return pandas_gbq + + +def read_gbq( + query: str, + project_id: Optional[str] = None, + index_col: Optional[str] = None, + col_order: Optional[List[str]] = None, + reauth: bool = False, + auth_local_webserver: bool = False, + dialect: Optional[str] = None, + location: Optional[str] = None, + configuration: Optional[Dict[str, Any]] = None, + credentials=None, + use_bqstorage_api: Optional[bool] = None, + private_key=None, + verbose=None, + progress_bar_type: Optional[str] = None, +) -> "DataFrame": + """ + Load data from Google BigQuery. + + This function requires the `pandas-gbq package + `__. + + See the `How to authenticate with Google BigQuery + `__ + guide for authentication instructions. + + Parameters + ---------- + query : str + SQL-Like Query to return data values. + project_id : str, optional + Google BigQuery Account project ID. Optional when available from + the environment. + index_col : str, optional + Name of result column to use for index in results DataFrame. + col_order : list(str), optional + List of BigQuery column names in the desired order for results + DataFrame. + reauth : bool, default False + Force Google BigQuery to re-authenticate the user. This is useful + if multiple accounts are used. 
+ auth_local_webserver : bool, default False + Use the `local webserver flow`_ instead of the `console flow`_ + when getting user credentials. + + .. _local webserver flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server + .. _console flow: + http://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console + + *New in version 0.2.0 of pandas-gbq*. + dialect : str, default 'legacy' + Note: The default value is changing to 'standard' in a future version. + + SQL syntax dialect to use. Value can be one of: + + ``'legacy'`` + Use BigQuery's legacy SQL dialect. For more information see + `BigQuery Legacy SQL Reference + `__. + ``'standard'`` + Use BigQuery's standard SQL, which is + compliant with the SQL 2011 standard. For more information + see `BigQuery Standard SQL Reference + `__. + + .. versionchanged:: 0.24.0 + location : str, optional + Location where the query job should run. See the `BigQuery locations + documentation + `__ for a + list of available locations. The location must match that of any + datasets used in the query. + + *New in version 0.5.0 of pandas-gbq*. + configuration : dict, optional + Query config parameters for job processing. + For example: + + configuration = {'query': {'useQueryCache': False}} + + For more information see `BigQuery REST API Reference + `__. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to override + default credentials, such as to use Compute Engine + :class:`google.auth.compute_engine.Credentials` or Service Account + :class:`google.oauth2.service_account.Credentials` directly. + + *New in version 0.8.0 of pandas-gbq*. + + .. 
versionadded:: 0.24.0 + use_bqstorage_api : bool, default False + Use the `BigQuery Storage API + `__ to + download query results quickly, but at an increased cost. To use this + API, first `enable it in the Cloud Console + `__. + You must also have the `bigquery.readsessions.create + `__ + permission on the project you are billing queries to. + + This feature requires version 0.10.0 or later of the ``pandas-gbq`` + package. It also requires the ``google-cloud-bigquery-storage`` and + ``fastavro`` packages. + + .. versionadded:: 0.25.0 + progress_bar_type : str, optional + If set, use the `tqdm `__ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + Note that this feature requires version 0.12.0 or later of the + ``pandas-gbq`` package. And it requires the ``tqdm`` package. Slightly + different than ``pandas-gbq``, here the default is ``None``. + + .. versionadded:: 1.0.0 + + Returns + ------- + df: DataFrame + DataFrame representing results of query. + + See Also + -------- + pandas_gbq.read_gbq : This function in the pandas-gbq library. + DataFrame.to_gbq : Write a DataFrame to Google BigQuery. + """ + pandas_gbq = _try_import() + + kwargs: Dict[str, Union[str, bool]] = {} + + # START: new kwargs. Don't populate unless explicitly set.
+ if use_bqstorage_api is not None: + kwargs["use_bqstorage_api"] = use_bqstorage_api + + if progress_bar_type is not None: + kwargs["progress_bar_type"] = progress_bar_type + # END: new kwargs + + return pandas_gbq.read_gbq( + query, + project_id=project_id, + index_col=index_col, + col_order=col_order, + reauth=reauth, + auth_local_webserver=auth_local_webserver, + dialect=dialect, + location=location, + configuration=configuration, + credentials=credentials, + **kwargs, + ) + + +def to_gbq( + dataframe: "DataFrame", + destination_table: str, + project_id: Optional[str] = None, + chunksize: Optional[int] = None, + reauth: bool = False, + if_exists: str = "fail", + auth_local_webserver: bool = False, + table_schema: Optional[List[Dict[str, str]]] = None, + location: Optional[str] = None, + progress_bar: bool = True, + credentials=None, + verbose=None, + private_key=None, +) -> None: + pandas_gbq = _try_import() + pandas_gbq.to_gbq( + dataframe, + destination_table, + project_id=project_id, + chunksize=chunksize, + reauth=reauth, + if_exists=if_exists, + auth_local_webserver=auth_local_webserver, + table_schema=table_schema, + location=location, + progress_bar=progress_bar, + credentials=credentials, + verbose=verbose, + private_key=private_key, + ) diff --git a/pandas/io/gcs.py b/pandas/io/gcs.py new file mode 100644 index 00000000..1f5e0fae --- /dev/null +++ b/pandas/io/gcs.py @@ -0,0 +1,18 @@ +""" GCS support for remote file interactivity """ +from pandas.compat._optional import import_optional_dependency + +gcsfs = import_optional_dependency( + "gcsfs", extra="The gcsfs library is required to handle GCS files" +) + + +def get_filepath_or_buffer( + filepath_or_buffer, encoding=None, compression=None, mode=None +): + + if mode is None: + mode = "rb" + + fs = gcsfs.GCSFileSystem() + filepath_or_buffer = fs.open(filepath_or_buffer, mode) + return filepath_or_buffer, None, compression, True diff --git a/pandas/io/html.py b/pandas/io/html.py new file mode 100644 
index 00000000..04f9f317 --- /dev/null +++ b/pandas/io/html.py @@ -0,0 +1,1101 @@ +""" +:mod:`pandas.io.html` is a module containing functionality for dealing with +HTML IO. + +""" + +from collections import abc +import numbers +import os +import re + +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError, EmptyDataError + +from pandas.core.dtypes.common import is_list_like + +from pandas.core.construction import create_series_with_explicit_dtype + +from pandas.io.common import is_url, urlopen, validate_header_arg +from pandas.io.formats.printing import pprint_thing +from pandas.io.parsers import TextParser + +_IMPORTS = False +_HAS_BS4 = False +_HAS_LXML = False +_HAS_HTML5LIB = False + + +def _importers(): + # import things we need + # but make this done on a first use basis + + global _IMPORTS + if _IMPORTS: + return + + global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB + bs4 = import_optional_dependency("bs4", raise_on_missing=False, on_version="ignore") + _HAS_BS4 = bs4 is not None + + lxml = import_optional_dependency( + "lxml.etree", raise_on_missing=False, on_version="ignore" + ) + _HAS_LXML = lxml is not None + + html5lib = import_optional_dependency( + "html5lib", raise_on_missing=False, on_version="ignore" + ) + _HAS_HTML5LIB = html5lib is not None + + _IMPORTS = True + + +############# +# READ HTML # +############# +_RE_WHITESPACE = re.compile(r"[\r\n]+|\s{2,}") + + +def _remove_whitespace(s: str, regex=_RE_WHITESPACE) -> str: + """ + Replace extra whitespace inside of a string with a single space. + + Parameters + ---------- + s : str or unicode + The string from which to remove extra whitespace. + regex : re.Pattern + The regular expression to use to remove extra whitespace. + + Returns + ------- + subd : str or unicode + `s` with all extra whitespace replaced with a single space. 
+ """ + return regex.sub(" ", s.strip()) + + +def _get_skiprows(skiprows): + """ + Get an iterator given an integer, slice or container. + + Parameters + ---------- + skiprows : int, slice, container + The iterator to use to skip rows; can also be a slice. + + Raises + ------ + TypeError + * If `skiprows` is not a slice, integer, or Container + + Returns + ------- + it : iterable + A proper iterator to use to skip rows of a DataFrame. + """ + if isinstance(skiprows, slice): + start, step = skiprows.start or 0, skiprows.step or 1 + return list(range(start, skiprows.stop, step)) + elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows): + return skiprows + elif skiprows is None: + return 0 + raise TypeError(f"{type(skiprows).__name__} is not a valid type for skipping rows") + + +def _read(obj): + """ + Try to read from a url, file or string. + + Parameters + ---------- + obj : str, unicode, or file-like + + Returns + ------- + raw_text : str + """ + if is_url(obj): + with urlopen(obj) as url: + text = url.read() + elif hasattr(obj, "read"): + text = obj.read() + elif isinstance(obj, (str, bytes)): + text = obj + try: + if os.path.isfile(text): + with open(text, "rb") as f: + return f.read() + except (TypeError, ValueError): + pass + else: + raise TypeError(f"Cannot read object of type '{type(obj).__name__}'") + return text + + +class _HtmlFrameParser: + """ + Base class for parsers that parse HTML into DataFrames. + + Parameters + ---------- + io : str or file-like + This can be either a string of raw HTML, a valid URL using the HTTP, + FTP, or FILE protocols or a file-like object. + + match : str or regex + The text to match in the document. + + attrs : dict + List of HTML element attributes to match. + + encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + .. 
versionadded:: 0.23.0 + + Attributes + ---------- + io : str or file-like + raw HTML, URL, or file-like object + + match : regex + The text to match in the raw HTML + + attrs : dict-like + A dictionary of valid table attributes to use to search for table + elements. + + encoding : str + Encoding to be used by parser + + displayed_only : bool + Whether or not items with "display:none" should be ignored + + .. versionadded:: 0.23.0 + + Notes + ----- + To subclass this class effectively you must override the following methods: + * :func:`_build_doc` + * :func:`_attr_getter` + * :func:`_text_getter` + * :func:`_parse_td` + * :func:`_parse_thead_tr` + * :func:`_parse_tbody_tr` + * :func:`_parse_tfoot_tr` + * :func:`_parse_tables` + * :func:`_equals_tag` + See each method's respective documentation for details on their + functionality. + """ + + def __init__(self, io, match, attrs, encoding, displayed_only): + self.io = io + self.match = match + self.attrs = attrs + self.encoding = encoding + self.displayed_only = displayed_only + + def parse_tables(self): + """ + Parse and return all tables from the DOM. + + Returns + ------- + list of parsed (header, body, footer) tuples from tables. + """ + tables = self._parse_tables(self._build_doc(), self.match, self.attrs) + return (self._parse_thead_tbody_tfoot(table) for table in tables) + + def _attr_getter(self, obj, attr): + """ + Return the attribute value of an individual DOM node. + + Parameters + ---------- + obj : node-like + A DOM node. + + attr : str or unicode + The attribute, such as "colspan" + + Returns + ------- + str or unicode + The attribute value. + """ + # Both lxml and BeautifulSoup have the same implementation: + return obj.get(attr) + + def _text_getter(self, obj): + """ + Return the text of an individual DOM node. + + Parameters + ---------- + obj : node-like + A DOM node. + + Returns + ------- + text : str or unicode + The text from an individual DOM node. 
+ """ + raise AbstractMethodError(self) + + def _parse_td(self, obj): + """ + Return the td elements from a row element. + + Parameters + ---------- + obj : node-like + A DOM node. + + Returns + ------- + list of node-like + These are the elements of each row, i.e., the columns. + """ + raise AbstractMethodError(self) + + def _parse_thead_tr(self, table): + """ + Return the list of thead row elements from the parsed table element. + + Parameters + ---------- + table : a table element that contains zero or more thead elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tbody_tr(self, table): + """ + Return the list of tbody row elements from the parsed table element. + + HTML5 table bodies consist of either 0 or more elements (which + only contain elements) or 0 or more elements. This method + checks for both structures. + + Parameters + ---------- + table : a table element that contains row elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tfoot_tr(self, table): + """ + Return the list of tfoot row elements from the parsed table element. + + Parameters + ---------- + table : a table element that contains row elements. + + Returns + ------- + list of node-like + These are the row elements of a table. + """ + raise AbstractMethodError(self) + + def _parse_tables(self, doc, match, attrs): + """ + Return all tables from the parsed DOM. + + Parameters + ---------- + doc : the DOM from which to parse the table element. + + match : str or regular expression + The text to search for in the DOM tree. + + attrs : dict + A dictionary of table attributes that can be used to disambiguate + multiple tables on a page. + + Raises + ------ + ValueError : `match` does not match any text in the document. + + Returns + ------- + list of node-like + HTML
    elements to be parsed into raw data. + """ + raise AbstractMethodError(self) + + def _equals_tag(self, obj, tag): + """ + Return whether an individual DOM node matches a tag + + Parameters + ---------- + obj : node-like + A DOM node. + + tag : str + Tag name to be checked for equality. + + Returns + ------- + boolean + Whether `obj`'s tag name is `tag` + """ + raise AbstractMethodError(self) + + def _build_doc(self): + """ + Return a tree-like object that can be used to iterate over the DOM. + + Returns + ------- + node-like + The DOM from which to parse the table element. + """ + raise AbstractMethodError(self) + + def _parse_thead_tbody_tfoot(self, table_html): + """ + Given a table, return parsed header, body, and foot. + + Parameters + ---------- + table_html : node-like + + Returns + ------- + tuple of (header, body, footer), each a list of list-of-text rows. + + Notes + ----- + Header and body are lists-of-lists. Top level list is a list of + rows. Each row is a list of str text. + + Logic: Use , , elements to identify + header, body, and footer, otherwise: + - Put all rows into body + - Move rows from top of body to header only if + all elements inside row are . Move the top all- or + while body_rows and row_is_all_th(body_rows[0]): + header_rows.append(body_rows.pop(0)) + + header = self._expand_colspan_rowspan(header_rows) + body = self._expand_colspan_rowspan(body_rows) + footer = self._expand_colspan_rowspan(footer_rows) + + return header, body, footer + + def _expand_colspan_rowspan(self, rows): + """ + Given a list of s, return a list of text rows. + + Parameters + ---------- + rows : list of node-like + List of s + + Returns + ------- + list of list + Each returned row is a list of str text. + + Notes + ----- + Any cell with ``rowspan`` or ``colspan`` will have its contents copied + to subsequent cells. 
+ """ + + all_texts = [] # list of rows, each a list of str + remainder = [] # list of (index, text, nrows) + + for tr in rows: + texts = [] # the output for this row + next_remainder = [] + + index = 0 + tds = self._parse_td(tr) + for td in tds: + # Append texts from previous rows with rowspan>1 that come + # before this or (see _parse_thead_tr). + return row.xpath("./td|./th") + + def _parse_tables(self, doc, match, kwargs): + pattern = match.pattern + + # 1. check all descendants for the given pattern and only search tables + # 2. go up the tree until we find a table + xpath_expr = f"//table//*[re:test(text(), {repr(pattern)})]/ancestor::table" + + # if any table attributes were given build an xpath expression to + # search for them + if kwargs: + xpath_expr += _build_xpath_expr(kwargs) + + tables = doc.xpath(xpath_expr, namespaces=_re_namespace) + + tables = self._handle_hidden_tables(tables, "attrib") + if self.displayed_only: + for table in tables: + # lxml utilizes XPATH 1.0 which does not have regex + # support. As a result, we find all elements with a style + # attribute and iterate them to check for display:none + for elem in table.xpath(".//*[@style]"): + if "display:none" in elem.attrib.get("style", "").replace(" ", ""): + elem.getparent().remove(elem) + + if not tables: + raise ValueError(f"No tables found matching regex {repr(pattern)}") + return tables + + def _equals_tag(self, obj, tag): + return obj.tag == tag + + def _build_doc(self): + """ + Raises + ------ + ValueError + * If a URL that lxml cannot parse is passed. + + Exception + * Any other ``Exception`` thrown. For example, trying to parse a + URL that is syntactically correct on a machine with no internet + connection will fail. 
+ + See Also + -------- + pandas.io.html._HtmlFrameParser._build_doc + """ + from lxml.html import parse, fromstring, HTMLParser + from lxml.etree import XMLSyntaxError + + parser = HTMLParser(recover=True, encoding=self.encoding) + + try: + if is_url(self.io): + with urlopen(self.io) as f: + r = parse(f, parser=parser) + else: + # try to parse the input in the simplest way + r = parse(self.io, parser=parser) + try: + r = r.getroot() + except AttributeError: + pass + except (UnicodeDecodeError, IOError) as e: + # if the input is a blob of html goop + if not is_url(self.io): + r = fromstring(self.io, parser=parser) + + try: + r = r.getroot() + except AttributeError: + pass + else: + raise e + else: + if not hasattr(r, "text_content"): + raise XMLSyntaxError("no text parsed from document", 0, 0, 0) + return r + + def _parse_thead_tr(self, table): + rows = [] + + for thead in table.xpath(".//thead"): + rows.extend(thead.xpath("./tr")) + + # HACK: lxml does not clean up the clearly-erroneous + # . (Missing ). Add + # the and _pretend_ it's a ; _parse_td() will find its + # children as though it's a . + # + # Better solution would be to use html5lib. 
+ elements_at_root = thead.xpath("./td|./th") + if elements_at_root: + rows.append(thead) + + return rows + + def _parse_tbody_tr(self, table): + from_tbody = table.xpath(".//tbody//tr") + from_root = table.xpath("./tr") + # HTML spec: at most one of these lists has content + return from_tbody + from_root + + def _parse_tfoot_tr(self, table): + return table.xpath(".//tfoot//tr") + + +def _expand_elements(body): + data = [len(elem) for elem in body] + lens = create_series_with_explicit_dtype(data, dtype_if_empty=object) + lens_max = lens.max() + not_max = lens[lens != lens_max] + + empty = [""] + for ind, length in not_max.items(): + body[ind] += empty * (lens_max - length) + + +def _data_to_frame(**kwargs): + head, body, foot = kwargs.pop("data") + header = kwargs.pop("header") + kwargs["skiprows"] = _get_skiprows(kwargs["skiprows"]) + if head: + body = head + body + + # Infer header when there is a or top ") + + result1 = self.read_html(data1)[0] + result2 = self.read_html(data2)[0] + + tm.assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result2, expected2) + + def test_parse_header_of_non_string_column(self): + # GH5048: if header is specified explicitly, an int column should be + # parsed as int while its header is parsed as str + result = self.read_html( + """ +
    + - Move rows from bottom of body to footer only if + all elements inside row are + """ + + header_rows = self._parse_thead_tr(table_html) + body_rows = self._parse_tbody_tr(table_html) + footer_rows = self._parse_tfoot_tr(table_html) + + def row_is_all_th(row): + return all(self._equals_tag(t, "th") for t in self._parse_td(row)) + + if not header_rows: + # The table has no
    rows from + # body_rows to header_rows. (This is a common case because many + # tables in the wild have no
    + while remainder and remainder[0][0] <= index: + prev_i, prev_text, prev_rowspan = remainder.pop(0) + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + index += 1 + + # Append the text from this , colspan times + text = _remove_whitespace(self._text_getter(td)) + rowspan = int(self._attr_getter(td, "rowspan") or 1) + colspan = int(self._attr_getter(td, "colspan") or 1) + + for _ in range(colspan): + texts.append(text) + if rowspan > 1: + next_remainder.append((index, text, rowspan - 1)) + index += 1 + + # Append texts from previous rows at the final position + for prev_i, prev_text, prev_rowspan in remainder: + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + + all_texts.append(texts) + remainder = next_remainder + + # Append rows that only appear because the previous row had non-1 + # rowspan + while remainder: + next_remainder = [] + texts = [] + for prev_i, prev_text, prev_rowspan in remainder: + texts.append(prev_text) + if prev_rowspan > 1: + next_remainder.append((prev_i, prev_text, prev_rowspan - 1)) + all_texts.append(texts) + remainder = next_remainder + + return all_texts + + def _handle_hidden_tables(self, tbl_list, attr_name): + """ + Return list of tables, potentially removing hidden elements + + Parameters + ---------- + tbl_list : list of node-like + Type of list elements will vary depending upon parser used + attr_name : str + Name of the accessor for retrieving HTML attributes + + Returns + ------- + list of node-like + Return type matches `tbl_list` + """ + if not self.displayed_only: + return tbl_list + + return [ + x + for x in tbl_list + if "display:none" + not in getattr(x, attr_name).get("style", "").replace(" ", "") + ] + + +class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): + """ + HTML to DataFrame parser that uses BeautifulSoup under the hood. 
+ + See Also + -------- + pandas.io.html._HtmlFrameParser + pandas.io.html._LxmlFrameParser + + Notes + ----- + Documentation strings for this class are in the base class + :class:`pandas.io.html._HtmlFrameParser`. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + from bs4 import SoupStrainer + + self._strainer = SoupStrainer("table") + + def _parse_tables(self, doc, match, attrs): + element_name = self._strainer.name + tables = doc.find_all(element_name, attrs=attrs) + + if not tables: + raise ValueError("No tables found") + + result = [] + unique_tables = set() + tables = self._handle_hidden_tables(tables, "attrs") + + for table in tables: + if self.displayed_only: + for elem in table.find_all(style=re.compile(r"display:\s*none")): + elem.decompose() + + if table not in unique_tables and table.find(text=match) is not None: + result.append(table) + unique_tables.add(table) + + if not result: + raise ValueError(f"No tables found matching pattern {repr(match.pattern)}") + return result + + def _text_getter(self, obj): + return obj.text + + def _equals_tag(self, obj, tag): + return obj.name == tag + + def _parse_td(self, row): + return row.find_all(("td", "th"), recursive=False) + + def _parse_thead_tr(self, table): + return table.select("thead tr") + + def _parse_tbody_tr(self, table): + from_tbody = table.select("tbody tr") + from_root = table.find_all("tr", recursive=False) + # HTML spec: at most one of these lists has content + return from_tbody + from_root + + def _parse_tfoot_tr(self, table): + return table.select("tfoot tr") + + def _setup_build_doc(self): + raw_text = _read(self.io) + if not raw_text: + raise ValueError(f"No text parsed from document: {self.io}") + return raw_text + + def _build_doc(self): + from bs4 import BeautifulSoup + + bdoc = self._setup_build_doc() + if isinstance(bdoc, bytes) and self.encoding is not None: + udoc = bdoc.decode(self.encoding) + from_encoding = None + else: + udoc = bdoc + 
from_encoding = self.encoding + return BeautifulSoup(udoc, features="html5lib", from_encoding=from_encoding) + + +def _build_xpath_expr(attrs) -> str: + """Build an xpath expression to simulate bs4's ability to pass in kwargs to + search for attributes when using the lxml parser. + + Parameters + ---------- + attrs : dict + A dict of HTML attributes. These are NOT checked for validity. + + Returns + ------- + expr : unicode + An XPath expression that checks for the given HTML attributes. + """ + # give class attribute as class_ because class is a python keyword + if "class_" in attrs: + attrs["class"] = attrs.pop("class_") + + s = " and ".join([f"@{k}={repr(v)}" for k, v in attrs.items()]) + return f"[{s}]" + + +_re_namespace = {"re": "http://exslt.org/regular-expressions"} +_valid_schemes = "http", "file", "ftp" + + +class _LxmlFrameParser(_HtmlFrameParser): + """ + HTML to DataFrame parser that uses lxml under the hood. + + Warning + ------- + This parser can only handle HTTP, FTP, and FILE urls. + + See Also + -------- + _HtmlFrameParser + _BeautifulSoupLxmlFrameParser + + Notes + ----- + Documentation strings for this class are in the base class + :class:`_HtmlFrameParser`. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _text_getter(self, obj): + return obj.text_content() + + def _parse_td(self, row): + # Look for direct children only: the "row" element here may be a + #
    foobar
    -only rows + if header is None: + if len(head) == 1: + header = 0 + else: + # ignore all-empty-text rows + header = [i for i, row in enumerate(head) if any(text for text in row)] + + if foot: + body += foot + + # fill out elements of body that are "ragged" + _expand_elements(body) + tp = TextParser(body, header=header, **kwargs) + df = tp.read() + return df + + +_valid_parsers = { + "lxml": _LxmlFrameParser, + None: _LxmlFrameParser, + "html5lib": _BeautifulSoupHtml5LibFrameParser, + "bs4": _BeautifulSoupHtml5LibFrameParser, +} + + +def _parser_dispatch(flavor): + """ + Choose the parser based on the input flavor. + + Parameters + ---------- + flavor : str + The type of parser to use. This must be a valid backend. + + Returns + ------- + cls : _HtmlFrameParser subclass + The parser class based on the requested input flavor. + + Raises + ------ + ValueError + * If `flavor` is not a valid backend. + ImportError + * If you do not have the requested `flavor` + """ + valid_parsers = list(_valid_parsers.keys()) + if flavor not in valid_parsers: + raise ValueError( + f"{repr(flavor)} is not a valid flavor, valid flavors are {valid_parsers}" + ) + + if flavor in ("bs4", "html5lib"): + if not _HAS_HTML5LIB: + raise ImportError("html5lib not found, please install it") + if not _HAS_BS4: + raise ImportError("BeautifulSoup4 (bs4) not found, please install it") + # Although we call this above, we want to raise here right before use. 
+ bs4 = import_optional_dependency("bs4") # noqa:F841 + + else: + if not _HAS_LXML: + raise ImportError("lxml not found, please install it") + return _valid_parsers[flavor] + + +def _print_as_set(s) -> str: + arg = ", ".join(pprint_thing(el) for el in s) + return f"{{{arg}}}" + + +def _validate_flavor(flavor): + if flavor is None: + flavor = "lxml", "bs4" + elif isinstance(flavor, str): + flavor = (flavor,) + elif isinstance(flavor, abc.Iterable): + if not all(isinstance(flav, str) for flav in flavor): + raise TypeError( + f"Object of type {repr(type(flavor).__name__)} " + f"is not an iterable of strings" + ) + else: + msg = repr(flavor) if isinstance(flavor, str) else str(flavor) + msg += " is not a valid flavor" + raise ValueError(msg) + + flavor = tuple(flavor) + valid_flavors = set(_valid_parsers) + flavor_set = set(flavor) + + if not flavor_set & valid_flavors: + raise ValueError( + f"{_print_as_set(flavor_set)} is not a valid set of flavors, valid " + f"flavors are {_print_as_set(valid_flavors)}" + ) + return flavor + + +def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs): + flavor = _validate_flavor(flavor) + compiled_match = re.compile(match) # you can pass a compiled regex here + + retained = None + for flav in flavor: + parser = _parser_dispatch(flav) + p = parser(io, compiled_match, attrs, encoding, displayed_only) + + try: + tables = p.parse_tables() + except ValueError as caught: + # if `io` is an io-like object, check if it's seekable + # and try to rewind it before trying the next parser + if hasattr(io, "seekable") and io.seekable(): + io.seek(0) + elif hasattr(io, "seekable") and not io.seekable(): + # if we couldn't rewind it, let the user know + raise ValueError( + f"The flavor {flav} failed to parse your input. " + "Since you passed a non-rewindable file " + "object, we can't rewind it to try " + "another parser. Try read_html() with a " + "different flavor." 
+ ) + + retained = caught + else: + break + else: + raise retained + + ret = [] + for table in tables: + try: + ret.append(_data_to_frame(data=table, **kwargs)) + except EmptyDataError: # empty table + continue + return ret + + +def read_html( + io, + match=".+", + flavor=None, + header=None, + index_col=None, + skiprows=None, + attrs=None, + parse_dates=False, + thousands=",", + encoding=None, + decimal=".", + converters=None, + na_values=None, + keep_default_na=True, + displayed_only=True, +): + r""" + Read HTML tables into a ``list`` of ``DataFrame`` objects. + + Parameters + ---------- + io : str, path object or file-like object + A URL, a file-like object, or a raw string containing HTML. Note that + lxml only accepts the http, ftp and file url protocols. If you have a + URL that starts with ``'https'`` you might try removing the ``'s'``. + + match : str or compiled regular expression, optional + The set of tables containing text matching this regex or string will be + returned. Unless the HTML is extremely simple you will probably need to + pass a non-empty string here. Defaults to '.+' (match any non-empty + string). The default value will return all tables contained on a page. + This value is converted to a regular expression so that there is + consistent behavior between Beautiful Soup and lxml. + + flavor : str or None + The parsing engine to use. 'bs4' and 'html5lib' are synonymous with + each other, they are both there for backwards compatibility. The + default of ``None`` tries to use ``lxml`` to parse and if that fails it + falls back on ``bs4`` + ``html5lib``. + + header : int or list-like or None, optional + The row (or list of rows for a :class:`~pandas.MultiIndex`) to use to + make the columns headers. + + index_col : int or list-like or None, optional + The column (or list of columns) to use to create the index. + + skiprows : int or list-like or slice or None, optional + Number of rows to skip after parsing the column integer. 0-based. 
If a + sequence of integers or a slice is given, will skip the rows indexed by + that sequence. Note that a single element sequence means 'skip the nth + row' whereas an integer means 'skip n rows'. + + attrs : dict or None, optional + This is a dictionary of attributes that you can pass to identify + the table in the HTML. These are not checked for validity before being + passed to lxml or Beautiful Soup. However, these attributes must be + valid HTML table attributes to work correctly. For example, :: + + attrs = {'id': 'table'} + + is a valid attribute dictionary because the 'id' HTML tag attribute is + a valid HTML attribute for *any* HTML tag as per `this document + `__. :: + + attrs = {'asdf': 'table'} + + is *not* a valid attribute dictionary because 'asdf' is not a valid + HTML attribute even if it is a valid XML attribute. Valid HTML 4.01 + table attributes can be found `here + `__. A + working draft of the HTML 5 spec can be found `here + `__. It contains the + latest information on table attributes for the modern web. + + parse_dates : bool, optional + See :func:`~read_csv` for more details. + + thousands : str, optional + Separator to use to parse thousands. Defaults to ``','``. + + encoding : str or None, optional + The encoding used to decode the web page. Defaults to ``None``. ``None`` + preserves the previous encoding behavior, which depends on the + underlying parser library (e.g., the parser library will try to use + the encoding provided by the document). + + decimal : str, default '.' + Character to recognize as decimal point (e.g. use ',' for European + data). + + converters : dict, default None + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the cell (not column) content, and return the + transformed content. + + na_values : iterable, default None + Custom NA values.
+ + keep_default_na : bool, default True + If na_values are specified and keep_default_na is False the default NaN + values are overridden, otherwise they're appended to. + + displayed_only : bool, default True + Whether elements with "display: none" should be parsed. + + Returns + ------- + dfs + A list of DataFrames. + + See Also + -------- + read_csv + + Notes + ----- + Before using this function you should read the :ref:`gotchas about the + HTML parsing libraries `. + + Expect to do some cleanup after you call this function. For example, you + might need to manually assign column names if the column names are + converted to NaN when you pass the `header=0` argument. We try to assume as + little as possible about the structure of the table and push the + idiosyncrasies of the HTML contained in the table to the user. + + This function searches for ```` elements and only for ```` + and ```` or ```` argument, it is used to construct + the header, otherwise the function attempts to find the header within + the body (by putting rows with only ``" not in df._repr_html_() + + with option_context("display.max_rows", 12, "display.min_rows", None): + # when set to None, follow value of max_rows + assert "5 5" in repr(df) + assert "" in df._repr_html_() + + with option_context("display.max_rows", 10, "display.min_rows", 12): + # when set value higher as max_rows, use the minimum + assert "5 5" not in repr(df) + assert "" not in df._repr_html_() + + with option_context("display.max_rows", None, "display.min_rows", 12): + # max_rows of None -> never truncate + assert ".." not in repr(df) + assert ".." 
not in df._repr_html_() + + def test_str_max_colwidth(self): + # GH 7856 + df = pd.DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "uncomfortably long line with lots of stuff", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) + assert str(df) == ( + " a b c d\n" + "0 foo bar uncomfortably long line with lots of stuff 1\n" + "1 foo bar stuff 1" + ) + with option_context("max_colwidth", 20): + assert str(df) == ( + " a b c d\n" + "0 foo bar uncomfortably lo... 1\n" + "1 foo bar stuff 1" + ) + + def test_to_string_truncate(self): + # GH 9784 - dont truncate when calling DataFrame.to_string + df = pd.DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "let's make this a very VERY long line that is longer " + "than the default 50 character limit", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) + assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) + with option_context("max_colwidth", 20): + # the display option has no effect on the to_string method + assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) + assert df.to_string(max_colwidth=20) == ( + " a b c d\n" + "0 foo bar let's make this ... 
1\n" + "1 foo bar stuff 1" + ) + + def test_auto_detect(self): + term_width, term_height = get_terminal_size() + fac = 1.05 # Arbitrary large factor to exceed term width + cols = range(int(term_width * fac)) + index = range(10) + df = DataFrame(index=index, columns=cols) + with option_context("mode.sim_interactive", True): + with option_context("max_rows", None): + with option_context("max_columns", None): + # Wrap around with None + assert has_expanded_repr(df) + with option_context("max_rows", 0): + with option_context("max_columns", 0): + # Truncate with auto detection. + assert has_horizontally_truncated_repr(df) + + index = range(int(term_height * fac)) + df = DataFrame(index=index, columns=cols) + with option_context("max_rows", 0): + with option_context("max_columns", None): + # Wrap around with None + assert has_expanded_repr(df) + # Truncate vertically + assert has_vertically_truncated_repr(df) + + with option_context("max_rows", None): + with option_context("max_columns", 0): + assert has_horizontally_truncated_repr(df) + + def test_to_string_repr_unicode(self): + buf = StringIO() + + unicode_values = ["\u03c3"] * 10 + unicode_values = np.array(unicode_values, dtype=object) + df = DataFrame({"unicode": unicode_values}) + df.to_string(col_space=10, buf=buf) + + # it works! 
+ repr(df) + + idx = Index(["abc", "\u03c3a", "aegdvg"]) + ser = Series(np.random.randn(len(idx)), idx) + rs = repr(ser).split("\n") + line_len = len(rs[0]) + for line in rs[1:]: + try: + line = line.decode(get_option("display.encoding")) + except AttributeError: + pass + if not line.startswith("dtype:"): + assert len(line) == line_len + + # it works even if sys.stdin in None + _stdin = sys.stdin + try: + sys.stdin = None + repr(df) + finally: + sys.stdin = _stdin + + def test_to_string_unicode_columns(self, float_frame): + df = DataFrame({"\u03c3": np.arange(10.0)}) + + buf = StringIO() + df.to_string(buf=buf) + buf.getvalue() + + buf = StringIO() + df.info(buf=buf) + buf.getvalue() + + result = float_frame.to_string() + assert isinstance(result, str) + + def test_to_string_utf8_columns(self): + n = "\u05d0".encode("utf-8") + + with option_context("display.max_rows", 1): + df = DataFrame([1, 2], columns=[n]) + repr(df) + + def test_to_string_unicode_two(self): + dm = DataFrame({"c/\u03c3": []}) + buf = StringIO() + dm.to_string(buf) + + def test_to_string_unicode_three(self): + dm = DataFrame(["\xc2"]) + buf = StringIO() + dm.to_string(buf) + + def test_to_string_with_formatters(self): + df = DataFrame( + { + "int": [1, 2, 3], + "float": [1.0, 2.0, 3.0], + "object": [(1, 2), True, False], + }, + columns=["int", "float", "object"], + ) + + formatters = [ + ("int", lambda x: f"0x{x:x}"), + ("float", lambda x: f"[{x: 4.1f}]"), + ("object", lambda x: f"-{x!s}-"), + ] + result = df.to_string(formatters=dict(formatters)) + result2 = df.to_string(formatters=list(zip(*formatters))[1]) + assert result == ( + " int float object\n" + "0 0x1 [ 1.0] -(1, 2)-\n" + "1 0x2 [ 2.0] -True-\n" + "2 0x3 [ 3.0] -False-" + ) + assert result == result2 + + def test_to_string_with_datetime64_monthformatter(self): + months = [datetime(2016, 1, 1), datetime(2016, 2, 2)] + x = DataFrame({"months": months}) + + def format_func(x): + return x.strftime("%Y-%m") + + result = 
x.to_string(formatters={"months": format_func}) + expected = "months\n0 2016-01\n1 2016-02" + assert result.strip() == expected + + def test_to_string_with_datetime64_hourformatter(self): + + x = DataFrame( + { + "hod": pd.to_datetime( + ["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f" + ) + } + ) + + def format_func(x): + return x.strftime("%H:%M") + + result = x.to_string(formatters={"hod": format_func}) + expected = "hod\n0 10:10\n1 12:12" + assert result.strip() == expected + + def test_to_string_with_formatters_unicode(self): + df = DataFrame({"c/\u03c3": [1, 2, 3]}) + result = df.to_string(formatters={"c/\u03c3": str}) + assert result == " c/\u03c3\n" + "0 1\n1 2\n2 3" + + def test_east_asian_unicode_false(self): + # not aligned properly because of east asian width + + # mid col + df = DataFrame( + {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # last col + df = DataFrame( + {"a": [1, 222, 33333, 4], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na 1 あ\n" + "bb 222 いいい\nc 33333 う\n" + "ddd 4 ええええええ" + ) + assert repr(df) == expected + + # all col + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あああああ あ\n" + "bb い いいい\nc う う\n" + "ddd えええ ええええええ" + ) + assert repr(df) == expected + + # column name + df = DataFrame( + {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " b あああああ\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # index + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["あああ", "いいいいいい", "うう", "え"], + ) + expected = ( + " a b\nあああ あああああ あ\n" + "いいいいいい い いいい\nうう う う\n" + "え 
えええ ええええええ" + ) + assert repr(df) == expected + + # index name + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=pd.Index(["あ", "い", "うう", "え"], name="おおおお"), + ) + expected = ( + " a b\n" + "おおおお \n" + "あ あああああ あ\n" + "い い いいい\n" + "うう う う\n" + "え えええ ええええええ" + ) + assert repr(df) == expected + + # all + df = DataFrame( + {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + index=pd.Index(["あ", "いいい", "うう", "え"], name="お"), + ) + expected = ( + " あああ いいいいい\n" + "お \n" + "あ あああ あ\n" + "いいい い いいい\n" + "うう う う\n" + "え えええええ ええ" + ) + assert repr(df) == expected + + # MultiIndex + idx = pd.MultiIndex.from_tuples( + [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] + ) + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=idx, + ) + expected = ( + " a b\n" + "あ いい あああああ あ\n" + "う え い いいい\n" + "おおお かかかか う う\n" + "き くく えええ ええええええ" + ) + assert repr(df) == expected + + # truncate + with option_context("display.max_rows", 3, "display.max_columns", 3): + df = pd.DataFrame( + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + "c": ["お", "か", "ききき", "くくくくくく"], + "ああああ": ["さ", "し", "す", "せ"], + }, + columns=["a", "b", "c", "ああああ"], + ) + + expected = ( + " a ... ああああ\n0 あああああ ... さ\n" + ".. ... ... ...\n3 えええ ... せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + df.index = ["あああ", "いいいい", "う", "aaa"] + expected = ( + " a ... ああああ\nあああ あああああ ... さ\n" + ".. ... ... ...\naaa えええ ... 
せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + def test_east_asian_unicode_true(self): + # Enable Unicode option ----------------------------------------- + with option_context("display.unicode.east_asian_width", True): + + # mid col + df = DataFrame( + {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # last col + df = DataFrame( + {"a": [1, 222, 33333, 4], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na 1 あ\n" + "bb 222 いいい\nc 33333 う\n" + "ddd 4 ええええええ" + ) + assert repr(df) == expected + + # all col + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\n" + "a あああああ あ\n" + "bb い いいい\n" + "c う う\n" + "ddd えええ ええええええ" + ) + assert repr(df) == expected + + # column name + df = DataFrame( + {"b": ["あ", "いいい", "う", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " b あああああ\n" + "a あ 1\n" + "bb いいい 222\n" + "c う 33333\n" + "ddd ええええええ 4" + ) + assert repr(df) == expected + + # index + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=["あああ", "いいいいいい", "うう", "え"], + ) + expected = ( + " a b\n" + "あああ あああああ あ\n" + "いいいいいい い いいい\n" + "うう う う\n" + "え えええ ええええええ" + ) + assert repr(df) == expected + + # index name + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=pd.Index(["あ", "い", "うう", "え"], name="おおおお"), + ) + expected = ( + " a b\n" + "おおおお \n" + "あ あああああ あ\n" + "い い いいい\n" + "うう う う\n" + "え えええ ええええええ" + ) + assert repr(df) == expected + + # all + df = DataFrame( + {"あああ": ["あああ", "い", "う", "えええええ"], "いいいいい": ["あ", "いいい", "う", "ええ"]}, + index=pd.Index(["あ", "いいい", "うう", "え"], name="お"), + ) + expected = ( 
+ " あああ いいいいい\n" + "お \n" + "あ あああ あ\n" + "いいい い いいい\n" + "うう う う\n" + "え えええええ ええ" + ) + assert repr(df) == expected + + # MultiIndex + idx = pd.MultiIndex.from_tuples( + [("あ", "いい"), ("う", "え"), ("おおお", "かかかか"), ("き", "くく")] + ) + df = DataFrame( + {"a": ["あああああ", "い", "う", "えええ"], "b": ["あ", "いいい", "う", "ええええええ"]}, + index=idx, + ) + expected = ( + " a b\n" + "あ いい あああああ あ\n" + "う え い いいい\n" + "おおお かかかか う う\n" + "き くく えええ ええええええ" + ) + assert repr(df) == expected + + # truncate + with option_context("display.max_rows", 3, "display.max_columns", 3): + + df = pd.DataFrame( + { + "a": ["あああああ", "い", "う", "えええ"], + "b": ["あ", "いいい", "う", "ええええええ"], + "c": ["お", "か", "ききき", "くくくくくく"], + "ああああ": ["さ", "し", "す", "せ"], + }, + columns=["a", "b", "c", "ああああ"], + ) + + expected = ( + " a ... ああああ\n" + "0 あああああ ... さ\n" + ".. ... ... ...\n" + "3 えええ ... せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + df.index = ["あああ", "いいいい", "う", "aaa"] + expected = ( + " a ... ああああ\n" + "あああ あああああ ... さ\n" + "... ... ... ...\n" + "aaa えええ ... 
せ\n" + "\n[4 rows x 4 columns]" + ) + assert repr(df) == expected + + # ambiguous unicode + df = DataFrame( + {"b": ["あ", "いいい", "¡¡", "ええええええ"], "あああああ": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "¡¡¡"], + ) + expected = ( + " b あああああ\n" + "a あ 1\n" + "bb いいい 222\n" + "c ¡¡ 33333\n" + "¡¡¡ ええええええ 4" + ) + assert repr(df) == expected + + def test_to_string_buffer_all_unicode(self): + buf = StringIO() + + empty = DataFrame({"c/\u03c3": Series(dtype=object)}) + nonempty = DataFrame({"c/\u03c3": Series([1, 2, 3])}) + + print(empty, file=buf) + print(nonempty, file=buf) + + # this should work + buf.getvalue() + + def test_to_string_with_col_space(self): + df = DataFrame(np.random.random(size=(1, 3))) + c10 = len(df.to_string(col_space=10).split("\n")[1]) + c20 = len(df.to_string(col_space=20).split("\n")[1]) + c30 = len(df.to_string(col_space=30).split("\n")[1]) + assert c10 < c20 < c30 + + # GH 8230 + # col_space wasn't being applied with header=False + with_header = df.to_string(col_space=20) + with_header_row1 = with_header.splitlines()[1] + no_header = df.to_string(col_space=20, header=False) + assert len(with_header_row1) == len(no_header) + + def test_to_string_truncate_indices(self): + for index in [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeIntIndex, + tm.makeDateIndex, + tm.makePeriodIndex, + ]: + for column in [tm.makeStringIndex]: + for h in [10, 20]: + for w in [10, 20]: + with option_context("display.expand_frame_repr", False): + df = DataFrame(index=index(h), columns=column(w)) + with option_context("display.max_rows", 15): + if h == 20: + assert has_vertically_truncated_repr(df) + else: + assert not has_vertically_truncated_repr(df) + with option_context("display.max_columns", 15): + if w == 20: + assert has_horizontally_truncated_repr(df) + else: + assert not (has_horizontally_truncated_repr(df)) + with option_context( + "display.max_rows", 15, "display.max_columns", 15 + ): + if h == 20 and w == 20: + assert 
has_doubly_truncated_repr(df) + else: + assert not has_doubly_truncated_repr(df) + + def test_to_string_truncate_multilevel(self): + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + df = DataFrame(index=arrays, columns=arrays) + with option_context("display.max_rows", 7, "display.max_columns", 7): + assert has_doubly_truncated_repr(df) + + def test_truncate_with_different_dtypes(self): + + # 11594, 12045 + # when truncated the dtypes of the splits can differ + + # 11594 + import datetime + + s = Series( + [datetime.datetime(2012, 1, 1)] * 10 + + [datetime.datetime(1012, 1, 2)] + + [datetime.datetime(2012, 1, 3)] * 10 + ) + + with pd.option_context("display.max_rows", 8): + result = str(s) + assert "object" in result + + # 12045 + df = DataFrame({"text": ["some words"] + [None] * 9}) + + with pd.option_context("display.max_rows", 8, "display.max_columns", 3): + result = str(df) + assert "None" in result + assert "NaN" not in result + + def test_truncate_with_different_dtypes_multiindex(self): + # GH#13000 + df = DataFrame({"Vals": range(100)}) + frame = pd.concat([df], keys=["Sweep"], names=["Sweep", "Index"]) + result = repr(frame) + + result2 = repr(frame.iloc[:5]) + assert result.startswith(result2) + + def test_datetimelike_frame(self): + + # GH 12211 + df = DataFrame( + {"date": [pd.Timestamp("20130101").tz_localize("UTC")] + [pd.NaT] * 5} + ) + + with option_context("display.max_rows", 5): + result = str(df) + assert "2013-01-01 00:00:00+00:00" in result + assert "NaT" in result + assert "..." in result + assert "[6 rows x 1 columns]" in result + + dts = [pd.Timestamp("2011-01-01", tz="US/Eastern")] * 5 + [pd.NaT] * 5 + df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + with option_context("display.max_rows", 5): + expected = ( + " dt x\n" + "0 2011-01-01 00:00:00-05:00 1\n" + "1 2011-01-01 00:00:00-05:00 2\n" + ".. ... 
..\n" + "8 NaT 9\n" + "9 NaT 10\n\n" + "[10 rows x 2 columns]" + ) + assert repr(df) == expected + + dts = [pd.NaT] * 5 + [pd.Timestamp("2011-01-01", tz="US/Eastern")] * 5 + df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + with option_context("display.max_rows", 5): + expected = ( + " dt x\n" + "0 NaT 1\n" + "1 NaT 2\n" + ".. ... ..\n" + "8 2011-01-01 00:00:00-05:00 9\n" + "9 2011-01-01 00:00:00-05:00 10\n\n" + "[10 rows x 2 columns]" + ) + assert repr(df) == expected + + dts = [pd.Timestamp("2011-01-01", tz="Asia/Tokyo")] * 5 + [ + pd.Timestamp("2011-01-01", tz="US/Eastern") + ] * 5 + df = pd.DataFrame({"dt": dts, "x": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}) + with option_context("display.max_rows", 5): + expected = ( + " dt x\n" + "0 2011-01-01 00:00:00+09:00 1\n" + "1 2011-01-01 00:00:00+09:00 2\n" + ".. ... ..\n" + "8 2011-01-01 00:00:00-05:00 9\n" + "9 2011-01-01 00:00:00-05:00 10\n\n" + "[10 rows x 2 columns]" + ) + assert repr(df) == expected + + @pytest.mark.parametrize( + "start_date", + [ + "2017-01-01 23:59:59.999999999", + "2017-01-01 23:59:59.99999999", + "2017-01-01 23:59:59.9999999", + "2017-01-01 23:59:59.999999", + "2017-01-01 23:59:59.99999", + "2017-01-01 23:59:59.9999", + ], + ) + def test_datetimeindex_highprecision(self, start_date): + # GH19030 + # Check that high-precision time values for the end of day are + # included in repr for DatetimeIndex + df = DataFrame({"A": date_range(start=start_date, freq="D", periods=5)}) + result = str(df) + assert start_date in result + + dti = date_range(start=start_date, freq="D", periods=5) + df = DataFrame({"A": range(5)}, index=dti) + result = str(df.index) + assert start_date in result + + def test_nonunicode_nonascii_alignment(self): + df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) + rep_str = df.to_string() + lines = rep_str.split("\n") + assert len(lines[1]) == len(lines[2]) + + def test_unicode_problem_decoding_as_ascii(self): + dm = DataFrame({"c/\u03c3": Series({"test": 
np.nan})}) + str(dm.to_string()) + + def test_string_repr_encoding(self, datapath): + filepath = datapath("io", "parser", "data", "unicode_series.csv") + df = pd.read_csv(filepath, header=None, encoding="latin1") + repr(df) + repr(df[1]) + + def test_repr_corner(self): + # representing infs poses no problems + df = DataFrame({"foo": [-np.inf, np.inf]}) + repr(df) + + def test_frame_info_encoding(self): + index = ["'Til There Was You (1997)", "ldum klaka (Cold Fever) (1994)"] + fmt.set_option("display.max_rows", 1) + df = DataFrame(columns=["a", "b", "c"], index=index) + repr(df) + repr(df.T) + fmt.set_option("display.max_rows", 200) + + def test_wide_repr(self): + with option_context( + "mode.sim_interactive", + True, + "display.show_dimensions", + True, + "display.max_columns", + 20, + ): + max_cols = get_option("display.max_columns") + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + + assert f"10 rows x {max_cols - 1} columns" in rep_str + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 120): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + reset_option("display.expand_frame_repr") + + def test_wide_repr_wide_columns(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + df = DataFrame( + np.random.randn(5, 3), columns=["a" * 90, "b" * 90, "c" * 90] + ) + rep_str = repr(df) + + assert len(rep_str.splitlines()) == 20 + + def test_wide_repr_named(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + max_cols = get_option("display.max_columns") + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) + df.index.name = "DataFrame Index" + set_option("display.expand_frame_repr", False) + + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != 
wide_repr + + with option_context("display.width", 150): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + for line in wide_repr.splitlines()[1::13]: + assert "DataFrame Index" in line + + reset_option("display.expand_frame_repr") + + def test_wide_repr_multiindex(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) + max_cols = get_option("display.max_columns") + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)), index=midx) + df.index.names = ["Level 0", "Level 1"] + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 150): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + for line in wide_repr.splitlines()[1::13]: + assert "Level 0 Level 1" in line + + reset_option("display.expand_frame_repr") + + def test_wide_repr_multiindex_cols(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + max_cols = get_option("display.max_columns") + midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) + mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols - 1))) + df = DataFrame( + tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols + ) + df.index.names = ["Level 0", "Level 1"] + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 150, "display.max_columns", 20): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + reset_option("display.expand_frame_repr") + + def test_wide_repr_unicode(self): + with option_context("mode.sim_interactive", True, "display.max_columns", 20): + max_cols = 20 + df = DataFrame(tm.rands_array(25, size=(10, 
max_cols - 1))) + set_option("display.expand_frame_repr", False) + rep_str = repr(df) + set_option("display.expand_frame_repr", True) + wide_repr = repr(df) + assert rep_str != wide_repr + + with option_context("display.width", 150): + wider_repr = repr(df) + assert len(wider_repr) < len(wide_repr) + + reset_option("display.expand_frame_repr") + + def test_wide_repr_wide_long_columns(self): + with option_context("mode.sim_interactive", True): + df = DataFrame({"a": ["a" * 30, "b" * 30], "b": ["c" * 70, "d" * 80]}) + + result = repr(df) + assert "ccccc" in result + assert "ddddd" in result + + def test_long_series(self): + n = 1000 + s = Series( + np.random.randint(-50, 50, n), + index=[f"s{x:04d}" for x in range(n)], + dtype="int64", + ) + + import re + + str_rep = str(s) + nmatches = len(re.findall("dtype", str_rep)) + assert nmatches == 1 + + def test_index_with_nan(self): + # GH 2850 + df = DataFrame( + { + "id1": {0: "1a3", 1: "9h4"}, + "id2": {0: np.nan, 1: "d67"}, + "id3": {0: "78d", 1: "79d"}, + "value": {0: 123, 1: 64}, + } + ) + + # multi-index + y = df.set_index(["id1", "id2", "id3"]) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "1a3 NaN 78d 123\n9h4 d67 79d 64" + ) + assert result == expected + + # index + y = df.set_index("id2") + result = y.to_string() + expected = ( + " id1 id3 value\nid2 \n" + "NaN 1a3 78d 123\nd67 9h4 79d 64" + ) + assert result == expected + + # with append (this failed in 0.12) + y = df.set_index(["id1", "id2"]).set_index("id3", append=True) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "1a3 NaN 78d 123\n9h4 d67 79d 64" + ) + assert result == expected + + # all-nan in mi + df2 = df.copy() + df2.loc[:, "id2"] = np.nan + y = df2.set_index("id2") + result = y.to_string() + expected = ( + " id1 id3 value\nid2 \n" + "NaN 1a3 78d 123\nNaN 9h4 79d 64" + ) + assert result == expected + + # partial nan in mi + df2 = df.copy() + df2.loc[:, "id2"] = np.nan + y = df2.set_index(["id2", "id3"]) + 
result = y.to_string() + expected = ( + " id1 value\nid2 id3 \n" + "NaN 78d 1a3 123\n 79d 9h4 64" + ) + assert result == expected + + df = DataFrame( + { + "id1": {0: np.nan, 1: "9h4"}, + "id2": {0: np.nan, 1: "d67"}, + "id3": {0: np.nan, 1: "79d"}, + "value": {0: 123, 1: 64}, + } + ) + + y = df.set_index(["id1", "id2", "id3"]) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "NaN NaN NaN 123\n9h4 d67 79d 64" + ) + assert result == expected + + def test_to_string(self): + + # big mixed + biggie = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, + index=np.arange(200), + ) + + biggie.loc[:20, "A"] = np.nan + biggie.loc[:20, "B"] = np.nan + s = biggie.to_string() + + buf = StringIO() + retval = biggie.to_string(buf=buf) + assert retval is None + assert buf.getvalue() == s + + assert isinstance(s, str) + + # print in right order + result = biggie.to_string( + columns=["B", "A"], col_space=17, float_format="%.5f".__mod__ + ) + lines = result.split("\n") + header = lines[0].strip().split() + joined = "\n".join(re.sub(r"\s+", " ", x).strip() for x in lines[1:]) + recons = read_csv(StringIO(joined), names=header, header=None, sep=" ") + tm.assert_series_equal(recons["B"], biggie["B"]) + assert recons["A"].count() == biggie["A"].count() + assert (np.abs(recons["A"].dropna() - biggie["A"].dropna()) < 0.1).all() + + # expected = ['B', 'A'] + # assert header == expected + + result = biggie.to_string(columns=["A"], col_space=17) + header = result.split("\n")[0].strip().split() + expected = ["A"] + assert header == expected + + biggie.to_string(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"}) + + biggie.to_string(columns=["B", "A"], float_format=str) + biggie.to_string(columns=["B", "A"], col_space=12, float_format=str) + + frame = DataFrame(index=np.arange(200)) + frame.to_string() + + def test_to_string_no_header(self): + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(header=False) + expected = "0 
1 4\n1 2 5\n2 3 6" + + assert df_s == expected + + def test_to_string_specified_header(self): + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(header=["X", "Y"]) + expected = " X Y\n0 1 4\n1 2 5\n2 3 6" + + assert df_s == expected + + with pytest.raises(ValueError): + df.to_string(header=["X"]) + + def test_to_string_no_index(self): + # GH 16839, GH 13032 + df = DataFrame({"x": [11, 22], "y": [33, -44], "z": ["AAA", " "]}) + + df_s = df.to_string(index=False) + # Leading space is expected for positive numbers. + expected = " x y z\n 11 33 AAA\n 22 -44 " + assert df_s == expected + + df_s = df[["y", "x", "z"]].to_string(index=False) + expected = " y x z\n 33 11 AAA\n-44 22 " + assert df_s == expected + + def test_to_string_line_width_no_index(self): + # GH 13998, GH 22505 + df = DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n 1 \n 2 \n 3 \n\n y \n 4 \n 5 \n 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, 33], "y": [4, 5, 6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n 11 \n 22 \n 33 \n\n y \n 4 \n 5 \n 6 " + + assert df_s == expected + + df = DataFrame({"x": [11, 22, -33], "y": [4, 5, -6]}) + + df_s = df.to_string(line_width=1, index=False) + expected = " x \\\n 11 \n 22 \n-33 \n\n y \n 4 \n 5 \n-6 " + + assert df_s == expected + + def test_to_string_float_formatting(self): + tm.reset_display_options() + fmt.set_option( + "display.precision", + 5, + "display.column_space", + 12, + "display.notebook_repr_html", + False, + ) + + df = DataFrame( + {"x": [0, 0.25, 3456.000, 12e45, 1.64e6, 1.7e8, 1.253456, np.pi, -1e6]} + ) + + df_s = df.to_string() + + if _three_digit_exp(): + expected = ( + " x\n0 0.00000e+000\n1 2.50000e-001\n" + "2 3.45600e+003\n3 1.20000e+046\n4 1.64000e+006\n" + "5 1.70000e+008\n6 1.25346e+000\n7 3.14159e+000\n" + "8 -1.00000e+006" + ) + else: + expected = ( + " x\n0 0.00000e+00\n1 2.50000e-01\n" + "2 
3.45600e+03\n3 1.20000e+46\n4 1.64000e+06\n" + "5 1.70000e+08\n6 1.25346e+00\n7 3.14159e+00\n" + "8 -1.00000e+06" + ) + assert df_s == expected + + df = DataFrame({"x": [3234, 0.253]}) + df_s = df.to_string() + + expected = " x\n0 3234.000\n1 0.253" + assert df_s == expected + + tm.reset_display_options() + assert get_option("display.precision") == 6 + + df = DataFrame({"x": [1e9, 0.2512]}) + df_s = df.to_string() + + if _three_digit_exp(): + expected = " x\n0 1.000000e+009\n1 2.512000e-001" + else: + expected = " x\n0 1.000000e+09\n1 2.512000e-01" + assert df_s == expected + + def test_to_string_float_format_no_fixed_width(self): + + # GH 21625 + df = DataFrame({"x": [0.19999]}) + expected = " x\n0 0.200" + assert df.to_string(float_format="%.3f") == expected + + # GH 22270 + df = DataFrame({"x": [100.0]}) + expected = " x\n0 100" + assert df.to_string(float_format="%.0f") == expected + + def test_to_string_small_float_values(self): + df = DataFrame({"a": [1.5, 1e-17, -5.5e-7]}) + + result = df.to_string() + # sadness per above + if _three_digit_exp(): + expected = ( + " a\n" + "0 1.500000e+000\n" + "1 1.000000e-017\n" + "2 -5.500000e-007" + ) + else: + expected = ( + " a\n" + "0 1.500000e+00\n" + "1 1.000000e-17\n" + "2 -5.500000e-07" + ) + assert result == expected + + # but not all exactly zero + df = df * 0 + result = df.to_string() + expected = " 0\n0 0\n1 0\n2 -0" + + def test_to_string_float_index(self): + index = Index([1.5, 2, 3, 4, 5]) + df = DataFrame(np.arange(5), index=index) + + result = df.to_string() + expected = " 0\n1.5 0\n2.0 1\n3.0 2\n4.0 3\n5.0 4" + assert result == expected + + def test_to_string_complex_float_formatting(self): + # GH #25514, 25745 + with pd.option_context("display.precision", 5): + df = DataFrame( + { + "x": [ + (0.4467846931321966 + 0.0715185102060818j), + (0.2739442392974528 + 0.23515228785438969j), + (0.26974928742135185 + 0.3250604054898979j), + (-1j), + ] + } + ) + result = df.to_string() + expected = ( + " x\n0 
0.44678+0.07152j\n" + "1 0.27394+0.23515j\n" + "2 0.26975+0.32506j\n" + "3 -0.00000-1.00000j" + ) + assert result == expected + + def test_to_string_ascii_error(self): + data = [ + ( + "0 ", + " .gitignore ", + " 5 ", + " \xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2\xe2\x80\xa2", + ) + ] + df = DataFrame(data) + + # it works! + repr(df) + + def test_to_string_int_formatting(self): + df = DataFrame({"x": [-15, 20, 25, -35]}) + assert issubclass(df["x"].dtype.type, np.integer) + + output = df.to_string() + expected = " x\n0 -15\n1 20\n2 25\n3 -35" + assert output == expected + + def test_to_string_index_formatter(self): + df = DataFrame([range(5), range(5, 10), range(10, 15)]) + + rs = df.to_string(formatters={"__index__": lambda x: "abc"[x]}) + + xp = """\ + 0 1 2 3 4 +a 0 1 2 3 4 +b 5 6 7 8 9 +c 10 11 12 13 14\ +""" + + assert rs == xp + + def test_to_string_left_justify_cols(self): + tm.reset_display_options() + df = DataFrame({"x": [3234, 0.253]}) + df_s = df.to_string(justify="left") + expected = " x \n0 3234.000\n1 0.253" + assert df_s == expected + + def test_to_string_format_na(self): + tm.reset_display_options() + df = DataFrame( + { + "A": [np.nan, -1, -2.1234, 3, 4], + "B": [np.nan, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 NaN NaN\n" + "1 -1.0000 foo\n" + "2 -2.1234 foooo\n" + "3 3.0000 fooooo\n" + "4 4.0000 bar" + ) + assert result == expected + + df = DataFrame( + { + "A": [np.nan, -1.0, -2.0, 3.0, 4.0], + "B": [np.nan, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 NaN NaN\n" + "1 -1.0 foo\n" + "2 -2.0 foooo\n" + "3 3.0 fooooo\n" + "4 4.0 bar" + ) + assert result == expected + + def test_to_string_format_inf(self): + # Issue #24861 + tm.reset_display_options() + df = DataFrame( + { + "A": [-np.inf, np.inf, -1, -2.1234, 3, 4], + "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " 
A B\n" + "0 -inf -inf\n" + "1 inf inf\n" + "2 -1.0000 foo\n" + "3 -2.1234 foooo\n" + "4 3.0000 fooooo\n" + "5 4.0000 bar" + ) + assert result == expected + + df = DataFrame( + { + "A": [-np.inf, np.inf, -1.0, -2.0, 3.0, 4.0], + "B": [-np.inf, np.inf, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 -inf -inf\n" + "1 inf inf\n" + "2 -1.0 foo\n" + "3 -2.0 foooo\n" + "4 3.0 fooooo\n" + "5 4.0 bar" + ) + assert result == expected + + def test_to_string_decimal(self): + # Issue #23614 + df = DataFrame({"A": [6.0, 3.1, 2.2]}) + expected = " A\n0 6,0\n1 3,1\n2 2,2" + assert df.to_string(decimal=",") == expected + + def test_to_string_line_width(self): + df = DataFrame(123, index=range(10, 15), columns=range(30)) + s = df.to_string(line_width=80) + assert max(len(l) for l in s.split("\n")) == 80 + + def test_show_dimensions(self): + df = DataFrame(123, index=range(10, 15), columns=range(30)) + + with option_context( + "display.max_rows", + 10, + "display.max_columns", + 40, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + True, + ): + assert "5 rows" in str(df) + assert "5 rows" in df._repr_html_() + with option_context( + "display.max_rows", + 10, + "display.max_columns", + 40, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + False, + ): + assert "5 rows" not in str(df) + assert "5 rows" not in df._repr_html_() + with option_context( + "display.max_rows", + 2, + "display.max_columns", + 2, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + "truncate", + ): + assert "5 rows" in str(df) + assert "5 rows" in df._repr_html_() + with option_context( + "display.max_rows", + 10, + "display.max_columns", + 40, + "display.width", + 500, + "display.expand_frame_repr", + "info", + "display.show_dimensions", + "truncate", + ): + assert "5 rows" not in str(df) + assert "5 rows" not in 
df._repr_html_() + + def test_repr_html(self, float_frame): + df = float_frame + df._repr_html_() + + fmt.set_option("display.max_rows", 1, "display.max_columns", 1) + df._repr_html_() + + fmt.set_option("display.notebook_repr_html", False) + df._repr_html_() + + tm.reset_display_options() + + df = DataFrame([[1, 2], [3, 4]]) + fmt.set_option("display.show_dimensions", True) + assert "2 rows" in df._repr_html_() + fmt.set_option("display.show_dimensions", False) + assert "2 rows" not in df._repr_html_() + + tm.reset_display_options() + + def test_repr_html_mathjax(self): + df = DataFrame([[1, 2], [3, 4]]) + assert "tex2jax_ignore" not in df._repr_html_() + + with pd.option_context("display.html.use_mathjax", False): + assert "tex2jax_ignore" in df._repr_html_() + + def test_repr_html_wide(self): + max_cols = 20 + df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." not in df._repr_html_() + + wide_df = DataFrame(tm.rands_array(25, size=(10, max_cols + 1))) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." in wide_df._repr_html_() + + def test_repr_html_wide_multiindex_cols(self): + max_cols = 20 + + mcols = MultiIndex.from_product( + [np.arange(max_cols // 2), ["foo", "bar"]], names=["first", "second"] + ) + df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols) + reg_repr = df._repr_html_() + assert "..." not in reg_repr + + mcols = MultiIndex.from_product( + (np.arange(1 + (max_cols // 2)), ["foo", "bar"]), names=["first", "second"] + ) + df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." 
in df._repr_html_() + + def test_repr_html_long(self): + with option_context("display.max_rows", 60): + max_rows = get_option("display.max_rows") + h = max_rows - 1 + df = DataFrame({"A": np.arange(1, 1 + h), "B": np.arange(41, 41 + h)}) + reg_repr = df._repr_html_() + assert ".." not in reg_repr + assert str(41 + max_rows // 2) in reg_repr + + h = max_rows + 1 + df = DataFrame({"A": np.arange(1, 1 + h), "B": np.arange(41, 41 + h)}) + long_repr = df._repr_html_() + assert ".." in long_repr + assert str(41 + max_rows // 2) not in long_repr + assert f"{h} rows " in long_repr + assert "2 columns" in long_repr + + def test_repr_html_float(self): + with option_context("display.max_rows", 60): + + max_rows = get_option("display.max_rows") + h = max_rows - 1 + df = DataFrame( + { + "idx": np.linspace(-10, 10, h), + "A": np.arange(1, 1 + h), + "B": np.arange(41, 41 + h), + } + ).set_index("idx") + reg_repr = df._repr_html_() + assert ".." not in reg_repr + assert f"" in reg_repr + + h = max_rows + 1 + df = DataFrame( + { + "idx": np.linspace(-10, 10, h), + "A": np.arange(1, 1 + h), + "B": np.arange(41, 41 + h), + } + ).set_index("idx") + long_repr = df._repr_html_() + assert ".." in long_repr + assert "" not in long_repr + assert f"{h} rows " in long_repr + assert "2 columns" in long_repr + + def test_repr_html_long_multiindex(self): + max_rows = 60 + max_L1 = max_rows // 2 + + tuples = list(itertools.product(np.arange(max_L1), ["foo", "bar"])) + idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame(np.random.randn(max_L1 * 2, 2), index=idx, columns=["A", "B"]) + with option_context("display.max_rows", 60, "display.max_columns", 20): + reg_repr = df._repr_html_() + assert "..." 
not in reg_repr + + tuples = list(itertools.product(np.arange(max_L1 + 1), ["foo", "bar"])) + idx = MultiIndex.from_tuples(tuples, names=["first", "second"]) + df = DataFrame( + np.random.randn((max_L1 + 1) * 2, 2), index=idx, columns=["A", "B"] + ) + long_repr = df._repr_html_() + assert "..." in long_repr + + def test_repr_html_long_and_wide(self): + max_cols = 20 + max_rows = 60 + + h, w = max_rows - 1, max_cols - 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." not in df._repr_html_() + + h, w = max_rows + 1, max_cols + 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + with option_context("display.max_rows", 60, "display.max_columns", 20): + assert "..." in df._repr_html_() + + def test_info_repr(self): + # GH#21746 For tests inside a terminal (i.e. not CI) we need to detect + # the terminal size to ensure that we try to print something "too big" + term_width, term_height = get_terminal_size() + + max_rows = 60 + max_cols = 20 + (max(term_width, 80) - 80) // 4 + # Long + h, w = max_rows + 1, max_cols - 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert has_vertically_truncated_repr(df) + with option_context("display.large_repr", "info"): + assert has_info_repr(df) + + # Wide + h, w = max_rows - 1, max_cols + 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert has_horizontally_truncated_repr(df) + with option_context( + "display.large_repr", "info", "display.max_columns", max_cols + ): + assert has_info_repr(df) + + def test_info_repr_max_cols(self): + # GH #6939 + df = DataFrame(np.random.randn(10, 5)) + with option_context( + "display.large_repr", + "info", + "display.max_columns", + 1, + "display.max_info_columns", + 4, + ): + assert has_non_verbose_info_repr(df) + + with option_context( + "display.large_repr", + "info", + "display.max_columns", + 1, + "display.max_info_columns", + 5, + 
): + assert not has_non_verbose_info_repr(df) + + # test verbose overrides + # fmt.set_option('display.max_info_columns', 4) # exceeded + + def test_info_repr_html(self): + max_rows = 60 + max_cols = 20 + # Long + h, w = max_rows + 1, max_cols - 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert r"<class" not in df._repr_html_() + with option_context("display.large_repr", "info"): + assert r"<class" in df._repr_html_() + + # Wide + h, w = max_rows - 1, max_cols + 1 + df = DataFrame({k: np.arange(1, 1 + h) for k in np.arange(w)}) + assert " never truncate + assert ".." not in repr(s) + + def test_to_string_name(self): + s = Series(range(100), dtype="int64") + s.name = "myser" + res = s.to_string(max_rows=2, name=True) + exp = "0 0\n ..\n99 99\nName: myser" + assert res == exp + res = s.to_string(max_rows=2, name=False) + exp = "0 0\n ..\n99 99" + assert res == exp + + def test_to_string_dtype(self): + s = Series(range(100), dtype="int64") + res = s.to_string(max_rows=2, dtype=True) + exp = "0 0\n ..\n99 99\ndtype: int64" + assert res == exp + res = s.to_string(max_rows=2, dtype=False) + exp = "0 0\n ..\n99 99" + assert res == exp + + def test_to_string_length(self): + s = Series(range(100), dtype="int64") + res = s.to_string(max_rows=2, length=True) + exp = "0 0\n ..\n99 99\nLength: 100" + assert res == exp + + def test_to_string_na_rep(self): + s = pd.Series(index=range(100), dtype=np.float64) + res = s.to_string(na_rep="foo", max_rows=2) + exp = "0 foo\n ..\n99 foo" + assert res == exp + + def test_to_string_float_format(self): + s = pd.Series(range(10), dtype="float64") + res = s.to_string(float_format=lambda x: f"{x:2.1f}", max_rows=2) + exp = "0 0.0\n ..\n9 9.0" + assert res == exp + + def test_to_string_header(self): + s = pd.Series(range(10), dtype="int64") + s.index.name = "foo" + res = s.to_string(header=True, max_rows=2) + exp = "foo\n0 0\n ..\n9 9" + assert res == exp + res = s.to_string(header=False, max_rows=2) + exp = "0 0\n 
..\n9 9" + assert res == exp + + def test_to_string_multindex_header(self): + # GH 16718 + df = pd.DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index( + ["a", "b"] + ) + res = df.to_string(header=["r1", "r2"]) + exp = " r1 r2\na b \n0 1 2 3" + assert res == exp + + +def _three_digit_exp(): + return f"{1.7e8:.4g}" == "1.7e+008" + + +class TestFloatArrayFormatter: + def test_misc(self): + obj = fmt.FloatArrayFormatter(np.array([], dtype=np.float64)) + result = obj.get_result() + assert len(result) == 0 + + def test_format(self): + obj = fmt.FloatArrayFormatter(np.array([12, 0], dtype=np.float64)) + result = obj.get_result() + assert result[0] == " 12.0" + assert result[1] == " 0.0" + + def test_output_significant_digits(self): + # Issue #9764 + + # In case default display precision changes: + with pd.option_context("display.precision", 6): + # DataFrame example from issue #9764 + d = pd.DataFrame( + { + "col1": [ + 9.999e-8, + 1e-7, + 1.0001e-7, + 2e-7, + 4.999e-7, + 5e-7, + 5.0001e-7, + 6e-7, + 9.999e-7, + 1e-6, + 1.0001e-6, + 2e-6, + 4.999e-6, + 5e-6, + 5.0001e-6, + 6e-6, + ] + } + ) + + expected_output = { + (0, 6): " col1\n" + "0 9.999000e-08\n" + "1 1.000000e-07\n" + "2 1.000100e-07\n" + "3 2.000000e-07\n" + "4 4.999000e-07\n" + "5 5.000000e-07", + (1, 6): " col1\n" + "1 1.000000e-07\n" + "2 1.000100e-07\n" + "3 2.000000e-07\n" + "4 4.999000e-07\n" + "5 5.000000e-07", + (1, 8): " col1\n" + "1 1.000000e-07\n" + "2 1.000100e-07\n" + "3 2.000000e-07\n" + "4 4.999000e-07\n" + "5 5.000000e-07\n" + "6 5.000100e-07\n" + "7 6.000000e-07", + (8, 16): " col1\n" + "8 9.999000e-07\n" + "9 1.000000e-06\n" + "10 1.000100e-06\n" + "11 2.000000e-06\n" + "12 4.999000e-06\n" + "13 5.000000e-06\n" + "14 5.000100e-06\n" + "15 6.000000e-06", + (9, 16): " col1\n" + "9 0.000001\n" + "10 0.000001\n" + "11 0.000002\n" + "12 0.000005\n" + "13 0.000005\n" + "14 0.000005\n" + "15 0.000006", + } + + for (start, stop), v in expected_output.items(): + assert str(d[start:stop]) == v 
+ + def test_too_long(self): + # GH 10451 + with pd.option_context("display.precision", 4): + # need both a number > 1e6 and something that normally formats to + # having length > display.precision + 6 + df = pd.DataFrame(dict(x=[12345.6789])) + assert str(df) == " x\n0 12345.6789" + df = pd.DataFrame(dict(x=[2e6])) + assert str(df) == " x\n0 2000000.0" + df = pd.DataFrame(dict(x=[12345.6789, 2e6])) + assert str(df) == " x\n0 1.2346e+04\n1 2.0000e+06" + + +class TestRepr_timedelta64: + def test_none(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = pd.to_timedelta(0, unit="D") + delta_1s = pd.to_timedelta(1, unit="s") + delta_500ms = pd.to_timedelta(500, unit="ms") + + drepr = lambda x: x._repr_base() + assert drepr(delta_1d) == "1 days" + assert drepr(-delta_1d) == "-1 days" + assert drepr(delta_0d) == "0 days" + assert drepr(delta_1s) == "0 days 00:00:01" + assert drepr(delta_500ms) == "0 days 00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(-delta_1d + delta_1s) == "-1 days +00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" + assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" + + def test_sub_day(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = pd.to_timedelta(0, unit="D") + delta_1s = pd.to_timedelta(1, unit="s") + delta_500ms = pd.to_timedelta(500, unit="ms") + + drepr = lambda x: x._repr_base(format="sub_day") + assert drepr(delta_1d) == "1 days" + assert drepr(-delta_1d) == "-1 days" + assert drepr(delta_0d) == "00:00:00" + assert drepr(delta_1s) == "00:00:01" + assert drepr(delta_500ms) == "00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(-delta_1d + delta_1s) == "-1 days +00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" + assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" + + def test_long(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = 
pd.to_timedelta(0, unit="D") + delta_1s = pd.to_timedelta(1, unit="s") + delta_500ms = pd.to_timedelta(500, unit="ms") + + drepr = lambda x: x._repr_base(format="long") + assert drepr(delta_1d) == "1 days 00:00:00" + assert drepr(-delta_1d) == "-1 days +00:00:00" + assert drepr(delta_0d) == "0 days 00:00:00" + assert drepr(delta_1s) == "0 days 00:00:01" + assert drepr(delta_500ms) == "0 days 00:00:00.500000" + assert drepr(delta_1d + delta_1s) == "1 days 00:00:01" + assert drepr(-delta_1d + delta_1s) == "-1 days +00:00:01" + assert drepr(delta_1d + delta_500ms) == "1 days 00:00:00.500000" + assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" + + def test_all(self): + delta_1d = pd.to_timedelta(1, unit="D") + delta_0d = pd.to_timedelta(0, unit="D") + delta_1ns = pd.to_timedelta(1, unit="ns") + + drepr = lambda x: x._repr_base(format="all") + assert drepr(delta_1d) == "1 days 00:00:00.000000000" + assert drepr(-delta_1d) == "-1 days +00:00:00.000000000" + assert drepr(delta_0d) == "0 days 00:00:00.000000000" + assert drepr(delta_1ns) == "0 days 00:00:00.000000001" + assert drepr(-delta_1d + delta_1ns) == "-1 days +00:00:00.000000001" + + +class TestTimedelta64Formatter: + def test_days(self): + x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="D") + result = fmt.Timedelta64Formatter(x, box=True).get_result() + assert result[0].strip() == "'0 days'" + assert result[1].strip() == "'1 days'" + + result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result() + assert result[0].strip() == "'1 days'" + + result = fmt.Timedelta64Formatter(x, box=False).get_result() + assert result[0].strip() == "0 days" + assert result[1].strip() == "1 days" + + result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result() + assert result[0].strip() == "1 days" + + def test_days_neg(self): + x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="D") + result = fmt.Timedelta64Formatter(-x, box=True).get_result() + assert result[0].strip() == "'0 days'" + assert 
result[1].strip() == "'-1 days'" + + def test_subdays(self): + y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="s") + result = fmt.Timedelta64Formatter(y, box=True).get_result() + assert result[0].strip() == "'00:00:00'" + assert result[1].strip() == "'00:00:01'" + + def test_subdays_neg(self): + y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit="s") + result = fmt.Timedelta64Formatter(-y, box=True).get_result() + assert result[0].strip() == "'00:00:00'" + assert result[1].strip() == "'-1 days +23:59:59'" + + def test_zero(self): + x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit="D") + result = fmt.Timedelta64Formatter(x, box=True).get_result() + assert result[0].strip() == "'0 days'" + + x = pd.to_timedelta(list(range(1)), unit="D") + result = fmt.Timedelta64Formatter(x, box=True).get_result() + assert result[0].strip() == "'0 days'" + + +class TestDatetime64Formatter: + def test_mixed(self): + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), pd.NaT]) + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "2013-01-01 00:00:00" + assert result[1].strip() == "2013-01-01 12:00:00" + + def test_dates(self): + x = Series([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT]) + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "2013-01-01" + assert result[1].strip() == "2013-01-02" + + def test_date_nanos(self): + x = Series([Timestamp(200)]) + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "1970-01-01 00:00:00.000000200" + + def test_dates_display(self): + + # 10170 + # make sure that we are consistently display date formatting + x = Series(date_range("20130101 09:00:00", periods=5, freq="D")) + x.iloc[1] = np.nan + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "2013-01-01 09:00:00" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-05 09:00:00" + + x = Series(date_range("20130101 09:00:00", periods=5, 
freq="s")) + x.iloc[1] = np.nan + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "2013-01-01 09:00:00" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:04" + + x = Series(date_range("20130101 09:00:00", periods=5, freq="ms")) + x.iloc[1] = np.nan + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "2013-01-01 09:00:00.000" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:00.004" + + x = Series(date_range("20130101 09:00:00", periods=5, freq="us")) + x.iloc[1] = np.nan + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "2013-01-01 09:00:00.000000" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:00.000004" + + x = Series(date_range("20130101 09:00:00", periods=5, freq="N")) + x.iloc[1] = np.nan + result = fmt.Datetime64Formatter(x).get_result() + assert result[0].strip() == "2013-01-01 09:00:00.000000000" + assert result[1].strip() == "NaT" + assert result[4].strip() == "2013-01-01 09:00:00.000000004" + + def test_datetime64formatter_yearmonth(self): + x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)]) + + def format_func(x): + return x.strftime("%Y-%m") + + formatter = fmt.Datetime64Formatter(x, formatter=format_func) + result = formatter.get_result() + assert result == ["2016-01", "2016-02"] + + def test_datetime64formatter_hoursecond(self): + + x = Series( + pd.to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f") + ) + + def format_func(x): + return x.strftime("%H:%M") + + formatter = fmt.Datetime64Formatter(x, formatter=format_func) + result = formatter.get_result() + assert result == ["10:10", "12:12"] + + +class TestNaTFormatting: + def test_repr(self): + assert repr(pd.NaT) == "NaT" + + def test_str(self): + assert str(pd.NaT) == "NaT" + + +class TestDatetimeIndexFormat: + def test_datetime(self): + formatted = pd.to_datetime([datetime(2003, 1, 
1, 12), pd.NaT]).format() + assert formatted[0] == "2003-01-01 12:00:00" + assert formatted[1] == "NaT" + + def test_date(self): + formatted = pd.to_datetime([datetime(2003, 1, 1), pd.NaT]).format() + assert formatted[0] == "2003-01-01" + assert formatted[1] == "NaT" + + def test_date_tz(self): + formatted = pd.to_datetime([datetime(2013, 1, 1)], utc=True).format() + assert formatted[0] == "2013-01-01 00:00:00+00:00" + + formatted = pd.to_datetime([datetime(2013, 1, 1), pd.NaT], utc=True).format() + assert formatted[0] == "2013-01-01 00:00:00+00:00" + + def test_date_explicit_date_format(self): + formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format( + date_format="%m-%d-%Y", na_rep="UT" + ) + assert formatted[0] == "02-01-2003" + assert formatted[1] == "UT" + + +class TestDatetimeIndexUnicode: + def test_dates(self): + text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)])) + assert "['2013-01-01'," in text + assert ", '2014-01-01']" in text + + def test_mixed(self): + text = str( + pd.to_datetime( + [datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)] + ) + ) + assert "'2013-01-01 00:00:00'," in text + assert "'2014-01-01 00:00:00']" in text + + +class TestStringRepTimestamp: + def test_no_tz(self): + dt_date = datetime(2013, 1, 2) + assert str(dt_date) == str(Timestamp(dt_date)) + + dt_datetime = datetime(2013, 1, 2, 12, 1, 3) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) + + dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) + + ts_nanos_only = Timestamp(200) + assert str(ts_nanos_only) == "1970-01-01 00:00:00.000000200" + + ts_nanos_micros = Timestamp(1200) + assert str(ts_nanos_micros) == "1970-01-01 00:00:00.000001200" + + def test_tz_pytz(self): + dt_date = datetime(2013, 1, 2, tzinfo=pytz.utc) + assert str(dt_date) == str(Timestamp(dt_date)) + + dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=pytz.utc) + assert str(dt_datetime) == 
str(Timestamp(dt_datetime)) + + dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=pytz.utc) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) + + def test_tz_dateutil(self): + utc = dateutil.tz.tzutc() + + dt_date = datetime(2013, 1, 2, tzinfo=utc) + assert str(dt_date) == str(Timestamp(dt_date)) + + dt_datetime = datetime(2013, 1, 2, 12, 1, 3, tzinfo=utc) + assert str(dt_datetime) == str(Timestamp(dt_datetime)) + + dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=utc) + assert str(dt_datetime_us) == str(Timestamp(dt_datetime_us)) + + def test_nat_representations(self): + for f in (str, repr, methodcaller("isoformat")): + assert f(pd.NaT) == "NaT" + + +def test_format_percentiles(): + result = fmt.format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999]) + expected = ["1.999%", "2.001%", "50%", "66.667%", "99.99%"] + assert result == expected + + result = fmt.format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999]) + expected = ["0%", "50%", "2.0%", "50%", "66.67%", "99.99%"] + assert result == expected + + msg = r"percentiles should all be in the interval \[0,1\]" + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([0.1, np.nan, 0.5]) + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([-0.001, 0.1, 0.5]) + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([2, 0.1, 0.5]) + with pytest.raises(ValueError, match=msg): + fmt.format_percentiles([0.1, 0.5, "a"]) + + +def test_format_percentiles_integer_idx(): + # Issue #26660 + result = fmt.format_percentiles(np.linspace(0, 1, 10 + 1)) + expected = [ + "0%", + "10%", + "20%", + "30%", + "40%", + "50%", + "60%", + "70%", + "80%", + "90%", + "100%", + ] + assert result == expected + + +def test_repr_html_ipython_config(ip): + code = textwrap.dedent( + """\ + import pandas as pd + df = pd.DataFrame({"A": [1, 2]}) + df._repr_html_() + + cfg = get_ipython().config + cfg['IPKernelApp']['parent_appname'] + df._repr_html_() + """ + ) + 
result = ip.run_cell(code) + assert not result.error_in_exec + + +@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) +@pytest.mark.parametrize( + "encoding, data", + [(None, "abc"), ("utf-8", "abc"), ("gbk", "造成输出中文显示乱码"), ("foo", "abc")], +) +def test_filepath_or_buffer_arg( + method, + filepath_or_buffer, + assert_filepath_or_buffer_equals, + encoding, + data, + filepath_or_buffer_id, +): + df = DataFrame([data]) + + if filepath_or_buffer_id not in ["string", "pathlike"] and encoding is not None: + with pytest.raises( + ValueError, match="buf is not a file name and encoding is specified." + ): + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + elif encoding == "foo": + with tm.assert_produces_warning(None): + with pytest.raises(LookupError, match="unknown encoding"): + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + else: + expected = getattr(df, method)() + getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) + assert_filepath_or_buffer_equals(expected) + + +@pytest.mark.parametrize("method", ["to_string", "to_html", "to_latex"]) +def test_filepath_or_buffer_bad_arg_raises(float_frame, method): + msg = "buf is not a file name and it has no write method" + with pytest.raises(TypeError, match=msg): + getattr(float_frame, method)(buf=object()) diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py new file mode 100644 index 00000000..f0d5ef19 --- /dev/null +++ b/pandas/tests/io/formats/test_printing.py @@ -0,0 +1,205 @@ +import numpy as np +import pytest + +import pandas._config.config as cf + +import pandas as pd + +import pandas.io.formats.format as fmt +import pandas.io.formats.printing as printing + + +def test_adjoin(): + data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]] + expected = "a dd ggg\nb ee hhh\nc ff iii" + + adjoined = printing.adjoin(2, *data) + + assert adjoined == expected + + +def test_repr_binary_type(): + import string + 
+ letters = string.ascii_letters + try: + raw = bytes(letters, encoding=cf.get_option("display.encoding")) + except TypeError: + raw = bytes(letters) + b = str(raw.decode("utf-8")) + res = printing.pprint_thing(b, quote_strings=True) + assert res == repr(b) + res = printing.pprint_thing(b, quote_strings=False) + assert res == b + + +class TestFormattBase: + def test_adjoin(self): + data = [["a", "b", "c"], ["dd", "ee", "ff"], ["ggg", "hhh", "iii"]] + expected = "a dd ggg\nb ee hhh\nc ff iii" + + adjoined = printing.adjoin(2, *data) + + assert adjoined == expected + + def test_adjoin_unicode(self): + data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "hhh", "いいい"]] + expected = "あ dd ggg\nb ええ hhh\nc ff いいい" + adjoined = printing.adjoin(2, *data) + assert adjoined == expected + + adj = fmt.EastAsianTextAdjustment() + + expected = """あ dd ggg +b ええ hhh +c ff いいい""" + + adjoined = adj.adjoin(2, *data) + assert adjoined == expected + cols = adjoined.split("\n") + assert adj.len(cols[0]) == 13 + assert adj.len(cols[1]) == 13 + assert adj.len(cols[2]) == 16 + + expected = """あ dd ggg +b ええ hhh +c ff いいい""" + + adjoined = adj.adjoin(7, *data) + assert adjoined == expected + cols = adjoined.split("\n") + assert adj.len(cols[0]) == 23 + assert adj.len(cols[1]) == 23 + assert adj.len(cols[2]) == 26 + + def test_justify(self): + adj = fmt.EastAsianTextAdjustment() + + def just(x, *args, **kwargs): + # wrapper to test single str + return adj.justify([x], *args, **kwargs)[0] + + assert just("abc", 5, mode="left") == "abc " + assert just("abc", 5, mode="center") == " abc " + assert just("abc", 5, mode="right") == " abc" + assert just("abc", 5, mode="left") == "abc " + assert just("abc", 5, mode="center") == " abc " + assert just("abc", 5, mode="right") == " abc" + + assert just("パンダ", 5, mode="left") == "パンダ" + assert just("パンダ", 5, mode="center") == "パンダ" + assert just("パンダ", 5, mode="right") == "パンダ" + + assert just("パンダ", 10, mode="left") == "パンダ " + assert just("パンダ", 10, 
mode="center") == " パンダ " + assert just("パンダ", 10, mode="right") == " パンダ" + + def test_east_asian_len(self): + adj = fmt.EastAsianTextAdjustment() + + assert adj.len("abc") == 3 + assert adj.len("abc") == 3 + + assert adj.len("パンダ") == 6 + assert adj.len("パンダ") == 5 + assert adj.len("パンダpanda") == 11 + assert adj.len("パンダpanda") == 10 + + def test_ambiguous_width(self): + adj = fmt.EastAsianTextAdjustment() + assert adj.len("¡¡ab") == 4 + + with cf.option_context("display.unicode.ambiguous_as_wide", True): + adj = fmt.EastAsianTextAdjustment() + assert adj.len("¡¡ab") == 6 + + data = [["あ", "b", "c"], ["dd", "ええ", "ff"], ["ggg", "¡¡ab", "いいい"]] + expected = "あ dd ggg \nb ええ ¡¡ab\nc ff いいい" + adjoined = adj.adjoin(2, *data) + assert adjoined == expected + + +class TestTableSchemaRepr: + @classmethod + def setup_class(cls): + pytest.importorskip("IPython") + + from IPython.core.interactiveshell import InteractiveShell + + cls.display_formatter = InteractiveShell.instance().display_formatter + + def test_publishes(self): + + df = pd.DataFrame({"A": [1, 2]}) + objects = [df["A"], df, df] # dataframe / series + expected_keys = [ + {"text/plain", "application/vnd.dataresource+json"}, + {"text/plain", "text/html", "application/vnd.dataresource+json"}, + ] + + opt = pd.option_context("display.html.table_schema", True) + for obj, expected in zip(objects, expected_keys): + with opt: + formatted = self.display_formatter.format(obj) + assert set(formatted[0].keys()) == expected + + with_latex = pd.option_context("display.latex.repr", True) + + with opt, with_latex: + formatted = self.display_formatter.format(obj) + + expected = { + "text/plain", + "text/html", + "text/latex", + "application/vnd.dataresource+json", + } + assert set(formatted[0].keys()) == expected + + def test_publishes_not_implemented(self): + # column MultiIndex + # GH 15996 + midx = pd.MultiIndex.from_product([["A", "B"], ["a", "b", "c"]]) + df = pd.DataFrame(np.random.randn(5, len(midx)), columns=midx) + + 
opt = pd.option_context("display.html.table_schema", True) + + with opt: + formatted = self.display_formatter.format(df) + + expected = {"text/plain", "text/html"} + assert set(formatted[0].keys()) == expected + + def test_config_on(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", True): + result = df._repr_data_resource_() + + assert result is not None + + def test_config_default_off(self): + df = pd.DataFrame({"A": [1, 2]}) + with pd.option_context("display.html.table_schema", False): + result = df._repr_data_resource_() + + assert result is None + + def test_enable_data_resource_formatter(self): + # GH 10491 + formatters = self.display_formatter.formatters + mimetype = "application/vnd.dataresource+json" + + with pd.option_context("display.html.table_schema", True): + assert "application/vnd.dataresource+json" in formatters + assert formatters[mimetype].enabled + + # still there, just disabled + assert "application/vnd.dataresource+json" in formatters + assert not formatters[mimetype].enabled + + # able to re-set + with pd.option_context("display.html.table_schema", True): + assert "application/vnd.dataresource+json" in formatters + assert formatters[mimetype].enabled + # smoke test that it works + self.display_formatter.format(cf) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py new file mode 100644 index 00000000..e5dac18a --- /dev/null +++ b/pandas/tests/io/formats/test_style.py @@ -0,0 +1,1789 @@ +import copy +import re +import textwrap + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +jinja2 = pytest.importorskip("jinja2") +from pandas.io.formats.style import Styler, _get_level_lengths # noqa # isort:skip + + +class TestStyler: + def setup_method(self, method): + np.random.seed(24) + self.s = DataFrame({"A": np.random.permutation(range(6))}) + self.df = 
DataFrame({"A": [0, 1], "B": np.random.randn(2)}) + self.f = lambda x: x + self.g = lambda x: x + + def h(x, foo="bar"): + return pd.Series(f"color: {foo}", index=x.index, name=x.name) + + self.h = h + self.styler = Styler(self.df) + self.attrs = pd.DataFrame({"A": ["color: red", "color: blue"]}) + self.dataframes = [ + self.df, + pd.DataFrame( + {"f": [1.0, 2.0], "o": ["a", "b"], "c": pd.Categorical(["a", "b"])} + ), + ] + + def test_init_non_pandas(self): + with pytest.raises(TypeError): + Styler([1, 2, 3]) + + def test_init_series(self): + result = Styler(pd.Series([1, 2])) + assert result.data.ndim == 2 + + def test_repr_html_ok(self): + self.styler._repr_html_() + + def test_repr_html_mathjax(self): + # gh-19824 + assert "tex2jax_ignore" not in self.styler._repr_html_() + + with pd.option_context("display.html.use_mathjax", False): + assert "tex2jax_ignore" in self.styler._repr_html_() + + def test_update_ctx(self): + self.styler._update_ctx(self.attrs) + expected = {(0, 0): ["color: red"], (1, 0): ["color: blue"]} + assert self.styler.ctx == expected + + def test_update_ctx_flatten_multi(self): + attrs = DataFrame({"A": ["color: red; foo: bar", "color: blue; foo: baz"]}) + self.styler._update_ctx(attrs) + expected = { + (0, 0): ["color: red", " foo: bar"], + (1, 0): ["color: blue", " foo: baz"], + } + assert self.styler.ctx == expected + + def test_update_ctx_flatten_multi_traliing_semi(self): + attrs = DataFrame({"A": ["color: red; foo: bar;", "color: blue; foo: baz;"]}) + self.styler._update_ctx(attrs) + expected = { + (0, 0): ["color: red", " foo: bar"], + (1, 0): ["color: blue", " foo: baz"], + } + assert self.styler.ctx == expected + + def test_copy(self): + s2 = copy.copy(self.styler) + assert self.styler is not s2 + assert self.styler.ctx is s2.ctx # shallow + assert self.styler._todo is s2._todo + + self.styler._update_ctx(self.attrs) + self.styler.highlight_max() + assert self.styler.ctx == s2.ctx + assert self.styler._todo == s2._todo + + def 
test_deepcopy(self): + s2 = copy.deepcopy(self.styler) + assert self.styler is not s2 + assert self.styler.ctx is not s2.ctx + assert self.styler._todo is not s2._todo + + self.styler._update_ctx(self.attrs) + self.styler.highlight_max() + assert self.styler.ctx != s2.ctx + assert s2._todo == [] + assert self.styler._todo != s2._todo + + def test_clear(self): + s = self.df.style.highlight_max()._compute() + assert len(s.ctx) > 0 + assert len(s._todo) > 0 + s.clear() + assert len(s.ctx) == 0 + assert len(s._todo) == 0 + + def test_render(self): + df = pd.DataFrame({"A": [0, 1]}) + style = lambda x: pd.Series(["color: red", "color: blue"], name=x.name) + s = Styler(df, uuid="AB").apply(style) + s.render() + # it worked? + + def test_render_empty_dfs(self): + empty_df = DataFrame() + es = Styler(empty_df) + es.render() + # An index but no columns + DataFrame(columns=["a"]).style.render() + # A column but no index + DataFrame(index=["a"]).style.render() + # No IndexError raised? + + def test_render_double(self): + df = pd.DataFrame({"A": [0, 1]}) + style = lambda x: pd.Series( + ["color: red; border: 1px", "color: blue; border: 2px"], name=x.name + ) + s = Styler(df, uuid="AB").apply(style) + s.render() + # it worked? 
+ + def test_set_properties(self): + df = pd.DataFrame({"A": [0, 1]}) + result = df.style.set_properties(color="white", size="10px")._compute().ctx + # order is deterministic + v = ["color: white", "size: 10px"] + expected = {(0, 0): v, (1, 0): v} + assert result.keys() == expected.keys() + for v1, v2 in zip(result.values(), expected.values()): + assert sorted(v1) == sorted(v2) + + def test_set_properties_subset(self): + df = pd.DataFrame({"A": [0, 1]}) + result = ( + df.style.set_properties(subset=pd.IndexSlice[0, "A"], color="white") + ._compute() + .ctx + ) + expected = {(0, 0): ["color: white"]} + assert result == expected + + def test_empty_index_name_doesnt_display(self): + # https://github.com/pandas-dev/pandas/pull/12090#issuecomment-180695902 + df = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.style._translate() + + expected = [ + [ + { + "class": "blank level0", + "type": "th", + "value": "", + "is_visible": True, + "display_value": "", + }, + { + "class": "col_heading level0 col0", + "display_value": "A", + "type": "th", + "value": "A", + "is_visible": True, + }, + { + "class": "col_heading level0 col1", + "display_value": "B", + "type": "th", + "value": "B", + "is_visible": True, + }, + { + "class": "col_heading level0 col2", + "display_value": "C", + "type": "th", + "value": "C", + "is_visible": True, + }, + ] + ] + + assert result["head"] == expected + + def test_index_name(self): + # https://github.com/pandas-dev/pandas/issues/11655 + df = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.set_index("A").style._translate() + + expected = [ + [ + { + "class": "blank level0", + "type": "th", + "value": "", + "display_value": "", + "is_visible": True, + }, + { + "class": "col_heading level0 col0", + "type": "th", + "value": "B", + "display_value": "B", + "is_visible": True, + }, + { + "class": "col_heading level0 col1", + "type": "th", + "value": "C", + "display_value": "C", + "is_visible": True, + }, + ], + [ + 
{"class": "index_name level0", "type": "th", "value": "A"}, + {"class": "blank", "type": "th", "value": ""}, + {"class": "blank", "type": "th", "value": ""}, + ], + ] + + assert result["head"] == expected + + def test_multiindex_name(self): + # https://github.com/pandas-dev/pandas/issues/11655 + df = pd.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + result = df.set_index(["A", "B"]).style._translate() + + expected = [ + [ + { + "class": "blank", + "type": "th", + "value": "", + "display_value": "", + "is_visible": True, + }, + { + "class": "blank level0", + "type": "th", + "value": "", + "display_value": "", + "is_visible": True, + }, + { + "class": "col_heading level0 col0", + "type": "th", + "value": "C", + "display_value": "C", + "is_visible": True, + }, + ], + [ + {"class": "index_name level0", "type": "th", "value": "A"}, + {"class": "index_name level1", "type": "th", "value": "B"}, + {"class": "blank", "type": "th", "value": ""}, + ], + ] + + assert result["head"] == expected + + def test_numeric_columns(self): + # https://github.com/pandas-dev/pandas/issues/12125 + # smoke test for _translate + df = pd.DataFrame({0: [1, 2, 3]}) + df.style._translate() + + def test_apply_axis(self): + df = pd.DataFrame({"A": [0, 0], "B": [1, 1]}) + f = lambda x: [f"val: {x.max()}" for v in x] + result = df.style.apply(f, axis=1) + assert len(result._todo) == 1 + assert len(result.ctx) == 0 + result._compute() + expected = { + (0, 0): ["val: 1"], + (0, 1): ["val: 1"], + (1, 0): ["val: 1"], + (1, 1): ["val: 1"], + } + assert result.ctx == expected + + result = df.style.apply(f, axis=0) + expected = { + (0, 0): ["val: 0"], + (0, 1): ["val: 1"], + (1, 0): ["val: 0"], + (1, 1): ["val: 1"], + } + result._compute() + assert result.ctx == expected + result = df.style.apply(f) # default + result._compute() + assert result.ctx == expected + + def test_apply_subset(self): + axes = [0, 1] + slices = [ + pd.IndexSlice[:], + pd.IndexSlice[:, ["A"]], + pd.IndexSlice[[1], :], + 
pd.IndexSlice[[1], ["A"]], + pd.IndexSlice[:2, ["A", "B"]], + ] + for ax in axes: + for slice_ in slices: + result = ( + self.df.style.apply(self.h, axis=ax, subset=slice_, foo="baz") + ._compute() + .ctx + ) + expected = { + (r, c): ["color: baz"] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index + and col in self.df.loc[slice_].columns + } + assert result == expected + + def test_applymap_subset(self): + def f(x): + return "foo: bar" + + slices = [ + pd.IndexSlice[:], + pd.IndexSlice[:, ["A"]], + pd.IndexSlice[[1], :], + pd.IndexSlice[[1], ["A"]], + pd.IndexSlice[:2, ["A", "B"]], + ] + + for slice_ in slices: + result = self.df.style.applymap(f, subset=slice_)._compute().ctx + expected = { + (r, c): ["foo: bar"] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index + and col in self.df.loc[slice_].columns + } + assert result == expected + + def test_applymap_subset_multiindex(self): + # GH 19861 + # Smoke test for applymap + def color_negative_red(val): + """ + Takes a scalar and returns a string with + the css property `'color: red'` for negative + strings, black otherwise. 
+ """ + color = "red" if val < 0 else "black" + return f"color: {color}" + + dic = { + ("a", "d"): [-1.12, 2.11], + ("a", "c"): [2.78, -2.88], + ("b", "c"): [-3.99, 3.77], + ("b", "d"): [4.21, -1.22], + } + + idx = pd.IndexSlice + df = pd.DataFrame(dic, index=[0, 1]) + + (df.style.applymap(color_negative_red, subset=idx[:, idx["b", "d"]]).render()) + + def test_applymap_subset_multiindex_code(self): + # https://github.com/pandas-dev/pandas/issues/25858 + # Checks styler.applymap works with multindex when codes are provided + codes = np.array([[0, 0, 1, 1], [0, 1, 0, 1]]) + columns = pd.MultiIndex( + levels=[["a", "b"], ["%", "#"]], codes=codes, names=["", ""] + ) + df = DataFrame( + [[1, -1, 1, 1], [-1, 1, 1, 1]], index=["hello", "world"], columns=columns + ) + pct_subset = pd.IndexSlice[:, pd.IndexSlice[:, "%":"%"]] + + def color_negative_red(val): + color = "red" if val < 0 else "black" + return f"color: {color}" + + df.loc[pct_subset] + df.style.applymap(color_negative_red, subset=pct_subset) + + def test_where_with_one_style(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + + result = self.df.style.where(f, style1)._compute().ctx + expected = { + (r, c): [style1 if f(self.df.loc[row, col]) else ""] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + } + assert result == expected + + def test_where_subset(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + style2 = "baz: foo" + + slices = [ + pd.IndexSlice[:], + pd.IndexSlice[:, ["A"]], + pd.IndexSlice[[1], :], + pd.IndexSlice[[1], ["A"]], + pd.IndexSlice[:2, ["A", "B"]], + ] + + for slice_ in slices: + result = ( + self.df.style.where(f, style1, style2, subset=slice_)._compute().ctx + ) + expected = { + (r, c): [style1 if f(self.df.loc[row, col]) else style2] + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index + and col in self.df.loc[slice_].columns + } + 
assert result == expected + + def test_where_subset_compare_with_applymap(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = "foo: bar" + style2 = "baz: foo" + + def g(x): + return style1 if f(x) else style2 + + slices = [ + pd.IndexSlice[:], + pd.IndexSlice[:, ["A"]], + pd.IndexSlice[[1], :], + pd.IndexSlice[[1], ["A"]], + pd.IndexSlice[:2, ["A", "B"]], + ] + + for slice_ in slices: + result = ( + self.df.style.where(f, style1, style2, subset=slice_)._compute().ctx + ) + expected = self.df.style.applymap(g, subset=slice_)._compute().ctx + assert result == expected + + def test_empty(self): + df = pd.DataFrame({"A": [1, 0]}) + s = df.style + s.ctx = {(0, 0): ["color: red"], (1, 0): [""]} + + result = s._translate()["cellstyle"] + expected = [ + {"props": [["color", " red"]], "selector": "row0_col0"}, + {"props": [["", ""]], "selector": "row1_col0"}, + ] + assert result == expected + + def test_bar_align_left(self): + df = pd.DataFrame({"A": [0, 1, 2]}) + result = df.style.bar()._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(" + "90deg,#d65f5f 50.0%, transparent 50.0%)", + ], + (2, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(" + "90deg,#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + result = df.style.bar(color="red", width=50)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,red 25.0%, transparent 25.0%)", + ], + (2, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,red 50.0%, transparent 50.0%)", + ], + } + assert result == expected + + df["C"] = ["a"] * len(df) + result = df.style.bar(color="red", width=50)._compute().ctx + assert result == expected + df["C"] = df["C"].astype("category") + result = df.style.bar(color="red", width=50)._compute().ctx + assert 
result == expected + + def test_bar_align_left_0points(self): + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.style.bar()._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (0, 1): ["width: 10em", " height: 80%"], + (0, 2): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 50.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 50.0%)", + ], + (1, 2): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 50.0%)", + ], + (2, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 100.0%" + ", transparent 100.0%)", + ], + (2, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 100.0%" + ", transparent 100.0%)", + ], + (2, 2): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 100.0%" + ", transparent 100.0%)", + ], + } + assert result == expected + + result = df.style.bar(axis=1)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 50.0%, transparent 50.0%)", + ], + (0, 2): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 100.0%" + ", transparent 100.0%)", + ], + (1, 0): ["width: 10em", " height: 80%"], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 50.0%" + ", transparent 50.0%)", + ], + (1, 2): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 100.0%" + ", transparent 100.0%)", + ], + (2, 0): ["width: 10em", " height: 80%"], + (2, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg,#d65f5f 50.0%" + ", transparent 50.0%)", + ], + (2, 2): [ + "width: 10em", + " height: 80%", + 
"background: linear-gradient(90deg,#d65f5f 100.0%" + ", transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_align_mid_pos_and_neg(self): + df = pd.DataFrame({"A": [-10, 0, 20, 90]}) + + result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx + + expected = { + (0, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 10.0%, transparent 10.0%)", + ], + (1, 0): ["width: 10em", " height: 80%"], + (2, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 10.0%, #5fba7d 10.0%" + ", #5fba7d 30.0%, transparent 30.0%)", + ], + (3, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 10.0%, " + "#5fba7d 10.0%, #5fba7d 100.0%, " + "transparent 100.0%)", + ], + } + + assert result == expected + + def test_bar_align_mid_all_pos(self): + df = pd.DataFrame({"A": [10, 20, 50, 100]}) + + result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx + + expected = { + (0, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#5fba7d 10.0%, transparent 10.0%)", + ], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#5fba7d 20.0%, transparent 20.0%)", + ], + (2, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#5fba7d 50.0%, transparent 50.0%)", + ], + (3, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#5fba7d 100.0%, transparent 100.0%)", + ], + } + + assert result == expected + + def test_bar_align_mid_all_neg(self): + df = pd.DataFrame({"A": [-100, -60, -30, -20]}) + + result = df.style.bar(align="mid", color=["#d65f5f", "#5fba7d"])._compute().ctx + + expected = { + (0, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 100.0%, transparent 100.0%)", + ], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: 
linear-gradient(90deg, " + "transparent 40.0%, " + "#d65f5f 40.0%, #d65f5f 100.0%, " + "transparent 100.0%)", + ], + (2, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 70.0%, " + "#d65f5f 70.0%, #d65f5f 100.0%, " + "transparent 100.0%)", + ], + (3, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 80.0%, " + "#d65f5f 80.0%, #d65f5f 100.0%, " + "transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_align_zero_pos_and_neg(self): + # See https://github.com/pandas-dev/pandas/pull/14757 + df = pd.DataFrame({"A": [-10, 0, 20, 90]}) + + result = ( + df.style.bar(align="zero", color=["#d65f5f", "#5fba7d"], width=90) + ._compute() + .ctx + ) + expected = { + (0, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 40.0%, #d65f5f 40.0%, " + "#d65f5f 45.0%, transparent 45.0%)", + ], + (1, 0): ["width: 10em", " height: 80%"], + (2, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 45.0%, #5fba7d 45.0%, " + "#5fba7d 55.0%, transparent 55.0%)", + ], + (3, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 45.0%, #5fba7d 45.0%, " + "#5fba7d 90.0%, transparent 90.0%)", + ], + } + assert result == expected + + def test_bar_align_left_axis_none(self): + df = pd.DataFrame({"A": [0, 1], "B": [2, 4]}) + result = df.style.bar(axis=None)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 25.0%, transparent 25.0%)", + ], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 50.0%, transparent 50.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + def 
test_bar_align_zero_axis_none(self): + df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + result = df.style.bar(align="zero", axis=None)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 50.0%, #d65f5f 50.0%, " + "#d65f5f 62.5%, transparent 62.5%)", + ], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 25.0%, #d65f5f 25.0%, " + "#d65f5f 50.0%, transparent 50.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 50.0%, #d65f5f 50.0%, " + "#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_align_mid_axis_none(self): + df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + result = df.style.bar(align="mid", axis=None)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 33.3%, #d65f5f 33.3%, " + "#d65f5f 50.0%, transparent 50.0%)", + ], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 33.3%, transparent 33.3%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 33.3%, #d65f5f 33.3%, " + "#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_align_mid_vmin(self): + df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + result = df.style.bar(align="mid", axis=None, vmin=-6)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 60.0%, #d65f5f 60.0%, " + "#d65f5f 70.0%, transparent 70.0%)", + ], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 40.0%, #d65f5f 40.0%, " + "#d65f5f 
60.0%, transparent 60.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 60.0%, #d65f5f 60.0%, " + "#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_align_mid_vmax(self): + df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + result = df.style.bar(align="mid", axis=None, vmax=8)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 20.0%, #d65f5f 20.0%, " + "#d65f5f 30.0%, transparent 30.0%)", + ], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 20.0%, transparent 20.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 20.0%, #d65f5f 20.0%, " + "#d65f5f 60.0%, transparent 60.0%)", + ], + } + assert result == expected + + def test_bar_align_mid_vmin_vmax_wide(self): + df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + result = df.style.bar(align="mid", axis=None, vmin=-3, vmax=7)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 30.0%, #d65f5f 30.0%, " + "#d65f5f 40.0%, transparent 40.0%)", + ], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 10.0%, #d65f5f 10.0%, " + "#d65f5f 30.0%, transparent 30.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 30.0%, #d65f5f 30.0%, " + "#d65f5f 70.0%, transparent 70.0%)", + ], + } + assert result == expected + + def test_bar_align_mid_vmin_vmax_clipping(self): + df = pd.DataFrame({"A": [0, 1], "B": [-2, 4]}) + result = df.style.bar(align="mid", axis=None, vmin=-1, vmax=3)._compute().ctx + expected = { + (0, 0): ["width: 10em", " height: 80%"], + (1, 0): [ + "width: 
10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 25.0%, #d65f5f 25.0%, " + "#d65f5f 50.0%, transparent 50.0%)", + ], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 25.0%, transparent 25.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 25.0%, #d65f5f 25.0%, " + "#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_align_mid_nans(self): + df = pd.DataFrame({"A": [1, None], "B": [-1, 3]}) + result = df.style.bar(align="mid", axis=None)._compute().ctx + expected = { + (0, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 25.0%, #d65f5f 25.0%, " + "#d65f5f 50.0%, transparent 50.0%)", + ], + (1, 0): [""], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg," + "#d65f5f 25.0%, transparent 25.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 25.0%, #d65f5f 25.0%, " + "#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_align_zero_nans(self): + df = pd.DataFrame({"A": [1, None], "B": [-1, 2]}) + result = df.style.bar(align="zero", axis=None)._compute().ctx + expected = { + (0, 0): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 50.0%, #d65f5f 50.0%, " + "#d65f5f 75.0%, transparent 75.0%)", + ], + (1, 0): [""], + (0, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 25.0%, #d65f5f 25.0%, " + "#d65f5f 50.0%, transparent 50.0%)", + ], + (1, 1): [ + "width: 10em", + " height: 80%", + "background: linear-gradient(90deg, " + "transparent 50.0%, #d65f5f 50.0%, " + "#d65f5f 100.0%, transparent 100.0%)", + ], + } + assert result == expected + + def test_bar_bad_align_raises(self): + df = pd.DataFrame({"A": [-100, -60, -30, -20]}) + with 
pytest.raises(ValueError): + df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]) + + def test_format_with_na_rep(self): + # GH 21527 28358 + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = df.style.format(None, na_rep="-")._translate() + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + + ctx = df.style.format("{:.2%}", na_rep="-")._translate() + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "110.00%" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate() + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + def test_init_with_na_rep(self): + # GH 21527 28358 + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = Styler(df, na_rep="NA")._translate() + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + def test_set_na_rep(self): + # GH 21527 28358 + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = df.style.set_na_rep("NA")._translate() + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + ctx = ( + df.style.set_na_rep("NA") + .format(None, na_rep="-", subset=["B"]) + ._translate() + ) + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "-" + + def test_format_non_numeric_na(self): + # GH 21527 28358 + df = pd.DataFrame( + { + "object": [None, np.nan, "foo"], + "datetime": [None, pd.NaT, pd.Timestamp("20120101")], + } + ) + + ctx = df.style.set_na_rep("NA")._translate() + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + assert ctx["body"][1][1]["display_value"] == "NA" + assert 
ctx["body"][1][2]["display_value"] == "NA" + + ctx = df.style.format(None, na_rep="-")._translate() + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "-" + + def test_format_with_bad_na_rep(self): + # GH 21527 28358 + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + with pytest.raises(TypeError): + df.style.format(None, na_rep=-1) + + def test_highlight_null(self, null_color="red"): + df = pd.DataFrame({"A": [0, np.nan]}) + result = df.style.highlight_null()._compute().ctx + expected = {(0, 0): [""], (1, 0): ["background-color: red"]} + assert result == expected + + def test_nonunique_raises(self): + df = pd.DataFrame([[1, 2]], columns=["A", "A"]) + with pytest.raises(ValueError): + df.style + + with pytest.raises(ValueError): + Styler(df) + + def test_caption(self): + styler = Styler(self.df, caption="foo") + result = styler.render() + assert all(["caption" in result, "foo" in result]) + + styler = self.df.style + result = styler.set_caption("baz") + assert styler is result + assert styler.caption == "baz" + + def test_uuid(self): + styler = Styler(self.df, uuid="abc123") + result = styler.render() + assert "abc123" in result + + styler = self.df.style + result = styler.set_uuid("aaa") + assert result is styler + assert result.uuid == "aaa" + + def test_unique_id(self): + # See https://github.com/pandas-dev/pandas/issues/16780 + df = pd.DataFrame({"a": [1, 3, 5, 6], "b": [2, 4, 12, 21]}) + result = df.style.render(uuid="test") + assert "test" in result + ids = re.findall('id="(.*?)"', result) + assert np.unique(ids).size == len(ids) + + def test_table_styles(self): + style = [{"selector": "th", "props": [("foo", "bar")]}] + styler = Styler(self.df, table_styles=style) + result = " ".join(styler.render().split()) + assert "th { foo: bar; }" in result + + styler = self.df.style + result = 
styler.set_table_styles(style) + assert styler is result + assert styler.table_styles == style + + def test_table_attributes(self): + attributes = 'class="foo" data-bar' + styler = Styler(self.df, table_attributes=attributes) + result = styler.render() + assert 'class="foo" data-bar' in result + + result = self.df.style.set_table_attributes(attributes).render() + assert 'class="foo" data-bar' in result + + def test_precision(self): + with pd.option_context("display.precision", 10): + s = Styler(self.df) + assert s.precision == 10 + s = Styler(self.df, precision=2) + assert s.precision == 2 + + s2 = s.set_precision(4) + assert s is s2 + assert s.precision == 4 + + def test_apply_none(self): + def f(x): + return pd.DataFrame( + np.where(x == x.max(), "color: red", ""), + index=x.index, + columns=x.columns, + ) + + result = pd.DataFrame([[1, 2], [3, 4]]).style.apply(f, axis=None)._compute().ctx + assert result[(1, 1)] == ["color: red"] + + def test_trim(self): + result = self.df.style.render() # trim=True + assert result.count("#") == 0 + + result = self.df.style.highlight_max().render() + assert result.count("#") == len(self.df.columns) + + def test_highlight_max(self): + df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + # max(df) = min(-df) + for max_ in [True, False]: + if max_: + attr = "highlight_max" + else: + df = -df + attr = "highlight_min" + result = getattr(df.style, attr)()._compute().ctx + assert result[(1, 1)] == ["background-color: yellow"] + + result = getattr(df.style, attr)(color="green")._compute().ctx + assert result[(1, 1)] == ["background-color: green"] + + result = getattr(df.style, attr)(subset="A")._compute().ctx + assert result[(1, 0)] == ["background-color: yellow"] + + result = getattr(df.style, attr)(axis=0)._compute().ctx + expected = { + (1, 0): ["background-color: yellow"], + (1, 1): ["background-color: yellow"], + (0, 1): [""], + (0, 0): [""], + } + assert result == expected + + result = getattr(df.style, 
attr)(axis=1)._compute().ctx + expected = { + (0, 1): ["background-color: yellow"], + (1, 1): ["background-color: yellow"], + (0, 0): [""], + (1, 0): [""], + } + assert result == expected + + # separate since we can't negate the strs + df["C"] = ["a", "b"] + result = df.style.highlight_max()._compute().ctx + expected = {(1, 1): ["background-color: yellow"]} + + result = df.style.highlight_min()._compute().ctx + expected = {(0, 0): ["background-color: yellow"]} + + def test_export(self): + f = lambda x: "color: red" if x > 0 else "color: blue" + g = lambda x, z: f"color: {z}" if x > 0 else f"color: {z}" + style1 = self.styler + style1.applymap(f).applymap(g, z="b").highlight_max() + result = style1.export() + style2 = self.df.style + style2.use(result) + assert style1._todo == style2._todo + style2.render() + + def test_display_format(self): + df = pd.DataFrame(np.random.random(size=(2, 2))) + ctx = df.style.format("{:0.1f}")._translate() + + assert all(["display_value" in c for c in row] for row in ctx["body"]) + assert all( + [len(c["display_value"]) <= 3 for c in row[1:]] for row in ctx["body"] + ) + assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 + + def test_display_format_raises(self): + df = pd.DataFrame(np.random.randn(2, 2)) + with pytest.raises(TypeError): + df.style.format(5) + with pytest.raises(TypeError): + df.style.format(True) + + def test_display_set_precision(self): + # Issue #13257 + df = pd.DataFrame(data=[[1.0, 2.0090], [3.2121, 4.566]], columns=["a", "b"]) + s = Styler(df) + + ctx = s.set_precision(1)._translate() + + assert s.precision == 1 + assert ctx["body"][0][1]["display_value"] == "1.0" + assert ctx["body"][0][2]["display_value"] == "2.0" + assert ctx["body"][1][1]["display_value"] == "3.2" + assert ctx["body"][1][2]["display_value"] == "4.6" + + ctx = s.set_precision(2)._translate() + assert s.precision == 2 + assert ctx["body"][0][1]["display_value"] == "1.00" + assert ctx["body"][0][2]["display_value"] == "2.01" + 
assert ctx["body"][1][1]["display_value"] == "3.21" + assert ctx["body"][1][2]["display_value"] == "4.57" + + ctx = s.set_precision(3)._translate() + assert s.precision == 3 + assert ctx["body"][0][1]["display_value"] == "1.000" + assert ctx["body"][0][2]["display_value"] == "2.009" + assert ctx["body"][1][1]["display_value"] == "3.212" + assert ctx["body"][1][2]["display_value"] == "4.566" + + def test_display_subset(self): + df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) + ctx = df.style.format( + {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :] + )._translate() + expected = "0.1" + raw_11 = "1.123400" + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + assert ctx["body"][0][2]["display_value"] == "12.34%" + + ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, :])._translate() + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice["a"])._translate() + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][0][2]["display_value"] == "0.123400" + + ctx = df.style.format("{:0.1f}", subset=pd.IndexSlice[0, "a"])._translate() + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == raw_11 + + ctx = df.style.format( + "{:0.1f}", subset=pd.IndexSlice[[0, 1], ["a"]] + )._translate() + assert ctx["body"][0][1]["display_value"] == expected + assert ctx["body"][1][1]["display_value"] == "1.1" + assert ctx["body"][0][2]["display_value"] == "0.123400" + assert ctx["body"][1][2]["display_value"] == raw_11 + + def test_display_dict(self): + df = pd.DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) + ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"})._translate() + assert ctx["body"][0][1]["display_value"] == "0.1" + assert ctx["body"][0][2]["display_value"] == "12.34%" + 
df["c"] = ["aaa", "bbb"] + ctx = df.style.format({"a": "{:0.1f}", "c": str.upper})._translate() + assert ctx["body"][0][1]["display_value"] == "0.1" + assert ctx["body"][0][3]["display_value"] == "AAA" + + def test_bad_apply_shape(self): + df = pd.DataFrame([[1, 2], [3, 4]]) + with pytest.raises(ValueError): + df.style._apply(lambda x: "x", subset=pd.IndexSlice[[0, 1], :]) + + with pytest.raises(ValueError): + df.style._apply(lambda x: [""], subset=pd.IndexSlice[[0, 1], :]) + + with pytest.raises(ValueError): + df.style._apply(lambda x: ["", "", "", ""]) + + with pytest.raises(ValueError): + df.style._apply(lambda x: ["", "", ""], subset=1) + + with pytest.raises(ValueError): + df.style._apply(lambda x: ["", "", ""], axis=1) + + def test_apply_bad_return(self): + def f(x): + return "" + + df = pd.DataFrame([[1, 2], [3, 4]]) + with pytest.raises(TypeError): + df.style._apply(f, axis=None) + + def test_apply_bad_labels(self): + def f(x): + return pd.DataFrame(index=[1, 2], columns=["a", "b"]) + + df = pd.DataFrame([[1, 2], [3, 4]]) + with pytest.raises(ValueError): + df.style._apply(f, axis=None) + + def test_get_level_lengths(self): + index = pd.MultiIndex.from_product([["a", "b"], [0, 1, 2]]) + expected = { + (0, 0): 3, + (0, 3): 3, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + (1, 4): 1, + (1, 5): 1, + } + result = _get_level_lengths(index) + tm.assert_dict_equal(result, expected) + + def test_get_level_lengths_un_sorted(self): + index = pd.MultiIndex.from_arrays([[1, 1, 2, 1], ["a", "b", "b", "d"]]) + expected = { + (0, 0): 2, + (0, 2): 1, + (0, 3): 1, + (1, 0): 1, + (1, 1): 1, + (1, 2): 1, + (1, 3): 1, + } + result = _get_level_lengths(index) + tm.assert_dict_equal(result, expected) + + def test_mi_sparse(self): + df = pd.DataFrame( + {"A": [1, 2]}, index=pd.MultiIndex.from_arrays([["a", "a"], [0, 1]]) + ) + + result = df.style._translate() + body_0 = result["body"][0][0] + expected_0 = { + "value": "a", + "display_value": "a", + "is_visible": True, + 
"type": "th", + "attributes": ["rowspan=2"], + "class": "row_heading level0 row0", + "id": "level0_row0", + } + tm.assert_dict_equal(body_0, expected_0) + + body_1 = result["body"][0][1] + expected_1 = { + "value": 0, + "display_value": 0, + "is_visible": True, + "type": "th", + "class": "row_heading level1 row0", + "id": "level1_row0", + } + tm.assert_dict_equal(body_1, expected_1) + + body_10 = result["body"][1][0] + expected_10 = { + "value": "a", + "display_value": "a", + "is_visible": False, + "type": "th", + "class": "row_heading level0 row1", + "id": "level0_row1", + } + tm.assert_dict_equal(body_10, expected_10) + + head = result["head"][0] + expected = [ + { + "type": "th", + "class": "blank", + "value": "", + "is_visible": True, + "display_value": "", + }, + { + "type": "th", + "class": "blank level0", + "value": "", + "is_visible": True, + "display_value": "", + }, + { + "type": "th", + "class": "col_heading level0 col0", + "value": "A", + "is_visible": True, + "display_value": "A", + }, + ] + assert head == expected + + def test_mi_sparse_disabled(self): + with pd.option_context("display.multi_sparse", False): + df = pd.DataFrame( + {"A": [1, 2]}, index=pd.MultiIndex.from_arrays([["a", "a"], [0, 1]]) + ) + result = df.style._translate() + body = result["body"] + for row in body: + assert "attributes" not in row[0] + + def test_mi_sparse_index_names(self): + df = pd.DataFrame( + {"A": [1, 2]}, + index=pd.MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ), + ) + result = df.style._translate() + head = result["head"][1] + expected = [ + {"class": "index_name level0", "value": "idx_level_0", "type": "th"}, + {"class": "index_name level1", "value": "idx_level_1", "type": "th"}, + {"class": "blank", "value": "", "type": "th"}, + ] + + assert head == expected + + def test_mi_sparse_column_names(self): + df = pd.DataFrame( + np.arange(16).reshape(4, 4), + index=pd.MultiIndex.from_arrays( + [["a", "a", "b", "a"], [0, 1, 1, 
2]], + names=["idx_level_0", "idx_level_1"], + ), + columns=pd.MultiIndex.from_arrays( + [["C1", "C1", "C2", "C2"], [1, 0, 1, 0]], names=["col_0", "col_1"] + ), + ) + result = df.style._translate() + head = result["head"][1] + expected = [ + { + "class": "blank", + "value": "", + "display_value": "", + "type": "th", + "is_visible": True, + }, + { + "class": "index_name level1", + "value": "col_1", + "display_value": "col_1", + "is_visible": True, + "type": "th", + }, + { + "class": "col_heading level1 col0", + "display_value": 1, + "is_visible": True, + "type": "th", + "value": 1, + }, + { + "class": "col_heading level1 col1", + "display_value": 0, + "is_visible": True, + "type": "th", + "value": 0, + }, + { + "class": "col_heading level1 col2", + "display_value": 1, + "is_visible": True, + "type": "th", + "value": 1, + }, + { + "class": "col_heading level1 col3", + "display_value": 0, + "is_visible": True, + "type": "th", + "value": 0, + }, + ] + assert head == expected + + def test_hide_single_index(self): + # GH 14194 + # single unnamed index + ctx = self.df.style._translate() + assert ctx["body"][0][0]["is_visible"] + assert ctx["head"][0][0]["is_visible"] + ctx2 = self.df.style.hide_index()._translate() + assert not ctx2["body"][0][0]["is_visible"] + assert not ctx2["head"][0][0]["is_visible"] + + # single named index + ctx3 = self.df.set_index("A").style._translate() + assert ctx3["body"][0][0]["is_visible"] + assert len(ctx3["head"]) == 2 # 2 header levels + assert ctx3["head"][0][0]["is_visible"] + + ctx4 = self.df.set_index("A").style.hide_index()._translate() + assert not ctx4["body"][0][0]["is_visible"] + assert len(ctx4["head"]) == 1 # only 1 header levels + assert not ctx4["head"][0][0]["is_visible"] + + def test_hide_multiindex(self): + # GH 14194 + df = pd.DataFrame( + {"A": [1, 2]}, + index=pd.MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ), + ) + ctx1 = df.style._translate() + # tests for 'a' and '0' + 
assert ctx1["body"][0][0]["is_visible"] + assert ctx1["body"][0][1]["is_visible"] + # check for blank header rows + assert ctx1["head"][0][0]["is_visible"] + assert ctx1["head"][0][1]["is_visible"] + + ctx2 = df.style.hide_index()._translate() + # tests for 'a' and '0' + assert not ctx2["body"][0][0]["is_visible"] + assert not ctx2["body"][0][1]["is_visible"] + # check for blank header rows + assert not ctx2["head"][0][0]["is_visible"] + assert not ctx2["head"][0][1]["is_visible"] + + def test_hide_columns_single_level(self): + # GH 14194 + # test hiding single column + ctx = self.df.style._translate() + assert ctx["head"][0][1]["is_visible"] + assert ctx["head"][0][1]["display_value"] == "A" + assert ctx["head"][0][2]["is_visible"] + assert ctx["head"][0][2]["display_value"] == "B" + assert ctx["body"][0][1]["is_visible"] # col A, row 1 + assert ctx["body"][1][2]["is_visible"] # col B, row 1 + + ctx = self.df.style.hide_columns("A")._translate() + assert not ctx["head"][0][1]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] # col A, row 1 + assert ctx["body"][1][2]["is_visible"] # col B, row 1 + + # test hiding multiple columns + ctx = self.df.style.hide_columns(["A", "B"])._translate() + assert not ctx["head"][0][1]["is_visible"] + assert not ctx["head"][0][2]["is_visible"] + assert not ctx["body"][0][1]["is_visible"] # col A, row 1 + assert not ctx["body"][1][2]["is_visible"] # col B, row 1 + + def test_hide_columns_mult_levels(self): + # GH 14194 + # setup dataframe with multiple column levels and indices + i1 = pd.MultiIndex.from_arrays( + [["a", "a"], [0, 1]], names=["idx_level_0", "idx_level_1"] + ) + i2 = pd.MultiIndex.from_arrays( + [["b", "b"], [0, 1]], names=["col_level_0", "col_level_1"] + ) + df = pd.DataFrame([[1, 2], [3, 4]], index=i1, columns=i2) + ctx = df.style._translate() + # column headers + assert ctx["head"][0][2]["is_visible"] + assert ctx["head"][1][2]["is_visible"] + assert ctx["head"][1][3]["display_value"] == 1 + # indices + 
assert ctx["body"][0][0]["is_visible"] + # data + assert ctx["body"][1][2]["is_visible"] + assert ctx["body"][1][2]["display_value"] == 3 + assert ctx["body"][1][3]["is_visible"] + assert ctx["body"][1][3]["display_value"] == 4 + + # hide top column level, which hides both columns + ctx = df.style.hide_columns("b")._translate() + assert not ctx["head"][0][2]["is_visible"] # b + assert not ctx["head"][1][2]["is_visible"] # 0 + assert not ctx["body"][1][2]["is_visible"] # 3 + assert ctx["body"][0][0]["is_visible"] # index + + # hide first column only + ctx = df.style.hide_columns([("b", 0)])._translate() + assert ctx["head"][0][2]["is_visible"] # b + assert not ctx["head"][1][2]["is_visible"] # 0 + assert not ctx["body"][1][2]["is_visible"] # 3 + assert ctx["body"][1][3]["is_visible"] + assert ctx["body"][1][3]["display_value"] == 4 + + # hide second column and index + ctx = df.style.hide_columns([("b", 1)]).hide_index()._translate() + assert not ctx["body"][0][0]["is_visible"] # index + assert ctx["head"][0][2]["is_visible"] # b + assert ctx["head"][1][2]["is_visible"] # 0 + assert not ctx["head"][1][3]["is_visible"] # 1 + assert not ctx["body"][1][3]["is_visible"] # 4 + assert ctx["body"][1][2]["is_visible"] + assert ctx["body"][1][2]["display_value"] == 3 + + def test_pipe(self): + def set_caption_from_template(styler, a, b): + return styler.set_caption(f"Dataframe with a = {a} and b = {b}") + + styler = self.df.style.pipe(set_caption_from_template, "A", b="B") + assert "Dataframe with a = A and b = B" in styler.render() + + # Test with an argument that is a (callable, keyword_name) pair. 
+ def f(a, b, styler): + return (a, b, styler) + + styler = self.df.style + result = styler.pipe((f, "styler"), a=1, b=2) + assert result == (1, 2, styler) + + +@td.skip_if_no_mpl +class TestStylerMatplotlibDep: + def test_background_gradient(self): + df = pd.DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + + for c_map in [None, "YlOrRd"]: + result = df.style.background_gradient(cmap=c_map)._compute().ctx + assert all("#" in x[0] for x in result.values()) + assert result[(0, 0)] == result[(0, 1)] + assert result[(1, 0)] == result[(1, 1)] + + result = ( + df.style.background_gradient(subset=pd.IndexSlice[1, "A"])._compute().ctx + ) + + assert result[(1, 0)] == ["background-color: #fff7fb", "color: #000000"] + + @pytest.mark.parametrize( + "c_map,expected", + [ + ( + None, + { + (0, 0): ["background-color: #440154", "color: #f1f1f1"], + (1, 0): ["background-color: #fde725", "color: #000000"], + }, + ), + ( + "YlOrRd", + { + (0, 0): ["background-color: #ffffcc", "color: #000000"], + (1, 0): ["background-color: #800026", "color: #f1f1f1"], + }, + ), + ], + ) + def test_text_color_threshold(self, c_map, expected): + df = pd.DataFrame([1, 2], columns=["A"]) + result = df.style.background_gradient(cmap=c_map)._compute().ctx + assert result == expected + + @pytest.mark.parametrize("text_color_threshold", [1.1, "1", -1, [2, 2]]) + def test_text_color_threshold_raises(self, text_color_threshold): + df = pd.DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + msg = "`text_color_threshold` must be a value from 0 to 1." 
+ with pytest.raises(ValueError, match=msg): + df.style.background_gradient( + text_color_threshold=text_color_threshold + )._compute() + + @td.skip_if_no_mpl + def test_background_gradient_axis(self): + df = pd.DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) + + low = ["background-color: #f7fbff", "color: #000000"] + high = ["background-color: #08306b", "color: #f1f1f1"] + mid = ["background-color: #abd0e6", "color: #000000"] + result = df.style.background_gradient(cmap="Blues", axis=0)._compute().ctx + assert result[(0, 0)] == low + assert result[(0, 1)] == low + assert result[(1, 0)] == high + assert result[(1, 1)] == high + + result = df.style.background_gradient(cmap="Blues", axis=1)._compute().ctx + assert result[(0, 0)] == low + assert result[(0, 1)] == high + assert result[(1, 0)] == low + assert result[(1, 1)] == high + + result = df.style.background_gradient(cmap="Blues", axis=None)._compute().ctx + assert result[(0, 0)] == low + assert result[(0, 1)] == mid + assert result[(1, 0)] == mid + assert result[(1, 1)] == high + + def test_background_gradient_vmin_vmax(self): + # GH 12145 + df = pd.DataFrame(range(5)) + ctx = df.style.background_gradient(vmin=1, vmax=3)._compute().ctx + assert ctx[(0, 0)] == ctx[(1, 0)] + assert ctx[(4, 0)] == ctx[(3, 0)] + + def test_background_gradient_int64(self): + # GH 28869 + df1 = pd.Series(range(3)).to_frame() + df2 = pd.Series(range(3), dtype="Int64").to_frame() + ctx1 = df1.style.background_gradient()._compute().ctx + ctx2 = df2.style.background_gradient()._compute().ctx + assert ctx2[(0, 0)] == ctx1[(0, 0)] + assert ctx2[(1, 0)] == ctx1[(1, 0)] + assert ctx2[(2, 0)] == ctx1[(2, 0)] + + +def test_block_names(): + # catch accidental removal of a block + expected = { + "before_style", + "style", + "table_styles", + "before_cellstyle", + "cellstyle", + "before_table", + "table", + "caption", + "thead", + "tbody", + "after_table", + "before_head_rows", + "head_tr", + "after_head_rows", + "before_rows", + "tr", + 
"after_rows", + } + result = set(Styler.template.blocks) + assert result == expected + + +def test_from_custom_template(tmpdir): + p = tmpdir.mkdir("templates").join("myhtml.tpl") + p.write( + textwrap.dedent( + """\ + {% extends "html.tpl" %} + {% block table %} +

    <h1>{{ table_title|default("My Table") }}</h1>

    + {{ super() }} + {% endblock table %}""" + ) + ) + result = Styler.from_custom_template(str(tmpdir.join("templates")), "myhtml.tpl") + assert issubclass(result, Styler) + assert result.env is not Styler.env + assert result.template is not Styler.template + styler = result(pd.DataFrame({"A": [1, 2]})) + assert styler.render() diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py new file mode 100644 index 00000000..b3ee8da5 --- /dev/null +++ b/pandas/tests/io/formats/test_to_csv.py @@ -0,0 +1,599 @@ +import io +import os +import sys + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, compat +import pandas._testing as tm + + +class TestToCSV: + @pytest.mark.xfail( + (3, 6, 5) > sys.version_info, + reason=("Python csv library bug (see https://bugs.python.org/issue32255)"), + ) + def test_to_csv_with_single_column(self): + # see gh-18676, https://bugs.python.org/issue32255 + # + # Python's CSV library adds an extraneous '""' + # before the newline when the NaN-value is in + # the first row. Otherwise, only the newline + # character is added. This behavior is inconsistent + # and was patched in https://bugs.python.org/pull_request4672. + df1 = DataFrame([None, 1]) + expected1 = """\ +"" +1.0 +""" + with tm.ensure_clean("test.csv") as path: + df1.to_csv(path, header=None, index=None) + with open(path, "r") as f: + assert f.read() == expected1 + + df2 = DataFrame([1, None]) + expected2 = """\ +1.0 +"" +""" + with tm.ensure_clean("test.csv") as path: + df2.to_csv(path, header=None, index=None) + with open(path, "r") as f: + assert f.read() == expected2 + + def test_to_csv_defualt_encoding(self): + # GH17097 + df = DataFrame({"col": ["AAAAA", "ÄÄÄÄÄ", "ßßßßß", "聞聞聞聞聞"]}) + + with tm.ensure_clean("test.csv") as path: + # the default to_csv encoding is utf-8. 
+ df.to_csv(path) + tm.assert_frame_equal(pd.read_csv(path, index_col=0), df) + + def test_to_csv_quotechar(self): + df = DataFrame({"col": [1, 2]}) + expected = """\ +"","col" +"0","1" +"1","2" +""" + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=1) # 1=QUOTE_ALL + with open(path, "r") as f: + assert f.read() == expected + + expected = """\ +$$,$col$ +$0$,$1$ +$1$,$2$ +""" + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=1, quotechar="$") + with open(path, "r") as f: + assert f.read() == expected + + with tm.ensure_clean("test.csv") as path: + with pytest.raises(TypeError, match="quotechar"): + df.to_csv(path, quoting=1, quotechar=None) + + def test_to_csv_doublequote(self): + df = DataFrame({"col": ['a"a', '"bb"']}) + expected = '''\ +"","col" +"0","a""a" +"1","""bb""" +''' + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL + with open(path, "r") as f: + assert f.read() == expected + + from _csv import Error + + with tm.ensure_clean("test.csv") as path: + with pytest.raises(Error, match="escapechar"): + df.to_csv(path, doublequote=False) # no escapechar set + + def test_to_csv_escapechar(self): + df = DataFrame({"col": ['a"a', '"bb"']}) + expected = """\ +"","col" +"0","a\\"a" +"1","\\"bb\\"" +""" + + with tm.ensure_clean("test.csv") as path: # QUOTE_ALL + df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") + with open(path, "r") as f: + assert f.read() == expected + + df = DataFrame({"col": ["a,a", ",bb,"]}) + expected = """\ +,col +0,a\\,a +1,\\,bb\\, +""" + + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE + with open(path, "r") as f: + assert f.read() == expected + + def test_csv_to_string(self): + df = DataFrame({"col": [1, 2]}) + expected_rows = [",col", "0,1", "1,2"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv() == expected + + def test_to_csv_decimal(self): + 
# see gh-781 + df = DataFrame({"col1": [1], "col2": ["a"], "col3": [10.1]}) + + expected_rows = [",col1,col2,col3", "0,1,a,10.1"] + expected_default = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv() == expected_default + + expected_rows = [";col1;col2;col3", "0;1;a;10,1"] + expected_european_excel = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(decimal=",", sep=";") == expected_european_excel + + expected_rows = [",col1,col2,col3", "0,1,a,10.10"] + expected_float_format_default = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(float_format="%.2f") == expected_float_format_default + + expected_rows = [";col1;col2;col3", "0;1;a;10,10"] + expected_float_format = tm.convert_rows_list_to_csv_str(expected_rows) + assert ( + df.to_csv(decimal=",", sep=";", float_format="%.2f") + == expected_float_format + ) + + # see gh-11553: testing if decimal is taken into account for '0.0' + df = pd.DataFrame({"a": [0, 1.1], "b": [2.2, 3.3], "c": 1}) + + expected_rows = ["a,b,c", "0^0,2^2,1", "1^1,3^3,1"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(index=False, decimal="^") == expected + + # same but for an index + assert df.set_index("a").to_csv(decimal="^") == expected + + # same for a multi-index + assert df.set_index(["a", "b"]).to_csv(decimal="^") == expected + + def test_to_csv_float_format(self): + # testing if float_format is taken into account for the index + # GH 11553 + df = pd.DataFrame({"a": [0, 1], "b": [2.2, 3.3], "c": 1}) + + expected_rows = ["a,b,c", "0,2.20,1", "1,3.30,1"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.set_index("a").to_csv(float_format="%.2f") == expected + + # same for a multi-index + assert df.set_index(["a", "b"]).to_csv(float_format="%.2f") == expected + + def test_to_csv_na_rep(self): + # see gh-11553 + # + # Testing if NaN values are correctly represented in the index. 
+ df = DataFrame({"a": [0, np.NaN], "b": [0, 1], "c": [2, 3]}) + expected_rows = ["a,b,c", "0.0,0,2", "_,1,3"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + assert df.set_index("a").to_csv(na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + + # now with an index containing only NaNs + df = DataFrame({"a": np.NaN, "b": [0, 1], "c": [2, 3]}) + expected_rows = ["a,b,c", "_,0,2", "_,1,3"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + assert df.set_index("a").to_csv(na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + + # check if na_rep parameter does not break anything when no NaN + df = DataFrame({"a": 0, "b": [0, 1], "c": [2, 3]}) + expected_rows = ["a,b,c", "0,0,2", "0,1,3"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + assert df.set_index("a").to_csv(na_rep="_") == expected + assert df.set_index(["a", "b"]).to_csv(na_rep="_") == expected + + # GH 29975 + # Make sure full na_rep shows up when a dtype is provided + csv = pd.Series(["a", pd.NA, "c"]).to_csv(na_rep="ZZZZZ") + expected = tm.convert_rows_list_to_csv_str([",0", "0,a", "1,ZZZZZ", "2,c"]) + assert expected == csv + csv = pd.Series(["a", pd.NA, "c"], dtype="string").to_csv(na_rep="ZZZZZ") + assert expected == csv + + def test_to_csv_date_format(self): + # GH 10209 + df_sec = DataFrame({"A": pd.date_range("20130101", periods=5, freq="s")}) + df_day = DataFrame({"A": pd.date_range("20130101", periods=5, freq="d")}) + + expected_rows = [ + ",A", + "0,2013-01-01 00:00:00", + "1,2013-01-01 00:00:01", + "2,2013-01-01 00:00:02", + "3,2013-01-01 00:00:03", + "4,2013-01-01 00:00:04", + ] + expected_default_sec = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_sec.to_csv() == expected_default_sec + + expected_rows = [ + ",A", + "0,2013-01-01 00:00:00", + "1,2013-01-02 00:00:00", + "2,2013-01-03 00:00:00", + "3,2013-01-04 00:00:00", + "4,2013-01-05 00:00:00", + ] + 
expected_ymdhms_day = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_day.to_csv(date_format="%Y-%m-%d %H:%M:%S") == expected_ymdhms_day + + expected_rows = [ + ",A", + "0,2013-01-01", + "1,2013-01-01", + "2,2013-01-01", + "3,2013-01-01", + "4,2013-01-01", + ] + expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_sec.to_csv(date_format="%Y-%m-%d") == expected_ymd_sec + + expected_rows = [ + ",A", + "0,2013-01-01", + "1,2013-01-02", + "2,2013-01-03", + "3,2013-01-04", + "4,2013-01-05", + ] + expected_default_day = tm.convert_rows_list_to_csv_str(expected_rows) + assert df_day.to_csv() == expected_default_day + assert df_day.to_csv(date_format="%Y-%m-%d") == expected_default_day + + # see gh-7791 + # + # Testing if date_format parameter is taken into account + # for multi-indexed DataFrames. + df_sec["B"] = 0 + df_sec["C"] = 1 + + expected_rows = ["A,B,C", "2013-01-01,0,1"] + expected_ymd_sec = tm.convert_rows_list_to_csv_str(expected_rows) + + df_sec_grouped = df_sec.groupby([pd.Grouper(key="A", freq="1h"), "B"]) + assert df_sec_grouped.mean().to_csv(date_format="%Y-%m-%d") == expected_ymd_sec + + def test_to_csv_multi_index(self): + # see gh-6618 + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([[1], [2]])) + + exp_rows = [",1", ",2", "0,1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv() == exp + + exp_rows = ["1", "2", "1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv(index=False) == exp + + df = DataFrame( + [1], + columns=pd.MultiIndex.from_arrays([[1], [2]]), + index=pd.MultiIndex.from_arrays([[1], [2]]), + ) + + exp_rows = [",,1", ",,2", "1,2,1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv() == exp + + exp_rows = ["1", "2", "1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv(index=False) == exp + + df = DataFrame([1], columns=pd.MultiIndex.from_arrays([["foo"], ["bar"]])) + + exp_rows = [",foo", ",bar", "0,1"] + exp = 
tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv() == exp + + exp_rows = ["foo", "bar", "1"] + exp = tm.convert_rows_list_to_csv_str(exp_rows) + assert df.to_csv(index=False) == exp + + @pytest.mark.parametrize( + "ind,expected", + [ + ( + pd.MultiIndex(levels=[[1.0]], codes=[[0]], names=["x"]), + "x,data\n1.0,1\n", + ), + ( + pd.MultiIndex( + levels=[[1.0], [2.0]], codes=[[0], [0]], names=["x", "y"] + ), + "x,y,data\n1.0,2.0,1\n", + ), + ], + ) + @pytest.mark.parametrize("klass", [pd.DataFrame, pd.Series]) + def test_to_csv_single_level_multi_index(self, ind, expected, klass): + # see gh-19589 + result = klass(pd.Series([1], ind, name="data")).to_csv( + line_terminator="\n", header=True + ) + assert result == expected + + def test_to_csv_string_array_ascii(self): + # GH 10813 + str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] + df = pd.DataFrame(str_array) + expected_ascii = """\ +,names +0,"['foo', 'bar']" +1,"['baz', 'qux']" +""" + with tm.ensure_clean("str_test.csv") as path: + df.to_csv(path, encoding="ascii") + with open(path, "r") as f: + assert f.read() == expected_ascii + + def test_to_csv_string_array_utf8(self): + # GH 10813 + str_array = [{"names": ["foo", "bar"]}, {"names": ["baz", "qux"]}] + df = pd.DataFrame(str_array) + expected_utf8 = """\ +,names +0,"['foo', 'bar']" +1,"['baz', 'qux']" +""" + with tm.ensure_clean("unicode_test.csv") as path: + df.to_csv(path, encoding="utf-8") + with open(path, "r") as f: + assert f.read() == expected_utf8 + + def test_to_csv_string_with_lf(self): + # GH 20353 + data = {"int": [1, 2, 3], "str_lf": ["abc", "d\nef", "g\nh\n\ni"]} + df = pd.DataFrame(data) + with tm.ensure_clean("lf_test.csv") as path: + # case 1: The default line terminator(=os.linesep)(PR 21406) + os_linesep = os.linesep.encode("utf-8") + expected_noarg = ( + b"int,str_lf" + + os_linesep + + b"1,abc" + + os_linesep + + b'2,"d\nef"' + + os_linesep + + b'3,"g\nh\n\ni"' + + os_linesep + ) + df.to_csv(path, index=False) + 
with open(path, "rb") as f: + assert f.read() == expected_noarg + with tm.ensure_clean("lf_test.csv") as path: + # case 2: LF as line terminator + expected_lf = b'int,str_lf\n1,abc\n2,"d\nef"\n3,"g\nh\n\ni"\n' + df.to_csv(path, line_terminator="\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_lf + with tm.ensure_clean("lf_test.csv") as path: + # case 3: CRLF as line terminator + # 'line_terminator' should not change inner element + expected_crlf = b'int,str_lf\r\n1,abc\r\n2,"d\nef"\r\n3,"g\nh\n\ni"\r\n' + df.to_csv(path, line_terminator="\r\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_crlf + + def test_to_csv_string_with_crlf(self): + # GH 20353 + data = {"int": [1, 2, 3], "str_crlf": ["abc", "d\r\nef", "g\r\nh\r\n\r\ni"]} + df = pd.DataFrame(data) + with tm.ensure_clean("crlf_test.csv") as path: + # case 1: The default line terminator(=os.linesep)(PR 21406) + os_linesep = os.linesep.encode("utf-8") + expected_noarg = ( + b"int,str_crlf" + + os_linesep + + b"1,abc" + + os_linesep + + b'2,"d\r\nef"' + + os_linesep + + b'3,"g\r\nh\r\n\r\ni"' + + os_linesep + ) + df.to_csv(path, index=False) + with open(path, "rb") as f: + assert f.read() == expected_noarg + with tm.ensure_clean("crlf_test.csv") as path: + # case 2: LF as line terminator + expected_lf = b'int,str_crlf\n1,abc\n2,"d\r\nef"\n3,"g\r\nh\r\n\r\ni"\n' + df.to_csv(path, line_terminator="\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_lf + with tm.ensure_clean("crlf_test.csv") as path: + # case 3: CRLF as line terminator + # 'line_terminator' should not change inner element + expected_crlf = ( + b"int,str_crlf\r\n" + b"1,abc\r\n" + b'2,"d\r\nef"\r\n' + b'3,"g\r\nh\r\n\r\ni"\r\n' + ) + df.to_csv(path, line_terminator="\r\n", index=False) + with open(path, "rb") as f: + assert f.read() == expected_crlf + + def test_to_csv_stdout_file(self, capsys): + # GH 21561 + df = pd.DataFrame( + [["foo", "bar"], ["baz", "qux"]], 
columns=["name_1", "name_2"] + ) + expected_rows = [",name_1,name_2", "0,foo,bar", "1,baz,qux"] + expected_ascii = tm.convert_rows_list_to_csv_str(expected_rows) + + df.to_csv(sys.stdout, encoding="ascii") + captured = capsys.readouterr() + + assert captured.out == expected_ascii + assert not sys.stdout.closed + + @pytest.mark.xfail( + compat.is_platform_windows(), + reason=( + "Especially in Windows, file stream should not be passed" + "to csv writer without newline='' option." + "(https://docs.python.org/3.6/library/csv.html#csv.writer)" + ), + ) + def test_to_csv_write_to_open_file(self): + # GH 21696 + df = pd.DataFrame({"a": ["x", "y", "z"]}) + expected = """\ +manual header +x +y +z +""" + with tm.ensure_clean("test.txt") as path: + with open(path, "w") as f: + f.write("manual header\n") + df.to_csv(f, header=None, index=None) + with open(path, "r") as f: + assert f.read() == expected + + def test_to_csv_write_to_open_file_with_newline_py3(self): + # see gh-21696 + # see gh-20353 + df = pd.DataFrame({"a": ["x", "y", "z"]}) + expected_rows = ["x", "y", "z"] + expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) + with tm.ensure_clean("test.txt") as path: + with open(path, "w", newline="") as f: + f.write("manual header\n") + df.to_csv(f, header=None, index=None) + + with open(path, "rb") as f: + assert f.read() == bytes(expected, "utf-8") + + @pytest.mark.parametrize("to_infer", [True, False]) + @pytest.mark.parametrize("read_infer", [True, False]) + def test_to_csv_compression(self, compression_only, read_infer, to_infer): + # see gh-15008 + compression = compression_only + + if compression == "zip": + pytest.skip(f"{compression} is not supported for to_csv") + + # We'll complete file extension subsequently. + filename = "test." 
+ + if compression == "gzip": + filename += "gz" + else: + # xz --> .xz + # bz2 --> .bz2 + filename += compression + + df = DataFrame({"A": [1]}) + + to_compression = "infer" if to_infer else compression + read_compression = "infer" if read_infer else compression + + with tm.ensure_clean(filename) as path: + df.to_csv(path, compression=to_compression) + result = pd.read_csv(path, index_col=0, compression=read_compression) + tm.assert_frame_equal(result, df) + + def test_to_csv_compression_dict(self, compression_only): + # GH 26023 + method = compression_only + df = DataFrame({"ABC": [1]}) + filename = "to_csv_compress_as_dict." + filename += "gz" if method == "gzip" else method + with tm.ensure_clean(filename) as path: + df.to_csv(path, compression={"method": method}) + read_df = pd.read_csv(path, index_col=0) + tm.assert_frame_equal(read_df, df) + + def test_to_csv_compression_dict_no_method_raises(self): + # GH 26023 + df = DataFrame({"ABC": [1]}) + compression = {"some_option": True} + msg = "must have key 'method'" + + with tm.ensure_clean("out.zip") as path: + with pytest.raises(ValueError, match=msg): + df.to_csv(path, compression=compression) + + @pytest.mark.parametrize("compression", ["zip", "infer"]) + @pytest.mark.parametrize( + "archive_name", [None, "test_to_csv.csv", "test_to_csv.zip"] + ) + def test_to_csv_zip_arguments(self, compression, archive_name): + # GH 26023 + from zipfile import ZipFile + + df = DataFrame({"ABC": [1]}) + with tm.ensure_clean("to_csv_archive_name.zip") as path: + df.to_csv( + path, compression={"method": compression, "archive_name": archive_name} + ) + zp = ZipFile(path) + expected_arcname = path if archive_name is None else archive_name + expected_arcname = os.path.basename(expected_arcname) + assert len(zp.filelist) == 1 + archived_file = os.path.basename(zp.filelist[0].filename) + assert archived_file == expected_arcname + + @pytest.mark.parametrize("df_new_type", ["Int64"]) + def test_to_csv_na_rep_long_string(self, 
df_new_type): + # see gh-25099 + df = pd.DataFrame({"c": [float("nan")] * 3}) + df = df.astype(df_new_type) + expected_rows = ["c", "mynull", "mynull", "mynull"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(index=False, na_rep="mynull", encoding="ascii") + + assert expected == result + + def test_to_csv_timedelta_precision(self): + # GH 6783 + s = pd.Series([1, 1]).astype("timedelta64[ns]") + buf = io.StringIO() + s.to_csv(buf) + result = buf.getvalue() + expected_rows = [ + ",0", + "0,0 days 00:00:00.000000001", + "1,0 days 00:00:00.000000001", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_na_rep_truncated(self): + # https://github.com/pandas-dev/pandas/issues/31447 + result = pd.Series(range(8, 12)).to_csv(na_rep="-") + expected = tm.convert_rows_list_to_csv_str([",0", "0,8", "1,9", "2,10", "3,11"]) + assert result == expected + + result = pd.Series([True, False]).to_csv(na_rep="nan") + expected = tm.convert_rows_list_to_csv_str([",0", "0,True", "1,False"]) + assert result == expected + + result = pd.Series([1.1, 2.2]).to_csv(na_rep=".") + expected = tm.convert_rows_list_to_csv_str([",0", "0,1.1", "1,2.2"]) + assert result == expected diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py new file mode 100644 index 00000000..883240b7 --- /dev/null +++ b/pandas/tests/io/formats/test_to_excel.py @@ -0,0 +1,315 @@ +"""Tests formatting as writer-agnostic ExcelCells + +ExcelFormatter is tested implicitly in pandas/tests/io/test_excel.py +""" + +import pytest + +import pandas._testing as tm + +from pandas.io.formats.css import CSSWarning +from pandas.io.formats.excel import CSSToExcelConverter + + +@pytest.mark.parametrize( + "css,expected", + [ + # FONT + # - name + ("font-family: foo,bar", {"font": {"name": "foo"}}), + ('font-family: "foo bar",baz', {"font": {"name": "foo bar"}}), + ("font-family: foo,\nbar", {"font": {"name": 
"foo"}}), + ("font-family: foo, bar, baz", {"font": {"name": "foo"}}), + ("font-family: bar, foo", {"font": {"name": "bar"}}), + ("font-family: 'foo bar', baz", {"font": {"name": "foo bar"}}), + ("font-family: 'foo \\'bar', baz", {"font": {"name": "foo 'bar"}}), + ('font-family: "foo \\"bar", baz', {"font": {"name": 'foo "bar'}}), + ('font-family: "foo ,bar", baz', {"font": {"name": "foo ,bar"}}), + # - family + ("font-family: serif", {"font": {"name": "serif", "family": 1}}), + ("font-family: Serif", {"font": {"name": "serif", "family": 1}}), + ("font-family: roman, serif", {"font": {"name": "roman", "family": 1}}), + ("font-family: roman, sans-serif", {"font": {"name": "roman", "family": 2}}), + ("font-family: roman, sans serif", {"font": {"name": "roman"}}), + ("font-family: roman, sansserif", {"font": {"name": "roman"}}), + ("font-family: roman, cursive", {"font": {"name": "roman", "family": 4}}), + ("font-family: roman, fantasy", {"font": {"name": "roman", "family": 5}}), + # - size + ("font-size: 1em", {"font": {"size": 12}}), + ("font-size: xx-small", {"font": {"size": 6}}), + ("font-size: x-small", {"font": {"size": 7.5}}), + ("font-size: small", {"font": {"size": 9.6}}), + ("font-size: medium", {"font": {"size": 12}}), + ("font-size: large", {"font": {"size": 13.5}}), + ("font-size: x-large", {"font": {"size": 18}}), + ("font-size: xx-large", {"font": {"size": 24}}), + ("font-size: 50%", {"font": {"size": 6}}), + # - bold + ("font-weight: 100", {"font": {"bold": False}}), + ("font-weight: 200", {"font": {"bold": False}}), + ("font-weight: 300", {"font": {"bold": False}}), + ("font-weight: 400", {"font": {"bold": False}}), + ("font-weight: normal", {"font": {"bold": False}}), + ("font-weight: lighter", {"font": {"bold": False}}), + ("font-weight: bold", {"font": {"bold": True}}), + ("font-weight: bolder", {"font": {"bold": True}}), + ("font-weight: 700", {"font": {"bold": True}}), + ("font-weight: 800", {"font": {"bold": True}}), + ("font-weight: 900", 
{"font": {"bold": True}}), + # - italic + ("font-style: italic", {"font": {"italic": True}}), + ("font-style: oblique", {"font": {"italic": True}}), + # - underline + ("text-decoration: underline", {"font": {"underline": "single"}}), + ("text-decoration: overline", {}), + ("text-decoration: none", {}), + # - strike + ("text-decoration: line-through", {"font": {"strike": True}}), + ( + "text-decoration: underline line-through", + {"font": {"strike": True, "underline": "single"}}, + ), + ( + "text-decoration: underline; text-decoration: line-through", + {"font": {"strike": True}}, + ), + # - color + ("color: red", {"font": {"color": "FF0000"}}), + ("color: #ff0000", {"font": {"color": "FF0000"}}), + ("color: #f0a", {"font": {"color": "FF00AA"}}), + # - shadow + ("text-shadow: none", {"font": {"shadow": False}}), + ("text-shadow: 0px -0em 0px #CCC", {"font": {"shadow": False}}), + ("text-shadow: 0px -0em 0px #999", {"font": {"shadow": False}}), + ("text-shadow: 0px -0em 0px", {"font": {"shadow": False}}), + ("text-shadow: 2px -0em 0px #CCC", {"font": {"shadow": True}}), + ("text-shadow: 0px -2em 0px #CCC", {"font": {"shadow": True}}), + ("text-shadow: 0px -0em 2px #CCC", {"font": {"shadow": True}}), + ("text-shadow: 0px -0em 2px", {"font": {"shadow": True}}), + ("text-shadow: 0px -2em", {"font": {"shadow": True}}), + # FILL + # - color, fillType + ( + "background-color: red", + {"fill": {"fgColor": "FF0000", "patternType": "solid"}}, + ), + ( + "background-color: #ff0000", + {"fill": {"fgColor": "FF0000", "patternType": "solid"}}, + ), + ( + "background-color: #f0a", + {"fill": {"fgColor": "FF00AA", "patternType": "solid"}}, + ), + # BORDER + # - style + ( + "border-style: solid", + { + "border": { + "top": {"style": "medium"}, + "bottom": {"style": "medium"}, + "left": {"style": "medium"}, + "right": {"style": "medium"}, + } + }, + ), + ( + "border-style: solid; border-width: thin", + { + "border": { + "top": {"style": "thin"}, + "bottom": {"style": "thin"}, + 
"left": {"style": "thin"}, + "right": {"style": "thin"}, + } + }, + ), + ( + "border-top-style: solid; border-top-width: thin", + {"border": {"top": {"style": "thin"}}}, + ), + ( + "border-top-style: solid; border-top-width: 1pt", + {"border": {"top": {"style": "thin"}}}, + ), + ("border-top-style: solid", {"border": {"top": {"style": "medium"}}}), + ( + "border-top-style: solid; border-top-width: medium", + {"border": {"top": {"style": "medium"}}}, + ), + ( + "border-top-style: solid; border-top-width: 2pt", + {"border": {"top": {"style": "medium"}}}, + ), + ( + "border-top-style: solid; border-top-width: thick", + {"border": {"top": {"style": "thick"}}}, + ), + ( + "border-top-style: solid; border-top-width: 4pt", + {"border": {"top": {"style": "thick"}}}, + ), + ( + "border-top-style: dotted", + {"border": {"top": {"style": "mediumDashDotDot"}}}, + ), + ( + "border-top-style: dotted; border-top-width: thin", + {"border": {"top": {"style": "dotted"}}}, + ), + ("border-top-style: dashed", {"border": {"top": {"style": "mediumDashed"}}}), + ( + "border-top-style: dashed; border-top-width: thin", + {"border": {"top": {"style": "dashed"}}}, + ), + ("border-top-style: double", {"border": {"top": {"style": "double"}}}), + # - color + ( + "border-style: solid; border-color: #0000ff", + { + "border": { + "top": {"style": "medium", "color": "0000FF"}, + "right": {"style": "medium", "color": "0000FF"}, + "bottom": {"style": "medium", "color": "0000FF"}, + "left": {"style": "medium", "color": "0000FF"}, + } + }, + ), + ( + "border-top-style: double; border-top-color: blue", + {"border": {"top": {"style": "double", "color": "0000FF"}}}, + ), + ( + "border-top-style: solid; border-top-color: #06c", + {"border": {"top": {"style": "medium", "color": "0066CC"}}}, + ), + # ALIGNMENT + # - horizontal + ("text-align: center", {"alignment": {"horizontal": "center"}}), + ("text-align: left", {"alignment": {"horizontal": "left"}}), + ("text-align: right", {"alignment": {"horizontal": 
"right"}}), + ("text-align: justify", {"alignment": {"horizontal": "justify"}}), + # - vertical + ("vertical-align: top", {"alignment": {"vertical": "top"}}), + ("vertical-align: text-top", {"alignment": {"vertical": "top"}}), + ("vertical-align: middle", {"alignment": {"vertical": "center"}}), + ("vertical-align: bottom", {"alignment": {"vertical": "bottom"}}), + ("vertical-align: text-bottom", {"alignment": {"vertical": "bottom"}}), + # - wrap_text + ("white-space: nowrap", {"alignment": {"wrap_text": False}}), + ("white-space: pre", {"alignment": {"wrap_text": False}}), + ("white-space: pre-line", {"alignment": {"wrap_text": False}}), + ("white-space: normal", {"alignment": {"wrap_text": True}}), + # NUMBER FORMAT + ("number-format: 0%", {"number_format": {"format_code": "0%"}}), + ], +) +def test_css_to_excel(css, expected): + convert = CSSToExcelConverter() + assert expected == convert(css) + + +def test_css_to_excel_multiple(): + convert = CSSToExcelConverter() + actual = convert( + """ + font-weight: bold; + text-decoration: underline; + color: red; + border-width: thin; + text-align: center; + vertical-align: top; + unused: something; + """ + ) + assert { + "font": {"bold": True, "underline": "single", "color": "FF0000"}, + "border": { + "top": {"style": "thin"}, + "right": {"style": "thin"}, + "bottom": {"style": "thin"}, + "left": {"style": "thin"}, + }, + "alignment": {"horizontal": "center", "vertical": "top"}, + } == actual + + +@pytest.mark.parametrize( + "css,inherited,expected", + [ + ("font-weight: bold", "", {"font": {"bold": True}}), + ("", "font-weight: bold", {"font": {"bold": True}}), + ( + "font-weight: bold", + "font-style: italic", + {"font": {"bold": True, "italic": True}}, + ), + ("font-style: normal", "font-style: italic", {"font": {"italic": False}}), + ("font-style: inherit", "", {}), + ( + "font-style: normal; font-style: inherit", + "font-style: italic", + {"font": {"italic": True}}, + ), + ], +) +def test_css_to_excel_inherited(css, 
inherited, expected): + convert = CSSToExcelConverter(inherited) + assert expected == convert(css) + + +@pytest.mark.parametrize( + "input_color,output_color", + ( + list(CSSToExcelConverter.NAMED_COLORS.items()) + + [("#" + rgb, rgb) for rgb in CSSToExcelConverter.NAMED_COLORS.values()] + + [("#F0F", "FF00FF"), ("#ABC", "AABBCC")] + ), +) +def test_css_to_excel_good_colors(input_color, output_color): + # see gh-18392 + css = ( + f"border-top-color: {input_color}; " + f"border-right-color: {input_color}; " + f"border-bottom-color: {input_color}; " + f"border-left-color: {input_color}; " + f"background-color: {input_color}; " + f"color: {input_color}" + ) + + expected = dict() + + expected["fill"] = {"patternType": "solid", "fgColor": output_color} + + expected["font"] = {"color": output_color} + + expected["border"] = { + k: {"color": output_color} for k in ("top", "right", "bottom", "left") + } + + with tm.assert_produces_warning(None): + convert = CSSToExcelConverter() + assert expected == convert(css) + + +@pytest.mark.parametrize("input_color", [None, "not-a-color"]) +def test_css_to_excel_bad_colors(input_color): + # see gh-18392 + css = ( + f"border-top-color: {input_color}; " + f"border-right-color: {input_color}; " + f"border-bottom-color: {input_color}; " + f"border-left-color: {input_color}; " + f"background-color: {input_color}; " + f"color: {input_color}" + ) + + expected = dict() + + if input_color is not None: + expected["fill"] = {"patternType": "solid"} + + with tm.assert_produces_warning(CSSWarning): + convert = CSSToExcelConverter() + assert expected == convert(css) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py new file mode 100644 index 00000000..d3f044a4 --- /dev/null +++ b/pandas/tests/io/formats/test_to_html.py @@ -0,0 +1,788 @@ +from datetime import datetime +from io import StringIO +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, 
MultiIndex, option_context +import pandas._testing as tm + +import pandas.io.formats.format as fmt + +lorem_ipsum = ( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod " + "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim " + "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex " + "ea commodo consequat. Duis aute irure dolor in reprehenderit in " + "voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur " + "sint occaecat cupidatat non proident, sunt in culpa qui officia " + "deserunt mollit anim id est laborum." +) + + +def expected_html(datapath, name): + """ + Read HTML file from formats data directory. + + Parameters + ---------- + datapath : pytest fixture + The datapath fixture injected into a test by pytest. + name : str + The name of the HTML file without the suffix. + + Returns + ------- + str : contents of HTML file. + """ + filename = ".".join([name, "html"]) + filepath = datapath("io", "formats", "data", "html", filename) + with open(filepath, encoding="utf-8") as f: + html = f.read() + return html.rstrip() + + +@pytest.fixture(params=["mixed", "empty"]) +def biggie_df_fixture(request): + """Fixture for a big mixed Dataframe and an empty Dataframe""" + if request.param == "mixed": + df = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, + index=np.arange(200), + ) + df.loc[:20, "A"] = np.nan + df.loc[:20, "B"] = np.nan + return df + elif request.param == "empty": + df = DataFrame(index=np.arange(200)) + return df + + +@pytest.fixture(params=fmt._VALID_JUSTIFY_PARAMETERS) +def justify(request): + return request.param + + +@pytest.mark.parametrize("col_space", [30, 50]) +def test_to_html_with_col_space(col_space): + df = DataFrame(np.random.random(size=(1, 3))) + # check that col_space affects HTML generation + # and be very brittle about it. 
+ result = df.to_html(col_space=col_space) + hdrs = [x for x in result.split(r"\n") if re.search(r"\s]", x)] + assert len(hdrs) > 0 + for h in hdrs: + assert "min-width" in h + assert str(col_space) in h + + +def test_to_html_with_empty_string_label(): + # GH 3547, to_html regards empty string labels as repeated labels + data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]} + df = DataFrame(data).set_index(["c1", "c2"]) + result = df.to_html() + assert "rowspan" not in result + + +@pytest.mark.parametrize( + "df,expected", + [ + (DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"), + (DataFrame({"A": ["\u03c3"]}), "unicode_2"), + ], +) +def test_to_html_unicode(df, expected, datapath): + expected = expected_html(datapath, expected) + result = df.to_html() + assert result == expected + + +def test_to_html_encoding(float_frame, tmp_path): + # GH 28663 + path = tmp_path / "test.html" + float_frame.to_html(path, encoding="gbk") + with open(str(path), "r", encoding="gbk") as f: + assert float_frame.to_html() == f.read() + + +def test_to_html_decimal(datapath): + # GH 12031 + df = DataFrame({"A": [6.0, 3.1, 2.2]}) + result = df.to_html(decimal=",") + expected = expected_html(datapath, "gh12031_expected_output") + assert result == expected + + +@pytest.mark.parametrize( + "kwargs,string,expected", + [ + (dict(), "", "escaped"), + (dict(escape=False), "bold", "escape_disabled"), + ], +) +def test_to_html_escaped(kwargs, string, expected, datapath): + a = "strl2": {a: string, b: string}} + result = DataFrame(test_dict).to_html(**kwargs) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize("index_is_named", [True, False]) +def test_to_html_multiindex_index_false(index_is_named, datapath): + # GH 8452 + df = DataFrame( + {"a": range(2), "b": range(3, 5), "c": range(5, 7), "d": range(3, 5)} + ) + df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + if index_is_named: + df.index = Index(df.index.values, 
name="idx") + result = df.to_html(index=False) + expected = expected_html(datapath, "gh8452_expected_output") + assert result == expected + + +@pytest.mark.parametrize( + "multi_sparse,expected", + [ + (False, "multiindex_sparsify_false_multi_sparse_1"), + (False, "multiindex_sparsify_false_multi_sparse_2"), + (True, "multiindex_sparsify_1"), + (True, "multiindex_sparsify_2"), + ], +) +def test_to_html_multiindex_sparsify(multi_sparse, expected, datapath): + index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], names=["foo", None]) + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) + if expected.endswith("2"): + df.columns = index[::2] + with option_context("display.multi_sparse", multi_sparse): + result = df.to_html() + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "max_rows,expected", + [ + (60, "gh14882_expected_output_1"), + # Test that ... appears in a middle level + (56, "gh14882_expected_output_2"), + ], +) +def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath): + # GH 14882 - Issue on truncation with odd length DataFrame + index = MultiIndex.from_product( + [[100, 200, 300], [10, 20, 30], [1, 2, 3, 4, 5, 6, 7]], names=["a", "b", "c"] + ) + df = DataFrame({"n": range(len(index))}, index=index) + result = df.to_html(max_rows=max_rows) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "df,formatters,expected", + [ + ( + DataFrame( + [[0, 1], [2, 3], [4, 5], [6, 7]], + columns=["foo", None], + index=np.arange(4), + ), + {"__index__": lambda x: "abcd"[x]}, + "index_formatter", + ), + ( + DataFrame({"months": [datetime(2016, 1, 1), datetime(2016, 2, 2)]}), + {"months": lambda x: x.strftime("%Y-%m")}, + "datetime64_monthformatter", + ), + ( + DataFrame( + { + "hod": pd.to_datetime( + ["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f" + ) + } + ), + {"hod": lambda x: x.strftime("%H:%M")}, + 
"datetime64_hourformatter", + ), + ], +) +def test_to_html_formatters(df, formatters, expected, datapath): + expected = expected_html(datapath, expected) + result = df.to_html(formatters=formatters) + assert result == expected + + +def test_to_html_regression_GH6098(): + df = DataFrame( + { + "clé1": ["a", "a", "b", "b", "a"], + "clé2": ["1er", "2ème", "1er", "2ème", "1er"], + "données1": np.random.randn(5), + "données2": np.random.randn(5), + } + ) + + # it works + df.pivot_table(index=["clé1"], columns=["clé2"])._repr_html_() + + +def test_to_html_truncate(datapath): + index = pd.date_range(start="20010101", freq="D", periods=20) + df = DataFrame(index=index, columns=range(20)) + result = df.to_html(max_rows=8, max_cols=4) + expected = expected_html(datapath, "truncate") + assert result == expected + + +@pytest.mark.parametrize("size", [1, 5]) +def test_html_invalid_formatters_arg_raises(size): + # issue-28469 + df = DataFrame(columns=["a", "b", "c"]) + msg = "Formatters length({}) should match DataFrame number of columns(3)" + with pytest.raises(ValueError, match=re.escape(msg.format(size))): + df.to_html(formatters=["{}".format] * size) + + +def test_to_html_truncate_formatter(datapath): + # issue-25955 + data = [ + {"A": 1, "B": 2, "C": 3, "D": 4}, + {"A": 5, "B": 6, "C": 7, "D": 8}, + {"A": 9, "B": 10, "C": 11, "D": 12}, + {"A": 13, "B": 14, "C": 15, "D": 16}, + ] + + df = DataFrame(data) + fmt = lambda x: str(x) + "_mod" + formatters = [fmt, fmt, None, None] + result = df.to_html(formatters=formatters, max_cols=3) + expected = expected_html(datapath, "truncate_formatter") + assert result == expected + + +@pytest.mark.parametrize( + "sparsify,expected", + [(True, "truncate_multi_index"), (False, "truncate_multi_index_sparse_off")], +) +def test_to_html_truncate_multi_index(sparsify, expected, datapath): + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + df = 
DataFrame(index=arrays, columns=arrays) + result = df.to_html(max_rows=7, max_cols=7, sparsify=sparsify) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "option,result,expected", + [ + (None, lambda df: df.to_html(), "1"), + (None, lambda df: df.to_html(border=0), "0"), + (0, lambda df: df.to_html(), "0"), + (0, lambda df: df._repr_html_(), "0"), + ], +) +def test_to_html_border(option, result, expected): + df = DataFrame({"A": [1, 2]}) + if option is None: + result = result(df) + else: + with option_context("display.html.border", option): + result = result(df) + expected = 'border="{}"'.format(expected) + assert expected in result + + +@pytest.mark.parametrize("biggie_df_fixture", ["mixed"], indirect=True) +def test_to_html(biggie_df_fixture): + # TODO: split this test + df = biggie_df_fixture + s = df.to_html() + + buf = StringIO() + retval = df.to_html(buf=buf) + assert retval is None + assert buf.getvalue() == s + + assert isinstance(s, str) + + df.to_html(columns=["B", "A"], col_space=17) + df.to_html(columns=["B", "A"], formatters={"A": lambda x: "{x:.1f}".format(x=x)}) + + df.to_html(columns=["B", "A"], float_format=str) + df.to_html(columns=["B", "A"], col_space=12, float_format=str) + + +@pytest.mark.parametrize("biggie_df_fixture", ["empty"], indirect=True) +def test_to_html_empty_dataframe(biggie_df_fixture): + df = biggie_df_fixture + df.to_html() + + +def test_to_html_filename(biggie_df_fixture, tmpdir): + df = biggie_df_fixture + expected = df.to_html() + path = tmpdir.join("test.html") + df.to_html(path) + result = path.read() + assert result == expected + + +def test_to_html_with_no_bold(): + df = DataFrame({"x": np.random.randn(5)}) + html = df.to_html(bold_rows=False) + result = html[html.find("
    ")] + assert "B" not in result + + +@pytest.mark.parametrize( + "columns,justify,expected", + [ + ( + MultiIndex.from_tuples( + list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))), + names=["CL0", "CL1"], + ), + "left", + "multiindex_1", + ), + ( + MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))), + "right", + "multiindex_2", + ), + ], +) +def test_to_html_multiindex(columns, justify, expected, datapath): + df = DataFrame([list("abcd"), list("efgh")], columns=columns) + result = df.to_html(justify=justify) + expected = expected_html(datapath, expected) + assert result == expected + + +def test_to_html_justify(justify, datapath): + df = DataFrame( + {"A": [6, 30000, 2], "B": [1, 2, 70000], "C": [223442, 0, 1]}, + columns=["A", "B", "C"], + ) + result = df.to_html(justify=justify) + expected = expected_html(datapath, "justify").format(justify=justify) + assert result == expected + + +@pytest.mark.parametrize( + "justify", ["super-right", "small-left", "noinherit", "tiny", "pandas"] +) +def test_to_html_invalid_justify(justify): + # GH 17527 + df = DataFrame() + msg = "Invalid value for justify parameter" + + with pytest.raises(ValueError, match=msg): + df.to_html(justify=justify) + + +def test_to_html_index(datapath): + # TODO: split this test + index = ["foo", "bar", "baz"] + df = DataFrame( + {"A": [1, 2, 3], "B": [1.2, 3.4, 5.6], "C": ["one", "two", np.nan]}, + columns=["A", "B", "C"], + index=index, + ) + expected_with_index = expected_html(datapath, "index_1") + assert df.to_html() == expected_with_index + + expected_without_index = expected_html(datapath, "index_2") + result = df.to_html(index=False) + for i in index: + assert i not in result + assert result == expected_without_index + df.index = Index(["foo", "bar", "baz"], name="idx") + expected_with_index = expected_html(datapath, "index_3") + assert df.to_html() == expected_with_index + assert df.to_html(index=False) == expected_without_index + + tuples = [("foo", "car"), ("foo", 
"bike"), ("bar", "car")] + df.index = MultiIndex.from_tuples(tuples) + + expected_with_index = expected_html(datapath, "index_4") + assert df.to_html() == expected_with_index + + result = df.to_html(index=False) + for i in ["foo", "bar", "car", "bike"]: + assert i not in result + # must be the same result as normal index + assert result == expected_without_index + + df.index = MultiIndex.from_tuples(tuples, names=["idx1", "idx2"]) + expected_with_index = expected_html(datapath, "index_5") + assert df.to_html() == expected_with_index + assert df.to_html(index=False) == expected_without_index + + +@pytest.mark.parametrize("classes", ["sortable draggable", ["sortable", "draggable"]]) +def test_to_html_with_classes(classes, datapath): + df = DataFrame() + expected = expected_html(datapath, "with_classes") + result = df.to_html(classes=classes) + assert result == expected + + +def test_to_html_no_index_max_rows(datapath): + # GH 14998 + df = DataFrame({"A": [1, 2, 3, 4]}) + result = df.to_html(index=False, max_rows=1) + expected = expected_html(datapath, "gh14998_expected_output") + assert result == expected + + +def test_to_html_multiindex_max_cols(datapath): + # GH 6131 + index = MultiIndex( + levels=[["ba", "bb", "bc"], ["ca", "cb", "cc"]], + codes=[[0, 1, 2], [0, 1, 2]], + names=["b", "c"], + ) + columns = MultiIndex( + levels=[["d"], ["aa", "ab", "ac"]], + codes=[[0, 0, 0], [0, 1, 2]], + names=[None, "a"], + ) + data = np.array( + [[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]] + ) + df = DataFrame(data, index, columns) + result = df.to_html(max_cols=2) + expected = expected_html(datapath, "gh6131_expected_output") + assert result == expected + + +def test_to_html_multi_indexes_index_false(datapath): + # GH 22579 + df = DataFrame( + {"a": range(10), "b": range(10, 20), "c": range(10, 20), "d": range(10, 20)} + ) + df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + df.index = MultiIndex.from_product([["a", "b"], ["c", "d", "e", 
"f", "g"]]) + result = df.to_html(index=False) + expected = expected_html(datapath, "gh22579_expected_output") + assert result == expected + + +@pytest.mark.parametrize("index_names", [True, False]) +@pytest.mark.parametrize("header", [True, False]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize( + "column_index, column_type", + [ + (Index([0, 1]), "unnamed_standard"), + (Index([0, 1], name="columns.name"), "named_standard"), + (MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"), + ( + MultiIndex.from_product( + [["a"], ["b", "c"]], names=["columns.name.0", "columns.name.1"] + ), + "named_multi", + ), + ], +) +@pytest.mark.parametrize( + "row_index, row_type", + [ + (Index([0, 1]), "unnamed_standard"), + (Index([0, 1], name="index.name"), "named_standard"), + (MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"), + ( + MultiIndex.from_product( + [["a"], ["b", "c"]], names=["index.name.0", "index.name.1"] + ), + "named_multi", + ), + ], +) +def test_to_html_basic_alignment( + datapath, row_index, row_type, column_index, column_type, index, header, index_names +): + # GH 22747, GH 22579 + df = DataFrame(np.zeros((2, 2), dtype=int), index=row_index, columns=column_index) + result = df.to_html(index=index, header=header, index_names=index_names) + + if not index: + row_type = "none" + elif not index_names and row_type.startswith("named"): + row_type = "un" + row_type + + if not header: + column_type = "none" + elif not index_names and column_type.startswith("named"): + column_type = "un" + column_type + + filename = "index_" + row_type + "_columns_" + column_type + expected = expected_html(datapath, filename) + assert result == expected + + +@pytest.mark.parametrize("index_names", [True, False]) +@pytest.mark.parametrize("header", [True, False]) +@pytest.mark.parametrize("index", [True, False]) +@pytest.mark.parametrize( + "column_index, column_type", + [ + (Index(np.arange(8)), "unnamed_standard"), + 
(Index(np.arange(8), name="columns.name"), "named_standard"), + ( + MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), + "unnamed_multi", + ), + ( + MultiIndex.from_product( + [["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"] + ), + "named_multi", + ), + ], +) +@pytest.mark.parametrize( + "row_index, row_type", + [ + (Index(np.arange(8)), "unnamed_standard"), + (Index(np.arange(8), name="index.name"), "named_standard"), + ( + MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), + "unnamed_multi", + ), + ( + MultiIndex.from_product( + [["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"] + ), + "named_multi", + ), + ], +) +def test_to_html_alignment_with_truncation( + datapath, row_index, row_type, column_index, column_type, index, header, index_names +): + # GH 22747, GH 22579 + df = DataFrame(np.arange(64).reshape(8, 8), index=row_index, columns=column_index) + result = df.to_html( + max_rows=4, max_cols=4, index=index, header=header, index_names=index_names + ) + + if not index: + row_type = "none" + elif not index_names and row_type.startswith("named"): + row_type = "un" + row_type + + if not header: + column_type = "none" + elif not index_names and column_type.startswith("named"): + column_type = "un" + column_type + + filename = "trunc_df_index_" + row_type + "_columns_" + column_type + expected = expected_html(datapath, filename) + assert result == expected + + +@pytest.mark.parametrize("index", [False, 0]) +def test_to_html_truncation_index_false_max_rows(datapath, index): + # GH 15019 + data = [ + [1.764052, 0.400157], + [0.978738, 2.240893], + [1.867558, -0.977278], + [0.950088, -0.151357], + [-0.103219, 0.410599], + ] + df = DataFrame(data) + result = df.to_html(max_rows=4, index=index) + expected = expected_html(datapath, "gh15019_expected_output") + assert result == expected + + +@pytest.mark.parametrize("index", [False, 0]) +@pytest.mark.parametrize( + "col_index_named, expected_output", + [(False, 
"gh22783_expected_output"), (True, "gh22783_named_columns_index")], +) +def test_to_html_truncation_index_false_max_cols( + datapath, index, col_index_named, expected_output +): + # GH 22783 + data = [ + [1.764052, 0.400157, 0.978738, 2.240893, 1.867558], + [-0.977278, 0.950088, -0.151357, -0.103219, 0.410599], + ] + df = DataFrame(data) + if col_index_named: + df.columns.rename("columns.name", inplace=True) + result = df.to_html(max_cols=4, index=index) + expected = expected_html(datapath, expected_output) + assert result == expected + + +@pytest.mark.parametrize("notebook", [True, False]) +def test_to_html_notebook_has_style(notebook): + df = DataFrame({"A": [1, 2, 3]}) + result = df.to_html(notebook=notebook) + + if notebook: + assert "tbody tr th:only-of-type" in result + assert "vertical-align: middle;" in result + assert "thead th" in result + else: + assert "tbody tr th:only-of-type" not in result + assert "vertical-align: middle;" not in result + assert "thead th" not in result + + +def test_to_html_with_index_names_false(): + # GH 16493 + df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname")) + result = df.to_html(index_names=False) + assert "myindexname" not in result + + +def test_to_html_with_id(): + # GH 8496 + df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname")) + result = df.to_html(index_names=False, table_id="TEST_ID") + assert ' id="TEST_ID"' in result + + +@pytest.mark.parametrize( + "value,float_format,expected", + [ + (0.19999, "%.3f", "gh21625_expected_output"), + (100.0, "%.0f", "gh22270_expected_output"), + ], +) +def test_to_html_float_format_no_fixed_width(value, float_format, expected, datapath): + # GH 21625, GH 22270 + df = DataFrame({"x": [value]}) + expected = expected_html(datapath, expected) + result = df.to_html(float_format=float_format) + assert result == expected + + +@pytest.mark.parametrize( + "render_links,expected", + [(True, "render_links_true"), (False, "render_links_false")], +) 
+def test_to_html_render_links(render_links, expected, datapath): + # GH 2679 + data = [ + [0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"], + [0, "www.pydata.org", "pydata.org"], + ] + df = DataFrame(data, columns=["foo", "bar", None]) + + result = df.to_html(render_links=render_links) + expected = expected_html(datapath, expected) + assert result == expected + + +@pytest.mark.parametrize( + "method,expected", + [ + ("to_html", lambda x: lorem_ipsum), + ("_repr_html_", lambda x: lorem_ipsum[: x - 4] + "..."), # regression case + ], +) +@pytest.mark.parametrize("max_colwidth", [10, 20, 50, 100]) +def test_ignore_display_max_colwidth(method, expected, max_colwidth): + # see gh-17004 + df = DataFrame([lorem_ipsum]) + with pd.option_context("display.max_colwidth", max_colwidth): + result = getattr(df, method)() + expected = expected(max_colwidth) + assert expected in result + + +@pytest.mark.parametrize("classes", [True, 0]) +def test_to_html_invalid_classes_type(classes): + # GH 25608 + df = DataFrame() + msg = "classes must be a string, list, or tuple" + + with pytest.raises(TypeError, match=msg): + df.to_html(classes=classes) + + +def test_to_html_round_column_headers(): + # GH 17280 + df = DataFrame([1], columns=[0.55555]) + with pd.option_context("display.precision", 3): + html = df.to_html(notebook=False) + notebook = df.to_html(notebook=True) + assert "0.55555" in html + assert "0.556" in notebook + + +@pytest.mark.parametrize("unit", ["100px", "10%", "5em", 150]) +def test_to_html_with_col_space_units(unit): + # GH 25941 + df = DataFrame(np.random.random(size=(1, 3))) + result = df.to_html(col_space=unit) + result = result.split("tbody")[0] + hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] + if isinstance(unit, int): + unit = str(unit) + "px" + for h in hdrs: + expected = ' within on malformed HTML. + """ + result = self.read_html( + """
    `` rows and ```` elements within each ``
    `` + element in the table. ```` stands for "table data". This function + attempts to properly handle ``colspan`` and ``rowspan`` attributes. + If the function has a ``
    `` elements into the header). + + .. versionadded:: 0.21.0 + + Similar to :func:`~read_csv` the `header` argument is applied + **after** `skiprows` is applied. + + This function will *always* return a list of :class:`DataFrame` *or* + it will fail, e.g., it will *not* return an empty list. + + Examples + -------- + See the :ref:`read_html documentation in the IO section of the docs + ` for some examples of reading in HTML tables. + """ + _importers() + + # Type check here. We don't want to parse only to fail because of an + # invalid value of an integer skiprows. + if isinstance(skiprows, numbers.Integral) and skiprows < 0: + raise ValueError( + "cannot skip rows starting from the end of the " + "data (you passed a negative value)" + ) + validate_header_arg(header) + return _parse( + flavor=flavor, + io=io, + match=match, + header=header, + index_col=index_col, + skiprows=skiprows, + parse_dates=parse_dates, + thousands=thousands, + attrs=attrs, + encoding=encoding, + decimal=decimal, + converters=converters, + na_values=na_values, + keep_default_na=keep_default_na, + displayed_only=displayed_only, + ) diff --git a/pandas/io/json/__init__.py b/pandas/io/json/__init__.py new file mode 100644 index 00000000..48febb08 --- /dev/null +++ b/pandas/io/json/__init__.py @@ -0,0 +1,13 @@ +from pandas.io.json._json import dumps, loads, read_json, to_json +from pandas.io.json._normalize import _json_normalize, json_normalize +from pandas.io.json._table_schema import build_table_schema + +__all__ = [ + "dumps", + "loads", + "read_json", + "to_json", + "_json_normalize", + "json_normalize", + "build_table_schema", +] diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py new file mode 100644 index 00000000..12ce5e4a --- /dev/null +++ b/pandas/io/json/_json.py @@ -0,0 +1,1180 @@ +from collections import abc +import functools +from io import StringIO +from itertools import islice +import os +from typing import Any, Callable, Optional, Type + +import numpy as np + 
+import pandas._libs.json as json +from pandas._libs.tslibs import iNaT +from pandas._typing import JSONSerializable +from pandas.errors import AbstractMethodError +from pandas.util._decorators import deprecate_kwarg + +from pandas.core.dtypes.common import ensure_str, is_period_dtype + +from pandas import DataFrame, MultiIndex, Series, isna, to_datetime +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.core.reshape.concat import concat + +from pandas.io.common import ( + get_filepath_or_buffer, + get_handle, + infer_compression, + stringify_path, +) +from pandas.io.json._normalize import convert_to_line_delimits +from pandas.io.json._table_schema import build_table_schema, parse_table_schema +from pandas.io.parsers import _validate_integer + +loads = json.loads +dumps = json.dumps + +TABLE_SCHEMA_VERSION = "0.20.0" + + +# interface to/from +def to_json( + path_or_buf, + obj, + orient: Optional[str] = None, + date_format: str = "epoch", + double_precision: int = 10, + force_ascii: bool = True, + date_unit: str = "ms", + default_handler: Optional[Callable[[Any], JSONSerializable]] = None, + lines: bool = False, + compression: Optional[str] = "infer", + index: bool = True, + indent: int = 0, +): + + if not index and orient not in ["split", "table"]: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split' or 'table'" + ) + + path_or_buf = stringify_path(path_or_buf) + if lines and orient != "records": + raise ValueError("'lines' keyword only valid when 'orient' is records") + + if orient == "table" and isinstance(obj, Series): + obj = obj.to_frame(name=obj.name or "values") + + writer: Type["Writer"] + if orient == "table" and isinstance(obj, DataFrame): + writer = JSONTableWriter + elif isinstance(obj, Series): + writer = SeriesWriter + elif isinstance(obj, DataFrame): + writer = FrameWriter + else: + raise NotImplementedError("'obj' should be a Series or a DataFrame") + + s = writer( + obj, + orient=orient, + 
date_format=date_format, + double_precision=double_precision, + ensure_ascii=force_ascii, + date_unit=date_unit, + default_handler=default_handler, + index=index, + indent=indent, + ).write() + + if lines: + s = convert_to_line_delimits(s) + + if isinstance(path_or_buf, str): + fh, handles = get_handle(path_or_buf, "w", compression=compression) + try: + fh.write(s) + finally: + fh.close() + elif path_or_buf is None: + return s + else: + path_or_buf.write(s) + + +class Writer: + def __init__( + self, + obj, + orient: Optional[str], + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Optional[Callable[[Any], JSONSerializable]] = None, + indent: int = 0, + ): + self.obj = obj + + if orient is None: + orient = self._default_orient # type: ignore + + self.orient = orient + self.date_format = date_format + self.double_precision = double_precision + self.ensure_ascii = ensure_ascii + self.date_unit = date_unit + self.default_handler = default_handler + self.index = index + self.indent = indent + + self.is_copy = None + self._format_axes() + + def _format_axes(self): + raise AbstractMethodError(self) + + def write(self): + return self._write( + self.obj, + self.orient, + self.double_precision, + self.ensure_ascii, + self.date_unit, + self.date_format == "iso", + self.default_handler, + self.indent, + ) + + def _write( + self, + obj, + orient: Optional[str], + double_precision: int, + ensure_ascii: bool, + date_unit: str, + iso_dates: bool, + default_handler: Optional[Callable[[Any], JSONSerializable]], + indent: int, + ): + return dumps( + obj, + orient=orient, + double_precision=double_precision, + ensure_ascii=ensure_ascii, + date_unit=date_unit, + iso_dates=iso_dates, + default_handler=default_handler, + indent=indent, + ) + + +class SeriesWriter(Writer): + _default_orient = "index" + + def _format_axes(self): + if not self.obj.index.is_unique and self.orient == "index": + raise ValueError(f"Series index 
must be unique for orient='{self.orient}'") + + def _write( + self, + obj, + orient: Optional[str], + double_precision: int, + ensure_ascii: bool, + date_unit: str, + iso_dates: bool, + default_handler: Optional[Callable[[Any], JSONSerializable]], + indent: int, + ): + if not self.index and orient == "split": + obj = {"name": obj.name, "data": obj.values} + return super()._write( + obj, + orient, + double_precision, + ensure_ascii, + date_unit, + iso_dates, + default_handler, + indent, + ) + + +class FrameWriter(Writer): + _default_orient = "columns" + + def _format_axes(self): + """ + Try to format axes if they are datelike. + """ + if not self.obj.index.is_unique and self.orient in ("index", "columns"): + raise ValueError( + f"DataFrame index must be unique for orient='{self.orient}'." + ) + if not self.obj.columns.is_unique and self.orient in ( + "index", + "columns", + "records", + ): + raise ValueError( + f"DataFrame columns must be unique for orient='{self.orient}'." + ) + + def _write( + self, + obj, + orient: Optional[str], + double_precision: int, + ensure_ascii: bool, + date_unit: str, + iso_dates: bool, + default_handler: Optional[Callable[[Any], JSONSerializable]], + indent: int, + ): + if not self.index and orient == "split": + obj = obj.to_dict(orient="split") + del obj["index"] + return super()._write( + obj, + orient, + double_precision, + ensure_ascii, + date_unit, + iso_dates, + default_handler, + indent, + ) + + +class JSONTableWriter(FrameWriter): + _default_orient = "records" + + def __init__( + self, + obj, + orient: Optional[str], + date_format: str, + double_precision: int, + ensure_ascii: bool, + date_unit: str, + index: bool, + default_handler: Optional[Callable[[Any], JSONSerializable]] = None, + indent: int = 0, + ): + """ + Adds a `schema` attribute with the Table Schema, resets + the index (can't do in caller, because the schema inference needs + to know what the index is, forces orient to records, and forces + date_format to 'iso'. 
+ """ + + super().__init__( + obj, + orient, + date_format, + double_precision, + ensure_ascii, + date_unit, + index, + default_handler=default_handler, + indent=indent, + ) + + if date_format != "iso": + msg = ( + "Trying to write with `orient='table'` and " + f"`date_format='{date_format}'`. Table Schema requires dates " + "to be formatted with `date_format='iso'`" + ) + raise ValueError(msg) + + self.schema = build_table_schema(obj, index=self.index) + + # NotImplemented on a column MultiIndex + if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): + raise NotImplementedError("orient='table' is not supported for MultiIndex") + + # TODO: Do this timedelta properly in objToJSON.c See GH #15137 + if ( + (obj.ndim == 1) + and (obj.name in set(obj.index.names)) + or len(obj.columns & obj.index.names) + ): + msg = "Overlapping names between the index and columns" + raise ValueError(msg) + + obj = obj.copy() + timedeltas = obj.select_dtypes(include=["timedelta"]).columns + if len(timedeltas): + obj[timedeltas] = obj[timedeltas].applymap(lambda x: x.isoformat()) + # Convert PeriodIndex to datetimes before serializing + if is_period_dtype(obj.index): + obj.index = obj.index.to_timestamp() + + # exclude index from obj if index=False + if not self.index: + self.obj = obj.reset_index(drop=True) + else: + self.obj = obj.reset_index(drop=False) + self.date_format = "iso" + self.orient = "records" + self.index = index + + def _write( + self, + obj, + orient, + double_precision, + ensure_ascii, + date_unit, + iso_dates, + default_handler, + indent, + ): + table_obj = {"schema": self.schema, "data": obj} + serialized = super()._write( + table_obj, + orient, + double_precision, + ensure_ascii, + date_unit, + iso_dates, + default_handler, + indent, + ) + + return serialized + + +@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None) +def read_json( + path_or_buf=None, + orient=None, + typ="frame", + dtype=None, + convert_axes=None, + convert_dates=True, + 
keep_default_dates=True, + numpy=False, + precise_float=False, + date_unit=None, + encoding=None, + lines=False, + chunksize=None, + compression="infer", +): + """ + Convert a JSON string to pandas object. + + Parameters + ---------- + path_or_buf : a valid JSON str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.json``. + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handler (e.g. via builtin ``open`` function) + or ``StringIO``. + orient : str + Indication of expected JSON string format. + Compatible JSON strings can be produced by ``to_json()`` with a + corresponding orient value. + The set of possible orients is: + + - ``'split'`` : dict like + ``{index -> [index], columns -> [columns], data -> [values]}`` + - ``'records'`` : list like + ``[{column -> value}, ... , {column -> value}]`` + - ``'index'`` : dict like ``{index -> {column -> value}}`` + - ``'columns'`` : dict like ``{column -> {index -> value}}`` + - ``'values'`` : just the values array + + The allowed and default values depend on the value + of the `typ` parameter. + + * when ``typ == 'series'``, + + - allowed orients are ``{'split','records','index'}`` + - default is ``'index'`` + - The Series index must be unique for orient ``'index'``. + + * when ``typ == 'frame'``, + + - allowed orients are ``{'split','records','index', + 'columns','values', 'table'}`` + - default is ``'columns'`` + - The DataFrame index must be unique for orients ``'index'`` and + ``'columns'``. + - The DataFrame columns must be unique for orients ``'index'``, + ``'columns'``, and ``'records'``. + + .. 
versionadded:: 0.23.0 + 'table' as an allowed value for the ``orient`` argument + + typ : {'frame', 'series'}, default 'frame' + The type of object to recover. + + dtype : bool or dict, default None + If True, infer dtypes; if a dict of column to dtype, then use those; + if False, then don't infer dtypes at all, applies only to the data. + + For all ``orient`` values except ``'table'``, default is True. + + .. versionchanged:: 0.25.0 + + Not applicable for ``orient='table'``. + + convert_axes : bool, default None + Try to convert the axes to the proper dtypes. + + For all ``orient`` values except ``'table'``, default is True. + + .. versionchanged:: 0.25.0 + + Not applicable for ``orient='table'``. + + convert_dates : bool or list of str, default True + List of columns to parse for dates. If True, then try to parse + datelike columns. A column label is datelike if + + * it ends with ``'_at'``, + + * it ends with ``'_time'``, + + * it begins with ``'timestamp'``, + + * it is ``'modified'``, or + + * it is ``'date'``. + + keep_default_dates : bool, default True + If parsing dates, then parse the default datelike columns. + + numpy : bool, default False + Direct decoding to numpy arrays. Supports numeric data only, but + non-numeric column and index labels are supported. Note also that the + JSON ordering MUST be the same for each term if numpy=True. + + .. deprecated:: 1.0.0 + + precise_float : bool, default False + Set to enable usage of higher precision (strtod) function when + decoding string to double values. Default (False) is to use fast but + less precise builtin functionality. + + date_unit : str, default None + The timestamp unit to detect if converting dates. The default behaviour + is to try and detect the correct precision, but if this is not desired + then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds, + milliseconds, microseconds or nanoseconds respectively. 
+ + encoding : str, default is 'utf-8' + The encoding to use to decode py3 bytes. + + lines : bool, default False + Read the file as a json object per line. + + chunksize : int, optional + Return JsonReader object for iteration. + See the `line-delimited json docs + `_ + for more information on ``chunksize``. + This can only be passed if `lines=True`. + If this is None, the file will be read into memory all at once. + + .. versionadded:: 0.21.0 + + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer', then use + gzip, bz2, zip or xz if path_or_buf is a string ending in + '.gz', '.bz2', '.zip', or 'xz', respectively, and no decompression + otherwise. If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. + + .. versionadded:: 0.21.0 + + Returns + ------- + Series or DataFrame + The type returned depends on the value of `typ`. + + See Also + -------- + DataFrame.to_json : Convert a DataFrame to a JSON string. + Series.to_json : Convert a Series to a JSON string. + + Notes + ----- + Specific to ``orient='table'``, if a :class:`DataFrame` with a literal + :class:`Index` name of `index` gets written with :func:`to_json`, the + subsequent read operation will incorrectly set the :class:`Index` name to + ``None``. This is because `index` is also used by :func:`DataFrame.to_json` + to denote a missing :class:`Index` name, and the subsequent + :func:`read_json` operation cannot distinguish between the two. The same + limitation is encountered with a :class:`MultiIndex` and any names + beginning with ``'level_'``. + + Examples + -------- + + >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']], + ... index=['row 1', 'row 2'], + ... 
columns=['col 1', 'col 2']) + + Encoding/decoding a Dataframe using ``'split'`` formatted JSON: + + >>> df.to_json(orient='split') + '{"columns":["col 1","col 2"], + "index":["row 1","row 2"], + "data":[["a","b"],["c","d"]]}' + >>> pd.read_json(_, orient='split') + col 1 col 2 + row 1 a b + row 2 c d + + Encoding/decoding a Dataframe using ``'index'`` formatted JSON: + + >>> df.to_json(orient='index') + '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}' + >>> pd.read_json(_, orient='index') + col 1 col 2 + row 1 a b + row 2 c d + + Encoding/decoding a Dataframe using ``'records'`` formatted JSON. + Note that index labels are not preserved with this encoding. + + >>> df.to_json(orient='records') + '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]' + >>> pd.read_json(_, orient='records') + col 1 col 2 + 0 a b + 1 c d + + Encoding with Table Schema + + >>> df.to_json(orient='table') + '{"schema": {"fields": [{"name": "index", "type": "string"}, + {"name": "col 1", "type": "string"}, + {"name": "col 2", "type": "string"}], + "primaryKey": "index", + "pandas_version": "0.20.0"}, + "data": [{"index": "row 1", "col 1": "a", "col 2": "b"}, + {"index": "row 2", "col 1": "c", "col 2": "d"}]}' + """ + + if orient == "table" and dtype: + raise ValueError("cannot pass both dtype and orient='table'") + if orient == "table" and convert_axes: + raise ValueError("cannot pass both convert_axes and orient='table'") + + if dtype is None and orient != "table": + dtype = True + if convert_axes is None and orient != "table": + convert_axes = True + if encoding is None: + encoding = "utf-8" + + compression = infer_compression(path_or_buf, compression) + filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( + path_or_buf, encoding=encoding, compression=compression + ) + + json_reader = JsonReader( + filepath_or_buffer, + orient=orient, + typ=typ, + dtype=dtype, + convert_axes=convert_axes, + convert_dates=convert_dates, + 
keep_default_dates=keep_default_dates, + numpy=numpy, + precise_float=precise_float, + date_unit=date_unit, + encoding=encoding, + lines=lines, + chunksize=chunksize, + compression=compression, + ) + + if chunksize: + return json_reader + + result = json_reader.read() + if should_close: + filepath_or_buffer.close() + + return result + + +class JsonReader(abc.Iterator): + """ + JsonReader provides an interface for reading in a JSON file. + + If initialized with ``lines=True`` and ``chunksize``, can be iterated over + ``chunksize`` lines at a time. Otherwise, calling ``read`` reads in the + whole document. + """ + + def __init__( + self, + filepath_or_buffer, + orient, + typ, + dtype, + convert_axes, + convert_dates, + keep_default_dates, + numpy, + precise_float, + date_unit, + encoding, + lines, + chunksize, + compression, + ): + + self.path_or_buf = filepath_or_buffer + self.orient = orient + self.typ = typ + self.dtype = dtype + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.keep_default_dates = keep_default_dates + self.numpy = numpy + self.precise_float = precise_float + self.date_unit = date_unit + self.encoding = encoding + self.compression = compression + self.lines = lines + self.chunksize = chunksize + self.nrows_seen = 0 + self.should_close = False + + if self.chunksize is not None: + self.chunksize = _validate_integer("chunksize", self.chunksize, 1) + if not self.lines: + raise ValueError("chunksize can only be passed if lines=True") + + data = self._get_data_from_filepath(filepath_or_buffer) + self.data = self._preprocess_data(data) + + def _preprocess_data(self, data): + """ + At this point, the data either has a `read` attribute (e.g. a file + object or a StringIO) or is a string that is a JSON document. + + If self.chunksize, we prepare the data for the `__next__` method. + Otherwise, we read it into memory for the `read` method. 
+ """ + if hasattr(data, "read") and not self.chunksize: + data = data.read() + if not hasattr(data, "read") and self.chunksize: + data = StringIO(data) + + return data + + def _get_data_from_filepath(self, filepath_or_buffer): + """ + The function read_json accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. JSON string + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. + """ + data = filepath_or_buffer + + exists = False + if isinstance(data, str): + try: + exists = os.path.exists(filepath_or_buffer) + # gh-5874: if the filepath is too long will raise here + except (TypeError, ValueError): + pass + + if exists or self.compression is not None: + data, _ = get_handle( + filepath_or_buffer, + "r", + encoding=self.encoding, + compression=self.compression, + ) + self.should_close = True + self.open_stream = data + + return data + + def _combine_lines(self, lines) -> str: + """ + Combines a list of JSON objects into one JSON object. + """ + lines = filter(None, map(lambda x: x.strip(), lines)) + return "[" + ",".join(lines) + "]" + + def read(self): + """ + Read the whole JSON input into a pandas object. + """ + if self.lines and self.chunksize: + obj = concat(self) + elif self.lines: + data = ensure_str(self.data) + obj = self._get_object_parser(self._combine_lines(data.split("\n"))) + else: + obj = self._get_object_parser(self.data) + self.close() + return obj + + def _get_object_parser(self, json): + """ + Parses a json document into a pandas object. 
+ """ + typ = self.typ + dtype = self.dtype + kwargs = { + "orient": self.orient, + "dtype": self.dtype, + "convert_axes": self.convert_axes, + "convert_dates": self.convert_dates, + "keep_default_dates": self.keep_default_dates, + "numpy": self.numpy, + "precise_float": self.precise_float, + "date_unit": self.date_unit, + } + obj = None + if typ == "frame": + obj = FrameParser(json, **kwargs).parse() + + if typ == "series" or obj is None: + if not isinstance(dtype, bool): + kwargs["dtype"] = dtype + obj = SeriesParser(json, **kwargs).parse() + + return obj + + def close(self): + """ + If we opened a stream earlier, in _get_data_from_filepath, we should + close it. + + If an open stream or file was passed, we leave it open. + """ + if self.should_close: + try: + self.open_stream.close() + except (IOError, AttributeError): + pass + + def __next__(self): + lines = list(islice(self.data, self.chunksize)) + if lines: + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + + # Make sure that the returned objects have the right index. 
+ obj.index = range(self.nrows_seen, self.nrows_seen + len(obj)) + self.nrows_seen += len(obj) + + return obj + + self.close() + raise StopIteration + + +class Parser: + + _STAMP_UNITS = ("s", "ms", "us", "ns") + _MIN_STAMPS = { + "s": 31536000, + "ms": 31536000000, + "us": 31536000000000, + "ns": 31536000000000000, + } + + def __init__( + self, + json, + orient, + dtype=None, + convert_axes=True, + convert_dates=True, + keep_default_dates=False, + numpy=False, + precise_float=False, + date_unit=None, + ): + self.json = json + + if orient is None: + orient = self._default_orient + self.orient = orient + + self.dtype = dtype + + if orient == "split": + numpy = False + + if date_unit is not None: + date_unit = date_unit.lower() + if date_unit not in self._STAMP_UNITS: + raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}") + self.min_stamp = self._MIN_STAMPS[date_unit] + else: + self.min_stamp = self._MIN_STAMPS["s"] + + self.numpy = numpy + self.precise_float = precise_float + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.date_unit = date_unit + self.keep_default_dates = keep_default_dates + self.obj = None + + def check_keys_split(self, decoded): + """ + Checks that dict has only the appropriate keys for orient='split'. + """ + bad_keys = set(decoded.keys()).difference(set(self._split_keys)) + if bad_keys: + bad_keys = ", ".join(bad_keys) + raise ValueError(f"JSON data had unexpected key(s): {bad_keys}") + + def parse(self): + + # try numpy + numpy = self.numpy + if numpy: + self._parse_numpy() + + else: + self._parse_no_numpy() + + if self.obj is None: + return None + if self.convert_axes: + self._convert_axes() + self._try_convert_types() + return self.obj + + def _convert_axes(self): + """ + Try to convert axes. 
+ """ + for axis in self.obj._AXIS_NUMBERS.keys(): + new_axis, result = self._try_convert_data( + axis, self.obj._get_axis(axis), use_dtypes=False, convert_dates=True + ) + if result: + setattr(self.obj, axis, new_axis) + + def _try_convert_types(self): + raise AbstractMethodError(self) + + def _try_convert_data(self, name, data, use_dtypes=True, convert_dates=True): + """ + Try to parse a ndarray like into a column by inferring dtype. + """ + + # don't try to coerce, unless a force conversion + if use_dtypes: + if not self.dtype: + return data, False + elif self.dtype is True: + pass + else: + # dtype to force + dtype = ( + self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype + ) + if dtype is not None: + try: + dtype = np.dtype(dtype) + return data.astype(dtype), True + except (TypeError, ValueError): + return data, False + + if convert_dates: + new_data, result = self._try_convert_to_date(data) + if result: + return new_data, True + + result = False + + if data.dtype == "object": + + # try float + try: + data = data.astype("float64") + result = True + except (TypeError, ValueError): + pass + + if data.dtype.kind == "f": + + if data.dtype != "float64": + + # coerce floats to 64 + try: + data = data.astype("float64") + result = True + except (TypeError, ValueError): + pass + + # don't coerce 0-len data + if len(data) and (data.dtype == "float" or data.dtype == "object"): + + # coerce ints if we can + try: + new_data = data.astype("int64") + if (new_data == data).all(): + data = new_data + result = True + except (TypeError, ValueError): + pass + + # coerce ints to 64 + if data.dtype == "int": + + # coerce floats to 64 + try: + data = data.astype("int64") + result = True + except (TypeError, ValueError): + pass + + return data, result + + def _try_convert_to_date(self, data): + """ + Try to parse a ndarray like into a date column. + + Try to coerce object in epoch/iso formats and integer/float in epoch + formats. 
Return a boolean if parsing was successful. + """ + + # no conversion on empty + if not len(data): + return data, False + + new_data = data + if new_data.dtype == "object": + try: + new_data = data.astype("int64") + except (TypeError, ValueError, OverflowError): + pass + + # ignore numbers that are out of range + if issubclass(new_data.dtype.type, np.number): + in_range = ( + isna(new_data.values) + | (new_data > self.min_stamp) + | (new_data.values == iNaT) + ) + if not in_range.all(): + return data, False + + date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS + for date_unit in date_units: + try: + new_data = to_datetime(new_data, errors="raise", unit=date_unit) + except (ValueError, OverflowError): + continue + return new_data, True + return data, False + + def _try_convert_dates(self): + raise AbstractMethodError(self) + + +class SeriesParser(Parser): + _default_orient = "index" + _split_keys = ("name", "index", "data") + + def _parse_no_numpy(self): + data = loads(self.json, precise_float=self.precise_float) + + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} + self.check_keys_split(decoded) + self.obj = create_series_with_explicit_dtype(**decoded) + else: + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + + def _parse_numpy(self): + load_kwargs = { + "dtype": None, + "numpy": True, + "precise_float": self.precise_float, + } + if self.orient in ["columns", "index"]: + load_kwargs["labelled"] = True + loads_ = functools.partial(loads, **load_kwargs) + data = loads_(self.json) + + if self.orient == "split": + decoded = {str(k): v for k, v in data.items()} + self.check_keys_split(decoded) + self.obj = create_series_with_explicit_dtype(**decoded) + elif self.orient in ["columns", "index"]: + self.obj = create_series_with_explicit_dtype(*data, dtype_if_empty=object) + else: + self.obj = create_series_with_explicit_dtype(data, dtype_if_empty=object) + + def _try_convert_types(self): + if 
self.obj is None: + return + obj, result = self._try_convert_data( + "data", self.obj, convert_dates=self.convert_dates + ) + if result: + self.obj = obj + + +class FrameParser(Parser): + _default_orient = "columns" + _split_keys = ("columns", "index", "data") + + def _parse_numpy(self): + + json = self.json + orient = self.orient + + if orient == "columns": + args = loads( + json, + dtype=None, + numpy=True, + labelled=True, + precise_float=self.precise_float, + ) + if len(args): + args = (args[0].T, args[2], args[1]) + self.obj = DataFrame(*args) + elif orient == "split": + decoded = loads( + json, dtype=None, numpy=True, precise_float=self.precise_float + ) + decoded = {str(k): v for k, v in decoded.items()} + self.check_keys_split(decoded) + self.obj = DataFrame(**decoded) + elif orient == "values": + self.obj = DataFrame( + loads(json, dtype=None, numpy=True, precise_float=self.precise_float) + ) + else: + self.obj = DataFrame( + *loads( + json, + dtype=None, + numpy=True, + labelled=True, + precise_float=self.precise_float, + ) + ) + + def _parse_no_numpy(self): + + json = self.json + orient = self.orient + + if orient == "columns": + self.obj = DataFrame( + loads(json, precise_float=self.precise_float), dtype=None + ) + elif orient == "split": + decoded = { + str(k): v + for k, v in loads(json, precise_float=self.precise_float).items() + } + self.check_keys_split(decoded) + self.obj = DataFrame(dtype=None, **decoded) + elif orient == "index": + self.obj = DataFrame.from_dict( + loads(json, precise_float=self.precise_float), + dtype=None, + orient="index", + ) + elif orient == "table": + self.obj = parse_table_schema(json, precise_float=self.precise_float) + else: + self.obj = DataFrame( + loads(json, precise_float=self.precise_float), dtype=None + ) + + def _process_converter(self, f, filt=None): + """ + Take a conversion function and possibly recreate the frame. 
+ """ + + if filt is None: + filt = lambda col, c: True + + needs_new_obj = False + new_obj = dict() + for i, (col, c) in enumerate(self.obj.items()): + if filt(col, c): + new_data, result = f(col, c) + if result: + c = new_data + needs_new_obj = True + new_obj[i] = c + + if needs_new_obj: + + # possibly handle dup columns + new_obj = DataFrame(new_obj, index=self.obj.index) + new_obj.columns = self.obj.columns + self.obj = new_obj + + def _try_convert_types(self): + if self.obj is None: + return + if self.convert_dates: + self._try_convert_dates() + + self._process_converter( + lambda col, c: self._try_convert_data(col, c, convert_dates=False) + ) + + def _try_convert_dates(self): + if self.obj is None: + return + + # our columns to parse + convert_dates = self.convert_dates + if convert_dates is True: + convert_dates = [] + convert_dates = set(convert_dates) + + def is_ok(col) -> bool: + """ + Return if this col is ok to try for a date parse. + """ + if not isinstance(col, str): + return False + + col_lower = col.lower() + if ( + col_lower.endswith("_at") + or col_lower.endswith("_time") + or col_lower == "modified" + or col_lower == "date" + or col_lower == "datetime" + or col_lower.startswith("timestamp") + ): + return True + return False + + self._process_converter( + lambda col, c: self._try_convert_to_date(c), + lambda col, c: ( + (self.keep_default_dates and is_ok(col)) or col in convert_dates + ), + ) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py new file mode 100644 index 00000000..ea1f2405 --- /dev/null +++ b/pandas/io/json/_normalize.py @@ -0,0 +1,363 @@ +# --------------------------------------------------------------------- +# JSON normalization routines + +from collections import defaultdict +import copy +from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Union + +import numpy as np + +from pandas._libs.writers import convert_json_to_lines +from pandas._typing import Scalar +from pandas.util._decorators 
import deprecate + +import pandas as pd +from pandas import DataFrame + + +def convert_to_line_delimits(s): + """ + Helper function that converts JSON lists to line delimited JSON. + """ + + # Determine we have a JSON list to turn to lines otherwise just return the + # json object, only lists can + if not s[0] == "[" and s[-1] == "]": + return s + s = s[1:-1] + + return convert_json_to_lines(s) + + +def nested_to_record( + ds, + prefix: str = "", + sep: str = ".", + level: int = 0, + max_level: Optional[int] = None, +): + """ + A simplified json_normalize + + Converts a nested dict into a flat dict ("record"), unlike json_normalize, + it does not attempt to extract a subset of the data. + + Parameters + ---------- + ds : dict or list of dicts + prefix: the prefix, optional, default: "" + sep : str, default '.' + Nested records will generate names separated by sep, + e.g., for sep='.', { 'foo' : { 'bar' : 0 } } -> foo.bar + level: int, optional, default: 0 + The number of levels in the json string. + + max_level: int, optional, default: None + The max depth to normalize. + + .. 
versionadded:: 0.25.0 + + Returns + ------- + d - dict or list of dicts, matching `ds` + + Examples + -------- + + IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2), + nested=dict(e=dict(c=1,d=2),d=2))) + Out[52]: + {'dict1.c': 1, + 'dict1.d': 2, + 'flat1': 1, + 'nested.d': 2, + 'nested.e.c': 1, + 'nested.e.d': 2} + """ + singleton = False + if isinstance(ds, dict): + ds = [ds] + singleton = True + new_ds = [] + for d in ds: + new_d = copy.deepcopy(d) + for k, v in d.items(): + # each key gets renamed with prefix + if not isinstance(k, str): + k = str(k) + if level == 0: + newkey = k + else: + newkey = prefix + sep + k + + # flatten if type is dict and + # current dict level < maximum level provided and + # only dicts gets recurse-flattened + # only at level>1 do we rename the rest of the keys + if not isinstance(v, dict) or ( + max_level is not None and level >= max_level + ): + if level != 0: # so we skip copying for top level, common case + v = new_d.pop(k) + new_d[newkey] = v + continue + else: + v = new_d.pop(k) + new_d.update(nested_to_record(v, newkey, sep, level + 1, max_level)) + new_ds.append(new_d) + + if singleton: + return new_ds[0] + return new_ds + + +def _json_normalize( + data: Union[Dict, List[Dict]], + record_path: Optional[Union[str, List]] = None, + meta: Optional[Union[str, List[Union[str, List[str]]]]] = None, + meta_prefix: Optional[str] = None, + record_prefix: Optional[str] = None, + errors: Optional[str] = "raise", + sep: str = ".", + max_level: Optional[int] = None, +) -> "DataFrame": + """ + Normalize semi-structured JSON data into a flat table. + + Parameters + ---------- + data : dict or list of dicts + Unserialized JSON objects. + record_path : str or list of str, default None + Path in each object to list of records. If not passed, data will be + assumed to be an array of records. + meta : list of paths (str or list of str), default None + Fields to use as metadata for each record in resulting table. 
+ meta_prefix : str, default None + If True, prefix records with dotted (?) path, e.g. foo.bar.field if + meta is ['foo', 'bar']. + record_prefix : str, default None + If True, prefix records with dotted (?) path, e.g. foo.bar.field if + path to records is ['foo', 'bar']. + errors : {'raise', 'ignore'}, default 'raise' + Configures error handling. + + * 'ignore' : will ignore KeyError if keys listed in meta are not + always present. + * 'raise' : will raise KeyError if keys listed in meta are not + always present. + sep : str, default '.' + Nested records will generate names separated by sep. + e.g., for sep='.', {'foo': {'bar': 0}} -> foo.bar. + max_level : int, default None + Max number of levels(depth of dict) to normalize. + if None, normalizes all levels. + + .. versionadded:: 0.25.0 + + Returns + ------- + frame : DataFrame + Normalize semi-structured JSON data into a flat table. + + Examples + -------- + + >>> from pandas.io.json import json_normalize + >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, + ... {'name': {'given': 'Mose', 'family': 'Regner'}}, + ... {'id': 2, 'name': 'Faye Raker'}] + >>> json_normalize(data) + id name name.family name.first name.given name.last + 0 1.0 NaN NaN Coleen NaN Volk + 1 NaN NaN Regner NaN Mose NaN + 2 2.0 Faye Raker NaN NaN NaN NaN + + >>> data = [{'id': 1, + ... 'name': "Cole Volk", + ... 'fitness': {'height': 130, 'weight': 60}}, + ... {'name': "Mose Reg", + ... 'fitness': {'height': 130, 'weight': 60}}, + ... {'id': 2, 'name': 'Faye Raker', + ... 'fitness': {'height': 130, 'weight': 60}}] + >>> json_normalize(data, max_level=0) + fitness id name + 0 {'height': 130, 'weight': 60} 1.0 Cole Volk + 1 {'height': 130, 'weight': 60} NaN Mose Reg + 2 {'height': 130, 'weight': 60} 2.0 Faye Raker + + Normalizes nested data up to level 1. + + >>> data = [{'id': 1, + ... 'name': "Cole Volk", + ... 'fitness': {'height': 130, 'weight': 60}}, + ... {'name': "Mose Reg", + ... 
'fitness': {'height': 130, 'weight': 60}}, + ... {'id': 2, 'name': 'Faye Raker', + ... 'fitness': {'height': 130, 'weight': 60}}] + >>> json_normalize(data, max_level=1) + fitness.height fitness.weight id name + 0 130 60 1.0 Cole Volk + 1 130 60 NaN Mose Reg + 2 130 60 2.0 Faye Raker + + >>> data = [{'state': 'Florida', + ... 'shortname': 'FL', + ... 'info': {'governor': 'Rick Scott'}, + ... 'counties': [{'name': 'Dade', 'population': 12345}, + ... {'name': 'Broward', 'population': 40000}, + ... {'name': 'Palm Beach', 'population': 60000}]}, + ... {'state': 'Ohio', + ... 'shortname': 'OH', + ... 'info': {'governor': 'John Kasich'}, + ... 'counties': [{'name': 'Summit', 'population': 1234}, + ... {'name': 'Cuyahoga', 'population': 1337}]}] + >>> result = json_normalize(data, 'counties', ['state', 'shortname', + ... ['info', 'governor']]) + >>> result + name population state shortname info.governor + 0 Dade 12345 Florida FL Rick Scott + 1 Broward 40000 Florida FL Rick Scott + 2 Palm Beach 60000 Florida FL Rick Scott + 3 Summit 1234 Ohio OH John Kasich + 4 Cuyahoga 1337 Ohio OH John Kasich + + >>> data = {'A': [1, 2]} + >>> json_normalize(data, 'A', record_prefix='Prefix.') + Prefix.0 + 0 1 + 1 2 + + Returns normalized data with columns prefixed with the given string. + """ + + def _pull_field( + js: Dict[str, Any], spec: Union[List, str] + ) -> Union[Scalar, Iterable]: + """Internal function to pull field""" + result = js # type: ignore + if isinstance(spec, list): + for field in spec: + result = result[field] + else: + result = result[spec] + return result + + def _pull_records(js: Dict[str, Any], spec: Union[List, str]) -> Iterable: + """ + Interal function to pull field for records, and similar to + _pull_field, but require to return Iterable. And will raise error + if has non iterable value. 
+ """ + result = _pull_field(js, spec) + + # GH 31507 GH 30145, if result is not Iterable, raise TypeError if not + # null, otherwise return an empty list + if not isinstance(result, Iterable): + if pd.isnull(result): + result = [] # type: ignore + else: + raise TypeError( + f"{js} has non iterable value {result} for path {spec}. " + "Must be iterable or null." + ) + return result + + if isinstance(data, list) and not data: + return DataFrame() + + # A bit of a hackjob + if isinstance(data, dict): + data = [data] + + if record_path is None: + if any([isinstance(x, dict) for x in y.values()] for y in data): + # naive normalization, this is idempotent for flat records + # and potentially will inflate the data considerably for + # deeply nested structures: + # {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:@} + # + # TODO: handle record value which are lists, at least error + # reasonably + data = nested_to_record(data, sep=sep, max_level=max_level) + return DataFrame(data) + elif not isinstance(record_path, list): + record_path = [record_path] + + if meta is None: + meta = [] + elif not isinstance(meta, list): + meta = [meta] + + _meta = [m if isinstance(m, list) else [m] for m in meta] + + # Disastrously inefficient for now + records: List = [] + lengths = [] + + meta_vals: DefaultDict = defaultdict(list) + meta_keys = [sep.join(val) for val in _meta] + + def _recursive_extract(data, path, seen_meta, level=0): + if isinstance(data, dict): + data = [data] + if len(path) > 1: + for obj in data: + for val, key in zip(_meta, meta_keys): + if level + 1 == len(val): + seen_meta[key] = _pull_field(obj, val[-1]) + + _recursive_extract(obj[path[0]], path[1:], seen_meta, level=level + 1) + else: + for obj in data: + recs = _pull_records(obj, path[0]) + recs = [ + nested_to_record(r, sep=sep, max_level=max_level) + if isinstance(r, dict) + else r + for r in recs + ] + + # For repeating the metadata later + lengths.append(len(recs)) + for val, key in zip(_meta, 
meta_keys): + if level + 1 > len(val): + meta_val = seen_meta[key] + else: + try: + meta_val = _pull_field(obj, val[level:]) + except KeyError as e: + if errors == "ignore": + meta_val = np.nan + else: + raise KeyError( + "Try running with " + "errors='ignore' as key " + f"{e} is not always present" + ) + meta_vals[key].append(meta_val) + records.extend(recs) + + _recursive_extract(data, record_path, {}, level=0) + + result = DataFrame(records) + + if record_prefix is not None: + result = result.rename(columns=lambda x: f"{record_prefix}{x}") + + # Data types, a problem + for k, v in meta_vals.items(): + if meta_prefix is not None: + k = meta_prefix + k + + if k in result: + raise ValueError( + f"Conflicting metadata name {k}, need distinguishing prefix " + ) + result[k] = np.array(v, dtype=object).repeat(lengths) + return result + + +json_normalize = deprecate( + "pandas.io.json.json_normalize", _json_normalize, "1.0.0", "pandas.json_normalize" +) diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py new file mode 100644 index 00000000..5f23b95c --- /dev/null +++ b/pandas/io/json/_table_schema.py @@ -0,0 +1,338 @@ +""" +Table Schema builders + +http://specs.frictionlessdata.io/json-table-schema/ +""" +import warnings + +import pandas._libs.json as json + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_categorical_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_numeric_dtype, + is_period_dtype, + is_string_dtype, + is_timedelta64_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import DataFrame +import pandas.core.common as com + +loads = json.loads + + +def as_json_table_type(x): + """ + Convert a NumPy / pandas type to its corresponding json_table. 
+ + Parameters + ---------- + x : array or dtype + + Returns + ------- + t : str + the Table Schema data types + + Notes + ----- + This table shows the relationship between NumPy / pandas dtypes, + and Table Schema dtypes. + + ============== ================= + Pandas type Table Schema type + ============== ================= + int64 integer + float64 number + bool boolean + datetime64[ns] datetime + timedelta64[ns] duration + object str + categorical any + =============== ================= + """ + if is_integer_dtype(x): + return "integer" + elif is_bool_dtype(x): + return "boolean" + elif is_numeric_dtype(x): + return "number" + elif is_datetime64_dtype(x) or is_datetime64tz_dtype(x) or is_period_dtype(x): + return "datetime" + elif is_timedelta64_dtype(x): + return "duration" + elif is_categorical_dtype(x): + return "any" + elif is_string_dtype(x): + return "string" + else: + return "any" + + +def set_default_names(data): + """Sets index names to 'index' for regular, or 'level_x' for Multi""" + if com.all_not_none(*data.index.names): + nms = data.index.names + if len(nms) == 1 and data.index.name == "index": + warnings.warn("Index name of 'index' is not round-trippable") + elif len(nms) > 1 and any(x.startswith("level_") for x in nms): + warnings.warn("Index names beginning with 'level_' are not round-trippable") + return data + + data = data.copy() + if data.index.nlevels > 1: + names = [ + name if name is not None else f"level_{i}" + for i, name in enumerate(data.index.names) + ] + data.index.names = names + else: + data.index.name = data.index.name or "index" + return data + + +def convert_pandas_type_to_json_field(arr, dtype=None): + dtype = dtype or arr.dtype + if arr.name is None: + name = "values" + else: + name = arr.name + field = {"name": name, "type": as_json_table_type(dtype)} + + if is_categorical_dtype(arr): + if hasattr(arr, "categories"): + cats = arr.categories + ordered = arr.ordered + else: + cats = arr.cat.categories + ordered = 
arr.cat.ordered + field["constraints"] = {"enum": list(cats)} + field["ordered"] = ordered + elif is_period_dtype(arr): + field["freq"] = arr.freqstr + elif is_datetime64tz_dtype(arr): + if hasattr(arr, "dt"): + field["tz"] = arr.dt.tz.zone + else: + field["tz"] = arr.tz.zone + return field + + +def convert_json_field_to_pandas_type(field): + """ + Converts a JSON field descriptor into its corresponding NumPy / pandas type + + Parameters + ---------- + field + A JSON field descriptor + + Returns + ------- + dtype + + Raises + ------ + ValueError + If the type of the provided field is unknown or currently unsupported + + Examples + -------- + >>> convert_json_field_to_pandas_type({'name': 'an_int', + 'type': 'integer'}) + 'int64' + >>> convert_json_field_to_pandas_type({'name': 'a_categorical', + 'type': 'any', + 'constraints': {'enum': [ + 'a', 'b', 'c']}, + 'ordered': True}) + 'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)' + >>> convert_json_field_to_pandas_type({'name': 'a_datetime', + 'type': 'datetime'}) + 'datetime64[ns]' + >>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz', + 'type': 'datetime', + 'tz': 'US/Central'}) + 'datetime64[ns, US/Central]' + """ + typ = field["type"] + if typ == "string": + return "object" + elif typ == "integer": + return "int64" + elif typ == "number": + return "float64" + elif typ == "boolean": + return "bool" + elif typ == "duration": + return "timedelta64" + elif typ == "datetime": + if field.get("tz"): + return f"datetime64[ns, {field['tz']}]" + else: + return "datetime64[ns]" + elif typ == "any": + if "constraints" in field and "ordered" in field: + return CategoricalDtype( + categories=field["constraints"]["enum"], ordered=field["ordered"] + ) + else: + return "object" + + raise ValueError(f"Unsupported or invalid field type: {typ}") + + +def build_table_schema(data, index=True, primary_key=None, version=True): + """ + Create a Table schema from ``data``. 
+ + Parameters + ---------- + data : Series, DataFrame + index : bool, default True + Whether to include ``data.index`` in the schema. + primary_key : bool or None, default True + Column names to designate as the primary key. + The default `None` will set `'primaryKey'` to the index + level or levels if the index is unique. + version : bool, default True + Whether to include a field `pandas_version` with the version + of pandas that generated the schema. + + Returns + ------- + schema : dict + + Notes + ----- + See `_as_json_table_type` for conversion types. + Timedeltas as converted to ISO8601 duration format with + 9 decimal places after the seconds field for nanosecond precision. + + Categoricals are converted to the `any` dtype, and use the `enum` field + constraint to list the allowed values. The `ordered` attribute is included + in an `ordered` field. + + Examples + -------- + >>> df = pd.DataFrame( + ... {'A': [1, 2, 3], + ... 'B': ['a', 'b', 'c'], + ... 'C': pd.date_range('2016-01-01', freq='d', periods=3), + ... 
}, index=pd.Index(range(3), name='idx')) + >>> build_table_schema(df) + {'fields': [{'name': 'idx', 'type': 'integer'}, + {'name': 'A', 'type': 'integer'}, + {'name': 'B', 'type': 'string'}, + {'name': 'C', 'type': 'datetime'}], + 'pandas_version': '0.20.0', + 'primaryKey': ['idx']} + """ + if index is True: + data = set_default_names(data) + + schema = {} + fields = [] + + if index: + if data.index.nlevels > 1: + for level, name in zip(data.index.levels, data.index.names): + new_field = convert_pandas_type_to_json_field(level) + new_field["name"] = name + fields.append(new_field) + else: + fields.append(convert_pandas_type_to_json_field(data.index)) + + if data.ndim > 1: + for column, s in data.items(): + fields.append(convert_pandas_type_to_json_field(s)) + else: + fields.append(convert_pandas_type_to_json_field(data)) + + schema["fields"] = fields + if index and data.index.is_unique and primary_key is None: + if data.index.nlevels == 1: + schema["primaryKey"] = [data.index.name] + else: + schema["primaryKey"] = data.index.names + elif primary_key is not None: + schema["primaryKey"] = primary_key + + if version: + schema["pandas_version"] = "0.20.0" + return schema + + +def parse_table_schema(json, precise_float): + """ + Builds a DataFrame from a given schema + + Parameters + ---------- + json : + A JSON table schema + precise_float : boolean + Flag controlling precision when decoding string to double values, as + dictated by ``read_json`` + + Returns + ------- + df : DataFrame + + Raises + ------ + NotImplementedError + If the JSON table schema contains either timezone or timedelta data + + Notes + ----- + Because :func:`DataFrame.to_json` uses the string 'index' to denote a + name-less :class:`Index`, this function sets the name of the returned + :class:`DataFrame` to ``None`` when said string is encountered with a + normal :class:`Index`. For a :class:`MultiIndex`, the same limitation + applies to any strings beginning with 'level_'. 
Therefore, an + :class:`Index` name of 'index' and :class:`MultiIndex` names starting + with 'level_' are not supported. + + See Also + -------- + build_table_schema : Inverse function. + pandas.read_json + """ + table = loads(json, precise_float=precise_float) + col_order = [field["name"] for field in table["schema"]["fields"]] + df = DataFrame(table["data"], columns=col_order)[col_order] + + dtypes = { + field["name"]: convert_json_field_to_pandas_type(field) + for field in table["schema"]["fields"] + } + + # Cannot directly use as_type with timezone data on object; raise for now + if any(str(x).startswith("datetime64[ns, ") for x in dtypes.values()): + raise NotImplementedError('table="orient" can not yet read timezone data') + + # No ISO constructor for Timedelta as of yet, so need to raise + if "timedelta64" in dtypes.values(): + raise NotImplementedError( + 'table="orient" can not yet read ISO-formatted Timedelta data' + ) + + df = df.astype(dtypes) + + if "primaryKey" in table["schema"]: + df = df.set_index(table["schema"]["primaryKey"]) + if len(df.index.names) == 1: + if df.index.name == "index": + df.index.name = None + else: + df.index.names = [ + None if x.startswith("level_") else x for x in df.index.names + ] + + return df diff --git a/pandas/io/orc.py b/pandas/io/orc.py new file mode 100644 index 00000000..bbefe447 --- /dev/null +++ b/pandas/io/orc.py @@ -0,0 +1,57 @@ +""" orc compat """ + +import distutils +from typing import TYPE_CHECKING, List, Optional + +from pandas._typing import FilePathOrBuffer + +from pandas.io.common import get_filepath_or_buffer + +if TYPE_CHECKING: + from pandas import DataFrame + + +def read_orc( + path: FilePathOrBuffer, columns: Optional[List[str]] = None, **kwargs, +) -> "DataFrame": + """ + Load an ORC object from the file path, returning a DataFrame. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + path : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. 
Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.orc``. + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handler (e.g. via builtin ``open`` function) + or ``StringIO``. + columns : list, default None + If not None, only these columns will be read from the file. + **kwargs + Any additional kwargs are passed to pyarrow. + + Returns + ------- + DataFrame + """ + + # we require a newer version of pyarrow than we support for parquet + import pyarrow + + if distutils.version.LooseVersion(pyarrow.__version__) < "0.13.0": + raise ImportError("pyarrow must be >= 0.13.0 for read_orc") + + import pyarrow.orc + + path, _, _, _ = get_filepath_or_buffer(path) + orc_file = pyarrow.orc.ORCFile(path) + result = orc_file.read(columns=columns, **kwargs).to_pandas() + return result diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py new file mode 100644 index 00000000..ff6e1869 --- /dev/null +++ b/pandas/io/parquet.py @@ -0,0 +1,312 @@ +""" parquet compat """ + +from typing import Any, Dict, Optional +from warnings import catch_warnings + +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError + +from pandas import DataFrame, get_option + +from pandas.io.common import get_filepath_or_buffer, is_gcs_url, is_s3_url + + +def get_engine(engine: str) -> "BaseImpl": + """ return our implementation """ + + if engine == "auto": + engine = get_option("io.parquet.engine") + + if engine == "auto": + # try engines in this order + try: + return PyArrowImpl() + except ImportError: + pass + + try: + return FastParquetImpl() + except ImportError: + pass + + raise ImportError( + "Unable to find a usable engine; " + "tried using: 'pyarrow', 'fastparquet'.\n" + "pyarrow or fastparquet is required for parquet " + "support" + ) + + 
if engine == "pyarrow": + return PyArrowImpl() + elif engine == "fastparquet": + return FastParquetImpl() + + raise ValueError("engine must be one of 'pyarrow', 'fastparquet'") + + +class BaseImpl: + @staticmethod + def validate_dataframe(df: DataFrame): + + if not isinstance(df, DataFrame): + raise ValueError("to_parquet only supports IO with DataFrames") + + # must have value column names (strings only) + if df.columns.inferred_type not in {"string", "unicode", "empty"}: + raise ValueError("parquet must have string column names") + + # index level names must be strings + valid_names = all( + isinstance(name, str) for name in df.index.names if name is not None + ) + if not valid_names: + raise ValueError("Index level names must be strings") + + def write(self, df: DataFrame, path, compression, **kwargs): + raise AbstractMethodError(self) + + def read(self, path, columns=None, **kwargs): + raise AbstractMethodError(self) + + +class PyArrowImpl(BaseImpl): + def __init__(self): + import_optional_dependency( + "pyarrow", extra="pyarrow is required for parquet support." 
+ ) + import pyarrow.parquet + + # import utils to register the pyarrow extension types + import pandas.core.arrays._arrow_utils # noqa + + self.api = pyarrow + + def write( + self, + df: DataFrame, + path, + compression="snappy", + coerce_timestamps="ms", + index: Optional[bool] = None, + partition_cols=None, + **kwargs, + ): + self.validate_dataframe(df) + path, _, _, should_close = get_filepath_or_buffer(path, mode="wb") + + from_pandas_kwargs: Dict[str, Any] = {"schema": kwargs.pop("schema", None)} + if index is not None: + from_pandas_kwargs["preserve_index"] = index + + table = self.api.Table.from_pandas(df, **from_pandas_kwargs) + if partition_cols is not None: + self.api.parquet.write_to_dataset( + table, + path, + compression=compression, + coerce_timestamps=coerce_timestamps, + partition_cols=partition_cols, + **kwargs, + ) + else: + self.api.parquet.write_table( + table, + path, + compression=compression, + coerce_timestamps=coerce_timestamps, + **kwargs, + ) + if should_close: + path.close() + + def read(self, path, columns=None, **kwargs): + path, _, _, should_close = get_filepath_or_buffer(path) + + kwargs["use_pandas_metadata"] = True + result = self.api.parquet.read_table( + path, columns=columns, **kwargs + ).to_pandas() + if should_close: + path.close() + + return result + + +class FastParquetImpl(BaseImpl): + def __init__(self): + # since pandas is a dependency of fastparquet + # we need to import on first use + fastparquet = import_optional_dependency( + "fastparquet", extra="fastparquet is required for parquet support." + ) + self.api = fastparquet + + def write( + self, + df: DataFrame, + path, + compression="snappy", + index=None, + partition_cols=None, + **kwargs, + ): + self.validate_dataframe(df) + # thriftpy/protocol/compact.py:339: + # DeprecationWarning: tostring() is deprecated. + # Use tobytes() instead. 
+ + if "partition_on" in kwargs and partition_cols is not None: + raise ValueError( + "Cannot use both partition_on and " + "partition_cols. Use partition_cols for " + "partitioning data" + ) + elif "partition_on" in kwargs: + partition_cols = kwargs.pop("partition_on") + + if partition_cols is not None: + kwargs["file_scheme"] = "hive" + + if is_s3_url(path) or is_gcs_url(path): + # if path is s3:// or gs:// we need to open the file in 'wb' mode. + # TODO: Support 'ab' + + path, _, _, _ = get_filepath_or_buffer(path, mode="wb") + # And pass the opened file to the fastparquet internal impl. + kwargs["open_with"] = lambda path, _: path + else: + path, _, _, _ = get_filepath_or_buffer(path) + + with catch_warnings(record=True): + self.api.write( + path, + df, + compression=compression, + write_index=index, + partition_on=partition_cols, + **kwargs, + ) + + def read(self, path, columns=None, **kwargs): + if is_s3_url(path): + from pandas.io.s3 import get_file_and_filesystem + + # When path is s3:// an S3File is returned. + # We need to retain the original path(str) while also + # pass the S3File().open function to fsatparquet impl. + s3, filesystem = get_file_and_filesystem(path) + try: + parquet_file = self.api.ParquetFile(path, open_with=filesystem.open) + finally: + s3.close() + else: + path, _, _, _ = get_filepath_or_buffer(path) + parquet_file = self.api.ParquetFile(path) + + return parquet_file.to_pandas(columns=columns, **kwargs) + + +def to_parquet( + df: DataFrame, + path, + engine: str = "auto", + compression="snappy", + index: Optional[bool] = None, + partition_cols=None, + **kwargs, +): + """ + Write a DataFrame to the parquet format. + + Parameters + ---------- + df : DataFrame + path : str + File path or Root Directory path. Will be used as Root Directory path + while writing a partitioned dataset. + + .. versionchanged:: 0.24.0 + + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet library to use. 
If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + index : bool, default None + If ``True``, include the dataframe's index(es) in the file output. If + ``False``, they will not be written to the file. + If ``None``, similar to ``True`` the dataframe's index(es) + will be saved. However, instead of being saved as values, + the RangeIndex will be stored as a range in the metadata so it + doesn't require much space and is faster. Other indexes will + be included as columns in the file output. + + .. versionadded:: 0.24.0 + + partition_cols : str or list, optional, default None + Column names by which to partition the dataset + Columns are partitioned in the order they are given + + .. versionadded:: 0.24.0 + + kwargs + Additional keyword arguments passed to the engine + """ + if isinstance(partition_cols, str): + partition_cols = [partition_cols] + impl = get_engine(engine) + return impl.write( + df, + path, + compression=compression, + index=index, + partition_cols=partition_cols, + **kwargs, + ) + + +def read_parquet(path, engine: str = "auto", columns=None, **kwargs): + """ + Load a parquet object from the file path, returning a DataFrame. + + .. versionadded:: 0.21.0 + + Parameters + ---------- + path : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: + ``file://localhost/path/to/table.parquet``. + A file URL can also be a path to a directory that contains multiple + partitioned parquet files. Both pyarrow and fastparquet support + paths to directories as well as file URLs. 
A directory path could be: + ``file://localhost/path/to/tables`` + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handler (e.g. via builtin ``open`` function) + or ``StringIO``. + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' + Parquet library to use. If 'auto', then the option + ``io.parquet.engine`` is used. The default ``io.parquet.engine`` + behavior is to try 'pyarrow', falling back to 'fastparquet' if + 'pyarrow' is unavailable. + columns : list, default=None + If not None, only these columns will be read from the file. + + .. versionadded:: 0.21.1 + **kwargs + Any additional kwargs are passed to the engine. + + Returns + ------- + DataFrame + """ + + impl = get_engine(engine) + return impl.read(path, columns=columns, **kwargs) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py new file mode 100755 index 00000000..3ddfb71f --- /dev/null +++ b/pandas/io/parsers.py @@ -0,0 +1,3671 @@ +""" +Module contains tools for processing files into DataFrames or other objects +""" + +from collections import abc, defaultdict +import csv +import datetime +from io import StringIO, TextIOWrapper +import re +import sys +from textwrap import fill +from typing import Any, Dict, Set +import warnings + +import numpy as np + +import pandas._libs.lib as lib +import pandas._libs.ops as libops +import pandas._libs.parsers as parsers +from pandas._libs.parsers import STR_NA_VALUES +from pandas._libs.tslibs import parsing +from pandas._typing import FilePathOrBuffer +from pandas.errors import ( + AbstractMethodError, + EmptyDataError, + ParserError, + ParserWarning, +) +from pandas.util._decorators import Appender + +from pandas.core.dtypes.cast import astype_nansafe +from pandas.core.dtypes.common import ( + ensure_object, + ensure_str, + is_bool_dtype, + is_categorical_dtype, + is_dtype_equal, + is_extension_array_dtype, + is_file_like, + is_float, 
+ is_integer, + is_integer_dtype, + is_list_like, + is_object_dtype, + is_scalar, + is_string_dtype, + pandas_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.missing import isna + +from pandas.core import algorithms +from pandas.core.arrays import Categorical +from pandas.core.frame import DataFrame +from pandas.core.indexes.api import ( + Index, + MultiIndex, + RangeIndex, + ensure_index_from_sequences, +) +from pandas.core.series import Series +from pandas.core.tools import datetimes as tools + +from pandas.io.common import ( + get_filepath_or_buffer, + get_handle, + infer_compression, + validate_header_arg, +) +from pandas.io.date_converters import generic_parser + +# BOM character (byte order mark) +# This exists at the beginning of a file to indicate endianness +# of a file (stream). Unfortunately, this marker screws up parsing, +# so we need to remove it if we see it. +_BOM = "\ufeff" + +_doc_read_csv_and_table = ( + r""" +{summary} + +Also supports optionally iterating or breaking of the file +into chunks. + +Additional help can be found in the online docs for +`IO Tools `_. + +Parameters +---------- +filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: file://localhost/path/to/table.csv. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, such as + a file handler (e.g. via builtin ``open`` function) or ``StringIO``. +sep : str, default {_default_sep} + Delimiter to use. If sep is None, the C engine cannot automatically detect + the separator, but the Python parsing engine can, meaning the latter will + be used and automatically detect the separator by Python's builtin sniffer + tool, ``csv.Sniffer``. 
In addition, separators longer than 1 character and + different from ``'\s+'`` will be interpreted as regular expressions and + will also force the use of the Python parsing engine. Note that regex + delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``. +delimiter : str, default ``None`` + Alias for sep. +header : int, list of int, default 'infer' + Row number(s) to use as the column names, and the start of the + data. Default behavior is to infer the column names: if no names + are passed the behavior is identical to ``header=0`` and column + names are inferred from the first line of the file, if column + names are passed explicitly then the behavior is identical to + ``header=None``. Explicitly pass ``header=0`` to be able to + replace existing names. The header can be a list of integers that + specify row locations for a multi-index on the columns + e.g. [0,1,3]. Intervening rows that are not specified will be + skipped (e.g. 2 in this example is skipped). Note that this + parameter ignores commented lines and empty lines if + ``skip_blank_lines=True``, so ``header=0`` denotes the first line of + data rather than the first line of the file. +names : array-like, optional + List of column names to use. If the file contains a header row, + then you should explicitly pass ``header=0`` to override the column names. + Duplicates in this list are not allowed. +index_col : int, str, sequence of int / str, or False, default ``None`` + Column(s) to use as the row labels of the ``DataFrame``, either given as + string name or column index. If a sequence of int / str is given, a + MultiIndex is used. + + Note: ``index_col=False`` can be used to force pandas to *not* use the first + column as the index, e.g. when you have a malformed file with delimiters at + the end of each line. +usecols : list-like or callable, optional + Return a subset of the columns. If list-like, all elements must either + be positional (i.e. 
integer indices into the document columns) or strings + that correspond to column names provided either by the user in `names` or + inferred from the document header row(s). For example, a valid list-like + `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``. + Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``. + To instantiate a DataFrame from ``data`` with element order preserved use + ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns + in ``['foo', 'bar']`` order or + ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]`` + for ``['bar', 'foo']`` order. + + If callable, the callable function will be evaluated against the column + names, returning names where the callable function evaluates to True. An + example of a valid callable argument would be ``lambda x: x.upper() in + ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster + parsing time and lower memory usage. +squeeze : bool, default False + If the parsed data only contains one column then return a Series. +prefix : str, optional + Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... +mangle_dupe_cols : bool, default True + Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than + 'X'...'X'. Passing in False will cause data to be overwritten if there + are duplicate names in the columns. +dtype : Type name or dict of column -> type, optional + Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32, + 'c': 'Int64'}} + Use `str` or `object` together with suitable `na_values` settings + to preserve and not interpret dtype. + If converters are specified, they will be applied INSTEAD + of dtype conversion. +engine : {{'c', 'python'}}, optional + Parser engine to use. The C engine is faster while the python engine is + currently more feature-complete. +converters : dict, optional + Dict of functions for converting values in certain columns. 
Keys can either + be integers or column labels. +true_values : list, optional + Values to consider as True. +false_values : list, optional + Values to consider as False. +skipinitialspace : bool, default False + Skip spaces after delimiter. +skiprows : list-like, int or callable, optional + Line numbers to skip (0-indexed) or number of lines to skip (int) + at the start of the file. + + If callable, the callable function will be evaluated against the row + indices, returning True if the row should be skipped and False otherwise. + An example of a valid callable argument would be ``lambda x: x in [0, 2]``. +skipfooter : int, default 0 + Number of lines at bottom of file to skip (Unsupported with engine='c'). +nrows : int, optional + Number of rows of file to read. Useful for reading pieces of large files. +na_values : scalar, str, list-like, or dict, optional + Additional strings to recognize as NA/NaN. If dict passed, specific + per-column NA values. By default the following values are interpreted as + NaN: '""" + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. +keep_default_na : bool, default True + Whether or not to include the default NaN values when parsing the data. + Depending on whether `na_values` is passed in, the behavior is as follows: + + * If `keep_default_na` is True, and `na_values` are specified, `na_values` + is appended to the default NaN values used for parsing. + * If `keep_default_na` is True, and `na_values` are not specified, only + the default NaN values are used for parsing. + * If `keep_default_na` is False, and `na_values` are specified, only + the NaN values specified `na_values` are used for parsing. + * If `keep_default_na` is False, and `na_values` are not specified, no + strings will be parsed as NaN. + + Note that if `na_filter` is passed in as False, the `keep_default_na` and + `na_values` parameters will be ignored. 
+na_filter : bool, default True + Detect missing value markers (empty strings and the value of na_values). In + data without any NAs, passing na_filter=False can improve the performance + of reading a large file. +verbose : bool, default False + Indicate number of NA values placed in non-numeric columns. +skip_blank_lines : bool, default True + If True, skip over blank lines rather than interpreting as NaN values. +parse_dates : bool or list of int or names or list of lists or dict, \ +default False + The behavior is as follows: + + * boolean. If True -> try parsing the index. + * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + each as a separate date column. + * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + a single date column. + * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call + result 'foo' + + If a column or index cannot be represented as an array of datetimes, + say because of an unparseable value or a mixture of timezones, the column + or index will be returned unaltered as an object data type. For + non-standard datetime parsing, use ``pd.to_datetime`` after + ``pd.read_csv``. To parse an index or column with a mixture of timezones, + specify ``date_parser`` to be a partially-applied + :func:`pandas.to_datetime` with ``utc=True``. See + :ref:`io.csv.mixed_timezones` for more. + + Note: A fast-path exists for iso8601-formatted dates. +infer_datetime_format : bool, default False + If True and `parse_dates` is enabled, pandas will attempt to infer the + format of the datetime strings in the columns, and if it can be inferred, + switch to a faster method of parsing them. In some cases this can increase + the parsing speed by 5-10x. +keep_date_col : bool, default False + If True and `parse_dates` specifies combining multiple columns then + keep the original columns. 
+date_parser : function, optional + Function to use for converting a sequence of string columns to an array of + datetime instances. The default uses ``dateutil.parser.parser`` to do the + conversion. Pandas will try to call `date_parser` in three different ways, + advancing to the next if an exception occurs: 1) Pass one or more arrays + (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the + string values from the columns defined by `parse_dates` into a single array + and pass that; and 3) call `date_parser` once for each row using one or + more strings (corresponding to the columns defined by `parse_dates`) as + arguments. +dayfirst : bool, default False + DD/MM format dates, international and European format. +cache_dates : bool, default True + If True, use a cache of unique, converted dates to apply the datetime + conversion. May produce significant speed-up when parsing duplicate + date strings, especially ones with timezone offsets. + + .. versionadded:: 0.25.0 +iterator : bool, default False + Return TextFileReader object for iteration or getting chunks with + ``get_chunk()``. +chunksize : int, optional + Return TextFileReader object for iteration. + See the `IO Tools docs + `_ + for more information on ``iterator`` and ``chunksize``. +compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer' + For on-the-fly decompression of on-disk data. If 'infer' and + `filepath_or_buffer` is path-like, then detect compression from the + following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + decompression). If using 'zip', the ZIP file must contain only one data + file to be read in. Set to None for no decompression. +thousands : str, optional + Thousands separator. +decimal : str, default '.' + Character to recognize as decimal point (e.g. use ',' for European data). +lineterminator : str (length 1), optional + Character to break file into lines. Only valid with C parser. 
+quotechar : str (length 1), optional + The character used to denote the start and end of a quoted item. Quoted + items can include the delimiter and it will be ignored. +quoting : int or csv.QUOTE_* instance, default 0 + Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of + QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3). +doublequote : bool, default ``True`` + When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate + whether or not to interpret two consecutive quotechar elements INSIDE a + field as a single ``quotechar`` element. +escapechar : str (length 1), optional + One-character string used to escape other characters. +comment : str, optional + Indicates remainder of line should not be parsed. If found at the beginning + of a line, the line will be ignored altogether. This parameter must be a + single character. Like empty lines (as long as ``skip_blank_lines=True``), + fully commented lines are ignored by the parameter `header` but not by + `skiprows`. For example, if ``comment='#'``, parsing + ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being + treated as the header. +encoding : str, optional + Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python + standard encodings + `_ . +dialect : str or csv.Dialect, optional + If provided, this parameter will override values (default or not) for the + following parameters: `delimiter`, `doublequote`, `escapechar`, + `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to + override values, a ParserWarning will be issued. See csv.Dialect + documentation for more details. +error_bad_lines : bool, default True + Lines with too many fields (e.g. a csv line with too many commas) will by + default cause an exception to be raised, and no DataFrame will be returned. + If False, then these "bad lines" will dropped from the DataFrame that is + returned. 
+warn_bad_lines : bool, default True + If error_bad_lines is False, and warn_bad_lines is True, a warning for each + "bad line" will be output. +delim_whitespace : bool, default False + Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be + used as the sep. Equivalent to setting ``sep='\\s+'``. If this option + is set to True, nothing should be passed in for the ``delimiter`` + parameter. +low_memory : bool, default True + Internally process the file in chunks, resulting in lower memory use + while parsing, but possibly mixed type inference. To ensure no mixed + types either set False, or specify the type with the `dtype` parameter. + Note that the entire file is read into a single DataFrame regardless, + use the `chunksize` or `iterator` parameter to return the data in chunks. + (Only valid with C parser). +memory_map : bool, default False + If a filepath is provided for `filepath_or_buffer`, map the file object + directly onto memory and access the data directly from there. Using this + option can improve performance because there is no longer any I/O overhead. +float_precision : str, optional + Specifies which converter the C engine should use for floating-point + values. The options are `None` for the ordinary converter, + `high` for the high-precision converter, and `round_trip` for the + round-trip converter. + +Returns +------- +DataFrame or TextParser + A comma-separated values (csv) file is returned as two-dimensional + data structure with labeled axes. + +See Also +-------- +to_csv : Write DataFrame to a comma-separated values (csv) file. +read_csv : Read a comma-separated values (csv) file into DataFrame. +read_fwf : Read a table of fixed-width formatted lines into DataFrame. 
+ +Examples +-------- +>>> pd.{func_name}('data.csv') # doctest: +SKIP +""" +) + + +def _validate_integer(name, val, min_val=0): + """ + Checks whether the 'name' parameter for parsing is either + an integer OR float that can SAFELY be cast to an integer + without losing accuracy. Raises a ValueError if that is + not the case. + + Parameters + ---------- + name : string + Parameter name (used for error reporting) + val : int or float + The value to check + min_val : int + Minimum allowed value (val < min_val will result in a ValueError) + """ + msg = f"'{name:s}' must be an integer >={min_val:d}" + + if val is not None: + if is_float(val): + if int(val) != val: + raise ValueError(msg) + val = int(val) + elif not (is_integer(val) and val >= min_val): + raise ValueError(msg) + + return val + + +def _validate_names(names): + """ + Raise ValueError if the `names` parameter contains duplicates. + + Parameters + ---------- + names : array-like or None + An array containing a list of the names used for the output DataFrame. + + Raises + ------ + ValueError + If names are not unique. + """ + + if names is not None: + if len(names) != len(set(names)): + raise ValueError("Duplicate names are not allowed.") + + +def _read(filepath_or_buffer: FilePathOrBuffer, kwds): + """Generic reader of line files.""" + encoding = kwds.get("encoding", None) + if encoding is not None: + encoding = re.sub("_", "-", encoding).lower() + kwds["encoding"] = encoding + + compression = kwds.get("compression", "infer") + compression = infer_compression(filepath_or_buffer, compression) + + # TODO: get_filepath_or_buffer could return + # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] + # though mypy handling of conditional imports is difficult. 
def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
    """
    Generic reader of line files.

    Normalises the encoding name, resolves compression, opens the source,
    and builds a ``TextFileReader`` from `kwds`.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        The source to read from.
    kwds : dict
        Parser options; ``encoding``, ``compression`` and possibly
        ``parse_dates`` are updated in place.

    Returns
    -------
    TextFileReader or object
        The un-consumed reader when ``iterator`` or ``chunksize`` is set,
        otherwise the result of ``parser.read(nrows)``.
    """
    encoding = kwds.get("encoding", None)
    if encoding is not None:
        encoding = re.sub("_", "-", encoding).lower()
        kwds["encoding"] = encoding

    compression = kwds.get("compression", "infer")
    compression = infer_compression(filepath_or_buffer, compression)

    # TODO: get_filepath_or_buffer could return
    # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
    # though mypy handling of conditional imports is difficult.
    # See https://github.com/python/mypy/issues/1297
    fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
        filepath_or_buffer, encoding, compression
    )
    kwds["compression"] = compression

    def _close_source():
        # Only close handles that get_filepath_or_buffer flagged as ours.
        if should_close:
            try:
                fp_or_buf.close()
            except ValueError:
                pass

    try:
        if kwds.get("date_parser", None) is not None:
            if isinstance(kwds["parse_dates"], bool):
                kwds["parse_dates"] = True

        # Extract some of the arguments (pass chunksize on).
        iterator = kwds.get("iterator", False)
        chunksize = _validate_integer("chunksize", kwds.get("chunksize", None), 1)
        nrows = kwds.get("nrows", None)

        # Check for duplicates in names.
        _validate_names(kwds.get("names", None))

        # Create the parser.
        parser = TextFileReader(fp_or_buf, **kwds)
    except Exception:
        # Do not leak the opened source when validation or setup fails.
        _close_source()
        raise

    if chunksize or iterator:
        # The caller now owns the reader (and the underlying source).
        return parser

    try:
        data = parser.read(nrows)
    finally:
        # Release the source even when reading or closing the parser fails.
        try:
            parser.close()
        finally:
            _close_source()

    return data
def _make_parser_function(name, default_sep=","):
    """
    Build a ``read_csv``-style reader function.

    Parameters
    ----------
    name : str
        Value assigned to the ``__name__`` of the generated function.
    default_sep : str, default ","
        Default value of the generated function's ``sep`` parameter.

    Returns
    -------
    callable
        A function that collects its keyword arguments into a dict and
        hands them, together with the source, to ``_read``.
    """

    def parser_f(
        filepath_or_buffer: FilePathOrBuffer,
        sep=default_sep,
        delimiter=None,
        # Column and Index Locations and Names
        header="infer",
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        prefix=None,
        mangle_dupe_cols=True,
        # General Parsing Configuration
        dtype=None,
        engine=None,
        converters=None,
        true_values=None,
        false_values=None,
        skipinitialspace=False,
        skiprows=None,
        skipfooter=0,
        nrows=None,
        # NA and Missing Data Handling
        na_values=None,
        keep_default_na=True,
        na_filter=True,
        verbose=False,
        skip_blank_lines=True,
        # Datetime Handling
        parse_dates=False,
        infer_datetime_format=False,
        keep_date_col=False,
        date_parser=None,
        dayfirst=False,
        cache_dates=True,
        # Iteration
        iterator=False,
        chunksize=None,
        # Quoting, Compression, and File Format
        compression="infer",
        thousands=None,
        decimal: str = ".",
        lineterminator=None,
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
        doublequote=True,
        escapechar=None,
        comment=None,
        encoding=None,
        dialect=None,
        # Error Handling
        error_bad_lines=True,
        warn_bad_lines=True,
        # Internal
        delim_whitespace=False,
        low_memory=_c_parser_defaults["low_memory"],
        memory_map=False,
        float_precision=None,
    ):
        kwds = {}

        # gh-23761
        #
        # A dialect overrides the overlapping parameters passed directly,
        # and no warning should be issued when those parameters were left
        # at their defaults. "delimiter" is aliased to "sep" below, so the
        # flag records whether BOTH were left at their defaults before the
        # aliasing hides that information.
        if dialect is not None:
            kwds["sep_override"] = delimiter is None and sep == default_sep

        # Alias sep -> delimiter.
        delimiter = sep if delimiter is None else delimiter

        if delim_whitespace and delimiter != default_sep:
            raise ValueError(
                "Specified a delimiter with both sep and delim_whitespace=True; "
                "you can only specify one."
            )

        # Remember whether the caller chose the engine; default to "c".
        engine_specified = engine is not None
        if not engine_specified:
            engine = "c"

        kwds.update(
            {
                "delimiter": delimiter,
                "engine": engine,
                "dialect": dialect,
                "compression": compression,
                "engine_specified": engine_specified,
                "doublequote": doublequote,
                "escapechar": escapechar,
                "quotechar": quotechar,
                "quoting": quoting,
                "skipinitialspace": skipinitialspace,
                "lineterminator": lineterminator,
                "header": header,
                "index_col": index_col,
                "names": names,
                "prefix": prefix,
                "skiprows": skiprows,
                "skipfooter": skipfooter,
                "na_values": na_values,
                "true_values": true_values,
                "false_values": false_values,
                "keep_default_na": keep_default_na,
                "thousands": thousands,
                "comment": comment,
                "decimal": decimal,
                "parse_dates": parse_dates,
                "keep_date_col": keep_date_col,
                "dayfirst": dayfirst,
                "date_parser": date_parser,
                "cache_dates": cache_dates,
                "nrows": nrows,
                "iterator": iterator,
                "chunksize": chunksize,
                "converters": converters,
                "dtype": dtype,
                "usecols": usecols,
                "verbose": verbose,
                "encoding": encoding,
                "squeeze": squeeze,
                "memory_map": memory_map,
                "float_precision": float_precision,
                "na_filter": na_filter,
                "delim_whitespace": delim_whitespace,
                "warn_bad_lines": warn_bad_lines,
                "error_bad_lines": error_bad_lines,
                "low_memory": low_memory,
                "mangle_dupe_cols": mangle_dupe_cols,
                "infer_datetime_format": infer_datetime_format,
                "skip_blank_lines": skip_blank_lines,
            }
        )

        return _read(filepath_or_buffer, kwds)

    parser_f.__name__ = name

    return parser_f
def read_fwf(
    filepath_or_buffer: FilePathOrBuffer,
    colspecs="infer",
    widths=None,
    infer_nrows=100,
    **kwds,
):

    r"""
    Read a table of fixed-width formatted lines into DataFrame.

    Also supports optionally iterating or breaking of the file
    into chunks.

    Additional help can be found in the `online docs for IO Tools
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.csv``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handler (e.g. via builtin ``open`` function)
        or ``StringIO``.
    colspecs : list of tuple (int, int) or 'infer', optional
        A list of tuples giving the extents of the fixed-width
        fields of each line as half-open intervals (i.e., [from, to[ ).
        String value 'infer' can be used to instruct the parser to try
        detecting the column specifications from the first `infer_nrows`
        rows of the data which are not being skipped via skiprows
        (default='infer').
    widths : list of int, optional
        A list of field widths which can be used instead of 'colspecs' if
        the intervals are contiguous.
    infer_nrows : int, default 100
        The number of rows to consider when letting the parser determine the
        `colspecs`.

        .. versionadded:: 0.24.0
    **kwds : optional
        Optional keyword arguments can be passed to ``TextFileReader``.

    Returns
    -------
    DataFrame or TextFileReader
        A fixed-width formatted file is returned as two-dimensional
        data structure with labeled axes.

    Raises
    ------
    ValueError
        If neither or both of `colspecs` and `widths` are specified.

    See Also
    --------
    to_csv : Write DataFrame to a comma-separated values (csv) file.
    read_csv : Read a comma-separated values (csv) file into DataFrame.

    Examples
    --------
    >>> pd.read_fwf('data.csv')  # doctest: +SKIP
    """

    # Compare against the sentinels explicitly: tuple membership falls back
    # to ``==``, which is ambiguous for array-like colspecs.
    colspecs_is_infer = isinstance(colspecs, str) and colspecs == "infer"
    colspecs_given = colspecs is not None and not colspecs_is_infer

    # Check input arguments.
    if colspecs is None and widths is None:
        raise ValueError("Must specify either colspecs or widths")
    elif colspecs_given and widths is not None:
        raise ValueError("You must specify only one of 'widths' and 'colspecs'")

    # Compute 'colspecs' from 'widths', if specified.
    if widths is not None:
        colspecs, col = [], 0
        for w in widths:
            colspecs.append((col, col + w))
            col += w

    kwds["colspecs"] = colspecs
    kwds["infer_nrows"] = infer_nrows
    kwds["engine"] = "python-fwf"
    return _read(filepath_or_buffer, kwds)
class TextFileReader(abc.Iterator):
    """
    Iterator that reads a text source in one go or chunk by chunk.

    Validates and normalises the parser options, selects the parsing
    engine, and wraps the engine's output in a DataFrame.

    Passed dialect overrides any of the related parser options
    """

    def __init__(self, f, engine=None, **kwds):
        """
        Parameters
        ----------
        f : object
            The source handed to the parsing engine.
        engine : {"c", "python", "python-fwf"}, optional
            Parsing engine; "python" is used when omitted.
        **kwds
            Parser options. Note that this dict is modified (dialect
            values and the resolved ``header`` are written into it).
        """
        self.f = f

        if engine is not None:
            engine_specified = True
        else:
            engine = "python"
            engine_specified = False

        # An explicit "engine_specified" keyword wins over the inference.
        self._engine_specified = kwds.get("engine_specified", engine_specified)

        if kwds.get("dialect") is not None:
            dialect = kwds["dialect"]
            if dialect in csv.list_dialects():
                dialect = csv.get_dialect(dialect)

            # Any valid dialect should have these attributes.
            # If any are missing, we will raise automatically.
            for param in (
                "delimiter",
                "doublequote",
                "escapechar",
                "skipinitialspace",
                "quotechar",
                "quoting",
            ):
                try:
                    dialect_val = getattr(dialect, param)
                except AttributeError:
                    raise ValueError(f"Invalid dialect {kwds['dialect']} provided")
                parser_default = _parser_defaults[param]
                provided = kwds.get(param, parser_default)

                # Messages for conflicting values between the dialect
                # instance and the actual parameters provided.
                conflict_msgs = []

                # Don't warn if the default parameter was passed in,
                # even if it conflicts with the dialect (gh-23761).
                if provided != parser_default and provided != dialect_val:
                    msg = (
                        f"Conflicting values for '{param}': '{provided}' was "
                        f"provided, but the dialect specifies '{dialect_val}'. "
                        "Using the dialect-specified value."
                    )

                    # Annoying corner case for not warning about
                    # conflicts between dialect and delimiter parameter:
                    # the "sep_override" flag marks that neither "sep" nor
                    # "delimiter" was set explicitly by the caller.
                    if not (param == "delimiter" and kwds.pop("sep_override", False)):
                        conflict_msgs.append(msg)

                if conflict_msgs:
                    warnings.warn(
                        "\n\n".join(conflict_msgs), ParserWarning, stacklevel=2
                    )
                kwds[param] = dialect_val

        if kwds.get("skipfooter"):
            if kwds.get("iterator") or kwds.get("chunksize"):
                raise ValueError("'skipfooter' not supported for 'iteration'")
            if kwds.get("nrows"):
                raise ValueError("'skipfooter' not supported with 'nrows'")

        # Resolve header="infer": first row unless explicit names are given.
        if kwds.get("header", "infer") == "infer":
            kwds["header"] = 0 if kwds.get("names") is None else None

        self.orig_options = kwds

        # miscellanea
        self.engine = engine
        self._engine = None
        self._currow = 0

        options = self._get_options_with_defaults(engine)

        self.chunksize = options.pop("chunksize", None)
        self.nrows = options.pop("nrows", None)
        self.squeeze = options.pop("squeeze", False)

        # might mutate self.engine
        self.engine = self._check_file_or_buffer(f, engine)
        self.options, self.engine = self._clean_options(options, engine)

        if "has_index_names" in kwds:
            self.options["has_index_names"] = kwds["has_index_names"]

        self._make_engine(self.engine)

    def close(self):
        """Close the underlying parsing engine."""
        self._engine.close()

    def _get_options_with_defaults(self, engine):
        """
        Merge the user options with the per-engine defaults.

        Raises
        ------
        ValueError
            If ``mangle_dupe_cols`` is falsy, or a C-parser-only option is
            set to a non-default value for an engine that cannot honour it.
        """
        kwds = self.orig_options

        options = {}

        for argname, default in _parser_defaults.items():
            value = kwds.get(argname, default)

            # see gh-12935
            if argname == "mangle_dupe_cols" and not value:
                raise ValueError("Setting mangle_dupe_cols=False is not supported yet")
            else:
                options[argname] = value

        for argname, default in _c_parser_defaults.items():
            if argname in kwds:
                value = kwds[argname]

                if engine != "c" and value != default:
                    if "python" in engine and argname not in _python_unsupported:
                        pass
                    elif value == _deprecated_defaults.get(argname, default):
                        pass
                    else:
                        raise ValueError(
                            f"The {repr(argname)} option is not supported with the"
                            f" {repr(engine)} engine"
                        )
            else:
                value = _deprecated_defaults.get(argname, default)
            options[argname] = value

        if engine == "python-fwf":
            for argname, default in _fwf_defaults.items():
                options[argname] = kwds.get(argname, default)

        return options

    def _check_file_or_buffer(self, f, engine):
        """Return `engine` after checking that it can iterate over `f`."""
        # see gh-16530
        if is_file_like(f):
            next_attr = "__next__"

            # The C engine doesn't need the file-like to have the
            # "__next__" attribute. However, the Python engine explicitly
            # calls "next(...)" when iterating through such an object,
            # meaning it needs to have that attribute.
            if engine != "c" and not hasattr(f, next_attr):
                msg = "The 'python' engine cannot iterate through this file buffer."
                raise ValueError(msg)

        return engine

    def _clean_options(self, options, engine):
        """
        Normalise `options` and pick the engine that can honour them.

        Returns
        -------
        result : dict
            A cleaned copy of `options`.
        engine : str
            The engine to use, "python" if a fallback was needed.

        Raises
        ------
        ValueError
            If a fallback is needed but the engine was chosen explicitly,
            or the fallback would drop a non-default option.
        TypeError
            If ``converters`` is not a dict.
        """
        result = options.copy()

        engine_specified = self._engine_specified
        fallback_reason = None

        sep = options["delimiter"]
        delim_whitespace = options["delim_whitespace"]

        # C engine not supported yet
        if engine == "c":
            if options["skipfooter"] > 0:
                fallback_reason = "the 'c' engine does not support skipfooter"
                engine = "python"

        encoding = sys.getfilesystemencoding() or "utf-8"
        if sep is None and not delim_whitespace:
            if engine == "c":
                fallback_reason = (
                    "the 'c' engine does not support "
                    "sep=None with delim_whitespace=False"
                )
                engine = "python"
        elif sep is not None and len(sep) > 1:
            if engine == "c" and sep == r"\s+":
                result["delim_whitespace"] = True
                del result["delimiter"]
            elif engine not in ("python", "python-fwf"):
                # wait until regex engine integrated
                fallback_reason = (
                    "the 'c' engine does not support "
                    "regex separators (separators > 1 char and "
                    r"different from '\s+' are "
                    "interpreted as regex)"
                )
                engine = "python"
        elif delim_whitespace:
            if "python" in engine:
                result["delimiter"] = r"\s+"
        elif sep is not None:
            encodeable = True
            try:
                if len(sep.encode(encoding)) > 1:
                    encodeable = False
            except UnicodeError:
                # str.encode raises UnicodeEncodeError; catching the common
                # base class keeps the previous UnicodeDecodeError handling.
                encodeable = False
            if not encodeable and engine not in ("python", "python-fwf"):
                fallback_reason = (
                    f"the separator encoded in {encoding} "
                    "is > 1 char long, and the 'c' engine "
                    "does not support such separators"
                )
                engine = "python"

        quotechar = options["quotechar"]
        if quotechar is not None and isinstance(quotechar, (str, bytes)):
            if (
                len(quotechar) == 1
                and ord(quotechar) > 127
                and engine not in ("python", "python-fwf")
            ):
                fallback_reason = (
                    "ord(quotechar) > 127, meaning the "
                    "quotechar is larger than one byte, "
                    "and the 'c' engine does not support "
                    "such quotechars"
                )
                engine = "python"

        # An explicitly requested engine is never silently replaced.
        if fallback_reason and engine_specified:
            raise ValueError(fallback_reason)

        if engine == "c":
            for arg in _c_unsupported:
                del result[arg]

        if "python" in engine:
            for arg in _python_unsupported:
                if fallback_reason and result[arg] != _c_parser_defaults[arg]:
                    raise ValueError(
                        "Falling back to the 'python' engine because "
                        f"{fallback_reason}, but this causes {repr(arg)} to be "
                        "ignored as it is not supported by the 'python' engine."
                    )
                del result[arg]

        if fallback_reason:
            warnings.warn(
                (
                    "Falling back to the 'python' engine because "
                    f"{fallback_reason}; you can avoid this warning by specifying "
                    "engine='python'."
                ),
                ParserWarning,
                stacklevel=5,
            )

        index_col = options["index_col"]
        names = options["names"]
        converters = options["converters"]
        na_values = options["na_values"]
        skiprows = options["skiprows"]

        validate_header_arg(options["header"])

        depr_warning = ""

        for arg in _deprecated_args:
            parser_default = _c_parser_defaults[arg]
            depr_default = _deprecated_defaults[arg]

            msg = (
                f"The {repr(arg)} argument has been deprecated and will be "
                "removed in a future version."
            )

            if result.get(arg, depr_default) != depr_default:
                depr_warning += msg + "\n\n"
            else:
                result[arg] = parser_default

        if depr_warning != "":
            warnings.warn(depr_warning, FutureWarning, stacklevel=2)

        if index_col is True:
            raise ValueError("The value of index_col couldn't be 'True'")
        if _is_index_col(index_col):
            if not isinstance(index_col, (list, tuple, np.ndarray)):
                index_col = [index_col]
        result["index_col"] = index_col

        names = list(names) if names is not None else names

        # type conversion-related
        if converters is not None:
            if not isinstance(converters, dict):
                raise TypeError(
                    "Type converters must be a dict or subclass, "
                    f"input was a {type(converters).__name__}"
                )
        else:
            converters = {}

        # Converting values to NA
        keep_default_na = options["keep_default_na"]
        na_values, na_fvalues = _clean_na_values(na_values, keep_default_na)

        # handle skiprows; this is internally handled by the
        # c-engine, so only need for python parsers
        if engine != "c":
            if is_integer(skiprows):
                skiprows = list(range(skiprows))
            if skiprows is None:
                skiprows = set()
            elif not callable(skiprows):
                skiprows = set(skiprows)

        # put stuff back
        result["names"] = names
        result["converters"] = converters
        result["na_values"] = na_values
        result["na_fvalues"] = na_fvalues
        result["skiprows"] = skiprows

        return result, engine

    def __next__(self):
        """Return the next chunk; close the engine once exhausted."""
        try:
            return self.get_chunk()
        except StopIteration:
            self.close()
            raise

    def _make_engine(self, engine="c"):
        """Instantiate the parser for `engine` and store it on the reader."""
        if engine == "c":
            self._engine = CParserWrapper(self.f, **self.options)
        else:
            if engine == "python":
                klass = PythonParser
            elif engine == "python-fwf":
                klass = FixedWidthFieldParser
            else:
                raise ValueError(
                    f"Unknown engine: {engine} (valid options are "
                    '"c", "python", or '
                    '"python-fwf")'
                )
            self._engine = klass(self.f, **self.options)

    def _failover_to_python(self):
        raise AbstractMethodError(self)

    def read(self, nrows=None):
        """
        Read up to `nrows` rows from the engine and build the result.

        Returns
        -------
        DataFrame or Series
            A copy of the single column is returned instead of the frame
            when ``squeeze`` is set and the result has exactly one column.
        """
        nrows = _validate_integer("nrows", nrows)
        ret = self._engine.read(nrows)

        # May alter columns / col_dict
        index, columns, col_dict = self._create_index(ret)

        if index is None:
            if col_dict:
                # Any column is actually fine:
                new_rows = len(next(iter(col_dict.values())))
                # Continue the row numbering across successive reads.
                index = RangeIndex(self._currow, self._currow + new_rows)
            else:
                new_rows = 0
        else:
            new_rows = len(index)

        df = DataFrame(col_dict, columns=columns, index=index)

        self._currow += new_rows

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]].copy()
        return df

    def _create_index(self, ret):
        """Unpack the engine result into ``(index, columns, col_dict)``."""
        index, columns, col_dict = ret
        return index, columns, col_dict

    def get_chunk(self, size=None):
        """
        Read the next chunk of rows.

        Parameters
        ----------
        size : int, optional
            Number of rows to read; defaults to ``self.chunksize``.

        Raises
        ------
        StopIteration
            If ``self.nrows`` rows have already been read.
        """
        if size is None:
            size = self.chunksize
        if self.nrows is not None:
            if self._currow >= self.nrows:
                raise StopIteration
            remaining = self.nrows - self._currow
            # Without any chunk size, read whatever is left of `nrows`
            # instead of comparing None with an int.
            size = remaining if size is None else min(size, remaining)
        return self.read(nrows=size)
If not, raise a ValueError that + shows what usecols are missing. + + Parameters + ---------- + usecols : iterable of usecols + The columns to validate are present in names. + names : iterable of names + The column names to check against. + + Returns + ------- + usecols : iterable of usecols + The `usecols` parameter if the validation succeeds. + + Raises + ------ + ValueError : Columns were missing. Error message will list them. + """ + missing = [c for c in usecols if c not in names] + if len(missing) > 0: + raise ValueError( + "Usecols do not match columns, " + f"columns expected but not found: {missing}" + ) + + return usecols + + +def _validate_skipfooter_arg(skipfooter): + """ + Validate the 'skipfooter' parameter. + + Checks whether 'skipfooter' is a non-negative integer. + Raises a ValueError if that is not the case. + + Parameters + ---------- + skipfooter : non-negative integer + The number of rows to skip at the end of the file. + + Returns + ------- + validated_skipfooter : non-negative integer + The original input if the validation succeeds. + + Raises + ------ + ValueError : 'skipfooter' was not a non-negative integer. + """ + + if not is_integer(skipfooter): + raise ValueError("skipfooter must be an integer") + + if skipfooter < 0: + raise ValueError("skipfooter cannot be negative") + + return skipfooter + + +def _validate_usecols_arg(usecols): + """ + Validate the 'usecols' parameter. + + Checks whether or not the 'usecols' parameter contains all integers + (column selection by index), strings (column by name) or is a callable. + Raises a ValueError if that is not the case. + + Parameters + ---------- + usecols : list-like, callable, or None + List of columns to use when parsing or a callable that can be used + to filter a list of table columns. + + Returns + ------- + usecols_tuple : tuple + A tuple of (verified_usecols, usecols_dtype). 
+ + 'verified_usecols' is either a set if an array-like is passed in or + 'usecols' if a callable or None is passed in. + + 'usecols_dtype` is the inferred dtype of 'usecols' if an array-like + is passed in or None if a callable or None is passed in. + """ + msg = ( + "'usecols' must either be list-like of all strings, all unicode, " + "all integers or a callable." + ) + if usecols is not None: + if callable(usecols): + return usecols, None + + if not is_list_like(usecols): + # see gh-20529 + # + # Ensure it is iterable container but not string. + raise ValueError(msg) + + usecols_dtype = lib.infer_dtype(usecols, skipna=False) + + if usecols_dtype not in ("empty", "integer", "string", "unicode"): + raise ValueError(msg) + + usecols = set(usecols) + + return usecols, usecols_dtype + return usecols, None + + +def _validate_parse_dates_arg(parse_dates): + """ + Check whether or not the 'parse_dates' parameter + is a non-boolean scalar. Raises a ValueError if + that is the case. + """ + msg = ( + "Only booleans, lists, and " + "dictionaries are accepted " + "for the 'parse_dates' parameter" + ) + + if parse_dates is not None: + if is_scalar(parse_dates): + if not lib.is_bool(parse_dates): + raise TypeError(msg) + + elif not isinstance(parse_dates, (list, dict)): + raise TypeError(msg) + + return parse_dates + + +class ParserBase: + def __init__(self, kwds): + self.names = kwds.get("names") + self.orig_names = None + self.prefix = kwds.pop("prefix", None) + + self.index_col = kwds.get("index_col", None) + self.unnamed_cols = set() + self.index_names = None + self.col_names = None + + self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False)) + self.date_parser = kwds.pop("date_parser", None) + self.dayfirst = kwds.pop("dayfirst", False) + self.keep_date_col = kwds.pop("keep_date_col", False) + + self.na_values = kwds.get("na_values") + self.na_fvalues = kwds.get("na_fvalues") + self.na_filter = kwds.get("na_filter", False) + self.keep_default_na = 
kwds.get("keep_default_na", True) + + self.true_values = kwds.get("true_values") + self.false_values = kwds.get("false_values") + self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True) + self.infer_datetime_format = kwds.pop("infer_datetime_format", False) + self.cache_dates = kwds.pop("cache_dates", True) + + self._date_conv = _make_date_converter( + date_parser=self.date_parser, + dayfirst=self.dayfirst, + infer_datetime_format=self.infer_datetime_format, + cache_dates=self.cache_dates, + ) + + # validate header options for mi + self.header = kwds.get("header") + if isinstance(self.header, (list, tuple, np.ndarray)): + if not all(map(is_integer, self.header)): + raise ValueError("header must be integer or list of integers") + if any(i < 0 for i in self.header): + raise ValueError( + "cannot specify multi-index header with negative integers" + ) + if kwds.get("usecols"): + raise ValueError( + "cannot specify usecols when specifying a multi-index header" + ) + if kwds.get("names"): + raise ValueError( + "cannot specify names when specifying a multi-index header" + ) + + # validate index_col that only contains integers + if self.index_col is not None: + is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray)) + if not ( + is_sequence + and all(map(is_integer, self.index_col)) + or is_integer(self.index_col) + ): + raise ValueError( + "index_col must only contain row numbers " + "when specifying a multi-index header" + ) + + # GH 16338 + elif self.header is not None and not is_integer(self.header): + raise ValueError("header must be integer or list of integers") + + # GH 27779 + elif self.header is not None and self.header < 0: + raise ValueError( + "Passing negative integer to header is invalid. 
" + "For no header, use header=None instead" + ) + + self._name_processed = False + + self._first_chunk = True + + # GH 13932 + # keep references to file handles opened by the parser itself + self.handles = [] + + def close(self): + for f in self.handles: + f.close() + + @property + def _has_complex_date_col(self): + return isinstance(self.parse_dates, dict) or ( + isinstance(self.parse_dates, list) + and len(self.parse_dates) > 0 + and isinstance(self.parse_dates[0], list) + ) + + def _should_parse_dates(self, i): + if isinstance(self.parse_dates, bool): + return self.parse_dates + else: + if self.index_names is not None: + name = self.index_names[i] + else: + name = None + j = self.index_col[i] + + if is_scalar(self.parse_dates): + return (j == self.parse_dates) or ( + name is not None and name == self.parse_dates + ) + else: + return (j in self.parse_dates) or ( + name is not None and name in self.parse_dates + ) + + def _extract_multi_indexer_columns( + self, header, index_names, col_names, passed_names=False + ): + """ extract and return the names, index_names, col_names + header is a list-of-lists returned from the parsers """ + if len(header) < 2: + return header[0], index_names, col_names, passed_names + + # the names are the tuples of the header that are not the index cols + # 0 is the name of the index, assuming index_col is a list of column + # numbers + ic = self.index_col + if ic is None: + ic = [] + + if not isinstance(ic, (list, tuple, np.ndarray)): + ic = [ic] + sic = set(ic) + + # clean the index_names + index_names = header.pop(-1) + index_names, names, index_col = _clean_index_names( + index_names, self.index_col, self.unnamed_cols + ) + + # extract the columns + field_count = len(header[0]) + + def extract(r): + return tuple(r[i] for i in range(field_count) if i not in sic) + + columns = list(zip(*(extract(r) for r in header))) + names = ic + columns + + # If we find unnamed columns all in a single + # level, then our header was too long. 
+ for n in range(len(columns[0])): + if all(ensure_str(col[n]) in self.unnamed_cols for col in columns): + raise ParserError( + "Passed header=[{header}] are too many rows for this " + "multi_index of columns".format( + header=",".join(str(x) for x in self.header) + ) + ) + + # Clean the column names (if we have an index_col). + if len(ic): + col_names = [ + r[0] if (len(r[0]) and r[0] not in self.unnamed_cols) else None + for r in header + ] + else: + col_names = [None] * len(header) + + passed_names = True + + return names, index_names, col_names, passed_names + + def _maybe_dedup_names(self, names): + # see gh-7160 and gh-9424: this helps to provide + # immediate alleviation of the duplicate names + # issue and appears to be satisfactory to users, + # but ultimately, not needing to butcher the names + # would be nice! + if self.mangle_dupe_cols: + names = list(names) # so we can index + counts = defaultdict(int) + is_potential_mi = _is_potential_multi_index(names) + + for i, col in enumerate(names): + cur_count = counts[col] + + while cur_count > 0: + counts[col] = cur_count + 1 + + if is_potential_mi: + col = col[:-1] + (f"{col[-1]}.{cur_count}",) + else: + col = f"{col}.{cur_count}" + cur_count = counts[col] + + names[i] = col + counts[col] = cur_count + 1 + + return names + + def _maybe_make_multi_index_columns(self, columns, col_names=None): + # possibly create a column mi here + if _is_potential_multi_index(columns): + columns = MultiIndex.from_tuples(columns, names=col_names) + return columns + + def _make_index(self, data, alldata, columns, indexnamerow=False): + if not _is_index_col(self.index_col) or not self.index_col: + index = None + + elif not self._has_complex_date_col: + index = self._get_simple_index(alldata, columns) + index = self._agg_index(index) + elif self._has_complex_date_col: + if not self._name_processed: + (self.index_names, _, self.index_col) = _clean_index_names( + list(columns), self.index_col, self.unnamed_cols + ) + 
self._name_processed = True + index = self._get_complex_date_index(data, columns) + index = self._agg_index(index, try_parse_dates=False) + + # add names for the index + if indexnamerow: + coffset = len(indexnamerow) - len(columns) + index = index.set_names(indexnamerow[:coffset]) + + # maybe create a mi on the columns + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + return index, columns + + _implicit_index = False + + def _get_simple_index(self, data, columns): + def ix(col): + if not isinstance(col, str): + return col + raise ValueError(f"Index {col} invalid") + + to_remove = [] + index = [] + for idx in self.index_col: + i = ix(idx) + to_remove.append(i) + index.append(data[i]) + + # remove index items from content and columns, don't pop in + # loop + for i in sorted(to_remove, reverse=True): + data.pop(i) + if not self._implicit_index: + columns.pop(i) + + return index + + def _get_complex_date_index(self, data, col_names): + def _get_name(icol): + if isinstance(icol, str): + return icol + + if col_names is None: + raise ValueError(f"Must supply column order to use {icol!s} as index") + + for i, c in enumerate(col_names): + if i == icol: + return c + + to_remove = [] + index = [] + for idx in self.index_col: + name = _get_name(idx) + to_remove.append(name) + index.append(data[name]) + + # remove index items from content and columns, don't pop in + # loop + for c in sorted(to_remove, reverse=True): + data.pop(c) + col_names.remove(c) + + return index + + def _agg_index(self, index, try_parse_dates=True): + arrays = [] + + for i, arr in enumerate(index): + + if try_parse_dates and self._should_parse_dates(i): + arr = self._date_conv(arr) + + if self.na_filter: + col_na_values = self.na_values + col_na_fvalues = self.na_fvalues + else: + col_na_values = set() + col_na_fvalues = set() + + if isinstance(self.na_values, dict): + col_name = self.index_names[i] + if col_name is not None: + col_na_values, col_na_fvalues = _get_na_values( + 
col_name, self.na_values, self.na_fvalues, self.keep_default_na + ) + + arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) + arrays.append(arr) + + names = self.index_names + index = ensure_index_from_sequences(arrays, names) + + return index + + def _convert_to_ndarrays( + self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None + ): + result = {} + for c, values in dct.items(): + conv_f = None if converters is None else converters.get(c, None) + if isinstance(dtypes, dict): + cast_type = dtypes.get(c, None) + else: + # single dtype or None + cast_type = dtypes + + if self.na_filter: + col_na_values, col_na_fvalues = _get_na_values( + c, na_values, na_fvalues, self.keep_default_na + ) + else: + col_na_values, col_na_fvalues = set(), set() + + if conv_f is not None: + # conv_f applied to data before inference + if cast_type is not None: + warnings.warn( + ( + "Both a converter and dtype were specified " + f"for column {c} - only the converter will " + "be used" + ), + ParserWarning, + stacklevel=7, + ) + + try: + values = lib.map_infer(values, conv_f) + except ValueError: + mask = algorithms.isin(values, list(na_values)).view(np.uint8) + values = lib.map_infer_mask(values, conv_f, mask) + + cvals, na_count = self._infer_types( + values, set(col_na_values) | col_na_fvalues, try_num_bool=False + ) + else: + is_str_or_ea_dtype = is_string_dtype( + cast_type + ) or is_extension_array_dtype(cast_type) + # skip inference if specified dtype is object + # or casting to an EA + try_num_bool = not (cast_type and is_str_or_ea_dtype) + + # general type inference and conversion + cvals, na_count = self._infer_types( + values, set(col_na_values) | col_na_fvalues, try_num_bool + ) + + # type specified in dtype param or cast_type is an EA + if cast_type and ( + not is_dtype_equal(cvals, cast_type) + or is_extension_array_dtype(cast_type) + ): + try: + if ( + is_bool_dtype(cast_type) + and not is_categorical_dtype(cast_type) + and na_count > 0 + ): 
+ raise ValueError(f"Bool column has NA values in column {c}") + except (AttributeError, TypeError): + # invalid input to is_bool_dtype + pass + cvals = self._cast_types(cvals, cast_type, c) + + result[c] = cvals + if verbose and na_count: + print(f"Filled {na_count} NA values in column {c!s}") + return result + + def _infer_types(self, values, na_values, try_num_bool=True): + """ + Infer types of values, possibly casting + + Parameters + ---------- + values : ndarray + na_values : set + try_num_bool : bool, default try + try to cast values to numeric (first preference) or boolean + + Returns + ------- + converted : ndarray + na_count : int + """ + na_count = 0 + if issubclass(values.dtype.type, (np.number, np.bool_)): + mask = algorithms.isin(values, list(na_values)) + na_count = mask.sum() + if na_count > 0: + if is_integer_dtype(values): + values = values.astype(np.float64) + np.putmask(values, mask, np.nan) + return values, na_count + + if try_num_bool and is_object_dtype(values.dtype): + # exclude e.g DatetimeIndex here + try: + result = lib.maybe_convert_numeric(values, na_values, False) + except (ValueError, TypeError): + # e.g. 
encountering datetime string gets ValueError + # TypeError can be raised in floatify + result = values + na_count = parsers.sanitize_objects(result, na_values, False) + else: + na_count = isna(result).sum() + else: + result = values + if values.dtype == np.object_: + na_count = parsers.sanitize_objects(values, na_values, False) + + if result.dtype == np.object_ and try_num_bool: + result = libops.maybe_convert_bool( + np.asarray(values), + true_values=self.true_values, + false_values=self.false_values, + ) + + return result, na_count + + def _cast_types(self, values, cast_type, column): + """ + Cast values to specified type + + Parameters + ---------- + values : ndarray + cast_type : string or np.dtype + dtype to cast values to + column : string + column name - used only for error reporting + + Returns + ------- + converted : ndarray + """ + + if is_categorical_dtype(cast_type): + known_cats = ( + isinstance(cast_type, CategoricalDtype) + and cast_type.categories is not None + ) + + if not is_object_dtype(values) and not known_cats: + # XXX this is for consistency with + # c-parser which parses all categories + # as strings + values = astype_nansafe(values, str) + + cats = Index(values).unique().dropna() + values = Categorical._from_inferred_categories( + cats, cats.get_indexer(values), cast_type, true_values=self.true_values + ) + + # use the EA's implementation of casting + elif is_extension_array_dtype(cast_type): + # ensure cast_type is an actual dtype and not a string + cast_type = pandas_dtype(cast_type) + array_type = cast_type.construct_array_type() + try: + return array_type._from_sequence_of_strings(values, dtype=cast_type) + except NotImplementedError: + raise NotImplementedError( + f"Extension Array: {array_type} must implement " + "_from_sequence_of_strings in order " + "to be used in parser methods" + ) + + else: + try: + values = astype_nansafe(values, cast_type, copy=True, skipna=True) + except ValueError: + raise ValueError( + f"Unable to convert 
column {column} to type {cast_type}" + ) + return values + + def _do_date_conversions(self, names, data): + # returns data, columns + + if self.parse_dates is not None: + data, names = _process_date_conversion( + data, + self._date_conv, + self.parse_dates, + self.index_col, + self.index_names, + names, + keep_date_col=self.keep_date_col, + ) + + return names, data + + +class CParserWrapper(ParserBase): + """ + + """ + + def __init__(self, src, **kwds): + self.kwds = kwds + kwds = kwds.copy() + + ParserBase.__init__(self, kwds) + + encoding = kwds.get("encoding") + + if kwds.get("compression") is None and encoding: + if isinstance(src, str): + src = open(src, "rb") + self.handles.append(src) + + # Handle the file object with universal line mode enabled. + # We will handle the newline character ourselves later on. + if hasattr(src, "read") and not hasattr(src, "encoding"): + src = TextIOWrapper(src, encoding=encoding, newline="") + + kwds["encoding"] = "utf-8" + + # #2442 + kwds["allow_leading_cols"] = self.index_col is not False + + # GH20529, validate usecol arg before TextReader + self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"]) + kwds["usecols"] = self.usecols + + self._reader = parsers.TextReader(src, **kwds) + self.unnamed_cols = self._reader.unnamed_cols + + passed_names = self.names is None + + if self._reader.header is None: + self.names = None + else: + if len(self._reader.header) > 1: + # we have a multi index in the columns + ( + self.names, + self.index_names, + self.col_names, + passed_names, + ) = self._extract_multi_indexer_columns( + self._reader.header, self.index_names, self.col_names, passed_names + ) + else: + self.names = list(self._reader.header[0]) + + if self.names is None: + if self.prefix: + self.names = [ + f"{self.prefix}{i}" for i in range(self._reader.table_width) + ] + else: + self.names = list(range(self._reader.table_width)) + + # gh-9755 + # + # need to set orig_names here first + # so that proper indexing 
can be done + # with _set_noconvert_columns + # + # once names has been filtered, we will + # then set orig_names again to names + self.orig_names = self.names[:] + + if self.usecols: + usecols = _evaluate_usecols(self.usecols, self.orig_names) + + # GH 14671 + if self.usecols_dtype == "string" and not set(usecols).issubset( + self.orig_names + ): + _validate_usecols_names(usecols, self.orig_names) + + if len(self.names) > len(usecols): + self.names = [ + n + for i, n in enumerate(self.names) + if (i in usecols or n in usecols) + ] + + if len(self.names) < len(usecols): + _validate_usecols_names(usecols, self.names) + + self._set_noconvert_columns() + + self.orig_names = self.names + + if not self._has_complex_date_col: + if self._reader.leading_cols == 0 and _is_index_col(self.index_col): + + self._name_processed = True + (index_names, self.names, self.index_col) = _clean_index_names( + self.names, self.index_col, self.unnamed_cols + ) + + if self.index_names is None: + self.index_names = index_names + + if self._reader.header is None and not passed_names: + self.index_names = [None] * len(self.index_names) + + self._implicit_index = self._reader.leading_cols > 0 + + def close(self): + for f in self.handles: + f.close() + + # close additional handles opened by C parser (for compression) + try: + self._reader.close() + except ValueError: + pass + + def _set_noconvert_columns(self): + """ + Set the columns that should not undergo dtype conversions. + + Currently, any column that is involved with date parsing will not + undergo such conversions. + """ + names = self.orig_names + if self.usecols_dtype == "integer": + # A set of integers will be converted to a list in + # the correct order every single time. + usecols = list(self.usecols) + usecols.sort() + elif callable(self.usecols) or self.usecols_dtype not in ("empty", None): + # The names attribute should have the correct columns + # in the proper order for indexing with parse_dates. 
+ usecols = self.names[:] + else: + # Usecols is empty. + usecols = None + + def _set(x): + if usecols is not None and is_integer(x): + x = usecols[x] + + if not is_integer(x): + x = names.index(x) + + self._reader.set_noconvert(x) + + if isinstance(self.parse_dates, list): + for val in self.parse_dates: + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif isinstance(self.parse_dates, dict): + for val in self.parse_dates.values(): + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif self.parse_dates: + if isinstance(self.index_col, list): + for k in self.index_col: + _set(k) + elif self.index_col is not None: + _set(self.index_col) + + def set_error_bad_lines(self, status): + self._reader.set_error_bad_lines(int(status)) + + def read(self, nrows=None): + try: + data = self._reader.read(nrows) + except StopIteration: + if self._first_chunk: + self._first_chunk = False + names = self._maybe_dedup_names(self.orig_names) + index, columns, col_dict = _get_empty_meta( + names, + self.index_col, + self.index_names, + dtype=self.kwds.get("dtype"), + ) + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + + if self.usecols is not None: + columns = self._filter_usecols(columns) + + col_dict = dict( + filter(lambda item: item[0] in columns, col_dict.items()) + ) + + return index, columns, col_dict + + else: + raise + + # Done with first read, next time raise StopIteration + self._first_chunk = False + + names = self.names + + if self._reader.leading_cols: + if self._has_complex_date_col: + raise NotImplementedError("file structure not yet supported") + + # implicit index, no index names + arrays = [] + + for i in range(self._reader.leading_cols): + if self.index_col is None: + values = data.pop(i) + else: + values = data.pop(self.index_col[i]) + + values = self._maybe_parse_dates(values, i, try_parse_dates=True) + arrays.append(values) + + index = ensure_index_from_sequences(arrays) + + if 
self.usecols is not None: + names = self._filter_usecols(names) + + names = self._maybe_dedup_names(names) + + # rename dict keys + data = sorted(data.items()) + data = {k: v for k, (i, v) in zip(names, data)} + + names, data = self._do_date_conversions(names, data) + + else: + # rename dict keys + data = sorted(data.items()) + + # ugh, mutation + names = list(self.orig_names) + names = self._maybe_dedup_names(names) + + if self.usecols is not None: + names = self._filter_usecols(names) + + # columns as list + alldata = [x[1] for x in data] + + data = {k: v for k, (i, v) in zip(names, data)} + + names, data = self._do_date_conversions(names, data) + index, names = self._make_index(data, alldata, names) + + # maybe create a mi on the columns + names = self._maybe_make_multi_index_columns(names, self.col_names) + + return index, names, data + + def _filter_usecols(self, names): + # hackish + usecols = _evaluate_usecols(self.usecols, names) + if usecols is not None and len(names) != len(usecols): + names = [ + name for i, name in enumerate(names) if i in usecols or name in usecols + ] + return names + + def _get_index_names(self): + names = list(self._reader.header[0]) + idx_names = None + + if self._reader.leading_cols == 0 and self.index_col is not None: + (idx_names, names, self.index_col) = _clean_index_names( + names, self.index_col, self.unnamed_cols + ) + + return names, idx_names + + def _maybe_parse_dates(self, values, index, try_parse_dates=True): + if try_parse_dates and self._should_parse_dates(index): + values = self._date_conv(values) + return values + + +def TextParser(*args, **kwds): + """ + Converts lists of lists/tuples into DataFrames with proper type inference + and optional (e.g. string to datetime) conversion. 
Also enables iterating + lazily over chunks of large files + + Parameters + ---------- + data : file-like object or list + delimiter : separator character to use + dialect : str or csv.Dialect instance, optional + Ignored if delimiter is longer than 1 character + names : sequence, default + header : int, default 0 + Row to use to parse column labels. Defaults to the first row. Prior + rows will be discarded + index_col : int or list, optional + Column or columns to use as the (possibly hierarchical) index + has_index_names: bool, default False + True if the cols defined in index_col have an index name and are + not in the header. + na_values : scalar, str, list-like, or dict, optional + Additional strings to recognize as NA/NaN. + keep_default_na : bool, default True + thousands : str, optional + Thousands separator + comment : str, optional + Comment out remainder of line + parse_dates : bool, default False + keep_date_col : bool, default False + date_parser : function, optional + skiprows : list of integers + Row numbers to skip + skipfooter : int + Number of line at bottom of file to skip + converters : dict, optional + Dict of functions for converting values in certain columns. Keys can + either be integers or column labels, values are functions that take one + input argument, the cell (not column) content, and return the + transformed content. + encoding : str, optional + Encoding to use for UTF when reading/writing (ex. 'utf-8') + squeeze : bool, default False + returns Series if only one column. + infer_datetime_format: bool, default False + If True and `parse_dates` is True for a column, try to infer the + datetime format based on the first datetime string. If the format + can be inferred, there often will be a large parsing speed-up. + float_precision : str, optional + Specifies which converter the C engine should use for floating-point + values. 
The options are None for the ordinary converter, + 'high' for the high-precision converter, and 'round_trip' for the + round-trip converter. + """ + kwds["engine"] = "python" + return TextFileReader(*args, **kwds) + + +def count_empty_vals(vals): + return sum(1 for v in vals if v == "" or v is None) + + +class PythonParser(ParserBase): + def __init__(self, f, **kwds): + """ + Workhorse function for processing nested list into DataFrame + """ + ParserBase.__init__(self, kwds) + + self.data = None + self.buf = [] + self.pos = 0 + self.line_pos = 0 + + self.encoding = kwds["encoding"] + self.compression = kwds["compression"] + self.memory_map = kwds["memory_map"] + self.skiprows = kwds["skiprows"] + + if callable(self.skiprows): + self.skipfunc = self.skiprows + else: + self.skipfunc = lambda x: x in self.skiprows + + self.skipfooter = _validate_skipfooter_arg(kwds["skipfooter"]) + self.delimiter = kwds["delimiter"] + + self.quotechar = kwds["quotechar"] + if isinstance(self.quotechar, str): + self.quotechar = str(self.quotechar) + + self.escapechar = kwds["escapechar"] + self.doublequote = kwds["doublequote"] + self.skipinitialspace = kwds["skipinitialspace"] + self.lineterminator = kwds["lineterminator"] + self.quoting = kwds["quoting"] + self.usecols, _ = _validate_usecols_arg(kwds["usecols"]) + self.skip_blank_lines = kwds["skip_blank_lines"] + + self.warn_bad_lines = kwds["warn_bad_lines"] + self.error_bad_lines = kwds["error_bad_lines"] + + self.names_passed = kwds["names"] or None + + self.has_index_names = False + if "has_index_names" in kwds: + self.has_index_names = kwds["has_index_names"] + + self.verbose = kwds["verbose"] + self.converters = kwds["converters"] + + self.dtype = kwds["dtype"] + self.thousands = kwds["thousands"] + self.decimal = kwds["decimal"] + + self.comment = kwds["comment"] + self._comment_lines = [] + + f, handles = get_handle( + f, + "r", + encoding=self.encoding, + compression=self.compression, + memory_map=self.memory_map, + ) + 
self.handles.extend(handles) + + # Set self.data to something that can read lines. + if hasattr(f, "readline"): + self._make_reader(f) + else: + self.data = f + + # Get columns in two steps: infer from data, then + # infer column indices from self.usecols if it is specified. + self._col_indices = None + ( + self.columns, + self.num_original_columns, + self.unnamed_cols, + ) = self._infer_columns() + + # Now self.columns has the set of columns that we will process. + # The original set is stored in self.original_columns. + if len(self.columns) > 1: + # we are processing a multi index column + ( + self.columns, + self.index_names, + self.col_names, + _, + ) = self._extract_multi_indexer_columns( + self.columns, self.index_names, self.col_names + ) + # Update list of original names to include all indices. + self.num_original_columns = len(self.columns) + else: + self.columns = self.columns[0] + + # get popped off for index + self.orig_names = list(self.columns) + + # needs to be cleaned/refactored + # multiple date column thing turning into a real spaghetti factory + + if not self._has_complex_date_col: + (index_names, self.orig_names, self.columns) = self._get_index_name( + self.columns + ) + self._name_processed = True + if self.index_names is None: + self.index_names = index_names + + if self.parse_dates: + self._no_thousands_columns = self._set_no_thousands_columns() + else: + self._no_thousands_columns = None + + if len(self.decimal) != 1: + raise ValueError("Only length-1 decimal markers supported") + + if self.thousands is None: + self.nonnum = re.compile(fr"[^-^0-9^{self.decimal}]+") + else: + self.nonnum = re.compile(fr"[^-^0-9^{self.thousands}^{self.decimal}]+") + + def _set_no_thousands_columns(self): + # Create a set of column ids that are not to be stripped of thousands + # operators. 
+ noconvert_columns = set() + + def _set(x): + if is_integer(x): + noconvert_columns.add(x) + else: + noconvert_columns.add(self.columns.index(x)) + + if isinstance(self.parse_dates, list): + for val in self.parse_dates: + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif isinstance(self.parse_dates, dict): + for val in self.parse_dates.values(): + if isinstance(val, list): + for k in val: + _set(k) + else: + _set(val) + + elif self.parse_dates: + if isinstance(self.index_col, list): + for k in self.index_col: + _set(k) + elif self.index_col is not None: + _set(self.index_col) + + return noconvert_columns + + def _make_reader(self, f): + sep = self.delimiter + + if sep is None or len(sep) == 1: + if self.lineterminator: + raise ValueError( + "Custom line terminators not supported in python parser (yet)" + ) + + class MyDialect(csv.Dialect): + delimiter = self.delimiter + quotechar = self.quotechar + escapechar = self.escapechar + doublequote = self.doublequote + skipinitialspace = self.skipinitialspace + quoting = self.quoting + lineterminator = "\n" + + dia = MyDialect + + sniff_sep = True + + if sep is not None: + sniff_sep = False + dia.delimiter = sep + # attempt to sniff the delimiter + if sniff_sep: + line = f.readline() + while self.skipfunc(self.pos): + self.pos += 1 + line = f.readline() + + line = self._check_comments([line])[0] + + self.pos += 1 + self.line_pos += 1 + sniffed = csv.Sniffer().sniff(line) + dia.delimiter = sniffed.delimiter + + # Note: self.encoding is irrelevant here + line_rdr = csv.reader(StringIO(line), dialect=dia) + self.buf.extend(list(line_rdr)) + + # Note: self.encoding is irrelevant here + reader = csv.reader(f, dialect=dia, strict=True) + + else: + + def _read(): + line = f.readline() + pat = re.compile(sep) + + yield pat.split(line.strip()) + + for line in f: + yield pat.split(line.strip()) + + reader = _read() + + self.data = reader + + def read(self, rows=None): + try: + content = 
self._get_lines(rows) + except StopIteration: + if self._first_chunk: + content = [] + else: + raise + + # done with first read, next time raise StopIteration + self._first_chunk = False + + columns = list(self.orig_names) + if not len(content): # pragma: no cover + # DataFrame with the right metadata, even though it's length 0 + names = self._maybe_dedup_names(self.orig_names) + index, columns, col_dict = _get_empty_meta( + names, self.index_col, self.index_names, self.dtype + ) + columns = self._maybe_make_multi_index_columns(columns, self.col_names) + return index, columns, col_dict + + # handle new style for names in index + count_empty_content_vals = count_empty_vals(content[0]) + indexnamerow = None + if self.has_index_names and count_empty_content_vals == len(columns): + indexnamerow = content[0] + content = content[1:] + + alldata = self._rows_to_cols(content) + data = self._exclude_implicit_index(alldata) + + columns = self._maybe_dedup_names(self.columns) + columns, data = self._do_date_conversions(columns, data) + + data = self._convert_data(data) + index, columns = self._make_index(data, alldata, columns, indexnamerow) + + return index, columns, data + + def _exclude_implicit_index(self, alldata): + names = self._maybe_dedup_names(self.orig_names) + + if self._implicit_index: + excl_indices = self.index_col + + data = {} + offset = 0 + for i, col in enumerate(names): + while i + offset in excl_indices: + offset += 1 + data[col] = alldata[i + offset] + else: + data = {k: v for k, v in zip(names, alldata)} + + return data + + # legacy + def get_chunk(self, size=None): + if size is None: + size = self.chunksize + return self.read(rows=size) + + def _convert_data(self, data): + # apply converters + def _clean_mapping(mapping): + "converts col numbers to names" + clean = {} + for col, v in mapping.items(): + if isinstance(col, int) and col not in self.orig_names: + col = self.orig_names[col] + clean[col] = v + return clean + + clean_conv = 
_clean_mapping(self.converters) + if not isinstance(self.dtype, dict): + # handles single dtype applied to all columns + clean_dtypes = self.dtype + else: + clean_dtypes = _clean_mapping(self.dtype) + + # Apply NA values. + clean_na_values = {} + clean_na_fvalues = {} + + if isinstance(self.na_values, dict): + for col in self.na_values: + na_value = self.na_values[col] + na_fvalue = self.na_fvalues[col] + + if isinstance(col, int) and col not in self.orig_names: + col = self.orig_names[col] + + clean_na_values[col] = na_value + clean_na_fvalues[col] = na_fvalue + else: + clean_na_values = self.na_values + clean_na_fvalues = self.na_fvalues + + return self._convert_to_ndarrays( + data, + clean_na_values, + clean_na_fvalues, + self.verbose, + clean_conv, + clean_dtypes, + ) + + def _infer_columns(self): + names = self.names + num_original_columns = 0 + clear_buffer = True + unnamed_cols = set() + + if self.header is not None: + header = self.header + + if isinstance(header, (list, tuple, np.ndarray)): + have_mi_columns = len(header) > 1 + # we have a mi columns, so read an extra line + if have_mi_columns: + header = list(header) + [header[-1] + 1] + else: + have_mi_columns = False + header = [header] + + columns = [] + for level, hr in enumerate(header): + try: + line = self._buffered_line() + + while self.line_pos <= hr: + line = self._next_line() + + except StopIteration: + if self.line_pos < hr: + raise ValueError( + f"Passed header={hr} but only {self.line_pos + 1} lines in " + "file" + ) + + # We have an empty file, so check + # if columns are provided. 
That will + # serve as the 'line' for parsing + if have_mi_columns and hr > 0: + if clear_buffer: + self._clear_buffer() + columns.append([None] * len(columns[-1])) + return columns, num_original_columns, unnamed_cols + + if not self.names: + raise EmptyDataError("No columns to parse from file") + + line = self.names[:] + + this_columns = [] + this_unnamed_cols = [] + + for i, c in enumerate(line): + if c == "": + if have_mi_columns: + col_name = f"Unnamed: {i}_level_{level}" + else: + col_name = f"Unnamed: {i}" + + this_unnamed_cols.append(i) + this_columns.append(col_name) + else: + this_columns.append(c) + + if not have_mi_columns and self.mangle_dupe_cols: + counts = defaultdict(int) + + for i, col in enumerate(this_columns): + cur_count = counts[col] + + while cur_count > 0: + counts[col] = cur_count + 1 + col = f"{col}.{cur_count}" + cur_count = counts[col] + + this_columns[i] = col + counts[col] = cur_count + 1 + elif have_mi_columns: + + # if we have grabbed an extra line, but its not in our + # format so save in the buffer, and create an blank extra + # line for the rest of the parsing code + if hr == header[-1]: + lc = len(this_columns) + ic = len(self.index_col) if self.index_col is not None else 0 + unnamed_count = len(this_unnamed_cols) + + if lc != unnamed_count and lc - ic > unnamed_count: + clear_buffer = False + this_columns = [None] * lc + self.buf = [self.buf[-1]] + + columns.append(this_columns) + unnamed_cols.update({this_columns[i] for i in this_unnamed_cols}) + + if len(columns) == 1: + num_original_columns = len(this_columns) + + if clear_buffer: + self._clear_buffer() + + if names is not None: + if (self.usecols is not None and len(names) != len(self.usecols)) or ( + self.usecols is None and len(names) != len(columns[0]) + ): + raise ValueError( + "Number of passed names did not match " + "number of header fields in the file" + ) + if len(columns) > 1: + raise TypeError("Cannot pass names with multi-index columns") + + if self.usecols is 
not None: + # Set _use_cols. We don't store columns because they are + # overwritten. + self._handle_usecols(columns, names) + else: + self._col_indices = None + num_original_columns = len(names) + columns = [names] + else: + columns = self._handle_usecols(columns, columns[0]) + else: + try: + line = self._buffered_line() + + except StopIteration: + if not names: + raise EmptyDataError("No columns to parse from file") + + line = names[:] + + ncols = len(line) + num_original_columns = ncols + + if not names: + if self.prefix: + columns = [[f"{self.prefix}{i}" for i in range(ncols)]] + else: + columns = [list(range(ncols))] + columns = self._handle_usecols(columns, columns[0]) + else: + if self.usecols is None or len(names) >= num_original_columns: + columns = self._handle_usecols([names], names) + num_original_columns = len(names) + else: + if not callable(self.usecols) and len(names) != len(self.usecols): + raise ValueError( + "Number of passed names did not match number of " + "header fields in the file" + ) + # Ignore output but set used columns. + self._handle_usecols([names], names) + columns = [names] + num_original_columns = ncols + + return columns, num_original_columns, unnamed_cols + + def _handle_usecols(self, columns, usecols_key): + """ + Sets self._col_indices + + usecols_key is used if there are string usecols. + """ + if self.usecols is not None: + if callable(self.usecols): + col_indices = _evaluate_usecols(self.usecols, usecols_key) + elif any(isinstance(u, str) for u in self.usecols): + if len(columns) > 1: + raise ValueError( + "If using multiple headers, usecols must be integers." 
+ ) + col_indices = [] + + for col in self.usecols: + if isinstance(col, str): + try: + col_indices.append(usecols_key.index(col)) + except ValueError: + _validate_usecols_names(self.usecols, usecols_key) + else: + col_indices.append(col) + else: + col_indices = self.usecols + + columns = [ + [n for i, n in enumerate(column) if i in col_indices] + for column in columns + ] + self._col_indices = col_indices + return columns + + def _buffered_line(self): + """ + Return a line from buffer, filling buffer if required. + """ + if len(self.buf) > 0: + return self.buf[0] + else: + return self._next_line() + + def _check_for_bom(self, first_row): + """ + Checks whether the file begins with the BOM character. + If it does, remove it. In addition, if there is quoting + in the field subsequent to the BOM, remove it as well + because it technically takes place at the beginning of + the name, not the middle of it. + """ + # first_row will be a list, so we need to check + # that that list is not empty before proceeding. + if not first_row: + return first_row + + # The first element of this row is the one that could have the + # BOM that we want to remove. Check that the first element is a + # string before proceeding. + if not isinstance(first_row[0], str): + return first_row + + # Check that the string is not empty, as that would + # obviously not have a BOM at the start of it. + if not first_row[0]: + return first_row + + # Since the string is non-empty, check that it does + # in fact begin with a BOM. + first_elt = first_row[0][0] + if first_elt != _BOM: + return first_row + + first_row_bom = first_row[0] + + if len(first_row_bom) > 1 and first_row_bom[1] == self.quotechar: + start = 2 + quote = first_row_bom[1] + end = first_row_bom[2:].index(quote) + 2 + + # Extract the data between the quotation marks + new_row = first_row_bom[start:end] + + # Extract any remaining data after the second + # quotation mark. 
+ if len(first_row_bom) > end + 1: + new_row += first_row_bom[end + 1 :] + return [new_row] + first_row[1:] + + elif len(first_row_bom) > 1: + return [first_row_bom[1:]] + else: + # First row is just the BOM, so we + # return an empty string. + return [""] + + def _is_line_empty(self, line): + """ + Check if a line is empty or not. + + Parameters + ---------- + line : str, array-like + The line of data to check. + + Returns + ------- + boolean : Whether or not the line is empty. + """ + return not line or all(not x for x in line) + + def _next_line(self): + if isinstance(self.data, list): + while self.skipfunc(self.pos): + self.pos += 1 + + while True: + try: + line = self._check_comments([self.data[self.pos]])[0] + self.pos += 1 + # either uncommented or blank to begin with + if not self.skip_blank_lines and ( + self._is_line_empty(self.data[self.pos - 1]) or line + ): + break + elif self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + if ret: + line = ret[0] + break + except IndexError: + raise StopIteration + else: + while self.skipfunc(self.pos): + self.pos += 1 + next(self.data) + + while True: + orig_line = self._next_iter_line(row_num=self.pos + 1) + self.pos += 1 + + if orig_line is not None: + line = self._check_comments([orig_line])[0] + + if self.skip_blank_lines: + ret = self._remove_empty_lines([line]) + + if ret: + line = ret[0] + break + elif self._is_line_empty(orig_line) or line: + break + + # This was the first line of the file, + # which could contain the BOM at the + # beginning of it. + if self.pos == 1: + line = self._check_for_bom(line) + + self.line_pos += 1 + self.buf.append(line) + return line + + def _alert_malformed(self, msg, row_num): + """ + Alert a user about a malformed row. + + If `self.error_bad_lines` is True, the alert will be `ParserError`. + If `self.warn_bad_lines` is True, the alert will be printed out. + + Parameters + ---------- + msg : The error message to display. 
+ row_num : The row number where the parsing error occurred. + Because this row number is displayed, we 1-index, + even though we 0-index internally. + """ + + if self.error_bad_lines: + raise ParserError(msg) + elif self.warn_bad_lines: + base = f"Skipping line {row_num}: " + sys.stderr.write(base + msg + "\n") + + def _next_iter_line(self, row_num): + """ + Wrapper around iterating through `self.data` (CSV source). + + When a CSV error is raised, we check for specific + error messages that allow us to customize the + error message displayed to the user. + + Parameters + ---------- + row_num : The row number of the line being parsed. + """ + + try: + return next(self.data) + except csv.Error as e: + if self.warn_bad_lines or self.error_bad_lines: + msg = str(e) + + if "NULL byte" in msg or "line contains NUL" in msg: + msg = ( + "NULL byte detected. This byte " + "cannot be processed in Python's " + "native csv library at the moment, " + "so please pass in engine='c' instead" + ) + + if self.skipfooter > 0: + reason = ( + "Error could possibly be due to " + "parsing errors in the skipped footer rows " + "(the skipfooter keyword is only applied " + "after Python's csv library has parsed " + "all rows)." + ) + msg += ". " + reason + + self._alert_malformed(msg, row_num) + return None + + def _check_comments(self, lines): + if self.comment is None: + return lines + ret = [] + for l in lines: + rl = [] + for x in l: + if not isinstance(x, str) or self.comment not in x: + rl.append(x) + else: + x = x[: x.find(self.comment)] + if len(x) > 0: + rl.append(x) + break + ret.append(rl) + return ret + + def _remove_empty_lines(self, lines): + """ + Iterate through the lines and remove any that are + either empty or contain only one whitespace value + + Parameters + ---------- + lines : array-like + The array of lines that we are to filter. + + Returns + ------- + filtered_lines : array-like + The same array of lines with the "empty" ones removed. 
+ """ + + ret = [] + for l in lines: + # Remove empty lines and lines with only one whitespace value + if ( + len(l) > 1 + or len(l) == 1 + and (not isinstance(l[0], str) or l[0].strip()) + ): + ret.append(l) + return ret + + def _check_thousands(self, lines): + if self.thousands is None: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.thousands, replace="" + ) + + def _search_replace_num_columns(self, lines, search, replace): + ret = [] + for l in lines: + rl = [] + for i, x in enumerate(l): + if ( + not isinstance(x, str) + or search not in x + or (self._no_thousands_columns and i in self._no_thousands_columns) + or self.nonnum.search(x.strip()) + ): + rl.append(x) + else: + rl.append(x.replace(search, replace)) + ret.append(rl) + return ret + + def _check_decimal(self, lines): + if self.decimal == _parser_defaults["decimal"]: + return lines + + return self._search_replace_num_columns( + lines=lines, search=self.decimal, replace="." + ) + + def _clear_buffer(self): + self.buf = [] + + _implicit_index = False + + def _get_index_name(self, columns): + """ + Try several cases to get lines: + + 0) There are headers on row 0 and row 1 and their + total summed lengths equals the length of the next line. + Treat row 0 as columns and row 1 as indices + 1) Look for implicit index: there are more columns + on row 1 than row 0. If this is true, assume that row + 1 lists index columns and row 0 lists normal columns. + 2) Get index from the columns if it was listed. 
+ """ + orig_names = list(columns) + columns = list(columns) + + try: + line = self._next_line() + except StopIteration: + line = None + + try: + next_line = self._next_line() + except StopIteration: + next_line = None + + # implicitly index_col=0 b/c 1 fewer column names + implicit_first_cols = 0 + if line is not None: + # leave it 0, #2442 + # Case 1 + if self.index_col is not False: + implicit_first_cols = len(line) - self.num_original_columns + + # Case 0 + if next_line is not None: + if len(next_line) == len(line) + self.num_original_columns: + # column and index names on diff rows + self.index_col = list(range(len(line))) + self.buf = self.buf[1:] + + for c in reversed(line): + columns.insert(0, c) + + # Update list of original names to include all indices. + orig_names = list(columns) + self.num_original_columns = len(columns) + return line, orig_names, columns + + if implicit_first_cols > 0: + # Case 1 + self._implicit_index = True + if self.index_col is None: + self.index_col = list(range(implicit_first_cols)) + + index_name = None + + else: + # Case 2 + (index_name, columns_, self.index_col) = _clean_index_names( + columns, self.index_col, self.unnamed_cols + ) + + return index_name, orig_names, columns + + def _rows_to_cols(self, content): + col_len = self.num_original_columns + + if self._implicit_index: + col_len += len(self.index_col) + + max_len = max(len(row) for row in content) + + # Check that there are no rows with too many + # elements in their row (rows with too few + # elements are padded with NaN). 
+ if max_len > col_len and self.index_col is not False and self.usecols is None: + + footers = self.skipfooter if self.skipfooter else 0 + bad_lines = [] + + iter_content = enumerate(content) + content_len = len(content) + content = [] + + for (i, l) in iter_content: + actual_len = len(l) + + if actual_len > col_len: + if self.error_bad_lines or self.warn_bad_lines: + row_num = self.pos - (content_len - i + footers) + bad_lines.append((row_num, actual_len)) + + if self.error_bad_lines: + break + else: + content.append(l) + + for row_num, actual_len in bad_lines: + msg = ( + f"Expected {col_len} fields in line {row_num + 1}, saw " + f"{actual_len}" + ) + if ( + self.delimiter + and len(self.delimiter) > 1 + and self.quoting != csv.QUOTE_NONE + ): + # see gh-13374 + reason = ( + "Error could possibly be due to quotes being " + "ignored when a multi-char delimiter is used." + ) + msg += ". " + reason + + self._alert_malformed(msg, row_num + 1) + + # see gh-13320 + zipped_content = list(lib.to_object_array(content, min_width=col_len).T) + + if self.usecols: + if self._implicit_index: + zipped_content = [ + a + for i, a in enumerate(zipped_content) + if ( + i < len(self.index_col) + or i - len(self.index_col) in self._col_indices + ) + ] + else: + zipped_content = [ + a for i, a in enumerate(zipped_content) if i in self._col_indices + ] + return zipped_content + + def _get_lines(self, rows=None): + lines = self.buf + new_rows = None + + # already fetched some number + if rows is not None: + # we already have the lines in the buffer + if len(self.buf) >= rows: + new_rows, self.buf = self.buf[:rows], self.buf[rows:] + + # need some lines + else: + rows -= len(self.buf) + + if new_rows is None: + if isinstance(self.data, list): + if self.pos > len(self.data): + raise StopIteration + if rows is None: + new_rows = self.data[self.pos :] + new_pos = len(self.data) + else: + new_rows = self.data[self.pos : self.pos + rows] + new_pos = self.pos + rows + + # Check for stop rows. 
n.b.: self.skiprows is a set. + if self.skiprows: + new_rows = [ + row + for i, row in enumerate(new_rows) + if not self.skipfunc(i + self.pos) + ] + + lines.extend(new_rows) + self.pos = new_pos + + else: + new_rows = [] + try: + if rows is not None: + for _ in range(rows): + new_rows.append(next(self.data)) + lines.extend(new_rows) + else: + rows = 0 + + while True: + new_row = self._next_iter_line(row_num=self.pos + rows + 1) + rows += 1 + + if new_row is not None: + new_rows.append(new_row) + + except StopIteration: + if self.skiprows: + new_rows = [ + row + for i, row in enumerate(new_rows) + if not self.skipfunc(i + self.pos) + ] + lines.extend(new_rows) + if len(lines) == 0: + raise + self.pos += len(new_rows) + + self.buf = [] + else: + lines = new_rows + + if self.skipfooter: + lines = lines[: -self.skipfooter] + + lines = self._check_comments(lines) + if self.skip_blank_lines: + lines = self._remove_empty_lines(lines) + lines = self._check_thousands(lines) + return self._check_decimal(lines) + + +def _make_date_converter( + date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True +): + def converter(*date_cols): + if date_parser is None: + strs = parsing._concat_date_cols(date_cols) + + try: + return tools.to_datetime( + ensure_object(strs), + utc=None, + dayfirst=dayfirst, + errors="ignore", + infer_datetime_format=infer_datetime_format, + cache=cache_dates, + ).to_numpy() + + except ValueError: + return tools.to_datetime( + parsing.try_parse_dates(strs, dayfirst=dayfirst), cache=cache_dates + ) + else: + try: + result = tools.to_datetime( + date_parser(*date_cols), errors="ignore", cache=cache_dates + ) + if isinstance(result, datetime.datetime): + raise Exception("scalar parser") + return result + except Exception: + try: + return tools.to_datetime( + parsing.try_parse_dates( + parsing._concat_date_cols(date_cols), + parser=date_parser, + dayfirst=dayfirst, + ), + errors="ignore", + ) + except Exception: + return 
generic_parser(date_parser, *date_cols) + + return converter + + +def _process_date_conversion( + data_dict, + converter, + parse_spec, + index_col, + index_names, + columns, + keep_date_col=False, +): + def _isindex(colspec): + return (isinstance(index_col, list) and colspec in index_col) or ( + isinstance(index_names, list) and colspec in index_names + ) + + new_cols = [] + new_data = {} + + orig_names = columns + columns = list(columns) + + date_cols = set() + + if parse_spec is None or isinstance(parse_spec, bool): + return data_dict, columns + + if isinstance(parse_spec, list): + # list of column lists + for colspec in parse_spec: + if is_scalar(colspec): + if isinstance(colspec, int) and colspec not in data_dict: + colspec = orig_names[colspec] + if _isindex(colspec): + continue + data_dict[colspec] = converter(data_dict[colspec]) + else: + new_name, col, old_names = _try_convert_dates( + converter, colspec, data_dict, orig_names + ) + if new_name in data_dict: + raise ValueError(f"New date column already in dict {new_name}") + new_data[new_name] = col + new_cols.append(new_name) + date_cols.update(old_names) + + elif isinstance(parse_spec, dict): + # dict of new name to column list + for new_name, colspec in parse_spec.items(): + if new_name in data_dict: + raise ValueError(f"Date column {new_name} already in dict") + + _, col, old_names = _try_convert_dates( + converter, colspec, data_dict, orig_names + ) + + new_data[new_name] = col + new_cols.append(new_name) + date_cols.update(old_names) + + data_dict.update(new_data) + new_cols.extend(columns) + + if not keep_date_col: + for c in list(date_cols): + data_dict.pop(c) + new_cols.remove(c) + + return data_dict, new_cols + + +def _try_convert_dates(parser, colspec, data_dict, columns): + colset = set(columns) + colnames = [] + + for c in colspec: + if c in colset: + colnames.append(c) + elif isinstance(c, int) and c not in columns: + colnames.append(columns[c]) + else: + colnames.append(c) + + new_name = 
"_".join(str(x) for x in colnames) + to_parse = [data_dict[c] for c in colnames if c in data_dict] + + new_col = parser(*to_parse) + return new_name, new_col, colnames + + +def _clean_na_values(na_values, keep_default_na=True): + + if na_values is None: + if keep_default_na: + na_values = STR_NA_VALUES + else: + na_values = set() + na_fvalues = set() + elif isinstance(na_values, dict): + old_na_values = na_values.copy() + na_values = {} # Prevent aliasing. + + # Convert the values in the na_values dictionary + # into array-likes for further use. This is also + # where we append the default NaN values, provided + # that `keep_default_na=True`. + for k, v in old_na_values.items(): + if not is_list_like(v): + v = [v] + + if keep_default_na: + v = set(v) | STR_NA_VALUES + + na_values[k] = v + na_fvalues = {k: _floatify_na_values(v) for k, v in na_values.items()} + else: + if not is_list_like(na_values): + na_values = [na_values] + na_values = _stringify_na_values(na_values) + if keep_default_na: + na_values = na_values | STR_NA_VALUES + + na_fvalues = _floatify_na_values(na_values) + + return na_values, na_fvalues + + +def _clean_index_names(columns, index_col, unnamed_cols): + if not _is_index_col(index_col): + return None, columns, index_col + + columns = list(columns) + + cp_cols = list(columns) + index_names = [] + + # don't mutate + index_col = list(index_col) + + for i, c in enumerate(index_col): + if isinstance(c, str): + index_names.append(c) + for j, name in enumerate(cp_cols): + if name == c: + index_col[i] = j + columns.remove(name) + break + else: + name = cp_cols[c] + columns.remove(name) + index_names.append(name) + + # Only clean index names that were placeholders. 
+ for i, name in enumerate(index_names): + if isinstance(name, str) and name in unnamed_cols: + index_names[i] = None + + return index_names, columns, index_col + + +def _get_empty_meta(columns, index_col, index_names, dtype=None): + columns = list(columns) + + # Convert `dtype` to a defaultdict of some kind. + # This will enable us to write `dtype[col_name]` + # without worrying about KeyError issues later on. + if not isinstance(dtype, dict): + # if dtype == None, default will be np.object. + default_dtype = dtype or np.object + dtype = defaultdict(lambda: default_dtype) + else: + # Save a copy of the dictionary. + _dtype = dtype.copy() + dtype = defaultdict(lambda: np.object) + + # Convert column indexes to column names. + for k, v in _dtype.items(): + col = columns[k] if is_integer(k) else k + dtype[col] = v + + # Even though we have no data, the "index" of the empty DataFrame + # could for example still be an empty MultiIndex. Thus, we need to + # check whether we have any index columns specified, via either: + # + # 1) index_col (column indices) + # 2) index_names (column names) + # + # Both must be non-null to ensure a successful construction. Otherwise, + # we have to create a generic empty Index. 
+ if (index_col is None or index_col is False) or index_names is None: + index = Index([]) + else: + data = [Series([], dtype=dtype[name]) for name in index_names] + index = ensure_index_from_sequences(data, names=index_names) + index_col.sort() + + for i, n in enumerate(index_col): + columns.pop(n - i) + + col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns} + + return index, columns, col_dict + + +def _floatify_na_values(na_values): + # create float versions of the na_values + result = set() + for v in na_values: + try: + v = float(v) + if not np.isnan(v): + result.add(v) + except (TypeError, ValueError, OverflowError): + pass + return result + + +def _stringify_na_values(na_values): + """ return a stringified and numeric for these values """ + result = [] + for x in na_values: + result.append(str(x)) + result.append(x) + try: + v = float(x) + + # we are like 999 here + if v == int(v): + v = int(v) + result.append(f"{v}.0") + result.append(str(v)) + + result.append(v) + except (TypeError, ValueError, OverflowError): + pass + try: + result.append(int(x)) + except (TypeError, ValueError, OverflowError): + pass + return set(result) + + +def _get_na_values(col, na_values, na_fvalues, keep_default_na): + """ + Get the NaN values for a given column. + + Parameters + ---------- + col : str + The name of the column. + na_values : array-like, dict + The object listing the NaN values as strings. + na_fvalues : array-like, dict + The object listing the NaN values as floats. + keep_default_na : bool + If `na_values` is a dict, and the column is not mapped in the + dictionary, whether to return the default NaN values or the empty set. + + Returns + ------- + nan_tuple : A length-two tuple composed of + + 1) na_values : the string NaN values for that column. + 2) na_fvalues : the float NaN values for that column. 
+ """ + + if isinstance(na_values, dict): + if col in na_values: + return na_values[col], na_fvalues[col] + else: + if keep_default_na: + return STR_NA_VALUES, set() + + return set(), set() + else: + return na_values, na_fvalues + + +def _get_col_names(colspec, columns): + colset = set(columns) + colnames = [] + for c in colspec: + if c in colset: + colnames.append(c) + elif isinstance(c, int): + colnames.append(columns[c]) + return colnames + + +class FixedWidthReader(abc.Iterator): + """ + A reader of fixed-width lines. + """ + + def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=100): + self.f = f + self.buffer = None + self.delimiter = "\r\n" + delimiter if delimiter else "\n\r\t " + self.comment = comment + if colspecs == "infer": + self.colspecs = self.detect_colspecs( + infer_nrows=infer_nrows, skiprows=skiprows + ) + else: + self.colspecs = colspecs + + if not isinstance(self.colspecs, (tuple, list)): + raise TypeError( + "column specifications must be a list or tuple, " + f"input was a {type(colspecs).__name__}" + ) + + for colspec in self.colspecs: + if not ( + isinstance(colspec, (tuple, list)) + and len(colspec) == 2 + and isinstance(colspec[0], (int, np.integer, type(None))) + and isinstance(colspec[1], (int, np.integer, type(None))) + ): + raise TypeError( + "Each column specification must be " + "2 element tuple or list of integers" + ) + + def get_rows(self, infer_nrows, skiprows=None): + """ + Read rows from self.f, skipping as specified. + + We distinguish buffer_rows (the first <= infer_nrows + lines) from the rows returned to detect_colspecs + because it's simpler to leave the other locations + with skiprows logic alone than to modify them to + deal with the fact we skipped some rows here as + well. + + Parameters + ---------- + infer_nrows : int + Number of rows to read from self.f, not counting + rows that are skipped. + skiprows: set, optional + Indices of rows to skip. 
+ + Returns + ------- + detect_rows : list of str + A list containing the rows to read. + + """ + if skiprows is None: + skiprows = set() + buffer_rows = [] + detect_rows = [] + for i, row in enumerate(self.f): + if i not in skiprows: + detect_rows.append(row) + buffer_rows.append(row) + if len(detect_rows) >= infer_nrows: + break + self.buffer = iter(buffer_rows) + return detect_rows + + def detect_colspecs(self, infer_nrows=100, skiprows=None): + # Regex escape the delimiters + delimiters = "".join(r"\{}".format(x) for x in self.delimiter) + pattern = re.compile("([^{}]+)".format(delimiters)) + rows = self.get_rows(infer_nrows, skiprows) + if not rows: + raise EmptyDataError("No rows from which to infer column width") + max_len = max(map(len, rows)) + mask = np.zeros(max_len + 1, dtype=int) + if self.comment is not None: + rows = [row.partition(self.comment)[0] for row in rows] + for row in rows: + for m in pattern.finditer(row): + mask[m.start() : m.end()] = 1 + shifted = np.roll(mask, 1) + shifted[0] = 0 + edges = np.where((mask ^ shifted) == 1)[0] + edge_pairs = list(zip(edges[::2], edges[1::2])) + return edge_pairs + + def __next__(self): + if self.buffer is not None: + try: + line = next(self.buffer) + except StopIteration: + self.buffer = None + line = next(self.f) + else: + line = next(self.f) + # Note: 'colspecs' is a sequence of half-open intervals. + return [line[fromm:to].strip(self.delimiter) for (fromm, to) in self.colspecs] + + +class FixedWidthFieldParser(PythonParser): + """ + Specialization that Converts fixed-width fields into DataFrames. + See PythonParser for details. + """ + + def __init__(self, f, **kwds): + # Support iterators, convert to a list. 
+ self.colspecs = kwds.pop("colspecs") + self.infer_nrows = kwds.pop("infer_nrows") + PythonParser.__init__(self, f, **kwds) + + def _make_reader(self, f): + self.data = FixedWidthReader( + f, + self.colspecs, + self.delimiter, + self.comment, + self.skiprows, + self.infer_nrows, + ) diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py new file mode 100644 index 00000000..4e731b8e --- /dev/null +++ b/pandas/io/pickle.py @@ -0,0 +1,199 @@ +""" pickle compat """ +import pickle +from typing import Any, Optional +import warnings + +from pandas._typing import FilePathOrBuffer +from pandas.compat import pickle_compat as pc + +from pandas.io.common import get_filepath_or_buffer, get_handle + + +def to_pickle( + obj: Any, + filepath_or_buffer: FilePathOrBuffer, + compression: Optional[str] = "infer", + protocol: int = pickle.HIGHEST_PROTOCOL, +): + """ + Pickle (serialize) object to file. + + Parameters + ---------- + obj : any object + Any python object. + filepath_or_buffer : str, path object or file-like object + File path, URL, or buffer where the pickled object will be stored. + + .. versionchanged:: 1.0.0 + Accept URL. URL has to be of S3 or GCS. + + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' + If 'infer' and 'path_or_url' is path-like, then detect compression from + the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + compression) If 'infer' and 'path_or_url' is not path-like, then use + None (= no decompression). + protocol : int + Int which indicates which protocol should be used by the pickler, + default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible + values for this parameter depend on the version of Python. For Python + 2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value. + For Python >= 3.4, 4 is a valid value. A negative value for the + protocol parameter is equivalent to setting its value to + HIGHEST_PROTOCOL. + + .. [1] https://docs.python.org/3/library/pickle.html + .. 
versionadded:: 0.21.0 + + See Also + -------- + read_pickle : Load pickled pandas object (or any object) from file. + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_sql : Write DataFrame to a SQL database. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. + + Examples + -------- + >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)}) + >>> original_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> pd.to_pickle(original_df, "./dummy.pkl") + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") + >>> unpickled_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + + >>> import os + >>> os.remove("./dummy.pkl") + """ + fp_or_buf, _, compression, should_close = get_filepath_or_buffer( + filepath_or_buffer, compression=compression, mode="wb" + ) + if not isinstance(fp_or_buf, str) and compression == "infer": + compression = None + f, fh = get_handle(fp_or_buf, "wb", compression=compression, is_text=False) + if protocol < 0: + protocol = pickle.HIGHEST_PROTOCOL + try: + f.write(pickle.dumps(obj, protocol=protocol)) + finally: + f.close() + for _f in fh: + _f.close() + if should_close: + try: + fp_or_buf.close() + except ValueError: + pass + + +def read_pickle( + filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] = "infer" +): + """ + Load pickled pandas object (or any object) from file. + + .. warning:: + + Loading pickled data received from untrusted sources can be + unsafe. See `here `__. + + Parameters + ---------- + filepath_or_buffer : str, path object or file-like object + File path, URL, or buffer where the pickled object will be loaded from. + + .. versionchanged:: 1.0.0 + Accept URL. URL is not limited to S3 and GCS. 
+ + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer' + If 'infer' and 'path_or_url' is path-like, then detect compression from + the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no + compression) If 'infer' and 'path_or_url' is not path-like, then use + None (= no decompression). + + Returns + ------- + unpickled : same type as object stored in file + + See Also + -------- + DataFrame.to_pickle : Pickle (serialize) DataFrame object to file. + Series.to_pickle : Pickle (serialize) Series object to file. + read_hdf : Read HDF5 file into a DataFrame. + read_sql : Read SQL query or database table into a DataFrame. + read_parquet : Load a parquet object, returning a DataFrame. + + Notes + ----- + read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3. + + Examples + -------- + >>> original_df = pd.DataFrame({"foo": range(5), "bar": range(5, 10)}) + >>> original_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + >>> pd.to_pickle(original_df, "./dummy.pkl") + + >>> unpickled_df = pd.read_pickle("./dummy.pkl") + >>> unpickled_df + foo bar + 0 0 5 + 1 1 6 + 2 2 7 + 3 3 8 + 4 4 9 + + >>> import os + >>> os.remove("./dummy.pkl") + """ + fp_or_buf, _, compression, should_close = get_filepath_or_buffer( + filepath_or_buffer, compression=compression + ) + if not isinstance(fp_or_buf, str) and compression == "infer": + compression = None + f, fh = get_handle(fp_or_buf, "rb", compression=compression, is_text=False) + + # 1) try standard library Pickle + # 2) try pickle_compat (older pandas version) to handle subclass changes + # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError + + try: + excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError) + try: + with warnings.catch_warnings(record=True): + # We want to silence any warnings about, e.g. moved modules. + warnings.simplefilter("ignore", Warning) + return pickle.load(f) + except excs_to_catch: + # e.g. 
+ # "No module named 'pandas.core.sparse.series'" + # "Can't get attribute '__nat_unpickle' on %s,key->%s] [items->%s] +""" + +# formats +_FORMAT_MAP = {"f": "fixed", "fixed": "fixed", "t": "table", "table": "table"} + +# axes map +_AXES_MAP = {DataFrame: [0]} + +# register our configuration options +dropna_doc = """ +: boolean + drop ALL nan rows when appending to a table +""" +format_doc = """ +: format + default format writing format, if None, then + put will default to 'fixed' and append will default to 'table' +""" + +with config.config_prefix("io.hdf"): + config.register_option("dropna_table", False, dropna_doc, validator=config.is_bool) + config.register_option( + "default_format", + None, + format_doc, + validator=config.is_one_of_factory(["fixed", "table", None]), + ) + +# oh the troubles to reduce import time +_table_mod = None +_table_file_open_policy_is_strict = False + + +def _tables(): + global _table_mod + global _table_file_open_policy_is_strict + if _table_mod is None: + import tables + + _table_mod = tables + + # set the file open policy + # return the file open policy; this changes as of pytables 3.1 + # depending on the HDF5 version + try: + _table_file_open_policy_is_strict = ( + tables.file._FILE_OPEN_POLICY == "strict" + ) + except AttributeError: + pass + + return _table_mod + + +# interface to/from ### + + +def to_hdf( + path_or_buf, + key: str, + value: FrameOrSeries, + mode: str = "a", + complevel: Optional[int] = None, + complib: Optional[str] = None, + append: bool = False, + format: Optional[str] = None, + index: bool = True, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + nan_rep=None, + dropna: Optional[bool] = None, + data_columns: Optional[List[str]] = None, + errors: str = "strict", + encoding: str = "UTF-8", +): + """ store this object, close it if we opened it """ + + if append: + f = lambda store: store.append( + key, + value, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + 
dropna=dropna, + data_columns=data_columns, + errors=errors, + encoding=encoding, + ) + else: + # NB: dropna is not passed to `put` + f = lambda store: store.put( + key, + value, + format=format, + index=index, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + errors=errors, + encoding=encoding, + ) + + path_or_buf = stringify_path(path_or_buf) + if isinstance(path_or_buf, str): + with HDFStore( + path_or_buf, mode=mode, complevel=complevel, complib=complib + ) as store: + f(store) + else: + f(path_or_buf) + + +def read_hdf( + path_or_buf, + key=None, + mode: str = "r", + errors: str = "strict", + where=None, + start: Optional[int] = None, + stop: Optional[int] = None, + columns=None, + iterator=False, + chunksize: Optional[int] = None, + **kwargs, +): + """ + Read from the store, close it if we opened it. + + Retrieve pandas object stored in file, optionally based on where + criteria + + Parameters + ---------- + path_or_buf : str, path object, pandas.HDFStore or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.h5``. + + If you want to pass in a path object, pandas accepts any + ``os.PathLike``. + + Alternatively, pandas accepts an open :class:`pandas.HDFStore` object. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handler (e.g. via builtin ``open`` function) + or ``StringIO``. + + .. versionadded:: 0.21.0 support for __fspath__ protocol. + + key : object, optional + The group identifier in the store. Can be omitted if the HDF file + contains a single pandas object. + mode : {'r', 'r+', 'a'}, default 'r' + Mode to use when opening the file. Ignored if path_or_buf is a + :class:`pandas.HDFStore`. Default is 'r'. + where : list, optional + A list of Term (or convertible) objects. 
+ start : int, optional + Row number to start selection. + stop : int, optional + Row number to stop selection. + columns : list, optional + A list of columns names to return. + iterator : bool, optional + Return an iterator object. + chunksize : int, optional + Number of rows to include in an iteration when using an iterator. + errors : str, default 'strict' + Specifies how encoding and decoding errors are to be handled. + See the errors argument for :func:`open` for a full list + of options. + **kwargs + Additional keyword arguments passed to HDFStore. + + Returns + ------- + item : object + The selected object. Return type depends on the object stored. + + See Also + -------- + DataFrame.to_hdf : Write a HDF file from a DataFrame. + HDFStore : Low-level access to HDF files. + + Examples + -------- + >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) + >>> df.to_hdf('./store.h5', 'data') + >>> reread = pd.read_hdf('./store.h5') + """ + + if mode not in ["r", "r+", "a"]: + raise ValueError( + f"mode {mode} is not allowed while performing a read. " + f"Allowed modes are r, r+ and a." + ) + # grab the scope + if where is not None: + where = _ensure_term(where, scope_level=1) + + if isinstance(path_or_buf, HDFStore): + if not path_or_buf.is_open: + raise IOError("The HDFStore must be open for reading.") + + store = path_or_buf + auto_close = False + else: + path_or_buf = stringify_path(path_or_buf) + if not isinstance(path_or_buf, str): + raise NotImplementedError( + "Support for generic buffers has not been implemented." 
+ ) + try: + exists = os.path.exists(path_or_buf) + + # if filepath is too long + except (TypeError, ValueError): + exists = False + + if not exists: + raise FileNotFoundError(f"File {path_or_buf} does not exist") + + store = HDFStore(path_or_buf, mode=mode, errors=errors, **kwargs) + # can't auto open/close if we are using an iterator + # so delegate to the iterator + auto_close = True + + try: + if key is None: + groups = store.groups() + if len(groups) == 0: + raise ValueError("No dataset in HDF5 file.") + candidate_only_group = groups[0] + + # For the HDF file to have only one dataset, all other groups + # should then be metadata groups for that candidate group. (This + # assumes that the groups() method enumerates parent groups + # before their children.) + for group_to_check in groups[1:]: + if not _is_metadata_of(group_to_check, candidate_only_group): + raise ValueError( + "key must be provided when HDF5 file " + "contains multiple datasets." + ) + key = candidate_only_group._v_pathname + return store.select( + key, + where=where, + start=start, + stop=stop, + columns=columns, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + except (ValueError, TypeError, KeyError): + if not isinstance(path_or_buf, HDFStore): + # if there is an error, close the store if we opened it. + try: + store.close() + except AttributeError: + pass + + raise + + +def _is_metadata_of(group: "Node", parent_group: "Node") -> bool: + """Check if a given group is a metadata group for a given parent_group.""" + if group._v_depth <= parent_group._v_depth: + return False + + current = group + while current._v_depth > 1: + parent = current._v_parent + if parent == parent_group and current._v_name == "meta": + return True + current = current._v_parent + return False + + +class HDFStore: + """ + Dict-like IO interface for storing pandas objects in PyTables. + + Either Fixed or Table format. 
+ + Parameters + ---------- + path : string + File path to HDF5 file + mode : {'a', 'w', 'r', 'r+'}, default 'a' + + ``'r'`` + Read-only; no data can be modified. + ``'w'`` + Write; a new file is created (an existing file with the same + name would be deleted). + ``'a'`` + Append; an existing file is opened for reading and writing, + and if the file does not exist it is created. + ``'r+'`` + It is similar to ``'a'``, but the file must already exist. + complevel : int, 0-9, default None + Specifies a compression level for data. + A value of 0 or None disables compression. + complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib' + Specifies the compression library to be used. + As of v0.20.2 these additional compressors for Blosc are supported + (default if no compressor specified: 'blosc:blosclz'): + {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', + 'blosc:zlib', 'blosc:zstd'}. + Specifying a compression library which is not available issues + a ValueError. + fletcher32 : bool, default False + If applying compression use the fletcher32 checksum + + Examples + -------- + >>> bar = pd.DataFrame(np.random.randn(10, 4)) + >>> store = pd.HDFStore('test.h5') + >>> store['foo'] = bar # write to HDF5 + >>> bar = store['foo'] # retrieve + >>> store.close() + """ + + _handle: Optional["File"] + _mode: str + _complevel: int + _fletcher32: bool + + def __init__( + self, + path, + mode: str = "a", + complevel: Optional[int] = None, + complib=None, + fletcher32: bool = False, + **kwargs, + ): + + if "format" in kwargs: + raise ValueError("format is not a defined argument for HDFStore") + + tables = import_optional_dependency("tables") + + if complib is not None and complib not in tables.filters.all_complibs: + raise ValueError( + f"complib only supports {tables.filters.all_complibs} compression." 
+ ) + + if complib is None and complevel is not None: + complib = tables.filters.default_complib + + self._path = stringify_path(path) + if mode is None: + mode = "a" + self._mode = mode + self._handle = None + self._complevel = complevel if complevel else 0 + self._complib = complib + self._fletcher32 = fletcher32 + self._filters = None + self.open(mode=mode, **kwargs) + + def __fspath__(self): + return self._path + + @property + def root(self): + """ return the root node """ + self._check_if_open() + return self._handle.root + + @property + def filename(self): + return self._path + + def __getitem__(self, key: str): + return self.get(key) + + def __setitem__(self, key: str, value): + self.put(key, value) + + def __delitem__(self, key: str): + return self.remove(key) + + def __getattr__(self, name: str): + """ allow attribute access to get stores """ + try: + return self.get(name) + except (KeyError, ClosedFileError): + pass + raise AttributeError( + f"'{type(self).__name__}' object has no attribute '{name}'" + ) + + def __contains__(self, key: str) -> bool: + """ check for existence of this key + can match the exact pathname or the pathnm w/o the leading '/' + """ + node = self.get_node(key) + if node is not None: + name = node._v_pathname + if name == key or name[1:] == key: + return True + return False + + def __len__(self) -> int: + return len(self.groups()) + + def __repr__(self) -> str: + pstr = pprint_thing(self._path) + return f"{type(self)}\nFile path: {pstr}\n" + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def keys(self) -> List[str]: + """ + Return a list of keys corresponding to objects stored in HDFStore. + + Returns + ------- + list + List of ABSOLUTE path-names (e.g. have the leading '/'). 
+ """ + return [n._v_pathname for n in self.groups()] + + def __iter__(self): + return iter(self.keys()) + + def items(self): + """ + iterate on key->group + """ + for g in self.groups(): + yield g._v_pathname, g + + iteritems = items + + def open(self, mode: str = "a", **kwargs): + """ + Open the file in the specified mode + + Parameters + ---------- + mode : {'a', 'w', 'r', 'r+'}, default 'a' + See HDFStore docstring or tables.open_file for info about modes + """ + tables = _tables() + + if self._mode != mode: + + # if we are changing a write mode to read, ok + if self._mode in ["a", "w"] and mode in ["r", "r+"]: + pass + elif mode in ["w"]: + + # this would truncate, raise here + if self.is_open: + raise PossibleDataLossError( + f"Re-opening the file [{self._path}] with mode [{self._mode}] " + "will delete the current file!" + ) + + self._mode = mode + + # close and reopen the handle + if self.is_open: + self.close() + + if self._complevel and self._complevel > 0: + self._filters = _tables().Filters( + self._complevel, self._complib, fletcher32=self._fletcher32 + ) + + try: + self._handle = tables.open_file(self._path, self._mode, **kwargs) + except IOError as err: # pragma: no cover + if "can not be written" in str(err): + print(f"Opening {self._path} in read-only mode") + self._handle = tables.open_file(self._path, "r", **kwargs) + else: + raise + + except ValueError as err: + + # trap PyTables >= 3.1 FILE_OPEN_POLICY exception + # to provide an updated message + if "FILE_OPEN_POLICY" in str(err): + hdf_version = tables.get_hdf5_version() + err = ValueError( + f"PyTables [{tables.__version__}] no longer supports " + "opening multiple files\n" + "even in read-only mode on this HDF5 version " + f"[{hdf_version}]. 
You can accept this\n" + "and not open the same file multiple times at once,\n" + "upgrade the HDF5 version, or downgrade to PyTables 3.0.0 " + "which allows\n" + "files to be opened multiple times at once\n" + ) + + raise err + + except Exception as err: + + # trying to read from a non-existent file causes an error which + # is not part of IOError, make it one + if self._mode == "r" and "Unable to open/create file" in str(err): + raise IOError(str(err)) + raise + + def close(self): + """ + Close the PyTables file handle + """ + if self._handle is not None: + self._handle.close() + self._handle = None + + @property + def is_open(self) -> bool: + """ + return a boolean indicating whether the file is open + """ + if self._handle is None: + return False + return bool(self._handle.isopen) + + def flush(self, fsync: bool = False): + """ + Force all buffered modifications to be written to disk. + + Parameters + ---------- + fsync : bool (default False) + call ``os.fsync()`` on the file handle to force writing to disk. + + Notes + ----- + Without ``fsync=True``, flushing may not guarantee that the OS writes + to disk. With fsync, the operation will block until the OS claims the + file has been written; however, other caching layers may still + interfere. + """ + if self._handle is not None: + self._handle.flush() + if fsync: + try: + os.fsync(self._handle.fileno()) + except OSError: + pass + + def get(self, key: str): + """ + Retrieve pandas object stored in file. + + Parameters + ---------- + key : str + + Returns + ------- + object + Same type as object stored in file. + """ + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + return self._read_group(group) + + def select( + self, + key: str, + where=None, + start=None, + stop=None, + columns=None, + iterator=False, + chunksize=None, + auto_close: bool = False, + ): + """ + Retrieve pandas object stored in file, optionally based on where criteria. 
+ + Parameters + ---------- + key : str + Object being retrieved from file. + where : list, default None + List of Term (or convertible) objects, optional. + start : int, default None + Row number to start selection. + stop : int, default None + Row number to stop selection. + columns : list, default None + A list of columns that if not None, will limit the return columns. + iterator : bool, default False + Returns an iterator. + chunksize : int, default None + Number or rows to include in iteration, return an iterator. + auto_close : bool, default False + Should automatically close the store when finished. + + Returns + ------- + object + Retrieved object from file. + """ + group = self.get_node(key) + if group is None: + raise KeyError(f"No object named {key} in the file") + + # create the storer and axes + where = _ensure_term(where, scope_level=1) + s = self._create_storer(group) + s.infer_axes() + + # function to call on iteration + def func(_start, _stop, _where): + return s.read(start=_start, stop=_stop, where=_where, columns=columns) + + # create the iterator + it = TableIterator( + self, + s, + func, + where=where, + nrows=s.nrows, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + return it.get_result() + + def select_as_coordinates( + self, + key: str, + where=None, + start: Optional[int] = None, + stop: Optional[int] = None, + ): + """ + return the selection as an Index + + Parameters + ---------- + key : str + where : list of Term (or convertible) objects, optional + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + """ + where = _ensure_term(where, scope_level=1) + tbl = self.get_storer(key) + if not isinstance(tbl, Table): + raise TypeError("can only read_coordinates with a table") + return tbl.read_coordinates(where=where, start=start, stop=stop) + + def select_column( + self, + key: str, + column: str, + start: 
Optional[int] = None, + stop: Optional[int] = None, + ): + """ + return a single column from the table. This is generally only useful to + select an indexable + + Parameters + ---------- + key : str + column : str + The column of interest. + start : int or None, default None + stop : int or None, default None + + Raises + ------ + raises KeyError if the column is not found (or key is not a valid + store) + raises ValueError if the column can not be extracted individually (it + is part of a data block) + + """ + tbl = self.get_storer(key) + if not isinstance(tbl, Table): + raise TypeError("can only read_column with a table") + return tbl.read_column(column=column, start=start, stop=stop) + + def select_as_multiple( + self, + keys, + where=None, + selector=None, + columns=None, + start=None, + stop=None, + iterator=False, + chunksize=None, + auto_close: bool = False, + ): + """ + Retrieve pandas objects from multiple tables. + + Parameters + ---------- + keys : a list of the tables + selector : the table to apply the where criteria (defaults to keys[0] + if not supplied) + columns : the columns I want back + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + iterator : boolean, return an iterator, default False + chunksize : nrows to include in iteration, return an iterator + auto_close : bool, default False + Should automatically close the store when finished. 
+ + Raises + ------ + raises KeyError if keys or selector is not found or keys is empty + raises TypeError if keys is not a list or tuple + raises ValueError if the tables are not ALL THE SAME DIMENSIONS + """ + + # default to single select + where = _ensure_term(where, scope_level=1) + if isinstance(keys, (list, tuple)) and len(keys) == 1: + keys = keys[0] + if isinstance(keys, str): + return self.select( + key=keys, + where=where, + columns=columns, + start=start, + stop=stop, + iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + if not isinstance(keys, (list, tuple)): + raise TypeError("keys must be a list/tuple") + + if not len(keys): + raise ValueError("keys must have a non-zero length") + + if selector is None: + selector = keys[0] + + # collect the tables + tbls = [self.get_storer(k) for k in keys] + s = self.get_storer(selector) + + # validate rows + nrows = None + for t, k in itertools.chain([(s, selector)], zip(tbls, keys)): + if t is None: + raise KeyError(f"Invalid table [{k}]") + if not t.is_table: + raise TypeError( + f"object [{t.pathname}] is not a table, and cannot be used in all " + "select as multiple" + ) + + if nrows is None: + nrows = t.nrows + elif t.nrows != nrows: + raise ValueError("all tables must have exactly the same nrows!") + + # The isinstance checks here are redundant with the check above, + # but necessary for mypy; see GH#29757 + _tbls = [x for x in tbls if isinstance(x, Table)] + + # axis is the concentration axes + axis = list({t.non_index_axes[0][0] for t in _tbls})[0] + + def func(_start, _stop, _where): + + # retrieve the objs, _where is always passed as a set of + # coordinates here + objs = [ + t.read(where=_where, columns=columns, start=_start, stop=_stop) + for t in tbls + ] + + # concat and return + return concat(objs, axis=axis, verify_integrity=False)._consolidate() + + # create the iterator + it = TableIterator( + self, + s, + func, + where=where, + nrows=nrows, + start=start, + stop=stop, + 
iterator=iterator, + chunksize=chunksize, + auto_close=auto_close, + ) + + return it.get_result(coordinates=True) + + def put( + self, + key: str, + value: FrameOrSeries, + format=None, + index=True, + append=False, + complib=None, + complevel: Optional[int] = None, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + nan_rep=None, + data_columns: Optional[List[str]] = None, + encoding=None, + errors: str = "strict", + ): + """ + Store object in HDFStore. + + Parameters + ---------- + key : str + value : {Series, DataFrame} + format : 'fixed(f)|table(t)', default is 'fixed' + fixed(f) : Fixed format + Fast writing/reading. Not-appendable, nor searchable. + table(t) : Table format + Write as a PyTables Table structure which may perform + worse but allow more flexible operations like searching + / selecting subsets of the data. + append : bool, default False + This will force Table format, append the input data to the + existing. + data_columns : list, default None + List of columns to create as data columns, or True to + use all columns. See `here + `__. + encoding : str, default None + Provide an encoding for strings. + dropna : bool, default False, do not write an ALL nan row to + The store settable by the option 'io.hdf.dropna_table'. 
+ """ + if format is None: + format = get_option("io.hdf.default_format") or "fixed" + format = self._validate_format(format) + self._write_to_group( + key, + value, + format=format, + index=index, + append=append, + complib=complib, + complevel=complevel, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + data_columns=data_columns, + encoding=encoding, + errors=errors, + ) + + def remove(self, key: str, where=None, start=None, stop=None): + """ + Remove pandas object partially by specifying the where condition + + Parameters + ---------- + key : string + Node to remove or delete rows from + where : list of Term (or convertible) objects, optional + start : integer (defaults to None), row number to start selection + stop : integer (defaults to None), row number to stop selection + + Returns + ------- + number of rows removed (or None if not a Table) + + Raises + ------ + raises KeyError if key is not a valid store + + """ + where = _ensure_term(where, scope_level=1) + try: + s = self.get_storer(key) + except KeyError: + # the key is not a valid store, re-raising KeyError + raise + except AssertionError: + # surface any assertion errors for e.g. debugging + raise + except Exception: + # In tests we get here with ClosedFileError, TypeError, and + # _table_mod.NoSuchNodeError. TODO: Catch only these? + + if where is not None: + raise ValueError( + "trying to remove a node with a non-None where clause!" 
+ ) + + # we are actually trying to remove a node (with children) + node = self.get_node(key) + if node is not None: + node._f_remove(recursive=True) + return None + + # remove the node + if com.all_none(where, start, stop): + s.group._f_remove(recursive=True) + + # delete from the table + else: + if not s.is_table: + raise ValueError( + "can only remove with where on objects written as tables" + ) + return s.delete(where=where, start=start, stop=stop) + + def append( + self, + key: str, + value: FrameOrSeries, + format=None, + axes=None, + index=True, + append=True, + complib=None, + complevel: Optional[int] = None, + columns=None, + min_itemsize: Optional[Union[int, Dict[str, int]]] = None, + nan_rep=None, + chunksize=None, + expectedrows=None, + dropna: Optional[bool] = None, + data_columns: Optional[List[str]] = None, + encoding=None, + errors: str = "strict", + ): + """ + Append to Table in file. Node must already exist and be Table + format. + + Parameters + ---------- + key : str + value : {Series, DataFrame} + format : 'table' is the default + table(t) : table format + Write as a PyTables Table structure which may perform + worse but allow more flexible operations like searching + / selecting subsets of the data. + append : bool, default True + Append the input data to the existing. + data_columns : list of columns, or True, default None + List of columns to create as indexed data columns for on-disk + queries, or True to use all columns. By default only the axes + of the object are indexed. See `here + `__. + min_itemsize : dict of columns that specify minimum string sizes + nan_rep : string to use as string nan representation + chunksize : size to chunk the writing + expectedrows : expected TOTAL row size of this table + encoding : default None, provide an encoding for strings + dropna : bool, default False + Do not write an ALL nan row to the store settable + by the option 'io.hdf.dropna_table'. 
def append_to_multiple(
    self,
    d: Dict,
    value,
    selector,
    data_columns=None,
    axes=None,
    dropna=False,
    **kwargs,
):
    """
    Append to multiple tables

    Parameters
    ----------
    d : a dict of table_name to table_columns, None is acceptable as the
        values of one node (this will get all the remaining columns).
        The passed dict is read only; it is never written to.
    value : a pandas object
    selector : a string that designates the indexable table; all of its
        columns will be designated as data_columns, unless data_columns is
        passed, in which case these are used
    data_columns : list of columns to create as data columns, or True to
        use all columns
    axes : must be None, any other value raises
    dropna : if evaluates to True, only rows that are not all-NaN in every
        one of the tables are written. Default False.
    **kwargs
        Passed through unchanged to each ``self.append`` call.

    Raises
    ------
    TypeError
        If ``axes`` is not None.
    ValueError
        If ``d`` is not a dict, if ``selector`` is not a key of ``d``, or if
        more than one value of ``d`` is None.

    Notes
    -----
    axes parameter is currently not accepted

    """
    if axes is not None:
        raise TypeError(
            "axes is currently not accepted as a parameter to append_to_multiple; "
            "you can create the tables independently instead"
        )

    if not isinstance(d, dict):
        raise ValueError(
            "append_to_multiple must have a dictionary specified as the "
            "way to split the value"
        )

    if selector not in d:
        raise ValueError(
            "append_to_multiple requires a selector that is in passed dict"
        )

    # figure out the splitting axis (the non_index_axis)
    axis = list(set(range(value.ndim)) - set(_AXES_MAP[type(value)]))[0]

    # Shallow copy: the "remaining columns" entry is filled in below and must
    # not leak back into the dict owned by the caller.
    splits = dict(d)

    # figure out how to split the value
    remain_key = None
    remain_values: List = []
    for k, v in splits.items():
        if v is None:
            if remain_key is not None:
                raise ValueError(
                    "append_to_multiple can only have one value in d that "
                    "is None"
                )
            remain_key = k
        else:
            remain_values.extend(v)
    if remain_key is not None:
        # labels not claimed by any other table, kept in their original order
        ordered = value.axes[axis]
        ordd = ordered.difference(Index(remain_values))
        ordd = sorted(ordered.get_indexer(ordd))
        splits[remain_key] = ordered.take(ordd)

    # data_columns
    if data_columns is None:
        data_columns = splits[selector]

    # ensure rows are synchronized across the tables
    if dropna:
        idxs = (value[cols].dropna(how="all").index for cols in splits.values())
        valid_index = next(idxs)
        for index in idxs:
            valid_index = valid_index.intersection(index)
        value = value.loc[valid_index]

    # append
    for k, v in splits.items():
        # only the selector table receives the data columns
        dc = data_columns if k == selector else None

        # compute the val
        val = value.reindex(v, axis=axis)

        self.append(k, val, data_columns=dc, **kwargs)
def groups(self):
    """
    Return the pandas-related nodes found by walking the file's groups.

    A node yielded by ``self._handle.walk_groups()`` is kept when it is not
    a link and at least one of the following holds: it carries a truthy
    ``pandas_type`` attribute, it has a truthy ``table`` attribute, or it is
    a PyTables ``Table`` whose name is not ``"table"``.

    Returns
    -------
    list
        List of objects.
    """
    _tables()
    self._check_if_open()

    def _keep(node) -> bool:
        # links are never reported
        if isinstance(node, _table_mod.link.Link):
            return False
        if getattr(node._v_attrs, "pandas_type", None):
            return True
        if getattr(node, "table", None):
            return True
        return isinstance(node, _table_mod.table.Table) and node._v_name != "table"

    selected = []
    for node in self._handle.walk_groups():
        if _keep(node):
            selected.append(node)
    return selected
def copy(
    self,
    file,
    mode="w",
    propindexes: bool = True,
    keys=None,
    complib=None,
    complevel: Optional[int] = None,
    fletcher32: bool = False,
    overwrite=True,
):
    """
    Copy objects of this store into a newly opened store at `file`.

    Parameters
    ----------
    file : passed to the HDFStore constructor for the new store
    propindexes: bool, default True
        Restore indexes in copied file.
    keys : list of keys to include in the copy (defaults to all); a single
        key that is not a list or tuple is wrapped into a list
    overwrite : overwrite (remove and replace) existing nodes in the
        new store (default is True)
    mode, complib, complevel, fletcher32 same as in HDFStore.__init__

    Returns
    -------
    open file handle of the new store

    Notes
    -----
    If copying any key raises, the new store is closed before the exception
    is re-raised, so the handle opened here is not leaked.
    """
    new_store = HDFStore(
        file, mode=mode, complib=complib, complevel=complevel, fletcher32=fletcher32
    )
    try:
        if keys is None:
            keys = list(self.keys())
        if not isinstance(keys, (tuple, list)):
            keys = [keys]
        for k in keys:
            s = self.get_storer(k)
            if s is None:
                continue

            if k in new_store and overwrite:
                new_store.remove(k)

            data = self.select(k)
            if isinstance(s, Table):
                # table format: append, optionally re-creating the indexes
                # of the axes that are indexed in the source
                index: Union[bool, List[str]] = False
                if propindexes:
                    index = [a.name for a in s.axes if a.is_indexed]
                new_store.append(
                    k,
                    data,
                    index=index,
                    data_columns=getattr(s, "data_columns", None),
                    encoding=s.encoding,
                )
            else:
                new_store.put(k, data, encoding=s.encoding)
    except Exception:
        # the caller never receives new_store on failure, so close it here
        new_store.close()
        raise

    return new_store
def _create_storer(
    self,
    group,
    format=None,
    value: Optional["FrameOrSeries"] = None,
    encoding: str = "UTF-8",
    errors: str = "strict",
) -> Union["GenericFixed", "Table"]:
    """
    Return a storer instance suitable for operating on `group`.

    The storer class is chosen from the ``pandas_type`` / ``table_type``
    attributes of ``group._v_attrs``; when ``pandas_type`` is missing it is
    inferred from `value` and `format`.

    Parameters
    ----------
    group : the node to operate on
    format : when equal to "table" and the type is inferred from `value`,
        a table storer is selected
    value : None, Series or DataFrame (subclasses included)
    encoding : str, passed to the storer constructor
    errors : str, passed to the storer constructor

    Raises
    ------
    TypeError
        If `value` is neither None, a Series nor a DataFrame, if the type
        cannot be inferred because neither stored attributes nor a value are
        available, or if no storer class matches the resolved type.
    """

    cls: Union[Type["GenericFixed"], Type["Table"]]

    if value is not None and not isinstance(value, (Series, DataFrame)):
        raise TypeError("value must be None, Series, or DataFrame")

    def error(t):
        # return instead of raising so mypy can tell where we are raising
        return TypeError(
            f"cannot properly create the storer for: [{t}] [group->"
            f"{group},value->{type(value)},format->{format}]"
        )

    pt = _ensure_decoded(getattr(group._v_attrs, "pandas_type", None))
    tt = _ensure_decoded(getattr(group._v_attrs, "table_type", None))

    # infer the pt from the passed value
    if pt is None:
        if value is None:

            _tables()
            assert _table_mod is not None  # for mypy
            if getattr(group, "table", None) or isinstance(
                group, _table_mod.table.Table
            ):
                pt = "frame_table"
                tt = "generic_table"
            else:
                raise TypeError(
                    "cannot create a storer if the object is not existing "
                    "nor a value are passed"
                )
        else:
            # isinstance rather than an exact type lookup, so that Series /
            # DataFrame subclasses accepted by the check above also resolve
            pt = "series" if isinstance(value, Series) else "frame"

            # we are actually a table
            if format == "table":
                pt += "_table"

    # a storer node
    if "table" not in pt:
        _STORER_MAP = {"series": SeriesFixed, "frame": FrameFixed}
        try:
            cls = _STORER_MAP[pt]
        except KeyError as err:
            raise error("_STORER_MAP") from err
        return cls(self, group, encoding=encoding, errors=errors)

    # existing node (and must be a table)
    if tt is None and value is not None:
        # if we are a writer, determine the tt from the number of index levels
        if pt in ("series_table", "frame_table"):
            flavor = "series" if pt == "series_table" else "frame"
            index = getattr(value, "index", None)
            if index is not None:
                if index.nlevels == 1:
                    tt = f"appendable_{flavor}"
                elif index.nlevels > 1:
                    tt = f"appendable_multi{flavor}"

    _TABLE_MAP = {
        "generic_table": GenericTable,
        "appendable_series": AppendableSeriesTable,
        "appendable_multiseries": AppendableMultiSeriesTable,
        "appendable_frame": AppendableFrameTable,
        "appendable_multiframe": AppendableMultiFrameTable,
        "worm": WORMTable,
    }
    try:
        cls = _TABLE_MAP[tt]
    except KeyError as err:
        raise error("_TABLE_MAP") from err

    return cls(self, group, encoding=encoding, errors=errors)
class TableIterator:
    """
    Define the iteration interface on a table

    Parameters
    ----------
    store : HDFStore
    s     : the referred storer
    func  : the function to execute the query; called as
        ``func(start, stop, where)``
    where : the where of the query
    nrows : the rows to iterate on
    start : the passed start value (default is None)
    stop  : the passed stop value (default is None)
    iterator : bool, default False
        Whether to use the default iterator.
    chunksize : the passed chunking value (default is 100000)
    auto_close : bool, default False
        Whether to automatically close the store at the end of iteration.
        The store is also closed when the query function raises or when the
        iteration is abandoned before exhaustion.
    """

    chunksize: Optional[int]
    store: "HDFStore"
    s: Union["GenericFixed", "Table"]

    def __init__(
        self,
        store: "HDFStore",
        s: Union["GenericFixed", "Table"],
        func,
        where,
        nrows,
        start=None,
        stop=None,
        iterator: bool = False,
        chunksize: Optional[int] = None,
        auto_close: bool = False,
    ):
        self.store = store
        self.s = s
        self.func = func
        self.where = where

        # set start/stop if they are not set if we are a table
        if self.s.is_table:
            if nrows is None:
                nrows = 0
            if start is None:
                start = 0
            if stop is None:
                stop = nrows
            # never read past the number of rows
            stop = min(nrows, stop)

        self.nrows = nrows
        self.start = start
        self.stop = stop

        # filled in by get_result() when iterating in chunks
        self.coordinates = None
        if iterator or chunksize is not None:
            if chunksize is None:
                chunksize = 100000
            self.chunksize = int(chunksize)
        else:
            self.chunksize = None

        self.auto_close = auto_close

    def __iter__(self):
        """
        Yield the non-empty results of ``func`` for successive slices of
        ``self.coordinates``, each at most ``self.chunksize`` long.
        """
        current = self.start
        try:
            while current < self.stop:

                stop = min(current + self.chunksize, self.stop)
                value = self.func(None, None, self.coordinates[current:stop])
                current = stop
                if value is None or not len(value):
                    continue

                yield value
        finally:
            # runs on exhaustion, on error and when the generator is closed,
            # so an auto-closed store is never left open
            self.close()

    def close(self):
        """ close the store, but only if auto_close was requested """
        if self.auto_close:
            self.store.close()

    def get_result(self, coordinates: bool = False):
        """
        Return ``self`` as a chunked iterator when a chunksize is set,
        otherwise run the query once and return its result.

        Parameters
        ----------
        coordinates : bool, default False
            For the non-chunked case, first resolve ``where`` into
            coordinates via the storer's ``read_coordinates``.

        Raises
        ------
        TypeError
            If chunking or coordinates are requested and the storer is not
            a Table.
        """

        # return the actual iterator
        if self.chunksize is not None:
            try:
                if not isinstance(self.s, Table):
                    raise TypeError(
                        "can only use an iterator or chunksize on a table"
                    )

                self.coordinates = self.s.read_coordinates(where=self.where)
            except Exception:
                # no iteration will happen, so release the store now
                self.close()
                raise

            return self

        try:
            # if specified read via coordinates (necessary for multiple
            # selections)
            if coordinates:
                if not isinstance(self.s, Table):
                    raise TypeError("can only read_coordinates on a table")
                where = self.s.read_coordinates(
                    where=self.where, start=self.start, stop=self.stop
                )
            else:
                where = self.where

            # directly return the result
            return self.func(self.start, self.stop, where)
        finally:
            self.close()
+ assert isinstance(self.name, str) + assert isinstance(self.cname, str) + + @property + def itemsize(self) -> int: + # Assumes self.typ has already been initialized + return self.typ.itemsize + + @property + def kind_attr(self) -> str: + return f"{self.name}_kind" + + def set_pos(self, pos: int): + """ set the position of this column in the Table """ + self.pos = pos + if pos is not None and self.typ is not None: + self.typ._v_pos = pos + + def __repr__(self) -> str: + temp = tuple( + map(pprint_thing, (self.name, self.cname, self.axis, self.pos, self.kind)) + ) + return ",".join( + ( + f"{key}->{value}" + for key, value in zip(["name", "cname", "axis", "pos", "kind"], temp) + ) + ) + + def __eq__(self, other: Any) -> bool: + """ compare 2 col items """ + return all( + getattr(self, a, None) == getattr(other, a, None) + for a in ["name", "cname", "axis", "pos"] + ) + + def __ne__(self, other) -> bool: + return not self.__eq__(other) + + @property + def is_indexed(self) -> bool: + """ return whether I am an indexed column """ + if not hasattr(self.table, "cols"): + # e.g. if infer hasn't been called yet, self.table will be None. + return False + # GH#29692 mypy doesn't recognize self.table as having a "cols" attribute + # 'error: "None" has no attribute "cols"' + return getattr(self.table.cols, self.cname).is_indexed # type: ignore + + def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): + """ + Convert the data from this selection to the appropriate pandas type. 
+ """ + assert isinstance(values, np.ndarray), type(values) + + # values is a recarray + if values.dtype.fields is not None: + values = values[self.cname] + + val_kind = _ensure_decoded(self.kind) + values = _maybe_convert(values, val_kind, encoding, errors) + + kwargs = dict() + kwargs["name"] = _ensure_decoded(self.index_name) + + if self.freq is not None: + kwargs["freq"] = _ensure_decoded(self.freq) + + # making an Index instance could throw a number of different errors + try: + new_pd_index = Index(values, **kwargs) + except ValueError: + # if the output freq is different that what we recorded, + # it should be None (see also 'doc example part 2') + if "freq" in kwargs: + kwargs["freq"] = None + new_pd_index = Index(values, **kwargs) + + new_pd_index = _set_tz(new_pd_index, self.tz) + return new_pd_index, new_pd_index + + def take_data(self): + """ return the values""" + return self.values + + @property + def attrs(self): + return self.table._v_attrs + + @property + def description(self): + return self.table.description + + @property + def col(self): + """ return my current col description """ + return getattr(self.description, self.cname, None) + + @property + def cvalues(self): + """ return my cython values """ + return self.values + + def __iter__(self): + return iter(self.values) + + def maybe_set_size(self, min_itemsize=None): + """ maybe set a string col itemsize: + min_itemsize can be an integer or a dict with this columns name + with an integer size """ + if _ensure_decoded(self.kind) == "string": + + if isinstance(min_itemsize, dict): + min_itemsize = min_itemsize.get(self.name) + + if min_itemsize is not None and self.typ.itemsize < min_itemsize: + self.typ = _tables().StringCol(itemsize=min_itemsize, pos=self.pos) + + def validate_names(self): + pass + + def validate_and_set(self, handler: "AppendableTable", append: bool): + self.table = handler.table + self.validate_col() + self.validate_attr(append) + self.validate_metadata(handler) + 
self.write_metadata(handler) + self.set_attr() + + def validate_col(self, itemsize=None): + """ validate this column: return the compared against itemsize """ + + # validate this column for string truncation (or reset to the max size) + if _ensure_decoded(self.kind) == "string": + c = self.col + if c is not None: + if itemsize is None: + itemsize = self.itemsize + if c.itemsize < itemsize: + raise ValueError( + f"Trying to store a string with len [{itemsize}] in " + f"[{self.cname}] column but\nthis column has a limit of " + f"[{c.itemsize}]!\nConsider using min_itemsize to " + "preset the sizes on these columns" + ) + return c.itemsize + + return None + + def validate_attr(self, append: bool): + # check for backwards incompatibility + if append: + existing_kind = getattr(self.attrs, self.kind_attr, None) + if existing_kind is not None and existing_kind != self.kind: + raise TypeError( + f"incompatible kind in col [{existing_kind} - {self.kind}]" + ) + + def update_info(self, info): + """ set/update the info for this indexable with the key/value + if there is a conflict raise/warn as needed """ + + for key in self._info_fields: + + value = getattr(self, key, None) + idx = info.setdefault(self.name, {}) + + existing_value = idx.get(key) + if key in idx and value is not None and existing_value != value: + + # frequency/name just warn + if key in ["freq", "index_name"]: + ws = attribute_conflict_doc % (key, existing_value, value) + warnings.warn(ws, AttributeConflictWarning, stacklevel=6) + + # reset + idx[key] = None + setattr(self, key, None) + + else: + raise ValueError( + f"invalid info for [{self.name}] for [{key}], " + f"existing_value [{existing_value}] conflicts with " + f"new value [{value}]" + ) + else: + if value is not None or existing_value is not None: + idx[key] = value + + def set_info(self, info): + """ set my state from the passed info """ + idx = info.get(self.name) + if idx is not None: + self.__dict__.update(idx) + + def set_attr(self): + """ set 
class GenericIndexCol(IndexCol):
    """ an index which is not represented in the data of the table """

    @property
    def is_indexed(self) -> bool:
        # replaces the table lookup done by the base class: never indexed
        return False

    def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
        """
        Build a positional index with one entry per element of `values`.

        The contents of `values` are not used, only its length; `nan_rep`,
        `encoding` and `errors` are accepted for signature compatibility
        and are ignored.

        Parameters
        ----------
        values : np.ndarray
        nan_rep : str
        encoding : str
        errors : str

        Returns
        -------
        tuple
            The same ``Int64Index`` of ``0 .. len(values) - 1``, twice.
        """
        assert isinstance(values, np.ndarray), type(values)

        n_rows = len(values)
        positions = Int64Index(np.arange(n_rows))
        return positions, positions

    def set_attr(self):
        """ nothing is written for this kind of column """
        return None
data """ + return self.data + + @classmethod + def _get_atom(cls, values: Union[np.ndarray, ABCExtensionArray]) -> "Col": + """ + Get an appropriately typed and shaped pytables.Col object for values. + """ + + dtype = values.dtype + itemsize = dtype.itemsize + + shape = values.shape + if values.ndim == 1: + # EA, use block shape pretending it is 2D + shape = (1, values.size) + + if is_categorical_dtype(dtype): + codes = values.codes + atom = cls.get_atom_data(shape, kind=codes.dtype.name) + elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): + atom = cls.get_atom_datetime64(shape) + elif is_timedelta64_dtype(dtype): + atom = cls.get_atom_timedelta64(shape) + elif is_complex_dtype(dtype): + atom = _tables().ComplexCol(itemsize=itemsize, shape=shape[0]) + + elif is_string_dtype(dtype): + atom = cls.get_atom_string(shape, itemsize) + + else: + atom = cls.get_atom_data(shape, kind=dtype.name) + + return atom + + @classmethod + def get_atom_string(cls, shape, itemsize): + return _tables().StringCol(itemsize=itemsize, shape=shape[0]) + + @classmethod + def get_atom_coltype(cls, kind: str) -> Type["Col"]: + """ return the PyTables column class for this column """ + if kind.startswith("uint"): + k4 = kind[4:] + col_name = f"UInt{k4}Col" + elif kind.startswith("period"): + # we store as integer + col_name = "Int64Col" + else: + kcap = kind.capitalize() + col_name = f"{kcap}Col" + + return getattr(_tables(), col_name) + + @classmethod + def get_atom_data(cls, shape, kind: str) -> "Col": + return cls.get_atom_coltype(kind=kind)(shape=shape[0]) + + @classmethod + def get_atom_datetime64(cls, shape): + return _tables().Int64Col(shape=shape[0]) + + @classmethod + def get_atom_timedelta64(cls, shape): + return _tables().Int64Col(shape=shape[0]) + + @property + def shape(self): + return getattr(self.data, "shape", None) + + @property + def cvalues(self): + """ return my cython values """ + return self.data + + def validate_attr(self, append): + """validate that we 
have the same order as the existing & same dtype""" + if append: + existing_fields = getattr(self.attrs, self.kind_attr, None) + if existing_fields is not None and existing_fields != list(self.values): + raise ValueError("appended items do not match existing items in table!") + + existing_dtype = getattr(self.attrs, self.dtype_attr, None) + if existing_dtype is not None and existing_dtype != self.dtype: + raise ValueError( + "appended items dtype do not match existing " + "items dtype in table!" + ) + + def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): + """ + Convert the data from this selection to the appropriate pandas type. + + Parameters + ---------- + values : np.ndarray + nan_rep : + encoding : str + errors : str + + Returns + ------- + index : listlike to become an Index + data : ndarraylike to become a column + """ + assert isinstance(values, np.ndarray), type(values) + + # values is a recarray + if values.dtype.fields is not None: + values = values[self.cname] + + assert self.typ is not None + if self.dtype is None: + # Note: in tests we never have timedelta64 or datetime64, + # so the _get_data_and_dtype_name may be unnecessary + converted, dtype_name = _get_data_and_dtype_name(values) + kind = _dtype_to_kind(dtype_name) + else: + converted = values + dtype_name = self.dtype + kind = self.kind + + assert isinstance(converted, np.ndarray) # for mypy + + # use the meta if needed + meta = _ensure_decoded(self.meta) + metadata = self.metadata + ordered = self.ordered + tz = self.tz + + assert dtype_name is not None + # convert to the correct dtype + dtype = _ensure_decoded(dtype_name) + + # reverse converts + if dtype == "datetime64": + + # recreate with tz if indicated + converted = _set_tz(converted, tz, coerce=True) + + elif dtype == "timedelta64": + converted = np.asarray(converted, dtype="m8[ns]") + elif dtype == "date": + try: + converted = np.asarray( + [date.fromordinal(v) for v in converted], dtype=object + ) + except 
ValueError: + converted = np.asarray( + [date.fromtimestamp(v) for v in converted], dtype=object + ) + + elif meta == "category": + + # we have a categorical + categories = metadata + codes = converted.ravel() + + # if we have stored a NaN in the categories + # then strip it; in theory we could have BOTH + # -1s in the codes and nulls :< + if categories is None: + # Handle case of NaN-only categorical columns in which case + # the categories are an empty array; when this is stored, + # pytables cannot write a zero-len array, so on readback + # the categories would be None and `read_hdf()` would fail. + categories = Index([], dtype=np.float64) + else: + mask = isna(categories) + if mask.any(): + categories = categories[~mask] + codes[codes != -1] -= mask.astype(int).cumsum().values + + converted = Categorical.from_codes( + codes, categories=categories, ordered=ordered + ) + + else: + + try: + converted = converted.astype(dtype, copy=False) + except TypeError: + converted = converted.astype("O", copy=False) + + # convert nans / decode + if _ensure_decoded(kind) == "string": + converted = _unconvert_string_array( + converted, nan_rep=nan_rep, encoding=encoding, errors=errors + ) + + return self.values, converted + + def set_attr(self): + """ set the data for this column """ + setattr(self.attrs, self.kind_attr, self.values) + setattr(self.attrs, self.meta_attr, self.meta) + assert self.dtype is not None + setattr(self.attrs, self.dtype_attr, self.dtype) + + +class DataIndexableCol(DataCol): + """ represent a data column that can be indexed """ + + is_data_indexable = True + + def validate_names(self): + if not Index(self.values).is_object(): + # TODO: should the message here be more specifically non-str? 
class Fixed:
    """ represent an object in my store
        facilitate read/write of various types of objects
        this is an abstract base class

    Parameters
    ----------
    parent : HDFStore
    group : Node
        The group node where the table resides.
    encoding : str, default "UTF-8"
        Normalized through ``_ensure_encoding`` before being stored.
    errors : str, default "strict"
    """

    pandas_kind: str
    format_type: str = "fixed"  # GH#30962 needed by dask
    obj_type: Type[Union[DataFrame, Series]]
    ndim: int
    encoding: str
    parent: "HDFStore"
    group: "Node"
    errors: str
    is_table = False

    def __init__(
        self,
        parent: "HDFStore",
        group: "Node",
        encoding: str = "UTF-8",
        errors: str = "strict",
    ):
        assert isinstance(parent, HDFStore), type(parent)
        assert _table_mod is not None  # needed for mypy
        assert isinstance(group, _table_mod.Node), type(group)
        self.parent = parent
        self.group = group
        self.encoding = _ensure_encoding(encoding)
        self.errors = errors

    @property
    def is_old_version(self) -> bool:
        """ whether the stored version is older than 0.10.1 """
        # Compare as a tuple: checking each component on its own would not
        # report e.g. (0, 9, 5) as older than (0, 10, 1).
        return self.version < (0, 10, 1)

    @property
    def version(self) -> Tuple[int, int, int]:
        """
        Parse the ``pandas_version`` attribute of the group.

        A two-part version gets a trailing 0; a value without ``split``
        (such as None when the attribute is missing) yields (0, 0, 0).
        """
        version = _ensure_decoded(getattr(self.group._v_attrs, "pandas_version", None))
        try:
            version = tuple(int(x) for x in version.split("."))
            if len(version) == 2:
                version = version + (0,)
        except AttributeError:
            version = (0, 0, 0)
        return version

    @property
    def pandas_type(self):
        """ the decoded ``pandas_type`` attribute of the group, or None """
        return _ensure_decoded(getattr(self.group._v_attrs, "pandas_type", None))

    def __repr__(self) -> str:
        """ return a pretty representation of myself """
        self.infer_axes()
        s = self.shape
        if s is not None:
            if isinstance(s, (list, tuple)):
                jshape = ",".join(pprint_thing(x) for x in s)
                s = f"[{jshape}]"
            return f"{self.pandas_type:12.12} (shape->{s})"
        return self.pandas_type

    def set_object_info(self):
        """ set my pandas type & version """
        self.attrs.pandas_type = str(self.pandas_kind)
        self.attrs.pandas_version = str(_version)

    def copy(self):
        """ return a shallow copy of this storer """
        new_self = copy.copy(self)
        return new_self

    @property
    def shape(self):
        return self.nrows

    @property
    def pathname(self):
        return self.group._v_pathname

    @property
    def _handle(self):
        return self.parent._handle

    @property
    def _filters(self):
        return self.parent._filters

    @property
    def _complevel(self) -> int:
        return self.parent._complevel

    @property
    def _fletcher32(self) -> bool:
        return self.parent._fletcher32

    @property
    def attrs(self):
        return self.group._v_attrs

    def set_attrs(self):
        """ set our object attributes """
        pass

    def get_attrs(self):
        """ get our object attributes """
        pass

    @property
    def storable(self):
        """ return my storable """
        return self.group

    @property
    def is_exists(self) -> bool:
        return False

    @property
    def nrows(self):
        """ the ``nrows`` attribute of the storable, or None if it has none """
        return getattr(self.storable, "nrows", None)

    def validate(self, other):
        """ validate against an existing storable """
        if other is None:
            return
        return True

    def validate_version(self, where=None):
        """ are we trying to operate on an old version? """
        return True

    def infer_axes(self):
        """ infer the axes of my storer
              return a boolean indicating if we have a valid storer or not """

        s = self.storable
        if s is None:
            return False
        self.get_attrs()
        return True

    def read(
        self,
        where=None,
        columns=None,
        start: Optional[int] = None,
        stop: Optional[int] = None,
    ):
        raise NotImplementedError(
            "cannot read on an abstract storer: subclasses should implement"
        )

    def write(self, **kwargs):
        raise NotImplementedError(
            "cannot write on an abstract storer: subclasses should implement"
        )

    def delete(
        self, where=None, start: Optional[int] = None, stop: Optional[int] = None
    ):
        """
        support fully deleting the node in its entirety (only) - where
        specification must be None

        Raises
        ------
        TypeError
            If any of `where`, `start` or `stop` is not None.
        """
        if com.all_none(where, start, stop):
            self._handle.remove_node(self.group, recursive=True)
            return None

        raise TypeError("cannot delete on an abstract storer")
+ + return klass + + def validate_read(self, columns, where): + """ + raise if any keywords are passed which are not-None + """ + if columns is not None: + raise TypeError( + "cannot pass a column specification when reading " + "a Fixed format store. this store must be " + "selected in its entirety" + ) + if where is not None: + raise TypeError( + "cannot pass a where specification when reading " + "from a Fixed format store. this store must be " + "selected in its entirety" + ) + + @property + def is_exists(self) -> bool: + return True + + def set_attrs(self): + """ set our object attributes """ + self.attrs.encoding = self.encoding + self.attrs.errors = self.errors + + def get_attrs(self): + """ retrieve our attributes """ + self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) + for n in self.attributes: + setattr(self, n, _ensure_decoded(getattr(self.attrs, n, None))) + + def write(self, obj, **kwargs): + self.set_attrs() + + def read_array( + self, key: str, start: Optional[int] = None, stop: Optional[int] = None + ): + """ read an array for the specified node (off of group """ + import tables + + node = getattr(self.group, key) + attrs = node._v_attrs + + transposed = getattr(attrs, "transposed", False) + + if isinstance(node, tables.VLArray): + ret = node[0][start:stop] + else: + dtype = getattr(attrs, "value_type", None) + shape = getattr(attrs, "shape", None) + + if shape is not None: + # length 0 axis + ret = np.empty(shape, dtype=dtype) + else: + ret = node[start:stop] + + if dtype == "datetime64": + + # reconstruct a timezone if indicated + tz = getattr(attrs, "tz", None) + ret = _set_tz(ret, tz, coerce=True) + + elif dtype == "timedelta64": + ret = np.asarray(ret, dtype="m8[ns]") + + if transposed: + return ret.T + else: + return ret + + def read_index( + self, key: str, start: Optional[int] = None, stop: Optional[int] = None + ) -> Index: + variety = 
_ensure_decoded(getattr(self.attrs, f"{key}_variety")) + + if variety == "multi": + return self.read_multi_index(key, start=start, stop=stop) + elif variety == "regular": + node = getattr(self.group, key) + index = self.read_index_node(node, start=start, stop=stop) + return index + else: # pragma: no cover + raise TypeError(f"unrecognized index variety: {variety}") + + def write_index(self, key: str, index: Index): + if isinstance(index, MultiIndex): + setattr(self.attrs, f"{key}_variety", "multi") + self.write_multi_index(key, index) + else: + setattr(self.attrs, f"{key}_variety", "regular") + converted = _convert_index("index", index, self.encoding, self.errors) + + self.write_array(key, converted.values) + + node = getattr(self.group, key) + node._v_attrs.kind = converted.kind + node._v_attrs.name = index.name + + if isinstance(index, (DatetimeIndex, PeriodIndex)): + node._v_attrs.index_class = self._class_to_alias(type(index)) + + if isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + node._v_attrs.freq = index.freq + + if isinstance(index, DatetimeIndex) and index.tz is not None: + node._v_attrs.tz = _get_tz(index.tz) + + def write_multi_index(self, key: str, index: MultiIndex): + setattr(self.attrs, f"{key}_nlevels", index.nlevels) + + for i, (lev, level_codes, name) in enumerate( + zip(index.levels, index.codes, index.names) + ): + # write the level + if is_extension_array_dtype(lev): + raise NotImplementedError( + "Saving a MultiIndex with an extension dtype is not supported." 
+ ) + level_key = f"{key}_level{i}" + conv_level = _convert_index(level_key, lev, self.encoding, self.errors) + self.write_array(level_key, conv_level.values) + node = getattr(self.group, level_key) + node._v_attrs.kind = conv_level.kind + node._v_attrs.name = name + + # write the name + setattr(node._v_attrs, f"{key}_name{name}", name) + + # write the labels + label_key = f"{key}_label{i}" + self.write_array(label_key, level_codes) + + def read_multi_index( + self, key: str, start: Optional[int] = None, stop: Optional[int] = None + ) -> MultiIndex: + nlevels = getattr(self.attrs, f"{key}_nlevels") + + levels = [] + codes = [] + names: List[Optional[Hashable]] = [] + for i in range(nlevels): + level_key = f"{key}_level{i}" + node = getattr(self.group, level_key) + lev = self.read_index_node(node, start=start, stop=stop) + levels.append(lev) + names.append(lev.name) + + label_key = f"{key}_label{i}" + level_codes = self.read_array(label_key, start=start, stop=stop) + codes.append(level_codes) + + return MultiIndex( + levels=levels, codes=codes, names=names, verify_integrity=True + ) + + def read_index_node( + self, node: "Node", start: Optional[int] = None, stop: Optional[int] = None + ) -> Index: + data = node[start:stop] + # If the index was an empty array write_array_empty() will + # have written a sentinel. Here we relace it with the original. 
+ if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0: + data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type,) + kind = _ensure_decoded(node._v_attrs.kind) + name = None + + if "name" in node._v_attrs: + name = _ensure_str(node._v_attrs.name) + name = _ensure_decoded(name) + + index_class = self._alias_to_class( + _ensure_decoded(getattr(node._v_attrs, "index_class", "")) + ) + factory = self._get_index_factory(index_class) + + kwargs = {} + if "freq" in node._v_attrs: + kwargs["freq"] = node._v_attrs["freq"] + + if "tz" in node._v_attrs: + if isinstance(node._v_attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = node._v_attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = node._v_attrs["tz"] + + if kind == "date": + index = factory( + _unconvert_index( + data, kind, encoding=self.encoding, errors=self.errors + ), + dtype=object, + **kwargs, + ) + else: + index = factory( + _unconvert_index( + data, kind, encoding=self.encoding, errors=self.errors + ), + **kwargs, + ) + + index.name = name + + return index + + def write_array_empty(self, key: str, value: ArrayLike): + """ write a 0-len array """ + + # ugly hack for length 0 axes + arr = np.empty((1,) * value.ndim) + self._handle.create_array(self.group, key, arr) + node = getattr(self.group, key) + node._v_attrs.value_type = str(value.dtype) + node._v_attrs.shape = value.shape + + def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None): + # TODO: we only have one test that gets here, the only EA + # that gets passed is DatetimeArray, and we never have + # both self._filters and EA + assert isinstance(value, (np.ndarray, ABCExtensionArray)), type(value) + + if key in self.group: + self._handle.remove_node(self.group, key) + + # Transform needed to interface with pytables row/col notation + empty_array = value.size == 0 + transposed = False + + if is_categorical_dtype(value): + raise NotImplementedError( + "Cannot store a category 
dtype in " + "a HDF5 dataset that uses format=" + '"fixed". Use format="table".' + ) + if not empty_array: + if hasattr(value, "T"): + # ExtensionArrays (1d) may not have transpose. + value = value.T + transposed = True + + atom = None + if self._filters is not None: + try: + # get the atom for this datatype + atom = _tables().Atom.from_dtype(value.dtype) + except ValueError: + pass + + if atom is not None: + # We only get here if self._filters is non-None and + # the Atom.from_dtype call succeeded + + # create an empty chunked array and fill it from value + if not empty_array: + ca = self._handle.create_carray( + self.group, key, atom, value.shape, filters=self._filters + ) + ca[:] = value + + else: + self.write_array_empty(key, value) + + elif value.dtype.type == np.object_: + + # infer the type, warn if we have a non-string type here (for + # performance) + inferred_type = lib.infer_dtype(value.ravel(), skipna=False) + if empty_array: + pass + elif inferred_type == "string": + pass + else: + ws = performance_doc % (inferred_type, key, items) + warnings.warn(ws, PerformanceWarning, stacklevel=7) + + vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) + vlarr.append(value) + + elif empty_array: + self.write_array_empty(key, value) + elif is_datetime64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view("i8")) + getattr(self.group, key)._v_attrs.value_type = "datetime64" + elif is_datetime64tz_dtype(value.dtype): + # store as UTC + # with a zone + self._handle.create_array(self.group, key, value.asi8) + + node = getattr(self.group, key) + node._v_attrs.tz = _get_tz(value.tz) + node._v_attrs.value_type = "datetime64" + elif is_timedelta64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view("i8")) + getattr(self.group, key)._v_attrs.value_type = "timedelta64" + else: + self._handle.create_array(self.group, key, value) + + getattr(self.group, key)._v_attrs.transposed = transposed + + +class 
SeriesFixed(GenericFixed): + pandas_kind = "series" + attributes = ["name"] + + name: Optional[Hashable] + + @property + def shape(self): + try: + return (len(self.group.values),) + except (TypeError, AttributeError): + return None + + def read( + self, + where=None, + columns=None, + start: Optional[int] = None, + stop: Optional[int] = None, + ): + self.validate_read(columns, where) + index = self.read_index("index", start=start, stop=stop) + values = self.read_array("values", start=start, stop=stop) + return Series(values, index=index, name=self.name) + + def write(self, obj, **kwargs): + super().write(obj, **kwargs) + self.write_index("index", obj.index) + self.write_array("values", obj.values) + self.attrs.name = obj.name + + +class BlockManagerFixed(GenericFixed): + attributes = ["ndim", "nblocks"] + + nblocks: int + + @property + def shape(self): + try: + ndim = self.ndim + + # items + items = 0 + for i in range(self.nblocks): + node = getattr(self.group, f"block{i}_items") + shape = getattr(node, "shape", None) + if shape is not None: + items += shape[0] + + # data shape + node = self.group.block0_values + shape = getattr(node, "shape", None) + if shape is not None: + shape = list(shape[0 : (ndim - 1)]) + else: + shape = [] + + shape.append(items) + + return shape + except AttributeError: + return None + + def read( + self, + where=None, + columns=None, + start: Optional[int] = None, + stop: Optional[int] = None, + ): + # start, stop applied to rows, so 0th axis only + self.validate_read(columns, where) + select_axis = self.obj_type()._get_block_manager_axis(0) + + axes = [] + for i in range(self.ndim): + + _start, _stop = (start, stop) if i == select_axis else (None, None) + ax = self.read_index(f"axis{i}", start=_start, stop=_stop) + axes.append(ax) + + items = axes[0] + dfs = [] + + for i in range(self.nblocks): + + blk_items = self.read_index(f"block{i}_items") + values = self.read_array(f"block{i}_values", start=_start, stop=_stop) + + columns = 
items[items.get_indexer(blk_items)] + df = DataFrame(values.T, columns=columns, index=axes[1]) + dfs.append(df) + + if len(dfs) > 0: + out = concat(dfs, axis=1) + out = out.reindex(columns=items, copy=False) + return out + + return DataFrame(columns=axes[0], index=axes[1]) + + def write(self, obj, **kwargs): + super().write(obj, **kwargs) + data = obj._data + if not data.is_consolidated(): + data = data.consolidate() + + self.attrs.ndim = data.ndim + for i, ax in enumerate(data.axes): + if i == 0: + if not ax.is_unique: + raise ValueError("Columns index has to be unique for fixed format") + self.write_index(f"axis{i}", ax) + + # Supporting mixed-type DataFrame objects...nontrivial + self.attrs.nblocks = len(data.blocks) + for i, blk in enumerate(data.blocks): + # I have no idea why, but writing values before items fixed #2299 + blk_items = data.items.take(blk.mgr_locs) + self.write_array(f"block{i}_values", blk.values, items=blk_items) + self.write_index(f"block{i}_items", blk_items) + + +class FrameFixed(BlockManagerFixed): + pandas_kind = "frame" + obj_type = DataFrame + + +class Table(Fixed): + """ represent a table: + facilitate read/write of various types of tables + + Attrs in Table Node + ------------------- + These are attributes that are store in the main table node, they are + necessary to recreate these tables when read back in. 
+ + index_axes : a list of tuples of the (original indexing axis and + index column) + non_index_axes: a list of tuples of the (original index axis and + columns on a non-indexing axis) + values_axes : a list of the columns which comprise the data of this + table + data_columns : a list of the columns that we are allowing indexing + (these become single columns in values_axes), or True to force all + columns + nan_rep : the string to use for nan representations for string + objects + levels : the names of levels + metadata : the names of the metadata columns + + """ + + pandas_kind = "wide_table" + format_type: str = "table" # GH#30962 needed by dask + table_type: str + levels = 1 + is_table = True + + index_axes: List[IndexCol] + non_index_axes: List[Tuple[int, Any]] + values_axes: List[DataCol] + data_columns: List + metadata: List + info: Dict + + def __init__( + self, + parent: HDFStore, + group: "Node", + encoding=None, + errors: str = "strict", + index_axes=None, + non_index_axes=None, + values_axes=None, + data_columns=None, + info=None, + nan_rep=None, + ): + super().__init__(parent, group, encoding=encoding, errors=errors) + self.index_axes = index_axes or [] + self.non_index_axes = non_index_axes or [] + self.values_axes = values_axes or [] + self.data_columns = data_columns or [] + self.info = info or dict() + self.nan_rep = nan_rep + + @property + def table_type_short(self) -> str: + return self.table_type.split("_")[0] + + def __repr__(self) -> str: + """ return a pretty representation of myself """ + self.infer_axes() + jdc = ",".join(self.data_columns) if len(self.data_columns) else "" + dc = f",dc->[{jdc}]" + + ver = "" + if self.is_old_version: + jver = ".".join(str(x) for x in self.version) + ver = f"[{jver}]" + + jindex_axes = ",".join(a.name for a in self.index_axes) + return ( + f"{self.pandas_type:12.12}{ver} " + f"(typ->{self.table_type_short},nrows->{self.nrows}," + f"ncols->{self.ncols},indexers->[{jindex_axes}]{dc})" + ) + + def 
__getitem__(self, c: str): + """ return the axis for c """ + for a in self.axes: + if c == a.name: + return a + return None + + def validate(self, other): + """ validate against an existing table """ + if other is None: + return + + if other.table_type != self.table_type: + raise TypeError( + "incompatible table_type with existing " + f"[{other.table_type} - {self.table_type}]" + ) + + for c in ["index_axes", "non_index_axes", "values_axes"]: + sv = getattr(self, c, None) + ov = getattr(other, c, None) + if sv != ov: + + # show the error for the specific axes + for i, sax in enumerate(sv): + oax = ov[i] + if sax != oax: + raise ValueError( + f"invalid combination of [{c}] on appending data " + f"[{sax}] vs current table [{oax}]" + ) + + # should never get here + raise Exception( + f"invalid combination of [{c}] on appending data [{sv}] vs " + f"current table [{ov}]" + ) + + @property + def is_multi_index(self) -> bool: + """the levels attribute is 1 or a list in the case of a multi-index""" + return isinstance(self.levels, list) + + def validate_multiindex(self, obj): + """validate that we can store the multi-index; reset and return the + new object + """ + levels = [ + l if l is not None else f"level_{i}" for i, l in enumerate(obj.index.names) + ] + try: + return obj.reset_index(), levels + except ValueError: + raise ValueError( + "duplicate names/columns in the multi-index when storing as a table" + ) + + @property + def nrows_expected(self) -> int: + """ based on our axes, compute the expected nrows """ + return np.prod([i.cvalues.shape[0] for i in self.index_axes]) + + @property + def is_exists(self) -> bool: + """ has this table been created """ + return "table" in self.group + + @property + def storable(self): + return getattr(self.group, "table", None) + + @property + def table(self): + """ return the table group (this is my storable) """ + return self.storable + + @property + def dtype(self): + return self.table.dtype + + @property + def description(self): 
+ return self.table.description + + @property + def axes(self): + return itertools.chain(self.index_axes, self.values_axes) + + @property + def ncols(self) -> int: + """ the number of total columns in the values axes """ + return sum(len(a.values) for a in self.values_axes) + + @property + def is_transposed(self) -> bool: + return False + + @property + def data_orientation(self): + """return a tuple of my permutated axes, non_indexable at the front""" + return tuple( + itertools.chain( + [int(a[0]) for a in self.non_index_axes], + [int(a.axis) for a in self.index_axes], + ) + ) + + def queryables(self) -> Dict[str, Any]: + """ return a dict of the kinds allowable columns for this object """ + + # mypy doesn't recognize DataFrame._AXIS_NAMES, so we re-write it here + axis_names = {0: "index", 1: "columns"} + + # compute the values_axes queryables + d1 = [(a.cname, a) for a in self.index_axes] + d2 = [(axis_names[axis], None) for axis, values in self.non_index_axes] + d3 = [ + (v.cname, v) for v in self.values_axes if v.name in set(self.data_columns) + ] + + return dict(d1 + d2 + d3) # type: ignore + # error: List comprehension has incompatible type + # List[Tuple[Any, None]]; expected List[Tuple[str, IndexCol]] + + def index_cols(self): + """ return a list of my index cols """ + # Note: each `i.cname` below is assured to be a str. + return [(i.axis, i.cname) for i in self.index_axes] + + def values_cols(self) -> List[str]: + """ return a list of my values cols """ + return [i.cname for i in self.values_axes] + + def _get_metadata_path(self, key: str) -> str: + """ return the metadata pathname for this key """ + group = self.group._v_pathname + return f"{group}/meta/{key}/meta" + + def write_metadata(self, key: str, values: np.ndarray): + """ + Write out a metadata array to the key as a fixed-format Series. 
+ + Parameters + ---------- + key : str + values : ndarray + """ + values = Series(values) + self.parent.put( + self._get_metadata_path(key), + values, + format="table", + encoding=self.encoding, + errors=self.errors, + nan_rep=self.nan_rep, + ) + + def read_metadata(self, key: str): + """ return the meta data array for this key """ + if getattr(getattr(self.group, "meta", None), key, None) is not None: + return self.parent.select(self._get_metadata_path(key)) + return None + + def set_attrs(self): + """ set our table type & indexables """ + self.attrs.table_type = str(self.table_type) + self.attrs.index_cols = self.index_cols() + self.attrs.values_cols = self.values_cols() + self.attrs.non_index_axes = self.non_index_axes + self.attrs.data_columns = self.data_columns + self.attrs.nan_rep = self.nan_rep + self.attrs.encoding = self.encoding + self.attrs.errors = self.errors + self.attrs.levels = self.levels + self.attrs.info = self.info + + def get_attrs(self): + """ retrieve our attributes """ + self.non_index_axes = getattr(self.attrs, "non_index_axes", None) or [] + self.data_columns = getattr(self.attrs, "data_columns", None) or [] + self.info = getattr(self.attrs, "info", None) or dict() + self.nan_rep = getattr(self.attrs, "nan_rep", None) + self.encoding = _ensure_encoding(getattr(self.attrs, "encoding", None)) + self.errors = _ensure_decoded(getattr(self.attrs, "errors", "strict")) + self.levels = getattr(self.attrs, "levels", None) or [] + self.index_axes = [a for a in self.indexables if a.is_an_indexable] + self.values_axes = [a for a in self.indexables if not a.is_an_indexable] + + def validate_version(self, where=None): + """ are we trying to operate on an old version? 
""" + if where is not None: + if self.version[0] <= 0 and self.version[1] <= 10 and self.version[2] < 1: + ws = incompatibility_doc % ".".join([str(x) for x in self.version]) + warnings.warn(ws, IncompatibilityWarning) + + def validate_min_itemsize(self, min_itemsize): + """validate the min_itemsize doesn't contain items that are not in the + axes this needs data_columns to be defined + """ + if min_itemsize is None: + return + if not isinstance(min_itemsize, dict): + return + + q = self.queryables() + for k, v in min_itemsize.items(): + + # ok, apply generally + if k == "values": + continue + if k not in q: + raise ValueError( + f"min_itemsize has the key [{k}] which is not an axis or " + "data_column" + ) + + @cache_readonly + def indexables(self): + """ create/cache the indexables if they don't exist """ + _indexables = [] + + desc = self.description + table_attrs = self.table.attrs + + # Note: each of the `name` kwargs below are str, ensured + # by the definition in index_cols. + # index columns + for i, (axis, name) in enumerate(self.attrs.index_cols): + atom = getattr(desc, name) + md = self.read_metadata(name) + meta = "category" if md is not None else None + + kind_attr = f"{name}_kind" + kind = getattr(table_attrs, kind_attr, None) + + index_col = IndexCol( + name=name, + axis=axis, + pos=i, + kind=kind, + typ=atom, + table=self.table, + meta=meta, + metadata=md, + ) + _indexables.append(index_col) + + # values columns + dc = set(self.data_columns) + base_pos = len(_indexables) + + def f(i, c): + assert isinstance(c, str) + klass = DataCol + if c in dc: + klass = DataIndexableCol + + atom = getattr(desc, c) + adj_name = _maybe_adjust_name(c, self.version) + + # TODO: why kind_attr here? + values = getattr(table_attrs, f"{adj_name}_kind", None) + dtype = getattr(table_attrs, f"{adj_name}_dtype", None) + kind = _dtype_to_kind(dtype) + + md = self.read_metadata(c) + # TODO: figure out why these two versions of `meta` dont always match. 
+ # meta = "category" if md is not None else None + meta = getattr(table_attrs, f"{adj_name}_meta", None) + + obj = klass( + name=adj_name, + cname=c, + values=values, + kind=kind, + pos=base_pos + i, + typ=atom, + table=self.table, + meta=meta, + metadata=md, + dtype=dtype, + ) + return obj + + # Note: the definition of `values_cols` ensures that each + # `c` below is a str. + _indexables.extend([f(i, c) for i, c in enumerate(self.attrs.values_cols)]) + + return _indexables + + def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None): + """ + Create a pytables index on the specified columns. + + Parameters + ---------- + columns : None, bool, or listlike[str] + Indicate which columns to create an index on. + + * False : Do not create any indexes. + * True : Create indexes on all columns. + * None : Create indexes on all columns. + * listlike : Create indexes on the given columns. + + optlevel : int or None, default None + Optimization level, if None, pytables defaults to 6. + kind : str or None, default None + Kind of index, if None, pytables defaults to "medium". + + Raises + ------ + TypeError if trying to create an index on a complex-type column. + + Notes + ----- + Cannot index Time64Col or ComplexCol. + Pytables must be >= 3.0. 
+ """ + + if not self.infer_axes(): + return + if columns is False: + return + + # index all indexables and data_columns + if columns is None or columns is True: + columns = [a.cname for a in self.axes if a.is_data_indexable] + if not isinstance(columns, (tuple, list)): + columns = [columns] + + kw = dict() + if optlevel is not None: + kw["optlevel"] = optlevel + if kind is not None: + kw["kind"] = kind + + table = self.table + for c in columns: + v = getattr(table.cols, c, None) + if v is not None: + + # remove the index if the kind/optlevel have changed + if v.is_indexed: + index = v.index + cur_optlevel = index.optlevel + cur_kind = index.kind + + if kind is not None and cur_kind != kind: + v.remove_index() + else: + kw["kind"] = cur_kind + + if optlevel is not None and cur_optlevel != optlevel: + v.remove_index() + else: + kw["optlevel"] = cur_optlevel + + # create the index + if not v.is_indexed: + if v.type.startswith("complex"): + raise TypeError( + "Columns containing complex values can be stored but " + "cannot be indexed when using table format. Either use " + "fixed format, set index=False, or do not include " + "the columns containing complex values to " + "data_columns when initializing the table." + ) + v.create_index(**kw) + + def _read_axes( + self, where, start: Optional[int] = None, stop: Optional[int] = None + ) -> List[Tuple[ArrayLike, ArrayLike]]: + """ + Create the axes sniffed from the table. + + Parameters + ---------- + where : ??? 
+ start : int or None, default None + stop : int or None, default None + + Returns + ------- + List[Tuple[index_values, column_values]] + """ + + # create the selection + selection = Selection(self, where=where, start=start, stop=stop) + values = selection.select() + + results = [] + # convert the data + for a in self.axes: + a.set_info(self.info) + res = a.convert( + values, + nan_rep=self.nan_rep, + encoding=self.encoding, + errors=self.errors, + ) + results.append(res) + + return results + + @classmethod + def get_object(cls, obj, transposed: bool): + """ return the data for this obj """ + return obj + + def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): + """take the input data_columns and min_itemize and create a data + columns spec + """ + + if not len(non_index_axes): + return [] + + axis, axis_labels = non_index_axes[0] + info = self.info.get(axis, dict()) + if info.get("type") == "MultiIndex" and data_columns: + raise ValueError( + f"cannot use a multi-index on axis [{axis}] with " + f"data_columns {data_columns}" + ) + + # evaluate the passed data_columns, True == use all columns + # take only valide axis labels + if data_columns is True: + data_columns = list(axis_labels) + elif data_columns is None: + data_columns = [] + + # if min_itemsize is a dict, add the keys (exclude 'values') + if isinstance(min_itemsize, dict): + + existing_data_columns = set(data_columns) + data_columns = list(data_columns) # ensure we do not modify + data_columns.extend( + [ + k + for k in min_itemsize.keys() + if k != "values" and k not in existing_data_columns + ] + ) + + # return valid columns in the order of our axis + return [c for c in data_columns if c in axis_labels] + + def _create_axes( + self, + axes, + obj: DataFrame, + validate: bool = True, + nan_rep=None, + data_columns=None, + min_itemsize=None, + ): + """ + Create and return the axes. + + Parameters + ---------- + axes: list or None + The names or numbers of the axes to create. 
+ obj : DataFrame + The object to create axes on. + validate: bool, default True + Whether to validate the obj against an existing object already written. + nan_rep : + A value to use for string column nan_rep. + data_columns : List[str], True, or None, default None + Specify the columns that we want to create to allow indexing on. + + * True : Use all available columns. + * None : Use no columns. + * List[str] : Use the specified columns. + + min_itemsize: Dict[str, int] or None, default None + The min itemsize for a column in bytes. + """ + + if not isinstance(obj, DataFrame): + group = self.group._v_name + raise TypeError( + f"cannot properly create the storer for: [group->{group}," + f"value->{type(obj)}]" + ) + + # set the default axes if needed + if axes is None: + axes = [0] + + # map axes to numbers + axes = [obj._get_axis_number(a) for a in axes] + + # do we have an existing table (if so, use its axes & data_columns) + if self.infer_axes(): + table_exists = True + axes = [a.axis for a in self.index_axes] + data_columns = list(self.data_columns) + nan_rep = self.nan_rep + # TODO: do we always have validate=True here? + else: + table_exists = False + + new_info = self.info + + assert self.ndim == 2 # with next check, we must have len(axes) == 1 + # currently support on ndim-1 axes + if len(axes) != self.ndim - 1: + raise ValueError( + "currently only support ndim-1 indexers in an AppendableTable" + ) + + # create according to the new data + new_non_index_axes: List = [] + + # nan_representation + if nan_rep is None: + nan_rep = "nan" + + # We construct the non-index-axis first, since that alters new_info + idx = [x for x in [0, 1] if x not in axes][0] + + a = obj.axes[idx] + # we might be able to change the axes on the appending data if necessary + append_axis = list(a) + if table_exists: + indexer = len(new_non_index_axes) # i.e. 
0 + exist_axis = self.non_index_axes[indexer][1] + if not array_equivalent(np.array(append_axis), np.array(exist_axis)): + + # ahah! -> reindex + if array_equivalent( + np.array(sorted(append_axis)), np.array(sorted(exist_axis)) + ): + append_axis = exist_axis + + # the non_index_axes info + info = new_info.setdefault(idx, {}) + info["names"] = list(a.names) + info["type"] = type(a).__name__ + + new_non_index_axes.append((idx, append_axis)) + + # Now we can construct our new index axis + idx = axes[0] + a = obj.axes[idx] + axis_name = obj._AXIS_NAMES[idx] + new_index = _convert_index(axis_name, a, self.encoding, self.errors) + new_index.axis = idx + + # Because we are always 2D, there is only one new_index, so + # we know it will have pos=0 + new_index.set_pos(0) + new_index.update_info(new_info) + new_index.maybe_set_size(min_itemsize) # check for column conflicts + + new_index_axes = [new_index] + j = len(new_index_axes) # i.e. 1 + assert j == 1 + + # reindex by our non_index_axes & compute data_columns + assert len(new_non_index_axes) == 1 + for a in new_non_index_axes: + obj = _reindex_axis(obj, a[0], a[1]) + + def get_blk_items(mgr, blocks): + return [mgr.items.take(blk.mgr_locs) for blk in blocks] + + transposed = new_index.axis == 1 + + # figure out data_columns and get out blocks + data_columns = self.validate_data_columns( + data_columns, min_itemsize, new_non_index_axes + ) + + block_obj = self.get_object(obj, transposed)._consolidate() + + blocks, blk_items = self._get_blocks_and_items( + block_obj, table_exists, new_non_index_axes, self.values_axes, data_columns + ) + + # add my values + vaxes = [] + for i, (b, b_items) in enumerate(zip(blocks, blk_items)): + + # shape of the data column are the indexable axes + klass = DataCol + name = None + + # we have a data_column + if data_columns and len(b_items) == 1 and b_items[0] in data_columns: + klass = DataIndexableCol + name = b_items[0] + if not (name is None or isinstance(name, str)): + # TODO: should 
the message here be more specifically non-str? + raise ValueError("cannot have non-object label DataIndexableCol") + + # make sure that we match up the existing columns + # if we have an existing table + existing_col: Optional[DataCol] + + if table_exists and validate: + try: + existing_col = self.values_axes[i] + except (IndexError, KeyError): + raise ValueError( + f"Incompatible appended table [{blocks}]" + f"with existing table [{self.values_axes}]" + ) + else: + existing_col = None + + new_name = name or f"values_block_{i}" + data_converted = _maybe_convert_for_string_atom( + new_name, + b, + existing_col=existing_col, + min_itemsize=min_itemsize, + nan_rep=nan_rep, + encoding=self.encoding, + errors=self.errors, + ) + adj_name = _maybe_adjust_name(new_name, self.version) + + typ = klass._get_atom(data_converted) + kind = _dtype_to_kind(data_converted.dtype.name) + tz = _get_tz(data_converted.tz) if hasattr(data_converted, "tz") else None + + meta = metadata = ordered = None + if is_categorical_dtype(data_converted): + ordered = data_converted.ordered + meta = "category" + metadata = np.array(data_converted.categories, copy=False).ravel() + + data, dtype_name = _get_data_and_dtype_name(data_converted) + + col = klass( + name=adj_name, + cname=new_name, + values=list(b_items), + typ=typ, + pos=j, + kind=kind, + tz=tz, + ordered=ordered, + meta=meta, + metadata=metadata, + dtype=dtype_name, + data=data, + ) + col.update_info(new_info) + + vaxes.append(col) + + j += 1 + + dcs = [col.name for col in vaxes if col.is_data_indexable] + + new_table = type(self)( + parent=self.parent, + group=self.group, + encoding=self.encoding, + errors=self.errors, + index_axes=new_index_axes, + non_index_axes=new_non_index_axes, + values_axes=vaxes, + data_columns=dcs, + info=new_info, + nan_rep=nan_rep, + ) + if hasattr(self, "levels"): + # TODO: get this into constructor, only for appropriate subclass + new_table.levels = self.levels + + 
@staticmethod
def _get_blocks_and_items(
    block_obj, table_exists, new_non_index_axes, values_axes, data_columns
):
    """
    Split ``block_obj`` into storage blocks and the item labels of each block.

    Parameters
    ----------
    block_obj : object exposing ``_data`` (block manager) and ``reindex``
    table_exists : bool
        When True the result is reordered to follow ``values_axes``.
    new_non_index_axes : list of (axis, labels) pairs
        Only the first pair is used, and only when ``data_columns`` is
        non-empty.
    values_axes : iterable of columns with a ``values`` attribute
        Existing value columns whose order must be reproduced.
    data_columns : sized iterable of labels
        Each of these labels gets a block of its own.

    Returns
    -------
    (blocks, blk_items) : two parallel lists

    Raises
    ------
    ValueError
        If ``table_exists`` and an existing column's items have no
        matching block in the new data.
    """
    # Helper to clarify non-state-altering parts of _create_axes

    def get_blk_items(mgr, blocks):
        # labels held by each block, looked up through the block's locations
        return [mgr.items.take(blk.mgr_locs) for blk in blocks]

    blocks = block_obj._data.blocks
    blk_items = get_blk_items(block_obj._data, blocks)

    if len(data_columns):
        axis, axis_labels = new_non_index_axes[0]
        # first the blocks for everything that is NOT a data column ...
        new_labels = Index(axis_labels).difference(Index(data_columns))
        mgr = block_obj.reindex(new_labels, axis=axis)._data

        blocks = list(mgr.blocks)
        blk_items = get_blk_items(mgr, blocks)
        # ... then one single-label reindex per data column, appended in
        # data_columns order
        for c in data_columns:
            mgr = block_obj.reindex([c], axis=axis)._data
            blocks.extend(mgr.blocks)
            blk_items.extend(get_blk_items(mgr, mgr.blocks))

    # reorder the blocks in the same order as the existing table if we can
    if table_exists:
        # key each block by the tuple of its item labels
        by_items = {
            tuple(b_items.tolist()): (b, b_items)
            for b, b_items in zip(blocks, blk_items)
        }
        new_blocks = []
        new_blk_items = []
        for ea in values_axes:
            items = tuple(ea.values)
            try:
                # pop so that a block can satisfy at most one existing column
                b, b_items = by_items.pop(items)
                new_blocks.append(b)
                new_blk_items.append(b_items)
            except (IndexError, KeyError):
                jitems = ",".join(pprint_thing(item) for item in items)
                raise ValueError(
                    f"cannot match existing table structure for [{jitems}] "
                    "on appending data"
                )
        # NOTE(review): blocks left over in by_items are silently dropped
        # here -- confirm that a later validation step rejects that case.
        blocks = new_blocks
        blk_items = new_blk_items

    return blocks, blk_items
def create_description(
    self,
    complib,
    complevel: Optional[int],
    fletcher32: bool,
    expectedrows: Optional[int],
) -> Dict[str, Any]:
    """
    Assemble the keyword arguments describing this table.

    Parameters
    ----------
    complib : str or None
        Compression library; any falsy value means "build no new filters".
    complevel : int or None
        Compression level; falls back to ``self._complevel`` and then 9.
    fletcher32 : bool
        OR-ed with ``self._fletcher32``.
    expectedrows : int or None
        Row-count hint; defaults to ``max(self.nrows_expected, 10000)``.

    Returns
    -------
    dict
        Keys ``name``, ``expectedrows``, ``description`` and, when a
        compression setting applies, ``filters``.
    """
    if expectedrows is None:
        row_hint = max(self.nrows_expected, 10000)
    else:
        row_hint = expectedrows

    # one entry per axis: storage column name -> column type
    columns = {}
    for ax in self.axes:
        columns[ax.cname] = ax.typ

    spec = {"name": "table", "expectedrows": row_hint, "description": columns}

    if not complib:
        # no library requested: reuse the stored filters, if there are any
        if self._filters is not None:
            spec["filters"] = self._filters
        return spec

    if complevel is None:
        complevel = self._complevel or 9
    spec["filters"] = _tables().Filters(
        complevel=complevel,
        complib=complib,
        fletcher32=fletcher32 or self._fletcher32,
    )
    return spec
def write_data(self, chunksize: Optional[int], dropna: bool = False):
    """
    Write the index and value columns to the table, chunk by chunk.

    Parameters
    ----------
    chunksize : int or None
        Maximum number of rows handed to ``write_data_chunk`` per call;
        ``None`` means 100000.
    dropna : bool, default False
        When True, build a row mask that is set only where every masked
        values axis is entirely NA. The mask is passed on to
        ``write_data_chunk``.
    """
    names = self.dtype.names
    nrows = self.nrows_expected

    # if dropna==True, then drop ALL nan rows
    masks = []
    if dropna:
        for a in self.values_axes:
            # figure the mask: only do if we can successfully process this
            # column, otherwise ignore the mask
            # presumably a.data is (n_items, nrows), so axis=0 yields one
            # flag per row -- TODO confirm
            mask = isna(a.data).all(axis=0)
            if isinstance(mask, np.ndarray):
                masks.append(mask.astype("u1", copy=False))

    # consolidate masks: a row stays flagged only if every axis flagged it
    if len(masks):
        mask = masks[0]
        for m in masks[1:]:
            mask = mask & m
        mask = mask.ravel()
    else:
        mask = None

    # broadcast the indexes if needed
    indexes = [a.cvalues for a in self.index_axes]
    nindexes = len(indexes)
    assert nindexes == 1, nindexes  # ensures we dont need to broadcast

    # transpose the values so first dimension is last, then reshape each
    # one to (nrows,) + the shape of its field in the row dtype
    values = [a.take_data() for a in self.values_axes]
    values = [v.transpose(np.roll(np.arange(v.ndim), v.ndim - 1)) for v in values]
    bvalues = []
    for i, v in enumerate(values):
        new_shape = (nrows,) + self.dtype[names[nindexes + i]].shape
        bvalues.append(v.reshape(new_shape))

    # write the chunks
    if chunksize is None:
        chunksize = 100000

    rows = np.empty(min(chunksize, nrows), dtype=self.dtype)
    # Integer floor division keeps the chunk count exact; the extra chunk
    # covers a trailing partial chunk and is skipped below when empty.
    chunks = nrows // chunksize + 1
    for i in range(chunks):
        start_i = i * chunksize
        end_i = min((i + 1) * chunksize, nrows)
        if start_i >= end_i:
            break

        self.write_data_chunk(
            rows,
            indexes=[a[start_i:end_i] for a in indexes],
            mask=mask[start_i:end_i] if mask is not None else None,
            values=[v[start_i:end_i] for v in bvalues],
        )
def delete(
    self, where=None, start: Optional[int] = None, stop: Optional[int] = None,
):
    """
    Remove rows from the table and report how many were removed.

    Parameters
    ----------
    where : selection criteria, optional
        When None or empty, rows are removed by position only.
    start, stop : int, optional
        Row bounds. With no ``where`` and no bounds the whole group node
        is removed.

    Returns
    -------
    int or None
        Number of rows removed, or None when ``infer_axes()`` reports
        nothing to read.
    """

    # delete all rows (and return the nrows)
    if where is None or not len(where):
        if start is None and stop is None:
            # capture the count first: the node is gone after removal
            nrows = self.nrows
            self._handle.remove_node(self.group, recursive=True)
        else:
            # pytables<3.0 would remove a single row with stop=None
            if stop is None:
                stop = self.nrows
            nrows = self.table.remove_rows(start=start, stop=stop)
            self.table.flush()
        return nrows

    # infer the data kind
    if not self.infer_axes():
        return None

    # create the selection
    table = self.table
    selection = Selection(self, where, start=start, stop=stop)
    values = selection.select_coords()

    # delete the rows in reverse order
    sorted_series = Series(values).sort_values()
    ln = len(sorted_series)

    if ln:

        # construct groups of consecutive rows: a gap larger than 1
        # between neighbours marks the start of a new run
        # NOTE(review): these boundaries are index *labels* of the sorted
        # Series but are consumed as positions by take() below; the two
        # coincide only when `values` arrives already sorted -- confirm.
        diff = sorted_series.diff()
        groups = list(diff[diff > 1].index)

        # 1 group
        if not len(groups):
            groups = [0]

        # final element
        if groups[-1] != ln:
            groups.append(ln)

        # initial element
        if groups[0] != 0:
            groups.insert(0, 0)

        # we must remove in reverse order!
        # (removing from the end means earlier row numbers are still valid)
        pg = groups.pop()
        for g in reversed(groups):
            rows = sorted_series.take(range(g, pg))
            # one contiguous run: first row .. last row inclusive
            table.remove_rows(
                start=rows[rows.index[0]], stop=rows[rows.index[-1]] + 1
            )
            pg = g

        self.table.flush()

    # return the number of rows removed
    return ln
class AppendableSeriesTable(AppendableFrameTable):
    """ Appendable table format for a Series, stored as a one-column frame. """

    pandas_kind = "series_table"
    table_type = "appendable_series"
    ndim = 2
    obj_type = Series

    @property
    def is_transposed(self) -> bool:
        # a series table is never written transposed
        return False

    @classmethod
    def get_object(cls, obj, transposed: bool):
        """ Return ``obj`` unchanged; ``transposed`` is ignored. """
        return obj

    def write(self, obj, data_columns=None, **kwargs):
        """
        Write ``obj`` as a frame table.

        A non-DataFrame ``obj`` is converted with ``to_frame`` under its own
        name, or ``"values"`` when the name is falsy. Every column of the
        resulting frame is passed on as a data column; the ``data_columns``
        argument itself is not used.
        """
        if not isinstance(obj, DataFrame):
            name = obj.name or "values"
            obj = obj.to_frame(name)
        return super().write(obj=obj, data_columns=obj.columns.tolist(), **kwargs)

    def read(
        self,
        where=None,
        columns=None,
        start: Optional[int] = None,
        stop: Optional[int] = None,
    ) -> Series:
        """
        Read the stored frame and return its first column as a Series.

        Parameters
        ----------
        where : selection criteria, optional
        columns : list-like, optional
            Columns to read. The caller's object is never modified.
        start, stop : int, optional
            Row bounds.

        Returns
        -------
        Series
            The name is reset to None when it is the default ``"values"``.
        """
        is_multi_index = self.is_multi_index
        if columns is not None and is_multi_index:
            assert isinstance(self.levels, list)  # needed for mypy
            # Copy before inserting the level names: the caller's list must
            # not be mutated, and a tuple/Index has no ``insert`` at all.
            columns = list(columns)
            for n in self.levels:
                if n not in columns:
                    columns.insert(0, n)
        s = super().read(where=where, columns=columns, start=start, stop=stop)
        if is_multi_index:
            s.set_index(self.levels, inplace=True)

        s = s.iloc[:, 0]

        # remove the default name
        if s.name == "values":
            s.name = None
        return s
class AppendableMultiSeriesTable(AppendableSeriesTable):
    """ Appendable table format for a Series that carries a MultiIndex. """

    pandas_kind = "series_table"
    table_type = "appendable_multiseries"

    def write(self, obj, **kwargs):
        """
        Write ``obj`` as a frame table.

        ``validate_multiindex`` supplies the frame and the level names (its
        exact behaviour is defined elsewhere). The frame's columns are then
        renamed to the level names followed by the series name, or
        ``"values"`` when the name is falsy.
        """
        value_label = obj.name or "values"
        obj, self.levels = self.validate_multiindex(obj)
        obj.columns = [*self.levels, value_label]
        return super().write(obj=obj, **kwargs)
class AppendableMultiFrameTable(AppendableFrameTable):
    """ a frame with a multi-index """

    table_type = "appendable_multiframe"
    obj_type = DataFrame
    ndim = 2
    # matches the placeholder names 'level_<n>', stripped again on read
    _re_levels = re.compile(r"^level_\d+$")

    @property
    def table_type_short(self) -> str:
        return "appendable_multi"

    def write(self, obj, data_columns=None, **kwargs):
        """
        Write ``obj`` with its index levels stored as data columns.

        Parameters
        ----------
        obj : DataFrame
        data_columns : list-like, True, or None
            ``True`` means every column of ``obj``; ``None`` means none.
            The level names are always added. The caller's object is never
            modified.
        **kwargs
            Forwarded to the parent ``write``.
        """
        if data_columns is None:
            data_columns = []
        elif data_columns is True:
            data_columns = obj.columns.tolist()
        else:
            # Copy: the level names are inserted below and must not leak
            # into the list the caller passed in.
            data_columns = list(data_columns)
        obj, self.levels = self.validate_multiindex(obj)
        for n in self.levels:
            if n not in data_columns:
                data_columns.insert(0, n)
        return super().write(obj=obj, data_columns=data_columns, **kwargs)

    def read(
        self,
        where=None,
        columns=None,
        start: Optional[int] = None,
        stop: Optional[int] = None,
    ):
        """
        Read the frame and rebuild its index from the stored level columns.

        Index names matching ``level_<n>`` are replaced by None.
        """
        df = super().read(where=where, columns=columns, start=start, stop=stop)
        df = df.set_index(self.levels)

        # remove names for 'level_%d'
        df.index = df.index.set_names(
            [
                None if self._re_levels.search(name) else name
                for name in df.index.names
            ]
        )

        return df
def _set_tz(
    values: Union[np.ndarray, Index],
    tz: Optional[Union[str, tzinfo]],
    coerce: bool = False,
) -> Union[np.ndarray, DatetimeIndex]:
    """
    Attach a timezone to stored datetime values.

    Parameters
    ----------
    values : ndarray or Index
    tz : str or tzinfo, or None
        When given, the values are treated as UTC and converted to ``tz``.
    coerce : bool, default False
        Only consulted when ``tz`` is None: convert to an M8[ns] ndarray.

    Returns
    -------
    DatetimeIndex when ``tz`` is given, an M8[ns] ndarray when coercing,
    otherwise ``values`` itself.
    """
    if isinstance(values, DatetimeIndex):
        # ravel() below returns a plain ndarray and so drops any tz the
        # index carries; that is lossless only if it is the tz re-applied.
        assert values.tz is None or values.tz == tz

    if tz is None:
        if coerce:
            return np.asarray(values, dtype="M8[ns]")
        return values

    index_name = getattr(values, "name", None)
    flat = values.ravel()
    zone = timezones.get_timezone(_ensure_decoded(tz))
    naive = DatetimeIndex(flat, name=index_name)
    return naive.tz_localize("UTC").tz_convert(zone)
def _maybe_convert_for_string_atom(
    name: str, block, existing_col, min_itemsize, nan_rep, encoding, errors
):
    """
    Convert an object block to a fixed-width bytes array for storage.

    Parameters
    ----------
    name : str
        Column name, used to look up a per-column entry in ``min_itemsize``.
    block : internal block
        Non-object blocks are returned as ``block.values`` untouched.
    existing_col : column or None
        When given, its ``validate_col`` may raise the item size.
    min_itemsize : dict, int, or None
        Minimum item size; a dict is looked up by ``name``, then ``"values"``.
    nan_rep : str
        Replacement written in place of missing values.
    encoding, errors : str
        Passed to the string encoder.

    Returns
    -------
    ndarray
        ``block.values`` for non-string data, otherwise an ``|S<itemsize>``
        array with the shape of the block.

    Raises
    ------
    TypeError
        For date data, for mixed-timezone datetime data, and for object
        columns whose contents are not strings.
    """

    if not block.is_object:
        return block.values

    dtype_name = block.dtype.name
    inferred_type = lib.infer_dtype(block.values, skipna=False)

    if inferred_type == "date":
        raise TypeError("[date] is not implemented as a table column")
    elif inferred_type == "datetime":
        # after GH#8260
        # this only would be hit for a multi-timezone dtype which is an error
        raise TypeError(
            "too many timezones in this block, create separate data columns"
        )

    elif not (inferred_type == "string" or dtype_name == "object"):
        return block.values

    block = block.fillna(nan_rep, downcast=False)
    if isinstance(block, list):
        # Note: because block is always object dtype, fillna goes
        #  through a path such that the result is always a 1-element list
        block = block[0]
    data = block.values

    # see if we have a valid string type
    inferred_type = lib.infer_dtype(data.ravel(), skipna=False)
    if inferred_type != "string":

        # we cannot serialize this data, so report an exception on a column
        # by column basis
        # block.shape[0] is already the number of items in the block;
        # wrapping it in len() raised an unrelated TypeError before the
        # offending column could be reported.
        for i in range(block.shape[0]):

            col = block.iget(i)
            inferred_type = lib.infer_dtype(col.ravel(), skipna=False)
            if inferred_type != "string":
                iloc = block.mgr_locs.indexer[i]
                raise TypeError(
                    f"Cannot serialize the column [{iloc}] because\n"
                    f"its data contents are [{inferred_type}] object dtype"
                )

    # itemsize is the maximum length of a string (along any dimension)
    data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape)
    assert data_converted.shape == block.shape, (data_converted.shape, block.shape)
    itemsize = data_converted.itemsize

    # specified min_itemsize?
    if isinstance(min_itemsize, dict):
        min_itemsize = int(min_itemsize.get(name) or min_itemsize.get("values") or 0)
    itemsize = max(min_itemsize or 0, itemsize)

    # check for column in the values conflicts
    if existing_col is not None:
        eci = existing_col.validate_col(itemsize)
        if eci > itemsize:
            itemsize = eci

    data_converted = data_converted.astype(f"|S{itemsize}", copy=False)
    return data_converted
def _maybe_adjust_name(name: str, version) -> str:
    """
    Map a ``values_block_<n>`` name to the legacy ``values_<n>`` spelling.

    The rename applies only when ``version`` is ``(0, minor, 0)`` with
    ``minor <= 10``.

    Parameters
    ----------
    name : str
    version : Tuple[int, int, int]
        A shorter sequence leaves the name unchanged.

    Returns
    -------
    str
    """
    try:
        is_legacy = version[0] == 0 and version[1] <= 10 and version[2] == 0
    except IndexError:
        # version too short to decide: keep the name as it is
        return name

    if not is_legacy:
        return name

    found = re.search(r"values_block_(\d+)", name)
    if not found:
        return name
    return f"values_{found.group(1)}"
class Selection:
    """
    Carries out a selection operation on a tables.Table object.

    Parameters
    ----------
    table : a Table object
    where : list of Terms (or convertible to)
        A list-like of integers is used as row coordinates and a list-like
        of booleans as a row mask; anything else is turned into an
        expression by ``generate``.
    start, stop: indices to start and/or stop selection

    """

    def __init__(
        self,
        table: "Table",
        where=None,
        start: Optional[int] = None,
        stop: Optional[int] = None,
    ):
        self.table = table
        self.where = where
        self.start = start
        self.stop = stop
        self.condition = None
        self.filter = None
        self.terms = None
        self.coordinates = None

        if is_list_like(where):

            # see if we have a passed coordinate like
            # NOTE(review): the ValueError raised below for out-of-range
            # coordinates is caught by this same try/except, so it never
            # reaches the caller; ``where`` then falls through to generate().
            try:
                inferred = lib.infer_dtype(where, skipna=False)
                if inferred == "integer" or inferred == "boolean":
                    where = np.asarray(where)
                    if where.dtype == np.bool_:
                        # boolean mask over the rows in [start, stop)
                        start, stop = self.start, self.stop
                        if start is None:
                            start = 0
                        if stop is None:
                            stop = self.table.nrows
                        self.coordinates = np.arange(start, stop)[where]
                    elif issubclass(where.dtype.type, np.integer):
                        if (self.start is not None and (where < self.start).any()) or (
                            self.stop is not None and (where >= self.stop).any()
                        ):
                            raise ValueError(
                                "where must have index locations >= start and < stop"
                            )
                        self.coordinates = where

            except ValueError:
                pass

        if self.coordinates is None:

            self.terms = self.generate(where)

            # create the numexpr & the filter
            if self.terms is not None:
                self.condition, self.filter = self.terms.evaluate()

    def generate(self, where):
        """
        Build the expression object for ``where``.

        where can be a : dict,list,tuple,string

        Returns
        -------
        PyTablesExpr or None
            None when ``where`` is None.

        Raises
        ------
        ValueError
            If the expression references a name that is not a queryable of
            the table; the underlying NameError is attached as the cause.
        """
        if where is None:
            return None

        q = self.table.queryables()
        try:
            return PyTablesExpr(where, queryables=q, encoding=self.table.encoding)
        except NameError as err:
            # raise a nice message, suggesting that the user should use
            # data_columns
            qkeys = ",".join(q.keys())
            raise ValueError(
                f"The passed where expression: {where}\n"
                " contains an invalid variable reference\n"
                " all of the variable references must be a "
                "reference to\n"
                " an axis (e.g. 'index' or 'columns'), or a "
                "data_column\n"
                f" The currently defined references are: {qkeys}\n"
            ) from err

    def select(self):
        """
        generate the selection

        Reads with the condition when there is one, otherwise by coordinates
        when they were given, otherwise the plain [start, stop) range.
        """
        if self.condition is not None:
            return self.table.table.read_where(
                self.condition.format(), start=self.start, stop=self.stop
            )
        elif self.coordinates is not None:
            return self.table.table.read_coordinates(self.coordinates)
        return self.table.table.read(start=self.start, stop=self.stop)

    def select_coords(self):
        """
        generate the coordinates of the selection

        Negative ``start``/``stop`` are counted from the end of the table and
        missing ones default to the first/last row.
        """
        start, stop = self.start, self.stop
        nrows = self.table.nrows
        if start is None:
            start = 0
        elif start < 0:
            start += nrows
        if stop is None:
            stop = nrows
        elif stop < 0:
            stop += nrows

        if self.condition is not None:
            return self.table.table.get_where_list(
                self.condition.format(), start=start, stop=stop, sort=True
            )
        elif self.coordinates is not None:
            return self.coordinates

        return np.arange(start, stop)
def _strip_schema(url):
    """Return ``url`` reduced to its network location followed by its path."""
    parts = parse_url(url, allow_fragments=False)
    return parts.netloc + parts.path


def get_file_and_filesystem(
    filepath_or_buffer: FilePathOrBuffer, mode: Optional[str] = None
) -> Tuple[IO, Any]:
    """
    Open an s3 object and return it together with the filesystem used.

    Parameters
    ----------
    filepath_or_buffer : FilePathOrBuffer
        Location of the object; the scheme part is stripped before opening.
    mode : str, optional
        Mode passed to the filesystem's ``open``; "rb" when None.

    Returns
    -------
    Tuple[IO, Any]
        The opened file and the ``S3FileSystem`` it was opened with.
    """
    from botocore.exceptions import NoCredentialsError

    open_mode = "rb" if mode is None else mode

    # First attempt is credentialed; an anonymous filesystem is the fallback.
    fs = s3fs.S3FileSystem(anon=False)
    try:
        handle = fs.open(_strip_schema(filepath_or_buffer), open_mode)
    except (FileNotFoundError, NoCredentialsError):
        # boto3 has troubles when trying to access a public file
        # when credentialed...
        # An OSError is raised if you have credentials, but they
        # aren't valid for that bucket.
        # A NoCredentialsError is raised if you don't have creds
        # for that bucket.
        fs = s3fs.S3FileSystem(anon=True)
        handle = fs.open(_strip_schema(filepath_or_buffer), open_mode)
    return handle, fs


def get_filepath_or_buffer(
    filepath_or_buffer: FilePathOrBuffer,
    encoding: Optional[str] = None,
    compression: Optional[str] = None,
    mode: Optional[str] = None,
) -> Tuple[IO, Optional[str], Optional[str], bool]:
    """
    Open an s3 object and return it in a four-item tuple.

    ``encoding`` is accepted but not used here; the second item of the
    result is always None and the last one always True.

    Returns
    -------
    Tuple[IO, Optional[str], Optional[str], bool]
        ``(file, None, compression, True)``.
    """
    opened = get_file_and_filesystem(filepath_or_buffer, mode=mode)
    return opened[0], None, compression, True
It is partially documented here: +# +# https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf +cdef const uint8_t[:] rle_decompress(int result_length, + const uint8_t[:] inbuff): + + cdef: + uint8_t control_byte, x + uint8_t[:] result = np.zeros(result_length, np.uint8) + int rpos = 0 + int i, nbytes, end_of_first_byte + Py_ssize_t ipos = 0, length = len(inbuff) + + while ipos < length: + control_byte = inbuff[ipos] & 0xF0 + end_of_first_byte = (inbuff[ipos] & 0x0F) + ipos += 1 + + if control_byte == 0x00: + if end_of_first_byte != 0: + raise ValueError("Unexpected non-zero end_of_first_byte") + nbytes = (inbuff[ipos]) + 64 + ipos += 1 + for i in range(nbytes): + result[rpos] = inbuff[ipos] + rpos += 1 + ipos += 1 + elif control_byte == 0x40: + # not documented + nbytes = end_of_first_byte * 16 + nbytes += (inbuff[ipos]) + ipos += 1 + for i in range(nbytes): + result[rpos] = inbuff[ipos] + rpos += 1 + ipos += 1 + elif control_byte == 0x60: + nbytes = end_of_first_byte * 256 + (inbuff[ipos]) + 17 + ipos += 1 + for i in range(nbytes): + result[rpos] = 0x20 + rpos += 1 + elif control_byte == 0x70: + nbytes = end_of_first_byte * 256 + (inbuff[ipos]) + 17 + ipos += 1 + for i in range(nbytes): + result[rpos] = 0x00 + rpos += 1 + elif control_byte == 0x80: + nbytes = end_of_first_byte + 1 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0x90: + nbytes = end_of_first_byte + 17 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xA0: + nbytes = end_of_first_byte + 33 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xB0: + nbytes = end_of_first_byte + 49 + for i in range(nbytes): + result[rpos] = inbuff[ipos + i] + rpos += 1 + ipos += nbytes + elif control_byte == 0xC0: + nbytes = end_of_first_byte + 3 + x = inbuff[ipos] + ipos += 1 + for i in range(nbytes): + result[rpos] = x + 
rpos += 1 + elif control_byte == 0xD0: + nbytes = end_of_first_byte + 2 + for i in range(nbytes): + result[rpos] = 0x40 + rpos += 1 + elif control_byte == 0xE0: + nbytes = end_of_first_byte + 2 + for i in range(nbytes): + result[rpos] = 0x20 + rpos += 1 + elif control_byte == 0xF0: + nbytes = end_of_first_byte + 2 + for i in range(nbytes): + result[rpos] = 0x00 + rpos += 1 + else: + raise ValueError(f"unknown control byte: {control_byte}") + + # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t + if len(result) != result_length: + raise ValueError(f"RLE: {len(result)} != {result_length}") + + return np.asarray(result) + + +# rdc_decompress decompresses data using the Ross Data Compression algorithm: +# +# http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm +cdef const uint8_t[:] rdc_decompress(int result_length, + const uint8_t[:] inbuff): + + cdef: + uint8_t cmd + uint16_t ctrl_bits, ctrl_mask = 0, ofs, cnt + int rpos = 0, k + uint8_t[:] outbuff = np.zeros(result_length, dtype=np.uint8) + Py_ssize_t ipos = 0, length = len(inbuff) + + ii = -1 + + while ipos < length: + ii += 1 + ctrl_mask = ctrl_mask >> 1 + if ctrl_mask == 0: + ctrl_bits = ((inbuff[ipos] << 8) + + inbuff[ipos + 1]) + ipos += 2 + ctrl_mask = 0x8000 + + if ctrl_bits & ctrl_mask == 0: + outbuff[rpos] = inbuff[ipos] + ipos += 1 + rpos += 1 + continue + + cmd = (inbuff[ipos] >> 4) & 0x0F + cnt = (inbuff[ipos] & 0x0F) + ipos += 1 + + # short RLE + if cmd == 0: + cnt += 3 + for k in range(cnt): + outbuff[rpos + k] = inbuff[ipos] + rpos += cnt + ipos += 1 + + # long RLE + elif cmd == 1: + cnt += inbuff[ipos] << 4 + cnt += 19 + ipos += 1 + for k in range(cnt): + outbuff[rpos + k] = inbuff[ipos] + rpos += cnt + ipos += 1 + + # long pattern + elif cmd == 2: + ofs = cnt + 3 + ofs += inbuff[ipos] << 4 + ipos += 1 + cnt = inbuff[ipos] + ipos += 1 + cnt += 16 + for k in range(cnt): + outbuff[rpos + k] = outbuff[rpos - ofs + k] + rpos += cnt + + 
# short pattern + elif (cmd >= 3) & (cmd <= 15): + ofs = cnt + 3 + ofs += inbuff[ipos] << 4 + ipos += 1 + for k in range(cmd): + outbuff[rpos + k] = outbuff[rpos - ofs + k] + rpos += cmd + + else: + raise ValueError("unknown RDC command") + + # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t + if len(outbuff) != result_length: + raise ValueError(f"RDC: {len(outbuff)} != {result_length}\n") + + return np.asarray(outbuff) + + +cdef enum ColumnTypes: + column_type_decimal = 1 + column_type_string = 2 + + +# type the page_data types +cdef: + int page_meta_type = const.page_meta_type + int page_mix_types_0 = const.page_mix_types[0] + int page_mix_types_1 = const.page_mix_types[1] + int page_data_type = const.page_data_type + int subheader_pointers_offset = const.subheader_pointers_offset + + +cdef class Parser: + + cdef: + int column_count + int64_t[:] lengths + int64_t[:] offsets + int64_t[:] column_types + uint8_t[:, :] byte_chunk + object[:, :] string_chunk + char *cached_page + int current_row_on_page_index + int current_page_block_count + int current_page_data_subheader_pointers_len + int current_page_subheaders_count + int current_row_in_chunk_index + int current_row_in_file_index + int header_length + int row_length + int bit_offset + int subheader_pointer_length + int current_page_type + bint is_little_endian + const uint8_t[:] (*decompress)(int result_length, + const uint8_t[:] inbuff) + object parser + + def __init__(self, object parser): + cdef: + int j + char[:] column_types + + self.parser = parser + self.header_length = self.parser.header_length + self.column_count = parser.column_count + self.lengths = parser.column_data_lengths() + self.offsets = parser.column_data_offsets() + self.byte_chunk = parser._byte_chunk + self.string_chunk = parser._string_chunk + self.row_length = parser.row_length + self.bit_offset = self.parser._page_bit_offset + self.subheader_pointer_length = self.parser._subheader_pointer_length + 
self.is_little_endian = parser.byte_order == "<" + self.column_types = np.empty(self.column_count, dtype='int64') + + # page indicators + self.update_next_page() + + column_types = parser.column_types() + + # map column types + for j in range(self.column_count): + if column_types[j] == b'd': + self.column_types[j] = column_type_decimal + elif column_types[j] == b's': + self.column_types[j] = column_type_string + else: + raise ValueError("unknown column type: " + f"{self.parser.columns[j].ctype}") + + # compression + if parser.compression == const.rle_compression: + self.decompress = rle_decompress + elif parser.compression == const.rdc_compression: + self.decompress = rdc_decompress + else: + self.decompress = NULL + + # update to current state of the parser + self.current_row_in_chunk_index = parser._current_row_in_chunk_index + self.current_row_in_file_index = parser._current_row_in_file_index + self.current_row_on_page_index = parser._current_row_on_page_index + + def read(self, int nrows): + cdef: + bint done + int i + + for i in range(nrows): + done = self.readline() + if done: + break + + # update the parser + self.parser._current_row_on_page_index = self.current_row_on_page_index + self.parser._current_row_in_chunk_index =\ + self.current_row_in_chunk_index + self.parser._current_row_in_file_index = self.current_row_in_file_index + + cdef bint read_next_page(self): + cdef done + + done = self.parser._read_next_page() + if done: + self.cached_page = NULL + else: + self.update_next_page() + return done + + cdef update_next_page(self): + # update data for the current page + + self.cached_page = self.parser._cached_page + self.current_row_on_page_index = 0 + self.current_page_type = self.parser._current_page_type + self.current_page_block_count = self.parser._current_page_block_count + self.current_page_data_subheader_pointers_len = len( + self.parser._current_page_data_subheader_pointers) + self.current_page_subheaders_count =\ + 
self.parser._current_page_subheaders_count + + cdef readline(self): + + cdef: + int offset, bit_offset, align_correction + int subheader_pointer_length, mn + bint done, flag + + bit_offset = self.bit_offset + subheader_pointer_length = self.subheader_pointer_length + + # If there is no page, go to the end of the header and read a page. + if self.cached_page == NULL: + self.parser._path_or_buf.seek(self.header_length) + done = self.read_next_page() + if done: + return True + + # Loop until a data row is read + while True: + if self.current_page_type == page_meta_type: + flag = self.current_row_on_page_index >=\ + self.current_page_data_subheader_pointers_len + if flag: + done = self.read_next_page() + if done: + return True + continue + current_subheader_pointer = ( + self.parser._current_page_data_subheader_pointers[ + self.current_row_on_page_index]) + self.process_byte_array_with_data( + current_subheader_pointer.offset, + current_subheader_pointer.length) + return False + elif (self.current_page_type == page_mix_types_0 or + self.current_page_type == page_mix_types_1): + align_correction = (bit_offset + subheader_pointers_offset + + self.current_page_subheaders_count * + subheader_pointer_length) + align_correction = align_correction % 8 + offset = bit_offset + align_correction + offset += subheader_pointers_offset + offset += (self.current_page_subheaders_count * + subheader_pointer_length) + offset += self.current_row_on_page_index * self.row_length + self.process_byte_array_with_data(offset, + self.row_length) + mn = min(self.parser.row_count, + self.parser._mix_page_row_count) + if self.current_row_on_page_index == mn: + done = self.read_next_page() + if done: + return True + return False + elif self.current_page_type & page_data_type == page_data_type: + self.process_byte_array_with_data( + bit_offset + subheader_pointers_offset + + self.current_row_on_page_index * self.row_length, + self.row_length) + flag = (self.current_row_on_page_index == + 
self.current_page_block_count) + if flag: + done = self.read_next_page() + if done: + return True + return False + else: + raise ValueError(f"unknown page type: {self.current_page_type}") + + cdef void process_byte_array_with_data(self, int offset, int length): + + cdef: + Py_ssize_t j + int s, k, m, jb, js, current_row + int64_t lngt, start, ct + const uint8_t[:] source + int64_t[:] column_types + int64_t[:] lengths + int64_t[:] offsets + uint8_t[:, :] byte_chunk + object[:, :] string_chunk + + source = np.frombuffer( + self.cached_page[offset:offset + length], dtype=np.uint8) + + if self.decompress != NULL and (length < self.row_length): + source = self.decompress(self.row_length, source) + + current_row = self.current_row_in_chunk_index + column_types = self.column_types + lengths = self.lengths + offsets = self.offsets + byte_chunk = self.byte_chunk + string_chunk = self.string_chunk + s = 8 * self.current_row_in_chunk_index + js = 0 + jb = 0 + for j in range(self.column_count): + lngt = lengths[j] + if lngt == 0: + break + start = offsets[j] + ct = column_types[j] + if ct == column_type_decimal: + # decimal + if self.is_little_endian: + m = s + 8 - lngt + else: + m = s + for k in range(lngt): + byte_chunk[jb, m + k] = source[start + k] + jb += 1 + elif column_types[j] == column_type_string: + # string + string_chunk[js, current_row] = np.array(source[start:( + start + lngt)]).tobytes().rstrip(b"\x00 ") + js += 1 + + self.current_row_on_page_index += 1 + self.current_row_in_chunk_index += 1 + self.current_row_in_file_index += 1 diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py new file mode 100644 index 00000000..f917477b --- /dev/null +++ b/pandas/io/sas/sas7bdat.py @@ -0,0 +1,732 @@ +""" +Read SAS7BDAT files + +Based on code written by Jared Hobbs: + https://bitbucket.org/jaredhobbs/sas7bdat + +See also: + https://github.com/BioStatMatt/sas7bdat + +Partial documentation of the file format: + 
https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf + +Reference for binary data compression: + http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm +""" +from collections import abc +from datetime import datetime +import struct + +import numpy as np + +from pandas.errors import EmptyDataError + +import pandas as pd + +from pandas.io.common import get_filepath_or_buffer +from pandas.io.sas._sas import Parser +import pandas.io.sas.sas_constants as const + + +class _subheader_pointer: + pass + + +class _column: + pass + + +# SAS7BDAT represents a SAS data file in SAS7BDAT format. +class SAS7BDATReader(abc.Iterator): + """ + Read SAS files in SAS7BDAT format. + + Parameters + ---------- + path_or_buf : path name or buffer + Name of SAS file or file-like object pointing to SAS file + contents. + index : column identifier, defaults to None + Column to use as index. + convert_dates : boolean, defaults to True + Attempt to convert dates to Pandas datetime values. Note that + some rarely used SAS date formats may be unsupported. + blank_missing : boolean, defaults to True + Convert empty strings to missing values (SAS uses blanks to + indicate missing character variables). + chunksize : int, defaults to None + Return SAS7BDATReader object for iterations, returns chunks + with given number of lines. + encoding : string, defaults to None + String encoding. + convert_text : bool, defaults to True + If False, text variables are left as raw bytes. + convert_header_text : bool, defaults to True + If False, header text, including column names, are left as raw + bytes. 
def __init__(
    self,
    path_or_buf,
    index=None,
    convert_dates=True,
    blank_missing=True,
    chunksize=None,
    encoding=None,
    convert_text=True,
    convert_header_text=True,
):
    """
    Store the reader options, open the source and parse header and metadata.

    When ``path_or_buf`` resolves to a string path the file is opened here
    and remembered as ``self.handle``; it is closed again if parsing the
    header or the metadata pages fails.
    """
    self.index = index
    self.convert_dates = convert_dates
    self.blank_missing = blank_missing
    self.chunksize = chunksize
    self.encoding = encoding
    self.convert_text = convert_text
    self.convert_header_text = convert_header_text

    self.default_encoding = "latin-1"
    self.compression = ""
    self.column_names_strings = []
    self.column_names = []
    self.column_formats = []
    self.columns = []

    self._current_page_data_subheader_pointers = []
    self._cached_page = None
    self._column_data_lengths = []
    self._column_data_offsets = []
    self._column_types = []

    # One counter per position: in the file, in the chunk and on the page.
    self._current_row_in_file_index = 0
    self._current_row_in_chunk_index = 0
    self._current_row_on_page_index = 0

    self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf)
    if isinstance(self._path_or_buf, str):
        self._path_or_buf = open(self._path_or_buf, "rb")
        # Only a file opened here is owned (and later closed) by the reader.
        self.handle = self._path_or_buf

    try:
        self._get_properties()
        self._parse_metadata()
    except Exception:
        # Do not leak the file opened above when parsing fails.
        self.close()
        raise


def column_data_lengths(self):
    """Return a numpy int64 array of the column data lengths"""
    return np.asarray(self._column_data_lengths, dtype=np.int64)


def column_data_offsets(self):
    """Return a numpy int64 array of the column offsets"""
    return np.asarray(self._column_data_offsets, dtype=np.int64)


def column_types(self):
    """Returns a numpy character array of the column types:
       s (string) or d (double)"""
    return np.asarray(self._column_types, dtype=np.dtype("S1"))


def close(self):
    """Close the file opened by the reader, if there is one."""
    try:
        self.handle.close()
    except AttributeError:
        # ``handle`` is only set when the reader opened the file itself.
        pass
information + align1, align2 = 0, 0 + buf = self._read_bytes(const.align_1_offset, const.align_1_length) + if buf == const.u64_byte_checker_value: + align2 = const.align_2_value + self.U64 = True + self._int_length = 8 + self._page_bit_offset = const.page_bit_offset_x64 + self._subheader_pointer_length = const.subheader_pointer_length_x64 + else: + self.U64 = False + self._page_bit_offset = const.page_bit_offset_x86 + self._subheader_pointer_length = const.subheader_pointer_length_x86 + self._int_length = 4 + buf = self._read_bytes(const.align_2_offset, const.align_2_length) + if buf == const.align_1_checker_value: + align1 = const.align_2_value + total_align = align1 + align2 + + # Get endianness information + buf = self._read_bytes(const.endianness_offset, const.endianness_length) + if buf == b"\x01": + self.byte_order = "<" + else: + self.byte_order = ">" + + # Get encoding information + buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0] + if buf in const.encoding_names: + self.file_encoding = const.encoding_names[buf] + else: + self.file_encoding = f"unknown (code={buf})" + + # Get platform information + buf = self._read_bytes(const.platform_offset, const.platform_length) + if buf == b"1": + self.platform = "unix" + elif buf == b"2": + self.platform = "windows" + else: + self.platform = "unknown" + + buf = self._read_bytes(const.dataset_offset, const.dataset_length) + self.name = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.name = self.name.decode(self.encoding or self.default_encoding) + + buf = self._read_bytes(const.file_type_offset, const.file_type_length) + self.file_type = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.file_type = self.file_type.decode( + self.encoding or self.default_encoding + ) + + # Timestamp is epoch 01/01/1960 + epoch = datetime(1960, 1, 1) + x = self._read_float( + const.date_created_offset + align1, const.date_created_length + ) + self.date_created = epoch + pd.to_timedelta(x, 
unit="s") + x = self._read_float( + const.date_modified_offset + align1, const.date_modified_length + ) + self.date_modified = epoch + pd.to_timedelta(x, unit="s") + + self.header_length = self._read_int( + const.header_size_offset + align1, const.header_size_length + ) + + # Read the rest of the header into cached_page. + buf = self._path_or_buf.read(self.header_length - 288) + self._cached_page += buf + if len(self._cached_page) != self.header_length: + self.close() + raise ValueError("The SAS7BDAT file appears to be truncated.") + + self._page_length = self._read_int( + const.page_size_offset + align1, const.page_size_length + ) + self._page_count = self._read_int( + const.page_count_offset + align1, const.page_count_length + ) + + buf = self._read_bytes( + const.sas_release_offset + total_align, const.sas_release_length + ) + self.sas_release = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.sas_release = self.sas_release.decode( + self.encoding or self.default_encoding + ) + + buf = self._read_bytes( + const.sas_server_type_offset + total_align, const.sas_server_type_length + ) + self.server_type = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.server_type = self.server_type.decode( + self.encoding or self.default_encoding + ) + + buf = self._read_bytes( + const.os_version_number_offset + total_align, const.os_version_number_length + ) + self.os_version = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.os_version = self.os_version.decode( + self.encoding or self.default_encoding + ) + + buf = self._read_bytes(const.os_name_offset + total_align, const.os_name_length) + buf = buf.rstrip(b"\x00 ") + if len(buf) > 0: + self.os_name = buf.decode(self.encoding or self.default_encoding) + else: + buf = self._read_bytes( + const.os_maker_offset + total_align, const.os_maker_length + ) + self.os_name = buf.rstrip(b"\x00 ") + if self.convert_header_text: + self.os_name = self.os_name.decode( + self.encoding or self.default_encoding + ) 
def __next__(self):
    """Return the next chunk of rows; stop iterating once ``read`` gives None."""
    chunk = self.read(nrows=self.chunksize or 1)
    if chunk is not None:
        return chunk
    raise StopIteration


def _read_float(self, offset, width):
    """
    Read a single float of the given width (4 or 8).

    Any other width closes the reader and raises ValueError.
    """
    if width not in (4, 8):
        self.close()
        raise ValueError("invalid float width")
    if width == 4:
        code = "f"
    else:
        code = "d"
    raw = self._read_bytes(offset, width)
    (value,) = struct.unpack(self.byte_order + code, raw)
    return value


def _read_int(self, offset, width):
    """
    Read a single signed integer of the given width (1, 2, 4 or 8).

    Any other width closes the reader and raises ValueError.
    """
    if width not in (1, 2, 4, 8):
        self.close()
        raise ValueError("invalid int width")
    codes = {1: "b", 2: "h", 4: "l", 8: "q"}
    raw = self._read_bytes(offset, width)
    (value,) = struct.unpack(self.byte_order + codes[width], raw)
    return value
def _parse_metadata(self):
    """
    Read pages one at a time until a page reports that metadata is complete.

    Stops silently at end of file; a page shorter than the page length
    closes the reader and raises ValueError.
    """
    while True:
        page = self._path_or_buf.read(self._page_length)
        self._cached_page = page
        if len(page) <= 0:
            return
        if len(page) != self._page_length:
            self.close()
            raise ValueError("Failed to read a meta data page from the SAS file.")
        if self._process_page_meta():
            return


def _process_page_meta(self):
    """
    Process the cached page and report whether metadata reading can stop.

    The result is truthy for a data page, for a mix page, or once any data
    subheader pointer has been collected.
    """
    self._read_page_header()
    page_type = self._current_page_type
    metadata_types = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
    if page_type in metadata_types:
        self._process_page_metadata()

    data_bits = self._current_page_type & const.page_data_type
    on_mix_page = self._current_page_type in const.page_mix_types
    if data_bits:
        return data_bits
    if on_mix_page:
        return on_mix_page
    return self._current_page_data_subheader_pointers != []


def _read_page_header(self):
    """Read the type, block count and subheader count of the cached page."""
    base = self._page_bit_offset
    self._current_page_type = self._read_int(
        const.page_type_offset + base, const.page_type_length
    )
    self._current_page_block_count = self._read_int(
        const.block_count_offset + base, const.block_count_length
    )
    self._current_page_subheaders_count = self._read_int(
        const.subheader_count_offset + base, const.subheader_count_length
    )
def _get_subheader_index(self, signature, compression, ptype):
    """
    Map a subheader signature to its index.

    An unknown signature is treated as a data subheader when the file is
    compressed and the pointer's compression and type flags allow it;
    otherwise the reader is closed and ValueError is raised.
    """
    known = const.subheader_signature_to_index.get(signature)
    if known is not None:
        return known

    compression_ok = (compression == const.compressed_subheader_id) or (
        compression == 0
    )
    type_ok = ptype == const.compressed_subheader_type
    if (self.compression != "") and compression_ok and type_ok:
        return const.SASIndex.data_subheader_index

    self.close()
    raise ValueError("Unknown subheader signature")


def _process_subheader_pointers(self, offset, subheader_pointer_index):
    """
    Read the pointer at the given index of the pointer table at ``offset``.

    Returns a ``_subheader_pointer`` carrying ``offset``, ``length``,
    ``compression`` and ``ptype``.
    """
    int_len = self._int_length
    cursor = offset + self._subheader_pointer_length * subheader_pointer_index

    # Layout: offset (int), length (int), compression (1 byte), type (1 byte).
    pointer = _subheader_pointer()
    pointer.offset = self._read_int(cursor, int_len)
    cursor += int_len
    pointer.length = self._read_int(cursor, int_len)
    cursor += int_len
    pointer.compression = self._read_int(cursor, 1)
    cursor += 1
    pointer.ptype = self._read_int(cursor, 1)
    return pointer


def _read_subheader_signature(self, offset):
    """Return the signature bytes (one integer wide) found at ``offset``."""
    return self._read_bytes(offset, self._int_length)
def _process_columntext_subheader(self, offset, length):
    """
    Record one column-text block; on the first one also detect compression.

    The text block is appended to ``column_names_strings`` (decoded when
    ``convert_header_text`` is set). For the first block only, the
    compression literal found in the text is stored in ``self.compression``
    and ``self.creator_proc`` is read from a position that depends on the
    literal stored in the header.

    Parameters
    ----------
    offset : int
        Offset of the subheader within the cached page.
    length : int
        Length of the subheader (not used).
    """
    offset += self._int_length
    text_block_size = self._read_int(offset, const.text_block_size_length)

    buf = self._read_bytes(offset, text_block_size)
    cname_raw = buf[0:text_block_size].rstrip(b"\x00 ")
    cname = cname_raw
    if self.convert_header_text:
        cname = cname.decode(self.encoding or self.default_encoding)
    self.column_names_strings.append(cname)

    # Everything below applies to the first column-text subheader only.
    if len(self.column_names_strings) != 1:
        return

    # The last literal found in the raw text wins; "" means uncompressed.
    compression_literal = ""
    for cl in const.compression_literals:
        if cl in cname_raw:
            compression_literal = cl
    self.compression = compression_literal
    offset -= self._int_length

    # The fields below sit 4 bytes further along when ``U64`` is set.
    u64_shift = 4 if self.U64 else 0

    buf = self._read_bytes(offset + 16 + u64_shift, self._lcp)
    compression_literal = buf.rstrip(b"\x00")
    # ``compression_literal`` is bytes here, so it must be compared with a
    # bytes literal; comparing with "" is always False on Python 3.
    if compression_literal == b"":
        self._lcs = 0
        buf = self._read_bytes(offset + 32 + u64_shift, self._lcp)
        self.creator_proc = buf[0 : self._lcp]
    elif compression_literal == const.rle_compression:
        buf = self._read_bytes(offset + 40 + u64_shift, self._lcp)
        self.creator_proc = buf[0 : self._lcp]
    elif self._lcs > 0:
        self._lcp = 0
        buf = self._read_bytes(offset + 16 + u64_shift, self._lcs)
        # NOTE(review): ``_lcp`` was just set to 0, so this slice is always
        # empty; ``_lcs`` may have been intended. Left unchanged.
        self.creator_proc = buf[0 : self._lcp]

    if self.convert_header_text and hasattr(self, "creator_proc"):
        self.creator_proc = self.creator_proc.decode(
            self.encoding or self.default_encoding
        )
def _process_columnattributes_subheader(self, offset, length):
    """
    Collect data offset, data length and type for each column vector.

    For every attribute vector in the subheader one entry is appended to
    ``self._column_data_offsets``, ``self._column_data_lengths`` and
    ``self._column_types`` (``b"d"`` when the type code is 1, otherwise
    ``b"s"``).

    Parameters
    ----------
    offset : int
        Offset of the subheader, as passed on to ``_read_int``.
    length : int
        Subheader length, used to derive the number of vectors.
    """
    int_len = self._int_length
    vector_size = int_len + 8
    vector_count = (length - 2 * int_len - 12) // vector_size

    for vector_no in range(vector_count):
        vector_start = offset + vector_no * vector_size

        offset_pos = vector_start + int_len + const.column_data_offset_offset
        length_pos = vector_start + 2 * int_len + const.column_data_length_offset
        type_pos = vector_start + 2 * int_len + const.column_type_offset

        data_offset = self._read_int(offset_pos, int_len)
        self._column_data_offsets.append(data_offset)

        data_length = self._read_int(length_pos, const.column_data_length_length)
        self._column_data_lengths.append(data_length)

        type_code = self._read_int(type_pos, const.column_type_length)
        if type_code == 1:
            self._column_types.append(b"d")
        else:
            self._column_types.append(b"s")
def read(self, nrows=None):
    """
    Read up to ``nrows`` further rows and return them as a DataFrame.

    Parameters
    ----------
    nrows : int, optional
        Number of rows to read.  When omitted, ``self.chunksize`` is used
        if it is set, otherwise ``self.row_count``.  The value is capped
        at the number of rows not yet consumed.

    Returns
    -------
    DataFrame or None
        The frame built by ``_chunk_to_dataframe`` (indexed by
        ``self.index`` when that is set), or None once all rows have
        been consumed.

    Raises
    ------
    EmptyDataError
        If no column types were collected; the reader is closed first.
    """
    if (nrows is None) and (self.chunksize is not None):
        nrows = self.chunksize
    elif nrows is None:
        nrows = self.row_count

    if len(self._column_types) == 0:
        self.close()
        raise EmptyDataError("No columns to parse from file")

    if self._current_row_in_file_index >= self.row_count:
        return None

    m = self.row_count - self._current_row_in_file_index
    if nrows > m:
        nrows = m

    nd = self._column_types.count(b"d")
    ns = self._column_types.count(b"s")

    # Use the builtin ``object``: the ``np.object`` alias is deprecated and
    # no longer exists in recent NumPy releases.
    self._string_chunk = np.empty((ns, nrows), dtype=object)
    # 8 bytes of storage per value for each b"d" column.
    self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)

    self._current_row_in_chunk_index = 0
    p = Parser(self)
    p.read(nrows)

    rslt = self._chunk_to_dataframe()
    if self.index is not None:
        rslt = rslt.set_index(self.index)

    return rslt
return rslt diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py new file mode 100644 index 00000000..23b23a1b --- /dev/null +++ b/pandas/io/sas/sas_constants.py @@ -0,0 +1,253 @@ +magic = ( + b"\x00\x00\x00\x00\x00\x00\x00\x00" + + b"\x00\x00\x00\x00\xc2\xea\x81\x60" + + b"\xb3\x14\x11\xcf\xbd\x92\x08\x00" + + b"\x09\xc7\x31\x8c\x18\x1f\x10\x11" +) + +align_1_checker_value = b"3" +align_1_offset = 32 +align_1_length = 1 +align_1_value = 4 +u64_byte_checker_value = b"3" +align_2_offset = 35 +align_2_length = 1 +align_2_value = 4 +endianness_offset = 37 +endianness_length = 1 +platform_offset = 39 +platform_length = 1 +encoding_offset = 70 +encoding_length = 1 +dataset_offset = 92 +dataset_length = 64 +file_type_offset = 156 +file_type_length = 8 +date_created_offset = 164 +date_created_length = 8 +date_modified_offset = 172 +date_modified_length = 8 +header_size_offset = 196 +header_size_length = 4 +page_size_offset = 200 +page_size_length = 4 +page_count_offset = 204 +page_count_length = 4 +sas_release_offset = 216 +sas_release_length = 8 +sas_server_type_offset = 224 +sas_server_type_length = 16 +os_version_number_offset = 240 +os_version_number_length = 16 +os_maker_offset = 256 +os_maker_length = 16 +os_name_offset = 272 +os_name_length = 16 +page_bit_offset_x86 = 16 +page_bit_offset_x64 = 32 +subheader_pointer_length_x86 = 12 +subheader_pointer_length_x64 = 24 +page_type_offset = 0 +page_type_length = 2 +block_count_offset = 2 +block_count_length = 2 +subheader_count_offset = 4 +subheader_count_length = 2 +page_meta_type = 0 +page_data_type = 256 +page_amd_type = 1024 +page_metc_type = 16384 +page_comp_type = -28672 +page_mix_types = [512, 640] +subheader_pointers_offset = 8 +truncated_subheader_id = 1 +compressed_subheader_id = 4 +compressed_subheader_type = 1 +text_block_size_length = 2 +row_length_offset_multiplier = 5 +row_count_offset_multiplier = 6 +col_count_p1_multiplier = 9 +col_count_p2_multiplier = 10 
+row_count_on_mix_page_offset_multiplier = 15 +column_name_pointer_length = 8 +column_name_text_subheader_offset = 0 +column_name_text_subheader_length = 2 +column_name_offset_offset = 2 +column_name_offset_length = 2 +column_name_length_offset = 4 +column_name_length_length = 2 +column_data_offset_offset = 8 +column_data_length_offset = 8 +column_data_length_length = 4 +column_type_offset = 14 +column_type_length = 1 +column_format_text_subheader_index_offset = 22 +column_format_text_subheader_index_length = 2 +column_format_offset_offset = 24 +column_format_offset_length = 2 +column_format_length_offset = 26 +column_format_length_length = 2 +column_label_text_subheader_index_offset = 28 +column_label_text_subheader_index_length = 2 +column_label_offset_offset = 30 +column_label_offset_length = 2 +column_label_length_offset = 32 +column_label_length_length = 2 +rle_compression = b"SASYZCRL" +rdc_compression = b"SASYZCR2" + +compression_literals = [rle_compression, rdc_compression] + +# Incomplete list of encodings, using SAS nomenclature: +# http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm +encoding_names = { + 29: "latin1", + 20: "utf-8", + 33: "cyrillic", + 60: "wlatin2", + 61: "wcyrillic", + 62: "wlatin1", + 90: "ebcdic870", +} + + +class SASIndex: + row_size_index = 0 + column_size_index = 1 + subheader_counts_index = 2 + column_text_index = 3 + column_name_index = 4 + column_attributes_index = 5 + format_and_label_index = 6 + column_list_index = 7 + data_subheader_index = 8 + + +subheader_signature_to_index = { + b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index, + b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index, + b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index, + b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index, + b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index, + b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index, + b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": 
SASIndex.column_size_index, + b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index, + b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index, + b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, + b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index, + b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index, + b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index, + b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index, + b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index, + b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, + b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index, + b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index, + b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, + b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index, + b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index, + b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index, + b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index, + b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index, +} + + +# List of frequently used SAS date and datetime formats +# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm +# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java +sas_date_formats = ( + "DATE", + "DAY", + "DDMMYY", + "DOWNAME", + "JULDAY", + "JULIAN", + "MMDDYY", + "MMYY", + "MMYYC", + "MMYYD", + "MMYYP", + "MMYYS", + "MMYYN", + "MONNAME", + "MONTH", + "MONYY", + "QTR", + "QTRR", + "NENGO", + "WEEKDATE", + "WEEKDATX", + 
"WEEKDAY", + "WEEKV", + "WORDDATE", + "WORDDATX", + "YEAR", + "YYMM", + "YYMMC", + "YYMMD", + "YYMMP", + "YYMMS", + "YYMMN", + "YYMON", + "YYMMDD", + "YYQ", + "YYQC", + "YYQD", + "YYQP", + "YYQS", + "YYQN", + "YYQR", + "YYQRC", + "YYQRD", + "YYQRP", + "YYQRS", + "YYQRN", + "YYMMDDP", + "YYMMDDC", + "E8601DA", + "YYMMDDN", + "MMDDYYC", + "MMDDYYS", + "MMDDYYD", + "YYMMDDS", + "B8601DA", + "DDMMYYN", + "YYMMDDD", + "DDMMYYB", + "DDMMYYP", + "MMDDYYP", + "YYMMDDB", + "MMDDYYN", + "DDMMYYC", + "DDMMYYD", + "DDMMYYS", + "MINGUO", +) + +sas_datetime_formats = ( + "DATETIME", + "DTWKDATX", + "B8601DN", + "B8601DT", + "B8601DX", + "B8601DZ", + "B8601LX", + "E8601DN", + "E8601DT", + "E8601DX", + "E8601DZ", + "E8601LX", + "DATEAMPM", + "DTDATE", + "DTMONYY", + "DTMONYY", + "DTWKDATX", + "DTYEAR", + "TOD", + "MDYAMPM", +) diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py new file mode 100644 index 00000000..3cf7fd88 --- /dev/null +++ b/pandas/io/sas/sas_xport.py @@ -0,0 +1,507 @@ +""" +Read a SAS XPort format file into a Pandas DataFrame. + +Based on code from Jack Cushman (github.com/jcushman/xport). + +The file format is defined here: + +https://support.sas.com/techsup/technote/ts140.pdf +""" +from collections import abc +from datetime import datetime +from io import BytesIO +import struct +import warnings + +import numpy as np + +from pandas.util._decorators import Appender + +import pandas as pd + +from pandas.io.common import get_filepath_or_buffer + +_correct_line1 = ( + "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_correct_header1 = ( + "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000" +) +_correct_header2 = ( + "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!" + "000000000000000000000000000000 " +) +_correct_obs_header = ( + "HEADER RECORD*******OBS HEADER RECORD!!!!!!!" 
+ "000000000000000000000000000000 " +) +_fieldkeys = [ + "ntype", + "nhfun", + "field_length", + "nvar0", + "name", + "label", + "nform", + "nfl", + "num_decimals", + "nfj", + "nfill", + "niform", + "nifl", + "nifd", + "npos", + "_", +] + + +_base_params_doc = """\ +Parameters +---------- +filepath_or_buffer : string or file-like object + Path to SAS file or object implementing binary read method.""" + +_params2_doc = """\ +index : identifier of index column + Identifier of column that should be used as index of the DataFrame. +encoding : string + Encoding for text data. +chunksize : int + Read file `chunksize` lines at a time, returns iterator.""" + +_format_params_doc = """\ +format : string + File format, only `xport` is currently supported.""" + +_iterator_doc = """\ +iterator : boolean, default False + Return XportReader object for reading file incrementally.""" + + +_read_sas_doc = """Read a SAS file into a DataFrame. + +%(_base_params_doc)s +%(_format_params_doc)s +%(_params2_doc)s +%(_iterator_doc)s + +Returns +------- +DataFrame or XportReader + +Examples +-------- +Read a SAS Xport file: + +>>> df = pd.read_sas('filename.XPT') + +Read a Xport file in 10,000 line chunks: + +>>> itr = pd.read_sas('filename.XPT', chunksize=10000) +>>> for chunk in itr: +>>> do_something(chunk) + +""" % { + "_base_params_doc": _base_params_doc, + "_format_params_doc": _format_params_doc, + "_params2_doc": _params2_doc, + "_iterator_doc": _iterator_doc, +} + + +_xport_reader_doc = """\ +Class for reading SAS Xport files. + +%(_base_params_doc)s +%(_params2_doc)s + +Attributes +---------- +member_info : list + Contains information about the file +fields : list + Contains information about the variables in the file +""" % { + "_base_params_doc": _base_params_doc, + "_params2_doc": _params2_doc, +} + + +_read_method_doc = """\ +Read observations from SAS Xport file, returning as data frame. 
+ +Parameters +---------- +nrows : int + Number of rows to read from data file; if None, read whole + file. + +Returns +------- +A DataFrame. +""" + + +def _parse_date(datestr: str) -> datetime: + """ Given a date in xport format, return Python date. """ + try: + # e.g. "16FEB11:10:07:55" + return datetime.strptime(datestr, "%d%b%y:%H:%M:%S") + except ValueError: + return pd.NaT + + +def _split_line(s: str, parts): + """ + Parameters + ---------- + s: str + Fixed-length string to split + parts: list of (name, length) pairs + Used to break up string, name '_' will be filtered from output. + + Returns + ------- + Dict of name:contents of string at given location. + """ + out = {} + start = 0 + for name, length in parts: + out[name] = s[start : start + length].strip() + start += length + del out["_"] + return out + + +def _handle_truncated_float_vec(vec, nbytes): + # This feature is not well documented, but some SAS XPORT files + # have 2-7 byte "truncated" floats. To read these truncated + # floats, pad them with zeros on the right to make 8 byte floats. + # + # References: + # https://github.com/jcushman/xport/pull/3 + # The R "foreign" library + + if nbytes != 8: + vec1 = np.zeros(len(vec), np.dtype("S8")) + dtype = np.dtype("S%d,S%d" % (nbytes, 8 - nbytes)) + vec2 = vec1.view(dtype=dtype) + vec2["f0"] = vec + return vec2 + + return vec + + +def _parse_float_vec(vec): + """ + Parse a vector of float values representing IBM 8 byte floats into + native 8 byte floats. + """ + + dtype = np.dtype(">u4,>u4") + vec1 = vec.view(dtype=dtype) + xport1 = vec1["f0"] + xport2 = vec1["f1"] + + # Start by setting first half of ieee number to first half of IBM + # number sans exponent + ieee1 = xport1 & 0x00FFFFFF + + # The fraction bit to the left of the binary point in the ieee + # format was set and the number was shifted 0, 1, 2, or 3 + # places. 
This will tell us how to adjust the ibm exponent to be a + # power of 2 ieee exponent and how to shift the fraction bits to + # restore the correct magnitude. + shift = np.zeros(len(vec), dtype=np.uint8) + shift[np.where(xport1 & 0x00200000)] = 1 + shift[np.where(xport1 & 0x00400000)] = 2 + shift[np.where(xport1 & 0x00800000)] = 3 + + # shift the ieee number down the correct number of places then + # set the second half of the ieee number to be the second half + # of the ibm number shifted appropriately, ored with the bits + # from the first half that would have been shifted in if we + # could shift a double. All we are worried about are the low + # order 3 bits of the first half since we're only shifting by + # 1, 2, or 3. + ieee1 >>= shift + ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift))) + + # clear the 1 bit to the left of the binary point + ieee1 &= 0xFFEFFFFF + + # set the exponent of the ieee number to be the actual exponent + # plus the shift count + 1023. Or this into the first half of the + # ieee number. The ibm exponent is excess 64 but is adjusted by 65 + # since during conversion to ibm format the exponent is + # incremented by 1 and the fraction bits left 4 positions to the + # right of the radix point. 
(had to add >> 24 because C treats & + # 0x7f as 0x7f000000 and Python doesn't) + ieee1 |= ((((((xport1 >> 24) & 0x7F) - 65) << 2) + shift + 1023) << 20) | ( + xport1 & 0x80000000 + ) + + ieee = np.empty((len(ieee1),), dtype=">u4,>u4") + ieee["f0"] = ieee1 + ieee["f1"] = ieee2 + ieee = ieee.view(dtype=">f8") + ieee = ieee.astype("f8") + + return ieee + + +class XportReader(abc.Iterator): + __doc__ = _xport_reader_doc + + def __init__( + self, filepath_or_buffer, index=None, encoding="ISO-8859-1", chunksize=None + ): + + self._encoding = encoding + self._lines_read = 0 + self._index = index + self._chunksize = chunksize + + if isinstance(filepath_or_buffer, str): + ( + filepath_or_buffer, + encoding, + compression, + should_close, + ) = get_filepath_or_buffer(filepath_or_buffer, encoding=encoding) + + if isinstance(filepath_or_buffer, (str, bytes)): + self.filepath_or_buffer = open(filepath_or_buffer, "rb") + else: + # Copy to BytesIO, and ensure no encoding + contents = filepath_or_buffer.read() + try: + contents = contents.encode(self._encoding) + except UnicodeEncodeError: + pass + self.filepath_or_buffer = BytesIO(contents) + + self._read_header() + + def close(self): + self.filepath_or_buffer.close() + + def _get_row(self): + return self.filepath_or_buffer.read(80).decode() + + def _read_header(self): + self.filepath_or_buffer.seek(0) + + # read file header + line1 = self._get_row() + if line1 != _correct_line1: + self.close() + raise ValueError("Header record is not an XPORT file.") + + line2 = self._get_row() + fif = [["prefix", 24], ["version", 8], ["OS", 8], ["_", 24], ["created", 16]] + file_info = _split_line(line2, fif) + if file_info["prefix"] != "SAS SAS SASLIB": + self.close() + raise ValueError("Header record has invalid prefix.") + file_info["created"] = _parse_date(file_info["created"]) + self.file_info = file_info + + line3 = self._get_row() + file_info["modified"] = _parse_date(line3[:16]) + + # read member header + header1 = self._get_row() + 
header2 = self._get_row() + headflag1 = header1.startswith(_correct_header1) + headflag2 = header2 == _correct_header2 + if not (headflag1 and headflag2): + self.close() + raise ValueError("Member header not found") + # usually 140, could be 135 + fieldnamelength = int(header1[-5:-2]) + + # member info + mem = [ + ["prefix", 8], + ["set_name", 8], + ["sasdata", 8], + ["version", 8], + ["OS", 8], + ["_", 24], + ["created", 16], + ] + member_info = _split_line(self._get_row(), mem) + mem = [["modified", 16], ["_", 16], ["label", 40], ["type", 8]] + member_info.update(_split_line(self._get_row(), mem)) + member_info["modified"] = _parse_date(member_info["modified"]) + member_info["created"] = _parse_date(member_info["created"]) + self.member_info = member_info + + # read field names + types = {1: "numeric", 2: "char"} + fieldcount = int(self._get_row()[54:58]) + datalength = fieldnamelength * fieldcount + # round up to nearest 80 + if datalength % 80: + datalength += 80 - datalength % 80 + fielddata = self.filepath_or_buffer.read(datalength) + fields = [] + obs_length = 0 + while len(fielddata) >= fieldnamelength: + # pull data for one field + field, fielddata = ( + fielddata[:fieldnamelength], + fielddata[fieldnamelength:], + ) + + # rest at end gets ignored, so if field is short, pad out + # to match struct pattern below + field = field.ljust(140) + + fieldstruct = struct.unpack(">hhhh8s40s8shhh2s8shhl52s", field) + field = dict(zip(_fieldkeys, fieldstruct)) + del field["_"] + field["ntype"] = types[field["ntype"]] + fl = field["field_length"] + if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)): + self.close() + msg = f"Floating field width {fl} is not between 2 and 8." 
+ raise TypeError(msg) + + for k, v in field.items(): + try: + field[k] = v.strip() + except AttributeError: + pass + + obs_length += field["field_length"] + fields += [field] + + header = self._get_row() + if not header == _correct_obs_header: + self.close() + raise ValueError("Observation header not found.") + + self.fields = fields + self.record_length = obs_length + self.record_start = self.filepath_or_buffer.tell() + + self.nobs = self._record_count() + self.columns = [x["name"].decode() for x in self.fields] + + # Setup the dtype. + dtypel = [ + ("s" + str(i), "S" + str(field["field_length"])) + for i, field in enumerate(self.fields) + ] + dtype = np.dtype(dtypel) + self._dtype = dtype + + def __next__(self): + return self.read(nrows=self._chunksize or 1) + + def _record_count(self) -> int: + """ + Get number of records in file. + + This is maybe suboptimal because we have to seek to the end of + the file. + + Side effect: returns file position to record_start. + """ + + self.filepath_or_buffer.seek(0, 2) + total_records_length = self.filepath_or_buffer.tell() - self.record_start + + if total_records_length % 80 != 0: + warnings.warn("xport file may be corrupted") + + if self.record_length > 80: + self.filepath_or_buffer.seek(self.record_start) + return total_records_length // self.record_length + + self.filepath_or_buffer.seek(-80, 2) + last_card = self.filepath_or_buffer.read(80) + last_card = np.frombuffer(last_card, dtype=np.uint64) + + # 8 byte blank + ix = np.flatnonzero(last_card == 2314885530818453536) + + if len(ix) == 0: + tail_pad = 0 + else: + tail_pad = 8 * len(ix) + + self.filepath_or_buffer.seek(self.record_start) + + return (total_records_length - tail_pad) // self.record_length + + def get_chunk(self, size=None): + """ + Reads lines from Xport file and returns as dataframe + + Parameters + ---------- + size : int, defaults to None + Number of lines to read. If None, reads whole file. 
def read_sas(
    filepath_or_buffer,
    format=None,
    index=None,
    encoding=None,
    chunksize=None,
    iterator=False,
):
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handle (e.g. via builtin ``open`` function)
        or ``StringIO``.
    format : str {'xport', 'sas7bdat'} or None
        If None, file format is inferred from file extension. If 'xport' or
        'sas7bdat', uses the corresponding format.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data.  If None, text data are stored as raw bytes.
    chunksize : int
        Read file `chunksize` lines at a time, returns iterator.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If ``format`` is None and the input is not a path whose extension
        is ``.xpt`` or ``.sas7bdat``, or if ``format`` names an unknown
        format.
    """
    if format is None:
        buffer_error_msg = (
            "If this is a buffer object rather "
            "than a string name, you must specify "
            "a format string"
        )
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(buffer_error_msg)
        fname = filepath_or_buffer.lower()
        if fname.endswith(".xpt"):
            format = "xport"
        elif fname.endswith(".sas7bdat"):
            format = "sas7bdat"
        else:
            raise ValueError("unable to infer format of SAS file")

    fmt = format.lower()
    # The reader modules are imported lazily, only for the format in use.
    if fmt == "xport":
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
        )
    elif fmt == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
        )
    else:
        raise ValueError("unknown SAS format")

    if iterator or chunksize:
        # The caller drives the reader and is responsible for closing it.
        return reader

    # Release the underlying handle even when reading fails part-way.
    try:
        return reader.read()
    finally:
        reader.close()
+ convert_categoricals : bool, default is True + Convert categorical columns into pd.Categorical. + + Returns + ------- + DataFrame + """ + pyreadstat = import_optional_dependency("pyreadstat") + + if usecols is not None: + if not is_list_like(usecols): + raise TypeError("usecols must be list-like.") + else: + usecols = list(usecols) # pyreadstat requires a list + + df, _ = pyreadstat.read_sav( + path, usecols=usecols, apply_value_formats=convert_categoricals + ) + return df diff --git a/pandas/io/sql.py b/pandas/io/sql.py new file mode 100644 index 00000000..f4527994 --- /dev/null +++ b/pandas/io/sql.py @@ -0,0 +1,1782 @@ +""" +Collection of query wrappers / abstractions to both facilitate data +retrieval and to reduce dependency on DB-specific API. +""" + +from contextlib import contextmanager +from datetime import date, datetime, time +from functools import partial +import re +import warnings + +import numpy as np + +import pandas._libs.lib as lib + +from pandas.core.dtypes.common import is_datetime64tz_dtype, is_dict_like, is_list_like +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna + +from pandas.core.api import DataFrame, Series +from pandas.core.base import PandasObject +from pandas.core.tools.datetimes import to_datetime + + +class SQLAlchemyRequired(ImportError): + pass + + +class DatabaseError(IOError): + pass + + +# ----------------------------------------------------------------------------- +# -- Helper functions + +_SQLALCHEMY_INSTALLED = None + + +def _is_sqlalchemy_connectable(con): + global _SQLALCHEMY_INSTALLED + if _SQLALCHEMY_INSTALLED is None: + try: + import sqlalchemy + + _SQLALCHEMY_INSTALLED = True + except ImportError: + _SQLALCHEMY_INSTALLED = False + + if _SQLALCHEMY_INSTALLED: + import sqlalchemy # noqa: F811 + + return isinstance(con, sqlalchemy.engine.Connectable) + else: + return False + + +def _convert_params(sql, params): + """Convert SQL and params args to DBAPI2.0 compliant 
format.""" + args = [sql] + if params is not None: + if hasattr(params, "keys"): # test if params is a mapping + args += [params] + else: + args += [list(params)] + return args + + +def _process_parse_dates_argument(parse_dates): + """Process parse_dates argument for read_sql functions""" + # handle non-list entries for parse_dates gracefully + if parse_dates is True or parse_dates is None or parse_dates is False: + parse_dates = [] + + elif not hasattr(parse_dates, "__iter__"): + parse_dates = [parse_dates] + return parse_dates + + +def _handle_date_column(col, utc=None, format=None): + if isinstance(format, dict): + return to_datetime(col, errors="ignore", **format) + else: + # Allow passing of formatting string for integers + # GH17855 + if format is None and ( + issubclass(col.dtype.type, np.floating) + or issubclass(col.dtype.type, np.integer) + ): + format = "s" + if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]: + return to_datetime(col, errors="coerce", unit=format, utc=utc) + elif is_datetime64tz_dtype(col): + # coerce to UTC timezone + # GH11216 + return to_datetime(col, utc=True) + else: + return to_datetime(col, errors="coerce", format=format, utc=utc) + + +def _parse_date_columns(data_frame, parse_dates): + """ + Force non-datetime columns to be read as such. + Supports both string formatted and integer timestamp columns. 
+ """ + parse_dates = _process_parse_dates_argument(parse_dates) + + # we want to coerce datetime64_tz dtypes for now to UTC + # we could in theory do a 'nice' conversion from a FixedOffset tz + # GH11216 + for col_name, df_col in data_frame.items(): + if is_datetime64tz_dtype(df_col) or col_name in parse_dates: + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + data_frame[col_name] = _handle_date_column(df_col, format=fmt) + + return data_frame + + +def _wrap_result(data, columns, index_col=None, coerce_float=True, parse_dates=None): + """Wrap result set of query in a DataFrame.""" + + frame = DataFrame.from_records(data, columns=columns, coerce_float=coerce_float) + + frame = _parse_date_columns(frame, parse_dates) + + if index_col is not None: + frame.set_index(index_col, inplace=True) + + return frame + + +def execute(sql, con, cur=None, params=None): + """ + Execute the given SQL query using the provided connection object. + + Parameters + ---------- + sql : string + SQL query to be executed. + con : SQLAlchemy connectable(engine/connection) or sqlite3 connection + Using SQLAlchemy makes it possible to use any DB supported by the + library. + If a DBAPI2 object, only sqlite3 is supported. + cur : deprecated, cursor is obtained from connection, default: None + params : list or tuple, optional, default: None + List of parameters to pass to execute method. + + Returns + ------- + Results Iterable + """ + if cur is None: + pandas_sql = pandasSQL_builder(con) + else: + pandas_sql = pandasSQL_builder(cur, is_cursor=True) + args = _convert_params(sql, params) + return pandas_sql.execute(*args) + + +# ----------------------------------------------------------------------------- +# -- Read and write to DataFrames + + +def read_sql_table( + table_name, + con, + schema=None, + index_col=None, + coerce_float=True, + parse_dates=None, + columns=None, + chunksize=None, +): + """ + Read SQL database table into a DataFrame. 
+ + Given a table name and a SQLAlchemy connectable, returns a DataFrame. + This function does not support DBAPI connections. + + Parameters + ---------- + table_name : str + Name of SQL table in database. + con : SQLAlchemy connectable or str + A database URI could be provided as a str. + SQLite DBAPI connection mode not supported. + schema : str, default None + Name of SQL schema in database to query (if database flavor + supports this). Uses default schema if None (default). + index_col : str or list of str, optional, default: None + Column(s) to set as index(MultiIndex). + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point. Can result in loss of precision. + parse_dates : list or dict, default None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default None + List of column names to select from SQL table. + chunksize : int, default None + If specified, returns an iterator where `chunksize` is the number of + rows to include in each chunk. + + Returns + ------- + DataFrame + A SQL table is returned as a two-dimensional data structure with labeled + axes. + + See Also + -------- + read_sql_query : Read SQL query into a DataFrame. + read_sql : Read SQL query or database table into a DataFrame. + + Notes + ----- + Any datetime values with time zone information will be converted to UTC.
+ + Examples + -------- + >>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP + """ + + con = _engine_builder(con) + if not _is_sqlalchemy_connectable(con): + raise NotImplementedError( + "read_sql_table only supported for SQLAlchemy connectable." + ) + import sqlalchemy + from sqlalchemy.schema import MetaData + + meta = MetaData(con, schema=schema) + try: + meta.reflect(only=[table_name], views=True) + except sqlalchemy.exc.InvalidRequestError: + raise ValueError(f"Table {table_name} not found") + + pandas_sql = SQLDatabase(con, meta=meta) + table = pandas_sql.read_table( + table_name, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + + if table is not None: + return table + else: + raise ValueError(f"Table {table_name} not found", con) + + +def read_sql_query( + sql, + con, + index_col=None, + coerce_float=True, + params=None, + parse_dates=None, + chunksize=None, +): + """ + Read SQL query into a DataFrame. + + Returns a DataFrame corresponding to the result set of the query + string. Optionally provide an `index_col` parameter to use one of the + columns as the index, otherwise default integer index will be used. + + Parameters + ---------- + sql : str SQL query or SQLAlchemy Selectable (select or text object) + SQL query to be executed. + con : SQLAlchemy connectable(engine/connection), database str URI, + or sqlite3 DBAPI2 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object, only sqlite3 is supported. + index_col : str or list of strings, optional, default: None + Column(s) to set as index(MultiIndex). + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point. Useful for SQL result sets. + params : list, tuple or dict, optional, default: None + List of parameters to pass to execute method. 
The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite. + chunksize : int, default None + If specified, return an iterator where `chunksize` is the number of + rows to include in each chunk. + + Returns + ------- + DataFrame + + See Also + -------- + read_sql_table : Read SQL database table into a DataFrame. + read_sql + + Notes + ----- + Any datetime values with time zone information parsed via the `parse_dates` + parameter will be converted to UTC. + """ + pandas_sql = pandasSQL_builder(con) + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + ) + + +def read_sql( + sql, + con, + index_col=None, + coerce_float=True, + params=None, + parse_dates=None, + columns=None, + chunksize=None, +): + """ + Read SQL query or database table into a DataFrame. + + This function is a convenience wrapper around ``read_sql_table`` and + ``read_sql_query`` (for backward compatibility). It will delegate + to the specific function depending on the provided input. A SQL query + will be routed to ``read_sql_query``, while a database table name will + be routed to ``read_sql_table``. 
Note that the delegated function might + have more specific notes about their functionality not listed here. + + Parameters + ---------- + sql : str or SQLAlchemy Selectable (select or text object) + SQL query to be executed or a table name. + con : SQLAlchemy connectable (engine/connection) or database str URI + or DBAPI2 connection (fallback mode)' + + Using SQLAlchemy makes it possible to use any DB supported by that + library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible + for engine disposal and connection closure for the SQLAlchemy connectable. See + `here `_ + index_col : str or list of strings, optional, default: None + Column(s) to set as index(MultiIndex). + coerce_float : bool, default True + Attempts to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + params : list, tuple or dict, optional, default: None + List of parameters to pass to execute method. The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default: None + List of column names to select from SQL table (only used when reading + a table). + chunksize : int, default None + If specified, return an iterator where `chunksize` is the + number of rows to include in each chunk. 
+ + Returns + ------- + DataFrame + + See Also + -------- + read_sql_table : Read SQL database table into a DataFrame. + read_sql_query : Read SQL query into a DataFrame. + """ + pandas_sql = pandasSQL_builder(con) + + if isinstance(pandas_sql, SQLiteDatabase): + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + ) + + try: + _is_table_name = pandas_sql.has_table(sql) + except Exception: + # using generic exception to catch errors from sql drivers (GH24988) + _is_table_name = False + + if _is_table_name: + pandas_sql.meta.reflect(only=[sql]) + return pandas_sql.read_table( + sql, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + else: + return pandas_sql.read_query( + sql, + index_col=index_col, + params=params, + coerce_float=coerce_float, + parse_dates=parse_dates, + chunksize=chunksize, + ) + + +def to_sql( + frame, + name, + con, + schema=None, + if_exists="fail", + index=True, + index_label=None, + chunksize=None, + dtype=None, + method=None, +): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame : DataFrame, Series + name : str + Name of SQL table. + con : SQLAlchemy connectable(engine/connection) or database string URI + or sqlite3 DBAPI2 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object, only sqlite3 is supported. + schema : str, optional + Name of SQL schema in database to write to (if database flavor + supports this). If None, use default schema (default). + if_exists : {'fail', 'replace', 'append'}, default 'fail' + - fail: If table exists, raise a ValueError. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + index : boolean, default True + Write DataFrame index as a column.
+ index_label : str or sequence, optional + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + chunksize : int, optional + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 fallback mode. If a + scalar is provided, it will be applied to all columns. + method : {None, 'multi', callable}, optional + Controls the SQL insertion clause used: + + - None : Uses standard SQL ``INSERT`` clause (one per row). + - 'multi': Pass multiple values in a single ``INSERT`` clause. + - callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + + .. versionadded:: 0.24.0 + """ + if if_exists not in ("fail", "replace", "append"): + raise ValueError(f"'{if_exists}' is not valid for if_exists") + + pandas_sql = pandasSQL_builder(con, schema=schema) + + if isinstance(frame, Series): + frame = frame.to_frame() + elif not isinstance(frame, DataFrame): + raise NotImplementedError( + "'frame' argument should be either a Series or a DataFrame" + ) + + pandas_sql.to_sql( + frame, + name, + if_exists=if_exists, + index=index, + index_label=index_label, + schema=schema, + chunksize=chunksize, + dtype=dtype, + method=method, + ) + + +def has_table(table_name, con, schema=None): + """ + Check if DataBase has named table. + + Parameters + ---------- + table_name: string + Name of SQL table. + con: SQLAlchemy connectable(engine/connection) or sqlite3 DBAPI2 connection + Using SQLAlchemy makes it possible to use any DB supported by that + library. + If a DBAPI2 object, only sqlite3 is supported. 
+ schema : string, default None + Name of SQL schema in database to write to (if database flavor supports + this). If None, use default schema (default). + + Returns + ------- + boolean + """ + pandas_sql = pandasSQL_builder(con, schema=schema) + return pandas_sql.has_table(table_name) + + +table_exists = has_table + + +def _engine_builder(con): + """ + Returns a SQLAlchemy engine from a URI (if con is a string) + else it just returns con without modifying it. + """ + global _SQLALCHEMY_INSTALLED + if isinstance(con, str): + try: + import sqlalchemy + except ImportError: + _SQLALCHEMY_INSTALLED = False + else: + con = sqlalchemy.create_engine(con) + return con + + return con + + +def pandasSQL_builder(con, schema=None, meta=None, is_cursor=False): + """ + Convenience function to return the correct PandasSQL subclass based on the + provided parameters. + """ + # When support for DBAPI connections is removed, + # is_cursor should not be necessary. + con = _engine_builder(con) + if _is_sqlalchemy_connectable(con): + return SQLDatabase(con, schema=schema, meta=meta) + elif isinstance(con, str): + raise ImportError("Using URI string without sqlalchemy installed.") + else: + return SQLiteDatabase(con, is_cursor=is_cursor) + + +class SQLTable(PandasObject): + """ + For mapping Pandas tables to SQL tables. + Uses the fact that table is reflected by SQLAlchemy to + do better type conversions. + Also holds various flags needed to avoid having to + pass them between functions all the time.
+ """ + + # TODO: support for multiIndex + + def __init__( + self, + name, + pandas_sql_engine, + frame=None, + index=True, + if_exists="fail", + prefix="pandas", + index_label=None, + schema=None, + keys=None, + dtype=None, + ): + self.name = name + self.pd_sql = pandas_sql_engine + self.prefix = prefix + self.frame = frame + self.index = self._index_name(index, index_label) + self.schema = schema + self.if_exists = if_exists + self.keys = keys + self.dtype = dtype + + if frame is not None: + # We want to initialize based on a dataframe + self.table = self._create_table_setup() + else: + # no data provided, read-only mode + self.table = self.pd_sql.get_table(self.name, self.schema) + + if self.table is None: + raise ValueError(f"Could not init table '{name}'") + + def exists(self): + return self.pd_sql.has_table(self.name, self.schema) + + def sql_schema(self): + from sqlalchemy.schema import CreateTable + + return str(CreateTable(self.table).compile(self.pd_sql.connectable)) + + def _execute_create(self): + # Inserting table into database, add to MetaData object + self.table = self.table.tometadata(self.pd_sql.meta) + self.table.create() + + def create(self): + if self.exists(): + if self.if_exists == "fail": + raise ValueError(f"Table '{self.name}' already exists.") + elif self.if_exists == "replace": + self.pd_sql.drop_table(self.name, self.schema) + self._execute_create() + elif self.if_exists == "append": + pass + else: + raise ValueError(f"'{self.if_exists}' is not valid for if_exists") + else: + self._execute_create() + + def _execute_insert(self, conn, keys, data_iter): + """Execute SQL statement inserting data + + Parameters + ---------- + conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection + keys : list of str + Column names + data_iter : generator of list + Each item contains a list of values to be inserted + """ + data = [dict(zip(keys, row)) for row in data_iter] + conn.execute(self.table.insert(), data) + + def _execute_insert_multi(self, 
conn, keys, data_iter): + """Alternative to _execute_insert for DBs support multivalue INSERT. + + Note: multi-value insert is usually faster for analytics DBs + and tables containing a few columns + but performance degrades quickly with increase of columns. + """ + data = [dict(zip(keys, row)) for row in data_iter] + conn.execute(self.table.insert(data)) + + def insert_data(self): + if self.index is not None: + temp = self.frame.copy() + temp.index.names = self.index + try: + temp.reset_index(inplace=True) + except ValueError as err: + raise ValueError(f"duplicate name in index/columns: {err}") + else: + temp = self.frame + + column_names = list(map(str, temp.columns)) + ncols = len(column_names) + data_list = [None] * ncols + blocks = temp._data.blocks + + for b in blocks: + if b.is_datetime: + # return datetime.datetime objects + if b.is_datetimetz: + # GH 9086: Ensure we return datetimes with timezone info + # Need to return 2-D data; DatetimeIndex is 1D + d = b.values.to_pydatetime() + d = np.atleast_2d(d) + else: + # convert to microsecond resolution for datetime.datetime + d = b.values.astype("M8[us]").astype(object) + else: + d = np.array(b.get_values(), dtype=object) + + # replace NaN with None + if b._can_hold_na: + mask = isna(d) + d[mask] = None + + for col_loc, col in zip(b.mgr_locs, d): + data_list[col_loc] = col + + return column_names, data_list + + def insert(self, chunksize=None, method=None): + + # set insert method + if method is None: + exec_insert = self._execute_insert + elif method == "multi": + exec_insert = self._execute_insert_multi + elif callable(method): + exec_insert = partial(method, self) + else: + raise ValueError(f"Invalid parameter `method`: {method}") + + keys, data_list = self.insert_data() + + nrows = len(self.frame) + + if nrows == 0: + return + + if chunksize is None: + chunksize = nrows + elif chunksize == 0: + raise ValueError("chunksize argument should be non-zero") + + chunks = int(nrows / chunksize) + 1 + + with 
self.pd_sql.run_transaction() as conn: + for i in range(chunks): + start_i = i * chunksize + end_i = min((i + 1) * chunksize, nrows) + if start_i >= end_i: + break + + chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list]) + exec_insert(conn, keys, chunk_iter) + + def _query_iterator( + self, result, chunksize, columns, coerce_float=True, parse_dates=None + ): + """Return generator through chunked result set.""" + + while True: + data = result.fetchmany(chunksize) + if not data: + break + else: + self.frame = DataFrame.from_records( + data, columns=columns, coerce_float=coerce_float + ) + + self._harmonize_columns(parse_dates=parse_dates) + + if self.index is not None: + self.frame.set_index(self.index, inplace=True) + + yield self.frame + + def read(self, coerce_float=True, parse_dates=None, columns=None, chunksize=None): + + if columns is not None and len(columns) > 0: + from sqlalchemy import select + + cols = [self.table.c[n] for n in columns] + if self.index is not None: + for idx in self.index[::-1]: + cols.insert(0, self.table.c[idx]) + sql_select = select(cols) + else: + sql_select = self.table.select() + + result = self.pd_sql.execute(sql_select) + column_names = result.keys() + + if chunksize is not None: + return self._query_iterator( + result, + chunksize, + column_names, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + else: + data = result.fetchall() + self.frame = DataFrame.from_records( + data, columns=column_names, coerce_float=coerce_float + ) + + self._harmonize_columns(parse_dates=parse_dates) + + if self.index is not None: + self.frame.set_index(self.index, inplace=True) + + return self.frame + + def _index_name(self, index, index_label): + # for writing: index=True to include index in sql table + if index is True: + nlevels = self.frame.index.nlevels + # if index_label is specified, set this as index name(s) + if index_label is not None: + if not isinstance(index_label, list): + index_label = [index_label] + if 
len(index_label) != nlevels: + raise ValueError( + "Length of 'index_label' should match number of " + f"levels, which is {nlevels}" + ) + else: + return index_label + # return the used column labels for the index columns + if ( + nlevels == 1 + and "index" not in self.frame.columns + and self.frame.index.name is None + ): + return ["index"] + else: + return [ + l if l is not None else f"level_{i}" + for i, l in enumerate(self.frame.index.names) + ] + + # for reading: index=(list of) string to specify column to set as index + elif isinstance(index, str): + return [index] + elif isinstance(index, list): + return index + else: + return None + + def _get_column_names_and_types(self, dtype_mapper): + column_names_and_types = [] + if self.index is not None: + for i, idx_label in enumerate(self.index): + idx_type = dtype_mapper(self.frame.index._get_level_values(i)) + column_names_and_types.append((str(idx_label), idx_type, True)) + + column_names_and_types += [ + (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False) + for i in range(len(self.frame.columns)) + ] + + return column_names_and_types + + def _create_table_setup(self): + from sqlalchemy import Table, Column, PrimaryKeyConstraint + + column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type) + + columns = [ + Column(name, typ, index=is_index) + for name, typ, is_index in column_names_and_types + ] + + if self.keys is not None: + if not is_list_like(self.keys): + keys = [self.keys] + else: + keys = self.keys + pkc = PrimaryKeyConstraint(*keys, name=self.name + "_pk") + columns.append(pkc) + + schema = self.schema or self.pd_sql.meta.schema + + # At this point, attach to new metadata, only attach to self.meta + # once table is created. 
+ from sqlalchemy.schema import MetaData + + meta = MetaData(self.pd_sql, schema=schema) + + return Table(self.name, meta, *columns, schema=schema) + + def _harmonize_columns(self, parse_dates=None): + """ + Make the DataFrame's column types align with the SQL table + column types. + Need to work around limited NA value support. Floats are always + fine, ints must always be floats if there are Null values. + Booleans are hard because converting bool column with None replaces + all Nones with false. Therefore only convert bool if there are no + NA values. + Datetimes should already be converted to np.datetime64 if supported, + but here we also force conversion if required. + """ + parse_dates = _process_parse_dates_argument(parse_dates) + + for sql_col in self.table.columns: + col_name = sql_col.name + try: + df_col = self.frame[col_name] + + # Handle date parsing upfront; don't try to convert columns + # twice + if col_name in parse_dates: + try: + fmt = parse_dates[col_name] + except TypeError: + fmt = None + self.frame[col_name] = _handle_date_column(df_col, format=fmt) + continue + + # the type the dataframe column should have + col_type = self._get_dtype(sql_col.type) + + if ( + col_type is datetime + or col_type is date + or col_type is DatetimeTZDtype + ): + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) + elif col_type is float: + # floats support NA, can always convert! + self.frame[col_name] = df_col.astype(col_type, copy=False) + + elif len(df_col) == df_col.count(): + # No NA values, can convert ints and bools + if col_type is np.dtype("int64") or col_type is bool: + self.frame[col_name] = df_col.astype(col_type, copy=False) + except KeyError: + pass # this column not in results + + def _sqlalchemy_type(self, col): + + dtype = self.dtype or {} + if col.name in dtype: + return self.dtype[col.name] + + # Infer type of column, while ignoring missing values. 
+ # Needed for inserting typed data containing NULLs, GH 8778. + col_type = lib.infer_dtype(col, skipna=True) + + from sqlalchemy.types import ( + BigInteger, + Integer, + Float, + Text, + Boolean, + DateTime, + Date, + Time, + TIMESTAMP, + ) + + if col_type == "datetime64" or col_type == "datetime": + # GH 9086: TIMESTAMP is the suggested type if the column contains + # timezone information + try: + if col.dt.tz is not None: + return TIMESTAMP(timezone=True) + except AttributeError: + # The column is actually a DatetimeIndex + if col.tz is not None: + return TIMESTAMP(timezone=True) + return DateTime + if col_type == "timedelta64": + warnings.warn( + "the 'timedelta' type is not supported, and will be " + "written as integer values (ns frequency) to the " + "database.", + UserWarning, + stacklevel=8, + ) + return BigInteger + elif col_type == "floating": + if col.dtype == "float32": + return Float(precision=23) + else: + return Float(precision=53) + elif col_type == "integer": + if col.dtype == "int32": + return Integer + else: + return BigInteger + elif col_type == "boolean": + return Boolean + elif col_type == "date": + return Date + elif col_type == "time": + return Time + elif col_type == "complex": + raise ValueError("Complex datatypes not supported") + + return Text + + def _get_dtype(self, sqltype): + from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date, TIMESTAMP + + if isinstance(sqltype, Float): + return float + elif isinstance(sqltype, Integer): + # TODO: Refine integer size. + return np.dtype("int64") + elif isinstance(sqltype, TIMESTAMP): + # we have a timezone capable type + if not sqltype.timezone: + return datetime + return DatetimeTZDtype + elif isinstance(sqltype, DateTime): + # Caution: np.datetime64 is also a subclass of np.number. 
+ return datetime + elif isinstance(sqltype, Date): + return date + elif isinstance(sqltype, Boolean): + return bool + return object + + +class PandasSQL(PandasObject): + """ + Subclasses Should define read_sql and to_sql. + """ + + def read_sql(self, *args, **kwargs): + raise ValueError( + "PandasSQL must be created with an SQLAlchemy " + "connectable or sqlite connection" + ) + + def to_sql(self, *args, **kwargs): + raise ValueError( + "PandasSQL must be created with an SQLAlchemy " + "connectable or sqlite connection" + ) + + +class SQLDatabase(PandasSQL): + """ + This class enables conversion between DataFrame and SQL databases + using SQLAlchemy to handle DataBase abstraction. + + Parameters + ---------- + engine : SQLAlchemy connectable + Connectable to connect with the database. Using SQLAlchemy makes it + possible to use any DB supported by that library. + schema : string, default None + Name of SQL schema in database to write to (if database flavor + supports this). If None, use default schema (default). + meta : SQLAlchemy MetaData object, default None + If provided, this MetaData object is used instead of a newly + created. This allows to specify database flavor specific + arguments in the MetaData object. + + """ + + def __init__(self, engine, schema=None, meta=None): + self.connectable = engine + if not meta: + from sqlalchemy.schema import MetaData + + meta = MetaData(self.connectable, schema=schema) + + self.meta = meta + + @contextmanager + def run_transaction(self): + with self.connectable.begin() as tx: + if hasattr(tx, "execute"): + yield tx + else: + yield self.connectable + + def execute(self, *args, **kwargs): + """Simple passthrough to SQLAlchemy connectable""" + return self.connectable.execute(*args, **kwargs) + + def read_table( + self, + table_name, + index_col=None, + coerce_float=True, + parse_dates=None, + columns=None, + schema=None, + chunksize=None, + ): + """Read SQL database table into a DataFrame. 
+ + Parameters + ---------- + table_name : string + Name of SQL table in database. + index_col : string, optional, default: None + Column to set as index. + coerce_float : boolean, default True + Attempts to convert values of non-string, non-numeric objects + (like decimal.Decimal) to floating point. This can result in + loss of precision. + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. + - Dict of ``{column_name: arg}``, where the arg corresponds + to the keyword arguments of :func:`pandas.to_datetime`. + Especially useful with databases without native Datetime support, + such as SQLite. + columns : list, default: None + List of column names to select from SQL table. + schema : string, default None + Name of SQL schema in database to query (if database flavor + supports this). If specified, this overwrites the default + schema of the SQL database object. + chunksize : int, default None + If specified, return an iterator where `chunksize` is the number + of rows to include in each chunk. 
+ + Returns + ------- + DataFrame + + See Also + -------- + pandas.read_sql_table + SQLDatabase.read_query + + """ + table = SQLTable(table_name, self, index=index_col, schema=schema) + return table.read( + coerce_float=coerce_float, + parse_dates=parse_dates, + columns=columns, + chunksize=chunksize, + ) + + @staticmethod + def _query_iterator( + result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None + ): + """Return generator through chunked result set""" + + while True: + data = result.fetchmany(chunksize) + if not data: + break + else: + yield _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + + def read_query( + self, + sql, + index_col=None, + coerce_float=True, + parse_dates=None, + params=None, + chunksize=None, + ): + """Read SQL query into a DataFrame. + + Parameters + ---------- + sql : string + SQL query to be executed. + index_col : string, optional, default: None + Column name to use as index for the returned DataFrame object. + coerce_float : boolean, default True + Attempt to convert values of non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets. + params : list, tuple or dict, optional, default: None + List of parameters to pass to execute method. The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'} + parse_dates : list or dict, default: None + - List of column names to parse as dates. + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times, or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps. 
+ - Dict of ``{column_name: arg dict}``, where the arg dict + corresponds to the keyword arguments of + :func:`pandas.to_datetime` Especially useful with databases + without native Datetime support, such as SQLite. + chunksize : int, default None + If specified, return an iterator where `chunksize` is the number + of rows to include in each chunk. + + Returns + ------- + DataFrame + + See Also + -------- + read_sql_table : Read SQL database table into a DataFrame. + read_sql + + """ + args = _convert_params(sql, params) + + result = self.execute(*args) + columns = result.keys() + + if chunksize is not None: + return self._query_iterator( + result, + chunksize, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + else: + data = result.fetchall() + frame = _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + return frame + + read_sql = read_query + + def to_sql( + self, + frame, + name, + if_exists="fail", + index=True, + index_label=None, + schema=None, + chunksize=None, + dtype=None, + method=None, + ): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame : DataFrame + name : string + Name of SQL table. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + - fail: If table exists, do nothing. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + index : boolean, default True + Write DataFrame index as a column. + index_label : string or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + schema : string, default None + Name of SQL schema in database to write to (if database flavor + supports this). 
If specified, this overwrites the default + schema of the SQLDatabase object. + chunksize : int, default None + If not None, then rows will be written in batches of this size at a + time. If None, all rows will be written at once. + dtype : single type or dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a SQLAlchemy type. If all columns are of the same type, one + single value can be used. + method : {None', 'multi', callable}, default None + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + + .. versionadded:: 0.24.0 + """ + if dtype and not is_dict_like(dtype): + dtype = {col_name: dtype for col_name in frame} + + if dtype is not None: + from sqlalchemy.types import to_instance, TypeEngine + + for col, my_type in dtype.items(): + if not isinstance(to_instance(my_type), TypeEngine): + raise ValueError(f"The type of {col} is not a SQLAlchemy type") + + table = SQLTable( + name, + self, + frame=frame, + index=index, + if_exists=if_exists, + index_label=index_label, + schema=schema, + dtype=dtype, + ) + table.create() + table.insert(chunksize, method=method) + if not name.isdigit() and not name.islower(): + # check for potentially case sensitivity issues (GH7815) + # Only check when name is not a number and name is not lower case + engine = self.connectable.engine + with self.connectable.connect() as conn: + table_names = engine.table_names( + schema=schema or self.meta.schema, connection=conn + ) + if name not in table_names: + msg = ( + f"The provided table name '{name}' is not found exactly as " + "such in the database after writing the table, possibly " + "due to case sensitivity issues. 
Consider using lower " + "case table names." + ) + warnings.warn(msg, UserWarning) + + @property + def tables(self): + return self.meta.tables + + def has_table(self, name, schema=None): + return self.connectable.run_callable( + self.connectable.dialect.has_table, name, schema or self.meta.schema + ) + + def get_table(self, table_name, schema=None): + schema = schema or self.meta.schema + if schema: + tbl = self.meta.tables.get(".".join([schema, table_name])) + else: + tbl = self.meta.tables.get(table_name) + + # Avoid casting double-precision floats into decimals + from sqlalchemy import Numeric + + for column in tbl.columns: + if isinstance(column.type, Numeric): + column.type.asdecimal = False + + return tbl + + def drop_table(self, table_name, schema=None): + schema = schema or self.meta.schema + if self.has_table(table_name, schema): + self.meta.reflect(only=[table_name], schema=schema) + self.get_table(table_name, schema).drop() + self.meta.clear() + + def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): + table = SQLTable( + table_name, self, frame=frame, index=False, keys=keys, dtype=dtype + ) + return str(table.sql_schema()) + + +# ---- SQL without SQLAlchemy --- +# sqlite-specific sql strings and handler class +# dictionary used for readability purposes +_SQL_TYPES = { + "string": "TEXT", + "floating": "REAL", + "integer": "INTEGER", + "datetime": "TIMESTAMP", + "date": "DATE", + "time": "TIME", + "boolean": "INTEGER", +} + + +def _get_unicode_name(name): + try: + uname = str(name).encode("utf-8", "strict").decode("utf-8") + except UnicodeError: + raise ValueError(f"Cannot convert identifier to UTF-8: '{name}'") + return uname + + +def _get_valid_sqlite_name(name): + # See https://stackoverflow.com/questions/6514274/how-do-you-escape-strings\ + # -for-sqlite-table-column-names-in-python + # Ensure the string can be encoded as UTF-8. + # Ensure the string does not include any NUL characters. + # Replace all " with "". 
+ # Wrap the entire thing in double quotes. + + uname = _get_unicode_name(name) + if not len(uname): + raise ValueError("Empty table or column name specified") + + nul_index = uname.find("\x00") + if nul_index >= 0: + raise ValueError("SQLite identifier cannot contain NULs") + return '"' + uname.replace('"', '""') + '"' + + +_SAFE_NAMES_WARNING = ( + "The spaces in these column names will not be changed. " + "In pandas versions < 0.14, spaces were converted to " + "underscores." +) + + +class SQLiteTable(SQLTable): + """ + Patch the SQLTable for fallback support. + Instead of a table variable just use the Create Table statement. + """ + + def __init__(self, *args, **kwargs): + # GH 8341 + # register an adapter callable for datetime.time object + import sqlite3 + + # this will transform time(12,34,56,789) into '12:34:56.000789' + # (this is what sqlalchemy does) + sqlite3.register_adapter(time, lambda _: _.strftime("%H:%M:%S.%f")) + super().__init__(*args, **kwargs) + + def sql_schema(self): + return str(";\n".join(self.table)) + + def _execute_create(self): + with self.pd_sql.run_transaction() as conn: + for stmt in self.table: + conn.execute(stmt) + + def insert_statement(self): + names = list(map(str, self.frame.columns)) + wld = "?" # wildcard char + escape = _get_valid_sqlite_name + + if self.index is not None: + for idx in self.index[::-1]: + names.insert(0, idx) + + bracketed_names = [escape(column) for column in names] + col_names = ",".join(bracketed_names) + wildcards = ",".join([wld] * len(names)) + insert_statement = ( + f"INSERT INTO {escape(self.name)} ({col_names}) VALUES ({wildcards})" + ) + return insert_statement + + def _execute_insert(self, conn, keys, data_iter): + data_list = list(data_iter) + conn.executemany(self.insert_statement(), data_list) + + def _create_table_setup(self): + """ + Return a list of SQL statements that creates a table reflecting the + structure of a DataFrame. 
The first entry will be a CREATE TABLE + statement while the rest will be CREATE INDEX statements. + """ + column_names_and_types = self._get_column_names_and_types(self._sql_type_name) + + pat = re.compile(r"\s+") + column_names = [col_name for col_name, _, _ in column_names_and_types] + if any(map(pat.search, column_names)): + warnings.warn(_SAFE_NAMES_WARNING, stacklevel=6) + + escape = _get_valid_sqlite_name + + create_tbl_stmts = [ + escape(cname) + " " + ctype for cname, ctype, _ in column_names_and_types + ] + + if self.keys is not None and len(self.keys): + if not is_list_like(self.keys): + keys = [self.keys] + else: + keys = self.keys + cnames_br = ", ".join(escape(c) for c in keys) + create_tbl_stmts.append( + f"CONSTRAINT {self.name}_pk PRIMARY KEY ({cnames_br})" + ) + + create_stmts = [ + "CREATE TABLE " + + escape(self.name) + + " (\n" + + ",\n ".join(create_tbl_stmts) + + "\n)" + ] + + ix_cols = [cname for cname, _, is_index in column_names_and_types if is_index] + if len(ix_cols): + cnames = "_".join(ix_cols) + cnames_br = ",".join(escape(c) for c in ix_cols) + create_stmts.append( + "CREATE INDEX " + + escape("ix_" + self.name + "_" + cnames) + + "ON " + + escape(self.name) + + " (" + + cnames_br + + ")" + ) + + return create_stmts + + def _sql_type_name(self, col): + dtype = self.dtype or {} + if col.name in dtype: + return dtype[col.name] + + # Infer type of column, while ignoring missing values. + # Needed for inserting typed data containing NULLs, GH 8778. 
+ col_type = lib.infer_dtype(col, skipna=True) + + if col_type == "timedelta64": + warnings.warn( + "the 'timedelta' type is not supported, and will be " + "written as integer values (ns frequency) to the " + "database.", + UserWarning, + stacklevel=8, + ) + col_type = "integer" + + elif col_type == "datetime64": + col_type = "datetime" + + elif col_type == "empty": + col_type = "string" + + elif col_type == "complex": + raise ValueError("Complex datatypes not supported") + + if col_type not in _SQL_TYPES: + col_type = "string" + + return _SQL_TYPES[col_type] + + +class SQLiteDatabase(PandasSQL): + """ + Version of SQLDatabase to support SQLite connections (fallback without + SQLAlchemy). This should only be used internally. + + Parameters + ---------- + con : sqlite connection object + + """ + + def __init__(self, con, is_cursor=False): + self.is_cursor = is_cursor + self.con = con + + @contextmanager + def run_transaction(self): + cur = self.con.cursor() + try: + yield cur + self.con.commit() + except Exception: + self.con.rollback() + raise + finally: + cur.close() + + def execute(self, *args, **kwargs): + if self.is_cursor: + cur = self.con + else: + cur = self.con.cursor() + try: + cur.execute(*args, **kwargs) + return cur + except Exception as exc: + try: + self.con.rollback() + except Exception as inner_exc: # pragma: no cover + ex = DatabaseError( + f"Execution failed on sql: {args[0]}\n{exc}\nunable to rollback" + ) + raise ex from inner_exc + + ex = DatabaseError(f"Execution failed on sql '{args[0]}': {exc}") + raise ex from exc + + @staticmethod + def _query_iterator( + cursor, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None + ): + """Return generator through chunked result set""" + + while True: + data = cursor.fetchmany(chunksize) + if type(data) == tuple: + data = list(data) + if not data: + cursor.close() + break + else: + yield _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + 
parse_dates=parse_dates, + ) + + def read_query( + self, + sql, + index_col=None, + coerce_float=True, + params=None, + parse_dates=None, + chunksize=None, + ): + + args = _convert_params(sql, params) + cursor = self.execute(*args) + columns = [col_desc[0] for col_desc in cursor.description] + + if chunksize is not None: + return self._query_iterator( + cursor, + chunksize, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + else: + data = self._fetchall_as_list(cursor) + cursor.close() + + frame = _wrap_result( + data, + columns, + index_col=index_col, + coerce_float=coerce_float, + parse_dates=parse_dates, + ) + return frame + + def _fetchall_as_list(self, cur): + result = cur.fetchall() + if not isinstance(result, list): + result = list(result) + return result + + def to_sql( + self, + frame, + name, + if_exists="fail", + index=True, + index_label=None, + schema=None, + chunksize=None, + dtype=None, + method=None, + ): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame: DataFrame + name: string + Name of SQL table. + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. + replace: If table exists, drop it, recreate it, and insert data. + append: If table exists, insert data. Create if it does not exist. + index : boolean, default True + Write DataFrame index as a column + index_label : string or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + schema : string, default None + Ignored parameter included for compatibility with SQLAlchemy + version of ``to_sql``. + chunksize : int, default None + If not None, then rows will be written in batches of this + size at a time. If None, all rows will be written at once. 
+ dtype : single type or dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a string. If all columns are of the same type, one single value + can be used. + method : {None, 'multi', callable}, default None + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + + .. versionadded:: 0.24.0 + """ + if dtype and not is_dict_like(dtype): + dtype = {col_name: dtype for col_name in frame} + + if dtype is not None: + for col, my_type in dtype.items(): + if not isinstance(my_type, str): + raise ValueError(f"{col} ({my_type}) not a string") + + table = SQLiteTable( + name, + self, + frame=frame, + index=index, + if_exists=if_exists, + index_label=index_label, + dtype=dtype, + ) + table.create() + table.insert(chunksize, method) + + def has_table(self, name, schema=None): + # TODO(wesm): unused? + # escape = _get_valid_sqlite_name + # esc_name = escape(name) + + wld = "?" + query = f"SELECT name FROM sqlite_master WHERE type='table' AND name={wld};" + + return len(self.execute(query, [name]).fetchall()) > 0 + + def get_table(self, table_name, schema=None): + return None # not supported in fallback mode + + def drop_table(self, name, schema=None): + drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}" + self.execute(drop_sql) + + def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): + table = SQLiteTable( + table_name, self, frame=frame, index=False, keys=keys, dtype=dtype + ) + return str(table.sql_schema()) + + +def get_schema(frame, name, keys=None, con=None, dtype=None): + """ + Get the SQL db table schema for the given frame. 
+ + Parameters + ---------- + frame : DataFrame + name : string + name of SQL table + keys : string or sequence, default: None + columns to use a primary key + con: an open SQL database connection object or a SQLAlchemy connectable + Using SQLAlchemy makes it possible to use any DB supported by that + library, default: None + If a DBAPI2 object, only sqlite3 is supported. + dtype : dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a SQLAlchemy type, or a string for sqlite3 fallback connection. + + """ + + pandas_sql = pandasSQL_builder(con=con) + return pandas_sql._create_sql_schema(frame, name, keys=keys, dtype=dtype) diff --git a/pandas/io/stata.py b/pandas/io/stata.py new file mode 100644 index 00000000..a7246655 --- /dev/null +++ b/pandas/io/stata.py @@ -0,0 +1,3333 @@ +""" +Module contains tools for processing Stata files into DataFrames + +The StataReader below was originally written by Joe Presbrey as part of PyDTA. +It has been extended and improved by Skipper Seabold from the Statsmodels +project who also developed the StataWriter and was finally added to pandas in +a once again improved version. 
+ +You can find more information on http://presbrey.mit.edu/PyDTA and +http://www.statsmodels.org/devel/ +""" +from collections import abc +import datetime +from io import BytesIO +import os +import struct +import sys +from typing import Any, Dict, Hashable, Optional, Sequence +import warnings + +from dateutil.relativedelta import relativedelta +import numpy as np + +from pandas._libs.lib import infer_dtype +from pandas._libs.writers import max_len_string_array +from pandas._typing import FilePathOrBuffer +from pandas.util._decorators import Appender + +from pandas.core.dtypes.common import ( + ensure_object, + is_categorical_dtype, + is_datetime64_dtype, +) + +from pandas import ( + Categorical, + DatetimeIndex, + NaT, + Timestamp, + concat, + isna, + to_datetime, + to_timedelta, +) +from pandas.core.frame import DataFrame +from pandas.core.series import Series + +from pandas.io.common import get_filepath_or_buffer, stringify_path + +_version_error = ( + "Version of given Stata file is {version}. pandas supports importing " + "versions 104, 105, 108, 111 (Stata 7SE), 113 (Stata 8/9), " + "114 (Stata 10/11), 115 (Stata 12), 117 (Stata 13), 118 (Stata 14/15/16)," + "and 119 (Stata 15/16, over 32,767 variables)." +) + +_statafile_processing_params1 = """\ +convert_dates : bool, default True + Convert date variables to DataFrame time values. +convert_categoricals : bool, default True + Read value labels and convert columns to Categorical/Factor variables.""" + +_statafile_processing_params2 = """\ +index_col : str, optional + Column to set as index. +convert_missing : bool, default False + Flag indicating whether to convert missing values to their Stata + representations. If False, missing values are replaced with nan. + If True, columns containing missing values are returned with + object data types and missing values are represented by + StataMissingValue objects. +preserve_dtypes : bool, default True + Preserve Stata datatypes. 
If False, numeric data are upcast to pandas + default types for foreign data (float64 or int64). +columns : list or None + Columns to retain. Columns will be returned in the given order. None + returns all columns. +order_categoricals : bool, default True + Flag indicating whether converted categorical data are ordered.""" + +_chunksize_params = """\ +chunksize : int, default None + Return StataReader object for iterations, returns chunks with + given number of lines.""" + +_iterator_params = """\ +iterator : bool, default False + Return StataReader object.""" + +_read_stata_doc = f""" +Read Stata file into DataFrame. + +Parameters +---------- +filepath_or_buffer : str, path object or file-like object + Any valid string path is acceptable. The string could be a URL. Valid + URL schemes include http, ftp, s3, and file. For file URLs, a host is + expected. A local file could be: ``file://localhost/path/to/table.dta``. + + If you want to pass in a path object, pandas accepts any ``os.PathLike``. + + By file-like object, we refer to objects with a ``read()`` method, + such as a file handler (e.g. via builtin ``open`` function) + or ``StringIO``. +{_statafile_processing_params1} +{_statafile_processing_params2} +{_chunksize_params} +{_iterator_params} + +Returns +------- +DataFrame or StataReader + +See Also +-------- +io.stata.StataReader : Low-level reader for Stata data files. +DataFrame.to_stata: Export Stata data files. + +Examples +-------- +Read a Stata dta file: + +>>> df = pd.read_stata('filename.dta') + +Read a Stata dta file in 10,000 line chunks: + +>>> itr = pd.read_stata('filename.dta', chunksize=10000) +>>> for chunk in itr: +... do_something(chunk) +""" + +_read_method_doc = f"""\ +Reads observations from Stata file, converting them into a dataframe + +Parameters +---------- +nrows : int + Number of lines to read from data file, if None read whole file. 
+{_statafile_processing_params1} +{_statafile_processing_params2} + +Returns +------- +DataFrame +""" + +_stata_reader_doc = f"""\ +Class for reading Stata dta files. + +Parameters +---------- +path_or_buf : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or object + implementing a binary read() functions. + + .. versionadded:: 0.23.0 support for pathlib, py.path. +{_statafile_processing_params1} +{_statafile_processing_params2} +{_chunksize_params} +""" + + +@Appender(_read_stata_doc) +def read_stata( + filepath_or_buffer, + convert_dates=True, + convert_categoricals=True, + index_col=None, + convert_missing=False, + preserve_dtypes=True, + columns=None, + order_categoricals=True, + chunksize=None, + iterator=False, +): + + reader = StataReader( + filepath_or_buffer, + convert_dates=convert_dates, + convert_categoricals=convert_categoricals, + index_col=index_col, + convert_missing=convert_missing, + preserve_dtypes=preserve_dtypes, + columns=columns, + order_categoricals=order_categoricals, + chunksize=chunksize, + ) + + if iterator or chunksize: + data = reader + else: + try: + data = reader.read() + finally: + reader.close() + return data + + +_date_formats = ["%tc", "%tC", "%td", "%d", "%tw", "%tm", "%tq", "%th", "%ty"] + + +stata_epoch = datetime.datetime(1960, 1, 1) + + +def _stata_elapsed_date_to_datetime_vec(dates, fmt): + """ + Convert from SIF to datetime. http://www.stata.com/help.cgi?datetime + + Parameters + ---------- + dates : Series + The Stata Internal Format date to convert to datetime according to fmt + fmt : str + The format to convert to. 
Can be, tc, td, tw, tm, tq, th, ty + Returns + + Returns + ------- + converted : Series + The converted dates + + Examples + -------- + >>> dates = pd.Series([52]) + >>> _stata_elapsed_date_to_datetime_vec(dates , "%tw") + 0 1961-01-01 + dtype: datetime64[ns] + + Notes + ----- + datetime/c - tc + milliseconds since 01jan1960 00:00:00.000, assuming 86,400 s/day + datetime/C - tC - NOT IMPLEMENTED + milliseconds since 01jan1960 00:00:00.000, adjusted for leap seconds + date - td + days since 01jan1960 (01jan1960 = 0) + weekly date - tw + weeks since 1960w1 + This assumes 52 weeks in a year, then adds 7 * remainder of the weeks. + The datetime value is the start of the week in terms of days in the + year, not ISO calendar weeks. + monthly date - tm + months since 1960m1 + quarterly date - tq + quarters since 1960q1 + half-yearly date - th + half-years since 1960h1 yearly + date - ty + years since 0000 + + If you don't have pandas with datetime support, then you can't do + milliseconds accurately. + """ + MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year + MAX_DAY_DELTA = (Timestamp.max - datetime.datetime(1960, 1, 1)).days + MIN_DAY_DELTA = (Timestamp.min - datetime.datetime(1960, 1, 1)).days + MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000 + MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000 + + def convert_year_month_safe(year, month): + """ + Convert year and month to datetimes, using pandas vectorized versions + when the date range falls within the range supported by pandas. + Otherwise it falls back to a slower but more robust method + using datetime. + """ + if year.max() < MAX_YEAR and year.min() > MIN_YEAR: + return to_datetime(100 * year + month, format="%Y%m") + else: + index = getattr(year, "index", None) + return Series( + [datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index + ) + + def convert_year_days_safe(year, days): + """ + Converts year (e.g. 
1999) and days since the start of the year to a + datetime or datetime64 Series + """ + if year.max() < (MAX_YEAR - 1) and year.min() > MIN_YEAR: + return to_datetime(year, format="%Y") + to_timedelta(days, unit="d") + else: + index = getattr(year, "index", None) + value = [ + datetime.datetime(y, 1, 1) + relativedelta(days=int(d)) + for y, d in zip(year, days) + ] + return Series(value, index=index) + + def convert_delta_safe(base, deltas, unit): + """ + Convert base dates and deltas to datetimes, using pandas vectorized + versions if the deltas satisfy restrictions required to be expressed + as dates in pandas. + """ + index = getattr(deltas, "index", None) + if unit == "d": + if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA: + values = [base + relativedelta(days=int(d)) for d in deltas] + return Series(values, index=index) + elif unit == "ms": + if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA: + values = [ + base + relativedelta(microseconds=(int(d) * 1000)) for d in deltas + ] + return Series(values, index=index) + else: + raise ValueError("format not understood") + base = to_datetime(base) + deltas = to_timedelta(deltas, unit=unit) + return base + deltas + + # TODO: If/when pandas supports more than datetime64[ns], this should be + # improved to use correct range, e.g. datetime[Y] for yearly + bad_locs = np.isnan(dates) + has_bad_values = False + if bad_locs.any(): + has_bad_values = True + data_col = Series(dates) + data_col[bad_locs] = 1.0 # Replace with NaT + dates = dates.astype(np.int64) + + if fmt.startswith(("%tc", "tc")): # Delta ms relative to base + base = stata_epoch + ms = dates + conv_dates = convert_delta_safe(base, ms, "ms") + elif fmt.startswith(("%tC", "tC")): + + warnings.warn("Encountered %tC format. 
Leaving in Stata Internal Format.") + conv_dates = Series(dates, dtype=np.object) + if has_bad_values: + conv_dates[bad_locs] = NaT + return conv_dates + # Delta days relative to base + elif fmt.startswith(("%td", "td", "%d", "d")): + base = stata_epoch + days = dates + conv_dates = convert_delta_safe(base, days, "d") + # does not count leap days - 7 days is a week. + # 52nd week may have more than 7 days + elif fmt.startswith(("%tw", "tw")): + year = stata_epoch.year + dates // 52 + days = (dates % 52) * 7 + conv_dates = convert_year_days_safe(year, days) + elif fmt.startswith(("%tm", "tm")): # Delta months relative to base + year = stata_epoch.year + dates // 12 + month = (dates % 12) + 1 + conv_dates = convert_year_month_safe(year, month) + elif fmt.startswith(("%tq", "tq")): # Delta quarters relative to base + year = stata_epoch.year + dates // 4 + month = (dates % 4) * 3 + 1 + conv_dates = convert_year_month_safe(year, month) + elif fmt.startswith(("%th", "th")): # Delta half-years relative to base + year = stata_epoch.year + dates // 2 + month = (dates % 2) * 6 + 1 + conv_dates = convert_year_month_safe(year, month) + elif fmt.startswith(("%ty", "ty")): # Years -- not delta + year = dates + month = np.ones_like(dates) + conv_dates = convert_year_month_safe(year, month) + else: + raise ValueError(f"Date fmt {fmt} not understood") + + if has_bad_values: # Restore NaT for bad values + conv_dates[bad_locs] = NaT + + return conv_dates + + +def _datetime_to_stata_elapsed_vec(dates, fmt): + """ + Convert from datetime to SIF. http://www.stata.com/help.cgi?datetime + + Parameters + ---------- + dates : Series + Series or array containing datetime.datetime or datetime64[ns] to + convert to the Stata Internal Format given by fmt + fmt : str + The format to convert to. 
Can be, tc, td, tw, tm, tq, th, ty + """ + index = dates.index + NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000 + US_PER_DAY = NS_PER_DAY / 1000 + + def parse_dates_safe(dates, delta=False, year=False, days=False): + d = {} + if is_datetime64_dtype(dates.values): + if delta: + delta = dates - stata_epoch + d["delta"] = delta.values.astype(np.int64) // 1000 # microseconds + if days or year: + dates = DatetimeIndex(dates) + d["year"], d["month"] = dates.year, dates.month + if days: + days = dates.astype(np.int64) - to_datetime( + d["year"], format="%Y" + ).astype(np.int64) + d["days"] = days // NS_PER_DAY + + elif infer_dtype(dates, skipna=False) == "datetime": + if delta: + delta = dates.values - stata_epoch + f = lambda x: US_PER_DAY * x.days + 1000000 * x.seconds + x.microseconds + v = np.vectorize(f) + d["delta"] = v(delta) + if year: + year_month = dates.apply(lambda x: 100 * x.year + x.month) + d["year"] = year_month.values // 100 + d["month"] = year_month.values - d["year"] * 100 + if days: + f = lambda x: (x - datetime.datetime(x.year, 1, 1)).days + v = np.vectorize(f) + d["days"] = v(dates) + else: + raise ValueError( + "Columns containing dates must contain either " + "datetime64, datetime.datetime or null values." 
+ ) + + return DataFrame(d, index=index) + + bad_loc = isna(dates) + index = dates.index + if bad_loc.any(): + dates = Series(dates) + if is_datetime64_dtype(dates): + dates[bad_loc] = to_datetime(stata_epoch) + else: + dates[bad_loc] = stata_epoch + + if fmt in ["%tc", "tc"]: + d = parse_dates_safe(dates, delta=True) + conv_dates = d.delta / 1000 + elif fmt in ["%tC", "tC"]: + warnings.warn("Stata Internal Format tC not supported.") + conv_dates = dates + elif fmt in ["%td", "td"]: + d = parse_dates_safe(dates, delta=True) + conv_dates = d.delta // US_PER_DAY + elif fmt in ["%tw", "tw"]: + d = parse_dates_safe(dates, year=True, days=True) + conv_dates = 52 * (d.year - stata_epoch.year) + d.days // 7 + elif fmt in ["%tm", "tm"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 12 * (d.year - stata_epoch.year) + d.month - 1 + elif fmt in ["%tq", "tq"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 4 * (d.year - stata_epoch.year) + (d.month - 1) // 3 + elif fmt in ["%th", "th"]: + d = parse_dates_safe(dates, year=True) + conv_dates = 2 * (d.year - stata_epoch.year) + (d.month > 6).astype(np.int) + elif fmt in ["%ty", "ty"]: + d = parse_dates_safe(dates, year=True) + conv_dates = d.year + else: + raise ValueError(f"Format {fmt} is not a known Stata date format") + + conv_dates = Series(conv_dates, dtype=np.float64) + missing_value = struct.unpack("= 2 ** 53: + ws = precision_loss_doc.format("uint64", "float64") + + data[col] = data[col].astype(dtype) + + # Check values and upcast if necessary + if dtype == np.int8: + if data[col].max() > 100 or data[col].min() < -127: + data[col] = data[col].astype(np.int16) + elif dtype == np.int16: + if data[col].max() > 32740 or data[col].min() < -32767: + data[col] = data[col].astype(np.int32) + elif dtype == np.int64: + if data[col].max() <= 2147483620 and data[col].min() >= -2147483647: + data[col] = data[col].astype(np.int32) + else: + data[col] = data[col].astype(np.float64) + if data[col].max() >= 2 ** 53 or 
data[col].min() <= -(2 ** 53): + ws = precision_loss_doc.format("int64", "float64") + elif dtype in (np.float32, np.float64): + value = data[col].max() + if np.isinf(value): + raise ValueError( + f"Column {col} has a maximum value of infinity which is outside " + "the range supported by Stata." + ) + if dtype == np.float32 and value > float32_max: + data[col] = data[col].astype(np.float64) + elif dtype == np.float64: + if value > float64_max: + raise ValueError( + f"Column {col} has a maximum value ({value}) outside the range " + f"supported by Stata ({float64_max})" + ) + + if ws: + warnings.warn(ws, PossiblePrecisionLoss) + + return data + + +class StataValueLabel: + """ + Parse a categorical column and prepare formatted output + + Parameters + ---------- + catarray : Categorical + Categorical Series to encode + encoding : {"latin-1", "utf-8"} + Encoding to use for value labels. + """ + + def __init__(self, catarray, encoding="latin-1"): + + if encoding not in ("latin-1", "utf-8"): + raise ValueError("Only latin-1 and utf-8 are supported.") + self.labname = catarray.name + self._encoding = encoding + categories = catarray.cat.categories + self.value_labels = list(zip(np.arange(len(categories)), categories)) + self.value_labels.sort(key=lambda x: x[0]) + self.text_len = np.int32(0) + self.off = [] + self.val = [] + self.txt = [] + self.n = 0 + + # Compute lengths and setup lists of offsets and labels + for vl in self.value_labels: + category = vl[1] + if not isinstance(category, str): + category = str(category) + warnings.warn( + value_label_mismatch_doc.format(catarray.name), + ValueLabelTypeMismatch, + ) + category = category.encode(encoding) + self.off.append(self.text_len) + self.text_len += len(category) + 1 # +1 for the padding + self.val.append(vl[0]) + self.txt.append(category) + self.n += 1 + + if self.text_len > 32000: + raise ValueError( + "Stata value labels for a single variable must " + "have a combined length less than 32,000 " + "characters." 
+ ) + + # Ensure int32 + self.off = np.array(self.off, dtype=np.int32) + self.val = np.array(self.val, dtype=np.int32) + + # Total length + self.len = 4 + 4 + 4 * self.n + 4 * self.n + self.text_len + + def _encode(self, s): + """ + Python 3 compatibility shim + """ + return s.encode(self._encoding) + + def generate_value_label(self, byteorder): + """ + Generate the binary representation of the value labals. + + Parameters + ---------- + byteorder : str + Byte order of the output + + Returns + ------- + value_label : bytes + Bytes containing the formatted value label + """ + encoding = self._encoding + bio = BytesIO() + null_byte = b"\x00" + + # len + bio.write(struct.pack(byteorder + "i", self.len)) + + # labname + labname = self.labname[:32].encode(encoding) + lab_len = 32 if encoding not in ("utf-8", "utf8") else 128 + labname = _pad_bytes(labname, lab_len + 1) + bio.write(labname) + + # padding - 3 bytes + for i in range(3): + bio.write(struct.pack("c", null_byte)) + + # value_label_table + # n - int32 + bio.write(struct.pack(byteorder + "i", self.n)) + + # textlen - int32 + bio.write(struct.pack(byteorder + "i", self.text_len)) + + # off - int32 array (n elements) + for offset in self.off: + bio.write(struct.pack(byteorder + "i", offset)) + + # val - int32 array (n elements) + for value in self.val: + bio.write(struct.pack(byteorder + "i", value)) + + # txt - Text labels, null terminated + for text in self.txt: + bio.write(text + null_byte) + + bio.seek(0) + return bio.read() + + +class StataMissingValue: + """ + An observation's missing value. + + Parameters + ---------- + value : int8, int16, int32, float32 or float64 + The Stata missing value code + + Attributes + ---------- + string : string + String representation of the Stata missing value + value : int8, int16, int32, float32 or float64 + The original encoded missing value + + Notes + ----- + More information: + + Integer missing values make the code '.', '.a', ..., '.z' to the ranges + 101 ... 
127 (for int8), 32741 ... 32767 (for int16) and 2147483621 ... + 2147483647 (for int32). Missing values for floating point data types are + more complex but the pattern is simple to discern from the following table. + + np.float32 missing values (float in Stata) + 0000007f . + 0008007f .a + 0010007f .b + ... + 00c0007f .x + 00c8007f .y + 00d0007f .z + + np.float64 missing values (double in Stata) + 000000000000e07f . + 000000000001e07f .a + 000000000002e07f .b + ... + 000000000018e07f .x + 000000000019e07f .y + 00000000001ae07f .z + """ + + # Construct a dictionary of missing values + MISSING_VALUES = {} + bases = (101, 32741, 2147483621) + for b in bases: + # Conversion to long to avoid hash issues on 32 bit platforms #8968 + MISSING_VALUES[b] = "." + for i in range(1, 27): + MISSING_VALUES[i + b] = "." + chr(96 + i) + + float32_base = b"\x00\x00\x00\x7f" + increment = struct.unpack(" 0: + MISSING_VALUES[value] += chr(96 + i) + int_value = struct.unpack(" 0: + MISSING_VALUES[value] += chr(96 + i) + int_value = struct.unpack("q", struct.pack(" str: + return self.string + + def __repr__(self) -> str: + return f"{type(self)}({self})" + + def __eq__(self, other: Any) -> bool: + return ( + isinstance(other, type(self)) + and self.string == other.string + and self.value == other.value + ) + + @classmethod + def get_base_missing_value(cls, dtype): + if dtype == np.int8: + value = cls.BASE_MISSING_VALUES["int8"] + elif dtype == np.int16: + value = cls.BASE_MISSING_VALUES["int16"] + elif dtype == np.int32: + value = cls.BASE_MISSING_VALUES["int32"] + elif dtype == np.float32: + value = cls.BASE_MISSING_VALUES["float32"] + elif dtype == np.float64: + value = cls.BASE_MISSING_VALUES["float64"] + else: + raise ValueError("Unsupported dtype") + return value + + +class StataParser: + def __init__(self): + + # type code. + # -------------------- + # str1 1 = 0x01 + # str2 2 = 0x02 + # ... 
+ # str244 244 = 0xf4 + # byte 251 = 0xfb (sic) + # int 252 = 0xfc + # long 253 = 0xfd + # float 254 = 0xfe + # double 255 = 0xff + # -------------------- + # NOTE: the byte type seems to be reserved for categorical variables + # with a label, but the underlying variable is -127 to 100 + # we're going to drop the label and cast to int + self.DTYPE_MAP = dict( + list(zip(range(1, 245), ["a" + str(i) for i in range(1, 245)])) + + [ + (251, np.int8), + (252, np.int16), + (253, np.int32), + (254, np.float32), + (255, np.float64), + ] + ) + self.DTYPE_MAP_XML = dict( + [ + (32768, np.uint8), # Keys to GSO + (65526, np.float64), + (65527, np.float32), + (65528, np.int32), + (65529, np.int16), + (65530, np.int8), + ] + ) + self.TYPE_MAP = list(range(251)) + list("bhlfd") + self.TYPE_MAP_XML = dict( + [ + # Not really a Q, unclear how to handle byteswap + (32768, "Q"), + (65526, "d"), + (65527, "f"), + (65528, "l"), + (65529, "h"), + (65530, "b"), + ] + ) + # NOTE: technically, some of these are wrong. there are more numbers + # that can be represented. it's the 27 ABOVE and BELOW the max listed + # numeric data type in [U] 12.2.2 of the 11.2 manual + float32_min = b"\xff\xff\xff\xfe" + float32_max = b"\xff\xff\xff\x7e" + float64_min = b"\xff\xff\xff\xff\xff\xff\xef\xff" + float64_max = b"\xff\xff\xff\xff\xff\xff\xdf\x7f" + self.VALID_RANGE = { + "b": (-127, 100), + "h": (-32767, 32740), + "l": (-2147483647, 2147483620), + "f": ( + np.float32(struct.unpack(" 0 + + # calculate size of a data record + self.col_sizes = [self._calcsize(typ) for typ in self.typlist] + + def _read_new_header(self, first_char): + # The first part of the header is common to 117 - 119. + self.path_or_buf.read(27) # stata_dta>
    + self.format_version = int(self.path_or_buf.read(3)) + if self.format_version not in [117, 118, 119]: + raise ValueError(_version_error.format(version=self.format_version)) + self._set_encoding() + self.path_or_buf.read(21) # + self.byteorder = self.path_or_buf.read(3) == b"MSF" and ">" or "<" + self.path_or_buf.read(15) # + nvar_type = "H" if self.format_version <= 118 else "I" + nvar_size = 2 if self.format_version <= 118 else 4 + self.nvar = struct.unpack( + self.byteorder + nvar_type, self.path_or_buf.read(nvar_size) + )[0] + self.path_or_buf.read(7) # + + self.nobs = self._get_nobs() + self.path_or_buf.read(11) # + self.time_stamp = self._get_time_stamp() + self.path_or_buf.read(26) #
    + self.path_or_buf.read(8) # 0x0000000000000000 + self.path_or_buf.read(8) # position of + + self._seek_vartypes = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 16 + ) + self._seek_varnames = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 10 + ) + self._seek_sortlist = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 10 + ) + self._seek_formats = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 9 + ) + self._seek_value_label_names = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 19 + ) + + # Requires version-specific treatment + self._seek_variable_labels = self._get_seek_variable_labels() + + self.path_or_buf.read(8) # + self.data_location = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 6 + ) + self.seek_strls = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 7 + ) + self.seek_value_labels = ( + struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 14 + ) + + self.typlist, self.dtyplist = self._get_dtypes(self._seek_vartypes) + + self.path_or_buf.seek(self._seek_varnames) + self.varlist = self._get_varlist() + + self.path_or_buf.seek(self._seek_sortlist) + self.srtlist = struct.unpack( + self.byteorder + ("h" * (self.nvar + 1)), + self.path_or_buf.read(2 * (self.nvar + 1)), + )[:-1] + + self.path_or_buf.seek(self._seek_formats) + self.fmtlist = self._get_fmtlist() + + self.path_or_buf.seek(self._seek_value_label_names) + self.lbllist = self._get_lbllist() + + self.path_or_buf.seek(self._seek_variable_labels) + self._variable_labels = self._get_variable_labels() + + # Get data type information, works for versions 117-119. 
+ def _get_dtypes(self, seek_vartypes): + + self.path_or_buf.seek(seek_vartypes) + raw_typlist = [ + struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + for i in range(self.nvar) + ] + + def f(typ): + if typ <= 2045: + return typ + try: + return self.TYPE_MAP_XML[typ] + except KeyError: + raise ValueError(f"cannot convert stata types [{typ}]") + + typlist = [f(x) for x in raw_typlist] + + def f(typ): + if typ <= 2045: + return str(typ) + try: + return self.DTYPE_MAP_XML[typ] + except KeyError: + raise ValueError(f"cannot convert stata dtype [{typ}]") + + dtyplist = [f(x) for x in raw_typlist] + + return typlist, dtyplist + + def _get_varlist(self): + if self.format_version == 117: + b = 33 + elif self.format_version >= 118: + b = 129 + + return [self._decode(self.path_or_buf.read(b)) for i in range(self.nvar)] + + # Returns the format list + def _get_fmtlist(self): + if self.format_version >= 118: + b = 57 + elif self.format_version > 113: + b = 49 + elif self.format_version > 104: + b = 12 + else: + b = 7 + + return [self._decode(self.path_or_buf.read(b)) for i in range(self.nvar)] + + # Returns the label list + def _get_lbllist(self): + if self.format_version >= 118: + b = 129 + elif self.format_version > 108: + b = 33 + else: + b = 9 + return [self._decode(self.path_or_buf.read(b)) for i in range(self.nvar)] + + def _get_variable_labels(self): + if self.format_version >= 118: + vlblist = [ + self._decode(self.path_or_buf.read(321)) for i in range(self.nvar) + ] + elif self.format_version > 105: + vlblist = [ + self._decode(self.path_or_buf.read(81)) for i in range(self.nvar) + ] + else: + vlblist = [ + self._decode(self.path_or_buf.read(32)) for i in range(self.nvar) + ] + return vlblist + + def _get_nobs(self): + if self.format_version >= 118: + return struct.unpack(self.byteorder + "Q", self.path_or_buf.read(8))[0] + else: + return struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + + def _get_data_label(self): + if 
self.format_version >= 118: + strlen = struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version == 117: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version > 105: + return self._decode(self.path_or_buf.read(81)) + else: + return self._decode(self.path_or_buf.read(32)) + + def _get_time_stamp(self): + if self.format_version >= 118: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self.path_or_buf.read(strlen).decode("utf-8") + elif self.format_version == 117: + strlen = struct.unpack("b", self.path_or_buf.read(1))[0] + return self._decode(self.path_or_buf.read(strlen)) + elif self.format_version > 104: + return self._decode(self.path_or_buf.read(18)) + else: + raise ValueError() + + def _get_seek_variable_labels(self): + if self.format_version == 117: + self.path_or_buf.read(8) # , throw away + # Stata 117 data files do not follow the described format. 
This is + # a work around that uses the previous label, 33 bytes for each + # variable, 20 for the closing tag and 17 for the opening tag + return self._seek_value_label_names + (33 * self.nvar) + 20 + 17 + elif self.format_version >= 118: + return struct.unpack(self.byteorder + "q", self.path_or_buf.read(8))[0] + 17 + else: + raise ValueError() + + def _read_old_header(self, first_char): + self.format_version = struct.unpack("b", first_char)[0] + if self.format_version not in [104, 105, 108, 111, 113, 114, 115]: + raise ValueError(_version_error.format(version=self.format_version)) + self._set_encoding() + self.byteorder = ( + struct.unpack("b", self.path_or_buf.read(1))[0] == 0x1 and ">" or "<" + ) + self.filetype = struct.unpack("b", self.path_or_buf.read(1))[0] + self.path_or_buf.read(1) # unused + + self.nvar = struct.unpack(self.byteorder + "H", self.path_or_buf.read(2))[0] + self.nobs = self._get_nobs() + + self._data_label = self._get_data_label() + + self.time_stamp = self._get_time_stamp() + + # descriptors + if self.format_version > 108: + typlist = [ord(self.path_or_buf.read(1)) for i in range(self.nvar)] + else: + buf = self.path_or_buf.read(self.nvar) + typlistb = np.frombuffer(buf, dtype=np.uint8) + typlist = [] + for tp in typlistb: + if tp in self.OLD_TYPE_MAPPING: + typlist.append(self.OLD_TYPE_MAPPING[tp]) + else: + typlist.append(tp - 127) # bytes + + try: + self.typlist = [self.TYPE_MAP[typ] for typ in typlist] + except ValueError: + invalid_types = ",".join(str(x) for x in typlist) + raise ValueError(f"cannot convert stata types [{invalid_types}]") + try: + self.dtyplist = [self.DTYPE_MAP[typ] for typ in typlist] + except ValueError: + invalid_dtypes = ",".join(str(x) for x in typlist) + raise ValueError(f"cannot convert stata dtypes [{invalid_dtypes}]") + + if self.format_version > 108: + self.varlist = [ + self._decode(self.path_or_buf.read(33)) for i in range(self.nvar) + ] + else: + self.varlist = [ + self._decode(self.path_or_buf.read(9)) 
for i in range(self.nvar) + ] + self.srtlist = struct.unpack( + self.byteorder + ("h" * (self.nvar + 1)), + self.path_or_buf.read(2 * (self.nvar + 1)), + )[:-1] + + self.fmtlist = self._get_fmtlist() + + self.lbllist = self._get_lbllist() + + self._variable_labels = self._get_variable_labels() + + # ignore expansion fields (Format 105 and later) + # When reading, read five bytes; the last four bytes now tell you + # the size of the next read, which you discard. You then continue + # like this until you read 5 bytes of zeros. + + if self.format_version > 104: + while True: + data_type = struct.unpack( + self.byteorder + "b", self.path_or_buf.read(1) + )[0] + if self.format_version > 108: + data_len = struct.unpack( + self.byteorder + "i", self.path_or_buf.read(4) + )[0] + else: + data_len = struct.unpack( + self.byteorder + "h", self.path_or_buf.read(2) + )[0] + if data_type == 0: + break + self.path_or_buf.read(data_len) + + # necessary data to continue parsing + self.data_location = self.path_or_buf.tell() + + def _setup_dtype(self): + """Map between numpy and state dtypes""" + if self._dtype is not None: + return self._dtype + + dtype = [] # Convert struct data types to numpy data type + for i, typ in enumerate(self.typlist): + if typ in self.NUMPY_TYPE_MAP: + dtype.append(("s" + str(i), self.byteorder + self.NUMPY_TYPE_MAP[typ])) + else: + dtype.append(("s" + str(i), "S" + str(typ))) + dtype = np.dtype(dtype) + self._dtype = dtype + + return self._dtype + + def _calcsize(self, fmt): + return type(fmt) is int and fmt or struct.calcsize(self.byteorder + fmt) + + def _decode(self, s): + # have bytes not strings, so must decode + s = s.partition(b"\0")[0] + try: + return s.decode(self._encoding) + except UnicodeDecodeError: + # GH 25960, fallback to handle incorrect format produced when 117 + # files are converted to 118 files in Stata + encoding = self._encoding + msg = f""" +One or more strings in the dta file could not be decoded using {encoding}, and +so the 
fallback encoding of latin-1 is being used. This can happen when a file +has been incorrectly encoded by Stata or some other software. You should verify +the string values returned are correct.""" + warnings.warn(msg, UnicodeWarning) + return s.decode("latin-1") + + def _read_value_labels(self): + if self._value_labels_read: + # Don't read twice + return + if self.format_version <= 108: + # Value labels are not supported in version 108 and earlier. + self._value_labels_read = True + self.value_label_dict = dict() + return + + if self.format_version >= 117: + self.path_or_buf.seek(self.seek_value_labels) + else: + offset = self.nobs * self._dtype.itemsize + self.path_or_buf.seek(self.data_location + offset) + + self._value_labels_read = True + self.value_label_dict = dict() + + while True: + if self.format_version >= 117: + if self.path_or_buf.read(5) == b" + break # end of value label table + + slength = self.path_or_buf.read(4) + if not slength: + break # end of value label table (format < 117) + if self.format_version <= 117: + labname = self._decode(self.path_or_buf.read(33)) + else: + labname = self._decode(self.path_or_buf.read(129)) + self.path_or_buf.read(3) # padding + + n = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + txtlen = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + off = np.frombuffer( + self.path_or_buf.read(4 * n), dtype=self.byteorder + "i4", count=n + ) + val = np.frombuffer( + self.path_or_buf.read(4 * n), dtype=self.byteorder + "i4", count=n + ) + ii = np.argsort(off) + off = off[ii] + val = val[ii] + txt = self.path_or_buf.read(txtlen) + self.value_label_dict[labname] = dict() + for i in range(n): + end = off[i + 1] if i < n - 1 else txtlen + self.value_label_dict[labname][val[i]] = self._decode(txt[off[i] : end]) + if self.format_version >= 117: + self.path_or_buf.read(6) # + self._value_labels_read = True + + def _read_strls(self): + self.path_or_buf.seek(self.seek_strls) + # Wrap v_o in a string 
to allow uint64 values as keys on 32bit OS + self.GSO = {"0": ""} + while True: + if self.path_or_buf.read(3) != b"GSO": + break + + if self.format_version == 117: + v_o = struct.unpack(self.byteorder + "Q", self.path_or_buf.read(8))[0] + else: + buf = self.path_or_buf.read(12) + # Only tested on little endian file on little endian machine. + v_size = 2 if self.format_version == 118 else 3 + if self.byteorder == "<": + buf = buf[0:v_size] + buf[4 : 12 - v_size] + else: + # This path may not be correct, impossible to test + buf = buf[0:v_size] + buf[4 + v_size :] + v_o = struct.unpack("Q", buf)[0] + typ = struct.unpack("B", self.path_or_buf.read(1))[0] + length = struct.unpack(self.byteorder + "I", self.path_or_buf.read(4))[0] + va = self.path_or_buf.read(length) + if typ == 130: + va = va[0:-1].decode(self._encoding) + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + self.GSO[str(v_o)] = va + + def __next__(self): + return self.read(nrows=self._chunksize or 1) + + def get_chunk(self, size=None): + """ + Reads lines from Stata file and returns as dataframe + + Parameters + ---------- + size : int, defaults to None + Number of lines to read. If None, reads whole file. + + Returns + ------- + DataFrame + """ + if size is None: + size = self._chunksize + return self.read(nrows=size) + + @Appender(_read_method_doc) + def read( + self, + nrows=None, + convert_dates=None, + convert_categoricals=None, + index_col=None, + convert_missing=None, + preserve_dtypes=None, + columns=None, + order_categoricals=None, + ): + # Handle empty file or chunk. If reading incrementally raise + # StopIteration. If reading the whole thing return an empty + # data frame. 
+ if (self.nobs == 0) and (nrows is None): + self._can_read_value_labels = True + self._data_read = True + self.close() + return DataFrame(columns=self.varlist) + + # Handle options + if convert_dates is None: + convert_dates = self._convert_dates + if convert_categoricals is None: + convert_categoricals = self._convert_categoricals + if convert_missing is None: + convert_missing = self._convert_missing + if preserve_dtypes is None: + preserve_dtypes = self._preserve_dtypes + if columns is None: + columns = self._columns + if order_categoricals is None: + order_categoricals = self._order_categoricals + if index_col is None: + index_col = self._index_col + + if nrows is None: + nrows = self.nobs + + if (self.format_version >= 117) and (not self._value_labels_read): + self._can_read_value_labels = True + self._read_strls() + + # Read data + dtype = self._dtype + max_read_len = (self.nobs - self._lines_read) * dtype.itemsize + read_len = nrows * dtype.itemsize + read_len = min(read_len, max_read_len) + if read_len <= 0: + # Iterator has finished, should never be here unless + # we are reading the file incrementally + if convert_categoricals: + self._read_value_labels() + self.close() + raise StopIteration + offset = self._lines_read * dtype.itemsize + self.path_or_buf.seek(self.data_location + offset) + read_lines = min(nrows, self.nobs - self._lines_read) + data = np.frombuffer( + self.path_or_buf.read(read_len), dtype=dtype, count=read_lines + ) + + self._lines_read += read_lines + if self._lines_read == self.nobs: + self._can_read_value_labels = True + self._data_read = True + # if necessary, swap the byte order to native here + if self.byteorder != self._native_byteorder: + data = data.byteswap().newbyteorder() + + if convert_categoricals: + self._read_value_labels() + + if len(data) == 0: + data = DataFrame(columns=self.varlist) + else: + data = DataFrame.from_records(data) + data.columns = self.varlist + + # If index is not specified, use actual row number 
rather than + # restarting at 0 for each chunk. + if index_col is None: + ix = np.arange(self._lines_read - read_lines, self._lines_read) + data = data.set_index(ix) + + if columns is not None: + try: + data = self._do_select_columns(data, columns) + except ValueError: + self.close() + raise + + # Decode strings + for col, typ in zip(data, self.typlist): + if type(typ) is int: + data[col] = data[col].apply(self._decode, convert_dtype=True) + + data = self._insert_strls(data) + + cols_ = np.where(self.dtyplist)[0] + + # Convert columns (if needed) to match input type + ix = data.index + requires_type_conversion = False + data_formatted = [] + for i in cols_: + if self.dtyplist[i] is not None: + col = data.columns[i] + dtype = data[col].dtype + if dtype != np.dtype(object) and dtype != self.dtyplist[i]: + requires_type_conversion = True + data_formatted.append( + (col, Series(data[col], ix, self.dtyplist[i])) + ) + else: + data_formatted.append((col, data[col])) + if requires_type_conversion: + data = DataFrame.from_dict(dict(data_formatted)) + del data_formatted + + data = self._do_convert_missing(data, convert_missing) + + if convert_dates: + + def any_startswith(x: str) -> bool: + return any(x.startswith(fmt) for fmt in _date_formats) + + cols = np.where([any_startswith(x) for x in self.fmtlist])[0] + for i in cols: + col = data.columns[i] + try: + data[col] = _stata_elapsed_date_to_datetime_vec( + data[col], self.fmtlist[i] + ) + except ValueError: + self.close() + raise + + if convert_categoricals and self.format_version > 108: + data = self._do_convert_categoricals( + data, self.value_label_dict, self.lbllist, order_categoricals + ) + + if not preserve_dtypes: + retyped_data = [] + convert = False + for col in data: + dtype = data[col].dtype + if dtype in (np.float16, np.float32): + dtype = np.float64 + convert = True + elif dtype in (np.int8, np.int16, np.int32): + dtype = np.int64 + convert = True + retyped_data.append((col, data[col].astype(dtype))) + if 
convert: + data = DataFrame.from_dict(dict(retyped_data)) + + if index_col is not None: + data = data.set_index(data.pop(index_col)) + + return data + + def _do_convert_missing(self, data, convert_missing): + # Check for missing values, and replace if found + replacements = {} + for i, colname in enumerate(data): + fmt = self.typlist[i] + if fmt not in self.VALID_RANGE: + continue + + nmin, nmax = self.VALID_RANGE[fmt] + series = data[colname] + missing = np.logical_or(series < nmin, series > nmax) + + if not missing.any(): + continue + + if convert_missing: # Replacement follows Stata notation + + missing_loc = np.argwhere(missing._ndarray_values) + umissing, umissing_loc = np.unique(series[missing], return_inverse=True) + replacement = Series(series, dtype=np.object) + for j, um in enumerate(umissing): + missing_value = StataMissingValue(um) + + loc = missing_loc[umissing_loc == j] + replacement.iloc[loc] = missing_value + else: # All replacements are identical + dtype = series.dtype + if dtype not in (np.float32, np.float64): + dtype = np.float64 + replacement = Series(series, dtype=dtype) + replacement[missing] = np.nan + replacements[colname] = replacement + if replacements: + columns = data.columns + replacements = DataFrame(replacements) + data = concat([data.drop(replacements.columns, 1), replacements], 1) + data = data[columns] + return data + + def _insert_strls(self, data): + if not hasattr(self, "GSO") or len(self.GSO) == 0: + return data + for i, typ in enumerate(self.typlist): + if typ != "Q": + continue + # Wrap v_o in a string to allow uint64 values as keys on 32bit OS + data.iloc[:, i] = [self.GSO[str(k)] for k in data.iloc[:, i]] + return data + + def _do_select_columns(self, data, columns): + + if not self._column_selector_set: + column_set = set(columns) + if len(column_set) != len(columns): + raise ValueError("columns contains duplicate entries") + unmatched = column_set.difference(data.columns) + if unmatched: + raise ValueError( + "The 
following columns were not found in the " + "Stata data set: " + ", ".join(list(unmatched)) + ) + # Copy information for retained columns for later processing + dtyplist = [] + typlist = [] + fmtlist = [] + lbllist = [] + for col in columns: + i = data.columns.get_loc(col) + dtyplist.append(self.dtyplist[i]) + typlist.append(self.typlist[i]) + fmtlist.append(self.fmtlist[i]) + lbllist.append(self.lbllist[i]) + + self.dtyplist = dtyplist + self.typlist = typlist + self.fmtlist = fmtlist + self.lbllist = lbllist + self._column_selector_set = True + + return data[columns] + + def _do_convert_categoricals( + self, data, value_label_dict, lbllist, order_categoricals + ): + """ + Converts categorical columns to Categorical type. + """ + value_labels = list(value_label_dict.keys()) + cat_converted_data = [] + for col, label in zip(data, lbllist): + if label in value_labels: + # Explicit call with ordered=True + cat_data = Categorical(data[col], ordered=order_categoricals) + categories = [] + for category in cat_data.categories: + if category in value_label_dict[label]: + categories.append(value_label_dict[label][category]) + else: + categories.append(category) # Partially labeled + try: + cat_data.categories = categories + except ValueError: + vc = Series(categories).value_counts() + repeats = list(vc.index[vc > 1]) + repeats = "-" * 80 + "\n" + "\n".join(repeats) + # GH 25772 + msg = f""" +Value labels for column {col} are not unique. These cannot be converted to +pandas categoricals. + +Either read the file with `convert_categoricals` set to False or use the +low level interface in `StataReader` to separately read the values and the +value_labels. + +The repeated labels are: +{repeats} +""" + raise ValueError(msg) + # TODO: is the next line needed above in the data(...) method? 
+ cat_data = Series(cat_data, index=data.index) + cat_converted_data.append((col, cat_data)) + else: + cat_converted_data.append((col, data[col])) + data = DataFrame.from_dict(dict(cat_converted_data)) + return data + + @property + def data_label(self): + """ + Return data label of Stata file. + """ + return self._data_label + + def variable_labels(self): + """ + Return variable labels as a dict, associating each variable name + with corresponding label. + + Returns + ------- + dict + """ + return dict(zip(self.varlist, self._variable_labels)) + + def value_labels(self): + """ + Return a dict, associating each variable name a dict, associating + each value its corresponding label. + + Returns + ------- + dict + """ + if not self._value_labels_read: + self._read_value_labels() + + return self.value_label_dict + + +def _open_file_binary_write(fname): + """ + Open a binary file or no-op if file-like. + + Parameters + ---------- + fname : string path, path object or buffer + + Returns + ------- + file : file-like object + File object supporting write + own : bool + True if the file was created, otherwise False + """ + if hasattr(fname, "write"): + # if 'b' not in fname.mode: + return fname, False + return open(fname, "wb"), True + + +def _set_endianness(endianness): + if endianness.lower() in ["<", "little"]: + return "<" + elif endianness.lower() in [">", "big"]: + return ">" + else: # pragma : no cover + raise ValueError(f"Endianness {endianness} not understood") + + +def _pad_bytes(name, length): + """ + Take a char string and pads it with null bytes until it's length chars. + """ + if isinstance(name, bytes): + return name + b"\x00" * (length - len(name)) + return name + "\x00" * (length - len(name)) + + +def _convert_datetime_to_stata_type(fmt): + """ + Convert from one of the stata date formats to a type in TYPE_MAP. 
+ """ + if fmt in [ + "tc", + "%tc", + "td", + "%td", + "tw", + "%tw", + "tm", + "%tm", + "tq", + "%tq", + "th", + "%th", + "ty", + "%ty", + ]: + return np.float64 # Stata expects doubles for SIFs + else: + raise NotImplementedError(f"Format {fmt} not implemented") + + +def _maybe_convert_to_int_keys(convert_dates, varlist): + new_dict = {} + for key in convert_dates: + if not convert_dates[key].startswith("%"): # make sure proper fmts + convert_dates[key] = "%" + convert_dates[key] + if key in varlist: + new_dict.update({varlist.index(key): convert_dates[key]}) + else: + if not isinstance(key, int): + raise ValueError("convert_dates key must be a column or an integer") + new_dict.update({key: convert_dates[key]}) + return new_dict + + +def _dtype_to_stata_type(dtype, column): + """ + Convert dtype types to stata types. Returns the byte of the given ordinal. + See TYPE_MAP and comments for an explanation. This is also explained in + the dta spec. + 1 - 244 are strings of this length + Pandas Stata + 251 - for int8 byte + 252 - for int16 int + 253 - for int32 long + 254 - for float32 float + 255 - for double double + + If there are dates to convert, then dtype will already have the correct + type inserted. + """ + # TODO: expand to handle datetime to integer conversion + if dtype.type == np.object_: # try to coerce it to the biggest string + # not memory efficient, what else could we + # do? + itemsize = max_len_string_array(ensure_object(column.values)) + return max(itemsize, 1) + elif dtype == np.float64: + return 255 + elif dtype == np.float32: + return 254 + elif dtype == np.int32: + return 253 + elif dtype == np.int16: + return 252 + elif dtype == np.int8: + return 251 + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +def _dtype_to_default_stata_fmt(dtype, column, dta_version=114, force_strl=False): + """ + Map numpy dtype to stata's default format for this type. 
Not terribly + important since users can change this in Stata. Semantics are + + object -> "%DDs" where DD is the length of the string. If not a string, + raise ValueError + float64 -> "%10.0g" + float32 -> "%9.0g" + int64 -> "%9.0g" + int32 -> "%12.0g" + int16 -> "%8.0g" + int8 -> "%8.0g" + strl -> "%9s" + """ + # TODO: Refactor to combine type with format + # TODO: expand this to handle a default datetime format? + if dta_version < 117: + max_str_len = 244 + else: + max_str_len = 2045 + if force_strl: + return "%9s" + if dtype.type == np.object_: + itemsize = max_len_string_array(ensure_object(column.values)) + if itemsize > max_str_len: + if dta_version >= 117: + return "%9s" + else: + raise ValueError(excessive_string_length_error.format(column.name)) + return "%" + str(max(itemsize, 1)) + "s" + elif dtype == np.float64: + return "%10.0g" + elif dtype == np.float32: + return "%9.0g" + elif dtype == np.int32: + return "%12.0g" + elif dtype == np.int8 or dtype == np.int16: + return "%8.0g" + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +class StataWriter(StataParser): + """ + A class for writing Stata binary dta files + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + + .. versionadded:: 0.23.0 support for pathlib, py.path. + + data : DataFrame + Input to save + convert_dates : dict + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. 
Raises NotImplementedError if a datetime column has + timezone information + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". default is `sys.byteorder` + time_stamp : datetime + A datetime to use as file creation date. Default is the current time + data_label : str + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + + Returns + ------- + writer : StataWriter instance + The StataWriter instance has a write_file method, which will + write the file to the given `fname`. + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + >>> data = pd.DataFrame([[1.0, 1]], columns=['a', 'b']) + >>> writer = StataWriter('./data_file.dta', data) + >>> writer.write_file() + + Or with dates + >>> from datetime import datetime + >>> data = pd.DataFrame([[datetime(2000,1,1)]], columns=['date']) + >>> writer = StataWriter('./date_data_file.dta', data, {'date' : 'tw'}) + >>> writer.write_file() + """ + + _max_string_length = 244 + _encoding = "latin-1" + + def __init__( + self, + fname, + data, + convert_dates=None, + write_index=True, + byteorder=None, + time_stamp=None, + data_label=None, + variable_labels=None, + ): + super().__init__() + self._convert_dates = {} if convert_dates is None else convert_dates + self._write_index = write_index + self._time_stamp = time_stamp + self._data_label = data_label + self._variable_labels = variable_labels + self._own_file = True + # attach nobs, nvars, data, varlist, typlist + self._prepare_pandas(data) + + if byteorder 
is None: + byteorder = sys.byteorder + self._byteorder = _set_endianness(byteorder) + self._fname = stringify_path(fname) + self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} + self._converted_names = {} + + def _write(self, to_write): + """ + Helper to call encode before writing to file for Python 3 compat. + """ + self._file.write(to_write.encode(self._encoding or self._default_encoding)) + + def _prepare_categoricals(self, data): + """Check for categorical columns, retain categorical information for + Stata file and convert categorical data to int""" + + is_cat = [is_categorical_dtype(data[col]) for col in data] + self._is_col_cat = is_cat + self._value_labels = [] + if not any(is_cat): + return data + + get_base_missing_value = StataMissingValue.get_base_missing_value + data_formatted = [] + for col, col_is_cat in zip(data, is_cat): + if col_is_cat: + svl = StataValueLabel(data[col], encoding=self._encoding) + self._value_labels.append(svl) + dtype = data[col].cat.codes.dtype + if dtype == np.int64: + raise ValueError( + "It is not possible to export " + "int64-based categorical data to Stata." 
+ ) + values = data[col].cat.codes.values.copy() + + # Upcast if needed so that correct missing values can be set + if values.max() >= get_base_missing_value(dtype): + if dtype == np.int8: + dtype = np.int16 + elif dtype == np.int16: + dtype = np.int32 + else: + dtype = np.float64 + values = np.array(values, dtype=dtype) + + # Replace missing values with Stata missing value for type + values[values == -1] = get_base_missing_value(dtype) + data_formatted.append((col, values)) + else: + data_formatted.append((col, data[col])) + return DataFrame.from_dict(dict(data_formatted)) + + def _replace_nans(self, data): + # return data + """Checks floating point data columns for nans, and replaces these with + the generic Stata for missing value (.)""" + for c in data: + dtype = data[c].dtype + if dtype in (np.float32, np.float64): + if dtype == np.float32: + replacement = self.MISSING_VALUES["f"] + else: + replacement = self.MISSING_VALUES["d"] + data[c] = data[c].fillna(replacement) + + return data + + def _update_strl_names(self): + """No-op, forward compatibility""" + pass + + def _validate_variable_name(self, name): + """ + Validate variable names for Stata export. + + Parameters + ---------- + name : str + Variable name + + Returns + ------- + str + The validated name with invalid characters replaced with + underscores. + + Notes + ----- + Stata 114 and 117 support ascii characters in a-z, A-Z, 0-9 + and _. + """ + for c in name: + if ( + (c < "A" or c > "Z") + and (c < "a" or c > "z") + and (c < "0" or c > "9") + and c != "_" + ): + name = name.replace(c, "_") + return name + + def _check_column_names(self, data): + """ + Checks column names to ensure that they are valid Stata column names. 
+ This includes checks for: + * Non-string names + * Stata keywords + * Variables that start with numbers + * Variables with names that are too long + + When an illegal variable name is detected, it is converted, and if + dates are exported, the variable name is propagated to the date + conversion dictionary + """ + converted_names = {} + columns = list(data.columns) + original_columns = columns[:] + + duplicate_var_id = 0 + for j, name in enumerate(columns): + orig_name = name + if not isinstance(name, str): + name = str(name) + + name = self._validate_variable_name(name) + + # Variable name must not be a reserved word + if name in self.RESERVED_WORDS: + name = "_" + name + + # Variable name may not start with a number + if name[0] >= "0" and name[0] <= "9": + name = "_" + name + + name = name[: min(len(name), 32)] + + if not name == orig_name: + # check for duplicates + while columns.count(name) > 0: + # prepend ascending number to avoid duplicates + name = "_" + str(duplicate_var_id) + name + name = name[: min(len(name), 32)] + duplicate_var_id += 1 + converted_names[orig_name] = name + + columns[j] = name + + data.columns = columns + + # Check date conversion, and fix key if needed + if self._convert_dates: + for c, o in zip(columns, original_columns): + if c != o: + self._convert_dates[c] = self._convert_dates[o] + del self._convert_dates[o] + + if converted_names: + conversion_warning = [] + for orig_name, name in converted_names.items(): + # need to possibly encode the orig name if its unicode + try: + orig_name = orig_name.encode("utf-8") + except (UnicodeDecodeError, AttributeError): + pass + msg = f"{orig_name} -> {name}" + conversion_warning.append(msg) + + ws = invalid_name_doc.format("\n ".join(conversion_warning)) + warnings.warn(ws, InvalidColumnName) + + self._converted_names = converted_names + self._update_strl_names() + + return data + + def _set_formats_and_types(self, dtypes): + self.typlist = [] + self.fmtlist = [] + for col, dtype in 
dtypes.items(): + self.fmtlist.append(_dtype_to_default_stata_fmt(dtype, self.data[col])) + self.typlist.append(_dtype_to_stata_type(dtype, self.data[col])) + + def _prepare_pandas(self, data): + # NOTE: we might need a different API / class for pandas objects so + # we can set different semantics - handle this with a PR to pandas.io + + data = data.copy() + + if self._write_index: + data = data.reset_index() + + # Ensure column names are strings + data = self._check_column_names(data) + + # Check columns for compatibility with stata, upcast if necessary + # Raise if outside the supported range + data = _cast_to_stata_types(data) + + # Replace NaNs with Stata missing values + data = self._replace_nans(data) + + # Convert categoricals to int data, and strip labels + data = self._prepare_categoricals(data) + + self.nobs, self.nvar = data.shape + self.data = data + self.varlist = data.columns.tolist() + + dtypes = data.dtypes + + # Ensure all date columns are converted + for col in data: + if col in self._convert_dates: + continue + if is_datetime64_dtype(data[col]): + self._convert_dates[col] = "tc" + + self._convert_dates = _maybe_convert_to_int_keys( + self._convert_dates, self.varlist + ) + for key in self._convert_dates: + new_type = _convert_datetime_to_stata_type(self._convert_dates[key]) + dtypes[key] = np.dtype(new_type) + + # Verify object arrays are strings and encode to bytes + self._encode_strings() + + self._set_formats_and_types(dtypes) + + # set the given format for the datetime cols + if self._convert_dates is not None: + for key in self._convert_dates: + self.fmtlist[key] = self._convert_dates[key] + + def _encode_strings(self): + """ + Encode strings in dta-specific encoding + + Do not encode columns marked for date conversion or for strL + conversion. The strL converter independently handles conversion and + also accepts empty string arrays. 
+ """ + convert_dates = self._convert_dates + # _convert_strl is not available in dta 114 + convert_strl = getattr(self, "_convert_strl", []) + for i, col in enumerate(self.data): + # Skip columns marked for date conversion or strl conversion + if i in convert_dates or col in convert_strl: + continue + column = self.data[col] + dtype = column.dtype + if dtype.type == np.object_: + inferred_dtype = infer_dtype(column, skipna=True) + if not ((inferred_dtype in ("string", "unicode")) or len(column) == 0): + col = column.name + raise ValueError( + f"""\ +Column `{col}` cannot be exported.\n\nOnly string-like object arrays +containing all strings or a mix of strings and None can be exported. +Object arrays containing only null values are prohibited. Other object +types cannot be exported and must first be converted to one of the +supported types.""" + ) + encoded = self.data[col].str.encode(self._encoding) + # If larger than _max_string_length do nothing + if ( + max_len_string_array(ensure_object(encoded.values)) + <= self._max_string_length + ): + self.data[col] = encoded + + def write_file(self): + self._file, self._own_file = _open_file_binary_write(self._fname) + try: + self._write_header(data_label=self._data_label, time_stamp=self._time_stamp) + self._write_map() + self._write_variable_types() + self._write_varnames() + self._write_sortlist() + self._write_formats() + self._write_value_label_names() + self._write_variable_labels() + self._write_expansion_fields() + self._write_characteristics() + self._prepare_data() + self._write_data() + self._write_strls() + self._write_value_labels() + self._write_file_close_tag() + self._write_map() + except Exception as exc: + self._close() + if self._own_file: + try: + os.unlink(self._fname) + except OSError: + warnings.warn( + f"This save was not successful but {self._fname} could not " + "be deleted. 
This file is not valid.", + ResourceWarning, + ) + raise exc + else: + self._close() + + def _close(self): + """ + Close the file if it was created by the writer. + + If a buffer or file-like object was passed in, for example a GzipFile, + then leave this file open for the caller to close. In either case, + attempt to flush the file contents to ensure they are written to disk + (if supported) + """ + # Some file-like objects might not support flush + try: + self._file.flush() + except AttributeError: + pass + if self._own_file: + self._file.close() + + def _write_map(self): + """No-op, future compatibility""" + pass + + def _write_file_close_tag(self): + """No-op, future compatibility""" + pass + + def _write_characteristics(self): + """No-op, future compatibility""" + pass + + def _write_strls(self): + """No-op, future compatibility""" + pass + + def _write_expansion_fields(self): + """Write 5 zeros for expansion fields""" + self._write(_pad_bytes("", 5)) + + def _write_value_labels(self): + for vl in self._value_labels: + self._file.write(vl.generate_value_label(self._byteorder)) + + def _write_header(self, data_label=None, time_stamp=None): + byteorder = self._byteorder + # ds_format - just use 114 + self._file.write(struct.pack("b", 114)) + # byteorder + self._write(byteorder == ">" and "\x01" or "\x02") + # filetype + self._write("\x01") + # unused + self._write("\x00") + # number of vars, 2 bytes + self._file.write(struct.pack(byteorder + "h", self.nvar)[:2]) + # number of obs, 4 bytes + self._file.write(struct.pack(byteorder + "i", self.nobs)[:4]) + # data label 81 bytes, char, null terminated + if data_label is None: + self._file.write(self._null_terminate(_pad_bytes("", 80))) + else: + self._file.write(self._null_terminate(_pad_bytes(data_label[:80], 80))) + # time stamp, 18 bytes, char, null terminated + # format dd Mon yyyy hh:mm + if time_stamp is None: + time_stamp = datetime.datetime.now() + elif not isinstance(time_stamp, datetime.datetime): + raise 
ValueError("time_stamp should be datetime type") + # GH #13856 + # Avoid locale-specific month conversion + months = [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ] + month_lookup = {i + 1: month for i, month in enumerate(months)} + ts = ( + time_stamp.strftime("%d ") + + month_lookup[time_stamp.month] + + time_stamp.strftime(" %Y %H:%M") + ) + self._file.write(self._null_terminate(ts)) + + def _write_variable_types(self): + for typ in self.typlist: + self._file.write(struct.pack("B", typ)) + + def _write_varnames(self): + # varlist names are checked by _check_column_names + # varlist, requires null terminated + for name in self.varlist: + name = self._null_terminate(name, True) + name = _pad_bytes(name[:32], 33) + self._write(name) + + def _write_sortlist(self): + # srtlist, 2*(nvar+1), int array, encoded by byteorder + srtlist = _pad_bytes("", 2 * (self.nvar + 1)) + self._write(srtlist) + + def _write_formats(self): + # fmtlist, 49*nvar, char array + for fmt in self.fmtlist: + self._write(_pad_bytes(fmt, 49)) + + def _write_value_label_names(self): + # lbllist, 33*nvar, char array + for i in range(self.nvar): + # Use variable name when categorical + if self._is_col_cat[i]: + name = self.varlist[i] + name = self._null_terminate(name, True) + name = _pad_bytes(name[:32], 33) + self._write(name) + else: # Default is empty label + self._write(_pad_bytes("", 33)) + + def _write_variable_labels(self): + # Missing labels are 80 blank characters plus null termination + blank = _pad_bytes("", 81) + + if self._variable_labels is None: + for i in range(self.nvar): + self._write(blank) + return + + for col in self.data: + if col in self._variable_labels: + label = self._variable_labels[col] + if len(label) > 80: + raise ValueError("Variable labels must be 80 characters or fewer") + is_latin1 = all(ord(c) < 256 for c in label) + if not is_latin1: + raise ValueError( + "Variable labels must contain only 
characters that " + "can be encoded in Latin-1" + ) + self._write(_pad_bytes(label, 81)) + else: + self._write(blank) + + def _convert_strls(self, data): + """No-op, future compatibility""" + return data + + def _prepare_data(self): + data = self.data + typlist = self.typlist + convert_dates = self._convert_dates + # 1. Convert dates + if self._convert_dates is not None: + for i, col in enumerate(data): + if i in convert_dates: + data[col] = _datetime_to_stata_elapsed_vec( + data[col], self.fmtlist[i] + ) + # 2. Convert strls + data = self._convert_strls(data) + + # 3. Convert bad string data to '' and pad to correct length + dtypes = {} + native_byteorder = self._byteorder == _set_endianness(sys.byteorder) + for i, col in enumerate(data): + typ = typlist[i] + if typ <= self._max_string_length: + data[col] = data[col].fillna("").apply(_pad_bytes, args=(typ,)) + stype = f"S{typ}" + dtypes[col] = stype + data[col] = data[col].astype(stype) + else: + dtype = data[col].dtype + if not native_byteorder: + dtype = dtype.newbyteorder(self._byteorder) + dtypes[col] = dtype + + self.data = data.to_records(index=False, column_dtypes=dtypes) + + def _write_data(self): + data = self.data + self._file.write(data.tobytes()) + + def _null_terminate(self, s, as_string=False): + null_byte = "\x00" + s += null_byte + + if not as_string: + s = s.encode(self._encoding) + + return s + + +def _dtype_to_stata_type_117(dtype, column, force_strl): + """ + Converts dtype types to stata types. Returns the byte of the given ordinal. + See TYPE_MAP and comments for an explanation. This is also explained in + the dta spec. + 1 - 2045 are strings of this length + Pandas Stata + 32768 - for object strL + 65526 - for int8 byte + 65527 - for int16 int + 65528 - for int32 long + 65529 - for float32 float + 65530 - for double double + + If there are dates to convert, then dtype will already have the correct + type inserted. 
+ """ + # TODO: expand to handle datetime to integer conversion + if force_strl: + return 32768 + if dtype.type == np.object_: # try to coerce it to the biggest string + # not memory efficient, what else could we + # do? + itemsize = max_len_string_array(ensure_object(column.values)) + itemsize = max(itemsize, 1) + if itemsize <= 2045: + return itemsize + return 32768 + elif dtype == np.float64: + return 65526 + elif dtype == np.float32: + return 65527 + elif dtype == np.int32: + return 65528 + elif dtype == np.int16: + return 65529 + elif dtype == np.int8: + return 65530 + else: # pragma : no cover + raise NotImplementedError(f"Data type {dtype} not supported.") + + +def _pad_bytes_new(name, length): + """ + Takes a bytes instance and pads it with null bytes until it's length chars. + """ + if isinstance(name, str): + name = bytes(name, "utf-8") + return name + b"\x00" * (length - len(name)) + + +class StataStrLWriter: + """ + Converter for Stata StrLs + + Stata StrLs map 8 byte values to strings which are stored using a + dictionary-like format where strings are keyed to two values. + + Parameters + ---------- + df : DataFrame + DataFrame to convert + columns : list + List of columns names to convert to StrL + version : int, optional + dta version. Currently supports 117, 118 and 119 + byteorder : str, optional + Can be ">", "<", "little", or "big". default is `sys.byteorder` + + Notes + ----- + Supports creation of the StrL block of a dta file for dta versions + 117, 118 and 119. These differ in how the GSO is stored. 118 and + 119 store the GSO lookup value as a uint32 and a uint64, while 117 + uses two uint32s. 118 and 119 also encode all strings as unicode + which is required by the format. 117 uses 'latin-1' a fixed width + encoding that extends the 7-bit ascii table with an additional 128 + characters. 
+ """ + + def __init__(self, df, columns, version=117, byteorder=None): + if version not in (117, 118, 119): + raise ValueError("Only dta versions 117, 118 and 119 supported") + self._dta_ver = version + + self.df = df + self.columns = columns + self._gso_table = {"": (0, 0)} + if byteorder is None: + byteorder = sys.byteorder + self._byteorder = _set_endianness(byteorder) + + gso_v_type = "I" # uint32 + gso_o_type = "Q" # uint64 + self._encoding = "utf-8" + if version == 117: + o_size = 4 + gso_o_type = "I" # 117 used uint32 + self._encoding = "latin-1" + elif version == 118: + o_size = 6 + else: # version == 119 + o_size = 5 + self._o_offet = 2 ** (8 * (8 - o_size)) + self._gso_o_type = gso_o_type + self._gso_v_type = gso_v_type + + def _convert_key(self, key): + v, o = key + return v + self._o_offet * o + + def generate_table(self): + """ + Generates the GSO lookup table for the DataFrame + + Returns + ------- + gso_table : dict + Ordered dictionary using the string found as keys + and their lookup position (v,o) as values + gso_df : DataFrame + DataFrame where strl columns have been converted to + (v,o) values + + Notes + ----- + Modifies the DataFrame in-place. + + The DataFrame returned encodes the (v,o) values as uint64s. The + encoding depends on the dta version, and can be expressed as + + enc = v + o * 2 ** (o_size * 8) + + so that v is stored in the lower bits and o is in the upper + bits. 
o_size is + + * 117: 4 + * 118: 6 + * 119: 5 + """ + + gso_table = self._gso_table + gso_df = self.df + columns = list(gso_df.columns) + selected = gso_df[self.columns] + col_index = [(col, columns.index(col)) for col in self.columns] + keys = np.empty(selected.shape, dtype=np.uint64) + for o, (idx, row) in enumerate(selected.iterrows()): + for j, (col, v) in enumerate(col_index): + val = row[col] + # Allow columns with mixed str and None (GH 23633) + val = "" if val is None else val + key = gso_table.get(val, None) + if key is None: + # Stata prefers human numbers + key = (v + 1, o + 1) + gso_table[val] = key + keys[o, j] = self._convert_key(key) + for i, col in enumerate(self.columns): + gso_df[col] = keys[:, i] + + return gso_table, gso_df + + def generate_blob(self, gso_table): + """ + Generates the binary blob of GSOs that is written to the dta file. + + Parameters + ---------- + gso_table : dict + Ordered dictionary (str, vo) + + Returns + ------- + gso : bytes + Binary content of dta file to be placed between strl tags + + Notes + ----- + Output format depends on dta version. 117 uses two uint32s to + express v and o while 118+ uses a uint32 for v and a uint64 for o. 
+ """ + # Format information + # Length includes null term + # 117 + # GSOvvvvooootllllxxxxxxxxxxxxxxx...x + # 3 u4 u4 u1 u4 string + null term + # + # 118, 119 + # GSOvvvvooooooootllllxxxxxxxxxxxxxxx...x + # 3 u4 u8 u1 u4 string + null term + + bio = BytesIO() + gso = bytes("GSO", "ascii") + gso_type = struct.pack(self._byteorder + "B", 130) + null = struct.pack(self._byteorder + "B", 0) + v_type = self._byteorder + self._gso_v_type + o_type = self._byteorder + self._gso_o_type + len_type = self._byteorder + "I" + for strl, vo in gso_table.items(): + if vo == (0, 0): + continue + v, o = vo + + # GSO + bio.write(gso) + + # vvvv + bio.write(struct.pack(v_type, v)) + + # oooo / oooooooo + bio.write(struct.pack(o_type, o)) + + # t + bio.write(gso_type) + + # llll + utf8_string = bytes(strl, "utf-8") + bio.write(struct.pack(len_type, len(utf8_string) + 1)) + + # xxx...xxx + bio.write(utf8_string) + bio.write(null) + + bio.seek(0) + return bio.read() + + +class StataWriter117(StataWriter): + """ + A class for writing Stata binary dta files in Stata 13 format (117) + + .. versionadded:: 0.23.0 + + Parameters + ---------- + fname : path (string), buffer or path object + string, path object (pathlib.Path or py._path.local.LocalPath) or + object implementing a binary write() functions. If using a buffer + then the buffer will not be automatically closed after the file + is written. + data : DataFrame + Input to save + convert_dates : dict + Dictionary mapping columns containing datetime types to stata internal + format to use when writing the dates. Options are 'tc', 'td', 'tm', + 'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name. + Datetime columns that do not have a conversion type specified will be + converted to 'tc'. Raises NotImplementedError if a datetime column has + timezone information + write_index : bool + Write the index to Stata dataset. + byteorder : str + Can be ">", "<", "little", or "big". 
default is `sys.byteorder` + time_stamp : datetime + A datetime to use as file creation date. Default is the current time + data_label : str + A label for the data set. Must be 80 characters or smaller. + variable_labels : dict + Dictionary containing columns as keys and variable labels as values. + Each label must be 80 characters or smaller. + convert_strl : list + List of columns names to convert to Stata StrL format. Columns with + more than 2045 characters are automatically written as StrL. + Smaller columns can be converted by including the column name. Using + StrLs can reduce output file size when strings are longer than 8 + characters, and either frequently repeated or sparse. + + Returns + ------- + writer : StataWriter117 instance + The StataWriter117 instance has a write_file method, which will + write the file to the given `fname`. + + Raises + ------ + NotImplementedError + * If datetimes contain timezone information + ValueError + * Columns listed in convert_dates are neither datetime64[ns] + or datetime.datetime + * Column dtype is not representable in Stata + * Column listed in convert_dates is not in DataFrame + * Categorical label contains more than 32,000 characters + + Examples + -------- + >>> from pandas.io.stata import StataWriter117 + >>> data = pd.DataFrame([[1.0, 1, 'a']], columns=['a', 'b', 'c']) + >>> writer = StataWriter117('./data_file.dta', data) + >>> writer.write_file() + + Or with long strings stored in strl format + + >>> data = pd.DataFrame([['A relatively long string'], [''], ['']], + ... columns=['strls']) + >>> writer = StataWriter117('./data_file_with_long_strings.dta', data, + ... 
convert_strl=['strls']) + >>> writer.write_file() + """ + + _max_string_length = 2045 + _dta_version = 117 + + def __init__( + self, + fname, + data, + convert_dates=None, + write_index=True, + byteorder=None, + time_stamp=None, + data_label=None, + variable_labels=None, + convert_strl=None, + ): + # Shallow copy since convert_strl might be modified later + self._convert_strl = [] if convert_strl is None else convert_strl[:] + + super().__init__( + fname, + data, + convert_dates, + write_index, + byteorder=byteorder, + time_stamp=time_stamp, + data_label=data_label, + variable_labels=variable_labels, + ) + self._map = None + self._strl_blob = None + + @staticmethod + def _tag(val, tag): + """Surround val with """ + if isinstance(val, str): + val = bytes(val, "utf-8") + return bytes("<" + tag + ">", "utf-8") + val + bytes("", "utf-8") + + def _update_map(self, tag): + """Update map location for tag with file position""" + self._map[tag] = self._file.tell() + + def _write_header(self, data_label=None, time_stamp=None): + """Write the file header""" + byteorder = self._byteorder + self._file.write(bytes("", "utf-8")) + bio = BytesIO() + # ds_format - 117 + bio.write(self._tag(bytes(str(self._dta_version), "utf-8"), "release")) + # byteorder + bio.write(self._tag(byteorder == ">" and "MSF" or "LSF", "byteorder")) + # number of vars, 2 bytes in 117 and 118, 4 byte in 119 + nvar_type = "H" if self._dta_version <= 118 else "I" + bio.write(self._tag(struct.pack(byteorder + nvar_type, self.nvar), "K")) + # 117 uses 4 bytes, 118 uses 8 + nobs_size = "I" if self._dta_version == 117 else "Q" + bio.write(self._tag(struct.pack(byteorder + nobs_size, self.nobs), "N")) + # data label 81 bytes, char, null terminated + label = data_label[:80] if data_label is not None else "" + label = label.encode(self._encoding) + label_size = "B" if self._dta_version == 117 else "H" + label_len = struct.pack(byteorder + label_size, len(label)) + label = label_len + label + 
bio.write(self._tag(label, "label")) + # time stamp, 18 bytes, char, null terminated + # format dd Mon yyyy hh:mm + if time_stamp is None: + time_stamp = datetime.datetime.now() + elif not isinstance(time_stamp, datetime.datetime): + raise ValueError("time_stamp should be datetime type") + # Avoid locale-specific month conversion + months = [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ] + month_lookup = {i + 1: month for i, month in enumerate(months)} + ts = ( + time_stamp.strftime("%d ") + + month_lookup[time_stamp.month] + + time_stamp.strftime(" %Y %H:%M") + ) + # '\x11' added due to inspection of Stata file + ts = b"\x11" + bytes(ts, "utf-8") + bio.write(self._tag(ts, "timestamp")) + bio.seek(0) + self._file.write(self._tag(bio.read(), "header")) + + def _write_map(self): + """Called twice during file write. The first populates the values in + the map with 0s. The second call writes the final map locations when + all blocks have been written.""" + if self._map is None: + self._map = dict( + ( + ("stata_data", 0), + ("map", self._file.tell()), + ("variable_types", 0), + ("varnames", 0), + ("sortlist", 0), + ("formats", 0), + ("value_label_names", 0), + ("variable_labels", 0), + ("characteristics", 0), + ("data", 0), + ("strls", 0), + ("value_labels", 0), + ("stata_data_close", 0), + ("end-of-file", 0), + ) + ) + # Move to start of map + self._file.seek(self._map["map"]) + bio = BytesIO() + for val in self._map.values(): + bio.write(struct.pack(self._byteorder + "Q", val)) + bio.seek(0) + self._file.write(self._tag(bio.read(), "map")) + + def _write_variable_types(self): + self._update_map("variable_types") + bio = BytesIO() + for typ in self.typlist: + bio.write(struct.pack(self._byteorder + "H", typ)) + bio.seek(0) + self._file.write(self._tag(bio.read(), "variable_types")) + + def _write_varnames(self): + self._update_map("varnames") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 
data worst case encoding + vn_len = 32 if self._dta_version == 117 else 128 + for name in self.varlist: + name = self._null_terminate(name, True) + name = _pad_bytes_new(name[:32].encode(self._encoding), vn_len + 1) + bio.write(name) + bio.seek(0) + self._file.write(self._tag(bio.read(), "varnames")) + + def _write_sortlist(self): + self._update_map("sortlist") + sort_size = 2 if self._dta_version < 119 else 4 + self._file.write(self._tag(b"\x00" * sort_size * (self.nvar + 1), "sortlist")) + + def _write_formats(self): + self._update_map("formats") + bio = BytesIO() + fmt_len = 49 if self._dta_version == 117 else 57 + for fmt in self.fmtlist: + bio.write(_pad_bytes_new(fmt.encode(self._encoding), fmt_len)) + bio.seek(0) + self._file.write(self._tag(bio.read(), "formats")) + + def _write_value_label_names(self): + self._update_map("value_label_names") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vl_len = 32 if self._dta_version == 117 else 128 + for i in range(self.nvar): + # Use variable name when categorical + name = "" # default name + if self._is_col_cat[i]: + name = self.varlist[i] + name = self._null_terminate(name, True) + name = _pad_bytes_new(name[:32].encode(self._encoding), vl_len + 1) + bio.write(name) + bio.seek(0) + self._file.write(self._tag(bio.read(), "value_label_names")) + + def _write_variable_labels(self): + # Missing labels are 80 blank characters plus null termination + self._update_map("variable_labels") + bio = BytesIO() + # 118 scales by 4 to accommodate utf-8 data worst case encoding + vl_len = 80 if self._dta_version == 117 else 320 + blank = _pad_bytes_new("", vl_len + 1) + + if self._variable_labels is None: + for _ in range(self.nvar): + bio.write(blank) + bio.seek(0) + self._file.write(self._tag(bio.read(), "variable_labels")) + return + + for col in self.data: + if col in self._variable_labels: + label = self._variable_labels[col] + if len(label) > 80: + raise ValueError("Variable labels must 
be 80 characters or fewer") + try: + encoded = label.encode(self._encoding) + except UnicodeEncodeError: + raise ValueError( + "Variable labels must contain only characters that " + f"can be encoded in {self._encoding}" + ) + + bio.write(_pad_bytes_new(encoded, vl_len + 1)) + else: + bio.write(blank) + bio.seek(0) + self._file.write(self._tag(bio.read(), "variable_labels")) + + def _write_characteristics(self): + self._update_map("characteristics") + self._file.write(self._tag(b"", "characteristics")) + + def _write_data(self): + self._update_map("data") + data = self.data + self._file.write(b"") + self._file.write(data.tobytes()) + self._file.write(b"") + + def _write_strls(self): + self._update_map("strls") + strls = b"" + if self._strl_blob is not None: + strls = self._strl_blob + self._file.write(self._tag(strls, "strls")) + + def _write_expansion_fields(self): + """No-op in dta 117+""" + pass + + def _write_value_labels(self): + self._update_map("value_labels") + bio = BytesIO() + for vl in self._value_labels: + lab = vl.generate_value_label(self._byteorder) + lab = self._tag(lab, "lbl") + bio.write(lab) + bio.seek(0) + self._file.write(self._tag(bio.read(), "value_labels")) + + def _write_file_close_tag(self): + self._update_map("stata_data_close") + self._file.write(bytes("", "utf-8")) + self._update_map("end-of-file") + + def _update_strl_names(self): + """Update column names for conversion to strl if they might have been + changed to comply with Stata naming rules""" + # Update convert_strl if names changed + for orig, new in self._converted_names.items(): + if orig in self._convert_strl: + idx = self._convert_strl.index(orig) + self._convert_strl[idx] = new + + def _convert_strls(self, data): + """Convert columns to StrLs if either very large or in the + convert_strl variable""" + convert_cols = [ + col + for i, col in enumerate(data) + if self.typlist[i] == 32768 or col in self._convert_strl + ] + + if convert_cols: + ssw = StataStrLWriter(data, 
class StataWriterUTF8(StataWriter117):
    """
    Stata binary dta file writing in Stata 15 (118) and 16 (119) formats

    DTA 118 and 119 format files support unicode string data (both fixed
    and strL) format. Unicode is also supported in value labels, variable
    labels and the dataset label. Format 119 is automatically used if the
    file contains more than 32,767 variables.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    fname : path (string), buffer or path object
        string, path object (pathlib.Path or py._path.local.LocalPath) or
        object implementing a binary write() function. If using a buffer
        then the buffer will not be automatically closed after the file
        is written.
    data : DataFrame
        Input to save
    convert_dates : dict, default None
        Dictionary mapping columns containing datetime types to stata internal
        format to use when writing the dates. Options are 'tc', 'td', 'tm',
        'tw', 'th', 'tq', 'ty'. Column can be either an integer or a name.
        Datetime columns that do not have a conversion type specified will be
        converted to 'tc'. Raises NotImplementedError if a datetime column has
        timezone information
    write_index : bool, default True
        Write the index to Stata dataset.
    byteorder : str, default None
        Can be ">", "<", "little", or "big". default is `sys.byteorder`
    time_stamp : datetime, default None
        A datetime to use as file creation date. Default is the current time
    data_label : str, default None
        A label for the data set. Must be 80 characters or smaller.
    variable_labels : dict, default None
        Dictionary containing columns as keys and variable labels as values.
        Each label must be 80 characters or smaller.
    convert_strl : list, default None
        List of columns names to convert to Stata StrL format. Columns with
        more than 2045 characters are automatically written as StrL.
        Smaller columns can be converted by including the column name. Using
        StrLs can reduce output file size when strings are longer than 8
        characters, and either frequently repeated or sparse.
    version : int, default None
        The dta version to use. By default, uses the size of data to determine
        the version. 118 is used if data.shape[1] <= 32767, and 119 is used
        for storing larger DataFrames.

    Returns
    -------
    StataWriterUTF8
        The instance has a write_file method, which will write the file to the
        given `fname`.

    Raises
    ------
    NotImplementedError
        * If datetimes contain timezone information
    ValueError
        * Columns listed in convert_dates are neither datetime64[ns]
          or datetime.datetime
        * Column dtype is not representable in Stata
        * Column listed in convert_dates is not in DataFrame
        * Categorical label contains more than 32,000 characters
        * ``version`` is given and is neither 118 nor 119
        * ``version`` is 118 but the data has more than 32,767 columns

    Examples
    --------
    Using Unicode data and column names

    >>> from pandas.io.stata import StataWriterUTF8
    >>> data = pd.DataFrame([[1.0, 1, 'ᴬ']], columns=['a', 'β', 'ĉ'])
    >>> writer = StataWriterUTF8('./data_file.dta', data)
    >>> writer.write_file()

    Or with long strings stored in strl format

    >>> data = pd.DataFrame([['ᴀ relatively long ŝtring'], [''], ['']],
    ...                     columns=['strls'])
    >>> writer = StataWriterUTF8('./data_file_with_long_strings.dta', data,
    ...                          convert_strl=['strls'])
    >>> writer.write_file()
    """

    _encoding = "utf-8"

    def __init__(
        self,
        fname: FilePathOrBuffer,
        data: DataFrame,
        convert_dates: Optional[Dict[Hashable, str]] = None,
        write_index: bool = True,
        byteorder: Optional[str] = None,
        time_stamp: Optional[datetime.datetime] = None,
        data_label: Optional[str] = None,
        variable_labels: Optional[Dict[Hashable, str]] = None,
        convert_strl: Optional[Sequence[Hashable]] = None,
        version: Optional[int] = None,
    ):
        # Resolve and validate the dta version before doing any other work,
        # so an unusable request fails fast.
        if version is None:
            version = 118 if data.shape[1] <= 32767 else 119
        elif version not in (118, 119):
            raise ValueError("version must be either 118 or 119.")
        elif version == 118 and data.shape[1] > 32767:
            raise ValueError(
                "You must use version 119 for data sets containing more than "
                "32,767 variables"
            )

        super().__init__(
            fname,
            data,
            convert_dates=convert_dates,
            write_index=write_index,
            byteorder=byteorder,
            time_stamp=time_stamp,
            data_label=data_label,
            variable_labels=variable_labels,
            convert_strl=convert_strl,
        )
        # Override version set in StataWriter117 init
        self._dta_version = version

    def _validate_variable_name(self, name: str) -> str:
        """
        Validate variable names for Stata export.

        Parameters
        ----------
        name : str
            Variable name

        Returns
        -------
        str
            The validated name with invalid characters replaced with
            underscores.

        Notes
        -----
        Stata 118+ support most unicode characters. Every code point below
        256 other than a-z, A-Z, 0-9 and _ is replaced: that covers the
        remaining ASCII characters as well as the range 128-255. Code
        points of 256 and above are passed through unchanged.
        """

        def _is_allowed(char: str) -> bool:
            # High code points appear to be acceptable
            if ord(char) >= 256:
                return True
            return (
                "A" <= char <= "Z"
                or "a" <= char <= "z"
                or "0" <= char <= "9"
                or char == "_"
            )

        # Single pass over the name instead of one str.replace per character.
        return "".join(char if _is_allowed(char) else "_" for char in name)
But with +the current version, backends are expected to implement the next functions: + +- plot (described above, used for `Series.plot` and `DataFrame.plot`) +- hist_series and hist_frame (for `Series.hist` and `DataFrame.hist`) +- boxplot (`pandas.plotting.boxplot(df)` equivalent to `DataFrame.boxplot`) +- boxplot_frame and boxplot_frame_groupby +- register and deregister (register converters for the tick formats) +- Plots not called as `Series` and `DataFrame` methods: + - table + - andrews_curves + - autocorrelation_plot + - bootstrap_plot + - lag_plot + - parallel_coordinates + - radviz + - scatter_matrix + +Use the code in pandas/plotting/_matplotlib/ and +https://github.com/pyviz/hvplot as a reference on how to write a backend. + +For the discussion about the API see +https://github.com/pandas-dev/pandas/issues/26747. +""" +from pandas.plotting._core import ( + PlotAccessor, + boxplot, + boxplot_frame, + boxplot_frame_groupby, + hist_frame, + hist_series, +) +from pandas.plotting._misc import ( + andrews_curves, + autocorrelation_plot, + bootstrap_plot, + deregister as deregister_matplotlib_converters, + lag_plot, + parallel_coordinates, + plot_params, + radviz, + register as register_matplotlib_converters, + scatter_matrix, + table, +) + +__all__ = [ + "PlotAccessor", + "boxplot", + "boxplot_frame", + "boxplot_frame_groupby", + "hist_frame", + "hist_series", + "scatter_matrix", + "radviz", + "andrews_curves", + "bootstrap_plot", + "parallel_coordinates", + "lag_plot", + "autocorrelation_plot", + "table", + "plot_params", + "register_matplotlib_converters", + "deregister_matplotlib_converters", +] diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py new file mode 100644 index 00000000..c239f11d --- /dev/null +++ b/pandas/plotting/_core.py @@ -0,0 +1,1678 @@ +import importlib + +from pandas._config import get_option + +from pandas.util._decorators import Appender, Substitution + +from pandas.core.dtypes.common import is_integer, is_list_like +from
pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + +from pandas.core.base import PandasObject + + +def hist_series( + self, + by=None, + ax=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + figsize=None, + bins=10, + backend=None, + **kwargs, +): + """ + Draw histogram of the input series using matplotlib. + + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups. + ax : matplotlib axis object + If not passed, uses gca(). + grid : bool, default True + Whether to show axis grid lines. + xlabelsize : int, default None + If specified changes the x-axis label size. + xrot : float, default None + Rotation of x axis labels. + ylabelsize : int, default None + If specified changes the y-axis label size. + yrot : float, default None + Rotation of y axis labels. + figsize : tuple, default None + Figure size in inches by default. + bins : int or sequence, default 10 + Number of histogram bins to be used. If an integer is given, bins + 1 + bin edges are calculated and returned. If bins is a sequence, gives + bin edges, including left edge of first bin and right edge of last + bin. In this case, bins is returned unmodified. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + **kwargs + To be passed to the actual plotting function. + + Returns + ------- + matplotlib.AxesSubplot + A histogram plot. + + See Also + -------- + matplotlib.axes.Axes.hist : Plot a histogram using matplotlib. 
def hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    backend=None,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist`, on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default True if ax is None else False
        In case subplots=True, share x axis and set some x axis labels to
        invisible; defaults to True if ax is None otherwise False if an ax
        is passed in.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------

    .. plot::
        :context: close-figs

        This example draws a histogram based on the length and width of
        some animals, displayed in three bins

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` only selects the implementing module; it is not forwarded.
    backend_module = _get_plot_backend(backend)
    hist_options = dict(
        column=column,
        by=by,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        ax=ax,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
        bins=bins,
    )
    return backend_module.hist_frame(data, **hist_options, **kwargs)
+ +Make a box-and-whisker plot from DataFrame columns, optionally grouped +by some other columns. A box plot is a method for graphically depicting +groups of numerical data through their quartiles. +The box extends from the Q1 to Q3 quartile values of the data, +with a line at the median (Q2). The whiskers extend from the edges +of the box to show the range of the data. The position of the whiskers +is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box. +Outlier points are those past the end of the whiskers. + +For further details see +Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_. + +Parameters +---------- +column : str or list of str, optional + Column name or list of names, or vector. + Can be any valid input to :meth:`pandas.DataFrame.groupby`. +by : str or array-like, optional + Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. + One box-plot will be done per value of columns in `by`. +ax : object of class matplotlib.axes.Axes, optional + The matplotlib axes to be used by boxplot. +fontsize : float or str + Tick label font size in points or as a string (e.g., `large`). +rot : int or float, default 0 + The rotation angle of labels (in degrees) + with respect to the screen coordinate system. +grid : bool, default True + Setting this to True will show the grid. +figsize : A tuple (width, height) in inches + The size of the figure to create in matplotlib. +layout : tuple (rows, columns), optional + For example, (3, 5) will display the subplots + using 3 rows and 5 columns, starting from the top-left. +return_type : {'axes', 'dict', 'both'} or None, default 'axes' + The kind of object to return. The default is ``axes``. + + * 'axes' returns the matplotlib axes the boxplot is drawn on. + * 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot. + * 'both' returns a namedtuple with the axes and dict. + * when grouping with ``by``, a Series mapping columns to + ``return_type`` is returned.
+ + If ``return_type`` is `None`, a NumPy array + of axes with the same shape as ``layout`` is returned. +%(backend)s\ + +**kwargs + All other plotting keyword arguments to be passed to + :func:`matplotlib.pyplot.boxplot`. + +Returns +------- +result + See Notes. + +See Also +-------- +Series.plot.hist: Make a histogram. +matplotlib.pyplot.boxplot : Matplotlib equivalent plot. + +Notes +----- +The return type depends on the `return_type` parameter: + +* 'axes' : object of class matplotlib.axes.Axes +* 'dict' : dict of matplotlib.lines.Line2D objects +* 'both' : a namedtuple with structure (ax, lines) + +For data grouped with ``by``, return a Series of the above or a numpy +array: + +* :class:`~pandas.Series` +* :class:`~numpy.array` (for ``return_type = None``) + +Use ``return_type='dict'`` when you want to tweak the appearance +of the lines after plotting. In this case a dict containing the Lines +making up the boxes, caps, fliers, medians, and whiskers is returned. + +Examples +-------- + +Boxplots can be created for every column in the dataframe +by ``df.boxplot()`` or indicating the columns to be used: + +.. plot:: + :context: close-figs + + >>> np.random.seed(1234) + >>> df = pd.DataFrame(np.random.randn(10, 4), + ... columns=['Col1', 'Col2', 'Col3', 'Col4']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) + +Boxplots of variables distributions grouped by the values of a third +variable can be created using the option ``by``. For instance: + +.. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10, 2), + ... columns=['Col1', 'Col2']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> boxplot = df.boxplot(by='X') + +A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot +in order to group the data by combination of the variables in the x-axis: + +.. plot:: + :context: close-figs + + >>> df = pd.DataFrame(np.random.randn(10, 3), + ... 
columns=['Col1', 'Col2', 'Col3']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', + ... 'B', 'A', 'B', 'A', 'B']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) + +The layout of boxplot can be adjusted giving a tuple to ``layout``: + +.. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... layout=(2, 1)) + +Additional formatting can be done to the boxplot, like suppressing the grid +(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) +or changing the fontsize (i.e. ``fontsize=15``): + +.. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) + +The parameter ``return_type`` can be used to select the type of element +returned by `boxplot`. When ``return_type='axes'`` is selected, +the matplotlib axes on which the boxplot is drawn are returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes') + >>> type(boxplot) + + +When grouping with ``by``, a Series mapping columns to ``return_type`` +is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... return_type='axes') + >>> type(boxplot) + + +If ``return_type`` is `None`, a NumPy array of axes with the same shape +as ``layout`` is returned: + + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... return_type=None) + >>> type(boxplot) + +""" + +_backend_doc = """\ +backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. 
def boxplot_frame_groupby(
    grouped,
    subplots=True,
    column=None,
    fontsize=None,
    rot=0,
    grid=True,
    ax=None,
    figsize=None,
    layout=None,
    sharex=False,
    sharey=True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : int or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.

        .. versionadded:: 0.23.1
    sharey : bool, default True
        Whether y-axes will be shared among subplots.

        .. versionadded:: 0.23.1
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=figures=False

    Examples
    --------
    >>> import itertools
    >>> tuples = [t for t in itertools.product(range(1000), range(4))]
    >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
    >>> data = np.random.randn(len(index),4)
    >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
    >>>
    >>> grouped = df.groupby(level='lvl1')
    >>> boxplot_frame_groupby(grouped)
    >>>
    >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1)
    >>> boxplot_frame_groupby(grouped, subplots=False)
    """
    # ``backend`` only selects the implementing module; it is not forwarded.
    backend_module = _get_plot_backend(backend)
    box_options = dict(
        subplots=subplots,
        column=column,
        fontsize=fontsize,
        rot=rot,
        grid=grid,
        ax=ax,
        figsize=figsize,
        layout=layout,
        sharex=sharex,
        sharey=sharey,
    )
    return backend_module.boxplot_frame_groupby(grouped, **box_options, **kwargs)
+ kind : str + The kind of plot to produce: + + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + - 'scatter' : scatter plot + - 'hexbin' : hexbin plot. + + figsize : a tuple (width, height) in inches + use_index : bool, default True + Use index as ticks for x axis. + title : str or list + Title to use for the plot. If a string is passed, print the string + at the top of the figure. If a list is passed and `subplots` is + True, print each item in the list above the corresponding subplot. + grid : bool, default None (matlab style default) + Axis grid lines. + legend : bool or {'reverse'} + Place legend on axis subplots. + style : list or dict + The matplotlib line style per column. + logx : bool or 'sym', default False + Use log scaling or symlog scaling on x axis. + .. versionchanged:: 0.25.0 + + logy : bool or 'sym' default False + Use log scaling or symlog scaling on y axis. + .. versionchanged:: 0.25.0 + + loglog : bool or 'sym', default False + Use log scaling or symlog scaling on both x and y axes. + .. versionchanged:: 0.25.0 + + xticks : sequence + Values to use for the xticks. + yticks : sequence + Values to use for the yticks. + xlim : 2-tuple/list + ylim : 2-tuple/list + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal + plots). + fontsize : int, default None + Font size for xticks and yticks. + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + colorbar : bool, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' + plots). + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 + (center). 
+ table : bool, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data + will be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a + table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : DataFrame, Series, array-like, dict and str + Equivalent to yerr. + mark_right : bool, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend. + include_bool : bool, default is False + If True, boolean values can be plotted. + backend : str, default None + Backend to use instead of the backend specified in the option + ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to + specify the ``plotting.backend`` for the whole session, set + ``pd.options.plotting.backend``. + + .. versionadded:: 1.0.0 + + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + If the backend is not the default matplotlib one, the return value + will be the object returned by the backend. + + Notes + ----- + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). 
Default is 0.5 + (center) + """ + + _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") + _series_kinds = ("pie",) + _dataframe_kinds = ("scatter", "hexbin") + _kind_aliases = {"density": "kde"} + _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds + + def __init__(self, data): + self._parent = data + + @staticmethod + def _get_call_args(backend_name, data, args, kwargs): + """ + This function makes calls to this accessor `__call__` method compatible + with the previous `SeriesPlotMethods.__call__` and + `DataFramePlotMethods.__call__`. Those had slightly different + signatures, since `DataFramePlotMethods` accepted `x` and `y` + parameters. + """ + if isinstance(data, ABCSeries): + arg_def = [ + ("kind", "line"), + ("ax", None), + ("figsize", None), + ("use_index", True), + ("title", None), + ("grid", None), + ("legend", False), + ("style", None), + ("logx", False), + ("logy", False), + ("loglog", False), + ("xticks", None), + ("yticks", None), + ("xlim", None), + ("ylim", None), + ("rot", None), + ("fontsize", None), + ("colormap", None), + ("table", False), + ("yerr", None), + ("xerr", None), + ("label", None), + ("secondary_y", False), + ] + elif isinstance(data, ABCDataFrame): + arg_def = [ + ("x", None), + ("y", None), + ("kind", "line"), + ("ax", None), + ("subplots", False), + ("sharex", None), + ("sharey", False), + ("layout", None), + ("figsize", None), + ("use_index", True), + ("title", None), + ("grid", None), + ("legend", True), + ("style", None), + ("logx", False), + ("logy", False), + ("loglog", False), + ("xticks", None), + ("yticks", None), + ("xlim", None), + ("ylim", None), + ("rot", None), + ("fontsize", None), + ("colormap", None), + ("table", False), + ("yerr", None), + ("xerr", None), + ("secondary_y", False), + ("sort_columns", False), + ] + else: + raise TypeError( + f"Called plot accessor for type {type(data).__name__}, " + "expected Series or DataFrame" + ) + + if args and isinstance(data, 
ABCSeries): + positional_args = str(args)[1:-1] + keyword_args = ", ".join( + f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args) + ) + msg = ( + "`Series.plot()` should not be called with positional " + "arguments, only keyword arguments. The order of " + "positional arguments will change in the future. " + f"Use `Series.plot({keyword_args})` instead of " + f"`Series.plot({positional_args})`." + ) + raise TypeError(msg) + + pos_args = {name: value for value, (name, _) in zip(args, arg_def)} + if backend_name == "pandas.plotting._matplotlib": + kwargs = dict(arg_def, **pos_args, **kwargs) + else: + kwargs = dict(pos_args, **kwargs) + + x = kwargs.pop("x", None) + y = kwargs.pop("y", None) + kind = kwargs.pop("kind", "line") + return x, y, kind, kwargs + + def __call__(self, *args, **kwargs): + plot_backend = _get_plot_backend(kwargs.pop("backend", None)) + + x, y, kind, kwargs = self._get_call_args( + plot_backend.__name__, self._parent, args, kwargs + ) + + kind = self._kind_aliases.get(kind, kind) + + # when using another backend, get out of the way + if plot_backend.__name__ != "pandas.plotting._matplotlib": + return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs) + + if kind not in self._all_kinds: + raise ValueError(f"{kind} is not a valid plot kind") + + # The original data structured can be transformed before passed to the + # backend. For example, for DataFrame is common to set the index as the + # `x` parameter, and return a Series with the parameter `y` as values. 
+ data = self._parent.copy() + + if isinstance(data, ABCSeries): + kwargs["reuse_plot"] = True + + if kind in self._dataframe_kinds: + if isinstance(data, ABCDataFrame): + return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs) + else: + raise ValueError(f"plot kind {kind} can only be used for data frames") + elif kind in self._series_kinds: + if isinstance(data, ABCDataFrame): + if y is None and kwargs.get("subplots") is False: + raise ValueError( + f"{kind} requires either y column or 'subplots=True'" + ) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + elif isinstance(data, ABCDataFrame): + data_cols = data.columns + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data_cols[x] + elif not isinstance(data[x], ABCSeries): + raise ValueError("x must be a label or position") + data = data.set_index(x) + if y is not None: + # check if we have y as int or list of ints + int_ylist = is_list_like(y) and all(is_integer(c) for c in y) + int_y_arg = is_integer(y) or int_ylist + if int_y_arg and not data.columns.holds_integer(): + y = data_cols[y] + + label_kw = kwargs["label"] if "label" in kwargs else False + for kw in ["xerr", "yerr"]: + if kw in kwargs and ( + isinstance(kwargs[kw], str) or is_integer(kwargs[kw]) + ): + try: + kwargs[kw] = data[kwargs[kw]] + except (IndexError, KeyError, TypeError): + pass + + # don't overwrite + data = data[y].copy() + + if isinstance(data, ABCSeries): + label_name = label_kw or y + data.name = label_name + else: + match = is_list_like(label_kw) and len(label_kw) == len(y) + if label_kw and not match: + raise ValueError( + "label should be list-like and same length as y" + ) + label_name = label_kw or data.columns + data.columns = label_name + + return plot_backend.plot(data, kind=kind, **kwargs) + + __call__.__doc__ = __doc__ + + def line(self, 
def bar(self, x=None, y=None, **kwargs):
    """
    Draw a vertical bar plot.

    Each category is drawn as a rectangular bar whose height is
    proportional to the value it represents, which makes bar plots well
    suited to comparing discrete categories. One axis carries the
    categories being compared and the other carries the measured value.

    Parameters
    ----------
    x : label or position, optional
        Column to plot the other column(s) against. When omitted, the
        index of the DataFrame is used.
    y : label or position, optional
        Column to plot against ``x``. When omitted, all numerical
        columns are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        An ndarray is returned with one :class:`matplotlib.axes.Axes`
        per column when ``subplots=True``.

    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
    # Delegate to ``__call__`` with the kind pinned; a stray ``kind`` in
    # ``kwargs`` still collides and raises ``TypeError``.
    axis_columns = {"x": x, "y": y}
    return self(kind="bar", **axis_columns, **kwargs)
'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = pd.DataFrame({'speed': speed, + ... 'lifespan': lifespan}, index=index) + >>> ax = df.plot.barh(y='speed') + + Plot DataFrame versus the desired column + + .. plot:: + :context: close-figs + + >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88] + >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28] + >>> index = ['snail', 'pig', 'elephant', + ... 'rabbit', 'giraffe', 'coyote', 'horse'] + >>> df = pd.DataFrame({'speed': speed, + ... 'lifespan': lifespan}, index=index) + >>> ax = df.plot.barh(x='lifespan') + """ + return self(kind="barh", x=x, y=y, **kwargs) + + def box(self, by=None, **kwargs): + r""" + Make a box plot of the DataFrame columns. + + A box plot is a method for graphically depicting groups of numerical + data through their quartiles. + The box extends from the Q1 to Q3 quartile values of the data, + with a line at the median (Q2). The whiskers extend from the edges + of box to show the range of the data. The position of the whiskers + is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the + box. Outlier points are those past the end of the whiskers. + + For further details see Wikipedia's + entry for `boxplot `__. + + A consideration when using this chart is that the box and the whiskers + can overlap, which is very common when plotting small sets of data. + + Parameters + ---------- + by : str or sequence + Column in the DataFrame to group by. + **kwargs + Additional keywords are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + + See Also + -------- + DataFrame.boxplot: Another method to draw a box plot. + Series.plot.box: Draw a box plot from a Series object. + matplotlib.pyplot.boxplot: Draw a box plot in matplotlib. + + Examples + -------- + Draw a box plot from a DataFrame with four columns of randomly + generated data. + + .. 
def kde(self, bw_method=None, ind=None, **kwargs):
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. This function uses Gaussian kernels and includes automatic
    bandwidth determination.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # ``bw_method`` and ``ind`` are always forwarded, even when left at
    # ``None``, so the plot kind sees them explicitly.
    return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)

# ``density`` is an alias: both names are bound to the same function object.
density = kde
def pie(self, **kwargs):
    """
    Generate a pie plot.

    A pie plot shows the numerical data of one column as proportional
    slices. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True``, one pie plot is drawn per numerical column.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the parent object is a DataFrame and neither a ``y`` column
        nor a truthy ``subplots`` is given.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    In the example below we have a DataFrame with the information about
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(6, 3))
    """
    # Only a DataFrame parent needs the check: it must say which column to
    # draw, or ask for one pie per column.
    if isinstance(self._parent, ABCDataFrame):
        column_missing = kwargs.get("y", None) is None
        if column_missing and not kwargs.get("subplots", False):
            raise ValueError("pie requires either y column or 'subplots=True'")

    return self(kind="pie", **kwargs)
+ + Returns + ------- + :class:`matplotlib.axes.Axes` or numpy.ndarray of them + + See Also + -------- + matplotlib.pyplot.scatter : Scatter plot using multiple input data + formats. + + Examples + -------- + Let's see how to draw a scatter plot using coordinates from the values + in a DataFrame's columns. + + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], + ... [6.4, 3.2, 1], [5.9, 3.0, 2]], + ... columns=['length', 'width', 'species']) + >>> ax1 = df.plot.scatter(x='length', + ... y='width', + ... c='DarkBlue') + + And now with the color determined by a column as well. + + .. plot:: + :context: close-figs + + >>> ax2 = df.plot.scatter(x='length', + ... y='width', + ... c='species', + ... colormap='viridis') + """ + return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs) + + def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): + """ + Generate a hexagonal binning plot. + + Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` + (the default), this is a histogram of the number of occurrences + of the observations at ``(x[i], y[i])``. + + If `C` is specified, specifies values at given coordinates + ``(x[i], y[i])``. These values are accumulated for each hexagonal + bin and then reduced according to `reduce_C_function`, + having as default the NumPy's mean function (:meth:`numpy.mean`). + (If `C` is specified, it must also be a 1-D sequence + of the same length as `x` and `y`, or a column label.) + + Parameters + ---------- + x : int or str + The column label or position for x points. + y : int or str + The column label or position for y points. + C : int or str, optional + The column label or position for the value of `(x, y)` point. + reduce_C_function : callable, default `np.mean` + Function of one argument that reduces all the values in a bin to + a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). 
+ gridsize : int or tuple of (int, int), default 100 + The number of hexagons in the x-direction. + The corresponding number of hexagons in the y-direction is + chosen in a way that the hexagons are approximately regular. + Alternatively, gridsize can be a tuple with two elements + specifying the number of hexagons in the x-direction and the + y-direction. + **kwargs + Additional keyword arguments are documented in + :meth:`DataFrame.plot`. + + Returns + ------- + matplotlib.AxesSubplot + The matplotlib ``Axes`` on which the hexbin is plotted. + + See Also + -------- + DataFrame.plot : Make plots of a DataFrame. + matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib, + the matplotlib function that is used under the hood. + + Examples + -------- + The following examples are generated with random data from + a normal distribution. + + .. plot:: + :context: close-figs + + >>> n = 10000 + >>> df = pd.DataFrame({'x': np.random.randn(n), + ... 'y': np.random.randn(n)}) + >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) + + The next example uses `C` and `np.sum` as `reduce_C_function`. + Note that `'observations'` values ranges from 1 to 5 but the result + plot shows values up to more than 25. This is because of the + `reduce_C_function`. + + .. plot:: + :context: close-figs + + >>> n = 500 + >>> df = pd.DataFrame({ + ... 'coord_x': np.random.uniform(-3, 3, size=n), + ... 'coord_y': np.random.uniform(30, 50, size=n), + ... 'observations': np.random.randint(1,5, size=n) + ... }) + >>> ax = df.plot.hexbin(x='coord_x', + ... y='coord_y', + ... C='observations', + ... reduce_C_function=np.sum, + ... gridsize=10, + ... 
cmap="viridis") + """ + if reduce_C_function is not None: + kwargs["reduce_C_function"] = reduce_C_function + if gridsize is not None: + kwargs["gridsize"] = gridsize + + return self(kind="hexbin", x=x, y=y, C=C, **kwargs) + + +_backends = {} + + +def _find_backend(backend: str): + """ + Find a pandas plotting backend> + + Parameters + ---------- + backend : str + The identifier for the backend. Either an entrypoint item registered + with pkg_resources, or a module name. + + Notes + ----- + Modifies _backends with imported backends as a side effect. + + Returns + ------- + types.ModuleType + The imported backend. + """ + import pkg_resources # Delay import for performance. + + for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"): + if entry_point.name == "matplotlib": + # matplotlib is an optional dependency. When + # missing, this would raise. + continue + _backends[entry_point.name] = entry_point.load() + + try: + return _backends[backend] + except KeyError: + # Fall back to unregisted, module name approach. + try: + module = importlib.import_module(backend) + except ImportError: + # We re-raise later on. + pass + else: + if hasattr(module, "plot"): + # Validate that the interface is implemented when the option + # is set, rather than at plot time. + _backends[backend] = module + return module + + raise ValueError( + f"Could not find plotting backend '{backend}'. Ensure that you've installed " + f"the package providing the '{backend}' entrypoint, or that the package has a " + "top-level `.plot` method." + ) + + +def _get_plot_backend(backend=None): + """ + Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`). + + The plotting system of pandas has been using matplotlib, but the idea here + is that it can also work with other third-party backends. 
In the future, + this function will return the backend from a pandas option, and all the + rest of the code in this file will use the backend specified there for the + plotting. + + The backend is imported lazily, as matplotlib is a soft dependency, and + pandas can be used without it being installed. + """ + backend = backend or get_option("plotting.backend") + + if backend == "matplotlib": + # Because matplotlib is an optional dependency and first-party backend, + # we need to attempt an import here to raise an ImportError if needed. + try: + import pandas.plotting._matplotlib as module + except ImportError: + raise ImportError( + "matplotlib is required for plotting when the " + 'default backend "matplotlib" is selected.' + ) from None + + _backends["matplotlib"] = module + + if backend in _backends: + return _backends[backend] + + module = _find_backend(backend) + _backends[backend] = module + return module diff --git a/pandas/plotting/_matplotlib/__init__.py b/pandas/plotting/_matplotlib/__init__.py new file mode 100644 index 00000000..27b1d55f --- /dev/null +++ b/pandas/plotting/_matplotlib/__init__.py @@ -0,0 +1,83 @@ +from typing import TYPE_CHECKING, Dict, Type + +from pandas.plotting._matplotlib.boxplot import ( + BoxPlot, + boxplot, + boxplot_frame, + boxplot_frame_groupby, +) +from pandas.plotting._matplotlib.converter import deregister, register +from pandas.plotting._matplotlib.core import ( + AreaPlot, + BarhPlot, + BarPlot, + HexBinPlot, + LinePlot, + PiePlot, + ScatterPlot, +) +from pandas.plotting._matplotlib.hist import HistPlot, KdePlot, hist_frame, hist_series +from pandas.plotting._matplotlib.misc import ( + andrews_curves, + autocorrelation_plot, + bootstrap_plot, + lag_plot, + parallel_coordinates, + radviz, + scatter_matrix, +) +from pandas.plotting._matplotlib.tools import table + +if TYPE_CHECKING: + from pandas.plotting._matplotlib.core import MPLPlot # noqa: F401 + +PLOT_CLASSES: Dict[str, Type["MPLPlot"]] = { + "line": LinePlot, + 
def plot(data, kind, **kwargs):
    """
    Build, render and return a matplotlib plot of ``data``.

    Parameters
    ----------
    data : object
        Data handed unchanged to the plot class selected by ``kind``.
    kind : str
        Key into ``PLOT_CLASSES`` selecting the plot class.
    **kwargs
        Forwarded to the plot class. ``reuse_plot`` is consumed here: when
        truthy, no ``ax`` was given and a figure already exists, the
        current axes (or its ``left_ax``, if it has one) is passed as
        ``ax``.

    Returns
    -------
    object
        The ``result`` attribute of the generated plot object.
    """
    # Importing pyplot at the top of the file (before the converters are
    # registered) causes problems in matplotlib 2 (converters seem to not
    # work)
    import matplotlib.pyplot as plt

    draw_on_current = kwargs.pop("reuse_plot", False)
    if draw_on_current and kwargs.get("ax") is None and plt.get_fignums():
        with plt.rc_context():
            current_axes = plt.gca()
        # Prefer the primary axes when the current one has a ``left_ax``.
        kwargs["ax"] = getattr(current_axes, "left_ax", current_axes)

    plotter = PLOT_CLASSES[kind](data, **kwargs)
    plotter.generate()
    plotter.draw()
    return plotter.result
+ def __init__(self, data, return_type="axes", **kwargs): + # Do not call LinePlot.__init__ which may fill nan + if return_type not in self._valid_return_types: + raise ValueError("return_type must be {None, 'axes', 'dict', 'both'}") + + self.return_type = return_type + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if self.subplots: + # Disable label ax sharing. Otherwise, all subplots shows last + # column label + if self.orientation == "vertical": + self.sharex = False + else: + self.sharey = False + + @classmethod + def _plot(cls, ax, y, column_num=None, return_type="axes", **kwds): + if y.ndim == 2: + y = [remove_na_arraylike(v) for v in y] + # Boxplot fails with empty arrays, so need to add a NaN + # if any cols are empty + # GH 8181 + y = [v if v.size > 0 else np.array([np.nan]) for v in y] + else: + y = remove_na_arraylike(y) + bp = ax.boxplot(y, **kwds) + + if return_type == "dict": + return bp, bp + elif return_type == "both": + return cls.BP(ax=ax, lines=bp), bp + else: + return ax, bp + + def _validate_color_args(self): + if "color" in self.kwds: + if self.colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'" + ) + self.color = self.kwds.pop("color") + + if isinstance(self.color, dict): + valid_keys = ["boxes", "whiskers", "medians", "caps"] + for key, values in self.color.items(): + if key not in valid_keys: + raise ValueError( + f"color dict contains invalid key '{key}'. 
def maybe_color_bp(self, bp):
    """
    Apply the configured colors to the artists of a drawn box plot.

    Parameters
    ----------
    bp : dict
        Artists indexed by the keys "boxes", "whiskers", "medians" and
        "caps".

    Notes
    -----
    When ``self.color`` is a dict, each part uses its own entry and falls
    back to the matching default (``_boxes_c``, ``_whiskers_c``,
    ``_medians_c``, ``_caps_c``) for missing keys. Any other truthy value
    is applied to every part; a falsy value selects the defaults. Alpha
    is always set to 1.
    """
    if isinstance(self.color, dict):
        boxes_color = self.color.get("boxes", self._boxes_c)
        whiskers_color = self.color.get("whiskers", self._whiskers_c)
        medians_color = self.color.get("medians", self._medians_c)
        caps_color = self.color.get("caps", self._caps_c)
    else:
        # Other types are forwarded to matplotlib
        # If None, use default colors
        boxes_color = self.color or self._boxes_c
        whiskers_color = self.color or self._whiskers_c
        medians_color = self.color or self._medians_c
        caps_color = self.color or self._caps_c

    assignments = (
        ("boxes", boxes_color),
        ("whiskers", whiskers_color),
        ("medians", medians_color),
        ("caps", caps_color),
    )
    for part, shade in assignments:
        setp(bp[part], color=shade, alpha=1)
def _grouped_plot_by_column(
    plotf,
    data,
    columns=None,
    by=None,
    numeric_only=True,
    grid=False,
    figsize=None,
    ax=None,
    layout=None,
    return_type=None,
    **kwargs,
):
    """
    Draw one subplot per column, each showing that column grouped by ``by``.

    Parameters
    ----------
    plotf : callable
        Called as ``plotf(keys, values, ax, **kwargs)`` for every column,
        with the group keys and the per-group values of that column.
    data : DataFrame
        Data to group and plot.
    columns : sequence of labels, optional
        Columns to plot. Defaults to the numeric columns of ``data`` that
        are not used for grouping.
    by : label or list of labels
        Passed to ``data.groupby``.
    numeric_only : bool, default True
        Accepted but not used by this implementation.
    grid : bool, default False
        Passed to ``ax.grid`` for every subplot.
    figsize, ax, layout
        Forwarded to ``_subplots``.
    return_type : optional
        When None the axes object(s) returned by ``_subplots`` are
        returned; otherwise a Series of the ``plotf`` results indexed by
        column.
    **kwargs
        Forwarded to ``plotf``.

    Returns
    -------
    Series or axes
        See ``return_type``.
    """
    grouped = data.groupby(by)
    if columns is None:
        if not isinstance(by, (list, tuple)):
            by = [by]
        columns = data._get_numeric_data().columns.difference(by)
    naxes = len(columns)
    fig, axes = _subplots(
        naxes=naxes, sharex=True, sharey=True, figsize=figsize, ax=ax, layout=layout
    )

    _axes = _flatten(axes)

    ax_values = []

    for i, col in enumerate(columns):
        ax = _axes[i]
        gp_col = grouped[col]
        keys, values = zip(*gp_col)
        re_plotf = plotf(keys, values, ax, **kwargs)
        ax.set_title(col)
        ax.set_xlabel(pprint_thing(by))
        ax_values.append(re_plotf)
        ax.grid(grid)

    result = pd.Series(ax_values, index=columns)

    # Return axes in multiplot case, maybe revisit later # 985
    if return_type is None:
        result = axes

    # When ``columns`` is given, ``by`` was not wrapped in a list above and
    # may be a scalar label without a length (e.g. an integer column name);
    # such a label is used for the title as is instead of raising TypeError.
    try:
        single_key = len(by) == 1
    except TypeError:
        single_key = False
    byline = by[0] if single_key else by
    fig.suptitle(f"Boxplot grouped by {byline}")
    fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)

    return result
ValueError("return_type must be {'axes', 'dict', 'both'}") + + if isinstance(data, ABCSeries): + data = data.to_frame("x") + column = "x" + + def _get_colors(): + # num_colors=3 is required as method maybe_color_bp takes the colors + # in positions 0 and 2. + # if colors not provided, use same defaults as DataFrame.plot.box + result = _get_standard_colors(num_colors=3) + result = np.take(result, [0, 0, 2]) + result = np.append(result, "k") + + colors = kwds.pop("color", None) + if colors: + if is_dict_like(colors): + # replace colors in result array with user-specified colors + # taken from the colors dict parameter + # "boxes" value placed in position 0, "whiskers" in 1, etc. + valid_keys = ["boxes", "whiskers", "medians", "caps"] + key_to_index = dict(zip(valid_keys, range(4))) + for key, value in colors.items(): + if key in valid_keys: + result[key_to_index[key]] = value + else: + raise ValueError( + f"color dict contains invalid key '{key}'. " + f"The key must be either {valid_keys}" + ) + else: + result.fill(colors) + + return result + + def maybe_color_bp(bp): + setp(bp["boxes"], color=colors[0], alpha=1) + setp(bp["whiskers"], color=colors[1], alpha=1) + setp(bp["medians"], color=colors[2], alpha=1) + setp(bp["caps"], color=colors[3], alpha=1) + + def plot_group(keys, values, ax): + keys = [pprint_thing(x) for x in keys] + values = [np.asarray(remove_na_arraylike(v)) for v in values] + bp = ax.boxplot(values, **kwds) + if fontsize is not None: + ax.tick_params(axis="both", labelsize=fontsize) + if kwds.get("vert", 1): + ax.set_xticklabels(keys, rotation=rot) + else: + ax.set_yticklabels(keys, rotation=rot) + maybe_color_bp(bp) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == "dict": + return bp + elif return_type == "both": + return BoxPlot.BP(ax=ax, lines=bp) + else: + return ax + + colors = _get_colors() + if column is None: + columns = None + else: + if isinstance(column, (list, tuple)): + columns = column + else: + 
columns = [column] + + if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 + result = _grouped_plot_by_column( + plot_group, + data, + columns=columns, + by=by, + grid=grid, + figsize=figsize, + ax=ax, + layout=layout, + return_type=return_type, + ) + else: + if return_type is None: + return_type = "axes" + if layout is not None: + raise ValueError("The 'layout' keyword is not supported when 'by' is None") + + if ax is None: + rc = {"figure.figsize": figsize} if figsize is not None else {} + with plt.rc_context(rc): + ax = plt.gca() + data = data._get_numeric_data() + if columns is None: + columns = data.columns + else: + data = data[columns] + + result = plot_group(columns, data.values.T, ax) + ax.grid(grid) + + return result + + +def boxplot_frame( + self, + column=None, + by=None, + ax=None, + fontsize=None, + rot=0, + grid=True, + figsize=None, + layout=None, + return_type=None, + **kwds, +): + import matplotlib.pyplot as plt + + ax = boxplot( + self, + column=column, + by=by, + ax=ax, + fontsize=fontsize, + grid=grid, + rot=rot, + figsize=figsize, + layout=layout, + return_type=return_type, + **kwds, + ) + plt.draw_if_interactive() + return ax + + +def boxplot_frame_groupby( + grouped, + subplots=True, + column=None, + fontsize=None, + rot=0, + grid=True, + ax=None, + figsize=None, + layout=None, + sharex=False, + sharey=True, + **kwds, +): + if subplots is True: + naxes = len(grouped) + fig, axes = _subplots( + naxes=naxes, + squeeze=False, + ax=ax, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + ) + axes = _flatten(axes) + + ret = pd.Series(dtype=object) + + for (key, group), ax in zip(grouped, axes): + d = group.boxplot( + ax=ax, column=column, fontsize=fontsize, rot=rot, grid=grid, **kwds + ) + ax.set_title(pprint_thing(key)) + ret.loc[key] = d + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + 
else: + keys, frames = zip(*grouped) + if grouped.axis == 0: + df = pd.concat(frames, keys=keys, axis=1) + else: + if len(frames) > 1: + df = frames[0].join(frames[1::]) + else: + df = frames[0] + ret = df.boxplot( + column=column, + fontsize=fontsize, + rot=rot, + grid=grid, + ax=ax, + figsize=figsize, + layout=layout, + **kwds, + ) + return ret diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py new file mode 100644 index 00000000..f2c50321 --- /dev/null +++ b/pandas/plotting/_matplotlib/compat.py @@ -0,0 +1,23 @@ +# being a bit too dynamic +from distutils.version import LooseVersion +import operator + + +def _mpl_version(version, op): + def inner(): + try: + import matplotlib as mpl + except ImportError: + return False + return ( + op(LooseVersion(mpl.__version__), LooseVersion(version)) + and str(mpl.__version__)[0] != "0" + ) + + return inner + + +_mpl_ge_2_2_3 = _mpl_version("2.2.3", operator.ge) +_mpl_ge_3_0_0 = _mpl_version("3.0.0", operator.ge) +_mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge) +_mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py new file mode 100644 index 00000000..5b37ebb4 --- /dev/null +++ b/pandas/plotting/_matplotlib/converter.py @@ -0,0 +1,1132 @@ +import contextlib +import datetime as pydt +from datetime import datetime, timedelta +import functools + +from dateutil.relativedelta import relativedelta +import matplotlib.dates as dates +from matplotlib.ticker import AutoLocator, Formatter, Locator +from matplotlib.transforms import nonsingular +import matplotlib.units as units +import numpy as np + +from pandas._libs import lib, tslibs +from pandas._libs.tslibs import resolution +from pandas._libs.tslibs.frequencies import FreqGroup, get_freq + +from pandas.core.dtypes.common import ( + is_datetime64_ns_dtype, + is_float, + is_float_dtype, + is_integer, + is_integer_dtype, + is_nested_list_like, +) 
+from pandas.core.dtypes.generic import ABCSeries + +from pandas import Index, get_option +import pandas.core.common as com +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import Period, PeriodIndex, period_range +import pandas.core.tools.datetimes as tools + +# constants +HOURS_PER_DAY = 24.0 +MIN_PER_HOUR = 60.0 +SEC_PER_MIN = 60.0 + +SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY + +MUSEC_PER_DAY = 1e6 * SEC_PER_DAY + +_mpl_units = {} # Cache for units overwritten by us + + +def get_pairs(): + pairs = [ + (tslibs.Timestamp, DatetimeConverter), + (Period, PeriodConverter), + (pydt.datetime, DatetimeConverter), + (pydt.date, DatetimeConverter), + (pydt.time, TimeConverter), + (np.datetime64, DatetimeConverter), + ] + return pairs + + +def register_pandas_matplotlib_converters(func): + """ + Decorator applying pandas_converters. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + with pandas_converters(): + return func(*args, **kwargs) + + return wrapper + + +@contextlib.contextmanager +def pandas_converters(): + """ + Context manager registering pandas' converters for a plot. + + See Also + -------- + register_pandas_matplotlib_converters : Decorator that applies this. 
+ """ + value = get_option("plotting.matplotlib.register_converters") + + if value: + # register for True or "auto" + register() + try: + yield + finally: + if value == "auto": + # only deregister for "auto" + deregister() + + +def register(): + pairs = get_pairs() + for type_, cls in pairs: + # Cache previous converter if present + if type_ in units.registry and not isinstance(units.registry[type_], cls): + previous = units.registry[type_] + _mpl_units[type_] = previous + # Replace with pandas converter + units.registry[type_] = cls() + + +def deregister(): + # Renamed in pandas.plotting.__init__ + for type_, cls in get_pairs(): + # We use type to catch our classes directly, no inheritance + if type(units.registry.get(type_)) is cls: + units.registry.pop(type_) + + # restore the old keys + for unit, formatter in _mpl_units.items(): + if type(formatter) not in {DatetimeConverter, PeriodConverter, TimeConverter}: + # make it idempotent by excluding ours. + units.registry[unit] = formatter + + +def _to_ordinalf(tm): + tot_sec = tm.hour * 3600 + tm.minute * 60 + tm.second + float(tm.microsecond / 1e6) + return tot_sec + + +def time2num(d): + if isinstance(d, str): + parsed = tools.to_datetime(d) + if not isinstance(parsed, datetime): + raise ValueError(f"Could not parse time {d}") + return _to_ordinalf(parsed.time()) + if isinstance(d, pydt.time): + return _to_ordinalf(d) + return d + + +class TimeConverter(units.ConversionInterface): + @staticmethod + def convert(value, unit, axis): + valid_types = (str, pydt.time) + if isinstance(value, valid_types) or is_integer(value) or is_float(value): + return time2num(value) + if isinstance(value, Index): + return value.map(time2num) + if isinstance(value, (list, tuple, np.ndarray, Index)): + return [time2num(x) for x in value] + return value + + @staticmethod + def axisinfo(unit, axis): + if unit != "time": + return None + + majloc = AutoLocator() + majfmt = TimeFormatter(majloc) + return units.AxisInfo(majloc=majloc, 
majfmt=majfmt, label="time") + + @staticmethod + def default_units(x, axis): + return "time" + + +# time formatter +class TimeFormatter(Formatter): + def __init__(self, locs): + self.locs = locs + + def __call__(self, x, pos=0): + """ + Return the time of day as a formatted string. + + Parameters + ---------- + x : float + The time of day specified as seconds since 00:00 (midnight), + with up to microsecond precision. + pos + Unused + + Returns + ------- + str + A string in HH:MM:SS.mmmuuu format. Microseconds, + milliseconds and seconds are only displayed if non-zero. + """ + fmt = "%H:%M:%S.%f" + s = int(x) + msus = int(round((x - s) * 1e6)) + ms = msus // 1000 + us = msus % 1000 + m, s = divmod(s, 60) + h, m = divmod(m, 60) + _, h = divmod(h, 24) + if us != 0: + return pydt.time(h, m, s, msus).strftime(fmt) + elif ms != 0: + return pydt.time(h, m, s, msus).strftime(fmt)[:-3] + elif s != 0: + return pydt.time(h, m, s).strftime("%H:%M:%S") + + return pydt.time(h, m).strftime("%H:%M") + + +# Period Conversion + + +class PeriodConverter(dates.DateConverter): + @staticmethod + def convert(values, units, axis): + if is_nested_list_like(values): + values = [PeriodConverter._convert_1d(v, units, axis) for v in values] + else: + values = PeriodConverter._convert_1d(values, units, axis) + return values + + @staticmethod + def _convert_1d(values, units, axis): + if not hasattr(axis, "freq"): + raise TypeError("Axis must have `freq` set to convert to Periods") + valid_types = (str, datetime, Period, pydt.date, pydt.time, np.datetime64) + if isinstance(values, valid_types) or is_integer(values) or is_float(values): + return get_datevalue(values, axis.freq) + elif isinstance(values, PeriodIndex): + return values.asfreq(axis.freq)._ndarray_values + elif isinstance(values, Index): + return values.map(lambda x: get_datevalue(x, axis.freq)) + elif lib.infer_dtype(values, skipna=False) == "period": + # https://github.com/pandas-dev/pandas/issues/24304 + # convert ndarray[period] 
-> PeriodIndex + return PeriodIndex(values, freq=axis.freq)._ndarray_values + elif isinstance(values, (list, tuple, np.ndarray, Index)): + return [get_datevalue(x, axis.freq) for x in values] + return values + + +def get_datevalue(date, freq): + if isinstance(date, Period): + return date.asfreq(freq).ordinal + elif isinstance(date, (str, datetime, pydt.date, pydt.time, np.datetime64)): + return Period(date, freq).ordinal + elif ( + is_integer(date) + or is_float(date) + or (isinstance(date, (np.ndarray, Index)) and (date.size == 1)) + ): + return date + elif date is None: + return None + raise ValueError(f"Unrecognizable date '{date}'") + + +def _dt_to_float_ordinal(dt): + """ + Convert :mod:`datetime` to the Gregorian date as UTC float days, + preserving hours, minutes, seconds and microseconds. Return value + is a :func:`float`. + """ + if isinstance(dt, (np.ndarray, Index, ABCSeries)) and is_datetime64_ns_dtype(dt): + base = dates.epoch2num(dt.asi8 / 1.0e9) + else: + base = dates.date2num(dt) + return base + + +# Datetime Conversion +class DatetimeConverter(dates.DateConverter): + @staticmethod + def convert(values, unit, axis): + # values might be a 1-d array, or a list-like of arrays. 
+ if is_nested_list_like(values): + values = [DatetimeConverter._convert_1d(v, unit, axis) for v in values] + else: + values = DatetimeConverter._convert_1d(values, unit, axis) + return values + + @staticmethod + def _convert_1d(values, unit, axis): + def try_parse(values): + try: + return _dt_to_float_ordinal(tools.to_datetime(values)) + except Exception: + return values + + if isinstance(values, (datetime, pydt.date)): + return _dt_to_float_ordinal(values) + elif isinstance(values, np.datetime64): + return _dt_to_float_ordinal(tslibs.Timestamp(values)) + elif isinstance(values, pydt.time): + return dates.date2num(values) + elif is_integer(values) or is_float(values): + return values + elif isinstance(values, str): + return try_parse(values) + elif isinstance(values, (list, tuple, np.ndarray, Index, ABCSeries)): + if isinstance(values, ABCSeries): + # https://github.com/matplotlib/matplotlib/issues/11391 + # Series was skipped. Convert to DatetimeIndex to get asi8 + values = Index(values) + if isinstance(values, Index): + values = values.values + if not isinstance(values, np.ndarray): + values = com.asarray_tuplesafe(values) + + if is_integer_dtype(values) or is_float_dtype(values): + return values + + try: + values = tools.to_datetime(values) + if isinstance(values, Index): + values = _dt_to_float_ordinal(values) + else: + values = [_dt_to_float_ordinal(x) for x in values] + except Exception: + values = _dt_to_float_ordinal(values) + + return values + + @staticmethod + def axisinfo(unit, axis): + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is a tzinfo instance or None. + The *axis* argument is required but not used. 
+ """ + tz = unit + + majloc = PandasAutoDateLocator(tz=tz) + majfmt = PandasAutoDateFormatter(majloc, tz=tz) + datemin = pydt.date(2000, 1, 1) + datemax = pydt.date(2010, 1, 1) + + return units.AxisInfo( + majloc=majloc, majfmt=majfmt, label="", default_limits=(datemin, datemax) + ) + + +class PandasAutoDateFormatter(dates.AutoDateFormatter): + def __init__(self, locator, tz=None, defaultfmt="%Y-%m-%d"): + dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) + + +class PandasAutoDateLocator(dates.AutoDateLocator): + def get_locator(self, dmin, dmax): + """Pick the best locator based on a distance.""" + delta = relativedelta(dmax, dmin) + + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days + num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds + tot_sec = num_days * 86400.0 + num_sec + + if abs(tot_sec) < self.minticks: + self._freq = -1 + locator = MilliSecondLocator(self.tz) + locator.set_axis(self.axis) + + locator.set_view_interval(*self.axis.get_view_interval()) + locator.set_data_interval(*self.axis.get_data_interval()) + return locator + + return dates.AutoDateLocator.get_locator(self, dmin, dmax) + + def _get_unit(self): + return MilliSecondLocator.get_unit_generic(self._freq) + + +class MilliSecondLocator(dates.DateLocator): + + UNIT = 1.0 / (24 * 3600 * 1000) + + def __init__(self, tz): + dates.DateLocator.__init__(self, tz) + self._interval = 1.0 + + def _get_unit(self): + return self.get_unit_generic(-1) + + @staticmethod + def get_unit_generic(freq): + unit = dates.RRuleLocator.get_unit_generic(freq) + if unit < 0: + return MilliSecondLocator.UNIT + return unit + + def __call__(self): + # if no data have been set, this will tank with a ValueError + try: + dmin, dmax = self.viewlim_to_dt() + except ValueError: + return [] + + # We need to cap at the endpoints of valid datetime + + # FIXME: dont leave commented-out + # TODO(wesm) unused? 
+ # if dmin > dmax: + # dmax, dmin = dmin, dmax + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! + # stop = _from_ordinal(3652059.9999999) + + nmax, nmin = dates.date2num((dmax, dmin)) + + num = (nmax - nmin) * 86400 * 1000 + max_millis_ticks = 6 + for interval in [1, 10, 50, 100, 200, 500]: + if num <= interval * (max_millis_ticks - 1): + self._interval = interval + break + else: + # We went through the whole loop without breaking, default to 1 + self._interval = 1000.0 + + estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) + + if estimate > self.MAXTICKS * 2: + raise RuntimeError( + "MillisecondLocator estimated to generate " + f"{estimate:d} ticks from {dmin} to {dmax}: " + "exceeds Locator.MAXTICKS" + f"* 2 ({self.MAXTICKS * 2:d}) " + ) + + interval = self._get_interval() + freq = f"{interval}L" + tz = self.tz.tzname(None) + st = _from_ordinal(dates.date2num(dmin)) # strip tz + ed = _from_ordinal(dates.date2num(dmax)) + all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).astype(object) + + try: + if len(all_dates) > 0: + locs = self.raise_if_exceeds(dates.date2num(all_dates)) + return locs + except Exception: # pragma: no cover + pass + + lims = dates.date2num([dmin, dmax]) + return lims + + def _get_interval(self): + return self._interval + + def autoscale(self): + """ + Set the view limits to include the data range. + """ + dmin, dmax = self.datalim_to_dt() + if dmin > dmax: + dmax, dmin = dmin, dmax + + # We need to cap at the endpoints of valid datetime + + # FIXME: dont leave commented-out + # TODO(wesm): unused? + + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! 
+ # stop = _from_ordinal(3652059.9999999) + + dmin, dmax = self.datalim_to_dt() + + vmin = dates.date2num(dmin) + vmax = dates.date2num(dmax) + + return self.nonsingular(vmin, vmax) + + +def _from_ordinal(x, tz=None): + ix = int(x) + dt = datetime.fromordinal(ix) + remainder = float(x) - ix + hour, remainder = divmod(24 * remainder, 1) + minute, remainder = divmod(60 * remainder, 1) + second, remainder = divmod(60 * remainder, 1) + microsecond = int(1e6 * remainder) + if microsecond < 10: + microsecond = 0 # compensate for rounding errors + dt = datetime( + dt.year, dt.month, dt.day, int(hour), int(minute), int(second), microsecond + ) + if tz is not None: + dt = dt.astimezone(tz) + + if microsecond > 999990: # compensate for rounding errors + dt += timedelta(microseconds=1e6 - microsecond) + + return dt + + +# Fixed frequency dynamic tick locators and formatters + +# ------------------------------------------------------------------------- +# --- Locators --- +# ------------------------------------------------------------------------- + + +def _get_default_annual_spacing(nyears): + """ + Returns a default spacing between consecutive ticks for annual data. + """ + if nyears < 11: + (min_spacing, maj_spacing) = (1, 1) + elif nyears < 20: + (min_spacing, maj_spacing) = (1, 2) + elif nyears < 50: + (min_spacing, maj_spacing) = (1, 5) + elif nyears < 100: + (min_spacing, maj_spacing) = (5, 10) + elif nyears < 200: + (min_spacing, maj_spacing) = (5, 25) + elif nyears < 600: + (min_spacing, maj_spacing) = (10, 50) + else: + factor = nyears // 1000 + 1 + (min_spacing, maj_spacing) = (factor * 20, factor * 100) + return (min_spacing, maj_spacing) + + +def period_break(dates, period): + """ + Returns the indices where the given period changes. + + Parameters + ---------- + dates : PeriodIndex + Array of intervals to monitor. + period : string + Name of the period to monitor. 
+ """ + current = getattr(dates, period) + previous = getattr(dates - 1 * dates.freq, period) + return np.nonzero(current - previous)[0] + + +def has_level_label(label_flags, vmin): + """ + Returns true if the ``label_flags`` indicate there is at least one label + for this level. + + if the minimum view limit is not an exact integer, then the first tick + label won't be shown, so we must adjust for that. + """ + if label_flags.size == 0 or ( + label_flags.size == 1 and label_flags[0] == 0 and vmin % 1 > 0.0 + ): + return False + else: + return True + + +def _daily_finder(vmin, vmax, freq): + periodsperday = -1 + + if freq >= FreqGroup.FR_HR: + if freq == FreqGroup.FR_NS: + periodsperday = 24 * 60 * 60 * 1000000000 + elif freq == FreqGroup.FR_US: + periodsperday = 24 * 60 * 60 * 1000000 + elif freq == FreqGroup.FR_MS: + periodsperday = 24 * 60 * 60 * 1000 + elif freq == FreqGroup.FR_SEC: + periodsperday = 24 * 60 * 60 + elif freq == FreqGroup.FR_MIN: + periodsperday = 24 * 60 + elif freq == FreqGroup.FR_HR: + periodsperday = 24 + else: # pragma: no cover + raise ValueError(f"unexpected frequency: {freq}") + periodsperyear = 365 * periodsperday + periodspermonth = 28 * periodsperday + + elif freq == FreqGroup.FR_BUS: + periodsperyear = 261 + periodspermonth = 19 + elif freq == FreqGroup.FR_DAY: + periodsperyear = 365 + periodspermonth = 28 + elif resolution.get_freq_group(freq) == FreqGroup.FR_WK: + periodsperyear = 52 + periodspermonth = 3 + else: # pragma: no cover + raise ValueError("unexpected frequency") + + # save this for later usage + vmin_orig = vmin + + (vmin, vmax) = ( + Period(ordinal=int(vmin), freq=freq), + Period(ordinal=int(vmax), freq=freq), + ) + span = vmax.ordinal - vmin.ordinal + 1 + dates_ = period_range(start=vmin, end=vmax, freq=freq) + # Initialize the output + info = np.zeros( + span, dtype=[("val", np.int64), ("maj", bool), ("min", bool), ("fmt", "|S20")] + ) + info["val"][:] = dates_._ndarray_values + info["fmt"][:] = "" + info["maj"][[0, 
-1]] = True + # .. and set some shortcuts + info_maj = info["maj"] + info_min = info["min"] + info_fmt = info["fmt"] + + def first_label(label_flags): + if (label_flags[0] == 0) and (label_flags.size > 1) and ((vmin_orig % 1) > 0.0): + return label_flags[1] + else: + return label_flags[0] + + # Case 1. Less than a month + if span <= periodspermonth: + day_start = period_break(dates_, "day") + month_start = period_break(dates_, "month") + + def _hour_finder(label_interval, force_year_start): + _hour = dates_.hour + _prev_hour = (dates_ - 1 * dates_.freq).hour + hour_start = (_hour - _prev_hour) != 0 + info_maj[day_start] = True + info_min[hour_start & (_hour % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt[hour_start & (_hour % label_interval == 0)] = "%H:%M" + info_fmt[day_start] = "%H:%M\n%d-%b" + info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" + if force_year_start and not has_level_label(year_start, vmin_orig): + info_fmt[first_label(day_start)] = "%H:%M\n%d-%b\n%Y" + + def _minute_finder(label_interval): + hour_start = period_break(dates_, "hour") + _minute = dates_.minute + _prev_minute = (dates_ - 1 * dates_.freq).minute + minute_start = (_minute - _prev_minute) != 0 + info_maj[hour_start] = True + info_min[minute_start & (_minute % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[minute_start & (_minute % label_interval == 0)] = "%H:%M" + info_fmt[day_start] = "%H:%M\n%d-%b" + info_fmt[year_start] = "%H:%M\n%d-%b\n%Y" + + def _second_finder(label_interval): + minute_start = period_break(dates_, "minute") + _second = dates_.second + _prev_second = (dates_ - 1 * dates_.freq).second + second_start = (_second - _prev_second) != 0 + info["maj"][minute_start] = True + info["min"][second_start & (_second % label_interval == 0)] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[second_start & (_second % label_interval == 0)] = "%H:%M:%S" + 
info_fmt[day_start] = "%H:%M:%S\n%d-%b" + info_fmt[year_start] = "%H:%M:%S\n%d-%b\n%Y" + + if span < periodsperday / 12000.0: + _second_finder(1) + elif span < periodsperday / 6000.0: + _second_finder(2) + elif span < periodsperday / 2400.0: + _second_finder(5) + elif span < periodsperday / 1200.0: + _second_finder(10) + elif span < periodsperday / 800.0: + _second_finder(15) + elif span < periodsperday / 400.0: + _second_finder(30) + elif span < periodsperday / 150.0: + _minute_finder(1) + elif span < periodsperday / 70.0: + _minute_finder(2) + elif span < periodsperday / 24.0: + _minute_finder(5) + elif span < periodsperday / 12.0: + _minute_finder(15) + elif span < periodsperday / 6.0: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) + else: + info_maj[month_start] = True + info_min[day_start] = True + year_start = period_break(dates_, "year") + info_fmt = info["fmt"] + info_fmt[day_start] = "%d" + info_fmt[month_start] = "%d\n%b" + info_fmt[year_start] = "%d\n%b\n%Y" + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(day_start)] = "%d\n%b\n%Y" + else: + info_fmt[first_label(month_start)] = "%d\n%b\n%Y" + + # Case 2. 
Less than three months + elif span <= periodsperyear // 4: + month_start = period_break(dates_, "month") + info_maj[month_start] = True + if freq < FreqGroup.FR_HR: + info["min"] = True + else: + day_start = period_break(dates_, "day") + info["min"][day_start] = True + week_start = period_break(dates_, "week") + year_start = period_break(dates_, "year") + info_fmt[week_start] = "%d" + info_fmt[month_start] = "\n\n%b" + info_fmt[year_start] = "\n\n%b\n%Y" + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(week_start)] = "\n\n%b\n%Y" + else: + info_fmt[first_label(month_start)] = "\n\n%b\n%Y" + # Case 3. Less than 14 months ............... + elif span <= 1.15 * periodsperyear: + year_start = period_break(dates_, "year") + month_start = period_break(dates_, "month") + week_start = period_break(dates_, "week") + info_maj[month_start] = True + info_min[week_start] = True + info_min[year_start] = False + info_min[month_start] = False + info_fmt[month_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + if not has_level_label(year_start, vmin_orig): + info_fmt[first_label(month_start)] = "%b\n%Y" + # Case 4. Less than 2.5 years ............... + elif span <= 2.5 * periodsperyear: + year_start = period_break(dates_, "year") + quarter_start = period_break(dates_, "quarter") + month_start = period_break(dates_, "month") + info_maj[quarter_start] = True + info_min[month_start] = True + info_fmt[quarter_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + # Case 4. Less than 4 years ................. + elif span <= 4 * periodsperyear: + year_start = period_break(dates_, "year") + month_start = period_break(dates_, "month") + info_maj[year_start] = True + info_min[month_start] = True + info_min[year_start] = False + + month_break = dates_[month_start].month + jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] + info_fmt[jan_or_jul] = "%b" + info_fmt[year_start] = "%b\n%Y" + # Case 5. 
Less than 11 years ................ + elif span <= 11 * periodsperyear: + year_start = period_break(dates_, "year") + quarter_start = period_break(dates_, "quarter") + info_maj[year_start] = True + info_min[quarter_start] = True + info_min[year_start] = False + info_fmt[year_start] = "%Y" + # Case 6. More than 12 years ................ + else: + year_start = period_break(dates_, "year") + year_break = dates_[year_start].year + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(year_break % maj_anndef == 0)] + info_maj[major_idx] = True + minor_idx = year_start[(year_break % min_anndef == 0)] + info_min[minor_idx] = True + info_fmt[major_idx] = "%Y" + + return info + + +def _monthly_finder(vmin, vmax, freq): + periodsperyear = 12 + + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + # Initialize the output + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + dates_ = info["val"] + info["fmt"] = "" + year_start = (dates_ % 12 == 0).nonzero()[0] + info_maj = info["maj"] + info_fmt = info["fmt"] + + if span <= 1.15 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + info_fmt[:] = "%b" + info_fmt[year_start] = "%b\n%Y" + + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = "%b\n%Y" + + elif span <= 2.5 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + # TODO: Check the following : is it really info['fmt'] ? 
+ info["fmt"][quarter_start] = True + info["min"] = True + + info_fmt[quarter_start] = "%b" + info_fmt[year_start] = "%b\n%Y" + + elif span <= 4 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) + info_fmt[jan_or_jul] = "%b" + info_fmt[year_start] = "%b\n%Y" + + elif span <= 11 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + info["min"][quarter_start] = True + + info_fmt[year_start] = "%Y" + + else: + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + years = dates_[year_start] // 12 + 1 + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info["min"][year_start[(years % min_anndef == 0)]] = True + + info_fmt[major_idx] = "%Y" + + return info + + +def _quarterly_finder(vmin, vmax, freq): + periodsperyear = 4 + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + info["fmt"] = "" + dates_ = info["val"] + info_maj = info["maj"] + info_fmt = info["fmt"] + year_start = (dates_ % 4 == 0).nonzero()[0] + + if span <= 3.5 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + + info_fmt[:] = "Q%q" + info_fmt[year_start] = "Q%q\n%F" + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = "Q%q\n%F" + + elif span <= 11 * periodsperyear: + info_maj[year_start] = True + info["min"] = True + info_fmt[year_start] = "%F" + + else: + years = dates_[year_start] // 4 + 1 + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info["min"][year_start[(years % min_anndef == 0)]] = True + info_fmt[major_idx] = "%F" + + 
return info + + +def _annual_finder(vmin, vmax, freq): + (vmin, vmax) = (int(vmin), int(vmax + 1)) + span = vmax - vmin + 1 + + info = np.zeros( + span, dtype=[("val", int), ("maj", bool), ("min", bool), ("fmt", "|S8")] + ) + info["val"] = np.arange(vmin, vmax + 1) + info["fmt"] = "" + dates_ = info["val"] + + (min_anndef, maj_anndef) = _get_default_annual_spacing(span) + major_idx = dates_ % maj_anndef == 0 + info["maj"][major_idx] = True + info["min"][(dates_ % min_anndef == 0)] = True + info["fmt"][major_idx] = "%Y" + + return info + + +def get_finder(freq): + if isinstance(freq, str): + freq = get_freq(freq) + fgroup = resolution.get_freq_group(freq) + + if fgroup == FreqGroup.FR_ANN: + return _annual_finder + elif fgroup == FreqGroup.FR_QTR: + return _quarterly_finder + elif freq == FreqGroup.FR_MTH: + return _monthly_finder + elif (freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK: + return _daily_finder + else: # pragma: no cover + raise NotImplementedError(f"Unsupported frequency: {freq}") + + +class TimeSeries_DateLocator(Locator): + """ + Locates the ticks along an axis controlled by a :class:`Series`. + + Parameters + ---------- + freq : {var} + Valid frequency specifier. + minor_locator : {False, True}, optional + Whether the locator is for minor ticks (True) or not. + dynamic_mode : {True, False}, optional + Whether the locator should work in dynamic mode. 
+ base : {int}, optional + quarter : {int}, optional + month : {int}, optional + day : {int}, optional + """ + + def __init__( + self, + freq, + minor_locator=False, + dynamic_mode=True, + base=1, + quarter=1, + month=1, + day=1, + plot_obj=None, + ): + if isinstance(freq, str): + freq = get_freq(freq) + self.freq = freq + self.base = base + (self.quarter, self.month, self.day) = (quarter, month, day) + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _get_default_locs(self, vmin, vmax): + "Returns the default locations of ticks." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + + locator = self.plot_obj.date_axis_info + + if self.isminor: + return np.compress(locator["min"], locator["val"]) + return np.compress(locator["maj"], locator["val"]) + + def __call__(self): + "Return the locations of the ticks." + # axis calls Locator.set_axis inside set_m_formatter + + vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + vmin, vmax = vi + if vmax < vmin: + vmin, vmax = vmax, vmin + if self.isdynamic: + locs = self._get_default_locs(vmin, vmax) + else: # pragma: no cover + base = self.base + (d, m) = divmod(vmin, base) + vmin = (d + 1) * base + locs = list(range(vmin, vmax + 1, base)) + return locs + + def autoscale(self): + """ + Sets the view limits to the nearest multiples of base that contain the + data. 
+ """ + # requires matplotlib >= 0.98.0 + (vmin, vmax) = self.axis.get_data_interval() + + locs = self._get_default_locs(vmin, vmax) + (vmin, vmax) = locs[[0, -1]] + if vmin == vmax: + vmin -= 1 + vmax += 1 + return nonsingular(vmin, vmax) + + +# ------------------------------------------------------------------------- +# --- Formatter --- +# ------------------------------------------------------------------------- + + +class TimeSeries_DateFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`PeriodIndex`. + + Parameters + ---------- + freq : {int, string} + Valid frequency specifier. + minor_locator : {False, True} + Whether the current formatter should apply to minor ticks (True) or + major ticks (False). + dynamic_mode : {True, False} + Whether the formatter works in dynamic mode or not. + """ + + def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): + if isinstance(freq, str): + freq = get_freq(freq) + self.format = None + self.freq = freq + self.locs = [] + self.formatdict = None + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _set_default_format(self, vmin, vmax): + "Returns the default ticks spacing." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + info = self.plot_obj.date_axis_info + + if self.isminor: + format = np.compress(info["min"] & np.logical_not(info["maj"]), info) + else: + format = np.compress(info["maj"], info) + self.formatdict = {x: f for (x, _, _, f) in format} + return self.formatdict + + def set_locs(self, locs): + "Sets the locations of the ticks" + # don't actually use the locs. This is just needed to work with + # matplotlib. 
Force to use vmin, vmax + + self.locs = locs + + (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + if vmax < vmin: + (vmin, vmax) = (vmax, vmin) + self._set_default_format(vmin, vmax) + + def __call__(self, x, pos=0): + + if self.formatdict is None: + return "" + else: + fmt = self.formatdict.pop(x, "") + if isinstance(fmt, np.bytes_): + fmt = fmt.decode("utf-8") + return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. + """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10 ** (n_decimals - 9)) + s = f"{int(h):02d}:{int(m):02d}:{int(s):02d}" + if n_decimals > 0: + s += f".{decimals:0{n_decimals}d}" + if d != 0: + s = f"{int(d):d} days {s}" + return s + + def __call__(self, x, pos=0): + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py new file mode 100644 index 00000000..2d68bb46 --- /dev/null +++ b/pandas/plotting/_matplotlib/core.py @@ -0,0 +1,1517 @@ +import re +from typing import Optional +import warnings + +import numpy as np + +from pandas.errors import AbstractMethodError +from pandas.util._decorators import cache_readonly + +from pandas.core.dtypes.common import ( + is_hashable, + is_integer, + is_iterator, + is_list_like, + is_number, +) +from pandas.core.dtypes.generic import ( + ABCDataFrame, + ABCIndexClass, + ABCMultiIndex, + ABCPeriodIndex, + ABCSeries, +) +from 
pandas.core.dtypes.missing import isna, notna + +import pandas.core.common as com + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 +from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters +from pandas.plotting._matplotlib.style import _get_standard_colors +from pandas.plotting._matplotlib.tools import ( + _flatten, + _get_all_lines, + _get_xlim, + _handle_shared_axes, + _subplots, + format_date_labels, + table, +) + + +class MPLPlot: + """ + Base class for assembling a pandas plot using matplotlib + + Parameters + ---------- + data : + + """ + + @property + def _kind(self): + """Specify kind str. Must be overridden in child class""" + raise NotImplementedError + + _layout_type = "vertical" + _default_rot = 0 + orientation: Optional[str] = None + _pop_attributes = [ + "label", + "style", + "logy", + "logx", + "loglog", + "mark_right", + "stacked", + ] + _attr_defaults = { + "logy": False, + "logx": False, + "loglog": False, + "mark_right": True, + "stacked": False, + } + + def __init__( + self, + data, + kind=None, + by=None, + subplots=False, + sharex=None, + sharey=False, + use_index=True, + figsize=None, + grid=None, + legend=True, + rot=None, + ax=None, + fig=None, + title=None, + xlim=None, + ylim=None, + xticks=None, + yticks=None, + sort_columns=False, + fontsize=None, + secondary_y=False, + colormap=None, + table=False, + layout=None, + include_bool=False, + **kwds, + ): + + import matplotlib.pyplot as plt + + self.data = data + self.by = by + + self.kind = kind + + self.sort_columns = sort_columns + + self.subplots = subplots + + if sharex is None: + if ax is None: + self.sharex = True + else: + # if we get an axis, the users should do the visibility + # setting... 
+ self.sharex = False + else: + self.sharex = sharex + + self.sharey = sharey + self.figsize = figsize + self.layout = layout + + self.xticks = xticks + self.yticks = yticks + self.xlim = xlim + self.ylim = ylim + self.title = title + self.use_index = use_index + + self.fontsize = fontsize + + if rot is not None: + self.rot = rot + # need to know for format_date_labels since it's rotated to 30 by + # default + self._rot_set = True + else: + self._rot_set = False + self.rot = self._default_rot + + if grid is None: + grid = False if secondary_y else plt.rcParams["axes.grid"] + + self.grid = grid + self.legend = legend + self.legend_handles = [] + self.legend_labels = [] + + for attr in self._pop_attributes: + value = kwds.pop(attr, self._attr_defaults.get(attr, None)) + setattr(self, attr, value) + + self.ax = ax + self.fig = fig + self.axes = None + + # parse errorbar input if given + xerr = kwds.pop("xerr", None) + yerr = kwds.pop("yerr", None) + self.errors = { + kw: self._parse_errorbars(kw, err) + for kw, err in zip(["xerr", "yerr"], [xerr, yerr]) + } + + if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, ABCIndexClass)): + secondary_y = [secondary_y] + self.secondary_y = secondary_y + + # ugly TypeError if user passes matplotlib's `cmap` name. + # Probably better to accept either. 
+ if "cmap" in kwds and colormap: + raise TypeError("Only specify one of `cmap` and `colormap`.") + elif "cmap" in kwds: + self.colormap = kwds.pop("cmap") + else: + self.colormap = colormap + + self.table = table + self.include_bool = include_bool + + self.kwds = kwds + + self._validate_color_args() + + def _validate_color_args(self): + import matplotlib.colors + + if ( + "color" in self.kwds + and self.nseries == 1 + and not is_list_like(self.kwds["color"]) + ): + # support series.plot(color='green') + self.kwds["color"] = [self.kwds["color"]] + + if ( + "color" in self.kwds + and isinstance(self.kwds["color"], tuple) + and self.nseries == 1 + and len(self.kwds["color"]) in (3, 4) + ): + # support RGB and RGBA tuples in series plot + self.kwds["color"] = [self.kwds["color"]] + + if ( + "color" in self.kwds or "colors" in self.kwds + ) and self.colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used simultaneously. Using 'color'" + ) + + if "color" in self.kwds and self.style is not None: + if is_list_like(self.style): + styles = self.style + else: + styles = [self.style] + # need only a single match + for s in styles: + for char in s: + if char in matplotlib.colors.BASE_COLORS: + raise ValueError( + "Cannot pass 'style' string with a color symbol and " + "'color' keyword argument. 
Please use one or the other or " + "pass 'style' without a color symbol" + ) + + def _iter_data(self, data=None, keep_index=False, fillna=None): + if data is None: + data = self.data + if fillna is not None: + data = data.fillna(fillna) + + for col, values in data.items(): + if keep_index is True: + yield col, values + else: + yield col, values.values + + @property + def nseries(self): + if self.data.ndim == 1: + return 1 + else: + return self.data.shape[1] + + def draw(self): + self.plt.draw_if_interactive() + + def generate(self): + self._args_adjust() + self._compute_plot_data() + self._setup_subplots() + self._make_plot() + self._add_table() + self._make_legend() + self._adorn_subplots() + + for ax in self.axes: + self._post_plot_logic_common(ax, self.data) + self._post_plot_logic(ax, self.data) + + def _args_adjust(self): + pass + + def _has_plotted_object(self, ax): + """check whether ax has data""" + return len(ax.lines) != 0 or len(ax.artists) != 0 or len(ax.containers) != 0 + + def _maybe_right_yaxis(self, ax, axes_num): + if not self.on_right(axes_num): + # secondary axes may be passed via ax kw + return self._get_ax_layer(ax) + + if hasattr(ax, "right_ax"): + # if it has right_ax proparty, ``ax`` must be left axes + return ax.right_ax + elif hasattr(ax, "left_ax"): + # if it has left_ax proparty, ``ax`` must be right axes + return ax + else: + # otherwise, create twin axes + orig_ax, new_ax = ax, ax.twinx() + # TODO: use Matplotlib public API when available + new_ax._get_lines = orig_ax._get_lines + new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill + orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax + + if not self._has_plotted_object(orig_ax): # no data on left y + orig_ax.get_yaxis().set_visible(False) + + if self.logy is True or self.loglog is True: + new_ax.set_yscale("log") + elif self.logy == "sym" or self.loglog == "sym": + new_ax.set_yscale("symlog") + return new_ax + + def _setup_subplots(self): + if self.subplots: + fig, axes = 
_subplots( + naxes=self.nseries, + sharex=self.sharex, + sharey=self.sharey, + figsize=self.figsize, + ax=self.ax, + layout=self.layout, + layout_type=self._layout_type, + ) + else: + if self.ax is None: + fig = self.plt.figure(figsize=self.figsize) + axes = fig.add_subplot(111) + else: + fig = self.ax.get_figure() + if self.figsize is not None: + fig.set_size_inches(self.figsize) + axes = self.ax + + axes = _flatten(axes) + + valid_log = {False, True, "sym", None} + input_log = {self.logx, self.logy, self.loglog} + if input_log - valid_log: + invalid_log = next(iter((input_log - valid_log))) + raise ValueError( + f"Boolean, None and 'sym' are valid options, '{invalid_log}' is given." + ) + + if self.logx is True or self.loglog is True: + [a.set_xscale("log") for a in axes] + elif self.logx == "sym" or self.loglog == "sym": + [a.set_xscale("symlog") for a in axes] + + if self.logy is True or self.loglog is True: + [a.set_yscale("log") for a in axes] + elif self.logy == "sym" or self.loglog == "sym": + [a.set_yscale("symlog") for a in axes] + + self.fig = fig + self.axes = axes + + @property + def result(self): + """ + Return result axes + """ + if self.subplots: + if self.layout is not None and not is_list_like(self.ax): + return self.axes.reshape(*self.layout) + else: + return self.axes + else: + sec_true = isinstance(self.secondary_y, bool) and self.secondary_y + all_sec = ( + is_list_like(self.secondary_y) and len(self.secondary_y) == self.nseries + ) + if sec_true or all_sec: + # if all data is plotted on secondary, return right axes + return self._get_ax_layer(self.axes[0], primary=False) + else: + return self.axes[0] + + def _compute_plot_data(self): + data = self.data + + if isinstance(data, ABCSeries): + label = self.label + if label is None and data.name is None: + label = "None" + data = data.to_frame(name=label) + + # GH16953, _convert is needed as fallback, for ``Series`` + # with ``dtype == object`` + data = data._convert(datetime=True, timedelta=True) 
+ include_type = [np.number, "datetime", "datetimetz", "timedelta"] + + # GH23719, allow plotting boolean + if self.include_bool is True: + include_type.append(np.bool_) + + # GH22799, exclude datatime-like type for boxplot + exclude_type = None + if self._kind == "box": + # TODO: change after solving issue 27881 + include_type = [np.number] + exclude_type = ["timedelta"] + + # GH 18755, include object and category type for scatter plot + if self._kind == "scatter": + include_type.extend(["object", "category"]) + + numeric_data = data.select_dtypes(include=include_type, exclude=exclude_type) + + try: + is_empty = numeric_data.columns.empty + except AttributeError: + is_empty = not len(numeric_data) + + # no non-numeric frames or series allowed + if is_empty: + raise TypeError("no numeric data to plot") + + # GH25587: cast ExtensionArray of pandas (IntegerArray, etc.) to + # np.ndarray before plot. + numeric_data = numeric_data.copy() + for col in numeric_data: + numeric_data[col] = np.asarray(numeric_data[col]) + + self.data = numeric_data + + def _make_plot(self): + raise AbstractMethodError(self) + + def _add_table(self): + if self.table is False: + return + elif self.table is True: + data = self.data.transpose() + else: + data = self.table + ax = self._get_ax(0) + table(ax, data) + + def _post_plot_logic_common(self, ax, data): + """Common post process for each axes""" + + if self.orientation == "vertical" or self.orientation is None: + self._apply_axis_properties(ax.xaxis, rot=self.rot, fontsize=self.fontsize) + self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + + if hasattr(ax, "right_ax"): + self._apply_axis_properties(ax.right_ax.yaxis, fontsize=self.fontsize) + + elif self.orientation == "horizontal": + self._apply_axis_properties(ax.yaxis, rot=self.rot, fontsize=self.fontsize) + self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + + if hasattr(ax, "right_ax"): + self._apply_axis_properties(ax.right_ax.yaxis, fontsize=self.fontsize) 
+ else: # pragma no cover + raise ValueError + + def _post_plot_logic(self, ax, data): + """Post process for each axes. Overridden in child classes""" + pass + + def _adorn_subplots(self): + """Common post process unrelated to data""" + if len(self.axes) > 0: + all_axes = self._get_subplots() + nrows, ncols = self._get_axes_layout() + _handle_shared_axes( + axarr=all_axes, + nplots=len(all_axes), + naxes=nrows * ncols, + nrows=nrows, + ncols=ncols, + sharex=self.sharex, + sharey=self.sharey, + ) + + for ax in self.axes: + if self.yticks is not None: + ax.set_yticks(self.yticks) + + if self.xticks is not None: + ax.set_xticks(self.xticks) + + if self.ylim is not None: + ax.set_ylim(self.ylim) + + if self.xlim is not None: + ax.set_xlim(self.xlim) + + ax.grid(self.grid) + + if self.title: + if self.subplots: + if is_list_like(self.title): + if len(self.title) != self.nseries: + raise ValueError( + "The length of `title` must equal the number " + "of columns if using `title` of type `list` " + "and `subplots=True`.\n" + f"length of title = {len(self.title)}\n" + f"number of columns = {self.nseries}" + ) + + for (ax, title) in zip(self.axes, self.title): + ax.set_title(title) + else: + self.fig.suptitle(self.title) + else: + if is_list_like(self.title): + msg = ( + "Using `title` of type `list` is not supported " + "unless `subplots=True` is passed" + ) + raise ValueError(msg) + self.axes[0].set_title(self.title) + + def _apply_axis_properties(self, axis, rot=None, fontsize=None): + """ Tick creation within matplotlib is reasonably expensive and is + internally deferred until accessed as Ticks are created/destroyed + multiple times per draw. It's therefore beneficial for us to avoid + accessing unless we will act on the Tick. 
+ """ + + if rot is not None or fontsize is not None: + # rot=0 is a valid setting, hence the explicit None check + labels = axis.get_majorticklabels() + axis.get_minorticklabels() + for label in labels: + if rot is not None: + label.set_rotation(rot) + if fontsize is not None: + label.set_fontsize(fontsize) + + @property + def legend_title(self): + if not isinstance(self.data.columns, ABCMultiIndex): + name = self.data.columns.name + if name is not None: + name = pprint_thing(name) + return name + else: + stringified = map(pprint_thing, self.data.columns.names) + return ",".join(stringified) + + def _add_legend_handle(self, handle, label, index=None): + if label is not None: + if self.mark_right and index is not None: + if self.on_right(index): + label = label + " (right)" + self.legend_handles.append(handle) + self.legend_labels.append(label) + + def _make_legend(self): + ax, leg, handle = self._get_ax_legend_handle(self.axes[0]) + + handles = [] + labels = [] + title = "" + + if not self.subplots: + if leg is not None: + title = leg.get_title().get_text() + # Replace leg.LegendHandles because it misses marker info + handles.extend(handle) + labels = [x.get_text() for x in leg.get_texts()] + + if self.legend: + if self.legend == "reverse": + self.legend_handles = reversed(self.legend_handles) + self.legend_labels = reversed(self.legend_labels) + + handles += self.legend_handles + labels += self.legend_labels + + if self.legend_title is not None: + title = self.legend_title + + if len(handles) > 0: + ax.legend(handles, labels, loc="best", title=title) + + elif self.subplots and self.legend: + for ax in self.axes: + if ax.get_visible(): + ax.legend(loc="best") + + def _get_ax_legend_handle(self, ax): + """ + Take in axes and return ax, legend and handle under different scenarios + """ + leg = ax.get_legend() + + # Get handle from axes + handle, _ = ax.get_legend_handles_labels() + other_ax = getattr(ax, "left_ax", None) or getattr(ax, "right_ax", None) + other_leg 
= None + if other_ax is not None: + other_leg = other_ax.get_legend() + if leg is None and other_leg is not None: + leg = other_leg + ax = other_ax + return ax, leg, handle + + @cache_readonly + def plt(self): + import matplotlib.pyplot as plt + + return plt + + _need_to_set_index = False + + def _get_xticks(self, convert_period=False): + index = self.data.index + is_datetype = index.inferred_type in ("datetime", "date", "datetime64", "time") + + if self.use_index: + if convert_period and isinstance(index, ABCPeriodIndex): + self.data = self.data.reindex(index=index.sort_values()) + x = self.data.index.to_timestamp()._mpl_repr() + elif index.is_numeric(): + """ + Matplotlib supports numeric values or datetime objects as + xaxis values. Taking LBYL approach here, by the time + matplotlib raises exception when using non numeric/datetime + values for xaxis, several actions are already taken by plt. + """ + x = index._mpl_repr() + elif is_datetype: + self.data = self.data[notna(self.data.index)] + self.data = self.data.sort_index() + x = self.data.index._mpl_repr() + else: + self._need_to_set_index = True + x = list(range(len(index))) + else: + x = list(range(len(index))) + + return x + + @classmethod + @register_pandas_matplotlib_converters + def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): + mask = isna(y) + if mask.any(): + y = np.ma.array(y) + y = np.ma.masked_where(mask, y) + + if isinstance(x, ABCIndexClass): + x = x._mpl_repr() + + if is_errorbar: + if "xerr" in kwds: + kwds["xerr"] = np.array(kwds.get("xerr")) + if "yerr" in kwds: + kwds["yerr"] = np.array(kwds.get("yerr")) + return ax.errorbar(x, y, **kwds) + else: + # prevent style kwarg from going to errorbar, where it is + # unsupported + if style is not None: + args = (x, y, style) + else: + args = (x, y) + return ax.plot(*args, **kwds) + + def _get_index_name(self): + if isinstance(self.data.index, ABCMultiIndex): + name = self.data.index.names + if com.any_not_none(*name): + name = 
",".join(pprint_thing(x) for x in name) + else: + name = None + else: + name = self.data.index.name + if name is not None: + name = pprint_thing(name) + + return name + + @classmethod + def _get_ax_layer(cls, ax, primary=True): + """get left (primary) or right (secondary) axes""" + if primary: + return getattr(ax, "left_ax", ax) + else: + return getattr(ax, "right_ax", ax) + + def _get_ax(self, i): + # get the twinx ax if appropriate + if self.subplots: + ax = self.axes[i] + ax = self._maybe_right_yaxis(ax, i) + self.axes[i] = ax + else: + ax = self.axes[0] + ax = self._maybe_right_yaxis(ax, i) + + ax.get_yaxis().set_visible(True) + return ax + + @classmethod + def get_default_ax(cls, ax): + import matplotlib.pyplot as plt + + if ax is None and len(plt.get_fignums()) > 0: + with plt.rc_context(): + ax = plt.gca() + ax = cls._get_ax_layer(ax) + + def on_right(self, i): + if isinstance(self.secondary_y, bool): + return self.secondary_y + + if isinstance(self.secondary_y, (tuple, list, np.ndarray, ABCIndexClass)): + return self.data.columns[i] in self.secondary_y + + def _apply_style_colors(self, colors, kwds, col_num, label): + """ + Manage style and color based on column number and its label. + Returns tuple of appropriate style and kwds which "color" may be added. 
+ """ + style = None + if self.style is not None: + if isinstance(self.style, list): + try: + style = self.style[col_num] + except IndexError: + pass + elif isinstance(self.style, dict): + style = self.style.get(label, style) + else: + style = self.style + + has_color = "color" in kwds or self.colormap is not None + nocolor_style = style is None or re.match("[a-z]+", style) is None + if (has_color or self.subplots) and nocolor_style: + kwds["color"] = colors[col_num % len(colors)] + return style, kwds + + def _get_colors(self, num_colors=None, color_kwds="color"): + if num_colors is None: + num_colors = self.nseries + + return _get_standard_colors( + num_colors=num_colors, + colormap=self.colormap, + color=self.kwds.get(color_kwds), + ) + + def _parse_errorbars(self, label, err): + """ + Look for error keyword arguments and return the actual errorbar data + or return the error DataFrame/dict + + Error bars can be specified in several ways: + Series: the user provides a pandas.Series object of the same + length as the data + ndarray: provides a np.ndarray of the same length as the data + DataFrame/dict: error values are paired with keys matching the + key in the plotted DataFrame + str: the name of the column within the plotted DataFrame + """ + + if err is None: + return None + + def match_labels(data, e): + e = e.reindex(data.index) + return e + + # key-matched DataFrame + if isinstance(err, ABCDataFrame): + + err = match_labels(self.data, err) + # key-matched dict + elif isinstance(err, dict): + pass + + # Series of error values + elif isinstance(err, ABCSeries): + # broadcast error series across data + err = match_labels(self.data, err) + err = np.atleast_2d(err) + err = np.tile(err, (self.nseries, 1)) + + # errors are a column in the dataframe + elif isinstance(err, str): + evalues = self.data[err].values + self.data = self.data[self.data.columns.drop(err)] + err = np.atleast_2d(evalues) + err = np.tile(err, (self.nseries, 1)) + + elif is_list_like(err): + if 
is_iterator(err): + err = np.atleast_2d(list(err)) + else: + # raw error values + err = np.atleast_2d(err) + + err_shape = err.shape + + # asymmetrical error bars + if err.ndim == 3: + if ( + (err_shape[0] != self.nseries) + or (err_shape[1] != 2) + or (err_shape[2] != len(self.data)) + ): + raise ValueError( + "Asymmetrical error bars should be provided " + f"with the shape ({self.nseries}, 2, {len(self.data)})" + ) + + # broadcast errors to each data series + if len(err) == 1: + err = np.tile(err, (self.nseries, 1)) + + elif is_number(err): + err = np.tile([err], (self.nseries, len(self.data))) + + else: + msg = f"No valid {label} detected" + raise ValueError(msg) + + return err + + def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): + errors = {} + + for kw, flag in zip(["xerr", "yerr"], [xerr, yerr]): + if flag: + err = self.errors[kw] + # user provided label-matched dataframe of errors + if isinstance(err, (ABCDataFrame, dict)): + if label is not None and label in err.keys(): + err = err[label] + else: + err = None + elif index is not None and err is not None: + err = err[index] + + if err is not None: + errors[kw] = err + return errors + + def _get_subplots(self): + from matplotlib.axes import Subplot + + return [ + ax for ax in self.axes[0].get_figure().get_axes() if isinstance(ax, Subplot) + ] + + def _get_axes_layout(self): + axes = self._get_subplots() + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + +class PlanePlot(MPLPlot): + """ + Abstract class for plotting on plane, currently scatter and hexbin. 
+ """ + + _layout_type = "single" + + def __init__(self, data, x, y, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + if x is None or y is None: + raise ValueError(self._kind + " requires an x and y column") + if is_integer(x) and not self.data.columns.holds_integer(): + x = self.data.columns[x] + if is_integer(y) and not self.data.columns.holds_integer(): + y = self.data.columns[y] + + # Scatter plot allows to plot objects data + if self._kind == "hexbin": + if len(self.data[x]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires x column to be numeric") + if len(self.data[y]._get_numeric_data()) == 0: + raise ValueError(self._kind + " requires y column to be numeric") + + self.x = x + self.y = y + + @property + def nseries(self): + return 1 + + def _post_plot_logic(self, ax, data): + x, y = self.x, self.y + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) + + def _plot_colorbar(self, ax, **kwds): + # Addresses issues #10611 and #10678: + # When plotting scatterplots and hexbinplots in IPython + # inline backend the colorbar axis height tends not to + # exactly match the parent axis height. + # The difference is due to small fractional differences + # in floating points with similar representation. + # To deal with this, this method forces the colorbar + # height to take the height of the parent axes. + # For a more detailed description of the issue + # see the following link: + # https://github.com/ipython/ipython/issues/11215 + img = ax.collections[0] + cbar = self.fig.colorbar(img, ax=ax, **kwds) + + if _mpl_ge_3_0_0(): + # The workaround below is no longer necessary. 
+ return + + points = ax.get_position().get_points() + cbar_points = cbar.ax.get_position().get_points() + + cbar.ax.set_position( + [ + cbar_points[0, 0], + points[0, 1], + cbar_points[1, 0] - cbar_points[0, 0], + points[1, 1] - points[0, 1], + ] + ) + # To see the discrepancy in axis heights uncomment + # the following two lines: + # print(points[1, 1] - points[0, 1]) + # print(cbar_points[1, 1] - cbar_points[0, 1]) + + +class ScatterPlot(PlanePlot): + _kind = "scatter" + + def __init__(self, data, x, y, s=None, c=None, **kwargs): + if s is None: + # hide the matplotlib default for size, in case we want to change + # the handling of this argument later + s = 20 + super().__init__(data, x, y, s=s, **kwargs) + if is_integer(c) and not self.data.columns.holds_integer(): + c = self.data.columns[c] + self.c = c + + def _make_plot(self): + x, y, c, data = self.x, self.y, self.c, self.data + ax = self.axes[0] + + c_is_column = is_hashable(c) and c in self.data.columns + + # plot a colorbar only if a colormap is provided or necessary + cb = self.kwds.pop("colorbar", self.colormap or c_is_column) + + # pandas uses colormap, matplotlib uses cmap. 
+ cmap = self.colormap or "Greys" + cmap = self.plt.cm.get_cmap(cmap) + color = self.kwds.pop("color", None) + if c is not None and color is not None: + raise TypeError("Specify exactly one of `c` and `color`") + elif c is None and color is None: + c_values = self.plt.rcParams["patch.facecolor"] + elif color is not None: + c_values = color + elif c_is_column: + c_values = self.data[c].values + else: + c_values = c + + if self.legend and hasattr(self, "label"): + label = self.label + else: + label = None + scatter = ax.scatter( + data[x].values, + data[y].values, + c=c_values, + label=label, + cmap=cmap, + **self.kwds, + ) + if cb: + cbar_label = c if c_is_column else "" + self._plot_colorbar(ax, label=cbar_label) + + if label is not None: + self._add_legend_handle(scatter, label) + else: + self.legend = False + + errors_x = self._get_errorbars(label=x, index=0, yerr=False) + errors_y = self._get_errorbars(label=y, index=0, xerr=False) + if len(errors_x) > 0 or len(errors_y) > 0: + err_kwds = dict(errors_x, **errors_y) + err_kwds["ecolor"] = scatter.get_facecolor()[0] + ax.errorbar(data[x].values, data[y].values, linestyle="none", **err_kwds) + + +class HexBinPlot(PlanePlot): + _kind = "hexbin" + + def __init__(self, data, x, y, C=None, **kwargs): + super().__init__(data, x, y, **kwargs) + if is_integer(C) and not self.data.columns.holds_integer(): + C = self.data.columns[C] + self.C = C + + def _make_plot(self): + x, y, data, C = self.x, self.y, self.data, self.C + ax = self.axes[0] + # pandas uses colormap, matplotlib uses cmap. 
+ cmap = self.colormap or "BuGn" + cmap = self.plt.cm.get_cmap(cmap) + cb = self.kwds.pop("colorbar", True) + + if C is None: + c_values = None + else: + c_values = data[C].values + + ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, **self.kwds) + if cb: + self._plot_colorbar(ax) + + def _make_legend(self): + pass + + +class LinePlot(MPLPlot): + _kind = "line" + _default_rot = 0 + orientation = "vertical" + + def __init__(self, data, **kwargs): + from pandas.plotting import plot_params + + MPLPlot.__init__(self, data, **kwargs) + if self.stacked: + self.data = self.data.fillna(value=0) + self.x_compat = plot_params["x_compat"] + if "x_compat" in self.kwds: + self.x_compat = bool(self.kwds.pop("x_compat")) + + def _is_ts_plot(self): + # this is slightly deceptive + return not self.x_compat and self.use_index and self._use_dynamic_x() + + def _use_dynamic_x(self): + from pandas.plotting._matplotlib.timeseries import _use_dynamic_x + + return _use_dynamic_x(self._get_ax(0), self.data) + + def _make_plot(self): + if self._is_ts_plot(): + from pandas.plotting._matplotlib.timeseries import _maybe_convert_index + + data = _maybe_convert_index(self._get_ax(0), self.data) + + x = data.index # dummy, not used + plotf = self._ts_plot + it = self._iter_data(data=data, keep_index=True) + else: + x = self._get_xticks(convert_period=True) + plotf = self._plot + it = self._iter_data() + + stacking_id = self._get_stacking_id() + is_errorbar = com.any_not_none(*self.errors.values()) + + colors = self._get_colors() + for i, (label, y) in enumerate(it): + ax = self._get_ax(i) + kwds = self.kwds.copy() + style, kwds = self._apply_style_colors(colors, kwds, i, label) + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) # .encode('utf-8') + kwds["label"] = label + + newlines = plotf( + ax, + x, + y, + style=style, + column_num=i, + stacking_id=stacking_id, + is_errorbar=is_errorbar, + **kwds, + ) + 
self._add_legend_handle(newlines[0], label, index=i) + + if self._is_ts_plot(): + + # reset of xlim should be used for ts data + # TODO: GH28021, should find a way to change view limit on xaxis + lines = _get_all_lines(ax) + left, right = _get_xlim(lines) + ax.set_xlim(left, right) + + @classmethod + def _plot(cls, ax, x, y, style=None, column_num=None, stacking_id=None, **kwds): + # column_num is used to get the target column from plotf in line and + # area plots + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"]) + lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) + cls._update_stacker(ax, stacking_id, y) + return lines + + @classmethod + def _ts_plot(cls, ax, x, data, style=None, **kwds): + from pandas.plotting._matplotlib.timeseries import ( + _maybe_resample, + _decorate_axes, + format_dateaxis, + ) + + # accept x to be consistent with normal plot func, + # x is not passed to tsplot as it uses data.index as x coordinate + # column_num must be in kwds for stacking purpose + freq, data = _maybe_resample(data, ax, kwds) + + # Set ax with freq info + _decorate_axes(ax, freq, kwds) + # digging deeper + if hasattr(ax, "left_ax"): + _decorate_axes(ax.left_ax, freq, kwds) + if hasattr(ax, "right_ax"): + _decorate_axes(ax.right_ax, freq, kwds) + ax._plot_data.append((data, cls._kind, kwds)) + + lines = cls._plot(ax, data.index, data.values, style=style, **kwds) + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, data.index) + return lines + + def _get_stacking_id(self): + if self.stacked: + return id(self.data) + else: + return None + + @classmethod + def _initialize_stacker(cls, ax, stacking_id, n): + if stacking_id is None: + return + if not hasattr(ax, "_stacker_pos_prior"): + ax._stacker_pos_prior = {} + if not hasattr(ax, "_stacker_neg_prior"): + ax._stacker_neg_prior = {} + ax._stacker_pos_prior[stacking_id] = np.zeros(n) +
ax._stacker_neg_prior[stacking_id] = np.zeros(n) + + @classmethod + def _get_stacked_values(cls, ax, stacking_id, values, label): + if stacking_id is None: + return values + if not hasattr(ax, "_stacker_pos_prior"): + # stacker may not be initialized for subplots + cls._initialize_stacker(ax, stacking_id, len(values)) + + if (values >= 0).all(): + return ax._stacker_pos_prior[stacking_id] + values + elif (values <= 0).all(): + return ax._stacker_neg_prior[stacking_id] + values + + raise ValueError( + "When stacked is True, each column must be either " + "all positive or negative." + f"{label} contains both positive and negative values" + ) + + @classmethod + def _update_stacker(cls, ax, stacking_id, values): + if stacking_id is None: + return + if (values >= 0).all(): + ax._stacker_pos_prior[stacking_id] += values + elif (values <= 0).all(): + ax._stacker_neg_prior[stacking_id] += values + + def _post_plot_logic(self, ax, data): + from matplotlib.ticker import FixedLocator + + def get_label(i): + try: + return pprint_thing(data.index[i]) + except Exception: + return "" + + if self._need_to_set_index: + xticks = ax.get_xticks() + xticklabels = [get_label(x) for x in xticks] + ax.set_xticklabels(xticklabels) + ax.xaxis.set_major_locator(FixedLocator(xticks)) + + condition = ( + not self._use_dynamic_x() + and data.index.is_all_dates + and not self.subplots + or (self.subplots and self.sharex) + ) + + index_name = self._get_index_name() + + if condition: + # irregular TS rotated 30 deg. by default + # probably a better place to check / set this. 
+ if not self._rot_set: + self.rot = 30 + format_date_labels(ax, rot=self.rot) + + if index_name is not None and self.use_index: + ax.set_xlabel(index_name) + + +class AreaPlot(LinePlot): + _kind = "area" + + def __init__(self, data, **kwargs): + kwargs.setdefault("stacked", True) + data = data.fillna(value=0) + LinePlot.__init__(self, data, **kwargs) + + if not self.stacked: + # use smaller alpha to distinguish overlap + self.kwds.setdefault("alpha", 0.5) + + if self.logy or self.loglog: + raise ValueError("Log-y scales are not supported in area plot") + + @classmethod + def _plot( + cls, + ax, + x, + y, + style=None, + column_num=None, + stacking_id=None, + is_errorbar=False, + **kwds, + ): + + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"]) + + # need to remove label, because subplots uses mpl legend as it is + line_kwds = kwds.copy() + line_kwds.pop("label") + lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) + + # get data from the line to get coordinates for fill_between + xdata, y_values = lines[0].get_data(orig=False) + + # unable to use ``_get_stacked_values`` here to get starting point + if stacking_id is None: + start = np.zeros(len(y)) + elif (y >= 0).all(): + start = ax._stacker_pos_prior[stacking_id] + elif (y <= 0).all(): + start = ax._stacker_neg_prior[stacking_id] + else: + start = np.zeros(len(y)) + + if "color" not in kwds: + kwds["color"] = lines[0].get_color() + + rect = ax.fill_between(xdata, start, y_values, **kwds) + cls._update_stacker(ax, stacking_id, y) + + # LinePlot expects list of artists + res = [rect] + return res + + def _post_plot_logic(self, ax, data): + LinePlot._post_plot_logic(self, ax, data) + + if self.ylim is None: + if (data >= 0).all().all(): + ax.set_ylim(0, None) + elif (data <= 0).all().all(): + ax.set_ylim(None, 0) + + +class BarPlot(MPLPlot): + _kind = "bar" + _default_rot = 90 + orientation = "vertical" + + 
def __init__(self, data, **kwargs): + # we have to treat a series differently than a + # 1-column DataFrame w.r.t. color handling + self._is_series = isinstance(data, ABCSeries) + self.bar_width = kwargs.pop("width", 0.5) + pos = kwargs.pop("position", 0.5) + kwargs.setdefault("align", "center") + self.tick_pos = np.arange(len(data)) + + self.bottom = kwargs.pop("bottom", 0) + self.left = kwargs.pop("left", 0) + + self.log = kwargs.pop("log", False) + MPLPlot.__init__(self, data, **kwargs) + + if self.stacked or self.subplots: + self.tickoffset = self.bar_width * pos + if kwargs["align"] == "edge": + self.lim_offset = self.bar_width / 2 + else: + self.lim_offset = 0 + else: + if kwargs["align"] == "edge": + w = self.bar_width / self.nseries + self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 + self.lim_offset = w * 0.5 + else: + self.tickoffset = self.bar_width * pos + self.lim_offset = 0 + + self.ax_pos = self.tick_pos - self.tickoffset + + def _args_adjust(self): + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + if is_list_like(self.left): + self.left = np.array(self.left) + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.bar(x, y, w, bottom=start, log=log, **kwds) + + @property + def _start_base(self): + return self.bottom + + def _make_plot(self): + import matplotlib as mpl + + colors = self._get_colors() + ncolors = len(colors) + + pos_prior = neg_prior = np.zeros(len(self.data)) + K = self.nseries + + for i, (label, y) in enumerate(self._iter_data(fillna=0)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + if self._is_series: + kwds["color"] = colors + else: + kwds["color"] = colors[i % ncolors] + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) + + if (("yerr" in kwds) or ("xerr" in kwds)) and (kwds.get("ecolor") is None): + kwds["ecolor"] = mpl.rcParams["xtick.color"] + + start = 0 + if self.log and (y >= 1).all(): + start 
= 1 + start = start + self._start_base + + if self.subplots: + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + ax.set_title(label) + elif self.stacked: + mask = y > 0 + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot( + ax, + self.ax_pos + w, + y, + self.bar_width, + start=start, + label=label, + log=self.log, + **kwds, + ) + pos_prior = pos_prior + np.where(mask, y, 0) + neg_prior = neg_prior + np.where(mask, 0, y) + else: + w = self.bar_width / K + rect = self._plot( + ax, + self.ax_pos + (i + 0.5) * w, + y, + w, + start=start, + label=label, + log=self.log, + **kwds, + ) + self._add_legend_handle(rect, label, index=i) + + def _post_plot_logic(self, ax, data): + if self.use_index: + str_index = [pprint_thing(key) for key in data.index] + else: + str_index = [pprint_thing(key) for key in range(data.shape[0])] + name = self._get_index_name() + + s_edge = self.ax_pos[0] - 0.25 + self.lim_offset + e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset + + self._decorate_ticks(ax, name, str_index, s_edge, e_edge) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + ax.set_xlim((start_edge, end_edge)) + + if self.xticks is not None: + ax.set_xticks(np.array(self.xticks)) + else: + ax.set_xticks(self.tick_pos) + ax.set_xticklabels(ticklabels) + + if name is not None and self.use_index: + ax.set_xlabel(name) + + +class BarhPlot(BarPlot): + _kind = "barh" + _default_rot = 0 + orientation = "horizontal" + + @property + def _start_base(self): + return self.left + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.barh(x, y, w, left=start, log=log, **kwds) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + # horizontal bars + ax.set_ylim((start_edge, end_edge)) + ax.set_yticks(self.tick_pos) + 
ax.set_yticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_ylabel(name) + + +class PiePlot(MPLPlot): + _kind = "pie" + _layout_type = "horizontal" + + def __init__(self, data, kind=None, **kwargs): + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError(f"{kind} doesn't allow negative values") + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _validate_color_args(self): + pass + + def _make_plot(self): + colors = self._get_colors(num_colors=len(self.data), color_kwds="colors") + self.kwds.setdefault("colors", colors) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + def blank_labeler(label, value): + if value == 0: + return "" + else: + return label + + idx = [pprint_thing(v) for v in self.data.index] + labels = kwds.pop("labels", idx) + # labels is used for each wedge's labels + # Blank out labels for values of 0 so they don't overlap + # with nonzero wedges + if labels is not None: + blabels = [blank_labeler(l, value) for l, value in zip(labels, y)] + else: + blabels = None + results = ax.pie(y, labels=blabels, **kwds) + + if kwds.get("autopct", None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + self._add_legend_handle(p, l) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py new file mode 100644 index 00000000..f8b2c7ab --- /dev/null +++ b/pandas/plotting/_matplotlib/hist.py @@ -0,0 +1,413 @@ +import numpy as np + +from pandas.core.dtypes.common 
import is_integer, is_list_like +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass +from pandas.core.dtypes.missing import isna, remove_na_arraylike + +import pandas.core.common as com + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.core import LinePlot, MPLPlot +from pandas.plotting._matplotlib.tools import _flatten, _set_ticks_props, _subplots + + +class HistPlot(LinePlot): + _kind = "hist" + + def __init__(self, data, bins=10, bottom=0, **kwargs): + self.bins = bins # use mpl default + self.bottom = bottom + # Do not call LinePlot.__init__ which may fill nan + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if is_integer(self.bins): + # create common bin edge + values = self.data._convert(datetime=True)._get_numeric_data() + values = np.ravel(values) + values = values[~isna(values)] + + _, self.bins = np.histogram( + values, + bins=self.bins, + range=self.kwds.get("range", None), + weights=self.kwds.get("weights", None), + ) + + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + + @classmethod + def _plot( + cls, + ax, + y, + style=None, + bins=None, + bottom=0, + column_num=0, + stacking_id=None, + **kwds, + ): + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(bins) - 1) + y = y[~isna(y)] + + base = np.zeros(len(bins) - 1) + bottom = bottom + cls._get_stacked_values(ax, stacking_id, base, kwds["label"]) + # ignore style + n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) + cls._update_stacker(ax, stacking_id, n) + return patches + + def _make_plot(self): + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + kwds["label"] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + kwds["style"] = style + + kwds = 
self._make_plot_keywords(kwds, y) + artists = self._plot(ax, y, column_num=i, stacking_id=stacking_id, **kwds) + self._add_legend_handle(artists[0], label, index=i) + + def _make_plot_keywords(self, kwds, y): + """merge BoxPlot/KdePlot properties to passed kwds""" + # y is required for KdePlot + kwds["bottom"] = self.bottom + kwds["bins"] = self.bins + return kwds + + def _post_plot_logic(self, ax, data): + if self.orientation == "horizontal": + ax.set_xlabel("Frequency") + else: + ax.set_ylabel("Frequency") + + @property + def orientation(self): + if self.kwds.get("orientation", None) == "horizontal": + return "horizontal" + else: + return "vertical" + + +class KdePlot(HistPlot): + _kind = "kde" + orientation = "vertical" + + def __init__(self, data, bw_method=None, ind=None, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + self.bw_method = bw_method + self.ind = ind + + def _args_adjust(self): + pass + + def _get_ind(self, y): + if self.ind is None: + # np.nanmax() and np.nanmin() ignores the missing values + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace( + np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, + 1000, + ) + elif is_integer(self.ind): + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace( + np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, + self.ind, + ) + else: + ind = self.ind + return ind + + @classmethod + def _plot( + cls, + ax, + y, + style=None, + bw_method=None, + ind=None, + column_num=None, + stacking_id=None, + **kwds, + ): + from scipy.stats import gaussian_kde + + y = remove_na_arraylike(y) + gkde = gaussian_kde(y, bw_method=bw_method) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _make_plot_keywords(self, kwds, y): + kwds["bw_method"] = self.bw_method + kwds["ind"] = self._get_ind(y) + return kwds + + def _post_plot_logic(self, ax, data): + ax.set_ylabel("Density") + + +def _grouped_plot( + plotf, + 
data, + column=None, + by=None, + numeric_only=True, + figsize=None, + sharex=True, + sharey=True, + layout=None, + rot=0, + ax=None, + **kwargs, +): + + if figsize == "default": + # allowed to specify mpl default with 'default' + raise ValueError( + "figsize='default' is no longer supported. " + "Specify figure size by tuple instead" + ) + + grouped = data.groupby(by) + if column is not None: + grouped = grouped[column] + + naxes = len(grouped) + fig, axes = _subplots( + naxes=naxes, figsize=figsize, sharex=sharex, sharey=sharey, ax=ax, layout=layout + ) + + _axes = _flatten(axes) + + for i, (key, group) in enumerate(grouped): + ax = _axes[i] + if numeric_only and isinstance(group, ABCDataFrame): + group = group._get_numeric_data() + plotf(group, ax, **kwargs) + ax.set_title(pprint_thing(key)) + + return fig, axes + + +def _grouped_hist( + data, + column=None, + by=None, + ax=None, + bins=50, + figsize=None, + layout=None, + sharex=False, + sharey=False, + rot=90, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + **kwargs, +): + """ + Grouped histogram + + Parameters + ---------- + data : Series/DataFrame + column : object, optional + by : object, optional + ax : axes, optional + bins : int, default 50 + figsize : tuple, optional + layout : optional + sharex : bool, default False + sharey : bool, default False + rot : int, default 90 + grid : bool, default True + kwargs : dict, keyword arguments passed to matplotlib.Axes.hist + + Returns + ------- + collection of Matplotlib Axes + """ + + def plot_group(group, ax): + ax.hist(group.dropna().values, bins=bins, **kwargs) + + if xrot is None: + xrot = rot + + fig, axes = _grouped_plot( + plot_group, + data, + column=column, + by=by, + sharex=sharex, + sharey=sharey, + ax=ax, + figsize=figsize, + layout=layout, + rot=rot, + ) + + _set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + + fig.subplots_adjust( + bottom=0.15, top=0.9, left=0.1, 
right=0.9, hspace=0.5, wspace=0.3 + ) + return axes + + +def hist_series( + self, + by=None, + ax=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + figsize=None, + bins=10, + **kwds, +): + import matplotlib.pyplot as plt + + if by is None: + if kwds.get("layout", None) is not None: + raise ValueError("The 'layout' keyword is not supported when 'by' is None") + # hack until the plotting interface is a bit more unified + fig = kwds.pop( + "figure", plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize) + ) + if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError("passed axis not bound to passed figure") + values = self.dropna().values + + ax.hist(values, bins=bins, **kwds) + ax.grid(grid) + axes = np.array([ax]) + + _set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + + else: + if "figure" in kwds: + raise ValueError( + "Cannot pass 'figure' when using the " + "'by' argument, since a new 'Figure' instance " + "will be created" + ) + axes = _grouped_hist( + self, + by=by, + ax=ax, + grid=grid, + figsize=figsize, + bins=bins, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + **kwds, + ) + + if hasattr(axes, "ndim"): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def hist_frame( + data, + column=None, + by=None, + grid=True, + xlabelsize=None, + xrot=None, + ylabelsize=None, + yrot=None, + ax=None, + sharex=False, + sharey=False, + figsize=None, + layout=None, + bins=10, + **kwds, +): + if by is not None: + axes = _grouped_hist( + data, + column=column, + by=by, + ax=ax, + grid=grid, + figsize=figsize, + sharex=sharex, + sharey=sharey, + layout=layout, + bins=bins, + xlabelsize=xlabelsize, + xrot=xrot, + ylabelsize=ylabelsize, + yrot=yrot, + **kwds, + ) + return axes + + if 
column is not None: + if not isinstance(column, (list, np.ndarray, ABCIndexClass)): + column = [column] + data = data[column] + data = data._get_numeric_data() + naxes = len(data.columns) + + if naxes == 0: + raise ValueError("hist method requires numerical columns, nothing to plot.") + + fig, axes = _subplots( + naxes=naxes, + ax=ax, + squeeze=False, + sharex=sharex, + sharey=sharey, + figsize=figsize, + layout=layout, + ) + _axes = _flatten(axes) + + for i, col in enumerate(com.try_sort(data.columns)): + ax = _axes[i] + ax.hist(data[col].dropna().values, bins=bins, **kwds) + ax.set_title(col) + ax.grid(grid) + + _set_ticks_props( + axes, xlabelsize=xlabelsize, xrot=xrot, ylabelsize=ylabelsize, yrot=yrot + ) + fig.subplots_adjust(wspace=0.3, hspace=0.3) + + return axes diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py new file mode 100644 index 00000000..0720f544 --- /dev/null +++ b/pandas/plotting/_matplotlib/misc.py @@ -0,0 +1,431 @@ +import random + +import matplotlib.lines as mlines +import matplotlib.patches as patches +import numpy as np + +from pandas.core.dtypes.missing import notna + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.style import _get_standard_colors +from pandas.plotting._matplotlib.tools import _set_ticks_props, _subplots + + +def scatter_matrix( + frame, + alpha=0.5, + figsize=None, + ax=None, + grid=False, + diagonal="hist", + marker=".", + density_kwds=None, + hist_kwds=None, + range_padding=0.05, + **kwds, +): + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False) + + # no gaps between subplots + fig.subplots_adjust(wspace=0, hspace=0) + + mask = notna(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault("edgecolors", "none") + + boundaries_list = [] + for a in df.columns: + 
values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2.0 + boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) + + for i, a in enumerate(df.columns): + for j, b in enumerate(df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. + if diagonal == "hist": + ax.hist(values, **hist_kwds) + + elif diagonal in ("kde", "density"): + from scipy.stats import gaussian_kde + + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter( + df[b][common], df[a][common], marker=marker, alpha=alpha, **kwds + ) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + if marker not in mlines.lineMarkers: + return "o" + return marker + + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + import matplotlib.pyplot as plt + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = 
frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) + + to_plot = {} + colors = _get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array( + [ + (np.cos(t), np.sin(t)) + for t in [2.0 * np.pi * (i / float(m)) for i in range(m)] + ] + ) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter( + to_plot[kls][0], + to_plot[kls][1], + color=colors[i], + label=pprint_thing(kls), + **kwds, + ) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor="none")) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor="gray")) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text( + xy[0] - 0.025, xy[1] - 0.025, name, ha="right", va="top", size="small" + ) + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text( + xy[0] - 0.025, + xy[1] + 0.025, + name, + ha="right", + va="bottom", + size="small", + ) + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text( + xy[0] + 0.025, xy[1] - 0.025, name, ha="left", va="top", size="small" + ) + elif xy[0] >= 0.0 and xy[1] >= 0.0: + ax.text( + xy[0] + 0.025, xy[1] + 0.025, name, ha="left", va="bottom", size="small" + ) + + ax.axis("equal") + return ax + + +def andrews_curves( + frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwds +): + import matplotlib.pyplot as plt + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / np.sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. 
+ coeffs = np.delete(np.copy(amplitudes), 0) + coeffs.resize(int((coeffs.size + 1) / 2), 2) + + # Generate the harmonics and arguments for the sin and cos + # functions. + harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum( + coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0, + ) + return result + + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-np.pi, np.pi, samples) + used_legends = set() + + color_values = _get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca(xlim=(-np.pi, np.pi)) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc="upper right") + ax.grid() + return ax + + +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + + import matplotlib.pyplot as plt + + # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array( + [(min(sampling) + max(sampling)) * 0.5 for sampling in samplings] + ) + if fig is None: + fig = plt.figure() + x = list(range(samples)) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 
= fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + return fig + + +def parallel_coordinates( + frame, + class_column, + cols=None, + ax=None, + color=None, + use_columns=False, + xticks=None, + colormap=None, + axvlines=True, + axvlines_kwds=None, + sort_labels=False, + **kwds, +): + import matplotlib.pyplot as plt + + if axvlines_kwds is None: + axvlines_kwds = {"linewidth": 1, "color": "black"} + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends = set() + + ncols = len(df.columns) + + # determine values to use for xticks + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError("Columns must be numeric to be used as xticks") + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError("xticks specified must be numeric") + elif len(xticks) != ncols: + raise ValueError("Length of xticks must match number of columns") + x = xticks + else: + x = list(range(ncols)) + + if ax is None: + ax = plt.gca() + + color_values = _get_standard_colors( + num_colors=len(classes), colormap=colormap, color_type="random", color=color + ) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not 
in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc="upper right") + ax.grid() + return ax + + +def lag_plot(series, lag=1, ax=None, **kwds): + # workaround because `c='b'` is hardcoded in matplotlibs scatter method + import matplotlib.pyplot as plt + + kwds.setdefault("c", plt.rcParams["patch.facecolor"]) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + ax.set_ylabel(f"y(t + {lag})") + ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series, ax=None, **kwds): + import matplotlib.pyplot as plt + + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / float(n) + + def r(h): + return ((data[: n - h] - mean) * (data[h:] - mean)).sum() / float(n) / c0 + + x = np.arange(n) + 1 + y = [r(loc) for loc in x] + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle="--", color="grey") + ax.axhline(y=z95 / np.sqrt(n), color="grey") + ax.axhline(y=0.0, color="black") + ax.axhline(y=-z95 / np.sqrt(n), color="grey") + ax.axhline(y=-z99 / np.sqrt(n), linestyle="--", color="grey") + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if "label" in kwds: + ax.legend() + ax.grid() + return ax diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py new file mode 100644 index 00000000..fd69265b --- /dev/null +++ b/pandas/plotting/_matplotlib/style.py @@ -0,0 +1,92 @@ +# being a bit too dynamic +import warnings + +import matplotlib.cm as cm +import matplotlib.colors +import numpy as np + +from pandas.core.dtypes.common import 
is_list_like + +import pandas.core.common as com + + +def _get_standard_colors( + num_colors=None, colormap=None, color_type="default", color=None +): + import matplotlib.pyplot as plt + + if color is None and colormap is not None: + if isinstance(colormap, str): + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError(f"Colormap {cmap} is not recognized") + colors = [colormap(num) for num in np.linspace(0, 1, num=num_colors)] + elif color is not None: + if colormap is not None: + warnings.warn( + "'color' and 'colormap' cannot be used simultaneously. Using 'color'" + ) + colors = list(color) if is_list_like(color) else color + else: + if color_type == "default": + # need to call list() on the result to copy so we don't + # modify the global rcParams below + try: + colors = [c["color"] for c in list(plt.rcParams["axes.prop_cycle"])] + except KeyError: + colors = list(plt.rcParams.get("axes.color_cycle", list("bgrcmyk"))) + if isinstance(colors, str): + colors = list(colors) + + colors = colors[0:num_colors] + elif color_type == "random": + + def random_color(column): + """ Returns a random color represented as a list of length 3""" + # GH17525 use common._random_state to avoid resetting the seed + rs = com.random_state(column) + return rs.rand(3).tolist() + + colors = [random_color(num) for num in range(num_colors)] + else: + raise ValueError("color_type must be either 'default' or 'random'") + + if isinstance(colors, str): + conv = matplotlib.colors.ColorConverter() + + def _maybe_valid_colors(colors): + try: + [conv.to_rgba(c) for c in colors] + return True + except ValueError: + return False + + # check whether the string can be convertible to single color + maybe_single_color = _maybe_valid_colors([colors]) + # check whether each character can be convertible to colors + maybe_color_cycle = _maybe_valid_colors(list(colors)) + if maybe_single_color and maybe_color_cycle and len(colors) > 1: + hex_color = [c["color"] for c in 
list(plt.rcParams["axes.prop_cycle"])] + colors = [hex_color[int(colors[1])]] + elif maybe_single_color: + colors = [colors] + else: + # ``colors`` is regarded as color cycle. + # mpl will raise error any of them is invalid + pass + + # Append more colors by cycling if there is not enough color. + # Extra colors will be ignored by matplotlib if there are more colors + # than needed and nothing needs to be done here. + if len(colors) < num_colors: + try: + multiple = num_colors // len(colors) - 1 + except ZeroDivisionError: + raise ValueError("Invalid color argument: ''") + mod = num_colors % len(colors) + + colors += multiple * colors + colors += colors[:mod] + + return colors diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py new file mode 100644 index 00000000..3abce690 --- /dev/null +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -0,0 +1,311 @@ +# TODO: Use the fact that axis can have units to simplify the process + +import functools + +import numpy as np + +from pandas._libs.tslibs.frequencies import ( + FreqGroup, + get_base_alias, + get_freq, + is_subperiod, + is_superperiod, +) +from pandas._libs.tslibs.period import Period + +from pandas.core.dtypes.generic import ( + ABCDatetimeIndex, + ABCPeriodIndex, + ABCTimedeltaIndex, +) + +from pandas.io.formats.printing import pprint_thing +from pandas.plotting._matplotlib.converter import ( + TimeSeries_DateFormatter, + TimeSeries_DateLocator, + TimeSeries_TimedeltaFormatter, +) +import pandas.tseries.frequencies as frequencies +from pandas.tseries.offsets import DateOffset + +# --------------------------------------------------------------------- +# Plotting functions and monkey patches + + +def _maybe_resample(series, ax, kwargs): + # resample against axes freq if necessary + freq, ax_freq = _get_freq(ax, series) + + if freq is None: # pragma: no cover + raise ValueError("Cannot use dynamic axis without frequency info") + + # Convert DatetimeIndex to PeriodIndex 
+ if isinstance(series.index, ABCDatetimeIndex): + series = series.to_period(freq=freq) + + if ax_freq is not None and freq != ax_freq: + if is_superperiod(freq, ax_freq): # upsample input + series = series.copy() + series.index = series.index.asfreq(ax_freq, how="s") + freq = ax_freq + elif _is_sup(freq, ax_freq): # one is weekly + how = kwargs.pop("how", "last") + series = getattr(series.resample("D"), how)().dropna() + series = getattr(series.resample(ax_freq), how)().dropna() + freq = ax_freq + elif is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): + _upsample_others(ax, freq, kwargs) + else: # pragma: no cover + raise ValueError("Incompatible frequency conversion") + return freq, series + + +def _is_sub(f1, f2): + return (f1.startswith("W") and is_subperiod("D", f2)) or ( + f2.startswith("W") and is_subperiod(f1, "D") + ) + + +def _is_sup(f1, f2): + return (f1.startswith("W") and is_superperiod("D", f2)) or ( + f2.startswith("W") and is_superperiod(f1, "D") + ) + + +def _upsample_others(ax, freq, kwargs): + legend = ax.get_legend() + lines, labels = _replot_ax(ax, freq, kwargs) + _replot_ax(ax, freq, kwargs) + + other_ax = None + if hasattr(ax, "left_ax"): + other_ax = ax.left_ax + if hasattr(ax, "right_ax"): + other_ax = ax.right_ax + + if other_ax is not None: + rlines, rlabels = _replot_ax(other_ax, freq, kwargs) + lines.extend(rlines) + labels.extend(rlabels) + + if legend is not None and kwargs.get("legend", True) and len(lines) > 0: + title = legend.get_title().get_text() + if title == "None": + title = None + ax.legend(lines, labels, loc="best", title=title) + + +def _replot_ax(ax, freq, kwargs): + data = getattr(ax, "_plot_data", None) + + # clear current axes and data + ax._plot_data = [] + ax.clear() + + _decorate_axes(ax, freq, kwargs) + + lines = [] + labels = [] + if data is not None: + for series, plotf, kwds in data: + series = series.copy() + idx = series.index.asfreq(freq, how="S") + series.index = idx + ax._plot_data.append((series, 
plotf, kwds)) + + # for tsplot + if isinstance(plotf, str): + from pandas.plotting._matplotlib import PLOT_CLASSES + + plotf = PLOT_CLASSES[plotf]._plot + + lines.append(plotf(ax, series.index._mpl_repr(), series.values, **kwds)[0]) + labels.append(pprint_thing(series.name)) + + return lines, labels + + +def _decorate_axes(ax, freq, kwargs): + """Initialize axes for time-series plotting""" + if not hasattr(ax, "_plot_data"): + ax._plot_data = [] + + ax.freq = freq + xaxis = ax.get_xaxis() + xaxis.freq = freq + if not hasattr(ax, "legendlabels"): + ax.legendlabels = [kwargs.get("label", None)] + else: + ax.legendlabels.append(kwargs.get("label", None)) + ax.view_interval = None + ax.date_axis_info = None + + +def _get_ax_freq(ax): + """ + Get the freq attribute of the ax object if set. + Also checks shared axes (eg when using secondary yaxis, sharex=True + or twinx) + """ + ax_freq = getattr(ax, "freq", None) + if ax_freq is None: + # check for left/right ax in case of secondary yaxis + if hasattr(ax, "left_ax"): + ax_freq = getattr(ax.left_ax, "freq", None) + elif hasattr(ax, "right_ax"): + ax_freq = getattr(ax.right_ax, "freq", None) + if ax_freq is None: + # check if a shared ax (sharex/twinx) has already freq set + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + if len(shared_axes) > 1: + for shared_ax in shared_axes: + ax_freq = getattr(shared_ax, "freq", None) + if ax_freq is not None: + break + return ax_freq + + +def _get_freq(ax, series): + # get frequency from data + freq = getattr(series.index, "freq", None) + if freq is None: + freq = getattr(series.index, "inferred_freq", None) + + ax_freq = _get_ax_freq(ax) + + # use axes freq if no data freq + if freq is None: + freq = ax_freq + + # get the period frequency + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = get_base_alias(freq) + + freq = frequencies.get_period_alias(freq) + return freq, ax_freq + + +def _use_dynamic_x(ax, data): + freq = _get_index_freq(data) + 
ax_freq = _get_ax_freq(ax) + + if freq is None: # convert irregular if axes has freq info + freq = ax_freq + else: # do not use tsplot if irregular was plotted first + if (ax_freq is None) and (len(ax.get_lines()) > 0): + return False + + if freq is None: + return False + + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + if freq is None: + return False + + # hack this for 0.10.1, creating more technical debt...sigh + if isinstance(data.index, ABCDatetimeIndex): + base = get_freq(freq) + x = data.index + if base <= FreqGroup.FR_DAY: + return x[:1].is_normalized + return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] + return True + + +def _get_index_freq(data): + freq = getattr(data.index, "freq", None) + if freq is None: + freq = getattr(data.index, "inferred_freq", None) + if freq == "B": + weekdays = np.unique(data.index.dayofweek) + if (5 in weekdays) or (6 in weekdays): + freq = None + return freq + + +def _maybe_convert_index(ax, data): + # tsplot converts automatically, but don't want to convert index + # over and over for DataFrames + if isinstance(data.index, (ABCDatetimeIndex, ABCPeriodIndex)): + freq = getattr(data.index, "freq", None) + + if freq is None: + freq = getattr(data.index, "inferred_freq", None) + if isinstance(freq, DateOffset): + freq = freq.rule_code + + if freq is None: + freq = _get_ax_freq(ax) + + if freq is None: + raise ValueError("Could not get frequency alias for plotting") + + freq = get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + if isinstance(data.index, ABCDatetimeIndex): + data = data.tz_localize(None).to_period(freq=freq) + elif isinstance(data.index, ABCPeriodIndex): + data.index = data.index.asfreq(freq=freq) + return data + + +# Patch methods for subplot. Only format_dateaxis is currently used. +# Do we need the rest for convenience? 
+ + +def _format_coord(freq, t, y): + time_period = Period(ordinal=int(t), freq=freq) + return f"t = {time_period} y = {y:8f}" + + +def format_dateaxis(subplot, freq, index): + """ + Pretty-formats the date axis (x-axis). + + Major and minor ticks are automatically set for the frequency of the + current underlying series. As the dynamic mode is activated by + default, changing the limits of the x axis will intelligently change + the positions of the ticks. + """ + from matplotlib import pylab + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. DatetimeIndex uses matplotlib.date directly + if isinstance(index, ABCPeriodIndex): + + majlocator = TimeSeries_DateLocator( + freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot + ) + minlocator = TimeSeries_DateLocator( + freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot + ) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter( + freq, dynamic_mode=True, minor_locator=False, plot_obj=subplot + ) + minformatter = TimeSeries_DateFormatter( + freq, dynamic_mode=True, minor_locator=True, plot_obj=subplot + ) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = functools.partial(_format_coord, freq) + + elif isinstance(index, ABCTimedeltaIndex): + subplot.xaxis.set_major_formatter(TimeSeries_TimedeltaFormatter()) + else: + raise TypeError("index type not supported") + + pylab.draw_if_interactive() diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py new file mode 100644 index 00000000..aa551689 --- /dev/null +++ b/pandas/plotting/_matplotlib/tools.py @@ -0,0 +1,378 @@ +# being a bit too dynamic +from math import ceil +import warnings + +import matplotlib.table +import matplotlib.ticker as ticker +import numpy as np + +from pandas.core.dtypes.common 
import is_list_like +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries + +from pandas.plotting._matplotlib import compat + + +def format_date_labels(ax, rot): + # mini version of autofmt_xdate + for label in ax.get_xticklabels(): + label.set_ha("right") + label.set_rotation(rot) + fig = ax.get_figure() + fig.subplots_adjust(bottom=0.2) + + +def table(ax, data, rowLabels=None, colLabels=None, **kwargs): + if isinstance(data, ABCSeries): + data = data.to_frame() + elif isinstance(data, ABCDataFrame): + pass + else: + raise ValueError("Input data must be DataFrame or Series") + + if rowLabels is None: + rowLabels = data.index + + if colLabels is None: + colLabels = data.columns + + cellText = data.values + + table = matplotlib.table.table( + ax, cellText=cellText, rowLabels=rowLabels, colLabels=colLabels, **kwargs + ) + return table + + +def _get_layout(nplots, layout=None, layout_type="box"): + if layout is not None: + if not isinstance(layout, (tuple, list)) or len(layout) != 2: + raise ValueError("Layout must be a tuple of (rows, columns)") + + nrows, ncols = layout + + # Python 2 compat + ceil_ = lambda x: int(ceil(x)) + if nrows == -1 and ncols > 0: + layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols) + elif ncols == -1 and nrows > 0: + layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows)) + elif ncols <= 0 and nrows <= 0: + msg = "At least one dimension of layout must be positive" + raise ValueError(msg) + + if nrows * ncols < nplots: + raise ValueError( + f"Layout of {nrows}x{ncols} must be larger than required size {nplots}" + ) + + return layout + + if layout_type == "single": + return (1, 1) + elif layout_type == "horizontal": + return (1, nplots) + elif layout_type == "vertical": + return (nplots, 1) + + layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} + try: + return layouts[nplots] + except KeyError: + k = 1 + while k ** 2 < nplots: + k += 1 + + if (k - 1) * k >= nplots: + return k, (k - 1) + else: + 
return k, k + + +# copied from matplotlib/pyplot.py and modified for pandas.plotting + + +def _subplots( + naxes=None, + sharex=False, + sharey=False, + squeeze=True, + subplot_kw=None, + ax=None, + layout=None, + layout_type="box", + **fig_kw, +): + """Create a figure with a set of subplots already made. + + This utility wrapper makes it convenient to create common layouts of + subplots, including the enclosing figure object, in a single call. + + Keyword arguments: + + naxes : int + Number of required axes. Exceeded axes are set invisible. Default is + nrows * ncols. + + sharex : bool + If True, the X axis will be shared amongst all subplots. + + sharey : bool + If True, the Y axis will be shared amongst all subplots. + + squeeze : bool + + If True, extra dimensions are squeezed out from the returned axis object: + - if only one subplot is constructed (nrows=ncols=1), the resulting + single Axis object is returned as a scalar. + - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object + array of Axis objects are returned as numpy 1-d arrays. + - for NxM subplots with N>1 and M>1 are returned as a 2d array. + + If False, no squeezing is done: the returned axis object is always + a 2-d array containing Axis instances, even if it ends up being 1x1. + + subplot_kw : dict + Dict with keywords passed to the add_subplot() call used to create each + subplots. + + ax : Matplotlib axis object, optional + + layout : tuple + Number of rows and columns of the subplot grid. + If not specified, calculated from naxes and layout_type + + layout_type : {'box', 'horizontal', 'vertical'}, default 'box' + Specify how to layout the subplot grid. + + fig_kw : Other keyword arguments to be passed to the figure() call. + Note that all keywords not recognized above will be + automatically included here. 
+ + Returns: + + fig, ax : tuple + - fig is the Matplotlib Figure object + - ax can be either a single axis object or an array of axis objects if + more than one subplot was created. The dimensions of the resulting array + can be controlled with the squeeze keyword, see above. + + **Examples:** + + x = np.linspace(0, 2*np.pi, 400) + y = np.sin(x**2) + + # Just a figure and one subplot + f, ax = plt.subplots() + ax.plot(x, y) + ax.set_title('Simple plot') + + # Two subplots, unpack the output array immediately + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + ax1.plot(x, y) + ax1.set_title('Sharing Y axis') + ax2.scatter(x, y) + + # Four polar axes + plt.subplots(2, 2, subplot_kw=dict(polar=True)) + """ + import matplotlib.pyplot as plt + + if subplot_kw is None: + subplot_kw = {} + + if ax is None: + fig = plt.figure(**fig_kw) + else: + if is_list_like(ax): + ax = _flatten(ax) + if layout is not None: + warnings.warn( + "When passing multiple axes, layout keyword is ignored", UserWarning + ) + if sharex or sharey: + warnings.warn( + "When passing multiple axes, sharex and sharey " + "are ignored. These settings must be specified " + "when creating axes", + UserWarning, + stacklevel=4, + ) + if len(ax) == naxes: + fig = ax[0].get_figure() + return fig, ax + else: + raise ValueError( + f"The number of passed axes must be {naxes}, the " + "same as the output plot" + ) + + fig = ax.get_figure() + # if ax is passed and a number of subplots is 1, return ax as it is + if naxes == 1: + if squeeze: + return fig, ax + else: + return fig, _flatten(ax) + else: + warnings.warn( + "To output multiple subplots, the figure containing " + "the passed axes is being cleared", + UserWarning, + stacklevel=4, + ) + fig.clear() + + nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) + nplots = nrows * ncols + + # Create empty object array to hold all axes. 
It's easiest to make it 1-d + # so we can just append subplots upon creation, and then + axarr = np.empty(nplots, dtype=object) + + # Create first subplot separately, so we can share it if requested + ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) + + if sharex: + subplot_kw["sharex"] = ax0 + if sharey: + subplot_kw["sharey"] = ax0 + axarr[0] = ax0 + + # Note off-by-one counting because add_subplot uses the MATLAB 1-based + # convention. + for i in range(1, nplots): + kwds = subplot_kw.copy() + # Set sharex and sharey to None for blank/dummy axes, these can + # interfere with proper axis limits on the visible axes if + # they share axes e.g. issue #7528 + if i >= naxes: + kwds["sharex"] = None + kwds["sharey"] = None + ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) + axarr[i] = ax + + if naxes != nplots: + for ax in axarr[naxes:]: + ax.set_visible(False) + + _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + + if squeeze: + # Reshape the array to have the final desired dimension (nrow,ncol), + # though discarding unneeded dimensions that equal 1. If we only have + # one subplot, just return it instead of a 1-element array. 
+ if nplots == 1: + axes = axarr[0] + else: + axes = axarr.reshape(nrows, ncols).squeeze() + else: + # returned axis array will be always 2-d, even if nrows=ncols=1 + axes = axarr.reshape(nrows, ncols) + + return fig, axes + + +def _remove_labels_from_axis(axis): + for t in axis.get_majorticklabels(): + t.set_visible(False) + + # set_visible will not be effective if + # minor axis has NullLocator and NullFormatter (default) + if isinstance(axis.get_minor_locator(), ticker.NullLocator): + axis.set_minor_locator(ticker.AutoLocator()) + if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): + axis.set_minor_formatter(ticker.FormatStrFormatter("")) + for t in axis.get_minorticklabels(): + t.set_visible(False) + + axis.get_label().set_visible(False) + + +def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): + if nplots > 1: + if compat._mpl_ge_3_2_0(): + row_num = lambda x: x.get_subplotspec().rowspan.start + col_num = lambda x: x.get_subplotspec().colspan.start + else: + row_num = lambda x: x.rowNum + col_num = lambda x: x.colNum + + if nrows > 1: + try: + # first find out the ax layout, + # so that we can correctly handle "gaps" + layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) + for ax in axarr: + layout[row_num(ax), col_num(ax)] = ax.get_visible() + + for ax in axarr: + # only the last row of subplots should get x labels -> all + # other off layout handles the case that the subplot is + # the last in the column, because below is no subplot/gap. + if not layout[row_num(ax) + 1, col_num(ax)]: + continue + if sharex or len(ax.get_shared_x_axes().get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + except IndexError: + # if gridspec is used, ax.rowNum and ax.colNum may differ + # from layout shape. 
in this case, use last_row logic + for ax in axarr: + if ax.is_last_row(): + continue + if sharex or len(ax.get_shared_x_axes().get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + if ncols > 1: + for ax in axarr: + # only the first column should get y labels -> set all other to + # off as we only have labels in the first column and we always + # have a subplot there, we can skip the layout test + if ax.is_first_col(): + continue + if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.yaxis) + + +def _flatten(axes): + if not is_list_like(axes): + return np.array([axes]) + elif isinstance(axes, (np.ndarray, ABCIndexClass)): + return axes.ravel() + return np.array(axes) + + +def _set_ticks_props(axes, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None): + import matplotlib.pyplot as plt + + for ax in _flatten(axes): + if xlabelsize is not None: + plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) + if xrot is not None: + plt.setp(ax.get_xticklabels(), rotation=xrot) + if ylabelsize is not None: + plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) + if yrot is not None: + plt.setp(ax.get_yticklabels(), rotation=yrot) + return axes + + +def _get_all_lines(ax): + lines = ax.get_lines() + + if hasattr(ax, "right_ax"): + lines += ax.right_ax.get_lines() + + if hasattr(ax, "left_ax"): + lines += ax.left_ax.get_lines() + + return lines + + +def _get_xlim(lines): + left, right = np.inf, -np.inf + for l in lines: + x = l.get_xdata(orig=False) + left = min(np.nanmin(x), left) + right = max(np.nanmax(x), right) + return left, right diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py new file mode 100644 index 00000000..ccd42d39 --- /dev/null +++ b/pandas/plotting/_misc.py @@ -0,0 +1,487 @@ +from contextlib import contextmanager + +from pandas.plotting._core import _get_plot_backend + + +def table(ax, data, rowLabels=None, colLabels=None, **kwargs): + """ + Helper function to convert DataFrame and Series to 
matplotlib.table. + + Parameters + ---------- + ax : Matplotlib axes object + data : DataFrame or Series + Data for table contents. + **kwargs + Keyword arguments to be passed to matplotlib.table.table. + If `rowLabels` or `colLabels` is not specified, data index or column + name will be used. + + Returns + ------- + matplotlib table object + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.table( + ax=ax, data=data, rowLabels=None, colLabels=None, **kwargs + ) + + +def register(): + """ + Register Pandas Formatters and Converters with matplotlib. + + This function modifies the global ``matplotlib.units.registry`` + dictionary. Pandas adds custom converters for + + * pd.Timestamp + * pd.Period + * np.datetime64 + * datetime.datetime + * datetime.date + * datetime.time + + See Also + -------- + deregister_matplotlib_converters + """ + plot_backend = _get_plot_backend("matplotlib") + plot_backend.register() + + +def deregister(): + """ + Remove pandas' formatters and converters. + + Removes the custom converters added by :func:`register`. This + attempts to set the state of the registry back to the state before + pandas registered its own units. Converters for pandas' own types like + Timestamp and Period are removed completely. Converters for types + pandas overwrites, like ``datetime.datetime``, are restored to their + original value. + + See Also + -------- + register_matplotlib_converters + """ + plot_backend = _get_plot_backend("matplotlib") + plot_backend.deregister() + + +def scatter_matrix( + frame, + alpha=0.5, + figsize=None, + ax=None, + grid=False, + diagonal="hist", + marker=".", + density_kwds=None, + hist_kwds=None, + range_padding=0.05, + **kwargs, +): + """ + Draw a matrix of scatter plots. + + Parameters + ---------- + frame : DataFrame + alpha : float, optional + Amount of transparency applied. + figsize : (float,float), optional + A tuple (width, height) in inches. 
+ ax : Matplotlib axis object, optional + grid : bool, optional + Setting this to True will show the grid. + diagonal : {'hist', 'kde'} + Pick between 'kde' and 'hist' for either Kernel Density Estimation or + Histogram plot in the diagonal. + marker : str, optional + Matplotlib marker type, default '.'. + density_kwds : keywords + Keyword arguments to be passed to kernel density estimate plot. + hist_kwds : keywords + Keyword arguments to be passed to hist function. + range_padding : float, default 0.05 + Relative extension of axis range in x and y with respect to + (x_max - x_min) or (y_max - y_min). + **kwargs + Keyword arguments to be passed to scatter function. + + Returns + ------- + numpy.ndarray + A matrix of scatter plots. + + Examples + -------- + >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) + >>> scatter_matrix(df, alpha=0.2) + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.scatter_matrix( + frame=frame, + alpha=alpha, + figsize=figsize, + ax=ax, + grid=grid, + diagonal=diagonal, + marker=marker, + density_kwds=density_kwds, + hist_kwds=hist_kwds, + range_padding=range_padding, + **kwargs, + ) + + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + """ + Plot a multidimensional dataset in 2D. + + Each Series in the DataFrame is represented as an evenly distributed + slice on a circle. Each data point is rendered in the circle according to + the value on each Series. Highly correlated `Series` in the `DataFrame` + are placed closer on the unit circle. + + RadViz allows projecting an N-dimensional data set into a 2D space where the + influence of each dimension can be interpreted as a balance between the + influence of all dimensions. + + More info available at the `original article + <https://doi.org/10.1145/331770.331775>`_ + describing RadViz. + + Parameters + ---------- + frame : `DataFrame` + Pandas object holding the data. + class_column : str + Column name containing the name of the data point category. 
+ ax : :class:`matplotlib.axes.Axes`, optional + A plot instance to which to add the information. + color : list[str] or tuple[str], optional + Assign a color to each category. Example: ['blue', 'green']. + colormap : str or :class:`matplotlib.colors.Colormap`, default None + Colormap to select colors from. If string, load colormap with that + name from matplotlib. + **kwds + Options to pass to matplotlib scatter plotting method. + + Returns + ------- + class:`matplotlib.axes.Axes` + + See Also + -------- + plotting.andrews_curves : Plot clustering visualization. + + Examples + -------- + .. plot:: + :context: close-figs + + >>> df = pd.DataFrame({ + ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, + ... 6.7, 4.6], + ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, + ... 3.3, 3.6], + ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, + ... 5.7, 1.0], + ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, + ... 2.1, 0.2], + ... 'Category': ['virginica', 'virginica', 'setosa', + ... 'virginica', 'virginica', 'versicolor', + ... 'versicolor', 'setosa', 'virginica', + ... 'setosa'] + ... }) + >>> rad_viz = pd.plotting.radviz(df, 'Category') # doctest: +SKIP + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.radviz( + frame=frame, + class_column=class_column, + ax=ax, + color=color, + colormap=colormap, + **kwds, + ) + + +def andrews_curves( + frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs +): + """ + Generate a matplotlib plot of Andrews curves, for visualising clusters of + multivariate data. + + Andrews curves have the functional form: + + f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + + x_4 sin(2t) + x_5 cos(2t) + ... + + Where x coefficients correspond to the values of each dimension and t is + linearly spaced between -pi and +pi. Each row of frame then corresponds to + a single curve. 
+ + Parameters + ---------- + frame : DataFrame + Data to be plotted, preferably normalized to (0.0, 1.0). + class_column : Name of the column containing class names + ax : matplotlib axes object, default None + samples : Number of points to plot in each curve + color : list or tuple, optional + Colors to use for the different classes. + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + :class:`matplotlib.axes.Axes` + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.andrews_curves( + frame=frame, + class_column=class_column, + ax=ax, + samples=samples, + color=color, + colormap=colormap, + **kwargs, + ) + + +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + """ + Bootstrap plot on mean, median and mid-range statistics. + + The bootstrap plot is used to estimate the uncertainty of a statistic + by relying on random sampling with replacement [1]_. This function will + generate bootstrapping plots for mean, median and mid-range statistics + for the given number of samples of the given size. + + .. [1] "Bootstrapping (statistics)" in \ + https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29 + + Parameters + ---------- + series : pandas.Series + Pandas Series from where to get the samplings for the bootstrapping. + fig : matplotlib.figure.Figure, default None + If given, it will use the `fig` reference for plotting instead of + creating a new one with default parameters. + size : int, default 50 + Number of data points to consider during each sampling. It must be + less than or equal to the length of the `series`. + samples : int, default 500 + Number of times the bootstrap procedure is performed. + **kwds + Options to pass to matplotlib plotting method. + + Returns + ------- + matplotlib.figure.Figure + Matplotlib figure. 
+ + See Also + -------- + DataFrame.plot : Basic plotting for DataFrame objects. + Series.plot : Basic plotting for Series objects. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> s = pd.Series(np.random.uniform(size=100)) + >>> fig = pd.plotting.bootstrap_plot(s) # doctest: +SKIP + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.bootstrap_plot( + series=series, fig=fig, size=size, samples=samples, **kwds + ) + + +def parallel_coordinates( + frame, + class_column, + cols=None, + ax=None, + color=None, + use_columns=False, + xticks=None, + colormap=None, + axvlines=True, + axvlines_kwds=None, + sort_labels=False, + **kwargs, +): + """ + Parallel coordinates plotting. + + Parameters + ---------- + frame : DataFrame + class_column : str + Column name containing class names. + cols : list, optional + A list of column names to use. + ax : matplotlib.axis, optional + Matplotlib axis object. + color : list or tuple, optional + Colors to use for the different classes. + use_columns : bool, optional + If true, columns will be used as xticks. + xticks : list or tuple, optional + A list of values to use for xticks. + colormap : str or matplotlib colormap, default None + Colormap to use for line colors. + axvlines : bool, optional + If true, vertical lines will be added at each xtick. + axvlines_kwds : keywords, optional + Options to be passed to axvline method for vertical lines. + sort_labels : bool, default False + Sort class_column labels, useful when assigning colors. + **kwargs + Options to pass to matplotlib plotting method. 
+ + Returns + ------- + class:`matplotlib.axis.Axes` + + Examples + -------- + >>> from matplotlib import pyplot as plt + >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master' + '/pandas/tests/data/csv/iris.csv') + >>> pd.plotting.parallel_coordinates( + df, 'Name', + color=('#556270', '#4ECDC4', '#C7F464')) + >>> plt.show() + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.parallel_coordinates( + frame=frame, + class_column=class_column, + cols=cols, + ax=ax, + color=color, + use_columns=use_columns, + xticks=xticks, + colormap=colormap, + axvlines=axvlines, + axvlines_kwds=axvlines_kwds, + sort_labels=sort_labels, + **kwargs, + ) + + +def lag_plot(series, lag=1, ax=None, **kwds): + """ + Lag plot for time series. + + Parameters + ---------- + series : Time series + lag : lag of the scatter plot, default 1 + ax : Matplotlib axis object, optional + **kwds + Matplotlib scatter method keyword arguments. + + Returns + ------- + class:`matplotlib.axis.Axes` + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) + + +def autocorrelation_plot(series, ax=None, **kwargs): + """ + Autocorrelation plot for time series. + + Parameters + ---------- + series : Time series + ax : Matplotlib axis object, optional + **kwargs + Options to pass to matplotlib plotting method. + + Returns + ------- + class:`matplotlib.axis.Axes` + """ + plot_backend = _get_plot_backend("matplotlib") + return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs) + + +class _Options(dict): + """ + Stores pandas plotting options. + + Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but is stored in a canonical + format that makes it easy to breakdown into groups later. 
+ """ + + # alias so the names are same as plotting method parameter names + _ALIASES = {"x_compat": "xaxis.compat"} + _DEFAULT_KEYS = ["xaxis.compat"] + + def __init__(self, deprecated=False): + self._deprecated = deprecated + super().__setitem__("xaxis.compat", False) + + def __getitem__(self, key): + key = self._get_canonical_key(key) + if key not in self: + raise ValueError(f"{key} is not a valid pandas plotting option") + return super().__getitem__(key) + + def __setitem__(self, key, value): + key = self._get_canonical_key(key) + return super().__setitem__(key, value) + + def __delitem__(self, key): + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError(f"Cannot remove default parameter {key}") + return super().__delitem__(key) + + def __contains__(self, key) -> bool: + key = self._get_canonical_key(key) + return super().__contains__(key) + + def reset(self): + """ + Reset the option store to its initial state + + Returns + ------- + None + """ + self.__init__() + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + + @contextmanager + def use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. + """ + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + +plot_params = _Options() diff --git a/pandas/testing.py b/pandas/testing.py new file mode 100644 index 00000000..0445fa5b --- /dev/null +++ b/pandas/testing.py @@ -0,0 +1,17 @@ +""" +Public testing utility functions. 
+""" + +from pandas._testing import ( + assert_extension_array_equal, + assert_frame_equal, + assert_index_equal, + assert_series_equal, +) + +__all__ = [ + "assert_extension_array_equal", + "assert_frame_equal", + "assert_series_equal", + "assert_index_equal", +] diff --git a/pandas/tests/__init__.py b/pandas/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/api/__init__.py b/pandas/tests/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py new file mode 100644 index 00000000..5aab5b81 --- /dev/null +++ b/pandas/tests/api/test_api.py @@ -0,0 +1,332 @@ +import subprocess +import sys +from typing import List + +import pytest + +import pandas as pd +from pandas import api, compat +import pandas._testing as tm + + +class Base: + def check(self, namespace, expected, ignored=None): + # see which names are in the namespace, minus optional + # ignored ones + # compare vs the expected + + result = sorted(f for f in dir(namespace) if not f.startswith("__")) + if ignored is not None: + result = sorted(set(result) - set(ignored)) + + expected = sorted(expected) + tm.assert_almost_equal(result, expected) + + +class TestPDApi(Base): + # these are optionally imported based on testing + # & need to be ignored + ignored = ["tests", "locale", "conftest"] + + # top-level sub-packages + lib = [ + "api", + "arrays", + "compat", + "core", + "errors", + "pandas", + "plotting", + "test", + "testing", + "tseries", + "util", + "options", + "io", + ] + + # these are already deprecated; awaiting removal + deprecated_modules: List[str] = ["np", "datetime"] + + # misc + misc = ["IndexSlice", "NaT", "NA"] + + # top-level classes + classes = [ + "Categorical", + "CategoricalIndex", + "DataFrame", + "DateOffset", + "DatetimeIndex", + "ExcelFile", + "ExcelWriter", + "Float64Index", + "Grouper", + "HDFStore", + "Index", + "Int64Index", + "MultiIndex", + "Period", + 
"PeriodIndex", + "RangeIndex", + "UInt64Index", + "Series", + "SparseDtype", + "StringDtype", + "Timedelta", + "TimedeltaIndex", + "Timestamp", + "Interval", + "IntervalIndex", + "CategoricalDtype", + "PeriodDtype", + "IntervalDtype", + "DatetimeTZDtype", + "BooleanDtype", + "Int8Dtype", + "Int16Dtype", + "Int32Dtype", + "Int64Dtype", + "UInt8Dtype", + "UInt16Dtype", + "UInt32Dtype", + "UInt64Dtype", + "NamedAgg", + ] + + # these are already deprecated; awaiting removal + deprecated_classes: List[str] = [] + + # these should be deprecated in the future + deprecated_classes_in_future: List[str] = ["SparseArray"] + + if not compat.PY37: + classes.extend(["Panel", "SparseSeries", "SparseDataFrame"]) + # deprecated_modules.extend(["np", "datetime"]) + # deprecated_classes_in_future.extend(["SparseArray"]) + + # external modules exposed in pandas namespace + modules: List[str] = [] + + # top-level functions + funcs = [ + "array", + "bdate_range", + "concat", + "crosstab", + "cut", + "date_range", + "interval_range", + "eval", + "factorize", + "get_dummies", + "infer_freq", + "isna", + "isnull", + "lreshape", + "melt", + "notna", + "notnull", + "offsets", + "merge", + "merge_ordered", + "merge_asof", + "period_range", + "pivot", + "pivot_table", + "qcut", + "show_versions", + "timedelta_range", + "unique", + "value_counts", + "wide_to_long", + ] + + # top-level option funcs + funcs_option = [ + "reset_option", + "describe_option", + "get_option", + "option_context", + "set_option", + "set_eng_float_format", + ] + + # top-level read_* funcs + funcs_read = [ + "read_clipboard", + "read_csv", + "read_excel", + "read_fwf", + "read_gbq", + "read_hdf", + "read_html", + "read_json", + "read_pickle", + "read_sas", + "read_sql", + "read_sql_query", + "read_sql_table", + "read_stata", + "read_table", + "read_feather", + "read_parquet", + "read_orc", + "read_spss", + ] + + # top-level json funcs + funcs_json = ["json_normalize"] + + # top-level to_* funcs + funcs_to = 
["to_datetime", "to_numeric", "to_pickle", "to_timedelta"] + + # top-level to deprecate in the future + deprecated_funcs_in_future: List[str] = [] + + # these are already deprecated; awaiting removal + deprecated_funcs: List[str] = [] + + # private modules in pandas namespace + private_modules = [ + "_config", + "_hashtable", + "_lib", + "_libs", + "_np_version_under1p14", + "_np_version_under1p15", + "_np_version_under1p16", + "_np_version_under1p17", + "_np_version_under1p18", + "_is_numpy_dev", + "_testing", + "_tslib", + "_typing", + "_version", + ] + + def test_api(self): + + checkthese = ( + self.lib + + self.misc + + self.modules + + self.classes + + self.funcs + + self.funcs_option + + self.funcs_read + + self.funcs_json + + self.funcs_to + + self.private_modules + ) + if not compat.PY37: + checkthese.extend( + self.deprecated_modules + + self.deprecated_classes + + self.deprecated_classes_in_future + + self.deprecated_funcs_in_future + + self.deprecated_funcs + ) + self.check(pd, checkthese, self.ignored) + + def test_depr(self): + deprecated_list = ( + self.deprecated_modules + + self.deprecated_classes + + self.deprecated_classes_in_future + + self.deprecated_funcs + + self.deprecated_funcs_in_future + ) + for depr in deprecated_list: + with tm.assert_produces_warning(FutureWarning): + deprecated = getattr(pd, depr) + if not compat.PY37: + if depr == "datetime": + deprecated.__getattr__(dir(pd.datetime.datetime)[-1]) + elif depr == "SparseArray": + deprecated([]) + else: + deprecated.__getattr__(dir(deprecated)[-1]) + + +def test_datetime(): + from datetime import datetime + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert datetime(2015, 1, 2, 0, 0) == pd.datetime(2015, 1, 2, 0, 0) + + assert isinstance(pd.datetime(2015, 1, 2, 0, 0), pd.datetime) + + +def test_sparsearray(): + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert 
isinstance(pd.array([1, 2, 3], dtype="Sparse"), pd.SparseArray) + + +def test_np(): + import numpy as np + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + assert (pd.np.arange(0, 10) == np.arange(0, 10)).all() + + +class TestApi(Base): + allowed = ["types", "extensions", "indexers"] + + def test_api(self): + self.check(api, self.allowed) + + +class TestTesting(Base): + funcs = [ + "assert_frame_equal", + "assert_series_equal", + "assert_index_equal", + "assert_extension_array_equal", + ] + + def test_testing(self): + from pandas import testing + + self.check(testing, self.funcs) + + def test_util_testing_deprecated(self): + # avoid cache state affecting the test + sys.modules.pop("pandas.util.testing", None) + + with tm.assert_produces_warning(FutureWarning) as m: + import pandas.util.testing # noqa: F401 + + assert "pandas.util.testing is deprecated" in str(m[0].message) + assert "pandas.testing instead" in str(m[0].message) + + def test_util_testing_deprecated_direct(self): + # avoid cache state affecting the test + sys.modules.pop("pandas.util.testing", None) + with tm.assert_produces_warning(FutureWarning) as m: + from pandas.util.testing import assert_series_equal # noqa: F401 + + assert "pandas.util.testing is deprecated" in str(m[0].message) + assert "pandas.testing instead" in str(m[0].message) + + def test_util_in_top_level(self): + # in a subprocess to avoid import caching issues + out = subprocess.check_output( + [ + sys.executable, + "-c", + "import pandas; pandas.util.testing.assert_series_equal", + ], + stderr=subprocess.STDOUT, + ).decode() + assert "pandas.util.testing is deprecated" in out + + with pytest.raises(AttributeError, match="foo"): + pd.util.foo diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py new file mode 100644 index 00000000..31423c03 --- /dev/null +++ b/pandas/tests/api/test_types.py @@ -0,0 +1,64 @@ +import pandas._testing as tm +from pandas.api 
import types + +from .test_api import Base + + +class TestTypes(Base): + + allowed = [ + "is_bool", + "is_bool_dtype", + "is_categorical", + "is_categorical_dtype", + "is_complex", + "is_complex_dtype", + "is_datetime64_any_dtype", + "is_datetime64_dtype", + "is_datetime64_ns_dtype", + "is_datetime64tz_dtype", + "is_dtype_equal", + "is_float", + "is_float_dtype", + "is_int64_dtype", + "is_integer", + "is_integer_dtype", + "is_number", + "is_numeric_dtype", + "is_object_dtype", + "is_scalar", + "is_sparse", + "is_string_dtype", + "is_signed_integer_dtype", + "is_timedelta64_dtype", + "is_timedelta64_ns_dtype", + "is_unsigned_integer_dtype", + "is_period_dtype", + "is_interval", + "is_interval_dtype", + "is_re", + "is_re_compilable", + "is_dict_like", + "is_iterator", + "is_file_like", + "is_list_like", + "is_hashable", + "is_array_like", + "is_named_tuple", + "pandas_dtype", + "union_categoricals", + "infer_dtype", + "is_extension_array_dtype", + ] + deprecated = ["is_extension_type"] + dtypes = ["CategoricalDtype", "DatetimeTZDtype", "PeriodDtype", "IntervalDtype"] + + def test_types(self): + + self.check(types, self.allowed + self.dtypes + self.deprecated) + + def test_deprecated_from_api_types(self): + + for t in self.deprecated: + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + getattr(types, t)(1) diff --git a/pandas/tests/arithmetic/__init__.py b/pandas/tests/arithmetic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py new file mode 100644 index 00000000..83d19b8a --- /dev/null +++ b/pandas/tests/arithmetic/common.py @@ -0,0 +1,89 @@ +""" +Assertion helpers for arithmetic tests. +""" +import numpy as np +import pytest + +from pandas import DataFrame, Index, Series +import pandas._testing as tm + + +def assert_invalid_addsub_type(left, right, msg=None): + """ + Helper to assert that left and right can be neither added nor subtracted. 
+ + Parameters + --------- + left : object + right : object + msg : str or None, default None + """ + with pytest.raises(TypeError, match=msg): + left + right + with pytest.raises(TypeError, match=msg): + right + left + with pytest.raises(TypeError, match=msg): + left - right + with pytest.raises(TypeError, match=msg): + right - left + + +def get_upcast_box(box, vector): + """ + Given two box-types, find the one that takes priority + """ + if box is DataFrame or isinstance(vector, DataFrame): + return DataFrame + if box is Series or isinstance(vector, Series): + return Series + if box is Index or isinstance(vector, Index): + return Index + return box + + +def assert_invalid_comparison(left, right, box): + """ + Assert that comparison operations with mismatched types behave correctly. + + Parameters + ---------- + left : np.ndarray, ExtensionArray, Index, or Series + right : object + box : {pd.DataFrame, pd.Series, pd.Index, tm.to_array} + """ + # Not for tznaive-tzaware comparison + + # Note: not quite the same as how we do this for tm.box_expected + xbox = box if box is not Index else np.array + + result = left == right + expected = xbox(np.zeros(result.shape, dtype=np.bool_)) + + tm.assert_equal(result, expected) + + result = right == left + tm.assert_equal(result, expected) + + result = left != right + tm.assert_equal(result, ~expected) + + result = right != left + tm.assert_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + left < right + with pytest.raises(TypeError, match=msg): + left <= right + with pytest.raises(TypeError, match=msg): + left > right + with pytest.raises(TypeError, match=msg): + left >= right + with pytest.raises(TypeError, match=msg): + right < left + with pytest.raises(TypeError, match=msg): + right <= left + with pytest.raises(TypeError, match=msg): + right > left + with pytest.raises(TypeError, match=msg): + right >= left diff --git 
a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py new file mode 100644 index 00000000..577093c0 --- /dev/null +++ b/pandas/tests/arithmetic/conftest.py @@ -0,0 +1,248 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +# ------------------------------------------------------------------ +# Helper Functions + + +def id_func(x): + if isinstance(x, tuple): + assert len(x) == 2 + return x[0].__name__ + "-" + str(x[1]) + else: + return x.__name__ + + +# ------------------------------------------------------------------ + + +@pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) +def one(request): + """ + Several variants of integer value 1. The zero-dim integer array + behaves like an integer. + + This fixture can be used to check that datetimelike indexes handle + addition and subtraction of integers and zero-dimensional arrays + of integers. + + Examples + -------- + >>> dti = pd.date_range('2016-01-01', periods=2, freq='H') + >>> dti + DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00'], + dtype='datetime64[ns]', freq='H') + >>> dti + one + DatetimeIndex(['2016-01-01 01:00:00', '2016-01-01 02:00:00'], + dtype='datetime64[ns]', freq='H') + """ + return request.param + + +zeros = [ + box_cls([0] * 5, dtype=dtype) + for box_cls in [pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64] +] +zeros.extend( + [box_cls([-0.0] * 5, dtype=np.float64) for box_cls in [pd.Index, np.array]] +) +zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([np.array(-0.0, dtype=np.float64)]) +zeros.extend([0, 0.0, -0.0]) + + +@pytest.fixture(params=zeros) +def zero(request): + """ + Several types of scalar zeros and length 5 vectors of zeros. + + This fixture can be used to check that numeric-dtype indexes handle + division by any zero numeric-dtype. 
+ + Uses vector of length 5 for broadcasting with `numeric_idx` fixture, + which creates numeric-dtype vectors also of length 5. + + Examples + -------- + >>> arr = pd.RangeIndex(5) + >>> arr / zeros + Float64Index([nan, inf, inf, inf, inf], dtype='float64') + """ + return request.param + + +# ------------------------------------------------------------------ +# Vector Fixtures + + +@pytest.fixture( + params=[ + pd.Float64Index(np.arange(5, dtype="float64")), + pd.Int64Index(np.arange(5, dtype="int64")), + pd.UInt64Index(np.arange(5, dtype="uint64")), + pd.RangeIndex(5), + ], + ids=lambda x: type(x).__name__, +) +def numeric_idx(request): + """ + Several types of numeric-dtypes Index objects + """ + return request.param + + +# ------------------------------------------------------------------ +# Scalar Fixtures + + +@pytest.fixture( + params=[ + pd.Timedelta("5m4s").to_pytimedelta(), + pd.Timedelta("5m4s"), + pd.Timedelta("5m4s").to_timedelta64(), + ], + ids=lambda x: type(x).__name__, +) +def scalar_td(request): + """ + Several variants of Timedelta scalars representing 5 minutes and 4 seconds + """ + return request.param + + +@pytest.fixture( + params=[ + pd.offsets.Day(3), + pd.offsets.Hour(72), + pd.Timedelta(days=3).to_pytimedelta(), + pd.Timedelta("72:00:00"), + np.timedelta64(3, "D"), + np.timedelta64(72, "h"), + ], + ids=lambda x: type(x).__name__, +) +def three_days(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 3-day timedelta + """ + return request.param + + +@pytest.fixture( + params=[ + pd.offsets.Hour(2), + pd.offsets.Minute(120), + pd.Timedelta(hours=2).to_pytimedelta(), + pd.Timedelta(seconds=2 * 3600), + np.timedelta64(2, "h"), + np.timedelta64(120, "m"), + ], + ids=lambda x: type(x).__name__, +) +def two_hours(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 2-hour timedelta + """ + return request.param + + +_common_mismatch = [ + pd.offsets.YearBegin(2), + 
pd.offsets.MonthBegin(1), + pd.offsets.Minute(), +] + + +@pytest.fixture( + params=[ + pd.Timedelta(minutes=30).to_pytimedelta(), + np.timedelta64(30, "s"), + pd.Timedelta(seconds=30), + ] + + _common_mismatch +) +def not_hourly(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Hourly frequencies. + """ + return request.param + + +@pytest.fixture( + params=[ + np.timedelta64(4, "h"), + pd.Timedelta(hours=23).to_pytimedelta(), + pd.Timedelta("23:00:00"), + ] + + _common_mismatch +) +def not_daily(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Daily frequencies. + """ + return request.param + + +@pytest.fixture( + params=[ + np.timedelta64(365, "D"), + pd.Timedelta(days=365).to_pytimedelta(), + pd.Timedelta(days=365), + ] + + _common_mismatch +) +def mismatched_freq(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Monthly or Annual frequencies. + """ + return request.param + + +# ------------------------------------------------------------------ + + +@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame], ids=id_func) +def box(request): + """ + Several array-like containers that should have effectively identical + behavior with respect to arithmetic operations. + """ + return request.param + + +@pytest.fixture( + params=[ + pd.Index, + pd.Series, + pytest.param(pd.DataFrame, marks=pytest.mark.xfail), + tm.to_array, + ], + ids=id_func, +) +def box_df_fail(request): + """ + Fixture equivalent to `box` fixture but xfailing the DataFrame case. 
+ """ + return request.param + + +@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame, tm.to_array], ids=id_func) +def box_with_array(request): + """ + Fixture to test behavior for Index, Series, DataFrame, and pandas Array + classes + """ + return request.param + + +# alias so we can use the same fixture for multiple parameters in a test +box_with_array2 = box_with_array diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py new file mode 100644 index 00000000..d3f9ac4f --- /dev/null +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -0,0 +1,2397 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +# Specifically for datetime64 and datetime64tz dtypes +from datetime import datetime, timedelta +from itertools import product, starmap +import operator +import warnings + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.conversion import localize_pydatetime +from pandas._libs.tslibs.offsets import shift_months +from pandas.compat.numpy import np_datetime64_compat +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import ( + DatetimeIndex, + NaT, + Period, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.ops import roperator +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestDatetime64ArrayLikeComparisons: + # Comparison tests for datetime64 vectors fully parametrized over + # DataFrame/Series/DatetimeIndex/DatetimeArray. Ideally all comparison + # tests will eventually end up here. 
+ + def test_compare_zerodim(self, tz_naive_fixture, box_with_array): + # Test comparison with zero-dimensional array is unboxed + tz = tz_naive_fixture + box = box_with_array + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + dti = date_range("20130101", periods=3, tz=tz) + + other = np.array(dti.to_numpy()[0]) + + dtarr = tm.box_expected(dti, box) + result = dtarr <= other + expected = np.array([True, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + "foo", + -1, + 99, + 4.0, + object(), + timedelta(days=2), + # GH#19800, GH#19301 datetime.date comparison raises to + # match DatetimeIndex/Timestamp. This also matches the behavior + # of stdlib datetime.datetime + datetime(2001, 1, 1).date(), + # GH#19301 None and NaN are *not* cast to NaT for comparisons + None, + np.nan, + ], + ) + def test_dt64arr_cmp_scalar_invalid(self, other, tz_naive_fixture, box_with_array): + # GH#22074, GH#15966 + tz = tz_naive_fixture + + rng = date_range("1/1/2000", periods=10, tz=tz) + dtarr = tm.box_expected(rng, box_with_array) + assert_invalid_comparison(dtarr, other, box_with_array) + + @pytest.mark.parametrize( + "other", + [ + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.timedelta_range("1ns", periods=10).array, + np.array(pd.timedelta_range("1ns", periods=10)), + list(pd.timedelta_range("1ns", periods=10)), + pd.timedelta_range("1 Day", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_dt64arr_cmp_arraylike_invalid(self, other, tz_naive_fixture): + # We don't parametrize this over box_with_array because listlike + # other plays poorly with assert_invalid_comparison reversed checks + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="ns", periods=10, tz=tz)._data 
+ assert_invalid_comparison(dta, other, tm.to_array) + + def test_dt64arr_cmp_mixed_invalid(self, tz_naive_fixture): + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="h", periods=5, tz=tz)._data + + other = np.array([0, 1, 2, dta[3], pd.Timedelta(days=1)]) + result = dta == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dta != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + dta < other + with pytest.raises(TypeError, match=msg): + dta > other + with pytest.raises(TypeError, match=msg): + dta <= other + with pytest.raises(TypeError, match=msg): + dta >= other + + def test_dt64arr_nat_comparison(self, tz_naive_fixture, box_with_array): + # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly + tz = tz_naive_fixture + box = box_with_array + xbox = box if box is not pd.Index else np.ndarray + + ts = pd.Timestamp.now(tz) + ser = pd.Series([ts, pd.NaT]) + + # FIXME: Can't transpose because that loses the tz dtype on + # the NaT column + obj = tm.box_expected(ser, box, transpose=False) + + expected = pd.Series([True, False], dtype=np.bool_) + expected = tm.box_expected(expected, xbox, transpose=False) + + result = obj == ts + tm.assert_equal(result, expected) + + +class TestDatetime64SeriesComparison: + # TODO: moved from tests.series.test_operators; needs cleanup + + @pytest.mark.parametrize( + "pair", + [ + ( + [pd.Timestamp("2011-01-01"), NaT, pd.Timestamp("2011-01-03")], + [NaT, NaT, pd.Timestamp("2011-01-03")], + ), + ( + [pd.Timedelta("1 days"), NaT, pd.Timedelta("3 days")], + [NaT, NaT, pd.Timedelta("3 days")], + ), + ( + [pd.Period("2011-01", freq="M"), NaT, pd.Period("2011-03", freq="M")], + [NaT, NaT, pd.Period("2011-03", freq="M")], + ), + ], + ) + @pytest.mark.parametrize("reverse", [True, False]) + 
@pytest.mark.parametrize("dtype", [None, object]) + def test_nat_comparisons(self, dtype, index_or_series, reverse, pair): + box = index_or_series + l, r = pair + if reverse: + # add lhs / rhs switched data + l, r = r, l + + left = Series(l, dtype=dtype) + right = box(r, dtype=dtype) + # Series, Index + + expected = Series([False, False, True]) + tm.assert_series_equal(left == right, expected) + + expected = Series([True, True, False]) + tm.assert_series_equal(left != right, expected) + + expected = Series([False, False, False]) + tm.assert_series_equal(left < right, expected) + + expected = Series([False, False, False]) + tm.assert_series_equal(left > right, expected) + + expected = Series([False, False, True]) + tm.assert_series_equal(left >= right, expected) + + expected = Series([False, False, True]) + tm.assert_series_equal(left <= right, expected) + + def test_comparison_invalid(self, tz_naive_fixture, box_with_array): + # GH#4968 + # invalid date/int comparisons + tz = tz_naive_fixture + ser = Series(range(5)) + ser2 = Series(pd.date_range("20010101", periods=5, tz=tz)) + + ser = tm.box_expected(ser, box_with_array) + ser2 = tm.box_expected(ser2, box_with_array) + + assert_invalid_comparison(ser, ser2, box_with_array) + + @pytest.mark.parametrize( + "data", + [ + [Timestamp("2011-01-01"), NaT, Timestamp("2011-01-03")], + [Timedelta("1 days"), NaT, Timedelta("3 days")], + [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")], + ], + ) + @pytest.mark.parametrize("dtype", [None, object]) + def test_nat_comparisons_scalar(self, dtype, data, box_with_array): + if box_with_array is tm.to_array and dtype is object: + # dont bother testing ndarray comparison methods as this fails + # on older numpys (since they check object identity) + return + + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + + left = Series(data, dtype=dtype) + left = tm.box_expected(left, box_with_array) + + expected = [False, False, False] + expected = 
tm.box_expected(expected, xbox) + tm.assert_equal(left == NaT, expected) + tm.assert_equal(NaT == left, expected) + + expected = [True, True, True] + expected = tm.box_expected(expected, xbox) + tm.assert_equal(left != NaT, expected) + tm.assert_equal(NaT != left, expected) + + expected = [False, False, False] + expected = tm.box_expected(expected, xbox) + tm.assert_equal(left < NaT, expected) + tm.assert_equal(NaT > left, expected) + tm.assert_equal(left <= NaT, expected) + tm.assert_equal(NaT >= left, expected) + + tm.assert_equal(left > NaT, expected) + tm.assert_equal(NaT < left, expected) + tm.assert_equal(left >= NaT, expected) + tm.assert_equal(NaT <= left, expected) + + @pytest.mark.parametrize("val", [datetime(2000, 1, 4), datetime(2000, 1, 5)]) + def test_series_comparison_scalars(self, val): + series = Series(date_range("1/1/2000", periods=10)) + + result = series > val + expected = Series([x > val for x in series]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "left,right", [("lt", "gt"), ("le", "ge"), ("eq", "eq"), ("ne", "ne")] + ) + def test_timestamp_compare_series(self, left, right): + # see gh-4982 + # Make sure we can compare Timestamps on the right AND left hand side. 
+ ser = pd.Series(pd.date_range("20010101", periods=10), name="dates") + s_nat = ser.copy(deep=True) + + ser[0] = pd.Timestamp("nat") + ser[3] = pd.Timestamp("nat") + + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # No NaT + expected = left_f(ser, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), ser) + tm.assert_series_equal(result, expected) + + # NaT + expected = left_f(ser, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), ser) + tm.assert_series_equal(result, expected) + + # Compare to Timestamp with series containing NaT + expected = left_f(s_nat, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), s_nat) + tm.assert_series_equal(result, expected) + + # Compare to NaT with series containing NaT + expected = left_f(s_nat, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), s_nat) + tm.assert_series_equal(result, expected) + + def test_dt64arr_timestamp_equality(self, box_with_array): + # GH#11034 + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + + ser = pd.Series([pd.Timestamp("2000-01-29 01:59:00"), "NaT"]) + ser = tm.box_expected(ser, box_with_array) + + result = ser != ser + expected = tm.box_expected([False, True], xbox) + tm.assert_equal(result, expected) + + result = ser != ser[0] + expected = tm.box_expected([False, True], xbox) + tm.assert_equal(result, expected) + + result = ser != ser[1] + expected = tm.box_expected([True, True], xbox) + tm.assert_equal(result, expected) + + result = ser == ser + expected = tm.box_expected([True, False], xbox) + tm.assert_equal(result, expected) + + result = ser == ser[0] + expected = tm.box_expected([True, False], xbox) + tm.assert_equal(result, expected) + + result = ser == ser[1] + expected = tm.box_expected([False, False], xbox) + tm.assert_equal(result, expected) + + +class TestDatetimeIndexComparisons: + + # TODO: moved from tests.indexes.test_base; parametrize and de-duplicate + 
@pytest.mark.parametrize( + "op", + [operator.eq, operator.ne, operator.gt, operator.lt, operator.ge, operator.le], + ) + def test_comparators(self, op): + index = tm.makeDateIndex(100) + element = index[len(index) // 2] + element = Timestamp(element).to_datetime64() + + arr = np.array(index) + arr_result = op(arr, element) + index_result = op(index, element) + + assert isinstance(index_result, np.ndarray) + tm.assert_numpy_array_equal(arr_result, index_result) + + @pytest.mark.parametrize( + "other", + [datetime(2016, 1, 1), Timestamp("2016-01-01"), np.datetime64("2016-01-01")], + ) + def test_dti_cmp_datetimelike(self, other, tz_naive_fixture): + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=2, tz=tz) + if tz is not None: + if isinstance(other, np.datetime64): + # no tzaware version available + return + other = localize_pydatetime(other, dti.tzinfo) + + result = dti == other + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti > other + expected = np.array([False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti >= other + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = dti < other + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dti <= other + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_dti_cmp_nat(self, dtype, box_with_array): + if box_with_array is tm.to_array and dtype is object: + # dont bother testing ndarray comparison methods as this fails + # on older numpys (since they check object identity) + return + + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + + left = pd.DatetimeIndex( + [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")] + ) + right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp("2011-01-03")]) + + left = 
tm.box_expected(left, box_with_array) + right = tm.box_expected(right, box_with_array) + + lhs, rhs = left, right + if dtype is object: + lhs, rhs = left.astype(object), right.astype(object) + + result = rhs == lhs + expected = np.array([False, False, True]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + result = lhs != rhs + expected = np.array([True, True, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + expected = np.array([False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(lhs == pd.NaT, expected) + tm.assert_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(lhs != pd.NaT, expected) + tm.assert_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(lhs < pd.NaT, expected) + tm.assert_equal(pd.NaT > lhs, expected) + + def test_dti_cmp_nat_behaves_like_float_cmp_nan(self): + fidx1 = pd.Index([1.0, np.nan, 3.0, np.nan, 5.0, 7.0]) + fidx2 = pd.Index([2.0, 3.0, np.nan, np.nan, 6.0, 7.0]) + + didx1 = pd.DatetimeIndex( + ["2014-01-01", pd.NaT, "2014-03-01", pd.NaT, "2014-05-01", "2014-07-01"] + ) + didx2 = pd.DatetimeIndex( + ["2014-02-01", "2014-03-01", pd.NaT, pd.NaT, "2014-06-01", "2014-07-01"] + ) + darr = np.array( + [ + np_datetime64_compat("2014-02-01 00:00Z"), + np_datetime64_compat("2014-03-01 00:00Z"), + np_datetime64_compat("nat"), + np.datetime64("nat"), + np_datetime64_compat("2014-06-01 00:00Z"), + np_datetime64_compat("2014-07-01 00:00Z"), + ] + ) + + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result 
= idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, np.nan), (didx1, pd.NaT)]: + result = idx1 < val + expected = np.array([False, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, True, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # Check pd.NaT is handles as the same as np.nan + with tm.assert_produces_warning(None): + for idx1, val in [(fidx1, 3), (didx1, datetime(2014, 3, 1))]: + result = idx1 < val + expected = np.array([True, False, False, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 > val + expected = np.array([False, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= val + expected = np.array([True, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + result = idx1 >= val + expected = np.array([False, False, True, False, True, True]) + 
tm.assert_numpy_array_equal(result, expected) + + result = idx1 == val + expected = np.array([False, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != val + expected = np.array([True, True, False, True, True, True]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "op", + [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le], + ) + def test_comparison_tzawareness_compat(self, op, box_df_fail): + # GH#18162 + box = box_df_fail + + dr = pd.date_range("2016-01-01", periods=6) + dz = dr.tz_localize("US/Pacific") + + dr = tm.box_expected(dr, box) + dz = tm.box_expected(dz, box) + + msg = "Cannot compare tz-naive and tz-aware" + with pytest.raises(TypeError, match=msg): + op(dr, dz) + + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities + with pytest.raises(TypeError, match=msg): + op(dr, list(dz)) + with pytest.raises(TypeError, match=msg): + op(dr, np.array(list(dz), dtype=object)) + with pytest.raises(TypeError, match=msg): + op(dz, dr) + + # FIXME: DataFrame case fails to raise for == and !=, wrong + # message for inequalities + with pytest.raises(TypeError, match=msg): + op(dz, list(dr)) + with pytest.raises(TypeError, match=msg): + op(dz, np.array(list(dr), dtype=object)) + + # The aware==aware and naive==naive comparisons should *not* raise + assert np.all(dr == dr) + assert np.all(dr == list(dr)) + assert np.all(list(dr) == dr) + assert np.all(np.array(list(dr), dtype=object) == dr) + assert np.all(dr == np.array(list(dr), dtype=object)) + + assert np.all(dz == dz) + assert np.all(dz == list(dz)) + assert np.all(list(dz) == dz) + assert np.all(np.array(list(dz), dtype=object) == dz) + assert np.all(dz == np.array(list(dz), dtype=object)) + + @pytest.mark.parametrize( + "op", + [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le], + ) + def test_comparison_tzawareness_compat_scalars(self, op, 
box_with_array): + # GH#18162 + dr = pd.date_range("2016-01-01", periods=6) + dz = dr.tz_localize("US/Pacific") + + dr = tm.box_expected(dr, box_with_array) + dz = tm.box_expected(dz, box_with_array) + + # Check comparisons against scalar Timestamps + ts = pd.Timestamp("2000-03-14 01:59") + ts_tz = pd.Timestamp("2000-03-14 01:59", tz="Europe/Amsterdam") + + assert np.all(dr > ts) + msg = "Cannot compare tz-naive and tz-aware" + with pytest.raises(TypeError, match=msg): + op(dr, ts_tz) + + assert np.all(dz > ts_tz) + with pytest.raises(TypeError, match=msg): + op(dz, ts) + + # GH#12601: Check comparison against Timestamps and DatetimeIndex + with pytest.raises(TypeError, match=msg): + op(ts, dz) + + @pytest.mark.parametrize( + "op", + [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le], + ) + @pytest.mark.parametrize( + "other", + [datetime(2016, 1, 1), Timestamp("2016-01-01"), np.datetime64("2016-01-01")], + ) + # Bug in NumPy? https://github.com/numpy/numpy/issues/13841 + # Raising in __eq__ will fallback to NumPy, which warns, fails, + # then re-raises the original exception. So we just need to ignore. 
+ @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + @pytest.mark.filterwarnings("ignore:Converting timezone-aware:FutureWarning") + def test_scalar_comparison_tzawareness( + self, op, other, tz_aware_fixture, box_with_array + ): + tz = tz_aware_fixture + dti = pd.date_range("2016-01-01", periods=2, tz=tz) + + dtarr = tm.box_expected(dti, box_with_array) + msg = "Cannot compare tz-naive and tz-aware" + with pytest.raises(TypeError, match=msg): + op(dtarr, other) + with pytest.raises(TypeError, match=msg): + op(other, dtarr) + + @pytest.mark.parametrize( + "op", + [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le], + ) + def test_nat_comparison_tzawareness(self, op): + # GH#19276 + # tzaware DatetimeIndex should not raise when compared to NaT + dti = pd.DatetimeIndex( + ["2014-01-01", pd.NaT, "2014-03-01", pd.NaT, "2014-05-01", "2014-07-01"] + ) + expected = np.array([op == operator.ne] * len(dti)) + result = op(dti, pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + result = op(dti.tz_localize("US/Pacific"), pd.NaT) + tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_str(self, tz_naive_fixture): + # GH#22074 + # regardless of tz, we expect these comparisons are valid + tz = tz_naive_fixture + rng = date_range("1/1/2000", periods=10, tz=tz) + other = "1/1/2000" + + result = rng == other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng < other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = rng <= other + expected = np.array([True] + [False] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng > other + expected = np.array([False] + [True] * 9) + tm.assert_numpy_array_equal(result, expected) + + result = rng >= other + expected = np.array([True] * 
10) + tm.assert_numpy_array_equal(result, expected) + + def test_dti_cmp_list(self): + rng = date_range("1/1/2000", periods=10) + + result = rng == list(rng) + expected = rng == rng + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + pd.timedelta_range("1D", periods=10), + pd.timedelta_range("1D", periods=10).to_series(), + pd.timedelta_range("1D", periods=10).asi8.view("m8[ns]"), + ], + ids=lambda x: type(x).__name__, + ) + def test_dti_cmp_tdi_tzawareness(self, other): + # GH#22074 + # reversion test that we _don't_ call _assert_tzawareness_compat + # when comparing against TimedeltaIndex + dti = date_range("2000-01-01", periods=10, tz="Asia/Tokyo") + + result = dti == other + expected = np.array([False] * 10) + tm.assert_numpy_array_equal(result, expected) + + result = dti != other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + msg = "Invalid comparison between" + with pytest.raises(TypeError, match=msg): + dti < other + with pytest.raises(TypeError, match=msg): + dti <= other + with pytest.raises(TypeError, match=msg): + dti > other + with pytest.raises(TypeError, match=msg): + dti >= other + + def test_dti_cmp_object_dtype(self): + # GH#22074 + dti = date_range("2000-01-01", periods=10, tz="Asia/Tokyo") + + other = dti.astype("O") + + result = dti == other + expected = np.array([True] * 10) + tm.assert_numpy_array_equal(result, expected) + + other = dti.tz_localize(None) + msg = "Cannot compare tz-naive and tz-aware" + with pytest.raises(TypeError, match=msg): + # tzawareness failure + dti != other + + other = np.array(list(dti[:5]) + [Timedelta(days=1)] * 5) + result = dti == other + expected = np.array([True] * 5 + [False] * 5) + tm.assert_numpy_array_equal(result, expected) + msg = "Cannot compare type" + with pytest.raises(TypeError, match=msg): + dti >= other + + +# ------------------------------------------------------------------ +# Arithmetic + + +class 
TestDatetime64Arithmetic: + # This class is intended for "finished" tests that are fully parametrized + # over DataFrame/Series/Index/DatetimeArray + + # ------------------------------------------------------------- + # Addition/Subtraction of timedelta-like + + def test_dt64arr_add_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + # GH#22005, GH#22163 check DataFrame doesn't raise TypeError + tz = tz_naive_fixture + + rng = pd.date_range("2000-01-01", "2000-02-01", tz=tz) + expected = pd.date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng + two_hours + tm.assert_equal(result, expected) + + def test_dt64arr_iadd_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + tz = tz_naive_fixture + + rng = pd.date_range("2000-01-01", "2000-02-01", tz=tz) + expected = pd.date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + rng += two_hours + tm.assert_equal(rng, expected) + + def test_dt64arr_sub_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + tz = tz_naive_fixture + + rng = pd.date_range("2000-01-01", "2000-02-01", tz=tz) + expected = pd.date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng - two_hours + tm.assert_equal(result, expected) + + def test_dt64arr_isub_timedeltalike_scalar( + self, tz_naive_fixture, two_hours, box_with_array + ): + tz = tz_naive_fixture + + rng = pd.date_range("2000-01-01", "2000-02-01", tz=tz) + expected = pd.date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + rng -= two_hours + 
tm.assert_equal(rng, expected) + + # TODO: redundant with test_dt64arr_add_timedeltalike_scalar + def test_dt64arr_add_td64_scalar(self, box_with_array): + # scalar timedeltas/np.timedelta64 objects + # operate with np.timedelta64 correctly + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + + expected = Series( + [Timestamp("20130101 9:01:01"), Timestamp("20130101 9:02:01")] + ) + + dtarr = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr + np.timedelta64(1, "s") + tm.assert_equal(result, expected) + result = np.timedelta64(1, "s") + dtarr + tm.assert_equal(result, expected) + + expected = Series( + [Timestamp("20130101 9:01:00.005"), Timestamp("20130101 9:02:00.005")] + ) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr + np.timedelta64(5, "ms") + tm.assert_equal(result, expected) + result = np.timedelta64(5, "ms") + dtarr + tm.assert_equal(result, expected) + + def test_dt64arr_add_sub_td64_nat(self, box_with_array, tz_naive_fixture): + # GH#23320 special handling for timedelta64("NaT") + tz = tz_naive_fixture + + dti = pd.date_range("1994-04-01", periods=9, tz=tz, freq="QS") + other = np.timedelta64("NaT") + expected = pd.DatetimeIndex(["NaT"] * 9, tz=tz) + + # FIXME: fails with transpose=True due to tz-aware DataFrame + # transpose bug + obj = tm.box_expected(dti, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array, transpose=False) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + msg = "cannot subtract" + with pytest.raises(TypeError, match=msg): + other - obj + + def test_dt64arr_add_sub_td64ndarray(self, tz_naive_fixture, box_with_array): + + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + tdi = pd.TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"]) + tdarr = 
tdi.values + + expected = pd.date_range("2015-12-31", periods=3, tz=tz) + + dtarr = tm.box_expected(dti, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr + tdarr + tm.assert_equal(result, expected) + result = tdarr + dtarr + tm.assert_equal(result, expected) + + expected = pd.date_range("2016-01-02", periods=3, tz=tz) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr - tdarr + tm.assert_equal(result, expected) + msg = "cannot subtract|(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + tdarr - dtarr + + # ----------------------------------------------------------------- + # Subtraction of datetime-like scalars + + @pytest.mark.parametrize( + "ts", + [ + pd.Timestamp("2013-01-01"), + pd.Timestamp("2013-01-01").to_pydatetime(), + pd.Timestamp("2013-01-01").to_datetime64(), + ], + ) + def test_dt64arr_sub_dtscalar(self, box_with_array, ts): + # GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype + idx = pd.date_range("2013-01-01", periods=3) + idx = tm.box_expected(idx, box_with_array) + + expected = pd.TimedeltaIndex(["0 Days", "1 Day", "2 Days"]) + expected = tm.box_expected(expected, box_with_array) + + result = idx - ts + tm.assert_equal(result, expected) + + def test_dt64arr_sub_datetime64_not_ns(self, box_with_array): + # GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano + # for DataFrame operation + dt64 = np.datetime64("2013-01-01") + assert dt64.dtype == "datetime64[D]" + + dti = pd.date_range("20130101", periods=3) + dtarr = tm.box_expected(dti, box_with_array) + + expected = pd.TimedeltaIndex(["0 Days", "1 Day", "2 Days"]) + expected = tm.box_expected(expected, box_with_array) + + result = dtarr - dt64 + tm.assert_equal(result, expected) + + result = dt64 - dtarr + tm.assert_equal(result, -expected) + + def test_dt64arr_sub_timestamp(self, box_with_array): + ser = pd.date_range("2014-03-17", periods=2, freq="D", tz="US/Eastern") + 
ts = ser[0] + + ser = tm.box_expected(ser, box_with_array) + + delta_series = pd.Series([np.timedelta64(0, "D"), np.timedelta64(1, "D")]) + expected = tm.box_expected(delta_series, box_with_array) + + tm.assert_equal(ser - ts, expected) + tm.assert_equal(ts - ser, -expected) + + def test_dt64arr_sub_NaT(self, box_with_array): + # GH#18808 + dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp("19900315")]) + ser = tm.box_expected(dti, box_with_array) + + result = ser - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + dti_tz = dti.tz_localize("Asia/Tokyo") + ser_tz = tm.box_expected(dti_tz, box_with_array) + + result = ser_tz - pd.NaT + expected = pd.Series([pd.NaT, pd.NaT], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + # ------------------------------------------------------------- + # Subtraction of datetime-like array-like + + def test_dt64arr_naive_sub_dt64ndarray(self, box_with_array): + dti = pd.date_range("2016-01-01", periods=3, tz=None) + dt64vals = dti.values + + dtarr = tm.box_expected(dti, box_with_array) + + expected = dtarr - dtarr + result = dtarr - dt64vals + tm.assert_equal(result, expected) + result = dt64vals - dtarr + tm.assert_equal(result, expected) + + def test_dt64arr_aware_sub_dt64ndarray_raises( + self, tz_aware_fixture, box_with_array + ): + + tz = tz_aware_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + dt64vals = dti.values + + dtarr = tm.box_expected(dti, box_with_array) + msg = "subtraction must have the same timezones or" + with pytest.raises(TypeError, match=msg): + dtarr - dt64vals + with pytest.raises(TypeError, match=msg): + dt64vals - dtarr + + # ------------------------------------------------------------- + # Addition of datetime-like others (invalid) + + def test_dt64arr_add_dt64ndarray_raises(self, tz_naive_fixture, 
box_with_array): + + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + dt64vals = dti.values + + dtarr = tm.box_expected(dti, box_with_array) + msg = "cannot add" + with pytest.raises(TypeError, match=msg): + dtarr + dt64vals + with pytest.raises(TypeError, match=msg): + dt64vals + dtarr + + def test_dt64arr_add_timestamp_raises(self, box_with_array): + # GH#22163 ensure DataFrame doesn't cast Timestamp to i8 + idx = DatetimeIndex(["2011-01-01", "2011-01-02"]) + idx = tm.box_expected(idx, box_with_array) + msg = "cannot add" + with pytest.raises(TypeError, match=msg): + idx + Timestamp("2011-01-01") + with pytest.raises(TypeError, match=msg): + Timestamp("2011-01-01") + idx + + # ------------------------------------------------------------- + # Other Invalid Addition/Subtraction + + @pytest.mark.parametrize( + "other", + [ + 3.14, + np.array([2.0, 3.0]), + # GH#13078 datetime +/- Period is invalid + pd.Period("2011-01-01", freq="D"), + ], + ) + @pytest.mark.parametrize("dti_freq", [None, "D"]) + def test_dt64arr_add_sub_invalid(self, dti_freq, other, box_with_array): + dti = DatetimeIndex(["2011-01-01", "2011-01-02"], freq=dti_freq) + dtarr = tm.box_expected(dti, box_with_array) + msg = "|".join( + [ + "unsupported operand type", + "cannot (add|subtract)", + "cannot use operands with types", + "ufunc '?(add|subtract)'? 
cannot use operands with types", + ] + ) + assert_invalid_addsub_type(dtarr, other, msg) + + @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) + @pytest.mark.parametrize("dti_freq", [None, "D"]) + def test_dt64arr_add_sub_parr( + self, dti_freq, pi_freq, box_with_array, box_with_array2 + ): + # GH#20049 subtracting PeriodIndex should raise TypeError + dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], freq=dti_freq) + pi = dti.to_period(pi_freq) + + dtarr = tm.box_expected(dti, box_with_array) + parr = tm.box_expected(pi, box_with_array2) + msg = "|".join( + [ + "cannot (add|subtract)", + "unsupported operand", + "descriptor.*requires", + "ufunc.*cannot use operands", + ] + ) + assert_invalid_addsub_type(dtarr, parr, msg) + + +class TestDatetime64DateOffsetArithmetic: + + # ------------------------------------------------------------- + # Tick DateOffsets + + # TODO: parametrize over timezone? + def test_dt64arr_series_add_tick_DateOffset(self, box_with_array): + # GH#4532 + # operate with pd.offsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + expected = Series( + [Timestamp("20130101 9:01:05"), Timestamp("20130101 9:02:05")] + ) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser + pd.offsets.Second(5) + tm.assert_equal(result, expected) + + result2 = pd.offsets.Second(5) + ser + tm.assert_equal(result2, expected) + + def test_dt64arr_series_sub_tick_DateOffset(self, box_with_array): + # GH#4532 + # operate with pd.offsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + expected = Series( + [Timestamp("20130101 9:00:55"), Timestamp("20130101 9:01:55")] + ) + + ser = tm.box_expected(ser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = ser - pd.offsets.Second(5) + tm.assert_equal(result, expected) + + result2 = -pd.offsets.Second(5) + ser + tm.assert_equal(result2, expected) + msg = 
"(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + pd.offsets.Second(5) - ser + + @pytest.mark.parametrize( + "cls_name", ["Day", "Hour", "Minute", "Second", "Milli", "Micro", "Nano"] + ) + def test_dt64arr_add_sub_tick_DateOffset_smoke(self, cls_name, box_with_array): + # GH#4532 + # smoke tests for valid DateOffsets + ser = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + ser = tm.box_expected(ser, box_with_array) + + offset_cls = getattr(pd.offsets, cls_name) + ser + offset_cls(5) + offset_cls(5) + ser + ser - offset_cls(5) + + def test_dti_add_tick_tzaware(self, tz_aware_fixture, box_with_array): + # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype + tz = tz_aware_fixture + if tz == "US/Pacific": + dates = date_range("2012-11-01", periods=3, tz=tz) + offset = dates + pd.offsets.Hour(5) + assert dates[0] + pd.offsets.Hour(5) == offset[0] + + dates = date_range("2010-11-01 00:00", periods=3, tz=tz, freq="H") + expected = DatetimeIndex( + ["2010-11-01 05:00", "2010-11-01 06:00", "2010-11-01 07:00"], + freq="H", + tz=tz, + ) + + dates = tm.box_expected(dates, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + # TODO: parametrize over the scalar being added? radd? sub? 
+ offset = dates + pd.offsets.Hour(5) + tm.assert_equal(offset, expected) + offset = dates + np.timedelta64(5, "h") + tm.assert_equal(offset, expected) + offset = dates + timedelta(hours=5) + tm.assert_equal(offset, expected) + + # ------------------------------------------------------------- + # RelativeDelta DateOffsets + + def test_dt64arr_add_sub_relativedelta_offsets(self, box_with_array): + # GH#10699 + vec = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-03-31"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + Timestamp("2000-05-15"), + Timestamp("2001-06-15"), + ] + ) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.squeeze() if box_with_array is pd.DataFrame else vec + + # DateOffset relativedelta fastpath + relative_kwargs = [ + ("years", 2), + ("months", 5), + ("days", 3), + ("hours", 5), + ("minutes", 10), + ("seconds", 2), + ("microseconds", 5), + ] + for i, kwd in enumerate(relative_kwargs): + off = pd.DateOffset(**dict([kwd])) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + + off = pd.DateOffset(**dict(relative_kwargs[: i + 1])) + + expected = DatetimeIndex([x + off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + off) + + expected = DatetimeIndex([x - off for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - off) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + off - vec + + # ------------------------------------------------------------- + # Non-Tick, Non-RelativeDelta DateOffsets + + # TODO: redundant with 
test_dt64arr_add_sub_DateOffset? that includes + # tz-aware cases which this does not + @pytest.mark.parametrize( + "cls_and_kwargs", + [ + "YearBegin", + ("YearBegin", {"month": 5}), + "YearEnd", + ("YearEnd", {"month": 5}), + "MonthBegin", + "MonthEnd", + "SemiMonthEnd", + "SemiMonthBegin", + "Week", + ("Week", {"weekday": 3}), + "Week", + ("Week", {"weekday": 6}), + "BusinessDay", + "BDay", + "QuarterEnd", + "QuarterBegin", + "CustomBusinessDay", + "CDay", + "CBMonthEnd", + "CBMonthBegin", + "BMonthBegin", + "BMonthEnd", + "BusinessHour", + "BYearBegin", + "BYearEnd", + "BQuarterBegin", + ("LastWeekOfMonth", {"weekday": 2}), + ( + "FY5253Quarter", + { + "qtr_with_extra_week": 1, + "startingMonth": 1, + "weekday": 2, + "variation": "nearest", + }, + ), + ("FY5253", {"weekday": 0, "startingMonth": 2, "variation": "nearest"}), + ("WeekOfMonth", {"weekday": 2, "week": 2}), + "Easter", + ("DateOffset", {"day": 4}), + ("DateOffset", {"month": 5}), + ], + ) + @pytest.mark.parametrize("normalize", [True, False]) + @pytest.mark.parametrize("n", [0, 5]) + def test_dt64arr_add_sub_DateOffsets( + self, box_with_array, n, normalize, cls_and_kwargs + ): + # GH#10699 + # assert vectorized operation matches pointwise operations + + if isinstance(cls_and_kwargs, tuple): + # If cls_name param is a tuple, then 2nd entry is kwargs for + # the offset constructor + cls_name, kwargs = cls_and_kwargs + else: + cls_name = cls_and_kwargs + kwargs = {} + + if n == 0 and cls_name in [ + "WeekOfMonth", + "LastWeekOfMonth", + "FY5253Quarter", + "FY5253", + ]: + # passing n = 0 is invalid for these offset classes + return + + vec = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-03-31"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + Timestamp("2000-05-15"), + Timestamp("2001-06-15"), + ] + ) + vec = tm.box_expected(vec, box_with_array) + vec_items = vec.squeeze() if box_with_array is pd.DataFrame 
else vec + + offset_cls = getattr(pd.offsets, cls_name) + + with warnings.catch_warnings(record=True): + # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being + # applied to Series or DatetimeIndex + # we aren't testing that here, so ignore. + warnings.simplefilter("ignore", PerformanceWarning) + + offset = offset_cls(n, normalize=normalize, **kwargs) + + expected = DatetimeIndex([x + offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec + offset) + + expected = DatetimeIndex([x - offset for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, vec - offset) + + expected = DatetimeIndex([offset + x for x in vec_items]) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(expected, offset + vec) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + offset - vec + + def test_dt64arr_add_sub_DateOffset(self, box_with_array): + # GH#10699 + s = date_range("2000-01-01", "2000-01-31", name="a") + s = tm.box_expected(s, box_with_array) + result = s + pd.DateOffset(years=1) + result2 = pd.DateOffset(years=1) + s + exp = date_range("2001-01-01", "2001-01-31", name="a") + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + result = s - pd.DateOffset(years=1) + exp = date_range("1999-01-01", "1999-01-31", name="a") + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + s = DatetimeIndex( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + s = tm.box_expected(s, box_with_array) + result = s + pd.offsets.Day() + result2 = pd.offsets.Day() + s + exp = DatetimeIndex( + [ + Timestamp("2000-01-16 00:15:00", tz="US/Central"), + Timestamp("2000-02-16", tz="US/Central"), + ], + name="a", + ) + exp = tm.box_expected(exp, box_with_array) + 
tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + s = DatetimeIndex( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + s = tm.box_expected(s, box_with_array) + result = s + pd.offsets.MonthEnd() + result2 = pd.offsets.MonthEnd() + s + exp = DatetimeIndex( + [ + Timestamp("2000-01-31 00:15:00", tz="US/Central"), + Timestamp("2000-02-29", tz="US/Central"), + ], + name="a", + ) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + # TODO: __sub__, __rsub__ + def test_dt64arr_add_mixed_offset_array(self, box_with_array): + # GH#10699 + # array of offsets + s = DatetimeIndex([Timestamp("2000-1-1"), Timestamp("2000-2-1")]) + s = tm.box_expected(s, box_with_array) + + warn = None if box_with_array is pd.DataFrame else PerformanceWarning + with tm.assert_produces_warning(warn): + other = pd.Index([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]) + other = tm.box_expected(other, box_with_array) + result = s + other + exp = DatetimeIndex([Timestamp("2001-1-1"), Timestamp("2000-2-29")]) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + # same offset + other = pd.Index( + [pd.offsets.DateOffset(years=1), pd.offsets.DateOffset(years=1)] + ) + other = tm.box_expected(other, box_with_array) + result = s + other + exp = DatetimeIndex([Timestamp("2001-1-1"), Timestamp("2001-2-1")]) + exp = tm.box_expected(exp, box_with_array) + tm.assert_equal(result, exp) + + # TODO: overlap with test_dt64arr_add_mixed_offset_array? 
+ def test_dt64arr_add_sub_offset_ndarray(self, tz_naive_fixture, box_with_array): + # GH#18849 + + tz = tz_naive_fixture + dti = pd.date_range("2017-01-01", periods=2, tz=tz) + dtarr = tm.box_expected(dti, box_with_array) + + other = np.array([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + warn = None if box_with_array is pd.DataFrame else PerformanceWarning + with tm.assert_produces_warning(warn): + res = dtarr + other + expected = DatetimeIndex( + [dti[n] + other[n] for n in range(len(dti))], name=dti.name, freq="infer" + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(warn): + res2 = other + dtarr + tm.assert_equal(res2, expected) + + with tm.assert_produces_warning(warn): + res = dtarr - other + expected = DatetimeIndex( + [dti[n] - other[n] for n in range(len(dti))], name=dti.name, freq="infer" + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(res, expected) + + @pytest.mark.parametrize( + "op, offset, exp, exp_freq", + [ + ( + "__add__", + pd.DateOffset(months=3, days=10), + [ + Timestamp("2014-04-11"), + Timestamp("2015-04-11"), + Timestamp("2016-04-11"), + Timestamp("2017-04-11"), + ], + None, + ), + ( + "__add__", + pd.DateOffset(months=3), + [ + Timestamp("2014-04-01"), + Timestamp("2015-04-01"), + Timestamp("2016-04-01"), + Timestamp("2017-04-01"), + ], + "AS-APR", + ), + ( + "__sub__", + pd.DateOffset(months=3, days=10), + [ + Timestamp("2013-09-21"), + Timestamp("2014-09-21"), + Timestamp("2015-09-21"), + Timestamp("2016-09-21"), + ], + None, + ), + ( + "__sub__", + pd.DateOffset(months=3), + [ + Timestamp("2013-10-01"), + Timestamp("2014-10-01"), + Timestamp("2015-10-01"), + Timestamp("2016-10-01"), + ], + "AS-OCT", + ), + ], + ) + def test_dti_add_sub_nonzero_mth_offset( + self, op, offset, exp, exp_freq, tz_aware_fixture, box_with_array + ): + # GH 26258 + tz = tz_aware_fixture + date = date_range(start="01 Jan 2014", end="01 Jan 2017", 
freq="AS", tz=tz) + date = tm.box_expected(date, box_with_array, False) + mth = getattr(date, op) + result = mth(offset) + + expected = pd.DatetimeIndex(exp, tz=tz, freq=exp_freq) + expected = tm.box_expected(expected, box_with_array, False) + tm.assert_equal(result, expected) + + +class TestDatetime64OverflowHandling: + # TODO: box + de-duplicate + + def test_dt64_overflow_masking(self, box_with_array): + # GH#25317 + left = Series([Timestamp("1969-12-31")]) + right = Series([NaT]) + + left = tm.box_expected(left, box_with_array) + right = tm.box_expected(right, box_with_array) + + expected = TimedeltaIndex([NaT]) + expected = tm.box_expected(expected, box_with_array) + + result = left - right + tm.assert_equal(result, expected) + + def test_dt64_series_arith_overflow(self): + # GH#12534, fixed by GH#19024 + dt = pd.Timestamp("1700-01-31") + td = pd.Timedelta("20000 Days") + dti = pd.date_range("1949-09-30", freq="100Y", periods=4) + ser = pd.Series(dti) + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + ser - dt + with pytest.raises(OverflowError, match=msg): + dt - ser + with pytest.raises(OverflowError, match=msg): + ser + td + with pytest.raises(OverflowError, match=msg): + td + ser + + ser.iloc[-1] = pd.NaT + expected = pd.Series( + ["2004-10-03", "2104-10-04", "2204-10-04", "NaT"], dtype="datetime64[ns]" + ) + res = ser + td + tm.assert_series_equal(res, expected) + res = td + ser + tm.assert_series_equal(res, expected) + + ser.iloc[1:] = pd.NaT + expected = pd.Series( + ["91279 Days", "NaT", "NaT", "NaT"], dtype="timedelta64[ns]" + ) + res = ser - dt + tm.assert_series_equal(res, expected) + res = dt - ser + tm.assert_series_equal(res, -expected) + + def test_datetimeindex_sub_timestamp_overflow(self): + dtimax = pd.to_datetime(["now", pd.Timestamp.max]) + dtimin = pd.to_datetime(["now", pd.Timestamp.min]) + + tsneg = Timestamp("1950-01-01") + ts_neg_variants = [ + tsneg, + tsneg.to_pydatetime(), + 
tsneg.to_datetime64().astype("datetime64[ns]"), + tsneg.to_datetime64().astype("datetime64[D]"), + ] + + tspos = Timestamp("1980-01-01") + ts_pos_variants = [ + tspos, + tspos.to_pydatetime(), + tspos.to_datetime64().astype("datetime64[ns]"), + tspos.to_datetime64().astype("datetime64[D]"), + ] + msg = "Overflow in int64 addition" + for variant in ts_neg_variants: + with pytest.raises(OverflowError, match=msg): + dtimax - variant + + expected = pd.Timestamp.max.value - tspos.value + for variant in ts_pos_variants: + res = dtimax - variant + assert res[1].value == expected + + expected = pd.Timestamp.min.value - tsneg.value + for variant in ts_neg_variants: + res = dtimin - variant + assert res[1].value == expected + + for variant in ts_pos_variants: + with pytest.raises(OverflowError, match=msg): + dtimin - variant + + def test_datetimeindex_sub_datetimeindex_overflow(self): + # GH#22492, GH#22508 + dtimax = pd.to_datetime(["now", pd.Timestamp.max]) + dtimin = pd.to_datetime(["now", pd.Timestamp.min]) + + ts_neg = pd.to_datetime(["1950-01-01", "1950-01-01"]) + ts_pos = pd.to_datetime(["1980-01-01", "1980-01-01"]) + + # General tests + expected = pd.Timestamp.max.value - ts_pos[1].value + result = dtimax - ts_pos + assert result[1].value == expected + + expected = pd.Timestamp.min.value - ts_neg[1].value + result = dtimin - ts_neg + assert result[1].value == expected + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + dtimax - ts_neg + + with pytest.raises(OverflowError, match=msg): + dtimin - ts_pos + + # Edge cases + tmin = pd.to_datetime([pd.Timestamp.min]) + t1 = tmin + pd.Timedelta.max + pd.Timedelta("1us") + with pytest.raises(OverflowError, match=msg): + t1 - tmin + + tmax = pd.to_datetime([pd.Timestamp.max]) + t2 = tmax + pd.Timedelta.min - pd.Timedelta("1us") + with pytest.raises(OverflowError, match=msg): + tmax - t2 + + +class TestTimestampSeriesArithmetic: + def test_empty_series_add_sub(self): + # GH#13844 + a = 
Series(dtype="M8[ns]") + b = Series(dtype="m8[ns]") + tm.assert_series_equal(a, a + b) + tm.assert_series_equal(a, a - b) + tm.assert_series_equal(a, b + a) + msg = "cannot subtract" + with pytest.raises(TypeError, match=msg): + b - a + + def test_operators_datetimelike(self): + + # ## timedelta64 ### + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # ## datetime64 ### + dt1 = Series( + [ + pd.Timestamp("20111230"), + pd.Timestamp("20120101"), + pd.Timestamp("20120103"), + ] + ) + dt1.iloc[2] = np.nan + dt2 = Series( + [ + pd.Timestamp("20111231"), + pd.Timestamp("20120102"), + pd.Timestamp("20120104"), + ] + ) + dt1 - dt2 + dt2 - dt1 + + # datetime64 with timetimedelta + dt1 + td1 + td1 + dt1 + dt1 - td1 + + # timetimedelta with datetime64 + td1 + dt1 + dt1 + td1 + + def test_dt64ser_sub_datetime_dtype(self): + ts = Timestamp(datetime(1993, 1, 7, 13, 30, 00)) + dt = datetime(1993, 6, 22, 13, 30) + ser = Series([ts]) + result = pd.to_timedelta(np.abs(ser - dt)) + assert result.dtype == "timedelta64[ns]" + + # ------------------------------------------------------------- + # TODO: This next block of tests came from tests.series.test_operators, + # needs to be de-duplicated and parametrized over `box` classes + + def test_operators_datetimelike_invalid(self, all_arithmetic_operators): + # these are all TypeEror ops + op_str = all_arithmetic_operators + + def check(get_ser, test_ser): + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + op = getattr(get_ser, op_str, None) + # Previously, _validate_for_numeric_binop in core/indexes/base.py + # did this for us. 
+ with pytest.raises( + TypeError, match="operate|[cC]annot|unsupported operand" + ): + op(test_ser) + + # ## timedelta64 ### + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + # ## datetime64 ### + dt1 = Series( + [Timestamp("20111230"), Timestamp("20120101"), Timestamp("20120103")] + ) + dt1.iloc[2] = np.nan + dt2 = Series( + [Timestamp("20111231"), Timestamp("20120102"), Timestamp("20120104")] + ) + if op_str not in ["__sub__", "__rsub__"]: + check(dt1, dt2) + + # ## datetime64 with timetimedelta ### + # TODO(jreback) __rsub__ should raise? + if op_str not in ["__add__", "__radd__", "__sub__"]: + check(dt1, td1) + + # 8260, 10763 + # datetime64 with tz + tz = "US/Eastern" + dt1 = Series(date_range("2000-01-01 09:00:00", periods=5, tz=tz), name="foo") + dt2 = dt1.copy() + dt2.iloc[2] = np.nan + td1 = Series(pd.timedelta_range("1 days 1 min", periods=5, freq="H")) + td2 = td1.copy() + td2.iloc[1] = np.nan + + if op_str not in ["__add__", "__radd__", "__sub__", "__rsub__"]: + check(dt2, td2) + + def test_sub_single_tz(self): + # GH#12290 + s1 = Series([pd.Timestamp("2016-02-10", tz="America/Sao_Paulo")]) + s2 = Series([pd.Timestamp("2016-02-08", tz="America/Sao_Paulo")]) + result = s1 - s2 + expected = Series([Timedelta("2days")]) + tm.assert_series_equal(result, expected) + result = s2 - s1 + expected = Series([Timedelta("-2days")]) + tm.assert_series_equal(result, expected) + + def test_dt64tz_series_sub_dtitz(self): + # GH#19071 subtracting tzaware DatetimeIndex from tzaware Series + # (with same tz) raises, fixed by #19024 + dti = pd.date_range("1999-09-30", periods=10, tz="US/Pacific") + ser = pd.Series(dti) + expected = pd.Series(pd.TimedeltaIndex(["0days"] * 10)) + + res = dti - ser + tm.assert_series_equal(res, expected) + res = ser - dti + tm.assert_series_equal(res, expected) + + def test_sub_datetime_compat(self): + # see GH#14088 + s = Series([datetime(2016, 8, 23, 12, tzinfo=pytz.utc), pd.NaT]) + dt = datetime(2016, 8, 22, 
12, tzinfo=pytz.utc) + exp = Series([Timedelta("1 days"), pd.NaT]) + tm.assert_series_equal(s - dt, exp) + tm.assert_series_equal(s - Timestamp(dt), exp) + + def test_dt64_series_add_mixed_tick_DateOffset(self): + # GH#4532 + # operate with pd.offsets + s = Series([Timestamp("20130101 9:01"), Timestamp("20130101 9:02")]) + + result = s + pd.offsets.Milli(5) + result2 = pd.offsets.Milli(5) + s + expected = Series( + [Timestamp("20130101 9:01:00.005"), Timestamp("20130101 9:02:00.005")] + ) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5) + expected = Series( + [Timestamp("20130101 9:06:00.005"), Timestamp("20130101 9:07:00.005")] + ) + tm.assert_series_equal(result, expected) + + def test_datetime64_ops_nat(self): + # GH#11349 + datetime_series = Series([NaT, Timestamp("19900315")]) + nat_series_dtype_timestamp = Series([NaT, NaT], dtype="datetime64[ns]") + single_nat_dtype_datetime = Series([NaT], dtype="datetime64[ns]") + + # subtraction + tm.assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp) + msg = "Unary negative expects" + with pytest.raises(TypeError, match=msg): + -single_nat_dtype_datetime + datetime_series + + tm.assert_series_equal( + -NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + with pytest.raises(TypeError, match=msg): + -single_nat_dtype_datetime + nat_series_dtype_timestamp + + # addition + tm.assert_series_equal( + nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp + NaT, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp + ) + + # ------------------------------------------------------------- + # Invalid Operations + # TODO: this block also needs to be de-duplicated and parametrized 
+ + @pytest.mark.parametrize( + "dt64_series", + [ + Series([Timestamp("19900315"), Timestamp("19900315")]), + Series([pd.NaT, Timestamp("19900315")]), + Series([pd.NaT, pd.NaT], dtype="datetime64[ns]"), + ], + ) + @pytest.mark.parametrize("one", [1, 1.0, np.array(1)]) + def test_dt64_mul_div_numeric_invalid(self, one, dt64_series): + # multiplication + msg = "cannot perform .* with this index type" + with pytest.raises(TypeError, match=msg): + dt64_series * one + with pytest.raises(TypeError, match=msg): + one * dt64_series + + # division + with pytest.raises(TypeError, match=msg): + dt64_series / one + with pytest.raises(TypeError, match=msg): + one / dt64_series + + # TODO: parametrize over box + @pytest.mark.parametrize("op", ["__add__", "__radd__", "__sub__", "__rsub__"]) + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"]) + def test_dt64_series_add_intlike(self, tz, op): + # GH#19123 + dti = pd.DatetimeIndex(["2016-01-02", "2016-02-03", "NaT"], tz=tz) + ser = Series(dti) + + other = Series([20, 30, 40], dtype="uint8") + + method = getattr(ser, op) + msg = "|".join( + [ + "Addition/subtraction of integers and integer-arrays", + "cannot subtract .* from ndarray", + ] + ) + with pytest.raises(TypeError, match=msg): + method(1) + with pytest.raises(TypeError, match=msg): + method(other) + with pytest.raises(TypeError, match=msg): + method(np.array(other)) + with pytest.raises(TypeError, match=msg): + method(pd.Index(other)) + + # ------------------------------------------------------------- + # Timezone-Centric Tests + + def test_operators_datetimelike_with_timezones(self): + tz = "US/Eastern" + dt1 = Series(date_range("2000-01-01 09:00:00", periods=5, tz=tz), name="foo") + dt2 = dt1.copy() + dt2.iloc[2] = np.nan + + td1 = Series(pd.timedelta_range("1 days 1 min", periods=5, freq="H")) + td2 = td1.copy() + td2.iloc[1] = np.nan + + result = dt1 + td1[0] + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + 
result = dt2 + td2[0] + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + # odd numpy behavior with scalar timedeltas + result = td1[0] + dt1 + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = td2[0] + dt2 + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt1 - td1[0] + exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + msg = "(bad|unsupported) operand type for unary" + with pytest.raises(TypeError, match=msg): + td1[0] - dt1 + + result = dt2 - td2[0] + exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + with pytest.raises(TypeError, match=msg): + td2[0] - dt2 + + result = dt1 + td1 + exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt2 + td2 + exp = (dt2.dt.tz_localize(None) + td2).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt1 - td1 + exp = (dt1.dt.tz_localize(None) - td1).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + + result = dt2 - td2 + exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) + tm.assert_series_equal(result, exp) + msg = "cannot (add|subtract)" + with pytest.raises(TypeError, match=msg): + td1 - dt1 + with pytest.raises(TypeError, match=msg): + td2 - dt2 + + +class TestDatetimeIndexArithmetic: + + # ------------------------------------------------------------- + # Binary operations DatetimeIndex and int + + def test_dti_addsub_int(self, tz_naive_fixture, one): + # Variants of `one` for #19012 + tz = tz_naive_fixture + rng = pd.date_range("2000-01-01 09:00", freq="H", periods=10, tz=tz) + msg = "Addition/subtraction of integers" + + with pytest.raises(TypeError, match=msg): + rng + one + with pytest.raises(TypeError, match=msg): + rng += one + with 
pytest.raises(TypeError, match=msg): + rng - one + with pytest.raises(TypeError, match=msg): + rng -= one + + # ------------------------------------------------------------- + # __add__/__sub__ with integer arrays + + @pytest.mark.parametrize("freq", ["H", "D"]) + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + def test_dti_add_intarray_tick(self, int_holder, freq): + # GH#19959 + dti = pd.date_range("2016-01-01", periods=2, freq=freq) + other = int_holder([4, -1]) + + msg = "Addition/subtraction of integers|cannot subtract DatetimeArray from" + assert_invalid_addsub_type(dti, other, msg) + + @pytest.mark.parametrize("freq", ["W", "M", "MS", "Q"]) + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + def test_dti_add_intarray_non_tick(self, int_holder, freq): + # GH#19959 + dti = pd.date_range("2016-01-01", periods=2, freq=freq) + other = int_holder([4, -1]) + + msg = "Addition/subtraction of integers|cannot subtract DatetimeArray from" + assert_invalid_addsub_type(dti, other, msg) + + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + def test_dti_add_intarray_no_freq(self, int_holder): + # GH#19959 + dti = pd.DatetimeIndex(["2016-01-01", "NaT", "2017-04-05 06:07:08"]) + other = int_holder([9, 4, -1]) + msg = "|".join( + ["cannot subtract DatetimeArray from", "Addition/subtraction of integers"] + ) + assert_invalid_addsub_type(dti, other, msg) + + # ------------------------------------------------------------- + # Binary operations DatetimeIndex and TimedeltaIndex/array + + def test_dti_add_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = pd.date_range("2017-01-01", periods=10, tz=tz) + + # add with TimdeltaIndex + result = dti + tdi + tm.assert_index_equal(result, expected) + + result = tdi + dti + tm.assert_index_equal(result, expected) + + # add with timedelta64 array + result = dti + 
tdi.values + tm.assert_index_equal(result, expected) + + result = tdi.values + dti + tm.assert_index_equal(result, expected) + + def test_dti_iadd_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = pd.date_range("2017-01-01", periods=10, tz=tz) + + # iadd with TimdeltaIndex + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result += tdi + tm.assert_index_equal(result, expected) + + result = pd.timedelta_range("0 days", periods=10) + result += dti + tm.assert_index_equal(result, expected) + + # iadd with timedelta64 array + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result += tdi.values + tm.assert_index_equal(result, expected) + + result = pd.timedelta_range("0 days", periods=10) + result += dti + tm.assert_index_equal(result, expected) + + def test_dti_sub_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = pd.date_range("2017-01-01", periods=10, tz=tz, freq="-1D") + + # sub with TimedeltaIndex + result = dti - tdi + tm.assert_index_equal(result, expected) + + msg = "cannot subtract .*TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi - dti + + # sub with timedelta64 array + result = dti - tdi.values + tm.assert_index_equal(result, expected) + + msg = "cannot subtract DatetimeArray from" + with pytest.raises(TypeError, match=msg): + tdi.values - dti + + def test_dti_isub_tdi(self, tz_naive_fixture): + # GH#17558 + tz = tz_naive_fixture + dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + tdi = pd.timedelta_range("0 days", periods=10) + expected = pd.date_range("2017-01-01", periods=10, tz=tz, freq="-1D") + + # isub with TimedeltaIndex + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result -= tdi + 
tm.assert_index_equal(result, expected) + + msg = "cannot subtract .* from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi -= dti + + # isub with timedelta64 array + result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10) + result -= tdi.values + tm.assert_index_equal(result, expected) + + msg = "|".join( + [ + "cannot perform __neg__ with this index type:", + "ufunc subtract cannot use operands with types", + "cannot subtract DatetimeArray from", + ] + ) + with pytest.raises(TypeError, match=msg): + tdi.values -= dti + + # ------------------------------------------------------------- + # Binary Operations DatetimeIndex and datetime-like + # TODO: A couple other tests belong in this section. Move them in + # A PR where there isn't already a giant diff. + + @pytest.mark.parametrize( + "addend", + [ + datetime(2011, 1, 1), + DatetimeIndex(["2011-01-01", "2011-01-02"]), + DatetimeIndex(["2011-01-01", "2011-01-02"]).tz_localize("US/Eastern"), + np.datetime64("2011-01-01"), + Timestamp("2011-01-01"), + ], + ids=lambda x: type(x).__name__, + ) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_add_datetimelike_and_dtarr(self, box_with_array, addend, tz): + # GH#9631 + dti = DatetimeIndex(["2011-01-01", "2011-01-02"]).tz_localize(tz) + dtarr = tm.box_expected(dti, box_with_array) + msg = "cannot add DatetimeArray and" + + with pytest.raises(TypeError, match=msg): + dtarr + addend + with pytest.raises(TypeError, match=msg): + addend + dtarr + + # ------------------------------------------------------------- + + def test_dta_add_sub_index(self, tz_naive_fixture): + # Check that DatetimeArray defers to Index classes + dti = date_range("20130101", periods=3, tz=tz_naive_fixture) + dta = dti.array + result = dta - dti + expected = dti - dti + tm.assert_index_equal(result, expected) + + tdi = result + result = dta + tdi + expected = dti + tdi + tm.assert_index_equal(result, expected) + + result = dta - tdi + expected = dti - tdi + 
tm.assert_index_equal(result, expected) + + def test_sub_dti_dti(self): + # previously performed setop (deprecated in 0.16.0), now changed to + # return subtraction -> TimeDeltaIndex (GH ...) + + dti = date_range("20130101", periods=3) + dti_tz = date_range("20130101", periods=3).tz_localize("US/Eastern") + dti_tz2 = date_range("20130101", periods=3).tz_localize("UTC") + expected = TimedeltaIndex([0, 0, 0]) + + result = dti - dti + tm.assert_index_equal(result, expected) + + result = dti_tz - dti_tz + tm.assert_index_equal(result, expected) + msg = "DatetimeArray subtraction must have the same timezones or" + with pytest.raises(TypeError, match=msg): + dti_tz - dti + + with pytest.raises(TypeError, match=msg): + dti - dti_tz + + with pytest.raises(TypeError, match=msg): + dti_tz - dti_tz2 + + # isub + dti -= dti + tm.assert_index_equal(dti, expected) + + # different length raises ValueError + dti1 = date_range("20130101", periods=3) + dti2 = date_range("20130101", periods=4) + msg = "cannot add indices of unequal length" + with pytest.raises(ValueError, match=msg): + dti1 - dti2 + + # NaN propagation + dti1 = DatetimeIndex(["2012-01-01", np.nan, "2012-01-03"]) + dti2 = DatetimeIndex(["2012-01-02", "2012-01-03", np.nan]) + expected = TimedeltaIndex(["1 days", np.nan, np.nan]) + result = dti2 - dti1 + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------------- + # TODO: Most of this block is moved from series or frame tests, needs + # cleanup, box-parametrization, and de-duplication + + @pytest.mark.parametrize("op", [operator.add, operator.sub]) + def test_timedelta64_equal_timedelta_supported_ops(self, op): + ser = Series( + [ + Timestamp("20130301"), + Timestamp("20130228 23:00:00"), + Timestamp("20130228 22:00:00"), + Timestamp("20130228 21:00:00"), + ] + ) + + intervals = ["D", "h", "m", "s", "us"] + + def timedelta64(*args): + # see casting notes in NumPy gh-12927 + return np.sum(list(starmap(np.timedelta64, 
zip(args, intervals)))) + + for d, h, m, s, us in product(*([range(2)] * 5)): + nptd = timedelta64(d, h, m, s, us) + pytd = timedelta(days=d, hours=h, minutes=m, seconds=s, microseconds=us) + lhs = op(ser, nptd) + rhs = op(ser, pytd) + + tm.assert_series_equal(lhs, rhs) + + def test_ops_nat_mixed_datetime64_timedelta64(self): + # GH#11349 + timedelta_series = Series([NaT, Timedelta("1s")]) + datetime_series = Series([NaT, Timestamp("19900315")]) + nat_series_dtype_timedelta = Series([NaT, NaT], dtype="timedelta64[ns]") + nat_series_dtype_timestamp = Series([NaT, NaT], dtype="datetime64[ns]") + single_nat_dtype_datetime = Series([NaT], dtype="datetime64[ns]") + single_nat_dtype_timedelta = Series([NaT], dtype="timedelta64[ns]") + + # subtraction + tm.assert_series_equal( + datetime_series - single_nat_dtype_datetime, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + datetime_series - single_nat_dtype_timedelta, nat_series_dtype_timestamp + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + datetime_series, nat_series_dtype_timestamp + ) + + # without a Series wrapping the NaT, it is ambiguous + # whether it is a datetime64 or timedelta64 + # defaults to interpreting it as timedelta64 + tm.assert_series_equal( + nat_series_dtype_timestamp - single_nat_dtype_datetime, + nat_series_dtype_timedelta, + ) + + tm.assert_series_equal( + nat_series_dtype_timestamp - single_nat_dtype_timedelta, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + msg = "cannot subtract a datelike" + with pytest.raises(TypeError, match=msg): + timedelta_series - single_nat_dtype_datetime + + # addition + tm.assert_series_equal( + nat_series_dtype_timestamp + single_nat_dtype_timedelta, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + + tm.assert_series_equal( + 
nat_series_dtype_timestamp + single_nat_dtype_timedelta, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timestamp, + nat_series_dtype_timestamp, + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_datetime, + nat_series_dtype_timestamp, + ) + tm.assert_series_equal( + single_nat_dtype_datetime + nat_series_dtype_timedelta, + nat_series_dtype_timestamp, + ) + + def test_ufunc_coercions(self): + idx = date_range("2011-01-01", periods=3, freq="2D", name="x") + + delta = np.timedelta64(1, "D") + exp = date_range("2011-01-02", periods=3, freq="2D", name="x") + for result in [idx + delta, np.add(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + exp = date_range("2010-12-31", periods=3, freq="2D", name="x") + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + delta = np.array( + [np.timedelta64(1, "D"), np.timedelta64(2, "D"), np.timedelta64(3, "D")] + ) + exp = DatetimeIndex( + ["2011-01-02", "2011-01-05", "2011-01-08"], freq="3D", name="x" + ) + for result in [idx + delta, np.add(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "3D" + + exp = DatetimeIndex( + ["2010-12-31", "2011-01-01", "2011-01-02"], freq="D", name="x" + ) + for result in [idx - delta, np.subtract(idx, delta)]: + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, exp) + assert result.freq == "D" + + @pytest.mark.parametrize( + "names", [("foo", None, None), ("baz", "bar", None), ("bar", "bar", "bar")] + ) + @pytest.mark.parametrize("tz", [None, "America/Chicago"]) + def test_dti_add_series(self, tz, names): + # GH#13905 + index = DatetimeIndex( + ["2016-06-28 05:30", "2016-06-28 05:31"], tz=tz, name=names[0] + ) + ser = 
Series([Timedelta(seconds=5)] * 2, index=index, name=names[1]) + expected = Series(index + Timedelta(seconds=5), index=index, name=names[2]) + + # passing name arg isn't enough when names[2] is None + expected.name = names[2] + assert expected.dtype == index.dtype + result = ser + index + tm.assert_series_equal(result, expected) + result2 = index + ser + tm.assert_series_equal(result2, expected) + + expected = index + Timedelta(seconds=5) + result3 = ser.values + index + tm.assert_index_equal(result3, expected) + result4 = index + ser.values + tm.assert_index_equal(result4, expected) + + @pytest.mark.parametrize("op", [operator.add, roperator.radd, operator.sub]) + @pytest.mark.parametrize( + "names", [(None, None, None), ("foo", "bar", None), ("foo", "foo", "foo")] + ) + def test_dti_addsub_offset_arraylike( + self, tz_naive_fixture, names, op, index_or_series + ): + # GH#18849, GH#19744 + box = pd.Index + other_box = index_or_series + + tz = tz_naive_fixture + dti = pd.date_range("2017-01-01", periods=2, tz=tz, name=names[0]) + other = other_box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)], name=names[1]) + + xbox = get_upcast_box(box, other) + + with tm.assert_produces_warning(PerformanceWarning): + res = op(dti, other) + + expected = DatetimeIndex( + [op(dti[n], other[n]) for n in range(len(dti))], name=names[2], freq="infer" + ) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(res, expected) + + @pytest.mark.parametrize("other_box", [pd.Index, np.array]) + def test_dti_addsub_object_arraylike( + self, tz_naive_fixture, box_with_array, other_box + ): + tz = tz_naive_fixture + + dti = pd.date_range("2017-01-01", periods=2, tz=tz) + dtarr = tm.box_expected(dti, box_with_array) + other = other_box([pd.offsets.MonthEnd(), pd.Timedelta(days=4)]) + xbox = get_upcast_box(box_with_array, other) + + expected = pd.DatetimeIndex(["2017-01-31", "2017-01-06"], tz=tz_naive_fixture) + expected = tm.box_expected(expected, xbox) + + warn = None if box_with_array 
is pd.DataFrame else PerformanceWarning + with tm.assert_produces_warning(warn): + result = dtarr + other + tm.assert_equal(result, expected) + + expected = pd.DatetimeIndex(["2016-12-31", "2016-12-29"], tz=tz_naive_fixture) + expected = tm.box_expected(expected, xbox) + + with tm.assert_produces_warning(warn): + result = dtarr - other + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("years", [-1, 0, 1]) +@pytest.mark.parametrize("months", [-2, 0, 2]) +def test_shift_months(years, months): + dti = DatetimeIndex( + [ + Timestamp("2000-01-05 00:15:00"), + Timestamp("2000-01-31 00:23:00"), + Timestamp("2000-01-01"), + Timestamp("2000-02-29"), + Timestamp("2000-12-31"), + ] + ) + actual = DatetimeIndex(shift_months(dti.asi8, years * 12 + months)) + + raw = [x + pd.offsets.DateOffset(years=years, months=months) for x in dti] + expected = DatetimeIndex(raw) + tm.assert_index_equal(actual, expected) diff --git a/pandas/tests/arithmetic/test_interval.py b/pandas/tests/arithmetic/test_interval.py new file mode 100644 index 00000000..f9e1a515 --- /dev/null +++ b/pandas/tests/arithmetic/test_interval.py @@ -0,0 +1,273 @@ +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_list_like + +import pandas as pd +from pandas import ( + Categorical, + Index, + Interval, + IntervalIndex, + Period, + Series, + Timedelta, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +@pytest.fixture( + params=[ + (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])), + (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])), + ( + timedelta_range("0 days", periods=3).insert(4, pd.NaT), + timedelta_range("1 day", periods=3).insert(4, pd.NaT), + ), + ( + date_range("20170101", periods=3).insert(4, pd.NaT), + date_range("20170102", periods=3).insert(4, pd.NaT), + ), + ( + date_range("20170101", periods=3, tz="US/Eastern").insert(4, pd.NaT), + 
date_range("20170102", periods=3, tz="US/Eastern").insert(4, pd.NaT), + ), + ], + ids=lambda x: str(x[0].dtype), +) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +@pytest.fixture +def array(left_right_dtypes): + """ + Fixture to generate an IntervalArray of various dtypes containing NA if possible + """ + left, right = left_right_dtypes + return IntervalArray.from_arrays(left, right) + + +def create_categorical_intervals(left, right, closed="right"): + return Categorical(IntervalIndex.from_arrays(left, right, closed)) + + +def create_series_intervals(left, right, closed="right"): + return Series(IntervalArray.from_arrays(left, right, closed)) + + +def create_series_categorical_intervals(left, right, closed="right"): + return Series(Categorical(IntervalIndex.from_arrays(left, right, closed))) + + +class TestComparison: + @pytest.fixture(params=[operator.eq, operator.ne]) + def op(self, request): + return request.param + + @pytest.fixture( + params=[ + IntervalArray.from_arrays, + IntervalIndex.from_arrays, + create_categorical_intervals, + create_series_intervals, + create_series_categorical_intervals, + ], + ids=[ + "IntervalArray", + "IntervalIndex", + "Categorical[Interval]", + "Series[Interval]", + "Series[Categorical[Interval]]", + ], + ) + def interval_constructor(self, request): + """ + Fixture for all pandas native interval constructors. + To be used as the LHS of IntervalArray comparisons. 
+ """ + return request.param + + def elementwise_comparison(self, op, array, other): + """ + Helper that performs elementwise comparisons between `array` and `other` + """ + other = other if is_list_like(other) else [other] * len(array) + return np.array([op(x, y) for x, y in zip(array, other)]) + + def test_compare_scalar_interval(self, op, array): + # matches first interval + other = array[0] + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + # matches on a single endpoint but not both + other = Interval(array.left[0], array.right[1]) + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed): + array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) + other = Interval(0, 1, closed=other_closed) + + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_scalar_na(self, op, array, nulls_fixture): + result = op(array, nulls_fixture) + expected = self.elementwise_comparison(op, array, nulls_fixture) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + 0, + 1.0, + True, + "foo", + Timestamp("2017-01-01"), + Timestamp("2017-01-01", tz="US/Eastern"), + Timedelta("0 days"), + Period("2017-01-01", "D"), + ], + ) + def test_compare_scalar_other(self, op, array, other): + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_list_like_interval( + self, op, array, interval_constructor, + ): + # same endpoints + other = interval_constructor(array.left, array.right) + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) +
tm.assert_numpy_array_equal(result, expected) + + # different endpoints + other = interval_constructor(array.left[::-1], array.right[::-1]) + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + # all nan endpoints + other = interval_constructor([np.nan] * 4, [np.nan] * 4) + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_list_like_interval_mixed_closed( + self, op, interval_constructor, closed, other_closed + ): + array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) + other = interval_constructor(range(2), range(1, 3), closed=other_closed) + + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + ( + Interval(0, 1), + Interval(Timedelta("1 day"), Timedelta("2 days")), + Interval(4, 5, "both"), + Interval(10, 20, "neither"), + ), + (0, 1.5, Timestamp("20170103"), np.nan), + ( + Timestamp("20170102", tz="US/Eastern"), + Timedelta("2 days"), + "baz", + pd.NaT, + ), + ], + ) + def test_compare_list_like_object(self, op, array, other): + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + def test_compare_list_like_nan(self, op, array, nulls_fixture): + other = [nulls_fixture] * 4 + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + np.arange(4, dtype="int64"), + np.arange(4, dtype="float64"), + date_range("2017-01-01", periods=4), + date_range("2017-01-01", periods=4, tz="US/Eastern"), + timedelta_range("0 days", periods=4), + period_range("2017-01-01", periods=4, freq="D"), + Categorical(list("abab")), + 
Categorical(date_range("2017-01-01", periods=4)), + pd.array(list("abcd")), + pd.array(["foo", 3.14, None, object()]), + ], + ids=lambda x: str(x.dtype), + ) + def test_compare_list_like_other(self, op, array, other): + result = op(array, other) + expected = self.elementwise_comparison(op, array, other) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("length", [1, 3, 5]) + @pytest.mark.parametrize("other_constructor", [IntervalArray, list]) + def test_compare_length_mismatch_errors(self, op, other_constructor, length): + array = IntervalArray.from_arrays(range(4), range(1, 5)) + other = other_constructor([Interval(0, 1)] * length) + with pytest.raises(ValueError, match="Lengths must match to compare"): + op(array, other) + + @pytest.mark.parametrize( + "constructor, expected_type, assert_func", + [ + (IntervalIndex, np.array, tm.assert_numpy_array_equal), + (Series, Series, tm.assert_series_equal), + ], + ) + def test_index_series_compat(self, op, constructor, expected_type, assert_func): + # IntervalIndex/Series that rely on IntervalArray for comparisons + breaks = range(4) + index = constructor(IntervalIndex.from_breaks(breaks)) + + # scalar comparisons + other = index[0] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + other = breaks[0] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + # list-like comparisons + other = IntervalArray.from_breaks(breaks) + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) + + other = [index[0], breaks[0], "foo"] + result = op(index, other) + expected = expected_type(self.elementwise_comparison(op, index, other)) + assert_func(result, expected) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py new file 
mode 100644 index 00000000..22da3de8 --- /dev/null +++ b/pandas/tests/arithmetic/test_numeric.py @@ -0,0 +1,1291 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +# Specifically for numeric dtypes +from collections import abc +from decimal import Decimal +from itertools import combinations +import operator +from typing import Any, List + +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, Series, Timedelta, TimedeltaIndex +import pandas._testing as tm +from pandas.core import ops + + +def adjust_negative_zero(zero, expected): + """ + Helper to adjust the expected result if we are dividing by -0.0 + as opposed to 0.0 + """ + if np.signbit(np.array(zero)).any(): + # All entries in the `zero` fixture should be either + # all-negative or no-negative. + assert np.signbit(np.array(zero)).all() + + expected *= -1 + + return expected + + +# TODO: remove this kludge once mypy stops giving false positives here +# List comprehension has incompatible type List[PandasObject]; expected List[RangeIndex] +# See GH#29725 +ser_or_index: List[Any] = [pd.Series, pd.Index] +lefts: List[Any] = [pd.RangeIndex(10, 40, 10)] +lefts.extend( + [ + cls([10, 20, 30], dtype=dtype) + for dtype in ["i1", "i2", "i4", "i8", "u1", "u2", "u4", "u8", "f2", "f4", "f8"] + for cls in ser_or_index + ] +) + +# ------------------------------------------------------------------ +# Comparisons + + +class TestNumericComparisons: + def test_operator_series_comparison_zerorank(self): + # GH#13006 + result = np.float64(0) > pd.Series([1, 2, 3]) + expected = 0.0 > pd.Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + result = pd.Series([1, 2, 3]) < np.float64(0) + expected = pd.Series([1, 2, 3]) < 0.0 + tm.assert_series_equal(result, expected) + result = np.array([0, 1, 2])[0] > pd.Series([0, 1, 2]) + expected = 0.0 > pd.Series([1, 2, 3]) + tm.assert_series_equal(result, expected) + + def 
test_df_numeric_cmp_dt64_raises(self): + # GH#8932, GH#22163 + ts = pd.Timestamp.now() + df = pd.DataFrame({"x": range(5)}) + + msg = "Invalid comparison between dtype=int64 and Timestamp" + + with pytest.raises(TypeError, match=msg): + df > ts + with pytest.raises(TypeError, match=msg): + df < ts + with pytest.raises(TypeError, match=msg): + ts < df + with pytest.raises(TypeError, match=msg): + ts > df + + assert not (df == ts).any().any() + assert (df != ts).all().all() + + def test_compare_invalid(self): + # GH#8058 + # ops testing + a = pd.Series(np.random.randn(5), name=0) + b = pd.Series(np.random.randn(5)) + b.name = pd.Timestamp("2000-01-01") + tm.assert_series_equal(a / b, 1 / (b / a)) + + +# ------------------------------------------------------------------ +# Numeric dtypes Arithmetic with Datetime/Timedelta Scalar + + +class TestNumericArraylikeArithmeticWithDatetimeLike: + + # TODO: also check name retention + @pytest.mark.parametrize("box_cls", [np.array, pd.Index, pd.Series]) + @pytest.mark.parametrize( + "left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype), + ) + def test_mul_td64arr(self, left, box_cls): + # GH#22390 + right = np.array([1, 2, 3], dtype="m8[s]") + right = box_cls(right) + + expected = pd.TimedeltaIndex(["10s", "40s", "90s"]) + if isinstance(left, pd.Series) or box_cls is pd.Series: + expected = pd.Series(expected) + + result = left * right + tm.assert_equal(result, expected) + + result = right * left + tm.assert_equal(result, expected) + + # TODO: also check name retention + @pytest.mark.parametrize("box_cls", [np.array, pd.Index, pd.Series]) + @pytest.mark.parametrize( + "left", lefts, ids=lambda x: type(x).__name__ + str(x.dtype), + ) + def test_div_td64arr(self, left, box_cls): + # GH#22390 + right = np.array([10, 40, 90], dtype="m8[s]") + right = box_cls(right) + + expected = pd.TimedeltaIndex(["1s", "2s", "3s"]) + if isinstance(left, pd.Series) or box_cls is pd.Series: + expected = pd.Series(expected) + + result =
right / left + tm.assert_equal(result, expected) + + result = right // left + tm.assert_equal(result, expected) + + with pytest.raises(TypeError): + left / right + + with pytest.raises(TypeError): + left // right + + # TODO: de-duplicate with test_numeric_arr_mul_tdscalar + def test_ops_series(self): + # regression test for G#H8813 + td = Timedelta("1 day") + other = pd.Series([1, 2]) + expected = pd.Series(pd.to_timedelta(["1 day", "2 days"])) + tm.assert_series_equal(expected, td * other) + tm.assert_series_equal(expected, other * td) + + # TODO: also test non-nanosecond timedelta64 and Tick objects; + # see test_numeric_arr_rdiv_tdscalar for note on these failing + @pytest.mark.parametrize( + "scalar_td", + [ + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta(), + ], + ids=lambda x: type(x).__name__, + ) + def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box): + # GH#19333 + index = numeric_idx + + expected = pd.timedelta_range("0 days", "4 days") + + index = tm.box_expected(index, box) + expected = tm.box_expected(expected, box) + + result = index * scalar_td + tm.assert_equal(result, expected) + + commute = scalar_td * index + tm.assert_equal(commute, expected) + + @pytest.mark.parametrize( + "scalar_td", + [ + Timedelta(days=1), + Timedelta(days=1).to_timedelta64(), + Timedelta(days=1).to_pytimedelta(), + ], + ids=lambda x: type(x).__name__, + ) + def test_numeric_arr_mul_tdscalar_numexpr_path(self, scalar_td, box): + arr = np.arange(2 * 10 ** 4).astype(np.int64) + obj = tm.box_expected(arr, box, transpose=False) + + expected = arr.view("timedelta64[D]").astype("timedelta64[ns]") + expected = tm.box_expected(expected, box, transpose=False) + + result = obj * scalar_td + tm.assert_equal(result, expected) + + result = scalar_td * obj + tm.assert_equal(result, expected) + + def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box): + index = numeric_idx[1:3] + + expected = TimedeltaIndex(["3 
Days", "36 Hours"]) + + index = tm.box_expected(index, box) + expected = tm.box_expected(expected, box) + + result = three_days / index + tm.assert_equal(result, expected) + + with pytest.raises(TypeError): + index / three_days + + @pytest.mark.parametrize( + "other", + [ + pd.Timedelta(hours=31), + pd.Timedelta(hours=31).to_pytimedelta(), + pd.Timedelta(hours=31).to_timedelta64(), + pd.Timedelta(hours=31).to_timedelta64().astype("m8[h]"), + np.timedelta64("NaT"), + np.timedelta64("NaT", "D"), + pd.offsets.Minute(3), + pd.offsets.Second(0), + ], + ) + def test_add_sub_timedeltalike_invalid(self, numeric_idx, other, box): + left = tm.box_expected(numeric_idx, box) + with pytest.raises(TypeError): + left + other + with pytest.raises(TypeError): + other + left + with pytest.raises(TypeError): + left - other + with pytest.raises(TypeError): + other - left + + @pytest.mark.parametrize( + "other", + [ + pd.Timestamp.now().to_pydatetime(), + pd.Timestamp.now(tz="UTC").to_pydatetime(), + pd.Timestamp.now().to_datetime64(), + pd.NaT, + ], + ) + @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning") + def test_add_sub_datetimelike_invalid(self, numeric_idx, other, box): + # GH#28080 numeric+datetime64 should raise; Timestamp raises + # NullFrequencyError instead of TypeError so is excluded. + left = tm.box_expected(numeric_idx, box) + + with pytest.raises(TypeError): + left + other + with pytest.raises(TypeError): + other + left + with pytest.raises(TypeError): + left - other + with pytest.raises(TypeError): + other - left + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestDivisionByZero: + def test_div_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + # We only adjust for Index, because Series does not yet apply + # the adjustment correctly. 
+ expected2 = adjust_negative_zero(zero, expected) + + result = idx / zero + tm.assert_index_equal(result, expected2) + ser_compat = Series(idx).astype("i8") / np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(expected)) + + def test_floordiv_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + # We only adjust for Index, because Series does not yet apply + # the adjustment correctly. + expected2 = adjust_negative_zero(zero, expected) + + result = idx // zero + tm.assert_index_equal(result, expected2) + ser_compat = Series(idx).astype("i8") // np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(expected)) + + def test_mod_zero(self, zero, numeric_idx): + idx = numeric_idx + + expected = pd.Index([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float64) + result = idx % zero + tm.assert_index_equal(result, expected) + ser_compat = Series(idx).astype("i8") % np.array(zero).astype("i8") + tm.assert_series_equal(ser_compat, Series(result)) + + def test_divmod_zero(self, zero, numeric_idx): + idx = numeric_idx + + exleft = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf], dtype=np.float64) + exright = pd.Index([np.nan, np.nan, np.nan, np.nan, np.nan], dtype=np.float64) + exleft = adjust_negative_zero(zero, exleft) + + result = divmod(idx, zero) + tm.assert_index_equal(result[0], exleft) + tm.assert_index_equal(result[1], exright) + + @pytest.mark.parametrize("op", [operator.truediv, operator.floordiv]) + def test_div_negative_zero(self, zero, numeric_idx, op): + # Check that -1 / -0.0 returns np.inf, not -np.inf + if isinstance(numeric_idx, pd.UInt64Index): + return + idx = numeric_idx - 3 + + expected = pd.Index( + [-np.inf, -np.inf, -np.inf, np.nan, np.inf], dtype=np.float64 + ) + expected = adjust_negative_zero(zero, expected) + + result = op(idx, zero) + tm.assert_index_equal(result, expected) + + # 
------------------------------------------------------------------ + + @pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) + def test_ser_div_ser(self, dtype1, any_real_dtype): + # no longer do integer div for any ops, but deal with the 0's + dtype2 = any_real_dtype + + first = Series([3, 4, 5, 8], name="first").astype(dtype1) + second = Series([0, 0, 0, 3], name="second").astype(dtype2) + + with np.errstate(all="ignore"): + expected = Series( + first.values.astype(np.float64) / second.values, + dtype="float64", + name=None, + ) + expected.iloc[0:3] = np.inf + + result = first / second + tm.assert_series_equal(result, expected) + assert not result.equals(second / first) + + @pytest.mark.parametrize("dtype1", [np.int64, np.float64, np.uint64]) + def test_ser_divmod_zero(self, dtype1, any_real_dtype): + # GH#26987 + dtype2 = any_real_dtype + left = pd.Series([1, 1]).astype(dtype1) + right = pd.Series([0, 2]).astype(dtype2) + + # GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed + # to numpy which sets to np.nan; patch `expected[0]` below + expected = left // right, left % right + expected = list(expected) + expected[0] = expected[0].astype(np.float64) + expected[0][0] = np.inf + result = divmod(left, right) + + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + # rdivmod case + result = divmod(left.values, right) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + def test_ser_divmod_inf(self): + left = pd.Series([np.inf, 1.0]) + right = pd.Series([np.inf, 2.0]) + + expected = left // right, left % right + result = divmod(left, right) + + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + # rdivmod case + result = divmod(left.values, right) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + def test_rdiv_zero_compat(self): + # GH#8674 + 
zero_array = np.array([0] * 5) + data = np.random.randn(5) + expected = Series([0.0] * 5) + + result = zero_array / Series(data) + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / data + tm.assert_series_equal(result, expected) + + result = Series(zero_array) / Series(data) + tm.assert_series_equal(result, expected) + + def test_div_zero_inf_signs(self): + # GH#9144, inf signing + ser = Series([-1, 0, 1], name="first") + expected = Series([-np.inf, np.nan, np.inf], name="first") + + result = ser / 0 + tm.assert_series_equal(result, expected) + + def test_rdiv_zero(self): + # GH#9144 + ser = Series([-1, 0, 1], name="first") + expected = Series([0.0, np.nan, 0.0], name="first") + + result = 0 / ser + tm.assert_series_equal(result, expected) + + def test_floordiv_div(self): + # GH#9144 + ser = Series([-1, 0, 1], name="first") + + result = ser // 0 + expected = Series([-np.inf, np.nan, np.inf], name="first") + tm.assert_series_equal(result, expected) + + def test_df_div_zero_df(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + result = df / df + + first = pd.Series([1.0, 1.0, 1.0, 1.0]) + second = pd.Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({"first": first, "second": second}) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_array(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + first = pd.Series([1.0, 1.0, 1.0, 1.0]) + second = pd.Series([np.nan, np.nan, np.nan, 1]) + expected = pd.DataFrame({"first": first, "second": second}) + + with np.errstate(all="ignore"): + arr = df.values.astype("float") / df.values + result = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_df_div_zero_int(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": 
[0, 0, 0, 3]}) + + result = df / 0 + expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns) + expected.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all="ignore"): + arr = df.values.astype("float64") / 0 + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_div_zero_series_does_not_commute(self): + # integer div, but deal with the 0's (GH#9144) + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser / df + res2 = df / ser + assert not res.fillna(0).equals(res2.fillna(0)) + + # ------------------------------------------------------------------ + # Mod By Zero + + def test_df_mod_zero_df(self): + # GH#3590, modulo as ints + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + # this is technically wrong, as the integer portion is coerced to float + # ### + first = pd.Series([0, 0, 0, 0], dtype="float64") + second = pd.Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({"first": first, "second": second}) + result = df % df + tm.assert_frame_equal(result, expected) + + def test_df_mod_zero_array(self): + # GH#3590, modulo as ints + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + # this is technically wrong, as the integer portion is coerced to float + # ### + first = pd.Series([0, 0, 0, 0], dtype="float64") + second = pd.Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({"first": first, "second": second}) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all="ignore"): + arr = df.values % df.values + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns, dtype="float64") + result2.iloc[0:3, 1] = np.nan + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_int(self): + # GH#3590, modulo as ints + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + + result = 
df % 0 + expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + # numpy has a slightly different (wrong) treatment + with np.errstate(all="ignore"): + arr = df.values.astype("float64") % 0 + result2 = pd.DataFrame(arr, index=df.index, columns=df.columns) + tm.assert_frame_equal(result2, expected) + + def test_df_mod_zero_series_does_not_commute(self): + # GH#3590, modulo as ints + # not commutative with series + df = pd.DataFrame(np.random.randn(10, 5)) + ser = df[0] + res = ser % df + res2 = df % ser + assert not res.fillna(0).equals(res2.fillna(0)) + + +class TestMultiplicationDivision: + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + # for non-timestamp/timedelta/period dtypes + + @pytest.mark.parametrize( + "box", + [ + pytest.param( + pd.Index, + marks=pytest.mark.xfail( + reason="Index.__div__ always raises", raises=TypeError + ), + ), + pd.Series, + pd.DataFrame, + ], + ids=lambda x: x.__name__, + ) + def test_divide_decimal(self, box): + # resolves issue GH#9787 + ser = Series([Decimal(10)]) + expected = Series([Decimal(5)]) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = ser / Decimal(2) + + tm.assert_equal(result, expected) + + result = ser // Decimal(2) + tm.assert_equal(result, expected) + + def test_div_equiv_binop(self): + # Test Series.div as well as Series.__div__ + # float/integer issue + # GH#7785 + first = Series([1, 0], name="first") + second = Series([-0.01, -0.02], name="second") + expected = Series([-0.01, -np.inf]) + + result = second.div(first) + tm.assert_series_equal(result, expected, check_names=False) + + result = second / first + tm.assert_series_equal(result, expected) + + def test_div_int(self, numeric_idx): + idx = numeric_idx + result = idx / 1 + expected = idx.astype("float64") + tm.assert_index_equal(result, expected) + + result = idx / 2 + expected = Index(idx.values / 2) + tm.assert_index_equal(result, 
expected) + + @pytest.mark.parametrize("op", [operator.mul, ops.rmul, operator.floordiv]) + def test_mul_int_identity(self, op, numeric_idx, box_with_array): + idx = numeric_idx + idx = tm.box_expected(idx, box_with_array) + + result = op(idx, 1) + tm.assert_equal(result, idx) + + def test_mul_int_array(self, numeric_idx): + idx = numeric_idx + didx = idx * idx + + result = idx * np.array(5, dtype="int64") + tm.assert_index_equal(result, idx * 5) + + arr_dtype = "uint64" if isinstance(idx, pd.UInt64Index) else "int64" + result = idx * np.arange(5, dtype=arr_dtype) + tm.assert_index_equal(result, didx) + + def test_mul_int_series(self, numeric_idx): + idx = numeric_idx + didx = idx * idx + + arr_dtype = "uint64" if isinstance(idx, pd.UInt64Index) else "int64" + result = idx * Series(np.arange(5, dtype=arr_dtype)) + tm.assert_series_equal(result, Series(didx)) + + def test_mul_float_series(self, numeric_idx): + idx = numeric_idx + rng5 = np.arange(5, dtype="float64") + + result = idx * Series(rng5 + 0.1) + expected = Series(rng5 * (rng5 + 0.1)) + tm.assert_series_equal(result, expected) + + def test_mul_index(self, numeric_idx): + # in general not true for RangeIndex + idx = numeric_idx + if not isinstance(idx, pd.RangeIndex): + result = idx * idx + tm.assert_index_equal(result, idx ** 2) + + def test_mul_datelike_raises(self, numeric_idx): + idx = numeric_idx + with pytest.raises(TypeError): + idx * pd.date_range("20130101", periods=5) + + def test_mul_size_mismatch_raises(self, numeric_idx): + idx = numeric_idx + with pytest.raises(ValueError): + idx * idx[0:3] + with pytest.raises(ValueError): + idx * np.array([1, 2]) + + @pytest.mark.parametrize("op", [operator.pow, ops.rpow]) + def test_pow_float(self, op, numeric_idx, box_with_array): + # test power calculations both ways, GH#14973 + box = box_with_array + idx = numeric_idx + expected = pd.Float64Index(op(idx.values, 2.0)) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + 
result = op(idx, 2.0) + tm.assert_equal(result, expected) + + def test_modulo(self, numeric_idx, box_with_array): + # GH#9244 + box = box_with_array + idx = numeric_idx + expected = Index(idx.values % 2) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, box) + + result = idx % 2 + tm.assert_equal(result, expected) + + def test_divmod_scalar(self, numeric_idx): + idx = numeric_idx + + result = divmod(idx, 2) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, 2) + + expected = Index(div), Index(mod) + for r, e in zip(result, expected): + tm.assert_index_equal(r, e) + + def test_divmod_ndarray(self, numeric_idx): + idx = numeric_idx + other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2 + + result = divmod(idx, other) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, other) + + expected = Index(div), Index(mod) + for r, e in zip(result, expected): + tm.assert_index_equal(r, e) + + def test_divmod_series(self, numeric_idx): + idx = numeric_idx + other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2 + + result = divmod(idx, Series(other)) + with np.errstate(all="ignore"): + div, mod = divmod(idx.values, other) + + expected = Series(div), Series(mod) + for r, e in zip(result, expected): + tm.assert_series_equal(r, e) + + @pytest.mark.parametrize("other", [np.nan, 7, -23, 2.718, -3.14, np.inf]) + def test_ops_np_scalar(self, other): + vals = np.random.randn(5, 3) + f = lambda x: pd.DataFrame( + x, index=list("ABCDE"), columns=["jim", "joe", "jolie"] + ) + + df = f(vals) + + tm.assert_frame_equal(df / np.array(other), f(vals / other)) + tm.assert_frame_equal(np.array(other) * df, f(vals * other)) + tm.assert_frame_equal(df + np.array(other), f(vals + other)) + tm.assert_frame_equal(np.array(other) - df, f(other - vals)) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_operators_frame(self): + # rpow does not work with DataFrame + ts = tm.makeTimeSeries() + ts.name = "ts" 
+ + df = pd.DataFrame({"A": ts}) + + tm.assert_series_equal(ts + ts, ts + df["A"], check_names=False) + tm.assert_series_equal(ts ** ts, ts ** df["A"], check_names=False) + tm.assert_series_equal(ts < ts, ts < df["A"], check_names=False) + tm.assert_series_equal(ts / ts, ts / df["A"], check_names=False) + + # TODO: this came from tests.series.test_analytics, needs cleanup and + # de-duplication with test_modulo above + def test_modulo2(self): + with np.errstate(all="ignore"): + + # GH#3590, modulo as ints + p = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) + result = p["first"] % p["second"] + expected = Series(p["first"].values % p["second"].values, dtype="float64") + expected.iloc[0:3] = np.nan + tm.assert_series_equal(result, expected) + + result = p["first"] % 0 + expected = Series(np.nan, index=p.index, name="first") + tm.assert_series_equal(result, expected) + + p = p.astype("float64") + result = p["first"] % p["second"] + expected = Series(p["first"].values % p["second"].values) + tm.assert_series_equal(result, expected) + + p = p.astype("float64") + result = p["first"] % p["second"] + result2 = p["second"] % p["first"] + assert not result.equals(result2) + + def test_modulo_zero_int(self): + # GH#9144 + with np.errstate(all="ignore"): + s = Series([0, 1]) + + result = s % 0 + expected = Series([np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + result = 0 % s + expected = Series([np.nan, 0.0]) + tm.assert_series_equal(result, expected) + + +class TestAdditionSubtraction: + # __add__, __sub__, __radd__, __rsub__, __iadd__, __isub__ + # for non-timestamp/timedelta/period dtypes + + # TODO: This came from series.test.test_operators, needs cleanup + def test_arith_ops_df_compat(self): + # GH#1134 + s1 = pd.Series([1, 2, 3], index=list("ABC"), name="x") + s2 = pd.Series([2, 2, 2], index=list("ABD"), name="x") + + exp = pd.Series([3.0, 4.0, np.nan, np.nan], index=list("ABCD"), name="x") + tm.assert_series_equal(s1 + s2, exp) + 
tm.assert_series_equal(s2 + s1, exp) + + exp = pd.DataFrame({"x": [3.0, 4.0, np.nan, np.nan]}, index=list("ABCD")) + tm.assert_frame_equal(s1.to_frame() + s2.to_frame(), exp) + tm.assert_frame_equal(s2.to_frame() + s1.to_frame(), exp) + + # different length + s3 = pd.Series([1, 2, 3], index=list("ABC"), name="x") + s4 = pd.Series([2, 2, 2, 2], index=list("ABCD"), name="x") + + exp = pd.Series([3, 4, 5, np.nan], index=list("ABCD"), name="x") + tm.assert_series_equal(s3 + s4, exp) + tm.assert_series_equal(s4 + s3, exp) + + exp = pd.DataFrame({"x": [3, 4, 5, np.nan]}, index=list("ABCD")) + tm.assert_frame_equal(s3.to_frame() + s4.to_frame(), exp) + tm.assert_frame_equal(s4.to_frame() + s3.to_frame(), exp) + + # TODO: This came from series.test.test_operators, needs cleanup + def test_series_frame_radd_bug(self): + # GH#353 + vals = pd.Series(tm.rands_array(5, 10)) + result = "foo_" + vals + expected = vals.map(lambda x: "foo_" + x) + tm.assert_series_equal(result, expected) + + frame = pd.DataFrame({"vals": vals}) + result = "foo_" + frame + expected = pd.DataFrame({"vals": vals.map(lambda x: "foo_" + x)}) + tm.assert_frame_equal(result, expected) + + ts = tm.makeTimeSeries() + ts.name = "ts" + + # really raise this time + now = pd.Timestamp.now().to_pydatetime() + with pytest.raises(TypeError): + now + ts + + with pytest.raises(TypeError): + ts + now + + # TODO: This came from series.test.test_operators, needs cleanup + def test_datetime64_with_index(self): + # arithmetic integer ops with an index + ser = pd.Series(np.random.randn(5)) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + # GH#4629 + # arithmetic datetime64 ops with an index + ser = pd.Series( + pd.date_range("20130101", periods=5), + index=pd.date_range("20130101", periods=5), + ) + expected = ser - ser.index.to_series() + result = ser - ser.index + tm.assert_series_equal(result, expected) + + with pytest.raises(TypeError): + # GH#18850 + 
result = ser - ser.index.to_period() + + df = pd.DataFrame( + np.random.randn(5, 2), index=pd.date_range("20130101", periods=5) + ) + df["date"] = pd.Timestamp("20130102") + df["expected"] = df["date"] - df.index.to_series() + df["result"] = df["date"] - df.index + tm.assert_series_equal(df["result"], df["expected"], check_names=False) + + # TODO: taken from tests.frame.test_operators, needs cleanup + def test_frame_operators(self, float_frame): + frame = float_frame + frame2 = pd.DataFrame(float_frame, columns=["D", "C", "B", "A"]) + + garbage = np.random.random(4) + colSeries = pd.Series(garbage, index=np.array(frame.columns)) + + idSum = frame + frame + seriesSum = frame + colSeries + + for col, series in idSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] * 2 + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + for col, series in seriesSum.items(): + for idx, val in series.items(): + origVal = frame[col][idx] + colSeries[col] + if not np.isnan(val): + assert val == origVal + else: + assert np.isnan(origVal) + + added = frame2 + frame2 + expected = frame2 * 2 + tm.assert_frame_equal(added, expected) + + df = pd.DataFrame({"a": ["a", None, "b"]}) + tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]})) + + # Test for issue #10181 + for dtype in ("float", "int64"): + frames = [ + pd.DataFrame(dtype=dtype), + pd.DataFrame(columns=["A"], dtype=dtype), + pd.DataFrame(index=[0], dtype=dtype), + ] + for df in frames: + assert (df + df).equals(df) + tm.assert_frame_equal(df + df, df) + + # TODO: taken from tests.series.test_operators; needs cleanup + def test_series_operators(self): + def _check_op(series, other, op, pos_only=False, check_dtype=True): + left = np.abs(series) if pos_only else series + right = np.abs(other) if pos_only else other + + cython_or_numpy = op(left, right) + python = left.combine(right, op) + tm.assert_series_equal(cython_or_numpy, python, check_dtype=check_dtype) + + 
def check(series, other): + simple_ops = ["add", "sub", "mul", "truediv", "floordiv", "mod"] + + for opname in simple_ops: + _check_op(series, other, getattr(operator, opname)) + + _check_op(series, other, operator.pow, pos_only=True) + + _check_op(series, other, ops.radd) + _check_op(series, other, ops.rsub) + _check_op(series, other, ops.rtruediv) + _check_op(series, other, ops.rfloordiv) + _check_op(series, other, ops.rmul) + _check_op(series, other, ops.rpow, pos_only=True) + _check_op(series, other, ops.rmod) + + tser = tm.makeTimeSeries().rename("ts") + check(tser, tser * 2) + check(tser, tser[::2]) + check(tser, 5) + + def check_comparators(series, other, check_dtype=True): + _check_op(series, other, operator.gt, check_dtype=check_dtype) + _check_op(series, other, operator.ge, check_dtype=check_dtype) + _check_op(series, other, operator.eq, check_dtype=check_dtype) + _check_op(series, other, operator.lt, check_dtype=check_dtype) + _check_op(series, other, operator.le, check_dtype=check_dtype) + + check_comparators(tser, 5) + check_comparators(tser, tser + 1, check_dtype=False) + + # TODO: taken from tests.series.test_operators; needs cleanup + def test_divmod(self): + def check(series, other): + results = divmod(series, other) + if isinstance(other, abc.Iterable) and len(series) != len(other): + # if the lengths don't match, this is the test where we use + # `tser[::2]`. Pad every other value in `other_np` with nan. 
+ other_np = [] + for n in other: + other_np.append(n) + other_np.append(np.nan) + else: + other_np = other + other_np = np.asarray(other_np) + with np.errstate(all="ignore"): + expecteds = divmod(series.values, np.asarray(other_np)) + + for result, expected in zip(results, expecteds): + # check the values, name, and index separately + tm.assert_almost_equal(np.asarray(result), expected) + + assert result.name == series.name + tm.assert_index_equal(result.index, series.index) + + tser = tm.makeTimeSeries().rename("ts") + check(tser, tser * 2) + check(tser, tser[::2]) + check(tser, 5) + + def test_series_divmod_zero(self): + # Check that divmod uses pandas convention for division by zero, + # which does not match numpy. + # pandas convention has + # 1/0 == np.inf + # -1/0 == -np.inf + # 1/-0.0 == -np.inf + # -1/-0.0 == np.inf + tser = tm.makeTimeSeries().rename("ts") + other = tser * 0 + + result = divmod(tser, other) + exp1 = pd.Series([np.inf] * len(tser), index=tser.index, name="ts") + exp2 = pd.Series([np.nan] * len(tser), index=tser.index, name="ts") + tm.assert_series_equal(result[0], exp1) + tm.assert_series_equal(result[1], exp2) + + +class TestUFuncCompat: + @pytest.mark.parametrize( + "holder", + [pd.Int64Index, pd.UInt64Index, pd.Float64Index, pd.RangeIndex, pd.Series], + ) + def test_ufunc_compat(self, holder): + box = pd.Series if holder is pd.Series else pd.Index + + if holder is pd.RangeIndex: + idx = pd.RangeIndex(0, 5) + else: + idx = holder(np.arange(5, dtype="int64")) + result = np.sin(idx) + expected = box(np.sin(np.arange(5, dtype="int64"))) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "holder", [pd.Int64Index, pd.UInt64Index, pd.Float64Index, pd.Series] + ) + def test_ufunc_coercions(self, holder): + idx = holder([1, 2, 3, 4, 5], name="x") + box = pd.Series if holder is pd.Series else pd.Index + + result = np.sqrt(idx) + assert result.dtype == "f8" and isinstance(result, box) + exp = pd.Float64Index(np.sqrt(np.array([1, 
2, 3, 4, 5])), name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = np.divide(idx, 2.0) + assert result.dtype == "f8" and isinstance(result, box) + exp = pd.Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + # _evaluate_numeric_binop + result = idx + 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = pd.Float64Index([3.0, 4.0, 5.0, 6.0, 7.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx - 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = pd.Float64Index([-1.0, 0.0, 1.0, 2.0, 3.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx * 1.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = pd.Float64Index([1.0, 2.0, 3.0, 4.0, 5.0], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx / 2.0 + assert result.dtype == "f8" and isinstance(result, box) + exp = pd.Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x") + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + @pytest.mark.parametrize( + "holder", [pd.Int64Index, pd.UInt64Index, pd.Float64Index, pd.Series] + ) + def test_ufunc_multiple_return_values(self, holder): + obj = holder([1, 2, 3], name="x") + box = pd.Series if holder is pd.Series else pd.Index + + result = np.modf(obj) + assert isinstance(result, tuple) + exp1 = pd.Float64Index([0.0, 0.0, 0.0], name="x") + exp2 = pd.Float64Index([1.0, 2.0, 3.0], name="x") + tm.assert_equal(result[0], tm.box_expected(exp1, box)) + tm.assert_equal(result[1], tm.box_expected(exp2, box)) + + def test_ufunc_at(self): + s = pd.Series([0, 1, 2], index=[1, 2, 3], name="x") + np.add.at(s, [0, 2], 10) + expected = pd.Series([10, 1, 12], index=[1, 2, 3], name="x") + tm.assert_series_equal(s, expected) + + +class TestObjectDtypeEquivalence: + # Tests that arithmetic operations match operations 
executed elementwise + + @pytest.mark.parametrize("dtype", [None, object]) + def test_numarr_with_dtype_add_nan(self, dtype, box_with_array): + box = box_with_array + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = np.nan + ser + tm.assert_equal(result, expected) + + result = ser + np.nan + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_numarr_with_dtype_add_int(self, dtype, box_with_array): + box = box_with_array + ser = pd.Series([1, 2, 3], dtype=dtype) + expected = pd.Series([2, 3, 4], dtype=dtype) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = 1 + ser + tm.assert_equal(result, expected) + + result = ser + 1 + tm.assert_equal(result, expected) + + # TODO: moved from tests.series.test_operators; needs cleanup + @pytest.mark.parametrize( + "op", + [operator.add, operator.sub, operator.mul, operator.truediv, operator.floordiv], + ) + def test_operators_reverse_object(self, op): + # GH#56 + arr = pd.Series(np.random.randn(10), index=np.arange(10), dtype=object) + + result = op(1.0, arr) + expected = op(1.0, arr.astype(float)) + tm.assert_series_equal(result.astype(float), expected) + + +class TestNumericArithmeticUnsorted: + # Tests in this class have been moved from type-specific test modules + # but not yet sorted, parametrized, and de-duplicated + + def check_binop(self, ops, scalars, idxs): + for op in ops: + for a, b in combinations(idxs, 2): + result = op(a, b) + expected = op(pd.Int64Index(a), pd.Int64Index(b)) + tm.assert_index_equal(result, expected) + for idx in idxs: + for scalar in scalars: + result = op(idx, scalar) + expected = op(pd.Int64Index(idx), scalar) + tm.assert_index_equal(result, expected) + + def test_binops(self): + ops = [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + 
operator.truediv, + ] + scalars = [-1, 1, 2] + idxs = [ + pd.RangeIndex(0, 10, 1), + pd.RangeIndex(0, 20, 2), + pd.RangeIndex(-10, 10, 2), + pd.RangeIndex(5, -5, -1), + ] + self.check_binop(ops, scalars, idxs) + + def test_binops_pow(self): + # numpy does not allow powers of negative integers so test separately + # https://github.com/numpy/numpy/pull/8127 + ops = [pow] + scalars = [1, 2] + idxs = [pd.RangeIndex(0, 10, 1), pd.RangeIndex(0, 20, 2)] + self.check_binop(ops, scalars, idxs) + + # TODO: mod, divmod? + @pytest.mark.parametrize( + "op", + [ + operator.add, + operator.sub, + operator.mul, + operator.floordiv, + operator.truediv, + operator.pow, + ], + ) + def test_arithmetic_with_frame_or_series(self, op): + # check that we return NotImplemented when operating with Series + # or DataFrame + index = pd.RangeIndex(5) + other = pd.Series(np.random.randn(5)) + + expected = op(pd.Series(index), other) + result = op(index, other) + tm.assert_series_equal(result, expected) + + other = pd.DataFrame(np.random.randn(2, 5)) + expected = op(pd.DataFrame([index, index]), other) + result = op(index, other) + tm.assert_frame_equal(result, expected) + + def test_numeric_compat2(self): + # validate that we are handling the RangeIndex overrides to numeric ops + # and returning RangeIndex where possible + + idx = pd.RangeIndex(0, 10, 2) + + result = idx * 2 + expected = pd.RangeIndex(0, 20, 4) + tm.assert_index_equal(result, expected, exact=True) + + result = idx + 2 + expected = pd.RangeIndex(2, 12, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = idx - 2 + expected = pd.RangeIndex(-2, 8, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = idx / 2 + expected = pd.RangeIndex(0, 5, 1).astype("float64") + tm.assert_index_equal(result, expected, exact=True) + + result = idx / 4 + expected = pd.RangeIndex(0, 10, 2) / 4 + tm.assert_index_equal(result, expected, exact=True) + + result = idx // 1 + expected = idx + tm.assert_index_equal(result, 
expected, exact=True) + + # __mul__ + result = idx * idx + expected = Index(idx.values * idx.values) + tm.assert_index_equal(result, expected, exact=True) + + # __pow__ + idx = pd.RangeIndex(0, 1000, 2) + result = idx ** 2 + expected = idx._int64index ** 2 + tm.assert_index_equal(Index(result.values), expected, exact=True) + + # __floordiv__ + cases_exact = [ + (pd.RangeIndex(0, 1000, 2), 2, pd.RangeIndex(0, 500, 1)), + (pd.RangeIndex(-99, -201, -3), -3, pd.RangeIndex(33, 67, 1)), + (pd.RangeIndex(0, 1000, 1), 2, pd.RangeIndex(0, 1000, 1)._int64index // 2), + ( + pd.RangeIndex(0, 100, 1), + 2.0, + pd.RangeIndex(0, 100, 1)._int64index // 2.0, + ), + (pd.RangeIndex(0), 50, pd.RangeIndex(0)), + (pd.RangeIndex(2, 4, 2), 3, pd.RangeIndex(0, 1, 1)), + (pd.RangeIndex(-5, -10, -6), 4, pd.RangeIndex(-2, -1, 1)), + (pd.RangeIndex(-100, -200, 3), 2, pd.RangeIndex(0)), + ] + for idx, div, expected in cases_exact: + tm.assert_index_equal(idx // div, expected, exact=True) + + @pytest.mark.parametrize("dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("delta", [1, 0, -1]) + def test_addsub_arithmetic(self, dtype, delta): + # GH#8142 + delta = dtype(delta) + index = pd.Index([10, 11, 12], dtype=dtype) + result = index + delta + expected = pd.Index(index.values + delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + # this subtraction used to fail + result = index - delta + expected = pd.Index(index.values - delta, dtype=dtype) + tm.assert_index_equal(result, expected) + + tm.assert_index_equal(index + index, 2 * index) + tm.assert_index_equal(index - index, 0 * index) + assert not (index - index).empty + + +def test_fill_value_inf_masking(): + # GH #27464 make sure we mask 0/1 with Inf and not NaN + df = pd.DataFrame({"A": [0, 1, 2], "B": [1.1, None, 1.1]}) + + other = pd.DataFrame({"A": [1.1, 1.2, 1.3]}, index=[0, 2, 3]) + + result = df.rfloordiv(other, fill_value=1) + + expected = pd.DataFrame( + {"A": [np.inf, 1.0, 0.0, 1.0], "B": [0.0, np.nan, 0.0, 
np.nan]} + ) + tm.assert_frame_equal(result, expected) + + +def test_dataframe_div_silenced(): + # GH#26793 + pdf1 = pd.DataFrame( + { + "A": np.arange(10), + "B": [np.nan, 1, 2, 3, 4] * 2, + "C": [np.nan] * 10, + "D": np.arange(10), + }, + index=list("abcdefghij"), + columns=list("ABCD"), + ) + pdf2 = pd.DataFrame( + np.random.randn(10, 4), index=list("abcdefghjk"), columns=list("ABCX") + ) + with tm.assert_produces_warning(None): + pdf1.div(pdf2, fill_value=0) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py new file mode 100644 index 00000000..d0f204a6 --- /dev/null +++ b/pandas/tests/arithmetic/test_object.py @@ -0,0 +1,365 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +# Specifically for object dtype +import datetime +from decimal import Decimal +import operator + +import numpy as np +import pytest + +import pandas as pd +from pandas import Series, Timestamp +import pandas._testing as tm +from pandas.core import ops + +# ------------------------------------------------------------------ +# Comparisons + + +class TestObjectComparisons: + def test_comparison_object_numeric_nas(self): + ser = Series(np.random.randn(10), dtype=object) + shifted = ser.shift(2) + + ops = ["lt", "le", "gt", "ge", "eq", "ne"] + for op in ops: + func = getattr(operator, op) + + result = func(ser, shifted) + expected = func(ser.astype(float), shifted.astype(float)) + tm.assert_series_equal(result, expected) + + def test_object_comparisons(self): + ser = Series(["a", "b", np.nan, "c", "a"]) + + result = ser == "a" + expected = Series([True, False, False, False, True]) + tm.assert_series_equal(result, expected) + + result = ser < "a" + expected = Series([False, False, False, False, False]) + tm.assert_series_equal(result, expected) + + result = ser != "a" + expected = -(ser == "a") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, object]) + def 
test_more_na_comparisons(self, dtype): + left = Series(["a", np.nan, "c"], dtype=dtype) + right = Series(["a", np.nan, "d"], dtype=dtype) + + result = left == right + expected = Series([True, False, False]) + tm.assert_series_equal(result, expected) + + result = left != right + expected = Series([False, True, True]) + tm.assert_series_equal(result, expected) + + result = left == np.nan + expected = Series([False, False, False]) + tm.assert_series_equal(result, expected) + + result = left != np.nan + expected = Series([True, True, True]) + tm.assert_series_equal(result, expected) + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestArithmetic: + + # TODO: parametrize + def test_pow_ops_object(self): + # GH#22922 + # pow is weird with masking & 1, so testing here + a = Series([1, np.nan, 1, np.nan], dtype=object) + b = Series([1, np.nan, np.nan, 1], dtype=object) + result = a ** b + expected = Series(a.values ** b.values, dtype=object) + tm.assert_series_equal(result, expected) + + result = b ** a + expected = Series(b.values ** a.values, dtype=object) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + @pytest.mark.parametrize("other", ["category", "Int64"]) + def test_add_extension_scalar(self, other, box_with_array, op): + # GH#22378 + # Check that scalars satisfying is_extension_array_dtype(obj) + # do not incorrectly try to dispatch to an ExtensionArray operation + + arr = pd.Series(["a", "b", "c"]) + expected = pd.Series([op(x, other) for x in arr]) + + arr = tm.box_expected(arr, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = op(arr, other) + tm.assert_equal(result, expected) + + def test_objarr_add_str(self, box): + ser = pd.Series(["x", np.nan, "x"]) + expected = pd.Series(["xa", np.nan, "xa"]) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = ser + "a" + 
tm.assert_equal(result, expected) + + def test_objarr_radd_str(self, box): + ser = pd.Series(["x", np.nan, "x"]) + expected = pd.Series(["ax", np.nan, "ax"]) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = "a" + ser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + [1, 2, 3], + [1.1, 2.2, 3.3], + [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT], + ["x", "y", 1], + ], + ) + @pytest.mark.parametrize("dtype", [None, object]) + def test_objarr_radd_str_invalid(self, dtype, data, box_with_array): + ser = Series(data, dtype=dtype) + + ser = tm.box_expected(ser, box_with_array) + with pytest.raises(TypeError): + "foo_" + ser + + @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub]) + def test_objarr_add_invalid(self, op, box_with_array): + # invalid ops + box = box_with_array + + obj_ser = tm.makeObjectSeries() + obj_ser.name = "objects" + + obj_ser = tm.box_expected(obj_ser, box) + with pytest.raises(Exception): + op(obj_ser, 1) + with pytest.raises(Exception): + op(obj_ser, np.array(1, dtype=np.int64)) + + # TODO: Moved from tests.series.test_operators; needs cleanup + def test_operators_na_handling(self): + ser = Series(["foo", "bar", "baz", np.nan]) + result = "prefix_" + ser + expected = pd.Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan]) + tm.assert_series_equal(result, expected) + + result = ser + "_suffix" + expected = pd.Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan]) + tm.assert_series_equal(result, expected) + + # TODO: parametrize over box + @pytest.mark.parametrize("dtype", [None, object]) + def test_series_with_dtype_radd_timedelta(self, dtype): + # note this test is _not_ aimed at timedelta64-dtyped Series + ser = pd.Series( + [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")], + dtype=dtype, + ) + expected = pd.Series( + [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")] + ) + + 
result = pd.Timedelta("3 days") + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.Timedelta("3 days") + tm.assert_series_equal(result, expected) + + # TODO: cleanup & parametrize over box + def test_mixed_timezone_series_ops_object(self): + # GH#13043 + ser = pd.Series( + [ + pd.Timestamp("2015-01-01", tz="US/Eastern"), + pd.Timestamp("2015-01-01", tz="Asia/Tokyo"), + ], + name="xxx", + ) + assert ser.dtype == object + + exp = pd.Series( + [ + pd.Timestamp("2015-01-02", tz="US/Eastern"), + pd.Timestamp("2015-01-02", tz="Asia/Tokyo"), + ], + name="xxx", + ) + tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp) + tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp) + + # object series & object series + ser2 = pd.Series( + [ + pd.Timestamp("2015-01-03", tz="US/Eastern"), + pd.Timestamp("2015-01-05", tz="Asia/Tokyo"), + ], + name="xxx", + ) + assert ser2.dtype == object + exp = pd.Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx") + tm.assert_series_equal(ser2 - ser, exp) + tm.assert_series_equal(ser - ser2, -exp) + + ser = pd.Series( + [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")], + name="xxx", + dtype=object, + ) + assert ser.dtype == object + + exp = pd.Series( + [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx" + ) + tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp) + tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp) + + # TODO: cleanup & parametrize over box + def test_iadd_preserves_name(self): + # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name + ser = pd.Series([1, 2, 3]) + ser.index.name = "foo" + + ser.index += 1 + assert ser.index.name == "foo" + + ser.index -= 1 + assert ser.index.name == "foo" + + def test_add_string(self): + # from bug report + index = pd.Index(["a", "b", "c"]) + index2 = index + "foo" + + assert "a" not in index2 + assert "afoo" in index2 + + def test_iadd_string(self): + index = pd.Index(["a", "b", "c"]) + # doesn't fail test 
unless there is a check before `+=` + assert "a" in index + + index += "_x" + assert "a_x" in index + + def test_add(self): + index = tm.makeStringIndex(100) + expected = pd.Index(index.values * 2) + tm.assert_index_equal(index + index, expected) + tm.assert_index_equal(index + index.tolist(), expected) + tm.assert_index_equal(index.tolist() + index, expected) + + # test add and radd + index = pd.Index(list("abc")) + expected = pd.Index(["a1", "b1", "c1"]) + tm.assert_index_equal(index + "1", expected) + expected = pd.Index(["1a", "1b", "1c"]) + tm.assert_index_equal("1" + index, expected) + + def test_sub_fail(self): + index = tm.makeStringIndex(100) + with pytest.raises(TypeError): + index - "a" + with pytest.raises(TypeError): + index - index + with pytest.raises(TypeError): + index - index.tolist() + with pytest.raises(TypeError): + index.tolist() - index + + def test_sub_object(self): + # GH#19369 + index = pd.Index([Decimal(1), Decimal(2)]) + expected = pd.Index([Decimal(0), Decimal(1)]) + + result = index - Decimal(1) + tm.assert_index_equal(result, expected) + + result = index - pd.Index([Decimal(1), Decimal(1)]) + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + index - "foo" + + with pytest.raises(TypeError): + index - np.array([2, "foo"]) + + def test_rsub_object(self): + # GH#19369 + index = pd.Index([Decimal(1), Decimal(2)]) + expected = pd.Index([Decimal(1), Decimal(0)]) + + result = Decimal(2) - index + tm.assert_index_equal(result, expected) + + result = np.array([Decimal(2), Decimal(2)]) - index + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + "foo" - index + + with pytest.raises(TypeError): + np.array([True, pd.Timestamp.now()]) - index + + +class MyIndex(pd.Index): + # Simple index subclass that tracks ops calls. 
+ + _calls: int + + @classmethod + def _simple_new(cls, values, name=None, dtype=None): + result = object.__new__(cls) + result._data = values + result._index_data = values + result._name = name + result._calls = 0 + + return result._reset_identity() + + def __add__(self, other): + self._calls += 1 + return self._simple_new(self._index_data) + + def __radd__(self, other): + return self.__add__(other) + + +@pytest.mark.parametrize( + "other", + [ + [datetime.timedelta(1), datetime.timedelta(2)], + [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)], + [pd.Period("2000"), pd.Period("2001")], + ["a", "b"], + ], + ids=["timedelta", "datetime", "period", "object"], +) +def test_index_ops_defer_to_unknown_subclasses(other): + # https://github.com/pandas-dev/pandas/issues/31109 + values = np.array( + [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object + ) + a = MyIndex._simple_new(values) + other = pd.Index(other) + result = other + a + assert isinstance(result, MyIndex) + assert a._calls == 1 diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py new file mode 100644 index 00000000..abb66726 --- /dev/null +++ b/pandas/tests/arithmetic/test_period.py @@ -0,0 +1,1450 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. 
+# Specifically for Period dtype +import operator + +import numpy as np +import pytest + +from pandas._libs.tslibs.period import IncompatibleFrequency +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import Period, PeriodIndex, Series, period_range +import pandas._testing as tm +from pandas.core import ops +from pandas.core.arrays import TimedeltaArray + +from pandas.tseries.frequencies import to_offset + +from .common import assert_invalid_comparison + +# ------------------------------------------------------------------ +# Comparisons + + +class TestPeriodArrayLikeComparisons: + # Comparison tests for PeriodDtype vectors fully parametrized over + # DataFrame/Series/PeriodIndex/PeriodArray. Ideally all comparison + # tests will eventually end up here. + + def test_compare_zerodim(self, box_with_array): + # GH#26689 make sure we unbox zero-dimensional arrays + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + + pi = pd.period_range("2000", periods=4) + other = np.array(pi.to_numpy()[0]) + + pi = tm.box_expected(pi, box_with_array) + result = pi <= other + expected = np.array([True, False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "scalar", ["foo", pd.Timestamp.now(), pd.Timedelta(days=4)] + ) + def test_compare_invalid_scalar(self, box_with_array, scalar): + # comparison with scalar that cannot be interpreted as a Period + pi = pd.period_range("2000", periods=4) + parr = tm.box_expected(pi, box_with_array) + assert_invalid_comparison(parr, scalar, box_with_array) + + @pytest.mark.parametrize( + "other", + [ + pd.date_range("2000", periods=4).array, + pd.timedelta_range("1D", periods=4).array, + np.arange(4), + np.arange(4).astype(np.float64), + list(range(4)), + ], + ) + def test_compare_invalid_listlike(self, box_with_array, other): + pi = pd.period_range("2000", periods=4) + parr = tm.box_expected(pi, box_with_array) + 
assert_invalid_comparison(parr, other, box_with_array) + + @pytest.mark.parametrize("other_box", [list, np.array, lambda x: x.astype(object)]) + def test_compare_object_dtype(self, box_with_array, other_box): + pi = pd.period_range("2000", periods=5) + parr = tm.box_expected(pi, box_with_array) + + xbox = np.ndarray if box_with_array is pd.Index else box_with_array + + other = other_box(pi) + + expected = np.array([True, True, True, True, True]) + expected = tm.box_expected(expected, xbox) + + result = parr == other + tm.assert_equal(result, expected) + result = parr <= other + tm.assert_equal(result, expected) + result = parr >= other + tm.assert_equal(result, expected) + + result = parr != other + tm.assert_equal(result, ~expected) + result = parr < other + tm.assert_equal(result, ~expected) + result = parr > other + tm.assert_equal(result, ~expected) + + other = other_box(pi[::-1]) + + expected = np.array([False, False, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr == other + tm.assert_equal(result, expected) + + expected = np.array([True, True, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr <= other + tm.assert_equal(result, expected) + + expected = np.array([False, False, True, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr >= other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr != other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr < other + tm.assert_equal(result, expected) + + expected = np.array([False, False, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr > other + tm.assert_equal(result, expected) + + +class TestPeriodIndexComparisons: + # TODO: parameterize over boxes + + @pytest.mark.parametrize("other", 
["2017", pd.Period("2017", freq="D")]) + def test_eq(self, other): + idx = PeriodIndex(["2017", "2017", "2018"], freq="D") + expected = np.array([True, True, False]) + result = idx == other + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + 2017, + [2017, 2017, 2017], + np.array([2017, 2017, 2017]), + np.array([2017, 2017, 2017], dtype=object), + pd.Index([2017, 2017, 2017]), + ], + ) + def test_eq_integer_disallowed(self, other): + # match Period semantics by not treating integers as Periods + + idx = PeriodIndex(["2017", "2017", "2018"], freq="D") + expected = np.array([False, False, False]) + result = idx == other + + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises(TypeError): + idx < other + with pytest.raises(TypeError): + idx > other + with pytest.raises(TypeError): + idx <= other + with pytest.raises(TypeError): + idx >= other + + def test_pi_cmp_period(self): + idx = period_range("2007-01", periods=20, freq="M") + + result = idx < idx[10] + exp = idx.values < idx.values[10] + tm.assert_numpy_array_equal(result, exp) + + # TODO: moved from test_datetime64; de-duplicate with version below + def test_parr_cmp_period_scalar2(self, box_with_array): + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + + pi = pd.period_range("2000-01-01", periods=10, freq="D") + + val = Period("2000-01-04", freq="D") + expected = [x > val for x in pi] + + ser = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, xbox) + result = ser > val + tm.assert_equal(result, expected) + + val = pi[5] + result = ser > val + expected = [x > val for x in pi] + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_period_scalar(self, freq, box_with_array): + # GH#13200 + xbox = np.ndarray if box_with_array is pd.Index else box_with_array + + base = PeriodIndex(["2011-01", "2011-02", 
"2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + per = Period("2011-02", freq=freq) + + exp = np.array([False, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base == per, exp) + tm.assert_equal(per == base, exp) + + exp = np.array([True, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base != per, exp) + tm.assert_equal(per != base, exp) + + exp = np.array([False, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base > per, exp) + tm.assert_equal(per < base, exp) + + exp = np.array([True, False, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base < per, exp) + tm.assert_equal(per > base, exp) + + exp = np.array([False, True, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base >= per, exp) + tm.assert_equal(per <= base, exp) + + exp = np.array([True, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base <= per, exp) + tm.assert_equal(per >= base, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_pi(self, freq, box_with_array): + # GH#13200 + xbox = np.ndarray if box_with_array is pd.Index else box_with_array + + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + + # TODO: could also box idx? 
+ idx = PeriodIndex(["2011-02", "2011-01", "2011-03", "2011-05"], freq=freq) + + exp = np.array([False, False, True, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base == idx, exp) + + exp = np.array([True, True, False, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base != idx, exp) + + exp = np.array([False, True, False, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base > idx, exp) + + exp = np.array([True, False, False, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base < idx, exp) + + exp = np.array([False, True, True, False]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base >= idx, exp) + + exp = np.array([True, False, True, True]) + exp = tm.box_expected(exp, xbox) + tm.assert_equal(base <= idx, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_parr_cmp_pi_mismatched_freq_raises(self, freq, box_with_array): + # GH#13200 + # different base freq + base = PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq=freq) + base = tm.box_expected(base, box_with_array) + + msg = "Input has different freq=A-DEC from " + with pytest.raises(IncompatibleFrequency, match=msg): + base <= Period("2011", freq="A") + + with pytest.raises(IncompatibleFrequency, match=msg): + Period("2011", freq="A") >= base + + # TODO: Could parametrize over boxes for idx? 
+ idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="A") + rev_msg = r"Input has different freq=(M|2M|3M) from PeriodArray\(freq=A-DEC\)" + idx_msg = rev_msg if box_with_array is tm.to_array else msg + with pytest.raises(IncompatibleFrequency, match=idx_msg): + base <= idx + + # Different frequency + msg = "Input has different freq=4M from " + with pytest.raises(IncompatibleFrequency, match=msg): + base <= Period("2011", freq="4M") + + with pytest.raises(IncompatibleFrequency, match=msg): + Period("2011", freq="4M") >= base + + idx = PeriodIndex(["2011", "2012", "2013", "2014"], freq="4M") + rev_msg = r"Input has different freq=(M|2M|3M) from PeriodArray\(freq=4M\)" + idx_msg = rev_msg if box_with_array is tm.to_array else msg + with pytest.raises(IncompatibleFrequency, match=idx_msg): + base <= idx + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_pi_cmp_nat(self, freq): + idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq) + + result = idx1 > Period("2011-02", freq=freq) + exp = np.array([False, False, False, True]) + tm.assert_numpy_array_equal(result, exp) + result = Period("2011-02", freq=freq) < idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == Period("NaT", freq=freq) + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + result = Period("NaT", freq=freq) == idx1 + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != Period("NaT", freq=freq) + exp = np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + result = Period("NaT", freq=freq) != idx1 + tm.assert_numpy_array_equal(result, exp) + + idx2 = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq=freq) + result = idx1 < idx2 + exp = np.array([True, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx2 + exp = np.array([False, False, False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx2 + exp = 
np.array([True, True, True, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 == idx1 + exp = np.array([True, True, False, True]) + tm.assert_numpy_array_equal(result, exp) + + result = idx1 != idx1 + exp = np.array([False, False, True, False]) + tm.assert_numpy_array_equal(result, exp) + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_pi_cmp_nat_mismatched_freq_raises(self, freq): + idx1 = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-05"], freq=freq) + + diff = PeriodIndex(["2011-02", "2011-01", "2011-04", "NaT"], freq="4M") + msg = "Input has different freq=4M from Period(Array|Index)" + with pytest.raises(IncompatibleFrequency, match=msg): + idx1 > diff + + with pytest.raises(IncompatibleFrequency, match=msg): + idx1 == diff + + # TODO: De-duplicate with test_pi_cmp_nat + @pytest.mark.parametrize("dtype", [object, None]) + def test_comp_nat(self, dtype): + left = pd.PeriodIndex( + [pd.Period("2011-01-01"), pd.NaT, pd.Period("2011-01-03")] + ) + right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period("2011-01-03")]) + + if dtype is not None: + left = left.astype(dtype) + right = right.astype(dtype) + + result = left == right + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = left != right + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == right, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(left != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != left, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(left < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > left, expected) + + +class TestPeriodSeriesComparisons: + def test_cmp_series_period_series_mixed_freq(self): + # GH#13200 + base = Series( + [ + 
Period("2011", freq="A"), + Period("2011-02", freq="M"), + Period("2013", freq="A"), + Period("2011-04", freq="M"), + ] + ) + + ser = Series( + [ + Period("2012", freq="A"), + Period("2011-01", freq="M"), + Period("2013", freq="A"), + Period("2011-05", freq="M"), + ] + ) + + exp = Series([False, False, True, False]) + tm.assert_series_equal(base == ser, exp) + + exp = Series([True, True, False, True]) + tm.assert_series_equal(base != ser, exp) + + exp = Series([False, True, False, False]) + tm.assert_series_equal(base > ser, exp) + + exp = Series([True, False, False, True]) + tm.assert_series_equal(base < ser, exp) + + exp = Series([False, True, True, False]) + tm.assert_series_equal(base >= ser, exp) + + exp = Series([True, False, True, True]) + tm.assert_series_equal(base <= ser, exp) + + +class TestPeriodIndexSeriesComparisonConsistency: + """ Test PeriodIndex and Period Series Ops consistency """ + + # TODO: needs parametrization+de-duplication + + def _check(self, values, func, expected): + # Test PeriodIndex and Period Series Ops consistency + + idx = pd.PeriodIndex(values) + result = func(idx) + + # check that we don't pass an unwanted type to tm.assert_equal + assert isinstance(expected, (pd.Index, np.ndarray)) + tm.assert_equal(result, expected) + + s = pd.Series(values) + result = func(s) + + exp = pd.Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_comp_period(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + + f = lambda x: x == pd.Period("2011-03", freq="M") + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period("2011-03", freq="M") == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period("2011-03", freq="M") + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period("2011-03", freq="M") != x + self._check(idx, f, exp) + + f = lambda x: 
pd.Period("2011-03", freq="M") >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > pd.Period("2011-03", freq="M") + exp = np.array([False, False, False, True], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: pd.Period("2011-03", freq="M") >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + def test_pi_comp_period_nat(self): + idx = PeriodIndex( + ["2011-01", "NaT", "2011-03", "2011-04"], freq="M", name="idx" + ) + + f = lambda x: x == pd.Period("2011-03", freq="M") + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period("2011-03", freq="M") == x + self._check(idx, f, exp) + + f = lambda x: x == pd.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.NaT == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period("2011-03", freq="M") + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period("2011-03", freq="M") != x + self._check(idx, f, exp) + + f = lambda x: x != pd.NaT + exp = np.array([True, True, True, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.NaT != x + self._check(idx, f, exp) + + f = lambda x: pd.Period("2011-03", freq="M") >= x + exp = np.array([True, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x < pd.Period("2011-03", freq="M") + exp = np.array([True, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > pd.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: pd.NaT >= x + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + +# ------------------------------------------------------------------ +# Arithmetic + + +class TestPeriodFrameArithmetic: + def test_ops_frame_period(self): + # 
GH#13043 + df = pd.DataFrame( + { + "A": [pd.Period("2015-01", freq="M"), pd.Period("2015-02", freq="M")], + "B": [pd.Period("2014-01", freq="M"), pd.Period("2014-02", freq="M")], + } + ) + assert df["A"].dtype == "Period[M]" + assert df["B"].dtype == "Period[M]" + + p = pd.Period("2015-03", freq="M") + off = p.freq + # dtype will be object because of original dtype + exp = pd.DataFrame( + { + "A": np.array([2 * off, 1 * off], dtype=object), + "B": np.array([14 * off, 13 * off], dtype=object), + } + ) + tm.assert_frame_equal(p - df, exp) + tm.assert_frame_equal(df - p, -1 * exp) + + df2 = pd.DataFrame( + { + "A": [pd.Period("2015-05", freq="M"), pd.Period("2015-06", freq="M")], + "B": [pd.Period("2015-05", freq="M"), pd.Period("2015-06", freq="M")], + } + ) + assert df2["A"].dtype == "Period[M]" + assert df2["B"].dtype == "Period[M]" + + exp = pd.DataFrame( + { + "A": np.array([4 * off, 4 * off], dtype=object), + "B": np.array([16 * off, 16 * off], dtype=object), + } + ) + tm.assert_frame_equal(df2 - df, exp) + tm.assert_frame_equal(df - df2, -1 * exp) + + +class TestPeriodIndexArithmetic: + # --------------------------------------------------------------- + # __add__/__sub__ with PeriodIndex + # PeriodIndex + other is defined for integers and timedelta-like others + # PeriodIndex - other is defined for integers, timedelta-like others, + # and PeriodIndex (with matching freq) + + def test_parr_add_iadd_parr_raises(self, box_with_array): + rng = pd.period_range("1/1/2000", freq="D", periods=5) + other = pd.period_range("1/6/2000", freq="D", periods=5) + # TODO: parametrize over boxes for other? + + rng = tm.box_expected(rng, box_with_array) + # An earlier implementation of PeriodIndex addition performed + # a set operation (union). This has since been changed to + # raise a TypeError. See GH#14164 and GH#13077 for historical + # reference. 
+ with pytest.raises(TypeError): + rng + other + + with pytest.raises(TypeError): + rng += other + + def test_pi_sub_isub_pi(self): + # GH#20049 + # For historical reference see GH#14164, GH#13077. + # PeriodIndex subtraction originally performed set difference, + # then changed to raise TypeError before being implemented in GH#20049 + rng = pd.period_range("1/1/2000", freq="D", periods=5) + other = pd.period_range("1/6/2000", freq="D", periods=5) + + off = rng.freq + expected = pd.Index([-5 * off] * 5) + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_pi_sub_pi_with_nat(self): + rng = pd.period_range("1/1/2000", freq="D", periods=5) + other = rng[1:].insert(0, pd.NaT) + assert other[1:].equals(rng[1:]) + + result = rng - other + off = rng.freq + expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off]) + tm.assert_index_equal(result, expected) + + def test_parr_sub_pi_mismatched_freq(self, box_with_array): + rng = pd.period_range("1/1/2000", freq="D", periods=5) + other = pd.period_range("1/6/2000", freq="H", periods=5) + # TODO: parametrize over boxes for other? 
+ + rng = tm.box_expected(rng, box_with_array) + with pytest.raises(IncompatibleFrequency): + rng - other + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + def test_sub_n_gt_1_ticks(self, tick_classes, n): + # GH 23878 + p1_d = "19910905" + p2_d = "19920406" + p1 = pd.PeriodIndex([p1_d], freq=tick_classes(n)) + p2 = pd.PeriodIndex([p2_d], freq=tick_classes(n)) + + expected = pd.PeriodIndex([p2_d], freq=p2.freq.base) - pd.PeriodIndex( + [p1_d], freq=p1.freq.base + ) + + tm.assert_index_equal((p2 - p1), expected) + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + @pytest.mark.parametrize( + "offset, kwd_name", + [ + (pd.offsets.YearEnd, "month"), + (pd.offsets.QuarterEnd, "startingMonth"), + (pd.offsets.MonthEnd, None), + (pd.offsets.Week, "weekday"), + ], + ) + def test_sub_n_gt_1_offsets(self, offset, kwd_name, n): + # GH 23878 + kwds = {kwd_name: 3} if kwd_name is not None else {} + p1_d = "19910905" + p2_d = "19920406" + freq = offset(n, normalize=False, **kwds) + p1 = pd.PeriodIndex([p1_d], freq=freq) + p2 = pd.PeriodIndex([p2_d], freq=freq) + + result = p2 - p1 + expected = pd.PeriodIndex([p2_d], freq=freq.base) - pd.PeriodIndex( + [p1_d], freq=freq.base + ) + + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------- + # Invalid Operations + + @pytest.mark.parametrize("other", [3.14, np.array([2.0, 3.0])]) + @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub]) + def test_parr_add_sub_float_raises(self, op, other, box_with_array): + dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D") + pi = dti.to_period("D") + pi = tm.box_expected(pi, box_with_array) + with pytest.raises(TypeError): + op(pi, other) + + @pytest.mark.parametrize( + "other", + [ + # datetime scalars + pd.Timestamp.now(), + pd.Timestamp.now().to_pydatetime(), + pd.Timestamp.now().to_datetime64(), + # datetime-like arrays + pd.date_range("2016-01-01", periods=3, freq="H"), + pd.date_range("2016-01-01", 
periods=3, tz="Europe/Brussels"), + pd.date_range("2016-01-01", periods=3, freq="S")._data, + pd.date_range("2016-01-01", periods=3, tz="Asia/Tokyo")._data, + # Miscellaneous invalid types + ], + ) + def test_parr_add_sub_invalid(self, other, box_with_array): + # GH#23215 + rng = pd.period_range("1/1/2000", freq="D", periods=3) + rng = tm.box_expected(rng, box_with_array) + + with pytest.raises(TypeError): + rng + other + with pytest.raises(TypeError): + other + rng + with pytest.raises(TypeError): + rng - other + with pytest.raises(TypeError): + other - rng + + # ----------------------------------------------------------------- + # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] + + def test_pi_add_sub_td64_array_non_tick_raises(self): + rng = pd.period_range("1/1/2000", freq="Q", periods=3) + tdi = pd.TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"]) + tdarr = tdi.values + + with pytest.raises(IncompatibleFrequency): + rng + tdarr + with pytest.raises(IncompatibleFrequency): + tdarr + rng + + with pytest.raises(IncompatibleFrequency): + rng - tdarr + with pytest.raises(TypeError): + tdarr - rng + + def test_pi_add_sub_td64_array_tick(self): + # PeriodIndex + Timedelta-like is allowed only with + # tick-like frequencies + rng = pd.period_range("1/1/2000", freq="90D", periods=3) + tdi = pd.TimedeltaIndex(["-1 Day", "-1 Day", "-1 Day"]) + tdarr = tdi.values + + expected = pd.period_range("12/31/1999", freq="90D", periods=3) + result = rng + tdi + tm.assert_index_equal(result, expected) + result = rng + tdarr + tm.assert_index_equal(result, expected) + result = tdi + rng + tm.assert_index_equal(result, expected) + result = tdarr + rng + tm.assert_index_equal(result, expected) + + expected = pd.period_range("1/2/2000", freq="90D", periods=3) + + result = rng - tdi + tm.assert_index_equal(result, expected) + result = rng - tdarr + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + tdarr - rng + + with pytest.raises(TypeError): + 
tdi - rng + + # ----------------------------------------------------------------- + # operations with array/Index of DateOffset objects + + @pytest.mark.parametrize("box", [np.array, pd.Index]) + def test_pi_add_offset_array(self, box): + # GH#18849 + pi = pd.PeriodIndex([pd.Period("2015Q1"), pd.Period("2016Q2")]) + offs = box( + [ + pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12), + ] + ) + expected = pd.PeriodIndex([pd.Period("2015Q2"), pd.Period("2015Q4")]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi + offs + tm.assert_index_equal(res, expected) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = offs + pi + tm.assert_index_equal(res2, expected) + + unanchored = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + # addition/subtraction ops with incompatible offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + with pytest.raises(IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + pi + unanchored + with pytest.raises(IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + unanchored + pi + + @pytest.mark.parametrize("box", [np.array, pd.Index]) + def test_pi_sub_offset_array(self, box): + # GH#18824 + pi = pd.PeriodIndex([pd.Period("2015Q1"), pd.Period("2016Q2")]) + other = box( + [ + pd.offsets.QuarterEnd(n=1, startingMonth=12), + pd.offsets.QuarterEnd(n=-2, startingMonth=12), + ] + ) + + expected = PeriodIndex([pi[n] - other[n] for n in range(len(pi))]) + + with tm.assert_produces_warning(PerformanceWarning): + res = pi - other + tm.assert_index_equal(res, expected) + + anchored = box([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. 
+ with pytest.raises(IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + pi - anchored + with pytest.raises(IncompatibleFrequency): + with tm.assert_produces_warning(PerformanceWarning): + anchored - pi + + def test_pi_add_iadd_int(self, one): + # Variants of `one` for #19012 + rng = pd.period_range("2000-01-01 09:00", freq="H", periods=10) + result = rng + one + expected = pd.period_range("2000-01-01 10:00", freq="H", periods=10) + tm.assert_index_equal(result, expected) + rng += one + tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_int(self, one): + """ + PeriodIndex.__sub__ and __isub__ with several representations of + the integer 1, e.g. int, np.int64, np.uint8, ... + """ + rng = pd.period_range("2000-01-01 09:00", freq="H", periods=10) + result = rng - one + expected = pd.period_range("2000-01-01 08:00", freq="H", periods=10) + tm.assert_index_equal(result, expected) + rng -= one + tm.assert_index_equal(rng, expected) + + @pytest.mark.parametrize("five", [5, np.array(5, dtype=np.int64)]) + def test_pi_sub_intlike(self, five): + rng = period_range("2007-01", periods=50) + + result = rng - five + exp = rng + (-five) + tm.assert_index_equal(result, exp) + + def test_pi_sub_isub_offset(self): + # offset + # DateOffset + rng = pd.period_range("2014", "2024", freq="A") + result = rng - pd.offsets.YearEnd(5) + expected = pd.period_range("2009", "2019", freq="A") + tm.assert_index_equal(result, expected) + rng -= pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + rng = pd.period_range("2014-01", "2016-12", freq="M") + result = rng - pd.offsets.MonthEnd(5) + expected = pd.period_range("2013-08", "2016-07", freq="M") + tm.assert_index_equal(result, expected) + + rng -= pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + @pytest.mark.parametrize("transpose", [True, False]) + def test_pi_add_offset_n_gt1(self, box_with_array, transpose): + # GH#23215 + # add offset to PeriodIndex with freq.n > 1 + + 
per = pd.Period("2016-01", freq="2M") + pi = pd.PeriodIndex([per]) + + expected = pd.PeriodIndex(["2016-03"], freq="2M") + + pi = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) + + result = pi + per.freq + tm.assert_equal(result, expected) + + result = per.freq + pi + tm.assert_equal(result, expected) + + def test_pi_add_offset_n_gt1_not_divisible(self, box_with_array): + # GH#23215 + # PeriodIndex with freq.n > 1 add offset with offset.n % freq.n != 0 + pi = pd.PeriodIndex(["2016-01"], freq="2M") + expected = pd.PeriodIndex(["2016-04"], freq="2M") + + # FIXME: with transposing these tests fail + pi = tm.box_expected(pi, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array, transpose=False) + + result = pi + to_offset("3M") + tm.assert_equal(result, expected) + + result = to_offset("3M") + pi + tm.assert_equal(result, expected) + + # --------------------------------------------------------------- + # __add__/__sub__ with integer arrays + + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_pi_add_intarray(self, int_holder, op): + # GH#19959 + pi = pd.PeriodIndex([pd.Period("2015Q1"), pd.Period("NaT")]) + other = int_holder([4, -1]) + + result = op(pi, other) + expected = pd.PeriodIndex([pd.Period("2016Q1"), pd.Period("NaT")]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("int_holder", [np.array, pd.Index]) + def test_pi_sub_intarray(self, int_holder): + # GH#19959 + pi = pd.PeriodIndex([pd.Period("2015Q1"), pd.Period("NaT")]) + other = int_holder([4, -1]) + + result = pi - other + expected = pd.PeriodIndex([pd.Period("2014Q1"), pd.Period("NaT")]) + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + other - pi + + # --------------------------------------------------------------- + # Timedelta-like (timedelta, 
timedelta64, Timedelta, Tick) + # TODO: Some of these are misnomers because of non-Tick DateOffsets + + def test_pi_add_timedeltalike_minute_gt1(self, three_days): + # GH#23031 adding a time-delta-like offset to a PeriodArray that has + # minute frequency with n != 1. A more general case is tested below + # in test_pi_add_timedeltalike_tick_gt1, but here we write out the + # expected result more explicitly. + other = three_days + rng = pd.period_range("2014-05-01", periods=3, freq="2D") + + expected = pd.PeriodIndex(["2014-05-04", "2014-05-06", "2014-05-08"], freq="2D") + + result = rng + other + tm.assert_index_equal(result, expected) + + result = other + rng + tm.assert_index_equal(result, expected) + + # subtraction + expected = pd.PeriodIndex(["2014-04-28", "2014-04-30", "2014-05-02"], freq="2D") + result = rng - other + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + other - rng + + @pytest.mark.parametrize("freqstr", ["5ns", "5us", "5ms", "5s", "5T", "5h", "5d"]) + def test_pi_add_timedeltalike_tick_gt1(self, three_days, freqstr): + # GH#23031 adding a time-delta-like offset to a PeriodArray that has + # tick-like frequency with n != 1 + other = three_days + rng = pd.period_range("2014-05-01", periods=6, freq=freqstr) + + expected = pd.period_range(rng[0] + other, periods=6, freq=freqstr) + + result = rng + other + tm.assert_index_equal(result, expected) + + result = other + rng + tm.assert_index_equal(result, expected) + + # subtraction + expected = pd.period_range(rng[0] - other, periods=6, freq=freqstr) + result = rng - other + tm.assert_index_equal(result, expected) + + with pytest.raises(TypeError): + other - rng + + def test_pi_add_iadd_timedeltalike_daily(self, three_days): + # Tick + other = three_days + rng = pd.period_range("2014-05-01", "2014-05-15", freq="D") + expected = pd.period_range("2014-05-04", "2014-05-18", freq="D") + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + 
tm.assert_index_equal(rng, expected) + + def test_pi_sub_isub_timedeltalike_daily(self, three_days): + # Tick-like 3 Days + other = three_days + rng = pd.period_range("2014-05-01", "2014-05-15", freq="D") + expected = pd.period_range("2014-04-28", "2014-05-12", freq="D") + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_pi_add_sub_timedeltalike_freq_mismatch_daily(self, not_daily): + other = not_daily + rng = pd.period_range("2014-05-01", "2014-05-15", freq="D") + msg = "Input has different freq(=.+)? from Period.*?\\(freq=D\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = pd.period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + expected = pd.period_range("2014-01-01 12:00", "2014-01-05 12:00", freq="H") + + result = rng + other + tm.assert_index_equal(result, expected) + + rng += other + tm.assert_index_equal(rng, expected) + + def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly): + other = not_hourly + rng = pd.period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + msg = "Input has different freq(=.+)? 
from Period.*?\\(freq=H\\)" + + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + + def test_pi_sub_isub_timedeltalike_hourly(self, two_hours): + other = two_hours + rng = pd.period_range("2014-01-01 10:00", "2014-01-05 10:00", freq="H") + expected = pd.period_range("2014-01-01 08:00", "2014-01-05 08:00", freq="H") + + result = rng - other + tm.assert_index_equal(result, expected) + + rng -= other + tm.assert_index_equal(rng, expected) + + def test_add_iadd_timedeltalike_annual(self): + # offset + # DateOffset + rng = pd.period_range("2014", "2024", freq="A") + result = rng + pd.offsets.YearEnd(5) + expected = pd.period_range("2019", "2029", freq="A") + tm.assert_index_equal(result, expected) + rng += pd.offsets.YearEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_sub_timedeltalike_freq_mismatch_annual(self, mismatched_freq): + other = mismatched_freq + rng = pd.period_range("2014", "2024", freq="A") + msg = "Input has different freq(=.+)? from Period.*?\\(freq=A-DEC\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + def test_pi_add_iadd_timedeltalike_M(self): + rng = pd.period_range("2014-01", "2016-12", freq="M") + expected = pd.period_range("2014-06", "2017-05", freq="M") + + result = rng + pd.offsets.MonthEnd(5) + tm.assert_index_equal(result, expected) + + rng += pd.offsets.MonthEnd(5) + tm.assert_index_equal(rng, expected) + + def test_pi_add_sub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): + other = mismatched_freq + rng = pd.period_range("2014-01", "2016-12", freq="M") + msg = "Input has different freq(=.+)? 
from Period.*?\\(freq=M\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + rng + other + with pytest.raises(IncompatibleFrequency, match=msg): + rng += other + with pytest.raises(IncompatibleFrequency, match=msg): + rng - other + with pytest.raises(IncompatibleFrequency, match=msg): + rng -= other + + @pytest.mark.parametrize("transpose", [True, False]) + def test_parr_add_sub_td64_nat(self, box_with_array, transpose): + # GH#23320 special handling for timedelta64("NaT") + pi = pd.period_range("1994-04-01", periods=9, freq="19D") + other = np.timedelta64("NaT") + expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") + + obj = tm.box_expected(pi, box_with_array, transpose=transpose) + expected = tm.box_expected(expected, box_with_array, transpose=transpose) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + with pytest.raises(TypeError): + other - obj + + @pytest.mark.parametrize( + "other", + [ + np.array(["NaT"] * 9, dtype="m8[ns]"), + TimedeltaArray._from_sequence(["NaT"] * 9), + ], + ) + def test_parr_add_sub_tdt64_nat_array(self, box_with_array, other): + pi = pd.period_range("1994-04-01", periods=9, freq="19D") + expected = pd.PeriodIndex(["NaT"] * 9, freq="19D") + + obj = tm.box_expected(pi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + with pytest.raises(TypeError): + other - obj + + # --------------------------------------------------------------- + # Unsorted + + def test_parr_add_sub_index(self): + # Check that PeriodArray defers to Index on arithmetic ops + pi = pd.period_range("2000-12-31", periods=3) + parr = pi.array + + result = parr - pi + expected = pi - pi + tm.assert_index_equal(result, expected) + + def 
test_parr_add_sub_object_array(self): + pi = pd.period_range("2000-12-31", periods=3, freq="D") + parr = pi.array + + other = np.array([pd.Timedelta(days=1), pd.offsets.Day(2), 3]) + + with tm.assert_produces_warning(PerformanceWarning): + result = parr + other + + expected = pd.PeriodIndex( + ["2001-01-01", "2001-01-03", "2001-01-05"], freq="D" + ).array + tm.assert_equal(result, expected) + + with tm.assert_produces_warning(PerformanceWarning): + result = parr - other + + expected = pd.PeriodIndex(["2000-12-30"] * 3, freq="D").array + tm.assert_equal(result, expected) + + +class TestPeriodSeriesArithmetic: + def test_ops_series_timedelta(self): + # GH#13043 + ser = pd.Series( + [pd.Period("2015-01-01", freq="D"), pd.Period("2015-01-02", freq="D")], + name="xxx", + ) + assert ser.dtype == "Period[D]" + + expected = pd.Series( + [pd.Period("2015-01-02", freq="D"), pd.Period("2015-01-03", freq="D")], + name="xxx", + ) + + result = ser + pd.Timedelta("1 days") + tm.assert_series_equal(result, expected) + + result = pd.Timedelta("1 days") + ser + tm.assert_series_equal(result, expected) + + result = ser + pd.tseries.offsets.Day() + tm.assert_series_equal(result, expected) + + result = pd.tseries.offsets.Day() + ser + tm.assert_series_equal(result, expected) + + def test_ops_series_period(self): + # GH#13043 + ser = pd.Series( + [pd.Period("2015-01-01", freq="D"), pd.Period("2015-01-02", freq="D")], + name="xxx", + ) + assert ser.dtype == "Period[D]" + + per = pd.Period("2015-01-10", freq="D") + off = per.freq + # dtype will be object because of original dtype + expected = pd.Series([9 * off, 8 * off], name="xxx", dtype=object) + tm.assert_series_equal(per - ser, expected) + tm.assert_series_equal(ser - per, -1 * expected) + + s2 = pd.Series( + [pd.Period("2015-01-05", freq="D"), pd.Period("2015-01-04", freq="D")], + name="xxx", + ) + assert s2.dtype == "Period[D]" + + expected = pd.Series([4 * off, 2 * off], name="xxx", dtype=object) + tm.assert_series_equal(s2 - ser, 
expected) + tm.assert_series_equal(ser - s2, -1 * expected) + + +class TestPeriodIndexSeriesMethods: + """ Test PeriodIndex and Period Series Ops consistency """ + + def _check(self, values, func, expected): + idx = pd.PeriodIndex(values) + result = func(idx) + tm.assert_equal(result, expected) + + ser = pd.Series(values) + result = func(ser) + + exp = pd.Series(expected, name=values.name) + tm.assert_series_equal(result, exp) + + def test_pi_ops(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + + expected = PeriodIndex( + ["2011-03", "2011-04", "2011-05", "2011-06"], freq="M", name="idx" + ) + + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + + self._check(idx + 2, lambda x: x - 2, idx) + + result = idx - Period("2011-01", freq="M") + off = idx.freq + exp = pd.Index([0 * off, 1 * off, 2 * off, 3 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = Period("2011-01", freq="M") - idx + exp = pd.Index([0 * off, -1 * off, -2 * off, -3 * off], name="idx") + tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize("ng", ["str", 1.5]) + @pytest.mark.parametrize( + "func", + [ + lambda obj, ng: obj + ng, + lambda obj, ng: ng + obj, + lambda obj, ng: obj - ng, + lambda obj, ng: ng - obj, + lambda obj, ng: np.add(obj, ng), + lambda obj, ng: np.add(ng, obj), + lambda obj, ng: np.subtract(obj, ng), + lambda obj, ng: np.subtract(ng, obj), + ], + ) + def test_parr_ops_errors(self, ng, func, box_with_array): + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + obj = tm.box_expected(idx, box_with_array) + msg = ( + r"unsupported operand type\(s\)|can only concatenate|" + r"must be str|object to str implicitly" + ) + + with pytest.raises(TypeError, match=msg): + func(obj, ng) + + def test_pi_ops_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + expected = PeriodIndex( 
+ ["2011-03", "2011-04", "NaT", "2011-06"], freq="M", name="idx" + ) + + self._check(idx, lambda x: x + 2, expected) + self._check(idx, lambda x: 2 + x, expected) + self._check(idx, lambda x: np.add(x, 2), expected) + + self._check(idx + 2, lambda x: x - 2, idx) + self._check(idx + 2, lambda x: np.subtract(x, 2), idx) + + # freq with mult + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="2M", name="idx" + ) + expected = PeriodIndex( + ["2011-07", "2011-08", "NaT", "2011-10"], freq="2M", name="idx" + ) + + self._check(idx, lambda x: x + 3, expected) + self._check(idx, lambda x: 3 + x, expected) + self._check(idx, lambda x: np.add(x, 3), expected) + + self._check(idx + 3, lambda x: x - 3, idx) + self._check(idx + 3, lambda x: np.subtract(x, 3), idx) + + def test_pi_ops_array_int(self): + + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + f = lambda x: x + np.array([1, 2, 3, 4]) + exp = PeriodIndex( + ["2011-02", "2011-04", "NaT", "2011-08"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: np.add(x, np.array([4, -1, 1, 2])) + exp = PeriodIndex( + ["2011-05", "2011-01", "NaT", "2011-06"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: x - np.array([1, 2, 3, 4]) + exp = PeriodIndex( + ["2010-12", "2010-12", "NaT", "2010-12"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + f = lambda x: np.subtract(x, np.array([3, 2, 3, -2])) + exp = PeriodIndex( + ["2010-10", "2010-12", "NaT", "2011-06"], freq="M", name="idx" + ) + self._check(idx, f, exp) + + def test_pi_ops_offset(self): + idx = PeriodIndex( + ["2011-01-01", "2011-02-01", "2011-03-01", "2011-04-01"], + freq="D", + name="idx", + ) + f = lambda x: x + pd.offsets.Day() + exp = PeriodIndex( + ["2011-01-02", "2011-02-02", "2011-03-02", "2011-04-02"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + f = lambda x: x + pd.offsets.Day(2) + exp = PeriodIndex( + ["2011-01-03", "2011-02-03", 
"2011-03-03", "2011-04-03"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + f = lambda x: x - pd.offsets.Day(2) + exp = PeriodIndex( + ["2010-12-30", "2011-01-30", "2011-02-27", "2011-03-30"], + freq="D", + name="idx", + ) + self._check(idx, f, exp) + + def test_pi_offset_errors(self): + idx = PeriodIndex( + ["2011-01-01", "2011-02-01", "2011-03-01", "2011-04-01"], + freq="D", + name="idx", + ) + ser = pd.Series(idx) + + # Series op is applied per Period instance, thus error is raised + # from Period + for obj in [idx, ser]: + msg = r"Input has different freq=2H from Period.*?\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + obj + pd.offsets.Hour(2) + + with pytest.raises(IncompatibleFrequency, match=msg): + pd.offsets.Hour(2) + obj + + msg = r"Input has different freq=-2H from Period.*?\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + obj - pd.offsets.Hour(2) + + def test_pi_sub_period(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "2011-02", "2011-03", "2011-04"], freq="M", name="idx" + ) + + result = idx - pd.Period("2012-01", freq="M") + off = idx.freq + exp = pd.Index([-12 * off, -11 * off, -10 * off, -9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = np.subtract(idx, pd.Period("2012-01", freq="M")) + tm.assert_index_equal(result, exp) + + result = pd.Period("2012-01", freq="M") - idx + exp = pd.Index([12 * off, 11 * off, 10 * off, 9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = np.subtract(pd.Period("2012-01", freq="M"), idx) + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + tm.assert_index_equal(idx - pd.Period("NaT", freq="M"), exp) + tm.assert_index_equal(pd.Period("NaT", freq="M") - idx, exp) + + def test_pi_sub_pdnat(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + exp = pd.TimedeltaIndex([pd.NaT] * 4, name="idx") + 
tm.assert_index_equal(pd.NaT - idx, exp) + tm.assert_index_equal(idx - pd.NaT, exp) + + def test_pi_sub_period_nat(self): + # GH#13071 + idx = PeriodIndex( + ["2011-01", "NaT", "2011-03", "2011-04"], freq="M", name="idx" + ) + + result = idx - pd.Period("2012-01", freq="M") + off = idx.freq + exp = pd.Index([-12 * off, pd.NaT, -10 * off, -9 * off], name="idx") + tm.assert_index_equal(result, exp) + + result = pd.Period("2012-01", freq="M") - idx + exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name="idx") + tm.assert_index_equal(result, exp) + + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx") + tm.assert_index_equal(idx - pd.Period("NaT", freq="M"), exp) + tm.assert_index_equal(pd.Period("NaT", freq="M") - idx, exp) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py new file mode 100644 index 00000000..158da37a --- /dev/null +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -0,0 +1,2176 @@ +# Arithmetic tests for DataFrame/Series/Index/Array classes that should +# behave identically. +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas.errors import OutOfBoundsDatetime, PerformanceWarning + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + NaT, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + timedelta_range, +) +import pandas._testing as tm +from pandas.tests.arithmetic.common import ( + assert_invalid_addsub_type, + assert_invalid_comparison, + get_upcast_box, +) + +# ------------------------------------------------------------------ +# Timedelta64[ns] dtype Comparisons + + +class TestTimedelta64ArrayLikeComparisons: + # Comparison tests for timedelta64[ns] vectors fully parametrized over + # DataFrame/Series/TimedeltaIndex/TimedeltaArray. Ideally all comparison + # tests will eventually end up here. 
+ + def test_compare_timedelta64_zerodim(self, box_with_array): + # GH#26689 should unbox when comparing with zerodim array + box = box_with_array + xbox = box_with_array if box_with_array is not pd.Index else np.ndarray + + tdi = pd.timedelta_range("2H", periods=4) + other = np.array(tdi.to_numpy()[0]) + + tdi = tm.box_expected(tdi, box) + res = tdi <= other + expected = np.array([True, False, False, False]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(res, expected) + + with pytest.raises(TypeError): + # zero-dim of wrong dtype should still raise + tdi >= np.array(4) + + @pytest.mark.parametrize( + "td_scalar", + [timedelta(days=1), Timedelta(days=1), Timedelta(days=1).to_timedelta64()], + ) + def test_compare_timedeltalike_scalar(self, box_with_array, td_scalar): + # regression test for GH#5963 + box = box_with_array + xbox = box if box is not pd.Index else np.ndarray + ser = pd.Series([timedelta(days=1), timedelta(days=2)]) + ser = tm.box_expected(ser, box) + actual = ser > td_scalar + expected = pd.Series([False, True]) + expected = tm.box_expected(expected, xbox) + tm.assert_equal(actual, expected) + + @pytest.mark.parametrize("invalid", [345600000000000, "a"]) + def test_td64_comparisons_invalid(self, box_with_array, invalid): + # GH#13624 for str + box = box_with_array + rng = timedelta_range("1 days", periods=10) + obj = tm.box_expected(rng, box) + + assert_invalid_comparison(obj, invalid, box) + + @pytest.mark.parametrize( + "other", + [ + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.date_range("1970-01-01", periods=10, tz="UTC").array, + np.array(pd.date_range("1970-01-01", periods=10)), + list(pd.date_range("1970-01-01", periods=10)), + pd.date_range("1970-01-01", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_td64arr_cmp_arraylike_invalid(self, 
other): + # We don't parametrize this over box_with_array because listlike + # other plays poorly with assert_invalid_comparison reversed checks + + rng = timedelta_range("1 days", periods=10)._data + assert_invalid_comparison(rng, other, tm.to_array) + + def test_td64arr_cmp_mixed_invalid(self): + rng = timedelta_range("1 days", periods=5)._data + + other = np.array([0, 1, 2, rng[3], pd.Timestamp.now()]) + result = rng == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + rng < other + with pytest.raises(TypeError, match=msg): + rng > other + with pytest.raises(TypeError, match=msg): + rng <= other + with pytest.raises(TypeError, match=msg): + rng >= other + + +class TestTimedelta64ArrayComparisons: + # TODO: All of these need to be parametrized over box + + @pytest.mark.parametrize("dtype", [None, object]) + def test_comp_nat(self, dtype): + left = pd.TimedeltaIndex( + [pd.Timedelta("1 days"), pd.NaT, pd.Timedelta("3 days")] + ) + right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta("3 days")]) + + lhs, rhs = left, right + if dtype is object: + lhs, rhs = left.astype(object), right.astype(object) + + result = rhs == lhs + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = rhs != lhs + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(lhs == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == rhs, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(lhs != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != lhs, expected) + + expected = np.array([False, False, False]) + 
tm.assert_numpy_array_equal(lhs < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > lhs, expected) + + def test_comparisons_nat(self): + tdidx1 = pd.TimedeltaIndex( + [ + "1 day", + pd.NaT, + "1 day 00:00:01", + pd.NaT, + "1 day 00:00:01", + "5 day 00:00:03", + ] + ) + tdidx2 = pd.TimedeltaIndex( + ["2 day", "2 day", pd.NaT, pd.NaT, "1 day 00:00:02", "5 days 00:00:03"] + ) + tdarr = np.array( + [ + np.timedelta64(2, "D"), + np.timedelta64(2, "D"), + np.timedelta64("nat"), + np.timedelta64("nat"), + np.timedelta64(1, "D") + np.timedelta64(2, "s"), + np.timedelta64(5, "D") + np.timedelta64(3, "s"), + ] + ) + + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + + # Check pd.NaT is handles as the same as np.nan + for idx1, idx2 in cases: + + result = idx1 < idx2 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 > idx1 + expected = np.array([True, False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 <= idx2 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx2 >= idx1 + expected = np.array([True, False, False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 == idx2 + expected = np.array([False, False, False, False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = idx1 != idx2 + expected = np.array([True, True, True, True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + # TODO: better name + def test_comparisons_coverage(self): + rng = timedelta_range("1 days", periods=10) + + result = rng < rng[3] + expected = np.array([True, True, True] + [False] * 7) + tm.assert_numpy_array_equal(result, expected) + + result = rng == list(rng) + exp = rng == rng + tm.assert_numpy_array_equal(result, exp) + + +# ------------------------------------------------------------------ +# Timedelta64[ns] dtype 
Arithmetic Operations + + +class TestTimedelta64ArithmeticUnsorted: + # Tests moved from type-specific test files but not + # yet sorted/parametrized/de-duplicated + + def test_ufunc_coercions(self): + # normal ops are also tested in tseries/test_timedeltas.py + idx = TimedeltaIndex(["2H", "4H", "6H", "8H", "10H"], freq="2H", name="x") + + for result in [idx * 2, np.multiply(idx, 2)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(["4H", "8H", "12H", "16H", "20H"], freq="4H", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "4H" + + for result in [idx / 2, np.divide(idx, 2)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(["1H", "2H", "3H", "4H", "5H"], freq="H", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "H" + + idx = TimedeltaIndex(["2H", "4H", "6H", "8H", "10H"], freq="2H", name="x") + for result in [-idx, np.negative(idx)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex( + ["-2H", "-4H", "-6H", "-8H", "-10H"], freq="-2H", name="x" + ) + tm.assert_index_equal(result, exp) + assert result.freq == "-2H" + + idx = TimedeltaIndex(["-2H", "-1H", "0H", "1H", "2H"], freq="H", name="x") + for result in [abs(idx), np.absolute(idx)]: + assert isinstance(result, TimedeltaIndex) + exp = TimedeltaIndex(["2H", "1H", "0H", "1H", "2H"], freq=None, name="x") + tm.assert_index_equal(result, exp) + assert result.freq is None + + def test_subtraction_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(["1 days", pd.NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + td = Timedelta("1 days") + dt = Timestamp("20130101") + + msg = "cannot subtract a datelike from a TimedeltaArray" + with pytest.raises(TypeError, match=msg): + tdi - dt + with pytest.raises(TypeError, match=msg): + tdi - dti + + msg = r"unsupported operand type\(s\) for -" + with pytest.raises(TypeError, match=msg): + td - dt + + msg = "(bad|unsupported) 
operand type for unary" + with pytest.raises(TypeError, match=msg): + td - dti + + result = dt - dti + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"], name="bar") + tm.assert_index_equal(result, expected) + + result = dti - dt + expected = TimedeltaIndex(["0 days", "1 days", "2 days"], name="bar") + tm.assert_index_equal(result, expected) + + result = tdi - td + expected = TimedeltaIndex(["0 days", pd.NaT, "1 days"], name="foo") + tm.assert_index_equal(result, expected, check_names=False) + + result = td - tdi + expected = TimedeltaIndex(["0 days", pd.NaT, "-1 days"], name="foo") + tm.assert_index_equal(result, expected, check_names=False) + + result = dti - td + expected = DatetimeIndex(["20121231", "20130101", "20130102"], name="bar") + tm.assert_index_equal(result, expected, check_names=False) + + result = dt - tdi + expected = DatetimeIndex(["20121231", pd.NaT, "20121230"], name="foo") + tm.assert_index_equal(result, expected) + + def test_subtraction_ops_with_tz(self): + + # check that dt/dti subtraction ops with tz are validated + dti = pd.date_range("20130101", periods=3) + ts = Timestamp("20130101") + dt = ts.to_pydatetime() + dti_tz = pd.date_range("20130101", periods=3).tz_localize("US/Eastern") + ts_tz = Timestamp("20130101").tz_localize("US/Eastern") + ts_tz2 = Timestamp("20130101").tz_localize("CET") + dt_tz = ts_tz.to_pydatetime() + td = Timedelta("1 days") + + def _check(result, expected): + assert result == expected + assert isinstance(result, Timedelta) + + # scalars + result = ts - ts + expected = Timedelta("0 days") + _check(result, expected) + + result = dt_tz - ts_tz + expected = Timedelta("0 days") + _check(result, expected) + + result = ts_tz - dt_tz + expected = Timedelta("0 days") + _check(result, expected) + + # tz mismatches + msg = "Timestamp subtraction must have the same timezones or no timezones" + with pytest.raises(TypeError, match=msg): + dt_tz - ts + msg = "can't subtract offset-naive and offset-aware datetimes" + with 
pytest.raises(TypeError, match=msg): + dt_tz - dt + msg = "Timestamp subtraction must have the same timezones or no timezones" + with pytest.raises(TypeError, match=msg): + dt_tz - ts_tz2 + msg = "can't subtract offset-naive and offset-aware datetimes" + with pytest.raises(TypeError, match=msg): + dt - dt_tz + msg = "Timestamp subtraction must have the same timezones or no timezones" + with pytest.raises(TypeError, match=msg): + ts - dt_tz + with pytest.raises(TypeError, match=msg): + ts_tz2 - ts + with pytest.raises(TypeError, match=msg): + ts_tz2 - dt + with pytest.raises(TypeError, match=msg): + ts_tz - ts_tz2 + + # with dti + with pytest.raises(TypeError, match=msg): + dti - ts_tz + with pytest.raises(TypeError, match=msg): + dti_tz - ts + with pytest.raises(TypeError, match=msg): + dti_tz - ts_tz2 + + result = dti_tz - dt_tz + expected = TimedeltaIndex(["0 days", "1 days", "2 days"]) + tm.assert_index_equal(result, expected) + + result = dt_tz - dti_tz + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"]) + tm.assert_index_equal(result, expected) + + result = dti_tz - ts_tz + expected = TimedeltaIndex(["0 days", "1 days", "2 days"]) + tm.assert_index_equal(result, expected) + + result = ts_tz - dti_tz + expected = TimedeltaIndex(["0 days", "-1 days", "-2 days"]) + tm.assert_index_equal(result, expected) + + result = td - td + expected = Timedelta("0 days") + _check(result, expected) + + result = dti_tz - td + expected = DatetimeIndex(["20121231", "20130101", "20130102"], tz="US/Eastern") + tm.assert_index_equal(result, expected) + + def test_dti_tdi_numeric_ops(self): + # These are normally union/diff set-like ops + tdi = TimedeltaIndex(["1 days", pd.NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + + # TODO(wesm): unused? 
+ # td = Timedelta('1 days') + # dt = Timestamp('20130101') + + result = tdi - tdi + expected = TimedeltaIndex(["0 days", pd.NaT, "0 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = tdi + tdi + expected = TimedeltaIndex(["2 days", pd.NaT, "4 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = dti - tdi # name will be reset + expected = DatetimeIndex(["20121231", pd.NaT, "20130101"]) + tm.assert_index_equal(result, expected) + + def test_addition_ops(self): + # with datetimes/timedelta and tdi/dti + tdi = TimedeltaIndex(["1 days", pd.NaT, "2 days"], name="foo") + dti = pd.date_range("20130101", periods=3, name="bar") + td = Timedelta("1 days") + dt = Timestamp("20130101") + + result = tdi + dt + expected = DatetimeIndex(["20130102", pd.NaT, "20130103"], name="foo") + tm.assert_index_equal(result, expected) + + result = dt + tdi + expected = DatetimeIndex(["20130102", pd.NaT, "20130103"], name="foo") + tm.assert_index_equal(result, expected) + + result = td + tdi + expected = TimedeltaIndex(["2 days", pd.NaT, "3 days"], name="foo") + tm.assert_index_equal(result, expected) + + result = tdi + td + expected = TimedeltaIndex(["2 days", pd.NaT, "3 days"], name="foo") + tm.assert_index_equal(result, expected) + + # unequal length + msg = "cannot add indices of unequal length" + with pytest.raises(ValueError, match=msg): + tdi + dti[0:1] + with pytest.raises(ValueError, match=msg): + tdi[0:1] + dti + + # random indexes + with pytest.raises(TypeError): + tdi + pd.Int64Index([1, 2, 3]) + + # this is a union! 
+ # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + + result = tdi + dti # name will be reset + expected = DatetimeIndex(["20130102", pd.NaT, "20130105"]) + tm.assert_index_equal(result, expected) + + result = dti + tdi # name will be reset + expected = DatetimeIndex(["20130102", pd.NaT, "20130105"]) + tm.assert_index_equal(result, expected) + + result = dt + td + expected = Timestamp("20130102") + assert result == expected + + result = td + dt + expected = Timestamp("20130102") + assert result == expected + + # TODO: Needs more informative name, probably split up into + # more targeted tests + @pytest.mark.parametrize("freq", ["D", "B"]) + def test_timedelta(self, freq): + index = pd.date_range("1/1/2000", periods=50, freq=freq) + + shifted = index + timedelta(1) + back = shifted + timedelta(-1) + tm.assert_index_equal(index, back) + + if freq == "D": + expected = pd.tseries.offsets.Day(1) + assert index.freq == expected + assert shifted.freq == expected + assert back.freq == expected + else: # freq == 'B' + assert index.freq == pd.tseries.offsets.BusinessDay(1) + assert shifted.freq is None + assert back.freq == pd.tseries.offsets.BusinessDay(1) + + result = index - timedelta(1) + expected = index + timedelta(-1) + tm.assert_index_equal(result, expected) + + # GH#4134, buggy with timedeltas + rng = pd.date_range("2013", "2014") + s = Series(rng) + result1 = rng - pd.offsets.Hour(1) + result2 = DatetimeIndex(s - np.timedelta64(100000000)) + result3 = rng - np.timedelta64(100000000) + result4 = DatetimeIndex(s - pd.offsets.Hour(1)) + tm.assert_index_equal(result1, result4) + tm.assert_index_equal(result2, result3) + + def test_tda_add_sub_index(self): + # Check that TimedeltaArray defers to Index on arithmetic ops + tdi = TimedeltaIndex(["1 days", pd.NaT, "2 days"]) + tda = tdi.array + + dti = pd.date_range("1999-12-31", periods=3, freq="D") + + result = tda + dti + expected = tdi + dti + tm.assert_index_equal(result, expected) + + result = tda + tdi 
+ expected = tdi + tdi + tm.assert_index_equal(result, expected) + + result = tda - tdi + expected = tdi - tdi + tm.assert_index_equal(result, expected) + + # ------------------------------------------------------------- + # Binary operations TimedeltaIndex and timedelta-like + + def test_tdi_iadd_timedeltalike(self, two_hours): + # only test adding/sub offsets as + is now numeric + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") + rng += two_hours + tm.assert_index_equal(rng, expected) + + def test_tdi_isub_timedeltalike(self, two_hours): + # only test adding/sub offsets as - is now numeric + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") + rng -= two_hours + tm.assert_index_equal(rng, expected) + + # ------------------------------------------------------------- + + def test_tdi_ops_attributes(self): + rng = timedelta_range("2 days", periods=5, freq="2D", name="x") + + result = rng + 1 * rng.freq + exp = timedelta_range("4 days", periods=5, freq="2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + result = rng - 2 * rng.freq + exp = timedelta_range("-2 days", periods=5, freq="2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "2D" + + result = rng * 2 + exp = timedelta_range("4 days", periods=5, freq="4D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "4D" + + result = rng / 2 + exp = timedelta_range("1 days", periods=5, freq="D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "D" + + result = -rng + exp = timedelta_range("-2 days", periods=5, freq="-2D", name="x") + tm.assert_index_equal(result, exp) + assert result.freq == "-2D" + + rng = pd.timedelta_range("-2 days", periods=5, freq="D", name="x") + + result = abs(rng) + exp = TimedeltaIndex( + ["2 days", "1 days", "0 days", "1 days", "2 days"], name="x" + ) + 
tm.assert_index_equal(result, exp) + assert result.freq is None + + +class TestAddSubNaTMasking: + # TODO: parametrize over boxes + + def test_tdi_add_timestamp_nat_masking(self): + # GH#17991 checking for overflow-masking with NaT + tdinat = pd.to_timedelta(["24658 days 11:15:00", "NaT"]) + + tsneg = Timestamp("1950-01-01") + ts_neg_variants = [ + tsneg, + tsneg.to_pydatetime(), + tsneg.to_datetime64().astype("datetime64[ns]"), + tsneg.to_datetime64().astype("datetime64[D]"), + ] + + tspos = Timestamp("1980-01-01") + ts_pos_variants = [ + tspos, + tspos.to_pydatetime(), + tspos.to_datetime64().astype("datetime64[ns]"), + tspos.to_datetime64().astype("datetime64[D]"), + ] + + for variant in ts_neg_variants + ts_pos_variants: + res = tdinat + variant + assert res[1] is pd.NaT + + def test_tdi_add_overflow(self): + # See GH#14068 + # preliminary test scalar analogue of vectorized tests below + with pytest.raises(OutOfBoundsDatetime): + pd.to_timedelta(106580, "D") + Timestamp("2000") + with pytest.raises(OutOfBoundsDatetime): + Timestamp("2000") + pd.to_timedelta(106580, "D") + + _NaT = int(pd.NaT) + 1 + msg = "Overflow in int64 addition" + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta([106580], "D") + Timestamp("2000") + with pytest.raises(OverflowError, match=msg): + Timestamp("2000") + pd.to_timedelta([106580], "D") + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta([_NaT]) - Timedelta("1 days") + with pytest.raises(OverflowError, match=msg): + pd.to_timedelta(["5 days", _NaT]) - Timedelta("1 days") + with pytest.raises(OverflowError, match=msg): + ( + pd.to_timedelta([_NaT, "5 days", "1 hours"]) + - pd.to_timedelta(["7 seconds", _NaT, "4 hours"]) + ) + + # These should not overflow! 
+ exp = TimedeltaIndex([pd.NaT]) + result = pd.to_timedelta([pd.NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex(["4 days", pd.NaT]) + result = pd.to_timedelta(["5 days", pd.NaT]) - Timedelta("1 days") + tm.assert_index_equal(result, exp) + + exp = TimedeltaIndex([pd.NaT, pd.NaT, "5 hours"]) + result = pd.to_timedelta([pd.NaT, "5 days", "1 hours"]) + pd.to_timedelta( + ["7 seconds", pd.NaT, "4 hours"] + ) + tm.assert_index_equal(result, exp) + + +class TestTimedeltaArraylikeAddSubOps: + # Tests for timedelta64[ns] __add__, __sub__, __radd__, __rsub__ + + # TODO: moved from tests.indexes.timedeltas.test_arithmetic; needs + # parametrization+de-duplication + def test_timedelta_ops_with_missing_values(self): + # setup + s1 = pd.to_timedelta(Series(["00:00:01"])) + s2 = pd.to_timedelta(Series(["00:00:02"])) + + msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]" + with pytest.raises(TypeError, match=msg): + # Passing datetime64-dtype data to TimedeltaIndex is no longer + # supported GH#29794 + pd.to_timedelta(Series([pd.NaT])) + + sn = pd.to_timedelta(Series([pd.NaT], dtype="m8[ns]")) + + df1 = pd.DataFrame(["00:00:01"]).apply(pd.to_timedelta) + df2 = pd.DataFrame(["00:00:02"]).apply(pd.to_timedelta) + with pytest.raises(TypeError, match=msg): + # Passing datetime64-dtype data to TimedeltaIndex is no longer + # supported GH#29794 + pd.DataFrame([pd.NaT]).apply(pd.to_timedelta) + + dfn = pd.DataFrame([pd.NaT.value]).apply(pd.to_timedelta) + + scalar1 = pd.to_timedelta("00:00:01") + scalar2 = pd.to_timedelta("00:00:02") + timedelta_NaT = pd.to_timedelta("NaT") + + actual = scalar1 + scalar1 + assert actual == scalar2 + actual = scalar2 - scalar1 + assert actual == scalar1 + + actual = s1 + s1 + tm.assert_series_equal(actual, s2) + actual = s2 - s1 + tm.assert_series_equal(actual, s1) + + actual = s1 + scalar1 + tm.assert_series_equal(actual, s2) + actual = scalar1 + s1 + tm.assert_series_equal(actual, s2) + 
actual = s2 - scalar1 + tm.assert_series_equal(actual, s1) + actual = -scalar1 + s2 + tm.assert_series_equal(actual, s1) + + actual = s1 + timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + actual = s1 - timedelta_NaT + tm.assert_series_equal(actual, sn) + actual = -timedelta_NaT + s1 + tm.assert_series_equal(actual, sn) + + with pytest.raises(TypeError): + s1 + np.nan + with pytest.raises(TypeError): + np.nan + s1 + with pytest.raises(TypeError): + s1 - np.nan + with pytest.raises(TypeError): + -np.nan + s1 + + actual = s1 + pd.NaT + tm.assert_series_equal(actual, sn) + actual = s2 - pd.NaT + tm.assert_series_equal(actual, sn) + + actual = s1 + df1 + tm.assert_frame_equal(actual, df2) + actual = s2 - df1 + tm.assert_frame_equal(actual, df1) + actual = df1 + s1 + tm.assert_frame_equal(actual, df2) + actual = df2 - s1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + df1 + tm.assert_frame_equal(actual, df2) + actual = df2 - df1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + scalar1 + tm.assert_frame_equal(actual, df2) + actual = df2 - scalar1 + tm.assert_frame_equal(actual, df1) + + actual = df1 + timedelta_NaT + tm.assert_frame_equal(actual, dfn) + actual = df1 - timedelta_NaT + tm.assert_frame_equal(actual, dfn) + + with pytest.raises(TypeError): + df1 + np.nan + with pytest.raises(TypeError): + df1 - np.nan + + actual = df1 + pd.NaT # NaT is datetime, not timedelta + tm.assert_frame_equal(actual, dfn) + actual = df1 - pd.NaT + tm.assert_frame_equal(actual, dfn) + + # TODO: moved from tests.series.test_operators, needs splitting, cleanup, + # de-duplication, box-parametrization... 
+ def test_operators_timedelta64(self): + # series ops + v1 = pd.date_range("2012-1-1", periods=3, freq="D") + v2 = pd.date_range("2012-1-2", periods=3, freq="D") + rs = Series(v2) - Series(v1) + xp = Series(1e9 * 3600 * 24, rs.index).astype("int64").astype("timedelta64[ns]") + tm.assert_series_equal(rs, xp) + assert rs.dtype == "timedelta64[ns]" + + df = DataFrame(dict(A=v1)) + td = Series([timedelta(days=i) for i in range(3)]) + assert td.dtype == "timedelta64[ns]" + + # series on the rhs + result = df["A"] - df["A"].shift() + assert result.dtype == "timedelta64[ns]" + + result = df["A"] + td + assert result.dtype == "M8[ns]" + + # scalar Timestamp on rhs + maxa = df["A"].max() + assert isinstance(maxa, Timestamp) + + resultb = df["A"] - df["A"].max() + assert resultb.dtype == "timedelta64[ns]" + + # timestamp on lhs + result = resultb + df["A"] + values = [Timestamp("20111230"), Timestamp("20120101"), Timestamp("20120103")] + expected = Series(values, name="A") + tm.assert_series_equal(result, expected) + + # datetimes on rhs + result = df["A"] - datetime(2001, 1, 1) + expected = Series([timedelta(days=4017 + i) for i in range(3)], name="A") + tm.assert_series_equal(result, expected) + assert result.dtype == "m8[ns]" + + d = datetime(2001, 1, 1, 3, 4) + resulta = df["A"] - d + assert resulta.dtype == "m8[ns]" + + # roundtrip + resultb = resulta + d + tm.assert_series_equal(df["A"], resultb) + + # timedeltas on rhs + td = timedelta(days=1) + resulta = df["A"] + td + resultb = resulta - td + tm.assert_series_equal(resultb, df["A"]) + assert resultb.dtype == "M8[ns]" + + # roundtrip + td = timedelta(minutes=5, seconds=3) + resulta = df["A"] + td + resultb = resulta - td + tm.assert_series_equal(df["A"], resultb) + assert resultb.dtype == "M8[ns]" + + # inplace + value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) + rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) + assert rs[2] == value + + def test_timedelta64_ops_nat(self): + # GH 11349 + 
timedelta_series = Series([NaT, Timedelta("1s")]) + nat_series_dtype_timedelta = Series([NaT, NaT], dtype="timedelta64[ns]") + single_nat_dtype_timedelta = Series([NaT], dtype="timedelta64[ns]") + + # subtraction + tm.assert_series_equal(timedelta_series - NaT, nat_series_dtype_timedelta) + tm.assert_series_equal(-NaT + timedelta_series, nat_series_dtype_timedelta) + + tm.assert_series_equal( + timedelta_series - single_nat_dtype_timedelta, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + -single_nat_dtype_timedelta + timedelta_series, nat_series_dtype_timedelta + ) + + # addition + tm.assert_series_equal( + nat_series_dtype_timedelta + NaT, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta, + ) + + tm.assert_series_equal(timedelta_series + NaT, nat_series_dtype_timedelta) + tm.assert_series_equal(NaT + timedelta_series, nat_series_dtype_timedelta) + + tm.assert_series_equal( + timedelta_series + single_nat_dtype_timedelta, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + timedelta_series, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + NaT, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta + ) + + tm.assert_series_equal( + nat_series_dtype_timedelta + single_nat_dtype_timedelta, + nat_series_dtype_timedelta, + ) + tm.assert_series_equal( + single_nat_dtype_timedelta + nat_series_dtype_timedelta, + nat_series_dtype_timedelta, + ) + + # multiplication + tm.assert_series_equal( + nat_series_dtype_timedelta * 1.0, nat_series_dtype_timedelta + ) + tm.assert_series_equal( + 1.0 * nat_series_dtype_timedelta, 
nat_series_dtype_timedelta + ) + + tm.assert_series_equal(timedelta_series * 1, timedelta_series) + tm.assert_series_equal(1 * timedelta_series, timedelta_series) + + tm.assert_series_equal(timedelta_series * 1.5, Series([NaT, Timedelta("1.5s")])) + tm.assert_series_equal(1.5 * timedelta_series, Series([NaT, Timedelta("1.5s")])) + + tm.assert_series_equal(timedelta_series * np.nan, nat_series_dtype_timedelta) + tm.assert_series_equal(np.nan * timedelta_series, nat_series_dtype_timedelta) + + # division + tm.assert_series_equal(timedelta_series / 2, Series([NaT, Timedelta("0.5s")])) + tm.assert_series_equal(timedelta_series / 2.0, Series([NaT, Timedelta("0.5s")])) + tm.assert_series_equal(timedelta_series / np.nan, nat_series_dtype_timedelta) + + # ------------------------------------------------------------- + # Binary operations td64 arraylike and datetime-like + + def test_td64arr_sub_timestamp_raises(self, box_with_array): + idx = TimedeltaIndex(["1 day", "2 day"]) + idx = tm.box_expected(idx, box_with_array) + + msg = ( + "cannot subtract a datelike from|" + "Could not operate|" + "cannot perform operation" + ) + with pytest.raises(TypeError, match=msg): + idx - Timestamp("2011-01-01") + + def test_td64arr_add_timestamp(self, box_with_array, tz_naive_fixture): + # GH#23215 + + # TODO: parametrize over scalar datetime types? 
+ tz = tz_naive_fixture + other = Timestamp("2011-01-01", tz=tz) + + idx = TimedeltaIndex(["1 day", "2 day"]) + expected = DatetimeIndex(["2011-01-02", "2011-01-03"], tz=tz) + + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = idx + other + tm.assert_equal(result, expected) + + result = other + idx + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("2012-01-01"), + Timestamp("2012-01-01").to_pydatetime(), + Timestamp("2012-01-01").to_datetime64(), + ], + ) + def test_td64arr_add_sub_datetimelike_scalar(self, ts, box_with_array): + # GH#11925, GH#29558 + tdi = timedelta_range("1 day", periods=3) + expected = pd.date_range("2012-01-02", periods=3) + + tdarr = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + tm.assert_equal(ts + tdarr, expected) + tm.assert_equal(tdarr + ts, expected) + + expected2 = pd.date_range("2011-12-31", periods=3, freq="-1D") + expected2 = tm.box_expected(expected2, box_with_array) + + tm.assert_equal(ts - tdarr, expected2) + tm.assert_equal(ts + (-tdarr), expected2) + + with pytest.raises(TypeError): + tdarr - ts + + def test_tdi_sub_dt64_array(self, box_with_array): + dti = pd.date_range("2016-01-01", periods=3) + tdi = dti - dti.shift(1) + dtarr = dti.values + expected = pd.DatetimeIndex(dtarr) - tdi + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + with pytest.raises(TypeError): + tdi - dtarr + + # TimedeltaIndex.__rsub__ + result = dtarr - tdi + tm.assert_equal(result, expected) + + def test_tdi_add_dt64_array(self, box_with_array): + dti = pd.date_range("2016-01-01", periods=3) + tdi = dti - dti.shift(1) + dtarr = dti.values + expected = pd.DatetimeIndex(dtarr) + tdi + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdi + dtarr + tm.assert_equal(result, expected) + result = 
dtarr + tdi + tm.assert_equal(result, expected) + + def test_td64arr_add_datetime64_nat(self, box_with_array): + # GH#23215 + other = np.datetime64("NaT") + + tdi = timedelta_range("1 day", periods=3) + expected = pd.DatetimeIndex(["NaT", "NaT", "NaT"]) + + tdser = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + tm.assert_equal(tdser + other, expected) + tm.assert_equal(other + tdser, expected) + + # ------------------------------------------------------------------ + # Invalid __add__/__sub__ operations + + # TODO: moved from frame tests; needs parametrization/de-duplication + def test_td64_df_add_int_frame(self): + # GH#22696 Check that we don't dispatch to numpy implementation, + # which treats int64 as m8[ns] + tdi = pd.timedelta_range("1", periods=3) + df = tdi.to_frame() + other = pd.DataFrame([1, 2, 3], index=tdi) # indexed like `df` + assert_invalid_addsub_type(df, other) + + @pytest.mark.parametrize("pi_freq", ["D", "W", "Q", "H"]) + @pytest.mark.parametrize("tdi_freq", [None, "H"]) + def test_td64arr_sub_periodlike(self, box_with_array, tdi_freq, pi_freq): + # GH#20049 subtracting PeriodIndex should raise TypeError + tdi = TimedeltaIndex(["1 hours", "2 hours"], freq=tdi_freq) + dti = Timestamp("2018-03-07 17:16:40") + tdi + pi = dti.to_period(pi_freq) + + # TODO: parametrize over box for pi? + tdi = tm.box_expected(tdi, box_with_array) + with pytest.raises(TypeError): + tdi - pi + + # FIXME: don't leave commented-out + # FIXME: this raises with period scalar but not with PeriodIndex? 
+ # with pytest.raises(TypeError): + # pi - tdi + + # GH#13078 subtraction of Period scalar not supported + with pytest.raises(TypeError): + tdi - pi[0] + with pytest.raises(TypeError): + pi[0] - tdi + + @pytest.mark.parametrize( + "other", + [ + # GH#12624 for str case + "a", + # GH#19123 + 1, + 1.5, + np.array(2), + ], + ) + def test_td64arr_addsub_numeric_scalar_invalid(self, box_with_array, other): + # vector-like others are tested in test_td64arr_add_sub_numeric_arr_invalid + tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + tdarr = tm.box_expected(tdser, box_with_array) + + assert_invalid_addsub_type(tdarr, other) + + @pytest.mark.parametrize( + "vec", + [ + np.array([1, 2, 3]), + pd.Index([1, 2, 3]), + Series([1, 2, 3]), + DataFrame([[1, 2, 3]]), + ], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_addsub_numeric_arr_invalid( + self, box_with_array, vec, any_real_dtype + ): + tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + tdarr = tm.box_expected(tdser, box_with_array) + + vector = vec.astype(any_real_dtype) + assert_invalid_addsub_type(tdarr, vector) + + def test_td64arr_add_sub_int(self, box_with_array, one): + # Variants of `one` for #19012, deprecated GH#22535 + rng = timedelta_range("1 days 09:00:00", freq="H", periods=10) + tdarr = tm.box_expected(rng, box_with_array) + + msg = "Addition/subtraction of integers" + assert_invalid_addsub_type(tdarr, one, msg) + + # TOOD: get inplace ops into assert_invalid_addsub_type + with pytest.raises(TypeError, match=msg): + tdarr += one + with pytest.raises(TypeError, match=msg): + tdarr -= one + + def test_td64arr_add_sub_integer_array(self, box_with_array): + # GH#19959, deprecated GH#22535 + rng = timedelta_range("1 days 09:00:00", freq="H", periods=3) + tdarr = tm.box_expected(rng, box_with_array) + other = tm.box_expected([4, 3, 2], box_with_array) + + msg = "Addition/subtraction of integers and integer-arrays" + assert_invalid_addsub_type(tdarr, other, msg) + + def 
test_td64arr_addsub_integer_array_no_freq(self, box_with_array): + # GH#19959 + tdi = TimedeltaIndex(["1 Day", "NaT", "3 Hours"]) + tdarr = tm.box_expected(tdi, box_with_array) + other = tm.box_expected([14, -1, 16], box_with_array) + + msg = "Addition/subtraction of integers" + assert_invalid_addsub_type(tdarr, other, msg) + + # ------------------------------------------------------------------ + # Operations with timedelta-like others + + # TODO: this was taken from tests.series.test_ops; de-duplicate + def test_operators_timedelta64_with_timedelta(self, scalar_td): + # smoke tests + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + td1 + scalar_td + scalar_td + td1 + td1 - scalar_td + scalar_td - td1 + td1 / scalar_td + scalar_td / td1 + + # TODO: this was taken from tests.series.test_ops; de-duplicate + def test_timedelta64_operations_with_timedeltas(self): + # td operate with td + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td2 = timedelta(minutes=5, seconds=4) + result = td1 - td2 + expected = Series([timedelta(seconds=0)] * 3) - Series( + [timedelta(seconds=1)] * 3 + ) + assert result.dtype == "m8[ns]" + tm.assert_series_equal(result, expected) + + result2 = td2 - td1 + expected = Series([timedelta(seconds=1)] * 3) - Series( + [timedelta(seconds=0)] * 3 + ) + tm.assert_series_equal(result2, expected) + + # roundtrip + tm.assert_series_equal(result + td2, td1) + + # Now again, using pd.to_timedelta, which should build + # a Series or a scalar, depending on input. 
+ td1 = Series(pd.to_timedelta(["00:05:03"] * 3)) + td2 = pd.to_timedelta("00:05:04") + result = td1 - td2 + expected = Series([timedelta(seconds=0)] * 3) - Series( + [timedelta(seconds=1)] * 3 + ) + assert result.dtype == "m8[ns]" + tm.assert_series_equal(result, expected) + + result2 = td2 - td1 + expected = Series([timedelta(seconds=1)] * 3) - Series( + [timedelta(seconds=0)] * 3 + ) + tm.assert_series_equal(result2, expected) + + # roundtrip + tm.assert_series_equal(result + td2, td1) + + def test_td64arr_add_td64_array(self, box_with_array): + box = box_with_array + dti = pd.date_range("2016-01-01", periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = 2 * tdi + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = tdi + tdarr + tm.assert_equal(result, expected) + result = tdarr + tdi + tm.assert_equal(result, expected) + + def test_td64arr_sub_td64_array(self, box_with_array): + box = box_with_array + dti = pd.date_range("2016-01-01", periods=3) + tdi = dti - dti.shift(1) + tdarr = tdi.values + + expected = 0 * tdi + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = tdi - tdarr + tm.assert_equal(result, expected) + result = tdarr - tdi + tm.assert_equal(result, expected) + + # TODO: parametrize over [add, sub, radd, rsub]? 
+ @pytest.mark.parametrize( + "names", + [ + (None, None, None), + ("Egon", "Venkman", None), + ("NCC1701D", "NCC1701D", "NCC1701D"), + ], + ) + def test_td64arr_add_sub_tdi(self, box, names): + # GH#17250 make sure result dtype is correct + # GH#19043 make sure names are propagated correctly + if box is pd.DataFrame and names[1] == "Venkman": + pytest.skip( + "Name propagation for DataFrame does not behave like " + "it does for Index/Series" + ) + + tdi = TimedeltaIndex(["0 days", "1 day"], name=names[0]) + ser = Series([Timedelta(hours=3), Timedelta(hours=4)], name=names[1]) + expected = Series( + [Timedelta(hours=3), Timedelta(days=1, hours=4)], name=names[2] + ) + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + result = tdi + ser + tm.assert_equal(result, expected) + if box is not pd.DataFrame: + assert result.dtype == "timedelta64[ns]" + else: + assert result.dtypes[0] == "timedelta64[ns]" + + result = ser + tdi + tm.assert_equal(result, expected) + if box is not pd.DataFrame: + assert result.dtype == "timedelta64[ns]" + else: + assert result.dtypes[0] == "timedelta64[ns]" + + expected = Series( + [Timedelta(hours=-3), Timedelta(days=1, hours=-4)], name=names[2] + ) + expected = tm.box_expected(expected, box) + + result = tdi - ser + tm.assert_equal(result, expected) + if box is not pd.DataFrame: + assert result.dtype == "timedelta64[ns]" + else: + assert result.dtypes[0] == "timedelta64[ns]" + + result = ser - tdi + tm.assert_equal(result, -expected) + if box is not pd.DataFrame: + assert result.dtype == "timedelta64[ns]" + else: + assert result.dtypes[0] == "timedelta64[ns]" + + def test_td64arr_add_sub_td64_nat(self, box_with_array): + # GH#23320 special handling for timedelta64("NaT") + box = box_with_array + tdi = pd.TimedeltaIndex([NaT, Timedelta("1s")]) + other = np.timedelta64("NaT") + expected = pd.TimedeltaIndex(["NaT"] * 2) + + obj = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + result = 
obj + other + tm.assert_equal(result, expected) + result = other + obj + tm.assert_equal(result, expected) + result = obj - other + tm.assert_equal(result, expected) + result = other - obj + tm.assert_equal(result, expected) + + def test_td64arr_sub_NaT(self, box_with_array): + # GH#18808 + box = box_with_array + ser = Series([NaT, Timedelta("1s")]) + expected = Series([NaT, NaT], dtype="timedelta64[ns]") + + ser = tm.box_expected(ser, box) + expected = tm.box_expected(expected, box) + + res = ser - pd.NaT + tm.assert_equal(res, expected) + + def test_td64arr_add_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as + is now numeric + box = box_with_array + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, box) + + result = rng + two_hours + tm.assert_equal(result, expected) + + def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array): + # only test adding/sub offsets as - is now numeric + box = box_with_array + rng = timedelta_range("1 days", "10 days") + expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") + + rng = tm.box_expected(rng, box) + expected = tm.box_expected(expected, box) + + result = rng - two_hours + tm.assert_equal(result, expected) + + # ------------------------------------------------------------------ + # __add__/__sub__ with DateOffsets and arrays of DateOffsets + + # TODO: this was taken from tests.series.test_operators; de-duplicate + def test_timedelta64_operations_with_DateOffset(self): + # GH#10699 + td = Series([timedelta(minutes=5, seconds=3)] * 3) + result = td + pd.offsets.Minute(1) + expected = Series([timedelta(minutes=6, seconds=3)] * 3) + tm.assert_series_equal(result, expected) + + result = td - pd.offsets.Minute(1) + expected = Series([timedelta(minutes=4, seconds=3)] * 3) + tm.assert_series_equal(result, expected) + + with 
tm.assert_produces_warning(PerformanceWarning): + result = td + Series( + [pd.offsets.Minute(1), pd.offsets.Second(3), pd.offsets.Hour(2)] + ) + expected = Series( + [ + timedelta(minutes=6, seconds=3), + timedelta(minutes=5, seconds=6), + timedelta(hours=2, minutes=5, seconds=3), + ] + ) + tm.assert_series_equal(result, expected) + + result = td + pd.offsets.Minute(1) + pd.offsets.Second(12) + expected = Series([timedelta(minutes=6, seconds=15)] * 3) + tm.assert_series_equal(result, expected) + + # valid DateOffsets + for do in ["Hour", "Minute", "Second", "Day", "Micro", "Milli", "Nano"]: + op = getattr(pd.offsets, do) + td + op(5) + op(5) + td + td - op(5) + op(5) - td + + @pytest.mark.parametrize( + "names", [(None, None, None), ("foo", "bar", None), ("foo", "foo", "foo")] + ) + def test_td64arr_add_offset_index(self, names, box): + # GH#18849, GH#19744 + if box is pd.DataFrame and names[1] == "bar": + pytest.skip( + "Name propagation for DataFrame does not behave like " + "it does for Index/Series" + ) + + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) + other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], name=names[1]) + + expected = TimedeltaIndex( + [tdi[n] + other[n] for n in range(len(tdi))], freq="infer", name=names[2] + ) + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = PerformanceWarning if box is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + res = tdi + other + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(warn): + res2 = other + tdi + tm.assert_equal(res2, expected) + + # TODO: combine with test_td64arr_add_offset_index by parametrizing + # over second box? 
+ def test_td64arr_add_offset_array(self, box_with_array): + # GH#18849 + box = box_with_array + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) + other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + + expected = TimedeltaIndex( + [tdi[n] + other[n] for n in range(len(tdi))], freq="infer" + ) + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, box) + + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = PerformanceWarning if box is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + res = tdi + other + tm.assert_equal(res, expected) + + with tm.assert_produces_warning(warn): + res2 = other + tdi + tm.assert_equal(res2, expected) + + @pytest.mark.parametrize( + "names", [(None, None, None), ("foo", "bar", None), ("foo", "foo", "foo")] + ) + def test_td64arr_sub_offset_index(self, names, box_with_array): + # GH#18824, GH#19744 + box = box_with_array + xbox = box if box is not tm.to_array else pd.Index + exname = names[2] if box is not tm.to_array else names[1] + + if box is pd.DataFrame and names[1] == "bar": + pytest.skip( + "Name propagation for DataFrame does not behave like " + "it does for Index/Series" + ) + + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) + other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], name=names[1]) + + expected = TimedeltaIndex( + [tdi[n] - other[n] for n in range(len(tdi))], freq="infer", name=exname + ) + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, xbox) + + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = PerformanceWarning if box is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + res = tdi - other + tm.assert_equal(res, expected) + + def test_td64arr_sub_offset_array(self, box_with_array): + # 
GH#18824 + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) + other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) + + expected = TimedeltaIndex( + [tdi[n] - other[n] for n in range(len(tdi))], freq="infer" + ) + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + # The DataFrame operation is transposed and so operates as separate + # scalar operations, which do not issue a PerformanceWarning + warn = None if box_with_array is pd.DataFrame else PerformanceWarning + with tm.assert_produces_warning(warn): + res = tdi - other + tm.assert_equal(res, expected) + + @pytest.mark.parametrize( + "names", [(None, None, None), ("foo", "bar", None), ("foo", "foo", "foo")] + ) + def test_td64arr_with_offset_series(self, names, box_df_fail): + # GH#18849 + box = box_df_fail + box2 = Series if box in [pd.Index, tm.to_array] else box + exname = names[2] if box is not tm.to_array else names[1] + + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) + other = Series([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], name=names[1]) + + expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))], name=exname) + tdi = tm.box_expected(tdi, box) + expected_add = tm.box_expected(expected_add, box2) + + with tm.assert_produces_warning(PerformanceWarning): + res = tdi + other + tm.assert_equal(res, expected_add) + + with tm.assert_produces_warning(PerformanceWarning): + res2 = other + tdi + tm.assert_equal(res2, expected_add) + + # TODO: separate/parametrize add/sub test? 
+ expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], name=exname) + expected_sub = tm.box_expected(expected_sub, box2) + + with tm.assert_produces_warning(PerformanceWarning): + res3 = tdi - other + tm.assert_equal(res3, expected_sub) + + @pytest.mark.parametrize("obox", [np.array, pd.Index, pd.Series]) + def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array): + # GH#18824 + tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) + tdi = tm.box_expected(tdi, box_with_array) + + anchored = obox([pd.offsets.MonthEnd(), pd.offsets.Day(n=2)]) + + # addition/subtraction ops with anchored offsets should issue + # a PerformanceWarning and _then_ raise a TypeError. + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi + anchored + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + anchored + tdi + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + tdi - anchored + with pytest.raises(TypeError): + with tm.assert_produces_warning(PerformanceWarning): + anchored - tdi + + # ------------------------------------------------------------------ + # Unsorted + + def test_td64arr_add_sub_object_array(self, box_with_array): + tdi = pd.timedelta_range("1 day", periods=3, freq="D") + tdarr = tm.box_expected(tdi, box_with_array) + + other = np.array( + [pd.Timedelta(days=1), pd.offsets.Day(2), pd.Timestamp("2000-01-04")] + ) + + warn = PerformanceWarning if box_with_array is not pd.DataFrame else None + with tm.assert_produces_warning(warn): + result = tdarr + other + + expected = pd.Index( + [pd.Timedelta(days=2), pd.Timedelta(days=4), pd.Timestamp("2000-01-07")] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + with pytest.raises(TypeError): + with tm.assert_produces_warning(warn): + tdarr - other + + with tm.assert_produces_warning(warn): + result = other - tdarr + + expected = 
pd.Index( + [pd.Timedelta(0), pd.Timedelta(0), pd.Timestamp("2000-01-01")] + ) + expected = tm.box_expected(expected, box_with_array) + tm.assert_equal(result, expected) + + +class TestTimedeltaArraylikeMulDivOps: + # Tests for timedelta64[ns] + # __mul__, __rmul__, __div__, __rdiv__, __floordiv__, __rfloordiv__ + + # TODO: Moved from tests.series.test_operators; needs cleanup + @pytest.mark.parametrize("m", [1, 3, 10]) + @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) + def test_timedelta64_conversions(self, m, unit): + startdate = Series(pd.date_range("2013-01-01", "2013-01-03")) + enddate = Series(pd.date_range("2013-03-01", "2013-03-03")) + + ser = enddate - startdate + ser[2] = np.nan + + # op + expected = Series([x / np.timedelta64(m, unit) for x in ser]) + result = ser / np.timedelta64(m, unit) + tm.assert_series_equal(result, expected) + + # reverse op + expected = Series([Timedelta(np.timedelta64(m, unit)) / x for x in ser]) + result = np.timedelta64(m, unit) / ser + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------------ + # Multiplication + # organized with scalar others first, then array-like + + def test_td64arr_mul_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + + result = idx * 1 + tm.assert_equal(result, idx) + + result = 1 * idx + tm.assert_equal(result, idx) + + def test_td64arr_mul_tdlike_scalar_raises(self, two_hours, box_with_array): + rng = timedelta_range("1 days", "10 days", name="foo") + rng = tm.box_expected(rng, box_with_array) + with pytest.raises(TypeError): + rng * two_hours + + def test_tdi_mul_int_array_zerodim(self, box_with_array): + rng5 = np.arange(5, dtype="int64") + idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5 * 5) + + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = idx * np.array(5, 
dtype="int64") + tm.assert_equal(result, expected) + + def test_tdi_mul_int_array(self, box_with_array): + rng5 = np.arange(5, dtype="int64") + idx = TimedeltaIndex(rng5) + expected = TimedeltaIndex(rng5 ** 2) + + idx = tm.box_expected(idx, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = idx * rng5 + tm.assert_equal(result, expected) + + def test_tdi_mul_int_series(self, box_with_array): + box = box_with_array + xbox = pd.Series if box in [pd.Index, tm.to_array] else box + + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + expected = TimedeltaIndex(np.arange(5, dtype="int64") ** 2) + + idx = tm.box_expected(idx, box) + expected = tm.box_expected(expected, xbox) + + result = idx * pd.Series(np.arange(5, dtype="int64")) + tm.assert_equal(result, expected) + + def test_tdi_mul_float_series(self, box_with_array): + box = box_with_array + xbox = pd.Series if box in [pd.Index, tm.to_array] else box + + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box) + + rng5f = np.arange(5, dtype="float64") + expected = TimedeltaIndex(rng5f * (rng5f + 1.0)) + expected = tm.box_expected(expected, xbox) + + result = idx * Series(rng5f + 1.0) + tm.assert_equal(result, expected) + + # TODO: Put Series/DataFrame in others? 
+ @pytest.mark.parametrize( + "other", + [ + np.arange(1, 11), + pd.Int64Index(range(1, 11)), + pd.UInt64Index(range(1, 11)), + pd.Float64Index(range(1, 11)), + pd.RangeIndex(1, 11), + ], + ids=lambda x: type(x).__name__, + ) + def test_tdi_rmul_arraylike(self, other, box_with_array): + box = box_with_array + xbox = get_upcast_box(box, other) + + tdi = TimedeltaIndex(["1 Day"] * 10) + expected = timedelta_range("1 days", "10 days") + expected._data.freq = None + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, xbox) + + result = other * tdi + tm.assert_equal(result, expected) + commute = tdi * other + tm.assert_equal(commute, expected) + + # ------------------------------------------------------------------ + # __div__, __rdiv__ + + def test_td64arr_div_nat_invalid(self, box_with_array): + # don't allow division by NaT (maybe could in the future) + rng = timedelta_range("1 days", "10 days", name="foo") + rng = tm.box_expected(rng, box_with_array) + + with pytest.raises(TypeError, match="unsupported operand type"): + rng / pd.NaT + with pytest.raises(TypeError, match="Cannot divide NaTType by"): + pd.NaT / rng + + def test_td64arr_div_td64nat(self, box_with_array): + # GH#23829 + rng = timedelta_range("1 days", "10 days") + rng = tm.box_expected(rng, box_with_array) + + other = np.timedelta64("NaT") + + expected = np.array([np.nan] * 10) + expected = tm.box_expected(expected, box_with_array) + + result = rng / other + tm.assert_equal(result, expected) + + result = other / rng + tm.assert_equal(result, expected) + + def test_td64arr_div_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + + result = idx / 1 + tm.assert_equal(result, idx) + + with pytest.raises(TypeError, match="Cannot divide"): + # GH#23829 + 1 / idx + + def test_td64arr_div_tdlike_scalar(self, two_hours, box_with_array): + # GH#20088, GH#22163 ensure DataFrame returns correct dtype + rng = 
timedelta_range("1 days", "10 days", name="foo") + expected = pd.Float64Index((np.arange(10) + 1) * 12, name="foo") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng / two_hours + tm.assert_equal(result, expected) + + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + def test_td64arr_div_tdlike_scalar_with_nat(self, two_hours, box_with_array): + rng = TimedeltaIndex(["1 days", pd.NaT, "2 days"], name="foo") + expected = pd.Float64Index([12, np.nan, 24], name="foo") + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = rng / two_hours + tm.assert_equal(result, expected) + + result = two_hours / rng + expected = 1 / expected + tm.assert_equal(result, expected) + + def test_td64arr_div_td64_ndarray(self, box_with_array): + # GH#22631 + rng = TimedeltaIndex(["1 days", pd.NaT, "2 days"]) + expected = pd.Float64Index([12, np.nan, 24]) + + rng = tm.box_expected(rng, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + other = np.array([2, 4, 2], dtype="m8[h]") + result = rng / other + tm.assert_equal(result, expected) + + result = rng / tm.box_expected(other, box_with_array) + tm.assert_equal(result, expected) + + result = rng / other.astype(object) + tm.assert_equal(result, expected) + + result = rng / list(other) + tm.assert_equal(result, expected) + + # reversed op + expected = 1 / expected + result = other / rng + tm.assert_equal(result, expected) + + result = tm.box_expected(other, box_with_array) / rng + tm.assert_equal(result, expected) + + result = other.astype(object) / rng + tm.assert_equal(result, expected) + + result = list(other) / rng + tm.assert_equal(result, expected) + + def test_tdarr_div_length_mismatch(self, box_with_array): + rng = TimedeltaIndex(["1 days", pd.NaT, "2 days"]) + mismatched = [1, 2, 3, 4] + + rng = tm.box_expected(rng, box_with_array) + for obj in 
[mismatched, mismatched[:2]]: + # one shorter, one longer + for other in [obj, np.array(obj), pd.Index(obj)]: + with pytest.raises(ValueError): + rng / other + with pytest.raises(ValueError): + other / rng + + # ------------------------------------------------------------------ + # __floordiv__, __rfloordiv__ + + def test_td64arr_floordiv_tdscalar(self, box_with_array, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + expected = Series([0, 0, np.nan]) + + td1 = tm.box_expected(td1, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array, transpose=False) + + result = td1 // scalar_td + tm.assert_equal(result, expected) + + def test_td64arr_rfloordiv_tdscalar(self, box_with_array, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + expected = Series([1, 1, np.nan]) + + td1 = tm.box_expected(td1, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array, transpose=False) + + result = scalar_td // td1 + tm.assert_equal(result, expected) + + def test_td64arr_rfloordiv_tdscalar_explicit(self, box_with_array, scalar_td): + # GH#18831 + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + expected = Series([1, 1, np.nan]) + + td1 = tm.box_expected(td1, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array, transpose=False) + + # We can test __rfloordiv__ using this syntax, + # see `test_timedelta_rfloordiv` + result = td1.__rfloordiv__(scalar_td) + tm.assert_equal(result, expected) + + def test_td64arr_floordiv_int(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + result = idx // 1 + tm.assert_equal(result, idx) + + pattern = "floor_divide cannot use operands|Cannot divide int by Timedelta*" + with pytest.raises(TypeError, match=pattern): + 1 // idx + + def 
test_td64arr_floordiv_tdlike_scalar(self, two_hours, box_with_array): + tdi = timedelta_range("1 days", "10 days", name="foo") + expected = pd.Int64Index((np.arange(10) + 1) * 12, name="foo") + + tdi = tm.box_expected(tdi, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdi // two_hours + tm.assert_equal(result, expected) + + # TODO: Is this redundant with test_td64arr_floordiv_tdlike_scalar? + @pytest.mark.parametrize( + "scalar_td", + [ + timedelta(minutes=10, seconds=7), + Timedelta("10m7s"), + Timedelta("10m7s").to_timedelta64(), + ], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box_with_array): + # GH#19125 + tdi = TimedeltaIndex(["00:05:03", "00:05:03", pd.NaT], freq=None) + expected = pd.Index([2.0, 2.0, np.nan]) + + tdi = tm.box_expected(tdi, box_with_array, transpose=False) + expected = tm.box_expected(expected, box_with_array, transpose=False) + + res = tdi.__rfloordiv__(scalar_td) + tm.assert_equal(res, expected) + + expected = pd.Index([0.0, 0.0, np.nan]) + expected = tm.box_expected(expected, box_with_array, transpose=False) + + res = tdi // (scalar_td) + tm.assert_equal(res, expected) + + # ------------------------------------------------------------------ + # mod, divmod + # TODO: operations with timedelta-like arrays, numeric arrays, + # reversed ops + + def test_td64arr_mod_tdscalar(self, box_with_array, three_days): + tdi = timedelta_range("1 Day", "9 days") + tdarr = tm.box_expected(tdi, box_with_array) + + expected = TimedeltaIndex(["1 Day", "2 Days", "0 Days"] * 3) + expected = tm.box_expected(expected, box_with_array) + + result = tdarr % three_days + tm.assert_equal(result, expected) + + if box_with_array is pd.DataFrame: + pytest.xfail("DataFrame does not have __divmod__ or __rdivmod__") + + result = divmod(tdarr, three_days) + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], tdarr // three_days) + + def test_td64arr_mod_int(self, 
box_with_array): + tdi = timedelta_range("1 ns", "10 ns", periods=10) + tdarr = tm.box_expected(tdi, box_with_array) + + expected = TimedeltaIndex(["1 ns", "0 ns"] * 5) + expected = tm.box_expected(expected, box_with_array) + + result = tdarr % 2 + tm.assert_equal(result, expected) + + with pytest.raises(TypeError): + 2 % tdarr + + if box_with_array is pd.DataFrame: + pytest.xfail("DataFrame does not have __divmod__ or __rdivmod__") + + result = divmod(tdarr, 2) + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], tdarr // 2) + + def test_td64arr_rmod_tdscalar(self, box_with_array, three_days): + tdi = timedelta_range("1 Day", "9 days") + tdarr = tm.box_expected(tdi, box_with_array) + + expected = ["0 Days", "1 Day", "0 Days"] + ["3 Days"] * 6 + expected = TimedeltaIndex(expected) + expected = tm.box_expected(expected, box_with_array) + + result = three_days % tdarr + tm.assert_equal(result, expected) + + if box_with_array is pd.DataFrame: + pytest.xfail("DataFrame does not have __divmod__ or __rdivmod__") + + result = divmod(three_days, tdarr) + tm.assert_equal(result[1], expected) + tm.assert_equal(result[0], three_days // tdarr) + + # ------------------------------------------------------------------ + # Operations with invalid others + + def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + td1 = tm.box_expected(td1, box_with_array) + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = "operate|unsupported|cannot|not supported" + with pytest.raises(TypeError, match=pattern): + td1 * scalar_td + with pytest.raises(TypeError, match=pattern): + scalar_td * td1 + + def test_td64arr_mul_too_short_raises(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + with pytest.raises(TypeError): + idx * idx[:3] + with 
pytest.raises(ValueError): + idx * np.array([1, 2]) + + def test_td64arr_mul_td64arr_raises(self, box_with_array): + idx = TimedeltaIndex(np.arange(5, dtype="int64")) + idx = tm.box_expected(idx, box_with_array) + with pytest.raises(TypeError): + idx * idx + + # ------------------------------------------------------------------ + # Operations with numeric others + + def test_td64arr_mul_numeric_scalar(self, box_with_array, one): + # GH#4521 + # divide/multiply by integers + tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["-59 Days", "-59 Days", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser * (-one) + tm.assert_equal(result, expected) + result = (-one) * tdser + tm.assert_equal(result, expected) + + expected = Series(["118 Days", "118 Days", "NaT"], dtype="timedelta64[ns]") + expected = tm.box_expected(expected, box_with_array) + + result = tdser * (2 * one) + tm.assert_equal(result, expected) + result = (2 * one) * tdser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("two", [2, 2.0, np.array(2), np.array(2.0)]) + def test_td64arr_div_numeric_scalar(self, box_with_array, two): + # GH#4521 + # divide/multiply by integers + tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + expected = Series(["29.5D", "29.5D", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, box_with_array) + + result = tdser / two + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match="Cannot divide"): + two / tdser + + @pytest.mark.parametrize( + "vector", + [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_rmul_numeric_array(self, box_with_array, vector, any_real_dtype): + # GH#4521 + # divide/multiply by integers + xbox = 
get_upcast_box(box_with_array, vector) + + tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + vector = vector.astype(any_real_dtype) + + expected = Series(["1180 Days", "1770 Days", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, xbox) + + result = tdser * vector + tm.assert_equal(result, expected) + + result = vector * tdser + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( + "vector", + [np.array([20, 30, 40]), pd.Index([20, 30, 40]), Series([20, 30, 40])], + ids=lambda x: type(x).__name__, + ) + def test_td64arr_div_numeric_array(self, box_with_array, vector, any_real_dtype): + # GH#4521 + # divide/multiply by integers + xbox = get_upcast_box(box_with_array, vector) + + tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") + vector = vector.astype(any_real_dtype) + + expected = Series(["2.95D", "1D 23H 12m", "NaT"], dtype="timedelta64[ns]") + + tdser = tm.box_expected(tdser, box_with_array) + expected = tm.box_expected(expected, xbox) + + result = tdser / vector + tm.assert_equal(result, expected) + + pattern = ( + "true_divide cannot use operands|" + "cannot perform __div__|" + "cannot perform __truediv__|" + "unsupported operand|" + "Cannot divide" + ) + with pytest.raises(TypeError, match=pattern): + vector / tdser + + if not isinstance(vector, pd.Index): + # Index.__rdiv__ won't try to operate elementwise, just raises + result = tdser / vector.astype(object) + if box_with_array is pd.DataFrame: + expected = [tdser.iloc[0, n] / vector[n] for n in range(len(vector))] + else: + expected = [tdser[n] / vector[n] for n in range(len(tdser))] + expected = tm.box_expected(expected, xbox) + tm.assert_equal(result, expected) + + with pytest.raises(TypeError, match=pattern): + vector.astype(object) / tdser + + @pytest.mark.parametrize( + "names", + [ + (None, None, None), + ("Egon", "Venkman", None), + ("NCC1701D", "NCC1701D", "NCC1701D"), + ], + ) + def 
test_td64arr_mul_int_series(self, box_df_fail, names): + # GH#19042 test for correct name attachment + box = box_df_fail # broadcasts along wrong axis, but doesn't raise + exname = names[2] if box is not tm.to_array else names[1] + + tdi = TimedeltaIndex( + ["0days", "1day", "2days", "3days", "4days"], name=names[0] + ) + # TODO: Should we be parametrizing over types for `ser` too? + ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) + + expected = Series( + ["0days", "1day", "4days", "9days", "16days"], + dtype="timedelta64[ns]", + name=exname, + ) + + tdi = tm.box_expected(tdi, box) + box = Series if (box is pd.Index or box is tm.to_array) else box + expected = tm.box_expected(expected, box) + + result = ser * tdi + tm.assert_equal(result, expected) + + # The direct operation tdi * ser still needs to be fixed. + result = ser.__rmul__(tdi) + tm.assert_equal(result, expected) + + # TODO: Should we be parametrizing over types for `ser` too? + @pytest.mark.parametrize( + "names", + [ + (None, None, None), + ("Egon", "Venkman", None), + ("NCC1701D", "NCC1701D", "NCC1701D"), + ], + ) + def test_float_series_rdiv_td64arr(self, box_with_array, names): + # GH#19042 test for correct name attachment + # TODO: the direct operation TimedeltaIndex / Series still + # needs to be fixed. + box = box_with_array + tdi = TimedeltaIndex( + ["0days", "1day", "2days", "3days", "4days"], name=names[0] + ) + ser = Series([1.5, 3, 4.5, 6, 7.5], dtype=np.float64, name=names[1]) + + xname = names[2] if box is not tm.to_array else names[1] + expected = Series( + [tdi[n] / ser[n] for n in range(len(ser))], + dtype="timedelta64[ns]", + name=xname, + ) + + xbox = box + if box in [pd.Index, tm.to_array] and type(ser) is Series: + xbox = Series + + tdi = tm.box_expected(tdi, box) + expected = tm.box_expected(expected, xbox) + + result = ser.__rdiv__(tdi) + if box is pd.DataFrame: + # TODO: Should we skip this case sooner or test something else? 
+ assert result is NotImplemented + else: + tm.assert_equal(result, expected) + + +class TestTimedelta64ArrayLikeArithmetic: + # Arithmetic tests for timedelta64[ns] vectors fully parametrized over + # DataFrame/Series/TimedeltaIndex/TimedeltaArray. Ideally all arithmetic + # tests will eventually end up here. + + def test_td64arr_pow_invalid(self, scalar_td, box_with_array): + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) + td1.iloc[2] = np.nan + + td1 = tm.box_expected(td1, box_with_array) + + # check that we are getting a TypeError + # with 'operate' (from core/ops.py) for the ops that are not + # defined + pattern = "operate|unsupported|cannot|not supported" + with pytest.raises(TypeError, match=pattern): + scalar_td ** td1 + + with pytest.raises(TypeError, match=pattern): + td1 ** scalar_td diff --git a/pandas/tests/arrays/__init__.py b/pandas/tests/arrays/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/categorical/__init__.py b/pandas/tests/arrays/categorical/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/categorical/common.py b/pandas/tests/arrays/categorical/common.py new file mode 100644 index 00000000..4ef93906 --- /dev/null +++ b/pandas/tests/arrays/categorical/common.py @@ -0,0 +1,8 @@ +from pandas import Categorical + + +class TestCategorical: + def setup_method(self, method): + self.factor = Categorical( + ["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True + ) diff --git a/pandas/tests/arrays/categorical/conftest.py b/pandas/tests/arrays/categorical/conftest.py new file mode 100644 index 00000000..640f5dfd --- /dev/null +++ b/pandas/tests/arrays/categorical/conftest.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.fixture(params=[True, False]) +def allow_fill(request): + """Boolean 'allow_fill' parameter for Categorical.take""" + return request.param diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py 
new file mode 100644 index 00000000..50ad27ce --- /dev/null +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -0,0 +1,200 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]]) +def test_factorize(categories, ordered): + cat = pd.Categorical( + ["b", "b", "a", "c", None], categories=categories, ordered=ordered + ) + codes, uniques = pd.factorize(cat) + expected_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp) + expected_uniques = pd.Categorical( + ["b", "a", "c"], categories=categories, ordered=ordered + ) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_factorized_sort(): + cat = pd.Categorical(["b", "b", None, "a"]) + codes, uniques = pd.factorize(cat, sort=True) + expected_codes = np.array([1, 1, -1, 0], dtype=np.intp) + expected_uniques = pd.Categorical(["a", "b"]) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_factorized_sort_ordered(): + cat = pd.Categorical( + ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True + ) + + codes, uniques = pd.factorize(cat, sort=True) + expected_codes = np.array([0, 0, -1, 1], dtype=np.intp) + expected_uniques = pd.Categorical( + ["b", "a"], categories=["c", "b", "a"], ordered=True + ) + + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_categorical_equal(uniques, expected_uniques) + + +def test_isin_cats(): + # GH2003 + cat = pd.Categorical(["a", "b", np.nan]) + + result = cat.isin(["a", np.nan]) + expected = np.array([True, False, True], dtype=bool) + tm.assert_numpy_array_equal(expected, result) + + result = cat.isin(["a", "c"]) + expected = np.array([True, False, False], dtype=bool) + tm.assert_numpy_array_equal(expected, result) + + +@pytest.mark.parametrize( + 
"to_replace, value, result, expected_error_msg", + [ + ("b", "c", ["a", "c"], "Categorical.categories are different"), + ("c", "d", ["a", "b"], None), + # https://github.com/pandas-dev/pandas/issues/33288 + ("a", "a", ["a", "b"], None), + ("b", None, ["a", None], "Categorical.categories length are different"), + ], +) +def test_replace(to_replace, value, result, expected_error_msg): + # GH 26988 + cat = pd.Categorical(["a", "b"]) + expected = pd.Categorical(result) + result = cat.replace(to_replace, value) + tm.assert_categorical_equal(result, expected) + if to_replace == "b": # the "c" test is supposed to be unchanged + with pytest.raises(AssertionError, match=expected_error_msg): + # ensure non-inplace call does not affect original + tm.assert_categorical_equal(cat, expected) + cat.replace(to_replace, value, inplace=True) + tm.assert_categorical_equal(cat, expected) + + +@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])]) +def test_isin_empty(empty): + s = pd.Categorical(["a", "b"]) + expected = np.array([False, False], dtype=bool) + + result = s.isin(empty) + tm.assert_numpy_array_equal(expected, result) + + +def test_diff(): + s = pd.Series([1, 2, 3], dtype="category") + with tm.assert_produces_warning(FutureWarning): + result = s.diff() + expected = pd.Series([np.nan, 1, 1]) + tm.assert_series_equal(result, expected) + + expected = expected.to_frame(name="A") + df = s.to_frame(name="A") + with tm.assert_produces_warning(FutureWarning): + result = df.diff() + + tm.assert_frame_equal(result, expected) + + +class TestTake: + # https://github.com/pandas-dev/pandas/issues/20664 + + def test_take_default_allow_fill(self): + cat = pd.Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + result = cat.take([0, -1]) + + assert result.equals(cat) + + def test_take_positive_no_warning(self): + cat = pd.Categorical(["a", "b"]) + with tm.assert_produces_warning(None): + cat.take([0, 0]) + + def test_take_bounds(self, allow_fill): + # 
https://github.com/pandas-dev/pandas/issues/20664 + cat = pd.Categorical(["a", "b", "a"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "index 4 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + cat.take([4, 5], allow_fill=allow_fill) + + def test_take_empty(self, allow_fill): + # https://github.com/pandas-dev/pandas/issues/20664 + cat = pd.Categorical([], categories=["a", "b"]) + if allow_fill: + msg = "indices are out-of-bounds" + else: + msg = "cannot do a non-empty take from an empty axes" + with pytest.raises(IndexError, match=msg): + cat.take([0], allow_fill=allow_fill) + + def test_positional_take(self, ordered_fixture): + cat = pd.Categorical( + ["a", "a", "b", "b"], categories=["b", "a"], ordered=ordered_fixture + ) + result = cat.take([0, 1, 2], allow_fill=False) + expected = pd.Categorical( + ["a", "a", "b"], categories=cat.categories, ordered=ordered_fixture + ) + tm.assert_categorical_equal(result, expected) + + def test_positional_take_unobserved(self, ordered_fixture): + cat = pd.Categorical( + ["a", "b"], categories=["a", "b", "c"], ordered=ordered_fixture + ) + result = cat.take([1, 0], allow_fill=False) + expected = pd.Categorical( + ["b", "a"], categories=cat.categories, ordered=ordered_fixture + ) + tm.assert_categorical_equal(result, expected) + + def test_take_allow_fill(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = pd.Categorical(["a", "a", "b"]) + result = cat.take([0, -1, -1], allow_fill=True) + expected = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_with_negative_one(self): + # -1 was a category + cat = pd.Categorical([-1, 0, 1]) + result = cat.take([0, -1, 1], allow_fill=True, fill_value=-1) + expected = pd.Categorical([-1, -1, 0], categories=[-1, 0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value(self): + # 
https://github.com/pandas-dev/pandas/issues/23296 + cat = pd.Categorical(["a", "b", "c"]) + result = cat.take([0, 1, -1], fill_value="a", allow_fill=True) + expected = pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + def test_take_fill_value_new_raises(self): + # https://github.com/pandas-dev/pandas/issues/23296 + cat = pd.Categorical(["a", "b", "c"]) + xpr = r"'fill_value' \('d'\) is not in this Categorical's categories." + with pytest.raises(TypeError, match=xpr): + cat.take([0, 1, -1], fill_value="d", allow_fill=True) + + def test_take_nd_deprecated(self): + cat = pd.Categorical(["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning): + cat.take_nd([0, 1]) + + ci = pd.Index(cat) + with tm.assert_produces_warning(FutureWarning): + ci.take_nd([0, 1]) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py new file mode 100644 index 00000000..1fca0fa5 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -0,0 +1,380 @@ +import re +import sys + +import numpy as np +import pytest + +from pandas.compat import PYPY + +from pandas import Categorical, Index, NaT, Series, date_range +import pandas._testing as tm +from pandas.api.types import is_scalar + + +class TestCategoricalAnalytics: + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def test_min_max_not_ordered_raises(self, aggregation): + # unordered cats have no min/max + cat = Categorical(["a", "b", "c", "d"], ordered=False) + msg = "Categorical is not ordered for operation {}" + agg_func = getattr(cat, aggregation) + + with pytest.raises(TypeError, match=msg.format(aggregation)): + agg_func() + + def test_min_max_ordered(self): + cat = Categorical(["a", "b", "c", "d"], ordered=True) + _min = cat.min() + _max = cat.max() + assert _min == "a" + assert _max == "d" + + cat = Categorical( + ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True + ) 
+ _min = cat.min() + _max = cat.max() + assert _min == "d" + assert _max == "a" + + @pytest.mark.parametrize( + "categories,expected", + [ + (list("ABC"), np.NaN), + ([1, 2, 3], np.NaN), + pytest.param( + Series(date_range("2020-01-01", periods=3), dtype="category"), + NaT, + marks=pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/29962" + ), + ), + ], + ) + @pytest.mark.parametrize("aggregation", ["min", "max"]) + def test_min_max_ordered_empty(self, categories, expected, aggregation): + # GH 30227 + cat = Categorical([], categories=list("ABC"), ordered=True) + + agg_func = getattr(cat, aggregation) + result = agg_func() + assert result is expected + + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_with_nan(self, skipna): + # GH 25303 + cat = Categorical( + [np.nan, "b", "c", np.nan], categories=["d", "c", "b", "a"], ordered=True + ) + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) + + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == "c" + assert _max == "b" + + cat = Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) + + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == 2 + assert _max == 1 + + @pytest.mark.parametrize("function", ["min", "max"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_only_nan(self, function, skipna): + # https://github.com/pandas-dev/pandas/issues/33450 + cat = Categorical([np.nan], categories=[1, 2], ordered=True) + result = getattr(cat, function)(skipna=skipna) + assert result is np.nan + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_deprecate_numeric_only_min_max(self, method): + # GH 25303 + cat = Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + with tm.assert_produces_warning(expected_warning=FutureWarning): + 
getattr(cat, method)(numeric_only=True) + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_raises(self, method): + cat = Categorical(["a", "b", "c", "b"], ordered=False) + msg = ( + f"Categorical is not ordered for operation {method}\n" + "you can use .as_ordered() to change the Categorical to an ordered one" + ) + method = getattr(np, method) + with pytest.raises(TypeError, match=re.escape(msg)): + method(cat) + + @pytest.mark.parametrize("kwarg", ["axis", "out", "keepdims"]) + @pytest.mark.parametrize("method", ["min", "max"]) + def test_numpy_min_max_unsupported_kwargs_raises(self, method, kwarg): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + msg = ( + f"the '{kwarg}' parameter is not supported in the pandas implementation " + f"of {method}" + ) + kwargs = {kwarg: 42} + method = getattr(np, method) + with pytest.raises(ValueError, match=msg): + method(cat, **kwargs) + + @pytest.mark.parametrize("method, expected", [("min", "a"), ("max", "c")]) + def test_numpy_min_max_axis_equals_none(self, method, expected): + cat = Categorical(["a", "b", "c", "b"], ordered=True) + method = getattr(np, method) + result = method(cat, axis=None) + assert result == expected + + @pytest.mark.parametrize( + "values,categories,exp_mode", + [ + ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]), + ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]), + ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]), + ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]), + ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), + ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]), + ], + ) + def test_mode(self, values, categories, exp_mode): + s = Categorical(values, categories=categories, ordered=True) + res = s.mode() + exp = Categorical(exp_mode, categories=categories, ordered=True) + tm.assert_categorical_equal(res, exp) + + def test_searchsorted(self, ordered_fixture): + # https://github.com/pandas-dev/pandas/issues/8420 + # 
https://github.com/pandas-dev/pandas/issues/14522 + + cat = Categorical( + ["cheese", "milk", "apple", "bread", "bread"], + categories=["cheese", "milk", "apple", "bread"], + ordered=ordered_fixture, + ) + ser = Series(cat) + + # Searching for single item argument, side='left' (default) + res_cat = cat.searchsorted("apple") + assert res_cat == 2 + assert is_scalar(res_cat) + + res_ser = ser.searchsorted("apple") + assert res_ser == 2 + assert is_scalar(res_ser) + + # Searching for single item array, side='left' (default) + res_cat = cat.searchsorted(["bread"]) + res_ser = ser.searchsorted(["bread"]) + exp = np.array([3], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for several items array, side='right' + res_cat = cat.searchsorted(["apple", "bread"], side="right") + res_ser = ser.searchsorted(["apple", "bread"], side="right") + exp = np.array([3, 5], dtype=np.intp) + tm.assert_numpy_array_equal(res_cat, exp) + tm.assert_numpy_array_equal(res_ser, exp) + + # Searching for a single value that is not from the Categorical + with pytest.raises(KeyError, match="cucumber"): + cat.searchsorted("cucumber") + with pytest.raises(KeyError, match="cucumber"): + ser.searchsorted("cucumber") + + # Searching for multiple values one of each is not from the Categorical + with pytest.raises(KeyError, match="cucumber"): + cat.searchsorted(["bread", "cucumber"]) + with pytest.raises(KeyError, match="cucumber"): + ser.searchsorted(["bread", "cucumber"]) + + def test_unique(self): + # categories are reordered based on value when ordered=False + cat = Categorical(["a", "b"]) + exp = Index(["a", "b"]) + res = cat.unique() + tm.assert_index_equal(res.categories, exp) + tm.assert_categorical_equal(res, cat) + + cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"]) + res = cat.unique() + tm.assert_index_equal(res.categories, exp) + tm.assert_categorical_equal(res, Categorical(exp)) + + cat = 
Categorical(["c", "a", "b", "a", "a"], categories=["a", "b", "c"]) + exp = Index(["c", "a", "b"]) + res = cat.unique() + tm.assert_index_equal(res.categories, exp) + exp_cat = Categorical(exp, categories=["c", "a", "b"]) + tm.assert_categorical_equal(res, exp_cat) + + # nan must be removed + cat = Categorical(["b", np.nan, "b", np.nan, "a"], categories=["a", "b", "c"]) + res = cat.unique() + exp = Index(["b", "a"]) + tm.assert_index_equal(res.categories, exp) + exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"]) + tm.assert_categorical_equal(res, exp_cat) + + def test_unique_ordered(self): + # keep categories order when ordered=True + cat = Categorical(["b", "a", "b"], categories=["a", "b"], ordered=True) + res = cat.unique() + exp_cat = Categorical(["b", "a"], categories=["a", "b"], ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + cat = Categorical( + ["c", "b", "a", "a"], categories=["a", "b", "c"], ordered=True + ) + res = cat.unique() + exp_cat = Categorical(["c", "b", "a"], categories=["a", "b", "c"], ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + cat = Categorical(["b", "a", "a"], categories=["a", "b", "c"], ordered=True) + res = cat.unique() + exp_cat = Categorical(["b", "a"], categories=["a", "b"], ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + cat = Categorical( + ["b", "b", np.nan, "a"], categories=["a", "b", "c"], ordered=True + ) + res = cat.unique() + exp_cat = Categorical(["b", np.nan, "a"], categories=["a", "b"], ordered=True) + tm.assert_categorical_equal(res, exp_cat) + + def test_unique_index_series(self): + c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1]) + # Categorical.unique sorts categories by appearance order + # if ordered=False + exp = Categorical([3, 1, 2], categories=[3, 1, 2]) + tm.assert_categorical_equal(c.unique(), exp) + + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + c = Categorical([1, 1, 2, 2], 
categories=[3, 2, 1]) + exp = Categorical([1, 2], categories=[1, 2]) + tm.assert_categorical_equal(c.unique(), exp) + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True) + # Categorical.unique keeps categories order if ordered=True + exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True) + tm.assert_categorical_equal(c.unique(), exp) + + tm.assert_index_equal(Index(c).unique(), Index(exp)) + tm.assert_categorical_equal(Series(c).unique(), exp) + + def test_shift(self): + # GH 9416 + cat = Categorical(["a", "b", "c", "d", "a"]) + + # shift forward + sp1 = cat.shift(1) + xp1 = Categorical([np.nan, "a", "b", "c", "d"]) + tm.assert_categorical_equal(sp1, xp1) + tm.assert_categorical_equal(cat[:-1], sp1[1:]) + + # shift back + sn2 = cat.shift(-2) + xp2 = Categorical( + ["c", "d", "a", np.nan, np.nan], categories=["a", "b", "c", "d"] + ) + tm.assert_categorical_equal(sn2, xp2) + tm.assert_categorical_equal(cat[2:], sn2[:-2]) + + # shift by zero + tm.assert_categorical_equal(cat, cat.shift(0)) + + def test_nbytes(self): + cat = Categorical([1, 2, 3]) + exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories + assert cat.nbytes == exp + + def test_memory_usage(self): + cat = Categorical([1, 2, 3]) + + # .categories is an index, so we include the hashtable + assert 0 < cat.nbytes <= cat.memory_usage() + assert 0 < cat.nbytes <= cat.memory_usage(deep=True) + + cat = Categorical(["foo", "foo", "bar"]) + assert cat.memory_usage(deep=True) > cat.nbytes + + if not PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 + + def test_map(self): + c = Categorical(list("ABABC"), categories=list("CBA"), ordered=True) + result = c.map(lambda x: x.lower()) + exp = Categorical(list("ababc"), categories=list("cba"), 
ordered=True) + tm.assert_categorical_equal(result, exp) + + c = Categorical(list("ABABC"), categories=list("ABC"), ordered=False) + result = c.map(lambda x: x.lower()) + exp = Categorical(list("ababc"), categories=list("abc"), ordered=False) + tm.assert_categorical_equal(result, exp) + + result = c.map(lambda x: 1) + # GH 12766: Return an index not an array + tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64))) + + @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) + def test_validate_inplace_raises(self, value): + cat = Categorical(["A", "B", "B", "C", "A"]) + msg = ( + 'For argument "inplace" expected type bool, ' + f"received type {type(value).__name__}" + ) + with pytest.raises(ValueError, match=msg): + cat.set_ordered(value=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.as_ordered(inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.as_unordered(inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.set_categories(["X", "Y", "Z"], rename=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.rename_categories(["X", "Y", "Z"], inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.reorder_categories(["X", "Y", "Z"], ordered=True, inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.add_categories(new_categories=["D", "E", "F"], inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.remove_categories(removals=["D", "E", "F"], inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.remove_unused_categories(inplace=value) + + with pytest.raises(ValueError, match=msg): + cat.sort_values(inplace=value) + + def test_isna(self): + exp = np.array([False, False, True]) + c = Categorical(["a", "b", np.nan]) + res = c.isna() + + tm.assert_numpy_array_equal(res, exp) diff --git a/pandas/tests/arrays/categorical/test_api.py b/pandas/tests/arrays/categorical/test_api.py new file mode 100644 index 00000000..f49f70f5 --- 
/dev/null +++ b/pandas/tests/arrays/categorical/test_api.py @@ -0,0 +1,511 @@ +import re + +import numpy as np +import pytest + +from pandas import Categorical, CategoricalIndex, DataFrame, Index, Series +import pandas._testing as tm +from pandas.core.arrays.categorical import _recode_for_categories +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalAPI: + def test_ordered_api(self): + # GH 9347 + cat1 = Categorical(list("acb"), ordered=False) + tm.assert_index_equal(cat1.categories, Index(["a", "b", "c"])) + assert not cat1.ordered + + cat2 = Categorical(list("acb"), categories=list("bca"), ordered=False) + tm.assert_index_equal(cat2.categories, Index(["b", "c", "a"])) + assert not cat2.ordered + + cat3 = Categorical(list("acb"), ordered=True) + tm.assert_index_equal(cat3.categories, Index(["a", "b", "c"])) + assert cat3.ordered + + cat4 = Categorical(list("acb"), categories=list("bca"), ordered=True) + tm.assert_index_equal(cat4.categories, Index(["b", "c", "a"])) + assert cat4.ordered + + def test_set_ordered(self): + + cat = Categorical(["a", "b", "c", "a"], ordered=True) + cat2 = cat.as_unordered() + assert not cat2.ordered + cat2 = cat.as_ordered() + assert cat2.ordered + cat2.as_unordered(inplace=True) + assert not cat2.ordered + cat2.as_ordered(inplace=True) + assert cat2.ordered + + assert cat2.set_ordered(True).ordered + assert not cat2.set_ordered(False).ordered + cat2.set_ordered(True, inplace=True) + assert cat2.ordered + cat2.set_ordered(False, inplace=True) + assert not cat2.ordered + + # removed in 0.19.0 + msg = "can't set attribute" + with pytest.raises(AttributeError, match=msg): + cat.ordered = True + with pytest.raises(AttributeError, match=msg): + cat.ordered = False + + def test_rename_categories(self): + cat = Categorical(["a", "b", "c", "a"]) + + # inplace=False: the old one must not be changed + res = cat.rename_categories([1, 2, 3]) + tm.assert_numpy_array_equal( + res.__array__(), np.array([1, 
2, 3, 1], dtype=np.int64) + ) + tm.assert_index_equal(res.categories, Index([1, 2, 3])) + + exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + exp_cat = Index(["a", "b", "c"]) + tm.assert_index_equal(cat.categories, exp_cat) + + # GH18862 (let rename_categories take callables) + result = cat.rename_categories(lambda x: x.upper()) + expected = Categorical(["A", "B", "C", "A"]) + tm.assert_categorical_equal(result, expected) + + # and now inplace + res = cat.rename_categories([1, 2, 3], inplace=True) + assert res is None + tm.assert_numpy_array_equal( + cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64) + ) + tm.assert_index_equal(cat.categories, Index([1, 2, 3])) + + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) + def test_rename_categories_wrong_length_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"]) + msg = ( + "new categories need to have the same number of items as the " + "old categories!" 
+ ) + with pytest.raises(ValueError, match=msg): + cat.rename_categories(new_categories) + + def test_rename_categories_series(self): + # https://github.com/pandas-dev/pandas/issues/17981 + c = Categorical(["a", "b"]) + result = c.rename_categories(Series([0, 1], index=["a", "b"])) + expected = Categorical([0, 1]) + tm.assert_categorical_equal(result, expected) + + def test_rename_categories_dict(self): + # GH 17336 + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}) + expected = Index([4, 3, 2, 1]) + tm.assert_index_equal(res.categories, expected) + + # Test for inplace + res = cat.rename_categories({"a": 4, "b": 3, "c": 2, "d": 1}, inplace=True) + assert res is None + tm.assert_index_equal(cat.categories, expected) + + # Test for dicts of smaller length + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 1, "c": 3}) + + expected = Index([1, "b", 3, "d"]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with bigger length + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}) + expected = Index([1, 2, 3, 4]) + tm.assert_index_equal(res.categories, expected) + + # Test for dicts with no items from old categories + cat = Categorical(["a", "b", "c", "d"]) + res = cat.rename_categories({"f": 1, "g": 3}) + + expected = Index(["a", "b", "c", "d"]) + tm.assert_index_equal(res.categories, expected) + + def test_reorder_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical( + ["a", "b", "c", "a"], categories=["c", "b", "a"], ordered=True + ) + + # first inplace == False + res = cat.reorder_categories(["c", "b", "a"]) + # cat must be the same as before + tm.assert_categorical_equal(cat, old) + # only res is changed + tm.assert_categorical_equal(res, new) + + # inplace == True + res = cat.reorder_categories(["c", "b", "a"], inplace=True) + assert res 
is None + tm.assert_categorical_equal(cat, new) + + @pytest.mark.parametrize( + "new_categories", + [ + ["a"], # not all "old" included in "new" + ["a", "b", "d"], # still not all "old" in "new" + ["a", "b", "c", "d"], # all "old" included in "new", but too long + ], + ) + def test_reorder_categories_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + msg = "items in new_categories are not the same as in old categories" + with pytest.raises(ValueError, match=msg): + cat.reorder_categories(new_categories) + + def test_add_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical( + ["a", "b", "c", "a"], categories=["a", "b", "c", "d"], ordered=True + ) + + # first inplace == False + res = cat.add_categories("d") + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.add_categories(["d"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + res = cat.add_categories("d", inplace=True) + tm.assert_categorical_equal(cat, new) + assert res is None + + # GH 9927 + cat = Categorical(list("abc"), ordered=True) + expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) + # test with Series, np.array, index, list + res = cat.add_categories(Series(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(np.array(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(Index(["d", "e"])) + tm.assert_categorical_equal(res, expected) + res = cat.add_categories(["d", "e"]) + tm.assert_categorical_equal(res, expected) + + def test_add_categories_existing_raises(self): + # new is in old categories + cat = Categorical(["a", "b", "c", "d"], ordered=True) + msg = re.escape("new categories must not include old categories: {'d'}") + with pytest.raises(ValueError, match=msg): + cat.add_categories(["d"]) + + def test_set_categories(self): 
+ cat = Categorical(["a", "b", "c", "a"], ordered=True) + exp_categories = Index(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) + + res = cat.set_categories(["c", "b", "a"], inplace=True) + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + assert res is None + + res = cat.set_categories(["a", "b", "c"]) + # cat must be the same as before + tm.assert_index_equal(cat.categories, exp_categories) + tm.assert_numpy_array_equal(cat.__array__(), exp_values) + # only res is changed + exp_categories_back = Index(["a", "b", "c"]) + tm.assert_index_equal(res.categories, exp_categories_back) + tm.assert_numpy_array_equal(res.__array__(), exp_values) + + # not all "old" included in "new" -> all not included ones are now + # np.nan + cat = Categorical(["a", "b", "c", "a"], ordered=True) + res = cat.set_categories(["a"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8)) + + # still not all "old" in "new" + res = cat.set_categories(["a", "b", "d"]) + tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8)) + tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) + + # all "old" included in "new" + cat = cat.set_categories(["a", "b", "c", "d"]) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_index_equal(cat.categories, exp_categories) + + # internals... + c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8)) + tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) + + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(c.to_dense(), exp) + + # all "pointers" to '4' must be changed from 3 to 0,... 
+ c = c.set_categories([4, 3, 2, 1]) + + # positions are changed + tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8)) + + # categories are now in new order + tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) + + # output is the same + exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) + tm.assert_numpy_array_equal(c.to_dense(), exp) + assert c.min() == 4 + assert c.max() == 1 + + # set_categories should set the ordering if specified + c2 = c.set_categories([4, 3, 2, 1], ordered=False) + assert not c2.ordered + + tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense()) + + # set_categories should pass thru the ordering + c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) + assert not c2.ordered + + tm.assert_numpy_array_equal(c.to_dense(), c2.to_dense()) + + @pytest.mark.parametrize( + "values, categories, new_categories", + [ + # No NaNs, same cats, same order + (["a", "b", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["a", "b", "a"], ["a", "b"], ["b", "a"]), + # Same, unsorted + (["b", "a", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["b", "a", "a"], ["a", "b"], ["b", "a"]), + # NaNs + (["a", "b", "c"], ["a", "b"], ["a", "b"]), + (["a", "b", "c"], ["a", "b"], ["b", "a"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + # Introduce NaNs + (["a", "b", "c"], ["a", "b"], ["a"]), + (["a", "b", "c"], ["a", "b"], ["b"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + # No overlap + (["a", "b", "c"], ["a", "b"], ["d", "e"]), + ], + ) + @pytest.mark.parametrize("ordered", [True, False]) + def test_set_categories_many(self, values, categories, new_categories, ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c.set_categories(new_categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def 
test_set_categories_rename_less(self): + # GH 24675 + cat = Categorical(["A", "B"]) + result = cat.set_categories(["A"], rename=True) + expected = Categorical(["A", np.nan]) + tm.assert_categorical_equal(result, expected) + + def test_set_categories_private(self): + cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) + cat._set_categories(["a", "c", "d", "e"]) + expected = Categorical(["a", "c", "d"], categories=list("acde")) + tm.assert_categorical_equal(cat, expected) + + # fastpath + cat = Categorical(["a", "b", "c"], categories=["a", "b", "c", "d"]) + cat._set_categories(["a", "c", "d", "e"], fastpath=True) + expected = Categorical(["a", "c", "d"], categories=list("acde")) + tm.assert_categorical_equal(cat, expected) + + def test_remove_categories(self): + cat = Categorical(["a", "b", "c", "a"], ordered=True) + old = cat.copy() + new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], ordered=True) + + # first inplace == False + res = cat.remove_categories("c") + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + res = cat.remove_categories(["c"]) + tm.assert_categorical_equal(cat, old) + tm.assert_categorical_equal(res, new) + + # inplace == True + res = cat.remove_categories("c", inplace=True) + tm.assert_categorical_equal(cat, new) + assert res is None + + @pytest.mark.parametrize("removals", [["c"], ["c", np.nan], "c", ["c", "c"]]) + def test_remove_categories_raises(self, removals): + cat = Categorical(["a", "b", "a"]) + message = re.escape("removals must all be in old categories: {'c'}") + + with pytest.raises(ValueError, match=message): + cat.remove_categories(removals) + + def test_remove_unused_categories(self): + c = Categorical(["a", "b", "c", "d", "a"], categories=["a", "b", "c", "d", "e"]) + exp_categories_all = Index(["a", "b", "c", "d", "e"]) + exp_categories_dropped = Index(["a", "b", "c", "d"]) + + tm.assert_index_equal(c.categories, exp_categories_all) + + res = 
c.remove_unused_categories() + tm.assert_index_equal(res.categories, exp_categories_dropped) + tm.assert_index_equal(c.categories, exp_categories_all) + + res = c.remove_unused_categories(inplace=True) + tm.assert_index_equal(c.categories, exp_categories_dropped) + assert res is None + + # with NaN values (GH11599) + c = Categorical(["a", "b", "c", np.nan], categories=["a", "b", "c", "d", "e"]) + res = c.remove_unused_categories() + tm.assert_index_equal(res.categories, Index(np.array(["a", "b", "c"]))) + exp_codes = np.array([0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(res.codes, exp_codes) + tm.assert_index_equal(c.categories, exp_categories_all) + + val = ["F", np.nan, "D", "B", "D", "F", np.nan] + cat = Categorical(values=val, categories=list("ABCDEFG")) + out = cat.remove_unused_categories() + tm.assert_index_equal(out.categories, Index(["B", "D", "F"])) + exp_codes = np.array([2, -1, 1, 0, 1, 2, -1], dtype=np.int8) + tm.assert_numpy_array_equal(out.codes, exp_codes) + assert out.tolist() == val + + alpha = list("abcdefghijklmnopqrstuvwxyz") + val = np.random.choice(alpha[::2], 10000).astype("object") + val[np.random.choice(len(val), 100)] = np.nan + + cat = Categorical(values=val, categories=alpha) + out = cat.remove_unused_categories() + assert out.tolist() == val.tolist() + + +class TestCategoricalAPIWithFactor(TestCategorical): + def test_describe(self): + # string type + desc = self.factor.describe() + assert self.factor.ordered + exp_index = CategoricalIndex( + ["a", "b", "c"], name="categories", ordered=self.factor.ordered + ) + expected = DataFrame( + {"counts": [3, 2, 3], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0]}, index=exp_index + ) + tm.assert_frame_equal(desc, expected) + + # check unused categories + cat = self.factor.copy() + cat.set_categories(["a", "b", "c", "d"], inplace=True) + desc = cat.describe() + + exp_index = CategoricalIndex( + list("abcd"), ordered=self.factor.ordered, name="categories" + ) + expected = DataFrame( + 
{"counts": [3, 2, 3, 0], "freqs": [3 / 8.0, 2 / 8.0, 3 / 8.0, 0]}, + index=exp_index, + ) + tm.assert_frame_equal(desc, expected) + + # check an integer one + cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]) + desc = cat.describe() + exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered, name="categories") + expected = DataFrame( + {"counts": [5, 3, 3], "freqs": [5 / 11.0, 3 / 11.0, 3 / 11.0]}, + index=exp_index, + ) + tm.assert_frame_equal(desc, expected) + + # https://github.com/pandas-dev/pandas/issues/3678 + # describe should work with NaN + cat = Categorical([np.nan, 1, 2, 2]) + desc = cat.describe() + expected = DataFrame( + {"counts": [1, 2, 1], "freqs": [1 / 4.0, 2 / 4.0, 1 / 4.0]}, + index=CategoricalIndex( + [1, 2, np.nan], categories=[1, 2], name="categories" + ), + ) + tm.assert_frame_equal(desc, expected) + + def test_set_categories_inplace(self): + cat = self.factor.copy() + cat.set_categories(["a", "b", "c", "d"], inplace=True) + tm.assert_index_equal(cat.categories, Index(["a", "b", "c", "d"])) + + +class TestPrivateCategoricalAPI: + def test_codes_immutable(self): + + # Codes should be read only + c = Categorical(["a", "b", "c", "a", np.nan]) + exp = np.array([0, 1, 2, 0, -1], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + + # Assignments to codes should raise + with pytest.raises(ValueError, match="cannot set Categorical codes directly"): + c.codes = np.array([0, 1, 2, 0, 1], dtype="int8") + + # changes in the codes array should raise + codes = c.codes + + with pytest.raises(ValueError, match="assignment destination is read-only"): + codes[4] = 1 + + # But even after getting the codes, the original array should still be + # writeable! 
+ c[4] = "a" + exp = np.array([0, 1, 2, 0, 0], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + c._codes[4] = 2 + exp = np.array([0, 1, 2, 0, 2], dtype="int8") + tm.assert_numpy_array_equal(c.codes, exp) + + @pytest.mark.parametrize( + "codes, old, new, expected", + [ + ([0, 1], ["a", "b"], ["a", "b"], [0, 1]), + ([0, 1], ["b", "a"], ["b", "a"], [0, 1]), + ([0, 1], ["a", "b"], ["b", "a"], [1, 0]), + ([0, 1], ["b", "a"], ["a", "b"], [1, 0]), + ([0, 1, 0, 1], ["a", "b"], ["a", "b", "c"], [0, 1, 0, 1]), + ([0, 1, 2, 2], ["a", "b", "c"], ["a", "b"], [0, 1, -1, -1]), + ([0, 1, -1], ["a", "b", "c"], ["a", "b", "c"], [0, 1, -1]), + ([0, 1, -1], ["a", "b", "c"], ["b"], [-1, 0, -1]), + ([0, 1, -1], ["a", "b", "c"], ["d"], [-1, -1, -1]), + ([0, 1, -1], ["a", "b", "c"], [], [-1, -1, -1]), + ([-1, -1], [], ["a", "b"], [-1, -1]), + ([1, 0], ["b", "a"], ["a", "b"], [0, 1]), + ], + ) + def test_recode_to_categories(self, codes, old, new, expected): + codes = np.asanyarray(codes, dtype=np.int8) + expected = np.asanyarray(expected, dtype=np.int8) + old = Index(old) + new = Index(new) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + def test_recode_to_categories_large(self): + N = 1000 + codes = np.arange(N) + old = Index(codes) + expected = np.arange(N - 1, -1, -1, dtype=np.int16) + new = Index(expected) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py new file mode 100644 index 00000000..d5537359 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -0,0 +1,644 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.compat.numpy import _np_version_under1p16 + +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype +from pandas.core.dtypes.dtypes import CategoricalDtype + +import 
pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestCategoricalConstructors: + def test_validate_ordered(self): + # see gh-14058 + exp_msg = "'ordered' must either be 'True' or 'False'" + exp_err = TypeError + + # This should be a boolean. + ordered = np.array([0, 1, 2]) + + with pytest.raises(exp_err, match=exp_msg): + Categorical([1, 2, 3], ordered=ordered) + + with pytest.raises(exp_err, match=exp_msg): + Categorical.from_codes( + [0, 0, 1], categories=["a", "b", "c"], ordered=ordered + ) + + def test_constructor_empty(self): + # GH 17248 + c = Categorical([]) + expected = Index([]) + tm.assert_index_equal(c.categories, expected) + + c = Categorical([], categories=[1, 2, 3]) + expected = pd.Int64Index([1, 2, 3]) + tm.assert_index_equal(c.categories, expected) + + def test_constructor_empty_boolean(self): + # see gh-22702 + cat = pd.Categorical([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + + def test_constructor_tuples(self): + values = np.array([(1,), (1, 2), (1,), (1, 2)], dtype=object) + result = Categorical(values) + expected = Index([(1,), (1, 2)], tupleize_cols=False) + tm.assert_index_equal(result.categories, expected) + assert result.ordered is False + + def test_constructor_tuples_datetimes(self): + # numpy will auto reshape when all of the tuples are the + # same len, so add an extra one with 2 items and slice it off + values = np.array( + [ + (Timestamp("2010-01-01"),), + (Timestamp("2010-01-02"),), + (Timestamp("2010-01-01"),), + (Timestamp("2010-01-02"),), + ("a", "b"), + ], + dtype=object, + )[:-1] + result = Categorical(values) + expected = Index( + [(Timestamp("2010-01-01"),), (Timestamp("2010-01-02"),)], + tupleize_cols=False, + ) + 
tm.assert_index_equal(result.categories, expected) + + def test_constructor_unsortable(self): + + # it works! + arr = np.array([1, 2, 3, datetime.now()], dtype="O") + factor = Categorical(arr, ordered=False) + assert not factor.ordered + + # this however will raise as cannot be sorted + msg = ( + "'values' is not ordered, please explicitly specify the " + "categories order by passing in a categories argument." + ) + with pytest.raises(TypeError, match=msg): + Categorical(arr, ordered=True) + + def test_constructor_interval(self): + result = Categorical( + [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True + ) + ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) + exp = Categorical(ii, ordered=True) + tm.assert_categorical_equal(result, exp) + tm.assert_index_equal(result.categories, ii) + + def test_constructor(self): + + exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) + c1 = Categorical(exp_arr) + tm.assert_numpy_array_equal(c1.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + c2 = Categorical(exp_arr, categories=["c", "b", "a"]) + tm.assert_numpy_array_equal(c2.__array__(), exp_arr) + + # categories must be unique + msg = "Categorical categories must be unique" + with pytest.raises(ValueError, match=msg): + Categorical([1, 2], [1, 2, 2]) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ["a", "b", "b"]) + + # The default should be unordered + c1 = Categorical(["a", "b", "c", "a"]) + assert not c1.ordered + + # Categorical as input + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1) + tm.assert_categorical_equal(c1, c2) + + c1 = 
Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1, categories=["a", "b", "c"]) + tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) + tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) + + # Series of dtype category + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(Series(c1)) + tm.assert_categorical_equal(c1, c2) + + # Series + c1 = Categorical(["a", "b", "c", "a"]) + c2 = Categorical(Series(["a", "b", "c", "a"])) + tm.assert_categorical_equal(c1, c2) + + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) + tm.assert_categorical_equal(c1, c2) + + # This should result in integer categories, not float! + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # https://github.com/pandas-dev/pandas/issues/3678 + cat = Categorical([np.nan, 1, 2, 3]) + assert is_integer_dtype(cat.categories) + + # this should result in floats + cat = Categorical([np.nan, 1, 2.0, 3]) + assert is_float_dtype(cat.categories) + + cat = Categorical([np.nan, 1.0, 2.0, 3.0]) + assert is_float_dtype(cat.categories) + + # This doesn't work -> this would probably need some kind of "remember + # the original type" feature to try to cast the array interface result + # to... 
+ + # vals = np.asarray(cat[cat.notna()]) + # assert is_integer_dtype(vals) + + # corner cases + cat = Categorical([1]) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + cat = Categorical(["a"]) + assert len(cat.categories) == 1 + assert cat.categories[0] == "a" + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + # Scalars should be converted to lists + cat = Categorical(1) + assert len(cat.categories) == 1 + assert cat.categories[0] == 1 + assert len(cat.codes) == 1 + assert cat.codes[0] == 0 + + # two arrays + # - when the first is an integer dtype and the second is not + # - when the resulting codes are all -1/NaN + with tm.assert_produces_warning(None): + c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) # noqa + + with tm.assert_produces_warning(None): + c_old = Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) # noqa + + # the next one are from the old docs + with tm.assert_produces_warning(None): + c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa + cat = Categorical([1, 2], categories=[1, 2, 3]) + + # this is a legitimate constructor + with tm.assert_produces_warning(None): + c = Categorical( # noqa + np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True + ) + + def test_constructor_with_existing_categories(self): + # GH25318: constructing with pd.Series used to bogusly skip recoding + # categories + c0 = Categorical(["a", "b", "c", "a"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"]) + + c2 = Categorical(c0, categories=c1.categories) + tm.assert_categorical_equal(c1, c2) + + c3 = Categorical(Series(c0), categories=c1.categories) + tm.assert_categorical_equal(c1, c3) + + def test_constructor_not_sequence(self): + # https://github.com/pandas-dev/pandas/issues/16022 + msg = r"^Parameter 'categories' must be list-like, was" + with pytest.raises(TypeError, match=msg): + Categorical(["a", "b"], categories="a") + + def 
test_constructor_with_null(self): + + # Cannot have NaN in categories + msg = "Categorial categories cannot be null" + with pytest.raises(ValueError, match=msg): + Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"]) + + with pytest.raises(ValueError, match=msg): + Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"]) + + with pytest.raises(ValueError, match=msg): + Categorical( + DatetimeIndex(["nat", "20160101"]), + categories=[NaT, Timestamp("20160101")], + ) + + def test_constructor_with_index(self): + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + tm.assert_categorical_equal(ci.values, Categorical(ci)) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + tm.assert_categorical_equal( + ci.values, Categorical(ci.astype(object), categories=ci.categories) + ) + + def test_constructor_with_generator(self): + # This was raising an Error in isna(single_val).any() because isna + # returned a scalar for a generator + + exp = Categorical([0, 1, 2]) + cat = Categorical((x for x in [0, 1, 2])) + tm.assert_categorical_equal(cat, exp) + cat = Categorical(range(3)) + tm.assert_categorical_equal(cat, exp) + + MultiIndex.from_product([range(5), ["a", "b", "c"]]) + + # check that categories accept generators and sequences + cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) + tm.assert_categorical_equal(cat, exp) + cat = Categorical([0, 1, 2], categories=range(3)) + tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize( + "dtl", + [ + date_range("1995-01-01 00:00:00", periods=5, freq="s"), + date_range("1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"), + timedelta_range("1 day", periods=5, freq="s"), + ], + ) + def test_constructor_with_datetimelike(self, dtl): + # see gh-12077 + # constructor with a datetimelike and NaT + + s = Series(dtl) + c = Categorical(s) + + expected = type(dtl)(s) + expected._data.freq = None + + tm.assert_index_equal(c.categories, expected) + 
tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8")) + + # with NaT + s2 = s.copy() + s2.iloc[-1] = NaT + c = Categorical(s2) + + expected = type(dtl)(s2.dropna()) + expected._data.freq = None + + tm.assert_index_equal(c.categories, expected) + + exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) + tm.assert_numpy_array_equal(c.codes, exp) + + result = repr(c) + assert "NaT" in result + + def test_constructor_from_index_series_datetimetz(self): + idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_from_index_series_timedelta(self): + idx = timedelta_range("1 days", freq="D", periods=3) + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_from_index_series_period(self): + idx = period_range("2015-01-01", freq="D", periods=3) + result = Categorical(idx) + tm.assert_index_equal(result.categories, idx) + + result = Categorical(Series(idx)) + tm.assert_index_equal(result.categories, idx) + + def test_constructor_invariant(self): + # GH 14190 + vals = [ + np.array([1.0, 1.2, 1.8, np.nan]), + np.array([1, 2, 3], dtype="int64"), + ["a", "b", "c", np.nan], + [pd.Period("2014-01"), pd.Period("2014-02"), NaT], + [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT], + [ + Timestamp("2014-01-01", tz="US/Eastern"), + Timestamp("2014-01-02", tz="US/Eastern"), + NaT, + ], + ] + for val in vals: + c = Categorical(val) + c2 = Categorical(c) + tm.assert_categorical_equal(c, c2) + + @pytest.mark.parametrize("ordered", [True, False]) + def test_constructor_with_dtype(self, ordered): + categories = ["b", "a", "c"] + dtype = CategoricalDtype(categories, ordered=ordered) + result = Categorical(["a", "b", "a", "c"], dtype=dtype) + expected 
= Categorical( + ["a", "b", "a", "c"], categories=categories, ordered=ordered + ) + tm.assert_categorical_equal(result, expected) + assert result.ordered is ordered + + def test_constructor_dtype_and_others_raises(self): + dtype = CategoricalDtype(["a", "b"], ordered=True) + msg = "Cannot specify `categories` or `ordered` together with `dtype`." + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], categories=["a", "b"], dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ordered=True, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Categorical(["a", "b"], ordered=False, dtype=dtype) + + @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]]) + @pytest.mark.parametrize("ordered", [True, False]) + def test_constructor_str_category(self, categories, ordered): + result = Categorical( + ["a", "b"], categories=categories, ordered=ordered, dtype="category" + ) + expected = Categorical(["a", "b"], categories=categories, ordered=ordered) + tm.assert_categorical_equal(result, expected) + + def test_constructor_str_unknown(self): + with pytest.raises(ValueError, match="Unknown dtype"): + Categorical([1, 2], dtype="foo") + + def test_constructor_np_strs(self): + # GH#31499 Hastable.map_locations needs to work on np.str_ objects + cat = pd.Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) + assert all(isinstance(x, np.str_) for x in cat.categories) + + def test_constructor_from_categorical_with_dtype(self): + dtype = CategoricalDtype(["a", "b", "c"], ordered=True) + values = Categorical(["a", "b", "d"]) + result = Categorical(values, dtype=dtype) + # We use dtype.categories, not values.categories + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_with_unknown_dtype(self): + dtype = CategoricalDtype(None, ordered=True) + values = Categorical(["a", "b", "d"]) + 
result = Categorical(values, dtype=dtype) + # We use values.categories, not dtype.categories + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "d"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_constructor_from_categorical_string(self): + values = Categorical(["a", "b", "d"]) + # use categories, ordered + result = Categorical( + values, categories=["a", "b", "c"], ordered=True, dtype="category" + ) + expected = Categorical( + ["a", "b", "d"], categories=["a", "b", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + # No string + result = Categorical(values, categories=["a", "b", "c"], ordered=True) + tm.assert_categorical_equal(result, expected) + + def test_constructor_with_categorical_categories(self): + # GH17884 + expected = Categorical(["a", "b"], categories=["a", "b", "c"]) + + result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list]) + def test_construction_with_null(self, klass, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/31927 + values = klass(["a", nulls_fixture, "b"]) + result = Categorical(values) + + dtype = CategoricalDtype(["a", "b"]) + codes = [0, -1, 1] + expected = Categorical.from_codes(codes=codes, dtype=dtype) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes(self): + + # too few categories + dtype = CategoricalDtype(categories=[1, 2]) + msg = "codes need to be between " + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([1, 2], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([1, 2], dtype=dtype) + + # no int codes + msg = "codes need to be array-like integers" + with 
pytest.raises(ValueError, match=msg): + Categorical.from_codes(["a"], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes(["a"], dtype=dtype) + + # no unique categories + with pytest.raises(ValueError, match="Categorical categories must be unique"): + Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) + + # NaN categories included + with pytest.raises(ValueError, match="Categorial categories cannot be null"): + Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) + + # too negative + dtype = CategoricalDtype(categories=["a", "b", "c"]) + msg = r"codes need to be between -1 and len\(categories\)-1" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([-2, 1, 2], categories=dtype.categories) + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([-2, 1, 2], dtype=dtype) + + exp = Categorical(["a", "b", "c"], ordered=False) + res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) + tm.assert_categorical_equal(exp, res) + + res = Categorical.from_codes([0, 1, 2], dtype=dtype) + tm.assert_categorical_equal(exp, res) + + def test_from_codes_with_categorical_categories(self): + # GH17884 + expected = Categorical(["a", "b"], categories=["a", "b", "c"]) + + result = Categorical.from_codes([0, 1], categories=Categorical(["a", "b", "c"])) + tm.assert_categorical_equal(result, expected) + + result = Categorical.from_codes( + [0, 1], categories=CategoricalIndex(["a", "b", "c"]) + ) + tm.assert_categorical_equal(result, expected) + + # non-unique Categorical still raises + with pytest.raises(ValueError, match="Categorical categories must be unique"): + Categorical.from_codes([0, 1], Categorical(["a", "b", "a"])) + + def test_from_codes_with_nan_code(self): + # GH21767 + codes = [1, 2, np.nan] + dtype = CategoricalDtype(categories=["a", "b", "c"]) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, 
categories=dtype.categories) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype=dtype) + + def test_from_codes_with_float(self): + # GH21767 + codes = [1.0, 2.0, 0] # integer, but in float dtype + dtype = CategoricalDtype(categories=["a", "b", "c"]) + + # empty codes should not raise for floats + Categorical.from_codes([], dtype.categories) + + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype.categories) + + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype=dtype) + + codes = [1.1, 2.0, 0] # non-integer + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype.categories) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype=dtype) + + def test_from_codes_with_dtype_raises(self): + msg = "Cannot specify" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes( + [0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"]) + ) + + with pytest.raises(ValueError, match=msg): + Categorical.from_codes( + [0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"]) + ) + + def test_from_codes_neither(self): + msg = "Both were None" + with pytest.raises(ValueError, match=msg): + Categorical.from_codes([0, 1]) + + def test_from_codes_with_nullable_int(self): + codes = pd.array([0, 1], dtype="Int64") + categories = ["a", "b"] + + result = Categorical.from_codes(codes, categories=categories) + expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) + + tm.assert_categorical_equal(result, expected) + + def test_from_codes_with_nullable_int_na_raises(self): + codes = pd.array([0, None], dtype="Int64") + categories = ["a", "b"] + + msg = "codes cannot contain NA values" + with pytest.raises(ValueError, match=msg): + 
Categorical.from_codes(codes, categories=categories) + + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_from_inferred_categories(self, dtype): + cats = ["a", "b"] + codes = np.array([0, 0, 1, 1], dtype="i8") + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes(codes, cats) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("dtype", [None, "category"]) + def test_from_inferred_categories_sorts(self, dtype): + cats = ["b", "a"] + codes = np.array([0, 1, 1, 1], dtype="i8") + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_dtype(self): + cats = ["a", "b", "d"] + codes = np.array([0, 1, 0, 2], dtype="i8") + dtype = CategoricalDtype(["c", "b", "a"], ordered=True) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical( + ["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_from_inferred_categories_coerces(self): + cats = ["1", "2", "bad"] + codes = np.array([0, 0, 1, 2], dtype="i8") + dtype = CategoricalDtype([1, 2]) + result = Categorical._from_inferred_categories(cats, codes, dtype) + expected = Categorical([1, 1, 2, np.nan]) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("ordered", [None, True, False]) + def test_construction_with_ordered(self, ordered): + # GH 9347, 9190 + cat = Categorical([0, 1, 2], ordered=ordered) + assert cat.ordered == bool(ordered) + + @pytest.mark.xfail(reason="Imaginary values not supported in Categorical") + def test_constructor_imaginary(self): + values = [1, 2, 3 + 1j] + c1 = Categorical(values) + tm.assert_index_equal(c1.categories, Index(values)) + tm.assert_numpy_array_equal(np.array(c1), np.array(values)) + + 
@pytest.mark.skipif(_np_version_under1p16, reason="Skipping for NumPy <1.16") + def test_constructor_string_and_tuples(self): + # GH 21416 + c = pd.Categorical(np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object)) + expected_index = pd.Index([("a", "b"), ("b", "a"), "c"]) + assert c.categories.equals(expected_index) diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py new file mode 100644 index 00000000..19746d7d --- /dev/null +++ b/pandas/tests/arrays/categorical/test_dtypes.py @@ -0,0 +1,173 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import Categorical, CategoricalIndex, Index, Series, Timestamp +import pandas._testing as tm + + +class TestCategoricalDtypes: + def test_is_equal_dtype(self): + + # test dtype comparisons between cats + + c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False) + c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False) + c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True) + assert c1.is_dtype_equal(c1) + assert c2.is_dtype_equal(c2) + assert c3.is_dtype_equal(c3) + assert c1.is_dtype_equal(c2) + assert not c1.is_dtype_equal(c3) + assert not c1.is_dtype_equal(Index(list("aabca"))) + assert not c1.is_dtype_equal(c1.astype(object)) + assert c1.is_dtype_equal(CategoricalIndex(c1)) + assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab"))) + assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True)) + + # GH 16659 + s1 = Series(c1) + s2 = Series(c2) + s3 = Series(c3) + assert c1.is_dtype_equal(s1) + assert c2.is_dtype_equal(s2) + assert c3.is_dtype_equal(s3) + assert c1.is_dtype_equal(s2) + assert not c1.is_dtype_equal(s3) + assert not c1.is_dtype_equal(s1.astype(object)) + + def test_set_dtype_same(self): + c = Categorical(["a", "b", "c"]) + result = c._set_dtype(CategoricalDtype(["a", "b", "c"])) + tm.assert_categorical_equal(result, c) + + 
def test_set_dtype_new_categories(self): + c = Categorical(["a", "b", "c"]) + result = c._set_dtype(CategoricalDtype(list("abcd"))) + tm.assert_numpy_array_equal(result.codes, c.codes) + tm.assert_index_equal(result.dtype.categories, Index(list("abcd"))) + + @pytest.mark.parametrize( + "values, categories, new_categories", + [ + # No NaNs, same cats, same order + (["a", "b", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["a", "b", "a"], ["a", "b"], ["b", "a"]), + # Same, unsorted + (["b", "a", "a"], ["a", "b"], ["a", "b"]), + # No NaNs, same cats, different order + (["b", "a", "a"], ["a", "b"], ["b", "a"]), + # NaNs + (["a", "b", "c"], ["a", "b"], ["a", "b"]), + (["a", "b", "c"], ["a", "b"], ["b", "a"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + (["b", "a", "c"], ["a", "b"], ["a", "b"]), + # Introduce NaNs + (["a", "b", "c"], ["a", "b"], ["a"]), + (["a", "b", "c"], ["a", "b"], ["b"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + (["b", "a", "c"], ["a", "b"], ["a"]), + # No overlap + (["a", "b", "c"], ["a", "b"], ["d", "e"]), + ], + ) + @pytest.mark.parametrize("ordered", [True, False]) + def test_set_dtype_many(self, values, categories, new_categories, ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c._set_dtype(expected.dtype) + tm.assert_categorical_equal(result, expected) + + def test_set_dtype_no_overlap(self): + c = Categorical(["a", "b", "c"], ["d", "e"]) + result = c._set_dtype(CategoricalDtype(["a", "b"])) + expected = Categorical([None, None, None], categories=["a", "b"]) + tm.assert_categorical_equal(result, expected) + + def test_codes_dtypes(self): + + # GH 8453 + result = Categorical(["foo", "bar", "baz"]) + assert result.codes.dtype == "int8" + + result = Categorical(["foo{i:05d}".format(i=i) for i in range(400)]) + assert result.codes.dtype == "int16" + + result = Categorical(["foo{i:05d}".format(i=i) for i in range(40000)]) + assert result.codes.dtype 
== "int32" + + # adding cats + result = Categorical(["foo", "bar", "baz"]) + assert result.codes.dtype == "int8" + result = result.add_categories(["foo{i:05d}".format(i=i) for i in range(400)]) + assert result.codes.dtype == "int16" + + # removing cats + result = result.remove_categories( + ["foo{i:05d}".format(i=i) for i in range(300)] + ) + assert result.codes.dtype == "int8" + + @pytest.mark.parametrize("ordered", [True, False]) + def test_astype(self, ordered): + # string + cat = Categorical(list("abbaaccc"), ordered=ordered) + result = cat.astype(object) + expected = np.array(cat) + tm.assert_numpy_array_equal(result, expected) + + msg = "could not convert string to float" + with pytest.raises(ValueError, match=msg): + cat.astype(float) + + # numeric + cat = Categorical([0, 1, 2, 2, 1, 0, 1, 0, 2], ordered=ordered) + result = cat.astype(object) + expected = np.array(cat, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(int) + expected = np.array(cat, dtype=np.int) + tm.assert_numpy_array_equal(result, expected) + + result = cat.astype(float) + expected = np.array(cat, dtype=np.float) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("cat_ordered", [True, False]) + def test_astype_category(self, dtype_ordered, cat_ordered): + # GH 10696/18593 + data = list("abcaacbab") + cat = Categorical(data, categories=list("bac"), ordered=cat_ordered) + + # standard categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = cat.astype(dtype) + expected = Categorical(data, categories=cat.categories, ordered=dtype_ordered) + tm.assert_categorical_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(list("adc"), dtype_ordered) + result = cat.astype(dtype) + expected = Categorical(data, dtype=dtype) + tm.assert_categorical_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify 
ordered, so only test once + result = cat.astype("category") + expected = cat + tm.assert_categorical_equal(result, expected) + + def test_iter_python_types(self): + # GH-19909 + cat = Categorical([1, 2]) + assert isinstance(list(cat)[0], int) + assert isinstance(cat.tolist()[0], int) + + def test_iter_python_types_datetime(self): + cat = Categorical([Timestamp("2017-01-01"), Timestamp("2017-01-02")]) + assert isinstance(list(cat)[0], Timestamp) + assert isinstance(cat.tolist()[0], Timestamp) diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py new file mode 100644 index 00000000..3d9469c2 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -0,0 +1,280 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Categorical, CategoricalIndex, Index, PeriodIndex, Series +import pandas._testing as tm +import pandas.core.common as com +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalIndexingWithFactor(TestCategorical): + def test_getitem(self): + assert self.factor[0] == "a" + assert self.factor[-1] == "c" + + subf = self.factor[[0, 1, 2]] + tm.assert_numpy_array_equal(subf._codes, np.array([0, 1, 1], dtype=np.int8)) + + subf = self.factor[np.asarray(self.factor) == "c"] + tm.assert_numpy_array_equal(subf._codes, np.array([2, 2, 2], dtype=np.int8)) + + def test_setitem(self): + + # int/positional + c = self.factor.copy() + c[0] = "b" + assert c[0] == "b" + c[-1] = "a" + assert c[-1] == "a" + + # boolean + c = self.factor.copy() + indexer = np.zeros(len(c), dtype="bool") + indexer[0] = True + indexer[-1] = True + c[indexer] = "c" + expected = Categorical(["c", "b", "b", "a", "a", "c", "c", "c"], ordered=True) + + tm.assert_categorical_equal(c, expected) + + @pytest.mark.parametrize( + "other", + [pd.Categorical(["b", "a"]), pd.Categorical(["b", "a"], categories=["b", "a"])], + ) + def test_setitem_same_but_unordered(self, 
other): + # GH-24142 + target = pd.Categorical(["a", "b"], categories=["a", "b"]) + mask = np.array([True, False]) + target[mask] = other[mask] + expected = pd.Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_categorical_equal(target, expected) + + @pytest.mark.parametrize( + "other", + [ + pd.Categorical(["b", "a"], categories=["b", "a", "c"]), + pd.Categorical(["b", "a"], categories=["a", "b", "c"]), + pd.Categorical(["a", "a"], categories=["a"]), + pd.Categorical(["b", "b"], categories=["b"]), + ], + ) + def test_setitem_different_unordered_raises(self, other): + # GH-24142 + target = pd.Categorical(["a", "b"], categories=["a", "b"]) + mask = np.array([True, False]) + msg = "Cannot set a Categorical with another, without identical categories" + with pytest.raises(ValueError, match=msg): + target[mask] = other[mask] + + @pytest.mark.parametrize( + "other", + [ + pd.Categorical(["b", "a"]), + pd.Categorical(["b", "a"], categories=["b", "a"], ordered=True), + pd.Categorical(["b", "a"], categories=["a", "b", "c"], ordered=True), + ], + ) + def test_setitem_same_ordered_rasies(self, other): + # Gh-24142 + target = pd.Categorical(["a", "b"], categories=["a", "b"], ordered=True) + mask = np.array([True, False]) + msg = "Cannot set a Categorical with another, without identical categories" + with pytest.raises(ValueError, match=msg): + target[mask] = other[mask] + + +class TestCategoricalIndexing: + def test_getitem_listlike(self): + + # GH 9469 + # properly coerce the input indexers + np.random.seed(1) + c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)) + result = c.codes[np.array([100000]).astype(np.int64)] + expected = c[np.array([100000]).astype(np.int64)].codes + tm.assert_numpy_array_equal(result, expected) + + def test_periodindex(self): + idx1 = PeriodIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ) + + cat1 = Categorical(idx1) + str(cat1) + exp_arr = np.array([0, 0, 1, 1, 2, 2], 
dtype=np.int8) + exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + tm.assert_numpy_array_equal(cat1._codes, exp_arr) + tm.assert_index_equal(cat1.categories, exp_idx) + + idx2 = PeriodIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ) + cat2 = Categorical(idx2, ordered=True) + str(cat2) + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8) + exp_idx2 = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + tm.assert_numpy_array_equal(cat2._codes, exp_arr) + tm.assert_index_equal(cat2.categories, exp_idx2) + + idx3 = PeriodIndex( + [ + "2013-12", + "2013-11", + "2013-10", + "2013-09", + "2013-08", + "2013-07", + "2013-05", + ], + freq="M", + ) + cat3 = Categorical(idx3, ordered=True) + exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8) + exp_idx = PeriodIndex( + [ + "2013-05", + "2013-07", + "2013-08", + "2013-09", + "2013-10", + "2013-11", + "2013-12", + ], + freq="M", + ) + tm.assert_numpy_array_equal(cat3._codes, exp_arr) + tm.assert_index_equal(cat3.categories, exp_idx) + + def test_categories_assigments(self): + s = Categorical(["a", "b", "c", "a"]) + exp = np.array([1, 2, 3, 1], dtype=np.int64) + s.categories = [1, 2, 3] + tm.assert_numpy_array_equal(s.__array__(), exp) + tm.assert_index_equal(s.categories, Index([1, 2, 3])) + + @pytest.mark.parametrize("new_categories", [[1, 2, 3, 4], [1, 2]]) + def test_categories_assigments_wrong_length_raises(self, new_categories): + cat = Categorical(["a", "b", "c", "a"]) + msg = ( + "new categories need to have the same number of items " + "as the old categories!" 
+ ) + with pytest.raises(ValueError, match=msg): + cat.categories = new_categories + + # Combinations of sorted/unique: + @pytest.mark.parametrize( + "idx_values", [[1, 2, 3, 4], [1, 3, 2, 4], [1, 3, 3, 4], [1, 2, 2, 4]] + ) + # Combinations of missing/unique + @pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]]) + @pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex]) + def test_get_indexer_non_unique(self, idx_values, key_values, key_class): + # GH 21448 + key = key_class(key_values, categories=range(1, 5)) + # Test for flat index and CategoricalIndex with same/different cats: + for dtype in None, "category", key.dtype: + idx = Index(idx_values, dtype=dtype) + expected, exp_miss = idx.get_indexer_non_unique(key_values) + result, res_miss = idx.get_indexer_non_unique(key) + + tm.assert_numpy_array_equal(expected, result) + tm.assert_numpy_array_equal(exp_miss, res_miss) + + def test_where_unobserved_nan(self): + ser = pd.Series(pd.Categorical(["a", "b"])) + result = ser.where([True, False]) + expected = pd.Series(pd.Categorical(["a", None], categories=["a", "b"])) + tm.assert_series_equal(result, expected) + + # all NA + ser = pd.Series(pd.Categorical(["a", "b"])) + result = ser.where([False, False]) + expected = pd.Series(pd.Categorical([None, None], categories=["a", "b"])) + tm.assert_series_equal(result, expected) + + def test_where_unobserved_categories(self): + ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) + result = ser.where([True, True, False], other="b") + expected = pd.Series( + Categorical(["a", "b", "b"], categories=ser.cat.categories) + ) + tm.assert_series_equal(result, expected) + + def test_where_other_categorical(self): + ser = pd.Series(Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"])) + other = Categorical(["b", "c", "a"], categories=["a", "c", "b", "d"]) + result = ser.where([True, False, True], other) + expected = pd.Series(Categorical(["a", "c", "c"], 
dtype=ser.dtype)) + tm.assert_series_equal(result, expected) + + def test_where_new_category_raises(self): + ser = pd.Series(Categorical(["a", "b", "c"])) + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(ValueError, match=msg): + ser.where([True, False, True], "d") + + def test_where_ordered_differs_rasies(self): + ser = pd.Series( + Categorical(["a", "b", "c"], categories=["d", "c", "b", "a"], ordered=True) + ) + other = Categorical( + ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True + ) + with pytest.raises(ValueError, match="without identical categories"): + ser.where([True, False, True], other) + + +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean(index): + s = Series(range(3)) + idx = Categorical([True, False, True]) + if index: + idx = CategoricalIndex(idx) + + assert com.is_bool_indexer(idx) + result = s[idx] + expected = s[idx.astype("object")] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", [True, False]) +def test_mask_with_boolean_na_treated_as_false(index): + # https://github.com/pandas-dev/pandas/issues/31503 + s = Series(range(3)) + idx = Categorical([True, False, None]) + if index: + idx = CategoricalIndex(idx) + + result = s[idx] + expected = s[idx.fillna(False)] + + tm.assert_series_equal(result, expected) + + +@pytest.fixture +def non_coercible_categorical(monkeypatch): + """ + Monkeypatch Categorical.__array__ to ensure no implicit conversion. + + Raises + ------ + ValueError + When Categorical.__array__ is called. 
+ """ + # TODO(Categorical): identify other places where this may be + # useful and move to a conftest.py + def array(self, dtype=None): + raise ValueError("I cannot be converted.") + + with monkeypatch.context() as m: + m.setattr(Categorical, "__array__", array) + yield + + +def test_series_at(non_coercible_categorical): + arr = Categorical(["a", "b", "c"]) + ser = Series(arr) + result = ser.at[0] + assert result == "a" diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py new file mode 100644 index 00000000..19ab6d00 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -0,0 +1,135 @@ +import collections + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import Categorical, DataFrame, Index, Series, isna +import pandas._testing as tm + + +class TestCategoricalMissing: + def test_na_flags_int_categories(self): + # #1457 + + categories = list(range(10)) + labels = np.random.randint(0, 10, 20) + labels[::5] = -1 + + cat = Categorical(labels, categories, fastpath=True) + repr(cat) + + tm.assert_numpy_array_equal(isna(cat), labels == -1) + + def test_nan_handling(self): + + # Nans are represented as -1 in codes + c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) + c[1] = np.nan + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8)) + + # Adding nan to categories should make assigned nan point to the + # category! 
+ c = Categorical(["a", "b", np.nan, "a"]) + tm.assert_index_equal(c.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8)) + + def test_set_dtype_nans(self): + c = Categorical(["a", "b", np.nan]) + result = c._set_dtype(CategoricalDtype(["a", "c"])) + tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8")) + + def test_set_item_nan(self): + cat = Categorical([1, 2, 3]) + cat[1] = np.nan + + exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(cat, exp) + + @pytest.mark.parametrize( + "fillna_kwargs, msg", + [ + ( + dict(value=1, method="ffill"), + "Cannot specify both 'value' and 'method'.", + ), + (dict(), "Must specify a fill 'value' or 'method'."), + (dict(method="bad"), "Invalid fill method. Expecting .* bad"), + (dict(value=Series([1, 2, 3, 4, "a"])), "fill value must be in categories"), + ], + ) + def test_fillna_raises(self, fillna_kwargs, msg): + # https://github.com/pandas-dev/pandas/issues/19682 + # https://github.com/pandas-dev/pandas/issues/13628 + cat = Categorical([1, 2, 3, None, None]) + + with pytest.raises(ValueError, match=msg): + cat.fillna(**fillna_kwargs) + + @pytest.mark.parametrize("named", [True, False]) + def test_fillna_iterable_category(self, named): + # https://github.com/pandas-dev/pandas/issues/21097 + if named: + Point = collections.namedtuple("Point", "x y") + else: + Point = lambda *args: args # tuple + cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object)) + result = cat.fillna(Point(0, 0)) + expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)]) + + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na(self, 
values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + with pd.option_context("mode.use_inf_as_na", True): + cat = Categorical(values) + result = cat.isna() + tm.assert_numpy_array_equal(result, expected) + + result = Series(cat).isna() + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = DataFrame(cat).isna() + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values, expected", + [ + ([1, 2, 3], np.array([False, False, False])), + ([1, 2, np.nan], np.array([False, False, True])), + ([1, 2, np.inf], np.array([False, False, True])), + ([1, 2, pd.NA], np.array([False, False, True])), + ], + ) + def test_use_inf_as_na_outside_context(self, values, expected): + # https://github.com/pandas-dev/pandas/issues/33594 + # Using isna directly for Categorical will fail in general here + cat = Categorical(values) + + with pd.option_context("mode.use_inf_as_na", True): + result = pd.isna(cat) + tm.assert_numpy_array_equal(result, expected) + + result = pd.isna(Series(cat)) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + result = pd.isna(DataFrame(cat)) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py new file mode 100644 index 00000000..8643e7f6 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -0,0 +1,442 @@ +import operator +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import Categorical, DataFrame, Series, date_range +import pandas._testing as tm +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalOpsWithFactor(TestCategorical): + def test_categories_none_comparisons(self): + factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) + tm.assert_categorical_equal(factor, 
self.factor) + + def test_comparisons(self): + result = self.factor[self.factor == "a"] + expected = self.factor[np.asarray(self.factor) == "a"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor != "a"] + expected = self.factor[np.asarray(self.factor) != "a"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor < "c"] + expected = self.factor[np.asarray(self.factor) < "c"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor > "a"] + expected = self.factor[np.asarray(self.factor) > "a"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor >= "b"] + expected = self.factor[np.asarray(self.factor) >= "b"] + tm.assert_categorical_equal(result, expected) + + result = self.factor[self.factor <= "b"] + expected = self.factor[np.asarray(self.factor) <= "b"] + tm.assert_categorical_equal(result, expected) + + n = len(self.factor) + + other = self.factor[np.random.permutation(n)] + result = self.factor == other + expected = np.asarray(self.factor) == np.asarray(other) + tm.assert_numpy_array_equal(result, expected) + + result = self.factor == "d" + expected = np.zeros(len(self.factor), dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + # comparisons with categoricals + cat_rev = Categorical(["a", "b", "c"], categories=["c", "b", "a"], ordered=True) + cat_rev_base = Categorical( + ["b", "b", "b"], categories=["c", "b", "a"], ordered=True + ) + cat = Categorical(["a", "b", "c"], ordered=True) + cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev = np.array([True, False, False]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = np.array([False, False, True]) + tm.assert_numpy_array_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = np.array([False, 
False, True]) + tm.assert_numpy_array_equal(res, exp) + + # Only categories with same categories can be compared + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + cat > cat_rev + + cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"]) + + msg = ( + "Categoricals can only be compared if 'categories' are the same. " + "Categories are different lengths" + ) + with pytest.raises(TypeError, match=msg): + cat_rev > cat_rev_base2 + + # Only categories with same ordering information can be compared + cat_unorderd = cat.set_ordered(False) + assert not (cat > cat).any() + + msg = "Categoricals can only be compared if 'ordered' is the same" + with pytest.raises(TypeError, match=msg): + cat > cat_unorderd + + # comparison (in both directions) with Series will raise + s = Series(["b", "b", "b"]) + msg = ( + "Cannot compare a Categorical for op __gt__ with type" + r" " + ) + with pytest.raises(TypeError, match=msg): + cat > s + with pytest.raises(TypeError, match=msg): + cat_rev > s + with pytest.raises(TypeError, match=msg): + s < cat + with pytest.raises(TypeError, match=msg): + s < cat_rev + + # comparison with numpy.array will raise in both direction, but only on + # newer numpy versions + a = np.array(["b", "b", "b"]) + with pytest.raises(TypeError, match=msg): + cat > a + with pytest.raises(TypeError, match=msg): + cat_rev > a + + # Make sure that unequal comparison take the categories order in + # account + cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True) + exp = np.array([True, False, False]) + res = cat_rev > "b" + tm.assert_numpy_array_equal(res, exp) + + # check that zero-dim array gets unboxed + res = cat_rev > np.array("b") + tm.assert_numpy_array_equal(res, exp) + + +class TestCategoricalOps: + def test_compare_frame(self): + # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame + data = ["a", "b", 2, "a"] + cat = Categorical(data) + + 
df = DataFrame(cat) + + result = cat == df.T + expected = DataFrame([[True, True, True, True]]) + tm.assert_frame_equal(result, expected) + + result = cat[::-1] != df.T + expected = DataFrame([[False, True, True, False]]) + tm.assert_frame_equal(result, expected) + + def test_compare_frame_raises(self, all_compare_operators): + # alignment raises unless we transpose + op = getattr(operator, all_compare_operators) + cat = Categorical(["a", "b", 2, "a"]) + df = DataFrame(cat) + msg = "Unable to coerce to Series, length must be 1: given 4" + with pytest.raises(ValueError, match=msg): + op(cat, df) + + def test_datetime_categorical_comparison(self): + dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True) + tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True])) + tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True])) + + def test_reflected_comparison_with_scalars(self): + # GH8658 + cat = Categorical([1, 2, 3], ordered=True) + tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True])) + tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True])) + + def test_comparison_with_unknown_scalars(self): + # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal + cat = Categorical([1, 2, 3], ordered=True) + + msg = ( + "Cannot compare a Categorical for op __{}__ with a scalar, " + "which is not a category" + ) + with pytest.raises(TypeError, match=msg.format("lt")): + cat < 4 + with pytest.raises(TypeError, match=msg.format("gt")): + cat > 4 + with pytest.raises(TypeError, match=msg.format("gt")): + 4 < cat + with pytest.raises(TypeError, match=msg.format("lt")): + 4 > cat + + tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False])) + tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) + + def 
test_comparison_of_ordered_categorical_with_nan_to_scalar( + self, compare_operators_no_eq_ne + ): + # https://github.com/pandas-dev/pandas/issues/26504 + # BUG: fix ordered categorical comparison with missing values (#26504 ) + # and following comparisons with scalars in categories with missing + # values should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + scalar = 2 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) + + def test_comparison_of_ordered_categorical_with_nan_to_listlike( + self, compare_operators_no_eq_ne + ): + # https://github.com/pandas-dev/pandas/issues/26504 + # and following comparisons of missing values in ordered Categorical + # with listlike should be evaluated as False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) + actual = getattr(cat, compare_operators_no_eq_ne)(other) + tm.assert_numpy_array_equal(actual, expected) + + @pytest.mark.parametrize( + "data,reverse,base", + [(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])], + ) + def test_comparisons(self, data, reverse, base): + cat_rev = Series(Categorical(data, categories=reverse, ordered=True)) + cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True)) + cat = Series(Categorical(data, ordered=True)) + cat_base = Series( + Categorical(base, categories=cat.cat.categories, ordered=True) + ) + s = Series(base) + a = np.array(base) + + # comparisons need to take categories ordering into account + res_rev = cat_rev > cat_rev_base + exp_rev 
= Series([True, False, False]) + tm.assert_series_equal(res_rev, exp_rev) + + res_rev = cat_rev < cat_rev_base + exp_rev = Series([False, False, True]) + tm.assert_series_equal(res_rev, exp_rev) + + res = cat > cat_base + exp = Series([False, False, True]) + tm.assert_series_equal(res, exp) + + scalar = base[1] + res = cat > scalar + exp = Series([False, False, True]) + exp2 = cat.values > scalar + tm.assert_series_equal(res, exp) + tm.assert_numpy_array_equal(res.values, exp2) + res_rev = cat_rev > scalar + exp_rev = Series([True, False, False]) + exp_rev2 = cat_rev.values > scalar + tm.assert_series_equal(res_rev, exp_rev) + tm.assert_numpy_array_equal(res_rev.values, exp_rev2) + + # Only categories with same categories can be compared + msg = "Categoricals can only be compared if 'categories' are the same" + with pytest.raises(TypeError, match=msg): + cat > cat_rev + + # categorical cannot be compared to Series or numpy array, and also + # not the other way around + msg = ( + "Cannot compare a Categorical for op __gt__ with type" + r" " + ) + with pytest.raises(TypeError, match=msg): + cat > s + with pytest.raises(TypeError, match=msg): + cat_rev > s + with pytest.raises(TypeError, match=msg): + cat > a + with pytest.raises(TypeError, match=msg): + cat_rev > a + + with pytest.raises(TypeError, match=msg): + s < cat + with pytest.raises(TypeError, match=msg): + s < cat_rev + + with pytest.raises(TypeError, match=msg): + a < cat + with pytest.raises(TypeError, match=msg): + a < cat_rev + + @pytest.mark.parametrize( + "ctor", + [ + lambda *args, **kwargs: Categorical(*args, **kwargs), + lambda *args, **kwargs: Series(Categorical(*args, **kwargs)), + ], + ) + def test_unordered_different_order_equal(self, ctor): + # https://github.com/pandas-dev/pandas/issues/16014 + c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False) + c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + assert (c1 == c2).all() + + c1 = ctor(["a", "b"], categories=["a", "b"], 
ordered=False) + c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) + c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False) + assert (c1 != c2).all() + + c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False) + c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False) + result = c1 == c2 + tm.assert_numpy_array_equal(np.array(result), np.array([True, False])) + + def test_unordered_different_categories_raises(self): + c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False) + c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False) + + with pytest.raises(TypeError, match=("Categoricals can only be compared")): + c1 == c2 + + def test_compare_different_lengths(self): + c1 = Categorical([], categories=["a", "b"]) + c2 = Categorical([], categories=["a"]) + + msg = "Categories are different lengths" + with pytest.raises(TypeError, match=msg): + c1 == c2 + + def test_compare_unordered_different_order(self): + # https://github.com/pandas-dev/pandas/issues/16603#issuecomment- + # 349290078 + a = pd.Categorical(["a"], categories=["a", "b"]) + b = pd.Categorical(["b"], categories=["b", "a"]) + assert not a.equals(b) + + def test_numeric_like_ops(self): + + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + + # numeric ops should not succeed + for op, str_rep in [ + ("__add__", r"\+"), + ("__sub__", "-"), + ("__mul__", r"\*"), + ("__truediv__", "/"), + ]: + msg = r"Series cannot perform the operation {}|unsupported operand".format( + str_rep + ) + with pytest.raises(TypeError, match=msg): + getattr(df, op)(df) + + # reduction ops should not succeed (unless specifically 
defined, e.g. + # min/max) + s = df["value_group"] + for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]: + msg = "Categorical cannot perform the operation {}".format(op) + with pytest.raises(TypeError, match=msg): + getattr(s, op)(numeric_only=False) + + # mad technically works because it takes always the numeric data + + # numpy ops + s = Series(Categorical([1, 2, 3, 4])) + with pytest.raises( + TypeError, match="Categorical cannot perform the operation sum" + ): + np.sum(s) + + # numeric ops on a Series + for op, str_rep in [ + ("__add__", r"\+"), + ("__sub__", "-"), + ("__mul__", r"\*"), + ("__truediv__", "/"), + ]: + msg = r"Series cannot perform the operation {}|unsupported operand".format( + str_rep + ) + with pytest.raises(TypeError, match=msg): + getattr(s, op)(2) + + # invalid ufunc + msg = "Object with dtype category cannot perform the numpy op log" + with pytest.raises(TypeError, match=msg): + np.log(s) + + def test_contains(self): + # GH21508 + c = pd.Categorical(list("aabbca"), categories=list("cab")) + + assert "b" in c + assert "z" not in c + assert np.nan not in c + with pytest.raises(TypeError, match="unhashable type: 'list'"): + assert [1] in c + + # assert codes NOT in index + assert 0 not in c + assert 1 not in c + + c = pd.Categorical(list("aabbca") + [np.nan], categories=list("cab")) + assert np.nan in c + + @pytest.mark.parametrize( + "item, expected", + [ + (pd.Interval(0, 1), True), + (1.5, True), + (pd.Interval(0.5, 1.5), False), + ("a", False), + (pd.Timestamp(1), False), + (pd.Timedelta(1), False), + ], + ids=str, + ) + def test_contains_interval(self, item, expected): + # GH 23705 + cat = Categorical(pd.IntervalIndex.from_breaks(range(3))) + result = item in cat + assert result is expected + + def test_contains_list(self): + # GH#21729 + cat = Categorical([1, 2, 3]) + + assert "a" not in cat + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in cat + + with pytest.raises(TypeError, match="unhashable 
type"): + ["a", "b"] in cat diff --git a/pandas/tests/arrays/categorical/test_replace.py b/pandas/tests/arrays/categorical/test_replace.py new file mode 100644 index 00000000..52530123 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_replace.py @@ -0,0 +1,48 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "to_replace,value,expected,check_types,check_categorical", + [ + # one-to-one + (1, 2, [2, 2, 3], True, True), + (1, 4, [4, 2, 3], True, True), + (4, 1, [1, 2, 3], True, True), + (5, 6, [1, 2, 3], True, True), + # many-to-one + ([1], 2, [2, 2, 3], True, True), + ([1, 2], 3, [3, 3, 3], True, True), + ([1, 2], 4, [4, 4, 3], True, True), + ((1, 2, 4), 5, [5, 5, 3], True, True), + ((5, 6), 2, [1, 2, 3], True, True), + # many-to-many, handled outside of Categorical and results in separate dtype + ([1], [2], [2, 2, 3], False, False), + ([1, 4], [5, 2], [5, 2, 3], False, False), + # check_categorical sorts categories, which crashes on mixed dtypes + (3, "4", [1, 2, "4"], True, False), + ([1, 2, "3"], "5", ["5", "5", 3], True, False), + ], +) +def test_replace(to_replace, value, expected, check_types, check_categorical): + # GH 31720 + s = pd.Series([1, 2, 3], dtype="category") + result = s.replace(to_replace, value) + expected = pd.Series(expected, dtype="category") + s.replace(to_replace, value, inplace=True) + tm.assert_series_equal( + expected, + result, + check_dtype=check_types, + check_categorical=check_categorical, + check_category_order=False, + ) + tm.assert_series_equal( + expected, + s, + check_dtype=check_types, + check_categorical=check_categorical, + check_category_order=False, + ) diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py new file mode 100644 index 00000000..d08c4b47 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -0,0 +1,525 @@ +import numpy as np + +from pandas import ( + Categorical, + CategoricalIndex, + 
Series, + date_range, + option_context, + period_range, + timedelta_range, +) +from pandas.tests.arrays.categorical.common import TestCategorical + + +class TestCategoricalReprWithFactor(TestCategorical): + def test_print(self): + expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] + expected = "\n".join(expected) + actual = repr(self.factor) + assert actual == expected + + +class TestCategoricalRepr: + def test_big_print(self): + factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True) + expected = [ + "[a, b, c, a, b, ..., b, c, a, b, c]", + "Length: 600", + "Categories (3, object): [a, b, c]", + ] + expected = "\n".join(expected) + + actual = repr(factor) + + assert actual == expected + + def test_empty_print(self): + factor = Categorical([], ["a", "b", "c"]) + expected = "[], Categories (3, object): [a, b, c]" + actual = repr(factor) + assert actual == expected + + assert expected == actual + factor = Categorical([], ["a", "b", "c"], ordered=True) + expected = "[], Categories (3, object): [a < b < c]" + actual = repr(factor) + assert expected == actual + + factor = Categorical([], []) + expected = "[], Categories (0, object): []" + assert expected == repr(factor) + + def test_print_none_width(self): + # GH10087 + a = Series(Categorical([1, 2, 3, 4])) + exp = ( + "0 1\n1 2\n2 3\n3 4\n" + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" + ) + + with option_context("display.width", None): + assert exp == repr(a) + + def test_unicode_print(self): + c = Categorical(["aaaaa", "bb", "cccc"] * 20) + expected = """\ +[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] +Length: 60 +Categories (3, object): [aaaaa, bb, cccc]""" + + assert repr(c) == expected + + c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) + expected = """\ +[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +Length: 60 +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa + + assert repr(c) == expected + 
+ # unicode option should not affect to Categorical, as it doesn't care + # the repr width + with option_context("display.unicode.east_asian_width", True): + + c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) + expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +Length: 60 +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa + + assert repr(c) == expected + + def test_categorical_repr(self): + c = Categorical([1, 2, 3]) + exp = """[1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1, 2, 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1, 2, 3, 4, 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20)) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" + + assert repr(c) == exp + + def test_categorical_repr_ordered(self): + c = Categorical([1, 2, 3], ordered=True) + exp = """[1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) + exp = """[1, 2, 3, 1, 2, 3] +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(c) == exp + + c = Categorical([1, 2, 3, 4, 5] * 10, ordered=True) + exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] +Length: 50 +Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" + + assert repr(c) == exp + + c = Categorical(np.arange(20), ordered=True) + exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] +Length: 20 +Categories (20, int64): [0 < 1 < 2 < 3 ... 
16 < 17 < 18 < 19]""" + + assert repr(c) == exp + + def test_categorical_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx) + + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]" + "" + ) + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]" + ) + + assert repr(c) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + c = Categorical(idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]" + ) + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " + "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " + "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 
2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]" + ) + + assert repr(c) == exp + + def test_categorical_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + + assert repr(c) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 
12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_int_with_nan(self): + c = Categorical([1, 2, np.nan]) + c_exp = """[1, 2, NaN]\nCategories (2, int64): [1, 2]""" + assert repr(c) == c_exp + + s = Series([1, 2, np.nan], dtype="object").astype("category") + s_exp = """0 1\n1 2\n2 NaN +dtype: category +Categories (2, int64): [1, 2]""" + assert repr(s) == s_exp + + def test_categorical_repr_period(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + idx = period_range("2011-01", freq="M", periods=5) + c = Categorical(idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): 
[2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa + + assert repr(c) == exp + + idx = period_range("2011-01", freq="M", periods=5) + c = Categorical(idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + c = Categorical(idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" # noqa + + assert repr(c) == exp + + idx = timedelta_range("1 hours", periods=20) + c = Categorical(idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 
01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, + 18 days 01:00:00, 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + def test_categorical_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + c = Categorical(idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + + assert repr(c) == exp + + idx = timedelta_range("1 hours", periods=20) + c = Categorical(idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 20 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 
16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + c = Categorical(idx.append(idx), categories=idx, ordered=True) + exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] +Length: 40 +Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < + 18 days 01:00:00 < 19 days 01:00:00]""" # noqa + + assert repr(c) == exp + + def test_categorical_index_repr(self): + idx = CategoricalIndex(Categorical([1, 2, 3])) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa + assert repr(idx) == exp + + i = CategoricalIndex(Categorical(np.arange(10))) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_ordered(self): + i = CategoricalIndex(Categorical([1, 2, 3], ordered=True)) + exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa + assert repr(i) == exp + + i = CategoricalIndex(Categorical(np.arange(10), ordered=True)) + exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, 4, 5, 6, 7, ...], ordered=True, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" # noqa + + 
assert repr(i) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" # noqa + + assert repr(i) == exp + + def test_categorical_index_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00', '2011-01-01 10:00:00', + '2011-01-01 11:00:00', '2011-01-01 12:00:00', + '2011-01-01 13:00:00'], + categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00'], + categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx), ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', + '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', + '2011-01-01 13:00:00-05:00', '2011-01-01 09:00:00-05:00', + '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', + '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], + 
categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + def test_categorical_index_repr_period(self): + # test all length + idx = period_range("2011-01-01 09:00", freq="H", periods=1) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00'], categories=[2011-01-01 09:00], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=2) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=3) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" # noqa + + assert repr(i) == exp + + i = CategoricalIndex(Categorical(idx.append(idx))) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00', '2011-01-01 09:00', + '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', + '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" 
# noqa + + assert repr(i) == exp + + idx = period_range("2011-01", freq="M", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', + '2011-01-01 12:00', '2011-01-01 13:00'], + categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp + + idx = period_range("2011-01", freq="M", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05'], categories=[2011-01, 2011-02, 2011-03, 2011-04, 2011-05], ordered=True, dtype='category')""" # noqa + assert repr(i) == exp + + def test_categorical_index_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=False, dtype='category')""" # noqa + assert repr(i) == exp + + idx = timedelta_range("1 hours", periods=10) + i = CategoricalIndex(Categorical(idx)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], 
ordered=False, dtype='category')""" # noqa + + assert repr(i) == exp + + def test_categorical_index_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa + assert repr(i) == exp + + idx = timedelta_range("1 hours", periods=10) + i = CategoricalIndex(Categorical(idx, ordered=True)) + exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', + '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', + '9 days 01:00:00'], + categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa + + assert repr(i) == exp diff --git a/pandas/tests/arrays/categorical/test_sorting.py b/pandas/tests/arrays/categorical/test_sorting.py new file mode 100644 index 00000000..2a0ef043 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_sorting.py @@ -0,0 +1,124 @@ +import numpy as np +import pytest + +from pandas import Categorical, Index +import pandas._testing as tm + + +class TestCategoricalSort: + def test_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal( + c.argsort(ascending=True), expected, check_dtype=False + ) + + expected = expected[::-1] + tm.assert_numpy_array_equal( + c.argsort(ascending=False), expected, check_dtype=False + ) + + def test_numpy_argsort(self): + c = Categorical([5, 3, 1, 4, 2], ordered=True) + + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) + + tm.assert_numpy_array_equal( + np.argsort(c, 
kind="mergesort"), expected, check_dtype=False + ) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(c, axis=0) + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(c, order="C") + + def test_sort_values(self): + + # unordered cats are sortable + cat = Categorical(["a", "b", "b", "a"], ordered=False) + cat.sort_values() + + cat = Categorical(["a", "c", "b", "d"], ordered=True) + + # sort_values + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + cat = Categorical( + ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True + ) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + res = cat.sort_values(ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=object) + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # sort (inplace order) + cat1 = cat.copy() + cat1.sort_values(inplace=True) + exp = np.array(["a", "b", "c", "d"], dtype=object) + tm.assert_numpy_array_equal(cat1.__array__(), exp) + tm.assert_index_equal(res.categories, cat.categories) + + # reverse + cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) + res = cat.sort_values(ascending=False) + exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + def test_sort_values_na_position(self): + # see gh-12882 + cat = Categorical([5, 2, np.nan, 2, np.nan], ordered=True) + exp_categories = Index([2, 5]) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values() 
# default arguments + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 2.0, 2.0, 5.0]) + res = cat.sort_values(ascending=True, na_position="first") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([np.nan, np.nan, 5.0, 2.0, 2.0]) + res = cat.sort_values(ascending=False, na_position="first") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([2.0, 2.0, 5.0, np.nan, np.nan]) + res = cat.sort_values(ascending=True, na_position="last") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + exp = np.array([5.0, 2.0, 2.0, np.nan, np.nan]) + res = cat.sort_values(ascending=False, na_position="last") + tm.assert_numpy_array_equal(res.__array__(), exp) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position="last") + exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) + + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) + res = cat.sort_values(ascending=False, na_position="first") + exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) + exp_categories = Index(["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(res.__array__(), exp_val) + tm.assert_index_equal(res.categories, exp_categories) diff --git a/pandas/tests/arrays/categorical/test_subclass.py b/pandas/tests/arrays/categorical/test_subclass.py new file mode 100644 index 00000000..b80d0ff4 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_subclass.py @@ -0,0 +1,22 @@ +from pandas import Categorical +import 
pandas._testing as tm + + +class TestCategoricalSubclassing: + def test_constructor(self): + sc = tm.SubclassedCategorical(["a", "b", "c"]) + assert isinstance(sc, tm.SubclassedCategorical) + tm.assert_categorical_equal(sc, Categorical(["a", "b", "c"])) + + def test_from_codes(self): + sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ["a", "b", "c"]) + assert isinstance(sc, tm.SubclassedCategorical) + exp = Categorical.from_codes([1, 0, 2], ["a", "b", "c"]) + tm.assert_categorical_equal(sc, exp) + + def test_map(self): + sc = tm.SubclassedCategorical(["a", "b", "c"]) + res = sc.map(lambda x: x.upper()) + assert isinstance(res, tm.SubclassedCategorical) + exp = Categorical(["A", "B", "C"]) + tm.assert_categorical_equal(res, exp) diff --git a/pandas/tests/arrays/categorical/test_warnings.py b/pandas/tests/arrays/categorical/test_warnings.py new file mode 100644 index 00000000..9e164a25 --- /dev/null +++ b/pandas/tests/arrays/categorical/test_warnings.py @@ -0,0 +1,29 @@ +import pytest + +from pandas.util._test_decorators import async_mark + +import pandas._testing as tm + + +class TestCategoricalWarnings: + @async_mark() + async def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; c = Categorical([])" + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning + import jedi + + if jedi.__version__ < "0.16.0": + warning = tm.assert_produces_warning(None) + else: + warning = tm.assert_produces_warning( + DeprecationWarning, check_stacklevel=False + ) + with warning: + with provisionalcompleter("ignore"): + list(ip.Completer.completions("c.", 1)) diff --git a/pandas/tests/arrays/interval/__init__.py b/pandas/tests/arrays/interval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/interval/test_interval.py 
b/pandas/tests/arrays/interval/test_interval.py new file mode 100644 index 00000000..a43ea7e4 --- /dev/null +++ b/pandas/tests/arrays/interval/test_interval.py @@ -0,0 +1,232 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Index, + Interval, + IntervalIndex, + Timedelta, + Timestamp, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +@pytest.fixture( + params=[ + (Index([0, 2, 4]), Index([1, 3, 5])), + (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])), + (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)), + (date_range("20170101", periods=3), date_range("20170102", periods=3)), + ( + date_range("20170101", periods=3, tz="US/Eastern"), + date_range("20170102", periods=3, tz="US/Eastern"), + ), + ], + ids=lambda x: str(x[0].dtype), +) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +class TestAttributes: + @pytest.mark.parametrize( + "left, right", + [ + (0, 1), + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timestamp("2018-01-02")), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-02", tz="US/Eastern"), + ), + ], + ) + @pytest.mark.parametrize("constructor", [IntervalArray, IntervalIndex]) + def test_is_empty(self, constructor, left, right, closed): + # GH27219 + tuples = [(left, left), (left, right), np.nan] + expected = np.array([closed != "both", False, False]) + result = constructor.from_tuples(tuples, closed=closed).is_empty + tm.assert_numpy_array_equal(result, expected) + + +class TestMethods: + @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) + def test_set_closed(self, closed, new_closed): + # GH 21670 + array = IntervalArray.from_breaks(range(10), closed=closed) + result = array.set_closed(new_closed) + expected = 
IntervalArray.from_breaks(range(10), closed=new_closed) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [ + Interval(0, 1, closed="right"), + IntervalArray.from_breaks([1, 2, 3, 4], closed="right"), + ], + ) + def test_where_raises(self, other): + ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left")) + match = "'value.closed' is 'right', expected 'left'." + with pytest.raises(ValueError, match=match): + ser.where([True, False, True], other=other) + + def test_shift(self): + # https://github.com/pandas-dev/pandas/issues/31495 + a = IntervalArray.from_breaks([1, 2, 3]) + result = a.shift() + # int -> float + expected = IntervalArray.from_tuples([(np.nan, np.nan), (1.0, 2.0)]) + tm.assert_interval_array_equal(result, expected) + + def test_shift_datetime(self): + a = IntervalArray.from_breaks(pd.date_range("2000", periods=4)) + result = a.shift(2) + expected = a.take([-1, -1, 0], allow_fill=True) + tm.assert_interval_array_equal(result, expected) + + result = a.shift(-1) + expected = a.take([1, 2, -1], allow_fill=True) + tm.assert_interval_array_equal(result, expected) + + +class TestSetitem: + def test_set_na(self, left_right_dtypes): + left, right = left_right_dtypes + result = IntervalArray.from_arrays(left, right) + result[0] = np.nan + + expected_left = Index([left._na_value] + list(left[1:])) + expected_right = Index([right._na_value] + list(right[1:])) + expected = IntervalArray.from_arrays(expected_left, expected_right) + + tm.assert_extension_array_equal(result, expected) + + +def test_repr(): + # GH 25022 + arr = IntervalArray.from_tuples([(0, 1), (1, 2)]) + result = repr(arr) + expected = ( + "\n" + "[(0, 1], (1, 2]]\n" + "Length: 2, closed: right, dtype: interval[int64]" + ) + assert result == expected + + +# ---------------------------------------------------------------------------- +# Arrow interaction + + +pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev") + + 
+@pyarrow_skip +def test_arrow_extension_type(): + import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + p1 = ArrowIntervalType(pa.int64(), "left") + p2 = ArrowIntervalType(pa.int64(), "left") + p3 = ArrowIntervalType(pa.int64(), "right") + + assert p1.closed == "left" + assert p1 == p2 + assert not p1 == p3 + assert hash(p1) == hash(p2) + assert not hash(p1) == hash(p3) + + +@pyarrow_skip +def test_arrow_array(): + import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + intervals = pd.interval_range(1, 5, freq=1).array + + result = pa.array(intervals) + assert isinstance(result.type, ArrowIntervalType) + assert result.type.closed == intervals.closed + assert result.type.subtype == pa.int64() + assert result.storage.field("left").equals(pa.array([1, 2, 3, 4], type="int64")) + assert result.storage.field("right").equals(pa.array([2, 3, 4, 5], type="int64")) + + expected = pa.array([{"left": i, "right": i + 1} for i in range(1, 5)]) + assert result.storage.equals(expected) + + # convert to its storage type + result = pa.array(intervals, type=expected.type) + assert result.equals(expected) + + # unsupported conversions + with pytest.raises(TypeError): + pa.array(intervals, type="float64") + + with pytest.raises(TypeError, match="different 'subtype'"): + pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left")) + + +@pyarrow_skip +def test_arrow_array_missing(): + import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + arr = IntervalArray.from_breaks([0, 1, 2, 3]) + arr[1] = None + + result = pa.array(arr) + assert isinstance(result.type, ArrowIntervalType) + assert result.type.closed == arr.closed + assert result.type.subtype == pa.float64() + + # fields have missing values (not NaN) + left = pa.array([0.0, None, 2.0], type="float64") + right = pa.array([1.0, None, 3.0], type="float64") + assert result.storage.field("left").equals(left) + assert 
result.storage.field("right").equals(right) + + # structarray itself also has missing values on the array level + vals = [ + {"left": 0.0, "right": 1.0}, + {"left": None, "right": None}, + {"left": 2.0, "right": 3.0}, + ] + expected = pa.StructArray.from_pandas(vals, mask=np.array([False, True, False])) + assert result.storage.equals(expected) + + +@pyarrow_skip +@pytest.mark.parametrize( + "breaks", + [[0, 1, 2, 3], pd.date_range("2017", periods=4, freq="D")], + ids=["int", "datetime64[ns]"], +) +def test_arrow_table_roundtrip(breaks): + import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowIntervalType + + arr = IntervalArray.from_breaks(breaks) + arr[1] = None + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowIntervalType) + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.IntervalDtype) + tm.assert_frame_equal(result, df) + + table2 = pa.concat_tables([table, table]) + result = table2.to_pandas() + expected = pd.concat([df, df], ignore_index=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/interval/test_ops.py b/pandas/tests/arrays/interval/test_ops.py new file mode 100644 index 00000000..b4de80dc --- /dev/null +++ b/pandas/tests/arrays/interval/test_ops.py @@ -0,0 +1,88 @@ +"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" +import numpy as np +import pytest + +from pandas import Interval, IntervalIndex, Timedelta, Timestamp +import pandas._testing as tm +from pandas.core.arrays import IntervalArray + + +@pytest.fixture(params=[IntervalArray, IntervalIndex]) +def constructor(request): + """ + Fixture for testing both interval container classes. 
+ """ + return request.param + + +@pytest.fixture( + params=[ + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timedelta("1 day")), + (0, 1), + ], + ids=lambda x: type(x[0]).__name__, +) +def start_shift(request): + """ + Fixture for generating intervals of different types from a start value + and a shift value that can be added to start to generate an endpoint. + """ + return request.param + + +class TestOverlaps: + def test_overlaps_interval(self, constructor, start_shift, closed, other_closed): + start, shift = start_shift + interval = Interval(start, start + 3 * shift, other_closed) + + # intervals: identical, nested, spanning, partial, adjacent, disjoint + tuples = [ + (start, start + 3 * shift), + (start + shift, start + 2 * shift), + (start - shift, start + 4 * shift), + (start + 2 * shift, start + 4 * shift), + (start + 3 * shift, start + 4 * shift), + (start + 4 * shift, start + 5 * shift), + ] + interval_container = constructor.from_tuples(tuples, closed) + + adjacent = interval.closed_right and interval_container.closed_left + expected = np.array([True, True, True, True, adjacent, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("other_constructor", [IntervalArray, IntervalIndex]) + def test_overlaps_interval_container(self, constructor, other_constructor): + # TODO: modify this test when implemented + interval_container = constructor.from_breaks(range(5)) + other_container = other_constructor.from_breaks(range(5)) + with pytest.raises(NotImplementedError): + interval_container.overlaps(other_container) + + def test_overlaps_na(self, constructor, start_shift): + """NA values are marked as False""" + start, shift = start_shift + interval = Interval(start, start + shift) + + tuples = [ + (start, start + shift), + np.nan, + (start + 2 * shift, start + 3 * shift), + ] + interval_container = constructor.from_tuples(tuples) + + expected = 
np.array([True, False, False]) + result = interval_container.overlaps(interval) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "other", + [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")], + ids=lambda x: type(x).__name__, + ) + def test_overlaps_invalid_type(self, constructor, other): + interval_container = constructor.from_breaks(range(5)) + msg = f"`other` must be Interval-like, got {type(other).__name__}" + with pytest.raises(TypeError, match=msg): + interval_container.overlaps(other) diff --git a/pandas/tests/arrays/sparse/__init__.py b/pandas/tests/arrays/sparse/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py new file mode 100644 index 00000000..d8a1831c --- /dev/null +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -0,0 +1,123 @@ +import string + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray, SparseDtype + + +class TestSeriesAccessor: + # TODO: collect other Series accessor tests + def test_to_dense(self): + s = pd.Series([0, 1, 0, 10], dtype="Sparse[int64]") + result = s.sparse.to_dense() + expected = pd.Series([0, 1, 0, 10]) + tm.assert_series_equal(result, expected) + + +class TestFrameAccessor: + def test_accessor_raises(self): + df = pd.DataFrame({"A": [0, 1]}) + with pytest.raises(AttributeError, match="sparse"): + df.sparse + + @pytest.mark.parametrize("format", ["csc", "csr", "coo"]) + @pytest.mark.parametrize("labels", [None, list(string.ascii_letters[:10])]) + @pytest.mark.parametrize("dtype", ["float64", "int64"]) + @td.skip_if_no_scipy + def test_from_spmatrix(self, format, labels, dtype): + import scipy.sparse + + sp_dtype = SparseDtype(dtype, np.array(0, dtype=dtype).item()) + + mat = scipy.sparse.eye(10, format=format, dtype=dtype) + result = 
pd.DataFrame.sparse.from_spmatrix(mat, index=labels, columns=labels) + expected = pd.DataFrame( + np.eye(10, dtype=dtype), index=labels, columns=labels + ).astype(sp_dtype) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", + [["a", "b"], pd.MultiIndex.from_product([["A"], ["a", "b"]]), ["a", "a"]], + ) + @td.skip_if_no_scipy + def test_from_spmatrix_columns(self, columns): + import scipy.sparse + + dtype = SparseDtype("float64", 0.0) + + mat = scipy.sparse.random(10, 2, density=0.5) + result = pd.DataFrame.sparse.from_spmatrix(mat, columns=columns) + expected = pd.DataFrame(mat.toarray(), columns=columns).astype(dtype) + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + def test_to_coo(self): + import scipy.sparse + + df = pd.DataFrame({"A": [0, 1, 0], "B": [1, 0, 0]}, dtype="Sparse[int64, 0]") + result = df.sparse.to_coo() + expected = scipy.sparse.coo_matrix(np.asarray(df)) + assert (result != expected).nnz == 0 + + def test_to_dense(self): + df = pd.DataFrame( + { + "A": SparseArray([1, 0], dtype=SparseDtype("int64", 0)), + "B": SparseArray([1, 0], dtype=SparseDtype("int64", 1)), + "C": SparseArray([1.0, 0.0], dtype=SparseDtype("float64", 0.0)), + }, + index=["b", "a"], + ) + result = df.sparse.to_dense() + expected = pd.DataFrame( + {"A": [1, 0], "B": [1, 0], "C": [1.0, 0.0]}, index=["b", "a"] + ) + tm.assert_frame_equal(result, expected) + + def test_density(self): + df = pd.DataFrame( + { + "A": SparseArray([1, 0, 2, 1], fill_value=0), + "B": SparseArray([0, 1, 1, 1], fill_value=0), + } + ) + res = df.sparse.density + expected = 0.75 + assert res == expected + + @pytest.mark.parametrize("dtype", ["int64", "float64"]) + @pytest.mark.parametrize("dense_index", [True, False]) + @td.skip_if_no_scipy + def test_series_from_coo(self, dtype, dense_index): + import scipy.sparse + + A = scipy.sparse.eye(3, format="coo", dtype=dtype) + result = pd.Series.sparse.from_coo(A, dense_index=dense_index) + index = 
pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) + if dense_index: + expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) + + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_series_from_coo_incorrect_format_raises(self): + # gh-26554 + import scipy.sparse + + m = scipy.sparse.csr_matrix(np.array([[0, 1], [0, 0]])) + with pytest.raises( + TypeError, match="Expected coo_matrix. Got csr_matrix instead." + ): + pd.Series.sparse.from_coo(m) + + def test_with_column_named_sparse(self): + # https://github.com/pandas-dev/pandas/issues/30758 + df = pd.DataFrame({"sparse": pd.arrays.SparseArray([1, 2])}) + assert isinstance(df.sparse, pd.core.arrays.sparse.accessor.SparseFrameAccessor) diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py new file mode 100644 index 00000000..bf7d275e --- /dev/null +++ b/pandas/tests/arrays/sparse/test_arithmetics.py @@ -0,0 +1,495 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core import ops +from pandas.core.arrays.sparse import SparseArray, SparseDtype + + +@pytest.fixture(params=["integer", "block"]) +def kind(request): + """kind kwarg to pass to SparseArray/SparseSeries""" + return request.param + + +@pytest.fixture(params=[True, False]) +def mix(request): + # whether to operate op(sparse, dense) instead of op(sparse, sparse) + return request.param + + +class TestSparseArrayArithmetics: + + _base = np.array + _klass = SparseArray + + def _assert(self, a, b): + tm.assert_numpy_array_equal(a, b) + + def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op): + with np.errstate(invalid="ignore", divide="ignore"): + if op in [operator.floordiv, ops.rfloordiv]: + # FIXME: GH#13843 + if self._base == pd.Series and a.dtype.subtype == np.dtype("int64"): + pytest.xfail("Not 
defined/working. See GH#13843") + + if mix: + result = op(a, b_dense).to_dense() + else: + result = op(a, b).to_dense() + + if op in [operator.truediv, ops.rtruediv]: + # pandas uses future division + expected = op(a_dense * 1.0, b_dense) + else: + expected = op(a_dense, b_dense) + + if op in [operator.floordiv, ops.rfloordiv]: + # Series sets 1//0 to np.inf, which SparseArray does not do (yet) + mask = np.isinf(expected) + if mask.any(): + expected[mask] = np.nan + + self._assert(result, expected) + + def _check_bool_result(self, res): + assert isinstance(res, self._klass) + assert isinstance(res.dtype, SparseDtype) + assert res.dtype.subtype == np.bool + assert isinstance(res.fill_value, bool) + + def _check_comparison_ops(self, a, b, a_dense, b_dense): + with np.errstate(invalid="ignore"): + # Unfortunately, trying to wrap the computation of each expected + # value is with np.errstate() is too tedious. + # + # sparse & sparse + self._check_bool_result(a == b) + self._assert((a == b).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b) + self._assert((a != b).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b) + self._assert((a >= b).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b) + self._assert((a <= b).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b) + self._assert((a > b).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b) + self._assert((a < b).to_dense(), a_dense < b_dense) + + # sparse & dense + self._check_bool_result(a == b_dense) + self._assert((a == b_dense).to_dense(), a_dense == b_dense) + + self._check_bool_result(a != b_dense) + self._assert((a != b_dense).to_dense(), a_dense != b_dense) + + self._check_bool_result(a >= b_dense) + self._assert((a >= b_dense).to_dense(), a_dense >= b_dense) + + self._check_bool_result(a <= b_dense) + self._assert((a <= b_dense).to_dense(), a_dense <= b_dense) + + self._check_bool_result(a > b_dense) + self._assert((a > 
b_dense).to_dense(), a_dense > b_dense) + + self._check_bool_result(a < b_dense) + self._assert((a < b_dense).to_dense(), a_dense < b_dense) + + def _check_logical_ops(self, a, b, a_dense, b_dense): + # sparse & sparse + self._check_bool_result(a & b) + self._assert((a & b).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b) + self._assert((a | b).to_dense(), a_dense | b_dense) + # sparse & dense + self._check_bool_result(a & b_dense) + self._assert((a & b_dense).to_dense(), a_dense & b_dense) + + self._check_bool_result(a | b_dense) + self._assert((a | b_dense).to_dense(), a_dense | b_dense) + + @pytest.mark.parametrize("scalar", [0, 1, 3]) + @pytest.mark.parametrize("fill_value", [None, 0, 2]) + def test_float_scalar( + self, kind, mix, all_arithmetic_functions, fill_value, scalar + ): + op = all_arithmetic_functions + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + a = self._klass(values, kind=kind, fill_value=fill_value) + self._check_numeric_ops(a, scalar, values, scalar, mix, op) + + def test_float_scalar_comparison(self, kind): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + + a = self._klass(values, kind=kind) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=0) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + a = self._klass(values, kind=kind, fill_value=2) + self._check_comparison_ops(a, 1, values, 1) + self._check_comparison_ops(a, 0, values, 0) + self._check_comparison_ops(a, 3, values, 3) + + def test_float_same_index(self, kind, mix, all_arithmetic_functions): + # when sp_index are the same + op = all_arithmetic_functions + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = 
self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) + rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_same_index_comparison(self, kind): + # when sp_index are the same + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([np.nan, 2, 3, 4, np.nan, 0, 1, 3, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + values = self._base([0.0, 1.0, 2.0, 6.0, 0.0, 0.0, 1.0, 2.0, 1.0, 0.0]) + rvalues = self._base([0.0, 2.0, 3.0, 4.0, 0.0, 0.0, 1.0, 3.0, 2.0, 0.0]) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + def test_float_array(self, kind, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + self._check_numeric_ops(a, b, 
values, rvalues, mix, op) + + def test_float_array_different_kind(self, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = self._klass(values, kind="integer") + b = self._klass(rvalues, kind="block") + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, kind="integer", fill_value=0) + b = self._klass(rvalues, kind="block") + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind="integer", fill_value=0) + b = self._klass(rvalues, kind="block", fill_value=0) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind="integer", fill_value=1) + b = self._klass(rvalues, kind="block", fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_float_array_comparison(self, kind): + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, np.nan, 2, 3, np.nan, 0, 1, 5, 2, np.nan]) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + def test_int_array(self, kind, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + # have to specify dtype explicitly until fixing GH 667 + dtype = np.int64 + + 
values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) + + a = self._klass(values, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = self._klass(rvalues, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = self._klass(rvalues, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, fill_value=0, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype) + b = self._klass(rvalues, fill_value=0, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, fill_value=1, dtype=dtype, kind=kind) + assert a.dtype == SparseDtype(dtype, fill_value=1) + b = self._klass(rvalues, fill_value=2, dtype=dtype, kind=kind) + assert b.dtype == SparseDtype(dtype, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_int_array_comparison(self, kind): + dtype = "int64" + # int32 NI ATM + + values = self._base([0, 1, 2, 0, 0, 0, 1, 2, 1, 0], dtype=dtype) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=dtype) + + a = self._klass(values, dtype=dtype, kind=kind) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=0) + b = self._klass(rvalues, dtype=dtype, kind=kind) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=0) + b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=0) + 
self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, dtype=dtype, kind=kind, fill_value=1) + b = self._klass(rvalues, dtype=dtype, kind=kind, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + @pytest.mark.parametrize("fill_value", [True, False, np.nan]) + def test_bool_same_index(self, kind, fill_value): + # GH 14000 + # when sp_index are the same + values = self._base([True, False, True, True], dtype=np.bool) + rvalues = self._base([True, False, True, True], dtype=np.bool) + + a = self._klass(values, kind=kind, dtype=np.bool, fill_value=fill_value) + b = self._klass(rvalues, kind=kind, dtype=np.bool, fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + + @pytest.mark.parametrize("fill_value", [True, False, np.nan]) + def test_bool_array_logical(self, kind, fill_value): + # GH 14000 + # when sp_index are the same + values = self._base([True, False, True, False, True, True], dtype=np.bool) + rvalues = self._base([True, False, False, True, False, True], dtype=np.bool) + + a = self._klass(values, kind=kind, dtype=np.bool, fill_value=fill_value) + b = self._klass(rvalues, kind=kind, dtype=np.bool, fill_value=fill_value) + self._check_logical_ops(a, b, values, rvalues) + + def test_mixed_array_float_int(self, kind, mix, all_arithmetic_functions): + op = all_arithmetic_functions + + rdtype = "int64" + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + + self._check_numeric_ops(a, b, values, rvalues, mix, op) + self._check_numeric_ops(a, b * 0, values, rvalues * 0, mix, op) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, 
fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + assert b.dtype == SparseDtype(rdtype) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + assert b.dtype == SparseDtype(rdtype, fill_value=2) + self._check_numeric_ops(a, b, values, rvalues, mix, op) + + def test_mixed_array_comparison(self, kind): + rdtype = "int64" + # int32 NI ATM + + values = self._base([np.nan, 1, 2, 0, np.nan, 0, 1, 2, 1, np.nan]) + rvalues = self._base([2, 0, 2, 3, 0, 0, 1, 5, 2, 0], dtype=rdtype) + + a = self._klass(values, kind=kind) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + + self._check_comparison_ops(a, b, values, rvalues) + self._check_comparison_ops(a, b * 0, values, rvalues * 0) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind) + assert b.dtype == SparseDtype(rdtype) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=0) + b = self._klass(rvalues, kind=kind, fill_value=0) + assert b.dtype == SparseDtype(rdtype) + self._check_comparison_ops(a, b, values, rvalues) + + a = self._klass(values, kind=kind, fill_value=1) + b = self._klass(rvalues, kind=kind, fill_value=2) + assert b.dtype == SparseDtype(rdtype, fill_value=2) + self._check_comparison_ops(a, b, values, rvalues) + + def test_xor(self): + s = SparseArray([True, True, False, False]) + t = SparseArray([True, False, True, False]) + result = s ^ t + sp_index = pd.core.arrays.sparse.IntIndex(4, np.array([0, 1, 2], dtype="int32")) + expected = SparseArray([False, True, True], sparse_index=sp_index) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("op", [operator.eq, operator.add]) +def test_with_list(op): + arr = SparseArray([0, 1], fill_value=0) + result = op(arr, [0, 1]) + expected = op(arr, SparseArray([0, 1])) + tm.assert_sp_array_equal(result, 
expected) + + +def test_with_dataframe(): + # GH#27910 + arr = SparseArray([0, 1], fill_value=0) + df = pd.DataFrame([[1, 2], [3, 4]]) + result = arr.__add__(df) + assert result is NotImplemented + + +def test_with_zerodim_ndarray(): + # GH#27910 + arr = SparseArray([0, 1], fill_value=0) + + result = arr * np.array(2) + expected = arr * 2 + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.abs, np.exp]) +@pytest.mark.parametrize( + "arr", [SparseArray([0, 0, -1, 1]), SparseArray([None, None, -1, 1])] +) +def test_ufuncs(ufunc, arr): + result = ufunc(arr) + fill_value = ufunc(arr.fill_value) + expected = SparseArray(ufunc(np.asarray(arr)), fill_value=fill_value) + tm.assert_sp_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + (SparseArray([0, 0, 0]), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + (SparseArray([0, 0, 0], fill_value=1), np.array([0, 1, 2])), + ], +) +@pytest.mark.parametrize("ufunc", [np.add, np.greater]) +def test_binary_ufuncs(ufunc, a, b): + # can't say anything about fill value here. 
+ result = ufunc(a, b) + expected = ufunc(np.asarray(a), np.asarray(b)) + assert isinstance(result, SparseArray) + tm.assert_numpy_array_equal(np.asarray(result), expected) + + +def test_ndarray_inplace(): + sparray = SparseArray([0, 2, 0, 0]) + ndarray = np.array([0, 1, 2, 3]) + ndarray += sparray + expected = np.array([0, 3, 2, 3]) + tm.assert_numpy_array_equal(ndarray, expected) + + +def test_sparray_inplace(): + sparray = SparseArray([0, 2, 0, 0]) + ndarray = np.array([0, 1, 2, 3]) + sparray += ndarray + expected = SparseArray([0, 3, 2, 3], fill_value=0) + tm.assert_sp_array_equal(sparray, expected) + + +@pytest.mark.parametrize("fill_value", [True, False]) +def test_invert(fill_value): + arr = np.array([True, False, False, True]) + sparray = SparseArray(arr, fill_value=fill_value) + result = ~sparray + expected = SparseArray(~arr, fill_value=not fill_value) + tm.assert_sp_array_equal(result, expected) + + result = ~pd.Series(sparray) + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) + + result = ~pd.DataFrame({"A": sparray}) + expected = pd.DataFrame({"A": expected}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("fill_value", [0, np.nan]) +@pytest.mark.parametrize("op", [operator.pos, operator.neg]) +def test_unary_op(op, fill_value): + arr = np.array([0, 1, np.nan, 2]) + sparray = SparseArray(arr, fill_value=fill_value) + result = op(sparray) + expected = SparseArray(op(arr), fill_value=op(fill_value)) + tm.assert_sp_array_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py new file mode 100644 index 00000000..baca1823 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_array.py @@ -0,0 +1,1249 @@ +import operator +import re +import warnings + +import numpy as np +import pytest + +from pandas._libs.sparse import IntIndex +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import isna +import pandas._testing as tm 
+from pandas.core.arrays.sparse import SparseArray, SparseDtype + + +@pytest.fixture(params=["integer", "block"]) +def kind(request): + return request.param + + +class TestSparseArray: + def setup_method(self, method): + self.arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6]) + self.arr = SparseArray(self.arr_data) + self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0) + + def test_constructor_dtype(self): + arr = SparseArray([np.nan, 1, 2, np.nan]) + assert arr.dtype == SparseDtype(np.float64, np.nan) + assert arr.dtype.subtype == np.float64 + assert np.isnan(arr.fill_value) + + arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0) + assert arr.dtype == SparseDtype(np.float64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], dtype=np.float64) + assert arr.dtype == SparseDtype(np.float64, np.nan) + assert np.isnan(arr.fill_value) + + arr = SparseArray([0, 1, 2, 4], dtype=np.int64) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], dtype=None) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None) + assert arr.dtype == SparseDtype(np.int64, 0) + assert arr.fill_value == 0 + + def test_constructor_dtype_str(self): + result = SparseArray([1, 2, 3], dtype="int") + expected = SparseArray([1, 2, 3], dtype=int) + tm.assert_sp_array_equal(result, expected) + + def test_constructor_sparse_dtype(self): + result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1)) + expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64) + tm.assert_sp_array_equal(result, expected) + assert result.sp_values.dtype == np.dtype("int64") + + def test_constructor_sparse_dtype_str(self): + result = SparseArray([1, 0, 0, 1], 
dtype="Sparse[int32]") + expected = SparseArray([1, 0, 0, 1], dtype=np.int32) + tm.assert_sp_array_equal(result, expected) + assert result.sp_values.dtype == np.dtype("int32") + + def test_constructor_object_dtype(self): + # GH 11856 + arr = SparseArray(["A", "A", np.nan, "B"], dtype=np.object) + assert arr.dtype == SparseDtype(np.object) + assert np.isnan(arr.fill_value) + + arr = SparseArray(["A", "A", np.nan, "B"], dtype=np.object, fill_value="A") + assert arr.dtype == SparseDtype(np.object, "A") + assert arr.fill_value == "A" + + # GH 17574 + data = [False, 0, 100.0, 0.0] + arr = SparseArray(data, dtype=np.object, fill_value=False) + assert arr.dtype == SparseDtype(np.object, False) + assert arr.fill_value is False + arr_expected = np.array(data, dtype=np.object) + it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) + assert np.fromiter(it, dtype=np.bool).all() + + @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int]) + def test_constructor_na_dtype(self, dtype): + with pytest.raises(ValueError, match="Cannot convert"): + SparseArray([0, 1, np.nan], dtype=dtype) + + def test_constructor_spindex_dtype(self): + arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) + # XXX: Behavior change: specifying SparseIndex no longer changes the + # fill_value + expected = SparseArray([0, 1, 2, 0], kind="integer") + tm.assert_sp_array_equal(arr, expected) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2, 3], + sparse_index=IntIndex(4, [1, 2, 3]), + dtype=np.int64, + fill_value=0, + ) + exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64 + ) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == 
SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray( + data=[1, 2, 3], + sparse_index=IntIndex(4, [1, 2, 3]), + dtype=None, + fill_value=0, + ) + exp = SparseArray([0, 1, 2, 3], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])]) + def test_constructor_spindex_dtype_scalar(self, sparse_index): + # scalar input + arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None) + exp = SparseArray([1], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None) + exp = SparseArray([1], dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + def test_constructor_spindex_dtype_scalar_broadcasts(self): + arr = SparseArray( + data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None + ) + exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None) + tm.assert_sp_array_equal(arr, exp) + assert arr.dtype == SparseDtype(np.int64) + assert arr.fill_value == 0 + + @pytest.mark.parametrize( + "data, fill_value", + [ + (np.array([1, 2]), 0), + (np.array([1.0, 2.0]), np.nan), + ([True, False], False), + ([pd.Timestamp("2017-01-01")], pd.NaT), + ], + ) + def test_constructor_inferred_fill_value(self, data, fill_value): + result = SparseArray(data).fill_value + + if pd.isna(fill_value): + assert pd.isna(result) + else: + assert result == fill_value + + @pytest.mark.parametrize("format", ["coo", "csc", "csr"]) + @pytest.mark.parametrize( + "size", + [pytest.param(0, marks=td.skip_if_np_lt("1.16", reason="NumPy-11383")), 10], + ) + @td.skip_if_no_scipy + def test_from_spmatrix(self, size, format): + import scipy.sparse + + mat = scipy.sparse.random(size, 1, density=0.5, format=format) + result = 
SparseArray.from_spmatrix(mat) + + result = np.asarray(result) + expected = mat.toarray().ravel() + tm.assert_numpy_array_equal(result, expected) + + @td.skip_if_no_scipy + def test_from_spmatrix_raises(self): + import scipy.sparse + + mat = scipy.sparse.eye(5, 4, format="csc") + + with pytest.raises(ValueError, match="not '4'"): + SparseArray.from_spmatrix(mat) + + @pytest.mark.parametrize( + "scalar,dtype", + [ + (False, SparseDtype(bool, False)), + (0.0, SparseDtype("float64", 0)), + (1, SparseDtype("int64", 1)), + ("z", SparseDtype("object", "z")), + ], + ) + def test_scalar_with_index_infer_dtype(self, scalar, dtype): + # GH 19163 + arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar) + exp = SparseArray([scalar, scalar, scalar], fill_value=scalar) + + tm.assert_sp_array_equal(arr, exp) + + assert arr.dtype == dtype + assert exp.dtype == dtype + + def test_get_item(self): + + assert np.isnan(self.arr[1]) + assert self.arr[2] == 1 + assert self.arr[7] == 5 + + assert self.zarr[0] == 0 + assert self.zarr[2] == 1 + assert self.zarr[7] == 5 + + errmsg = re.compile("bounds") + + with pytest.raises(IndexError, match=errmsg): + self.arr[11] + + with pytest.raises(IndexError, match=errmsg): + self.arr[-11] + + assert self.arr[-1] == self.arr[len(self.arr) - 1] + + def test_take_scalar_raises(self): + msg = "'indices' must be an array, not a scalar '2'." 
+ with pytest.raises(ValueError, match=msg): + self.arr.take(2) + + def test_take(self): + exp = SparseArray(np.take(self.arr_data, [2, 3])) + tm.assert_sp_array_equal(self.arr.take([2, 3]), exp) + + exp = SparseArray(np.take(self.arr_data, [0, 1, 2])) + tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp) + + def test_take_fill_value(self): + data = np.array([1, np.nan, 0, 3, 0]) + sparse = SparseArray(data, fill_value=0) + + exp = SparseArray(np.take(data, [0]), fill_value=0) + tm.assert_sp_array_equal(sparse.take([0]), exp) + + exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0) + tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp) + + def test_take_negative(self): + exp = SparseArray(np.take(self.arr_data, [-1])) + tm.assert_sp_array_equal(self.arr.take([-1]), exp) + + exp = SparseArray(np.take(self.arr_data, [-4, -3, -2])) + tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp) + + @pytest.mark.parametrize("fill_value", [0, None, np.nan]) + def test_shift_fill_value(self, fill_value): + # GH #24128 + sparse = SparseArray(np.array([1, 0, 0, 3, 0]), fill_value=8.0) + res = sparse.shift(1, fill_value=fill_value) + if isna(fill_value): + fill_value = res.dtype.na_value + exp = SparseArray(np.array([fill_value, 1, 0, 0, 3]), fill_value=8.0) + tm.assert_sp_array_equal(res, exp) + + def test_bad_take(self): + with pytest.raises(IndexError, match="bounds"): + self.arr.take([11]) + + def test_take_filling(self): + # similar tests as GH 12631 + sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4]) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([np.nan, np.nan, 4]) + tm.assert_sp_array_equal(result, expected) + + # XXX: test change: fill_value=True -> allow_fill=True + result = sparse.take(np.array([1, 0, -1]), allow_fill=True) + expected = SparseArray([np.nan, np.nan, np.nan]) + tm.assert_sp_array_equal(result, expected) + + # allow_fill=False + result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected 
= SparseArray([np.nan, np.nan, 4]) + tm.assert_sp_array_equal(result, expected) + + msg = "Invalid value in 'indices'" + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -2]), allow_fill=True) + + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -5]), allow_fill=True) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), allow_fill=True) + + def test_take_filling_fill_value(self): + # same tests as GH 12631 + sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0) + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([0, np.nan, 4], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + # fill_value + result = sparse.take(np.array([1, 0, -1]), allow_fill=True) + # XXX: behavior change. + # the old way of filling self.fill_value doesn't follow EA rules. + # It's supposed to be self.dtype.na_value (nan in this case) + expected = SparseArray([0, np.nan, np.nan], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + # allow_fill=False + result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = SparseArray([0, np.nan, 4], fill_value=0) + tm.assert_sp_array_equal(result, expected) + + msg = "Invalid value in 'indices'." 
+ with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -2]), allow_fill=True) + with pytest.raises(ValueError, match=msg): + sparse.take(np.array([1, 0, -5]), allow_fill=True) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), fill_value=True) + + def test_take_filling_all_nan(self): + sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan]) + # XXX: did the default kind from take change? + result = sparse.take(np.array([1, 0, -1])) + expected = SparseArray([np.nan, np.nan, np.nan], kind="block") + tm.assert_sp_array_equal(result, expected) + + result = sparse.take(np.array([1, 0, -1]), fill_value=True) + expected = SparseArray([np.nan, np.nan, np.nan], kind="block") + tm.assert_sp_array_equal(result, expected) + + msg = "out of bounds value in 'indices'" + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, -6])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5])) + with pytest.raises(IndexError, match=msg): + sparse.take(np.array([1, 5]), fill_value=True) + + def test_set_item(self): + def setitem(): + self.arr[5] = 3 + + def setslice(): + self.arr[1:5] = 2 + + with pytest.raises(TypeError, match="assignment via setitem"): + setitem() + + with pytest.raises(TypeError, match="assignment via setitem"): + setslice() + + def test_constructor_from_too_large_array(self): + with pytest.raises(TypeError, match="expected dimension <= 1 data"): + SparseArray(np.arange(10).reshape((2, 5))) + + def test_constructor_from_sparse(self): + res = SparseArray(self.zarr) + assert res.fill_value == 0 + tm.assert_almost_equal(res.sp_values, self.zarr.sp_values) + + def test_constructor_copy(self): + cp = SparseArray(self.arr, copy=True) + cp.sp_values[:3] = 0 + assert not 
(self.arr.sp_values[:3] == 0).any() + + not_copy = SparseArray(self.arr) + not_copy.sp_values[:3] = 0 + assert (self.arr.sp_values[:3] == 0).all() + + def test_constructor_bool(self): + # GH 10648 + data = np.array([False, False, True, True, False, False]) + arr = SparseArray(data, fill_value=False, dtype=bool) + + assert arr.dtype == SparseDtype(bool) + tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True])) + # Behavior change: np.asarray densifies. + # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) + tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32)) + + dense = arr.to_dense() + assert dense.dtype == bool + tm.assert_numpy_array_equal(dense, data) + + def test_constructor_bool_fill_value(self): + arr = SparseArray([True, False, True], dtype=None) + assert arr.dtype == SparseDtype(np.bool) + assert not arr.fill_value + + arr = SparseArray([True, False, True], dtype=np.bool) + assert arr.dtype == SparseDtype(np.bool) + assert not arr.fill_value + + arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True) + assert arr.dtype == SparseDtype(np.bool, True) + assert arr.fill_value + + def test_constructor_float32(self): + # GH 10648 + data = np.array([1.0, np.nan, 3], dtype=np.float32) + arr = SparseArray(data, dtype=np.float32) + + assert arr.dtype == SparseDtype(np.float32) + tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32)) + # Behavior change: np.asarray densifies. 
+ # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr)) + tm.assert_numpy_array_equal( + arr.sp_index.indices, np.array([0, 2], dtype=np.int32) + ) + + dense = arr.to_dense() + assert dense.dtype == np.float32 + tm.assert_numpy_array_equal(dense, data) + + def test_astype(self): + # float -> float + arr = SparseArray([None, None, 0, 2]) + result = arr.astype("Sparse[float32]") + expected = SparseArray([None, None, 0, 2], dtype=np.dtype("float32")) + tm.assert_sp_array_equal(result, expected) + + dtype = SparseDtype("float64", fill_value=0) + result = arr.astype(dtype) + expected = SparseArray._simple_new( + np.array([0.0, 2.0], dtype=dtype.subtype), IntIndex(4, [2, 3]), dtype + ) + tm.assert_sp_array_equal(result, expected) + + dtype = SparseDtype("int64", 0) + result = arr.astype(dtype) + expected = SparseArray._simple_new( + np.array([0, 2], dtype=np.int64), IntIndex(4, [2, 3]), dtype + ) + tm.assert_sp_array_equal(result, expected) + + arr = SparseArray([0, np.nan, 0, 1], fill_value=0) + with pytest.raises(ValueError, match="NA"): + arr.astype("Sparse[i8]") + + def test_astype_bool(self): + a = SparseArray([1, 0, 0, 1], dtype=SparseDtype(int, 0)) + result = a.astype(bool) + expected = SparseArray([True, 0, 0, True], dtype=SparseDtype(bool, 0)) + tm.assert_sp_array_equal(result, expected) + + # update fill value + result = a.astype(SparseDtype(bool, False)) + expected = SparseArray( + [True, False, False, True], dtype=SparseDtype(bool, False) + ) + tm.assert_sp_array_equal(result, expected) + + def test_astype_all(self, any_real_dtype): + vals = np.array([1, 2, 3]) + arr = SparseArray(vals, fill_value=1) + typ = np.dtype(any_real_dtype) + res = arr.astype(typ) + assert res.dtype == SparseDtype(typ, 1) + assert res.sp_values.dtype == typ + + tm.assert_numpy_array_equal(np.asarray(res.to_dense()), vals.astype(typ)) + + @pytest.mark.parametrize( + "array, dtype, expected", + [ + ( + SparseArray([0, 1]), + "float", + SparseArray([0.0, 1.0], 
dtype=SparseDtype(float, 0.0)), + ), + (SparseArray([0, 1]), bool, SparseArray([False, True])), + ( + SparseArray([0, 1], fill_value=1), + bool, + SparseArray([False, True], dtype=SparseDtype(bool, True)), + ), + pytest.param( + SparseArray([0, 1]), + "datetime64[ns]", + SparseArray( + np.array([0, 1], dtype="datetime64[ns]"), + dtype=SparseDtype("datetime64[ns]", pd.Timestamp("1970")), + ), + marks=[pytest.mark.xfail(reason="NumPy-7619")], + ), + ( + SparseArray([0, 1, 10]), + str, + SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")), + ), + (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])), + ( + SparseArray([0, 1, 0]), + object, + SparseArray([0, 1, 0], dtype=SparseDtype(object, 0)), + ), + ], + ) + def test_astype_more(self, array, dtype, expected): + result = array.astype(dtype) + tm.assert_sp_array_equal(result, expected) + + def test_astype_nan_raises(self): + arr = SparseArray([1.0, np.nan]) + with pytest.raises(ValueError, match="Cannot convert non-finite"): + arr.astype(int) + + def test_set_fill_value(self): + arr = SparseArray([1.0, np.nan, 2.0], fill_value=np.nan) + arr.fill_value = 2 + assert arr.fill_value == 2 + + arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64) + arr.fill_value = 2 + assert arr.fill_value == 2 + + # XXX: this seems fine? You can construct an integer + # sparsearray with NaN fill value, why not update one? 
+ # coerces to int + # msg = "unable to set fill_value 3\\.1 to int64 dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = 3.1 + assert arr.fill_value == 3.1 + + # msg = "unable to set fill_value nan to int64 dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = np.nan + assert np.isnan(arr.fill_value) + + arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool) + arr.fill_value = True + assert arr.fill_value + + # coerces to bool + # msg = "unable to set fill_value 0 to bool dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = 0 + assert arr.fill_value == 0 + + # msg = "unable to set fill_value nan to bool dtype" + # with pytest.raises(ValueError, match=msg): + arr.fill_value = np.nan + assert np.isnan(arr.fill_value) + + @pytest.mark.parametrize("val", [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]) + def test_set_fill_invalid_non_scalar(self, val): + arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool) + msg = "fill_value must be a scalar" + + with pytest.raises(ValueError, match=msg): + arr.fill_value = val + + def test_copy(self): + arr2 = self.arr.copy() + assert arr2.sp_values is not self.arr.sp_values + assert arr2.sp_index is self.arr.sp_index + + def test_values_asarray(self): + tm.assert_almost_equal(self.arr.to_dense(), self.arr_data) + + @pytest.mark.parametrize( + "data,shape,dtype", + [ + ([0, 0, 0, 0, 0], (5,), None), + ([], (0,), None), + ([0], (1,), None), + (["A", "A", np.nan, "B"], (4,), np.object), + ], + ) + def test_shape(self, data, shape, dtype): + # GH 21126 + out = SparseArray(data, dtype=dtype) + assert out.shape == shape + + @pytest.mark.parametrize( + "vals", + [ + [np.nan, np.nan, np.nan, np.nan, np.nan], + [1, np.nan, np.nan, 3, np.nan], + [1, np.nan, 0, 3, 0], + ], + ) + @pytest.mark.parametrize("fill_value", [None, 0]) + def test_dense_repr(self, vals, fill_value): + vals = np.array(vals) + arr = SparseArray(vals, fill_value=fill_value) + + res = 
arr.to_dense() + tm.assert_numpy_array_equal(res, vals) + + res2 = arr._internal_get_values() + + tm.assert_numpy_array_equal(res2, vals) + + def test_getitem(self): + def _checkit(i): + tm.assert_almost_equal(self.arr[i], self.arr.to_dense()[i]) + + for i in range(len(self.arr)): + _checkit(i) + _checkit(-i) + + def test_getitem_arraylike_mask(self): + arr = SparseArray([0, 1, 2]) + result = arr[[True, False, True]] + expected = SparseArray([0, 2]) + tm.assert_sp_array_equal(result, expected) + + def test_getslice(self): + result = self.arr[:-3] + exp = SparseArray(self.arr.to_dense()[:-3]) + tm.assert_sp_array_equal(result, exp) + + result = self.arr[-4:] + exp = SparseArray(self.arr.to_dense()[-4:]) + tm.assert_sp_array_equal(result, exp) + + # two corner cases from Series + result = self.arr[-12:] + exp = SparseArray(self.arr) + tm.assert_sp_array_equal(result, exp) + + result = self.arr[:-12] + exp = SparseArray(self.arr.to_dense()[:0]) + tm.assert_sp_array_equal(result, exp) + + def test_getslice_tuple(self): + dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) + + sparse = SparseArray(dense) + res = sparse[ + 4:, + ] # noqa: E231 + exp = SparseArray(dense[4:,]) # noqa: E231 + tm.assert_sp_array_equal(res, exp) + + sparse = SparseArray(dense, fill_value=0) + res = sparse[ + 4:, + ] # noqa: E231 + exp = SparseArray(dense[4:,], fill_value=0) # noqa: E231 + tm.assert_sp_array_equal(res, exp) + + msg = "too many indices for array" + with pytest.raises(IndexError, match=msg): + sparse[4:, :] + + with pytest.raises(IndexError, match=msg): + # check numpy compat + dense[4:, :] + + def test_boolean_slice_empty(self): + arr = SparseArray([0, 1, 2]) + res = arr[[False, False, False]] + assert res.dtype == arr.dtype + + @pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv", "floordiv", "pow"]) + def test_binary_operators(self, op): + op = getattr(operator, op) + data1 = np.random.randn(20) + data2 = np.random.randn(20) + + data1[::2] = np.nan + 
data2[::3] = np.nan + + arr1 = SparseArray(data1) + arr2 = SparseArray(data2) + + data1[::2] = 3 + data2[::3] = 3 + farr1 = SparseArray(data1, fill_value=3) + farr2 = SparseArray(data2, fill_value=3) + + def _check_op(op, first, second): + res = op(first, second) + exp = SparseArray( + op(first.to_dense(), second.to_dense()), fill_value=first.fill_value + ) + assert isinstance(res, SparseArray) + tm.assert_almost_equal(res.to_dense(), exp.to_dense()) + + res2 = op(first, second.to_dense()) + assert isinstance(res2, SparseArray) + tm.assert_sp_array_equal(res, res2) + + res3 = op(first.to_dense(), second) + assert isinstance(res3, SparseArray) + tm.assert_sp_array_equal(res, res3) + + res4 = op(first, 4) + assert isinstance(res4, SparseArray) + + # Ignore this if the actual op raises (e.g. pow). + try: + exp = op(first.to_dense(), 4) + exp_fv = op(first.fill_value, 4) + except ValueError: + pass + else: + tm.assert_almost_equal(res4.fill_value, exp_fv) + tm.assert_almost_equal(res4.to_dense(), exp) + + with np.errstate(all="ignore"): + for first_arr, second_arr in [(arr1, arr2), (farr1, farr2)]: + _check_op(op, first_arr, second_arr) + + def test_pickle(self): + def _check_roundtrip(obj): + unpickled = tm.round_trip_pickle(obj) + tm.assert_sp_array_equal(unpickled, obj) + + _check_roundtrip(self.arr) + _check_roundtrip(self.zarr) + + def test_generator_warnings(self): + sp_arr = SparseArray([1, 2, 3]) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings(action="always", category=DeprecationWarning) + warnings.filterwarnings(action="always", category=PendingDeprecationWarning) + for _ in sp_arr: + pass + assert len(w) == 0 + + def test_fillna(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + res = s.fillna(-1) + exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, -1, 
3, -1], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0]) + res = s.fillna(-1) + exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan]) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0) + res = s.fillna(-1) + exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + # float dtype's fill_value is np.nan, replaced by -1 + s = SparseArray([0.0, 0.0, 0.0, 0.0]) + res = s.fillna(-1) + exp = SparseArray([0.0, 0.0, 0.0, 0.0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + # int dtype shouldn't have missing. No changes. + s = SparseArray([0, 0, 0, 0]) + assert s.dtype == SparseDtype(np.int64) + assert s.fill_value == 0 + res = s.fillna(-1) + tm.assert_sp_array_equal(res, s) + + s = SparseArray([0, 0, 0, 0], fill_value=0) + assert s.dtype == SparseDtype(np.int64) + assert s.fill_value == 0 + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=0) + tm.assert_sp_array_equal(res, exp) + + # fill_value can be nan if there is no missing hole. + # only fill_value will be changed + s = SparseArray([0, 0, 0, 0], fill_value=np.nan) + assert s.dtype == SparseDtype(np.int64, fill_value=np.nan) + assert np.isnan(s.fill_value) + res = s.fillna(-1) + exp = SparseArray([0, 0, 0, 0], fill_value=-1) + tm.assert_sp_array_equal(res, exp) + + def test_fillna_overlap(self): + s = SparseArray([1, np.nan, np.nan, 3, np.nan]) + # filling with existing value doesn't replace existing value with + # fill_value, i.e. 
existing 3 remains in sp_values + res = s.fillna(3) + exp = np.array([1, 3, 3, 3, 3], dtype=np.float64) + tm.assert_numpy_array_equal(res.to_dense(), exp) + + s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0) + res = s.fillna(3) + exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64) + tm.assert_sp_array_equal(res, exp) + + def test_nonzero(self): + # Tests regression #21172. + sa = SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + expected = np.array([2, 5, 9], dtype=np.int32) + (result,) = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + + sa = SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) + (result,) = sa.nonzero() + tm.assert_numpy_array_equal(expected, result) + + +class TestSparseArrayAnalytics: + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0), + ], + ) + def test_all(self, data, pos, neg): + # GH 17570 + out = SparseArray(data).all() + assert out + + out = SparseArray(data, fill_value=pos).all() + assert out + + data[1] = neg + out = SparseArray(data).all() + assert not out + + out = SparseArray(data, fill_value=pos).all() + assert not out + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([True, True, True], True, False), + ([1, 2, 1], 1, 0), + ([1.0, 2.0, 1.0], 1.0, 0.0), + ], + ) + @td.skip_if_np_lt("1.15") # prior didn't dispatch + def test_numpy_all(self, data, pos, neg): + # GH 17570 + out = np.all(SparseArray(data)) + assert out + + out = np.all(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.all(SparseArray(data)) + assert not out + + out = np.all(SparseArray(data, fill_value=pos)) + assert not out + + # raises with a different message on py2. 
+ msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.all(SparseArray(data), out=np.array([])) + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0), + ], + ) + def test_any(self, data, pos, neg): + # GH 17570 + out = SparseArray(data).any() + assert out + + out = SparseArray(data, fill_value=pos).any() + assert out + + data[1] = neg + out = SparseArray(data).any() + assert not out + + out = SparseArray(data, fill_value=pos).any() + assert not out + + @pytest.mark.parametrize( + "data,pos,neg", + [ + ([False, True, False], True, False), + ([0, 2, 0], 2, 0), + ([0.0, 2.0, 0.0], 2.0, 0.0), + ], + ) + @td.skip_if_np_lt("1.15") # prior didn't dispatch + def test_numpy_any(self, data, pos, neg): + # GH 17570 + out = np.any(SparseArray(data)) + assert out + + out = np.any(SparseArray(data, fill_value=pos)) + assert out + + data[1] = neg + out = np.any(SparseArray(data)) + assert not out + + out = np.any(SparseArray(data, fill_value=pos)) + assert not out + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.any(SparseArray(data), out=out) + + def test_sum(self): + data = np.arange(10).astype(float) + out = SparseArray(data).sum() + assert out == 45.0 + + data[5] = np.nan + out = SparseArray(data, fill_value=2).sum() + assert out == 40.0 + + out = SparseArray(data, fill_value=np.nan).sum() + assert out == 40.0 + + def test_numpy_sum(self): + data = np.arange(10).astype(float) + out = np.sum(SparseArray(data)) + assert out == 45.0 + + data[5] = np.nan + out = np.sum(SparseArray(data, fill_value=2)) + assert out == 40.0 + + out = np.sum(SparseArray(data, fill_value=np.nan)) + assert out == 40.0 + + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with 
pytest.raises(ValueError, match=msg): + np.sum(SparseArray(data), out=out) + + @pytest.mark.parametrize( + "data,expected", + [ + ( + np.array([1, 2, 3, 4, 5], dtype=float), # non-null data + SparseArray(np.array([1.0, 3.0, 6.0, 10.0, 15.0])), + ), + ( + np.array([1, 2, np.nan, 4, 5], dtype=float), # null data + SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0])), + ), + ], + ) + @pytest.mark.parametrize("numpy", [True, False]) + def test_cumsum(self, data, expected, numpy): + cumsum = np.cumsum if numpy else lambda s: s.cumsum() + + out = cumsum(SparseArray(data)) + tm.assert_sp_array_equal(out, expected) + + out = cumsum(SparseArray(data, fill_value=np.nan)) + tm.assert_sp_array_equal(out, expected) + + out = cumsum(SparseArray(data, fill_value=2)) + tm.assert_sp_array_equal(out, expected) + + if numpy: # numpy compatibility checks. + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.cumsum(SparseArray(data), out=out) + else: + axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid. 
+ msg = re.escape(f"axis(={axis}) out of bounds") + with pytest.raises(ValueError, match=msg): + SparseArray(data).cumsum(axis=axis) + + def test_mean(self): + data = np.arange(10).astype(float) + out = SparseArray(data).mean() + assert out == 4.5 + + data[5] = np.nan + out = SparseArray(data).mean() + assert out == 40.0 / 9 + + def test_numpy_mean(self): + data = np.arange(10).astype(float) + out = np.mean(SparseArray(data)) + assert out == 4.5 + + data[5] = np.nan + out = np.mean(SparseArray(data)) + assert out == 40.0 / 9 + + msg = "the 'dtype' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), dtype=np.int64) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.mean(SparseArray(data), out=out) + + def test_ufunc(self): + # GH 13853 make sure ufunc is applied to fill_value + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([1, np.nan, 2, np.nan, 2]) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=-1) + result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index, fill_value=1) + tm.assert_sp_array_equal(abs(sparse), result) + tm.assert_sp_array_equal(np.abs(sparse), result) + + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2])) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray(np.sin([1, -1, 0, -2]), 
fill_value=np.sin(0)) + tm.assert_sp_array_equal(np.sin(sparse), result) + + def test_ufunc_args(self): + # GH 13853 make sure ufunc is applied to fill_value, including its arg + sparse = SparseArray([1, np.nan, 2, np.nan, -2]) + result = SparseArray([2, np.nan, 3, np.nan, -1]) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 2, -2], fill_value=1) + result = SparseArray([2, 0, 3, -1], fill_value=2) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + sparse = SparseArray([1, -1, 0, -2], fill_value=0) + result = SparseArray([2, 0, 1, -1], fill_value=1) + tm.assert_sp_array_equal(np.add(sparse, 1), result) + + @pytest.mark.parametrize("fill_value", [0.0, np.nan]) + def test_modf(self, fill_value): + # https://github.com/pandas-dev/pandas/issues/26946 + sparse = SparseArray([fill_value] * 10 + [1.1, 2.2], fill_value=fill_value) + r1, r2 = np.modf(sparse) + e1, e2 = np.modf(np.asarray(sparse)) + tm.assert_sp_array_equal(r1, SparseArray(e1, fill_value=fill_value)) + tm.assert_sp_array_equal(r2, SparseArray(e2, fill_value=fill_value)) + + def test_nbytes_integer(self): + arr = SparseArray([1, 0, 0, 0, 2], kind="integer") + result = arr.nbytes + # (2 * 8) + 2 * 4 + assert result == 24 + + def test_nbytes_block(self): + arr = SparseArray([1, 2, 0, 0, 0], kind="block") + result = arr.nbytes + # (2 * 8) + 4 + 4 + # sp_values, blocs, blenghts + assert result == 24 + + def test_asarray_datetime64(self): + s = SparseArray(pd.to_datetime(["2012", None, None, "2013"])) + np.asarray(s) + + def test_density(self): + arr = SparseArray([0, 1]) + assert arr.density == 0.5 + + def test_npoints(self): + arr = SparseArray([0, 1]) + assert arr.npoints == 1 + + +class TestAccessor: + @pytest.mark.parametrize("attr", ["npoints", "density", "fill_value", "sp_values"]) + def test_get_attributes(self, attr): + arr = SparseArray([0, 1]) + ser = pd.Series(arr) + + result = getattr(ser.sparse, attr) + expected = getattr(arr, attr) + assert result == 
expected + + @td.skip_if_no_scipy + def test_from_coo(self): + import scipy.sparse + + row = [0, 3, 1, 0] + col = [0, 3, 1, 2] + data = [4, 5, 7, 9] + sp_array = scipy.sparse.coo_matrix((data, (row, col))) + result = pd.Series.sparse.from_coo(sp_array) + + index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]]) + expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]") + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_to_coo(self): + import scipy.sparse + + ser = pd.Series( + [1, 2, 3], + index=pd.MultiIndex.from_product([[0], [1, 2, 3]], names=["a", "b"]), + dtype="Sparse[int]", + ) + A, _, _ = ser.sparse.to_coo() + assert isinstance(A, scipy.sparse.coo.coo_matrix) + + def test_non_sparse_raises(self): + ser = pd.Series([1, 2, 3]) + with pytest.raises(AttributeError, match=".sparse"): + ser.sparse.density + + +def test_setting_fill_value_fillna_still_works(): + # This is why letting users update fill_value / dtype is bad + # astype has the same problem. + arr = SparseArray([1.0, np.nan, 1.0], fill_value=0.0) + arr.fill_value = np.nan + result = arr.isna() + # Can't do direct comparison, since the sp_index will be different + # So let's convert to ndarray and check there. + result = np.asarray(result) + + expected = np.array([False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + +def test_setting_fill_value_updates(): + arr = SparseArray([0.0, np.nan], fill_value=0) + arr.fill_value = np.nan + # use private constructor to get the index right + # otherwise both nans would be un-stored. 
+ expected = SparseArray._simple_new( + sparse_array=np.array([np.nan]), + sparse_index=IntIndex(2, [1]), + dtype=SparseDtype(float, np.nan), + ) + tm.assert_sp_array_equal(arr, expected) + + +@pytest.mark.parametrize( + "arr, loc", + [ + ([None, 1, 2], 0), + ([0, None, 2], 1), + ([0, 1, None], 2), + ([0, 1, 1, None, None], 3), + ([1, 1, 1, 2], -1), + ([], -1), + ], +) +def test_first_fill_value_loc(arr, loc): + result = SparseArray(arr)._first_fill_value_loc() + assert result == loc + + +@pytest.mark.parametrize( + "arr", [[1, 2, np.nan, np.nan], [1, np.nan, 2, np.nan], [1, 2, np.nan]] +) +@pytest.mark.parametrize("fill_value", [np.nan, 0, 1]) +def test_unique_na_fill(arr, fill_value): + a = SparseArray(arr, fill_value=fill_value).unique() + b = pd.Series(arr).unique() + assert isinstance(a, SparseArray) + a = np.asarray(a) + tm.assert_numpy_array_equal(a, b) + + +def test_unique_all_sparse(): + # https://github.com/pandas-dev/pandas/issues/23168 + arr = SparseArray([0, 0]) + result = arr.unique() + expected = SparseArray([0]) + tm.assert_sp_array_equal(result, expected) + + +def test_map(): + arr = SparseArray([0, 1, 2]) + expected = SparseArray([10, 11, 12], fill_value=10) + + # dict + result = arr.map({0: 10, 1: 11, 2: 12}) + tm.assert_sp_array_equal(result, expected) + + # series + result = arr.map(pd.Series({0: 10, 1: 11, 2: 12})) + tm.assert_sp_array_equal(result, expected) + + # function + result = arr.map(pd.Series({0: 10, 1: 11, 2: 12})) + expected = SparseArray([10, 11, 12], fill_value=10) + tm.assert_sp_array_equal(result, expected) + + +def test_map_missing(): + arr = SparseArray([0, 1, 2]) + expected = SparseArray([10, 11, None], fill_value=10) + + result = arr.map({0: 10, 1: 11}) + tm.assert_sp_array_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_combine_concat.py b/pandas/tests/arrays/sparse/test_combine_concat.py new file mode 100644 index 00000000..f1697dc9 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_combine_concat.py 
@@ -0,0 +1,31 @@ +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray + + +class TestSparseArrayConcat: + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_basic(self, kind): + a = SparseArray([1, 0, 0, 2], kind=kind) + b = SparseArray([1, 0, 2, 2], kind=kind) + + result = SparseArray._concat_same_type([a, b]) + # Can't make any assertions about the sparse index itself + # since we aren't don't merge sparse blocs across arrays + # in to_concat + expected = np.array([1, 2, 1, 2, 2], dtype="int64") + tm.assert_numpy_array_equal(result.sp_values, expected) + assert result.kind == kind + + @pytest.mark.parametrize("kind", ["integer", "block"]) + def test_uses_first_kind(self, kind): + other = "integer" if kind == "block" else "block" + a = SparseArray([1, 0, 0, 2], kind=kind) + b = SparseArray([1, 0, 2, 2], kind=other) + + result = SparseArray._concat_same_type([a, b]) + expected = np.array([1, 2, 1, 2, 2], dtype="int64") + tm.assert_numpy_array_equal(result.sp_values, expected) + assert result.kind == kind diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py new file mode 100644 index 00000000..5e9e2d85 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -0,0 +1,198 @@ +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.sparse import SparseDtype + + +@pytest.mark.parametrize( + "dtype, fill_value", + [ + ("int", 0), + ("float", np.nan), + ("bool", False), + ("object", np.nan), + ("datetime64[ns]", pd.NaT), + ("timedelta64[ns]", pd.NaT), + ], +) +def test_inferred_dtype(dtype, fill_value): + sparse_dtype = SparseDtype(dtype) + result = sparse_dtype.fill_value + if pd.isna(fill_value): + assert pd.isna(result) and type(result) == type(fill_value) + else: + assert result == fill_value + + +def test_from_sparse_dtype(): + dtype = SparseDtype("float", 0) + result = SparseDtype(dtype) + assert 
result.fill_value == 0 + + +def test_from_sparse_dtype_fill_value(): + dtype = SparseDtype("int", 1) + result = SparseDtype(dtype, fill_value=2) + expected = SparseDtype("int", 2) + assert result == expected + + +@pytest.mark.parametrize( + "dtype, fill_value", + [ + ("int", None), + ("float", None), + ("bool", None), + ("object", None), + ("datetime64[ns]", None), + ("timedelta64[ns]", None), + ("int", np.nan), + ("float", 0), + ], +) +def test_equal(dtype, fill_value): + a = SparseDtype(dtype, fill_value) + b = SparseDtype(dtype, fill_value) + assert a == b + assert b == a + + +def test_nans_equal(): + a = SparseDtype(float, float("nan")) + b = SparseDtype(float, np.nan) + assert a == b + assert b == a + + +@pytest.mark.parametrize( + "a, b", + [ + (SparseDtype("float64"), SparseDtype("float32")), + (SparseDtype("float64"), SparseDtype("float64", 0)), + (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)), + (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)), + (SparseDtype("float64"), np.dtype("float64")), + ], +) +def test_not_equal(a, b): + assert a != b + + +def test_construct_from_string_raises(): + with pytest.raises( + TypeError, match="Cannot construct a 'SparseDtype' from 'not a dtype'" + ): + SparseDtype.construct_from_string("not a dtype") + + +@pytest.mark.parametrize( + "dtype, expected", + [ + (SparseDtype(int), True), + (SparseDtype(float), True), + (SparseDtype(bool), True), + (SparseDtype(object), False), + (SparseDtype(str), False), + ], +) +def test_is_numeric(dtype, expected): + assert dtype._is_numeric is expected + + +def test_str_uses_object(): + result = SparseDtype(str).subtype + assert result == np.dtype("object") + + +@pytest.mark.parametrize( + "string, expected", + [ + ("Sparse[float64]", SparseDtype(np.dtype("float64"))), + ("Sparse[float32]", SparseDtype(np.dtype("float32"))), + ("Sparse[int]", SparseDtype(np.dtype("int"))), + ("Sparse[str]", SparseDtype(np.dtype("str"))), + ("Sparse[datetime64[ns]]", 
SparseDtype(np.dtype("datetime64[ns]"))), + ("Sparse", SparseDtype(np.dtype("float"), np.nan)), + ], +) +def test_construct_from_string(string, expected): + result = SparseDtype.construct_from_string(string) + assert result == expected + + +@pytest.mark.parametrize( + "a, b, expected", + [ + (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True), + (SparseDtype(int, 0), SparseDtype(int, 0), True), + (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True), + (SparseDtype(float, 0), SparseDtype(float, np.nan), False), + (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False), + ], +) +def test_hash_equal(a, b, expected): + result = a == b + assert result is expected + + result = hash(a) == hash(b) + assert result is expected + + +@pytest.mark.parametrize( + "string, expected", + [ + ("Sparse[int]", "int"), + ("Sparse[int, 0]", "int"), + ("Sparse[int64]", "int64"), + ("Sparse[int64, 0]", "int64"), + ("Sparse[datetime64[ns], 0]", "datetime64[ns]"), + ], +) +def test_parse_subtype(string, expected): + subtype, _ = SparseDtype._parse_subtype(string) + assert subtype == expected + + +@pytest.mark.parametrize( + "string", ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"] +) +def test_construct_from_string_fill_value_raises(string): + with pytest.raises(TypeError, match="fill_value in the string is not"): + SparseDtype.construct_from_string(string) + + +@pytest.mark.parametrize( + "original, dtype, expected", + [ + (SparseDtype(int, 0), float, SparseDtype(float, 0.0)), + (SparseDtype(int, 1), float, SparseDtype(float, 1.0)), + (SparseDtype(int, 1), str, SparseDtype(object, "1")), + (SparseDtype(float, 1.5), int, SparseDtype(int, 1)), + ], +) +def test_update_dtype(original, dtype, expected): + result = original.update_dtype(dtype) + assert result == expected + + +@pytest.mark.parametrize( + "original, dtype, expected_error_msg", + [ + ( + SparseDtype(float, np.nan), + int, + re.escape("Cannot convert non-finite values (NA or inf) to 
integer"), + ), + ( + SparseDtype(str, "abc"), + int, + re.escape("invalid literal for int() with base 10: 'abc'"), + ), + ], +) +def test_update_dtype_raises(original, dtype, expected_error_msg): + with pytest.raises(ValueError, match=expected_error_msg): + original.update_dtype(dtype) diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py new file mode 100644 index 00000000..a2f861d3 --- /dev/null +++ b/pandas/tests/arrays/sparse/test_libsparse.py @@ -0,0 +1,601 @@ +import operator + +import numpy as np +import pytest + +import pandas._libs.sparse as splib +import pandas.util._test_decorators as td + +from pandas import Series +import pandas._testing as tm +from pandas.core.arrays.sparse import BlockIndex, IntIndex, _make_index + +TEST_LENGTH = 20 + +plain_case = dict( + xloc=[0, 7, 15], + xlen=[3, 5, 5], + yloc=[2, 9, 14], + ylen=[2, 3, 5], + intersect_loc=[2, 9, 15], + intersect_len=[1, 3, 4], +) +delete_blocks = dict( + xloc=[0, 5], xlen=[4, 4], yloc=[1], ylen=[4], intersect_loc=[1], intersect_len=[3] +) +split_blocks = dict( + xloc=[0], + xlen=[10], + yloc=[0, 5], + ylen=[3, 7], + intersect_loc=[0, 5], + intersect_len=[3, 5], +) +skip_block = dict( + xloc=[10], + xlen=[5], + yloc=[0, 12], + ylen=[5, 3], + intersect_loc=[12], + intersect_len=[3], +) + +no_intersect = dict( + xloc=[0, 10], + xlen=[4, 6], + yloc=[5, 17], + ylen=[4, 2], + intersect_loc=[], + intersect_len=[], +) + + +def check_cases(_check_case): + def _check_case_dict(case): + _check_case( + case["xloc"], + case["xlen"], + case["yloc"], + case["ylen"], + case["intersect_loc"], + case["intersect_len"], + ) + + _check_case_dict(plain_case) + _check_case_dict(delete_blocks) + _check_case_dict(split_blocks) + _check_case_dict(skip_block) + _check_case_dict(no_intersect) + + # one or both is empty + _check_case([0], [5], [], [], [], []) + _check_case([], [], [], [], [], []) + + +class TestSparseIndexUnion: + def test_index_make_union(self): + def 
_check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + bresult = xindex.make_union(yindex) + assert isinstance(bresult, BlockIndex) + tm.assert_numpy_array_equal(bresult.blocs, np.array(eloc, dtype=np.int32)) + tm.assert_numpy_array_equal( + bresult.blengths, np.array(elen, dtype=np.int32) + ) + + ixindex = xindex.to_int_index() + iyindex = yindex.to_int_index() + iresult = ixindex.make_union(iyindex) + assert isinstance(iresult, IntIndex) + tm.assert_numpy_array_equal(iresult.indices, bresult.to_int_index().indices) + + """ + x: ---- + y: ---- + r: -------- + """ + xloc = [0] + xlen = [5] + yloc = [5] + ylen = [4] + eloc = [0] + elen = [9] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ----- ----- + y: ----- -- + """ + xloc = [0, 10] + xlen = [5, 5] + yloc = [2, 17] + ylen = [5, 2] + eloc = [0, 10, 17] + elen = [7, 5, 2] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ + y: ------- + r: ---------- + """ + xloc = [1] + xlen = [5] + yloc = [3] + ylen = [5] + eloc = [1] + elen = [7] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ ----- + y: ------- + r: ------------- + """ + xloc = [2, 10] + xlen = [4, 4] + yloc = [4] + ylen = [8] + eloc = [2] + elen = [12] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: --- ----- + y: ------- + r: ------------- + """ + xloc = [0, 5] + xlen = [3, 5] + yloc = [0] + ylen = [7] + eloc = [0] + elen = [10] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ------ ----- + y: ------- --- + r: ------------- + """ + xloc = [2, 10] + xlen = [4, 4] + yloc = [4, 13] + ylen = [8, 4] + eloc = [2] + elen = [15] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + """ + x: ---------------------- + y: ---- ---- --- + r: ---------------------- + """ + xloc = [2] + xlen = [15] + yloc = [4, 9, 14] + ylen = [3, 2, 2] + eloc = [2] + elen = [15] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) 
+ """ + x: ---- --- + y: --- --- + """ + xloc = [0, 10] + xlen = [3, 3] + yloc = [5, 15] + ylen = [2, 2] + eloc = [0, 5, 10, 15] + elen = [3, 2, 3, 2] + _check_case(xloc, xlen, yloc, ylen, eloc, elen) + + def test_int_index_make_union(self): + a = IntIndex(5, np.array([0, 3, 4], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([0, 2], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 2], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([], dtype=np.int32)) + b = IntIndex(5, np.array([], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + b = IntIndex(5, np.array([0, 1, 2, 3, 4], dtype=np.int32)) + res = a.make_union(b) + exp = IntIndex(5, np.array([0, 1, 2, 3, 4], np.int32)) + assert res.equals(exp) + + a = IntIndex(5, np.array([0, 1], dtype=np.int32)) + b = IntIndex(4, np.array([0, 1], dtype=np.int32)) + + msg = "Indices must reference same underlying length" + with pytest.raises(ValueError, match=msg): + a.make_union(b) + + +class TestSparseIndexIntersect: + @td.skip_if_windows + def test_intersect(self): + def _check_correct(a, b, expected): + result = a.intersect(b) + assert result.equals(expected) + + def _check_length_exc(a, longer): + msg = "Indices must reference same underlying length" + with pytest.raises(Exception, match=msg): + a.intersect(longer) + + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + expected = BlockIndex(TEST_LENGTH, eloc, elen) + longer_index = BlockIndex(TEST_LENGTH + 1, yloc, ylen) + + _check_correct(xindex, yindex, expected) + _check_correct( + xindex.to_int_index(), 
yindex.to_int_index(), expected.to_int_index() + ) + + _check_length_exc(xindex, longer_index) + _check_length_exc(xindex.to_int_index(), longer_index.to_int_index()) + + check_cases(_check_case) + + def test_intersect_empty(self): + xindex = IntIndex(4, np.array([], dtype=np.int32)) + yindex = IntIndex(4, np.array([2, 3], dtype=np.int32)) + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + xindex = xindex.to_block_index() + yindex = yindex.to_block_index() + assert xindex.intersect(yindex).equals(xindex) + assert yindex.intersect(xindex).equals(xindex) + + def test_intersect_identical(self): + cases = [ + IntIndex(5, np.array([1, 2], dtype=np.int32)), + IntIndex(5, np.array([0, 2, 4], dtype=np.int32)), + IntIndex(0, np.array([], dtype=np.int32)), + IntIndex(5, np.array([], dtype=np.int32)), + ] + + for case in cases: + assert case.intersect(case).equals(case) + case = case.to_block_index() + assert case.intersect(case).equals(case) + + +class TestSparseIndexCommon: + def test_int_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_block_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, 
np.array([2], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 + tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) + + def test_lookup(self): + for kind in ["integer", "block"]: + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == -1 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 0 + assert idx.lookup(3) == 1 + assert idx.lookup(4) == -1 + + idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) + + for i in range(-1, 5): + assert idx.lookup(i) == -1 + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == 1 + assert idx.lookup(2) == 2 + assert idx.lookup(3) == 3 + assert idx.lookup(4) == -1 + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) + assert idx.lookup(-1) == -1 + assert idx.lookup(0) == 0 + assert idx.lookup(1) == -1 + assert idx.lookup(2) == 1 + assert idx.lookup(3) == 2 + assert idx.lookup(4) == -1 + + def test_lookup_array(self): + for kind in ["integer", "block"]: + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind=kind) + + res = idx.lookup_array(np.array([-1, 0, 2], 
dtype=np.int32)) + exp = np.array([-1, -1, 0], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 0, -1, 1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = _make_index(4, np.array([], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2, 4], dtype=np.int32)) + exp = np.array([-1, -1, -1, -1], dtype=np.int32) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([-1, 0, 2], dtype=np.int32)) + exp = np.array([-1, 0, 2], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([4, 2, 1, 3], dtype=np.int32)) + exp = np.array([-1, 2, 1, 3], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind=kind) + res = idx.lookup_array(np.array([2, 1, 3, 0], dtype=np.int32)) + exp = np.array([1, -1, 2, 0], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + res = idx.lookup_array(np.array([1, 4, 2, 5], dtype=np.int32)) + exp = np.array([-1, -1, 1, -1], dtype=np.int32) + tm.assert_numpy_array_equal(res, exp) + + def test_lookup_basics(self): + def _check(index): + assert index.lookup(0) == -1 + assert index.lookup(5) == 0 + assert index.lookup(7) == 2 + assert index.lookup(8) == -1 + assert index.lookup(9) == -1 + assert index.lookup(10) == -1 + assert index.lookup(11) == -1 + assert index.lookup(12) == 3 + assert index.lookup(17) == 8 + assert index.lookup(18) == -1 + + bindex = BlockIndex(20, [5, 12], [3, 6]) + iindex = bindex.to_int_index() + + _check(bindex) + _check(iindex) + + # corner cases + + +class TestBlockIndex: + def test_block_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.blocs, np.array([2], dtype=np.int32)) + 
tm.assert_numpy_array_equal(idx.blengths, np.array([2], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.blocs, np.array([], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.blocs, np.array([0], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([4], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 2, 3], dtype=np.int32), kind="block") + assert isinstance(idx, BlockIndex) + assert idx.npoints == 3 + tm.assert_numpy_array_equal(idx.blocs, np.array([0, 2], dtype=np.int32)) + tm.assert_numpy_array_equal(idx.blengths, np.array([1, 2], dtype=np.int32)) + + def test_make_block_boundary(self): + for i in [5, 10, 100, 101]: + idx = _make_index(i, np.arange(0, i, 2, dtype=np.int32), kind="block") + + exp = np.arange(0, i, 2, dtype=np.int32) + tm.assert_numpy_array_equal(idx.blocs, exp) + tm.assert_numpy_array_equal(idx.blengths, np.ones(len(exp), dtype=np.int32)) + + def test_equals(self): + index = BlockIndex(10, [0, 4], [2, 5]) + + assert index.equals(index) + assert not index.equals(BlockIndex(10, [0, 4], [2, 6])) + + def test_check_integrity(self): + locs = [] + lengths = [] + + # 0-length OK + # TODO: index variables are not used...is that right? 
+ index = BlockIndex(0, locs, lengths) # noqa + + # also OK even though empty + index = BlockIndex(1, locs, lengths) # noqa + + msg = "Block 0 extends beyond end" + with pytest.raises(ValueError, match=msg): + BlockIndex(10, [5], [10]) + + msg = "Block 0 overlaps" + with pytest.raises(ValueError, match=msg): + BlockIndex(10, [2, 5], [5, 3]) + + def test_to_int_index(self): + locs = [0, 10] + lengths = [4, 6] + exp_inds = [0, 1, 2, 3, 10, 11, 12, 13, 14, 15] + + block = BlockIndex(20, locs, lengths) + dense = block.to_int_index() + + tm.assert_numpy_array_equal(dense.indices, np.array(exp_inds, dtype=np.int32)) + + def test_to_block_index(self): + index = BlockIndex(10, [0, 5], [4, 5]) + assert index.to_block_index() is index + + +class TestIntIndex: + def test_check_integrity(self): + + # Too many indices than specified in self.length + msg = "Too many indices" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=1, indices=[1, 2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # No index can be negative. + msg = "No index can be less than zero" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, -2, 3]) + + # All indices must be less than the length. + msg = "All indices must be less than the length" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 2, 5]) + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 2, 6]) + + # Indices must be strictly ascending. 
+ msg = "Indices must be strictly increasing" + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 3, 2]) + + with pytest.raises(ValueError, match=msg): + IntIndex(length=5, indices=[1, 3, 3]) + + def test_int_internal(self): + idx = _make_index(4, np.array([2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 2 + tm.assert_numpy_array_equal(idx.indices, np.array([2, 3], dtype=np.int32)) + + idx = _make_index(4, np.array([], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 0 + tm.assert_numpy_array_equal(idx.indices, np.array([], dtype=np.int32)) + + idx = _make_index(4, np.array([0, 1, 2, 3], dtype=np.int32), kind="integer") + assert isinstance(idx, IntIndex) + assert idx.npoints == 4 + tm.assert_numpy_array_equal(idx.indices, np.array([0, 1, 2, 3], dtype=np.int32)) + + def test_equals(self): + index = IntIndex(10, [0, 1, 2, 3, 4]) + assert index.equals(index) + assert not index.equals(IntIndex(10, [0, 1, 2, 3])) + + def test_to_block_index(self): + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + # see if survive the round trip + xbindex = xindex.to_int_index().to_block_index() + ybindex = yindex.to_int_index().to_block_index() + assert isinstance(xbindex, BlockIndex) + assert xbindex.equals(xindex) + assert ybindex.equals(yindex) + + check_cases(_check_case) + + def test_to_int_index(self): + index = IntIndex(10, [2, 3, 4, 5, 6]) + assert index.to_int_index() is index + + +class TestSparseOperators: + def _op_tests(self, sparse_op, python_op): + def _check_case(xloc, xlen, yloc, ylen, eloc, elen): + xindex = BlockIndex(TEST_LENGTH, xloc, xlen) + yindex = BlockIndex(TEST_LENGTH, yloc, ylen) + + xdindex = xindex.to_int_index() + ydindex = yindex.to_int_index() + + x = np.arange(xindex.npoints) * 10.0 + 1 + y = np.arange(yindex.npoints) * 100.0 + 1 + 
+ xfill = 0 + yfill = 2 + + result_block_vals, rb_index, bfill = sparse_op( + x, xindex, xfill, y, yindex, yfill + ) + result_int_vals, ri_index, ifill = sparse_op( + x, xdindex, xfill, y, ydindex, yfill + ) + + assert rb_index.to_int_index().equals(ri_index) + tm.assert_numpy_array_equal(result_block_vals, result_int_vals) + assert bfill == ifill + + # check versus Series... + xseries = Series(x, xdindex.indices) + xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill) + + yseries = Series(y, ydindex.indices) + yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill) + + series_result = python_op(xseries, yseries) + series_result = series_result.reindex(ri_index.indices) + + tm.assert_numpy_array_equal(result_block_vals, series_result.values) + tm.assert_numpy_array_equal(result_int_vals, series_result.values) + + check_cases(_check_case) + + @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"]) + def test_op(self, opname): + sparse_op = getattr(splib, f"sparse_{opname}_float64") + python_op = getattr(operator, opname) + self._op_tests(sparse_op, python_op) diff --git a/pandas/tests/arrays/string_/__init__.py b/pandas/tests/arrays/string_/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py new file mode 100644 index 00000000..5e2f14af --- /dev/null +++ b/pandas/tests/arrays/string_/test_string.py @@ -0,0 +1,271 @@ +import operator + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + + +def test_repr(): + df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype="string")}) + expected = " A\n0 a\n1 \n2 b" + assert repr(df) == expected + + expected = "0 a\n1 \n2 b\nName: A, dtype: string" + assert repr(df.A) == expected + + expected = "\n['a', , 'b']\nLength: 3, dtype: string" + assert repr(df.A.array) == expected + + +def 
test_none_to_nan(): + a = pd.arrays.StringArray._from_sequence(["a", None, "b"]) + assert a[1] is not None + assert a[1] is pd.NA + + +def test_setitem_validates(): + a = pd.arrays.StringArray._from_sequence(["a", "b"]) + with pytest.raises(ValueError, match="10"): + a[0] = 10 + + with pytest.raises(ValueError, match="strings"): + a[:] = np.array([1, 2]) + + +def test_setitem_with_scalar_string(): + # is_float_dtype considers some strings, like 'd', to be floats + # which can cause issues. + arr = pd.array(["a", "c"], dtype="string") + arr[0] = "d" + expected = pd.array(["d", "c"], dtype="string") + tm.assert_extension_array_equal(arr, expected) + + +@pytest.mark.parametrize( + "input, method", + [ + (["a", "b", "c"], operator.methodcaller("capitalize")), + (["a", "b", "c"], operator.methodcaller("capitalize")), + (["a b", "a bc. de"], operator.methodcaller("capitalize")), + ], +) +def test_string_methods(input, method): + a = pd.Series(input, dtype="string") + b = pd.Series(input, dtype="object") + result = method(a.str) + expected = method(b.str) + + assert result.dtype.name == "string" + tm.assert_series_equal(result.astype(object), expected) + + +def test_astype_roundtrip(): + s = pd.Series(pd.date_range("2000", periods=12)) + s[0] = None + + result = s.astype("string").astype("datetime64[ns]") + tm.assert_series_equal(result, s) + + +def test_add(): + a = pd.Series(["a", "b", "c", None, None], dtype="string") + b = pd.Series(["x", "y", None, "z", None], dtype="string") + + result = a + b + expected = pd.Series(["ax", "by", None, None, None], dtype="string") + tm.assert_series_equal(result, expected) + + result = a.add(b) + tm.assert_series_equal(result, expected) + + result = a.radd(b) + expected = pd.Series(["xa", "yb", None, None, None], dtype="string") + tm.assert_series_equal(result, expected) + + result = a.add(b, fill_value="-") + expected = pd.Series(["ax", "by", "c-", "-z", None], dtype="string") + tm.assert_series_equal(result, expected) + + +def 
test_add_2d(): + a = pd.array(["a", "b", "c"], dtype="string") + b = np.array([["a", "b", "c"]], dtype=object) + with pytest.raises(ValueError, match="3 != 1"): + a + b + + s = pd.Series(a) + with pytest.raises(ValueError, match="3 != 1"): + s + b + + +def test_add_sequence(): + a = pd.array(["a", "b", None, None], dtype="string") + other = ["x", None, "y", None] + + result = a + other + expected = pd.array(["ax", None, None, None], dtype="string") + tm.assert_extension_array_equal(result, expected) + + result = other + a + expected = pd.array(["xa", None, None, None], dtype="string") + tm.assert_extension_array_equal(result, expected) + + +def test_mul(): + a = pd.array(["a", "b", None], dtype="string") + result = a * 2 + expected = pd.array(["aa", "bb", None], dtype="string") + tm.assert_extension_array_equal(result, expected) + + result = 2 * a + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.xfail(reason="GH-28527") +def test_add_strings(): + array = pd.array(["a", "b", "c", "d"], dtype="string") + df = pd.DataFrame([["t", "u", "v", "w"]]) + assert array.__add__(df) is NotImplemented + + result = array + df + expected = pd.DataFrame([["at", "bu", "cv", "dw"]]).astype("string") + tm.assert_frame_equal(result, expected) + + result = df + array + expected = pd.DataFrame([["ta", "ub", "vc", "wd"]]).astype("string") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(reason="GH-28527") +def test_add_frame(): + array = pd.array(["a", "b", np.nan, np.nan], dtype="string") + df = pd.DataFrame([["x", np.nan, "y", np.nan]]) + + assert array.__add__(df) is NotImplemented + + result = array + df + expected = pd.DataFrame([["ax", np.nan, np.nan, np.nan]]).astype("string") + tm.assert_frame_equal(result, expected) + + result = df + array + expected = pd.DataFrame([["xa", np.nan, np.nan, np.nan]]).astype("string") + tm.assert_frame_equal(result, expected) + + +def test_comparison_methods_scalar(all_compare_operators): + op_name = 
all_compare_operators + + a = pd.array(["a", None, "c"], dtype="string") + other = "a" + result = getattr(a, op_name)(other) + expected = np.array([getattr(item, op_name)(other) for item in a], dtype=object) + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = getattr(a, op_name)(pd.NA) + expected = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_comparison_methods_array(all_compare_operators): + op_name = all_compare_operators + + a = pd.array(["a", None, "c"], dtype="string") + other = [None, None, "c"] + result = getattr(a, op_name)(other) + expected = np.empty_like(a, dtype="object") + expected[-1] = getattr(other[-1], op_name)(a[-1]) + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = getattr(a, op_name)(pd.NA) + expected = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_constructor_raises(): + with pytest.raises(ValueError, match="sequence of strings"): + pd.arrays.StringArray(np.array(["a", "b"], dtype="S1")) + + with pytest.raises(ValueError, match="sequence of strings"): + pd.arrays.StringArray(np.array([])) + + with pytest.raises(ValueError, match="strings or pandas.NA"): + pd.arrays.StringArray(np.array(["a", np.nan], dtype=object)) + + with pytest.raises(ValueError, match="strings or pandas.NA"): + pd.arrays.StringArray(np.array(["a", None], dtype=object)) + + with pytest.raises(ValueError, match="strings or pandas.NA"): + pd.arrays.StringArray(np.array(["a", pd.NaT], dtype=object)) + + +@pytest.mark.parametrize("copy", [True, False]) +def test_from_sequence_no_mutate(copy): + a = np.array(["a", np.nan], dtype=object) + original = a.copy() + result = pd.arrays.StringArray._from_sequence(a, copy=copy) + expected = pd.arrays.StringArray(np.array(["a", pd.NA], dtype=object)) + 
tm.assert_extension_array_equal(result, expected) + tm.assert_numpy_array_equal(a, original) + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") +def test_reduce(skipna): + arr = pd.Series(["a", "b", "c"], dtype="string") + result = arr.sum(skipna=skipna) + assert result == "abc" + + +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") +def test_reduce_missing(skipna): + arr = pd.Series([None, "a", None, "b", "c", None], dtype="string") + result = arr.sum(skipna=skipna) + if skipna: + assert result == "abc" + else: + assert pd.isna(result) + + +@td.skip_if_no("pyarrow", min_version="0.15.0") +def test_arrow_array(): + # protocol added in 0.15.0 + import pyarrow as pa + + data = pd.array(["a", "b", "c"], dtype="string") + arr = pa.array(data) + expected = pa.array(list(data), type=pa.string(), from_pandas=True) + assert arr.equals(expected) + + +@td.skip_if_no("pyarrow", min_version="0.15.1.dev") +def test_arrow_roundtrip(): + # roundtrip possible from arrow 1.0.0 + import pyarrow as pa + + data = pd.array(["a", "b", None], dtype="string") + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == "string" + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.StringDtype) + tm.assert_frame_equal(result, df) + # ensure the missing value is represented by NA and not np.nan or None + assert result.loc[2, "a"] is pd.NA + + +def test_value_counts_na(): + arr = pd.array(["a", "b", "a", pd.NA], dtype="string") + result = arr.value_counts(dropna=False) + expected = pd.Series([2, 1, 1], index=["a", "b", pd.NA], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=["a", "b"], dtype="Int64") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py new file 
mode 100644 index 00000000..b1b5a948 --- /dev/null +++ b/pandas/tests/arrays/test_array.py @@ -0,0 +1,385 @@ +import datetime +import decimal + +import numpy as np +import pytest +import pytz + +from pandas.core.dtypes.dtypes import registry + +import pandas as pd +import pandas._testing as tm +from pandas.api.extensions import register_extension_dtype +from pandas.api.types import is_scalar +from pandas.arrays import ( + BooleanArray, + DatetimeArray, + IntegerArray, + IntervalArray, + SparseArray, + StringArray, + TimedeltaArray, +) +from pandas.core.arrays import PandasArray, integer_array, period_array +from pandas.tests.extension.decimal import DecimalArray, DecimalDtype, to_decimal + + +@pytest.mark.parametrize( + "data, dtype, expected", + [ + # Basic NumPy defaults. + ([1, 2], None, IntegerArray._from_sequence([1, 2])), + ([1, 2], object, PandasArray(np.array([1, 2], dtype=object))), + ( + [1, 2], + np.dtype("float32"), + PandasArray(np.array([1.0, 2.0], dtype=np.dtype("float32"))), + ), + (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2]),), + # String alias passes through to NumPy + ([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))), + # Period alias + ( + [pd.Period("2000", "D"), pd.Period("2001", "D")], + "Period[D]", + period_array(["2000", "2001"], freq="D"), + ), + # Period dtype + ( + [pd.Period("2000", "D")], + pd.PeriodDtype("D"), + period_array(["2000"], freq="D"), + ), + # Datetime (naive) + ( + [1, 2], + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + ), + ( + np.array([1, 2], dtype="datetime64[ns]"), + None, + DatetimeArray._from_sequence(np.array([1, 2], dtype="datetime64[ns]")), + ), + ( + pd.DatetimeIndex(["2000", "2001"]), + np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + pd.DatetimeIndex(["2000", "2001"]), + None, + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + ["2000", "2001"], + 
np.dtype("datetime64[ns]"), + DatetimeArray._from_sequence(["2000", "2001"]), + ), + # Datetime (tz-aware) + ( + ["2000", "2001"], + pd.DatetimeTZDtype(tz="CET"), + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + ), + ), + # Timedelta + ( + ["1H", "2H"], + np.dtype("timedelta64[ns]"), + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + pd.TimedeltaIndex(["1H", "2H"]), + np.dtype("timedelta64[ns]"), + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + pd.TimedeltaIndex(["1H", "2H"]), + None, + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + # Category + (["a", "b"], "category", pd.Categorical(["a", "b"])), + ( + ["a", "b"], + pd.CategoricalDtype(None, ordered=True), + pd.Categorical(["a", "b"], ordered=True), + ), + # Interval + ( + [pd.Interval(1, 2), pd.Interval(3, 4)], + "interval", + IntervalArray.from_tuples([(1, 2), (3, 4)]), + ), + # Sparse + ([0, 1], "Sparse[int64]", SparseArray([0, 1], dtype="int64")), + # IntegerNA + ([1, None], "Int16", integer_array([1, None], dtype="Int16")), + (pd.Series([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + # String + (["a", None], "string", StringArray._from_sequence(["a", None])), + (["a", None], pd.StringDtype(), StringArray._from_sequence(["a", None]),), + # Boolean + ([True, None], "boolean", BooleanArray._from_sequence([True, None])), + ([True, None], pd.BooleanDtype(), BooleanArray._from_sequence([True, None]),), + # Index + (pd.Index([1, 2]), None, PandasArray(np.array([1, 2], dtype=np.int64))), + # Series[EA] returns the EA + ( + pd.Series(pd.Categorical(["a", "b"], categories=["a", "b", "c"])), + None, + pd.Categorical(["a", "b"], categories=["a", "b", "c"]), + ), + # "3rd party" EAs work + ([decimal.Decimal(0), decimal.Decimal(1)], "decimal", to_decimal([0, 1])), + # pass an ExtensionArray, but a different dtype + ( + period_array(["2000", "2001"], freq="D"), + "category", + pd.Categorical([pd.Period("2000", "D"), pd.Period("2001", "D")]), + ), 
+ ], +) +def test_array(data, dtype, expected): + result = pd.array(data, dtype=dtype) + tm.assert_equal(result, expected) + + +def test_array_copy(): + a = np.array([1, 2]) + # default is to copy + b = pd.array(a, dtype=a.dtype) + assert np.shares_memory(a, b._ndarray) is False + + # copy=True + b = pd.array(a, dtype=a.dtype, copy=True) + assert np.shares_memory(a, b._ndarray) is False + + # copy=False + b = pd.array(a, dtype=a.dtype, copy=False) + assert np.shares_memory(a, b._ndarray) is True + + +cet = pytz.timezone("CET") + + +@pytest.mark.parametrize( + "data, expected", + [ + # period + ( + [pd.Period("2000", "D"), pd.Period("2001", "D")], + period_array(["2000", "2001"], freq="D"), + ), + # interval + ([pd.Interval(0, 1), pd.Interval(1, 2)], IntervalArray.from_breaks([0, 1, 2]),), + # datetime + ( + [pd.Timestamp("2000"), pd.Timestamp("2001")], + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)], + DatetimeArray._from_sequence(["2000", "2001"]), + ), + ( + np.array([1, 2], dtype="M8[ns]"), + DatetimeArray(np.array([1, 2], dtype="M8[ns]")), + ), + ( + np.array([1, 2], dtype="M8[us]"), + DatetimeArray(np.array([1000, 2000], dtype="M8[ns]")), + ), + # datetimetz + ( + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")], + DatetimeArray._from_sequence( + ["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET") + ), + ), + ( + [ + datetime.datetime(2000, 1, 1, tzinfo=cet), + datetime.datetime(2001, 1, 1, tzinfo=cet), + ], + DatetimeArray._from_sequence(["2000", "2001"], tz=cet), + ), + # timedelta + ( + [pd.Timedelta("1H"), pd.Timedelta("2H")], + TimedeltaArray._from_sequence(["1H", "2H"]), + ), + ( + np.array([1, 2], dtype="m8[ns]"), + TimedeltaArray(np.array([1, 2], dtype="m8[ns]")), + ), + ( + np.array([1, 2], dtype="m8[us]"), + TimedeltaArray(np.array([1000, 2000], dtype="m8[ns]")), + ), + # integer + ([1, 2], IntegerArray._from_sequence([1, 2])), + ([1, None], 
IntegerArray._from_sequence([1, None])), + # string + (["a", "b"], StringArray._from_sequence(["a", "b"])), + (["a", None], StringArray._from_sequence(["a", None])), + # Boolean + ([True, False], BooleanArray._from_sequence([True, False])), + ([True, None], BooleanArray._from_sequence([True, None])), + ], +) +def test_array_inference(data, expected): + result = pd.array(data) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + # mix of frequencies + [pd.Period("2000", "D"), pd.Period("2001", "A")], + # mix of closed + [pd.Interval(0, 1, closed="left"), pd.Interval(1, 2, closed="right")], + # Mix of timezones + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000", tz="UTC")], + # Mix of tz-aware and tz-naive + [pd.Timestamp("2000", tz="CET"), pd.Timestamp("2000")], + np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]), + ], +) +def test_array_inference_fails(data): + result = pd.array(data) + expected = PandasArray(np.array(data, dtype=object)) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("data", [np.array([[1, 2], [3, 4]]), [[1, 2], [3, 4]]]) +def test_nd_raises(data): + with pytest.raises(ValueError, match="PandasArray must be 1-dimensional"): + pd.array(data, dtype="int64") + + +def test_scalar_raises(): + with pytest.raises(ValueError, match="Cannot pass scalar '1'"): + pd.array(1) + + +# --------------------------------------------------------------------------- +# A couple dummy classes to ensure that Series and Indexes are unboxed before +# getting to the EA classes. + + +@register_extension_dtype +class DecimalDtype2(DecimalDtype): + name = "decimal2" + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return DecimalArray2 + + +class DecimalArray2(DecimalArray): + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + if isinstance(scalars, (pd.Series, pd.Index)): + raise TypeError + + return super()._from_sequence(scalars, dtype=dtype, copy=copy) + + +def test_array_unboxes(index_or_series): + box = index_or_series + + data = box([decimal.Decimal("1"), decimal.Decimal("2")]) + # make sure it works + with pytest.raises(TypeError): + DecimalArray2._from_sequence(data) + + result = pd.array(data, dtype="decimal2") + expected = DecimalArray2._from_sequence(data.values) + tm.assert_equal(result, expected) + + +@pytest.fixture +def registry_without_decimal(): + idx = registry.dtypes.index(DecimalDtype) + registry.dtypes.pop(idx) + yield + registry.dtypes.append(DecimalDtype) + + +def test_array_not_registered(registry_without_decimal): + # check we aren't on it + assert registry.find("decimal") is None + data = [decimal.Decimal("1"), decimal.Decimal("2")] + + result = pd.array(data, dtype=DecimalDtype) + expected = DecimalArray._from_sequence(data) + tm.assert_equal(result, expected) + + +class TestArrayAnalytics: + def test_searchsorted(self, string_dtype): + arr = pd.array(["a", "b", "c"], dtype=string_dtype) + + result = arr.searchsorted("a", side="left") + assert is_scalar(result) + assert result == 0 + + result = arr.searchsorted("a", side="right") + assert is_scalar(result) + assert result == 1 + + def test_searchsorted_numeric_dtypes_scalar(self, any_real_dtype): + arr = pd.array([1, 3, 90], dtype=any_real_dtype) + result = arr.searchsorted(30) + assert is_scalar(result) + assert result == 2 + + result = arr.searchsorted([30]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_searchsorted_numeric_dtypes_vector(self, any_real_dtype): + arr = pd.array([1, 3, 90], dtype=any_real_dtype) + result = arr.searchsorted([2, 30]) + expected = np.array([1, 2], 
dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "arr, val", + [ + [ + pd.date_range("20120101", periods=10, freq="2D"), + pd.Timestamp("20120102"), + ], + [ + pd.date_range("20120101", periods=10, freq="2D", tz="Asia/Hong_Kong"), + pd.Timestamp("20120102", tz="Asia/Hong_Kong"), + ], + [ + pd.timedelta_range(start="1 day", end="10 days", periods=10), + pd.Timedelta("2 days"), + ], + ], + ) + def test_search_sorted_datetime64_scalar(self, arr, val): + arr = pd.array(arr) + result = arr.searchsorted(val) + assert is_scalar(result) + assert result == 1 + + def test_searchsorted_sorter(self, any_real_dtype): + arr = pd.array([3, 1, 2], dtype=any_real_dtype) + result = arr.searchsorted([0, 3], sorter=np.argsort(arr)) + expected = np.array([0, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py new file mode 100644 index 00000000..cb9b07db --- /dev/null +++ b/pandas/tests/arrays/test_boolean.py @@ -0,0 +1,931 @@ +import operator + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import BooleanArray +from pandas.core.arrays.boolean import coerce_to_array +from pandas.tests.extension.base import BaseOpsUtil + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return pd.BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +def test_boolean_array_constructor(): + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + result = BooleanArray(values, mask) + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises(TypeError, match="values 
should be boolean numpy array"): + BooleanArray(values.tolist(), mask) + + with pytest.raises(TypeError, match="mask should be boolean numpy array"): + BooleanArray(values, mask.tolist()) + + with pytest.raises(TypeError, match="values should be boolean numpy array"): + BooleanArray(values.astype(int), mask) + + with pytest.raises(TypeError, match="mask should be boolean numpy array"): + BooleanArray(values, None) + + with pytest.raises(ValueError, match="values must be a 1D array"): + BooleanArray(values.reshape(1, -1), mask) + + with pytest.raises(ValueError, match="mask must be a 1D array"): + BooleanArray(values, mask.reshape(1, -1)) + + +def test_boolean_array_constructor_copy(): + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + result = BooleanArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = BooleanArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +def test_to_boolean_array(): + expected = BooleanArray( + np.array([True, False, True]), np.array([False, False, False]) + ) + + result = pd.array([True, False, True], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, True]), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + expected = BooleanArray( + np.array([True, False, True]), np.array([False, False, True]) + ) + + result = pd.array([True, False, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_all_none(): + expected = BooleanArray(np.array([True, True, 
True]), np.array([True, True, True])) + + result = pd.array([None, None, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "a, b", + [ + ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]), + ([True, np.nan], [True, None]), + ([True, pd.NA], [True, None]), + ([np.nan, np.nan], [None, None]), + (np.array([np.nan, np.nan], dtype=float), [None, None]), + ], +) +def test_to_boolean_array_missing_indicators(a, b): + result = pd.array(a, dtype="boolean") + expected = pd.array(b, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + ["1", "2"], + # "foo", + [1, 2], + [1.0, 2.0], + pd.date_range("20130101", periods=2), + np.array(["foo"]), + np.array([1, 2]), + np.array([1.0, 2.0]), + [np.nan, {"a": 1}], + ], +) +def test_to_boolean_array_error(values): + # error in converting existing arrays to BooleanArray + with pytest.raises(TypeError): + pd.array(values, dtype="boolean") + + +def test_to_boolean_array_from_integer_array(): + result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array(np.array([1, 0, 1, None]), dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_from_float_array(): + result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean") + expected = pd.array([True, False, True, None], 
dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_integer_like(): + # integers of 0's and 1's + result = pd.array([1, 0, 1, 0], dtype="boolean") + expected = pd.array([True, False, True, False], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + # with missing values + result = pd.array([1, 0, 1, None], dtype="boolean") + expected = pd.array([True, False, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_coerce_to_array(): + # TODO this is currently not public API + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + result = BooleanArray(*coerce_to_array(values, mask=mask)) + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + assert result._data is values + assert result._mask is mask + result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True)) + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + assert result._data is not values + assert result._mask is not mask + + # mixed missing from values and mask + values = [True, False, None, False] + mask = np.array([False, False, False, True], dtype="bool") + result = BooleanArray(*coerce_to_array(values, mask=mask)) + expected = BooleanArray( + np.array([True, False, True, True]), np.array([False, False, True, True]) + ) + tm.assert_extension_array_equal(result, expected) + result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask)) + tm.assert_extension_array_equal(result, expected) + result = BooleanArray(*coerce_to_array(values, mask=mask.tolist())) + tm.assert_extension_array_equal(result, expected) + + # raise errors for wrong dimension + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + + with pytest.raises(ValueError, match="values must be a 
1D list-like"): + coerce_to_array(values.reshape(1, -1)) + + with pytest.raises(ValueError, match="mask must be a 1D list-like"): + coerce_to_array(values, mask=mask.reshape(1, -1)) + + +def test_coerce_to_array_from_boolean_array(): + # passing BooleanArray to coerce_to_array + values = np.array([True, False, True, False], dtype="bool") + mask = np.array([False, False, False, True], dtype="bool") + arr = BooleanArray(values, mask) + result = BooleanArray(*coerce_to_array(arr)) + tm.assert_extension_array_equal(result, arr) + # no copy + assert result._data is arr._data + assert result._mask is arr._mask + + result = BooleanArray(*coerce_to_array(arr), copy=True) + tm.assert_extension_array_equal(result, arr) + assert result._data is not arr._data + assert result._mask is not arr._mask + + with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"): + coerce_to_array(arr, mask=mask) + + +def test_coerce_to_numpy_array(): + # with missing values -> object dtype + arr = pd.array([True, False, None], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # also with no missing values -> object dtype + arr = pd.array([True, False, True], dtype="boolean") + result = np.array(arr) + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # force bool dtype + result = np.array(arr, dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + # with missing values will raise error + arr = pd.array([True, False, None], dtype="boolean") + with pytest.raises(ValueError): + np.array(arr, dtype="bool") + + +def test_to_boolean_array_from_strings(): + result = BooleanArray._from_sequence_of_strings( + np.array(["True", "False", np.nan], dtype=object) + ) + expected = BooleanArray( + np.array([True, False, False]), np.array([False, False, True]) + ) + 
+ tm.assert_extension_array_equal(result, expected) + + +def test_to_boolean_array_from_strings_invalid_string(): + with pytest.raises(ValueError, match="cannot be cast"): + BooleanArray._from_sequence_of_strings(["donkey"]) + + +def test_repr(): + df = pd.DataFrame({"A": pd.array([True, False, None], dtype="boolean")}) + expected = " A\n0 True\n1 False\n2 " + assert repr(df) == expected + + expected = "0 True\n1 False\n2 \nName: A, dtype: boolean" + assert repr(df.A) == expected + + expected = "\n[True, False, ]\nLength: 3, dtype: boolean" + assert repr(df.A.array) == expected + + +@pytest.mark.parametrize("box", [True, False], ids=["series", "array"]) +def test_to_numpy(box): + con = pd.Series if box else pd.array + # default (with or without missing values) -> object dtype + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, True], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy() + expected = np.array([True, False, pd.NA], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype="str") + expected = np.array([True, False, pd.NA], dtype=" can convert to bool, otherwise raises + arr = con([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype="bool") + expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + arr = con([True, False, None], dtype="boolean") + with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"): + result = arr.to_numpy(dtype="bool") + + # specify dtype and na_value + arr = con([True, False, None], dtype="boolean") + result = arr.to_numpy(dtype=object, na_value=None) + expected = np.array([True, False, None], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype=bool, na_value=False) + expected 
= np.array([True, False, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="int64", na_value=-99) + expected = np.array([1, 0, -99], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # converting to int or float without specifying na_value raises + with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"): + arr.to_numpy(dtype="int64") + with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"): + arr.to_numpy(dtype="float64") + + +def test_to_numpy_copy(): + # to_numpy can be zero-copy if no missing values + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool) + result[0] = False + tm.assert_extension_array_equal( + arr, pd.array([False, False, True], dtype="boolean") + ) + + arr = pd.array([True, False, True], dtype="boolean") + result = arr.to_numpy(dtype=bool, copy=True) + result[0] = False + tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean")) + + +def test_astype(): + # with missing values + arr = pd.array([True, False, None], dtype="boolean") + + with pytest.raises(ValueError, match="cannot convert NA to integer"): + arr.astype("int64") + + with pytest.raises(ValueError, match="cannot convert float NaN to"): + arr.astype("bool") + + result = arr.astype("float64") + expected = np.array([1, 0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("str") + expected = np.array(["True", "False", ""], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + # no missing values + arr = pd.array([True, False, True], dtype="boolean") + result = arr.astype("int64") + expected = np.array([1, 0, 1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("bool") 
+ expected = np.array([True, False, True], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_to_boolean_array(): + # astype to BooleanArray + arr = pd.array([True, False, None], dtype="boolean") + + result = arr.astype("boolean") + tm.assert_extension_array_equal(result, arr) + result = arr.astype(pd.BooleanDtype()) + tm.assert_extension_array_equal(result, arr) + + +def test_astype_to_integer_array(): + # astype to IntegerArray + arr = pd.array([True, False, None], dtype="boolean") + + result = arr.astype("Int64") + expected = pd.array([1, 0, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("na", [None, np.nan, pd.NA]) +def test_setitem_missing_values(na): + arr = pd.array([True, False, None], dtype="boolean") + expected = pd.array([True, None, None], dtype="boolean") + arr[1] = na + tm.assert_extension_array_equal(arr, expected) + + +@pytest.mark.parametrize( + "ufunc", [np.add, np.logical_or, np.logical_and, np.logical_xor] +) +def test_ufuncs_binary(ufunc): + # two BooleanArrays + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a, a) + expected = pd.array(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s, a) + expected = pd.Series(ufunc(a._data, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + # Boolean with numpy array + arr = np.array([True, True, False]) + result = ufunc(a, arr) + expected = pd.array(ufunc(a._data, arr), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = pd.array(ufunc(arr, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # BooleanArray with scalar + result = ufunc(a, True) + expected = pd.array(ufunc(a._data, True), 
dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + result = ufunc(True, a) + expected = pd.array(ufunc(True, a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + # not handled types + with pytest.raises(TypeError): + ufunc(a, "test") + + +@pytest.mark.parametrize("ufunc", [np.logical_not]) +def test_ufuncs_unary(ufunc): + a = pd.array([True, False, None], dtype="boolean") + result = ufunc(a) + expected = pd.array(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(ufunc(a._data), dtype="boolean") + expected[a._mask] = np.nan + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("values", [[True, False], [True, None]]) +def test_ufunc_reduce_raises(values): + a = pd.array(values, dtype="boolean") + with pytest.raises(NotImplementedError): + np.add.reduce(a) + + +class TestUnaryOps: + def test_invert(self): + a = pd.array([True, False, None], dtype="boolean") + expected = pd.array([False, True, None], dtype="boolean") + tm.assert_extension_array_equal(~a, expected) + + expected = pd.Series(expected, index=["a", "b", "c"], name="name") + result = ~pd.Series(a, index=["a", "b", "c"], name="name") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": a, "B": [True, False, False]}, index=["a", "b", "c"]) + result = ~df + expected = pd.DataFrame( + {"A": expected, "B": [False, True, True]}, index=["a", "b", "c"] + ) + tm.assert_frame_equal(result, expected) + + +class TestLogicalOps(BaseOpsUtil): + def test_numpy_scalars_ok(self, all_logical_operators): + a = pd.array([True, False, None], dtype="boolean") + op = getattr(a, all_logical_operators) + + tm.assert_extension_array_equal(op(True), op(np.bool(True))) + tm.assert_extension_array_equal(op(False), op(np.bool(False))) + + def get_op_from_name(self, 
op_name): + short_opname = op_name.strip("_") + short_opname = short_opname if "xor" in short_opname else short_opname + "_" + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + def test_empty_ok(self, all_logical_operators): + a = pd.array([], dtype="boolean") + op_name = all_logical_operators + result = getattr(a, op_name)(True) + tm.assert_extension_array_equal(a, result) + + result = getattr(a, op_name)(False) + tm.assert_extension_array_equal(a, result) + + # TODO: pd.NA + # result = getattr(a, op_name)(pd.NA) + # tm.assert_extension_array_equal(a, result) + + def test_logical_length_mismatch_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Lengths must match to compare" + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)([True, False]) + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(np.array([True, False])) + + with pytest.raises(ValueError, match=msg): + getattr(a, op_name)(pd.array([True, False], dtype="boolean")) + + def test_logical_nan_raises(self, all_logical_operators): + op_name = all_logical_operators + a = pd.array([True, False, None], dtype="boolean") + msg = "Got float instead" + + with pytest.raises(TypeError, match=msg): + getattr(a, op_name)(np.nan) + + @pytest.mark.parametrize("other", ["a", 1]) + def test_non_bool_or_na_other_raises(self, other, all_logical_operators): + a = pd.array([True, False], dtype="boolean") + with pytest.raises(TypeError, match=str(type(other).__name__)): + getattr(a, all_logical_operators)(other) + + def test_kleene_or(self): + # A clear test of behavior. 
+ a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a | b + expected = pd.array( + [True, True, True, True, False, None, True, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [True, None, None]), + (True, [True, True, True]), + (np.bool_(True), [True, True, True]), + (False, [True, False, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_or_scalar(self, other, expected): + # TODO: test True & False + a = pd.array([True, False, None], dtype="boolean") + result = a | other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other | a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_and(self): + # A clear test of behavior. 
+ a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a & b + expected = pd.array( + [True, False, None, False, False, False, None, False, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + @pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, False, None]), + (True, [True, False, None]), + (False, [False, False, False]), + (np.bool_(True), [True, False, None]), + (np.bool_(False), [False, False, False]), + ], + ) + def test_kleene_and_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a & other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other & a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_kleene_xor(self): + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + result = a ^ b + expected = pd.array( + [False, True, None, True, False, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + result = b ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, dtype="boolean") + ) + + 
@pytest.mark.parametrize( + "other, expected", + [ + (pd.NA, [None, None, None]), + (True, [False, True, None]), + (np.bool_(True), [False, True, None]), + (np.bool_(False), [True, False, None]), + ], + ) + def test_kleene_xor_scalar(self, other, expected): + a = pd.array([True, False, None], dtype="boolean") + result = a ^ other + expected = pd.array(expected, dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + result = other ^ a + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + @pytest.mark.parametrize( + "other", [True, False, pd.NA, [True, False, None] * 3], + ) + def test_no_masked_assumptions(self, other, all_logical_operators): + # The logical operations should not assume that masked values are False! + a = pd.arrays.BooleanArray( + np.array([True, True, True, False, False, False, True, False, True]), + np.array([False] * 6 + [True, True, True]), + ) + b = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + if isinstance(other, list): + other = pd.array(other, dtype="boolean") + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + if isinstance(other, BooleanArray): + other._data[other._mask] = True + a._data[a._mask] = False + + result = getattr(a, all_logical_operators)(other) + expected = getattr(b, all_logical_operators)(other) + tm.assert_extension_array_equal(result, expected) + + +class TestComparisonOps(BaseOpsUtil): + def _compare_other(self, data, op_name, other): + op = self.get_op_from_name(op_name) + + # array + result = pd.Series(op(data, other)) + expected = pd.Series(op(data._data, other), dtype="boolean") + # propagate NAs + expected[data._mask] = pd.NA + + tm.assert_series_equal(result, expected) + + # series + s = pd.Series(data) + result = op(s, other) 
+ + expected = pd.Series(data._data) + expected = op(expected, other) + expected = expected.astype("boolean") + # propagate NAs + expected[data._mask] = pd.NA + + tm.assert_series_equal(result, expected) + + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + self._compare_other(data, op_name, True) + + def test_compare_array(self, data, all_compare_operators): + op_name = all_compare_operators + other = pd.array([True] * len(data), dtype="boolean") + self._compare_other(data, op_name, other) + other = np.array([True] * len(data)) + self._compare_other(data, op_name, other) + other = pd.Series([True] * len(data)) + self._compare_other(data, op_name, other) + + @pytest.mark.parametrize("other", [True, False, pd.NA]) + def test_scalar(self, other, all_compare_operators): + op = self.get_op_from_name(all_compare_operators) + a = pd.array([True, False, None], dtype="boolean") + + result = op(a, other) + + if other is pd.NA: + expected = pd.array([None, None, None], dtype="boolean") + else: + values = op(a._data, other) + expected = BooleanArray(values, a._mask, copy=True) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = None + tm.assert_extension_array_equal( + a, pd.array([True, False, None], dtype="boolean") + ) + + def test_array(self, all_compare_operators): + op = self.get_op_from_name(all_compare_operators) + a = pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + b = pd.array([True, False, None] * 3, dtype="boolean") + + result = op(a, b) + + values = op(a._data, b._data) + mask = a._mask | b._mask + expected = BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = None + tm.assert_extension_array_equal( + a, pd.array([True] * 3 + [False] * 3 + [None] * 3, dtype="boolean") + ) + tm.assert_extension_array_equal( + b, pd.array([True, False, None] * 3, 
dtype="boolean") + ) + + +class TestArithmeticOps(BaseOpsUtil): + def test_error(self, data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + opa = getattr(data, op) + + # invalid scalars + with pytest.raises(TypeError): + ops("foo") + with pytest.raises(TypeError): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + if op not in ("__mul__", "__rmul__"): + # TODO(extension) numpy's mul with object array sees booleans as numbers + with pytest.raises(TypeError): + ops(pd.Series("foo", index=s.index)) + + # 2d + result = opa(pd.DataFrame({"A": s})) + assert result is NotImplemented + + with pytest.raises(NotImplementedError): + opa(np.arange(len(s)).reshape(-1, len(s))) + + +@pytest.mark.parametrize("dropna", [True, False]) +def test_reductions_return_types(dropna, data, all_numeric_reductions): + op = all_numeric_reductions + s = pd.Series(data) + if dropna: + s = s.dropna() + + if op in ("sum", "prod"): + assert isinstance(getattr(s, op)(), np.int64) + elif op in ("min", "max"): + assert isinstance(getattr(s, op)(), np.bool_) + else: + # "mean", "std", "var", "median", "kurt", "skew" + assert isinstance(getattr(s, op)(), np.float64) + + +@pytest.mark.parametrize( + "values, exp_any, exp_all, exp_any_noskip, exp_all_noskip", + [ + ([True, pd.NA], True, True, True, pd.NA), + ([False, pd.NA], False, False, pd.NA, False), + ([pd.NA], False, True, pd.NA, pd.NA), + ([], False, True, False, True), + ], +) +def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip): + # the methods return numpy scalars + exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any) + exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all) + exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip) + exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip) + + for con in [pd.array, pd.Series]: + a = con(values, dtype="boolean") + assert a.any() is 
exp_any + assert a.all() is exp_all + assert a.any(skipna=False) is exp_any_noskip + assert a.all(skipna=False) is exp_all_noskip + + assert np.any(a.any()) is exp_any + assert np.all(a.all()) is exp_all + + +# TODO when BooleanArray coerces to object dtype numpy array, need to do conversion +# manually in the indexing code +# def test_indexing_boolean_mask(): +# arr = pd.array([1, 2, 3, 4], dtype="Int64") +# mask = pd.array([True, False, True, False], dtype="boolean") +# result = arr[mask] +# expected = pd.array([1, 3], dtype="Int64") +# tm.assert_extension_array_equal(result, expected) + +# # missing values -> error +# mask = pd.array([True, False, True, None], dtype="boolean") +# with pytest.raises(IndexError): +# result = arr[mask] + + +@td.skip_if_no("pyarrow", min_version="0.15.0") +def test_arrow_array(data): + # protocol added in 0.15.0 + import pyarrow as pa + + arr = pa.array(data) + + # TODO use to_numpy(na_value=None) here + data_object = np.array(data, dtype=object) + data_object[data.isna()] = None + expected = pa.array(data_object, type=pa.bool_(), from_pandas=True) + assert arr.equals(expected) + + +@td.skip_if_no("pyarrow", min_version="0.15.1.dev") +def test_arrow_roundtrip(): + # roundtrip possible from arrow 1.0.0 + import pyarrow as pa + + data = pd.array([True, False, None], dtype="boolean") + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == "bool" + result = table.to_pandas() + assert isinstance(result["a"].dtype, pd.BooleanDtype) + tm.assert_frame_equal(result, df) + + +def test_value_counts_na(): + arr = pd.array([True, False, pd.NA], dtype="boolean") + result = arr.value_counts(dropna=False) + expected = pd.Series([1, 1, 1], index=[True, False, pd.NA], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([1, 1], index=[True, False], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_diff(): + a = pd.array( + 
[True, True, False, False, True, None, True, None, False], dtype="boolean" + ) + result = pd.core.algorithms.diff(a, 1) + expected = pd.array( + [None, False, True, False, True, None, None, None, None], dtype="boolean" + ) + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = s.diff() + expected = pd.Series(expected) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py new file mode 100644 index 00000000..3732818c --- /dev/null +++ b/pandas/tests/arrays/test_datetimelike.py @@ -0,0 +1,813 @@ +from typing import Type, Union + +import numpy as np +import pytest + +from pandas._libs import OutOfBoundsDatetime +from pandas.compat.numpy import _np_version_under1p18 + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray +from pandas.core.indexes.datetimes import DatetimeIndex +from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.timedeltas import TimedeltaIndex + + +# TODO: more freq variants +@pytest.fixture(params=["D", "B", "W", "M", "Q", "Y"]) +def period_index(request): + """ + A fixture to provide PeriodIndex objects with different frequencies. + + Most PeriodArray behavior is already tested in PeriodIndex tests, + so here we just test that the PeriodArray behavior matches + the PeriodIndex behavior. + """ + freqstr = request.param + # TODO: non-monotone indexes; NaTs, different start dates + pi = pd.period_range(start=pd.Timestamp("2000-01-01"), periods=100, freq=freqstr) + return pi + + +@pytest.fixture(params=["D", "B", "W", "M", "Q", "Y"]) +def datetime_index(request): + """ + A fixture to provide DatetimeIndex objects with different frequencies. + + Most DatetimeArray behavior is already tested in DatetimeIndex tests, + so here we just test that the DatetimeArray behavior matches + the DatetimeIndex behavior. 
+ """ + freqstr = request.param + # TODO: non-monotone indexes; NaTs, different start dates, timezones + dti = pd.date_range(start=pd.Timestamp("2000-01-01"), periods=100, freq=freqstr) + return dti + + +@pytest.fixture +def timedelta_index(request): + """ + A fixture to provide TimedeltaIndex objects with different frequencies. + Most TimedeltaArray behavior is already tested in TimedeltaIndex tests, + so here we just test that the TimedeltaArray behavior matches + the TimedeltaIndex behavior. + """ + # TODO: flesh this out + return pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"]) + + +class SharedTests: + index_cls: Type[Union[DatetimeIndex, PeriodIndex, TimedeltaIndex]] + + def test_compare_len1_raises(self): + # make sure we raise when comparing with different lengths, specific + # to the case where one has length-1, which numpy would broadcast + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + + idx = self.index_cls._simple_new(data, freq="D") + arr = self.array_cls(idx) + + with pytest.raises(ValueError, match="Lengths must match"): + arr == arr[:1] + + # test the index classes while we're at it, GH#23078 + with pytest.raises(ValueError, match="Lengths must match"): + idx <= idx[[0]] + + def test_take(self): + data = np.arange(100, dtype="i8") * 24 * 3600 * 10 ** 9 + np.random.shuffle(data) + + idx = self.index_cls._simple_new(data, freq="D") + arr = self.array_cls(idx) + + takers = [1, 4, 94] + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + takers = np.array([1, 4, 94]) + result = arr.take(takers) + expected = idx.take(takers) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_take_fill(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + + idx = self.index_cls._simple_new(data, freq="D") + arr = self.array_cls(idx) + + result = arr.take([-1, 1], allow_fill=True, fill_value=None) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], 
allow_fill=True, fill_value=np.nan) + assert result[0] is pd.NaT + + result = arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT) + assert result[0] is pd.NaT + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=2.0) + + with pytest.raises(ValueError): + arr.take([0, 1], allow_fill=True, fill_value=pd.Timestamp.now().time) + + def test_concat_same_type(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + + idx = self.index_cls._simple_new(data, freq="D").insert(0, pd.NaT) + arr = self.array_cls(idx) + + result = arr._concat_same_type([arr[:-1], arr[1:], arr]) + expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + + tm.assert_index_equal(self.index_cls(result), expected) + + def test_unbox_scalar(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + result = arr._unbox_scalar(arr[0]) + assert isinstance(result, int) + + result = arr._unbox_scalar(pd.NaT) + assert isinstance(result, int) + + with pytest.raises(ValueError): + arr._unbox_scalar("foo") + + def test_check_compatible_with(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + arr._check_compatible_with(arr[0]) + arr._check_compatible_with(arr[:1]) + arr._check_compatible_with(pd.NaT) + + def test_scalar_from_string(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + result = arr._scalar_from_string(str(arr[0])) + assert result == arr[0] + + def test_reduce_invalid(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + with pytest.raises(TypeError, match="cannot perform"): + arr._reduce("not a method") + + @pytest.mark.parametrize("method", ["pad", "backfill"]) + def test_fillna_method_doesnt_change_orig(self, method): + data = np.arange(10, dtype="i8") * 24 
* 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + arr[4] = pd.NaT + + fill_value = arr[3] if method == "pad" else arr[5] + + result = arr.fillna(method=method) + assert result[4] == fill_value + + # check that the original was not changed + assert arr[4] is pd.NaT + + def test_searchsorted(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + # scalar + result = arr.searchsorted(arr[1]) + assert result == 1 + + result = arr.searchsorted(arr[2], side="right") + assert result == 3 + + # own-type + result = arr.searchsorted(arr[1:3]) + expected = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + result = arr.searchsorted(arr[1:3], side="right") + expected = np.array([2, 3], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + # Following numpy convention, NaT goes at the beginning + # (unlike NaN which goes at the end) + result = arr.searchsorted(pd.NaT) + assert result == 0 + + def test_setitem(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + arr[0] = arr[1] + expected = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + expected[0] = expected[1] + + tm.assert_numpy_array_equal(arr.asi8, expected) + + arr[:2] = arr[-2:] + expected[:2] = expected[-2:] + tm.assert_numpy_array_equal(arr.asi8, expected) + + def test_setitem_raises(self): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + val = arr[0] + + with pytest.raises(IndexError, match="index 12 is out of bounds"): + arr[12] = val + + with pytest.raises(TypeError, match="'value' should be a.* 'object'"): + arr[0] = object() + + def test_inplace_arithmetic(self): + # GH#24115 check that iadd and isub are actually in-place + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + expected = arr + pd.Timedelta(days=1) + arr += pd.Timedelta(days=1) + 
tm.assert_equal(arr, expected) + + expected = arr - pd.Timedelta(days=1) + arr -= pd.Timedelta(days=1) + tm.assert_equal(arr, expected) + + def test_shift_fill_int_deprecated(self): + # GH#31971 + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = self.array_cls(data, freq="D") + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = arr.shift(1, fill_value=1) + + expected = arr.copy() + if self.array_cls is PeriodArray: + fill_val = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq) + else: + fill_val = arr._scalar_type(1) + expected[0] = fill_val + expected[1:] = arr[:-1] + tm.assert_equal(result, expected) + + +class TestDatetimeArray(SharedTests): + index_cls = pd.DatetimeIndex + array_cls = DatetimeArray + + def test_round(self, tz_naive_fixture): + # GH#24064 + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01 01:01:00", periods=3, freq="H", tz=tz) + + result = dti.round(freq="2T") + expected = dti - pd.Timedelta(minutes=1) + tm.assert_index_equal(result, expected) + + def test_array_interface(self, datetime_index): + arr = DatetimeArray(datetime_index) + + # default asarray gives the same underlying data (for tz naive) + result = np.asarray(arr) + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + # specifying M8[ns] gives the same result as default + result = np.asarray(arr, dtype="datetime64[ns]") + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="datetime64[ns]", copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="datetime64[ns]") + assert result is not expected + tm.assert_numpy_array_equal(result, expected) + + # to object dtype + result = np.asarray(arr, dtype=object) + expected = 
np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to other dtype always copies + result = np.asarray(arr, dtype="int64") + assert result is not arr.asi8 + assert not np.may_share_memory(arr, result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + + # other dtypes handled by numpy + for dtype in ["float64", str]: + result = np.asarray(arr, dtype=dtype) + expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + + def test_array_object_dtype(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + arr = DatetimeArray(dti) + + expected = np.array(list(dti)) + + result = np.array(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # also test the DatetimeIndex method while we're at it + result = np.array(dti, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_array_tz(self, tz_naive_fixture): + # GH#23524 + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + arr = DatetimeArray(dti) + + expected = dti.asi8.view("M8[ns]") + result = np.array(arr, dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype="datetime64[ns]") + tm.assert_numpy_array_equal(result, expected) + + # check that we are not making copies when setting copy=False + result = np.array(arr, dtype="M8[ns]", copy=False) + assert result.base is expected.base + assert result.base is not None + result = np.array(arr, dtype="datetime64[ns]", copy=False) + assert result.base is expected.base + assert result.base is not None + + def test_array_i8_dtype(self, tz_naive_fixture): + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + arr = DatetimeArray(dti) + + expected = dti.asi8 + result = np.array(arr, dtype="i8") + tm.assert_numpy_array_equal(result, expected) + + result = np.array(arr, dtype=np.int64) + 
tm.assert_numpy_array_equal(result, expected) + + # check that we are still making copies when setting copy=False + result = np.array(arr, dtype="i8", copy=False) + assert result.base is not expected.base + assert result.base is None + + def test_from_array_keeps_base(self): + # Ensure that DatetimeArray._data.base isn't lost. + arr = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + dta = DatetimeArray(arr) + + assert dta._data is arr + dta = DatetimeArray(arr[:0]) + assert dta._data.base is arr + + def test_from_dti(self, tz_naive_fixture): + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + arr = DatetimeArray(dti) + assert list(dti) == list(arr) + + # Check that Index.__new__ knows what to do with DatetimeArray + dti2 = pd.Index(arr) + assert isinstance(dti2, pd.DatetimeIndex) + assert list(dti2) == list(arr) + + def test_astype_object(self, tz_naive_fixture): + tz = tz_naive_fixture + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + arr = DatetimeArray(dti) + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(dti) + + @pytest.mark.parametrize("freqstr", ["D", "B", "W", "M", "Q", "Y"]) + def test_to_perioddelta(self, datetime_index, freqstr): + # GH#23113 + dti = datetime_index + arr = DatetimeArray(dti) + + expected = dti.to_perioddelta(freq=freqstr) + result = arr.to_perioddelta(freq=freqstr) + assert isinstance(result, TimedeltaArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + @pytest.mark.parametrize("freqstr", ["D", "B", "W", "M", "Q", "Y"]) + def test_to_period(self, datetime_index, freqstr): + dti = datetime_index + arr = DatetimeArray(dti) + + expected = dti.to_period(freq=freqstr) + result = arr.to_period(freq=freqstr) + assert isinstance(result, PeriodArray) + + # placeholder until these become actual EA 
subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + @pytest.mark.parametrize("propname", pd.DatetimeIndex._bool_ops) + def test_bool_properties(self, datetime_index, propname): + # in this case _bool_ops is just `is_leap_year` + dti = datetime_index + arr = DatetimeArray(dti) + assert dti.freq == arr.freq + + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("propname", pd.DatetimeIndex._field_ops) + def test_int_properties(self, datetime_index, propname): + dti = datetime_index + arr = DatetimeArray(dti) + + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + def test_take_fill_valid(self, datetime_index, tz_naive_fixture): + dti = datetime_index.tz_localize(tz_naive_fixture) + arr = DatetimeArray(dti) + + now = pd.Timestamp.now().tz_localize(dti.tz) + result = arr.take([-1, 1], allow_fill=True, fill_value=now) + assert result[0] == now + + with pytest.raises(ValueError): + # fill_value Timedelta invalid + arr.take([-1, 1], allow_fill=True, fill_value=now - now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([-1, 1], allow_fill=True, fill_value=pd.Period("2014Q1")) + + tz = None if dti.tz is not None else "US/Eastern" + now = pd.Timestamp.now().tz_localize(tz) + with pytest.raises(TypeError): + # Timestamp with mismatched tz-awareness + arr.take([-1, 1], allow_fill=True, fill_value=now) + + with pytest.raises(ValueError): + # require NaT, not iNaT, as it could be confused with an integer + arr.take([-1, 1], allow_fill=True, fill_value=pd.NaT.value) + + def test_concat_same_type_invalid(self, datetime_index): + # different timezones + dti = datetime_index + arr = DatetimeArray(dti) + + if arr.tz is None: + other = 
arr.tz_localize("UTC") + else: + other = arr.tz_localize(None) + + with pytest.raises(AssertionError): + arr._concat_same_type([arr, other]) + + def test_concat_same_type_different_freq(self): + # we *can* concatenate DTI with different freqs. + a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) + b = DatetimeArray(pd.date_range("2000", periods=2, freq="H", tz="US/Central")) + result = DatetimeArray._concat_same_type([a, b]) + expected = DatetimeArray( + pd.to_datetime( + [ + "2000-01-01 00:00:00", + "2000-01-02 00:00:00", + "2000-01-01 00:00:00", + "2000-01-01 01:00:00", + ] + ).tz_localize("US/Central") + ) + + tm.assert_datetime_array_equal(result, expected) + + def test_strftime(self, datetime_index): + arr = DatetimeArray(datetime_index) + + result = arr.strftime("%Y %b") + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_strftime_nat(self): + # GH 29578 + arr = DatetimeArray(DatetimeIndex(["2019-01-01", pd.NaT])) + + result = arr.strftime("%Y-%m-%d") + expected = np.array(["2019-01-01", np.nan], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +class TestTimedeltaArray(SharedTests): + index_cls = pd.TimedeltaIndex + array_cls = TimedeltaArray + + def test_from_tdi(self): + tdi = pd.TimedeltaIndex(["1 Day", "3 Hours"]) + arr = TimedeltaArray(tdi) + assert list(arr) == list(tdi) + + # Check that Index.__new__ knows what to do with TimedeltaArray + tdi2 = pd.Index(arr) + assert isinstance(tdi2, pd.TimedeltaIndex) + assert list(tdi2) == list(arr) + + def test_astype_object(self): + tdi = pd.TimedeltaIndex(["1 Day", "3 Hours"]) + arr = TimedeltaArray(tdi) + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(tdi) + + def test_to_pytimedelta(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + expected = tdi.to_pytimedelta() + result = 
arr.to_pytimedelta() + + tm.assert_numpy_array_equal(result, expected) + + def test_total_seconds(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + expected = tdi.total_seconds() + result = arr.total_seconds() + + tm.assert_numpy_array_equal(result, expected.values) + + @pytest.mark.parametrize("propname", pd.TimedeltaIndex._field_ops) + def test_int_properties(self, timedelta_index, propname): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + result = getattr(arr, propname) + expected = np.array(getattr(tdi, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self, timedelta_index): + arr = TimedeltaArray(timedelta_index) + + # default asarray gives the same underlying data + result = np.asarray(arr) + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + + # specifying m8[ns] gives the same result as default + result = np.asarray(arr, dtype="timedelta64[ns]") + expected = arr._data + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="timedelta64[ns]", copy=False) + assert result is expected + tm.assert_numpy_array_equal(result, expected) + result = np.array(arr, dtype="timedelta64[ns]") + assert result is not expected + tm.assert_numpy_array_equal(result, expected) + + # to object dtype + result = np.asarray(arr, dtype=object) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to other dtype always copies + result = np.asarray(arr, dtype="int64") + assert result is not arr.asi8 + assert not np.may_share_memory(arr, result) + expected = arr.asi8.copy() + tm.assert_numpy_array_equal(result, expected) + + # other dtypes handled by numpy + for dtype in ["float64", str]: + result = np.asarray(arr, dtype=dtype) + 
expected = np.asarray(arr).astype(dtype) + tm.assert_numpy_array_equal(result, expected) + + def test_take_fill_valid(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArray(tdi) + + td1 = pd.Timedelta(days=1) + result = arr.take([-1, 1], allow_fill=True, fill_value=td1) + assert result[0] == td1 + + now = pd.Timestamp.now() + with pytest.raises(ValueError): + # fill_value Timestamp invalid + arr.take([0, 1], allow_fill=True, fill_value=now) + + with pytest.raises(ValueError): + # fill_value Period invalid + arr.take([0, 1], allow_fill=True, fill_value=now.to_period("D")) + + +class TestPeriodArray(SharedTests): + index_cls = pd.PeriodIndex + array_cls = PeriodArray + + def test_from_pi(self, period_index): + pi = period_index + arr = PeriodArray(pi) + assert list(arr) == list(pi) + + # Check that Index.__new__ knows what to do with PeriodArray + pi2 = pd.Index(arr) + assert isinstance(pi2, pd.PeriodIndex) + assert list(pi2) == list(arr) + + def test_astype_object(self, period_index): + pi = period_index + arr = PeriodArray(pi) + asobj = arr.astype("O") + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == "O" + assert list(asobj) == list(pi) + + @pytest.mark.parametrize("how", ["S", "E"]) + def test_to_timestamp(self, how, period_index): + pi = period_index + arr = PeriodArray(pi) + + expected = DatetimeArray(pi.to_timestamp(how=how)) + result = arr.to_timestamp(how=how) + assert isinstance(result, DatetimeArray) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + def test_to_timestamp_out_of_bounds(self): + # GH#19643 previously overflowed silently + pi = pd.period_range("1500", freq="Y", periods=3) + with pytest.raises(OutOfBoundsDatetime): + pi.to_timestamp() + + with pytest.raises(OutOfBoundsDatetime): + pi._data.to_timestamp() + + @pytest.mark.parametrize("propname", PeriodArray._bool_ops) + def 
test_bool_properties(self, period_index, propname): + # in this case _bool_ops is just `is_leap_year` + pi = period_index + arr = PeriodArray(pi) + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("propname", PeriodArray._field_ops) + def test_int_properties(self, period_index, propname): + pi = period_index + arr = PeriodArray(pi) + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self, period_index): + arr = PeriodArray(period_index) + + # default asarray gives objects + result = np.asarray(arr) + expected = np.array(list(arr), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to object dtype (same as default) + result = np.asarray(arr, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + # to other dtypes + with pytest.raises(TypeError): + np.asarray(arr, dtype="int64") + + with pytest.raises(TypeError): + np.asarray(arr, dtype="float64") + + result = np.asarray(arr, dtype="S20") + expected = np.asarray(arr).astype("S20") + tm.assert_numpy_array_equal(result, expected) + + def test_strftime(self, period_index): + arr = PeriodArray(period_index) + + result = arr.strftime("%Y") + expected = np.array([per.strftime("%Y") for per in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_strftime_nat(self): + # GH 29578 + arr = PeriodArray(PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]")) + + result = arr.strftime("%Y-%m-%d") + expected = np.array(["2019-01-01", np.nan], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "array,casting_nats", + [ + ( + pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (pd.NaT, np.timedelta64("NaT", "ns")), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (pd.NaT, np.datetime64("NaT", 
"ns")), + ), + (pd.period_range("2000-01-01", periods=3, freq="D")._data, (pd.NaT,)), + ], + ids=lambda x: type(x).__name__, +) +def test_casting_nat_setitem_array(array, casting_nats): + expected = type(array)._from_sequence([pd.NaT, array[1], array[2]]) + + for nat in casting_nats: + arr = array.copy() + arr[0] = nat + tm.assert_equal(arr, expected) + + +@pytest.mark.parametrize( + "array,non_casting_nats", + [ + ( + pd.TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, + (np.datetime64("NaT", "ns"), pd.NaT.value), + ), + ( + pd.date_range("2000-01-01", periods=3, freq="D")._data, + (np.timedelta64("NaT", "ns"), pd.NaT.value), + ), + ( + pd.period_range("2000-01-01", periods=3, freq="D")._data, + (np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns"), pd.NaT.value), + ), + ], + ids=lambda x: type(x).__name__, +) +def test_invalid_nat_setitem_array(array, non_casting_nats): + for nat in non_casting_nats: + with pytest.raises(TypeError): + array[0] = nat + + +@pytest.mark.parametrize( + "array", + [ + pd.date_range("2000", periods=4).array, + pd.timedelta_range("2000", periods=4).array, + ], +) +def test_to_numpy_extra(array): + if _np_version_under1p18: + # np.isnan(NaT) raises, so use pandas' + isnan = pd.isna + else: + isnan = np.isnan + + array[0] = pd.NaT + original = array.copy() + + result = array.to_numpy() + assert isnan(result[0]) + + result = array.to_numpy(dtype="int64") + assert result[0] == -9223372036854775808 + + result = array.to_numpy(dtype="int64", na_value=0) + assert result[0] == 0 + + result = array.to_numpy(na_value=array[1].to_numpy()) + assert result[0] == result[1] + + result = array.to_numpy(na_value=array[1].to_numpy(copy=False)) + assert result[0] == result[1] + + tm.assert_equal(array, original) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py new file mode 100644 index 00000000..925781b6 --- /dev/null +++ b/pandas/tests/arrays/test_datetimes.py @@ -0,0 +1,418 @@ +""" +Tests for DatetimeArray 
+""" +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray +from pandas.core.arrays.datetimes import sequence_to_dt64ns + + +class TestDatetimeArrayConstructor: + def test_from_sequence_invalid_type(self): + mi = pd.MultiIndex.from_product([np.arange(5), np.arange(5)]) + with pytest.raises(TypeError, match="Cannot create a DatetimeArray"): + DatetimeArray._from_sequence(mi) + + def test_only_1dim_accepted(self): + arr = np.array([0, 1, 2, 3], dtype="M8[h]").astype("M8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + DatetimeArray(arr.reshape(2, 2, 1)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + DatetimeArray(arr[[0]].squeeze()) + + def test_freq_validation(self): + # GH#24623 check that invalid instances cannot be created with the + # public constructor + arr = np.arange(5, dtype=np.int64) * 3600 * 10 ** 9 + + msg = ( + "Inferred frequency H from passed values does not " + "conform to passed frequency W-SUN" + ) + with pytest.raises(ValueError, match=msg): + DatetimeArray(arr, freq="W") + + @pytest.mark.parametrize( + "meth", + [ + DatetimeArray._from_sequence, + sequence_to_dt64ns, + pd.to_datetime, + pd.DatetimeIndex, + ], + ) + def test_mixing_naive_tzaware_raises(self, meth): + # GH#24569 + arr = np.array([pd.Timestamp("2000"), pd.Timestamp("2000", tz="CET")]) + + msg = ( + "Cannot mix tz-aware with tz-naive values|" + "Tz-aware datetime.datetime cannot be converted " + "to datetime64 unless utc=True" + ) + + for obj in [arr, arr[::-1]]: + # check that we raise regardless of whether naive is found + # before aware or vice-versa + with pytest.raises(ValueError, match=msg): + meth(obj) + + def test_from_pandas_array(self): + arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10 ** 9 + + result 
= DatetimeArray._from_sequence(arr, freq="infer") + + expected = pd.date_range("1970-01-01", periods=5, freq="H")._data + tm.assert_datetime_array_equal(result, expected) + + def test_mismatched_timezone_raises(self): + arr = DatetimeArray( + np.array(["2000-01-01T06:00:00"], dtype="M8[ns]"), + dtype=DatetimeTZDtype(tz="US/Central"), + ) + dtype = DatetimeTZDtype(tz="US/Eastern") + with pytest.raises(TypeError, match="Timezone of the array"): + DatetimeArray(arr, dtype=dtype) + + def test_non_array_raises(self): + with pytest.raises(ValueError, match="list"): + DatetimeArray([1, 2, 3]) + + def test_other_type_raises(self): + with pytest.raises( + ValueError, match="The dtype of 'values' is incorrect.*bool" + ): + DatetimeArray(np.array([1, 2, 3], dtype="bool")) + + def test_incorrect_dtype_raises(self): + with pytest.raises(ValueError, match="Unexpected value for 'dtype'."): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), dtype="category") + + def test_freq_infer_raises(self): + with pytest.raises(ValueError, match="Frequency inference"): + DatetimeArray(np.array([1, 2, 3], dtype="i8"), freq="infer") + + def test_copy(self): + data = np.array([1, 2, 3], dtype="M8[ns]") + arr = DatetimeArray(data, copy=False) + assert arr._data is data + + arr = DatetimeArray(data, copy=True) + assert arr._data is not data + + +class TestDatetimeArrayComparisons: + # TODO: merge this into tests/arithmetic/test_datetime64 once it is + # sufficiently robust + + def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators): + # arbitrary tz-naive DatetimeIndex + opname = all_compare_operators.strip("_") + op = getattr(operator, opname) + + dti = pd.date_range("2016-01-1", freq="MS", periods=9, tz=None) + arr = DatetimeArray(dti) + assert arr.freq == dti.freq + assert arr.tz == dti.tz + + right = dti + + expected = np.ones(len(arr), dtype=bool) + if opname in ["ne", "gt", "lt"]: + # for these the comparisons should be all-False + expected = ~expected + + result = op(arr, arr) + 
tm.assert_numpy_array_equal(result, expected) + for other in [right, np.array(right)]: + # TODO: add list and tuple, and object-dtype once those + # are fixed in the constructor + result = op(arr, other) + tm.assert_numpy_array_equal(result, expected) + + result = op(other, arr) + tm.assert_numpy_array_equal(result, expected) + + +class TestDatetimeArray: + def test_astype_to_same(self): + arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + result = arr.astype(DatetimeTZDtype(tz="US/Central"), copy=False) + assert result is arr + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "datetime64[ns, UTC]"]) + @pytest.mark.parametrize( + "other", ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, CET]"] + ) + def test_astype_copies(self, dtype, other): + # https://github.com/pandas-dev/pandas/pull/32490 + s = pd.Series([1, 2], dtype=dtype) + orig = s.copy() + t = s.astype(other) + t[:] = pd.NaT + tm.assert_series_equal(s, orig) + + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) + def test_astype_int(self, dtype): + arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + def test_tz_setter_raises(self): + arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + with pytest.raises(AttributeError, match="tz_localize"): + arr.tz = "UTC" + + def test_setitem_different_tz_raises(self): + data = np.array([1, 2, 3], dtype="M8[ns]") + arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central")) + with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): + arr[0] = pd.Timestamp("2000") + + with pytest.raises(ValueError, match="US/Central"): + arr[0] = pd.Timestamp("2000", 
tz="US/Eastern") + + def test_setitem_clears_freq(self): + a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) + a[0] = pd.Timestamp("2000", tz="US/Central") + assert a.freq is None + + @pytest.mark.parametrize( + "obj", + [ + pd.Timestamp.now(), + pd.Timestamp.now().to_datetime64(), + pd.Timestamp.now().to_pydatetime(), + ], + ) + def test_setitem_objects(self, obj): + # make sure we accept datetime64 and datetime in addition to Timestamp + dti = pd.date_range("2000", periods=2, freq="D") + arr = dti._data + + arr[0] = obj + assert arr[0] == obj + + def test_repeat_preserves_tz(self): + dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") + arr = DatetimeArray(dti) + + repeated = arr.repeat([1, 1]) + + # preserves tz and values, but not freq + expected = DatetimeArray(arr.asi8, freq=None, dtype=arr.dtype) + tm.assert_equal(repeated, expected) + + def test_value_counts_preserves_tz(self): + dti = pd.date_range("2000", periods=2, freq="D", tz="US/Central") + arr = DatetimeArray(dti).repeat([4, 3]) + + result = arr.value_counts() + + # Note: not tm.assert_index_equal, since `freq`s do not match + assert result.index.equals(dti) + + arr[-2] = pd.NaT + result = arr.value_counts() + expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("method", ["pad", "backfill"]) + def test_fillna_preserves_tz(self, method): + dti = pd.date_range("2000-01-01", periods=5, freq="D", tz="US/Central") + arr = DatetimeArray(dti, copy=True) + arr[2] = pd.NaT + + fill_val = dti[1] if method == "pad" else dti[3] + expected = DatetimeArray._from_sequence( + [dti[0], dti[1], fill_val, dti[3], dti[4]], freq=None, tz="US/Central" + ) + + result = arr.fillna(method=method) + tm.assert_extension_array_equal(result, expected) + + # assert that arr and dti were not modified in-place + assert arr[2] is pd.NaT + assert dti[2] == pd.Timestamp("2000-01-03", tz="US/Central") + + def 
test_array_interface_tz(self): + tz = "US/Central" + data = DatetimeArray(pd.date_range("2017", periods=2, tz=tz)) + result = np.asarray(data) + + expected = np.array( + [ + pd.Timestamp("2017-01-01T00:00:00", tz=tz), + pd.Timestamp("2017-01-02T00:00:00", tz=tz), + ], + dtype=object, + ) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype="M8[ns]") + + expected = np.array( + ["2017-01-01T06:00:00", "2017-01-02T06:00:00"], dtype="M8[ns]" + ) + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self): + data = DatetimeArray(pd.date_range("2017", periods=2)) + expected = np.array( + ["2017-01-01T00:00:00", "2017-01-02T00:00:00"], dtype="datetime64[ns]" + ) + + result = np.asarray(data) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype=object) + expected = np.array( + [pd.Timestamp("2017-01-01T00:00:00"), pd.Timestamp("2017-01-02T00:00:00")], + dtype=object, + ) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("index", [True, False]) + def test_searchsorted_different_tz(self, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = DatetimeArray(data, freq="D").tz_localize("Asia/Tokyo") + if index: + arr = pd.Index(arr) + + expected = arr.searchsorted(arr[2]) + result = arr.searchsorted(arr[2].tz_convert("UTC")) + assert result == expected + + expected = arr.searchsorted(arr[2:6]) + result = arr.searchsorted(arr[2:6].tz_convert("UTC")) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize("index", [True, False]) + def test_searchsorted_tzawareness_compat(self, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = DatetimeArray(data, freq="D") + if index: + arr = pd.Index(arr) + + mismatch = arr.tz_localize("Asia/Tokyo") + + msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + with 
pytest.raises(TypeError, match=msg): + arr.searchsorted(mismatch[0]) + with pytest.raises(TypeError, match=msg): + arr.searchsorted(mismatch) + + with pytest.raises(TypeError, match=msg): + mismatch.searchsorted(arr[0]) + with pytest.raises(TypeError, match=msg): + mismatch.searchsorted(arr) + + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + 1.0, + np.timedelta64("NaT"), + pd.Timedelta(days=2), + "invalid", + np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9, + np.arange(10).view("timedelta64[ns]") * 24 * 3600 * 10 ** 9, + pd.Timestamp.now().to_period("D"), + ], + ) + @pytest.mark.parametrize( + "index", + [ + True, + pytest.param( + False, + marks=pytest.mark.xfail( + reason="Raises ValueError instead of TypeError", raises=ValueError + ), + ), + ], + ) + def test_searchsorted_invalid_types(self, other, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = DatetimeArray(data, freq="D") + if index: + arr = pd.Index(arr) + + msg = "searchsorted requires compatible dtype or scalar" + with pytest.raises(TypeError, match=msg): + arr.searchsorted(other) + + +class TestSequenceToDT64NS: + def test_tz_dtype_mismatch_raises(self): + arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + with pytest.raises(TypeError, match="data is already tz-aware"): + sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="UTC")) + + def test_tz_dtype_matches(self): + arr = DatetimeArray._from_sequence(["2000"], tz="US/Central") + result, _, _ = sequence_to_dt64ns(arr, dtype=DatetimeTZDtype(tz="US/Central")) + tm.assert_numpy_array_equal(arr._data, result) + + +class TestReductions: + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_min_max(self, tz): + arr = DatetimeArray._from_sequence( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + tz=tz, + ) + + result = arr.min() + expected = pd.Timestamp("2000-01-02", tz=tz) + assert result == expected + + result = arr.max() + expected = 
pd.Timestamp("2000-01-05", tz=tz) + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna, tz): + arr = DatetimeArray._from_sequence([], tz=tz) + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py new file mode 100644 index 00000000..2a6b6718 --- /dev/null +++ b/pandas/tests/arrays/test_integer.py @@ -0,0 +1,1096 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.generic import ABCIndexClass + +import pandas as pd +import pandas._testing as tm +from pandas.api.types import is_float, is_float_dtype, is_integer, is_scalar +from pandas.core.arrays import IntegerArray, integer_array +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) +from pandas.tests.extension.base import BaseOpsUtil + + +def make_data(): + return list(range(8)) + [np.nan] + list(range(10, 98)) + [np.nan] + [99, 100] + + +@pytest.fixture( + params=[ + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + ] +) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return integer_array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return integer_array([np.nan, 1], dtype=dtype) + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return 
data_missing + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert np.dtype(dtype.type).kind == "i" + else: + assert np.dtype(dtype.type).kind == "u" + assert dtype.name is not None + + +@pytest.mark.parametrize( + "dtype, expected", + [ + (Int8Dtype(), "Int8Dtype()"), + (Int16Dtype(), "Int16Dtype()"), + (Int32Dtype(), "Int32Dtype()"), + (Int64Dtype(), "Int64Dtype()"), + (UInt8Dtype(), "UInt8Dtype()"), + (UInt16Dtype(), "UInt16Dtype()"), + (UInt32Dtype(), "UInt32Dtype()"), + (UInt64Dtype(), "UInt64Dtype()"), + ], +) +def test_repr_dtype(dtype, expected): + assert repr(dtype) == expected + + +def test_repr_array(): + result = repr(integer_array([1, None, 3])) + expected = "\n[1, , 3]\nLength: 3, dtype: Int64" + assert result == expected + + +def test_repr_array_long(): + data = integer_array([1, 2, None] * 1000) + expected = ( + "\n" + "[ 1, 2, , 1, 2, , 1, 2, , 1,\n" + " ...\n" + " , 1, 2, , 1, 2, , 1, 2, ]\n" + "Length: 3000, dtype: Int64" + ) + result = repr(data) + assert result == expected + + +class TestConstructors: + def test_uses_pandas_na(self): + a = pd.array([1, None], dtype=pd.Int64Dtype()) + assert a[1] is pd.NA + + def test_from_dtype_from_float(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series( + data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype) + ) + tm.assert_series_equal(result, expected) + + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) + result = pd.Series(dropped, dtype=str(dtype)) + tm.assert_series_equal(result, expected) + + +class TestArithmeticOps(BaseOpsUtil): + def _check_divmod_op(self, s, op, other, exc=None): + 
super()._check_divmod_op(s, op, other, None) + + def _check_op(self, s, op_name, other, exc=None): + op = self.get_op_from_name(op_name) + result = op(s, other) + + # compute expected + mask = s.isna() + + # if s is a DataFrame, squeeze to a Series + # for comparison + if isinstance(s, pd.DataFrame): + result = result.squeeze() + s = s.squeeze() + mask = mask.squeeze() + + # other array is an Integer + if isinstance(other, IntegerArray): + omask = getattr(other, "mask", None) + mask = getattr(other, "data", other) + if omask is not None: + mask |= omask + + # 1 ** na is na, so need to unmask those + if op_name == "__pow__": + mask = np.where(~s.isna() & (s == 1), False, mask) + + elif op_name == "__rpow__": + other_is_one = other == 1 + if isinstance(other_is_one, pd.Series): + other_is_one = other_is_one.fillna(False) + mask = np.where(other_is_one, False, mask) + + # float result type or float op + if ( + is_float_dtype(other) + or is_float(other) + or op_name in ["__rtruediv__", "__truediv__", "__rdiv__", "__div__"] + ): + rs = s.astype("float") + expected = op(rs, other) + self._check_op_float(result, expected, mask, s, op_name, other) + + # integer result type + else: + rs = pd.Series(s.values._data, name=s.name) + expected = op(rs, other) + self._check_op_integer(result, expected, mask, s, op_name, other) + + def _check_op_float(self, result, expected, mask, s, op_name, other): + # check comparisons that are resulting in float dtypes + + expected[mask] = np.nan + if "floordiv" in op_name: + # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet) + mask2 = np.isinf(expected) & np.isnan(result) + expected[mask2] = np.nan + tm.assert_series_equal(result, expected) + + def _check_op_integer(self, result, expected, mask, s, op_name, other): + # check comparisons that are resulting in integer dtypes + + # to compare properly, we convert the expected + # to float, mask to nans and convert infs + # if we have uints then we process as uints + # then 
convert to float + # and we ultimately want to create a IntArray + # for comparisons + + fill_value = 0 + + # mod/rmod turn floating 0 into NaN while + # integer works as expected (no nan) + if op_name in ["__mod__", "__rmod__"]: + if is_scalar(other): + if other == 0: + expected[s.values == 0] = 0 + else: + expected = expected.fillna(0) + else: + expected[ + (s.values == 0).fillna(False) + & ((expected == 0).fillna(False) | expected.isna()) + ] = 0 + try: + expected[ + ((expected == np.inf) | (expected == -np.inf)).fillna(False) + ] = fill_value + original = expected + expected = expected.astype(s.dtype) + + except ValueError: + + expected = expected.astype(float) + expected[ + ((expected == np.inf) | (expected == -np.inf)).fillna(False) + ] = fill_value + original = expected + expected = expected.astype(s.dtype) + + expected[mask] = pd.NA + + # assert that the expected astype is ok + # (skip for unsigned as they have wrap around) + if not s.dtype.is_unsigned_integer: + original = pd.Series(original) + + # we need to fill with 0's to emulate what an astype('int') does + # (truncation) for certain ops + if op_name in ["__rtruediv__", "__rdiv__"]: + mask |= original.isna() + original = original.fillna(0).astype("int") + + original = original.astype("float") + original[mask] = np.nan + tm.assert_series_equal(original, expected.astype("float")) + + # assert our expected result + tm.assert_series_equal(result, expected) + + def test_arith_integer_array(self, data, all_arithmetic_operators): + # we operate with a rhs of an integer array + + op = all_arithmetic_operators + + s = pd.Series(data) + rhs = pd.Series([1] * len(data), dtype=data.dtype) + rhs.iloc[-1] = np.nan + + self._check_op(s, op, rhs) + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # scalar + op = all_arithmetic_operators + s = pd.Series(data) + self._check_op(s, op, 1, exc=TypeError) + + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & 
scalar + op = all_arithmetic_operators + df = pd.DataFrame({"A": data}) + self._check_op(df, op, 1, exc=TypeError) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + # ndarray & other series + op = all_arithmetic_operators + s = pd.Series(data) + other = np.ones(len(s), dtype=s.dtype.type) + self._check_op(s, op, other, exc=TypeError) + + def test_arith_coerce_scalar(self, data, all_arithmetic_operators): + + op = all_arithmetic_operators + s = pd.Series(data) + + other = 0.01 + self._check_op(s, op, other) + + @pytest.mark.parametrize("other", [1.0, np.array(1.0)]) + def test_arithmetic_conversion(self, all_arithmetic_operators, other): + # if we have a float operand we should have a float result + # if that is equal to an integer + op = self.get_op_from_name(all_arithmetic_operators) + + s = pd.Series([1, 2, 3], dtype="Int64") + result = op(s, other) + assert result.dtype is np.dtype("float") + + def test_arith_len_mismatch(self, all_arithmetic_operators): + # operating with a list-like with non-matching length raises + op = self.get_op_from_name(all_arithmetic_operators) + other = np.array([1.0]) + + s = pd.Series([1, 2, 3], dtype="Int64") + with pytest.raises(ValueError, match="Lengths must match"): + op(s, other) + + @pytest.mark.parametrize("other", [0, 0.5]) + def test_arith_zero_dim_ndarray(self, other): + arr = integer_array([1, None, 2]) + result = arr + np.array(other) + expected = arr + other + tm.assert_equal(result, expected) + + def test_error(self, data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + opa = getattr(data, op) + + # invalid scalars + with pytest.raises(TypeError): + ops("foo") + with pytest.raises(TypeError): + ops(pd.Timestamp("20180101")) + + # invalid array-likes + with pytest.raises(TypeError): + ops(pd.Series("foo", index=s.index)) + + if op != "__rpow__": + # TODO(extension) + # rpow with a datetimelike coerces the integer 
array incorrectly + with pytest.raises(TypeError): + ops(pd.Series(pd.date_range("20180101", periods=len(s)))) + + # 2d + result = opa(pd.DataFrame({"A": s})) + assert result is NotImplemented + + with pytest.raises(NotImplementedError): + opa(np.arange(len(s)).reshape(-1, len(s))) + + @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) + def test_divide_by_zero(self, zero, negative): + # https://github.com/pandas-dev/pandas/issues/27398 + a = pd.array([0, 1, -1, None], dtype="Int64") + result = a / zero + expected = np.array([np.nan, np.inf, -np.inf, np.nan]) + if negative: + expected *= -1 + tm.assert_numpy_array_equal(result, expected) + + def test_pow_scalar(self): + a = pd.array([-1, 0, 1, None, 2], dtype="Int64") + result = a ** 0 + expected = pd.array([1, 1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a ** 1 + expected = pd.array([-1, 0, 1, None, 2], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a ** pd.NA + expected = pd.array([None, None, 1, None, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = a ** np.nan + expected = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + # reversed + a = a[1:] # Can't raise integers to negative powers. 
+ + result = 0 ** a + expected = pd.array([1, 0, None, 0], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = 1 ** a + expected = pd.array([1, 1, 1, 1], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = pd.NA ** a + expected = pd.array([1, None, None, None], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + result = np.nan ** a + expected = np.array([1, np.nan, np.nan, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + def test_pow_array(self): + a = integer_array([0, 0, 0, 1, 1, 1, None, None, None]) + b = integer_array([0, 1, None, 0, 1, None, 0, 1, None]) + result = a ** b + expected = integer_array([1, 0, None, 1, 1, 1, 1, None, None]) + tm.assert_extension_array_equal(result, expected) + + def test_rpow_one_to_na(self): + # https://github.com/pandas-dev/pandas/issues/22022 + # https://github.com/pandas-dev/pandas/issues/29997 + arr = integer_array([np.nan, np.nan]) + result = np.array([1.0, 2.0]) ** arr + expected = np.array([1.0, np.nan]) + tm.assert_numpy_array_equal(result, expected) + + +class TestComparisonOps(BaseOpsUtil): + def _compare_other(self, data, op_name, other): + op = self.get_op_from_name(op_name) + + # array + result = pd.Series(op(data, other)) + expected = pd.Series(op(data._data, other), dtype="boolean") + + # fill the nan locations + expected[data._mask] = pd.NA + + tm.assert_series_equal(result, expected) + + # series + s = pd.Series(data) + result = op(s, other) + + expected = op(pd.Series(data._data), other) + + # fill the nan locations + expected[data._mask] = pd.NA + expected = expected.astype("boolean") + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("other", [True, False, pd.NA, -1, 0, 1]) + def test_scalar(self, other, all_compare_operators): + op = self.get_op_from_name(all_compare_operators) + a = pd.array([1, 0, None], dtype="Int64") + + result = op(a, other) + + if other is pd.NA: + expected = 
pd.array([None, None, None], dtype="boolean") + else: + values = op(a._data, other) + expected = pd.arrays.BooleanArray(values, a._mask, copy=True) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = pd.NA + tm.assert_extension_array_equal(a, pd.array([1, 0, None], dtype="Int64")) + + def test_array(self, all_compare_operators): + op = self.get_op_from_name(all_compare_operators) + a = pd.array([0, 1, 2, None, None, None], dtype="Int64") + b = pd.array([0, 1, None, 0, 1, None], dtype="Int64") + + result = op(a, b) + values = op(a._data, b._data) + mask = a._mask | b._mask + + expected = pd.arrays.BooleanArray(values, mask) + tm.assert_extension_array_equal(result, expected) + + # ensure we haven't mutated anything inplace + result[0] = pd.NA + tm.assert_extension_array_equal( + a, pd.array([0, 1, 2, None, None, None], dtype="Int64") + ) + tm.assert_extension_array_equal( + b, pd.array([0, 1, None, 0, 1, None], dtype="Int64") + ) + + def test_compare_with_booleanarray(self, all_compare_operators): + op = self.get_op_from_name(all_compare_operators) + a = pd.array([True, False, None] * 3, dtype="boolean") + b = pd.array([0] * 3 + [1] * 3 + [None] * 3, dtype="Int64") + other = pd.array([False] * 3 + [True] * 3 + [None] * 3, dtype="boolean") + expected = op(a, other) + result = op(a, b) + tm.assert_extension_array_equal(result, expected) + + def test_no_shared_mask(self, data): + result = data + 1 + assert np.shares_memory(result._mask, data._mask) is False + + def test_compare_to_string(self, any_nullable_int_dtype): + # GH 28930 + s = pd.Series([1, None], dtype=any_nullable_int_dtype) + result = s == "a" + expected = pd.Series([False, pd.NA], dtype="boolean") + + self.assert_series_equal(result, expected) + + def test_compare_to_int(self, any_nullable_int_dtype, all_compare_operators): + # GH 28930 + s1 = pd.Series([1, None, 3], dtype=any_nullable_int_dtype) + s2 = pd.Series([1, None, 3], dtype="float") + 
+ method = getattr(s1, all_compare_operators) + result = method(2) + + method = getattr(s2, all_compare_operators) + expected = method(2).astype("boolean") + expected[s2.isna()] = pd.NA + + self.assert_series_equal(result, expected) + + +class TestCasting: + @pytest.mark.parametrize("dropna", [True, False]) + def test_construct_index(self, all_data, dropna): + # ensure that we do not coerce to Float64Index, rather + # keep as Index + + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Index(integer_array(other, dtype=all_data.dtype)) + expected = pd.Index(other, dtype=object) + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dropna", [True, False]) + def test_astype_index(self, all_data, dropna): + # as an int/uint index to Index + + all_data = all_data[:10] + if dropna: + other = all_data[~all_data.isna()] + else: + other = all_data + + dtype = all_data.dtype + idx = pd.Index(np.array(other)) + assert isinstance(idx, ABCIndexClass) + + result = idx.astype(dtype) + expected = idx.astype(object).astype(dtype) + tm.assert_index_equal(result, expected) + + def test_astype(self, all_data): + all_data = all_data[:10] + + ints = all_data[~all_data.isna()] + mixed = all_data + dtype = Int8Dtype() + + # coerce to same type - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype) + expected = pd.Series(ints) + tm.assert_series_equal(result, expected) + + # coerce to same other - ints + s = pd.Series(ints) + result = s.astype(dtype) + expected = pd.Series(ints, dtype=dtype) + tm.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype.numpy_dtype) + expected = pd.Series(ints._data.astype(all_data.dtype.numpy_dtype)) + tm.assert_series_equal(result, expected) + + # coerce to same type - mixed + s = pd.Series(mixed) + result = s.astype(all_data.dtype) + expected = pd.Series(mixed) + 
tm.assert_series_equal(result, expected) + + # coerce to same other - mixed + s = pd.Series(mixed) + result = s.astype(dtype) + expected = pd.Series(mixed, dtype=dtype) + tm.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - mixed + s = pd.Series(mixed) + with pytest.raises(ValueError): + s.astype(all_data.dtype.numpy_dtype) + + # coerce to object + s = pd.Series(mixed) + result = s.astype("object") + expected = pd.Series(np.asarray(mixed)) + tm.assert_series_equal(result, expected) + + def test_astype_to_larger_numpy(self): + a = pd.array([1, 2], dtype="Int32") + result = a.astype("int64") + expected = np.array([1, 2], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + a = pd.array([1, 2], dtype="UInt32") + result = a.astype("uint64") + expected = np.array([1, 2], dtype="uint64") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [Int8Dtype(), "Int8", UInt32Dtype(), "UInt32"]) + def test_astype_specific_casting(self, dtype): + s = pd.Series([1, 2, 3], dtype="Int64") + result = s.astype(dtype) + expected = pd.Series([1, 2, 3], dtype=dtype) + tm.assert_series_equal(result, expected) + + s = pd.Series([1, 2, 3, None], dtype="Int64") + result = s.astype(dtype) + expected = pd.Series([1, 2, 3, None], dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_construct_cast_invalid(self, dtype): + + msg = "cannot safely" + arr = [1.2, 2.3, 3.7] + with pytest.raises(TypeError, match=msg): + integer_array(arr, dtype=dtype) + + with pytest.raises(TypeError, match=msg): + pd.Series(arr).astype(dtype) + + arr = [1.2, 2.3, 3.7, np.nan] + with pytest.raises(TypeError, match=msg): + integer_array(arr, dtype=dtype) + + with pytest.raises(TypeError, match=msg): + pd.Series(arr).astype(dtype) + + @pytest.mark.parametrize("in_series", [True, False]) + def test_to_numpy_na_nan(self, in_series): + a = pd.array([0, 1, None], dtype="Int64") + if in_series: + a = pd.Series(a) + + result = 
a.to_numpy(dtype="float64", na_value=np.nan) + expected = np.array([0.0, 1.0, np.nan], dtype="float64") + tm.assert_numpy_array_equal(result, expected) + + result = a.to_numpy(dtype="int64", na_value=-1) + expected = np.array([0, 1, -1], dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + result = a.to_numpy(dtype="bool", na_value=False) + expected = np.array([False, True, False], dtype="bool") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("in_series", [True, False]) + @pytest.mark.parametrize("dtype", ["int32", "int64", "bool"]) + def test_to_numpy_dtype(self, dtype, in_series): + a = pd.array([0, 1], dtype="Int64") + if in_series: + a = pd.Series(a) + + result = a.to_numpy(dtype=dtype) + expected = np.array([0, 1], dtype=dtype) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["float64", "int64", "bool"]) + def test_to_numpy_na_raises(self, dtype): + a = pd.array([0, 1, None], dtype="Int64") + with pytest.raises(ValueError, match=dtype): + a.to_numpy(dtype=dtype) + + def test_astype_str(self): + a = pd.array([1, 2, None], dtype="Int64") + expected = np.array(["1", "2", ""], dtype=object) + + tm.assert_numpy_array_equal(a.astype(str), expected) + tm.assert_numpy_array_equal(a.astype("str"), expected) + + def test_astype_boolean(self): + # https://github.com/pandas-dev/pandas/issues/31102 + a = pd.array([1, 0, -1, 2, None], dtype="Int64") + result = a.astype("boolean") + expected = pd.array([True, False, True, True, None], dtype="boolean") + tm.assert_extension_array_equal(result, expected) + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({"A": data_missing}) + result = repr(df) + expected = " A\n0 \n1 1" + assert result == expected + + +def test_conversions(data_missing): + + # astype to object series + df = pd.DataFrame({"A": data_missing}) + result = df["A"].astype("object") + expected = pd.Series(np.array([np.nan, 1], dtype=object), name="A") + 
tm.assert_series_equal(result, expected) + + # convert to object ndarray + # we assert that we are exactly equal + # including type conversions of scalars + result = df["A"].astype("object").values + expected = np.array([pd.NA, 1], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + for r, e in zip(result, expected): + if pd.isnull(r): + assert pd.isnull(e) + elif is_integer(r): + assert r == e + assert is_integer(e) + else: + assert r == e + assert type(r) == type(e) + + +def test_integer_array_constructor(): + values = np.array([1, 2, 3, 4], dtype="int64") + mask = np.array([False, False, False, True], dtype="bool") + + result = IntegerArray(values, mask) + expected = integer_array([1, 2, 3, np.nan], dtype="int64") + tm.assert_extension_array_equal(result, expected) + + with pytest.raises(TypeError): + IntegerArray(values.tolist(), mask) + + with pytest.raises(TypeError): + IntegerArray(values, mask.tolist()) + + with pytest.raises(TypeError): + IntegerArray(values.astype(float), mask) + + with pytest.raises(TypeError): + IntegerArray(values) + + +@pytest.mark.parametrize( + "a, b", + [ + ([1, None], [1, np.nan]), + ([None], [np.nan]), + ([None, np.nan], [np.nan, np.nan]), + ([np.nan, np.nan], [np.nan, np.nan]), + ], +) +def test_integer_array_constructor_none_is_nan(a, b): + result = integer_array(a) + expected = integer_array(b) + tm.assert_extension_array_equal(result, expected) + + +def test_integer_array_constructor_copy(): + values = np.array([1, 2, 3, 4], dtype="int64") + mask = np.array([False, False, False, True], dtype="bool") + + result = IntegerArray(values, mask) + assert result._data is values + assert result._mask is mask + + result = IntegerArray(values, mask, copy=True) + assert result._data is not values + assert result._mask is not mask + + +@pytest.mark.parametrize( + "values", + [ + ["foo", "bar"], + ["1", "2"], + "foo", + 1, + 1.0, + pd.date_range("20130101", periods=2), + np.array(["foo"]), + [[1, 2], [3, 4]], + [np.nan, {"a": 
1}], + ], +) +def test_to_integer_array_error(values): + # error in converting existing arrays to IntegerArrays + with pytest.raises(TypeError): + integer_array(values) + + +def test_to_integer_array_inferred_dtype(): + # if values has dtype -> respect it + result = integer_array(np.array([1, 2], dtype="int8")) + assert result.dtype == Int8Dtype() + result = integer_array(np.array([1, 2], dtype="int32")) + assert result.dtype == Int32Dtype() + + # if values have no dtype -> always int64 + result = integer_array([1, 2]) + assert result.dtype == Int64Dtype() + + +def test_to_integer_array_dtype_keyword(): + result = integer_array([1, 2], dtype="int8") + assert result.dtype == Int8Dtype() + + # if values has dtype -> override it + result = integer_array(np.array([1, 2], dtype="int8"), dtype="int32") + assert result.dtype == Int32Dtype() + + +def test_to_integer_array_float(): + result = integer_array([1.0, 2.0]) + expected = integer_array([1, 2]) + tm.assert_extension_array_equal(result, expected) + + with pytest.raises(TypeError, match="cannot safely cast non-equivalent"): + integer_array([1.5, 2.0]) + + # for float dtypes, the itemsize is not preserved + result = integer_array(np.array([1.0, 2.0], dtype="float32")) + assert result.dtype == Int64Dtype() + + +@pytest.mark.parametrize( + "bool_values, int_values, target_dtype, expected_dtype", + [ + ([False, True], [0, 1], Int64Dtype(), Int64Dtype()), + ([False, True], [0, 1], "Int64", Int64Dtype()), + ([False, True, np.nan], [0, 1, np.nan], Int64Dtype(), Int64Dtype()), + ], +) +def test_to_integer_array_bool(bool_values, int_values, target_dtype, expected_dtype): + result = integer_array(bool_values, dtype=target_dtype) + assert result.dtype == expected_dtype + expected = integer_array(int_values, dtype=target_dtype) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values, to_dtype, result_dtype", + [ + (np.array([1], dtype="int64"), None, Int64Dtype), + (np.array([1, np.nan]), 
None, Int64Dtype), + (np.array([1, np.nan]), "int8", Int8Dtype), + ], +) +def test_to_integer_array(values, to_dtype, result_dtype): + # convert existing arrays to IntegerArrays + result = integer_array(values, dtype=to_dtype) + assert result.dtype == result_dtype() + expected = integer_array(values, dtype=result_dtype()) + tm.assert_extension_array_equal(result, expected) + + +def test_cross_type_arithmetic(): + + df = pd.DataFrame( + { + "A": pd.Series([1, 2, np.nan], dtype="Int64"), + "B": pd.Series([1, np.nan, 3], dtype="UInt8"), + "C": [1, 2, 3], + } + ) + + result = df.A + df.C + expected = pd.Series([2, 4, np.nan], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = (df.A + df.C) * 3 == 12 + expected = pd.Series([False, True, None], dtype="boolean") + tm.assert_series_equal(result, expected) + + result = df.A + df.B + expected = pd.Series([2, np.nan, np.nan], dtype="Int64") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("op", ["sum", "min", "max", "prod"]) +def test_preserve_dtypes(op): + # TODO(#22346): preserve Int64 dtype + # for ops that enable (mean would actually work here + # but generally it is a float return value) + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": integer_array([1, None, 3], dtype="Int64"), + } + ) + + # op + result = getattr(df.C, op)() + assert isinstance(result, int) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": integer_array([1, 3], dtype="Int64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", ["mean"]) +def test_reduce_to_float(op): + # some reduce ops always return float, even if the result + # is a rounded number + df = pd.DataFrame( + { + "A": ["a", "b", "b"], + "B": [1, None, 3], + "C": integer_array([1, None, 3], dtype="Int64"), + } + ) + + # op + result = getattr(df.C, op)() + assert isinstance(result, 
float) + + # groupby + result = getattr(df.groupby("A"), op)() + + expected = pd.DataFrame( + {"B": np.array([1.0, 3.0]), "C": integer_array([1, 3], dtype="Int64")}, + index=pd.Index(["a", "b"], name="A"), + ) + tm.assert_frame_equal(result, expected) + + +def test_astype_nansafe(): + # see gh-22343 + arr = integer_array([np.nan, 1, 2], dtype="Int8") + msg = "cannot convert to 'uint32'-dtype NumPy array with missing values." + + with pytest.raises(ValueError, match=msg): + arr.astype("uint32") + + +@pytest.mark.parametrize("ufunc", [np.abs, np.sign]) +def test_ufuncs_single_int(ufunc): + a = integer_array([1, 2, -3, np.nan]) + result = ufunc(a) + expected = integer_array(ufunc(a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + s = pd.Series(a) + result = ufunc(s) + expected = pd.Series(integer_array(ufunc(a.astype(float)))) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt]) +def test_ufuncs_single_float(ufunc): + a = integer_array([1, 2, -3, np.nan]) + with np.errstate(invalid="ignore"): + result = ufunc(a) + expected = ufunc(a.astype(float)) + tm.assert_numpy_array_equal(result, expected) + + s = pd.Series(a) + with np.errstate(invalid="ignore"): + result = ufunc(s) + expected = ufunc(s.astype(float)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.add, np.subtract]) +def test_ufuncs_binary_int(ufunc): + # two IntegerArrays + a = integer_array([1, 2, -3, np.nan]) + result = ufunc(a, a) + expected = integer_array(ufunc(a.astype(float), a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + # IntegerArray with numpy array + arr = np.array([1, 2, 3, 4]) + result = ufunc(a, arr) + expected = integer_array(ufunc(a.astype(float), arr)) + tm.assert_extension_array_equal(result, expected) + + result = ufunc(arr, a) + expected = integer_array(ufunc(arr, a.astype(float))) + tm.assert_extension_array_equal(result, expected) 
+ + # IntegerArray with scalar + result = ufunc(a, 1) + expected = integer_array(ufunc(a.astype(float), 1)) + tm.assert_extension_array_equal(result, expected) + + result = ufunc(1, a) + expected = integer_array(ufunc(1, a.astype(float))) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("values", [[0, 1], [0, None]]) +def test_ufunc_reduce_raises(values): + a = integer_array(values) + with pytest.raises(NotImplementedError): + np.add.reduce(a) + + +@td.skip_if_no("pyarrow", min_version="0.15.0") +def test_arrow_array(data): + # protocol added in 0.15.0 + import pyarrow as pa + + arr = pa.array(data) + expected = np.array(data, dtype=object) + expected[data.isna()] = None + expected = pa.array(expected, type=data.dtype.name.lower(), from_pandas=True) + assert arr.equals(expected) + + +@td.skip_if_no("pyarrow", min_version="0.16.0") +def test_arrow_roundtrip(data): + # roundtrip possible from arrow 0.16.0 + import pyarrow as pa + + df = pd.DataFrame({"a": data}) + table = pa.table(df) + assert table.field("a").type == str(data.dtype.numpy_dtype) + result = table.to_pandas() + tm.assert_frame_equal(result, df) + + +@td.skip_if_no("pyarrow", min_version="0.16.0") +def test_arrow_from_arrow_uint(): + # https://github.com/pandas-dev/pandas/issues/31896 + # possible mismatch in types + import pyarrow as pa + + dtype = pd.UInt32Dtype() + result = dtype.__from_arrow__(pa.array([1, 2, 3, 4, None], type="int64")) + expected = pd.array([1, 2, 3, 4, None], dtype="UInt32") + + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize( + "pandasmethname, kwargs", + [ + ("var", {"ddof": 0}), + ("var", {"ddof": 1}), + ("kurtosis", {}), + ("skew", {}), + ("sem", {}), + ], +) +def test_stat_method(pandasmethname, kwargs): + s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64") + pandasmeth = getattr(s, pandasmethname) + result = pandasmeth(**kwargs) + s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64") + 
pandasmeth = getattr(s2, pandasmethname) + expected = pandasmeth(**kwargs) + assert expected == result + + +def test_value_counts_na(): + arr = pd.array([1, 2, 1, pd.NA], dtype="Int64") + result = arr.value_counts(dropna=False) + expected = pd.Series([2, 1, 1], index=[1, 2, pd.NA], dtype="Int64") + tm.assert_series_equal(result, expected) + + result = arr.value_counts(dropna=True) + expected = pd.Series([2, 1], index=[1, 2], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_array_setitem_nullable_boolean_mask(): + # GH 31446 + ser = pd.Series([1, 2], dtype="Int64") + result = ser.where(ser > 1) + expected = pd.Series([pd.NA, 2], dtype="Int64") + tm.assert_series_equal(result, expected) + + +def test_array_setitem(): + # GH 31446 + arr = pd.Series([1, 2], dtype="Int64").array + arr[arr > 1] = 1 + + expected = pd.array([1, 1], dtype="Int64") + tm.assert_extension_array_equal(arr, expected) + + +# TODO(jreback) - these need testing / are broken + +# shift + +# set_index (destroys type) diff --git a/pandas/tests/arrays/test_numpy.py b/pandas/tests/arrays/test_numpy.py new file mode 100644 index 00000000..86793c4e --- /dev/null +++ b/pandas/tests/arrays/test_numpy.py @@ -0,0 +1,250 @@ +""" +Additional tests for PandasArray that aren't covered by +the interface tests. +""" +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import PandasArray +from pandas.core.arrays.numpy_ import PandasDtype + + +@pytest.fixture( + params=[ + np.array(["a", "b"], dtype=object), + np.array([0, 1], dtype=float), + np.array([0, 1], dtype=int), + np.array([0, 1 + 2j], dtype=complex), + np.array([True, False], dtype=bool), + np.array([0, 1], dtype="datetime64[ns]"), + np.array([0, 1], dtype="timedelta64[ns]"), + ] +) +def any_numpy_array(request): + """ + Parametrized fixture for NumPy arrays with different dtypes. + + This excludes string and bytes. 
+ """ + return request.param + + +# ---------------------------------------------------------------------------- +# PandasDtype + + +@pytest.mark.parametrize( + "dtype, expected", + [ + ("bool", True), + ("int", True), + ("uint", True), + ("float", True), + ("complex", True), + ("str", False), + ("bytes", False), + ("datetime64[ns]", False), + ("object", False), + ("void", False), + ], +) +def test_is_numeric(dtype, expected): + dtype = PandasDtype(dtype) + assert dtype._is_numeric is expected + + +@pytest.mark.parametrize( + "dtype, expected", + [ + ("bool", True), + ("int", False), + ("uint", False), + ("float", False), + ("complex", False), + ("str", False), + ("bytes", False), + ("datetime64[ns]", False), + ("object", False), + ("void", False), + ], +) +def test_is_boolean(dtype, expected): + dtype = PandasDtype(dtype) + assert dtype._is_boolean is expected + + +def test_repr(): + dtype = PandasDtype(np.dtype("int64")) + assert repr(dtype) == "PandasDtype('int64')" + + +def test_constructor_from_string(): + result = PandasDtype.construct_from_string("int64") + expected = PandasDtype(np.dtype("int64")) + assert result == expected + + +# ---------------------------------------------------------------------------- +# Construction + + +def test_constructor_no_coercion(): + with pytest.raises(ValueError, match="NumPy array"): + PandasArray([1, 2, 3]) + + +def test_series_constructor_with_copy(): + ndarray = np.array([1, 2, 3]) + ser = pd.Series(PandasArray(ndarray), copy=True) + + assert ser.values is not ndarray + + +def test_series_constructor_with_astype(): + ndarray = np.array([1, 2, 3]) + result = pd.Series(PandasArray(ndarray), dtype="float64") + expected = pd.Series([1.0, 2.0, 3.0], dtype="float64") + tm.assert_series_equal(result, expected) + + +def test_from_sequence_dtype(): + arr = np.array([1, 2, 3], dtype="int64") + result = PandasArray._from_sequence(arr, dtype="uint64") + expected = PandasArray(np.array([1, 2, 3], dtype="uint64")) + 
tm.assert_extension_array_equal(result, expected) + + +def test_constructor_copy(): + arr = np.array([0, 1]) + result = PandasArray(arr, copy=True) + + assert np.shares_memory(result._ndarray, arr) is False + + +def test_constructor_with_data(any_numpy_array): + nparr = any_numpy_array + arr = PandasArray(nparr) + assert arr.dtype.numpy_dtype == nparr.dtype + + +# ---------------------------------------------------------------------------- +# Conversion + + +def test_to_numpy(): + arr = PandasArray(np.array([1, 2, 3])) + result = arr.to_numpy() + assert result is arr._ndarray + + result = arr.to_numpy(copy=True) + assert result is not arr._ndarray + + result = arr.to_numpy(dtype="f8") + expected = np.array([1, 2, 3], dtype="f8") + tm.assert_numpy_array_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# Setitem + + +def test_setitem_series(): + ser = pd.Series([1, 2, 3]) + ser.array[0] = 10 + expected = pd.Series([10, 2, 3]) + tm.assert_series_equal(ser, expected) + + +def test_setitem(any_numpy_array): + nparr = any_numpy_array + arr = PandasArray(nparr, copy=True) + + arr[0] = arr[1] + nparr[0] = nparr[1] + + tm.assert_numpy_array_equal(arr.to_numpy(), nparr) + + +# ---------------------------------------------------------------------------- +# Reductions + + +def test_bad_reduce_raises(): + arr = np.array([1, 2, 3], dtype="int64") + arr = PandasArray(arr) + msg = "cannot perform not_a_method with type int" + with pytest.raises(TypeError, match=msg): + arr._reduce(msg) + + +def test_validate_reduction_keyword_args(): + arr = PandasArray(np.array([1, 2, 3])) + msg = "the 'keepdims' parameter is not supported .*all" + with pytest.raises(ValueError, match=msg): + arr.all(keepdims=True) + + +# ---------------------------------------------------------------------------- +# Ops + + +def test_ufunc(): + arr = PandasArray(np.array([-1.0, 0.0, 1.0])) + result = np.abs(arr) + expected = 
PandasArray(np.abs(arr._ndarray)) + tm.assert_extension_array_equal(result, expected) + + r1, r2 = np.divmod(arr, np.add(arr, 2)) + e1, e2 = np.divmod(arr._ndarray, np.add(arr._ndarray, 2)) + e1 = PandasArray(e1) + e2 = PandasArray(e2) + tm.assert_extension_array_equal(r1, e1) + tm.assert_extension_array_equal(r2, e2) + + +def test_basic_binop(): + # Just a basic smoke test. The EA interface tests exercise this + # more thoroughly. + x = PandasArray(np.array([1, 2, 3])) + result = x + x + expected = PandasArray(np.array([2, 4, 6])) + tm.assert_extension_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [None, object]) +def test_setitem_object_typecode(dtype): + arr = PandasArray(np.array(["a", "b", "c"], dtype=dtype)) + arr[0] = "t" + expected = PandasArray(np.array(["t", "b", "c"], dtype=dtype)) + tm.assert_extension_array_equal(arr, expected) + + +def test_setitem_no_coercion(): + # https://github.com/pandas-dev/pandas/issues/28150 + arr = PandasArray(np.array([1, 2, 3])) + with pytest.raises(ValueError, match="int"): + arr[0] = "a" + + # With a value that we do coerce, check that we coerce the value + # and not the underlying array. 
+ arr[0] = 2.5 + assert isinstance(arr[0], (int, np.integer)), type(arr[0]) + + +def test_setitem_preserves_views(): + # GH#28150, see also extension test of the same name + arr = PandasArray(np.array([1, 2, 3])) + view1 = arr.view() + view2 = arr[:] + view3 = np.asarray(arr) + + arr[0] = 9 + assert view1[0] == 9 + assert view2[0] == 9 + assert view3[0] == 9 + + arr[-1] = 2.5 + view1[-1] = 5 + assert arr[-1] == 5 diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py new file mode 100644 index 00000000..1f4351c7 --- /dev/null +++ b/pandas/tests/arrays/test_period.py @@ -0,0 +1,414 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs.period import IncompatibleFrequency +import pandas.util._test_decorators as td + +from pandas.core.dtypes.dtypes import PeriodDtype, registry + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import PeriodArray, period_array + +# ---------------------------------------------------------------------------- +# Dtype + + +def test_registered(): + assert PeriodDtype in registry.dtypes + result = registry.find("Period[D]") + expected = PeriodDtype("D") + assert result == expected + + +# ---------------------------------------------------------------------------- +# period_array + + +@pytest.mark.parametrize( + "data, freq, expected", + [ + ([pd.Period("2017", "D")], None, [17167]), + ([pd.Period("2017", "D")], "D", [17167]), + ([2017], "D", [17167]), + (["2017"], "D", [17167]), + ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), + ([pd.Period("2017", "D"), None], None, [17167, iNaT]), + (pd.Series(pd.date_range("2017", periods=3)), None, [17167, 17168, 17169]), + (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), + ], +) +def test_period_array_ok(data, freq, expected): + result = period_array(data, freq=freq).asi8 + expected = np.asarray(expected, dtype=np.int64) + tm.assert_numpy_array_equal(result, 
expected) + + +def test_period_array_readonly_object(): + # https://github.com/pandas-dev/pandas/issues/25403 + pa = period_array([pd.Period("2019-01-01")]) + arr = np.asarray(pa, dtype="object") + arr.setflags(write=False) + + result = period_array(arr) + tm.assert_period_array_equal(result, pa) + + result = pd.Series(arr) + tm.assert_series_equal(result, pd.Series(pa)) + + result = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, pd.DataFrame({"A": pa})) + + +def test_from_datetime64_freq_changes(): + # https://github.com/pandas-dev/pandas/issues/23438 + arr = pd.date_range("2017", periods=3, freq="D") + result = PeriodArray._from_datetime64(arr, freq="M") + expected = period_array(["2017-01-01", "2017-01-01", "2017-01-01"], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize( + "data, freq, msg", + [ + ( + [pd.Period("2017", "D"), pd.Period("2017", "A")], + None, + "Input has different freq", + ), + ([pd.Period("2017", "D")], "A", "Input has different freq"), + ], +) +def test_period_array_raises(data, freq, msg): + with pytest.raises(IncompatibleFrequency, match=msg): + period_array(data, freq) + + +def test_period_array_non_period_series_raies(): + ser = pd.Series([1, 2, 3]) + with pytest.raises(TypeError, match="dtype"): + PeriodArray(ser, freq="D") + + +def test_period_array_freq_mismatch(): + arr = period_array(["2000", "2001"], freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq="M") + + with pytest.raises(IncompatibleFrequency, match="freq"): + PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) + + +def test_asi8(): + result = period_array(["2000", "2001", None], freq="D").asi8 + expected = np.array([10957, 11323, iNaT]) + tm.assert_numpy_array_equal(result, expected) + + +def test_take_raises(): + arr = period_array(["2000", "2001"], freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", 
freq="W")) + + with pytest.raises(ValueError, match="foo"): + arr.take([0, -1], allow_fill=True, fill_value="foo") + + +@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) +def test_astype(dtype): + # We choose to ignore the sign and size of integers for + # Period/Datetime/Timedelta astype + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(dtype) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_copies(): + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(np.int64, copy=False) + # Add the `.base`, since we now use `.asi8` which returns a view. + # We could maybe override it in PeriodArray to return ._data directly. + assert result.base is arr._data + + result = arr.astype(np.int64, copy=True) + assert result is not arr._data + tm.assert_numpy_array_equal(result, arr._data.view("i8")) + + +def test_astype_categorical(): + arr = period_array(["2000", "2001", "2001", None], freq="D") + result = arr.astype("category") + categories = pd.PeriodIndex(["2000", "2001"], freq="D") + expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories) + tm.assert_categorical_equal(result, expected) + + +def test_astype_period(): + arr = period_array(["2000", "2001", None], freq="D") + result = arr.astype(PeriodDtype("M")) + expected = period_array(["2000", "2001", None], freq="M") + tm.assert_period_array_equal(result, expected) + + +@pytest.mark.parametrize("other", ["datetime64[ns]", "timedelta64[ns]"]) +def test_astype_datetime(other): + arr = period_array(["2000", "2001", None], freq="D") + # slice off the [ns] so that the regex matches. 
+ with pytest.raises(TypeError, match=other[:-4]): + arr.astype(other) + + +def test_fillna_raises(): + arr = period_array(["2000", "2001", "2002"], freq="D") + with pytest.raises(ValueError, match="Length"): + arr.fillna(arr[:2]) + + +def test_fillna_copies(): + arr = period_array(["2000", "2001", "2002"], freq="D") + result = arr.fillna(pd.Period("2000", "D")) + assert result is not arr + + +# ---------------------------------------------------------------------------- +# setitem + + +@pytest.mark.parametrize( + "key, value, expected", + [ + ([0], pd.Period("2000", "D"), [10957, 1, 2]), + ([0], None, [iNaT, 1, 2]), + ([0], np.nan, [iNaT, 1, 2]), + ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3), + ( + [0, 1, 2], + [pd.Period("2000", "D"), pd.Period("2001", "D"), pd.Period("2002", "D")], + [10957, 11323, 11688], + ), + ], +) +def test_setitem(key, value, expected): + arr = PeriodArray(np.arange(3), freq="D") + expected = PeriodArray(expected, freq="D") + arr[key] = value + tm.assert_period_array_equal(arr, expected) + + +def test_setitem_raises_incompatible_freq(): + arr = PeriodArray(np.arange(3), freq="D") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr[0] = pd.Period("2000", freq="A") + + other = period_array(["2000", "2001"], freq="A") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr[[0, 1]] = other + + +def test_setitem_raises_length(): + arr = PeriodArray(np.arange(3), freq="D") + with pytest.raises(ValueError, match="length"): + arr[[0, 1]] = [pd.Period("2000", freq="D")] + + +def test_setitem_raises_type(): + arr = PeriodArray(np.arange(3), freq="D") + with pytest.raises(TypeError, match="int"): + arr[0] = 1 + + +# ---------------------------------------------------------------------------- +# Ops + + +def test_sub_period(): + arr = period_array(["2000", "2001"], freq="D") + other = pd.Period("2000", freq="M") + with pytest.raises(IncompatibleFrequency, match="freq"): + arr - other + + +# 
---------------------------------------------------------------------------- +# Methods + + +@pytest.mark.parametrize( + "other", + [pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")], +) +def test_where_different_freq_raises(other): + ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D")) + cond = np.array([True, False, True]) + with pytest.raises(IncompatibleFrequency, match="freq"): + ser.where(cond, other) + + +# ---------------------------------------------------------------------------- +# Printing + + +def test_repr_small(): + arr = period_array(["2000", "2001"], freq="D") + result = str(arr) + expected = ( + "<PeriodArray>\n['2000-01-01', '2001-01-01']\nLength: 2, dtype: period[D]" + ) + assert result == expected + + +def test_repr_large(): + arr = period_array(["2000", "2001"] * 500, freq="D") + result = str(arr) + expected = ( + "<PeriodArray>\n" + "['2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01',\n" + " ...\n" + " '2000-01-01', '2001-01-01', '2000-01-01', '2001-01-01', " + "'2000-01-01',\n" + " '2001-01-01', '2000-01-01', '2001-01-01', '2000-01-01', " + "'2001-01-01']\n" + "Length: 1000, dtype: period[D]" + ) + assert result == expected + + +# ---------------------------------------------------------------------------- +# Reductions + + +class TestReductions: + def test_min_max(self): + arr = period_array( + [ + "2000-01-03", + "2000-01-03", + "NaT", + "2000-01-02", + "2000-01-05", + "2000-01-04", + ], + freq="D", + ) + + result = arr.min() + expected = pd.Period("2000-01-02", freq="D") + assert result == expected + + result = arr.max() + expected = pd.Period("2000-01-05", freq="D") + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_empty(self, skipna): + arr =
period_array([], freq="D") + result = arr.min(skipna=skipna) + assert result is pd.NaT + + result = arr.max(skipna=skipna) + assert result is pd.NaT + + +# ---------------------------------------------------------------------------- +# Arrow interaction + +pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev") + + +@pyarrow_skip +def test_arrow_extension_type(): + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + p1 = ArrowPeriodType("D") + p2 = ArrowPeriodType("D") + p3 = ArrowPeriodType("M") + + assert p1.freq == "D" + assert p1 == p2 + assert not p1 == p3 + assert hash(p1) == hash(p2) + assert not hash(p1) == hash(p3) + + +@pyarrow_skip +@pytest.mark.parametrize( + "data, freq", + [ + (pd.date_range("2017", periods=3), "D"), + (pd.date_range("2017", periods=3, freq="A"), "A-DEC"), + ], +) +def test_arrow_array(data, freq): + import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + periods = period_array(data, freq=freq) + result = pa.array(periods) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == freq + expected = pa.array(periods.asi8, type="int64") + assert result.storage.equals(expected) + + # convert to its storage type + result = pa.array(periods, type=pa.int64()) + assert result.equals(expected) + + # unsupported conversions + with pytest.raises(TypeError): + pa.array(periods, type="float64") + + with pytest.raises(TypeError, match="different 'freq'"): + pa.array(periods, type=ArrowPeriodType("T")) + + +@pyarrow_skip +def test_arrow_array_missing(): + import pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], freq="D") + arr[1] = pd.NaT + + result = pa.array(arr) + assert isinstance(result.type, ArrowPeriodType) + assert result.type.freq == "D" + expected = pa.array([1, None, 3], type="int64") + assert result.storage.equals(expected) + + +@pyarrow_skip +def test_arrow_table_roundtrip(): + import
pyarrow as pa + from pandas.core.arrays._arrow_utils import ArrowPeriodType + + arr = PeriodArray([1, 2, 3], freq="D") + arr[1] = pd.NaT + df = pd.DataFrame({"a": arr}) + + table = pa.table(df) + assert isinstance(table.field("a").type, ArrowPeriodType) + result = table.to_pandas() + assert isinstance(result["a"].dtype, PeriodDtype) + tm.assert_frame_equal(result, df) + + table2 = pa.concat_tables([table, table]) + result = table2.to_pandas() + expected = pd.concat([df, df], ignore_index=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py new file mode 100644 index 00000000..62cb4766 --- /dev/null +++ b/pandas/tests/arrays/test_timedeltas.py @@ -0,0 +1,293 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import TimedeltaArray + + +class TestTimedeltaArrayConstructor: + def test_only_1dim_accepted(self): + # GH#25282 + arr = np.array([0, 1, 2, 3], dtype="m8[h]").astype("m8[ns]") + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 3-dim, we allow 2D to sneak in for ops purposes GH#29853 + TimedeltaArray(arr.reshape(2, 2, 1)) + + with pytest.raises(ValueError, match="Only 1-dimensional"): + # 0-dim + TimedeltaArray(arr[[0]].squeeze()) + + def test_freq_validation(self): + # ensure that the public constructor cannot create an invalid instance + arr = np.array([0, 0, 1], dtype=np.int64) * 3600 * 10 ** 9 + + msg = ( + "Inferred frequency None from passed values does not " + "conform to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaArray(arr.view("timedelta64[ns]"), freq="D") + + def test_non_array_raises(self): + with pytest.raises(ValueError, match="list"): + TimedeltaArray([1, 2, 3]) + + def test_other_type_raises(self): + with pytest.raises(ValueError, match="dtype bool cannot be converted"): + TimedeltaArray(np.array([1, 2, 3], dtype="bool")) + + def 
test_incorrect_dtype_raises(self): + # TODO: why TypeError for 'category' but ValueError for i8? + with pytest.raises( + ValueError, match=r"category cannot be converted to timedelta64\[ns\]" + ): + TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype="category") + + with pytest.raises( + ValueError, match=r"dtype int64 cannot be converted to timedelta64\[ns\]", + ): + TimedeltaArray(np.array([1, 2, 3], dtype="i8"), dtype=np.dtype("int64")) + + def test_copy(self): + data = np.array([1, 2, 3], dtype="m8[ns]") + arr = TimedeltaArray(data, copy=False) + assert arr._data is data + + arr = TimedeltaArray(data, copy=True) + assert arr._data is not data + assert arr._data.base is not data + + +class TestTimedeltaArray: + def test_np_sum(self): + # GH#25282 + vals = np.arange(5, dtype=np.int64).view("m8[h]").astype("m8[ns]") + arr = TimedeltaArray(vals) + result = np.sum(arr) + assert result == vals.sum() + + result = np.sum(pd.TimedeltaIndex(arr)) + assert result == vals.sum() + + def test_from_sequence_dtype(self): + msg = "dtype .*object.* cannot be converted to timedelta64" + with pytest.raises(ValueError, match=msg): + TimedeltaArray._from_sequence([], dtype=object) + + def test_abs(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + evals = np.array([3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + expected = TimedeltaArray(evals) + + result = abs(arr) + tm.assert_timedelta_array_equal(result, expected) + + def test_neg(self): + vals = np.array([-3600 * 10 ** 9, "NaT", 7200 * 10 ** 9], dtype="m8[ns]") + arr = TimedeltaArray(vals) + + evals = np.array([3600 * 10 ** 9, "NaT", -7200 * 10 ** 9], dtype="m8[ns]") + expected = TimedeltaArray(evals) + + result = -arr + tm.assert_timedelta_array_equal(result, expected) + + def test_neg_freq(self): + tdi = pd.timedelta_range("2 Days", periods=4, freq="H") + arr = TimedeltaArray(tdi, freq=tdi.freq) + + expected = TimedeltaArray(-tdi._data, 
freq=-tdi.freq) + + result = -arr + tm.assert_timedelta_array_equal(result, expected) + + @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) + def test_astype_int(self, dtype): + arr = TimedeltaArray._from_sequence([pd.Timedelta("1H"), pd.Timedelta("2H")]) + result = arr.astype(dtype) + + if np.dtype(dtype).kind == "u": + expected_dtype = np.dtype("uint64") + else: + expected_dtype = np.dtype("int64") + expected = arr.astype(expected_dtype) + + assert result.dtype == expected_dtype + tm.assert_numpy_array_equal(result, expected) + + def test_setitem_clears_freq(self): + a = TimedeltaArray(pd.timedelta_range("1H", periods=2, freq="H")) + a[0] = pd.Timedelta("1H") + assert a.freq is None + + @pytest.mark.parametrize( + "obj", + [ + pd.Timedelta(seconds=1), + pd.Timedelta(seconds=1).to_timedelta64(), + pd.Timedelta(seconds=1).to_pytimedelta(), + ], + ) + def test_setitem_objects(self, obj): + # make sure we accept timedelta64 and timedelta in addition to Timedelta + tdi = pd.timedelta_range("2 Days", periods=4, freq="H") + arr = TimedeltaArray(tdi, freq=tdi.freq) + + arr[0] = obj + assert arr[0] == pd.Timedelta(seconds=1) + + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + 1.0, + np.datetime64("NaT"), + pd.Timestamp.now(), + "invalid", + np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9, + (np.arange(10) * 24 * 3600 * 10 ** 9).view("datetime64[ns]"), + pd.Timestamp.now().to_period("D"), + ], + ) + @pytest.mark.parametrize( + "index", + [ + True, + pytest.param( + False, + marks=pytest.mark.xfail( + reason="Raises ValueError instead of TypeError", raises=ValueError + ), + ), + ], + ) + def test_searchsorted_invalid_types(self, other, index): + data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9 + arr = TimedeltaArray(data, freq="D") + if index: + arr = pd.Index(arr) + + msg = "searchsorted requires compatible dtype or scalar" + with pytest.raises(TypeError, match=msg): + arr.searchsorted(other) + + +class TestReductions: + 
@pytest.mark.parametrize("name", ["sum", "std", "min", "max", "median"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_reductions_empty(self, name, skipna): + tdi = pd.TimedeltaIndex([]) + arr = tdi.array + + result = getattr(tdi, name)(skipna=skipna) + assert result is pd.NaT + + result = getattr(arr, name)(skipna=skipna) + assert result is pd.NaT + + def test_min_max(self): + arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"]) + + result = arr.min() + expected = pd.Timedelta("2H") + assert result == expected + + result = arr.max() + expected = pd.Timedelta("5H") + assert result == expected + + result = arr.min(skipna=False) + assert result is pd.NaT + + result = arr.max(skipna=False) + assert result is pd.NaT + + def test_sum(self): + tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"]) + arr = tdi.array + + result = arr.sum(skipna=True) + expected = pd.Timedelta(hours=17) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.sum(skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = arr.sum(skipna=False) + assert result is pd.NaT + + result = tdi.sum(skipna=False) + assert result is pd.NaT + + result = arr.sum(min_count=9) + assert result is pd.NaT + + result = tdi.sum(min_count=9) + assert result is pd.NaT + + result = arr.sum(min_count=1) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.sum(min_count=1) + assert isinstance(result, pd.Timedelta) + assert result == expected + + def test_npsum(self): + # GH#25335 np.sum should return a Timedelta, not timedelta64 + tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"]) + arr = tdi.array + + result = np.sum(tdi) + expected = pd.Timedelta(hours=17) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = np.sum(arr) + assert isinstance(result, pd.Timedelta) + assert result == expected + + def test_std(self): + tdi = 
pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + arr = tdi.array + + result = arr.std(skipna=True) + expected = pd.Timedelta(hours=2) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.std(skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = arr.std(skipna=False) + assert result is pd.NaT + + result = tdi.std(skipna=False) + assert result is pd.NaT + + def test_median(self): + tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"]) + arr = tdi.array + + result = arr.median(skipna=True) + expected = pd.Timedelta(hours=2) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = tdi.median(skipna=True) + assert isinstance(result, pd.Timedelta) + assert result == expected + + result = arr.median(skipna=False) + assert result is pd.NaT + + result = tdi.median(skipna=False) + assert result is pd.NaT diff --git a/pandas/tests/base/__init__.py b/pandas/tests/base/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/base/test_constructors.py b/pandas/tests/base/test_constructors.py new file mode 100644 index 00000000..0b727439 --- /dev/null +++ b/pandas/tests/base/test_constructors.py @@ -0,0 +1,142 @@ +from datetime import datetime +import sys + +import numpy as np +import pytest + +from pandas.compat import PYPY + +import pandas as pd +from pandas import DataFrame, Index, Series +import pandas._testing as tm +from pandas.core.accessor import PandasDelegate +from pandas.core.base import NoNewAttributesMixin, PandasObject + + +class TestPandasDelegate: + class Delegator: + _properties = ["foo"] + _methods = ["bar"] + + def _set_foo(self, value): + self.foo = value + + def _get_foo(self): + return self.foo + + foo = property(_get_foo, _set_foo, doc="foo property") + + def bar(self, *args, **kwargs): + """ a test bar method """ + pass + + class Delegate(PandasDelegate, PandasObject): + def __init__(self, obj): + self.obj = obj +
+ def setup_method(self, method): + pass + + def test_invalid_delegation(self): + # these show that in order for the delegation to work + # the _delegate_* methods need to be overridden to not raise + # a TypeError + + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, + accessors=self.Delegator._properties, + typ="property", + ) + self.Delegate._add_delegate_accessors( + delegate=self.Delegator, accessors=self.Delegator._methods, typ="method" + ) + + delegate = self.Delegate(self.Delegator()) + + with pytest.raises(TypeError): + delegate.foo + + with pytest.raises(TypeError): + delegate.foo = 5 + + with pytest.raises(TypeError): + delegate.foo() + + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") + def test_memory_usage(self): + # Delegate does not implement memory_usage. + # Check that we fall back to in-built `__sizeof__` + # GH 12924 + delegate = self.Delegate(self.Delegator()) + sys.getsizeof(delegate) + + +class TestNoNewAttributesMixin: + def test_mixin(self): + class T(NoNewAttributesMixin): + pass + + t = T() + assert not hasattr(t, "__frozen") + + t.a = "test" + assert t.a == "test" + + t._freeze() + assert "__frozen" in dir(t) + assert getattr(t, "__frozen") + + with pytest.raises(AttributeError): + t.b = "test" + + assert not hasattr(t, "b") + + +class TestConstruction: + # test certain constructor behaviours on dtype inference across Series, + # Index and DataFrame + + @pytest.mark.parametrize( + "klass", + [ + Series, + lambda x, **kwargs: DataFrame({"a": x}, **kwargs)["a"], + pytest.param( + lambda x, **kwargs: DataFrame(x, **kwargs)[0], marks=pytest.mark.xfail + ), + Index, + ], + ) + @pytest.mark.parametrize( + "a", + [ + np.array(["2263-01-01"], dtype="datetime64[D]"), + np.array([datetime(2263, 1, 1)], dtype=object), + np.array([np.datetime64("2263-01-01", "D")], dtype=object), + np.array(["2263-01-01"], dtype=object), + ], + ids=[ + "datetime64[D]", + "object-datetime.datetime", + "object-numpy-scalar", + 
"object-string", + ], + ) + def test_constructor_datetime_outofbound(self, a, klass): + # GH-26853 (+ bug GH-26206 out of bound non-ns unit) + + # No dtype specified (dtype inference) + # datetime64[non-ns] raise error, other cases result in object dtype + # and preserve original data + if a.dtype.kind == "M": + with pytest.raises(pd.errors.OutOfBoundsDatetime): + klass(a) + else: + result = klass(a) + assert result.dtype == "object" + tm.assert_numpy_array_equal(result.to_numpy(), a) + + # Explicit dtype specified + # Forced conversion fails for all -> all cases raise error + with pytest.raises(pd.errors.OutOfBoundsDatetime): + klass(a, dtype="datetime64[ns]") diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py new file mode 100644 index 00000000..07a15d06 --- /dev/null +++ b/pandas/tests/base/test_conversion.py @@ -0,0 +1,439 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_datetime64_dtype, is_timedelta64_dtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import CategoricalIndex, Series, Timedelta, Timestamp +import pandas._testing as tm +from pandas.core.arrays import ( + DatetimeArray, + IntervalArray, + PandasArray, + PeriodArray, + SparseArray, + TimedeltaArray, +) + + +class TestToIterable: + # test that we convert an iterable to python types + + dtypes = [ + ("int8", int), + ("int16", int), + ("int32", int), + ("int64", int), + ("uint8", int), + ("uint16", int), + ("uint32", int), + ("uint64", int), + ("float16", float), + ("float32", float), + ("float64", float), + ("datetime64[ns]", Timestamp), + ("datetime64[ns, US/Eastern]", Timestamp), + ("timedelta64[ns]", Timedelta), + ] + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + 
@pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") + # TODO(GH-24559): Remove the filterwarnings + def test_iterable(self, index_or_series, method, dtype, rdtype): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype, obj", + [ + ("object", object, "a"), + ("object", int, 1), + ("category", object, "a"), + ("category", int, 1), + ], + ) + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_iterable_object_and_category( + self, index_or_series, method, dtype, rdtype, obj + ): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([obj], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize("dtype, rdtype", dtypes) + def test_iterable_items(self, dtype, rdtype): + # gh-13258 + # test if items yields the correct boxed scalars + # this only applies to series + s = Series([1], dtype=dtype) + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + "dtype, rdtype", dtypes + [("object", int), ("category", int)] + ) + @pytest.mark.filterwarnings("ignore:\\n Passing:FutureWarning") + # TODO(GH-24559): Remove the filterwarnings + def test_iterable_map(self, index_or_series, dtype, rdtype): + # gh-13236 + # coerce iteration to underlying python / pandas types + typ = index_or_series + s = typ([1], dtype=dtype) + result = s.map(type)[0] + if not isinstance(rdtype, tuple): + rdtype = tuple([rdtype]) + assert result in rdtype + + @pytest.mark.parametrize( + "method", + [ + lambda x: x.tolist(), + lambda x: 
x.to_list(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], + ids=["tolist", "to_list", "list", "iter"], + ) + def test_categorial_datetimelike(self, method): + i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")]) + + result = method(i)[0] + assert isinstance(result, Timestamp) + + def test_iter_box(self): + vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")] + s = Series(vals) + assert s.dtype == "datetime64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz is None + assert res == exp + + vals = [ + Timestamp("2011-01-01", tz="US/Eastern"), + Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = Series(vals) + + assert s.dtype == "datetime64[ns, US/Eastern]" + for res, exp in zip(s, vals): + assert isinstance(res, Timestamp) + assert res.tz == exp.tz + assert res == exp + + # timedelta + vals = [Timedelta("1 days"), Timedelta("2 days")] + s = Series(vals) + assert s.dtype == "timedelta64[ns]" + for res, exp in zip(s, vals): + assert isinstance(res, Timedelta) + assert res == exp + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = Series(vals) + assert s.dtype == "Period[M]" + for res, exp in zip(s, vals): + assert isinstance(res, pd.Period) + assert res.freq == "M" + assert res == exp + + +@pytest.mark.parametrize( + "array, expected_type, dtype", + [ + (np.array([0, 1], dtype=np.int64), np.ndarray, "int64"), + (np.array(["a", "b"]), np.ndarray, "object"), + (pd.Categorical(["a", "b"]), pd.Categorical, "category"), + ( + pd.DatetimeIndex(["2017", "2018"], tz="US/Central"), + DatetimeArray, + "datetime64[ns, US/Central]", + ), + ( + pd.PeriodIndex([2018, 2019], freq="A"), + PeriodArray, + pd.core.dtypes.dtypes.PeriodDtype("A-DEC"), + ), + (pd.IntervalIndex.from_breaks([0, 1, 2]), IntervalArray, "interval",), + # This test is currently failing for datetime64[ns] and timedelta64[ns]. 
+ # The NumPy type system is sufficient for representing these types, so + # we just use NumPy for Series / DataFrame columns of these types (so + # we get consolidation and so on). + # However, DatetimeIndex and TimedeltaIndex use the DateLikeArray + # abstraction for code reuse. + # At the moment, we've judged that allowing this test to fail is more + # practical than overriding Series._values to special case + # Series[M8[ns]] and Series[m8[ns]] to return a DateLikeArray. + pytest.param( + pd.DatetimeIndex(["2017", "2018"]), + np.ndarray, + "datetime64[ns]", + marks=[pytest.mark.xfail(reason="datetime _values", strict=True)], + ), + pytest.param( + pd.TimedeltaIndex([10 ** 10]), + np.ndarray, + "m8[ns]", + marks=[pytest.mark.xfail(reason="timedelta _values", strict=True)], + ), + ], +) +def test_values_consistent(array, expected_type, dtype): + l_values = pd.Series(array)._values + r_values = pd.Index(array)._values + assert type(l_values) is expected_type + assert type(l_values) is type(r_values) + + tm.assert_equal(l_values, r_values) + + +@pytest.mark.parametrize( + "array, expected", + [ + (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)), + (np.array(["0", "1"]), np.array(["0", "1"], dtype=object)), + (pd.Categorical(["a", "a"]), np.array([0, 0], dtype="int8")), + ( + pd.DatetimeIndex(["2017-01-01T00:00:00"]), + np.array(["2017-01-01T00:00:00"], dtype="M8[ns]"), + ), + ( + pd.DatetimeIndex(["2017-01-01T00:00:00"], tz="US/Eastern"), + np.array(["2017-01-01T05:00:00"], dtype="M8[ns]"), + ), + (pd.TimedeltaIndex([10 ** 10]), np.array([10 ** 10], dtype="m8[ns]")), + ( + pd.PeriodIndex(["2017", "2018"], freq="D"), + np.array([17167, 17532], dtype=np.int64), + ), + ], +) +def test_ndarray_values(array, expected): + l_values = pd.Series(array)._ndarray_values + r_values = pd.Index(array)._ndarray_values + tm.assert_numpy_array_equal(l_values, r_values) + tm.assert_numpy_array_equal(l_values, expected) + + +@pytest.mark.parametrize("arr",
[np.array([1, 2, 3])]) +def test_numpy_array(arr): + ser = pd.Series(arr) + result = ser.array + expected = PandasArray(arr) + tm.assert_extension_array_equal(result, expected) + + +def test_numpy_array_all_dtypes(any_numpy_dtype): + ser = pd.Series(dtype=any_numpy_dtype) + result = ser.array + if is_datetime64_dtype(any_numpy_dtype): + assert isinstance(result, DatetimeArray) + elif is_timedelta64_dtype(any_numpy_dtype): + assert isinstance(result, TimedeltaArray) + else: + assert isinstance(result, PandasArray) + + +@pytest.mark.parametrize( + "array, attr", + [ + (pd.Categorical(["a", "b"]), "_codes"), + (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_data"), + (pd.core.arrays.integer_array([0, np.nan]), "_data"), + (IntervalArray.from_breaks([0, 1]), "_left"), + (SparseArray([0, 1]), "_sparse_values"), + (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_data"), + # tz-aware Datetime + ( + DatetimeArray( + np.array( + ["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]" + ), + dtype=DatetimeTZDtype(tz="US/Central"), + ), + "_data", + ), + ], +) +def test_array(array, attr, index_or_series): + box = index_or_series + if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index: + pytest.skip(f"No index type for {array.dtype}") + result = box(array, copy=False).array + + if attr: + array = getattr(array, attr) + result = getattr(result, attr) + + assert result is array + + +def test_array_multiindex_raises(): + idx = pd.MultiIndex.from_product([["A"], ["a", "b"]]) + with pytest.raises(ValueError, match="MultiIndex"): + idx.array + + +@pytest.mark.parametrize( + "array, expected", + [ + (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)), + (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)), + ( + pd.core.arrays.period_array(["2000", "2001"], freq="D"), + np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]), + ), + ( + pd.core.arrays.integer_array([0, np.nan]), + np.array([0, 
pd.NA], dtype=object), + ), + ( + IntervalArray.from_breaks([0, 1, 2]), + np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object), + ), + (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)), + # tz-naive datetime + ( + DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")), + np.array(["2000", "2001"], dtype="M8[ns]"), + ), + # tz-aware stays tz`-aware + ( + DatetimeArray( + np.array( + ["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]" + ), + dtype=DatetimeTZDtype(tz="US/Central"), + ), + np.array( + [ + pd.Timestamp("2000-01-01", tz="US/Central"), + pd.Timestamp("2000-01-02", tz="US/Central"), + ] + ), + ), + # Timedelta + ( + TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"), + np.array([0, 3600000000000], dtype="m8[ns]"), + ), + ], +) +def test_to_numpy(array, expected, index_or_series): + box = index_or_series + thing = box(array) + + if array.dtype.name in ("Int64", "Sparse[int64, 0]") and box is pd.Index: + pytest.skip(f"No index type for {array.dtype}") + + result = thing.to_numpy() + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("as_series", [True, False]) +@pytest.mark.parametrize( + "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] +) +def test_to_numpy_copy(arr, as_series): + obj = pd.Index(arr, copy=False) + if as_series: + obj = pd.Series(obj.values, copy=False) + + # no copy by default + result = obj.to_numpy() + assert np.shares_memory(arr, result) is True + + result = obj.to_numpy(copy=False) + assert np.shares_memory(arr, result) is True + + # copy=True + result = obj.to_numpy(copy=True) + assert np.shares_memory(arr, result) is False + + +@pytest.mark.parametrize("as_series", [True, False]) +def test_to_numpy_dtype(as_series): + tz = "US/Eastern" + obj = pd.DatetimeIndex(["2000", "2001"], tz=tz) + if as_series: + obj = pd.Series(obj) + + # preserve tz by default + result = obj.to_numpy() + expected = np.array( + [pd.Timestamp("2000", 
tz=tz), pd.Timestamp("2001", tz=tz)], dtype=object + ) + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="object") + tm.assert_numpy_array_equal(result, expected) + + result = obj.to_numpy(dtype="M8[ns]") + expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values, dtype, na_value, expected", + [ + ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]), + ( + [pd.Timestamp("2000"), pd.Timestamp("2000"), pd.NaT], + None, + pd.Timestamp("2000"), + [np.datetime64("2000-01-01T00:00:00.000000000")] * 3, + ), + ], +) +@pytest.mark.parametrize("container", [pd.Series, pd.Index]) # type: ignore +def test_to_numpy_na_value_numpy_dtype(container, values, dtype, na_value, expected): + s = container(values) + result = s.to_numpy(dtype=dtype, na_value=na_value) + expected = np.array(expected) + tm.assert_numpy_array_equal(result, expected) + + +def test_to_numpy_kwargs_raises(): + # numpy + s = pd.Series([1, 2, 3]) + match = r"to_numpy\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=match): + s.to_numpy(foo=True) + + # extension + s = pd.Series([1, 2, 3], dtype="Int64") + with pytest.raises(TypeError, match=match): + s.to_numpy(foo=True) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py new file mode 100644 index 00000000..2693eb12 --- /dev/null +++ b/pandas/tests/base/test_ops.py @@ -0,0 +1,899 @@ +from datetime import datetime, timedelta +from io import StringIO +import sys + +import numpy as np +import pytest + +from pandas._libs.tslib import iNaT +from pandas.compat import PYPY +from pandas.compat.numpy import np_array_datetime64_compat + +from pandas.core.dtypes.common import ( + is_datetime64_dtype, + is_datetime64tz_dtype, + is_object_dtype, + needs_i8_conversion, +) + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + 
PeriodIndex, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, +) +import pandas._testing as tm +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +class Ops: + def _allow_na_ops(self, obj): + """Whether to skip test cases including NaN""" + if (isinstance(obj, Index) and obj.is_boolean()) or not obj._can_hold_na: + # don't test boolean / integer dtypes + return False + return True + + def setup_method(self, method): + self.bool_index = tm.makeBoolIndex(10, name="a") + self.int_index = tm.makeIntIndex(10, name="a") + self.float_index = tm.makeFloatIndex(10, name="a") + self.dt_index = tm.makeDateIndex(10, name="a") + self.dt_tz_index = tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern") + self.period_index = tm.makePeriodIndex(10, name="a") + self.string_index = tm.makeStringIndex(10, name="a") + self.unicode_index = tm.makeUnicodeIndex(10, name="a") + + arr = np.random.randn(10) + self.bool_series = Series(arr, index=self.bool_index, name="a") + self.int_series = Series(arr, index=self.int_index, name="a") + self.float_series = Series(arr, index=self.float_index, name="a") + self.dt_series = Series(arr, index=self.dt_index, name="a") + self.dt_tz_series = self.dt_tz_index.to_series() + self.period_series = Series(arr, index=self.period_index, name="a") + self.string_series = Series(arr, index=self.string_index, name="a") + self.unicode_series = Series(arr, index=self.unicode_index, name="a") + + types = ["bool", "int", "float", "dt", "dt_tz", "period", "string", "unicode"] + self.indexes = [getattr(self, f"{t}_index") for t in types] + self.series = [getattr(self, f"{t}_series") for t in types] + + # To test narrow dtypes, we use narrower *data* elements, not *index* elements + index = self.int_index + self.float32_series = Series(arr.astype(np.float32), index=index, name="a") + + arr_int = np.random.choice(10, size=10, replace=False) + self.int8_series = Series(arr_int.astype(np.int8), index=index, name="a") + self.int16_series = 
Series(arr_int.astype(np.int16), index=index, name="a") + self.int32_series = Series(arr_int.astype(np.int32), index=index, name="a") + + self.uint8_series = Series(arr_int.astype(np.uint8), index=index, name="a") + self.uint16_series = Series(arr_int.astype(np.uint16), index=index, name="a") + self.uint32_series = Series(arr_int.astype(np.uint32), index=index, name="a") + + nrw_types = ["float32", "int8", "int16", "int32", "uint8", "uint16", "uint32"] + self.narrow_series = [getattr(self, f"{t}_series") for t in nrw_types] + + self.objs = self.indexes + self.series + self.narrow_series + + def check_ops_properties(self, props, filter=None, ignore_failures=False): + for op in props: + for o in self.is_valid_objs: + + # if a filter, skip if it doesn't match + if filter is not None: + filt = o.index if isinstance(o, Series) else o + if not filter(filt): + continue + + try: + if isinstance(o, Series): + expected = Series(getattr(o.index, op), index=o.index, name="a") + else: + expected = getattr(o, op) + except (AttributeError): + if ignore_failures: + continue + + result = getattr(o, op) + + # these could be series, arrays or scalars + if isinstance(result, Series) and isinstance(expected, Series): + tm.assert_series_equal(result, expected) + elif isinstance(result, Index) and isinstance(expected, Index): + tm.assert_index_equal(result, expected) + elif isinstance(result, np.ndarray) and isinstance( + expected, np.ndarray + ): + tm.assert_numpy_array_equal(result, expected) + else: + assert result == expected + + # freq raises AttributeError on an Int64Index because its not + # defined we mostly care about Series here anyhow + if not ignore_failures: + for o in self.not_valid_objs: + + # an object that is datetimelike will raise a TypeError, + # otherwise an AttributeError + err = AttributeError + if issubclass(type(o), DatetimeIndexOpsMixin): + err = TypeError + + with pytest.raises(err): + getattr(o, op) + + @pytest.mark.parametrize("klass", [Series, DataFrame]) + 
def test_binary_ops_docs(self, klass): + op_map = { + "add": "+", + "sub": "-", + "mul": "*", + "mod": "%", + "pow": "**", + "truediv": "/", + "floordiv": "//", + } + for op_name in op_map: + operand1 = klass.__name__.lower() + operand2 = "other" + op = op_map[op_name] + expected_str = " ".join([operand1, op, operand2]) + assert expected_str in getattr(klass, op_name).__doc__ + + # reverse version of the binary ops + expected_str = " ".join([operand2, op, operand1]) + assert expected_str in getattr(klass, "r" + op_name).__doc__ + + +class TestTranspose(Ops): + errmsg = "the 'axes' parameter is not supported" + + def test_transpose(self): + for obj in self.objs: + tm.assert_equal(obj.transpose(), obj) + + def test_transpose_non_default_axes(self): + for obj in self.objs: + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(1) + with pytest.raises(ValueError, match=self.errmsg): + obj.transpose(axes=1) + + def test_numpy_transpose(self): + for obj in self.objs: + tm.assert_equal(np.transpose(obj), obj) + + with pytest.raises(ValueError, match=self.errmsg): + np.transpose(obj, axes=1) + + +class TestIndexOps(Ops): + def setup_method(self, method): + super().setup_method(method) + self.is_valid_objs = self.objs + self.not_valid_objs = [] + + def test_none_comparison(self): + + # bug brought up by #1079 + # changed from TypeError in 0.17.0 + for o in self.is_valid_objs: + if isinstance(o, Series): + + o[0] = np.nan + + # noinspection PyComparisonWithNone + result = o == None # noqa + assert not result.iat[0] + assert not result.iat[1] + + # noinspection PyComparisonWithNone + result = o != None # noqa + assert result.iat[0] + assert result.iat[1] + + result = None == o # noqa + assert not result.iat[0] + assert not result.iat[1] + + result = None != o # noqa + assert result.iat[0] + assert result.iat[1] + + if is_datetime64_dtype(o) or is_datetime64tz_dtype(o): + # Following DatetimeIndex (and Timestamp) convention, + # inequality comparisons with 
Series[datetime64] raise + with pytest.raises(TypeError): + None > o + with pytest.raises(TypeError): + o > None + else: + result = None > o + assert not result.iat[0] + assert not result.iat[1] + + result = o < None + assert not result.iat[0] + assert not result.iat[1] + + def test_ndarray_compat_properties(self): + + for o in self.objs: + # Check that we work. + for p in ["shape", "dtype", "T", "nbytes"]: + assert getattr(o, p, None) is not None + + # deprecated properties + for p in ["flags", "strides", "itemsize", "base", "data"]: + assert not hasattr(o, p) + + with pytest.raises(ValueError): + o.item() # len > 1 + + assert o.ndim == 1 + assert o.size == len(o) + + assert Index([1]).item() == 1 + assert Series([1]).item() == 1 + + def test_value_counts_unique_nunique(self): + for orig in self.objs: + o = orig.copy() + klass = type(o) + values = o._values + + if isinstance(values, Index): + # reset name not to affect latter process + values.name = None + + # create repeated values, 'n'th element is repeated by n+1 times + # skip boolean, because it only has 2 values at most + if isinstance(o, Index) and o.is_boolean(): + continue + elif isinstance(o, Index): + expected_index = Index(o[::-1]) + expected_index.name = None + o = o.repeat(range(1, len(o) + 1)) + o.name = "a" + else: + expected_index = Index(values[::-1]) + idx = o.index.repeat(range(1, len(o) + 1)) + # take-based repeat + indices = np.repeat(np.arange(len(o)), range(1, len(o) + 1)) + rep = values.take(indices) + o = klass(rep, index=idx, name="a") + + # check values has the same dtype as the original + assert o.dtype == orig.dtype + + expected_s = Series( + range(10, 0, -1), index=expected_index, dtype="int64", name="a" + ) + + result = o.value_counts() + tm.assert_series_equal(result, expected_s) + assert result.index.name is None + assert result.name == "a" + + result = o.unique() + if isinstance(o, Index): + assert isinstance(result, type(o)) + tm.assert_index_equal(result, orig) + assert 
result.dtype == orig.dtype + elif is_datetime64tz_dtype(o): + # datetimetz Series returns array of Timestamp + assert result[0] == orig[0] + for r in result: + assert isinstance(r, Timestamp) + + tm.assert_numpy_array_equal( + result.astype(object), orig._values.astype(object) + ) + else: + tm.assert_numpy_array_equal(result, orig.values) + assert result.dtype == orig.dtype + + assert o.nunique() == len(np.unique(o.values)) + + @pytest.mark.parametrize("null_obj", [np.nan, None]) + def test_value_counts_unique_nunique_null(self, null_obj): + + for orig in self.objs: + o = orig.copy() + klass = type(o) + values = o._ndarray_values + + if not self._allow_na_ops(o): + continue + + # special assign to the numpy array + if is_datetime64tz_dtype(o): + if isinstance(o, DatetimeIndex): + v = o.asi8 + v[0:2] = iNaT + values = o._shallow_copy(v) + else: + o = o.copy() + o[0:2] = pd.NaT + values = o._values + + elif needs_i8_conversion(o): + values[0:2] = iNaT + values = o._shallow_copy(values) + else: + values[0:2] = null_obj + # check values has the same dtype as the original + + assert values.dtype == o.dtype + + # create repeated values, 'n'th element is repeated by n+1 + # times + if isinstance(o, (DatetimeIndex, PeriodIndex)): + expected_index = o.copy() + expected_index.name = None + + # attach name to klass + o = klass(values.repeat(range(1, len(o) + 1))) + o.name = "a" + else: + if isinstance(o, DatetimeIndex): + expected_index = orig._values._shallow_copy(values) + else: + expected_index = Index(values) + expected_index.name = None + o = o.repeat(range(1, len(o) + 1)) + o.name = "a" + + # check values has the same dtype as the original + assert o.dtype == orig.dtype + # check values correctly have NaN + nanloc = np.zeros(len(o), dtype=np.bool) + nanloc[:3] = True + if isinstance(o, Index): + tm.assert_numpy_array_equal(pd.isna(o), nanloc) + else: + exp = Series(nanloc, o.index, name="a") + tm.assert_series_equal(pd.isna(o), exp) + + expected_s_na = Series( + 
list(range(10, 2, -1)) + [3], + index=expected_index[9:0:-1], + dtype="int64", + name="a", + ) + expected_s = Series( + list(range(10, 2, -1)), + index=expected_index[9:1:-1], + dtype="int64", + name="a", + ) + + result_s_na = o.value_counts(dropna=False) + tm.assert_series_equal(result_s_na, expected_s_na) + assert result_s_na.index.name is None + assert result_s_na.name == "a" + result_s = o.value_counts() + tm.assert_series_equal(o.value_counts(), expected_s) + assert result_s.index.name is None + assert result_s.name == "a" + + result = o.unique() + if isinstance(o, Index): + tm.assert_index_equal(result, Index(values[1:], name="a")) + elif is_datetime64tz_dtype(o): + # unable to compare NaT / nan + tm.assert_extension_array_equal(result[1:], values[2:]) + assert result[0] is pd.NaT + else: + tm.assert_numpy_array_equal(result[1:], values[2:]) + + assert pd.isna(result[0]) + assert result.dtype == orig.dtype + + assert o.nunique() == 8 + assert o.nunique(dropna=False) == 9 + + def test_value_counts_inferred(self, index_or_series): + klass = index_or_series + s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] + s = klass(s_values) + expected = Series([4, 3, 2, 1], index=["b", "a", "d", "c"]) + tm.assert_series_equal(s.value_counts(), expected) + + if isinstance(s, Index): + exp = Index(np.unique(np.array(s_values, dtype=np.object_))) + tm.assert_index_equal(s.unique(), exp) + else: + exp = np.unique(np.array(s_values, dtype=np.object_)) + tm.assert_numpy_array_equal(s.unique(), exp) + + assert s.nunique() == 4 + # don't sort, have to sort after the fact as not sorting is + # platform-dep + hist = s.value_counts(sort=False).sort_values() + expected = Series([3, 1, 4, 2], index=list("acbd")).sort_values() + tm.assert_series_equal(hist, expected) + + # sort ascending + hist = s.value_counts(ascending=True) + expected = Series([1, 2, 3, 4], index=list("cdab")) + tm.assert_series_equal(hist, expected) + + # relative histogram. 
+ hist = s.value_counts(normalize=True) + expected = Series([0.4, 0.3, 0.2, 0.1], index=["b", "a", "d", "c"]) + tm.assert_series_equal(hist, expected) + + def test_value_counts_bins(self, index_or_series): + klass = index_or_series + s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] + s = klass(s_values) + + # bins + with pytest.raises(TypeError): + s.value_counts(bins=1) + + s1 = Series([1, 1, 2, 3]) + res1 = s1.value_counts(bins=1) + exp1 = Series({Interval(0.997, 3.0): 4}) + tm.assert_series_equal(res1, exp1) + res1n = s1.value_counts(bins=1, normalize=True) + exp1n = Series({Interval(0.997, 3.0): 1.0}) + tm.assert_series_equal(res1n, exp1n) + + if isinstance(s1, Index): + tm.assert_index_equal(s1.unique(), Index([1, 2, 3])) + else: + exp = np.array([1, 2, 3], dtype=np.int64) + tm.assert_numpy_array_equal(s1.unique(), exp) + + assert s1.nunique() == 3 + + # these return the same + res4 = s1.value_counts(bins=4, dropna=True) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) + tm.assert_series_equal(res4, exp4) + + res4 = s1.value_counts(bins=4, dropna=False) + intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) + tm.assert_series_equal(res4, exp4) + + res4n = s1.value_counts(bins=4, normalize=True) + exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 3, 1, 2])) + tm.assert_series_equal(res4n, exp4n) + + # handle NA's properly + s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"] + s = klass(s_values) + expected = Series([4, 3, 2], index=["b", "a", "d"]) + tm.assert_series_equal(s.value_counts(), expected) + + if isinstance(s, Index): + exp = Index(["a", "b", np.nan, "d"]) + tm.assert_index_equal(s.unique(), exp) + else: + exp = np.array(["a", "b", np.nan, "d"], dtype=object) + tm.assert_numpy_array_equal(s.unique(), exp) + assert s.nunique() == 3 + + s = 
klass({}) if klass is dict else klass({}, dtype=object) + expected = Series([], dtype=np.int64) + tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) + # returned dtype differs depending on original + if isinstance(s, Index): + tm.assert_index_equal(s.unique(), Index([]), exact=False) + else: + tm.assert_numpy_array_equal(s.unique(), np.array([]), check_dtype=False) + + assert s.nunique() == 0 + + def test_value_counts_datetime64(self, index_or_series): + klass = index_or_series + + # GH 3002, datetime64[ns] + # don't test names though + txt = "\n".join( + [ + "xxyyzz20100101PIE", + "xxyyzz20100101GUM", + "xxyyzz20100101EGG", + "xxyyww20090101EGG", + "foofoo20080909PIE", + "foofoo20080909GUM", + ] + ) + f = StringIO(txt) + df = pd.read_fwf( + f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"] + ) + + s = klass(df["dt"].copy()) + s.name = None + idx = pd.to_datetime( + ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"] + ) + expected_s = Series([3, 2, 1], index=idx) + tm.assert_series_equal(s.value_counts(), expected_s) + + expected = np_array_datetime64_compat( + ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"], + dtype="datetime64[ns]", + ) + if isinstance(s, Index): + tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) + else: + tm.assert_numpy_array_equal(s.unique(), expected) + + assert s.nunique() == 3 + + # with NaT + s = df["dt"].copy() + s = klass(list(s.values) + [pd.NaT]) + + result = s.value_counts() + assert result.index.dtype == "datetime64[ns]" + tm.assert_series_equal(result, expected_s) + + result = s.value_counts(dropna=False) + expected_s[pd.NaT] = 1 + tm.assert_series_equal(result, expected_s) + + unique = s.unique() + assert unique.dtype == "datetime64[ns]" + + # numpy_array_equal cannot compare pd.NaT + if isinstance(s, Index): + exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) + tm.assert_index_equal(unique, exp_idx) + else: + 
tm.assert_numpy_array_equal(unique[:3], expected) + assert pd.isna(unique[3]) + + assert s.nunique() == 3 + assert s.nunique(dropna=False) == 4 + + # timedelta64[ns] + td = df.dt - df.dt + timedelta(1) + td = klass(td, name="dt") + + result = td.value_counts() + expected_s = Series([6], index=[Timedelta("1day")], name="dt") + tm.assert_series_equal(result, expected_s) + + expected = TimedeltaIndex(["1 days"], name="dt") + if isinstance(td, Index): + tm.assert_index_equal(td.unique(), expected) + else: + tm.assert_numpy_array_equal(td.unique(), expected.values) + + td2 = timedelta(1) + (df.dt - df.dt) + td2 = klass(td2, name="dt") + result2 = td2.value_counts() + tm.assert_series_equal(result2, expected_s) + + def test_factorize(self): + for orig in self.objs: + o = orig.copy() + + if isinstance(o, Index) and o.is_boolean(): + exp_arr = np.array([0, 1] + [0] * 8, dtype=np.intp) + exp_uniques = o + exp_uniques = Index([False, True]) + else: + exp_arr = np.array(range(len(o)), dtype=np.intp) + exp_uniques = o + codes, uniques = o.factorize() + + tm.assert_numpy_array_equal(codes, exp_arr) + if isinstance(o, Series): + tm.assert_index_equal(uniques, Index(orig), check_names=False) + else: + # factorize explicitly resets name + tm.assert_index_equal(uniques, exp_uniques, check_names=False) + + def test_factorize_repeated(self): + for orig in self.objs: + o = orig.copy() + + # don't test boolean + if isinstance(o, Index) and o.is_boolean(): + continue + + # sort by value, and create duplicates + if isinstance(o, Series): + o = o.sort_values() + n = o.iloc[5:].append(o) + else: + indexer = o.argsort() + o = o.take(indexer) + n = o[5:].append(o) + + exp_arr = np.array( + [5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp + ) + codes, uniques = n.factorize(sort=True) + + tm.assert_numpy_array_equal(codes, exp_arr) + if isinstance(o, Series): + tm.assert_index_equal( + uniques, Index(orig).sort_values(), check_names=False + ) + else: + 
tm.assert_index_equal(uniques, o, check_names=False) + + exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4], np.intp) + codes, uniques = n.factorize(sort=False) + tm.assert_numpy_array_equal(codes, exp_arr) + + if isinstance(o, Series): + expected = Index(o.iloc[5:10].append(o.iloc[:5])) + tm.assert_index_equal(uniques, expected, check_names=False) + else: + expected = o[5:10].append(o[:5]) + tm.assert_index_equal(uniques, expected, check_names=False) + + def test_duplicated_drop_duplicates_index(self): + # GH 4060 + for original in self.objs: + if isinstance(original, Index): + + # special case + if original.is_boolean(): + result = original.drop_duplicates() + expected = Index([False, True], name="a") + tm.assert_index_equal(result, expected) + continue + + # original doesn't have duplicates + expected = np.array([False] * len(original), dtype=bool) + duplicated = original.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + result = original.drop_duplicates() + tm.assert_index_equal(result, original) + assert result is not original + + # has_duplicates + assert not original.has_duplicates + + # create repeated values, 3rd and 5th values are duplicated + idx = original[list(range(len(original))) + [5, 3]] + expected = np.array([False] * len(original) + [True, True], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + tm.assert_index_equal(idx.drop_duplicates(), original) + + base = [False] * len(idx) + base[3] = True + base[5] = True + expected = np.array(base) + + duplicated = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + result = idx.drop_duplicates(keep="last") + tm.assert_index_equal(result, idx[~expected]) + + base = [False] * len(original) + [True, True] + base[3] = True + base[5] = True + expected = np.array(base) + + duplicated = 
idx.duplicated(keep=False) + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + result = idx.drop_duplicates(keep=False) + tm.assert_index_equal(result, idx[~expected]) + + with pytest.raises( + TypeError, + match=r"drop_duplicates\(\) got an unexpected keyword argument", + ): + idx.drop_duplicates(inplace=True) + + else: + expected = Series( + [False] * len(original), index=original.index, name="a" + ) + tm.assert_series_equal(original.duplicated(), expected) + result = original.drop_duplicates() + tm.assert_series_equal(result, original) + assert result is not original + + idx = original.index[list(range(len(original))) + [5, 3]] + values = original._values[list(range(len(original))) + [5, 3]] + s = Series(values, index=idx, name="a") + + expected = Series( + [False] * len(original) + [True, True], index=idx, name="a" + ) + tm.assert_series_equal(s.duplicated(), expected) + tm.assert_series_equal(s.drop_duplicates(), original) + + base = [False] * len(idx) + base[3] = True + base[5] = True + expected = Series(base, index=idx, name="a") + + tm.assert_series_equal(s.duplicated(keep="last"), expected) + tm.assert_series_equal( + s.drop_duplicates(keep="last"), s[~np.array(base)] + ) + + base = [False] * len(original) + [True, True] + base[3] = True + base[5] = True + expected = Series(base, index=idx, name="a") + + tm.assert_series_equal(s.duplicated(keep=False), expected) + tm.assert_series_equal( + s.drop_duplicates(keep=False), s[~np.array(base)] + ) + + s.drop_duplicates(inplace=True) + tm.assert_series_equal(s, original) + + def test_drop_duplicates_series_vs_dataframe(self): + # GH 14192 + df = pd.DataFrame( + { + "a": [1, 1, 1, "one", "one"], + "b": [2, 2, np.nan, np.nan, np.nan], + "c": [3, 3, np.nan, np.nan, "three"], + "d": [1, 2, 3, 4, 4], + "e": [ + datetime(2015, 1, 1), + datetime(2015, 1, 1), + datetime(2015, 2, 1), + pd.NaT, + pd.NaT, + ], + } + ) + for column in df.columns: + for keep in ["first", "last", False]: + 
dropped_frame = df[[column]].drop_duplicates(keep=keep) + dropped_series = df[column].drop_duplicates(keep=keep) + tm.assert_frame_equal(dropped_frame, dropped_series.to_frame()) + + def test_fillna(self): + # # GH 11343 + # though Index.fillna and Series.fillna has separate impl, + # test here to confirm these works as the same + + for orig in self.objs: + + o = orig.copy() + values = o.values + + # values will not be changed + result = o.fillna(o.astype(object).values[0]) + if isinstance(o, Index): + tm.assert_index_equal(o, result) + else: + tm.assert_series_equal(o, result) + # check shallow_copied + assert o is not result + + for null_obj in [np.nan, None]: + for orig in self.objs: + o = orig.copy() + klass = type(o) + + if not self._allow_na_ops(o): + continue + + if needs_i8_conversion(o): + + values = o.astype(object).values + fill_value = values[0] + values[0:2] = pd.NaT + else: + values = o.values.copy() + fill_value = o.values[0] + values[0:2] = null_obj + + expected = [fill_value] * 2 + list(values[2:]) + + expected = klass(expected, dtype=orig.dtype) + o = klass(values) + + # check values has the same dtype as the original + assert o.dtype == orig.dtype + + result = o.fillna(fill_value) + if isinstance(o, Index): + tm.assert_index_equal(result, expected) + else: + tm.assert_series_equal(result, expected) + # check shallow_copied + assert o is not result + + @pytest.mark.skipif(PYPY, reason="not relevant for PyPy") + def test_memory_usage(self): + for o in self.objs: + res = o.memory_usage() + res_deep = o.memory_usage(deep=True) + + if is_object_dtype(o) or ( + isinstance(o, Series) and is_object_dtype(o.index) + ): + # if there are objects, only deep will pick them up + assert res_deep > res + else: + assert res == res_deep + + if isinstance(o, Series): + assert ( + o.memory_usage(index=False) + o.index.memory_usage() + ) == o.memory_usage(index=True) + + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + 
diff = res_deep - sys.getsizeof(o) + assert abs(diff) < 100 + + def test_searchsorted(self): + # See gh-12238 + for o in self.objs: + index = np.searchsorted(o, max(o)) + assert 0 <= index <= len(o) + + index = np.searchsorted(o, max(o), sorter=range(len(o))) + assert 0 <= index <= len(o) + + def test_validate_bool_args(self): + invalid_values = [1, "True", [1, 2, 3], 5.0] + + for value in invalid_values: + with pytest.raises(ValueError): + self.int_series.drop_duplicates(inplace=value) + + def test_getitem(self): + for i in self.indexes: + s = pd.Series(i) + + assert i[0] == s.iloc[0] + assert i[5] == s.iloc[5] + assert i[-1] == s.iloc[-1] + + assert i[-1] == i[9] + + with pytest.raises(IndexError): + i[20] + with pytest.raises(IndexError): + s.iloc[20] + + @pytest.mark.parametrize("indexer_klass", [list, pd.Index]) + @pytest.mark.parametrize( + "indexer", + [ + [True] * 10, + [False] * 10, + [True, False, True, True, False, False, True, True, False, True], + ], + ) + def test_bool_indexing(self, indexer_klass, indexer): + # GH 22533 + for idx in self.indexes: + exp_idx = [i for i in range(len(indexer)) if indexer[i]] + tm.assert_index_equal(idx[indexer_klass(indexer)], idx[exp_idx]) + s = pd.Series(idx) + tm.assert_series_equal(s[indexer_klass(indexer)], s.iloc[exp_idx]) + + def test_get_indexer_non_unique_dtype_mismatch(self): + # GH 25459 + indexes, missing = pd.Index(["A", "B"]).get_indexer_non_unique(pd.Index([0])) + tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes) + tm.assert_numpy_array_equal(np.array([0], dtype=np.int64), missing) diff --git a/pandas/tests/computation/__init__.py b/pandas/tests/computation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/computation/test_compat.py b/pandas/tests/computation/test_compat.py new file mode 100644 index 00000000..b3fbd8c1 --- /dev/null +++ b/pandas/tests/computation/test_compat.py @@ -0,0 +1,49 @@ +from distutils.version import LooseVersion + +import pytest 
+ +from pandas.compat._optional import VERSIONS + +import pandas as pd +from pandas.core.computation.engines import _engines +import pandas.core.computation.expr as expr + + +def test_compat(): + # test we have compat with our version of nu + + from pandas.core.computation.check import _NUMEXPR_INSTALLED + + try: + import numexpr as ne + + ver = ne.__version__ + if LooseVersion(ver) < LooseVersion(VERSIONS["numexpr"]): + assert not _NUMEXPR_INSTALLED + else: + assert _NUMEXPR_INSTALLED + except ImportError: + pytest.skip("not testing numexpr version compat") + + +@pytest.mark.parametrize("engine", _engines) +@pytest.mark.parametrize("parser", expr._parsers) +def test_invalid_numexpr_version(engine, parser): + def testit(): + a, b = 1, 2 # noqa + res = pd.eval("a + b", engine=engine, parser=parser) + assert res == 3 + + if engine == "numexpr": + try: + import numexpr as ne + except ImportError: + pytest.skip("no numexpr") + else: + if LooseVersion(ne.__version__) < LooseVersion(VERSIONS["numexpr"]): + with pytest.raises(ImportError): + testit() + else: + testit() + else: + testit() diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py new file mode 100644 index 00000000..7f68abb9 --- /dev/null +++ b/pandas/tests/computation/test_eval.py @@ -0,0 +1,2051 @@ +from distutils.version import LooseVersion +from functools import reduce +from itertools import product +import operator +from typing import Dict, Type +import warnings + +import numpy as np +from numpy.random import rand, randint, randn +import pytest + +from pandas.errors import PerformanceWarning +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_bool, is_list_like, is_scalar + +import pandas as pd +from pandas import DataFrame, Series, compat, date_range +import pandas._testing as tm +from pandas.core.computation import pytables +from pandas.core.computation.check import _NUMEXPR_VERSION +from pandas.core.computation.engines import 
NumExprClobberingError, _engines +import pandas.core.computation.expr as expr +from pandas.core.computation.expr import ( + BaseExprVisitor, + PandasExprVisitor, + PythonExprVisitor, +) +from pandas.core.computation.expressions import _NUMEXPR_INSTALLED, _USE_NUMEXPR +from pandas.core.computation.ops import ( + _arith_ops_syms, + _binary_math_ops, + _binary_ops_dict, + _special_case_arith_ops_syms, + _unary_math_ops, +) + + +@pytest.fixture( + params=( + pytest.param( + engine, + marks=pytest.mark.skipif( + engine == "numexpr" and not _USE_NUMEXPR, + reason=f"numexpr enabled->{_USE_NUMEXPR}, " + f"installed->{_NUMEXPR_INSTALLED}", + ), + ) + for engine in _engines + ) +) # noqa +def engine(request): + return request.param + + +@pytest.fixture(params=expr._parsers) +def parser(request): + return request.param + + +@pytest.fixture +def ne_lt_2_6_9(): + if _NUMEXPR_INSTALLED and _NUMEXPR_VERSION >= LooseVersion("2.6.9"): + pytest.skip("numexpr is >= 2.6.9") + return "numexpr" + + +@pytest.fixture +def unary_fns_for_ne(): + if _NUMEXPR_INSTALLED: + if _NUMEXPR_VERSION >= LooseVersion("2.6.9"): + return _unary_math_ops + else: + return tuple(x for x in _unary_math_ops if x not in ("floor", "ceil")) + else: + pytest.skip("numexpr is not present") + + +def engine_has_neg_frac(engine): + return _engines[engine].has_neg_frac + + +def _eval_single_bin(lhs, cmp1, rhs, engine): + c = _binary_ops_dict[cmp1] + if engine_has_neg_frac(engine): + try: + return c(lhs, rhs) + except ValueError as e: + if str(e).startswith( + "negative number cannot be raised to a fractional power" + ): + return np.nan + raise + return c(lhs, rhs) + + +def _series_and_2d_ndarray(lhs, rhs): + return ( + isinstance(lhs, Series) and isinstance(rhs, np.ndarray) and rhs.ndim > 1 + ) or (isinstance(rhs, Series) and isinstance(lhs, np.ndarray) and lhs.ndim > 1) + + +def _series_and_frame(lhs, rhs): + return (isinstance(lhs, Series) and isinstance(rhs, DataFrame)) or ( + isinstance(rhs, Series) and 
isinstance(lhs, DataFrame) + ) + + +def _bool_and_frame(lhs, rhs): + return isinstance(lhs, bool) and isinstance(rhs, pd.core.generic.NDFrame) + + +def _is_py3_complex_incompat(result, expected): + return isinstance(expected, (complex, np.complexfloating)) and np.isnan(result) + + +_good_arith_ops = set(_arith_ops_syms).difference(_special_case_arith_ops_syms) + + +@td.skip_if_no_ne +class TestEvalNumexprPandas: + @classmethod + def setup_class(cls): + import numexpr as ne + + cls.ne = ne + cls.engine = "numexpr" + cls.parser = "pandas" + + @classmethod + def teardown_class(cls): + del cls.engine, cls.parser + if hasattr(cls, "ne"): + del cls.ne + + def setup_data(self): + nan_df1 = DataFrame(rand(10, 5)) + nan_df1[nan_df1 > 0.5] = np.nan + nan_df2 = DataFrame(rand(10, 5)) + nan_df2[nan_df2 > 0.5] = np.nan + + self.pandas_lhses = ( + DataFrame(randn(10, 5)), + Series(randn(5)), + Series([1, 2, np.nan, np.nan, 5]), + nan_df1, + ) + self.pandas_rhses = ( + DataFrame(randn(10, 5)), + Series(randn(5)), + Series([1, 2, np.nan, np.nan, 5]), + nan_df2, + ) + self.scalar_lhses = (randn(),) + self.scalar_rhses = (randn(),) + + self.lhses = self.pandas_lhses + self.scalar_lhses + self.rhses = self.pandas_rhses + self.scalar_rhses + + def setup_ops(self): + self.cmp_ops = expr._cmp_ops_syms + self.cmp2_ops = self.cmp_ops[::-1] + self.bin_ops = expr._bool_ops_syms + self.special_case_ops = _special_case_arith_ops_syms + self.arith_ops = _good_arith_ops + self.unary_ops = "-", "~", "not " + + def setup_method(self, method): + self.setup_ops() + self.setup_data() + self.current_engines = filter(lambda x: x != self.engine, _engines) + + def teardown_method(self, method): + del self.lhses, self.rhses, self.scalar_rhses, self.scalar_lhses + del self.pandas_rhses, self.pandas_lhses, self.current_engines + + @pytest.mark.slow + @pytest.mark.parametrize( + "cmp1", + ["!=", "==", "<=", ">=", "<", ">"], + ids=["ne", "eq", "le", "ge", "lt", "gt"], + ) + @pytest.mark.parametrize("cmp2", 
[">", "<"], ids=["gt", "lt"]) + def test_complex_cmp_ops(self, cmp1, cmp2): + for lhs, rhs, binop in product(self.lhses, self.rhses, self.bin_ops): + lhs_new = _eval_single_bin(lhs, cmp1, rhs, self.engine) + rhs_new = _eval_single_bin(lhs, cmp2, rhs, self.engine) + expected = _eval_single_bin(lhs_new, binop, rhs_new, self.engine) + + ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" + result = pd.eval(ex, engine=self.engine, parser=self.parser) + self.check_equal(result, expected) + + def test_simple_cmp_ops(self): + bool_lhses = ( + DataFrame(tm.randbool(size=(10, 5))), + Series(tm.randbool((5,))), + tm.randbool(), + ) + bool_rhses = ( + DataFrame(tm.randbool(size=(10, 5))), + Series(tm.randbool((5,))), + tm.randbool(), + ) + for lhs, rhs, cmp_op in product(bool_lhses, bool_rhses, self.cmp_ops): + self.check_simple_cmp_op(lhs, cmp_op, rhs) + + @pytest.mark.slow + def test_binary_arith_ops(self): + for lhs, op, rhs in product(self.lhses, self.arith_ops, self.rhses): + self.check_binary_arith_op(lhs, op, rhs) + + def test_modulus(self): + for lhs, rhs in product(self.lhses, self.rhses): + self.check_modulus(lhs, "%", rhs) + + def test_floor_division(self): + for lhs, rhs in product(self.lhses, self.rhses): + self.check_floor_division(lhs, "//", rhs) + + @td.skip_if_windows + def test_pow(self): + # odd failure on win32 platform, so skip + for lhs, rhs in product(self.lhses, self.rhses): + self.check_pow(lhs, "**", rhs) + + @pytest.mark.slow + def test_single_invert_op(self): + for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses): + self.check_single_invert_op(lhs, op, rhs) + + @pytest.mark.slow + def test_compound_invert_op(self): + for lhs, op, rhs in product(self.lhses, self.cmp_ops, self.rhses): + self.check_compound_invert_op(lhs, op, rhs) + + @pytest.mark.slow + def test_chained_cmp_op(self): + mids = self.lhses + cmp_ops = "<", ">" + for lhs, cmp1, mid, cmp2, rhs in product( + self.lhses, cmp_ops, mids, cmp_ops, self.rhses + ): + 
def check_equal(self, result, expected):
    """Assert that ``result`` equals ``expected``.

    The comparison helper is chosen from the type of ``result``: frames,
    series and numpy arrays use the matching ``tm.assert_*`` function,
    anything else falls back to a plain ``==`` assertion.
    """
    comparators = (
        (DataFrame, tm.assert_frame_equal),
        (Series, tm.assert_series_equal),
        (np.ndarray, tm.assert_numpy_array_equal),
    )
    for kind, assert_equal in comparators:
        if isinstance(result, kind):
            assert_equal(result, expected)
            return
    assert result == expected
def check_alignment(self, result, nlhs, ghs, op):
    """Compare ``result`` with ``self.ne.evaluate`` applied to the aligned operands.

    ``nlhs`` and ``ghs`` are first aligned via ``nlhs.align(ghs)``.  If that
    raises one of the caught exceptions the check is skipped and nothing is
    asserted.
    """
    try:
        nlhs, ghs = nlhs.align(ghs)
    except (ValueError, TypeError, AttributeError):
        # ValueError: series frame or frame series align
        # TypeError, AttributeError: series or frame with scalar align
        pass
    else:

        # direct numpy comparison
        # NOTE(review): the expression string refers to the locals ``nlhs``
        # and ``ghs`` by name (presumably numexpr resolves them from this
        # frame), so these two names must not be renamed.
        expected = self.ne.evaluate(f"nlhs {op} ghs")
        tm.assert_numpy_array_equal(result.values, expected)
def check_compound_invert_op(self, lhs, cmp1, rhs):
    """Check ``~(lhs <cmp1> rhs)`` on this engine and on the other engines.

    Parameters
    ----------
    lhs, rhs : scalar, Series or DataFrame
        Operands.  ``pd.eval`` looks them up by these exact names, so they
        must stay bound to ``lhs`` and ``rhs`` in this frame.
    cmp1 : str
        Comparison operator symbol.
    """
    skip_these = ["in", "not in"]
    ex = f"~(lhs {cmp1} rhs)"

    msg = (
        r"only list-like( or dict-like)? objects are allowed to be"
        r" passed to (DataFrame\.)?isin\(\), you passed a"
        r" (\[|')float(\]|')|"
        "argument of type 'float' is not iterable"
    )
    if is_scalar(rhs) and cmp1 in skip_these:
        # membership tests against a scalar right-hand side must raise
        with pytest.raises(TypeError, match=msg):
            pd.eval(
                ex,
                engine=self.engine,
                parser=self.parser,
                local_dict={"lhs": lhs, "rhs": rhs},
            )
    else:
        # compound
        if is_scalar(lhs) and is_scalar(rhs):
            # wrap scalars so that ``~`` is applied element-wise
            lhs, rhs = np.array([lhs]), np.array([rhs])
        expected = _eval_single_bin(lhs, cmp1, rhs, self.engine)
        if is_scalar(expected):
            expected = not expected
        else:
            expected = ~expected
        result = pd.eval(ex, engine=self.engine, parser=self.parser)
        tm.assert_almost_equal(expected, result)

        # make sure the other engines work the same as this one
        for engine in self.current_engines:
            # evaluate with the *other* engine; using ``self.engine`` here
            # merely repeated the evaluation above and compared it to itself
            ev = pd.eval(ex, engine=engine, parser=self.parser)
            tm.assert_almost_equal(ev, result)
pytest.raises(ValueError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + with pytest.raises(TypeError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + + def test_series_invert(self): + # ~ #### + expr = self.ex("~") + + # series + # float raises + lhs = Series(randn(5)) + if self.engine == "numexpr": + with pytest.raises(NotImplementedError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + with pytest.raises(TypeError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + + # int raises on numexpr + lhs = Series(randint(5, size=5)) + if self.engine == "numexpr": + with pytest.raises(NotImplementedError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + expect = ~lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + # bool + lhs = Series(rand(5) > 0.5) + expect = ~lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_series_equal(expect, result) + + # float + # int + # bool + + # object + lhs = Series(["a", 1, 2.0]) + if self.engine == "numexpr": + with pytest.raises(ValueError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + else: + with pytest.raises(TypeError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + + def test_frame_negate(self): + expr = self.ex("-") + + # float + lhs = DataFrame(randn(5, 2)) + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + # int + lhs = DataFrame(randint(5, size=(5, 2))) + expect = -lhs + result = pd.eval(expr, engine=self.engine, parser=self.parser) + tm.assert_frame_equal(expect, result) + + # bool doesn't work with numexpr but works elsewhere + lhs = DataFrame(rand(5, 2) > 0.5) + if self.engine == "numexpr": + with pytest.raises(NotImplementedError): + result = pd.eval(expr, engine=self.engine, parser=self.parser) + 
def test_frame_pos(self):
    """Unary plus on a DataFrame returns the frame unchanged for each dtype tried."""
    # ``self.ex`` builds the expression string "+lhs"; ``pd.eval`` then looks
    # up the local variable named ``lhs`` below.
    expr = self.ex("+")

    # float
    lhs = DataFrame(randn(5, 2))
    expect = lhs
    result = pd.eval(expr, engine=self.engine, parser=self.parser)
    tm.assert_frame_equal(expect, result)

    # int
    lhs = DataFrame(randint(5, size=(5, 2)))
    expect = lhs
    result = pd.eval(expr, engine=self.engine, parser=self.parser)
    tm.assert_frame_equal(expect, result)

    # bool: no engine-specific branch here; the same assertion is made
    # regardless of ``self.engine``
    lhs = DataFrame(rand(5, 2) > 0.5)
    expect = lhs
    result = pd.eval(expr, engine=self.engine, parser=self.parser)
    tm.assert_frame_equal(expect, result)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_float_comparison_bin_op(self, dtype):
    """Comparing a float column with a negative literal gives False for 0 (GH 16363)."""
    frame = pd.DataFrame({"x": np.array([0], dtype=dtype)})
    for comparison in ("x < -0.1", "-5 > x"):
        outcome = frame.eval(comparison)
        assert outcome.values == np.array([False])
def test_disallow_python_keywords(self):
    """Querying a column or index named like a Python keyword raises (GH 18221)."""
    expected_message = "Python keyword not valid identifier in numexpr query"

    keyword_column = pd.DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"])
    keyword_index = pd.DataFrame()
    keyword_index.index.name = "lambda"

    cases = (
        (keyword_column, "class == 0"),
        (keyword_index, "lambda == 0"),
    )
    for frame, query in cases:
        with pytest.raises(SyntaxError, match=expected_message):
            frame.query(query)
def _is_datetime(x):
    """Return True when the dtype of ``x`` is a ``np.datetime64`` type."""
    scalar_type = x.dtype.type
    return issubclass(scalar_type, np.datetime64)


def should_warn(*args):
    """Decide whether aligning the given indexes is expected to warn.

    True when none of ``args`` reports ``is_monotonic`` and the XOR of their
    "is datetime" flags is true.
    """
    none_monotonic = all(not index.is_monotonic for index in args)
    datetime_flags = [_is_datetime(index) for index in args]
    odd_datetime_count = reduce(operator.xor, datetime_flags)
    return none_monotonic and odd_datetime_count
def test_frame_comparison(self, engine, parser):
    """``<`` against a scalar and against another frame matches plain pandas."""
    for row_kind in self.lhs_index_types:
        for col_kind in self.lhs_index_types:
            # ``df`` and ``df3`` are referenced by name inside the
            # expression strings, so these two locals keep their names.
            df = tm.makeCustomDataframe(
                10, 10, data_gen_f=f, r_idx_type=row_kind, c_idx_type=col_kind
            )
            against_scalar = pd.eval("df < 2", engine=engine, parser=parser)
            tm.assert_frame_equal(against_scalar, df < 2)

            df3 = DataFrame(randn(*df.shape), index=df.index, columns=df.columns)
            against_frame = pd.eval("df < df3", engine=engine, parser=parser)
            tm.assert_frame_equal(against_frame, df < df3)
index_name): + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + if should_warn(df.index, s.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df + s", engine=engine, parser=parser) + else: + res = pd.eval("df + s", engine=engine, parser=parser) + + if r_idx_type == "dt" or c_idx_type == "dt": + expected = df.add(s) if engine == "numexpr" else df + s + else: + expected = df + s + tm.assert_frame_equal(res, expected) + + args = product(self.lhs_index_types, self.index_types, ("index", "columns")) + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + for r_idx_type, c_idx_type, index_name in args: + testit(r_idx_type, c_idx_type, index_name) + + def test_basic_series_frame_alignment(self, engine, parser): + def testit(r_idx_type, c_idx_type, index_name): + df = tm.makeCustomDataframe( + 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + if should_warn(s.index, df.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("s + df", engine=engine, parser=parser) + else: + res = pd.eval("s + df", engine=engine, parser=parser) + + if r_idx_type == "dt" or c_idx_type == "dt": + expected = df.add(s) if engine == "numexpr" else s + df + else: + expected = s + df + tm.assert_frame_equal(res, expected) + + # only test dt with dt, otherwise weird joins result + args = product(["i", "u", "s"], ["i", "u", "s"], ("index", "columns")) + with warnings.catch_warnings(record=True): + # avoid warning about comparing strings and ints + warnings.simplefilter("ignore", RuntimeWarning) + + for r_idx_type, c_idx_type, index_name in args: + testit(r_idx_type, c_idx_type, index_name) + + # dt with dt + args = product(["dt"], ["dt"], ("index", "columns")) + with 
warnings.catch_warnings(record=True): + # avoid warning about comparing strings and ints + warnings.simplefilter("ignore", RuntimeWarning) + + for r_idx_type, c_idx_type, index_name in args: + testit(r_idx_type, c_idx_type, index_name) + + def test_series_frame_commutativity(self, engine, parser): + args = product( + self.lhs_index_types, self.index_types, ("+", "*"), ("index", "columns") + ) + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + for r_idx_type, c_idx_type, op, index_name in args: + df = tm.makeCustomDataframe( + 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ) + index = getattr(df, index_name) + s = Series(np.random.randn(5), index[:5]) + + lhs = f"s {op} df" + rhs = f"df {op} s" + if should_warn(df.index, s.index): + with tm.assert_produces_warning(RuntimeWarning): + a = pd.eval(lhs, engine=engine, parser=parser) + with tm.assert_produces_warning(RuntimeWarning): + b = pd.eval(rhs, engine=engine, parser=parser) + else: + a = pd.eval(lhs, engine=engine, parser=parser) + b = pd.eval(rhs, engine=engine, parser=parser) + + if r_idx_type != "dt" and c_idx_type != "dt": + if engine == "numexpr": + tm.assert_frame_equal(a, b) + + @pytest.mark.slow + def test_complex_series_frame_alignment(self, engine, parser): + import random + + args = product( + self.lhs_index_types, self.index_types, self.index_types, self.index_types + ) + n = 3 + m1 = 5 + m2 = 2 * m1 + + with warnings.catch_warnings(record=True): + warnings.simplefilter("always", RuntimeWarning) + for r1, r2, c1, c2 in args: + index_name = random.choice(["index", "columns"]) + obj_name = random.choice(["df", "df2"]) + + df = tm.makeCustomDataframe( + m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1 + ) + df2 = tm.makeCustomDataframe( + m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2 + ) + index = getattr(locals().get(obj_name), index_name) + s = Series(np.random.randn(n), index[:n]) + + if r2 == "dt" or c2 == "dt": + if engine == 
"numexpr": + expected2 = df2.add(s) + else: + expected2 = df2 + s + else: + expected2 = df2 + s + + if r1 == "dt" or c1 == "dt": + if engine == "numexpr": + expected = expected2.add(df) + else: + expected = expected2 + df + else: + expected = expected2 + df + + if should_warn(df2.index, s.index, df.index): + with tm.assert_produces_warning(RuntimeWarning): + res = pd.eval("df2 + s + df", engine=engine, parser=parser) + else: + res = pd.eval("df2 + s + df", engine=engine, parser=parser) + assert res.shape == expected.shape + tm.assert_frame_equal(res, expected) + + def test_performance_warning_for_poor_alignment(self, engine, parser): + df = DataFrame(randn(1000, 10)) + s = Series(randn(10000)) + if engine == "numexpr": + seen = PerformanceWarning + else: + seen = False + + with tm.assert_produces_warning(seen): + pd.eval("df + s", engine=engine, parser=parser) + + s = Series(randn(1000)) + with tm.assert_produces_warning(False): + pd.eval("df + s", engine=engine, parser=parser) + + df = DataFrame(randn(10, 10000)) + s = Series(randn(10000)) + with tm.assert_produces_warning(False): + pd.eval("df + s", engine=engine, parser=parser) + + df = DataFrame(randn(10, 10)) + s = Series(randn(10000)) + + is_python_engine = engine == "python" + + if not is_python_engine: + wrn = PerformanceWarning + else: + wrn = False + + with tm.assert_produces_warning(wrn) as w: + pd.eval("df + s", engine=engine, parser=parser) + + if not is_python_engine: + assert len(w) == 1 + msg = str(w[0].message) + loged = np.log10(s.size - df.shape[1]) + expected = ( + f"Alignment difference on axis 1 is larger " + f"than an order of magnitude on term 'df', " + f"by more than {loged:.4g}; performance may suffer" + ) + assert msg == expected + + +# ------------------------------------ +# Slightly more complex ops + + +@td.skip_if_no_ne +class TestOperationsNumExprPandas: + @classmethod + def setup_class(cls): + cls.engine = "numexpr" + cls.parser = "pandas" + cls.arith_ops = expr._arith_ops_syms + 
def eval(self, *args, **kwargs):
    """Call ``pd.eval`` with this class's engine and parser forced in.

    Any ``engine``/``parser`` passed by the caller is overwritten.  ``level``
    (default 0) is increased by one to account for this wrapper's own frame.
    """
    caller_level = kwargs.pop("level", 0)
    kwargs.update(
        engine=self.engine,
        parser=self.parser,
        level=caller_level + 1,
    )
    return pd.eval(*args, **kwargs)
# FutureWarning: The `truediv` parameter in pd.eval is deprecated and will be + # removed in a future version. + with tm.assert_produces_warning(FutureWarning): + res = self.eval(ex, truediv=False) + tm.assert_numpy_array_equal(res, np.array([1.0])) + + with tm.assert_produces_warning(FutureWarning): + res = self.eval(ex, truediv=True) + tm.assert_numpy_array_equal(res, np.array([1.0])) + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("1 / 2", truediv=True) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("1 / 2", truediv=False) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("s / 2", truediv=False) + expec = 0.5 + assert res == expec + + with tm.assert_produces_warning(FutureWarning): + res = self.eval("s / 2", truediv=True) + expec = 0.5 + assert res == expec + + def test_failing_subscript_with_name_error(self): + df = DataFrame(np.random.randn(5, 3)) # noqa + with pytest.raises(NameError): + self.eval("df[x > 2] > 2") + + def test_lhs_expression_subscript(self): + df = DataFrame(np.random.randn(5, 3)) + result = self.eval("(df + 1)[df > 2]", local_dict={"df": df}) + expected = (df + 1)[df > 2] + tm.assert_frame_equal(result, expected) + + def test_attr_expression(self): + df = DataFrame(np.random.randn(5, 3), columns=list("abc")) + expr1 = "df.a < df.b" + expec1 = df.a < df.b + expr2 = "df.a + df.b + df.c" + expec2 = df.a + df.b + df.c + expr3 = "df.a + df.b + df.c[df.b < 0]" + expec3 = df.a + df.b + df.c[df.b < 0] + exprs = expr1, expr2, expr3 + expecs = expec1, expec2, expec3 + for e, expec in zip(exprs, expecs): + tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df})) + + def test_assignment_fails(self): + df = DataFrame(np.random.randn(5, 3), columns=list("abc")) + df2 = DataFrame(np.random.randn(5, 3)) + expr1 = "df = df2" + msg = "cannot assign without a target object" + with pytest.raises(ValueError, 
match=msg): + self.eval(expr1, local_dict={"df": df, "df2": df2}) + + def test_assignment_column(self): + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + orig_df = df.copy() + + # multiple assignees + with pytest.raises(SyntaxError, match="invalid syntax"): + df.eval("d c = a + b") + + # invalid assignees + msg = "left hand side of an assignment must be a single name" + with pytest.raises(SyntaxError, match=msg): + df.eval("d,c = a + b") + if compat.PY38: + msg = "cannot assign to function call" + else: + msg = "can't assign to function call" + with pytest.raises(SyntaxError, match=msg): + df.eval('Timestamp("20131001") = a + b') + + # single assignment - existing variable + expected = orig_df.copy() + expected["a"] = expected["a"] + expected["b"] + df = orig_df.copy() + df.eval("a = a + b", inplace=True) + tm.assert_frame_equal(df, expected) + + # single assignment - new variable + expected = orig_df.copy() + expected["c"] = expected["a"] + expected["b"] + df = orig_df.copy() + df.eval("c = a + b", inplace=True) + tm.assert_frame_equal(df, expected) + + # with a local name overlap + def f(): + df = orig_df.copy() + a = 1 # noqa + df.eval("a = 1 + b", inplace=True) + return df + + df = f() + expected = orig_df.copy() + expected["a"] = 1 + expected["b"] + tm.assert_frame_equal(df, expected) + + df = orig_df.copy() + + def f(): + a = 1 # noqa + old_a = df.a.copy() + df.eval("a = a + b", inplace=True) + result = old_a + df.b + tm.assert_series_equal(result, df.a, check_names=False) + assert result.name is None + + f() + + # multiple assignment + df = orig_df.copy() + df.eval("c = a + b", inplace=True) + msg = "can only assign a single expression" + with pytest.raises(SyntaxError, match=msg): + df.eval("c = a = b") + + # explicit targets + df = orig_df.copy() + self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True) + expected = orig_df.copy() + expected["c"] = expected["a"] + expected["b"] + tm.assert_frame_equal(df, expected) + + def 
test_column_in(self): + # GH 11235 + df = DataFrame({"a": [11], "b": [-32]}) + result = df.eval("a in [11, -32]") + expected = Series([True]) + tm.assert_series_equal(result, expected) + + def assignment_not_inplace(self): + # see gh-9297 + df = DataFrame(np.random.randn(5, 2), columns=list("ab")) + + actual = df.eval("c = a + b", inplace=False) + assert actual is not None + + expected = df.copy() + expected["c"] = expected["a"] + expected["b"] + tm.assert_frame_equal(df, expected) + + def test_multi_line_expression(self): + # GH 11149 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + expected["c"] = expected["a"] + expected["b"] + expected["d"] = expected["c"] + expected["b"] + ans = df.eval( + """ + c = a + b + d = c + b""", + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert ans is None + + expected["a"] = expected["a"] - 1 + expected["e"] = expected["a"] + 2 + ans = df.eval( + """ + a = a - 1 + e = a + 2""", + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert ans is None + + # multi-line not valid if not all assignments + with pytest.raises(ValueError): + df.eval( + """ + a = b + 2 + b - 2""", + inplace=False, + ) + + def test_multi_line_expression_not_inplace(self): + # GH 11149 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + expected["c"] = expected["a"] + expected["b"] + expected["d"] = expected["c"] + expected["b"] + df = df.eval( + """ + c = a + b + d = c + b""", + inplace=False, + ) + tm.assert_frame_equal(expected, df) + + expected["a"] = expected["a"] - 1 + expected["e"] = expected["a"] + 2 + df = df.eval( + """ + a = a - 1 + e = a + 2""", + inplace=False, + ) + tm.assert_frame_equal(expected, df) + + def test_multi_line_expression_local_variable(self): + # GH 15342 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + + local_var = 7 + expected["c"] = expected["a"] * local_var + expected["d"] = expected["c"] + local_var + ans = 
df.eval( + """ + c = a * @local_var + d = c + @local_var + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert ans is None + + def test_multi_line_expression_callable_local_variable(self): + # 26426 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def local_func(a, b): + return b + + expected = df.copy() + expected["c"] = expected["a"] * local_func(1, 7) + expected["d"] = expected["c"] + local_func(1, 7) + ans = df.eval( + """ + c = a * @local_func(1, 7) + d = c + @local_func(1, 7) + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert ans is None + + def test_multi_line_expression_callable_local_variable_with_kwargs(self): + # 26426 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + + def local_func(a, b): + return b + + expected = df.copy() + expected["c"] = expected["a"] * local_func(b=7, a=1) + expected["d"] = expected["c"] + local_func(b=7, a=1) + ans = df.eval( + """ + c = a * @local_func(b=7, a=1) + d = c + @local_func(b=7, a=1) + """, + inplace=True, + ) + tm.assert_frame_equal(expected, df) + assert ans is None + + def test_assignment_in_query(self): + # GH 8664 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + df_orig = df.copy() + with pytest.raises(ValueError): + df.query("a = 1") + tm.assert_frame_equal(df, df_orig) + + def test_query_inplace(self): + # see gh-11149 + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + expected = df.copy() + expected = expected[expected["a"] == 2] + df.query("a == 2", inplace=True) + tm.assert_frame_equal(expected, df) + + df = {} + expected = {"a": 3} + + self.eval("a = 1 + 2", target=df, inplace=True) + tm.assert_dict_equal(df, expected) + + @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)]) + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_cannot_item_assign(self, invalid_target): + msg = "Cannot assign expression output to target" + expression = "a = 1 + 2" + + with pytest.raises(ValueError, match=msg): + 
self.eval(expression, target=invalid_target, inplace=True) + + if hasattr(invalid_target, "copy"): + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) + def test_cannot_copy_item(self, invalid_target): + msg = "Cannot return a copy of the target" + expression = "a = 1 + 2" + + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=invalid_target, inplace=False) + + @pytest.mark.parametrize("target", [1, "cat", [1, 2], np.array([]), (1, 3), {1: 2}]) + def test_inplace_no_assignment(self, target): + expression = "1 + 2" + + assert self.eval(expression, target=target, inplace=False) == 3 + + msg = "Cannot operate inplace if there is no assignment" + with pytest.raises(ValueError, match=msg): + self.eval(expression, target=target, inplace=True) + + def test_basic_period_index_boolean_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + + e = df < 2 + r = self.eval("df < 2", local_dict={"df": df}) + x = df < 2 + + tm.assert_frame_equal(r, e) + tm.assert_frame_equal(x, e) + + def test_basic_period_index_subscript_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + r = self.eval("df[df < 2 + 3]", local_dict={"df": df}) + e = df[df < 2 + 3] + tm.assert_frame_equal(r, e) + + def test_nested_period_index_subscript_expression(self): + df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i") + r = self.eval("df[df[df < 2] < 2] + df * 2", local_dict={"df": df}) + e = df[df[df < 2] < 2] + df * 2 + tm.assert_frame_equal(r, e) + + def test_date_boolean(self): + df = DataFrame(randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + res = self.eval( + "df.dates1 < 20130101", + local_dict={"df": df}, + engine=self.engine, + parser=self.parser, + ) + expec = df.dates1 < "20130101" + tm.assert_series_equal(res, expec, 
check_names=False) + + def test_simple_in_ops(self): + if self.parser != "python": + res = pd.eval("1 in [1, 2]", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("2 in (1, 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("3 in (1, 2)", engine=self.engine, parser=self.parser) + assert not res + + res = pd.eval("3 not in (1, 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("[3] not in (1, 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("[3] in ([3], 2)", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("[[3]] in [[[3]], 2]", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval("(3,) in [(3,), 2]", engine=self.engine, parser=self.parser) + assert res + + res = pd.eval( + "(3,) not in [(3,), 2]", engine=self.engine, parser=self.parser + ) + assert not res + + res = pd.eval( + "[(3,)] in [[(3,)], 2]", engine=self.engine, parser=self.parser + ) + assert res + else: + with pytest.raises(NotImplementedError): + pd.eval("1 in [1, 2]", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError): + pd.eval("2 in (1, 2)", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError): + pd.eval("3 in (1, 2)", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError): + pd.eval("3 not in (1, 2)", engine=self.engine, parser=self.parser) + with pytest.raises(NotImplementedError): + pd.eval( + "[(3,)] in (1, 2, [(3,)])", engine=self.engine, parser=self.parser + ) + with pytest.raises(NotImplementedError): + pd.eval( + "[3] not in (1, 2, [[3]])", engine=self.engine, parser=self.parser + ) + + +@td.skip_if_no_ne +class TestOperationsNumExprPython(TestOperationsNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "numexpr" + cls.parser = "python" + cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms + cls.arith_ops = filter(lambda 
x: x not in ("in", "not in"), cls.arith_ops) + + def test_check_many_exprs(self): + a = 1 # noqa + expr = " * ".join("a" * 33) + expected = 1 + res = pd.eval(expr, engine=self.engine, parser=self.parser) + assert res == expected + + def test_fails_and(self): + df = DataFrame(np.random.randn(5, 3)) + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval( + "df > 2 and df > 3", + local_dict={"df": df}, + parser=self.parser, + engine=self.engine, + ) + + def test_fails_or(self): + df = DataFrame(np.random.randn(5, 3)) + msg = "'BoolOp' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval( + "df > 2 or df > 3", + local_dict={"df": df}, + parser=self.parser, + engine=self.engine, + ) + + def test_fails_not(self): + df = DataFrame(np.random.randn(5, 3)) + msg = "'Not' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + pd.eval( + "not df > 2", + local_dict={"df": df}, + parser=self.parser, + engine=self.engine, + ) + + def test_fails_ampersand(self): + df = DataFrame(np.random.randn(5, 3)) # noqa + ex = "(df + 2)[df > 1] > 0 & (df > 0)" + with pytest.raises(NotImplementedError): + pd.eval(ex, parser=self.parser, engine=self.engine) + + def test_fails_pipe(self): + df = DataFrame(np.random.randn(5, 3)) # noqa + ex = "(df + 2)[df > 1] > 0 | (df > 0)" + with pytest.raises(NotImplementedError): + pd.eval(ex, parser=self.parser, engine=self.engine) + + def test_bool_ops_with_constants(self): + for op, lhs, rhs in product( + expr._bool_ops_syms, ("True", "False"), ("True", "False") + ): + ex = f"{lhs} {op} {rhs}" + if op in ("and", "or"): + with pytest.raises(NotImplementedError): + self.eval(ex) + else: + res = self.eval(ex) + exp = eval(ex) + assert res == exp + + def test_simple_bool_ops(self): + for op, lhs, rhs in product(expr._bool_ops_syms, (True, False), (True, False)): + ex = f"lhs {op} rhs" + if op in ("and", "or"): + with 
pytest.raises(NotImplementedError): + pd.eval(ex, engine=self.engine, parser=self.parser) + else: + res = pd.eval(ex, engine=self.engine, parser=self.parser) + exp = eval(ex) + assert res == exp + + +class TestOperationsPythonPython(TestOperationsNumExprPython): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = cls.parser = "python" + cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms + cls.arith_ops = filter(lambda x: x not in ("in", "not in"), cls.arith_ops) + + +class TestOperationsPythonPandas(TestOperationsNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "python" + cls.parser = "pandas" + cls.arith_ops = expr._arith_ops_syms + expr._cmp_ops_syms + + +@td.skip_if_no_ne +class TestMathPythonPython: + @classmethod + def setup_class(cls): + cls.engine = "python" + cls.parser = "pandas" + cls.unary_fns = _unary_math_ops + cls.binary_fns = _binary_math_ops + + @classmethod + def teardown_class(cls): + del cls.engine, cls.parser + + def eval(self, *args, **kwargs): + kwargs["engine"] = self.engine + kwargs["parser"] = self.parser + kwargs["level"] = kwargs.pop("level", 0) + 1 + return pd.eval(*args, **kwargs) + + def test_unary_functions(self, unary_fns_for_ne): + df = DataFrame({"a": np.random.randn(10)}) + a = df.a + + for fn in unary_fns_for_ne: + expr = f"{fn}(a)" + got = self.eval(expr) + with np.errstate(all="ignore"): + expect = getattr(np, fn)(a) + tm.assert_series_equal(got, expect, check_names=False) + + def test_floor_and_ceil_functions_raise_error(self, ne_lt_2_6_9, unary_fns_for_ne): + for fn in ("floor", "ceil"): + msg = f'"{fn}" is not a supported function' + with pytest.raises(ValueError, match=msg): + expr = f"{fn}(100)" + self.eval(expr) + + def test_binary_functions(self): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + a = df.a + b = df.b + for fn in self.binary_fns: + expr = f"{fn}(a, b)" + got = self.eval(expr) + with 
np.errstate(all="ignore"): + expect = getattr(np, fn)(a, b) + tm.assert_almost_equal(got, expect, check_names=False) + + def test_df_use_case(self): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval( + "e = arctan2(sin(a), b)", + engine=self.engine, + parser=self.parser, + inplace=True, + ) + got = df.e + expect = np.arctan2(np.sin(df.a), df.b) + tm.assert_series_equal(got, expect, check_names=False) + + def test_df_arithmetic_subexpression(self): + df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)}) + df.eval("e = sin(a + b)", engine=self.engine, parser=self.parser, inplace=True) + got = df.e + expect = np.sin(df.a + df.b) + tm.assert_series_equal(got, expect, check_names=False) + + def check_result_type(self, dtype, expect_dtype): + df = DataFrame({"a": np.random.randn(10).astype(dtype)}) + assert df.a.dtype == dtype + df.eval("b = sin(a)", engine=self.engine, parser=self.parser, inplace=True) + got = df.b + expect = np.sin(df.a) + assert expect.dtype == got.dtype + assert expect_dtype == got.dtype + tm.assert_series_equal(got, expect, check_names=False) + + def test_result_types(self): + self.check_result_type(np.int32, np.float64) + self.check_result_type(np.int64, np.float64) + self.check_result_type(np.float32, np.float32) + self.check_result_type(np.float64, np.float64) + + @td.skip_if_windows + def test_result_complex128(self): + # xref https://github.com/pandas-dev/pandas/issues/12293 + # this fails on Windows, apparently a floating point precision issue + + # Did not test complex64 because DataFrame is converting it to + # complex128. 
Due to https://github.com/pandas-dev/pandas/issues/10952 + self.check_result_type(np.complex128, np.complex128) + + def test_undefined_func(self): + df = DataFrame({"a": np.random.randn(10)}) + msg = '"mysin" is not a supported function' + + with pytest.raises(ValueError, match=msg): + df.eval("mysin(a)", engine=self.engine, parser=self.parser) + + def test_keyword_arg(self): + df = DataFrame({"a": np.random.randn(10)}) + msg = 'Function "sin" does not support keyword arguments' + + with pytest.raises(TypeError, match=msg): + df.eval("sin(x=a)", engine=self.engine, parser=self.parser) + + +class TestMathPythonPandas(TestMathPythonPython): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "python" + cls.parser = "pandas" + + +class TestMathNumExprPandas(TestMathPythonPython): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "numexpr" + cls.parser = "pandas" + + +class TestMathNumExprPython(TestMathPythonPython): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "numexpr" + cls.parser = "python" + + +_var_s = randn(10) + + +class TestScope: + def test_global_scope(self, engine, parser): + e = "_var_s * 2" + tm.assert_numpy_array_equal( + _var_s * 2, pd.eval(e, engine=engine, parser=parser) + ) + + def test_no_new_locals(self, engine, parser): + x = 1 # noqa + lcls = locals().copy() + pd.eval("x + 1", local_dict=lcls, engine=engine, parser=parser) + lcls2 = locals().copy() + lcls2.pop("lcls") + assert lcls == lcls2 + + def test_no_new_globals(self, engine, parser): + x = 1 # noqa + gbls = globals().copy() + pd.eval("x + 1", engine=engine, parser=parser) + gbls2 = globals().copy() + assert gbls == gbls2 + + +@td.skip_if_no_ne +def test_invalid_engine(): + msg = "Invalid engine 'asdf' passed" + with pytest.raises(KeyError, match=msg): + pd.eval("x + y", local_dict={"x": 1, "y": 2}, engine="asdf") + + +@td.skip_if_no_ne +def test_invalid_parser(): + msg = "Invalid parser 'asdf' passed" + 
with pytest.raises(KeyError, match=msg): + pd.eval("x + y", local_dict={"x": 1, "y": 2}, parser="asdf") + + +_parsers: Dict[str, Type[BaseExprVisitor]] = { + "python": PythonExprVisitor, + "pytables": pytables.PyTablesExprVisitor, + "pandas": PandasExprVisitor, +} + + +@pytest.mark.parametrize("engine", _engines) +@pytest.mark.parametrize("parser", _parsers) +def test_disallowed_nodes(engine, parser): + VisitorClass = _parsers[parser] + uns_ops = VisitorClass.unsupported_nodes + inst = VisitorClass("x + 1", engine, parser) + + for ops in uns_ops: + with pytest.raises(NotImplementedError): + getattr(inst, ops)() + + +def test_syntax_error_exprs(engine, parser): + e = "s +" + with pytest.raises(SyntaxError): + pd.eval(e, engine=engine, parser=parser) + + +def test_name_error_exprs(engine, parser): + e = "s + t" + with pytest.raises(NameError): + pd.eval(e, engine=engine, parser=parser) + + +def test_invalid_local_variable_reference(engine, parser): + a, b = 1, 2 # noqa + exprs = "a + @b", "@a + b", "@a + @b" + + for _expr in exprs: + if parser != "pandas": + with pytest.raises(SyntaxError, match="The '@' prefix is only"): + pd.eval(_expr, engine=engine, parser=parser) + else: + with pytest.raises(SyntaxError, match="The '@' prefix is not"): + pd.eval(_expr, engine=engine, parser=parser) + + +def test_numexpr_builtin_raises(engine, parser): + sin, dotted_line = 1, 2 + if engine == "numexpr": + msg = "Variables in expression .+" + with pytest.raises(NumExprClobberingError, match=msg): + pd.eval("sin + dotted_line", engine=engine, parser=parser) + else: + res = pd.eval("sin + dotted_line", engine=engine, parser=parser) + assert res == sin + dotted_line + + +def test_bad_resolver_raises(engine, parser): + cannot_resolve = 42, 3.0 + with pytest.raises(TypeError, match="Resolver of type .+"): + pd.eval("1 + 2", resolvers=cannot_resolve, engine=engine, parser=parser) + + +def test_empty_string_raises(engine, parser): + # GH 13139 + with pytest.raises(ValueError, match="expr 
cannot be an empty string"): + pd.eval("", engine=engine, parser=parser) + + +def test_more_than_one_expression_raises(engine, parser): + with pytest.raises(SyntaxError, match=("only a single expression is allowed")): + pd.eval("1 + 1; 2 + 2", engine=engine, parser=parser) + + +@pytest.mark.parametrize("cmp", ("and", "or")) +@pytest.mark.parametrize("lhs", (int, float)) +@pytest.mark.parametrize("rhs", (int, float)) +def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): + gen = {int: lambda: np.random.randint(10), float: np.random.randn} + + mid = gen[lhs]() # noqa + lhs = gen[lhs]() # noqa + rhs = gen[rhs]() # noqa + + ex1 = f"lhs {cmp} mid {cmp} rhs" + ex2 = f"lhs {cmp} mid and mid {cmp} rhs" + ex3 = f"(lhs {cmp} mid) & (mid {cmp} rhs)" + for ex in (ex1, ex2, ex3): + with pytest.raises(NotImplementedError): + pd.eval(ex, engine=engine, parser=parser) + + +@pytest.mark.parametrize( + "other", + [ + "'x'", + pytest.param( + "...", marks=pytest.mark.xfail(not compat.PY38, reason="GH-28116") + ), + ], +) +def test_equals_various(other): + df = DataFrame({"A": ["a", "b", "c"]}) + result = df.eval(f"A == {other}") + expected = Series([False, False, False], name="A") + if _USE_NUMEXPR: + # https://github.com/pandas-dev/pandas/issues/10239 + # lose name with numexpr engine. Remove when that's fixed. 
+ expected.name = None + tm.assert_series_equal(result, expected) + + +def test_inf(engine, parser): + s = "inf + 1" + expected = np.inf + result = pd.eval(s, engine=engine, parser=parser) + assert result == expected + + +def test_truediv_deprecated(engine, parser): + # GH#29182 + match = "The `truediv` parameter in pd.eval is deprecated" + + with tm.assert_produces_warning(FutureWarning) as m: + pd.eval("1+1", engine=engine, parser=parser, truediv=True) + + assert len(m) == 1 + assert match in str(m[0].message) + + with tm.assert_produces_warning(FutureWarning) as m: + pd.eval("1+1", engine=engine, parser=parser, truediv=False) + + assert len(m) == 1 + assert match in str(m[0].message) + + +def test_negate_lt_eq_le(engine, parser): + df = pd.DataFrame([[0, 10], [1, 20]], columns=["cat", "count"]) + expected = df[~(df.cat > 0)] + + result = df.query("~(cat > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + if parser == "python": + with pytest.raises(NotImplementedError): + df.query("not (cat > 0)", engine=engine, parser=parser) + else: + result = df.query("not (cat > 0)", engine=engine, parser=parser) + tm.assert_frame_equal(result, expected) + + +class TestValidate: + def test_validate_bool_args(self): + invalid_values = [1, "True", [1, 2, 3], 5.0] + + for value in invalid_values: + with pytest.raises(ValueError): + pd.eval("2+2", inplace=value) diff --git a/pandas/tests/config/__init__.py b/pandas/tests/config/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py new file mode 100644 index 00000000..51640641 --- /dev/null +++ b/pandas/tests/config/test_config.py @@ -0,0 +1,470 @@ +import warnings + +import pytest + +from pandas._config import config as cf +from pandas._config.config import OptionError + +import pandas as pd + + +class TestConfig: + @classmethod + def setup_class(cls): + from copy import deepcopy + + cls.cf = cf + cls.gc = 
deepcopy(getattr(cls.cf, "_global_config")) + cls.do = deepcopy(getattr(cls.cf, "_deprecated_options")) + cls.ro = deepcopy(getattr(cls.cf, "_registered_options")) + + def setup_method(self, method): + setattr(self.cf, "_global_config", {}) + setattr(self.cf, "options", self.cf.DictWrapper(self.cf._global_config)) + setattr(self.cf, "_deprecated_options", {}) + setattr(self.cf, "_registered_options", {}) + + # Our test fixture in conftest.py sets "chained_assignment" + # to "raise" only after all test methods have been setup. + # However, after this setup, there is no longer any + # "chained_assignment" option, so re-register it. + self.cf.register_option("chained_assignment", "raise") + + def teardown_method(self, method): + setattr(self.cf, "_global_config", self.gc) + setattr(self.cf, "_deprecated_options", self.do) + setattr(self.cf, "_registered_options", self.ro) + + def test_api(self): + + # the pandas object exposes the user API + assert hasattr(pd, "get_option") + assert hasattr(pd, "set_option") + assert hasattr(pd, "reset_option") + assert hasattr(pd, "describe_option") + + def test_is_one_of_factory(self): + v = self.cf.is_one_of_factory([None, 12]) + + v(12) + v(None) + msg = r"Value must be one of None\|12" + with pytest.raises(ValueError, match=msg): + v(1.1) + + def test_register_option(self): + self.cf.register_option("a", 1, "doc") + + # can't register an already registered option + msg = "Option 'a' has already been registered" + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a", 1, "doc") + + # can't register an already registered option + msg = "Path prefix to option 'a' is already an option" + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a.b.c.d1", 1, "doc") + with pytest.raises(OptionError, match=msg): + self.cf.register_option("a.b.c.d2", 1, "doc") + + # no python keywords + msg = "for is a python keyword" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("for", 0) + with 
pytest.raises(ValueError, match=msg): + self.cf.register_option("a.for.b", 0) + # must be valid identifier (ensure attribute access works) + msg = "oh my goddess! is not a valid identifier" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("Oh my Goddess!", 0) + + # we can register options several levels deep + # without predefining the intermediate steps + # and we can define differently named options + # in the same namespace + self.cf.register_option("k.b.c.d1", 1, "doc") + self.cf.register_option("k.b.c.d2", 1, "doc") + + def test_describe_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b", 1, "doc2") + self.cf.deprecate_option("b") + + self.cf.register_option("c.d.e1", 1, "doc3") + self.cf.register_option("c.d.e2", 1, "doc4") + self.cf.register_option("f", 1) + self.cf.register_option("g.h", 1) + self.cf.register_option("k", 2) + self.cf.deprecate_option("g.h", rkey="k") + self.cf.register_option("l", "foo") + + # non-existent keys raise KeyError + msg = r"No such keys\(s\)" + with pytest.raises(OptionError, match=msg): + self.cf.describe_option("no.such.key") + + # we can get the description for any key we registered + assert "doc" in self.cf.describe_option("a", _print_desc=False) + assert "doc2" in self.cf.describe_option("b", _print_desc=False) + assert "precated" in self.cf.describe_option("b", _print_desc=False) + assert "doc3" in self.cf.describe_option("c.d.e1", _print_desc=False) + assert "doc4" in self.cf.describe_option("c.d.e2", _print_desc=False) + + # if no doc is specified we get a default message + # saying "description not available" + assert "vailable" in self.cf.describe_option("f", _print_desc=False) + assert "vailable" in self.cf.describe_option("g.h", _print_desc=False) + assert "precated" in self.cf.describe_option("g.h", _print_desc=False) + assert "k" in self.cf.describe_option("g.h", _print_desc=False) + + # default is reported + assert "foo" in self.cf.describe_option("l", 
_print_desc=False) + # current value is reported + assert "bar" not in self.cf.describe_option("l", _print_desc=False) + self.cf.set_option("l", "bar") + assert "bar" in self.cf.describe_option("l", _print_desc=False) + + def test_case_insensitive(self): + self.cf.register_option("KanBAN", 1, "doc") + + assert "doc" in self.cf.describe_option("kanbaN", _print_desc=False) + assert self.cf.get_option("kanBaN") == 1 + self.cf.set_option("KanBan", 2) + assert self.cf.get_option("kAnBaN") == 2 + + # gets of non-existent keys fail + msg = r"No such keys\(s\): 'no_such_option'" + with pytest.raises(OptionError, match=msg): + self.cf.get_option("no_such_option") + self.cf.deprecate_option("KanBan") + + assert self.cf._is_deprecated("kAnBaN") + + def test_get_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + # gets of existing keys succeed + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + # gets of non-existent keys fail + msg = r"No such keys\(s\): 'no_such_option'" + with pytest.raises(OptionError, match=msg): + self.cf.get_option("no_such_option") + + def test_set_option(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + self.cf.set_option("b.b", 1.1) + + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == "wurld" + assert self.cf.get_option("b.b") == 1.1 + + msg = r"No such keys\(s\): 'no.such.key'" + with pytest.raises(OptionError, match=msg): + self.cf.set_option("no.such.key", None) + + def test_set_option_empty_args(self): + msg = "Must provide an even number of non-keyword 
arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option() + + def test_set_option_uneven_args(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a.b", 2, "b.c") + + def test_set_option_invalid_single_argument_type(self): + msg = "Must provide an even number of non-keyword arguments" + with pytest.raises(ValueError, match=msg): + self.cf.set_option(2) + + def test_set_option_multiple(self): + self.cf.register_option("a", 1, "doc") + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("b.b", None, "doc2") + + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + assert self.cf.get_option("b.b") is None + + self.cf.set_option("a", "2", "b.c", None, "b.b", 10.0) + + assert self.cf.get_option("a") == "2" + assert self.cf.get_option("b.c") is None + assert self.cf.get_option("b.b") == 10.0 + + def test_validation(self): + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("d", 1, "doc", validator=self.cf.is_nonnegative_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_text) + + msg = "Value must have type ''" + with pytest.raises(ValueError, match=msg): + self.cf.register_option("a.b.c.d2", "NO", "doc", validator=self.cf.is_int) + + self.cf.set_option("a", 2) # int is_int + self.cf.set_option("b.c", "wurld") # str is_str + self.cf.set_option("d", 2) + self.cf.set_option("d", None) # non-negative int can be None + + # None not is_int + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a", None) + with pytest.raises(ValueError, match=msg): + self.cf.set_option("a", "ab") + + msg = "Value must be a nonnegative integer or None" + with pytest.raises(ValueError, match=msg): + self.cf.register_option( + "a.b.c.d3", "NO", "doc", validator=self.cf.is_nonnegative_int + ) + with pytest.raises(ValueError, match=msg): + self.cf.register_option( + 
"a.b.c.d3", -2, "doc", validator=self.cf.is_nonnegative_int + ) + + msg = r"Value must be an instance of \|" + with pytest.raises(ValueError, match=msg): + self.cf.set_option("b.c", 1) + + validator = self.cf.is_one_of_factory([None, self.cf.is_callable]) + self.cf.register_option("b", lambda: None, "doc", validator=validator) + self.cf.set_option("b", "%.1f".format) # Formatter is callable + self.cf.set_option("b", None) # Formatter is none (default) + with pytest.raises(ValueError, match="Value must be a callable"): + self.cf.set_option("b", "%.1f") + + def test_reset_option(self): + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_str) + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == "wurld" + + self.cf.reset_option("a") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "wurld" + self.cf.reset_option("b.c") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + def test_reset_option_all(self): + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2", validator=self.cf.is_str) + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + self.cf.set_option("a", 2) + self.cf.set_option("b.c", "wurld") + assert self.cf.get_option("a") == 2 + assert self.cf.get_option("b.c") == "wurld" + + self.cf.reset_option("all") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b.c") == "hullo" + + def test_deprecate_option(self): + # we can deprecate non-existent options + self.cf.deprecate_option("foo") + + assert self.cf._is_deprecated("foo") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + with pytest.raises(KeyError, 
match="No such keys.s.: 'foo'"): + self.cf.get_option("foo") + assert len(w) == 1 # should have raised one warning + assert "deprecated" in str(w[-1]) # we get the default message + + self.cf.register_option("a", 1, "doc", validator=self.cf.is_int) + self.cf.register_option("b.c", "hullo", "doc2") + self.cf.register_option("foo", "hullo", "doc2") + + self.cf.deprecate_option("a", removal_ver="nifty_ver") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.get_option("a") + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the default message + assert "nifty_ver" in str(w[-1]) # with the removal_ver quoted + + msg = "Option 'a' has already been defined as deprecated" + with pytest.raises(OptionError, match=msg): + self.cf.deprecate_option("a") + + self.cf.deprecate_option("b.c", "zounds!") + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.get_option("b.c") + + assert len(w) == 1 # should have raised one warning + assert "zounds!" 
in str(w[-1]) # we get the custom message + + # test rerouting keys + self.cf.register_option("d.a", "foo", "doc2") + self.cf.register_option("d.dep", "bar", "doc2") + assert self.cf.get_option("d.a") == "foo" + assert self.cf.get_option("d.dep") == "bar" + + self.cf.deprecate_option("d.dep", rkey="d.a") # reroute d.dep to d.a + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert self.cf.get_option("d.dep") == "foo" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + self.cf.set_option("d.dep", "baz") # should overwrite "d.a" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + assert self.cf.get_option("d.dep") == "baz" + + assert len(w) == 1 # should have raised one warning + assert "eprecated" in str(w[-1]) # we get the custom message + + def test_config_prefix(self): + with self.cf.config_prefix("base"): + self.cf.register_option("a", 1, "doc1") + self.cf.register_option("b", 2, "doc2") + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b") == 2 + + self.cf.set_option("a", 3) + self.cf.set_option("b", 4) + assert self.cf.get_option("a") == 3 + assert self.cf.get_option("b") == 4 + + assert self.cf.get_option("base.a") == 3 + assert self.cf.get_option("base.b") == 4 + assert "doc1" in self.cf.describe_option("base.a", _print_desc=False) + assert "doc2" in self.cf.describe_option("base.b", _print_desc=False) + + self.cf.reset_option("base.a") + self.cf.reset_option("base.b") + + with self.cf.config_prefix("base"): + assert self.cf.get_option("a") == 1 + assert self.cf.get_option("b") == 2 + + def test_callback(self): + k = [None] + v = [None] + + def callback(key): + k.append(key) + 
def test_attribute_access(self):
    """Check attribute-style access to options via ``self.cf.options``.

    Covers reading, temporary override through ``option_context``,
    assignment, reset, rejection of unknown or non-leaf names, and that a
    registered callback fires when an option is set through attribute
    assignment.
    """
    holder = []

    def f3(key):
        # Record every callback invocation so it can be counted below.
        holder.append(True)

    self.cf.register_option("a", 0)
    self.cf.register_option("c", 0, cb=f3)
    options = self.cf.options

    assert options.a == 0
    with self.cf.option_context("a", 15):
        assert options.a == 15

    options.a = 500
    assert self.cf.get_option("a") == 500

    self.cf.reset_option("a")
    # After the reset the option must be back at the default registered
    # above. Comparing the two access paths alone (as below) would also pass
    # if the reset had silently done nothing.
    assert options.a == 0
    assert options.a == self.cf.get_option("a", 0)

    msg = "You can only set the value of existing options"
    with pytest.raises(OptionError, match=msg):
        options.b = 1
    with pytest.raises(OptionError, match=msg):
        options.display = 1

    # make sure callback kicks when using this form of setting
    options.c = 1
    assert len(holder) == 1
+ ctx = self.cf.option_context(option_name, context_value) + assert self.cf.get_option(option_name) == original_value + + # Ensure the correct value is available inside the context. + with ctx: + assert self.cf.get_option(option_name) == context_value + + # Ensure the current context is reset + assert self.cf.get_option(option_name) == original_value + + def test_dictwrapper_getattr(self): + options = self.cf.options + # GH 19789 + with pytest.raises(OptionError, match="No such option"): + options.bananas + assert not hasattr(options, "bananas") diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py new file mode 100644 index 00000000..e815a902 --- /dev/null +++ b/pandas/tests/config/test_localization.py @@ -0,0 +1,105 @@ +import codecs +import locale +import os + +import pytest + +from pandas._config.localization import can_set_locale, get_locales, set_locale + +from pandas.compat import is_platform_windows + +import pandas as pd + +_all_locales = get_locales() or [] +_current_locale = locale.getlocale() + +# Don't run any of these tests if we are on Windows or have no locales. +pytestmark = pytest.mark.skipif( + is_platform_windows() or not _all_locales, reason="Need non-Windows and locales" +) + +_skip_if_only_one_locale = pytest.mark.skipif( + len(_all_locales) <= 1, reason="Need multiple locales for meaningful test" +) + + +def test_can_set_locale_valid_set(): + # Can set the default locale. + assert can_set_locale("") + + +def test_can_set_locale_invalid_set(): + # Cannot set an invalid locale. + assert not can_set_locale("non-existent_locale") + + +def test_can_set_locale_invalid_get(monkeypatch): + # see GH#22129 + # In some cases, an invalid locale can be set, + # but a subsequent getlocale() raises a ValueError. 
@_skip_if_only_one_locale
@pytest.mark.parametrize(
    "lang,enc",
    [
        ("it_CH", "UTF-8"),
        ("en_US", "ascii"),
        ("zh_CN", "GB2312"),
        ("it_IT", "ISO-8859-1"),
    ],
)
def test_set_locale(lang, enc):
    """Check that ``set_locale`` applies a locale and restores the old one.

    When the requested locale cannot be set, entering ``set_locale`` must
    raise ``locale.Error``. Otherwise the locale reported inside the context
    must match the requested (language, encoding) pair once both encodings
    are reduced to their canonical codec name.
    """
    if all(x is None for x in _current_locale):
        # Not sure why, but on some Travis runs with pytest,
        # getlocale() returned (None, None).
        pytest.skip("Current locale is not set.")

    enc = codecs.lookup(enc).name
    new_locale = lang, enc

    if not can_set_locale(new_locale):
        msg = "unsupported locale setting"

        with pytest.raises(locale.Error, match=msg):
            with set_locale(new_locale):
                pass
    else:
        with set_locale(new_locale) as normalized_locale:
            new_lang, new_enc = normalized_locale.split(".")
            # Canonicalise the encoding that was actually reported. Looking
            # up ``enc`` here instead would discard the parsed value and
            # make the encoding half of the comparison always succeed.
            new_enc = codecs.lookup(new_enc).name

            normalized_locale = new_lang, new_enc
            assert normalized_locale == new_locale

    # Once we exit the "with" statement, locale should be back to what it was.
    current_locale = locale.getlocale()
    assert current_locale == _current_locale
+ cats = ["a", "b", "c"] + cat_type = CategoricalDtype(categories=cats, ordered=False) + expected = Categorical(["a", "a"], categories=cats) + + result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type) + tm.assert_categorical_equal( + result, expected, check_category_order=True, check_dtype=True + ) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py new file mode 100644 index 00000000..fe271392 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -0,0 +1,21 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_ndarray_preserving_na + +import pandas._testing as tm + + +@pytest.mark.parametrize( + "values, dtype, expected", + [ + ([1, 2, 3], None, np.array([1, 2, 3])), + (np.array([1, 2, 3]), None, np.array([1, 2, 3])), + (["1", "2", None], None, np.array(["1", "2", None])), + (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])), + ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])), + ], +) +def test_construct_1d_ndarray_preserving_na(values, dtype, expected): + result = construct_1d_ndarray_preserving_na(values, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/cast/test_construct_object_arr.py b/pandas/tests/dtypes/cast/test_construct_object_arr.py new file mode 100644 index 00000000..cb44f91f --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_object_arr.py @@ -0,0 +1,20 @@ +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + + +@pytest.mark.parametrize("datum1", [1, 2.0, "3", (4, 5), [6, 7], None]) +@pytest.mark.parametrize("datum2", [8, 9.0, "10", (11, 12), [13, 14], None]) +def test_cast_1d_array(datum1, datum2): + data = [datum1, datum2] + result = construct_1d_object_array_from_listlike(data) + + # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 + assert result.dtype == "object" + assert 
@pytest.mark.parametrize("val", [1, 2.0, None])
def test_cast_1d_array_invalid_scalar(val):
    """Scalars have no length, so building an object array must raise."""
    # ``match`` is applied as a regular expression: unescaped, "()" is an
    # empty group and the literal parentheses would never be checked.
    with pytest.raises(TypeError, match=r"has no len\(\)"):
        construct_1d_object_array_from_listlike(val)
@pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64])
def test_datetime_likes_nan(klass):
    """A float array holding NaN downcasts to the datetime-like dtype with NaT."""
    target = f"{klass.__name__}[ns]"
    values = np.array([1, 2, np.nan])
    wanted = np.array([1, 2, klass("NaT")], target)

    tm.assert_numpy_array_equal(maybe_downcast_to_dtype(values, target), wanted)
+@pytest.mark.parametrize( + "source_dtypes,expected_common_dtype", + [ + ((np.int64,), np.int64), + ((np.uint64,), np.uint64), + ((np.float32,), np.float32), + ((np.object,), np.object), + # Into ints. + ((np.int16, np.int64), np.int64), + ((np.int32, np.uint32), np.int64), + ((np.uint16, np.uint64), np.uint64), + # Into floats. + ((np.float16, np.float32), np.float32), + ((np.float16, np.int16), np.float32), + ((np.float32, np.int16), np.float32), + ((np.uint64, np.int64), np.float64), + ((np.int16, np.float64), np.float64), + ((np.float16, np.int64), np.float64), + # Into others. + ((np.complex128, np.int32), np.complex128), + ((np.object, np.float32), np.object), + ((np.object, np.int16), np.object), + # Bool with int. + ((np.dtype("bool"), np.int64), np.object), + ((np.dtype("bool"), np.int32), np.object), + ((np.dtype("bool"), np.int16), np.object), + ((np.dtype("bool"), np.int8), np.object), + ((np.dtype("bool"), np.uint64), np.object), + ((np.dtype("bool"), np.uint32), np.object), + ((np.dtype("bool"), np.uint16), np.object), + ((np.dtype("bool"), np.uint8), np.object), + # Bool with float. 
@pytest.mark.parametrize(
    "dtype2",
    [
        DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
        np.dtype("datetime64[ns]"),
        # ``np.object`` was only an alias of the builtin and has been removed
        # from NumPy; the builtin is the same object and works everywhere.
        object,
        np.int64,
    ],
)
def test_datetimetz_dtype_mismatch(dtype2):
    """A tz-aware dtype combined with any different dtype resolves to object.

    The pair is checked in both orders so the result does not depend on
    which dtype comes first.
    """
    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
    assert find_common_type([dtype, dtype2]) == object
    assert find_common_type([dtype2, dtype]) == object
def test_maybe_infer_to_datetimelike_ser_construct():
    """A string mixed with a Timestamp must stay object dtype (see gh-19671)."""
    mixed_values = ["M1701", Timestamp("20130101")]
    inferred_kind = Series(mixed_values).dtype.kind
    assert inferred_kind == "O"
# ``np.float`` was only an alias of the builtin ``float`` and has been removed
# from NumPy; the builtin yields the identical Python scalar.
@pytest.mark.parametrize("data,exp_dtype", [(12, np.int64), (float(12), np.float64)])
def test_infer_dtype_from_python_scalar(data, exp_dtype):
    """Plain Python int and float scalars infer to the 64-bit numpy dtypes."""
    dtype, _ = infer_dtype_from_scalar(data)
    assert dtype == exp_dtype
+def test_infer_from_scalar_tz(tz, pandas_dtype): + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=pandas_dtype) + + if pandas_dtype: + exp_dtype = f"datetime64[ns, {tz}]" + exp_val = dt.value + else: + exp_dtype = np.object_ + exp_val = dt + + assert dtype == exp_dtype + assert val == exp_val + + +@pytest.mark.parametrize( + "left, right, subtype", + [ + (0, 1, "int64"), + (0.0, 1.0, "float64"), + (Timestamp(0), Timestamp(1), "datetime64[ns]"), + (Timestamp(0, tz="UTC"), Timestamp(1, tz="UTC"), "datetime64[ns, UTC]"), + (Timedelta(0), Timedelta(1), "timedelta64[ns]"), + ], +) +def test_infer_from_interval(left, right, subtype, closed, pandas_dtype): + # GH 30337 + interval = Interval(left, right, closed) + result_dtype, result_value = infer_dtype_from_scalar(interval, pandas_dtype) + expected_dtype = f"interval[{subtype}]" if pandas_dtype else np.object_ + assert result_dtype == expected_dtype + assert result_value == interval + + +def test_infer_dtype_from_scalar_errors(): + msg = "invalid ndarray passed to infer_dtype_from_scalar" + + with pytest.raises(ValueError, match=msg): + infer_dtype_from_scalar(np.array([1])) + + +@pytest.mark.parametrize( + "arr, expected, pandas_dtype", + [ + ("foo", np.object_, False), + (b"foo", np.object_, False), + (1, np.int_, False), + (1.5, np.float_, False), + ([1], np.int_, False), + (np.array([1], dtype=np.int64), np.int64, False), + ([np.nan, 1, ""], np.object_, False), + (np.array([[1.0, 2.0]]), np.float_, False), + (Categorical(list("aabc")), np.object_, False), + (Categorical([1, 2, 3]), np.int64, False), + (Categorical(list("aabc")), "category", True), + (Categorical([1, 2, 3]), "category", True), + (Timestamp("20160101"), np.object_, False), + (np.datetime64("2016-01-01"), np.dtype("=M8[D]"), False), + (date_range("20160101", periods=3), np.dtype("=M8[ns]"), False), + ( + date_range("20160101", periods=3, tz="US/Eastern"), + "datetime64[ns, US/Eastern]", + True, + ), + (Series([1.0, 2, 
@pytest.fixture(
    params=[
        bool,
        "uint8",
        "int32",
        "uint64",
        "float32",
        "float64",
        "complex64",
        "complex128",
        "M8[ns]",
        "m8[ns]",
        str,
        bytes,
        object,
    ]
)
def any_numpy_dtype_reduced(request):
    """
    Parameterized fixture for numpy dtypes, reduced from any_numpy_dtype.

    * bool
    * 'uint8'
    * 'int32'
    * 'uint64'
    * 'float32'
    * 'float64'
    * 'complex64'
    * 'complex128'
    * 'M8[ns]'
    * 'm8[ns]'
    * str
    * bytes
    * object
    """
    return request.param
np.int32 != np.int32 + assert res_type == ex_type or res_type.__name__ == ex_type.__name__ + + match_value = result_fill_value == expected_fill_value + + # Note: type check above ensures that we have the _same_ NA value + # for missing values, None == None (which is checked + # through match_value above), but np.nan != np.nan and pd.NaT != pd.NaT + match_missing = isna(result_fill_value) and isna(expected_fill_value) + + assert match_value or match_missing + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # size 8 + ("int8", 1, "int8"), + ("int8", np.iinfo("int8").max + 1, "int16"), + ("int8", np.iinfo("int16").max + 1, "int32"), + ("int8", np.iinfo("int32").max + 1, "int64"), + ("int8", np.iinfo("int64").max + 1, "object"), + ("int8", -1, "int8"), + ("int8", np.iinfo("int8").min - 1, "int16"), + ("int8", np.iinfo("int16").min - 1, "int32"), + ("int8", np.iinfo("int32").min - 1, "int64"), + ("int8", np.iinfo("int64").min - 1, "object"), + # keep signed-ness as long as possible + ("uint8", 1, "uint8"), + ("uint8", np.iinfo("int8").max + 1, "uint8"), + ("uint8", np.iinfo("uint8").max + 1, "uint16"), + ("uint8", np.iinfo("int16").max + 1, "uint16"), + ("uint8", np.iinfo("uint16").max + 1, "uint32"), + ("uint8", np.iinfo("int32").max + 1, "uint32"), + ("uint8", np.iinfo("uint32").max + 1, "uint64"), + ("uint8", np.iinfo("int64").max + 1, "uint64"), + ("uint8", np.iinfo("uint64").max + 1, "object"), + # max of uint8 cannot be contained in int8 + ("uint8", -1, "int16"), + ("uint8", np.iinfo("int8").min - 1, "int16"), + ("uint8", np.iinfo("int16").min - 1, "int32"), + ("uint8", np.iinfo("int32").min - 1, "int64"), + ("uint8", np.iinfo("int64").min - 1, "object"), + # size 16 + ("int16", 1, "int16"), + ("int16", np.iinfo("int8").max + 1, "int16"), + ("int16", np.iinfo("int16").max + 1, "int32"), + ("int16", np.iinfo("int32").max + 1, "int64"), + ("int16", np.iinfo("int64").max + 1, "object"), + ("int16", -1, "int16"), + ("int16", 
np.iinfo("int8").min - 1, "int16"), + ("int16", np.iinfo("int16").min - 1, "int32"), + ("int16", np.iinfo("int32").min - 1, "int64"), + ("int16", np.iinfo("int64").min - 1, "object"), + ("uint16", 1, "uint16"), + ("uint16", np.iinfo("int8").max + 1, "uint16"), + ("uint16", np.iinfo("uint8").max + 1, "uint16"), + ("uint16", np.iinfo("int16").max + 1, "uint16"), + ("uint16", np.iinfo("uint16").max + 1, "uint32"), + ("uint16", np.iinfo("int32").max + 1, "uint32"), + ("uint16", np.iinfo("uint32").max + 1, "uint64"), + ("uint16", np.iinfo("int64").max + 1, "uint64"), + ("uint16", np.iinfo("uint64").max + 1, "object"), + ("uint16", -1, "int32"), + ("uint16", np.iinfo("int8").min - 1, "int32"), + ("uint16", np.iinfo("int16").min - 1, "int32"), + ("uint16", np.iinfo("int32").min - 1, "int64"), + ("uint16", np.iinfo("int64").min - 1, "object"), + # size 32 + ("int32", 1, "int32"), + ("int32", np.iinfo("int8").max + 1, "int32"), + ("int32", np.iinfo("int16").max + 1, "int32"), + ("int32", np.iinfo("int32").max + 1, "int64"), + ("int32", np.iinfo("int64").max + 1, "object"), + ("int32", -1, "int32"), + ("int32", np.iinfo("int8").min - 1, "int32"), + ("int32", np.iinfo("int16").min - 1, "int32"), + ("int32", np.iinfo("int32").min - 1, "int64"), + ("int32", np.iinfo("int64").min - 1, "object"), + ("uint32", 1, "uint32"), + ("uint32", np.iinfo("int8").max + 1, "uint32"), + ("uint32", np.iinfo("uint8").max + 1, "uint32"), + ("uint32", np.iinfo("int16").max + 1, "uint32"), + ("uint32", np.iinfo("uint16").max + 1, "uint32"), + ("uint32", np.iinfo("int32").max + 1, "uint32"), + ("uint32", np.iinfo("uint32").max + 1, "uint64"), + ("uint32", np.iinfo("int64").max + 1, "uint64"), + ("uint32", np.iinfo("uint64").max + 1, "object"), + ("uint32", -1, "int64"), + ("uint32", np.iinfo("int8").min - 1, "int64"), + ("uint32", np.iinfo("int16").min - 1, "int64"), + ("uint32", np.iinfo("int32").min - 1, "int64"), + ("uint32", np.iinfo("int64").min - 1, "object"), + # size 64 + ("int64", 1, 
"int64"), + ("int64", np.iinfo("int8").max + 1, "int64"), + ("int64", np.iinfo("int16").max + 1, "int64"), + ("int64", np.iinfo("int32").max + 1, "int64"), + ("int64", np.iinfo("int64").max + 1, "object"), + ("int64", -1, "int64"), + ("int64", np.iinfo("int8").min - 1, "int64"), + ("int64", np.iinfo("int16").min - 1, "int64"), + ("int64", np.iinfo("int32").min - 1, "int64"), + ("int64", np.iinfo("int64").min - 1, "object"), + ("uint64", 1, "uint64"), + ("uint64", np.iinfo("int8").max + 1, "uint64"), + ("uint64", np.iinfo("uint8").max + 1, "uint64"), + ("uint64", np.iinfo("int16").max + 1, "uint64"), + ("uint64", np.iinfo("uint16").max + 1, "uint64"), + ("uint64", np.iinfo("int32").max + 1, "uint64"), + ("uint64", np.iinfo("uint32").max + 1, "uint64"), + ("uint64", np.iinfo("int64").max + 1, "uint64"), + ("uint64", np.iinfo("uint64").max + 1, "object"), + ("uint64", -1, "object"), + ("uint64", np.iinfo("int8").min - 1, "object"), + ("uint64", np.iinfo("int16").min - 1, "object"), + ("uint64", np.iinfo("int32").min - 1, "object"), + ("uint64", np.iinfo("int64").min - 1, "object"), + ], +) +def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype): + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + # output is not a generic int, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_int_with_float(any_int_dtype, float_dtype): + dtype = np.dtype(any_int_dtype) + fill_dtype = np.dtype(float_dtype) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling int with float always upcasts to float64 + expected_dtype = np.float64 + # fill_value can be different float type + exp_val_for_scalar = np.float64(fill_value) + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def 
test_maybe_promote_float_with_int(float_dtype, any_int_dtype): + + dtype = np.dtype(float_dtype) + fill_dtype = np.dtype(any_int_dtype) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling float with int always keeps float dtype + # because: np.finfo('float32').max > np.iinfo('uint64').max + expected_dtype = dtype + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # float filled with float + ("float32", 1, "float32"), + ("float32", np.finfo("float32").max * 1.1, "float64"), + ("float64", 1, "float64"), + ("float64", np.finfo("float32").max * 1.1, "float64"), + # complex filled with float + ("complex64", 1, "complex64"), + ("complex64", np.finfo("float32").max * 1.1, "complex128"), + ("complex128", 1, "complex128"), + ("complex128", np.finfo("float32").max * 1.1, "complex128"), + # float filled with complex + ("float32", 1 + 1j, "complex64"), + ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("float64", 1 + 1j, "complex128"), + ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + # complex filled with complex + ("complex64", 1 + 1j, "complex64"), + ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("complex128", 1 + 1j, "complex128"), + ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ], +) +def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype): + + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def 
test_maybe_promote_bool_with_any(any_numpy_dtype_reduced): + dtype = np.dtype(bool) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling bool with anything but bool casts to object + expected_dtype = np.dtype(object) if fill_dtype != bool else fill_dtype + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_bool(any_numpy_dtype_reduced): + dtype = np.dtype(any_numpy_dtype_reduced) + fill_value = True + + # filling anything but bool with bool casts to object + expected_dtype = np.dtype(object) if dtype != bool else dtype + # output is not a generic bool, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(bytes_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # we never use bytes dtype internally, always promote to object + expected_dtype = np.dtype(np.object_) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_bytes(any_numpy_dtype_reduced, bytes_dtype): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype + fill_value = b"abc" + + # we never use bytes dtype internally, always promote to object + expected_dtype = np.dtype(np.object_) + # output is not a generic bytes, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def 
test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(datetime64_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling datetime with anything but datetime casts to object + if is_datetime64_dtype(fill_dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to to_datetime64 + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("now"), + np.datetime64("now"), + datetime.datetime.now(), + datetime.date.today(), + ], + ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], +) +def test_maybe_promote_any_with_datetime64( + any_numpy_dtype_reduced, datetime64_dtype, fill_value +): + dtype = np.dtype(any_numpy_dtype_reduced) + + # filling datetime with anything but datetime casts to object + if is_datetime64_dtype(dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to pd.Timestamp.value + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_datetimetz_with_any_numpy_dtype( + tz_aware_fixture, any_numpy_dtype_reduced +): + dtype = DatetimeTZDtype(tz=tz_aware_fixture) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling datetimetz with any numpy dtype casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, 
exp_val_for_scalar) + + +def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture, tz_aware_fixture2): + dtype = DatetimeTZDtype(tz=tz_aware_fixture) + fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture2) + + # create array of given dtype; casts "1" to correct dtype + fill_value = pd.Series([10 ** 9], dtype=fill_dtype)[0] + + # filling datetimetz with datetimetz casts to object, unless tz matches + exp_val_for_scalar = fill_value + if dtype.tz == fill_dtype.tz: + expected_dtype = dtype + else: + expected_dtype = np.dtype(object) + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) +def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value): + + dtype = DatetimeTZDtype(tz=tz_aware_fixture) + + expected_dtype = dtype + exp_val_for_scalar = NaT + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("now"), + np.datetime64("now"), + datetime.datetime.now(), + datetime.date.today(), + ], + ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], +) +def test_maybe_promote_any_numpy_dtype_with_datetimetz( + any_numpy_dtype_reduced, tz_aware_fixture, fill_value +): + dtype = np.dtype(any_numpy_dtype_reduced) + fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) + + fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] + + # filling any numpy dtype with datetimetz casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(timedelta64_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling timedelta with anything but timedelta 
casts to object + if is_timedelta64_dtype(fill_dtype): + expected_dtype = dtype + # for timedelta dtypes, scalar values get cast to pd.Timedelta.value + exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize( + "fill_value", + [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)], + ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"], +) +def test_maybe_promote_any_with_timedelta64( + any_numpy_dtype_reduced, timedelta64_dtype, fill_value +): + dtype = np.dtype(any_numpy_dtype_reduced) + + # filling anything but timedelta with timedelta casts to object + if is_timedelta64_dtype(dtype): + expected_dtype = dtype + # for timedelta dtypes, scalar values get cast to pd.Timedelta.value + exp_val_for_scalar = pd.Timedelta(fill_value).to_timedelta64() + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(string_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling string with anything casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_string(any_numpy_dtype_reduced, string_dtype): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype + fill_value = "abc" + + # filling anything with a string casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def 
test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype_reduced): + dtype = np.dtype(object_dtype) + fill_dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create array of object dtype from a scalar value (i.e. passing + # dtypes.common.is_scalar), which can however not be cast to int/float etc. + fill_value = pd.DateOffset(1) + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) +def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced, fill_value): + dtype = np.dtype(any_numpy_dtype_reduced) + + if is_integer_dtype(dtype) and fill_value is not NaT: + # integer + other missing value (np.nan / None) casts to float + expected_dtype = np.float64 + exp_val_for_scalar = np.nan + elif is_object_dtype(dtype) and fill_value is NaT: + # inserting into object does not cast the value + # but *does* cast None to np.nan + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_datetime_or_timedelta_dtype(dtype): + # datetime / timedelta cast all missing values to dtyped-NaT + expected_dtype = dtype + exp_val_for_scalar = dtype.type("NaT", "ns") + elif fill_value is NaT: + # NaT upcasts everything that's not datetime/timedelta to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = NaT + elif is_float_dtype(dtype) or is_complex_dtype(dtype): + # float / complex + missing value (!= NaT) stays the same 
+ expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + # all other cases cast to object, and use np.nan as missing value + expected_dtype = np.dtype(object) + exp_val_for_scalar = np.nan + + _check_promote(dtype, fill_value, expected_dtype, exp_val_for_scalar) + + +@pytest.mark.parametrize("dim", [0, 2, 3]) +def test_maybe_promote_dimensions(any_numpy_dtype_reduced, dim): + dtype = np.dtype(any_numpy_dtype_reduced) + + # create 0-dim array of given dtype; casts "1" to correct dtype + fill_array = np.array(1, dtype=dtype) + + # expand to desired dimension: + for _ in range(dim): + fill_array = np.expand_dims(fill_array, 0) + + if dtype != object: + # test against 1-dimensional case + with pytest.raises(ValueError, match="fill_value must be a scalar"): + maybe_promote(dtype, np.array([1], dtype=dtype)) + + with pytest.raises(ValueError, match="fill_value must be a scalar"): + maybe_promote(dtype, fill_array) + + else: + expected_dtype, expected_missing_value = maybe_promote( + dtype, np.array([1], dtype=dtype) + ) + result_dtype, result_missing_value = maybe_promote(dtype, fill_array) + assert result_dtype == expected_dtype + _assert_match(result_missing_value, expected_missing_value) diff --git a/pandas/tests/dtypes/cast/test_upcast.py b/pandas/tests/dtypes/cast/test_upcast.py new file mode 100644 index 00000000..bb7a7d05 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_upcast.py @@ -0,0 +1,71 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_upcast_putmask + +from pandas import Series +import pandas._testing as tm + + +@pytest.mark.parametrize("result", [Series([10, 11, 12]), [10, 11, 12], (10, 11, 12)]) +def test_upcast_error(result): + # GH23823 require result arg to be ndarray + mask = np.array([False, True, False]) + other = np.array([61, 62, 63]) + with pytest.raises(ValueError): + result, _ = maybe_upcast_putmask(result, mask, other) + + +@pytest.mark.parametrize( + "arr, other", + [ + (np.arange(1, 6), 
np.array([61, 62, 63])), + (np.arange(1, 6), np.array([61.1, 62.2, 63.3])), + (np.arange(10, 15), np.array([61, 62])), + (np.arange(10, 15), np.array([61, np.nan])), + ( + np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"), + np.arange("2018-01-01", "2018-01-04", dtype="datetime64[D]"), + ), + ( + np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]"), + np.arange("2018-01-01", "2018-01-03", dtype="datetime64[D]"), + ), + ], +) +def test_upcast_scalar_other(arr, other): + # for now we do not support non-scalar `other` + mask = np.array([False, True, False, True, True]) + with pytest.raises(ValueError, match="other must be a scalar"): + maybe_upcast_putmask(arr, mask, other) + + +def test_upcast(): + # GH23823 + arr = np.arange(1, 6) + mask = np.array([False, True, False, True, True]) + result, changed = maybe_upcast_putmask(arr, mask, other=np.nan) + + expected = np.array([1, np.nan, 3, np.nan, np.nan]) + assert changed + tm.assert_numpy_array_equal(result, expected) + + +def test_upcast_datetime(): + # GH23823 + arr = np.arange("2019-01-01", "2019-01-06", dtype="datetime64[D]") + mask = np.array([False, True, False, True, True]) + result, changed = maybe_upcast_putmask(arr, mask, other=np.nan) + + expected = np.array( + [ + "2019-01-01", + np.datetime64("NaT"), + "2019-01-03", + np.datetime64("NaT"), + np.datetime64("NaT"), + ], + dtype="datetime64[D]", + ) + assert not changed + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py new file mode 100644 index 00000000..4c917b9b --- /dev/null +++ b/pandas/tests/dtypes/test_common.py @@ -0,0 +1,757 @@ +from datetime import datetime +from typing import List + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.cast import astype_nansafe +import pandas.core.dtypes.common as com +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + CategoricalDtypeType, + 
DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import isna + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import SparseArray +from pandas.conftest import ( + ALL_EA_INT_DTYPES, + ALL_INT_DTYPES, + SIGNED_EA_INT_DTYPES, + SIGNED_INT_DTYPES, + UNSIGNED_EA_INT_DTYPES, + UNSIGNED_INT_DTYPES, +) + + +# EA & Actual Dtypes +def to_ea_dtypes(dtypes): + """ convert list of string dtypes to EA dtype """ + return [getattr(pd, dt + "Dtype") for dt in dtypes] + + +def to_numpy_dtypes(dtypes): + """ convert list of string dtypes to numpy dtype """ + return [getattr(np, dt) for dt in dtypes if isinstance(dt, str)] + + +class TestPandasDtype: + + # Passing invalid dtype, both as a string or object, must raise TypeError + # Per issue GH15520 + @pytest.mark.parametrize("box", [pd.Timestamp, "pd.Timestamp", list]) + def test_invalid_dtype_error(self, box): + with pytest.raises(TypeError, match="not understood"): + com.pandas_dtype(box) + + @pytest.mark.parametrize( + "dtype", + [ + object, + "float64", + np.object_, + np.dtype("object"), + "O", + np.float64, + float, + np.dtype("float64"), + ], + ) + def test_pandas_dtype_valid(self, dtype): + assert com.pandas_dtype(dtype) == dtype + + @pytest.mark.parametrize( + "dtype", ["M8[ns]", "m8[ns]", "object", "float64", "int64"] + ) + def test_numpy_dtype(self, dtype): + assert com.pandas_dtype(dtype) == np.dtype(dtype) + + def test_numpy_string_dtype(self): + # do not parse freq-like string as period dtype + assert com.pandas_dtype("U") == np.dtype("U") + assert com.pandas_dtype("S") == np.dtype("S") + + @pytest.mark.parametrize( + "dtype", + [ + "datetime64[ns, US/Eastern]", + "datetime64[ns, Asia/Tokyo]", + "datetime64[ns, UTC]", + ], + ) + def test_datetimetz_dtype(self, dtype): + assert com.pandas_dtype(dtype) == DatetimeTZDtype.construct_from_string(dtype) + assert com.pandas_dtype(dtype) == dtype + + def test_categorical_dtype(self): + assert 
com.pandas_dtype("category") == CategoricalDtype() + + @pytest.mark.parametrize( + "dtype", + [ + "period[D]", + "period[3M]", + "period[U]", + "Period[D]", + "Period[3M]", + "Period[U]", + ], + ) + def test_period_dtype(self, dtype): + assert com.pandas_dtype(dtype) is PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == PeriodDtype(dtype) + assert com.pandas_dtype(dtype) == dtype + + +dtypes = dict( + datetime_tz=com.pandas_dtype("datetime64[ns, US/Eastern]"), + datetime=com.pandas_dtype("datetime64[ns]"), + timedelta=com.pandas_dtype("timedelta64[ns]"), + period=PeriodDtype("D"), + integer=np.dtype(np.int64), + float=np.dtype(np.float64), + object=np.dtype(np.object), + category=com.pandas_dtype("category"), +) + + +@pytest.mark.parametrize("name1,dtype1", list(dtypes.items()), ids=lambda x: str(x)) +@pytest.mark.parametrize("name2,dtype2", list(dtypes.items()), ids=lambda x: str(x)) +def test_dtype_equal(name1, dtype1, name2, dtype2): + + # match equal to self, but not equal to other + assert com.is_dtype_equal(dtype1, dtype1) + if name1 != name2: + assert not com.is_dtype_equal(dtype1, dtype2) + + +@pytest.mark.parametrize( + "dtype1,dtype2", + [ + (np.int8, np.int64), + (np.int16, np.int64), + (np.int32, np.int64), + (np.float32, np.float64), + (PeriodDtype("D"), PeriodDtype("2D")), # PeriodType + ( + com.pandas_dtype("datetime64[ns, US/Eastern]"), + com.pandas_dtype("datetime64[ns, CET]"), + ), # Datetime + (None, None), # gh-15941: no exception should be raised. 
+ ], +) +def test_dtype_equal_strict(dtype1, dtype2): + assert not com.is_dtype_equal(dtype1, dtype2) + + +def get_is_dtype_funcs(): + """ + Get all functions in pandas.core.dtypes.common that + begin with 'is_' and end with 'dtype' + + """ + + fnames = [f for f in dir(com) if (f.startswith("is_") and f.endswith("dtype"))] + return [getattr(com, fname) for fname in fnames] + + +@pytest.mark.parametrize("func", get_is_dtype_funcs(), ids=lambda x: x.__name__) +def test_get_dtype_error_catch(func): + # see gh-15941 + # + # No exception should be raised. + + assert not func(None) + + +def test_is_object(): + assert com.is_object_dtype(object) + assert com.is_object_dtype(np.array([], dtype=object)) + + assert not com.is_object_dtype(int) + assert not com.is_object_dtype(np.array([], dtype=int)) + assert not com.is_object_dtype([1, 2, 3]) + + +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_sparse(check_scipy): + assert com.is_sparse(SparseArray([1, 2, 3])) + + assert not com.is_sparse(np.array([1, 2, 3])) + + if check_scipy: + import scipy.sparse + + assert not com.is_sparse(scipy.sparse.bsr_matrix([1, 2, 3])) + + +@td.skip_if_no_scipy +def test_is_scipy_sparse(): + from scipy.sparse import bsr_matrix + + assert com.is_scipy_sparse(bsr_matrix([1, 2, 3])) + + assert not com.is_scipy_sparse(SparseArray([1, 2, 3])) + + +def test_is_categorical(): + cat = pd.Categorical([1, 2, 3]) + assert com.is_categorical(cat) + assert com.is_categorical(pd.Series(cat)) + assert com.is_categorical(pd.CategoricalIndex([1, 2, 3])) + + assert not com.is_categorical([1, 2, 3]) + + +def test_is_datetime64_dtype(): + assert not com.is_datetime64_dtype(object) + assert not com.is_datetime64_dtype([1, 2, 3]) + assert not com.is_datetime64_dtype(np.array([], dtype=int)) + + assert com.is_datetime64_dtype(np.datetime64) + assert com.is_datetime64_dtype(np.array([], dtype=np.datetime64)) + + +def test_is_datetime64tz_dtype(): + 
assert not com.is_datetime64tz_dtype(object) + assert not com.is_datetime64tz_dtype([1, 2, 3]) + assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) + assert com.is_datetime64tz_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + +def test_is_timedelta64_dtype(): + assert not com.is_timedelta64_dtype(object) + assert not com.is_timedelta64_dtype(None) + assert not com.is_timedelta64_dtype([1, 2, 3]) + assert not com.is_timedelta64_dtype(np.array([], dtype=np.datetime64)) + assert not com.is_timedelta64_dtype("0 days") + assert not com.is_timedelta64_dtype("0 days 00:00:00") + assert not com.is_timedelta64_dtype(["0 days 00:00:00"]) + assert not com.is_timedelta64_dtype("NO DATE") + + assert com.is_timedelta64_dtype(np.timedelta64) + assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) + assert com.is_timedelta64_dtype(pd.to_timedelta(["0 days", "1 days"])) + + +def test_is_period_dtype(): + assert not com.is_period_dtype(object) + assert not com.is_period_dtype([1, 2, 3]) + assert not com.is_period_dtype(pd.Period("2017-01-01")) + + assert com.is_period_dtype(PeriodDtype(freq="D")) + assert com.is_period_dtype(pd.PeriodIndex([], freq="A")) + + +def test_is_interval_dtype(): + assert not com.is_interval_dtype(object) + assert not com.is_interval_dtype([1, 2, 3]) + + assert com.is_interval_dtype(IntervalDtype()) + + interval = pd.Interval(1, 2, closed="right") + assert not com.is_interval_dtype(interval) + assert com.is_interval_dtype(pd.IntervalIndex([interval])) + + +def test_is_categorical_dtype(): + assert not com.is_categorical_dtype(object) + assert not com.is_categorical_dtype([1, 2, 3]) + + assert com.is_categorical_dtype(CategoricalDtype()) + assert com.is_categorical_dtype(pd.Categorical([1, 2, 3])) + assert com.is_categorical_dtype(pd.CategoricalIndex([1, 2, 3])) + + +def test_is_string_dtype(): + assert not com.is_string_dtype(int) + assert not com.is_string_dtype(pd.Series([1, 2])) + + assert com.is_string_dtype(str) + 
assert com.is_string_dtype(object) + assert com.is_string_dtype(np.array(["a", "b"])) + assert com.is_string_dtype(pd.StringDtype()) + assert com.is_string_dtype(pd.array(["a", "b"], dtype="string")) + + +def test_is_period_arraylike(): + assert not com.is_period_arraylike([1, 2, 3]) + assert not com.is_period_arraylike(pd.Index([1, 2, 3])) + assert com.is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) + + +def test_is_datetime_arraylike(): + assert not com.is_datetime_arraylike([1, 2, 3]) + assert not com.is_datetime_arraylike(pd.Index([1, 2, 3])) + assert com.is_datetime_arraylike(pd.DatetimeIndex([1, 2, 3])) + + +integer_dtypes: List = [] + + +@pytest.mark.parametrize( + "dtype", + integer_dtypes + + [pd.Series([1, 2])] + + ALL_INT_DTYPES + + to_numpy_dtypes(ALL_INT_DTYPES) + + ALL_EA_INT_DTYPES + + to_ea_dtypes(ALL_EA_INT_DTYPES), +) +def test_is_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ], +) +def test_is_not_integer_dtype(dtype): + assert not com.is_integer_dtype(dtype) + + +signed_integer_dtypes: List = [] + + +@pytest.mark.parametrize( + "dtype", + signed_integer_dtypes + + [pd.Series([1, 2])] + + SIGNED_INT_DTYPES + + to_numpy_dtypes(SIGNED_INT_DTYPES) + + SIGNED_EA_INT_DTYPES + + to_ea_dtypes(SIGNED_EA_INT_DTYPES), +) +def test_is_signed_integer_dtype(dtype): + assert com.is_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ] + + UNSIGNED_INT_DTYPES + + to_numpy_dtypes(UNSIGNED_INT_DTYPES) + + UNSIGNED_EA_INT_DTYPES + + to_ea_dtypes(UNSIGNED_EA_INT_DTYPES), +) +def test_is_not_signed_integer_dtype(dtype): + assert not com.is_signed_integer_dtype(dtype) + + +unsigned_integer_dtypes: List = [] + + 
+@pytest.mark.parametrize( + "dtype", + unsigned_integer_dtypes + + [pd.Series([1, 2], dtype=np.uint32)] + + UNSIGNED_INT_DTYPES + + to_numpy_dtypes(UNSIGNED_INT_DTYPES) + + UNSIGNED_EA_INT_DTYPES + + to_ea_dtypes(UNSIGNED_EA_INT_DTYPES), +) +def test_is_unsigned_integer_dtype(dtype): + assert com.is_unsigned_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.datetime64, + np.timedelta64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([], dtype=np.timedelta64), + ] + + SIGNED_INT_DTYPES + + to_numpy_dtypes(SIGNED_INT_DTYPES) + + SIGNED_EA_INT_DTYPES + + to_ea_dtypes(SIGNED_EA_INT_DTYPES), +) +def test_is_not_unsigned_integer_dtype(dtype): + assert not com.is_unsigned_integer_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", [np.int64, np.array([1, 2], dtype=np.int64), "Int64", pd.Int64Dtype] +) +def test_is_int64_dtype(dtype): + assert com.is_int64_dtype(dtype) + + +@pytest.mark.parametrize( + "dtype", + [ + str, + float, + np.int32, + np.uint64, + pd.Index([1, 2.0]), + np.array(["a", "b"]), + np.array([1, 2], dtype=np.uint32), + "int8", + "Int8", + pd.Int8Dtype, + ], +) +def test_is_not_int64_dtype(dtype): + assert not com.is_int64_dtype(dtype) + + +def test_is_datetime64_any_dtype(): + assert not com.is_datetime64_any_dtype(int) + assert not com.is_datetime64_any_dtype(str) + assert not com.is_datetime64_any_dtype(np.array([1, 2])) + assert not com.is_datetime64_any_dtype(np.array(["a", "b"])) + + assert com.is_datetime64_any_dtype(np.datetime64) + assert com.is_datetime64_any_dtype(np.array([], dtype=np.datetime64)) + assert com.is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_any_dtype( + pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]") + ) + + +def test_is_datetime64_ns_dtype(): + assert not com.is_datetime64_ns_dtype(int) + assert not com.is_datetime64_ns_dtype(str) + assert not com.is_datetime64_ns_dtype(np.datetime64) + assert not 
com.is_datetime64_ns_dtype(np.array([1, 2])) + assert not com.is_datetime64_ns_dtype(np.array(["a", "b"])) + assert not com.is_datetime64_ns_dtype(np.array([], dtype=np.datetime64)) + + # This datetime array has the wrong unit (ps instead of ns) + assert not com.is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) + + assert com.is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) + assert com.is_datetime64_ns_dtype( + pd.DatetimeIndex([1, 2, 3], dtype=np.dtype("datetime64[ns]")) + ) + + +def test_is_timedelta64_ns_dtype(): + assert not com.is_timedelta64_ns_dtype(np.dtype("m8[ps]")) + assert not com.is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) + + assert com.is_timedelta64_ns_dtype(np.dtype("m8[ns]")) + assert com.is_timedelta64_ns_dtype(np.array([1, 2], dtype="m8[ns]")) + + +def test_is_datetime_or_timedelta_dtype(): + assert not com.is_datetime_or_timedelta_dtype(int) + assert not com.is_datetime_or_timedelta_dtype(str) + assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2])) + assert not com.is_datetime_or_timedelta_dtype(np.array(["a", "b"])) + + # TODO(jreback), this is slightly suspect + assert not com.is_datetime_or_timedelta_dtype(DatetimeTZDtype("ns", "US/Eastern")) + + assert com.is_datetime_or_timedelta_dtype(np.datetime64) + assert com.is_datetime_or_timedelta_dtype(np.timedelta64) + assert com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64)) + assert com.is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) + + +def test_is_numeric_v_string_like(): + assert not com.is_numeric_v_string_like(1, 1) + assert not com.is_numeric_v_string_like(1, "foo") + assert not com.is_numeric_v_string_like("foo", "foo") + assert not com.is_numeric_v_string_like(np.array([1]), np.array([2])) + assert not com.is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"])) + + assert com.is_numeric_v_string_like(np.array([1]), "foo") + assert com.is_numeric_v_string_like("foo", np.array([1])) + assert 
com.is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"])) + assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) + + +def test_is_datetimelike_v_numeric(): + dt = np.datetime64(datetime(2017, 1, 1)) + + assert not com.is_datetimelike_v_numeric(1, 1) + assert not com.is_datetimelike_v_numeric(dt, dt) + assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2])) + assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) + + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(1, dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), 1) + assert com.is_datetimelike_v_numeric(np.array([1]), dt) + assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) + + +def test_needs_i8_conversion(): + assert not com.needs_i8_conversion(str) + assert not com.needs_i8_conversion(np.int64) + assert not com.needs_i8_conversion(pd.Series([1, 2])) + assert not com.needs_i8_conversion(np.array(["a", "b"])) + + assert com.needs_i8_conversion(np.datetime64) + assert com.needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]")) + assert com.needs_i8_conversion(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + +def test_is_numeric_dtype(): + assert not com.is_numeric_dtype(str) + assert not com.is_numeric_dtype(np.datetime64) + assert not com.is_numeric_dtype(np.timedelta64) + assert not com.is_numeric_dtype(np.array(["a", "b"])) + assert not com.is_numeric_dtype(np.array([], dtype=np.timedelta64)) + + assert com.is_numeric_dtype(int) + assert com.is_numeric_dtype(float) + assert com.is_numeric_dtype(np.uint64) + assert com.is_numeric_dtype(pd.Series([1, 2])) + assert com.is_numeric_dtype(pd.Index([1, 2.0])) + + +def test_is_string_like_dtype(): + assert not com.is_string_like_dtype(object) + assert not com.is_string_like_dtype(pd.Series([1, 2])) + + assert com.is_string_like_dtype(str) + assert com.is_string_like_dtype(np.array(["a", "b"])) + + +def test_is_float_dtype(): + assert not 
com.is_float_dtype(str) + assert not com.is_float_dtype(int) + assert not com.is_float_dtype(pd.Series([1, 2])) + assert not com.is_float_dtype(np.array(["a", "b"])) + + assert com.is_float_dtype(float) + assert com.is_float_dtype(pd.Index([1, 2.0])) + + +def test_is_bool_dtype(): + assert not com.is_bool_dtype(int) + assert not com.is_bool_dtype(str) + assert not com.is_bool_dtype(pd.Series([1, 2])) + assert not com.is_bool_dtype(np.array(["a", "b"])) + assert not com.is_bool_dtype(pd.Index(["a", "b"])) + + assert com.is_bool_dtype(bool) + assert com.is_bool_dtype(np.bool) + assert com.is_bool_dtype(np.array([True, False])) + assert com.is_bool_dtype(pd.Index([True, False])) + + assert com.is_bool_dtype(pd.BooleanDtype()) + assert com.is_bool_dtype(pd.array([True, False, None], dtype="boolean")) + + +@pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_extension_type(check_scipy): + assert not com.is_extension_type([1, 2, 3]) + assert not com.is_extension_type(np.array([1, 2, 3])) + assert not com.is_extension_type(pd.DatetimeIndex([1, 2, 3])) + + cat = pd.Categorical([1, 2, 3]) + assert com.is_extension_type(cat) + assert com.is_extension_type(pd.Series(cat)) + assert com.is_extension_type(SparseArray([1, 2, 3])) + assert com.is_extension_type(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_extension_type(s) + + if check_scipy: + import scipy.sparse + + assert not com.is_extension_type(scipy.sparse.bsr_matrix([1, 2, 3])) + + +def test_is_extension_type_deprecation(): + with tm.assert_produces_warning(FutureWarning): + com.is_extension_type([1, 2, 3]) + + +@pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] +) +def test_is_extension_array_dtype(check_scipy): + assert not 
com.is_extension_array_dtype([1, 2, 3]) + assert not com.is_extension_array_dtype(np.array([1, 2, 3])) + assert not com.is_extension_array_dtype(pd.DatetimeIndex([1, 2, 3])) + + cat = pd.Categorical([1, 2, 3]) + assert com.is_extension_array_dtype(cat) + assert com.is_extension_array_dtype(pd.Series(cat)) + assert com.is_extension_array_dtype(SparseArray([1, 2, 3])) + assert com.is_extension_array_dtype(pd.DatetimeIndex(["2000"], tz="US/Eastern")) + + dtype = DatetimeTZDtype("ns", tz="US/Eastern") + s = pd.Series([], dtype=dtype) + assert com.is_extension_array_dtype(s) + + if check_scipy: + import scipy.sparse + + assert not com.is_extension_array_dtype(scipy.sparse.bsr_matrix([1, 2, 3])) + + +def test_is_complex_dtype(): + assert not com.is_complex_dtype(int) + assert not com.is_complex_dtype(str) + assert not com.is_complex_dtype(pd.Series([1, 2])) + assert not com.is_complex_dtype(np.array(["a", "b"])) + + assert com.is_complex_dtype(np.complex) + assert com.is_complex_dtype(np.array([1 + 1j, 5])) + + +@pytest.mark.parametrize( + "input_param,result", + [ + (int, np.dtype(int)), + ("int32", np.dtype("int32")), + (float, np.dtype(float)), + ("float64", np.dtype("float64")), + (np.dtype("float64"), np.dtype("float64")), + (str, np.dtype(str)), + (pd.Series([1, 2], dtype=np.dtype("int16")), np.dtype("int16")), + (pd.Series(["a", "b"]), np.dtype(object)), + (pd.Index([1, 2]), np.dtype("int64")), + (pd.Index(["a", "b"]), np.dtype(object)), + ("category", "category"), + (pd.Categorical(["a", "b"]).dtype, CategoricalDtype(["a", "b"])), + (pd.Categorical(["a", "b"]), CategoricalDtype(["a", "b"])), + (pd.CategoricalIndex(["a", "b"]).dtype, CategoricalDtype(["a", "b"])), + (pd.CategoricalIndex(["a", "b"]), CategoricalDtype(["a", "b"])), + (CategoricalDtype(), CategoricalDtype()), + (CategoricalDtype(["a", "b"]), CategoricalDtype()), + (pd.DatetimeIndex([1, 2]), np.dtype("=M8[ns]")), + (pd.DatetimeIndex([1, 2]).dtype, np.dtype("=M8[ns]")), + (" df.two.sum() + + with 
catch_warnings(record=True) as w: + # successfully modify column in place + # this should not raise a warning + df.one += 1 + assert len(w) == 0 + assert df.one.iloc[0] == 2 + + with catch_warnings(record=True) as w: + # successfully add an attribute to a series + # this should not raise a warning + df.two.not_an_index = [1, 2] + assert len(w) == 0 + + with tm.assert_produces_warning(UserWarning): + # warn when setting column to nonexistent name + df.four = df.two + 2 + assert df.four.sum() > df.two.sum() diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py new file mode 100644 index 00000000..b148fc57 --- /dev/null +++ b/pandas/tests/dtypes/test_inference.py @@ -0,0 +1,1485 @@ +""" +These the test the public routines exposed in types/common.py +related to inference and not otherwise tested in types/test_common.py + +""" +import collections +from collections import namedtuple +from datetime import date, datetime, time, timedelta +from decimal import Decimal +from fractions import Fraction +from io import StringIO +from numbers import Number +import re + +import numpy as np +import pytest +import pytz + +from pandas._libs import iNaT, lib, missing as libmissing +import pandas.util._test_decorators as td + +from pandas.core.dtypes import inference +from pandas.core.dtypes.common import ( + ensure_categorical, + ensure_int32, + is_bool, + is_datetime64_any_dtype, + is_datetime64_dtype, + is_datetime64_ns_dtype, + is_datetime64tz_dtype, + is_float, + is_integer, + is_number, + is_scalar, + is_scipy_sparse, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, +) + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DateOffset, + DatetimeIndex, + Index, + Interval, + Period, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + isna, +) +import pandas._testing as tm +from pandas.core.arrays import IntegerArray + + +@pytest.fixture(params=[True, False], ids=str) +def coerce(request): + return request.param + + +# 
collect all objects to be tested for list-like-ness; use tuples of objects, +# whether they are list-like or not (special casing for sets), and their ID +ll_params = [ + ([1], True, "list"), + ([], True, "list-empty"), + ((1,), True, "tuple"), + (tuple(), True, "tuple-empty"), + ({"a": 1}, True, "dict"), + (dict(), True, "dict-empty"), + ({"a", 1}, "set", "set"), + (set(), "set", "set-empty"), + (frozenset({"a", 1}), "set", "frozenset"), + (frozenset(), "set", "frozenset-empty"), + (iter([1, 2]), True, "iterator"), + (iter([]), True, "iterator-empty"), + ((x for x in [1, 2]), True, "generator"), + ((_ for _ in []), True, "generator-empty"), + (Series([1]), True, "Series"), + (Series([], dtype=object), True, "Series-empty"), + (Series(["a"]).str, True, "StringMethods"), + (Series([], dtype="O").str, True, "StringMethods-empty"), + (Index([1]), True, "Index"), + (Index([]), True, "Index-empty"), + (DataFrame([[1]]), True, "DataFrame"), + (DataFrame(), True, "DataFrame-empty"), + (np.ndarray((2,) * 1), True, "ndarray-1d"), + (np.array([]), True, "ndarray-1d-empty"), + (np.ndarray((2,) * 2), True, "ndarray-2d"), + (np.array([[]]), True, "ndarray-2d-empty"), + (np.ndarray((2,) * 3), True, "ndarray-3d"), + (np.array([[[]]]), True, "ndarray-3d-empty"), + (np.ndarray((2,) * 4), True, "ndarray-4d"), + (np.array([[[[]]]]), True, "ndarray-4d-empty"), + (np.array(2), False, "ndarray-0d"), + (1, False, "int"), + (b"123", False, "bytes"), + (b"", False, "bytes-empty"), + ("123", False, "string"), + ("", False, "string-empty"), + (str, False, "string-type"), + (object(), False, "object"), + (np.nan, False, "NaN"), + (None, False, "None"), +] +objs, expected, ids = zip(*ll_params) + + +@pytest.fixture(params=zip(objs, expected), ids=ids) +def maybe_list_like(request): + return request.param + + +def test_is_list_like(maybe_list_like): + obj, expected = maybe_list_like + expected = True if expected == "set" else expected + assert inference.is_list_like(obj) == expected + + +def 
test_is_list_like_disallow_sets(maybe_list_like): + obj, expected = maybe_list_like + expected = False if expected == "set" else expected + assert inference.is_list_like(obj, allow_sets=False) == expected + + +def test_is_sequence(): + is_seq = inference.is_sequence + assert is_seq((1, 2)) + assert is_seq([1, 2]) + assert not is_seq("abcd") + assert not is_seq(np.int64) + + class A: + def __getitem__(self): + return 1 + + assert not is_seq(A()) + + +def test_is_array_like(): + assert inference.is_array_like(Series([], dtype=object)) + assert inference.is_array_like(Series([1, 2])) + assert inference.is_array_like(np.array(["a", "b"])) + assert inference.is_array_like(Index(["2016-01-01"])) + + class DtypeList(list): + dtype = "special" + + assert inference.is_array_like(DtypeList()) + + assert not inference.is_array_like([1, 2, 3]) + assert not inference.is_array_like(tuple()) + assert not inference.is_array_like("foo") + assert not inference.is_array_like(123) + + +@pytest.mark.parametrize( + "inner", + [ + [], + [1], + (1,), + (1, 2), + {"a": 1}, + {1, "a"}, + Series([1]), + Series([], dtype=object), + Series(["a"]).str, + (x for x in range(5)), + ], +) +@pytest.mark.parametrize("outer", [list, Series, np.array, tuple]) +def test_is_nested_list_like_passes(inner, outer): + result = outer([inner for _ in range(5)]) + assert inference.is_list_like(result) + + +@pytest.mark.parametrize( + "obj", + [ + "abc", + [], + [1], + (1,), + ["a"], + "a", + {"a"}, + [1, 2, 3], + Series([1]), + DataFrame({"A": [1]}), + ([1, 2] for _ in range(5)), + ], +) +def test_is_nested_list_like_fails(obj): + assert not inference.is_nested_list_like(obj) + + +@pytest.mark.parametrize("ll", [{}, {"A": 1}, Series([1]), collections.defaultdict()]) +def test_is_dict_like_passes(ll): + assert inference.is_dict_like(ll) + + +@pytest.mark.parametrize( + "ll", + [ + "1", + 1, + [1, 2], + (1, 2), + range(2), + Index([1]), + dict, + collections.defaultdict, + Series, + ], +) +def 
test_is_dict_like_fails(ll): + assert not inference.is_dict_like(ll) + + +@pytest.mark.parametrize("has_keys", [True, False]) +@pytest.mark.parametrize("has_getitem", [True, False]) +@pytest.mark.parametrize("has_contains", [True, False]) +def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains): + class DictLike: + def __init__(self, d): + self.d = d + + if has_keys: + + def keys(self): + return self.d.keys() + + if has_getitem: + + def __getitem__(self, key): + return self.d.__getitem__(key) + + if has_contains: + + def __contains__(self, key) -> bool: + return self.d.__contains__(key) + + d = DictLike({1: 2}) + result = inference.is_dict_like(d) + expected = has_keys and has_getitem and has_contains + + assert result is expected + + +def test_is_file_like(): + class MockFile: + pass + + is_file = inference.is_file_like + + data = StringIO("data") + assert is_file(data) + + # No read / write attributes + # No iterator attributes + m = MockFile() + assert not is_file(m) + + MockFile.write = lambda self: 0 + + # Write attribute but not an iterator + m = MockFile() + assert not is_file(m) + + # gh-16530: Valid iterator just means we have the + # __iter__ attribute for our purposes. 
+ MockFile.__iter__ = lambda self: self + + # Valid write-only file + m = MockFile() + assert is_file(m) + + del MockFile.write + MockFile.read = lambda self: 0 + + # Valid read-only file + m = MockFile() + assert is_file(m) + + # Iterator but no read / write attributes + data = [1, 2, 3] + assert not is_file(data) + + +test_tuple = collections.namedtuple("Test", ["a", "b", "c"]) + + +@pytest.mark.parametrize("ll", [test_tuple(1, 2, 3)]) +def test_is_names_tuple_passes(ll): + assert inference.is_named_tuple(ll) + + +@pytest.mark.parametrize("ll", [(1, 2, 3), "a", Series({"pi": 3.14})]) +def test_is_names_tuple_fails(ll): + assert not inference.is_named_tuple(ll) + + +def test_is_hashable(): + + # all new-style classes are hashable by default + class HashableClass: + pass + + class UnhashableClass1: + __hash__ = None + + class UnhashableClass2: + def __hash__(self): + raise TypeError("Not hashable") + + hashable = (1, 3.14, np.float64(3.14), "a", tuple(), (1,), HashableClass()) + not_hashable = ([], UnhashableClass1()) + abc_hashable_not_really_hashable = (([],), UnhashableClass2()) + + for i in hashable: + assert inference.is_hashable(i) + for i in not_hashable: + assert not inference.is_hashable(i) + for i in abc_hashable_not_really_hashable: + assert not inference.is_hashable(i) + + # numpy.array is no longer collections.abc.Hashable as of + # https://github.com/numpy/numpy/pull/5326, just test + # is_hashable() + assert not inference.is_hashable(np.array([])) + + +@pytest.mark.parametrize("ll", [re.compile("ad")]) +def test_is_re_passes(ll): + assert inference.is_re(ll) + + +@pytest.mark.parametrize("ll", ["x", 2, 3, object()]) +def test_is_re_fails(ll): + assert not inference.is_re(ll) + + +@pytest.mark.parametrize( + "ll", [r"a", "x", r"asdf", re.compile("adsf"), r"\u2233\s*", re.compile(r"")] +) +def test_is_recompilable_passes(ll): + assert inference.is_re_compilable(ll) + + +@pytest.mark.parametrize("ll", [1, [], object()]) +def 
test_is_recompilable_fails(ll): + assert not inference.is_re_compilable(ll) + + +class TestInference: + def test_infer_dtype_bytes(self): + compare = "bytes" + + # string array of bytes + arr = np.array(list("abc"), dtype="S1") + assert lib.infer_dtype(arr, skipna=True) == compare + + # object array of bytes + arr = arr.astype(object) + assert lib.infer_dtype(arr, skipna=True) == compare + + # object array of bytes with missing values + assert lib.infer_dtype([b"a", np.nan, b"c"], skipna=True) == compare + + def test_isinf_scalar(self): + # GH 11352 + assert libmissing.isposinf_scalar(float("inf")) + assert libmissing.isposinf_scalar(np.inf) + assert not libmissing.isposinf_scalar(-np.inf) + assert not libmissing.isposinf_scalar(1) + assert not libmissing.isposinf_scalar("a") + + assert libmissing.isneginf_scalar(float("-inf")) + assert libmissing.isneginf_scalar(-np.inf) + assert not libmissing.isneginf_scalar(np.inf) + assert not libmissing.isneginf_scalar(1) + assert not libmissing.isneginf_scalar("a") + + @pytest.mark.parametrize("maybe_int", [True, False]) + @pytest.mark.parametrize( + "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"] + ) + def test_maybe_convert_numeric_infinities(self, infinity, maybe_int): + # see gh-13274 + na_values = {"", "NULL", "nan"} + + pos = np.array(["inf"], dtype=np.float64) + neg = np.array(["-inf"], dtype=np.float64) + + msg = "Unable to parse string" + + out = lib.maybe_convert_numeric( + np.array([infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(["-" + infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, neg) + + out = lib.maybe_convert_numeric( + np.array([infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(["+" + infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, 
pos) + + # too many characters + with pytest.raises(ValueError, match=msg): + lib.maybe_convert_numeric( + np.array(["foo_" + infinity], dtype=object), na_values, maybe_int + ) + + def test_maybe_convert_numeric_post_floatify_nan(self, coerce): + # see gh-13314 + data = np.array(["1.200", "-999.000", "4.500"], dtype=object) + expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) + nan_values = {-999, -999.0} + + out = lib.maybe_convert_numeric(data, nan_values, coerce) + tm.assert_numpy_array_equal(out, expected) + + def test_convert_infs(self): + arr = np.array(["inf", "inf", "inf"], dtype="O") + result = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + arr = np.array(["-inf", "-inf", "-inf"], dtype="O") + result = lib.maybe_convert_numeric(arr, set(), False) + assert result.dtype == np.float64 + + def test_scientific_no_exponent(self): + # See PR 12215 + arr = np.array(["42E", "2E", "99e", "6e"], dtype="O") + result = lib.maybe_convert_numeric(arr, set(), False, True) + assert np.all(np.isnan(result)) + + def test_convert_non_hashable(self): + # GH13324 + # make sure that we are handing non-hashables + arr = np.array([[10.0, 2], 1.0, "apple"], dtype=object) + result = lib.maybe_convert_numeric(arr, set(), False, True) + tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) + + def test_convert_numeric_uint64(self): + arr = np.array([2 ** 63], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) + + arr = np.array([str(2 ** 63)], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) + + arr = np.array([np.uint64(2 ** 63)], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_numeric(arr, set()), exp) + + @pytest.mark.parametrize( + "arr", + [ + np.array([2 ** 63, np.nan], dtype=object), + 
np.array([str(2 ** 63), np.nan], dtype=object), + np.array([np.nan, 2 ** 63], dtype=object), + np.array([np.nan, str(2 ** 63)], dtype=object), + ], + ) + def test_convert_numeric_uint64_nan(self, coerce, arr): + expected = arr.astype(float) if coerce else arr.copy() + result = lib.maybe_convert_numeric(arr, set(), coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + def test_convert_numeric_uint64_nan_values(self, coerce): + arr = np.array([2 ** 63, 2 ** 63 + 1], dtype=object) + na_values = {2 ** 63} + + expected = ( + np.array([np.nan, 2 ** 63 + 1], dtype=float) if coerce else arr.copy() + ) + result = lib.maybe_convert_numeric(arr, na_values, coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "case", + [ + np.array([2 ** 63, -1], dtype=object), + np.array([str(2 ** 63), -1], dtype=object), + np.array([str(2 ** 63), str(-1)], dtype=object), + np.array([-1, 2 ** 63], dtype=object), + np.array([-1, str(2 ** 63)], dtype=object), + np.array([str(-1), str(2 ** 63)], dtype=object), + ], + ) + def test_convert_numeric_int64_uint64(self, case, coerce): + expected = case.astype(float) if coerce else case.copy() + result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) + def test_convert_int_overflow(self, value): + # see gh-18584 + arr = np.array([value], dtype=object) + result = lib.maybe_convert_objects(arr) + tm.assert_numpy_array_equal(arr, result) + + def test_maybe_convert_objects_uint64(self): + # see gh-4471 + arr = np.array([2 ** 63], dtype=object) + exp = np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + # NumPy bug: can't compare uint64 to int64, as that + # results in both casting to float64, so we should + # make sure that this function is robust against it + arr = np.array([np.uint64(2 ** 63)], dtype=object) + exp = 
np.array([2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2, -1], dtype=object) + exp = np.array([2, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2 ** 63, -1], dtype=object) + exp = np.array([2 ** 63, -1], dtype=object) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + def test_maybe_convert_objects_datetime(self): + # GH27438 + arr = np.array( + [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object + ) + exp = arr.copy() + out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + tm.assert_numpy_array_equal(out, exp) + + arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object) + exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]") + out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + tm.assert_numpy_array_equal(out, exp) + + arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object) + exp = arr.copy() + out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + tm.assert_numpy_array_equal(out, exp) + + @pytest.mark.parametrize( + "exp", + [ + IntegerArray(np.array([2, 0], dtype="i8"), np.array([False, True])), + IntegerArray(np.array([2, 0], dtype="int64"), np.array([False, True])), + ], + ) + def test_maybe_convert_objects_nullable_integer(self, exp): + # GH27335 + arr = np.array([2, np.NaN], dtype=object) + result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) + + tm.assert_extension_array_equal(result, exp) + + def test_maybe_convert_objects_bool_nan(self): + # GH32146 + ind = pd.Index([True, False, np.nan], dtype=object) + exp = np.array([True, False, np.nan], dtype=object) + out = lib.maybe_convert_objects(ind.values, safe=1) + tm.assert_numpy_array_equal(out, exp) + + def test_mixed_dtypes_remain_object_array(self): + # GH14956 + array = np.array([datetime(2015, 1, 1, 
tzinfo=pytz.utc), 1], dtype=object) + result = lib.maybe_convert_objects(array, convert_datetime=1) + tm.assert_numpy_array_equal(result, array) + + +class TestTypeInference: + + # Dummy class used for testing with Python objects + class Dummy: + pass + + def test_inferred_dtype_fixture(self, any_skipna_inferred_dtype): + # see pandas/conftest.py + inferred_dtype, values = any_skipna_inferred_dtype + + # make sure the inferred dtype of the fixture is as requested + assert inferred_dtype == lib.infer_dtype(values, skipna=True) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_length_zero(self, skipna): + result = lib.infer_dtype(np.array([], dtype="i4"), skipna=skipna) + assert result == "integer" + + result = lib.infer_dtype([], skipna=skipna) + assert result == "empty" + + # GH 18004 + arr = np.array([np.array([], dtype=object), np.array([], dtype=object)]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "empty" + + def test_integers(self): + arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "integer" + + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed-integer" + + arr = np.array([1, 2, 3, 4, 5], dtype="i4") + result = lib.infer_dtype(arr, skipna=True) + assert result == "integer" + + @pytest.mark.parametrize( + "arr, skipna", + [ + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), False), + (np.array([1, 2, np.nan, np.nan, 3], dtype="O"), True), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), False), + (np.array([1, 2, 3, np.int64(4), np.int32(5), np.nan], dtype="O"), True), + ], + ) + def test_integer_na(self, arr, skipna): + # GH 27392 + result = lib.infer_dtype(arr, skipna=skipna) + expected = "integer" if skipna else "integer-na" + assert result == expected + + def test_infer_dtype_skipna_default(self): + # infer_dtype `skipna` default 
deprecated in GH#24050, + # changed to True in GH#29876 + arr = np.array([1, 2, 3, np.nan], dtype=object) + + result = lib.infer_dtype(arr) + assert result == "integer" + + def test_bools(self): + arr = np.array([True, False, True, True, True], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([np.bool_(True), np.bool_(False)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([True, False, True, "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed" + + arr = np.array([True, False, True], dtype=bool) + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + arr = np.array([True, np.nan, False], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "boolean" + + result = lib.infer_dtype(arr, skipna=False) + assert result == "mixed" + + def test_floats(self): + arr = np.array([1.0, 2.0, 3.0, np.float64(4), np.float32(5)], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + arr = np.array([1, 2, 3, np.float64(4), np.float32(5), "foo"], dtype="O") + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed-integer" + + arr = np.array([1, 2, 3, 4, 5], dtype="f4") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + arr = np.array([1, 2, 3, 4, 5], dtype="f8") + result = lib.infer_dtype(arr, skipna=True) + assert result == "floating" + + def test_decimals(self): + # GH15690 + arr = np.array([Decimal(1), Decimal(2), Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + arr = np.array([1.0, 2.0, Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "mixed" + + arr = np.array([Decimal(1), Decimal("NaN"), Decimal(3)]) + result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + arr = np.array([Decimal(1), np.nan, Decimal(3)], dtype="O") + 
result = lib.infer_dtype(arr, skipna=True) + assert result == "decimal" + + # complex is compatible with nan, so skipna has no effect + @pytest.mark.parametrize("skipna", [True, False]) + def test_complex(self, skipna): + # gets cast to complex on array construction + arr = np.array([1.0, 2.0, 1 + 1j]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + arr = np.array([1.0, 2.0, 1 + 1j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "mixed" + + # gets cast to complex on array construction + arr = np.array([1, np.nan, 1 + 1j]) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + arr = np.array([1.0, np.nan, 1 + 1j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "mixed" + + # complex with nans stays complex + arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype="O") + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + # test smaller complex dtype; will pass through _try_infer_map fastpath + arr = np.array([1 + 1j, np.nan, 3 + 3j], dtype=np.complex64) + result = lib.infer_dtype(arr, skipna=skipna) + assert result == "complex" + + def test_string(self): + pass + + def test_unicode(self): + arr = ["a", np.nan, "c"] + result = lib.infer_dtype(arr, skipna=False) + # This currently returns "mixed", but it's not clear that's optimal. 
+ # This could also return "string" or "mixed-string" + assert result == "mixed" + + arr = ["a", np.nan, "c"] + result = lib.infer_dtype(arr, skipna=True) + assert result == "string" + + arr = ["a", "c"] + result = lib.infer_dtype(arr, skipna=False) + assert result == "string" + + @pytest.mark.parametrize( + "dtype, missing, skipna, expected", + [ + (float, np.nan, False, "floating"), + (float, np.nan, True, "floating"), + (object, np.nan, False, "floating"), + (object, np.nan, True, "empty"), + (object, None, False, "mixed"), + (object, None, True, "empty"), + ], + ) + @pytest.mark.parametrize("box", [pd.Series, np.array]) + def test_object_empty(self, box, missing, dtype, skipna, expected): + # GH 23421 + arr = box([missing, missing], dtype=dtype) + + result = lib.infer_dtype(arr, skipna=skipna) + assert result == expected + + def test_datetime(self): + + dates = [datetime(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + assert index.inferred_type == "datetime64" + + def test_infer_dtype_datetime(self): + + arr = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + arr = np.array( + [np.datetime64("2011-01-01"), np.datetime64("2011-01-01")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Timestamp("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + arr = np.array([n, np.datetime64("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime64" + + arr = np.array([n, datetime(2011, 1, 1)]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + arr = np.array([n, pd.Timestamp("2011-01-02"), n]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + arr = np.array([n, np.datetime64("2011-01-02"), n]) + assert 
lib.infer_dtype(arr, skipna=True) == "datetime64" + + arr = np.array([n, datetime(2011, 1, 1), n]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + # different type of nat + arr = np.array( + [np.timedelta64("nat"), np.datetime64("2011-01-02")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array( + [np.datetime64("2011-01-02"), np.timedelta64("nat")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + # mixed datetime + arr = np.array([datetime(2011, 1, 1), pd.Timestamp("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "datetime" + + # should be datetime? + arr = np.array([np.datetime64("2011-01-01"), pd.Timestamp("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "mixed" + + arr = np.array([pd.Timestamp("2011-01-02"), np.datetime64("2011-01-01")]) + assert lib.infer_dtype(arr, skipna=True) == "mixed" + + arr = np.array([np.nan, pd.Timestamp("2011-01-02"), 1]) + assert lib.infer_dtype(arr, skipna=True) == "mixed-integer" + + arr = np.array([np.nan, pd.Timestamp("2011-01-02"), 1.1]) + assert lib.infer_dtype(arr, skipna=True) == "mixed" + + arr = np.array([np.nan, "2011-01-01", pd.Timestamp("2011-01-02")]) + assert lib.infer_dtype(arr, skipna=True) == "mixed" + + def test_infer_dtype_timedelta(self): + + arr = np.array([pd.Timedelta("1 days"), pd.Timedelta("2 days")]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([np.timedelta64(1, "D"), np.timedelta64(2, "D")], dtype=object) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([timedelta(1), timedelta(2)]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, Timedelta("1 days")]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([n, np.timedelta64(1, "D")]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([n, timedelta(1)]) + assert 
lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([n, pd.Timedelta("1 days"), n]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([n, np.timedelta64(1, "D"), n]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + arr = np.array([n, timedelta(1), n]) + assert lib.infer_dtype(arr, skipna=True) == "timedelta" + + # different type of nat + arr = np.array([np.datetime64("nat"), np.timedelta64(1, "D")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([np.timedelta64(1, "D"), np.datetime64("nat")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + def test_infer_dtype_period(self): + # GH 13664 + arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + arr = np.array([pd.Period("2011-01", freq="D"), pd.Period("2011-02", freq="M")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Period("2011-01", freq="D")]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + arr = np.array([n, pd.Period("2011-01", freq="D"), n]) + assert lib.infer_dtype(arr, skipna=True) == "period" + + # different type of nat + arr = np.array( + [np.datetime64("nat"), pd.Period("2011-01", freq="M")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array( + [pd.Period("2011-01", freq="M"), np.datetime64("nat")], dtype=object + ) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + @pytest.mark.parametrize( + "data", + [ + [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], + [Timestamp("20170612"), Timestamp("20170311")], + [ + Timestamp("20170612", tz="US/Eastern"), + Timestamp("20170311", tz="US/Eastern"), + ], + [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], + [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], + 
[np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)], + ], + ) + def test_infer_datetimelike_array_datetime(self, data): + assert lib.infer_datetimelike_array(data) == "datetime" + + @pytest.mark.parametrize( + "data", + [ + [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], + [timedelta(2017, 6, 12), date(2017, 3, 11)], + [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], + [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)], + ], + ) + def test_infer_datetimelike_array_timedelta(self, data): + assert lib.infer_datetimelike_array(data) == "timedelta" + + def test_infer_datetimelike_array_date(self): + arr = [date(2017, 6, 12), date(2017, 3, 11)] + assert lib.infer_datetimelike_array(arr) == "date" + + @pytest.mark.parametrize( + "data", + [ + ["2017-06-12", "2017-03-11"], + [20170612, 20170311], + [20170612.5, 20170311.8], + [Dummy(), Dummy()], + [Timestamp("20170612"), Timestamp("20170311", tz="US/Eastern")], + [Timestamp("20170612"), 20170311], + [timedelta(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], + ], + ) + def test_infer_datetimelike_array_mixed(self, data): + assert lib.infer_datetimelike_array(data) == "mixed" + + @pytest.mark.parametrize( + "first, expected", + [ + [[None], "mixed"], + [[np.nan], "mixed"], + [[pd.NaT], "nat"], + [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], + [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], + [[date(2017, 6, 12), pd.NaT], "date"], + [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], + [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"], + ], + ) + @pytest.mark.parametrize("second", [None, np.nan]) + def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected): + first.append(second) + assert lib.infer_datetimelike_array(first) == expected + + def test_infer_dtype_all_nan_nat_like(self): + arr = np.array([np.nan, np.nan]) + assert lib.infer_dtype(arr, skipna=True) == "floating" + + # nan and None mix are result in mixed + arr = np.array([np.nan, np.nan, None]) + assert 
lib.infer_dtype(arr, skipna=True) == "empty" + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([None, np.nan, np.nan]) + assert lib.infer_dtype(arr, skipna=True) == "empty" + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + # pd.NaT + arr = np.array([pd.NaT]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([pd.NaT, np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([np.nan, pd.NaT]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([np.nan, pd.NaT, np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + arr = np.array([None, pd.NaT, None]) + assert lib.infer_dtype(arr, skipna=False) == "datetime" + + # np.datetime64(nat) + arr = np.array([np.datetime64("nat")]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.datetime64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + arr = np.array([pd.NaT, n, np.datetime64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "datetime64" + + arr = np.array([np.timedelta64("nat")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.timedelta64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + arr = np.array([pd.NaT, n, np.timedelta64("nat"), n]) + assert lib.infer_dtype(arr, skipna=False) == "timedelta" + + # datetime / timedelta mixed + arr = np.array([pd.NaT, np.datetime64("nat"), np.timedelta64("nat"), np.nan]) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + arr = np.array([np.timedelta64("nat"), np.datetime64("nat")], dtype=object) + assert lib.infer_dtype(arr, skipna=False) == "mixed" + + def test_is_datetimelike_array_all_nan_nat_like(self): + arr = np.array([np.nan, pd.NaT, np.datetime64("nat")]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) 
+ assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, np.timedelta64("nat")]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT, np.datetime64("nat"), np.timedelta64("nat")]) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, pd.NaT]) + assert lib.is_datetime_array(arr) + assert lib.is_datetime64_array(arr) + assert lib.is_timedelta_or_timedelta64_array(arr) + + arr = np.array([np.nan, np.nan], dtype=object) + assert not lib.is_datetime_array(arr) + assert not lib.is_datetime64_array(arr) + assert not lib.is_timedelta_or_timedelta64_array(arr) + + assert lib.is_datetime_with_singletz_array( + np.array( + [ + pd.Timestamp("20130101", tz="US/Eastern"), + pd.Timestamp("20130102", tz="US/Eastern"), + ], + dtype=object, + ) + ) + assert not lib.is_datetime_with_singletz_array( + np.array( + [ + pd.Timestamp("20130101", tz="US/Eastern"), + pd.Timestamp("20130102", tz="CET"), + ], + dtype=object, + ) + ) + + @pytest.mark.parametrize( + "func", + [ + "is_datetime_array", + "is_datetime64_array", + "is_bool_array", + "is_timedelta_or_timedelta64_array", + "is_date_array", + "is_time_array", + "is_interval_array", + "is_period_array", + ], + ) + def test_other_dtypes_for_array(self, func): + func = getattr(lib, func) + arr = np.array(["foo", "bar"]) + assert not func(arr) + + arr = np.array([1, 2]) + assert not func(arr) + + def test_date(self): + + dates = [date(2012, 1, day) for day in range(1, 20)] + index = Index(dates) + assert index.inferred_type == "date" + + dates = [date(2012, 1, day) for day in range(1, 20)] + [np.nan] + result = lib.infer_dtype(dates, skipna=False) + assert result == "mixed" + + result = lib.infer_dtype(dates, skipna=True) + assert result == "date" + + def 
test_is_numeric_array(self): + + assert lib.is_float_array(np.array([1, 2.0])) + assert lib.is_float_array(np.array([1, 2.0, np.nan])) + assert not lib.is_float_array(np.array([1, 2])) + + assert lib.is_integer_array(np.array([1, 2])) + assert not lib.is_integer_array(np.array([1, 2.0])) + + def test_is_string_array(self): + + assert lib.is_string_array(np.array(["foo", "bar"])) + assert not lib.is_string_array( + np.array(["foo", "bar", pd.NA], dtype=object), skipna=False + ) + assert lib.is_string_array( + np.array(["foo", "bar", pd.NA], dtype=object), skipna=True + ) + # NaN is not valid for string array, just NA + assert not lib.is_string_array( + np.array(["foo", "bar", np.nan], dtype=object), skipna=True + ) + + assert not lib.is_string_array(np.array([1, 2])) + + def test_to_object_array_tuples(self): + r = (5, 6) + values = [r] + lib.to_object_array_tuples(values) + + # make sure record array works + record = namedtuple("record", "x y") + r = record(5, 6) + values = [r] + lib.to_object_array_tuples(values) + + def test_object(self): + + # GH 7431 + # cannot infer more than this as only a single element + arr = np.array([None], dtype="O") + result = lib.infer_dtype(arr, skipna=False) + assert result == "mixed" + result = lib.infer_dtype(arr, skipna=True) + assert result == "empty" + + def test_to_object_array_width(self): + # see gh-13320 + rows = [[1, 2, 3], [4, 5, 6]] + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows, min_width=1) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array( + [[1, 2, 3, None, None], [4, 5, 6, None, None]], dtype=object + ) + out = lib.to_object_array(rows, min_width=5) + tm.assert_numpy_array_equal(out, expected) + + def test_is_period(self): + assert lib.is_period(pd.Period("2011-01", freq="M")) + assert not lib.is_period(pd.PeriodIndex(["2011-01"], freq="M")) + 
assert not lib.is_period(pd.Timestamp("2011-01")) + assert not lib.is_period(1) + assert not lib.is_period(np.nan) + + def test_categorical(self): + + # GH 8974 + arr = Categorical(list("abc")) + result = lib.infer_dtype(arr, skipna=True) + assert result == "categorical" + + result = lib.infer_dtype(Series(arr), skipna=True) + assert result == "categorical" + + arr = Categorical(list("abc"), categories=["cegfab"], ordered=True) + result = lib.infer_dtype(arr, skipna=True) + assert result == "categorical" + + result = lib.infer_dtype(Series(arr), skipna=True) + assert result == "categorical" + + def test_interval(self): + idx = pd.IntervalIndex.from_breaks(range(5), closed="both") + inferred = lib.infer_dtype(idx, skipna=False) + assert inferred == "interval" + + inferred = lib.infer_dtype(idx._data, skipna=False) + assert inferred == "interval" + + inferred = lib.infer_dtype(pd.Series(idx), skipna=False) + assert inferred == "interval" + + @pytest.mark.parametrize("klass", [pd.array, pd.Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [["a", "b", "c"], ["a", "b", pd.NA]]) + def test_string_dtype(self, data, skipna, klass): + # StringArray + val = klass(data, dtype="string") + inferred = lib.infer_dtype(val, skipna=skipna) + assert inferred == "string" + + @pytest.mark.parametrize("klass", [pd.array, pd.Series]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("data", [[True, False, True], [True, False, pd.NA]]) + def test_boolean_dtype(self, data, skipna, klass): + # BooleanArray + val = klass(data, dtype="boolean") + inferred = lib.infer_dtype(val, skipna=skipna) + assert inferred == "boolean" + + +class TestNumberScalar: + def test_is_number(self): + + assert is_number(True) + assert is_number(1) + assert is_number(1.1) + assert is_number(1 + 3j) + assert is_number(np.bool(False)) + assert is_number(np.int64(1)) + assert is_number(np.float64(1.1)) + assert is_number(np.complex128(1 + 
3j)) + assert is_number(np.nan) + + assert not is_number(None) + assert not is_number("x") + assert not is_number(datetime(2011, 1, 1)) + assert not is_number(np.datetime64("2011-01-01")) + assert not is_number(Timestamp("2011-01-01")) + assert not is_number(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_number(timedelta(1000)) + assert not is_number(Timedelta("1 days")) + + # questionable + assert not is_number(np.bool_(False)) + assert is_number(np.timedelta64(1, "D")) + + def test_is_bool(self): + assert is_bool(True) + assert is_bool(np.bool(False)) + assert is_bool(np.bool_(False)) + + assert not is_bool(1) + assert not is_bool(1.1) + assert not is_bool(1 + 3j) + assert not is_bool(np.int64(1)) + assert not is_bool(np.float64(1.1)) + assert not is_bool(np.complex128(1 + 3j)) + assert not is_bool(np.nan) + assert not is_bool(None) + assert not is_bool("x") + assert not is_bool(datetime(2011, 1, 1)) + assert not is_bool(np.datetime64("2011-01-01")) + assert not is_bool(Timestamp("2011-01-01")) + assert not is_bool(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_bool(timedelta(1000)) + assert not is_bool(np.timedelta64(1, "D")) + assert not is_bool(Timedelta("1 days")) + + def test_is_integer(self): + assert is_integer(1) + assert is_integer(np.int64(1)) + + assert not is_integer(True) + assert not is_integer(1.1) + assert not is_integer(1 + 3j) + assert not is_integer(np.bool(False)) + assert not is_integer(np.bool_(False)) + assert not is_integer(np.float64(1.1)) + assert not is_integer(np.complex128(1 + 3j)) + assert not is_integer(np.nan) + assert not is_integer(None) + assert not is_integer("x") + assert not is_integer(datetime(2011, 1, 1)) + assert not is_integer(np.datetime64("2011-01-01")) + assert not is_integer(Timestamp("2011-01-01")) + assert not is_integer(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_integer(timedelta(1000)) + assert not is_integer(Timedelta("1 days")) + assert not is_integer(np.timedelta64(1, "D")) 
+ + def test_is_float(self): + assert is_float(1.1) + assert is_float(np.float64(1.1)) + assert is_float(np.nan) + + assert not is_float(True) + assert not is_float(1) + assert not is_float(1 + 3j) + assert not is_float(np.bool(False)) + assert not is_float(np.bool_(False)) + assert not is_float(np.int64(1)) + assert not is_float(np.complex128(1 + 3j)) + assert not is_float(None) + assert not is_float("x") + assert not is_float(datetime(2011, 1, 1)) + assert not is_float(np.datetime64("2011-01-01")) + assert not is_float(Timestamp("2011-01-01")) + assert not is_float(Timestamp("2011-01-01", tz="US/Eastern")) + assert not is_float(timedelta(1000)) + assert not is_float(np.timedelta64(1, "D")) + assert not is_float(Timedelta("1 days")) + + def test_is_datetime_dtypes(self): + + ts = pd.date_range("20130101", periods=3) + tsa = pd.date_range("20130101", periods=3, tz="US/Eastern") + + assert is_datetime64_dtype("datetime64") + assert is_datetime64_dtype("datetime64[ns]") + assert is_datetime64_dtype(ts) + assert not is_datetime64_dtype(tsa) + + assert not is_datetime64_ns_dtype("datetime64") + assert is_datetime64_ns_dtype("datetime64[ns]") + assert is_datetime64_ns_dtype(ts) + assert is_datetime64_ns_dtype(tsa) + + assert is_datetime64_any_dtype("datetime64") + assert is_datetime64_any_dtype("datetime64[ns]") + assert is_datetime64_any_dtype(ts) + assert is_datetime64_any_dtype(tsa) + + assert not is_datetime64tz_dtype("datetime64") + assert not is_datetime64tz_dtype("datetime64[ns]") + assert not is_datetime64tz_dtype(ts) + assert is_datetime64tz_dtype(tsa) + + for tz in ["US/Eastern", "UTC"]: + dtype = f"datetime64[ns, {tz}]" + assert not is_datetime64_dtype(dtype) + assert is_datetime64tz_dtype(dtype) + assert is_datetime64_ns_dtype(dtype) + assert is_datetime64_any_dtype(dtype) + + def test_is_timedelta(self): + assert is_timedelta64_dtype("timedelta64") + assert is_timedelta64_dtype("timedelta64[ns]") + assert not is_timedelta64_ns_dtype("timedelta64") + assert 
is_timedelta64_ns_dtype("timedelta64[ns]") + + tdi = TimedeltaIndex([1e14, 2e14], dtype="timedelta64[ns]") + assert is_timedelta64_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi) + assert is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]")) + + # Conversion to Int64Index: + assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64")) + assert not is_timedelta64_ns_dtype(tdi.astype("timedelta64[h]")) + + +class TestIsScalar: + def test_is_scalar_builtin_scalars(self): + assert is_scalar(None) + assert is_scalar(True) + assert is_scalar(False) + assert is_scalar(Number()) + assert is_scalar(Fraction()) + assert is_scalar(0.0) + assert is_scalar(np.nan) + assert is_scalar("foobar") + assert is_scalar(b"foobar") + assert is_scalar(datetime(2014, 1, 1)) + assert is_scalar(date(2014, 1, 1)) + assert is_scalar(time(12, 0)) + assert is_scalar(timedelta(hours=1)) + assert is_scalar(pd.NaT) + + def test_is_scalar_builtin_nonscalars(self): + assert not is_scalar({}) + assert not is_scalar([]) + assert not is_scalar([1]) + assert not is_scalar(()) + assert not is_scalar((1,)) + assert not is_scalar(slice(None)) + assert not is_scalar(Ellipsis) + + def test_is_scalar_numpy_array_scalars(self): + assert is_scalar(np.int64(1)) + assert is_scalar(np.float64(1.0)) + assert is_scalar(np.int32(1)) + assert is_scalar(np.object_("foobar")) + assert is_scalar(np.str_("foobar")) + assert is_scalar(np.unicode_("foobar")) + assert is_scalar(np.bytes_(b"foobar")) + assert is_scalar(np.datetime64("2014-01-01")) + assert is_scalar(np.timedelta64(1, "h")) + + def test_is_scalar_numpy_zerodim_arrays(self): + for zerodim in [ + np.array(1), + np.array("foobar"), + np.array(np.datetime64("2014-01-01")), + np.array(np.timedelta64(1, "h")), + np.array(np.datetime64("NaT")), + ]: + assert not is_scalar(zerodim) + assert is_scalar(lib.item_from_zerodim(zerodim)) + + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") + def test_is_scalar_numpy_arrays(self): + assert not 
is_scalar(np.array([])) + assert not is_scalar(np.array([[]])) + assert not is_scalar(np.matrix("1; 2")) + + def test_is_scalar_pandas_scalars(self): + assert is_scalar(Timestamp("2014-01-01")) + assert is_scalar(Timedelta(hours=1)) + assert is_scalar(Period("2014-01-01")) + assert is_scalar(Interval(left=0, right=1)) + assert is_scalar(DateOffset(days=1)) + + def test_is_scalar_pandas_containers(self): + assert not is_scalar(Series(dtype=object)) + assert not is_scalar(Series([1])) + assert not is_scalar(DataFrame()) + assert not is_scalar(DataFrame([[1]])) + assert not is_scalar(Index([])) + assert not is_scalar(Index([1])) + + +def test_datetimeindex_from_empty_datetime64_array(): + for unit in ["ms", "us", "ns"]: + idx = DatetimeIndex(np.array([], dtype=f"datetime64[{unit}]")) + assert len(idx) == 0 + + +def test_nan_to_nat_conversions(): + + df = DataFrame( + dict({"A": np.asarray(range(10), dtype="float64"), "B": Timestamp("20010101")}) + ) + df.iloc[3:6, :] = np.nan + result = df.loc[4, "B"].value + assert result == iNaT + + s = df["B"].copy() + s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) + assert isna(s[8]) + + assert s[8].value == np.datetime64("NaT").astype(np.int64) + + +@td.skip_if_no_scipy +@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") +def test_is_scipy_sparse(spmatrix): # noqa: F811 + assert is_scipy_sparse(spmatrix([[0, 1]])) + assert not is_scipy_sparse(np.array([1])) + + +def test_ensure_int32(): + values = np.arange(10, dtype=np.int32) + result = ensure_int32(values) + assert result.dtype == np.int32 + + values = np.arange(10, dtype=np.int64) + result = ensure_int32(values) + assert result.dtype == np.int32 + + +def test_ensure_categorical(): + values = np.arange(10, dtype=np.int32) + result = ensure_categorical(values) + assert result.dtype == "category" + + values = Categorical(values) + result = ensure_categorical(values) + tm.assert_categorical_equal(result, values) diff --git 
a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py new file mode 100644 index 00000000..7ba59786 --- /dev/null +++ b/pandas/tests/dtypes/test_missing.py @@ -0,0 +1,586 @@ +from datetime import datetime +from decimal import Decimal + +import numpy as np +import pytest + +from pandas._config import config as cf + +from pandas._libs import missing as libmissing +from pandas._libs.tslibs import iNaT, is_null_datetimelike + +from pandas.core.dtypes.common import is_scalar +from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype +from pandas.core.dtypes.missing import ( + array_equivalent, + isna, + isnull, + na_value_for_dtype, + notna, + notnull, +) + +import pandas as pd +from pandas import DatetimeIndex, Float64Index, NaT, Series, TimedeltaIndex, date_range +import pandas._testing as tm + +now = pd.Timestamp.now() +utcnow = pd.Timestamp.now("UTC") + + +@pytest.mark.parametrize("notna_f", [notna, notnull]) +def test_notna_notnull(notna_f): + assert notna_f(1.0) + assert not notna_f(None) + assert not notna_f(np.NaN) + + with cf.option_context("mode.use_inf_as_na", False): + assert notna_f(np.inf) + assert notna_f(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.all() + + with cf.option_context("mode.use_inf_as_na", True): + assert not notna_f(np.inf) + assert not notna_f(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notna_f(arr) + assert result.sum() == 2 + + with cf.option_context("mode.use_inf_as_na", False): + for s in [ + tm.makeFloatSeries(), + tm.makeStringSeries(), + tm.makeObjectSeries(), + tm.makeTimeSeries(), + tm.makePeriodSeries(), + ]: + assert isinstance(notna_f(s), Series) + + +class TestIsNA: + def test_0d_array(self): + assert isna(np.array(np.nan)) + assert not isna(np.array(0.0)) + assert not isna(np.array(0)) + # test object dtype + assert isna(np.array(np.nan, dtype=object)) + assert not isna(np.array(0.0, dtype=object)) + 
assert not isna(np.array(0, dtype=object)) + + def test_empty_object(self): + + for shape in [(4, 0), (4,)]: + arr = np.empty(shape=shape, dtype=object) + result = isna(arr) + expected = np.ones(shape=shape, dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("isna_f", [isna, isnull]) + def test_isna_isnull(self, isna_f): + assert not isna_f(1.0) + assert isna_f(None) + assert isna_f(np.NaN) + assert isna_f(float("nan")) + assert not isna_f(np.inf) + assert not isna_f(-np.inf) + + # type + assert not isna_f(type(pd.Series(dtype=object))) + assert not isna_f(type(pd.Series(dtype=np.float64))) + assert not isna_f(type(pd.DataFrame())) + + # series + for s in [ + tm.makeFloatSeries(), + tm.makeStringSeries(), + tm.makeObjectSeries(), + tm.makeTimeSeries(), + tm.makePeriodSeries(), + ]: + assert isinstance(isna_f(s), Series) + + # frame + for df in [ + tm.makeTimeDataFrame(), + tm.makePeriodFrame(), + tm.makeMixedDataFrame(), + ]: + result = isna_f(df) + expected = df.apply(isna_f) + tm.assert_frame_equal(result, expected) + + def test_isna_lists(self): + result = isna([[False]]) + exp = np.array([[False]]) + tm.assert_numpy_array_equal(result, exp) + + result = isna([[1], [2]]) + exp = np.array([[False], [False]]) + tm.assert_numpy_array_equal(result, exp) + + # list of strings / unicode + result = isna(["foo", "bar"]) + exp = np.array([False, False]) + tm.assert_numpy_array_equal(result, exp) + + result = isna(["foo", "bar"]) + exp = np.array([False, False]) + tm.assert_numpy_array_equal(result, exp) + + # GH20675 + result = isna([np.NaN, "world"]) + exp = np.array([True, False]) + tm.assert_numpy_array_equal(result, exp) + + def test_isna_nat(self): + result = isna([NaT]) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + result = isna(np.array([NaT], dtype=object)) + exp = np.array([True]) + tm.assert_numpy_array_equal(result, exp) + + def test_isna_numpy_nat(self): + arr = np.array( + [ + NaT, + 
np.datetime64("NaT"), + np.timedelta64("NaT"), + np.datetime64("NaT", "s"), + ] + ) + result = isna(arr) + expected = np.array([True] * 4) + tm.assert_numpy_array_equal(result, expected) + + def test_isna_datetime(self): + assert not isna(datetime.now()) + assert notna(datetime.now()) + + idx = date_range("1/1/1990", periods=20) + exp = np.ones(len(idx), dtype=bool) + tm.assert_numpy_array_equal(notna(idx), exp) + + idx = np.asarray(idx) + idx[0] = iNaT + idx = DatetimeIndex(idx) + mask = isna(idx) + assert mask[0] + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + # GH 9129 + pidx = idx.to_period(freq="M") + mask = isna(pidx) + assert mask[0] + exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + mask = isna(pidx[1:]) + exp = np.zeros(len(mask), dtype=bool) + tm.assert_numpy_array_equal(mask, exp) + + @pytest.mark.parametrize( + "value, expected", + [ + (np.complex128(np.nan), True), + (np.float64(1), False), + (np.array([1, 1 + 0j, np.nan, 3]), np.array([False, False, True, False])), + ( + np.array([1, 1 + 0j, np.nan, 3], dtype=object), + np.array([False, False, True, False]), + ), + ( + np.array([1, 1 + 0j, np.nan, 3]).astype(object), + np.array([False, False, True, False]), + ), + ], + ) + def test_complex(self, value, expected): + result = isna(value) + if is_scalar(result): + assert result is expected + else: + tm.assert_numpy_array_equal(result, expected) + + def test_datetime_other_units(self): + idx = pd.DatetimeIndex(["2011-01-01", "NaT", "2011-01-02"]) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) + + for dtype in [ + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + 
]: + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) + + exp = pd.Series([False, True, False]) + s = pd.Series(values) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = pd.Series(values, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_timedelta_other_units(self): + idx = pd.TimedeltaIndex(["1 days", "NaT", "2 days"]) + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) + + for dtype in [ + "timedelta64[D]", + "timedelta64[h]", + "timedelta64[m]", + "timedelta64[s]", + "timedelta64[ms]", + "timedelta64[us]", + "timedelta64[ns]", + ]: + values = idx.values.astype(dtype) + + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) + + exp = pd.Series([False, True, False]) + s = pd.Series(values) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = pd.Series(values, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + def test_period(self): + idx = pd.PeriodIndex(["2011-01", "NaT", "2012-01"], freq="M") + exp = np.array([False, True, False]) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + + exp = pd.Series([False, True, False]) + s = pd.Series(idx) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + s = pd.Series(idx, dtype=object) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) + + +def test_array_equivalent(): + assert array_equivalent(np.array([np.nan, np.nan]), np.array([np.nan, np.nan])) + assert 
array_equivalent( + np.array([np.nan, 1, np.nan]), np.array([np.nan, 1, np.nan]) + ) + assert array_equivalent( + np.array([np.nan, None], dtype="object"), + np.array([np.nan, None], dtype="object"), + ) + # Check the handling of nested arrays in array_equivalent_object + assert array_equivalent( + np.array([np.array([np.nan, None], dtype="object"), None], dtype="object"), + np.array([np.array([np.nan, None], dtype="object"), None], dtype="object"), + ) + assert array_equivalent( + np.array([np.nan, 1 + 1j], dtype="complex"), + np.array([np.nan, 1 + 1j], dtype="complex"), + ) + assert not array_equivalent( + np.array([np.nan, 1 + 1j], dtype="complex"), + np.array([np.nan, 1 + 2j], dtype="complex"), + ) + assert not array_equivalent( + np.array([np.nan, 1, np.nan]), np.array([np.nan, 2, np.nan]) + ) + assert not array_equivalent(np.array(["a", "b", "c", "d"]), np.array(["e", "e"])) + assert array_equivalent(Float64Index([0, np.nan]), Float64Index([0, np.nan])) + assert not array_equivalent(Float64Index([0, np.nan]), Float64Index([1, np.nan])) + assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan])) + assert not array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan])) + assert array_equivalent(TimedeltaIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) + assert not array_equivalent( + TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan]) + ) + assert array_equivalent( + DatetimeIndex([0, np.nan], tz="US/Eastern"), + DatetimeIndex([0, np.nan], tz="US/Eastern"), + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz="US/Eastern"), + DatetimeIndex([1, np.nan], tz="US/Eastern"), + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan], tz="US/Eastern") + ) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz="CET"), + DatetimeIndex([0, np.nan], tz="US/Eastern"), + ) + + assert not array_equivalent(DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) + + 
+@pytest.mark.parametrize( + "lvalue, rvalue", + [ + # There are 3 variants for each of lvalue and rvalue. We include all + # three for the tz-naive `now` and exclude the datetime64 variant + # for utcnow because it drops tzinfo. + (now, utcnow), + (now.to_datetime64(), utcnow), + (now.to_pydatetime(), utcnow), + (now, utcnow.to_pydatetime()), + (now.to_datetime64(), utcnow.to_pydatetime()), + (now.to_pydatetime(), utcnow.to_pydatetime()), + ], +) +def test_array_equivalent_tzawareness(lvalue, rvalue): + # we shouldn't raise if comparing tzaware and tznaive datetimes + left = np.array([lvalue], dtype=object) + right = np.array([rvalue], dtype=object) + + assert not array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right, strict_nan=False) + + +def test_array_equivalent_compat(): + # see gh-13388 + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + assert array_equivalent(m, n, strict_nan=True) + assert array_equivalent(m, n, strict_nan=False) + + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (4, 3)], dtype=[("a", int), ("b", float)]) + assert not array_equivalent(m, n, strict_nan=True) + assert not array_equivalent(m, n, strict_nan=False) + + m = np.array([(1, 2), (3, 4)], dtype=[("a", int), ("b", float)]) + n = np.array([(1, 2), (3, 4)], dtype=[("b", int), ("a", float)]) + assert not array_equivalent(m, n, strict_nan=True) + assert not array_equivalent(m, n, strict_nan=False) + + +def test_array_equivalent_str(): + for dtype in ["O", "S", "U"]: + assert array_equivalent( + np.array(["A", "B"], dtype=dtype), np.array(["A", "B"], dtype=dtype) + ) + assert not array_equivalent( + np.array(["A", "B"], dtype=dtype), np.array(["A", "X"], dtype=dtype) + ) + + +def test_array_equivalent_nested(): + # reached in groupby aggregations, make sure we use np.any when checking + # if the comparison is truthy + left = 
np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) + right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) + + assert array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right[::-1], strict_nan=True) + + left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object) + right = np.array([50, 40]) + assert not array_equivalent(left, right, strict_nan=True) + + +@pytest.mark.parametrize( + "dtype, na_value", + [ + # Datetime-like + (np.dtype("M8[ns]"), NaT), + (np.dtype("m8[ns]"), NaT), + (DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]"), NaT), + (PeriodDtype("M"), NaT), + # Integer + ("u1", 0), + ("u2", 0), + ("u4", 0), + ("u8", 0), + ("i1", 0), + ("i2", 0), + ("i4", 0), + ("i8", 0), + # Bool + ("bool", False), + # Float + ("f2", np.nan), + ("f4", np.nan), + ("f8", np.nan), + # Object + ("O", np.nan), + # Interval + (IntervalDtype(), np.nan), + ], +) +def test_na_value_for_dtype(dtype, na_value): + result = na_value_for_dtype(dtype) + assert result is na_value + + +class TestNAObj: + + _1d_methods = ["isnaobj", "isnaobj_old"] + _2d_methods = ["isnaobj2d", "isnaobj2d_old"] + + def _check_behavior(self, arr, expected): + for method in TestNAObj._1d_methods: + result = getattr(libmissing, method)(arr) + tm.assert_numpy_array_equal(result, expected) + + arr = np.atleast_2d(arr) + expected = np.atleast_2d(expected) + + for method in TestNAObj._2d_methods: + result = getattr(libmissing, method)(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_basic(self): + arr = np.array([1, None, "foo", -5.1, pd.NaT, np.nan]) + expected = np.array([False, True, False, False, True, True]) + + self._check_behavior(arr, expected) + + def test_non_obj_dtype(self): + arr = np.array([1, 3, np.nan, 5], dtype=float) + expected = np.array([False, False, True, False]) + + self._check_behavior(arr, expected) + + def test_empty_arr(self): + arr = np.array([]) + expected 
= np.array([], dtype=bool) + + self._check_behavior(arr, expected) + + def test_empty_str_inp(self): + arr = np.array([""]) # empty but not na + expected = np.array([False]) + + self._check_behavior(arr, expected) + + def test_empty_like(self): + # see gh-13717: no segfaults! + arr = np.empty_like([None]) + expected = np.array([True]) + + self._check_behavior(arr, expected) + + +m8_units = ["as", "ps", "ns", "us", "ms", "s", "m", "h", "D", "W", "M", "Y"] + +na_vals = ( + [ + None, + NaT, + float("NaN"), + complex("NaN"), + np.nan, + np.float64("NaN"), + np.float32("NaN"), + np.complex64(np.nan), + np.complex128(np.nan), + np.datetime64("NaT"), + np.timedelta64("NaT"), + ] + + [np.datetime64("NaT", unit) for unit in m8_units] + + [np.timedelta64("NaT", unit) for unit in m8_units] +) + +inf_vals = [ + float("inf"), + float("-inf"), + complex("inf"), + complex("-inf"), + np.inf, + np.NINF, +] + +int_na_vals = [ + # Values that match iNaT, which we treat as null in specific cases + np.int64(NaT.value), + int(NaT.value), +] + +sometimes_na_vals = [Decimal("NaN")] + +never_na_vals = [ + # float/complex values that when viewed as int64 match iNaT + -0.0, + np.float64("-0.0"), + -0j, + np.complex64(-0j), +] + + +class TestLibMissing: + def test_checknull(self): + for value in na_vals: + assert libmissing.checknull(value) + + for value in inf_vals: + assert not libmissing.checknull(value) + + for value in int_na_vals: + assert not libmissing.checknull(value) + + for value in sometimes_na_vals: + assert not libmissing.checknull(value) + + for value in never_na_vals: + assert not libmissing.checknull(value) + + def test_checknull_old(self): + for value in na_vals: + assert libmissing.checknull_old(value) + + for value in inf_vals: + assert libmissing.checknull_old(value) + + for value in int_na_vals: + assert not libmissing.checknull_old(value) + + for value in sometimes_na_vals: + assert not libmissing.checknull_old(value) + + for value in never_na_vals: + assert not 
libmissing.checknull_old(value) + + def test_is_null_datetimelike(self): + for value in na_vals: + assert is_null_datetimelike(value) + assert is_null_datetimelike(value, False) + + for value in inf_vals: + assert not is_null_datetimelike(value) + assert not is_null_datetimelike(value, False) + + for value in int_na_vals: + assert is_null_datetimelike(value) + assert not is_null_datetimelike(value, False) + + for value in sometimes_na_vals: + assert not is_null_datetimelike(value) + assert not is_null_datetimelike(value, False) + + for value in never_na_vals: + assert not is_null_datetimelike(value) diff --git a/pandas/tests/extension/__init__.py b/pandas/tests/extension/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/extension/arrow/__init__.py b/pandas/tests/extension/arrow/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py new file mode 100644 index 00000000..b0e5a6f8 --- /dev/null +++ b/pandas/tests/extension/arrow/arrays.py @@ -0,0 +1,190 @@ +"""Rudimentary Apache Arrow-backed ExtensionArray. + +At the moment, just a boolean array / type is implemented. +Eventually, we'll want to parametrize the type and support +multiple dtypes. Not all methods are implemented yet, and the +current implementation is not efficient. 
+""" +import copy +import itertools + +import numpy as np +import pyarrow as pa + +import pandas as pd +from pandas.api.extensions import ( + ExtensionArray, + ExtensionDtype, + register_extension_dtype, + take, +) + + +@register_extension_dtype +class ArrowBoolDtype(ExtensionDtype): + + type = np.bool_ + kind = "b" + name = "arrow_bool" + na_value = pa.NULL + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return ArrowBoolArray + + def _is_boolean(self): + return True + + +@register_extension_dtype +class ArrowStringDtype(ExtensionDtype): + + type = str + kind = "U" + name = "arrow_string" + na_value = pa.NULL + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return ArrowStringArray + + +class ArrowExtensionArray(ExtensionArray): + @classmethod + def from_scalars(cls, values): + arr = pa.chunked_array([pa.array(np.asarray(values))]) + return cls(arr) + + @classmethod + def from_array(cls, arr): + assert isinstance(arr, pa.Array) + return cls(pa.chunked_array([arr])) + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls.from_scalars(scalars) + + def __repr__(self): + return f"{type(self).__name__}({repr(self._data)})" + + def __getitem__(self, item): + if pd.api.types.is_scalar(item): + return self._data.to_pandas()[item] + else: + vals = self._data.to_pandas()[item] + return type(self).from_scalars(vals) + + def __len__(self): + return len(self._data) + + def astype(self, dtype, copy=True): + # needed to fix this astype for the Series constructor. + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + return super().astype(dtype, copy) + + @property + def dtype(self): + return self._dtype + + @property + def nbytes(self): + return sum( + x.size + for chunk in self._data.chunks + for x in chunk.buffers() + if x is not None + ) + + def isna(self): + nas = pd.isna(self._data.to_pandas()) + return type(self).from_scalars(nas) + + def take(self, indices, allow_fill=False, fill_value=None): + data = self._data.to_pandas() + + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result, dtype=self.dtype) + + def copy(self): + return type(self)(copy.copy(self._data)) + + @classmethod + def _concat_same_type(cls, to_concat): + chunks = list(itertools.chain.from_iterable(x._data.chunks for x in to_concat)) + arr = pa.chunked_array(chunks) + return cls(arr) + + def __invert__(self): + return type(self).from_scalars(~self._data.to_pandas()) + + def _reduce(self, method, 
skipna=True, **kwargs): + if skipna: + arr = self[~self.isna()] + else: + arr = self + + try: + op = getattr(arr, method) + except AttributeError: + raise TypeError + return op(**kwargs) + + def any(self, axis=0, out=None): + return self._data.to_pandas().any() + + def all(self, axis=0, out=None): + return self._data.to_pandas().all() + + +class ArrowBoolArray(ArrowExtensionArray): + def __init__(self, values): + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.bool_() + self._data = values + self._dtype = ArrowBoolDtype() + + +class ArrowStringArray(ArrowExtensionArray): + def __init__(self, values): + if not isinstance(values, pa.ChunkedArray): + raise ValueError + + assert values.type == pa.string() + self._data = values + self._dtype = ArrowStringDtype() diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py new file mode 100644 index 00000000..94dd09d3 --- /dev/null +++ b/pandas/tests/extension/arrow/test_bool.py @@ -0,0 +1,74 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension import base + +pytest.importorskip("pyarrow", minversion="0.13.0") + +from .arrays import ArrowBoolArray, ArrowBoolDtype # isort:skip + + +@pytest.fixture +def dtype(): + return ArrowBoolDtype() + + +@pytest.fixture +def data(): + values = np.random.randint(0, 2, size=100, dtype=bool) + values[1] = ~values[0] + return ArrowBoolArray.from_scalars(values) + + +@pytest.fixture +def data_missing(): + return ArrowBoolArray.from_scalars([None, True]) + + +class BaseArrowTests: + pass + + +class TestDtype(BaseArrowTests, base.BaseDtypeTests): + def test_array_type_with_arg(self, data, dtype): + pytest.skip("GH-22666") + + +class TestInterface(BaseArrowTests, base.BaseInterfaceTests): + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not 
work, so we only have a smoke-test + data.view() + + +class TestConstructors(BaseArrowTests, base.BaseConstructorsTests): + def test_from_dtype(self, data): + pytest.skip("GH-22666") + + # seems like some bug in isna on empty BoolArray returning floats. + @pytest.mark.xfail(reason="bad is-na for empty data") + def test_from_sequence_from_cls(self, data): + super().test_from_sequence_from_cls(data) + + +class TestReduce(base.BaseNoReduceTests): + def test_reduce_series_boolean(self): + pass + + +class TestReduceBoolean(base.BaseBooleanReduceTests): + pass + + +def test_is_bool_dtype(data): + assert pd.api.types.is_bool_dtype(data) + assert pd.core.common.is_bool_indexer(data) + s = pd.Series(range(len(data))) + result = s[data] + expected = s[np.asarray(data)] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/arrow/test_string.py b/pandas/tests/extension/arrow/test_string.py new file mode 100644 index 00000000..abd5c1f3 --- /dev/null +++ b/pandas/tests/extension/arrow/test_string.py @@ -0,0 +1,13 @@ +import pytest + +import pandas as pd + +pytest.importorskip("pyarrow", minversion="0.13.0") + +from .arrays import ArrowStringDtype # isort:skip + + +def test_constructor_from_list(): + # GH 27673 + result = pd.Series(["E"], dtype=ArrowStringDtype()) + assert isinstance(result.dtype, ArrowStringDtype) diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py new file mode 100644 index 00000000..e2b6ea03 --- /dev/null +++ b/pandas/tests/extension/base/__init__.py @@ -0,0 +1,65 @@ +"""Base test suite for extension arrays. + +These tests are intended for third-party libraries to subclass to validate +that their extension arrays and dtypes satisfy the interface. Moving or +renaming the tests should not be done lightly. + +Libraries are expected to implement a few pytest fixtures to provide data +for the tests. The fixtures may be located in either + +* The same module as your test class. 
+* A ``conftest.py`` in the same directory as your test class. + +The full list of fixtures may be found in the ``conftest.py`` next to this +file. + +.. code-block:: python + + import pytest + from pandas.tests.extension.base import BaseDtypeTests + + + @pytest.fixture + def dtype(): + return MyDtype() + + + class TestMyDtype(BaseDtypeTests): + pass + + +Your class ``TestDtype`` will inherit all the tests defined on +``BaseDtypeTests``. pytest's fixture discover will supply your ``dtype`` +wherever the test requires it. You're free to implement additional tests. + +All the tests in these modules use ``self.assert_frame_equal`` or +``self.assert_series_equal`` for dataframe or series comparisons. By default, +they use the usual ``pandas.testing.assert_frame_equal`` and +``pandas.testing.assert_series_equal``. You can override the checks used +by defining the staticmethods ``assert_frame_equal`` and +``assert_series_equal`` on your base test class. + +""" +from .casting import BaseCastingTests # noqa +from .constructors import BaseConstructorsTests # noqa +from .dtype import BaseDtypeTests # noqa +from .getitem import BaseGetitemTests # noqa +from .groupby import BaseGroupbyTests # noqa +from .interface import BaseInterfaceTests # noqa +from .io import BaseParsingTests # noqa +from .methods import BaseMethodsTests # noqa +from .missing import BaseMissingTests # noqa +from .ops import ( # noqa + BaseArithmeticOpsTests, + BaseComparisonOpsTests, + BaseOpsUtil, + BaseUnaryOpsTests, +) +from .printing import BasePrintingTests # noqa +from .reduce import ( # noqa + BaseBooleanReduceTests, + BaseNoReduceTests, + BaseNumericReduceTests, +) +from .reshaping import BaseReshapingTests # noqa +from .setitem import BaseSetitemTests # noqa diff --git a/pandas/tests/extension/base/base.py b/pandas/tests/extension/base/base.py new file mode 100644 index 00000000..144b0825 --- /dev/null +++ b/pandas/tests/extension/base/base.py @@ -0,0 +1,9 @@ +import pandas._testing as tm + + 
+class BaseExtensionTests: + + assert_equal = staticmethod(tm.assert_equal) + assert_series_equal = staticmethod(tm.assert_series_equal) + assert_frame_equal = staticmethod(tm.assert_frame_equal) + assert_extension_array_equal = staticmethod(tm.assert_extension_array_equal) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py new file mode 100644 index 00000000..58859fc6 --- /dev/null +++ b/pandas/tests/extension/base/casting.py @@ -0,0 +1,34 @@ +import numpy as np + +import pandas as pd +from pandas.core.internals import ObjectBlock + +from .base import BaseExtensionTests + + +class BaseCastingTests(BaseExtensionTests): + """Casting to and from ExtensionDtypes""" + + def test_astype_object_series(self, all_data): + ser = pd.Series({"A": all_data}) + result = ser.astype(object) + assert isinstance(result._data.blocks[0], ObjectBlock) + + def test_tolist(self, data): + result = pd.Series(data).tolist() + expected = list(data) + assert result == expected + + def test_astype_str(self, data): + result = pd.Series(data[:5]).astype(str) + expected = pd.Series(data[:5].astype(str)) + self.assert_series_equal(result, expected) + + def test_to_numpy(self, data): + expected = np.asarray(data) + + result = data.to_numpy() + self.assert_equal(result, expected) + + result = pd.Series(data).to_numpy() + self.assert_equal(result, expected) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py new file mode 100644 index 00000000..c40646ca --- /dev/null +++ b/pandas/tests/extension/base/constructors.py @@ -0,0 +1,85 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.internals import ExtensionBlock + +from .base import BaseExtensionTests + + +class BaseConstructorsTests(BaseExtensionTests): + def test_from_sequence_from_cls(self, data): + result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + + data = data[:0] + 
result = type(data)._from_sequence(data, dtype=data.dtype) + self.assert_extension_array_equal(result, data) + + def test_array_from_scalars(self, data): + scalars = [data[0], data[1], data[2]] + result = data._from_sequence(scalars) + assert isinstance(result, type(data)) + + def test_series_constructor(self, data): + result = pd.Series(data) + assert result.dtype == data.dtype + assert len(result) == len(data) + assert isinstance(result._data.blocks[0], ExtensionBlock) + assert result._data.blocks[0].values is data + + # Series[EA] is unboxed / boxed correctly + result2 = pd.Series(result) + assert result2.dtype == data.dtype + assert isinstance(result2._data.blocks[0], ExtensionBlock) + + @pytest.mark.parametrize("from_series", [True, False]) + def test_dataframe_constructor_from_dict(self, data, from_series): + if from_series: + data = pd.Series(data) + result = pd.DataFrame({"A": data}) + assert result.dtypes["A"] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_dataframe_from_series(self, data): + result = pd.DataFrame(pd.Series(data)) + assert result.dtypes[0] == data.dtype + assert result.shape == (len(data), 1) + assert isinstance(result._data.blocks[0], ExtensionBlock) + + def test_series_given_mismatched_index_raises(self, data): + msg = "Length of passed values is 3, index implies 5" + with pytest.raises(ValueError, match=msg): + pd.Series(data[:3], index=[0, 1, 2, 3, 4]) + + def test_from_dtype(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + expected = pd.Series(data) + result = pd.Series(list(data), dtype=dtype) + self.assert_series_equal(result, expected) + + result = pd.Series(list(data), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # gh-30280 + + expected = pd.DataFrame(data).astype(dtype) + result = pd.DataFrame(list(data), dtype=dtype) + self.assert_frame_equal(result, expected) + + result = 
pd.DataFrame(list(data), dtype=str(dtype)) + self.assert_frame_equal(result, expected) + + def test_pandas_array(self, data): + # pd.array(extension_array) should be idempotent... + result = pd.array(data) + self.assert_extension_array_equal(result, data) + + def test_pandas_array_dtype(self, data): + # ... but specifying dtype will override idempotency + result = pd.array(data, dtype=np.dtype(object)) + expected = pd.arrays.PandasArray(np.asarray(data, dtype=object)) + self.assert_equal(result, expected) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py new file mode 100644 index 00000000..3cb3c25d --- /dev/null +++ b/pandas/tests/extension/base/dtype.py @@ -0,0 +1,111 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd + +from .base import BaseExtensionTests + + +class BaseDtypeTests(BaseExtensionTests): + """Base class for ExtensionDtype classes""" + + def test_name(self, dtype): + assert isinstance(dtype.name, str) + + def test_kind(self, dtype): + valid = set("biufcmMOSUV") + assert dtype.kind in valid + + def test_construct_from_string_own_name(self, dtype): + result = dtype.construct_from_string(dtype.name) + assert type(result) is type(dtype) + + # check OK as classmethod + result = type(dtype).construct_from_string(dtype.name) + assert type(result) is type(dtype) + + def test_is_dtype_from_name(self, dtype): + result = type(dtype).is_dtype(dtype.name) + assert result is True + + def test_is_dtype_unboxes_dtype(self, data, dtype): + assert dtype.is_dtype(data) is True + + def test_is_dtype_from_self(self, dtype): + result = type(dtype).is_dtype(dtype) + assert result is True + + def test_is_dtype_other_input(self, dtype): + assert dtype.is_dtype([1, 2, 3]) is False + + def test_is_not_string_type(self, dtype): + return not pd.api.types.is_string_dtype(dtype) + + def test_is_not_object_type(self, dtype): + return not pd.api.types.is_object_dtype(dtype) + + def test_eq_with_str(self, dtype): + 
assert dtype == dtype.name + assert dtype != dtype.name + "-suffix" + + def test_eq_with_numpy_object(self, dtype): + assert dtype != np.dtype("object") + + def test_eq_with_self(self, dtype): + assert dtype == dtype + assert dtype != object() + + def test_array_type(self, data, dtype): + assert dtype.construct_array_type() is type(data) + + def test_check_dtype(self, data): + dtype = data.dtype + + # check equivalency for using .dtypes + df = pd.DataFrame( + {"A": pd.Series(data, dtype=dtype), "B": data, "C": "foo", "D": 1} + ) + + # TODO(numpy-1.20): This warnings filter and if block can be removed + # once we require numpy>=1.20 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + result = df.dtypes == str(dtype) + # NumPy>=1.20.0, but not pandas.compat.numpy till there + # is a wheel available with this change. + try: + new_numpy_behavior = np.dtype("int64") != "Int64" + except TypeError: + new_numpy_behavior = True + + if dtype.name == "Int64" and not new_numpy_behavior: + expected = pd.Series([True, True, False, True], index=list("ABCD")) + else: + expected = pd.Series([True, True, False, False], index=list("ABCD")) + + self.assert_series_equal(result, expected) + + expected = pd.Series([True, True, False, False], index=list("ABCD")) + result = df.dtypes.apply(str) == str(dtype) + self.assert_series_equal(result, expected) + + def test_hashable(self, dtype): + hash(dtype) # no error + + def test_str(self, dtype): + assert str(dtype) == dtype.name + + def test_eq(self, dtype): + assert dtype == dtype.name + assert dtype != "anonther_type" + + def test_construct_from_string(self, dtype): + dtype_instance = type(dtype).construct_from_string(dtype.name) + assert isinstance(dtype_instance, type(dtype)) + + def test_construct_from_string_another_type_raises(self, dtype): + msg = f"Cannot construct a '{type(dtype).__name__}' from 'another_type'" + with pytest.raises(TypeError, match=msg): + 
type(dtype).construct_from_string("another_type") diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py new file mode 100644 index 00000000..b08a64cc --- /dev/null +++ b/pandas/tests/extension/base/getitem.py @@ -0,0 +1,372 @@ +import numpy as np +import pytest + +import pandas as pd + +from .base import BaseExtensionTests + + +class BaseGetitemTests(BaseExtensionTests): + """Tests for ExtensionArray.__getitem__.""" + + def test_iloc_series(self, data): + ser = pd.Series(data) + result = ser.iloc[:4] + expected = pd.Series(data[:4]) + self.assert_series_equal(result, expected) + + result = ser.iloc[[0, 1, 2, 3]] + self.assert_series_equal(result, expected) + + def test_iloc_frame(self, data): + df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.iloc[:4, [0]] + self.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.iloc[[0, 1, 2, 3], [0]] + self.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name="A") + + # slice -> series + result = df.iloc[:4, 0] + self.assert_series_equal(result, expected) + + # sequence -> series + result = df.iloc[:4, 0] + self.assert_series_equal(result, expected) + + def test_loc_series(self, data): + ser = pd.Series(data) + result = ser.loc[:3] + expected = pd.Series(data[:4]) + self.assert_series_equal(result, expected) + + result = ser.loc[[0, 1, 2, 3]] + self.assert_series_equal(result, expected) + + def test_loc_frame(self, data): + df = pd.DataFrame({"A": data, "B": np.arange(len(data), dtype="int64")}) + expected = pd.DataFrame({"A": data[:4]}) + + # slice -> frame + result = df.loc[:3, ["A"]] + self.assert_frame_equal(result, expected) + + # sequence -> frame + result = df.loc[[0, 1, 2, 3], ["A"]] + self.assert_frame_equal(result, expected) + + expected = pd.Series(data[:4], name="A") + + # slice -> series + result = df.loc[:3, "A"] + 
self.assert_series_equal(result, expected) + + # sequence -> series + result = df.loc[:3, "A"] + self.assert_series_equal(result, expected) + + def test_loc_iloc_frame_single_dtype(self, data): + # GH#27110 bug in ExtensionBlock.iget caused df.iloc[n] to incorrectly + # return a scalar + df = pd.DataFrame({"A": data}) + expected = pd.Series([data[2]], index=["A"], name=2, dtype=data.dtype) + + result = df.loc[2] + self.assert_series_equal(result, expected) + + expected = pd.Series( + [data[-1]], index=["A"], name=len(data) - 1, dtype=data.dtype + ) + result = df.iloc[-1] + self.assert_series_equal(result, expected) + + def test_getitem_scalar(self, data): + result = data[0] + assert isinstance(result, data.dtype.type) + + result = pd.Series(data)[0] + assert isinstance(result, data.dtype.type) + + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): + result = data_missing[0] + assert na_cmp(result, na_value) + + def test_getitem_empty(self, data): + # Indexing with empty list + result = data[[]] + assert len(result) == 0 + assert isinstance(result, type(data)) + + expected = data[np.array([], dtype="int64")] + self.assert_extension_array_equal(result, expected) + + def test_getitem_mask(self, data): + # Empty mask, raw array + mask = np.zeros(len(data), dtype=bool) + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + # Empty mask, in series + mask = np.zeros(len(data), dtype=bool) + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + # non-empty mask, raw array + mask[0] = True + result = data[mask] + assert len(result) == 1 + assert isinstance(result, type(data)) + + # non-empty mask, in series + result = pd.Series(data)[mask] + assert len(result) == 1 + assert result.dtype == data.dtype + + def test_getitem_mask_raises(self, data): + mask = np.array([True, False]) + with pytest.raises(IndexError): + data[mask] + + mask = pd.array(mask, dtype="boolean") + with 
pytest.raises(IndexError): + data[mask] + + def test_getitem_boolean_array_mask(self, data): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + result = data[mask] + assert len(result) == 0 + assert isinstance(result, type(data)) + + result = pd.Series(data)[mask] + assert len(result) == 0 + assert result.dtype == data.dtype + + mask[:5] = True + expected = data.take([0, 1, 2, 3, 4]) + result = data[mask] + self.assert_extension_array_equal(result, expected) + + expected = pd.Series(expected) + result = pd.Series(data)[mask] + self.assert_series_equal(result, expected) + + def test_getitem_boolean_na_treated_as_false(self, data): + # https://github.com/pandas-dev/pandas/issues/31503 + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:2] = pd.NA + mask[2:4] = True + + result = data[mask] + expected = data[mask.fillna(False)] + + self.assert_extension_array_equal(result, expected) + + s = pd.Series(data) + + result = s[mask] + expected = s[mask.fillna(False)] + + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_getitem_integer_array(self, data, idx): + result = data[idx] + assert len(result) == 3 + assert isinstance(result, type(data)) + expected = data.take([0, 1, 2]) + self.assert_extension_array_equal(result, expected) + + expected = pd.Series(expected) + result = pd.Series(data)[idx] + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2, pd.NA], pd.array([0, 1, 2, pd.NA], dtype="Int64")], + ids=["list", "integer-array"], + ) + def test_getitem_integer_with_missing_raises(self, data, idx): + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + data[idx] + + # TODO this raises KeyError about labels not found (it tries label-based) + # import 
pandas._testing as tm + # s = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + # with pytest.raises(ValueError, match=msg): + # s[idx] + + def test_getitem_slice(self, data): + # getitem[slice] should return an array + result = data[slice(0)] # empty + assert isinstance(result, type(data)) + + result = data[slice(1)] # scalar + assert isinstance(result, type(data)) + + def test_get(self, data): + # GH 20882 + s = pd.Series(data, index=[2 * i for i in range(len(data))]) + assert s.get(4) == s.iloc[2] + + result = s.get([4, 6]) + expected = s.iloc[[2, 3]] + self.assert_series_equal(result, expected) + + result = s.get(slice(2)) + expected = s.iloc[[0, 1]] + self.assert_series_equal(result, expected) + + assert s.get(-1) is None + assert s.get(s.index.max() + 1) is None + + s = pd.Series(data[:6], index=list("abcdef")) + assert s.get("c") == s.iloc[2] + + result = s.get(slice("b", "d")) + expected = s.iloc[[1, 2, 3]] + self.assert_series_equal(result, expected) + + result = s.get("Z") + assert result is None + + assert s.get(4) == s.iloc[4] + assert s.get(-1) == s.iloc[-1] + assert s.get(len(s)) is None + + # GH 21257 + s = pd.Series(data) + s2 = s[::2] + assert s2.get(1) is None + + def test_take_sequence(self, data): + result = pd.Series(data)[[0, 1, 3]] + assert result.iloc[0] == data[0] + assert result.iloc[1] == data[1] + assert result.iloc[2] == data[3] + + def test_take(self, data, na_value, na_cmp): + result = data.take([0, -1]) + assert result.dtype == data.dtype + assert result[0] == data[0] + assert result[1] == data[-1] + + result = data.take([0, -1], allow_fill=True, fill_value=na_value) + assert result[0] == data[0] + assert na_cmp(result[1], na_value) + + with pytest.raises(IndexError, match="out of bounds"): + data.take([len(data) + 1]) + + def test_take_empty(self, data, na_value, na_cmp): + empty = data[:0] + + result = empty.take([-1], allow_fill=True) + assert na_cmp(result[0], na_value) + + with pytest.raises(IndexError): + 
empty.take([-1]) + + with pytest.raises(IndexError, match="cannot do a non-empty take"): + empty.take([0, 1]) + + def test_take_negative(self, data): + # https://github.com/pandas-dev/pandas/issues/20640 + n = len(data) + result = data.take([0, -n, n - 1, -1]) + expected = data.take([0, 0, n - 1, n - 1]) + self.assert_extension_array_equal(result, expected) + + def test_take_non_na_fill_value(self, data_missing): + fill_value = data_missing[1] # valid + na = data_missing[0] + + array = data_missing._from_sequence( + [na, fill_value, na], dtype=data_missing.dtype + ) + result = array.take([-1, 1], fill_value=fill_value, allow_fill=True) + expected = array.take([1, 1]) + self.assert_extension_array_equal(result, expected) + + def test_take_pandas_style_negative_raises(self, data, na_value): + with pytest.raises(ValueError): + data.take([0, -2], fill_value=na_value, allow_fill=True) + + @pytest.mark.parametrize("allow_fill", [True, False]) + def test_take_out_of_bounds_raises(self, data, allow_fill): + arr = data[:3] + with pytest.raises(IndexError): + arr.take(np.asarray([0, 3]), allow_fill=allow_fill) + + def test_take_series(self, data): + s = pd.Series(data) + result = s.take([0, -1]) + expected = pd.Series( + data._from_sequence([data[0], data[len(data) - 1]], dtype=s.dtype), + index=[0, len(data) - 1], + ) + self.assert_series_equal(result, expected) + + def test_reindex(self, data, na_value): + s = pd.Series(data) + result = s.reindex([0, 1, 3]) + expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3]) + self.assert_series_equal(result, expected) + + n = len(data) + result = s.reindex([-1, 0, n]) + expected = pd.Series( + data._from_sequence([na_value, data[0], na_value], dtype=s.dtype), + index=[-1, 0, n], + ) + self.assert_series_equal(result, expected) + + result = s.reindex([n, n + 1]) + expected = pd.Series( + data._from_sequence([na_value, na_value], dtype=s.dtype), index=[n, n + 1] + ) + self.assert_series_equal(result, expected) + + def 
test_reindex_non_na_fill_value(self, data_missing): + valid = data_missing[1] + na = data_missing[0] + + array = data_missing._from_sequence([na, valid], dtype=data_missing.dtype) + ser = pd.Series(array) + result = ser.reindex([0, 1, 2], fill_value=valid) + expected = pd.Series( + data_missing._from_sequence([na, valid, valid], dtype=data_missing.dtype) + ) + + self.assert_series_equal(result, expected) + + def test_loc_len1(self, data): + # see GH-27785 take_nd with indexer of len 1 resulting in wrong ndim + df = pd.DataFrame({"A": data}) + res = df.loc[[0], "A"] + assert res._data._block.ndim == 1 + + def test_item(self, data): + # https://github.com/pandas-dev/pandas/pull/30175 + s = pd.Series(data) + result = s[:1].item() + assert result == data[0] + + msg = "can only convert an array of size 1 to a Python scalar" + with pytest.raises(ValueError, match=msg): + s[:0].item() + + with pytest.raises(ValueError, match=msg): + s.item() diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py new file mode 100644 index 00000000..94d0ef7b --- /dev/null +++ b/pandas/tests/extension/base/groupby.py @@ -0,0 +1,91 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + +from .base import BaseExtensionTests + + +class BaseGroupbyTests(BaseExtensionTests): + """Groupby-specific tests.""" + + def test_grouping_grouper(self, data_for_grouping): + df = pd.DataFrame( + {"A": ["B", "B", None, None, "A", "A", "B", "C"], "B": data_for_grouping} + ) + gr1 = df.groupby("A").grouper.groupings[0] + gr2 = df.groupby("B").grouper.groupings[0] + + tm.assert_numpy_array_equal(gr1.grouper, df.A.values) + tm.assert_extension_array_equal(gr2.grouper, data_for_grouping) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("B", as_index=as_index).A.mean() + _, index = 
pd.factorize(data_for_grouping, sort=True) + + index = pd.Index(index, name="B") + expected = pd.Series([3, 1, 4], index=index, name="A") + if as_index: + self.assert_series_equal(result, expected) + else: + expected = expected.reset_index() + self.assert_frame_equal(result, expected) + + def test_groupby_extension_no_sort(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("B", sort=False).A.mean() + _, index = pd.factorize(data_for_grouping, sort=False) + + index = pd.Index(index, name="B") + expected = pd.Series([1, 3, 4], index=index, name="A") + self.assert_series_equal(result, expected) + + def test_groupby_extension_transform(self, data_for_grouping): + valid = data_for_grouping[~data_for_grouping.isna()] + df = pd.DataFrame({"A": [1, 1, 3, 3, 1, 4], "B": valid}) + + result = df.groupby("B").A.transform(len) + expected = pd.Series([3, 3, 2, 2, 3, 1], name="A") + + self.assert_series_equal(result, expected) + + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + df.groupby("B").apply(groupby_apply_op) + df.groupby("B").A.apply(groupby_apply_op) + df.groupby("A").apply(groupby_apply_op) + df.groupby("A").B.apply(groupby_apply_op) + + def test_groupby_apply_identity(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) + result = df.groupby("A").B.apply(lambda x: x.array) + expected = pd.Series( + [ + df.B.iloc[[0, 1, 6]].array, + df.B.iloc[[2, 3]].array, + df.B.iloc[[4, 5]].array, + df.B.iloc[[7]].array, + ], + index=pd.Index([1, 2, 3, 4], name="A"), + name="B", + ) + self.assert_series_equal(result, expected) + + def test_in_numeric_groupby(self, data_for_grouping): + df = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 1, 4], + "B": data_for_grouping, + "C": [1, 1, 1, 1, 1, 1, 1, 1], + } + ) + result = df.groupby("A").sum().columns + + if 
data_for_grouping.dtype._is_numeric: + expected = pd.Index(["B", "C"]) + else: + expected = pd.Index(["C"]) + + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py new file mode 100644 index 00000000..95fb3d74 --- /dev/null +++ b/pandas/tests/extension/base/interface.py @@ -0,0 +1,95 @@ +import numpy as np + +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + +import pandas as pd +import pandas._testing as tm + +from .base import BaseExtensionTests + + +class BaseInterfaceTests(BaseExtensionTests): + """Tests that the basic interface is satisfied.""" + + # ------------------------------------------------------------------------ + # Interface + # ------------------------------------------------------------------------ + + def test_len(self, data): + assert len(data) == 100 + + def test_size(self, data): + assert data.size == 100 + + def test_ndim(self, data): + assert data.ndim == 1 + + def test_can_hold_na_valid(self, data): + # GH-20761 + assert data._can_hold_na is True + + def test_memory_usage(self, data): + s = pd.Series(data) + result = s.memory_usage(index=False) + assert result == s.nbytes + + def test_array_interface(self, data): + result = np.array(data) + assert result[0] == data[0] + + result = np.array(data, dtype=object) + expected = np.array(list(data), dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_is_extension_array_dtype(self, data): + assert is_extension_array_dtype(data) + assert is_extension_array_dtype(data.dtype) + assert is_extension_array_dtype(pd.Series(data)) + assert isinstance(data.dtype, ExtensionDtype) + + def test_no_values_attribute(self, data): + # GH-20735: EA's with .values attribute give problems with internal + # code, disallowing this for now until solved + assert not hasattr(data, "values") + assert not hasattr(data, "_values") + + def 
test_is_numeric_honored(self, data): + result = pd.Series(data) + assert result._data.blocks[0].is_numeric is data.dtype._is_numeric + + def test_isna_extension_array(self, data_missing): + # If your `isna` returns an ExtensionArray, you must also implement + # _reduce. At the *very* least, you must implement any and all + na = data_missing.isna() + if is_extension_array_dtype(na): + assert na._reduce("any") + assert na.any() + + assert not na._reduce("all") + assert not na.all() + + assert na.dtype._is_boolean + + def test_copy(self, data): + # GH#27083 removing deep keyword from EA.copy + assert data[0] != data[1] + result = data.copy() + + data[1] = data[0] + assert result[1] != result[0] + + def test_view(self, data): + # view with no dtype should return a shallow copy, *not* the same + # object + assert data[1] != data[0] + + result = data.view() + assert result is not data + assert type(result) == type(data) + + result[1] = result[0] + assert data[1] == data[0] + + # check specifically that the `dtype` kwarg is accepted + data.view(dtype=None) diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py new file mode 100644 index 00000000..3de752a8 --- /dev/null +++ b/pandas/tests/extension/base/io.py @@ -0,0 +1,20 @@ +from io import StringIO + +import numpy as np +import pytest + +import pandas as pd + +from .base import BaseExtensionTests + + +class BaseParsingTests(BaseExtensionTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))}) + csv_output = df.to_csv(index=False, na_rep=np.nan) + result = pd.read_csv( + StringIO(csv_output), dtype={"with_dtype": str(data.dtype)}, engine=engine + ) + expected = df + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py new file mode 100644 index 00000000..6ed8b782 --- /dev/null +++ 
b/pandas/tests/extension/base/methods.py @@ -0,0 +1,397 @@ +import operator + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_bool_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.sorting import nargsort + +from .base import BaseExtensionTests + + +class BaseMethodsTests(BaseExtensionTests): + """Various Series and DataFrame methods.""" + + @pytest.mark.parametrize("dropna", [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + self.assert_series_equal(result, expected) + + def test_count(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + result = df.count(axis="columns") + expected = pd.Series([0, 1]) + self.assert_series_equal(result, expected) + + def test_series_count(self, data_missing): + # GH#26835 + ser = pd.Series(data_missing) + result = ser.count() + expected = 1 + assert result == expected + + def test_apply_simple_series(self, data): + result = pd.Series(data).apply(id) + assert isinstance(result, pd.Series) + + def test_argsort(self, data_for_sorting): + result = pd.Series(data_for_sorting).argsort() + expected = pd.Series(np.array([2, 0, 1], dtype=np.int64)) + self.assert_series_equal(result, expected) + + def test_argsort_missing_array(self, data_missing_for_sorting): + result = data_missing_for_sorting.argsort() + expected = np.array([2, 0, 1], dtype=np.dtype("int")) + # we don't care whether it's int32 or int64 + result = result.astype("int64", casting="safe") + expected = expected.astype("int64", casting="safe") + tm.assert_numpy_array_equal(result, expected) + + def test_argsort_missing(self, data_missing_for_sorting): + result = pd.Series(data_missing_for_sorting).argsort() + expected = 
pd.Series(np.array([1, -1, 0], dtype=np.int64)) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "na_position, expected", + [ + ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), + ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), + ], + ) + def test_nargsort(self, data_missing_for_sorting, na_position, expected): + # GH 25439 + result = nargsort(data_missing_for_sorting, na_position=na_position) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending): + ser = pd.Series(data_for_sorting) + result = ser.sort_values(ascending=ascending) + expected = ser.iloc[[2, 0, 1]] + if not ascending: + expected = expected[::-1] + + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing(self, data_missing_for_sorting, ascending): + ser = pd.Series(data_missing_for_sorting) + result = ser.sort_values(ascending=ascending) + if ascending: + expected = ser.iloc[[2, 0, 1]] + else: + expected = ser.iloc[[0, 2, 1]] + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_frame(self, data_for_sorting, ascending): + df = pd.DataFrame({"A": [1, 2, 1], "B": data_for_sorting}) + result = df.sort_values(["A", "B"]) + expected = pd.DataFrame( + {"A": [1, 1, 2], "B": data_for_sorting.take([2, 0, 1])}, index=[2, 0, 1] + ) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) + @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method): + duplicated = box(data._from_sequence([data[0], data[0]])) + + result = method(duplicated) + + assert len(result) == 1 + assert isinstance(result, type(data)) + assert result[0] == duplicated[0] + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize(self, 
data_for_grouping, na_sentinel): + codes, uniques = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + expected_codes = np.array( + [0, 0, na_sentinel, na_sentinel, 1, 1, 0, 2], dtype=np.intp + ) + expected_uniques = data_for_grouping.take([0, 4, 7]) + + tm.assert_numpy_array_equal(codes, expected_codes) + self.assert_extension_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize_equivalence(self, data_for_grouping, na_sentinel): + codes_1, uniques_1 = pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + codes_2, uniques_2 = data_for_grouping.factorize(na_sentinel=na_sentinel) + + tm.assert_numpy_array_equal(codes_1, codes_2) + self.assert_extension_array_equal(uniques_1, uniques_2) + + def test_factorize_empty(self, data): + codes, uniques = pd.factorize(data[:0]) + expected_codes = np.array([], dtype=np.intp) + expected_uniques = type(data)._from_sequence([], dtype=data[:0].dtype) + + tm.assert_numpy_array_equal(codes, expected_codes) + self.assert_extension_array_equal(uniques, expected_uniques) + + def test_fillna_copy_frame(self, data_missing): + arr = data_missing.take([1, 1]) + df = pd.DataFrame({"A": arr}) + + filled_val = df.iloc[0, 0] + result = df.fillna(filled_val) + + assert df.A.values is not result.A.values + + def test_fillna_copy_series(self, data_missing): + arr = data_missing.take([1, 1]) + ser = pd.Series(arr) + + filled_val = ser[0] + result = ser.fillna(filled_val) + + assert ser._values is not result._values + assert ser._values is arr + + def test_fillna_length_mismatch(self, data_missing): + msg = "Length of 'value' does not match." 
+ with pytest.raises(ValueError, match=msg): + data_missing.fillna(data_missing.take([1])) + + def test_combine_le(self, data_repeated): + # GH 20825 + # Test that combine works when doing a <= (le) comparison + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= val for a in list(orig_data1)]) + self.assert_series_equal(result, expected) + + def test_combine_add(self, data_repeated): + # GH 20825 + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + with np.errstate(over="ignore"): + expected = pd.Series( + orig_data1._from_sequence( + [a + b for (a, b) in zip(list(orig_data1), list(orig_data2))] + ) + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series( + orig_data1._from_sequence([a + val for a in list(orig_data1)]) + ) + self.assert_series_equal(result, expected) + + def test_combine_first(self, data): + # https://github.com/pandas-dev/pandas/issues/24147 + a = pd.Series(data[:3]) + b = pd.Series(data[2:5], index=[2, 3, 4]) + result = a.combine_first(b) + expected = pd.Series(data[:5]) + self.assert_series_equal(result, expected) + + @pytest.mark.parametrize("frame", [True, False]) + @pytest.mark.parametrize( + "periods, indices", + [(-2, [2, 3, 4, -1, -1]), (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2])], + ) + def test_container_shift(self, data, frame, periods, indices): + # https://github.com/pandas-dev/pandas/issues/22386 + subset = data[:5] + data = pd.Series(subset, name="A") + expected = pd.Series(subset.take(indices, allow_fill=True), 
name="A") + + if frame: + result = data.to_frame(name="A").assign(B=1).shift(periods) + expected = pd.concat( + [expected, pd.Series([1] * 5, name="B").shift(periods)], axis=1 + ) + compare = self.assert_frame_equal + else: + result = data.shift(periods) + compare = self.assert_series_equal + + compare(result, expected) + + @pytest.mark.parametrize("periods", [1, -2]) + def test_diff(self, data, periods): + data = data[:5] + if is_bool_dtype(data.dtype): + op = operator.xor + else: + op = operator.sub + try: + # does this array implement ops? + op(data, data) + except Exception: + pytest.skip(f"{type(data)} does not support diff") + s = pd.Series(data) + result = s.diff(periods) + expected = pd.Series(op(data, data.shift(periods))) + self.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": data, "B": [1.0] * 5}) + result = df.diff(periods) + if periods == 1: + b = [np.nan, 0, 0, 0, 0] + else: + b = [0, 0, 0, np.nan, np.nan] + expected = pd.DataFrame({"A": expected, "B": b}) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "periods, indices", + [[-4, [-1, -1]], [-1, [1, -1]], [0, [0, 1]], [1, [-1, 0]], [4, [-1, -1]]], + ) + def test_shift_non_empty_array(self, data, periods, indices): + # https://github.com/pandas-dev/pandas/issues/23911 + subset = data[:2] + result = subset.shift(periods) + expected = subset.take(indices, allow_fill=True) + self.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("periods", [-4, -1, 0, 1, 4]) + def test_shift_empty_array(self, data, periods): + # https://github.com/pandas-dev/pandas/issues/23911 + empty = data[:0] + result = empty.shift(periods) + expected = empty + self.assert_extension_array_equal(result, expected) + + def test_shift_zero_copies(self, data): + result = data.shift(0) + assert result is not data + + result = data[:0].shift(2) + assert result is not data + + def test_shift_fill_value(self, data): + arr = data[:4] + fill_value = data[0] + result = 
arr.shift(1, fill_value=fill_value) + expected = data.take([0, 0, 1, 2]) + self.assert_extension_array_equal(result, expected) + + result = arr.shift(-2, fill_value=fill_value) + expected = data.take([2, 3, 0, 0]) + self.assert_extension_array_equal(result, expected) + + def test_hash_pandas_object_works(self, data, as_frame): + # https://github.com/pandas-dev/pandas/issues/23066 + data = pd.Series(data) + if as_frame: + data = data.to_frame() + a = pd.util.hash_pandas_object(data) + b = pd.util.hash_pandas_object(data) + self.assert_equal(a, b) + + def test_searchsorted(self, data_for_sorting, as_series): + b, c, a = data_for_sorting + arr = type(data_for_sorting)._from_sequence([a, b, c]) + + if as_series: + arr = pd.Series(arr) + assert arr.searchsorted(a) == 0 + assert arr.searchsorted(a, side="right") == 1 + + assert arr.searchsorted(b) == 1 + assert arr.searchsorted(b, side="right") == 2 + + assert arr.searchsorted(c) == 2 + assert arr.searchsorted(c, side="right") == 3 + + result = arr.searchsorted(arr.take([0, 2])) + expected = np.array([0, 2], dtype=np.intp) + + tm.assert_numpy_array_equal(result, expected) + + # sorter + sorter = np.array([1, 2, 0]) + assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 + + def test_where_series(self, data, na_value, as_frame): + assert data[0] != data[1] + cls = type(data) + a, b = data[:2] + + ser = pd.Series(cls._from_sequence([a, a, b, b], dtype=data.dtype)) + cond = np.array([True, True, False, False]) + + if as_frame: + ser = ser.to_frame(name="a") + cond = cond.reshape(-1, 1) + + result = ser.where(cond) + expected = pd.Series( + cls._from_sequence([a, a, na_value, na_value], dtype=data.dtype) + ) + + if as_frame: + expected = expected.to_frame(name="a") + self.assert_equal(result, expected) + + # array other + cond = np.array([True, False, True, True]) + other = cls._from_sequence([a, b, a, b], dtype=data.dtype) + if as_frame: + other = pd.DataFrame({"a": other}) + cond = pd.DataFrame({"a": cond}) + result 
= ser.where(cond, other) + expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) + if as_frame: + expected = expected.to_frame(name="a") + self.assert_equal(result, expected) + + @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) + def test_repeat(self, data, repeats, as_series, use_numpy): + arr = type(data)._from_sequence(data[:3], dtype=data.dtype) + if as_series: + arr = pd.Series(arr) + + result = np.repeat(arr, repeats) if use_numpy else arr.repeat(repeats) + + repeats = [repeats] * 3 if isinstance(repeats, int) else repeats + expected = [x for x, n in zip(arr, repeats) for _ in range(n)] + expected = type(data)._from_sequence(expected, dtype=data.dtype) + if as_series: + expected = pd.Series(expected, index=arr.index.repeat(repeats)) + + self.assert_equal(result, expected) + + @pytest.mark.parametrize( + "repeats, kwargs, error, msg", + [ + (2, dict(axis=1), ValueError, "'axis"), + (-1, dict(), ValueError, "negative"), + ([1, 2], dict(), ValueError, "shape"), + (2, dict(foo="bar"), TypeError, "'foo'"), + ], + ) + def test_repeat_raises(self, data, repeats, kwargs, error, msg, use_numpy): + with pytest.raises(error, match=msg): + if use_numpy: + np.repeat(data, repeats, **kwargs) + else: + data.repeat(repeats, **kwargs) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py new file mode 100644 index 00000000..2393d2ed --- /dev/null +++ b/pandas/tests/extension/base/missing.py @@ -0,0 +1,129 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + +from .base import BaseExtensionTests + + +class BaseMissingTests(BaseExtensionTests): + def test_isna(self, data_missing): + expected = np.array([True, False]) + + result = pd.isna(data_missing) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(data_missing).isna() + expected = pd.Series(expected) + self.assert_series_equal(result, expected) + + # GH 21189 + result = pd.Series(data_missing).drop([0, 1]).isna() 
+ expected = pd.Series([], dtype=bool) + self.assert_series_equal(result, expected) + + def test_dropna_array(self, data_missing): + result = data_missing.dropna() + expected = data_missing[[1]] + self.assert_extension_array_equal(result, expected) + + def test_dropna_series(self, data_missing): + ser = pd.Series(data_missing) + result = ser.dropna() + expected = ser.iloc[[1]] + self.assert_series_equal(result, expected) + + def test_dropna_frame(self, data_missing): + df = pd.DataFrame({"A": data_missing}) + + # defaults + result = df.dropna() + expected = df.iloc[[1]] + self.assert_frame_equal(result, expected) + + # axis = 1 + result = df.dropna(axis="columns") + expected = pd.DataFrame(index=[0, 1]) + self.assert_frame_equal(result, expected) + + # multiple + df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]}) + result = df.dropna() + expected = df.iloc[:0] + self.assert_frame_equal(result, expected) + + def test_fillna_scalar(self, data_missing): + valid = data_missing[1] + result = data_missing.fillna(valid) + expected = data_missing.fillna(valid) + self.assert_extension_array_equal(result, expected) + + def test_fillna_limit_pad(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method="ffill", limit=2) + expected = pd.Series(data_missing.take([1, 1, 1, 0, 1])) + self.assert_series_equal(result, expected) + + def test_fillna_limit_backfill(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method="backfill", limit=2) + expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) + self.assert_series_equal(result, expected) + + def test_fillna_series(self, data_missing): + fill_value = data_missing[1] + ser = pd.Series(data_missing) + + result = ser.fillna(fill_value) + expected = pd.Series( + data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ) + ) + self.assert_series_equal(result, expected) + + # Fill with a series + result = 
ser.fillna(expected) + self.assert_series_equal(result, expected) + + # Fill with a series not affecting the missing values + result = ser.fillna(ser) + self.assert_series_equal(result, ser) + + def test_fillna_series_method(self, data_missing, fillna_method): + fill_value = data_missing[1] + + if fillna_method == "ffill": + data_missing = data_missing[::-1] + + result = pd.Series(data_missing).fillna(method=fillna_method) + expected = pd.Series( + data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ) + ) + + self.assert_series_equal(result, expected) + + def test_fillna_frame(self, data_missing): + fill_value = data_missing[1] + + result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value) + + expected = pd.DataFrame( + { + "A": data_missing._from_sequence( + [fill_value, fill_value], dtype=data_missing.dtype + ), + "B": [1, 2], + } + ) + + self.assert_frame_equal(result, expected) + + def test_fillna_fill_other(self, data): + result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0}) + + expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)}) + + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py new file mode 100644 index 00000000..0609f19c --- /dev/null +++ b/pandas/tests/extension/base/ops.py @@ -0,0 +1,178 @@ +import operator +from typing import Optional, Type + +import pytest + +import pandas as pd +from pandas.core import ops + +from .base import BaseExtensionTests + + +class BaseOpsUtil(BaseExtensionTests): + def get_op_from_name(self, op_name): + short_opname = op_name.strip("_") + try: + op = getattr(operator, short_opname) + except AttributeError: + # Assume it is the reverse operator + rop = getattr(operator, short_opname[1:]) + op = lambda x, y: rop(y, x) + + return op + + def check_opname(self, s, op_name, other, exc=Exception): + op = self.get_op_from_name(op_name) + + self._check_op(s, op, other, 
op_name, exc) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + result = op(s, other) + expected = s.combine(other, op) + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=Exception): + # divmod has multiple return values, so check separately + if exc is None: + result_div, result_mod = op(s, other) + if op is divmod: + expected_div, expected_mod = s // other, s % other + else: + expected_div, expected_mod = other // s, other % s + self.assert_series_equal(result_div, expected_div) + self.assert_series_equal(result_mod, expected_mod) + else: + with pytest.raises(exc): + divmod(s, other) + + +class BaseArithmeticOpsTests(BaseOpsUtil): + """Various Series and DataFrame arithmetic ops methods. + + Subclasses supporting various ops should set the class variables + to indicate that they support ops of that kind + + * series_scalar_exc = TypeError + * frame_scalar_exc = TypeError + * series_array_exc = TypeError + * divmod_exc = TypeError + """ + + series_scalar_exc: Optional[Type[TypeError]] = TypeError + frame_scalar_exc: Optional[Type[TypeError]] = TypeError + series_array_exc: Optional[Type[TypeError]] = TypeError + divmod_exc: Optional[Type[TypeError]] = TypeError + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # series & scalar + op_name = all_arithmetic_operators + s = pd.Series(data) + self.check_opname(s, op_name, s.iloc[0], exc=self.series_scalar_exc) + + @pytest.mark.xfail(run=False, reason="_reduce needs implementation") + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + op_name = all_arithmetic_operators + df = pd.DataFrame({"A": data}) + self.check_opname(df, op_name, data[0], exc=self.frame_scalar_exc) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + # ndarray & other series + op_name = all_arithmetic_operators + s = 
pd.Series(data) + self.check_opname( + s, op_name, pd.Series([s.iloc[0]] * len(s)), exc=self.series_array_exc + ) + + def test_divmod(self, data): + s = pd.Series(data) + self._check_divmod_op(s, divmod, 1, exc=self.divmod_exc) + self._check_divmod_op(1, ops.rdivmod, s, exc=self.divmod_exc) + + def test_divmod_series_array(self, data, data_for_twos): + s = pd.Series(data) + self._check_divmod_op(s, divmod, data) + + other = data_for_twos + self._check_divmod_op(other, ops.rdivmod, s) + + other = pd.Series(other) + self._check_divmod_op(other, ops.rdivmod, s) + + def test_add_series_with_extension_array(self, data): + s = pd.Series(data) + result = s + data + expected = pd.Series(data + data) + self.assert_series_equal(result, expected) + + def test_error(self, data, all_arithmetic_operators): + # invalid ops + op_name = all_arithmetic_operators + with pytest.raises(AttributeError): + getattr(data, op_name) + + def test_direct_arith_with_series_returns_not_implemented(self, data): + # EAs should return NotImplemented for ops with Series. + # Pandas takes care of unboxing the series and calling the EA's op. 
+ other = pd.Series(data) + if hasattr(data, "__add__"): + result = data.__add__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement add") + + +class BaseComparisonOpsTests(BaseOpsUtil): + """Various Series and DataFrame comparison ops methods.""" + + def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) + if op_name == "__eq__": + assert getattr(data, op_name)(other) is NotImplemented + assert not op(s, other).all() + elif op_name == "__ne__": + assert getattr(data, op_name)(other) is NotImplemented + assert op(s, other).all() + + else: + + # array + assert getattr(data, op_name)(other) is NotImplemented + + # series + s = pd.Series(data) + with pytest.raises(TypeError): + op(s, other) + + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + self._compare_other(s, data, op_name, 0) + + def test_compare_array(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + other = pd.Series([data[0]] * len(data)) + self._compare_other(s, data, op_name, other) + + def test_direct_arith_with_series_returns_not_implemented(self, data): + # EAs should return NotImplemented for ops with Series. + # Pandas takes care of unboxing the series and calling the EA's op. 
+ other = pd.Series(data) + if hasattr(data, "__eq__"): + result = data.__eq__(other) + assert result is NotImplemented + else: + raise pytest.skip(f"{type(data).__name__} does not implement __eq__") + + +class BaseUnaryOpsTests(BaseOpsUtil): + def test_invert(self, data): + s = pd.Series(data, name="name") + result = ~s + expected = pd.Series(~data, name="name") + self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/printing.py b/pandas/tests/extension/base/printing.py new file mode 100644 index 00000000..ad34a83c --- /dev/null +++ b/pandas/tests/extension/base/printing.py @@ -0,0 +1,43 @@ +import io + +import pytest + +import pandas as pd + +from .base import BaseExtensionTests + + +class BasePrintingTests(BaseExtensionTests): + """Tests checking the formatting of your EA when printed.""" + + @pytest.mark.parametrize("size", ["big", "small"]) + def test_array_repr(self, data, size): + if size == "small": + data = data[:5] + else: + data = type(data)._concat_same_type([data] * 5) + + result = repr(data) + assert type(data).__name__ in result + assert f"Length: {len(data)}" in result + assert str(data.dtype) in result + if size == "big": + assert "..." 
in result + + def test_array_repr_unicode(self, data): + result = str(data) + assert isinstance(result, str) + + def test_series_repr(self, data): + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + + def test_dataframe_repr(self, data): + df = pd.DataFrame({"A": data}) + repr(df) + + def test_dtype_name_in_info(self, data): + buf = io.StringIO() + pd.DataFrame({"A": data}).info(buf=buf) + result = buf.getvalue() + assert data.dtype.name in result diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py new file mode 100644 index 00000000..6f433d65 --- /dev/null +++ b/pandas/tests/extension/base/reduce.py @@ -0,0 +1,60 @@ +import warnings + +import pytest + +import pandas as pd +import pandas._testing as tm + +from .base import BaseExtensionTests + + +class BaseReduceTests(BaseExtensionTests): + """ + Reduction specific tests. Generally these only + make sense for numeric/boolean operations. + """ + + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + +class BaseNoReduceTests(BaseReduceTests): + """ we don't define any reductions """ + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + s = pd.Series(data) + + with pytest.raises(TypeError): + getattr(s, op_name)(skipna=skipna) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): + op_name = all_boolean_reductions + s = pd.Series(data) + + with pytest.raises(TypeError): + getattr(s, op_name)(skipna=skipna) + + +class BaseNumericReduceTests(BaseReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + s = 
pd.Series(data) + + # min/max with empty produce numpy warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_reduce(s, op_name, skipna) + + +class BaseBooleanReduceTests(BaseReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_boolean_reductions, skipna): + op_name = all_boolean_reductions + s = pd.Series(data) + self.check_reduce(s, op_name, skipna) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py new file mode 100644 index 00000000..ec218988 --- /dev/null +++ b/pandas/tests/extension/base/reshaping.py @@ -0,0 +1,326 @@ +import itertools + +import numpy as np +import pytest + +import pandas as pd +from pandas.core.internals import ExtensionBlock + +from .base import BaseExtensionTests + + +class BaseReshapingTests(BaseExtensionTests): + """Tests for reshaping and concatenation.""" + + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat(self, data, in_frame): + wrapped = pd.Series(data) + if in_frame: + wrapped = pd.DataFrame(wrapped) + result = pd.concat([wrapped, wrapped], ignore_index=True) + + assert len(result) == len(data) * 2 + + if in_frame: + dtype = result.dtypes[0] + else: + dtype = result.dtype + + assert dtype == data.dtype + assert isinstance(result._data.blocks[0], ExtensionBlock) + + @pytest.mark.parametrize("in_frame", [True, False]) + def test_concat_all_na_block(self, data_missing, in_frame): + valid_block = pd.Series(data_missing.take([1, 1]), index=[0, 1]) + na_block = pd.Series(data_missing.take([0, 0]), index=[2, 3]) + if in_frame: + valid_block = pd.DataFrame({"a": valid_block}) + na_block = pd.DataFrame({"a": na_block}) + result = pd.concat([valid_block, na_block]) + if in_frame: + expected = pd.DataFrame({"a": data_missing.take([1, 1, 0, 0])}) + self.assert_frame_equal(result, expected) + else: + expected = pd.Series(data_missing.take([1, 1, 0, 0])) + 
self.assert_series_equal(result, expected) + + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category") + dfs = [df1, df2, df3] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat([x.astype(object) for x in dfs]) + self.assert_frame_equal(result, expected) + + # series + result = pd.concat([x["A"] for x in dfs]) + expected = pd.concat([x["A"].astype(object) for x in dfs]) + self.assert_series_equal(result, expected) + + # simple test for just EA and one other + result = pd.concat([df1, df2]) + expected = pd.concat([df1.astype("object"), df2.astype("object")]) + self.assert_frame_equal(result, expected) + + result = pd.concat([df1["A"], df2["A"]]) + expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")]) + self.assert_series_equal(result, expected) + + def test_concat_columns(self, data, na_value): + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"B": [1, 2, 3]}) + + expected = pd.DataFrame({"A": data[:3], "B": [1, 2, 3]}) + result = pd.concat([df1, df2], axis=1) + self.assert_frame_equal(result, expected) + result = pd.concat([df1["A"], df2["B"]], axis=1) + self.assert_frame_equal(result, expected) + + # non-aligned + df2 = pd.DataFrame({"B": [1, 2, 3]}, index=[1, 2, 3]) + expected = pd.DataFrame( + { + "A": data._from_sequence(list(data[:3]) + [na_value], dtype=data.dtype), + "B": [np.nan, 1, 2, 3], + } + ) + + result = pd.concat([df1, df2], axis=1) + self.assert_frame_equal(result, expected) + result = pd.concat([df1["A"], df2["B"]], axis=1) + self.assert_frame_equal(result, expected) + + def test_concat_extension_arrays_copy_false(self, data, na_value): + # GH 20756 + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"B": data[3:7]}) + expected = pd.DataFrame( + { + "A": data._from_sequence(list(data[:3]) + [na_value], 
dtype=data.dtype), + "B": data[3:7], + } + ) + result = pd.concat([df1, df2], axis=1, copy=False) + self.assert_frame_equal(result, expected) + + def test_align(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.Series(data._from_sequence(list(a) + [na_value], dtype=data.dtype)) + e2 = pd.Series(data._from_sequence([na_value] + list(b), dtype=data.dtype)) + self.assert_series_equal(r1, e1) + self.assert_series_equal(r2, e2) + + def test_align_frame(self, data, na_value): + a = data[:3] + b = data[2:5] + r1, r2 = pd.DataFrame({"A": a}).align(pd.DataFrame({"A": b}, index=[1, 2, 3])) + + # Assumes that the ctor can take a list of scalars of the type + e1 = pd.DataFrame( + {"A": data._from_sequence(list(a) + [na_value], dtype=data.dtype)} + ) + e2 = pd.DataFrame( + {"A": data._from_sequence([na_value] + list(b), dtype=data.dtype)} + ) + self.assert_frame_equal(r1, e1) + self.assert_frame_equal(r2, e2) + + def test_align_series_frame(self, data, na_value): + # https://github.com/pandas-dev/pandas/issues/20576 + ser = pd.Series(data, name="a") + df = pd.DataFrame({"col": np.arange(len(ser) + 1)}) + r1, r2 = ser.align(df) + + e1 = pd.Series( + data._from_sequence(list(data) + [na_value], dtype=data.dtype), + name=ser.name, + ) + + self.assert_series_equal(r1, e1) + self.assert_frame_equal(r2, df) + + def test_set_frame_expand_regular_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + df["B"] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(df, expected) + + def test_set_frame_expand_extension_with_regular(self, data): + df = pd.DataFrame({"A": data}) + df["B"] = [1] * len(data) + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(df, expected) + + def test_set_frame_overwrite_object(self, data): + # 
https://github.com/pandas-dev/pandas/issues/20555 + df = pd.DataFrame({"A": [1] * len(data)}, dtype=object) + df["A"] = data + assert df.dtypes["A"] == data.dtype + + def test_merge(self, data, na_value): + # GH-20743 + df1 = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]}) + df2 = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]}) + + res = pd.merge(df1, df2) + exp = pd.DataFrame( + { + "int1": [1, 1, 2], + "int2": [1, 2, 3], + "key": [0, 0, 1], + "ext": data._from_sequence( + [data[0], data[0], data[1]], dtype=data.dtype + ), + } + ) + self.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) + + res = pd.merge(df1, df2, how="outer") + exp = pd.DataFrame( + { + "int1": [1, 1, 2, 3, np.nan], + "int2": [1, 2, 3, np.nan, 4], + "key": [0, 0, 1, 2, 3], + "ext": data._from_sequence( + [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype + ), + } + ) + self.assert_frame_equal(res, exp[["ext", "int1", "key", "int2"]]) + + def test_merge_on_extension_array(self, data): + # GH 23020 + a, b = data[:2] + key = type(data)._from_sequence([a, b], dtype=data.dtype) + + df = pd.DataFrame({"key": key, "val": [1, 2]}) + result = pd.merge(df, df, on="key") + expected = pd.DataFrame({"key": key, "val_x": [1, 2], "val_y": [1, 2]}) + self.assert_frame_equal(result, expected) + + # order + result = pd.merge(df.iloc[[1, 0]], df, on="key") + expected = expected.iloc[[1, 0]].reset_index(drop=True) + self.assert_frame_equal(result, expected) + + def test_merge_on_extension_array_duplicates(self, data): + # GH 23020 + a, b = data[:2] + key = type(data)._from_sequence([a, b, a], dtype=data.dtype) + df1 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) + df2 = pd.DataFrame({"key": key, "val": [1, 2, 3]}) + + result = pd.merge(df1, df2, on="key") + expected = pd.DataFrame( + { + "key": key.take([0, 0, 0, 0, 1]), + "val_x": [1, 1, 3, 3, 2], + "val_y": [1, 3, 1, 3, 2], + } + ) + self.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + 
"columns", + [ + ["A", "B"], + pd.MultiIndex.from_tuples( + [("A", "a"), ("A", "b")], names=["outer", "inner"] + ), + ], + ) + def test_stack(self, data, columns): + df = pd.DataFrame({"A": data[:5], "B": data[:5]}) + df.columns = columns + result = df.stack() + expected = df.astype(object).stack() + # we need a second astype(object), in case the constructor inferred + # object -> specialized, as is done for period. + expected = expected.astype(object) + + if isinstance(expected, pd.Series): + assert result.dtype == df.iloc[:, 0].dtype + else: + assert all(result.dtypes == df.iloc[:, 0].dtype) + + result = result.astype(object) + self.assert_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + # Two levels, uniform. + pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]), + # non-uniform + pd.MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "b")]), + # three levels, non-uniform + pd.MultiIndex.from_product([("A", "B"), ("a", "b", "c"), (0, 1, 2)]), + pd.MultiIndex.from_tuples( + [ + ("A", "a", 1), + ("A", "b", 0), + ("A", "a", 0), + ("B", "a", 0), + ("B", "c", 1), + ] + ), + ], + ) + @pytest.mark.parametrize("obj", ["series", "frame"]) + def test_unstack(self, data, index, obj): + data = data[: len(index)] + if obj == "series": + ser = pd.Series(data, index=index) + else: + ser = pd.DataFrame({"A": data, "B": data}, index=index) + + n = index.nlevels + levels = list(range(n)) + # [0, 1, 2] + # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)] + combinations = itertools.chain.from_iterable( + itertools.permutations(levels, i) for i in range(1, n) + ) + + for level in combinations: + result = ser.unstack(level=level) + assert all( + isinstance(result[col].array, type(data)) for col in result.columns + ) + expected = ser.astype(object).unstack(level=level) + result = result.astype(object) + + self.assert_frame_equal(result, expected) + + def test_ravel(self, data): + # as long as EA is 1D-only, ravel is a 
no-op + result = data.ravel() + assert type(result) == type(data) + + # Check that we have a view, not a copy + result[0] = result[1] + assert data[0] == data[1] + + def test_transpose(self, data): + df = pd.DataFrame({"A": data[:4], "B": data[:4]}, index=["a", "b", "c", "d"]) + result = df.T + expected = pd.DataFrame( + { + "a": type(data)._from_sequence([data[0]] * 2, dtype=data.dtype), + "b": type(data)._from_sequence([data[1]] * 2, dtype=data.dtype), + "c": type(data)._from_sequence([data[2]] * 2, dtype=data.dtype), + "d": type(data)._from_sequence([data[3]] * 2, dtype=data.dtype), + }, + index=["A", "B"], + ) + self.assert_frame_equal(result, expected) + self.assert_frame_equal(np.transpose(np.transpose(df)), df) + self.assert_frame_equal(np.transpose(np.transpose(df[["A"]])), df[["A"]]) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py new file mode 100644 index 00000000..b3f6a608 --- /dev/null +++ b/pandas/tests/extension/base/setitem.py @@ -0,0 +1,345 @@ +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.numpy_ import PandasDtype + +from .base import BaseExtensionTests + + +class BaseSetitemTests(BaseExtensionTests): + def test_setitem_scalar_series(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + data[0] = data[1] + assert data[0] == data[1] + + def test_setitem_sequence(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + original = data.copy() + + data[[0, 1]] = [data[1], data[0]] + assert data[0] == original[1] + assert data[1] == original[0] + + def test_setitem_sequence_mismatched_length_raises(self, data, as_array): + ser = pd.Series(data) + original = ser.copy() + value = [data[0]] + if as_array: + value = data._from_sequence(value) + + xpr = "cannot set using a {} indexer with a different length" + with pytest.raises(ValueError, match=xpr.format("list-like")): + ser[[0, 1]] = 
value + # Ensure no modifications made before the exception + self.assert_series_equal(ser, original) + + with pytest.raises(ValueError, match=xpr.format("slice")): + ser[slice(3)] = value + self.assert_series_equal(ser, original) + + def test_setitem_empty_indxer(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + original = data.copy() + data[np.array([], dtype=int)] = [] + self.assert_equal(data, original) + + def test_setitem_sequence_broadcasts(self, data, box_in_series): + if box_in_series: + data = pd.Series(data) + data[[0, 1]] = data[2] + assert data[0] == data[2] + assert data[1] == data[2] + + @pytest.mark.parametrize("setter", ["loc", "iloc"]) + def test_setitem_scalar(self, data, setter): + arr = pd.Series(data) + setter = getattr(arr, setter) + operator.setitem(setter, 0, data[1]) + assert arr[0] == data[1] + + def test_setitem_loc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.loc[0, "B"] = data[1] + assert df.loc[0, "B"] == data[1] + + def test_setitem_loc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.loc[10, "B"] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_loc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.loc[10, "B"] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_iloc_scalar_mixed(self, data): + df = pd.DataFrame({"A": np.arange(len(data)), "B": data}) + df.iloc[0, 1] = data[1] + assert df.loc[0, "B"] == data[1] + + def test_setitem_iloc_scalar_single(self, data): + df = pd.DataFrame({"B": data}) + df.iloc[10, 0] = data[1] + assert df.loc[10, "B"] == data[1] + + def test_setitem_iloc_scalar_multiple_homogoneous(self, data): + df = pd.DataFrame({"A": data, "B": data}) + df.iloc[10, 1] = data[1] + assert df.loc[10, "B"] == data[1] + + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], 
dtype="boolean"), + pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array", "boolean-array-na"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + arr = data[:5].copy() + expected = arr.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + arr[mask] = data[0] + self.assert_equal(expected, arr) + + def test_setitem_mask_raises(self, data, box_in_series): + # wrong length + mask = np.array([True, False]) + + if box_in_series: + data = pd.Series(data) + + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + mask = pd.array(mask, dtype="boolean") + with pytest.raises(IndexError, match="wrong length"): + data[mask] = data[0] + + def test_setitem_mask_boolean_array_with_na(self, data, box_in_series): + mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean") + mask[:3] = True + mask[3:5] = pd.NA + + if box_in_series: + data = pd.Series(data) + + data[mask] = data[0] + + assert (data[:3] == data[0]).all() + + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[idx] = arr[0] + self.assert_equal(arr, expected) + + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param( + [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948") + ), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + arr = data.copy() + + # TODO(xfail) this raises 
KeyError about labels not found (it tries label-based) + # for list of labels with Series + if box_in_series: + arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + arr[idx] = arr[0] + + @pytest.mark.parametrize("as_callable", [True, False]) + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_aligned(self, data, as_callable, setter): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + if as_callable: + mask2 = lambda x: mask + else: + mask2 = mask + + if setter: + # loc + target = getattr(ser, setter) + else: + # Series.__setitem__ + target = ser + + operator.setitem(target, mask2, data[5:7]) + + ser[mask2] = data[5:7] + assert ser[0] == data[5] + assert ser[1] == data[6] + + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_broadcast(self, data, setter): + ser = pd.Series(data) + mask = np.zeros(len(data), dtype=bool) + mask[:2] = True + + if setter: # loc + target = getattr(ser, setter) + else: # __setitem__ + target = ser + + operator.setitem(target, mask, data[10]) + assert ser[0] == data[10] + assert ser[1] == data[10] + + def test_setitem_expand_columns(self, data): + df = pd.DataFrame({"A": data}) + result = df.copy() + result["B"] = 1 + expected = pd.DataFrame({"A": data, "B": [1] * len(data)}) + self.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[:, "B"] = 1 + self.assert_frame_equal(result, expected) + + # overwrite with new type + result["B"] = data + expected = pd.DataFrame({"A": data, "B": data}) + self.assert_frame_equal(result, expected) + + def test_setitem_expand_with_extension(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + result = df.copy() + result["B"] = data + expected = pd.DataFrame({"A": [1] * len(data), "B": data}) + self.assert_frame_equal(result, expected) + + result = df.copy() + 
result.loc[:, "B"] = data + self.assert_frame_equal(result, expected) + + def test_setitem_frame_invalid_length(self, data): + df = pd.DataFrame({"A": [1] * len(data)}) + xpr = "Length of values does not match length of index" + with pytest.raises(ValueError, match=xpr): + df["B"] = data[:5] + + @pytest.mark.xfail(reason="GH#20441: setitem on extension types.") + def test_setitem_tuple_index(self, data): + s = pd.Series(data[:2], index=[(0, 0), (0, 1)]) + expected = pd.Series(data.take([1, 1]), index=s.index) + s[(0, 1)] = data[1] + self.assert_series_equal(s, expected) + + def test_setitem_slice(self, data, box_in_series): + arr = data[:5].copy() + expected = data.take([0, 0, 0, 3, 4]) + if box_in_series: + arr = pd.Series(arr) + expected = pd.Series(expected) + + arr[:3] = data[0] + self.assert_equal(arr, expected) + + def test_setitem_loc_iloc_slice(self, data): + arr = data[:5].copy() + s = pd.Series(arr, index=["a", "b", "c", "d", "e"]) + expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index) + + result = s.copy() + result.iloc[:3] = data[0] + self.assert_equal(result, expected) + + result = s.copy() + result.loc[:"c"] = data[0] + self.assert_equal(result, expected) + + def test_setitem_slice_mismatch_length_raises(self, data): + arr = data[:5] + with pytest.raises(ValueError): + arr[:1] = arr[:2] + + def test_setitem_slice_array(self, data): + arr = data[:5].copy() + arr[:5] = data[-5:] + self.assert_extension_array_equal(arr, data[-5:]) + + def test_setitem_scalar_key_sequence_raise(self, data): + arr = data[:5].copy() + with pytest.raises(ValueError): + arr[0] = arr[[0, 1]] + + def test_setitem_preserves_views(self, data): + # GH#28150 setitem shouldn't swap the underlying data + view1 = data.view() + view2 = data[:] + + data[0] = data[1] + assert view1[0] == data[1] + assert view2[0] == data[1] + + def test_setitem_nullable_mask(self, data): + # GH 31446 + # TODO: there is some issue with PandasArray, therefore, + # TODO: skip the setitem test for 
now, and fix it later + if data.dtype != PandasDtype("object"): + arr = data[:5] + expected = data.take([0, 0, 0, 3, 4]) + mask = pd.array([True, True, True, False, False]) + arr[mask] = data[0] + self.assert_extension_array_equal(expected, arr) + + def test_setitem_dataframe_column_with_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + result.loc[df.index, "data"] = df["data"] + self.assert_frame_equal(result, expected) + + def test_setitem_dataframe_column_without_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + df = expected = pd.DataFrame({"data": pd.Series(data)}) + result = pd.DataFrame(index=df.index) + result.loc[:, "data"] = df["data"] + self.assert_frame_equal(result, expected) + + def test_setitem_series_with_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + ser = expected = pd.Series(data, name="data") + result = pd.Series(index=ser.index, dtype=np.object, name="data") + result.loc[ser.index] = ser + self.assert_series_equal(result, expected) + + def test_setitem_series_without_index(self, data): + # https://github.com/pandas-dev/pandas/issues/32395 + ser = expected = pd.Series(data, name="data") + result = pd.Series(index=ser.index, dtype=np.object, name="data") + result.loc[:] = ser + self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py new file mode 100644 index 00000000..d37638d3 --- /dev/null +++ b/pandas/tests/extension/conftest.py @@ -0,0 +1,178 @@ +import operator + +import pytest + +from pandas import Series + + +@pytest.fixture +def dtype(): + """A fixture providing the ExtensionDtype to validate.""" + raise NotImplementedError + + +@pytest.fixture +def data(): + """Length-100 array for this type. 
+ + * data[0] and data[1] should both be non missing + * data[0] and data[1] should not be equal + """ + raise NotImplementedError + + +@pytest.fixture +def data_for_twos(): + """Length-100 array in which all the elements are two.""" + raise NotImplementedError + + +@pytest.fixture +def data_missing(): + """Length-2 array with [NA, Valid]""" + raise NotImplementedError + + +@pytest.fixture(params=["data", "data_missing"]) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == "data": + return data + elif request.param == "data_missing": + return data_missing + + +@pytest.fixture +def data_repeated(data): + """ + Generate many datasets. + + Parameters + ---------- + data : fixture implementing `data` + + Returns + ------- + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. + """ + + def gen(count): + for _ in range(count): + yield data + + return gen + + +@pytest.fixture +def data_for_sorting(): + """Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + raise NotImplementedError + + +@pytest.fixture +def data_missing_for_sorting(): + """Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + raise NotImplementedError + + +@pytest.fixture +def na_cmp(): + """Binary operator for comparing NA values. + + Should return a function of two arguments that returns + True if both arguments are (scalar) NA for your type. + + By default, uses ``operator.is_`` + """ + return operator.is_ + + +@pytest.fixture +def na_value(): + """The scalar missing value for this type. Default 'None'""" + return None + + +@pytest.fixture +def data_for_grouping(): + """Data for factorization, grouping, and unique tests. 
+ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + raise NotImplementedError + + +@pytest.fixture(params=[True, False]) +def box_in_series(request): + """Whether to box the data in a Series""" + return request.param + + +@pytest.fixture( + params=[ + lambda x: 1, + lambda x: [1] * len(x), + lambda x: Series([1] * len(x)), + lambda x: x, + ], + ids=["scalar", "list", "series", "object"], +) +def groupby_apply_op(request): + """ + Functions to test groupby.apply(). + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + """ + return request.param + + +@pytest.fixture(params=["ffill", "bfill"]) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series.fillna(method=) testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_array(request): + """ + Boolean fixture to support ExtensionDtype _from_sequence method testing. 
+ """ + return request.param diff --git a/pandas/tests/extension/decimal/__init__.py b/pandas/tests/extension/decimal/__init__.py new file mode 100644 index 00000000..8194327f --- /dev/null +++ b/pandas/tests/extension/decimal/__init__.py @@ -0,0 +1,3 @@ +from .array import DecimalArray, DecimalDtype, make_data, to_decimal + +__all__ = ["DecimalArray", "DecimalDtype", "to_decimal", "make_data"] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py new file mode 100644 index 00000000..8fd4a017 --- /dev/null +++ b/pandas/tests/extension/decimal/array.py @@ -0,0 +1,206 @@ +import decimal +import numbers +import random +import sys + +import numpy as np + +from pandas.core.dtypes.base import ExtensionDtype + +import pandas as pd +from pandas.api.extensions import no_default, register_extension_dtype +from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin +from pandas.core.indexers import check_array_indexer + + +@register_extension_dtype +class DecimalDtype(ExtensionDtype): + type = decimal.Decimal + name = "decimal" + na_value = decimal.Decimal("NaN") + _metadata = ("context",) + + def __init__(self, context=None): + self.context = context or decimal.getcontext() + + def __repr__(self) -> str: + return f"DecimalDtype(context={self.context})" + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return DecimalArray + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + @property + def _is_numeric(self): + return True + + +class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin): + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False, context=None): + for val in values: + if not isinstance(val, decimal.Decimal): + raise TypeError("All values must be of type " + str(decimal.Decimal)) + values = np.asarray(values, dtype=object) + + self._data = values + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self.data = self._data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + self._dtype = DecimalDtype(context) + + @property + def dtype(self): + return self._dtype + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars) + + @classmethod + def _from_sequence_of_strings(cls, strings, dtype=None, copy=False): + return cls._from_sequence([decimal.Decimal(x) for x in strings], dtype, copy) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values) + + _HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray) + + def to_numpy(self, dtype=None, copy=False, na_value=no_default, decimals=None): + result = np.asarray(self, dtype=dtype) + if decimals is not None: + result = np.asarray([round(x, decimals) for x in result]) + return result + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + # + if not all( + isinstance(t, self._HANDLED_TYPES + (DecimalArray,)) for t in inputs + ): + return NotImplemented + + inputs = tuple(x._data if isinstance(x, DecimalArray) else x for x in inputs) + result = getattr(ufunc, method)(*inputs, **kwargs) + + def reconstruct(x): + 
if isinstance(x, (decimal.Decimal, numbers.Number)): + return x + else: + return DecimalArray._from_sequence(x) + + if isinstance(result, tuple): + return tuple(reconstruct(x) for x in result) + else: + return reconstruct(result) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self._data[item] + else: + # array, slice. + item = pd.api.indexers.check_array_indexer(self, item) + return type(self)(self._data[item]) + + def take(self, indexer, allow_fill=False, fill_value=None): + from pandas.api.extensions import take + + data = self._data + if allow_fill and fill_value is None: + fill_value = self.dtype.na_value + + result = take(data, indexer, fill_value=fill_value, allow_fill=allow_fill) + return self._from_sequence(result) + + def copy(self): + return type(self)(self._data.copy()) + + def astype(self, dtype, copy=True): + if isinstance(dtype, type(self.dtype)): + return type(self)(self._data, context=dtype.context) + return np.asarray(self, dtype=dtype) + + def __setitem__(self, key, value): + if pd.api.types.is_list_like(value): + if pd.api.types.is_scalar(key): + raise ValueError("setting an array element with a sequence.") + value = [decimal.Decimal(v) for v in value] + else: + value = decimal.Decimal(value) + + key = check_array_indexer(self, key) + self._data[key] = value + + def __len__(self) -> int: + return len(self._data) + + @property + def nbytes(self) -> int: + n = len(self) + if n: + return n * sys.getsizeof(self[0]) + return 0 + + def isna(self): + return np.array([x.is_nan() for x in self._data], dtype=bool) + + @property + def _na_value(self): + return decimal.Decimal("NaN") + + def _formatter(self, boxed=False): + if boxed: + return "Decimal: {0}".format + return repr + + @classmethod + def _concat_same_type(cls, to_concat): + return cls(np.concatenate([x._data for x in to_concat])) + + def _reduce(self, name, skipna=True, **kwargs): + + if skipna: + # If we don't have any NAs, we can ignore skipna + if 
self.isna().any(): + other = self[~self.isna()] + return other._reduce(name, **kwargs) + + if name == "sum" and len(self) == 0: + # GH#29630 avoid returning int 0 or np.bool_(False) on old numpy + return decimal.Decimal(0) + + try: + op = getattr(self.data, name) + except AttributeError: + raise NotImplementedError(f"decimal does not support the {name} operation") + return op(axis=0) + + +def to_decimal(values, context=None): + return DecimalArray([decimal.Decimal(x) for x in values], context=context) + + +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] + + +DecimalArray._add_arithmetic_ops() +DecimalArray._add_comparison_ops() diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py new file mode 100644 index 00000000..de7c98ab --- /dev/null +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -0,0 +1,515 @@ +import decimal +import math +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension import base + +from .array import DecimalArray, DecimalDtype, make_data, to_decimal + + +@pytest.fixture +def dtype(): + return DecimalDtype() + + +@pytest.fixture +def data(): + return DecimalArray(make_data()) + + +@pytest.fixture +def data_for_twos(): + return DecimalArray([decimal.Decimal(2) for _ in range(100)]) + + +@pytest.fixture +def data_missing(): + return DecimalArray([decimal.Decimal("NaN"), decimal.Decimal(1)]) + + +@pytest.fixture +def data_for_sorting(): + return DecimalArray( + [decimal.Decimal("1"), decimal.Decimal("2"), decimal.Decimal("0")] + ) + + +@pytest.fixture +def data_missing_for_sorting(): + return DecimalArray( + [decimal.Decimal("1"), decimal.Decimal("NaN"), decimal.Decimal("0")] + ) + + +@pytest.fixture +def na_cmp(): + return lambda x, y: x.is_nan() and y.is_nan() + + +@pytest.fixture +def na_value(): + return decimal.Decimal("NaN") + + +@pytest.fixture +def data_for_grouping(): + 
b = decimal.Decimal("1.0") + a = decimal.Decimal("0.0") + c = decimal.Decimal("2.0") + na = decimal.Decimal("NaN") + return DecimalArray([b, b, na, na, a, a, b, c]) + + +class BaseDecimal: + def assert_series_equal(self, left, right, *args, **kwargs): + def convert(x): + # need to convert array([Decimal(NaN)], dtype='object') to np.NaN + # because Series[object].isnan doesn't recognize decimal(NaN) as + # NA. + try: + return math.isnan(x) + except TypeError: + return False + + if left.dtype == "object": + left_na = left.apply(convert) + else: + left_na = left.isna() + if right.dtype == "object": + right_na = right.apply(convert) + else: + right_na = right.isna() + + tm.assert_series_equal(left_na, right_na) + return tm.assert_series_equal(left[~left_na], right[~right_na], *args, **kwargs) + + def assert_frame_equal(self, left, right, *args, **kwargs): + # TODO(EA): select_dtypes + tm.assert_index_equal( + left.columns, + right.columns, + exact=kwargs.get("check_column_type", "equiv"), + check_names=kwargs.get("check_names", True), + check_exact=kwargs.get("check_exact", False), + check_categorical=kwargs.get("check_categorical", True), + obj="{obj}.columns".format(obj=kwargs.get("obj", "DataFrame")), + ) + + decimals = (left.dtypes == "decimal").index + + for col in decimals: + self.assert_series_equal(left[col], right[col], *args, **kwargs) + + left = left.drop(columns=decimals) + right = right.drop(columns=decimals) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestDtype(BaseDecimal, base.BaseDtypeTests): + def test_hashable(self, dtype): + pass + + +class TestInterface(BaseDecimal, base.BaseInterfaceTests): + pass + + +class TestConstructors(BaseDecimal, base.BaseConstructorsTests): + @pytest.mark.skip(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + pass + + +class TestReshaping(BaseDecimal, base.BaseReshapingTests): + pass + + +class TestGetitem(BaseDecimal, 
base.BaseGetitemTests): + def test_take_na_value_other_decimal(self): + arr = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + result = arr.take([0, -1], allow_fill=True, fill_value=decimal.Decimal("-1.0")) + expected = DecimalArray([decimal.Decimal("1.0"), decimal.Decimal("-1.0")]) + self.assert_extension_array_equal(result, expected) + + +class TestMissing(BaseDecimal, base.BaseMissingTests): + pass + + +class Reduce: + def check_reduce(self, s, op_name, skipna): + + if op_name in ["median", "skew", "kurt"]: + with pytest.raises(NotImplementedError): + getattr(s, op_name)(skipna=skipna) + + else: + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(np.asarray(s), op_name)() + tm.assert_almost_equal(result, expected) + + +class TestNumericReduce(Reduce, base.BaseNumericReduceTests): + pass + + +class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): + pass + + +class TestMethods(BaseDecimal, base.BaseMethodsTests): + @pytest.mark.parametrize("dropna", [True, False]) + @pytest.mark.xfail(reason="value_counts not implemented yet.") + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + + tm.assert_series_equal(result, expected) + + +class TestCasting(BaseDecimal, base.BaseCastingTests): + pass + + +class TestGroupby(BaseDecimal, base.BaseGroupbyTests): + @pytest.mark.xfail( + reason="needs to correctly define __eq__ to handle nans, xref #27081." 
+ ) + def test_groupby_apply_identity(self, data_for_grouping): + super().test_groupby_apply_identity(data_for_grouping) + + +class TestSetitem(BaseDecimal, base.BaseSetitemTests): + pass + + +class TestPrinting(BaseDecimal, base.BasePrintingTests): + def test_series_repr(self, data): + # Overriding this base test to explicitly test that + # the custom _formatter is used + ser = pd.Series(data) + assert data.dtype.name in repr(ser) + assert "Decimal: " in repr(ser) + + +# TODO(extension) +@pytest.mark.xfail( + reason=( + "raising AssertionError as this is not implemented, though easy enough to do" + ) +) +def test_series_constructor_coerce_data_to_extension_dtype_raises(): + xpr = ( + "Cannot cast data to extension dtype 'decimal'. Pass the " + "extension array directly." + ) + with pytest.raises(ValueError, match=xpr): + pd.Series([0, 1, 2], dtype=DecimalDtype()) + + +def test_series_constructor_with_dtype(): + arr = DecimalArray([decimal.Decimal("10.0")]) + result = pd.Series(arr, dtype=DecimalDtype()) + expected = pd.Series(arr) + tm.assert_series_equal(result, expected) + + result = pd.Series(arr, dtype="int64") + expected = pd.Series([10]) + tm.assert_series_equal(result, expected) + + +def test_dataframe_constructor_with_dtype(): + arr = DecimalArray([decimal.Decimal("10.0")]) + + result = pd.DataFrame({"A": arr}, dtype=DecimalDtype()) + expected = pd.DataFrame({"A": arr}) + tm.assert_frame_equal(result, expected) + + arr = DecimalArray([decimal.Decimal("10.0")]) + result = pd.DataFrame({"A": arr}, dtype="int64") + expected = pd.DataFrame({"A": [10]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("frame", [True, False]) +def test_astype_dispatches(frame): + # This is a dtype-specific test that ensures Series[decimal].astype + # gets all the way through to ExtensionArray.astype + # Designing a reliable smoke test that works for arbitrary data types + # is difficult. 
+ data = pd.Series(DecimalArray([decimal.Decimal(2)]), name="a") + ctx = decimal.Context() + ctx.prec = 5 + + if frame: + data = data.to_frame() + + result = data.astype(DecimalDtype(ctx)) + + if frame: + result = result["a"] + + assert result.dtype.context.prec == ctx.prec + + +class TestArithmeticOps(BaseDecimal, base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + op_name = all_arithmetic_operators + s = pd.Series(data) + + context = decimal.getcontext() + divbyzerotrap = context.traps[decimal.DivisionByZero] + invalidoptrap = context.traps[decimal.InvalidOperation] + context.traps[decimal.DivisionByZero] = 0 + context.traps[decimal.InvalidOperation] = 0 + + # Decimal supports ops with int, but not float + other = pd.Series([int(d * 100) for d in data]) + self.check_opname(s, op_name, other) + + if "mod" not in op_name: + self.check_opname(s, op_name, s * 2) + + self.check_opname(s, op_name, 0) + self.check_opname(s, op_name, 5) + context.traps[decimal.DivisionByZero] = divbyzerotrap + context.traps[decimal.InvalidOperation] = invalidoptrap + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + # We implement divmod + super()._check_divmod_op(s, op, other, exc=None) + + def test_error(self): + pass + + +class TestComparisonOps(BaseDecimal, base.BaseComparisonOpsTests): + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def _compare_other(self, s, data, op_name, other): + self.check_opname(s, op_name, other) + + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + self._compare_other(s, data, op_name, 0.5) + + def test_compare_array(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + + alter = np.random.choice([-1, 0, 1], 
len(data)) + # Randomly double, halve or keep same value + other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] + self._compare_other(s, data, op_name, other) + + +class DecimalArrayWithoutFromSequence(DecimalArray): + """Helper class for testing error handling in _from_sequence.""" + + def _from_sequence(cls, scalars, dtype=None, copy=False): + raise KeyError("For the test") + + +class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence): + @classmethod + def _create_arithmetic_method(cls, op): + return cls._create_method(op, coerce_to_dtype=False) + + +DecimalArrayWithoutCoercion._add_arithmetic_ops() + + +def test_combine_from_sequence_raises(): + # https://github.com/pandas-dev/pandas/issues/22850 + ser = pd.Series( + DecimalArrayWithoutFromSequence( + [decimal.Decimal("1.0"), decimal.Decimal("2.0")] + ) + ) + result = ser.combine(ser, operator.add) + + # note: object dtype + expected = pd.Series( + [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object" + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "class_", [DecimalArrayWithoutFromSequence, DecimalArrayWithoutCoercion] +) +def test_scalar_ops_from_sequence_raises(class_): + # op(EA, EA) should return an EA, or an ndarray if it's not possible + # to return an EA with the return values. 
+ arr = class_([decimal.Decimal("1.0"), decimal.Decimal("2.0")]) + result = arr + arr + expected = np.array( + [decimal.Decimal("2.0"), decimal.Decimal("4.0")], dtype="object" + ) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "reverse, expected_div, expected_mod", + [(False, [0, 1, 1, 2], [1, 0, 1, 0]), (True, [2, 1, 0, 0], [0, 0, 2, 2])], +) +def test_divmod_array(reverse, expected_div, expected_mod): + # https://github.com/pandas-dev/pandas/issues/22930 + arr = to_decimal([1, 2, 3, 4]) + if reverse: + div, mod = divmod(2, arr) + else: + div, mod = divmod(arr, 2) + expected_div = to_decimal(expected_div) + expected_mod = to_decimal(expected_mod) + + tm.assert_extension_array_equal(div, expected_div) + tm.assert_extension_array_equal(mod, expected_mod) + + +def test_ufunc_fallback(data): + a = data[:5] + s = pd.Series(a, index=range(3, 8)) + result = np.abs(s) + expected = pd.Series(np.abs(a), index=range(3, 8)) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc(): + a = to_decimal([1, 2, 3]) + result = np.exp(a) + expected = to_decimal(np.exp(a._data)) + tm.assert_extension_array_equal(result, expected) + + +def test_array_ufunc_series(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.exp(s) + expected = pd.Series(to_decimal(np.exp(a._data))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_scalar_other(): + # check _HANDLED_TYPES + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + result = np.add(s, decimal.Decimal(1)) + expected = pd.Series(np.add(a, decimal.Decimal(1))) + tm.assert_series_equal(result, expected) + + +def test_array_ufunc_series_defer(): + a = to_decimal([1, 2, 3]) + s = pd.Series(a) + + expected = pd.Series(to_decimal([2, 4, 6])) + r1 = np.add(s, a) + r2 = np.add(a, s) + + tm.assert_series_equal(r1, expected) + tm.assert_series_equal(r2, expected) + + +def test_groupby_agg(): + # Ensure that the result of agg is inferred to be decimal dtype + # 
https://github.com/pandas-dev/pandas/issues/29141 + + data = make_data()[:5] + df = pd.DataFrame( + {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)} + ) + + # single key, selected column + expected = pd.Series(to_decimal([data[0], data[3]])) + result = df.groupby("id1")["decimals"].agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + result = df["decimals"].groupby(df["id1"]).agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + + # multiple keys, selected column + expected = pd.Series( + to_decimal([data[0], data[1], data[3]]), + index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 1)]), + ) + result = df.groupby(["id1", "id2"])["decimals"].agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + result = df["decimals"].groupby([df["id1"], df["id2"]]).agg(lambda x: x.iloc[0]) + tm.assert_series_equal(result, expected, check_names=False) + + # multiple columns + expected = pd.DataFrame({"id2": [0, 1], "decimals": to_decimal([data[0], data[3]])}) + result = df.groupby("id1").agg(lambda x: x.iloc[0]) + tm.assert_frame_equal(result, expected, check_names=False) + + +def test_groupby_agg_ea_method(monkeypatch): + # Ensure that the result of agg is inferred to be decimal dtype + # https://github.com/pandas-dev/pandas/issues/29141 + + def DecimalArray__my_sum(self): + return np.sum(np.array(self)) + + monkeypatch.setattr(DecimalArray, "my_sum", DecimalArray__my_sum, raising=False) + + data = make_data()[:5] + df = pd.DataFrame({"id": [0, 0, 0, 1, 1], "decimals": DecimalArray(data)}) + expected = pd.Series(to_decimal([data[0] + data[1] + data[2], data[3] + data[4]])) + + result = df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum()) + tm.assert_series_equal(result, expected, check_names=False) + s = pd.Series(DecimalArray(data)) + result = s.groupby(np.array([0, 0, 0, 1, 1])).agg(lambda x: x.values.my_sum()) + 
tm.assert_series_equal(result, expected, check_names=False) + + +def test_indexing_no_materialize(monkeypatch): + # See https://github.com/pandas-dev/pandas/issues/29708 + # Ensure that indexing operations do not materialize (convert to a numpy + # array) the ExtensionArray unnecessarily + + def DecimalArray__array__(self, dtype=None): + raise Exception("tried to convert a DecimalArray to a numpy array") + + monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False) + + data = make_data() + s = pd.Series(DecimalArray(data)) + df = pd.DataFrame({"a": s, "b": range(len(s))}) + + # ensure the following operations do not raise an error + s[s > 0.5] + df[s > 0.5] + s.at[0] + df.at[0, "a"] + + +def test_to_numpy_keyword(): + # test the extra keyword + values = [decimal.Decimal("1.1111"), decimal.Decimal("2.2222")] + expected = np.array( + [decimal.Decimal("1.11"), decimal.Decimal("2.22")], dtype="object" + ) + a = pd.array(values, dtype="decimal") + result = a.to_numpy(decimals=2) + tm.assert_numpy_array_equal(result, expected) + + result = pd.Series(a).to_numpy(decimals=2) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/extension/json/__init__.py b/pandas/tests/extension/json/__init__.py new file mode 100644 index 00000000..e205c7ee --- /dev/null +++ b/pandas/tests/extension/json/__init__.py @@ -0,0 +1,3 @@ +from .array import JSONArray, JSONDtype, make_data + +__all__ = ["JSONArray", "JSONDtype", "make_data"] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py new file mode 100644 index 00000000..ded513bf --- /dev/null +++ b/pandas/tests/extension/json/array.py @@ -0,0 +1,207 @@ +"""Test extension array for storing nested data in a pandas container. + +The JSONArray stores lists of dictionaries. The storage mechanism is a list, +not an ndarray. + +Note: + +We currently store lists of UserDicts. 
Pandas has a few places +internally that specifically check for dicts, and does non-scalar things +in that case. We *want* the dictionaries to be treated as scalars, so we +hack around pandas by using UserDicts. +""" +from collections import UserDict, abc +import itertools +import numbers +import random +import string +import sys + +import numpy as np + +import pandas as pd +from pandas.api.extensions import ExtensionArray, ExtensionDtype + + +class JSONDtype(ExtensionDtype): + type = abc.Mapping + name = "json" + na_value = UserDict() + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. + + Returns + ------- + type + """ + return JSONArray + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") + + +class JSONArray(ExtensionArray): + dtype = JSONDtype() + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False): + for val in values: + if not isinstance(val, self.dtype.type): + raise TypeError("All values must be of type " + str(self.dtype.type)) + self.data = values + + # Some aliases for common attribute names to ensure pandas supports + # these + self._items = self._data = self.data + # those aliases are currently not working due to assumptions + # in internal code (GH-20735) + # self._values = self.values = self.data + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars) + + @classmethod + def _from_factorized(cls, values, original): + return cls([UserDict(x) for x in values if x != ()]) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + elif isinstance(item, slice) and item == slice(None): + # Make sure we get a view + return type(self)(self.data) + elif isinstance(item, slice): + # slice + return type(self)(self.data[item]) + else: + item = 
pd.api.indexers.check_array_indexer(self, item) + if pd.api.types.is_bool_dtype(item.dtype): + return self._from_sequence([x for x, m in zip(self, item) if m]) + # integer + return type(self)([self.data[i] for i in item]) + + def __setitem__(self, key, value): + if isinstance(key, numbers.Integral): + self.data[key] = value + else: + if not isinstance(value, (type(self), abc.Sequence)): + # broadcast value + value = itertools.cycle([value]) + + if isinstance(key, np.ndarray) and key.dtype == "bool": + # masking + for i, (k, v) in enumerate(zip(key, value)): + if k: + assert isinstance(v, self.dtype.type) + self.data[i] = v + else: + for k, v in zip(key, value): + assert isinstance(v, self.dtype.type) + self.data[k] = v + + def __len__(self) -> int: + return len(self.data) + + def __array__(self, dtype=None): + if dtype is None: + dtype = object + return np.asarray(self.data, dtype=dtype) + + @property + def nbytes(self) -> int: + return sys.getsizeof(self.data) + + def isna(self): + return np.array([x == self.dtype.na_value for x in self.data], dtype=bool) + + def take(self, indexer, allow_fill=False, fill_value=None): + # re-implement here, since NumPy has trouble setting + # sized objects like UserDicts into scalar slots of + # an ndarray. + indexer = np.asarray(indexer) + msg = ( + "Index is out of bounds or cannot do a " + "non-empty take from an empty array." + ) + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + # bounds check + if (indexer < -1).any(): + raise ValueError + try: + output = [ + self.data[loc] if loc != -1 else fill_value for loc in indexer + ] + except IndexError: + raise IndexError(msg) + else: + try: + output = [self.data[loc] for loc in indexer] + except IndexError: + raise IndexError(msg) + + return self._from_sequence(output) + + def copy(self): + return type(self)(self.data[:]) + + def astype(self, dtype, copy=True): + # NumPy has issues when all the dicts are the same length. 
+ # np.array([UserDict(...), UserDict(...)]) fails, + # but np.array([{...}, {...}]) works, so cast. + + # needed to add this check for the Series constructor + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + return np.array([dict(x) for x in self], dtype=dtype, copy=copy) + + def unique(self): + # Parent method doesn't work since np.array will try to infer + # a 2-dim object. + return type(self)( + [dict(x) for x in list({tuple(d.items()) for d in self.data})] + ) + + @classmethod + def _concat_same_type(cls, to_concat): + data = list(itertools.chain.from_iterable([x.data for x in to_concat])) + return cls(data) + + def _values_for_factorize(self): + frozen = self._values_for_argsort() + if len(frozen) == 0: + # _factorize_array expects 1-d array, this is a len-0 2-d array. + frozen = frozen.ravel() + return frozen, () + + def _values_for_argsort(self): + # Disable NumPy's shape inference by including an empty tuple... + # If all the elements of self are the same size P, NumPy will + # cast them to an (N, P) array, instead of an (N,) array of tuples. + frozen = [()] + [tuple(x.items()) for x in self] + return np.array(frozen, dtype=object)[1:] + + +def make_data(): + # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer + return [ + UserDict( + [ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10)) + ] + ) + for _ in range(100) + ] diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py new file mode 100644 index 00000000..dc03a1f1 --- /dev/null +++ b/pandas/tests/extension/json/test_json.py @@ -0,0 +1,303 @@ +import collections +import operator + +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.extension import base + +from .array import JSONArray, JSONDtype, make_data + + +@pytest.fixture +def dtype(): + return JSONDtype() + + +@pytest.fixture +def data(): + """Length-100 JSONArray for semantics test.""" + data = make_data() + + # Why the while loop? NumPy is unable to construct an ndarray from + # equal-length ndarrays. Many of our operations involve coercing the + # EA to an ndarray of objects. To avoid random test failures, we ensure + # that our data is coercible to an ndarray. Several tests deal with only + # the first two elements, so that's what we'll check. + + while len(data[0]) == len(data[1]): + data = make_data() + + return JSONArray(data) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return JSONArray([{}, {"a": 10}]) + + +@pytest.fixture +def data_for_sorting(): + return JSONArray([{"b": 1}, {"c": 4}, {"a": 2, "c": 3}]) + + +@pytest.fixture +def data_missing_for_sorting(): + return JSONArray([{"b": 1}, {}, {"a": 4}]) + + +@pytest.fixture +def na_value(dtype): + return dtype.na_value + + +@pytest.fixture +def na_cmp(): + return operator.eq + + +@pytest.fixture +def data_for_grouping(): + return JSONArray( + [ + {"b": 1}, + {"b": 1}, + {}, + {}, + {"a": 0, "c": 2}, + {"a": 0, "c": 2}, + {"b": 1}, + {"c": 2}, + ] + ) + + +class BaseJSON: + # NumPy doesn't handle an array of equal-length UserDicts. 
+ # The default assert_series_equal eventually does a + # Series.values, which raises. We work around it by + # converting the UserDicts to dicts. + def assert_series_equal(self, left, right, **kwargs): + if left.dtype.name == "json": + assert left.dtype == right.dtype + left = pd.Series( + JSONArray(left.values.astype(object)), index=left.index, name=left.name + ) + right = pd.Series( + JSONArray(right.values.astype(object)), + index=right.index, + name=right.name, + ) + tm.assert_series_equal(left, right, **kwargs) + + def assert_frame_equal(self, left, right, *args, **kwargs): + obj_type = kwargs.get("obj", "DataFrame") + tm.assert_index_equal( + left.columns, + right.columns, + exact=kwargs.get("check_column_type", "equiv"), + check_names=kwargs.get("check_names", True), + check_exact=kwargs.get("check_exact", False), + check_categorical=kwargs.get("check_categorical", True), + obj=f"{obj_type}.columns", + ) + + jsons = (left.dtypes == "json").index + + for col in jsons: + self.assert_series_equal(left[col], right[col], *args, **kwargs) + + left = left.drop(columns=jsons) + right = right.drop(columns=jsons) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestDtype(BaseJSON, base.BaseDtypeTests): + pass + + +class TestInterface(BaseJSON, base.BaseInterfaceTests): + def test_custom_asserts(self): + # This would always trigger the KeyError from trying to put + # an array of equal-length UserDicts inside an ndarray. 
+ data = JSONArray( + [ + collections.UserDict({"a": 1}), + collections.UserDict({"b": 2}), + collections.UserDict({"c": 3}), + ] + ) + a = pd.Series(data) + self.assert_series_equal(a, a) + self.assert_frame_equal(a.to_frame(), a.to_frame()) + + b = pd.Series(data.take([0, 0, 1])) + with pytest.raises(AssertionError): + self.assert_series_equal(a, b) + + with pytest.raises(AssertionError): + self.assert_frame_equal(a.to_frame(), b.to_frame()) + + +class TestConstructors(BaseJSON, base.BaseConstructorsTests): + @pytest.mark.skip(reason="not implemented constructor from dtype") + def test_from_dtype(self, data): + # construct from our dtype & string dtype + pass + + +class TestReshaping(BaseJSON, base.BaseReshapingTests): + @pytest.mark.skip(reason="Different definitions of NA") + def test_stack(self): + """ + The test does .astype(object).stack(). If we happen to have + any missing values in `data`, then we'll end up with different + rows since we consider `{}` NA, but `.astype(object)` doesn't. + """ + + @pytest.mark.xfail(reason="dict for NA") + def test_unstack(self, data, index): + # The base test has NaN for the expected NA value. 
+ # this matches otherwise + return super().test_unstack(data, index) + + +class TestGetitem(BaseJSON, base.BaseGetitemTests): + pass + + +class TestMissing(BaseJSON, base.BaseMissingTests): + @pytest.mark.skip(reason="Setting a dict as a scalar") + def test_fillna_series(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" + + @pytest.mark.skip(reason="Setting a dict as a scalar") + def test_fillna_frame(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" + + +unhashable = pytest.mark.skip(reason="Unhashable") + + +class TestReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(BaseJSON, base.BaseMethodsTests): + @unhashable + def test_value_counts(self, all_data, dropna): + pass + + @unhashable + def test_sort_values_frame(self): + # TODO (EA.factorize): see if _values_for_factorize allows this. + pass + + def test_argsort(self, data_for_sorting): + super().test_argsort(data_for_sorting) + + def test_argsort_missing(self, data_missing_for_sorting): + super().test_argsort_missing(data_missing_for_sorting) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values(self, data_for_sorting, ascending): + super().test_sort_values(data_for_sorting, ascending) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing(self, data_missing_for_sorting, ascending): + super().test_sort_values_missing(data_missing_for_sorting, ascending) + + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_le(self, data_repeated): + pass + + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_add(self, data_repeated): + pass + + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_first(self, data): + pass + + @unhashable + def test_hash_pandas_object_works(self, data, kind): + super().test_hash_pandas_object_works(data, kind) + + @pytest.mark.skip(reason="broadcasting error") + def 
test_where_series(self, data, na_value): + # Fails with + # *** ValueError: operands could not be broadcast together + # with shapes (4,) (4,) (0,) + super().test_where_series(data, na_value) + + @pytest.mark.skip(reason="Can't compare dicts.") + def test_searchsorted(self, data_for_sorting): + super().test_searchsorted(data_for_sorting) + + +class TestCasting(BaseJSON, base.BaseCastingTests): + @pytest.mark.skip(reason="failing on np.array(self, dtype=str)") + def test_astype_str(self): + """This currently fails in NumPy on np.array(self, dtype=str) with + + *** ValueError: setting an array element with a sequence + """ + + +# We intentionally don't run base.BaseSetitemTests because pandas' +# internals has trouble setting sequences of values into scalar positions. + + +class TestGroupby(BaseJSON, base.BaseGroupbyTests): + @unhashable + def test_groupby_extension_transform(self): + """ + This currently fails in Series.name.setter, since the + name must be hashable, but the value is a dictionary. + I think this is what we want, i.e. `.name` should be the original + values, and not the values for factorization. + """ + + @unhashable + def test_groupby_extension_apply(self): + """ + This fails in Index._do_unique_check with + + > hash(val) + E TypeError: unhashable type: 'UserDict' with + + I suspect that once we support Index[ExtensionArray], + we'll be able to dispatch unique. 
+ """ + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + super().test_groupby_extension_agg(as_index, data_for_grouping) + + +class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests): + def test_error(self, data, all_arithmetic_operators): + pass + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with pytest.raises(TypeError, match="unsupported"): + ser + data + + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + pass + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super()._check_divmod_op(s, op, other, exc=TypeError) + + +class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): + pass + + +class TestPrinting(BaseJSON, base.BasePrintingTests): + pass diff --git a/pandas/tests/extension/list/__init__.py b/pandas/tests/extension/list/__init__.py new file mode 100644 index 00000000..108f1937 --- /dev/null +++ b/pandas/tests/extension/list/__init__.py @@ -0,0 +1,3 @@ +from .array import ListArray, ListDtype, make_data + +__all__ = ["ListArray", "ListDtype", "make_data"] diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py new file mode 100644 index 00000000..6dd00ad3 --- /dev/null +++ b/pandas/tests/extension/list/array.py @@ -0,0 +1,133 @@ +""" +Test extension array for storing nested data in a pandas container. + +The ListArray stores an ndarray of lists. +""" +import numbers +import random +import string + +import numpy as np + +from pandas.core.dtypes.base import ExtensionDtype + +import pandas as pd +from pandas.core.arrays import ExtensionArray + + +class ListDtype(ExtensionDtype): + type = list + name = "list" + na_value = np.nan + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype. 
+ + Returns + ------- + type + """ + return ListArray + + @classmethod + def construct_from_string(cls, string): + if string == cls.name: + return cls() + else: + raise TypeError(f"Cannot construct a '{cls}' from '{string}'") + + +class ListArray(ExtensionArray): + dtype = ListDtype() + __array_priority__ = 1000 + + def __init__(self, values, dtype=None, copy=False): + if not isinstance(values, np.ndarray): + raise TypeError("Need to pass a numpy array as values") + for val in values: + if not isinstance(val, self.dtype.type) and not pd.isna(val): + raise TypeError("All values must be of type " + str(self.dtype.type)) + self.data = values + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + data = np.empty(len(scalars), dtype=object) + data[:] = scalars + return cls(data) + + def __getitem__(self, item): + if isinstance(item, numbers.Integral): + return self.data[item] + else: + # slice, list-like, mask + return type(self)(self.data[item]) + + def __len__(self) -> int: + return len(self.data) + + def isna(self): + return np.array( + [not isinstance(x, list) and np.isnan(x) for x in self.data], dtype=bool + ) + + def take(self, indexer, allow_fill=False, fill_value=None): + # re-implement here, since NumPy has trouble setting + # sized objects like UserDicts into scalar slots of + # an ndarray. + indexer = np.asarray(indexer) + msg = ( + "Index is out of bounds or cannot do a " + "non-empty take from an empty array." 
+ ) + + if allow_fill: + if fill_value is None: + fill_value = self.dtype.na_value + # bounds check + if (indexer < -1).any(): + raise ValueError + try: + output = [ + self.data[loc] if loc != -1 else fill_value for loc in indexer + ] + except IndexError: + raise IndexError(msg) + else: + try: + output = [self.data[loc] for loc in indexer] + except IndexError: + raise IndexError(msg) + + return self._from_sequence(output) + + def copy(self): + return type(self)(self.data[:]) + + def astype(self, dtype, copy=True): + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self + elif pd.api.types.is_string_dtype(dtype) and not pd.api.types.is_object_dtype( + dtype + ): + # numpy has problems with astype(str) for nested elements + return np.array([str(x) for x in self.data], dtype=dtype) + return np.array(self.data, dtype=dtype, copy=copy) + + @classmethod + def _concat_same_type(cls, to_concat): + data = np.concatenate([x.data for x in to_concat]) + return cls(data) + + +def make_data(): + # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer + data = np.empty(100, dtype=object) + data[:] = [ + [random.choice(string.ascii_letters) for _ in range(random.randint(0, 10))] + for _ in range(100) + ] + return data diff --git a/pandas/tests/extension/list/test_list.py b/pandas/tests/extension/list/test_list.py new file mode 100644 index 00000000..c5c44171 --- /dev/null +++ b/pandas/tests/extension/list/test_list.py @@ -0,0 +1,30 @@ +import pytest + +import pandas as pd + +from .array import ListArray, ListDtype, make_data + + +@pytest.fixture +def dtype(): + return ListDtype() + + +@pytest.fixture +def data(): + """Length-100 ListArray for semantics test.""" + data = make_data() + + while len(data[0]) == len(data[1]): + data = make_data() + + return ListArray(data) + + +def test_to_csv(data): + # https://github.com/pandas-dev/pandas/issues/28840 + # array with list-likes fail when doing astype(str) on the numpy array + # which was done in to_native_types + df = pd.DataFrame({"a": data}) + res = df.to_csv() + assert str(data[0]) in res diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py new file mode 100644 index 00000000..0c6b187e --- /dev/null +++ b/pandas/tests/extension/test_boolean.py @@ -0,0 +1,351 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +from pandas.compat.numpy import _np_version_under1p14 + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.boolean import BooleanDtype +from pandas.tests.extension import base + + +def make_data(): + return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False] + + +@pytest.fixture +def dtype(): + return BooleanDtype() + + +@pytest.fixture +def data(dtype): + return pd.array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return pd.array(np.ones(100), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return pd.array([np.nan, True], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return pd.array([True, True, False], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return pd.array([True, np.nan, False], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = True + a = False + na = np.nan + return pd.array([b, b, na, na, a, a, b], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + if op_name in ("__sub__", "__rsub__"): + # subtraction for bools raises TypeError (but not yet in 1.13) + if _np_version_under1p14: + pytest.skip("__sub__ 
does not yet raise in numpy 1.13") + with pytest.raises(TypeError): + op(s, other) + + return + + result = op(s, other) + expected = s.combine(other, op) + + if op_name in ( + "__floordiv__", + "__rfloordiv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", + ): + # combine keeps boolean type + expected = expected.astype("Int8") + elif op_name in ("__truediv__", "__rtruediv__"): + # combine with bools does not generate the correct result + # (numpy behaviour for div is to regard the bools as numeric) + expected = s.astype(float).combine(other, op) + if op_name == "__rpow__": + # for rpow, combine does not propagate NaN + expected[result.isna()] = np.nan + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=None): + # override to not raise an error + super()._check_divmod_op(s, op, other, None) + + @pytest.mark.skip(reason="BooleanArray does not error on ops") + def test_error(self, data, all_arithmetic_operators): + # other specific errors tested in the boolean array specific tests + pass + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _compare_other(self, s, data, op_name, other): + self.check_opname(s, op_name, other) + + @pytest.mark.skip(reason="Tested in tests/arrays/test_boolean.py") + def test_compare_scalar(self, data, all_compare_operators): + pass + + @pytest.mark.skip(reason="Tested in tests/arrays/test_boolean.py") + def test_compare_array(self, data, all_compare_operators): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize(self, data_for_grouping, na_sentinel): + # override because we only have 2 unique values + labels, uniques = 
pd.factorize(data_for_grouping, na_sentinel=na_sentinel) + expected_labels = np.array( + [0, 0, na_sentinel, na_sentinel, 1, 1, 0], dtype=np.intp + ) + expected_uniques = data_for_grouping.take([0, 4]) + + tm.assert_numpy_array_equal(labels, expected_labels) + self.assert_extension_array_equal(uniques, expected_uniques) + + def test_combine_le(self, data_repeated): + # override because expected needs to be boolean instead of bool dtype + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + dtype="boolean", + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= val for a in list(orig_data1)], dtype="boolean") + self.assert_series_equal(result, expected) + + def test_searchsorted(self, data_for_sorting, as_series): + # override because we only have 2 unique values + data_for_sorting = pd.array([True, False], dtype="boolean") + b, a = data_for_sorting + arr = type(data_for_sorting)._from_sequence([a, b]) + + if as_series: + arr = pd.Series(arr) + assert arr.searchsorted(a) == 0 + assert arr.searchsorted(a, side="right") == 1 + + assert arr.searchsorted(b) == 1 + assert arr.searchsorted(b, side="right") == 2 + + result = arr.searchsorted(arr.take([0, 1])) + expected = np.array([0, 1], dtype=np.intp) + + tm.assert_numpy_array_equal(result, expected) + + # sorter + sorter = np.array([1, 0]) + assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 + + @pytest.mark.skip(reason="uses nullable integer") + def test_value_counts(self, all_data, dropna): + return super().test_value_counts(all_data, dropna) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + """ + Groupby-specific tests are overridden because boolean only has 2 + unique 
values, base tests uses 3 groups. + """ + + def test_grouping_grouper(self, data_for_grouping): + df = pd.DataFrame( + {"A": ["B", "B", None, None, "A", "A", "B"], "B": data_for_grouping} + ) + gr1 = df.groupby("A").grouper.groupings[0] + gr2 = df.groupby("B").grouper.groupings[0] + + tm.assert_numpy_array_equal(gr1.grouper, df.A.values) + tm.assert_extension_array_equal(gr2.grouper, data_for_grouping) + + @pytest.mark.parametrize("as_index", [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("B", as_index=as_index).A.mean() + _, index = pd.factorize(data_for_grouping, sort=True) + + index = pd.Index(index, name="B") + expected = pd.Series([3, 1], index=index, name="A") + if as_index: + self.assert_series_equal(result, expected) + else: + expected = expected.reset_index() + self.assert_frame_equal(result, expected) + + def test_groupby_extension_no_sort(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("B", sort=False).A.mean() + _, index = pd.factorize(data_for_grouping, sort=False) + + index = pd.Index(index, name="B") + expected = pd.Series([1, 3], index=index, name="A") + self.assert_series_equal(result, expected) + + def test_groupby_extension_transform(self, data_for_grouping): + valid = data_for_grouping[~data_for_grouping.isna()] + df = pd.DataFrame({"A": [1, 1, 3, 3, 1], "B": valid}) + + result = df.groupby("B").A.transform(len) + expected = pd.Series([3, 3, 2, 2, 3], name="A") + + self.assert_series_equal(result, expected) + + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + df.groupby("B").apply(groupby_apply_op) + df.groupby("B").A.apply(groupby_apply_op) + df.groupby("A").apply(groupby_apply_op) + df.groupby("A").B.apply(groupby_apply_op) + + def 
test_groupby_apply_identity(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}) + result = df.groupby("A").B.apply(lambda x: x.array) + expected = pd.Series( + [ + df.B.iloc[[0, 1, 6]].array, + df.B.iloc[[2, 3]].array, + df.B.iloc[[4, 5]].array, + ], + index=pd.Index([1, 2, 3], name="A"), + name="B", + ) + self.assert_series_equal(result, expected) + + def test_in_numeric_groupby(self, data_for_grouping): + df = pd.DataFrame( + { + "A": [1, 1, 2, 2, 3, 3, 1], + "B": data_for_grouping, + "C": [1, 1, 1, 1, 1, 1, 1], + } + ) + result = df.groupby("A").sum().columns + + if data_for_grouping.dtype._is_numeric: + expected = pd.Index(["B", "C"]) + else: + expected = pd.Index(["C"]) + + tm.assert_index_equal(result, expected) + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + # override parent function to cast to bool for min/max + if np.isnan(expected): + expected = pd.NA + elif op_name in ("min", "max"): + expected = bool(expected) + tm.assert_almost_equal(result, expected) + + +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestUnaryOps(base.BaseUnaryOpsTests): + pass + + +# TODO parsing not yet supported +# class TestParsing(base.BaseParsingTests): +# pass diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py new file mode 100644 index 00000000..336b23e5 --- /dev/null +++ b/pandas/tests/extension/test_categorical.py @@ -0,0 +1,286 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. 
+ +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import string + +import numpy as np +import pytest + +import pandas as pd +from pandas import Categorical, CategoricalIndex, Timestamp +import pandas._testing as tm +from pandas.api.types import CategoricalDtype +from pandas.tests.extension import base + + +def make_data(): + while True: + values = np.random.choice(list(string.ascii_letters), size=100) + # ensure we meet the requirements + # 1. first two not null + # 2. first and second are different + if values[0] != values[1]: + break + return values + + +@pytest.fixture +def dtype(): + return CategoricalDtype() + + +@pytest.fixture +def data(): + """Length-100 array for this type. + + * data[0] and data[1] should both be non missing + * data[0] and data[1] should not gbe equal + """ + return Categorical(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return Categorical([np.nan, "A"]) + + +@pytest.fixture +def data_for_sorting(): + return Categorical(["A", "B", "C"], categories=["C", "A", "B"], ordered=True) + + +@pytest.fixture +def data_missing_for_sorting(): + return Categorical(["A", None, "B"], categories=["B", "A"], ordered=True) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(): + return Categorical(["a", "a", None, None, "b", "b", "a", "c"]) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + @pytest.mark.skip(reason="Memory usage doesn't match") + def test_memory_usage(self, data): + # Is this deliberate? 
+ super().test_memory_usage(data) + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + skip_take = pytest.mark.skip(reason="GH-20664.") + + @pytest.mark.skip(reason="Backwards compatibility") + def test_getitem_scalar(self, data): + # CategoricalDtype.type isn't "correct" since it should + # be a parent of the elements (object). But don't want + # to break things by changing. + super().test_getitem_scalar(data) + + @skip_take + def test_take(self, data, na_value, na_cmp): + # TODO remove this once Categorical.take is fixed + super().test_take(data, na_value, na_cmp) + + @skip_take + def test_take_negative(self, data): + super().test_take_negative(data) + + @skip_take + def test_take_pandas_style_negative_raises(self, data, na_value): + super().test_take_pandas_style_negative_raises(data, na_value) + + @skip_take + def test_take_non_na_fill_value(self, data_missing): + super().test_take_non_na_fill_value(data_missing) + + @skip_take + def test_take_out_of_bounds_raises(self, data, allow_fill): + return super().test_take_out_of_bounds_raises(data, allow_fill) + + @pytest.mark.skip(reason="GH-20747. 
Unobserved categories.") + def test_take_series(self, data): + super().test_take_series(data) + + @skip_take + def test_reindex_non_na_fill_value(self, data_missing): + super().test_reindex_non_na_fill_value(data_missing) + + @pytest.mark.skip(reason="Categorical.take buggy") + def test_take_empty(self, data, na_value, na_cmp): + super().test_take_empty(data, na_value, na_cmp) + + @pytest.mark.skip(reason="test not written correctly for categorical") + def test_reindex(self, data, na_value): + super().test_reindex(data, na_value) + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + @pytest.mark.skip(reason="Not implemented") + def test_fillna_limit_pad(self, data_missing): + super().test_fillna_limit_pad(data_missing) + + @pytest.mark.skip(reason="Not implemented") + def test_fillna_limit_backfill(self, data_missing): + super().test_fillna_limit_backfill(data_missing) + + +class TestReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="Unobserved categories included") + def test_value_counts(self, all_data, dropna): + return super().test_value_counts(all_data, dropna) + + def test_combine_add(self, data_repeated): + # GH 20825 + # When adding categoricals in combine, result is a string + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + expected = pd.Series( + ([a + b for (a, b) in zip(list(orig_data1), list(orig_data2))]) + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series([a + val for a in list(orig_data1)]) + self.assert_series_equal(result, expected) + + @pytest.mark.skip(reason="Not Applicable") + def test_fillna_length_mismatch(self, data_missing): + super().test_fillna_length_mismatch(data_missing) + + def test_searchsorted(self, data_for_sorting): + if not 
data_for_sorting.ordered: + raise pytest.skip(reason="searchsorted requires ordered data.") + + +class TestCasting(base.BaseCastingTests): + @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) + @pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), pd.NaT]]) + def test_cast_nan_to_int(self, cls, values): + # GH 28406 + s = cls(values) + + msg = "Cannot (cast|convert)" + with pytest.raises((ValueError, TypeError), match=msg): + s.astype(int) + + @pytest.mark.parametrize( + "expected", + [ + pd.Series(["2019", "2020"], dtype="datetime64[ns, UTC]"), + pd.Series([0, 0], dtype="timedelta64[ns]"), + pd.Series([pd.Period("2019"), pd.Period("2020")], dtype="period[A-DEC]"), + pd.Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval"), + pd.Series([1, np.nan], dtype="Int64"), + ], + ) + def test_cast_category_to_extension_dtype(self, expected): + # GH 28668 + result = expected.astype("category").astype(expected.dtype) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, expected", + [ + ( + "datetime64[ns]", + np.array(["2015-01-01T00:00:00.000000000"], dtype="datetime64[ns]"), + ), + ( + "datetime64[ns, MET]", + pd.DatetimeIndex( + [pd.Timestamp("2015-01-01 00:00:00+0100", tz="MET")] + ).array, + ), + ], + ) + def test_consistent_casting(self, dtype, expected): + # GH 28448 + result = pd.Categorical("2015-01-01").astype(dtype) + assert result == expected + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + + op_name = all_arithmetic_operators + if op_name != "__rmod__": + super().test_arith_series_with_scalar(data, op_name) + else: + pytest.skip("rmod never called when string is first argument") + + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with pytest.raises(TypeError, match="cannot perform|unsupported operand"): + ser + data + + def test_divmod_series_array(self): + # GH 23287 + # 
skipping because it is not implemented + pass + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super()._check_divmod_op(s, op, other, exc=TypeError) + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) + if op_name == "__eq__": + result = op(s, other) + expected = s.combine(other, lambda x, y: x == y) + assert (result == expected).all() + + elif op_name == "__ne__": + result = op(s, other) + expected = s.combine(other, lambda x, y: x != y) + assert (result == expected).all() + + else: + with pytest.raises(TypeError): + op(data, other) + + +class TestParsing(base.BaseParsingTests): + pass diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py new file mode 100644 index 00000000..e43650c2 --- /dev/null +++ b/pandas/tests/extension/test_common.py @@ -0,0 +1,81 @@ +import numpy as np +import pytest + +from pandas.core.dtypes import dtypes +from pandas.core.dtypes.common import is_extension_array_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import ExtensionArray + + +class DummyDtype(dtypes.ExtensionDtype): + pass + + +class DummyArray(ExtensionArray): + def __init__(self, data): + self.data = data + + def __array__(self, dtype): + return self.data + + @property + def dtype(self): + return DummyDtype() + + def astype(self, dtype, copy=True): + # we don't support anything but a single dtype + if isinstance(dtype, DummyDtype): + if copy: + return type(self)(self.data) + return self + + return np.array(self, dtype=dtype, copy=copy) + + +class TestExtensionArrayDtype: + @pytest.mark.parametrize( + "values", + [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(np.array([1, 2])), + ], + ) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + 
@pytest.mark.parametrize("values", [np.array([]), pd.Series(np.array([]))]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) + + +def test_astype(): + + arr = DummyArray(np.array([1, 2, 3])) + expected = np.array([1, 2, 3], dtype=object) + + result = arr.astype(object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype("object") + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_no_copy(): + arr = DummyArray(np.array([1, 2, 3], dtype=np.int64)) + result = arr.astype(arr.dtype, copy=False) + + assert arr is result + + result = arr.astype(arr.dtype) + assert arr is not result + + +@pytest.mark.parametrize("dtype", [dtypes.CategoricalDtype(), dtypes.IntervalDtype()]) +def test_is_extension_array_dtype(dtype): + assert isinstance(dtype, dtypes.ExtensionDtype) + assert is_extension_array_dtype(dtype) diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py new file mode 100644 index 00000000..a60607d5 --- /dev/null +++ b/pandas/tests/extension/test_datetime.py @@ -0,0 +1,214 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas.core.arrays import DatetimeArray +from pandas.tests.extension import base + + +@pytest.fixture(params=["US/Central"]) +def dtype(request): + return DatetimeTZDtype(unit="ns", tz=request.param) + + +@pytest.fixture +def data(dtype): + data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), dtype=dtype) + return data + + +@pytest.fixture +def data_missing(dtype): + return DatetimeArray( + np.array(["NaT", "2000-01-01"], dtype="datetime64[ns]"), dtype=dtype + ) + + +@pytest.fixture +def data_for_sorting(dtype): + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + c = pd.Timestamp("2000-01-03") + return DatetimeArray(np.array([b, c, a], dtype="datetime64[ns]"), dtype=dtype) + + +@pytest.fixture +def 
data_missing_for_sorting(dtype): + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + return DatetimeArray(np.array([b, "NaT", a], dtype="datetime64[ns]"), dtype=dtype) + + +@pytest.fixture +def data_for_grouping(dtype): + """ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + a = pd.Timestamp("2000-01-01") + b = pd.Timestamp("2000-01-02") + c = pd.Timestamp("2000-01-03") + na = "NaT" + return DatetimeArray( + np.array([b, b, na, na, a, a, b, c], dtype="datetime64[ns]"), dtype=dtype + ) + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return a is pd.NaT and a is b + + return cmp + + +@pytest.fixture +def na_value(): + return pd.NaT + + +# ---------------------------------------------------------------------------- +class BaseDatetimeTests: + pass + + +# ---------------------------------------------------------------------------- +# Tests +class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests): + pass + + +class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests): + pass + + +class TestMethods(BaseDatetimeTests, base.BaseMethodsTests): + @pytest.mark.skip(reason="Incorrect expected") + def test_value_counts(self, all_data, dropna): + pass + + def test_combine_add(self, data_repeated): + # Timestamp.__add__(Timestamp) not defined + pass + + +class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): + def test_array_interface(self, data): + if data.tz: + # np.asarray(DTA) is currently always tz-naive. 
+ pytest.skip("GH-23569") + else: + super().test_array_interface(data) + + +class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_add_series_with_extension_array(self, data): + # Datetime + Datetime not implemented + s = pd.Series(data) + msg = "cannot add DatetimeArray and DatetimeArray" + with pytest.raises(TypeError, match=msg): + s + data + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_error(self, data, all_arithmetic_operators): + pass + + def test_divmod_series_array(self): + # GH 23287 + # skipping because it is not implemented + pass + + +class TestCasting(BaseDatetimeTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests): + def _compare_other(self, s, data, op_name, other): + # the base test is not appropriate for us. We raise on comparison + # with (some) integers, depending on the value. + pass + + +class TestMissing(BaseDatetimeTests, base.BaseMissingTests): + pass + + +class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests): + @pytest.mark.skip(reason="We have DatetimeTZBlock") + def test_concat(self, data, in_frame): + pass + + def test_concat_mixed_dtypes(self, data): + # concat(Series[datetimetz], Series[category]) uses a + # plain np.array(values) on the DatetimeArray, which + # drops the tz. 
+ super().test_concat_mixed_dtypes(data) + + @pytest.mark.parametrize("obj", ["series", "frame"]) + def test_unstack(self, obj): + # GH-13287: can't use base test, since building the expected fails. + data = DatetimeArray._from_sequence( + ["2000", "2001", "2002", "2003"], tz="US/Central" + ) + index = pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]) + + if obj == "series": + ser = pd.Series(data, index=index) + expected = pd.DataFrame( + {"A": data.take([0, 1]), "B": data.take([2, 3])}, + index=pd.Index(["a", "b"], name="b"), + ) + expected.columns.name = "a" + + else: + ser = pd.DataFrame({"A": data, "B": data}, index=index) + expected = pd.DataFrame( + { + ("A", "A"): data.take([0, 1]), + ("A", "B"): data.take([2, 3]), + ("B", "A"): data.take([0, 1]), + ("B", "B"): data.take([2, 3]), + }, + index=pd.Index(["a", "b"], name="b"), + ) + expected.columns.names = [None, "a"] + + result = ser.unstack(0) + self.assert_equal(result, expected) + + +class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BaseDatetimeTests, base.BasePrintingTests): + pass diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py new file mode 100644 index 00000000..6311070c --- /dev/null +++ b/pandas/tests/extension/test_external_block.py @@ -0,0 +1,55 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas.core.internals import BlockManager +from pandas.core.internals.blocks import Block, NonConsolidatableMixIn + + +class CustomBlock(NonConsolidatableMixIn, Block): + + _holder = np.ndarray + + def concat_same_type(self, to_concat, placement=None): + """ + Always concatenate disregarding self.ndim as the values are + always 1D in this custom Block + """ + values = np.concatenate([blk.values for blk in to_concat]) + return self.make_block_same_class( + values, placement=placement or slice(0, 
len(values), 1) + ) + + +@pytest.fixture +def df(): + df1 = pd.DataFrame({"a": [1, 2, 3]}) + blocks = df1._data.blocks + values = np.arange(3, dtype="int64") + custom_block = CustomBlock(values, placement=slice(1, 2)) + blocks = blocks + (custom_block,) + block_manager = BlockManager(blocks, [pd.Index(["a", "b"]), df1.index]) + return pd.DataFrame(block_manager) + + +def test_concat_series(): + # GH17728 + values = np.arange(3, dtype="int64") + block = CustomBlock(values, placement=slice(0, 3)) + s = pd.Series(block, pd.RangeIndex(3), fastpath=True) + + res = pd.concat([s, s]) + assert isinstance(res._data.blocks[0], CustomBlock) + + +def test_concat_dataframe(df): + # GH17728 + res = pd.concat([df, df]) + assert isinstance(res._data.blocks[1], CustomBlock) + + +def test_concat_axis1(df): + # GH17954 + df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]}) + res = pd.concat([df, df2], axis=1) + assert isinstance(res._data.blocks[1], CustomBlock) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py new file mode 100644 index 00000000..f55ec75b --- /dev/null +++ b/pandas/tests/extension/test_integer.py @@ -0,0 +1,256 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_extension_array_dtype + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.core.arrays.integer import ( + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, +) +from pandas.tests.extension import base + + +def make_data(): + return list(range(1, 9)) + [pd.NA] + list(range(10, 98)) + [pd.NA] + [99, 100] + + +@pytest.fixture( + params=[ + Int8Dtype, + Int16Dtype, + Int32Dtype, + Int64Dtype, + UInt8Dtype, + UInt16Dtype, + UInt32Dtype, + UInt64Dtype, + ] +) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return integer_array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_for_twos(dtype): + return integer_array(np.ones(100) * 2, dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return integer_array([pd.NA, 1], dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return integer_array([1, 2, 0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return integer_array([1, pd.NA, 0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are pd.NA + return lambda x, y: x is pd.NA and y is pd.NA + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(dtype): + b = 1 + a = 0 + c = 2 + na = pd.NA + return integer_array([b, b, na, na, a, a, b, c], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + @pytest.mark.skip(reason="using multiple dtypes") + def test_is_dtype_unboxes_dtype(self): + # we have multiple dtypes, so skip + pass + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super().check_opname(s, op_name, other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: 
+ if s.dtype.is_unsigned_integer and (op_name == "__rsub__"): + # TODO see https://github.com/pandas-dev/pandas/issues/22023 + pytest.skip("unsigned subtraction gives negative values") + + if ( + hasattr(other, "dtype") + and not is_extension_array_dtype(other.dtype) + and pd.api.types.is_integer_dtype(other.dtype) + ): + # other is np.int64 and would therefore always result in + # upcasting, so keeping other as same numpy_dtype + other = other.astype(s.dtype.numpy_dtype) + + result = op(s, other) + expected = s.combine(other, op) + + if op_name in ("__rtruediv__", "__truediv__", "__div__"): + expected = expected.fillna(np.nan).astype(float) + if op_name == "__rtruediv__": + # TODO reverse operators result in object dtype + result = result.astype(float) + elif op_name.startswith("__r"): + # TODO reverse operators result in object dtype + # see https://github.com/pandas-dev/pandas/issues/22024 + expected = expected.astype(s.dtype) + result = result.astype(s.dtype) + else: + # combine method result in 'biggest' (int64) dtype + expected = expected.astype(s.dtype) + pass + + if (op_name == "__rpow__") and isinstance(other, pd.Series): + # TODO pow on Int arrays gives different result with NA + # see https://github.com/pandas-dev/pandas/issues/22022 + result = result.fillna(1) + + self.assert_series_equal(result, expected) + else: + with pytest.raises(exc): + op(s, other) + + def _check_divmod_op(self, s, op, other, exc=None): + super()._check_divmod_op(s, op, other, None) + + @pytest.mark.skip(reason="intNA does not error on ops") + def test_error(self, data, all_arithmetic_operators): + # other specific errors tested in the integer array specific tests + pass + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + result = op(s, other) + # Override to do the astype to boolean + expected = s.combine(other, op).astype("boolean") + self.assert_series_equal(result, expected) + 
else: + with pytest.raises(exc): + op(s, other) + + def check_opname(self, s, op_name, other, exc=None): + super().check_opname(s, op_name, other, exc=None) + + def _compare_other(self, s, data, op_name, other): + self.check_opname(s, op_name, other) + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + # for test_concat_mixed_dtypes test + # concat of an Integer and Int coerces to object dtype + # TODO(jreback) once integrated this would + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="uses nullable integer") + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts(dropna=dropna).sort_index() + expected.index = expected.index.astype(all_data.dtype) + + self.assert_series_equal(result, expected) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestGroupby(base.BaseGroupbyTests): + pass + + +class TestNumericReduce(base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + # overwrite to ensure pd.NA is tested instead of np.nan + # https://github.com/pandas-dev/pandas/issues/30958 + result = getattr(s, op_name)(skipna=skipna) + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + if np.isnan(expected): + expected = pd.NA + tm.assert_almost_equal(result, expected) + + +class TestBooleanReduce(base.BaseBooleanReduceTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestParsing(base.BaseParsingTests): + pass diff --git a/pandas/tests/extension/test_interval.py 
b/pandas/tests/extension/test_interval.py new file mode 100644 index 00000000..2411f6cf --- /dev/null +++ b/pandas/tests/extension/test_interval.py @@ -0,0 +1,166 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import IntervalDtype + +from pandas import Interval +from pandas.core.arrays import IntervalArray +from pandas.tests.extension import base + + +def make_data(): + N = 100 + left = np.random.uniform(size=N).cumsum() + right = left + np.random.uniform(size=N) + return [Interval(l, r) for l, r in zip(left, right)] + + +@pytest.fixture +def dtype(): + return IntervalDtype() + + +@pytest.fixture +def data(): + """Length-100 PeriodArray for semantics test.""" + return IntervalArray(make_data()) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return IntervalArray.from_tuples([None, (0, 1)]) + + +@pytest.fixture +def data_for_sorting(): + return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)]) + + +@pytest.fixture +def data_missing_for_sorting(): + return IntervalArray.from_tuples([(1, 2), None, (0, 1)]) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(): + a = (0, 1) + b = (1, 2) + c = (2, 3) + return IntervalArray.from_tuples([b, b, None, None, a, a, b, c]) + + +class BaseInterval: + pass + + +class TestDtype(BaseInterval, 
base.BaseDtypeTests): + pass + + +class TestCasting(BaseInterval, base.BaseCastingTests): + pass + + +class TestConstructors(BaseInterval, base.BaseConstructorsTests): + pass + + +class TestGetitem(BaseInterval, base.BaseGetitemTests): + pass + + +class TestGrouping(BaseInterval, base.BaseGroupbyTests): + pass + + +class TestInterface(BaseInterval, base.BaseInterfaceTests): + def test_view(self, data): + # __setitem__ incorrectly makes a copy (GH#27147), so we only + # have a smoke-test + data.view() + + +class TestReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(BaseInterval, base.BaseMethodsTests): + @pytest.mark.skip(reason="addition is not defined for intervals") + def test_combine_add(self, data_repeated): + pass + + @pytest.mark.skip(reason="Not Applicable") + def test_fillna_length_mismatch(self, data_missing): + pass + + +class TestMissing(BaseInterval, base.BaseMissingTests): + # Index.fillna only accepts scalar `value`, so we have to skip all + # non-scalar fill tests. + unsupported_fill = pytest.mark.skip("Unsupported fillna option.") + + @unsupported_fill + def test_fillna_limit_pad(self): + pass + + @unsupported_fill + def test_fillna_series_method(self): + pass + + @unsupported_fill + def test_fillna_limit_backfill(self): + pass + + @unsupported_fill + def test_fillna_series(self): + pass + + def test_non_scalar_raises(self, data_missing): + msg = "Got a 'list' instead." 
+ with pytest.raises(TypeError, match=msg): + data_missing.fillna([1, 1]) + + +class TestReshaping(BaseInterval, base.BaseReshapingTests): + pass + + +class TestSetitem(BaseInterval, base.BaseSetitemTests): + @pytest.mark.xfail(reason="GH#27147 setitem changes underlying index") + def test_setitem_preserves_views(self, data): + super().test_setitem_preserves_views(data) + + +class TestPrinting(BaseInterval, base.BasePrintingTests): + @pytest.mark.skip(reason="custom repr") + def test_array_repr(self, data, size): + pass + + +class TestParsing(BaseInterval, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + expected_msg = r".*must implement _from_sequence_of_strings.*" + with pytest.raises(NotImplementedError, match=expected_msg): + super().test_EA_types(engine, data) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py new file mode 100644 index 00000000..61c59253 --- /dev/null +++ b/pandas/tests/extension/test_numpy.py @@ -0,0 +1,452 @@ +import numpy as np +import pytest + +from pandas.compat.numpy import _np_version_under1p16 + +import pandas as pd +import pandas._testing as tm +from pandas.core.arrays.numpy_ import PandasArray, PandasDtype + +from . import base + + +@pytest.fixture(params=["float", "object"]) +def dtype(request): + return PandasDtype(np.dtype(request.param)) + + +@pytest.fixture +def allow_in_pandas(monkeypatch): + """ + A monkeypatch to tells pandas to let us in. + + By default, passing a PandasArray to an index / series / frame + constructor will unbox that PandasArray to an ndarray, and treat + it as a non-EA column. We don't want people using EAs without + reason. + + The mechanism for this is a check against ABCPandasArray + in each constructor. + + But, for testing, we need to allow them in pandas. So we patch + the _typ of PandasArray, so that we evade the ABCPandasArray + check. 
+ """ + with monkeypatch.context() as m: + m.setattr(PandasArray, "_typ", "extension") + yield + + +@pytest.fixture +def data(allow_in_pandas, dtype): + if dtype.numpy_dtype == "object": + return pd.Series([(i,) for i in range(100)]).array + return PandasArray(np.arange(1, 101, dtype=dtype._dtype)) + + +@pytest.fixture +def data_missing(allow_in_pandas, dtype): + # For NumPy <1.16, np.array([np.nan, (1,)]) raises + # ValueError: setting an array element with a sequence. + if dtype.numpy_dtype == "object": + if _np_version_under1p16: + raise pytest.skip("Skipping for NumPy <1.16") + return PandasArray(np.array([np.nan, (1,)], dtype=object)) + return PandasArray(np.array([np.nan, 1.0])) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return np.isnan(a) and np.isnan(b) + + return cmp + + +@pytest.fixture +def data_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + if dtype.numpy_dtype == "object": + # Use an empty tuple for first element, then remove, + # to disable np.array's shape inference. + return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) + return PandasArray(np.array([1, 2, 0])) + + +@pytest.fixture +def data_missing_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + if dtype.numpy_dtype == "object": + return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object)) + return PandasArray(np.array([1, np.nan, 0])) + + +@pytest.fixture +def data_for_grouping(allow_in_pandas, dtype): + """Data for factorization, grouping, and unique tests. 
+ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + if dtype.numpy_dtype == "object": + a, b, c = (1,), (2,), (3,) + else: + a, b, c = np.arange(3) + return PandasArray( + np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype) + ) + + +@pytest.fixture +def skip_numpy_object(dtype): + """ + Tests for PandasArray with nested data. Users typically won't create + these objects via `pd.array`, but they can show up through `.array` + on a Series with nested data. Many of the base tests fail, as they aren't + appropriate for nested data. + + This fixture allows these tests to be skipped when used as a usefixtures + marker to either an individual test or a test class. + """ + if dtype == "object": + raise pytest.skip("Skipping for object dtype.") + + +skip_nested = pytest.mark.usefixtures("skip_numpy_object") + + +class BaseNumPyTests: + pass + + +class TestCasting(BaseNumPyTests, base.BaseCastingTests): + @skip_nested + def test_astype_str(self, data): + # ValueError: setting an array element with a sequence + super().test_astype_str(data) + + +class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests): + @pytest.mark.skip(reason="We don't register our dtype") + # We don't want to register. This test should probably be split in two. + def test_from_dtype(self, data): + pass + + @skip_nested + def test_array_from_scalars(self, data): + # ValueError: PandasArray must be 1-dimensional. + super().test_array_from_scalars(data) + + +class TestDtype(BaseNumPyTests, base.BaseDtypeTests): + @pytest.mark.skip(reason="Incorrect expected.") + # we unsurprisingly clash with a NumPy name. + def test_check_dtype(self, data): + pass + + +class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): + @skip_nested + def test_getitem_scalar(self, data): + # AssertionError + super().test_getitem_scalar(data) + + @skip_nested + def test_take_series(self, data): + # ValueError: PandasArray must be 1-dimensional. 
+ super().test_take_series(data) + + @pytest.mark.xfail(reason="astype doesn't recognize data.dtype") + def test_loc_iloc_frame_single_dtype(self, data): + super().test_loc_iloc_frame_single_dtype(data) + + +class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): + @skip_nested + def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): + # ValueError: Names should be list-like for a MultiIndex + super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + + +class TestInterface(BaseNumPyTests, base.BaseInterfaceTests): + @skip_nested + def test_array_interface(self, data): + # NumPy array shape inference + super().test_array_interface(data) + + +class TestMethods(BaseNumPyTests, base.BaseMethodsTests): + @pytest.mark.skip(reason="TODO: remove?") + def test_value_counts(self, all_data, dropna): + pass + + @pytest.mark.skip(reason="Incorrect expected") + # We have a bool dtype, so the result is an ExtensionArray + # but expected is not + def test_combine_le(self, data_repeated): + super().test_combine_le(data_repeated) + + @skip_nested + def test_combine_add(self, data_repeated): + # Not numeric + super().test_combine_add(data_repeated) + + @skip_nested + def test_shift_fill_value(self, data): + # np.array shape inference. Shift implementation fails. + super().test_shift_fill_value(data) + + @skip_nested + @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) + @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method): + # Fails creating expected + super().test_unique(data, box, method) + + @skip_nested + def test_fillna_copy_frame(self, data_missing): + # The "scalar" for this array isn't a scalar. + super().test_fillna_copy_frame(data_missing) + + @skip_nested + def test_fillna_copy_series(self, data_missing): + # The "scalar" for this array isn't a scalar. 
+ super().test_fillna_copy_series(data_missing) + + @skip_nested + def test_hash_pandas_object_works(self, data, as_frame): + # ndarray of tuples not hashable + super().test_hash_pandas_object_works(data, as_frame) + + @skip_nested + def test_searchsorted(self, data_for_sorting, as_series): + # Test setup fails. + super().test_searchsorted(data_for_sorting, as_series) + + @skip_nested + def test_where_series(self, data, na_value, as_frame): + # Test setup fails. + super().test_where_series(data, na_value, as_frame) + + @skip_nested + @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) + def test_repeat(self, data, repeats, as_series, use_numpy): + # Fails creating expected + super().test_repeat(data, repeats, as_series, use_numpy) + + @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") + def test_diff(self, data, periods): + return super().test_diff(data, periods) + + +@skip_nested +class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): + divmod_exc = None + series_scalar_exc = None + frame_scalar_exc = None + series_array_exc = None + + def test_divmod_series_array(self, data): + s = pd.Series(data) + self._check_divmod_op(s, divmod, data, exc=None) + + @pytest.mark.skip("We implement ops") + def test_error(self, data, all_arithmetic_operators): + pass + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + super().test_arith_series_with_array(data, all_arithmetic_operators) + + +class TestPrinting(BaseNumPyTests, base.BasePrintingTests): + pass + + +@skip_nested +class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests): + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + # avoid coercing int -> float. Just cast to the actual numpy type. 
+ expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + +@skip_nested +class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): + pass + + +class TestMissing(BaseNumPyTests, base.BaseMissingTests): + @skip_nested + def test_fillna_scalar(self, data_missing): + # Non-scalar "scalar" values. + super().test_fillna_scalar(data_missing) + + @skip_nested + def test_fillna_series_method(self, data_missing, fillna_method): + # Non-scalar "scalar" values. + super().test_fillna_series_method(data_missing, fillna_method) + + @skip_nested + def test_fillna_series(self, data_missing): + # Non-scalar "scalar" values. + super().test_fillna_series(data_missing) + + @skip_nested + def test_fillna_frame(self, data_missing): + # Non-scalar "scalar" values. + super().test_fillna_frame(data_missing) + + +class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): + @pytest.mark.skip("Incorrect parent test") + # not actually a mixed concat, since we concat int and int. 
+ def test_concat_mixed_dtypes(self, data): + super().test_concat_mixed_dtypes(data) + + @skip_nested + def test_merge(self, data, na_value): + # Fails creating expected + super().test_merge(data, na_value) + + @skip_nested + def test_merge_on_extension_array(self, data): + # Fails creating expected + super().test_merge_on_extension_array(data) + + @skip_nested + def test_merge_on_extension_array_duplicates(self, data): + # Fails creating expected + super().test_merge_on_extension_array_duplicates(data) + + @skip_nested + def test_transpose(self, data): + super().test_transpose(data) + + +class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): + @skip_nested + def test_setitem_scalar_series(self, data, box_in_series): + # AssertionError + super().test_setitem_scalar_series(data, box_in_series) + + @skip_nested + def test_setitem_sequence(self, data, box_in_series): + # ValueError: shape mismatch: value array of shape (2,1) could not + # be broadcast to indexing result of shape (2,) + super().test_setitem_sequence(data, box_in_series) + + @skip_nested + def test_setitem_sequence_mismatched_length_raises(self, data, as_array): + # ValueError: PandasArray must be 1-dimensional. 
+ super().test_setitem_sequence_mismatched_length_raises(data, as_array) + + @skip_nested + def test_setitem_sequence_broadcasts(self, data, box_in_series): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super().test_setitem_sequence_broadcasts(data, box_in_series) + + @skip_nested + def test_setitem_loc_scalar_mixed(self, data): + # AssertionError + super().test_setitem_loc_scalar_mixed(data) + + @skip_nested + def test_setitem_loc_scalar_multiple_homogoneous(self, data): + # AssertionError + super().test_setitem_loc_scalar_multiple_homogoneous(data) + + @skip_nested + def test_setitem_iloc_scalar_mixed(self, data): + # AssertionError + super().test_setitem_iloc_scalar_mixed(data) + + @skip_nested + def test_setitem_iloc_scalar_multiple_homogoneous(self, data): + # AssertionError + super().test_setitem_iloc_scalar_multiple_homogoneous(data) + + @skip_nested + @pytest.mark.parametrize("setter", ["loc", None]) + def test_setitem_mask_broadcast(self, data, setter): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super().test_setitem_mask_broadcast(data, setter) + + @skip_nested + def test_setitem_scalar_key_sequence_raise(self, data): + # Failed: DID NOT RAISE + super().test_setitem_scalar_key_sequence_raise(data) + + # TODO: there is some issue with PandasArray, therefore, + # skip the setitem test for now, and fix it later (GH 31446) + + @skip_nested + @pytest.mark.parametrize( + "mask", + [ + np.array([True, True, True, False, False]), + pd.array([True, True, True, False, False], dtype="boolean"), + ], + ids=["numpy-array", "boolean-array"], + ) + def test_setitem_mask(self, data, mask, box_in_series): + super().test_setitem_mask(data, mask, box_in_series) + + @skip_nested + def test_setitem_mask_raises(self, data, box_in_series): + super().test_setitem_mask_raises(data, box_in_series) + + @skip_nested + @pytest.mark.parametrize( + "idx", + [[0, 1, 2], pd.array([0, 
1, 2], dtype="Int64"), np.array([0, 1, 2])], + ids=["list", "integer-array", "numpy-array"], + ) + def test_setitem_integer_array(self, data, idx, box_in_series): + super().test_setitem_integer_array(data, idx, box_in_series) + + @skip_nested + @pytest.mark.parametrize( + "idx, box_in_series", + [ + ([0, 1, 2, pd.NA], False), + pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), + ], + ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], + ) + def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): + super().test_setitem_integer_with_missing_raises(data, idx, box_in_series) + + @skip_nested + def test_setitem_slice(self, data, box_in_series): + super().test_setitem_slice(data, box_in_series) + + @skip_nested + def test_setitem_loc_iloc_slice(self, data): + super().test_setitem_loc_iloc_slice(data) + + +@skip_nested +class TestParsing(BaseNumPyTests, base.BaseParsingTests): + pass diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py new file mode 100644 index 00000000..c439b8b5 --- /dev/null +++ b/pandas/tests/extension/test_period.py @@ -0,0 +1,161 @@ +import numpy as np +import pytest + +from pandas._libs.tslib import iNaT + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +from pandas.core.arrays import PeriodArray +from pandas.tests.extension import base + + +@pytest.fixture +def dtype(): + return PeriodDtype(freq="D") + + +@pytest.fixture +def data(dtype): + return PeriodArray(np.arange(1970, 2070), freq=dtype.freq) + + +@pytest.fixture +def data_for_twos(dtype): + return PeriodArray(np.ones(100) * 2, freq=dtype.freq) + + +@pytest.fixture +def data_for_sorting(dtype): + return PeriodArray([2018, 2019, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_missing(dtype): + return PeriodArray([iNaT, 2017], freq=dtype.freq) + + 
+@pytest.fixture +def data_missing_for_sorting(dtype): + return PeriodArray([2018, iNaT, 2017], freq=dtype.freq) + + +@pytest.fixture +def data_for_grouping(dtype): + B = 2018 + NA = iNaT + A = 2017 + C = 2019 + return PeriodArray([B, B, NA, NA, A, A, B, C], freq=dtype.freq) + + +@pytest.fixture +def na_value(): + return pd.NaT + + +class BasePeriodTests: + pass + + +class TestPeriodDtype(BasePeriodTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BasePeriodTests, base.BaseConstructorsTests): + pass + + +class TestGetitem(BasePeriodTests, base.BaseGetitemTests): + pass + + +class TestMethods(BasePeriodTests, base.BaseMethodsTests): + def test_combine_add(self, data_repeated): + # Period + Period is not defined. + pass + + +class TestInterface(BasePeriodTests, base.BaseInterfaceTests): + + pass + + +class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests): + implements = {"__sub__", "__rsub__"} + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # we implement subtraction... + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... but not the rest. + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], exc=None) + else: + # ... but not the rest.
+ super().test_arith_series_with_array(data, all_arithmetic_operators) + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + super()._check_divmod_op(s, op, other, exc=TypeError) + + def test_add_series_with_extension_array(self, data): + # we don't implement + for Period + s = pd.Series(data) + msg = ( + r"unsupported operand type\(s\) for \+: " + r"\'PeriodArray\' and \'PeriodArray\'" + ) + with pytest.raises(TypeError, match=msg): + s + data + + def test_error(self): + pass + + def test_direct_arith_with_series_returns_not_implemented(self, data): + # Override to use __sub__ instead of __add__ + other = pd.Series(data) + result = data.__sub__(other) + assert result is NotImplemented + + +class TestCasting(BasePeriodTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests): + def _compare_other(self, s, data, op_name, other): + # the base test is not appropriate for us. We raise on comparison + # with (some) integers, depending on the value.
+ pass + + +class TestMissing(BasePeriodTests, base.BaseMissingTests): + pass + + +class TestReshaping(BasePeriodTests, base.BaseReshapingTests): + pass + + +class TestSetitem(BasePeriodTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BasePeriodTests, base.BasePrintingTests): + pass + + +class TestParsing(BasePeriodTests, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + super().test_EA_types(engine, data) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py new file mode 100644 index 00000000..198a228b --- /dev/null +++ b/pandas/tests/extension/test_sparse.py @@ -0,0 +1,379 @@ +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import SparseDtype +import pandas._testing as tm +from pandas.arrays import SparseArray +from pandas.tests.extension import base + + +def make_data(fill_value): + if np.isnan(fill_value): + data = np.random.uniform(size=100) + else: + data = np.random.randint(1, 100, size=100) + if data[0] == data[1]: + data[0] += 1 + + data[2::3] = fill_value + return data + + +@pytest.fixture +def dtype(): + return SparseDtype() + + +@pytest.fixture(params=[0, np.nan]) +def data(request): + """Length-100 SparseArray for semantics test.""" + res = SparseArray(make_data(request.param), fill_value=request.param) + return res + + +@pytest.fixture +def data_for_twos(request): + return SparseArray(np.ones(100) * 2) + + +@pytest.fixture(params=[0, np.nan]) +def data_missing(request): + """Length 2 array with [NA, Valid]""" + return SparseArray([np.nan, 1], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_repeated(request): + """Return different versions of data for count times""" + + def gen(count): + for _ in range(count): + yield SparseArray(make_data(request.param),
fill_value=request.param) + + yield gen + + +@pytest.fixture(params=[0, np.nan]) +def data_for_sorting(request): + return SparseArray([2, 3, 1], fill_value=request.param) + + +@pytest.fixture(params=[0, np.nan]) +def data_missing_for_sorting(request): + return SparseArray([2, np.nan, 1], fill_value=request.param) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def na_cmp(): + return lambda left, right: pd.isna(left) and pd.isna(right) + + +@pytest.fixture(params=[0, np.nan]) +def data_for_grouping(request): + return SparseArray([1, 1, np.nan, np.nan, 2, 2, 1, 3], fill_value=request.param) + + +class BaseSparseTests: + def _check_unsupported(self, data): + if data.dtype == SparseDtype(int, 0): + pytest.skip("Can't store nan in int array.") + + @pytest.mark.xfail(reason="SparseArray does not support setitem") + def test_ravel(self, data): + super().test_ravel(data) + + +class TestDtype(BaseSparseTests, base.BaseDtypeTests): + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type() is SparseArray + + +class TestInterface(BaseSparseTests, base.BaseInterfaceTests): + def test_no_values_attribute(self, data): + pytest.skip("We have values") + + def test_copy(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.copy() + + def test_view(self, data): + # __setitem__ does not work, so we only have a smoke-test + data.view() + + +class TestConstructors(BaseSparseTests, base.BaseConstructorsTests): + pass + + +class TestReshaping(BaseSparseTests, base.BaseReshapingTests): + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + # This should be the same, aside from concat([sparse, float]) + df1 = pd.DataFrame({"A": data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ["a", "b", "c"]}).astype("category") + dfs = [df1, df2, df3] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat( + [x.apply(lambda s: 
np.asarray(s).astype(object)) for x in dfs] + ) + self.assert_frame_equal(result, expected) + + def test_concat_columns(self, data, na_value): + self._check_unsupported(data) + super().test_concat_columns(data, na_value) + + def test_concat_extension_arrays_copy_false(self, data, na_value): + self._check_unsupported(data) + super().test_concat_extension_arrays_copy_false(data, na_value) + + def test_align(self, data, na_value): + self._check_unsupported(data) + super().test_align(data, na_value) + + def test_align_frame(self, data, na_value): + self._check_unsupported(data) + super().test_align_frame(data, na_value) + + def test_align_series_frame(self, data, na_value): + self._check_unsupported(data) + super().test_align_series_frame(data, na_value) + + def test_merge(self, data, na_value): + self._check_unsupported(data) + super().test_merge(data, na_value) + + +class TestGetitem(BaseSparseTests, base.BaseGetitemTests): + def test_get(self, data): + s = pd.Series(data, index=[2 * i for i in range(len(data))]) + if np.isnan(s.values.fill_value): + assert np.isnan(s.get(4)) and np.isnan(s.iloc[2]) + else: + assert s.get(4) == s.iloc[2] + assert s.get(2) == s.iloc[1] + + def test_reindex(self, data, na_value): + self._check_unsupported(data) + super().test_reindex(data, na_value) + + +# Skipping TestSetitem, since we don't implement it. 
+ + +class TestMissing(BaseSparseTests, base.BaseMissingTests): + def test_isna(self, data_missing): + expected_dtype = SparseDtype(bool, pd.isna(data_missing.dtype.fill_value)) + expected = SparseArray([True, False], dtype=expected_dtype) + + result = pd.isna(data_missing) + self.assert_equal(result, expected) + + result = pd.Series(data_missing).isna() + expected = pd.Series(expected) + self.assert_series_equal(result, expected) + + # GH 21189 + result = pd.Series(data_missing).drop([0, 1]).isna() + expected = pd.Series([], dtype=expected_dtype) + self.assert_series_equal(result, expected) + + def test_fillna_limit_pad(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning): + super().test_fillna_limit_pad(data_missing) + + def test_fillna_limit_backfill(self, data_missing): + with tm.assert_produces_warning(PerformanceWarning): + super().test_fillna_limit_backfill(data_missing) + + def test_fillna_series_method(self, data_missing, fillna_method): + with tm.assert_produces_warning(PerformanceWarning): + super().test_fillna_series_method(data_missing, fillna_method) + + @pytest.mark.skip(reason="Unsupported") + def test_fillna_series(self): + # this one looks doable. + pass + + def test_fillna_frame(self, data_missing): + # Have to override to specify that fill_value will change.
+ fill_value = data_missing[1] + + result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value) + + if pd.isna(data_missing.fill_value): + dtype = SparseDtype(data_missing.dtype, fill_value) + else: + dtype = data_missing.dtype + + expected = pd.DataFrame( + { + "A": data_missing._from_sequence([fill_value, fill_value], dtype=dtype), + "B": [1, 2], + } + ) + + self.assert_frame_equal(result, expected) + + +class TestMethods(BaseSparseTests, base.BaseMethodsTests): + def test_combine_le(self, data_repeated): + # We return a Series[SparseArray].__le__ returns a + # Series[Sparse[bool]] + # rather than Series[bool] + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series( + SparseArray( + [a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))], + fill_value=False, + ) + ) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series( + SparseArray([a <= val for a in list(orig_data1)], fill_value=False) + ) + self.assert_series_equal(result, expected) + + def test_fillna_copy_frame(self, data_missing): + arr = data_missing.take([1, 1]) + df = pd.DataFrame({"A": arr}) + + filled_val = df.iloc[0, 0] + result = df.fillna(filled_val) + + assert df.values.base is not result.values.base + assert df.A._values.to_dense() is arr.to_dense() + + def test_fillna_copy_series(self, data_missing): + arr = data_missing.take([1, 1]) + ser = pd.Series(arr) + + filled_val = ser[0] + result = ser.fillna(filled_val) + + assert ser._values is not result._values + assert ser._values.to_dense() is arr.to_dense() + + @pytest.mark.skip(reason="Not Applicable") + def test_fillna_length_mismatch(self, data_missing): + pass + + def test_where_series(self, data, na_value): + assert data[0] != data[1] + cls = type(data) + a, b = data[:2] + + ser = pd.Series(cls._from_sequence([a, a, 
b, b], dtype=data.dtype)) + + cond = np.array([True, True, False, False]) + result = ser.where(cond) + + new_dtype = SparseDtype("float", 0.0) + expected = pd.Series( + cls._from_sequence([a, a, na_value, na_value], dtype=new_dtype) + ) + self.assert_series_equal(result, expected) + + other = cls._from_sequence([a, b, a, b], dtype=data.dtype) + cond = np.array([True, False, True, True]) + result = ser.where(cond, other) + expected = pd.Series(cls._from_sequence([a, b, b, b], dtype=data.dtype)) + self.assert_series_equal(result, expected) + + def test_combine_first(self, data): + if data.dtype.subtype == "int": + # Right now this is upcasted to float, just like combine_first + # for Series[int] + pytest.skip("TODO(SparseArray.__setitem__ will preserve dtype.") + super().test_combine_first(data) + + def test_searchsorted(self, data_for_sorting, as_series): + with tm.assert_produces_warning(PerformanceWarning): + super().test_searchsorted(data_for_sorting, as_series) + + +class TestCasting(BaseSparseTests, base.BaseCastingTests): + pass + + +class TestArithmeticOps(BaseSparseTests, base.BaseArithmeticOpsTests): + series_scalar_exc = None + frame_scalar_exc = None + divmod_exc = None + series_array_exc = None + + def _skip_if_different_combine(self, data): + if data.fill_value == 0: + # arith ops call on dtype.fill_value so that the sparsity + # is maintained. Combine can't be called on a dtype in + # general, so we can't make the expected. 
This is tested elsewhere + raise pytest.skip("Incorrected expected from Series.combine") + + def test_error(self, data, all_arithmetic_operators): + pass + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + self._skip_if_different_combine(data) + super().test_arith_series_with_scalar(data, all_arithmetic_operators) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + self._skip_if_different_combine(data) + super().test_arith_series_with_array(data, all_arithmetic_operators) + + +class TestComparisonOps(BaseSparseTests, base.BaseComparisonOpsTests): + def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) + + # array + result = pd.Series(op(data, other)) + # hard to test the fill value, since we don't know what expected + # is in general. + # Rely on tests in `tests/sparse` to validate that. + assert isinstance(result.dtype, SparseDtype) + assert result.dtype.subtype == np.dtype("bool") + + with np.errstate(all="ignore"): + expected = pd.Series( + SparseArray( + op(np.asarray(data), np.asarray(other)), + fill_value=result.values.fill_value, + ) + ) + + tm.assert_series_equal(result, expected) + + # series + s = pd.Series(data) + result = op(s, other) + tm.assert_series_equal(result, expected) + + +class TestPrinting(BaseSparseTests, base.BasePrintingTests): + @pytest.mark.xfail(reason="Different repr", strict=True) + def test_array_repr(self, data, size): + super().test_array_repr(data, size) + + +class TestParsing(BaseSparseTests, base.BaseParsingTests): + @pytest.mark.parametrize("engine", ["c", "python"]) + def test_EA_types(self, engine, data): + expected_msg = r".*must implement _from_sequence_of_strings.*" + with pytest.raises(NotImplementedError, match=expected_msg): + super().test_EA_types(engine, data) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py new file mode 100644 index 00000000..86aed671 --- /dev/null +++ 
b/pandas/tests/extension/test_string.py @@ -0,0 +1,114 @@ +import string + +import numpy as np +import pytest + +import pandas as pd +from pandas.core.arrays.string_ import StringArray, StringDtype +from pandas.tests.extension import base + + +@pytest.fixture +def dtype(): + return StringDtype() + + +@pytest.fixture +def data(): + strings = np.random.choice(list(string.ascii_letters), size=100) + while strings[0] == strings[1]: + strings = np.random.choice(list(string.ascii_letters), size=100) + + return StringArray._from_sequence(strings) + + +@pytest.fixture +def data_missing(): + """Length 2 array with [NA, Valid]""" + return StringArray._from_sequence([pd.NA, "A"]) + + +@pytest.fixture +def data_for_sorting(): + return StringArray._from_sequence(["B", "C", "A"]) + + +@pytest.fixture +def data_missing_for_sorting(): + return StringArray._from_sequence(["B", pd.NA, "A"]) + + +@pytest.fixture +def na_value(): + return pd.NA + + +@pytest.fixture +def data_for_grouping(): + return StringArray._from_sequence(["B", "B", pd.NA, pd.NA, "A", "A", "B", "C"]) + + +class TestDtype(base.BaseDtypeTests): + pass + + +class TestInterface(base.BaseInterfaceTests): + pass + + +class TestConstructors(base.BaseConstructorsTests): + pass + + +class TestReshaping(base.BaseReshapingTests): + pass + + +class TestGetitem(base.BaseGetitemTests): + pass + + +class TestSetitem(base.BaseSetitemTests): + pass + + +class TestMissing(base.BaseMissingTests): + pass + + +class TestNoReduce(base.BaseNoReduceTests): + pass + + +class TestMethods(base.BaseMethodsTests): + @pytest.mark.skip(reason="returns nullable") + def test_value_counts(self, all_data, dropna): + return super().test_value_counts(all_data, dropna) + + +class TestCasting(base.BaseCastingTests): + pass + + +class TestComparisonOps(base.BaseComparisonOpsTests): + def _compare_other(self, s, data, op_name, other): + result = getattr(s, op_name)(other) + expected = getattr(s.astype(object), op_name)(other).astype("boolean") + 
self.assert_series_equal(result, expected) + + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + self._compare_other(s, data, op_name, "abc") + + +class TestParsing(base.BaseParsingTests): + pass + + +class TestPrinting(base.BasePrintingTests): + pass + + +class TestGroupBy(base.BaseGroupbyTests): + pass diff --git a/pandas/tests/frame/__init__.py b/pandas/tests/frame/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py new file mode 100644 index 00000000..463a1409 --- /dev/null +++ b/pandas/tests/frame/common.py @@ -0,0 +1,31 @@ +def _check_mixed_float(df, dtype=None): + # float16 are most likely to be upcasted to float32 + dtypes = dict(A="float32", B="float32", C="float16", D="float64") + if isinstance(dtype, str): + dtypes = {k: dtype for k, v in dtypes.items()} + elif isinstance(dtype, dict): + dtypes.update(dtype) + if dtypes.get("A"): + assert df.dtypes["A"] == dtypes["A"] + if dtypes.get("B"): + assert df.dtypes["B"] == dtypes["B"] + if dtypes.get("C"): + assert df.dtypes["C"] == dtypes["C"] + if dtypes.get("D"): + assert df.dtypes["D"] == dtypes["D"] + + +def _check_mixed_int(df, dtype=None): + dtypes = dict(A="int32", B="uint64", C="uint8", D="int64") + if isinstance(dtype, str): + dtypes = {k: dtype for k, v in dtypes.items()} + elif isinstance(dtype, dict): + dtypes.update(dtype) + if dtypes.get("A"): + assert df.dtypes["A"] == dtypes["A"] + if dtypes.get("B"): + assert df.dtypes["B"] == dtypes["B"] + if dtypes.get("C"): + assert df.dtypes["C"] == dtypes["C"] + if dtypes.get("D"): + assert df.dtypes["D"] == dtypes["D"] diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py new file mode 100644 index 00000000..774eb443 --- /dev/null +++ b/pandas/tests/frame/conftest.py @@ -0,0 +1,330 @@ +import numpy as np +import pytest + +from pandas import DataFrame, NaT, date_range +import 
@pytest.fixture
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing.

    The frame has 30 rows.  Rows at positions 5-9 are NaN in every column
    and rows at positions 15-19 are NaN in the last two columns.
    """
    df = DataFrame(tm.getSeriesData())
    # set some NAs
    # The index holds strings, so these integer bounds are positions, not
    # labels; use .iloc so the selection does not depend on .loc falling
    # back to positional slicing.
    df.iloc[5:10] = np.nan
    df.iloc[15:20, -2:] = np.nan
    return df
@pytest.fixture
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; the frame has 30 rows and every
    column is stored as int64.
    """
    truncated = {}
    for name, values in tm.getSeriesData().items():
        truncated[name] = values.astype(int)
    frame = DataFrame(truncated)
    # force these all to int64 to avoid platform testing issues
    return DataFrame(dict(frame.items()), dtype=np.int64)
@pytest.fixture
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo'].  The frame has 30 rows; the
    first four columns hold floats and 'foo' holds the string "bar" in
    every row.
    """
    frame = DataFrame(tm.getSeriesData())
    # append the constant string column after the float columns
    frame.insert(frame.shape[1], "foo", "bar")
    return frame
@pytest.fixture
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively; the frame has 30 rows.  'B' is all ones and 'D'
    repeats the values of 'C'.
    """
    as_int = {
        name: values.astype(int) for name, values in tm.getSeriesData().items()
    }
    frame = DataFrame(as_int)
    frame["A"] = frame["A"].astype("int32")
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # D is cast from the already-uint8 C column, so it mirrors C's values
    frame["D"] = frame["C"].astype("int64")
    return frame
@pytest.fixture
def uint64_frame():
    """
    Fixture for DataFrame with uint64 values

    Columns are ['A', 'B']; 'B' holds values starting at 2 ** 63.
    """
    base = 2 ** 63
    columns = {
        "A": np.arange(3),
        "B": [base, base + 5, base + 10],
    }
    return DataFrame(columns, dtype=np.uint64)
+ + A B C D E (tuple, as, label) + 0 foo one a 0.608477 -0.012500 -1.664297 + 1 foo two b -0.633460 0.249614 -0.364411 + 2 foo three c 0.615256 2.154968 -0.834666 + 3 bar one d 0.234246 1.085675 0.718445 + 4 bar two e 0.533841 -0.005702 -3.533912 + """ + df = DataFrame( + { + "A": ["foo", "foo", "foo", "bar", "bar"], + "B": ["one", "two", "three", "one", "two"], + "C": ["a", "b", "c", "d", "e"], + "D": np.random.randn(5), + "E": np.random.randn(5), + ("tuple", "as", "label"): np.random.randn(5), + } + ) + return df diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py new file mode 100644 index 00000000..a29c1936 --- /dev/null +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -0,0 +1,398 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import Categorical, DataFrame, Index, Series +import pandas._testing as tm + + +class TestDataFrameIndexingCategorical: + def test_assignment(self): + # assignment + df = DataFrame( + {"value": np.array(np.random.randint(0, 10000, 100), dtype="int32")} + ) + labels = Categorical( + ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + ) + + df = df.sort_values(by=["value"], ascending=True) + s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) + d = s.values + df["D"] = d + str(df) + + result = df.dtypes + expected = Series( + [np.dtype("int32"), CategoricalDtype(categories=labels, ordered=False)], + index=["value", "D"], + ) + tm.assert_series_equal(result, expected) + + df["E"] = s + str(df) + + result = df.dtypes + expected = Series( + [ + np.dtype("int32"), + CategoricalDtype(categories=labels, ordered=False), + CategoricalDtype(categories=labels, ordered=False), + ], + index=["value", "D", "E"], + ) + tm.assert_series_equal(result, expected) + + result1 = df["D"] + result2 = df["E"] + tm.assert_categorical_equal(result1._data._block.values, d) + + # 
sorting + s.name = "E" + tm.assert_series_equal(result2.sort_index(), s.sort_index()) + + cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) + df = DataFrame(Series(cat)) + + def test_assigning_ops(self): + # systematically test the assigning operations: + # for all slicing ops: + # for value in categories and value not in categories: + + # - assign a single value -> exp_single_cats_value + + # - assign a complete row (mixed values) -> exp_single_row + + # assign multiple rows (mixed values) (-> array) -> exp_multi_row + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + + cats = Categorical(["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 1, 1, 1, 1, 1, 1] + orig = DataFrame({"cats": cats, "values": values}, index=idx) + + # the expected values + # changed single row + cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx1 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values1 = [1, 1, 2, 1, 1, 1, 1] + exp_single_row = DataFrame({"cats": cats1, "values": values1}, index=idx1) + + # changed multiple rows + cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx2 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = DataFrame({"cats": cats2, "values": values2}, index=idx2) + + # changed part of the cats column + cats3 = Categorical(["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx3 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values3 = [1, 1, 1, 1, 1, 1, 1] + exp_parts_cats_col = DataFrame({"cats": cats3, "values": values3}, index=idx3) + + # changed single value in cats col + cats4 = Categorical(["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx4 = Index(["h", "i", "j", "k", "l", "m", "n"]) + values4 = [1, 1, 1, 1, 1, 1, 1] + 
exp_single_cats_value = DataFrame( + {"cats": cats4, "values": values4}, index=idx4 + ) + + # iloc + # ############### + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.iloc[2, 0] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + df = orig.copy() + df.iloc[df.index == "j", 0] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + with pytest.raises(ValueError): + df = orig.copy() + df.iloc[2, 0] = "c" + + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + df.iloc[2, :] = ["b", 2] + tm.assert_frame_equal(df, exp_single_row) + + # - assign a complete row (mixed values) not in categories set + with pytest.raises(ValueError): + df = orig.copy() + df.iloc[2, :] = ["c", 2] + + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + df.iloc[2:4, :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + with pytest.raises(ValueError): + df = orig.copy() + df.iloc[2:4, :] = [["c", 2], ["c", 2]] + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + # different categories -> not sure if this should fail or pass + df = orig.copy() + df.iloc[2:4, 0] = Categorical(list("bb"), categories=list("abc")) + + with pytest.raises(ValueError): + # different values + df = orig.copy() + df.iloc[2:4, 0] = Categorical(list("cc"), categories=list("abc")) + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + df = orig.copy() + df.iloc[2:4, 0] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + df.iloc[2:4, 0] = ["c", "c"] + + # loc + # ############## + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.loc["j", "cats"] = "b" 
+ tm.assert_frame_equal(df, exp_single_cats_value) + + df = orig.copy() + df.loc[df.index == "j", "cats"] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + with pytest.raises(ValueError): + df = orig.copy() + df.loc["j", "cats"] = "c" + + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + df.loc["j", :] = ["b", 2] + tm.assert_frame_equal(df, exp_single_row) + + # - assign a complete row (mixed values) not in categories set + with pytest.raises(ValueError): + df = orig.copy() + df.loc["j", :] = ["c", 2] + + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + df.loc["j":"k", :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + with pytest.raises(ValueError): + df = orig.copy() + df.loc["j":"k", :] = [["c", 2], ["c", 2]] + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", "cats"] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + # different categories -> not sure if this should fail or pass + df = orig.copy() + df.loc["j":"k", "cats"] = Categorical( + ["b", "b"], categories=["a", "b", "c"] + ) + + with pytest.raises(ValueError): + # different values + df = orig.copy() + df.loc["j":"k", "cats"] = Categorical( + ["c", "c"], categories=["a", "b", "c"] + ) + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", "cats"] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + df.loc["j":"k", "cats"] = ["c", "c"] + + # loc + # ############## + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.loc["j", df.columns[0]] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + df = orig.copy() + df.loc[df.index == "j", df.columns[0]] = "b" + 
tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + with pytest.raises(ValueError): + df = orig.copy() + df.loc["j", df.columns[0]] = "c" + + # - assign a complete row (mixed values) -> exp_single_row + df = orig.copy() + df.loc["j", :] = ["b", 2] + tm.assert_frame_equal(df, exp_single_row) + + # - assign a complete row (mixed values) not in categories set + with pytest.raises(ValueError): + df = orig.copy() + df.loc["j", :] = ["c", 2] + + # - assign multiple rows (mixed values) -> exp_multi_row + df = orig.copy() + df.loc["j":"k", :] = [["b", 2], ["b", 2]] + tm.assert_frame_equal(df, exp_multi_row) + + with pytest.raises(ValueError): + df = orig.copy() + df.loc["j":"k", :] = [["c", 2], ["c", 2]] + + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", df.columns[0]] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + # different categories -> not sure if this should fail or pass + df = orig.copy() + df.loc["j":"k", df.columns[0]] = Categorical( + ["b", "b"], categories=["a", "b", "c"] + ) + + with pytest.raises(ValueError): + # different values + df = orig.copy() + df.loc["j":"k", df.columns[0]] = Categorical( + ["c", "c"], categories=["a", "b", "c"] + ) + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col + df = orig.copy() + df.loc["j":"k", df.columns[0]] = ["b", "b"] + tm.assert_frame_equal(df, exp_parts_cats_col) + + with pytest.raises(ValueError): + df.loc["j":"k", df.columns[0]] = ["c", "c"] + + # iat + df = orig.copy() + df.iat[2, 0] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + with pytest.raises(ValueError): + df = orig.copy() + df.iat[2, 0] = "c" + + # at + # - assign a single value -> exp_single_cats_value + df = orig.copy() + df.at["j", 
"cats"] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + # - assign a single value not in the current categories set + with pytest.raises(ValueError): + df = orig.copy() + df.at["j", "cats"] = "c" + + # fancy indexing + catsf = Categorical( + ["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"] + ) + idxf = Index(["h", "i", "j", "k", "l", "m", "n"]) + valuesf = [1, 1, 3, 3, 1, 1, 1] + df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf) + + exp_fancy = exp_multi_row.copy() + exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True) + + df[df["cats"] == "c"] = ["b", 2] + # category c is kept in .categories + tm.assert_frame_equal(df, exp_fancy) + + # set_value + df = orig.copy() + df.at["j", "cats"] = "b" + tm.assert_frame_equal(df, exp_single_cats_value) + + with pytest.raises(ValueError): + df = orig.copy() + df.at["j", "cats"] = "c" + + # Assigning a Category to parts of a int/... column uses the values of + # the Categorical + df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) + exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp) + + def test_functions_no_warnings(self): + df = DataFrame({"value": np.random.randint(0, 100, 20)}) + labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] + with tm.assert_produces_warning(False): + df["group"] = pd.cut( + df.value, range(0, 105, 10), right=False, labels=labels + ) + + def test_setitem_single_row_categorical(self): + # GH 25495 + df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) + categories = pd.Categorical(df["Alpha"], categories=["a", "b", "c"]) + df.loc[:, "Alpha"] = categories + + result = df["Alpha"] + expected = Series(categories, index=df.index, name="Alpha") + tm.assert_series_equal(result, expected) + + def test_loc_indexing_preserves_index_category_dtype(self): + # GH 
def test_wrong_length_cat_dtype_raises(self):
    """Assigning a 6-element Categorical to a 10-row frame raises ValueError."""
    # GH29523
    frame = pd.DataFrame({"bar": range(10)})
    too_short = pd.Categorical.from_codes(
        codes=[0, 1, 1, 0, 1, 2], categories=["a", "b", "c"]
    )
    expected_msg = "Length of values does not match length of index"
    with pytest.raises(ValueError, match=expected_msg):
        frame["foo"] = too_short
they + # are copies) + b1 = df._data.blocks[1] + b2 = df._data.blocks[2] + tm.assert_extension_array_equal(b1.values, b2.values) + assert id(b1.values._data.base) != id(b2.values._data.base) + + # with nan + df2 = df.copy() + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT + result = df2["B"] + tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) + tm.assert_series_equal(df2.dtypes, df.dtypes) + + def test_set_reset(self): + + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + + # set/reset + df = DataFrame({"A": [0, 1, 2]}, index=idx) + result = df.reset_index() + assert result["foo"].dtype, "M8[ns, US/Eastern" + + df = result.set_index("foo") + tm.assert_index_equal(df.index, idx) + + def test_transpose(self, timezone_frame): + + result = timezone_frame.T + expected = DataFrame(timezone_frame.values.T) + expected.index = ["A", "B", "C"] + tm.assert_frame_equal(result, expected) + + def test_scalar_assignment(self): + # issue #19843 + df = pd.DataFrame(index=(0, 1, 2)) + df["now"] = pd.Timestamp("20130101", tz="UTC") + expected = pd.DataFrame( + {"now": pd.Timestamp("20130101", tz="UTC")}, index=[0, 1, 2] + ) + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py new file mode 100644 index 00000000..db0c0504 --- /dev/null +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -0,0 +1,2345 @@ +from datetime import date, datetime, time, timedelta +import re + +import numpy as np +import pytest + +from pandas._libs.tslib import iNaT + +from pandas.core.dtypes.common import is_float_dtype, is_integer + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, +) +import pandas._testing as tm +from pandas.arrays import SparseArray +import pandas.core.common as com +from pandas.core.indexing import IndexingError + +from pandas.tseries.offsets 
import BDay + + +class TestDataFrameIndexing: + def test_getitem(self, float_frame): + # Slicing + sl = float_frame[:20] + assert len(sl.index) == 20 + + # Column access + for _, series in sl.items(): + assert len(series.index) == 20 + assert tm.equalContents(series.index, sl.index) + + for key, _ in float_frame._series.items(): + assert float_frame[key] is not None + + assert "random" not in float_frame + with pytest.raises(KeyError, match="random"): + float_frame["random"] + + df = float_frame.copy() + df["$10"] = np.random.randn(len(df)) + + ad = np.random.randn(len(df)) + df["@awesome_domain"] = ad + + with pytest.raises(KeyError, match=re.escape("'df[\"$10\"]'")): + df.__getitem__('df["$10"]') + + res = df["@awesome_domain"] + tm.assert_numpy_array_equal(ad, res.values) + + def test_getitem_dupe_cols(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + msg = "\"None of [Index(['baf'], dtype='object')] are in the [columns]\"" + with pytest.raises(KeyError, match=re.escape(msg)): + df[["baf"]] + + def test_get(self, float_frame): + b = float_frame.get("B") + tm.assert_series_equal(b, float_frame["B"]) + + assert float_frame.get("foo") is None + tm.assert_series_equal( + float_frame.get("foo", float_frame["B"]), float_frame["B"] + ) + + @pytest.mark.parametrize( + "df", + [ + DataFrame(), + DataFrame(columns=list("AB")), + DataFrame(columns=list("AB"), index=range(3)), + ], + ) + def test_get_none(self, df): + # see gh-5652 + assert df.get(None) is None + + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) + def test_loc_iterable(self, float_frame, key_type): + idx = key_type(["A", "B", "C"]) + result = float_frame.loc[:, idx] + expected = float_frame.loc[:, ["A", "B", "C"]] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "idx_type", + [ + list, + iter, + Index, + set, + lambda l: dict(zip(l, range(len(l)))), + lambda l: dict(zip(l, range(len(l)))).keys(), + ], + ids=["list", "iter", "Index", 
"set", "dict", "dict_keys"], + ) + @pytest.mark.parametrize("levels", [1, 2]) + def test_getitem_listlike(self, idx_type, levels, float_frame): + # GH 21294 + + if levels == 1: + frame, missing = float_frame, "food" + else: + # MultiIndex columns + frame = DataFrame( + np.random.randn(8, 3), + columns=Index( + [("foo", "bar"), ("baz", "qux"), ("peek", "aboo")], + name=("sth", "sth2"), + ), + ) + missing = ("good", "food") + + keys = [frame.columns[1], frame.columns[0]] + idx = idx_type(keys) + idx_check = list(idx_type(keys)) + + result = frame[idx] + + expected = frame.loc[:, idx_check] + expected.columns.names = frame.columns.names + + tm.assert_frame_equal(result, expected) + + idx = idx_type(keys + [missing]) + with pytest.raises(KeyError, match="not in index"): + frame[idx] + + @pytest.mark.parametrize( + "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))] + ) + def test_loc_uint64(self, val, expected): + # see gh-19399 + df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63]) + result = df.loc[val] + + expected.name = val + tm.assert_series_equal(result, expected) + + def test_getitem_callable(self, float_frame): + # GH 12533 + result = float_frame[lambda x: "A"] + tm.assert_series_equal(result, float_frame.loc[:, "A"]) + + result = float_frame[lambda x: ["A", "B"]] + tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]]) + + df = float_frame[:3] + result = df[lambda x: [True, False, True]] + tm.assert_frame_equal(result, float_frame.iloc[[0, 2], :]) + + def test_setitem_list(self, float_frame): + + float_frame["E"] = "foo" + data = float_frame[["A", "B"]] + float_frame[["B", "A"]] = data + + tm.assert_series_equal(float_frame["B"], data["A"], check_names=False) + tm.assert_series_equal(float_frame["A"], data["B"], check_names=False) + + msg = "Columns must be same length as key" + with pytest.raises(ValueError, match=msg): + data[["A"]] = float_frame[["A", "B"]] + + msg = "Length of values does not match length of index" + with 
pytest.raises(ValueError, match=msg): + data["A"] = range(len(data.index) - 1) + + df = DataFrame(0, index=range(3), columns=["tt1", "tt2"], dtype=np.int_) + df.loc[1, ["tt1", "tt2"]] = [1, 2] + + result = df.loc[df.index[1], ["tt1", "tt2"]] + expected = Series([1, 2], df.columns, dtype=np.int_, name=1) + tm.assert_series_equal(result, expected) + + df["tt1"] = df["tt2"] = "0" + df.loc[df.index[1], ["tt1", "tt2"]] = ["1", "2"] + result = df.loc[df.index[1], ["tt1", "tt2"]] + expected = Series(["1", "2"], df.columns, name=1) + tm.assert_series_equal(result, expected) + + def test_setitem_list_not_dataframe(self, float_frame): + data = np.random.randn(len(float_frame), 2) + float_frame[["A", "B"]] = data + tm.assert_almost_equal(float_frame[["A", "B"]].values, data) + + def test_setitem_list_of_tuples(self, float_frame): + tuples = list(zip(float_frame["A"], float_frame["B"])) + float_frame["tuples"] = tuples + + result = float_frame["tuples"] + expected = Series(tuples, index=float_frame.index, name="tuples") + tm.assert_series_equal(result, expected) + + def test_setitem_mulit_index(self): + # GH7655, test that assigning to a sub-frame of a frame + # with multi-index columns aligns both rows and columns + it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"] + + cols = MultiIndex.from_product(it) + index = pd.date_range("20141006", periods=20) + vals = np.random.randint(1, 1000, (len(index), len(cols))) + df = pd.DataFrame(vals, columns=cols, index=index) + + i, j = df.index.values.copy(), it[-1][:] + + np.random.shuffle(i) + df["jim"] = df["jolie"].loc[i, ::-1] + tm.assert_frame_equal(df["jim"], df["jolie"]) + + np.random.shuffle(j) + df[("joe", "first")] = df[("jolie", "last")].loc[i, j] + tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")]) + + np.random.shuffle(j) + df[("joe", "last")] = df[("jolie", "first")].loc[i, j] + tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")]) + + def test_setitem_callable(self): 
+ # GH 12533 + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}) + df[lambda x: "A"] = [11, 12, 13, 14] + + exp = pd.DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]}) + tm.assert_frame_equal(df, exp) + + def test_setitem_other_callable(self): + # GH 13299 + def inc(x): + return x + 1 + + df = pd.DataFrame([[-1, 1], [1, -1]]) + df[df > 0] = inc + + expected = pd.DataFrame([[-1, inc], [inc, -1]]) + tm.assert_frame_equal(df, expected) + + def test_getitem_boolean( + self, float_string_frame, mixed_float_frame, mixed_int_frame, datetime_frame + ): + # boolean indexing + d = datetime_frame.index[10] + indexer = datetime_frame.index > d + indexer_obj = indexer.astype(object) + + subindex = datetime_frame.index[indexer] + subframe = datetime_frame[indexer] + + tm.assert_index_equal(subindex, subframe.index) + with pytest.raises(ValueError, match="Item wrong length"): + datetime_frame[indexer[:-1]] + + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + with pytest.raises(ValueError, match="Boolean array expected"): + datetime_frame[datetime_frame] + + # test that Series work + indexer_obj = Series(indexer_obj, datetime_frame.index) + + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + # test that Series indexers reindex + # we are producing a warning that since the passed boolean + # key is not the same as the given index, we will reindex + # not sure this is really necessary + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + indexer_obj = indexer_obj.reindex(datetime_frame.index[::-1]) + subframe_obj = datetime_frame[indexer_obj] + tm.assert_frame_equal(subframe_obj, subframe) + + # test df[df > 0] + for df in [ + datetime_frame, + float_string_frame, + mixed_float_frame, + mixed_int_frame, + ]: + if df is float_string_frame: + continue + + data = df._get_numeric_data() + bif = df[df > 0] + bifw = DataFrame( + {c: np.where(data[c] > 0, data[c], 
np.nan) for c in data.columns}, + index=data.index, + columns=data.columns, + ) + + # add back other columns to compare + for c in df.columns: + if c not in bifw: + bifw[c] = df[c] + bifw = bifw.reindex(columns=df.columns) + + tm.assert_frame_equal(bif, bifw, check_dtype=False) + for c in df.columns: + if bif[c].dtype != bifw[c].dtype: + assert bif[c].dtype == df[c].dtype + + def test_getitem_boolean_casting(self, datetime_frame): + + # don't upcast if we don't need to + df = datetime_frame.copy() + df["E"] = 1 + df["E"] = df["E"].astype("int32") + df["E1"] = df["E"].copy() + df["F"] = 1 + df["F"] = df["F"].astype("int64") + df["F1"] = df["F"].copy() + + casted = df[df > 0] + result = casted.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [np.dtype("int32")] * 2 + + [np.dtype("int64")] * 2, + index=["A", "B", "C", "D", "E", "E1", "F", "F1"], + ) + tm.assert_series_equal(result, expected) + + # int block splitting + df.loc[df.index[1:3], ["E1", "F1"]] = 0 + casted = df[df > 0] + result = casted.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [np.dtype("int32")] + + [np.dtype("float64")] + + [np.dtype("int64")] + + [np.dtype("float64")], + index=["A", "B", "C", "D", "E", "E1", "F", "F1"], + ) + tm.assert_series_equal(result, expected) + + # where dtype conversions + # GH 3733 + df = DataFrame(data=np.random.randn(100, 50)) + df = df.where(df > 0) # create nans + bools = df > 0 + mask = isna(df) + expected = bools.astype(float).mask(mask) + result = bools.mask(mask) + tm.assert_frame_equal(result, expected) + + def test_getitem_boolean_list(self): + df = DataFrame(np.arange(12).reshape(3, 4)) + + def _checkit(lst): + result = df[lst] + expected = df.loc[df.index[lst]] + tm.assert_frame_equal(result, expected) + + _checkit([True, False, True]) + _checkit([True, True, True]) + _checkit([False, False, False]) + + def test_getitem_boolean_iadd(self): + arr = np.random.randn(5, 5) + + df = DataFrame(arr.copy(), columns=["A", "B", "C", "D", "E"]) + + 
df[df < 0] += 1 + arr[arr < 0] += 1 + + tm.assert_almost_equal(df.values, arr) + + def test_boolean_index_empty_corner(self): + # #2096 + blah = DataFrame(np.empty([0, 1]), columns=["A"], index=DatetimeIndex([])) + + # both of these should succeed trivially + k = np.array([], bool) + + blah[k] + blah[k] = 0 + + def test_getitem_ix_mixed_integer(self): + df = DataFrame( + np.random.randn(4, 3), index=[1, 10, "C", "E"], columns=[1, 2, 3] + ) + + result = df.iloc[:-1] + expected = df.loc[df.index[:-1]] + tm.assert_frame_equal(result, expected) + + result = df.loc[[1, 10]] + expected = df.loc[Index([1, 10])] + tm.assert_frame_equal(result, expected) + + # 11320 + df = pd.DataFrame( + { + "rna": (1.5, 2.2, 3.2, 4.5), + -1000: [11, 21, 36, 40], + 0: [10, 22, 43, 34], + 1000: [0, 10, 20, 30], + }, + columns=["rna", -1000, 0, 1000], + ) + result = df[[1000]] + expected = df.iloc[:, [3]] + tm.assert_frame_equal(result, expected) + result = df[[-1000]] + expected = df.iloc[:, [1]] + tm.assert_frame_equal(result, expected) + + def test_getattr(self, float_frame): + tm.assert_series_equal(float_frame.A, float_frame["A"]) + msg = "'DataFrame' object has no attribute 'NONEXISTENT_NAME'" + with pytest.raises(AttributeError, match=msg): + float_frame.NONEXISTENT_NAME + + def test_setattr_column(self): + df = DataFrame({"foobar": 1}, index=range(10)) + + df.foobar = 5 + assert (df.foobar == 5).all() + + def test_setitem(self, float_frame): + # not sure what else to do here + series = float_frame["A"][::2] + float_frame["col5"] = series + assert "col5" in float_frame + + assert len(series) == 15 + assert len(float_frame) == 30 + + exp = np.ravel(np.column_stack((series.values, [np.nan] * 15))) + exp = Series(exp, index=float_frame.index, name="col5") + tm.assert_series_equal(float_frame["col5"], exp) + + series = float_frame["A"] + float_frame["col6"] = series + tm.assert_series_equal(series, float_frame["col6"], check_names=False) + + msg = ( + r"\"None of 
\[Float64Index\(\[.*dtype='float64'\)\] are in the" + r" \[columns\]\"" + ) + with pytest.raises(KeyError, match=msg): + float_frame[np.random.randn(len(float_frame) + 1)] = 1 + + # set ndarray + arr = np.random.randn(len(float_frame)) + float_frame["col9"] = arr + assert (float_frame["col9"] == arr).all() + + float_frame["col7"] = 5 + assert (float_frame["col7"] == 5).all() + + float_frame["col0"] = 3.14 + assert (float_frame["col0"] == 3.14).all() + + float_frame["col8"] = "foo" + assert (float_frame["col8"] == "foo").all() + + # this is partially a view (e.g. some blocks are view) + # so raise/warn + smaller = float_frame[:2] + + with pytest.raises(com.SettingWithCopyError): + smaller["col10"] = ["1", "2"] + + assert smaller["col10"].dtype == np.object_ + assert (smaller["col10"] == ["1", "2"]).all() + + # dtype changing GH4204 + df = DataFrame([[0, 0]]) + df.iloc[0] = np.nan + expected = DataFrame([[np.nan, np.nan]]) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[0, 0]]) + df.loc[0] = np.nan + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) + def test_setitem_dtype(self, dtype, float_frame): + arr = np.random.randn(len(float_frame)) + + float_frame[dtype] = np.array(arr, dtype=dtype) + assert float_frame[dtype].dtype.name == dtype + + def test_setitem_tuple(self, float_frame): + float_frame["A", "B"] = float_frame["A"] + tm.assert_series_equal( + float_frame["A", "B"], float_frame["A"], check_names=False + ) + + def test_setitem_always_copy(self, float_frame): + s = float_frame["A"].copy() + float_frame["E"] = s + + float_frame["E"][5:10] = np.nan + assert notna(s[5:10]).all() + + def test_setitem_boolean(self, float_frame): + df = float_frame.copy() + values = float_frame.values + + df[df["A"] > 0] = 4 + values[values[:, 0] > 0] = 4 + tm.assert_almost_equal(df.values, values) + + # test that column reindexing works + series = df["A"] == 4 + series = series.reindex(df.index[::-1]) + 
df[series] = 1 + values[values[:, 0] == 4] = 1 + tm.assert_almost_equal(df.values, values) + + df[df > 0] = 5 + values[values > 0] = 5 + tm.assert_almost_equal(df.values, values) + + df[df == 5] = 0 + values[values == 5] = 0 + tm.assert_almost_equal(df.values, values) + + # a df that needs alignment first + df[df[:-1] < 0] = 2 + np.putmask(values[:-1], values[:-1] < 0, 2) + tm.assert_almost_equal(df.values, values) + + # indexed with same shape but rows-reversed df + df[df[::-1] == 2] = 3 + values[values == 2] = 3 + tm.assert_almost_equal(df.values, values) + + msg = "Must pass DataFrame or 2-d ndarray with boolean values only" + with pytest.raises(TypeError, match=msg): + df[df * 0] = 2 + + # index with DataFrame + mask = df > np.abs(df) + expected = df.copy() + df[df > np.abs(df)] = np.nan + expected.values[mask.values] = np.nan + tm.assert_frame_equal(df, expected) + + # set from DataFrame + expected = df.copy() + df[df > np.abs(df)] = df * 2 + np.putmask(expected.values, mask.values, df.values * 2) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "mask_type", + [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], + ids=["dataframe", "array"], + ) + def test_setitem_boolean_mask(self, mask_type, float_frame): + + # Test for issue #18582 + df = float_frame.copy() + mask = mask_type(df) + + # index with boolean mask + result = df.copy() + result[mask] = np.nan + + expected = df.copy() + expected.values[np.array(mask)] = np.nan + tm.assert_frame_equal(result, expected) + + def test_setitem_cast(self, float_frame): + float_frame["D"] = float_frame["D"].astype("i8") + assert float_frame["D"].dtype == np.int64 + + # #669, should not cast? 
+ # this is now set to int64, which means a replacement of the column to + # the value dtype (and nothing to do with the existing dtype) + float_frame["B"] = 0 + assert float_frame["B"].dtype == np.int64 + + # cast if pass array of course + float_frame["B"] = np.arange(len(float_frame)) + assert issubclass(float_frame["B"].dtype.type, np.integer) + + float_frame["foo"] = "bar" + float_frame["foo"] = 0 + assert float_frame["foo"].dtype == np.int64 + + float_frame["foo"] = "bar" + float_frame["foo"] = 2.5 + assert float_frame["foo"].dtype == np.float64 + + float_frame["something"] = 0 + assert float_frame["something"].dtype == np.int64 + float_frame["something"] = 2 + assert float_frame["something"].dtype == np.int64 + float_frame["something"] = 2.5 + assert float_frame["something"].dtype == np.float64 + + # GH 7704 + # dtype conversion on setting + df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) + df["event"] = np.nan + df.loc[10, "event"] = "foo" + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 3 + [np.dtype("object")], + index=["A", "B", "C", "event"], + ) + tm.assert_series_equal(result, expected) + + # Test that data type is preserved . 
#5782 + df = DataFrame({"one": np.arange(6, dtype=np.int8)}) + df.loc[1, "one"] = 6 + assert df.dtypes.one == np.dtype(np.int8) + df.one = np.int8(7) + assert df.dtypes.one == np.dtype(np.int8) + + def test_setitem_boolean_column(self, float_frame): + expected = float_frame.copy() + mask = float_frame["A"] > 0 + + float_frame.loc[mask, "B"] = 0 + expected.values[mask.values, 1] = 0 + + tm.assert_frame_equal(float_frame, expected) + + def test_frame_setitem_timestamp(self): + # GH#2155 + columns = date_range(start="1/1/2012", end="2/1/2012", freq=BDay()) + data = DataFrame(columns=columns, index=range(10)) + t = datetime(2012, 11, 1) + ts = Timestamp(t) + data[ts] = np.nan # works, mostly a smoke-test + assert np.isnan(data[ts]).all() + + def test_setitem_corner(self, float_frame): + # corner case + df = DataFrame({"B": [1.0, 2.0, 3.0], "C": ["a", "b", "c"]}, index=np.arange(3)) + del df["B"] + df["B"] = [1.0, 2.0, 3.0] + assert "B" in df + assert len(df.columns) == 2 + + df["A"] = "beginning" + df["E"] = "foo" + df["D"] = "bar" + df[datetime.now()] = "date" + df[datetime.now()] = 5.0 + + # what to do when empty frame with index + dm = DataFrame(index=float_frame.index) + dm["A"] = "foo" + dm["B"] = "bar" + assert len(dm.columns) == 2 + assert dm.values.dtype == np.object_ + + # upcast + dm["C"] = 1 + assert dm["C"].dtype == np.int64 + + dm["E"] = 1.0 + assert dm["E"].dtype == np.float64 + + # set existing column + dm["A"] = "bar" + assert "bar" == dm["A"][0] + + dm = DataFrame(index=np.arange(3)) + dm["A"] = 1 + dm["foo"] = "bar" + del dm["foo"] + dm["foo"] = "bar" + assert dm["foo"].dtype == np.object_ + + dm["coercable"] = ["1", "2", "3"] + assert dm["coercable"].dtype == np.object_ + + def test_setitem_corner2(self): + data = { + "title": ["foobar", "bar", "foobar"] + ["foobar"] * 17, + "cruft": np.random.random(20), + } + + df = DataFrame(data) + ix = df[df["title"] == "bar"].index + + df.loc[ix, ["title"]] = "foobar" + df.loc[ix, ["cruft"]] = 0 + + assert 
df.loc[1, "title"] == "foobar" + assert df.loc[1, "cruft"] == 0 + + def test_setitem_ambig(self): + # Difficulties with mixed-type data + from decimal import Decimal + + # Created as float type + dm = DataFrame(index=range(3), columns=range(3)) + + coercable_series = Series([Decimal(1) for _ in range(3)], index=range(3)) + uncoercable_series = Series(["foo", "bzr", "baz"], index=range(3)) + + dm[0] = np.ones(3) + assert len(dm.columns) == 3 + + dm[1] = coercable_series + assert len(dm.columns) == 3 + + dm[2] = uncoercable_series + assert len(dm.columns) == 3 + assert dm[2].dtype == np.object_ + + def test_setitem_clear_caches(self): + # see gh-304 + df = DataFrame( + {"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3] + ) + df.insert(2, "z", np.nan) + + # cache it + foo = df["z"] + df.loc[df.index[2:], "z"] = 42 + + expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z") + + assert df["z"] is not foo + tm.assert_series_equal(df["z"], expected) + + def test_setitem_None(self, float_frame): + # GH #766 + float_frame[None] = float_frame["A"] + tm.assert_series_equal( + float_frame.iloc[:, -1], float_frame["A"], check_names=False + ) + tm.assert_series_equal( + float_frame.loc[:, None], float_frame["A"], check_names=False + ) + tm.assert_series_equal(float_frame[None], float_frame["A"], check_names=False) + repr(float_frame) + + def test_setitem_empty(self): + # GH 9596 + df = pd.DataFrame( + {"a": ["1", "2", "3"], "b": ["11", "22", "33"], "c": ["111", "222", "333"]} + ) + + result = df.copy() + result.loc[result.b.isna(), "a"] = result.a + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("dtype", ["float", "int64"]) + @pytest.mark.parametrize("kwargs", [dict(), dict(index=[1]), dict(columns=["A"])]) + def test_setitem_empty_frame_with_boolean(self, dtype, kwargs): + # see gh-10126 + kwargs["dtype"] = dtype + df = DataFrame(**kwargs) + + df2 = df.copy() + df[df > df2] = 47 + tm.assert_frame_equal(df, df2) + + def 
test_setitem_with_empty_listlike(self): + # GH #17101 + index = pd.Index([], name="idx") + result = pd.DataFrame(columns=["A"], index=index) + result["A"] = [] + expected = pd.DataFrame(columns=["A"], index=index) + tm.assert_index_equal(result.index, expected.index) + + def test_setitem_scalars_no_index(self): + # GH16823 / 17894 + df = DataFrame() + df["foo"] = 1 + expected = DataFrame(columns=["foo"]).astype(np.int64) + tm.assert_frame_equal(df, expected) + + def test_getitem_empty_frame_with_boolean(self): + # Test for issue #11859 + + df = pd.DataFrame() + df2 = df[df > 0] + tm.assert_frame_equal(df, df2) + + def test_delitem_corner(self, float_frame): + f = float_frame.copy() + del f["D"] + assert len(f.columns) == 3 + with pytest.raises(KeyError, match=r"^'D'$"): + del f["D"] + del f["B"] + assert len(f.columns) == 2 + + def test_slice_floats(self): + index = [52195.504153, 52196.303147, 52198.369883] + df = DataFrame(np.random.rand(3, 2), index=index) + + s1 = df.loc[52195.1:52196.5] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52196.6] + assert len(s1) == 2 + + s1 = df.loc[52195.1:52198.9] + assert len(s1) == 3 + + def test_getitem_fancy_slice_integers_step(self): + df = DataFrame(np.random.randn(10, 5)) + + # this is OK + result = df.iloc[:8:2] # noqa + df.iloc[:8:2] = np.nan + assert isna(df.iloc[:8:2]).values.all() + + def test_getitem_setitem_integer_slice_keyerrors(self): + df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2)) + + # this is OK + cp = df.copy() + cp.iloc[4:10] = 0 + assert (cp.iloc[4:10] == 0).values.all() + + # so is this + cp = df.copy() + cp.iloc[3:11] = 0 + assert (cp.iloc[3:11] == 0).values.all() + + result = df.iloc[2:6] + result2 = df.loc[3:11] + expected = df.reindex([4, 6, 8, 10]) + + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # non-monotonic, raise KeyError + df2 = df.iloc[list(range(5)) + list(range(5, 10))[::-1]] + with pytest.raises(KeyError, match=r"^3$"): + 
df2.loc[3:11] + with pytest.raises(KeyError, match=r"^3$"): + df2.loc[3:11] = 0 + + def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): + sliced = float_string_frame.iloc[:, -3:] + assert sliced["D"].dtype == np.float64 + + # get view with single block + # setting it triggers setting with copy + sliced = float_frame.iloc[:, -3:] + + with pytest.raises(com.SettingWithCopyError): + sliced["C"] = 4.0 + + assert (float_frame["C"] == 4).all() + + def test_setitem_slice_position(self): + # GH#31469 + df = pd.DataFrame(np.zeros((100, 1))) + df[-4:] = 1 + arr = np.zeros((100, 1)) + arr[-4:] = 1 + expected = pd.DataFrame(arr) + tm.assert_frame_equal(df, expected) + + def test_getitem_setitem_non_ix_labels(self): + df = tm.makeTimeDataFrame() + + start, end = df.index[[5, 10]] + + result = df.loc[start:end] + result2 = df[start:end] + expected = df[5:11] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + result = df.copy() + result.loc[start:end] = 0 + result2 = df.copy() + result2[start:end] = 0 + expected = df.copy() + expected[5:11] = 0 + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_ix_multi_take(self): + df = DataFrame(np.random.randn(3, 2)) + rs = df.loc[df.index == 0, :] + xp = df.reindex([0]) + tm.assert_frame_equal(rs, xp) + + # FIXME: dont leave commented-out + """ #1321 + df = DataFrame(np.random.randn(3, 2)) + rs = df.loc[df.index==0, df.columns==1] + xp = df.reindex([0], [1]) + tm.assert_frame_equal(rs, xp) + """ + + def test_getitem_fancy_scalar(self, float_frame): + f = float_frame + ix = f.loc + + # individual value + for col in f.columns: + ts = f[col] + for idx in f.index[::5]: + assert ix[idx, col] == ts[idx] + + def test_setitem_fancy_scalar(self, float_frame): + f = float_frame + expected = float_frame.copy() + ix = f.loc + + # individual value + for j, col in enumerate(f.columns): + ts = f[col] # noqa + for idx in f.index[::5]: + i = 
f.index.get_loc(idx) + val = np.random.randn() + expected.values[i, j] = val + + ix[idx, col] = val + tm.assert_frame_equal(f, expected) + + def test_getitem_fancy_boolean(self, float_frame): + f = float_frame + ix = f.loc + + expected = f.reindex(columns=["B", "D"]) + result = ix[:, [False, True, False, True]] + tm.assert_frame_equal(result, expected) + + expected = f.reindex(index=f.index[5:10], columns=["B", "D"]) + result = ix[f.index[5:10], [False, True, False, True]] + tm.assert_frame_equal(result, expected) + + boolvec = f.index > f.index[7] + expected = f.reindex(index=f.index[boolvec]) + result = ix[boolvec] + tm.assert_frame_equal(result, expected) + result = ix[boolvec, :] + tm.assert_frame_equal(result, expected) + + result = ix[boolvec, f.columns[2:]] + expected = f.reindex(index=f.index[boolvec], columns=["C", "D"]) + tm.assert_frame_equal(result, expected) + + def test_setitem_fancy_boolean(self, float_frame): + # from 2d, set with booleans + frame = float_frame.copy() + expected = float_frame.copy() + + mask = frame["A"] > 0 + frame.loc[mask] = 0.0 + expected.values[mask.values] = 0.0 + tm.assert_frame_equal(frame, expected) + + frame = float_frame.copy() + expected = float_frame.copy() + frame.loc[mask, ["A", "B"]] = 0.0 + expected.values[mask.values, :2] = 0.0 + tm.assert_frame_equal(frame, expected) + + def test_getitem_fancy_ints(self, float_frame): + result = float_frame.iloc[[1, 4, 7]] + expected = float_frame.loc[float_frame.index[[1, 4, 7]]] + tm.assert_frame_equal(result, expected) + + result = float_frame.iloc[:, [2, 0, 1]] + expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]] + tm.assert_frame_equal(result, expected) + + def test_getitem_setitem_fancy_exceptions(self, float_frame): + ix = float_frame.iloc + with pytest.raises(IndexingError, match="Too many indexers"): + ix[:, :, :] + + with pytest.raises(IndexingError): + ix[:, :, :] = 1 + + def test_getitem_setitem_boolean_misaligned(self, float_frame): + # boolean index 
misaligned labels + mask = float_frame["A"][::-1] > 1 + + result = float_frame.loc[mask] + expected = float_frame.loc[mask[::-1]] + tm.assert_frame_equal(result, expected) + + cp = float_frame.copy() + expected = float_frame.copy() + cp.loc[mask] = 0 + expected.loc[mask] = 0 + tm.assert_frame_equal(cp, expected) + + def test_getitem_setitem_boolean_multi(self): + df = DataFrame(np.random.randn(3, 2)) + + # get + k1 = np.array([True, False, True]) + k2 = np.array([False, True]) + result = df.loc[k1, k2] + expected = df.loc[[0, 2], [1]] + tm.assert_frame_equal(result, expected) + + expected = df.copy() + df.loc[np.array([True, False, True]), np.array([False, True])] = 5 + expected.loc[[0, 2], [1]] = 5 + tm.assert_frame_equal(df, expected) + + def test_getitem_setitem_float_labels(self): + index = Index([1.5, 2, 3, 4, 5]) + df = DataFrame(np.random.randn(5, 5), index=index) + + result = df.loc[1.5:4] + expected = df.reindex([1.5, 2, 3, 4]) + tm.assert_frame_equal(result, expected) + assert len(result) == 4 + + result = df.loc[4:5] + expected = df.reindex([4, 5]) # reindex with int + tm.assert_frame_equal(result, expected, check_index_type=False) + assert len(result) == 2 + + result = df.loc[4:5] + expected = df.reindex([4.0, 5.0]) # reindex with float + tm.assert_frame_equal(result, expected) + assert len(result) == 2 + + # loc_float changes this to work properly + result = df.loc[1:2] + expected = df.iloc[0:2] + tm.assert_frame_equal(result, expected) + + df.loc[1:2] = 0 + result = df[1:2] + assert (result == 0).all().all() + + # #2727 + index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) + df = DataFrame(np.random.randn(5, 5), index=index) + + # positional slicing only via iloc! 
+ msg = ( + "cannot do slice indexing on" + r" with" + r" these indexers \[1.0\] of " + ) + with pytest.raises(TypeError, match=msg): + df.iloc[1.0:5] + + result = df.iloc[4:5] + expected = df.reindex([5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 1 + + cp = df.copy() + + with pytest.raises(TypeError): + cp.iloc[1.0:5] = 0 + + with pytest.raises(TypeError): + result = cp.iloc[1.0:5] == 0 # noqa + + assert result.values.all() + assert (cp.iloc[0:1] == df.iloc[0:1]).values.all() + + cp = df.copy() + cp.iloc[4:5] = 0 + assert (cp.iloc[4:5] == 0).values.all() + assert (cp.iloc[0:4] == df.iloc[0:4]).values.all() + + # float slicing + result = df.loc[1.0:5] + expected = df + tm.assert_frame_equal(result, expected) + assert len(result) == 5 + + result = df.loc[1.1:5] + expected = df.reindex([2.5, 3.5, 4.5, 5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 4 + + result = df.loc[4.51:5] + expected = df.reindex([5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 1 + + result = df.loc[1.0:5.0] + expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0]) + tm.assert_frame_equal(result, expected) + assert len(result) == 5 + + cp = df.copy() + cp.loc[1.0:5.0] = 0 + result = cp.loc[1.0:5.0] + assert (result == 0).values.all() + + def test_setitem_single_column_mixed(self): + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["foo", "bar", "baz"], + ) + df["str"] = "qux" + df.loc[df.index[::2], "str"] = np.nan + expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object) + tm.assert_almost_equal(df["str"].values, expected) + + def test_setitem_single_column_mixed_datetime(self): + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["foo", "bar", "baz"], + ) + + df["timestamp"] = Timestamp("20010102") + + # check our dtypes + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 3 + [np.dtype("datetime64[ns]")], + index=["foo", 
"bar", "baz", "timestamp"], + ) + tm.assert_series_equal(result, expected) + + # GH#16674 iNaT is treated as an integer when given by the user + df.loc["b", "timestamp"] = iNaT + assert not isna(df.loc["b", "timestamp"]) + assert df["timestamp"].dtype == np.object_ + assert df.loc["b", "timestamp"] == iNaT + + # allow this syntax + df.loc["c", "timestamp"] = np.nan + assert isna(df.loc["c", "timestamp"]) + + # allow this syntax + df.loc["d", :] = np.nan + assert not isna(df.loc["c", :]).all() + + # FIXME: don't leave commented-out + # as of GH 3216 this will now work! + # try to set with a list like item + # pytest.raises( + # Exception, df.loc.__setitem__, ('d', 'timestamp'), [np.nan]) + + def test_setitem_mixed_datetime(self): + # GH 9336 + expected = DataFrame( + { + "a": [0, 0, 0, 0, 13, 14], + "b": [ + datetime(2012, 1, 1), + 1, + "x", + "y", + datetime(2013, 1, 1), + datetime(2014, 1, 1), + ], + } + ) + df = pd.DataFrame(0, columns=list("ab"), index=range(6)) + df["b"] = pd.NaT + df.loc[0, "b"] = datetime(2012, 1, 1) + df.loc[1, "b"] = 1 + df.loc[[2, 3], "b"] = "x", "y" + A = np.array( + [ + [13, np.datetime64("2013-01-01T00:00:00")], + [14, np.datetime64("2014-01-01T00:00:00")], + ] + ) + df.loc[[4, 5], ["a", "b"]] = A + tm.assert_frame_equal(df, expected) + + def test_setitem_frame_float(self, float_frame): + piece = float_frame.loc[float_frame.index[:2], ["A", "B"]] + float_frame.loc[float_frame.index[-2] :, ["A", "B"]] = piece.values + result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values + expected = piece.values + tm.assert_almost_equal(result, expected) + + def test_setitem_frame_mixed(self, float_string_frame): + # GH 3216 + + # already aligned + f = float_string_frame.copy() + piece = DataFrame( + [[1.0, 2.0], [3.0, 4.0]], index=f.index[0:2], columns=["A", "B"] + ) + key = (slice(None, 2), ["A", "B"]) + f.loc[key] = piece + tm.assert_almost_equal(f.loc[f.index[0:2], ["A", "B"]].values, piece.values) + + # rows unaligned + f = 
float_string_frame.copy() + piece = DataFrame( + [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]], + index=list(f.index[0:2]) + ["foo", "bar"], + columns=["A", "B"], + ) + key = (slice(None, 2), ["A", "B"]) + f.loc[key] = piece + tm.assert_almost_equal( + f.loc[f.index[0:2:], ["A", "B"]].values, piece.values[0:2] + ) + + # key is unaligned with values + f = float_string_frame.copy() + piece = f.loc[f.index[:2], ["A"]] + piece.index = f.index[-2:] + key = (slice(-2, None), ["A", "B"]) + f.loc[key] = piece + piece["B"] = np.nan + tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) + + # ndarray + f = float_string_frame.copy() + piece = float_string_frame.loc[f.index[:2], ["A", "B"]] + key = (slice(-2, None), ["A", "B"]) + f.loc[key] = piece.values + tm.assert_almost_equal(f.loc[f.index[-2:], ["A", "B"]].values, piece.values) + + def test_setitem_frame_upcast(self): + # needs upcasting + df = DataFrame([[1, 2, "foo"], [3, 4, "bar"]], columns=["A", "B", "C"]) + df2 = df.copy() + df2.loc[:, ["A", "B"]] = df.loc[:, ["A", "B"]] + 0.5 + expected = df.reindex(columns=["A", "B"]) + expected += 0.5 + expected["C"] = df["C"] + tm.assert_frame_equal(df2, expected) + + def test_setitem_frame_align(self, float_frame): + piece = float_frame.loc[float_frame.index[:2], ["A", "B"]] + piece.index = float_frame.index[-2:] + piece.columns = ["A", "B"] + float_frame.loc[float_frame.index[-2:], ["A", "B"]] = piece + result = float_frame.loc[float_frame.index[-2:], ["A", "B"]].values + expected = piece.values + tm.assert_almost_equal(result, expected) + + def test_getitem_setitem_ix_duplicates(self): + # #1201 + df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + + result = df.loc["foo"] + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.loc["bar"] + expected = df.iloc[[2, 4]] + tm.assert_frame_equal(result, expected) + + result = df.loc["baz"] + expected = df.iloc[3] + tm.assert_series_equal(result, 
expected) + + def test_getitem_ix_boolean_duplicates_multiple(self): + # #1201 + df = DataFrame(np.random.randn(5, 3), index=["foo", "foo", "bar", "baz", "bar"]) + + result = df.loc[["bar"]] + exp = df.iloc[[2, 4]] + tm.assert_frame_equal(result, exp) + + result = df.loc[df[1] > 0] + exp = df[df[1] > 0] + tm.assert_frame_equal(result, exp) + + result = df.loc[df[0] > 0] + exp = df[df[0] > 0] + tm.assert_frame_equal(result, exp) + + def test_getitem_setitem_ix_bool_keyerror(self): + # #2199 + df = DataFrame({"a": [1, 2, 3]}) + + with pytest.raises(KeyError, match=r"^False$"): + df.loc[False] + with pytest.raises(KeyError, match=r"^True$"): + df.loc[True] + + msg = "cannot use a single bool to index into setitem" + with pytest.raises(KeyError, match=msg): + df.loc[False] = 0 + with pytest.raises(KeyError, match=msg): + df.loc[True] = 0 + + def test_getitem_list_duplicates(self): + # #1943 + df = DataFrame(np.random.randn(4, 4), columns=list("AABC")) + df.columns.name = "foo" + + result = df[["B", "C"]] + assert result.columns.name == "foo" + + expected = df.iloc[:, 2:] + tm.assert_frame_equal(result, expected) + + def test_get_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: + result = float_frame._get_value(idx, col) + expected = float_frame[col][idx] + assert result == expected + + def test_lookup_float(self, float_frame): + df = float_frame + rows = list(df.index) * len(df.columns) + cols = list(df.columns) * len(df.index) + result = df.lookup(rows, cols) + + expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)]) + tm.assert_numpy_array_equal(result, expected) + + def test_lookup_mixed(self, float_string_frame): + df = float_string_frame + rows = list(df.index) * len(df.columns) + cols = list(df.columns) * len(df.index) + result = df.lookup(rows, cols) + + expected = np.array( + [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_ + ) + tm.assert_almost_equal(result, expected) + + def 
test_lookup_bool(self): + df = DataFrame( + { + "label": ["a", "b", "a", "c"], + "mask_a": [True, True, False, True], + "mask_b": [True, False, False, False], + "mask_c": [False, True, False, True], + } + ) + df["mask"] = df.lookup(df.index, "mask_" + df["label"]) + + exp_mask = np.array( + [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])] + ) + + tm.assert_series_equal(df["mask"], pd.Series(exp_mask, name="mask")) + assert df["mask"].dtype == np.bool_ + + def test_lookup_raises(self, float_frame): + with pytest.raises(KeyError, match="'One or more row labels was not found'"): + float_frame.lookup(["xyz"], ["A"]) + + with pytest.raises(KeyError, match="'One or more column labels was not found'"): + float_frame.lookup([float_frame.index[0]], ["xyz"]) + + with pytest.raises(ValueError, match="same size"): + float_frame.lookup(["a", "b", "c"], ["a"]) + + def test_set_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: + float_frame._set_value(idx, col, 1) + assert float_frame[col][idx] == 1 + + def test_set_value_resize(self, float_frame): + + res = float_frame._set_value("foobar", "B", 0) + assert res is float_frame + assert res.index[-1] == "foobar" + assert res._get_value("foobar", "B") == 0 + + float_frame.loc["foobar", "qux"] = 0 + assert float_frame._get_value("foobar", "qux") == 0 + + res = float_frame.copy() + res3 = res._set_value("foobar", "baz", "sam") + assert res3["baz"].dtype == np.object_ + + res = float_frame.copy() + res3 = res._set_value("foobar", "baz", True) + assert res3["baz"].dtype == np.object_ + + res = float_frame.copy() + res3 = res._set_value("foobar", "baz", 5) + assert is_float_dtype(res3["baz"]) + assert isna(res3["baz"].drop(["foobar"])).all() + msg = "could not convert string to float: 'sam'" + with pytest.raises(ValueError, match=msg): + res3._set_value("foobar", "baz", "sam") + + def test_set_value_with_index_dtype_change(self): + df_orig = DataFrame(np.random.randn(3, 3), 
index=range(3), columns=list("ABC")) + + # this is actually ambiguous as the 2 is interpreted as a positional + # so column is not created + df = df_orig.copy() + df._set_value("C", 2, 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + df = df_orig.copy() + df.loc["C", 2] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + # assert list(df.columns) == list(df_orig.columns) + [2] + + # create both new + df = df_orig.copy() + df._set_value("C", "D", 1.0) + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] + + df = df_orig.copy() + df.loc["C", "D"] = 1.0 + assert list(df.index) == list(df_orig.index) + ["C"] + assert list(df.columns) == list(df_orig.columns) + ["D"] + + def test_get_set_value_no_partial_indexing(self): + # partial w/ MultiIndex raise exception + index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) + df = DataFrame(index=index, columns=range(4)) + with pytest.raises(KeyError, match=r"^0$"): + df._get_value(0, 1) + + # TODO: rename? remove? 
+ def test_single_element_ix_dont_upcast(self, float_frame): + float_frame["E"] = 1 + assert issubclass(float_frame["E"].dtype.type, (int, np.integer)) + + result = float_frame.loc[float_frame.index[5], "E"] + assert is_integer(result) + + # GH 11617 + df = pd.DataFrame(dict(a=[1.23])) + df["b"] = 666 + + result = df.loc[0, "b"] + assert is_integer(result) + + expected = Series([666], [0], name="b") + result = df.loc[[0], "b"] + tm.assert_series_equal(result, expected) + + def test_iloc_row(self): + df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + + result = df.iloc[1] + exp = df.loc[2] + tm.assert_series_equal(result, exp) + + result = df.iloc[2] + exp = df.loc[4] + tm.assert_series_equal(result, exp) + + # slice + result = df.iloc[slice(4, 8)] + expected = df.loc[8:14] + tm.assert_frame_equal(result, expected) + + # verify slice is view + # setting it makes it raise/warn + with pytest.raises(com.SettingWithCopyError): + result[2] = 0.0 + + exp_col = df[2].copy() + exp_col[4:8] = 0.0 + tm.assert_series_equal(df[2], exp_col) + + # list of integers + result = df.iloc[[1, 2, 4, 6]] + expected = df.reindex(df.index[[1, 2, 4, 6]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_col(self): + + df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + + result = df.iloc[:, 1] + exp = df.loc[:, 2] + tm.assert_series_equal(result, exp) + + result = df.iloc[:, 2] + exp = df.loc[:, 4] + tm.assert_series_equal(result, exp) + + # slice + result = df.iloc[:, slice(4, 8)] + expected = df.loc[:, 8:14] + tm.assert_frame_equal(result, expected) + + # verify slice is view + # and that we are setting a copy + with pytest.raises(com.SettingWithCopyError): + result[8] = 0.0 + + assert (df[8] == 0).all() + + # list of integers + result = df.iloc[:, [1, 2, 4, 6]] + expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_duplicates(self): + + df = DataFrame(np.random.rand(3, 3), 
columns=list("ABC"), index=list("aab")) + + result = df.iloc[0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + result = df.T.iloc[:, 0] + assert isinstance(result, Series) + tm.assert_almost_equal(result.values, df.values[0]) + + # #2259 + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) + result = df.iloc[:, [0]] + expected = df.take([0], axis=1) + tm.assert_frame_equal(result, expected) + + def test_loc_duplicates(self): + # gh-17105 + + # insert a duplicate element to the index + trange = pd.date_range( + start=pd.Timestamp(year=2017, month=1, day=1), + end=pd.Timestamp(year=2017, month=1, day=5), + ) + + trange = trange.insert(loc=5, item=pd.Timestamp(year=2017, month=1, day=5)) + + df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + bool_idx = np.array([False, False, False, False, False, True]) + + # assignment + df.loc[trange[bool_idx], "A"] = 6 + + expected = pd.DataFrame( + {"A": [0, 0, 0, 0, 6, 6], "B": [0, 0, 0, 0, 0, 0]}, index=trange + ) + tm.assert_frame_equal(df, expected) + + # in-place + df = pd.DataFrame(0, index=trange, columns=["A", "B"]) + df.loc[trange[bool_idx], "A"] += 6 + tm.assert_frame_equal(df, expected) + + def test_iat(self, float_frame): + + for i, row in enumerate(float_frame.index): + for j, col in enumerate(float_frame.columns): + result = float_frame.iat[i, j] + expected = float_frame.at[row, col] + assert result == expected + + @pytest.mark.parametrize( + "method,expected_values", + [ + ("nearest", [0, 1, 1, 2]), + ("pad", [np.nan, 0, 1, 1]), + ("backfill", [0, 1, 2, 2]), + ], + ) + def test_reindex_methods(self, method, expected_values): + df = pd.DataFrame({"x": list(range(5))}) + target = np.array([-0.1, 0.9, 1.1, 1.5]) + + expected = pd.DataFrame({"x": expected_values}, index=target) + actual = df.reindex(target, method=method) + tm.assert_frame_equal(expected, actual) + + actual = df.reindex_like(df, method=method, tolerance=0) + tm.assert_frame_equal(df, actual) + 
actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0]) + tm.assert_frame_equal(df, actual) + + actual = df.reindex(target, method=method, tolerance=1) + tm.assert_frame_equal(expected, actual) + actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1]) + tm.assert_frame_equal(expected, actual) + + e2 = expected[::-1] + actual = df.reindex(target[::-1], method=method) + tm.assert_frame_equal(e2, actual) + + new_order = [3, 0, 2, 1] + e2 = expected.iloc[new_order] + actual = df.reindex(target[new_order], method=method) + tm.assert_frame_equal(e2, actual) + + switched_method = ( + "pad" if method == "backfill" else "backfill" if method == "pad" else method + ) + actual = df[::-1].reindex(target, method=switched_method) + tm.assert_frame_equal(expected, actual) + + def test_reindex_subclass(self): + # https://github.com/pandas-dev/pandas/issues/31925 + class MyDataFrame(DataFrame): + pass + + expected = DataFrame() + df = MyDataFrame() + result = df.reindex_like(expected) + + tm.assert_frame_equal(result, expected) + + def test_reindex_methods_nearest_special(self): + df = pd.DataFrame({"x": list(range(5))}) + target = np.array([-0.1, 0.9, 1.1, 1.5]) + + expected = pd.DataFrame({"x": [0, 1, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=0.2) + tm.assert_frame_equal(expected, actual) + + expected = pd.DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target) + actual = df.reindex(target, method="nearest", tolerance=[0.5, 0.01, 0.4, 0.1]) + tm.assert_frame_equal(expected, actual) + + def test_reindex_nearest_tz(self, tz_aware_fixture): + # GH26683 + tz = tz_aware_fixture + idx = pd.date_range("2019-01-01", periods=5, tz=tz) + df = pd.DataFrame({"x": list(range(5))}, index=idx) + + expected = df.head(3) + actual = df.reindex(idx[:3], method="nearest") + tm.assert_frame_equal(expected, actual) + + def test_reindex_nearest_tz_empty_frame(self): + # https://github.com/pandas-dev/pandas/issues/31964 + dti = 
pd.DatetimeIndex(["2016-06-26 14:27:26+00:00"]) + df = pd.DataFrame(index=pd.DatetimeIndex(["2016-07-04 14:00:59+00:00"])) + expected = pd.DataFrame(index=dti) + result = df.reindex(dti, method="nearest") + tm.assert_frame_equal(result, expected) + + def test_reindex_frame_add_nat(self): + rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") + df = DataFrame({"A": np.random.randn(len(rng)), "B": rng}) + + result = df.reindex(range(15)) + assert np.issubdtype(result["B"].dtype, np.dtype("M8[ns]")) + + mask = com.isna(result)["B"] + assert mask[-5:].all() + assert not mask[:-5].any() + + def test_reindex_limit(self): + # GH 28631 + data = [["A", "A", "A"], ["B", "B", "B"], ["C", "C", "C"], ["D", "D", "D"]] + exp_data = [ + ["A", "A", "A"], + ["B", "B", "B"], + ["C", "C", "C"], + ["D", "D", "D"], + ["D", "D", "D"], + [np.nan, np.nan, np.nan], + ] + df = DataFrame(data) + result = df.reindex([0, 1, 2, 3, 4, 5], method="ffill", limit=1) + expected = DataFrame(exp_data) + tm.assert_frame_equal(result, expected) + + def test_set_dataframe_column_ns_dtype(self): + x = DataFrame([datetime.now(), datetime.now()]) + assert x[0].dtype == np.dtype("M8[ns]") + + def test_non_monotonic_reindex_methods(self): + dr = pd.date_range("2013-08-01", periods=6, freq="B") + data = np.random.randn(6, 1) + df = pd.DataFrame(data, index=dr, columns=list("A")) + df_rev = pd.DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A")) + # index is not monotonic increasing or decreasing + msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="pad") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="ffill") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="bfill") + with pytest.raises(ValueError, match=msg): + df_rev.reindex(df.index, method="nearest") + + def test_reindex_level(self): + from itertools import permutations + + icol = ["jim", 
"joe", "jolie"] + + def verify_first_level(df, level, idx, check_index_type=True): + def f(val): + return np.nonzero((df[level] == val).to_numpy())[0] + + i = np.concatenate(list(map(f, idx))) + left = df.set_index(icol).reindex(idx, level=level) + right = df.iloc[i].set_index(icol) + tm.assert_frame_equal(left, right, check_index_type=check_index_type) + + def verify(df, level, idx, indexer, check_index_type=True): + left = df.set_index(icol).reindex(idx, level=level) + right = df.iloc[indexer].set_index(icol) + tm.assert_frame_equal(left, right, check_index_type=check_index_type) + + df = pd.DataFrame( + { + "jim": list("B" * 4 + "A" * 2 + "C" * 3), + "joe": list("abcdeabcd")[::-1], + "jolie": [10, 20, 30] * 3, + "joline": np.random.randint(0, 1000, 9), + } + ) + + target = [ + ["C", "B", "A"], + ["F", "C", "A", "D"], + ["A"], + ["A", "B", "C"], + ["C", "A", "B"], + ["C", "B"], + ["C", "A"], + ["A", "B"], + ["B", "A", "C"], + ] + + for idx in target: + verify_first_level(df, "jim", idx) + + # reindex by these causes different MultiIndex levels + for idx in [["D", "F"], ["A", "C", "B"]]: + verify_first_level(df, "jim", idx, check_index_type=False) + + verify(df, "joe", list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6]) + verify(df, "joe", list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6]) + verify(df, "joe", list("abc"), [3, 2, 1, 8, 7, 6]) + verify(df, "joe", list("eca"), [1, 3, 4, 6, 8]) + verify(df, "joe", list("edc"), [0, 1, 4, 5, 6]) + verify(df, "joe", list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6]) + verify(df, "joe", list("edwq"), [0, 4, 5]) + verify(df, "joe", list("wq"), [], check_index_type=False) + + df = DataFrame( + { + "jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7, + "joe": ["3rd"] * 2 + + ["1st"] * 3 + + ["2nd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["1st"] * 2 + + ["3rd"] * 3 + + ["2nd"] * 2, + # this needs to be jointly unique with jim and joe or + # reindexing will fail ~1.5% of the time, this works + # out to needing unique groups of same size as joe + "jolie": 
np.concatenate( + [ + np.random.choice(1000, x, replace=False) + for x in [2, 3, 3, 2, 3, 2, 3, 2] + ] + ), + "joline": np.random.randn(20).round(3) * 10, + } + ) + + for idx in permutations(df["jim"].unique()): + for i in range(3): + verify_first_level(df, "jim", idx[: i + 1]) + + i = [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17] + verify(df, "joe", ["1st", "2nd", "3rd"], i) + + i = [0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14] + verify(df, "joe", ["3rd", "2nd", "1st"], i) + + i = [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17] + verify(df, "joe", ["2nd", "3rd"], i) + + i = [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14] + verify(df, "joe", ["3rd", "1st"], i) + + def test_getitem_ix_float_duplicates(self): + df = pd.DataFrame( + np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") + ) + expect = df.iloc[1:] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df.index = [1, 0.2, 0.2] + expect = df.iloc[1:] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df = pd.DataFrame( + np.random.randn(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc") + ) + expect = df.iloc[1:-1] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[1:-1, 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + df.index = [0.1, 0.2, 2, 0.2] + expect = df.iloc[[1, -1]] + tm.assert_frame_equal(df.loc[0.2], expect) + + expect = df.iloc[[1, -1], 0] + tm.assert_series_equal(df.loc[0.2, "a"], expect) + + def test_getitem_sparse_column(self): + # https://github.com/pandas-dev/pandas/issues/23559 + data = SparseArray([0, 1]) + df = pd.DataFrame({"A": data}) + expected = pd.Series(data, name="A") + result = df["A"] + tm.assert_series_equal(result, expected) + + result = df.iloc[:, 0] + tm.assert_series_equal(result, expected) + + result = df.loc[:, "A"] + 
tm.assert_series_equal(result, expected) + + def test_setitem_with_sparse_value(self): + # GH8131 + df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_array = SparseArray([0, 0, 1]) + df["new_column"] = sp_array + tm.assert_series_equal( + df["new_column"], pd.Series(sp_array, name="new_column"), check_names=False + ) + + def test_setitem_with_unaligned_sparse_value(self): + df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_series = pd.Series(SparseArray([0, 0, 1]), index=[2, 1, 0]) + df["new_column"] = sp_series + exp = pd.Series(SparseArray([1, 0, 0]), name="new_column") + tm.assert_series_equal(df["new_column"], exp) + + def test_setitem_with_unaligned_tz_aware_datetime_column(self): + # GH 12981 + # Assignment of unaligned offset-aware datetime series. + # Make sure timezone isn't lost + column = pd.Series( + pd.date_range("2015-01-01", periods=3, tz="utc"), name="dates" + ) + df = pd.DataFrame({"dates": column}) + df["dates"] = column[[1, 0, 2]] + tm.assert_series_equal(df["dates"], column) + + df = pd.DataFrame({"dates": column}) + df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]] + tm.assert_series_equal(df["dates"], column) + + def test_setitem_datetime_coercion(self): + # gh-1048 + df = pd.DataFrame({"c": [pd.Timestamp("2010-10-01")] * 3}) + df.loc[0:1, "c"] = np.datetime64("2008-08-08") + assert pd.Timestamp("2008-08-08") == df.loc[0, "c"] + assert pd.Timestamp("2008-08-08") == df.loc[1, "c"] + df.loc[2, "c"] = date(2005, 5, 5) + assert pd.Timestamp("2005-05-05") == df.loc[2, "c"] + + def test_setitem_datetimelike_with_inference(self): + # GH 7592 + # assignment of timedeltas with NaT + + one_hour = timedelta(hours=1) + df = DataFrame(index=date_range("20130101", periods=4)) + df["A"] = np.array([1 * one_hour] * 4, dtype="m8[ns]") + df.loc[:, "B"] = np.array([2 * one_hour] * 4, dtype="m8[ns]") + df.loc[:3, "C"] = np.array([3 * one_hour] * 3, dtype="m8[ns]") + df.loc[:, "D"] = np.array([4 * one_hour] * 4, 
dtype="m8[ns]") + df.loc[df.index[:3], "E"] = np.array([5 * one_hour] * 3, dtype="m8[ns]") + df["F"] = np.timedelta64("NaT") + df.loc[df.index[:-1], "F"] = np.array([6 * one_hour] * 3, dtype="m8[ns]") + df.loc[df.index[-3] :, "G"] = date_range("20130101", periods=3) + df["H"] = np.datetime64("NaT") + result = df.dtypes + expected = Series( + [np.dtype("timedelta64[ns]")] * 6 + [np.dtype("datetime64[ns]")] * 2, + index=list("ABCDEFGH"), + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("idxer", ["var", ["var"]]) + def test_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture): + # GH 11365 + tz = tz_naive_fixture + idx = date_range(start="2015-07-12", periods=3, freq="H", tz=tz) + expected = DataFrame(1.2, index=idx, columns=["var"]) + result = DataFrame(index=idx, columns=["var"]) + result.loc[:, idxer] = expected + tm.assert_frame_equal(result, expected) + + def test_at_time_between_time_datetimeindex(self): + index = date_range("2012-01-01", "2012-01-05", freq="30min") + df = DataFrame(np.random.randn(len(index), 5), index=index) + akey = time(12, 0, 0) + bkey = slice(time(13, 0, 0), time(14, 0, 0)) + ainds = [24, 72, 120, 168] + binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172] + + result = df.at_time(akey) + expected = df.loc[akey] + expected2 = df.iloc[ainds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 4 + + result = df.between_time(bkey.start, bkey.stop) + expected = df.loc[bkey] + expected2 = df.iloc[binds] + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected2) + assert len(result) == 12 + + result = df.copy() + result.loc[akey] = 0 + result = result.loc[akey] + expected = df.loc[akey].copy() + expected.loc[:] = 0 + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[akey] = 0 + result.loc[akey] = df.iloc[ainds] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[bkey] = 0 + result 
= result.loc[bkey] + expected = df.loc[bkey].copy() + expected.loc[:] = 0 + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.loc[bkey] = 0 + result.loc[bkey] = df.iloc[binds] + tm.assert_frame_equal(result, df) + + def test_xs(self, float_frame, datetime_frame): + idx = float_frame.index[5] + xs = float_frame.xs(idx) + for item, value in xs.items(): + if np.isnan(value): + assert np.isnan(float_frame[item][idx]) + else: + assert value == float_frame[item][idx] + + # mixed-type xs + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + frame = DataFrame(test_data) + xs = frame.xs("1") + assert xs.dtype == np.object_ + assert xs["A"] == 1 + assert xs["B"] == "1" + + with pytest.raises( + KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00', freq='B')") + ): + datetime_frame.xs(datetime_frame.index[0] - BDay()) + + # xs get column + series = float_frame.xs("A", axis=1) + expected = float_frame["A"] + tm.assert_series_equal(series, expected) + + # view is returned if possible + series = float_frame.xs("A", axis=1) + series[:] = 5 + assert (expected == 5).all() + + def test_xs_corner(self): + # pathological mixed-type reordering case + df = DataFrame(index=[0]) + df["A"] = 1.0 + df["B"] = "foo" + df["C"] = 2.0 + df["D"] = "bar" + df["E"] = 3.0 + + xs = df.xs(0) + exp = pd.Series([1.0, "foo", 2.0, "bar", 3.0], index=list("ABCDE"), name=0) + tm.assert_series_equal(xs, exp) + + # no columns but Index(dtype=object) + df = DataFrame(index=["a", "b", "c"]) + result = df.xs("a") + expected = Series([], name="a", index=pd.Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_xs_duplicates(self): + df = DataFrame(np.random.randn(5, 2), index=["b", "b", "c", "b", "a"]) + + cross = df.xs("c") + exp = df.iloc[2] + tm.assert_series_equal(cross, exp) + + def test_xs_keep_level(self): + df = DataFrame( + { + "day": {0: "sat", 1: "sun"}, + "flavour": {0: "strawberry", 1: "strawberry"}, + "sales": {0: 10, 
1: 12}, + "year": {0: 2008, 1: 2008}, + } + ).set_index(["year", "flavour", "day"]) + result = df.xs("sat", level="day", drop_level=False) + expected = df[:1] + tm.assert_frame_equal(result, expected) + + result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False) + tm.assert_frame_equal(result, expected) + + def test_xs_view(self): + # in 0.14 this will return a view if possible a copy otherwise, but + # this is numpy dependent + + dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) + + dm.xs(2)[:] = 10 + assert (dm.xs(2) == 10).all() + + def test_index_namedtuple(self): + from collections import namedtuple + + IndexType = namedtuple("IndexType", ["a", "b"]) + idx1 = IndexType("foo", "bar") + idx2 = IndexType("baz", "bof") + index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) + df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) + + result = df.loc[IndexType("foo", "bar")]["A"] + assert result == 1 + + @pytest.mark.parametrize("tpl", [tuple([1]), tuple([1, 2])]) + def test_index_single_double_tuples(self, tpl): + # GH 20991 + idx = pd.Index([tuple([1]), tuple([1, 2])], name="A", tupleize_cols=False) + df = DataFrame(index=idx) + + result = df.loc[[tpl]] + idx = pd.Index([tpl], name="A", tupleize_cols=False) + expected = DataFrame(index=idx) + tm.assert_frame_equal(result, expected) + + def test_boolean_indexing(self): + idx = list(range(3)) + cols = ["A", "B", "C"] + df1 = DataFrame( + index=idx, + columns=cols, + data=np.array( + [[0.0, 0.5, 1.0], [1.5, 2.0, 2.5], [3.0, 3.5, 4.0]], dtype=float + ), + ) + df2 = DataFrame(index=idx, columns=cols, data=np.ones((len(idx), len(cols)))) + + expected = DataFrame( + index=idx, + columns=cols, + data=np.array([[0.0, 0.5, 1.0], [1.5, 2.0, -1], [-1, -1, -1]], dtype=float), + ) + + df1[df1 > 2.0 * df2] = -1 + tm.assert_frame_equal(df1, expected) + with pytest.raises(ValueError, match="Item wrong length"): + df1[df1.index[:-1] > 2] = -1 + + def 
test_boolean_indexing_mixed(self): + df = DataFrame( + { + 0: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 1: { + 35: np.nan, + 40: 0.32632316859446198, + 43: np.nan, + 49: 0.32632316859446198, + 50: 0.39114724480578139, + }, + 2: { + 35: np.nan, + 40: np.nan, + 43: 0.29012581014105987, + 49: np.nan, + 50: np.nan, + }, + 3: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + 4: { + 35: 0.34215328467153283, + 40: np.nan, + 43: np.nan, + 49: np.nan, + 50: np.nan, + }, + "y": {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}, + } + ) + + # mixed int/float ok + df2 = df.copy() + df2[df2 > 0.3] = 1 + expected = df.copy() + expected.loc[40, 1] = 1 + expected.loc[49, 1] = 1 + expected.loc[50, 1] = 1 + expected.loc[35, 4] = 1 + tm.assert_frame_equal(df2, expected) + + df["foo"] = "test" + msg = "not supported between instances|unorderable types" + + with pytest.raises(TypeError, match=msg): + df[df > 0.3] = 1 + + def test_mask(self): + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rs = df.where(cond, np.nan) + tm.assert_frame_equal(rs, df.mask(df <= 0)) + tm.assert_frame_equal(rs, df.mask(~cond)) + + other = DataFrame(np.random.randn(5, 3)) + rs = df.where(cond, other) + tm.assert_frame_equal(rs, df.mask(df <= 0, other)) + tm.assert_frame_equal(rs, df.mask(~cond, other)) + + # see gh-21891 + df = DataFrame([1, 2]) + res = df.mask([[True], [False]]) + + exp = DataFrame([np.nan, 2]) + tm.assert_frame_equal(res, exp) + + def test_mask_inplace(self): + # GH8801 + df = DataFrame(np.random.randn(5, 3)) + cond = df > 0 + + rdf = df.copy() + + rdf.where(cond, inplace=True) + tm.assert_frame_equal(rdf, df.where(cond)) + tm.assert_frame_equal(rdf, df.mask(~cond)) + + rdf = df.copy() + rdf.where(cond, -df, inplace=True) + tm.assert_frame_equal(rdf, df.where(cond, -df)) + tm.assert_frame_equal(rdf, df.mask(~cond, -df)) + + def test_mask_edge_case_1xN_frame(self): + # GH4071 + df = DataFrame([[1, 2]]) + res = df.mask(DataFrame([[True, False]])) + expec = 
DataFrame([[np.nan, 2]]) + tm.assert_frame_equal(res, expec) + + def test_mask_callable(self): + # GH 12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.mask(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df > 4, df + 1)) + + # return ndarray and scalar + result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99)) + + # chain + result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10)) + + def test_head_tail(self, float_frame): + tm.assert_frame_equal(float_frame.head(), float_frame[:5]) + tm.assert_frame_equal(float_frame.tail(), float_frame[-5:]) + + tm.assert_frame_equal(float_frame.head(0), float_frame[0:0]) + tm.assert_frame_equal(float_frame.tail(0), float_frame[0:0]) + + tm.assert_frame_equal(float_frame.head(-1), float_frame[:-1]) + tm.assert_frame_equal(float_frame.tail(-1), float_frame[1:]) + tm.assert_frame_equal(float_frame.head(1), float_frame[:1]) + tm.assert_frame_equal(float_frame.tail(1), float_frame[-1:]) + # with a float index + df = float_frame.copy() + df.index = np.arange(len(float_frame)) + 0.1 + tm.assert_frame_equal(df.head(), df.iloc[:5]) + tm.assert_frame_equal(df.tail(), df.iloc[-5:]) + tm.assert_frame_equal(df.head(0), df[0:0]) + tm.assert_frame_equal(df.tail(0), df[0:0]) + tm.assert_frame_equal(df.head(-1), df.iloc[:-1]) + tm.assert_frame_equal(df.tail(-1), df.iloc[1:]) + # test empty dataframe + empty_df = DataFrame() + tm.assert_frame_equal(empty_df.tail(), empty_df) + tm.assert_frame_equal(empty_df.head(), empty_df) + + def test_type_error_multiindex(self): + # See gh-12218 + df = 
DataFrame( + columns=["i", "c", "x", "y"], + data=[[0, 0, 1, 2], [1, 0, 3, 4], [0, 1, 1, 2], [1, 1, 3, 4]], + ) + dg = df.pivot_table(index="i", columns="c", values=["x", "y"]) + + with pytest.raises(TypeError, match="is an invalid key"): + str(dg[:, 0]) + + index = Index(range(2), name="i") + columns = MultiIndex( + levels=[["x", "y"], [0, 1]], codes=[[0, 1], [0, 0]], names=[None, "c"] + ) + expected = DataFrame([[1, 2], [3, 4]], columns=columns, index=index) + + result = dg.loc[:, (slice(None), 0)] + tm.assert_frame_equal(result, expected) + + name = ("x", 0) + index = Index(range(2), name="i") + expected = Series([1, 3], index=index, name=name) + + result = dg["x", 0] + tm.assert_series_equal(result, expected) + + def test_interval_index(self): + # GH 19977 + index = pd.interval_range(start=0, periods=3) + df = pd.DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + expected = 1 + result = df.loc[0.5, "A"] + tm.assert_almost_equal(result, expected) + + index = pd.interval_range(start=0, periods=3, closed="both") + df = pd.DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"] + ) + + index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both") + expected = pd.Series([1, 4], index=index_exp, name="A") + result = df.loc[1, "A"] + tm.assert_series_equal(result, expected) + + +class TestDataFrameIndexingUInt64: + def test_setitem(self, uint64_frame): + + df = uint64_frame + idx = df["A"].rename("foo") + + # setitem + df["C"] = idx + tm.assert_series_equal(df["C"], Series(idx, name="C")) + + df["D"] = "foo" + df["D"] = idx + tm.assert_series_equal(df["D"], Series(idx, name="D")) + del df["D"] + + # With NaN: because uint64 has no NaN element, + # the column should be cast to object. 
+ df2 = df.copy() + df2.iloc[1, 1] = pd.NaT + df2.iloc[1, 2] = pd.NaT + result = df2["B"] + tm.assert_series_equal(notna(result), Series([True, False, True], name="B")) + tm.assert_series_equal( + df2.dtypes, + Series( + [np.dtype("uint64"), np.dtype("O"), np.dtype("O")], + index=["A", "B", "C"], + ), + ) + + def test_set_reset(self): + + idx = Index([2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10], name="foo") + + # set/reset + df = DataFrame({"A": [0, 1, 2]}, index=idx) + result = df.reset_index() + assert result["foo"].dtype == np.dtype("uint64") + + df = result.set_index("foo") + tm.assert_index_equal(df.index, idx) + + def test_transpose(self, uint64_frame): + + result = uint64_frame.T + expected = DataFrame(uint64_frame.values.T) + expected.index = ["A", "B"] + tm.assert_frame_equal(result, expected) + + +def test_object_casting_indexing_wraps_datetimelike(): + # GH#31649, check the indexing methods all the way down the stack + df = pd.DataFrame( + { + "A": [1, 2], + "B": pd.date_range("2000", periods=2), + "C": pd.timedelta_range("1 Day", periods=2), + } + ) + + ser = df.loc[0] + assert isinstance(ser.values[1], pd.Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + ser = df.iloc[0] + assert isinstance(ser.values[1], pd.Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + ser = df.xs(0, axis=0) + assert isinstance(ser.values[1], pd.Timestamp) + assert isinstance(ser.values[2], pd.Timedelta) + + mgr = df._data + arr = mgr.fast_xs(0) + assert isinstance(arr[1], pd.Timestamp) + assert isinstance(arr[2], pd.Timedelta) + + blk = mgr.blocks[mgr._blknos[1]] + assert blk.dtype == "M8[ns]" # we got the right block + val = blk.iget((0, 0)) + assert isinstance(val, pd.Timestamp) + + blk = mgr.blocks[mgr._blknos[2]] + assert blk.dtype == "m8[ns]" # we got the right block + val = blk.iget((0, 0)) + assert isinstance(val, pd.Timedelta) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py new file mode 100644 index 
00000000..df1b128d --- /dev/null +++ b/pandas/tests/frame/indexing/test_where.py @@ -0,0 +1,582 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range, isna +import pandas._testing as tm + + +class TestDataFrameIndexingWhere: + def test_where(self, float_string_frame, mixed_float_frame, mixed_int_frame): + default_frame = DataFrame(np.random.randn(5, 3), columns=["A", "B", "C"]) + + def _safe_add(df): + # only add to the numeric items + def is_ok(s): + return ( + issubclass(s.dtype.type, (np.integer, np.floating)) + and s.dtype != "uint8" + ) + + return DataFrame( + dict((c, s + 1) if is_ok(s) else (c, s) for c, s in df.items()) + ) + + def _check_get(df, cond, check_dtypes=True): + other1 = _safe_add(df) + rs = df.where(cond, other1) + rs2 = df.where(cond.values, other1) + for k, v in rs.items(): + exp = Series(np.where(cond[k], df[k], other1[k]), index=v.index) + tm.assert_series_equal(v, exp, check_names=False) + tm.assert_frame_equal(rs, rs2) + + # dtypes + if check_dtypes: + assert (rs.dtypes == df.dtypes).all() + + # check getting + for df in [ + default_frame, + float_string_frame, + mixed_float_frame, + mixed_int_frame, + ]: + if df is float_string_frame: + with pytest.raises(TypeError): + df > 0 + continue + cond = df > 0 + _check_get(df, cond) + + # upcasting case (GH # 2794) + df = DataFrame( + { + c: Series([1] * 3, dtype=c) + for c in ["float32", "float64", "int32", "int64"] + } + ) + df.iloc[1, :] = 0 + result = df.dtypes + expected = Series( + [ + np.dtype("float32"), + np.dtype("float64"), + np.dtype("int32"), + np.dtype("int64"), + ], + index=["float32", "float64", "int32", "int64"], + ) + + # when we don't preserve boolean casts + # + # expected = Series({ 'float32' : 1, 'float64' : 3 }) + + tm.assert_series_equal(result, expected) + + # aligning + def _check_align(df, cond, other, 
check_dtypes=True): + rs = df.where(cond, other) + for i, k in enumerate(rs.columns): + result = rs[k] + d = df[k].values + c = cond[k].reindex(df[k].index).fillna(False).values + + if is_scalar(other): + o = other + else: + if isinstance(other, np.ndarray): + o = Series(other[:, i], index=result.index).values + else: + o = other[k].values + + new_values = d if c.all() else np.where(c, d, o) + expected = Series(new_values, index=result.index, name=k) + + # since we can't always have the correct numpy dtype + # as numpy doesn't know how to downcast, don't check + tm.assert_series_equal(result, expected, check_dtype=False) + + # dtypes + # can't check dtype when other is an ndarray + + if check_dtypes and not isinstance(other, np.ndarray): + assert (rs.dtypes == df.dtypes).all() + + for df in [float_string_frame, mixed_float_frame, mixed_int_frame]: + if df is float_string_frame: + with pytest.raises(TypeError): + df > 0 + continue + + # other is a frame + cond = (df > 0)[1:] + _check_align(df, cond, _safe_add(df)) + + # check other is ndarray + cond = df > 0 + _check_align(df, cond, (_safe_add(df).values)) + + # integers are upcast, so don't check the dtypes + cond = df > 0 + check_dtypes = all(not issubclass(s.type, np.integer) for s in df.dtypes) + _check_align(df, cond, np.nan, check_dtypes=check_dtypes) + + # invalid conditions + df = default_frame + err1 = (df + 1).values[0:2, :] + msg = "other must be the same shape as self when an ndarray" + with pytest.raises(ValueError, match=msg): + df.where(cond, err1) + + err2 = cond.iloc[:2, :].values + other1 = _safe_add(df) + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + df.where(err2, other1) + + with pytest.raises(ValueError, match=msg): + df.mask(True) + with pytest.raises(ValueError, match=msg): + df.mask(0) + + # where inplace + def _check_set(df, cond, check_dtypes=True): + dfi = df.copy() + econd = cond.reindex_like(df).fillna(True) + expected = 
dfi.mask(~econd) + + dfi.where(cond, np.nan, inplace=True) + tm.assert_frame_equal(dfi, expected) + + # dtypes (and confirm upcasts)x + if check_dtypes: + for k, v in df.dtypes.items(): + if issubclass(v.type, np.integer) and not cond[k].all(): + v = np.dtype("float64") + assert dfi[k].dtype == v + + for df in [ + default_frame, + float_string_frame, + mixed_float_frame, + mixed_int_frame, + ]: + if df is float_string_frame: + with pytest.raises(TypeError): + df > 0 + continue + + cond = df > 0 + _check_set(df, cond) + + cond = df >= 0 + _check_set(df, cond) + + # aligning + cond = (df >= 0)[1:] + _check_set(df, cond) + + # GH 10218 + # test DataFrame.where with Series slicing + df = DataFrame({"a": range(3), "b": range(4, 7)}) + result = df.where(df["a"] == 1) + expected = df[df["a"] == 1].reindex(df.index) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("klass", [list, tuple, np.array]) + def test_where_array_like(self, klass): + # see gh-15414 + df = DataFrame({"a": [1, 2, 3]}) + cond = [[False], [True], [True]] + expected = DataFrame({"a": [np.nan, 2, 3]}) + + result = df.where(klass(cond)) + tm.assert_frame_equal(result, expected) + + df["b"] = 2 + expected["b"] = [2, np.nan, 2] + cond = [[False, True], [True, False], [True, True]] + + result = df.where(klass(cond)) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "cond", + [ + [[1], [0], [1]], + Series([[2], [5], [7]]), + DataFrame({"a": [2, 5, 7]}), + [["True"], ["False"], ["True"]], + [[Timestamp("2017-01-01")], [pd.NaT], [Timestamp("2017-01-02")]], + ], + ) + def test_where_invalid_input_single(self, cond): + # see gh-15414: only boolean arrays accepted + df = DataFrame({"a": [1, 2, 3]}) + msg = "Boolean array expected for the condition" + + with pytest.raises(ValueError, match=msg): + df.where(cond) + + @pytest.mark.parametrize( + "cond", + [ + [[0, 1], [1, 0], [1, 1]], + Series([[0, 2], [5, 0], [4, 7]]), + [["False", "True"], ["True", "False"], ["True", 
"True"]], + DataFrame({"a": [2, 5, 7], "b": [4, 8, 9]}), + [ + [pd.NaT, Timestamp("2017-01-01")], + [Timestamp("2017-01-02"), pd.NaT], + [Timestamp("2017-01-03"), Timestamp("2017-01-03")], + ], + ], + ) + def test_where_invalid_input_multiple(self, cond): + # see gh-15414: only boolean arrays accepted + df = DataFrame({"a": [1, 2, 3], "b": [2, 2, 2]}) + msg = "Boolean array expected for the condition" + + with pytest.raises(ValueError, match=msg): + df.where(cond) + + def test_where_dataframe_col_match(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + cond = DataFrame([[True, False, True], [False, False, True]]) + + result = df.where(cond) + expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]]) + tm.assert_frame_equal(result, expected) + + # this *does* align, though has no matching columns + cond.columns = ["a", "b", "c"] + result = df.where(cond) + expected = DataFrame(np.nan, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + def test_where_ndframe_align(self): + msg = "Array conditional must be same shape as self" + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + + cond = [True] + with pytest.raises(ValueError, match=msg): + df.where(cond) + + expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + cond = np.array([False, True, False, True]) + with pytest.raises(ValueError, match=msg): + df.where(cond) + + expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]]) + + out = df.where(Series(cond)) + tm.assert_frame_equal(out, expected) + + def test_where_bug(self): + # see gh-2793 + df = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [4.0, 3.0, 2.0, 1.0]}, dtype="float64" + ) + expected = DataFrame( + {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]}, + dtype="float64", + ) + result = df.where(df > 2, np.nan) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.where(result > 2, np.nan, inplace=True) + 
tm.assert_frame_equal(result, expected) + + def test_where_bug_mixed(self, sint_dtype): + # see gh-2793 + df = DataFrame( + { + "a": np.array([1, 2, 3, 4], dtype=sint_dtype), + "b": np.array([4.0, 3.0, 2.0, 1.0], dtype="float64"), + } + ) + + expected = DataFrame( + {"a": [np.nan, np.nan, 3.0, 4.0], "b": [4.0, 3.0, np.nan, np.nan]}, + dtype="float64", + ) + + result = df.where(df > 2, np.nan) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.where(result > 2, np.nan, inplace=True) + tm.assert_frame_equal(result, expected) + + def test_where_bug_transposition(self): + # see gh-7506 + a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}) + b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}) + do_not_replace = b.isna() | (a > b) + + expected = a.copy() + expected[~do_not_replace] = b + + result = a.where(do_not_replace, b) + tm.assert_frame_equal(result, expected) + + a = DataFrame({0: [4, 6], 1: [1, 0]}) + b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]}) + do_not_replace = b.isna() | (a > b) + + expected = a.copy() + expected[~do_not_replace] = b + + result = a.where(do_not_replace, b) + tm.assert_frame_equal(result, expected) + + def test_where_datetime(self): + + # GH 3311 + df = DataFrame( + dict( + A=date_range("20130102", periods=5), + B=date_range("20130104", periods=5), + C=np.random.randn(5), + ) + ) + + stamp = datetime(2013, 1, 3) + with pytest.raises(TypeError): + df > stamp + + result = df[df.iloc[:, :-1] > stamp] + + expected = df.copy() + expected.loc[[0, 1], "A"] = np.nan + expected.loc[:, "C"] = np.nan + tm.assert_frame_equal(result, expected) + + def test_where_none(self): + # GH 4667 + # setting with None changes dtype + df = DataFrame({"series": Series(range(10))}).astype(float) + df[df > 7] = None + expected = DataFrame( + {"series": Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])} + ) + tm.assert_frame_equal(df, expected) + + # GH 7656 + df = DataFrame( + [ + {"A": 1, "B": np.nan, "C": "Test"}, + {"A": np.nan, 
"B": "Test", "C": np.nan}, + ] + ) + msg = "boolean setting on mixed-type" + + with pytest.raises(TypeError, match=msg): + df.where(~isna(df), None, inplace=True) + + def test_where_empty_df_and_empty_cond_having_non_bool_dtypes(self): + # see gh-21947 + df = pd.DataFrame(columns=["a"]) + cond = df.applymap(lambda x: x > 0) + + result = df.where(cond) + tm.assert_frame_equal(result, df) + + def test_where_align(self): + def create(): + df = DataFrame(np.random.randn(10, 3)) + df.iloc[3:5, 0] = np.nan + df.iloc[4:6, 1] = np.nan + df.iloc[5:8, 2] = np.nan + return df + + # series + df = create() + expected = df.fillna(df.mean()) + result = df.where(pd.notna(df), df.mean(), axis="columns") + tm.assert_frame_equal(result, expected) + + df.where(pd.notna(df), df.mean(), inplace=True, axis="columns") + tm.assert_frame_equal(df, expected) + + df = create().fillna(0) + expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0]) + result = df.where(df > 0, df[0], axis="index") + tm.assert_frame_equal(result, expected) + result = df.where(df > 0, df[0], axis="rows") + tm.assert_frame_equal(result, expected) + + # frame + df = create() + expected = df.fillna(1) + result = df.where( + pd.notna(df), DataFrame(1, index=df.index, columns=df.columns) + ) + tm.assert_frame_equal(result, expected) + + def test_where_complex(self): + # GH 6345 + expected = DataFrame([[1 + 1j, 2], [np.nan, 4 + 1j]], columns=["a", "b"]) + df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=["a", "b"]) + df[df.abs() >= 5] = np.nan + tm.assert_frame_equal(df, expected) + + def test_where_axis(self): + # GH 9736 + df = DataFrame(np.random.randn(2, 2)) + mask = DataFrame([[False, False], [False, False]]) + s = Series([0, 1]) + + expected = DataFrame([[0, 0], [1, 1]], dtype="float64") + result = df.where(mask, s, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.where(mask, s, axis="index", inplace=True) + tm.assert_frame_equal(result, expected) + + expected = 
DataFrame([[0, 1], [0, 1]], dtype="float64") + result = df.where(mask, s, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.where(mask, s, axis="columns", inplace=True) + tm.assert_frame_equal(result, expected) + + # Upcast needed + df = DataFrame([[1, 2], [3, 4]], dtype="int64") + mask = DataFrame([[False, False], [False, False]]) + s = Series([0, np.nan]) + + expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype="float64") + result = df.where(mask, s, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.where(mask, s, axis="index", inplace=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[0, np.nan], [0, np.nan]]) + result = df.where(mask, s, axis="columns") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + 0: np.array([0, 0], dtype="int64"), + 1: np.array([np.nan, np.nan], dtype="float64"), + } + ) + result = df.copy() + result.where(mask, s, axis="columns", inplace=True) + tm.assert_frame_equal(result, expected) + + # Multiple dtypes (=> multiple Blocks) + df = pd.concat( + [ + DataFrame(np.random.randn(10, 2)), + DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype="int64"), + ], + ignore_index=True, + axis=1, + ) + mask = DataFrame(False, columns=df.columns, index=df.index) + s1 = Series(1, index=df.columns) + s2 = Series(2, index=df.index) + + result = df.where(mask, s1, axis="columns") + expected = DataFrame(1.0, columns=df.columns, index=df.index) + expected[2] = expected[2].astype("int64") + expected[3] = expected[3].astype("int64") + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.where(mask, s1, axis="columns", inplace=True) + tm.assert_frame_equal(result, expected) + + result = df.where(mask, s2, axis="index") + expected = DataFrame(2.0, columns=df.columns, index=df.index) + expected[2] = expected[2].astype("int64") + expected[3] = expected[3].astype("int64") + tm.assert_frame_equal(result, expected) + 
+ result = df.copy() + result.where(mask, s2, axis="index", inplace=True) + tm.assert_frame_equal(result, expected) + + # DataFrame vs DataFrame + d1 = df.copy().drop(1, axis=0) + expected = df.copy() + expected.loc[1, :] = np.nan + + result = df.where(mask, d1) + tm.assert_frame_equal(result, expected) + result = df.where(mask, d1, axis="index") + tm.assert_frame_equal(result, expected) + result = df.copy() + result.where(mask, d1, inplace=True) + tm.assert_frame_equal(result, expected) + result = df.copy() + result.where(mask, d1, inplace=True, axis="index") + tm.assert_frame_equal(result, expected) + + d2 = df.copy().drop(1, axis=1) + expected = df.copy() + expected.loc[:, 1] = np.nan + + result = df.where(mask, d2) + tm.assert_frame_equal(result, expected) + result = df.where(mask, d2, axis="columns") + tm.assert_frame_equal(result, expected) + result = df.copy() + result.where(mask, d2, inplace=True) + tm.assert_frame_equal(result, expected) + result = df.copy() + result.where(mask, d2, inplace=True, axis="columns") + tm.assert_frame_equal(result, expected) + + def test_where_callable(self): + # GH 12533 + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + result = df.where(lambda x: x > 4, lambda x: x + 1) + exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df > 4, df + 1)) + + # return ndarray and scalar + result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99) + exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, df.where(df % 2 == 0, 99)) + + # chain + result = (df + 2).where(lambda x: x > 8, lambda x: x + 10) + exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]]) + tm.assert_frame_equal(result, exp) + tm.assert_frame_equal(result, (df + 2).where((df + 2) > 8, (df + 2) + 10)) + + def test_where_tz_values(self, tz_naive_fixture): + df1 = DataFrame( + DatetimeIndex(["20150101", "20150102", 
"20150103"], tz=tz_naive_fixture), + columns=["date"], + ) + df2 = DataFrame( + DatetimeIndex(["20150103", "20150104", "20150105"], tz=tz_naive_fixture), + columns=["date"], + ) + mask = DataFrame([True, True, False], columns=["date"]) + exp = DataFrame( + DatetimeIndex(["20150101", "20150102", "20150105"], tz=tz_naive_fixture), + columns=["date"], + ) + result = df1.where(mask, df2) + tm.assert_frame_equal(exp, result) diff --git a/pandas/tests/frame/methods/__init__.py b/pandas/tests/frame/methods/__init__.py new file mode 100644 index 00000000..245594bf --- /dev/null +++ b/pandas/tests/frame/methods/__init__.py @@ -0,0 +1,7 @@ +""" +Test files dedicated to individual (stand-alone) DataFrame methods + +Ideally these files/tests should correspond 1-to-1 with tests.series.methods + +These may also present opportunities for sharing/de-duplicating test code. +""" diff --git a/pandas/tests/frame/methods/test_append.py b/pandas/tests/frame/methods/test_append.py new file mode 100644 index 00000000..d128a51f --- /dev/null +++ b/pandas/tests/frame/methods/test_append.py @@ -0,0 +1,195 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, Timestamp +import pandas._testing as tm + + +class TestDataFrameAppend: + def test_append_empty_list(self): + # GH 28769 + df = DataFrame() + result = df.append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df + + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + result = df.append([]) + expected = df + tm.assert_frame_equal(result, expected) + assert result is not df # .append() should return a new object + + def test_append_series_dict(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + series = df.loc[4] + msg = "Indexes have overlapping values" + with pytest.raises(ValueError, match=msg): + df.append(series, verify_integrity=True) + + series.name = None + msg = "Can only append a Series 
if ignore_index=True" + with pytest.raises(TypeError, match=msg): + df.append(series, verify_integrity=True) + + result = df.append(series[::-1], ignore_index=True) + expected = df.append( + DataFrame({0: series[::-1]}, index=df.columns).T, ignore_index=True + ) + tm.assert_frame_equal(result, expected) + + # dict + result = df.append(series.to_dict(), ignore_index=True) + tm.assert_frame_equal(result, expected) + + result = df.append(series[::-1][:3], ignore_index=True) + expected = df.append( + DataFrame({0: series[::-1][:3]}).T, ignore_index=True, sort=True + ) + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + # can append when name set + row = df.loc[4] + row.name = 5 + result = df.append(row) + expected = df.append(df[-1:], ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_append_list_of_series_dicts(self): + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [x.to_dict() for idx, x in df.iterrows()] + + result = df.append(dicts, ignore_index=True) + expected = df.append(df, ignore_index=True) + tm.assert_frame_equal(result, expected) + + # different columns + dicts = [ + {"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, + {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}, + ] + result = df.append(dicts, ignore_index=True, sort=True) + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + def test_append_missing_cols(self): + # GH22252 + # exercise the conditional branch in append method where the data + # to be appended is a list and does not contain all columns that are in + # the target DataFrame + df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"]) + + dicts = [{"foo": 9}, {"bar": 10}] + with tm.assert_produces_warning(None): + result = df.append(dicts, ignore_index=True, sort=True) + + expected = df.append(DataFrame(dicts), ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + 
def test_append_empty_dataframe(self): + + # Empty df append empty df + df1 = DataFrame() + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-empty df append empty df + df1 = DataFrame(np.random.randn(5, 2)) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Empty df with columns append empty df + df1 = DataFrame(columns=["bar", "foo"]) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + # Non-Empty df with columns append empty df + df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"]) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + def test_append_dtypes(self): + + # GH 5754 + # row appends of different dtypes (so need to do by-item) + # can sometimes infer the correct type + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(5)) + df2 = DataFrame() + result = df1.append(df2) + expected = df1.copy() + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": "foo"}, index=range(1, 2)) + result = df1.append(df2) + expected = DataFrame({"bar": [Timestamp("20130101"), "foo"]}) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2)) + result = df1.append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": np.nan}, index=range(1, 2), dtype=object) + result = df1.append(df2) + expected = DataFrame( + {"bar": Series([Timestamp("20130101"), np.nan], dtype="M8[ns]")} + ) + tm.assert_frame_equal(result, expected) + + df1 = 
DataFrame({"bar": np.nan}, index=range(1)) + df2 = DataFrame({"bar": Timestamp("20130101")}, index=range(1, 2)) + result = df1.append(df2) + expected = DataFrame( + {"bar": Series([np.nan, Timestamp("20130101")], dtype="M8[ns]")} + ) + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"bar": Timestamp("20130101")}, index=range(1)) + df2 = DataFrame({"bar": 1}, index=range(1, 2), dtype=object) + result = df1.append(df2) + expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] + ) + def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): + # GH 30238 + tz = tz_naive_fixture + df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)]) + result = df.append(df.iloc[0]).iloc[-1] + expected = pd.Series(pd.Timestamp(timestamp, tz=tz), name=0) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data, dtype", + [ + ([1], pd.Int64Dtype()), + ([1], pd.CategoricalDtype()), + ([pd.Interval(left=0, right=5)], pd.IntervalDtype()), + ([pd.Period("2000-03", freq="M")], pd.PeriodDtype("M")), + ([1], pd.SparseDtype()), + ], + ) + def test_other_dtypes(self, data, dtype): + df = pd.DataFrame(data, dtype=dtype) + result = df.append(df.iloc[0]).iloc[-1] + expected = pd.Series(data, name=0, dtype=dtype) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py new file mode 100644 index 00000000..e2b41797 --- /dev/null +++ b/pandas/tests/frame/methods/test_asof.py @@ -0,0 +1,158 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Period, Series, Timestamp, date_range, to_datetime +import pandas._testing as tm + + +@pytest.fixture +def date_range_frame(): + """ + Fixture for DataFrame of ints with date_range index + + Columns are ['A', 'B']. 
+ """ + N = 50 + rng = date_range("1/1/1990", periods=N, freq="53s") + return DataFrame({"A": np.arange(N), "B": np.arange(N)}, index=rng) + + +class TestFrameAsof: + def test_basic(self, date_range_frame): + df = date_range_frame + N = 50 + df.loc[15:30, "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = df.asof(dates) + assert result.notna().all(1).all() + lb = df.index[14] + ub = df.index[30] + + dates = list(dates) + + result = df.asof(dates) + assert result.notna().all(1).all() + + mask = (result.index >= lb) & (result.index < ub) + rs = result[mask] + assert (rs == 14).all(1).all() + + def test_subset(self, date_range_frame): + N = 10 + df = date_range_frame.iloc[:N].copy() + df.loc[4:8, "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + # with a subset of A should be the same + result = df.asof(dates, subset="A") + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) + + # same with A/B + result = df.asof(dates, subset=["A", "B"]) + expected = df.asof(dates) + tm.assert_frame_equal(result, expected) + + # B gives df.asof + result = df.asof(dates, subset="B") + expected = df.resample("25s", closed="right").ffill().reindex(dates) + expected.iloc[20:] = 9 + + tm.assert_frame_equal(result, expected) + + def test_missing(self, date_range_frame): + # GH 15118 + # no match found - `where` value before earliest date in index + N = 10 + df = date_range_frame.iloc[:N].copy() + + result = df.asof("1989-12-31") + + expected = Series( + index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64 + ) + tm.assert_series_equal(result, expected) + + result = df.asof(to_datetime(["1989-12-31"])) + expected = DataFrame( + index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64" + ) + tm.assert_frame_equal(result, expected) + + # Check that we handle PeriodIndex correctly, dont end up with + # period.ordinal for series name + df = df.to_period("D") + result = df.asof("1989-12-31") + 
assert isinstance(result.name, Period) + + def test_all_nans(self, date_range_frame): + # GH 15713 + # DataFrame is all nans + result = DataFrame([np.nan]).asof([0]) + expected = DataFrame([np.nan]) + tm.assert_frame_equal(result, expected) + + # testing non-default indexes, multiple inputs + N = 150 + rng = date_range_frame.index + dates = date_range("1/1/1990", periods=N, freq="25s") + result = DataFrame(np.nan, index=rng, columns=["A"]).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=["A"]) + tm.assert_frame_equal(result, expected) + + # testing multiple columns + dates = date_range("1/1/1990", periods=N, freq="25s") + result = DataFrame(np.nan, index=rng, columns=["A", "B", "C"]).asof(dates) + expected = DataFrame(np.nan, index=dates, columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + # testing scalar input + result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof([3]) + expected = DataFrame(np.nan, index=[3], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof(3) + expected = Series(np.nan, index=["A", "B"], name=3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "stamp,expected", + [ + ( + Timestamp("2018-01-01 23:22:43.325+00:00"), + Series(2.0, name=Timestamp("2018-01-01 23:22:43.325+00:00")), + ), + ( + Timestamp("2018-01-01 22:33:20.682+01:00"), + Series(1.0, name=Timestamp("2018-01-01 22:33:20.682+01:00")), + ), + ], + ) + def test_time_zone_aware_index(self, stamp, expected): + # GH21194 + # Testing awareness of DataFrame index considering different + # UTC and timezone + df = DataFrame( + data=[1, 2], + index=[ + Timestamp("2018-01-01 21:00:05.001+00:00"), + Timestamp("2018-01-01 22:35:10.550+00:00"), + ], + ) + + result = df.asof(stamp) + tm.assert_series_equal(result, expected) + + def test_is_copy(self, date_range_frame): + # GH-27357, GH-30784: ensure the result of asof is an actual copy and + # 
doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + df = date_range_frame + N = 50 + df.loc[15:30, "A"] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = df.asof(dates) + + with tm.assert_produces_warning(None): + result["C"] = 1 diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py new file mode 100644 index 00000000..34727da3 --- /dev/null +++ b/pandas/tests/frame/methods/test_clip.py @@ -0,0 +1,157 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm + + +class TestDataFrameClip: + def test_clip(self, float_frame): + median = float_frame.median().median() + original = float_frame.copy() + + double = float_frame.clip(upper=median, lower=median) + assert not (double.values != median).any() + + # Verify that float_frame was not changed inplace + assert (float_frame.values == original.values).all() + + def test_inplace_clip(self, float_frame): + # GH#15388 + median = float_frame.median().median() + frame_copy = float_frame.copy() + + frame_copy.clip(upper=median, lower=median, inplace=True) + assert not (frame_copy.values != median).any() + + def test_dataframe_clip(self): + # GH#2747 + df = DataFrame(np.random.randn(1000, 2)) + + for lb, ub in [(-1, 1), (1, -1)]: + clipped_df = df.clip(lb, ub) + + lb, ub = min(lb, ub), max(ub, lb) + lb_mask = df.values <= lb + ub_mask = df.values >= ub + mask = ~lb_mask & ~ub_mask + assert (clipped_df.values[lb_mask] == lb).all() + assert (clipped_df.values[ub_mask] == ub).all() + assert (clipped_df.values[mask] == df.values[mask]).all() + + def test_clip_mixed_numeric(self): + # TODO(jreback) + # clip on mixed integer or floats + # with integer clippers coerces to float + df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]}) + result = df.clip(1, 2) + expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]}) + tm.assert_frame_equal(result, expected, 
check_like=True) + + # GH#24162, clipping now preserves numeric types per column + df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"]) + expected = df.dtypes + result = df.clip(upper=3).dtypes + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + def test_clip_against_series(self, inplace): + # GH#6966 + + df = DataFrame(np.random.randn(1000, 2)) + lb = Series(np.random.randn(1000)) + ub = lb + 1 + + original = df.copy() + clipped_df = df.clip(lb, ub, axis=0, inplace=inplace) + + if inplace: + clipped_df = df + + for i in range(2): + lb_mask = original.iloc[:, i] <= lb + ub_mask = original.iloc[:, i] >= ub + mask = ~lb_mask & ~ub_mask + + result = clipped_df.loc[lb_mask, i] + tm.assert_series_equal(result, lb[lb_mask], check_names=False) + assert result.name == i + + result = clipped_df.loc[ub_mask, i] + tm.assert_series_equal(result, ub[ub_mask], check_names=False) + assert result.name == i + + tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) + @pytest.mark.parametrize( + "axis,res", + [ + (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]), + (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]), + ], + ) + def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): + # GH#15390 + original = simple_frame.copy(deep=True) + + result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) + + expected = pd.DataFrame(res, columns=original.columns, index=original.index) + if inplace: + result = original + tm.assert_frame_equal(result, expected, check_exact=True) + + @pytest.mark.parametrize("axis", [0, 1, None]) + def test_clip_against_frame(self, axis): + df = DataFrame(np.random.randn(1000, 2)) + lb = DataFrame(np.random.randn(1000, 2)) + ub = lb + 1 + + clipped_df = df.clip(lb, ub, axis=axis) + + lb_mask = df <= 
lb + ub_mask = df >= ub + mask = ~lb_mask & ~ub_mask + + tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask]) + tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) + tm.assert_frame_equal(clipped_df[mask], df[mask]) + + def test_clip_against_unordered_columns(self): + # GH#20911 + df1 = DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"]) + df2 = DataFrame(np.random.randn(1000, 4), columns=["D", "A", "B", "C"]) + df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"]) + result_upper = df1.clip(lower=0, upper=df2) + expected_upper = df1.clip(lower=0, upper=df2[df1.columns]) + result_lower = df1.clip(lower=df3, upper=3) + expected_lower = df1.clip(lower=df3[df1.columns], upper=3) + result_lower_upper = df1.clip(lower=df3, upper=df2) + expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns]) + tm.assert_frame_equal(result_upper, expected_upper) + tm.assert_frame_equal(result_lower, expected_lower) + tm.assert_frame_equal(result_lower_upper, expected_lower_upper) + + def test_clip_with_na_args(self, float_frame): + """Should process np.nan argument as None """ + # GH#17276 + tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) + tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) + + # GH#19992 + df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}) + + result = df.clip(lower=[4, 5, np.nan], axis=0) + expected = DataFrame( + {"col_0": [4, 5, np.nan], "col_1": [4, 5, np.nan], "col_2": [7, 8, np.nan]} + ) + tm.assert_frame_equal(result, expected) + + result = df.clip(lower=[4, 5, np.nan], axis=1) + expected = DataFrame( + {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [np.nan, np.nan, np.nan]} + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py new file mode 100644 index 00000000..13a93e3e --- /dev/null +++ b/pandas/tests/frame/methods/test_count.py @@ -0,0 +1,36 @@ +from 
pandas import DataFrame, Series +import pandas._testing as tm + + +class TestDataFrameCount: + def test_count(self): + # corner case + frame = DataFrame() + ct1 = frame.count(1) + assert isinstance(ct1, Series) + + ct2 = frame.count(0) + assert isinstance(ct2, Series) + + # GH#423 + df = DataFrame(index=range(10)) + result = df.count(1) + expected = Series(0, index=df.index) + tm.assert_series_equal(result, expected) + + df = DataFrame(columns=range(10)) + result = df.count(0) + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + df = DataFrame() + result = df.count() + expected = Series(0, index=[]) + tm.assert_series_equal(result, expected) + + def test_count_objects(self, float_string_frame): + dm = DataFrame(float_string_frame._series) + df = DataFrame(float_string_frame._series) + + tm.assert_series_equal(dm.count(), df.count()) + tm.assert_series_equal(dm.count(1), df.count(1)) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py new file mode 100644 index 00000000..5c13b60a --- /dev/null +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -0,0 +1,272 @@ +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Series, isna +import pandas._testing as tm + + +class TestDataFrameCov: + def test_cov(self, float_frame, float_string_frame): + # min_periods no NAs (corner case) + expected = float_frame.cov() + result = float_frame.cov(min_periods=len(float_frame)) + + tm.assert_frame_equal(expected, result) + + result = float_frame.cov(min_periods=len(float_frame) + 1) + assert isna(result.values).all() + + # with NAs + frame = float_frame.copy() + frame["A"][:5] = np.nan + frame["B"][5:10] = np.nan + result = float_frame.cov(min_periods=len(float_frame) - 8) + expected = float_frame.cov() + expected.loc["A", "B"] = np.nan + expected.loc["B", "A"] = np.nan + + # regular + 
float_frame["A"][:5] = np.nan + float_frame["B"][:10] = np.nan + cov = float_frame.cov() + + tm.assert_almost_equal(cov["A"]["C"], float_frame["A"].cov(float_frame["C"])) + + # exclude non-numeric types + result = float_string_frame.cov() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov() + tm.assert_frame_equal(result, expected) + + # Single column frame + df = DataFrame(np.linspace(0.0, 1.0, 10)) + result = df.cov() + expected = DataFrame( + np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns + ) + tm.assert_frame_equal(result, expected) + df.loc[0] = np.nan + result = df.cov() + expected = DataFrame( + np.cov(df.values[1:].T).reshape((1, 1)), + index=df.columns, + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameCorr: + # DataFrame.corr(), as opposed to DataFrame.corrwith + + @pytest.mark.parametrize("method", ["pearson", "kendall", "spearman"]) + @td.skip_if_no_scipy + def test_corr_scipy_method(self, float_frame, method): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + + correls = float_frame.corr(method=method) + expected = float_frame["A"].corr(float_frame["C"], method=method) + tm.assert_almost_equal(correls["A"]["C"], expected) + + # --------------------------------------------------------------------- + + @td.skip_if_no_scipy + def test_corr_non_numeric(self, float_frame, float_string_frame): + float_frame["A"][:5] = np.nan + float_frame["B"][5:10] = np.nan + + # exclude non-numeric types + result = float_string_frame.corr() + expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].corr() + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"]) + def test_corr_nooverlap(self, meth): + # nothing in common + df = DataFrame( + { + "A": [1, 1.5, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1.5, 1], + "C": [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + } + ) 
+ rs = df.corr(meth) + assert isna(rs.loc["A", "B"]) + assert isna(rs.loc["B", "A"]) + assert rs.loc["A", "A"] == 1 + assert rs.loc["B", "B"] == 1 + assert isna(rs.loc["C", "C"]) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("meth", ["pearson", "spearman"]) + def test_corr_constant(self, meth): + # constant --> all NA + + df = DataFrame( + { + "A": [1, 1, 1, np.nan, np.nan, np.nan], + "B": [np.nan, np.nan, np.nan, 1, 1, 1], + } + ) + rs = df.corr(meth) + assert isna(rs.values).all() + + @td.skip_if_no_scipy + def test_corr_int_and_boolean(self): + # when dtypes of pandas series are different + # then ndarray will have dtype=object, + # so it need to be properly handled + df = DataFrame({"a": [True, False], "b": [1, 0]}) + + expected = DataFrame(np.ones((2, 2)), index=["a", "b"], columns=["a", "b"]) + for meth in ["pearson", "kendall", "spearman"]: + + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + result = df.corr(meth) + tm.assert_frame_equal(result, expected) + + def test_corr_cov_independent_index_column(self): + # GH#14617 + df = pd.DataFrame(np.random.randn(4 * 10).reshape(10, 4), columns=list("abcd")) + for method in ["cov", "corr"]: + result = getattr(df, method)() + assert result.index is not result.columns + assert result.index.equals(result.columns) + + def test_corr_invalid_method(self): + # GH#22298 + df = pd.DataFrame(np.random.normal(size=(10, 2))) + msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " + with pytest.raises(ValueError, match=msg): + df.corr(method="____") + + def test_corr_int(self): + # dtypes other than float64 GH#1761 + df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) + + df3.cov() + df3.corr() + + +class TestDataFrameCorrWith: + def test_corrwith(self, datetime_frame): + a = datetime_frame + noise = Series(np.random.randn(len(a)), index=a.index) + + b = datetime_frame.add(noise, axis=0) + + # make sure order does not matter + b = 
b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:]) + del b["B"] + + colcorr = a.corrwith(b, axis=0) + tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"])) + + rowcorr = a.corrwith(b, axis=1) + tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0)) + + dropped = a.corrwith(b, axis=0, drop=True) + tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"])) + assert "B" not in dropped + + dropped = a.corrwith(b, axis=1, drop=True) + assert a.index[-1] not in dropped.index + + # non time-series data + index = ["a", "b", "c", "d", "e"] + columns = ["one", "two", "three", "four"] + df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns) + df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns) + correls = df1.corrwith(df2, axis=1) + for row in index[:4]: + tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row])) + + def test_corrwith_with_objects(self): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame() + cols = ["A", "B", "C", "D"] + + df1["obj"] = "foo" + df2["obj"] = "bar" + + result = df1.corrwith(df2) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols]) + tm.assert_series_equal(result, expected) + + result = df1.corrwith(df2, axis=1) + expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1) + tm.assert_series_equal(result, expected) + + def test_corrwith_series(self, datetime_frame): + result = datetime_frame.corrwith(datetime_frame["A"]) + expected = datetime_frame.apply(datetime_frame["A"].corr) + + tm.assert_series_equal(result, expected) + + def test_corrwith_matches_corrcoef(self): + df1 = DataFrame(np.arange(10000), columns=["a"]) + df2 = DataFrame(np.arange(10000) ** 2, columns=["a"]) + c1 = df1.corrwith(df2)["a"] + c2 = np.corrcoef(df1["a"], df2["a"])[0][1] + + tm.assert_almost_equal(c1, c2) + assert c1 < 1 + + def test_corrwith_mixed_dtypes(self): + # GH#18570 + df = pd.DataFrame( + {"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]} + ) + s = pd.Series([0, 6, 7, 
3]) + result = df.corrwith(s) + corrs = [df["a"].corr(s), df["b"].corr(s)] + expected = pd.Series(data=corrs, index=["a", "b"]) + tm.assert_series_equal(result, expected) + + def test_corrwith_index_intersection(self): + df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=True).index.sort_values() + expected = df1.columns.intersection(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_index_union(self): + df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"]) + df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"]) + + result = df1.corrwith(df2, drop=False).index.sort_values() + expected = df1.columns.union(df2.columns).sort_values() + tm.assert_index_equal(result, expected) + + def test_corrwith_dup_cols(self): + # GH#21925 + df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T) + df2 = df1.copy() + df2 = pd.concat((df2, df2[0]), axis=1) + + result = df1.corrwith(df2) + expected = pd.Series(np.ones(4), index=[0, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_corrwith_spearman(self): + # GH#21925 + df = pd.DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df ** 2, method="spearman") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_corrwith_kendall(self): + # GH#21925 + df = pd.DataFrame(np.random.random(size=(100, 3))) + result = df.corrwith(df ** 2, method="kendall") + expected = Series(np.ones(len(result))) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py new file mode 100644 index 00000000..251563e5 --- /dev/null +++ b/pandas/tests/frame/methods/test_describe.py @@ -0,0 +1,333 @@ +import numpy as np + +import pandas as pd +from pandas 
import Categorical, DataFrame, Series, Timestamp, date_range +import pandas._testing as tm + + +class TestDataFrameDescribe: + def test_describe_bool_in_mixed_frame(self): + df = DataFrame( + { + "string_data": ["a", "b", "c", "d", "e"], + "bool_data": [True, True, False, False, False], + "int_data": [10, 20, 30, 40, 50], + } + ) + + # Integer data are included in .describe() output, + # Boolean and string data are not. + result = df.describe() + expected = DataFrame( + {"int_data": [5, 30, df.int_data.std(), 10, 20, 30, 40, 50]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + # Top value is a boolean value that is False + result = df.describe(include=["bool"]) + + expected = DataFrame( + {"bool_data": [5, 2, False, 3]}, index=["count", "unique", "top", "freq"] + ) + tm.assert_frame_equal(result, expected) + + def test_describe_empty_object(self): + # GH#27183 + df = pd.DataFrame({"A": [None, None]}, dtype=object) + result = df.describe() + expected = pd.DataFrame( + {"A": [0, 0, np.nan, np.nan]}, + dtype=object, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + result = df.iloc[:0].describe() + tm.assert_frame_equal(result, expected) + + def test_describe_bool_frame(self): + # GH#13891 + df = pd.DataFrame( + { + "bool_data_1": [False, False, True, True], + "bool_data_2": [False, True, True, True], + } + ) + result = df.describe() + expected = DataFrame( + {"bool_data_1": [4, 2, True, 2], "bool_data_2": [4, 2, True, 3]}, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame( + { + "bool_data": [False, False, True, True, False], + "int_data": [0, 1, 2, 3, 4], + } + ) + result = df.describe() + expected = DataFrame( + {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]}, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + df = 
pd.DataFrame( + {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]} + ) + result = df.describe() + expected = DataFrame( + {"bool_data": [4, 2, True, 2], "str_data": [4, 3, "a", 2]}, + index=["count", "unique", "top", "freq"], + ) + tm.assert_frame_equal(result, expected) + + def test_describe_categorical(self): + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + cat = df + + # Categoricals should not show up together with numerical columns + result = cat.describe() + assert len(result.columns) == 1 + + # In a frame, describe() for the cat should be the same as for string + # arrays (count, unique, top, freq) + + cat = Categorical( + ["a", "b", "b", "b"], categories=["a", "b", "c"], ordered=True + ) + s = Series(cat) + result = s.describe() + expected = Series([4, 2, "b", 3], index=["count", "unique", "top", "freq"]) + tm.assert_series_equal(result, expected) + + cat = Series(Categorical(["a", "b", "c", "c"])) + df3 = DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) + result = df3.describe() + tm.assert_numpy_array_equal(result["cat"].values, result["s"].values) + + def test_describe_empty_categorical_column(self): + # GH#26397 + # Ensure the index of an an empty categorical DataFrame column + # also contains (count, unique, top, freq) + df = pd.DataFrame({"empty_col": Categorical([])}) + result = df.describe() + expected = DataFrame( + {"empty_col": [0, 0, np.nan, np.nan]}, + index=["count", "unique", "top", "freq"], + dtype="object", + ) + tm.assert_frame_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2, 0]) + assert np.isnan(result.iloc[3, 0]) + + def test_describe_categorical_columns(self): + # GH#11558 + columns = 
pd.CategoricalIndex(["int1", "int2", "obj"], ordered=True, name="XXX") + df = DataFrame( + { + "int1": [10, 20, 30, 40, 50], + "int2": [10, 20, 30, 40, 50], + "obj": ["A", 0, None, "X", 1], + }, + columns=columns, + ) + result = df.describe() + + exp_columns = pd.CategoricalIndex( + ["int1", "int2"], + categories=["int1", "int2", "obj"], + ordered=True, + name="XXX", + ) + expected = DataFrame( + { + "int1": [5, 30, df.int1.std(), 10, 20, 30, 40, 50], + "int2": [5, 30, df.int2.std(), 10, 20, 30, 40, 50], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + columns=exp_columns, + ) + + tm.assert_frame_equal(result, expected) + tm.assert_categorical_equal(result.columns.values, expected.columns.values) + + def test_describe_datetime_columns(self): + columns = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], + freq="MS", + tz="US/Eastern", + name="XXX", + ) + df = DataFrame( + { + 0: [10, 20, 30, 40, 50], + 1: [10, 20, 30, 40, 50], + 2: ["A", 0, None, "X", 1], + } + ) + df.columns = columns + result = df.describe() + + exp_columns = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01"], freq="MS", tz="US/Eastern", name="XXX" + ) + expected = DataFrame( + { + 0: [5, 30, df.iloc[:, 0].std(), 10, 20, 30, 40, 50], + 1: [5, 30, df.iloc[:, 1].std(), 10, 20, 30, 40, 50], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + expected.columns = exp_columns + tm.assert_frame_equal(result, expected) + assert result.columns.freq == "MS" + assert result.columns.tz == expected.columns.tz + + def test_describe_timedelta_values(self): + # GH#6145 + t1 = pd.timedelta_range("1 days", freq="D", periods=5) + t2 = pd.timedelta_range("1 hours", freq="H", periods=5) + df = pd.DataFrame({"t1": t1, "t2": t2}) + + expected = DataFrame( + { + "t1": [ + 5, + pd.Timedelta("3 days"), + df.iloc[:, 0].std(), + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.Timedelta("4 days"), + pd.Timedelta("5 days"), + ], 
+ "t2": [ + 5, + pd.Timedelta("3 hours"), + df.iloc[:, 1].std(), + pd.Timedelta("1 hours"), + pd.Timedelta("2 hours"), + pd.Timedelta("3 hours"), + pd.Timedelta("4 hours"), + pd.Timedelta("5 hours"), + ], + }, + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + + result = df.describe() + tm.assert_frame_equal(result, expected) + + exp_repr = ( + " t1 t2\n" + "count 5 5\n" + "mean 3 days 00:00:00 0 days 03:00:00\n" + "std 1 days 13:56:50.394919 0 days 01:34:52.099788\n" + "min 1 days 00:00:00 0 days 01:00:00\n" + "25% 2 days 00:00:00 0 days 02:00:00\n" + "50% 3 days 00:00:00 0 days 03:00:00\n" + "75% 4 days 00:00:00 0 days 04:00:00\n" + "max 5 days 00:00:00 0 days 05:00:00" + ) + assert repr(result) == exp_repr + + def test_describe_tz_values(self, tz_naive_fixture): + # GH#21332 + tz = tz_naive_fixture + s1 = Series(range(5)) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s2 = Series(date_range(start, end, tz=tz)) + df = pd.DataFrame({"s1": s1, "s2": s2}) + + expected = DataFrame( + { + "s1": [ + 5, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + 2, + 1.581139, + 0, + 1, + 2, + 3, + 4, + ], + "s2": [ + 5, + 5, + s2.value_counts().index[0], + 1, + start.tz_localize(tz), + end.tz_localize(tz), + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ], + }, + index=[ + "count", + "unique", + "top", + "freq", + "first", + "last", + "mean", + "std", + "min", + "25%", + "50%", + "75%", + "max", + ], + ) + result = df.describe(include="all") + tm.assert_frame_equal(result, expected) + + def test_describe_percentiles_integer_idx(self): + # GH#26660 + df = pd.DataFrame({"x": [1]}) + pct = np.linspace(0, 1, 10 + 1) + result = df.describe(percentiles=pct) + + expected = DataFrame( + {"x": [1.0, 1.0, np.NaN, 1.0, *[1.0 for _ in pct], 1.0]}, + index=[ + "count", + "mean", + "std", + "min", + "0%", + "10%", + "20%", + "30%", + "40%", + "50%", + "60%", + "70%", + "80%", + "90%", + "100%", + "max", + ], + ) + 
tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py new file mode 100644 index 00000000..43c25f4c --- /dev/null +++ b/pandas/tests/frame/methods/test_diff.py @@ -0,0 +1,120 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, Timestamp, date_range +import pandas._testing as tm + + +class TestDataFrameDiff: + def test_diff(self, datetime_frame): + the_diff = datetime_frame.diff(1) + + tm.assert_series_equal( + the_diff["A"], datetime_frame["A"] - datetime_frame["A"].shift(1) + ) + + # int dtype + a = 10000000000000000 + b = a + 1 + s = Series([a, b]) + + rs = DataFrame({"s": s}).diff() + assert rs.s[1] == 1 + + # mixed numeric + tf = datetime_frame.astype("float32") + the_diff = tf.diff(1) + tm.assert_series_equal(the_diff["A"], tf["A"] - tf["A"].shift(1)) + + # GH#10907 + df = pd.DataFrame({"y": pd.Series([2]), "z": pd.Series([3])}) + df.insert(0, "x", 1) + result = df.diff(axis=1) + expected = pd.DataFrame( + {"x": np.nan, "y": pd.Series(1), "z": pd.Series(1)} + ).astype("float64") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis0(self, tz): + # GH#18578 + df = DataFrame( + { + 0: date_range("2010", freq="D", periods=2, tz=tz), + 1: date_range("2010", freq="D", periods=2, tz=tz), + } + ) + + result = df.diff(axis=0) + expected = DataFrame( + { + 0: pd.TimedeltaIndex(["NaT", "1 days"]), + 1: pd.TimedeltaIndex(["NaT", "1 days"]), + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_diff_datetime_axis1(self, tz): + # GH#18578 + df = DataFrame( + { + 0: date_range("2010", freq="D", periods=2, tz=tz), + 1: date_range("2010", freq="D", periods=2, tz=tz), + } + ) + if tz is None: + result = df.diff(axis=1) + expected = DataFrame( + { + 0: pd.TimedeltaIndex(["NaT", "NaT"]), + 1: pd.TimedeltaIndex(["0 days", "0 
days"]), + } + ) + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(NotImplementedError): + result = df.diff(axis=1) + + def test_diff_timedelta(self): + # GH#4533 + df = DataFrame( + dict( + time=[Timestamp("20130101 9:01"), Timestamp("20130101 9:02")], + value=[1.0, 2.0], + ) + ) + + res = df.diff() + exp = DataFrame( + [[pd.NaT, np.nan], [pd.Timedelta("00:01:00"), 1]], columns=["time", "value"] + ) + tm.assert_frame_equal(res, exp) + + def test_diff_mixed_dtype(self): + df = DataFrame(np.random.randn(5, 3)) + df["A"] = np.array([1, 2, 3, 4, 5], dtype=object) + + result = df.diff() + assert result[0].dtype == np.float64 + + def test_diff_neg_n(self, datetime_frame): + rs = datetime_frame.diff(-1) + xp = datetime_frame - datetime_frame.shift(-1) + tm.assert_frame_equal(rs, xp) + + def test_diff_float_n(self, datetime_frame): + rs = datetime_frame.diff(1.0) + xp = datetime_frame.diff(1) + tm.assert_frame_equal(rs, xp) + + def test_diff_axis(self): + # GH#9727 + df = DataFrame([[1.0, 2.0], [3.0, 4.0]]) + tm.assert_frame_equal( + df.diff(axis=1), DataFrame([[np.nan, 1.0], [np.nan, 1.0]]) + ) + tm.assert_frame_equal( + df.diff(axis=0), DataFrame([[np.nan, np.nan], [2.0, 2.0]]) + ) diff --git a/pandas/tests/frame/methods/test_drop_duplicates.py b/pandas/tests/frame/methods/test_drop_duplicates.py new file mode 100644 index 00000000..fd4bae26 --- /dev/null +++ b/pandas/tests/frame/methods/test_drop_duplicates.py @@ -0,0 +1,420 @@ +import re + +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) +def test_drop_duplicates_with_misspelled_column_name(subset): + # GH 19730 + df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + msg = re.escape("Index(['a'], dtype='object')") + + with pytest.raises(KeyError, match=msg): + df.drop_duplicates(subset) + + +def test_drop_duplicates(): + df = DataFrame( + { + "AAA": ["foo", "bar", "foo", 
"bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("AAA") + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep="last") + expected = df.loc[[6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep=False) + expected = df.loc[[]] + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + expected = df.loc[[0, 1, 2, 3]] + result = df.drop_duplicates(np.array(["AAA", "B"])) + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates(["AAA", "B"]) + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AAA", "B"), keep="last") + expected = df.loc[[0, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AAA", "B"), keep=False) + expected = df.loc[[0]] + tm.assert_frame_equal(result, expected) + + # consider everything + df2 = df.loc[:, ["AAA", "B", "C"]] + + result = df2.drop_duplicates() + # in this case only + expected = df2.drop_duplicates(["AAA", "B"]) + tm.assert_frame_equal(result, expected) + + result = df2.drop_duplicates(keep="last") + expected = df2.drop_duplicates(["AAA", "B"], keep="last") + tm.assert_frame_equal(result, expected) + + result = df2.drop_duplicates(keep=False) + expected = df2.drop_duplicates(["AAA", "B"], keep=False) + tm.assert_frame_equal(result, expected) + + # integers + result = df.drop_duplicates("C") + expected = df.iloc[[0, 2]] + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates("C", keep="last") + expected = df.iloc[[-2, -1]] + tm.assert_frame_equal(result, expected) + + df["E"] = df["C"].astype("int8") + result = df.drop_duplicates("E") + expected = df.iloc[[0, 2]] + tm.assert_frame_equal(result, expected) + result = df.drop_duplicates("E", keep="last") + expected = df.iloc[[-2, 
-1]] + tm.assert_frame_equal(result, expected) + + # GH 11376 + df = DataFrame({"x": [7, 6, 3, 3, 4, 8, 0], "y": [0, 6, 5, 5, 9, 1, 2]}) + expected = df.loc[df.index != 3] + tm.assert_frame_equal(df.drop_duplicates(), expected) + + df = DataFrame([[1, 0], [0, 2]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + df = DataFrame([[-2, 0], [0, -4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + x = np.iinfo(np.int64).max / 3 * 2 + df = DataFrame([[-x, x], [0, x + 4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + df = DataFrame([[-x, x], [x, x + 4]]) + tm.assert_frame_equal(df.drop_duplicates(), df) + + # GH 11864 + df = DataFrame([i] * 9 for i in range(16)) + df = df.append([[1] + [0] * 8], ignore_index=True) + + for keep in ["first", "last", False]: + assert df.duplicated(keep=keep).sum() == 0 + + +def test_drop_duplicates_with_duplicate_column_names(): + # GH17836 + df = DataFrame([[1, 2, 5], [3, 4, 6], [3, 4, 7]], columns=["a", "a", "b"]) + + result0 = df.drop_duplicates() + tm.assert_frame_equal(result0, df) + + result1 = df.drop_duplicates("a") + expected1 = df[:2] + tm.assert_frame_equal(result1, expected1) + + +def test_drop_duplicates_for_take_all(): + df = DataFrame( + { + "AAA": ["foo", "bar", "baz", "bar", "foo", "bar", "qux", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("AAA") + expected = df.iloc[[0, 1, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep="last") + expected = df.iloc[[2, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("AAA", keep=False) + expected = df.iloc[[2, 6]] + tm.assert_frame_equal(result, expected) + + # multiple columns + result = df.drop_duplicates(["AAA", "B"]) + expected = df.iloc[[0, 1, 2, 3, 4, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["AAA", "B"], keep="last") 
+ expected = df.iloc[[0, 1, 2, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["AAA", "B"], keep=False) + expected = df.iloc[[0, 1, 2, 6]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_tuple(): + df = DataFrame( + { + ("AA", "AB"): ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates(("AA", "AB")) + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AA", "AB"), keep="last") + expected = df.loc[[6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(("AA", "AB"), keep=False) + expected = df.loc[[]] # empty df + assert len(result) == 0 + tm.assert_frame_equal(result, expected) + + # multi column + expected = df.loc[[0, 1, 2, 3]] + result = df.drop_duplicates((("AA", "AB"), "B")) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "df", + [ + DataFrame(), + DataFrame(columns=[]), + DataFrame(columns=["A", "B", "C"]), + DataFrame(index=[]), + DataFrame(index=["A", "B", "C"]), + ], +) +def test_drop_duplicates_empty(df): + # GH 20516 + result = df.drop_duplicates() + tm.assert_frame_equal(result, df) + + result = df.copy() + result.drop_duplicates(inplace=True) + tm.assert_frame_equal(result, df) + + +def test_drop_duplicates_NA(): + # none + df = DataFrame( + { + "A": [None, None, "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 1.0, 1, 1.0], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("A") + expected = df.loc[[0, 2, 3]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep="last") + expected = df.loc[[1, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = 
df.drop_duplicates("A", keep=False) + expected = df.loc[[]] # empty df + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + result = df.drop_duplicates(["A", "B"]) + expected = df.loc[[0, 2, 3, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["A", "B"], keep="last") + expected = df.loc[[1, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["A", "B"], keep=False) + expected = df.loc[[6]] + tm.assert_frame_equal(result, expected) + + # nan + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 1.0, 1, 1.0], + "D": range(8), + } + ) + # single column + result = df.drop_duplicates("C") + expected = df[:2] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep="last") + expected = df.loc[[3, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep=False) + expected = df.loc[[]] # empty df + tm.assert_frame_equal(result, expected) + assert len(result) == 0 + + # multi column + result = df.drop_duplicates(["C", "B"]) + expected = df.loc[[0, 1, 2, 4]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["C", "B"], keep="last") + expected = df.loc[[1, 3, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates(["C", "B"], keep=False) + expected = df.loc[[1]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_NA_for_take_all(): + # none + df = DataFrame( + { + "A": [None, None, "foo", "bar", "foo", "baz", "bar", "qux"], + "C": [1.0, np.nan, np.nan, np.nan, 1.0, 2.0, 3, 1.0], + } + ) + + # single column + result = df.drop_duplicates("A") + expected = df.iloc[[0, 2, 3, 5, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep="last") + expected = df.iloc[[1, 4, 5, 6, 7]] + 
tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("A", keep=False) + expected = df.iloc[[5, 7]] + tm.assert_frame_equal(result, expected) + + # nan + + # single column + result = df.drop_duplicates("C") + expected = df.iloc[[0, 1, 5, 6]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep="last") + expected = df.iloc[[3, 5, 6, 7]] + tm.assert_frame_equal(result, expected) + + result = df.drop_duplicates("C", keep=False) + expected = df.iloc[[5, 6]] + tm.assert_frame_equal(result, expected) + + +def test_drop_duplicates_inplace(): + orig = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "bar", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": [1, 1, 2, 2, 2, 2, 1, 2], + "D": range(8), + } + ) + # single column + df = orig.copy() + df.drop_duplicates("A", inplace=True) + expected = orig[:2] + result = df + tm.assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates("A", keep="last", inplace=True) + expected = orig.loc[[6, 7]] + result = df + tm.assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates("A", keep=False, inplace=True) + expected = orig.loc[[]] + result = df + tm.assert_frame_equal(result, expected) + assert len(df) == 0 + + # multi column + df = orig.copy() + df.drop_duplicates(["A", "B"], inplace=True) + expected = orig.loc[[0, 1, 2, 3]] + result = df + tm.assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates(["A", "B"], keep="last", inplace=True) + expected = orig.loc[[0, 5, 6, 7]] + result = df + tm.assert_frame_equal(result, expected) + + df = orig.copy() + df.drop_duplicates(["A", "B"], keep=False, inplace=True) + expected = orig.loc[[0]] + result = df + tm.assert_frame_equal(result, expected) + + # consider everything + orig2 = orig.loc[:, ["A", "B", "C"]].copy() + + df2 = orig2.copy() + df2.drop_duplicates(inplace=True) + # in this case only + expected = 
orig2.drop_duplicates(["A", "B"]) + result = df2 + tm.assert_frame_equal(result, expected) + + df2 = orig2.copy() + df2.drop_duplicates(keep="last", inplace=True) + expected = orig2.drop_duplicates(["A", "B"], keep="last") + result = df2 + tm.assert_frame_equal(result, expected) + + df2 = orig2.copy() + df2.drop_duplicates(keep=False, inplace=True) + expected = orig2.drop_duplicates(["A", "B"], keep=False) + result = df2 + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize( + "origin_dict, output_dict, ignore_index, output_index", + [ + ({"A": [2, 2, 3]}, {"A": [2, 3]}, True, [0, 1]), + ({"A": [2, 2, 3]}, {"A": [2, 3]}, False, [0, 2]), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, True, [0, 1]), + ({"A": [2, 2, 3], "B": [2, 2, 4]}, {"A": [2, 3], "B": [2, 4]}, False, [0, 2]), + ], +) +def test_drop_duplicates_ignore_index( + inplace, origin_dict, output_dict, ignore_index, output_index +): + # GH 30114 + df = DataFrame(origin_dict) + expected = DataFrame(output_dict, index=output_index) + + if inplace: + result_df = df.copy() + result_df.drop_duplicates(ignore_index=ignore_index, inplace=inplace) + else: + result_df = df.drop_duplicates(ignore_index=ignore_index, inplace=inplace) + + tm.assert_frame_equal(result_df, expected) + tm.assert_frame_equal(df, DataFrame(origin_dict)) diff --git a/pandas/tests/frame/methods/test_duplicated.py b/pandas/tests/frame/methods/test_duplicated.py new file mode 100644 index 00000000..72eec875 --- /dev/null +++ b/pandas/tests/frame/methods/test_duplicated.py @@ -0,0 +1,100 @@ +import re + +import numpy as np +import pytest + +from pandas import DataFrame, Series +import pandas._testing as tm + + +@pytest.mark.parametrize("subset", ["a", ["a"], ["a", "B"]]) +def test_duplicated_with_misspelled_column_name(subset): + # GH 19730 + df = DataFrame({"A": [0, 0, 1], "B": [0, 0, 1], "C": [0, 0, 1]}) + msg = re.escape("Index(['a'], dtype='object')") + + 
with pytest.raises(KeyError, match=msg): + df.duplicated(subset) + + +@pytest.mark.slow +def test_duplicated_do_not_fail_on_wide_dataframes(): + # gh-21524 + # Given the wide dataframe with a lot of columns + # with different (important!) values + data = { + "col_{0:02d}".format(i): np.random.randint(0, 1000, 30000) for i in range(100) + } + df = DataFrame(data).T + result = df.duplicated() + + # Then duplicates produce the bool Series as a result and don't fail during + # calculation. Actual values doesn't matter here, though usually it's all + # False in this case + assert isinstance(result, Series) + assert result.dtype == np.bool + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_keep(keep, expected): + df = DataFrame({"A": [0, 1, 1, 2, 0], "B": ["a", "b", "b", "c", "a"]}) + + result = df.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.xfail(reason="GH#21720; nan/None falsely considered equal") +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_nan_none(keep, expected): + df = DataFrame({"C": [np.nan, 3, 3, None, np.nan]}, dtype=object) + + result = df.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("keep", ["first", "last", False]) +@pytest.mark.parametrize("subset", [None, ["A", "B"], "A"]) +def test_duplicated_subset(subset, keep): + df = DataFrame( + { + "A": [0, 1, 1, 2, 0], + "B": ["a", "b", "b", "c", "a"], + "C": [np.nan, 3, 3, None, np.nan], + } + ) + + if subset is None: + subset = list(df.columns) + elif isinstance(subset, str): + # need to have a DataFrame, not a Series + # -> select 
columns with singleton list, not string + subset = [subset] + + expected = df[subset].duplicated(keep=keep) + result = df.duplicated(keep=keep, subset=subset) + tm.assert_series_equal(result, expected) + + +def test_duplicated_on_empty_frame(): + # GH 25184 + + df = DataFrame(columns=["a", "b"]) + dupes = df.duplicated("a") + + result = df[dupes] + expected = df.copy() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py new file mode 100644 index 00000000..76c87ed3 --- /dev/null +++ b/pandas/tests/frame/methods/test_explode.py @@ -0,0 +1,164 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_error(): + df = pd.DataFrame( + {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} + ) + with pytest.raises(ValueError): + df.explode(list("AA")) + + df.columns = list("AA") + with pytest.raises(ValueError): + df.explode("A") + + +def test_basic(): + df = pd.DataFrame( + {"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")), "B": 1} + ) + result = df.explode("A") + expected = pd.DataFrame( + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object + ), + "B": 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_multi_index_rows(): + df = pd.DataFrame( + {"A": np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), "B": 1}, + index=pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]), + ) + + result = df.explode("A") + expected = pd.DataFrame( + { + "A": pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], + index=pd.MultiIndex.from_tuples( + [ + ("a", 1), + ("a", 1), + ("a", 1), + ("a", 2), + ("b", 1), + ("b", 2), + ("b", 2), + ] + ), + dtype=object, + ), + "B": 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_multi_index_columns(): + df = pd.DataFrame( + {("A", 1): np.array([[0, 1, 2], np.nan, [], (3, 4)], 
dtype=object), ("A", 2): 1} + ) + + result = df.explode(("A", 1)) + expected = pd.DataFrame( + { + ("A", 1): pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], + index=pd.Index([0, 0, 0, 1, 2, 3, 3]), + dtype=object, + ), + ("A", 2): 1, + } + ) + tm.assert_frame_equal(result, expected) + + +def test_usecase(): + # explode a single column + # gh-10511 + df = pd.DataFrame( + [[11, range(5), 10], [22, range(3), 20]], columns=list("ABC") + ).set_index("C") + result = df.explode("B") + + expected = pd.DataFrame( + { + "A": [11, 11, 11, 11, 11, 22, 22, 22], + "B": np.array([0, 1, 2, 3, 4, 0, 1, 2], dtype=object), + "C": [10, 10, 10, 10, 10, 20, 20, 20], + }, + columns=list("ABC"), + ).set_index("C") + + tm.assert_frame_equal(result, expected) + + # gh-8517 + df = pd.DataFrame( + [["2014-01-01", "Alice", "A B"], ["2014-01-02", "Bob", "C D"]], + columns=["dt", "name", "text"], + ) + result = df.assign(text=df.text.str.split(" ")).explode("text") + expected = pd.DataFrame( + [ + ["2014-01-01", "Alice", "A"], + ["2014-01-01", "Alice", "B"], + ["2014-01-02", "Bob", "C"], + ["2014-01-02", "Bob", "D"], + ], + columns=["dt", "name", "text"], + index=[0, 0, 1, 1], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_dict, input_index, expected_dict, expected_index", + [ + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + [0, 0], + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + [0, 0, 0, 0], + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.Index([0, 0], name="my_index"), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.Index([0, 0, 0, 0], name="my_index"), + ), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0], [1, 1]], names=["my_first_index", "my_second_index"] + ), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0], [1, 1, 1, 1]], + names=["my_first_index", "my_second_index"], + ), + 
), + ( + {"col1": [[1, 2], [3, 4]], "col2": ["foo", "bar"]}, + pd.MultiIndex.from_arrays([[0, 0], [1, 1]], names=["my_index", None]), + {"col1": [1, 2, 3, 4], "col2": ["foo", "foo", "bar", "bar"]}, + pd.MultiIndex.from_arrays( + [[0, 0, 0, 0], [1, 1, 1, 1]], names=["my_index", None] + ), + ), + ], +) +def test_duplicate_index(input_dict, input_index, expected_dict, expected_index): + # GH 28005 + df = pd.DataFrame(input_dict, index=input_index) + result = df.explode("col1") + expected = pd.DataFrame(expected_dict, index=expected_index, dtype=object) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py new file mode 100644 index 00000000..0eb94afc --- /dev/null +++ b/pandas/tests/frame/methods/test_isin.py @@ -0,0 +1,186 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + + +class TestDataFrameIsIn: + def test_isin(self): + # GH#4211 + df = DataFrame( + { + "vals": [1, 2, 3, 4], + "ids": ["a", "b", "f", "n"], + "ids2": ["a", "n", "c", "n"], + }, + index=["foo", "bar", "baz", "qux"], + ) + other = ["a", "b", "c"] + + result = df.isin(other) + expected = DataFrame([df.loc[s].isin(other) for s in df.index]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # GH#16991 + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + expected = DataFrame(False, df.index, df.columns) + + result = df.isin(empty) + tm.assert_frame_equal(result, expected) + + def test_isin_dict(self): + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", "f"]}) + d = {"A": ["a"]} + + expected = DataFrame(False, df.index, df.columns) + expected.loc[0, "A"] = True + + result = df.isin(d) + tm.assert_frame_equal(result, expected) + + # non unique columns + df = DataFrame({"A": ["a", "b", "c"], "B": ["a", "e", 
"f"]}) + df.columns = ["A", "A"] + expected = DataFrame(False, df.index, df.columns) + expected.loc[0, "A"] = True + result = df.isin(d) + tm.assert_frame_equal(result, expected) + + def test_isin_with_string_scalar(self): + # GH#4763 + df = DataFrame( + { + "vals": [1, 2, 3, 4], + "ids": ["a", "b", "f", "n"], + "ids2": ["a", "n", "c", "n"], + }, + index=["foo", "bar", "baz", "qux"], + ) + with pytest.raises(TypeError): + df.isin("a") + + with pytest.raises(TypeError): + df.isin("aaa") + + def test_isin_df(self): + df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}) + df2 = DataFrame({"A": [0, 2, 12, 4], "B": [2, np.nan, 4, 5]}) + expected = DataFrame(False, df1.index, df1.columns) + result = df1.isin(df2) + expected["A"].loc[[1, 3]] = True + expected["B"].loc[[0, 2]] = True + tm.assert_frame_equal(result, expected) + + # partial overlapping columns + df2.columns = ["A", "C"] + result = df1.isin(df2) + expected["B"] = False + tm.assert_frame_equal(result, expected) + + def test_isin_tuples(self): + # GH#16394 + df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "f"]}) + df["C"] = list(zip(df["A"], df["B"])) + result = df["C"].isin([(1, "a")]) + tm.assert_series_equal(result, Series([True, False, False], name="C")) + + def test_isin_df_dupe_values(self): + df1 = DataFrame({"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}) + # just cols duped + df2 = DataFrame([[0, 2], [12, 4], [2, np.nan], [4, 5]], columns=["B", "B"]) + with pytest.raises(ValueError): + df1.isin(df2) + + # just index duped + df2 = DataFrame( + [[0, 2], [12, 4], [2, np.nan], [4, 5]], + columns=["A", "B"], + index=[0, 0, 1, 1], + ) + with pytest.raises(ValueError): + df1.isin(df2) + + # cols and index: + df2.columns = ["B", "B"] + with pytest.raises(ValueError): + df1.isin(df2) + + def test_isin_dupe_self(self): + other = DataFrame({"A": [1, 0, 1, 0], "B": [1, 1, 0, 0]}) + df = DataFrame([[1, 1], [1, 0], [0, 0]], columns=["A", "A"]) + result = df.isin(other) + expected = DataFrame(False, 
index=df.index, columns=df.columns) + expected.loc[0] = True + expected.iloc[1, 1] = True + tm.assert_frame_equal(result, expected) + + def test_isin_against_series(self): + df = pd.DataFrame( + {"A": [1, 2, 3, 4], "B": [2, np.nan, 4, 4]}, index=["a", "b", "c", "d"] + ) + s = pd.Series([1, 3, 11, 4], index=["a", "b", "c", "d"]) + expected = DataFrame(False, index=df.index, columns=df.columns) + expected["A"].loc["a"] = True + expected.loc["d"] = True + result = df.isin(s) + tm.assert_frame_equal(result, expected) + + def test_isin_multiIndex(self): + idx = MultiIndex.from_tuples( + [ + (0, "a", "foo"), + (0, "a", "bar"), + (0, "b", "bar"), + (0, "b", "baz"), + (2, "a", "foo"), + (2, "a", "bar"), + (2, "c", "bar"), + (2, "c", "baz"), + (1, "b", "foo"), + (1, "b", "bar"), + (1, "c", "bar"), + (1, "c", "baz"), + ] + ) + df1 = DataFrame({"A": np.ones(12), "B": np.zeros(12)}, index=idx) + df2 = DataFrame( + { + "A": [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], + "B": [1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1], + } + ) + # against regular index + expected = DataFrame(False, index=df1.index, columns=df1.columns) + result = df1.isin(df2) + tm.assert_frame_equal(result, expected) + + df2.index = idx + expected = df2.values.astype(bool) # np.bool alias removed in NumPy 1.24 + expected[:, 1] = ~expected[:, 1] + expected = DataFrame(expected, columns=["A", "B"], index=idx) + + result = df1.isin(df2) + tm.assert_frame_equal(result, expected) + + def test_isin_empty_datetimelike(self): + # GH#15473 + df1_ts = DataFrame({"date": pd.to_datetime(["2014-01-01", "2014-01-02"])}) + df1_td = DataFrame({"date": [pd.Timedelta(1, "s"), pd.Timedelta(2, "s")]}) + df2 = DataFrame({"date": []}) + df3 = DataFrame() + + expected = DataFrame({"date": [False, False]}) + + result = df1_ts.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_ts.isin(df3) + tm.assert_frame_equal(result, expected) + + result = df1_td.isin(df2) + tm.assert_frame_equal(result, expected) + result = df1_td.isin(df3) + tm.assert_frame_equal(result, 
expected) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py new file mode 100644 index 00000000..4ce47423 --- /dev/null +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -0,0 +1,211 @@ +""" +Note: for naming purposes, most tests are titled e.g. "test_nlargest_foo" +but are implicitly also testing nsmallest_foo. +""" +from string import ascii_lowercase + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture +def df_duplicates(): + return pd.DataFrame( + {"a": [1, 2, 3, 4, 4], "b": [1, 1, 1, 1, 1], "c": [0, 1, 2, 5, 4]}, + index=[0, 0, 1, 1, 1], + ) + + +@pytest.fixture +def df_strings(): + return pd.DataFrame( + { + "a": np.random.permutation(10), + "b": list(ascii_lowercase[:10]), + "c": np.random.permutation(10).astype("float64"), + } + ) + + +@pytest.fixture +def df_main_dtypes(): + return pd.DataFrame( + { + "group": [1, 1, 2], + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "category_string": pd.Series(list("abc")).astype("category"), + "category_int": [7, 8, 9], + "datetime": pd.date_range("20130101", periods=3), + "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + }, + columns=[ + "group", + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + + +class TestNLargestNSmallest: + + # ---------------------------------------------------------------------- + # Top / bottom + @pytest.mark.parametrize( + "order", + [ + ["a"], + ["c"], + ["a", "b"], + ["a", "c"], + ["b", "a"], + ["b", "c"], + ["a", "b", "c"], + ["c", "a", "b"], + ["c", "b", "a"], + ["b", "c", "a"], + ["b", "a", "c"], + # dups! 
+ ["b", "c", "c"], + ], + ) + @pytest.mark.parametrize("n", range(1, 11)) + def test_nlargest_n(self, df_strings, nselect_method, n, order): + # GH#10393 + df = df_strings + if "b" in order: + + error_msg = ( + f"Column 'b' has dtype object, " + f"cannot use method '{nselect_method}' with this dtype" + ) + with pytest.raises(TypeError, match=error_msg): + getattr(df, nselect_method)(n, order) + else: + ascending = nselect_method == "nsmallest" + result = getattr(df, nselect_method)(n, order) + expected = df.sort_values(order, ascending=ascending).head(n) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "columns", [["group", "category_string"], ["group", "string"]] + ) + def test_nlargest_error(self, df_main_dtypes, nselect_method, columns): + df = df_main_dtypes + col = columns[1] + error_msg = ( + f"Column '{col}' has dtype {df[col].dtype}, " + f"cannot use method '{nselect_method}' with this dtype" + ) + # escape some characters that may be in the repr + error_msg = ( + error_msg.replace("(", "\\(") + .replace(")", "\\)") + .replace("[", "\\[") + .replace("]", "\\]") + ) + with pytest.raises(TypeError, match=error_msg): + getattr(df, nselect_method)(2, columns) + + def test_nlargest_all_dtypes(self, df_main_dtypes): + df = df_main_dtypes + df.nsmallest(2, list(set(df) - {"category_string", "string"})) + df.nlargest(2, list(set(df) - {"category_string", "string"})) + + def test_nlargest_duplicates_on_starter_columns(self): + # regression test for GH#22752 + + df = pd.DataFrame({"a": [2, 2, 2, 1, 1, 1], "b": [1, 2, 3, 3, 2, 1]}) + + result = df.nlargest(4, columns=["a", "b"]) + expected = pd.DataFrame( + {"a": [2, 2, 2, 1], "b": [3, 2, 1, 3]}, index=[2, 1, 0, 3] + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(4, columns=["a", "b"]) + expected = pd.DataFrame( + {"a": [1, 1, 1, 2], "b": [1, 2, 3, 1]}, index=[5, 4, 3, 0] + ) + tm.assert_frame_equal(result, expected) + + def test_nlargest_n_identical_values(self): + # 
GH#15297 + df = pd.DataFrame({"a": [1] * 5, "b": [1, 2, 3, 4, 5]}) + + result = df.nlargest(3, "a") + expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}, index=[0, 1, 2]) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(3, "a") + expected = pd.DataFrame({"a": [1] * 3, "b": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "order", + [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]], + ) + @pytest.mark.parametrize("n", range(1, 6)) + def test_nlargest_n_duplicate_index(self, df_duplicates, n, order): + # GH#13412 + + df = df_duplicates + result = df.nsmallest(n, order) + expected = df.sort_values(order).head(n) + tm.assert_frame_equal(result, expected) + + result = df.nlargest(n, order) + expected = df.sort_values(order, ascending=False).head(n) + tm.assert_frame_equal(result, expected) + + def test_nlargest_duplicate_keep_all_ties(self): + # GH#16818 + df = pd.DataFrame( + {"a": [5, 4, 4, 2, 3, 3, 3, 3], "b": [10, 9, 8, 7, 5, 50, 10, 20]} + ) + result = df.nlargest(4, "a", keep="all") + expected = pd.DataFrame( + { + "a": {0: 5, 1: 4, 2: 4, 4: 3, 5: 3, 6: 3, 7: 3}, + "b": {0: 10, 1: 9, 2: 8, 4: 5, 5: 50, 6: 10, 7: 20}, + } + ) + tm.assert_frame_equal(result, expected) + + result = df.nsmallest(2, "a", keep="all") + expected = pd.DataFrame( + { + "a": {3: 2, 4: 3, 5: 3, 6: 3, 7: 3}, + "b": {3: 7, 4: 5, 5: 50, 6: 10, 7: 20}, + } + ) + tm.assert_frame_equal(result, expected) + + def test_nlargest_multiindex_column_lookup(self): + # Check whether tuples are correctly treated as multi-level lookups. 
+ # GH#23033 + df = pd.DataFrame( + columns=pd.MultiIndex.from_product([["x"], ["a", "b"]]), + data=[[0.33, 0.13], [0.86, 0.25], [0.25, 0.70], [0.85, 0.91]], + ) + + # nsmallest + result = df.nsmallest(3, ("x", "a")) + expected = df.iloc[[2, 0, 3]] + tm.assert_frame_equal(result, expected) + + # nlargest + result = df.nlargest(3, ("x", "b")) + expected = df.iloc[[3, 2, 1]] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_pct_change.py b/pandas/tests/frame/methods/test_pct_change.py new file mode 100644 index 00000000..8f3f37fb --- /dev/null +++ b/pandas/tests/frame/methods/test_pct_change.py @@ -0,0 +1,96 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Series +import pandas._testing as tm + + +class TestDataFramePctChange: + def test_pct_change_numeric(self): + # GH#11150 + pnl = DataFrame( + [np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(0, 40, 10)] + ).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 + + for axis in range(2): + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift(axis=axis) - 1 + result = pnl.pct_change(axis=axis, fill_method="pad") + + tm.assert_frame_equal(result, expected) + + def test_pct_change(self, datetime_frame): + rs = datetime_frame.pct_change(fill_method=None) + tm.assert_frame_equal(rs, datetime_frame / datetime_frame.shift(1) - 1) + + rs = datetime_frame.pct_change(2) + filled = datetime_frame.fillna(method="pad") + tm.assert_frame_equal(rs, filled / filled.shift(2) - 1) + + rs = datetime_frame.pct_change(fill_method="bfill", limit=1) + filled = datetime_frame.fillna(method="bfill", limit=1) + tm.assert_frame_equal(rs, filled / filled.shift(1) - 1) + + rs = datetime_frame.pct_change(freq="5D") + filled = datetime_frame.fillna(method="pad") + tm.assert_frame_equal( + rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) + ) + + def test_pct_change_shift_over_nas(self): + s = Series([1.0, 1.5, np.nan, 2.5, 
3.0]) + + df = DataFrame({"a": s, "b": s}) + + chg = df.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) + edf = DataFrame({"a": expected, "b": expected}) + tm.assert_frame_equal(chg, edf) + + @pytest.mark.parametrize( + "freq, periods, fill_method, limit", + [ + ("5B", 5, None, None), + ("3B", 3, None, None), + ("3B", 3, "bfill", None), + ("7B", 7, "pad", 1), + ("7B", 7, "bfill", 3), + ("14B", 14, None, None), + ], + ) + def test_pct_change_periods_freq( + self, datetime_frame, freq, periods, fill_method, limit + ): + # GH#7292 + rs_freq = datetime_frame.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + rs_periods = datetime_frame.pct_change( + periods, fill_method=fill_method, limit=limit + ) + tm.assert_frame_equal(rs_freq, rs_periods) + + empty_ts = DataFrame(index=datetime_frame.index, columns=datetime_frame.columns) + rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + tm.assert_frame_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + data = DataFrame( + {0: [np.nan, 1, 2, 3, 9, 18], 1: [0, 1, np.nan, 3, 9, 18]}, index=["a", "b"] * 3 + ) + result = data.pct_change(fill_method=fill_method) + if fill_method is None: + second_column = [np.nan, np.inf, np.nan, np.nan, 2.0, 1.0] + else: + second_column = [np.nan, np.inf, 0.0, 2.0, 2.0, 1.0] + expected = DataFrame( + {0: [np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], 1: second_column}, + index=["a", "b"] * 3, + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py new file mode 100644 index 00000000..64461c08 --- /dev/null +++ b/pandas/tests/frame/methods/test_quantile.py @@ -0,0 +1,492 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import 
DataFrame, Series, Timestamp +import pandas._testing as tm + + +class TestDataFrameQuantile: + def test_quantile_sparse(self): + # GH#17198 + s = pd.Series(pd.arrays.SparseArray([1, 2])) + s1 = pd.Series(pd.arrays.SparseArray([3, 4])) + df = pd.DataFrame({0: s, 1: s1}) + result = df.quantile() + + expected = pd.Series([1.5, 3.5], name=0.5) + tm.assert_series_equal(result, expected) + + def test_quantile(self, datetime_frame): + from numpy import percentile + + df = datetime_frame + q = df.quantile(0.1, axis=0) + assert q["A"] == percentile(df["A"], 10) + tm.assert_index_equal(q.index, df.columns) + + q = df.quantile(0.9, axis=1) + assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90) + tm.assert_index_equal(q.index, df.index) + + # test degenerate case + q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0) + assert np.isnan(q["x"]) and np.isnan(q["y"]) + + # non-numeric exclusion + df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) + rs = df.quantile(0.5) + xp = df.median().rename(0.5) + tm.assert_series_equal(rs, xp) + + # axis + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1) + expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + + result = df.quantile([0.5, 0.75], axis=1) + expected = DataFrame( + {1: [1.5, 1.75], 2: [2.5, 2.75], 3: [3.5, 3.75]}, index=[0.5, 0.75] + ) + tm.assert_frame_equal(result, expected, check_index_type=True) + + # We may want to break API in the future to change this + # so that we exclude non-numeric along the same axis + # See GH #7312 + df = DataFrame([[1, 2, 3], ["a", "b", 4]]) + result = df.quantile(0.5, axis=1) + expected = Series([3.0, 4.0], index=[0, 1], name=0.5) + tm.assert_series_equal(result, expected) + + def test_quantile_axis_mixed(self): + + # mixed on axis=1 + df = DataFrame( + { + "A": [1, 2, 3], + "B": [2.0, 3.0, 4.0], + "C": pd.date_range("20130101", periods=3), + "D": ["foo", "bar", 
"baz"], + } + ) + result = df.quantile(0.5, axis=1) + expected = Series([1.5, 2.5, 3.5], name=0.5) + tm.assert_series_equal(result, expected) + + # must raise + with pytest.raises(TypeError): + df.quantile(0.5, axis=1, numeric_only=False) + + def test_quantile_axis_parameter(self): + # GH 9543/9544 + + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + + result = df.quantile(0.5, axis=0) + + expected = Series([2.0, 3.0], index=["A", "B"], name=0.5) + tm.assert_series_equal(result, expected) + + expected = df.quantile(0.5, axis="index") + tm.assert_series_equal(result, expected) + + result = df.quantile(0.5, axis=1) + + expected = Series([1.5, 2.5, 3.5], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + + result = df.quantile(0.5, axis="columns") + tm.assert_series_equal(result, expected) + + msg = "No axis named -1 for object type " + with pytest.raises(ValueError, match=msg): + df.quantile(0.1, axis=-1) + msg = ( + "No axis named column for object type " + "" + ) + with pytest.raises(ValueError, match=msg): + df.quantile(0.1, axis="column") + + def test_quantile_interpolation(self): + # see gh-10174 + + # interpolation method other than default linear + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1, interpolation="nearest") + expected = Series([1, 2, 3], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + + # cross-check interpolation=nearest results in original dtype + exp = np.percentile( + np.array([[1, 2, 3], [2, 3, 4]]), 0.5, axis=0, interpolation="nearest" + ) + expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="int64") + tm.assert_series_equal(result, expected) + + # float + df = DataFrame({"A": [1.0, 2.0, 3.0], "B": [2.0, 3.0, 4.0]}, index=[1, 2, 3]) + result = df.quantile(0.5, axis=1, interpolation="nearest") + expected = Series([1.0, 2.0, 3.0], index=[1, 2, 3], name=0.5) + tm.assert_series_equal(result, expected) + exp = np.percentile( 
+ np.array([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]]), + 0.5, + axis=0, + interpolation="nearest", + ) + expected = Series(exp, index=[1, 2, 3], name=0.5, dtype="float64") + tm.assert_series_equal(result, expected) + + # axis + result = df.quantile([0.5, 0.75], axis=1, interpolation="lower") + expected = DataFrame( + {1: [1.0, 1.0], 2: [2.0, 2.0], 3: [3.0, 3.0]}, index=[0.5, 0.75] + ) + tm.assert_frame_equal(result, expected) + + # test degenerate case + df = DataFrame({"x": [], "y": []}) + q = df.quantile(0.1, axis=0, interpolation="higher") + assert np.isnan(q["x"]) and np.isnan(q["y"]) + + # multi + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile([0.25, 0.5], interpolation="midpoint") + + # https://github.com/numpy/numpy/issues/7163 + expected = DataFrame( + [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], + index=[0.25, 0.5], + columns=["a", "b", "c"], + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_interpolation_datetime(self, datetime_frame): + # see gh-10174 + + # interpolation = linear (default case) + df = datetime_frame + q = df.quantile(0.1, axis=0, interpolation="linear") + assert q["A"] == np.percentile(df["A"], 10) + + def test_quantile_interpolation_int(self, int_frame): + # see gh-10174 + + df = int_frame + # interpolation = linear (default case) + q = df.quantile(0.1) + assert q["A"] == np.percentile(df["A"], 10) + + # test with and without interpolation keyword + q1 = df.quantile(0.1, axis=0, interpolation="linear") + assert q1["A"] == np.percentile(df["A"], 10) + tm.assert_series_equal(q, q1) + + def test_quantile_multi(self): + df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=["a", "b", "c"]) + result = df.quantile([0.25, 0.5]) + expected = DataFrame( + [[1.5, 1.5, 1.5], [2.0, 2.0, 2.0]], + index=[0.25, 0.5], + columns=["a", "b", "c"], + ) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.quantile([0.25, 0.5], axis=1) + expected = DataFrame( + [[1.5, 1.5, 1.5], [2.0, 
2.0, 2.0]], index=[0.25, 0.5], columns=[0, 1, 2] + ) + + # empty + result = DataFrame({"x": [], "y": []}).quantile([0.1, 0.9], axis=0) + expected = DataFrame( + {"x": [np.nan, np.nan], "y": [np.nan, np.nan]}, index=[0.1, 0.9] + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_datetime(self): + df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) + + # exclude datetime + result = df.quantile(0.5) + expected = Series([2.5], index=["b"]) + + # datetime + result = df.quantile(0.5, numeric_only=False) + expected = Series( + [Timestamp("2010-07-02 12:00:00"), 2.5], index=["a", "b"], name=0.5 + ) + tm.assert_series_equal(result, expected) + + # datetime w/ multi + result = df.quantile([0.5], numeric_only=False) + expected = DataFrame( + [[Timestamp("2010-07-02 12:00:00"), 2.5]], index=[0.5], columns=["a", "b"] + ) + tm.assert_frame_equal(result, expected) + + # axis = 1 + df["c"] = pd.to_datetime(["2011", "2012"]) + result = df[["a", "c"]].quantile(0.5, axis=1, numeric_only=False) + expected = Series( + [Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")], + index=[0, 1], + name=0.5, + ) + tm.assert_series_equal(result, expected) + + result = df[["a", "c"]].quantile([0.5], axis=1, numeric_only=False) + expected = DataFrame( + [[Timestamp("2010-07-02 12:00:00"), Timestamp("2011-07-02 12:00:00")]], + index=[0.5], + columns=[0, 1], + ) + tm.assert_frame_equal(result, expected) + + # empty when numeric_only=True + # FIXME (gives empty frame in 0.18.1, broken in 0.19.0) + # result = df[['a', 'c']].quantile(.5) + # result = df[['a', 'c']].quantile([.5]) + + def test_quantile_invalid(self, datetime_frame): + msg = "percentiles should all be in the interval \\[0, 1\\]" + for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: + with pytest.raises(ValueError, match=msg): + datetime_frame.quantile(invalid) + + def test_quantile_box(self): + df = DataFrame( + { + "A": [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + 
pd.Timestamp("2011-01-03"), + ], + "B": [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), + ], + "C": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + } + ) + + res = df.quantile(0.5, numeric_only=False) + + exp = pd.Series( + [ + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + ], + name=0.5, + index=["A", "B", "C"], + ) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = pd.DataFrame( + [ + [ + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + ] + ], + index=[0.5], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(res, exp) + + # DatetimeBlock may be consolidated and contain NaT in different loc + df = DataFrame( + { + "A": [ + pd.Timestamp("2011-01-01"), + pd.NaT, + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + ], + "a": [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.NaT, + pd.Timestamp("2011-01-03"), + ], + "B": [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.NaT, + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), + ], + "b": [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.NaT, + pd.Timestamp("2011-01-03", tz="US/Eastern"), + ], + "C": [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.NaT, + ], + "c": [ + pd.NaT, + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ], + }, + columns=list("AaBbCc"), + ) + + res = df.quantile(0.5, numeric_only=False) + exp = pd.Series( + [ + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + pd.Timedelta("2 days"), + ], + name=0.5, 
+ index=list("AaBbCc"), + ) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = pd.DataFrame( + [ + [ + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timedelta("2 days"), + pd.Timedelta("2 days"), + ] + ], + index=[0.5], + columns=list("AaBbCc"), + ) + tm.assert_frame_equal(res, exp) + + def test_quantile_nan(self): + + # GH 14357 - float block where some cols have missing values + df = DataFrame({"a": np.arange(1, 6.0), "b": np.arange(1, 6.0)}) + df.iloc[-1, 1] = np.nan + + res = df.quantile(0.5) + exp = Series([3.0, 2.5], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75]) + exp = DataFrame({"a": [3.0, 4.0], "b": [2.5, 3.25]}, index=[0.5, 0.75]) + tm.assert_frame_equal(res, exp) + + res = df.quantile(0.5, axis=1) + exp = Series(np.arange(1.0, 6.0), name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75], axis=1) + exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75]) + tm.assert_frame_equal(res, exp) + + # full-nan column + df["b"] = np.nan + + res = df.quantile(0.5) + exp = Series([3.0, np.nan], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5, 0.75]) + exp = DataFrame({"a": [3.0, 4.0], "b": [np.nan, np.nan]}, index=[0.5, 0.75]) + tm.assert_frame_equal(res, exp) + + def test_quantile_nat(self): + + # full NaT column + df = DataFrame({"a": [pd.NaT, pd.NaT, pd.NaT]}) + + res = df.quantile(0.5, numeric_only=False) + exp = Series([pd.NaT], index=["a"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = DataFrame({"a": [pd.NaT]}, index=[0.5]) + tm.assert_frame_equal(res, exp) + + # mixed non-null / full null column + df = DataFrame( + { + "a": [ + pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-02"), + pd.Timestamp("2012-01-03"), + ], + "b": 
[pd.NaT, pd.NaT, pd.NaT], + } + ) + + res = df.quantile(0.5, numeric_only=False) + exp = Series([pd.Timestamp("2012-01-02"), pd.NaT], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5], numeric_only=False) + exp = DataFrame( + [[pd.Timestamp("2012-01-02"), pd.NaT]], index=[0.5], columns=["a", "b"] + ) + tm.assert_frame_equal(res, exp) + + def test_quantile_empty_no_rows(self): + + # floats + df = DataFrame(columns=["a", "b"], dtype="float64") + + res = df.quantile(0.5) + exp = Series([np.nan, np.nan], index=["a", "b"], name=0.5) + tm.assert_series_equal(res, exp) + + res = df.quantile([0.5]) + exp = DataFrame([[np.nan, np.nan]], columns=["a", "b"], index=[0.5]) + tm.assert_frame_equal(res, exp) + + # FIXME (gives empty frame in 0.18.1, broken in 0.19.0) + # res = df.quantile(0.5, axis=1) + # res = df.quantile([0.5], axis=1) + + # ints + df = DataFrame(columns=["a", "b"], dtype="int64") + + # FIXME (gives empty frame in 0.18.1, broken in 0.19.0) + # res = df.quantile(0.5) + + # datetimes + df = DataFrame(columns=["a", "b"], dtype="datetime64[ns]") + + # FIXME (gives NaNs instead of NaT in 0.18.1 or 0.19.0) + # res = df.quantile(0.5, numeric_only=False) + + def test_quantile_empty_no_columns(self): + # GH#23925 _get_numeric_data may drop all columns + df = pd.DataFrame(pd.date_range("1/1/18", periods=5)) + df.columns.name = "captain tightpants" + result = df.quantile(0.5) + expected = pd.Series([], index=[], name=0.5, dtype=np.float64) + expected.index.name = "captain tightpants" + tm.assert_series_equal(result, expected) + + result = df.quantile([0.5]) + expected = pd.DataFrame([], index=[0.5], columns=[]) + expected.columns.name = "captain tightpants" + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py new file mode 100644 index 00000000..bab2db31 --- /dev/null +++ b/pandas/tests/frame/methods/test_rank.py @@ -0,0 +1,331 @@ +from datetime 
import datetime, timedelta + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series +import pandas._testing as tm + + +class TestRank: + s = Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]) + df = DataFrame({"A": s, "B": s}) + + results = { + "average": np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5]), + "min": np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5]), + "max": np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6]), + "first": np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6]), + "dense": np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]), + } + + @pytest.fixture(params=["average", "min", "max", "first", "dense"]) + def method(self, request): + """ + Fixture for trying all rank methods + """ + return request.param + + @td.skip_if_no_scipy + def test_rank(self, float_frame): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + float_frame["A"][::2] = np.nan + float_frame["B"][::3] = np.nan + float_frame["C"][::4] = np.nan + float_frame["D"][::5] = np.nan + + ranks0 = float_frame.rank() + ranks1 = float_frame.rank(1) + mask = np.isnan(float_frame.values) + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp0[mask] = np.nan + + exp1 = np.apply_along_axis(rankdata, 1, fvals) + exp1[mask] = np.nan + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # integers + df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) + + result = df.rank() + exp = df.astype(float).rank() + tm.assert_frame_equal(result, exp) + + result = df.rank(1) + exp = df.astype(float).rank(1) + tm.assert_frame_equal(result, exp) + + def test_rank2(self): + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 + result = df.rank(1, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([[1, 3, 2], [1, 2, 3]]) + expected = df.rank(0) 
/ 2.0 + result = df.rank(0, pct=True) + tm.assert_frame_equal(result, expected) + + df = DataFrame([["b", "c", "a"], ["a", "c", "b"]]) + expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame([["b", np.nan, "a"], ["a", "c", "b"]]) + expected = DataFrame([[2.0, np.nan, 1.0], [1.0, 3.0, 2.0]]) + result = df.rank(1, numeric_only=False) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[2.0, np.nan, 1.0], [1.0, 1.0, 2.0]]) + result = df.rank(0, numeric_only=False) + tm.assert_frame_equal(result, expected) + + # f7u12, this does not work without extensive workaround + data = [ + [datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)], + ] + df = DataFrame(data) + + # check the rank + expected = DataFrame([[2.0, np.nan, 1.0], [2.0, 3.0, 1.0]]) + result = df.rank(1, numeric_only=False, ascending=True) + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[1.0, np.nan, 2.0], [2.0, 1.0, 3.0]]) + result = df.rank(1, numeric_only=False, ascending=False) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, 1e60, 1e80, 1e-30]}) + exp = DataFrame({"a": [3.5, 1.0, 3.5, 5.0, 6.0, 7.0, 2.0]}) + tm.assert_frame_equal(df.rank(), exp) + + def test_rank_does_not_mutate(self): + # GH#18521 + # Check rank does not mutate DataFrame + df = DataFrame(np.random.randn(10, 3), dtype="float64") + expected = df.copy() + df.rank() + result = df + tm.assert_frame_equal(result, expected) + + def test_rank_mixed_frame(self, float_string_frame): + float_string_frame["datetime"] = datetime.now() + float_string_frame["timedelta"] = timedelta(days=1, seconds=1) + + result = float_string_frame.rank(1) + expected = 
float_string_frame.rank(1, numeric_only=True) + tm.assert_frame_equal(result, expected) + + @td.skip_if_no_scipy + def test_rank_na_option(self, float_frame): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + float_frame["A"][::2] = np.nan + float_frame["B"][::3] = np.nan + float_frame["C"][::4] = np.nan + float_frame["D"][::5] = np.nan + + # bottom + ranks0 = float_frame.rank(na_option="bottom") + ranks1 = float_frame.rank(1, na_option="bottom") + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fvals) + exp1 = np.apply_along_axis(rankdata, 1, fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # top + ranks0 = float_frame.rank(na_option="top") + ranks1 = float_frame.rank(1, na_option="top") + + fval0 = float_frame.fillna((float_frame.min() - 1).to_dict()).values + fval1 = float_frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, fval0) + exp1 = np.apply_along_axis(rankdata, 1, fval1) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # bottom + ranks0 = float_frame.rank(na_option="top", ascending=False) + ranks1 = float_frame.rank(1, na_option="top", ascending=False) + + fvals = float_frame.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fvals) + exp1 = np.apply_along_axis(rankdata, 1, -fvals) + + tm.assert_almost_equal(ranks0.values, exp0) + tm.assert_almost_equal(ranks1.values, exp1) + + # descending + + # top + ranks0 = float_frame.rank(na_option="bottom", ascending=False) + ranks1 = float_frame.rank(1, na_option="bottom", ascending=False) + + fval0 = float_frame.fillna((float_frame.min() - 1).to_dict()).values + fval1 = float_frame.T + fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T + fval1 = fval1.fillna(np.inf).values + + exp0 = np.apply_along_axis(rankdata, 0, -fval0) 
+ exp1 = np.apply_along_axis(rankdata, 1, -fval1) + + tm.assert_numpy_array_equal(ranks0.values, exp0) + tm.assert_numpy_array_equal(ranks1.values, exp1) + + # bad values throw error + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + float_frame.rank(na_option="bad", ascending=False) + + # invalid type + with pytest.raises(ValueError, match=msg): + float_frame.rank(na_option=True, ascending=False) + + def test_rank_axis(self): + # check if using axes' names gives the same result + df = DataFrame([[2, 1], [4, 3]]) + tm.assert_frame_equal(df.rank(axis=0), df.rank(axis="index")) + tm.assert_frame_equal(df.rank(axis=1), df.rank(axis="columns")) + + @td.skip_if_no_scipy + def test_rank_methods_frame(self): + import scipy.stats # noqa:F401 + from scipy.stats import rankdata + + xs = np.random.randint(0, 21, (100, 26)) + xs = (xs - 10.0) / 10.0 + cols = [chr(ord("z") - i) for i in range(xs.shape[1])] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + df = DataFrame(vals, columns=cols) + + for ax in [0, 1]: + for m in ["average", "min", "max", "first", "dense"]: + result = df.rank(axis=ax, method=m) + sprank = np.apply_along_axis( + rankdata, ax, vals, m if m != "first" else "ordinal" + ) + sprank = sprank.astype(np.float64) + expected = DataFrame(sprank, columns=cols).astype("float64") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) + def test_rank_descending(self, method, dtype): + + if "i" in dtype: + df = self.df.dropna() + else: + df = self.df.astype(dtype) + + res = df.rank(ascending=False) + expected = (df.max() - df).rank() + tm.assert_frame_equal(res, expected) + + if method == "first" and dtype == "O": + return + + expected = (df.max() - df).rank(method=method) + + if dtype != "O": + res2 = df.rank(method=method, ascending=False, numeric_only=True) + tm.assert_frame_equal(res2, expected) + + res3 = df.rank(method=method, ascending=False, numeric_only=False) + 
tm.assert_frame_equal(res3, expected) + + @pytest.mark.parametrize("axis", [0, 1]) + @pytest.mark.parametrize("dtype", [None, object]) + def test_rank_2d_tie_methods(self, method, axis, dtype): + df = self.df + + def _check2d(df, expected, method="average", axis=0): + exp_df = DataFrame({"A": expected, "B": expected}) + + if axis == 1: + df = df.T + exp_df = exp_df.T + + result = df.rank(method=method, axis=axis) + tm.assert_frame_equal(result, exp_df) + + disabled = {(object, "first")} + if (dtype, method) in disabled: + return + frame = df if dtype is None else df.astype(dtype) + _check2d(frame, self.results[method], method=method, axis=axis) + + @pytest.mark.parametrize( + "method,exp", + [ + ("dense", [[1.0, 1.0, 1.0], [1.0, 0.5, 2.0 / 3], [1.0, 0.5, 1.0 / 3]]), + ( + "min", + [ + [1.0 / 3, 1.0, 1.0], + [1.0 / 3, 1.0 / 3, 2.0 / 3], + [1.0 / 3, 1.0 / 3, 1.0 / 3], + ], + ), + ( + "max", + [[1.0, 1.0, 1.0], [1.0, 2.0 / 3, 2.0 / 3], [1.0, 2.0 / 3, 1.0 / 3]], + ), + ( + "average", + [[2.0 / 3, 1.0, 1.0], [2.0 / 3, 0.5, 2.0 / 3], [2.0 / 3, 0.5, 1.0 / 3]], + ), + ( + "first", + [ + [1.0 / 3, 1.0, 1.0], + [2.0 / 3, 1.0 / 3, 2.0 / 3], + [3.0 / 3, 2.0 / 3, 1.0 / 3], + ], + ), + ], + ) + def test_rank_pct_true(self, method, exp): + # see gh-15630. 
+ + df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) + result = df.rank(method=method, pct=True) + + expected = DataFrame(exp) + tm.assert_frame_equal(result, expected) + + @pytest.mark.single + @pytest.mark.high_memory + def test_pct_max_many_rows(self): + # GH 18271 + df = DataFrame( + {"A": np.arange(2 ** 24 + 1), "B": np.arange(2 ** 24 + 1, 0, -1)} + ) + result = df.rank(pct=True).max() + assert (result == 1).all() diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py new file mode 100644 index 00000000..97ef36bb --- /dev/null +++ b/pandas/tests/frame/methods/test_replace.py @@ -0,0 +1,1373 @@ +from datetime import datetime +from io import StringIO +import re +from typing import Dict, List, Union + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series, Timestamp, date_range +import pandas._testing as tm + + +@pytest.fixture +def mix_ab() -> Dict[str, List[Union[int, str]]]: + return {"a": list(range(4)), "b": list("ab..")} + + +@pytest.fixture +def mix_abc() -> Dict[str, List[Union[float, str]]]: + return {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} + + +class TestDataFrameReplace: + def test_replace_inplace(self, datetime_frame, float_string_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tsframe = datetime_frame.copy() + tsframe.replace(np.nan, 0, inplace=True) + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) + + # mixed type + mf = float_string_frame + mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan + mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan + + result = float_string_frame.replace(np.nan, 0) + expected = float_string_frame.fillna(value=0) + tm.assert_frame_equal(result, expected) + + tsframe = datetime_frame.copy() + tsframe.replace([np.nan], [0], inplace=True) + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) + + def test_regex_replace_scalar(self, mix_ab): + obj 
= {"a": list("ab.."), "b": list("efgh")} + dfobj = DataFrame(obj) + dfmix = DataFrame(mix_ab) + + # simplest cases + # regex -> value + # obj frame + res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + # everything with compiled regexs as well + res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1") + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1") + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_scalar_inplace(self, mix_ab): + obj = {"a": list("ab.."), "b": list("efgh")} + dfobj = 
DataFrame(obj) + dfmix = DataFrame(mix_ab) + + # simplest cases + # regex -> value + # obj frame + res = dfobj.copy() + res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.copy() + res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + # everything with compiled regexs as well + res = dfobj.copy() + res.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + res.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.copy() + res.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True) + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + res.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True) + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + res = dfobj.copy() + res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = 
dfobj.copy() + res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + # everything with compiled regexs as well + res = dfobj.copy() + res.replace(regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True) + tm.assert_frame_equal(dfobj, res.fillna(".")) + + # mixed + res = dfmix.copy() + res.replace(regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True) + tm.assert_frame_equal(dfmix, res.fillna(".")) + + # regex -> regex + # obj frame + res = dfobj.copy() + res.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True) + objc = obj.copy() + objc["a"] = ["a", "b", "...", "..."] + expec = DataFrame(objc) + tm.assert_frame_equal(res, expec) + + # with mixed + res = dfmix.copy() + res.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True) + mixc = mix_ab.copy() + mixc["b"] = ["a", "b", "...", "..."] + expec = DataFrame(mixc) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_list_obj(self): + obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} + dfobj = DataFrame(obj) + + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"e|f|g"] + values = [np.nan, "crap"] + res = dfobj.replace(to_replace_res, values, regex=True) + expec = DataFrame( + { + "a": ["a", "b", np.nan, np.nan], + "b": ["crap"] * 3 + ["h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"] + values = [r"\1\1", r"\1_crap"] + res = dfobj.replace(to_replace_res, values, regex=True) + expec = DataFrame( + 
{ + "a": ["a", "b", "..", ".."], + "b": ["e_crap", "f_crap", "g_crap", "h"], + "c": ["h", "e_crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"e"] + values = [r"\1\1", r"crap"] + res = dfobj.replace(to_replace_res, values, regex=True) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"e"] + values = [r"\1\1", r"crap"] + res = dfobj.replace(value=values, regex=to_replace_res) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_list_obj_inplace(self): + # same as above with inplace=True + # lists of regexes and values + obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} + dfobj = DataFrame(obj) + + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"e|f|g"] + values = [np.nan, "crap"] + res = dfobj.copy() + res.replace(to_replace_res, values, inplace=True, regex=True) + expec = DataFrame( + { + "a": ["a", "b", np.nan, np.nan], + "b": ["crap"] * 3 + ["h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"] + values = [r"\1\1", r"\1_crap"] + res = dfobj.copy() + res.replace(to_replace_res, values, inplace=True, regex=True) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["e_crap", "f_crap", "g_crap", "h"], + "c": ["h", "e_crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"e"] + values = [r"\1\1", 
r"crap"] + res = dfobj.copy() + res.replace(to_replace_res, values, inplace=True, regex=True) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"e"] + values = [r"\1\1", r"crap"] + res = dfobj.copy() + res.replace(value=values, regex=to_replace_res, inplace=True) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_list_mixed(self, mix_ab): + # mixed frame to make sure this doesn't break things + dfmix = DataFrame(mix_ab) + + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"a"] + values = [np.nan, "crap"] + mix2 = {"a": list(range(4)), "b": list("ab.."), "c": list("halo")} + dfmix2 = DataFrame(mix2) + res = dfmix2.replace(to_replace_res, values, regex=True) + expec = DataFrame( + { + "a": mix2["a"], + "b": ["crap", "b", np.nan, np.nan], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] + values = [r"\1\1", r"\1_crap"] + res = dfmix.replace(to_replace_res, values, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.replace(to_replace_res, values, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.replace(regex=to_replace_res, value=values) + expec 
= DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_list_mixed_inplace(self, mix_ab): + dfmix = DataFrame(mix_ab) + # the same inplace + # lists of regexes and values + # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] + to_replace_res = [r"\s*\.\s*", r"a"] + values = [np.nan, "crap"] + res = dfmix.copy() + res.replace(to_replace_res, values, inplace=True, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b", np.nan, np.nan]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] + to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] + values = [r"\1\1", r"\1_crap"] + res = dfmix.copy() + res.replace(to_replace_res, values, inplace=True, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN + # or vN)] + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.copy() + res.replace(to_replace_res, values, inplace=True, regex=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] + values = [r"\1\1", r"crap", r"\1_crap"] + res = dfmix.copy() + res.replace(regex=to_replace_res, value=values, inplace=True) + expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) + tm.assert_frame_equal(res, expec) + + def test_regex_replace_dict_mixed(self, mix_abc): + dfmix = DataFrame(mix_abc) + + # dicts + # single dict {re1: v1}, search the whole frame + # need test for this... 
+ + # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole + # frame + res = dfmix.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, regex=True) + res2 = dfmix.copy() + res2.replace({"b": r"\s*\.\s*"}, {"b": np.nan}, inplace=True, regex=True) + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the + # whole frame + res = dfmix.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, regex=True) + res2 = dfmix.copy() + res2.replace({"b": r"\s*(\.)\s*"}, {"b": r"\1ty"}, inplace=True, regex=True) + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + res = dfmix.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}) + res2 = dfmix.copy() + res2.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}, inplace=True) + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + # scalar -> dict + # to_replace regex, {value: value} + expec = DataFrame( + {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]} + ) + res = dfmix.replace("a", {"b": np.nan}, regex=True) + res2 = dfmix.copy() + res2.replace("a", {"b": np.nan}, regex=True, inplace=True) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + res = dfmix.replace("a", {"b": np.nan}, regex=True) + res2 = dfmix.copy() + res2.replace(regex="a", value={"b": np.nan}, inplace=True) + expec = DataFrame( + {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + + def test_regex_replace_dict_nested(self, mix_abc): + # nested dicts will not work until this is implemented for Series + dfmix = 
DataFrame(mix_abc) + res = dfmix.replace({"b": {r"\s*\.\s*": np.nan}}, regex=True) + res2 = dfmix.copy() + res4 = dfmix.copy() + res2.replace({"b": {r"\s*\.\s*": np.nan}}, inplace=True, regex=True) + res3 = dfmix.replace(regex={"b": {r"\s*\.\s*": np.nan}}) + res4.replace(regex={"b": {r"\s*\.\s*": np.nan}}, inplace=True) + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + tm.assert_frame_equal(res4, expec) + + def test_regex_replace_dict_nested_non_first_character(self): + # GH 25259 + df = pd.DataFrame({"first": ["abc", "bca", "cab"]}) + expected = pd.DataFrame({"first": [".bc", "bc.", "c.b"]}) + result = df.replace({"a": "."}, regex=True) + tm.assert_frame_equal(result, expected) + + def test_regex_replace_dict_nested_gh4115(self): + df = pd.DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) + expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2}) + result = df.replace({"Type": {"Q": 0, "T": 1}}) + tm.assert_frame_equal(result, expected) + + def test_regex_replace_list_to_scalar(self, mix_abc): + df = DataFrame(mix_abc) + expec = DataFrame( + { + "a": mix_abc["a"], + "b": np.array([np.nan] * 4), + "c": [np.nan, np.nan, np.nan, "d"], + } + ) + res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True) + res2 = df.copy() + res3 = df.copy() + res2.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True) + res3.replace(regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_str_to_numeric(self, mix_abc): + # what happens when you try to replace a numeric value with a regex? 
+ df = DataFrame(mix_abc) + res = df.replace(r"\s*\.\s*", 0, regex=True) + res2 = df.copy() + res2.replace(r"\s*\.\s*", 0, inplace=True, regex=True) + res3 = df.copy() + res3.replace(regex=r"\s*\.\s*", value=0, inplace=True) + expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]}) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_regex_list_to_numeric(self, mix_abc): + df = DataFrame(mix_abc) + res = df.replace([r"\s*\.\s*", "b"], 0, regex=True) + res2 = df.copy() + res2.replace([r"\s*\.\s*", "b"], 0, regex=True, inplace=True) + res3 = df.copy() + res3.replace(regex=[r"\s*\.\s*", "b"], value=0, inplace=True) + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", 0, 0, 0], "c": ["a", 0, np.nan, "d"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_series_of_regexes(self, mix_abc): + df = DataFrame(mix_abc) + s1 = Series({"b": r"\s*\.\s*"}) + s2 = Series({"b": np.nan}) + res = df.replace(s1, s2, regex=True) + res2 = df.copy() + res2.replace(s1, s2, inplace=True, regex=True) + res3 = df.copy() + res3.replace(regex=s1, value=s2, inplace=True) + expec = DataFrame( + {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} + ) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + + def test_regex_replace_numeric_to_object_conversion(self, mix_abc): + df = DataFrame(mix_abc) + expec = DataFrame({"a": ["a", 1, 2, 3], "b": mix_abc["b"], "c": mix_abc["c"]}) + res = df.replace(0, "a") + tm.assert_frame_equal(res, expec) + assert res.a.dtype == np.object_ + + @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"]) + def test_replace_regex_metachar(self, metachar): + df = DataFrame({"a": [metachar, "else"]}) + result = df.replace({"a": {metachar: "paren"}}) + expected = DataFrame({"a": 
["paren", "else"]}) + tm.assert_frame_equal(result, expected) + + def test_replace(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + zero_filled = datetime_frame.replace(np.nan, -1e8) + tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) + + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + datetime_frame["B"][:5] = -1e8 + + # empty + df = DataFrame(index=["a", "b"]) + tm.assert_frame_equal(df, df.replace(5, 7)) + + # GH 11698 + # test for mixed data types. + df = pd.DataFrame( + [("-", pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] + ) + df1 = df.replace("-", np.nan) + expected_df = pd.DataFrame( + [(np.nan, pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] + ) + tm.assert_frame_equal(df1, expected_df) + + def test_replace_list(self): + obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} + dfobj = DataFrame(obj) + + # lists of regexes and values + # list of [v1, v2, ..., vN] -> [v1, v2, ..., vN] + to_replace_res = [r".", r"e"] + values = [np.nan, "crap"] + res = dfobj.replace(to_replace_res, values) + expec = DataFrame( + { + "a": ["a", "b", np.nan, np.nan], + "b": ["crap", "f", "g", "h"], + "c": ["h", "crap", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + # list of [v1, v2, ..., vN] -> [v1, v2, .., vN] + to_replace_res = [r".", r"f"] + values = [r"..", r"crap"] + res = dfobj.replace(to_replace_res, values) + expec = DataFrame( + { + "a": ["a", "b", "..", ".."], + "b": ["e", "crap", "g", "h"], + "c": ["h", "e", "l", "o"], + } + ) + tm.assert_frame_equal(res, expec) + + def test_replace_with_empty_list(self): + # GH 21977 + s = pd.Series([["a", "b"], [], np.nan, [1]]) + df = pd.DataFrame({"col": s}) + expected = df + result = df.replace([], np.nan) + tm.assert_frame_equal(result, expected) + + # GH 19266 + with pytest.raises(ValueError, match="cannot assign 
mismatch"): + df.replace({np.nan: []}) + with pytest.raises(ValueError, match="cannot assign mismatch"): + df.replace({np.nan: ["dummy", "alt"]}) + + def test_replace_series_dict(self): + # from GH 3064 + df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) + result = df.replace(0, {"zero": 0.5, "one": 1.0}) + expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 2.0, "b": 1.0}}) + tm.assert_frame_equal(result, expected) + + result = df.replace(0, df.mean()) + tm.assert_frame_equal(result, expected) + + # series to series/dict + df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) + s = Series({"zero": 0.0, "one": 2.0}) + result = df.replace(s, {"zero": 0.5, "one": 1.0}) + expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 1.0, "b": 0.0}}) + tm.assert_frame_equal(result, expected) + + result = df.replace(s, df.mean()) + tm.assert_frame_equal(result, expected) + + def test_replace_convert(self): + # gh 3907 + df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]]) + m = {"foo": 1, "bar": 2, "bah": 3} + rep = df.replace(m) + expec = Series([np.int64] * 3) + res = rep.dtypes + tm.assert_series_equal(expec, res) + + def test_replace_mixed(self, float_string_frame): + mf = float_string_frame + mf.iloc[5:20, mf.columns.get_loc("foo")] = np.nan + mf.iloc[-10:, mf.columns.get_loc("A")] = np.nan + + result = float_string_frame.replace(np.nan, -18) + expected = float_string_frame.fillna(value=-18) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-18, np.nan), float_string_frame) + + result = float_string_frame.replace(np.nan, -1e8) + expected = float_string_frame.fillna(value=-1e8) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + + # int block upcasting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1.0, 
2.0], dtype="float64"), + "B": Series([0.5, 1], dtype="float64"), + } + ) + result = df.replace(0, 0.5) + tm.assert_frame_equal(result, expected) + + df.replace(0, 0.5, inplace=True) + tm.assert_frame_equal(df, expected) + + # int block splitting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + "C": Series([1, 2], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0.5, 1], dtype="float64"), + "C": Series([1, 2], dtype="int64"), + } + ) + result = df.replace(0, 0.5) + tm.assert_frame_equal(result, expected) + + # to object block upcasting + df = DataFrame( + { + "A": Series([1.0, 2.0], dtype="float64"), + "B": Series([0, 1], dtype="int64"), + } + ) + expected = DataFrame( + { + "A": Series([1, "foo"], dtype="object"), + "B": Series([0, 1], dtype="int64"), + } + ) + result = df.replace(2, "foo") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + "A": Series(["foo", "bar"], dtype="object"), + "B": Series([0, "foo"], dtype="object"), + } + ) + result = df.replace([1, 2], ["foo", "bar"]) + tm.assert_frame_equal(result, expected) + + # test case from + df = DataFrame( + {"A": Series([3, 0], dtype="int64"), "B": Series([0, 3], dtype="int64")} + ) + result = df.replace(3, df.mean().to_dict()) + expected = df.copy().astype("float64") + m = df.mean() + expected.iloc[0, 0] = m[0] + expected.iloc[1, 1] = m[1] + tm.assert_frame_equal(result, expected) + + def test_replace_simple_nested_dict(self): + df = DataFrame({"col": range(1, 5)}) + expected = DataFrame({"col": ["a", 2, 3, "b"]}) + + result = df.replace({"col": {1: "a", 4: "b"}}) + tm.assert_frame_equal(expected, result) + + # in this case, should be the same as the not nested version + result = df.replace({1: "a", 4: "b"}) + tm.assert_frame_equal(expected, result) + + def test_replace_simple_nested_dict_with_nonexistent_value(self): + df = DataFrame({"col": range(1, 5)}) + expected = 
DataFrame({"col": ["a", 2, 3, "b"]}) + + result = df.replace({-1: "-", 1: "a", 4: "b"}) + tm.assert_frame_equal(expected, result) + + result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) + tm.assert_frame_equal(expected, result) + + def test_replace_value_is_none(self, datetime_frame): + orig_value = datetime_frame.iloc[0, 0] + orig2 = datetime_frame.iloc[1, 0] + + datetime_frame.iloc[0, 0] = np.nan + datetime_frame.iloc[1, 0] = 1 + + result = datetime_frame.replace(to_replace={np.nan: 0}) + expected = datetime_frame.T.replace(to_replace={np.nan: 0}).T + tm.assert_frame_equal(result, expected) + + result = datetime_frame.replace(to_replace={np.nan: 0, 1: -1e8}) + tsframe = datetime_frame.copy() + tsframe.iloc[0, 0] = 0 + tsframe.iloc[1, 0] = -1e8 + expected = tsframe + tm.assert_frame_equal(expected, result) + datetime_frame.iloc[0, 0] = orig_value + datetime_frame.iloc[1, 0] = orig2 + + def test_replace_for_new_dtypes(self, datetime_frame): + + # dtypes + tsframe = datetime_frame.copy().astype(np.float32) + tsframe["A"][:5] = np.nan + tsframe["A"][-5:] = np.nan + + zero_filled = tsframe.replace(np.nan, -1e8) + tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) + + tsframe["A"][:5] = np.nan + tsframe["A"][-5:] = np.nan + tsframe["B"][:5] = -1e8 + + b = tsframe["B"] + b[b == -1e8] = np.nan + tsframe["B"] = b + result = tsframe.fillna(method="bfill") + tm.assert_frame_equal(result, tsframe.fillna(method="bfill")) + + @pytest.mark.parametrize( + "frame, to_replace, value, expected", + [ + (DataFrame({"ints": [1, 2, 3]}), 1, 0, DataFrame({"ints": [0, 2, 3]})), + ( + DataFrame({"ints": [1, 2, 3]}, dtype=np.int32), + 1, + 0, + DataFrame({"ints": [0, 2, 3]}, dtype=np.int32), + ), + ( + DataFrame({"ints": [1, 2, 3]}, dtype=np.int16), + 1, + 0, + DataFrame({"ints": [0, 2, 3]}, dtype=np.int16), + ), + ( + DataFrame({"bools": [True, False, True]}), + False, + True, + DataFrame({"bools": [True, True, 
True]}), + ), + ( + DataFrame({"complex": [1j, 2j, 3j]}), + 1j, + 0, + DataFrame({"complex": [0j, 2j, 3j]}), + ), + ( + DataFrame( + { + "datetime64": Index( + [ + datetime(2018, 5, 28), + datetime(2018, 7, 28), + datetime(2018, 5, 28), + ] + ) + } + ), + datetime(2018, 5, 28), + datetime(2018, 7, 28), + DataFrame({"datetime64": Index([datetime(2018, 7, 28)] * 3)}), + ), + # GH 20380 + ( + DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["foo"]}), + "foo", + "bar", + DataFrame({"dt": [datetime(3017, 12, 20)], "str": ["bar"]}), + ), + ( + DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [0, np.nan, 2], + } + ), + Timestamp("20130102", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ), + ), + ], + ) + def test_replace_dtypes(self, frame, to_replace, value, expected): + result = getattr(frame, "replace")(to_replace, value) + tm.assert_frame_equal(result, expected) + + def test_replace_input_formats_listlike(self): + # both dicts + to_rep = {"A": np.nan, "B": 0, "C": ""} + values = {"A": 0, "B": -1, "C": "missing"} + df = DataFrame( + {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + filled = df.replace(to_rep, values) + expected = {k: v.replace(to_rep[k], values[k]) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + result = df.replace([0, 2, 5], [5, 2, 0]) + expected = DataFrame( + {"A": [np.nan, 5, np.inf], "B": [5, 2, 0], "C": ["", "asdf", "fd"]} + ) + tm.assert_frame_equal(result, expected) + + # scalar to dict + values = {"A": 0, "B": -1, "C": "missing"} + df = DataFrame( + {"A": [np.nan, 0, np.nan], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + filled = df.replace(np.nan, values) + expected = {k: v.replace(np.nan, values[k]) for k, v in df.items()} + tm.assert_frame_equal(filled, 
DataFrame(expected)) + + # list to list + to_rep = [np.nan, 0, ""] + values = [-2, -1, "missing"] + result = df.replace(to_rep, values) + expected = df.copy() + for i in range(len(to_rep)): + expected.replace(to_rep[i], values[i], inplace=True) + tm.assert_frame_equal(result, expected) + + msg = r"Replacement lists must match in length\. Expecting 3 got 2" + with pytest.raises(ValueError, match=msg): + df.replace(to_rep, values[1:]) + + def test_replace_input_formats_scalar(self): + df = DataFrame( + {"A": [np.nan, 0, np.inf], "B": [0, 2, 5], "C": ["", "asdf", "fd"]} + ) + + # dict to scalar + to_rep = {"A": np.nan, "B": 0, "C": ""} + filled = df.replace(to_rep, 0) + expected = {k: v.replace(to_rep[k], 0) for k, v in df.items()} + tm.assert_frame_equal(filled, DataFrame(expected)) + + msg = "value argument must be scalar, dict, or Series" + with pytest.raises(TypeError, match=msg): + df.replace(to_rep, [np.nan, 0, ""]) + + # list to scalar + to_rep = [np.nan, 0, ""] + result = df.replace(to_rep, -1) + expected = df.copy() + for i in range(len(to_rep)): + expected.replace(to_rep[i], -1, inplace=True) + tm.assert_frame_equal(result, expected) + + def test_replace_limit(self): + pass + + def test_replace_dict_no_regex(self): + answer = Series( + { + 0: "Strongly Agree", + 1: "Agree", + 2: "Neutral", + 3: "Disagree", + 4: "Strongly Disagree", + } + ) + weights = { + "Agree": 4, + "Disagree": 2, + "Neutral": 3, + "Strongly Agree": 5, + "Strongly Disagree": 1, + } + expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) + result = answer.replace(weights) + tm.assert_series_equal(result, expected) + + def test_replace_series_no_regex(self): + answer = Series( + { + 0: "Strongly Agree", + 1: "Agree", + 2: "Neutral", + 3: "Disagree", + 4: "Strongly Disagree", + } + ) + weights = Series( + { + "Agree": 4, + "Disagree": 2, + "Neutral": 3, + "Strongly Agree": 5, + "Strongly Disagree": 1, + } + ) + expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) + result = answer.replace(weights) 
+ tm.assert_series_equal(result, expected) + + def test_replace_dict_tuple_list_ordering_remains_the_same(self): + df = DataFrame(dict(A=[np.nan, 1])) + res1 = df.replace(to_replace={np.nan: 0, 1: -1e8}) + res2 = df.replace(to_replace=(1, np.nan), value=[-1e8, 0]) + res3 = df.replace(to_replace=[1, np.nan], value=[-1e8, 0]) + + expected = DataFrame({"A": [0, -1e8]}) + tm.assert_frame_equal(res1, res2) + tm.assert_frame_equal(res2, res3) + tm.assert_frame_equal(res3, expected) + + def test_replace_doesnt_replace_without_regex(self): + raw = """fol T_opp T_Dir T_Enh + 0 1 0 0 vo + 1 2 vr 0 0 + 2 2 0 0 0 + 3 3 0 bt 0""" + df = pd.read_csv(StringIO(raw), sep=r"\s+") + res = df.replace({r"\D": 1}) + tm.assert_frame_equal(df, res) + + def test_replace_bool_with_string(self): + df = DataFrame({"a": [True, False], "b": list("ab")}) + result = df.replace(True, "a") + expected = DataFrame({"a": ["a", False], "b": df.b}) + tm.assert_frame_equal(result, expected) + + def test_replace_pure_bool_with_string_no_op(self): + df = DataFrame(np.random.rand(2, 2) > 0.5) + result = df.replace("asdf", "fdsa") + tm.assert_frame_equal(df, result) + + def test_replace_bool_with_bool(self): + df = DataFrame(np.random.rand(2, 2) > 0.5) + result = df.replace(False, True) + expected = DataFrame(np.ones((2, 2), dtype=bool)) + tm.assert_frame_equal(result, expected) + + def test_replace_with_dict_with_bool_keys(self): + df = DataFrame({0: [True, False], 1: [False, True]}) + with pytest.raises(TypeError, match="Cannot compare types .+"): + df.replace({"asdf": "asdb", True: "yes"}) + + def test_replace_truthy(self): + df = DataFrame({"a": [True, True]}) + r = df.replace([np.inf, -np.inf], np.nan) + e = df + tm.assert_frame_equal(r, e) + + def test_nested_dict_overlapping_keys_replace_int(self): + # GH 27660 keep behaviour consistent for simple dictionary and + # nested dictionary replacement + df = DataFrame({"a": list(range(1, 5))}) + + result = df.replace({"a": dict(zip(range(1, 5), range(2, 
6)))}) + expected = df.replace(dict(zip(range(1, 5), range(2, 6)))) + tm.assert_frame_equal(result, expected) + + def test_nested_dict_overlapping_keys_replace_str(self): + # GH 27660 + a = np.arange(1, 5) + astr = a.astype(str) + bstr = np.arange(2, 6).astype(str) + df = DataFrame({"a": astr}) + result = df.replace(dict(zip(astr, bstr))) + expected = df.replace({"a": dict(zip(astr, bstr))}) + tm.assert_frame_equal(result, expected) + + def test_replace_swapping_bug(self): + df = pd.DataFrame({"a": [True, False, True]}) + res = df.replace({"a": {True: "Y", False: "N"}}) + expect = pd.DataFrame({"a": ["Y", "N", "Y"]}) + tm.assert_frame_equal(res, expect) + + df = pd.DataFrame({"a": [0, 1, 0]}) + res = df.replace({"a": {0: "Y", 1: "N"}}) + expect = pd.DataFrame({"a": ["Y", "N", "Y"]}) + tm.assert_frame_equal(res, expect) + + def test_replace_period(self): + d = { + "fname": { + "out_augmented_AUG_2011.json": pd.Period(year=2011, month=8, freq="M"), + "out_augmented_JAN_2011.json": pd.Period(year=2011, month=1, freq="M"), + "out_augmented_MAY_2012.json": pd.Period(year=2012, month=5, freq="M"), + "out_augmented_SUBSIDY_WEEK.json": pd.Period( + year=2011, month=4, freq="M" + ), + "out_augmented_AUG_2012.json": pd.Period(year=2012, month=8, freq="M"), + "out_augmented_MAY_2011.json": pd.Period(year=2011, month=5, freq="M"), + "out_augmented_SEP_2013.json": pd.Period(year=2013, month=9, freq="M"), + } + } + + df = pd.DataFrame( + [ + "out_augmented_AUG_2012.json", + "out_augmented_SEP_2013.json", + "out_augmented_SUBSIDY_WEEK.json", + "out_augmented_MAY_2012.json", + "out_augmented_MAY_2011.json", + "out_augmented_AUG_2011.json", + "out_augmented_JAN_2011.json", + ], + columns=["fname"], + ) + assert set(df.fname.values) == set(d["fname"].keys()) + # We don't support converting object -> specialized EA in + # replace yet. 
+ expected = DataFrame( + {"fname": [d["fname"][k] for k in df.fname.values]}, dtype=object + ) + result = df.replace(d) + tm.assert_frame_equal(result, expected) + + def test_replace_datetime(self): + d = { + "fname": { + "out_augmented_AUG_2011.json": pd.Timestamp("2011-08"), + "out_augmented_JAN_2011.json": pd.Timestamp("2011-01"), + "out_augmented_MAY_2012.json": pd.Timestamp("2012-05"), + "out_augmented_SUBSIDY_WEEK.json": pd.Timestamp("2011-04"), + "out_augmented_AUG_2012.json": pd.Timestamp("2012-08"), + "out_augmented_MAY_2011.json": pd.Timestamp("2011-05"), + "out_augmented_SEP_2013.json": pd.Timestamp("2013-09"), + } + } + + df = pd.DataFrame( + [ + "out_augmented_AUG_2012.json", + "out_augmented_SEP_2013.json", + "out_augmented_SUBSIDY_WEEK.json", + "out_augmented_MAY_2012.json", + "out_augmented_MAY_2011.json", + "out_augmented_AUG_2011.json", + "out_augmented_JAN_2011.json", + ], + columns=["fname"], + ) + assert set(df.fname.values) == set(d["fname"].keys()) + expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]}) + result = df.replace(d) + tm.assert_frame_equal(result, expected) + + def test_replace_datetimetz(self): + + # GH 11326 + # behaving poorly when presented with a datetime64[ns, tz] + df = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [0, np.nan, 2], + } + ) + result = df.replace(np.nan, 1) + expected = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": Series([0, 1, 2], dtype="float64"), + } + ) + tm.assert_frame_equal(result, expected) + + result = df.fillna(1) + tm.assert_frame_equal(result, expected) + + result = df.replace(0, np.nan) + expected = DataFrame( + { + "A": date_range("20130101", periods=3, tz="US/Eastern"), + "B": [np.nan, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.replace( + Timestamp("20130102", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + ) + expected = DataFrame( + { + "A": [ + 
Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Eastern"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1, 0] = np.nan + result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern")) + tm.assert_frame_equal(result, expected) + + # coerce to object + result = df.copy() + result.iloc[1, 0] = np.nan + result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific")) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104", tz="US/Pacific"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result.iloc[1, 0] = np.nan + result = result.replace({"A": np.nan}, Timestamp("20130104")) + expected = DataFrame( + { + "A": [ + Timestamp("20130101", tz="US/Eastern"), + Timestamp("20130104"), + Timestamp("20130103", tz="US/Eastern"), + ], + "B": [0, np.nan, 2], + } + ) + tm.assert_frame_equal(result, expected) + + def test_replace_with_empty_dictlike(self, mix_abc): + # GH 15289 + df = DataFrame(mix_abc) + tm.assert_frame_equal(df, df.replace({})) + tm.assert_frame_equal(df, df.replace(Series([], dtype=object))) + + tm.assert_frame_equal(df, df.replace({"b": {}})) + tm.assert_frame_equal(df, df.replace(Series({"b": {}}))) + + @pytest.mark.parametrize( + "to_replace, method, expected", + [ + (0, "bfill", {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}), + ( + np.nan, + "bfill", + {"A": [0, 1, 2], "B": [5.0, 7.0, 7.0], "C": ["a", "b", "c"]}, + ), + ("d", "ffill", {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}), + ( + [0, 2], + "bfill", + {"A": [1, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + [1, 2], + "pad", + {"A": [0, 0, 0], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}, + ), + ( + (1, 2), + "bfill", + {"A": [0, 2, 2], "B": [5, np.nan, 7], 
"C": ["a", "b", "c"]}, + ), + ( + ["b", "c"], + "ffill", + {"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "a", "a"]}, + ), + ], + ) + def test_replace_method(self, to_replace, method, expected): + # GH 19632 + df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}) + + result = df.replace(to_replace=to_replace, value=None, method=method) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "replace_dict, final_data", + [({"a": 1, "b": 1}, [[3, 3], [2, 2]]), ({"a": 1, "b": 2}, [[3, 1], [2, 3]])], + ) + def test_categorical_replace_with_dict(self, replace_dict, final_data): + # GH 26988 + df = DataFrame([[1, 1], [2, 2]], columns=["a", "b"], dtype="category") + expected = DataFrame(final_data, columns=["a", "b"], dtype="category") + expected["a"] = expected["a"].cat.set_categories([1, 2, 3]) + expected["b"] = expected["b"].cat.set_categories([1, 2, 3]) + result = df.replace(replace_dict, 3) + tm.assert_frame_equal(result, expected) + with pytest.raises(AssertionError): + # ensure non-inplace call does not affect original + tm.assert_frame_equal(df, expected) + df.replace(replace_dict, 3, inplace=True) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "df, to_replace, exp", + [ + ( + {"col1": [1, 2, 3], "col2": [4, 5, 6]}, + {4: 5, 5: 6, 6: 7}, + {"col1": [1, 2, 3], "col2": [5, 6, 7]}, + ), + ( + {"col1": [1, 2, 3], "col2": ["4", "5", "6"]}, + {"4": "5", "5": "6", "6": "7"}, + {"col1": [1, 2, 3], "col2": ["5", "6", "7"]}, + ), + ], + ) + def test_replace_commutative(self, df, to_replace, exp): + # GH 16051 + # DataFrame.replace() overwrites when values are non-numeric + # also added to data frame whilst issue was for series + + df = pd.DataFrame(df) + + expected = pd.DataFrame(exp) + result = df.replace(to_replace) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "replacer", + [ + pd.Timestamp("20170827"), + np.int8(1), + np.int16(1), + 
np.float32(1), + np.float64(1), + ], + ) + def test_replace_replacer_dtype(self, replacer): + # GH26632 + df = pd.DataFrame(["a"]) + result = df.replace({"a": replacer, "b": replacer}) + expected = pd.DataFrame([replacer]) + tm.assert_frame_equal(result, expected) + + def test_replace_after_convert_dtypes(self): + # GH31517 + df = pd.DataFrame({"grp": [1, 2, 3, 4, 5]}, dtype="Int64") + result = df.replace(1, 10) + expected = pd.DataFrame({"grp": [10, 2, 3, 4, 5]}, dtype="Int64") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"]) + @pytest.mark.parametrize("value", [np.nan, pd.NA]) + def test_replace_no_replacement_dtypes(self, dtype, value): + # https://github.com/pandas-dev/pandas/issues/32988 + df = pd.DataFrame(np.eye(2), dtype=dtype) + result = df.replace(to_replace=[None, -np.inf, np.inf], value=value) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/frame/methods/test_round.py b/pandas/tests/frame/methods/test_round.py new file mode 100644 index 00000000..0865e03c --- /dev/null +++ b/pandas/tests/frame/methods/test_round.py @@ -0,0 +1,217 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, date_range +import pandas._testing as tm + + +class TestDataFrameRound: + def test_round(self): + # GH#2665 + + # Test that rounding an empty DataFrame does nothing + df = DataFrame() + tm.assert_frame_equal(df, df.round()) + + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. 
decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(df.round(), expected_rounded) + + # Round with an integer + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # This should also work with np.round (since np.round dispatches to + # df.round) + tm.assert_frame_equal(np.round(df, decimals), expected_rounded) + + # Round with a list + round_list = [1, 2] + with pytest.raises(TypeError): + df.round(round_list) + + # Round with a dictionary + expected_rounded = DataFrame( + {"col1": [1.1, 2.1, 3.1], "col2": [1.23, 2.23, 3.23]} + ) + round_dict = {"col1": 1, "col2": 2} + tm.assert_frame_equal(df.round(round_dict), expected_rounded) + + # Incomplete dict + expected_partially_rounded = DataFrame( + {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} + ) + partial_round_dict = {"col2": 1} + tm.assert_frame_equal(df.round(partial_round_dict), expected_partially_rounded) + + # Dict with unknown elements + wrong_round_dict = {"col3": 2, "col2": 1} + tm.assert_frame_equal(df.round(wrong_round_dict), expected_partially_rounded) + + # float input to `decimals` + non_int_round_dict = {"col1": 1, "col2": 0.5} + with pytest.raises(TypeError): + df.round(non_int_round_dict) + + # String input + non_int_round_dict = {"col1": 1, "col2": "foo"} + with pytest.raises(TypeError): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + df.round(non_int_round_Series) + + # List input + non_int_round_dict = {"col1": 1, "col2": [1, 2]} + with pytest.raises(TypeError): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + df.round(non_int_round_Series) + + # Non integer Series inputs + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + 
df.round(non_int_round_Series) + + non_int_round_Series = Series(non_int_round_dict) + with pytest.raises(TypeError): + df.round(non_int_round_Series) + + # Negative numbers + negative_round_dict = {"col1": -1, "col2": -2} + big_df = df * 100 + expected_neg_rounded = DataFrame( + {"col1": [110.0, 210, 310], "col2": [100.0, 200, 300]} + ) + tm.assert_frame_equal(big_df.round(negative_round_dict), expected_neg_rounded) + + # nan in Series round + nan_round_Series = Series({"col1": np.nan, "col2": 1}) + + # TODO(wesm): unused? + expected_nan_round = DataFrame( # noqa + {"col1": [1.123, 2.123, 3.123], "col2": [1.2, 2.2, 3.2]} + ) + + with pytest.raises(TypeError): + df.round(nan_round_Series) + + # Make sure this doesn't break existing Series.round + tm.assert_series_equal(df["col1"].round(1), expected_rounded["col1"]) + + # named columns + # GH#11986 + decimals = 2 + expected_rounded = DataFrame( + {"col1": [1.12, 2.12, 3.12], "col2": [1.23, 2.23, 3.23]} + ) + df.columns.name = "cols" + expected_rounded.columns.name = "cols" + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # interaction of named columns & series + tm.assert_series_equal(df["col1"].round(decimals), expected_rounded["col1"]) + tm.assert_series_equal(df.round(decimals)["col1"], expected_rounded["col1"]) + + def test_round_numpy(self): + # GH#12600 + df = DataFrame([[1.53, 1.36], [0.06, 7.01]]) + out = np.round(df, decimals=0) + expected = DataFrame([[2.0, 1.0], [0.0, 7.0]]) + tm.assert_frame_equal(out, expected) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.round(df, decimals=0, out=df) + + def test_round_numpy_with_nan(self): + # See GH#14197 + df = Series([1.53, np.nan, 0.06]).to_frame() + with tm.assert_produces_warning(None): + result = df.round() + expected = Series([2.0, np.nan, 0.0]).to_frame() + tm.assert_frame_equal(result, expected) + + def test_round_mixed_type(self): + # GH#11885 + df = DataFrame( + { + "col1": [1.1, 2.2, 
3.3, 4.4], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + round_0 = DataFrame( + { + "col1": [1.0, 2.0, 3.0, 4.0], + "col2": ["1", "a", "c", "f"], + "col3": date_range("20111111", periods=4), + } + ) + tm.assert_frame_equal(df.round(), round_0) + tm.assert_frame_equal(df.round(1), df) + tm.assert_frame_equal(df.round({"col1": 1}), df) + tm.assert_frame_equal(df.round({"col1": 0}), round_0) + tm.assert_frame_equal(df.round({"col1": 0, "col2": 1}), round_0) + tm.assert_frame_equal(df.round({"col3": 1}), df) + + def test_round_with_duplicate_columns(self): + # GH#11611 + + df = pd.DataFrame( + np.random.random([3, 3]), + columns=["A", "B", "C"], + index=["first", "second", "third"], + ) + + dfs = pd.concat((df, df), axis=1) + rounded = dfs.round() + tm.assert_index_equal(rounded.index, dfs.index) + + decimals = pd.Series([1, 0, 2], index=["A", "B", "A"]) + msg = "Index of decimals must be unique" + with pytest.raises(ValueError, match=msg): + df.round(decimals) + + def test_round_builtin(self): + # GH#11763 + # Here's the test frame we'll be working with + df = DataFrame({"col1": [1.123, 2.123, 3.123], "col2": [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. 
decimals=0) + expected_rounded = DataFrame({"col1": [1.0, 2.0, 3.0], "col2": [1.0, 2.0, 3.0]}) + tm.assert_frame_equal(round(df), expected_rounded) + + def test_round_nonunique_categorical(self): + # See GH#21809 + idx = pd.CategoricalIndex(["low"] * 3 + ["hi"] * 3) + df = pd.DataFrame(np.random.rand(6, 3), columns=list("abc")) + + expected = df.round(3) + expected.index = idx + + df_categorical = df.copy().set_index(idx) + assert df_categorical.shape == (6, 3) + result = df_categorical.round(3) + assert result.shape == (6, 3) + + tm.assert_frame_equal(result, expected) + + def test_round_interval_category_columns(self): + # GH#30063 + columns = pd.CategoricalIndex(pd.interval_range(0, 2)) + df = DataFrame([[0.66, 1.1], [0.3, 0.25]], columns=columns) + + result = df.round() + expected = DataFrame([[1.0, 1.0], [0.0, 0.0]], columns=columns) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py new file mode 100644 index 00000000..f6c89172 --- /dev/null +++ b/pandas/tests/frame/methods/test_shift.py @@ -0,0 +1,210 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series, date_range, offsets +import pandas._testing as tm + + +class TestDataFrameShift: + def test_shift(self, datetime_frame, int_frame): + # naive shift + shiftedFrame = datetime_frame.shift(5) + tm.assert_index_equal(shiftedFrame.index, datetime_frame.index) + + shiftedSeries = datetime_frame["A"].shift(5) + tm.assert_series_equal(shiftedFrame["A"], shiftedSeries) + + shiftedFrame = datetime_frame.shift(-5) + tm.assert_index_equal(shiftedFrame.index, datetime_frame.index) + + shiftedSeries = datetime_frame["A"].shift(-5) + tm.assert_series_equal(shiftedFrame["A"], shiftedSeries) + + # shift by 0 + unshifted = datetime_frame.shift(0) + tm.assert_frame_equal(unshifted, datetime_frame) + + # shift by DateOffset + shiftedFrame = datetime_frame.shift(5, freq=offsets.BDay()) + 
assert len(shiftedFrame) == len(datetime_frame) + + shiftedFrame2 = datetime_frame.shift(5, freq="B") + tm.assert_frame_equal(shiftedFrame, shiftedFrame2) + + d = datetime_frame.index[0] + shifted_d = d + offsets.BDay(5) + tm.assert_series_equal( + datetime_frame.xs(d), shiftedFrame.xs(shifted_d), check_names=False + ) + + # shift int frame + int_shifted = int_frame.shift(1) # noqa + + # Shifting with PeriodIndex + ps = tm.makePeriodFrame() + shifted = ps.shift(1) + unshifted = shifted.shift(-1) + tm.assert_index_equal(shifted.index, ps.index) + tm.assert_index_equal(unshifted.index, ps.index) + tm.assert_numpy_array_equal( + unshifted.iloc[:, 0].dropna().values, ps.iloc[:-1, 0].values + ) + + shifted2 = ps.shift(1, "B") + shifted3 = ps.shift(1, offsets.BDay()) + tm.assert_frame_equal(shifted2, shifted3) + tm.assert_frame_equal(ps, shifted2.shift(-1, "B")) + + msg = "does not match PeriodIndex freq" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="D") + + # shift other axis + # GH#6371 + df = DataFrame(np.random.rand(10, 5)) + expected = pd.concat( + [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], + ignore_index=True, + axis=1, + ) + result = df.shift(1, axis=1) + tm.assert_frame_equal(result, expected) + + # shift named axis + df = DataFrame(np.random.rand(10, 5)) + expected = pd.concat( + [DataFrame(np.nan, index=df.index, columns=[0]), df.iloc[:, 0:-1]], + ignore_index=True, + axis=1, + ) + result = df.shift(1, axis="columns") + tm.assert_frame_equal(result, expected) + + def test_shift_bool(self): + df = DataFrame({"high": [True, False], "low": [False, False]}) + rs = df.shift(1) + xp = DataFrame( + np.array([[np.nan, np.nan], [True, False]], dtype=object), + columns=["high", "low"], + ) + tm.assert_frame_equal(rs, xp) + + def test_shift_categorical(self): + # GH#9416 + s1 = pd.Series(["a", "b", "c"], dtype="category") + s2 = pd.Series(["A", "B", "C"], dtype="category") + df = DataFrame({"one": s1, "two": s2}) + rs = df.shift(1) 
+ xp = DataFrame({"one": s1.shift(1), "two": s2.shift(1)}) + tm.assert_frame_equal(rs, xp) + + def test_shift_fill_value(self): + # GH#24128 + df = DataFrame( + [1, 2, 3, 4, 5], index=date_range("1/1/2000", periods=5, freq="H") + ) + exp = DataFrame( + [0, 1, 2, 3, 4], index=date_range("1/1/2000", periods=5, freq="H") + ) + result = df.shift(1, fill_value=0) + tm.assert_frame_equal(result, exp) + + exp = DataFrame( + [0, 0, 1, 2, 3], index=date_range("1/1/2000", periods=5, freq="H") + ) + result = df.shift(2, fill_value=0) + tm.assert_frame_equal(result, exp) + + def test_shift_empty(self): + # Regression test for GH#8019 + df = DataFrame({"foo": []}) + rs = df.shift(-1) + + tm.assert_frame_equal(df, rs) + + def test_shift_duplicate_columns(self): + # GH#9092; verify that position-based shifting works + # in the presence of duplicate columns + column_lists = [list(range(5)), [1] * 5, [1, 1, 2, 2, 1]] + data = np.random.randn(20, 5) + + shifted = [] + for columns in column_lists: + df = pd.DataFrame(data.copy(), columns=columns) + for s in range(5): + df.iloc[:, s] = df.iloc[:, s].shift(s + 1) + df.columns = range(5) + shifted.append(df) + + # sanity check the base case + nulls = shifted[0].isna().sum() + tm.assert_series_equal(nulls, Series(range(1, 6), dtype="int64")) + + # check all answers are the same + tm.assert_frame_equal(shifted[0], shifted[1]) + tm.assert_frame_equal(shifted[0], shifted[2]) + + def test_tshift(self, datetime_frame): + # PeriodIndex + ps = tm.makePeriodFrame() + shifted = ps.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_frame_equal(unshifted, ps) + + shifted2 = ps.tshift(freq="B") + tm.assert_frame_equal(shifted, shifted2) + + shifted3 = ps.tshift(freq=offsets.BDay()) + tm.assert_frame_equal(shifted, shifted3) + + with pytest.raises(ValueError, match="does not match"): + ps.tshift(freq="M") + + # DatetimeIndex + shifted = datetime_frame.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_frame_equal(datetime_frame, 
unshifted) + + shifted2 = datetime_frame.tshift(freq=datetime_frame.index.freq) + tm.assert_frame_equal(shifted, shifted2) + + inferred_ts = DataFrame( + datetime_frame.values, + Index(np.asarray(datetime_frame.index)), + columns=datetime_frame.columns, + ) + shifted = inferred_ts.tshift(1) + unshifted = shifted.tshift(-1) + tm.assert_frame_equal(shifted, datetime_frame.tshift(1)) + tm.assert_frame_equal(unshifted, inferred_ts) + + no_freq = datetime_frame.iloc[[0, 5, 7], :] + msg = "Freq was not given and was not set in the index" + with pytest.raises(ValueError, match=msg): + no_freq.tshift() + + def test_shift_dt64values_int_fill_deprecated(self): + # GH#31971 + ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) + df = ser.to_frame() + + with tm.assert_produces_warning(FutureWarning): + result = df.shift(1, fill_value=0) + + expected = pd.Series([pd.Timestamp(0), ser[0]]).to_frame() + tm.assert_frame_equal(result, expected) + + # axis = 1 + df2 = pd.DataFrame({"A": ser, "B": ser}) + df2._consolidate_inplace() + + with tm.assert_produces_warning(FutureWarning): + result = df2.shift(1, axis=1, fill_value=0) + + expected = pd.DataFrame( + {"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]} + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_sort_index.py b/pandas/tests/frame/methods/test_sort_index.py new file mode 100644 index 00000000..2c25e1f3 --- /dev/null +++ b/pandas/tests/frame/methods/test_sort_index.py @@ -0,0 +1,320 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import CategoricalDtype, DataFrame, IntervalIndex, MultiIndex, Series +import pandas._testing as tm + + +class TestDataFrameSortIndex: + def test_sort_index_nan(self): + # GH#3917 + + # Test DataFrame with nan label + df = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, + index=[1, 2, 3, 4, 5, 6, np.nan], + ) + + # NaN label, ascending=True, na_position='last' + sorted_df 
= df.sort_index(kind="quicksort", ascending=True, na_position="last") + expected = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, + index=[1, 2, 3, 4, 5, 6, np.nan], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=True, na_position='first' + sorted_df = df.sort_index(na_position="first") + expected = DataFrame( + {"A": [4, 1, 2, np.nan, 1, 6, 8], "B": [5, 9, np.nan, 5, 2, 5, 4]}, + index=[np.nan, 1, 2, 3, 4, 5, 6], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=False, na_position='last' + sorted_df = df.sort_index(kind="quicksort", ascending=False) + expected = DataFrame( + {"A": [8, 6, 1, np.nan, 2, 1, 4], "B": [4, 5, 2, 5, np.nan, 9, 5]}, + index=[6, 5, 4, 3, 2, 1, np.nan], + ) + tm.assert_frame_equal(sorted_df, expected) + + # NaN label, ascending=False, na_position='first' + sorted_df = df.sort_index( + kind="quicksort", ascending=False, na_position="first" + ) + expected = DataFrame( + {"A": [4, 8, 6, 1, np.nan, 2, 1], "B": [5, 4, 5, 2, 5, np.nan, 9]}, + index=[np.nan, 6, 5, 4, 3, 2, 1], + ) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_index_multi_index(self): + # GH#25775, testing that sorting by index works with a multi-index. 
+ df = DataFrame( + {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")} + ) + result = df.set_index(list("abc")).sort_index(level=list("ba")) + + expected = DataFrame( + {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")} + ) + expected = expected.set_index(list("abc")) + + tm.assert_frame_equal(result, expected) + + def test_sort_index_inplace(self): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + # axis=0 + unordered = frame.loc[[3, 2, 4, 1]] + a_id = id(unordered["A"]) + df = unordered.copy() + df.sort_index(inplace=True) + expected = frame + tm.assert_frame_equal(df, expected) + assert a_id != id(df["A"]) + + df = unordered.copy() + df.sort_index(ascending=False, inplace=True) + expected = frame[::-1] + tm.assert_frame_equal(df, expected) + + # axis=1 + unordered = frame.loc[:, ["D", "B", "C", "A"]] + df = unordered.copy() + df.sort_index(axis=1, inplace=True) + expected = frame + tm.assert_frame_equal(df, expected) + + df = unordered.copy() + df.sort_index(axis=1, ascending=False, inplace=True) + expected = frame.iloc[:, ::-1] + tm.assert_frame_equal(df, expected) + + def test_sort_index_different_sortorder(self): + A = np.arange(20).repeat(5) + B = np.tile(np.arange(5), 20) + + indexer = np.random.permutation(100) + A = A.take(indexer) + B = B.take(indexer) + + df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + + ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) + expected = df.take(ex_indexer) + + # test with multiindex, too + idf = df.set_index(["A", "B"]) + + result = idf.sort_index(ascending=[1, 0]) + expected = idf.take(ex_indexer) + tm.assert_frame_equal(result, expected) + + # also, Series! 
+ result = idf["C"].sort_index(ascending=[1, 0]) + tm.assert_series_equal(result, expected["C"]) + + def test_sort_index_level(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + df = DataFrame([[1, 2], [3, 4]], mi) + + result = df.sort_index(level="A", sort_remaining=False) + expected = df + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["A", "B"], sort_remaining=False) + expected = df + tm.assert_frame_equal(result, expected) + + # Error thrown by sort_index when + # first index is sorted last (GH#26053) + result = df.sort_index(level=["C", "B", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["B", "C", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(level=["C", "A"]) + expected = df.iloc[[1, 0]] + tm.assert_frame_equal(result, expected) + + def test_sort_index_categorical_index(self): + + df = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": Series(list("aabbca")).astype(CategoricalDtype(list("cab"))), + } + ).set_index("B") + + result = df.sort_index() + expected = df.iloc[[4, 0, 1, 5, 2, 3]] + tm.assert_frame_equal(result, expected) + + result = df.sort_index(ascending=False) + expected = df.iloc[[2, 3, 0, 1, 5, 4]] + tm.assert_frame_equal(result, expected) + + def test_sort_index(self): + # GH#13496 + + frame = DataFrame( + np.arange(16).reshape(4, 4), + index=[1, 2, 3, 4], + columns=["A", "B", "C", "D"], + ) + + # axis=0 : sort rows by index labels + unordered = frame.loc[[3, 2, 4, 1]] + result = unordered.sort_index(axis=0) + expected = frame + tm.assert_frame_equal(result, expected) + + result = unordered.sort_index(ascending=False) + expected = frame[::-1] + tm.assert_frame_equal(result, expected) + + # axis=1 : sort columns by column names + unordered = frame.iloc[:, [2, 1, 3, 0]] + result = unordered.sort_index(axis=1) + tm.assert_frame_equal(result, frame) + + result = 
unordered.sort_index(axis=1, ascending=False) + expected = frame.iloc[:, ::-1] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("level", ["A", 0]) # GH#21052 + def test_sort_index_multiindex(self, level): + # GH#13496 + + # sort rows by specified level of multi-index + mi = MultiIndex.from_tuples( + [[2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list("ABC") + ) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi) + + expected_mi = MultiIndex.from_tuples( + [[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC") + ) + expected = pd.DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) + result = df.sort_index(level=level) + tm.assert_frame_equal(result, expected) + + # sort_remaining=False + expected_mi = MultiIndex.from_tuples( + [[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC") + ) + expected = pd.DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) + result = df.sort_index(level=level, sort_remaining=False) + tm.assert_frame_equal(result, expected) + + def test_sort_index_intervalindex(self): + # this is a de-facto sort via unstack + # confirming that we sort in the order of the bins + y = Series(np.random.randn(100)) + x1 = Series(np.sign(np.random.randn(100))) + x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) + model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"]) + + result = model.groupby(["X1", "X2"], observed=True).mean().unstack() + expected = IntervalIndex.from_tuples( + [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed="right" + ) + result = result.columns.levels[1].categories + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, False, [5, 3, 2]), + ({"A": [1, 2, 3]}, {"A": [1, 3, 
2]}, True, False, [2, 3, 5]), + ], + ) + def test_sort_index_ignore_index( + self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114 + original_index = [2, 5, 3] + df = DataFrame(original_dict, index=original_index) + expected_df = DataFrame(sorted_dict, index=output_index) + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_df = df.copy() + result_df.sort_index(**kwargs) + else: + result_df = df.sort_index(**kwargs) + + tm.assert_frame_equal(result_df, expected_df) + tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ascending, ignore_index, output_index", + [ + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + True, + [0, 1], + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [1, 2], "M2": [3, 4]}, + True, + False, + MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")), + ), + ( + {"M1": [1, 2], "M2": [3, 4]}, + {"M1": [2, 1], "M2": [4, 3]}, + False, + False, + MultiIndex.from_tuples([[3, 4], [2, 1]], names=list("AB")), + ), + ], + ) + def test_sort_index_ignore_index_multi_index( + self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index + ): + # GH 30114, this is to test ignore_index on MulitIndex of index + mi = MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")) + df = DataFrame(original_dict, index=mi) + expected_df = DataFrame(sorted_dict, index=output_index) + + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_df = df.copy() + result_df.sort_index(**kwargs) + else: + result_df = df.sort_index(**kwargs) + + tm.assert_frame_equal(result_df, expected_df) + tm.assert_frame_equal(df, 
DataFrame(original_dict, index=mi)) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py new file mode 100644 index 00000000..96f4d6ed --- /dev/null +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -0,0 +1,518 @@ +import random + +import numpy as np +import pytest + +import pandas as pd +from pandas import Categorical, DataFrame, NaT, Timestamp, date_range +import pandas._testing as tm + + +class TestDataFrameSortValues: + def test_sort_values(self): + frame = DataFrame( + [[1, 1, 2], [3, 1, 0], [4, 5, 6]], index=[1, 2, 3], columns=list("ABC") + ) + + # by column (axis=0) + sorted_df = frame.sort_values(by="A") + indexer = frame["A"].argsort().values + expected = frame.loc[frame.index[indexer]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by="A", ascending=False) + indexer = indexer[::-1] + expected = frame.loc[frame.index[indexer]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by="A", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + # GH4839 + sorted_df = frame.sort_values(by=["A"], ascending=[False]) + tm.assert_frame_equal(sorted_df, expected) + + # multiple bys + sorted_df = frame.sort_values(by=["B", "C"]) + expected = frame.loc[[2, 1, 3]] + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=["B", "C"], ascending=False) + tm.assert_frame_equal(sorted_df, expected[::-1]) + + sorted_df = frame.sort_values(by=["B", "A"], ascending=[True, False]) + tm.assert_frame_equal(sorted_df, expected) + + msg = "No axis named 2 for object type " + with pytest.raises(ValueError, match=msg): + frame.sort_values(by=["A", "B"], axis=2, inplace=True) + + # by row (axis=1): GH#10806 + sorted_df = frame.sort_values(by=3, axis=1) + expected = frame + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=3, axis=1, ascending=False) + expected = frame.reindex(columns=["C", "B", "A"]) + 
tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 2], axis="columns") + expected = frame.reindex(columns=["B", "A", "C"]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=[True, False]) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.sort_values(by=[1, 3], axis=1, ascending=False) + expected = frame.reindex(columns=["C", "B", "A"]) + tm.assert_frame_equal(sorted_df, expected) + + msg = r"Length of ascending \(5\) != length of by \(2\)" + with pytest.raises(ValueError, match=msg): + frame.sort_values(by=["A", "B"], axis=0, ascending=[True] * 5) + + def test_sort_values_inplace(self): + frame = DataFrame( + np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] + ) + + sorted_df = frame.copy() + sorted_df.sort_values(by="A", inplace=True) + expected = frame.sort_values(by="A") + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values(by=1, axis=1, inplace=True) + expected = frame.sort_values(by=1, axis=1) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values(by="A", ascending=False, inplace=True) + expected = frame.sort_values(by="A", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values(by=["A", "B"], ascending=False, inplace=True) + expected = frame.sort_values(by=["A", "B"], ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_multicolumn(self): + A = np.arange(5).repeat(20) + B = np.tile(np.arange(5), 20) + random.shuffle(A) + random.shuffle(B) + frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) + + result = frame.sort_values(by=["A", "B"]) + indexer = np.lexsort((frame["B"], frame["A"])) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + result = frame.sort_values(by=["A", "B"], ascending=False) + indexer = np.lexsort( 
+ (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False)) + ) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + result = frame.sort_values(by=["B", "A"]) + indexer = np.lexsort((frame["A"], frame["B"])) + expected = frame.take(indexer) + tm.assert_frame_equal(result, expected) + + def test_sort_values_multicolumn_uint64(self): + # GH#9918 + # uint64 multicolumn sort + + df = pd.DataFrame( + { + "a": pd.Series([18446637057563306014, 1162265347240853609]), + "b": pd.Series([1, 2]), + } + ) + df["a"] = df["a"].astype(np.uint64) + result = df.sort_values(["a", "b"]) + + expected = pd.DataFrame( + { + "a": pd.Series([18446637057563306014, 1162265347240853609]), + "b": pd.Series([1, 2]), + }, + index=pd.Index([1, 0]), + ) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_nan(self): + # GH#3917 + df = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]} + ) + + # sort one column only + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]}, + index=[2, 0, 3, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A"], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + expected = DataFrame( + {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 9, 2]}, + index=[2, 5, 4, 6, 1, 0, 3], + ) + sorted_df = df.sort_values(["A"], na_position="first", ascending=False) + tm.assert_frame_equal(sorted_df, expected) + + expected = df.reindex(columns=["B", "A"]) + sorted_df = df.sort_values(by=1, axis=1, na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='last', order + expected = DataFrame( + {"A": [1, 1, 2, 4, 6, 8, np.nan], "B": [2, 9, np.nan, 5, 5, 4, 5]}, + index=[3, 0, 1, 6, 4, 5, 2], + ) + sorted_df = df.sort_values(["A", "B"]) + tm.assert_frame_equal(sorted_df, expected) + + # na_position='first', order + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 2, 9, np.nan, 5, 5, 4]}, + index=[2, 
3, 0, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A", "B"], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='first', not order + expected = DataFrame( + {"A": [np.nan, 1, 1, 2, 4, 6, 8], "B": [5, 9, 2, np.nan, 5, 5, 4]}, + index=[2, 0, 3, 1, 6, 4, 5], + ) + sorted_df = df.sort_values(["A", "B"], ascending=[1, 0], na_position="first") + tm.assert_frame_equal(sorted_df, expected) + + # na_position='last', not order + expected = DataFrame( + {"A": [8, 6, 4, 2, 1, 1, np.nan], "B": [4, 5, 5, np.nan, 2, 9, 5]}, + index=[5, 4, 6, 1, 3, 0, 2], + ) + sorted_df = df.sort_values(["A", "B"], ascending=[0, 1], na_position="last") + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_stable_descending_sort(self): + # GH#6399 + df = DataFrame( + [[2, "first"], [2, "second"], [1, "a"], [1, "b"]], + columns=["sort_col", "order"], + ) + sorted_df = df.sort_values(by="sort_col", kind="mergesort", ascending=False) + tm.assert_frame_equal(df, sorted_df) + + def test_sort_values_stable_descending_multicolumn_sort(self): + df = DataFrame( + {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]} + ) + # test stable mergesort + expected = DataFrame( + {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 2, 9]}, + index=[2, 5, 4, 6, 1, 3, 0], + ) + sorted_df = df.sort_values( + ["A", "B"], ascending=[0, 1], na_position="first", kind="mergesort" + ) + tm.assert_frame_equal(sorted_df, expected) + + expected = DataFrame( + {"A": [np.nan, 8, 6, 4, 2, 1, 1], "B": [5, 4, 5, 5, np.nan, 9, 2]}, + index=[2, 5, 4, 6, 1, 0, 3], + ) + sorted_df = df.sort_values( + ["A", "B"], ascending=[0, 0], na_position="first", kind="mergesort" + ) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_stable_categorial(self): + # GH#16793 + df = DataFrame({"x": pd.Categorical(np.repeat([1, 2, 3, 4], 5), ordered=True)}) + expected = df.copy() + sorted_df = df.sort_values("x", kind="mergesort") + tm.assert_frame_equal(sorted_df, 
expected) + + def test_sort_values_datetimes(self): + + # GH#3461, argsort / lexsort differences for a datetime column + df = DataFrame( + ["a", "a", "a", "b", "c", "d", "e", "f", "g"], + columns=["A"], + index=date_range("20130101", periods=9), + ) + dts = [ + Timestamp(x) + for x in [ + "2004-02-11", + "2004-01-21", + "2004-01-26", + "2005-09-20", + "2010-10-04", + "2009-05-12", + "2008-11-12", + "2010-09-28", + "2010-09-28", + ] + ] + df["B"] = dts[::2] + dts[1::2] + df["C"] = 2.0 + df["A1"] = 3.0 + + df1 = df.sort_values(by="A") + df2 = df.sort_values(by=["A"]) + tm.assert_frame_equal(df1, df2) + + df1 = df.sort_values(by="B") + df2 = df.sort_values(by=["B"]) + tm.assert_frame_equal(df1, df2) + + df1 = df.sort_values(by="B") + + df2 = df.sort_values(by=["C", "B"]) + tm.assert_frame_equal(df1, df2) + + def test_sort_values_frame_column_inplace_sort_exception(self, float_frame): + s = float_frame["A"] + with pytest.raises(ValueError, match="This Series is a view"): + s.sort_values(inplace=True) + + cp = s.copy() + cp.sort_values() # it works! + + def test_sort_values_nat_values_in_int_column(self): + + # GH#14922: "sorting with large float and multiple columns incorrect" + + # cause was that the int64 value NaT was considered as "na". Which is + # only correct for datetime64 columns. 
+ + int_values = (2, int(NaT)) + float_values = (2.0, -1.797693e308) + + df = DataFrame( + dict(int=int_values, float=float_values), columns=["int", "float"] + ) + + df_reversed = DataFrame( + dict(int=int_values[::-1], float=float_values[::-1]), + columns=["int", "float"], + index=[1, 0], + ) + + # NaT is not a "na" for int64 columns, so na_position must not + # influence the result: + df_sorted = df.sort_values(["int", "float"], na_position="last") + tm.assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["int", "float"], na_position="first") + tm.assert_frame_equal(df_sorted, df_reversed) + + # reverse sorting order + df_sorted = df.sort_values(["int", "float"], ascending=False) + tm.assert_frame_equal(df_sorted, df) + + # and now check if NaT is still considered as "na" for datetime64 + # columns: + df = DataFrame( + dict(datetime=[Timestamp("2016-01-01"), NaT], float=float_values), + columns=["datetime", "float"], + ) + + df_reversed = DataFrame( + dict(datetime=[NaT, Timestamp("2016-01-01")], float=float_values[::-1]), + columns=["datetime", "float"], + index=[1, 0], + ) + + df_sorted = df.sort_values(["datetime", "float"], na_position="first") + tm.assert_frame_equal(df_sorted, df_reversed) + + df_sorted = df.sort_values(["datetime", "float"], na_position="last") + tm.assert_frame_equal(df_sorted, df) + + # Ascending should not affect the results. + df_sorted = df.sort_values(["datetime", "float"], ascending=False) + tm.assert_frame_equal(df_sorted, df) + + def test_sort_values_na_position_with_categories(self): + # GH#22556 + # Positioning missing value properly when column is Categorical. 
+ categories = ["A", "B", "C"] + category_indices = [0, 2, 4] + list_of_nans = [np.nan, np.nan] + na_indices = [1, 3] + na_position_first = "first" + na_position_last = "last" + column_name = "c" + + reversed_categories = sorted(categories, reverse=True) + reversed_category_indices = sorted(category_indices, reverse=True) + reversed_na_indices = sorted(na_indices) + + df = pd.DataFrame( + { + column_name: pd.Categorical( + ["A", np.nan, "B", np.nan, "C"], categories=categories, ordered=True + ) + } + ) + # sort ascending with na first + result = df.sort_values( + by=column_name, ascending=True, na_position=na_position_first + ) + expected = DataFrame( + { + column_name: Categorical( + list_of_nans + categories, categories=categories, ordered=True + ) + }, + index=na_indices + category_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort ascending with na last + result = df.sort_values( + by=column_name, ascending=True, na_position=na_position_last + ) + expected = DataFrame( + { + column_name: Categorical( + categories + list_of_nans, categories=categories, ordered=True + ) + }, + index=category_indices + na_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort descending with na first + result = df.sort_values( + by=column_name, ascending=False, na_position=na_position_first + ) + expected = DataFrame( + { + column_name: Categorical( + list_of_nans + reversed_categories, + categories=categories, + ordered=True, + ) + }, + index=reversed_na_indices + reversed_category_indices, + ) + + tm.assert_frame_equal(result, expected) + + # sort descending with na last + result = df.sort_values( + by=column_name, ascending=False, na_position=na_position_last + ) + expected = DataFrame( + { + column_name: Categorical( + reversed_categories + list_of_nans, + categories=categories, + ordered=True, + ) + }, + index=reversed_category_indices + reversed_na_indices, + ) + + tm.assert_frame_equal(result, expected) + + def test_sort_values_nat(self): + + # 
GH#16836 + + d1 = [Timestamp(x) for x in ["2016-01-01", "2015-01-01", np.nan, "2016-01-01"]] + d2 = [ + Timestamp(x) + for x in ["2017-01-01", "2014-01-01", "2016-01-01", "2015-01-01"] + ] + df = pd.DataFrame({"a": d1, "b": d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ["2015-01-01", "2016-01-01", "2016-01-01", np.nan]] + d4 = [ + Timestamp(x) + for x in ["2014-01-01", "2015-01-01", "2017-01-01", "2016-01-01"] + ] + expected = pd.DataFrame({"a": d3, "b": d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=["a", "b"]) + tm.assert_frame_equal(sorted_df, expected) + + def test_sort_values_na_position_with_categories_raises(self): + df = pd.DataFrame( + { + "c": pd.Categorical( + ["A", np.nan, "B", np.nan, "C"], + categories=["A", "B", "C"], + ordered=True, + ) + } + ) + + with pytest.raises(ValueError): + df.sort_values(by="c", ascending=False, na_position="bad_position") + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_dict, sorted_dict, ignore_index, output_index", + [ + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, True, [0, 1, 2]), + ({"A": [1, 2, 3]}, {"A": [3, 2, 1]}, False, [2, 1, 0]), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + True, + [0, 1, 2], + ), + ( + {"A": [1, 2, 3], "B": [2, 3, 4]}, + {"A": [3, 2, 1], "B": [4, 3, 2]}, + False, + [2, 1, 0], + ), + ], + ) + def test_sort_values_ignore_index( + self, inplace, original_dict, sorted_dict, ignore_index, output_index + ): + # GH 30114 + df = DataFrame(original_dict) + expected = DataFrame(sorted_dict, index=output_index) + kwargs = {"ignore_index": ignore_index, "inplace": inplace} + + if inplace: + result_df = df.copy() + result_df.sort_values("A", ascending=False, **kwargs) + else: + result_df = df.sort_values("A", ascending=False, **kwargs) + + tm.assert_frame_equal(result_df, expected) + tm.assert_frame_equal(df, DataFrame(original_dict)) + + def test_sort_values_nat_na_position_default(self): + # GH 13230 + expected = 
pd.DataFrame( + { + "A": [1, 2, 3, 4, 4], + "date": pd.DatetimeIndex( + [ + "2010-01-01 09:00:00", + "2010-01-01 09:00:01", + "2010-01-01 09:00:02", + "2010-01-01 09:00:03", + "NaT", + ] + ), + } + ) + result = expected.sort_values(["A", "date"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py new file mode 100644 index 00000000..7b0adceb --- /dev/null +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -0,0 +1,258 @@ +from collections import OrderedDict, defaultdict +from datetime import datetime + +import numpy as np +import pytest +import pytz + +from pandas import DataFrame, Series, Timestamp +import pandas._testing as tm + + +class TestDataFrameToDict: + def test_to_dict_timestamp(self): + + # GH#11247 + # split/records producing np.datetime64 rather than Timestamps + # on datetime64[ns] dtypes only + + tsmp = Timestamp("20130101") + test_data = DataFrame({"A": [tsmp, tsmp], "B": [tsmp, tsmp]}) + test_data_mixed = DataFrame({"A": [tsmp, tsmp], "B": [1, 2]}) + + expected_records = [{"A": tsmp, "B": tsmp}, {"A": tsmp, "B": tsmp}] + expected_records_mixed = [{"A": tsmp, "B": 1}, {"A": tsmp, "B": 2}] + + assert test_data.to_dict(orient="records") == expected_records + assert test_data_mixed.to_dict(orient="records") == expected_records_mixed + + expected_series = { + "A": Series([tsmp, tsmp], name="A"), + "B": Series([tsmp, tsmp], name="B"), + } + expected_series_mixed = { + "A": Series([tsmp, tsmp], name="A"), + "B": Series([1, 2], name="B"), + } + + tm.assert_dict_equal(test_data.to_dict(orient="series"), expected_series) + tm.assert_dict_equal( + test_data_mixed.to_dict(orient="series"), expected_series_mixed + ) + + expected_split = { + "index": [0, 1], + "data": [[tsmp, tsmp], [tsmp, tsmp]], + "columns": ["A", "B"], + } + expected_split_mixed = { + "index": [0, 1], + "data": [[tsmp, 1], [tsmp, 2]], + "columns": ["A", "B"], + } + + 
tm.assert_dict_equal(test_data.to_dict(orient="split"), expected_split) + tm.assert_dict_equal( + test_data_mixed.to_dict(orient="split"), expected_split_mixed + ) + + def test_to_dict_index_not_unique_with_index_orient(self): + # GH#22801 + # Data loss when indexes are not unique. Raise ValueError. + df = DataFrame({"a": [1, 2], "b": [0.5, 0.75]}, index=["A", "A"]) + msg = "DataFrame index must be unique for orient='index'" + with pytest.raises(ValueError, match=msg): + df.to_dict(orient="index") + + def test_to_dict_invalid_orient(self): + df = DataFrame({"A": [0, 1]}) + msg = "orient 'xinvalid' not understood" + with pytest.raises(ValueError, match=msg): + df.to_dict(orient="xinvalid") + + @pytest.mark.parametrize("mapping", [dict, defaultdict(list), OrderedDict]) + def test_to_dict(self, mapping): + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + + # GH#16122 + recons_data = DataFrame(test_data).to_dict(into=mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][k2] + + recons_data = DataFrame(test_data).to_dict("l", mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][int(k2) - 1] + + recons_data = DataFrame(test_data).to_dict("s", mapping) + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k][k2] + + recons_data = DataFrame(test_data).to_dict("sp", mapping) + expected_split = { + "columns": ["A", "B"], + "index": ["1", "2", "3"], + "data": [[1.0, "1"], [2.0, "2"], [np.nan, "3"]], + } + tm.assert_dict_equal(recons_data, expected_split) + + recons_data = DataFrame(test_data).to_dict("r", mapping) + expected_records = [ + {"A": 1.0, "B": "1"}, + {"A": 2.0, "B": "2"}, + {"A": np.nan, "B": "3"}, + ] + assert isinstance(recons_data, list) + assert len(recons_data) == 3 + for l, r in zip(recons_data, expected_records): + tm.assert_dict_equal(l, r) + + # GH#10844 + recons_data = 
DataFrame(test_data).to_dict("i") + + for k, v in test_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k2][k] + + df = DataFrame(test_data) + df["duped"] = df[df.columns[0]] + recons_data = df.to_dict("i") + comp_data = test_data.copy() + comp_data["duped"] = comp_data[df.columns[0]] + for k, v in comp_data.items(): + for k2, v2 in v.items(): + assert v2 == recons_data[k2][k] + + @pytest.mark.parametrize("mapping", [list, defaultdict, []]) + def test_to_dict_errors(self, mapping): + # GH#16122 + df = DataFrame(np.random.randn(3, 3)) + with pytest.raises(TypeError): + df.to_dict(into=mapping) + + def test_to_dict_not_unique_warning(self): + # GH#16927: When converting to a dict, if a column has a non-unique name + # it will be dropped, throwing a warning. + df = DataFrame([[1, 2, 3]], columns=["a", "a", "b"]) + with tm.assert_produces_warning(UserWarning): + df.to_dict() + + # orient - orient argument to to_dict function + # item_getter - function for extracting value from + # the resulting dict using column name and index + @pytest.mark.parametrize( + "orient,item_getter", + [ + ("dict", lambda d, col, idx: d[col][idx]), + ("records", lambda d, col, idx: d[idx][col]), + ("list", lambda d, col, idx: d[col][idx]), + ("split", lambda d, col, idx: d["data"][idx][d["columns"].index(col)]), + ("index", lambda d, col, idx: d[idx][col]), + ], + ) + def test_to_dict_box_scalars(self, orient, item_getter): + # GH#14216, GH#23753 + # make sure that we are boxing properly + df = DataFrame({"a": [1, 2], "b": [0.1, 0.2]}) + result = df.to_dict(orient=orient) + assert isinstance(item_getter(result, "a", 0), int) + assert isinstance(item_getter(result, "b", 0), float) + + def test_to_dict_tz(self): + # GH#18372 When converting to dict with orient='records' columns of + # datetime that are tz-aware were not converted to required arrays + data = [ + (datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),), + (datetime(2017, 11, 18, 22, 6, 30, 61810, 
tzinfo=pytz.utc),), + ] + df = DataFrame(list(data), columns=["d"]) + + result = df.to_dict(orient="records") + expected = [ + {"d": Timestamp("2017-11-18 21:53:00.219225+0000", tz=pytz.utc)}, + {"d": Timestamp("2017-11-18 22:06:30.061810+0000", tz=pytz.utc)}, + ] + tm.assert_dict_equal(result[0], expected[0]) + tm.assert_dict_equal(result[1], expected[1]) + + @pytest.mark.parametrize( + "into, expected", + [ + ( + dict, + { + 0: {"int_col": 1, "float_col": 1.0}, + 1: {"int_col": 2, "float_col": 2.0}, + 2: {"int_col": 3, "float_col": 3.0}, + }, + ), + ( + OrderedDict, + OrderedDict( + [ + (0, {"int_col": 1, "float_col": 1.0}), + (1, {"int_col": 2, "float_col": 2.0}), + (2, {"int_col": 3, "float_col": 3.0}), + ] + ), + ), + ( + defaultdict(dict), + defaultdict( + dict, + { + 0: {"int_col": 1, "float_col": 1.0}, + 1: {"int_col": 2, "float_col": 2.0}, + 2: {"int_col": 3, "float_col": 3.0}, + }, + ), + ), + ], + ) + def test_to_dict_index_dtypes(self, into, expected): + # GH#18580 + # When using to_dict(orient='index') on a dataframe with int + # and float columns only the int columns were cast to float + + df = DataFrame({"int_col": [1, 2, 3], "float_col": [1.0, 2.0, 3.0]}) + + result = df.to_dict(orient="index", into=into) + cols = ["int_col", "float_col"] + result = DataFrame.from_dict(result, orient="index")[cols] + expected = DataFrame.from_dict(expected, orient="index")[cols] + tm.assert_frame_equal(result, expected) + + def test_to_dict_numeric_names(self): + # GH#24940 + df = DataFrame({str(i): [i] for i in range(5)}) + result = set(df.to_dict("records")[0].keys()) + expected = set(df.columns) + assert result == expected + + def test_to_dict_wide(self): + # GH#24939 + df = DataFrame({("A_{:d}".format(i)): [i] for i in range(256)}) + result = df.to_dict("records")[0] + expected = {"A_{:d}".format(i): i for i in range(256)} + assert result == expected + + def test_to_dict_orient_dtype(self): + # GH#22620 + # Input Data + input_data = {"a": [1, 2, 3], "b": [1.0, 
2.0, 3.0], "c": ["X", "Y", "Z"]} + df = DataFrame(input_data) + # Expected Dtypes + expected = {"a": int, "b": float, "c": str} + # Extracting dtypes out of to_dict operation + for df_dict in df.to_dict("records"): + result = { + "a": type(df_dict["a"]), + "b": type(df_dict["b"]), + "c": type(df_dict["c"]), + } + assert result == expected diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py new file mode 100644 index 00000000..d0181f03 --- /dev/null +++ b/pandas/tests/frame/methods/test_to_records.py @@ -0,0 +1,360 @@ +from collections import abc + +import numpy as np +import pytest + +from pandas import CategoricalDtype, DataFrame, MultiIndex, Series, date_range +import pandas._testing as tm + + +class TestDataFrameToRecords: + def test_to_records_dt64(self): + df = DataFrame( + [["one", "two", "three"], ["four", "five", "six"]], + index=date_range("2012-01-01", "2012-01-02"), + ) + + expected = df.index.values[0] + result = df.to_records()["index"][0] + assert expected == result + + def test_to_records_with_multindex(self): + # GH#3189 + index = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + data = np.zeros((8, 4)) + df = DataFrame(data, index=index) + r = df.to_records(index=True)["level_0"] + assert "bar" in r + assert "one" not in r + + def test_to_records_with_Mapping_type(self): + import email + from email.parser import Parser + + abc.Mapping.register(email.message.Message) + + headers = Parser().parsestr( + "From: \n" + "To: \n" + "Subject: Test message\n" + "\n" + "Body would go here\n" + ) + + frame = DataFrame.from_records([headers]) + all(x in frame for x in ["Type", "Subject", "From"]) + + def test_to_records_floats(self): + df = DataFrame(np.random.rand(10, 10)) + df.to_records() + + def test_to_records_index_name(self): + df = DataFrame(np.random.randn(3, 3)) + df.index.name = "X" + rs = df.to_records() + assert "X" 
in rs.dtype.fields + + df = DataFrame(np.random.randn(3, 3)) + rs = df.to_records() + assert "index" in rs.dtype.fields + + df.index = MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "z")]) + df.index.names = ["A", None] + rs = df.to_records() + assert "level_0" in rs.dtype.fields + + def test_to_records_with_unicode_index(self): + # GH#13172 + # unicode_literals conflict with to_records + result = DataFrame([{"a": "x", "b": "y"}]).set_index("a").to_records() + expected = np.rec.array([("x", "y")], dtype=[("a", "O"), ("b", "O")]) + tm.assert_almost_equal(result, expected) + + def test_to_records_with_unicode_column_names(self): + # xref issue: https://github.com/numpy/numpy/issues/2407 + # Issue GH#11879. to_records used to raise an exception when used + # with column names containing non-ascii characters in Python 2 + result = DataFrame(data={"accented_name_é": [1.0]}).to_records() + + # Note that numpy allows for unicode field names but dtypes need + # to be specified using dictionary instead of list of tuples. + expected = np.rec.array( + [(0, 1.0)], + dtype={"names": ["index", "accented_name_é"], "formats": ["=i8", "=f8"]}, + ) + tm.assert_almost_equal(result, expected) + + def test_to_records_with_categorical(self): + # GH#8626 + + # dict creation + df = DataFrame({"A": list("abc")}, dtype="category") + expected = Series(list("abc"), dtype="category", name="A") + tm.assert_series_equal(df["A"], expected) + + # list-like creation + df = DataFrame(list("abc"), dtype="category") + expected = Series(list("abc"), dtype="category", name=0) + tm.assert_series_equal(df[0], expected) + + # to record array + # this coerces + result = df.to_records() + expected = np.rec.array( + [(0, "a"), (1, "b"), (2, "c")], dtype=[("index", "=i8"), ("0", "O")] + ) + tm.assert_almost_equal(result, expected) + + @pytest.mark.parametrize( + "kwargs,expected", + [ + # No dtypes --> default to array dtypes. 
+ ( + dict(), + np.rec.array( + [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")], + dtype=[("index", " bool: + return key in self.d + + def keys(self): + return self.d.keys() + + df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]}) + + dtype_mappings = dict( + column_dtypes=DictLike(**{"A": np.int8, "B": np.float32}), + index_dtypes="= 1)] + result = df2.set_index("key") + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # Add list-of-list constructor because list is ambiguous -> lambda + # also test index name if append=True (name is duplicate here for B) + @pytest.mark.parametrize( + "box", + [ + Series, + Index, + np.array, + list, + lambda x: [list(x)], + lambda x: MultiIndex.from_arrays([x]), + ], + ) + @pytest.mark.parametrize( + "append, index_name", [(True, None), (True, "B"), (True, "test"), (False, None)] + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_single_array( + self, frame_of_index_cols, drop, append, index_name, box + ): + df = frame_of_index_cols + df.index.name = index_name + + key = box(df["B"]) + if box == list: + # list of strings gets interpreted as list of keys + msg = "['one', 'two', 'three', 'one', 'two']" + with pytest.raises(KeyError, match=msg): + df.set_index(key, drop=drop, append=append) + else: + # np.array/list-of-list "forget" the name of B + name_mi = getattr(key, "names", None) + name = [getattr(key, "name", None)] if name_mi is None else name_mi + + result = df.set_index(key, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, nothing is dropped + expected = df.set_index(["B"], drop=False, append=append) + expected.index.names = [index_name] + name if append else name + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # also test index name if append=True (name is duplicate here for A & B) + 
@pytest.mark.parametrize( + "box", [Series, Index, np.array, list, lambda x: MultiIndex.from_arrays([x])] + ) + @pytest.mark.parametrize( + "append, index_name", + [(True, None), (True, "A"), (True, "B"), (True, "test"), (False, None)], + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_arrays( + self, frame_of_index_cols, drop, append, index_name, box + ): + df = frame_of_index_cols + df.index.name = index_name + + keys = ["A", box(df["B"])] + # np.array/list "forget" the name of B + names = ["A", None if box in [np.array, list, tuple, iter] else "B"] + + result = df.set_index(keys, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, only A is dropped, if at all + expected = df.set_index(["A", "B"], drop=False, append=append) + expected = expected.drop("A", axis=1) if drop else expected + expected.index.names = [index_name] + names if append else names + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # We also emulate a "constructor" for the label -> lambda + # also test index name if append=True (name is duplicate here for A) + @pytest.mark.parametrize( + "box2", + [ + Series, + Index, + np.array, + list, + iter, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name, + ], + ) + @pytest.mark.parametrize( + "box1", + [ + Series, + Index, + np.array, + list, + iter, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name, + ], + ) + @pytest.mark.parametrize( + "append, index_name", [(True, None), (True, "A"), (True, "test"), (False, None)] + ) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_arrays_duplicate( + self, frame_of_index_cols, drop, append, index_name, box1, box2 + ): + df = frame_of_index_cols + df.index.name = index_name + + keys = [box1(df["A"]), box2(df["A"])] + result = df.set_index(keys, drop=drop, append=append) + + # if either box is iter, it has been 
consumed; re-read + keys = [box1(df["A"]), box2(df["A"])] + + # need to adapt first drop for case that both keys are 'A' -- + # cannot drop the same column twice; + # use "is" because == would give ambiguous Boolean error for containers + first_drop = ( + False if (keys[0] is "A" and keys[1] is "A") else drop # noqa: F632 + ) + # to test against already-tested behaviour, we add sequentially, + # hence second append always True; must wrap keys in list, otherwise + # box = list would be interpreted as keys + expected = df.set_index([keys[0]], drop=first_drop, append=append) + expected = expected.set_index([keys[1]], drop=drop, append=True) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_pass_multiindex(self, frame_of_index_cols, drop, append): + df = frame_of_index_cols + keys = MultiIndex.from_arrays([df["A"], df["B"]], names=["A", "B"]) + + result = df.set_index(keys, drop=drop, append=append) + + # setting with a MultiIndex will never drop columns + expected = df.set_index(["A", "B"], drop=False, append=append) + + tm.assert_frame_equal(result, expected) + + def test_set_index_verify_integrity(self, frame_of_index_cols): + df = frame_of_index_cols + + with pytest.raises(ValueError, match="Index has duplicate keys"): + df.set_index("A", verify_integrity=True) + # with MultiIndex + with pytest.raises(ValueError, match="Index has duplicate keys"): + df.set_index([df["A"], df["A"]], verify_integrity=True) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): + df = frame_of_index_cols + + with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"): + # column names are A-E, as well as one tuple + df.set_index(["foo", "bar", "baz"], drop=drop, append=append) + + # non-existent key in list with arrays + with pytest.raises(KeyError, 
match="X"): + df.set_index([df["A"], df["B"], "X"], drop=drop, append=append) + + msg = "[('foo', 'foo', 'foo', 'bar', 'bar')]" + # tuples always raise KeyError + with pytest.raises(KeyError, match=msg): + df.set_index(tuple(df["A"]), drop=drop, append=append) + + # also within a list + with pytest.raises(KeyError, match=msg): + df.set_index(["A", df["A"], tuple(df["A"])], drop=drop, append=append) + + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + @pytest.mark.parametrize("box", [set], ids=["set"]) + def test_set_index_raise_on_type(self, frame_of_index_cols, box, drop, append): + df = frame_of_index_cols + + msg = 'The parameter "keys" may be a column key, .*' + # forbidden type, e.g. set + with pytest.raises(TypeError, match=msg): + df.set_index(box(df["A"]), drop=drop, append=append) + + # forbidden type in list, e.g. set + with pytest.raises(TypeError, match=msg): + df.set_index(["A", df["A"], box(df["A"])], drop=drop, append=append) + + # MultiIndex constructor does not work directly on Series -> lambda + @pytest.mark.parametrize( + "box", + [Series, Index, np.array, iter, lambda x: MultiIndex.from_arrays([x])], + ids=["Series", "Index", "np.array", "iter", "MultiIndex"], + ) + @pytest.mark.parametrize("length", [4, 6], ids=["too_short", "too_long"]) + @pytest.mark.parametrize("append", [True, False]) + @pytest.mark.parametrize("drop", [True, False]) + def test_set_index_raise_on_len( + self, frame_of_index_cols, box, length, drop, append + ): + # GH 24984 + df = frame_of_index_cols # has length 5 + + values = np.random.randint(0, 10, (length,)) + + msg = "Length mismatch: Expected 5 rows, received array of length.*" + + # wrong length directly + with pytest.raises(ValueError, match=msg): + df.set_index(box(values), drop=drop, append=append) + + # wrong length in list + with pytest.raises(ValueError, match=msg): + df.set_index(["A", df.A, box(values)], drop=drop, append=append) + + def 
test_set_index_custom_label_type(self): + # GH 24969 + + class Thing: + def __init__(self, name, color): + self.name = name + self.color = color + + def __str__(self) -> str: + return f"" + + # necessary for pretty KeyError + __repr__ = __str__ + + thing1 = Thing("One", "red") + thing2 = Thing("Two", "blue") + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + + # missing key + thing3 = Thing("Three", "pink") + msg = "" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + + def test_set_index_custom_label_hashable_iterable(self): + # GH 24969 + + # actual example discussed in GH 24984 was e.g. for shapely.geometry + # objects (e.g. 
a collection of Points) that can be both hashable and + # iterable; using frozenset as a stand-in for testing here + + class Thing(frozenset): + # need to stabilize repr for KeyError (due to random order in sets) + def __repr__(self) -> str: + tmp = sorted(self) + # double curly brace prints one brace in format string + return "frozenset({{{}}})".format(", ".join(map(repr, tmp))) + + thing1 = Thing(["One", "red"]) + thing2 = Thing(["Two", "blue"]) + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + + # missing key + thing3 = Thing(["Three", "pink"]) + msg = r"frozenset\(\{'Three', 'pink'\}\)" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + + def test_set_index_custom_label_type_raises(self): + # GH 24969 + + # purposefully inherit from something unhashable + class Thing(set): + def __init__(self, name, color): + self.name = name + self.color = color + + def __str__(self) -> str: + return f"" + + thing1 = Thing("One", "red") + thing2 = Thing("Two", "blue") + df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2]) + + msg = 'The parameter "keys" may be a column key, .*' + + with pytest.raises(TypeError, match=msg): + # use custom label directly + df.set_index(thing2) + + with pytest.raises(TypeError, match=msg): + # custom label wrapped in list + df.set_index([thing2]) + + def test_construction_with_categorical_index(self): + ci = tm.makeCategoricalIndex(10) + ci.name = "B" + + # with Categorical + df = DataFrame({"A": np.random.randn(10), "B": ci.values}) + idf = df.set_index("B") + tm.assert_index_equal(idf.index, ci) + + # from a 
CategoricalIndex + df = DataFrame({"A": np.random.randn(10), "B": ci}) + idf = df.set_index("B") + tm.assert_index_equal(idf.index, ci) + + # round-trip + idf = idf.reset_index().set_index("B") + tm.assert_index_equal(idf.index, ci) + + def test_set_index_cast_datetimeindex(self): + df = DataFrame( + { + "A": [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], + "B": np.random.randn(1000), + } + ) + + idf = df.set_index("A") + assert isinstance(idf.index, DatetimeIndex) + + def test_convert_dti_to_series(self): + # don't cast a DatetimeIndex WITH a tz, leave as object + # GH 6032 + idx = DatetimeIndex( + to_datetime(["2013-1-1 13:00", "2013-1-2 14:00"]), name="B" + ).tz_localize("US/Pacific") + df = DataFrame(np.random.randn(2, 1), columns=["A"]) + + expected = Series( + np.array( + [ + Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), + Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), + ], + dtype="object", + ), + name="B", + ) + + # convert index to series + result = Series(idx) + tm.assert_series_equal(result, expected) + + # assign to frame + df["B"] = idx + result = df["B"] + tm.assert_series_equal(result, expected) + + # convert to series while keeping the timezone + msg = "stop passing 'keep_tz'" + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=True, index=[0, 1]) + tm.assert_series_equal(result, expected) + assert msg in str(m[0].message) + + # convert to utc + with tm.assert_produces_warning(FutureWarning) as m: + df["B"] = idx.to_series(keep_tz=False, index=[0, 1]) + result = df["B"] + comp = Series(DatetimeIndex(expected.values).tz_localize(None), name="B") + tm.assert_series_equal(result, comp) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) + + result = idx.to_series(index=[0, 1]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning) as m: + result = idx.to_series(keep_tz=False, index=[0, 1]) + 
tm.assert_series_equal(result, expected.dt.tz_convert(None)) + msg = "do 'idx.tz_convert(None)' before calling" + assert msg in str(m[0].message) + + # list of datetimes with a tz + df["B"] = idx.to_pydatetime() + result = df["B"] + tm.assert_series_equal(result, expected) + + # GH 6785 + # set the index manually + import pytz + + df = DataFrame([{"ts": datetime(2014, 4, 1, tzinfo=pytz.utc), "foo": 1}]) + expected = df.set_index("ts") + df.index = df["ts"] + df.pop("ts") + tm.assert_frame_equal(df, expected) + + def test_reset_index_tz(self, tz_aware_fixture): + # GH 3950 + # reset_index with single level + tz = tz_aware_fixture + idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx") + df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx) + + expected = DataFrame( + { + "idx": [ + datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5), + ], + "a": range(5), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx", "a", "b"], + ) + expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) + + def test_set_index_timezone(self): + # GH 12358 + # tz-aware Series should retain the tz + idx = to_datetime(["2014-01-01 10:10:10"], utc=True).tz_convert("Europe/Rome") + df = DataFrame({"A": idx}) + assert df.set_index(idx).index[0].hour == 11 + assert DatetimeIndex(Series(df.A))[0].hour == 11 + assert df.set_index(df.A).index[0].hour == 11 + + def test_set_index_dst(self): + di = date_range("2006-10-29 00:00:00", periods=3, freq="H", tz="US/Pacific") + + df = DataFrame(data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=di).reset_index() + # single level + res = df.set_index("index") + exp = DataFrame( + data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=Index(di, name="index") + ) + tm.assert_frame_equal(res, exp) + + # GH 12920 + res = df.set_index(["index", "a"]) + exp_index = MultiIndex.from_arrays([di, [0, 1, 2]], 
names=["index", "a"]) + exp = DataFrame({"b": [3, 4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp) + + def test_reset_index_with_intervals(self): + idx = IntervalIndex.from_breaks(np.arange(11), name="x") + original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] + + result = original.set_index("x") + expected = DataFrame({"y": np.arange(10)}, index=idx) + tm.assert_frame_equal(result, expected) + + result2 = result.reset_index() + tm.assert_frame_equal(result2, original) + + def test_set_index_multiindexcolumns(self): + columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)]) + df = DataFrame(np.random.randn(3, 3), columns=columns) + result = df.set_index(df.columns[0]) + expected = df.iloc[:, 1:] + expected.index = df.iloc[:, 0].values + expected.index.names = [df.columns[0]] + tm.assert_frame_equal(result, expected) + + def test_set_index_empty_column(self): + # GH 1971 + df = DataFrame( + [ + {"a": 1, "p": 0}, + {"a": 2, "m": 10}, + {"a": 3, "m": 11, "p": 20}, + {"a": 4, "m": 12, "p": 21}, + ], + columns=("a", "m", "p", "x"), + ) + + result = df.set_index(["a", "x"]) + expected = df[["m", "p"]] + expected.index = MultiIndex.from_arrays([df["a"], df["x"]], names=["a", "x"]) + tm.assert_frame_equal(result, expected) + + def test_set_columns(self, float_string_frame): + cols = Index(np.arange(len(float_string_frame.columns))) + float_string_frame.columns = cols + with pytest.raises(ValueError, match="Length mismatch"): + float_string_frame.columns = cols[::2] + + def test_dti_set_index_reindex(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern") + idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.reindex(idx2) + tm.assert_index_equal(df.index, idx2) + + # GH 11314 + # with tz + index = date_range( + datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", 
tz="US/Eastern" + ) + df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index) + new_index = date_range( + datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern" + ) + + result = df.set_index(new_index) + assert result.index.freq == index.freq + + # Renaming + + def test_rename(self, float_frame): + mapping = {"A": "a", "B": "b", "C": "c", "D": "d"} + + renamed = float_frame.rename(columns=mapping) + renamed2 = float_frame.rename(columns=str.lower) + + tm.assert_frame_equal(renamed, renamed2) + tm.assert_frame_equal( + renamed2.rename(columns=str.upper), float_frame, check_names=False + ) + + # index + data = {"A": {"foo": 0, "bar": 1}} + + # gets sorted alphabetical + df = DataFrame(data) + renamed = df.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["foo", "bar"])) + + renamed = df.rename(index=str.upper) + tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"])) + + # have to pass something + with pytest.raises(TypeError, match="must pass an index to rename"): + float_frame.rename() + + # partial columns + renamed = float_frame.rename(columns={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"])) + + # other axis + renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"}) + tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"])) + + # index with name + index = Index(["foo", "bar"], name="name") + renamer = DataFrame(data, index=index) + renamed = renamer.rename(index={"foo": "bar", "bar": "foo"}) + tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name")) + assert renamed.index.name == renamer.index.name + + @pytest.mark.parametrize( + "args,kwargs", + [ + ((ChainMap({"A": "a"}, {"B": "b"}),), dict(axis="columns")), + ((), dict(columns=ChainMap({"A": "a"}, {"B": "b"}))), + ], + ) + def test_rename_chainmap(self, args, kwargs): + # see gh-23859 + colAData = range(1, 11) + colBdata = np.random.randn(10) + + df = 
DataFrame({"A": colAData, "B": colBdata}) + result = df.rename(*args, **kwargs) + + expected = DataFrame({"a": colAData, "b": colBdata}) + tm.assert_frame_equal(result, expected) + + def test_rename_axis_inplace(self, float_frame): + # GH 15704 + expected = float_frame.rename_axis("foo") + result = float_frame.copy() + no_return = result.rename_axis("foo", inplace=True) + + assert no_return is None + tm.assert_frame_equal(result, expected) + + expected = float_frame.rename_axis("bar", axis=1) + result = float_frame.copy() + no_return = result.rename_axis("bar", axis=1, inplace=True) + + assert no_return is None + tm.assert_frame_equal(result, expected) + + def test_rename_axis_raises(self): + # https://github.com/pandas-dev/pandas/issues/17833 + df = DataFrame({"A": [1, 2], "B": [1, 2]}) + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis(id, axis=0) + + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis({0: 10, 1: 20}, axis=0) + + with pytest.raises(ValueError, match="Use `.rename`"): + df.rename_axis(id, axis=1) + + with pytest.raises(ValueError, match="Use `.rename`"): + df["A"].rename_axis(id) + + def test_rename_axis_mapper(self): + # GH 19978 + mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"]) + df = DataFrame( + {"x": list(range(len(mi))), "y": [i * 10 for i in range(len(mi))]}, index=mi + ) + + # Test for rename of the Index object of columns + result = df.rename_axis("cols", axis=1) + tm.assert_index_equal(result.columns, Index(["x", "y"], name="cols")) + + # Test for rename of the Index object of columns using dict + result = result.rename_axis(columns={"cols": "new"}, axis=1) + tm.assert_index_equal(result.columns, Index(["x", "y"], name="new")) + + # Test for renaming index using dict + result = df.rename_axis(index={"ll": "foo"}) + assert result.index.names == ["foo", "nn"] + + # Test for renaming index using a function + result = df.rename_axis(index=str.upper, axis=0) + assert 
result.index.names == ["LL", "NN"] + + # Test for renaming index providing complete list + result = df.rename_axis(index=["foo", "goo"]) + assert result.index.names == ["foo", "goo"] + + # Test for changing index and columns at same time + sdf = df.reset_index().set_index("nn").drop(columns=["ll", "y"]) + result = sdf.rename_axis(index="foo", columns="meh") + assert result.index.name == "foo" + assert result.columns.name == "meh" + + # Test different error cases + with pytest.raises(TypeError, match="Must pass"): + df.rename_axis(index="wrong") + + with pytest.raises(ValueError, match="Length of names"): + df.rename_axis(index=["wrong"]) + + with pytest.raises(TypeError, match="bogus"): + df.rename_axis(bogus=None) + + @pytest.mark.parametrize( + "kwargs, rename_index, rename_columns", + [ + ({"mapper": None, "axis": 0}, True, False), + ({"mapper": None, "axis": 1}, False, True), + ({"index": None}, True, False), + ({"columns": None}, False, True), + ({"index": None, "columns": None}, True, True), + ({}, False, False), + ], + ) + def test_rename_axis_none(self, kwargs, rename_index, rename_columns): + # GH 25034 + index = Index(list("abc"), name="foo") + columns = Index(["col1", "col2"], name="bar") + data = np.arange(6).reshape(3, 2) + df = DataFrame(data, index, columns) + + result = df.rename_axis(**kwargs) + expected_index = index.rename(None) if rename_index else index + expected_columns = columns.rename(None) if rename_columns else columns + expected = DataFrame(data, expected_index, expected_columns) + tm.assert_frame_equal(result, expected) + + def test_rename_multiindex(self): + + tuples_index = [("foo1", "bar1"), ("foo2", "bar2")] + tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")] + index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"]) + columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"]) + df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns) + + # + # without specifying level -> across all levels + + 
renamed = df.rename( + index={"foo1": "foo3", "bar2": "bar3"}, + columns={"fizz1": "fizz3", "buzz2": "buzz3"}, + ) + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"] + ) + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + tm.assert_index_equal(renamed.index, new_index) + tm.assert_index_equal(renamed.columns, new_columns) + assert renamed.index.names == df.index.names + assert renamed.columns.names == df.columns.names + + # + # with specifying a level (GH13766) + + # dict + new_columns = MultiIndex.from_tuples( + [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz") + tm.assert_index_equal(renamed.columns, new_columns) + + # function + func = str.upper + new_columns = MultiIndex.from_tuples( + [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=0) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="fizz") + tm.assert_index_equal(renamed.columns, new_columns) + + new_columns = MultiIndex.from_tuples( + [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"] + ) + renamed = df.rename(columns=func, level=1) + tm.assert_index_equal(renamed.columns, new_columns) + renamed = df.rename(columns=func, level="buzz") + tm.assert_index_equal(renamed.columns, 
new_columns) + + # index + new_index = MultiIndex.from_tuples( + [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"] + ) + renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) + tm.assert_index_equal(renamed.index, new_index) + + def test_rename_nocopy(self, float_frame): + renamed = float_frame.rename(columns={"C": "foo"}, copy=False) + renamed["foo"] = 1.0 + assert (float_frame["C"] == 1.0).all() + + def test_rename_inplace(self, float_frame): + float_frame.rename(columns={"C": "foo"}) + assert "C" in float_frame + assert "foo" not in float_frame + + c_id = id(float_frame["C"]) + float_frame = float_frame.copy() + float_frame.rename(columns={"C": "foo"}, inplace=True) + + assert "C" not in float_frame + assert "foo" in float_frame + assert id(float_frame["foo"]) != c_id + + def test_rename_bug(self): + # GH 5344 + # rename set ref_locs, and set_index was not resetting + df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]}) + df = df.rename(columns={0: "a"}) + df = df.rename(columns={1: "b"}) + df = df.set_index(["a", "b"]) + df.columns = ["2001-01-01"] + expected = DataFrame( + [[1], [2]], + index=MultiIndex.from_tuples( + [("foo", "bah"), ("bar", "bas")], names=["a", "b"] + ), + columns=["2001-01-01"], + ) + tm.assert_frame_equal(df, expected) + + def test_rename_bug2(self): + # GH 19497 + # rename was changing Index to MultiIndex if Index contained tuples + + df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"]) + df = df.rename({(1, 1): (5, 4)}, axis="index") + expected = DataFrame( + data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"] + ) + tm.assert_frame_equal(df, expected) + + def test_rename_errors_raises(self): + df = DataFrame(columns=["A", "B", "C", "D"]) + with pytest.raises(KeyError, match="'E'] not found in axis"): + df.rename(columns={"A": "a", "E": "e"}, errors="raise") + + @pytest.mark.parametrize( + "mapper, errors, expected_columns", + [ + ({"A": "a", "E": "e"}, 
"ignore", ["a", "B", "C", "D"]), + ({"A": "a"}, "raise", ["a", "B", "C", "D"]), + (str.lower, "raise", ["a", "b", "c", "d"]), + ], + ) + def test_rename_errors(self, mapper, errors, expected_columns): + # GH 13473 + # rename now works with errors parameter + df = DataFrame(columns=["A", "B", "C", "D"]) + result = df.rename(columns=mapper, errors=errors) + expected = DataFrame(columns=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_reorder_levels(self): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + names=["L0", "L1", "L2"], + ) + df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index) + + # no change, position + result = df.reorder_levels([0, 1, 2]) + tm.assert_frame_equal(df, result) + + # no change, labels + result = df.reorder_levels(["L0", "L1", "L2"]) + tm.assert_frame_equal(df, result) + + # rotate, position + result = df.reorder_levels([1, 2, 0]) + e_idx = MultiIndex( + levels=[["one", "two", "three"], [0, 1], ["bar"]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], + names=["L1", "L2", "L0"], + ) + expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) + tm.assert_frame_equal(result, expected) + + result = df.reorder_levels([0, 0, 0]) + e_idx = MultiIndex( + levels=[["bar"], ["bar"], ["bar"]], + codes=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], + names=["L0", "L0", "L0"], + ) + expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx) + tm.assert_frame_equal(result, expected) + + result = df.reorder_levels(["L0", "L0", "L0"]) + tm.assert_frame_equal(result, expected) + + def test_reset_index(self, float_frame): + stacked = float_frame.stack()[::2] + stacked = DataFrame({"foo": stacked, "bar": stacked}) + + names = ["first", "second"] + stacked.index.names = names + deleveled = stacked.reset_index() + for i, (lev, level_codes) in enumerate( + 
zip(stacked.index.levels, stacked.index.codes) + ): + values = lev.take(level_codes) + name = names[i] + tm.assert_index_equal(values, Index(deleveled[name])) + + stacked.index.names = [None, None] + deleveled2 = stacked.reset_index() + tm.assert_series_equal( + deleveled["first"], deleveled2["level_0"], check_names=False + ) + tm.assert_series_equal( + deleveled["second"], deleveled2["level_1"], check_names=False + ) + + # default name assigned + rdf = float_frame.reset_index() + exp = Series(float_frame.index.values, name="index") + tm.assert_series_equal(rdf["index"], exp) + + # default name assigned, corner case + df = float_frame.copy() + df["index"] = "foo" + rdf = df.reset_index() + exp = Series(float_frame.index.values, name="level_0") + tm.assert_series_equal(rdf["level_0"], exp) + + # but this is ok + float_frame.index.name = "index" + deleveled = float_frame.reset_index() + tm.assert_series_equal(deleveled["index"], Series(float_frame.index)) + tm.assert_index_equal(deleveled.index, Index(np.arange(len(deleveled)))) + + # preserve column names + float_frame.columns.name = "columns" + resetted = float_frame.reset_index() + assert resetted.columns.name == "columns" + + # only remove certain columns + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index(["A", "B"]) + + # TODO should reset_index check_names ? 
+ tm.assert_frame_equal(rs, float_frame, check_names=False) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) + + rs = df.reset_index(["index", "A", "B"]) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) + + rs = df.reset_index("A") + xp = float_frame.reset_index().set_index(["index", "B"]) + tm.assert_frame_equal(rs, xp, check_names=False) + + # test resetting in place + df = float_frame.copy() + resetted = float_frame.reset_index() + df.reset_index(inplace=True) + tm.assert_frame_equal(df, resetted, check_names=False) + + df = float_frame.reset_index().set_index(["index", "A", "B"]) + rs = df.reset_index("A", drop=True) + xp = float_frame.copy() + del xp["A"] + xp = xp.set_index(["B"], append=True) + tm.assert_frame_equal(rs, xp, check_names=False) + + def test_reset_index_name(self): + df = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], + columns=["A", "B", "C", "D"], + index=Index(range(2), name="x"), + ) + assert df.reset_index().index.name is None + assert df.reset_index(drop=True).index.name is None + df.reset_index(inplace=True) + assert df.index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + result = df.set_index(["A", "B"]).reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = df.set_index(["A", "B"]).reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C", "D"]]) + + # With single-level Index (GH 16263) + result = df.set_index("A").reset_index(level=levels[0]) + tm.assert_frame_equal(result, df) + + result = 
df.set_index("A").reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A"]).reset_index(level=levels[0], drop=True) + tm.assert_frame_equal(result, df[["B", "C", "D"]]) + + # Missing levels - for both MultiIndex and single-level Index: + for idx_lev in ["A", "B"], ["A"]: + with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"): + df.set_index(idx_lev).reset_index(level=["A", "E"]) + with pytest.raises(IndexError, match="Too many levels"): + df.set_index(idx_lev).reset_index(level=[0, 1, 2]) + + def test_reset_index_right_dtype(self): + time = np.arange(0.0, 10, np.sqrt(2) / 2) + s1 = Series( + (9.81 * time ** 2) / 2, index=Index(time, name="time"), name="speed" + ) + df = DataFrame(s1) + + resetted = s1.reset_index() + assert resetted["time"].dtype == np.float64 + + resetted = df.reset_index() + assert resetted["time"].dtype == np.float64 + + def test_reset_index_multiindex_col(self): + vals = np.random.randn(3, 3).astype(object) + idx = ["x", "y", "z"] + full = np.hstack(([[x] for x in idx], vals)) + df = DataFrame( + vals, + Index(idx, name="a"), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index() + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_fill=None) + xp = DataFrame( + full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index(col_level=1, col_fill="blah") + xp = DataFrame( + full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]] + ) + tm.assert_frame_equal(rs, xp) + + df = DataFrame( + vals, + MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]), + columns=[["b", "b", "c"], ["mean", "median", "mean"]], + ) + rs = df.reset_index("a") + xp = DataFrame( + full, + Index([0, 1, 2], name="d"), + columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]], + ) + 
tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill=None) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + rs = df.reset_index("a", col_fill="blah", col_level=1) + xp = DataFrame( + full, + Index(range(3), name="d"), + columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]], + ) + tm.assert_frame_equal(rs, xp) + + def test_reset_index_multiindex_nan(self): + # GH6322, testing reset_index on MultiIndexes + # when we have a nan or all nan + df = DataFrame( + {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)} + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]}) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + df = DataFrame( + { + "A": ["a", "b", "c"], + "B": [np.nan, np.nan, np.nan], + "C": np.random.rand(3), + } + ) + rs = df.set_index(["A", "B"]).reset_index() + tm.assert_frame_equal(rs, df) + + def test_reset_index_with_datetimeindex_cols(self): + # GH5818 + # + df = DataFrame( + [[1, 2], [3, 4]], + columns=date_range("1/1/2013", "1/2/2013"), + index=["A", "B"], + ) + + result = df.reset_index() + expected = DataFrame( + [["A", 1, 2], ["B", 3, 4]], + columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)], + ) + tm.assert_frame_equal(result, expected) + + def test_reset_index_range(self): + # GH 12071 + df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2)) + result = df.reset_index() + assert isinstance(result.index, RangeIndex) + expected = DataFrame( + [[0, 0, 0], [1, 1, 1]], + columns=["index", "A", "B"], + index=RangeIndex(stop=2), + ) + 
tm.assert_frame_equal(result, expected) + + def test_set_index_names(self): + df = tm.makeDataFrame() + df.index.name = "name" + + assert df.set_index(df.index).index.names == ["name"] + + mi = MultiIndex.from_arrays(df[["A", "B"]].T.values, names=["A", "B"]) + mi2 = MultiIndex.from_arrays( + df[["A", "B", "A", "B"]].T.values, names=["A", "B", "C", "D"] + ) + + df = df.set_index(["A", "B"]) + + assert df.set_index(df.index).index.names == ["A", "B"] + + # Check that set_index isn't converting a MultiIndex into an Index + assert isinstance(df.set_index(df.index).index, MultiIndex) + + # Check actual equality + tm.assert_index_equal(df.set_index(df.index).index, mi) + + idx2 = df.index.rename(["C", "D"]) + + # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather + # than a pair of tuples + assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex) + + # Check equality + tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) + + def test_rename_objects(self, float_string_frame): + renamed = float_string_frame.rename(columns=str.upper) + + assert "FOO" in renamed + assert "foo" not in renamed + + def test_rename_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"]) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + + result = df.rename(str.lower, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="columns") + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis=1) + tm.assert_frame_equal(result, expected) + + result = df.rename({"A": "a", "B": "b"}, axis="columns") + tm.assert_frame_equal(result, expected) + + # Index + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + result = df.rename(str.lower, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename(str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + result = 
df.rename({"X": "x", "Y": "y"}, axis=0) + tm.assert_frame_equal(result, expected) + + result = df.rename({"X": "x", "Y": "y"}, axis="index") + tm.assert_frame_equal(result, expected) + + result = df.rename(mapper=str.lower, axis="index") + tm.assert_frame_equal(result, expected) + + def test_rename_mapper_multi(self): + df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index( + ["A", "B"] + ) + result = df.rename(str.upper) + expected = df.rename(index=str.upper) + tm.assert_frame_equal(result, expected) + + def test_rename_positional_named(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"]) + result = df.rename(index=str.lower, columns=str.upper) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"]) + tm.assert_frame_equal(result, expected) + + def test_rename_axis_style_raises(self): + # see gh-12392 + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"]) + + # Named target and axis + over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=1) + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(columns=str.lower, axis="columns") + + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(index=str.lower, axis=0) + + # Multiple targets and axis + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, axis="columns") + + # Too many targets + over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=over_spec_msg): + df.rename(str.lower, index=str.lower, columns=str.lower) + + # Duplicates + with pytest.raises(TypeError, match="multiple values"): + df.rename(id, mapper=id) + + def test_reindex_api_equivalence(self): + # equivalence 
of the labels/axis and index/columns API's + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + + res1 = df.reindex(["b", "a"]) + res2 = df.reindex(index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"]) + res4 = df.reindex(labels=["b", "a"], axis=0) + res5 = df.reindex(["b", "a"], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=["e", "d"]) + res2 = df.reindex(["e", "d"], axis=1) + res3 = df.reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(index=["b", "a"], columns=["e", "d"]) + res2 = df.reindex(columns=["e", "d"], index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"], axis=0).reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + def test_rename_positional_raises(self): + # GH 29136 + df = DataFrame(columns=["A", "B"]) + msg = r"rename\(\) takes from 1 to 2 positional arguments" + + with pytest.raises(TypeError, match=msg): + df.rename(None, str.lower) + + def test_rename_no_mappings_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "must pass an index to rename" + with pytest.raises(TypeError, match=msg): + df.rename() + + with pytest.raises(TypeError, match=msg): + df.rename(None, index=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None) + + with pytest.raises(TypeError, match=msg): + df.rename(None, columns=None, index=None) + + def test_rename_mapper_and_positional_arguments_raises(self): + # GH 29136 + df = DataFrame([[1]]) + msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'" + with pytest.raises(TypeError, match=msg): + df.rename({}, index={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}) + + with pytest.raises(TypeError, match=msg): + df.rename({}, columns={}, index={}) + + def test_assign_columns(self, float_frame): + 
float_frame["hi"] = "there" + + df = float_frame.copy() + df.columns = ["foo", "bar", "baz", "quux", "foo2"] + tm.assert_series_equal(float_frame["C"], df["baz"], check_names=False) + tm.assert_series_equal(float_frame["hi"], df["foo2"], check_names=False) + + def test_set_index_preserve_categorical_dtype(self): + # GH13743, GH13854 + df = DataFrame( + { + "A": [1, 2, 1, 1, 2], + "B": [10, 16, 22, 28, 34], + "C1": Categorical(list("abaab"), categories=list("bac"), ordered=False), + "C2": Categorical(list("abaab"), categories=list("bac"), ordered=True), + } + ) + for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]: + result = df.set_index(cols).reset_index() + result = result.reindex(columns=df.columns) + tm.assert_frame_equal(result, df) + + def test_rename_signature(self): + sig = inspect.signature(DataFrame.rename) + parameters = set(sig.parameters) + assert parameters == { + "self", + "mapper", + "index", + "columns", + "axis", + "inplace", + "copy", + "level", + "errors", + } + + def test_reindex_signature(self): + sig = inspect.signature(DataFrame.reindex) + parameters = set(sig.parameters) + assert parameters == { + "self", + "labels", + "index", + "columns", + "axis", + "limit", + "copy", + "level", + "method", + "fill_value", + "tolerance", + } + + def test_droplevel(self): + # GH20342 + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]) + df = df.set_index([0, 1]).rename_axis(["a", "b"]) + df.columns = MultiIndex.from_tuples( + [("c", "e"), ("d", "f")], names=["level_1", "level_2"] + ) + + # test that dropping of a level in index works + expected = df.reset_index("a", drop=True) + result = df.droplevel("a", axis="index") + tm.assert_frame_equal(result, expected) + + # test that dropping of a level in columns works + expected = df.copy() + expected.columns = Index(["c", "d"], name="level_1") + result = df.droplevel("level_2", axis="columns") + tm.assert_frame_equal(result, expected) + + +class TestIntervalIndex: + def 
test_setitem(self): + + df = DataFrame({"A": range(10)}) + s = cut(df.A, 5) + assert isinstance(s.cat.categories, IntervalIndex) + + # B & D end up as Categoricals + # the remainer are converted to in-line objects + # contining an IntervalIndex.values + df["B"] = s + df["C"] = np.array(s) + df["D"] = s.values + df["E"] = np.array(s.values) + + assert is_categorical_dtype(df["B"]) + assert is_interval_dtype(df["B"].cat.categories) + assert is_categorical_dtype(df["D"]) + assert is_interval_dtype(df["D"].cat.categories) + + assert is_object_dtype(df["C"]) + assert is_object_dtype(df["E"]) + + # they compare equal as Index + # when converted to numpy objects + c = lambda x: Index(np.array(x)) + tm.assert_index_equal(c(df.B), c(df.B), check_names=False) + tm.assert_index_equal(c(df.B), c(df.C), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + tm.assert_index_equal(c(df.B), c(df.D), check_names=False) + + # B & D are the same Series + tm.assert_series_equal(df["B"], df["B"], check_names=False) + tm.assert_series_equal(df["B"], df["D"], check_names=False) + + # C & E are the same Series + tm.assert_series_equal(df["C"], df["C"], check_names=False) + tm.assert_series_equal(df["C"], df["E"], check_names=False) + + def test_set_reset_index(self): + + df = DataFrame({"A": range(10)}) + s = cut(df.A, 5) + df["B"] = s + df = df.set_index("B") + + df = df.reset_index() + + def test_set_axis_inplace(self): + # GH14636 + df = DataFrame( + {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2], "C": [4.4, 5.5, 6.6]}, + index=[2010, 2011, 2012], + ) + + expected = {0: df.copy(), 1: df.copy()} + expected[0].index = list("abc") + expected[1].columns = list("abc") + expected["index"] = expected[0] + expected["columns"] = expected[1] + + for axis in expected: + result = df.copy() + result.set_axis(list("abc"), axis=axis, inplace=True) + tm.assert_frame_equal(result, expected[axis]) + + # inplace=False + result = df.set_axis(list("abc"), axis=axis) + 
tm.assert_frame_equal(expected[axis], result) + + # omitting the "axis" parameter + with tm.assert_produces_warning(None): + result = df.set_axis(list("abc")) + tm.assert_frame_equal(result, expected[0]) + + # wrong values for the "axis" parameter + for axis in 3, "foo": + with pytest.raises(ValueError, match="No axis named"): + df.set_axis(list("abc"), axis=axis) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py new file mode 100644 index 00000000..8e1c3eff --- /dev/null +++ b/pandas/tests/frame/test_analytics.py @@ -0,0 +1,1286 @@ +from datetime import timedelta +from decimal import Decimal +import operator + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + Timestamp, + date_range, + isna, + notna, + to_datetime, + to_timedelta, +) +import pandas._testing as tm +import pandas.core.algorithms as algorithms +import pandas.core.nanops as nanops + + +def assert_stat_op_calc( + opname, + alternative, + frame, + has_skipna=True, + check_dtype=True, + check_dates=False, + check_less_precise=False, + skipna_alternative=None, +): + """ + Check that operator opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + alternative : function + Function that opname is tested against; i.e. "frame.opname()" should + equal "alternative(frame)". + frame : DataFrame + The object that the tests are executed on + has_skipna : bool, default True + Whether the method "opname" has the kwarg "skip_na" + check_dtype : bool, default True + Whether the dtypes of the result of "frame.opname()" and + "alternative(frame)" should be checked. 
+ check_dates : bool, default false + Whether opname should be tested on a Datetime Series + check_less_precise : bool, default False + Whether results should only be compared approximately; + passed on to tm.assert_series_equal + skipna_alternative : function, default None + NaN-safe version of alternative + """ + + f = getattr(frame, opname) + + if check_dates: + df = DataFrame({"b": date_range("1/1/2001", periods=2)}) + result = getattr(df, opname)() + assert isinstance(result, Series) + + df["a"] = range(len(df)) + result = getattr(df, opname)() + assert isinstance(result, Series) + assert len(result) + + if has_skipna: + + def wrapper(x): + return alternative(x.values) + + skipna_wrapper = tm._make_skipna_wrapper(alternative, skipna_alternative) + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + tm.assert_series_equal( + result0, + frame.apply(wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise, + ) + # HACK: win32 + tm.assert_series_equal( + result1, + frame.apply(wrapper, axis=1), + check_dtype=False, + check_less_precise=check_less_precise, + ) + else: + skipna_wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + tm.assert_series_equal( + result0, + frame.apply(skipna_wrapper), + check_dtype=check_dtype, + check_less_precise=check_less_precise, + ) + + if opname in ["sum", "prod"]: + expected = frame.apply(skipna_wrapper, axis=1) + tm.assert_series_equal( + result1, expected, check_dtype=False, check_less_precise=check_less_precise + ) + + # check dtypes + if check_dtype: + lcd_dtype = frame.values.dtype + assert lcd_dtype == result0.dtype + assert lcd_dtype == result1.dtype + + # bad axis + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) + + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname in ["sum", "prod"]: + unit = 1 if opname == "prod" else 0 # result for empty sum/prod + 
expected = pd.Series(unit, index=r0.index, dtype=r0.dtype) + tm.assert_series_equal(r0, expected) + expected = pd.Series(unit, index=r1.index, dtype=r1.dtype) + tm.assert_series_equal(r1, expected) + + +def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only=False): + """ + Check that API for operator opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + float_frame : DataFrame + DataFrame with columns of type float + float_string_frame : DataFrame + DataFrame with both float and string columns + has_numeric_only : bool, default False + Whether the method "opname" has the kwarg "numeric_only" + """ + + # make sure works on mixed-type frame + getattr(float_string_frame, opname)(axis=0) + getattr(float_string_frame, opname)(axis=1) + + if has_numeric_only: + getattr(float_string_frame, opname)(axis=0, numeric_only=True) + getattr(float_string_frame, opname)(axis=1, numeric_only=True) + getattr(float_frame, opname)(axis=0, numeric_only=False) + getattr(float_frame, opname)(axis=1, numeric_only=False) + + +def assert_bool_op_calc(opname, alternative, frame, has_skipna=True): + """ + Check that bool operator opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + alternative : function + Function that opname is tested against; i.e. "frame.opname()" should + equal "alternative(frame)". 
+ frame : DataFrame + The object that the tests are executed on + has_skipna : bool, default True + Whether the method "opname" has the kwarg "skip_na" + """ + + f = getattr(frame, opname) + + if has_skipna: + + def skipna_wrapper(x): + nona = x.dropna().values + return alternative(nona) + + def wrapper(x): + return alternative(x.values) + + result0 = f(axis=0, skipna=False) + result1 = f(axis=1, skipna=False) + + tm.assert_series_equal(result0, frame.apply(wrapper)) + tm.assert_series_equal( + result1, frame.apply(wrapper, axis=1), check_dtype=False + ) # HACK: win32 + else: + skipna_wrapper = alternative + wrapper = alternative + + result0 = f(axis=0) + result1 = f(axis=1) + + tm.assert_series_equal(result0, frame.apply(skipna_wrapper)) + tm.assert_series_equal( + result1, frame.apply(skipna_wrapper, axis=1), check_dtype=False + ) + + # bad axis + with pytest.raises(ValueError, match="No axis named 2"): + f(axis=2) + + # all NA case + if has_skipna: + all_na = frame * np.NaN + r0 = getattr(all_na, opname)(axis=0) + r1 = getattr(all_na, opname)(axis=1) + if opname == "any": + assert not r0.any() + assert not r1.any() + else: + assert r0.all() + assert r1.all() + + +def assert_bool_op_api( + opname, bool_frame_with_na, float_string_frame, has_bool_only=False +): + """ + Check that API for boolean operator opname works as advertised on frame + + Parameters + ---------- + opname : string + Name of the operator to test on frame + float_frame : DataFrame + DataFrame with columns of type float + float_string_frame : DataFrame + DataFrame with both float and string columns + has_bool_only : bool, default False + Whether the method "opname" has the kwarg "bool_only" + """ + # make sure op works on mixed-type frame + mixed = float_string_frame + mixed["_bool_"] = np.random.randn(len(mixed)) > 0.5 + getattr(mixed, opname)(axis=0) + getattr(mixed, opname)(axis=1) + + if has_bool_only: + getattr(mixed, opname)(axis=0, bool_only=True) + getattr(mixed, opname)(axis=1, 
bool_only=True) + getattr(bool_frame_with_na, opname)(axis=0, bool_only=False) + getattr(bool_frame_with_na, opname)(axis=1, bool_only=False) + + +class TestDataFrameAnalytics: + + # --------------------------------------------------------------------- + # Reductions + + def test_stat_op_api(self, float_frame, float_string_frame): + assert_stat_op_api( + "count", float_frame, float_string_frame, has_numeric_only=True + ) + assert_stat_op_api( + "sum", float_frame, float_string_frame, has_numeric_only=True + ) + + assert_stat_op_api("nunique", float_frame, float_string_frame) + assert_stat_op_api("mean", float_frame, float_string_frame) + assert_stat_op_api("product", float_frame, float_string_frame) + assert_stat_op_api("median", float_frame, float_string_frame) + assert_stat_op_api("min", float_frame, float_string_frame) + assert_stat_op_api("max", float_frame, float_string_frame) + assert_stat_op_api("mad", float_frame, float_string_frame) + assert_stat_op_api("var", float_frame, float_string_frame) + assert_stat_op_api("std", float_frame, float_string_frame) + assert_stat_op_api("sem", float_frame, float_string_frame) + assert_stat_op_api("median", float_frame, float_string_frame) + + try: + from scipy.stats import skew, kurtosis # noqa:F401 + + assert_stat_op_api("skew", float_frame, float_string_frame) + assert_stat_op_api("kurt", float_frame, float_string_frame) + except ImportError: + pass + + def test_stat_op_calc(self, float_frame_with_na, mixed_float_frame): + def count(s): + return notna(s).sum() + + def nunique(s): + return len(algorithms.unique1d(s.dropna())) + + def mad(x): + return np.abs(x - x.mean()).mean() + + def var(x): + return np.var(x, ddof=1) + + def std(x): + return np.std(x, ddof=1) + + def sem(x): + return np.std(x, ddof=1) / np.sqrt(len(x)) + + def skewness(x): + from scipy.stats import skew # noqa:F811 + + if len(x) < 3: + return np.nan + return skew(x, bias=False) + + def kurt(x): + from scipy.stats import kurtosis # noqa:F811 + + if 
len(x) < 4: + return np.nan + return kurtosis(x, bias=False) + + assert_stat_op_calc( + "nunique", + nunique, + float_frame_with_na, + has_skipna=False, + check_dtype=False, + check_dates=True, + ) + + # mixed types (with upcasting happening) + assert_stat_op_calc( + "sum", + np.sum, + mixed_float_frame.astype("float32"), + check_dtype=False, + check_less_precise=True, + ) + + assert_stat_op_calc( + "sum", np.sum, float_frame_with_na, skipna_alternative=np.nansum + ) + assert_stat_op_calc("mean", np.mean, float_frame_with_na, check_dates=True) + assert_stat_op_calc("product", np.prod, float_frame_with_na) + + assert_stat_op_calc("mad", mad, float_frame_with_na) + assert_stat_op_calc("var", var, float_frame_with_na) + assert_stat_op_calc("std", std, float_frame_with_na) + assert_stat_op_calc("sem", sem, float_frame_with_na) + + assert_stat_op_calc( + "count", + count, + float_frame_with_na, + has_skipna=False, + check_dtype=False, + check_dates=True, + ) + + try: + from scipy import skew, kurtosis # noqa:F401 + + assert_stat_op_calc("skew", skewness, float_frame_with_na) + assert_stat_op_calc("kurt", kurt, float_frame_with_na) + except ImportError: + pass + + # TODO: Ensure warning isn't emitted in the first place + @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") + def test_median(self, float_frame_with_na, int_frame): + def wrapper(x): + if isna(x).any(): + return np.nan + return np.median(x) + + assert_stat_op_calc("median", wrapper, float_frame_with_na, check_dates=True) + assert_stat_op_calc( + "median", wrapper, int_frame, check_dtype=False, check_dates=True + ) + + @pytest.mark.parametrize( + "method", ["sum", "mean", "prod", "var", "std", "skew", "min", "max"] + ) + def test_stat_operators_attempt_obj_array(self, method): + # GH#676 + data = { + "a": [ + -0.00049987540199591344, + -0.0016467257772919831, + 0.00067695870775883013, + ], + "b": [-0, -0, 0.0], + "c": [ + 0.00031111847529610595, + 0.0014902627951905339, + -0.00094099200035979691, + ], 
+ } + df1 = DataFrame(data, index=["foo", "bar", "baz"], dtype="O") + + df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3], 2: [np.nan, 4]}, dtype=object) + + for df in [df1, df2]: + assert df.values.dtype == np.object_ + result = getattr(df, method)(1) + expected = getattr(df.astype("f8"), method)(1) + + if method in ["sum", "prod"]: + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"]) + def test_mixed_ops(self, op): + # GH#16116 + df = DataFrame( + { + "int": [1, 2, 3, 4], + "float": [1.0, 2.0, 3.0, 4.0], + "str": ["a", "b", "c", "d"], + } + ) + + result = getattr(df, op)() + assert len(result) == 2 + + with pd.option_context("use_bottleneck", False): + result = getattr(df, op)() + assert len(result) == 2 + + def test_reduce_mixed_frame(self): + # GH 6806 + df = DataFrame( + { + "bool_data": [True, True, False, False, False], + "int_data": [10, 20, 30, 40, 50], + "string_data": ["a", "b", "c", "d", "e"], + } + ) + df.reindex(columns=["bool_data", "int_data", "string_data"]) + test = df.sum(axis=0) + tm.assert_numpy_array_equal( + test.values, np.array([2, 150, "abcde"], dtype=object) + ) + tm.assert_series_equal(test, df.T.sum(axis=1)) + + def test_nunique(self): + df = DataFrame({"A": [1, 1, 1], "B": [1, 2, 3], "C": [1, np.nan, 3]}) + tm.assert_series_equal(df.nunique(), Series({"A": 1, "B": 3, "C": 2})) + tm.assert_series_equal( + df.nunique(dropna=False), Series({"A": 1, "B": 3, "C": 3}) + ) + tm.assert_series_equal(df.nunique(axis=1), Series({0: 1, 1: 2, 2: 2})) + tm.assert_series_equal( + df.nunique(axis=1, dropna=False), Series({0: 1, 1: 3, 2: 2}) + ) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_mean_mixed_datetime_numeric(self, tz): + # https://github.com/pandas-dev/pandas/issues/24752 + df = pd.DataFrame({"A": [1, 1], "B": [pd.Timestamp("2000", tz=tz)] * 2}) + result = df.mean() + expected = pd.Series([1.0], index=["A"]) + tm.assert_series_equal(result, expected) + + 
@pytest.mark.parametrize("tz", [None, "UTC"]) + def test_mean_excludes_datetimes(self, tz): + # https://github.com/pandas-dev/pandas/issues/24752 + # Our long-term desired behavior is unclear, but the behavior in + # 0.24.0rc1 was buggy. + df = pd.DataFrame({"A": [pd.Timestamp("2000", tz=tz)] * 2}) + result = df.mean() + expected = pd.Series(dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_mean_mixed_string_decimal(self): + # GH 11670 + # possible bug when calculating mean of DataFrame? + + d = [ + {"A": 2, "B": None, "C": Decimal("628.00")}, + {"A": 1, "B": None, "C": Decimal("383.00")}, + {"A": 3, "B": None, "C": Decimal("651.00")}, + {"A": 2, "B": None, "C": Decimal("575.00")}, + {"A": 4, "B": None, "C": Decimal("1114.00")}, + {"A": 1, "B": "TEST", "C": Decimal("241.00")}, + {"A": 2, "B": None, "C": Decimal("572.00")}, + {"A": 4, "B": None, "C": Decimal("609.00")}, + {"A": 3, "B": None, "C": Decimal("820.00")}, + {"A": 5, "B": None, "C": Decimal("1223.00")}, + ] + + df = pd.DataFrame(d) + + result = df.mean() + expected = pd.Series([2.7, 681.6], index=["A", "C"]) + tm.assert_series_equal(result, expected) + + def test_var_std(self, datetime_frame): + result = datetime_frame.std(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4)) + tm.assert_almost_equal(result, expected) + + result = datetime_frame.var(ddof=4) + expected = datetime_frame.apply(lambda x: x.var(ddof=4)) + tm.assert_almost_equal(result, expected) + + arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + result = nanops.nanvar(arr, axis=0) + assert not (result < 0).any() + + with pd.option_context("use_bottleneck", False): + result = nanops.nanvar(arr, axis=0) + assert not (result < 0).any() + + @pytest.mark.parametrize("meth", ["sem", "var", "std"]) + def test_numeric_only_flag(self, meth): + # GH 9201 + df1 = DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # set one entry to a number in str format + df1.loc[0, "foo"] = "100" + + df2 = 
DataFrame(np.random.randn(5, 3), columns=["foo", "bar", "baz"]) + # set one entry to a non-number str + df2.loc[0, "foo"] = "a" + + result = getattr(df1, meth)(axis=1, numeric_only=True) + expected = getattr(df1[["bar", "baz"]], meth)(axis=1) + tm.assert_series_equal(expected, result) + + result = getattr(df2, meth)(axis=1, numeric_only=True) + expected = getattr(df2[["bar", "baz"]], meth)(axis=1) + tm.assert_series_equal(expected, result) + + # df1 has all numbers, df2 has a letter inside + msg = r"unsupported operand type\(s\) for -: 'float' and 'str'" + with pytest.raises(TypeError, match=msg): + getattr(df1, meth)(axis=1, numeric_only=False) + msg = "could not convert string to float: 'a'" + with pytest.raises(TypeError, match=msg): + getattr(df2, meth)(axis=1, numeric_only=False) + + def test_sem(self, datetime_frame): + result = datetime_frame.sem(ddof=4) + expected = datetime_frame.apply(lambda x: x.std(ddof=4) / np.sqrt(len(x))) + tm.assert_almost_equal(result, expected) + + arr = np.repeat(np.random.random((1, 1000)), 1000, 0) + result = nanops.nansem(arr, axis=0) + assert not (result < 0).any() + + with pd.option_context("use_bottleneck", False): + result = nanops.nansem(arr, axis=0) + assert not (result < 0).any() + + @td.skip_if_no_scipy + def test_kurt(self): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + df = DataFrame(np.random.randn(6, 3), index=index) + + kurt = df.kurt() + kurt2 = df.kurt(level=0).xs("bar") + tm.assert_series_equal(kurt, kurt2, check_names=False) + assert kurt.name is None + assert kurt2.name == "bar" + + @pytest.mark.parametrize( + "dropna, expected", + [ + ( + True, + { + "A": [12], + "B": [10.0], + "C": [1.0], + "D": ["a"], + "E": Categorical(["a"], categories=["a"]), + "F": to_datetime(["2000-1-2"]), + "G": to_timedelta(["1 days"]), + }, + ), + ( + False, + { + "A": [12], + "B": [10.0], + "C": [np.nan], + "D": 
np.array([np.nan], dtype=object), + "E": Categorical([np.nan], categories=["a"]), + "F": [pd.NaT], + "G": to_timedelta([pd.NaT]), + }, + ), + ( + True, + { + "H": [8, 9, np.nan, np.nan], + "I": [8, 9, np.nan, np.nan], + "J": [1, np.nan, np.nan, np.nan], + "K": Categorical(["a", np.nan, np.nan, np.nan], categories=["a"]), + "L": to_datetime(["2000-1-2", "NaT", "NaT", "NaT"]), + "M": to_timedelta(["1 days", "nan", "nan", "nan"]), + "N": [0, 1, 2, 3], + }, + ), + ( + False, + { + "H": [8, 9, np.nan, np.nan], + "I": [8, 9, np.nan, np.nan], + "J": [1, np.nan, np.nan, np.nan], + "K": Categorical([np.nan, "a", np.nan, np.nan], categories=["a"]), + "L": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]), + "M": to_timedelta(["nan", "1 days", "nan", "nan"]), + "N": [0, 1, 2, 3], + }, + ), + ], + ) + def test_mode_dropna(self, dropna, expected): + + df = DataFrame( + { + "A": [12, 12, 19, 11], + "B": [10, 10, np.nan, 3], + "C": [1, np.nan, np.nan, np.nan], + "D": [np.nan, np.nan, "a", np.nan], + "E": Categorical([np.nan, np.nan, "a", np.nan]), + "F": to_datetime(["NaT", "2000-1-2", "NaT", "NaT"]), + "G": to_timedelta(["1 days", "nan", "nan", "nan"]), + "H": [8, 8, 9, 9], + "I": [9, 9, 8, 8], + "J": [1, 1, np.nan, np.nan], + "K": Categorical(["a", np.nan, "a", np.nan]), + "L": to_datetime(["2000-1-2", "2000-1-2", "NaT", "NaT"]), + "M": to_timedelta(["1 days", "nan", "1 days", "nan"]), + "N": np.arange(4, dtype="int64"), + } + ) + + result = df[sorted(expected.keys())].mode(dropna=dropna) + expected = DataFrame(expected) + tm.assert_frame_equal(result, expected) + + def test_mode_sortwarning(self): + # Check for the warning that is raised when the mode + # results cannot be sorted + + df = DataFrame({"A": [np.nan, np.nan, "a", "a"]}) + expected = DataFrame({"A": ["a", np.nan]}) + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + result = df.mode(dropna=False) + result = result.sort_values(by="A").reset_index(drop=True) + + tm.assert_frame_equal(result, 
expected) + + def test_operators_timedelta64(self): + df = DataFrame( + dict( + A=date_range("2012-1-1", periods=3, freq="D"), + B=date_range("2012-1-2", periods=3, freq="D"), + C=Timestamp("20120101") - timedelta(minutes=5, seconds=5), + ) + ) + + diffs = DataFrame(dict(A=df["A"] - df["C"], B=df["A"] - df["B"])) + + # min + result = diffs.min() + assert result[0] == diffs.loc[0, "A"] + assert result[1] == diffs.loc[0, "B"] + + result = diffs.min(axis=1) + assert (result == diffs.loc[0, "B"]).all() + + # max + result = diffs.max() + assert result[0] == diffs.loc[2, "A"] + assert result[1] == diffs.loc[2, "B"] + + result = diffs.max(axis=1) + assert (result == diffs["A"]).all() + + # abs + result = diffs.abs() + result2 = abs(diffs) + expected = DataFrame(dict(A=df["A"] - df["C"], B=df["B"] - df["A"])) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + # mixed frame + mixed = diffs.copy() + mixed["C"] = "foo" + mixed["D"] = 1 + mixed["E"] = 1.0 + mixed["F"] = Timestamp("20130101") + + # results in an object array + result = mixed.min() + expected = Series( + [ + pd.Timedelta(timedelta(seconds=5 * 60 + 5)), + pd.Timedelta(timedelta(days=-1)), + "foo", + 1, + 1.0, + Timestamp("20130101"), + ], + index=mixed.columns, + ) + tm.assert_series_equal(result, expected) + + # excludes numeric + result = mixed.min(axis=1) + expected = Series([1, 1, 1.0], index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + # works when only those columns are selected + result = mixed[["A", "B"]].min(1) + expected = Series([timedelta(days=-1)] * 3) + tm.assert_series_equal(result, expected) + + result = mixed[["A", "B"]].min() + expected = Series( + [timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"] + ) + tm.assert_series_equal(result, expected) + + # GH 3106 + df = DataFrame( + { + "time": date_range("20130102", periods=5), + "time2": date_range("20130105", periods=5), + } + ) + df["off1"] = df["time2"] - df["time"] + assert 
df["off1"].dtype == "timedelta64[ns]" + + df["off2"] = df["time"] - df["time2"] + df._consolidate_inplace() + assert df["off1"].dtype == "timedelta64[ns]" + assert df["off2"].dtype == "timedelta64[ns]" + + def test_sum_corner(self): + empty_frame = DataFrame() + + axis0 = empty_frame.sum(0) + axis1 = empty_frame.sum(1) + assert isinstance(axis0, Series) + assert isinstance(axis1, Series) + assert len(axis0) == 0 + assert len(axis1) == 0 + + @pytest.mark.parametrize("method, unit", [("sum", 0), ("prod", 1)]) + def test_sum_prod_nanops(self, method, unit): + # A column reduces to the identity `unit` unless min_count asks for + # more valid values than the column holds, in which case it is NaN. + idx = ["a", "b", "c"] + df = pd.DataFrame( + {"a": [unit, unit], "b": [unit, np.nan], "c": [np.nan, np.nan]} + ) + # The default (same expectation as the explicit min_count=0 case below) + result = getattr(df, method)() + expected = pd.Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = getattr(df, method)(min_count=1) + expected = pd.Series([unit, unit, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = getattr(df, method)(min_count=0) + expected = pd.Series([unit, unit, unit], index=idx, dtype="float64") + tm.assert_series_equal(result, expected) + + result = getattr(df.iloc[1:], method)(min_count=1) + expected = pd.Series([unit, np.nan, np.nan], index=idx) + tm.assert_series_equal(result, expected) + + # min_count > 1 + df = pd.DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5}) + # B holds exactly 5 valid values, so min_count=5 is still satisfied + result = getattr(df, method)(min_count=5) + expected = pd.Series([unit, unit], index=["A", "B"], dtype="float64") + tm.assert_series_equal(result, expected) + + # min_count=6 exceeds B's 5 valid values, so B becomes NaN + result = getattr(df, method)(min_count=6) + expected = pd.Series([unit, np.nan], index=["A", "B"]) + tm.assert_series_equal(result, expected) + + def test_sum_nanops_timedelta(self): + # prod isn't defined on timedeltas + idx = ["a", "b", "c"] + df = pd.DataFrame({"a": [0, 0], "b": [0, np.nan], "c": [np.nan, np.nan]}) + + df2 = df.apply(pd.to_timedelta) + + # 0 by default + result = df2.sum() + expected = pd.Series([0, 0, 0], dtype="m8[ns]", index=idx) + 
tm.assert_series_equal(result, expected) + + # min_count=0 + result = df2.sum(min_count=0) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df2.sum(min_count=1) + expected = pd.Series([0, 0, np.nan], dtype="m8[ns]", index=idx) + tm.assert_series_equal(result, expected) + + def test_sum_object(self, float_frame): + values = float_frame.values.astype(int) + frame = DataFrame(values, index=float_frame.index, columns=float_frame.columns) + deltas = frame * timedelta(1) + deltas.sum() + + def test_sum_bool(self, float_frame): + # ensure this works, bug report + bools = np.isnan(float_frame) + bools.sum(1) + bools.sum(0) + + def test_sum_mixed_datetime(self): + # GH#30886 + df = pd.DataFrame( + {"A": pd.date_range("2000", periods=4), "B": [1, 2, 3, 4]} + ).reindex([2, 3, 4]) + result = df.sum() + + expected = pd.Series({"B": 7.0}) + tm.assert_series_equal(result, expected) + + def test_mean_corner(self, float_frame, float_string_frame): + # unit test when have object data + the_mean = float_string_frame.mean(axis=0) + the_sum = float_string_frame.sum(axis=0, numeric_only=True) + tm.assert_index_equal(the_sum.index, the_mean.index) + assert len(the_mean.index) < len(float_string_frame.columns) + + # xs sum mixed type, just want to know it works... 
+ the_mean = float_string_frame.mean(axis=1) + the_sum = float_string_frame.sum(axis=1, numeric_only=True) + tm.assert_index_equal(the_sum.index, the_mean.index) + + # take mean of boolean column + float_frame["bool"] = float_frame["A"] > 0 + means = float_frame.mean(0) + assert means["bool"] == float_frame["bool"].values.mean() + + def test_mean_datetimelike(self): + # GH#24757 check that datetimelike are excluded by default, handled + # correctly with numeric_only=True + + df = pd.DataFrame( + { + "A": np.arange(3), + "B": pd.date_range("2016-01-01", periods=3), + "C": pd.timedelta_range("1D", periods=3), + "D": pd.period_range("2016", periods=3, freq="A"), + } + ) + result = df.mean(numeric_only=True) + expected = pd.Series({"A": 1.0}) + tm.assert_series_equal(result, expected) + + result = df.mean() + expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]}) + tm.assert_series_equal(result, expected) + + @pytest.mark.xfail( + reason="casts to object-dtype and then tries to add timestamps", + raises=TypeError, + strict=True, + ) + def test_mean_datetimelike_numeric_only_false(self): + df = pd.DataFrame( + { + "A": np.arange(3), + "B": pd.date_range("2016-01-01", periods=3), + "C": pd.timedelta_range("1D", periods=3), + "D": pd.period_range("2016", periods=3, freq="A"), + } + ) + + result = df.mean(numeric_only=False) + expected = pd.Series( + {"A": 1, "B": df.loc[1, "B"], "C": df.loc[1, "C"], "D": df.loc[1, "D"]} + ) + tm.assert_series_equal(result, expected) + + # mean of period is not allowed + df["D"] = pd.period_range("2016", periods=3, freq="A") + + with pytest.raises(TypeError, match="mean is not implemented for Period"): + df.mean(numeric_only=False) + + def test_mean_extensionarray_numeric_only_true(self): + # https://github.com/pandas-dev/pandas/issues/33256 + arr = np.random.randint(1000, size=(10, 5)) + df = pd.DataFrame(arr, dtype="Int64") + result = df.mean(numeric_only=True) + expected = pd.DataFrame(arr).mean() + tm.assert_series_equal(result, 
expected) + + def test_stats_mixed_type(self, float_string_frame): + # don't blow up + float_string_frame.std(1) + float_string_frame.var(1) + float_string_frame.mean(1) + float_string_frame.skew(1) + + def test_sum_bools(self): + df = DataFrame(index=range(1), columns=range(10)) + bools = isna(df) + assert bools.sum(axis=1)[0] == 10 + + # ---------------------------------------------------------------------- + # Index of max / min + + def test_idxmin(self, float_frame, int_frame): + frame = float_frame + frame.loc[5:10] = np.nan + frame.loc[15:20, -2:] = np.nan + for skipna in [True, False]: + for axis in [0, 1]: + for df in [frame, int_frame]: + result = df.idxmin(axis=axis, skipna=skipna) + expected = df.apply(Series.idxmin, axis=axis, skipna=skipna) + tm.assert_series_equal(result, expected) + + msg = "No axis named 2 for object type " + with pytest.raises(ValueError, match=msg): + frame.idxmin(axis=2) + + def test_idxmax(self, float_frame, int_frame): + frame = float_frame + frame.loc[5:10] = np.nan + frame.loc[15:20, -2:] = np.nan + for skipna in [True, False]: + for axis in [0, 1]: + for df in [frame, int_frame]: + result = df.idxmax(axis=axis, skipna=skipna) + expected = df.apply(Series.idxmax, axis=axis, skipna=skipna) + tm.assert_series_equal(result, expected) + + msg = "No axis named 2 for object type " + with pytest.raises(ValueError, match=msg): + frame.idxmax(axis=2) + + # ---------------------------------------------------------------------- + # Logical reductions + + @pytest.mark.parametrize("opname", ["any", "all"]) + def test_any_all(self, opname, bool_frame_with_na, float_string_frame): + assert_bool_op_calc( + opname, getattr(np, opname), bool_frame_with_na, has_skipna=True + ) + assert_bool_op_api( + opname, bool_frame_with_na, float_string_frame, has_bool_only=True + ) + + def test_any_all_extra(self): + df = DataFrame( + { + "A": [True, False, False], + "B": [True, True, False], + "C": [True, True, True], + }, + index=["a", "b", "c"], + ) + 
result = df[["A", "B"]].any(1) + expected = Series([True, True, False], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + result = df[["A", "B"]].any(1, bool_only=True) + tm.assert_series_equal(result, expected) + + result = df.all(1) + expected = Series([True, False, False], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + result = df.all(1, bool_only=True) + tm.assert_series_equal(result, expected) + + # Axis is None + result = df.all(axis=None).item() + assert result is False + + result = df.any(axis=None).item() + assert result is True + + result = df[["C"]].all(axis=None).item() + assert result is True + + def test_any_datetime(self): + + # GH 23070 + float_data = [1, np.nan, 3, np.nan] + datetime_data = [ + pd.Timestamp("1960-02-15"), + pd.Timestamp("1960-02-16"), + pd.NaT, + pd.NaT, + ] + df = DataFrame({"A": float_data, "B": datetime_data}) + + result = df.any(1) + expected = Series([True, True, True, False]) + tm.assert_series_equal(result, expected) + + def test_any_all_bool_only(self): + + # GH 25101 + df = DataFrame( + {"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [None, None, None]} + ) + + # No bool columns survive bool_only=True, so the result is an empty + # bool Series. Use the builtin `bool`: the `np.bool` alias is deprecated + # and was removed in NumPy 1.24. + result = df.all(bool_only=True) + expected = Series(dtype=bool) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "col1": [1, 2, 3], + "col2": [4, 5, 6], + "col3": [None, None, None], + "col4": [False, False, True], + } + ) + + result = df.all(bool_only=True) + expected = Series({"col4": False}) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "func, data, expected", + [ + (np.any, {}, False), + (np.all, {}, True), + (np.any, {"A": []}, False), + (np.all, {"A": []}, True), + (np.any, {"A": [False, False]}, False), + (np.all, {"A": [False, False]}, False), + (np.any, {"A": [True, False]}, True), + (np.all, {"A": [True, False]}, False), + (np.any, {"A": [True, True]}, True), + (np.all, {"A": [True, True]}, True), + (np.any, {"A": [False], "B": [False]}, False), + (np.all, {"A": 
[False], "B": [False]}, False), + (np.any, {"A": [False, False], "B": [False, True]}, True), + (np.all, {"A": [False, False], "B": [False, True]}, False), + # other types + (np.all, {"A": pd.Series([0.0, 1.0], dtype="float")}, False), + (np.any, {"A": pd.Series([0.0, 1.0], dtype="float")}, True), + (np.all, {"A": pd.Series([0, 1], dtype=int)}, False), + (np.any, {"A": pd.Series([0, 1], dtype=int)}, True), + pytest.param( + np.all, + {"A": pd.Series([0, 1], dtype="M8[ns]")}, + False, + marks=[td.skip_if_np_lt("1.15")], + ), + pytest.param( + np.any, + {"A": pd.Series([0, 1], dtype="M8[ns]")}, + True, + marks=[td.skip_if_np_lt("1.15")], + ), + pytest.param( + np.all, + {"A": pd.Series([1, 2], dtype="M8[ns]")}, + True, + marks=[td.skip_if_np_lt("1.15")], + ), + pytest.param( + np.any, + {"A": pd.Series([1, 2], dtype="M8[ns]")}, + True, + marks=[td.skip_if_np_lt("1.15")], + ), + pytest.param( + np.all, + {"A": pd.Series([0, 1], dtype="m8[ns]")}, + False, + marks=[td.skip_if_np_lt("1.15")], + ), + pytest.param( + np.any, + {"A": pd.Series([0, 1], dtype="m8[ns]")}, + True, + marks=[td.skip_if_np_lt("1.15")], + ), + pytest.param( + np.all, + {"A": pd.Series([1, 2], dtype="m8[ns]")}, + True, + marks=[td.skip_if_np_lt("1.15")], + ), + pytest.param( + np.any, + {"A": pd.Series([1, 2], dtype="m8[ns]")}, + True, + marks=[td.skip_if_np_lt("1.15")], + ), + (np.all, {"A": pd.Series([0, 1], dtype="category")}, False), + (np.any, {"A": pd.Series([0, 1], dtype="category")}, True), + (np.all, {"A": pd.Series([1, 2], dtype="category")}, True), + (np.any, {"A": pd.Series([1, 2], dtype="category")}, True), + # Mix GH#21484 + pytest.param( + np.all, + { + "A": pd.Series([10, 20], dtype="M8[ns]"), + "B": pd.Series([10, 20], dtype="m8[ns]"), + }, + True, + # In 1.13.3 and 1.14 np.all(df) returns a Timedelta here + marks=[td.skip_if_np_lt("1.15")], + ), + ], + ) + def test_any_all_np_func(self, func, data, expected): + # GH 19976 + data = DataFrame(data) + result = func(data) + assert 
isinstance(result, np.bool_) + assert result.item() is expected + + # method version + result = getattr(DataFrame(data), func.__name__)(axis=None) + assert isinstance(result, np.bool_) + assert result.item() is expected + + def test_any_all_object(self): + # GH 19976 + result = np.all(DataFrame(columns=["a", "b"])).item() + assert result is True + + result = np.any(DataFrame(columns=["a", "b"])).item() + assert result is False + + @pytest.mark.parametrize("method", ["any", "all"]) + def test_any_all_level_axis_none_raises(self, method): + df = DataFrame( + {"A": 1}, + index=MultiIndex.from_product( + [["A", "B"], ["a", "b"]], names=["out", "in"] + ), + ) + xpr = "Must specify 'axis' when aggregating by level." + with pytest.raises(ValueError, match=xpr): + getattr(df, method)(axis=None, level="out") + + # --------------------------------------------------------------------- + # Matrix-like + + def test_dot(self): + a = DataFrame( + np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + ) + b = DataFrame( + np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["one", "two"] + ) + + result = a.dot(b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # Check alignment: an operand with a reversed index must be realigned + # by label, so the product is unchanged + b1 = b.reindex(index=reversed(b.index)) + result = a.dot(b1) + tm.assert_frame_equal(result, expected) + + # Check series argument + result = a.dot(b["one"]) + tm.assert_series_equal(result, expected["one"], check_names=False) + assert result.name is None + + result = a.dot(b1["one"]) + tm.assert_series_equal(result, expected["one"], check_names=False) + assert result.name is None + + # can pass correct-length arrays + row = a.iloc[0].values + + result = a.dot(row) + expected = a.dot(a.iloc[0]) + tm.assert_series_equal(result, expected) + + with pytest.raises(ValueError, match="Dot product shape mismatch"): + a.dot(row[:-1]) + + a = np.random.rand(1, 5) + b = np.random.rand(5, 1) + A = DataFrame(a) + + # TODO(wesm): 
unused + B = DataFrame(b) # noqa + + # it works + result = A.dot(b) + + # unaligned + df = DataFrame(np.random.randn(3, 4), index=[1, 2, 3], columns=range(4)) + df2 = DataFrame(np.random.randn(5, 3), index=range(5), columns=[1, 2, 3]) + + with pytest.raises(ValueError, match="aligned"): + df.dot(df2) + + def test_matmul(self): + # matmul test is for GH 10259 + a = DataFrame( + np.random.randn(3, 4), index=["a", "b", "c"], columns=["p", "q", "r", "s"] + ) + b = DataFrame( + np.random.randn(4, 2), index=["p", "q", "r", "s"], columns=["one", "two"] + ) + + # DataFrame @ DataFrame + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # DataFrame @ Series + result = operator.matmul(a, b.one) + expected = Series(np.dot(a.values, b.one.values), index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + # np.array @ DataFrame + result = operator.matmul(a.values, b) + assert isinstance(result, DataFrame) + assert result.columns.equals(b.columns) + assert result.index.equals(pd.Index(range(3))) + expected = np.dot(a.values, b.values) + tm.assert_almost_equal(result.values, expected) + + # nested list @ DataFrame (__rmatmul__) + result = operator.matmul(a.values.tolist(), b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_almost_equal(result.values, expected.values) + + # mixed dtype DataFrame @ DataFrame + a["q"] = a.q.round().astype(int) + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # different dtypes DataFrame @ DataFrame + a = a.astype(int) + result = operator.matmul(a, b) + expected = DataFrame( + np.dot(a.values, b.values), index=["a", "b", "c"], columns=["one", "two"] + ) + tm.assert_frame_equal(result, expected) + + # unaligned + 
df = DataFrame(np.random.randn(3, 4), index=[1, 2, 3], columns=range(4)) + df2 = DataFrame(np.random.randn(5, 3), index=range(5), columns=[1, 2, 3]) + + with pytest.raises(ValueError, match="aligned"): + operator.matmul(df, df2) + + # --------------------------------------------------------------------- + # Unsorted + + def test_series_broadcasting(self): + # smoke test for numpy warnings + # GH 16378, GH 16306 + df = DataFrame([1.0, 1.0, 1.0]) + df_nan = DataFrame({"A": [np.nan, 2.0, np.nan]}) + s = Series([1, 1, 1]) + s_nan = Series([np.nan, np.nan, 1]) + + with tm.assert_produces_warning(None): + df_nan.clip(lower=s, axis=0) + for op in ["lt", "le", "gt", "ge", "eq", "ne"]: + getattr(df, op)(s_nan, axis=0) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py new file mode 100644 index 00000000..18c03cd2 --- /dev/null +++ b/pandas/tests/frame/test_api.py @@ -0,0 +1,572 @@ +from copy import deepcopy +import datetime +import pydoc + +import numpy as np +import pytest + +from pandas.compat import PY37 +from pandas.util._test_decorators import async_mark + +import pandas as pd +from pandas import Categorical, DataFrame, Series, compat, date_range, timedelta_range +import pandas._testing as tm + + +class TestDataFrameMisc: + def test_copy_index_name_checking(self, float_frame): + # don't want to be able to modify the index stored elsewhere after + # making a copy + for attr in ("index", "columns"): + ind = getattr(float_frame, attr) + ind.name = None + cp = float_frame.copy() + getattr(cp, attr).name = "foo" + assert getattr(float_frame, attr).name is None + + def test_getitem_pop_assign_name(self, float_frame): + s = float_frame["A"] + assert s.name == "A" + + s = float_frame.pop("A") + assert s.name == "A" + + s = float_frame.loc[:, "B"] + assert s.name == "B" + + s2 = s.loc[:] + assert s2.name == "B" + + def test_get_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: + result = 
float_frame._get_value(idx, col) + expected = float_frame[col][idx] + tm.assert_almost_equal(result, expected) + + def test_add_prefix_suffix(self, float_frame): + with_prefix = float_frame.add_prefix("foo#") + expected = pd.Index(["foo#{c}".format(c=c) for c in float_frame.columns]) + tm.assert_index_equal(with_prefix.columns, expected) + + with_suffix = float_frame.add_suffix("#foo") + expected = pd.Index(["{c}#foo".format(c=c) for c in float_frame.columns]) + tm.assert_index_equal(with_suffix.columns, expected) + + with_pct_prefix = float_frame.add_prefix("%") + expected = pd.Index(["%{c}".format(c=c) for c in float_frame.columns]) + tm.assert_index_equal(with_pct_prefix.columns, expected) + + with_pct_suffix = float_frame.add_suffix("%") + expected = pd.Index(["{c}%".format(c=c) for c in float_frame.columns]) + tm.assert_index_equal(with_pct_suffix.columns, expected) + + def test_get_axis(self, float_frame): + f = float_frame + assert f._get_axis_number(0) == 0 + assert f._get_axis_number(1) == 1 + assert f._get_axis_number("index") == 0 + assert f._get_axis_number("rows") == 0 + assert f._get_axis_number("columns") == 1 + + assert f._get_axis_name(0) == "index" + assert f._get_axis_name(1) == "columns" + assert f._get_axis_name("index") == "index" + assert f._get_axis_name("rows") == "index" + assert f._get_axis_name("columns") == "columns" + + assert f._get_axis(0) is f.index + assert f._get_axis(1) is f.columns + + with pytest.raises(ValueError, match="No axis named"): + f._get_axis_number(2) + + with pytest.raises(ValueError, match="No axis.*foo"): + f._get_axis_name("foo") + + with pytest.raises(ValueError, match="No axis.*None"): + f._get_axis_name(None) + + with pytest.raises(ValueError, match="No axis named"): + f._get_axis_number(None) + + def test_keys(self, float_frame): + getkeys = float_frame.keys + assert getkeys() is float_frame.columns + + def test_column_contains_raises(self, float_frame): + with pytest.raises(TypeError, match="unhashable type: 
'Index'"): + float_frame.columns in float_frame + + def test_tab_completion(self): + # DataFrame whose columns are identifiers shall have them in __dir__. + df = pd.DataFrame([list("abcd"), list("efgh")], columns=list("ABCD")) + for key in list("ABCD"): + assert key in dir(df) + assert isinstance(df.__getitem__("A"), pd.Series) + + # DataFrame whose first-level columns are identifiers shall have + # them in __dir__. + df = pd.DataFrame( + [list("abcd"), list("efgh")], + columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))), + ) + for key in list("ABCD"): + assert key in dir(df) + for key in list("EFGH"): + assert key not in dir(df) + assert isinstance(df.__getitem__("A"), pd.DataFrame) + + def test_not_hashable(self): + empty_frame = DataFrame() + + df = DataFrame([1]) + msg = "'DataFrame' objects are mutable, thus they cannot be hashed" + with pytest.raises(TypeError, match=msg): + hash(df) + with pytest.raises(TypeError, match=msg): + hash(empty_frame) + + def test_new_empty_index(self): + df1 = DataFrame(np.random.randn(0, 3)) + df2 = DataFrame(np.random.randn(0, 3)) + df1.index.name = "foo" + assert df2.index.name is None + + def test_array_interface(self, float_frame): + with np.errstate(all="ignore"): + result = np.sqrt(float_frame) + assert isinstance(result, type(float_frame)) + assert result.index is float_frame.index + assert result.columns is float_frame.columns + + tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) + + def test_get_agg_axis(self, float_frame): + cols = float_frame._get_agg_axis(0) + assert cols is float_frame.columns + + idx = float_frame._get_agg_axis(1) + assert idx is float_frame.index + + msg = r"Axis must be 0 or 1 \(got 2\)" + with pytest.raises(ValueError, match=msg): + float_frame._get_agg_axis(2) + + def test_nonzero(self, float_frame, float_string_frame): + empty_frame = DataFrame() + assert empty_frame.empty + + assert not float_frame.empty + assert not float_string_frame.empty + + # corner case + df = 
DataFrame({"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]}, index=np.arange(3)) + del df["A"] + assert not df.empty + + def test_iteritems(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) + for k, v in df.items(): + assert isinstance(v, DataFrame._constructor_sliced) + + def test_items(self): + # GH 17213, GH 13918 + cols = ["a", "b", "c"] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() + + def test_iter(self, float_frame): + assert tm.equalContents(list(float_frame), float_frame.columns) + + def test_iterrows(self, float_frame, float_string_frame): + for k, v in float_frame.iterrows(): + exp = float_frame.loc[k] + tm.assert_series_equal(v, exp) + + for k, v in float_string_frame.iterrows(): + exp = float_string_frame.loc[k] + tm.assert_series_equal(v, exp) + + def test_iterrows_iso8601(self): + # GH 19671 + s = DataFrame( + { + "non_iso8601": ["M1701", "M1802", "M1903", "M2004"], + "iso8601": date_range("2000-01-01", periods=4, freq="M"), + } + ) + for k, v in s.iterrows(): + exp = s.loc[k] + tm.assert_series_equal(v, exp) + + def test_iterrows_corner(self): + # gh-12222 + df = DataFrame( + { + "a": [datetime.datetime(2015, 1, 1)], + "b": [None], + "c": [None], + "d": [""], + "e": [[]], + "f": [set()], + "g": [{}], + } + ) + expected = Series( + [datetime.datetime(2015, 1, 1), None, None, "", [], set(), {}], + index=list("abcdefg"), + name=0, + dtype="object", + ) + _, result = next(df.iterrows()) + tm.assert_series_equal(result, expected) + + def test_itertuples(self, float_frame): + for i, tup in enumerate(float_frame.itertuples()): + s = DataFrame._constructor_sliced(tup[1:]) + s.name = tup[0] + expected = float_frame.iloc[i, :].reset_index(drop=True) + tm.assert_series_equal(s, expected) + + df = DataFrame( + {"floats": np.random.randn(5), "ints": range(5)}, columns=["floats", "ints"] + ) + + for tup in 
df.itertuples(index=False): + assert isinstance(tup[1], int) + + df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) + dfaa = df[["a", "a"]] + + assert list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)] + + # repr with int on 32-bit/windows + if not (compat.is_platform_windows() or compat.is_platform_32bit()): + assert ( + repr(list(df.itertuples(name=None))) + == "[(0, 1, 4), (1, 2, 5), (2, 3, 6)]" + ) + + tup = next(df.itertuples(name="TestName")) + assert tup._fields == ("Index", "a", "b") + assert (tup.Index, tup.a, tup.b) == tup + assert type(tup).__name__ == "TestName" + + df.columns = ["def", "return"] + tup2 = next(df.itertuples(name="TestName")) + assert tup2 == (0, 1, 4) + assert tup2._fields == ("Index", "_1", "_2") + + df3 = DataFrame({"f" + str(i): [i] for i in range(1024)}) + # will raise SyntaxError if trying to create namedtuple + tup3 = next(df3.itertuples()) + assert isinstance(tup3, tuple) + if PY37: + assert hasattr(tup3, "_fields") + else: + assert not hasattr(tup3, "_fields") + + # GH 28282 + df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}]) + result_254_columns = next(df_254_columns.itertuples(index=False)) + assert isinstance(result_254_columns, tuple) + assert hasattr(result_254_columns, "_fields") + + df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}]) + result_255_columns = next(df_255_columns.itertuples(index=False)) + assert isinstance(result_255_columns, tuple) + + # Dataframes with >=255 columns will fallback to regular tuples on python < 3.7 + if PY37: + assert hasattr(result_255_columns, "_fields") + else: + assert not hasattr(result_255_columns, "_fields") + + def test_sequence_like_with_categorical(self): + + # GH 7839 + # make sure can iterate + df = DataFrame( + {"id": [1, 2, 3, 4, 5, 6], "raw_grade": ["a", "b", "b", "a", "a", "e"]} + ) + df["grade"] = Categorical(df["raw_grade"]) + + # basic sequencing testing + result = list(df.grade.values) + expected = 
np.array(df.grade.values).tolist() + tm.assert_almost_equal(result, expected) + + # iteration + for t in df.itertuples(index=False): + str(t) + + for row, s in df.iterrows(): + str(s) + + for c, col in df.items(): + str(s) + + def test_len(self, float_frame): + assert len(float_frame) == len(float_frame.index) + + def test_values_mixed_dtypes(self, float_frame, float_string_frame): + frame = float_frame + arr = frame.values + + frame_cols = frame.columns + for i, row in enumerate(arr): + for j, value in enumerate(row): + col = frame_cols[j] + if np.isnan(value): + assert np.isnan(frame[col][i]) + else: + assert value == frame[col][i] + + # mixed type + arr = float_string_frame[["foo", "A"]].values + assert arr[0, 0] == "bar" + + df = DataFrame({"complex": [1j, 2j, 3j], "real": [1, 2, 3]}) + arr = df.values + assert arr[0, 0] == 1j + + # single block corner case + arr = float_frame[["A", "B"]].values + expected = float_frame.reindex(columns=["A", "B"]).values + tm.assert_almost_equal(arr, expected) + + def test_to_numpy(self): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]}) + expected = np.array([[1, 3], [2, 4.5]]) + result = df.to_numpy() + tm.assert_numpy_array_equal(result, expected) + + def test_to_numpy_dtype(self): + df = pd.DataFrame({"A": [1, 2], "B": [3, 4.5]}) + expected = np.array([[1, 3], [2, 4]], dtype="int64") + result = df.to_numpy(dtype="int64") + tm.assert_numpy_array_equal(result, expected) + + def test_to_numpy_copy(self): + arr = np.random.randn(4, 3) + df = pd.DataFrame(arr) + assert df.values.base is arr + assert df.to_numpy(copy=False).base is arr + assert df.to_numpy(copy=True).base is None + + def test_transpose(self, float_frame): + frame = float_frame + dft = frame.T + for idx, series in dft.items(): + for col, value in series.items(): + if np.isnan(value): + assert np.isnan(frame[col][idx]) + else: + assert value == frame[col][idx] + + # mixed type + index, data = tm.getMixedTypeDict() + mixed = DataFrame(data, index=index) + + mixed_T 
= mixed.T + for col, s in mixed_T.items(): + assert s.dtype == np.object_ + + def test_swapaxes(self): + df = DataFrame(np.random.randn(10, 5)) + tm.assert_frame_equal(df.T, df.swapaxes(0, 1)) + tm.assert_frame_equal(df.T, df.swapaxes(1, 0)) + tm.assert_frame_equal(df, df.swapaxes(0, 0)) + msg = ( + "No axis named 2 for object type" + r" " + ) + with pytest.raises(ValueError, match=msg): + df.swapaxes(2, 5) + + def test_axis_aliases(self, float_frame): + f = float_frame + + # reg name + expected = f.sum(axis=0) + result = f.sum(axis="index") + tm.assert_series_equal(result, expected) + + expected = f.sum(axis=1) + result = f.sum(axis="columns") + tm.assert_series_equal(result, expected) + + def test_class_axis(self): + # GH 18147 + # no exception and no empty docstring + assert pydoc.getdoc(DataFrame.index) + assert pydoc.getdoc(DataFrame.columns) + + def test_more_values(self, float_string_frame): + values = float_string_frame.values + assert values.shape[1] == len(float_string_frame.columns) + + def test_repr_with_mi_nat(self, float_string_frame): + df = DataFrame( + {"X": [1, 2]}, index=[[pd.NaT, pd.Timestamp("20130101")], ["a", "b"]] + ) + result = repr(df) + expected = " X\nNaT a 1\n2013-01-01 b 2" + assert result == expected + + def test_items_names(self, float_string_frame): + for k, v in float_string_frame.items(): + assert v.name == k + + def test_series_put_names(self, float_string_frame): + series = float_string_frame._series + for k, v in series.items(): + assert v.name == k + + def test_empty_nonzero(self): + df = DataFrame([1, 2, 3]) + assert not df.empty + df = DataFrame(index=[1], columns=[1]) + assert not df.empty + df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna() + assert df.empty + assert df.T.empty + empty_frames = [ + DataFrame(), + DataFrame(index=[1]), + DataFrame(columns=[1]), + DataFrame({1: []}), + ] + for df in empty_frames: + assert df.empty + assert df.T.empty + + def test_with_datetimelikes(self): + + df = DataFrame( + { + 
"A": date_range("20130101", periods=10), + "B": timedelta_range("1 day", periods=10), + } + ) + t = df.T + + result = t.dtypes.value_counts() + expected = Series({np.dtype("object"): 10}) + tm.assert_series_equal(result, expected) + + def test_values(self, float_frame): + float_frame.values[:, 0] = 5.0 + assert (float_frame.values[:, 0] == 5).all() + + def test_deepcopy(self, float_frame): + cp = deepcopy(float_frame) + series = cp["A"] + series[:] = 10 + for idx, value in series.items(): + assert float_frame["A"][idx] != value + + def test_transpose_get_view(self, float_frame): + dft = float_frame.T + dft.values[:, 5:10] = 5 + + assert (float_frame.values[5:10] == 5).all() + + def test_inplace_return_self(self): + # GH 1893 + + data = DataFrame( + {"a": ["foo", "bar", "baz", "qux"], "b": [0, 0, 1, 1], "c": [1, 2, 3, 4]} + ) + + def _check_f(base, f): + result = f(base) + assert result is None + + # -----DataFrame----- + + # set_index + f = lambda x: x.set_index("a", inplace=True) + _check_f(data.copy(), f) + + # reset_index + f = lambda x: x.reset_index(inplace=True) + _check_f(data.set_index("a"), f) + + # drop_duplicates + f = lambda x: x.drop_duplicates(inplace=True) + _check_f(data.copy(), f) + + # sort + f = lambda x: x.sort_values("b", inplace=True) + _check_f(data.copy(), f) + + # sort_index + f = lambda x: x.sort_index(inplace=True) + _check_f(data.copy(), f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(data.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(data.copy(), f) + + # rename + f = lambda x: x.rename({1: "foo"}, inplace=True) + _check_f(data.copy(), f) + + # -----Series----- + d = data.copy()["c"] + + # reset_index + f = lambda x: x.reset_index(inplace=True, drop=True) + _check_f(data.set_index("a")["c"], f) + + # fillna + f = lambda x: x.fillna(0, inplace=True) + _check_f(d.copy(), f) + + # replace + f = lambda x: x.replace(1, 0, inplace=True) + _check_f(d.copy(), f) + + # rename + f = lambda 
x: x.rename({1: "foo"}, inplace=True) + _check_f(d.copy(), f) + + @async_mark() + async def test_tab_complete_warning(self, ip): + # GH 16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; df = pd.DataFrame()" + await ip.run_code(code) + + # TODO: remove it when Ipython updates + # GH 33567, jedi version raises Deprecation warning in Ipython + import jedi + + if jedi.__version__ < "0.17.0": + warning = tm.assert_produces_warning(None) + else: + warning = tm.assert_produces_warning( + DeprecationWarning, check_stacklevel=False + ) + with warning: + with provisionalcompleter("ignore"): + list(ip.Completer.completions("df.", 1)) + + def test_attrs(self): + df = pd.DataFrame({"A": [2, 3]}) + assert df.attrs == {} + df.attrs["version"] = 1 + + result = df.rename(columns=str) + assert result.attrs == {"version": 1} diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py new file mode 100644 index 00000000..fe6abef9 --- /dev/null +++ b/pandas/tests/frame/test_apply.py @@ -0,0 +1,1414 @@ +from collections import OrderedDict +from datetime import datetime +from itertools import chain +import operator +import warnings + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, notna +import pandas._testing as tm +from pandas.conftest import _get_cython_table_params +from pandas.core.apply import frame_apply +from pandas.core.base import SpecificationError + + +@pytest.fixture +def int_frame_const_col(): + """ + Fixture for DataFrame of ints which are constant per column + + Columns are ['A', 'B', 'C'], with values (per column): [1, 2, 3] + """ + df = DataFrame( + np.tile(np.arange(3, dtype="int64"), 6).reshape(6, -1) + 1, + columns=["A", "B", "C"], + ) + return df + + +class TestDataFrameApply: + def test_apply(self, 
float_frame): + with np.errstate(all="ignore"): + # ufunc + applied = float_frame.apply(np.sqrt) + tm.assert_series_equal(np.sqrt(float_frame["A"]), applied["A"]) + + # aggregator + applied = float_frame.apply(np.mean) + assert applied["A"] == np.mean(float_frame["A"]) + + d = float_frame.index[0] + applied = float_frame.apply(np.mean, axis=1) + assert applied[d] == np.mean(float_frame.xs(d)) + assert applied.index is float_frame.index # want this + + # invalid axis + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) + with pytest.raises(ValueError): + df.apply(lambda x: x, 2) + + # GH 9573 + df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]}) + df = df.apply(lambda ts: ts.astype("category")) + + assert df.shape == (4, 2) + assert isinstance(df["c0"].dtype, CategoricalDtype) + assert isinstance(df["c1"].dtype, CategoricalDtype) + + def test_apply_mixed_datetimelike(self): + # mixed datetimelike + # GH 7778 + df = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": pd.to_timedelta(np.arange(3), unit="s"), + } + ) + result = df.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result, df) + + def test_apply_empty(self, float_frame): + # empty + empty_frame = DataFrame() + + applied = empty_frame.apply(np.sqrt) + assert applied.empty + + applied = empty_frame.apply(np.mean) + assert applied.empty + + no_rows = float_frame[:0] + result = no_rows.apply(lambda x: x.mean()) + expected = Series(np.nan, index=float_frame.columns) + tm.assert_series_equal(result, expected) + + no_cols = float_frame.loc[:, []] + result = no_cols.apply(lambda x: x.mean(), axis=1) + expected = Series(np.nan, index=float_frame.index) + tm.assert_series_equal(result, expected) + + # GH 2476 + expected = DataFrame(index=["a"]) + result = expected.apply(lambda x: x["a"], axis=1) + tm.assert_frame_equal(expected, result) + + def test_apply_with_reduce_empty(self): + # reduce with an empty DataFrame + empty_frame = DataFrame() + + x = [] + 
result = empty_frame.apply(x.append, axis=1, result_type="expand") + tm.assert_frame_equal(result, empty_frame) + result = empty_frame.apply(x.append, axis=1, result_type="reduce") + expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + tm.assert_series_equal(result, expected) + + empty_with_cols = DataFrame(columns=["a", "b", "c"]) + result = empty_with_cols.apply(x.append, axis=1, result_type="expand") + tm.assert_frame_equal(result, empty_with_cols) + result = empty_with_cols.apply(x.append, axis=1, result_type="reduce") + expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + tm.assert_series_equal(result, expected) + + # Ensure that x.append hasn't been called + assert x == [] + + @pytest.mark.parametrize("func", ["sum", "prod", "any", "all"]) + def test_apply_funcs_over_empty(self, func): + # GH 28213 + df = DataFrame(columns=["a", "b", "c"]) + + result = df.apply(getattr(np, func)) + expected = getattr(df, func)() + tm.assert_series_equal(result, expected) + + def test_nunique_empty(self): + # GH 28213 + df = DataFrame(columns=["a", "b", "c"]) + + result = df.nunique() + expected = Series(0, index=df.columns) + tm.assert_series_equal(result, expected) + + result = df.T.nunique() + expected = Series([], index=pd.Index([]), dtype=np.float64) + tm.assert_series_equal(result, expected) + + def test_apply_standard_nonunique(self): + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"]) + + result = df.apply(lambda s: s[0], axis=1) + expected = Series([1, 4, 7], ["a", "a", "c"]) + tm.assert_series_equal(result, expected) + + result = df.T.apply(lambda s: s[0], axis=0) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"]) + @pytest.mark.parametrize( + "args,kwds", + [ + pytest.param([], {}, id="no_args_or_kwds"), + pytest.param([1], {}, id="axis_from_args"), + pytest.param([], {"axis": 1}, id="axis_from_kwds"), + pytest.param([], 
{"numeric_only": True}, id="optional_kwds"), + pytest.param([1, None], {"numeric_only": True}, id="args_and_kwds"), + ], + ) + def test_apply_with_string_funcs(self, float_frame, func, args, kwds): + result = float_frame.apply(func, *args, **kwds) + expected = getattr(float_frame, func)(*args, **kwds) + tm.assert_series_equal(result, expected) + + def test_apply_broadcast(self, float_frame, int_frame_const_col): + + # scalars + result = float_frame.apply(np.mean, result_type="broadcast") + expected = DataFrame([float_frame.mean()], index=float_frame.index) + tm.assert_frame_equal(result, expected) + + result = float_frame.apply(np.mean, axis=1, result_type="broadcast") + m = float_frame.mean(axis=1) + expected = DataFrame({c: m for c in float_frame.columns}) + tm.assert_frame_equal(result, expected) + + # lists + result = float_frame.apply( + lambda x: list(range(len(float_frame.columns))), + axis=1, + result_type="broadcast", + ) + m = list(range(len(float_frame.columns))) + expected = DataFrame( + [m] * len(float_frame.index), + dtype="float64", + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + result = float_frame.apply( + lambda x: list(range(len(float_frame.index))), result_type="broadcast" + ) + m = list(range(len(float_frame.index))) + expected = DataFrame( + {c: m for c in float_frame.columns}, + dtype="float64", + index=float_frame.index, + ) + tm.assert_frame_equal(result, expected) + + # preserve columns + df = int_frame_const_col + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast") + tm.assert_frame_equal(result, df) + + df = int_frame_const_col + result = df.apply( + lambda x: Series([1, 2, 3], index=list("abc")), + axis=1, + result_type="broadcast", + ) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + def test_apply_broadcast_error(self, int_frame_const_col): + df = int_frame_const_col + + # > 1 ndim + with pytest.raises(ValueError): + df.apply( + lambda 
x: np.array([1, 2]).reshape(-1, 2), + axis=1, + result_type="broadcast", + ) + + # cannot broadcast + with pytest.raises(ValueError): + df.apply(lambda x: [1, 2], axis=1, result_type="broadcast") + + with pytest.raises(ValueError): + df.apply(lambda x: Series([1, 2]), axis=1, result_type="broadcast") + + def test_apply_raw(self, float_frame): + result0 = float_frame.apply(np.mean, raw=True) + result1 = float_frame.apply(np.mean, axis=1, raw=True) + + expected0 = float_frame.apply(lambda x: x.values.mean()) + expected1 = float_frame.apply(lambda x: x.values.mean(), axis=1) + + tm.assert_series_equal(result0, expected0) + tm.assert_series_equal(result1, expected1) + + # no reduction + result = float_frame.apply(lambda x: x * 2, raw=True) + expected = float_frame * 2 + tm.assert_frame_equal(result, expected) + + def test_apply_axis1(self, float_frame): + d = float_frame.index[0] + tapplied = float_frame.apply(np.mean, axis=1) + assert tapplied[d] == np.mean(float_frame.xs(d)) + + def test_apply_ignore_failures(self, float_string_frame): + result = frame_apply( + float_string_frame, np.mean, 0, ignore_failures=True + ).apply_standard() + expected = float_string_frame._get_numeric_data().apply(np.mean) + tm.assert_series_equal(result, expected) + + def test_apply_mixed_dtype_corner(self): + df = DataFrame({"A": ["foo"], "B": [1.0]}) + result = df[:0].apply(np.mean, axis=1) + # the result here is actually kind of ambiguous, should it be a Series + # or a DataFrame? 
+ expected = Series(np.nan, index=pd.Index([], dtype="int64")) + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": ["foo"], "B": [1.0]}) + result = df.apply(lambda x: x["A"], axis=1) + expected = Series(["foo"], index=[0]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: x["B"], axis=1) + expected = Series([1.0], index=[0]) + tm.assert_series_equal(result, expected) + + def test_apply_empty_infer_type(self): + no_cols = DataFrame(index=["a", "b", "c"]) + no_index = DataFrame(columns=["a", "b", "c"]) + + def _check(df, f): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + test_res = f(np.array([], dtype="f8")) + is_reduction = not isinstance(test_res, np.ndarray) + + def _checkit(axis=0, raw=False): + result = df.apply(f, axis=axis, raw=raw) + if is_reduction: + agg_axis = df._get_agg_axis(axis) + assert isinstance(result, Series) + assert result.index is agg_axis + else: + assert isinstance(result, DataFrame) + + _checkit() + _checkit(axis=1) + _checkit(raw=True) + _checkit(axis=0, raw=True) + + with np.errstate(all="ignore"): + _check(no_cols, lambda x: x) + _check(no_cols, lambda x: x.mean()) + _check(no_index, lambda x: x) + _check(no_index, lambda x: x.mean()) + + result = no_cols.apply(lambda x: x.mean(), result_type="broadcast") + assert isinstance(result, DataFrame) + + def test_apply_with_args_kwds(self, float_frame): + def add_some(x, howmuch=0): + return x + howmuch + + def agg_and_add(x, howmuch=0): + return x.mean() + howmuch + + def subtract_and_divide(x, sub, divide=1): + return (x - sub) / divide + + result = float_frame.apply(add_some, howmuch=2) + expected = float_frame.apply(lambda x: x + 2) + tm.assert_frame_equal(result, expected) + + result = float_frame.apply(agg_and_add, howmuch=2) + expected = float_frame.apply(lambda x: x.mean() + 2) + tm.assert_series_equal(result, expected) + + result = float_frame.apply(subtract_and_divide, args=(2,), divide=2) + 
expected = float_frame.apply(lambda x: (x - 2.0) / 2.0) + tm.assert_frame_equal(result, expected) + + def test_apply_yield_list(self, float_frame): + result = float_frame.apply(list) + tm.assert_frame_equal(result, float_frame) + + def test_apply_reduce_Series(self, float_frame): + float_frame.loc[::2, "A"] = np.nan + expected = float_frame.mean(1) + result = float_frame.apply(np.mean, axis=1) + tm.assert_series_equal(result, expected) + + def test_apply_reduce_rows_to_dict(self): + # GH 25196 + data = pd.DataFrame([[1, 2], [3, 4]]) + expected = pd.Series([{0: 1, 1: 3}, {0: 2, 1: 4}]) + result = data.apply(dict) + tm.assert_series_equal(result, expected) + + def test_apply_differently_indexed(self): + df = DataFrame(np.random.randn(20, 10)) + + result0 = df.apply(Series.describe, axis=0) + expected0 = DataFrame( + {i: v.describe() for i, v in df.items()}, columns=df.columns + ) + tm.assert_frame_equal(result0, expected0) + + result1 = df.apply(Series.describe, axis=1) + expected1 = DataFrame( + {i: v.describe() for i, v in df.T.items()}, columns=df.index + ).T + tm.assert_frame_equal(result1, expected1) + + def test_apply_modify_traceback(self): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + data.loc[4, "C"] = np.nan + + def transform(row): + if row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + def transform2(row): + if notna(row["C"]) and row["C"].startswith("shin") and row["A"] == "foo": + row["D"] = 7 + return row + + msg = "'float' object has no attribute 'startswith'" + with pytest.raises(AttributeError, 
match=msg): + data.apply(transform, axis=1) + + def test_apply_bug(self): + + # GH 6125 + positions = pd.DataFrame( + [ + [1, "ABC0", 50], + [1, "YUM0", 20], + [1, "DEF0", 20], + [2, "ABC1", 50], + [2, "YUM1", 20], + [2, "DEF1", 20], + ], + columns=["a", "market", "position"], + ) + + def f(r): + return r["market"] + + expected = positions.apply(f, axis=1) + + positions = DataFrame( + [ + [datetime(2013, 1, 1), "ABC0", 50], + [datetime(2013, 1, 2), "YUM0", 20], + [datetime(2013, 1, 3), "DEF0", 20], + [datetime(2013, 1, 4), "ABC1", 50], + [datetime(2013, 1, 5), "YUM1", 20], + [datetime(2013, 1, 6), "DEF1", 20], + ], + columns=["a", "market", "position"], + ) + result = positions.apply(f, axis=1) + tm.assert_series_equal(result, expected) + + def test_apply_convert_objects(self): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + result = data.apply(lambda x: x, axis=1) + tm.assert_frame_equal(result._convert(datetime=True), data) + + def test_apply_attach_name(self, float_frame): + result = float_frame.apply(lambda x: x.name) + expected = Series(float_frame.columns, index=float_frame.columns) + tm.assert_series_equal(result, expected) + + result = float_frame.apply(lambda x: x.name, axis=1) + expected = Series(float_frame.index, index=float_frame.index) + tm.assert_series_equal(result, expected) + + # non-reductions + result = float_frame.apply(lambda x: np.repeat(x.name, len(x))) + expected = DataFrame( + np.tile(float_frame.columns, (len(float_frame.index), 1)), + index=float_frame.index, + columns=float_frame.columns, + ) + 
tm.assert_frame_equal(result, expected) + + result = float_frame.apply(lambda x: np.repeat(x.name, len(x)), axis=1) + expected = Series( + np.repeat(t[0], len(float_frame.columns)) for t in float_frame.itertuples() + ) + expected.index = float_frame.index + tm.assert_series_equal(result, expected) + + def test_apply_multi_index(self, float_frame): + index = MultiIndex.from_arrays([["a", "a", "b"], ["c", "d", "d"]]) + s = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["col1", "col2"]) + result = s.apply(lambda x: Series({"min": min(x), "max": max(x)}), 1) + expected = DataFrame( + [[1, 2], [3, 4], [5, 6]], index=index, columns=["min", "max"] + ) + tm.assert_frame_equal(result, expected, check_like=True) + + def test_apply_dict(self): + + # GH 8735 + A = DataFrame([["foo", "bar"], ["spam", "eggs"]]) + A_dicts = Series( + [dict([(0, "foo"), (1, "spam")]), dict([(0, "bar"), (1, "eggs")])] + ) + B = DataFrame([[0, 1], [2, 3]]) + B_dicts = Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])]) + fn = lambda x: x.to_dict() + + for df, dicts in [(A, A_dicts), (B, B_dicts)]: + reduce_true = df.apply(fn, result_type="reduce") + reduce_false = df.apply(fn, result_type="expand") + reduce_none = df.apply(fn) + + tm.assert_series_equal(reduce_true, dicts) + tm.assert_frame_equal(reduce_false, df) + tm.assert_series_equal(reduce_none, dicts) + + def test_applymap(self, float_frame): + applied = float_frame.applymap(lambda x: x * 2) + tm.assert_frame_equal(applied, float_frame * 2) + float_frame.applymap(type) + + # GH 465: function returning tuples + result = float_frame.applymap(lambda x: (x, x)) + assert isinstance(result["A"][0], tuple) + + # GH 2909: object conversion to float in constructor? 
+ df = DataFrame(data=[1, "a"]) + result = df.applymap(lambda x: x) + assert result.dtypes[0] == object + + df = DataFrame(data=[1.0, "a"]) + result = df.applymap(lambda x: x) + assert result.dtypes[0] == object + + # GH 2786 + df = DataFrame(np.random.random((3, 4))) + df2 = df.copy() + cols = ["a", "a", "a", "a"] + df.columns = cols + + expected = df2.applymap(str) + expected.columns = cols + result = df.applymap(str) + tm.assert_frame_equal(result, expected) + + # datetime/timedelta + df["datetime"] = Timestamp("20130101") + df["timedelta"] = pd.Timedelta("1 min") + result = df.applymap(str) + for f in ["datetime", "timedelta"]: + assert result.loc[0, f] == str(df.loc[0, f]) + + # GH 8222 + empty_frames = [ + pd.DataFrame(), + pd.DataFrame(columns=list("ABC")), + pd.DataFrame(index=list("ABC")), + pd.DataFrame({"A": [], "B": [], "C": []}), + ] + for frame in empty_frames: + for func in [round, lambda x: x]: + result = frame.applymap(func) + tm.assert_frame_equal(result, frame) + + def test_applymap_box_timestamps(self): + # GH 2689, GH 2627 + ser = pd.Series(date_range("1/1/2000", periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + pd.DataFrame(ser).applymap(func) + + def test_applymap_box(self): + # ufunc will not be boxed. 
Same test cases as the test_map_box + df = pd.DataFrame( + { + "a": [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")], + "b": [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + ], + "c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")], + "d": [ + pd.Period("2011-01-01", freq="M"), + pd.Period("2011-01-02", freq="M"), + ], + } + ) + + result = df.applymap(lambda x: type(x).__name__) + expected = pd.DataFrame( + { + "a": ["Timestamp", "Timestamp"], + "b": ["Timestamp", "Timestamp"], + "c": ["Timedelta", "Timedelta"], + "d": ["Period", "Period"], + } + ) + tm.assert_frame_equal(result, expected) + + def test_frame_apply_dont_convert_datetime64(self): + from pandas.tseries.offsets import BDay + + df = DataFrame({"x1": [datetime(1996, 1, 1)]}) + + df = df.applymap(lambda x: x + BDay()) + df = df.applymap(lambda x: x + BDay()) + + assert df.x1.dtype == "M8[ns]" + + def test_apply_non_numpy_dtype(self): + # GH 12244 + df = DataFrame( + {"dt": pd.date_range("2015-01-01", periods=3, tz="Europe/Brussels")} + ) + result = df.apply(lambda x: x) + tm.assert_frame_equal(result, df) + + result = df.apply(lambda x: x + pd.Timedelta("1day")) + expected = DataFrame( + {"dt": pd.date_range("2015-01-02", periods=3, tz="Europe/Brussels")} + ) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"dt": ["a", "b", "c", "a"]}, dtype="category") + result = df.apply(lambda x: x) + tm.assert_frame_equal(result, df) + + def test_apply_dup_names_multi_agg(self): + # GH 21063 + df = pd.DataFrame([[0, 1], [2, 3]], columns=["a", "a"]) + expected = pd.DataFrame([[0, 1]], columns=["a", "a"], index=["min"]) + result = df.agg(["min"]) + + tm.assert_frame_equal(result, expected) + + def test_apply_nested_result_axis_1(self): + # GH 13820 + def apply_list(row): + return [2 * row["A"], 2 * row["C"], 2 * row["B"]] + + df = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCD")) + result = df.apply(apply_list, axis=1) + expected = Series( + 
[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] + ) + tm.assert_series_equal(result, expected) + + def test_apply_noreduction_tzaware_object(self): + # https://github.com/pandas-dev/pandas/issues/31505 + df = pd.DataFrame({"foo": [pd.Timestamp("2020", tz="UTC")]}, dtype="object") + result = df.apply(lambda x: x) + tm.assert_frame_equal(result, df) + result = df.apply(lambda x: x.copy()) + tm.assert_frame_equal(result, df) + + +class TestInferOutputShape: + # the user has supplied an opaque UDF where + # they are transforming the input that requires + # us to infer the output + + def test_infer_row_shape(self): + # GH 17437 + # if row shape is changing, infer it + df = pd.DataFrame(np.random.rand(10, 2)) + result = df.apply(np.fft.fft, axis=0) + assert result.shape == (10, 2) + + result = df.apply(np.fft.rfft, axis=0) + assert result.shape == (6, 2) + + def test_with_dictlike_columns(self): + # GH 17602 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1) + expected = Series([{"s": 3} for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + df["tm"] = [ + pd.Timestamp("2017-05-01 00:00:00"), + pd.Timestamp("2017-05-02 00:00:00"), + ] + result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1) + tm.assert_series_equal(result, expected) + + # compose a series + result = (df["a"] + df["b"]).apply(lambda x: {"s": x}) + expected = Series([{"s": 3}, {"s": 3}]) + tm.assert_series_equal(result, expected) + + # GH 18775 + df = DataFrame() + df["author"] = ["X", "Y", "Z"] + df["publisher"] = ["BBC", "NBC", "N24"] + df["date"] = pd.to_datetime( + ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"] + ) + result = df.apply(lambda x: {}, axis=1) + expected = Series([{}, {}, {}]) + tm.assert_series_equal(result, expected) + + def test_with_dictlike_columns_with_infer(self): + # GH 17602 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.apply( 
+ lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand" + ) + expected = DataFrame({"s": [3, 3]}) + tm.assert_frame_equal(result, expected) + + df["tm"] = [ + pd.Timestamp("2017-05-01 00:00:00"), + pd.Timestamp("2017-05-02 00:00:00"), + ] + result = df.apply( + lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand" + ) + tm.assert_frame_equal(result, expected) + + def test_with_listlike_columns(self): + # GH 17348 + df = DataFrame( + { + "a": Series(np.random.randn(4)), + "b": ["a", "list", "of", "words"], + "ts": date_range("2016-10-01", periods=4, freq="H"), + } + ) + + result = df[["a", "b"]].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[["a", "b"]].itertuples()]) + tm.assert_series_equal(result, expected) + + result = df[["a", "ts"]].apply(tuple, axis=1) + expected = Series([t[1:] for t in df[["a", "ts"]].itertuples()]) + tm.assert_series_equal(result, expected) + + # GH 18919 + df = DataFrame( + {"x": Series([["a", "b"], ["q"]]), "y": Series([["z"], ["q", "t"]])} + ) + df.index = MultiIndex.from_tuples([("i0", "j0"), ("i1", "j1")]) + + result = df.apply(lambda row: [el for el in row["x"] if el in row["y"]], axis=1) + expected = Series([[], ["q"]], index=df.index) + tm.assert_series_equal(result, expected) + + def test_infer_output_shape_columns(self): + # GH 18573 + + df = DataFrame( + { + "number": [1.0, 2.0], + "string": ["foo", "bar"], + "datetime": [ + pd.Timestamp("2017-11-29 03:30:00"), + pd.Timestamp("2017-11-29 03:45:00"), + ], + } + ) + result = df.apply(lambda row: (row.number, row.string), axis=1) + expected = Series([(t.number, t.string) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + def test_infer_output_shape_listlike_columns(self): + # GH 16353 + + df = DataFrame(np.random.randn(6, 3), columns=["A", "B", "C"]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda 
x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + # GH 17970 + df = DataFrame({"a": [1, 2, 3]}, index=list("abc")) + + result = df.apply(lambda row: np.ones(1), axis=1) + expected = Series([np.ones(1) for t in df.itertuples()], index=df.index) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda row: np.ones(2), axis=1) + expected = Series([np.ones(2) for t in df.itertuples()], index=df.index) + tm.assert_series_equal(result, expected) + + # GH 17892 + df = pd.DataFrame( + { + "a": [ + pd.Timestamp("2010-02-01"), + pd.Timestamp("2010-02-04"), + pd.Timestamp("2010-02-05"), + pd.Timestamp("2010-02-06"), + ], + "b": [9, 5, 4, 3], + "c": [5, 3, 4, 2], + "d": [1, 2, 3, 4], + } + ) + + def fun(x): + return (1, 2) + + result = df.apply(fun, axis=1) + expected = Series([(1, 2) for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + def test_consistent_coerce_for_shapes(self): + # we want column names to NOT be propagated + # just because the shape matches the input shape + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + expected = Series([[1, 2, 3] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1) + expected = Series([[1, 2] for t in df.itertuples()]) + tm.assert_series_equal(result, expected) + + def test_consistent_names(self, int_frame_const_col): + # if a Series is returned, we should use the resulting index names + df = int_frame_const_col + + result = df.apply( + lambda x: Series([1, 2, 3], index=["test", "other", "cols"]), axis=1 + ) + expected = int_frame_const_col.rename( + columns={"A": "test", "B": "other", "C": "cols"} + ) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: Series([1, 2], index=["test", "other"]), axis=1) + expected = expected[["test", "other"]] + tm.assert_frame_equal(result, 
expected) + + def test_result_type(self, int_frame_const_col): + # result_type should be consistent no matter which + # path we take in the code + df = int_frame_const_col + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + expected = df.copy() + expected.columns = [0, 1, 2] + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2], axis=1, result_type="expand") + expected = df[["A", "B"]].copy() + expected.columns = [0, 1] + tm.assert_frame_equal(result, expected) + + # broadcast result + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast") + expected = df.copy() + tm.assert_frame_equal(result, expected) + + columns = ["other", "col", "names"] + result = df.apply( + lambda x: Series([1, 2, 3], index=columns), axis=1, result_type="broadcast" + ) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + # series result + result = df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1) + expected = df.copy() + tm.assert_frame_equal(result, expected) + + # series result with other index + columns = ["other", "col", "names"] + result = df.apply(lambda x: Series([1, 2, 3], index=columns), axis=1) + expected = df.copy() + expected.columns = columns + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("result_type", ["foo", 1]) + def test_result_type_error(self, result_type, int_frame_const_col): + # allowed result_type + df = int_frame_const_col + + with pytest.raises(ValueError): + df.apply(lambda x: [1, 2, 3], axis=1, result_type=result_type) + + @pytest.mark.parametrize( + "box", + [lambda x: list(x), lambda x: tuple(x), lambda x: np.array(x, dtype="int64")], + ids=["list", "tuple", "array"], + ) + def test_consistency_for_boxed(self, box, int_frame_const_col): + # passing an array or list should not affect the output shape + df = int_frame_const_col + + result = df.apply(lambda x: box([1, 2]), axis=1) + expected = Series([box([1, 2]) for t in df.itertuples()]) + 
tm.assert_series_equal(result, expected) + + result = df.apply(lambda x: box([1, 2]), axis=1, result_type="expand") + expected = int_frame_const_col[["A", "B"]].rename(columns={"A": 0, "B": 1}) + tm.assert_frame_equal(result, expected) + + +def zip_frames(frames, axis=1): + """ + take a list of frames, zip them together under the + assumption that these all have the first frames' index/columns. + + Returns + ------- + new_frame : DataFrame + """ + if axis == 1: + columns = frames[0].columns + zipped = [f.loc[:, c] for c in columns for f in frames] + return pd.concat(zipped, axis=1) + else: + index = frames[0].index + zipped = [f.loc[i, :] for i in index for f in frames] + return pd.DataFrame(zipped) + + +class TestDataFrameAggregate: + def test_agg_transform(self, axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + + with np.errstate(all="ignore"): + + f_abs = np.abs(float_frame) + f_sqrt = np.sqrt(float_frame) + + # ufunc + result = float_frame.transform(np.sqrt, axis=axis) + expected = f_sqrt.copy() + tm.assert_frame_equal(result, expected) + + result = float_frame.apply(np.sqrt, axis=axis) + tm.assert_frame_equal(result, expected) + + result = float_frame.transform(np.sqrt, axis=axis) + tm.assert_frame_equal(result, expected) + + # list-like + result = float_frame.apply([np.sqrt], axis=axis) + expected = f_sqrt.copy() + if axis in {0, "index"}: + expected.columns = pd.MultiIndex.from_product( + [float_frame.columns, ["sqrt"]] + ) + else: + expected.index = pd.MultiIndex.from_product( + [float_frame.index, ["sqrt"]] + ) + tm.assert_frame_equal(result, expected) + + result = float_frame.transform([np.sqrt], axis=axis) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both + # functions per series and then concatting + result = float_frame.apply([np.abs, np.sqrt], axis=axis) + expected = zip_frames([f_abs, f_sqrt], axis=other_axis) + if axis in {0, "index"}: + expected.columns = 
pd.MultiIndex.from_product( + [float_frame.columns, ["absolute", "sqrt"]] + ) + else: + expected.index = pd.MultiIndex.from_product( + [float_frame.index, ["absolute", "sqrt"]] + ) + tm.assert_frame_equal(result, expected) + + result = float_frame.transform([np.abs, "sqrt"], axis=axis) + tm.assert_frame_equal(result, expected) + + def test_transform_and_agg_err(self, axis, float_frame): + # cannot both transform and agg + with pytest.raises(ValueError): + float_frame.transform(["max", "min"], axis=axis) + + with pytest.raises(ValueError): + with np.errstate(all="ignore"): + float_frame.agg(["max", "sqrt"], axis=axis) + + with pytest.raises(ValueError): + with np.errstate(all="ignore"): + float_frame.transform(["max", "sqrt"], axis=axis) + + df = pd.DataFrame({"A": range(5), "B": 5}) + + def f(): + with np.errstate(all="ignore"): + df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis) + + @pytest.mark.parametrize("method", ["abs", "shift", "pct_change", "cumsum", "rank"]) + def test_transform_method_name(self, method): + # GH 19760 + df = pd.DataFrame({"A": [-1, 2]}) + result = df.transform(method) + expected = operator.methodcaller(method)(df) + tm.assert_frame_equal(result, expected) + + def test_demo(self): + # demonstration tests + df = pd.DataFrame({"A": range(5), "B": 5}) + + result = df.agg(["min", "max"]) + expected = DataFrame( + {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"] + ) + tm.assert_frame_equal(result, expected) + + result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]}) + expected = DataFrame( + {"A": [4.0, 0.0, np.nan], "B": [5.0, np.nan, 25.0]}, + columns=["A", "B"], + index=["max", "min", "sum"], + ) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + def test_agg_multiple_mixed_no_warning(self): + # GH 20909 + mdf = pd.DataFrame( + { + "A": [1, 2, 3], + "B": [1.0, 2.0, 3.0], + "C": ["foo", "bar", "baz"], + "D": pd.date_range("20130101", periods=3), + } + ) + expected = pd.DataFrame( + { + 
"A": [1, 6], + "B": [1.0, 6.0], + "C": ["bar", "foobarbaz"], + "D": [pd.Timestamp("2013-01-01"), pd.NaT], + }, + index=["min", "sum"], + ) + # sorted index + with tm.assert_produces_warning(None): + result = mdf.agg(["min", "sum"]) + + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(None): + result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) + + # For backwards compatibility, the result's index is + # still sorted by function name, so it's ['min', 'sum'] + # not ['sum', 'min']. + expected = expected[["D", "C", "B", "A"]] + tm.assert_frame_equal(result, expected) + + def test_agg_dict_nested_renaming_depr(self): + + df = pd.DataFrame({"A": range(5), "B": 5}) + + # nested renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + df.agg({"A": {"foo": "min"}, "B": {"bar": "max"}}) + + def test_agg_reduce(self, axis, float_frame): + other_axis = 1 if axis in {0, "index"} else 0 + name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values() + + # all reducers + expected = pd.concat( + [ + float_frame.mean(axis=axis), + float_frame.max(axis=axis), + float_frame.sum(axis=axis), + ], + axis=1, + ) + expected.columns = ["mean", "max", "sum"] + expected = expected.T if axis in {0, "index"} else expected + + result = float_frame.agg(["mean", "max", "sum"], axis=axis) + tm.assert_frame_equal(result, expected) + + # dict input with scalars + func = OrderedDict([(name1, "mean"), (name2, "sum")]) + result = float_frame.agg(func, axis=axis) + expected = Series( + [ + float_frame.loc(other_axis)[name1].mean(), + float_frame.loc(other_axis)[name2].sum(), + ], + index=[name1, name2], + ) + tm.assert_series_equal(result, expected) + + # dict input with lists + func = OrderedDict([(name1, ["mean"]), (name2, ["sum"])]) + result = float_frame.agg(func, axis=axis) + expected = DataFrame( + { + name1: Series( + [float_frame.loc(other_axis)[name1].mean()], index=["mean"] + ), + name2: Series( + 
[float_frame.loc(other_axis)[name2].sum()], index=["sum"] + ), + } + ) + expected = expected.T if axis in {1, "columns"} else expected + tm.assert_frame_equal(result, expected) + + # dict input with lists with multiple + func = OrderedDict([(name1, ["mean", "sum"]), (name2, ["sum", "max"])]) + result = float_frame.agg(func, axis=axis) + expected = DataFrame( + OrderedDict( + [ + ( + name1, + Series( + [ + float_frame.loc(other_axis)[name1].mean(), + float_frame.loc(other_axis)[name1].sum(), + ], + index=["mean", "sum"], + ), + ), + ( + name2, + Series( + [ + float_frame.loc(other_axis)[name2].sum(), + float_frame.loc(other_axis)[name2].max(), + ], + index=["sum", "max"], + ), + ), + ] + ) + ) + expected = expected.T if axis in {1, "columns"} else expected + tm.assert_frame_equal(result, expected) + + def test_nuiscance_columns(self): + + # GH 15015 + df = DataFrame( + { + "A": [1, 2, 3], + "B": [1.0, 2.0, 3.0], + "C": ["foo", "bar", "baz"], + "D": pd.date_range("20130101", periods=3), + } + ) + + result = df.agg("min") + expected = Series([1, 1.0, "bar", pd.Timestamp("20130101")], index=df.columns) + tm.assert_series_equal(result, expected) + + result = df.agg(["min"]) + expected = DataFrame( + [[1, 1.0, "bar", pd.Timestamp("20130101")]], + index=["min"], + columns=df.columns, + ) + tm.assert_frame_equal(result, expected) + + result = df.agg("sum") + expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"]) + tm.assert_series_equal(result, expected) + + result = df.agg(["sum"]) + expected = DataFrame( + [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"] + ) + tm.assert_frame_equal(result, expected) + + def test_non_callable_aggregates(self): + + # GH 16405 + # 'size' is a property of frame/series + # validate that this is working + df = DataFrame( + {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]} + ) + + # Function aggregate + result = df.agg({"A": "count"}) + expected = Series({"A": 2}) + + tm.assert_series_equal(result, 
expected) + + # Non-function aggregate + result = df.agg({"A": "size"}) + expected = Series({"A": 3}) + + tm.assert_series_equal(result, expected) + + # Mix function and non-function aggs + result1 = df.agg(["count", "size"]) + result2 = df.agg( + {"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]} + ) + expected = pd.DataFrame( + { + "A": {"count": 2, "size": 3}, + "B": {"count": 2, "size": 3}, + "C": {"count": 2, "size": 3}, + } + ) + + tm.assert_frame_equal(result1, result2, check_like=True) + tm.assert_frame_equal(result2, expected, check_like=True) + + # Just functional string arg is same as calling df.arg() + result = df.agg("count") + expected = df.count() + + tm.assert_series_equal(result, expected) + + # Just a string attribute arg same as calling df.arg + result = df.agg("size") + expected = df.size + + assert result == expected + + def test_agg_listlike_result(self): + # GH-29587 user defined function returning list-likes + df = DataFrame( + {"A": [2, 2, 3], "B": [1.5, np.nan, 1.5], "C": ["foo", None, "bar"]} + ) + + def func(group_col): + return list(group_col.dropna().unique()) + + result = df.agg(func) + expected = pd.Series([[2, 3], [1.5], ["foo", "bar"]], index=["A", "B", "C"]) + tm.assert_series_equal(result, expected) + + result = df.agg([func]) + expected = expected.to_frame("func").T + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df, func, expected", + chain( + _get_cython_table_params( + DataFrame(), + [ + ("sum", Series(dtype="float64")), + ("max", Series(dtype="float64")), + ("min", Series(dtype="float64")), + ("all", Series(dtype=bool)), + ("any", Series(dtype=bool)), + ("mean", Series(dtype="float64")), + ("prod", Series(dtype="float64")), + ("std", Series(dtype="float64")), + ("var", Series(dtype="float64")), + ("median", Series(dtype="float64")), + ], + ), + _get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("sum", Series([1.0, 3])), + ("max", Series([1.0, 2])), + ("min", 
Series([1.0, 1])), + ("all", Series([True, True])), + ("any", Series([True, True])), + ("mean", Series([1, 1.5])), + ("prod", Series([1.0, 2])), + ("std", Series([np.nan, 0.707107])), + ("var", Series([np.nan, 0.5])), + ("median", Series([1, 1.5])), + ], + ), + ), + ) + def test_agg_cython_table(self, df, func, expected, axis): + # GH 21224 + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = df.agg(func, axis=axis) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "df, func, expected", + chain( + _get_cython_table_params( + DataFrame(), [("cumprod", DataFrame()), ("cumsum", DataFrame())] + ), + _get_cython_table_params( + DataFrame([[np.nan, 1], [1, 2]]), + [ + ("cumprod", DataFrame([[np.nan, 1], [1, 2]])), + ("cumsum", DataFrame([[np.nan, 1], [1, 3]])), + ], + ), + ), + ) + def test_agg_cython_table_transform(self, df, func, expected, axis): + # GH 21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + if axis == "columns" or axis == 1: + # operating blockwise doesn't let us preserve dtypes + expected = expected.astype("float64") + + result = df.agg(func, axis=axis) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df, func, expected", + _get_cython_table_params( + DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]] + ), + ) + def test_agg_cython_table_raises(self, df, func, expected, axis): + # GH 21224 + with pytest.raises(expected): + df.agg(func, axis=axis) + + @pytest.mark.parametrize("num_cols", [2, 3, 5]) + def test_frequency_is_original(self, num_cols): + # GH 22150 + index = pd.DatetimeIndex(["1950-06-30", "1952-10-24", "1953-05-29"]) + original = index.copy() + df = DataFrame(1, index=index, columns=range(num_cols)) + df.apply(lambda x: x) + assert index.freq == original.freq + + def test_apply_datetime_tz_issue(self): + # GH 29052 + + timestamps = [ + pd.Timestamp("2019-03-15 12:34:31.909000+0000", 
tz="UTC"), + pd.Timestamp("2019-03-15 12:34:34.359000+0000", tz="UTC"), + pd.Timestamp("2019-03-15 12:34:34.660000+0000", tz="UTC"), + ] + df = DataFrame(data=[0, 1, 2], index=timestamps) + result = df.apply(lambda x: x.name, axis=1) + expected = pd.Series(index=timestamps, data=timestamps) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("df", [pd.DataFrame({"A": ["a", None], "B": ["c", "d"]})]) + @pytest.mark.parametrize("method", ["min", "max", "sum"]) + def test_consistency_of_aggregates_of_columns_with_missing_values(self, df, method): + # GH 16832 + none_in_first_column_result = getattr(df[["A", "B"]], method)() + none_in_second_column_result = getattr(df[["B", "A"]], method)() + + tm.assert_series_equal( + none_in_first_column_result, none_in_second_column_result + ) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py new file mode 100644 index 00000000..141144c1 --- /dev/null +++ b/pandas/tests/frame/test_arithmetic.py @@ -0,0 +1,782 @@ +from collections import deque +from datetime import datetime +import operator + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.frame.common import _check_mixed_float, _check_mixed_int + +# ------------------------------------------------------------------- +# Comparisons + + +class TestFrameComparisons: + # Specifically _not_ flex-comparisons + + def test_frame_in_list(self): + # GH#12689 this should raise at the DataFrame level, not blocks + df = pd.DataFrame(np.random.randn(6, 4), columns=list("ABCD")) + msg = "The truth value of a DataFrame is ambiguous" + with pytest.raises(ValueError, match=msg): + df in [None] + + def test_comparison_invalid(self): + def check(df, df2): + + for (x, y) in [(df, df2), (df2, df)]: + # we expect the result to match Series comparisons for + # == and !=, inequalities should raise + result = x == y + expected = pd.DataFrame( + {col: x[col] == y[col] for col in 
x.columns}, + index=x.index, + columns=x.columns, + ) + tm.assert_frame_equal(result, expected) + + result = x != y + expected = pd.DataFrame( + {col: x[col] != y[col] for col in x.columns}, + index=x.index, + columns=x.columns, + ) + tm.assert_frame_equal(result, expected) + + with pytest.raises(TypeError): + x >= y + with pytest.raises(TypeError): + x > y + with pytest.raises(TypeError): + x < y + with pytest.raises(TypeError): + x <= y + + # GH4968 + # invalid date/int comparisons + df = pd.DataFrame(np.random.randint(10, size=(10, 1)), columns=["a"]) + df["dates"] = pd.date_range("20010101", periods=len(df)) + + df2 = df.copy() + df2["dates"] = df["a"] + check(df, df2) + + df = pd.DataFrame(np.random.randint(10, size=(10, 2)), columns=["a", "b"]) + df2 = pd.DataFrame( + { + "a": pd.date_range("20010101", periods=len(df)), + "b": pd.date_range("20100101", periods=len(df)), + } + ) + check(df, df2) + + def test_timestamp_compare(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH#4982 + df = pd.DataFrame( + { + "dates1": pd.date_range("20010101", periods=10), + "dates2": pd.date_range("20010102", periods=10), + "intcol": np.random.randint(1000000000, size=10), + "floatcol": np.random.randn(10), + "stringcol": list(tm.rands(10)), + } + ) + df.loc[np.random.rand(len(df)) > 0.5, "dates2"] = pd.NaT + ops = {"gt": "lt", "lt": "gt", "ge": "le", "le": "ge", "eq": "eq", "ne": "ne"} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + if left in ["eq", "ne"]: + expected = left_f(df, pd.Timestamp("20010109")) + result = right_f(pd.Timestamp("20010109"), df) + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(TypeError): + left_f(df, pd.Timestamp("20010109")) + with pytest.raises(TypeError): + right_f(pd.Timestamp("20010109"), df) + # nats + expected = left_f(df, pd.Timestamp("nat")) + result = right_f(pd.Timestamp("nat"), df) + 
tm.assert_frame_equal(result, expected) + + def test_mixed_comparison(self): + # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, + # not raise TypeError + # (this appears to be fixed before GH#22163, not sure when) + df = pd.DataFrame([["1989-08-01", 1], ["1989-08-01", 2]]) + other = pd.DataFrame([["a", "b"], ["c", "d"]]) + + result = df == other + assert not result.any().any() + + result = df != other + assert result.all().all() + + def test_df_boolean_comparison_error(self): + # GH#4576, GH#22880 + # comparing DataFrame against list/tuple with len(obj) matching + # len(df.columns) is supported as of GH#22800 + df = pd.DataFrame(np.arange(6).reshape((3, 2))) + + expected = pd.DataFrame([[False, False], [True, False], [False, False]]) + + result = df == (2, 2) + tm.assert_frame_equal(result, expected) + + result = df == [2, 2] + tm.assert_frame_equal(result, expected) + + def test_df_float_none_comparison(self): + df = pd.DataFrame( + np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"] + ) + + result = df.__eq__(None) + assert not result.any().any() + + def test_df_string_comparison(self): + df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) + mask_a = df.a > 1 + tm.assert_frame_equal(df[mask_a], df.loc[1:1, :]) + tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :]) + + mask_b = df.b == "foo" + tm.assert_frame_equal(df[mask_b], df.loc[0:0, :]) + tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :]) + + +class TestFrameFlexComparisons: + # TODO: test_bool_flex_frame needs a better name + def test_bool_flex_frame(self): + data = np.random.randn(5, 3) + other_data = np.random.randn(5, 3) + df = pd.DataFrame(data) + other = pd.DataFrame(other_data) + ndim_5 = np.ones(df.shape + (1, 3)) + + # Unaligned + def _check_unaligned_frame(meth, op, df, other): + part_o = other.loc[3:, 1:].copy() + rs = meth(part_o) + xp = op(df, part_o.reindex(index=df.index, columns=df.columns)) + tm.assert_frame_equal(rs, xp) + + # DataFrame + assert 
df.eq(df).values.all() + assert not df.ne(df).values.any() + for op in ["eq", "ne", "gt", "lt", "ge", "le"]: + f = getattr(df, op) + o = getattr(operator, op) + # No NAs + tm.assert_frame_equal(f(other), o(df, other)) + _check_unaligned_frame(f, o, df, other) + # ndarray + tm.assert_frame_equal(f(other.values), o(df, other.values)) + # scalar + tm.assert_frame_equal(f(0), o(df, 0)) + # NAs + msg = "Unable to coerce to Series/DataFrame" + tm.assert_frame_equal(f(np.nan), o(df, np.nan)) + with pytest.raises(ValueError, match=msg): + f(ndim_5) + + # Series + def _test_seq(df, idx_ser, col_ser): + idx_eq = df.eq(idx_ser, axis=0) + col_eq = df.eq(col_ser) + idx_ne = df.ne(idx_ser, axis=0) + col_ne = df.ne(col_ser) + tm.assert_frame_equal(col_eq, df == pd.Series(col_ser)) + tm.assert_frame_equal(col_eq, -col_ne) + tm.assert_frame_equal(idx_eq, -idx_ne) + tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T) + tm.assert_frame_equal(col_eq, df.eq(list(col_ser))) + tm.assert_frame_equal(idx_eq, df.eq(pd.Series(idx_ser), axis=0)) + tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0)) + + idx_gt = df.gt(idx_ser, axis=0) + col_gt = df.gt(col_ser) + idx_le = df.le(idx_ser, axis=0) + col_le = df.le(col_ser) + + tm.assert_frame_equal(col_gt, df > pd.Series(col_ser)) + tm.assert_frame_equal(col_gt, -col_le) + tm.assert_frame_equal(idx_gt, -idx_le) + tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T) + + idx_ge = df.ge(idx_ser, axis=0) + col_ge = df.ge(col_ser) + idx_lt = df.lt(idx_ser, axis=0) + col_lt = df.lt(col_ser) + tm.assert_frame_equal(col_ge, df >= pd.Series(col_ser)) + tm.assert_frame_equal(col_ge, -col_lt) + tm.assert_frame_equal(idx_ge, -idx_lt) + tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T) + + idx_ser = pd.Series(np.random.randn(5)) + col_ser = pd.Series(np.random.randn(3)) + _test_seq(df, idx_ser, col_ser) + + # list/tuple + _test_seq(df, idx_ser.values, col_ser.values) + + # NA + df.loc[0, 0] = np.nan + rs = df.eq(df) + assert not rs.loc[0, 0] + rs = 
df.ne(df) + assert rs.loc[0, 0] + rs = df.gt(df) + assert not rs.loc[0, 0] + rs = df.lt(df) + assert not rs.loc[0, 0] + rs = df.ge(df) + assert not rs.loc[0, 0] + rs = df.le(df) + assert not rs.loc[0, 0] + + def test_bool_flex_frame_complex_dtype(self): + # complex + arr = np.array([np.nan, 1, 6, np.nan]) + arr2 = np.array([2j, np.nan, 7, None]) + df = pd.DataFrame({"a": arr}) + df2 = pd.DataFrame({"a": arr2}) + + msg = "|".join( + [ + "'>' not supported between instances of '.*' and 'complex'", + r"unorderable types: .*complex\(\)", # PY35 + ] + ) + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df.gt(df2) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df["a"].gt(df2["a"]) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df.values > df2.values + + rs = df.ne(df2) + assert rs.values.all() + + arr3 = np.array([2j, np.nan, None]) + df3 = pd.DataFrame({"a": arr3}) + + with pytest.raises(TypeError, match=msg): + # inequalities are not well-defined for complex numbers + df3.gt(2j) + with pytest.raises(TypeError, match=msg): + # regression test that we get the same behavior for Series + df3["a"].gt(2j) + with pytest.raises(TypeError, match=msg): + # Check that we match numpy behavior here + df3.values > 2j + + def test_bool_flex_frame_object_dtype(self): + # corner, dtype=object + df1 = pd.DataFrame({"col": ["foo", np.nan, "bar"]}) + df2 = pd.DataFrame({"col": ["foo", datetime.now(), "bar"]}) + result = df1.ne(df2) + exp = pd.DataFrame({"col": [False, True, False]}) + tm.assert_frame_equal(result, exp) + + def test_flex_comparison_nat(self): + # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # and _definitely_ not be NaN + df = pd.DataFrame([pd.NaT]) + + result = df == pd.NaT + # result.iloc[0, 0] is a np.bool_ object + assert result.iloc[0, 0].item() is False + + result = df.eq(pd.NaT) + assert 
result.iloc[0, 0].item() is False + + result = df != pd.NaT + assert result.iloc[0, 0].item() is True + + result = df.ne(pd.NaT) + assert result.iloc[0, 0].item() is True + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_df_flex_cmp_constant_return_types(self, opname): + # GH 15077, non-empty DataFrame + df = pd.DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + const = 2 + + result = getattr(df, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_df_flex_cmp_constant_return_types_empty(self, opname): + # GH 15077 empty DataFrame + df = pd.DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]}) + const = 2 + + empty = df.iloc[:0] + result = getattr(empty, opname)(const).dtypes.value_counts() + tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)])) + + +# ------------------------------------------------------------------- +# Arithmetic + + +class TestFrameFlexArithmetic: + def test_df_add_td64_columnwise(self): + # GH 22534 Check that column-wise addition broadcasts correctly + dti = pd.date_range("2016-01-01", periods=10) + tdi = pd.timedelta_range("1", periods=10) + tser = pd.Series(tdi) + df = pd.DataFrame({0: dti, 1: tdi}) + + result = df.add(tser, axis=0) + expected = pd.DataFrame({0: dti + tdi, 1: tdi + tdi}) + tm.assert_frame_equal(result, expected) + + def test_df_add_flex_filled_mixed_dtypes(self): + # GH 19611 + dti = pd.date_range("2016-01-01", periods=3) + ser = pd.Series(["1 Day", "NaT", "2 Days"], dtype="timedelta64[ns]") + df = pd.DataFrame({"A": dti, "B": ser}) + other = pd.DataFrame({"A": ser, "B": ser}) + fill = pd.Timedelta(days=1).to_timedelta64() + result = df.add(other, fill_value=fill) + + expected = pd.DataFrame( + { + "A": pd.Series( + ["2016-01-02", "2016-01-03", "2016-01-05"], dtype="datetime64[ns]" + ), + "B": ser * 2, + } + ) + 
tm.assert_frame_equal(result, expected) + + def test_arith_flex_frame( + self, all_arithmetic_operators, float_frame, mixed_float_frame + ): + # one instance of parametrized fixture + op = all_arithmetic_operators + + def f(x, y): + # r-versions not in operator-stdlib; get op without "r" and invert + if op.startswith("__r"): + return getattr(operator, op.replace("__r", "__"))(y, x) + return getattr(operator, op)(x, y) + + result = getattr(float_frame, op)(2 * float_frame) + expected = f(float_frame, 2 * float_frame) + tm.assert_frame_equal(result, expected) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) + _check_mixed_float(result, dtype=dict(C=None)) + + @pytest.mark.parametrize("op", ["__add__", "__sub__", "__mul__"]) + def test_arith_flex_frame_mixed( + self, op, int_frame, mixed_int_frame, mixed_float_frame + ): + f = getattr(operator, op) + + # vs mix int + result = getattr(mixed_int_frame, op)(2 + mixed_int_frame) + expected = f(mixed_int_frame, 2 + mixed_int_frame) + + # no overflow in the uint + dtype = None + if op in ["__sub__"]: + dtype = dict(B="uint64", C=None) + elif op in ["__add__", "__mul__"]: + dtype = dict(C=None) + tm.assert_frame_equal(result, expected) + _check_mixed_int(result, dtype=dtype) + + # vs mix float + result = getattr(mixed_float_frame, op)(2 * mixed_float_frame) + expected = f(mixed_float_frame, 2 * mixed_float_frame) + tm.assert_frame_equal(result, expected) + _check_mixed_float(result, dtype=dict(C=None)) + + # vs plain int + result = getattr(int_frame, op)(2 * int_frame) + expected = f(int_frame, 2 * int_frame) + tm.assert_frame_equal(result, expected) + + def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame): + # one instance of parametrized fixture + op = all_arithmetic_operators + + # Check that arrays with dim >= 3 raise + for dim in range(3, 6): + arr = np.ones((1,) * 
dim) + msg = "Unable to coerce to Series/DataFrame" + with pytest.raises(ValueError, match=msg): + getattr(float_frame, op)(arr) + + def test_arith_flex_frame_corner(self, float_frame): + + const_add = float_frame.add(1) + tm.assert_frame_equal(const_add, float_frame + 1) + + # corner cases + result = float_frame.add(float_frame[:0]) + tm.assert_frame_equal(result, float_frame * np.nan) + + result = float_frame[:0].add(float_frame) + tm.assert_frame_equal(result, float_frame * np.nan) + + with pytest.raises(NotImplementedError, match="fill_value"): + float_frame.add(float_frame.iloc[0], fill_value=3) + + with pytest.raises(NotImplementedError, match="fill_value"): + float_frame.add(float_frame.iloc[0], axis="index", fill_value=3) + + def test_arith_flex_series(self, simple_frame): + df = simple_frame + + row = df.xs("a") + col = df["two"] + # after arithmetic refactor, add truediv here + ops = ["add", "sub", "mul", "mod"] + for op in ops: + f = getattr(df, op) + op = getattr(operator, op) + tm.assert_frame_equal(f(row), op(df, row)) + tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T) + + # special case for some reason + tm.assert_frame_equal(df.add(row, axis=None), df + row) + + # cases which will be refactored after big arithmetic refactor + tm.assert_frame_equal(df.div(row), df / row) + tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T) + + # broadcasting issue in GH 7325 + df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="int64") + expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + result = df.div(df[0], axis="index") + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype="float64") + expected = pd.DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]]) + result = df.div(df[0], axis="index") + tm.assert_frame_equal(result, expected) + + def test_arith_flex_zero_len_raises(self): + # GH 19522 passing fill_value to frame flex arith methods should + # raise even in the 
zero-length special cases + ser_len0 = pd.Series([], dtype=object) + df_len0 = pd.DataFrame(columns=["A", "B"]) + df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + + with pytest.raises(NotImplementedError, match="fill_value"): + df.add(ser_len0, fill_value="E") + + with pytest.raises(NotImplementedError, match="fill_value"): + df_len0.sub(df["A"], axis=None, fill_value=3) + + +class TestFrameArithmetic: + def test_td64_op_nat_casting(self): + # Make sure we don't accidentally treat timedelta64(NaT) as datetime64 + # when calling dispatch_to_series in DataFrame arithmetic + ser = pd.Series(["NaT", "NaT"], dtype="timedelta64[ns]") + df = pd.DataFrame([[1, 2], [3, 4]]) + + result = df * ser + expected = pd.DataFrame({0: ser, 1: ser}) + tm.assert_frame_equal(result, expected) + + def test_df_add_2d_array_rowlike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + expected = pd.DataFrame( + [[2, 4], [4, 6], [6, 8]], + columns=df.columns, + index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype, + ) + result = df + rowlike + tm.assert_frame_equal(result, expected) + result = rowlike + df + tm.assert_frame_equal(result, expected) + + def test_df_add_2d_array_collike_broadcasts(self): + # GH#23000 + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + expected = pd.DataFrame( + [[1, 2], [5, 6], [9, 10]], + columns=df.columns, + index=df.index, + # specify dtype explicitly to avoid failing + # on 32bit builds + dtype=arr.dtype, + ) + result = df + collike + tm.assert_frame_equal(result, expected) + result = collike + df + tm.assert_frame_equal(result, expected) + + def 
test_df_arith_2d_array_rowlike_broadcasts(self, all_arithmetic_operators): + # GH#23000 + opname = all_arithmetic_operators + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + rowlike = arr[[1], :] # shape --> (1, ncols) + assert rowlike.shape == (1, df.shape[1]) + + exvals = [ + getattr(df.loc["A"], opname)(rowlike.squeeze()), + getattr(df.loc["B"], opname)(rowlike.squeeze()), + getattr(df.loc["C"], opname)(rowlike.squeeze()), + ] + + expected = pd.DataFrame(exvals, columns=df.columns, index=df.index) + + if opname in ["__rmod__", "__rfloordiv__"]: + # exvals will have dtypes [f8, i8, i8] so expected will be + # all-f8, but the DataFrame operation will return mixed dtypes + # use exvals[-1].dtype instead of "i8" for compat with 32-bit + # systems/pythons + expected[False] = expected[False].astype(exvals[-1].dtype) + + result = getattr(df, opname)(rowlike) + tm.assert_frame_equal(result, expected) + + def test_df_arith_2d_array_collike_broadcasts(self, all_arithmetic_operators): + # GH#23000 + opname = all_arithmetic_operators + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr, columns=[True, False], index=["A", "B", "C"]) + + collike = arr[:, [1]] # shape --> (nrows, 1) + assert collike.shape == (df.shape[0], 1) + + exvals = { + True: getattr(df[True], opname)(collike.squeeze()), + False: getattr(df[False], opname)(collike.squeeze()), + } + + dtype = None + if opname in ["__rmod__", "__rfloordiv__"]: + # Series ops may return mixed int/float dtypes in cases where + # DataFrame op will return all-float. 
So we upcast `expected` + dtype = np.common_type(*[x.values for x in exvals.values()]) + + expected = pd.DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype) + + result = getattr(df, opname)(collike) + tm.assert_frame_equal(result, expected) + + def test_df_bool_mul_int(self): + # GH 22047, GH 22163 multiplication by 1 should result in int dtype, + # not object dtype + df = pd.DataFrame([[False, True], [False, False]]) + result = df * 1 + + # On appveyor this comes back as np.int32 instead of np.int64, + # so we check dtype.kind instead of just dtype + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == "i").all() + + result = 1 * df + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == "i").all() + + def test_arith_mixed(self): + + left = pd.DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]}) + + result = left + left + expected = pd.DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]}) + tm.assert_frame_equal(result, expected) + + def test_arith_getitem_commute(self): + df = pd.DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]}) + + def _test_op(df, op): + result = op(df, 1) + + if not df.columns.is_unique: + raise ValueError("Only unique columns supported by this test") + + for col in result.columns: + tm.assert_series_equal(result[col], op(df[col], 1)) + + _test_op(df, operator.add) + _test_op(df, operator.sub) + _test_op(df, operator.mul) + _test_op(df, operator.truediv) + _test_op(df, operator.floordiv) + _test_op(df, operator.pow) + + _test_op(df, lambda x, y: y + x) + _test_op(df, lambda x, y: y - x) + _test_op(df, lambda x, y: y * x) + _test_op(df, lambda x, y: y / x) + _test_op(df, lambda x, y: y ** x) + + _test_op(df, lambda x, y: x + y) + _test_op(df, lambda x, y: x - y) + _test_op(df, lambda x, y: x * y) + _test_op(df, lambda x, y: x / y) + _test_op(df, lambda x, y: x ** y) + + @pytest.mark.parametrize( + "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])] + ) + def 
test_arith_alignment_non_pandas_object(self, values): + # GH#17901 + df = pd.DataFrame({"A": [1, 1], "B": [1, 1]}) + expected = pd.DataFrame({"A": [2, 2], "B": [3, 3]}) + result = df + values + tm.assert_frame_equal(result, expected) + + def test_arith_non_pandas_object(self): + df = pd.DataFrame( + np.arange(1, 10, dtype="f8").reshape(3, 3), + columns=["one", "two", "three"], + index=["a", "b", "c"], + ) + + val1 = df.xs("a").values + added = pd.DataFrame(df.values + val1, index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val1, added) + + added = pd.DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val1, axis=0), added) + + val2 = list(df["two"]) + + added = pd.DataFrame(df.values + val2, index=df.index, columns=df.columns) + tm.assert_frame_equal(df + val2, added) + + added = pd.DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val2, axis="index"), added) + + val3 = np.random.rand(*df.shape) + added = pd.DataFrame(df.values + val3, index=df.index, columns=df.columns) + tm.assert_frame_equal(df.add(val3), added) + + def test_operations_with_interval_categories_index(self, all_arithmetic_operators): + # GH#27415 + op = all_arithmetic_operators + ind = pd.CategoricalIndex(pd.interval_range(start=0.0, end=2.0)) + data = [1, 2] + df = pd.DataFrame([data], columns=ind) + num = 10 + result = getattr(df, op)(num) + expected = pd.DataFrame([[getattr(n, op)(num) for n in data]], columns=ind) + tm.assert_frame_equal(result, expected) + + def test_frame_with_frame_reindex(self): + # GH#31623 + df = pd.DataFrame( + { + "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")], + "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")], + }, + columns=["foo", "bar"], + ) + df2 = df[["foo"]] + + result = df - df2 + + expected = pd.DataFrame( + {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]}, + columns=["bar", "foo"], + ) + tm.assert_frame_equal(result, 
expected) + + +def test_frame_with_zero_len_series_corner_cases(): + # GH#28600 + # easy all-float case + df = pd.DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "B"]) + ser = pd.Series(dtype=np.float64) + + result = df + ser + expected = pd.DataFrame(df.values * np.nan, columns=df.columns) + tm.assert_frame_equal(result, expected) + + result = df == ser + expected = pd.DataFrame(False, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + # non-float case should not raise on comparison + df2 = pd.DataFrame(df.values.view("M8[ns]"), columns=df.columns) + result = df2 == ser + expected = pd.DataFrame(False, index=df.index, columns=df.columns) + tm.assert_frame_equal(result, expected) + + +def test_zero_len_frame_with_series_corner_cases(): + # GH#28600 + df = pd.DataFrame(columns=["A", "B"], dtype=np.float64) + ser = pd.Series([1, 2], index=["A", "B"]) + + result = df + ser + expected = df + tm.assert_frame_equal(result, expected) + + +def test_frame_single_columns_object_sum_axis_1(): + # GH 13758 + data = { + "One": pd.Series(["A", 1.2, np.nan]), + } + df = pd.DataFrame(data) + result = df.sum(axis=1) + expected = pd.Series(["A", 1.2, 0]) + tm.assert_series_equal(result, expected) + + +def test_pow_with_realignment(): + # GH#32685 pow has special semantics for operating with null values + left = pd.DataFrame({"A": [0, 1, 2]}) + right = pd.DataFrame(index=[0, 1, 2]) + + result = left ** right + expected = pd.DataFrame({"A": [np.nan, 1.0, np.nan]}) + tm.assert_frame_equal(result, expected) + + +# TODO: move to tests.arithmetic and parametrize +def test_pow_nan_with_zero(): + left = pd.DataFrame({"A": [np.nan, np.nan, np.nan]}) + right = pd.DataFrame({"A": [0, 0, 0]}) + + expected = pd.DataFrame({"A": [1.0, 1.0, 1.0]}) + + result = left ** right + tm.assert_frame_equal(result, expected) + + result = left["A"] ** right["A"] + tm.assert_series_equal(result, expected["A"]) diff --git a/pandas/tests/frame/test_axis_select_reindex.py 
b/pandas/tests/frame/test_axis_select_reindex.py new file mode 100644 index 00000000..7effa98f --- /dev/null +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -0,0 +1,1154 @@ +from datetime import datetime +import re + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna +import pandas._testing as tm + + +class TestDataFrameSelectReindex: + # These are specific reindex-based tests; other indexing tests should go in + # test_indexing + + def test_drop_names(self): + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + df.index.name, df.columns.name = "first", "second" + df_dropped_b = df.drop("b") + df_dropped_e = df.drop("e", axis=1) + df_inplace_b, df_inplace_e = df.copy(), df.copy() + df_inplace_b.drop("b", inplace=True) + df_inplace_e.drop("e", axis=1, inplace=True) + for obj in (df_dropped_b, df_dropped_e, df_inplace_b, df_inplace_e): + assert obj.index.name == "first" + assert obj.columns.name == "second" + assert list(df.columns) == ["d", "e", "f"] + + msg = r"\['g'\] not found in axis" + with pytest.raises(KeyError, match=msg): + df.drop(["g"]) + with pytest.raises(KeyError, match=msg): + df.drop(["g"], 1) + + # errors = 'ignore' + dropped = df.drop(["g"], errors="ignore") + expected = Index(["a", "b", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + dropped = df.drop(["b", "g"], errors="ignore") + expected = Index(["a", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + dropped = df.drop(["g"], axis=1, errors="ignore") + expected = Index(["d", "e", "f"], name="second") + tm.assert_index_equal(dropped.columns, expected) + + dropped = df.drop(["d", "g"], axis=1, errors="ignore") + expected = Index(["e", "f"], name="second") + tm.assert_index_equal(dropped.columns, expected) + + # GH 16398 + dropped = df.drop([], 
errors="ignore") + expected = Index(["a", "b", "c"], name="first") + tm.assert_index_equal(dropped.index, expected) + + def test_drop_col_still_multiindex(self): + arrays = [["a", "b", "c", "top"], ["", "", "", "OD"], ["", "", "", "wx"]] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + + df = DataFrame(np.random.randn(3, 4), columns=index) + del df[("a", "", "")] + assert isinstance(df.columns, MultiIndex) + + def test_drop(self): + simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]}) + tm.assert_frame_equal(simple.drop("A", axis=1), simple[["B"]]) + tm.assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]]) + tm.assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :]) + tm.assert_frame_equal(simple.drop([0, 3], axis="index"), simple.loc[[1, 2], :]) + + with pytest.raises(KeyError, match=r"\[5\] not found in axis"): + simple.drop(5) + with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): + simple.drop("C", 1) + with pytest.raises(KeyError, match=r"\[5\] not found in axis"): + simple.drop([1, 5]) + with pytest.raises(KeyError, match=r"\['C'\] not found in axis"): + simple.drop(["A", "C"], 1) + + # errors = 'ignore' + tm.assert_frame_equal(simple.drop(5, errors="ignore"), simple) + tm.assert_frame_equal( + simple.drop([0, 5], errors="ignore"), simple.loc[[1, 2, 3], :] + ) + tm.assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple) + tm.assert_frame_equal( + simple.drop(["A", "C"], axis=1, errors="ignore"), simple[["B"]] + ) + + # non-unique - wheee! 
+ nu_df = DataFrame( + list(zip(range(3), range(-3, 1), list("abc"))), columns=["a", "a", "b"] + ) + tm.assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]]) + tm.assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"]) + tm.assert_frame_equal(nu_df.drop([]), nu_df) # GH 16398 + + nu_df = nu_df.set_index(pd.Index(["X", "Y", "X"])) + nu_df.columns = list("abc") + tm.assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.loc[["Y"], :]) + tm.assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.loc[[], :]) + + # inplace cache issue + # GH 5628 + df = pd.DataFrame(np.random.randn(10, 3), columns=list("abc")) + expected = df[~(df.b > 0)] + df.drop(labels=df[df.b > 0].index, inplace=True) + tm.assert_frame_equal(df, expected) + + def test_drop_multiindex_not_lexsorted(self): + # GH 11640 + + # define the lexsorted version + lexsorted_mi = MultiIndex.from_tuples( + [("a", ""), ("b1", "c1"), ("b2", "c2")], names=["b", "c"] + ) + lexsorted_df = DataFrame([[1, 3, 4]], columns=lexsorted_mi) + assert lexsorted_df.columns.is_lexsorted() + + # define the non-lexsorted version + not_lexsorted_df = DataFrame( + columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]] + ) + not_lexsorted_df = not_lexsorted_df.pivot_table( + index="a", columns=["b", "c"], values="d" + ) + not_lexsorted_df = not_lexsorted_df.reset_index() + assert not not_lexsorted_df.columns.is_lexsorted() + + # compare the results + tm.assert_frame_equal(lexsorted_df, not_lexsorted_df) + + expected = lexsorted_df.drop("a", axis=1) + with tm.assert_produces_warning(PerformanceWarning): + result = not_lexsorted_df.drop("a", axis=1) + + tm.assert_frame_equal(result, expected) + + def test_drop_api_equivalence(self): + # equivalence of the labels/axis and index/columns API's (GH12392) + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + + res1 = df.drop("a") + res2 = df.drop(index="a") + tm.assert_frame_equal(res1, res2) + + 
res1 = df.drop("d", 1) + res2 = df.drop(columns="d") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(labels="e", axis=1) + res2 = df.drop(columns="e") + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(["a"], axis=0) + res2 = df.drop(index=["a"]) + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(["a"], axis=0).drop(["d"], axis=1) + res2 = df.drop(index=["a"], columns=["d"]) + tm.assert_frame_equal(res1, res2) + + with pytest.raises(ValueError): + df.drop(labels="a", index="b") + + with pytest.raises(ValueError): + df.drop(labels="a", columns="b") + + with pytest.raises(ValueError): + df.drop(axis=1) + + def test_merge_join_different_levels(self): + # GH 9455 + + # first dataframe + df1 = DataFrame(columns=["a", "b"], data=[[1, 11], [0, 22]]) + + # second dataframe + columns = MultiIndex.from_tuples([("a", ""), ("c", "c1")]) + df2 = DataFrame(columns=columns, data=[[1, 33], [0, 44]]) + + # merge + columns = ["a", "b", ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 33], [0, 22, 44]]) + with tm.assert_produces_warning(UserWarning): + result = pd.merge(df1, df2, on="a") + tm.assert_frame_equal(result, expected) + + # join, see discussion in GH 12219 + columns = ["a", "b", ("a", ""), ("c", "c1")] + expected = DataFrame(columns=columns, data=[[1, 11, 0, 44], [0, 22, 1, 33]]) + with tm.assert_produces_warning(UserWarning): + result = df1.join(df2, on="a") + tm.assert_frame_equal(result, expected) + + def test_reindex(self, float_frame): + datetime_series = tm.makeTimeSeries(nper=30) + + newFrame = float_frame.reindex(datetime_series.index) + + for col in newFrame.columns: + for idx, val in newFrame[col].items(): + if idx in float_frame.index: + if np.isnan(val): + assert np.isnan(float_frame[col][idx]) + else: + assert val == float_frame[col][idx] + else: + assert np.isnan(val) + + for col, series in newFrame.items(): + assert tm.equalContents(series.index, newFrame.index) + emptyFrame = float_frame.reindex(Index([])) + assert 
len(emptyFrame.index) == 0 + + # Cython code should be unit-tested directly + nonContigFrame = float_frame.reindex(datetime_series.index[::2]) + + for col in nonContigFrame.columns: + for idx, val in nonContigFrame[col].items(): + if idx in float_frame.index: + if np.isnan(val): + assert np.isnan(float_frame[col][idx]) + else: + assert val == float_frame[col][idx] + else: + assert np.isnan(val) + + for col, series in nonContigFrame.items(): + assert tm.equalContents(series.index, nonContigFrame.index) + + # corner cases + + # Same index, copies values but not index if copy=False + newFrame = float_frame.reindex(float_frame.index, copy=False) + assert newFrame.index is float_frame.index + + # length zero + newFrame = float_frame.reindex([]) + assert newFrame.empty + assert len(newFrame.columns) == len(float_frame.columns) + + # length zero with columns reindexed with non-empty index + newFrame = float_frame.reindex([]) + newFrame = newFrame.reindex(float_frame.index) + assert len(newFrame.index) == len(float_frame.index) + assert len(newFrame.columns) == len(float_frame.columns) + + # pass non-Index + newFrame = float_frame.reindex(list(datetime_series.index)) + tm.assert_index_equal(newFrame.index, datetime_series.index) + + # copy with no axes + result = float_frame.reindex() + tm.assert_frame_equal(result, float_frame) + assert result is not float_frame + + def test_reindex_nan(self): + df = pd.DataFrame( + [[1, 2], [3, 5], [7, 11], [9, 23]], + index=[2, np.nan, 1, 5], + columns=["joe", "jim"], + ) + + i, j = [np.nan, 5, 5, np.nan, 1, 2, np.nan], [1, 3, 3, 1, 2, 0, 1] + tm.assert_frame_equal(df.reindex(i), df.iloc[j]) + + df.index = df.index.astype("object") + tm.assert_frame_equal(df.reindex(i), df.iloc[j], check_index_type=False) + + # GH10388 + df = pd.DataFrame( + { + "other": ["a", "b", np.nan, "c"], + "date": ["2015-03-22", np.nan, "2012-01-08", np.nan], + "amount": [2, 3, 4, 5], + } + ) + + df["date"] = pd.to_datetime(df.date) + df["delta"] = 
(pd.to_datetime("2015-06-18") - df["date"]).shift(1) + + left = df.set_index(["delta", "other", "date"]).reset_index() + right = df.reindex(columns=["delta", "other", "date", "amount"]) + tm.assert_frame_equal(left, right) + + def test_reindex_name_remains(self): + s = Series(np.random.rand(10)) + df = DataFrame(s, index=np.arange(len(s))) + i = Series(np.arange(10), name="iname") + + df = df.reindex(i) + assert df.index.name == "iname" + + df = df.reindex(Index(np.arange(10), name="tmpname")) + assert df.index.name == "tmpname" + + s = Series(np.random.rand(10)) + df = DataFrame(s.T, index=np.arange(len(s))) + i = Series(np.arange(10), name="iname") + df = df.reindex(columns=i) + assert df.columns.name == "iname" + + def test_reindex_int(self, int_frame): + smaller = int_frame.reindex(int_frame.index[::2]) + + assert smaller["A"].dtype == np.int64 + + bigger = smaller.reindex(int_frame.index) + assert bigger["A"].dtype == np.float64 + + smaller = int_frame.reindex(columns=["A", "B"]) + assert smaller["A"].dtype == np.int64 + + def test_reindex_like(self, float_frame): + other = float_frame.reindex(index=float_frame.index[:10], columns=["C", "B"]) + + tm.assert_frame_equal(other, float_frame.reindex_like(other)) + + def test_reindex_columns(self, float_frame): + new_frame = float_frame.reindex(columns=["A", "B", "E"]) + + tm.assert_series_equal(new_frame["B"], float_frame["B"]) + assert np.isnan(new_frame["E"]).all() + assert "C" not in new_frame + + # Length zero + new_frame = float_frame.reindex(columns=[]) + assert new_frame.empty + + def test_reindex_columns_method(self): + + # GH 14992, reindexing over columns ignored method + df = DataFrame( + data=[[11, 12, 13], [21, 22, 23], [31, 32, 33]], + index=[1, 2, 4], + columns=[1, 2, 4], + dtype=float, + ) + + # default method + result = df.reindex(columns=range(6)) + expected = DataFrame( + data=[ + [np.nan, 11, 12, np.nan, 13, np.nan], + [np.nan, 21, 22, np.nan, 23, np.nan], + [np.nan, 31, 32, np.nan, 33, np.nan], 
+ ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + # method='ffill' + result = df.reindex(columns=range(6), method="ffill") + expected = DataFrame( + data=[ + [np.nan, 11, 12, 12, 13, 13], + [np.nan, 21, 22, 22, 23, 23], + [np.nan, 31, 32, 32, 33, 33], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + # method='bfill' + result = df.reindex(columns=range(6), method="bfill") + expected = DataFrame( + data=[ + [11, 11, 12, 13, 13, np.nan], + [21, 21, 22, 23, 23, np.nan], + [31, 31, 32, 33, 33, np.nan], + ], + index=[1, 2, 4], + columns=range(6), + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + def test_reindex_axes(self): + # GH 3317, reindexing by both axes loses freq of the index + df = DataFrame( + np.ones((3, 3)), + index=[datetime(2012, 1, 1), datetime(2012, 1, 2), datetime(2012, 1, 3)], + columns=["a", "b", "c"], + ) + time_freq = date_range("2012-01-01", "2012-01-03", freq="d") + some_cols = ["a", "b"] + + index_freq = df.reindex(index=time_freq).index.freq + both_freq = df.reindex(index=time_freq, columns=some_cols).index.freq + seq_freq = df.reindex(index=time_freq).reindex(columns=some_cols).index.freq + assert index_freq == both_freq + assert index_freq == seq_freq + + def test_reindex_fill_value(self): + df = DataFrame(np.random.randn(10, 4)) + + # axis=0 + result = df.reindex(list(range(15))) + assert np.isnan(result.values[-5:]).all() + + result = df.reindex(range(15), fill_value=0) + expected = df.reindex(range(15)).fillna(0) + tm.assert_frame_equal(result, expected) + + # axis=1 + result = df.reindex(columns=range(5), fill_value=0.0) + expected = df.copy() + expected[4] = 0.0 + tm.assert_frame_equal(result, expected) + + result = df.reindex(columns=range(5), fill_value=0) + expected = df.copy() + expected[4] = 0 + tm.assert_frame_equal(result, expected) + + result = df.reindex(columns=range(5), fill_value="foo") + 
expected = df.copy() + expected[4] = "foo" + tm.assert_frame_equal(result, expected) + + # other dtypes + df["foo"] = "foo" + result = df.reindex(range(15), fill_value=0) + expected = df.reindex(range(15)).fillna(0) + tm.assert_frame_equal(result, expected) + + def test_reindex_dups(self): + + # GH4746, reindex on duplicate index error messages + arr = np.random.randn(10) + df = DataFrame(arr, index=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]) + + # set index is ok + result = df.copy() + result.index = list(range(len(df))) + expected = DataFrame(arr, index=list(range(len(df)))) + tm.assert_frame_equal(result, expected) + + # reindex fails + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + df.reindex(index=list(range(len(df)))) + + def test_reindex_axis_style(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = pd.DataFrame( + {"A": [1, 2, np.nan], "B": [4, 5, np.nan]}, index=[0, 1, 3] + ) + result = df.reindex([0, 1, 3]) + tm.assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis=0) + tm.assert_frame_equal(result, expected) + + result = df.reindex([0, 1, 3], axis="index") + tm.assert_frame_equal(result, expected) + + def test_reindex_positional_warns(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + expected = pd.DataFrame({"A": [1.0, 2], "B": [4.0, 5], "C": [np.nan, np.nan]}) + with tm.assert_produces_warning(FutureWarning): + result = df.reindex([0, 1], ["A", "B", "C"]) + + tm.assert_frame_equal(result, expected) + + def test_reindex_axis_style_raises(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex([0, 1], ["A"], axis=1) + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex([0, 1], ["A"], 
axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(columns=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], columns=[0, 1], axis="columns") + + with pytest.raises(TypeError, match="Cannot specify all"): + df.reindex([0, 1], [0], ["A"]) + + # Mixing styles + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="index") + + with pytest.raises(TypeError, match="Cannot specify both 'axis'"): + df.reindex(index=[0, 1], axis="columns") + + # Duplicates + with pytest.raises(TypeError, match="multiple values"): + df.reindex([0, 1], labels=[0, 1]) + + def test_reindex_single_named_indexer(self): + # https://github.com/pandas-dev/pandas/issues/12392 + df = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}) + result = df.reindex([0, 1], columns=["A"]) + expected = pd.DataFrame({"A": [1, 2]}) + tm.assert_frame_equal(result, expected) + + def test_reindex_api_equivalence(self): + # https://github.com/pandas-dev/pandas/issues/12392 + # equivalence of the labels/axis and index/columns API's + df = DataFrame( + [[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=["a", "b", "c"], + columns=["d", "e", "f"], + ) + + res1 = df.reindex(["b", "a"]) + res2 = df.reindex(index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"]) + res4 = df.reindex(labels=["b", "a"], axis=0) + res5 = df.reindex(["b", "a"], axis=0) + for res in [res2, res3, res4, res5]: + tm.assert_frame_equal(res1, res) + + res1 = df.reindex(columns=["e", "d"]) + res2 = df.reindex(["e", "d"], axis=1) + res3 = df.reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + with 
tm.assert_produces_warning(FutureWarning) as m: + res1 = df.reindex(["b", "a"], ["e", "d"]) + assert "reindex" in str(m[0].message) + res2 = df.reindex(columns=["e", "d"], index=["b", "a"]) + res3 = df.reindex(labels=["b", "a"], axis=0).reindex(labels=["e", "d"], axis=1) + for res in [res2, res3]: + tm.assert_frame_equal(res1, res) + + def test_align_float(self, float_frame): + af, bf = float_frame.align(float_frame) + assert af._data is not float_frame._data + + af, bf = float_frame.align(float_frame, copy=False) + assert af._data is float_frame._data + + # axis = 0 + other = float_frame.iloc[:-5, :3] + af, bf = float_frame.align(other, axis=0, fill_value=-1) + + tm.assert_index_equal(bf.columns, other.columns) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_b = other.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + diff_b_vals = bf.reindex(diff_b).values + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="right", axis=0) + tm.assert_index_equal(bf.columns, other.columns) + tm.assert_index_equal(bf.index, other.index) + tm.assert_index_equal(af.index, other.index) + + # axis = 1 + other = float_frame.iloc[:-5, :3].copy() + af, bf = float_frame.align(other, axis=1) + tm.assert_index_equal(bf.columns, float_frame.columns) + tm.assert_index_equal(bf.index, other.index) + + # test fill value + join_idx = float_frame.index.join(other.index) + diff_a = float_frame.index.difference(join_idx) + diff_b = other.index.difference(join_idx) + diff_a_vals = af.reindex(diff_a).values + + # TODO(wesm): unused? 
+ diff_b_vals = bf.reindex(diff_b).values # noqa + + assert (diff_a_vals == -1).all() + + af, bf = float_frame.align(other, join="inner", axis=1) + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=None + ) + tm.assert_index_equal(bf.index, Index([])) + + af, bf = float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + # Try to align DataFrame to Series along bad axis + with pytest.raises(ValueError): + float_frame.align(af.iloc[0, :3], join="inner", axis=2) + + # align dataframe to series with broadcast or not + idx = float_frame.index + s = Series(range(len(idx)), index=idx) + + left, right = float_frame.align(s, axis=0) + tm.assert_index_equal(left.index, float_frame.index) + tm.assert_index_equal(right.index, float_frame.index) + assert isinstance(right, Series) + + left, right = float_frame.align(s, broadcast_axis=1) + tm.assert_index_equal(left.index, float_frame.index) + expected = {c: s for c in float_frame.columns} + expected = DataFrame( + expected, index=float_frame.index, columns=float_frame.columns + ) + tm.assert_frame_equal(right, expected) + + # see gh-9558 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df[df["a"] == 2] + expected = DataFrame([[2, 5]], index=[1], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + result = df.where(df["a"] == 2, 0) + expected = DataFrame({"a": [0, 2, 0], "b": [0, 5, 0]}) + tm.assert_frame_equal(result, expected) + + def test_align_int(self, int_frame): + # test other non-float types + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = int_frame.align(other, join="inner", axis=1, method="pad") + tm.assert_index_equal(bf.columns, other.columns) + + def 
test_align_mixed_type(self, float_string_frame): + + af, bf = float_string_frame.align( + float_string_frame, join="inner", axis=1, method="pad" + ) + tm.assert_index_equal(bf.columns, float_string_frame.columns) + + def test_align_mixed_float(self, mixed_float_frame): + # mixed floats/ints + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_float_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + def test_align_mixed_int(self, mixed_int_frame): + other = DataFrame(index=range(5), columns=["A", "B", "C"]) + + af, bf = mixed_int_frame.align( + other.iloc[:, 0], join="inner", axis=1, method=None, fill_value=0 + ) + tm.assert_index_equal(bf.index, Index([])) + + def _check_align(self, a, b, axis, fill_axis, how, method, limit=None): + aa, ab = a.align( + b, axis=axis, join=how, method=method, limit=limit, fill_axis=fill_axis + ) + + join_index, join_columns = None, None + + ea, eb = a, b + if axis is None or axis == 0: + join_index = a.index.join(b.index, how=how) + ea = ea.reindex(index=join_index) + eb = eb.reindex(index=join_index) + + if axis is None or axis == 1: + join_columns = a.columns.join(b.columns, how=how) + ea = ea.reindex(columns=join_columns) + eb = eb.reindex(columns=join_columns) + + ea = ea.fillna(axis=fill_axis, method=method, limit=limit) + eb = eb.fillna(axis=fill_axis, method=method, limit=limit) + + tm.assert_frame_equal(aa, ea) + tm.assert_frame_equal(ab, eb) + + @pytest.mark.parametrize("meth", ["pad", "bfill"]) + @pytest.mark.parametrize("ax", [0, 1, None]) + @pytest.mark.parametrize("fax", [0, 1]) + @pytest.mark.parametrize("how", ["inner", "outer", "left", "right"]) + def test_align_fill_method(self, how, meth, ax, fax, float_frame): + df = float_frame + self._check_align_fill(df, how, meth, ax, fax) + + def _check_align_fill(self, frame, kind, meth, ax, fax): + left = frame.iloc[0:4, :10] + right = frame.iloc[2:, 6:] + empty = 
frame.iloc[:0, :0] + + self._check_align(left, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty left + self._check_align(empty, right, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, right, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # empty right + self._check_align(left, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + left, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + # both empty + self._check_align(empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth) + self._check_align( + empty, empty, axis=ax, fill_axis=fax, how=kind, method=meth, limit=1 + ) + + def test_align_int_fill_bug(self): + # GH #910 + X = np.arange(10 * 10, dtype="float64").reshape(10, 10) + Y = np.ones((10, 1), dtype=int) + + df1 = DataFrame(X) + df1["0.X"] = Y.squeeze() + + df2 = df1.astype(float) + + result = df1 - df1.mean() + expected = df2 - df2.mean() + tm.assert_frame_equal(result, expected) + + def test_align_multiindex(self): + # GH 10665 + # same test cases as test_align_multiindex in test_series.py + + midx = pd.MultiIndex.from_product( + [range(2), range(3), range(2)], names=("a", "b", "c") + ) + idx = pd.Index(range(2), name="b") + df1 = pd.DataFrame(np.arange(12, dtype="int64"), index=midx) + df2 = pd.DataFrame(np.arange(2, dtype="int64"), index=idx) + + # these must be the same results (but flipped) + res1l, res1r = df1.align(df2, join="left") + res2l, res2r = df2.align(df1, join="right") + + expl = df1 + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = pd.DataFrame([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, res2l) + + res1l, res1r = df1.align(df2, join="right") + res2l, res2r = df2.align(df1, join="left") + + exp_idx = pd.MultiIndex.from_product( + [range(2), 
range(2), range(2)], names=("a", "b", "c") + ) + expl = pd.DataFrame([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_frame_equal(expl, res1l) + tm.assert_frame_equal(expl, res2r) + expr = pd.DataFrame([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_frame_equal(expr, res1r) + tm.assert_frame_equal(expr, res2l) + + def test_align_series_combinations(self): + df = pd.DataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) + s = pd.Series([1, 2, 4], index=list("ABD"), name="x") + + # frame + series + res1, res2 = df.align(s, axis=0) + exp1 = pd.DataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + exp2 = pd.Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x") + + tm.assert_frame_equal(res1, exp1) + tm.assert_series_equal(res2, exp2) + + # series + frame + res1, res2 = s.align(df) + tm.assert_series_equal(res1, exp2) + tm.assert_frame_equal(res2, exp1) + + def test_filter(self, float_frame, float_string_frame): + # Items + filtered = float_frame.filter(["A", "B", "E"]) + assert len(filtered.columns) == 2 + assert "E" not in filtered + + filtered = float_frame.filter(["A", "B", "E"], axis="columns") + assert len(filtered.columns) == 2 + assert "E" not in filtered + + # Other axis + idx = float_frame.index[0:4] + filtered = float_frame.filter(idx, axis="index") + expected = float_frame.reindex(index=idx) + tm.assert_frame_equal(filtered, expected) + + # like + fcopy = float_frame.copy() + fcopy["AA"] = 1 + + filtered = fcopy.filter(like="A") + assert len(filtered.columns) == 2 + assert "AA" in filtered + + # like with ints in column names + df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"]) + filtered = df.filter(like="_") + assert len(filtered.columns) == 2 + + # regex with ints in column names + # from PR #10384 + df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"]) + expected = DataFrame( + 0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object) + ) + filtered 
= df.filter(regex="^[0-9]+$") + tm.assert_frame_equal(filtered, expected) + + expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"]) + # shouldn't remove anything + filtered = expected.filter(regex="^[0-9]+$") + tm.assert_frame_equal(filtered, expected) + + # pass in None + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter() + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter(items=None) + with pytest.raises(TypeError, match="Must pass"): + float_frame.filter(axis=1) + + # test mutually exclusive arguments + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$", like="bbi") + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$", axis=1) + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], regex="e$") + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], like="bbi", axis=0) + with pytest.raises(TypeError, match="mutually exclusive"): + float_frame.filter(items=["one", "three"], like="bbi") + + # objects + filtered = float_string_frame.filter(like="foo") + assert "foo" in filtered + + # unicode columns, won't ascii-encode + df = float_frame.rename(columns={"B": "\u2202"}) + filtered = df.filter(like="C") + assert "C" in filtered + + def test_filter_regex_search(self, float_frame): + fcopy = float_frame.copy() + fcopy["AA"] = 1 + + # regex + filtered = fcopy.filter(regex="[A]+") + assert len(filtered.columns) == 2 + assert "AA" in filtered + + # doesn't have to be at beginning + df = DataFrame( + {"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]} + ) + + result = df.filter(regex="BB") + exp = df[[x for x in df.columns if "BB" in x]] + tm.assert_frame_equal(result, exp) + + @pytest.mark.parametrize( + "name,expected", + [ + ("a", DataFrame({"a": [1, 2]})), + ("a", 
DataFrame({"a": [1, 2]})), + ("あ", DataFrame({"あ": [3, 4]})), + ], + ) + def test_filter_unicode(self, name, expected): + # GH13101 + df = DataFrame({"a": [1, 2], "あ": [3, 4]}) + + tm.assert_frame_equal(df.filter(like=name), expected) + tm.assert_frame_equal(df.filter(regex=name), expected) + + @pytest.mark.parametrize("name", ["a", "a"]) + def test_filter_bytestring(self, name): + # GH13101 + df = DataFrame({b"a": [1, 2], b"b": [3, 4]}) + expected = DataFrame({b"a": [1, 2]}) + + tm.assert_frame_equal(df.filter(like=name), expected) + tm.assert_frame_equal(df.filter(regex=name), expected) + + def test_filter_corner(self): + empty = DataFrame() + + result = empty.filter([]) + tm.assert_frame_equal(result, empty) + + result = empty.filter(like="foo") + tm.assert_frame_equal(result, empty) + + def test_filter_regex_non_string(self): + # GH#5798 trying to filter on non-string columns should drop, + # not raise + df = pd.DataFrame(np.random.random((3, 2)), columns=["STRING", 123]) + result = df.filter(regex="STRING") + expected = df[["STRING"]] + tm.assert_frame_equal(result, expected) + + def test_take(self, float_frame): + # homogeneous + order = [3, 1, 2, 0] + for df in [float_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["D", "B", "C", "A"]] + tm.assert_frame_equal(result, expected, check_names=False) + + # negative indices + order = [2, 1, -1] + for df in [float_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + result = df.take(order, axis=0) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["C", "B", "D"]] + tm.assert_frame_equal(result, expected, check_names=False) + + # illegal indices + msg = "indices are out-of-bounds" + with pytest.raises(IndexError, 
match=msg): + df.take([3, 1, 2, 30], axis=0) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, -31], axis=0) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, 5], axis=1) + with pytest.raises(IndexError, match=msg): + df.take([3, 1, 2, -5], axis=1) + + def test_take_mixed_type(self, float_string_frame): + + # mixed-dtype + order = [4, 1, 2, 0, 3] + for df in [float_string_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["foo", "B", "C", "A", "D"]] + tm.assert_frame_equal(result, expected) + + # negative indices + order = [4, 1, -2] + for df in [float_string_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["foo", "B", "D"]] + tm.assert_frame_equal(result, expected) + + def test_take_mixed_numeric(self, mixed_float_frame, mixed_int_frame): + # by dtype + order = [1, 2, 0, 3] + for df in [mixed_float_frame, mixed_int_frame]: + + result = df.take(order, axis=0) + expected = df.reindex(df.index.take(order)) + tm.assert_frame_equal(result, expected) + + # axis = 1 + result = df.take(order, axis=1) + expected = df.loc[:, ["B", "C", "A", "D"]] + tm.assert_frame_equal(result, expected) + + def test_reindex_boolean(self): + frame = DataFrame( + np.ones((10, 2), dtype=bool), index=np.arange(0, 20, 2), columns=[0, 2] + ) + + reindexed = frame.reindex(np.arange(10)) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[0][1]) + + reindexed = frame.reindex(columns=range(3)) + assert reindexed.values.dtype == np.object_ + assert isna(reindexed[1]).all() + + def test_reindex_objects(self, float_string_frame): + reindexed = float_string_frame.reindex(columns=["foo", "A", "B"]) + assert "foo" in reindexed + + reindexed = 
float_string_frame.reindex(columns=["A", "B"]) + assert "foo" not in reindexed + + def test_reindex_corner(self, int_frame): + index = Index(["a", "b", "c"]) + dm = DataFrame({}).reindex(index=[1, 2, 3]) + reindexed = dm.reindex(columns=index) + tm.assert_index_equal(reindexed.columns, index) + + # ints are weird + smaller = int_frame.reindex(columns=["A", "B", "E"]) + assert smaller["E"].dtype == np.float64 + + def test_reindex_with_nans(self): + df = DataFrame( + [[1, 2], [3, 4], [np.nan, np.nan], [7, 8], [9, 10]], + columns=["a", "b"], + index=[100.0, 101.0, np.nan, 102.0, 103.0], + ) + + result = df.reindex(index=[101.0, 102.0, 103.0]) + expected = df.iloc[[1, 3, 4]] + tm.assert_frame_equal(result, expected) + + result = df.reindex(index=[103.0]) + expected = df.iloc[[4]] + tm.assert_frame_equal(result, expected) + + result = df.reindex(index=[101.0]) + expected = df.iloc[[1]] + tm.assert_frame_equal(result, expected) + + def test_reindex_multi(self): + df = DataFrame(np.random.randn(3, 3)) + + result = df.reindex(index=range(4), columns=range(4)) + expected = df.reindex(list(range(4))).reindex(columns=range(4)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randint(0, 10, (3, 3))) + + result = df.reindex(index=range(4), columns=range(4)) + expected = df.reindex(list(range(4))).reindex(columns=range(4)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randint(0, 10, (3, 3))) + + result = df.reindex(index=range(2), columns=range(2)) + expected = df.reindex(range(2)).reindex(columns=range(2)) + + tm.assert_frame_equal(result, expected) + + df = DataFrame(np.random.randn(5, 3) + 1j, columns=["a", "b", "c"]) + + result = df.reindex(index=[0, 1], columns=["a", "b"]) + expected = df.reindex([0, 1]).reindex(columns=["a", "b"]) + + tm.assert_frame_equal(result, expected) + + def test_reindex_multi_categorical_time(self): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = pd.MultiIndex.from_product( + [ + 
Categorical(["a", "b", "c"]), + Categorical(date_range("2012-01-01", periods=3, freq="H")), + ] + ) + df = pd.DataFrame({"a": range(len(midx))}, index=midx) + df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]] + + result = df2.reindex(midx) + expected = pd.DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx) + tm.assert_frame_equal(result, expected) + + data = [[1, 2, 3], [1, 2, 3]] + + @pytest.mark.parametrize( + "actual", + [ + DataFrame(data=data, index=["a", "a"]), + DataFrame(data=data, index=["a", "b"]), + DataFrame(data=data, index=["a", "b"]).set_index([0, 1]), + DataFrame(data=data, index=["a", "a"]).set_index([0, 1]), + ], + ) + def test_raise_on_drop_duplicate_index(self, actual): + + # issue 19186 + level = 0 if isinstance(actual.index, MultiIndex) else None + msg = re.escape("\"['c'] not found in axis\"") + with pytest.raises(KeyError, match=msg): + actual.drop("c", level=level, axis=0) + with pytest.raises(KeyError, match=msg): + actual.T.drop("c", level=level, axis=1) + expected_no_err = actual.drop("c", axis=0, level=level, errors="ignore") + tm.assert_frame_equal(expected_no_err, actual) + expected_no_err = actual.T.drop("c", axis=1, level=level, errors="ignore") + tm.assert_frame_equal(expected_no_err.T, actual) + + @pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 2]]) + @pytest.mark.parametrize("drop_labels", [[], [1], [2]]) + def test_drop_empty_list(self, index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + frame = pd.DataFrame(index=index).drop(drop_labels) + tm.assert_frame_equal(frame, pd.DataFrame(index=expected_index)) + + @pytest.mark.parametrize("index", [[1, 2, 3], [1, 2, 2]]) + @pytest.mark.parametrize("drop_labels", [[1, 4], [4, 5]]) + def test_drop_non_empty_list(self, index, drop_labels): + # GH 21494 + with pytest.raises(KeyError, match="not found in axis"): + pd.DataFrame(index=index).drop(drop_labels) diff --git a/pandas/tests/frame/test_block_internals.py 
b/pandas/tests/frame/test_block_internals.py new file mode 100644 index 00000000..d301ed96 --- /dev/null +++ b/pandas/tests/frame/test_block_internals.py @@ -0,0 +1,624 @@ +from datetime import datetime, timedelta +from io import StringIO +import itertools + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + Timestamp, + compat, + date_range, + option_context, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray, integer_array +from pandas.core.internals import ObjectBlock +from pandas.core.internals.blocks import IntBlock + +# Segregated collection of methods that require the BlockManager internal data +# structure + + +class TestDataFrameBlockInternals: + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz column inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = date_range("20130101", periods=3, tz="US/Eastern") + ts = dti[1] + + df = DataFrame({"B": dti}) + assert df["B"]._values.freq == "D" + + df.iloc[1, 0] = pd.NaT + assert df["B"]._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert dti.freq == "D" + assert dti[1] == ts + + def test_cast_internals(self, float_frame): + casted = DataFrame(float_frame._data, dtype=int) + expected = DataFrame(float_frame._series, dtype=int) + tm.assert_frame_equal(casted, expected) + + casted = DataFrame(float_frame._data, dtype=np.int32) + expected = DataFrame(float_frame._series, dtype=np.int32) + tm.assert_frame_equal(casted, expected) + + def test_consolidate(self, float_frame): + float_frame["E"] = 7.0 + consolidated = float_frame._consolidate() + assert len(consolidated._data.blocks) == 1 + + # Ensure copy, do I want this? 
+ recons = consolidated._consolidate() + assert recons is not consolidated + tm.assert_frame_equal(recons, consolidated) + + float_frame["F"] = 8.0 + assert len(float_frame._data.blocks) == 3 + + float_frame._consolidate(inplace=True) + assert len(float_frame._data.blocks) == 1 + + def test_consolidate_inplace(self, float_frame): + frame = float_frame.copy() # noqa + + # triggers in-place consolidation + for letter in range(ord("A"), ord("Z")): + float_frame[chr(letter)] = chr(letter) + + def test_values_consolidate(self, float_frame): + float_frame["E"] = 7.0 + assert not float_frame._data.is_consolidated() + _ = float_frame.values # noqa + assert float_frame._data.is_consolidated() + + def test_modify_values(self, float_frame): + float_frame.values[5] = 5 + assert (float_frame.values[5] == 5).all() + + # unconsolidated + float_frame["E"] = 7.0 + float_frame.values[6] = 6 + assert (float_frame.values[6] == 6).all() + + def test_boolean_set_uncons(self, float_frame): + float_frame["E"] = 7.0 + + expected = float_frame.values.copy() + expected[expected > 1] = 2 + + float_frame[float_frame > 1] = 2 + tm.assert_almost_equal(expected, float_frame.values) + + def test_values_numeric_cols(self, float_frame): + float_frame["foo"] = "bar" + + values = float_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + def test_values_lcd(self, mixed_float_frame, mixed_int_frame): + + # mixed lcd + values = mixed_float_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + values = mixed_float_frame[["A", "B", "C"]].values + assert values.dtype == np.float32 + + values = mixed_float_frame[["C"]].values + assert values.dtype == np.float16 + + # GH 10364 + # B uint64 forces float because there are other signed int types + values = mixed_int_frame[["A", "B", "C", "D"]].values + assert values.dtype == np.float64 + + values = mixed_int_frame[["A", "D"]].values + assert values.dtype == np.int64 + + # B uint64 forces float because there are other 
signed int types + values = mixed_int_frame[["A", "B", "C"]].values + assert values.dtype == np.float64 + + # as B and C are both unsigned, no forcing to float is needed + values = mixed_int_frame[["B", "C"]].values + assert values.dtype == np.uint64 + + values = mixed_int_frame[["A", "C"]].values + assert values.dtype == np.int32 + + values = mixed_int_frame[["C", "D"]].values + assert values.dtype == np.int64 + + values = mixed_int_frame[["A"]].values + assert values.dtype == np.int32 + + values = mixed_int_frame[["C"]].values + assert values.dtype == np.uint8 + + def test_constructor_with_convert(self): + # this is actually mostly a test of lib.maybe_convert_objects + # #2845 + df = DataFrame({"A": [2 ** 63 - 1]}) + result = df["A"] + expected = Series(np.asarray([2 ** 63 - 1], np.int64), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2 ** 63]}) + result = df["A"] + expected = Series(np.asarray([2 ** 63], np.uint64), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [datetime(2005, 1, 1), True]}) + result = df["A"] + expected = Series( + np.asarray([datetime(2005, 1, 1), True], np.object_), name="A" + ) + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [None, 1]}) + result = df["A"] + expected = Series(np.asarray([np.nan, 1], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0, 2]}) + result = df["A"] + expected = Series(np.asarray([1.0, 2], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, 3]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, 3], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, 3.0]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, 3.0], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, True]}) + result = df["A"] + expected = 
Series(np.asarray([1.0 + 2.0j, True], np.object_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0, None]}) + result = df["A"] + expected = Series(np.asarray([1.0, np.nan], np.float_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [1.0 + 2.0j, None]}) + result = df["A"] + expected = Series(np.asarray([1.0 + 2.0j, np.nan], np.complex_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2.0, 1, True, None]}) + result = df["A"] + expected = Series(np.asarray([2.0, 1, True, None], np.object_), name="A") + tm.assert_series_equal(result, expected) + + df = DataFrame({"A": [2.0, 1, datetime(2006, 1, 1), None]}) + result = df["A"] + expected = Series( + np.asarray([2.0, 1, datetime(2006, 1, 1), None], np.object_), name="A" + ) + tm.assert_series_equal(result, expected) + + def test_construction_with_mixed(self, float_string_frame): + # test construction edge cases with mixed types + + # f7u12, this does not work without extensive workaround + data = [ + [datetime(2001, 1, 5), np.nan, datetime(2001, 1, 2)], + [datetime(2000, 1, 2), datetime(2000, 1, 3), datetime(2000, 1, 1)], + ] + df = DataFrame(data) + + # check dtypes + result = df.dtypes + expected = Series({"datetime64[ns]": 3}) + + # mixed-type frames + float_string_frame["datetime"] = datetime.now() + float_string_frame["timedelta"] = timedelta(days=1, seconds=1) + assert float_string_frame["datetime"].dtype == "M8[ns]" + assert float_string_frame["timedelta"].dtype == "m8[ns]" + result = float_string_frame.dtypes + expected = Series( + [np.dtype("float64")] * 4 + + [ + np.dtype("object"), + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + ], + index=list("ABCD") + ["foo", "datetime", "timedelta"], + ) + tm.assert_series_equal(result, expected) + + def test_construction_with_conversions(self): + + # convert from a numpy array of non-ns timedelta64 + arr = np.array([1, 2, 3], dtype="timedelta64[s]") + df = 
DataFrame(index=range(3)) + df["A"] = arr + expected = DataFrame( + {"A": pd.timedelta_range("00:00:01", periods=3, freq="s")}, index=range(3) + ) + tm.assert_frame_equal(df, expected) + + expected = DataFrame( + { + "dt1": Timestamp("20130101"), + "dt2": date_range("20130101", periods=3), + # 'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'), + }, + index=range(3), + ) + + df = DataFrame(index=range(3)) + df["dt1"] = np.datetime64("2013-01-01") + df["dt2"] = np.array( + ["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]" + ) + + # df['dt3'] = np.array(['2013-01-01 00:00:01','2013-01-01 + # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]') + + tm.assert_frame_equal(df, expected) + + def test_constructor_compound_dtypes(self): + # GH 5191 + # compound dtypes should raise not-implementederror + + def f(dtype): + data = list(itertools.repeat((datetime(2001, 1, 1), "aa", 20), 9)) + return DataFrame(data=data, columns=["A", "B", "C"], dtype=dtype) + + msg = "compound dtypes are not implemented in the DataFrame constructor" + with pytest.raises(NotImplementedError, match=msg): + f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) + + # these work (though results may be unexpected) + f("int64") + f("float64") + + # 10822 + # invalid error message on dt inference + if not compat.is_platform_windows(): + f("M8[ns]") + + def test_equals_different_blocks(self): + # GH 9330 + df0 = pd.DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]}) + df1 = df0.reset_index()[["A", "B", "C"]] + # this assert verifies that the above operations have + # induced a block rearrangement + assert df0._data.blocks[0].dtype != df1._data.blocks[0].dtype + + # do the real tests + tm.assert_frame_equal(df0, df1) + assert df0.equals(df1) + assert df1.equals(df0) + + def test_copy_blocks(self, float_frame): + # API/ENH 9607 + df = DataFrame(float_frame, copy=True) + column = df.columns[0] + + # use the default copy=True, change a column + blocks = 
df._to_dict_of_blocks(copy=True) + for dtype, _df in blocks.items(): + if column in _df: + _df.loc[:, column] = _df[column] + 1 + + # make sure we did not change the original DataFrame + assert not _df[column].equals(df[column]) + + def test_no_copy_blocks(self, float_frame): + # API/ENH 9607 + df = DataFrame(float_frame, copy=True) + column = df.columns[0] + + # use the copy=False, change a column + blocks = df._to_dict_of_blocks(copy=False) + for dtype, _df in blocks.items(): + if column in _df: + _df.loc[:, column] = _df[column] + 1 + + # make sure we did change the original DataFrame + assert _df[column].equals(df[column]) + + def test_copy(self, float_frame, float_string_frame): + cop = float_frame.copy() + cop["E"] = cop["A"] + assert "E" not in float_frame + + # copy objects + copy = float_string_frame.copy() + assert copy._data is not float_string_frame._data + + def test_pickle(self, float_string_frame, timezone_frame): + empty_frame = DataFrame() + + unpickled = tm.round_trip_pickle(float_string_frame) + tm.assert_frame_equal(float_string_frame, unpickled) + + # buglet + float_string_frame._data.ndim + + # empty + unpickled = tm.round_trip_pickle(empty_frame) + repr(unpickled) + + # tz frame + unpickled = tm.round_trip_pickle(timezone_frame) + tm.assert_frame_equal(timezone_frame, unpickled) + + def test_consolidate_datetime64(self): + # numpy vstack bug + + data = """\ +starting,ending,measure +2012-06-21 00:00,2012-06-23 07:00,77 +2012-06-23 07:00,2012-06-23 16:30,65 +2012-06-23 16:30,2012-06-25 08:00,77 +2012-06-25 08:00,2012-06-26 12:00,0 +2012-06-26 12:00,2012-06-27 08:00,77 +""" + df = pd.read_csv(StringIO(data), parse_dates=[0, 1]) + + ser_starting = df.starting + ser_starting.index = ser_starting.values + ser_starting = ser_starting.tz_localize("US/Eastern") + ser_starting = ser_starting.tz_convert("UTC") + ser_starting.index.name = "starting" + + ser_ending = df.ending + ser_ending.index = ser_ending.values + ser_ending = 
ser_ending.tz_localize("US/Eastern") + ser_ending = ser_ending.tz_convert("UTC") + ser_ending.index.name = "ending" + + df.starting = ser_starting.index + df.ending = ser_ending.index + + tm.assert_index_equal(pd.DatetimeIndex(df.starting), ser_starting.index) + tm.assert_index_equal(pd.DatetimeIndex(df.ending), ser_ending.index) + + def test_is_mixed_type(self, float_frame, float_string_frame): + assert not float_frame._is_mixed_type + assert float_string_frame._is_mixed_type + + def test_get_numeric_data(self): + # TODO(wesm): unused? + intname = np.dtype(np.int_).name # noqa + floatname = np.dtype(np.float_).name # noqa + + datetime64name = np.dtype("M8[ns]").name + objectname = np.dtype(np.object_).name + + df = DataFrame( + {"a": 1.0, "b": 2, "c": "foo", "f": Timestamp("20010102")}, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [ + np.dtype("float64"), + np.dtype("int64"), + np.dtype(objectname), + np.dtype(datetime64name), + ], + index=["a", "b", "c", "f"], + ) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + "d": np.array([1.0] * 10, dtype="float32"), + "e": np.array([1] * 10, dtype="int32"), + "f": np.array([1] * 10, dtype="int16"), + "g": Timestamp("20010102"), + }, + index=np.arange(10), + ) + + result = df._get_numeric_data() + expected = df.loc[:, ["a", "b", "d", "e", "f"]] + tm.assert_frame_equal(result, expected) + + only_obj = df.loc[:, ["c", "g"]] + result = only_obj._get_numeric_data() + expected = df.loc[:, []] + tm.assert_frame_equal(result, expected) + + df = DataFrame.from_dict({"a": [1, 2], "b": ["foo", "bar"], "c": [np.pi, np.e]}) + result = df._get_numeric_data() + expected = DataFrame.from_dict({"a": [1, 2], "c": [np.pi, np.e]}) + tm.assert_frame_equal(result, expected) + + df = result.copy() + result = df._get_numeric_data() + expected = df + tm.assert_frame_equal(result, expected) + + def test_get_numeric_data_extension_dtype(self): + # GH 22290 + df = 
DataFrame( + { + "A": integer_array([-10, np.nan, 0, 10, 20, 30], dtype="Int64"), + "B": Categorical(list("abcabc")), + "C": integer_array([0, 1, 2, 3, np.nan, 5], dtype="UInt8"), + "D": IntervalArray.from_breaks(range(7)), + } + ) + result = df._get_numeric_data() + expected = df.loc[:, ["A", "C"]] + tm.assert_frame_equal(result, expected) + + def test_convert_objects(self, float_string_frame): + + oops = float_string_frame.T.T + converted = oops._convert(datetime=True) + tm.assert_frame_equal(converted, float_string_frame) + assert converted["A"].dtype == np.float64 + + # force numeric conversion + float_string_frame["H"] = "1." + float_string_frame["I"] = "1" + + # add in some items that will be nan + length = len(float_string_frame) + float_string_frame["J"] = "1." + float_string_frame["K"] = "1" + float_string_frame.loc[0:5, ["J", "K"]] = "garbled" + converted = float_string_frame._convert(datetime=True, numeric=True) + assert converted["H"].dtype == "float64" + assert converted["I"].dtype == "int64" + assert converted["J"].dtype == "float64" + assert converted["K"].dtype == "float64" + assert len(converted["J"].dropna()) == length - 5 + assert len(converted["K"].dropna()) == length - 5 + + # via astype + converted = float_string_frame.copy() + converted["H"] = converted["H"].astype("float64") + converted["I"] = converted["I"].astype("int64") + assert converted["H"].dtype == "float64" + assert converted["I"].dtype == "int64" + + # via astype, but errors + converted = float_string_frame.copy() + with pytest.raises(ValueError, match="invalid literal"): + converted["H"].astype("int32") + + # mixed in a single column + df = DataFrame(dict(s=Series([1, "na", 3, 4]))) + result = df._convert(datetime=True, numeric=True) + expected = DataFrame(dict(s=Series([1, np.nan, 3, 4]))) + tm.assert_frame_equal(result, expected) + + def test_convert_objects_no_conversion(self): + mixed1 = DataFrame({"a": [1, 2, 3], "b": [4.0, 5, 6], "c": ["x", "y", "z"]}) + mixed2 = 
mixed1._convert(datetime=True) + tm.assert_frame_equal(mixed1, mixed2) + + def test_infer_objects(self): + # GH 11221 + df = DataFrame( + { + "a": ["a", 1, 2, 3], + "b": ["b", 2.0, 3.0, 4.1], + "c": [ + "c", + datetime(2016, 1, 1), + datetime(2016, 1, 2), + datetime(2016, 1, 3), + ], + "d": [1, 2, 3, "d"], + }, + columns=["a", "b", "c", "d"], + ) + df = df.iloc[1:].infer_objects() + + assert df["a"].dtype == "int64" + assert df["b"].dtype == "float64" + assert df["c"].dtype == "M8[ns]" + assert df["d"].dtype == "object" + + expected = DataFrame( + { + "a": [1, 2, 3], + "b": [2.0, 3.0, 4.1], + "c": [datetime(2016, 1, 1), datetime(2016, 1, 2), datetime(2016, 1, 3)], + "d": [2, 3, "d"], + }, + columns=["a", "b", "c", "d"], + ) + # reconstruct frame to verify inference is same + tm.assert_frame_equal(df.reset_index(drop=True), expected) + + def test_stale_cached_series_bug_473(self): + + # this is chained, but ok + with option_context("chained_assignment", None): + Y = DataFrame( + np.random.random((4, 4)), + index=("a", "b", "c", "d"), + columns=("e", "f", "g", "h"), + ) + repr(Y) + Y["e"] = Y["e"].astype("object") + Y["g"]["c"] = np.NaN + repr(Y) + result = Y.sum() # noqa + exp = Y["g"].sum() # noqa + assert pd.isna(Y["g"]["c"]) + + def test_get_X_columns(self): + # numeric and object columns + + df = DataFrame( + { + "a": [1, 2, 3], + "b": [True, False, True], + "c": ["foo", "bar", "baz"], + "d": [None, None, None], + "e": [3.14, 0.577, 2.773], + } + ) + + tm.assert_index_equal(df._get_numeric_data().columns, pd.Index(["a", "b", "e"])) + + def test_strange_column_corruption_issue(self): + # (wesm) Unclear how exactly this is related to internal matters + df = DataFrame(index=[0, 1]) + df[0] = np.nan + wasCol = {} + + for i, dt in enumerate(df.index): + for col in range(100, 200): + if col not in wasCol: + wasCol[col] = 1 + df[col] = np.nan + df[col][dt] = i + + myid = 100 + + first = len(df.loc[pd.isna(df[myid]), [myid]]) + second = len(df.loc[pd.isna(df[myid]), 
[myid]]) + assert first == second == 0 + + def test_constructor_no_pandas_array(self): + # Ensure that PandasArray isn't allowed inside Series + # See https://github.com/pandas-dev/pandas/issues/23995 for more. + arr = pd.Series([1, 2, 3]).array + result = pd.DataFrame({"A": arr}) + expected = pd.DataFrame({"A": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) + assert isinstance(result._data.blocks[0], IntBlock) + + def test_add_column_with_pandas_array(self): + # GH 26390 + df = pd.DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) + df["c"] = pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)) + df2 = pd.DataFrame( + { + "a": [1, 2, 3, 4], + "b": ["a", "b", "c", "d"], + "c": pd.arrays.PandasArray(np.array([1, 2, None, 3], dtype=object)), + } + ) + assert type(df["c"]._data.blocks[0]) == ObjectBlock + assert type(df2["c"]._data.blocks[0]) == ObjectBlock + tm.assert_frame_equal(df, df2) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py new file mode 100644 index 00000000..9bad54b0 --- /dev/null +++ b/pandas/tests/frame/test_combine_concat.py @@ -0,0 +1,798 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series, Timestamp, date_range +import pandas._testing as tm + + +class TestDataFrameConcatCommon: + def test_concat_multiple_frames_dtypes(self): + + # GH 2759 + A = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) + B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) + results = pd.concat((A, B), axis=1).dtypes + expected = Series( + [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2, + index=["foo", "bar", 0, 1], + ) + tm.assert_series_equal(results, expected) + + @pytest.mark.parametrize( + "data", + [ + pd.date_range("2000", periods=4), + pd.date_range("2000", periods=4, tz="US/Central"), + pd.period_range("2000", periods=4), + pd.timedelta_range(0, periods=4), + ], + ) + def 
test_combine_datetlike_udf(self, data): + # https://github.com/pandas-dev/pandas/issues/23079 + df = pd.DataFrame({"A": data}) + other = df.copy() + df.iloc[1, 0] = None + + def combiner(a, b): + return b + + result = df.combine(other, combiner) + tm.assert_frame_equal(result, other) + + def test_concat_multiple_tzs(self): + # GH 12467 + # combining datetime tz-aware and naive DataFrames + ts1 = Timestamp("2015-01-01", tz=None) + ts2 = Timestamp("2015-01-01", tz="UTC") + ts3 = Timestamp("2015-01-01", tz="EST") + + df1 = DataFrame(dict(time=[ts1])) + df2 = DataFrame(dict(time=[ts2])) + df3 = DataFrame(dict(time=[ts3])) + + results = pd.concat([df1, df2]).reset_index(drop=True) + expected = DataFrame(dict(time=[ts1, ts2]), dtype=object) + tm.assert_frame_equal(results, expected) + + results = pd.concat([df1, df3]).reset_index(drop=True) + expected = DataFrame(dict(time=[ts1, ts3]), dtype=object) + tm.assert_frame_equal(results, expected) + + results = pd.concat([df2, df3]).reset_index(drop=True) + expected = DataFrame(dict(time=[ts2, ts3])) + tm.assert_frame_equal(results, expected) + + @pytest.mark.parametrize( + "t1", + [ + "2015-01-01", + pytest.param( + pd.NaT, + marks=pytest.mark.xfail( + reason="GH23037 incorrect dtype when concatenating" + ), + ), + ], + ) + def test_concat_tz_NaT(self, t1): + # GH 22796 + # Concating tz-aware multicolumn DataFrames + ts1 = Timestamp(t1, tz="UTC") + ts2 = Timestamp("2015-01-01", tz="UTC") + ts3 = Timestamp("2015-01-01", tz="UTC") + + df1 = DataFrame([[ts1, ts2]]) + df2 = DataFrame([[ts3]]) + + result = pd.concat([df1, df2]) + expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0]) + + tm.assert_frame_equal(result, expected) + + def test_concat_tz_not_aligned(self): + # GH 22796 + ts = pd.to_datetime([1, 2]).tz_localize("UTC") + a = pd.DataFrame({"A": ts}) + b = pd.DataFrame({"A": ts, "B": ts}) + result = pd.concat([a, b], sort=True, ignore_index=True) + expected = pd.DataFrame( + {"A": list(ts) + list(ts), "B": 
[pd.NaT, pd.NaT] + list(ts)} + ) + tm.assert_frame_equal(result, expected) + + def test_concat_tuple_keys(self): + # GH 14438 + df1 = pd.DataFrame(np.ones((2, 2)), columns=list("AB")) + df2 = pd.DataFrame(np.ones((3, 2)) * 2, columns=list("AB")) + results = pd.concat((df1, df2), keys=[("bee", "bah"), ("bee", "boo")]) + expected = pd.DataFrame( + { + "A": { + ("bee", "bah", 0): 1.0, + ("bee", "bah", 1): 1.0, + ("bee", "boo", 0): 2.0, + ("bee", "boo", 1): 2.0, + ("bee", "boo", 2): 2.0, + }, + "B": { + ("bee", "bah", 0): 1.0, + ("bee", "bah", 1): 1.0, + ("bee", "boo", 0): 2.0, + ("bee", "boo", 1): 2.0, + ("bee", "boo", 2): 2.0, + }, + } + ) + tm.assert_frame_equal(results, expected) + + def test_update(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other) + + expected = DataFrame( + [[1.5, np.nan, 3], [3.6, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] + ) + tm.assert_frame_equal(df, expected) + + def test_update_dtypes(self): + + # gh 3016 + df = DataFrame( + [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + + other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) + df.update(other) + + expected = DataFrame( + [[45.0, 45.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + tm.assert_frame_equal(df, expected) + + def test_update_nooverwrite(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other, overwrite=False) + + expected = DataFrame( + [[1.5, np.nan, 3], [1.5, 2, 3], [1.5, np.nan, 3], [1.5, np.nan, 3.0]] + ) + tm.assert_frame_equal(df, expected) + + def test_update_filtered(self): + df = DataFrame( + [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] 
+ ) + + other = DataFrame([[3.6, 2.0, np.nan], [np.nan, np.nan, 7]], index=[1, 3]) + + df.update(other, filter_func=lambda x: x > 2) + + expected = DataFrame( + [[1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 3], [1.5, np.nan, 7.0]] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "bad_kwarg, exception, msg", + [ + # errors must be 'ignore' or 'raise' + ({"errors": "something"}, ValueError, "The parameter errors must.*"), + ({"join": "inner"}, NotImplementedError, "Only left join is supported"), + ], + ) + def test_update_raise_bad_parameter(self, bad_kwarg, exception, msg): + df = DataFrame([[1.5, 1, 3.0]]) + with pytest.raises(exception, match=msg): + df.update(df, **bad_kwarg) + + def test_update_raise_on_overlap(self): + df = DataFrame( + [[1.5, 1, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] + ) + + other = DataFrame([[2.0, np.nan], [np.nan, 7]], index=[1, 3], columns=[1, 2]) + with pytest.raises(ValueError, match="Data overlaps"): + df.update(other, errors="raise") + + def test_update_from_non_df(self): + d = {"a": Series([1, 2, 3, 4]), "b": Series([5, 6, 7, 8])} + df = DataFrame(d) + + d["a"] = Series([5, 6, 7, 8]) + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + d = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]} + df = DataFrame(d) + + d["a"] = [5, 6, 7, 8] + df.update(d) + + expected = DataFrame(d) + + tm.assert_frame_equal(df, expected) + + def test_update_datetime_tz(self): + # GH 25807 + result = DataFrame([pd.Timestamp("2019", tz="UTC")]) + result.update(result) + expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) + tm.assert_frame_equal(result, expected) + + def test_join_str_datetime(self): + str_dates = ["20120209", "20120222"] + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + A = DataFrame(str_dates, index=range(2), columns=["aa"]) + C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) + + tst = A.join(C, on="aa") + + assert len(tst.columns) == 
3 + + def test_join_multiindex_leftright(self): + # GH 10741 + df1 = pd.DataFrame( + [ + ["a", "x", 0.471780], + ["a", "y", 0.774908], + ["a", "z", 0.563634], + ["b", "x", -0.353756], + ["b", "y", 0.368062], + ["b", "z", -1.721840], + ["c", "x", 1], + ["c", "y", 2], + ["c", "z", 3], + ], + columns=["first", "second", "value1"], + ).set_index(["first", "second"]) + + df2 = pd.DataFrame( + [["a", 10], ["b", 20]], columns=["first", "value2"] + ).set_index(["first"]) + + exp = pd.DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + [1.000000, np.nan], + [2.000000, np.nan], + [3.000000, np.nan], + ], + index=df1.index, + columns=["value1", "value2"], + ) + + # these must be the same results (but columns are flipped) + tm.assert_frame_equal(df1.join(df2, how="left"), exp) + tm.assert_frame_equal(df2.join(df1, how="right"), exp[["value2", "value1"]]) + + exp_idx = pd.MultiIndex.from_product( + [["a", "b"], ["x", "y", "z"]], names=["first", "second"] + ) + exp = pd.DataFrame( + [ + [0.471780, 10], + [0.774908, 10], + [0.563634, 10], + [-0.353756, 20], + [0.368062, 20], + [-1.721840, 20], + ], + index=exp_idx, + columns=["value1", "value2"], + ) + + tm.assert_frame_equal(df1.join(df2, how="right"), exp) + tm.assert_frame_equal(df2.join(df1, how="left"), exp[["value2", "value1"]]) + + def test_concat_named_keys(self): + # GH 14252 + df = pd.DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]}) + index = Index(["a", "b"], name="baz") + concatted_named_from_keys = pd.concat([df, df], keys=index) + expected_named = pd.DataFrame( + {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, + index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=["baz", None]), + ) + tm.assert_frame_equal(concatted_named_from_keys, expected_named) + + index_no_name = Index(["a", "b"], name=None) + concatted_named_from_names = pd.concat( + [df, df], keys=index_no_name, names=["baz"] + ) + 
tm.assert_frame_equal(concatted_named_from_names, expected_named) + + concatted_unnamed = pd.concat([df, df], keys=index_no_name) + expected_unnamed = pd.DataFrame( + {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]}, + index=pd.MultiIndex.from_product((["a", "b"], [0, 1]), names=[None, None]), + ) + tm.assert_frame_equal(concatted_unnamed, expected_unnamed) + + def test_concat_axis_parameter(self): + # GH 14369 + df1 = pd.DataFrame({"A": [0.1, 0.2]}, index=range(2)) + df2 = pd.DataFrame({"A": [0.3, 0.4]}, index=range(2)) + + # Index/row/0 DataFrame + expected_index = pd.DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1]) + + concatted_index = pd.concat([df1, df2], axis="index") + tm.assert_frame_equal(concatted_index, expected_index) + + concatted_row = pd.concat([df1, df2], axis="rows") + tm.assert_frame_equal(concatted_row, expected_index) + + concatted_0 = pd.concat([df1, df2], axis=0) + tm.assert_frame_equal(concatted_0, expected_index) + + # Columns/1 DataFrame + expected_columns = pd.DataFrame( + [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"] + ) + + concatted_columns = pd.concat([df1, df2], axis="columns") + tm.assert_frame_equal(concatted_columns, expected_columns) + + concatted_1 = pd.concat([df1, df2], axis=1) + tm.assert_frame_equal(concatted_1, expected_columns) + + series1 = pd.Series([0.1, 0.2]) + series2 = pd.Series([0.3, 0.4]) + + # Index/row/0 Series + expected_index_series = pd.Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1]) + + concatted_index_series = pd.concat([series1, series2], axis="index") + tm.assert_series_equal(concatted_index_series, expected_index_series) + + concatted_row_series = pd.concat([series1, series2], axis="rows") + tm.assert_series_equal(concatted_row_series, expected_index_series) + + concatted_0_series = pd.concat([series1, series2], axis=0) + tm.assert_series_equal(concatted_0_series, expected_index_series) + + # Columns/1 Series + expected_columns_series = pd.DataFrame( + [[0.1, 0.3], [0.2, 0.4]], 
index=[0, 1], columns=[0, 1] + ) + + concatted_columns_series = pd.concat([series1, series2], axis="columns") + tm.assert_frame_equal(concatted_columns_series, expected_columns_series) + + concatted_1_series = pd.concat([series1, series2], axis=1) + tm.assert_frame_equal(concatted_1_series, expected_columns_series) + + # Testing ValueError + with pytest.raises(ValueError, match="No axis named"): + pd.concat([series1, series2], axis="something") + + def test_concat_numerical_names(self): + # #15262 # #12223 + df = pd.DataFrame( + {"col": range(9)}, + dtype="int32", + index=( + pd.MultiIndex.from_product( + [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=[1, 2] + ) + ), + ) + result = pd.concat((df.iloc[:2, :], df.iloc[-2:, :])) + expected = pd.DataFrame( + {"col": [0, 1, 7, 8]}, + dtype="int32", + index=pd.MultiIndex.from_tuples( + [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")], names=[1, 2] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_concat_astype_dup_col(self): + # gh 23049 + df = pd.DataFrame([{"a": "b"}]) + df = pd.concat([df, df], axis=1) + + result = df.astype("category") + expected = pd.DataFrame( + np.array(["b", "b"]).reshape(1, 2), columns=["a", "a"] + ).astype("category") + tm.assert_frame_equal(result, expected) + + +class TestDataFrameCombineFirst: + def test_combine_first_mixed(self): + a = Series(["a", "b"], index=range(2)) + b = Series(range(2), index=range(2)) + f = DataFrame({"A": a, "B": b}) + + a = Series(["a", "b"], index=range(5, 7)) + b = Series(range(2), index=range(5, 7)) + g = DataFrame({"A": a, "B": b}) + + exp = pd.DataFrame( + {"A": list("abab"), "B": [0.0, 1.0, 0.0, 1.0]}, index=[0, 1, 5, 6] + ) + combined = f.combine_first(g) + tm.assert_frame_equal(combined, exp) + + def test_combine_first(self, float_frame): + # disjoint + head, tail = float_frame[:5], float_frame[5:] + + combined = head.combine_first(tail) + reordered_frame = float_frame.reindex(combined.index) + tm.assert_frame_equal(combined, 
reordered_frame) + assert tm.equalContents(combined.columns, float_frame.columns) + tm.assert_series_equal(combined["A"], reordered_frame["A"]) + + # same index + fcopy = float_frame.copy() + fcopy["A"] = 1 + del fcopy["C"] + + fcopy2 = float_frame.copy() + fcopy2["B"] = 0 + del fcopy2["D"] + + combined = fcopy.combine_first(fcopy2) + + assert (combined["A"] == 1).all() + tm.assert_series_equal(combined["B"], fcopy["B"]) + tm.assert_series_equal(combined["C"], fcopy2["C"]) + tm.assert_series_equal(combined["D"], fcopy["D"]) + + # overlap + head, tail = reordered_frame[:10].copy(), reordered_frame + head["A"] = 1 + + combined = head.combine_first(tail) + assert (combined["A"][:10] == 1).all() + + # reverse overlap + tail["A"][:10] = 0 + combined = tail.combine_first(head) + assert (combined["A"][:10] == 0).all() + + # no overlap + f = float_frame[:10] + g = float_frame[10:] + combined = f.combine_first(g) + tm.assert_series_equal(combined["A"].reindex(f.index), f["A"]) + tm.assert_series_equal(combined["A"].reindex(g.index), g["A"]) + + # corner cases + comb = float_frame.combine_first(DataFrame()) + tm.assert_frame_equal(comb, float_frame) + + comb = DataFrame().combine_first(float_frame) + tm.assert_frame_equal(comb, float_frame) + + comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) + assert "faz" in comb.index + + # #2525 + df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)]) + df2 = DataFrame(columns=["b"]) + result = df.combine_first(df2) + assert "b" in result + + def test_combine_first_mixed_bug(self): + idx = Index(["a", "b", "c", "e"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "e"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3}) + + idx = Index(["a", "b", "c", "f"]) + ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx) + ser2 = Series(["a", "b", "c", "f"], index=idx) + ser3 = Series([12, 4, 5, 97], index=idx) + + frame2 = 
DataFrame({"col1": ser1, "col2": ser2, "col5": ser3}) + + combined = frame1.combine_first(frame2) + assert len(combined.columns) == 5 + + # gh 3016 (same as in update) + df = DataFrame( + [[1.0, 2.0, False, True], [4.0, 5.0, True, False]], + columns=["A", "B", "bool1", "bool2"], + ) + + other = DataFrame([[45, 45]], index=[0], columns=["A", "B"]) + result = df.combine_first(other) + tm.assert_frame_equal(result, df) + + df.loc[0, "A"] = np.nan + result = df.combine_first(other) + df.loc[0, "A"] = 45 + tm.assert_frame_equal(result, df) + + # doc example + df1 = DataFrame( + {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]} + ) + + df2 = DataFrame( + { + "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0], + "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0], + } + ) + + result = df1.combine_first(df2) + expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]}) + tm.assert_frame_equal(result, expected) + + # GH3552, return object dtype with bools + df1 = DataFrame( + [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]] + ) + df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2]) + + result = df1.combine_first(df2)[2] + expected = Series([True, True, False], name=2) + tm.assert_series_equal(result, expected) + + # GH 3593, converting datetime64[ns] incorrectly + df0 = DataFrame( + {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} + ) + df1 = DataFrame({"a": [None, None, None]}) + df2 = df1.combine_first(df0) + tm.assert_frame_equal(df2, df0) + + df2 = df0.combine_first(df1) + tm.assert_frame_equal(df2, df0) + + df0 = DataFrame( + {"a": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]} + ) + df1 = DataFrame({"a": [datetime(2000, 1, 2), None, None]}) + df2 = df1.combine_first(df0) + result = df0.copy() + result.iloc[0, :] = df1.iloc[0, :] + tm.assert_frame_equal(df2, result) + + df2 = df0.combine_first(df1) + tm.assert_frame_equal(df2, df0) + + def 
test_combine_first_align_nan(self): + # GH 7509 (not fixed) + dfa = pd.DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"]) + dfb = pd.DataFrame([[4], [5]], columns=["b"]) + assert dfa["a"].dtype == "datetime64[ns]" + assert dfa["b"].dtype == "int64" + + res = dfa.combine_first(dfb) + exp = pd.DataFrame( + {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2.0, 5.0]}, + columns=["a", "b"], + ) + tm.assert_frame_equal(res, exp) + assert res["a"].dtype == "datetime64[ns]" + # ToDo: this must be int64 + assert res["b"].dtype == "float64" + + res = dfa.iloc[:0].combine_first(dfb) + exp = pd.DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"]) + tm.assert_frame_equal(res, exp) + # ToDo: this must be datetime64 + assert res["a"].dtype == "float64" + # ToDo: this must be int64 + assert res["b"].dtype == "int64" + + def test_combine_first_timezone(self): + # see gh-7630 + data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC") + df1 = pd.DataFrame( + columns=["UTCdatetime", "abc"], + data=data1, + index=pd.date_range("20140627", periods=1), + ) + data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC") + df2 = pd.DataFrame( + columns=["UTCdatetime", "xyz"], + data=data2, + index=pd.date_range("20140628", periods=1), + ) + res = df2[["UTCdatetime"]].combine_first(df1) + exp = pd.DataFrame( + { + "UTCdatetime": [ + pd.Timestamp("2010-01-01 01:01", tz="UTC"), + pd.Timestamp("2012-12-12 12:12", tz="UTC"), + ], + "abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT], + }, + columns=["UTCdatetime", "abc"], + index=pd.date_range("20140627", periods=2, freq="D"), + ) + tm.assert_frame_equal(res, exp) + assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]" + assert res["abc"].dtype == "datetime64[ns, UTC]" + + # see gh-10567 + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC") + df1 = pd.DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC") + df2 = pd.DataFrame({"DATE": dts2}) + + res = 
df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, UTC]" + + dts1 = pd.DatetimeIndex( + ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern" + ) + df1 = pd.DataFrame({"DATE": dts1}, index=[1, 3, 5, 7]) + dts2 = pd.DatetimeIndex( + ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern" + ) + df2 = pd.DataFrame({"DATE": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.DatetimeIndex( + [ + "2011-01-01", + "2012-01-01", + "NaT", + "2012-01-02", + "2011-01-03", + "2011-01-04", + ], + tz="US/Eastern", + ) + exp = pd.DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + + # different tz + dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern") + df1 = pd.DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-03", "2015-01-05") + df2 = pd.DataFrame({"DATE": dts2}) + + # if df1 doesn't have NaN, keep its dtype + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["DATE"].dtype == "datetime64[ns, US/Eastern]" + + dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern") + df1 = pd.DataFrame({"DATE": dts1}) + dts2 = pd.date_range("2015-01-01", "2015-01-03") + df2 = pd.DataFrame({"DATE": dts2}) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Timestamp("2015-01-01", tz="US/Eastern"), + pd.Timestamp("2015-01-02", tz="US/Eastern"), + pd.Timestamp("2015-01-03"), + ] + exp = pd.DataFrame({"DATE": exp_dts}) + tm.assert_frame_equal(res, exp) + assert res["DATE"].dtype == "object" + + def test_combine_first_timedelta(self): + data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"]) + df1 = pd.DataFrame({"TD": data1}, index=[1, 3, 5, 7]) + data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"]) + df2 = pd.DataFrame({"TD": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.TimedeltaIndex( + ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"] + ) + exp = pd.DataFrame({"TD": 
exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["TD"].dtype == "timedelta64[ns]" + + def test_combine_first_period(self): + data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M") + df1 = pd.DataFrame({"P": data1}, index=[1, 3, 5, 7]) + data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M") + df2 = pd.DataFrame({"P": data2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = pd.PeriodIndex( + ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M" + ) + exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == data1.dtype + + # different freq + dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D") + df2 = pd.DataFrame({"P": dts2}, index=[2, 4, 5]) + + res = df1.combine_first(df2) + exp_dts = [ + pd.Period("2011-01", freq="M"), + pd.Period("2012-01-01", freq="D"), + pd.NaT, + pd.Period("2012-01-02", freq="D"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-04", freq="M"), + ] + exp = pd.DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7]) + tm.assert_frame_equal(res, exp) + assert res["P"].dtype == "object" + + def test_combine_first_int(self): + # GH14687 - integer series that do no align exactly + + df1 = pd.DataFrame({"a": [0, 1, 3, 5]}, dtype="int64") + df2 = pd.DataFrame({"a": [1, 4]}, dtype="int64") + + res = df1.combine_first(df2) + tm.assert_frame_equal(res, df1) + assert res["a"].dtype == "int64" + + @pytest.mark.parametrize("val", [1, 1.0]) + def test_combine_first_with_asymmetric_other(self, val): + # see gh-20699 + df1 = pd.DataFrame({"isNum": [val]}) + df2 = pd.DataFrame({"isBool": [True]}) + + res = df1.combine_first(df2) + exp = pd.DataFrame({"isBool": [True], "isNum": [val]}) + + tm.assert_frame_equal(res, exp) + + def test_concat_datetime_datetime64_frame(self): + # #2624 + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), 
"hi"]) + + df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) + + ind = date_range(start="2000/1/1", freq="D", periods=10) + df1 = DataFrame({"date": ind, "test": range(10)}) + + # it works! + pd.concat([df1, df2_obj]) + + +class TestDataFrameUpdate: + def test_update_nan(self): + # #15593 #15617 + # test 1 + df1 = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) + df2 = DataFrame({"A": [None, 2, 3]}) + expected = df1.copy() + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) + + # test 2 + df1 = DataFrame({"A": [1.0, None, 3], "B": date_range("2000", periods=3)}) + df2 = DataFrame({"A": [None, 2, 3]}) + expected = DataFrame({"A": [1.0, 2, 3], "B": date_range("2000", periods=3)}) + df1.update(df2, overwrite=False) + + tm.assert_frame_equal(df1, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py new file mode 100644 index 00000000..61af0209 --- /dev/null +++ b/pandas/tests/frame/test_constructors.py @@ -0,0 +1,2572 @@ +from collections import OrderedDict, abc +from datetime import date, datetime, timedelta +import functools +import itertools + +import numpy as np +import numpy.ma as ma +import numpy.ma.mrecords as mrecords +import pytest + +from pandas.compat import is_platform_little_endian +from pandas.compat.numpy import _is_numpy_dev + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + RangeIndex, + Series, + Timedelta, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm +from pandas.arrays import IntervalArray, PeriodArray, SparseArray +from pandas.core.construction import create_series_with_explicit_dtype + +MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] +MIXED_INT_DTYPES = [ + "uint8", + "uint16", + "uint32", + "uint64", + "int8", + "int16", + "int32", + "int64", +] + + +class TestDataFrameConstructors: + def 
test_series_with_name_not_matching_column(self): + # GH#9232 + x = pd.Series(range(5), name=1) + y = pd.Series(range(5), name=0) + + result = pd.DataFrame(x, columns=[0]) + expected = pd.DataFrame([], columns=[0]) + tm.assert_frame_equal(result, expected) + + result = pd.DataFrame(y, columns=[1]) + expected = pd.DataFrame([], columns=[1]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "constructor", + [ + lambda: DataFrame(), + lambda: DataFrame(None), + lambda: DataFrame({}), + lambda: DataFrame(()), + lambda: DataFrame([]), + lambda: DataFrame((_ for _ in [])), + lambda: DataFrame(range(0)), + lambda: DataFrame(data=None), + lambda: DataFrame(data={}), + lambda: DataFrame(data=()), + lambda: DataFrame(data=[]), + lambda: DataFrame(data=(_ for _ in [])), + lambda: DataFrame(data=range(0)), + ], + ) + def test_empty_constructor(self, constructor): + expected = DataFrame() + result = constructor() + assert len(result.index) == 0 + assert len(result.columns) == 0 + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "emptylike,expected_index,expected_columns", + [ + ([[]], RangeIndex(1), RangeIndex(0)), + ([[], []], RangeIndex(2), RangeIndex(0)), + ([(_ for _ in [])], RangeIndex(1), RangeIndex(0)), + ], + ) + def test_emptylike_constructor(self, emptylike, expected_index, expected_columns): + expected = DataFrame(index=expected_index, columns=expected_columns) + result = DataFrame(emptylike) + tm.assert_frame_equal(result, expected) + + def test_constructor_mixed(self, float_string_frame): + index, data = tm.getMixedTypeDict() + + # TODO(wesm), incomplete test? 
+ indexed_frame = DataFrame(data, index=index) # noqa + unindexed_frame = DataFrame(data) # noqa + + assert float_string_frame["foo"].dtype == np.object_ + + def test_constructor_cast_failure(self): + foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) + assert foo["a"].dtype == object + + # GH 3010, constructing with odd arrays + df = DataFrame(np.ones((4, 2))) + + # this is ok + df["foo"] = np.ones((4, 2)).tolist() + + # this is not ok + msg = "Wrong number of items passed 2, placement implies 1" + with pytest.raises(ValueError, match=msg): + df["test"] = np.ones((4, 2)) + + # this is ok + df["foo2"] = np.ones((4, 2)).tolist() + + def test_constructor_dtype_copy(self): + orig_df = DataFrame({"col1": [1.0], "col2": [2.0], "col3": [3.0]}) + + new_df = pd.DataFrame(orig_df, dtype=float, copy=True) + + new_df["col1"] = 200.0 + assert orig_df["col1"][0] == 1.0 + + def test_constructor_dtype_nocast_view(self): + df = DataFrame([[1, 2]]) + should_be_view = DataFrame(df, dtype=df[0].dtype) + should_be_view[0][0] = 99 + assert df.values[0, 0] == 99 + + should_be_view = DataFrame(df.values, dtype=df[0].dtype) + should_be_view[0][0] = 97 + assert df.values[0, 0] == 97 + + def test_constructor_dtype_list_data(self): + df = DataFrame([[1, "2"], [None, "a"]], dtype=object) + assert df.loc[1, 0] is None + assert df.loc[0, 1] == "2" + + @pytest.mark.xfail(_is_numpy_dev, reason="Interprets list of frame as 3D") + def test_constructor_list_frames(self): + # see gh-3243 + result = DataFrame([DataFrame()]) + assert result.shape == (1, 0) + + result = DataFrame([DataFrame(dict(A=np.arange(5)))]) + assert isinstance(result.iloc[0, 0], DataFrame) + + def test_constructor_mixed_dtypes(self): + def _make_mixed_dtypes_df(typ, ad=None): + + if typ == "int": + dtypes = MIXED_INT_DTYPES + arrays = [np.array(np.random.rand(10), dtype=d) for d in dtypes] + elif typ == "float": + dtypes = MIXED_FLOAT_DTYPES + arrays = [ + np.array(np.random.randint(10, size=10), dtype=d) for d in dtypes + 
] + + for d, a in zip(dtypes, arrays): + assert a.dtype == d + if ad is None: + ad = dict() + ad.update({d: a for d, a in zip(dtypes, arrays)}) + return DataFrame(ad) + + def _check_mixed_dtypes(df, dtypes=None): + if dtypes is None: + dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES + for d in dtypes: + if d in df: + assert df.dtypes[d] == d + + # mixed floating and integer coexist in the same frame + df = _make_mixed_dtypes_df("float") + _check_mixed_dtypes(df) + + # add lots of types + df = _make_mixed_dtypes_df("float", dict(A=1, B="foo", C="bar")) + _check_mixed_dtypes(df) + + # GH 622 + df = _make_mixed_dtypes_df("int") + _check_mixed_dtypes(df) + + def test_constructor_complex_dtypes(self): + # GH10952 + a = np.random.rand(10).astype(np.complex64) + b = np.random.rand(10).astype(np.complex128) + + df = DataFrame({"a": a, "b": b}) + assert a.dtype == df.a.dtype + assert b.dtype == df.b.dtype + + def test_constructor_dtype_str_na_values(self, string_dtype): + # https://github.com/pandas-dev/pandas/issues/21083 + df = DataFrame({"A": ["x", None]}, dtype=string_dtype) + result = df.isna() + expected = DataFrame({"A": [False, True]}) + tm.assert_frame_equal(result, expected) + assert df.iloc[1, 0] is None + + df = DataFrame({"A": ["x", np.nan]}, dtype=string_dtype) + assert np.isnan(df.iloc[1, 0]) + + def test_constructor_rec(self, float_frame): + rec = float_frame.to_records(index=False) + rec.dtype.names = list(rec.dtype.names)[::-1] + + index = float_frame.index + + df = DataFrame(rec) + tm.assert_index_equal(df.columns, pd.Index(rec.dtype.names)) + + df2 = DataFrame(rec, index=index) + tm.assert_index_equal(df2.columns, pd.Index(rec.dtype.names)) + tm.assert_index_equal(df2.index, index) + + rng = np.arange(len(rec))[::-1] + df3 = DataFrame(rec, index=rng, columns=["C", "B"]) + expected = DataFrame(rec, index=rng).reindex(columns=["C", "B"]) + tm.assert_frame_equal(df3, expected) + + def test_constructor_bool(self): + df = DataFrame({0: np.ones(10, dtype=bool), 
1: np.zeros(10, dtype=bool)}) + assert df.values.dtype == np.bool_ + + def test_constructor_overflow_int64(self): + # see gh-14881 + values = np.array([2 ** 64 - i for i in range(1, 10)], dtype=np.uint64) + + result = DataFrame({"a": values}) + assert result["a"].dtype == np.uint64 + + # see gh-2355 + data_scores = [ + (6311132704823138710, 273), + (2685045978526272070, 23), + (8921811264899370420, 45), + (17019687244989530680, 270), + (9930107427299601010, 273), + ] + dtype = [("uid", "u8"), ("score", "u8")] + data = np.zeros((len(data_scores),), dtype=dtype) + data[:] = data_scores + df_crawls = DataFrame(data) + assert df_crawls["uid"].dtype == np.uint64 + + @pytest.mark.parametrize( + "values", + [ + np.array([2 ** 64], dtype=object), + np.array([2 ** 65]), + [2 ** 64 + 1], + np.array([-(2 ** 63) - 4], dtype=object), + np.array([-(2 ** 64) - 1]), + [-(2 ** 65) - 2], + ], + ) + def test_constructor_int_overflow(self, values): + # see gh-18584 + value = values[0] + result = DataFrame(values) + + assert result[0].dtype == object + assert result[0][0] == value + + def test_constructor_ordereddict(self): + import random + + nitems = 100 + nums = list(range(nitems)) + random.shuffle(nums) + expected = ["A{i:d}".format(i=i) for i in nums] + df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) + assert expected == list(df.columns) + + def test_constructor_dict(self): + datetime_series = tm.makeTimeSeries(nper=30) + # test expects index shifted by 5 + datetime_series_short = tm.makeTimeSeries(nper=30)[5:] + + frame = DataFrame({"col1": datetime_series, "col2": datetime_series_short}) + + # col2 is padded with NaN + assert len(datetime_series) == 30 + assert len(datetime_series_short) == 25 + + tm.assert_series_equal(frame["col1"], datetime_series.rename("col1")) + + exp = pd.Series( + np.concatenate([[np.nan] * 5, datetime_series_short.values]), + index=datetime_series.index, + name="col2", + ) + tm.assert_series_equal(exp, frame["col2"]) + + frame = DataFrame( + 
{"col1": datetime_series, "col2": datetime_series_short}, + columns=["col2", "col3", "col4"], + ) + + assert len(frame) == len(datetime_series_short) + assert "col1" not in frame + assert isna(frame["col3"]).all() + + # Corner cases + assert len(DataFrame()) == 0 + + # mix dict and array, wrong size - no spec for which error should raise + # first + with pytest.raises(ValueError): + DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + + # Length-one dict micro-optimization + frame = DataFrame({"A": {"1": 1, "2": 2}}) + tm.assert_index_equal(frame.index, pd.Index(["1", "2"])) + + # empty dict plus index + idx = Index([0, 1, 2]) + frame = DataFrame({}, index=idx) + assert frame.index is idx + + # empty dict with index and columns + idx = Index([0, 1, 2]) + frame = DataFrame({}, index=idx, columns=idx) + assert frame.index is idx + assert frame.columns is idx + assert len(frame._series) == 3 + + # with dict of empty list and Series + frame = DataFrame({"A": [], "B": []}, columns=["A", "B"]) + tm.assert_index_equal(frame.index, Index([], dtype=np.int64)) + + # GH 14381 + # Dict with None value + frame_none = DataFrame(dict(a=None), index=[0]) + frame_none_list = DataFrame(dict(a=[None]), index=[0]) + assert frame_none._get_value(0, "a") is None + assert frame_none_list._get_value(0, "a") is None + tm.assert_frame_equal(frame_none, frame_none_list) + + # GH10856 + # dict with scalar values should raise error, even if columns passed + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": 0.7}) + + with pytest.raises(ValueError, match=msg): + DataFrame({"a": 0.7}, columns=["a"]) + + @pytest.mark.parametrize("scalar", [2, np.nan, None, "D"]) + def test_constructor_invalid_items_unused(self, scalar): + # No error if invalid (scalar) value is in fact not used: + result = DataFrame({"a": scalar}, columns=["b"]) + expected = DataFrame(columns=["b"]) + tm.assert_frame_equal(result, expected) + + 
@pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) + def test_constructor_dict_nan_key(self, value): + # GH 18455 + cols = [1, value, 3] + idx = ["a", value] + values = [[0, 3], [1, 4], [2, 5]] + data = {cols[c]: Series(values[c], index=idx) for c in range(3)} + result = DataFrame(data).sort_values(1).sort_values("a", axis=1) + expected = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols + ) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx).sort_values("a", axis=1) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx, columns=cols) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("value", [np.nan, None, float("nan")]) + def test_constructor_dict_nan_tuple_key(self, value): + # GH 18455 + cols = Index([(11, 21), (value, 22), (13, value)]) + idx = Index([("a", value), (value, 2)]) + values = [[0, 3], [1, 4], [2, 5]] + data = {cols[c]: Series(values[c], index=idx) for c in range(3)} + result = DataFrame(data).sort_values((11, 21)).sort_values(("a", value), axis=1) + expected = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), index=idx, columns=cols + ) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx).sort_values(("a", value), axis=1) + tm.assert_frame_equal(result, expected) + + result = DataFrame(data, index=idx, columns=cols) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_order_insertion(self): + datetime_series = tm.makeTimeSeries(nper=30) + datetime_series_short = tm.makeTimeSeries(nper=25) + + # GH19018 + # initialization ordering: by insertion order if python>= 3.6 + d = {"b": datetime_series_short, "a": datetime_series} + frame = DataFrame(data=d) + expected = DataFrame(data=d, columns=list("ba")) + tm.assert_frame_equal(frame, expected) + + def test_constructor_multi_index(self): + # GH 4078 + # construction error with mi and all-nan frame + tuples = [(2, 3), (3, 3), (3, 
3)] + mi = MultiIndex.from_tuples(tuples) + df = DataFrame(index=mi, columns=mi) + assert pd.isna(df).values.ravel().all() + + tuples = [(3, 3), (2, 3), (3, 3)] + mi = MultiIndex.from_tuples(tuples) + df = DataFrame(index=mi, columns=mi) + assert pd.isna(df).values.ravel().all() + + def test_constructor_2d_index(self): + # GH 25416 + # handling of 2d index in construction + df = pd.DataFrame([[1]], columns=[[1]], index=[1, 2]) + expected = pd.DataFrame( + [1, 1], + index=pd.Int64Index([1, 2], dtype="int64"), + columns=pd.MultiIndex(levels=[[1]], codes=[[0]]), + ) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame([[1]], columns=[[1]], index=[[1, 2]]) + expected = pd.DataFrame( + [1, 1], + index=pd.MultiIndex(levels=[[1, 2]], codes=[[0, 1]]), + columns=pd.MultiIndex(levels=[[1]], codes=[[0]]), + ) + tm.assert_frame_equal(df, expected) + + def test_constructor_error_msgs(self): + msg = "Empty data passed with indices specified." + # passing an empty array with columns specified. + with pytest.raises(ValueError, match=msg): + DataFrame(np.empty(0), columns=list("abc")) + + msg = "Mixing dicts with non-Series may lead to ambiguous ordering." 
+ # mix dict and array, wrong size + with pytest.raises(ValueError, match=msg): + DataFrame({"A": {"a": "a", "b": "b"}, "B": ["a", "b", "c"]}) + + # wrong size ndarray, GH 3105 + msg = r"Shape of passed values is \(4, 3\), indices imply \(3, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame( + np.arange(12).reshape((4, 3)), + columns=["foo", "bar", "baz"], + index=pd.date_range("2000-01-01", periods=3), + ) + + arr = np.array([[4, 5, 6]]) + msg = r"Shape of passed values is \(1, 3\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + + arr = np.array([4, 5, 6]) + msg = r"Shape of passed values is \(3, 1\), indices imply \(1, 4\)" + with pytest.raises(ValueError, match=msg): + DataFrame(index=[0], columns=range(0, 4), data=arr) + + # higher dim raise exception + with pytest.raises(ValueError, match="Must pass 2-d input"): + DataFrame(np.zeros((3, 3, 3)), columns=["A", "B", "C"], index=[1]) + + # wrong size axis labels + msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame(np.random.rand(2, 3), columns=["A", "B", "C"], index=[1]) + + msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame(np.random.rand(2, 3), columns=["A", "B"], index=[1, 2]) + + # gh-26429 + msg = "2 columns passed, passed data had 10 columns" + with pytest.raises(ValueError, match=msg): + DataFrame((range(10), range(10, 20)), columns=("ones", "twos")) + + msg = "If using all scalar values, you must pass an index" + with pytest.raises(ValueError, match=msg): + DataFrame({"a": False, "b": True}) + + @pytest.mark.xfail(_is_numpy_dev, reason="Interprets embedded frame as 3D") + def test_constructor_with_embedded_frames(self): + + # embedded data frames + df1 = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) + df2 = DataFrame([df1, df1 + 10]) + + df2.dtypes + str(df2) + + result = 
df2.loc[0, 0] + tm.assert_frame_equal(result, df1) + + result = df2.loc[1, 0] + tm.assert_frame_equal(result, df1 + 10) + + def test_constructor_subclass_dict(self, float_frame, dict_subclass): + # Test for passing dict subclass to constructor + data = { + "col1": dict_subclass((x, 10.0 * x) for x in range(10)), + "col2": dict_subclass((x, 20.0 * x) for x in range(10)), + } + df = DataFrame(data) + refdf = DataFrame({col: dict(val.items()) for col, val in data.items()}) + tm.assert_frame_equal(refdf, df) + + data = dict_subclass(data.items()) + df = DataFrame(data) + tm.assert_frame_equal(refdf, df) + + # try with defaultdict + from collections import defaultdict + + data = {} + float_frame["B"][:10] = np.nan + for k, v in float_frame.items(): + dct = defaultdict(dict) + dct.update(v.to_dict()) + data[k] = dct + frame = DataFrame(data) + expected = frame.reindex(index=float_frame.index) + tm.assert_frame_equal(float_frame, expected) + + def test_constructor_dict_block(self): + expected = np.array([[4.0, 3.0, 2.0, 1.0]]) + df = DataFrame( + {"d": [4.0], "c": [3.0], "b": [2.0], "a": [1.0]}, + columns=["d", "c", "b", "a"], + ) + tm.assert_numpy_array_equal(df.values, expected) + + def test_constructor_dict_cast(self): + # cast float tests + test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} + frame = DataFrame(test_data, dtype=float) + assert len(frame) == 3 + assert frame["B"].dtype == np.float64 + assert frame["A"].dtype == np.float64 + + frame = DataFrame(test_data) + assert len(frame) == 3 + assert frame["B"].dtype == np.object_ + assert frame["A"].dtype == np.float64 + + # can't cast to float + test_data = { + "A": dict(zip(range(20), tm.makeStringIndex(20))), + "B": dict(zip(range(15), np.random.randn(15))), + } + frame = DataFrame(test_data, dtype=float) + assert len(frame) == 20 + assert frame["A"].dtype == np.object_ + assert frame["B"].dtype == np.float64 + + def test_constructor_dict_dont_upcast(self): + d = {"Col1": {"Row1": "A 
String", "Row2": np.nan}} + df = DataFrame(d) + assert isinstance(df["Col1"]["Row2"], float) + + dm = DataFrame([[1, 2], ["a", "b"]], index=[1, 2], columns=[1, 2]) + assert isinstance(dm[1][1], int) + + def test_constructor_dict_of_tuples(self): + # GH #1491 + data = {"a": (1, 2, 3), "b": (4, 5, 6)} + + result = DataFrame(data) + expected = DataFrame({k: list(v) for k, v in data.items()}) + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_constructor_dict_of_ranges(self): + # GH 26356 + data = {"a": range(3), "b": range(3, 6)} + + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_iterators(self): + # GH 26349 + data = {"a": iter(range(3)), "b": reversed(range(3))} + + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_of_generators(self): + # GH 26349 + data = {"a": (i for i in (range(3))), "b": (i for i in reversed(range(3)))} + result = DataFrame(data) + expected = DataFrame({"a": [0, 1, 2], "b": [2, 1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_constructor_dict_multiindex(self): + def check(result, expected): + return tm.assert_frame_equal( + result, + expected, + check_dtype=True, + check_index_type=True, + check_column_type=True, + check_names=True, + ) + + d = { + ("a", "a"): {("i", "i"): 0, ("i", "j"): 1, ("j", "i"): 2}, + ("b", "a"): {("i", "i"): 6, ("i", "j"): 5, ("j", "i"): 4}, + ("b", "c"): {("i", "i"): 7, ("i", "j"): 8, ("j", "i"): 9}, + } + _d = sorted(d.items()) + df = DataFrame(d) + expected = DataFrame( + [x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d]) + ).T + expected.index = MultiIndex.from_tuples(expected.index) + check(df, expected) + + d["z"] = {"y": 123.0, ("i", "i"): 111, ("i", "j"): 111, ("j", "i"): 111} + _d.insert(0, ("z", d["z"])) + expected = DataFrame( + [x[1] for x in 
_d], index=Index([x[0] for x in _d], tupleize_cols=False) + ).T + expected.index = Index(expected.index, tupleize_cols=False) + df = DataFrame(d) + df = df.reindex(columns=expected.columns, index=expected.index) + check(df, expected) + + def test_constructor_dict_datetime64_index(self): + # GH 10160 + dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"] + + def create_data(constructor): + return {i: {constructor(s): 2 * i} for i, s in enumerate(dates_as_str)} + + data_datetime64 = create_data(np.datetime64) + data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d")) + data_Timestamp = create_data(Timestamp) + + expected = DataFrame( + [ + {0: 0, 1: None, 2: None, 3: None}, + {0: None, 1: 2, 2: None, 3: None}, + {0: None, 1: None, 2: 4, 3: None}, + {0: None, 1: None, 2: None, 3: 6}, + ], + index=[Timestamp(dt) for dt in dates_as_str], + ) + + result_datetime64 = DataFrame(data_datetime64) + result_datetime = DataFrame(data_datetime) + result_Timestamp = DataFrame(data_Timestamp) + tm.assert_frame_equal(result_datetime64, expected) + tm.assert_frame_equal(result_datetime, expected) + tm.assert_frame_equal(result_Timestamp, expected) + + def test_constructor_dict_timedelta64_index(self): + # GH 10160 + td_as_int = [1, 2, 3, 4] + + def create_data(constructor): + return {i: {constructor(s): 2 * i} for i, s in enumerate(td_as_int)} + + data_timedelta64 = create_data(lambda x: np.timedelta64(x, "D")) + data_timedelta = create_data(lambda x: timedelta(days=x)) + data_Timedelta = create_data(lambda x: Timedelta(x, "D")) + + expected = DataFrame( + [ + {0: 0, 1: None, 2: None, 3: None}, + {0: None, 1: 2, 2: None, 3: None}, + {0: None, 1: None, 2: 4, 3: None}, + {0: None, 1: None, 2: None, 3: 6}, + ], + index=[Timedelta(td, "D") for td in td_as_int], + ) + + result_timedelta64 = DataFrame(data_timedelta64) + result_timedelta = DataFrame(data_timedelta) + result_Timedelta = DataFrame(data_Timedelta) + tm.assert_frame_equal(result_timedelta64, 
expected) + tm.assert_frame_equal(result_timedelta, expected) + tm.assert_frame_equal(result_Timedelta, expected) + + def test_constructor_period(self): + # PeriodIndex + a = pd.PeriodIndex(["2012-01", "NaT", "2012-04"], freq="M") + b = pd.PeriodIndex(["2012-02-01", "2012-03-01", "NaT"], freq="D") + df = pd.DataFrame({"a": a, "b": b}) + assert df["a"].dtype == a.dtype + assert df["b"].dtype == b.dtype + + # list of periods + df = pd.DataFrame( + {"a": a.astype(object).tolist(), "b": b.astype(object).tolist()} + ) + assert df["a"].dtype == a.dtype + assert df["b"].dtype == b.dtype + + def test_nested_dict_frame_constructor(self): + rng = pd.period_range("1/1/2000", periods=5) + df = DataFrame(np.random.randn(10, 5), columns=rng) + + data = {} + for col in df.columns: + for row in df.index: + data.setdefault(col, {})[row] = df._get_value(row, col) + + result = DataFrame(data, columns=rng) + tm.assert_frame_equal(result, df) + + data = {} + for col in df.columns: + for row in df.index: + data.setdefault(row, {})[col] = df._get_value(row, col) + + result = DataFrame(data, index=rng).T + tm.assert_frame_equal(result, df) + + def _check_basic_constructor(self, empty): + # mat: 2d matrix with shape (3, 2) to input. 
empty - makes sized + # objects + mat = empty((2, 3), dtype=float) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + + # 1-D input + frame = DataFrame(empty((3,)), columns=["A"], index=[1, 2, 3]) + assert len(frame.index) == 3 + assert len(frame.columns) == 1 + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64) + assert frame.values.dtype == np.int64 + + # wrong size axis labels + msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame(mat, columns=["A", "B", "C"], index=[1]) + msg = r"Shape of passed values is \(2, 3\), indices imply \(2, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame(mat, columns=["A", "B"], index=[1, 2]) + + # higher dim raise exception + with pytest.raises(ValueError, match="Must pass 2-d input"): + DataFrame(empty((3, 3, 3)), columns=["A", "B", "C"], index=[1]) + + # automatic labeling + frame = DataFrame(mat) + tm.assert_index_equal(frame.index, pd.Int64Index(range(2))) + tm.assert_index_equal(frame.columns, pd.Int64Index(range(3))) + + frame = DataFrame(mat, index=[1, 2]) + tm.assert_index_equal(frame.columns, pd.Int64Index(range(3))) + + frame = DataFrame(mat, columns=["A", "B", "C"]) + tm.assert_index_equal(frame.index, pd.Int64Index(range(2))) + + # 0-length axis + frame = DataFrame(empty((0, 3))) + assert len(frame.index) == 0 + + frame = DataFrame(empty((3, 0))) + assert len(frame.columns) == 0 + + def test_constructor_ndarray(self): + self._check_basic_constructor(np.ones) + + frame = DataFrame(["foo", "bar"], index=[0, 1], columns=["A"]) + assert len(frame) == 2 + + def test_constructor_maskedarray(self): + self._check_basic_constructor(ma.masked_all) + + # Check non-masked values + mat = ma.masked_all((2, 3), dtype=float) + mat[0, 0] = 1.0 + mat[1, 2] = 2.0 + frame = DataFrame(mat, columns=["A", "B", "C"], 
index=[1, 2]) + assert 1.0 == frame["A"][1] + assert 2.0 == frame["C"][2] + + # what is this even checking?? + mat = ma.masked_all((2, 3), dtype=float) + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + assert np.all(~np.asarray(frame == frame)) + + def test_constructor_maskedarray_nonfloat(self): + # masked int promoted to float + mat = ma.masked_all((2, 3), dtype=int) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.float64) + assert frame.values.dtype == np.float64 + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = 1 + mat2[1, 2] = 2 + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert 1 == frame["A"][1] + assert 2 == frame["C"][2] + + # masked np.datetime64 stays (use NaT as null) + mat = ma.masked_all((2, 3), dtype="M8[ns]") + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert isna(frame).values.all() + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=np.int64) + assert frame.values.dtype == np.int64 + + # Check non-masked values + mat2 = ma.copy(mat) + mat2[0, 0] = 1 + mat2[1, 2] = 2 + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert 1 == frame["A"].view("i8")[1] + assert 2 == frame["C"].view("i8")[2] + + # masked bool promoted to object + mat = ma.masked_all((2, 3), dtype=bool) + # 2-D input + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2]) + + assert len(frame.index) == 2 + assert len(frame.columns) == 3 + assert np.all(~np.asarray(frame == frame)) + + # cast type + frame = DataFrame(mat, columns=["A", "B", "C"], index=[1, 2], dtype=object) + assert frame.values.dtype == object + + # Check non-masked values + 
mat2 = ma.copy(mat) + mat2[0, 0] = True + mat2[1, 2] = False + frame = DataFrame(mat2, columns=["A", "B", "C"], index=[1, 2]) + assert frame["A"][1] is True + assert frame["C"][2] is False + + def test_constructor_maskedarray_hardened(self): + # Check numpy masked arrays with hard masks -- from GH24574 + mat_hard = ma.masked_all((2, 2), dtype=float).harden_mask() + result = pd.DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) + expected = pd.DataFrame( + {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, + columns=["A", "B"], + index=[1, 2], + dtype=float, + ) + tm.assert_frame_equal(result, expected) + # Check case where mask is hard but no data are masked + mat_hard = ma.ones((2, 2), dtype=float).harden_mask() + result = pd.DataFrame(mat_hard, columns=["A", "B"], index=[1, 2]) + expected = pd.DataFrame( + {"A": [1.0, 1.0], "B": [1.0, 1.0]}, + columns=["A", "B"], + index=[1, 2], + dtype=float, + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_maskedrecarray_dtype(self): + # Ensure constructor honors dtype + data = np.ma.array( + np.ma.zeros(5, dtype=[("date", "0 + df = DataFrame( + { + "a": 1.0, + "b": 2, + "c": "foo", + floatname: np.array([1.0] * 10, dtype=floatname), + intname: np.array([1] * 10, dtype=intname), + }, + index=np.arange(10), + ) + result = df.dtypes + expected = Series( + [np.dtype("float64")] + + [np.dtype("int64")] + + [np.dtype("object")] + + [np.dtype("float64")] + + [np.dtype(intname)], + index=["a", "b", "c", floatname, intname], + ) + tm.assert_series_equal(result, expected) + + # GH 2809 + ind = date_range(start="2000-01-01", freq="D", periods=10) + datetimes = [ts.to_pydatetime() for ts in ind] + datetime_s = Series(datetimes) + assert datetime_s.dtype == "M8[ns]" + + # GH 2810 + ind = date_range(start="2000-01-01", freq="D", periods=10) + datetimes = [ts.to_pydatetime() for ts in ind] + dates = [ts.date() for ts in ind] + df = DataFrame(datetimes, columns=["datetimes"]) + df["dates"] = dates + result = df.dtypes + 
expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("object")], + index=["datetimes", "dates"], + ) + tm.assert_series_equal(result, expected) + + # GH 7594 + # don't coerce tz-aware + import pytz + + tz = pytz.timezone("US/Eastern") + dt = tz.localize(datetime(2012, 1, 1)) + + df = DataFrame({"End Date": dt}, index=[0]) + assert df.iat[0, 0] == dt + tm.assert_series_equal( + df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) + ) + + df = DataFrame([{"End Date": dt}]) + assert df.iat[0, 0] == dt + tm.assert_series_equal( + df.dtypes, Series({"End Date": "datetime64[ns, US/Eastern]"}) + ) + + # tz-aware (UTC and other tz's) + # GH 8411 + dr = date_range("20130101", periods=3) + df = DataFrame({"value": dr}) + assert df.iat[0, 0].tz is None + dr = date_range("20130101", periods=3, tz="UTC") + df = DataFrame({"value": dr}) + assert str(df.iat[0, 0].tz) == "UTC" + dr = date_range("20130101", periods=3, tz="US/Eastern") + df = DataFrame({"value": dr}) + assert str(df.iat[0, 0].tz) == "US/Eastern" + + # GH 7822 + # preserver an index with a tz on dict construction + i = date_range("1/1/2011", periods=5, freq="10s", tz="US/Eastern") + + expected = DataFrame({"a": i.to_series().reset_index(drop=True)}) + df = DataFrame() + df["a"] = i + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": i}) + tm.assert_frame_equal(df, expected) + + # multiples + i_no_tz = date_range("1/1/2011", periods=5, freq="10s") + df = DataFrame({"a": i, "b": i_no_tz}) + expected = DataFrame({"a": i.to_series().reset_index(drop=True), "b": i_no_tz}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "arr", + [ + np.array([None, None, None, None, datetime.now(), None]), + np.array([None, None, datetime.now(), None]), + [[np.datetime64("NaT")], [None]], + [[np.datetime64("NaT")], [pd.NaT]], + [[None], [np.datetime64("NaT")]], + [[None], [pd.NaT]], + [[pd.NaT], [np.datetime64("NaT")]], + [[pd.NaT], [None]], + ], + ) + def 
test_constructor_datetimes_with_nulls(self, arr): + # gh-15869, GH#11220 + result = DataFrame(arr).dtypes + expected = Series([np.dtype("datetime64[ns]")]) + tm.assert_series_equal(result, expected) + + def test_constructor_for_list_with_dtypes(self): + # test list of lists/ndarrays + df = DataFrame([np.arange(5) for x in range(5)]) + result = df.dtypes + expected = Series([np.dtype("int64")] * 5) + tm.assert_series_equal(result, expected) + + df = DataFrame([np.array(np.arange(5), dtype="int32") for x in range(5)]) + result = df.dtypes + expected = Series([np.dtype("int64")] * 5) + tm.assert_series_equal(result, expected) + + # overflow issue? (we always expecte int64 upcasting here) + df = DataFrame({"a": [2 ** 31, 2 ** 31 + 1]}) + assert df.dtypes.iloc[0] == np.dtype("int64") + + # GH #2751 (construction with no index specified), make sure we cast to + # platform values + df = DataFrame([1, 2]) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame([1.0, 2.0]) + assert df.dtypes.iloc[0] == np.dtype("float64") + + df = DataFrame({"a": [1, 2]}) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame({"a": [1.0, 2.0]}) + assert df.dtypes.iloc[0] == np.dtype("float64") + + df = DataFrame({"a": 1}, index=range(3)) + assert df.dtypes.iloc[0] == np.dtype("int64") + + df = DataFrame({"a": 1.0}, index=range(3)) + assert df.dtypes.iloc[0] == np.dtype("float64") + + # with object list + df = DataFrame( + { + "a": [1, 2, 4, 7], + "b": [1.2, 2.3, 5.1, 6.3], + "c": list("abcd"), + "d": [datetime(2000, 1, 1) for i in range(4)], + "e": [1.0, 2, 4.0, 7], + } + ) + result = df.dtypes + expected = Series( + [ + np.dtype("int64"), + np.dtype("float64"), + np.dtype("object"), + np.dtype("datetime64[ns]"), + np.dtype("float64"), + ], + index=list("abcde"), + ) + tm.assert_series_equal(result, expected) + + def test_constructor_frame_copy(self, float_frame): + cop = DataFrame(float_frame, copy=True) + cop["A"] = 5 + assert (cop["A"] == 5).all() + assert not 
(float_frame["A"] == 5).all() + + def test_constructor_ndarray_copy(self, float_frame): + df = DataFrame(float_frame.values) + + float_frame.values[5] = 5 + assert (df.values[5] == 5).all() + + df = DataFrame(float_frame.values, copy=True) + float_frame.values[6] = 6 + assert not (df.values[6] == 6).all() + + def test_constructor_series_copy(self, float_frame): + series = float_frame._series + + df = DataFrame({"A": series["A"]}) + df["A"][:] = 5 + + assert not (series["A"] == 5).all() + + def test_constructor_with_nas(self): + # GH 5016 + # na's in indices + + def check(df): + for i in range(len(df.columns)): + df.iloc[:, i] + + indexer = np.arange(len(df.columns))[isna(df.columns)] + + # No NaN found -> error + if len(indexer) == 0: + msg = ( + "cannot do label indexing on" + r" " + r" with these indexers \[nan\] of " + ) + with pytest.raises(TypeError, match=msg): + df.loc[:, np.nan] + # single nan should result in Series + elif len(indexer) == 1: + tm.assert_series_equal(df.iloc[:, indexer[0]], df.loc[:, np.nan]) + # multiple nans should result in DataFrame + else: + tm.assert_frame_equal(df.iloc[:, indexer], df.loc[:, np.nan]) + + df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]) + check(df) + + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1.1, 2.2, np.nan]) + check(df) + + df = DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan]) + check(df) + + df = DataFrame( + [[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1.1, 2.2, np.nan] + ) + check(df) + + # GH 21428 (non-unique columns) + df = DataFrame([[0.0, 1, 2, 3.0], [4, 5, 6, 7]], columns=[np.nan, 1, 2, 2]) + check(df) + + def test_constructor_lists_to_object_dtype(self): + # from #1074 + d = DataFrame({"a": [np.nan, False]}) + assert d["a"].dtype == np.object_ + assert not d["a"][1] + + def test_constructor_categorical(self): + + # GH8626 + + # dict creation + df = DataFrame({"A": list("abc")}, dtype="category") + expected = Series(list("abc"), dtype="category", name="A") + 
tm.assert_series_equal(df["A"], expected) + + # to_frame + s = Series(list("abc"), dtype="category") + result = s.to_frame() + expected = Series(list("abc"), dtype="category", name=0) + tm.assert_series_equal(result[0], expected) + result = s.to_frame(name="foo") + expected = Series(list("abc"), dtype="category", name="foo") + tm.assert_series_equal(result["foo"], expected) + + # list-like creation + df = DataFrame(list("abc"), dtype="category") + expected = Series(list("abc"), dtype="category", name=0) + tm.assert_series_equal(df[0], expected) + + # ndim != 1 + df = DataFrame([Categorical(list("abc"))]) + expected = DataFrame({0: Series(list("abc"), dtype="category")}) + tm.assert_frame_equal(df, expected) + + df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) + expected = DataFrame( + { + 0: Series(list("abc"), dtype="category"), + 1: Series(list("abd"), dtype="category"), + }, + columns=[0, 1], + ) + tm.assert_frame_equal(df, expected) + + # mixed + df = DataFrame([Categorical(list("abc")), list("def")]) + expected = DataFrame( + {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1] + ) + tm.assert_frame_equal(df, expected) + + # invalid (shape) + msg = r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)" + with pytest.raises(ValueError, match=msg): + DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + + # ndim > 1 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + Categorical(np.array([list("abcd")])) + + def test_constructor_categorical_series(self): + + items = [1, 2, 3, 1] + exp = Series(items).astype("category") + res = Series(items, dtype="category") + tm.assert_series_equal(res, exp) + + items = ["a", "b", "c", "a"] + exp = Series(items).astype("category") + res = Series(items, dtype="category") + tm.assert_series_equal(res, exp) + + # insert into frame with different index + # GH 8076 + index = date_range("20000101", 
periods=3) + expected = Series( + Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"]) + ) + expected.index = index + + expected = DataFrame({"x": expected}) + df = DataFrame({"x": Series(["a", "b", "c"], dtype="category")}, index=index) + tm.assert_frame_equal(df, expected) + + def test_from_records_to_records(self): + # from numpy documentation + arr = np.zeros((2,), dtype=("i4,f4,a10")) + arr[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")] + + # TODO(wesm): unused + frame = DataFrame.from_records(arr) # noqa + + index = pd.Index(np.arange(len(arr))[::-1]) + indexed_frame = DataFrame.from_records(arr, index=index) + tm.assert_index_equal(indexed_frame.index, index) + + # without names, it should go to last ditch + arr2 = np.zeros((2, 3)) + tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2)) + + # wrong length + msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)" + with pytest.raises(ValueError, match=msg): + DataFrame.from_records(arr, index=index[:-1]) + + indexed_frame = DataFrame.from_records(arr, index="f1") + + # what to do? 
+ records = indexed_frame.to_records() + assert len(records.dtype.names) == 3 + + records = indexed_frame.to_records(index=False) + assert len(records.dtype.names) == 2 + assert "index" not in records.dtype.names + + def test_from_records_nones(self): + tuples = [(1, 2, None, 3), (1, 2, None, 3), (None, 2, 5, 3)] + + df = DataFrame.from_records(tuples, columns=["a", "b", "c", "d"]) + assert np.isnan(df["c"][0]) + + def test_from_records_iterator(self): + arr = np.array( + [(1.0, 1.0, 2, 2), (3.0, 3.0, 4, 4), (5.0, 5.0, 6, 6), (7.0, 7.0, 8, 8)], + dtype=[ + ("x", np.float64), + ("u", np.float32), + ("y", np.int64), + ("z", np.int32), + ], + ) + df = DataFrame.from_records(iter(arr), nrows=2) + xp = DataFrame( + { + "x": np.array([1.0, 3.0], dtype=np.float64), + "u": np.array([1.0, 3.0], dtype=np.float32), + "y": np.array([2, 4], dtype=np.int64), + "z": np.array([2, 4], dtype=np.int32), + } + ) + tm.assert_frame_equal(df.reindex_like(xp), xp) + + # no dtypes specified here, so just compare with the default + arr = [(1.0, 2), (3.0, 4), (5.0, 6), (7.0, 8)] + df = DataFrame.from_records(iter(arr), columns=["x", "y"], nrows=2) + tm.assert_frame_equal(df, xp.reindex(columns=["x", "y"]), check_dtype=False) + + def test_from_records_tuples_generator(self): + def tuple_generator(length): + for i in range(length): + letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + yield (i, letters[i % len(letters)], i / length) + + columns_names = ["Integer", "String", "Float"] + columns = [ + [i[j] for i in tuple_generator(10)] for j in range(len(columns_names)) + ] + data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]} + expected = DataFrame(data, columns=columns_names) + + generator = tuple_generator(10) + result = DataFrame.from_records(generator, columns=columns_names) + tm.assert_frame_equal(result, expected) + + def test_from_records_lists_generator(self): + def list_generator(length): + for i in range(length): + letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + yield [i, 
letters[i % len(letters)], i / length] + + columns_names = ["Integer", "String", "Float"] + columns = [ + [i[j] for i in list_generator(10)] for j in range(len(columns_names)) + ] + data = {"Integer": columns[0], "String": columns[1], "Float": columns[2]} + expected = DataFrame(data, columns=columns_names) + + generator = list_generator(10) + result = DataFrame.from_records(generator, columns=columns_names) + tm.assert_frame_equal(result, expected) + + def test_from_records_columns_not_modified(self): + tuples = [(1, 2, 3), (1, 2, 3), (2, 5, 3)] + + columns = ["a", "b", "c"] + original_columns = list(columns) + + df = DataFrame.from_records(tuples, columns=columns, index="a") # noqa + + assert columns == original_columns + + def test_from_records_decimal(self): + from decimal import Decimal + + tuples = [(Decimal("1.5"),), (Decimal("2.5"),), (None,)] + + df = DataFrame.from_records(tuples, columns=["a"]) + assert df["a"].dtype == object + + df = DataFrame.from_records(tuples, columns=["a"], coerce_float=True) + assert df["a"].dtype == np.float64 + assert np.isnan(df["a"].values[-1]) + + def test_from_records_duplicates(self): + result = DataFrame.from_records([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"]) + + expected = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "a"]) + + tm.assert_frame_equal(result, expected) + + def test_from_records_set_index_name(self): + def create_dict(order_id): + return { + "order_id": order_id, + "quantity": np.random.randint(1, 10), + "price": np.random.randint(1, 10), + } + + documents = [create_dict(i) for i in range(10)] + # demo missing data + documents.append({"order_id": 10, "quantity": 5}) + + result = DataFrame.from_records(documents, index="order_id") + assert result.index.name == "order_id" + + # MultiIndex + result = DataFrame.from_records(documents, index=["order_id", "quantity"]) + assert result.index.names == ("order_id", "quantity") + + def test_from_records_misc_brokenness(self): + # #2179 + + data = {1: 
["foo"], 2: ["bar"]} + + result = DataFrame.from_records(data, columns=["a", "b"]) + exp = DataFrame(data, columns=["a", "b"]) + tm.assert_frame_equal(result, exp) + + # overlap in index/index_names + + data = {"a": [1, 2, 3], "b": [4, 5, 6]} + + result = DataFrame.from_records(data, index=["a", "b", "c"]) + exp = DataFrame(data, index=["a", "b", "c"]) + tm.assert_frame_equal(result, exp) + + # GH 2623 + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), "hi"]) # test col upconverts to obj + df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) + result = df2_obj.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("object")], index=["date", "test"] + ) + tm.assert_series_equal(result, expected) + + rows = [] + rows.append([datetime(2010, 1, 1), 1]) + rows.append([datetime(2010, 1, 2), 1]) + df2_obj = DataFrame.from_records(rows, columns=["date", "test"]) + result = df2_obj.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("int64")], index=["date", "test"] + ) + tm.assert_series_equal(result, expected) + + def test_from_records_empty(self): + # 3562 + result = DataFrame.from_records([], columns=["a", "b", "c"]) + expected = DataFrame(columns=["a", "b", "c"]) + tm.assert_frame_equal(result, expected) + + result = DataFrame.from_records([], columns=["a", "b", "b"]) + expected = DataFrame(columns=["a", "b", "b"]) + tm.assert_frame_equal(result, expected) + + def test_from_records_empty_with_nonempty_fields_gh3682(self): + a = np.array([(1, 2)], dtype=[("id", np.int64), ("value", np.int64)]) + df = DataFrame.from_records(a, index="id") + tm.assert_index_equal(df.index, Index([1], name="id")) + assert df.index.name == "id" + tm.assert_index_equal(df.columns, Index(["value"])) + + b = np.array([], dtype=[("id", np.int64), ("value", np.int64)]) + df = DataFrame.from_records(b, index="id") + tm.assert_index_equal(df.index, Index([], name="id")) + assert df.index.name == "id" + + def 
test_from_records_with_datetimes(self): + + # this may fail on certain platforms because of a numpy issue + # related GH6140 + if not is_platform_little_endian(): + pytest.skip("known failure of test on non-little endian") + + # construction with a null in a recarray + # GH 6140 + expected = DataFrame({"EXPIRY": [datetime(2005, 3, 1, 0, 0), None]}) + + arrdata = [np.array([datetime(2005, 3, 1, 0, 0), None])] + dtypes = [("EXPIRY", " 0 + result = float_string_frame.dtypes + expected = Series( + {k: v.dtype for k, v in float_string_frame.items()}, index=result.index + ) + tm.assert_series_equal(result, expected) + + # compat, GH 8722 + with option_context("use_inf_as_na", True): + df = DataFrame([[1]]) + result = df.dtypes + tm.assert_series_equal(result, Series({0: np.dtype("int64")})) + + def test_astype_float(self, float_frame): + casted = float_frame.astype(int) + expected = DataFrame( + float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + casted = float_frame.astype(np.int32) + expected = DataFrame( + float_frame.values.astype(np.int32), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + float_frame["foo"] = "5" + casted = float_frame.astype(int) + expected = DataFrame( + float_frame.values.astype(int), + index=float_frame.index, + columns=float_frame.columns, + ) + tm.assert_frame_equal(casted, expected) + + def test_astype_mixed_float(self, mixed_float_frame): + # mixed casting + casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float32") + _check_cast(casted, "float32") + + casted = mixed_float_frame.reindex(columns=["A", "B"]).astype("float16") + _check_cast(casted, "float16") + + def test_astype_mixed_type(self, mixed_type_frame): + # mixed casting + mn = mixed_type_frame._get_numeric_data().copy() + mn["little_float"] = np.array(12345.0, dtype="float16") + mn["big_float"] = np.array(123456789101112.0, 
dtype="float64") + + casted = mn.astype("float64") + _check_cast(casted, "float64") + + casted = mn.astype("int64") + _check_cast(casted, "int64") + + casted = mn.reindex(columns=["little_float"]).astype("float16") + _check_cast(casted, "float16") + + casted = mn.astype("float32") + _check_cast(casted, "float32") + + casted = mn.astype("int32") + _check_cast(casted, "int32") + + # to object + casted = mn.astype("O") + _check_cast(casted, "object") + + def test_astype_with_exclude_string(self, float_frame): + df = float_frame.copy() + expected = float_frame.astype(int) + df["string"] = "foo" + casted = df.astype(int, errors="ignore") + + expected["string"] = "foo" + tm.assert_frame_equal(casted, expected) + + df = float_frame.copy() + expected = float_frame.astype(np.int32) + df["string"] = "foo" + casted = df.astype(np.int32, errors="ignore") + + expected["string"] = "foo" + tm.assert_frame_equal(casted, expected) + + def test_astype_with_view_float(self, float_frame): + + # this is the only real reason to do it this way + tf = np.round(float_frame).astype(np.int32) + casted = tf.astype(np.float32, copy=False) + + # TODO(wesm): verification? + tf = float_frame.astype(np.float64) + casted = tf.astype(np.int64, copy=False) # noqa + + def test_astype_with_view_mixed_float(self, mixed_float_frame): + + tf = mixed_float_frame.reindex(columns=["A", "B", "C"]) + + casted = tf.astype(np.int64) + casted = tf.astype(np.float32) # noqa + + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + @pytest.mark.parametrize("val", [np.nan, np.inf]) + def test_astype_cast_nan_inf_int(self, val, dtype): + # see gh-14265 + # + # Check NaN and inf --> raise error when converting to int. 
+ msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" + df = DataFrame([val]) + + with pytest.raises(ValueError, match=msg): + df.astype(dtype) + + def test_astype_str(self): + # see gh-9757 + a = Series(date_range("2010-01-04", periods=5)) + b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) + c = Series([Timedelta(x, unit="d") for x in range(5)]) + d = Series(range(5)) + e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + + df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e}) + + # Datetime-like + result = df.astype(str) + + expected = DataFrame( + { + "a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))), + "b": list(map(str, map(Timestamp, b._values))), + "c": list( + map( + str, + map(lambda x: Timedelta(x)._repr_base(format="all"), c._values), + ) + ), + "d": list(map(str, d._values)), + "e": list(map(str, e._values)), + } + ) + + tm.assert_frame_equal(result, expected) + + def test_astype_str_float(self): + # see gh-11302 + result = DataFrame([np.NaN]).astype(str) + expected = DataFrame(["nan"]) + + tm.assert_frame_equal(result, expected) + result = DataFrame([1.12345678901234567890]).astype(str) + + # < 1.14 truncates + # >= 1.14 preserves the full repr + val = "1.12345678901" if _np_version_under1p14 else "1.1234567890123457" + expected = DataFrame([val]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # GH7271 & GH16717 + a = Series(date_range("2010-01-04", periods=5)) + b = Series(range(5)) + c = Series([0.0, 0.2, 0.4, 0.6, 0.8]) + d = Series(["1.0", "2", "3.14", "4", "5.4"]) + df = DataFrame({"a": a, "b": b, "c": c, "d": d}) + original = df.copy(deep=True) + + # change type of a subset of columns + dt1 = dtype_class({"b": "str", "d": "float32"}) + result = df.astype(dt1) + expected = DataFrame( + { + "a": a, + "b": Series(["0", "1", "2", "3", "4"]), + "c": c, + "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], 
dtype="float32"), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, original) + + dt2 = dtype_class({"b": np.float32, "c": "float32", "d": np.float64}) + result = df.astype(dt2) + expected = DataFrame( + { + "a": a, + "b": Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype="float32"), + "c": Series([0.0, 0.2, 0.4, 0.6, 0.8], dtype="float32"), + "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float64"), + } + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, original) + + # change all columns + dt3 = dtype_class({"a": str, "b": str, "c": str, "d": str}) + tm.assert_frame_equal(df.astype(dt3), df.astype(str)) + tm.assert_frame_equal(df, original) + + # error should be raised when using something other than column labels + # in the keys of the dtype dict + dt4 = dtype_class({"b": str, 2: str}) + dt5 = dtype_class({"e": str}) + msg = "Only a column name can be used for the key in a dtype mappings argument" + with pytest.raises(KeyError, match=msg): + df.astype(dt4) + with pytest.raises(KeyError, match=msg): + df.astype(dt5) + tm.assert_frame_equal(df, original) + + # if the dtypes provided are the same as the original dtypes, the + # resulting DataFrame should be the same as the original DataFrame + dt6 = dtype_class({col: df[col].dtype for col in df.columns}) + equiv = df.astype(dt6) + tm.assert_frame_equal(df, equiv) + tm.assert_frame_equal(df, original) + + # GH 16717 + # if dtypes provided is empty, the resulting DataFrame + # should be the same as the original DataFrame + dt7 = dtype_class({}) if dtype_class is dict else dtype_class({}, dtype=object) + equiv = df.astype(dt7) + tm.assert_frame_equal(df, equiv) + tm.assert_frame_equal(df, original) + + def test_astype_duplicate_col(self): + a1 = Series([1, 2, 3, 4, 5], name="a") + b = Series([0.1, 0.2, 0.4, 0.6, 0.8], name="b") + a2 = Series([0, 1, 2, 3, 4], name="a") + df = concat([a1, b, a2], axis=1) + + result = df.astype(str) + a1_str = Series(["1", "2", "3", "4", "5"], 
dtype="str", name="a") + b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b") + a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a") + expected = concat([a1_str, b_str, a2_str], axis=1) + tm.assert_frame_equal(result, expected) + + result = df.astype({"a": "str"}) + expected = concat([a1_str, b, a2_str], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "category", + CategoricalDtype(), + CategoricalDtype(ordered=True), + CategoricalDtype(ordered=False), + CategoricalDtype(categories=list("abcdef")), + CategoricalDtype(categories=list("edba"), ordered=False), + CategoricalDtype(categories=list("edcb"), ordered=True), + ], + ids=repr, + ) + def test_astype_categorical(self, dtype): + # GH 18099 + d = {"A": list("abbc"), "B": list("bccd"), "C": list("cdde")} + df = DataFrame(d) + result = df.astype(dtype) + expected = DataFrame({k: Categorical(d[k], dtype=dtype) for k in d}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("cls", [CategoricalDtype, DatetimeTZDtype, IntervalDtype]) + def test_astype_categoricaldtype_class_raises(self, cls): + df = DataFrame({"A": ["a", "a", "b", "c"]}) + xpr = "Expected an instance of {}".format(cls.__name__) + with pytest.raises(TypeError, match=xpr): + df.astype({"A": cls}) + + with pytest.raises(TypeError, match=xpr): + df["A"].astype(cls) + + def test_singlerow_slice_categoricaldtype_gives_series(self): + # GH29521 + df = pd.DataFrame({"x": pd.Categorical("a b c d e".split())}) + result = df.iloc[0] + raw_cat = pd.Categorical(["a"], categories=["a", "b", "c", "d", "e"]) + expected = pd.Series(raw_cat, index=["x"], name=0, dtype="category") + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) + def test_astype_extension_dtypes(self, dtype): + # GH 22578 + df = pd.DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) + + expected1 = pd.DataFrame( + { + "a": 
integer_array([1, 3, 5], dtype=dtype), + "b": integer_array([2, 4, 6], dtype=dtype), + } + ) + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + tm.assert_frame_equal(df.astype(dtype).astype("float64"), df) + + df = pd.DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) + df["b"] = df["b"].astype(dtype) + expected2 = pd.DataFrame( + {"a": [1.0, 3.0, 5.0], "b": integer_array([2, 4, 6], dtype=dtype)} + ) + tm.assert_frame_equal(df, expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) + def test_astype_extension_dtypes_1d(self, dtype): + # GH 22578 + df = pd.DataFrame({"a": [1.0, 2.0, 3.0]}) + + expected1 = pd.DataFrame({"a": integer_array([1, 2, 3], dtype=dtype)}) + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + df = pd.DataFrame({"a": [1.0, 2.0, 3.0]}) + df["a"] = df["a"].astype(dtype) + expected2 = pd.DataFrame({"a": integer_array([1, 2, 3], dtype=dtype)}) + tm.assert_frame_equal(df, expected2) + + tm.assert_frame_equal(df.astype(dtype), expected1) + tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) + + @pytest.mark.parametrize("dtype", ["category", "Int64"]) + def test_astype_extension_dtypes_duplicate_col(self, dtype): + # GH 24704 + a1 = Series([0, np.nan, 4], name="a") + a2 = Series([np.nan, 3, 5], name="a") + df = concat([a1, a2], axis=1) + + result = df.astype(dtype) + expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("kwargs", [dict(), dict(other=None)]) + def test_df_where_with_category(self, kwargs): + # GH 16979 + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, True], [False, True, True]]) + + # 
change type to category + df.A = df.A.astype("category") + df.B = df.B.astype("category") + df.C = df.C.astype("category") + + result = df.A.where(mask[:, 0], **kwargs) + expected = Series(pd.Categorical([0, np.nan], categories=[0, 3]), name="A") + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] + ) + def test_astype_column_metadata(self, dtype): + # GH 19920 + columns = pd.UInt64Index([100, 200, 300], name="foo") + df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) + df = df.astype(dtype) + tm.assert_index_equal(df.columns, columns) + + def test_df_where_change_dtype(self): + # GH 16979 + df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + mask = np.array([[True, False, False], [False, False, True]]) + + result = df.where(mask) + expected = DataFrame( + [[0, np.nan, np.nan], [np.nan, np.nan, 5]], columns=list("ABC") + ) + + tm.assert_frame_equal(result, expected) + + # change type to category + df.A = df.A.astype("category") + df.B = df.B.astype("category") + df.C = df.C.astype("category") + + result = df.where(mask) + A = pd.Categorical([0, np.nan], categories=[0, 3]) + B = pd.Categorical([np.nan, np.nan], categories=[1, 4]) + C = pd.Categorical([np.nan, 5], categories=[2, 5]) + expected = DataFrame({"A": A, "B": B, "C": C}) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_from_datetimelike_to_objectt(self, dtype, unit): + # tests astype to object dtype + # gh-19223 / gh-12425 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(object) + assert (result.dtypes == object).all() + + if dtype.startswith("M8"): + assert result.iloc[0, 0] == pd.to_datetime(1, unit=unit) + else: + assert result.iloc[0, 0] == pd.to_timedelta(1, unit=unit) + + 
@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units from numeric origination + # gh-19223 / gh-12425 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([[1, 2, 3]], dtype=arr_dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_datetime_unit(self, unit): + # tests all units from datetime origination + # gh-19223 + dtype = "M8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns"]) + def test_astype_to_timedelta_unit_ns(self, unit): + # preserver the timedelta conversion + # gh-19223 + dtype = "m8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(arr.astype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"]) + def test_astype_to_timedelta_unit(self, unit): + # coerce to float + # gh-19223 + dtype = "m8[{}]".format(unit) + arr = np.array([[1, 2, 3]], dtype=dtype) + df = DataFrame(arr) + result = df.astype(dtype) + expected = DataFrame(df.values.astype(dtype).astype(float)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_astype_to_incorrect_datetimelike(self, unit): + # trying to astype a m to a M, or vice-versa + # gh-19224 + dtype = "M8[{}]".format(unit) + other = "m8[{}]".format(unit) + + df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) 
+ msg = ( + r"cannot astype a datetimelike from \[datetime64\[ns\]\] to" + r" \[timedelta64\[{}\]\]" + ).format(unit) + with pytest.raises(TypeError, match=msg): + df.astype(other) + + msg = ( + r"cannot astype a timedelta from \[timedelta64\[ns\]\] to" + r" \[datetime64\[{}\]\]" + ).format(unit) + df = DataFrame(np.array([[1, 2, 3]], dtype=other)) + with pytest.raises(TypeError, match=msg): + df.astype(dtype) + + def test_timedeltas(self): + df = DataFrame( + dict( + A=Series(date_range("2012-1-1", periods=3, freq="D")), + B=Series([timedelta(days=i) for i in range(3)]), + ) + ) + result = df.dtypes + expected = Series( + [np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")], index=list("AB") + ) + tm.assert_series_equal(result, expected) + + df["C"] = df["A"] + df["B"] + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + ], + index=list("ABC"), + ) + tm.assert_series_equal(result, expected) + + # mixed int types + df["D"] = 1 + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + np.dtype("timedelta64[ns]"), + np.dtype("datetime64[ns]"), + np.dtype("int64"), + ], + index=list("ABCD"), + ) + tm.assert_series_equal(result, expected) + + def test_arg_for_errors_in_astype(self): + # issue #14878 + + df = DataFrame([1, 2, 3]) + + with pytest.raises(ValueError): + df.astype(np.float64, errors=True) + + df.astype(np.int8, errors="ignore") + + def test_arg_for_errors_in_astype_dictlist(self): + # GH-25905 + df = pd.DataFrame( + [ + {"a": "1", "b": "16.5%", "c": "test"}, + {"a": "2.2", "b": "15.3", "c": "another_test"}, + ] + ) + expected = pd.DataFrame( + [ + {"a": 1.0, "b": "16.5%", "c": "test"}, + {"a": 2.2, "b": "15.3", "c": "another_test"}, + ] + ) + type_dict = {"a": "float64", "b": "float64", "c": "object"} + + result = df.astype(dtype=type_dict, errors="ignore") + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "input_vals", + 
[ + ([1, 2]), + (["1", "2"]), + (list(pd.date_range("1/1/2011", periods=2, freq="H"))), + (list(pd.date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), + ([pd.Interval(left=0, right=5)]), + ], + ) + def test_constructor_list_str(self, input_vals, string_dtype): + # GH 16605 + # Ensure that data elements are converted to strings when + # dtype is str, 'str', or 'U' + + result = DataFrame({"A": input_vals}, dtype=string_dtype) + expected = DataFrame({"A": input_vals}).astype({"A": string_dtype}) + tm.assert_frame_equal(result, expected) + + def test_constructor_list_str_na(self, string_dtype): + + result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype) + expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "data, expected", + [ + # empty + (DataFrame(), True), + # multi-same + (DataFrame({"A": [1, 2], "B": [1, 2]}), True), + # multi-object + ( + DataFrame( + { + "A": np.array([1, 2], dtype=object), + "B": np.array(["a", "b"], dtype=object), + } + ), + True, + ), + # multi-extension + ( + DataFrame( + {"A": pd.Categorical(["a", "b"]), "B": pd.Categorical(["a", "b"])} + ), + True, + ), + # differ types + (DataFrame({"A": [1, 2], "B": [1.0, 2.0]}), False), + # differ sizes + ( + DataFrame( + { + "A": np.array([1, 2], dtype=np.int32), + "B": np.array([1, 2], dtype=np.int64), + } + ), + False, + ), + # multi-extension differ + ( + DataFrame( + {"A": pd.Categorical(["a", "b"]), "B": pd.Categorical(["b", "c"])} + ), + False, + ), + ], + ) + def test_is_homogeneous_type(self, data, expected): + assert data._is_homogeneous_type is expected + + def test_asarray_homogenous(self): + df = pd.DataFrame({"A": pd.Categorical([1, 2]), "B": pd.Categorical([1, 2])}) + result = np.asarray(df) + # may change from object in the future + expected = np.array([[1, 1], [2, 2]], dtype="object") + tm.assert_numpy_array_equal(result, expected) + + def 
test_str_to_small_float_conversion_type(self): + # GH 20388 + np.random.seed(13) + col_data = [str(np.random.random() * 1e-12) for _ in range(5)] + result = pd.DataFrame(col_data, columns=["A"]) + expected = pd.DataFrame(col_data, columns=["A"], dtype=object) + tm.assert_frame_equal(result, expected) + # change the dtype of the elements from object to float one by one + result.loc[result.index, "A"] = [float(x) for x in col_data] + expected = pd.DataFrame(col_data, columns=["A"], dtype=float) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")] + ) + def test_convert_dtypes(self, convert_integer, expected): + # Specific types are tested in tests/series/test_dtypes.py + # Just check that it works for DataFrame here + df = pd.DataFrame( + { + "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")), + "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")), + } + ) + result = df.convert_dtypes(True, True, convert_integer, False) + expected = pd.DataFrame( + { + "a": pd.Series([1, 2, 3], dtype=expected), + "b": pd.Series(["x", "y", "z"], dtype="string"), + } + ) + tm.assert_frame_equal(result, expected) + + +class TestDataFrameDatetimeWithTZ: + def test_interleave(self, timezone_frame): + + # interleave with object + result = timezone_frame.assign(D="foo").values + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + pd.NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + pd.NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ["foo", "foo", "foo"], + ], + dtype=object, + ).T + tm.assert_numpy_array_equal(result, expected) + + # interleave with only datetime64[ns] + result = timezone_frame.values + expected = np.array( + [ + [ + Timestamp("2013-01-01 
00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + pd.NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + pd.NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + tm.assert_numpy_array_equal(result, expected) + + def test_astype(self, timezone_frame): + # astype + expected = np.array( + [ + [ + Timestamp("2013-01-01 00:00:00"), + Timestamp("2013-01-02 00:00:00"), + Timestamp("2013-01-03 00:00:00"), + ], + [ + Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), + pd.NaT, + Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), + ], + [ + Timestamp("2013-01-01 00:00:00+0100", tz="CET"), + pd.NaT, + Timestamp("2013-01-03 00:00:00+0100", tz="CET"), + ], + ], + dtype=object, + ).T + expected = DataFrame( + expected, + index=timezone_frame.index, + columns=timezone_frame.columns, + dtype=object, + ) + result = timezone_frame.astype(object) + tm.assert_frame_equal(result, expected) + + result = timezone_frame.astype("datetime64[ns]") + expected = DataFrame( + { + "A": date_range("20130101", periods=3), + "B": ( + date_range("20130101", periods=3, tz="US/Eastern") + .tz_convert("UTC") + .tz_localize(None) + ), + "C": ( + date_range("20130101", periods=3, tz="CET") + .tz_convert("UTC") + .tz_localize(None) + ), + } + ) + expected.iloc[1, 1] = pd.NaT + expected.iloc[1, 2] = pd.NaT + tm.assert_frame_equal(result, expected) + + def test_astype_str(self, timezone_frame): + # str formatting + result = timezone_frame.astype(str) + expected = DataFrame( + [ + [ + "2013-01-01", + "2013-01-01 00:00:00-05:00", + "2013-01-01 00:00:00+01:00", + ], + ["2013-01-02", "NaT", "NaT"], + [ + "2013-01-03", + "2013-01-03 00:00:00-05:00", + "2013-01-03 00:00:00+01:00", + ], + ], + columns=timezone_frame.columns, + ) + tm.assert_frame_equal(result, expected) + + with 
option_context("display.max_columns", 20): + result = str(timezone_frame) + assert ( + "0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00" + ) in result + assert ( + "1 2013-01-02 NaT NaT" + ) in result + assert ( + "2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00" + ) in result diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py new file mode 100644 index 00000000..c6e28f3c --- /dev/null +++ b/pandas/tests/frame/test_join.py @@ -0,0 +1,218 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, period_range +import pandas._testing as tm + + +@pytest.fixture +def frame_with_period_index(): + return DataFrame( + data=np.arange(20).reshape(4, 5), + columns=list("abcde"), + index=period_range(start="2000", freq="A", periods=4), + ) + + +@pytest.fixture +def left(): + return DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right(): + return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) + + +@pytest.mark.parametrize( + "how, sort, expected", + [ + ("inner", False, DataFrame({"a": [20, 10], "b": [200, 100]}, index=[2, 1])), + ("inner", True, DataFrame({"a": [10, 20], "b": [100, 200]}, index=[1, 2])), + ( + "left", + False, + DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), + ), + ( + "left", + True, + DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), + ), + ( + "right", + False, + DataFrame({"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2]), + ), + ( + "right", + True, + DataFrame({"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3]), + ), + ( + "outer", + False, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ( + "outer", + True, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ], +) +def test_join(left, right, how, sort, expected): + + result = left.join(right, how=how, 
sort=sort) + tm.assert_frame_equal(result, expected) + + +def test_join_index(float_frame): + # left / right + + f = float_frame.loc[float_frame.index[:10], ["A", "B"]] + f2 = float_frame.loc[float_frame.index[5:], ["C", "D"]].iloc[::-1] + + joined = f.join(f2) + tm.assert_index_equal(f.index, joined.index) + expected_columns = Index(["A", "B", "C", "D"]) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how="left") + tm.assert_index_equal(joined.index, f.index) + tm.assert_index_equal(joined.columns, expected_columns) + + joined = f.join(f2, how="right") + tm.assert_index_equal(joined.index, f2.index) + tm.assert_index_equal(joined.columns, expected_columns) + + # inner + + joined = f.join(f2, how="inner") + tm.assert_index_equal(joined.index, f.index[5:10]) + tm.assert_index_equal(joined.columns, expected_columns) + + # outer + + joined = f.join(f2, how="outer") + tm.assert_index_equal(joined.index, float_frame.index.sort_values()) + tm.assert_index_equal(joined.columns, expected_columns) + + with pytest.raises(ValueError, match="join method"): + f.join(f2, how="foo") + + # corner case - overlapping columns + msg = "columns overlap but no suffix" + for how in ("outer", "left", "inner"): + with pytest.raises(ValueError, match=msg): + float_frame.join(float_frame, how=how) + + +def test_join_index_more(float_frame): + af = float_frame.loc[:, ["A", "B"]] + bf = float_frame.loc[::2, ["C", "D"]] + + expected = af.copy() + expected["C"] = float_frame["C"][::2] + expected["D"] = float_frame["D"][::2] + + result = af.join(bf) + tm.assert_frame_equal(result, expected) + + result = af.join(bf, how="right") + tm.assert_frame_equal(result, expected[::2]) + + result = bf.join(af, how="right") + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + +def test_join_index_series(float_frame): + df = float_frame.copy() + s = df.pop(float_frame.columns[-1]) + joined = df.join(s) + + # TODO should this check_names ? 
+ tm.assert_frame_equal(joined, float_frame, check_names=False) + + s.name = None + with pytest.raises(ValueError, match="must have a name"): + df.join(s) + + +def test_join_overlap(float_frame): + df1 = float_frame.loc[:, ["A", "B", "C"]] + df2 = float_frame.loc[:, ["B", "C", "D"]] + + joined = df1.join(df2, lsuffix="_df1", rsuffix="_df2") + df1_suf = df1.loc[:, ["B", "C"]].add_suffix("_df1") + df2_suf = df2.loc[:, ["B", "C"]].add_suffix("_df2") + + no_overlap = float_frame.loc[:, ["A", "D"]] + expected = df1_suf.join(df2_suf).join(no_overlap) + + # column order not necessarily sorted + tm.assert_frame_equal(joined, expected.loc[:, joined.columns]) + + +def test_join_period_index(frame_with_period_index): + other = frame_with_period_index.rename(columns=lambda x: "{key}{key}".format(key=x)) + + joined_values = np.concatenate([frame_with_period_index.values] * 2, axis=1) + + joined_cols = frame_with_period_index.columns.append(other.columns) + + joined = frame_with_period_index.join(other) + expected = DataFrame( + data=joined_values, columns=joined_cols, index=frame_with_period_index.index + ) + + tm.assert_frame_equal(joined, expected) + + +def test_join_left_sequence_non_unique_index(): + # https://github.com/pandas-dev/pandas/issues/19607 + df1 = DataFrame({"a": [0, 10, 20]}, index=[1, 2, 3]) + df2 = DataFrame({"b": [100, 200, 300]}, index=[4, 3, 2]) + df3 = DataFrame({"c": [400, 500, 600]}, index=[2, 2, 4]) + + joined = df1.join([df2, df3], how="left") + + expected = DataFrame( + { + "a": [0, 10, 10, 20], + "b": [np.nan, 300, 300, 200], + "c": [np.nan, 400, 500, np.nan], + }, + index=[1, 2, 2, 3], + ) + + tm.assert_frame_equal(joined, expected) + + +@pytest.mark.parametrize("sort_kw", [True, False]) +def test_suppress_future_warning_with_sort_kw(sort_kw): + a = DataFrame({"col1": [1, 2]}, index=["c", "a"]) + + b = DataFrame({"col2": [4, 5]}, index=["b", "a"]) + + c = DataFrame({"col3": [7, 8]}, index=["a", "b"]) + + expected = DataFrame( + { + "col1": {"a": 
2.0, "b": float("nan"), "c": 1.0}, + "col2": {"a": 5.0, "b": 4.0, "c": float("nan")}, + "col3": {"a": 7.0, "b": 8.0, "c": float("nan")}, + } + ) + if sort_kw is False: + expected = expected.reindex(index=["c", "a", "b"]) + + with tm.assert_produces_warning(None, check_stacklevel=False): + result = a.join([b, c], how="outer", sort=sort_kw) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py new file mode 100644 index 00000000..2e6759cb --- /dev/null +++ b/pandas/tests/frame/test_missing.py @@ -0,0 +1,985 @@ +import datetime + +import dateutil +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import Categorical, DataFrame, Series, Timestamp, date_range +import pandas._testing as tm +from pandas.tests.frame.common import _check_mixed_float + + +class TestDataFrameMissingData: + def test_dropEmptyRows(self, float_frame): + N = len(float_frame.index) + mat = np.random.randn(N) + mat[:5] = np.nan + + frame = DataFrame({"foo": mat}, index=float_frame.index) + original = Series(mat, index=float_frame.index, name="foo") + expected = original.dropna() + inplace_frame1, inplace_frame2 = frame.copy(), frame.copy() + + smaller_frame = frame.dropna(how="all") + # check that original was preserved + tm.assert_series_equal(frame["foo"], original) + inplace_frame1.dropna(how="all", inplace=True) + tm.assert_series_equal(smaller_frame["foo"], expected) + tm.assert_series_equal(inplace_frame1["foo"], expected) + + smaller_frame = frame.dropna(how="all", subset=["foo"]) + inplace_frame2.dropna(how="all", subset=["foo"], inplace=True) + tm.assert_series_equal(smaller_frame["foo"], expected) + tm.assert_series_equal(inplace_frame2["foo"], expected) + + def test_dropIncompleteRows(self, float_frame): + N = len(float_frame.index) + mat = np.random.randn(N) + mat[:5] = np.nan + + frame = DataFrame({"foo": mat}, index=float_frame.index) + frame["bar"] 
= 5 + original = Series(mat, index=float_frame.index, name="foo") + inp_frame1, inp_frame2 = frame.copy(), frame.copy() + + smaller_frame = frame.dropna() + tm.assert_series_equal(frame["foo"], original) + inp_frame1.dropna(inplace=True) + + exp = Series(mat[5:], index=float_frame.index[5:], name="foo") + tm.assert_series_equal(smaller_frame["foo"], exp) + tm.assert_series_equal(inp_frame1["foo"], exp) + + samesize_frame = frame.dropna(subset=["bar"]) + tm.assert_series_equal(frame["foo"], original) + assert (frame["bar"] == 5).all() + inp_frame2.dropna(subset=["bar"], inplace=True) + tm.assert_index_equal(samesize_frame.index, float_frame.index) + tm.assert_index_equal(inp_frame2.index, float_frame.index) + + def test_dropna(self): + df = DataFrame(np.random.randn(6, 4)) + df[2][:2] = np.nan + + dropped = df.dropna(axis=1) + expected = df.loc[:, [0, 1, 3]] + inp = df.copy() + inp.dropna(axis=1, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + + dropped = df.dropna(axis=0) + expected = df.loc[list(range(2, 6))] + inp = df.copy() + inp.dropna(axis=0, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + + # threshold + dropped = df.dropna(axis=1, thresh=5) + expected = df.loc[:, [0, 1, 3]] + inp = df.copy() + inp.dropna(axis=1, thresh=5, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + + dropped = df.dropna(axis=0, thresh=4) + expected = df.loc[range(2, 6)] + inp = df.copy() + inp.dropna(axis=0, thresh=4, inplace=True) + tm.assert_frame_equal(dropped, expected) + tm.assert_frame_equal(inp, expected) + + dropped = df.dropna(axis=1, thresh=4) + tm.assert_frame_equal(dropped, df) + + dropped = df.dropna(axis=1, thresh=3) + tm.assert_frame_equal(dropped, df) + + # subset + dropped = df.dropna(axis=0, subset=[0, 1, 3]) + inp = df.copy() + inp.dropna(axis=0, subset=[0, 1, 3], inplace=True) + tm.assert_frame_equal(dropped, df) + 
tm.assert_frame_equal(inp, df) + + # all + dropped = df.dropna(axis=1, how="all") + tm.assert_frame_equal(dropped, df) + + df[2] = np.nan + dropped = df.dropna(axis=1, how="all") + expected = df.loc[:, [0, 1, 3]] + tm.assert_frame_equal(dropped, expected) + + # bad input + msg = "No axis named 3 for object type " + with pytest.raises(ValueError, match=msg): + df.dropna(axis=3) + + def test_drop_and_dropna_caching(self): + # tst that cacher updates + original = Series([1, 2, np.nan], name="A") + expected = Series([1, 2], dtype=original.dtype, name="A") + df = pd.DataFrame({"A": original.values.copy()}) + df2 = df.copy() + df["A"].dropna() + tm.assert_series_equal(df["A"], original) + df["A"].dropna(inplace=True) + tm.assert_series_equal(df["A"], expected) + df2["A"].drop([1]) + tm.assert_series_equal(df2["A"], original) + df2["A"].drop([1], inplace=True) + tm.assert_series_equal(df2["A"], original.drop([1])) + + def test_dropna_corner(self, float_frame): + # bad input + msg = "invalid how option: foo" + with pytest.raises(ValueError, match=msg): + float_frame.dropna(how="foo") + msg = "must specify how or thresh" + with pytest.raises(TypeError, match=msg): + float_frame.dropna(how=None) + # non-existent column - 8303 + with pytest.raises(KeyError, match=r"^\['X'\]$"): + float_frame.dropna(subset=["A", "X"]) + + def test_dropna_multiple_axes(self): + df = DataFrame( + [ + [1, np.nan, 2, 3], + [4, np.nan, 5, 6], + [np.nan, np.nan, np.nan, np.nan], + [7, np.nan, 8, 9], + ] + ) + + # GH20987 + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=[0, 1]) + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=(0, 1)) + + inp = df.copy() + with pytest.raises(TypeError, match="supplying multiple axes"): + inp.dropna(how="all", axis=(0, 1), inplace=True) + + def test_dropna_tz_aware_datetime(self): + # GH13407 + df = DataFrame() + dt1 = datetime.datetime(2015, 1, 1, tzinfo=dateutil.tz.tzutc()) + 
dt2 = datetime.datetime(2015, 2, 2, tzinfo=dateutil.tz.tzutc()) + df["Time"] = [dt1] + result = df.dropna(axis=0) + expected = DataFrame({"Time": [dt1]}) + tm.assert_frame_equal(result, expected) + + # Ex2 + df = DataFrame({"Time": [dt1, None, np.nan, dt2]}) + result = df.dropna(axis=0) + expected = DataFrame([dt1, dt2], columns=["Time"], index=[0, 3]) + tm.assert_frame_equal(result, expected) + + def test_dropna_categorical_interval_index(self): + # GH 25087 + ii = pd.IntervalIndex.from_breaks([0, 2.78, 3.14, 6.28]) + ci = pd.CategoricalIndex(ii) + df = pd.DataFrame({"A": list("abc")}, index=ci) + + expected = df + result = df.dropna() + tm.assert_frame_equal(result, expected) + + def test_fillna_datetime(self, datetime_frame): + tf = datetime_frame + tf.loc[tf.index[:5], "A"] = np.nan + tf.loc[tf.index[-5:], "A"] = np.nan + + zero_filled = datetime_frame.fillna(0) + assert (zero_filled.loc[zero_filled.index[:5], "A"] == 0).all() + + padded = datetime_frame.fillna(method="pad") + assert np.isnan(padded.loc[padded.index[:5], "A"]).all() + assert ( + padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"] + ).all() + + msg = "Must specify a fill 'value' or 'method'" + with pytest.raises(ValueError, match=msg): + datetime_frame.fillna() + msg = "Cannot specify both 'value' and 'method'" + with pytest.raises(ValueError, match=msg): + datetime_frame.fillna(5, method="ffill") + + def test_fillna_mixed_type(self, float_string_frame): + + mf = float_string_frame + mf.loc[mf.index[5:20], "foo"] = np.nan + mf.loc[mf.index[-10:], "A"] = np.nan + # TODO: make stronger assertion here, GH 25640 + mf.fillna(value=0) + mf.fillna(method="pad") + + def test_fillna_mixed_float(self, mixed_float_frame): + + # mixed numeric (but no float16) + mf = mixed_float_frame.reindex(columns=["A", "B", "D"]) + mf.loc[mf.index[-10:], "A"] = np.nan + result = mf.fillna(value=0) + _check_mixed_float(result, dtype=dict(C=None)) + + result = mf.fillna(method="pad") + 
_check_mixed_float(result, dtype=dict(C=None)) + + def test_fillna_empty(self): + # empty frame (GH #2778) + df = DataFrame(columns=["x"]) + for m in ["pad", "backfill"]: + df.x.fillna(method=m, inplace=True) + df.x.fillna(method=m) + + def test_fillna_different_dtype(self): + # with different dtype (GH#3386) + df = DataFrame( + [["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]] + ) + + result = df.fillna({2: "foo"}) + expected = DataFrame( + [["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]] + ) + tm.assert_frame_equal(result, expected) + + df.fillna({2: "foo"}, inplace=True) + tm.assert_frame_equal(df, expected) + + def test_fillna_limit_and_value(self): + # limit and value + df = DataFrame(np.random.randn(10, 3)) + df.iloc[2:7, 0] = np.nan + df.iloc[3:5, 2] = np.nan + + expected = df.copy() + expected.iloc[2, 0] = 999 + expected.iloc[3, 2] = 999 + result = df.fillna(999, limit=1) + tm.assert_frame_equal(result, expected) + + def test_fillna_datelike(self): + # with datelike + # GH#6344 + df = DataFrame( + { + "Date": [pd.NaT, Timestamp("2014-1-1")], + "Date2": [Timestamp("2013-1-1"), pd.NaT], + } + ) + + expected = df.copy() + expected["Date"] = expected["Date"].fillna(df.loc[df.index[0], "Date2"]) + result = df.fillna(value={"Date": df["Date2"]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_tzaware(self): + # with timezone + # GH#15855 + df = pd.DataFrame({"A": [pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT]}) + exp = pd.DataFrame( + { + "A": [ + pd.Timestamp("2012-11-11 00:00:00+01:00"), + pd.Timestamp("2012-11-11 00:00:00+01:00"), + ] + } + ) + tm.assert_frame_equal(df.fillna(method="pad"), exp) + + df = pd.DataFrame({"A": [pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")]}) + exp = pd.DataFrame( + { + "A": [ + pd.Timestamp("2012-11-11 00:00:00+01:00"), + pd.Timestamp("2012-11-11 00:00:00+01:00"), + ] + } + ) + tm.assert_frame_equal(df.fillna(method="bfill"), exp) + + def 
test_fillna_tzaware_different_column(self): + # with timezone in another column + # GH#15522 + df = pd.DataFrame( + { + "A": pd.date_range("20130101", periods=4, tz="US/Eastern"), + "B": [1, 2, np.nan, np.nan], + } + ) + result = df.fillna(method="pad") + expected = pd.DataFrame( + { + "A": pd.date_range("20130101", periods=4, tz="US/Eastern"), + "B": [1.0, 2.0, 2.0, 2.0], + } + ) + tm.assert_frame_equal(result, expected) + + def test_na_actions_categorical(self): + + cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + vals = ["a", "b", np.nan, "d"] + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3]) + vals2 = ["a", "b", "b", "d"] + df_exp_fill = DataFrame({"cats": cat2, "vals": vals2}) + cat3 = Categorical([1, 2, 3], categories=[1, 2, 3]) + vals3 = ["a", "b", np.nan] + df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3}) + cat4 = Categorical([1, 2], categories=[1, 2, 3]) + vals4 = ["a", "b"] + df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4}) + + # fillna + res = df.fillna(value={"cats": 3, "vals": "b"}) + tm.assert_frame_equal(res, df_exp_fill) + + with pytest.raises(ValueError, match=("fill value must be in categories")): + df.fillna(value={"cats": 4, "vals": "c"}) + + res = df.fillna(method="pad") + tm.assert_frame_equal(res, df_exp_fill) + + # dropna + res = df.dropna(subset=["cats"]) + tm.assert_frame_equal(res, df_exp_drop_cats) + + res = df.dropna() + tm.assert_frame_equal(res, df_exp_drop_all) + + # make sure that fillna takes missing values into account + c = Categorical([np.nan, "b", np.nan], categories=["a", "b"]) + df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) + + cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"]) + df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]}) + + res = df.fillna("a") + tm.assert_frame_equal(res, df_exp) + + def test_fillna_categorical_nan(self): + # GH 14021 + # np.nan should always be a valid filler + cat = Categorical([np.nan, 2, 
np.nan]) + val = Categorical([np.nan, np.nan, np.nan]) + df = DataFrame({"cats": cat, "vals": val}) + with tm.assert_produces_warning(RuntimeWarning): + res = df.fillna(df.median()) + v_exp = [np.nan, np.nan, np.nan] + df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category") + tm.assert_frame_equal(res, df_exp) + + result = df.cats.fillna(np.nan) + tm.assert_series_equal(result, df.cats) + + result = df.vals.fillna(np.nan) + tm.assert_series_equal(result, df.vals) + + idx = pd.DatetimeIndex( + ["2011-01-01 09:00", "2016-01-01 23:45", "2011-01-01 09:00", pd.NaT, pd.NaT] + ) + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + + idx = pd.PeriodIndex( + ["2011-01", "2011-01", "2011-01", pd.NaT, pd.NaT], freq="M" + ) + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + + idx = pd.TimedeltaIndex(["1 days", "2 days", "1 days", pd.NaT, pd.NaT]) + df = DataFrame({"a": Categorical(idx)}) + tm.assert_frame_equal(df.fillna(value=pd.NaT), df) + + def test_fillna_downcast(self): + # GH 15277 + # infer int64 from float64 + df = pd.DataFrame({"a": [1.0, np.nan]}) + result = df.fillna(0, downcast="infer") + expected = pd.DataFrame({"a": [1, 0]}) + tm.assert_frame_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + df = pd.DataFrame({"a": [1.0, np.nan]}) + result = df.fillna({"a": 0}, downcast="infer") + expected = pd.DataFrame({"a": [1, 0]}) + tm.assert_frame_equal(result, expected) + + def test_fillna_dtype_conversion(self): + # make sure that fillna on an empty frame works + df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) + result = df.dtypes + expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + result = df.fillna(1) + expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5]) + tm.assert_frame_equal(result, expected) + + # empty block + df = 
DataFrame(index=range(3), columns=["A", "B"], dtype="float64") + result = df.fillna("nan") + expected = DataFrame("nan", index=range(3), columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + # equiv of replace + df = DataFrame(dict(A=[1, np.nan], B=[1.0, 2.0])) + for v in ["", 1, np.nan, 1.0]: + expected = df.replace(np.nan, v) + result = df.fillna(v) + tm.assert_frame_equal(result, expected) + + def test_fillna_datetime_columns(self): + # GH 7095 + df = pd.DataFrame( + { + "A": [-1, -2, np.nan], + "B": date_range("20130101", periods=3), + "C": ["foo", "bar", None], + "D": ["foo2", "bar2", None], + }, + index=date_range("20130110", periods=3), + ) + result = df.fillna("?") + expected = pd.DataFrame( + { + "A": [-1, -2, "?"], + "B": date_range("20130101", periods=3), + "C": ["foo", "bar", "?"], + "D": ["foo2", "bar2", "?"], + }, + index=date_range("20130110", periods=3), + ) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame( + { + "A": [-1, -2, np.nan], + "B": [pd.Timestamp("2013-01-01"), pd.Timestamp("2013-01-02"), pd.NaT], + "C": ["foo", "bar", None], + "D": ["foo2", "bar2", None], + }, + index=date_range("20130110", periods=3), + ) + result = df.fillna("?") + expected = pd.DataFrame( + { + "A": [-1, -2, "?"], + "B": [pd.Timestamp("2013-01-01"), pd.Timestamp("2013-01-02"), "?"], + "C": ["foo", "bar", "?"], + "D": ["foo2", "bar2", "?"], + }, + index=pd.date_range("20130110", periods=3), + ) + tm.assert_frame_equal(result, expected) + + def test_ffill(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tm.assert_frame_equal( + datetime_frame.ffill(), datetime_frame.fillna(method="ffill") + ) + + def test_bfill(self, datetime_frame): + datetime_frame["A"][:5] = np.nan + datetime_frame["A"][-5:] = np.nan + + tm.assert_frame_equal( + datetime_frame.bfill(), datetime_frame.fillna(method="bfill") + ) + + def test_frame_pad_backfill_limit(self): + index = np.arange(10) + df = 
DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index, method="pad", limit=5) + + expected = df[:2].reindex(index).fillna(method="pad") + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index, method="backfill", limit=5) + + expected = df[-2:].reindex(index).fillna(method="backfill") + expected.values[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_frame_fillna_limit(self): + index = np.arange(10) + df = DataFrame(np.random.randn(10, 4), index=index) + + result = df[:2].reindex(index) + result = result.fillna(method="pad", limit=5) + + expected = df[:2].reindex(index).fillna(method="pad") + expected.values[-3:] = np.nan + tm.assert_frame_equal(result, expected) + + result = df[-2:].reindex(index) + result = result.fillna(method="backfill", limit=5) + + expected = df[-2:].reindex(index).fillna(method="backfill") + expected.values[:3] = np.nan + tm.assert_frame_equal(result, expected) + + def test_fillna_skip_certain_blocks(self): + # don't try to fill boolean, int blocks + + df = DataFrame(np.random.randn(10, 4).astype(int)) + + # it works! 
+ df.fillna(np.nan) + + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_positive_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be greater than 0" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=-5) + + @pytest.mark.parametrize("type", [int, float]) + def test_fillna_integer_limit(self, type): + df = DataFrame(np.random.randn(10, 4)).astype(type) + + msg = "Limit must be an integer" + with pytest.raises(ValueError, match=msg): + df.fillna(0, limit=0.5) + + def test_fillna_inplace(self): + df = DataFrame(np.random.randn(10, 4)) + df[1][:4] = np.nan + df[3][-4:] = np.nan + + expected = df.fillna(value=0) + assert expected is not df + + df.fillna(value=0, inplace=True) + tm.assert_frame_equal(df, expected) + + expected = df.fillna(value={0: 0}, inplace=True) + assert expected is None + + df[1][:4] = np.nan + df[3][-4:] = np.nan + expected = df.fillna(method="ffill") + assert expected is not df + + df.fillna(method="ffill", inplace=True) + tm.assert_frame_equal(df, expected) + + def test_fillna_dict_series(self): + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + } + ) + + result = df.fillna({"a": 0, "b": 5}) + + expected = df.copy() + expected["a"] = expected["a"].fillna(0) + expected["b"] = expected["b"].fillna(5) + tm.assert_frame_equal(result, expected) + + # it works + result = df.fillna({"a": 0, "b": 5, "d": 7}) + + # Series treated same as dict + result = df.fillna(df.max()) + expected = df.fillna(df.max().to_dict()) + tm.assert_frame_equal(result, expected) + + # disable this for now + with pytest.raises(NotImplementedError, match="column by column"): + df.fillna(df.max(1), axis=1) + + def test_fillna_dataframe(self): + # GH 8377 + df = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, np.nan], + "b": [1, 2, 3, np.nan, np.nan], + "c": [np.nan, 1, 2, 3, 4], + }, + index=list("VWXYZ"), + ) + + # df2 may have 
different index and columns + df2 = DataFrame( + { + "a": [np.nan, 10, 20, 30, 40], + "b": [50, 60, 70, 80, 90], + "foo": ["bar"] * 5, + }, + index=list("VWXuZ"), + ) + + result = df.fillna(df2) + + # only those columns and indices which are shared get filled + expected = DataFrame( + { + "a": [np.nan, 1, 2, np.nan, 40], + "b": [1, 2, 3, np.nan, 90], + "c": [np.nan, 1, 2, 3, 4], + }, + index=list("VWXYZ"), + ) + + tm.assert_frame_equal(result, expected) + + def test_fillna_columns(self): + df = DataFrame(np.random.randn(10, 10)) + df.values[:, ::2] = np.nan + + result = df.fillna(method="ffill", axis=1) + expected = df.T.fillna(method="pad").T + tm.assert_frame_equal(result, expected) + + df.insert(6, "foo", 5) + result = df.fillna(method="ffill", axis=1) + expected = df.astype(float).fillna(method="ffill", axis=1) + tm.assert_frame_equal(result, expected) + + def test_fillna_invalid_method(self, float_frame): + with pytest.raises(ValueError, match="ffil"): + float_frame.fillna(method="ffil") + + def test_fillna_invalid_value(self, float_frame): + # list + msg = '"value" parameter must be a scalar or dict, but you passed a "{}"' + with pytest.raises(TypeError, match=msg.format("list")): + float_frame.fillna([1, 2]) + # tuple + with pytest.raises(TypeError, match=msg.format("tuple")): + float_frame.fillna((1, 2)) + # frame with series + msg = ( + '"value" parameter must be a scalar, dict or Series, but you' + ' passed a "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + float_frame.iloc[:, 0].fillna(float_frame) + + def test_fillna_col_reordering(self): + cols = ["COL." 
+ str(i) for i in range(5, 0, -1)] + data = np.random.rand(20, 5) + df = DataFrame(index=range(20), columns=cols, data=data) + filled = df.fillna(method="ffill") + assert df.columns.tolist() == filled.columns.tolist() + + def test_fill_corner(self, float_frame, float_string_frame): + mf = float_string_frame + mf.loc[mf.index[5:20], "foo"] = np.nan + mf.loc[mf.index[-10:], "A"] = np.nan + + filled = float_string_frame.fillna(value=0) + assert (filled.loc[filled.index[5:20], "foo"] == 0).all() + del float_string_frame["foo"] + + empty_float = float_frame.reindex(columns=[]) + + # TODO(wesm): unused? + result = empty_float.fillna(value=0) # noqa + + def test_fill_value_when_combine_const(self): + # GH12723 + dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float") + df = DataFrame({"foo": dat}, index=range(6)) + + exp = df.fillna(0).add(2) + res = df.add(2, fill_value=0) + tm.assert_frame_equal(res, exp) + + +class TestDataFrameInterpolate: + def test_interp_basic(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + expected = DataFrame( + { + "A": [1.0, 2.0, 3.0, 4.0], + "B": [1.0, 4.0, 9.0, 9.0], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + result = df.interpolate() + tm.assert_frame_equal(result, expected) + + result = df.set_index("C").interpolate() + expected = df.set_index("C") + expected.loc[3, "A"] = 3 + expected.loc[5, "B"] = 9 + tm.assert_frame_equal(result, expected) + + def test_interp_bad_method(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + with pytest.raises(ValueError): + df.interpolate(method="not_a_method") + + def test_interp_combo(self): + df = DataFrame( + { + "A": [1.0, 2.0, np.nan, 4.0], + "B": [1, 4, 9, np.nan], + "C": [1, 2, 3, 5], + "D": list("abcd"), + } + ) + + result = df["A"].interpolate() + expected = Series([1.0, 2.0, 3.0, 4.0], name="A") + 
tm.assert_series_equal(result, expected) + + result = df["A"].interpolate(downcast="infer") + expected = Series([1, 2, 3, 4], name="A") + tm.assert_series_equal(result, expected) + + def test_interp_nan_idx(self): + df = DataFrame({"A": [1, 2, np.nan, 4], "B": [np.nan, 2, 3, 4]}) + df = df.set_index("A") + with pytest.raises(NotImplementedError): + df.interpolate(method="values") + + @td.skip_if_no_scipy + def test_interp_various(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + df = df.set_index("C") + expected = df.copy() + result = df.interpolate(method="polynomial", order=1) + + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923076 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="cubic") + # GH #15662. + expected.A.loc[3] = 2.81547781 + expected.A.loc[13] = 5.52964175 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="nearest") + expected.A.loc[3] = 2 + expected.A.loc[13] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + result = df.interpolate(method="quadratic") + expected.A.loc[3] = 2.82150771 + expected.A.loc[13] = 6.12648668 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="slinear") + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923077 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="zero") + expected.A.loc[3] = 2.0 + expected.A.loc[13] = 5 + tm.assert_frame_equal(result, expected, check_dtype=False) + + @td.skip_if_no_scipy + def test_interp_alt_scipy(self): + df = DataFrame( + {"A": [1, 2, np.nan, 4, 5, np.nan, 7], "C": [1, 2, 3, 5, 8, 13, 21]} + ) + result = df.interpolate(method="barycentric") + expected = df.copy() + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6 + tm.assert_frame_equal(result, expected) + + result = df.interpolate(method="barycentric", downcast="infer") + tm.assert_frame_equal(result, expected.astype(np.int64)) + + result = 
df.interpolate(method="krogh") + expectedk = df.copy() + expectedk["A"] = expected["A"] + tm.assert_frame_equal(result, expectedk) + + result = df.interpolate(method="pchip") + expected.loc[2, "A"] = 3 + expected.loc[5, "A"] = 6.0 + + tm.assert_frame_equal(result, expected) + + def test_interp_rowwise(self): + df = DataFrame( + { + 0: [1, 2, np.nan, 4], + 1: [2, 3, 4, np.nan], + 2: [np.nan, 4, 5, 6], + 3: [4, np.nan, 6, 7], + 4: [1, 2, 3, 4], + } + ) + result = df.interpolate(axis=1) + expected = df.copy() + expected.loc[3, 1] = 5 + expected.loc[0, 2] = 3 + expected.loc[1, 3] = 3 + expected[4] = expected[4].astype(np.float64) + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=1, method="values") + tm.assert_frame_equal(result, expected) + + result = df.interpolate(axis=0) + expected = df.interpolate() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "axis_name, axis_number", + [ + pytest.param("rows", 0, id="rows_0"), + pytest.param("index", 0, id="index_0"), + pytest.param("columns", 1, id="columns_1"), + ], + ) + def test_interp_axis_names(self, axis_name, axis_number): + # GH 29132: test axis names + data = {0: [0, np.nan, 6], 1: [1, np.nan, 7], 2: [2, 5, 8]} + + df = DataFrame(data, dtype=np.float64) + result = df.interpolate(axis=axis_name, method="linear") + expected = df.interpolate(axis=axis_number, method="linear") + tm.assert_frame_equal(result, expected) + + def test_rowwise_alt(self): + df = DataFrame( + { + 0: [0, 0.5, 1.0, np.nan, 4, 8, np.nan, np.nan, 64], + 1: [1, 2, 3, 4, 3, 2, 1, 0, -1], + } + ) + df.interpolate(axis=0) + + @pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] + ) + def test_interp_leading_nans(self, check_scipy): + df = DataFrame( + {"A": [np.nan, np.nan, 0.5, 0.25, 0], "B": [np.nan, -3, -3.5, np.nan, -4]} + ) + result = df.interpolate() + expected = df.copy() + expected["B"].loc[3] = -3.75 + tm.assert_frame_equal(result, expected) + + if 
check_scipy: + result = df.interpolate(method="polynomial", order=1) + tm.assert_frame_equal(result, expected) + + def test_interp_raise_on_only_mixed(self): + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": ["a", "b", "c", "d"], + "C": [np.nan, 2, 5, 7], + "D": [np.nan, np.nan, 9, 9], + "E": [1, 2, 3, 4], + } + ) + with pytest.raises(TypeError): + df.interpolate(axis=1) + + def test_interp_raise_on_all_object_dtype(self): + # GH 22985 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, dtype="object") + msg = ( + "Cannot interpolate with all object-dtype columns " + "in the DataFrame. Try setting at least one " + "column to a numeric dtype." + ) + with pytest.raises(TypeError, match=msg): + df.interpolate() + + def test_interp_inplace(self): + df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) + expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) + result = df.copy() + result["a"].interpolate(inplace=True) + tm.assert_frame_equal(result, expected) + + result = df.copy() + result["a"].interpolate(inplace=True, downcast="infer") + tm.assert_frame_equal(result, expected.astype("int64")) + + def test_interp_inplace_row(self): + # GH 10395 + result = DataFrame( + {"a": [1.0, 2.0, 3.0, 4.0], "b": [np.nan, 2.0, 3.0, 4.0], "c": [3, 2, 2, 2]} + ) + expected = result.interpolate(method="linear", axis=1, inplace=False) + result.interpolate(method="linear", axis=1, inplace=True) + tm.assert_frame_equal(result, expected) + + def test_interp_ignore_all_good(self): + # GH + df = DataFrame( + { + "A": [1, 2, np.nan, 4], + "B": [1, 2, 3, 4], + "C": [1.0, 2.0, np.nan, 4.0], + "D": [1.0, 2.0, 3.0, 4.0], + } + ) + expected = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="float64"), + "B": np.array([1, 2, 3, 4], dtype="int64"), + "C": np.array([1.0, 2.0, 3, 4.0], dtype="float64"), + "D": np.array([1.0, 2.0, 3.0, 4.0], dtype="float64"), + } + ) + + result = df.interpolate(downcast=None) + tm.assert_frame_equal(result, expected) + + # all good + result = df[["B", 
"D"]].interpolate(downcast=None) + tm.assert_frame_equal(result, df[["B", "D"]]) + + @pytest.mark.parametrize("axis", [0, 1]) + def test_interp_time_inplace_axis(self, axis): + # GH 9687 + periods = 5 + idx = pd.date_range(start="2014-01-01", periods=periods) + data = np.random.rand(periods, periods) + data[data < 0.5] = np.nan + expected = pd.DataFrame(index=idx, columns=idx, data=data) + + result = expected.interpolate(axis=0, method="time") + expected.interpolate(axis=0, method="time", inplace=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py new file mode 100644 index 00000000..8bc2aa21 --- /dev/null +++ b/pandas/tests/frame/test_mutate_columns.py @@ -0,0 +1,262 @@ +import re + +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm + +# Column add, remove, delete. + + +class TestDataFrameMutateColumns: + def test_assign(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + original = df.copy() + result = df.assign(C=df.B / df.A) + expected = df.copy() + expected["C"] = [4, 2.5, 2] + tm.assert_frame_equal(result, expected) + + # lambda syntax + result = df.assign(C=lambda x: x.B / x.A) + tm.assert_frame_equal(result, expected) + + # original is unmodified + tm.assert_frame_equal(df, original) + + # Non-Series array-like + result = df.assign(C=[4, 2.5, 2]) + tm.assert_frame_equal(result, expected) + # original is unmodified + tm.assert_frame_equal(df, original) + + result = df.assign(B=df.B / df.A) + expected = expected.drop("B", axis=1).rename(columns={"C": "B"}) + tm.assert_frame_equal(result, expected) + + # overwrite + result = df.assign(A=df.A + df.B) + expected = df.copy() + expected["A"] = [5, 7, 9] + tm.assert_frame_equal(result, expected) + + # lambda + result = df.assign(A=lambda x: x.A + x.B) + tm.assert_frame_equal(result, expected) + + def test_assign_multiple(self): + df = 
DataFrame([[1, 4], [2, 5], [3, 6]], columns=["A", "B"]) + result = df.assign(C=[7, 8, 9], D=df.A, E=lambda x: x.B) + expected = DataFrame( + [[1, 4, 7, 1, 4], [2, 5, 8, 2, 5], [3, 6, 9, 3, 6]], columns=list("ABCDE") + ) + tm.assert_frame_equal(result, expected) + + def test_assign_order(self): + # GH 9818 + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + result = df.assign(D=df.A + df.B, C=df.A - df.B) + + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], columns=list("ABDC")) + tm.assert_frame_equal(result, expected) + result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], columns=list("ABCD")) + + tm.assert_frame_equal(result, expected) + + def test_assign_bad(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + # non-keyword argument + with pytest.raises(TypeError): + df.assign(lambda x: x.A) + with pytest.raises(AttributeError): + df.assign(C=df.A, D=df.A + df.C) + + def test_assign_dependent(self): + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + + result = df.assign(C=df.A, D=lambda x: x["A"] + x["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) + + result = df.assign(C=lambda df: df.A, D=lambda df: df["A"] + df["C"]) + expected = DataFrame([[1, 3, 1, 2], [2, 4, 2, 4]], columns=list("ABCD")) + tm.assert_frame_equal(result, expected) + + def test_insert_error_msmgs(self): + + # GH 7432 + df = DataFrame( + {"foo": ["a", "b", "c"], "bar": [1, 2, 3], "baz": ["d", "e", "f"]} + ).set_index("foo") + s = DataFrame( + {"foo": ["a", "b", "c", "a"], "fiz": ["g", "h", "i", "j"]} + ).set_index("foo") + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + df["newcol"] = s + + # GH 4107, more descriptive error message + df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"]) + + msg = "incompatible index of inserted column with frame index" + with pytest.raises(TypeError, 
match=msg): + df["gr"] = df.groupby(["b", "c"]).count() + + def test_insert_benchmark(self): + # from the vb_suite/frame_methods/frame_insert_columns + N = 10 + K = 5 + df = DataFrame(index=range(N)) + new_col = np.random.randn(N) + for i in range(K): + df[i] = new_col + expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N)) + tm.assert_frame_equal(df, expected) + + def test_insert(self): + df = DataFrame( + np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"] + ) + + df.insert(0, "foo", df["a"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"])) + tm.assert_series_equal(df["a"], df["foo"], check_names=False) + + df.insert(2, "bar", df["c"]) + tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"])) + tm.assert_almost_equal(df["c"], df["bar"], check_names=False) + + # diff dtype + + # new item + df["x"] = df["a"].astype("float32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 5 + [np.dtype("float32")], + index=["foo", "c", "bar", "b", "a", "x"], + ) + tm.assert_series_equal(result, expected) + + # replacing current (in different block) + df["a"] = df["a"].astype("float32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2, + index=["foo", "c", "bar", "b", "a", "x"], + ) + tm.assert_series_equal(result, expected) + + df["y"] = df["a"].astype("int32") + result = df.dtypes + expected = Series( + [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 + [np.dtype("int32")], + index=["foo", "c", "bar", "b", "a", "x", "y"], + ) + tm.assert_series_equal(result, expected) + + with pytest.raises(ValueError, match="already exists"): + df.insert(1, "a", df["b"]) + msg = "cannot insert c, already exists" + with pytest.raises(ValueError, match=msg): + df.insert(1, "c", df["b"]) + + df.columns.name = "some_name" + # preserve columns name field + df.insert(0, "baz", df["c"]) + assert df.columns.name == "some_name" + + # GH 13522 + df = 
DataFrame(index=["A", "B", "C"]) + df["X"] = df.index + df["X"] = ["x", "y", "z"] + exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"]) + tm.assert_frame_equal(df, exp) + + def test_delitem(self, float_frame): + del float_frame["A"] + assert "A" not in float_frame + + def test_delitem_multiindex(self): + midx = MultiIndex.from_product([["A", "B"], [1, 2]]) + df = DataFrame(np.random.randn(4, 4), columns=midx) + assert len(df.columns) == 4 + assert ("A",) in df.columns + assert "A" in df.columns + + result = df["A"] + assert isinstance(result, DataFrame) + del df["A"] + + assert len(df.columns) == 2 + + # A still in the levels, BUT get a KeyError if trying + # to delete + assert ("A",) not in df.columns + with pytest.raises(KeyError, match=re.escape("('A',)")): + del df[("A",)] + + # behavior of dropped/deleted MultiIndex levels changed from + # GH 2770 to GH 19027: MultiIndex no longer '.__contains__' + # levels which are dropped/deleted + assert "A" not in df.columns + with pytest.raises(KeyError, match=re.escape("('A',)")): + del df["A"] + + def test_pop(self, float_frame): + float_frame.columns.name = "baz" + + float_frame.pop("A") + assert "A" not in float_frame + + float_frame["foo"] = "bar" + float_frame.pop("foo") + assert "foo" not in float_frame + assert float_frame.columns.name == "baz" + + # gh-10912: inplace ops cause caching issue + a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"], index=["X", "Y"]) + b = a.pop("B") + b += 1 + + # original frame + expected = DataFrame([[1, 3], [4, 6]], columns=["A", "C"], index=["X", "Y"]) + tm.assert_frame_equal(a, expected) + + # result + expected = Series([2, 5], index=["X", "Y"], name="B") + 1 + tm.assert_series_equal(b, expected) + + def test_pop_non_unique_cols(self): + df = DataFrame({0: [0, 1], 1: [0, 1], 2: [4, 5]}) + df.columns = ["a", "b", "a"] + + res = df.pop("a") + assert type(res) == DataFrame + assert len(res) == 2 + assert len(df.columns) == 1 + assert "b" in df.columns + 
assert "a" not in df.columns + assert len(df.index) == 2 + + def test_insert_column_bug_4032(self): + + # GH4032, inserting a column and renaming causing errors + df = DataFrame({"b": [1.1, 2.2]}) + df = df.rename(columns={}) + df.insert(0, "a", [1, 2]) + + result = df.rename(columns={}) + str(result) + expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + df.insert(0, "c", [1.3, 2.3]) + + result = df.rename(columns={}) + str(result) + + expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py new file mode 100644 index 00000000..32ead406 --- /dev/null +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -0,0 +1,526 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series, date_range +import pandas._testing as tm + + +class TestDataFrameNonuniqueIndexes: + def test_column_dups_operations(self): + def check(result, expected=None): + if expected is not None: + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + # assignment + # GH 3687 + arr = np.random.randn(3, 2) + idx = list(range(2)) + df = DataFrame(arr, columns=["A", "A"]) + df.columns = idx + expected = DataFrame(arr, columns=idx) + check(df, expected) + + idx = date_range("20130101", periods=4, freq="Q-NOV") + df = DataFrame( + [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=["a", "a", "a", "a"] + ) + df.columns = idx + expected = DataFrame([[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], columns=idx) + check(df, expected) + + # insert + df = DataFrame( + [[1, 1, 1, 5], [1, 1, 2, 5], [2, 1, 3, 5]], + columns=["foo", "bar", "foo", "hello"], + ) + df["string"] = "bah" + expected = DataFrame( + [[1, 1, 1, 5, "bah"], [1, 1, 2, 5, "bah"], [2, 1, 3, 5, "bah"]], + columns=["foo", "bar", "foo", "hello", "string"], + ) + 
check(df, expected) + with pytest.raises(ValueError, match="Length of value"): + df.insert(0, "AnotherColumn", range(len(df.index) - 1)) + + # insert same dtype + df["foo2"] = 3 + expected = DataFrame( + [[1, 1, 1, 5, "bah", 3], [1, 1, 2, 5, "bah", 3], [2, 1, 3, 5, "bah", 3]], + columns=["foo", "bar", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + + # set (non-dup) + df["foo2"] = 4 + expected = DataFrame( + [[1, 1, 1, 5, "bah", 4], [1, 1, 2, 5, "bah", 4], [2, 1, 3, 5, "bah", 4]], + columns=["foo", "bar", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + df["foo2"] = 3 + + # delete (non dup) + del df["bar"] + expected = DataFrame( + [[1, 1, 5, "bah", 3], [1, 2, 5, "bah", 3], [2, 3, 5, "bah", 3]], + columns=["foo", "foo", "hello", "string", "foo2"], + ) + check(df, expected) + + # try to delete again (its not consolidated) + del df["hello"] + expected = DataFrame( + [[1, 1, "bah", 3], [1, 2, "bah", 3], [2, 3, "bah", 3]], + columns=["foo", "foo", "string", "foo2"], + ) + check(df, expected) + + # consolidate + df = df._consolidate() + expected = DataFrame( + [[1, 1, "bah", 3], [1, 2, "bah", 3], [2, 3, "bah", 3]], + columns=["foo", "foo", "string", "foo2"], + ) + check(df, expected) + + # insert + df.insert(2, "new_col", 5.0) + expected = DataFrame( + [[1, 1, 5.0, "bah", 3], [1, 2, 5.0, "bah", 3], [2, 3, 5.0, "bah", 3]], + columns=["foo", "foo", "new_col", "string", "foo2"], + ) + check(df, expected) + + # insert a dup + with pytest.raises(ValueError, match="cannot insert"): + df.insert(2, "new_col", 4.0) + + df.insert(2, "new_col", 4.0, allow_duplicates=True) + expected = DataFrame( + [ + [1, 1, 4.0, 5.0, "bah", 3], + [1, 2, 4.0, 5.0, "bah", 3], + [2, 3, 4.0, 5.0, "bah", 3], + ], + columns=["foo", "foo", "new_col", "new_col", "string", "foo2"], + ) + check(df, expected) + + # delete (dup) + del df["foo"] + expected = DataFrame( + [[4.0, 5.0, "bah", 3], [4.0, 5.0, "bah", 3], [4.0, 5.0, "bah", 3]], + columns=["new_col", "new_col", "string", 
"foo2"], + ) + tm.assert_frame_equal(df, expected) + + # dup across dtypes + df = DataFrame( + [[1, 1, 1.0, 5], [1, 1, 2.0, 5], [2, 1, 3.0, 5]], + columns=["foo", "bar", "foo", "hello"], + ) + check(df) + + df["foo2"] = 7.0 + expected = DataFrame( + [[1, 1, 1.0, 5, 7.0], [1, 1, 2.0, 5, 7.0], [2, 1, 3.0, 5, 7.0]], + columns=["foo", "bar", "foo", "hello", "foo2"], + ) + check(df, expected) + + result = df["foo"] + expected = DataFrame([[1, 1.0], [1, 2.0], [2, 3.0]], columns=["foo", "foo"]) + check(result, expected) + + # multiple replacements + df["foo"] = "string" + expected = DataFrame( + [ + ["string", 1, "string", 5, 7.0], + ["string", 1, "string", 5, 7.0], + ["string", 1, "string", 5, 7.0], + ], + columns=["foo", "bar", "foo", "hello", "foo2"], + ) + check(df, expected) + + del df["foo"] + expected = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "hello", "foo2"] + ) + check(df, expected) + + # values + df = DataFrame([[1, 2.5], [3, 4.5]], index=[1, 2], columns=["x", "x"]) + result = df.values + expected = np.array([[1, 2.5], [3, 4.5]]) + assert (result == expected).all().all() + + # rename, GH 4403 + df4 = DataFrame( + {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]}, + index=MultiIndex.from_tuples( + [(600809, 20130331)], names=["STK_ID", "RPT_Date"] + ), + ) + + df5 = DataFrame( + { + "RPT_Date": [20120930, 20121231, 20130331], + "STK_ID": [600809] * 3, + "STK_Name": ["饡驦", "饡驦", "饡驦"], + "TClose": [38.05, 41.66, 30.01], + }, + index=MultiIndex.from_tuples( + [(600809, 20120930), (600809, 20121231), (600809, 20130331)], + names=["STK_ID", "RPT_Date"], + ), + ) + + k = pd.merge(df4, df5, how="inner", left_index=True, right_index=True) + result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"}) + str(result) + result.dtypes + + expected = DataFrame( + [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]], + columns=[ + "RT", + "TClose", + "TExg", + "RPT_Date", + "STK_ID", + "STK_Name", + "QT_Close", + ], + 
).set_index(["STK_ID", "RPT_Date"], drop=False) + tm.assert_frame_equal(result, expected) + + # reindex is invalid! + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + df.reindex(columns=["bar"]) + with pytest.raises(ValueError, match=msg): + df.reindex(columns=["bar", "foo"]) + + # drop + df = DataFrame( + [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] + ) + result = df.drop(["a"], axis=1) + expected = DataFrame([[1], [1], [1]], columns=["bar"]) + check(result, expected) + result = df.drop("a", axis=1) + check(result, expected) + + # describe + df = DataFrame( + [[1, 1, 1], [2, 2, 2], [3, 3, 3]], + columns=["bar", "a", "a"], + dtype="float64", + ) + result = df.describe() + s = df.iloc[:, 0].describe() + expected = pd.concat([s, s, s], keys=df.columns, axis=1) + check(result, expected) + + # check column dups with index equal and not equal to df's index + df = DataFrame( + np.random.randn(5, 3), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "A"], + ) + for index in [df.index, pd.Index(list("edcba"))]: + this_df = df.copy() + expected_ser = pd.Series(index.values, index=this_df.index) + expected_df = DataFrame( + {"A": expected_ser, "B": this_df["B"], "A": expected_ser}, + columns=["A", "B", "A"], + ) + this_df["A"] = index + check(this_df, expected_df) + + # operations + for op in ["__add__", "__mul__", "__sub__", "__truediv__"]: + df = DataFrame(dict(A=np.arange(10), B=np.random.rand(10))) + expected = getattr(df, op)(df) + expected.columns = ["A", "A"] + df.columns = ["A", "A"] + result = getattr(df, op)(df) + check(result, expected) + + # multiple assignments that change dtypes + # the location indexer is a slice + # GH 6120 + df = DataFrame(np.random.randn(5, 2), columns=["that", "that"]) + expected = DataFrame(1.0, index=range(5), columns=["that", "that"]) + + df["that"] = 1.0 + check(df, expected) 
+ + df = DataFrame(np.random.rand(5, 2), columns=["that", "that"]) + expected = DataFrame(1, index=range(5), columns=["that", "that"]) + + df["that"] = 1 + check(df, expected) + + def test_column_dups2(self): + + # drop buggy GH 6240 + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + + expected = df.take([0, 1, 1], axis=1) + df2 = df.take([2, 0, 1, 2, 1], axis=1) + result = df2.drop("C", axis=1) + tm.assert_frame_equal(result, expected) + + # dropna + df = DataFrame( + { + "A": np.random.randn(5), + "B": np.random.randn(5), + "C": np.random.randn(5), + "D": ["a", "b", "c", "d", "e"], + } + ) + df.iloc[2, [0, 1, 2]] = np.nan + df.iloc[0, 0] = np.nan + df.iloc[1, 1] = np.nan + df.iloc[:, 3] = np.nan + expected = df.dropna(subset=["A", "B", "C"], how="all") + expected.columns = ["A", "A", "B", "C"] + + df.columns = ["A", "A", "B", "C"] + + result = df.dropna(subset=["A", "C"], how="all") + tm.assert_frame_equal(result, expected) + + def test_column_dups_indexing(self): + def check(result, expected=None): + if expected is not None: + tm.assert_frame_equal(result, expected) + result.dtypes + str(result) + + # boolean indexing + # GH 4879 + dups = ["A", "A", "C", "D"] + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + expected = df[df.C > 6] + expected.columns = dups + df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") + result = df[df.C > 6] + check(result, expected) + + # where + df = DataFrame( + np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"], dtype="float64" + ) + expected = df[df > 6] + expected.columns = dups + df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") + result = df[df > 6] + check(result, expected) + + # boolean with the duplicate raises + df = DataFrame(np.arange(12).reshape(3, 4), columns=dups, dtype="float64") + msg = "cannot reindex from a duplicate 
axis" + with pytest.raises(ValueError, match=msg): + df[df.A > 6] + + # dup aligning operations should work + # GH 5185 + df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3]) + df2 = DataFrame([1, 2, 3], index=[1, 2, 3]) + expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3]) + result = df1.sub(df2) + tm.assert_frame_equal(result, expected) + + # equality + df1 = DataFrame([[1, 2], [2, np.nan], [3, 4], [4, 4]], columns=["A", "B"]) + df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]], columns=["A", "A"]) + + # not-comparing like-labelled + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + df1 == df2 + + df1r = df1.reindex_like(df2) + result = df1r == df2 + expected = DataFrame( + [[False, True], [True, False], [False, False], [True, False]], + columns=["A", "A"], + ) + tm.assert_frame_equal(result, expected) + + # mixed column selection + # GH 5639 + dfbool = DataFrame( + { + "one": Series([True, True, False], index=["a", "b", "c"]), + "two": Series([False, False, True, False], index=["a", "b", "c", "d"]), + "three": Series([False, True, True, True], index=["a", "b", "c", "d"]), + } + ) + expected = pd.concat([dfbool["one"], dfbool["three"], dfbool["one"]], axis=1) + result = dfbool[["one", "three", "one"]] + check(result, expected) + + # multi-axis dups + # GH 6121 + df = DataFrame( + np.arange(25.0).reshape(5, 5), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "C", "D", "E"], + ) + z = df[["A", "C", "A"]].copy() + expected = z.loc[["a", "c", "a"]] + + df = DataFrame( + np.arange(25.0).reshape(5, 5), + index=["a", "b", "c", "d", "e"], + columns=["A", "B", "C", "D", "E"], + ) + z = df[["A", "C", "A"]] + result = z.loc[["a", "c", "a"]] + check(result, expected) + + def test_column_dups_indexing2(self): + + # GH 8363 + # datetime ops with a non-unique index + df = DataFrame( + {"A": np.arange(5, dtype="int64"), "B": np.arange(1, 6, dtype="int64")}, + index=[2, 2, 3, 3, 4], + ) + result = 
df.B - df.A + expected = Series(1, index=[2, 2, 3, 3, 4]) + tm.assert_series_equal(result, expected) + + df = DataFrame( + { + "A": date_range("20130101", periods=5), + "B": date_range("20130101 09:00:00", periods=5), + }, + index=[2, 2, 3, 3, 4], + ) + result = df.B - df.A + expected = Series(pd.Timedelta("9 hours"), index=[2, 2, 3, 3, 4]) + tm.assert_series_equal(result, expected) + + def test_columns_with_dups(self): + # GH 3468 related + + # basic + df = DataFrame([[1, 2]], columns=["a", "a"]) + df.columns = ["a", "a.1"] + str(df) + expected = DataFrame([[1, 2]], columns=["a", "a.1"]) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, 3]], columns=["b", "a", "a"]) + df.columns = ["b", "a", "a.1"] + str(df) + expected = DataFrame([[1, 2, 3]], columns=["b", "a", "a.1"]) + tm.assert_frame_equal(df, expected) + + # with a dup index + df = DataFrame([[1, 2]], columns=["a", "a"]) + df.columns = ["b", "b"] + str(df) + expected = DataFrame([[1, 2]], columns=["b", "b"]) + tm.assert_frame_equal(df, expected) + + # multi-dtype + df = DataFrame( + [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], + columns=["a", "a", "b", "b", "d", "c", "c"], + ) + df.columns = list("ABCDEFG") + str(df) + expected = DataFrame( + [[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + df = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a", "a", "a"]) + df.columns = ["a", "a.1", "a.2", "a.3"] + str(df) + expected = DataFrame([[1, 2, "foo", "bar"]], columns=["a", "a.1", "a.2", "a.3"]) + tm.assert_frame_equal(df, expected) + + # dups across blocks + df_float = DataFrame(np.random.randn(10, 3), dtype="float64") + df_int = DataFrame(np.random.randn(10, 3), dtype="int64") + df_bool = DataFrame(True, index=df_float.index, columns=df_float.columns) + df_object = DataFrame("foo", index=df_float.index, columns=df_float.columns) + df_dt = DataFrame( + pd.Timestamp("20010101"), index=df_float.index, columns=df_float.columns + ) + df = 
pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) + + assert len(df._data._blknos) == len(df.columns) + assert len(df._data._blklocs) == len(df.columns) + + # testing iloc + for i in range(len(df.columns)): + df.iloc[:, i] + + # dup columns across dtype GH 2079/2194 + vals = [[1, -1, 2.0], [2, -2, 3.0]] + rs = DataFrame(vals, columns=["A", "A", "B"]) + xp = DataFrame(vals) + xp.columns = ["A", "A", "B"] + tm.assert_frame_equal(rs, xp) + + def test_values_duplicates(self): + df = DataFrame( + [[1, 2, "a", "b"], [1, 2, "a", "b"]], columns=["one", "one", "two", "two"] + ) + + result = df.values + expected = np.array([[1, 2, "a", "b"], [1, 2, "a", "b"]], dtype=object) + + tm.assert_numpy_array_equal(result, expected) + + def test_set_value_by_index(self): + # See gh-12344 + df = DataFrame(np.arange(9).reshape(3, 3).T) + df.columns = list("AAA") + expected = df.iloc[:, 2] + + df.iloc[:, 0] = 3 + tm.assert_series_equal(df.iloc[:, 2], expected) + + df = DataFrame(np.arange(9).reshape(3, 3).T) + df.columns = [2, float(2), str(2)] + expected = df.iloc[:, 1] + + df.iloc[:, 0] = 3 + tm.assert_series_equal(df.iloc[:, 1], expected) + + def test_insert_with_columns_dups(self): + # GH 14291 + df = pd.DataFrame() + df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True) + df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True) + df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True) + exp = pd.DataFrame( + [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"] + ) + tm.assert_frame_equal(df, exp) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py new file mode 100644 index 00000000..55f1216a --- /dev/null +++ b/pandas/tests/frame/test_operators.py @@ -0,0 +1,911 @@ +from decimal import Decimal +import operator + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm +import pandas.core.common as com +from 
pandas.tests.frame.common import _check_mixed_float + + +class TestDataFrameUnaryOperators: + # __pos__, __neg__, __inv__ + + @pytest.mark.parametrize( + "df,expected", + [ + (pd.DataFrame({"a": [-1, 1]}), pd.DataFrame({"a": [1, -1]})), + (pd.DataFrame({"a": [False, True]}), pd.DataFrame({"a": [True, False]})), + ( + pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), + pd.DataFrame({"a": pd.Series(pd.to_timedelta([1, -1]))}), + ), + ], + ) + def test_neg_numeric(self, df, expected): + tm.assert_frame_equal(-df, expected) + tm.assert_series_equal(-df["a"], expected["a"]) + + @pytest.mark.parametrize( + "df, expected", + [ + (np.array([1, 2], dtype=object), np.array([-1, -2], dtype=object)), + ([Decimal("1.0"), Decimal("2.0")], [Decimal("-1.0"), Decimal("-2.0")]), + ], + ) + def test_neg_object(self, df, expected): + # GH#21380 + df = pd.DataFrame({"a": df}) + expected = pd.DataFrame({"a": expected}) + tm.assert_frame_equal(-df, expected) + tm.assert_series_equal(-df["a"], expected["a"]) + + @pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": ["a", "b"]}), + pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])}), + ], + ) + def test_neg_raises(self, df): + with pytest.raises(TypeError): + (-df) + with pytest.raises(TypeError): + (-df["a"]) + + def test_invert(self, float_frame): + df = float_frame + + tm.assert_frame_equal(-(df < 0), ~(df < 0)) + + def test_invert_mixed(self): + shape = (10, 5) + df = pd.concat( + [ + pd.DataFrame(np.zeros(shape, dtype="bool")), + pd.DataFrame(np.zeros(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + result = ~df + expected = pd.concat( + [ + pd.DataFrame(np.ones(shape, dtype="bool")), + pd.DataFrame(-np.ones(shape, dtype=int)), + ], + axis=1, + ignore_index=True, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df", + [ + pd.DataFrame({"a": [-1, 1]}), + pd.DataFrame({"a": [False, True]}), + pd.DataFrame({"a": pd.Series(pd.to_timedelta([-1, 1]))}), + ], + ) + def 
test_pos_numeric(self, df): + # GH#16073 + tm.assert_frame_equal(+df, df) + tm.assert_series_equal(+df["a"], df["a"]) + + @pytest.mark.parametrize( + "df", + [ + # numpy changing behavior in the future + pytest.param( + pd.DataFrame({"a": ["a", "b"]}), + marks=[pytest.mark.filterwarnings("ignore")], + ), + pd.DataFrame({"a": np.array([-1, 2], dtype=object)}), + pd.DataFrame({"a": [Decimal("-1.0"), Decimal("2.0")]}), + ], + ) + def test_pos_object(self, df): + # GH#21380 + tm.assert_frame_equal(+df, df) + tm.assert_series_equal(+df["a"], df["a"]) + + @pytest.mark.parametrize( + "df", [pd.DataFrame({"a": pd.to_datetime(["2017-01-22", "1970-01-01"])})] + ) + def test_pos_raises(self, df): + with pytest.raises(TypeError): + (+df) + with pytest.raises(TypeError): + (+df["a"]) + + +class TestDataFrameLogicalOperators: + # &, |, ^ + + def test_logical_ops_empty_frame(self): + # GH#5808 + # empty frames, non-mixed dtype + df = DataFrame(index=[1]) + + result = df & df + tm.assert_frame_equal(result, df) + + result = df | df + tm.assert_frame_equal(result, df) + + df2 = DataFrame(index=[1, 2]) + result = df & df2 + tm.assert_frame_equal(result, df2) + + dfa = DataFrame(index=[1], columns=["A"]) + + result = dfa & dfa + expected = DataFrame(False, index=[1], columns=["A"]) + tm.assert_frame_equal(result, expected) + + def test_logical_ops_bool_frame(self): + # GH#5808 + df1a_bool = DataFrame(True, index=[1], columns=["A"]) + + result = df1a_bool & df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + result = df1a_bool | df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + def test_logical_ops_int_frame(self): + # GH#5808 + df1a_int = DataFrame(1, index=[1], columns=["A"]) + df1a_bool = DataFrame(True, index=[1], columns=["A"]) + + result = df1a_int | df1a_bool + tm.assert_frame_equal(result, df1a_bool) + + # Check that this matches Series behavior + res_ser = df1a_int["A"] | df1a_bool["A"] + tm.assert_series_equal(res_ser, df1a_bool["A"]) + + def 
test_logical_ops_invalid(self): + # GH#5808 + + df1 = DataFrame(1.0, index=[1], columns=["A"]) + df2 = DataFrame(True, index=[1], columns=["A"]) + with pytest.raises(TypeError): + df1 | df2 + + df1 = DataFrame("foo", index=[1], columns=["A"]) + df2 = DataFrame(True, index=[1], columns=["A"]) + with pytest.raises(TypeError): + df1 | df2 + + def test_logical_operators(self): + def _check_bin_op(op): + result = op(df1, df2) + expected = DataFrame( + op(df1.values, df2.values), index=df1.index, columns=df1.columns + ) + assert result.values.dtype == np.bool_ + tm.assert_frame_equal(result, expected) + + def _check_unary_op(op): + result = op(df1) + expected = DataFrame(op(df1.values), index=df1.index, columns=df1.columns) + assert result.values.dtype == np.bool_ + tm.assert_frame_equal(result, expected) + + df1 = { + "a": {"a": True, "b": False, "c": False, "d": True, "e": True}, + "b": {"a": False, "b": True, "c": False, "d": False, "e": False}, + "c": {"a": False, "b": False, "c": True, "d": False, "e": False}, + "d": {"a": True, "b": False, "c": False, "d": True, "e": True}, + "e": {"a": True, "b": False, "c": False, "d": True, "e": True}, + } + + df2 = { + "a": {"a": True, "b": False, "c": True, "d": False, "e": False}, + "b": {"a": False, "b": True, "c": False, "d": False, "e": False}, + "c": {"a": True, "b": False, "c": True, "d": False, "e": False}, + "d": {"a": False, "b": False, "c": False, "d": True, "e": False}, + "e": {"a": False, "b": False, "c": False, "d": False, "e": True}, + } + + df1 = DataFrame(df1) + df2 = DataFrame(df2) + + _check_bin_op(operator.and_) + _check_bin_op(operator.or_) + _check_bin_op(operator.xor) + + _check_unary_op(operator.inv) # TODO: belongs elsewhere + + def test_logical_with_nas(self): + d = DataFrame({"a": [np.nan, False], "b": [True, True]}) + + # GH4947 + # bool comparisons should return bool + result = d["a"] | d["b"] + expected = Series([False, True]) + tm.assert_series_equal(result, expected) + + # GH4604, automatic 
casting here + result = d["a"].fillna(False) | d["b"] + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + result = d["a"].fillna(False, downcast=False) | d["b"] + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "left, right, op, expected", + [ + ( + [True, False, np.nan], + [True, False, True], + operator.and_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.and_, + [True, False, False], + ), + ( + [True, False, np.nan], + [True, False, True], + operator.or_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.or_, + [True, False, True], + ), + ], + ) + def test_logical_operators_nans(self, left, right, op, expected): + # GH 13896 + result = op(DataFrame(left), DataFrame(right)) + expected = DataFrame(expected) + + tm.assert_frame_equal(result, expected) + + +class TestDataFrameOperators: + @pytest.mark.parametrize( + "op", [operator.add, operator.sub, operator.mul, operator.truediv] + ) + def test_operators_none_as_na(self, op): + df = DataFrame( + {"col1": [2, 5.0, 123, None], "col2": [1, 2, 3, 4]}, dtype=object + ) + + # since filling converts dtypes from object, changed expected to be + # object + filled = df.fillna(np.nan) + result = op(df, 3) + expected = op(filled, 3).astype(object) + expected[com.isna(expected)] = None + tm.assert_frame_equal(result, expected) + + result = op(df, df) + expected = op(filled, filled).astype(object) + expected[com.isna(expected)] = None + tm.assert_frame_equal(result, expected) + + result = op(df, df.fillna(7)) + tm.assert_frame_equal(result, expected) + + result = op(df.fillna(7), df) + tm.assert_frame_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)]) + # TODO: not sure what's correct here. 
+ @pytest.mark.filterwarnings("ignore:elementwise:FutureWarning") + def test_logical_typeerror_with_non_valid(self, op, res, float_frame): + # we are comparing floats vs a string + result = getattr(float_frame, op)("foo") + assert bool(result.all().all()) is res + + def test_binary_ops_align(self): + + # test aligning binary ops + + # GH 6681 + index = MultiIndex.from_product( + [list("abc"), ["one", "two", "three"], [1, 2, 3]], + names=["first", "second", "third"], + ) + + df = DataFrame( + np.arange(27 * 3).reshape(27, 3), + index=index, + columns=["value1", "value2", "value3"], + ).sort_index() + + idx = pd.IndexSlice + for op in ["add", "sub", "mul", "div", "truediv"]: + opa = getattr(operator, op, None) + if opa is None: + continue + + x = Series([1.0, 10.0, 100.0], [1, 2, 3]) + result = getattr(df, op)(x, level="third", axis=0) + + expected = pd.concat( + [opa(df.loc[idx[:, :, i], :], v) for i, v in x.items()] + ).sort_index() + tm.assert_frame_equal(result, expected) + + x = Series([1.0, 10.0], ["two", "three"]) + result = getattr(df, op)(x, level="second", axis=0) + + expected = ( + pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.items()]) + .reindex_like(df) + .sort_index() + ) + tm.assert_frame_equal(result, expected) + + # GH9463 (alignment level of dataframe with series) + + midx = MultiIndex.from_product([["A", "B"], ["a", "b"]]) + df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx) + s = pd.Series({"a": 1, "b": 2}) + + df2 = df.copy() + df2.columns.names = ["lvl0", "lvl1"] + s2 = s.copy() + s2.index.name = "lvl1" + + # different cases of integer/string level names: + res1 = df.mul(s, axis=1, level=1) + res2 = df.mul(s2, axis=1, level=1) + res3 = df2.mul(s, axis=1, level=1) + res4 = df2.mul(s2, axis=1, level=1) + res5 = df2.mul(s, axis=1, level="lvl1") + res6 = df2.mul(s2, axis=1, level="lvl1") + + exp = DataFrame( + np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"), columns=midx + ) + + for res in [res1, res2]: + 
tm.assert_frame_equal(res, exp) + + exp.columns.names = ["lvl0", "lvl1"] + for res in [res3, res4, res5, res6]: + tm.assert_frame_equal(res, exp) + + def test_dti_tz_convert_to_utc(self): + base = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC") + idx1 = base.tz_convert("Asia/Tokyo")[:2] + idx2 = base.tz_convert("US/Eastern")[1:] + + df1 = DataFrame({"A": [1, 2]}, index=idx1) + df2 = DataFrame({"A": [1, 1]}, index=idx2) + exp = DataFrame({"A": [np.nan, 3, np.nan]}, index=base) + tm.assert_frame_equal(df1 + df2, exp) + + def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame): + frame_copy = float_frame.reindex(float_frame.index[::2]) + + del frame_copy["D"] + frame_copy["C"][:5] = np.nan + + added = float_frame + frame_copy + + indexer = added["A"].dropna().index + exp = (float_frame["A"] * 2).copy() + + tm.assert_series_equal(added["A"].dropna(), exp.loc[indexer]) + + exp.loc[~exp.index.isin(indexer)] = np.nan + tm.assert_series_equal(added["A"], exp.loc[added["A"].index]) + + assert np.isnan(added["C"].reindex(frame_copy.index)[:5]).all() + + # assert(False) + + assert np.isnan(added["D"]).all() + + self_added = float_frame + float_frame + tm.assert_index_equal(self_added.index, float_frame.index) + + added_rev = frame_copy + float_frame + assert np.isnan(added["D"]).all() + assert np.isnan(added_rev["D"]).all() + + # corner cases + + # empty + plus_empty = float_frame + DataFrame() + assert np.isnan(plus_empty.values).all() + + empty_plus = DataFrame() + float_frame + assert np.isnan(empty_plus.values).all() + + empty_empty = DataFrame() + DataFrame() + assert empty_empty.empty + + # out of order + reverse = float_frame.reindex(columns=float_frame.columns[::-1]) + + tm.assert_frame_equal(reverse + float_frame, float_frame * 2) + + # mix vs float64, upcast + added = float_frame + mixed_float_frame + _check_mixed_float(added, dtype="float64") + added = mixed_float_frame + float_frame + _check_mixed_float(added, 
dtype="float64") + + # mix vs mix + added = mixed_float_frame + mixed_float_frame + _check_mixed_float(added, dtype=dict(C=None)) + + # with int + added = float_frame + mixed_int_frame + _check_mixed_float(added, dtype="float64") + + def test_combine_series( + self, float_frame, mixed_float_frame, mixed_int_frame, datetime_frame + ): + + # Series + series = float_frame.xs(float_frame.index[0]) + + added = float_frame + series + + for key, s in added.items(): + tm.assert_series_equal(s, float_frame[key] + series[key]) + + larger_series = series.to_dict() + larger_series["E"] = 1 + larger_series = Series(larger_series) + larger_added = float_frame + larger_series + + for key, s in float_frame.items(): + tm.assert_series_equal(larger_added[key], s + series[key]) + assert "E" in larger_added + assert np.isnan(larger_added["E"]).all() + + # no upcast needed + added = mixed_float_frame + series + _check_mixed_float(added) + + # vs mix (upcast) as needed + added = mixed_float_frame + series.astype("float32") + _check_mixed_float(added, dtype=dict(C=None)) + added = mixed_float_frame + series.astype("float16") + _check_mixed_float(added, dtype=dict(C=None)) + + # FIXME: don't leave commented-out + # these raise with numexpr.....as we are adding an int64 to an + # uint64....weird vs int + + # added = mixed_int_frame + (100*series).astype('int64') + # _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C = + # 'int64', D = 'int64')) + # added = mixed_int_frame + (100*series).astype('int32') + # _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C = + # 'int32', D = 'int64')) + + # TimeSeries + ts = datetime_frame["A"] + + # 10890 + # we no longer allow auto timeseries broadcasting + # and require explicit broadcasting + added = datetime_frame.add(ts, axis="index") + + for key, col in datetime_frame.items(): + result = col + ts + tm.assert_series_equal(added[key], result, check_names=False) + assert added[key].name == key + if col.name == ts.name: + 
assert result.name == "A" + else: + assert result.name is None + + smaller_frame = datetime_frame[:-5] + smaller_added = smaller_frame.add(ts, axis="index") + + tm.assert_index_equal(smaller_added.index, datetime_frame.index) + + smaller_ts = ts[:-5] + smaller_added2 = datetime_frame.add(smaller_ts, axis="index") + tm.assert_frame_equal(smaller_added, smaller_added2) + + # length 0, result is all-nan + result = datetime_frame.add(ts[:0], axis="index") + expected = DataFrame( + np.nan, index=datetime_frame.index, columns=datetime_frame.columns + ) + tm.assert_frame_equal(result, expected) + + # Frame is all-nan + result = datetime_frame[:0].add(ts, axis="index") + expected = DataFrame( + np.nan, index=datetime_frame.index, columns=datetime_frame.columns + ) + tm.assert_frame_equal(result, expected) + + # empty but with non-empty index + frame = datetime_frame[:1].reindex(columns=[]) + result = frame.mul(ts, axis="index") + assert len(result) == len(ts) + + def test_combineFunc(self, float_frame, mixed_float_frame): + result = float_frame * 2 + tm.assert_numpy_array_equal(result.values, float_frame.values * 2) + + # vs mix + result = mixed_float_frame * 2 + for c, s in result.items(): + tm.assert_numpy_array_equal(s.values, mixed_float_frame[c].values * 2) + _check_mixed_float(result, dtype=dict(C=None)) + + result = DataFrame() * 2 + assert result.index.equals(DataFrame().index) + assert len(result.columns) == 0 + + def test_comparisons(self, simple_frame, float_frame): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame() + + row = simple_frame.xs("a") + ndim_5 = np.ones(df1.shape + (1, 1, 1)) + + def test_comp(func): + result = func(df1, df2) + tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) + + with pytest.raises(ValueError, match="dim must be <= 2"): + func(df1, ndim_5) + + result2 = func(simple_frame, row) + tm.assert_numpy_array_equal( + result2.values, func(simple_frame.values, row.values) + ) + + result3 = func(float_frame, 
0) + tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0)) + + msg = "Can only compare identically-labeled DataFrame" + with pytest.raises(ValueError, match=msg): + func(simple_frame, simple_frame[:2]) + + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) + + def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne): + # GH 11565 + df = DataFrame( + {x: {"x": "foo", "y": "bar", "z": "baz"} for x in ["a", "b", "c"]} + ) + + f = getattr(operator, compare_operators_no_eq_ne) + with pytest.raises(TypeError): + f(df, 0) + + def test_comparison_protected_from_errstate(self): + missing_df = tm.makeDataFrame() + missing_df.iloc[0]["A"] = np.nan + with np.errstate(invalid="ignore"): + expected = missing_df.values < 0 + with np.errstate(invalid="raise"): + result = (missing_df < 0).values + tm.assert_numpy_array_equal(result, expected) + + def test_boolean_comparison(self): + + # GH 4576 + # boolean comparisons with a tuple/list give unexpected results + df = DataFrame(np.arange(6).reshape((3, 2))) + b = np.array([2, 2]) + b_r = np.atleast_2d([2, 2]) + b_c = b_r.T + lst = [2, 2, 2] + tup = tuple(lst) + + # gt + expected = DataFrame([[False, False], [False, True], [True, True]]) + result = df > b + tm.assert_frame_equal(result, expected) + + result = df.values > b + tm.assert_numpy_array_equal(result, expected.values) + + msg1d = "Unable to coerce to Series, length must be 2: given 3" + msg2d = "Unable to coerce to DataFrame, shape must be" + msg2db = "operands could not be broadcast together with shapes" + with pytest.raises(ValueError, match=msg1d): + # wrong shape + df > lst + + with pytest.raises(ValueError, match=msg1d): + # wrong shape + result = df > tup + + # broadcasts like ndarray (GH#23000) + result = df > b_r + tm.assert_frame_equal(result, expected) + + result = df.values > b_r + tm.assert_numpy_array_equal(result, 
def test_combine_generic(self, float_frame):
    """``DataFrame.combine`` with ``np.add`` against a row/column subset.

    The subset drops the last five rows and keeps only columns A, B, C.
    Combining in either order must leave column "D" entirely NaN and must
    double the values in the overlapping region.
    """
    shared_cols = ["A", "B", "C"]
    subset = float_frame.loc[float_frame.index[:-5], shared_cols]

    full_then_subset = float_frame.combine(subset, np.add)
    subset_then_full = subset.combine(float_frame, np.add)
    for combined in (full_then_subset, subset_then_full):
        assert combined["D"].isna().all()

    overlap_a = full_then_subset.loc[full_then_subset.index[:-5], shared_cols]
    overlap_b = subset_then_full.loc[subset_then_full.index[:-5], shared_cols]

    # x + x == 2 * x on the overlapping rows and columns
    doubled = (
        float_frame.loc[float_frame.index[:-5], shared_cols].reindex_like(overlap_a)
        * 2
    )
    tm.assert_frame_equal(overlap_a, doubled)
    tm.assert_frame_equal(overlap_b, doubled)
Z).reindex(columns=subs) + + X[block1] += Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] + Z[block1]).reindex(columns=subs) + + X[block1] += Z[block1] + result4 = X.reindex(columns=subs) + + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result4) + + # sub + X = X_orig.copy() + result1 = (X[block1] - Z).reindex(columns=subs) + + X[block1] -= Z + result2 = X.reindex(columns=subs) + + X = X_orig.copy() + result3 = (X[block1] - Z[block1]).reindex(columns=subs) + + X[block1] -= Z[block1] + result4 = X.reindex(columns=subs) + + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result4) + + def test_inplace_ops_identity(self): + + # GH 5104 + # make sure that we are actually changing the object + s_orig = Series([1, 2, 3]) + df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5)) + + # no dtype change + s = s_orig.copy() + s2 = s + s += 1 + tm.assert_series_equal(s, s2) + tm.assert_series_equal(s_orig + 1, s) + assert s is s2 + assert s._data is s2._data + + df = df_orig.copy() + df2 = df + df += 1 + tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df_orig + 1, df) + assert df is df2 + assert df._data is df2._data + + # dtype change + s = s_orig.copy() + s2 = s + s += 1.5 + tm.assert_series_equal(s, s2) + tm.assert_series_equal(s_orig + 1.5, s) + + df = df_orig.copy() + df2 = df + df += 1.5 + tm.assert_frame_equal(df, df2) + tm.assert_frame_equal(df_orig + 1.5, df) + assert df is df2 + assert df._data is df2._data + + # mixed dtype + arr = np.random.randint(0, 10, size=5) + df_orig = DataFrame({"A": arr.copy(), "B": "foo"}) + df = df_orig.copy() + df2 = df + df["A"] += 1 + expected = DataFrame({"A": arr.copy() + 1, "B": "foo"}) + tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df2, expected) + assert df._data is df2._data + + df = df_orig.copy() + df2 = df + df["A"] += 1.5 + 
@pytest.mark.parametrize(
    "op",
    [
        "add",
        "and",
        "div",
        "floordiv",
        "mod",
        "mul",
        "or",
        "pow",
        "sub",
        "truediv",
        "xor",
    ],
)
def test_inplace_ops_identity2(self, op):
    """In-place dunder ops mutate the frame and hand back the same object.

    For each operator name the ``__i<op>__`` method is called directly on
    the frame. The frame must then equal what the regular ``__<op>__``
    method yields on an untouched copy, and the value returned by the
    in-place method must be the original frame object.
    """
    if op == "div":
        # NOTE(review): presumably skipped because Python 3 has no
        # ``__idiv__`` protocol -- confirm; a silent return reports "passed".
        return

    df = DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]})

    operand = 2
    if op in ("and", "or", "xor"):
        # cannot use floats for boolean ops
        df["a"] = [True, False, True]

    df_copy = df.copy()
    iop = "__i{}__".format(op)
    op = "__{}__".format(op)

    # value is correct
    returned = getattr(df, iop)(operand)
    expected = getattr(df_copy, op)(operand)
    tm.assert_frame_equal(df, expected)

    # no id change: the in-place method must return the very object it was
    # called on. The previous check read ``id(df)`` after the operation and
    # compared it with itself, which could never fail.
    assert returned is df
def _permute(obj):
    """Return ``obj`` reordered by a random permutation of its positions.

    ``obj`` only needs ``len()`` and a positional ``take`` method.
    """
    shuffled_positions = np.random.permutation(len(obj))
    return obj.take(shuffled_positions)
index=index) + df["mix"] = "a" + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + result = df.to_timestamp("D", "end") + tm.assert_index_equal(result.index, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = df.to_timestamp("D", "start") + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq="A-DEC"): + return date_range( + to_datetime("1/1/2001") + delta, + to_datetime("12/31/2009") + delta, + freq=freq, + ) + + delta = timedelta(hours=23) + result = df.to_timestamp("H", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp("T", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + result = df.to_timestamp("S", "end") + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + # columns + df = df.T + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + result = df.to_timestamp("D", "end", axis=1) + tm.assert_index_equal(result.columns, exp_index) + tm.assert_numpy_array_equal(result.values, df.values) + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = df.to_timestamp("D", "start", axis=1) + tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23) + result = df.to_timestamp("H", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + 
tm.assert_index_equal(result.columns, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = df.to_timestamp("T", "end", axis=1) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + result = df.to_timestamp("S", "end", axis=1) + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.columns, exp_index) + + # invalid axis + with pytest.raises(ValueError, match="axis"): + df.to_timestamp(axis=2) + + result1 = df.to_timestamp("5t", axis=1) + result2 = df.to_timestamp("t", axis=1) + expected = pd.date_range("2001-01-01", "2009-01-01", freq="AS") + assert isinstance(result1.columns, DatetimeIndex) + assert isinstance(result2.columns, DatetimeIndex) + tm.assert_numpy_array_equal(result1.columns.asi8, expected.asi8) + tm.assert_numpy_array_equal(result2.columns.asi8, expected.asi8) + # PeriodIndex.to_timestamp always use 'infer' + assert result1.columns.freqstr == "AS-JAN" + assert result2.columns.freqstr == "AS-JAN" + + def test_frame_index_to_string(self): + index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M") + frame = DataFrame(np.random.randn(3, 4), index=index) + + # it works! 
def skip_if_no_pandas_parser(parser):
    """Skip the calling test unless ``parser`` is ``"pandas"``."""
    if parser == "pandas":
        return
    reason = f"cannot evaluate with parser {repr(parser)}"
    pytest.skip(reason)
class TestDataFrameEval:
    """Arithmetic and argument-validation checks around ``eval``/``query``."""

    def test_ops(self):
        """Series-with-frame arithmetic agrees across operator spellings.

        For each of ``+ - * /`` the result of ``m <op> df`` (``m`` being the
        column means) is compared with ``base <op> df``, where ``base`` is
        ``m`` tiled to the frame's shape, and with the matching dunder
        method called on ``df``.
        """
        # tests ops and reversed ops in evaluation
        # GH7198

        # smaller hits python, larger hits numexpr
        for n in [4, 4000]:

            df = DataFrame(1, index=range(n), columns=list("abcd"))
            df.iloc[0] = 2
            m = df.mean()

            for op_str, op, rop in [
                ("+", "__add__", "__radd__"),
                ("-", "__sub__", "__rsub__"),
                ("*", "__mul__", "__rmul__"),
                ("/", "__truediv__", "__rtruediv__"),
            ]:

                # ``base``, ``m`` and ``df`` are looked up by name inside the
                # eval'd strings below, so they must remain locals with
                # exactly these names.
                base = DataFrame(  # noqa
                    np.tile(m.values, n).reshape(n, -1), columns=list("abcd")
                )

                # builtin eval on fixed operator symbols only
                expected = eval(f"base{op_str}df")

                # ops as strings
                result = eval(f"m{op_str}df")
                tm.assert_frame_equal(result, expected)

                # Branch on the operator symbol. The previous code compared
                # the dunder name (``op``) with the symbols, so neither
                # branch ever ran and the dunder calls went unchecked.

                # these are commutative
                if op_str in ["+", "*"]:
                    result = getattr(df, op)(m)
                    tm.assert_frame_equal(result, expected)

                # these are not
                elif op_str in ["-", "/"]:
                    result = getattr(df, rop)(m)
                    tm.assert_frame_equal(result, expected)

        # GH7192: Note we need a large number of rows to ensure this
        # goes through the numexpr path
        df = DataFrame(dict(A=np.random.randn(25000)))
        df.iloc[0:5] = np.nan
        expected = 1 - np.isnan(df.iloc[0:25])
        result = (1 - np.isnan(df)).iloc[0:25]
        tm.assert_frame_equal(result, expected)

    def test_query_non_str(self):
        """``query`` rejects a non-string expression with ``ValueError``."""
        # GH 11485
        df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "b"]})

        msg = "expr must be a string to be evaluated"
        with pytest.raises(ValueError, match=msg):
            df.query(lambda x: x.B == "b")

        with pytest.raises(ValueError, match=msg):
            df.query(111)

    def test_query_empty_string(self):
        """``query`` rejects an empty expression with ``ValueError``."""
        # GH 13139
        df = pd.DataFrame({"A": [1, 2, 3]})

        msg = "expr cannot be an empty string"
        with pytest.raises(ValueError, match=msg):
            df.query("")

    def test_eval_resolvers_as_list(self):
        """``resolvers`` may be given as a list of dicts to both eval entry points."""
        # GH 14095
        df = DataFrame(np.random.randn(10, 2), columns=list("ab"))
        dict1 = {"a": 1}
        dict2 = {"b": 2}
        assert df.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"]
        assert pd.eval("a + b", resolvers=[dict1, dict2]) == dict1["a"] + dict2["b"]
def test_query_with_unnamed_multiindex(self, parser, engine):
    """Unnamed MultiIndex levels can be referenced in ``query`` as ``ilevel_<n>``.

    For level 0 (value "red") and level 1 (value "eggs") every supported
    spelling of equality, inequality, list equality and ``in`` / ``not in``
    is checked against the equivalent boolean mask built by hand.
    """
    skip_if_no_pandas_parser(parser)
    a = np.random.choice(["red", "green"], size=10)
    b = np.random.choice(["eggs", "ham"], size=10)
    index = MultiIndex.from_arrays([a, b])
    df = DataFrame(np.random.randn(10, 2), index=index)

    def check(mask, *exprs):
        # each expression must select exactly the rows flagged by ``mask``
        exp = df[mask]
        for expr in exprs:
            res = df.query(expr, parser=parser, engine=engine)
            tm.assert_frame_equal(res, exp)

    # ## LEVEL 0
    ind = Series(df.index.get_level_values(0).values, index=index)

    # equality / inequality
    check(ind == "red", 'ilevel_0 == "red"', '"red" == ilevel_0')
    check(ind != "red", 'ilevel_0 != "red"', '"red" != ilevel_0')

    # list equality (really just set membership)
    check(ind.isin(["red"]), 'ilevel_0 == ["red"]', '["red"] == ilevel_0')
    check(~ind.isin(["red"]), 'ilevel_0 != ["red"]', '["red"] != ilevel_0')

    # in/not in ops
    check(ind.isin(["red"]), '["red"] in ilevel_0', '"red" in ilevel_0')
    check(~ind.isin(["red"]), '["red"] not in ilevel_0', '"red" not in ilevel_0')

    # ## LEVEL 1
    ind = Series(df.index.get_level_values(1).values, index=index)

    # equality / inequality
    check(ind == "eggs", 'ilevel_1 == "eggs"', '"eggs" == ilevel_1')
    check(ind != "eggs", 'ilevel_1 != "eggs"', '"eggs" != ilevel_1')

    # list equality (really just set membership)
    check(ind.isin(["eggs"]), 'ilevel_1 == ["eggs"]', '["eggs"] == ilevel_1')
    check(~ind.isin(["eggs"]), 'ilevel_1 != ["eggs"]', '["eggs"] != ilevel_1')

    # in/not in ops
    check(ind.isin(["eggs"]), '["eggs"] in ilevel_1', '"eggs" in ilevel_1')
    check(
        ~ind.isin(["eggs"]), '["eggs"] not in ilevel_1', '"eggs" not in ilevel_1'
    )
def test_date_query_no_attribute_access(self):
    """A chained comparison on datetime columns referenced by bare name.

    The query compares the columns with the integer-looking literal
    ``20130101``; the expected frame is built with the string "20130101".
    """
    df = DataFrame(np.random.randn(5, 3))
    starts = (("dates1", "1/1/2012"), ("dates2", "1/1/2013"), ("dates3", "1/1/2014"))
    for column, start in starts:
        df[column] = date_range(start, periods=5)

    result = df.query(
        "dates1 < 20130101 < dates3", engine=self.engine, parser=self.parser
    )

    starts_before = df.dates1 < "20130101"
    ends_after = "20130101" < df.dates3
    expected = df[starts_before & ends_after]
    tm.assert_frame_equal(result, expected)
def test_date_query_with_non_date(self):
    """Comparing a datetime column with an integer column inside ``query``.

    ``==`` selects no rows, ``!=`` selects every row, and each ordering
    comparison raises ``TypeError``.
    """
    query_kwargs = {"parser": self.parser, "engine": self.engine}

    n = 10
    df = DataFrame(
        {"dates": date_range("1/1/2012", periods=n), "nondate": np.arange(n)}
    )

    matched = df.query("dates == nondate", **query_kwargs)
    assert len(matched) == 0

    unmatched = df.query("dates != nondate", **query_kwargs)
    tm.assert_frame_equal(unmatched, df)

    for op in ("<", ">", "<=", ">="):
        expr = f"dates {op} nondate"
        with pytest.raises(TypeError):
            df.query(expr, **query_kwargs)
def test_query_doesnt_pickup_local(self):
    """A name that is neither a column nor in scope raises in ``query``.

    The frame has columns a, b, c only, so ``sin`` must be reported as
    undefined.
    """
    from pandas.core.computation.ops import UndefinedVariableError

    n = m = 10
    values = np.random.randint(m, size=(n, 3))
    df = DataFrame(values, columns=list("abc"))

    # we don't pick up the local 'sin'
    expected_message = "name 'sin' is not defined"
    with pytest.raises(UndefinedVariableError, match=expected_message):
        df.query("sin > 5", engine=self.engine, parser=self.parser)
def test_query_index_without_name(self):
    """With an unnamed index, ``index`` in a query refers to the frame's index.

    Checked against another column and against a scalar.
    """
    df = DataFrame(
        np.random.randint(10, size=(10, 3)),
        index=range(10),
        columns=["a", "b", "c"],
    )

    cases = (
        # "index" should refer to the index
        ("index < b", df.index < df.b),
        # test against a scalar
        ("index < 5", df.index < 5),
    )
    for expr, mask in cases:
        result = df.query(expr, engine=self.engine, parser=self.parser)
        tm.assert_frame_equal(result, df[mask])
def test_chained_cmp_and_in(self):
    """Chained ``<`` and chained ``not in`` combine with ``and`` in one query.

    The expected mask spells the chain out pairwise:
    ``a < b``, ``b < c``, ``a not in b`` and ``b not in c``.
    """
    skip_if_no_pandas_parser(self.parser)

    columns = list("abc")
    df = DataFrame(np.random.randn(100, len(columns)), columns=columns)

    result = df.query(
        "a < b < c and a not in b not in c",
        engine=self.engine,
        parser=self.parser,
    )

    ascending = (df.a < df.b) & (df.b < df.c)
    b_not_in_a = ~df.b.isin(df.a)
    c_not_in_b = ~df.c.isin(df.b)
    expected = df[ascending & b_not_in_a & c_not_in_b]
    tm.assert_frame_equal(result, expected)
test_at_inside_string(self): + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + c = 1 # noqa + df = DataFrame({"a": ["a", "a", "b", "b", "@c", "@c"]}) + result = df.query('a == "@c"', engine=engine, parser=parser) + expected = df[df.a == "@c"] + tm.assert_frame_equal(result, expected) + + def test_query_undefined_local(self): + from pandas.core.computation.ops import UndefinedVariableError + + engine, parser = self.engine, self.parser + skip_if_no_pandas_parser(parser) + + df = DataFrame(np.random.rand(10, 2), columns=list("ab")) + with pytest.raises( + UndefinedVariableError, match="local variable 'c' is not defined" + ): + df.query("a == @c", engine=engine, parser=parser) + + def test_index_resolvers_come_after_columns_with_the_same_name(self): + n = 1 # noqa + a = np.r_[20:101:20] + + df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + df.index.name = "index" + result = df.query("index > 5", engine=self.engine, parser=self.parser) + expected = df[df["index"] > 5] + tm.assert_frame_equal(result, expected) + + df = DataFrame({"index": a, "b": np.random.randn(a.size)}) + result = df.query("ilevel_0 > 5", engine=self.engine, parser=self.parser) + expected = df.loc[df.index[df.index > 5]] + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": a, "b": np.random.randn(a.size)}) + df.index.name = "a" + result = df.query("a > 5", engine=self.engine, parser=self.parser) + expected = df[df.a > 5] + tm.assert_frame_equal(result, expected) + + result = df.query("index > 5", engine=self.engine, parser=self.parser) + expected = df.loc[df.index[df.index > 5]] + tm.assert_frame_equal(result, expected) + + def test_inf(self): + n = 10 + df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)}) + df.loc[::2, 0] = np.inf + ops = "==", "!=" + d = dict(zip(ops, (operator.eq, operator.ne))) + for op, f in d.items(): + q = "a {op} inf".format(op=op) + expected = df[f(df.a, np.inf)] + result = df.query(q, engine=self.engine, 
parser=self.parser) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no_ne +class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "numexpr" + cls.parser = "python" + + def test_date_query_no_attribute_access(self): + engine, parser = self.engine, self.parser + df = DataFrame(np.random.randn(5, 3)) + df["dates1"] = date_range("1/1/2012", periods=5) + df["dates2"] = date_range("1/1/2013", periods=5) + df["dates3"] = date_range("1/1/2014", periods=5) + res = df.query( + "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates2"] = date_range("1/1/2013", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT + res = df.query( + "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.set_index("dates1", inplace=True, drop=True) + res = df.query( + "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = 
date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.iloc[0, 0] = pd.NaT + df.set_index("dates1", inplace=True, drop=True) + res = df.query( + "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser + ) + expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] + tm.assert_frame_equal(res, expec) + + def test_date_index_query_with_NaT_duplicates(self): + engine, parser = self.engine, self.parser + n = 10 + df = DataFrame(np.random.randn(n, 3)) + df["dates1"] = date_range("1/1/2012", periods=n) + df["dates3"] = date_range("1/1/2014", periods=n) + df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT + df.set_index("dates1", inplace=True, drop=True) + with pytest.raises(NotImplementedError): + df.query("index < 20130101 < dates3", engine=engine, parser=parser) + + def test_nested_scope(self): + from pandas.core.computation.ops import UndefinedVariableError + + engine = self.engine + parser = self.parser + # smoke test + x = 1 # noqa + result = pd.eval("x + 1", engine=engine, parser=parser) + assert result == 2 + + df = DataFrame(np.random.randn(5, 3)) + df2 = DataFrame(np.random.randn(5, 3)) + + # don't have the pandas parser + with pytest.raises(SyntaxError): + df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser) + + with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"): + df.query("(df>0) & (df2>0)", engine=engine, parser=parser) + + expected = df[(df > 0) & (df2 > 0)] + result = pd.eval("df[(df > 0) & (df2 > 0)]", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)] + result = pd.eval( + "df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]", engine=engine, parser=parser + ) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = "python" + cls.parser = "pandas" + + def 
test_query_builtin(self): + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + expected = df[df.index > 5] + result = df.query("sin > 5", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython): + @classmethod + def setup_class(cls): + super().setup_class() + cls.engine = cls.parser = "python" + + def test_query_builtin(self): + engine, parser = self.engine, self.parser + + n = m = 10 + df = DataFrame(np.random.randint(m, size=(n, 3)), columns=list("abc")) + + df.index.name = "sin" + expected = df[df.index > 5] + result = df.query("sin > 5", engine=engine, parser=parser) + tm.assert_frame_equal(expected, result) + + +class TestDataFrameQueryStrings: + def test_str_query_method(self, parser, engine): + df = DataFrame(np.random.randn(10, 1), columns=["b"]) + df["strings"] = Series(list("aabbccddee")) + expect = df[df.strings == "a"] + + if parser != "pandas": + col = "strings" + lst = '"a"' + + lhs = [col] * 2 + [lst] * 2 + rhs = lhs[::-1] + + eq, ne = "==", "!=" + ops = 2 * ([eq] + [ne]) + + for lhs, op, rhs in zip(lhs, ops, rhs): + ex = "{lhs} {op} {rhs}".format(lhs=lhs, op=op, rhs=rhs) + msg = r"'(Not)?In' nodes are not implemented" + with pytest.raises(NotImplementedError, match=msg): + df.query( + ex, + engine=engine, + parser=parser, + local_dict={"strings": df.strings}, + ) + else: + res = df.query('"a" == strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('strings == "a"', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[df.strings.isin(["a"])]) + + expect = df[df.strings != "a"] + res = df.query('strings != "a"', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('"a" != strings', engine=engine, parser=parser) + 
tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[~df.strings.isin(["a"])]) + + def test_str_list_query_method(self, parser, engine): + df = DataFrame(np.random.randn(10, 1), columns=["b"]) + df["strings"] = Series(list("aabbccddee")) + expect = df[df.strings.isin(["a", "b"])] + + if parser != "pandas": + col = "strings" + lst = '["a", "b"]' + + lhs = [col] * 2 + [lst] * 2 + rhs = lhs[::-1] + + eq, ne = "==", "!=" + ops = 2 * ([eq] + [ne]) + + for lhs, op, rhs in zip(lhs, ops, rhs): + ex = "{lhs} {op} {rhs}".format(lhs=lhs, op=op, rhs=rhs) + with pytest.raises(NotImplementedError): + df.query(ex, engine=engine, parser=parser) + else: + res = df.query('strings == ["a", "b"]', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('["a", "b"] == strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + expect = df[~df.strings.isin(["a", "b"])] + + res = df.query('strings != ["a", "b"]', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + res = df.query('["a", "b"] != strings', engine=engine, parser=parser) + tm.assert_frame_equal(res, expect) + + def test_query_with_string_columns(self, parser, engine): + df = DataFrame( + { + "a": list("aaaabbbbcccc"), + "b": list("aabbccddeeff"), + "c": np.random.randint(5, size=12), + "d": np.random.randint(9, size=12), + } + ) + if parser == "pandas": + res = df.query("a in b", parser=parser, engine=engine) + expec = df[df.a.isin(df.b)] + tm.assert_frame_equal(res, expec) + + res = df.query("a in b and c < d", parser=parser, engine=engine) + expec = df[df.a.isin(df.b) & (df.c < df.d)] + tm.assert_frame_equal(res, expec) + else: + with pytest.raises(NotImplementedError): + df.query("a in b", parser=parser, engine=engine) + + with pytest.raises(NotImplementedError): + df.query("a in b and c < d", parser=parser, engine=engine) + + def test_object_array_eq_ne(self, parser, engine): + df = DataFrame( + { + "a": list("aaaabbbbcccc"), + "b": 
list("aabbccddeeff"), + "c": np.random.randint(5, size=12), + "d": np.random.randint(9, size=12), + } + ) + res = df.query("a == b", parser=parser, engine=engine) + exp = df[df.a == df.b] + tm.assert_frame_equal(res, exp) + + res = df.query("a != b", parser=parser, engine=engine) + exp = df[df.a != df.b] + tm.assert_frame_equal(res, exp) + + def test_query_with_nested_strings(self, parser, engine): + skip_if_no_pandas_parser(parser) + raw = """id event timestamp + 1 "page 1 load" 1/1/2014 0:00:01 + 1 "page 1 exit" 1/1/2014 0:00:31 + 2 "page 2 load" 1/1/2014 0:01:01 + 2 "page 2 exit" 1/1/2014 0:01:31 + 3 "page 3 load" 1/1/2014 0:02:01 + 3 "page 3 exit" 1/1/2014 0:02:31 + 4 "page 1 load" 2/1/2014 1:00:01 + 4 "page 1 exit" 2/1/2014 1:00:31 + 5 "page 2 load" 2/1/2014 1:01:01 + 5 "page 2 exit" 2/1/2014 1:01:31 + 6 "page 3 load" 2/1/2014 1:02:01 + 6 "page 3 exit" 2/1/2014 1:02:31 + """ + df = pd.read_csv( + StringIO(raw), sep=r"\s{2,}", engine="python", parse_dates=["timestamp"] + ) + expected = df[df.event == '"page 1 load"'] + res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine) + tm.assert_frame_equal(expected, res) + + def test_query_with_nested_special_character(self, parser, engine): + skip_if_no_pandas_parser(parser) + df = DataFrame({"a": ["a", "b", "test & test"], "b": [1, 2, 3]}) + res = df.query('a == "test & test"', parser=parser, engine=engine) + expec = df[df.a == "test & test"] + tm.assert_frame_equal(res, expec) + + def test_query_lex_compare_strings(self, parser, engine): + + a = Series(np.random.choice(list("abcde"), 20)) + b = Series(np.arange(a.size)) + df = DataFrame({"X": a, "Y": b}) + + ops = {"<": operator.lt, ">": operator.gt, "<=": operator.le, ">=": operator.ge} + + for op, func in ops.items(): + res = df.query(f'X {op} "d"', engine=engine, parser=parser) + expected = df[func(df.X, "d")] + tm.assert_frame_equal(res, expected) + + def test_query_single_element_booleans(self, parser, engine): + columns = "bid", "bidsize", 
"ask", "asksize" + data = np.random.randint(2, size=(1, len(columns))).astype(bool) + df = DataFrame(data, columns=columns) + res = df.query("bid & ask", engine=engine, parser=parser) + expected = df[df.bid & df.ask] + tm.assert_frame_equal(res, expected) + + def test_query_string_scalar_variable(self, parser, engine): + skip_if_no_pandas_parser(parser) + df = pd.DataFrame( + { + "Symbol": ["BUD US", "BUD US", "IBM US", "IBM US"], + "Price": [109.70, 109.72, 183.30, 183.35], + } + ) + e = df[df.Symbol == "BUD US"] + symb = "BUD US" # noqa + r = df.query("Symbol == @symb", parser=parser, engine=engine) + tm.assert_frame_equal(e, r) + + +class TestDataFrameEvalWithFrame: + def setup_method(self, method): + self.frame = DataFrame(np.random.randn(10, 3), columns=list("abc")) + + def teardown_method(self, method): + del self.frame + + def test_simple_expr(self, parser, engine): + res = self.frame.eval("a + b", engine=engine, parser=parser) + expect = self.frame.a + self.frame.b + tm.assert_series_equal(res, expect) + + def test_bool_arith_expr(self, parser, engine): + res = self.frame.eval("a[a < 1] + b", engine=engine, parser=parser) + expect = self.frame.a[self.frame.a < 1] + self.frame.b + tm.assert_series_equal(res, expect) + + @pytest.mark.parametrize("op", ["+", "-", "*", "/"]) + def test_invalid_type_for_operator_raises(self, parser, engine, op): + df = DataFrame({"a": [1, 2], "b": ["c", "d"]}) + msg = r"unsupported operand type\(s\) for .+: '.+' and '.+'" + + with pytest.raises(TypeError, match=msg): + df.eval("a {0} b".format(op), engine=engine, parser=parser) + + +class TestDataFrameQueryBacktickQuoting: + @pytest.fixture(scope="class") + def df(self): + """ + Yields a dataframe with strings that may or may not need escaping + by backticks. The last two columns cannot be escaped by backticks + and should raise a ValueError. 
+ """ + yield DataFrame( + { + "A": [1, 2, 3], + "B B": [3, 2, 1], + "C C": [4, 5, 6], + "C C": [7, 4, 3], + "C_C": [8, 9, 10], + "D_D D": [11, 1, 101], + "E.E": [6, 3, 5], + "F-F": [8, 1, 10], + "1e1": [2, 4, 8], + "def": [10, 11, 2], + "A (x)": [4, 1, 3], + "B(x)": [1, 1, 5], + "B (x)": [2, 7, 4], + " &^ :!€$?(} > <++*'' ": [2, 5, 6], + "": [10, 11, 1], + " A": [4, 7, 9], + " ": [1, 2, 1], + "it's": [6, 3, 1], + "that's": [9, 1, 8], + "☺": [8, 7, 6], + "foo#bar": [2, 4, 5], + 1: [5, 7, 9], + } + ) + + def test_single_backtick_variable_query(self, df): + res = df.query("1 < `B B`") + expect = df[1 < df["B B"]] + tm.assert_frame_equal(res, expect) + + def test_two_backtick_variables_query(self, df): + res = df.query("1 < `B B` and 4 < `C C`") + expect = df[(1 < df["B B"]) & (4 < df["C C"])] + tm.assert_frame_equal(res, expect) + + def test_single_backtick_variable_expr(self, df): + res = df.eval("A + `B B`") + expect = df["A"] + df["B B"] + tm.assert_series_equal(res, expect) + + def test_two_backtick_variables_expr(self, df): + res = df.eval("`B B` + `C C`") + expect = df["B B"] + df["C C"] + tm.assert_series_equal(res, expect) + + def test_already_underscore_variable(self, df): + res = df.eval("`C_C` + A") + expect = df["C_C"] + df["A"] + tm.assert_series_equal(res, expect) + + def test_same_name_but_underscores(self, df): + res = df.eval("C_C + `C C`") + expect = df["C_C"] + df["C C"] + tm.assert_series_equal(res, expect) + + def test_mixed_underscores_and_spaces(self, df): + res = df.eval("A + `D_D D`") + expect = df["A"] + df["D_D D"] + tm.assert_series_equal(res, expect) + + def test_backtick_quote_name_with_no_spaces(self, df): + res = df.eval("A + `C_C`") + expect = df["A"] + df["C_C"] + tm.assert_series_equal(res, expect) + + def test_special_characters(self, df): + res = df.eval("`E.E` + `F-F` - A") + expect = df["E.E"] + df["F-F"] - df["A"] + tm.assert_series_equal(res, expect) + + def test_start_with_digit(self, df): + res = df.eval("A + `1e1`") + 
expect = df["A"] + df["1e1"] + tm.assert_series_equal(res, expect) + + def test_keyword(self, df): + res = df.eval("A + `def`") + expect = df["A"] + df["def"] + tm.assert_series_equal(res, expect) + + def test_unneeded_quoting(self, df): + res = df.query("`A` > 2") + expect = df[df["A"] > 2] + tm.assert_frame_equal(res, expect) + + def test_parenthesis(self, df): + res = df.query("`A (x)` > 2") + expect = df[df["A (x)"] > 2] + tm.assert_frame_equal(res, expect) + + def test_empty_string(self, df): + res = df.query("`` > 5") + expect = df[df[""] > 5] + tm.assert_frame_equal(res, expect) + + def test_multiple_spaces(self, df): + res = df.query("`C C` > 5") + expect = df[df["C C"] > 5] + tm.assert_frame_equal(res, expect) + + def test_start_with_spaces(self, df): + res = df.eval("` A` + ` `") + expect = df[" A"] + df[" "] + tm.assert_series_equal(res, expect) + + def test_lots_of_operators_string(self, df): + res = df.query("` &^ :!€$?(} > <++*'' ` > 4") + expect = df[df[" &^ :!€$?(} > <++*'' "] > 4] + tm.assert_frame_equal(res, expect) + + def test_failing_quote(self, df): + with pytest.raises(SyntaxError): + df.query("`it's` > `that's`") + + def test_failing_character_outside_range(self, df): + with pytest.raises(SyntaxError): + df.query("`☺` > 4") + + def test_failing_hashtag(self, df): + with pytest.raises(SyntaxError): + df.query("`foo#bar` > 4") diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py new file mode 100644 index 00000000..05bdec4a --- /dev/null +++ b/pandas/tests/frame/test_repr_info.py @@ -0,0 +1,579 @@ +from datetime import datetime, timedelta +from io import StringIO +import re +import sys +import textwrap +import warnings + +import numpy as np +import pytest + +from pandas.compat import PYPY + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Series, + date_range, + option_context, + period_range, +) +import pandas._testing as tm + +import pandas.io.formats.format as fmt + +# Segregated 
collection of methods that require the BlockManager internal data +# structure + + +class TestDataFrameReprInfoEtc: + def test_repr_empty(self): + # empty + repr(DataFrame()) + + # empty with index + frame = DataFrame(index=np.arange(1000)) + repr(frame) + + def test_repr_mixed(self, float_string_frame): + buf = StringIO() + + # mixed + repr(float_string_frame) + float_string_frame.info(verbose=False, buf=buf) + + @pytest.mark.slow + def test_repr_mixed_big(self): + # big mixed + biggie = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, index=range(200) + ) + biggie.loc[:20, "A"] = np.nan + biggie.loc[:20, "B"] = np.nan + + repr(biggie) + + def test_repr(self, float_frame): + buf = StringIO() + + # small one + repr(float_frame) + float_frame.info(verbose=False, buf=buf) + + # even smaller + float_frame.reindex(columns=["A"]).info(verbose=False, buf=buf) + float_frame.reindex(columns=["A", "B"]).info(verbose=False, buf=buf) + + # exhausting cases in DataFrame.info + + # columns but no index + no_index = DataFrame(columns=[0, 1, 3]) + repr(no_index) + + # no columns or index + DataFrame().info(buf=buf) + + df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) + assert "\t" not in repr(df) + assert "\r" not in repr(df) + assert "a\n" not in repr(df) + + def test_repr_dimensions(self): + df = DataFrame([[1, 2], [3, 4]]) + with option_context("display.show_dimensions", True): + assert "2 rows x 2 columns" in repr(df) + + with option_context("display.show_dimensions", False): + assert "2 rows x 2 columns" not in repr(df) + + with option_context("display.show_dimensions", "truncate"): + assert "2 rows x 2 columns" not in repr(df) + + @pytest.mark.slow + def test_repr_big(self): + # big one + biggie = DataFrame(np.zeros((200, 4)), columns=range(4), index=range(200)) + repr(biggie) + + def test_repr_unsortable(self, float_frame): + # columns are not sortable + + warn_filters = warnings.filters + warnings.filterwarnings("ignore", 
category=FutureWarning, module=".*format") + + unsortable = DataFrame( + { + "foo": [1] * 50, + datetime.today(): [1] * 50, + "bar": ["bar"] * 50, + datetime.today() + timedelta(1): ["bar"] * 50, + }, + index=np.arange(50), + ) + repr(unsortable) + + fmt.set_option("display.precision", 3, "display.column_space", 10) + repr(float_frame) + + fmt.set_option("display.max_rows", 10, "display.max_columns", 2) + repr(float_frame) + + fmt.set_option("display.max_rows", 1000, "display.max_columns", 1000) + repr(float_frame) + + tm.reset_display_options() + + warnings.filters = warn_filters + + def test_repr_unicode(self): + uval = "\u03c3\u03c3\u03c3\u03c3" + + # TODO(wesm): is this supposed to be used? + bval = uval.encode("utf-8") # noqa + + df = DataFrame({"A": [uval, uval]}) + + result = repr(df) + ex_top = " A" + assert result.split("\n")[0].rstrip() == ex_top + + df = DataFrame({"A": [uval, uval]}) + result = repr(df) + assert result.split("\n")[0].rstrip() == ex_top + + def test_unicode_string_with_unicode(self): + df = DataFrame({"A": ["\u05d0"]}) + str(df) + + def test_str_to_bytes_raises(self): + # GH 26447 + df = DataFrame({"A": ["abc"]}) + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(df) + + def test_very_wide_info_repr(self): + df = DataFrame(np.random.randn(10, 20), columns=tm.rands_array(10, 20)) + repr(df) + + def test_repr_column_name_unicode_truncation_bug(self): + # #1906 + df = DataFrame( + { + "Id": [7117434], + "StringCol": ( + "Is it possible to modify drop plot code" + " so that the output graph is displayed " + "in iphone simulator, Is it possible to " + "modify drop plot code so that the " + "output graph is \xe2\x80\xa8displayed " + "in iphone simulator.Now we are adding " + "the CSV file externally. I want to Call" + " the File through the code.." 
+ ), + } + ) + + with option_context("display.max_columns", 20): + assert "StringCol" in repr(df) + + def test_latex_repr(self): + result = r"""\begin{tabular}{llll} +\toprule +{} & 0 & 1 & 2 \\ +\midrule +0 & $\alpha$ & b & c \\ +1 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + with option_context("display.latex.escape", False, "display.latex.repr", True): + df = DataFrame([[r"$\alpha$", "b", "c"], [1, 2, 3]]) + assert result == df._repr_latex_() + + # GH 12182 + assert df._repr_latex_() is None + + def test_info(self, float_frame, datetime_frame): + io = StringIO() + float_frame.info(buf=io) + datetime_frame.info(buf=io) + + frame = DataFrame(np.random.randn(5, 3)) + + frame.info() + frame.info(verbose=False) + + def test_info_verbose(self): + buf = StringIO() + size = 1001 + start = 5 + frame = DataFrame(np.random.randn(3, size)) + frame.info(verbose=True, buf=buf) + + res = buf.getvalue() + header = " # Column Dtype \n--- ------ ----- " + assert header in res + + frame.info(verbose=True, buf=buf) + buf.seek(0) + lines = buf.readlines() + assert len(lines) > 0 + + for i, line in enumerate(lines): + if i >= start and i < start + size: + index = i - start + line_nr = " {} ".format(index) + assert line.startswith(line_nr) + + def test_info_memory(self): + # https://github.com/pandas-dev/pandas/issues/21056 + df = pd.DataFrame({"a": pd.Series([1, 2], dtype="i8")}) + buf = StringIO() + df.info(buf=buf) + result = buf.getvalue() + bytes = float(df.memory_usage().sum()) + + expected = textwrap.dedent( + """\ + + RangeIndex: 2 entries, 0 to 1 + Data columns (total 1 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 a 2 non-null int64 + dtypes: int64(1) + memory usage: {} bytes + """.format( + bytes + ) + ) + + assert result == expected + + def test_info_wide(self): + from pandas import set_option, reset_option + + io = StringIO() + df = DataFrame(np.random.randn(5, 101)) + df.info(buf=io) + + io = StringIO() + df.info(buf=io, 
max_cols=101) + rs = io.getvalue() + assert len(rs.splitlines()) > 100 + xp = rs + + set_option("display.max_info_columns", 101) + io = StringIO() + df.info(buf=io) + assert rs == xp + reset_option("display.max_info_columns") + + def test_info_duplicate_columns(self): + io = StringIO() + + # it works! + frame = DataFrame(np.random.randn(1500, 4), columns=["a", "a", "b", "b"]) + frame.info(buf=io) + + def test_info_duplicate_columns_shows_correct_dtypes(self): + # GH11761 + io = StringIO() + + frame = DataFrame([[1, 2.0]], columns=["a", "a"]) + frame.info(buf=io) + io.seek(0) + lines = io.readlines() + assert " 0 a 1 non-null int64 \n" == lines[5] + assert " 1 a 1 non-null float64\n" == lines[6] + + def test_info_shows_column_dtypes(self): + dtypes = [ + "int64", + "float64", + "datetime64[ns]", + "timedelta64[ns]", + "complex128", + "object", + "bool", + ] + data = {} + n = 10 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + buf = StringIO() + df.info(buf=buf) + res = buf.getvalue() + header = ( + " # Column Non-Null Count Dtype \n" + "--- ------ -------------- ----- " + ) + assert header in res + for i, dtype in enumerate(dtypes): + name = " {i:d} {i:d} {n:d} non-null {dtype}".format( + i=i, n=n, dtype=dtype + ) + assert name in res + + def test_info_max_cols(self): + df = DataFrame(np.random.randn(10, 5)) + for len_, verbose in [(5, None), (5, False), (12, True)]: + # For verbose always ^ setting ^ summarize ^ full output + with option_context("max_info_columns", 4): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + for len_, verbose in [(12, None), (5, False), (12, True)]: + + # max_cols not exceeded + with option_context("max_info_columns", 5): + buf = StringIO() + df.info(buf=buf, verbose=verbose) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + for len_, max_cols in [(12, 5), (5, 4)]: + # 
setting truncates + with option_context("max_info_columns", 4): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + # setting wouldn't truncate + with option_context("max_info_columns", 5): + buf = StringIO() + df.info(buf=buf, max_cols=max_cols) + res = buf.getvalue() + assert len(res.strip().split("\n")) == len_ + + def test_info_memory_usage(self): + # Ensure memory usage is displayed, when asserted, on the last line + dtypes = [ + "int64", + "float64", + "datetime64[ns]", + "timedelta64[ns]", + "complex128", + "object", + "bool", + ] + data = {} + n = 10 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + buf = StringIO() + + # display memory usage case + df.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert "memory usage: " in res[-1] + + # do not display memory usage case + df.info(buf=buf, memory_usage=False) + res = buf.getvalue().splitlines() + assert "memory usage: " not in res[-1] + + df.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + + # memory usage is a lower bound, so print it as XYZ+ MB + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df.iloc[:, :5].info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + + # excluded column with object dtype, so estimate is accurate + assert not re.match(r"memory usage: [^+]+\+", res[-1]) + + # Test a DataFrame with duplicate columns + dtypes = ["int64", "int64", "int64", "float64"] + data = {} + n = 100 + for i, dtype in enumerate(dtypes): + data[i] = np.random.randint(2, size=n).astype(dtype) + df = DataFrame(data) + df.columns = dtypes + + df_with_object_index = pd.DataFrame({"a": [1]}, index=["foo"]) + df_with_object_index.info(buf=buf, memory_usage=True) + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+\+", res[-1]) + + df_with_object_index.info(buf=buf, 
memory_usage="deep") + res = buf.getvalue().splitlines() + assert re.match(r"memory usage: [^+]+$", res[-1]) + + # Ensure df size is as expected + # (cols * rows * bytes) + index size + df_size = df.memory_usage().sum() + exp_size = len(dtypes) * n * 8 + df.index.nbytes + assert df_size == exp_size + + # Ensure number of cols in memory_usage is the same as df + size_df = np.size(df.columns.values) + 1 # index=True; default + assert size_df == np.size(df.memory_usage()) + + # assert deep works only on object + assert df.memory_usage().sum() == df.memory_usage(deep=True).sum() + + # test for validity + DataFrame(1, index=["a"], columns=["A"]).memory_usage(index=True) + DataFrame(1, index=["a"], columns=["A"]).index.nbytes + df = DataFrame( + data=1, + index=pd.MultiIndex.from_product([["a"], range(1000)]), + columns=["A"], + ) + df.index.nbytes + df.memory_usage(index=True) + df.index.values.nbytes + + mem = df.memory_usage(deep=True).sum() + assert mem > 0 + + @pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result") + def test_info_memory_usage_deep_not_pypy(self): + df_with_object_index = pd.DataFrame({"a": [1]}, index=["foo"]) + assert ( + df_with_object_index.memory_usage(index=True, deep=True).sum() + > df_with_object_index.memory_usage(index=True).sum() + ) + + df_object = pd.DataFrame({"a": ["a"]}) + assert df_object.memory_usage(deep=True).sum() > df_object.memory_usage().sum() + + @pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result") + def test_info_memory_usage_deep_pypy(self): + df_with_object_index = pd.DataFrame({"a": [1]}, index=["foo"]) + assert ( + df_with_object_index.memory_usage(index=True, deep=True).sum() + == df_with_object_index.memory_usage(index=True).sum() + ) + + df_object = pd.DataFrame({"a": ["a"]}) + assert df_object.memory_usage(deep=True).sum() == df_object.memory_usage().sum() + + @pytest.mark.skipif(PYPY, reason="PyPy getsizeof() fails by design") + def test_usage_via_getsizeof(self): + 
df = DataFrame( + data=1, + index=pd.MultiIndex.from_product([["a"], range(1000)]), + columns=["A"], + ) + mem = df.memory_usage(deep=True).sum() + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = mem - sys.getsizeof(df) + assert abs(diff) < 100 + + def test_info_memory_usage_qualified(self): + + buf = StringIO() + df = DataFrame(1, columns=list("ab"), index=[1, 2, 3]) + df.info(buf=buf) + assert "+" not in buf.getvalue() + + buf = StringIO() + df = DataFrame(1, columns=list("ab"), index=list("ABC")) + df.info(buf=buf) + assert "+" in buf.getvalue() + + buf = StringIO() + df = DataFrame( + 1, + columns=list("ab"), + index=pd.MultiIndex.from_product([range(3), range(3)]), + ) + df.info(buf=buf) + assert "+" not in buf.getvalue() + + buf = StringIO() + df = DataFrame( + 1, + columns=list("ab"), + index=pd.MultiIndex.from_product([range(3), ["foo", "bar"]]), + ) + df.info(buf=buf) + assert "+" in buf.getvalue() + + def test_info_memory_usage_bug_on_multiindex(self): + # GH 14308 + # memory usage introspection should not materialize .values + + from string import ascii_uppercase as uppercase + + def memory_usage(f): + return f.memory_usage(deep=True).sum() + + N = 100 + M = len(uppercase) + index = pd.MultiIndex.from_product( + [list(uppercase), pd.date_range("20160101", periods=N)], + names=["id", "date"], + ) + df = DataFrame({"value": np.random.randn(N * M)}, index=index) + + unstacked = df.unstack("id") + assert df.values.nbytes == unstacked.values.nbytes + assert memory_usage(df) > memory_usage(unstacked) + + # high upper bound + assert memory_usage(unstacked) - memory_usage(df) < 2000 + + def test_info_categorical(self): + # GH14298 + idx = pd.CategoricalIndex(["a", "b"]) + df = pd.DataFrame(np.zeros((2, 2)), index=idx, columns=idx) + + buf = StringIO() + df.info(buf=buf) + + def test_info_categorical_column(self): + + # make sure it works + n = 2500 + df = DataFrame({"int64": np.random.randint(100, size=n)}) + 
df["category"] = Series( + np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n)) + ).astype("category") + df.isna() + buf = StringIO() + df.info(buf=buf) + + df2 = df[df["category"] == "d"] + buf = StringIO() + df2.info(buf=buf) + + def test_repr_categorical_dates_periods(self): + # normal DataFrame + dt = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + p = period_range("2011-01", freq="M", periods=5) + df = DataFrame({"dt": dt, "p": p}) + exp = """ dt p +0 2011-01-01 09:00:00-05:00 2011-01 +1 2011-01-01 10:00:00-05:00 2011-02 +2 2011-01-01 11:00:00-05:00 2011-03 +3 2011-01-01 12:00:00-05:00 2011-04 +4 2011-01-01 13:00:00-05:00 2011-05""" + + assert repr(df) == exp + + df2 = DataFrame({"dt": Categorical(dt), "p": Categorical(p)}) + assert repr(df2) == exp + + @pytest.mark.parametrize("arg", [np.datetime64, np.timedelta64]) + @pytest.mark.parametrize( + "box, expected", + [[Series, "0 NaT\ndtype: object"], [DataFrame, " 0\n0 NaT"]], + ) + def test_repr_np_nat_with_object(self, arg, box, expected): + # GH 25445 + result = repr(box([arg("NaT")], dtype=object)) + assert result == expected diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py new file mode 100644 index 00000000..56a0c8cf --- /dev/null +++ b/pandas/tests/frame/test_reshape.py @@ -0,0 +1,1161 @@ +from datetime import datetime +import itertools + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, date_range +import pandas._testing as tm + + +class TestDataFrameReshape: + def test_pivot(self): + data = { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + + frame = DataFrame(data) + pivoted = frame.pivot(index="index", columns="columns", values="values") + + expected = DataFrame( + { + "One": {"A": 1.0, "B": 2.0, "C": 3.0}, + "Two": {"A": 1.0, "B": 2.0, "C": 
3.0}, + } + ) + + expected.index.name, expected.columns.name = "index", "columns" + tm.assert_frame_equal(pivoted, expected) + + # name tracking + assert pivoted.index.name == "index" + assert pivoted.columns.name == "columns" + + # don't specify values + pivoted = frame.pivot(index="index", columns="columns") + assert pivoted.index.name == "index" + assert pivoted.columns.names == (None, "columns") + + def test_pivot_duplicates(self): + data = DataFrame( + { + "a": ["bar", "bar", "foo", "foo", "foo"], + "b": ["one", "two", "one", "one", "two"], + "c": [1.0, 2.0, 3.0, 3.0, 4.0], + } + ) + with pytest.raises(ValueError, match="duplicate entries"): + data.pivot("a", "b", "c") + + def test_pivot_empty(self): + df = DataFrame(columns=["a", "b", "c"]) + result = df.pivot("a", "b", "c") + expected = DataFrame() + tm.assert_frame_equal(result, expected, check_names=False) + + def test_pivot_integer_bug(self): + df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")]) + + result = df.pivot(index=1, columns=0, values=2) + repr(result) + tm.assert_index_equal(result.columns, Index(["A", "B"], name=0)) + + def test_pivot_index_none(self): + # gh-3962 + data = { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + + frame = DataFrame(data).set_index("index") + result = frame.pivot(columns="columns", values="values") + expected = DataFrame( + { + "One": {"A": 1.0, "B": 2.0, "C": 3.0}, + "Two": {"A": 1.0, "B": 2.0, "C": 3.0}, + } + ) + + expected.index.name, expected.columns.name = "index", "columns" + tm.assert_frame_equal(result, expected) + + # omit values + result = frame.pivot(columns="columns") + + expected.columns = pd.MultiIndex.from_tuples( + [("values", "One"), ("values", "Two")], names=[None, "columns"] + ) + expected.index.name = "index" + tm.assert_frame_equal(result, expected, check_names=False) + assert result.index.name == "index" + assert result.columns.names == 
(None, "columns") + expected.columns = expected.columns.droplevel(0) + result = frame.pivot(columns="columns", values="values") + + expected.columns.name = "columns" + tm.assert_frame_equal(result, expected) + + def test_stack_unstack(self, float_frame): + df = float_frame.copy() + df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) + + stacked = df.stack() + stacked_df = DataFrame({"foo": stacked, "bar": stacked}) + + unstacked = stacked.unstack() + unstacked_df = stacked_df.unstack() + + tm.assert_frame_equal(unstacked, df) + tm.assert_frame_equal(unstacked_df["bar"], df) + + unstacked_cols = stacked.unstack(0) + unstacked_cols_df = stacked_df.unstack(0) + tm.assert_frame_equal(unstacked_cols.T, df) + tm.assert_frame_equal(unstacked_cols_df["bar"].T, df) + + def test_stack_mixed_level(self): + # GH 18310 + levels = [range(3), [3, "a", "b"], [1, 2]] + + # flat columns: + df = DataFrame(1, index=levels[0], columns=levels[1]) + result = df.stack() + expected = Series(1, index=MultiIndex.from_product(levels[:2])) + tm.assert_series_equal(result, expected) + + # MultiIndex columns: + df = DataFrame(1, index=levels[0], columns=MultiIndex.from_product(levels[1:])) + result = df.stack(1) + expected = DataFrame( + 1, index=MultiIndex.from_product([levels[0], levels[2]]), columns=levels[1] + ) + tm.assert_frame_equal(result, expected) + + # as above, but used labels in level are actually of homogeneous type + result = df[["a", "b"]].stack(1) + expected = expected[["a", "b"]] + tm.assert_frame_equal(result, expected) + + def test_unstack_fill(self): + + # GH #9746: fill_value keyword argument for Series + # and DataFrame unstack + + # From a series + data = Series([1, 2, 4, 5], dtype=np.int16) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack(fill_value=-1) + expected = DataFrame( + {"a": [1, -1, 5], "b": [2, 4, -1]}, index=["x", "y", "z"], dtype=np.int16 + ) + tm.assert_frame_equal(result, expected) + 
+ # From a series with incorrect data type for fill_value + result = data.unstack(fill_value=0.5) + expected = DataFrame( + {"a": [1, 0.5, 5], "b": [2, 4, 0.5]}, index=["x", "y", "z"], dtype=np.float + ) + tm.assert_frame_equal(result, expected) + + # GH #13971: fill_value when unstacking multiple levels: + df = DataFrame( + {"x": ["a", "a", "b"], "y": ["j", "k", "j"], "z": [0, 1, 2], "w": [0, 1, 2]} + ).set_index(["x", "y", "z"]) + unstacked = df.unstack(["x", "y"], fill_value=0) + key = ("w", "b", "j") + expected = unstacked[key] + result = pd.Series([0, 0, 2], index=unstacked.index, name=key) + tm.assert_series_equal(result, expected) + + stacked = unstacked.stack(["x", "y"]) + stacked.index = stacked.index.reorder_levels(df.index.names) + # Workaround for GH #17886 (unnecessarily casts to float): + stacked = stacked.astype(np.int64) + result = stacked.loc[df.index] + tm.assert_frame_equal(result, df) + + # From a series + s = df["w"] + result = s.unstack(["x", "y"], fill_value=0) + expected = unstacked["w"] + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame(self): + + # From a dataframe + rows = [[1, 2], [3, 4], [5, 6], [7, 8]] + df = DataFrame(rows, columns=list("AB"), dtype=np.int32) + df.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = df.unstack(fill_value=-1) + + rows = [[1, 3, 2, 4], [-1, 5, -1, 6], [7, -1, 8, -1]] + expected = DataFrame(rows, index=list("xyz"), dtype=np.int32) + expected.columns = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ) + tm.assert_frame_equal(result, expected) + + # From a mixed type dataframe + df["A"] = df["A"].astype(np.int16) + df["B"] = df["B"].astype(np.float64) + + result = df.unstack(fill_value=-1) + expected["A"] = expected["A"].astype(np.int16) + expected["B"] = expected["B"].astype(np.float64) + tm.assert_frame_equal(result, expected) + + # From a dataframe with incorrect data type for fill_value + result = 
df.unstack(fill_value=0.5) + + rows = [[1, 3, 2, 4], [0.5, 5, 0.5, 6], [7, 0.5, 8, 0.5]] + expected = DataFrame(rows, index=list("xyz"), dtype=np.float) + expected.columns = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_datetime(self): + + # Test unstacking with date times + dv = pd.date_range("2012-01-01", periods=4).values + data = Series(dv) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [dv[0], pd.NaT, dv[3]], "b": [dv[1], dv[2], pd.NaT]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=dv[0]) + expected = DataFrame( + {"a": [dv[0], dv[0], dv[3]], "b": [dv[1], dv[2], dv[0]]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_timedelta(self): + + # Test unstacking with time deltas + td = [Timedelta(days=i) for i in range(4)] + data = Series(td) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [td[0], pd.NaT, td[3]], "b": [td[1], td[2], pd.NaT]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=td[1]) + expected = DataFrame( + {"a": [td[0], td[1], td[3]], "b": [td[1], td[2], td[1]]}, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_period(self): + + # Test unstacking with period + periods = [ + Period("2012-01"), + Period("2012-02"), + Period("2012-03"), + Period("2012-04"), + ] + data = Series(periods) + data.index = MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + result = data.unstack() + expected = DataFrame( + {"a": [periods[0], None, periods[3]], "b": [periods[1], periods[2], None]}, + 
index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + result = data.unstack(fill_value=periods[1]) + expected = DataFrame( + { + "a": [periods[0], periods[1], periods[3]], + "b": [periods[1], periods[2], periods[1]], + }, + index=["x", "y", "z"], + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_fill_frame_categorical(self): + + # Test unstacking with categorical + data = pd.Series(["a", "b", "c", "a"], dtype="category") + data.index = pd.MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + # By default missing values will be NaN + result = data.unstack() + expected = DataFrame( + { + "a": pd.Categorical(list("axa"), categories=list("abc")), + "b": pd.Categorical(list("bcx"), categories=list("abc")), + }, + index=list("xyz"), + ) + tm.assert_frame_equal(result, expected) + + # Fill with non-category results in a TypeError + msg = r"'fill_value' \('d'\) is not in" + with pytest.raises(TypeError, match=msg): + data.unstack(fill_value="d") + + # Fill with category value replaces missing values as expected + result = data.unstack(fill_value="c") + expected = DataFrame( + { + "a": pd.Categorical(list("aca"), categories=list("abc")), + "b": pd.Categorical(list("bcc"), categories=list("abc")), + }, + index=list("xyz"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_preserve_dtypes(self): + # Checks fix for #11847 + df = pd.DataFrame( + dict( + state=["IL", "MI", "NC"], + index=["a", "b", "c"], + some_categories=pd.Series(["a", "b", "c"]).astype("category"), + A=np.random.rand(3), + B=1, + C="foo", + D=pd.Timestamp("20010102"), + E=pd.Series([1.0, 50.0, 100.0]).astype("float32"), + F=pd.Series([3.0, 4.0, 5.0]).astype("float64"), + G=False, + H=pd.Series([1, 200, 923442], dtype="int8"), + ) + ) + + def unstack_and_compare(df, column_name): + unstacked1 = df.unstack([column_name]) + unstacked2 = df.unstack(column_name) + tm.assert_frame_equal(unstacked1, unstacked2) + + df1 = 
df.set_index(["state", "index"]) + unstack_and_compare(df1, "index") + + df1 = df.set_index(["state", "some_categories"]) + unstack_and_compare(df1, "some_categories") + + df1 = df.set_index(["F", "C"]) + unstack_and_compare(df1, "F") + + df1 = df.set_index(["G", "B", "state"]) + unstack_and_compare(df1, "B") + + df1 = df.set_index(["E", "A"]) + unstack_and_compare(df1, "E") + + df1 = df.set_index(["state", "index"]) + s = df1["A"] + unstack_and_compare(s, "index") + + def test_stack_ints(self): + columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3))) + df = DataFrame(np.random.randn(30, 27), columns=columns) + + tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) + tm.assert_frame_equal( + df.stack(level=[-2, -1]), df.stack(level=1).stack(level=1) + ) + + df_named = df.copy() + df_named.columns.set_names(range(3), inplace=True) + + tm.assert_frame_equal( + df_named.stack(level=[1, 2]), df_named.stack(level=1).stack(level=1) + ) + + def test_stack_mixed_levels(self): + columns = MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + df = DataFrame(np.random.randn(4, 4), columns=columns) + + animal_hair_stacked = df.stack(level=["animal", "hair_length"]) + exp_hair_stacked = df.stack(level=["exp", "hair_length"]) + + # GH #8584: Need to check that stacking works when a number + # is passed that is both a level name and in the range of + # the level numbers + df2 = df.copy() + df2.columns.names = ["exp", "animal", 1] + tm.assert_frame_equal( + df2.stack(level=["animal", 1]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=["exp", 1]), exp_hair_stacked, check_names=False + ) + + # When mixed types are passed and the ints are not level + # names, raise + msg = ( + "level should contain all level names or all level numbers, not" + " a mixture of the two" + ) + 
with pytest.raises(ValueError, match=msg): + df2.stack(level=["animal", 0]) + + # GH #8584: Having 0 in the level names could raise a + # strange error about lexsort depth + df3 = df.copy() + df3.columns.names = ["exp", "animal", 0] + tm.assert_frame_equal( + df3.stack(level=["animal", 0]), animal_hair_stacked, check_names=False + ) + + def test_stack_int_level_names(self): + columns = MultiIndex.from_tuples( + [ + ("A", "cat", "long"), + ("B", "cat", "long"), + ("A", "dog", "short"), + ("B", "dog", "short"), + ], + names=["exp", "animal", "hair_length"], + ) + df = DataFrame(np.random.randn(4, 4), columns=columns) + + exp_animal_stacked = df.stack(level=["exp", "animal"]) + animal_hair_stacked = df.stack(level=["animal", "hair_length"]) + exp_hair_stacked = df.stack(level=["exp", "hair_length"]) + + df2 = df.copy() + df2.columns.names = [0, 1, 2] + tm.assert_frame_equal( + df2.stack(level=[1, 2]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=[0, 1]), exp_animal_stacked, check_names=False + ) + tm.assert_frame_equal( + df2.stack(level=[0, 2]), exp_hair_stacked, check_names=False + ) + + # Out-of-order int column names + df3 = df.copy() + df3.columns.names = [2, 0, 1] + tm.assert_frame_equal( + df3.stack(level=[0, 1]), animal_hair_stacked, check_names=False + ) + tm.assert_frame_equal( + df3.stack(level=[2, 0]), exp_animal_stacked, check_names=False + ) + tm.assert_frame_equal( + df3.stack(level=[2, 1]), exp_hair_stacked, check_names=False + ) + + def test_unstack_bool(self): + df = DataFrame( + [False, False], + index=MultiIndex.from_arrays([["a", "b"], ["c", "l"]]), + columns=["col"], + ) + rs = df.unstack() + xp = DataFrame( + np.array([[False, np.nan], [np.nan, False]], dtype=object), + index=["a", "b"], + columns=MultiIndex.from_arrays([["col", "col"], ["c", "l"]]), + ) + tm.assert_frame_equal(rs, xp) + + def test_unstack_level_binding(self): + # GH9856 + mi = pd.MultiIndex( + levels=[["foo", "bar"], ["one", "two"], 
["a", "b"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]], + names=["first", "second", "third"], + ) + s = pd.Series(0, index=mi) + result = s.unstack([1, 2]).stack(0) + + expected_mi = pd.MultiIndex( + levels=[["foo", "bar"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["first", "second"], + ) + + expected = pd.DataFrame( + np.array( + [[np.nan, 0], [0, np.nan], [np.nan, 0], [0, np.nan]], dtype=np.float64 + ), + index=expected_mi, + columns=pd.Index(["a", "b"], name="third"), + ) + + tm.assert_frame_equal(result, expected) + + def test_unstack_to_series(self, float_frame): + # check reversibility + data = float_frame.unstack() + + assert isinstance(data, Series) + undo = data.unstack().T + tm.assert_frame_equal(undo, float_frame) + + # check NA handling + data = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) + data.index = Index(["a", "b", "c"]) + result = data.unstack() + + midx = MultiIndex( + levels=[["x", "y"], ["a", "b", "c"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + ) + expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx) + + tm.assert_series_equal(result, expected) + + # check composability of unstack + old_data = data.copy() + for _ in range(4): + data = data.unstack() + tm.assert_frame_equal(old_data, data) + + def test_unstack_dtypes(self): + + # GH 2929 + rows = [[1, 1, 3, 4], [1, 2, 3, 4], [2, 1, 3, 4], [2, 2, 3, 4]] + + df = DataFrame(rows, columns=list("ABCD")) + result = df.dtypes + expected = Series([np.dtype("int64")] * 4, index=list("ABCD")) + tm.assert_series_equal(result, expected) + + # single dtype + df2 = df.set_index(["A", "B"]) + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + [np.dtype("int64")] * 4, + index=pd.MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + + # mixed + df2 = df.set_index(["A", "B"]) + df2["C"] = 3.0 + df3 = df2.unstack("B") + result = df3.dtypes + expected = 
Series( + [np.dtype("float64")] * 2 + [np.dtype("int64")] * 2, + index=pd.MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + df2["D"] = "foo" + df3 = df2.unstack("B") + result = df3.dtypes + expected = Series( + [np.dtype("float64")] * 2 + [np.dtype("object")] * 2, + index=pd.MultiIndex.from_arrays( + [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") + ), + ) + tm.assert_series_equal(result, expected) + + # GH7405 + for c, d in ( + (np.zeros(5), np.zeros(5)), + (np.arange(5, dtype="f8"), np.arange(5, 10, dtype="f8")), + ): + + df = DataFrame( + { + "A": ["a"] * 5, + "C": c, + "D": d, + "B": pd.date_range("2012-01-01", periods=5), + } + ) + + right = df.iloc[:3].copy(deep=True) + + df = df.set_index(["A", "B"]) + df["D"] = df["D"].astype("int64") + + left = df.iloc[:3].unstack(0) + right = right.set_index(["A", "B"]).unstack(0) + right[("D", "a")] = right[("D", "a")].astype("int64") + + assert left.shape == (3, 2) + tm.assert_frame_equal(left, right) + + def test_unstack_non_unique_index_names(self): + idx = MultiIndex.from_tuples([("a", "b"), ("c", "d")], names=["c1", "c1"]) + df = DataFrame([1, 2], index=idx) + with pytest.raises(ValueError): + df.unstack("c1") + + with pytest.raises(ValueError): + df.T.stack("c1") + + def test_unstack_unused_levels(self): + # GH 17845: unused codes in index make unstack() cast int to float + idx = pd.MultiIndex.from_product([["a"], ["A", "B", "C", "D"]])[:-1] + df = pd.DataFrame([[1, 0]] * 3, index=idx) + + result = df.unstack() + exp_col = pd.MultiIndex.from_product([[0, 1], ["A", "B", "C"]]) + expected = pd.DataFrame([[1, 1, 1, 0, 0, 0]], index=["a"], columns=exp_col) + tm.assert_frame_equal(result, expected) + assert (result.columns.levels[1] == idx.levels[1]).all() + + # Unused items on both levels + levels = [[0, 1, 7], [0, 1, 2, 3]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] + idx = pd.MultiIndex(levels, codes) + block = 
np.arange(4).reshape(2, 2) + df = pd.DataFrame(np.concatenate([block, block + 4]), index=idx) + result = df.unstack() + expected = pd.DataFrame( + np.concatenate([block * 2, block * 2 + 1], axis=1), columns=idx + ) + tm.assert_frame_equal(result, expected) + assert (result.columns.levels[1] == idx.levels[1]).all() + + # With mixed dtype and NaN + levels = [["a", 2, "c"], [1, 3, 5, 7]] + codes = [[0, -1, 1, 1], [0, 2, -1, 2]] + idx = pd.MultiIndex(levels, codes) + data = np.arange(8) + df = pd.DataFrame(data.reshape(4, 2), index=idx) + + cases = ( + (0, [13, 16, 6, 9, 2, 5, 8, 11], [np.nan, "a", 2], [np.nan, 5, 1]), + (1, [8, 11, 1, 4, 12, 15, 13, 16], [np.nan, 5, 1], [np.nan, "a", 2]), + ) + for level, idces, col_level, idx_level in cases: + result = df.unstack(level=level) + exp_data = np.zeros(18) * np.nan + exp_data[idces] = data + cols = pd.MultiIndex.from_product([[0, 1], col_level]) + expected = pd.DataFrame( + exp_data.reshape(3, 6), index=idx_level, columns=cols + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("cols", [["A", "C"], slice(None)]) + def test_unstack_unused_level(self, cols): + # GH 18562 : unused codes on the unstacked level + df = pd.DataFrame( + [[2010, "a", "I"], [2011, "b", "II"]], columns=["A", "B", "C"] + ) + + ind = df.set_index(["A", "B", "C"], drop=False) + selection = ind.loc[(slice(None), slice(None), "I"), cols] + result = selection.unstack() + + expected = ind.iloc[[0]][cols] + expected.columns = MultiIndex.from_product( + [expected.columns, ["I"]], names=[None, "C"] + ) + expected.index = expected.index.droplevel("C") + tm.assert_frame_equal(result, expected) + + def test_unstack_nan_index(self): # GH7466 + cast = lambda val: "{0:1}".format("" if val != val else val) + + def verify(df): + mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] + rows, cols = df.notna().values.nonzero() + for i, j in zip(rows, cols): + left = sorted(df.iloc[i, j].split(".")) + right = mk_list(df.index[i]) + 
mk_list(df.columns[j]) + right = sorted(map(cast, right)) + assert left == right + + df = DataFrame( + { + "jim": ["a", "b", np.nan, "d"], + "joe": ["w", "x", "y", "z"], + "jolie": ["a.w", "b.x", " .y", "d.z"], + } + ) + + left = df.set_index(["jim", "joe"]).unstack()["jolie"] + right = df.set_index(["joe", "jim"]).unstack()["jolie"].T + tm.assert_frame_equal(left, right) + + for idx in itertools.permutations(df.columns[:2]): + mi = df.set_index(list(idx)) + for lev in range(2): + udf = mi.unstack(level=lev) + assert udf.notna().values.sum() == len(df) + verify(udf["jolie"]) + + df = DataFrame( + { + "1st": ["d"] * 3 + + [np.nan] * 5 + + ["a"] * 2 + + ["c"] * 3 + + ["e"] * 2 + + ["b"] * 5, + "2nd": ["y"] * 2 + + ["w"] * 3 + + [np.nan] * 3 + + ["z"] * 4 + + [np.nan] * 3 + + ["x"] * 3 + + [np.nan] * 2, + "3rd": [ + 67, + 39, + 53, + 72, + 57, + 80, + 31, + 18, + 11, + 30, + 59, + 50, + 62, + 59, + 76, + 52, + 14, + 53, + 60, + 51, + ], + } + ) + + df["4th"], df["5th"] = ( + df.apply(lambda r: ".".join(map(cast, r)), axis=1), + df.apply(lambda r: ".".join(map(cast, r.iloc[::-1])), axis=1), + ) + + for idx in itertools.permutations(["1st", "2nd", "3rd"]): + mi = df.set_index(list(idx)) + for lev in range(3): + udf = mi.unstack(level=lev) + assert udf.notna().values.sum() == 2 * len(df) + for col in ["4th", "5th"]: + verify(udf[col]) + + # GH7403 + df = pd.DataFrame({"A": list("aaaabbbb"), "B": range(8), "C": range(8)}) + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [ + [3, 0, 1, 2, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 4, 5, 6, 7], + ] + vals = list(map(list, zip(*vals))) + idx = Index([np.nan, 0, 1, 2, 4, 5, 6, 7], name="B") + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)}) + df.iloc[2, 
1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [[2, np.nan], [0, 4], [1, 5], [np.nan, 6], [3, 7]] + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + idx = Index([np.nan, 0, 1, 2, 3], name="B") + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + df = pd.DataFrame( + {"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)} + ) + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack(0) + + vals = [[3, np.nan], [0, 4], [1, 5], [2, 6], [np.nan, 7]] + cols = MultiIndex( + levels=[["C"], ["a", "b"]], codes=[[0, 0], [0, 1]], names=[None, "A"] + ) + idx = Index([np.nan, 0, 1, 2, 3], name="B") + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + # GH7401 + df = pd.DataFrame( + { + "A": list("aaaaabbbbb"), + "B": (date_range("2012-01-01", periods=5).tolist() * 2), + "C": np.arange(10), + } + ) + + df.iloc[3, 1] = np.NaN + left = df.set_index(["A", "B"]).unstack() + + vals = np.array([[3, 0, 1, 2, np.nan, 4], [np.nan, 5, 6, 7, 8, 9]]) + idx = Index(["a", "b"], name="A") + cols = MultiIndex( + levels=[["C"], date_range("2012-01-01", periods=5)], + codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]], + names=[None, "B"], + ) + + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + # GH4862 + vals = [ + ["Hg", np.nan, np.nan, 680585148], + ["U", 0.0, np.nan, 680585148], + ["Pb", 7.07e-06, np.nan, 680585148], + ["Sn", 2.3614e-05, 0.0133, 680607017], + ["Ag", 0.0, 0.0133, 680607017], + ["Hg", -0.00015, 0.0133, 680607017], + ] + df = DataFrame( + vals, + columns=["agent", "change", "dosage", "s_id"], + index=[17263, 17264, 17265, 17266, 17267, 17268], + ) + + left = df.copy().set_index(["s_id", "dosage", "agent"]).unstack() + + vals = [ + [np.nan, np.nan, 7.07e-06, np.nan, 0.0], + [0.0, -0.00015, np.nan, 2.3614e-05, np.nan], + ] + + idx = MultiIndex( + levels=[[680585148, 680607017], 
[0.0133]], + codes=[[0, 1], [-1, 0]], + names=["s_id", "dosage"], + ) + + cols = MultiIndex( + levels=[["change"], ["Ag", "Hg", "Pb", "Sn", "U"]], + codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]], + names=[None, "agent"], + ) + + right = DataFrame(vals, columns=cols, index=idx) + tm.assert_frame_equal(left, right) + + left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"]) + tm.assert_frame_equal(left.unstack(), right) + + # GH9497 - multiple unstack with nulls + df = DataFrame( + { + "1st": [1, 2, 1, 2, 1, 2], + "2nd": pd.date_range("2014-02-01", periods=6, freq="D"), + "jim": 100 + np.arange(6), + "joe": (np.random.randn(6) * 10).round(2), + } + ) + + df["3rd"] = df["2nd"] - pd.Timestamp("2014-02-02") + df.loc[1, "2nd"] = df.loc[3, "2nd"] = np.nan + df.loc[1, "3rd"] = df.loc[4, "3rd"] = np.nan + + left = df.set_index(["1st", "2nd", "3rd"]).unstack(["2nd", "3rd"]) + assert left.notna().values.sum() == 2 * len(df) + + for col in ["jim", "joe"]: + for _, r in df.iterrows(): + key = r["1st"], (col, r["2nd"], r["3rd"]) + assert r[col] == left.loc[key] + + def test_stack_datetime_column_multiIndex(self): + # GH 8039 + t = datetime(2014, 1, 1) + df = DataFrame([1, 2, 3, 4], columns=MultiIndex.from_tuples([(t, "A", "B")])) + result = df.stack() + + eidx = MultiIndex.from_product([(0, 1, 2, 3), ("B",)]) + ecols = MultiIndex.from_tuples([(t, "A")]) + expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols) + tm.assert_frame_equal(result, expected) + + def test_stack_partial_multiIndex(self): + # GH 8844 + def _test_stack_with_multiindex(multiindex): + df = DataFrame( + np.arange(3 * len(multiindex)).reshape(3, len(multiindex)), + columns=multiindex, + ) + for level in (-1, 0, 1, [0, 1], [1, 0]): + result = df.stack(level=level, dropna=False) + + if isinstance(level, int): + # Stacking a single level should not make any all-NaN rows, + # so df.stack(level=level, dropna=False) should be the same + # as df.stack(level=level, dropna=True). 
+ expected = df.stack(level=level, dropna=True) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + df.columns = MultiIndex.from_tuples( + df.columns.to_numpy(), names=df.columns.names + ) + expected = df.stack(level=level, dropna=False) + if isinstance(expected, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + full_multiindex = MultiIndex.from_tuples( + [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], + names=["Upper", "Lower"], + ) + for multiindex_columns in ( + [0, 1, 2, 3, 4], + [0, 1, 2, 3], + [0, 1, 2, 4], + [0, 1, 2], + [1, 2, 3], + [2, 3, 4], + [0, 1], + [0, 2], + [0, 3], + [0], + [2], + [4], + ): + _test_stack_with_multiindex(full_multiindex[multiindex_columns]) + if len(multiindex_columns) > 1: + multiindex_columns.reverse() + _test_stack_with_multiindex(full_multiindex[multiindex_columns]) + + df = DataFrame(np.arange(6).reshape(2, 3), columns=full_multiindex[[0, 1, 3]]) + result = df.stack(dropna=False) + expected = DataFrame( + [[0, 2], [1, np.nan], [3, 5], [4, np.nan]], + index=MultiIndex( + levels=[[0, 1], ["u", "x", "y", "z"]], + codes=[[0, 0, 1, 1], [1, 3, 1, 3]], + names=[None, "Lower"], + ), + columns=Index(["B", "C"], name="Upper"), + dtype=df.dtypes[0], + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("ordered", [False, True]) + @pytest.mark.parametrize("labels", [list("yxz"), list("yxy")]) + def test_stack_preserve_categorical_dtype(self, ordered, labels): + # GH13854 + cidx = pd.CategoricalIndex(labels, categories=list("xyz"), ordered=ordered) + df = DataFrame([[10, 11, 12]], columns=cidx) + result = df.stack() + + # `MultiIndex.from_product` preserves categorical dtype - + # it's tested elsewhere. 
+ midx = pd.MultiIndex.from_product([df.index, cidx]) + expected = Series([10, 11, 12], index=midx) + + tm.assert_series_equal(result, expected) + + def test_stack_preserve_categorical_dtype_values(self): + # GH-23077 + cat = pd.Categorical(["a", "a", "b", "c"]) + df = pd.DataFrame({"A": cat, "B": cat}) + result = df.stack() + index = pd.MultiIndex.from_product([[0, 1, 2, 3], ["A", "B"]]) + expected = pd.Series( + pd.Categorical(["a", "a", "a", "a", "b", "b", "c", "c"]), index=index + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index, columns", + [ + ([0, 0, 1, 1], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 0, 2, 3], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])), + ([0, 1, 2, 3], pd.MultiIndex.from_product([[1, 2], ["a", "b"]])), + ], + ) + def test_stack_multi_columns_non_unique_index(self, index, columns): + # GH-28301 + df = pd.DataFrame(index=index, columns=columns).fillna(1) + stacked = df.stack() + new_index = pd.MultiIndex.from_tuples(stacked.index.to_numpy()) + expected = pd.DataFrame( + stacked.to_numpy(), index=new_index, columns=stacked.columns + ) + tm.assert_frame_equal(stacked, expected) + stacked_codes = np.asarray(stacked.index.codes) + expected_codes = np.asarray(new_index.codes) + tm.assert_numpy_array_equal(stacked_codes, expected_codes) + + @pytest.mark.parametrize("level", [0, 1]) + def test_unstack_mixed_extension_types(self, level): + index = pd.MultiIndex.from_tuples( + [("A", 0), ("A", 1), ("B", 1)], names=["a", "b"] + ) + df = pd.DataFrame( + { + "A": pd.core.arrays.integer_array([0, 1, None]), + "B": pd.Categorical(["a", "a", "b"]), + }, + index=index, + ) + + result = df.unstack(level=level) + expected = df.astype(object).unstack(level=level) + + expected_dtypes = pd.Series( + [df.A.dtype] * 2 + [df.B.dtype] * 2, index=result.columns + ) + tm.assert_series_equal(result.dtypes, expected_dtypes) + tm.assert_frame_equal(result.astype(object), expected) + + 
@pytest.mark.parametrize("level", [0, "baz"]) + def test_unstack_swaplevel_sortlevel(self, level): + # GH 20994 + mi = pd.MultiIndex.from_product([[0], ["d", "c"]], names=["bar", "baz"]) + df = pd.DataFrame([[0, 2], [1, 3]], index=mi, columns=["B", "A"]) + df.columns.name = "foo" + + expected = pd.DataFrame( + [[3, 1, 2, 0]], + columns=pd.MultiIndex.from_tuples( + [("c", "A"), ("c", "B"), ("d", "A"), ("d", "B")], names=["baz", "foo"] + ), + ) + expected.index.name = "bar" + + result = df.unstack().swaplevel(axis=1).sort_index(axis=1, level=level) + tm.assert_frame_equal(result, expected) + + +def test_unstack_fill_frame_object(): + # GH12815 Test unstacking with object. + data = pd.Series(["a", "b", "c", "a"], dtype="object") + data.index = pd.MultiIndex.from_tuples( + [("x", "a"), ("x", "b"), ("y", "b"), ("z", "a")] + ) + + # By default missing values will be NaN + result = data.unstack() + expected = pd.DataFrame( + {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz") + ) + tm.assert_frame_equal(result, expected) + + # Fill with any value replaces missing values as expected + result = data.unstack(fill_value="d") + expected = pd.DataFrame( + {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz") + ) + tm.assert_frame_equal(result, expected) + + +def test_unstack_timezone_aware_values(): + # GH 18338 + df = pd.DataFrame( + { + "timestamp": [pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC")], + "a": ["a"], + "b": ["b"], + "c": ["c"], + }, + columns=["timestamp", "a", "b", "c"], + ) + result = df.set_index(["a", "b"]).unstack() + expected = pd.DataFrame( + [[pd.Timestamp("2017-08-27 01:00:00.709949+0000", tz="UTC"), "c"]], + index=pd.Index(["a"], name="a"), + columns=pd.MultiIndex( + levels=[["timestamp", "c"], ["b"]], + codes=[[0, 1], [0, 0]], + names=[None, "b"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_stack_timezone_aware_values(): + # GH 19420 + ts = pd.date_range( + freq="D", start="20180101", 
end="20180103", tz="America/New_York" + ) + df = pd.DataFrame({"A": ts}, index=["a", "b", "c"]) + result = df.stack() + expected = pd.Series( + ts, + index=pd.MultiIndex( + levels=[["a", "b", "c"], ["A"]], codes=[[0, 1, 2], [0, 0, 0]] + ), + ) + tm.assert_series_equal(result, expected) + + +def test_unstacking_multi_index_df(): + # see gh-30740 + df = DataFrame( + { + "name": ["Alice", "Bob"], + "score": [9.5, 8], + "employed": [False, True], + "kids": [0, 0], + "gender": ["female", "male"], + } + ) + df = df.set_index(["name", "employed", "kids", "gender"]) + df = df.unstack(["gender"], fill_value=0) + expected = df.unstack("employed", fill_value=0).unstack("kids", fill_value=0) + result = df.unstack(["employed", "kids"], fill_value=0) + expected = DataFrame( + [[9.5, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 8.0]], + index=Index(["Alice", "Bob"], name="name"), + columns=MultiIndex.from_tuples( + [ + ("score", "female", False, 0), + ("score", "female", True, 0), + ("score", "male", False, 0), + ("score", "male", True, 0), + ], + names=[None, "gender", "employed", "kids"], + ), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_sort_values_level_as_str.py b/pandas/tests/frame/test_sort_values_level_as_str.py new file mode 100644 index 00000000..40526ab2 --- /dev/null +++ b/pandas/tests/frame/test_sort_values_level_as_str.py @@ -0,0 +1,92 @@ +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.fixture +def df_none(): + return DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 2, 2, 1, 1], + "A": np.arange(6, 0, -1), + ("B", 5): ["one", "one", "two", "two", "one", "one"], + } + ) + + +@pytest.fixture(params=[["outer"], ["outer", "inner"]]) +def df_idx(request, df_none): + levels = request.param + return df_none.set_index(levels) + + +@pytest.fixture( + params=[ + "inner", # index level + ["outer"], # list of index 
level + "A", # column + [("B", 5)], # list of column + ["inner", "outer"], # two index levels + [("B", 5), "outer"], # index level and column + ["A", ("B", 5)], # Two columns + ["inner", "outer"], # two index levels and column (NOTE(review): no column is listed here; value duplicates the "two index levels" case above) + ] +) +def sort_names(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def ascending(request): + return request.param + + +def test_sort_index_level_and_column_label(df_none, df_idx, sort_names, ascending): + + # GH 14353 + + # Get index levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on columns and then setting the index + expected = df_none.sort_values( + by=sort_names, ascending=ascending, axis=0 + ).set_index(levels) + + # Compute result by sorting on a mix of columns and index levels + result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0) + + tm.assert_frame_equal(result, expected) + + +def test_sort_column_level_and_index_label(df_none, df_idx, sort_names, ascending): + + # GH 14353 + + # Get levels from df_idx + levels = df_idx.index.names + + # Compute expected by sorting on axis=0, setting index levels, and then + # transposing. For some cases this will result in a frame with + # multiple column levels + expected = ( + df_none.sort_values(by=sort_names, ascending=ascending, axis=0) + .set_index(levels) + .T + ) + + # Compute result by transposing and sorting on axis=1. 
+ result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1) + + if len(levels) > 1: + # Accessing multi-level columns that are not lexsorted raises a + # performance warning + with tm.assert_produces_warning(PerformanceWarning, check_stacklevel=False): + tm.assert_frame_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py new file mode 100644 index 00000000..4a436d70 --- /dev/null +++ b/pandas/tests/frame/test_subclass.py @@ -0,0 +1,559 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm + + +class TestDataFrameSubclassing: + def test_frame_subclassing_and_slicing(self): + # Subclass frame and ensure it returns the right class on slicing it + # In reference to PR 9632 + + class CustomSeries(Series): + @property + def _constructor(self): + return CustomSeries + + def custom_series_function(self): + return "OK" + + class CustomDataFrame(DataFrame): + """ + Subclasses pandas DF, fills DF with simulation results, adds some + custom plotting functions. + """ + + def __init__(self, *args, **kw): + super().__init__(*args, **kw) + + @property + def _constructor(self): + return CustomDataFrame + + _constructor_sliced = CustomSeries + + def custom_frame_function(self): + return "OK" + + data = {"col1": range(10), "col2": range(10)} + cdf = CustomDataFrame(data) + + # Did we get back our own DF class? + assert isinstance(cdf, CustomDataFrame) + + # Do we get back our own Series class after selecting a column? + cdf_series = cdf.col1 + assert isinstance(cdf_series, CustomSeries) + assert cdf_series.custom_series_function() == "OK" + + # Do we get back our own DF class after slicing row-wise? 
+ cdf_rows = cdf[1:5] + assert isinstance(cdf_rows, CustomDataFrame) + assert cdf_rows.custom_frame_function() == "OK" + + # Make sure sliced part of multi-index frame is custom class + mcol = pd.MultiIndex.from_tuples([("A", "A"), ("A", "B")]) + cdf_multi = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) + assert isinstance(cdf_multi["A"], CustomDataFrame) + + mcol = pd.MultiIndex.from_tuples([("A", ""), ("B", "")]) + cdf_multi2 = CustomDataFrame([[0, 1], [2, 3]], columns=mcol) + assert isinstance(cdf_multi2["A"], CustomSeries) + + def test_dataframe_metadata(self): + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [1, 2, 3]}, index=["a", "b", "c"] + ) + df.testattr = "XXX" + + assert df.testattr == "XXX" + assert df[["X"]].testattr == "XXX" + assert df.loc[["a", "b"], :].testattr == "XXX" + assert df.iloc[[0, 1], :].testattr == "XXX" + + # see gh-9776 + assert df.iloc[0:1, :].testattr == "XXX" + + # see gh-10553 + unpickled = tm.round_trip_pickle(df) + tm.assert_frame_equal(df, unpickled) + assert df._metadata == unpickled._metadata + assert df.testattr == unpickled.testattr + + def test_indexing_sliced(self): + # GH 11559 + df = tm.SubclassedDataFrame( + {"X": [1, 2, 3], "Y": [4, 5, 6], "Z": [7, 8, 9]}, index=["a", "b", "c"] + ) + res = df.loc[:, "X"] + exp = tm.SubclassedSeries([1, 2, 3], index=list("abc"), name="X") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.iloc[:, 1] + exp = tm.SubclassedSeries([4, 5, 6], index=list("abc"), name="Y") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc[:, "Z"] + exp = tm.SubclassedSeries([7, 8, 9], index=list("abc"), name="Z") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc["a", :] + exp = tm.SubclassedSeries([1, 4, 7], index=list("XYZ"), name="a") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.iloc[1, :] + exp = 
tm.SubclassedSeries([2, 5, 8], index=list("XYZ"), name="b") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + res = df.loc["c", :] + exp = tm.SubclassedSeries([3, 6, 9], index=list("XYZ"), name="c") + tm.assert_series_equal(res, exp) + assert isinstance(res, tm.SubclassedSeries) + + def test_subclass_attr_err_propagation(self): + # GH 11808 + class A(DataFrame): + @property + def bar(self): + return self.i_dont_exist + + with pytest.raises(AttributeError, match=".*i_dont_exist.*"): + A().bar + + def test_subclass_align(self): + # GH 12983 + df1 = tm.SubclassedDataFrame( + {"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE") + ) + df2 = tm.SubclassedDataFrame( + {"c": [1, 2, 4], "d": [1, 2, 4]}, index=list("ABD") + ) + + res1, res2 = df1.align(df2, axis=0) + exp1 = tm.SubclassedDataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + exp2 = tm.SubclassedDataFrame( + {"c": [1, 2, np.nan, 4, np.nan], "d": [1, 2, np.nan, 4, np.nan]}, + index=list("ABCDE"), + ) + assert isinstance(res1, tm.SubclassedDataFrame) + tm.assert_frame_equal(res1, exp1) + assert isinstance(res2, tm.SubclassedDataFrame) + tm.assert_frame_equal(res2, exp2) + + res1, res2 = df1.a.align(df2.c) + assert isinstance(res1, tm.SubclassedSeries) + tm.assert_series_equal(res1, exp1.a) + assert isinstance(res2, tm.SubclassedSeries) + tm.assert_series_equal(res2, exp2.c) + + def test_subclass_align_combinations(self): + # GH 12983 + df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) + s = tm.SubclassedSeries([1, 2, 4], index=list("ABD"), name="x") + + # frame + series + res1, res2 = df.align(s, axis=0) + exp1 = pd.DataFrame( + {"a": [1, np.nan, 3, np.nan, 5], "b": [1, np.nan, 3, np.nan, 5]}, + index=list("ABCDE"), + ) + # NOTE(review): the original comment ("name is lost when") is truncated; the expected Series below is built with name="x" + exp2 = pd.Series([1, 2, np.nan, 4, np.nan], index=list("ABCDE"), name="x") + + assert isinstance(res1, tm.SubclassedDataFrame) + 
tm.assert_frame_equal(res1, exp1) + assert isinstance(res2, tm.SubclassedSeries) + tm.assert_series_equal(res2, exp2) + + # series + frame + res1, res2 = s.align(df) + assert isinstance(res1, tm.SubclassedSeries) + tm.assert_series_equal(res1, exp2) + assert isinstance(res2, tm.SubclassedDataFrame) + tm.assert_frame_equal(res2, exp1) + + def test_subclass_iterrows(self): + # GH 13977 + df = tm.SubclassedDataFrame({"a": [1]}) + for i, row in df.iterrows(): + assert isinstance(row, tm.SubclassedSeries) + tm.assert_series_equal(row, df.loc[i]) + + def test_subclass_stack(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["a", "b", "c"], + columns=["X", "Y", "Z"], + ) + + res = df.stack() + exp = tm.SubclassedSeries( + [1, 2, 3, 4, 5, 6, 7, 8, 9], index=[list("aaabbbccc"), list("XYZXYZXYZ")] + ) + + tm.assert_series_equal(res, exp) + + def test_subclass_stack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 12], + [11, 13], + [20, 22], + [21, 23], + [30, 32], + [31, 33], + [40, 42], + [41, 43], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))), + names=["aaa", "ccc", "yyy"], + ), + columns=Index(["W", "X"], name="www"), + ) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack("yyy") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10, 11], + [12, 13], + [20, 21], + [22, 23], + [30, 31], + [32, 33], + [40, 41], + [42, 43], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))), + names=["aaa", "ccc", "www"], + ), + columns=Index(["y", "z"], name="yyy"), 
+ ) + + res = df.stack("www") + tm.assert_frame_equal(res, exp) + + def test_subclass_stack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 12.0], + [11, 13.0], + [20, 22.0], + [21, 23.0], + [30, 32.0], + [31, 33.0], + [40, 42.0], + [41, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("yzyzyzyz"))), + names=["aaa", "ccc", "yyy"], + ), + columns=Index(["W", "X"], name="www"), + ) + + res = df.stack() + tm.assert_frame_equal(res, exp) + + res = df.stack("yyy") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10.0, 11.0], + [12.0, 13.0], + [20.0, 21.0], + [22.0, 23.0], + [30.0, 31.0], + [32.0, 33.0], + [40.0, 41.0], + [42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AAAABBBB"), list("ccddccdd"), list("WXWXWXWX"))), + names=["aaa", "ccc", "www"], + ), + columns=Index(["y", "z"], name="yyy"), + ) + + res = df.stack("www") + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["a", "b", "c"], + columns=["X", "Y", "Z"], + ) + + res = df.unstack() + exp = tm.SubclassedSeries( + [1, 4, 7, 2, 5, 8, 3, 6, 9], index=[list("XXXYYYZZZ"), list("abcabcabc")] + ) + + tm.assert_series_equal(res, exp) + + def test_subclass_unstack_multi(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), 
list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [[10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]], + index=Index(["A", "B"], name="aaa"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))), + names=["www", "yyy", "ccc"], + ), + ) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack("ccc") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [[10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]], + index=Index(["c", "d"], name="ccc"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))), + names=["www", "yyy", "aaa"], + ), + ) + + res = df.unstack("aaa") + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack_multi_mixed(self): + # GH 15564 + df = tm.SubclassedDataFrame( + [ + [10, 11, 12.0, 13.0], + [20, 21, 22.0, 23.0], + [30, 31, 32.0, 33.0], + [40, 41, 42.0, 43.0], + ], + index=MultiIndex.from_tuples( + list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"] + ), + columns=MultiIndex.from_tuples( + list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"] + ), + ) + + exp = tm.SubclassedDataFrame( + [ + [10, 20, 11, 21, 12.0, 22.0, 13.0, 23.0], + [30, 40, 31, 41, 32.0, 42.0, 33.0, 43.0], + ], + index=Index(["A", "B"], name="aaa"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))), + names=["www", "yyy", "ccc"], + ), + ) + + res = df.unstack() + tm.assert_frame_equal(res, exp) + + res = df.unstack("ccc") + tm.assert_frame_equal(res, exp) + + exp = tm.SubclassedDataFrame( + [ + [10, 30, 11, 31, 12.0, 32.0, 13.0, 33.0], + [20, 40, 21, 41, 22.0, 42.0, 23.0, 43.0], + ], + index=Index(["c", "d"], name="ccc"), + columns=MultiIndex.from_tuples( + list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))), + names=["www", "yyy", "aaa"], + ), + ) + + res = df.unstack("aaa") + tm.assert_frame_equal(res, 
exp) + + def test_subclass_pivot(self): + # GH 15564 + df = tm.SubclassedDataFrame( + { + "index": ["A", "B", "C", "C", "B", "A"], + "columns": ["One", "One", "One", "Two", "Two", "Two"], + "values": [1.0, 2.0, 3.0, 3.0, 2.0, 1.0], + } + ) + + pivoted = df.pivot(index="index", columns="columns", values="values") + + expected = tm.SubclassedDataFrame( + { + "One": {"A": 1.0, "B": 2.0, "C": 3.0}, + "Two": {"A": 1.0, "B": 2.0, "C": 3.0}, + } + ) + + expected.index.name, expected.columns.name = "index", "columns" + + tm.assert_frame_equal(pivoted, expected) + + def test_subclassed_melt(self): + # GH 15564 + cheese = tm.SubclassedDataFrame( + { + "first": ["John", "Mary"], + "last": ["Doe", "Bo"], + "height": [5.5, 6.0], + "weight": [130, 150], + } + ) + + melted = pd.melt(cheese, id_vars=["first", "last"]) + + expected = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 5.5], + ["Mary", "Bo", "height", 6.0], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + tm.assert_frame_equal(melted, expected) + + def test_subclassed_wide_to_long(self): + # GH 9762 + + np.random.seed(123) + x = np.random.randn(3) + df = tm.SubclassedDataFrame( + { + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A": ["a", "b", "c", "d", "e", "f"], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = tm.SubclassedDataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year") + + tm.assert_frame_equal(long_frame, expected) + + def test_subclassed_apply(self): + # GH 19822 + + def check_row_subclass(row): + assert isinstance(row, 
tm.SubclassedSeries) + + def strech(row): + if row["variable"] == "height": + row["value"] += 0.5 + return row + + df = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 5.5], + ["Mary", "Bo", "height", 6.0], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + df.apply(lambda x: check_row_subclass(x)) + df.apply(lambda x: check_row_subclass(x), axis=1) + + expected = tm.SubclassedDataFrame( + [ + ["John", "Doe", "height", 6.0], + ["Mary", "Bo", "height", 6.5], + ["John", "Doe", "weight", 130], + ["Mary", "Bo", "weight", 150], + ], + columns=["first", "last", "variable", "value"], + ) + + result = df.apply(lambda x: strech(x), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedDataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) + + result = df.apply(lambda x: tm.SubclassedSeries([1, 2, 3]), axis=1) + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand") + assert isinstance(result, tm.SubclassedDataFrame) + tm.assert_frame_equal(result, expected) + + expected = tm.SubclassedSeries([[1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3]]) + + result = df.apply(lambda x: [1, 2, 3], axis=1) + assert not isinstance(result, tm.SubclassedDataFrame) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py new file mode 100644 index 00000000..e89f4ee0 --- /dev/null +++ b/pandas/tests/frame/test_timeseries.py @@ -0,0 +1,549 @@ +from datetime import datetime, time +from itertools import product + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + +import 
pandas.tseries.offsets as offsets + + +@pytest.fixture(params=product([True, False], [True, False])) +def close_open_fixture(request): + return request.param + + +class TestDataFrameTimeSeriesMethods: + def test_frame_ctor_datetime64_column(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + dates = np.asarray(rng) + + df = DataFrame({"A": np.random.randn(len(rng)), "B": dates}) + assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) + + def test_frame_append_datetime64_column(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + df = DataFrame(index=np.arange(len(rng))) + + df["A"] = rng + assert np.issubdtype(df["A"].dtype, np.dtype("M8[ns]")) + + def test_frame_datetime64_pre1900_repr(self): + df = DataFrame({"year": date_range("1/1/1700", periods=50, freq="A-DEC")}) + # it works! + repr(df) + + def test_frame_append_datetime64_col_other_units(self): + n = 100 + + units = ["h", "m", "s", "ms", "D", "M", "Y"] + + ns_dtype = np.dtype("M8[ns]") + + for unit in units: + dtype = np.dtype("M8[{unit}]".format(unit=unit)) + vals = np.arange(n, dtype=np.int64).view(dtype) + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + ex_vals = to_datetime(vals.astype("O")).values + + assert df[unit].dtype == ns_dtype + assert (df[unit].values == ex_vals).all() + + # Test insertion into existing datetime64 column + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df["dates"] = np.arange(n, dtype=np.int64).view(ns_dtype) + + for unit in units: + dtype = np.dtype("M8[{unit}]".format(unit=unit)) + vals = np.arange(n, dtype=np.int64).view(dtype) + + tmp = df.copy() + + tmp["dates"] = vals + ex_vals = to_datetime(vals.astype("O")).values + + assert (tmp["dates"].values == ex_vals).all() + + def test_asfreq(self, datetime_frame): + offset_monthly = datetime_frame.asfreq(offsets.BMonthEnd()) + rule_monthly = datetime_frame.asfreq("BM") + + tm.assert_almost_equal(offset_monthly["A"], 
rule_monthly["A"]) + + filled = rule_monthly.asfreq("B", method="pad") # noqa + # TODO: actually check that this worked. + + # don't forget! + filled_dep = rule_monthly.asfreq("B", method="pad") # noqa + + # test does not blow up on length-0 DataFrame + zero_length = datetime_frame.reindex([]) + result = zero_length.asfreq("BM") + assert result is not zero_length + + def test_asfreq_datetimeindex(self): + df = DataFrame( + {"A": [1, 2, 3]}, + index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)], + ) + df = df.asfreq("B") + assert isinstance(df.index, DatetimeIndex) + + ts = df["A"].asfreq("B") + assert isinstance(ts.index, DatetimeIndex) + + def test_asfreq_fillvalue(self): + # test for fill value during upsampling, related to issue 3715 + + # setup + rng = pd.date_range("1/1/2016", periods=10, freq="2S") + ts = pd.Series(np.arange(len(rng)), index=rng) + df = pd.DataFrame({"one": ts}) + + # insert pre-existing missing value + df.loc["2016-01-01 00:00:08", "one"] = None + + actual_df = df.asfreq(freq="1S", fill_value=9.0) + expected_df = df.asfreq(freq="1S").fillna(9.0) + expected_df.loc["2016-01-01 00:00:08", "one"] = None + tm.assert_frame_equal(expected_df, actual_df) + + expected_series = ts.asfreq(freq="1S").fillna(9.0) + actual_series = ts.asfreq(freq="1S", fill_value=9.0) + tm.assert_series_equal(expected_series, actual_series) + + @pytest.mark.parametrize( + "data,idx,expected_first,expected_last", + [ + ({"A": [1, 2, 3]}, [1, 1, 2], 1, 2), + ({"A": [1, 2, 3]}, [1, 2, 2], 1, 2), + ({"A": [1, 2, 3, 4]}, ["d", "d", "d", "d"], "d", "d"), + ({"A": [1, np.nan, 3]}, [1, 1, 2], 1, 2), + ({"A": [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), + ({"A": [1, np.nan, 3]}, [1, 2, 2], 1, 2), + ], + ) + def test_first_last_valid( + self, float_frame, data, idx, expected_first, expected_last + ): + N = len(float_frame.index) + mat = np.random.randn(N) + mat[:5] = np.nan + mat[-5:] = np.nan + + frame = DataFrame({"foo": mat}, index=float_frame.index) + index = 
frame.first_valid_index() + + assert index == frame.index[5] + + index = frame.last_valid_index() + assert index == frame.index[-6] + + # GH12800 + empty = DataFrame() + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None + + # GH17400: no valid entries + frame[:] = np.nan + assert frame.last_valid_index() is None + assert frame.first_valid_index() is None + + # GH20499: it preserves freq with holes + frame.index = date_range("20110101", periods=N, freq="B") + frame.iloc[1] = 1 + frame.iloc[-2] = 1 + assert frame.first_valid_index() == frame.index[1] + assert frame.last_valid_index() == frame.index[-2] + assert frame.first_valid_index().freq == frame.index.freq + assert frame.last_valid_index().freq == frame.index.freq + + # GH 21441 + df = DataFrame(data, index=idx) + assert expected_first == df.first_valid_index() + assert expected_last == df.last_valid_index() + + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_first_valid_index_all_nan(self, klass): + # GH#9752 Series/DataFrame should both return None, not raise + obj = klass([np.nan]) + + assert obj.first_valid_index() is None + assert obj.iloc[:0].first_valid_index() is None + + def test_first_subset(self): + ts = tm.makeTimeDataFrame(freq="12h") + result = ts.first("10d") + assert len(result) == 20 + + ts = tm.makeTimeDataFrame(freq="D") + result = ts.first("10d") + assert len(result) == 10 + + result = ts.first("3M") + expected = ts[:"3/31/2000"] + tm.assert_frame_equal(result, expected) + + result = ts.first("21D") + expected = ts[:21] + tm.assert_frame_equal(result, expected) + + result = ts[:0].first("3M") + tm.assert_frame_equal(result, ts[:0]) + + def test_first_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.first("1D") + + def test_last_subset(self): + ts = tm.makeTimeDataFrame(freq="12h") + result = ts.last("10d") + assert len(result) == 20 + + ts = 
tm.makeTimeDataFrame(nper=30, freq="D") + result = ts.last("10d") + assert len(result) == 10 + + result = ts.last("21D") + expected = ts["2000-01-10":] + tm.assert_frame_equal(result, expected) + + result = ts.last("21D") + expected = ts[-21:] + tm.assert_frame_equal(result, expected) + + result = ts[:0].last("3M") + tm.assert_frame_equal(result, ts[:0]) + + def test_last_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.last("1D") + + def test_at_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_frame_equal(result, expected) + + result = ts.loc[time(9, 30)] + expected = ts.loc[(rng.hour == 9) & (rng.minute == 30)] + + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts.at_time(time(0, 0)) + tm.assert_frame_equal(result, ts) + + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = DataFrame(np.random.randn(len(rng), 2), rng) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + @pytest.mark.parametrize( + "hour", ["1:00", "1:00AM", time(1), time(1, tzinfo=pytz.UTC)] + ) + def test_at_time_errors(self, hour): + # GH 24043 + dti = pd.date_range("2018", periods=3, freq="H") + df = pd.DataFrame(list(range(len(dti))), index=dti) + if getattr(hour, "tzinfo", None) is None: + result = df.at_time(hour) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + else: + with pytest.raises(ValueError, match="Index must be timezone"): + df.at_time(hour) + + def test_at_time_tz(self): + # GH 
24043 + dti = pd.date_range("2018", periods=3, freq="H", tz="US/Pacific") + df = pd.DataFrame(list(range(len(dti))), index=dti) + result = df.at_time(time(4, tzinfo=pytz.timezone("US/Eastern"))) + expected = df.iloc[1:2] + tm.assert_frame_equal(result, expected) + + def test_at_time_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.at_time("00:00") + + @pytest.mark.parametrize("axis", ["index", "columns", 0, 1]) + def test_at_time_axis(self, axis): + # issue 8839 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + ts.index, ts.columns = rng, rng + + indices = rng[(rng.hour == 9) & (rng.minute == 30) & (rng.second == 0)] + + if axis in ["index", 0]: + expected = ts.loc[indices, :] + elif axis in ["columns", 1]: + expected = ts.loc[:, indices] + + result = ts.at_time("9:30", axis=axis) + tm.assert_frame_equal(result, expected) + + def test_between_time(self, close_open_fixture): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(0, 0) + etime = time(1, 0) + inc_start, inc_end = close_open_fixture + + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert t >= stime + else: + assert t > stime + + if inc_end: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_frame_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 
1) * 4 + 1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inc_end: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self): + # GH20725 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]]) + with pytest.raises(TypeError): # index is not a DatetimeIndex + df.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_axis(self, axis): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = DataFrame(np.random.randn(len(rng), len(rng))) + stime, etime = ("08:00:00", "09:00:00") + exp_len = 7 + + if axis in ["index", 0]: + ts.index = rng + assert len(ts.between_time(stime, etime)) == exp_len + assert len(ts.between_time(stime, etime, axis=0)) == exp_len + + if axis in ["columns", 1]: + ts.columns = rng + selected = ts.between_time(stime, etime, axis=1).columns + assert len(selected) == exp_len + + def test_between_time_axis_raises(self, axis): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + mask = np.arange(0, len(rng)) + rand_data = np.random.randn(len(rng), len(rng)) + ts = DataFrame(rand_data, index=rng, columns=rng) + stime, etime = ("08:00:00", "09:00:00") + + msg = "Index must be DatetimeIndex" + if axis in ["columns", 1]: + ts.index = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime) + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=0) + + if axis in ["index", 0]: + ts.columns = mask + with pytest.raises(TypeError, match=msg): + ts.between_time(stime, etime, axis=1) + + def test_operation_on_NaT(self): + # Both NaT and Timestamp are in DataFrame. 
+ df = pd.DataFrame({"foo": [pd.NaT, pd.NaT, pd.Timestamp("2012-05-01")]}) + + res = df.min() + exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = pd.Series([pd.Timestamp("2012-05-01")], index=["foo"]) + tm.assert_series_equal(res, exp) + + # GH12941, only NaTs are in DataFrame. + df = pd.DataFrame({"foo": [pd.NaT, pd.NaT]}) + + res = df.min() + exp = pd.Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + res = df.max() + exp = pd.Series([pd.NaT], index=["foo"]) + tm.assert_series_equal(res, exp) + + def test_datetime_assignment_with_NaT_and_diff_time_units(self): + # GH 7492 + data_ns = np.array([1, "nat"], dtype="datetime64[ns]") + result = pd.Series(data_ns).to_frame() + result["new"] = data_ns + expected = pd.DataFrame( + {0: [1, None], "new": [1, None]}, dtype="datetime64[ns]" + ) + tm.assert_frame_equal(result, expected) + # OutOfBoundsDatetime error shouldn't occur + data_s = np.array([1, "nat"], dtype="datetime64[s]") + result["new"] = data_s + expected = pd.DataFrame( + {0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]" + ) + tm.assert_frame_equal(result, expected) + + def test_frame_to_period(self): + K = 5 + + dr = date_range("1/1/2000", "1/1/2001") + pr = period_range("1/1/2000", "1/1/2001") + df = DataFrame(np.random.randn(len(dr), K), index=dr) + df["mix"] = "a" + + pts = df.to_period() + exp = df.copy() + exp.index = pr + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M") + tm.assert_index_equal(pts.index, exp.index.asfreq("M")) + + df = df.T + pts = df.to_period(axis=1) + exp = df.copy() + exp.columns = pr + tm.assert_frame_equal(pts, exp) + + pts = df.to_period("M", axis=1) + tm.assert_index_equal(pts.columns, exp.columns.asfreq("M")) + + msg = "No axis named 2 for object type " + with pytest.raises(ValueError, match=msg): + df.to_period(axis=2) + + @pytest.mark.parametrize("fn", ["tz_localize", "tz_convert"]) + def 
test_tz_convert_and_localize(self, fn): + l0 = date_range("20140701", periods=5, freq="D") + l1 = date_range("20140701", periods=5, freq="D") + + int_idx = Index(range(5)) + + if fn == "tz_convert": + l0 = l0.tz_localize("UTC") + l1 = l1.tz_localize("UTC") + + for idx in [l0, l1]: + + l0_expected = getattr(idx, fn)("US/Pacific") + l1_expected = getattr(idx, fn)("US/Pacific") + + df1 = DataFrame(np.ones(5), index=l0) + df1 = getattr(df1, fn)("US/Pacific") + tm.assert_index_equal(df1.index, l0_expected) + + # MultiIndex + # GH7846 + df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) + + df3 = getattr(df2, fn)("US/Pacific", level=0) + assert not df3.index.levels[0].equals(l0) + tm.assert_index_equal(df3.index.levels[0], l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1) + assert not df3.index.levels[1].equals(l1_expected) + + df3 = getattr(df2, fn)("US/Pacific", level=1) + tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + + # TODO: untested + df5 = getattr(df4, fn)("US/Pacific", level=1) # noqa + + tm.assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + tm.assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + # Bad Inputs + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(index=int_idx) + df = getattr(df, fn)("US/Pacific") + + # Not DatetimeIndex / PeriodIndex + with pytest.raises(TypeError, match="DatetimeIndex"): + df = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) + df = getattr(df, fn)("US/Pacific", level=0) + + # Invalid level + with pytest.raises(ValueError, match="not valid"): + df = DataFrame(index=l0) + df = getattr(df, fn)("US/Pacific", level=1) diff 
--git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py new file mode 100644 index 00000000..b60f2052 --- /dev/null +++ b/pandas/tests/frame/test_timezones.py @@ -0,0 +1,215 @@ +""" +Tests for DataFrame timezone-related methods +""" +from datetime import datetime + +import numpy as np +import pytest +import pytz + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range + + +class TestDataFrameTimezones: + def test_frame_values_with_tz(self): + tz = "US/Central" + df = DataFrame({"A": date_range("2000", periods=4, tz=tz)}) + result = df.values + expected = np.array( + [ + [pd.Timestamp("2000-01-01", tz=tz)], + [pd.Timestamp("2000-01-02", tz=tz)], + [pd.Timestamp("2000-01-03", tz=tz)], + [pd.Timestamp("2000-01-04", tz=tz)], + ] + ) + tm.assert_numpy_array_equal(result, expected) + + # two columns, homogenous + + df = df.assign(B=df.A) + result = df.values + expected = np.concatenate([expected, expected], axis=1) + tm.assert_numpy_array_equal(result, expected) + + # three columns, heterogeneous + est = "US/Eastern" + df = df.assign(C=df.A.dt.tz_convert(est)) + + new = np.array( + [ + [pd.Timestamp("2000-01-01T01:00:00", tz=est)], + [pd.Timestamp("2000-01-02T01:00:00", tz=est)], + [pd.Timestamp("2000-01-03T01:00:00", tz=est)], + [pd.Timestamp("2000-01-04T01:00:00", tz=est)], + ] + ) + expected = np.concatenate([expected, new], axis=1) + result = df.values + tm.assert_numpy_array_equal(result, expected) + + def test_frame_from_records_utc(self): + rec = {"datum": 1.5, "begin_time": datetime(2006, 4, 27, tzinfo=pytz.utc)} + + # it works + DataFrame.from_records([rec], index="begin_time") + + def test_frame_tz_localize(self): + rng = date_range("1/1/2011", periods=100, freq="H") + + df = DataFrame({"a": 1}, index=rng) + result = df.tz_localize("utc") + expected = DataFrame({"a": 1}, 
rng.tz_localize("UTC")) + assert result.index.tz.zone == "UTC" + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_localize("utc", axis=1) + assert result.columns.tz.zone == "UTC" + tm.assert_frame_equal(result, expected.T) + + def test_frame_tz_convert(self): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + + df = DataFrame({"a": 1}, index=rng) + result = df.tz_convert("Europe/Berlin") + expected = DataFrame({"a": 1}, rng.tz_convert("Europe/Berlin")) + assert result.index.tz.zone == "Europe/Berlin" + tm.assert_frame_equal(result, expected) + + df = df.T + result = df.tz_convert("Europe/Berlin", axis=1) + assert result.columns.tz.zone == "Europe/Berlin" + tm.assert_frame_equal(result, expected.T) + + def test_frame_join_tzaware(self): + test1 = DataFrame( + np.zeros((6, 3)), + index=date_range( + "2012-11-15 00:00:00", periods=6, freq="100L", tz="US/Central" + ), + ) + test2 = DataFrame( + np.zeros((3, 3)), + index=date_range( + "2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central" + ), + columns=range(3, 6), + ) + + result = test1.join(test2, how="outer") + ex_index = test1.index.union(test2.index) + + tm.assert_index_equal(result.index, ex_index) + assert result.index.tz.zone == "US/Central" + + def test_frame_add_tz_mismatch_converts_to_utc(self): + rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") + df = DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"]) + + df_moscow = df.tz_convert("Europe/Moscow") + result = df + df_moscow + assert result.index.tz is pytz.utc + + result = df_moscow + df + assert result.index.tz is pytz.utc + + def test_frame_align_aware(self): + idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") + idx2 = date_range("2001", periods=5, freq="2H", tz="US/Eastern") + df1 = DataFrame(np.random.randn(len(idx1), 3), idx1) + df2 = DataFrame(np.random.randn(len(idx2), 3), idx2) + new1, new2 = df1.align(df2) + assert df1.index.tz == new1.index.tz + assert 
df2.index.tz == new2.index.tz + + # different timezones convert to UTC + + # frame with frame + df1_central = df1.tz_convert("US/Central") + new1, new2 = df1.align(df1_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + # frame with Series + new1, new2 = df1.align(df1_central[0], axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + df1[0].align(df1_central, axis=0) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_frame_no_datetime64_dtype(self, tz): + # after GH#7822 + # these retain the timezones on dict construction + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + dr_tz = dr.tz_localize(tz) + df = DataFrame({"A": "foo", "B": dr_tz}, index=dr) + tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo) + assert df["B"].dtype == tz_expected + + # GH#2810 (with timezones) + datetimes_naive = [ts.to_pydatetime() for ts in dr] + datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] + df = DataFrame({"dr": dr}) + df["dr_tz"] = dr_tz + df["datetimes_naive"] = datetimes_naive + df["datetimes_with_tz"] = datetimes_with_tz + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + ], + index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"], + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_frame_reset_index(self, tz): + dr = date_range("2012-06-02", periods=10, tz=tz) + df = DataFrame(np.random.randn(len(dr)), dr) + roundtripped = df.reset_index().set_index("index") + xp = df.index.tz + rs = roundtripped.index.tz + assert xp == rs + + @pytest.mark.parametrize("tz", [None, "America/New_York"]) + def test_boolean_compare_transpose_tzindex_with_dst(self, tz): + # GH 19970 + idx = date_range("20161101", "20161130", 
freq="4H", tz=tz) + df = DataFrame({"a": range(len(idx)), "b": range(len(idx))}, index=idx) + result = df.T == df.T + expected = DataFrame(True, index=list("ab"), columns=idx) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("copy", [True, False]) + @pytest.mark.parametrize( + "method, tz", [["tz_localize", None], ["tz_convert", "Europe/Berlin"]] + ) + def test_tz_localize_convert_copy_inplace_mutate(self, copy, method, tz): + # GH 6326 + result = DataFrame( + np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz) + ) + getattr(result, method)("UTC", copy=copy) + expected = DataFrame( + np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz) + ) + tm.assert_frame_equal(result, expected) + + def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): + # GH 25843 + tz = tz_aware_fixture + result = DataFrame({"d": [pd.Timestamp("2019", tz=tz)]}, dtype="datetime64[ns]") + expected = DataFrame({"d": [pd.Timestamp("2019")]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py new file mode 100644 index 00000000..aeff9297 --- /dev/null +++ b/pandas/tests/frame/test_to_csv.py @@ -0,0 +1,1358 @@ +import csv +from io import StringIO +import os + +import numpy as np +import pytest + +from pandas.errors import ParserError + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + read_csv, + to_datetime, +) +import pandas._testing as tm +import pandas.core.common as com + +from pandas.io.common import get_handle + +MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] +MIXED_INT_DTYPES = [ + "uint8", + "uint16", + "uint32", + "uint64", + "int8", + "int16", + "int32", + "int64", +] + + +class TestDataFrameToCSV: + def read_csv(self, path, **kwargs): + params = dict(index_col=0, parse_dates=True) + params.update(**kwargs) + + return pd.read_csv(path, **params) + + def 
test_to_csv_from_csv1(self, float_frame, datetime_frame): + + with tm.ensure_clean("__tmp_to_csv_from_csv1__") as path: + float_frame["A"][:5] = np.nan + + float_frame.to_csv(path) + float_frame.to_csv(path, columns=["A", "B"]) + float_frame.to_csv(path, header=False) + float_frame.to_csv(path, index=False) + + # test roundtrip + datetime_frame.to_csv(path) + recons = self.read_csv(path) + tm.assert_frame_equal(datetime_frame, recons) + + datetime_frame.to_csv(path, index_label="index") + recons = self.read_csv(path, index_col=None) + + assert len(recons.columns) == len(datetime_frame.columns) + 1 + + # no index + datetime_frame.to_csv(path, index=False) + recons = self.read_csv(path, index_col=None) + tm.assert_almost_equal(datetime_frame.values, recons.values) + + # corner case + dm = DataFrame( + { + "s1": Series(range(3), index=np.arange(3)), + "s2": Series(range(2), index=np.arange(2)), + } + ) + dm.to_csv(path) + + recons = self.read_csv(path) + tm.assert_frame_equal(dm, recons) + + def test_to_csv_from_csv2(self, float_frame): + + with tm.ensure_clean("__tmp_to_csv_from_csv2__") as path: + + # duplicate index + df = DataFrame( + np.random.randn(3, 3), index=["a", "a", "b"], columns=["x", "y", "z"] + ) + df.to_csv(path) + result = self.read_csv(path) + tm.assert_frame_equal(result, df) + + midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) + df = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) + + df.to_csv(path) + result = self.read_csv(path, index_col=[0, 1, 2], parse_dates=False) + tm.assert_frame_equal(result, df, check_names=False) + + # column aliases + col_aliases = Index(["AA", "X", "Y", "Z"]) + float_frame.to_csv(path, header=col_aliases) + + rs = self.read_csv(path) + xp = float_frame.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + msg = "Writing 4 cols but got 2 aliases" + with pytest.raises(ValueError, match=msg): + float_frame.to_csv(path, header=["AA", "X"]) + + def 
test_to_csv_from_csv3(self): + + with tm.ensure_clean("__tmp_to_csv_from_csv3__") as path: + df1 = DataFrame(np.random.randn(3, 1)) + df2 = DataFrame(np.random.randn(3, 1)) + + df1.to_csv(path) + df2.to_csv(path, mode="a", header=False) + xp = pd.concat([df1, df2]) + rs = pd.read_csv(path, index_col=0) + rs.columns = [int(label) for label in rs.columns] + xp.columns = [int(label) for label in xp.columns] + tm.assert_frame_equal(xp, rs) + + def test_to_csv_from_csv4(self): + + with tm.ensure_clean("__tmp_to_csv_from_csv4__") as path: + # GH 10833 (TimedeltaIndex formatting) + dt = pd.Timedelta(seconds=1) + df = pd.DataFrame( + {"dt_data": [i * dt for i in range(3)]}, + index=pd.Index([i * dt for i in range(3)], name="dt_index"), + ) + df.to_csv(path) + + result = pd.read_csv(path, index_col="dt_index") + result.index = pd.to_timedelta(result.index) + # TODO: remove renaming when GH 10875 is solved + result.index = result.index.rename("dt_index") + result["dt_data"] = pd.to_timedelta(result["dt_data"]) + + tm.assert_frame_equal(df, result, check_index_type=True) + + def test_to_csv_from_csv5(self, timezone_frame): + + # tz, 8260 + with tm.ensure_clean("__tmp_to_csv_from_csv5__") as path: + + timezone_frame.to_csv(path) + result = pd.read_csv(path, index_col=0, parse_dates=["A"]) + + converter = ( + lambda c: to_datetime(result[c]) + .dt.tz_convert("UTC") + .dt.tz_convert(timezone_frame[c].dt.tz) + ) + result["B"] = converter("B") + result["C"] = converter("C") + tm.assert_frame_equal(result, timezone_frame) + + def test_to_csv_cols_reordering(self): + # GH3454 + import pandas as pd + + chunksize = 5 + N = int(chunksize * 2.5) + + df = tm.makeCustomDataframe(N, 3) + cs = df.columns + cols = [cs[2], cs[0]] + + with tm.ensure_clean() as path: + df.to_csv(path, columns=cols, chunksize=chunksize) + rs_c = pd.read_csv(path, index_col=0) + + tm.assert_frame_equal(df[cols], rs_c, check_names=False) + + def test_to_csv_new_dupe_cols(self): + import pandas as pd + + def 
_check_df(df, cols=None): + with tm.ensure_clean() as path: + df.to_csv(path, columns=cols, chunksize=chunksize) + rs_c = pd.read_csv(path, index_col=0) + + # we wrote them in a different order + # so compare them in that order + if cols is not None: + + if df.columns.is_unique: + rs_c.columns = cols + else: + indexer, missing = df.columns.get_indexer_non_unique(cols) + rs_c.columns = df.columns.take(indexer) + + for c in cols: + obj_df = df[c] + obj_rs = rs_c[c] + if isinstance(obj_df, Series): + tm.assert_series_equal(obj_df, obj_rs) + else: + tm.assert_frame_equal(obj_df, obj_rs, check_names=False) + + # wrote in the same order + else: + rs_c.columns = df.columns + tm.assert_frame_equal(df, rs_c, check_names=False) + + chunksize = 5 + N = int(chunksize * 2.5) + + # dupe cols + df = tm.makeCustomDataframe(N, 3) + df.columns = ["a", "a", "b"] + _check_df(df, None) + + # dupe cols with selection + cols = ["b", "a"] + _check_df(df, cols) + + @pytest.mark.slow + def test_to_csv_dtnat(self): + # GH3437 + from pandas import NaT + + def make_dtnat_arr(n, nnat=None): + if nnat is None: + nnat = int(n * 0.1) # 10% + s = list(date_range("2000", freq="5min", periods=n)) + if nnat: + for i in np.random.randint(0, len(s), nnat): + s[i] = NaT + i = np.random.randint(100) + s[-i] = NaT + s[i] = NaT + return s + + chunksize = 1000 + # N=35000 + s1 = make_dtnat_arr(chunksize + 5) + s2 = make_dtnat_arr(chunksize + 5, 0) + + # s3=make_dtnjat_arr(chunksize+5,0) + with tm.ensure_clean("1.csv") as pth: + df = DataFrame(dict(a=s1, b=s2)) + df.to_csv(pth, chunksize=chunksize) + + recons = self.read_csv(pth)._convert(datetime=True, coerce=True) + tm.assert_frame_equal( + df, recons, check_names=False, check_less_precise=True + ) + + @pytest.mark.slow + def test_to_csv_moar(self): + def _do_test( + df, r_dtype=None, c_dtype=None, rnlvl=None, cnlvl=None, dupe_col=False + ): + + kwargs = dict(parse_dates=False) + if cnlvl: + if rnlvl is not None: + kwargs["index_col"] = list(range(rnlvl)) + 
kwargs["header"] = list(range(cnlvl)) + + with tm.ensure_clean("__tmp_to_csv_moar__") as path: + df.to_csv(path, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) + else: + kwargs["header"] = 0 + + with tm.ensure_clean("__tmp_to_csv_moar__") as path: + df.to_csv(path, encoding="utf8", chunksize=chunksize) + recons = self.read_csv(path, **kwargs) + + def _to_uni(x): + if not isinstance(x, str): + return x.decode("utf8") + return x + + if dupe_col: + # read_Csv disambiguates the columns by + # labeling them dupe.1,dupe.2, etc'. monkey patch columns + recons.columns = df.columns + if rnlvl and not cnlvl: + delta_lvl = [recons.iloc[:, i].values for i in range(rnlvl - 1)] + ix = MultiIndex.from_arrays([list(recons.index)] + delta_lvl) + recons.index = ix + recons = recons.iloc[:, rnlvl - 1 :] + + type_map = dict(i="i", f="f", s="O", u="O", dt="O", p="O") + if r_dtype: + if r_dtype == "u": # unicode + r_dtype = "O" + recons.index = np.array( + [_to_uni(label) for label in recons.index], dtype=r_dtype + ) + df.index = np.array( + [_to_uni(label) for label in df.index], dtype=r_dtype + ) + elif r_dtype == "dt": # unicode + r_dtype = "O" + recons.index = np.array( + [Timestamp(label) for label in recons.index], dtype=r_dtype + ) + df.index = np.array( + [Timestamp(label) for label in df.index], dtype=r_dtype + ) + elif r_dtype == "p": + r_dtype = "O" + idx_list = to_datetime(recons.index) + recons.index = np.array( + [Timestamp(label) for label in idx_list], dtype=r_dtype + ) + df.index = np.array( + list(map(Timestamp, df.index.to_timestamp())), dtype=r_dtype + ) + else: + r_dtype = type_map.get(r_dtype) + recons.index = np.array(recons.index, dtype=r_dtype) + df.index = np.array(df.index, dtype=r_dtype) + if c_dtype: + if c_dtype == "u": + c_dtype = "O" + recons.columns = np.array( + [_to_uni(label) for label in recons.columns], dtype=c_dtype + ) + df.columns = np.array( + [_to_uni(label) for label in df.columns], dtype=c_dtype + ) + elif 
c_dtype == "dt": + c_dtype = "O" + recons.columns = np.array( + [Timestamp(label) for label in recons.columns], dtype=c_dtype + ) + df.columns = np.array( + [Timestamp(label) for label in df.columns], dtype=c_dtype + ) + elif c_dtype == "p": + c_dtype = "O" + col_list = to_datetime(recons.columns) + recons.columns = np.array( + [Timestamp(label) for label in col_list], dtype=c_dtype + ) + col_list = df.columns.to_timestamp() + df.columns = np.array( + [Timestamp(label) for label in col_list], dtype=c_dtype + ) + else: + c_dtype = type_map.get(c_dtype) + recons.columns = np.array(recons.columns, dtype=c_dtype) + df.columns = np.array(df.columns, dtype=c_dtype) + + tm.assert_frame_equal( + df, recons, check_names=False, check_less_precise=True + ) + + N = 100 + chunksize = 1000 + + for ncols in [4]: + base = int((chunksize // ncols or 1) or 1) + for nrows in [ + 2, + 10, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_type="dt", c_idx_type="s" + ), + "dt", + "s", + ) + + for ncols in [4]: + base = int((chunksize // ncols or 1) or 1) + for nrows in [ + 2, + 10, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_type="dt", c_idx_type="s" + ), + "dt", + "s", + ) + pass + + for r_idx_type, c_idx_type in [("i", "i"), ("s", "s"), ("u", "dt"), ("p", "p")]: + for ncols in [1, 2, 3, 4]: + base = int((chunksize // ncols or 1) or 1) + for nrows in [ + 2, + 10, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_type=r_idx_type, c_idx_type=c_idx_type + ), + r_idx_type, + c_idx_type, + ) + + for ncols in [1, 2, 3, 4]: + base = int((chunksize // ncols or 1) or 1) + for 
nrows in [ + 10, + N - 2, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test(tm.makeCustomDataframe(nrows, ncols)) + + for nrows in [10, N - 2, N - 1, N, N + 1, N + 2]: + df = tm.makeCustomDataframe(nrows, 3) + cols = list(df.columns) + cols[:2] = ["dupe", "dupe"] + cols[-2:] = ["dupe", "dupe"] + ix = list(df.index) + ix[:2] = ["rdupe", "rdupe"] + ix[-2:] = ["rdupe", "rdupe"] + df.index = ix + df.columns = cols + _do_test(df, dupe_col=True) + + _do_test(DataFrame(index=np.arange(10))) + _do_test( + tm.makeCustomDataframe(chunksize // 2 + 1, 2, r_idx_nlevels=2), rnlvl=2 + ) + for ncols in [2, 3, 4]: + base = int(chunksize // ncols) + for nrows in [ + 10, + N - 2, + N - 1, + N, + N + 1, + N + 2, + 2 * N - 2, + 2 * N - 1, + 2 * N, + 2 * N + 1, + 2 * N + 2, + base - 1, + base, + base + 1, + ]: + _do_test(tm.makeCustomDataframe(nrows, ncols, r_idx_nlevels=2), rnlvl=2) + _do_test(tm.makeCustomDataframe(nrows, ncols, c_idx_nlevels=2), cnlvl=2) + _do_test( + tm.makeCustomDataframe( + nrows, ncols, r_idx_nlevels=2, c_idx_nlevels=2 + ), + rnlvl=2, + cnlvl=2, + ) + + def test_to_csv_from_csv_w_some_infs(self, float_frame): + + # test roundtrip with inf, -inf, nan, as full columns and mix + float_frame["G"] = np.nan + f = lambda x: [np.inf, np.nan][np.random.rand() < 0.5] + float_frame["H"] = float_frame.index.map(f) + + with tm.ensure_clean() as path: + float_frame.to_csv(path) + recons = self.read_csv(path) + + # TODO to_csv drops column name + tm.assert_frame_equal(float_frame, recons, check_names=False) + tm.assert_frame_equal( + np.isinf(float_frame), np.isinf(recons), check_names=False + ) + + def test_to_csv_from_csv_w_all_infs(self, float_frame): + + # test roundtrip with inf, -inf, nan, as full columns and mix + float_frame["E"] = np.inf + float_frame["F"] = -np.inf + + with tm.ensure_clean() as path: + float_frame.to_csv(path) + recons = self.read_csv(path) + + # TODO to_csv 
drops column name + tm.assert_frame_equal(float_frame, recons, check_names=False) + tm.assert_frame_equal( + np.isinf(float_frame), np.isinf(recons), check_names=False + ) + + def test_to_csv_no_index(self): + # GH 3624, after appending columns, to_csv fails + with tm.ensure_clean("__tmp_to_csv_no_index__") as path: + df = DataFrame({"c1": [1, 2, 3], "c2": [4, 5, 6]}) + df.to_csv(path, index=False) + result = read_csv(path) + tm.assert_frame_equal(df, result) + df["c3"] = Series([7, 8, 9], dtype="int64") + df.to_csv(path, index=False) + result = read_csv(path) + tm.assert_frame_equal(df, result) + + def test_to_csv_with_mix_columns(self): + # gh-11637: incorrect output when a mix of integer and string column + # names passed as columns parameter in to_csv + + df = DataFrame({0: ["a", "b", "c"], 1: ["aa", "bb", "cc"]}) + df["test"] = "txt" + assert df.to_csv() == df.to_csv(columns=[0, 1, "test"]) + + def test_to_csv_headers(self): + # GH6186, the presence or absence of `index` incorrectly + # causes to_csv to have different header semantics. 
+ from_df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + to_df = DataFrame([[1, 2], [3, 4]], columns=["X", "Y"]) + with tm.ensure_clean("__tmp_to_csv_headers__") as path: + from_df.to_csv(path, header=["X", "Y"]) + recons = self.read_csv(path) + + tm.assert_frame_equal(to_df, recons) + + from_df.to_csv(path, index=False, header=["X", "Y"]) + recons = self.read_csv(path) + + recons.reset_index(inplace=True) + tm.assert_frame_equal(to_df, recons) + + def test_to_csv_multiindex(self, float_frame, datetime_frame): + + frame = float_frame + old_index = frame.index + arrays = np.arange(len(old_index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + + frame.to_csv(path, header=False) + frame.to_csv(path, columns=["A", "B"]) + + # round trip + frame.to_csv(path) + + df = self.read_csv(path, index_col=[0, 1], parse_dates=False) + + # TODO to_csv drops column name + tm.assert_frame_equal(frame, df, check_names=False) + assert frame.index.names == df.index.names + + # needed if setUp becomes a class method + float_frame.index = old_index + + # try multiindex with dates + tsframe = datetime_frame + old_index = tsframe.index + new_index = [old_index, np.arange(len(old_index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.to_csv(path, index_label=["time", "foo"]) + recons = self.read_csv(path, index_col=[0, 1]) + + # TODO to_csv drops column name + tm.assert_frame_equal(tsframe, recons, check_names=False) + + # do not load index + tsframe.to_csv(path) + recons = self.read_csv(path, index_col=None) + assert len(recons.columns) == len(tsframe.columns) + 2 + + # no index + tsframe.to_csv(path, index=False) + recons = self.read_csv(path, index_col=None) + tm.assert_almost_equal(recons.values, datetime_frame.values) + + # needed if setUp becomes class method + datetime_frame.index = old_index + + with 
tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + # GH3571, GH1651, GH3141 + + def _make_frame(names=None): + if names is True: + names = ["first", "second"] + return DataFrame( + np.random.randint(0, 10, size=(3, 3)), + columns=MultiIndex.from_tuples( + [("bah", "foo"), ("bah", "bar"), ("ban", "baz")], names=names + ), + dtype="int64", + ) + + # column & index are multi-index + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1]) + tm.assert_frame_equal(df, result) + + # column is mi + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=1, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=0) + tm.assert_frame_equal(df, result) + + # dup column names? + df = tm.makeCustomDataframe(5, 3, r_idx_nlevels=3, c_idx_nlevels=4) + df.to_csv(path) + result = read_csv(path, header=[0, 1, 2, 3], index_col=[0, 1, 2]) + tm.assert_frame_equal(df, result) + + # writing with no index + df = _make_frame() + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + tm.assert_frame_equal(df, result) + + # we lose the names here + df = _make_frame(True) + df.to_csv(path, index=False) + result = read_csv(path, header=[0, 1]) + assert com.all_none(*result.columns.names) + result.columns.names = df.columns.names + tm.assert_frame_equal(df, result) + + # whatsnew example + df = _make_frame() + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + df = _make_frame(True) + df.to_csv(path) + result = read_csv(path, header=[0, 1], index_col=[0]) + tm.assert_frame_equal(df, result) + + # invalid options + df = _make_frame(True) + df.to_csv(path) + + for i in [6, 7]: + msg = "len of {i}, but only 5 lines in file".format(i=i) + with pytest.raises(ParserError, match=msg): + read_csv(path, header=list(range(i)), index_col=0) + + # write with cols + msg = "cannot specify cols with a 
MultiIndex" + with pytest.raises(TypeError, match=msg): + df.to_csv(path, columns=["foo", "bar"]) + + with tm.ensure_clean("__tmp_to_csv_multiindex__") as path: + # empty + tsframe[:0].to_csv(path) + recons = self.read_csv(path) + + exp = tsframe[:0] + exp.index = [] + + tm.assert_index_equal(recons.columns, exp.columns) + assert len(recons) == 0 + + def test_to_csv_interval_index(self): + # GH 28210 + df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3)) + + with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + df.to_csv(path) + result = self.read_csv(path, index_col=0) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = df.copy() + expected.index = expected.index.astype(str) + + tm.assert_frame_equal(result, expected) + + def test_to_csv_float32_nanrep(self): + df = DataFrame(np.random.randn(1, 4).astype(np.float32)) + df[1] = np.nan + + with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: + df.to_csv(path, na_rep=999) + + with open(path) as f: + lines = f.readlines() + assert lines[1].split(",")[2] == "999" + + def test_to_csv_withcommas(self): + + # Commas inside fields should be correctly escaped when saving as CSV. 
+ df = DataFrame({"A": [1, 2, 3], "B": ["5,6", "7,8", "9,0"]}) + + with tm.ensure_clean("__tmp_to_csv_withcommas__.csv") as path: + df.to_csv(path) + df2 = self.read_csv(path) + tm.assert_frame_equal(df2, df) + + def test_to_csv_mixed(self): + def create_cols(name): + return ["{name}{i:03d}".format(name=name, i=i) for i in range(5)] + + df_float = DataFrame( + np.random.randn(100, 5), dtype="float64", columns=create_cols("float") + ) + df_int = DataFrame( + np.random.randn(100, 5), dtype="int64", columns=create_cols("int") + ) + df_bool = DataFrame(True, index=df_float.index, columns=create_cols("bool")) + df_object = DataFrame( + "foo", index=df_float.index, columns=create_cols("object") + ) + df_dt = DataFrame( + Timestamp("20010101"), index=df_float.index, columns=create_cols("date") + ) + + # add in some nans + df_float.loc[30:50, 1:3] = np.nan + + # ## this is a bug in read_csv right now #### + # df_dt.loc[30:50,1:3] = np.nan + + df = pd.concat([df_float, df_int, df_bool, df_object, df_dt], axis=1) + + # dtype + dtypes = dict() + for n, dtype in [ + ("float", np.float64), + ("int", np.int64), + ("bool", np.bool), + ("object", np.object), + ]: + for c in create_cols(n): + dtypes[c] = dtype + + with tm.ensure_clean() as filename: + df.to_csv(filename) + rs = read_csv( + filename, index_col=0, dtype=dtypes, parse_dates=create_cols("date") + ) + tm.assert_frame_equal(rs, df) + + def test_to_csv_dups_cols(self): + + df = DataFrame( + np.random.randn(1000, 30), + columns=list(range(15)) + list(range(15)), + dtype="float64", + ) + + with tm.ensure_clean() as filename: + df.to_csv(filename) # single dtype, fine + result = read_csv(filename, index_col=0) + result.columns = df.columns + tm.assert_frame_equal(result, df) + + df_float = DataFrame(np.random.randn(1000, 3), dtype="float64") + df_int = DataFrame(np.random.randn(1000, 3), dtype="int64") + df_bool = DataFrame(True, index=df_float.index, columns=range(3)) + df_object = DataFrame("foo", index=df_float.index, 
columns=range(3)) + df_dt = DataFrame(Timestamp("20010101"), index=df_float.index, columns=range(3)) + df = pd.concat( + [df_float, df_int, df_bool, df_object, df_dt], axis=1, ignore_index=True + ) + + cols = [] + for i in range(5): + cols.extend([0, 1, 2]) + df.columns = cols + + with tm.ensure_clean() as filename: + df.to_csv(filename) + result = read_csv(filename, index_col=0) + + # date cols + for i in ["0.4", "1.4", "2.4"]: + result[i] = to_datetime(result[i]) + + result.columns = df.columns + tm.assert_frame_equal(result, df) + + # GH3457 + + N = 10 + df = tm.makeCustomDataframe(N, 3) + df.columns = ["a", "a", "b"] + + with tm.ensure_clean() as filename: + df.to_csv(filename) + + # read_csv will rename the dups columns + result = read_csv(filename, index_col=0) + result = result.rename(columns={"a.1": "a"}) + tm.assert_frame_equal(result, df) + + def test_to_csv_chunking(self): + + aa = DataFrame({"A": range(100000)}) + aa["B"] = aa.A + 1.0 + aa["C"] = aa.A + 2.0 + aa["D"] = aa.A + 3.0 + + for chunksize in [10000, 50000, 100000]: + with tm.ensure_clean() as filename: + aa.to_csv(filename, chunksize=chunksize) + rs = read_csv(filename, index_col=0) + tm.assert_frame_equal(rs, aa) + + @pytest.mark.slow + def test_to_csv_wide_frame_formatting(self): + # Issue #8621 + df = DataFrame(np.random.randn(1, 100010), columns=None, index=None) + with tm.ensure_clean() as filename: + df.to_csv(filename, header=False, index=False) + rs = read_csv(filename, header=None) + tm.assert_frame_equal(rs, df) + + def test_to_csv_bug(self): + f1 = StringIO("a,1.0\nb,2.0") + df = self.read_csv(f1, header=None) + newdf = DataFrame({"t": df[df.columns[0]]}) + + with tm.ensure_clean() as path: + newdf.to_csv(path) + + recons = read_csv(path, index_col=0) + # don't check_names as t != 1 + tm.assert_frame_equal(recons, newdf, check_names=False) + + def test_to_csv_unicode(self): + + df = DataFrame({"c/\u03c3": [1, 2, 3]}) + with tm.ensure_clean() as path: + + df.to_csv(path, 
encoding="UTF-8") + df2 = read_csv(path, index_col=0, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + df.to_csv(path, encoding="UTF-8", index=False) + df2 = read_csv(path, index_col=None, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + def test_to_csv_unicode_index_col(self): + buf = StringIO("") + df = DataFrame( + [["\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], + columns=["\u05d0", "\u05d1", "\u05d2", "\u05d3"], + index=["\u05d0", "\u05d1"], + ) + + df.to_csv(buf, encoding="UTF-8") + buf.seek(0) + + df2 = read_csv(buf, index_col=0, encoding="UTF-8") + tm.assert_frame_equal(df, df2) + + def test_to_csv_stringio(self, float_frame): + buf = StringIO() + float_frame.to_csv(buf) + buf.seek(0) + recons = read_csv(buf, index_col=0) + # TODO to_csv drops column name + tm.assert_frame_equal(recons, float_frame, check_names=False) + + def test_to_csv_float_format(self): + + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + + with tm.ensure_clean() as filename: + + df.to_csv(filename, float_format="%.2f") + + rs = read_csv(filename, index_col=0) + xp = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(rs, xp) + + def test_to_csv_unicodewriter_quoting(self): + df = DataFrame({"A": [1, 2, 3], "B": ["foo", "bar", "baz"]}) + + buf = StringIO() + df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC, encoding="utf-8") + + result = buf.getvalue() + expected_rows = ['"A","B"', '1,"foo"', '2,"bar"', '3,"baz"'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_quote_none(self): + # GH4328 + df = DataFrame({"A": ["hello", '{"hello"}']}) + for encoding in (None, "utf-8"): + buf = StringIO() + df.to_csv(buf, quoting=csv.QUOTE_NONE, encoding=encoding, index=False) + + result = buf.getvalue() + expected_rows = ["A", "hello", 
'{"hello"}'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_to_csv_index_no_leading_comma(self): + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) + + buf = StringIO() + df.to_csv(buf, index_label=False) + + expected_rows = ["A,B", "one,1,4", "two,2,5", "three,3,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert buf.getvalue() == expected + + def test_to_csv_line_terminators(self): + # see gh-20353 + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["one", "two", "three"]) + + with tm.ensure_clean() as path: + # case 1: CRLF as line terminator + df.to_csv(path, line_terminator="\r\n") + expected = b",A,B\r\none,1,4\r\ntwo,2,5\r\nthree,3,6\r\n" + + with open(path, mode="rb") as f: + assert f.read() == expected + + with tm.ensure_clean() as path: + # case 2: LF as line terminator + df.to_csv(path, line_terminator="\n") + expected = b",A,B\none,1,4\ntwo,2,5\nthree,3,6\n" + + with open(path, mode="rb") as f: + assert f.read() == expected + + with tm.ensure_clean() as path: + # case 3: The default line terminator(=os.linesep)(gh-21406) + df.to_csv(path) + os_linesep = os.linesep.encode("utf-8") + expected = ( + b",A,B" + + os_linesep + + b"one,1,4" + + os_linesep + + b"two,2,5" + + os_linesep + + b"three,3,6" + + os_linesep + ) + + with open(path, mode="rb") as f: + assert f.read() == expected + + def test_to_csv_from_csv_categorical(self): + + # CSV with categoricals should result in the same output + # as when one would add a "normal" Series/DataFrame. 
+ s = Series(pd.Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) + s2 = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) + res = StringIO() + + s.to_csv(res, header=False) + exp = StringIO() + + s2.to_csv(exp, header=False) + assert res.getvalue() == exp.getvalue() + + df = DataFrame({"s": s}) + df2 = DataFrame({"s": s2}) + + res = StringIO() + df.to_csv(res) + + exp = StringIO() + df2.to_csv(exp) + + assert res.getvalue() == exp.getvalue() + + def test_to_csv_path_is_none(self, float_frame): + # GH 8215 + # Make sure we return string for consistency with + # Series.to_csv() + csv_str = float_frame.to_csv(path_or_buf=None) + assert isinstance(csv_str, str) + recons = pd.read_csv(StringIO(csv_str), index_col=0) + tm.assert_frame_equal(float_frame, recons) + + @pytest.mark.parametrize( + "df,encoding", + [ + ( + DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ), + None, + ), + # GH 21241, 21118 + (DataFrame([["abc", "def", "ghi"]], columns=["X", "Y", "Z"]), "ascii"), + (DataFrame(5 * [[123, "你好", "世界"]], columns=["X", "Y", "Z"]), "gb2312"), + ( + DataFrame(5 * [[123, "Γειά σου", "Κόσμε"]], columns=["X", "Y", "Z"]), + "cp737", + ), + ], + ) + def test_to_csv_compression(self, df, encoding, compression): + + with tm.ensure_clean() as filename: + + df.to_csv(filename, compression=compression, encoding=encoding) + # test the round trip - to_csv -> read_csv + result = read_csv( + filename, compression=compression, index_col=0, encoding=encoding + ) + tm.assert_frame_equal(df, result) + + # test the round trip using file handle - to_csv -> read_csv + f, _handles = get_handle( + filename, "w", compression=compression, encoding=encoding + ) + with f: + df.to_csv(f, encoding=encoding) + result = pd.read_csv( + filename, + compression=compression, + encoding=encoding, + index_col=0, + squeeze=True, + ) + tm.assert_frame_equal(df, result) + + # explicitly make sure file is compressed + with 
tm.decompress_file(filename, compression) as fh: + text = fh.read().decode(encoding or "utf8") + for col in df.columns: + assert col in text + + with tm.decompress_file(filename, compression) as fh: + tm.assert_frame_equal(df, read_csv(fh, index_col=0, encoding=encoding)) + + def test_to_csv_date_format(self, datetime_frame): + with tm.ensure_clean("__tmp_to_csv_date_format__") as path: + dt_index = datetime_frame.index + datetime_frame = DataFrame( + {"A": dt_index, "B": dt_index.shift(1)}, index=dt_index + ) + datetime_frame.to_csv(path, date_format="%Y%m%d") + + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + + datetime_frame_int = datetime_frame.applymap( + lambda x: int(x.strftime("%Y%m%d")) + ) + datetime_frame_int.index = datetime_frame_int.index.map( + lambda x: int(x.strftime("%Y%m%d")) + ) + + tm.assert_frame_equal(test, datetime_frame_int) + + datetime_frame.to_csv(path, date_format="%Y-%m-%d") + + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + datetime_frame_str = datetime_frame.applymap( + lambda x: x.strftime("%Y-%m-%d") + ) + datetime_frame_str.index = datetime_frame_str.index.map( + lambda x: x.strftime("%Y-%m-%d") + ) + + tm.assert_frame_equal(test, datetime_frame_str) + + # Check that columns get converted + datetime_frame_columns = datetime_frame.T + datetime_frame_columns.to_csv(path, date_format="%Y%m%d") + + test = read_csv(path, index_col=0) + + datetime_frame_columns = datetime_frame_columns.applymap( + lambda x: int(x.strftime("%Y%m%d")) + ) + # Columns don't get converted to ints by read_csv + datetime_frame_columns.columns = datetime_frame_columns.columns.map( + lambda x: x.strftime("%Y%m%d") + ) + + tm.assert_frame_equal(test, datetime_frame_columns) + + # test NaTs + nat_index = to_datetime( + ["NaT"] * 10 + ["2000-01-01", "1/1/2000", "1-1-2000"] + ) + nat_frame = DataFrame({"A": nat_index}, index=nat_index) + nat_frame.to_csv(path, 
date_format="%Y-%m-%d") + + test = read_csv(path, parse_dates=[0, 1], index_col=0) + + tm.assert_frame_equal(test, nat_frame) + + def test_to_csv_with_dst_transitions(self): + + with tm.ensure_clean("csv_date_format_with_dst") as path: + # make sure we are not failing on transitions + times = pd.date_range( + "2013-10-26 23:00", + "2013-10-27 01:00", + tz="Europe/London", + freq="H", + ambiguous="infer", + ) + + for i in [times, times + pd.Timedelta("10s")]: + time_range = np.array(range(len(i)), dtype="int64") + df = DataFrame({"A": time_range}, index=i) + df.to_csv(path, index=True) + # we have to reconvert the index as we + # don't parse the tz's + result = read_csv(path, index_col=0) + result.index = to_datetime(result.index, utc=True).tz_convert( + "Europe/London" + ) + tm.assert_frame_equal(result, df) + + # GH11619 + idx = pd.date_range("2015-01-01", "2015-12-31", freq="H", tz="Europe/Paris") + df = DataFrame({"values": 1, "idx": idx}, index=idx) + with tm.ensure_clean("csv_date_format_with_dst") as path: + df.to_csv(path, index=True) + result = read_csv(path, index_col=0) + result.index = to_datetime(result.index, utc=True).tz_convert( + "Europe/Paris" + ) + result["idx"] = to_datetime(result["idx"], utc=True).astype( + "datetime64[ns, Europe/Paris]" + ) + tm.assert_frame_equal(result, df) + + # assert working + df.astype(str) + + with tm.ensure_clean("csv_date_format_with_dst") as path: + df.to_pickle(path) + result = pd.read_pickle(path) + tm.assert_frame_equal(result, df) + + def test_to_csv_quoting(self): + df = DataFrame( + { + "c_bool": [True, False], + "c_float": [1.0, 3.2], + "c_int": [42, np.nan], + "c_string": ["a", "b,c"], + } + ) + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + '1,False,3.2,,"b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv() + assert result == expected + + result = df.to_csv(quoting=None) + assert result == expected + + expected_rows = [ + 
",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + '1,False,3.2,,"b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(quoting=csv.QUOTE_MINIMAL) + assert result == expected + + expected_rows = [ + '"","c_bool","c_float","c_int","c_string"', + '"0","True","1.0","42.0","a"', + '"1","False","3.2","","b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + + result = df.to_csv(quoting=csv.QUOTE_ALL) + assert result == expected + + # see gh-12922, gh-13259: make sure changes to + # the formatters do not break this behaviour + expected_rows = [ + '"","c_bool","c_float","c_int","c_string"', + '0,True,1.0,42.0,"a"', + '1,False,3.2,"","b,c"', + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC) + assert result == expected + + msg = "need to escape, but no escapechar set" + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE) + + with pytest.raises(csv.Error, match=msg): + df.to_csv(quoting=csv.QUOTE_NONE, escapechar=None) + + expected_rows = [ + ",c_bool,c_float,c_int,c_string", + "0,True,1.0,42.0,a", + "1,False,3.2,,b!,c", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar="!") + assert result == expected + + expected_rows = [ + ",c_bool,c_ffloat,c_int,c_string", + "0,True,1.0,42.0,a", + "1,False,3.2,,bf,c", + ] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + result = df.to_csv(quoting=csv.QUOTE_NONE, escapechar="f") + assert result == expected + + # see gh-3503: quoting Windows line terminators + # presents with encoding? 
+ text_rows = ["a,b,c", '1,"test \r\n",3'] + text = tm.convert_rows_list_to_csv_str(text_rows) + df = pd.read_csv(StringIO(text)) + + buf = StringIO() + df.to_csv(buf, encoding="utf-8", index=False) + assert buf.getvalue() == text + + # xref gh-7791: make sure the quoting parameter is passed through + # with multi-indexes + df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) + df = df.set_index(["a", "b"]) + + expected_rows = ['"a","b","c"', '"1","3","5"', '"2","4","6"'] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert df.to_csv(quoting=csv.QUOTE_ALL) == expected + + def test_period_index_date_overflow(self): + # see gh-15982 + + dates = ["1990-01-01", "2000-01-01", "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = pd.DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected_rows = [",0", "1990-01-01,4", "2000-01-01,5", "3005-01-01,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + date_format = "%m-%d-%Y" + result = df.to_csv(date_format=date_format) + + expected_rows = [",0", "01-01-1990,4", "01-01-2000,5", "01-01-3005,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + # Overflow with pd.NaT + dates = ["1990-01-01", pd.NaT, "3005-01-01"] + index = pd.PeriodIndex(dates, freq="D") + + df = pd.DataFrame([4, 5, 6], index=index) + result = df.to_csv() + + expected_rows = [",0", "1990-01-01,4", ",5", "3005-01-01,6"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def test_multi_index_header(self): + # see gh-5539 + columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]) + df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + df.columns = columns + + header = ["a", "b", "c", "d"] + result = df.to_csv(header=header) + + expected_rows = [",a,b,c,d", "0,1,2,3,4", "1,5,6,7,8"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + assert result == expected + + def 
test_to_csv_single_level_multi_index(self): + # see gh-26303 + index = pd.Index([(1,), (2,), (3,)]) + df = pd.DataFrame([[1, 2, 3]], columns=index) + df = df.reindex(columns=[(1,), (3,)]) + expected = ",1,3\n0,1,3\n" + result = df.to_csv(line_terminator="\n") + tm.assert_almost_equal(result, expected) + + def test_gz_lineend(self): + # GH 25311 + df = pd.DataFrame({"a": [1, 2]}) + expected_rows = ["a", "1", "2"] + expected = tm.convert_rows_list_to_csv_str(expected_rows) + with tm.ensure_clean("__test_gz_lineend.csv.gz") as path: + df.to_csv(path, index=False) + with tm.decompress_file(path, compression="gzip") as f: + result = f.read().decode("utf-8") + + assert result == expected diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py new file mode 100644 index 00000000..c7270322 --- /dev/null +++ b/pandas/tests/frame/test_validate.py @@ -0,0 +1,41 @@ +import pytest + +from pandas.core.frame import DataFrame + + +@pytest.fixture +def dataframe(): + return DataFrame({"a": [1, 2], "b": [3, 4]}) + + +class TestDataFrameValidate: + """Tests for error handling related to data types of method arguments.""" + + @pytest.mark.parametrize( + "func", + [ + "query", + "eval", + "set_index", + "reset_index", + "dropna", + "drop_duplicates", + "sort_values", + ], + ) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, dataframe, func, inplace): + msg = 'For argument "inplace" expected type bool' + kwargs = dict(inplace=inplace) + + if func == "query": + kwargs["expr"] = "a > b" + elif func == "eval": + kwargs["expr"] = "a + b" + elif func == "set_index": + kwargs["keys"] = ["a"] + elif func == "sort_values": + kwargs["by"] = ["a"] + + with pytest.raises(ValueError, match=msg): + getattr(dataframe, func)(**kwargs) diff --git a/pandas/tests/generic/__init__.py b/pandas/tests/generic/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/generic/test_frame.py 
b/pandas/tests/generic/test_frame.py new file mode 100644 index 00000000..7fe22e77 --- /dev/null +++ b/pandas/tests/generic/test_frame.py @@ -0,0 +1,282 @@ +from copy import deepcopy +from distutils.version import LooseVersion +from operator import methodcaller + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series, date_range +import pandas._testing as tm + +from .test_generic import Generic + +try: + import xarray + + _XARRAY_INSTALLED = True +except ImportError: + _XARRAY_INSTALLED = False + + +class TestDataFrame(Generic): + _typ = DataFrame + _comparator = lambda self, x, y: tm.assert_frame_equal(x, y) + + def test_rename_mi(self): + df = DataFrame( + [11, 21, 31], + index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]), + ) + df.rename(str.lower) + + def test_set_axis_name(self): + df = pd.DataFrame([[1, 2], [3, 4]]) + funcs = ["_set_axis_name", "rename_axis"] + for func in funcs: + result = methodcaller(func, "foo")(df) + assert df.index.name is None + assert result.index.name == "foo" + + result = methodcaller(func, "cols", axis=1)(df) + assert df.columns.name is None + assert result.columns.name == "cols" + + def test_set_axis_name_mi(self): + df = DataFrame( + np.empty((3, 3)), + index=MultiIndex.from_tuples([("A", x) for x in list("aBc")]), + columns=MultiIndex.from_tuples([("C", x) for x in list("xyz")]), + ) + + level_names = ["L1", "L2"] + funcs = ["_set_axis_name", "rename_axis"] + for func in funcs: + result = methodcaller(func, level_names)(df) + assert result.index.names == level_names + assert result.columns.names == [None, None] + + result = methodcaller(func, level_names, axis=1)(df) + assert result.columns.names == ["L1", "L2"] + assert result.index.names == [None, None] + + def test_nonzero_single_element(self): + + # allow single item via bool method + df = DataFrame([[True]]) + assert df.bool() + + df = DataFrame([[False]]) + 
assert not df.bool() + + df = DataFrame([[False, False]]) + with pytest.raises(ValueError): + df.bool() + with pytest.raises(ValueError): + bool(df) + + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + o = DataFrame({"A": [1, "2", 3.0]}) + result = o._get_numeric_data() + expected = DataFrame(index=[0, 1, 2], dtype=object) + self._compare(result, expected) + + def test_metadata_propagation_indiv(self): + + # groupby + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + result = df.groupby("A").sum() + self.check_metadata(df, result) + + # resample + df = DataFrame( + np.random.randn(1000, 2), + index=date_range("20130101", periods=1000, freq="s"), + ) + result = df.resample("1T") + self.check_metadata(df, result) + + # merging with override + # GH 6923 + _metadata = DataFrame._metadata + _finalize = DataFrame.__finalize__ + + np.random.seed(10) + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["a", "b"]) + df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=["c", "d"]) + DataFrame._metadata = ["filename"] + df1.filename = "fname1.csv" + df2.filename = "fname2.csv" + + def finalize(self, other, method=None, **kwargs): + + for name in self._metadata: + if method == "merge": + left, right = other.left, other.right + value = getattr(left, name, "") + "|" + getattr(right, name, "") + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, "")) + + return self + + DataFrame.__finalize__ = finalize + result = df1.merge(df2, left_on=["a"], right_on=["c"], how="inner") + assert result.filename == "fname1.csv|fname2.csv" + + # concat + # GH 6927 + DataFrame._metadata = ["filename"] + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list("ab")) + df1.filename = "foo" + + def finalize(self, other, method=None, **kwargs): 
+ for name in self._metadata: + if method == "concat": + value = "+".join( + [getattr(o, name) for o in other.objs if getattr(o, name, None)] + ) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, None)) + + return self + + DataFrame.__finalize__ = finalize + + result = pd.concat([df1, df1]) + assert result.filename == "foo+foo" + + # reset + DataFrame._metadata = _metadata + DataFrame.__finalize__ = _finalize + + def test_set_attribute(self): + # Test for consistent setattr behavior when an attribute and a column + # have the same name (Issue #8994) + df = DataFrame({"x": [1, 2, 3]}) + + df.y = 2 + df["y"] = [2, 4, 6] + df.y = 5 + + assert df.y == 5 + tm.assert_series_equal(df["y"], Series([2, 4, 6], name="y")) + + @pytest.mark.skipif( + not _XARRAY_INSTALLED + or _XARRAY_INSTALLED + and LooseVersion(xarray.__version__) < LooseVersion("0.10.0"), + reason="xarray >= 0.10.0 required", + ) + @pytest.mark.parametrize( + "index", + [ + "FloatIndex", + "IntIndex", + "StringIndex", + "UnicodeIndex", + "DateIndex", + "PeriodIndex", + "CategoricalIndex", + "TimedeltaIndex", + ], + ) + def test_to_xarray_index_types(self, index): + from xarray import Dataset + + index = getattr(tm, f"make{index}") + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + } + ) + + df.index = index(3) + df.index.name = "foo" + df.columns.name = "bar" + result = df.to_xarray() + assert result.dims["foo"] == 3 + assert len(result.coords) == 1 + assert len(result.data_vars) == 8 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, Dataset) + + # idempotency + # categoricals are not preserved + # datetimes w/tz are preserved + # column names are 
lost + expected = df.copy() + expected["f"] = expected["f"].astype(object) + expected.columns.name = None + tm.assert_frame_equal( + result.to_dataframe(), + expected, + check_index_type=False, + check_categorical=False, + ) + + @td.skip_if_no("xarray", min_version="0.7.0") + def test_to_xarray(self): + from xarray import Dataset + + df = DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + "c": np.arange(3, 6).astype("u1"), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.Categorical(list("abc")), + "g": pd.date_range("20130101", periods=3), + "h": pd.date_range("20130101", periods=3, tz="US/Eastern"), + } + ) + + df.index.name = "foo" + result = df[0:0].to_xarray() + assert result.dims["foo"] == 0 + assert isinstance(result, Dataset) + + # available in 0.7.1 + # MultiIndex + df.index = pd.MultiIndex.from_product([["a"], range(3)], names=["one", "two"]) + result = df.to_xarray() + assert result.dims["one"] == 1 + assert result.dims["two"] == 3 + assert len(result.coords) == 2 + assert len(result.data_vars) == 8 + tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) + assert isinstance(result, Dataset) + + result = result.to_dataframe() + expected = df.copy() + expected["f"] = expected["f"].astype(object) + expected.columns.name = None + tm.assert_frame_equal(result, expected, check_index_type=False) + + def test_deepcopy_empty(self): + # This test covers empty frame copying with non-empty column sets + # as reported in issue GH15370 + empty_frame = DataFrame(data=[], index=[], columns=["A"]) + empty_frame_copy = deepcopy(empty_frame) + + self._compare(empty_frame_copy, empty_frame) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py new file mode 100644 index 00000000..a6841718 --- /dev/null +++ b/pandas/tests/generic/test_generic.py @@ -0,0 +1,976 @@ +from copy import copy, deepcopy + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_scalar 
+ +import pandas as pd +from pandas import DataFrame, MultiIndex, Series, date_range +import pandas._testing as tm + +# ---------------------------------------------------------------------- +# Generic types test cases + + +class Generic: + @property + def _ndim(self): + return self._typ._AXIS_LEN + + def _axes(self): + """ return the axes for my object typ """ + return self._typ._AXIS_ORDERS + + def _construct(self, shape, value=None, dtype=None, **kwargs): + """ construct an object for the given shape + if value is specified use that if its a scalar + if value is an array, repeat it as needed """ + + if isinstance(shape, int): + shape = tuple([shape] * self._ndim) + if value is not None: + if is_scalar(value): + if value == "empty": + arr = None + dtype = np.float64 + + # remove the info axis + kwargs.pop(self._typ._info_axis_name, None) + else: + arr = np.empty(shape, dtype=dtype) + arr.fill(value) + else: + fshape = np.prod(shape) + arr = value.ravel() + new_shape = fshape / arr.shape[0] + if fshape % arr.shape[0] != 0: + raise Exception("invalid value passed in _construct") + + arr = np.repeat(arr, new_shape).reshape(shape) + else: + arr = np.random.randn(*shape) + return self._typ(arr, dtype=dtype, **kwargs) + + def _compare(self, result, expected): + self._comparator(result, expected) + + def test_rename(self): + + # single axis + idx = list("ABCD") + # relabeling values passed into self.rename + args = [ + str.lower, + {x: x.lower() for x in idx}, + Series({x: x.lower() for x in idx}), + ] + + for axis in self._axes(): + kwargs = {axis: idx} + obj = self._construct(4, **kwargs) + + for arg in args: + # rename a single axis + result = obj.rename(**{axis: arg}) + expected = obj.copy() + setattr(expected, axis, list("abcd")) + self._compare(result, expected) + + # multiple axes at once + + def test_get_numeric_data(self): + + n = 4 + kwargs = {self._typ._AXIS_NAMES[i]: list(range(n)) for i in range(self._ndim)} + + # get the numeric data + o = 
self._construct(n, **kwargs) + result = o._get_numeric_data() + self._compare(result, o) + + # non-inclusion + result = o._get_bool_data() + expected = self._construct(n, value="empty", **kwargs) + self._compare(result, expected) + + # get the bool data + arr = np.array([True, True, False, True]) + o = self._construct(n, value=arr, **kwargs) + result = o._get_numeric_data() + self._compare(result, o) + + # _get_numeric_data is includes _get_bool_data, so can't test for + # non-inclusion + + def test_get_default(self): + + # GH 7725 + d0 = "a", "b", "c", "d" + d1 = np.arange(4, dtype="int64") + others = "e", 10 + + for data, index in ((d0, d1), (d1, d0)): + s = Series(data, index=index) + for i, d in zip(index, data): + assert s.get(i) == d + assert s.get(i, d) == d + assert s.get(i, "z") == d + for other in others: + assert s.get(other, "z") == "z" + assert s.get(other, other) == other + + def test_nonzero(self): + + # GH 4633 + # look at the boolean/nonzero behavior for objects + obj = self._construct(shape=4) + msg = f"The truth value of a {self._typ.__name__} is ambiguous" + with pytest.raises(ValueError, match=msg): + bool(obj == 0) + with pytest.raises(ValueError, match=msg): + bool(obj == 1) + with pytest.raises(ValueError, match=msg): + bool(obj) + + obj = self._construct(shape=4, value=1) + with pytest.raises(ValueError, match=msg): + bool(obj == 0) + with pytest.raises(ValueError, match=msg): + bool(obj == 1) + with pytest.raises(ValueError, match=msg): + bool(obj) + + obj = self._construct(shape=4, value=np.nan) + with pytest.raises(ValueError, match=msg): + bool(obj == 0) + with pytest.raises(ValueError, match=msg): + bool(obj == 1) + with pytest.raises(ValueError, match=msg): + bool(obj) + + # empty + obj = self._construct(shape=0) + with pytest.raises(ValueError, match=msg): + bool(obj) + + # invalid behaviors + + obj1 = self._construct(shape=4, value=1) + obj2 = self._construct(shape=4, value=1) + + with pytest.raises(ValueError, match=msg): + if 
obj1: + pass + + with pytest.raises(ValueError, match=msg): + obj1 and obj2 + with pytest.raises(ValueError, match=msg): + obj1 or obj2 + with pytest.raises(ValueError, match=msg): + not obj1 + + def test_downcast(self): + # test close downcasting + + o = self._construct(shape=4, value=9, dtype=np.int64) + result = o.copy() + result._data = o._data.downcast(dtypes="infer") + self._compare(result, o) + + o = self._construct(shape=4, value=9.0) + expected = o.astype(np.int64) + result = o.copy() + result._data = o._data.downcast(dtypes="infer") + self._compare(result, expected) + + o = self._construct(shape=4, value=9.5) + result = o.copy() + result._data = o._data.downcast(dtypes="infer") + self._compare(result, o) + + # are close + o = self._construct(shape=4, value=9.000000000005) + result = o.copy() + result._data = o._data.downcast(dtypes="infer") + expected = o.astype(np.int64) + self._compare(result, expected) + + def test_constructor_compound_dtypes(self): + # see gh-5191 + # Compound dtypes should raise NotImplementedError. 
+ + def f(dtype): + return self._construct(shape=3, value=1, dtype=dtype) + + msg = "compound dtypes are not implemented" + f"in the {self._typ.__name__} constructor" + + with pytest.raises(NotImplementedError, match=msg): + f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) + + # these work (though results may be unexpected) + f("int64") + f("float64") + f("M8[ns]") + + def check_metadata(self, x, y=None): + for m in x._metadata: + v = getattr(x, m, None) + if y is None: + assert v is None + else: + assert v == getattr(y, m, None) + + def test_metadata_propagation(self): + # check that the metadata matches up on the resulting ops + + o = self._construct(shape=3) + o.name = "foo" + o2 = self._construct(shape=3) + o2.name = "bar" + + # ---------- + # preserving + # ---------- + + # simple ops with scalars + for op in ["__add__", "__sub__", "__truediv__", "__mul__"]: + result = getattr(o, op)(1) + self.check_metadata(o, result) + + # ops with like + for op in ["__add__", "__sub__", "__truediv__", "__mul__"]: + result = getattr(o, op)(o) + self.check_metadata(o, result) + + # simple boolean + for op in ["__eq__", "__le__", "__ge__"]: + v1 = getattr(o, op)(o) + self.check_metadata(o, v1) + self.check_metadata(o, v1 & v1) + self.check_metadata(o, v1 | v1) + + # combine_first + result = o.combine_first(o2) + self.check_metadata(o, result) + + # --------------------------- + # non-preserving (by default) + # --------------------------- + + # add non-like + result = o + o2 + self.check_metadata(result) + + # simple boolean + for op in ["__eq__", "__le__", "__ge__"]: + + # this is a name matching op + v1 = getattr(o, op)(o) + v2 = getattr(o, op)(o2) + self.check_metadata(v2) + self.check_metadata(v1 & v2) + self.check_metadata(v1 | v2) + + def test_head_tail(self): + # GH5370 + + o = self._construct(shape=10) + + # check all index types + for index in [ + tm.makeFloatIndex, + tm.makeIntIndex, + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeDateIndex, + 
tm.makePeriodIndex, + ]: + axis = o._get_axis_name(0) + setattr(o, axis, index(len(getattr(o, axis)))) + + o.head() + + self._compare(o.head(), o.iloc[:5]) + self._compare(o.tail(), o.iloc[-5:]) + + # 0-len + self._compare(o.head(0), o.iloc[0:0]) + self._compare(o.tail(0), o.iloc[0:0]) + + # bounded + self._compare(o.head(len(o) + 1), o) + self._compare(o.tail(len(o) + 1), o) + + # neg index + self._compare(o.head(-3), o.head(7)) + self._compare(o.tail(-3), o.tail(7)) + + def test_sample(self): + # Fixes issue: 2419 + + o = self._construct(shape=10) + + ### + # Check behavior of random_state argument + ### + + # Check for stability when receives seed or random state -- run 10 + # times. + for test in range(10): + seed = np.random.randint(0, 100) + self._compare( + o.sample(n=4, random_state=seed), o.sample(n=4, random_state=seed) + ) + + self._compare( + o.sample(frac=0.7, random_state=seed), + o.sample(frac=0.7, random_state=seed), + ) + + self._compare( + o.sample(n=4, random_state=np.random.RandomState(test)), + o.sample(n=4, random_state=np.random.RandomState(test)), + ) + + self._compare( + o.sample(frac=0.7, random_state=np.random.RandomState(test)), + o.sample(frac=0.7, random_state=np.random.RandomState(test)), + ) + + self._compare( + o.sample( + frac=2, replace=True, random_state=np.random.RandomState(test) + ), + o.sample( + frac=2, replace=True, random_state=np.random.RandomState(test) + ), + ) + + os1, os2 = [], [] + for _ in range(2): + np.random.seed(test) + os1.append(o.sample(n=4)) + os2.append(o.sample(frac=0.7)) + self._compare(*os1) + self._compare(*os2) + + # Check for error when random_state argument invalid. 
+ with pytest.raises(ValueError): + o.sample(random_state="astring!") + + ### + # Check behavior of `frac` and `N` + ### + + # Giving both frac and N throws error + with pytest.raises(ValueError): + o.sample(n=3, frac=0.3) + + # Check that raises right error for negative lengths + with pytest.raises(ValueError): + o.sample(n=-3) + with pytest.raises(ValueError): + o.sample(frac=-0.3) + + # Make sure float values of `n` give error + with pytest.raises(ValueError): + o.sample(n=3.2) + + # Check lengths are right + assert len(o.sample(n=4) == 4) + assert len(o.sample(frac=0.34) == 3) + assert len(o.sample(frac=0.36) == 4) + + ### + # Check weights + ### + + # Weight length must be right + with pytest.raises(ValueError): + o.sample(n=3, weights=[0, 1]) + + with pytest.raises(ValueError): + bad_weights = [0.5] * 11 + o.sample(n=3, weights=bad_weights) + + with pytest.raises(ValueError): + bad_weight_series = Series([0, 0, 0.2]) + o.sample(n=4, weights=bad_weight_series) + + # Check won't accept negative weights + with pytest.raises(ValueError): + bad_weights = [-0.1] * 10 + o.sample(n=3, weights=bad_weights) + + # Check inf and -inf throw errors: + with pytest.raises(ValueError): + weights_with_inf = [0.1] * 10 + weights_with_inf[0] = np.inf + o.sample(n=3, weights=weights_with_inf) + + with pytest.raises(ValueError): + weights_with_ninf = [0.1] * 10 + weights_with_ninf[0] = -np.inf + o.sample(n=3, weights=weights_with_ninf) + + # All zeros raises errors + zero_weights = [0] * 10 + with pytest.raises(ValueError): + o.sample(n=3, weights=zero_weights) + + # All missing weights + nan_weights = [np.nan] * 10 + with pytest.raises(ValueError): + o.sample(n=3, weights=nan_weights) + + # Check np.nan are replaced by zeros. + weights_with_nan = [np.nan] * 10 + weights_with_nan[5] = 0.5 + self._compare(o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6]) + + # Check None are also replaced by zeros. 
+ weights_with_None = [None] * 10 + weights_with_None[5] = 0.5 + self._compare(o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6]) + + def test_sample_upsampling_without_replacement(self): + # GH27451 + + df = pd.DataFrame({"A": list("abc")}) + msg = ( + "Replace has to be set to `True` when " + "upsampling the population `frac` > 1." + ) + with pytest.raises(ValueError, match=msg): + df.sample(frac=2, replace=False) + + def test_sample_is_copy(self): + # GH-27357, GH-30784: ensure the result of sample is an actual copy and + # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + df = pd.DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) + df2 = df.sample(3) + + with tm.assert_produces_warning(None): + df2["d"] = 1 + + def test_size_compat(self): + # GH8846 + # size property should be defined + + o = self._construct(shape=10) + assert o.size == np.prod(o.shape) + assert o.size == 10 ** len(o.axes) + + def test_split_compat(self): + # xref GH8846 + o = self._construct(shape=10) + assert len(np.array_split(o, 5)) == 5 + assert len(np.array_split(o, 2)) == 2 + + def test_unexpected_keyword(self): # GH8597 + df = DataFrame(np.random.randn(5, 2), columns=["jim", "joe"]) + ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) + ts = df["joe"].copy() + ts[2] = np.nan + + with pytest.raises(TypeError, match="unexpected keyword"): + df.drop("joe", axis=1, in_place=True) + + with pytest.raises(TypeError, match="unexpected keyword"): + df.reindex([1, 0], inplace=True) + + with pytest.raises(TypeError, match="unexpected keyword"): + ca.fillna(0, inplace=True) + + with pytest.raises(TypeError, match="unexpected keyword"): + ts.fillna(0, in_place=True) + + # See gh-12301 + def test_stat_unexpected_keyword(self): + obj = self._construct(5) + starwars = "Star Wars" + errmsg = "unexpected keyword" + + with pytest.raises(TypeError, match=errmsg): + obj.max(epic=starwars) # stat_function + with pytest.raises(TypeError, match=errmsg): + 
obj.var(epic=starwars) # stat_function_ddof + with pytest.raises(TypeError, match=errmsg): + obj.sum(epic=starwars) # cum_function + with pytest.raises(TypeError, match=errmsg): + obj.any(epic=starwars) # logical_function + + def test_api_compat(self): + + # GH 12021 + # compat for __name__, __qualname__ + + obj = self._construct(5) + for func in ["sum", "cumsum", "any", "var"]: + f = getattr(obj, func) + assert f.__name__ == func + assert f.__qualname__.endswith(func) + + def test_stat_non_defaults_args(self): + obj = self._construct(5) + out = np.array([0]) + errmsg = "the 'out' parameter is not supported" + + with pytest.raises(ValueError, match=errmsg): + obj.max(out=out) # stat_function + with pytest.raises(ValueError, match=errmsg): + obj.var(out=out) # stat_function_ddof + with pytest.raises(ValueError, match=errmsg): + obj.sum(out=out) # cum_function + with pytest.raises(ValueError, match=errmsg): + obj.any(out=out) # logical_function + + def test_truncate_out_of_bounds(self): + # GH11382 + + # small + shape = [int(2e3)] + ([1] * (self._ndim - 1)) + small = self._construct(shape, dtype="int8", value=1) + self._compare(small.truncate(), small) + self._compare(small.truncate(before=0, after=3e3), small) + self._compare(small.truncate(before=-1, after=2e3), small) + + # big + shape = [int(2e6)] + ([1] * (self._ndim - 1)) + big = self._construct(shape, dtype="int8", value=1) + self._compare(big.truncate(), big) + self._compare(big.truncate(before=0, after=3e6), big) + self._compare(big.truncate(before=-1, after=2e6), big) + + def test_validate_bool_args(self): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + invalid_values = [1, "True", [1, 2, 3], 5.0] + + for value in invalid_values: + with pytest.raises(ValueError): + super(DataFrame, df).rename_axis( + mapper={"a": "x", "b": "y"}, axis=1, inplace=value + ) + + with pytest.raises(ValueError): + super(DataFrame, df).drop("a", axis=1, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, 
df).sort_index(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df)._consolidate(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).fillna(value=0, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).replace(to_replace=1, value=7, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).interpolate(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df)._where(cond=df.a > 2, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).mask(cond=df.a > 2, inplace=value) + + def test_copy_and_deepcopy(self): + # GH 15444 + for shape in [0, 1, 2]: + obj = self._construct(shape) + for func in [ + copy, + deepcopy, + lambda x: x.copy(deep=False), + lambda x: x.copy(deep=True), + ]: + obj_copy = func(obj) + assert obj_copy is not obj + self._compare(obj_copy, obj) + + @pytest.mark.parametrize( + "periods,fill_method,limit,exp", + [ + (1, "ffill", None, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, 0]), + (1, "ffill", 1, [np.nan, np.nan, np.nan, 1, 1, 1.5, 0, np.nan]), + (1, "bfill", None, [np.nan, 0, 0, 1, 1, 1.5, np.nan, np.nan]), + (1, "bfill", 1, [np.nan, np.nan, 0, 1, 1, 1.5, np.nan, np.nan]), + (-1, "ffill", None, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, 0, np.nan]), + (-1, "ffill", 1, [np.nan, np.nan, -0.5, -0.5, -0.6, 0, np.nan, np.nan]), + (-1, "bfill", None, [0, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), + (-1, "bfill", 1, [np.nan, 0, -0.5, -0.5, -0.6, np.nan, np.nan, np.nan]), + ], + ) + def test_pct_change(self, periods, fill_method, limit, exp): + vals = [np.nan, np.nan, 1, 2, 4, 10, np.nan, np.nan] + obj = self._typ(vals) + func = getattr(obj, "pct_change") + res = func(periods=periods, fill_method=fill_method, limit=limit) + if type(obj) is DataFrame: + tm.assert_frame_equal(res, DataFrame(exp)) + else: + tm.assert_series_equal(res, Series(exp)) + + +class TestNDFrame: + # tests that don't fit elsewhere + + def test_sample(sel): + # Fixes issue: 2419 
+ # additional specific object based tests + + # A few dataframe test with degenerate weights. + easy_weight_list = [0] * 10 + easy_weight_list[5] = 1 + + df = pd.DataFrame( + { + "col1": range(10, 20), + "col2": range(20, 30), + "colString": ["a"] * 10, + "easyweights": easy_weight_list, + } + ) + sample1 = df.sample(n=1, weights="easyweights") + tm.assert_frame_equal(sample1, df.iloc[5:6]) + + # Ensure proper error if string given as weight for Series or + # DataFrame with axis = 1. + s = Series(range(10)) + with pytest.raises(ValueError): + s.sample(n=3, weights="weight_column") + + with pytest.raises(ValueError): + df.sample(n=1, weights="weight_column", axis=1) + + # Check weighting key error + with pytest.raises( + KeyError, match="'String passed to weights not a valid column'" + ): + df.sample(n=3, weights="not_a_real_column_name") + + # Check that re-normalizes weights that don't sum to one. + weights_less_than_1 = [0] * 10 + weights_less_than_1[0] = 0.5 + tm.assert_frame_equal(df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) + + ### + # Test axis argument + ### + + # Test axis argument + df = pd.DataFrame({"col1": range(10), "col2": ["a"] * 10}) + second_column_weight = [0, 1] + tm.assert_frame_equal( + df.sample(n=1, axis=1, weights=second_column_weight), df[["col2"]] + ) + + # Different axis arg types + tm.assert_frame_equal( + df.sample(n=1, axis="columns", weights=second_column_weight), df[["col2"]] + ) + + weight = [0] * 10 + weight[5] = 0.5 + tm.assert_frame_equal(df.sample(n=1, axis="rows", weights=weight), df.iloc[5:6]) + tm.assert_frame_equal( + df.sample(n=1, axis="index", weights=weight), df.iloc[5:6] + ) + + # Check out of range axis values + with pytest.raises(ValueError): + df.sample(n=1, axis=2) + + with pytest.raises(ValueError): + df.sample(n=1, axis="not_a_name") + + with pytest.raises(ValueError): + s = pd.Series(range(10)) + s.sample(n=1, axis=1) + + # Test weight length compared to correct axis + with 
pytest.raises(ValueError): + df.sample(n=1, axis=1, weights=[0.5] * 10) + + # Check weights with axis = 1 + easy_weight_list = [0] * 3 + easy_weight_list[2] = 1 + + df = pd.DataFrame( + {"col1": range(10, 20), "col2": range(20, 30), "colString": ["a"] * 10} + ) + sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) + tm.assert_frame_equal(sample1, df[["colString"]]) + + # Test default axes + tm.assert_frame_equal( + df.sample(n=3, random_state=42), df.sample(n=3, axis=0, random_state=42) + ) + + # Test that function aligns weights with frame + df = DataFrame({"col1": [5, 6, 7], "col2": ["a", "b", "c"]}, index=[9, 5, 3]) + s = Series([1, 0, 0], index=[3, 5, 9]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=s)) + + # Weights have index values to be dropped because not in + # sampled DataFrame + s2 = Series([0.001, 0, 10000], index=[3, 5, 10]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2)) + + # Weights have empty values to be filed with zeros + s3 = Series([0.01, 0], index=[3, 5]) + tm.assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3)) + + # No overlap in weight and sampled DataFrame indices + s4 = Series([1, 0], index=[1, 2]) + with pytest.raises(ValueError): + df.sample(1, weights=s4) + + def test_squeeze(self): + # noop + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]: + tm.assert_series_equal(s.squeeze(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.squeeze(), df) + + # squeezing + df = tm.makeTimeDataFrame().reindex(columns=["A"]) + tm.assert_series_equal(df.squeeze(), df["A"]) + + # don't fail with 0 length dimensions GH11229 & GH8999 + empty_series = Series([], name="five", dtype=np.float64) + empty_frame = DataFrame([empty_series]) + tm.assert_series_equal(empty_series, empty_series.squeeze()) + tm.assert_series_equal(empty_series, empty_frame.squeeze()) + + # axis argument + df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] + assert df.shape == (1, 1) + 
tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) + tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0]) + assert df.squeeze() == df.iloc[0, 0] + msg = "No axis named 2 for object type " + with pytest.raises(ValueError, match=msg): + df.squeeze(axis=2) + msg = "No axis named x for object type " + with pytest.raises(ValueError, match=msg): + df.squeeze(axis="x") + + df = tm.makeTimeDataFrame(3) + tm.assert_frame_equal(df.squeeze(axis=0), df) + + def test_numpy_squeeze(self): + s = tm.makeFloatSeries() + tm.assert_series_equal(np.squeeze(s), s) + + df = tm.makeTimeDataFrame().reindex(columns=["A"]) + tm.assert_series_equal(np.squeeze(df), df["A"]) + + def test_transpose(self): + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]: + # calls implementation in pandas/core/base.py + tm.assert_series_equal(s.transpose(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.transpose().transpose(), df) + + def test_numpy_transpose(self): + msg = "the 'axes' parameter is not supported" + + s = tm.makeFloatSeries() + tm.assert_series_equal(np.transpose(s), s) + + with pytest.raises(ValueError, match=msg): + np.transpose(s, axes=1) + + df = tm.makeTimeDataFrame() + tm.assert_frame_equal(np.transpose(np.transpose(df)), df) + + with pytest.raises(ValueError, match=msg): + np.transpose(df, axes=1) + + def test_take(self): + indices = [1, 5, -2, 6, 3, -1] + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]: + out = s.take(indices) + expected = Series( + data=s.values.take(indices), index=s.index.take(indices), dtype=s.dtype + ) + tm.assert_series_equal(out, expected) + for df in [tm.makeTimeDataFrame()]: + out = df.take(indices) + expected = DataFrame( + data=df.values.take(indices, axis=0), + index=df.index.take(indices), + columns=df.columns, + ) + 
tm.assert_frame_equal(out, expected) + + def test_take_invalid_kwargs(self): + indices = [-3, 2, 0, 1] + s = tm.makeFloatSeries() + df = tm.makeTimeDataFrame() + + for obj in (s, df): + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + obj.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + obj.take(indices, mode="clip") + + @pytest.mark.parametrize("is_copy", [True, False]) + def test_depr_take_kwarg_is_copy(self, is_copy): + # GH 27357 + df = DataFrame({"A": [1, 2, 3]}) + msg = ( + "is_copy is deprecated and will be removed in a future version. " + "'take' always returns a copy, so there is no need to specify this." + ) + with tm.assert_produces_warning(FutureWarning) as w: + df.take([0, 1], is_copy=is_copy) + + assert w[0].message.args[0] == msg + + s = Series([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + s.take([0, 1], is_copy=is_copy) + + def test_equals(self): + s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) + s2 = s1.copy() + assert s1.equals(s2) + + s1[1] = 99 + assert not s1.equals(s2) + + # NaNs compare as equal + s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3]) + s2 = s1.copy() + assert s1.equals(s2) + + s2[0] = 9.9 + assert not s1.equals(s2) + + idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")]) + s1 = Series([1, 2, np.nan], index=idx) + s2 = s1.copy() + assert s1.equals(s2) + + # Add object dtype column with nans + index = np.random.random(10) + df1 = DataFrame(np.random.random(10), index=index, columns=["floats"]) + df1["text"] = "the sky is so blue. 
we could use more chocolate.".split() + df1["start"] = date_range("2000-1-1", periods=10, freq="T") + df1["end"] = date_range("2000-1-1", periods=10, freq="D") + df1["diff"] = df1["end"] - df1["start"] + df1["bool"] = np.arange(10) % 3 == 0 + df1.loc[::2] = np.nan + df2 = df1.copy() + assert df1["text"].equals(df2["text"]) + assert df1["start"].equals(df2["start"]) + assert df1["end"].equals(df2["end"]) + assert df1["diff"].equals(df2["diff"]) + assert df1["bool"].equals(df2["bool"]) + assert df1.equals(df2) + assert not df1.equals(object) + + # different dtype + different = df1.copy() + different["floats"] = different["floats"].astype("float32") + assert not df1.equals(different) + + # different index + different_index = -index + different = df2.set_index(different_index) + assert not df1.equals(different) + + # different columns + different = df2.copy() + different.columns = df2.columns[::-1] + assert not df1.equals(different) + + # DatetimeIndex + index = pd.date_range("2000-1-1", periods=10, freq="T") + df1 = df1.set_index(index) + df2 = df1.copy() + assert df1.equals(df2) + + # MultiIndex + df3 = df1.set_index(["text"], append=True) + df2 = df1.set_index(["text"], append=True) + assert df3.equals(df2) + + df2 = df1.set_index(["floats"], append=True) + assert not df3.equals(df2) + + # NaN in index + df3 = df1.set_index(["floats"], append=True) + df2 = df1.set_index(["floats"], append=True) + assert df3.equals(df2) + + # GH 8437 + a = pd.Series([False, np.nan]) + b = pd.Series([False, np.nan]) + c = pd.Series(index=range(2), dtype=object) + d = c.copy() + e = c.copy() + f = c.copy() + c[:-1] = d[:-1] = e[0] = f[0] = False + assert a.equals(a) + assert a.equals(b) + assert a.equals(c) + assert a.equals(d) + assert a.equals(e) + assert e.equals(f) + + def test_pipe(self): + df = DataFrame({"A": [1, 2, 3]}) + f = lambda x, y: x ** y + result = df.pipe(f, 2) + expected = DataFrame({"A": [1, 4, 9]}) + tm.assert_frame_equal(result, expected) + + result = df.A.pipe(f, 
2) + tm.assert_series_equal(result, expected.A) + + def test_pipe_tuple(self): + df = DataFrame({"A": [1, 2, 3]}) + f = lambda x, y: y + result = df.pipe((f, "y"), 0) + tm.assert_frame_equal(result, df) + + result = df.A.pipe((f, "y"), 0) + tm.assert_series_equal(result, df.A) + + def test_pipe_tuple_error(self): + df = DataFrame({"A": [1, 2, 3]}) + f = lambda x, y: y + with pytest.raises(ValueError): + df.pipe((f, "y"), x=1, y=0) + + with pytest.raises(ValueError): + df.A.pipe((f, "y"), x=1, y=0) + + @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame]) + def test_axis_classmethods(self, box): + obj = box(dtype=object) + values = ( + list(box._AXIS_NAMES.keys()) + + list(box._AXIS_NUMBERS.keys()) + + list(box._AXIS_ALIASES.keys()) + ) + for v in values: + assert obj._get_axis_number(v) == box._get_axis_number(v) + assert obj._get_axis_name(v) == box._get_axis_name(v) + assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v) diff --git a/pandas/tests/generic/test_label_or_level_utils.py b/pandas/tests/generic/test_label_or_level_utils.py new file mode 100644 index 00000000..d3566f16 --- /dev/null +++ b/pandas/tests/generic/test_label_or_level_utils.py @@ -0,0 +1,339 @@ +import pytest + +from pandas.core.dtypes.missing import array_equivalent + +import pandas as pd + + +# Fixtures +# ======== +@pytest.fixture +def df(): + """DataFrame with columns 'L1', 'L2', and 'L3' """ + return pd.DataFrame({"L1": [1, 2, 3], "L2": [11, 12, 13], "L3": ["A", "B", "C"]}) + + +@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]]) +def df_levels(request, df): + """DataFrame with columns or index levels 'L1', 'L2', and 'L3' """ + levels = request.param + + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture +def df_ambig(df): + """DataFrame with levels 'L1' and 'L2' and labels 'L1' and 'L3' """ + df = df.set_index(["L1", "L2"]) + + df["L1"] = df["L3"] + + return df + + +@pytest.fixture +def df_duplabels(df): + """DataFrame 
with level 'L1' and labels 'L2', 'L3', and 'L2' """ + df = df.set_index(["L1"]) + df = pd.concat([df, df["L2"]], axis=1) + + return df + + +# Test is label/level reference +# ============================= +def get_labels_levels(df_levels): + expected_labels = list(df_levels.columns) + expected_levels = [name for name in df_levels.index.names if name is not None] + return expected_labels, expected_levels + + +def assert_label_reference(frame, labels, axis): + for label in labels: + assert frame._is_label_reference(label, axis=axis) + assert not frame._is_level_reference(label, axis=axis) + assert frame._is_label_or_level_reference(label, axis=axis) + + +def assert_level_reference(frame, levels, axis): + for level in levels: + assert frame._is_level_reference(level, axis=axis) + assert not frame._is_label_reference(level, axis=axis) + assert frame._is_label_or_level_reference(level, axis=axis) + + +# DataFrame +# --------- +def test_is_level_or_label_reference_df_simple(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + # Transpose frame if axis == 1 + if axis in {1, "columns"}: + df_levels = df_levels.T + + # Perform checks + assert_level_reference(df_levels, expected_levels, axis=axis) + assert_label_reference(df_levels, expected_labels, axis=axis) + + +def test_is_level_reference_df_ambig(df_ambig, axis): + + # Transpose frame if axis == 1 + if axis in {1, "columns"}: + df_ambig = df_ambig.T + + # df has both an on-axis level and off-axis label named L1 + # Therefore L1 should reference the label, not the level + assert_label_reference(df_ambig, ["L1"], axis=axis) + + # df has an on-axis level named L2 and it is not ambiguous + # Therefore L2 is an level reference + assert_level_reference(df_ambig, ["L2"], axis=axis) + + # df has a column named L3 and it not an level reference + assert_label_reference(df_ambig, ["L3"], axis=axis) + + +# Series +# ------ +def 
test_is_level_reference_series_simple_axis0(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_level_reference(s, ["L1"], axis=0) + assert not s._is_level_reference("L2") + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_level_reference(s, ["L1", "L2"], axis=0) + assert not s._is_level_reference("L3") + + +def test_is_level_reference_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + + with pytest.raises(ValueError, match="No axis named 1"): + s._is_level_reference("L1", axis=1) + + +# Test _check_label_or_level_ambiguity_df +# ======================================= + +# DataFrame +# --------- +def test_check_label_or_level_ambiguity_df(df_ambig, axis): + + # Transpose frame if axis == 1 + if axis in {1, "columns"}: + df_ambig = df_ambig.T + + if axis in {0, "index"}: + msg = "'L1' is both an index level and a column label" + else: + msg = "'L1' is both a column level and an index label" + + # df_ambig has both an on-axis level and off-axis label named L1 + # Therefore, L1 is ambiguous. + with pytest.raises(ValueError, match=msg): + df_ambig._check_label_or_level_ambiguity("L1", axis=axis) + + # df_ambig has an on-axis level named L2,, and it is not ambiguous. 
+ df_ambig._check_label_or_level_ambiguity("L2", axis=axis) + + # df_ambig has an off-axis label named L3, and it is not ambiguous + assert not df_ambig._check_label_or_level_ambiguity("L3", axis=axis) + + +# Series +# ------ +def test_check_label_or_level_ambiguity_series(df): + + # A series has no columns and therefore references are never ambiguous + + # Make series with L1 as index + s = df.set_index("L1").L2 + s._check_label_or_level_ambiguity("L1", axis=0) + s._check_label_or_level_ambiguity("L2", axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + s._check_label_or_level_ambiguity("L1", axis=0) + s._check_label_or_level_ambiguity("L2", axis=0) + s._check_label_or_level_ambiguity("L3", axis=0) + + +def test_check_label_or_level_ambiguity_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + + with pytest.raises(ValueError, match="No axis named 1"): + s._check_label_or_level_ambiguity("L1", axis=1) + + +# Test _get_label_or_level_values +# =============================== +def assert_label_values(frame, labels, axis): + for label in labels: + if axis in {0, "index"}: + expected = frame[label]._values + else: + expected = frame.loc[label]._values + + result = frame._get_label_or_level_values(label, axis=axis) + assert array_equivalent(expected, result) + + +def assert_level_values(frame, levels, axis): + for level in levels: + if axis in {0, "index"}: + expected = frame.index.get_level_values(level=level)._values + else: + expected = frame.columns.get_level_values(level=level)._values + + result = frame._get_label_or_level_values(level, axis=axis) + assert array_equivalent(expected, result) + + +# DataFrame +# --------- +def test_get_label_or_level_values_df_simple(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + # Transpose frame if axis == 1 + if axis in {1, "columns"}: + df_levels = df_levels.T + + # Perform 
checks + assert_label_values(df_levels, expected_labels, axis=axis) + assert_level_values(df_levels, expected_levels, axis=axis) + + +def test_get_label_or_level_values_df_ambig(df_ambig, axis): + + # Transpose frame if axis == 1 + if axis in {1, "columns"}: + df_ambig = df_ambig.T + + # df has an on-axis level named L2, and it is not ambiguous. + assert_level_values(df_ambig, ["L2"], axis=axis) + + # df has an off-axis label named L3, and it is not ambiguous. + assert_label_values(df_ambig, ["L3"], axis=axis) + + +def test_get_label_or_level_values_df_duplabels(df_duplabels, axis): + + # Transpose frame if axis == 1 + if axis in {1, "columns"}: + df_duplabels = df_duplabels.T + + # df has unambiguous level 'L1' + assert_level_values(df_duplabels, ["L1"], axis=axis) + + # df has unique label 'L3' + assert_label_values(df_duplabels, ["L3"], axis=axis) + + # df has duplicate labels 'L2' + if axis in {0, "index"}: + expected_msg = "The column label 'L2' is not unique" + else: + expected_msg = "The index label 'L2' is not unique" + + with pytest.raises(ValueError, match=expected_msg): + assert_label_values(df_duplabels, ["L2"], axis=axis) + + +# Series +# ------ +def test_get_label_or_level_values_series_axis0(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_level_values(s, ["L1"], axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_level_values(s, ["L1", "L2"], axis=0) + + +def test_get_label_or_level_values_series_axis1_error(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + + with pytest.raises(ValueError, match="No axis named 1"): + s._get_label_or_level_values("L1", axis=1) + + +# Test _drop_labels_or_levels +# =========================== +def assert_labels_dropped(frame, labels, axis): + for label in labels: + df_dropped = frame._drop_labels_or_levels(label, axis=axis) + + if axis in {0, "index"}: + assert label in frame.columns + assert label not in df_dropped.columns + 
else: + assert label in frame.index + assert label not in df_dropped.index + + +def assert_levels_dropped(frame, levels, axis): + for level in levels: + df_dropped = frame._drop_labels_or_levels(level, axis=axis) + + if axis in {0, "index"}: + assert level in frame.index.names + assert level not in df_dropped.index.names + else: + assert level in frame.columns.names + assert level not in df_dropped.columns.names + + +# DataFrame +# --------- +def test_drop_labels_or_levels_df(df_levels, axis): + + # Compute expected labels and levels + expected_labels, expected_levels = get_labels_levels(df_levels) + + # Transpose frame if axis == 1 + if axis in {1, "columns"}: + df_levels = df_levels.T + + # Perform checks + assert_labels_dropped(df_levels, expected_labels, axis=axis) + assert_levels_dropped(df_levels, expected_levels, axis=axis) + + with pytest.raises(ValueError, match="not valid labels or levels"): + df_levels._drop_labels_or_levels("L4", axis=axis) + + +# Series +# ------ +def test_drop_labels_or_levels_series(df): + + # Make series with L1 as index + s = df.set_index("L1").L2 + assert_levels_dropped(s, ["L1"], axis=0) + + with pytest.raises(ValueError, match="not valid labels or levels"): + s._drop_labels_or_levels("L4", axis=0) + + # Make series with L1 and L2 as index + s = df.set_index(["L1", "L2"]).L3 + assert_levels_dropped(s, ["L1", "L2"], axis=0) + + with pytest.raises(ValueError, match="not valid labels or levels"): + s._drop_labels_or_levels("L4", axis=0) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py new file mode 100644 index 00000000..8ad8355f --- /dev/null +++ b/pandas/tests/generic/test_series.py @@ -0,0 +1,263 @@ +from distutils.version import LooseVersion +from operator import methodcaller + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import MultiIndex, Series, date_range +import pandas._testing as tm + +from .test_generic import 
Generic + +try: + import xarray + + _XARRAY_INSTALLED = True +except ImportError: + _XARRAY_INSTALLED = False + + +class TestSeries(Generic): + _typ = Series + _comparator = lambda self, x, y: tm.assert_series_equal(x, y) + + def setup_method(self): + self.ts = tm.makeTimeSeries() # Was at top level in test_series + self.ts.name = "ts" + + self.series = tm.makeStringSeries() + self.series.name = "series" + + def test_rename_mi(self): + s = Series( + [11, 21, 31], + index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]), + ) + s.rename(str.lower) + + def test_set_axis_name(self): + s = Series([1, 2, 3], index=["a", "b", "c"]) + funcs = ["rename_axis", "_set_axis_name"] + name = "foo" + for func in funcs: + result = methodcaller(func, name)(s) + assert s.index.name is None + assert result.index.name == name + + def test_set_axis_name_mi(self): + s = Series( + [11, 21, 31], + index=MultiIndex.from_tuples( + [("A", x) for x in ["a", "B", "c"]], names=["l1", "l2"] + ), + ) + funcs = ["rename_axis", "_set_axis_name"] + for func in funcs: + result = methodcaller(func, ["L1", "L2"])(s) + assert s.index.name is None + assert s.index.names == ["l1", "l2"] + assert result.index.name is None + assert result.index.names, ["L1", "L2"] + + def test_set_axis_name_raises(self): + s = pd.Series([1]) + with pytest.raises(ValueError): + s._set_axis_name(name="a", axis=1) + + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + o = Series([1, 2, 3]) + result = o._get_numeric_data() + self._compare(result, o) + + o = Series([1, "2", 3.0]) + result = o._get_numeric_data() + expected = Series([], dtype=object, index=pd.Index([], dtype=object)) + self._compare(result, expected) + + o = Series([True, False, True]) + result = o._get_numeric_data() + self._compare(result, o) + + o = Series([True, False, True]) + result = o._get_bool_data() + self._compare(result, o) + + o = Series(date_range("20130101", periods=3)) + result = o._get_numeric_data() + expected 
= Series([], dtype="M8[ns]", index=pd.Index([], dtype=object)) + self._compare(result, expected) + + def test_nonzero_single_element(self): + + # allow single item via bool method + s = Series([True]) + assert s.bool() + + s = Series([False]) + assert not s.bool() + + msg = "The truth value of a Series is ambiguous" + # single item nan to raise + for s in [Series([np.nan]), Series([pd.NaT]), Series([True]), Series([False])]: + with pytest.raises(ValueError, match=msg): + bool(s) + + msg = "bool cannot act on a non-boolean single element Series" + for s in [Series([np.nan]), Series([pd.NaT])]: + with pytest.raises(ValueError, match=msg): + s.bool() + + # multiple bool are still an error + msg = "The truth value of a Series is ambiguous" + for s in [Series([True, True]), Series([False, False])]: + with pytest.raises(ValueError, match=msg): + bool(s) + with pytest.raises(ValueError, match=msg): + s.bool() + + # single non-bool are an error + for s in [Series([1]), Series([0]), Series(["a"]), Series([0.0])]: + msg = "The truth value of a Series is ambiguous" + with pytest.raises(ValueError, match=msg): + bool(s) + msg = "bool cannot act on a non-boolean single element Series" + with pytest.raises(ValueError, match=msg): + s.bool() + + def test_metadata_propagation_indiv(self): + # check that the metadata matches up on the resulting ops + + o = Series(range(3), range(3)) + o.name = "foo" + o2 = Series(range(3), range(3)) + o2.name = "bar" + + result = o.T + self.check_metadata(o, result) + + # resample + ts = Series( + np.random.rand(1000), + index=date_range("20130101", periods=1000, freq="s"), + name="foo", + ) + result = ts.resample("1T").mean() + self.check_metadata(ts, result) + + result = ts.resample("1T").min() + self.check_metadata(ts, result) + + result = ts.resample("1T").apply(lambda x: x.sum()) + self.check_metadata(ts, result) + + _metadata = Series._metadata + _finalize = Series.__finalize__ + Series._metadata = ["name", "filename"] + o.filename = "foo" + 
o2.filename = "bar" + + def finalize(self, other, method=None, **kwargs): + for name in self._metadata: + if method == "concat" and name == "filename": + value = "+".join( + [getattr(o, name) for o in other.objs if getattr(o, name, None)] + ) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, None)) + + return self + + Series.__finalize__ = finalize + + result = pd.concat([o, o2]) + assert result.filename == "foo+bar" + assert result.name is None + + # reset + Series._metadata = _metadata + Series.__finalize__ = _finalize + + @pytest.mark.skipif( + not _XARRAY_INSTALLED + or _XARRAY_INSTALLED + and LooseVersion(xarray.__version__) < LooseVersion("0.10.0"), + reason="xarray >= 0.10.0 required", + ) + @pytest.mark.parametrize( + "index", + [ + "FloatIndex", + "IntIndex", + "StringIndex", + "UnicodeIndex", + "DateIndex", + "PeriodIndex", + "TimedeltaIndex", + "CategoricalIndex", + ], + ) + def test_to_xarray_index_types(self, index): + from xarray import DataArray + + index = getattr(tm, f"make{index}") + s = Series(range(6), index=index(6)) + s.index.name = "foo" + result = s.to_xarray() + repr(result) + assert len(result) == 6 + assert len(result.coords) == 1 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, DataArray) + + # idempotency + tm.assert_series_equal( + result.to_series(), s, check_index_type=False, check_categorical=True + ) + + @td.skip_if_no("xarray", min_version="0.7.0") + def test_to_xarray(self): + from xarray import DataArray + + s = Series([], dtype=object) + s.index.name = "foo" + result = s.to_xarray() + assert len(result) == 0 + assert len(result.coords) == 1 + tm.assert_almost_equal(list(result.coords.keys()), ["foo"]) + assert isinstance(result, DataArray) + + s = Series(range(6)) + s.index.name = "foo" + s.index = pd.MultiIndex.from_product( + [["a", "b"], range(3)], names=["one", "two"] + ) + result = s.to_xarray() + assert len(result) == 2 + 
tm.assert_almost_equal(list(result.coords.keys()), ["one", "two"]) + assert isinstance(result, DataArray) + tm.assert_series_equal(result.to_series(), s) + + @pytest.mark.parametrize( + "s", + [ + Series([np.arange(5)]), + pd.date_range("1/1/2011", periods=24, freq="H"), + pd.Series(range(5), index=pd.date_range("2017", periods=5)), + ], + ) + @pytest.mark.parametrize("shift_size", [0, 1, 2]) + def test_shift_always_copy(self, s, shift_size): + # GH22397 + assert s.shift(shift_size) is not s + + @pytest.mark.parametrize("move_by_freq", [pd.Timedelta("1D"), pd.Timedelta("1M")]) + def test_datetime_shift_always_copy(self, move_by_freq): + # GH22397 + s = pd.Series(range(5), index=pd.date_range("2017", periods=5)) + assert s.shift(freq=move_by_freq) is not s diff --git a/pandas/tests/groupby/__init__.py b/pandas/tests/groupby/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/groupby/aggregate/__init__.py b/pandas/tests/groupby/aggregate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py new file mode 100644 index 00000000..94c5563e --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -0,0 +1,997 @@ +""" +test .agg behavior / note that .apply is tested generally in test_groupby.py +""" +import functools + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, concat +import pandas._testing as tm +from pandas.core.base import SpecificationError +from pandas.core.groupby.generic import _make_unique, _maybe_mangle_lambdas +from pandas.core.groupby.grouper import Grouping + + +def test_groupby_agg_no_extra_calls(): + # GH#31760 + df = pd.DataFrame({"key": ["a", "b", "c", "c"], "value": [1, 2, 3, 4]}) + gb = df.groupby("key")["value"] + + def dummy_func(x): + assert len(x) != 0 + return x.sum() + + gb.agg(dummy_func) + + +def 
test_agg_regression1(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_must_agg(df): + grouped = df.groupby("A")["C"] + + msg = "Must produce aggregated value" + with pytest.raises(Exception, match=msg): + grouped.agg(lambda x: x.describe()) + with pytest.raises(Exception, match=msg): + grouped.agg(lambda x: x.index[:2]) + + +def test_agg_ser_multi_key(df): + # TODO(wesm): unused + ser = df.C # noqa + + f = lambda x: x.sum() + results = df.C.groupby([df.A, df.B]).aggregate(f) + expected = df.groupby(["A", "B"]).sum()["C"] + tm.assert_series_equal(results, expected) + + +def test_groupby_aggregation_mixed_dtype(): + + # GH 6212 + expected = DataFrame( + { + "v1": [5, 5, 7, np.nan, 3, 3, 4, 1], + "v2": [55, 55, 77, np.nan, 33, 33, 44, 11], + }, + index=MultiIndex.from_tuples( + [ + (1, 95), + (1, 99), + (2, 95), + (2, 99), + ("big", "damp"), + ("blue", "dry"), + ("red", "red"), + ("red", "wet"), + ], + names=["by1", "by2"], + ), + ) + + df = DataFrame( + { + "v1": [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + "v2": [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + "by1": ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + "by2": [ + "wet", + "dry", + 99, + 95, + np.nan, + "damp", + 95, + 99, + "red", + 99, + np.nan, + np.nan, + ], + } + ) + + g = df.groupby(["by1", "by2"]) + result = g[["v1", "v2"]].mean() + tm.assert_frame_equal(result, expected) + + +def test_groupby_aggregation_multi_level_column(): + # GH 29772 + lst = [ + [True, True, True, False], + [True, False, np.nan, False], + [True, True, np.nan, False], + [True, True, np.nan, False], + ] + df = pd.DataFrame( + data=lst, + columns=pd.MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), + ) + + result = df.groupby(level=1, axis=1).sum() + expected = pd.DataFrame({0: [2.0, 1, 1, 1], 1: [1, 0, 1, 1]}) + + 
tm.assert_frame_equal(result, expected) + + +def test_agg_apply_corner(ts, tsframe): + # nothing to group, all NA + grouped = ts.groupby(ts * np.nan) + assert ts.dtype == np.float64 + + # groupby float64 values results in Float64Index + exp = Series([], dtype=np.float64, index=pd.Index([], dtype=np.float64)) + tm.assert_series_equal(grouped.sum(), exp) + tm.assert_series_equal(grouped.agg(np.sum), exp) + tm.assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) + + # DataFrame + grouped = tsframe.groupby(tsframe["A"] * np.nan) + exp_df = DataFrame( + columns=tsframe.columns, dtype=float, index=pd.Index([], dtype=np.float64) + ) + tm.assert_frame_equal(grouped.sum(), exp_df, check_names=False) + tm.assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) + tm.assert_frame_equal(grouped.apply(np.sum), exp_df.iloc[:, :0], check_names=False) + + +def test_agg_grouping_is_list_tuple(ts): + df = tm.makeTimeDataFrame() + + grouped = df.groupby(lambda x: x.year) + grouper = grouped.grouper.groupings[0].grouper + grouped.grouper.groupings[0] = Grouping(ts.index, list(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + grouped.grouper.groupings[0] = Grouping(ts.index, tuple(grouper)) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +def test_agg_python_multiindex(mframe): + grouped = mframe.groupby(["A", "B"]) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "groupbyfunc", [lambda x: x.weekday(), [lambda x: x.month, lambda x: x.weekday()]] +) +def test_aggregate_str_func(tsframe, groupbyfunc): + grouped = tsframe.groupby(groupbyfunc) + + # single series + result = grouped["A"].agg("std") + expected = grouped["A"].std() + tm.assert_series_equal(result, expected) + + # group frame by function name + result = grouped.aggregate("var") + 
expected = grouped.var() + tm.assert_frame_equal(result, expected) + + # group frame by function dict + result = grouped.agg({"A": "var", "B": "std", "C": "mean", "D": "sem"}) + expected = DataFrame( + { + "A": grouped["A"].var(), + "B": grouped["B"].std(), + "C": grouped["C"].mean(), + "D": grouped["D"].sem(), + } + ) + tm.assert_frame_equal(result, expected) + + +def test_aggregate_item_by_item(df): + grouped = df.groupby("A") + + aggfun = lambda ser: ser.size + result = grouped.agg(aggfun) + foo = (df.A == "foo").sum() + bar = (df.A == "bar").sum() + K = len(result.columns) + + # GH5782 + # odd comparisons can result here, so cast to make easy + exp = pd.Series( + np.array([foo] * K), index=list("BCD"), dtype=np.float64, name="foo" + ) + tm.assert_series_equal(result.xs("foo"), exp) + + exp = pd.Series( + np.array([bar] * K), index=list("BCD"), dtype=np.float64, name="bar" + ) + tm.assert_almost_equal(result.xs("bar"), exp) + + def aggfun(ser): + return ser.size + + result = DataFrame().groupby(df.A).agg(aggfun) + assert isinstance(result, DataFrame) + assert len(result) == 0 + + +def test_wrap_agg_out(three_group): + grouped = three_group.groupby(["A", "B"]) + + def func(ser): + if ser.dtype == np.object: + raise TypeError + else: + return ser.sum() + + result = grouped.aggregate(func) + exp_grouped = three_group.loc[:, three_group.columns != "C"] + expected = exp_grouped.groupby(["A", "B"]).aggregate(func) + tm.assert_frame_equal(result, expected) + + +def test_agg_multiple_functions_maintain_order(df): + # GH #610 + funcs = [("mean", np.mean), ("max", np.max), ("min", np.min)] + result = df.groupby("A")["C"].agg(funcs) + exp_cols = Index(["mean", "max", "min"]) + + tm.assert_index_equal(result.columns, exp_cols) + + +def test_multiple_functions_tuples_and_non_tuples(df): + # #1359 + funcs = [("foo", "mean"), "std"] + ex_funcs = [("foo", "mean"), ("std", "std")] + + result = df.groupby("A")["C"].agg(funcs) + expected = df.groupby("A")["C"].agg(ex_funcs) + 
tm.assert_frame_equal(result, expected) + + result = df.groupby("A").agg(funcs) + expected = df.groupby("A").agg(ex_funcs) + tm.assert_frame_equal(result, expected) + + +def test_more_flexible_frame_multi_function(df): + grouped = df.groupby("A") + + exmean = grouped.agg({"C": np.mean, "D": np.mean}) + exstd = grouped.agg({"C": np.std, "D": np.std}) + + expected = concat([exmean, exstd], keys=["mean", "std"], axis=1) + expected = expected.swaplevel(0, 1, axis=1).sort_index(level=0, axis=1) + + d = {"C": [np.mean, np.std], "D": [np.mean, np.std]} + result = grouped.aggregate(d) + + tm.assert_frame_equal(result, expected) + + # be careful + result = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + expected = grouped.aggregate({"C": np.mean, "D": [np.mean, np.std]}) + tm.assert_frame_equal(result, expected) + + def foo(x): + return np.mean(x) + + def bar(x): + return np.std(x, ddof=1) + + # this uses column selection & renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + d = dict([["C", np.mean], ["D", dict([["foo", np.mean], ["bar", np.std]])]]) + grouped.aggregate(d) + + # But without renaming, these functions are OK + d = {"C": [np.mean], "D": [foo, bar]} + grouped.aggregate(d) + + +def test_multi_function_flexible_mix(df): + # GH #1268 + grouped = df.groupby("A") + + # Expected + d = {"C": {"foo": "mean", "bar": "std"}, "D": {"sum": "sum"}} + # this uses column selection & renaming + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate(d) + + # Test 1 + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} + # this uses column selection & renaming + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate(d) + + # Test 2 + d = {"C": {"foo": "mean", "bar": "std"}, "D": "sum"} + # this uses column selection & renaming + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate(d) + + +def 
test_groupby_agg_coercing_bools(): + # issue 14873 + dat = pd.DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) + gp = dat.groupby("a") + + index = Index([1, 2], name="a") + + result = gp["b"].aggregate(lambda x: (x != 0).all()) + expected = Series([False, True], index=index, name="b") + tm.assert_series_equal(result, expected) + + result = gp["c"].aggregate(lambda x: x.isnull().all()) + expected = Series([True, False], index=index, name="c") + tm.assert_series_equal(result, expected) + + +def test_order_aggregate_multiple_funcs(): + # GH 25692 + df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) + + res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"]) + result = res.columns.levels[1] + + expected = pd.Index(["sum", "max", "mean", "ohlc", "min"]) + + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [np.int64, np.uint64]) +@pytest.mark.parametrize("how", ["first", "last", "min", "max", "mean", "median"]) +def test_uint64_type_handling(dtype, how): + # GH 26310 + df = pd.DataFrame({"x": 6903052872240755750, "y": [1, 2]}) + expected = df.groupby("y").agg({"x": how}) + df.x = df.x.astype(dtype) + result = df.groupby("y").agg({"x": how}) + result.x = result.x.astype(np.int64) + tm.assert_frame_equal(result, expected, check_exact=True) + + +def test_func_duplicates_raises(): + # GH28426 + msg = "Function names" + df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").agg(["min", "min"]) + + +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_agg_index_has_complex_internals(index): + # GH 31223 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group").agg({"value": Series.nunique}) + expected = 
DataFrame({"group": [1, 2], "value": [2, 1]}).set_index("group") + tm.assert_frame_equal(result, expected) + + +def test_agg_split_block(): + # https://github.com/pandas-dev/pandas/issues/31522 + df = pd.DataFrame( + { + "key1": ["a", "a", "b", "b", "a"], + "key2": ["one", "two", "one", "two", "one"], + "key3": ["three", "three", "three", "six", "six"], + } + ) + result = df.groupby("key1").min() + expected = pd.DataFrame( + {"key2": ["one", "one"], "key3": ["six", "six"]}, + index=pd.Index(["a", "b"], name="key1"), + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_split_object_part_datetime(): + # https://github.com/pandas-dev/pandas/pull/31616 + df = pd.DataFrame( + { + "A": pd.date_range("2000", periods=4), + "B": ["a", "b", "c", "d"], + "C": [1, 2, 3, 4], + "D": ["b", "c", "d", "e"], + "E": pd.date_range("2000", periods=4), + "F": [1, 2, 3, 4], + } + ).astype(object) + result = df.groupby([0, 0, 0, 0]).min() + expected = pd.DataFrame( + { + "A": [pd.Timestamp("2000")], + "B": ["a"], + "C": [1], + "D": ["b"], + "E": [pd.Timestamp("2000")], + "F": [1], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_cython_category_not_implemented_fallback(): + # https://github.com/pandas-dev/pandas/issues/31450 + df = pd.DataFrame({"col_num": [1, 1, 2, 3]}) + df["col_cat"] = df["col_num"].astype("category") + + result = df.groupby("col_num").col_cat.first() + expected = pd.Series( + [1, 2, 3], index=pd.Index([1, 2, 3], name="col_num"), name="col_cat" + ) + tm.assert_series_equal(result, expected) + + result = df.groupby("col_num").agg({"col_cat": "first"}) + expected = expected.to_frame() + tm.assert_frame_equal(result, expected) + + +class TestNamedAggregationSeries: + def test_series_named_agg(self): + df = pd.Series([1, 2, 3, 4]) + gr = df.groupby([0, 0, 1, 1]) + result = gr.agg(a="sum", b="min") + expected = pd.DataFrame( + {"a": [3, 7], "b": [1, 3]}, columns=["a", "b"], index=[0, 1] + ) + tm.assert_frame_equal(result, expected) + + result = 
gr.agg(b="min", a="sum") + expected = expected[["b", "a"]] + tm.assert_frame_equal(result, expected) + + def test_no_args_raises(self): + gr = pd.Series([1, 2]).groupby([0, 1]) + with pytest.raises(TypeError, match="Must provide"): + gr.agg() + + # but we do allow this + result = gr.agg([]) + expected = pd.DataFrame() + tm.assert_frame_equal(result, expected) + + def test_series_named_agg_duplicates_no_raises(self): + # GH28426 + gr = pd.Series([1, 2, 3]).groupby([0, 0, 1]) + grouped = gr.agg(a="sum", b="sum") + expected = pd.DataFrame({"a": [3, 3], "b": [3, 3]}) + tm.assert_frame_equal(expected, grouped) + + def test_mangled(self): + gr = pd.Series([1, 2, 3]).groupby([0, 0, 1]) + result = gr.agg(a=lambda x: 0, b=lambda x: 1) + expected = pd.DataFrame({"a": [0, 0], "b": [1, 1]}) + tm.assert_frame_equal(result, expected) + + +class TestNamedAggregationDataFrame: + def test_agg_relabel(self): + df = pd.DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + result = df.groupby("group").agg(a_max=("A", "max"), b_max=("B", "max")) + expected = pd.DataFrame( + {"a_max": [1, 3], "b_max": [6, 8]}, + index=pd.Index(["a", "b"], name="group"), + columns=["a_max", "b_max"], + ) + tm.assert_frame_equal(result, expected) + + # order invariance + p98 = functools.partial(np.percentile, q=98) + result = df.groupby("group").agg( + b_min=("B", "min"), + a_min=("A", min), + a_mean=("A", np.mean), + a_max=("A", "max"), + b_max=("B", "max"), + a_98=("A", p98), + ) + expected = pd.DataFrame( + { + "b_min": [5, 7], + "a_min": [0, 2], + "a_mean": [0.5, 2.5], + "a_max": [1, 3], + "b_max": [6, 8], + "a_98": [0.98, 2.98], + }, + index=pd.Index(["a", "b"], name="group"), + columns=["b_min", "a_min", "a_mean", "a_max", "b_max", "a_98"], + ) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_non_identifier(self): + df = pd.DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + + result = 
df.groupby("group").agg(**{"my col": ("A", "max")}) + expected = pd.DataFrame( + {"my col": [1, 3]}, index=pd.Index(["a", "b"], name="group") + ) + tm.assert_frame_equal(result, expected) + + def test_duplicate_no_raises(self): + # GH 28426, if use same input function on same column, + # no error should raise + df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + + grouped = df.groupby("A").agg(a=("B", "min"), b=("B", "min")) + expected = pd.DataFrame( + {"a": [1, 3], "b": [1, 3]}, index=pd.Index([0, 1], name="A") + ) + tm.assert_frame_equal(grouped, expected) + + quant50 = functools.partial(np.percentile, q=50) + quant70 = functools.partial(np.percentile, q=70) + quant50.__name__ = "quant50" + quant70.__name__ = "quant70" + + test = pd.DataFrame( + {"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]} + ) + + grouped = test.groupby("col1").agg( + quantile_50=("col2", quant50), quantile_70=("col2", quant70) + ) + expected = pd.DataFrame( + {"quantile_50": [1.5, 4.0], "quantile_70": [1.7, 4.4]}, + index=pd.Index(["a", "b"], name="col1"), + ) + tm.assert_frame_equal(grouped, expected) + + def test_agg_relabel_with_level(self): + df = pd.DataFrame( + {"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}, + index=pd.MultiIndex.from_product([["A", "B"], ["a", "b"]]), + ) + result = df.groupby(level=0).agg( + aa=("A", "max"), bb=("A", "min"), cc=("B", "mean") + ) + expected = pd.DataFrame( + {"aa": [0, 1], "bb": [0, 1], "cc": [1.5, 3.5]}, index=["A", "B"] + ) + tm.assert_frame_equal(result, expected) + + def test_agg_relabel_other_raises(self): + df = pd.DataFrame({"A": [0, 0, 1], "B": [1, 2, 3]}) + grouped = df.groupby("A") + match = "Must provide" + with pytest.raises(TypeError, match=match): + grouped.agg(foo=1) + + with pytest.raises(TypeError, match=match): + grouped.agg() + + with pytest.raises(TypeError, match=match): + grouped.agg(a=("B", "max"), b=(1, 2, 3)) + + def test_missing_raises(self): + df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + with 
pytest.raises(KeyError, match="Column 'C' does not exist"): + df.groupby("A").agg(c=("C", "sum")) + + def test_agg_namedtuple(self): + df = pd.DataFrame({"A": [0, 1], "B": [1, 2]}) + result = df.groupby("A").agg( + b=pd.NamedAgg("B", "sum"), c=pd.NamedAgg(column="B", aggfunc="count") + ) + expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count")) + tm.assert_frame_equal(result, expected) + + def test_mangled(self): + df = pd.DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) + result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) + expected = pd.DataFrame( + {"b": [0, 0], "c": [1, 1]}, index=pd.Index([0, 1], name="A") + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3", + [ + ( + (("y", "A"), "max"), + (("y", "A"), np.min), + (("y", "B"), "mean"), + [1, 3], + [0, 2], + [5.5, 7.5], + ), + ( + (("y", "A"), lambda x: max(x)), + (("y", "A"), lambda x: 1), + (("y", "B"), "mean"), + [1, 3], + [1, 1], + [5.5, 7.5], + ), + ( + pd.NamedAgg(("y", "A"), "max"), + pd.NamedAgg(("y", "B"), np.mean), + pd.NamedAgg(("y", "A"), lambda x: 1), + [1, 3], + [5.5, 7.5], + [1, 1], + ), + ], +) +def test_agg_relabel_multiindex_column( + agg_col1, agg_col2, agg_col3, agg_result1, agg_result2, agg_result3 +): + # GH 29422, add tests for multiindex column cases + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + idx = pd.Index(["a", "b"], name=("x", "group")) + + result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max")) + expected = DataFrame({"a_max": [1, 3]}, index=idx) + tm.assert_frame_equal(result, expected) + + result = df.groupby(("x", "group")).agg( + col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 + ) + expected = DataFrame( + {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx + ) + 
tm.assert_frame_equal(result, expected) + + +def test_agg_relabel_multiindex_raises_not_exist(): + # GH 29422, add test for raises senario when aggregate column does not exist + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + + with pytest.raises(KeyError, match="does not exist"): + df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) + + +def test_agg_relabel_multiindex_duplicates(): + # GH29422, add test for raises senario when getting duplicates + # GH28426, after this change, duplicates should also work if the relabelling is + # different + df = DataFrame( + {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} + ) + df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + + result = df.groupby(("x", "group")).agg( + a=(("y", "A"), "min"), b=(("y", "A"), "min") + ) + idx = pd.Index(["a", "b"], name=("x", "group")) + expected = DataFrame({"a": [0, 2], "b": [0, 2]}, index=idx) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func", [lambda s: s.mean(), lambda s: np.mean(s), lambda s: np.nanmean(s)] +) +def test_multiindex_custom_func(func): + # GH 31777 + data = [[1, 4, 2], [5, 7, 1]] + df = pd.DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])) + result = df.groupby(np.array([0, 1])).agg(func) + expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}} + expected = pd.DataFrame(expected_dict) + tm.assert_frame_equal(result, expected) + + +def myfunc(s): + return np.percentile(s, q=0.90) + + +@pytest.mark.parametrize("func", [lambda s: np.percentile(s, q=0.90), myfunc]) +def test_lambda_named_agg(func): + # see gh-28467 + animals = DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + + result = animals.groupby("kind").agg( + 
mean_height=("height", "mean"), perc90=("height", func) + ) + expected = DataFrame( + [[9.3, 9.1036], [20.0, 6.252]], + columns=["mean_height", "perc90"], + index=Index(["cat", "dog"], name="kind"), + ) + + tm.assert_frame_equal(result, expected) + + +def test_aggregate_mixed_types(): + # GH 16916 + df = pd.DataFrame( + data=np.array([0] * 9).reshape(3, 3), columns=list("XYZ"), index=list("abc") + ) + df["grouping"] = ["group 1", "group 1", 2] + result = df.groupby("grouping").aggregate(lambda x: x.tolist()) + expected_data = [[[0], [0], [0]], [[0, 0], [0, 0], [0, 0]]] + expected = pd.DataFrame( + expected_data, + index=Index([2, "group 1"], dtype="object", name="grouping"), + columns=Index(["X", "Y", "Z"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_aggregate_udf_na_extension_type(): + # https://github.com/pandas-dev/pandas/pull/31359 + # This is currently failing to cast back to Int64Dtype. + # The presence of the NA causes two problems + # 1. NA is not an instance of Int64Dtype.type (numpy.int64) + # 2. The presence of an NA forces object type, so the non-NA values is + # a Python int rather than a NumPy int64. Python ints aren't + # instances of numpy.int64. 
+ def aggfunc(x): + if all(x > 2): + return 1 + else: + return pd.NA + + df = pd.DataFrame({"A": pd.array([1, 2, 3])}) + result = df.groupby([1, 1, 2]).agg(aggfunc) + expected = pd.DataFrame({"A": pd.array([1, pd.NA], dtype="Int64")}, index=[1, 2]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_column(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = pd.DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a")["b"], func)() + idx = pd.Int64Index([1, 2], name="a") + expected = pd.Series(periods, index=idx, name="b") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max"]) +def test_groupby_aggregate_period_frame(func): + # GH 31471 + groups = [1, 2] + periods = pd.period_range("2020", periods=2, freq="Y") + df = pd.DataFrame({"a": groups, "b": periods}) + + result = getattr(df.groupby("a"), func)() + idx = pd.Int64Index([1, 2], name="a") + expected = pd.DataFrame({"b": periods}, index=idx) + + tm.assert_frame_equal(result, expected) + + +class TestLambdaMangling: + def test_maybe_mangle_lambdas_passthrough(self): + assert _maybe_mangle_lambdas("mean") == "mean" + assert _maybe_mangle_lambdas(lambda x: x).__name__ == "" + # don't mangel single lambda. 
+ assert _maybe_mangle_lambdas([lambda x: x])[0].__name__ == "" + + def test_maybe_mangle_lambdas_listlike(self): + aggfuncs = [lambda x: 1, lambda x: 2] + result = _maybe_mangle_lambdas(aggfuncs) + assert result[0].__name__ == "" + assert result[1].__name__ == "" + assert aggfuncs[0](None) == result[0](None) + assert aggfuncs[1](None) == result[1](None) + + def test_maybe_mangle_lambdas(self): + func = {"A": [lambda x: 0, lambda x: 1]} + result = _maybe_mangle_lambdas(func) + assert result["A"][0].__name__ == "" + assert result["A"][1].__name__ == "" + + def test_maybe_mangle_lambdas_args(self): + func = {"A": [lambda x, a, b=1: (0, a, b), lambda x: 1]} + result = _maybe_mangle_lambdas(func) + assert result["A"][0].__name__ == "" + assert result["A"][1].__name__ == "" + + assert func["A"][0](0, 1) == (0, 1, 1) + assert func["A"][0](0, 1, 2) == (0, 1, 2) + assert func["A"][0](0, 2, b=3) == (0, 2, 3) + + def test_maybe_mangle_lambdas_named(self): + func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}} + result = _maybe_mangle_lambdas(func) + assert result == func + + def test_basic(self): + df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) + result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]}) + + expected = pd.DataFrame( + {("B", ""): [0, 0], ("B", ""): [1, 1]}, + index=pd.Index([0, 1], name="A"), + ) + tm.assert_frame_equal(result, expected) + + def test_mangle_series_groupby(self): + gr = pd.Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) + result = gr.agg([lambda x: 0, lambda x: 1]) + expected = pd.DataFrame({"": [0, 0], "": [1, 1]}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="GH-26611. 
kwargs for multi-agg.") + def test_with_kwargs(self): + f1 = lambda x, y, b=1: x.sum() + y + b + f2 = lambda x, y, b=2: x.sum() + y * b + result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0) + expected = pd.DataFrame({"": [4], "": [6]}) + tm.assert_frame_equal(result, expected) + + result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) + expected = pd.DataFrame({"": [13], "": [30]}) + tm.assert_frame_equal(result, expected) + + def test_agg_with_one_lambda(self): + # GH 25719, write tests for DataFrameGroupby.agg with only one lambda + df = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + + columns = ["height_sqr_min", "height_max", "weight_max"] + expected = pd.DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + }, + index=pd.Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check pd.NameAgg case + result1 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + ) + tm.assert_frame_equal(result1, expected) + + # check agg(key=(col, aggfunc)) case + result2 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + ) + tm.assert_frame_equal(result2, expected) + + def test_agg_multiple_lambda(self): + # GH25719, test for DataFrameGroupby.agg with multiple lambdas + # with mixed aggfunc + df = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + columns = [ + "height_sqr_min", + "height_max", + "weight_max", + "height_max_2", + "weight_min", + ] + expected = pd.DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], 
+ "weight_max": [9.9, 198.0], + "height_max_2": [9.5, 34.0], + "weight_min": [7.9, 7.5], + }, + index=pd.Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check agg(key=(col, aggfunc)) case + result1 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + height_max_2=("height", lambda x: np.max(x)), + weight_min=("weight", lambda x: np.min(x)), + ) + tm.assert_frame_equal(result1, expected) + + # check pd.NamedAgg case + result2 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + height_max_2=pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)), + weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)), + ) + tm.assert_frame_equal(result2, expected) + + @pytest.mark.parametrize( + "order, expected_reorder", + [ + ( + [ + ("height", ""), + ("height", "max"), + ("weight", "max"), + ("height", ""), + ("weight", ""), + ], + [ + ("height", "_0"), + ("height", "max"), + ("weight", "max"), + ("height", "_1"), + ("weight", ""), + ], + ), + ( + [ + ("col2", "min"), + ("col1", ""), + ("col1", ""), + ("col1", ""), + ], + [ + ("col2", "min"), + ("col1", "_0"), + ("col1", "_1"), + ("col1", "_2"), + ], + ), + ( + [("col", ""), ("col", ""), ("col", "")], + [("col", "_0"), ("col", "_1"), ("col", "_2")], + ), + ], + ) + def test_make_unique(self, order, expected_reorder): + # GH 27519, test if make_unique function reorders correctly + result = _make_unique(order) + + assert result == expected_reorder diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py new file mode 100644 index 00000000..5ddda264 --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -0,0 +1,238 @@ +""" +test cython .agg behavior +""" + 
+import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range +import pandas._testing as tm +from pandas.core.groupby.groupby import DataError + + +@pytest.mark.parametrize( + "op_name", + [ + "count", + "sum", + "std", + "var", + "sem", + "mean", + pytest.param( + "median", + # ignore mean of empty slice + # and all-NaN + marks=[pytest.mark.filterwarnings("ignore::RuntimeWarning")], + ), + "prod", + "min", + "max", + ], +) +def test_cythonized_aggers(op_name): + data = { + "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan], + "B": ["A", "B"] * 6, + "C": np.random.randn(12), + } + df = DataFrame(data) + df.loc[2:10:2, "C"] = np.nan + + op = lambda x: getattr(x, op_name)() + + # single column + grouped = df.drop(["B"], axis=1).groupby("A") + exp = {cat: op(group["C"]) for cat, group in grouped} + exp = DataFrame({"C": exp}) + exp.index.name = "A" + result = op(grouped) + tm.assert_frame_equal(result, exp) + + # multiple columns + grouped = df.groupby(["A", "B"]) + expd = {} + for (cat1, cat2), group in grouped: + expd.setdefault(cat1, {})[cat2] = op(group["C"]) + exp = DataFrame(expd).T.stack(dropna=False) + exp.index.names = ["A", "B"] + exp.name = "C" + + result = op(grouped)["C"] + if op_name in ["sum", "prod"]: + tm.assert_series_equal(result, exp) + + +def test_cython_agg_boolean(): + frame = DataFrame( + { + "a": np.random.randint(0, 5, 50), + "b": np.random.randint(0, 2, 50).astype("bool"), + } + ) + result = frame.groupby("a")["b"].mean() + expected = frame.groupby("a")["b"].agg(np.mean) + + tm.assert_series_equal(result, expected) + + +def test_cython_agg_nothing_to_agg(): + frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) + msg = "No numeric types to aggregate" + + with pytest.raises(DataError, match=msg): + frame.groupby("a")["b"].mean() + + frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) + with 
pytest.raises(DataError, match=msg): + frame[["b"]].groupby(frame["a"]).mean() + + +def test_cython_agg_nothing_to_agg_with_dates(): + frame = DataFrame( + { + "a": np.random.randint(0, 5, 50), + "b": ["foo", "bar"] * 25, + "dates": pd.date_range("now", periods=50, freq="T"), + } + ) + msg = "No numeric types to aggregate" + with pytest.raises(DataError, match=msg): + frame.groupby("b").dates.mean() + + +def test_cython_agg_frame_columns(): + # #2113 + df = DataFrame({"x": [1, 2, 3], "y": [3, 4, 5]}) + + df.groupby(level=0, axis="columns").mean() + df.groupby(level=0, axis="columns").mean() + df.groupby(level=0, axis="columns").mean() + df.groupby(level=0, axis="columns").mean() + + +def test_cython_agg_return_dict(): + # GH 16741 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + ts = df.groupby("A")["B"].agg(lambda x: x.value_counts().to_dict()) + expected = Series( + [{"two": 1, "one": 1, "three": 1}, {"two": 2, "one": 2, "three": 1}], + index=Index(["bar", "foo"], name="A"), + name="B", + ) + tm.assert_series_equal(ts, expected) + + +def test_cython_fail_agg(): + dr = bdate_range("1/1/2000", periods=50) + ts = Series(["A", "B", "C", "D", "E"] * 10, index=dr) + + grouped = ts.groupby(lambda x: x.month) + summed = grouped.sum() + expected = grouped.agg(np.sum) + tm.assert_series_equal(summed, expected) + + +@pytest.mark.parametrize( + "op, targop", + [ + ("mean", np.mean), + ("median", np.median), + ("var", np.var), + ("add", np.sum), + ("prod", np.prod), + ("min", np.min), + ("max", np.max), + ("first", lambda x: x.iloc[0]), + ("last", lambda x: x.iloc[-1]), + ], +) +def test__cython_agg_general(op, targop): + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + result = df.groupby(labels)._cython_agg_general(op) + expected = 
df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "op, targop", + [ + ("mean", np.mean), + ("median", lambda x: np.median(x) if len(x) > 0 else np.nan), + ("var", lambda x: np.var(x, ddof=1)), + ("min", np.min), + ("max", np.max), + ], +) +def test_cython_agg_empty_buckets(op, targop, observed): + df = pd.DataFrame([11, 12, 13]) + grps = range(0, 55, 5) + + # calling _cython_agg_general directly, instead of via the user API + # which sets different values for min_count, so do that here. + g = df.groupby(pd.cut(df[0], grps), observed=observed) + result = g._cython_agg_general(op) + + g = df.groupby(pd.cut(df[0], grps), observed=observed) + expected = g.agg(lambda x: targop(x)) + tm.assert_frame_equal(result, expected) + + +def test_cython_agg_empty_buckets_nanops(observed): + # GH-18869 can't call nanops on empty groups, so hardcode expected + # for these + df = pd.DataFrame([11, 12, 13], columns=["a"]) + grps = range(0, 25, 5) + # add / sum + result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( + "add" + ) + intervals = pd.interval_range(0, 20, freq=5) + expected = pd.DataFrame( + {"a": [0, 0, 36, 0]}, + index=pd.CategoricalIndex(intervals, name="a", ordered=True), + ) + if observed: + expected = expected[expected.a != 0] + + tm.assert_frame_equal(result, expected) + + # prod + result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( + "prod" + ) + expected = pd.DataFrame( + {"a": [1, 1, 1716, 1]}, + index=pd.CategoricalIndex(intervals, name="a", ordered=True), + ) + if observed: + expected = expected[expected.a != 1] + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("op", ["first", "last", "max", "min"]) +@pytest.mark.parametrize( + "data", [Timestamp("2016-10-14 21:00:44.557"), Timedelta("17088 days 21:00:44.557")] +) +def test_cython_with_timestamp_and_nat(op, data): + # https://github.com/pandas-dev/pandas/issues/19526 + df = 
DataFrame({"a": [0, 1], "b": [data, NaT]}) + index = Index([0, 1], name="a") + + # We will group by a and test the cython aggregations + expected = DataFrame({"b": [data, NaT]}, index=index) + + result = df.groupby("a").aggregate(op) + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py new file mode 100644 index 00000000..52ee3e65 --- /dev/null +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -0,0 +1,644 @@ +""" +test all other .agg behavior +""" + +import datetime as dt +from functools import partial + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + PeriodIndex, + Series, + date_range, + period_range, +) +import pandas._testing as tm +from pandas.core.base import SpecificationError + +from pandas.io.formats.printing import pprint_thing + + +def test_agg_api(): + # GH 6337 + # https://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error + # different api for agg when passed custom function with mixed frame + + df = DataFrame( + { + "data1": np.random.randn(5), + "data2": np.random.randn(5), + "key1": ["a", "a", "b", "b", "a"], + "key2": ["one", "two", "one", "two", "one"], + } + ) + grouped = df.groupby("key1") + + def peak_to_peak(arr): + return arr.max() - arr.min() + + expected = grouped.agg([peak_to_peak]) + expected.columns = ["data1", "data2"] + result = grouped.agg(peak_to_peak) + tm.assert_frame_equal(result, expected) + + +def test_agg_datetimes_mixed(): + data = [[1, "2012-01-01", 1.0], [2, "2012-01-02", 2.0], [3, None, 3.0]] + + df1 = DataFrame( + { + "key": [x[0] for x in data], + "date": [x[1] for x in data], + "value": [x[2] for x in data], + } + ) + + data = [ + [ + row[0], + (dt.datetime.strptime(row[1], "%Y-%m-%d").date() if row[1] else None), + row[2], + ] + for row in data + ] + + df2 = DataFrame( + { + "key": [x[0] for x in data], + "date": [x[1] 
for x in data], + "value": [x[2] for x in data], + } + ) + + df1["weights"] = df1["value"] / df1["value"].sum() + gb1 = df1.groupby("date").aggregate(np.sum) + + df2["weights"] = df1["value"] / df1["value"].sum() + gb2 = df2.groupby("date").aggregate(np.sum) + + assert len(gb1) == len(gb2) + + +def test_agg_period_index(): + prng = period_range("2012-1-1", freq="M", periods=3) + df = DataFrame(np.random.randn(3, 2), index=prng) + rs = df.groupby(level=0).sum() + assert isinstance(rs.index, PeriodIndex) + + # GH 3579 + index = period_range(start="1999-01", periods=5, freq="M") + s1 = Series(np.random.rand(len(index)), index=index) + s2 = Series(np.random.rand(len(index)), index=index) + df = DataFrame.from_dict({"s1": s1, "s2": s2}) + grouped = df.groupby(df.index.month) + list(grouped) + + +def test_agg_dict_parameter_cast_result_dtypes(): + # GH 12821 + + df = DataFrame( + { + "class": ["A", "A", "B", "B", "C", "C", "D", "D"], + "time": date_range("1/1/2011", periods=8, freq="H"), + } + ) + df.loc[[0, 1, 2, 5], "time"] = None + + # test for `first` function + exp = df.loc[[0, 3, 4, 6]].set_index("class") + grouped = df.groupby("class") + tm.assert_frame_equal(grouped.first(), exp) + tm.assert_frame_equal(grouped.agg("first"), exp) + tm.assert_frame_equal(grouped.agg({"time": "first"}), exp) + tm.assert_series_equal(grouped.time.first(), exp["time"]) + tm.assert_series_equal(grouped.time.agg("first"), exp["time"]) + + # test for `last` function + exp = df.loc[[0, 3, 4, 7]].set_index("class") + grouped = df.groupby("class") + tm.assert_frame_equal(grouped.last(), exp) + tm.assert_frame_equal(grouped.agg("last"), exp) + tm.assert_frame_equal(grouped.agg({"time": "last"}), exp) + tm.assert_series_equal(grouped.time.last(), exp["time"]) + tm.assert_series_equal(grouped.time.agg("last"), exp["time"]) + + # count + exp = pd.Series([2, 2, 2, 2], index=Index(list("ABCD"), name="class"), name="time") + tm.assert_series_equal(grouped.time.agg(len), exp) + 
tm.assert_series_equal(grouped.time.size(), exp) + + exp = pd.Series([0, 1, 1, 2], index=Index(list("ABCD"), name="class"), name="time") + tm.assert_series_equal(grouped.time.count(), exp) + + +def test_agg_cast_results_dtypes(): + # similar to GH12821 + # xref #11444 + u = [dt.datetime(2015, x + 1, 1) for x in range(12)] + v = list("aaabbbbbbccd") + df = pd.DataFrame({"X": v, "Y": u}) + + result = df.groupby("X")["Y"].agg(len) + expected = df.groupby("X")["Y"].count() + tm.assert_series_equal(result, expected) + + +def test_aggregate_float64_no_int64(): + # see gh-11199 + df = DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 4, 5], "c": [1, 2, 3, 4, 5]}) + + expected = DataFrame({"a": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a"]].mean() + tm.assert_frame_equal(result, expected) + + expected = DataFrame({"a": [1, 2.5, 4, 5], "c": [1, 2.5, 4, 5]}, index=[1, 2, 4, 5]) + expected.index.name = "b" + + result = df.groupby("b")[["a", "c"]].mean() + tm.assert_frame_equal(result, expected) + + +def test_aggregate_api_consistency(): + # GH 9052 + # make sure that the aggregates via dict + # are consistent + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + grouped = df.groupby(["A", "B"]) + c_mean = grouped["C"].mean() + c_sum = grouped["C"].sum() + d_mean = grouped["D"].mean() + d_sum = grouped["D"].sum() + + result = grouped["D"].agg(["sum", "mean"]) + expected = pd.concat([d_sum, d_mean], axis=1) + expected.columns = ["sum", "mean"] + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg([np.sum, np.mean]) + expected = pd.concat([c_sum, c_mean, d_sum, d_mean], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["sum", "mean"]]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped[["D", 
"C"]].agg([np.sum, np.mean]) + expected = pd.concat([d_sum, d_mean, c_sum, c_mean], axis=1) + expected.columns = MultiIndex.from_product([["D", "C"], ["sum", "mean"]]) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({"C": "mean", "D": "sum"}) + expected = pd.concat([d_sum, c_mean], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + result = grouped.agg({"C": ["mean", "sum"], "D": ["mean", "sum"]}) + expected = pd.concat([c_mean, c_sum, d_mean, d_sum], axis=1) + expected.columns = MultiIndex.from_product([["C", "D"], ["mean", "sum"]]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped[["D", "C"]].agg({"r": np.sum, "r2": np.mean}) + + +def test_agg_dict_renaming_deprecation(): + # 15931 + df = pd.DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)}) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").agg( + {"B": {"foo": ["sum", "max"]}, "C": {"bar": ["count", "min"]}} + ) + + with pytest.raises(SpecificationError, match=msg): + df.groupby("A")[["B", "C"]].agg({"ma": "max"}) + + with pytest.raises(SpecificationError, match=msg): + df.groupby("A").B.agg({"foo": "count"}) + + +def test_agg_compat(): + # GH 12334 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + g = df.groupby(["A", "B"]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"C": ["sum", "std"]}) + + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"C": "sum", "D": "std"}) + + +def test_agg_nested_dicts(): + # API change for disallowing these types of nested dicts + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", 
"two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + g = df.groupby(["A", "B"]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + g.aggregate({"r1": {"C": ["mean", "sum"]}, "r2": {"D": ["mean", "sum"]}}) + + with pytest.raises(SpecificationError, match=msg): + g.agg({"C": {"ra": ["mean", "std"]}, "D": {"rb": ["mean", "std"]}}) + + # same name as the original column + # GH9052 + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"result1": np.sum, "result2": np.mean}) + + with pytest.raises(SpecificationError, match=msg): + g["D"].agg({"D": np.sum, "result2": np.mean}) + + +def test_agg_item_by_item_raise_typeerror(): + df = DataFrame(np.random.randint(10, size=(20, 10))) + + def raiseException(df): + pprint_thing("----------------------------------------") + pprint_thing(df.to_string()) + raise TypeError("test") + + with pytest.raises(TypeError, match="test"): + df.groupby(0).agg(raiseException) + + +def test_series_agg_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + + result = grouped.agg(np.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + + +def test_series_agg_multi_pure_python(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + def bad(x): + assert len(x.values.base) > 0 + return "foo" + + result = data.groupby(["A", "B"]).agg(bad) + expected = data.groupby(["A", "B"]).agg(lambda x: "foo") + tm.assert_frame_equal(result, expected) + + +def 
test_agg_consistency(): + # agg with ([]) and () not consistent + # GH 6715 + def P1(a): + return np.percentile(a.dropna(), q=1) + + df = DataFrame( + { + "col1": [1, 2, 3, 4], + "col2": [10, 25, 26, 31], + "date": [ + dt.date(2013, 2, 10), + dt.date(2013, 2, 10), + dt.date(2013, 2, 11), + dt.date(2013, 2, 11), + ], + } + ) + + g = df.groupby("date") + + expected = g.agg([P1]) + expected.columns = expected.columns.levels[0] + + result = g.agg(P1) + tm.assert_frame_equal(result, expected) + + +def test_agg_callables(): + # GH 7929 + df = DataFrame({"foo": [1, 2], "bar": [3, 4]}).astype(np.int64) + + class fn_class: + def __call__(self, x): + return sum(x) + + equiv_callables = [ + sum, + np.sum, + lambda x: sum(x), + lambda x: x.sum(), + partial(sum), + fn_class(), + ] + + expected = df.groupby("foo").agg(sum) + for ecall in equiv_callables: + result = df.groupby("foo").agg(ecall) + tm.assert_frame_equal(result, expected) + + +def test_agg_over_numpy_arrays(): + # GH 3788 + df = pd.DataFrame( + [ + [1, np.array([10, 20, 30])], + [1, np.array([40, 50, 60])], + [2, np.array([20, 30, 40])], + ], + columns=["category", "arraydata"], + ) + result = df.groupby("category").agg(sum) + + expected_data = [[np.array([50, 70, 90])], [np.array([20, 30, 40])]] + expected_index = pd.Index([1, 2], name="category") + expected_column = ["arraydata"] + expected = pd.DataFrame( + expected_data, index=expected_index, columns=expected_column + ) + + tm.assert_frame_equal(result, expected) + + +def test_agg_tzaware_non_datetime_result(): + # discussed in GH#29589, fixed in GH#29641, operating on tzaware values + # with function that is not dtype-preserving + dti = pd.date_range("2012-01-01", periods=4, tz="UTC") + df = pd.DataFrame({"a": [0, 0, 1, 1], "b": dti}) + gb = df.groupby("a") + + # Case that _does_ preserve the dtype + result = gb["b"].agg(lambda x: x.iloc[0]) + expected = pd.Series(dti[::2], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + # 
Cases that do _not_ preserve the dtype + result = gb["b"].agg(lambda x: x.iloc[0].year) + expected = pd.Series([2012, 2012], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0]) + expected = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b") + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + +def test_agg_timezone_round_trip(): + # GH 15426 + ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific") + df = pd.DataFrame( + {"a": 1, "b": [ts + dt.timedelta(minutes=nn) for nn in range(10)]} + ) + + result1 = df.groupby("a")["b"].agg(np.min).iloc[0] + result2 = df.groupby("a")["b"].agg(lambda x: np.min(x)).iloc[0] + result3 = df.groupby("a")["b"].min().iloc[0] + + assert result1 == ts + assert result2 == ts + assert result3 == ts + + dates = [ + pd.Timestamp(f"2016-01-0{i:d} 12:00:00", tz="US/Pacific") for i in range(1, 5) + ] + df = pd.DataFrame({"A": ["a", "b"] * 2, "B": dates}) + grouped = df.groupby("A") + + ts = df["B"].iloc[0] + assert ts == grouped.nth(0)["B"].iloc[0] + assert ts == grouped.head(1)["B"].iloc[0] + assert ts == grouped.first()["B"].iloc[0] + + # GH#27110 applying iloc should return a DataFrame + assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 0] + + ts = df["B"].iloc[2] + assert ts == grouped.last()["B"].iloc[0] + + # GH#27110 applying iloc should return a DataFrame + assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 0] + + +def test_sum_uint64_overflow(): + # see gh-14758 + # Convert to uint64 and don't overflow + df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], dtype=object) + df = df + 9223372036854775807 + + index = pd.Index( + [9223372036854775808, 9223372036854775810, 9223372036854775812], dtype=np.uint64 + ) + expected = pd.DataFrame( + {1: [9223372036854775809, 9223372036854775811, 9223372036854775813]}, + index=index, + ) + + expected.index.name = 0 + result = df.groupby(0).sum() + 
tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "structure, expected", + [ + (tuple, pd.DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})), + (list, pd.DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})), + ( + lambda x: tuple(x), + pd.DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}), + ), + ( + lambda x: list(x), + pd.DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}), + ), + ], +) +def test_agg_structs_dataframe(structure, expected): + df = pd.DataFrame( + {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} + ) + + result = df.groupby(["A", "B"]).aggregate(structure) + expected.index.names = ["A", "B"] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "structure, expected", + [ + (tuple, pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), + (list, pd.Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), + (lambda x: tuple(x), pd.Series([(1, 1, 1), (3, 4, 4)], index=[1, 3], name="C")), + (lambda x: list(x), pd.Series([[1, 1, 1], [3, 4, 4]], index=[1, 3], name="C")), + ], +) +def test_agg_structs_series(structure, expected): + # Issue #18079 + df = pd.DataFrame( + {"A": [1, 1, 1, 3, 3, 3], "B": [1, 1, 1, 4, 4, 4], "C": [1, 1, 1, 3, 4, 4]} + ) + + result = df.groupby("A")["C"].aggregate(structure) + expected.index.name = "A" + tm.assert_series_equal(result, expected) + + +def test_agg_category_nansum(observed): + categories = ["a", "b", "c"] + df = pd.DataFrame( + {"A": pd.Categorical(["a", "a", "b"], categories=categories), "B": [1, 2, 3]} + ) + result = df.groupby("A", observed=observed).B.agg(np.nansum) + expected = pd.Series( + [3, 3, 0], + index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"), + name="B", + ) + if observed: + expected = expected[expected != 0] + tm.assert_series_equal(result, expected) + + +def test_agg_list_like_func(): + # GH 18473 + df = pd.DataFrame( + {"A": [str(x) for x in range(3)], "B": [str(x) for x in 
range(3)]} + ) + grouped = df.groupby("A", as_index=False, sort=False) + result = grouped.agg({"B": lambda x: list(x)}) + expected = pd.DataFrame( + {"A": [str(x) for x in range(3)], "B": [[str(x)] for x in range(3)]} + ) + tm.assert_frame_equal(result, expected) + + +def test_agg_lambda_with_timezone(): + # GH 23683 + df = pd.DataFrame( + { + "tag": [1, 1], + "date": [ + pd.Timestamp("2018-01-01", tz="UTC"), + pd.Timestamp("2018-01-02", tz="UTC"), + ], + } + ) + result = df.groupby("tag").agg({"date": lambda e: e.head(1)}) + expected = pd.DataFrame( + [pd.Timestamp("2018-01-01", tz="UTC")], + index=pd.Index([1], name="tag"), + columns=["date"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "err_cls", + [ + NotImplementedError, + RuntimeError, + KeyError, + IndexError, + OSError, + ValueError, + ArithmeticError, + AttributeError, + ], +) +def test_groupby_agg_err_catching(err_cls): + # make sure we suppress anything other than TypeError or AssertionError + # in _python_agg_general + + # Use a non-standard EA to make sure we don't go down ndarray paths + from pandas.tests.extension.decimal.array import DecimalArray, make_data, to_decimal + + data = make_data()[:5] + df = pd.DataFrame( + {"id1": [0, 0, 0, 1, 1], "id2": [0, 1, 0, 1, 1], "decimals": DecimalArray(data)} + ) + + expected = pd.Series(to_decimal([data[0], data[3]])) + + def weird_func(x): + # weird function that raise something other than TypeError or IndexError + # in _python_agg_general + if len(x) == 0: + raise err_cls + return x.iloc[0] + + result = df["decimals"].groupby(df["id1"]).agg(weird_func) + tm.assert_series_equal(result, expected, check_names=False) diff --git a/pandas/tests/groupby/conftest.py b/pandas/tests/groupby/conftest.py new file mode 100644 index 00000000..8901af7a --- /dev/null +++ b/pandas/tests/groupby/conftest.py @@ -0,0 +1,124 @@ +import numpy as np +import pytest + +from pandas import DataFrame, MultiIndex +import pandas._testing as tm +from 
pandas.core.groupby.base import reduction_kernels, transformation_kernels + + +@pytest.fixture +def mframe(): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + return DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + + +@pytest.fixture +def df(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + +@pytest.fixture +def ts(): + return tm.makeTimeSeries() + + +@pytest.fixture +def tsd(): + return tm.getTimeSeriesData() + + +@pytest.fixture +def tsframe(tsd): + return DataFrame(tsd) + + +@pytest.fixture +def df_mixed_floats(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.array(np.random.randn(8), dtype="float32"), + } + ) + + +@pytest.fixture +def three_group(): + return DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + +@pytest.fixture(params=sorted(reduction_kernels)) +def reduction_func(request): + """yields the string names of all groupby reduction functions, one at a time. 
+ """ + return request.param + + +@pytest.fixture(params=transformation_kernels) +def transformation_func(request): + """yields the string names of all groupby transformation functions.""" + return request.param + + +@pytest.fixture(params=sorted(reduction_kernels) + sorted(transformation_kernels)) +def groupby_func(request): + """yields both aggregation and transformation functions.""" + return request.param diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py new file mode 100644 index 00000000..4a879e50 --- /dev/null +++ b/pandas/tests/groupby/test_apply.py @@ -0,0 +1,811 @@ +from datetime import datetime +from io import StringIO + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, bdate_range +import pandas._testing as tm + + +def test_apply_issues(): + # GH 5788 + + s = """2011.05.16,00:00,1.40893 +2011.05.16,01:00,1.40760 +2011.05.16,02:00,1.40750 +2011.05.16,03:00,1.40649 +2011.05.17,02:00,1.40893 +2011.05.17,03:00,1.40760 +2011.05.17,04:00,1.40750 +2011.05.17,05:00,1.40649 +2011.05.18,02:00,1.40893 +2011.05.18,03:00,1.40760 +2011.05.18,04:00,1.40750 +2011.05.18,05:00,1.40649""" + + df = pd.read_csv( + StringIO(s), + header=None, + names=["date", "time", "value"], + parse_dates=[["date", "time"]], + ) + df = df.set_index("date_time") + + expected = df.groupby(df.index.date).idxmax() + result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) + tm.assert_frame_equal(result, expected) + + # GH 5789 + # don't auto coerce dates + df = pd.read_csv(StringIO(s), header=None, names=["date", "time", "value"]) + exp_idx = pd.Index( + ["2011.05.16", "2011.05.17", "2011.05.18"], dtype=object, name="date" + ) + expected = Series(["00:00", "02:00", "02:00"], index=exp_idx) + result = df.groupby("date").apply(lambda x: x["time"][x["value"].idxmax()]) + tm.assert_series_equal(result, expected) + + +def test_apply_trivial(): + # GH 20066 + # trivial apply: ignore input and 
return a constant dataframe. + df = pd.DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", "object"]) + result = df.groupby([str(x) for x in df.dtypes], axis=1).apply( + lambda x: df.iloc[1:] + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail( + reason="GH#20066; function passed into apply " + "returns a DataFrame with the same index " + "as the one to create GroupBy object." +) +def test_apply_trivial_fail(): + # GH 20066 + # trivial apply fails if the constant dataframe has the same index + # with the one used to create GroupBy object. + df = pd.DataFrame( + {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=["key", "data"], + ) + expected = pd.concat([df, df], axis=1, keys=["float64", "object"]) + result = df.groupby([str(x) for x in df.dtypes], axis=1).apply(lambda x: df) + + tm.assert_frame_equal(result, expected) + + +def test_fast_apply(): + # make sure that fast apply is correctly called + # rather than raising any kind of error + # otherwise the python path will be callsed + # which slows things down + N = 1000 + labels = np.random.randint(0, 2000, size=N) + labels2 = np.random.randint(0, 3, size=N) + df = DataFrame( + { + "key": labels, + "key2": labels2, + "value1": np.random.randn(N), + "value2": ["foo", "bar", "baz", "qux"] * (N // 4), + } + ) + + def f(g): + return 1 + + g = df.groupby(["key", "key2"]) + + grouper = g.grouper + + splitter = grouper._get_splitter(g._selected_obj, axis=g.axis) + group_keys = grouper._get_group_keys() + + values, mutated = splitter.fast_apply(f, group_keys) + + assert not mutated + + +@pytest.mark.parametrize( + "df, group_names", + [ + (DataFrame({"a": [1, 1, 1, 2, 3], "b": ["a", "a", "a", "b", "c"]}), [1, 2, 3]), + (DataFrame({"a": [0, 0, 1, 1], "b": [0, 1, 0, 1]}), [0, 1]), + (DataFrame({"a": [1]}), [1]), + (DataFrame({"a": [1, 1, 
def test_apply_with_mixed_dtype():
    # GH3480, apply with mixed dtype on axis=1 breaks in 0.11
    # An identity function applied row by row must hand back a frame whose
    # column dtypes equal those of the float/object input frame.
    mixed = DataFrame(
        {
            "foo1": np.random.randn(6),
            "foo2": ["one", "two", "two", "three", "one", "two"],
        }
    )
    roundtrip = mixed.apply(lambda row: row, axis=1)
    tm.assert_series_equal(roundtrip.dtypes, mixed.dtypes)

    # GH 3610 incorrect dtype conversion with as_index=False
    # The key column obtained through as_index=False has to equal the one
    # obtained by resetting the index of the default (as_index=True) result.
    frame = DataFrame({"c1": [1, 2, 6, 6, 8]})
    frame["c2"] = frame.c1 / 2.0
    key_via_reset_index = frame.groupby("c2").mean().reset_index().c2
    key_via_as_index = frame.groupby("c2", as_index=False).mean().c2
    tm.assert_series_equal(key_via_reset_index, key_via_as_index)
def test_apply_frame_concat_series():
    # Applying a Series-returning function to the groups of a frame must
    # give the same values/index as the equivalent function applied to the
    # groups of the "C" column alone, and the result must be named "C".
    df = DataFrame(
        {
            "A": np.random.randint(0, 5, 1000),
            "B": np.random.randint(0, 5, 1000),
            "C": np.random.randn(1000),
        }
    )

    def smallest_sums_from_frame(frame_group):
        per_b = frame_group.groupby("B")["C"].sum()
        return per_b.sort_values()[:2]

    def smallest_sums_from_series(series_group):
        # the group only holds "C" values here, so the matching "B" keys are
        # looked up in the enclosing frame through the group's index
        b_keys = df.reindex(series_group.index)["B"]
        per_b = series_group.groupby(b_keys).sum()
        return per_b.sort_values()[:2]

    by_a = df.groupby("A")
    result = by_a.apply(smallest_sums_from_frame)
    exp = by_a["C"].apply(smallest_sums_from_series)

    tm.assert_series_equal(result, exp, check_names=False)
    assert result.name == "C"
def test_apply_no_name_column_conflict():
    # it works! #2605
    # Smoke test only: grouping on columns labelled "name"/"name2" and
    # applying a function over the groups has to run without raising.
    # Nothing about the returned object is checked.
    frame = DataFrame(
        {
            "name": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2],
            "name2": [0, 0, 0, 1, 1, 1, 0, 0, 1, 1],
            "value": range(9, -1, -1),
        }
    )

    def sort_group_in_place(group):
        # inplace=True makes sort_values return None, so every group yields None
        return group.sort_values("value", inplace=True)

    frame.groupby(["name", "name2"]).apply(sort_group_in_place)
def test_apply_without_copy():
    # GH 5545
    # returning a non-copy in an applied function fails
    #
    # Both filters keep only the category "c" rows of multi-row groups; they
    # differ solely in whether a single-row group is returned as a copy or
    # as the very object that was passed in. The results must be equal.
    data = DataFrame(
        {
            "id_field": [100, 100, 200, 300],
            "category": ["a", "b", "c", "c"],
            "value": [1, 2, 3, 4],
        }
    )

    def filter_returning_copy(group):
        if group.shape[0] != 1:
            return group[group.category == "c"]
        return group.copy()

    def filter_returning_input(group):
        if group.shape[0] != 1:
            return group[group.category == "c"]
        return group

    expected = data.groupby("id_field").apply(filter_returning_copy)
    result = data.groupby("id_field").apply(filter_returning_input)
    tm.assert_frame_equal(result, expected)
+ + # GH 15670 + df = pd.DataFrame( + {"Number": [1, 2], "Date": ["2017-03-02"] * 2, "Str": ["foo", "inf"]} + ) + expected = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + df.Date = pd.to_datetime(df.Date) + result = df.groupby(["Number"]).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(result["Str"], expected["Str"]) + + # GH 15421 + df = pd.DataFrame( + {"A": [10, 20, 30], "B": ["foo", "3", "4"], "T": [pd.Timestamp("12:31:22")] * 3} + ) + + def get_B(g): + return g.iloc[0][["B"]] + + result = df.groupby("A").apply(get_B)["B"] + expected = df.B + expected.index = df.A + tm.assert_series_equal(result, expected) + + # GH 14423 + def predictions(tool): + out = pd.Series(index=["p1", "p2", "useTime"], dtype=object) + if "step1" in list(tool.State): + out["p1"] = str(tool[tool.State == "step1"].Machine.values[0]) + if "step2" in list(tool.State): + out["p2"] = str(tool[tool.State == "step2"].Machine.values[0]) + out["useTime"] = str(tool[tool.State == "step2"].oTime.values[0]) + return out + + df1 = pd.DataFrame( + { + "Key": ["B", "B", "A", "A"], + "State": ["step1", "step2", "step1", "step2"], + "oTime": ["", "2016-09-19 05:24:33", "", "2016-09-19 23:59:04"], + "Machine": ["23", "36L", "36R", "36R"], + } + ) + df2 = df1.copy() + df2.oTime = pd.to_datetime(df2.oTime) + expected = df1.groupby("Key").apply(predictions).p1 + result = df2.groupby("Key").apply(predictions).p1 + tm.assert_series_equal(expected, result) + + +def test_apply_aggregating_timedelta_and_datetime(): + # Regression test for GH 15562 + # The following groupby caused ValueErrors and IndexErrors pre 0.20.0 + + df = pd.DataFrame( + { + "clientid": ["A", "B", "C"], + "datetime": [np.datetime64("2017-02-01 00:00:00")] * 3, + } + ) + df["time_delta_zero"] = df.datetime - df.datetime + result = df.groupby("clientid").apply( + lambda ddf: pd.Series( + dict(clientid_age=ddf.time_delta_zero.min(), date=ddf.datetime.min()) + ) + ) + expected = pd.DataFrame( + { + "clientid": ["A", "B", "C"], + 
"clientid_age": [np.timedelta64(0, "D")] * 3, + "date": [np.datetime64("2017-02-01 00:00:00")] * 3, + } + ).set_index("clientid") + + tm.assert_frame_equal(result, expected) + + +def test_time_field_bug(): + # Test a fix for the following error related to GH issue 11324 When + # non-key fields in a group-by dataframe contained time-based fields + # that were not returned by the apply function, an exception would be + # raised. + + df = pd.DataFrame({"a": 1, "b": [datetime.now() for nn in range(10)]}) + + def func_with_no_date(batch): + return pd.Series({"c": 2}) + + def func_with_date(batch): + return pd.Series({"b": datetime(2015, 1, 1), "c": 2}) + + dfg_no_conversion = df.groupby(by=["a"]).apply(func_with_no_date) + dfg_no_conversion_expected = pd.DataFrame({"c": 2}, index=[1]) + dfg_no_conversion_expected.index.name = "a" + + dfg_conversion = df.groupby(by=["a"]).apply(func_with_date) + dfg_conversion_expected = pd.DataFrame( + {"b": datetime(2015, 1, 1), "c": 2}, index=[1] + ) + dfg_conversion_expected.index.name = "a" + + tm.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) + tm.assert_frame_equal(dfg_conversion, dfg_conversion_expected) + + +def test_gb_apply_list_of_unequal_len_arrays(): + + # GH1738 + df = DataFrame( + { + "group1": ["a", "a", "a", "b", "b", "b", "a", "a", "a", "b", "b", "b"], + "group2": ["c", "c", "d", "d", "d", "e", "c", "c", "d", "d", "d", "e"], + "weight": [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2], + "value": [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3], + } + ) + df = df.set_index(["group1", "group2"]) + df_grouped = df.groupby(level=["group1", "group2"], sort=True) + + def noddy(value, weight): + out = np.array(value * weight).repeat(3) + return out + + # the kernel function returns arrays of unequal length + # pandas sniffs the first one, sees it's an array and not + # a list, and assumed the rest are of equal length + # and so tries a vstack + + # don't die + df_grouped.apply(lambda x: noddy(x.value, x.weight)) + + +def 
def test_groupby_apply_return_empty_chunk():
    # GH 22221: apply filter which returns some empty groups
    frame = pd.DataFrame({"value": [0, 1], "group": ["filled", "empty"]})

    def values_other_than_one(chunk):
        # the "empty" group only holds the value 1, so nothing survives there
        kept_rows = chunk[chunk.value != 1]
        return kept_rows["value"]

    result = frame.groupby("group").apply(values_other_than_one)

    # build the full product first and drop the "empty" entries afterwards
    full_index = MultiIndex.from_product(
        [["empty", "filled"], [0]], names=["group", None]
    )
    expected = pd.Series([0], name="value", index=full_index.drop("empty"))
    tm.assert_series_equal(result, expected)
def test_apply_series_return_dataframe_groups():
    # GH 10078
    # The applied function returns one Series per group, keyed by the column
    # labels of that group; selecting "userId" from the combined result must
    # give a Series indexed by the group key ("day").
    tdf = DataFrame(
        {
            "day": {
                0: pd.Timestamp("2015-02-24 00:00:00"),
                1: pd.Timestamp("2015-02-24 00:00:00"),
                2: pd.Timestamp("2015-02-24 00:00:00"),
                3: pd.Timestamp("2015-02-24 00:00:00"),
                4: pd.Timestamp("2015-02-24 00:00:00"),
            },
            "userAgent": {
                0: "some UA string",
                1: "some UA string",
                2: "some UA string",
                3: "another UA string",
                4: "some UA string",
            },
            "userId": {
                0: "17661101",
                1: "17661101",
                2: "17661101",
                3: "17661101",
                4: "17661101",
            },
        }
    )

    def most_common_values(df):
        # value_counts() sorts by descending frequency, so index[0] is the
        # most frequent entry of each column.
        # DataFrame.items() is the canonical (label, column) iterator;
        # iteritems() is merely an alias of it and is not used here.
        return Series({c: s.value_counts().index[0] for c, s in df.items()})

    result = tdf.groupby("day").apply(most_common_values)["userId"]
    expected = pd.Series(
        ["17661101"], index=pd.DatetimeIndex(["2015-02-24"], name="day"), name="userId"
    )
    tm.assert_series_equal(result, expected)
test_apply_multi_level_name(category): + # https://github.com/pandas-dev/pandas/issues/31068 + b = [1, 2] * 5 + if category: + b = pd.Categorical(b, categories=[1, 2, 3]) + df = pd.DataFrame( + {"A": np.arange(10), "B": b, "C": list(range(10)), "D": list(range(10))} + ).set_index(["A", "B"]) + result = df.groupby("B").apply(lambda x: x.sum()) + expected = pd.DataFrame( + {"C": [20, 25], "D": [20, 25]}, index=pd.Index([1, 2], name="B") + ) + tm.assert_frame_equal(result, expected) + assert df.index.names == ["A", "B"] + + +@pytest.mark.parametrize( + "index", + [ + pd.CategoricalIndex(list("abc")), + pd.interval_range(0, 3), + pd.period_range("2020", periods=3, freq="D"), + pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + ], +) +def test_apply_index_has_complex_internals(index): + # GH 31248 + df = DataFrame({"group": [1, 1, 2], "value": [0, 1, 0]}, index=index) + result = df.groupby("group").apply(lambda x: x) + tm.assert_frame_equal(result, df) + + +@pytest.mark.parametrize( + "function, expected_values", + [ + (lambda x: x.index.to_list(), [[0, 1], [2, 3]]), + (lambda x: set(x.index.to_list()), [{0, 1}, {2, 3}]), + (lambda x: tuple(x.index.to_list()), [(0, 1), (2, 3)]), + ( + lambda x: {n: i for (n, i) in enumerate(x.index.to_list())}, + [{0: 0, 1: 1}, {0: 2, 1: 3}], + ), + ( + lambda x: [{n: i} for (n, i) in enumerate(x.index.to_list())], + [[{0: 0}, {1: 1}], [{0: 2}, {1: 3}]], + ), + ], +) +def test_apply_function_returns_non_pandas_non_scalar(function, expected_values): + # GH 31441 + df = pd.DataFrame(["A", "A", "B", "B"], columns=["groups"]) + result = df.groupby("groups").apply(function) + expected = pd.Series(expected_values, index=pd.Index(["A", "B"], name="groups")) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py new file mode 100644 index 00000000..15ce28e0 --- /dev/null +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -0,0 +1,172 @@ +import numpy as 
def cumsum_max(x):
    # Aggregation function handed to test_mgr_locs_updated: the cumulative
    # sum and its maximum are evaluated only so that the operations run on
    # the group; their value is discarded and a constant is returned.
    running_total = x.cumsum()
    running_total.max()
    return 0
df.groupby(["A", "B"]).agg(func) + expected = pd.DataFrame( + {"C": [0, 0]}, + index=pd.MultiIndex.from_product([["a"], ["a", "b"]], names=["A", "B"]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "binner,closed,expected", + [ + ( + np.array([0, 3, 6, 9], dtype=np.int64), + "left", + np.array([2, 5, 6], dtype=np.int64), + ), + ( + np.array([0, 3, 6, 9], dtype=np.int64), + "right", + np.array([3, 6, 6], dtype=np.int64), + ), + (np.array([0, 3, 6], dtype=np.int64), "left", np.array([2, 5], dtype=np.int64)), + ( + np.array([0, 3, 6], dtype=np.int64), + "right", + np.array([3, 6], dtype=np.int64), + ), + ], +) +def test_generate_bins(binner, closed, expected): + values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) + result = lib.generate_bins_dt64(values, binner, closed=closed) + tm.assert_numpy_array_equal(result, expected) + + +def test_group_ohlc(): + def _check(dtype): + obj = np.array(np.random.randn(20), dtype=dtype) + + bins = np.array([6, 12, 20]) + out = np.zeros((3, 4), dtype) + counts = np.zeros(len(out), dtype=np.int64) + labels = ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) + + func = getattr(groupby, f"group_ohlc_{dtype}") + func(out, counts, obj[:, None], labels) + + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])]) + + tm.assert_almost_equal(out, expected) + tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64)) + + obj[:6] = np.nan + func(out, counts, obj[:, None], labels) + expected[0] = np.nan + tm.assert_almost_equal(out, expected) + + _check("float32") + _check("float64") + + +class TestMoments: + pass + + +class TestReducer: + def test_int_index(self): + arr = np.random.randn(100, 4) + + msg = "Must pass either dummy and labels, or neither" + # we must pass either both labels and dummy, or neither + with 
def cartesian_product_for_groupers(result, args, names, fill_value=np.nan):
    """
    Reindex to a cartesian product for the groupers,
    preserving the nature (Categorical) of each grouper.

    Parameters
    ----------
    result : object with ``reindex`` and ``sort_index``
        Typically the DataFrame/Series produced by a groupby operation.
    args : iterable of array-like
        One entry per grouper. Categorical / CategoricalIndex entries are
        expanded to all of their categories; anything else is used as given.
    names : sequence
        Level names of the product index.
    fill_value : scalar, default np.nan
        Value placed in rows of the product that are absent from ``result``.
        The default is the same one ``reindex`` uses, so existing callers
        see no change.

    Returns
    -------
    ``result`` reindexed to the full product of the groupers and sorted by
    index.
    """

    def expand(grouper):
        if isinstance(grouper, (CategoricalIndex, Categorical)):
            # codes 0..n-1 enumerate every category exactly once, in category
            # order, so unobserved categories take part in the product too
            categories = grouper.categories
            grouper = Categorical.from_codes(
                np.arange(len(categories)),
                categories=categories,
                ordered=grouper.ordered,
            )
        return grouper

    index = MultiIndex.from_product([expand(arg) for arg in args], names=names)
    return result.reindex(index, fill_value=fill_value).sort_index()
group.count(), + "mean": group.mean(), + } + + result = df.groupby(cats, observed=False).D.apply(get_stats) + assert result.index.names[0] == "C" + + +def test_basic(): + + cats = Categorical( + ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) + + exp_index = CategoricalIndex(list("abcd"), name="b", ordered=True) + expected = DataFrame({"a": [1, 2, 4, np.nan]}, index=exp_index) + result = data.groupby("b", observed=False).mean() + tm.assert_frame_equal(result, expected) + + cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) + cat2 = Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + + # single grouper + gb = df.groupby("A", observed=False) + exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) + expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) + result = gb.sum() + tm.assert_frame_equal(result, expected) + + # GH 8623 + x = DataFrame( + [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. 
Doe"]], + columns=["person_id", "person_name"], + ) + x["person_name"] = Categorical(x.person_name) + + g = x.groupby(["person_id"], observed=False) + result = g.transform(lambda x: x) + tm.assert_frame_equal(result, x[["person_name"]]) + + result = x.drop_duplicates("person_name") + expected = x.iloc[[0, 1]] + tm.assert_frame_equal(result, expected) + + def f(x): + return x.drop_duplicates("person_name").iloc[0] + + result = g.apply(f) + expected = x.iloc[[0, 1]].copy() + expected.index = Index([1, 2], name="person_id") + expected["person_name"] = expected["person_name"].astype("object") + tm.assert_frame_equal(result, expected) + + # GH 9921 + # Monotonic + df = DataFrame({"a": [5, 15, 25]}) + c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) + + result = df.a.groupby(c, observed=False).transform(sum) + tm.assert_series_equal(result, df["a"]) + + tm.assert_series_equal( + df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] + ) + tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]]) + tm.assert_frame_equal( + df.groupby(c, observed=False).transform(lambda xs: np.max(xs)), df[["a"]] + ) + + # Filter + tm.assert_series_equal(df.a.groupby(c, observed=False).filter(np.all), df["a"]) + tm.assert_frame_equal(df.groupby(c, observed=False).filter(np.all), df) + + # Non-monotonic + df = DataFrame({"a": [5, 15, 25, -5]}) + c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) + + result = df.a.groupby(c, observed=False).transform(sum) + tm.assert_series_equal(result, df["a"]) + + tm.assert_series_equal( + df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] + ) + tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]]) + tm.assert_frame_equal( + df.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df[["a"]] + ) + + # GH 9603 + df = DataFrame({"a": [1, 0, 0, 0]}) + c = pd.cut(df.a, [0, 1, 2, 3, 4], labels=Categorical(list("abcd"))) + result = df.groupby(c, observed=False).apply(len) + + 
exp_index = CategoricalIndex(c.values.categories, ordered=c.values.ordered) + expected = Series([1, 0, 0, 0], index=exp_index) + expected.index.name = "a" + tm.assert_series_equal(result, expected) + + # more basic + levels = ["foo", "bar", "baz", "qux"] + codes = np.random.randint(0, 4, size=100) + + cats = Categorical.from_codes(codes, levels, ordered=True) + + data = DataFrame(np.random.randn(100, 4)) + + result = data.groupby(cats, observed=False).mean() + + expected = data.groupby(np.asarray(cats), observed=False).mean() + exp_idx = CategoricalIndex(levels, categories=cats.categories, ordered=True) + expected = expected.reindex(exp_idx) + + tm.assert_frame_equal(result, expected) + + grouped = data.groupby(cats, observed=False) + desc_result = grouped.describe() + + idx = cats.codes.argsort() + ord_labels = np.asarray(cats).take(idx) + ord_data = data.take(idx) + + exp_cats = Categorical( + ord_labels, ordered=True, categories=["foo", "bar", "baz", "qux"] + ) + expected = ord_data.groupby(exp_cats, sort=False, observed=False).describe() + tm.assert_frame_equal(desc_result, expected) + + # GH 10460 + expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) + exp = CategoricalIndex(expc) + tm.assert_index_equal((desc_result.stack().index.get_level_values(0)), exp) + exp = Index(["count", "mean", "std", "min", "25%", "50%", "75%", "max"] * 4) + tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp) + + +def test_level_get_group(observed): + # GH15155 + df = DataFrame( + data=np.arange(2, 22, 2), + index=MultiIndex( + levels=[CategoricalIndex(["a", "b"]), range(10)], + codes=[[0] * 5 + [1] * 5, range(10)], + names=["Index1", "Index2"], + ), + ) + g = df.groupby(level=["Index1"], observed=observed) + + # expected should equal test.loc[["a"]] + # GH15166 + expected = DataFrame( + data=np.arange(2, 12, 2), + index=MultiIndex( + levels=[CategoricalIndex(["a", "b"]), range(5)], + codes=[[0] * 5, range(5)], + names=["Index1", 
"Index2"], + ), + ) + result = g.get_group("a") + + tm.assert_frame_equal(result, expected) + + +# GH#21636 flaky on py37; may be related to older numpy, see discussion +# https://github.com/MacPython/pandas-wheels/pull/64 +@pytest.mark.xfail(PY37, reason="Flaky, GH-27902", strict=False) +@pytest.mark.parametrize("ordered", [True, False]) +def test_apply(ordered): + # GH 10138 + + dense = Categorical(list("abc"), ordered=ordered) + + # 'b' is in the categories but not in the list + missing = Categorical(list("aaa"), categories=["a", "b"], ordered=ordered) + values = np.arange(len(dense)) + df = DataFrame({"missing": missing, "dense": dense, "values": values}) + grouped = df.groupby(["missing", "dense"], observed=True) + + # missing category 'b' should still exist in the output index + idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) + expected = DataFrame([0, 1, 2.0], index=idx, columns=["values"]) + + # GH#21636 tracking down the xfail, in some builds np.mean(df.loc[[0]]) + # is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"]) + # when we expect Series(0., index=["values"]) + result = grouped.apply(lambda x: np.mean(x)) + tm.assert_frame_equal(result, expected) + + # we coerce back to ints + expected = expected.astype("int") + result = grouped.mean() + tm.assert_frame_equal(result, expected) + + result = grouped.agg(np.mean) + tm.assert_frame_equal(result, expected) + + # but for transform we should still get back the original index + idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) + expected = Series(1, index=idx) + result = grouped.apply(lambda x: 1) + tm.assert_series_equal(result, expected) + + +def test_observed(observed): + # multiple groupers, don't re-expand the output space + # of the grouper + # gh-14942 (implement) + # gh-10132 (back-compat) + # gh-8138 (back-compat) + # gh-8869 + + cat1 = Categorical(["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True) + cat2 = 
Categorical(["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + df["C"] = ["foo", "bar"] * 2 + + # multiple groupers with a non-cat + gb = df.groupby(["A", "B", "C"], observed=observed) + exp_index = MultiIndex.from_arrays( + [cat1, cat2, ["foo", "bar"] * 2], names=["A", "B", "C"] + ) + expected = DataFrame({"values": Series([1, 2, 3, 4], index=exp_index)}).sort_index() + result = gb.sum() + if not observed: + expected = cartesian_product_for_groupers( + expected, [cat1, cat2, ["foo", "bar"]], list("ABC") + ) + + tm.assert_frame_equal(result, expected) + + gb = df.groupby(["A", "B"], observed=observed) + exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) + expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) + result = gb.sum() + if not observed: + expected = cartesian_product_for_groupers(expected, [cat1, cat2], list("AB")) + + tm.assert_frame_equal(result, expected) + + # https://github.com/pandas-dev/pandas/issues/8138 + d = { + "cat": Categorical( + ["a", "b", "a", "b"], categories=["a", "b", "c"], ordered=True + ), + "ints": [1, 1, 2, 2], + "val": [10, 20, 30, 40], + } + df = DataFrame(d) + + # Grouping on a single column + groups_single_key = df.groupby("cat", observed=observed) + result = groups_single_key.mean() + + exp_index = CategoricalIndex( + list("ab"), name="cat", categories=list("abc"), ordered=True + ) + expected = DataFrame({"ints": [1.5, 1.5], "val": [20.0, 30]}, index=exp_index) + if not observed: + index = CategoricalIndex( + list("abc"), name="cat", categories=list("abc"), ordered=True + ) + expected = expected.reindex(index) + + tm.assert_frame_equal(result, expected) + + # Grouping on two columns + groups_double_key = df.groupby(["cat", "ints"], observed=observed) + result = groups_double_key.agg("mean") + expected = DataFrame( + { + "val": [10, 30, 20, 40], + "cat": Categorical( + ["a", "a", "b", "b"], categories=["a", "b", "c"], 
ordered=True + ), + "ints": [1, 2, 1, 2], + } + ).set_index(["cat", "ints"]) + if not observed: + expected = cartesian_product_for_groupers( + expected, [df.cat.values, [1, 2]], ["cat", "ints"] + ) + + tm.assert_frame_equal(result, expected) + + # GH 10132 + for key in [("a", 1), ("b", 2), ("b", 1), ("a", 2)]: + c, i = key + result = groups_double_key.get_group(key) + expected = df[(df.cat == c) & (df.ints == i)] + tm.assert_frame_equal(result, expected) + + # gh-8869 + # with as_index + d = { + "foo": [10, 8, 4, 8, 4, 1, 1], + "bar": [10, 20, 30, 40, 50, 60, 70], + "baz": ["d", "c", "e", "a", "a", "d", "c"], + } + df = DataFrame(d) + cat = pd.cut(df["foo"], np.linspace(0, 10, 3)) + df["range"] = cat + groups = df.groupby(["range", "baz"], as_index=False, observed=observed) + result = groups.agg("mean") + + groups2 = df.groupby(["range", "baz"], as_index=True, observed=observed) + expected = groups2.agg("mean").reset_index() + tm.assert_frame_equal(result, expected) + + +def test_observed_codes_remap(observed): + d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]} + df = DataFrame(d) + values = pd.cut(df["C1"], [1, 2, 3, 6]) + values.name = "cat" + groups_double_key = df.groupby([values, "C2"], observed=observed) + + idx = MultiIndex.from_arrays([values, [1, 2, 3, 4]], names=["cat", "C2"]) + expected = DataFrame({"C1": [3, 3, 4, 5], "C3": [10, 100, 200, 34]}, index=idx) + if not observed: + expected = cartesian_product_for_groupers( + expected, [values.values, [1, 2, 3, 4]], ["cat", "C2"] + ) + + result = groups_double_key.agg("mean") + tm.assert_frame_equal(result, expected) + + +def test_observed_perf(): + # we create a cartesian product, so this is + # non-performant if we don't use observed values + # gh-14942 + df = DataFrame( + { + "cat": np.random.randint(0, 255, size=30000), + "int_id": np.random.randint(0, 255, size=30000), + "other_id": np.random.randint(0, 10000, size=30000), + "foo": 0, + } + ) + df["cat"] = 
df.cat.astype(str).astype("category") + + grouped = df.groupby(["cat", "int_id", "other_id"], observed=True) + result = grouped.count() + assert result.index.levels[0].nunique() == df.cat.nunique() + assert result.index.levels[1].nunique() == df.int_id.nunique() + assert result.index.levels[2].nunique() == df.other_id.nunique() + + +def test_observed_groups(observed): + # gh-20583 + # test that we have the appropriate groups + + cat = Categorical(["a", "c", "a"], categories=["a", "b", "c"]) + df = DataFrame({"cat": cat, "vals": [1, 2, 3]}) + g = df.groupby("cat", observed=observed) + + result = g.groups + if observed: + expected = {"a": Index([0, 2], dtype="int64"), "c": Index([1], dtype="int64")} + else: + expected = { + "a": Index([0, 2], dtype="int64"), + "b": Index([], dtype="int64"), + "c": Index([1], dtype="int64"), + } + + tm.assert_dict_equal(result, expected) + + +def test_observed_groups_with_nan(observed): + # GH 24740 + df = DataFrame( + { + "cat": Categorical(["a", np.nan, "a"], categories=["a", "b", "d"]), + "vals": [1, 2, 3], + } + ) + g = df.groupby("cat", observed=observed) + result = g.groups + if observed: + expected = {"a": Index([0, 2], dtype="int64")} + else: + expected = { + "a": Index([0, 2], dtype="int64"), + "b": Index([], dtype="int64"), + "d": Index([], dtype="int64"), + } + tm.assert_dict_equal(result, expected) + + +def test_observed_nth(): + # GH 26385 + cat = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b", "c"]) + ser = pd.Series([1, 2, 3]) + df = pd.DataFrame({"cat": cat, "ser": ser}) + + result = df.groupby("cat", observed=False)["ser"].nth(0) + + index = pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"]) + expected = pd.Series([1, np.nan, np.nan], index=index, name="ser") + expected.index.name = "cat" + + tm.assert_series_equal(result, expected) + + +def test_dataframe_categorical_with_nan(observed): + # GH 21151 + s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"]) + s2 = Series([1, 2, 3, 
4]) + df = DataFrame({"s1": s1, "s2": s2}) + result = df.groupby("s1", observed=observed).first().reset_index() + if observed: + expected = DataFrame( + {"s1": Categorical(["a"], categories=["a", "b", "c"]), "s2": [2]} + ) + else: + expected = DataFrame( + { + "s1": Categorical(["a", "b", "c"], categories=["a", "b", "c"]), + "s2": [2, np.nan, np.nan], + } + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +@pytest.mark.parametrize("observed", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): + # GH 25871: Fix groupby sorting on ordered Categoricals + # GH 25167: Groupby with observed=True doesn't sort + + # Build a dataframe with cat having one unobserved category ('missing'), + # and a Series with identical values + label = Categorical( + ["d", "a", "b", "a", "d", "b"], + categories=["a", "b", "missing", "d"], + ordered=ordered, + ) + val = Series(["d", "a", "b", "a", "d", "b"]) + df = DataFrame({"label": label, "val": val}) + + # aggregate on the Categorical + result = df.groupby("label", observed=observed, sort=sort)["val"].aggregate("first") + + # If ordering works, we expect index labels equal to aggregation results, + # except for 'observed=False': label 'missing' has aggregation None + label = Series(result.index.array, dtype="object") + aggr = Series(result.array) + if not observed: + aggr[aggr.isna()] = "missing" + if not all(label == aggr): + msg = ( + "Labels and aggregation results not consistently sorted\n" + + f"for (ordered={ordered}, observed={observed}, sort={sort})\n" + + f"Result:\n{result}" + ) + assert False, msg + + +def test_datetime(): + # GH9049: ensure backward compatibility + levels = pd.date_range("2014-01-01", periods=4) + codes = np.random.randint(0, 4, size=100) + + cats = Categorical.from_codes(codes, levels, ordered=True) + + data = DataFrame(np.random.randn(100, 4)) + result = 
data.groupby(cats, observed=False).mean() + + expected = data.groupby(np.asarray(cats), observed=False).mean() + expected = expected.reindex(levels) + expected.index = CategoricalIndex( + expected.index, categories=expected.index, ordered=True + ) + + tm.assert_frame_equal(result, expected) + + grouped = data.groupby(cats, observed=False) + desc_result = grouped.describe() + + idx = cats.codes.argsort() + ord_labels = cats.take(idx) + ord_data = data.take(idx) + expected = ord_data.groupby(ord_labels, observed=False).describe() + tm.assert_frame_equal(desc_result, expected) + tm.assert_index_equal(desc_result.index, expected.index) + tm.assert_index_equal( + desc_result.index.get_level_values(0), expected.index.get_level_values(0) + ) + + # GH 10460 + expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) + exp = CategoricalIndex(expc) + tm.assert_index_equal((desc_result.stack().index.get_level_values(0)), exp) + exp = Index(["count", "mean", "std", "min", "25%", "50%", "75%", "max"] * 4) + tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp) + + +def test_categorical_index(): + + s = np.random.RandomState(12345) + levels = ["foo", "bar", "baz", "qux"] + codes = s.randint(0, 4, size=20) + cats = Categorical.from_codes(codes, levels, ordered=True) + df = DataFrame(np.repeat(np.arange(20), 4).reshape(-1, 4), columns=list("abcd")) + df["cats"] = cats + + # with a cat index + result = df.set_index("cats").groupby(level=0, observed=False).sum() + expected = df[list("abcd")].groupby(cats.codes, observed=False).sum() + expected.index = CategoricalIndex( + Categorical.from_codes([0, 1, 2, 3], levels, ordered=True), name="cats" + ) + tm.assert_frame_equal(result, expected) + + # with a cat column, should produce a cat index + result = df.groupby("cats", observed=False).sum() + expected = df[list("abcd")].groupby(cats.codes, observed=False).sum() + expected.index = CategoricalIndex( + Categorical.from_codes([0, 1, 2, 3], levels, 
ordered=True), name="cats" + ) + tm.assert_frame_equal(result, expected) + + +def test_describe_categorical_columns(): + # GH 11558 + cats = CategoricalIndex( + ["qux", "foo", "baz", "bar"], + categories=["foo", "bar", "baz", "qux"], + ordered=True, + ) + df = DataFrame(np.random.randn(20, 4), columns=cats) + result = df.groupby([1, 2, 3, 4] * 5).describe() + + tm.assert_index_equal(result.stack().columns, cats) + tm.assert_categorical_equal(result.stack().columns.values, cats.values) + + +def test_unstack_categorical(): + # GH11558 (example is taken from the original issue) + df = DataFrame( + {"a": range(10), "medium": ["A", "B"] * 5, "artist": list("XYXXY") * 2} + ) + df["medium"] = df["medium"].astype("category") + + gcat = df.groupby(["artist", "medium"], observed=False)["a"].count().unstack() + result = gcat.describe() + + exp_columns = CategoricalIndex(["A", "B"], ordered=False, name="medium") + tm.assert_index_equal(result.columns, exp_columns) + tm.assert_categorical_equal(result.columns.values, exp_columns.values) + + result = gcat["A"] + gcat["B"] + expected = Series([6, 4], index=Index(["X", "Y"], name="artist")) + tm.assert_series_equal(result, expected) + + +def test_bins_unequal_len(): + # GH3011 + series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) + bins = pd.cut(series.dropna().values, 4) + + # len(bins) != len(series) here + with pytest.raises(ValueError): + series.groupby(bins).mean() + + +def test_as_index(): + # GH13204 + df = DataFrame( + { + "cat": Categorical([1, 2, 2], [1, 2, 3]), + "A": [10, 11, 11], + "B": [101, 102, 103], + } + ) + result = df.groupby(["cat", "A"], as_index=False, observed=True).sum() + expected = DataFrame( + { + "cat": Categorical([1, 2], categories=df.cat.cat.categories), + "A": [10, 11], + "B": [101, 205], + }, + columns=["cat", "A", "B"], + ) + tm.assert_frame_equal(result, expected) + + # function grouper + f = lambda r: df.loc[r, "A"] + result = df.groupby(["cat", f], as_index=False, observed=True).sum() + 
expected = DataFrame( + { + "cat": Categorical([1, 2], categories=df.cat.cat.categories), + "A": [10, 22], + "B": [101, 205], + }, + columns=["cat", "A", "B"], + ) + tm.assert_frame_equal(result, expected) + + # another not in-axis grouper (conflicting names in index) + s = Series(["a", "b", "b"], name="cat") + result = df.groupby(["cat", s], as_index=False, observed=True).sum() + tm.assert_frame_equal(result, expected) + + # is original index dropped? + group_columns = ["cat", "A"] + expected = DataFrame( + { + "cat": Categorical([1, 2], categories=df.cat.cat.categories), + "A": [10, 11], + "B": [101, 205], + }, + columns=["cat", "A", "B"], + ) + + for name in [None, "X", "B"]: + df.index = Index(list("abc"), name=name) + result = df.groupby(group_columns, as_index=False, observed=True).sum() + + tm.assert_frame_equal(result, expected) + + +def test_preserve_categories(): + # GH-13179 + categories = list("abc") + + # ordered=True + df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=True)}) + index = CategoricalIndex(categories, categories, ordered=True, name="A") + tm.assert_index_equal( + df.groupby("A", sort=True, observed=False).first().index, index + ) + tm.assert_index_equal( + df.groupby("A", sort=False, observed=False).first().index, index + ) + + # ordered=False + df = DataFrame({"A": Categorical(list("ba"), categories=categories, ordered=False)}) + sort_index = CategoricalIndex(categories, categories, ordered=False, name="A") + nosort_index = CategoricalIndex(list("bac"), list("bac"), ordered=False, name="A") + tm.assert_index_equal( + df.groupby("A", sort=True, observed=False).first().index, sort_index + ) + tm.assert_index_equal( + df.groupby("A", sort=False, observed=False).first().index, nosort_index + ) + + +def test_preserve_categorical_dtype(): + # GH13743, GH13854 + df = DataFrame( + { + "A": [1, 2, 1, 1, 2], + "B": [10, 16, 22, 28, 34], + "C1": Categorical(list("abaab"), categories=list("bac"), ordered=False), + "C2": 
Categorical(list("abaab"), categories=list("bac"), ordered=True), + } + ) + # single grouper + exp_full = DataFrame( + { + "A": [2.0, 1.0, np.nan], + "B": [25.0, 20.0, np.nan], + "C1": Categorical(list("bac"), categories=list("bac"), ordered=False), + "C2": Categorical(list("bac"), categories=list("bac"), ordered=True), + } + ) + for col in ["C1", "C2"]: + result1 = df.groupby(by=col, as_index=False, observed=False).mean() + result2 = df.groupby(by=col, as_index=True, observed=False).mean().reset_index() + expected = exp_full.reindex(columns=result1.columns) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + +@pytest.mark.parametrize( + "func, values", + [ + ("first", ["second", "first"]), + ("last", ["fourth", "third"]), + ("min", ["fourth", "first"]), + ("max", ["second", "third"]), + ], +) +def test_preserve_on_ordered_ops(func, values): + # gh-18502 + # preserve the categoricals on ops + c = pd.Categorical(["first", "second", "third", "fourth"], ordered=True) + df = pd.DataFrame({"payload": [-1, -2, -1, -2], "col": c}) + g = df.groupby("payload") + result = getattr(g, func)() + expected = pd.DataFrame( + {"payload": [-2, -1], "col": pd.Series(values, dtype=c.dtype)} + ).set_index("payload") + tm.assert_frame_equal(result, expected) + + +def test_categorical_no_compress(): + data = Series(np.random.randn(9)) + + codes = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) + cats = Categorical.from_codes(codes, [0, 1, 2], ordered=True) + + result = data.groupby(cats, observed=False).mean() + exp = data.groupby(codes, observed=False).mean() + + exp.index = CategoricalIndex( + exp.index, categories=cats.categories, ordered=cats.ordered + ) + tm.assert_series_equal(result, exp) + + codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3]) + cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True) + + result = data.groupby(cats, observed=False).mean() + exp = data.groupby(codes, observed=False).mean().reindex(cats.categories) + exp.index = 
CategoricalIndex( + exp.index, categories=cats.categories, ordered=cats.ordered + ) + tm.assert_series_equal(result, exp) + + cats = Categorical( + ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) + + result = data.groupby("b", observed=False).mean() + result = result["a"].values + exp = np.array([1, 2, 4, np.nan]) + tm.assert_numpy_array_equal(result, exp) + + +def test_groupby_empty_with_category(): + # GH-9614 + # test fix for when group by on None resulted in + # coercion of dtype categorical -> float + df = pd.DataFrame( + {"A": [None] * 3, "B": pd.Categorical(["train", "train", "test"])} + ) + result = df.groupby("A").first()["B"] + expected = pd.Series( + pd.Categorical([], categories=["test", "train"]), + index=pd.Series([], dtype="object", name="A"), + name="B", + ) + tm.assert_series_equal(result, expected) + + +def test_sort(): + + # https://stackoverflow.com/questions/23814368/sorting-pandas- + # categorical-labels-after-groupby + # This should result in a properly sorted Series so that the plot + # has a sorted x axis + # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') + + df = DataFrame({"value": np.random.randint(0, 10000, 100)}) + labels = [f"{i} - {i+499}" for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + + res = df.groupby(["value_group"], observed=False)["value_group"].count() + exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] + exp.index = CategoricalIndex(exp.index, name=exp.index.name) + tm.assert_series_equal(res, exp) + + +def test_sort2(): + # dataframe groupby sort was being ignored # GH 8868 + df = DataFrame( + [ + ["(7.5, 10]", 10, 10], + ["(7.5, 10]", 8, 20], + ["(2.5, 5]", 5, 30], + ["(5, 7.5]", 6, 40], 
+ ["(2.5, 5]", 4, 50], + ["(0, 2.5]", 1, 60], + ["(5, 7.5]", 7, 70], + ], + columns=["range", "foo", "bar"], + ) + df["range"] = Categorical(df["range"], ordered=True) + index = CategoricalIndex( + ["(0, 2.5]", "(2.5, 5]", "(5, 7.5]", "(7.5, 10]"], name="range", ordered=True + ) + expected_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"], index=index + ) + + col = "range" + result_sort = df.groupby(col, sort=True, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + # when categories is ordered, group is ordered by category's order + expected_sort = result_sort + result_sort = df.groupby(col, sort=False, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + df["range"] = Categorical(df["range"], ordered=False) + index = CategoricalIndex( + ["(0, 2.5]", "(2.5, 5]", "(5, 7.5]", "(7.5, 10]"], name="range" + ) + expected_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"], index=index + ) + + index = CategoricalIndex( + ["(7.5, 10]", "(2.5, 5]", "(5, 7.5]", "(0, 2.5]"], + categories=["(7.5, 10]", "(2.5, 5]", "(5, 7.5]", "(0, 2.5]"], + name="range", + ) + expected_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], index=index, columns=["foo", "bar"] + ) + + col = "range" + + # this is an unordered categorical, but we allow this #### + result_sort = df.groupby(col, sort=True, observed=False).first() + tm.assert_frame_equal(result_sort, expected_sort) + + result_nosort = df.groupby(col, sort=False, observed=False).first() + tm.assert_frame_equal(result_nosort, expected_nosort) + + +def test_sort_datetimelike(): + # GH10505 + + # use same data as test_groupby_sort_categorical, which category is + # corresponding to datetime.month + df = DataFrame( + { + "dt": [ + datetime(2011, 7, 1), + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 2, 1), + datetime(2011, 1, 1), + datetime(2011, 5, 1), + ], + "foo": [10, 8, 
5, 6, 4, 1, 7], + "bar": [10, 20, 30, 40, 50, 60, 70], + }, + columns=["dt", "foo", "bar"], + ) + + # ordered=True + df["dt"] = Categorical(df["dt"], ordered=True) + index = [ + datetime(2011, 1, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 7, 1), + ] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"] + ) + result_sort.index = CategoricalIndex(index, name="dt", ordered=True) + + index = [ + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 1, 1), + ] + result_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], columns=["foo", "bar"] + ) + result_nosort.index = CategoricalIndex( + index, categories=index, name="dt", ordered=True + ) + + col = "dt" + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) + + # when categories is ordered, group is ordered by category's order + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=False, observed=False).first() + ) + + # ordered = False + df["dt"] = Categorical(df["dt"], ordered=False) + index = [ + datetime(2011, 1, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 7, 1), + ] + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=["foo", "bar"] + ) + result_sort.index = CategoricalIndex(index, name="dt") + + index = [ + datetime(2011, 7, 1), + datetime(2011, 2, 1), + datetime(2011, 5, 1), + datetime(2011, 1, 1), + ] + result_nosort = DataFrame( + [[10, 10], [5, 30], [6, 40], [1, 60]], columns=["foo", "bar"] + ) + result_nosort.index = CategoricalIndex(index, categories=index, name="dt") + + col = "dt" + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) + tm.assert_frame_equal( + result_nosort, df.groupby(col, sort=False, observed=False).first() + ) + + +def test_empty_sum(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = DataFrame( + {"A": Categorical(["a", "a", "b"], 
categories=["a", "b", "c"]), "B": [1, 2, 1]} + ) + expected_idx = CategoricalIndex(["a", "b", "c"], name="A") + + # 0 by default + result = df.groupby("A", observed=False).B.sum() + expected = Series([3, 1, 0], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A", observed=False).B.sum(min_count=0) + expected = Series([3, 1, 0], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A", observed=False).B.sum(min_count=1) + expected = Series([3, 1, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count>1 + result = df.groupby("A", observed=False).B.sum(min_count=2) + expected = Series([3, np.nan, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + +def test_empty_prod(): + # https://github.com/pandas-dev/pandas/issues/18678 + df = DataFrame( + {"A": Categorical(["a", "a", "b"], categories=["a", "b", "c"]), "B": [1, 2, 1]} + ) + + expected_idx = CategoricalIndex(["a", "b", "c"], name="A") + + # 1 by default + result = df.groupby("A", observed=False).B.prod() + expected = Series([2, 1, 1], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=0 + result = df.groupby("A", observed=False).B.prod(min_count=0) + expected = Series([2, 1, 1], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + # min_count=1 + result = df.groupby("A", observed=False).B.prod(min_count=1) + expected = Series([2, 1, np.nan], expected_idx, name="B") + tm.assert_series_equal(result, expected) + + +def test_groupby_multiindex_categorical_datetime(): + # https://github.com/pandas-dev/pandas/issues/21390 + + df = DataFrame( + { + "key1": Categorical(list("abcbabcba")), + "key2": Categorical( + list(pd.date_range("2018-06-01 00", freq="1T", periods=3)) * 3 + ), + "values": np.arange(9), + } + ) + result = df.groupby(["key1", "key2"]).mean() + + idx = MultiIndex.from_product( + [ + 
Categorical(["a", "b", "c"]), + Categorical(pd.date_range("2018-06-01 00", freq="1T", periods=3)), + ], + names=["key1", "key2"], + ) + expected = DataFrame({"values": [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "as_index, expected", + [ + ( + True, + Series( + index=MultiIndex.from_arrays( + [Series([1, 1, 2], dtype="category"), [1, 2, 2]], names=["a", "b"] + ), + data=[1, 2, 3], + name="x", + ), + ), + ( + False, + DataFrame( + { + "a": Series([1, 1, 2], dtype="category"), + "b": [1, 2, 2], + "x": [1, 2, 3], + } + ), + ), + ], +) +def test_groupby_agg_observed_true_single_column(as_index, expected): + # GH-23970 + df = DataFrame( + {"a": Series([1, 1, 2], dtype="category"), "b": [1, 2, 2], "x": [1, 2, 3]} + ) + + result = df.groupby(["a", "b"], as_index=as_index, observed=True)["x"].sum() + + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("fill_value", [None, np.nan, pd.NaT]) +def test_shift(fill_value): + ct = Categorical( + ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False + ) + expected = Categorical( + [None, "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False + ) + res = ct.shift(1, fill_value=fill_value) + tm.assert_equal(res, expected) + + +@pytest.fixture +def df_cat(df): + """ + DataFrame with multiple categorical columns and a column of integers. + Shortened so as not to contain all possible combinations of categories. + Useful for testing `observed` kwarg functionality on GroupBy objects. 
+ + Parameters + ---------- + df: DataFrame + Non-categorical, longer DataFrame from another fixture, used to derive + this one + + Returns + ------- + df_cat: DataFrame + """ + df_cat = df.copy()[:4] # leave out some groups + df_cat["A"] = df_cat["A"].astype("category") + df_cat["B"] = df_cat["B"].astype("category") + df_cat["C"] = Series([1, 2, 3, 4]) + df_cat = df_cat.drop(["D"], axis=1) + return df_cat + + +@pytest.mark.parametrize( + "operation, kwargs", [("agg", dict(dtype="category")), ("apply", dict())] +) +def test_seriesgroupby_observed_true(df_cat, operation, kwargs): + # GH 24880 + index = MultiIndex.from_frame( + DataFrame( + {"A": ["foo", "foo", "bar", "bar"], "B": ["one", "two", "one", "three"]}, + **kwargs, + ) + ) + expected = Series(data=[1, 3, 2, 4], index=index, name="C") + grouped = df_cat.groupby(["A", "B"], observed=True)["C"] + result = getattr(grouped, operation)(sum) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("operation", ["agg", "apply"]) +@pytest.mark.parametrize("observed", [False, None]) +def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): + # GH 24880 + index, _ = MultiIndex.from_product( + [ + CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + ], + names=["A", "B"], + ).sortlevel() + + expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C") + grouped = df_cat.groupby(["A", "B"], observed=observed)["C"] + result = getattr(grouped, operation)(sum) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "observed, index, data", + [ + ( + True, + MultiIndex.from_tuples( + [ + ("foo", "one", "min"), + ("foo", "one", "max"), + ("foo", "two", "min"), + ("foo", "two", "max"), + ("bar", "one", "min"), + ("bar", "one", "max"), + ("bar", "three", "min"), + ("bar", "three", "max"), + ], + names=["A", "B", None], + ), + [1, 1, 3, 3, 2, 2, 4, 4], + ), + ( + False, + MultiIndex.from_product( + [ + 
CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + Index(["min", "max"]), + ], + names=["A", "B", None], + ), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3], + ), + ( + None, + MultiIndex.from_product( + [ + CategoricalIndex(["bar", "foo"], ordered=False), + CategoricalIndex(["one", "three", "two"], ordered=False), + Index(["min", "max"]), + ], + names=["A", "B", None], + ), + [2, 2, 4, 4, np.nan, np.nan, 1, 1, np.nan, np.nan, 3, 3], + ), + ], +) +def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): + # GH 24880 + expected = Series(data=data, index=index, name="C") + result = df_cat.groupby(["A", "B"], observed=observed)["C"].apply( + lambda x: {"min": x.min(), "max": x.max()} + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_categorical_series_dataframe_consistent(df_cat): + # GH 20416 + expected = df_cat.groupby(["A", "B"])["C"].mean() + result = df_cat.groupby(["A", "B"]).mean()["C"] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) +def test_groupby_categorical_axis_1(code): + # GH 13420 + df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]}) + cat = pd.Categorical.from_codes(code, categories=list("abc")) + result = df.groupby(cat, axis=1).mean() + expected = df.T.groupby(cat, axis=0).mean().T + tm.assert_frame_equal(result, expected) + + +def test_groupby_cat_preserves_structure(observed, ordered_fixture): + # GH 28787 + df = DataFrame( + {"Name": Categorical(["Bob", "Greg"], ordered=ordered_fixture), "Item": [1, 2]}, + columns=["Name", "Item"], + ) + expected = df.copy() + + result = ( + df.groupby("Name", observed=observed) + .agg(pd.DataFrame.sum, skipna=True) + .reset_index() + ) + + tm.assert_frame_equal(result, expected) + + +def test_get_nonexistent_category(): + # Accessing a Category that is not in the dataframe + df = pd.DataFrame({"var": ["a", "a", "b", 
"b"], "val": range(4)}) + with pytest.raises(KeyError, match="'vau'"): + df.groupby("var").apply( + lambda rows: pd.DataFrame( + {"var": [rows.iloc[-1]["var"]], "val": [rows.iloc[-1]["vau"]]} + ) + ) + + +def test_series_groupby_on_2_categoricals_unobserved( + reduction_func: str, observed: bool +): + # GH 17605 + + if reduction_func == "ngroup": + pytest.skip("ngroup is not truly a reduction") + + df = pd.DataFrame( + { + "cat_1": pd.Categorical(list("AABB"), categories=list("ABCD")), + "cat_2": pd.Categorical(list("AB") * 2, categories=list("ABCD")), + "value": [0.1] * 4, + } + ) + args = {"nth": [0]}.get(reduction_func, []) + + expected_length = 4 if observed else 16 + + series_groupby = df.groupby(["cat_1", "cat_2"], observed=observed)["value"] + agg = getattr(series_groupby, reduction_func) + result = agg(*args) + + assert len(result) == expected_length + + +@pytest.mark.parametrize( + "func, zero_or_nan", + [ + ("all", np.NaN), + ("any", np.NaN), + ("count", 0), + ("first", np.NaN), + ("idxmax", np.NaN), + ("idxmin", np.NaN), + ("last", np.NaN), + ("mad", np.NaN), + ("max", np.NaN), + ("mean", np.NaN), + ("median", np.NaN), + ("min", np.NaN), + ("nth", np.NaN), + ("nunique", 0), + ("prod", np.NaN), + ("quantile", np.NaN), + ("sem", np.NaN), + ("size", 0), + ("skew", np.NaN), + ("std", np.NaN), + ("sum", np.NaN), + ("var", np.NaN), + ], +) +def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(func, zero_or_nan): + # GH 17605 + # Tests whether the unobserved categories in the result contain 0 or NaN + df = pd.DataFrame( + { + "cat_1": pd.Categorical(list("AABB"), categories=list("ABC")), + "cat_2": pd.Categorical(list("AB") * 2, categories=list("ABC")), + "value": [0.1] * 4, + } + ) + unobserved = [tuple("AC"), tuple("BC"), tuple("CA"), tuple("CB"), tuple("CC")] + args = {"nth": [0]}.get(func, []) + + series_groupby = df.groupby(["cat_1", "cat_2"], observed=False)["value"] + agg = getattr(series_groupby, func) + result = agg(*args) + + for idx in 
unobserved: + val = result.loc[idx] + assert (pd.isna(zero_or_nan) and pd.isna(val)) or (val == zero_or_nan) + + # If we expect unobserved values to be zero, we also expect the dtype to be int + if zero_or_nan == 0: + assert np.issubdtype(result.dtype, np.integer) + + +def test_series_groupby_categorical_aggregation_getitem(): + # GH 8870 + d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} + df = pd.DataFrame(d) + cat = pd.cut(df["foo"], np.linspace(0, 20, 5)) + df["range"] = cat + groups = df.groupby(["range", "baz"], as_index=True, sort=True) + result = groups["foo"].agg("mean") + expected = groups.agg("mean")["foo"] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func, expected_values", + [(pd.Series.nunique, [1, 1, 2]), (pd.Series.count, [1, 2, 2])], +) +def test_groupby_agg_categorical_columns(func, expected_values): + # 31256 + df = pd.DataFrame( + { + "id": [0, 1, 2, 3, 4], + "groups": [0, 1, 1, 2, 2], + "value": pd.Categorical([0, 0, 0, 0, 1]), + } + ).set_index("id") + result = df.groupby("groups").agg(func) + + expected = pd.DataFrame( + {"value": expected_values}, index=pd.Index([0, 1, 2], name="groups"), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_agg_non_numeric(): + df = pd.DataFrame( + {"A": pd.Categorical(["a", "a", "b"], categories=["a", "b", "c"])} + ) + expected = pd.DataFrame({"A": [2, 1]}, index=[1, 2]) + + result = df.groupby([1, 2, 1]).agg(pd.Series.nunique) + tm.assert_frame_equal(result, expected) + + result = df.groupby([1, 2, 1]).nunique() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py new file mode 100644 index 00000000..b4239d7d --- /dev/null +++ b/pandas/tests/groupby/test_counting.py @@ -0,0 +1,222 @@ +from itertools import product + +import numpy as np +import pytest + +from pandas import DataFrame, MultiIndex, Period, Series, Timedelta, Timestamp +import 
pandas._testing as tm + + +class TestCounting: + def test_cumcount(self): + df = DataFrame([["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"]) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3]) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_empty(self): + ge = DataFrame().groupby(level=0) + se = Series(dtype=object).groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype="int64") + + tm.assert_series_equal(e, ge.cumcount()) + tm.assert_series_equal(e, se.cumcount()) + + def test_cumcount_dupe_index(self): + df = DataFrame( + [["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=[0] * 5 + ) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame([["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=mi) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=mi) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_cumcount_groupby_not_col(self): + df = DataFrame( + [["a"], ["a"], ["a"], ["b"], ["a"]], columns=["A"], index=[0] * 5 + ) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 1, 2, 0, 3], index=[0] * 5) + + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) + + def test_ngroup(self): + df = DataFrame({"A": list("aaaba")}) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 0, 0, 1, 0]) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_distinct(self): + df = DataFrame({"A": list("abcde")}) + g = df.groupby("A") + sg = g.A + + expected = Series(range(5), 
dtype="int64") + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_one_group(self): + df = DataFrame({"A": [0] * 5}) + g = df.groupby("A") + sg = g.A + + expected = Series([0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_empty(self): + ge = DataFrame().groupby(level=0) + se = Series(dtype=object).groupby(level=0) + + # edge case, as this is usually considered float + e = Series(dtype="int64") + + tm.assert_series_equal(e, ge.ngroup()) + tm.assert_series_equal(e, se.ngroup()) + + def test_ngroup_series_matches_frame(self): + df = DataFrame({"A": list("aaaba")}) + s = Series(list("aaaba")) + + tm.assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup()) + + def test_ngroup_dupe_index(self): + df = DataFrame({"A": list("aaaba")}, index=[0] * 5) + g = df.groupby("A") + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_mi(self): + mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) + df = DataFrame({"A": list("aaaba")}, index=mi) + g = df.groupby("A") + sg = g.A + expected = Series([0, 0, 0, 1, 0], index=mi) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_groupby_not_col(self): + df = DataFrame({"A": list("aaaba")}, index=[0] * 5) + g = df.groupby([0, 0, 0, 1, 0]) + sg = g.A + + expected = Series([0, 0, 0, 1, 0], index=[0] * 5) + + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) + + def test_ngroup_descending(self): + df = DataFrame(["a", "a", "b", "a", "b"], columns=["A"]) + g = df.groupby(["A"]) + + ascending = Series([0, 0, 1, 0, 1]) + descending = Series([1, 1, 0, 1, 0]) + + tm.assert_series_equal(descending, (g.ngroups - 1) - ascending) + 
tm.assert_series_equal(ascending, g.ngroup(ascending=True)) + tm.assert_series_equal(descending, g.ngroup(ascending=False)) + + def test_ngroup_matches_cumcount(self): + # verify one manually-worked out case works + df = DataFrame( + [["a", "x"], ["a", "y"], ["b", "x"], ["a", "x"], ["b", "y"]], + columns=["A", "X"], + ) + g = df.groupby(["A", "X"]) + g_ngroup = g.ngroup() + g_cumcount = g.cumcount() + expected_ngroup = Series([0, 1, 2, 0, 3]) + expected_cumcount = Series([0, 0, 0, 1, 0]) + + tm.assert_series_equal(g_ngroup, expected_ngroup) + tm.assert_series_equal(g_cumcount, expected_cumcount) + + def test_ngroup_cumcount_pair(self): + # brute force comparison for all small series + for p in product(range(3), repeat=4): + df = DataFrame({"a": p}) + g = df.groupby(["a"]) + + order = sorted(set(p)) + ngroupd = [order.index(val) for val in p] + cumcounted = [p[:i].count(val) for i, val in enumerate(p)] + + tm.assert_series_equal(g.ngroup(), Series(ngroupd)) + tm.assert_series_equal(g.cumcount(), Series(cumcounted)) + + def test_ngroup_respects_groupby_order(self): + np.random.seed(0) + df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) + for sort_flag in (False, True): + g = df.groupby(["a"], sort=sort_flag) + df["group_id"] = -1 + df["group_index"] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, "group_id"] = i + for j, ind in enumerate(group.index): + df.loc[ind, "group_index"] = j + + tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) + tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) + + @pytest.mark.parametrize( + "datetimelike", + [ + [Timestamp(f"2016-05-{i:02d} 20:09:25+00:00") for i in range(1, 4)], + [Timestamp(f"2016-05-{i:02d} 20:09:25") for i in range(1, 4)], + [Timedelta(x, unit="h") for x in range(1, 4)], + [Period(freq="2W", year=2017, month=x) for x in range(1, 4)], + ], + ) + def test_count_with_datetimelike(self, datetimelike): + # test for #13393, where DataframeGroupBy.count() 
fails + # when counting a datetimelike column. + + df = DataFrame({"x": ["a", "a", "b"], "y": datetimelike}) + res = df.groupby("x").count() + expected = DataFrame({"y": [2, 1]}, index=["a", "b"]) + expected.index.name = "x" + tm.assert_frame_equal(expected, res) + + def test_count_with_only_nans_in_first_group(self): + # GH21956 + df = DataFrame({"A": [np.nan, np.nan], "B": ["a", "b"], "C": [1, 2]}) + result = df.groupby(["A", "B"]).C.count() + mi = MultiIndex(levels=[[], ["a", "b"]], codes=[[], []], names=["A", "B"]) + expected = Series([], index=mi, dtype=np.int64, name="C") + tm.assert_series_equal(result, expected, check_index_type=False) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py new file mode 100644 index 00000000..c16ad812 --- /dev/null +++ b/pandas/tests/groupby/test_filters.py @@ -0,0 +1,597 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, Timestamp +import pandas._testing as tm + + +def test_filter_series(): + s = pd.Series([1, 3, 20, 5, 22, 24, 7]) + expected_odd = pd.Series([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = pd.Series([20, 22, 24], index=[2, 4, 5]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. 
+ tm.assert_series_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(s.index), + ) + tm.assert_series_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(s.index), + ) + + +def test_filter_single_column_df(): + df = pd.DataFrame([1, 3, 20, 5, 22, 24, 7]) + expected_odd = pd.DataFrame([1, 3, 5, 7], index=[0, 1, 3, 6]) + expected_even = pd.DataFrame([20, 22, 24], index=[2, 4, 5]) + grouper = df[0].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + tm.assert_frame_equal(grouped.filter(lambda x: x.mean() < 10), expected_odd) + tm.assert_frame_equal(grouped.filter(lambda x: x.mean() > 10), expected_even) + # Test dropna=False. + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() < 10, dropna=False), + expected_odd.reindex(df.index), + ) + tm.assert_frame_equal( + grouped.filter(lambda x: x.mean() > 10, dropna=False), + expected_even.reindex(df.index), + ) + + +def test_filter_multi_column_df(): + df = pd.DataFrame({"A": [1, 12, 12, 1], "B": [1, 1, 1, 1]}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = pd.DataFrame({"A": [12, 12], "B": [1, 1]}, index=[1, 2]) + tm.assert_frame_equal( + grouped.filter(lambda x: x["A"].sum() - x["B"].sum() > 10), expected + ) + + +def test_filter_mixed_df(): + df = pd.DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + expected = pd.DataFrame({"A": [12, 12], "B": ["b", "c"]}, index=[1, 2]) + tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 10), expected) + + +def test_filter_out_all_groups(): + s = pd.Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + tm.assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]]) + df = pd.DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + 
tm.assert_frame_equal(grouped.filter(lambda x: x["A"].sum() > 1000), df.loc[[]]) + + +def test_filter_out_no_groups(): + s = pd.Series([1, 3, 20, 5, 22, 24, 7]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + filtered = grouped.filter(lambda x: x.mean() > 0) + tm.assert_series_equal(filtered, s) + df = pd.DataFrame({"A": [1, 12, 12, 1], "B": "a b c d".split()}) + grouper = df["A"].apply(lambda x: x % 2) + grouped = df.groupby(grouper) + filtered = grouped.filter(lambda x: x["A"].mean() > 0) + tm.assert_frame_equal(filtered, df) + + +def test_filter_out_all_groups_in_df(): + # GH12768 + df = pd.DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + res = df.groupby("a") + res = res.filter(lambda x: x["b"].sum() > 5, dropna=False) + expected = pd.DataFrame({"a": [np.nan] * 3, "b": [np.nan] * 3}) + tm.assert_frame_equal(expected, res) + + df = pd.DataFrame({"a": [1, 1, 2], "b": [1, 2, 0]}) + res = df.groupby("a") + res = res.filter(lambda x: x["b"].sum() > 5, dropna=True) + expected = pd.DataFrame({"a": [], "b": []}, dtype="int64") + tm.assert_frame_equal(expected, res) + + +def test_filter_condition_raises(): + def raise_if_sum_is_zero(x): + if x.sum() == 0: + raise ValueError + else: + return x.sum() > 0 + + s = pd.Series([-1, 0, 1, 2]) + grouper = s.apply(lambda x: x % 2) + grouped = s.groupby(grouper) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + grouped.filter(raise_if_sum_is_zero) + + +def test_filter_with_axis_in_groupby(): + # issue 11041 + index = pd.MultiIndex.from_product([range(10), [0, 1]]) + data = pd.DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype="int64") + result = data.groupby(level=0, axis=1).filter(lambda x: x.iloc[0, 0] > 10) + expected = data.iloc[:, 12:20] + tm.assert_frame_equal(result, expected) + + +def test_filter_bad_shapes(): + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + s = df["B"] + g_df = df.groupby("B") + g_s = s.groupby(s) 
+ + f = lambda x: x + msg = "filter function returned a DataFrame, but expected a scalar bool" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + f = lambda x: x == 1 + msg = "filter function returned a DataFrame, but expected a scalar bool" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + f = lambda x: np.outer(x, x) + msg = "can't multiply sequence by non-int of type 'str'" + with pytest.raises(TypeError, match=msg): + g_df.filter(f) + msg = "the filter must return a boolean result" + with pytest.raises(TypeError, match=msg): + g_s.filter(f) + + +def test_filter_nan_is_false(): + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + s = df["B"] + g_df = df.groupby(df["B"]) + g_s = s.groupby(s) + + f = lambda x: np.nan + tm.assert_frame_equal(g_df.filter(f), df.loc[[]]) + tm.assert_series_equal(g_s.filter(f), s[[]]) + + +def test_filter_against_workaround(): + np.random.seed(0) + # Series of ints + s = Series(np.random.randint(0, 100, 1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + + old_way = s[grouped.transform(f).astype("bool")] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Series of floats + s = 100 * Series(np.random.random(1000)) + grouper = s.apply(lambda x: np.round(x, -1)) + grouped = s.groupby(grouper) + f = lambda x: x.mean() > 10 + old_way = s[grouped.transform(f).astype("bool")] + new_way = grouped.filter(f) + tm.assert_series_equal(new_way.sort_values(), old_way.sort_values()) + + # Set up DataFrame of ints, floats, strings. 
+ from string import ascii_lowercase + + letters = np.array(list(ascii_lowercase)) + N = 1000 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame( + { + "ints": Series(np.random.randint(0, 100, N)), + "floats": N / 10 * Series(np.random.random(N)), + "letters": Series(random_letters), + } + ) + + # Group by ints; filter on floats. + grouped = df.groupby("ints") + old_way = df[grouped.floats.transform(lambda x: x.mean() > N / 20).astype("bool")] + new_way = grouped.filter(lambda x: x["floats"].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + # Group by floats (rounded); filter on strings. + grouper = df.floats.apply(lambda x: np.round(x, -1)) + grouped = df.groupby(grouper) + old_way = df[grouped.letters.transform(lambda x: len(x) < N / 10).astype("bool")] + new_way = grouped.filter(lambda x: len(x.letters) < N / 10) + tm.assert_frame_equal(new_way, old_way) + + # Group by strings; filter on ints. + grouped = df.groupby("letters") + old_way = df[grouped.ints.transform(lambda x: x.mean() > N / 20).astype("bool")] + new_way = grouped.filter(lambda x: x["ints"].mean() > N / 20) + tm.assert_frame_equal(new_way, old_way) + + +def test_filter_using_len(): + # BUG GH4447 + df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) + grouped = df.groupby("B") + actual = grouped.filter(lambda x: len(x) > 2) + expected = DataFrame( + {"A": np.arange(2, 6), "B": list("bbbb"), "C": np.arange(2, 6)}, + index=np.arange(2, 6), + ) + tm.assert_frame_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = df.loc[[]] + tm.assert_frame_equal(actual, expected) + + # Series have always worked properly, but we'll test anyway. 
+ s = df["B"] + grouped = s.groupby(s) + actual = grouped.filter(lambda x: len(x) > 2) + expected = Series(4 * ["b"], index=np.arange(2, 6), name="B") + tm.assert_series_equal(actual, expected) + + actual = grouped.filter(lambda x: len(x) > 4) + expected = s[[]] + tm.assert_series_equal(actual, expected) + + +def test_filter_maintains_ordering(): + # Simple case: index is sequential. #4621 + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]} + ) + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Now index is sequentially decreasing. + df.index = np.arange(len(df) - 1, -1, -1) + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + # Index is shuffled. 
+ SHUFFLED = [4, 6, 7, 2, 1, 0, 5, 3] + df.index = df.index[SHUFFLED] + s = df["pid"] + grouped = df.groupby("tag") + actual = grouped.filter(lambda x: len(x) > 1) + expected = df.iloc[[1, 2, 4, 7]] + tm.assert_frame_equal(actual, expected) + + grouped = s.groupby(df["tag"]) + actual = grouped.filter(lambda x: len(x) > 1) + expected = s.iloc[[1, 2, 4, 7]] + tm.assert_series_equal(actual, expected) + + +def test_filter_multiple_timestamp(): + # GH 10114 + df = DataFrame( + { + "A": np.arange(5, dtype="int64"), + "B": ["foo", "bar", "foo", "bar", "bar"], + "C": Timestamp("20130101"), + } + ) + + grouped = df.groupby(["B", "C"]) + + result = grouped["A"].filter(lambda x: True) + tm.assert_series_equal(df["A"], result) + + result = grouped["A"].transform(len) + expected = Series([2, 3, 2, 3, 3], name="A") + tm.assert_series_equal(result, expected) + + result = grouped.filter(lambda x: True) + tm.assert_frame_equal(df, result) + + result = grouped.transform("sum") + expected = DataFrame({"A": [2, 8, 2, 8, 8]}) + tm.assert_frame_equal(result, expected) + + result = grouped.transform(len) + expected = DataFrame({"A": [2, 3, 2, 3, 3]}) + tm.assert_frame_equal(result, expected) + + +def test_filter_and_transform_with_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 1, 1, 0, 1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + 
tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_multiple_non_unique_int_index(): + # GH4620 + index = [1, 1, 1, 2, 0, 0, 0, 1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_float_index(): + # GH4620 + index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float) + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_timestamp_index(): + # GH4620 + t0 = Timestamp("2013-09-30 00:05:00") + t1 = Timestamp("2013-10-30 00:05:00") + t2 = Timestamp("2013-11-30 00:05:00") + index = [t1, t1, t1, t2, t1, t1, t0, t1] + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! 
+ tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_and_transform_with_non_unique_string_index(): + # GH4620 + index = list("bbbcbbab") + df = DataFrame( + {"pid": [1, 1, 1, 2, 2, 3, 3, 3], "tag": [23, 45, 62, 24, 45, 34, 25, 62]}, + index=index, + ) + grouped_df = df.groupby("tag") + ser = df["pid"] + grouped_ser = ser.groupby(df["tag"]) + expected_indexes = [1, 2, 4, 7] + + # Filter DataFrame + actual = grouped_df.filter(lambda x: len(x) > 1) + expected = df.iloc[expected_indexes] + tm.assert_frame_equal(actual, expected) + + actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) + expected = df.copy() + expected.iloc[[0, 3, 5, 6]] = np.nan + tm.assert_frame_equal(actual, expected) + + # Filter Series + actual = grouped_ser.filter(lambda x: len(x) > 1) + expected = ser.take(expected_indexes) + tm.assert_series_equal(actual, expected) + + actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) + NA = np.nan + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name="pid") + # ^ made manually because this can get confusing! + tm.assert_series_equal(actual, expected) + + # Transform Series + actual = grouped_ser.transform(len) + expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index, name="pid") + tm.assert_series_equal(actual, expected) + + # Transform (a column from) DataFrameGroupBy + actual = grouped_df.pid.transform(len) + tm.assert_series_equal(actual, expected) + + +def test_filter_has_access_to_grouped_cols(): + df = DataFrame([[1, 2], [1, 3], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + # previously didn't have access to col A #???? 
+ filt = g.filter(lambda x: x["A"].sum() == 2) + tm.assert_frame_equal(filt, df.iloc[[0, 1]]) + + +def test_filter_enforces_scalarness(): + df = pd.DataFrame( + [ + ["best", "a", "x"], + ["worst", "b", "y"], + ["best", "c", "x"], + ["best", "d", "y"], + ["worst", "d", "y"], + ["worst", "d", "y"], + ["best", "d", "z"], + ], + columns=["a", "b", "c"], + ) + with pytest.raises(TypeError, match="filter function returned a.*"): + df.groupby("c").filter(lambda g: g["a"] == "best") + + +def test_filter_non_bool_raises(): + df = pd.DataFrame( + [ + ["best", "a", 1], + ["worst", "b", 1], + ["best", "c", 1], + ["best", "d", 1], + ["worst", "d", 1], + ["worst", "d", 1], + ["best", "d", 1], + ], + columns=["a", "b", "c"], + ) + with pytest.raises(TypeError, match="filter function returned a.*"): + df.groupby("a").filter(lambda g: g.c.mean()) + + +def test_filter_dropna_with_empty_groups(): + # GH 10780 + data = pd.Series(np.random.rand(9), index=np.repeat([1, 2, 3], 3)) + groupped = data.groupby(level=0) + result_false = groupped.filter(lambda x: x.mean() > 1, dropna=False) + expected_false = pd.Series([np.nan] * 9, index=np.repeat([1, 2, 3], 3)) + tm.assert_series_equal(result_false, expected_false) + + result_true = groupped.filter(lambda x: x.mean() > 1, dropna=True) + expected_true = pd.Series(index=pd.Index([], dtype=int), dtype=np.float64) + tm.assert_series_equal(result_true, expected_true) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py new file mode 100644 index 00000000..16aec6e5 --- /dev/null +++ b/pandas/tests/groupby/test_function.py @@ -0,0 +1,1621 @@ +import builtins +import datetime as dt +from io import StringIO +from itertools import product +from string import ascii_lowercase + +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + NaT, + Series, + Timestamp, + _is_numpy_dev, + date_range, + isna, +) 
+import pandas._testing as tm +import pandas.core.nanops as nanops +from pandas.util import _test_decorators as td + + +@pytest.mark.parametrize("agg_func", ["any", "all"]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["foo", "bar", "baz"], + ["foo", "", ""], + ["", "", ""], + [1, 2, 3], + [1, 0, 0], + [0, 0, 0], + [1.0, 2.0, 3.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [True, True, True], + [True, False, False], + [False, False, False], + [np.nan, np.nan, np.nan], + ], +) +def test_groupby_bool_aggs(agg_func, skipna, vals): + df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) + + # Figure out expectation using Python builtin + exp = getattr(builtins, agg_func)(vals) + + # edge case for missing data with skipna and 'any' + if skipna and all(isna(vals)) and agg_func == "any": + exp = False + + exp_df = DataFrame([exp] * 2, columns=["val"], index=Index(["a", "b"], name="key")) + result = getattr(df.groupby("key"), agg_func)(skipna=skipna) + tm.assert_frame_equal(result, exp_df) + + +def test_max_min_non_numeric(): + # #2700 + aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]}) + + result = aa.groupby("nn").max() + assert "ss" in result + + result = aa.groupby("nn").max(numeric_only=False) + assert "ss" in result + + result = aa.groupby("nn").min() + assert "ss" in result + + result = aa.groupby("nn").min(numeric_only=False) + assert "ss" in result + + +def test_intercept_builtin_sum(): + s = Series([1.0, 2.0, np.nan, 3.0]) + grouped = s.groupby([0, 1, 2, 2]) + + result = grouped.agg(builtins.sum) + result2 = grouped.apply(builtins.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + +# @pytest.mark.parametrize("f", [max, min, sum]) +# def test_builtins_apply(f): + + +@pytest.mark.parametrize("f", [max, min, sum]) +@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key +def 
test_builtins_apply(keys, f): + # see gh-8155 + df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"]) + df["jolie"] = np.random.randn(1000) + + fname = f.__name__ + result = df.groupby(keys).apply(f) + ngroups = len(df.drop_duplicates(subset=keys)) + + assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))" + assert result.shape == (ngroups, 3), assert_msg + + tm.assert_frame_equal( + result, # numpy's equivalent function + df.groupby(keys).apply(getattr(np, fname)), + ) + + if f != sum: + expected = df.groupby(keys).agg(fname).reset_index() + expected.set_index(keys, inplace=True, drop=False) + tm.assert_frame_equal(result, expected, check_dtype=False) + + tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)()) + + +def test_arg_passthru(): + # make sure that we are passing thru kwargs + # to our agg functions + + # GH3668 + # GH5724 + df = pd.DataFrame( + { + "group": [1, 1, 2], + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "category_string": pd.Series(list("abc")).astype("category"), + "category_int": [7, 8, 9], + "datetime": pd.date_range("20130101", periods=3), + "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + }, + columns=[ + "group", + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ], + ) + + expected_columns_numeric = Index(["int", "float", "category_int"]) + + # mean / median + expected = pd.DataFrame( + { + "category_int": [7.5, 9], + "float": [4.5, 6.0], + "timedelta": [pd.Timedelta("1.5s"), pd.Timedelta("3s")], + "int": [1.5, 3], + "datetime": [ + pd.Timestamp("2013-01-01 12:00:00"), + pd.Timestamp("2013-01-03 00:00:00"), + ], + "datetimetz": [ + pd.Timestamp("2013-01-01 12:00:00", tz="US/Eastern"), + pd.Timestamp("2013-01-03 00:00:00", tz="US/Eastern"), + ], + }, + index=Index([1, 2], name="group"), + 
columns=["int", "float", "category_int", "datetime", "datetimetz", "timedelta"], + ) + + for attr in ["mean", "median"]: + f = getattr(df.groupby("group"), attr) + result = f() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = f(numeric_only=False) + tm.assert_frame_equal(result.reindex_like(expected), expected) + + # TODO: min, max *should* handle + # categorical (ordered) dtype + expected_columns = Index( + [ + "int", + "float", + "string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ] + ) + for attr in ["min", "max"]: + f = getattr(df.groupby("group"), attr) + result = f() + tm.assert_index_equal(result.columns, expected_columns) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) + + expected_columns = Index( + [ + "int", + "float", + "string", + "category_string", + "category_int", + "datetime", + "datetimetz", + "timedelta", + ] + ) + for attr in ["first", "last"]: + f = getattr(df.groupby("group"), attr) + result = f() + tm.assert_index_equal(result.columns, expected_columns) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) + + expected_columns = Index(["int", "float", "string", "category_int", "timedelta"]) + for attr in ["sum"]: + f = getattr(df.groupby("group"), attr) + result = f() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) + + expected_columns = Index(["int", "float", "category_int"]) + for attr in ["prod", "cumprod"]: + f = getattr(df.groupby("group"), attr) + result = f() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) + + # like min, max, but don't include strings + expected_columns = Index( + ["int", "float", "category_int", "datetime", "datetimetz", "timedelta"] + ) + for attr in ["cummin", 
"cummax"]: + f = getattr(df.groupby("group"), attr) + result = f() + # GH 15561: numeric_only=False set by default like min/max + tm.assert_index_equal(result.columns, expected_columns) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) + + expected_columns = Index(["int", "float", "category_int", "timedelta"]) + for attr in ["cumsum"]: + f = getattr(df.groupby("group"), attr) + result = f() + tm.assert_index_equal(result.columns, expected_columns_numeric) + + result = f(numeric_only=False) + tm.assert_index_equal(result.columns, expected_columns) + + +def test_non_cython_api(): + + # GH5610 + # non-cython calls should not include the grouper + + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], columns=["A", "B", "C"] + ) + g = df.groupby("A") + gni = df.groupby("A", as_index=False) + + # mad + expected = DataFrame([[0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + result = g.mad() + tm.assert_frame_equal(result, expected) + + expected = DataFrame([[0.0, 0.0], [0, np.nan]], columns=["A", "B"], index=[0, 1]) + result = gni.mad() + tm.assert_frame_equal(result, expected) + + # describe + expected_index = pd.Index([1, 3], name="A") + expected_col = pd.MultiIndex( + levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], + codes=[[0] * 8, list(range(8))], + ) + expected = pd.DataFrame( + [ + [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], + [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + ], + index=expected_index, + columns=expected_col, + ) + result = g.describe() + tm.assert_frame_equal(result, expected) + + expected = pd.concat( + [ + df[df.A == 1].describe().unstack().to_frame().T, + df[df.A == 3].describe().unstack().to_frame().T, + ] + ) + expected.index = pd.Index([0, 1]) + result = gni.describe() + tm.assert_frame_equal(result, expected) + + # any + expected = DataFrame( + [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] 
+ ) + expected.index.name = "A" + result = g.any() + tm.assert_frame_equal(result, expected) + + # idxmax + expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) + expected.index.name = "A" + result = g.idxmax() + tm.assert_frame_equal(result, expected) + + +def test_cython_api2(): + + # this takes the fast apply path + + # cumsum (GH5614) + df = DataFrame([[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]], columns=["A", "B", "C"]) + expected = DataFrame([[2, np.nan], [np.nan, 9], [4, 9]], columns=["B", "C"]) + result = df.groupby("A").cumsum() + tm.assert_frame_equal(result, expected) + + # GH 5755 - cumsum is a transformer and should ignore as_index + result = df.groupby("A", as_index=False).cumsum() + tm.assert_frame_equal(result, expected) + + # GH 13994 + result = df.groupby("A").cumsum(axis=1) + expected = df.cumsum(axis=1) + tm.assert_frame_equal(result, expected) + result = df.groupby("A").cumprod(axis=1) + expected = df.cumprod(axis=1) + tm.assert_frame_equal(result, expected) + + +def test_cython_median(): + df = DataFrame(np.random.randn(1000)) + df.values[::2] = np.nan + + labels = np.random.randint(0, 50, size=1000).astype(float) + labels[::17] = np.nan + + result = df.groupby(labels).median() + exp = df.groupby(labels).agg(nanops.nanmedian) + tm.assert_frame_equal(result, exp) + + df = DataFrame(np.random.randn(1000, 5)) + rs = df.groupby(labels).agg(np.median) + xp = df.groupby(labels).median() + tm.assert_frame_equal(rs, xp) + + +def test_median_empty_bins(observed): + df = pd.DataFrame(np.random.randint(0, 44, 500)) + + grps = range(0, 55, 5) + bins = pd.cut(df[0], grps) + + result = df.groupby(bins, observed=observed).median() + expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", ["int8", "int16", "int32", "int64", "float32", "float64", "uint64"] +) +@pytest.mark.parametrize( + "method,data", + [ + ("first", {"df": [{"a": 1, "b": 1}, 
{"a": 2, "b": 3}]}), + ("last", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("min", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), + ("max", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("nth", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}], "args": [1]}), + ("count", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 2}], "out_type": "int64"}), + ], +) +def test_groupby_non_arithmetic_agg_types(dtype, method, data): + # GH9311, GH6620 + df = pd.DataFrame( + [{"a": 1, "b": 1}, {"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 2, "b": 4}] + ) + + df["b"] = df.b.astype(dtype) + + if "args" not in data: + data["args"] = [] + + if "out_type" in data: + out_type = data["out_type"] + else: + out_type = dtype + + exp = data["df"] + df_out = pd.DataFrame(exp) + + df_out["b"] = df_out.b.astype(out_type) + df_out.set_index("a", inplace=True) + + grpd = df.groupby("a") + t = getattr(grpd, method)(*data["args"]) + tm.assert_frame_equal(t, df_out) + + +@pytest.mark.parametrize( + "i", + [ + ( + Timestamp("2011-01-15 12:50:28.502376"), + Timestamp("2011-01-20 12:50:28.593448"), + ), + (24650000000000001, 24650000000000002), + ], +) +def test_groupby_non_arithmetic_agg_int_like_precision(i): + # see gh-6620, gh-9311 + df = pd.DataFrame([{"a": 1, "b": i[0]}, {"a": 1, "b": i[1]}]) + + grp_exp = { + "first": {"expected": i[0]}, + "last": {"expected": i[1]}, + "min": {"expected": i[0]}, + "max": {"expected": i[1]}, + "nth": {"expected": i[1], "args": [1]}, + "count": {"expected": 2}, + } + + for method, data in grp_exp.items(): + if "args" not in data: + data["args"] = [] + + grouped = df.groupby("a") + res = getattr(grouped, method)(*data["args"]) + + assert res.iloc[0].b == data["expected"] + + +@pytest.mark.parametrize( + "func, values", + [ + ("idxmin", {"c_int": [0, 2], "c_float": [1, 3], "c_date": [1, 2]}), + ("idxmax", {"c_int": [1, 3], "c_float": [0, 2], "c_date": [0, 3]}), + ], +) +def test_idxmin_idxmax_returns_int_types(func, values): + # GH 25444 + df = pd.DataFrame( + { + "name": 
["A", "A", "B", "B"], + "c_int": [1, 2, 3, 4], + "c_float": [4.02, 3.03, 2.04, 1.05], + "c_date": ["2019", "2018", "2016", "2017"], + } + ) + df["c_date"] = pd.to_datetime(df["c_date"]) + + result = getattr(df.groupby("name"), func)() + + expected = pd.DataFrame(values, index=Index(["A", "B"], name="name")) + + tm.assert_frame_equal(result, expected) + + +def test_fill_consistency(): + + # GH9221 + # pass thru keyword arguments to the generated wrapper + # are set if the passed kw is None (only) + df = DataFrame( + index=pd.MultiIndex.from_product( + [["value1", "value2"], date_range("2014-01-01", "2014-01-06")] + ), + columns=Index(["1", "2"], name="id"), + ) + df["1"] = [ + np.nan, + 1, + np.nan, + np.nan, + 11, + np.nan, + np.nan, + 2, + np.nan, + np.nan, + 22, + np.nan, + ] + df["2"] = [ + np.nan, + 3, + np.nan, + np.nan, + 33, + np.nan, + np.nan, + 4, + np.nan, + np.nan, + 44, + np.nan, + ] + + expected = df.groupby(level=0, axis=0).fillna(method="ffill") + result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T + tm.assert_frame_equal(result, expected) + + +def test_groupby_cumprod(): + # GH 4095 + df = pd.DataFrame({"key": ["b"] * 10, "value": 2}) + + actual = df.groupby("key")["value"].cumprod() + expected = df.groupby("key")["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + df = pd.DataFrame({"key": ["b"] * 100, "value": 2}) + actual = df.groupby("key")["value"].cumprod() + # if overflows, groupby product casts to float + # while numpy passes back invalid values + df["value"] = df["value"].astype(float) + expected = df.groupby("key")["value"].apply(lambda x: x.cumprod()) + expected.name = "value" + tm.assert_series_equal(actual, expected) + + +def scipy_sem(*args, **kwargs): + from scipy.stats import sem + + return sem(*args, ddof=1, **kwargs) + + +@pytest.mark.parametrize( + "op,targop", + [ + ("mean", np.mean), + ("median", np.median), + ("std", np.std), + ("var", np.var), + ("sum", 
np.sum), + ("prod", np.prod), + ("min", np.min), + ("max", np.max), + ("first", lambda x: x.iloc[0]), + ("last", lambda x: x.iloc[-1]), + ("count", np.size), + pytest.param("sem", scipy_sem, marks=td.skip_if_no_scipy), + ], +) +def test_ops_general(op, targop): + df = DataFrame(np.random.randn(1000)) + labels = np.random.randint(0, 50, size=1000).astype(float) + + result = getattr(df.groupby(labels), op)().astype(float) + expected = df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) + + +def test_max_nan_bug(): + raw = """,Date,app,File +-04-23,2013-04-23 00:00:00,,log080001.log +-05-06,2013-05-06 00:00:00,,log.log +-05-07,2013-05-07 00:00:00,OE,xlsx""" + + df = pd.read_csv(StringIO(raw), parse_dates=[0]) + gb = df.groupby("Date") + r = gb[["File"]].max() + e = gb["File"].max().to_frame() + tm.assert_frame_equal(r, e) + assert not r["File"].isna().any() + + +def test_nlargest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list("a" * 5 + "b" * 5)) + gb = a.groupby(b) + r = gb.nlargest(3) + e = Series( + [7, 5, 3, 10, 9, 6], + index=MultiIndex.from_arrays([list("aaabbb"), [3, 2, 1, 9, 5, 8]]), + ) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series( + [3, 2, 1, 3, 3, 2], + index=MultiIndex.from_arrays([list("aaabbb"), [2, 3, 1, 6, 5, 7]]), + ) + tm.assert_series_equal(gb.nlargest(3, keep="last"), e) + + +def test_nlargest_mi_grouper(): + # see gh-21411 + npr = np.random.RandomState(123456789) + + dts = date_range("20180101", periods=10) + iterables = [dts, ["one", "two"]] + + idx = MultiIndex.from_product(iterables, names=["first", "second"]) + s = Series(npr.randn(20), index=idx) + + result = s.groupby("first").nlargest(1) + + exp_idx = MultiIndex.from_tuples( + [ + (dts[0], dts[0], "one"), + (dts[1], dts[1], "one"), + (dts[2], dts[2], "one"), + (dts[3], dts[3], "two"), + (dts[4], dts[4], "one"), + (dts[5], dts[5], "one"), + (dts[6], dts[6], "one"), + (dts[7], dts[7], 
"one"), + (dts[8], dts[8], "two"), + (dts[9], dts[9], "one"), + ], + names=["first", "first", "second"], + ) + + exp_values = [ + 2.2129019979039612, + 1.8417114045748335, + 0.858963679564603, + 1.3759151378258088, + 0.9430284594687134, + 0.5296914208183142, + 0.8318045593815487, + -0.8476703342910327, + 0.3804446884133735, + -0.8028845810770998, + ] + + expected = Series(exp_values, index=exp_idx) + tm.assert_series_equal(result, expected, check_exact=False, check_less_precise=True) + + +def test_nsmallest(): + a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) + b = Series(list("a" * 5 + "b" * 5)) + gb = a.groupby(b) + r = gb.nsmallest(3) + e = Series( + [1, 2, 3, 0, 4, 6], + index=MultiIndex.from_arrays([list("aaabbb"), [0, 4, 1, 6, 7, 8]]), + ) + tm.assert_series_equal(r, e) + + a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) + gb = a.groupby(b) + e = Series( + [0, 1, 1, 0, 1, 2], + index=MultiIndex.from_arrays([list("aaabbb"), [4, 1, 0, 9, 8, 7]]), + ) + tm.assert_series_equal(gb.nsmallest(3, keep="last"), e) + + +@pytest.mark.parametrize("func", ["mean", "var", "std", "cumprod", "cumsum"]) +def test_numpy_compat(func): + # see gh-12811 + df = pd.DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]}) + g = df.groupby("A") + + msg = "numpy operations are not valid with groupby" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(g, func)(foo=1) + + +@pytest.mark.xfail( + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, +) +def test_cummin_cummax(): + # GH 15048 + num_types = [np.int32, np.int64, np.float32, np.float64] + num_mins = [ + np.iinfo(np.int32).min, + np.iinfo(np.int64).min, + np.finfo(np.float32).min, + np.finfo(np.float64).min, + ] + num_max = [ + np.iinfo(np.int32).max, + np.iinfo(np.int64).max, + np.finfo(np.float32).max, + np.finfo(np.float64).max, + ] + base_df = pd.DataFrame( + {"A": [1, 1, 1, 1, 2, 2, 2, 2], "B": [3, 4, 
3, 2, 2, 3, 2, 1]} + ) + expected_mins = [3, 3, 3, 2, 2, 2, 2, 1] + expected_maxs = [3, 4, 4, 4, 2, 3, 3, 3] + + for dtype, min_val, max_val in zip(num_types, num_mins, num_max): + df = base_df.astype(dtype) + + # cummin + expected = pd.DataFrame({"B": expected_mins}).astype(dtype) + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test cummin w/ min value for dtype + df.loc[[2, 6], "B"] = min_val + expected.loc[[2, 3, 6, 7], "B"] = min_val + result = df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + # cummax + expected = pd.DataFrame({"B": expected_maxs}).astype(dtype) + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + result = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test cummax w/ max value for dtype + df.loc[[2, 6], "B"] = max_val + expected.loc[[2, 3, 6, 7], "B"] = max_val + result = df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test nan in some values + base_df.loc[[0, 2, 4, 6], "B"] = np.nan + expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 2, np.nan, 3, np.nan, 1]}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(result, expected) + expected = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(result, expected) + + expected = pd.DataFrame({"B": [np.nan, 4, np.nan, 4, np.nan, 3, np.nan, 3]}) + result = base_df.groupby("A").cummax() + tm.assert_frame_equal(result, expected) + expected = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(result, expected) + + # Test nan in 
entire column + base_df["B"] = np.nan + expected = pd.DataFrame({"B": [np.nan] * 8}) + result = base_df.groupby("A").cummin() + tm.assert_frame_equal(expected, result) + result = base_df.groupby("A").B.apply(lambda x: x.cummin()).to_frame() + tm.assert_frame_equal(expected, result) + result = base_df.groupby("A").cummax() + tm.assert_frame_equal(expected, result) + result = base_df.groupby("A").B.apply(lambda x: x.cummax()).to_frame() + tm.assert_frame_equal(expected, result) + + # GH 15561 + df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(["2001"]))) + expected = pd.Series(pd.to_datetime("2001"), index=[0], name="b") + for method in ["cummax", "cummin"]: + result = getattr(df.groupby("a")["b"], method)() + tm.assert_series_equal(expected, result) + + # GH 15635 + df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1])) + result = df.groupby("a").b.cummax() + expected = pd.Series([2, 1, 2], name="b") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2])) + result = df.groupby("a").b.cummin() + expected = pd.Series([1, 2, 1], name="b") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "in_vals, out_vals", + [ + # Basics: strictly increasing (T), strictly decreasing (F), + # abs val increasing (F), non-strictly increasing (T) + ([1, 2, 5, 3, 2, 0, 4, 5, -6, 1, 1], [True, False, False, True]), + # Test with inf vals + ( + [1, 2.1, np.inf, 3, 2, np.inf, -np.inf, 5, 11, 1, -np.inf], + [True, False, True, False], + ), + # Test with nan vals; should always be False + ( + [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False], + ), + ], +) +def test_is_monotonic_increasing(in_vals, out_vals): + # GH 17015 + source_dict = { + "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"], + "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"], + "C": in_vals, + } + df = pd.DataFrame(source_dict) + result = df.groupby("B").C.is_monotonic_increasing + index = 
Index(list("abcd"), name="B") + expected = pd.Series(index=index, data=out_vals, name="C") + tm.assert_series_equal(result, expected) + + # Also check result equal to manually taking x.is_monotonic_increasing. + expected = df.groupby(["B"]).C.apply(lambda x: x.is_monotonic_increasing) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "in_vals, out_vals", + [ + # Basics: strictly decreasing (T), strictly increasing (F), + # abs val decreasing (F), non-strictly increasing (T) + ([10, 9, 7, 3, 4, 5, -3, 2, 0, 1, 1], [True, False, False, True]), + # Test with inf vals + ( + [np.inf, 1, -np.inf, np.inf, 2, -3, -np.inf, 5, -3, -np.inf, -np.inf], + [True, True, False, True], + ), + # Test with nan vals; should always be False + ( + [1, 2, np.nan, 3, 2, np.nan, np.nan, 5, -np.inf, 1, np.nan], + [False, False, False, False], + ), + ], +) +def test_is_monotonic_decreasing(in_vals, out_vals): + # GH 17015 + source_dict = { + "A": ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"], + "B": ["a", "a", "a", "b", "b", "b", "c", "c", "c", "d", "d"], + "C": in_vals, + } + + df = pd.DataFrame(source_dict) + result = df.groupby("B").C.is_monotonic_decreasing + index = Index(list("abcd"), name="B") + expected = pd.Series(index=index, data=out_vals, name="C") + tm.assert_series_equal(result, expected) + + +# describe +# -------------------------------- + + +def test_apply_describe_bug(mframe): + grouped = mframe.groupby(level="first") + grouped.describe() # it works! 
+ + +def test_series_describe_multikey(): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + tm.assert_series_equal(result["mean"], grouped.mean(), check_names=False) + tm.assert_series_equal(result["std"], grouped.std(), check_names=False) + tm.assert_series_equal(result["min"], grouped.min(), check_names=False) + + +def test_series_describe_single(): + ts = tm.makeTimeSeries() + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x.describe()) + expected = grouped.describe().stack() + tm.assert_series_equal(result, expected) + + +def test_series_index_name(df): + grouped = df.loc[:, ["C"]].groupby(df["A"]) + result = grouped.agg(lambda x: x.mean()) + assert result.index.name == "A" + + +def test_frame_describe_multikey(tsframe): + grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + desc_groups = [] + for col in tsframe: + group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels + group_col = pd.MultiIndex( + levels=[[col], group.columns], + codes=[[0] * len(group.columns), range(len(group.columns))], + ) + group = pd.DataFrame(group.values, columns=group_col, index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) + + groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) + result = groupedT.describe() + expected = tsframe.describe().T + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + codes=[[0, 0, 1, 1], range(len(expected.index))], + ) + tm.assert_frame_equal(result, expected) + + +def test_frame_describe_tupleindex(): + + # GH 14848 - regression from 0.19.0 to 0.19.1 + df1 = DataFrame( + { + "x": [1, 2, 3, 4, 5] * 3, + "y": [10, 20, 30, 40, 50] * 3, + "z": [100, 200, 300, 400, 500] * 3, + } + ) + df1["k"] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={"k": "key"}) + msg 
= "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + df1.groupby("k").describe() + with pytest.raises(ValueError, match=msg): + df2.groupby("key").describe() + + +def test_frame_describe_unstacked_format(): + # GH 4792 + prices = { + pd.Timestamp("2011-01-06 10:59:05", tz=None): 24990, + pd.Timestamp("2011-01-06 12:43:33", tz=None): 25499, + pd.Timestamp("2011-01-06 12:54:09", tz=None): 25499, + } + volumes = { + pd.Timestamp("2011-01-06 10:59:05", tz=None): 1500000000, + pd.Timestamp("2011-01-06 12:43:33", tz=None): 5000000000, + pd.Timestamp("2011-01-06 12:54:09", tz=None): 100000000, + } + df = pd.DataFrame({"PRICE": prices, "VOLUME": volumes}) + result = df.groupby("PRICE").VOLUME.describe() + data = [ + df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist(), + ] + expected = pd.DataFrame( + data, + index=pd.Index([24990, 25499], name="PRICE"), + columns=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_frame_equal(result, expected) + + +# nunique +# -------------------------------- + + +@pytest.mark.parametrize("n", 10 ** np.arange(2, 6)) +@pytest.mark.parametrize("m", [10, 100, 1000]) +@pytest.mark.parametrize("sort", [False, True]) +@pytest.mark.parametrize("dropna", [False, True]) +def test_series_groupby_nunique(n, m, sort, dropna): + def check_nunique(df, keys, as_index=True): + original_df = df.copy() + gr = df.groupby(keys, as_index=as_index, sort=sort) + left = gr["julie"].nunique(dropna=dropna) + + gr = df.groupby(keys, as_index=as_index, sort=sort) + right = gr["julie"].apply(Series.nunique, dropna=dropna) + if not as_index: + right = right.reset_index(drop=True) + + tm.assert_series_equal(left, right, check_names=False) + tm.assert_frame_equal(df, original_df) + + days = date_range("2015-08-23", periods=10) + + frame = DataFrame( + { + "jim": np.random.choice(list(ascii_lowercase), n), + "joe": np.random.choice(days, n), + 
"julie": np.random.randint(0, m, n), + } + ) + + check_nunique(frame, ["jim"]) + check_nunique(frame, ["jim", "joe"]) + + frame.loc[1::17, "jim"] = None + frame.loc[3::37, "joe"] = None + frame.loc[7::19, "julie"] = None + frame.loc[8::19, "julie"] = None + frame.loc[9::19, "julie"] = None + + check_nunique(frame, ["jim"]) + check_nunique(frame, ["jim", "joe"]) + check_nunique(frame, ["jim"], as_index=False) + check_nunique(frame, ["jim", "joe"], as_index=False) + + +def test_nunique(): + df = DataFrame({"A": list("abbacc"), "B": list("abxacc"), "C": list("abbacx")}) + + expected = DataFrame({"A": [1] * 3, "B": [1, 2, 1], "C": [1, 1, 2]}) + result = df.groupby("A", as_index=False).nunique() + tm.assert_frame_equal(result, expected) + + # as_index + expected.index = list("abc") + expected.index.name = "A" + result = df.groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + # with na + result = df.replace({"x": None}).groupby("A").nunique(dropna=False) + tm.assert_frame_equal(result, expected) + + # dropna + expected = DataFrame({"A": [1] * 3, "B": [1] * 3, "C": [1] * 3}, index=list("abc")) + expected.index.name = "A" + result = df.replace({"x": None}).groupby("A").nunique() + tm.assert_frame_equal(result, expected) + + +def test_nunique_with_object(): + # GH 11077 + data = pd.DataFrame( + [ + [100, 1, "Alice"], + [200, 2, "Bob"], + [300, 3, "Charlie"], + [-400, 4, "Dan"], + [500, 5, "Edith"], + ], + columns=["amount", "id", "name"], + ) + + result = data.groupby(["id", "amount"])["name"].nunique() + index = MultiIndex.from_arrays([data.id, data.amount]) + expected = pd.Series([1] * 5, name="name", index=index) + tm.assert_series_equal(result, expected) + + +def test_nunique_with_empty_series(): + # GH 12553 + data = pd.Series(name="name", dtype=object) + result = data.groupby(level=0).nunique() + expected = pd.Series(name="name", dtype="int64") + tm.assert_series_equal(result, expected) + + +def test_nunique_with_timegrouper(): + # GH 13453 + test = 
pd.DataFrame( + { + "time": [ + Timestamp("2016-06-28 09:35:35"), + Timestamp("2016-06-28 16:09:30"), + Timestamp("2016-06-28 16:46:28"), + ], + "data": ["1", "2", "3"], + } + ).set_index("time") + result = test.groupby(pd.Grouper(freq="h"))["data"].nunique() + expected = test.groupby(pd.Grouper(freq="h"))["data"].apply(pd.Series.nunique) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "key, data, dropna, expected", + [ + ( + ["x", "x", "x"], + [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "y", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "x", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ], +) +def test_nunique_with_NaT(key, data, dropna, expected): + # GH 27951 + df = pd.DataFrame({"key": key, "data": data}) + result = df.groupby(["key"])["data"].nunique(dropna=dropna) + tm.assert_series_equal(result, expected) + + +def test_nunique_preserves_column_level_names(): + # GH 23222 + test = pd.DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) + result = test.groupby([0, 0, 0]).nunique() + expected = pd.DataFrame([2], columns=test.columns) + tm.assert_frame_equal(result, expected) + + +# count +# -------------------------------- + + +def test_groupby_timedelta_cython_count(): + df = DataFrame( + {"g": list("ab" * 2), "delt": np.arange(4).astype("timedelta64[ns]")} + ) + expected = Series([2, 2], index=pd.Index(["a", "b"], name="g"), name="delt") + result = df.groupby("g").delt.count() + 
tm.assert_series_equal(expected, result) + + +def test_count(): + n = 1 << 15 + dr = date_range("2015-08-30", periods=n // 10, freq="T") + + df = DataFrame( + { + "1st": np.random.choice(list(ascii_lowercase), n), + "2nd": np.random.randint(0, 5, n), + "3rd": np.random.randn(n).round(3), + "4th": np.random.randint(-10, 10, n), + "5th": np.random.choice(dr, n), + "6th": np.random.randn(n).round(3), + "7th": np.random.randn(n).round(3), + "8th": np.random.choice(dr, n) - np.random.choice(dr, 1), + "9th": np.random.choice(list(ascii_lowercase), n), + } + ) + + for col in df.columns.drop(["1st", "2nd", "4th"]): + df.loc[np.random.choice(n, n // 10), col] = np.nan + + df["9th"] = df["9th"].astype("category") + + for key in ["1st", "2nd", ["1st", "2nd"]]: + left = df.groupby(key).count() + right = df.groupby(key).apply(DataFrame.count).drop(key, axis=1) + tm.assert_frame_equal(left, right) + + +def test_count_non_nulls(): + # GH#5610 + # count counts non-nulls + df = pd.DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, np.nan]], + columns=["A", "B", "C"], + ) + + count_as = df.groupby("A").count() + count_not_as = df.groupby("A", as_index=False).count() + + expected = DataFrame([[1, 2], [0, 0]], columns=["B", "C"], index=[1, 3]) + expected.index.name = "A" + tm.assert_frame_equal(count_not_as, expected.reset_index()) + tm.assert_frame_equal(count_as, expected) + + count_B = df.groupby("A")["B"].count() + tm.assert_series_equal(count_B, expected["B"]) + + +def test_count_object(): + df = pd.DataFrame({"a": ["a"] * 3 + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + result = df.groupby("c").a.count() + expected = pd.Series([3, 3], index=pd.Index([2, 3], name="c"), name="a") + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"a": ["a", np.nan, np.nan] + ["b"] * 3, "c": [2] * 3 + [3] * 3}) + result = df.groupby("c").a.count() + expected = pd.Series([1, 3], index=pd.Index([2, 3], name="c"), name="a") + tm.assert_series_equal(result, expected) + + +def 
test_count_cross_type(): + # GH8169 + vals = np.hstack( + (np.random.randint(0, 5, (100, 2)), np.random.randint(0, 2, (100, 2))) + ) + + df = pd.DataFrame(vals, columns=["a", "b", "c", "d"]) + df[df == 2] = np.nan + expected = df.groupby(["c", "d"]).count() + + for t in ["float32", "object"]: + df["a"] = df["a"].astype(t) + df["b"] = df["b"].astype(t) + result = df.groupby(["c", "d"]).count() + tm.assert_frame_equal(result, expected) + + +def test_lower_int_prec_count(): + df = DataFrame( + { + "a": np.array([0, 1, 2, 100], np.int8), + "b": np.array([1, 2, 3, 6], np.uint32), + "c": np.array([4, 5, 6, 8], np.int16), + "grp": list("ab" * 2), + } + ) + result = df.groupby("grp").count() + expected = DataFrame( + {"a": [2, 2], "b": [2, 2], "c": [2, 2]}, index=pd.Index(list("ab"), name="grp") + ) + tm.assert_frame_equal(result, expected) + + +def test_count_uses_size_on_exception(): + class RaisingObjectException(Exception): + pass + + class RaisingObject: + def __init__(self, msg="I will raise inside Cython"): + super().__init__() + self.msg = msg + + def __eq__(self, other): + # gets called in Cython to check that raising calls the method + raise RaisingObjectException(self.msg) + + df = DataFrame({"a": [RaisingObject() for _ in range(4)], "grp": list("ab" * 2)}) + result = df.groupby("grp").count() + expected = DataFrame({"a": [2, 2]}, index=pd.Index(list("ab"), name="grp")) + tm.assert_frame_equal(result, expected) + + +# size +# -------------------------------- + + +def test_size(df): + grouped = df.groupby(["A", "B"]) + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + grouped = df.groupby("A") + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + grouped = df.groupby("B") + result = grouped.size() + for key, group in grouped: + assert result[key] == len(group) + + df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("abc")) + for sort, key in product((False, True), ("a", 
"b", ["a", "b"])): + left = df.groupby(key, sort=sort).size() + right = df.groupby(key, sort=sort)["c"].apply(lambda a: a.shape[0]) + tm.assert_series_equal(left, right, check_names=False) + + # GH11699 + df = DataFrame(columns=["A", "B"]) + out = Series(dtype="int64", index=Index([], name="A")) + tm.assert_series_equal(df.groupby("A").size(), out) + + +def test_size_groupby_all_null(): + # GH23050 + # Assert no 'Value Error : Length of passed values is 2, index implies 0' + df = DataFrame({"A": [None, None]}) # all-null groups + result = df.groupby("A").size() + expected = Series(dtype="int64", index=Index([], name="A")) + tm.assert_series_equal(result, expected) + + +def test_size_period_index(): + # https://github.com/pandas-dev/pandas/issues/34010 + ser = Series([1], index=pd.PeriodIndex(["2000"], name="A", freq="D")) + grp = ser.groupby(level="A") + result = grp.size() + tm.assert_series_equal(result, ser) + + +# quantile +# -------------------------------- +@pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] +) +@pytest.mark.parametrize( + "a_vals,b_vals", + [ + # Ints + ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1]), + ([1, 2, 3, 4], [4, 3, 2, 1]), + ([1, 2, 3, 4, 5], [4, 3, 2, 1]), + # Floats + ([1.0, 2.0, 3.0, 4.0, 5.0], [5.0, 4.0, 3.0, 2.0, 1.0]), + # Missing data + ([1.0, np.nan, 3.0, np.nan, 5.0], [5.0, np.nan, 3.0, np.nan, 1.0]), + ([np.nan, 4.0, np.nan, 2.0, np.nan], [np.nan, 4.0, np.nan, 2.0, np.nan]), + # Timestamps + ( + list(pd.date_range("1/1/18", freq="D", periods=5)), + list(pd.date_range("1/1/18", freq="D", periods=5))[::-1], + ), + # All NA + ([np.nan] * 5, [np.nan] * 5), + ], +) +@pytest.mark.parametrize("q", [0, 0.25, 0.5, 0.75, 1]) +def test_quantile(interpolation, a_vals, b_vals, q): + if interpolation == "nearest" and q == 0.5 and b_vals == [4, 3, 2, 1]: + pytest.skip( + "Unclear numpy expectation for nearest result with equidistant data" + ) + + a_expected = pd.Series(a_vals).quantile(q, 
interpolation=interpolation) + b_expected = pd.Series(b_vals).quantile(q, interpolation=interpolation) + + df = DataFrame( + {"key": ["a"] * len(a_vals) + ["b"] * len(b_vals), "val": a_vals + b_vals} + ) + + expected = DataFrame( + [a_expected, b_expected], columns=["val"], index=Index(["a", "b"], name="key") + ) + result = df.groupby("key").quantile(q, interpolation=interpolation) + + tm.assert_frame_equal(result, expected) + + +def test_quantile_array(): + # https://github.com/pandas-dev/pandas/issues/27526 + df = pd.DataFrame({"A": [0, 1, 2, 3, 4]}) + result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25]) + + index = pd.MultiIndex.from_product([[0, 1], [0.25]]) + expected = pd.DataFrame({"A": [0.25, 2.50]}, index=index) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]}) + index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]]) + + result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75]) + expected = pd.DataFrame( + {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array2(): + # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 + df = pd.DataFrame( + np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC") + ) + result = df.groupby("A").quantile([0.3, 0.7]) + expected = pd.DataFrame( + { + "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0], + "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0], + }, + index=pd.MultiIndex.from_product( + [[0, 1, 2, 3, 4], [0.3, 0.7]], names=["A", None] + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_no_sort(): + df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) + result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75]) + expected = pd.DataFrame( + {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]), + ) 
+ tm.assert_frame_equal(result, expected) + + result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25]) + expected = pd.DataFrame( + {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]}, + index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_quantile_array_multiple_levels(): + df = pd.DataFrame( + {"A": [0, 1, 2], "B": [3, 4, 5], "c": ["a", "a", "a"], "d": ["a", "a", "b"]} + ) + result = df.groupby(["c", "d"]).quantile([0.25, 0.75]) + index = pd.MultiIndex.from_tuples( + [("a", "a", 0.25), ("a", "a", 0.75), ("a", "b", 0.25), ("a", "b", 0.75)], + names=["c", "d", None], + ) + expected = pd.DataFrame( + {"A": [0.25, 0.75, 2.0, 2.0], "B": [3.25, 3.75, 5.0, 5.0]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("frame_size", [(2, 3), (100, 10)]) +@pytest.mark.parametrize("groupby", [[0], [0, 1]]) +@pytest.mark.parametrize("q", [[0.5, 0.6]]) +def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, q): + # GH30289 + nrow, ncol = frame_size + df = pd.DataFrame( + np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol) + ) + + idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q] + idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [ + list(range(len(q))) * min(nrow, 4) + ] + expected_index = pd.MultiIndex( + levels=idx_levels, codes=idx_codes, names=groupby + [None] + ) + expected_values = [ + [float(x)] * (ncol - len(groupby)) for x in range(min(nrow, 4)) for _ in q + ] + expected_columns = [x for x in range(ncol) if x not in groupby] + expected = pd.DataFrame( + expected_values, index=expected_index, columns=expected_columns + ) + result = df.groupby(groupby).quantile(q) + + tm.assert_frame_equal(result, expected) + + +def test_quantile_raises(): + df = pd.DataFrame( + [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"] + ) + + with pytest.raises(TypeError, match="cannot be 
performed against 'object' dtypes"): + df.groupby("key").quantile() + + +def test_quantile_out_of_bounds_q_raises(): + # https://github.com/pandas-dev/pandas/issues/27470 + df = pd.DataFrame(dict(a=[0, 0, 0, 1, 1, 1], b=range(6))) + g = df.groupby([0, 0, 0, 1, 1, 1]) + with pytest.raises(ValueError, match="Got '50.0' instead"): + g.quantile(50) + + with pytest.raises(ValueError, match="Got '-1.0' instead"): + g.quantile(-1) + + +def test_quantile_missing_group_values_no_segfaults(): + # GH 28662 + data = np.array([1.0, np.nan, 1.0]) + df = pd.DataFrame(dict(key=data, val=range(3))) + + # Random segfaults; would have been guaranteed in loop + grp = df.groupby("key") + for _ in range(100): + grp.quantile() + + +@pytest.mark.parametrize( + "key, val, expected_key, expected_val", + [ + ([1.0, np.nan, 3.0, np.nan], range(4), [1.0, 3.0], [0.0, 2.0]), + ([1.0, np.nan, 2.0, 2.0], range(4), [1.0, 2.0], [0.0, 2.5]), + (["a", "b", "b", np.nan], range(4), ["a", "b"], [0, 1.5]), + ([0], [42], [0], [42.0]), + ([], [], np.array([], dtype="float64"), np.array([], dtype="float64")), + ], +) +def test_quantile_missing_group_values_correct_results( + key, val, expected_key, expected_val +): + # GH 28662, GH 33200, GH 33569 + df = pd.DataFrame({"key": key, "val": val}) + + expected = pd.DataFrame( + expected_val, index=pd.Index(expected_key, name="key"), columns=["val"] + ) + + grp = df.groupby("key") + + result = grp.quantile(0.5) + tm.assert_frame_equal(result, expected) + + result = grp.quantile() + tm.assert_frame_equal(result, expected) + + +# pipe +# -------------------------------- + + +def test_pipe(): + # Test the pipe method of DataFrameGroupBy. 
+ # Issue #17871 + + random_state = np.random.RandomState(1234567890) + + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": random_state.randn(8), + "C": random_state.randn(8), + } + ) + + def f(dfgb): + return dfgb.B.max() - dfgb.C.min().min() + + def square(srs): + return srs ** 2 + + # Note that the transformations are + # GroupBy -> Series + # Series -> Series + # This then chains the GroupBy.pipe and the + # NDFrame.pipe methods + result = df.groupby("A").pipe(f).pipe(square) + + index = Index(["bar", "foo"], dtype="object", name="A") + expected = pd.Series([8.99110003361, 8.17516964785], name="B", index=index) + + tm.assert_series_equal(expected, result) + + +def test_pipe_args(): + # Test passing args to the pipe method of DataFrameGroupBy. + # Issue #17871 + + df = pd.DataFrame( + { + "group": ["A", "A", "B", "B", "C"], + "x": [1.0, 2.0, 3.0, 2.0, 5.0], + "y": [10.0, 100.0, 1000.0, -100.0, -1000.0], + } + ) + + def f(dfgb, arg1): + return dfgb.filter(lambda grp: grp.y.mean() > arg1, dropna=False).groupby( + dfgb.grouper + ) + + def g(dfgb, arg2): + return dfgb.sum() / dfgb.sum().sum() + arg2 + + def h(df, arg3): + return df.x + df.y - arg3 + + result = df.groupby("group").pipe(f, 0).pipe(g, 10).pipe(h, 100) + + # Assert the results here + index = pd.Index(["A", "B", "C"], name="group") + expected = pd.Series([-79.5160891089, -78.4839108911, -80], index=index) + + tm.assert_series_equal(expected, result) + + # test SeriesGroupby.pipe + ser = pd.Series([1, 1, 2, 2, 3, 3]) + result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count()) + + expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3])) + + tm.assert_series_equal(result, expected) + + +def test_groupby_mean_no_overflow(): + # Regression test for (#22487) + df = pd.DataFrame( + { + "user": ["A", "A", "A", "A", "A"], + "connections": [4970, 4749, 4719, 4704, 18446744073699999744], + } + ) + assert df.groupby("user")["connections"].mean()["A"] == 
3689348814740003840 + + +@pytest.mark.parametrize( + "values", + [ + { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +@pytest.mark.parametrize("function", ["mean", "median", "var"]) +def test_apply_to_nullable_integer_returns_float(values, function): + # https://github.com/pandas-dev/pandas/issues/32219 + output = 0.5 if function == "var" else 1.5 + arr = np.array([output] * 3, dtype=float) + idx = pd.Index([1, 2, 3], dtype=object, name="a") + expected = pd.DataFrame({"b": arr}, index=idx) + + groups = pd.DataFrame(values, dtype="Int64").groupby("a") + + result = getattr(groups, function)() + tm.assert_frame_equal(result, expected) + + result = groups.agg(function) + tm.assert_frame_equal(result, expected) + + result = groups.agg([function]) + expected.columns = MultiIndex.from_tuples([("b", function)]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py new file mode 100644 index 00000000..7e374811 --- /dev/null +++ b/pandas/tests/groupby/test_groupby.py @@ -0,0 +1,2032 @@ +from datetime import datetime +from decimal import Decimal +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv +import pandas._testing as tm +from pandas.core.base import SpecificationError +import pandas.core.common as com + + +def test_repr(): + # GH18203 + result = repr(pd.Grouper(key="A", level="B")) + expected = "Grouper(key='A', level='B', axis=0, sort=False)" + assert result == expected + + +@pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"]) +def test_basic(dtype): + + data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + 
+ grouped = data.groupby(lambda x: x // 3) + + for k, v in grouped: + assert len(v) == 3 + + agged = grouped.aggregate(np.mean) + assert agged[1] == 1 + + tm.assert_series_equal(agged, grouped.agg(np.mean)) # shorthand + tm.assert_series_equal(agged, grouped.mean()) + tm.assert_series_equal(grouped.agg(np.sum), grouped.sum()) + + expected = grouped.apply(lambda x: x * x.sum()) + transformed = grouped.transform(lambda x: x * x.sum()) + assert transformed[7] == 12 + tm.assert_series_equal(transformed, expected) + + value_grouped = data.groupby(data) + tm.assert_series_equal( + value_grouped.aggregate(np.mean), agged, check_index_type=False + ) + + # complex agg + agged = grouped.aggregate([np.mean, np.std]) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped.aggregate({"one": np.mean, "two": np.std}) + + group_constants = {0: 10, 1: 20, 2: 30} + agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) + assert agged[1] == 21 + + # corner cases + msg = "Must produce aggregated value" + # exception raised is type Exception + with pytest.raises(Exception, match=msg): + grouped.aggregate(lambda x: x * 2) + + +def test_groupby_nonobject_dtype(mframe, df_mixed_floats): + key = mframe.index.codes[0] + grouped = mframe.groupby(key) + result = grouped.sum() + + expected = mframe.groupby(key.astype("O")).sum() + tm.assert_frame_equal(result, expected) + + # GH 3911, mixed frame non-conversion + df = df_mixed_floats.copy() + df["value"] = range(len(df)) + + def max_value(group): + return group.loc[group["value"].idxmax()] + + applied = df.groupby("A").apply(max_value) + result = applied.dtypes + expected = Series( + [np.dtype("object")] * 2 + [np.dtype("float64")] * 2 + [np.dtype("int64")], + index=["A", "B", "C", "D", "value"], + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_return_type(): + + # GH2893, return a reduced type + df1 = DataFrame( + [ + {"val1": 1, "val2": 20}, + {"val1": 1, 
"val2": 19}, + {"val1": 2, "val2": 27}, + {"val1": 2, "val2": 12}, + ] + ) + + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + result = df1.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) + + df2 = DataFrame( + [ + {"val1": 1, "val2": 20}, + {"val1": 1, "val2": 19}, + {"val1": 1, "val2": 27}, + {"val1": 1, "val2": 12}, + ] + ) + + def func(dataf): + return dataf["val2"] - dataf["val2"].mean() + + result = df2.groupby("val1", squeeze=True).apply(func) + assert isinstance(result, Series) + + # GH3596, return a consistent type (regression in 0.11 from 0.10.1) + df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"]) + result = df.groupby("X", squeeze=False).count() + assert isinstance(result, DataFrame) + + +def test_inconsistent_return_type(): + # GH5592 + # inconsistent return type + df = DataFrame( + dict( + A=["Tiger", "Tiger", "Tiger", "Lamb", "Lamb", "Pony", "Pony"], + B=Series(np.arange(7), dtype="int64"), + C=date_range("20130101", periods=7), + ) + ) + + def f(grp): + return grp.iloc[0] + + expected = df.groupby("A").first()[["B"]] + result = df.groupby("A").apply(f)[["B"]] + tm.assert_frame_equal(result, expected) + + def f(grp): + if grp.name == "Tiger": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["B"]] + e = expected.copy() + e.loc["Tiger"] = np.nan + tm.assert_frame_equal(result, e) + + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["B"]] + e = expected.copy() + e.loc["Pony"] = np.nan + tm.assert_frame_equal(result, e) + + # 5592 revisited, with datetimes + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0] + + result = df.groupby("A").apply(f)[["C"]] + e = df.groupby("A").first()[["C"]] + e.loc["Pony"] = pd.NaT + tm.assert_frame_equal(result, e) + + # scalar outputs + def f(grp): + if grp.name == "Pony": + return None + return grp.iloc[0].loc["C"] + + result = df.groupby("A").apply(f) + e = 
df.groupby("A").first()["C"].copy() + e.loc["Pony"] = np.nan + e.name = None + tm.assert_series_equal(result, e) + + +def test_pass_args_kwargs(ts, tsframe): + def f(x, q=None, axis=0): + return np.percentile(x, q, axis=axis) + + g = lambda x: np.percentile(x, 80, axis=0) + + # Series + ts_grouped = ts.groupby(lambda x: x.month) + agg_result = ts_grouped.agg(np.percentile, 80, axis=0) + apply_result = ts_grouped.apply(np.percentile, 80, axis=0) + trans_result = ts_grouped.transform(np.percentile, 80, axis=0) + + agg_expected = ts_grouped.quantile(0.8) + trans_expected = ts_grouped.transform(g) + + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) + + agg_result = ts_grouped.agg(f, q=80) + apply_result = ts_grouped.apply(f, q=80) + trans_result = ts_grouped.transform(f, q=80) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) + + # DataFrame + df_grouped = tsframe.groupby(lambda x: x.month) + agg_result = df_grouped.agg(np.percentile, 80, axis=0) + apply_result = df_grouped.apply(DataFrame.quantile, 0.8) + expected = df_grouped.quantile(0.8) + tm.assert_frame_equal(apply_result, expected, check_names=False) + tm.assert_frame_equal(agg_result, expected) + + agg_result = df_grouped.agg(f, q=80) + apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) + tm.assert_frame_equal(agg_result, expected) + tm.assert_frame_equal(apply_result, expected, check_names=False) + + +def test_len(): + df = tm.makeTimeDataFrame() + grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) + assert len(grouped) == len(df) + + grouped = df.groupby([lambda x: x.year, lambda x: x.month]) + expected = len({(x.year, x.month) for x in df.index}) + assert len(grouped) == expected + + # issue 11016 + df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) + assert 
len(df.groupby(("a"))) == 0 + assert len(df.groupby(("b"))) == 3 + assert len(df.groupby(["a", "b"])) == 3 + + +def test_basic_regression(): + # regression + result = Series([1.0 * x for x in list(range(1, 10)) * 10]) + + data = np.random.random(1100) * 10.0 + groupings = Series(data) + + grouped = result.groupby(groupings) + grouped.mean() + + +@pytest.mark.parametrize( + "dtype", ["float64", "float32", "int64", "int32", "int16", "int8"] +) +def test_with_na_groups(dtype): + index = Index(np.arange(10)) + values = Series(np.ones(10), index, dtype=dtype) + labels = Series( + [np.nan, "foo", "bar", "bar", np.nan, np.nan, "bar", "bar", np.nan, "foo"], + index=index, + ) + + # this SHOULD be an int + grouped = values.groupby(labels) + agged = grouped.agg(len) + expected = Series([4, 2], index=["bar", "foo"]) + + tm.assert_series_equal(agged, expected, check_dtype=False) + + # assert issubclass(agged.dtype.type, np.integer) + + # explicitly return a float from my function + def f(x): + return float(len(x)) + + agged = grouped.agg(f) + expected = Series([4, 2], index=["bar", "foo"]) + + tm.assert_series_equal(agged, expected, check_dtype=False) + assert issubclass(agged.dtype.type, np.dtype(dtype).type) + + +def test_indices_concatenation_order(): + + # GH 2808 + + def f1(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex(levels=[[]] * 2, codes=[[]] * 2, names=["b", "c"]) + res = DataFrame(columns=["a"], index=multiindex) + return res + else: + y = y.set_index(["b", "c"]) + return y + + def f2(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + return DataFrame() + else: + y = y.set_index(["b", "c"]) + return y + + def f3(x): + y = x[(x.b % 2) == 1] ** 2 + if y.empty: + multiindex = MultiIndex( + levels=[[]] * 2, codes=[[]] * 2, names=["foo", "bar"] + ) + res = DataFrame(columns=["a", "b"], index=multiindex) + return res + else: + return y + + df = DataFrame({"a": [1, 2, 2, 2], "b": range(4), "c": range(5, 9)}) + + df2 = DataFrame({"a": [3, 2, 2, 2], 
"b": range(4), "c": range(5, 9)}) + + # correct result + result1 = df.groupby("a").apply(f1) + result2 = df2.groupby("a").apply(f1) + tm.assert_frame_equal(result1, result2) + + # should fail (not the same number of levels) + msg = "Cannot concat indices that do not have the same number of levels" + with pytest.raises(AssertionError, match=msg): + df.groupby("a").apply(f2) + with pytest.raises(AssertionError, match=msg): + df2.groupby("a").apply(f2) + + # should fail (incorrect shape) + with pytest.raises(AssertionError, match=msg): + df.groupby("a").apply(f3) + with pytest.raises(AssertionError, match=msg): + df2.groupby("a").apply(f3) + + +def test_attr_wrapper(ts): + grouped = ts.groupby(lambda x: x.weekday()) + + result = grouped.std() + expected = grouped.agg(lambda x: np.std(x, ddof=1)) + tm.assert_series_equal(result, expected) + + # this is pretty cool + result = grouped.describe() + expected = {name: gp.describe() for name, gp in grouped} + expected = DataFrame(expected).T + tm.assert_frame_equal(result, expected) + + # get attribute + result = grouped.dtype + expected = grouped.agg(lambda x: x.dtype) + + # make sure raises error + msg = "'SeriesGroupBy' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + getattr(grouped, "foo") + + +def test_frame_groupby(tsframe): + grouped = tsframe.groupby(lambda x: x.weekday()) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == 5 + assert len(aggregated.columns) == 4 + + # by string + tscopy = tsframe.copy() + tscopy["weekday"] = [x.weekday() for x in tscopy.index] + stragged = tscopy.groupby("weekday").aggregate(np.mean) + tm.assert_frame_equal(stragged, aggregated, check_names=False) + + # transform + grouped = tsframe.head(30).groupby(lambda x: x.weekday()) + transformed = grouped.transform(lambda x: x - x.mean()) + assert len(transformed) == 30 + assert len(transformed.columns) == 4 + + # transform propagate + transformed = grouped.transform(lambda 
x: x.mean()) + for name, group in grouped: + mean = group.mean() + for idx in group.index: + tm.assert_series_equal(transformed.xs(idx), mean, check_names=False) + + # iterate + for weekday, group in grouped: + assert group.index[0].weekday() == weekday + + # groups / group_indices + groups = grouped.groups + indices = grouped.indices + + for k, v in groups.items(): + samething = tsframe.index.take(indices[k]) + assert (samething == v).all() + + +def test_frame_groupby_columns(tsframe): + mapping = {"A": 0, "B": 0, "C": 1, "D": 1} + grouped = tsframe.groupby(mapping, axis=1) + + # aggregate + aggregated = grouped.aggregate(np.mean) + assert len(aggregated) == len(tsframe) + assert len(aggregated.columns) == 2 + + # transform + tf = lambda x: x - x.mean() + groupedT = tsframe.T.groupby(mapping, axis=0) + tm.assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) + + # iterate + for k, v in grouped: + assert len(v.columns) == 2 + + +def test_frame_set_name_single(df): + grouped = df.groupby("A") + + result = grouped.mean() + assert result.index.name == "A" + + result = df.groupby("A", as_index=False).mean() + assert result.index.name != "A" + + result = grouped.agg(np.mean) + assert result.index.name == "A" + + result = grouped.agg({"C": np.mean, "D": np.std}) + assert result.index.name == "A" + + result = grouped["C"].mean() + assert result.index.name == "A" + result = grouped["C"].agg(np.mean) + assert result.index.name == "A" + result = grouped["C"].agg([np.mean, np.std]) + assert result.index.name == "A" + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped["C"].agg({"foo": np.mean, "bar": np.std}) + + +def test_multi_func(df): + col1 = df["A"] + col2 = df["B"] + + grouped = df.groupby([col1.get, col2.get]) + agged = grouped.mean() + expected = df.groupby(["A", "B"]).mean() + + # TODO groupby get drops names + tm.assert_frame_equal( + agged.loc[:, ["C", "D"]], expected.loc[:, ["C", "D"]], 
check_names=False + ) + + # some "groups" with no data + df = DataFrame( + { + "v1": np.random.randn(6), + "v2": np.random.randn(6), + "k1": np.array(["b", "b", "b", "a", "a", "a"]), + "k2": np.array(["1", "1", "1", "2", "2", "2"]), + }, + index=["one", "two", "three", "four", "five", "six"], + ) + # only verify that it works for now + grouped = df.groupby(["k1", "k2"]) + grouped.agg(np.sum) + + +def test_multi_key_multiple_functions(df): + grouped = df.groupby(["A", "B"])["C"] + + agged = grouped.agg([np.mean, np.std]) + expected = DataFrame({"mean": grouped.agg(np.mean), "std": grouped.agg(np.std)}) + tm.assert_frame_equal(agged, expected) + + +def test_frame_multi_key_function_list(): + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + grouped = data.groupby(["A", "B"]) + funcs = [np.mean, np.std] + agged = grouped.agg(funcs) + expected = pd.concat( + [grouped["D"].agg(funcs), grouped["E"].agg(funcs), grouped["F"].agg(funcs)], + keys=["D", "E", "F"], + axis=1, + ) + assert isinstance(agged.index, MultiIndex) + assert isinstance(expected.index, MultiIndex) + tm.assert_frame_equal(agged, expected) + + +@pytest.mark.parametrize("op", [lambda x: x.sum(), lambda x: x.mean()]) +def test_groupby_multiple_columns(df, op): + data = df + grouped = data.groupby(["A", "B"]) + + result1 = op(grouped) + + keys = [] + values = [] + for n1, gp1 in data.groupby("A"): + for n2, gp2 in gp1.groupby("B"): + keys.append((n1, n2)) + values.append(op(gp2.loc[:, ["C", "D"]])) + + mi = MultiIndex.from_tuples(keys, names=["A", "B"]) + expected = pd.concat(values, axis=1).T 
+ expected.index = mi + + # a little bit crude + for col in ["C", "D"]: + result_col = op(grouped[col]) + pivoted = result1[col] + exp = expected[col] + tm.assert_series_equal(result_col, exp) + tm.assert_series_equal(pivoted, exp) + + # test single series works the same + result = data["C"].groupby([data["A"], data["B"]]).mean() + expected = data.groupby(["A", "B"]).mean()["C"] + + tm.assert_series_equal(result, expected) + + +def test_as_index_select_column(): + # GH 5764 + df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + result = df.groupby("A", as_index=False)["B"].get_group(1) + expected = pd.Series([2, 4], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby("A", as_index=False)["B"].apply(lambda x: x.cumsum()) + expected = pd.Series( + [2, 6, 6], name="B", index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)]) + ) + tm.assert_series_equal(result, expected) + + +def test_groupby_as_index_agg(df): + grouped = df.groupby("A", as_index=False) + + # single-key + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + result2 = grouped.agg({"C": np.mean, "D": np.sum}) + expected2 = grouped.mean() + expected2["D"] = grouped.sum()["D"] + tm.assert_frame_equal(result2, expected2) + + grouped = df.groupby("A", as_index=True) + + msg = r"nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + grouped["C"].agg({"Q": np.sum}) + + # multi-key + + grouped = df.groupby(["A", "B"], as_index=False) + + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + result2 = grouped.agg({"C": np.mean, "D": np.sum}) + expected2 = grouped.mean() + expected2["D"] = grouped.sum()["D"] + tm.assert_frame_equal(result2, expected2) + + expected3 = grouped["C"].sum() + expected3 = DataFrame(expected3).rename(columns={"C": "Q"}) + result3 = grouped["C"].agg({"Q": np.sum}) + tm.assert_frame_equal(result3, expected3) + + # 
GH7115 & GH8112 & GH8582 + df = DataFrame(np.random.randint(0, 100, (50, 3)), columns=["jim", "joe", "jolie"]) + ts = Series(np.random.randint(5, 10, 50), name="jim") + + gr = df.groupby(ts) + gr.nth(0) # invokes set_selection_from_grouper internally + tm.assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum)) + + for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]: + gr = df.groupby(ts, as_index=False) + left = getattr(gr, attr)() + + gr = df.groupby(ts.values, as_index=True) + right = getattr(gr, attr)().reset_index(drop=True) + + tm.assert_frame_equal(left, right) + + +def test_as_index_series_return_frame(df): + grouped = df.groupby("A", as_index=False) + grouped2 = df.groupby(["A", "B"], as_index=False) + + result = grouped["C"].agg(np.sum) + expected = grouped.agg(np.sum).loc[:, ["A", "C"]] + assert isinstance(result, DataFrame) + tm.assert_frame_equal(result, expected) + + result2 = grouped2["C"].agg(np.sum) + expected2 = grouped2.agg(np.sum).loc[:, ["A", "B", "C"]] + assert isinstance(result2, DataFrame) + tm.assert_frame_equal(result2, expected2) + + result = grouped["C"].sum() + expected = grouped.sum().loc[:, ["A", "C"]] + assert isinstance(result, DataFrame) + tm.assert_frame_equal(result, expected) + + result2 = grouped2["C"].sum() + expected2 = grouped2.sum().loc[:, ["A", "B", "C"]] + assert isinstance(result2, DataFrame) + tm.assert_frame_equal(result2, expected2) + + +def test_as_index_series_column_slice_raises(df): + # GH15072 + grouped = df.groupby("A", as_index=False) + msg = r"Column\(s\) C already selected" + + with pytest.raises(IndexError, match=msg): + grouped["C"].__getitem__("D") + + +def test_groupby_as_index_cython(df): + data = df + + # single-key + grouped = data.groupby("A", as_index=False) + result = grouped.mean() + expected = data.groupby(["A"]).mean() + expected.insert(0, "A", expected.index) + expected.index = np.arange(len(expected)) + tm.assert_frame_equal(result, expected) + + # multi-key + grouped = 
data.groupby(["A", "B"], as_index=False) + result = grouped.mean() + expected = data.groupby(["A", "B"]).mean() + + arrays = list(zip(*expected.index.values)) + expected.insert(0, "A", arrays[0]) + expected.insert(1, "B", arrays[1]) + expected.index = np.arange(len(expected)) + tm.assert_frame_equal(result, expected) + + +def test_groupby_as_index_series_scalar(df): + grouped = df.groupby(["A", "B"], as_index=False) + + # GH #421 + + result = grouped["C"].agg(len) + expected = grouped.agg(len).loc[:, ["A", "B", "C"]] + tm.assert_frame_equal(result, expected) + + +def test_groupby_as_index_corner(df, ts): + msg = "as_index=False only valid with DataFrame" + with pytest.raises(TypeError, match=msg): + ts.groupby(lambda x: x.weekday(), as_index=False) + + msg = "as_index=False only valid for axis=0" + with pytest.raises(ValueError, match=msg): + df.groupby(lambda x: x.lower(), as_index=False, axis=1) + + +def test_groupby_multiple_key(df): + df = tm.makeTimeDataFrame() + grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) + agged = grouped.sum() + tm.assert_almost_equal(df.values, agged.values) + + grouped = df.T.groupby( + [lambda x: x.year, lambda x: x.month, lambda x: x.day], axis=1 + ) + + agged = grouped.agg(lambda x: x.sum()) + tm.assert_index_equal(agged.index, df.columns) + tm.assert_almost_equal(df.T.values, agged.values) + + agged = grouped.agg(lambda x: x.sum()) + tm.assert_almost_equal(df.T.values, agged.values) + + +def test_groupby_multi_corner(df): + # test that having an all-NA column doesn't mess you up + df = df.copy() + df["bad"] = np.nan + agged = df.groupby(["A", "B"]).mean() + + expected = df.groupby(["A", "B"]).mean() + expected["bad"] = np.nan + + tm.assert_frame_equal(agged, expected) + + +def test_omit_nuisance(df): + grouped = df.groupby("A") + + result = grouped.mean() + expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() + tm.assert_frame_equal(result, expected) + + agged = grouped.agg(np.mean) + exp = 
grouped.mean() + tm.assert_frame_equal(agged, exp) + + df = df.loc[:, ["A", "C", "D"]] + df["E"] = datetime.now() + grouped = df.groupby("A") + result = grouped.agg(np.sum) + expected = grouped.sum() + tm.assert_frame_equal(result, expected) + + # won't work with axis = 1 + grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) + msg = "reduction operation 'sum' not allowed for this dtype" + with pytest.raises(TypeError, match=msg): + grouped.agg(lambda x: x.sum(0, numeric_only=False)) + + +def test_omit_nuisance_python_multiple(three_group): + grouped = three_group.groupby(["A", "B"]) + + agged = grouped.agg(np.mean) + exp = grouped.mean() + tm.assert_frame_equal(agged, exp) + + +def test_empty_groups_corner(mframe): + # handle empty groups + df = DataFrame( + { + "k1": np.array(["b", "b", "b", "a", "a", "a"]), + "k2": np.array(["1", "1", "1", "2", "2", "2"]), + "k3": ["foo", "bar"] * 3, + "v1": np.random.randn(6), + "v2": np.random.randn(6), + } + ) + + grouped = df.groupby(["k1", "k2"]) + result = grouped.agg(np.mean) + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + grouped = mframe[3:5].groupby(level=0) + agged = grouped.apply(lambda x: x.mean()) + agged_A = grouped["A"].apply(np.mean) + tm.assert_series_equal(agged["A"], agged_A) + assert agged.index.name == "first" + + +def test_nonsense_func(): + df = DataFrame([0]) + msg = r"unsupported operand type\(s\) for \+: 'int' and 'str'" + with pytest.raises(TypeError, match=msg): + df.groupby(lambda x: x + "foo") + + +def test_wrap_aggregated_output_multindex(mframe): + df = mframe.T + df["baz", "two"] = "peekaboo" + + keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] + agged = df.groupby(keys).agg(np.mean) + assert isinstance(agged.columns, MultiIndex) + + def aggfun(ser): + if ser.name == ("foo", "one"): + raise TypeError + else: + return ser.sum() + + agged2 = df.groupby(keys).aggregate(aggfun) + assert len(agged2.columns) + 1 == len(df.columns) + + +def 
test_groupby_level_apply(mframe): + + result = mframe.groupby(level=0).count() + assert result.index.name == "first" + result = mframe.groupby(level=1).count() + assert result.index.name == "second" + + result = mframe["A"].groupby(level=0).count() + assert result.index.name == "first" + + +def test_groupby_level_mapper(mframe): + deleveled = mframe.reset_index() + + mapper0 = {"foo": 0, "bar": 0, "baz": 1, "qux": 1} + mapper1 = {"one": 0, "two": 0, "three": 1} + + result0 = mframe.groupby(mapper0, level=0).sum() + result1 = mframe.groupby(mapper1, level=1).sum() + + mapped_level0 = np.array([mapper0.get(x) for x in deleveled["first"]]) + mapped_level1 = np.array([mapper1.get(x) for x in deleveled["second"]]) + expected0 = mframe.groupby(mapped_level0).sum() + expected1 = mframe.groupby(mapped_level1).sum() + expected0.index.name, expected1.index.name = "first", "second" + + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + + +def test_groupby_level_nonmulti(): + # GH 1313, GH 13901 + s = Series([1, 2, 3, 10, 4, 5, 20, 6], Index([1, 2, 3, 1, 4, 5, 2, 6], name="foo")) + expected = Series([11, 22, 3, 4, 5, 6], Index(range(1, 7), name="foo")) + + result = s.groupby(level=0).sum() + tm.assert_series_equal(result, expected) + result = s.groupby(level=[0]).sum() + tm.assert_series_equal(result, expected) + result = s.groupby(level=-1).sum() + tm.assert_series_equal(result, expected) + result = s.groupby(level=[-1]).sum() + tm.assert_series_equal(result, expected) + + msg = "level > 0 or level < -1 only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + s.groupby(level=1) + with pytest.raises(ValueError, match=msg): + s.groupby(level=-2) + msg = "No group keys passed!" 
+ with pytest.raises(ValueError, match=msg): + s.groupby(level=[]) + msg = "multiple levels only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + s.groupby(level=[0, 0]) + with pytest.raises(ValueError, match=msg): + s.groupby(level=[0, 1]) + msg = "level > 0 or level < -1 only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + s.groupby(level=[1]) + + +def test_groupby_complex(): + # GH 12902 + a = Series(data=np.arange(4) * (1 + 2j), index=[0, 0, 1, 1]) + expected = Series((1 + 2j, 5 + 10j)) + + result = a.groupby(level=0).sum() + tm.assert_series_equal(result, expected) + + result = a.sum(level=0) + tm.assert_series_equal(result, expected) + + +def test_mutate_groups(): + + # GH3380 + + df = DataFrame( + { + "cat1": ["a"] * 8 + ["b"] * 6, + "cat2": ["c"] * 2 + + ["d"] * 2 + + ["e"] * 2 + + ["f"] * 2 + + ["c"] * 2 + + ["d"] * 2 + + ["e"] * 2, + "cat3": [f"g{x}" for x in range(1, 15)], + "val": np.random.randint(100, size=14), + } + ) + + def f_copy(x): + x = x.copy() + x["rank"] = x.val.rank(method="min") + return x.groupby("cat2")["rank"].min() + + def f_no_copy(x): + x["rank"] = x.val.rank(method="min") + return x.groupby("cat2")["rank"].min() + + grpby_copy = df.groupby("cat1").apply(f_copy) + grpby_no_copy = df.groupby("cat1").apply(f_no_copy) + tm.assert_series_equal(grpby_copy, grpby_no_copy) + + +def test_no_mutate_but_looks_like(): + + # GH 8467 + # first show's mutation indicator + # second does not, but should yield the same results + df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)}) + + result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key) + result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key) + tm.assert_series_equal(result1, result2) + + +def test_groupby_series_indexed_differently(): + s1 = Series( + [5.0, -9.0, 4.0, 100.0, -5.0, 55.0, 6.7], + index=Index(["a", "b", "c", "d", "e", "f", "g"]), + ) + s2 = Series( + [1.0, 1.0, 4.0, 5.0, 5.0, 7.0], 
index=Index(["a", "b", "d", "f", "g", "h"]) + ) + + grouped = s1.groupby(s2) + agged = grouped.mean() + exp = s1.groupby(s2.reindex(s1.index).get).mean() + tm.assert_series_equal(agged, exp) + + +def test_groupby_with_hier_columns(): + tuples = list( + zip( + *[ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + ) + ) + index = MultiIndex.from_tuples(tuples) + columns = MultiIndex.from_tuples( + [("A", "cat"), ("B", "dog"), ("B", "cat"), ("A", "dog")] + ) + df = DataFrame(np.random.randn(8, 4), index=index, columns=columns) + + result = df.groupby(level=0).mean() + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0, axis=1).mean() + tm.assert_index_equal(result.index, df.index) + + result = df.groupby(level=0).agg(np.mean) + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0).apply(lambda x: x.mean()) + tm.assert_index_equal(result.columns, columns) + + result = df.groupby(level=0, axis=1).agg(lambda x: x.mean(1)) + tm.assert_index_equal(result.columns, Index(["A", "B"])) + tm.assert_index_equal(result.index, df.index) + + # add a nuisance column + sorted_columns, _ = columns.sortlevel(0) + df["A", "foo"] = "bar" + result = df.groupby(level=0).mean() + tm.assert_index_equal(result.columns, df.columns[:-1]) + + +def test_grouping_ndarray(df): + grouped = df.groupby(df["A"].values) + + result = grouped.sum() + expected = df.groupby("A").sum() + tm.assert_frame_equal( + result, expected, check_names=False + ) # Note: no names when grouping by value + + +def test_groupby_wrong_multi_labels(): + data = """index,foo,bar,baz,spam,data +0,foo1,bar1,baz1,spam2,20 +1,foo1,bar2,baz1,spam3,30 +2,foo2,bar2,baz1,spam2,40 +3,foo1,bar1,baz2,spam1,50 +4,foo3,bar1,baz2,spam1,60""" + + data = read_csv(StringIO(data), index_col=0) + + grouped = data.groupby(["foo", "bar", "baz", "spam"]) + + result = grouped.agg(np.mean) + expected = grouped.mean() + 
tm.assert_frame_equal(result, expected) + + +def test_groupby_series_with_name(df): + result = df.groupby(df["A"]).mean() + result2 = df.groupby(df["A"], as_index=False).mean() + assert result.index.name == "A" + assert "A" in result2 + + result = df.groupby([df["A"], df["B"]]).mean() + result2 = df.groupby([df["A"], df["B"]], as_index=False).mean() + assert result.index.names == ("A", "B") + assert "A" in result2 + assert "B" in result2 + + +def test_seriesgroupby_name_attr(df): + # GH 6265 + result = df.groupby("A")["C"] + assert result.count().name == "C" + assert result.mean().name == "C" + + testFunc = lambda x: np.sum(x) * 2 + assert result.agg(testFunc).name == "C" + + +def test_consistency_name(): + # GH 12363 + + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "two", "two", "two", "one", "two"], + "C": np.random.randn(8) + 1.0, + "D": np.arange(8), + } + ) + + expected = df.groupby(["A"]).B.count() + result = df.B.groupby(df.A).count() + tm.assert_series_equal(result, expected) + + +def test_groupby_name_propagation(df): + # GH 6124 + def summarize(df, name=None): + return Series({"count": 1, "mean": 2, "omissions": 3}, name=name) + + def summarize_random_name(df): + # Provide a different name for each Series. In this case, groupby + # should not attempt to propagate the Series name since they are + # inconsistent. 
def test_groupby_mixed_type_columns():
    """Grouping works when column labels mix strings and integers (GH 13432)."""
    # str and int labels are unorderable types in py3
    frame = DataFrame([[0, 1, 2]], columns=["A", "B", 0])
    key_index = Index([0], name="A")
    expected = DataFrame([[1, 2]], columns=["B", 0], index=key_index)

    # there is one row per group, so both reductions give back that row
    for reduction in ("first", "sum"):
        result = getattr(frame.groupby("A"), reduction)()
        tm.assert_frame_equal(result, expected)
+ grouped = values.groupby(labels) + + # accessing the index elements causes segfault + f = lambda x: len(set(map(id, x.index))) + grouped.agg(f) + + +def test_convert_objects_leave_decimal_alone(): + + s = Series(range(5)) + labels = np.array(["a", "b", "c", "d", "e"], dtype="O") + + def convert_fast(x): + return Decimal(str(x.mean())) + + def convert_force_pure(x): + # base will be length 0 + assert len(x.values.base) > 0 + return Decimal(str(x.mean())) + + grouped = s.groupby(labels) + + result = grouped.agg(convert_fast) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + result = grouped.agg(convert_force_pure) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + +def test_groupby_dtype_inference_empty(): + # GH 6733 + df = DataFrame({"x": [], "range": np.arange(0, dtype="int64")}) + assert df["x"].dtype == np.float64 + + result = df.groupby("x").first() + exp_index = Index([], name="x", dtype=np.float64) + expected = DataFrame({"range": Series([], index=exp_index, dtype="int64")}) + tm.assert_frame_equal(result, expected, by_blocks=True) + + +def test_groupby_list_infer_array_like(df): + result = df.groupby(list(df["A"])).mean() + expected = df.groupby(df["A"]).mean() + tm.assert_frame_equal(result, expected, check_names=False) + + with pytest.raises(KeyError, match=r"^'foo'$"): + df.groupby(list(df["A"][:-1])) + + # pathological case of ambiguity + df = DataFrame({"foo": [0, 1], "bar": [3, 4], "val": np.random.randn(2)}) + + result = df.groupby(["foo", "bar"]).mean() + expected = df.groupby([df["foo"], df["bar"]]).mean()[["val"]] + + +def test_groupby_keys_same_size_as_index(): + # GH 11185 + freq = "s" + index = pd.date_range( + start=pd.Timestamp("2015-09-29T11:34:44-0700"), periods=2, freq=freq + ) + df = pd.DataFrame([["A", 10], ["B", 15]], columns=["metric", "values"], index=index) + result = df.groupby([pd.Grouper(level=0, freq=freq), "metric"]).mean() + expected = df.set_index([df.index, 
"metric"]) + + tm.assert_frame_equal(result, expected) + + +def test_groupby_one_row(): + # GH 11741 + msg = r"^'Z'$" + df1 = pd.DataFrame(np.random.randn(1, 4), columns=list("ABCD")) + with pytest.raises(KeyError, match=msg): + df1.groupby("Z") + df2 = pd.DataFrame(np.random.randn(2, 4), columns=list("ABCD")) + with pytest.raises(KeyError, match=msg): + df2.groupby("Z") + + +def test_groupby_nat_exclude(): + # GH 6992 + df = pd.DataFrame( + { + "values": np.random.randn(8), + "dt": [ + np.nan, + pd.Timestamp("2013-01-01"), + np.nan, + pd.Timestamp("2013-02-01"), + np.nan, + pd.Timestamp("2013-02-01"), + np.nan, + pd.Timestamp("2013-01-01"), + ], + "str": [np.nan, "a", np.nan, "a", np.nan, "a", np.nan, "b"], + } + ) + grouped = df.groupby("dt") + + expected = [pd.Index([1, 7]), pd.Index([3, 5])] + keys = sorted(grouped.groups.keys()) + assert len(keys) == 2 + for k, e in zip(keys, expected): + # grouped.groups keys are np.datetime64 with system tz + # not to be affected by tz, only compare values + tm.assert_index_equal(grouped.groups[k], e) + + # confirm obj is not filtered + tm.assert_frame_equal(grouped.grouper.groupings[0].obj, df) + assert grouped.ngroups == 2 + + expected = { + Timestamp("2013-01-01 00:00:00"): np.array([1, 7], dtype=np.int64), + Timestamp("2013-02-01 00:00:00"): np.array([3, 5], dtype=np.int64), + } + + for k in grouped.indices: + tm.assert_numpy_array_equal(grouped.indices[k], expected[k]) + + tm.assert_frame_equal(grouped.get_group(Timestamp("2013-01-01")), df.iloc[[1, 7]]) + tm.assert_frame_equal(grouped.get_group(Timestamp("2013-02-01")), df.iloc[[3, 5]]) + + with pytest.raises(KeyError, match=r"^NaT$"): + grouped.get_group(pd.NaT) + + nan_df = DataFrame( + {"nan": [np.nan, np.nan, np.nan], "nat": [pd.NaT, pd.NaT, pd.NaT]} + ) + assert nan_df["nan"].dtype == "float64" + assert nan_df["nat"].dtype == "datetime64[ns]" + + for key in ["nan", "nat"]: + grouped = nan_df.groupby(key) + assert grouped.groups == {} + assert grouped.ngroups == 0 
def test_int32_overflow():
    """Four-key grouping yields the same number of groups in either key order.

    The per-key cardinalities multiply to far more than the int32 range,
    which is presumably what the test name refers to.
    """
    n_rows = 25000
    # 10000 + 10000 + 5000 values, so this key contains repeats
    repeating_keys = np.concatenate(
        [np.arange(size) for size in (10000, 10000, 5000)]
    )
    unique_keys = np.arange(n_rows)
    frame = DataFrame(
        {
            "A": unique_keys,
            "B": repeating_keys,
            "C": unique_keys,
            "D": repeating_keys,
            "E": np.random.randn(n_rows),
        }
    )

    forward = frame.groupby(["A", "B", "C", "D"]).sum()
    backward = frame.groupby(["D", "C", "B", "A"]).sum()
    n_forward, n_backward = len(forward), len(backward)
    assert n_forward == n_backward
def test_multifunc_sum_bug():
    """A dict ``agg`` mixing "sum" and "size" keeps the summed float column float64.

    GH #1065.
    """
    frame = DataFrame(np.arange(9).reshape(3, 3))
    frame["test"] = 0  # constant key: every row lands in one group
    frame["fl"] = [1.3, 1.5, 1.6]

    aggregated = frame.groupby("test").agg({"fl": "sum", 2: "size"})
    summed_dtype = aggregated["fl"].dtype
    assert summed_dtype == np.float64
def test_groupby_reindex_inside_function():
    """Aggregating with a closure gives the same frame with or without a label lookup.

    Each aggregation function first restricts its group to rows before 11:00;
    the ``fix=True`` variant additionally performs a label-based lookup on the
    group before reducing. Both variants must produce identical results.
    """
    periods = 1000
    ind = date_range(start="2012/1/1", freq="5min", periods=periods)
    df = DataFrame({"high": np.arange(periods), "low": np.arange(periods)}, index=ind)

    def agg_before(func, fix=False):
        """
        Run an aggregate func on the subset of data.
        """

        def _func(data):
            # keep only the rows whose timestamp falls before 11:00
            d = data.loc[data.index.map(lambda x: x.hour < 11)].dropna()
            if fix:
                # label lookup whose value is discarded; only the access matters
                data[data.index[0]]
            if len(d) == 0:
                return None
            return func(d)

        return _func

    # one group per calendar day
    grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day))
    closure_bad = grouped.agg({"high": agg_before(np.max)})
    closure_good = grouped.agg({"high": agg_before(np.max, True)})

    tm.assert_frame_equal(closure_bad, closure_good)
def test_index_label_overlaps_location():
    """``filter`` output equals positional selection although labels look like positions.

    The index labels (2, 0, 2, 1, 1) are also valid row positions, so a
    label/location mix-up would select the wrong rows.
    """
    # checking we don't have any label/location confusion in the
    # wake of GH5375
    keys = list("ababb")
    # positions of the rows keyed "b", the only group with more than 2 rows
    kept_positions = [1, 3, 4]

    def _check_frame_and_column(frame):
        kept = frame.groupby(keys).filter(lambda grp: len(grp) > 2)
        tm.assert_frame_equal(kept, frame.iloc[kept_positions])

        column = frame[0]
        kept = column.groupby(keys).filter(lambda grp: len(grp) > 2)
        tm.assert_series_equal(kept, column.take(kept_positions))

    df = DataFrame(list("ABCDE"), index=[2, 0, 2, 1, 1])
    _check_frame_and_column(df)

    # ... and again, with a generic Index of floats
    df.index = df.index.astype(float)
    _check_frame_and_column(df)
def test_group_shift_with_fill_value():
    """Grouped ``shift(-1, fill_value=0)`` fills each group's last row with 0.

    GH #24128. Rows ``i`` and ``i + 12`` share the same (A, B) key, so the
    first twelve rows receive the value from twelve rows later and the last
    twelve rows receive the fill value.
    """
    n_rows = 24
    half = n_rows - 12

    rows = [(i % 12, i % 3, i) for i in range(n_rows)]
    frame = DataFrame(rows, dtype=float, columns=["A", "B", "Z"], index=None)

    shifted_z = [i + 12 for i in range(half)] + [0] * (n_rows - half)
    expected = DataFrame(shifted_z, dtype=float, columns=["Z"], index=None)

    shifted = frame.groupby(["A", "B"]).shift(-1, fill_value=0)
    result = shifted[["Z"]]

    tm.assert_frame_equal(result, expected)
def test_groupby_multiindex_nat():
    """Grouping on one MultiIndex level keeps rows whose other level is NaT.

    GH 9236. The expected mean for "a" (3.0) includes the value stored
    under the NaT date.
    """
    dates = [pd.NaT, datetime(2012, 1, 2), datetime(2012, 1, 2), datetime(2012, 1, 3)]
    letters = ["a", "a", "b", "a"]
    index = pd.MultiIndex.from_tuples(list(zip(dates, letters)), names=["date", None])
    ser = pd.Series([3, 2, 2.5, 4], index=index)

    by_letter = ser.groupby(level=1)
    result = by_letter.mean()

    expected = pd.Series([3.0, 2.5], index=["a", "b"])
    tm.assert_series_equal(result, expected)
def test_groupby_multiindex_series_keys_len_equal_group_axis():
    """Level names are used as keys even when there are as many keys as rows.

    GH 25704. The Series has two rows and is grouped by a two-element list
    of index level names.
    """
    source_index = pd.MultiIndex.from_arrays(
        [["x", "x"], ["a", "b"], ["k", "k"]],
        names=["first", "second", "third"],
    )
    ser = pd.Series(data=[1, 2], index=source_index)

    group_keys = ["first", "third"]
    result = ser.groupby(group_keys).sum()

    # both rows share ("x", "k"), so they collapse into one group: 1 + 2
    expected_index = pd.MultiIndex.from_arrays([["x"], ["k"]], names=group_keys)
    expected = pd.Series([3], index=expected_index)

    tm.assert_series_equal(result, expected)
def test_groupby_duplicate_index():
    """``groupby(level=0)`` on a float index with a repeated label averages per label."""
    # GH#29189 the groupby call here used to raise
    labels = [2.0, 4.0, 4.0, 5.0]
    ser = pd.Series([2, 5, 6, 8], index=labels)

    result = ser.groupby(level=0).mean()

    # label 4.0 occurs twice: (5 + 6) / 2 == 5.5
    unique_labels = [2.0, 4.0, 5.0]
    expected = pd.Series([2, 5.5, 8], index=unique_labels)
    tm.assert_series_equal(result, expected)
+@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_bool_aggs_dup_column_labels(bool_agg_func): + # 21668 + df = pd.DataFrame([[True, True]], columns=["a", "a"]) + grp_by = df.groupby([0]) + result = getattr(grp_by, bool_agg_func)() + + expected = df + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "idx", [pd.Index(["a", "a"]), pd.MultiIndex.from_tuples((("a", "a"), ("a", "a")))] +) +def test_dup_labels_output_shape(groupby_func, idx): + if groupby_func in {"size", "ngroup", "cumcount"}: + pytest.skip("Not applicable") + + df = pd.DataFrame([[1, 1]], columns=idx) + grp_by = df.groupby([0]) + + args = [] + if groupby_func in {"fillna", "nth"}: + args.append(0) + elif groupby_func == "corrwith": + args.append(df) + elif groupby_func == "tshift": + df.index = [pd.Timestamp("today")] + args.extend([1, "D"]) + + result = getattr(grp_by, groupby_func)(*args) + + assert result.shape == (1, 2) + tm.assert_index_equal(result.columns, idx) + + +def test_groupby_crash_on_nunique(axis): + # Fix following 30253 + df = pd.DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) + + axis_number = df._get_axis_number(axis) + if not axis_number: + df = df.T + + result = df.groupby(axis=axis_number, level=0).nunique() + + expected = pd.DataFrame({"A": [1, 2], "D": [1, 1]}) + if not axis_number: + expected = expected.T + + tm.assert_frame_equal(result, expected) + + +def test_groupby_list_level(): + # GH 9790 + expected = pd.DataFrame(np.arange(0, 9).reshape(3, 3)) + result = expected.groupby(level=[0]).mean() + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py new file mode 100644 index 00000000..7245d6f4 --- /dev/null +++ b/pandas/tests/groupby/test_grouping.py @@ -0,0 +1,966 @@ +""" test where we are determining what we are grouping, or getting groups """ + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + 
class TestSelection:
    """Tests for selecting columns from a GroupBy object via ``[]`` or attribute access."""

    def test_select_bad_cols(self):
        """Selecting missing columns raises KeyError naming only the missing ones."""
        df = DataFrame([[1, 2]], columns=["A", "B"])
        g = df.groupby("A")
        with pytest.raises(KeyError, match="\"Columns not found: 'C'\""):
            g[["C"]]

        with pytest.raises(KeyError, match="^[^A]+$"):
            # A should not be referenced as a bad column...
            # will have to rethink regex if you change message!
            g[["A", "C"]]

    def test_groupby_duplicated_column_errormsg(self):
        """Grouping by a duplicated column label raises; other labels still work."""
        # GH7511
        df = DataFrame(
            columns=["A", "B", "A", "C"], data=[range(4), range(2, 6), range(0, 8, 2)]
        )

        # "A" appears twice, so both a scalar key and a list containing it fail
        msg = "Grouper for 'A' not 1-dimensional"
        with pytest.raises(ValueError, match=msg):
            df.groupby("A")
        with pytest.raises(ValueError, match=msg):
            df.groupby(["A", "B"])

        # grouping by the unique label "B" leaves the other three columns
        grouped = df.groupby("B")
        c = grouped.count()
        assert c.columns.nlevels == 1
        assert c.columns.size == 3

    def test_column_select_via_attr(self, df):
        """Attribute access selects a column like ``[]`` does."""
        result = df.groupby("A").C.sum()
        expected = df.groupby("A")["C"].sum()
        tm.assert_series_equal(result, expected)

        # NOTE(review): a column literally named "mean" is added before calling
        # .mean() - presumably to check the method is not shadowed; confirm
        df["mean"] = 1.5
        result = df.groupby("A").mean()
        expected = df.groupby("A").agg(np.mean)
        tm.assert_frame_equal(result, expected)

    def test_getitem_list_of_columns(self):
        """A list of labels and an Index slice select the same columns."""
        df = DataFrame(
            {
                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
                "C": np.random.randn(8),
                "D": np.random.randn(8),
                "E": np.random.randn(8),
            }
        )

        result = df.groupby("A")[["C", "D"]].mean()
        # df.columns[2:4] is the Index ["C", "D"]
        result2 = df.groupby("A")[df.columns[2:4]].mean()

        expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean()

        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result2, expected)

    def test_getitem_numeric_column_names(self):
        """Column selection works with integer labels; bare tuple keys warn."""
        # GH #13731
        df = DataFrame(
            {
                0: list("abcd") * 2,
                2: np.random.randn(8),
                4: np.random.randn(8),
                6: np.random.randn(8),
            }
        )
        # df.columns[1:3] is the Index [2, 4]
        result = df.groupby(0)[df.columns[1:3]].mean()
        result2 = df.groupby(0)[[2, 4]].mean()

        expected = df.loc[:, [0, 2, 4]].groupby(0).mean()

        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result2, expected)

        # per GH 23566 this should raise a FutureWarning
        with tm.assert_produces_warning(FutureWarning):
            df.groupby(0)[2, 4].mean()

    def test_getitem_single_list_of_columns(self, df):
        """Indexing with a bare tuple of labels emits a FutureWarning."""
        # per GH 23566 this should raise a FutureWarning
        with tm.assert_produces_warning(FutureWarning):
            df.groupby("A")["C", "D"].mean()

    def test_getitem_single_column(self):
        """Selecting one label returns the Series matching the one-column frame result."""
        df = DataFrame(
            {
                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
                "C": np.random.randn(8),
                "D": np.random.randn(8),
                "E": np.random.randn(8),
            }
        )

        result = df.groupby("A")["C"].mean()

        # build the expectation from the frame path, then take its only column
        as_frame = df.loc[:, ["A", "C"]].groupby("A").mean()
        as_series = as_frame.iloc[:, 0]
        expected = as_series

        tm.assert_series_equal(result, expected)
    def test_grouper_creation_bug(self):
        """Grouping through ``pd.Grouper`` objects matches the plain-key equivalents.

        Covers a single Grouper, Groupers inside a list (alone and mixed with
        string keys), and Groupers that address a MultiIndex level of a Series.
        """

        # GH 8795
        df = DataFrame({"A": [0, 0, 1, 1, 2, 2], "B": [1, 2, 3, 4, 5, 6]})
        g = df.groupby("A")
        expected = g.sum()

        g = df.groupby(pd.Grouper(key="A"))
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # the same groupby object is reused here, after sum() was already called
        result = g.apply(lambda x: x.sum())
        tm.assert_frame_equal(result, expected)

        g = df.groupby(pd.Grouper(key="A", axis=0))
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # GH14334
        # pd.Grouper(key=...) may be passed in a list
        df = DataFrame(
            {"A": [0, 0, 0, 1, 1, 1], "B": [1, 1, 2, 2, 3, 3], "C": [1, 2, 3, 4, 5, 6]}
        )
        # Group by single column
        expected = df.groupby("A").sum()
        g = df.groupby([pd.Grouper(key="A")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group by two columns
        # using a combination of strings and Grouper objects;
        # this one expectation is shared by the three variants below
        expected = df.groupby(["A", "B"]).sum()

        # Group with two Grouper objects
        g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group with a string and a Grouper object
        g = df.groupby(["A", pd.Grouper(key="B")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group with a Grouper object and a string
        g = df.groupby([pd.Grouper(key="A"), "B"])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # GH8866
        s = Series(
            np.arange(8, dtype="int64"),
            index=pd.MultiIndex.from_product(
                [list("ab"), range(2), date_range("20130101", periods=2)],
                names=["one", "two", "three"],
            ),
        )
        # both dates fall in January 2013, so monthly grouping gives a single
        # bin labelled 2013-01-31 holding 0 + 1 + ... + 7 == 28
        result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
        expected = Series(
            [28], index=Index([Timestamp("2013-01-31")], freq="M", name="three")
        )
        tm.assert_series_equal(result, expected)

        # just specifying a level breaks
        # NOTE(review): presumably "used to break" - the assertion below expects
        # a level-only Grouper to match groupby(level=...); confirm against GH8866
        result = s.groupby(pd.Grouper(level="one")).sum()
        expected = s.groupby(level="one").sum()
        tm.assert_series_equal(result, expected)
df_multi.reset_index().groupby(["B", "inner"]).mean() + tm.assert_frame_equal(result, expected) + + # Test the reverse grouping order + result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_multi.reset_index().groupby(["inner", "B"]).mean() + tm.assert_frame_equal(result, expected) + + # Grouping a single-index frame by a column and the index should + # be equivalent to resetting the index and grouping by two columns + df_single = df_multi.reset_index("outer") + result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() + expected = df_single.reset_index().groupby(["B", "inner"]).mean() + tm.assert_frame_equal(result, expected) + + # Test the reverse grouping order + result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() + expected = df_single.reset_index().groupby(["inner", "B"]).mean() + tm.assert_frame_equal(result, expected) + + def test_groupby_levels_and_columns(self): + # GH9344, GH9049 + idx_names = ["x", "y"] + idx = pd.MultiIndex.from_tuples( + [(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names + ) + df = pd.DataFrame(np.arange(12).reshape(-1, 3), index=idx) + + by_levels = df.groupby(level=idx_names).mean() + # reset_index changes columns dtype to object + by_columns = df.reset_index().groupby(idx_names).mean() + + tm.assert_frame_equal(by_levels, by_columns, check_column_type=False) + + by_columns.columns = pd.Index(by_columns.columns, dtype=np.int64) + tm.assert_frame_equal(by_levels, by_columns) + + def test_groupby_categorical_index_and_columns(self, observed): + # GH18432, adapted for GH25871 + columns = ["A", "B", "A", "B"] + categories = ["B", "A"] + data = np.array( + [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]], int + ) + cat_columns = CategoricalIndex(columns, categories=categories, ordered=True) + df = DataFrame(data=data, columns=cat_columns) + result = df.groupby(axis=1, level=0, observed=observed).sum() + expected_data = np.array([[4, 2], [4, 2], [4, 2], [4, 2], [4, 
2]], int) + expected_columns = CategoricalIndex( + categories, categories=categories, ordered=True + ) + expected = DataFrame(data=expected_data, columns=expected_columns) + tm.assert_frame_equal(result, expected) + + # test transposed version + df = DataFrame(data.T, index=cat_columns) + result = df.groupby(axis=0, level=0, observed=observed).sum() + expected = DataFrame(data=expected_data.T, index=expected_columns) + tm.assert_frame_equal(result, expected) + + def test_grouper_getting_correct_binner(self): + + # GH 10063 + # using a non-time-based grouper and a time-based grouper + # and specifying levels + df = DataFrame( + {"A": 1}, + index=pd.MultiIndex.from_product( + [list("ab"), date_range("20130101", periods=80)], names=["one", "two"] + ), + ) + result = df.groupby( + [pd.Grouper(level="one"), pd.Grouper(level="two", freq="M")] + ).sum() + expected = DataFrame( + {"A": [31, 28, 21, 31, 28, 21]}, + index=MultiIndex.from_product( + [list("ab"), date_range("20130101", freq="M", periods=3)], + names=["one", "two"], + ), + ) + tm.assert_frame_equal(result, expected) + + def test_grouper_iter(self, df): + assert sorted(df.groupby("A").grouper) == ["bar", "foo"] + + def test_empty_groups(self, df): + # see gh-1048 + with pytest.raises(ValueError, match="No group keys passed!"): + df.groupby([]) + + def test_groupby_grouper(self, df): + grouped = df.groupby("A") + + result = df.groupby(grouped.grouper).mean() + expected = grouped.mean() + tm.assert_frame_equal(result, expected) + + def test_groupby_dict_mapping(self): + # GH #679 + from pandas import Series + + s = Series({"T1": 5}) + result = s.groupby({"T1": "T2"}).agg(sum) + expected = s.groupby(["T2"]).agg(sum) + tm.assert_series_equal(result, expected) + + s = Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")) + mapping = {"a": 0, "b": 0, "c": 1, "d": 1} + + result = s.groupby(mapping).mean() + result2 = s.groupby(mapping).agg(np.mean) + expected = s.groupby([0, 0, 1, 1]).mean() + expected2 = s.groupby([0, 0, 
1, 1]).mean() + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, result2) + tm.assert_series_equal(result, expected2) + + def test_groupby_grouper_f_sanity_checked(self): + dates = date_range("01-Jan-2013", periods=12, freq="MS") + ts = Series(np.random.randn(12), index=dates) + + # GH3035 + # index.map is used to apply grouper to the index + # if it fails on the elements, map tries it on the entire index as + # a sequence. That can yield invalid results that cause trouble + # down the line. + # the surprise comes from using key[0:6] rather then str(key)[0:6] + # when the elements are Timestamp. + # the result is Index[0:6], very confusing. + + msg = r"Grouper result violates len\(labels\) == len\(data\)" + with pytest.raises(AssertionError, match=msg): + ts.groupby(lambda key: key[0:6]) + + def test_grouping_error_on_multidim_input(self, df): + msg = "Grouper for '' not 1-dimensional" + with pytest.raises(ValueError, match=msg): + Grouping(df.index, df[["A", "A"]]) + + def test_multiindex_passthru(self): + + # GH 7997 + # regression from 0.14.1 + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df.columns = pd.MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) + + result = df.groupby(axis=1, level=[0, 1]).first() + tm.assert_frame_equal(result, df) + + def test_multiindex_negative_level(self, mframe): + # GH 13901 + result = mframe.groupby(level=-1).sum() + expected = mframe.groupby(level="second").sum() + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=-2).sum() + expected = mframe.groupby(level="first").sum() + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=[-2, -1]).sum() + expected = mframe + tm.assert_frame_equal(result, expected) + + result = mframe.groupby(level=[-1, "first"]).sum() + expected = mframe.groupby(level=["second", "first"]).sum() + tm.assert_frame_equal(result, expected) + + def test_multifunc_select_col_integer_cols(self, df): + df.columns = 
np.arange(len(df.columns)) + + # it works! + df.groupby(1, as_index=False)[2].agg({"Q": np.mean}) + + def test_multiindex_columns_empty_level(self): + lst = [["count", "values"], ["to filter", ""]] + midx = MultiIndex.from_tuples(lst) + + df = DataFrame([[1, "A"]], columns=midx) + + grouped = df.groupby("to filter").groups + assert grouped["A"] == [0] + + grouped = df.groupby([("to filter", "")]).groups + assert grouped["A"] == [0] + + df = DataFrame([[1, "A"], [2, "B"]], columns=midx) + + expected = df.groupby("to filter").groups + result = df.groupby([("to filter", "")]).groups + assert result == expected + + df = DataFrame([[1, "A"], [2, "A"]], columns=midx) + + expected = df.groupby("to filter").groups + result = df.groupby([("to filter", "")]).groups + tm.assert_dict_equal(result, expected) + + def test_groupby_multiindex_tuple(self): + # GH 17979 + df = pd.DataFrame( + [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], + columns=pd.MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]), + ) + expected = df.groupby([("b", 1)]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + df2 = pd.DataFrame( + df.values, + columns=pd.MultiIndex.from_arrays( + [["a", "b", "b", "c"], ["d", "d", "e", "e"]] + ), + ) + expected = df2.groupby([("b", "d")]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + df3 = pd.DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) + expected = df3.groupby([("b", "d")]).groups + result = df.groupby(("b", 1)).groups + tm.assert_dict_equal(expected, result) + + @pytest.mark.parametrize("sort", [True, False]) + def test_groupby_level(self, sort, mframe, df): + # GH 17537 + frame = mframe + deleveled = frame.reset_index() + + result0 = frame.groupby(level=0, sort=sort).sum() + result1 = frame.groupby(level=1, sort=sort).sum() + + expected0 = frame.groupby(deleveled["first"].values, sort=sort).sum() + expected1 = 
frame.groupby(deleveled["second"].values, sort=sort).sum() + + expected0.index.name = "first" + expected1.index.name = "second" + + assert result0.index.name == "first" + assert result1.index.name == "second" + + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + assert result0.index.name == frame.index.names[0] + assert result1.index.name == frame.index.names[1] + + # groupby level name + result0 = frame.groupby(level="first", sort=sort).sum() + result1 = frame.groupby(level="second", sort=sort).sum() + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) + + # axis=1 + + result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() + result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() + tm.assert_frame_equal(result0, expected0.T) + tm.assert_frame_equal(result1, expected1.T) + + # raise exception for non-MultiIndex + msg = "level > 0 or level < -1 only valid with MultiIndex" + with pytest.raises(ValueError, match=msg): + df.groupby(level=1) + + def test_groupby_level_index_names(self, axis): + # GH4014 this used to raise ValueError since 'exp'>1 (in py2) + df = DataFrame({"exp": ["A"] * 3 + ["B"] * 3, "var1": range(6)}).set_index( + "exp" + ) + if axis in (1, "columns"): + df = df.T + df.groupby(level="exp", axis=axis) + msg = f"level name foo is not the name of the {df._get_axis_name(axis)}" + with pytest.raises(ValueError, match=msg): + df.groupby(level="foo", axis=axis) + + @pytest.mark.parametrize("sort", [True, False]) + def test_groupby_level_with_nas(self, sort): + # GH 17537 + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + codes=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + # factorizing doesn't confuse things + s = Series(np.arange(8.0), index=index) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6.0, 22.0], index=[0, 1]) + tm.assert_series_equal(result, expected) + + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + codes=[[1, 1, 
1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + # factorizing doesn't confuse things + s = Series(np.arange(8.0), index=index) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6.0, 18.0], index=[0.0, 1.0]) + tm.assert_series_equal(result, expected) + + def test_groupby_args(self, mframe): + # PR8618 and issue 8015 + frame = mframe + + msg = "You have to supply one of 'by' and 'level'" + with pytest.raises(TypeError, match=msg): + frame.groupby() + + msg = "You have to supply one of 'by' and 'level'" + with pytest.raises(TypeError, match=msg): + frame.groupby(by=None, level=None) + + @pytest.mark.parametrize( + "sort,labels", + [ + [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]], + [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]], + ], + ) + def test_level_preserve_order(self, sort, labels, mframe): + # GH 17537 + grouped = mframe.groupby(level=0, sort=sort) + exp_labels = np.array(labels, np.intp) + tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels) + + def test_grouping_labels(self, mframe): + grouped = mframe.groupby(mframe.index.get_level_values(0)) + exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) + tm.assert_almost_equal(grouped.grouper.codes[0], exp_labels) + + def test_list_grouper_with_nat(self): + # GH 14715 + df = pd.DataFrame({"date": pd.date_range("1/1/2011", periods=365, freq="D")}) + df.iloc[-1] = pd.NaT + grouper = pd.Grouper(key="date", freq="AS") + + # Grouper in a list grouping + result = df.groupby([grouper]) + expected = {pd.Timestamp("2011-01-01"): pd.Index(list(range(364)))} + tm.assert_dict_equal(result.groups, expected) + + # Test case without a list + result = df.groupby(grouper) + expected = {pd.Timestamp("2011-01-01"): 365} + tm.assert_dict_equal(result.groups, expected) + + @pytest.mark.parametrize( + "func,expected", + [ + ( + "transform", + pd.Series(name=2, dtype=np.float64, index=pd.RangeIndex(0, 0, 1)), + ), + ( + "agg", + pd.Series(name=2, dtype=np.float64, index=pd.Float64Index([], name=1)), 
+ ), + ( + "apply", + pd.Series(name=2, dtype=np.float64, index=pd.Float64Index([], name=1)), + ), + ], + ) + def test_evaluate_with_empty_groups(self, func, expected): + # 26208 + # test transform'ing empty groups + # (not testing other agg fns, because they return + # different index objects. + df = pd.DataFrame({1: [], 2: []}) + g = df.groupby(1) + result = getattr(g[2], func)(lambda x: x) + tm.assert_series_equal(result, expected) + + def test_groupby_empty(self): + # https://github.com/pandas-dev/pandas/issues/27190 + s = pd.Series([], name="name", dtype="float64") + gr = s.groupby([]) + + result = gr.mean() + tm.assert_series_equal(result, s) + + # check group properties + assert len(gr.grouper.groupings) == 1 + tm.assert_numpy_array_equal( + gr.grouper.group_info[0], np.array([], dtype=np.dtype("int64")) + ) + + tm.assert_numpy_array_equal( + gr.grouper.group_info[1], np.array([], dtype=np.dtype("int")) + ) + + assert gr.grouper.group_info[2] == 0 + + # check name + assert s.groupby(s).grouper.names == ["name"] + + def test_groupby_level_index_value_all_na(self): + # issue 20519 + df = DataFrame( + [["x", np.nan, 10], [None, np.nan, 20]], columns=["A", "B", "C"] + ).set_index(["A", "B"]) + result = df.groupby(level=["A", "B"]).sum() + expected = DataFrame( + data=[], + index=MultiIndex( + levels=[Index(["x"], dtype="object"), Index([], dtype="float64")], + codes=[[], []], + names=["A", "B"], + ), + columns=["C"], + dtype="int64", + ) + tm.assert_frame_equal(result, expected) + + def test_groupby_multiindex_level_empty(self): + # https://github.com/pandas-dev/pandas/issues/31670 + df = pd.DataFrame( + [[123, "a", 1.0], [123, "b", 2.0]], columns=["id", "category", "value"] + ) + df = df.set_index(["id", "category"]) + empty = df[df.value < 0] + result = empty.groupby("id").sum() + expected = pd.DataFrame( + dtype="float64", columns=["value"], index=pd.Int64Index([], name="id") + ) + tm.assert_frame_equal(result, expected) + + +# get_group +# 
-------------------------------- + + +class TestGetGroup: + def test_get_group(self): + # GH 5267 + # be datelike friendly + df = DataFrame( + { + "DATE": pd.to_datetime( + [ + "10-Oct-2013", + "10-Oct-2013", + "10-Oct-2013", + "11-Oct-2013", + "11-Oct-2013", + "11-Oct-2013", + ] + ), + "label": ["foo", "foo", "bar", "foo", "foo", "bar"], + "VAL": [1, 2, 3, 4, 5, 6], + } + ) + + g = df.groupby("DATE") + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group(Timestamp(key).to_pydatetime()) + result3 = g.get_group(str(Timestamp(key))) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + + g = df.groupby(["DATE", "label"]) + + key = list(g.groups)[0] + result1 = g.get_group(key) + result2 = g.get_group((Timestamp(key[0]).to_pydatetime(), key[1])) + result3 = g.get_group((str(Timestamp(key[0])), key[1])) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) + + # must pass a same-length tuple with multiple keys + msg = "must supply a tuple to get_group with multiple grouping keys" + with pytest.raises(ValueError, match=msg): + g.get_group("foo") + with pytest.raises(ValueError, match=msg): + g.get_group(("foo")) + msg = ( + "must supply a same-length tuple to get_group with multiple" + " grouping keys" + ) + with pytest.raises(ValueError, match=msg): + g.get_group(("foo", "bar", "baz")) + + def test_get_group_empty_bins(self, observed): + + d = pd.DataFrame([3, 1, 7, 6]) + bins = [0, 5, 10, 15] + g = d.groupby(pd.cut(d[0], bins), observed=observed) + + # TODO: should prob allow a str of Interval work as well + # IOW '(0, 5]' + result = g.get_group(pd.Interval(0, 5)) + expected = DataFrame([3, 1], index=[0, 1]) + tm.assert_frame_equal(result, expected) + + msg = r"Interval\(10, 15, closed='right'\)" + with pytest.raises(KeyError, match=msg): + g.get_group(pd.Interval(10, 15)) + + def test_get_group_grouped_by_tuple(self): + # GH 8121 + df = DataFrame([[(1,), (1, 2), (1,), (1, 2)]], 
index=["ids"]).T + gr = df.groupby("ids") + expected = DataFrame({"ids": [(1,), (1,)]}, index=[0, 2]) + result = gr.get_group((1,)) + tm.assert_frame_equal(result, expected) + + dt = pd.to_datetime(["2010-01-01", "2010-01-02", "2010-01-01", "2010-01-02"]) + df = DataFrame({"ids": [(x,) for x in dt]}) + gr = df.groupby("ids") + result = gr.get_group(("2010-01-01",)) + expected = DataFrame({"ids": [(dt[0],), (dt[0],)]}, index=[0, 2]) + tm.assert_frame_equal(result, expected) + + def test_groupby_with_empty(self): + index = pd.DatetimeIndex(()) + data = () + series = pd.Series(data, index, dtype=object) + grouper = pd.Grouper(freq="D") + grouped = series.groupby(grouper) + assert next(iter(grouped), None) is None + + def test_groupby_with_single_column(self): + df = pd.DataFrame({"a": list("abssbab")}) + tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) + # GH 13530 + exp = pd.DataFrame(index=pd.Index(["a", "b", "s"], name="a")) + tm.assert_frame_equal(df.groupby("a").count(), exp) + tm.assert_frame_equal(df.groupby("a").sum(), exp) + tm.assert_frame_equal(df.groupby("a").nth(1), exp) + + def test_gb_key_len_equal_axis_len(self): + # GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = pd.DataFrame( + [["foo", "bar", "B", 1], ["foo", "bar", "B", 2], ["foo", "baz", "C", 3]], + columns=["first", "second", "third", "one"], + ) + df = df.set_index(["first", "second"]) + df = df.groupby(["first", "second", "third"]).size() + assert df.loc[("foo", "bar", "B")] == 2 + assert df.loc[("foo", "baz", "C")] == 1 + + +# groups & iteration +# -------------------------------- + + +class TestIteration: + def test_groups(self, df): + grouped = df.groupby(["A"]) + groups = grouped.groups + assert groups is grouped.groups # caching works + + for k, v in grouped.groups.items(): + assert (df.loc[v]["A"] == k).all() + + grouped = df.groupby(["A", "B"]) + groups = grouped.groups + 
assert groups is grouped.groups # caching works + + for k, v in grouped.groups.items(): + assert (df.loc[v]["A"] == k[0]).all() + assert (df.loc[v]["B"] == k[1]).all() + + def test_grouping_is_iterable(self, tsframe): + # this code path isn't used anywhere else + # not sure it's useful + grouped = tsframe.groupby([lambda x: x.weekday(), lambda x: x.year]) + + # test it works + for g in grouped.grouper.groupings[0]: + pass + + def test_multi_iter(self): + s = Series(np.arange(6)) + k1 = np.array(["a", "a", "a", "b", "b", "b"]) + k2 = np.array(["1", "2", "1", "2", "1", "2"]) + + grouped = s.groupby([k1, k2]) + + iterated = list(grouped) + expected = [ + ("a", "1", s[[0, 2]]), + ("a", "2", s[[1]]), + ("b", "1", s[[4]]), + ("b", "2", s[[3, 5]]), + ] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + tm.assert_series_equal(three, e3) + + def test_multi_iter_frame(self, three_group): + k1 = np.array(["b", "b", "b", "a", "a", "a"]) + k2 = np.array(["1", "2", "1", "2", "1", "2"]) + df = DataFrame( + {"v1": np.random.randn(6), "v2": np.random.randn(6), "k1": k1, "k2": k2}, + index=["one", "two", "three", "four", "five", "six"], + ) + + grouped = df.groupby(["k1", "k2"]) + + # things get sorted! 
+ iterated = list(grouped) + idx = df.index + expected = [ + ("a", "1", df.loc[idx[[4]]]), + ("a", "2", df.loc[idx[[3, 5]]]), + ("b", "1", df.loc[idx[[0, 2]]]), + ("b", "2", df.loc[idx[[1]]]), + ] + for i, ((one, two), three) in enumerate(iterated): + e1, e2, e3 = expected[i] + assert e1 == one + assert e2 == two + tm.assert_frame_equal(three, e3) + + # don't iterate through groups with no data + df["k1"] = np.array(["b", "b", "b", "a", "a", "a"]) + df["k2"] = np.array(["1", "1", "1", "2", "2", "2"]) + grouped = df.groupby(["k1", "k2"]) + groups = {key: gp for key, gp in grouped} + assert len(groups) == 2 + + # axis = 1 + three_levels = three_group.groupby(["A", "B", "C"]).mean() + grouped = three_levels.T.groupby(axis=1, level=(1, 2)) + for key, group in grouped: + pass + + def test_dictify(self, df): + dict(iter(df.groupby("A"))) + dict(iter(df.groupby(["A", "B"]))) + dict(iter(df["C"].groupby(df["A"]))) + dict(iter(df["C"].groupby([df["A"], df["B"]]))) + dict(iter(df.groupby("A")["C"])) + dict(iter(df.groupby(["A", "B"])["C"])) + + def test_groupby_with_small_elem(self): + # GH 8542 + # length=2 + df = pd.DataFrame( + {"event": ["start", "start"], "change": [1234, 5678]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 2 + assert grouped.ngroups == 2 + assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups + assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups + + res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + df = pd.DataFrame( + {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-09-15"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 2 + assert 
grouped.ngroups == 2 + assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups + assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups + + res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0, 2], :]) + res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + + # length=3 + df = pd.DataFrame( + {"event": ["start", "start", "start"], "change": [1234, 5678, 9123]}, + index=pd.DatetimeIndex(["2014-09-10", "2013-10-10", "2014-08-05"]), + ) + grouped = df.groupby([pd.Grouper(freq="M"), "event"]) + assert len(grouped.groups) == 3 + assert grouped.ngroups == 3 + assert (pd.Timestamp("2014-09-30"), "start") in grouped.groups + assert (pd.Timestamp("2013-10-31"), "start") in grouped.groups + assert (pd.Timestamp("2014-08-31"), "start") in grouped.groups + + res = grouped.get_group((pd.Timestamp("2014-09-30"), "start")) + tm.assert_frame_equal(res, df.iloc[[0], :]) + res = grouped.get_group((pd.Timestamp("2013-10-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[1], :]) + res = grouped.get_group((pd.Timestamp("2014-08-31"), "start")) + tm.assert_frame_equal(res, df.iloc[[2], :]) + + def test_grouping_string_repr(self): + # GH 13394 + mi = MultiIndex.from_arrays([list("AAB"), list("aba")]) + df = DataFrame([[1, 2, 3]], columns=mi) + gr = df.groupby(df[("A", "a")]) + + result = gr.grouper.groupings[0].__repr__() + expected = "Grouping(('A', 'a'))" + assert result == expected diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py new file mode 100644 index 00000000..971a447b --- /dev/null +++ b/pandas/tests/groupby/test_index_as_string.py @@ -0,0 +1,82 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture(params=[["inner"], ["inner", "outer"]]) +def frame(request): + levels = request.param + df = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", 
"b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + if levels: + df = df.set_index(levels) + + return df + + +@pytest.fixture() +def series(): + df = pd.DataFrame( + { + "outer": ["a", "a", "a", "b", "b", "b"], + "inner": [1, 2, 3, 1, 2, 3], + "A": np.arange(6), + "B": ["one", "one", "two", "two", "one", "one"], + } + ) + s = df.set_index(["outer", "inner", "B"])["A"] + + return s + + +@pytest.mark.parametrize( + "key_strs,groupers", + [ + ("inner", pd.Grouper(level="inner")), # Index name + (["inner"], [pd.Grouper(level="inner")]), # List of index name + (["B", "inner"], ["B", pd.Grouper(level="inner")]), # Column and index + (["inner", "B"], [pd.Grouper(level="inner"), "B"]), # Index and column + ], +) +def test_grouper_index_level_as_string(frame, key_strs, groupers): + result = frame.groupby(key_strs).mean() + expected = frame.groupby(groupers).mean() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "levels", + [ + "inner", + "outer", + "B", + ["inner"], + ["outer"], + ["B"], + ["inner", "outer"], + ["outer", "inner"], + ["inner", "outer", "B"], + ["B", "outer", "inner"], + ], +) +def test_grouper_index_level_as_string_series(series, levels): + + # Compute expected result + if isinstance(levels, list): + groupers = [pd.Grouper(level=lv) for lv in levels] + else: + groupers = pd.Grouper(level=levels) + + expected = series.groupby(groupers).mean() + + # Compute and check result + result = series.groupby(levels).mean() + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py new file mode 100644 index 00000000..947907ca --- /dev/null +++ b/pandas/tests/groupby/test_nth.py @@ -0,0 +1,609 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, isna +import pandas._testing as tm + + +def test_first_last_nth(df): + # tests for first / 
last / nth + grouped = df.groupby("A") + first = grouped.first() + expected = df.loc[[1, 0], ["B", "C", "D"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(first, expected) + + nth = grouped.nth(0) + tm.assert_frame_equal(nth, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ["B", "C", "D"]] + expected.index = Index(["bar", "foo"], name="A") + tm.assert_frame_equal(last, expected) + + nth = grouped.nth(-1) + tm.assert_frame_equal(nth, expected) + + nth = grouped.nth(1) + expected = df.loc[[2, 3], ["B", "C", "D"]].copy() + expected.index = Index(["foo", "bar"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(nth, expected) + + # it works! + grouped["B"].first() + grouped["B"].last() + grouped["B"].nth(0) + + df.loc[df["A"] == "foo", "B"] = np.nan + assert isna(grouped["B"].first()["foo"]) + assert isna(grouped["B"].last()["foo"]) + assert isna(grouped["B"].nth(0)["foo"]) + + # v0.14.0 whatsnew + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + result = g.first() + expected = df.iloc[[1, 2]].set_index("A") + tm.assert_frame_equal(result, expected) + + expected = df.iloc[[1, 2]].set_index("A") + result = g.nth(0, dropna="any") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_na_object(method, nulls_fixture): + # https://github.com/pandas-dev/pandas/issues/32123 + groups = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby( + "a" + ) + result = getattr(groups, method)() + + if method == "first": + values = [1, 3] + else: + values = [2, 3] + + values = np.array(values, dtype=result["b"].dtype) + idx = pd.Index([1, 2], name="a") + expected = pd.DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index", [0, -1]) +def test_nth_with_na_object(index, nulls_fixture): + # 
https://github.com/pandas-dev/pandas/issues/32123 + groups = pd.DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 3, nulls_fixture]}).groupby( + "a" + ) + result = groups.nth(index) + + if index == 0: + values = [1, 3] + else: + values = [2, nulls_fixture] + + values = np.array(values, dtype=result["b"].dtype) + idx = pd.Index([1, 2], name="a") + expected = pd.DataFrame({"b": values}, index=idx) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_None(method): + # https://github.com/pandas-dev/pandas/issues/32800 + # None should be preserved as object dtype + df = pd.DataFrame.from_dict({"id": ["a"], "value": [None]}) + groups = df.groupby("id", as_index=False) + result = getattr(groups, method)() + + tm.assert_frame_equal(result, df) + + +def test_first_last_nth_dtypes(df_mixed_floats): + + df = df_mixed_floats.copy() + df["E"] = True + df["F"] = 1 + + # tests for first / last / nth + grouped = df.groupby("A") + first = grouped.first() + expected = df.loc[[1, 0], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(first, expected) + + last = grouped.last() + expected = df.loc[[5, 7], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(last, expected) + + nth = grouped.nth(1) + expected = df.loc[[3, 2], ["B", "C", "D", "E", "F"]] + expected.index = Index(["bar", "foo"], name="A") + expected = expected.sort_index() + tm.assert_frame_equal(nth, expected) + + # GH 2763, first/last shifting dtypes + idx = list(range(10)) + idx.append(9) + s = Series(data=range(11), index=idx, name="IntCol") + assert s.dtype == "int64" + f = s.groupby(level=0).first() + assert f.dtype == "int64" + + +def test_first_strings_timestamps(): + # GH 11244 + test = pd.DataFrame( + { + pd.Timestamp("2012-01-01 00:00:00"): ["a", "b"], + pd.Timestamp("2012-01-02 
00:00:00"): ["c", "d"], + "name": ["e", "e"], + "aaaa": ["f", "g"], + } + ) + result = test.groupby("name").first() + expected = DataFrame( + [["a", "c", "f"]], + columns=Index([Timestamp("2012-01-01"), Timestamp("2012-01-02"), "aaaa"]), + index=Index(["e"], name="name"), + ) + tm.assert_frame_equal(result, expected) + + +def test_nth(): + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(1), df.iloc[[1]].set_index("A")) + tm.assert_frame_equal(g.nth(2), df.loc[[]].set_index("A")) + tm.assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index("A")) + tm.assert_frame_equal(g.nth(-3), df.loc[[]].set_index("A")) + tm.assert_series_equal(g.B.nth(0), df.set_index("A").B.iloc[[0, 2]]) + tm.assert_series_equal(g.B.nth(1), df.set_index("A").B.iloc[[1]]) + tm.assert_frame_equal(g[["B"]].nth(0), df.loc[[0, 2], ["A", "B"]].set_index("A")) + + exp = df.set_index("A") + tm.assert_frame_equal(g.nth(0, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(-1, dropna="any"), exp.iloc[[1, 2]]) + + exp["B"] = np.nan + tm.assert_frame_equal(g.nth(7, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(2, dropna="any"), exp.iloc[[1, 2]]) + + # out of bounds, regression from 0.13.1 + # GH 6621 + df = DataFrame( + { + "color": {0: "green", 1: "green", 2: "red", 3: "red", 4: "red"}, + "food": {0: "ham", 1: "eggs", 2: "eggs", 3: "ham", 4: "pork"}, + "two": { + 0: 1.5456590000000001, + 1: -0.070345000000000005, + 2: -2.4004539999999999, + 3: 0.46206000000000003, + 4: 0.52350799999999997, + }, + "one": { + 0: 0.56573799999999996, + 1: -0.9742360000000001, + 2: 1.033801, + 3: -0.78543499999999999, + 4: 0.70422799999999997, + }, + } + ).set_index(["color", "food"]) + + result = df.groupby(level=0, as_index=False).nth(2) + expected = df.iloc[[-1]] + tm.assert_frame_equal(result, 
expected) + + result = df.groupby(level=0, as_index=False).nth(3) + expected = df.loc[[]] + tm.assert_frame_equal(result, expected) + + # GH 7559 + # from the vbench + df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype="int64") + s = df[1] + g = df[0] + expected = s.groupby(g).first() + expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) + tm.assert_series_equal(expected2, expected, check_names=False) + assert expected.name == 1 + assert expected2.name == 1 + + # validate first + v = s[g == 1].iloc[0] + assert expected.iloc[0] == v + assert expected2.iloc[0] == v + + # this is NOT the same as .first (as sorted is default!) + # as it keeps the order in the series (and not the group order) + # related GH 7287 + expected = s.groupby(g, sort=False).first() + result = s.groupby(g, sort=False).nth(0, dropna="all") + tm.assert_series_equal(result, expected) + + with pytest.raises(ValueError, match="For a DataFrame groupby"): + s.groupby(g, sort=False).nth(0, dropna=True) + + # doc example + df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) + g = df.groupby("A") + result = g.B.nth(0, dropna="all") + expected = g.B.first() + tm.assert_series_equal(result, expected) + + # test multiple nth values + df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], columns=["A", "B"]) + g = df.groupby("A") + + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index("A")) + tm.assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index("A")) + + business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", 
freq="B") + df = DataFrame(1, index=business_dates, columns=["a", "b"]) + # get the first, fourth and last two business days for each month + key = [df.index.year, df.index.month] + result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) + expected_dates = pd.to_datetime( + [ + "2014/4/1", + "2014/4/4", + "2014/4/29", + "2014/4/30", + "2014/5/1", + "2014/5/6", + "2014/5/29", + "2014/5/30", + "2014/6/2", + "2014/6/5", + "2014/6/27", + "2014/6/30", + ] + ) + expected = DataFrame(1, columns=["a", "b"], index=expected_dates) + tm.assert_frame_equal(result, expected) + + +def test_nth_multi_index(three_group): + # PR 9090, related to issue 8979 + # test nth on MultiIndex, should match .first() + grouped = three_group.groupby(["A", "B"]) + result = grouped.nth(0) + expected = grouped.first() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, expected_first, expected_last", + [ + ( + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + { + "id": ["A"], + "time": Timestamp("2012-02-01 14:00:00", tz="US/Central"), + "foo": [1], + }, + ), + ( + { + "id": ["A", "B", "A"], + "time": [ + Timestamp("2012-01-01 13:00:00", tz="America/New_York"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + Timestamp("2012-03-01 12:00:00", tz="Europe/London"), + ], + "foo": [1, 2, 3], + }, + { + "id": ["A", "B"], + "time": [ + Timestamp("2012-01-01 13:00:00", tz="America/New_York"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + ], + "foo": [1, 2], + }, + { + "id": ["A", "B"], + "time": [ + Timestamp("2012-03-01 12:00:00", tz="Europe/London"), + Timestamp("2012-02-01 14:00:00", tz="US/Central"), + ], + "foo": [3, 2], + }, + ), + ], +) +def test_first_last_tz(data, expected_first, expected_last): + # GH15884 + # Test that the timezone is retained when calling first + # or last on groupby with 
as_index=False + + df = DataFrame(data) + + result = df.groupby("id", as_index=False).first() + expected = DataFrame(expected_first) + cols = ["id", "time", "foo"] + tm.assert_frame_equal(result[cols], expected[cols]) + + result = df.groupby("id", as_index=False)["time"].first() + tm.assert_frame_equal(result, expected[["id", "time"]]) + + result = df.groupby("id", as_index=False).last() + expected = DataFrame(expected_last) + cols = ["id", "time", "foo"] + tm.assert_frame_equal(result[cols], expected[cols]) + + result = df.groupby("id", as_index=False)["time"].last() + tm.assert_frame_equal(result, expected[["id", "time"]]) + + +@pytest.mark.parametrize( + "method, ts, alpha", + [ + ["first", Timestamp("2013-01-01", tz="US/Eastern"), "a"], + ["last", Timestamp("2013-01-02", tz="US/Eastern"), "b"], + ], +) +def test_first_last_tz_multi_column(method, ts, alpha): + # GH 21603 + category_string = pd.Series(list("abc")).astype("category") + df = pd.DataFrame( + { + "group": [1, 1, 2], + "category_string": category_string, + "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + } + ) + result = getattr(df.groupby("group"), method)() + expected = pd.DataFrame( + { + "category_string": pd.Categorical( + [alpha, "c"], dtype=category_string.dtype + ), + "datetimetz": [ts, Timestamp("2013-01-03", tz="US/Eastern")], + }, + index=pd.Index([1, 2], name="group"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + pd.array([True, False], dtype="boolean"), + pd.array([1, 2], dtype="Int64"), + pd.to_datetime(["2020-01-01", "2020-02-01"]), + pd.to_timedelta([1, 2], unit="D"), + ], +) +@pytest.mark.parametrize("function", ["first", "last", "min", "max"]) +def test_first_last_extension_array_keeps_dtype(values, function): + # https://github.com/pandas-dev/pandas/issues/33071 + # https://github.com/pandas-dev/pandas/issues/32194 + df = DataFrame({"a": [1, 2], "b": values}) + grouped = df.groupby("a") + idx = Index([1, 2], 
name="a") + expected_series = Series(values, name="b", index=idx) + expected_frame = DataFrame({"b": values}, index=idx) + + result_series = getattr(grouped["b"], function)() + tm.assert_series_equal(result_series, expected_series) + + result_frame = grouped.agg({"b": function}) + tm.assert_frame_equal(result_frame, expected_frame) + + +def test_nth_multi_index_as_expected(): + # PR 9090, related to issue 8979 + # test nth on MultiIndex + three_group = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + } + ) + grouped = three_group.groupby(["A", "B"]) + result = grouped.nth(0) + expected = DataFrame( + {"C": ["dull", "dull", "dull", "dull"]}, + index=MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo"], ["one", "two", "one", "two"]], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_groupby_head_tail(): + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + g_as = df.groupby("A", as_index=True) + g_not_as = df.groupby("A", as_index=False) + + # as_index= False, much easier + tm.assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) + tm.assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) + + empty_not_as = DataFrame( + columns=df.columns, index=pd.Index([], dtype=df.index.dtype) + ) + empty_not_as["A"] = empty_not_as["A"].astype(df.A.dtype) + empty_not_as["B"] = empty_not_as["B"].astype(df.B.dtype) + tm.assert_frame_equal(empty_not_as, g_not_as.head(0)) + tm.assert_frame_equal(empty_not_as, g_not_as.tail(0)) + tm.assert_frame_equal(empty_not_as, g_not_as.head(-1)) + tm.assert_frame_equal(empty_not_as, g_not_as.tail(-1)) + + tm.assert_frame_equal(df, g_not_as.head(7)) # contains all + 
tm.assert_frame_equal(df, g_not_as.tail(7)) + + # as_index=True, (used to be different) + df_as = df + + tm.assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) + tm.assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) + + empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) + empty_as["A"] = empty_not_as["A"].astype(df.A.dtype) + empty_as["B"] = empty_not_as["B"].astype(df.B.dtype) + tm.assert_frame_equal(empty_as, g_as.head(0)) + tm.assert_frame_equal(empty_as, g_as.tail(0)) + tm.assert_frame_equal(empty_as, g_as.head(-1)) + tm.assert_frame_equal(empty_as, g_as.tail(-1)) + + tm.assert_frame_equal(df_as, g_as.head(7)) # contains all + tm.assert_frame_equal(df_as, g_as.tail(7)) + + # test with selection + tm.assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) + tm.assert_frame_equal(g_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) + tm.assert_frame_equal(g_as[["B"]].head(1), df_as.loc[[0, 2], ["B"]]) + tm.assert_frame_equal(g_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) + + tm.assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) + tm.assert_frame_equal(g_not_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) + tm.assert_frame_equal(g_not_as[["B"]].head(1), df_as.loc[[0, 2], ["B"]]) + tm.assert_frame_equal(g_not_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) + + +def test_group_selection_cache(): + # GH 12839 nth, head, and tail should return same result consistently + df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + expected = df.iloc[[0, 2]].set_index("A") + + g = df.groupby("A") + result1 = g.head(n=2) + result2 = g.nth(0) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) + + g = df.groupby("A") + result1 = g.tail(n=2) + result2 = g.nth(0) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) + + g = df.groupby("A") + result1 = g.nth(0) + result2 = g.head(n=2) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) + + g = df.groupby("A") + result1 = 
g.nth(0) + result2 = g.tail(n=2) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) + + +def test_nth_empty(): + # GH 16064 + df = DataFrame(index=[0], columns=["a", "b", "c"]) + result = df.groupby("a").nth(10) + expected = DataFrame(index=Index([], name="a"), columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + result = df.groupby(["a", "b"]).nth(10) + expected = DataFrame( + index=MultiIndex([[], []], [[], []], names=["a", "b"]), columns=["c"] + ) + tm.assert_frame_equal(result, expected) + + +def test_nth_column_order(): + # GH 20760 + # Check that nth preserves column order + df = DataFrame( + [[1, "b", 100], [1, "a", 50], [1, "a", np.nan], [2, "c", 200], [2, "d", 150]], + columns=["A", "C", "B"], + ) + result = df.groupby("A").nth(0) + expected = DataFrame( + [["b", 100.0], ["c", 200.0]], columns=["C", "B"], index=Index([1, 2], name="A") + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby("A").nth(-1, dropna="any") + expected = DataFrame( + [["a", 50.0], ["d", 150.0]], columns=["C", "B"], index=Index([1, 2], name="A") + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [None, "any", "all"]) +def test_nth_nan_in_grouper(dropna): + # GH 26011 + df = DataFrame( + [[np.nan, 0, 1], ["abc", 2, 3], [np.nan, 4, 5], ["def", 6, 7], [np.nan, 8, 9]], + columns=list("abc"), + ) + result = df.groupby("a").nth(0, dropna=dropna) + expected = pd.DataFrame( + [[2, 3], [6, 7]], columns=list("bc"), index=Index(["abc", "def"], name="a") + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py new file mode 100644 index 00000000..3461bf6e --- /dev/null +++ b/pandas/tests/groupby/test_rank.py @@ -0,0 +1,445 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, concat +import pandas._testing as tm +from pandas.core.base import DataError + + +def test_rank_apply(): + lev1 = 
tm.rands_array(10, 100) + lev2 = tm.rands_array(10, 130) + lab1 = np.random.randint(0, 100, size=500) + lab2 = np.random.randint(0, 130, size=500) + + df = DataFrame( + { + "value": np.random.randn(500), + "key1": lev1.take(lab1), + "key2": lev2.take(lab2), + } + ) + + result = df.groupby(["key1", "key2"]).value.rank() + + expected = [piece.value.rank() for key, piece in df.groupby(["key1", "key2"])] + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + result = df.groupby(["key1", "key2"]).value.rank(pct=True) + + expected = [ + piece.value.rank(pct=True) for key, piece in df.groupby(["key1", "key2"]) + ] + expected = concat(expected, axis=0) + expected = expected.reindex(result.index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", + [ + [2, 2, 8, 2, 6], + [ + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-08"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-06"), + ], + ], +) +@pytest.mark.parametrize( + "ties_method,ascending,pct,exp", + [ + ("average", True, False, [2.0, 2.0, 5.0, 2.0, 4.0]), + ("average", True, True, [0.4, 0.4, 1.0, 0.4, 0.8]), + ("average", False, False, [4.0, 4.0, 1.0, 4.0, 2.0]), + ("average", False, True, [0.8, 0.8, 0.2, 0.8, 0.4]), + ("min", True, False, [1.0, 1.0, 5.0, 1.0, 4.0]), + ("min", True, True, [0.2, 0.2, 1.0, 0.2, 0.8]), + ("min", False, False, [3.0, 3.0, 1.0, 3.0, 2.0]), + ("min", False, True, [0.6, 0.6, 0.2, 0.6, 0.4]), + ("max", True, False, [3.0, 3.0, 5.0, 3.0, 4.0]), + ("max", True, True, [0.6, 0.6, 1.0, 0.6, 0.8]), + ("max", False, False, [5.0, 5.0, 1.0, 5.0, 2.0]), + ("max", False, True, [1.0, 1.0, 0.2, 1.0, 0.4]), + ("first", True, False, [1.0, 2.0, 5.0, 3.0, 4.0]), + ("first", True, True, [0.2, 0.4, 1.0, 0.6, 0.8]), + ("first", False, False, [3.0, 4.0, 1.0, 5.0, 2.0]), + ("first", False, True, [0.6, 0.8, 
0.2, 1.0, 0.4]), + ("dense", True, False, [1.0, 1.0, 3.0, 1.0, 2.0]), + ("dense", True, True, [1.0 / 3.0, 1.0 / 3.0, 3.0 / 3.0, 1.0 / 3.0, 2.0 / 3.0]), + ("dense", False, False, [3.0, 3.0, 1.0, 3.0, 2.0]), + ("dense", False, True, [3.0 / 3.0, 3.0 / 3.0, 1.0 / 3.0, 3.0 / 3.0, 2.0 / 3.0]), + ], +) +def test_rank_args(grps, vals, ties_method, ascending, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank(method=ties_method, ascending=ascending, pct=pct) + + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", [[-np.inf, -np.inf, np.nan, 1.0, np.nan, np.inf, np.inf]] +) +@pytest.mark.parametrize( + "ties_method,ascending,na_option,exp", + [ + ("average", True, "keep", [1.5, 1.5, np.nan, 3, np.nan, 4.5, 4.5]), + ("average", True, "top", [3.5, 3.5, 1.5, 5.0, 1.5, 6.5, 6.5]), + ("average", True, "bottom", [1.5, 1.5, 6.5, 3.0, 6.5, 4.5, 4.5]), + ("average", False, "keep", [4.5, 4.5, np.nan, 3, np.nan, 1.5, 1.5]), + ("average", False, "top", [6.5, 6.5, 1.5, 5.0, 1.5, 3.5, 3.5]), + ("average", False, "bottom", [4.5, 4.5, 6.5, 3.0, 6.5, 1.5, 1.5]), + ("min", True, "keep", [1.0, 1.0, np.nan, 3.0, np.nan, 4.0, 4.0]), + ("min", True, "top", [3.0, 3.0, 1.0, 5.0, 1.0, 6.0, 6.0]), + ("min", True, "bottom", [1.0, 1.0, 6.0, 3.0, 6.0, 4.0, 4.0]), + ("min", False, "keep", [4.0, 4.0, np.nan, 3.0, np.nan, 1.0, 1.0]), + ("min", False, "top", [6.0, 6.0, 1.0, 5.0, 1.0, 3.0, 3.0]), + ("min", False, "bottom", [4.0, 4.0, 6.0, 3.0, 6.0, 1.0, 1.0]), + ("max", True, "keep", [2.0, 2.0, np.nan, 3.0, np.nan, 5.0, 5.0]), + ("max", True, "top", [4.0, 4.0, 2.0, 5.0, 2.0, 7.0, 7.0]), + ("max", True, "bottom", [2.0, 2.0, 7.0, 3.0, 7.0, 5.0, 5.0]), + ("max", False, "keep", [5.0, 5.0, np.nan, 3.0, np.nan, 2.0, 2.0]), + ("max", False, "top", [7.0, 7.0, 2.0, 5.0, 2.0, 
4.0, 4.0]), + ("max", False, "bottom", [5.0, 5.0, 7.0, 3.0, 7.0, 2.0, 2.0]), + ("first", True, "keep", [1.0, 2.0, np.nan, 3.0, np.nan, 4.0, 5.0]), + ("first", True, "top", [3.0, 4.0, 1.0, 5.0, 2.0, 6.0, 7.0]), + ("first", True, "bottom", [1.0, 2.0, 6.0, 3.0, 7.0, 4.0, 5.0]), + ("first", False, "keep", [4.0, 5.0, np.nan, 3.0, np.nan, 1.0, 2.0]), + ("first", False, "top", [6.0, 7.0, 1.0, 5.0, 2.0, 3.0, 4.0]), + ("first", False, "bottom", [4.0, 5.0, 6.0, 3.0, 7.0, 1.0, 2.0]), + ("dense", True, "keep", [1.0, 1.0, np.nan, 2.0, np.nan, 3.0, 3.0]), + ("dense", True, "top", [2.0, 2.0, 1.0, 3.0, 1.0, 4.0, 4.0]), + ("dense", True, "bottom", [1.0, 1.0, 4.0, 2.0, 4.0, 3.0, 3.0]), + ("dense", False, "keep", [3.0, 3.0, np.nan, 2.0, np.nan, 1.0, 1.0]), + ("dense", False, "top", [4.0, 4.0, 1.0, 3.0, 1.0, 2.0, 2.0]), + ("dense", False, "bottom", [3.0, 3.0, 4.0, 2.0, 4.0, 1.0, 1.0]), + ], +) +def test_infs_n_nans(grps, vals, ties_method, ascending, na_option, exp): + # GH 20561 + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option + ) + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("grps", [["qux"], ["qux", "quux"]]) +@pytest.mark.parametrize( + "vals", + [ + [2, 2, np.nan, 8, 2, 6, np.nan, np.nan], + [ + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-02"), + np.nan, + pd.Timestamp("2018-01-08"), + pd.Timestamp("2018-01-02"), + pd.Timestamp("2018-01-06"), + np.nan, + np.nan, + ], + ], +) +@pytest.mark.parametrize( + "ties_method,ascending,na_option,pct,exp", + [ + ( + "average", + True, + "keep", + False, + [2.0, 2.0, np.nan, 5.0, 2.0, 4.0, np.nan, np.nan], + ), + ( + "average", + True, + "keep", + True, + [0.4, 0.4, np.nan, 1.0, 0.4, 0.8, np.nan, np.nan], + ), + ( + "average", + False, + "keep", + False, + [4.0, 4.0, np.nan, 1.0, 4.0, 2.0, np.nan, 
np.nan], + ), + ( + "average", + False, + "keep", + True, + [0.8, 0.8, np.nan, 0.2, 0.8, 0.4, np.nan, np.nan], + ), + ("min", True, "keep", False, [1.0, 1.0, np.nan, 5.0, 1.0, 4.0, np.nan, np.nan]), + ("min", True, "keep", True, [0.2, 0.2, np.nan, 1.0, 0.2, 0.8, np.nan, np.nan]), + ( + "min", + False, + "keep", + False, + [3.0, 3.0, np.nan, 1.0, 3.0, 2.0, np.nan, np.nan], + ), + ("min", False, "keep", True, [0.6, 0.6, np.nan, 0.2, 0.6, 0.4, np.nan, np.nan]), + ("max", True, "keep", False, [3.0, 3.0, np.nan, 5.0, 3.0, 4.0, np.nan, np.nan]), + ("max", True, "keep", True, [0.6, 0.6, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan]), + ( + "max", + False, + "keep", + False, + [5.0, 5.0, np.nan, 1.0, 5.0, 2.0, np.nan, np.nan], + ), + ("max", False, "keep", True, [1.0, 1.0, np.nan, 0.2, 1.0, 0.4, np.nan, np.nan]), + ( + "first", + True, + "keep", + False, + [1.0, 2.0, np.nan, 5.0, 3.0, 4.0, np.nan, np.nan], + ), + ( + "first", + True, + "keep", + True, + [0.2, 0.4, np.nan, 1.0, 0.6, 0.8, np.nan, np.nan], + ), + ( + "first", + False, + "keep", + False, + [3.0, 4.0, np.nan, 1.0, 5.0, 2.0, np.nan, np.nan], + ), + ( + "first", + False, + "keep", + True, + [0.6, 0.8, np.nan, 0.2, 1.0, 0.4, np.nan, np.nan], + ), + ( + "dense", + True, + "keep", + False, + [1.0, 1.0, np.nan, 3.0, 1.0, 2.0, np.nan, np.nan], + ), + ( + "dense", + True, + "keep", + True, + [ + 1.0 / 3.0, + 1.0 / 3.0, + np.nan, + 3.0 / 3.0, + 1.0 / 3.0, + 2.0 / 3.0, + np.nan, + np.nan, + ], + ), + ( + "dense", + False, + "keep", + False, + [3.0, 3.0, np.nan, 1.0, 3.0, 2.0, np.nan, np.nan], + ), + ( + "dense", + False, + "keep", + True, + [ + 3.0 / 3.0, + 3.0 / 3.0, + np.nan, + 1.0 / 3.0, + 3.0 / 3.0, + 2.0 / 3.0, + np.nan, + np.nan, + ], + ), + ("average", True, "bottom", False, [2.0, 2.0, 7.0, 5.0, 2.0, 4.0, 7.0, 7.0]), + ( + "average", + True, + "bottom", + True, + [0.25, 0.25, 0.875, 0.625, 0.25, 0.5, 0.875, 0.875], + ), + ("average", False, "bottom", False, [4.0, 4.0, 7.0, 1.0, 4.0, 2.0, 7.0, 7.0]), + ( + "average", + 
False, + "bottom", + True, + [0.5, 0.5, 0.875, 0.125, 0.5, 0.25, 0.875, 0.875], + ), + ("min", True, "bottom", False, [1.0, 1.0, 6.0, 5.0, 1.0, 4.0, 6.0, 6.0]), + ( + "min", + True, + "bottom", + True, + [0.125, 0.125, 0.75, 0.625, 0.125, 0.5, 0.75, 0.75], + ), + ("min", False, "bottom", False, [3.0, 3.0, 6.0, 1.0, 3.0, 2.0, 6.0, 6.0]), + ( + "min", + False, + "bottom", + True, + [0.375, 0.375, 0.75, 0.125, 0.375, 0.25, 0.75, 0.75], + ), + ("max", True, "bottom", False, [3.0, 3.0, 8.0, 5.0, 3.0, 4.0, 8.0, 8.0]), + ("max", True, "bottom", True, [0.375, 0.375, 1.0, 0.625, 0.375, 0.5, 1.0, 1.0]), + ("max", False, "bottom", False, [5.0, 5.0, 8.0, 1.0, 5.0, 2.0, 8.0, 8.0]), + ( + "max", + False, + "bottom", + True, + [0.625, 0.625, 1.0, 0.125, 0.625, 0.25, 1.0, 1.0], + ), + ("first", True, "bottom", False, [1.0, 2.0, 6.0, 5.0, 3.0, 4.0, 7.0, 8.0]), + ( + "first", + True, + "bottom", + True, + [0.125, 0.25, 0.75, 0.625, 0.375, 0.5, 0.875, 1.0], + ), + ("first", False, "bottom", False, [3.0, 4.0, 6.0, 1.0, 5.0, 2.0, 7.0, 8.0]), + ( + "first", + False, + "bottom", + True, + [0.375, 0.5, 0.75, 0.125, 0.625, 0.25, 0.875, 1.0], + ), + ("dense", True, "bottom", False, [1.0, 1.0, 4.0, 3.0, 1.0, 2.0, 4.0, 4.0]), + ("dense", True, "bottom", True, [0.25, 0.25, 1.0, 0.75, 0.25, 0.5, 1.0, 1.0]), + ("dense", False, "bottom", False, [3.0, 3.0, 4.0, 1.0, 3.0, 2.0, 4.0, 4.0]), + ("dense", False, "bottom", True, [0.75, 0.75, 1.0, 0.25, 0.75, 0.5, 1.0, 1.0]), + ], +) +def test_rank_args_missing(grps, vals, ties_method, ascending, na_option, pct, exp): + key = np.repeat(grps, len(vals)) + vals = vals * len(grps) + df = DataFrame({"key": key, "val": vals}) + result = df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + exp_df = DataFrame(exp * len(grps), columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize( + "pct,exp", [(False, [3.0, 3.0, 3.0, 3.0, 3.0]), (True, [0.6, 0.6, 0.6, 0.6, 0.6])] +) +def 
test_rank_resets_each_group(pct, exp): + df = DataFrame( + {"key": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"], "val": [1] * 10} + ) + result = df.groupby("key").rank(pct=pct) + exp_df = DataFrame(exp * 2, columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +def test_rank_avg_even_vals(): + df = DataFrame({"key": ["a"] * 4, "val": [1] * 4}) + result = df.groupby("key").rank() + exp_df = DataFrame([2.5, 2.5, 2.5, 2.5], columns=["val"]) + tm.assert_frame_equal(result, exp_df) + + +@pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("na_option", ["keep", "top", "bottom"]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize( + "vals", [["bar", "bar", "foo", "bar", "baz"], ["bar", np.nan, "foo", np.nan, "baz"]] +) +def test_rank_object_raises(ties_method, ascending, na_option, pct, vals): + df = DataFrame({"key": ["foo"] * 5, "val": vals}) + + with pytest.raises(DataError, match="No numeric types to aggregate"): + df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + +@pytest.mark.parametrize("na_option", [True, "bad", 1]) +@pytest.mark.parametrize("ties_method", ["average", "min", "max", "first", "dense"]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("pct", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["bar", "bar", "foo", "bar", "baz"], + ["bar", np.nan, "foo", np.nan, "baz"], + [1, np.nan, 2, np.nan, 3], + ], +) +def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals): + df = DataFrame({"key": ["foo"] * 5, "val": vals}) + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + df.groupby("key").rank( + method=ties_method, ascending=ascending, na_option=na_option, pct=pct + ) + + +def test_rank_empty_group(): + # see gh-22519 + column = "A" + df = 
DataFrame({"A": [0, 1, 0], "B": [1.0, np.nan, 2.0]}) + + result = df.groupby(column).B.rank(pct=True) + expected = Series([0.5, np.nan, 1.0], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby(column).rank(pct=True) + expected = DataFrame({"B": [0.5, np.nan, 1.0]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_key,input_value,output_value", + [ + ([1, 2], [1, 1], [1.0, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, 2], [0.5, 1.0, 0.5, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, np.nan], [0.5, 1.0, 1.0, np.nan]), + ([1, 1, 2], [1, 2, np.nan], [0.5, 1.0, np.nan]), + ], +) +def test_rank_zero_div(input_key, input_value, output_value): + # GH 23666 + df = DataFrame({"A": input_key, "B": input_value}) + + result = df.groupby("A").rank(method="dense", pct=True) + expected = DataFrame({"B": output_value}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py new file mode 100644 index 00000000..6b8bd9e8 --- /dev/null +++ b/pandas/tests/groupby/test_timegrouper.py @@ -0,0 +1,757 @@ +""" test with the TimeGrouper / grouping with datetimes """ + +from datetime import datetime +from io import StringIO + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouper +from pandas.core.groupby.ops import BinGrouper + + +class TestGroupBy: + def test_groupby_with_timegrouper(self): + # GH 4161 + # TimeGrouper requires a sorted index + # also verifies that the resultant index has the correct name + df_original = DataFrame( + { + "Buyer": "Carl Carl Carl Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 
9, 2, 14, 0), + ], + } + ) + + # GH 6908 change target column's order + df_reordered = df_original.sort_values(by="Quantity") + + for df in [df_original, df_reordered]: + df = df.set_index(["Date"]) + + expected = DataFrame( + {"Quantity": 0}, + index=date_range( + "20130901", "20131205", freq="5D", name="Date", closed="left" + ), + ) + expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") + + result1 = df.resample("5D").sum() + tm.assert_frame_equal(result1, expected) + + df_sorted = df.sort_index() + result2 = df_sorted.groupby(pd.Grouper(freq="5D")).sum() + tm.assert_frame_equal(result2, expected) + + result3 = df.groupby(pd.Grouper(freq="5D")).sum() + tm.assert_frame_equal(result3, expected) + + @pytest.mark.parametrize("should_sort", [True, False]) + def test_groupby_with_timegrouper_methods(self, should_sort): + # GH 3881 + # make sure API of timegrouper conforms + + df = pd.DataFrame( + { + "Branch": "A A A A A B".split(), + "Buyer": "Carl Mark Carl Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 8, 9, 3], + "Date": [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ], + } + ) + + if should_sort: + df = df.sort_values(by="Quantity", ascending=False) + + df = df.set_index("Date", drop=False) + g = df.groupby(pd.Grouper(freq="6M")) + assert g.group_keys + + assert isinstance(g.grouper, BinGrouper) + groups = g.groups + assert isinstance(groups, dict) + assert len(groups) == 3 + + def test_timegrouper_with_reg_groups(self): + + # GH 3794 + # allow combination of timegrouper/reg groups + + df_original = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 1, 1, 13, 0), + datetime(2013, 1, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 
1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 12, 2, 14, 0), + ], + } + ).set_index("Date") + + df_sorted = df_original.sort_values(by="Quantity", ascending=False) + + for df in [df_original, df_sorted]: + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + datetime(2013, 12, 31, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + + result = df.groupby([pd.Grouper(freq="A"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + { + "Buyer": "Carl Mark Carl Joe".split(), + "Quantity": [1, 3, 9, 18], + "Date": [ + datetime(2013, 1, 1, 0, 0), + datetime(2013, 1, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + datetime(2013, 7, 1, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + result = df.groupby([pd.Grouper(freq="6MS"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + df_original = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 10, 1, 13, 0), + datetime(2013, 10, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 2, 12, 0), + datetime(2013, 10, 2, 14, 0), + ], + } + ).set_index("Date") + + df_sorted = df_original.sort_values(by="Quantity", ascending=False) + for df in [df_original, df_sorted]: + + expected = DataFrame( + { + "Buyer": "Carl Joe Mark Carl Joe".split(), + "Quantity": [6, 8, 3, 4, 10], + "Date": [ + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 1, 0, 0), + datetime(2013, 10, 2, 0, 0), + datetime(2013, 10, 2, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + + result = df.groupby([pd.Grouper(freq="1D"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + result = 
df.groupby([pd.Grouper(freq="1M"), "Buyer"]).sum() + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + datetime(2013, 10, 31, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + tm.assert_frame_equal(result, expected) + + # passing the name + df = df.reset_index() + result = df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match="'The grouper name foo is not found'"): + df.groupby([pd.Grouper(freq="1M", key="foo"), "Buyer"]).sum() + + # passing the level + df = df.set_index("Date") + result = df.groupby([pd.Grouper(freq="1M", level="Date"), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + result = df.groupby([pd.Grouper(freq="1M", level=0), "Buyer"]).sum() + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError): + df.groupby([pd.Grouper(freq="1M", level="foo"), "Buyer"]).sum() + + # multi names + df = df.copy() + df["Date"] = df.index + pd.offsets.MonthEnd(2) + result = df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum() + expected = DataFrame( + { + "Buyer": "Carl Joe Mark".split(), + "Quantity": [10, 18, 3], + "Date": [ + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + datetime(2013, 11, 30, 0, 0), + ], + } + ).set_index(["Date", "Buyer"]) + tm.assert_frame_equal(result, expected) + + # error as we have both a level and a name! 
+ with pytest.raises(ValueError): + df.groupby( + [pd.Grouper(freq="1M", key="Date", level="Date"), "Buyer"] + ).sum() + + # single groupers + expected = DataFrame( + {"Quantity": [31], "Date": [datetime(2013, 10, 31, 0, 0)]} + ).set_index("Date") + result = df.groupby(pd.Grouper(freq="1M")).sum() + tm.assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq="1M")]).sum() + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + {"Quantity": [31], "Date": [datetime(2013, 11, 30, 0, 0)]} + ).set_index("Date") + result = df.groupby(pd.Grouper(freq="1M", key="Date")).sum() + tm.assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq="1M", key="Date")]).sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) + def test_timegrouper_with_reg_groups_freq(self, freq): + # GH 6764 multiple grouping with/without sort + df = DataFrame( + { + "date": pd.to_datetime( + [ + "20121002", + "20121007", + "20130130", + "20130202", + "20130305", + "20121002", + "20121207", + "20130130", + "20130202", + "20130305", + "20130202", + "20130305", + ] + ), + "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + "whole_cost": [ + 1790, + 364, + 280, + 259, + 201, + 623, + 90, + 312, + 359, + 301, + 359, + 801, + ], + "cost1": [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12], + } + ).set_index("date") + + expected = ( + df.groupby("user_id")["whole_cost"] + .resample(freq) + .sum(min_count=1) # XXX + .dropna() + .reorder_levels(["date", "user_id"]) + .sort_index() + .astype("int64") + ) + expected.name = "whole_cost" + + result1 = ( + df.sort_index() + .groupby([pd.Grouper(freq=freq), "user_id"])["whole_cost"] + .sum() + ) + tm.assert_series_equal(result1, expected) + + result2 = df.groupby([pd.Grouper(freq=freq), "user_id"])["whole_cost"].sum() + tm.assert_series_equal(result2, expected) + + def test_timegrouper_get_group(self): + # GH 6914 + + df_original = DataFrame( + { + "Buyer": 
"Carl Joe Joe Carl Joe Carl".split(), + "Quantity": [18, 3, 5, 1, 9, 3], + "Date": [ + datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), + ], + } + ) + df_reordered = df_original.sort_values(by="Quantity") + + # single grouping + expected_list = [ + df_original.iloc[[0, 1, 5]], + df_original.iloc[[2, 3]], + df_original.iloc[[4]], + ] + dt_list = ["2013-09-30", "2013-10-31", "2013-12-31"] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq="M", key="Date")) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + tm.assert_frame_equal(result, expected) + + # multiple grouping + expected_list = [ + df_original.iloc[[1]], + df_original.iloc[[3]], + df_original.iloc[[4]], + ] + g_list = [("Joe", "2013-09-30"), ("Carl", "2013-10-31"), ("Joe", "2013-12-31")] + + for df in [df_original, df_reordered]: + grouped = df.groupby(["Buyer", pd.Grouper(freq="M", key="Date")]) + for (b, t), expected in zip(g_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group((b, dt)) + tm.assert_frame_equal(result, expected) + + # with index + df_original = df_original.set_index("Date") + df_reordered = df_original.sort_values(by="Quantity") + + expected_list = [ + df_original.iloc[[0, 1, 5]], + df_original.iloc[[2, 3]], + df_original.iloc[[4]], + ] + + for df in [df_original, df_reordered]: + grouped = df.groupby(pd.Grouper(freq="M")) + for t, expected in zip(dt_list, expected_list): + dt = pd.Timestamp(t) + result = grouped.get_group(dt) + tm.assert_frame_equal(result, expected) + + def test_timegrouper_apply_return_type_series(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. 
+ # Issue #11742 + df = pd.DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df_dt = df.copy() + df_dt["date"] = pd.to_datetime(df_dt["date"]) + + def sumfunc_series(x): + return pd.Series([x["value"].sum()], ("sum",)) + + expected = df.groupby(pd.Grouper(key="date")).apply(sumfunc_series) + result = df_dt.groupby(pd.Grouper(freq="M", key="date")).apply(sumfunc_series) + tm.assert_frame_equal( + result.reset_index(drop=True), expected.reset_index(drop=True) + ) + + def test_timegrouper_apply_return_type_value(self): + # Using `apply` with the `TimeGrouper` should give the + # same return type as an `apply` with a `Grouper`. + # Issue #11742 + df = pd.DataFrame({"date": ["10/10/2000", "11/10/2000"], "value": [10, 13]}) + df_dt = df.copy() + df_dt["date"] = pd.to_datetime(df_dt["date"]) + + def sumfunc_value(x): + return x.value.sum() + + expected = df.groupby(pd.Grouper(key="date")).apply(sumfunc_value) + result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value) + tm.assert_series_equal( + result.reset_index(drop=True), expected.reset_index(drop=True) + ) + + def test_groupby_groups_datetimeindex(self): + # GH#1430 + periods = 1000 + ind = pd.date_range(start="2012/1/1", freq="5min", periods=periods) + df = DataFrame( + {"high": np.arange(periods), "low": np.arange(periods)}, index=ind + ) + grouped = df.groupby(lambda x: datetime(x.year, x.month, x.day)) + + # it works! 
+ groups = grouped.groups + assert isinstance(list(groups.keys())[0], datetime) + + # GH#11442 + index = pd.date_range("2015/01/01", periods=5, name="date") + df = pd.DataFrame({"A": [5, 6, 7, 8, 9], "B": [1, 2, 3, 4, 5]}, index=index) + result = df.groupby(level="date").groups + dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"] + expected = { + pd.Timestamp(date): pd.DatetimeIndex([date], name="date") for date in dates + } + tm.assert_dict_equal(result, expected) + + grouped = df.groupby(level="date") + for date in dates: + result = grouped.get_group(date) + data = [[df.loc[date, "A"], df.loc[date, "B"]]] + expected_index = pd.DatetimeIndex([date], name="date") + expected = pd.DataFrame(data, columns=list("AB"), index=expected_index) + tm.assert_frame_equal(result, expected) + + def test_groupby_groups_datetimeindex_tz(self): + # GH 3950 + dates = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "datetime": dates, + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + df["datetime"] = df["datetime"].apply(lambda d: Timestamp(d, tz="US/Pacific")) + + exp_idx1 = pd.DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 09:00:00", + ], + tz="US/Pacific", + name="datetime", + ) + exp_idx2 = Index(["a", "b"] * 3, name="label") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"value1": [0, 3, 1, 4, 2, 5], "value2": [1, 2, 2, 1, 1, 2]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(["datetime", "label"]).sum() + tm.assert_frame_equal(result, expected) + + # by level + didx = pd.DatetimeIndex(dates, tz="Asia/Tokyo") + df = DataFrame( + {"value1": np.arange(6, dtype="int64"), "value2": 
[1, 2, 3, 1, 2, 3]}, + index=didx, + ) + + exp_idx = pd.DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="Asia/Tokyo", + ) + expected = DataFrame( + {"value1": [3, 5, 7], "value2": [2, 4, 6]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(level=0).sum() + tm.assert_frame_equal(result, expected) + + def test_frame_datetime64_handling_groupby(self): + # it works! + df = DataFrame( + [(3, np.datetime64("2012-07-03")), (3, np.datetime64("2012-07-04"))], + columns=["a", "date"], + ) + result = df.groupby("a").first() + assert result["date"][3] == Timestamp("2012-07-03") + + def test_groupby_multi_timezone(self): + + # combining multiple / different timezones yields UTC + + data = """0,2000-01-28 16:47:00,America/Chicago +1,2000-01-29 16:48:00,America/Chicago +2,2000-01-30 16:49:00,America/Los_Angeles +3,2000-01-31 16:50:00,America/Chicago +4,2000-01-01 16:50:00,America/New_York""" + + df = pd.read_csv(StringIO(data), header=None, names=["value", "date", "tz"]) + result = df.groupby("tz").date.apply( + lambda x: pd.to_datetime(x).dt.tz_localize(x.name) + ) + + expected = Series( + [ + Timestamp("2000-01-28 16:47:00-0600", tz="America/Chicago"), + Timestamp("2000-01-29 16:48:00-0600", tz="America/Chicago"), + Timestamp("2000-01-30 16:49:00-0800", tz="America/Los_Angeles"), + Timestamp("2000-01-31 16:50:00-0600", tz="America/Chicago"), + Timestamp("2000-01-01 16:50:00-0500", tz="America/New_York"), + ], + name="date", + dtype=object, + ) + tm.assert_series_equal(result, expected) + + tz = "America/Chicago" + res_values = df.groupby("tz").date.get_group(tz) + result = pd.to_datetime(res_values).dt.tz_localize(tz) + exp_values = Series( + ["2000-01-28 16:47:00", "2000-01-29 16:48:00", "2000-01-31 16:50:00"], + index=[0, 1, 3], + name="date", + ) + expected = pd.to_datetime(exp_values).dt.tz_localize(tz) + tm.assert_series_equal(result, expected) + + def test_groupby_groups_periods(self): + dates = [ + 
"2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "period": [pd.Period(d, freq="H") for d in dates], + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + + exp_idx1 = pd.PeriodIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 09:00:00", + ], + freq="H", + name="period", + ) + exp_idx2 = Index(["a", "b"] * 3, name="label") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"value1": [0, 3, 1, 4, 2, 5], "value2": [1, 2, 2, 1, 1, 2]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(["period", "label"]).sum() + tm.assert_frame_equal(result, expected) + + # by level + didx = pd.PeriodIndex(dates, freq="H") + df = DataFrame( + {"value1": np.arange(6, dtype="int64"), "value2": [1, 2, 3, 1, 2, 3]}, + index=didx, + ) + + exp_idx = pd.PeriodIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + freq="H", + ) + expected = DataFrame( + {"value1": [3, 5, 7], "value2": [2, 4, 6]}, + index=exp_idx, + columns=["value1", "value2"], + ) + + result = df.groupby(level=0).sum() + tm.assert_frame_equal(result, expected) + + def test_groupby_first_datetime64(self): + df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) + df[1] = df[1].view("M8[ns]") + + assert issubclass(df[1].dtype.type, np.datetime64) + + result = df.groupby(level=0).first() + got_dt = result[1].dtype + assert issubclass(got_dt.type, np.datetime64) + + result = df[1].groupby(level=0).first() + got_dt = result.dtype + assert issubclass(got_dt.type, np.datetime64) + + def test_groupby_max_datetime64(self): + # GH 5869 + # datetimelike dtype conversion from int + df = DataFrame(dict(A=Timestamp("20130101"), B=np.arange(5))) + 
expected = df.groupby("A")["A"].apply(lambda x: x.max()) + result = df.groupby("A")["A"].max() + tm.assert_series_equal(result, expected) + + def test_groupby_datetime64_32_bit(self): + # GH 6410 / numpy 4328 + # 32-bit under 1.9-dev indexing issue + + df = DataFrame({"A": range(2), "B": [pd.Timestamp("2000-01-1")] * 2}) + result = df.groupby("A")["B"].transform(min) + expected = Series([pd.Timestamp("2000-01-1")] * 2, name="B") + tm.assert_series_equal(result, expected) + + def test_groupby_with_timezone_selection(self): + # GH 11616 + # Test that column selection returns output in correct timezone. + np.random.seed(42) + df = pd.DataFrame( + { + "factor": np.random.randint(0, 3, size=60), + "time": pd.date_range( + "01/01/2000 00:00", periods=60, freq="s", tz="UTC" + ), + } + ) + df1 = df.groupby("factor").max()["time"] + df2 = df.groupby("factor")["time"].max() + tm.assert_series_equal(df1, df2) + + def test_timezone_info(self): + # see gh-11682: Timezone info lost when broadcasting + # scalar datetime to DataFrame + + df = pd.DataFrame({"a": [1], "b": [datetime.now(pytz.utc)]}) + assert df["b"][0].tzinfo == pytz.utc + df = pd.DataFrame({"a": [1, 2, 3]}) + df["b"] = datetime.now(pytz.utc) + assert df["b"][0].tzinfo == pytz.utc + + def test_datetime_count(self): + df = DataFrame( + {"a": [1, 2, 3] * 2, "dates": pd.date_range("now", periods=6, freq="T")} + ) + result = df.groupby("a").dates.count() + expected = Series([2, 2, 2], index=Index([1, 2, 3], name="a"), name="dates") + tm.assert_series_equal(result, expected) + + def test_first_last_max_min_on_time_data(self): + # GH 10295 + # Verify that NaT is not in the result of max, min, first and last on + # Dataframe with datetime or timedelta values. 
+ from datetime import timedelta as td + + df_test = DataFrame( + { + "dt": [ + np.nan, + "2015-07-24 10:10", + "2015-07-25 11:11", + "2015-07-23 12:12", + np.nan, + ], + "td": [np.nan, td(days=1), td(days=2), td(days=3), np.nan], + } + ) + df_test.dt = pd.to_datetime(df_test.dt) + df_test["group"] = "A" + df_ref = df_test[df_test.dt.notna()] + + grouped_test = df_test.groupby("group") + grouped_ref = df_ref.groupby("group") + + tm.assert_frame_equal(grouped_ref.max(), grouped_test.max()) + tm.assert_frame_equal(grouped_ref.min(), grouped_test.min()) + tm.assert_frame_equal(grouped_ref.first(), grouped_test.first()) + tm.assert_frame_equal(grouped_ref.last(), grouped_test.last()) + + def test_nunique_with_timegrouper_and_nat(self): + # GH 17575 + test = pd.DataFrame( + { + "time": [ + Timestamp("2016-06-28 09:35:35"), + pd.NaT, + Timestamp("2016-06-28 16:46:28"), + ], + "data": ["1", "2", "3"], + } + ) + + grouper = pd.Grouper(key="time", freq="h") + result = test.groupby(grouper)["data"].nunique() + expected = test[test.time.notnull()].groupby(grouper)["data"].nunique() + tm.assert_series_equal(result, expected) + + def test_scalar_call_versus_list_call(self): + # Issue: 17530 + data_frame = { + "location": ["shanghai", "beijing", "shanghai"], + "time": pd.Series( + ["2017-08-09 13:32:23", "2017-08-11 23:23:15", "2017-08-11 22:23:15"], + dtype="datetime64[ns]", + ), + "value": [1, 2, 3], + } + data_frame = pd.DataFrame(data_frame).set_index("time") + grouper = pd.Grouper(freq="D") + + grouped = data_frame.groupby(grouper) + result = grouped.count() + grouped = data_frame.groupby([grouper]) + expected = grouped.count() + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py new file mode 100644 index 00000000..6c05c403 --- /dev/null +++ b/pandas/tests/groupby/test_transform.py @@ -0,0 +1,1170 @@ +""" test with the .transform """ +from io import StringIO + +import numpy as np +import 
pytest + +from pandas._libs import groupby + +from pandas.core.dtypes.common import ensure_platform_int, is_timedelta64_dtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + MultiIndex, + Series, + Timestamp, + concat, + date_range, +) +import pandas._testing as tm +from pandas.core.groupby.groupby import DataError + + +def assert_fp_equal(a, b): + assert (np.abs(a - b) < 1e-12).all() + + +def test_transform(): + data = Series(np.arange(9) // 3, index=np.arange(9)) + + index = np.arange(9) + np.random.shuffle(index) + data = data.reindex(index) + + grouped = data.groupby(lambda x: x // 3) + + transformed = grouped.transform(lambda x: x * x.sum()) + assert transformed[7] == 12 + + # GH 8046 + # make sure that we preserve the input order + + df = DataFrame( + np.arange(6, dtype="int64").reshape(3, 2), columns=["a", "b"], index=[0, 2, 1] + ) + key = [0, 0, 1] + expected = ( + df.sort_index() + .groupby(key) + .transform(lambda x: x - x.mean()) + .groupby(key) + .mean() + ) + result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(key).mean() + tm.assert_frame_equal(result, expected) + + def demean(arr): + return arr - arr.mean() + + people = DataFrame( + np.random.randn(5, 5), + columns=["a", "b", "c", "d", "e"], + index=["Joe", "Steve", "Wes", "Jim", "Travis"], + ) + key = ["one", "two", "one", "two", "one"] + result = people.groupby(key).transform(demean).groupby(key).mean() + expected = people.groupby(key).apply(demean).groupby(key).mean() + tm.assert_frame_equal(result, expected) + + # GH 8430 + df = tm.makeTimeDataFrame() + g = df.groupby(pd.Grouper(freq="M")) + g.transform(lambda x: x - 1) + + # GH 9700 + df = DataFrame({"a": range(5, 10), "b": range(5)}) + result = df.groupby("a").transform(max) + expected = DataFrame({"b": range(5)}) + tm.assert_frame_equal(result, expected) + + +def test_transform_fast(): + + df = DataFrame({"id": np.arange(100000) / 3, "val": np.random.randn(100000)}) + + grp = df.groupby("id")["val"] + 
+ values = np.repeat(grp.mean().values, ensure_platform_int(grp.count().values)) + expected = pd.Series(values, index=df.index, name="val") + + result = grp.transform(np.mean) + tm.assert_series_equal(result, expected) + + result = grp.transform("mean") + tm.assert_series_equal(result, expected) + + # GH 12737 + df = pd.DataFrame( + { + "grouping": [0, 1, 1, 3], + "f": [1.1, 2.1, 3.1, 4.5], + "d": pd.date_range("2014-1-1", "2014-1-4"), + "i": [1, 2, 3, 4], + }, + columns=["grouping", "f", "i", "d"], + ) + result = df.groupby("grouping").transform("first") + + dates = [ + pd.Timestamp("2014-1-1"), + pd.Timestamp("2014-1-2"), + pd.Timestamp("2014-1-2"), + pd.Timestamp("2014-1-4"), + ] + expected = pd.DataFrame( + {"f": [1.1, 2.1, 2.1, 4.5], "d": dates, "i": [1, 2, 2, 4]}, + columns=["f", "i", "d"], + ) + tm.assert_frame_equal(result, expected) + + # selection + result = df.groupby("grouping")[["f", "i"]].transform("first") + expected = expected[["f", "i"]] + tm.assert_frame_equal(result, expected) + + # dup columns + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["g", "a", "a"]) + result = df.groupby("g").transform("first") + expected = df.drop("g", axis=1) + tm.assert_frame_equal(result, expected) + + +def test_transform_broadcast(tsframe, ts): + grouped = ts.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + + tm.assert_index_equal(result.index, ts.index) + for _, gp in grouped: + assert_fp_equal(result.reindex(gp.index), gp.mean()) + + grouped = tsframe.groupby(lambda x: x.month) + result = grouped.transform(np.mean) + tm.assert_index_equal(result.index, tsframe.index) + for _, gp in grouped: + agged = gp.mean() + res = result.reindex(gp.index) + for col in tsframe: + assert_fp_equal(res[col], agged[col]) + + # group columns + grouped = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) + result = grouped.transform(np.mean) + tm.assert_index_equal(result.index, tsframe.index) + tm.assert_index_equal(result.columns, tsframe.columns) + for 
_, gp in grouped: + agged = gp.mean(1) + res = result.reindex(columns=gp.columns) + for idx in gp.index: + assert_fp_equal(res.xs(idx), agged[idx]) + + +def test_transform_axis(tsframe): + + # make sure that we are setting the axes + # correctly when on axis=0 or 1 + # in the presence of a non-monotonic indexer + # GH12713 + + base = tsframe.iloc[0:5] + r = len(base.index) + c = len(base.columns) + tso = DataFrame( + np.random.randn(r, c), index=base.index, columns=base.columns, dtype="float64" + ) + # monotonic + ts = tso + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: x - x.mean()) + tm.assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + tm.assert_frame_equal(result, expected) + + # non-monotonic + ts = tso.iloc[[1, 0] + list(range(2, len(base)))] + grouped = ts.groupby(lambda x: x.weekday()) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: x - x.mean()) + tm.assert_frame_equal(result, expected) + + ts = ts.T + grouped = ts.groupby(lambda x: x.weekday(), axis=1) + result = ts - grouped.transform("mean") + expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) + tm.assert_frame_equal(result, expected) + + +def test_transform_dtype(): + # GH 9807 + # Check transform dtype output is preserved + df = DataFrame([[1, 3], [2, 3]]) + result = df.groupby(1).transform("mean") + expected = DataFrame([[1.5], [1.5]]) + tm.assert_frame_equal(result, expected) + + +def test_transform_bug(): + # GH 5712 + # transforming on a datetime column + df = DataFrame(dict(A=Timestamp("20130101"), B=np.arange(5))) + result = df.groupby("A")["B"].transform(lambda x: x.rank(ascending=False)) + expected = Series(np.arange(5, 0, step=-1), name="B") + tm.assert_series_equal(result, expected) + + +def test_transform_numeric_to_boolean(): 
+ # GH 16875 + # inconsistency in transforming boolean values + expected = pd.Series([True, True], name="A") + + df = pd.DataFrame({"A": [1.1, 2.2], "B": [1, 2]}) + result = df.groupby("B").A.transform(lambda x: True) + tm.assert_series_equal(result, expected) + + df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) + result = df.groupby("B").A.transform(lambda x: True) + tm.assert_series_equal(result, expected) + + +def test_transform_datetime_to_timedelta(): + # GH 15429 + # transforming a datetime to timedelta + df = DataFrame(dict(A=Timestamp("20130101"), B=np.arange(5))) + expected = pd.Series([Timestamp("20130101") - Timestamp("20130101")] * 5, name="A") + + # this does date math without changing result type in transform + base_time = df["A"][0] + result = ( + df.groupby("A")["A"].transform(lambda x: x.max() - x.min() + base_time) + - base_time + ) + tm.assert_series_equal(result, expected) + + # this does date math and causes the transform to return timedelta + result = df.groupby("A")["A"].transform(lambda x: x.max() - x.min()) + tm.assert_series_equal(result, expected) + + +def test_transform_datetime_to_numeric(): + # GH 10972 + # convert dt to float + df = DataFrame({"a": 1, "b": date_range("2015-01-01", periods=2, freq="D")}) + result = df.groupby("a").b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.mean() + ) + + expected = Series([-0.5, 0.5], name="b") + tm.assert_series_equal(result, expected) + + # convert dt to int + df = DataFrame({"a": 1, "b": date_range("2015-01-01", periods=2, freq="D")}) + result = df.groupby("a").b.transform( + lambda x: x.dt.dayofweek - x.dt.dayofweek.min() + ) + + expected = Series([0, 1], name="b") + tm.assert_series_equal(result, expected) + + +def test_transform_casting(): + # 13046 + data = """ + idx A ID3 DATETIME + 0 B-028 b76cd912ff "2014-10-08 13:43:27" + 1 B-054 4a57ed0b02 "2014-10-08 14:26:19" + 2 B-076 1a682034f8 "2014-10-08 14:29:01" + 3 B-023 b76cd912ff "2014-10-08 18:39:34" + 4 B-023 f88g8d7sds "2014-10-08 
18:40:18" + 5 B-033 b76cd912ff "2014-10-08 18:44:30" + 6 B-032 b76cd912ff "2014-10-08 18:46:00" + 7 B-037 b76cd912ff "2014-10-08 18:52:15" + 8 B-046 db959faf02 "2014-10-08 18:59:59" + 9 B-053 b76cd912ff "2014-10-08 19:17:48" + 10 B-065 b76cd912ff "2014-10-08 19:21:38" + """ + df = pd.read_csv( + StringIO(data), sep=r"\s+", index_col=[0], parse_dates=["DATETIME"] + ) + + result = df.groupby("ID3")["DATETIME"].transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.dtype) + + result = df[["ID3", "DATETIME"]].groupby("ID3").transform(lambda x: x.diff()) + assert is_timedelta64_dtype(result.DATETIME.dtype) + + +def test_transform_multiple(ts): + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + + grouped.transform(lambda x: x * 2) + grouped.transform(np.mean) + + +def test_dispatch_transform(tsframe): + df = tsframe[::5].reindex(tsframe.index) + + grouped = df.groupby(lambda x: x.month) + + filled = grouped.fillna(method="pad") + fillit = lambda x: x.fillna(method="pad") + expected = df.groupby(lambda x: x.month).transform(fillit) + tm.assert_frame_equal(filled, expected) + + +def test_transform_select_columns(df): + f = lambda x: x.mean() + result = df.groupby("A")[["C", "D"]].transform(f) + + selection = df[["C", "D"]] + expected = selection.groupby(df["A"]).transform(f) + + tm.assert_frame_equal(result, expected) + + +def test_transform_exclude_nuisance(df): + + # this also tests orderings in transform between + # series/frame to make sure it's consistent + expected = {} + grouped = df.groupby("A") + expected["C"] = grouped["C"].transform(np.mean) + expected["D"] = grouped["D"].transform(np.mean) + expected = DataFrame(expected) + result = df.groupby("A").transform(np.mean) + + tm.assert_frame_equal(result, expected) + + +def test_transform_function_aliases(df): + result = df.groupby("A").transform("mean") + expected = df.groupby("A").transform(np.mean) + tm.assert_frame_equal(result, expected) + + result = 
df.groupby("A")["C"].transform("mean") + expected = df.groupby("A")["C"].transform(np.mean) + tm.assert_series_equal(result, expected) + + +def test_series_fast_transform_date(): + # GH 13191 + df = pd.DataFrame( + {"grouping": [np.nan, 1, 1, 3], "d": pd.date_range("2014-1-1", "2014-1-4")} + ) + result = df.groupby("grouping")["d"].transform("first") + dates = [ + pd.NaT, + pd.Timestamp("2014-1-2"), + pd.Timestamp("2014-1-2"), + pd.Timestamp("2014-1-4"), + ] + expected = pd.Series(dates, name="d") + tm.assert_series_equal(result, expected) + + +def test_transform_length(): + # GH 9697 + df = pd.DataFrame({"col1": [1, 1, 2, 2], "col2": [1, 2, 3, np.nan]}) + expected = pd.Series([3.0] * 4) + + def nsum(x): + return np.nansum(x) + + results = [ + df.groupby("col1").transform(sum)["col2"], + df.groupby("col1")["col2"].transform(sum), + df.groupby("col1").transform(nsum)["col2"], + df.groupby("col1")["col2"].transform(nsum), + ] + for result in results: + tm.assert_series_equal(result, expected, check_names=False) + + +def test_transform_coercion(): + + # 14457 + # when we are transforming be sure to not coerce + # via assignment + df = pd.DataFrame(dict(A=["a", "a"], B=[0, 1])) + g = df.groupby("A") + + expected = g.transform(np.mean) + result = g.transform(lambda x: np.mean(x)) + tm.assert_frame_equal(result, expected) + + +def test_groupby_transform_with_int(): + + # GH 3740, make sure that we might upcast on item-by-item transform + + # floats + df = DataFrame( + dict( + A=[1, 1, 1, 2, 2, 2], + B=Series(1, dtype="float64"), + C=Series([1, 2, 3, 1, 2, 3], dtype="float64"), + D="foo", + ) + ) + with np.errstate(all="ignore"): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame( + dict(B=np.nan, C=Series([-1, 0, 1, -1, 0, 1], dtype="float64")) + ) + tm.assert_frame_equal(result, expected) + + # int case + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=[1, 2, 3, 1, 2, 3], D="foo")) + with np.errstate(all="ignore"): + result 
= df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) + tm.assert_frame_equal(result, expected) + + # int that needs float conversion + s = Series([2, 3, 4, 10, 5, -1]) + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D="foo")) + with np.errstate(all="ignore"): + result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) + + s1 = s.iloc[0:3] + s1 = (s1 - s1.mean()) / s1.std() + s2 = s.iloc[3:6] + s2 = (s2 - s2.mean()) / s2.std() + expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) + tm.assert_frame_equal(result, expected) + + # int downcasting + result = df.groupby("A").transform(lambda x: x * 2 / 2) + expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) + tm.assert_frame_equal(result, expected) + + +def test_groupby_transform_with_nan_group(): + # GH 9941 + df = pd.DataFrame({"a": range(10), "b": [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) + result = df.groupby(df.b)["a"].transform(max) + expected = pd.Series( + [1.0, 1.0, 2.0, 3.0, np.nan, 6.0, 6.0, 9.0, 9.0, 9.0], name="a" + ) + tm.assert_series_equal(result, expected) + + +def test_transform_mixed_type(): + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]]) + df = DataFrame( + { + "d": [1.0, 1.0, 1.0, 2.0, 2.0, 2.0], + "c": np.tile(["a", "b", "c"], 2), + "v": np.arange(1.0, 7.0), + }, + index=index, + ) + + def f(group): + group["g"] = group["d"] * 2 + return group[:1] + + grouped = df.groupby("c") + result = grouped.apply(f) + + assert result["d"].dtype == np.float64 + + # this is by definition a mutating operation! + with pd.option_context("mode.chained_assignment", None): + for key, group in grouped: + res = f(group) + tm.assert_frame_equal(res, result.loc[key]) + + +def _check_cython_group_transform_cumulative(pd_op, np_op, dtype): + """ + Check a group transform that executes a cumulative function. + + Parameters + ---------- + pd_op : callable + The pandas cumulative function. 
+ np_op : callable + The analogous one in NumPy. + dtype : type + The specified dtype of the data. + """ + + is_datetimelike = False + + data = np.array([[1], [2], [3], [4]], dtype=dtype) + ans = np.zeros_like(data) + + labels = np.array([0, 0, 0, 0], dtype=np.int64) + ngroups = 1 + pd_op(ans, data, labels, ngroups, is_datetimelike) + + tm.assert_numpy_array_equal(np_op(data), ans[:, 0], check_dtype=False) + + +def test_cython_group_transform_cumsum(any_real_dtype): + # see gh-4095 + dtype = np.dtype(any_real_dtype).type + pd_op, np_op = groupby.group_cumsum, np.cumsum + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_cumprod(): + # see gh-4095 + dtype = np.float64 + pd_op, np_op = groupby.group_cumprod_float64, np.cumproduct + _check_cython_group_transform_cumulative(pd_op, np_op, dtype) + + +def test_cython_group_transform_algos(): + # see gh-4095 + is_datetimelike = False + + # with nans + labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) + ngroups = 1 + + data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64") + actual = np.zeros_like(data) + actual.fill(np.nan) + groupby.group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike) + expected = np.array([1, 2, 6, np.nan, 24], dtype="float64") + tm.assert_numpy_array_equal(actual[:, 0], expected) + + actual = np.zeros_like(data) + actual.fill(np.nan) + groupby.group_cumsum(actual, data, labels, ngroups, is_datetimelike) + expected = np.array([1, 3, 6, np.nan, 10], dtype="float64") + tm.assert_numpy_array_equal(actual[:, 0], expected) + + # timedelta + is_datetimelike = True + data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None] + actual = np.zeros_like(data, dtype="int64") + groupby.group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike) + expected = np.array( + [ + np.timedelta64(1, "ns"), + np.timedelta64(2, "ns"), + np.timedelta64(3, "ns"), + np.timedelta64(4, "ns"), + np.timedelta64(5, "ns"), + ] + ) + 
tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected) + + +@pytest.mark.parametrize( + "op, args, targop", + [ + ("cumprod", (), lambda x: x.cumprod()), + ("cumsum", (), lambda x: x.cumsum()), + ("shift", (-1,), lambda x: x.shift(-1)), + ("shift", (1,), lambda x: x.shift()), + ], +) +def test_cython_transform_series(op, args, targop): + # GH 4095 + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + + # series + for data in [s, s_missing]: + # print(data.head()) + expected = data.groupby(labels).transform(targop) + + tm.assert_series_equal(expected, data.groupby(labels).transform(op, *args)) + tm.assert_series_equal(expected, getattr(data.groupby(labels), op)(*args)) + + +@pytest.mark.parametrize("op", ["cumprod", "cumsum"]) +@pytest.mark.parametrize("skipna", [False, True]) +@pytest.mark.parametrize( + "input, exp", + [ + # When everything is NaN + ({"key": ["b"] * 10, "value": np.nan}, pd.Series([np.nan] * 10, name="value")), + # When there is a single NaN + ( + {"key": ["b"] * 10 + ["a"] * 2, "value": [3] * 3 + [np.nan] + [3] * 8}, + { + ("cumprod", False): [3.0, 9.0, 27.0] + [np.nan] * 7 + [3.0, 9.0], + ("cumprod", True): [ + 3.0, + 9.0, + 27.0, + np.nan, + 81.0, + 243.0, + 729.0, + 2187.0, + 6561.0, + 19683.0, + 3.0, + 9.0, + ], + ("cumsum", False): [3.0, 6.0, 9.0] + [np.nan] * 7 + [3.0, 6.0], + ("cumsum", True): [ + 3.0, + 6.0, + 9.0, + np.nan, + 12.0, + 15.0, + 18.0, + 21.0, + 24.0, + 27.0, + 3.0, + 6.0, + ], + }, + ), + ], +) +def test_groupby_cum_skipna(op, skipna, input, exp): + df = pd.DataFrame(input) + result = df.groupby("key")["value"].transform(op, skipna=skipna) + if isinstance(exp, dict): + expected = exp[(op, skipna)] + else: + expected = exp + expected = pd.Series(expected, name="value") + tm.assert_series_equal(expected, result) + + +@pytest.mark.parametrize( + "op, args, targop", + [ + ("cumprod", (), lambda x: x.cumprod()), + 
("cumsum", (), lambda x: x.cumsum()), + ("shift", (-1,), lambda x: x.shift(-1)), + ("shift", (1,), lambda x: x.shift()), + ], +) +def test_cython_transform_frame(op, args, targop): + s = Series(np.random.randn(1000)) + s_missing = s.copy() + s_missing.iloc[2:10] = np.nan + labels = np.random.randint(0, 50, size=1000).astype(float) + strings = list("qwertyuiopasdfghjklz") + strings_missing = strings[:] + strings_missing[5] = np.nan + df = DataFrame( + { + "float": s, + "float_missing": s_missing, + "int": [1, 1, 1, 1, 2] * 200, + "datetime": pd.date_range("1990-1-1", periods=1000), + "timedelta": pd.timedelta_range(1, freq="s", periods=1000), + "string": strings * 50, + "string_missing": strings_missing * 50, + }, + columns=[ + "float", + "float_missing", + "int", + "datetime", + "timedelta", + "string", + "string_missing", + ], + ) + df["cat"] = df["string"].astype("category") + + df2 = df.copy() + df2.index = pd.MultiIndex.from_product([range(100), range(10)]) + + # DataFrame - Single and MultiIndex, + # group by values, index level, columns + for df in [df, df2]: + for gb_target in [ + dict(by=labels), + dict(level=0), + dict(by="string"), + ]: # dict(by='string_missing')]: + # dict(by=['int','string'])]: + + gb = df.groupby(**gb_target) + # whitelisted methods set the selection before applying + # bit a of hack to make sure the cythonized shift + # is equivalent to pre 0.17.1 behavior + if op == "shift": + gb._set_group_selection() + + if op != "shift" and "int" not in gb_target: + # numeric apply fastpath promotes dtype so have + # to apply separately and concat + i = gb[["int"]].apply(targop) + f = gb[["float", "float_missing"]].apply(targop) + expected = pd.concat([f, i], axis=1) + else: + expected = gb.apply(targop) + + expected = expected.sort_index(axis=1) + tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index(axis=1)) + tm.assert_frame_equal(expected, getattr(gb, op)(*args).sort_index(axis=1)) + # individual columns + for c in df: + if c not 
in ["float", "int", "float_missing"] and op != "shift": + msg = "No numeric types to aggregate" + with pytest.raises(DataError, match=msg): + gb[c].transform(op) + with pytest.raises(DataError, match=msg): + getattr(gb[c], op)() + else: + expected = gb[c].apply(targop) + expected.name = c + tm.assert_series_equal(expected, gb[c].transform(op, *args)) + tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) + + +def test_transform_with_non_scalar_group(): + # GH 10165 + cols = pd.MultiIndex.from_tuples( + [ + ("syn", "A"), + ("mis", "A"), + ("non", "A"), + ("syn", "C"), + ("mis", "C"), + ("non", "C"), + ("syn", "T"), + ("mis", "T"), + ("non", "T"), + ("syn", "G"), + ("mis", "G"), + ("non", "G"), + ] + ) + df = pd.DataFrame( + np.random.randint(1, 10, (4, 12)), columns=cols, index=["A", "C", "G", "T"] + ) + + msg = "transform must return a scalar value for each group.*" + with pytest.raises(ValueError, match=msg): + df.groupby(axis=1, level=1).transform(lambda z: z.div(z.sum(axis=1), axis=0)) + + +@pytest.mark.parametrize( + "cols,exp,comp_func", + [ + ("a", pd.Series([1, 1, 1], name="a"), tm.assert_series_equal), + ( + ["a", "c"], + pd.DataFrame({"a": [1, 1, 1], "c": [1, 1, 1]}), + tm.assert_frame_equal, + ), + ], +) +@pytest.mark.parametrize("agg_func", ["count", "rank", "size"]) +def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request): + if agg_func == "size" and isinstance(cols, list): + # https://github.com/pytest-dev/pytest/issues/6300 + # workaround to xfail fixture/param permutations + reason = "'size' transformation not supported with NDFrameGroupy" + request.node.add_marker(pytest.mark.xfail(reason=reason)) + + # GH 19200 + df = pd.DataFrame( + {"a": pd.date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)} + ) + + result = df.groupby("b")[cols].transform(agg_func) + + if agg_func == "rank": + exp = exp.astype("float") + + comp_func(result, exp) + + +@pytest.mark.parametrize("mix_groupings", [True, False]) 
+@pytest.mark.parametrize("as_series", [True, False]) +@pytest.mark.parametrize("val1,val2", [("foo", "bar"), (1, 2), (1.0, 2.0)]) +@pytest.mark.parametrize( + "fill_method,limit,exp_vals", + [ + ( + "ffill", + None, + [np.nan, np.nan, "val1", "val1", "val1", "val2", "val2", "val2"], + ), + ("ffill", 1, [np.nan, np.nan, "val1", "val1", np.nan, "val2", "val2", np.nan]), + ( + "bfill", + None, + ["val1", "val1", "val1", "val2", "val2", "val2", np.nan, np.nan], + ), + ("bfill", 1, [np.nan, "val1", "val1", np.nan, "val2", "val2", np.nan, np.nan]), + ], +) +def test_group_fill_methods( + mix_groupings, as_series, val1, val2, fill_method, limit, exp_vals +): + vals = [np.nan, np.nan, val1, np.nan, np.nan, val2, np.nan, np.nan] + _exp_vals = list(exp_vals) + # Overwrite placeholder values + for index, exp_val in enumerate(_exp_vals): + if exp_val == "val1": + _exp_vals[index] = val1 + elif exp_val == "val2": + _exp_vals[index] = val2 + + # Need to modify values and expectations depending on the + # Series / DataFrame that we ultimately want to generate + if mix_groupings: # ['a', 'b', 'a, 'b', ...] + keys = ["a", "b"] * len(vals) + + def interweave(list_obj): + temp = list() + for x in list_obj: + temp.extend([x, x]) + + return temp + + _exp_vals = interweave(_exp_vals) + vals = interweave(vals) + else: # ['a', 'a', 'a', ... 
'b', 'b', 'b'] + keys = ["a"] * len(vals) + ["b"] * len(vals) + _exp_vals = _exp_vals * 2 + vals = vals * 2 + + df = DataFrame({"key": keys, "val": vals}) + if as_series: + result = getattr(df.groupby("key")["val"], fill_method)(limit=limit) + exp = Series(_exp_vals, name="val") + tm.assert_series_equal(result, exp) + else: + result = getattr(df.groupby("key"), fill_method)(limit=limit) + exp = DataFrame({"val": _exp_vals}) + tm.assert_frame_equal(result, exp) + + +@pytest.mark.parametrize("fill_method", ["ffill", "bfill"]) +def test_pad_stable_sorting(fill_method): + # GH 21207 + x = [0] * 20 + y = [np.nan] * 10 + [1] * 10 + + if fill_method == "bfill": + y = y[::-1] + + df = pd.DataFrame({"x": x, "y": y}) + expected = df.drop("x", 1) + + result = getattr(df.groupby("x"), fill_method)() + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("test_series", [True, False]) +@pytest.mark.parametrize( + "freq", + [ + None, + pytest.param( + "D", + marks=pytest.mark.xfail( + reason="GH#23918 before method uses freq in vectorized approach" + ), + ), + ], +) +@pytest.mark.parametrize("periods", [1, -1]) +@pytest.mark.parametrize("fill_method", ["ffill", "bfill", None]) +@pytest.mark.parametrize("limit", [None, 1]) +def test_pct_change(test_series, freq, periods, fill_method, limit): + # GH 21200, 21621, 30463 + vals = [3, np.nan, np.nan, np.nan, 1, 2, 4, 10, np.nan, 4] + keys = ["a", "b"] + key_v = np.repeat(keys, len(vals)) + df = DataFrame({"key": key_v, "vals": vals * 2}) + + df_g = df + if fill_method is not None: + df_g = getattr(df.groupby("key"), fill_method)(limit=limit) + grp = df_g.groupby(df.key) + + expected = grp["vals"].obj / grp["vals"].shift(periods) - 1 + + if test_series: + result = df.groupby("key")["vals"].pct_change( + periods=periods, fill_method=fill_method, limit=limit, freq=freq + ) + tm.assert_series_equal(result, expected) + else: + result = df.groupby("key").pct_change( + periods=periods, fill_method=fill_method, limit=limit, 
freq=freq + ) + tm.assert_frame_equal(result, expected.to_frame("vals")) + + +@pytest.mark.parametrize( + "func, expected_status", + [ + ("ffill", ["shrt", "shrt", "lng", np.nan, "shrt", "ntrl", "ntrl"]), + ("bfill", ["shrt", "lng", "lng", "shrt", "shrt", "ntrl", np.nan]), + ], +) +def test_ffill_bfill_non_unique_multilevel(func, expected_status): + # GH 19437 + date = pd.to_datetime( + [ + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-01", + "2018-01-02", + "2018-01-01", + "2018-01-02", + ] + ) + symbol = ["MSFT", "MSFT", "MSFT", "AAPL", "AAPL", "TSLA", "TSLA"] + status = ["shrt", np.nan, "lng", np.nan, "shrt", "ntrl", np.nan] + + df = DataFrame({"date": date, "symbol": symbol, "status": status}) + df = df.set_index(["date", "symbol"]) + result = getattr(df.groupby("symbol")["status"], func)() + + index = MultiIndex.from_tuples( + tuples=list(zip(*[date, symbol])), names=["date", "symbol"] + ) + expected = Series(expected_status, index=index, name="status") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", [np.any, np.all]) +def test_any_all_np_func(func): + # GH 20653 + df = pd.DataFrame( + [["foo", True], [np.nan, True], ["foo", True]], columns=["key", "val"] + ) + + exp = pd.Series([True, np.nan, True], name="val") + + res = df.groupby("key")["val"].transform(func) + tm.assert_series_equal(res, exp) + + +def test_groupby_transform_rename(): + # https://github.com/pandas-dev/pandas/issues/23461 + def demean_rename(x): + result = x - x.mean() + + if isinstance(x, pd.Series): + return result + + result = result.rename(columns={c: "{c}_demeaned" for c in result.columns}) + + return result + + df = pd.DataFrame({"group": list("ababa"), "value": [1, 1, 1, 2, 2]}) + expected = pd.DataFrame({"value": [-1.0 / 3, -0.5, -1.0 / 3, 0.5, 2.0 / 3]}) + + result = df.groupby("group").transform(demean_rename) + tm.assert_frame_equal(result, expected) + result_single = df.groupby("group").value.transform(demean_rename) + 
tm.assert_series_equal(result_single, expected["value"]) + + +@pytest.mark.parametrize("func", [min, max, np.min, np.max, "first", "last"]) +def test_groupby_transform_timezone_column(func): + # GH 24198 + ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore") + result = pd.DataFrame({"end_time": [ts], "id": [1]}) + result["max_end_time"] = result.groupby("id").end_time.transform(func) + expected = pd.DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "func, values", + [ + ("idxmin", ["1/1/2011"] * 2 + ["1/3/2011"] * 7 + ["1/10/2011"]), + ("idxmax", ["1/2/2011"] * 2 + ["1/9/2011"] * 7 + ["1/10/2011"]), + ], +) +def test_groupby_transform_with_datetimes(func, values): + # GH 15306 + dates = pd.date_range("1/1/2011", periods=10, freq="D") + + stocks = pd.DataFrame({"price": np.arange(10.0)}, index=dates) + stocks["week_id"] = pd.to_datetime(stocks.index).week + + result = stocks.groupby(stocks["week_id"])["price"].transform(func) + + expected = pd.Series(data=pd.to_datetime(values), index=dates, name="price") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["cumsum", "cumprod", "cummin", "cummax"]) +def test_transform_absent_categories(func): + # GH 16771 + # cython transforms with more groups than rows + x_vals = [1] + x_cats = range(2) + y = [1] + df = DataFrame(dict(x=Categorical(x_vals, x_cats), y=y)) + result = getattr(df.y.groupby(df.x), func)() + expected = df.y + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["ffill", "bfill", "shift"]) +@pytest.mark.parametrize("key, val", [("level", 0), ("by", Series([0]))]) +def test_ffill_not_in_axis(func, key, val): + # GH 21521 + df = pd.DataFrame([[np.nan]]) + result = getattr(df.groupby(**{key: val}), func)() + expected = df + + tm.assert_frame_equal(result, expected) + + +def test_transform_invalid_name_raises(): + # GH#27486 + df = 
DataFrame(dict(a=[0, 1, 1, 2])) + g = df.groupby(["a", "b", "b", "c"]) + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("some_arbitrary_name") + + # method exists on the object, but is not a valid transformation/agg + assert hasattr(g, "aggregate") # make sure the method exists + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("aggregate") + + # Test SeriesGroupBy + g = df["a"].groupby(["a", "b", "b", "c"]) + with pytest.raises(ValueError, match="not a valid function name"): + g.transform("some_arbitrary_name") + + +@pytest.mark.parametrize( + "obj", + [ + DataFrame( + dict(a=[0, 0, 0, 1, 1, 1], b=range(6)), index=["A", "B", "C", "D", "E", "F"] + ), + Series([0, 0, 0, 1, 1, 1], index=["A", "B", "C", "D", "E", "F"]), + ], +) +def test_transform_agg_by_name(reduction_func, obj): + func = reduction_func + g = obj.groupby(np.repeat([0, 1], 3)) + + if func == "ngroup": # GH#27468 + pytest.xfail("TODO: g.transform('ngroup') doesn't work") + if func == "size": # GH#27469 + pytest.xfail("TODO: g.transform('size') doesn't work") + + args = {"nth": [0], "quantile": [0.5]}.get(func, []) + + result = g.transform(func, *args) + + # this is the *definition* of a transformation + tm.assert_index_equal(result.index, obj.index) + if hasattr(obj, "columns"): + tm.assert_index_equal(result.columns, obj.columns) + + # verify that values were broadcasted across each group + assert len(set(DataFrame(result).iloc[-3:, -1])) == 1 + + +def test_transform_lambda_with_datetimetz(): + # GH 27496 + df = DataFrame( + { + "time": [ + Timestamp("2010-07-15 03:14:45"), + Timestamp("2010-11-19 18:47:06"), + ], + "timezone": ["Etc/GMT+4", "US/Eastern"], + } + ) + result = df.groupby(["timezone"])["time"].transform( + lambda x: x.dt.tz_localize(x.name) + ) + expected = Series( + [ + Timestamp("2010-07-15 03:14:45", tz="Etc/GMT+4"), + Timestamp("2010-11-19 18:47:06", tz="US/Eastern"), + ], + name="time", + ) + 
tm.assert_series_equal(result, expected) + + +def test_transform_fastpath_raises(): + # GH#29631 case where fastpath defined in groupby.generic _choose_path + # raises, but slow_path does not + + df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]}) + gb = df.groupby("A") + + def func(grp): + # we want a function such that func(frame) fails but func.apply(frame) + # works + if grp.ndim == 2: + # Ensure that fast_path fails + raise NotImplementedError("Don't cross the streams") + return grp * 2 + + # Check that the fastpath raises, see _transform_general + obj = gb._obj_with_exclusions + gen = gb.grouper.get_iterator(obj, axis=gb.axis) + fast_path, slow_path = gb._define_paths(func) + _, group = next(gen) + + with pytest.raises(NotImplementedError, match="Don't cross the streams"): + fast_path(group) + + result = gb.transform(func) + + expected = pd.DataFrame([2, -2, 2, 4], columns=["B"]) + tm.assert_frame_equal(result, expected) + + +def test_transform_lambda_indexing(): + # GH 7883 + df = pd.DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"], + "B": ["one", "one", "two", "three", "two", "six", "five", "three"], + "C": range(8), + "D": range(8), + "E": range(8), + } + ) + df = df.set_index(["A", "B"]) + df = df.sort_index() + result = df.groupby(level="A").transform(lambda x: x.iloc[-1]) + expected = DataFrame( + { + "C": [3, 3, 7, 7, 4, 4, 4, 4], + "D": [3, 3, 7, 7, 4, 4, 4, 4], + "E": [3, 3, 7, 7, 4, 4, 4, 4], + }, + index=MultiIndex.from_tuples( + [ + ("bar", "one"), + ("bar", "three"), + ("flux", "six"), + ("flux", "three"), + ("foo", "five"), + ("foo", "one"), + ("foo", "two"), + ("foo", "two"), + ], + names=["A", "B"], + ), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py new file mode 100644 index 00000000..c86cb453 --- /dev/null +++ b/pandas/tests/groupby/test_value_counts.py @@ -0,0 +1,109 @@ +""" +these are systematically 
testing all of the args to value_counts +with different size combinations. This is to ensure stability of the sorting +and proper parameter handling +""" + +from itertools import product + +import numpy as np +import pytest + +from pandas import DataFrame, Grouper, MultiIndex, Series, date_range, to_datetime +import pandas._testing as tm + + +# our starting frame +def seed_df(seed_nans, n, m): + np.random.seed(1234) + days = date_range("2015-08-24", periods=10) + + frame = DataFrame( + { + "1st": np.random.choice(list("abcd"), n), + "2nd": np.random.choice(days, n), + "3rd": np.random.randint(1, m + 1, n), + } + ) + + if seed_nans: + frame.loc[1::11, "1st"] = np.nan + frame.loc[3::17, "2nd"] = np.nan + frame.loc[7::19, "3rd"] = np.nan + frame.loc[8::19, "3rd"] = np.nan + frame.loc[9::19, "3rd"] = np.nan + + return frame + + +# create input df, keys, and the bins +binned = [] +ids = [] +for seed_nans in [True, False]: + for n, m in product((100, 1000), (5, 20)): + + df = seed_df(seed_nans, n, m) + bins = None, np.arange(0, max(5, df["3rd"].max()) + 1, 2) + keys = "1st", "2nd", ["1st", "2nd"] + for k, b in product(keys, bins): + binned.append((df, k, b, n, m)) + ids.append(f"{k}-{n}-{m}") + + +@pytest.mark.slow +@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids) +@pytest.mark.parametrize("isort", [True, False]) +@pytest.mark.parametrize("normalize", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("dropna", [True, False]) +def test_series_groupby_value_counts( + df, keys, bins, n, m, isort, normalize, sort, ascending, dropna +): + def rebuild_index(df): + arr = list(map(df.index.get_level_values, range(df.index.nlevels))) + df.index = MultiIndex.from_arrays(arr, names=df.index.names) + return df + + kwargs = dict( + normalize=normalize, sort=sort, ascending=ascending, dropna=dropna, bins=bins + ) + + gr = df.groupby(keys, sort=isort) + left = 
gr["3rd"].value_counts(**kwargs) + + gr = df.groupby(keys, sort=isort) + right = gr["3rd"].apply(Series.value_counts, **kwargs) + right.index.names = right.index.names[:-1] + ["3rd"] + + # have to sort on index because of unstable sort on values + left, right = map(rebuild_index, (left, right)) # xref GH9212 + tm.assert_series_equal(left.sort_index(), right.sort_index()) + + +def test_series_groupby_value_counts_with_grouper(): + # GH28479 + df = DataFrame( + { + "Timestamp": [ + 1565083561, + 1565083561 + 86400, + 1565083561 + 86500, + 1565083561 + 86400 * 2, + 1565083561 + 86400 * 3, + 1565083561 + 86500 * 3, + 1565083561 + 86400 * 4, + ], + "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"], + } + ).drop([3]) + + df["Datetime"] = to_datetime(df["Timestamp"].apply(lambda t: str(t)), unit="s") + dfg = df.groupby(Grouper(freq="1D", key="Datetime")) + + # have to sort on index because of unstable sort on values xref GH9212 + result = dfg["Food"].value_counts().sort_index() + expected = dfg["Food"].apply(Series.value_counts).sort_index() + expected.index.names = result.index.names + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py new file mode 100644 index 00000000..8e387e92 --- /dev/null +++ b/pandas/tests/groupby/test_whitelist.py @@ -0,0 +1,436 @@ +""" +test methods relating to generic function evaluation +the so-called white/black lists +""" + +from string import ascii_lowercase + +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, Series, date_range +import pandas._testing as tm +from pandas.core.groupby.base import ( + groupby_other_methods, + reduction_kernels, + transformation_kernels, +) + +AGG_FUNCTIONS = [ + "sum", + "prod", + "min", + "max", + "median", + "mean", + "skew", + "mad", + "std", + "var", + "sem", +] +AGG_FUNCTIONS_WITH_SKIPNA = ["skew", "mad"] + +df_whitelist = [ + "quantile", + "fillna", + "mad", + 
"take", + "idxmax", + "idxmin", + "tshift", + "skew", + "plot", + "hist", + "dtypes", + "corrwith", + "corr", + "cov", + "diff", +] + + +@pytest.fixture(params=df_whitelist) +def df_whitelist_fixture(request): + return request.param + + +s_whitelist = [ + "quantile", + "fillna", + "mad", + "take", + "idxmax", + "idxmin", + "tshift", + "skew", + "plot", + "hist", + "dtype", + "corr", + "cov", + "diff", + "unique", + "nlargest", + "nsmallest", + "is_monotonic_increasing", + "is_monotonic_decreasing", +] + + +@pytest.fixture(params=s_whitelist) +def s_whitelist_fixture(request): + return request.param + + +@pytest.fixture +def mframe(): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + return DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + + +@pytest.fixture +def df(): + return DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + + +@pytest.fixture +def df_letters(): + letters = np.array(list(ascii_lowercase)) + N = 10 + random_letters = letters.take(np.random.randint(0, 26, N)) + df = DataFrame( + { + "floats": N / 10 * Series(np.random.random(N)), + "letters": Series(random_letters), + } + ) + return df + + +@pytest.mark.parametrize("whitelist", [df_whitelist, s_whitelist]) +def test_groupby_whitelist(df_letters, whitelist): + df = df_letters + if whitelist == df_whitelist: + # dataframe + obj = df_letters + else: + obj = df_letters["floats"] + + gb = obj.groupby(df.letters) + + assert set(whitelist) == set(gb._apply_whitelist) + + +def check_whitelist(obj, df, m): + # check the obj for a particular whitelist m + + gb = obj.groupby(df.letters) + + f = getattr(type(gb), m) + + # name + try: + n = f.__name__ + except AttributeError: + return 
+ assert n == m + + # qualname + try: + n = f.__qualname__ + except AttributeError: + return + assert n.endswith(m) + + +def test_groupby_series_whitelist(df_letters, s_whitelist_fixture): + m = s_whitelist_fixture + df = df_letters + check_whitelist(df.letters, df, m) + + +def test_groupby_frame_whitelist(df_letters, df_whitelist_fixture): + m = df_whitelist_fixture + df = df_letters + check_whitelist(df, df, m) + + +@pytest.fixture +def raw_frame(): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + raw_frame = DataFrame( + np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp") + ) + raw_frame.iloc[1, [1, 2]] = np.nan + raw_frame.iloc[7, [0, 1]] = np.nan + return raw_frame + + +@pytest.mark.parametrize("op", AGG_FUNCTIONS) +@pytest.mark.parametrize("level", [0, 1]) +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna, sort): + # GH6944 + # GH 17537 + # explicitly test the whitelist methods + + if axis == 0: + frame = raw_frame + else: + frame = raw_frame.T + + if op in AGG_FUNCTIONS_WITH_SKIPNA: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + result = getattr(grouped, op)(skipna=skipna) + expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + else: + grouped = frame.groupby(level=level, axis=axis, sort=sort) + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) + if sort: + expected = expected.sort_index(axis=axis, level=level) + tm.assert_frame_equal(result, expected) + + +def test_groupby_blacklist(df_letters): + df = df_letters + s = df_letters.floats + + 
blacklist = [ + "eval", + "query", + "abs", + "where", + "mask", + "align", + "groupby", + "clip", + "astype", + "at", + "combine", + "consolidate", + "convert_objects", + ] + to_methods = [method for method in dir(df) if method.startswith("to_")] + + blacklist.extend(to_methods) + + for bl in blacklist: + for obj in (df, s): + gb = obj.groupby(df.letters) + + # e.g., to_csv + defined_but_not_allowed = ( + f"(?:^Cannot.+{repr(bl)}.+'{type(gb).__name__}'.+try " + f"using the 'apply' method$)" + ) + + # e.g., query, eval + not_defined = ( + f"(?:^'{type(gb).__name__}' object has no attribute {repr(bl)}$)" + ) + + msg = f"{defined_but_not_allowed}|{not_defined}" + + with pytest.raises(AttributeError, match=msg): + getattr(gb, bl) + + +def test_tab_completion(mframe): + grp = mframe.groupby(level="second") + results = {v for v in dir(grp) if not v.startswith("_")} + expected = { + "A", + "B", + "C", + "agg", + "aggregate", + "apply", + "boxplot", + "filter", + "first", + "get_group", + "groups", + "hist", + "indices", + "last", + "max", + "mean", + "median", + "min", + "ngroups", + "nth", + "ohlc", + "plot", + "prod", + "size", + "std", + "sum", + "transform", + "var", + "sem", + "count", + "nunique", + "head", + "describe", + "cummax", + "quantile", + "rank", + "cumprod", + "tail", + "resample", + "cummin", + "fillna", + "cumsum", + "cumcount", + "ngroup", + "all", + "shift", + "skew", + "take", + "tshift", + "pct_change", + "any", + "mad", + "corr", + "corrwith", + "cov", + "dtypes", + "ndim", + "diff", + "idxmax", + "idxmin", + "ffill", + "bfill", + "pad", + "backfill", + "rolling", + "expanding", + "pipe", + } + assert results == expected + + +def test_groupby_function_rename(mframe): + grp = mframe.groupby(level="second") + for name in ["sum", "prod", "min", "max", "first", "last"]: + f = getattr(grp, name) + assert f.__name__ == name + + +def test_groupby_selection_with_methods(df): + # some methods which require DatetimeIndex + rng = date_range("2014", 
periods=len(df)) + df.index = rng + + g = df.groupby(["A"])[["C"]] + g_exp = df[["C"]].groupby(df["A"]) + # TODO check groupby with > 1 col ? + + # methods which are called as .foo() + methods = [ + "count", + "corr", + "cummax", + "cummin", + "cumprod", + "describe", + "rank", + "quantile", + "diff", + "shift", + "all", + "any", + "idxmin", + "idxmax", + "ffill", + "bfill", + "pct_change", + "tshift", + ] + + for m in methods: + res = getattr(g, m)() + exp = getattr(g_exp, m)() + + # should always be frames! + tm.assert_frame_equal(res, exp) + + # methods which aren't just .foo() + tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0)) + tm.assert_frame_equal(g.dtypes, g_exp.dtypes) + tm.assert_frame_equal(g.apply(lambda x: x.sum()), g_exp.apply(lambda x: x.sum())) + + tm.assert_frame_equal(g.resample("D").mean(), g_exp.resample("D").mean()) + tm.assert_frame_equal(g.resample("D").ohlc(), g_exp.resample("D").ohlc()) + + tm.assert_frame_equal( + g.filter(lambda x: len(x) == 3), g_exp.filter(lambda x: len(x) == 3) + ) + + +def test_all_methods_categorized(mframe): + grp = mframe.groupby(mframe.iloc[:, 0]) + names = {_ for _ in dir(grp) if not _.startswith("_")} - set(mframe.columns) + new_names = set(names) + new_names -= reduction_kernels + new_names -= transformation_kernels + new_names -= groupby_other_methods + + assert not (reduction_kernels & transformation_kernels) + assert not (reduction_kernels & groupby_other_methods) + assert not (transformation_kernels & groupby_other_methods) + + # new public method? + if new_names: + msg = f""" +There are uncatgeorized methods defined on the Grouper class: +{names}. + +Was a new method recently added? + +Every public method On Grouper must appear in exactly one the +following three lists defined in pandas.core.groupby.base: +- `reduction_kernels` +- `transformation_kernels` +- `groupby_other_methods` +see the comments in pandas/core/groupby/base.py for guidance on +how to fix this test. 
+ """ + raise AssertionError(msg) + + # removed a public method? + all_categorized = reduction_kernels | transformation_kernels | groupby_other_methods + print(names) + print(all_categorized) + if not (names == all_categorized): + msg = f""" +Some methods which are supposed to be on the Grouper class +are missing: +{all_categorized - names}. + +They're still defined in one of the lists that live in pandas/core/groupby/base.py. +If you removed a method, you should update them +""" + raise AssertionError(msg) diff --git a/pandas/tests/indexes/__init__.py b/pandas/tests/indexes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/categorical/__init__.py b/pandas/tests/indexes/categorical/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py new file mode 100644 index 00000000..d870259c --- /dev/null +++ b/pandas/tests/indexes/categorical/test_category.py @@ -0,0 +1,994 @@ +import numpy as np +import pytest + +import pandas._config.config as cf + +from pandas._libs import index as libindex + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import Categorical, IntervalIndex +import pandas._testing as tm +from pandas.core.indexes.api import CategoricalIndex, Index + +from ..common import Base + + +class TestCategoricalIndex(Base): + _holder = CategoricalIndex + + @pytest.fixture + def indices(self, request): + return tm.makeCategoricalIndex(100) + + def create_index(self, categories=None, ordered=False): + if categories is None: + categories = list("cab") + return CategoricalIndex(list("aabbca"), categories=categories, ordered=ordered) + + def test_can_hold_identifiers(self): + idx = self.create_index(categories=list("abcd")) + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + @pytest.mark.parametrize( + "func,op_name", + [ + (lambda idx: idx - 
idx, "__sub__"), + (lambda idx: idx + idx, "__add__"), + (lambda idx: idx - ["a", "b"], "__sub__"), + (lambda idx: idx + ["a", "b"], "__add__"), + (lambda idx: ["a", "b"] - idx, "__rsub__"), + (lambda idx: ["a", "b"] + idx, "__radd__"), + ], + ) + def test_disallow_addsub_ops(self, func, op_name): + # GH 10039 + # set ops (+/-) raise TypeError + idx = pd.Index(pd.Categorical(["a", "b"])) + msg = f"cannot perform {op_name} with this index type: CategoricalIndex" + with pytest.raises(TypeError, match=msg): + func(idx) + + def test_method_delegation(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.set_categories(list("cab")) + tm.assert_index_equal( + result, CategoricalIndex(list("aabbca"), categories=list("cab")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.rename_categories(list("efg")) + tm.assert_index_equal( + result, CategoricalIndex(list("ffggef"), categories=list("efg")) + ) + + # GH18862 (let rename_categories take callables) + result = ci.rename_categories(lambda x: x.upper()) + tm.assert_index_equal( + result, CategoricalIndex(list("AABBCA"), categories=list("CAB")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.add_categories(["d"]) + tm.assert_index_equal( + result, CategoricalIndex(list("aabbca"), categories=list("cabd")) + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab")) + result = ci.remove_categories(["c"]) + tm.assert_index_equal( + result, + CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")), + ) + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.as_unordered() + tm.assert_index_equal(result, ci) + + ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) + result = ci.as_ordered() + tm.assert_index_equal( + result, + CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True), + ) + + # invalid + msg = "cannot use inplace with 
CategoricalIndex" + with pytest.raises(ValueError, match=msg): + ci.set_categories(list("cab"), inplace=True) + + def test_contains(self): + + ci = self.create_index(categories=list("cabdef")) + + assert "a" in ci + assert "z" not in ci + assert "e" not in ci + assert np.nan not in ci + + # assert codes NOT in index + assert 0 not in ci + assert 1 not in ci + + ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef")) + assert np.nan in ci + + @pytest.mark.parametrize( + "item, expected", + [ + (pd.Interval(0, 1), True), + (1.5, True), + (pd.Interval(0.5, 1.5), False), + ("a", False), + (pd.Timestamp(1), False), + (pd.Timedelta(1), False), + ], + ids=str, + ) + def test_contains_interval(self, item, expected): + # GH 23705 + ci = CategoricalIndex(IntervalIndex.from_breaks(range(3))) + result = item in ci + assert result is expected + + def test_contains_list(self): + # GH#21729 + idx = pd.CategoricalIndex([1, 2, 3]) + + assert "a" not in idx + + with pytest.raises(TypeError, match="unhashable type"): + ["a"] in idx + + with pytest.raises(TypeError, match="unhashable type"): + ["a", "b"] in idx + + def test_map(self): + ci = pd.CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) + result = ci.map(lambda x: x.lower()) + exp = pd.CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_index_equal(result, exp) + + ci = pd.CategoricalIndex( + list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" + ) + result = ci.map(lambda x: x.lower()) + exp = pd.CategoricalIndex( + list("ababc"), categories=list("bac"), ordered=False, name="XXX" + ) + tm.assert_index_equal(result, exp) + + # GH 12766: Return an index not an array + tm.assert_index_equal( + ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") + ) + + # change categories dtype + ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = 
ci.map(f) + exp = pd.CategoricalIndex( + [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False + ) + tm.assert_index_equal(result, exp) + + result = ci.map(pd.Series([10, 20, 30], index=["A", "B", "C"])) + tm.assert_index_equal(result, exp) + + result = ci.map({"A": 10, "B": 20, "C": 30}) + tm.assert_index_equal(result, exp) + + def test_map_with_categorical_series(self): + # GH 12756 + a = pd.Index([1, 2, 3, 4]) + b = pd.Series(["even", "odd", "even", "odd"], dtype="category") + c = pd.Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = pd.Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + @pytest.mark.parametrize( + ("data", "f"), + ( + ([1, 1, np.nan], pd.isna), + ([1, 2, np.nan], pd.isna), + ([1, 1, np.nan], {1: False}), + ([1, 2, np.nan], {1: False, 2: False}), + ([1, 1, np.nan], pd.Series([False, False])), + ([1, 2, np.nan], pd.Series([False, False, False])), + ), + ) + def test_map_with_nan(self, data, f): # GH 24241 + values = pd.Categorical(data) + result = values.map(f) + if data[1] == 1: + expected = pd.Categorical([False, False, np.nan]) + tm.assert_categorical_equal(result, expected) + else: + expected = pd.Index([False, False, np.nan]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) + def test_where(self, klass): + i = self.create_index() + cond = [True] * len(i) + expected = i + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * (len(i) - 1) + expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories) + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_append(self): + + ci = self.create_index() + categories = ci.categories + + # append cats with the same categories + result = ci[:3].append(ci[3:]) + tm.assert_index_equal(result, ci, exact=True) + + 
foos = [ci[:1], ci[1:3], ci[3:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, ci, exact=True) + + # empty + result = ci.append([]) + tm.assert_index_equal(result, ci, exact=True) + + # appending with different categories or reordered is not ok + msg = "all inputs must be Index" + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.set_categories(list("abcd"))) + with pytest.raises(TypeError, match=msg): + ci.append(ci.values.reorder_categories(list("abc"))) + + # with objects + result = ci.append(Index(["c", "a"])) + expected = CategoricalIndex(list("aabbcaca"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # invalid objects + msg = "cannot append a non-category item to a CategoricalIndex" + with pytest.raises(TypeError, match=msg): + ci.append(Index(["a", "d"])) + + # GH14298 - if base object is not categorical -> coerce to object + result = Index(["c", "a"]).append(ci) + expected = Index(list("caaabbca")) + tm.assert_index_equal(result, expected, exact=True) + + def test_append_to_another(self): + # hits Index._concat_same_dtype + fst = Index(["a", "b"]) + snd = CategoricalIndex(["d", "e"]) + result = fst.append(snd) + expected = Index(["a", "b", "d", "e"]) + tm.assert_index_equal(result, expected) + + def test_insert(self): + + ci = self.create_index() + categories = ci.categories + + # test 0th element + result = ci.insert(0, "a") + expected = CategoricalIndex(list("aaabbca"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test Nth element that follows Python list behavior + result = ci.insert(-1, "a") + expected = CategoricalIndex(list("aabbcaa"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # test empty + result = CategoricalIndex(categories=categories).insert(0, "a") + expected = CategoricalIndex(["a"], categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + # invalid + msg = ( + "cannot insert 
an item into a CategoricalIndex that is not " + "already an existing category" + ) + with pytest.raises(TypeError, match=msg): + ci.insert(0, "d") + + # GH 18295 (test missing) + expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"]) + for na in (np.nan, pd.NaT, None): + result = CategoricalIndex(list("aabcb")).insert(1, na) + tm.assert_index_equal(result, expected) + + def test_delete(self): + + ci = self.create_index() + categories = ci.categories + + result = ci.delete(0) + expected = CategoricalIndex(list("abbca"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + result = ci.delete(-1) + expected = CategoricalIndex(list("aabbc"), categories=categories) + tm.assert_index_equal(result, expected, exact=True) + + with pytest.raises((IndexError, ValueError)): + # Either depending on NumPy version + ci.delete(10) + + def test_astype(self): + + ci = self.create_index() + result = ci.astype(object) + tm.assert_index_equal(result, Index(np.array(ci))) + + # this IS equal, but not the same class + assert result.equals(ci) + assert isinstance(result, Index) + assert not isinstance(result, CategoricalIndex) + + # interval + ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right") + + ci = CategoricalIndex( + Categorical.from_codes([0, 1, -1], categories=ii, ordered=True) + ) + + result = ci.astype("interval") + expected = ii.take([0, 1, -1]) + tm.assert_index_equal(result, expected) + + result = IntervalIndex(result.values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("index_ordered", [True, False]) + def test_astype_category(self, name, dtype_ordered, index_ordered): + # GH 18630 + index = self.create_index(ordered=index_ordered) + if name: + index = index.rename(name) + + # standard categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = index.astype(dtype) + 
expected = CategoricalIndex( + index.tolist(), + name=name, + categories=index.categories, + ordered=dtype_ordered, + ) + tm.assert_index_equal(result, expected) + + # non-standard categories + dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered) + result = index.astype(dtype) + expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype) + tm.assert_index_equal(result, expected) + + if dtype_ordered is False: + # dtype='category' can't specify ordered, so only test once + result = index.astype("category") + expected = index + tm.assert_index_equal(result, expected) + + def test_reindex_base(self): + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) + expected = np.arange(len(idx), dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + def test_reindexing(self): + np.random.seed(123456789) + + ci = self.create_index() + oidx = Index(np.array(ci)) + + for n in [1, 2, 5, len(ci)]: + finder = oidx[np.random.randint(0, len(ci), size=n)] + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. 
+ for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + + def test_reindex_dtype(self): + c = CategoricalIndex(["a", "b", "c", "a"]) + res, indexer = c.reindex(["a", "c"]) + tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"]) + res, indexer = c.reindex(Categorical(["a", "c"])) + + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + res, indexer = c.reindex(["a", "c"]) + exp = Index(["a", "a", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + res, indexer = c.reindex(Categorical(["a", "c"])) + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_duplicate_target(self): + # See GH25459 + cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) + res, indexer = cat.reindex(["a", "c", "c"]) + exp = Index(["a", "c", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + res, indexer = cat.reindex( + CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + ) + exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + def 
test_reindex_empty_index(self): + # See GH16770 + c = CategoricalIndex([]) + res, indexer = c.reindex(["a", "b"]) + tm.assert_index_equal(res, Index(["a", "b"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) + + @pytest.mark.parametrize( + "data, non_lexsorted_data", + [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]], + ) + def test_is_monotonic(self, data, non_lexsorted_data): + c = CategoricalIndex(data) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(data, ordered=True) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(data, categories=reversed(data)) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is True + + c = CategoricalIndex(data, categories=reversed(data), ordered=True) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is True + + # test when data is neither monotonic increasing nor decreasing + reordered_data = [data[0], data[2], data[1]] + c = CategoricalIndex(reordered_data, categories=reversed(data)) + assert c.is_monotonic_increasing is False + assert c.is_monotonic_decreasing is False + + # non lexsorted categories + categories = non_lexsorted_data + + c = CategoricalIndex(categories[:2], categories=categories) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + c = CategoricalIndex(categories[1:3], categories=categories) + assert c.is_monotonic_increasing is True + assert c.is_monotonic_decreasing is False + + def test_has_duplicates(self): + + idx = CategoricalIndex([0, 0, 0], name="foo") + assert idx.is_unique is False + assert idx.has_duplicates is True + + def test_drop_duplicates(self): + + idx = CategoricalIndex([0, 0, 0], name="foo") + expected = CategoricalIndex([0], name="foo") + tm.assert_index_equal(idx.drop_duplicates(), expected) + 
tm.assert_index_equal(idx.unique(), expected) + + def test_get_indexer(self): + + idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) + idx2 = CategoricalIndex(list("abf")) + + for indexer in [idx2, list("abf"), Index(list("abf"))]: + r1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) + + msg = ( + "method='pad' and method='backfill' not implemented yet for " + "CategoricalIndex" + ) + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="pad") + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="backfill") + + msg = "method='nearest' not implemented yet for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="nearest") + + def test_get_loc(self): + # GH 12531 + cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) + idx1 = Index(list("abcde")) + assert cidx1.get_loc("a") == idx1.get_loc("a") + assert cidx1.get_loc("e") == idx1.get_loc("e") + + for i in [cidx1, idx1]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique + cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) + idx2 = Index(list("aacded")) + + # results in bool array + res = cidx2.get_loc("d") + tm.assert_numpy_array_equal(res, idx2.get_loc("d")) + tm.assert_numpy_array_equal( + res, np.array([False, False, False, True, False, True]) + ) + # unique element results in scalar + res = cidx2.get_loc("e") + assert res == idx2.get_loc("e") + assert res == 4 + + for i in [cidx2, idx2]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique, sliceable + cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) + idx3 = Index(list("aabbb")) + + # results in slice + res = cidx3.get_loc("a") + assert res == idx3.get_loc("a") + assert res == slice(0, 2, None) + + res = cidx3.get_loc("b") + assert res == idx3.get_loc("b") + 
assert res == slice(2, 5, None) + + for i in [cidx3, idx3]: + with pytest.raises(KeyError, match="'c'"): + i.get_loc("c") + + def test_repr_roundtrip(self): + + ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + str(ci) + tm.assert_index_equal(eval(repr(ci)), ci, exact=True) + + # formatting + str(ci) + + # long format + # this is not reprable + ci = CategoricalIndex(np.random.randint(0, 5, size=100)) + str(ci) + + def test_isin(self): + + ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) + tm.assert_numpy_array_equal( + ci.isin(["c"]), np.array([False, False, False, True, False, False]) + ) + tm.assert_numpy_array_equal( + ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False]) + ) + tm.assert_numpy_array_equal( + ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6) + ) + + # mismatched categorical -> coerced to ndarray so doesn't matter + result = ci.isin(ci.set_categories(list("abcdefghi"))) + expected = np.array([True] * 6) + tm.assert_numpy_array_equal(result, expected) + + result = ci.isin(ci.set_categories(list("defghi"))) + expected = np.array([False] * 5 + [True]) + tm.assert_numpy_array_equal(result, expected) + + def test_identical(self): + + ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True) + assert ci1.identical(ci1) + assert ci1.identical(ci1.copy()) + assert not ci1.identical(ci2) + + def test_ensure_copied_data(self, indices): + # gh-12309: Check the "copy" argument of each + # Index.__new__ is honored. + # + # Must be tested separately from other indexes because + # self.values is not an ndarray. + # GH#29918 Index.base has been removed + # FIXME: is this test still meaningful? 
+ _base = lambda ar: ar if getattr(ar, "base", None) is None else ar.base + + result = CategoricalIndex(indices.values, copy=True) + tm.assert_index_equal(indices, result) + assert _base(indices.values) is not _base(result.values) + + result = CategoricalIndex(indices.values, copy=False) + assert _base(indices.values) is _base(result.values) + + def test_equals_categorical(self): + ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) + ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True) + + assert ci1.equals(ci1) + assert not ci1.equals(ci2) + assert ci1.equals(ci1.astype(object)) + assert ci1.astype(object).equals(ci1) + + assert (ci1 == ci1).all() + assert not (ci1 != ci1).all() + assert not (ci1 > ci1).all() + assert not (ci1 < ci1).all() + assert (ci1 <= ci1).all() + assert (ci1 >= ci1).all() + + assert not (ci1 == 1).all() + assert (ci1 == Index(["a", "b"])).all() + assert (ci1 == ci1.values).all() + + # invalid comparisons + with pytest.raises(ValueError, match="Lengths must match"): + ci1 == Index(["a", "b", "c"]) + + msg = ( + "categorical index comparisons must have the same categories " + "and ordered attributes" + "|" + "Categoricals can only be compared if 'categories' are the same. 
" + "Categories are different lengths" + "|" + "Categoricals can only be compared if 'ordered' is the same" + ) + with pytest.raises(TypeError, match=msg): + ci1 == ci2 + with pytest.raises(TypeError, match=msg): + ci1 == Categorical(ci1.values, ordered=False) + with pytest.raises(TypeError, match=msg): + ci1 == Categorical(ci1.values, categories=list("abc")) + + # tests + # make sure that we are testing for category inclusion properly + ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"]) + assert not ci.equals(list("aabca")) + # Same categories, but different order + # Unordered + assert ci.equals(CategoricalIndex(list("aabca"))) + # Ordered + assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True)) + assert ci.equals(ci.copy()) + + ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) + assert not ci.equals(list("aabca")) + assert not ci.equals(CategoricalIndex(list("aabca"))) + assert ci.equals(ci.copy()) + + ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"]) + assert not ci.equals(list("aabca") + [np.nan]) + assert ci.equals(CategoricalIndex(list("aabca") + [np.nan])) + assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True)) + assert ci.equals(ci.copy()) + + def test_equals_categoridcal_unordered(self): + # https://github.com/pandas-dev/pandas/issues/16603 + a = pd.CategoricalIndex(["A"], categories=["A", "B"]) + b = pd.CategoricalIndex(["A"], categories=["B", "A"]) + c = pd.CategoricalIndex(["C"], categories=["B", "A"]) + assert a.equals(b) + assert not a.equals(c) + assert not b.equals(c) + + def test_frame_repr(self): + df = pd.DataFrame({"A": [1, 2, 3]}, index=pd.CategoricalIndex(["a", "b", "c"])) + result = repr(df) + expected = " A\na 1\nb 2\nc 3" + assert result == expected + + def test_string_categorical_index_repr(self): + # short + idx = pd.CategoricalIndex(["a", "bb", "ccc"]) + expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], 
ordered=False, dtype='category')""" # noqa + assert repr(idx) == expected + + # multiple lines + idx = pd.CategoricalIndex(["a", "bb", "ccc"] * 10) + expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa + + assert repr(idx) == expected + + # truncated + idx = pd.CategoricalIndex(["a", "bb", "ccc"] * 100) + expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + ... + 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa + + assert repr(idx) == expected + + # larger categories + idx = pd.CategoricalIndex(list("abcdefghijklmmo")) + expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', + 'm', 'm', 'o'], + categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa + + assert repr(idx) == expected + + # short + idx = pd.CategoricalIndex(["あ", "いい", "ううう"]) + expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa + assert repr(idx) == expected + + # multiple lines + idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 10) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa + + assert repr(idx) == expected + + # truncated + idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 100) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', + ... 
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa + + assert repr(idx) == expected + + # larger categories + idx = pd.CategoricalIndex(list("あいうえおかきくけこさしすせそ")) + expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', + 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa + + assert repr(idx) == expected + + # Emable Unicode option ----------------------------------------- + with cf.option_context("display.unicode.east_asian_width", True): + + # short + idx = pd.CategoricalIndex(["あ", "いい", "ううう"]) + expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa + assert repr(idx) == expected + + # multiple lines + idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 10) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa + + assert repr(idx) == expected + + # truncated + idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 100) + expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', + 'ううう', 'あ', + ... 
+ 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', + 'あ', 'いい', 'ううう'], + categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa + + assert repr(idx) == expected + + # larger categories + idx = pd.CategoricalIndex(list("あいうえおかきくけこさしすせそ")) + expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', + 'さ', 'し', 'す', 'せ', 'そ'], + categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa + + assert repr(idx) == expected + + def test_fillna_categorical(self): + # GH 11343 + idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x") + # fill by value in categories + exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x") + tm.assert_index_equal(idx.fillna(1.0), exp) + + # fill by value not in categories raises ValueError + msg = "fill value must be in categories" + with pytest.raises(ValueError, match=msg): + idx.fillna(2.0) + + def test_take_fill_value(self): + # GH 12631 + + # numeric category + idx = pd.CategoricalIndex([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = pd.CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # object category + idx = pd.CategoricalIndex( + list("CBA"), categories=list("ABC"), ordered=True, name="xxx" + ) + result = idx.take(np.array([1, 0, -1])) + expected = pd.CategoricalIndex( + list("BCA"), 
categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.CategoricalIndex( + ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_fill_value_datetime(self): + + # datetime category + idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") + idx = pd.CategoricalIndex(idx) + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = pd.CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") + exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) + expected = pd.CategoricalIndex(expected, categories=exp_cats) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.DatetimeIndex( + ["2011-02-01", 
"2011-01-01", "2011-03-01"], name="xxx" + ) + expected = pd.CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_invalid_kwargs(self): + idx = pd.CategoricalIndex([1, 2, 3], name="foo") + indices = [1, 0, -1] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + @pytest.mark.parametrize( + "dtype, engine_type", + [ + (np.int8, libindex.Int8Engine), + (np.int16, libindex.Int16Engine), + (np.int32, libindex.Int32Engine), + (np.int64, libindex.Int64Engine), + ], + ) + def test_engine_type(self, dtype, engine_type): + if dtype != np.int64: + # num. of uniques required to push CategoricalIndex.codes to a + # dtype (128 categories required for .codes dtype to be int16 etc.) 
+ num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype] + ci = pd.CategoricalIndex(range(num_uniques)) + else: + # having 2**32 - 2**31 categories would be very memory-intensive, + # so we cheat a bit with the dtype + ci = pd.CategoricalIndex(range(32768)) # == 2**16 - 2**(16 - 1) + ci.values._codes = ci.values._codes.astype("int64") + assert np.issubdtype(ci.codes.dtype, dtype) + assert isinstance(ci._engine, engine_type) + + @pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], + ) + def test_map_str(self, data, categories, ordered_fixture): + # GH 31202 - override base class since we want to maintain categorical/ordered + index = CategoricalIndex(data, categories=categories, ordered=ordered_fixture) + result = index.map(str) + expected = CategoricalIndex( + map(str, data), categories=map(str, categories), ordered=ordered_fixture + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_constructors.py b/pandas/tests/indexes/categorical/test_constructors.py new file mode 100644 index 00000000..1df0874e --- /dev/null +++ b/pandas/tests/indexes/categorical/test_constructors.py @@ -0,0 +1,147 @@ +import numpy as np +import pytest + +from pandas import Categorical, CategoricalDtype, CategoricalIndex, Index +import pandas._testing as tm + + +class TestCategoricalIndexConstructors: + def test_construction(self): + + ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False) + categories = ci.categories + + result = Index(ci) + tm.assert_index_equal(result, ci, exact=True) + assert not result.ordered + + result = Index(ci.values) + tm.assert_index_equal(result, ci, exact=True) + assert not result.ordered + + # empty + result = CategoricalIndex(categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal(result.codes, 
np.array([], dtype="int8")) + assert not result.ordered + + # passing categories + result = CategoricalIndex(list("aabbca"), categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + + c = Categorical(list("aabbca")) + result = CategoricalIndex(c) + tm.assert_index_equal(result.categories, Index(list("abc"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(c, categories=categories) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + ci = CategoricalIndex(c, categories=list("abcd")) + result = CategoricalIndex(ci) + tm.assert_index_equal(result.categories, Index(categories)) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(ci, categories=list("ab")) + tm.assert_index_equal(result.categories, Index(list("ab"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8") + ) + assert not result.ordered + + result = CategoricalIndex(ci, categories=list("ab"), ordered=True) + tm.assert_index_equal(result.categories, Index(list("ab"))) + tm.assert_numpy_array_equal( + result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8") + ) + assert result.ordered + + result = CategoricalIndex(ci, categories=list("ab"), ordered=True) + expected = CategoricalIndex( + ci, categories=list("ab"), ordered=True, dtype="category" + ) + tm.assert_index_equal(result, expected, exact=True) + + # turn me to an Index + result = Index(np.array(ci)) + assert isinstance(result, Index) + assert not isinstance(result, CategoricalIndex) + + def test_construction_with_dtype(self): + + # specify dtype + ci = 
CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False) + + result = Index(np.array(ci), dtype="category") + tm.assert_index_equal(result, ci, exact=True) + + result = Index(np.array(ci).tolist(), dtype="category") + tm.assert_index_equal(result, ci, exact=True) + + # these are generally only equal when the categories are reordered + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + + result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories) + tm.assert_index_equal(result, ci, exact=True) + + # make sure indexes are handled + expected = CategoricalIndex([0, 1, 2], categories=[0, 1, 2], ordered=True) + idx = Index(range(3)) + result = CategoricalIndex(idx, categories=idx, ordered=True) + tm.assert_index_equal(result, expected, exact=True) + + def test_construction_empty_with_bool_categories(self): + # see GH#22702 + cat = CategoricalIndex([], categories=[True, False]) + categories = sorted(cat.categories.tolist()) + assert categories == [False, True] + + def test_construction_with_categorical_dtype(self): + # construction with CategoricalDtype + # GH#18109 + data, cats, ordered = "a a b b".split(), "c b a".split(), True + dtype = CategoricalDtype(categories=cats, ordered=ordered) + + result = CategoricalIndex(data, dtype=dtype) + expected = CategoricalIndex(data, categories=cats, ordered=ordered) + tm.assert_index_equal(result, expected, exact=True) + + # GH#19032 + result = Index(data, dtype=dtype) + tm.assert_index_equal(result, expected, exact=True) + + # error when combining categories/ordered and dtype kwargs + msg = "Cannot specify `categories` or `ordered` together with `dtype`." 
+ with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Index(data, categories=cats, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + CategoricalIndex(data, ordered=ordered, dtype=dtype) + + with pytest.raises(ValueError, match=msg): + Index(data, ordered=ordered, dtype=dtype) + + def test_create_categorical(self): + # GH#17513 The public CI constructor doesn't hit this code path with + # instances of CategoricalIndex, but we still want to test the code + ci = CategoricalIndex(["a", "b", "c"]) + # First ci is self, second ci is data. + result = CategoricalIndex._create_categorical(ci, ci) + expected = Categorical(["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py new file mode 100644 index 00000000..cbffb9d3 --- /dev/null +++ b/pandas/tests/indexes/common.py @@ -0,0 +1,892 @@ +import gc +from typing import Optional, Type + +import numpy as np +import pytest + +from pandas._libs.tslib import iNaT + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + CategoricalIndex, + DatetimeIndex, + Index, + Int64Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + UInt64Index, + isna, +) +import pandas._testing as tm +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +class Base: + """ base class for index sub-class tests """ + + _holder: Optional[Type[Index]] = None + _compat_props = ["shape", "ndim", "size", "nbytes"] + + def test_pickle_compat_construction(self): + # need an object to create with + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some" + r" kind, None was passed|" + r"__new__\(\) missing 1 required positional argument: 'data'|" + r"__new__\(\) takes at least 2 arguments \(1 given\)" 
+ ) + with pytest.raises(TypeError, match=msg): + self._holder() + + def test_to_series(self): + # assert that we are creating a copy of the index + + idx = self.create_index() + s = idx.to_series() + assert s.values is not idx.values + assert s.index is not idx + assert s.name == idx.name + + def test_to_series_with_arguments(self): + # GH18699 + + # index kwarg + idx = self.create_index() + s = idx.to_series(index=idx) + + assert s.values is not idx.values + assert s.index is idx + assert s.name == idx.name + + # name kwarg + idx = self.create_index() + s = idx.to_series(name="__test") + + assert s.values is not idx.values + assert s.index is not idx + assert s.name != idx.name + + @pytest.mark.parametrize("name", [None, "new_name"]) + def test_to_frame(self, name): + # see GH-15230, GH-22580 + idx = self.create_index() + + if name: + idx_name = name + else: + idx_name = idx.name or 0 + + df = idx.to_frame(name=idx_name) + + assert df.index is idx + assert len(df.columns) == 1 + assert df.columns[0] == idx_name + assert df[idx_name].values is not idx.values + + df = idx.to_frame(index=False, name=idx_name) + assert df.index is not idx + + def test_shift(self): + + # GH8083 test the base class for shift + idx = self.create_index() + msg = "Not supported for type {}".format(type(idx).__name__) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) + + def test_constructor_name_unhashable(self): + # GH#29069 check that name is hashable + # See also same-named test in tests.series.test_constructors + idx = self.create_index() + with pytest.raises(TypeError, match="Index.name must be a hashable type"): + type(idx)(idx, name=[]) + + def test_create_index_existing_name(self): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + expected = self.create_index() + if not isinstance(expected, 
def test_numeric_compat(self):
    """Arithmetic with an integer scalar must raise TypeError naming the operator.

    Covers the forward and reflected forms of ``*``, ``/`` and ``//``.
    """
    idx = self.create_index()

    # (dunder named in the error message, thunk that performs the operation)
    rejected_ops = [
        ("__mul__", lambda: idx * 1),
        ("__rmul__", lambda: 1 * idx),
        ("__truediv__", lambda: idx / 1),
        ("__rtruediv__", lambda: 1 / idx),
        ("__floordiv__", lambda: idx // 1),
        ("__rfloordiv__", lambda: 1 // idx),
    ]
    for dunder, attempt in rejected_ops:
        with pytest.raises(TypeError, match="cannot perform " + dunder):
            attempt()
match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + def test_get_indexer_consistency(self, indices): + # See GH 16819 + if isinstance(indices, IntervalIndex): + return + + if indices.is_unique or isinstance(indices, CategoricalIndex): + indexer = indices.get_indexer(indices[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=e): + indices.get_indexer(indices[0:2]) + + indexer, _ = indices.get_indexer_non_unique(indices[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + def test_ndarray_compat_properties(self): + idx = self.create_index() + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) + + values = idx.values + for prop in self._compat_props: + assert getattr(idx, prop) == getattr(values, prop) + + # test for validity + idx.nbytes + idx.values.nbytes + + def test_repr_roundtrip(self): + + idx = self.create_index() + tm.assert_index_equal(eval(repr(idx)), idx) + + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = "foo" + assert "'foo'" in str(idx) + assert type(idx).__name__ in str(idx) + + def test_repr_max_seq_item_setting(self): + # GH10182 + idx = self.create_index() + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert "..." not in str(idx) + + def test_copy_name(self, indices): + # gh-12309: Check that the "name" argument + # passed at initialization is honored. + if isinstance(indices, MultiIndex): + return + + first = type(indices)(indices, copy=True, name="mario") + second = type(first)(first, copy=False) + + # Even though "copy=False", we want a new object. + assert first is not second + + # Not using tm.assert_index_equal() since names differ. 
+ assert indices.equals(first) + + assert first.name == "mario" + assert second.name == "mario" + + s1 = Series(2, index=first) + s2 = Series(3, index=second[:-1]) + + if not isinstance(indices, CategoricalIndex): + # See gh-13365 + s3 = s1 * s2 + assert s3.index.name == "mario" + + def test_ensure_copied_data(self, indices): + # Check the "copy" argument of each Index.__new__ is honoured + # GH12309 + init_kwargs = {} + if isinstance(indices, PeriodIndex): + # Needs "freq" specification: + init_kwargs["freq"] = indices.freq + elif isinstance(indices, (RangeIndex, MultiIndex, CategoricalIndex)): + # RangeIndex cannot be initialized from data + # MultiIndex and CategoricalIndex are tested separately + return + + index_type = type(indices) + result = index_type(indices.values, copy=True, **init_kwargs) + tm.assert_index_equal(indices, result) + tm.assert_numpy_array_equal( + indices._ndarray_values, result._ndarray_values, check_same="copy" + ) + + if isinstance(indices, PeriodIndex): + # .values an object array of Period, thus copied + result = index_type(ordinal=indices.asi8, copy=False, **init_kwargs) + tm.assert_numpy_array_equal( + indices._ndarray_values, result._ndarray_values, check_same="same" + ) + elif isinstance(indices, IntervalIndex): + # checked in test_interval.py + pass + else: + result = index_type(indices.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal( + indices.values, result.values, check_same="same" + ) + tm.assert_numpy_array_equal( + indices._ndarray_values, result._ndarray_values, check_same="same" + ) + + def test_memory_usage(self, indices): + indices._engine.clear_mapping() + result = indices.memory_usage() + if indices.empty: + # we report 0 for no-length + assert result == 0 + return + + # non-zero length + indices.get_loc(indices[0]) + result2 = indices.memory_usage() + result3 = indices.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(indices, (RangeIndex, 
def test_numpy_argsort(self, indices):
    """``np.argsort`` on an index must agree with ``Index.argsort``.

    For the index types that validate numpy-compat arguments, unsupported
    keyword arguments must additionally raise ``ValueError``.
    """
    result = np.argsort(indices)
    expected = indices.argsort()
    tm.assert_numpy_array_equal(result, expected)

    # these are the only two types that perform
    # pandas compatibility input validation - the
    # rest already perform separate (or no) such
    # validation via their 'values' attribute as
    # defined in pandas.core.indexes/base.py - they
    # cannot be changed at the moment due to
    # backwards compatibility concerns
    #
    # The check is made on the instance. The previous
    # ``isinstance(type(indices), ...)`` tested the class object itself,
    # was always False, and left everything below unexecuted.
    if isinstance(indices, (CategoricalIndex, RangeIndex)):
        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(indices, axis=1)

        msg = "the 'order' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(indices, order=("a", "b"))

    # NOTE(review): CategoricalIndex forwards to Categorical.argsort, which
    # appears to accept ``kind``; the rejection is therefore only asserted
    # for RangeIndex - confirm against the argsort implementations.
    if isinstance(indices, RangeIndex):
        msg = "the 'kind' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.argsort(indices, kind="mergesort")
idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + def test_repeat(self): + rep = 2 + i = self.create_index() + expected = pd.Index(i.values.repeat(rep), name=i.name) + tm.assert_index_equal(i.repeat(rep), expected) + + i = self.create_index() + rep = np.arange(len(i)) + expected = pd.Index(i.values.repeat(rep), name=i.name) + tm.assert_index_equal(i.repeat(rep), expected) + + def test_numpy_repeat(self): + rep = 2 + i = self.create_index() + expected = i.repeat(rep) + tm.assert_index_equal(np.repeat(i, rep), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(i, rep, axis=0) + + @pytest.mark.parametrize("klass", [list, tuple, np.array, Series]) + def test_where(self, klass): + i = self.create_index() + + cond = [True] * len(i) + result = i.where(klass(cond)) + expected = i + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * len(i[1:]) + expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype) + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("case", [0.5, "xxx"]) + @pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] + ) + def test_set_ops_error_cases(self, case, method, indices): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(indices, method)(case) + + def test_intersection_base(self, indices): + if isinstance(indices, CategoricalIndex): + return + + first = indices[:5] + second = indices[:3] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + if isinstance(indices, 
@pytest.mark.parametrize("sort", [None, False])
def test_difference_base(self, sort, indices):
    """``Index.difference`` accepts an Index as well as array-like inputs.

    ``indices[2:]`` minus ``indices[:4]`` must leave ``indices[4:]``,
    whether the right-hand side is an Index, ndarray, Series or list.
    CategoricalIndex is skipped.
    """
    if isinstance(indices, CategoricalIndex):
        return

    first = indices[2:]
    second = indices[:4]
    answer = indices[4:]
    result = first.difference(second, sort)
    assert tm.equalContents(result, answer)

    # GH 10149
    cases = [klass(second.values) for klass in [np.array, Series, list]]
    for case in cases:
        # Recompute for every case. Previously the datetime-like branch
        # re-checked the stale ``result`` from above, so the array-like
        # inputs were never exercised for those index types.
        result = first.difference(case, sort)
        if isinstance(indices, (DatetimeIndex, TimedeltaIndex)):
            assert type(result) == type(answer)
            tm.assert_numpy_array_equal(
                result.sort_values().asi8, answer.sort_values().asi8
            )
        else:
            assert tm.equalContents(result, answer)

    if isinstance(indices, MultiIndex):
        msg = "other must be a MultiIndex or a list of tuples"
        with pytest.raises(TypeError, match=msg):
            first.difference([1, 2, 3], sort)
first.symmetric_difference(case) + assert tm.equalContents(result, answer) + + if isinstance(indices, MultiIndex): + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.symmetric_difference([1, 2, 3]) + + def test_insert_base(self, indices): + result = indices[1:4] + + if not len(indices): + return + + # test 0th element + assert indices[0:4].equals(result.insert(0, indices[0])) + + def test_delete_base(self, indices): + if not len(indices): + return + + if isinstance(indices, RangeIndex): + # tested in class + return + + expected = indices[1:] + result = indices.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = indices[:-1] + result = indices.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + indices.delete(len(indices)) + + def test_equals(self, indices): + if isinstance(indices, IntervalIndex): + # IntervalIndex tested separately + return + + assert indices.equals(indices) + assert indices.equals(indices.copy()) + assert indices.equals(indices.astype(object)) + + assert not indices.equals(list(indices)) + assert not indices.equals(np.array(indices)) + + # Cannot pass in non-int64 dtype to RangeIndex + if not isinstance(indices, RangeIndex): + same_values = Index(indices, dtype=object) + assert indices.equals(same_values) + assert same_values.equals(indices) + + if indices.nlevels == 1: + # do not test MultiIndex + assert not indices.equals(Series(indices)) + + def test_equals_op(self): + # GH9947, GH10637 + index_a = self.create_index() + if isinstance(index_a, PeriodIndex): + pytest.skip("Skip check for PeriodIndex") + + n = len(index_a) + index_b = index_a[0:-1] + index_c = index_a[0:-1].append(index_a[-2:-1]) + index_d = index_a[0:1] + + msg = "Lengths must match|could not be broadcast" + with pytest.raises(ValueError, match=msg): + index_a 
== index_b + expected1 = np.array([True] * n) + expected2 = np.array([True] * (n - 1) + [False]) + tm.assert_numpy_array_equal(index_a == index_a, expected1) + tm.assert_numpy_array_equal(index_a == index_c, expected2) + + # test comparisons with numpy arrays + array_a = np.array(index_a) + array_b = np.array(index_a[0:-1]) + array_c = np.array(index_a[0:-1].append(index_a[-2:-1])) + array_d = np.array(index_a[0:1]) + with pytest.raises(ValueError, match=msg): + index_a == array_b + tm.assert_numpy_array_equal(index_a == array_a, expected1) + tm.assert_numpy_array_equal(index_a == array_c, expected2) + + # test comparisons with Series + series_a = Series(array_a) + series_b = Series(array_b) + series_c = Series(array_c) + series_d = Series(array_d) + with pytest.raises(ValueError, match=msg): + index_a == series_b + + tm.assert_numpy_array_equal(index_a == series_a, expected1) + tm.assert_numpy_array_equal(index_a == series_c, expected2) + + # cases where length is 1 for one of them + with pytest.raises(ValueError, match="Lengths must match"): + index_a == index_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + series_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == 
item, Series(expected3)) + + def test_hasnans_isnans(self, indices): + # GH 11343, added tests for hasnans / isnans + if isinstance(indices, MultiIndex): + return + + # cases in indices doesn't include NaN + idx = indices.copy(deep=True) + expected = np.array([False] * len(idx), dtype=bool) + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is False + + idx = indices.copy(deep=True) + values = np.asarray(idx.values) + + if len(indices) == 0: + return + elif isinstance(indices, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(indices, (Int64Index, UInt64Index)): + return + else: + values[1] = np.nan + + if isinstance(indices, PeriodIndex): + idx = type(indices)(values, freq=indices.freq) + else: + idx = type(indices)(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is True + + def test_fillna(self, indices): + # GH 11343 + if len(indices) == 0: + pass + elif isinstance(indices, MultiIndex): + idx = indices.copy(deep=True) + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.fillna(idx[0]) + else: + idx = indices.copy(deep=True) + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + + msg = "'value' must be a scalar, passed: " + with pytest.raises(TypeError, match=msg): + idx.fillna([idx[0]]) + + idx = indices.copy(deep=True) + values = np.asarray(idx.values) + + if isinstance(indices, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(indices, (Int64Index, UInt64Index)): + return + else: + values[1] = np.nan + + if isinstance(indices, PeriodIndex): + idx = type(indices)(values, freq=indices.freq) + else: + idx = type(indices)(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is True + + def test_nulls(self, indices): + 
# this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + if len(indices) == 0: + tm.assert_numpy_array_equal(indices.isna(), np.array([], dtype=bool)) + elif isinstance(indices, MultiIndex): + idx = indices.copy() + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.isna() + elif not indices.hasnans: + tm.assert_numpy_array_equal( + indices.isna(), np.zeros(len(indices), dtype=bool) + ) + tm.assert_numpy_array_equal( + indices.notna(), np.ones(len(indices), dtype=bool) + ) + else: + result = isna(indices) + tm.assert_numpy_array_equal(indices.isna(), result) + tm.assert_numpy_array_equal(indices.notna(), ~result) + + def test_empty(self): + # GH 15270 + index = self.create_index() + assert not index.empty + assert index[:0].empty + + def test_join_self_unique(self, join_type): + index = self.create_index() + if index.is_unique: + joined = index.join(index, how=join_type) + assert (index == joined).all() + + def test_map(self): + # callable + index = self.create_index() + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype("int64") + else: + expected = index + + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index), + ], + ) + def test_map_dictlike(self, mapper): + + index = self.create_index() + if isinstance(index, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip("skipping tests for {}".format(type(index))) + + identity = mapper(index.values, index) + + # we don't infer to UInt64 for a dict + if isinstance(index, pd.UInt64Index) and isinstance(identity, dict): + expected = index.astype("int64") + else: + expected = index + + result = index.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = 
def test_putmask_with_wrong_mask(self):
    """``Index.putmask`` must raise ValueError for a mask that cannot fit the index.

    GH18368. Covers a mask one element too long, one element too short,
    and a non-array string mask.
    """
    index = self.create_index()

    # The builtin ``bool`` replaces the ``np.bool`` alias, which NumPy
    # deprecated in 1.20 and removed in 1.24; the resulting dtype is the same.
    # Each mask is built inside its ``with`` block, as before, so an error
    # raised while constructing it is still caught by the context manager.
    with pytest.raises(ValueError):
        index.putmask(np.ones(len(index) + 1, bool), 1)

    with pytest.raises(ValueError):
        index.putmask(np.ones(len(index) - 1, bool), 1)

    with pytest.raises(ValueError):
        index.putmask("foo", 1)
NA should be unique + index_na = index.insert(0, np.nan) + assert index_na.is_unique is True + + # multiple NA should not be unique + index_na_dup = index_na.insert(0, np.nan) + assert index_na_dup.is_unique is False + + def test_engine_reference_cycle(self): + # GH27585 + index = self.create_index() + nrefs_pre = len(gc.get_referrers(index)) + index._engine + assert len(gc.get_referrers(index)) == nrefs_pre + + def test_getitem_2d_deprecated(self): + # GH#30588 + idx = self.create_index() + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + res = idx[:, None] + + assert isinstance(res, np.ndarray), type(res) diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py new file mode 100644 index 00000000..e3e7ff40 --- /dev/null +++ b/pandas/tests/indexes/conftest.py @@ -0,0 +1,52 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.core.indexes.api import Index, MultiIndex + +indices_dict = { + "unicode": tm.makeUnicodeIndex(100), + "string": tm.makeStringIndex(100), + "datetime": tm.makeDateIndex(100), + "period": tm.makePeriodIndex(100), + "timedelta": tm.makeTimedeltaIndex(100), + "int": tm.makeIntIndex(100), + "uint": tm.makeUIntIndex(100), + "range": tm.makeRangeIndex(100), + "float": tm.makeFloatIndex(100), + "bool": Index([True, False]), + "categorical": tm.makeCategoricalIndex(100), + "interval": tm.makeIntervalIndex(100), + "empty": Index([]), + "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), + "repeats": Index([0, 0, 1, 1, 2, 2]), +} + + +@pytest.fixture(params=indices_dict.keys()) +def indices(request): + # copy to avoid mutation, e.g. 
setting .name + return indices_dict[request.param].copy() + + +@pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) +def one(request): + # zero-dim integer array behaves like an integer + return request.param + + +zeros = [ + box([0] * 5, dtype=dtype) + for box in [pd.Index, np.array] + for dtype in [np.int64, np.uint64, np.float64] +] +zeros.extend([np.array(0, dtype=dtype) for dtype in [np.int64, np.uint64, np.float64]]) +zeros.extend([0, 0.0]) + + +@pytest.fixture(params=zeros) +def zero(request): + # For testing division by (or of) zero for Index with length 5, this + # gives several scalar-zeros and length-5 vector-zeros + return request.param diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py new file mode 100644 index 00000000..3c72d34d --- /dev/null +++ b/pandas/tests/indexes/datetimelike.py @@ -0,0 +1,97 @@ +""" generic datetimelike tests """ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +from .common import Base + + +class DatetimeLike(Base): + def test_argmax_axis_invalid(self): + # GH#23081 + rng = self.create_index() + with pytest.raises(ValueError): + rng.argmax(axis=1) + with pytest.raises(ValueError): + rng.argmin(axis=2) + with pytest.raises(ValueError): + rng.min(axis=-2) + with pytest.raises(ValueError): + rng.max(axis=-3) + + def test_can_hold_identifiers(self): + idx = self.create_index() + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_shift_identity(self): + + idx = self.create_index() + tm.assert_index_equal(idx, idx.shift(0)) + + def test_str(self): + + # test the string repr + idx = self.create_index() + idx.name = "foo" + assert not "length={}".format(len(idx)) in str(idx) + assert "'foo'" in str(idx) + assert type(idx).__name__ in str(idx) + + if hasattr(idx, "tz"): + if idx.tz is not None: + assert idx.tz in str(idx) + if hasattr(idx, "freq"): + assert "freq='{idx.freqstr}'".format(idx=idx) in str(idx) + + def 
test_view(self): + i = self.create_index() + + i_view = i.view("i8") + result = self._holder(i) + tm.assert_index_equal(result, i) + + i_view = i.view(self._holder) + result = self._holder(i) + tm.assert_index_equal(result, i_view) + + def test_map_callable(self): + index = self.create_index() + expected = index + index.freq + result = index.map(lambda x: x + x.freq) + tm.assert_index_equal(result, expected) + + # map to NaT + result = index.map(lambda x: pd.NaT if x == index[0] else x) + expected = pd.Index([pd.NaT] + index[1:].tolist()) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index, dtype=object), + ], + ) + def test_map_dictlike(self, mapper): + index = self.create_index() + expected = index + index.freq + + # don't compare the freqs + if isinstance(expected, pd.DatetimeIndex): + expected._data.freq = None + + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + expected = pd.Index([pd.NaT] + index[1:].tolist()) + result = index.map(mapper(expected, index)) + tm.assert_index_equal(result, expected) + + # empty map; these map to np.nan because we cannot know + # to re-infer things + expected = pd.Index([np.nan] * len(index)) + result = index.map(mapper([], [])) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/__init__.py b/pandas/tests/indexes/datetimes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py new file mode 100644 index 00000000..6139726d --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -0,0 +1,378 @@ +from datetime import datetime + +import dateutil +from dateutil.tz import tzlocal +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, 
+ Int64Index, + NaT, + Period, + Series, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestDatetimeIndex: + def test_astype(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + + result = idx.astype(object) + expected = Index([Timestamp("2016-05-16")] + [NaT] * 3, dtype=object) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index( + [1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64 + ) + tm.assert_index_equal(result, expected) + + rng = date_range("1/1/2000", periods=10) + result = rng.astype("i8") + tm.assert_index_equal(result, Index(rng.asi8)) + tm.assert_numpy_array_equal(result.values, rng.asi8) + + def test_astype_uint(self): + arr = date_range("2000", periods=2) + expected = pd.UInt64Index( + np.array([946684800000000000, 946771200000000000], dtype="uint64") + ) + + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + + def test_astype_with_tz(self): + + # with tz + rng = date_range("1/1/2000", periods=10, tz="US/Eastern") + result = rng.astype("datetime64[ns]") + expected = ( + date_range("1/1/2000", periods=10, tz="US/Eastern") + .tz_convert("UTC") + .tz_localize(None) + ) + tm.assert_index_equal(result, expected) + + # BUG#10442 : testing astype(str) is correct for Series/DatetimeIndex + result = pd.Series(pd.date_range("2012-01-01", periods=3)).astype(str) + expected = pd.Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object) + tm.assert_series_equal(result, expected) + + result = Series(pd.date_range("2012-01-01", periods=3, tz="US/Eastern")).astype( + str + ) + expected = Series( + [ + "2012-01-01 00:00:00-05:00", + "2012-01-02 00:00:00-05:00", + "2012-01-03 00:00:00-05:00", + ], + dtype=object, + ) + tm.assert_series_equal(result, expected) + + # GH 18951: tz-aware to tz-aware + idx = date_range("20170101", periods=4, tz="US/Pacific") + result = 
idx.astype("datetime64[ns, US/Eastern]") + expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern") + tm.assert_index_equal(result, expected) + + # GH 18951: tz-naive to tz-aware + idx = date_range("20170101", periods=4) + result = idx.astype("datetime64[ns, US/Eastern]") + expected = date_range("20170101", periods=4, tz="US/Eastern") + tm.assert_index_equal(result, expected) + + def test_astype_str_compat(self): + # GH 13149, GH 13209 + # verify that we are returning NaT as a string (and not unicode) + + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + result = idx.astype(str) + expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object) + tm.assert_index_equal(result, expected) + + def test_astype_str(self): + # test astype string - #10442 + result = date_range("2012-01-01", periods=4, name="test_name").astype(str) + expected = Index( + ["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + # test astype string with tz and name + result = date_range( + "2012-01-01", periods=3, name="test_name", tz="US/Eastern" + ).astype(str) + expected = Index( + [ + "2012-01-01 00:00:00-05:00", + "2012-01-02 00:00:00-05:00", + "2012-01-03 00:00:00-05:00", + ], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + # test astype string with freqH and name + result = date_range("1/1/2011", periods=3, freq="H", name="test_name").astype( + str + ) + expected = Index( + ["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"], + name="test_name", + dtype=object, + ) + tm.assert_index_equal(result, expected) + + # test astype string with freqH and timezone + result = date_range( + "3/6/2012 00:00", periods=2, freq="H", tz="Europe/London", name="test_name" + ).astype(str) + expected = Index( + ["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"], + dtype=object, + name="test_name", + ) + tm.assert_index_equal(result, 
expected) + + def test_astype_datetime64(self): + # GH 13149, GH 13209 + idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + + result = idx.astype("datetime64[ns]") + tm.assert_index_equal(result, idx) + assert result is not idx + + result = idx.astype("datetime64[ns]", copy=False) + tm.assert_index_equal(result, idx) + assert result is idx + + idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST") + result = idx_tz.astype("datetime64[ns]") + expected = DatetimeIndex( + ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], dtype="datetime64[ns]" + ) + tm.assert_index_equal(result, expected) + + def test_astype_object(self): + rng = date_range("1/1/2000", periods=20) + + casted = rng.astype("O") + exp_values = list(rng) + + tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_)) + assert casted.tolist() == exp_values + + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"]) + def test_astype_object_tz(self, tz): + idx = pd.date_range(start="2013-01-01", periods=4, freq="M", name="idx", tz=tz) + expected_list = [ + Timestamp("2013-01-31", tz=tz), + Timestamp("2013-02-28", tz=tz), + Timestamp("2013-03-31", tz=tz), + Timestamp("2013-04-30", tz=tz), + ] + expected = pd.Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = DatetimeIndex( + [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4)], + name="idx", + ) + expected_list = [ + Timestamp("2013-01-01"), + Timestamp("2013-01-02"), + pd.NaT, + Timestamp("2013-01-04"), + ] + expected = pd.Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + @pytest.mark.parametrize( + "dtype", + [float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"], + ) + def test_astype_raises(self, dtype): + # GH 13149, GH 13209 + 
idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) + msg = "Cannot cast DatetimeArray to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_index_convert_to_datetime_array(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz="US/Eastern") + rng_utc = date_range("20090415", "20090519", tz="utc") + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_explicit_pytz(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern")) + rng_utc = date_range("20090415", "20090519", tz=pytz.utc) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + def test_index_convert_to_datetime_array_dateutil(self): + def _check_rng(rng): + converted = rng.to_pydatetime() + assert isinstance(converted, np.ndarray) + for x, stamp in zip(converted, rng): + assert isinstance(x, datetime) + assert x == stamp.to_pydatetime() + assert x.tzinfo == stamp.tzinfo + + rng = date_range("20090415", "20090519") + rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern") + rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc()) + + _check_rng(rng) + _check_rng(rng_eastern) + _check_rng(rng_utc) + + @pytest.mark.parametrize( + "tz, dtype", + [["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]], + ) + def test_integer_index_astype_datetime(self, 
tz, dtype): + # GH 20997, 20964, 24559 + val = [pd.Timestamp("2018-01-01", tz=tz).value] + result = pd.Index(val).astype(dtype) + expected = pd.DatetimeIndex(["2018-01-01"], tz=tz) + tm.assert_index_equal(result, expected) + + +class TestToPeriod: + def setup_method(self, method): + data = [ + Timestamp("2007-01-01 10:11:12.123456Z"), + Timestamp("2007-01-01 10:11:13.789123Z"), + ] + self.index = DatetimeIndex(data) + + def test_to_period_millisecond(self): + index = self.index + + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq="L") + assert 2 == len(period) + assert period[0] == Period("2007-01-01 10:11:12.123Z", "L") + assert period[1] == Period("2007-01-01 10:11:13.789Z", "L") + + def test_to_period_microsecond(self): + index = self.index + + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq="U") + assert 2 == len(period) + assert period[0] == Period("2007-01-01 10:11:12.123456Z", "U") + assert period[1] == Period("2007-01-01 10:11:13.789123Z", "U") + + @pytest.mark.parametrize( + "tz", + ["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()], + ) + def test_to_period_tz(self, tz): + ts = date_range("1/1/2000", "2/1/2000", tz=tz) + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + result = ts.to_period()[0] + expected = ts[0].to_period() + + assert result == expected + + expected = date_range("1/1/2000", "2/1/2000").to_period() + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + result = ts.to_period() + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"]) + def test_to_period_tz_utc_offset_consistency(self, tz): + # GH 22905: build the range in the parametrized tz so both offsets are covered + ts = pd.date_range("1/1/2000", "2/1/2000", tz=tz) + with tm.assert_produces_warning(UserWarning): +
result = ts.to_period()[0] + expected = ts[0].to_period() + assert result == expected + + def test_to_period_nofreq(self): + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"]) + with pytest.raises(ValueError): + idx.to_period() + + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer") + assert idx.freqstr == "D" + expected = pd.PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D") + tm.assert_index_equal(idx.to_period(), expected) + + # GH 7606 + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) + assert idx.freqstr is None + tm.assert_index_equal(idx.to_period(), expected) + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_astype_category(self, tz): + obj = pd.date_range("2000", periods=2, tz=tz) + result = obj.astype("category") + expected = pd.CategoricalIndex( + [pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)] + ) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_astype_array_fallback(self, tz): + obj = pd.date_range("2000", periods=2, tz=tz) + result = obj.astype(bool) + expected = pd.Index(np.array([True, True])) + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py new file mode 100644 index 00000000..ffe51dd1 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -0,0 +1,952 @@ +from datetime import datetime, timedelta +from functools import partial +from operator import attrgetter + +import dateutil +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import OutOfBoundsDatetime, conversion + +import pandas as pd 
+from pandas import DatetimeIndex, Index, Timestamp, date_range, offsets, to_datetime +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray, period_array + + +class TestDatetimeIndex: + @pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence]) + def test_freq_validation_with_nat(self, dt_cls): + # GH#11587 make sure we get a useful error message when generate_range + # raises + msg = ( + "Inferred frequency None from passed values does not conform " + "to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + dt_cls([pd.NaT, pd.Timestamp("2011-01-01")], freq="D") + with pytest.raises(ValueError, match=msg): + dt_cls([pd.NaT, pd.Timestamp("2011-01-01").value], freq="D") + + # TODO: better place for tests shared by DTI/TDI? + @pytest.mark.parametrize( + "index", + [ + pd.date_range("2016-01-01", periods=5, tz="US/Pacific"), + pd.timedelta_range("1 Day", periods=5), + ], + ) + def test_shallow_copy_inherits_array_freq(self, index): + # If we pass a DTA/TDA to shallow_copy and dont specify a freq, + # we should inherit the array's freq, not our own. 
+ array = index._data + + arr = array[[0, 3, 2, 4, 1]] + assert arr.freq is None + + result = index._shallow_copy(arr) + assert result.freq is None + + def test_categorical_preserves_tz(self): + # GH#18664 retain tz when going DTI-->Categorical-->DTI + # TODO: parametrize over DatetimeIndex/DatetimeArray + # once CategoricalIndex(DTA) works + + dti = pd.DatetimeIndex( + [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"], tz="US/Eastern" + ) + + ci = pd.CategoricalIndex(dti) + carr = pd.Categorical(dti) + cser = pd.Series(ci) + + for obj in [ci, carr, cser]: + result = pd.DatetimeIndex(obj) + tm.assert_index_equal(result, dti) + + def test_dti_with_period_data_raises(self): + # GH#23675 + data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q") + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(data) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + DatetimeIndex(period_array(data)) + + with pytest.raises(TypeError, match="PeriodDtype data is invalid"): + to_datetime(period_array(data)) + + def test_dti_with_timedelta64_data_raises(self): + # GH#23675 deprecated, enforced in GH#29794 + data = np.array([0], dtype="m8[ns]") + msg = r"timedelta64\[ns\] cannot be converted to datetime64" + with pytest.raises(TypeError, match=msg): + DatetimeIndex(data) + + with pytest.raises(TypeError, match=msg): + to_datetime(data) + + with pytest.raises(TypeError, match=msg): + DatetimeIndex(pd.TimedeltaIndex(data)) + + with pytest.raises(TypeError, match=msg): + to_datetime(pd.TimedeltaIndex(data)) + + def test_construction_caching(self): + + df = pd.DataFrame( + { + "dt": pd.date_range("20130101", periods=3), + "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "dt_with_null": [ + pd.Timestamp("20130101"), + pd.NaT, + pd.Timestamp("20130103"), + ], + "dtns": pd.date_range("20130101", periods=3, freq="ns"), + }
+ ) + assert df.dttz.dtype.tz.zone == "US/Eastern" + + @pytest.mark.parametrize( + "kwargs", + [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}], + ) + def test_construction_with_alt(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range("20130101", periods=5, freq="H", tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + result = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(i, result) + + @pytest.mark.parametrize( + "kwargs", + [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}], + ) + def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture): + tz = tz_aware_fixture + i = pd.date_range("20130101", periods=5, freq="H", tz=tz) + kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()} + + if "tz" in kwargs: + result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"]) + + expected = DatetimeIndex(i, **kwargs) + tm.assert_index_equal(result, expected) + + # localize into the provided tz + i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC") + expected = i.tz_localize(None).tz_localize("UTC") + tm.assert_index_equal(i2, expected) + + # incompat tz/dtype + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific") + + def test_construction_index_with_mixed_timezones(self): + # gh-11488: no tz results in DatetimeIndex + result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx") + exp = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # same tz results in DatetimeIndex + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + 
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # same tz results in DatetimeIndex (DST) + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # Different tz results in Index(dtype=object) + result = Index( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + # length = 1 + result = Index([Timestamp("2011-01-01")], name="idx") + exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx") + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # length = 1 with tz + result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx") + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx" + ) + 
tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + def test_construction_index_with_mixed_timezones_with_NaT(self): + # see gh-11488 + result = Index( + [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")], + name="idx", + ) + exp = DatetimeIndex( + [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")], + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # Same tz results in DatetimeIndex + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00"), + ], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # same tz results in DatetimeIndex (DST) + result = Index( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + pd.NaT, + Timestamp("2011-08-01 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + # different tz results in Index(dtype=object) + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not 
isinstance(result, DatetimeIndex) + + result = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="object", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert not isinstance(result, DatetimeIndex) + + # all NaT + result = Index([pd.NaT, pd.NaT], name="idx") + exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx") + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is None + + # all NaT with tz + result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") + exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx") + + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + assert result.tz is not None + assert result.tz == exp.tz + + def test_construction_dti_with_mixed_timezones(self): + # GH 11488 (not changed, added explicit tests) + + # no tz results in DatetimeIndex + result = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx" + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # same tz results in DatetimeIndex + result = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # same tz results in DatetimeIndex (DST) + result = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="US/Eastern"), + Timestamp("2011-08-01 
10:00", tz="US/Eastern"), + ], + name="idx", + ) + exp = DatetimeIndex( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(result, exp, exact=True) + assert isinstance(result, DatetimeIndex) + + # tz mismatch affecting to tz-aware raises TypeError/ValueError + + with pytest.raises(ValueError): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + name="idx", + ) + + msg = "cannot be converted to datetime64" + with pytest.raises(ValueError, match=msg): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) + + with pytest.raises(ValueError): + DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="US/Eastern", + name="idx", + ) + + with pytest.raises(ValueError, match=msg): + # passing tz should result in DatetimeIndex, then mismatch raises + # ValueError + Index( + [ + pd.NaT, + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) + + def test_construction_base_constructor(self): + arr = [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")] + tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Timestamp("2011-01-03")] + tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr))) + + def test_construction_outofbounds(self): + # GH 13663 + dates = [ + datetime(3000, 1, 1), + datetime(4000, 1, 1), + datetime(5000, 1, 1), + datetime(6000, 1, 1), + ] + exp = Index(dates, dtype=object) + # coerces to object + tm.assert_index_equal(Index(dates), exp) + + with
pytest.raises(OutOfBoundsDatetime): + # can't create DatetimeIndex + DatetimeIndex(dates) + + def test_construction_with_ndarray(self): + # GH 5152 + dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)] + data = DatetimeIndex(dates, freq=pd.offsets.BDay()).values + result = DatetimeIndex(data, freq=pd.offsets.BDay()) + expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B") + tm.assert_index_equal(result, expected) + + def test_integer_values_and_tz_interpreted_as_utc(self): + # GH-24559 + val = np.datetime64("2000-01-01 00:00:00", "ns") + values = np.array([val.view("i8")]) + + result = DatetimeIndex(values).tz_localize("US/Central") + + expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central") + tm.assert_index_equal(result, expected) + + # but UTC is *not* deprecated. + with tm.assert_produces_warning(None): + result = DatetimeIndex(values, tz="UTC") + tm.assert_index_equal(result, DatetimeIndex(["2000-01-01T00:00:00"], tz="UTC")) + + def test_constructor_coverage(self): + rng = date_range("1/1/2000", periods=10.5) + exp = date_range("1/1/2000", periods=10) + tm.assert_index_equal(rng, exp) + + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + date_range(start="1/1/2000", periods="foo", freq="D") + + with pytest.raises(TypeError): + DatetimeIndex("1/1/2000") + + # generator expression + gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) + result = DatetimeIndex(gen) + expected = DatetimeIndex( + [datetime(2000, 1, 1) + timedelta(i) for i in range(10)] + ) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"]) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype("O")) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # string with NaT + strings = np.array(["2000-01-01",
"2000-01-02", "NaT"]) + result = DatetimeIndex(strings) + expected = DatetimeIndex(strings.astype("O")) + tm.assert_index_equal(result, expected) + + from_ints = DatetimeIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming + msg = ( + "Inferred frequency None from passed values does not conform " + "to passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D") + + msg = ( + "Of the four parameters: start, end, periods, and freq, exactly " + "three must be specified" + ) + with pytest.raises(ValueError, match=msg): + date_range(start="2011-01-01", freq="b") + with pytest.raises(ValueError, match=msg): + date_range(end="2011-01-01", freq="B") + with pytest.raises(ValueError, match=msg): + date_range(periods=10, freq="D") + + @pytest.mark.parametrize("freq", ["AS", "W-SUN"]) + def test_constructor_datetime64_tzformat(self, freq): + # see GH#6572: ISO 8601 format results in pytz.FixedOffset + idx = date_range( + "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq + ) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(-300), + ) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range( + "2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq + ) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(540), + ) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + # Non ISO 8601 format results in dateutil.tz.tzoffset + idx = date_range("2013/1/1 
0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(-300), + ) + tm.assert_index_equal(idx, expected) + # Unable to use `US/Eastern` because of DST + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq) + expected = date_range( + "2013-01-01T00:00:00", + "2016-01-01T23:59:59", + freq=freq, + tz=pytz.FixedOffset(540), + ) + tm.assert_index_equal(idx, expected) + expected_i8 = date_range( + "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo" + ) + tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8) + + def test_constructor_dtype(self): + + # passing a dtype with a tz should localize + idx = DatetimeIndex( + ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" + ) + expected = DatetimeIndex(["2013-01-01", "2013-01-02"]).tz_localize("US/Eastern") + tm.assert_index_equal(idx, expected) + + idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern") + tm.assert_index_equal(idx, expected) + + # if we already have a tz and its not the same, then raise + idx = DatetimeIndex( + ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]" + ) + + msg = ( + "cannot supply both a tz and a timezone-naive dtype" + r" \(i\.e\. 
datetime64\[ns\]\)" + ) + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, dtype="datetime64[ns]") + + # this is effectively trying to convert tz's + msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET" + with pytest.raises(TypeError, match=msg): + DatetimeIndex(idx, dtype="datetime64[ns, CET]") + msg = "cannot supply both a tz and a dtype with a tz" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]") + + result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]") + tm.assert_index_equal(idx, result) + + @pytest.mark.parametrize("dtype", [object, np.int32, np.int64]) + def test_constructor_invalid_dtype_raises(self, dtype): + # GH 23986 + with pytest.raises(ValueError): + DatetimeIndex([1, 2], dtype=dtype) + + def test_constructor_name(self): + idx = date_range(start="2000-01-01", periods=1, freq="A", name="TEST") + assert idx.name == "TEST" + + def test_000constructor_resolution(self): + # 2252 + t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) + idx = DatetimeIndex([t1]) + + assert idx.nanosecond[0] == t1.nanosecond + + def test_disallow_setting_tz(self): + # GH 3746 + dti = DatetimeIndex(["2010"], tz="UTC") + with pytest.raises(AttributeError): + dti.tz = pytz.timezone("US/Pacific") + + @pytest.mark.parametrize( + "tz", + [ + None, + "America/Los_Angeles", + pytz.timezone("America/Los_Angeles"), + Timestamp("2000", tz="America/Los_Angeles").tz, + ], + ) + def test_constructor_start_end_with_tz(self, tz): + # GH 18595 + start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles") + end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles") + result = date_range(freq="D", start=start, end=end, tz=tz) + expected = DatetimeIndex( + ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], tz="America/Los_Angeles" + ) + tm.assert_index_equal(result, expected) + # Especially assert that the timezone is consistent for pytz + assert 
pytz.timezone("America/Los_Angeles") is result.tz + + @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"]) + def test_constructor_with_non_normalized_pytz(self, tz): + # GH 18595 + non_norm_tz = Timestamp("2010", tz=tz).tz + result = DatetimeIndex(["2010"], tz=non_norm_tz) + assert pytz.timezone(tz) is result.tz + + def test_constructor_timestamp_near_dst(self): + # GH 20854 + ts = [ + Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"), + Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"), + ] + result = DatetimeIndex(ts) + expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list]) + @pytest.mark.parametrize( + "tz, dtype", + [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")], + ) + def test_constructor_with_int_tz(self, klass, box, tz, dtype): + # GH 20997, 20964 + ts = Timestamp("2018-01-01", tz=tz) + result = klass(box([ts.value]), dtype=dtype) + expected = klass([ts]) + assert result == expected + + def test_construction_int_rountrip(self, tz_naive_fixture): + # GH 12619, GH#24559 + tz = tz_naive_fixture + + result = 1293858000000000000 + expected = DatetimeIndex([result], tz=tz).asi8[0] + assert result == expected + + def test_construction_from_replaced_timestamps_with_dst(self): + # GH 18785 + index = pd.date_range( + pd.Timestamp(2000, 1, 1), + pd.Timestamp(2005, 1, 1), + freq="MS", + tz="Australia/Melbourne", + ) + test = pd.DataFrame({"data": range(len(index))}, index=index) + test = test.resample("Y").mean() + result = pd.DatetimeIndex([x.replace(month=6, day=1) for x in test.index]) + expected = pd.DatetimeIndex( + [ + "2000-06-01 00:00:00", + "2001-06-01 00:00:00", + "2002-06-01 00:00:00", + "2003-06-01 00:00:00", + "2004-06-01 00:00:00", + "2005-06-01 00:00:00", + ], + 
tz="Australia/Melbourne", + ) + tm.assert_index_equal(result, expected) + + def test_construction_with_tz_and_tz_aware_dti(self): + # GH 23579 + dti = date_range("2016-01-01", periods=3, tz="US/Central") + with pytest.raises(TypeError): + DatetimeIndex(dti, tz="Asia/Tokyo") + + def test_construction_with_nat_and_tzlocal(self): + tz = dateutil.tz.tzlocal() + result = DatetimeIndex(["2018", "NaT"], tz=tz) + expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT]) + tm.assert_index_equal(result, expected) + + def test_constructor_no_precision_raises(self): + # GH-24753, GH-24739 + + msg = "with no precision is not allowed" + with pytest.raises(ValueError, match=msg): + pd.DatetimeIndex(["2000"], dtype="datetime64") + + with pytest.raises(ValueError, match=msg): + pd.Index(["2000"], dtype="datetime64") + + def test_constructor_wrong_precision_raises(self): + with pytest.raises(ValueError): + pd.DatetimeIndex(["2000"], dtype="datetime64[us]") + + def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self): + # GH 27011 + result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object)) + expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT]) + tm.assert_index_equal(result, expected) + + +class TestTimeSeries: + def test_dti_constructor_preserve_dti_freq(self): + rng = date_range("1/1/2000", "1/2/2000", freq="5min") + + rng2 = DatetimeIndex(rng) + assert rng.freq == rng2.freq + + def test_dti_constructor_years_only(self, tz_naive_fixture): + tz = tz_naive_fixture + # GH 6961 + rng1 = date_range("2014", "2015", freq="M", tz=tz) + expected1 = date_range("2014-01-31", "2014-12-31", freq="M", tz=tz) + + rng2 = date_range("2014", "2015", freq="MS", tz=tz) + expected2 = date_range("2014-01-01", "2015-01-01", freq="MS", tz=tz) + + rng3 = date_range("2014", "2020", freq="A", tz=tz) + expected3 = date_range("2014-12-31", "2019-12-31", freq="A", tz=tz) + + rng4 = date_range("2014", "2020", freq="AS", tz=tz) + expected4 = 
date_range("2014-01-01", "2020-01-01", freq="AS", tz=tz) + + for rng, expected in [ + (rng1, expected1), + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + tm.assert_index_equal(rng, expected) + + def test_dti_constructor_small_int(self, any_int_dtype): + # see gh-13721 + exp = DatetimeIndex( + [ + "1970-01-01 00:00:00.00000000", + "1970-01-01 00:00:00.00000001", + "1970-01-01 00:00:00.00000002", + ] + ) + + arr = np.array([0, 10, 20], dtype=any_int_dtype) + tm.assert_index_equal(DatetimeIndex(arr), exp) + + def test_ctor_str_intraday(self): + rng = DatetimeIndex(["1-1-2000 00:00:01"]) + assert rng[0].second == 1 + + def test_is_(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + assert dti.is_(dti) + assert dti.is_(dti.view()) + assert not dti.is_(dti.copy()) + + def test_index_cast_datetime64_other_units(self): + arr = np.arange(0, 100, 10, dtype=np.int64).view("M8[D]") + idx = Index(arr) + + assert (idx.values == conversion.ensure_datetime64ns(arr)).all() + + def test_constructor_int64_nocopy(self): + # GH#1624 + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr) + + arr[50:100] = -1 + assert (index.asi8[50:100] == -1).all() + + arr = np.arange(1000, dtype=np.int64) + index = DatetimeIndex(arr, copy=True) + + arr[50:100] = -1 + assert (index.asi8[50:100] != -1).all() + + @pytest.mark.parametrize( + "freq", ["M", "Q", "A", "D", "B", "BH", "T", "S", "L", "U", "H", "N", "C"] + ) + def test_from_freq_recreate_from_data(self, freq): + org = date_range(start="2001/02/01 09:00", freq=freq, periods=1) + idx = DatetimeIndex(org, freq=freq) + tm.assert_index_equal(idx, org) + + org = date_range( + start="2001/02/01 09:00", freq=freq, tz="US/Pacific", periods=1 + ) + idx = DatetimeIndex(org, freq=freq, tz="US/Pacific") + tm.assert_index_equal(idx, org) + + def test_datetimeindex_constructor_misc(self): + arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"] + msg = r"(\(')?Unknown string format(:', 'Jn 3, 
2005'\))?" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(arr) + + arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"] + idx1 = DatetimeIndex(arr) + + arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"] + idx2 = DatetimeIndex(arr) + + arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"] + idx3 = DatetimeIndex(arr) + + arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O") + idx4 = DatetimeIndex(arr) + + arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]) + idx5 = DatetimeIndex(arr) + + arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"]) + idx6 = DatetimeIndex(arr) + + idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True) + idx8 = DatetimeIndex( + ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True + ) + tm.assert_index_equal(idx7, idx8) + + for other in [idx2, idx3, idx4, idx5, idx6]: + assert (idx1.values == other.values).all() + + sdate = datetime(1999, 12, 25) + edate = datetime(2000, 1, 1) + idx = date_range(start=sdate, freq="1B", periods=20) + assert len(idx) == 20 + assert idx[0] == sdate + 0 * offsets.BDay() + assert idx.freq == "B" + + idx = date_range(end=edate, freq=("D", 5), periods=20) + assert len(idx) == 20 + assert idx[-1] == edate + assert idx.freq == "5D" + + idx1 = date_range(start=sdate, end=edate, freq="W-SUN") + idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6)) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="QS") + idx2 = date_range( + start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq + + idx1 = date_range(start=sdate, end=edate, freq="BQ") + idx2 = date_range( + start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12) + ) + assert len(idx1) == len(idx2) + assert idx1.freq == idx2.freq diff --git 
a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py new file mode 100644 index 00000000..4d0beecb --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -0,0 +1,963 @@ +""" +test date_range, bdate_range construction from the convenience range functions +""" + +from datetime import datetime, time, timedelta + +import numpy as np +import pytest +import pytz +from pytz import timezone + +from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DatetimeIndex, Timestamp, bdate_range, date_range, offsets +import pandas._testing as tm + +from pandas.tseries.offsets import ( + BDay, + CDay, + DateOffset, + MonthEnd, + generate_range, + prefix_mapping, +) + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestTimestampEquivDateRange: + # Older tests in TestTimeSeries constructed their `stamp` objects + # using `date_range` instead of the `Timestamp` constructor. + # TestTimestampEquivDateRange checks that these are equivalent in the + # pertinent cases. 
+ + def test_date_range_timestamp_equiv(self): + rng = date_range("20090415", "20090519", tz="US/Eastern") + stamp = rng[0] + + ts = Timestamp("20090415", tz="US/Eastern", freq="D") + assert ts == stamp + + def test_date_range_timestamp_equiv_dateutil(self): + rng = date_range("20090415", "20090519", tz="dateutil/US/Eastern") + stamp = rng[0] + + ts = Timestamp("20090415", tz="dateutil/US/Eastern", freq="D") + assert ts == stamp + + def test_date_range_timestamp_equiv_explicit_pytz(self): + rng = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern")) + stamp = rng[0] + + ts = Timestamp("20090415", tz=pytz.timezone("US/Eastern"), freq="D") + assert ts == stamp + + @td.skip_if_windows_python_3 + def test_date_range_timestamp_equiv_explicit_dateutil(self): + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz + + rng = date_range("20090415", "20090519", tz=gettz("US/Eastern")) + stamp = rng[0] + + ts = Timestamp("20090415", tz=gettz("US/Eastern"), freq="D") + assert ts == stamp + + def test_date_range_timestamp_equiv_from_datetime_instance(self): + datetime_instance = datetime(2014, 3, 4) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + timestamp_instance = date_range(datetime_instance, periods=1, freq="D")[0] + + ts = Timestamp(datetime_instance, freq="D") + assert ts == timestamp_instance + + def test_date_range_timestamp_equiv_preserve_frequency(self): + timestamp_instance = date_range("2014-03-05", periods=1, freq="D")[0] + ts = Timestamp("2014-03-05", freq="D") + + assert timestamp_instance == ts + + +class TestDateRanges: + def test_date_range_nat(self): + # GH#11587 + msg = "Neither `start` nor `end` can be NaT" + with pytest.raises(ValueError, match=msg): + date_range(start="2016-01-01", end=pd.NaT, freq="D") + with pytest.raises(ValueError, match=msg): + date_range(start=pd.NaT, end="2016-01-01", freq="D") + + def test_date_range_multiplication_overflow(self): + # GH#24255 + # 
check that overflows in calculating `addend = periods * stride` + # are caught + with tm.assert_produces_warning(None): + # we should _not_ be seeing a overflow RuntimeWarning + dti = date_range(start="1677-09-22", periods=213503, freq="D") + + assert dti[0] == Timestamp("1677-09-22") + assert len(dti) == 213503 + + msg = "Cannot generate range with" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range("1969-05-04", periods=200000000, freq="30000D") + + def test_date_range_unsigned_overflow_handling(self): + # GH#24255 + # case where `addend = periods * stride` overflows int64 bounds + # but not uint64 bounds + dti = date_range(start="1677-09-22", end="2262-04-11", freq="D") + + dti2 = date_range(start=dti[0], periods=len(dti), freq="D") + assert dti2.equals(dti) + + dti3 = date_range(end=dti[-1], periods=len(dti), freq="D") + assert dti3.equals(dti) + + def test_date_range_int64_overflow_non_recoverable(self): + # GH#24255 + # case with start later than 1970-01-01, overflow int64 but not uint64 + msg = "Cannot generate range with" + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(start="1970-02-01", periods=106752 * 24, freq="H") + + # case with end before 1970-01-01, overflow int64 but not uint64 + with pytest.raises(OutOfBoundsDatetime, match=msg): + date_range(end="1969-11-14", periods=106752 * 24, freq="H") + + def test_date_range_int64_overflow_stride_endpoint_different_signs(self): + # cases where stride * periods overflow int64 and stride/endpoint + # have different signs + start = Timestamp("2262-02-23") + end = Timestamp("1969-11-14") + + expected = date_range(start=start, end=end, freq="-1H") + assert expected[0] == start + assert expected[-1] == end + + dti = date_range(end=end, periods=len(expected), freq="-1H") + tm.assert_index_equal(dti, expected) + + start2 = Timestamp("1970-02-01") + end2 = Timestamp("1677-10-22") + + expected2 = date_range(start=start2, end=end2, freq="-1H") + assert expected2[0] == start2 + assert 
expected2[-1] == end2 + + dti2 = date_range(start=start2, periods=len(expected2), freq="-1H") + tm.assert_index_equal(dti2, expected2) + + def test_date_range_out_of_bounds(self): + # GH#14187 + with pytest.raises(OutOfBoundsDatetime): + date_range("2016-01-01", periods=100000, freq="D") + with pytest.raises(OutOfBoundsDatetime): + date_range(end="1763-10-12", periods=100000, freq="D") + + def test_date_range_gen_error(self): + rng = date_range("1/1/2000 00:00", "1/1/2000 00:18", freq="5min") + assert len(rng) == 4 + + @pytest.mark.parametrize("freq", ["AS", "YS"]) + def test_begin_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex( + ["2013-01-01", "2014-01-01", "2015-01-01", "2016-01-01", "2017-01-01"], + freq=freq, + ) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["A", "Y"]) + def test_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex( + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-31"], freq=freq + ) + tm.assert_index_equal(rng, exp) + + @pytest.mark.parametrize("freq", ["BA", "BY"]) + def test_business_end_year_alias(self, freq): + # see gh-9313 + rng = date_range("1/1/2013", "7/1/2017", freq=freq) + exp = pd.DatetimeIndex( + ["2013-12-31", "2014-12-31", "2015-12-31", "2016-12-30"], freq=freq + ) + tm.assert_index_equal(rng, exp) + + def test_date_range_negative_freq(self): + # GH 11018 + rng = date_range("2011-12-31", freq="-2A", periods=3) + exp = pd.DatetimeIndex(["2011-12-31", "2009-12-31", "2007-12-31"], freq="-2A") + tm.assert_index_equal(rng, exp) + assert rng.freq == "-2A" + + rng = date_range("2011-01-31", freq="-2M", periods=3) + exp = pd.DatetimeIndex(["2011-01-31", "2010-11-30", "2010-09-30"], freq="-2M") + tm.assert_index_equal(rng, exp) + assert rng.freq == "-2M" + + def test_date_range_bms_bug(self): + # #1645 + rng = date_range("1/1/2000", periods=10, freq="BMS") + + 
ex_first = Timestamp("2000-01-03") + assert rng[0] == ex_first + + def test_date_range_normalize(self): + snap = datetime.today() + n = 50 + + rng = date_range(snap, periods=n, normalize=False, freq="2D") + + offset = timedelta(2) + values = DatetimeIndex([snap + i * offset for i in range(n)]) + + tm.assert_index_equal(rng, values) + + rng = date_range("1/1/2000 08:15", periods=n, normalize=False, freq="B") + the_time = time(8, 15) + for val in rng: + assert val.time() == the_time + + def test_date_range_fy5252(self): + dr = date_range( + start="2013-01-01", + periods=2, + freq=offsets.FY5253(startingMonth=1, weekday=3, variation="nearest"), + ) + assert dr[0] == Timestamp("2013-01-31") + assert dr[1] == Timestamp("2014-01-30") + + def test_date_range_ambiguous_arguments(self): + # #2538 + start = datetime(2011, 1, 1, 5, 3, 40) + end = datetime(2011, 1, 1, 8, 9, 40) + + msg = ( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + with pytest.raises(ValueError, match=msg): + date_range(start, end, periods=10, freq="s") + + def test_date_range_convenience_periods(self): + # GH 20808 + result = date_range("2018-04-24", "2018-04-27", periods=3) + expected = DatetimeIndex( + ["2018-04-24 00:00:00", "2018-04-25 12:00:00", "2018-04-27 00:00:00"], + freq=None, + ) + + tm.assert_index_equal(result, expected) + + # Test if spacing remains linear if tz changes to dst in range + result = date_range( + "2018-04-01 01:00:00", + "2018-04-01 04:00:00", + tz="Australia/Sydney", + periods=3, + ) + expected = DatetimeIndex( + [ + Timestamp("2018-04-01 01:00:00+1100", tz="Australia/Sydney"), + Timestamp("2018-04-01 02:00:00+1000", tz="Australia/Sydney"), + Timestamp("2018-04-01 04:00:00+1000", tz="Australia/Sydney"), + ] + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start,end,result_tz", + [ + ["20180101", "20180103", "US/Eastern"], + [datetime(2018, 1, 1), datetime(2018, 1, 3), "US/Eastern"], + 
[Timestamp("20180101"), Timestamp("20180103"), "US/Eastern"], + [ + Timestamp("20180101", tz="US/Eastern"), + Timestamp("20180103", tz="US/Eastern"), + "US/Eastern", + ], + [ + Timestamp("20180101", tz="US/Eastern"), + Timestamp("20180103", tz="US/Eastern"), + None, + ], + ], + ) + def test_date_range_linspacing_tz(self, start, end, result_tz): + # GH 20983 + result = date_range(start, end, periods=3, tz=result_tz) + expected = date_range("20180101", periods=3, freq="D", tz="US/Eastern") + tm.assert_index_equal(result, expected) + + def test_date_range_businesshour(self): + idx = DatetimeIndex( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + ], + freq="BH", + ) + rng = date_range("2014-07-04 09:00", "2014-07-04 16:00", freq="BH") + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex(["2014-07-04 16:00", "2014-07-07 09:00"], freq="BH") + rng = date_range("2014-07-04 16:00", "2014-07-07 09:00", freq="BH") + tm.assert_index_equal(idx, rng) + + idx = DatetimeIndex( + [ + "2014-07-04 09:00", + "2014-07-04 10:00", + "2014-07-04 11:00", + "2014-07-04 12:00", + "2014-07-04 13:00", + "2014-07-04 14:00", + "2014-07-04 15:00", + "2014-07-04 16:00", + "2014-07-07 09:00", + "2014-07-07 10:00", + "2014-07-07 11:00", + "2014-07-07 12:00", + "2014-07-07 13:00", + "2014-07-07 14:00", + "2014-07-07 15:00", + "2014-07-07 16:00", + "2014-07-08 09:00", + "2014-07-08 10:00", + "2014-07-08 11:00", + "2014-07-08 12:00", + "2014-07-08 13:00", + "2014-07-08 14:00", + "2014-07-08 15:00", + "2014-07-08 16:00", + ], + freq="BH", + ) + rng = date_range("2014-07-04 09:00", "2014-07-08 16:00", freq="BH") + tm.assert_index_equal(idx, rng) + + def test_range_misspecified(self): + # GH #1095 + msg = ( + "Of the four parameters: start, end, periods, and " + "freq, exactly three must be specified" + ) + + with pytest.raises(ValueError, match=msg): + 
date_range(start="1/1/2000") + + with pytest.raises(ValueError, match=msg): + date_range(end="1/1/2000") + + with pytest.raises(ValueError, match=msg): + date_range(periods=10) + + with pytest.raises(ValueError, match=msg): + date_range(start="1/1/2000", freq="H") + + with pytest.raises(ValueError, match=msg): + date_range(end="1/1/2000", freq="H") + + with pytest.raises(ValueError, match=msg): + date_range(periods=10, freq="H") + + with pytest.raises(ValueError, match=msg): + date_range() + + def test_compat_replace(self): + # https://github.com/statsmodels/statsmodels/issues/3349 + # replace should take ints/longs for compat + result = date_range( + Timestamp("1960-04-01 00:00:00", freq="QS-JAN"), periods=76, freq="QS-JAN" + ) + assert len(result) == 76 + + def test_catch_infinite_loop(self): + offset = offsets.DateOffset(minute=5) + # blow up, don't loop forever + msg = "Offset did not increment date" + with pytest.raises(ValueError, match=msg): + date_range(datetime(2011, 11, 11), datetime(2011, 11, 12), freq=offset) + + @pytest.mark.parametrize("periods", (1, 2)) + def test_wom_len(self, periods): + # https://github.com/pandas-dev/pandas/issues/20517 + res = date_range(start="20110101", periods=periods, freq="WOM-1MON") + assert len(res) == periods + + def test_construct_over_dst(self): + # GH 20854 + pre_dst = Timestamp("2010-11-07 01:00:00").tz_localize( + "US/Pacific", ambiguous=True + ) + pst_dst = Timestamp("2010-11-07 01:00:00").tz_localize( + "US/Pacific", ambiguous=False + ) + expect_data = [ + Timestamp("2010-11-07 00:00:00", tz="US/Pacific"), + pre_dst, + pst_dst, + ] + expected = DatetimeIndex(expect_data) + result = date_range(start="2010-11-7", periods=3, freq="H", tz="US/Pacific") + tm.assert_index_equal(result, expected) + + def test_construct_with_different_start_end_string_format(self): + # GH 12064 + result = date_range( + "2013-01-01 00:00:00+09:00", "2013/01/01 02:00:00+09:00", freq="H" + ) + expected = DatetimeIndex( + [ + 
Timestamp("2013-01-01 00:00:00+09:00"), + Timestamp("2013-01-01 01:00:00+09:00"), + Timestamp("2013-01-01 02:00:00+09:00"), + ] + ) + tm.assert_index_equal(result, expected) + + def test_error_with_zero_monthends(self): + msg = r"Offset <0 \* MonthEnds> did not increment date" + with pytest.raises(ValueError, match=msg): + date_range("1/1/2000", "1/1/2001", freq=MonthEnd(0)) + + def test_range_bug(self): + # GH #770 + offset = DateOffset(months=3) + result = date_range("2011-1-1", "2012-1-31", freq=offset) + + start = datetime(2011, 1, 1) + expected = DatetimeIndex([start + i * offset for i in range(5)]) + tm.assert_index_equal(result, expected) + + def test_range_tz_pytz(self): + # see gh-2906 + tz = timezone("US/Eastern") + start = tz.localize(datetime(2011, 1, 1)) + end = tz.localize(datetime(2011, 1, 3)) + + dr = date_range(start=start, periods=3) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + dr = date_range(end=end, periods=3) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + dr = date_range(start=start, end=end) + assert dr.tz.zone == tz.zone + assert dr[0] == start + assert dr[2] == end + + @pytest.mark.parametrize( + "start, end", + [ + [ + Timestamp(datetime(2014, 3, 6), tz="US/Eastern"), + Timestamp(datetime(2014, 3, 12), tz="US/Eastern"), + ], + [ + Timestamp(datetime(2013, 11, 1), tz="US/Eastern"), + Timestamp(datetime(2013, 11, 6), tz="US/Eastern"), + ], + ], + ) + def test_range_tz_dst_straddle_pytz(self, start, end): + dr = date_range(start, end, freq="D") + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start, end, freq="D", tz="US/Eastern") + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range( + start.replace(tzinfo=None), + end.replace(tzinfo=None), + freq="D", + tz="US/Eastern", + ) + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + def 
test_range_tz_dateutil(self): + # see gh-2906 + + # Use maybe_get_tz to fix filename in tz under dateutil. + from pandas._libs.tslibs.timezones import maybe_get_tz + + tz = lambda x: maybe_get_tz("dateutil/" + x) + + start = datetime(2011, 1, 1, tzinfo=tz("US/Eastern")) + end = datetime(2011, 1, 3, tzinfo=tz("US/Eastern")) + + dr = date_range(start=start, periods=3) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + dr = date_range(end=end, periods=3) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + dr = date_range(start=start, end=end) + assert dr.tz == tz("US/Eastern") + assert dr[0] == start + assert dr[2] == end + + @pytest.mark.parametrize("freq", ["1D", "3D", "2M", "7W", "3H", "A"]) + def test_range_closed(self, freq): + begin = datetime(2011, 1, 1) + end = datetime(2014, 1, 1) + + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right + + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] + + tm.assert_index_equal(expected_left, left) + tm.assert_index_equal(expected_right, right) + + def test_range_closed_with_tz_aware_start_end(self): + # GH12409, GH12684 + begin = Timestamp("2011/1/1", tz="US/Eastern") + end = Timestamp("2014/1/1", tz="US/Eastern") + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq) + left = date_range(begin, end, closed="left", freq=freq) + right = date_range(begin, end, closed="right", freq=freq) + expected_left = left + expected_right = right + + if end == closed[-1]: + expected_left = closed[:-1] + if begin == closed[0]: + expected_right = closed[1:] + + tm.assert_index_equal(expected_left, left) + tm.assert_index_equal(expected_right, right) + + begin = Timestamp("2011/1/1") + end = 
Timestamp("2014/1/1") + begintz = Timestamp("2011/1/1", tz="US/Eastern") + endtz = Timestamp("2014/1/1", tz="US/Eastern") + + for freq in ["1D", "3D", "2M", "7W", "3H", "A"]: + closed = date_range(begin, end, closed=None, freq=freq, tz="US/Eastern") + left = date_range(begin, end, closed="left", freq=freq, tz="US/Eastern") + right = date_range(begin, end, closed="right", freq=freq, tz="US/Eastern") + expected_left = left + expected_right = right + + if endtz == closed[-1]: + expected_left = closed[:-1] + if begintz == closed[0]: + expected_right = closed[1:] + + tm.assert_index_equal(expected_left, left) + tm.assert_index_equal(expected_right, right) + + @pytest.mark.parametrize("closed", ["right", "left", None]) + def test_range_closed_boundary(self, closed): + # GH#11804 + right_boundary = date_range( + "2015-09-12", "2015-12-01", freq="QS-MAR", closed=closed + ) + left_boundary = date_range( + "2015-09-01", "2015-09-12", freq="QS-MAR", closed=closed + ) + both_boundary = date_range( + "2015-09-01", "2015-12-01", freq="QS-MAR", closed=closed + ) + expected_right = expected_left = expected_both = both_boundary + + if closed == "right": + expected_left = both_boundary[1:] + if closed == "left": + expected_right = both_boundary[:-1] + if closed is None: + expected_right = both_boundary[1:] + expected_left = both_boundary[:-1] + + tm.assert_index_equal(right_boundary, expected_right) + tm.assert_index_equal(left_boundary, expected_left) + tm.assert_index_equal(both_boundary, expected_both) + + def test_years_only(self): + # GH 6961 + dr = date_range("2014", "2015", freq="M") + assert dr[0] == datetime(2014, 1, 31) + assert dr[-1] == datetime(2014, 12, 31) + + def test_freq_divides_end_in_nanos(self): + # GH 10885 + result_1 = date_range("2005-01-12 10:00", "2005-01-12 16:00", freq="345min") + result_2 = date_range("2005-01-13 10:00", "2005-01-13 16:00", freq="345min") + expected_1 = DatetimeIndex( + ["2005-01-12 10:00:00", "2005-01-12 15:45:00"], + 
dtype="datetime64[ns]", + freq="345T", + tz=None, + ) + expected_2 = DatetimeIndex( + ["2005-01-13 10:00:00", "2005-01-13 15:45:00"], + dtype="datetime64[ns]", + freq="345T", + tz=None, + ) + tm.assert_index_equal(result_1, expected_1) + tm.assert_index_equal(result_2, expected_2) + + def test_cached_range_bug(self): + rng = date_range("2010-09-01 05:00:00", periods=50, freq=DateOffset(hours=6)) + assert len(rng) == 50 + assert rng[0] == datetime(2010, 9, 1, 5) + + def test_timezone_comparaison_bug(self): + # smoke test + start = Timestamp("20130220 10:00", tz="US/Eastern") + result = date_range(start, periods=2, tz="US/Eastern") + assert len(result) == 2 + + def test_timezone_comparaison_assert(self): + start = Timestamp("20130220 10:00", tz="US/Eastern") + msg = "Inferred time zone not equal to passed time zone" + with pytest.raises(AssertionError, match=msg): + date_range(start, periods=2, tz="Europe/Berlin") + + def test_negative_non_tick_frequency_descending_dates(self, tz_aware_fixture): + # GH 23270 + tz = tz_aware_fixture + result = pd.date_range(start="2011-06-01", end="2011-01-01", freq="-1MS", tz=tz) + expected = pd.date_range( + end="2011-06-01", start="2011-01-01", freq="1MS", tz=tz + )[::-1] + tm.assert_index_equal(result, expected) + + +class TestGenRangeGeneration: + def test_generate(self): + rng1 = list(generate_range(START, END, offset=BDay())) + rng2 = list(generate_range(START, END, offset="B")) + assert rng1 == rng2 + + def test_generate_cday(self): + rng1 = list(generate_range(START, END, offset=CDay())) + rng2 = list(generate_range(START, END, offset="C")) + assert rng1 == rng2 + + def test_1(self): + rng = list(generate_range(start=datetime(2009, 3, 25), periods=2)) + expected = [datetime(2009, 3, 25), datetime(2009, 3, 26)] + assert rng == expected + + def test_2(self): + rng = list(generate_range(start=datetime(2008, 1, 1), end=datetime(2008, 1, 3))) + expected = [datetime(2008, 1, 1), datetime(2008, 1, 2), datetime(2008, 1, 3)] + assert 
rng == expected + + def test_3(self): + rng = list(generate_range(start=datetime(2008, 1, 5), end=datetime(2008, 1, 6))) + expected = [] + assert rng == expected + + def test_precision_finer_than_offset(self): + # GH#9907 + result1 = pd.date_range( + start="2015-04-15 00:00:03", end="2016-04-22 00:00:00", freq="Q" + ) + result2 = pd.date_range( + start="2015-04-15 00:00:03", end="2015-06-22 00:00:04", freq="W" + ) + expected1_list = [ + "2015-06-30 00:00:03", + "2015-09-30 00:00:03", + "2015-12-31 00:00:03", + "2016-03-31 00:00:03", + ] + expected2_list = [ + "2015-04-19 00:00:03", + "2015-04-26 00:00:03", + "2015-05-03 00:00:03", + "2015-05-10 00:00:03", + "2015-05-17 00:00:03", + "2015-05-24 00:00:03", + "2015-05-31 00:00:03", + "2015-06-07 00:00:03", + "2015-06-14 00:00:03", + "2015-06-21 00:00:03", + ] + expected1 = DatetimeIndex( + expected1_list, dtype="datetime64[ns]", freq="Q-DEC", tz=None + ) + expected2 = DatetimeIndex( + expected2_list, dtype="datetime64[ns]", freq="W-SUN", tz=None + ) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + dt1, dt2 = "2017-01-01", "2017-01-01" + tz1, tz2 = "US/Eastern", "Europe/London" + + @pytest.mark.parametrize( + "start,end", + [ + (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2)), + (pd.Timestamp(dt1), pd.Timestamp(dt2, tz=tz2)), + (pd.Timestamp(dt1, tz=tz1), pd.Timestamp(dt2, tz=tz2)), + (pd.Timestamp(dt1, tz=tz2), pd.Timestamp(dt2, tz=tz1)), + ], + ) + def test_mismatching_tz_raises_err(self, start, end): + # issue 18488 + with pytest.raises(TypeError): + pd.date_range(start, end) + with pytest.raises(TypeError): + pd.date_range(start, end, freq=BDay()) + + +class TestBusinessDateRange: + def test_constructor(self): + bdate_range(START, END, freq=BDay()) + bdate_range(START, periods=20, freq=BDay()) + bdate_range(end=START, periods=20, freq=BDay()) + + msg = "periods must be a number, got B" + with pytest.raises(TypeError, match=msg): + date_range("2011-1-1", "2012-1-1", "B") + + 
with pytest.raises(TypeError, match=msg): + bdate_range("2011-1-1", "2012-1-1", "B") + + msg = "freq must be specified for bdate_range; use date_range instead" + with pytest.raises(TypeError, match=msg): + bdate_range(START, END, periods=10, freq=None) + + def test_naive_aware_conflicts(self): + naive = bdate_range(START, END, freq=BDay(), tz=None) + aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") + + msg = "tz-naive.*tz-aware" + with pytest.raises(TypeError, match=msg): + naive.join(aware) + + with pytest.raises(TypeError, match=msg): + aware.join(naive) + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20) + firstDate = end - 19 * BDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_date_parse_failure(self): + badly_formed_date = "2007/100/1" + + with pytest.raises(ValueError): + Timestamp(badly_formed_date) + + with pytest.raises(ValueError): + bdate_range(start=badly_formed_date, periods=10) + + with pytest.raises(ValueError): + bdate_range(end=badly_formed_date, periods=10) + + with pytest.raises(ValueError): + bdate_range(badly_formed_date, badly_formed_date) + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range("12/5/2011", "12/5/2011") + rng2 = bdate_range("12/2/2011", "12/5/2011") + rng2._data.freq = BDay() # TODO: shouldn't this already be set? 
+ + result = rng1.union(rng2) + assert isinstance(result, DatetimeIndex) + + @pytest.mark.parametrize("closed", ["left", "right"]) + def test_bdays_and_open_boundaries(self, closed): + # GH 6673 + start = "2018-07-21" # Saturday + end = "2018-07-29" # Sunday + result = pd.date_range(start, end, freq="B", closed=closed) + + bday_start = "2018-07-23" # Monday + bday_end = "2018-07-27" # Friday + expected = pd.date_range(bday_start, bday_end, freq="D") + tm.assert_index_equal(result, expected) + + def test_bday_near_overflow(self): + # GH#24252 avoid doing unnecessary addition that _would_ overflow + start = pd.Timestamp.max.floor("D").to_pydatetime() + rng = pd.date_range(start, end=None, periods=1, freq="B") + expected = pd.DatetimeIndex([start], freq="B") + tm.assert_index_equal(rng, expected) + + def test_bday_overflow_error(self): + # GH#24252 check that we get OutOfBoundsDatetime and not OverflowError + start = pd.Timestamp.max.floor("D").to_pydatetime() + with pytest.raises(OutOfBoundsDatetime): + pd.date_range(start, periods=2, freq="B") + + +class TestCustomDateRange: + def test_constructor(self): + bdate_range(START, END, freq=CDay()) + bdate_range(START, periods=20, freq=CDay()) + bdate_range(end=START, periods=20, freq=CDay()) + + msg = "periods must be a number, got C" + with pytest.raises(TypeError, match=msg): + date_range("2011-1-1", "2012-1-1", "C") + + with pytest.raises(TypeError, match=msg): + bdate_range("2011-1-1", "2012-1-1", "C") + + def test_misc(self): + end = datetime(2009, 5, 13) + dr = bdate_range(end=end, periods=20, freq="C") + firstDate = end - 19 * CDay() + + assert len(dr) == 20 + assert dr[0] == firstDate + assert dr[-1] == end + + def test_daterange_bug_456(self): + # GH #456 + rng1 = bdate_range("12/5/2011", "12/5/2011", freq="C") + rng2 = bdate_range("12/2/2011", "12/5/2011", freq="C") + rng2._data.freq = CDay() # TODO: shouldn't this already be set? 
+ + result = rng1.union(rng2) + assert isinstance(result, DatetimeIndex) + + def test_cdaterange(self): + result = bdate_range("2013-05-01", periods=3, freq="C") + expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-03"]) + tm.assert_index_equal(result, expected) + + def test_cdaterange_weekmask(self): + result = bdate_range( + "2013-05-01", periods=3, freq="C", weekmask="Sun Mon Tue Wed Thu" + ) + expected = DatetimeIndex(["2013-05-01", "2013-05-02", "2013-05-05"]) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range("2013-05-01", periods=3, weekmask="Sun Mon Tue Wed Thu") + + def test_cdaterange_holidays(self): + result = bdate_range("2013-05-01", periods=3, freq="C", holidays=["2013-05-01"]) + expected = DatetimeIndex(["2013-05-02", "2013-05-03", "2013-05-06"]) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range("2013-05-01", periods=3, holidays=["2013-05-01"]) + + def test_cdaterange_weekmask_and_holidays(self): + result = bdate_range( + "2013-05-01", + periods=3, + freq="C", + weekmask="Sun Mon Tue Wed Thu", + holidays=["2013-05-01"], + ) + expected = DatetimeIndex(["2013-05-02", "2013-05-05", "2013-05-06"]) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ( + "a custom frequency string is required when holidays or " + "weekmask are passed, got frequency B" + ) + with pytest.raises(ValueError, match=msg): + bdate_range( + "2013-05-01", + periods=3, + weekmask="Sun Mon Tue Wed Thu", + holidays=["2013-05-01"], + ) + + @pytest.mark.parametrize( + "freq", [freq for freq in prefix_mapping if freq.startswith("C")] + ) + def 
test_all_custom_freq(self, freq): + # should not raise + bdate_range( + START, END, freq=freq, weekmask="Mon Wed Fri", holidays=["2009-03-14"] + ) + + bad_freq = freq + "FOO" + msg = "invalid custom frequency string: {freq}" + with pytest.raises(ValueError, match=msg.format(freq=bad_freq)): + bdate_range(START, END, freq=bad_freq) + + @pytest.mark.parametrize( + "start_end", + [ + ("2018-01-01T00:00:01.000Z", "2018-01-03T00:00:01.000Z"), + ("2018-01-01T00:00:00.010Z", "2018-01-03T00:00:00.010Z"), + ("2001-01-01T00:00:00.010Z", "2001-01-03T00:00:00.010Z"), + ], + ) + def test_range_with_millisecond_resolution(self, start_end): + # https://github.com/pandas-dev/pandas/issues/24110 + start, end = start_end + result = pd.date_range(start=start, end=end, periods=2, closed="left") + expected = DatetimeIndex([start]) + tm.assert_index_equal(result, expected) + + +def test_date_range_with_custom_holidays(): + # GH 30593 + freq = pd.offsets.CustomBusinessHour(start="15:00", holidays=["2020-11-26"]) + result = pd.date_range(start="2020-11-25 15:00", periods=4, freq=freq) + expected = pd.DatetimeIndex( + [ + "2020-11-25 15:00:00", + "2020-11-25 16:00:00", + "2020-11-27 15:00:00", + "2020-11-27 16:00:00", + ], + freq=freq, + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py new file mode 100644 index 00000000..ca18d6fb --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -0,0 +1,445 @@ +from datetime import date + +import dateutil +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, DatetimeIndex, Index, Timestamp, date_range, offsets +import pandas._testing as tm + +randn = np.random.randn + + +class TestDatetimeIndex: + def test_roundtrip_pickle_with_tz(self): + + # GH 8367 + # round-trip of timezone + index = date_range("20130101", periods=3, tz="US/Eastern", name="foo") + unpickled = tm.round_trip_pickle(index) + 
tm.assert_index_equal(index, unpickled) + + def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self): + # GH7774 + index = date_range("20130101", periods=3, tz="US/Eastern") + assert str(index.reindex([])[0].tz) == "US/Eastern" + assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern" + + def test_time_loc(self): # GH8667 + from datetime import time + from pandas._libs.index import _SIZE_CUTOFF + + ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) + key = time(15, 11, 30) + start = key.hour * 3600 + key.minute * 60 + key.second + step = 24 * 3600 + + for n in ns: + idx = pd.date_range("2014-11-26", periods=n, freq="S") + ts = pd.Series(np.random.randn(n), index=idx) + i = np.arange(start, n, step) + + tm.assert_numpy_array_equal(ts.index.get_loc(key), i, check_dtype=False) + tm.assert_series_equal(ts[key], ts.iloc[i]) + + left, right = ts.copy(), ts.copy() + left[key] *= -10 + right.iloc[i] *= -10 + tm.assert_series_equal(left, right) + + def test_time_overflow_for_32bit_machines(self): + # GH8943. On some machines NumPy defaults to np.int32 (for example, + # 32-bit Linux machines). In the function _generate_regular_range + # found in tseries/index.py, `periods` gets multiplied by `strides` + # (which has value 1e9) and since the max value for np.int32 is ~2e9, + # and since those machines won't promote np.int32 to np.int64, we get + # overflow. 
+ periods = np.int_(1000) + + idx1 = pd.date_range(start="2000", periods=periods, freq="S") + assert len(idx1) == periods + + idx2 = pd.date_range(end="2000", periods=periods, freq="S") + assert len(idx2) == periods + + def test_nat(self): + assert DatetimeIndex([np.nan])[0] is pd.NaT + + def test_week_of_month_frequency(self): + # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise + d1 = date(2002, 9, 1) + d2 = date(2013, 10, 27) + d3 = date(2012, 9, 30) + idx1 = DatetimeIndex([d1, d2]) + idx2 = DatetimeIndex([d3]) + result_append = idx1.append(idx2) + expected = DatetimeIndex([d1, d2, d3]) + tm.assert_index_equal(result_append, expected) + result_union = idx1.union(idx2) + expected = DatetimeIndex([d1, d3, d2]) + tm.assert_index_equal(result_union, expected) + + # GH 5115 + result = date_range("2013-1-1", periods=4, freq="WOM-1SAT") + dates = ["2013-01-05", "2013-02-02", "2013-03-02", "2013-04-06"] + expected = DatetimeIndex(dates, freq="WOM-1SAT") + tm.assert_index_equal(result, expected) + + def test_hash_error(self): + index = date_range("20010101", periods=10) + with pytest.raises( + TypeError, match=f"unhashable type: '{type(index).__name__}'" + ): + hash(index) + + def test_stringified_slice_with_tz(self): + # GH#2658 + start = "2013-01-07" + idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern") + df = DataFrame(np.arange(10), index=idx) + df["2013-01-14 23:44:34.437768-05:00":] # no exception here + + def test_append_join_nondatetimeindex(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timestamp) + + # it works + rng.join(idx, how="outer") + + def test_map(self): + rng = date_range("1/1/2000", periods=10) + + f = lambda x: x.strftime("%Y%m%d") + result = rng.map(f) + exp = Index([f(x) for x in rng], dtype="= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with 
pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_fill_value_with_timezone(self): + idx = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern" + ) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +class TestGetLoc: + def test_get_loc_tz_aware(self): + # https://github.com/pandas-dev/pandas/issues/32140 + dti = pd.date_range( + pd.Timestamp("2019-12-12 00:00:00", tz="US/Eastern"), + pd.Timestamp("2019-12-13 00:00:00", tz="US/Eastern"), + freq="5s", + ) + key = pd.Timestamp("2019-12-12 10:19:25", tz="US/Eastern") + result = dti.get_loc(key, method="nearest") + assert result == 7433 + + +class TestDatetimeIndex: + @pytest.mark.parametrize( + "null", [None, np.nan, np.datetime64("NaT"), pd.NaT, pd.NA] + ) + @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) + def test_insert_nat(self, tz, null): + # GH#16537, GH#18295 (test missing) + idx = 
pd.DatetimeIndex(["2017-01-01"], tz=tz) + expected = pd.DatetimeIndex(["NaT", "2017-01-01"], tz=tz) + res = idx.insert(0, null) + tm.assert_index_equal(res, expected) + + @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"]) + def test_insert_invalid_na(self, tz): + idx = pd.DatetimeIndex(["2017-01-01"], tz=tz) + with pytest.raises(TypeError, match="incompatible label"): + idx.insert(0, np.timedelta64("NaT")) + + def test_insert(self): + idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"], name="idx") + + result = idx.insert(2, datetime(2000, 1, 5)) + exp = DatetimeIndex( + ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx" + ) + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, "inserted") + expected = Index( + [ + datetime(2000, 1, 4), + "inserted", + datetime(2000, 1, 1), + datetime(2000, 1, 2), + ], + name="idx", + ) + assert not isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = date_range("1/1/2000", periods=3, freq="M", name="idx") + + # preserve freq + expected_0 = DatetimeIndex( + ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"], + name="idx", + freq="M", + ) + expected_3 = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"], + name="idx", + freq="M", + ) + + # reset freq to None + expected_1_nofreq = DatetimeIndex( + ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"], + name="idx", + freq=None, + ) + expected_3_nofreq = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], + name="idx", + freq=None, + ) + + cases = [ + (0, datetime(1999, 12, 31), expected_0), + (-3, datetime(1999, 12, 31), expected_0), + (3, datetime(2000, 4, 30), expected_3), + (1, datetime(2000, 1, 31), expected_1_nofreq), + (3, datetime(2000, 1, 2), expected_3_nofreq), + ] + + for n, d, expected in cases: + result = idx.insert(n, d) + 
tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # reset freq to None + result = idx.insert(3, datetime(2000, 1, 2)) + expected = DatetimeIndex( + ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], + name="idx", + freq=None, + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq is None + + # see gh-7299 + idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") + with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): + idx.insert(3, pd.Timestamp("2000-01-04")) + with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): + idx.insert(3, datetime(2000, 1, 4)) + with pytest.raises(ValueError): + idx.insert(3, pd.Timestamp("2000-01-04", tz="US/Eastern")) + with pytest.raises(ValueError): + idx.insert(3, datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))) + + for tz in ["US/Pacific", "Asia/Singapore"]: + idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx") + # preserve freq + expected = date_range( + "1/1/2000 09:00", periods=7, freq="H", tz=tz, name="idx" + ) + for d in [ + pd.Timestamp("2000-01-01 15:00", tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)), + ]: + + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + expected = DatetimeIndex( + [ + "2000-01-01 09:00", + "2000-01-01 10:00", + "2000-01-01 11:00", + "2000-01-01 12:00", + "2000-01-01 13:00", + "2000-01-01 14:00", + "2000-01-01 10:00", + ], + name="idx", + tz=tz, + freq=None, + ) + # reset freq to None + for d in [ + pd.Timestamp("2000-01-01 10:00", tz=tz), + pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)), + ]: + result = idx.insert(6, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.tz == 
expected.tz + assert result.freq is None + + def test_delete(self): + idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx") + + # preserve freq + expected_0 = date_range(start="2000-02-01", periods=4, freq="M", name="idx") + expected_4 = date_range(start="2000-01-01", periods=4, freq="M", name="idx") + + # reset freq to None + expected_1 = DatetimeIndex( + ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"], + freq=None, + name="idx", + ) + + cases = { + 0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + idx.delete(5) + + for tz in [None, "Asia/Tokyo", "US/Pacific"]: + idx = date_range( + start="2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz + ) + + expected = date_range( + start="2000-01-01 10:00", periods=9, freq="H", name="idx", tz=tz + ) + result = idx.delete(0) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freqstr == "H" + assert result.tz == expected.tz + + expected = date_range( + start="2000-01-01 09:00", periods=9, freq="H", name="idx", tz=tz + ) + result = idx.delete(-1) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freqstr == "H" + assert result.tz == expected.tz + + def test_delete_slice(self): + idx = date_range(start="2000-01-01", periods=10, freq="D", name="idx") + + # preserve freq + expected_0_2 = date_range(start="2000-01-04", periods=7, freq="D", name="idx") + expected_7_9 = date_range(start="2000-01-01", periods=7, freq="D", name="idx") + + # reset freq to None + expected_3_5 = DatetimeIndex( + [ + "2000-01-01", + "2000-01-02", + "2000-01-03", + "2000-01-07", + "2000-01-08", + "2000-01-09", + "2000-01-10", + ], + 
freq=None, + name="idx", + ) + + cases = { + (0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + result = idx.delete(slice(n[0], n[-1] + 1)) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + for tz in [None, "Asia/Tokyo", "US/Pacific"]: + ts = pd.Series( + 1, + index=pd.date_range( + "2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz + ), + ) + # preserve freq + result = ts.drop(ts.index[:5]).index + expected = pd.date_range( + "2000-01-01 14:00", periods=5, freq="H", name="idx", tz=tz + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + # reset freq to None + result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index + expected = DatetimeIndex( + [ + "2000-01-01 09:00", + "2000-01-01 11:00", + "2000-01-01 13:00", + "2000-01-01 15:00", + "2000-01-01 17:00", + ], + freq=None, + name="idx", + tz=tz, + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + def test_get_value(self): + # specifically make sure we have test for np.datetime64 key + dti = pd.date_range("2016-01-01", periods=3) + + arr = np.arange(6, 8) + + key = dti[1] + + result = dti.get_value(arr, key) + assert result == 7 + + result = dti.get_value(arr, key.to_pydatetime()) + assert result == 7 + + result = dti.get_value(arr, key.to_datetime64()) + assert result == 7 + + def test_get_loc(self): + idx = pd.date_range("2000-01-01", periods=3) + + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 + assert 
idx.get_loc(str(idx[1]), method) == 1 + + if method is not None: + assert ( + idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 + ) + + assert idx.get_loc("2000-01-01", method="nearest") == 0 + assert idx.get_loc("2000-01-01T12", method="nearest") == 1 + + assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) + == 1 + ) + assert ( + idx.get_loc( + "2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D") + ) + == 1 + ) + assert ( + idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 + ) + with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") + with pytest.raises(KeyError, match="'2000-01-01T03'"): + idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") + with pytest.raises( + ValueError, match="tolerance size must match target index size" + ): + idx.get_loc( + "2000-01-01", + method="nearest", + tolerance=[ + pd.Timedelta("1day").to_timedelta64(), + pd.Timedelta("1day").to_timedelta64(), + ], + ) + + assert idx.get_loc("2000", method="nearest") == slice(0, 3) + assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) + + assert idx.get_loc("1999", method="nearest") == 0 + assert idx.get_loc("2001", method="nearest") == 2 + + with pytest.raises(KeyError, match="'1999'"): + idx.get_loc("1999", method="pad") + with pytest.raises(KeyError, match="'2001'"): + idx.get_loc("2001", method="backfill") + + with pytest.raises(KeyError, match="'foobar'"): + idx.get_loc("foobar") + with pytest.raises(TypeError): + idx.get_loc(slice(2)) + + idx = pd.to_datetime(["2000-01-01", "2000-01-04"]) + assert idx.get_loc("2000-01-02", method="nearest") == 0 + assert idx.get_loc("2000-01-03", method="nearest") == 1 + assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) + + # time indexing + idx = 
pd.date_range("2000-01-01", periods=24, freq="H") + tm.assert_numpy_array_equal( + idx.get_loc(time(12)), np.array([12]), check_dtype=False + ) + tm.assert_numpy_array_equal( + idx.get_loc(time(12, 30)), np.array([]), check_dtype=False + ) + with pytest.raises(NotImplementedError): + idx.get_loc(time(12, 30), method="pad") + + def test_get_indexer(self): + idx = pd.date_range("2000-01-01", periods=3) + exp = np.array([0, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(idx.get_indexer(idx), exp) + + target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")), + np.array([0, -1, 1], dtype=np.intp), + ) + tol_raw = [ + pd.Timedelta("1 hour"), + pd.Timedelta("1 hour"), + pd.Timedelta("1 hour").to_timedelta64(), + ] + tm.assert_numpy_array_equal( + idx.get_indexer( + target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw] + ), + np.array([0, -1, 1], dtype=np.intp), + ) + tol_bad = [ + pd.Timedelta("2 hour").to_timedelta64(), + pd.Timedelta("1 hour").to_timedelta64(), + "foo", + ] + with pytest.raises(ValueError, match="abbreviation w/o a number"): + idx.get_indexer(target, "nearest", tolerance=tol_bad) + with pytest.raises(ValueError): + idx.get_indexer(idx[[0]], method="nearest", tolerance="foo") + + def test_reasonable_key_error(self): + # GH#1062 + index = DatetimeIndex(["1/3/2000"]) + with pytest.raises(KeyError, match="2000"): + index.get_loc("1/1/2000") + + @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)]) + def test_timedelta_invalid_key(self, key): + # GH#20464 + dti = 
pd.date_range("1970-01-01", periods=10) + with pytest.raises(TypeError): + dti.get_loc(key) + + def test_get_loc_nat(self): + # GH#20464 + index = DatetimeIndex(["1/3/2000", "NaT"]) + assert index.get_loc(pd.NaT) == 1 diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py new file mode 100644 index 00000000..f2f88fd7 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -0,0 +1,144 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import DatetimeIndex, Index, Timestamp, date_range, to_datetime +import pandas._testing as tm + +from pandas.tseries.offsets import BDay, BMonthEnd + + +class TestJoin: + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="dt", + ) + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_numpy_array_equal(cols.values, joined.values) + + def test_join_self(self, join_type): + index = date_range("1/1/2000", periods=10) + joined = index.join(index, how=join_type) + assert index is joined + + def test_join_with_period_index(self, join_type): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + s = df.iloc[:5, 0] + + expected = df.columns.astype("O").join(s.index, how=join_type) + result = df.columns.join(s.index, how=join_type) + tm.assert_index_equal(expected, result) + + def test_join_object_index(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.join(idx, how="outer") + assert isinstance(result[0], Timestamp) + + def test_join_utc_convert(self, join_type): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng.tz_convert("US/Eastern") + right = 
rng.tz_convert("Europe/Berlin") + + result = left.join(left[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == "UTC" + + @pytest.mark.parametrize("sort", [None, False]) + def test_datetimeindex_union_join_empty(self, sort): + dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") + empty = Index([]) + + result = dti.union(empty, sort=sort) + expected = dti.astype("O") + tm.assert_index_equal(result, expected) + + result = dti.join(empty) + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + + def test_join_nonunique(self): + idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]) + idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]) + rs = idx1.join(idx2, how="outer") + assert rs.is_monotonic + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_outer_join(self, freq): + # should just behave as union + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + rng = date_range(start=start, end=end, freq=freq) + + # overlapping + left = rng[:10] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = rng[:5] + right = rng[10:] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + # non-overlapping, no gap + left = rng[:5] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # overlapping, but different offset + other = date_range(start, end, freq=BMonthEnd()) + + the_join = rng.join(other, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + def test_naive_aware_conflicts(self): + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + naive = 
date_range(start, end, freq=BDay(), tz=None) + aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong") + + msg = "tz-naive.*tz-aware" + with pytest.raises(TypeError, match=msg): + naive.join(aware) + + with pytest.raises(TypeError, match=msg): + aware.join(naive) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_join_preserves_freq(self, tz): + # GH#32157 + dti = date_range("2016-01-01", periods=10, tz=tz) + result = dti[:5].join(dti[5:], how="outer") + assert result.freq == dti.freq + tm.assert_index_equal(result, dti) + + result = dti[:5].join(dti[6:], how="outer") + assert result.freq is None + expected = dti.delete(5) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py new file mode 100644 index 00000000..340f53b2 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -0,0 +1,384 @@ +import calendar +from datetime import datetime +import locale +import unicodedata + +import numpy as np +import pytest + +import pandas as pd +from pandas import DatetimeIndex, Index, Timestamp, date_range, offsets +import pandas._testing as tm + + +class TestTimeSeries: + def test_pass_datetimeindex_to_index(self): + # Bugs in #1396 + rng = date_range("1/1/2000", "3/1/2000") + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pydatetime(), dtype=object) + + tm.assert_numpy_array_equal(idx.values, expected.values) + + def test_range_edges(self): + # GH#13672 + idx = pd.date_range( + start=Timestamp("1970-01-01 00:00:00.000000001"), + end=Timestamp("1970-01-01 00:00:00.000000004"), + freq="N", + ) + exp = DatetimeIndex( + [ + "1970-01-01 00:00:00.000000001", + "1970-01-01 00:00:00.000000002", + "1970-01-01 00:00:00.000000003", + "1970-01-01 00:00:00.000000004", + ] + ) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01 00:00:00.000000004"), + end=Timestamp("1970-01-01 00:00:00.000000001"), + freq="N", + ) + 
exp = DatetimeIndex([]) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01 00:00:00.000000001"), + end=Timestamp("1970-01-01 00:00:00.000000001"), + freq="N", + ) + exp = DatetimeIndex(["1970-01-01 00:00:00.000000001"]) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01 00:00:00.000001"), + end=Timestamp("1970-01-01 00:00:00.000004"), + freq="U", + ) + exp = DatetimeIndex( + [ + "1970-01-01 00:00:00.000001", + "1970-01-01 00:00:00.000002", + "1970-01-01 00:00:00.000003", + "1970-01-01 00:00:00.000004", + ] + ) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01 00:00:00.001"), + end=Timestamp("1970-01-01 00:00:00.004"), + freq="L", + ) + exp = DatetimeIndex( + [ + "1970-01-01 00:00:00.001", + "1970-01-01 00:00:00.002", + "1970-01-01 00:00:00.003", + "1970-01-01 00:00:00.004", + ] + ) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01 00:00:01"), + end=Timestamp("1970-01-01 00:00:04"), + freq="S", + ) + exp = DatetimeIndex( + [ + "1970-01-01 00:00:01", + "1970-01-01 00:00:02", + "1970-01-01 00:00:03", + "1970-01-01 00:00:04", + ] + ) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01 00:01"), + end=Timestamp("1970-01-01 00:04"), + freq="T", + ) + exp = DatetimeIndex( + [ + "1970-01-01 00:01", + "1970-01-01 00:02", + "1970-01-01 00:03", + "1970-01-01 00:04", + ] + ) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01 01:00"), + end=Timestamp("1970-01-01 04:00"), + freq="H", + ) + exp = DatetimeIndex( + [ + "1970-01-01 01:00", + "1970-01-01 02:00", + "1970-01-01 03:00", + "1970-01-01 04:00", + ] + ) + tm.assert_index_equal(idx, exp) + + idx = pd.date_range( + start=Timestamp("1970-01-01"), end=Timestamp("1970-01-04"), freq="D" + ) + exp = DatetimeIndex(["1970-01-01", "1970-01-02", "1970-01-03", "1970-01-04"]) + tm.assert_index_equal(idx, exp) + + 
+class TestDatetime64: + def test_datetimeindex_accessors(self): + dti_naive = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365) + # GH#13303 + dti_tz = pd.date_range( + freq="D", start=datetime(1998, 1, 1), periods=365, tz="US/Eastern" + ) + for dti in [dti_naive, dti_tz]: + + assert dti.year[0] == 1998 + assert dti.month[0] == 1 + assert dti.day[0] == 1 + assert dti.hour[0] == 0 + assert dti.minute[0] == 0 + assert dti.second[0] == 0 + assert dti.microsecond[0] == 0 + assert dti.dayofweek[0] == 3 + + assert dti.dayofyear[0] == 1 + assert dti.dayofyear[120] == 121 + + assert dti.weekofyear[0] == 1 + assert dti.weekofyear[120] == 18 + + assert dti.quarter[0] == 1 + assert dti.quarter[120] == 2 + + assert dti.days_in_month[0] == 31 + assert dti.days_in_month[90] == 30 + + assert dti.is_month_start[0] + assert not dti.is_month_start[1] + assert dti.is_month_start[31] + assert dti.is_quarter_start[0] + assert dti.is_quarter_start[90] + assert dti.is_year_start[0] + assert not dti.is_year_start[364] + assert not dti.is_month_end[0] + assert dti.is_month_end[30] + assert not dti.is_month_end[31] + assert dti.is_month_end[364] + assert not dti.is_quarter_end[0] + assert not dti.is_quarter_end[30] + assert dti.is_quarter_end[89] + assert dti.is_quarter_end[364] + assert not dti.is_year_end[0] + assert dti.is_year_end[364] + + assert len(dti.year) == 365 + assert len(dti.month) == 365 + assert len(dti.day) == 365 + assert len(dti.hour) == 365 + assert len(dti.minute) == 365 + assert len(dti.second) == 365 + assert len(dti.microsecond) == 365 + assert len(dti.dayofweek) == 365 + assert len(dti.dayofyear) == 365 + assert len(dti.weekofyear) == 365 + assert len(dti.quarter) == 365 + assert len(dti.is_month_start) == 365 + assert len(dti.is_month_end) == 365 + assert len(dti.is_quarter_start) == 365 + assert len(dti.is_quarter_end) == 365 + assert len(dti.is_year_start) == 365 + assert len(dti.is_year_end) == 365 + + dti.name = "name" + + # non boolean accessors 
-> return Index + for accessor in DatetimeIndex._field_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, Index) + assert res.name == "name" + + # boolean accessors -> return array + for accessor in DatetimeIndex._bool_ops: + res = getattr(dti, accessor) + assert len(res) == 365 + assert isinstance(res, np.ndarray) + + # test boolean indexing + res = dti[dti.is_quarter_start] + exp = dti[[0, 90, 181, 273]] + tm.assert_index_equal(res, exp) + res = dti[dti.is_leap_year] + exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name") + tm.assert_index_equal(res, exp) + + dti = pd.date_range(freq="BQ-FEB", start=datetime(1998, 1, 1), periods=4) + + assert sum(dti.is_quarter_start) == 0 + assert sum(dti.is_quarter_end) == 4 + assert sum(dti.is_year_start) == 0 + assert sum(dti.is_year_end) == 1 + + # Ensure is_start/end accessors throw ValueError for CustomBusinessDay, + bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu") + dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt) + msg = "Custom business days is not supported by is_month_start" + with pytest.raises(ValueError, match=msg): + dti.is_month_start + + dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"]) + + assert dti.is_month_start[0] == 1 + + tests = [ + (Timestamp("2013-06-01", freq="M").is_month_start, 1), + (Timestamp("2013-06-01", freq="BM").is_month_start, 0), + (Timestamp("2013-06-03", freq="M").is_month_start, 0), + (Timestamp("2013-06-03", freq="BM").is_month_start, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_month_end, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_quarter_end, 1), + (Timestamp("2013-02-28", freq="Q-FEB").is_year_end, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_month_start, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_quarter_start, 1), + (Timestamp("2013-03-01", freq="Q-FEB").is_year_start, 1), + (Timestamp("2013-03-31", freq="QS-FEB").is_month_end, 1), + (Timestamp("2013-03-31", 
freq="QS-FEB").is_quarter_end, 0), + (Timestamp("2013-03-31", freq="QS-FEB").is_year_end, 0), + (Timestamp("2013-02-01", freq="QS-FEB").is_month_start, 1), + (Timestamp("2013-02-01", freq="QS-FEB").is_quarter_start, 1), + (Timestamp("2013-02-01", freq="QS-FEB").is_year_start, 1), + (Timestamp("2013-06-30", freq="BQ").is_month_end, 0), + (Timestamp("2013-06-30", freq="BQ").is_quarter_end, 0), + (Timestamp("2013-06-30", freq="BQ").is_year_end, 0), + (Timestamp("2013-06-28", freq="BQ").is_month_end, 1), + (Timestamp("2013-06-28", freq="BQ").is_quarter_end, 1), + (Timestamp("2013-06-28", freq="BQ").is_year_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_month_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_quarter_end, 0), + (Timestamp("2013-06-30", freq="BQS-APR").is_year_end, 0), + (Timestamp("2013-06-28", freq="BQS-APR").is_month_end, 1), + (Timestamp("2013-06-28", freq="BQS-APR").is_quarter_end, 1), + (Timestamp("2013-03-29", freq="BQS-APR").is_year_end, 1), + (Timestamp("2013-11-01", freq="AS-NOV").is_year_start, 1), + (Timestamp("2013-10-31", freq="AS-NOV").is_year_end, 1), + (Timestamp("2012-02-01").days_in_month, 29), + (Timestamp("2013-02-01").days_in_month, 28), + ] + + for ts, value in tests: + assert ts == value + + # GH 6538: Check that DatetimeIndex and its TimeStamp elements + # return the same weekofyear accessor close to new year w/ tz + dates = ["2013/12/29", "2013/12/30", "2013/12/31"] + dates = DatetimeIndex(dates, tz="Europe/Brussels") + expected = [52, 1, 1] + assert dates.weekofyear.tolist() == expected + assert [d.weekofyear for d in dates] == expected + + # GH 12806 + @pytest.mark.parametrize( + "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales() + ) + def test_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that sequence + if time_locale is None: + # If the time_locale is None, day-name and month_name should + # return the english attributes + 
expected_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + expected_months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + ] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + # GH#11128 + dti = pd.date_range(freq="D", start=datetime(1998, 1, 1), periods=365) + english_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + for day, name, eng_name in zip(range(4, 11), expected_days, english_days): + name = name.capitalize() + assert dti.day_name(locale=time_locale)[day] == name + ts = Timestamp(datetime(2016, 4, day)) + assert ts.day_name(locale=time_locale) == name + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.day_name(locale=time_locale)[-1]) + ts = Timestamp(pd.NaT) + assert np.isnan(ts.day_name(locale=time_locale)) + + # GH#12805 + dti = pd.date_range(freq="M", start="2012", end="2013") + result = dti.month_name(locale=time_locale) + expected = Index([month.capitalize() for month in expected_months]) + + # work around different normalization schemes + # https://github.com/pandas-dev/pandas/issues/22342 + result = result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + + tm.assert_index_equal(result, expected) + + for date, expected in zip(dti, expected_months): + result = date.month_name(locale=time_locale) + expected = expected.capitalize() + + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", expected) # NFD on both sides + + assert result == expected + dti = dti.append(DatetimeIndex([pd.NaT])) + assert np.isnan(dti.month_name(locale=time_locale)[-1]) + + def test_nanosecond_field(self): + dti = DatetimeIndex(np.arange(10)) + + tm.assert_index_equal(dti.nanosecond, pd.Index(np.arange(10, dtype=np.int64))) 
+ + +def test_iter_readonly(): + # GH#28055 ints_to_pydatetime with readonly array + arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")]) + arr.setflags(write=False) + dti = pd.to_datetime(arr) + list(dti) diff --git a/pandas/tests/indexes/datetimes/test_missing.py b/pandas/tests/indexes/datetimes/test_missing.py new file mode 100644 index 00000000..3399c8ea --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_missing.py @@ -0,0 +1,62 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestDatetimeIndex: + @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) + def test_fillna_datetime64(self, tz): + # GH 11343 + idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"]) + + exp = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"] + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp) + + # tz mismatch + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 10:00", tz=tz), + pd.Timestamp("2011-01-01 11:00"), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp) + + # object + exp = pd.Index( + [pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")], + dtype=object, + ) + tm.assert_index_equal(idx.fillna("x"), exp) + + idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz) + + exp = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp) + + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00", tz=tz), + pd.Timestamp("2011-01-01 10:00"), + pd.Timestamp("2011-01-01 11:00", tz=tz), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp) + + # object + exp = pd.Index( + [ + pd.Timestamp("2011-01-01 09:00", tz=tz), + "x", + pd.Timestamp("2011-01-01 11:00", tz=tz), + ], 
+ dtype=object, + ) + tm.assert_index_equal(idx.fillna("x"), exp) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py new file mode 100644 index 00000000..ecd4ace7 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -0,0 +1,546 @@ +from datetime import datetime +import warnings + +import numpy as np +import pytest + +from pandas.core.dtypes.generic import ABCDateOffset + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + PeriodIndex, + Series, + Timestamp, + bdate_range, + date_range, +) +import pandas._testing as tm +from pandas.tests.base.test_ops import Ops + +from pandas.tseries.offsets import BDay, BMonthEnd, CDay, Day, Hour + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestDatetimeIndexOps(Ops): + def setup_method(self, method): + super().setup_method(method) + mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex)) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [o for o in self.objs if not mask(o)] + + def test_ops_properties(self): + f = lambda x: isinstance(x, DatetimeIndex) + self.check_ops_properties(DatetimeIndex._field_ops, f) + self.check_ops_properties(DatetimeIndex._object_ops, f) + self.check_ops_properties(DatetimeIndex._bool_ops, f) + + def test_ops_properties_basic(self): + + # sanity check that the behavior didn't change + # GH#7206 + for op in ["year", "day", "second", "weekday"]: + msg = f"'Series' object has no attribute '{op}'" + with pytest.raises(AttributeError, match=msg): + getattr(self.dt_series, op) + + # attribute access should still work! 
+ s = Series(dict(year=2000, month=1, day=10)) + assert s.year == 2000 + assert s.month == 1 + assert s.day == 10 + msg = "'Series' object has no attribute 'weekday'" + with pytest.raises(AttributeError, match=msg): + s.weekday + + def test_repeat_range(self, tz_naive_fixture): + tz = tz_naive_fixture + rng = date_range("1/1/2000", "1/1/2001") + + result = rng.repeat(5) + assert result.freq is None + assert len(result) == 5 * len(rng) + + index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz) + exp = pd.DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz + ) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz) + exp = pd.DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz + ) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) + exp = pd.DatetimeIndex( + [ + "2001-01-01", + "2001-01-01", + "2001-01-01", + "NaT", + "NaT", + "NaT", + "2003-01-01", + "2003-01-01", + "2003-01-01", + ], + tz=tz, + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + def test_repeat(self, tz_naive_fixture): + tz = tz_naive_fixture + reps = 2 + msg = "the 'axis' parameter is not supported" + + rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), + ] + ) + + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + assert res.freq is None + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + with 
pytest.raises(ValueError, match=msg): + np.repeat(rng, reps, axis=1) + + def test_resolution(self, tz_naive_fixture): + tz = tz_naive_fixture + for freq, expected in zip( + ["A", "Q", "M", "D", "H", "T", "S", "L", "U"], + [ + "day", + "day", + "day", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + ], + ): + idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) + assert idx.resolution == expected + + def test_value_counts_unique(self, tz_naive_fixture): + tz = tz_naive_fixture + # GH 7735 + idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) + + exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) + expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) + tm.assert_index_equal(idx.unique(), expected) + + idx = DatetimeIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + pd.NaT, + ], + tz=tz, + ) + + exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz) + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz) + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_nonunique_contains(self): + # GH 9512 + for idx in map( + DatetimeIndex, + ( + [0, 1, 0], + [0, 0, -1], + [0, -1, -1], + ["2015", "2015", "2016"], + ["2015", "2015", "2014"], + ), + ): + assert idx[0] in idx + + 
@pytest.mark.parametrize( + "idx", + [ + DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" + ), + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + name="tzidx", + tz="Asia/Tokyo", + ), + ], + ) + def test_order_with_freq(self, idx): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + assert ordered.freq == idx.freq + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) + assert ordered.freq == idx.freq + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 + + @pytest.mark.parametrize( + "index_dates,expected_dates", + [ + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], + [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ], + ) + def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture): + tz = tz_naive_fixture + + # without freq + index = DatetimeIndex(index_dates, tz=tz, name="idx") + expected = DatetimeIndex(expected_dates, tz=tz, name="idx") + + ordered = index.sort_values() + tm.assert_index_equal(ordered, expected) + assert ordered.freq is None + + ordered = 
index.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + assert ordered.freq is None + + ordered, indexer = index.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None + + ordered, indexer = index.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + result = idx.drop_duplicates() + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq + + idx_dup = idx.append(idx) + assert idx_dup.freq is None # freq is reset + result = idx_dup.drop_duplicates() + tm.assert_index_equal(idx, result) + assert result.freq is None + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.date_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep="last") + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep="last") + tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + @pytest.mark.parametrize( + "freq", + [ + "A", + "2A", + "-2A", + "Q", + "-1Q", + "M", + "-1M", + "D", + "3D", + "-3D", + "W", + "-1W", + "H", + "2H", + "-2H", + "T", + "2T", + "S", + "-3S", + ], + ) + def test_infer_freq(self, 
freq): + # GH 11018 + idx = pd.date_range("2011-01-01 09:00:00", freq=freq, periods=10) + result = pd.DatetimeIndex(idx.asi8, freq="infer") + tm.assert_index_equal(idx, result) + assert result.freq == freq + + def test_nat(self, tz_naive_fixture): + tz = tz_naive_fixture + assert pd.DatetimeIndex._na_value is pd.NaT + assert pd.DatetimeIndex([])._na_value is pd.NaT + + idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) + + idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz) + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True + tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) + + def test_equals(self): + # GH 13107 + idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = pd.DatetimeIndex._simple_new(idx.asi8, tz="US/Pacific") + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + # check that we do not raise when 
comparing with OutOfBounds objects + oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + assert not idx3.equals(oob) + + # check that we do not raise when comparing with OutOfBounds dt64 + oob2 = oob.map(np.datetime64) + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + assert not idx3.equals(oob2) + + @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) + @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_freq_setter(self, values, freq, tz): + # GH 20678 + idx = DatetimeIndex(values, tz=tz) + + # can set to an offset, converting from string if necessary + idx._data.freq = freq + assert idx.freq == freq + assert isinstance(idx.freq, ABCDateOffset) + + # can reset to None + idx._data.freq = None + assert idx.freq is None + + def test_freq_setter_errors(self): + # GH 20678 + idx = DatetimeIndex(["20180101", "20180103", "20180105"]) + + # setting with an incompatible freq + msg = ( + "Inferred frequency 2D from passed values does not conform to " + "passed frequency 5D" + ) + with pytest.raises(ValueError, match=msg): + idx._data.freq = "5D" + + # setting with non-freq string + with pytest.raises(ValueError, match="Invalid frequency"): + idx._data.freq = "foo" + + +class TestBusinessDatetimeIndex: + def setup_method(self, method): + self.rng = bdate_range(START, END) + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + assert comp[11] + assert not comp[9] + + def test_pickle_unpickle(self): + unpickled = tm.round_trip_pickle(self.rng) + assert unpickled.freq is not None + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + tm.assert_index_equal(cp, self.rng) + + def test_shift(self): + shifted = self.rng.shift(5) + assert shifted[0] == self.rng[5] + assert shifted.freq == self.rng.freq + + shifted = self.rng.shift(-5) + assert shifted[5] == 
self.rng[0] + assert shifted.freq == self.rng.freq + + shifted = self.rng.shift(0) + assert shifted[0] == self.rng[0] + assert shifted.freq == self.rng.freq + + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=BDay()) + assert shifted[0] == rng[0] + BDay() + + def test_equals(self): + assert not self.rng.equals(list(self.rng)) + + def test_identical(self): + t1 = self.rng.copy() + t2 = self.rng.copy() + assert t1.identical(t2) + + # name + t1 = t1.rename("foo") + assert t1.equals(t2) + assert not t1.identical(t2) + t2 = t2.rename("foo") + assert t1.identical(t2) + + # freq + t2v = Index(t2.values) + assert t1.equals(t2v) + assert not t1.identical(t2v) + + +class TestCustomDatetimeIndex: + def setup_method(self, method): + self.rng = bdate_range(START, END, freq="C") + + def test_comparison(self): + d = self.rng[10] + + comp = self.rng > d + assert comp[11] + assert not comp[9] + + def test_copy(self): + cp = self.rng.copy() + repr(cp) + tm.assert_index_equal(cp, self.rng) + + def test_shift(self): + + shifted = self.rng.shift(5) + assert shifted[0] == self.rng[5] + assert shifted.freq == self.rng.freq + + shifted = self.rng.shift(-5) + assert shifted[5] == self.rng[0] + assert shifted.freq == self.rng.freq + + shifted = self.rng.shift(0) + assert shifted[0] == self.rng[0] + assert shifted.freq == self.rng.freq + + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", pd.errors.PerformanceWarning) + rng = date_range(START, END, freq=BMonthEnd()) + shifted = rng.shift(1, freq=CDay()) + assert shifted[0] == rng[0] + CDay() + + def test_shift_periods(self): + # GH#22458 : argument 'n' was deprecated in favor of 'periods' + idx = pd.date_range(start=START, end=END, periods=3) + tm.assert_index_equal(idx.shift(periods=0), idx) + tm.assert_index_equal(idx.shift(0), idx) + + def test_pickle_unpickle(self): + unpickled = tm.round_trip_pickle(self.rng) + assert unpickled.freq is not None + + def test_equals(self): + assert 
not self.rng.equals(list(self.rng)) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py new file mode 100644 index 00000000..e30cc444 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -0,0 +1,481 @@ +""" test partial slicing on Series/Frame """ + +from datetime import datetime +import operator + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm +from pandas.core.indexing import IndexingError + + +class TestSlicing: + def test_dti_slicing(self): + dti = date_range(start="1/1/2005", end="12/1/2005", freq="M") + dti2 = dti[[1, 3, 5]] + + v1 = dti2[0] + v2 = dti2[1] + v3 = dti2[2] + + assert v1 == Timestamp("2/28/2005") + assert v2 == Timestamp("4/30/2005") + assert v3 == Timestamp("6/30/2005") + + # don't carry freq through irregular slicing + assert dti2.freq is None + + def test_slice_keeps_name(self): + # GH4226 + st = pd.Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles") + et = pd.Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles") + dr = pd.date_range(st, et, freq="H", name="timebucket") + assert dr[1:].name == dr.name + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS")) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Timestamp("2014-10-01") :: -1], SLC[9::-1]) + assert_slices_equivalent(SLC["2014-10-01"::-1], SLC[9::-1]) + + assert_slices_equivalent(SLC[: Timestamp("2014-10-01") : -1], SLC[:8:-1]) + assert_slices_equivalent(SLC[:"2014-10-01":-1], SLC[:8:-1]) + + 
assert_slices_equivalent(SLC["2015-02-01":"2014-10-01":-1], SLC[13:8:-1]) + assert_slices_equivalent( + SLC[Timestamp("2015-02-01") : Timestamp("2014-10-01") : -1], SLC[13:8:-1] + ) + assert_slices_equivalent( + SLC["2015-02-01" : Timestamp("2014-10-01") : -1], SLC[13:8:-1] + ) + assert_slices_equivalent( + SLC[Timestamp("2015-02-01") : "2014-10-01" : -1], SLC[13:8:-1] + ) + + assert_slices_equivalent(SLC["2014-10-01":"2015-02-01":-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), date_range("2014-01-01", periods=20, freq="MS")) + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts[::0] + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts.loc[::0] + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts.loc[::0] + + def test_slice_bounds_empty(self): + # GH#14354 + empty_idx = date_range(freq="1H", periods=0, end="2015") + + right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right", "loc") + exp = Timestamp("2015-01-02 23:59:59.999999999") + assert right == exp + + left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left", "loc") + exp = Timestamp("2015-01-02 00:00:00") + assert left == exp + + def test_slice_duplicate_monotonic(self): + # https://github.com/pandas-dev/pandas/issues/16515 + idx = pd.DatetimeIndex(["2017", "2017"]) + result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc") + expected = Timestamp("2017-01-01") + assert result == expected + + def test_monotone_DTI_indexing_bug(self): + # GH 19362 + # Testing accessing the first element in a monotonic descending + # partial string indexing. 
+ + df = pd.DataFrame(list(range(5))) + date_list = [ + "2018-01-02", + "2017-02-10", + "2016-03-10", + "2015-03-15", + "2014-03-16", + ] + date_index = pd.to_datetime(date_list) + df["date"] = date_index + expected = pd.DataFrame({0: list(range(5)), "date": date_index}) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame( + {"A": [1, 2, 3]}, index=pd.date_range("20170101", periods=3)[::-1] + ) + expected = pd.DataFrame({"A": 1}, index=pd.date_range("20170103", periods=1)) + tm.assert_frame_equal(df.loc["2017-01-03"], expected) + + def test_slice_year(self): + dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + result = s["2005"] + expected = s[s.index.year == 2005] + tm.assert_series_equal(result, expected) + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + result = df.loc["2005"] + expected = df[df.index.year == 2005] + tm.assert_frame_equal(result, expected) + + rng = date_range("1/1/2000", "1/1/2010") + + result = rng.get_loc("2009") + expected = slice(3288, 3653) + assert result == expected + + def test_slice_quarter(self): + dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500) + + s = Series(np.arange(len(dti)), index=dti) + assert len(s["2001Q1"]) == 90 + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + assert len(df.loc["1Q01"]) == 90 + + def test_slice_month(self): + dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(dti)), index=dti) + assert len(s["2005-11"]) == 30 + + df = DataFrame(np.random.rand(len(dti), 5), index=dti) + assert len(df.loc["2005-11"]) == 30 + + tm.assert_series_equal(s["2005-11"], s["11-2005"]) + + def test_partial_slice(self): + rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-05":"2006-02"] + expected = s["20050501":"20060228"] + tm.assert_series_equal(result, expected) + + result = s["2005-05":] + 
expected = s["20050501":] + tm.assert_series_equal(result, expected) + + result = s[:"2006-02"] + expected = s[:"20060228"] + tm.assert_series_equal(result, expected) + + result = s["2005-1-1"] + assert result == s.iloc[0] + + with pytest.raises(KeyError, match=r"^'2004-12-31'$"): + s["2004-12-31"] + + def test_partial_slice_daily(self): + rng = date_range(freq="H", start=datetime(2005, 1, 31), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-31"] + tm.assert_series_equal(result, s.iloc[:24]) + + with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"): + s["2004-12-31 00"] + + def test_partial_slice_hourly(self): + rng = date_range(freq="T", start=datetime(2005, 1, 1, 20, 0, 0), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-1"] + tm.assert_series_equal(result, s.iloc[: 60 * 4]) + + result = s["2005-1-1 20"] + tm.assert_series_equal(result, s.iloc[:60]) + + assert s["2005-1-1 20:00"] == s.iloc[0] + with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"): + s["2004-12-31 00:15"] + + def test_partial_slice_minutely(self): + rng = date_range(freq="S", start=datetime(2005, 1, 1, 23, 59, 0), periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["2005-1-1 23:59"] + tm.assert_series_equal(result, s.iloc[:60]) + + result = s["2005-1-1"] + tm.assert_series_equal(result, s.iloc[:60]) + + assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0] + with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"): + s["2004-12-31 00:00:00"] + + def test_partial_slice_second_precision(self): + rng = date_range( + start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990), + periods=20, + freq="US", + ) + s = Series(np.arange(20), rng) + + tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10]) + tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10]) + + tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:]) + tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:]) + + assert 
s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0] + with pytest.raises(KeyError, match="2005-1-1 00:00:00"): + s["2005-1-1 00:00:00"] + + def test_partial_slicing_dataframe(self): + # GH14856 + # Test various combinations of string slicing resolution vs. + # index resolution + # - If string resolution is less precise than index resolution, + # string is considered a slice + # - If string resolution is equal to or more precise than index + # resolution, string is considered an exact match + formats = [ + "%Y", + "%Y-%m", + "%Y-%m-%d", + "%Y-%m-%d %H", + "%Y-%m-%d %H:%M", + "%Y-%m-%d %H:%M:%S", + ] + resolutions = ["year", "month", "day", "hour", "minute", "second"] + for rnum, resolution in enumerate(resolutions[2:], 2): + # we check only 'day', 'hour', 'minute' and 'second' + unit = Timedelta("1 " + resolution) + middate = datetime(2012, 1, 1, 0, 0, 0) + index = DatetimeIndex([middate - unit, middate, middate + unit]) + values = [1, 2, 3] + df = DataFrame({"a": values}, index, dtype=np.int64) + assert df.index.resolution == resolution + + # Timestamp with the same resolution as index + # Should be exact match for Series (return scalar) + # and raise KeyError for Frame + for timestamp, expected in zip(index, values): + ts_string = timestamp.strftime(formats[rnum]) + # make ts_string as precise as index + result = df["a"][ts_string] + assert isinstance(result, np.int64) + assert result == expected + msg = fr"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df[ts_string] + + # Timestamp with resolution less precise than index + for fmt in formats[:rnum]: + for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]: + ts_string = index[element].strftime(fmt) + + # Series should return slice + result = df["a"][ts_string] + expected = df["a"][theslice] + tm.assert_series_equal(result, expected) + + # Frame should return slice as well + result = df[ts_string] + expected = df[theslice] + tm.assert_frame_equal(result, expected) + + # Timestamp with 
resolution more precise than index + # Compatible with existing key + # Should return scalar for Series + # and raise KeyError for Frame + for fmt in formats[rnum + 1 :]: + ts_string = index[1].strftime(fmt) + result = df["a"][ts_string] + assert isinstance(result, np.int64) + assert result == 2 + msg = fr"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df[ts_string] + + # Not compatible with existing key + # Should raise KeyError + for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]: + ts = index[1] + Timedelta("1 " + res) + ts_string = ts.strftime(fmt) + msg = fr"^'{ts_string}'$" + with pytest.raises(KeyError, match=msg): + df["a"][ts_string] + with pytest.raises(KeyError, match=msg): + df[ts_string] + + def test_partial_slicing_with_multiindex(self): + + # GH 4758 + # partial string indexing with a multi-index buggy + df = DataFrame( + { + "ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"], + "TICKER": ["ABC", "MNP", "XYZ", "XYZ"], + "val": [1, 2, 3, 4], + }, + index=date_range("2013-06-19 09:30:00", periods=4, freq="5T"), + ) + df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True) + + expected = DataFrame( + [[1]], index=Index(["ABC"], name="TICKER"), columns=["val"] + ) + result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")] + tm.assert_frame_equal(result, expected) + + expected = df_multi.loc[ + (pd.Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC") + ] + result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")] + tm.assert_series_equal(result, expected) + + # this is an IndexingError as we don't do partial string selection on + # multi-levels. 
+ msg = "Too many indexers" + with pytest.raises(IndexingError, match=msg): + df_multi.loc[("2013-06-19", "ACCT1", "ABC")] + + # GH 4294 + # partial slice on a series mi + s = pd.DataFrame( + np.random.rand(1000, 1000), index=pd.date_range("2000-1-1", periods=1000) + ).stack() + + s2 = s[:-1].copy() + expected = s2["2000-1-4"] + result = s2[pd.Timestamp("2000-1-4")] + tm.assert_series_equal(result, expected) + + result = s[pd.Timestamp("2000-1-4")] + expected = s["2000-1-4"] + tm.assert_series_equal(result, expected) + + df2 = pd.DataFrame(s) + expected = df2.xs("2000-1-4") + result = df2.loc[pd.Timestamp("2000-1-4")] + tm.assert_frame_equal(result, expected) + + def test_partial_slice_doesnt_require_monotonicity(self): + # For historical reasons. + s = pd.Series(np.arange(10), pd.date_range("2014-01-01", periods=10)) + + nonmonotonic = s[[3, 5, 4]] + expected = nonmonotonic.iloc[:0] + timestamp = pd.Timestamp("2014-01-10") + + tm.assert_series_equal(nonmonotonic["2014-01-10":], expected) + with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic[timestamp:] + + tm.assert_series_equal(nonmonotonic.loc["2014-01-10":], expected) + with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"): + nonmonotonic.loc[timestamp:] + + def test_loc_datetime_length_one(self): + # GH16071 + df = pd.DataFrame( + columns=["1"], + index=pd.date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"), + ) + result = df.loc[datetime(2016, 10, 1) :] + tm.assert_frame_equal(result, df) + + result = df.loc["2016-10-01T00:00:00":] + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize( + "datetimelike", + [ + Timestamp("20130101"), + datetime(2013, 1, 1), + np.datetime64("2013-01-01T00:00", "ns"), + ], + ) + @pytest.mark.parametrize( + "op,expected", + [ + (operator.lt, [True, False, False, False]), + (operator.le, [True, True, False, False]), + (operator.eq, [False, True, False, False]), + (operator.gt, [False, False, False, True]), + 
], + ) + def test_selection_by_datetimelike(self, datetimelike, op, expected): + # GH issue #17965, test for ability to compare datetime64[ns] columns + # to datetimelike + df = DataFrame( + { + "A": [ + pd.Timestamp("20120101"), + pd.Timestamp("20130101"), + np.nan, + pd.Timestamp("20130103"), + ] + } + ) + result = op(df.A, datetimelike) + expected = Series(expected, name="A") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "start", + [ + "2018-12-02 21:50:00+00:00", + pd.Timestamp("2018-12-02 21:50:00+00:00"), + pd.Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(), + ], + ) + @pytest.mark.parametrize( + "end", + [ + "2018-12-02 21:52:00+00:00", + pd.Timestamp("2018-12-02 21:52:00+00:00"), + pd.Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(), + ], + ) + def test_getitem_with_datestring_with_UTC_offset(self, start, end): + # GH 24076 + idx = pd.date_range( + start="2018-12-02 14:50:00-07:00", + end="2018-12-02 14:50:00-07:00", + freq="1min", + ) + df = pd.DataFrame(1, index=idx, columns=["A"]) + result = df[start:end] + expected = df.iloc[0:3, :] + tm.assert_frame_equal(result, expected) + + # GH 16785 + start = str(start) + end = str(end) + with pytest.raises(ValueError, match="Both dates must"): + df[start : end[:-4] + "1:00"] + + with pytest.raises(ValueError, match="The index must be timezone"): + df = df.tz_localize(None) + df[start:end] + + def test_slice_reduce_to_series(self): + # GH 27516 + df = pd.DataFrame( + {"A": range(24)}, index=pd.date_range("2000", periods=24, freq="M") + ) + expected = pd.Series( + range(12), index=pd.date_range("2000", periods=12, freq="M"), name="A" + ) + result = df.loc["2000", "A"] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py new file mode 100644 index 00000000..84eee241 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -0,0 +1,338 @@ +""" 
+Tests for DatetimeIndex methods behaving like their Timestamp counterparts +""" +from datetime import datetime + +import numpy as np +import pytest + +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime + +import pandas as pd +from pandas import DatetimeIndex, Timestamp, date_range +import pandas._testing as tm + +from pandas.tseries.frequencies import to_offset + + +class TestDatetimeIndexOps: + def test_dti_time(self): + rng = date_range("1/1/2000", freq="12min", periods=10) + result = pd.Index(rng).time + expected = [t.time() for t in rng] + assert (result == expected).all() + + def test_dti_date(self): + rng = date_range("1/1/2000", freq="12H", periods=10) + result = pd.Index(rng).date + expected = [t.date() for t in rng] + assert (result == expected).all() + + @pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]]) + def test_dti_date_out_of_range(self, data): + # GH#1475 + msg = "Out of bounds nanosecond timestamp: 1400-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + DatetimeIndex(data) + + @pytest.mark.parametrize( + "field", + [ + "dayofweek", + "dayofyear", + "week", + "weekofyear", + "quarter", + "days_in_month", + "is_month_start", + "is_month_end", + "is_quarter_start", + "is_quarter_end", + "is_year_start", + "is_year_end", + ], + ) + def test_dti_timestamp_fields(self, field): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + expected = getattr(idx, field)[-1] + result = getattr(Timestamp(idx[-1]), field) + assert result == expected + + def test_dti_timestamp_freq_fields(self): + # extra fields from DatetimeIndex like quarter and week + idx = tm.makeDateIndex(100) + + assert idx.freq == Timestamp(idx[-1], idx.freq).freq + assert idx.freqstr == Timestamp(idx[-1], idx.freq).freqstr + + # ---------------------------------------------------------------- + # DatetimeIndex.round + + def test_round_daily(self): + dti = date_range("20130101 09:10:11", periods=5) + 
result = dti.round("D") + expected = date_range("20130101", periods=5) + tm.assert_index_equal(result, expected) + + dti = dti.tz_localize("UTC").tz_convert("US/Eastern") + result = dti.round("D") + expected = date_range("20130101", periods=5).tz_localize("US/Eastern") + tm.assert_index_equal(result, expected) + + result = dti.round("s") + tm.assert_index_equal(result, dti) + + @pytest.mark.parametrize( + "freq, error_msg", + [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ], + ) + def test_round_invalid(self, freq, error_msg): + dti = date_range("20130101 09:10:11", periods=5) + dti = dti.tz_localize("UTC").tz_convert("US/Eastern") + with pytest.raises(ValueError, match=error_msg): + dti.round(freq) + + def test_round(self, tz_naive_fixture): + tz = tz_naive_fixture + rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz) + elt = rng[1] + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 01:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 02:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 02:00:00", tz=tz, freq="30T"), + ] + ) + expected_elt = expected_rng[1] + + tm.assert_index_equal(rng.round(freq="H"), expected_rng) + assert elt.round(freq="H") == expected_elt + + msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + rng.round(freq="foo") + with pytest.raises(ValueError, match=msg): + elt.round(freq="foo") + + msg = " is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + rng.round(freq="M") + with pytest.raises(ValueError, match=msg): + elt.round(freq="M") + + # GH#14440 & GH#15578 + index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz) + result = index.round("ms") + expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz) + tm.assert_index_equal(result, expected) + + for 
freq in ["us", "ns"]: + tm.assert_index_equal(index, index.round(freq)) + + index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz) + result = index.round("ms") + expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz) + tm.assert_index_equal(result, expected) + + index = DatetimeIndex(["2016-10-17 12:00:00.001501031"]) + result = index.round("10ns") + expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"]) + tm.assert_index_equal(result, expected) + + with tm.assert_produces_warning(False): + ts = "2016-10-17 12:00:00.001501031" + DatetimeIndex([ts]).round("1010ns") + + def test_no_rounding_occurs(self, tz_naive_fixture): + # GH 21262 + tz = tz_naive_fixture + rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz) + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz, freq="2T"), + Timestamp("2016-01-01 00:02:00", tz=tz, freq="2T"), + Timestamp("2016-01-01 00:04:00", tz=tz, freq="2T"), + Timestamp("2016-01-01 00:06:00", tz=tz, freq="2T"), + Timestamp("2016-01-01 00:08:00", tz=tz, freq="2T"), + ] + ) + + tm.assert_index_equal(rng.round(freq="2T"), expected_rng) + + @pytest.mark.parametrize( + "test_input, rounder, freq, expected", + [ + (["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]), + (["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]), + ( + ["2117-01-01 00:00:45.000000012"], + "floor", + "10ns", + ["2117-01-01 00:00:45.000000010"], + ), + ( + ["1823-01-01 00:00:01.000000012"], + "ceil", + "10ns", + ["1823-01-01 00:00:01.000000020"], + ), + (["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]), + (["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]), + (["2018-01-01 00:15:00"], "ceil", "15T", ["2018-01-01 00:15:00"]), + (["2018-01-01 00:15:00"], "floor", "15T", ["2018-01-01 00:15:00"]), + (["1823-01-01 03:00:00"], "ceil", "3H", ["1823-01-01 03:00:00"]), + (["1823-01-01 03:00:00"], "floor", "3H", ["1823-01-01 03:00:00"]), + ( + ("NaT", "1823-01-01 
00:00:01"), + "floor", + "1s", + ("NaT", "1823-01-01 00:00:01"), + ), + ( + ("NaT", "1823-01-01 00:00:01"), + "ceil", + "1s", + ("NaT", "1823-01-01 00:00:01"), + ), + ], + ) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = DatetimeIndex(list(test_input)) + func = getattr(dt, rounder) + result = func(freq) + expected = DatetimeIndex(list(expected)) + assert expected.equals(result) + + @pytest.mark.parametrize( + "start, index_freq, periods", + [("2018-01-01", "12H", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)], + ) + @pytest.mark.parametrize( + "round_freq", + [ + "2ns", + "3ns", + "4ns", + "5ns", + "6ns", + "7ns", + "250ns", + "500ns", + "750ns", + "1us", + "19us", + "250us", + "500us", + "750us", + "1s", + "2s", + "3s", + "12H", + "1D", + ], + ) + def test_round_int64(self, start, index_freq, periods, round_freq): + dt = date_range(start=start, freq=index_freq, periods=periods) + unit = to_offset(round_freq).nanos + + # test floor + result = dt.floor(round_freq) + diff = dt.asi8 - result.asi8 + mod = result.asi8 % unit + assert (mod == 0).all(), "floor not a {} multiple".format(round_freq) + assert (0 <= diff).all() and (diff < unit).all(), "floor error" + + # test ceil + result = dt.ceil(round_freq) + diff = result.asi8 - dt.asi8 + mod = result.asi8 % unit + assert (mod == 0).all(), "ceil not a {} multiple".format(round_freq) + assert (0 <= diff).all() and (diff < unit).all(), "ceil error" + + # test round + result = dt.round(round_freq) + diff = abs(result.asi8 - dt.asi8) + mod = result.asi8 % unit + assert (mod == 0).all(), "round not a {} multiple".format(round_freq) + assert (diff <= unit // 2).all(), "round error" + if unit % 2 == 0: + assert ( + result.asi8[diff == unit // 2] % 2 == 0 + ).all(), "round half to even error" + + # ---------------------------------------------------------------- + # DatetimeIndex.normalize + + def test_normalize(self): + rng = date_range("1/1/2000 9:30", periods=10, freq="D") + + result = 
rng.normalize() + expected = date_range("1/1/2000", periods=10, freq="D") + tm.assert_index_equal(result, expected) + + arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype( + "datetime64[ns]" + ) + rng_ns = DatetimeIndex(arr_ns) + rng_ns_normalized = rng_ns.normalize() + + arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype( + "datetime64[ns]" + ) + expected = DatetimeIndex(arr_ns) + tm.assert_index_equal(rng_ns_normalized, expected) + + assert result.is_normalized + assert not rng.is_normalized + + def test_normalize_nat(self): + dti = DatetimeIndex([pd.NaT, Timestamp("2018-01-01 01:00:00")]) + result = dti.normalize() + expected = DatetimeIndex([pd.NaT, Timestamp("2018-01-01")]) + tm.assert_index_equal(result, expected) + + +class TestDateTimeIndexToJulianDate: + def test_1700(self): + dr = date_range(start=Timestamp("1710-10-01"), periods=5, freq="D") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_2000(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="D") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_hour(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="H") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_minute(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="T") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, pd.Float64Index) + tm.assert_index_equal(r1, r2) + + def test_second(self): + dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="S") + r1 = pd.Index([x.to_julian_date() for x in dr]) + r2 = dr.to_julian_date() + assert isinstance(r2, 
pd.Float64Index) + tm.assert_index_equal(r1, r2) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py new file mode 100644 index 00000000..78188c54 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -0,0 +1,596 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + Int64Index, + Series, + bdate_range, + date_range, + to_datetime, +) +import pandas._testing as tm + +from pandas.tseries.offsets import BMonthEnd, Minute, MonthEnd + +START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) + + +class TestDatetimeIndexSetOps: + tz = [ + None, + "UTC", + "Asia/Tokyo", + "US/Eastern", + "dateutil/Asia/Singapore", + "dateutil/US/Pacific", + ] + + # TODO: moved from test_datetimelike; dedup with version below + @pytest.mark.parametrize("sort", [None, False]) + def test_union2(self, sort): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + union = first.union(second, sort=sort) + tm.assert_index_equal(union, everything) + + @pytest.mark.parametrize("box", [np.array, Series, list]) + @pytest.mark.parametrize("sort", [None, False]) + def test_union3(self, sort, box): + everything = tm.makeDateIndex(10) + first = everything[:5] + second = everything[5:] + + # GH 10149 + expected = ( + first.astype("O").union(pd.Index(second.values, dtype="O")).astype("O") + ) + case = box(second.values) + result = first.union(case, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", tz) + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, tz, sort): + rng1 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz) + other1 = pd.date_range("1/6/2000", freq="D", periods=5, tz=tz) + expected1 = pd.date_range("1/1/2000", freq="D", periods=10, tz=tz) + expected1_notsorted = 
pd.DatetimeIndex(list(other1) + list(rng1)) + + rng2 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz) + other2 = pd.date_range("1/4/2000", freq="D", periods=5, tz=tz) + expected2 = pd.date_range("1/1/2000", freq="D", periods=8, tz=tz) + expected2_notsorted = pd.DatetimeIndex(list(other2) + list(rng2[:3])) + + rng3 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.date_range("1/1/2000", freq="D", periods=5, tz=tz) + expected3_notsorted = rng3 + + for rng, other, exp, exp_notsorted in [ + (rng1, other1, expected1, expected1_notsorted), + (rng2, other2, expected2, expected2_notsorted), + (rng3, other3, expected3, expected3_notsorted), + ]: + + result_union = rng.union(other, sort=sort) + tm.assert_index_equal(result_union, exp) + + result_union = other.union(rng, sort=sort) + if sort is None: + tm.assert_index_equal(result_union, exp) + else: + tm.assert_index_equal(result_union, exp_notsorted) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_coverage(self, sort): + idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"]) + ordered = DatetimeIndex(idx.sort_values(), freq="infer") + result = ordered.union(idx, sort=sort) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered, sort=sort) + tm.assert_index_equal(result, ordered) + assert result.freq == ordered.freq + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_bug_1730(self, sort): + rng_a = date_range("1/1/2012", periods=4, freq="3H") + rng_b = date_range("1/1/2012", periods=4, freq="4H") + + result = rng_a.union(rng_b, sort=sort) + exp = list(rng_a) + list(rng_b[1:]) + if sort is None: + exp = DatetimeIndex(sorted(exp)) + else: + exp = DatetimeIndex(exp) + tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_bug_1745(self, sort): + left = DatetimeIndex(["2012-05-11 15:19:49.695000"]) + right = DatetimeIndex( + [ + "2012-05-29 
13:04:21.322000", + "2012-05-11 15:27:24.873000", + "2012-05-11 15:31:05.350000", + ] + ) + + result = left.union(right, sort=sort) + exp = DatetimeIndex( + [ + "2012-05-11 15:19:49.695000", + "2012-05-29 13:04:21.322000", + "2012-05-11 15:27:24.873000", + "2012-05-11 15:31:05.350000", + ] + ) + if sort is None: + exp = exp.sort_values() + tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_bug_4564(self, sort): + from pandas import DateOffset + + left = date_range("2013-01-01", "2013-02-01") + right = left + DateOffset(minutes=15) + + result = left.union(right, sort=sort) + exp = list(left) + list(right) + if sort is None: + exp = DatetimeIndex(sorted(exp)) + else: + exp = DatetimeIndex(exp) + tm.assert_index_equal(result, exp) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_freq_both_none(self, sort): + # GH11086 + expected = bdate_range("20150101", periods=10) + expected._data.freq = None + + result = expected.union(expected, sort=sort) + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_union_freq_infer(self): + # When taking the union of two DatetimeIndexes, we infer + # a freq even if the arguments don't have freq. This matches + # TimedeltaIndex behavior. 
+ dti = pd.date_range("2016-01-01", periods=5) + left = dti[[0, 1, 3, 4]] + right = dti[[2, 3, 1]] + + assert left.freq is None + assert right.freq is None + + result = left.union(right) + tm.assert_index_equal(result, dti) + assert result.freq == "D" + + def test_union_dataframe_index(self): + rng1 = date_range("1/1/1999", "1/1/2012", freq="MS") + s1 = Series(np.random.randn(len(rng1)), rng1) + + rng2 = date_range("1/1/1980", "12/1/2001", freq="MS") + s2 = Series(np.random.randn(len(rng2)), rng2) + df = DataFrame({"s1": s1, "s2": s2}) + + exp = pd.date_range("1/1/1980", "1/1/2012", freq="MS") + tm.assert_index_equal(df.index, exp) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_with_DatetimeIndex(self, sort): + i1 = Int64Index(np.arange(0, 20, 2)) + i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D") + # Works + i1.union(i2, sort=sort) + # Fails with "AttributeError: can't set attribute" + i2.union(i1, sort=sort) + + # TODO: moved from test_datetimelike; de-duplicate with version below + def test_intersection2(self): + first = tm.makeDateIndex(10) + second = first[5:] + intersect = first.intersection(second) + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case) + assert tm.equalContents(result, second) + + third = Index(["a", "b", "c"]) + result = first.intersection(third) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"] + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection(self, tz, sort): + # GH 4690 (with tz) + base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx") + + # if target has the same name, it is preserved + rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx") + expected2 = date_range("6/1/2000", "6/20/2000", 
freq="D", name="idx") + + # if target name is different, it will be reset + rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other") + expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None) + + rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = DatetimeIndex([], name="idx") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + assert result.tz == expected.tz + + # non-monotonic + base = DatetimeIndex( + ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx" + ) + + rng2 = DatetimeIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx" + ) + expected2 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name="idx") + + rng3 = DatetimeIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + tz=tz, + name="other", + ) + expected3 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name=None) + + # GH 7880 + rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx") + expected4 = DatetimeIndex([], tz=tz, name="idx") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq is None + assert result.tz == expected.tz + + def test_intersection_empty(self): + # empty same freq GH2129 + rng = date_range("6/1/2000", "6/15/2000", freq="T") + result = rng[0:0].intersection(rng) + assert len(result) == 0 + + result = rng.intersection(rng[0:0]) + assert len(result) == 0 + + def test_intersection_bug_1708(self): + from pandas import DateOffset + + index_1 = date_range("1/1/2012", periods=4, freq="12H") + index_2 = index_1 + 
DateOffset(hours=1) + + result = index_1 & index_2 + assert len(result) == 0 + + @pytest.mark.parametrize("tz", tz) + @pytest.mark.parametrize("sort", [None, False]) + def test_difference(self, tz, sort): + rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"] + + rng1 = pd.DatetimeIndex(rng_dates, tz=tz) + other1 = pd.date_range("1/6/2000", freq="D", periods=5, tz=tz) + expected1 = pd.DatetimeIndex(rng_dates, tz=tz) + + rng2 = pd.DatetimeIndex(rng_dates, tz=tz) + other2 = pd.date_range("1/4/2000", freq="D", periods=5, tz=tz) + expected2 = pd.DatetimeIndex(rng_dates[:3], tz=tz) + + rng3 = pd.DatetimeIndex(rng_dates, tz=tz) + other3 = pd.DatetimeIndex([], tz=tz) + expected3 = pd.DatetimeIndex(rng_dates, tz=tz) + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + ]: + result_diff = rng.difference(other, sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result_diff, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_freq(self, sort): + # GH14323: difference of DatetimeIndex should not preserve frequency + + index = date_range("20160920", "20160925", freq="D") + other = date_range("20160921", "20160924", freq="D") + expected = DatetimeIndex(["20160920", "20160925"], freq=None) + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = date_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other, sort) + expected = DatetimeIndex(["20160920", "20160921"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_datetimeindex_diff(self, sort): + dti1 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=100) + dti2 = date_range(freq="Q-JAN", start=datetime(1997, 12, 31), periods=98) + 
assert len(dti1.difference(dti2, sort)) == 2 + + @pytest.mark.parametrize("sort", [None, False]) + def test_datetimeindex_union_join_empty(self, sort): + dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") + empty = Index([]) + + result = dti.union(empty, sort=sort) + expected = dti.astype("O") + tm.assert_index_equal(result, expected) + + result = dti.join(empty) + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + + def test_join_nonunique(self): + idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]) + idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]) + rs = idx1.join(idx2, how="outer") + assert rs.is_monotonic + + +class TestBusinessDatetimeIndex: + def setup_method(self, method): + self.rng = bdate_range(START, END) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, sort): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # order does not matter + if sort is None: + tm.assert_index_equal(right.union(left, sort=sort), the_union) + else: + expected = pd.DatetimeIndex(list(right) + list(left)) + tm.assert_index_equal(right.union(left, sort=sort), expected) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how="outer") + 
assert isinstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_not_cacheable(self, sort): + rng = date_range("1/1/2000", periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_union = rng1.union(rng2, sort=sort) + if sort is None: + tm.assert_index_equal(the_union, rng) + else: + expected = pd.DatetimeIndex(list(rng[10:]) + list(rng[:10])) + tm.assert_index_equal(the_union, expected) + + rng1 = rng[10:] + rng2 = rng[15:35] + the_union = rng1.union(rng2, sort=sort) + expected = rng[10:] + tm.assert_index_equal(the_union, expected) + + def test_intersection(self): + rng = date_range("1/1/2000", periods=50, freq=Minute()) + rng1 = rng[10:] + rng2 = rng[:25] + the_int = rng1.intersection(rng2) + expected = rng[10:25] + tm.assert_index_equal(the_int, expected) + assert isinstance(the_int, DatetimeIndex) + assert the_int.freq == rng.freq + + the_int = rng1.intersection(rng2.view(DatetimeIndex)) + tm.assert_index_equal(the_int, expected) + + # non-overlapping + the_int = rng[:10].intersection(rng[10:]) + expected = DatetimeIndex([]) + tm.assert_index_equal(the_int, expected) + + def test_intersection_bug(self): + # GH #771 + a = bdate_range("11/30/2011", "12/31/2011") + b = bdate_range("12/10/2011", "12/20/2011") + result = a.intersection(b) + tm.assert_index_equal(result, b) + + @pytest.mark.parametrize("sort", [None, False]) + def 
test_month_range_union_tz_pytz(self, sort): + from pytz import timezone + + tz = timezone("US/Eastern") + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) + + early_dr.union(late_dr, sort=sort) + + @td.skip_if_windows_python_3 + @pytest.mark.parametrize("sort", [None, False]) + def test_month_range_union_tz_dateutil(self, sort): + from pandas._libs.tslibs.timezones import dateutil_gettz + + tz = dateutil_gettz("US/Eastern") + + early_start = datetime(2011, 1, 1) + early_end = datetime(2011, 3, 1) + + late_start = datetime(2011, 3, 1) + late_end = datetime(2011, 5, 1) + + early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd()) + late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd()) + + early_dr.union(late_dr, sort=sort) + + +class TestCustomDatetimeIndex: + def setup_method(self, method): + self.rng = bdate_range(START, END, freq="C") + + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, sort): + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_union = left.union(right, sort) + assert isinstance(the_union, Index) + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_union = left.union(right, sort=sort) + assert isinstance(the_union, DatetimeIndex) + + # order does not matter + if sort is None: + tm.assert_index_equal(right.union(left, sort=sort), the_union) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_union = self.rng.union(rng, sort=sort) + assert isinstance(the_union, 
DatetimeIndex) + + def test_outer_join(self): + # should just behave as union + + # overlapping + left = self.rng[:10] + right = self.rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = self.rng[:5] + right = self.rng[10:] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + # non-overlapping, no gap + left = self.rng[:5] + right = self.rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # overlapping, but different offset + rng = date_range(START, END, freq=BMonthEnd()) + + the_join = self.rng.join(rng, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + def test_intersection_bug(self): + # GH #771 + a = bdate_range("11/30/2011", "12/31/2011", freq="C") + b = bdate_range("12/10/2011", "12/20/2011", freq="C") + result = a.intersection(b) + tm.assert_index_equal(result, b) diff --git a/pandas/tests/indexes/datetimes/test_shift.py b/pandas/tests/indexes/datetimes/test_shift.py new file mode 100644 index 00000000..1c879959 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_shift.py @@ -0,0 +1,117 @@ +from datetime import datetime + +import pytest +import pytz + +from pandas.errors import NullFrequencyError + +import pandas as pd +from pandas import DatetimeIndex, Series, date_range +import pandas._testing as tm + + +class TestDatetimeIndexShift: + + # ------------------------------------------------------------- + # DatetimeIndex.shift is used in integer addition + + def test_dti_shift_tzaware(self, tz_naive_fixture): + # GH#9903 + tz = tz_naive_fixture + idx = pd.DatetimeIndex([], name="xxx", tz=tz) + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + tm.assert_index_equal(idx.shift(3, freq="H"), idx) + + idx = pd.DatetimeIndex( + ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], + name="xxx", 
+ tz=tz, + ) + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + exp = pd.DatetimeIndex( + ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], + name="xxx", + tz=tz, + ) + tm.assert_index_equal(idx.shift(3, freq="H"), exp) + exp = pd.DatetimeIndex( + ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], + name="xxx", + tz=tz, + ) + tm.assert_index_equal(idx.shift(-3, freq="H"), exp) + + def test_dti_shift_freqs(self): + # test shift for DatetimeIndex and non DatetimeIndex + # GH#8083 + drange = pd.date_range("20130101", periods=5) + result = drange.shift(1) + expected = pd.DatetimeIndex( + ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + result = drange.shift(-1) + expected = pd.DatetimeIndex( + ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + result = drange.shift(3, freq="2D") + expected = pd.DatetimeIndex( + ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_dti_shift_int(self): + rng = date_range("1/1/2000", periods=20) + + result = rng + 5 * rng.freq + expected = rng.shift(5) + tm.assert_index_equal(result, expected) + + result = rng - 5 * rng.freq + expected = rng.shift(-5) + tm.assert_index_equal(result, expected) + + def test_dti_shift_no_freq(self): + # GH#19147 + dti = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) + with pytest.raises(NullFrequencyError): + dti.shift(2) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_shift_localized(self, tzstr): + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + dr_tz = dr.tz_localize(tzstr) + + result = dr_tz.shift(1, "10T") + assert result.tz == dr_tz.tz + + def test_dti_shift_across_dst(self): + # GH 8616 + idx = date_range("2013-11-03", tz="America/Chicago", periods=7, 
class FixedOffset(tzinfo):
    """A ``tzinfo`` with a constant UTC offset and no daylight saving time.

    Parameters
    ----------
    offset : number
        Minutes east of UTC (negative values are west of UTC).
    name : str or None
        Value reported by ``tzname``.
    """

    def __init__(self, offset, name):
        self.__name = name
        self.__offset = timedelta(minutes=offset)

    def utcoffset(self, dt):
        # The offset is the same for every ``dt``.
        return self.__offset

    def tzname(self, dt):
        return self.__name

    def dst(self, dt):
        # No DST adjustment is ever applied.
        return timedelta()
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_tz_convert_compat_timestamp(self, prefix):
    # Converting one element on its own and converting the whole index
    # must produce the same Timestamp for that position.
    eastern = prefix + "US/Eastern"
    pacific = prefix + "US/Pacific"
    dti = DatetimeIndex(["1/1/2012", "3/1/2012", "4/1/2012"], tz=eastern)

    scalar_converted = dti[0].tz_convert(pacific)
    index_converted = dti.tz_convert(pacific)

    assert scalar_converted == index_converted[0]
"2009-05-12 09:50:32"] + tt = DatetimeIndex(ts).tz_localize("US/Eastern") + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"] + tt = DatetimeIndex(ts).tz_localize("UTC") + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"] + tt = DatetimeIndex(ts).tz_localize("US/Eastern") + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"] + tt = DatetimeIndex(ts).tz_localize("UTC") + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): + # Regression test for GH#13306 + + # sorted case US/Eastern -> UTC + ts = [ + Timestamp("2008-05-12 09:50:00", tz=tz), + Timestamp("2008-12-12 09:50:35", tz=tz), + Timestamp("2009-05-12 09:50:32", tz=tz), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = [ + Timestamp("2008-05-12 13:50:00", tz="UTC"), + Timestamp("2008-12-12 14:50:35", tz="UTC"), + Timestamp("2009-05-12 13:50:32", tz="UTC"), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = [ + Timestamp("2008-05-12 09:50:00", tz=tz), + Timestamp("2008-12-12 09:50:35", tz=tz), + Timestamp("2008-05-12 09:50:32", tz=tz), + ] + tt = DatetimeIndex(ts) + ut = 
tt.tz_convert("UTC") + expected = Index([13, 14, 13]) + tm.assert_index_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = [ + Timestamp("2008-05-12 13:50:00", tz="UTC"), + Timestamp("2008-12-12 14:50:35", tz="UTC"), + Timestamp("2008-05-12 13:50:32", tz="UTC"), + ] + tt = DatetimeIndex(ts) + ut = tt.tz_convert("US/Eastern") + expected = Index([9, 9, 9]) + tm.assert_index_equal(ut.hour, expected) + + @pytest.mark.parametrize("freq, n", [("H", 1), ("T", 60), ("S", 3600)]) + def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): + # Regression test for tslib.tz_convert(vals, tz1, tz2). + # See https://github.com/pandas-dev/pandas/issues/4496 for details. + idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq) + idx = idx.tz_localize("UTC") + idx = idx.tz_convert("Europe/Moscow") + + expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) + tm.assert_index_equal(idx.hour, Index(expected)) + + def test_dti_tz_convert_dst(self): + for freq, n in [("H", 1), ("T", 60), ("S", 3600)]: + # Start DST + idx = date_range( + "2014-03-08 23:00", "2014-03-09 09:00", freq=freq, tz="UTC" + ) + idx = idx.tz_convert("US/Eastern") + expected = np.repeat( + np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = date_range( + "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + expected = np.repeat( + np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + # End DST + idx = date_range( + "2014-11-01 23:00", "2014-11-02 09:00", freq=freq, tz="UTC" + ) + idx = idx.tz_convert("US/Eastern") + expected = np.repeat( + np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]), + np.array([n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + idx = 
date_range( + "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + expected = np.repeat( + np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]), + ) + tm.assert_index_equal(idx.hour, Index(expected)) + + # daily + # Start DST + idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC") + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx.hour, Index([19, 19])) + + idx = date_range( + "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx.hour, Index([5, 5])) + + # End DST + idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC") + idx = idx.tz_convert("US/Eastern") + tm.assert_index_equal(idx.hour, Index([20, 20])) + + idx = date_range( + "2014-11-01 00:00", "2014-11-02 000:00", freq="D", tz="US/Eastern" + ) + idx = idx.tz_convert("UTC") + tm.assert_index_equal(idx.hour, Index([4, 4])) + + def test_tz_convert_roundtrip(self, tz_aware_fixture): + tz = tz_aware_fixture + idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="M", tz="UTC") + exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="M") + + idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC") + exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D") + + idx3 = date_range(start="2014-01-01", end="2014-03-01", freq="H", tz="UTC") + exp3 = date_range(start="2014-01-01", end="2014-03-01", freq="H") + + idx4 = date_range(start="2014-08-01", end="2014-10-31", freq="T", tz="UTC") + exp4 = date_range(start="2014-08-01", end="2014-10-31", freq="T") + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), (idx4, exp4)]: + converted = idx.tz_convert(tz) + reset = converted.tz_convert(None) + tm.assert_index_equal(reset, expected) + assert reset.tzinfo is None + expected = converted.tz_convert("UTC").tz_localize(None) + 
def test_dti_tz_convert_tzlocal(self):
    # GH#13583
    # tz_convert must leave the underlying i8 values untouched, both when
    # converting UTC -> tzlocal and when converting tzlocal -> naive.
    utc_index = date_range(start="2001-01-01", end="2001-03-01", tz="UTC")
    as_local = utc_index.tz_convert(dateutil.tz.tzlocal())
    tm.assert_numpy_array_equal(as_local.asi8, utc_index.asi8)

    local_index = date_range(
        start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal()
    )
    as_naive = local_index.tz_convert(None)
    tm.assert_numpy_array_equal(as_naive.asi8, local_index.asi8)
@pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
def test_dti_tz_localize_ambiguous_times(self, tz):
    hourly = pd.offsets.Hour()

    # Spring forward on 2011-03-13: the naive range starting at 01:30 runs
    # into wall times that do not exist, so localizing must raise.
    spring_naive = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=hourly)
    with pytest.raises(pytz.NonExistentTimeError):
        spring_naive.tz_localize(tz)

    # Starting after the transition, building the tz-aware range just works.
    date_range(datetime(2011, 3, 13, 3, 30), periods=3, freq=hourly, tz=tz)

    # Fall back on 2011-11-06: the naive range starting at 01:30 contains
    # ambiguous wall times, so localizing must raise.
    fall_naive = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=hourly)
    with pytest.raises(pytz.AmbiguousTimeError):
        fall_naive.tz_localize(tz)

    # UTC is OK
    date_range(
        datetime(2011, 3, 13), periods=48, freq=pd.offsets.Minute(30), tz=pytz.utc
    )
@pytest.mark.parametrize("prefix", ["", "dateutil/"])
def test_dti_tz_localize(self, prefix):
    eastern = prefix + "US/Eastern"

    # The naive range starts at midnight and the UTC reference range at
    # 05:00; after localizing to US/Eastern both must hold equal values.
    naive = pd.date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="L")
    localized = naive.tz_localize(eastern)
    utc = pd.date_range(
        start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="L", tz="utc"
    )
    tm.assert_numpy_array_equal(localized.values, utc.values)

    # Converting to another zone leaves the underlying values unchanged.
    pacific = localized.tz_convert(prefix + "US/Pacific")
    tm.assert_numpy_array_equal(pacific.values, utc.values)

    # Ranges touching an ambiguous or a missing wall time must be rejected.
    failing_ranges = [
        ("11/6/2011 1:59", "11/6/2011 2:00", pytz.AmbiguousTimeError),
        ("3/13/2011 1:59", "3/13/2011 2:00", pytz.NonExistentTimeError),
    ]
    for start, end, error in failing_ranges:
        bad = pd.date_range(start=start, end=end, freq="L")
        with pytest.raises(error):
            bad.tz_localize(eastern)
+ with pytest.raises(pytz.NonExistentTimeError): + rng.tz_localize(tz) + + def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): + # note: this tz tests that a tz-naive index can be localized + # and de-localized successfully, when there are no DST transitions + # in the range. + idx = date_range(start="2014-06-01", end="2014-08-30", freq="15T") + tz = tz_aware_fixture + localized = idx.tz_localize(tz) + # cant localize a tz-aware object + with pytest.raises(TypeError): + localized.tz_localize(tz) + reset = localized.tz_localize(None) + assert reset.tzinfo is None + tm.assert_index_equal(reset, idx) + + def test_dti_tz_localize_naive(self): + rng = date_range("1/1/2011", periods=100, freq="H") + + conv = rng.tz_localize("US/Pacific") + exp = date_range("1/1/2011", periods=100, freq="H", tz="US/Pacific") + + tm.assert_index_equal(conv, exp) + + def test_dti_tz_localize_tzlocal(self): + # GH#13583 + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = int(offset.total_seconds() * 1000000000) + + dti = date_range(start="2001-01-01", end="2001-03-01") + dti2 = dti.tz_localize(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) + + dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_localize(None) + tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_tz_localize_ambiguous_nat(self, tz): + times = [ + "11/06/2011 00:00", + "11/06/2011 01:00", + "11/06/2011 01:00", + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di = DatetimeIndex(times) + localized = di.tz_localize(tz, ambiguous="NaT") + + times = [ + "11/06/2011 00:00", + np.NaN, + np.NaN, + "11/06/2011 02:00", + "11/06/2011 03:00", + ] + di_test = DatetimeIndex(times, tz="US/Eastern") + + # left dtype is datetime64[ns, US/Eastern] + # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')] + 
@pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
def test_dti_tz_localize_ambiguous_flags(self, tz):
    # November 6, 2011, fall back: the 01:00 wall-clock hour occurs twice,
    # so explicit is_dst flags are needed to pick the right occurrence.
    dr = date_range(
        datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour(), tz=tz
    )
    times = [
        "11/06/2011 00:00",
        "11/06/2011 01:00",
        "11/06/2011 01:00",
        "11/06/2011 02:00",
        "11/06/2011 03:00",
    ]

    # Test tz_localize with the flags given as list, int array, bool array
    di = DatetimeIndex(times)
    is_dst = [1, 1, 0, 0, 0]
    localized = di.tz_localize(tz, ambiguous=is_dst)
    tm.assert_index_equal(dr, localized)

    localized = di.tz_localize(tz, ambiguous=np.array(is_dst))
    tm.assert_index_equal(dr, localized)

    localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool"))
    tm.assert_index_equal(dr, localized)

    # Test constructor
    localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst)
    tm.assert_index_equal(dr, localized)

    # Test duplicate times where inferring the dst fails
    times += times
    di = DatetimeIndex(times)

    # When the sizes are incompatible, make sure error is raised.
    # Expect the specific length error rather than any Exception, so an
    # unrelated failure cannot satisfy this check.
    msg = "Length of ambiguous bool-array must be the same size as vals"
    with pytest.raises(ValueError, match=msg):
        di.tz_localize(tz, ambiguous=is_dst)

    # When sizes are compatible and there are repeats ('infer' won't work)
    is_dst = np.hstack((is_dst, is_dst))
    localized = di.tz_localize(tz, ambiguous=is_dst)
    dr = dr.append(dr)
    tm.assert_index_equal(dr, localized)

    # When there is no dst transition, nothing special happens
    dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=pd.offsets.Hour())
    is_dst = np.array([1] * 10)
    localized = dr.tz_localize(tz)
    localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
    tm.assert_index_equal(localized, localized_is_dst)
@pytest.mark.parametrize("tz", ["Europe/London", "dateutil/Europe/London"])
def test_dti_construction_ambiguous_endpoint(self, tz):
    # GH#11626: building a range whose end point is an ambiguous wall time
    start, end = "2013-10-26 23:00", "2013-10-27 01:00"

    # Without guidance the ambiguous end point is rejected.
    with pytest.raises(pytz.AmbiguousTimeError):
        date_range(start, end, tz="Europe/London", freq="H")

    inferred = date_range(start, end, freq="H", tz=tz, ambiguous="infer")
    assert inferred[0] == Timestamp("2013-10-26 23:00", tz=tz, freq="H")

    # dateutil zones are expected to land on the +0100 reading of the
    # ambiguous end point, other zones on +0000 (see GH#14621).
    uses_dateutil = str(tz).startswith("dateutil")
    end_label = (
        "2013-10-27 01:00:00+0100" if uses_dateutil else "2013-10-27 01:00:00+0000"
    )
    assert inferred[-1] == Timestamp(end_label, tz=tz, freq="H")
@pytest.mark.parametrize(
    "start_ts, tz, end_ts, shift",
    [
        ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"],
        [
            "2015-03-29 02:20:00",
            "Europe/Warsaw",
            "2015-03-29 01:59:59.999999999",
            "backward",
        ],
        [
            "2015-03-29 02:20:00",
            "Europe/Warsaw",
            "2015-03-29 03:20:00",
            timedelta(hours=1),
        ],
        [
            "2015-03-29 02:20:00",
            "Europe/Warsaw",
            "2015-03-29 01:20:00",
            timedelta(hours=-1),
        ],
        ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"],
        [
            "2018-03-11 02:33:00",
            "US/Pacific",
            "2018-03-11 01:59:59.999999999",
            "backward",
        ],
        [
            "2018-03-11 02:33:00",
            "US/Pacific",
            "2018-03-11 03:33:00",
            timedelta(hours=1),
        ],
        [
            "2018-03-11 02:33:00",
            "US/Pacific",
            "2018-03-11 01:33:00",
            timedelta(hours=-1),
        ],
    ],
)
@pytest.mark.parametrize("tz_type", ["", "dateutil/"])
def test_dti_tz_localize_nonexistent_shift(
    self, start_ts, tz, end_ts, shift, tz_type
):
    # GH 8917
    zone = tz_type + tz
    # String options are passed as "shift_forward"/"shift_backward";
    # timedelta options go through unchanged.
    option = "shift_" + shift if isinstance(shift, str) else shift

    naive = DatetimeIndex([Timestamp(start_ts)])
    shifted = naive.tz_localize(zone, nonexistent=option)

    expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(zone)
    tm.assert_index_equal(shifted, expected)
def test_normalize_tz(self):
    # normalize() must move 9:30 timestamps to midnight while keeping the
    # timezone; checked for a named zone, UTC and the machine-local zone.
    for tz in ["US/Eastern", "UTC", tzlocal()]:
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tz)

        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D", tz=tz)
        tm.assert_index_equal(result, expected)

        assert result.is_normalized
        assert not rng.is_normalized
test_dti_constructor_static_tzinfo(self, prefix): + # it works! + index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + "EST") + index.hour + index[0] + + def test_dti_constructor_with_fixed_tz(self): + off = FixedOffset(420, "+07:00") + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + rng2 = date_range(start, periods=len(rng), tz=off) + tm.assert_index_equal(rng, rng2) + + rng3 = date_range("3/11/2012 05:00:00+07:00", "6/11/2012 05:00:00+07:00") + assert (rng.values == rng3.values).all() + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_convert_datetime_list(self, tzstr): + dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo") + dr2 = DatetimeIndex(list(dr), name="foo") + tm.assert_index_equal(dr, dr2) + assert dr.tz == dr2.tz + assert dr2.name == "foo" + + def test_dti_construction_univalent(self): + rng = date_range("03/12/2012 00:00", periods=10, freq="W-FRI", tz="US/Eastern") + rng2 = DatetimeIndex(data=rng, tz="US/Eastern") + tm.assert_index_equal(rng, rng2) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_from_tzaware_datetime(self, tz): + d = [datetime(2012, 8, 19, tzinfo=tz)] + + index = DatetimeIndex(d) + assert timezones.tz_compare(index.tz, tz) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_dti_tz_constructors(self, tzstr): + """ Test different DatetimeIndex constructions with timezone + Follow-up of GH#4229 + """ + + arr = ["11/10/2005 08:00:00", "11/10/2005 09:00:00"] + + idx1 = to_datetime(arr).tz_localize(tzstr) + idx2 = pd.date_range(start="2005-11-10 08:00:00", freq="H", periods=2, tz=tzstr) + idx3 = DatetimeIndex(arr, tz=tzstr) + idx4 = DatetimeIndex(np.array(arr), tz=tzstr) + + for other in [idx2, idx3, idx4]: + tm.assert_index_equal(idx1, other) + + # 
------------------------------------------------------------- + # Unsorted + + def test_join_utc_convert(self, join_type): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng.tz_convert("US/Eastern") + right = rng.tz_convert("Europe/Berlin") + + result = left.join(left[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == "UTC" + + @pytest.mark.parametrize( + "dtype", + [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"], + ) + def test_date_accessor(self, dtype): + # Regression test for GH#21230 + expected = np.array([date(2018, 6, 4), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:00:00", pd.NaT], dtype=dtype) + result = index.date + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"], + ) + def test_time_accessor(self, dtype): + # Regression test for GH#21267 + expected = np.array([time(10, 20, 30), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], dtype=dtype) + result = index.time + + tm.assert_numpy_array_equal(result, expected) + + def test_timetz_accessor(self, tz_naive_fixture): + # GH21358 + tz = timezones.maybe_get_tz(tz_naive_fixture) + + expected = np.array([time(10, 20, 30, tzinfo=tz), pd.NaT]) + + index = DatetimeIndex(["2018-06-04 10:20:30", pd.NaT], tz=tz) + result = index.timetz + + tm.assert_numpy_array_equal(result, expected) + + def test_dti_drop_dont_lose_tz(self): + # GH#2621 + ind = date_range("2012-12-01", periods=10, tz="utc") + ind = ind.drop(ind[-1]) + + assert ind.tz is not None + + def test_dti_tz_conversion_freq(self, tz_naive_fixture): + # GH25241 + t3 = DatetimeIndex(["2019-01-01 10:00"], freq="H") + assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq + t4 = DatetimeIndex(["2019-01-02 
12:00"], tz="UTC", freq="T") + assert t4.tz_convert(tz="UTC").freq == t4.freq + + def test_drop_dst_boundary(self): + # see gh-18031 + tz = "Europe/Brussels" + freq = "15min" + + start = pd.Timestamp("201710290100", tz=tz) + end = pd.Timestamp("201710290300", tz=tz) + index = pd.date_range(start=start, end=end, freq=freq) + + expected = DatetimeIndex( + [ + "201710290115", + "201710290130", + "201710290145", + "201710290200", + "201710290215", + "201710290230", + "201710290245", + "201710290200", + "201710290215", + "201710290230", + "201710290245", + "201710290300", + ], + tz=tz, + freq=freq, + ambiguous=[ + True, + True, + True, + True, + True, + True, + True, + False, + False, + False, + False, + False, + ], + ) + result = index.drop(index[0]) + tm.assert_index_equal(result, expected) + + def test_date_range_localize(self): + rng = date_range("3/11/2012 03:00", periods=15, freq="H", tz="US/Eastern") + rng2 = DatetimeIndex(["3/11/2012 03:00", "3/11/2012 04:00"], tz="US/Eastern") + rng3 = date_range("3/11/2012 03:00", periods=15, freq="H") + rng3 = rng3.tz_localize("US/Eastern") + + tm.assert_index_equal(rng, rng3) + + # DST transition time + val = rng[0] + exp = Timestamp("3/11/2012 03:00", tz="US/Eastern") + + assert val.hour == 3 + assert exp.hour == 3 + assert val == exp # same UTC value + tm.assert_index_equal(rng[:2], rng2) + + # Right before the DST transition + rng = date_range("3/11/2012 00:00", periods=2, freq="H", tz="US/Eastern") + rng2 = DatetimeIndex(["3/11/2012 00:00", "3/11/2012 01:00"], tz="US/Eastern") + tm.assert_index_equal(rng, rng2) + exp = Timestamp("3/11/2012 00:00", tz="US/Eastern") + assert exp.hour == 0 + assert rng[0] == exp + exp = Timestamp("3/11/2012 01:00", tz="US/Eastern") + assert exp.hour == 1 + assert rng[1] == exp + + rng = date_range("3/11/2012 00:00", periods=10, freq="H", tz="US/Eastern") + assert rng[2].hour == 3 + + def test_timestamp_equality_different_timezones(self): + utc_range = date_range("1/1/2000", periods=20, 
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
def test_dti_astype_asobject_tzinfos(self, tzstr):
    # GH#1345
    # Casting to object must yield elements equal to the boxed values of
    # the index, with the same tzinfo on each element.

    # dates around a dst transition
    rng = date_range("2/13/2010", "5/6/2010", tz=tzstr)

    objs = rng.astype(object)
    # Guard against zip() silently truncating if the lengths ever differ.
    assert len(objs) == len(rng)
    for x, exval in zip(objs, rng):
        assert x == exval
        assert x.tzinfo == exval.tzinfo
date_range("1/1/2000", periods=20, tz=tzstr) + + result = rng.take(range(5)) + assert result.tz == rng.tz + assert result.freq == rng.freq + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_utc_box_timestamp_and_localize(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("3/11/2012", "3/12/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tzstr) + + expected = rng[-1].astimezone(tz) + + stamp = rng_eastern[-1] + assert stamp == expected + assert stamp.tzinfo == expected.tzinfo + + # right tzinfo + rng = date_range("3/13/2012", "3/14/2012", freq="H", tz="utc") + rng_eastern = rng.tz_convert(tzstr) + # test not valid for dateutil timezones. + # assert 'EDT' in repr(rng_eastern[0].tzinfo) + assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr( + rng_eastern[0].tzinfo + ) + + def test_dti_to_pydatetime(self): + dt = dateutil.parser.parse("2012-06-13T01:39:00Z") + dt = dt.replace(tzinfo=tzlocal()) + + arr = np.array([dt], dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal()) + arr = rng.to_pydatetime() + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_dti_to_pydatetime_fizedtz(self): + dates = np.array( + [ + datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off), + ] + ) + dti = DatetimeIndex(dates) + + result = dti.to_pydatetime() + tm.assert_numpy_array_equal(dates, result) + + result = dti._mpl_repr() + tm.assert_numpy_array_equal(dates, result) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Central"), gettz("US/Central")]) + def test_with_tz(self, tz): + # just want it to work + start = datetime(2011, 3, 12, tzinfo=pytz.utc) + dr = bdate_range(start, periods=50, freq=pd.offsets.Hour()) + assert dr.tz is pytz.utc + + # DateRange with naive datetimes + dr = bdate_range("1/1/2005", 
"1/1/2009", tz=pytz.utc) + dr = bdate_range("1/1/2005", "1/1/2009", tz=tz) + + # normalized + central = dr.tz_convert(tz) + assert central.tz is tz + naive = central[0].to_pydatetime().replace(tzinfo=None) + comp = conversion.localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # compare vs a localized tz + naive = dr[0].to_pydatetime().replace(tzinfo=None) + comp = conversion.localize_pydatetime(naive, tz).tzinfo + assert central[0].tz is comp + + # datetimes with tzinfo set + dr = bdate_range( + datetime(2005, 1, 1, tzinfo=pytz.utc), datetime(2009, 1, 1, tzinfo=pytz.utc) + ) + with pytest.raises(Exception): + bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), "1/1/2009", tz=tz) + + @pytest.mark.parametrize("prefix", ["", "dateutil/"]) + def test_field_access_localize(self, prefix): + strdates = ["1/1/2012", "3/1/2012", "4/1/2012"] + rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern") + assert (rng.hour == 0).all() + + # a more unusual time zone, #1946 + dr = date_range( + "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan" + ) + + expected = Index(np.arange(10, dtype=np.int64)) + tm.assert_index_equal(dr.hour, expected) + + @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) + def test_dti_convert_tz_aware_datetime_datetime(self, tz): + # GH#1581 + dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)] + + dates_aware = [conversion.localize_pydatetime(x, tz) for x in dates] + result = DatetimeIndex(dates_aware) + assert timezones.tz_compare(result.tz, tz) + + converted = to_datetime(dates_aware, utc=True) + ex_vals = np.array([Timestamp(x).value for x in dates_aware]) + tm.assert_numpy_array_equal(converted.asi8, ex_vals) + assert converted.tz is pytz.utc + + def test_dti_union_aware(self): + # non-overlapping + rng = date_range("2012-11-15 00:00:00", periods=6, freq="H", tz="US/Central") + + rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="H", 
tz="US/Eastern") + + result = rng.union(rng2) + expected = rng.astype("O").union(rng2.astype("O")) + tm.assert_index_equal(result, expected) + assert result[0].tz.zone == "US/Central" + assert result[-1].tz.zone == "US/Eastern" + + def test_dti_union_mixed(self): + # GH 21671 + rng = DatetimeIndex([pd.Timestamp("2011-01-01"), pd.NaT]) + rng2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo") + result = rng.union(rng2) + expected = Index( + [ + pd.Timestamp("2011-01-01"), + pd.NaT, + pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), + pd.Timestamp("2012-01-02", tz="Asia/Tokyo"), + ], + dtype=object, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)] + ) + @pytest.mark.usefixtures("datetime_tz_utc") + def test_iteration_preserves_nanoseconds(self, tz): + # GH 19603 + index = DatetimeIndex( + ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz + ) + for i, ts in enumerate(index): + assert ts == index[i] + + +class TestDateRange: + """Tests for date_range with timezones""" + + def test_hongkong_tz_convert(self): + # GH#1673 smoke test + dr = date_range("2012-01-01", "2012-01-10", freq="D", tz="Hongkong") + + # it works! 
+ dr.hour + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_span_dst_transition(self, tzstr): + # GH#1778 + + # Standard -> Daylight Savings Time + dr = date_range("03/06/2012 00:00", periods=200, freq="W-FRI", tz="US/Eastern") + + assert (dr.hour == 0).all() + + dr = date_range("2012-11-02", periods=10, tz=tzstr) + result = dr.hour + expected = Index([0] * 10) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_timezone_str_argument(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + result = date_range("1/1/2000", periods=10, tz=tzstr) + expected = date_range("1/1/2000", periods=10, tz=tz) + + tm.assert_index_equal(result, expected) + + def test_date_range_with_fixedoffset_noname(self): + off = fixed_off_no_name + start = datetime(2012, 3, 11, 5, 0, 0, tzinfo=off) + end = datetime(2012, 6, 11, 5, 0, 0, tzinfo=off) + rng = date_range(start=start, end=end) + assert off == rng.tz + + idx = Index([start, end]) + assert off == idx.tz + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_date_range_with_tz(self, tzstr): + stamp = Timestamp("3/11/2012 05:00", tz=tzstr) + assert stamp.hour == 5 + + rng = date_range("3/11/2012 04:00", periods=10, freq="H", tz=tzstr) + + assert stamp == rng[1] + + +class TestToDatetime: + """Tests for the to_datetime constructor with timezones""" + + def test_to_datetime_utc(self): + arr = np.array([dateutil.parser.parse("2012-06-13T01:39:00Z")], dtype=object) + + result = to_datetime(arr, utc=True) + assert result.tz is pytz.utc + + def test_to_datetime_fixed_offset(self): + dates = [ + datetime(2000, 1, 1, tzinfo=fixed_off), + datetime(2000, 1, 2, tzinfo=fixed_off), + datetime(2000, 1, 3, tzinfo=fixed_off), + ] + result = to_datetime(dates) + assert result.tz == fixed_off diff --git a/pandas/tests/indexes/datetimes/test_tools.py 
b/pandas/tests/indexes/datetimes/test_tools.py new file mode 100644 index 00000000..6cae6f47 --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -0,0 +1,2324 @@ +""" test to_datetime """ + +import calendar +from collections import deque +from datetime import datetime, time +import locale + +from dateutil.parser import parse +from dateutil.tz.tz import tzoffset +import numpy as np +import pytest +import pytz + +from pandas._libs import tslib +from pandas._libs.tslibs import iNaT, parsing +from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_datetime64_ns_dtype + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + NaT, + Series, + Timestamp, + date_range, + isna, + to_datetime, +) +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray +from pandas.core.tools import datetimes as tools + + +class TestTimeConversionFormats: + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_format(self, cache): + values = ["1/1/2000", "1/2/2000", "1/3/2000"] + + results1 = [Timestamp("20000101"), Timestamp("20000201"), Timestamp("20000301")] + results2 = [Timestamp("20000101"), Timestamp("20000102"), Timestamp("20000103")] + for vals, expecteds in [ + (values, (Index(results1), Index(results2))), + (Series(values), (Series(results1), Series(results2))), + (values[0], (results1[0], results2[0])), + (values[1], (results1[1], results2[1])), + (values[2], (results1[2], results2[2])), + ]: + + for i, fmt in enumerate(["%d/%m/%Y", "%m/%d/%Y"]): + result = to_datetime(vals, format=fmt, cache=cache) + expected = expecteds[i] + + if isinstance(expected, Series): + tm.assert_series_equal(result, Series(expected)) + elif isinstance(expected, Timestamp): + assert result == expected + else: + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_format_YYYYMMDD(self, 
cache): + s = Series([19801222, 19801222] + [19810105] * 5) + expected = Series([Timestamp(x) for x in s.apply(str)]) + + result = to_datetime(s, format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, expected) + + result = to_datetime(s.apply(str), format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, expected) + + # with NaT + expected = Series( + [Timestamp("19801222"), Timestamp("19801222")] + [Timestamp("19810105")] * 5 + ) + expected[2] = np.nan + s[2] = np.nan + + result = to_datetime(s, format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, expected) + + # string with NaT + s = s.apply(str) + s[2] = "nat" + result = to_datetime(s, format="%Y%m%d", cache=cache) + tm.assert_series_equal(result, expected) + + # coercion + # GH 7930 + s = Series([20121231, 20141231, 99991231]) + result = pd.to_datetime(s, format="%Y%m%d", errors="ignore", cache=cache) + expected = Series( + [datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], + dtype=object, + ) + tm.assert_series_equal(result, expected) + + result = pd.to_datetime(s, format="%Y%m%d", errors="coerce", cache=cache) + expected = Series(["20121231", "20141231", "NaT"], dtype="M8[ns]") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "input_s", + [ + # Null values with Strings + ["19801222", "20010112", None], + ["19801222", "20010112", np.nan], + ["19801222", "20010112", pd.NaT], + ["19801222", "20010112", "NaT"], + # Null values with Integers + [19801222, 20010112, None], + [19801222, 20010112, np.nan], + [19801222, 20010112, pd.NaT], + [19801222, 20010112, "NaT"], + ], + ) + def test_to_datetime_format_YYYYMMDD_with_none(self, input_s): + # GH 30011 + # format='%Y%m%d' + # with None + expected = Series([Timestamp("19801222"), Timestamp("20010112"), pd.NaT]) + result = Series(pd.to_datetime(input_s, format="%Y%m%d")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "input_s, expected", + [ + # NaN before strings with 
invalid date values + [ + Series(["19801222", np.nan, "20010012", "10019999"]), + Series([Timestamp("19801222"), np.nan, np.nan, np.nan]), + ], + # NaN after strings with invalid date values + [ + Series(["19801222", "20010012", "10019999", np.nan]), + Series([Timestamp("19801222"), np.nan, np.nan, np.nan]), + ], + # NaN before integers with invalid date values + [ + Series([20190813, np.nan, 20010012, 20019999]), + Series([Timestamp("20190813"), np.nan, np.nan, np.nan]), + ], + # NaN after integers with invalid date values + [ + Series([20190813, 20010012, np.nan, 20019999]), + Series([Timestamp("20190813"), np.nan, np.nan, np.nan]), + ], + ], + ) + def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected): + # GH 25512 + # format='%Y%m%d', errors='coerce' + result = pd.to_datetime(input_s, format="%Y%m%d", errors="coerce") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_format_integer(self, cache): + # GH 10178 + s = Series([2000, 2001, 2002]) + expected = Series([Timestamp(x) for x in s.apply(str)]) + + result = to_datetime(s, format="%Y", cache=cache) + tm.assert_series_equal(result, expected) + + s = Series([200001, 200105, 200206]) + expected = Series([Timestamp(x[:4] + "-" + x[4:]) for x in s.apply(str)]) + + result = to_datetime(s, format="%Y%m", cache=cache) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "int_date, expected", + [ + # valid date, length == 8 + [20121030, datetime(2012, 10, 30)], + # short valid date, length == 6 + [199934, datetime(1999, 3, 4)], + # long integer date partially parsed to datetime(2012,1,1), length > 8 + [2012010101, 2012010101], + # invalid date partially parsed to datetime(2012,9,9), length == 8 + [20129930, 20129930], + # short integer date partially parsed to datetime(2012,9,9), length < 8 + [2012993, 2012993], + # short invalid date, length == 4 + [2121, 2121], + ], + ) + def 
test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, expected): + # GH 26583 + result = to_datetime(int_date, format="%Y%m%d", errors="ignore") + assert result == expected + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_format_microsecond(self, cache): + + # these are locale dependent + lang, _ = locale.getlocale() + month_abbr = calendar.month_abbr[4] + val = "01-{}-2011 00:00:01.978".format(month_abbr) + + format = "%d-%b-%Y %H:%M:%S.%f" + result = to_datetime(val, format=format, cache=cache) + exp = datetime.strptime(val, format) + assert result == exp + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_format_time(self, cache): + data = [ + ["01/10/2010 15:20", "%m/%d/%Y %H:%M", Timestamp("2010-01-10 15:20")], + ["01/10/2010 05:43", "%m/%d/%Y %I:%M", Timestamp("2010-01-10 05:43")], + [ + "01/10/2010 13:56:01", + "%m/%d/%Y %H:%M:%S", + Timestamp("2010-01-10 13:56:01"), + ] # , + # ['01/10/2010 08:14 PM', '%m/%d/%Y %I:%M %p', + # Timestamp('2010-01-10 20:14')], + # ['01/10/2010 07:40 AM', '%m/%d/%Y %I:%M %p', + # Timestamp('2010-01-10 07:40')], + # ['01/10/2010 09:12:56 AM', '%m/%d/%Y %I:%M:%S %p', + # Timestamp('2010-01-10 09:12:56')] + ] + for s, format, dt in data: + assert to_datetime(s, format=format, cache=cache) == dt + + @td.skip_if_has_locale + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_with_non_exact(self, cache): + # GH 10834 + # 8904 + # exact kw + s = Series( + ["19MAY11", "foobar19MAY11", "19MAY11:00:00:00", "19MAY11 00:00:00Z"] + ) + result = to_datetime(s, format="%d%b%y", exact=False, cache=cache) + expected = to_datetime( + s.str.extract(r"(\d+\w+\d+)", expand=False), format="%d%b%y", cache=cache + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_parse_nanoseconds_with_formula(self, cache): + + # GH8989 + # truncating the nanoseconds when a format was provided + for v in [ + "2012-01-01 
09:00:00.000000001", + "2012-01-01 09:00:00.000001", + "2012-01-01 09:00:00.001", + "2012-01-01 09:00:00.001000", + "2012-01-01 09:00:00.001000000", + ]: + expected = pd.to_datetime(v, cache=cache) + result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f", cache=cache) + assert result == expected + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_format_weeks(self, cache): + data = [ + ["2009324", "%Y%W%w", Timestamp("2009-08-13")], + ["2013020", "%Y%U%w", Timestamp("2013-01-13")], + ] + for s, format, dt in data: + assert to_datetime(s, format=format, cache=cache) == dt + + @pytest.mark.parametrize( + "fmt,dates,expected_dates", + [ + [ + "%Y-%m-%d %H:%M:%S %Z", + ["2010-01-01 12:00:00 UTC"] * 2, + [pd.Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2, + ], + [ + "%Y-%m-%d %H:%M:%S %Z", + [ + "2010-01-01 12:00:00 UTC", + "2010-01-01 12:00:00 GMT", + "2010-01-01 12:00:00 US/Pacific", + ], + [ + pd.Timestamp("2010-01-01 12:00:00", tz="UTC"), + pd.Timestamp("2010-01-01 12:00:00", tz="GMT"), + pd.Timestamp("2010-01-01 12:00:00", tz="US/Pacific"), + ], + ], + [ + "%Y-%m-%d %H:%M:%S%z", + ["2010-01-01 12:00:00+0100"] * 2, + [pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 +0100"] * 2, + [pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60))] * 2, + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"], + [ + pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(60)), + pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(-60)), + ], + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"], + [ + pd.Timestamp( + "2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0) + ), # pytz coerces to UTC + pd.Timestamp("2010-01-01 12:00:00", tzinfo=pytz.FixedOffset(0)), + ], + ], + ], + ) + def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): + # GH 13486 + 
result = pd.to_datetime(dates, format=fmt) + expected = pd.Index(expected_dates) + tm.assert_equal(result, expected) + + with pytest.raises(ValueError): + pd.to_datetime(dates, format=fmt, utc=True) + + @pytest.mark.parametrize( + "offset", ["+0", "-1foo", "UTCbar", ":10", "+01:000:01", ""] + ) + def test_to_datetime_parse_timezone_malformed(self, offset): + fmt = "%Y-%m-%d %H:%M:%S %z" + date = "2010-01-01 12:00:00 " + offset + with pytest.raises(ValueError): + pd.to_datetime([date], format=fmt) + + def test_to_datetime_parse_timezone_keeps_name(self): + # GH 21697 + fmt = "%Y-%m-%d %H:%M:%S %z" + arg = pd.Index(["2010-01-01 12:00:00 Z"], name="foo") + result = pd.to_datetime(arg, format=fmt) + expected = pd.DatetimeIndex(["2010-01-01 12:00:00"], tz="UTC", name="foo") + tm.assert_index_equal(result, expected) + + +class TestToDatetime: + @pytest.mark.parametrize( + "s, _format, dt", + [ + ["2015-1-1", "%G-%V-%u", datetime(2014, 12, 29, 0, 0)], + ["2015-1-4", "%G-%V-%u", datetime(2015, 1, 1, 0, 0)], + ["2015-1-7", "%G-%V-%u", datetime(2015, 1, 4, 0, 0)], + ], + ) + def test_to_datetime_iso_week_year_format(self, s, _format, dt): + # See GH#16607 + assert to_datetime(s, format=_format) == dt + + @pytest.mark.parametrize( + "msg, s, _format", + [ + [ + "ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 50", + "%Y %V", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 51", + "%G %V", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 Monday", + "%G %A", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 Mon", + "%G %a", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " 
+ "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 6", + "%G %w", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "1999 6", + "%G %u", + ], + [ + "ISO year directive '%G' must be used with the ISO week directive " + "'%V' and a weekday directive '%A', '%a', '%w', or '%u'.", + "2051", + "%G", + ], + [ + "Day of the year directive '%j' is not compatible with ISO year " + "directive '%G'. Use '%Y' instead.", + "1999 51 6 256", + "%G %V %u %j", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 Sunday", + "%Y %V %A", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 Sun", + "%Y %V %a", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 1", + "%Y %V %w", + ], + [ + "ISO week directive '%V' is incompatible with the year directive " + "'%Y'. Use the ISO year '%G' instead.", + "1999 51 1", + "%Y %V %u", + ], + [ + "ISO week directive '%V' must be used with the ISO year directive " + "'%G' and a weekday directive '%A', '%a', '%w', or '%u'.", + "20", + "%V", + ], + ], + ) + def test_error_iso_week_year(self, msg, s, _format): + # See GH#16607 + # This test checks for errors thrown when giving the wrong format + # However, as discussed on PR#25541, overriding the locale + # causes a different error to be thrown due to the format being + # locale specific, but the test data is in english. + # Therefore, the tests only run when locale is not overwritten, + # as a sort of solution to this problem. 
+ if locale.getlocale() != ("zh_CN", "UTF-8") and locale.getlocale() != ( + "it_IT", + "UTF-8", + ): + with pytest.raises(ValueError, match=msg): + to_datetime(s, format=_format) + + @pytest.mark.parametrize("tz", [None, "US/Central"]) + def test_to_datetime_dtarr(self, tz): + # DatetimeArray + dti = date_range("1965-04-03", periods=19, freq="2W", tz=tz) + arr = DatetimeArray(dti) + + result = to_datetime(arr) + assert result is arr + + result = to_datetime(arr) + assert result is arr + + def test_to_datetime_pydatetime(self): + actual = pd.to_datetime(datetime(2008, 1, 15)) + assert actual == datetime(2008, 1, 15) + + def test_to_datetime_YYYYMMDD(self): + actual = pd.to_datetime("20080115") + assert actual == datetime(2008, 1, 15) + + def test_to_datetime_unparseable_ignore(self): + # unparseable + s = "Month 1, 1999" + assert pd.to_datetime(s, errors="ignore") == s + + @td.skip_if_windows # `tm.set_timezone` does not work in windows + def test_to_datetime_now(self): + # See GH#18666 + with tm.set_timezone("US/Eastern"): + npnow = np.datetime64("now").astype("datetime64[ns]") + pdnow = pd.to_datetime("now") + pdnow2 = pd.to_datetime(["now"])[0] + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdnow.value - npnow.astype(np.int64)) < 1e10 + assert abs(pdnow2.value - npnow.astype(np.int64)) < 1e10 + + assert pdnow.tzinfo is None + assert pdnow2.tzinfo is None + + @td.skip_if_windows # `tm.set_timezone` does not work in windows + def test_to_datetime_today(self): + # See GH#18666 + # Test with one timezone far ahead of UTC and another far behind, so + # one of these will _almost_ always be in a different day from UTC. + # Unfortunately this test between 12 and 1 AM Samoa time + # this both of these timezones _and_ UTC will all be in the same day, + # so this test will not detect the regression introduced in #18666. 
+ with tm.set_timezone("Pacific/Auckland"): # 12-13 hours ahead of UTC + nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64) + pdtoday = pd.to_datetime("today") + pdtoday2 = pd.to_datetime(["today"])[0] + + tstoday = pd.Timestamp("today") + tstoday2 = pd.Timestamp.today() + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdtoday.normalize().value - nptoday) < 1e10 + assert abs(pdtoday2.normalize().value - nptoday) < 1e10 + assert abs(pdtoday.value - tstoday.value) < 1e10 + assert abs(pdtoday.value - tstoday2.value) < 1e10 + + assert pdtoday.tzinfo is None + assert pdtoday2.tzinfo is None + + with tm.set_timezone("US/Samoa"): # 11 hours behind UTC + nptoday = np.datetime64("today").astype("datetime64[ns]").astype(np.int64) + pdtoday = pd.to_datetime("today") + pdtoday2 = pd.to_datetime(["today"])[0] + + # These should all be equal with infinite perf; this gives + # a generous margin of 10 seconds + assert abs(pdtoday.normalize().value - nptoday) < 1e10 + assert abs(pdtoday2.normalize().value - nptoday) < 1e10 + + assert pdtoday.tzinfo is None + assert pdtoday2.tzinfo is None + + def test_to_datetime_today_now_unicode_bytes(self): + to_datetime(["now"]) + to_datetime(["today"]) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_dt64s(self, cache): + in_bound_dts = [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")] + + for dt in in_bound_dts: + assert pd.to_datetime(dt, cache=cache) == Timestamp(dt) + + @pytest.mark.parametrize( + "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")] + ) + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): + msg = "Out of bounds nanosecond timestamp: {}".format(dt) + with pytest.raises(OutOfBoundsDatetime, match=msg): + pd.to_datetime(dt, errors="raise") + with pytest.raises(OutOfBoundsDatetime, match=msg): + Timestamp(dt) + assert pd.to_datetime(dt, 
errors="coerce", cache=cache) is NaT + + @pytest.mark.parametrize("cache", [True, False]) + @pytest.mark.parametrize("unit", ["s", "D"]) + def test_to_datetime_array_of_dt64s(self, cache, unit): + # https://github.com/pandas-dev/pandas/issues/31491 + # Need at least 50 to ensure cache is used. + dts = [ + np.datetime64("2000-01-01", unit), + np.datetime64("2000-01-02", unit), + ] * 30 + # Assuming all datetimes are in bounds, to_datetime() returns + # an array that is equal to Timestamp() parsing + tm.assert_index_equal( + pd.to_datetime(dts, cache=cache), + pd.DatetimeIndex([Timestamp(x).asm8 for x in dts]), + ) + + # A list of datetimes where the last one is out of bounds + dts_with_oob = dts + [np.datetime64("9999-01-01")] + + msg = "Out of bounds nanosecond timestamp: 9999-01-01 00:00:00" + with pytest.raises(OutOfBoundsDatetime, match=msg): + pd.to_datetime(dts_with_oob, errors="raise") + + tm.assert_index_equal( + pd.to_datetime(dts_with_oob, errors="coerce", cache=cache), + pd.DatetimeIndex( + [Timestamp(dts_with_oob[0]).asm8, Timestamp(dts_with_oob[1]).asm8] * 30 + + [pd.NaT], + ), + ) + + # With errors='ignore', out of bounds datetime64s + # are converted to their .item(), which depending on the version of + # numpy is either a python datetime.datetime or datetime.date + tm.assert_index_equal( + pd.to_datetime(dts_with_oob, errors="ignore", cache=cache), + pd.Index([dt.item() for dt in dts_with_oob]), + ) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_tz(self, cache): + + # xref 8260 + # uniform returns a DatetimeIndex + arr = [ + pd.Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), + pd.Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), + ] + result = pd.to_datetime(arr, cache=cache) + expected = DatetimeIndex( + ["2013-01-01 13:00:00", "2013-01-02 14:00:00"], tz="US/Pacific" + ) + tm.assert_index_equal(result, expected) + + # mixed tzs will raise + arr = [ + pd.Timestamp("2013-01-01 13:00:00", tz="US/Pacific"), + 
pd.Timestamp("2013-01-02 14:00:00", tz="US/Eastern"), + ] + msg = ( + "Tz-aware datetime.datetime cannot be " + "converted to datetime64 unless utc=True" + ) + with pytest.raises(ValueError, match=msg): + pd.to_datetime(arr, cache=cache) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_different_offsets(self, cache): + # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark + # see GH-26097 for more + ts_string_1 = "March 1, 2018 12:00:00+0400" + ts_string_2 = "March 1, 2018 12:00:00+0500" + arr = [ts_string_1] * 5 + [ts_string_2] * 5 + expected = pd.Index([parse(x) for x in arr]) + result = pd.to_datetime(arr, cache=cache) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_tz_pytz(self, cache): + # see gh-8260 + us_eastern = pytz.timezone("US/Eastern") + arr = np.array( + [ + us_eastern.localize( + datetime(year=2000, month=1, day=1, hour=3, minute=0) + ), + us_eastern.localize( + datetime(year=2000, month=6, day=1, hour=3, minute=0) + ), + ], + dtype=object, + ) + result = pd.to_datetime(arr, utc=True, cache=cache) + expected = DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"], + dtype="datetime64[ns, UTC]", + freq=None, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + @pytest.mark.parametrize( + "init_constructor, end_constructor, test_method", + [ + (Index, DatetimeIndex, tm.assert_index_equal), + (list, DatetimeIndex, tm.assert_index_equal), + (np.array, DatetimeIndex, tm.assert_index_equal), + (Series, Series, tm.assert_series_equal), + ], + ) + def test_to_datetime_utc_true( + self, cache, init_constructor, end_constructor, test_method + ): + # See gh-11934 & gh-6415 + data = ["20100102 121314", "20100102 121315"] + expected_data = [ + pd.Timestamp("2010-01-02 12:13:14", tz="utc"), + pd.Timestamp("2010-01-02 12:13:15", tz="utc"), + ] + + result = pd.to_datetime( + 
init_constructor(data), format="%Y%m%d %H%M%S", utc=True, cache=cache + ) + expected = end_constructor(expected_data) + test_method(result, expected) + + # Test scalar case as well + for scalar, expected in zip(data, expected_data): + result = pd.to_datetime( + scalar, format="%Y%m%d %H%M%S", utc=True, cache=cache + ) + assert result == expected + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_utc_true_with_series_single_value(self, cache): + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = pd.to_datetime(pd.Series([ts]), utc=True, cache=cache) + expected = pd.Series([pd.Timestamp(ts, tz="utc")]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): + ts = "2013-01-01 00:00:00-01:00" + expected_ts = "2013-01-01 01:00:00" + data = pd.Series([ts] * 3) + result = pd.to_datetime(data, utc=True, cache=cache) + expected = pd.Series([pd.Timestamp(expected_ts, tz="utc")] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + @pytest.mark.parametrize( + "date, dtype", + [ + ("2013-01-01 01:00:00", "datetime64[ns]"), + ("2013-01-01 01:00:00", "datetime64[ns, UTC]"), + ], + ) + def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, dtype): + expected = pd.Series([pd.Timestamp("2013-01-01 01:00:00", tz="UTC")]) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True, cache=cache) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + @td.skip_if_no("psycopg2") + def test_to_datetime_tz_psycopg2(self, cache): + + # xref 8260 + import psycopg2 + + # misc cases + tz1 = psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None) + tz2 = psycopg2.tz.FixedOffsetTimezone(offset=-240, name=None) + arr = np.array( + [ + datetime(2000, 1, 1, 3, 0, tzinfo=tz1), + datetime(2000, 6, 1, 3, 0, tzinfo=tz2), + ], + dtype=object, + ) 
+ + result = pd.to_datetime(arr, errors="coerce", utc=True, cache=cache) + expected = DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-06-01 07:00:00+00:00"], + dtype="datetime64[ns, UTC]", + freq=None, + ) + tm.assert_index_equal(result, expected) + + # dtype coercion + i = pd.DatetimeIndex( + ["2000-01-01 08:00:00"], + tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None), + ) + assert is_datetime64_ns_dtype(i) + + # tz coercion + result = pd.to_datetime(i, errors="coerce", cache=cache) + tm.assert_index_equal(result, i) + + result = pd.to_datetime(i, errors="coerce", utc=True, cache=cache) + expected = pd.DatetimeIndex( + ["2000-01-01 13:00:00"], dtype="datetime64[ns, UTC]" + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_datetime_bool(self, cache): + # GH13176 + with pytest.raises(TypeError): + to_datetime(False) + assert to_datetime(False, errors="coerce", cache=cache) is NaT + assert to_datetime(False, errors="ignore", cache=cache) is False + with pytest.raises(TypeError): + to_datetime(True) + assert to_datetime(True, errors="coerce", cache=cache) is NaT + assert to_datetime(True, errors="ignore", cache=cache) is True + with pytest.raises(TypeError): + to_datetime([False, datetime.today()], cache=cache) + with pytest.raises(TypeError): + to_datetime(["20130101", True], cache=cache) + tm.assert_index_equal( + to_datetime([0, False, NaT, 0.0], errors="coerce", cache=cache), + DatetimeIndex( + [to_datetime(0, cache=cache), NaT, NaT, to_datetime(0, cache=cache)] + ), + ) + + def test_datetime_invalid_datatype(self): + # GH13176 + + with pytest.raises(TypeError): + pd.to_datetime(bool) + with pytest.raises(TypeError): + pd.to_datetime(pd.to_datetime) + + @pytest.mark.parametrize("value", ["a", "00:01:99"]) + @pytest.mark.parametrize("infer", [True, False]) + @pytest.mark.parametrize("format", [None, "H%:M%:S%"]) + def test_datetime_invalid_scalar(self, value, format, infer): + # GH24763 + res = 
pd.to_datetime( + value, errors="ignore", format=format, infer_datetime_format=infer + ) + assert res == value + + res = pd.to_datetime( + value, errors="coerce", format=format, infer_datetime_format=infer + ) + assert res is pd.NaT + + with pytest.raises(ValueError): + pd.to_datetime( + value, errors="raise", format=format, infer_datetime_format=infer + ) + + @pytest.mark.parametrize("value", ["3000/12/11 00:00:00"]) + @pytest.mark.parametrize("infer", [True, False]) + @pytest.mark.parametrize("format", [None, "H%:M%:S%"]) + def test_datetime_outofbounds_scalar(self, value, format, infer): + # GH24763 + res = pd.to_datetime( + value, errors="ignore", format=format, infer_datetime_format=infer + ) + assert res == value + + res = pd.to_datetime( + value, errors="coerce", format=format, infer_datetime_format=infer + ) + assert res is pd.NaT + + if format is not None: + with pytest.raises(ValueError): + pd.to_datetime( + value, errors="raise", format=format, infer_datetime_format=infer + ) + else: + with pytest.raises(OutOfBoundsDatetime): + pd.to_datetime( + value, errors="raise", format=format, infer_datetime_format=infer + ) + + @pytest.mark.parametrize("values", [["a"], ["00:01:99"], ["a", "b", "99:00:00"]]) + @pytest.mark.parametrize("infer", [True, False]) + @pytest.mark.parametrize("format", [None, "H%:M%:S%"]) + def test_datetime_invalid_index(self, values, format, infer): + # GH24763 + res = pd.to_datetime( + values, errors="ignore", format=format, infer_datetime_format=infer + ) + tm.assert_index_equal(res, pd.Index(values)) + + res = pd.to_datetime( + values, errors="coerce", format=format, infer_datetime_format=infer + ) + tm.assert_index_equal(res, pd.DatetimeIndex([pd.NaT] * len(values))) + + with pytest.raises(ValueError): + pd.to_datetime( + values, errors="raise", format=format, infer_datetime_format=infer + ) + + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None]) + 
@pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index, deque]) + def test_to_datetime_cache(self, utc, format, constructor): + date = "20130101 00:00:00" + test_dates = [date] * 10 ** 5 + data = constructor(test_dates) + + result = pd.to_datetime(data, utc=utc, format=format, cache=True) + expected = pd.to_datetime(data, utc=utc, format=format, cache=False) + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "listlike", + [ + (deque([pd.Timestamp("2010-06-02 09:30:00")] * 51)), + ([pd.Timestamp("2010-06-02 09:30:00")] * 51), + (tuple([pd.Timestamp("2010-06-02 09:30:00")] * 51)), + ], + ) + def test_no_slicing_errors_in_should_cache(self, listlike): + # GH 29403 + assert tools.should_cache(listlike) is True + + def test_to_datetime_from_deque(self): + # GH 29403 + result = pd.to_datetime(deque([pd.Timestamp("2010-06-02 09:30:00")] * 51)) + expected = pd.to_datetime([pd.Timestamp("2010-06-02 09:30:00")] * 51) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None]) + def test_to_datetime_cache_series(self, utc, format): + date = "20130101 00:00:00" + test_dates = [date] * 10 ** 5 + data = pd.Series(test_dates) + result = pd.to_datetime(data, utc=utc, format=format, cache=True) + expected = pd.to_datetime(data, utc=utc, format=format, cache=False) + tm.assert_series_equal(result, expected) + + def test_to_datetime_cache_scalar(self): + date = "20130101 00:00:00" + result = pd.to_datetime(date, cache=True) + expected = pd.Timestamp("20130101 00:00:00") + assert result == expected + + @pytest.mark.parametrize( + "date, format", + [ + ("2017-20", "%Y-%W"), + ("20 Sunday", "%W %A"), + ("20 Sun", "%W %a"), + ("2017-21", "%Y-%U"), + ("20 Sunday", "%U %A"), + ("20 Sun", "%U %a"), + ], + ) + def test_week_without_day_and_calendar_year(self, date, format): + # GH16774 + + msg = "Cannot use '%W' or '%U' without day and year" + with 
pytest.raises(ValueError, match=msg): + pd.to_datetime(date, format=format) + + def test_to_datetime_coerce(self): + # GH 26122 + ts_strings = [ + "March 1, 2018 12:00:00+0400", + "March 1, 2018 12:00:00+0500", + "20100240", + ] + result = to_datetime(ts_strings, errors="coerce") + expected = Index( + [ + datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400)), + datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 18000)), + NaT, + ] + ) + tm.assert_index_equal(result, expected) + + def test_to_datetime_coerce_malformed(self): + # GH 28299 + ts_strings = ["200622-12-31", "111111-24-11"] + result = to_datetime(ts_strings, errors="coerce") + expected = Index([NaT, NaT]) + tm.assert_index_equal(result, expected) + + def test_iso_8601_strings_with_same_offset(self): + # GH 17697, 11736 + ts_str = "2015-11-18 15:30:00+05:30" + result = to_datetime(ts_str) + expected = Timestamp(ts_str) + assert result == expected + + expected = DatetimeIndex([Timestamp(ts_str)] * 2) + result = to_datetime([ts_str] * 2) + tm.assert_index_equal(result, expected) + + result = DatetimeIndex([ts_str] * 2) + tm.assert_index_equal(result, expected) + + def test_iso_8601_strings_with_different_offsets(self): + # GH 17697, 11736 + ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] + result = to_datetime(ts_strings) + expected = np.array( + [ + datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), + datetime(2015, 11, 18, 16, 30, tzinfo=tzoffset(None, 23400)), + NaT, + ], + dtype=object, + ) + # GH 21864 + expected = Index(expected) + tm.assert_index_equal(result, expected) + + result = to_datetime(ts_strings, utc=True) + expected = DatetimeIndex( + [Timestamp(2015, 11, 18, 10), Timestamp(2015, 11, 18, 10), NaT], tz="UTC" + ) + tm.assert_index_equal(result, expected) + + def test_iso8601_strings_mixed_offsets_with_naive(self): + # GH 24992 + result = pd.to_datetime( + [ + "2018-11-28T00:00:00", + "2018-11-28T00:00:00+12:00", + "2018-11-28T00:00:00", + 
"2018-11-28T00:00:00+06:00", + "2018-11-28T00:00:00", + ], + utc=True, + ) + expected = pd.to_datetime( + [ + "2018-11-28T00:00:00", + "2018-11-27T12:00:00", + "2018-11-28T00:00:00", + "2018-11-27T18:00:00", + "2018-11-28T00:00:00", + ], + utc=True, + ) + tm.assert_index_equal(result, expected) + + items = ["2018-11-28T00:00:00+12:00", "2018-11-28T00:00:00"] + result = pd.to_datetime(items, utc=True) + expected = pd.to_datetime(list(reversed(items)), utc=True)[::-1] + tm.assert_index_equal(result, expected) + + def test_mixed_offsets_with_native_datetime_raises(self): + # GH 25978 + s = pd.Series( + [ + "nan", + pd.Timestamp("1990-01-01"), + "2015-03-14T16:15:14.123-08:00", + "2019-03-04T21:56:32.620-07:00", + None, + ] + ) + with pytest.raises(ValueError, match="Tz-aware datetime.datetime"): + pd.to_datetime(s) + + def test_non_iso_strings_with_tz_offset(self): + result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) + expected = DatetimeIndex( + [datetime(2018, 3, 1, 12, tzinfo=pytz.FixedOffset(240))] * 2 + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "ts, expected", + [ + (Timestamp("2018-01-01"), Timestamp("2018-01-01", tz="UTC")), + ( + Timestamp("2018-01-01", tz="US/Pacific"), + Timestamp("2018-01-01 08:00", tz="UTC"), + ), + ], + ) + def test_timestamp_utc_true(self, ts, expected): + # GH 24415 + result = to_datetime(ts, utc=True) + assert result == expected + + @pytest.mark.parametrize("dt_str", ["00010101", "13000101", "30000101", "99990101"]) + def test_to_datetime_with_format_out_of_bounds(self, dt_str): + # GH 9107 + with pytest.raises(OutOfBoundsDatetime): + pd.to_datetime(dt_str, format="%Y%m%d") + + +class TestToDatetimeUnit: + @pytest.mark.parametrize("cache", [True, False]) + def test_unit(self, cache): + # GH 11758 + # test proper behavior with errors + + with pytest.raises(ValueError): + to_datetime([1], unit="D", format="%Y%m%d", cache=cache) + + values = [11111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] + result 
= to_datetime(values, unit="D", errors="ignore", cache=cache) + expected = Index( + [ + 11111111, + Timestamp("1970-01-02"), + Timestamp("1970-01-02"), + NaT, + NaT, + NaT, + NaT, + NaT, + ], + dtype=object, + ) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, unit="D", errors="coerce", cache=cache) + expected = DatetimeIndex( + ["NaT", "1970-01-02", "1970-01-02", "NaT", "NaT", "NaT", "NaT", "NaT"] + ) + tm.assert_index_equal(result, expected) + + with pytest.raises(tslib.OutOfBoundsDatetime): + to_datetime(values, unit="D", errors="raise", cache=cache) + + values = [1420043460000, iNaT, NaT, np.nan, "NaT"] + + result = to_datetime(values, errors="ignore", unit="s", cache=cache) + expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) + tm.assert_index_equal(result, expected) + + result = to_datetime(values, errors="coerce", unit="s", cache=cache) + expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"]) + tm.assert_index_equal(result, expected) + + with pytest.raises(tslib.OutOfBoundsDatetime): + to_datetime(values, errors="raise", unit="s", cache=cache) + + # if we have a string, then we raise a ValueError + # and NOT an OutOfBoundsDatetime + for val in ["foo", Timestamp("20130101")]: + try: + to_datetime(val, errors="raise", unit="s", cache=cache) + except tslib.OutOfBoundsDatetime: + raise AssertionError("incorrect exception raised") + except ValueError: + pass + + @pytest.mark.parametrize("cache", [True, False]) + def test_unit_consistency(self, cache): + + # consistency of conversions + expected = Timestamp("1970-05-09 14:25:11") + result = pd.to_datetime(11111111, unit="s", errors="raise", cache=cache) + assert result == expected + assert isinstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit="s", errors="coerce", cache=cache) + assert result == expected + assert isinstance(result, Timestamp) + + result = pd.to_datetime(11111111, unit="s", errors="ignore", cache=cache) + assert result == expected 
+ assert isinstance(result, Timestamp) + + @pytest.mark.parametrize("cache", [True, False]) + def test_unit_with_numeric(self, cache): + + # GH 13180 + # coercions from floats/ints are ok + expected = DatetimeIndex(["2015-06-19 05:33:20", "2015-05-27 22:33:20"]) + arr1 = [1.434692e18, 1.432766e18] + arr2 = np.array(arr1).astype("int64") + for errors in ["ignore", "raise", "coerce"]: + result = pd.to_datetime(arr1, errors=errors, cache=cache) + tm.assert_index_equal(result, expected) + + result = pd.to_datetime(arr2, errors=errors, cache=cache) + tm.assert_index_equal(result, expected) + + # but we want to make sure that we are coercing + # if we have ints/strings + expected = DatetimeIndex(["NaT", "2015-06-19 05:33:20", "2015-05-27 22:33:20"]) + arr = ["foo", 1.434692e18, 1.432766e18] + result = pd.to_datetime(arr, errors="coerce", cache=cache) + tm.assert_index_equal(result, expected) + + expected = DatetimeIndex( + ["2015-06-19 05:33:20", "2015-05-27 22:33:20", "NaT", "NaT"] + ) + arr = [1.434692e18, 1.432766e18, "foo", "NaT"] + result = pd.to_datetime(arr, errors="coerce", cache=cache) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_unit_mixed(self, cache): + + # mixed integers/datetimes + expected = DatetimeIndex(["2013-01-01", "NaT", "NaT"]) + arr = [pd.Timestamp("20130101"), 1.434692e18, 1.432766e18] + result = pd.to_datetime(arr, errors="coerce", cache=cache) + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError): + pd.to_datetime(arr, errors="raise", cache=cache) + + expected = DatetimeIndex(["NaT", "NaT", "2013-01-01"]) + arr = [1.434692e18, 1.432766e18, pd.Timestamp("20130101")] + result = pd.to_datetime(arr, errors="coerce", cache=cache) + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError): + pd.to_datetime(arr, errors="raise", cache=cache) + + @pytest.mark.parametrize("cache", [True, False]) + def test_unit_rounding(self, cache): + # GH 14156: 
argument will incur floating point errors but no + # premature rounding + result = pd.to_datetime(1434743731.8770001, unit="s", cache=cache) + expected = pd.Timestamp("2015-06-19 19:55:31.877000093") + assert result == expected + + @pytest.mark.parametrize("cache", [True, False]) + def test_unit_ignore_keeps_name(self, cache): + # GH 21697 + expected = pd.Index([15e9] * 2, name="name") + result = pd.to_datetime(expected, errors="ignore", unit="s", cache=cache) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_dataframe(self, cache): + + df = DataFrame( + { + "year": [2015, 2016], + "month": [2, 3], + "day": [4, 5], + "hour": [6, 7], + "minute": [58, 59], + "second": [10, 11], + "ms": [1, 1], + "us": [2, 2], + "ns": [3, 3], + } + ) + + result = to_datetime( + {"year": df["year"], "month": df["month"], "day": df["day"]}, cache=cache + ) + expected = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:0:00")] + ) + tm.assert_series_equal(result, expected) + + # dict-like + result = to_datetime(df[["year", "month", "day"]].to_dict(), cache=cache) + tm.assert_series_equal(result, expected) + + # dict but with constructable + df2 = df[["year", "month", "day"]].to_dict() + df2["month"] = 2 + result = to_datetime(df2, cache=cache) + expected2 = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160205 00:0:00")] + ) + tm.assert_series_equal(result, expected2) + + # unit mappings + units = [ + { + "year": "years", + "month": "months", + "day": "days", + "hour": "hours", + "minute": "minutes", + "second": "seconds", + }, + { + "year": "year", + "month": "month", + "day": "day", + "hour": "hour", + "minute": "minute", + "second": "second", + }, + ] + + for d in units: + result = to_datetime(df[list(d.keys())].rename(columns=d), cache=cache) + expected = Series( + [Timestamp("20150204 06:58:10"), Timestamp("20160305 07:59:11")] + ) + tm.assert_series_equal(result, expected) + + d = { + "year": "year", + 
"month": "month", + "day": "day", + "hour": "hour", + "minute": "minute", + "second": "second", + "ms": "ms", + "us": "us", + "ns": "ns", + } + + result = to_datetime(df.rename(columns=d), cache=cache) + expected = Series( + [ + Timestamp("20150204 06:58:10.001002003"), + Timestamp("20160305 07:59:11.001002003"), + ] + ) + tm.assert_series_equal(result, expected) + + # coerce back to int + result = to_datetime(df.astype(str), cache=cache) + tm.assert_series_equal(result, expected) + + # passing coerce + df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) + + msg = ( + "cannot assemble the datetimes: time data .+ does not " + r"match format '%Y%m%d' \(match\)" + ) + with pytest.raises(ValueError, match=msg): + to_datetime(df2, cache=cache) + result = to_datetime(df2, errors="coerce", cache=cache) + expected = Series([Timestamp("20150204 00:00:00"), NaT]) + tm.assert_series_equal(result, expected) + + # extra columns + msg = r"extra keys have been passed to the datetime assemblage: \[foo\]" + with pytest.raises(ValueError, match=msg): + df2 = df.copy() + df2["foo"] = 1 + to_datetime(df2, cache=cache) + + # not enough + msg = ( + r"to assemble mappings requires at least that \[year, month, " + r"day\] be specified: \[.+\] is missing" + ) + for c in [ + ["year"], + ["year", "month"], + ["year", "month", "second"], + ["month", "day"], + ["year", "day", "second"], + ]: + with pytest.raises(ValueError, match=msg): + to_datetime(df[c], cache=cache) + + # duplicates + msg = "cannot assemble with duplicate keys" + df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) + df2.columns = ["year", "year", "day"] + with pytest.raises(ValueError, match=msg): + to_datetime(df2, cache=cache) + + df2 = DataFrame( + {"year": [2015, 2016], "month": [2, 20], "day": [4, 5], "hour": [4, 5]} + ) + df2.columns = ["year", "month", "day", "day"] + with pytest.raises(ValueError, match=msg): + to_datetime(df2, cache=cache) + + 
@pytest.mark.parametrize("cache", [True, False]) + def test_dataframe_dtypes(self, cache): + # #13451 + df = DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + + # int16 + result = to_datetime(df.astype("int16"), cache=cache) + expected = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")] + ) + tm.assert_series_equal(result, expected) + + # mixed dtypes + df["month"] = df["month"].astype("int8") + df["day"] = df["day"].astype("int8") + result = to_datetime(df, cache=cache) + expected = Series( + [Timestamp("20150204 00:00:00"), Timestamp("20160305 00:00:00")] + ) + tm.assert_series_equal(result, expected) + + # float + df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]}) + with pytest.raises(ValueError): + to_datetime(df, cache=cache) + + def test_dataframe_utc_true(self): + # GH 23760 + df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + result = pd.to_datetime(df, utc=True) + expected = pd.Series( + np.array(["2015-02-04", "2016-03-05"], dtype="datetime64[ns]") + ).dt.tz_localize("UTC") + tm.assert_series_equal(result, expected) + + def test_to_datetime_errors_ignore_utc_true(self): + # GH 23758 + result = pd.to_datetime([1], unit="s", utc=True, errors="ignore") + expected = DatetimeIndex(["1970-01-01 00:00:01"], tz="UTC") + tm.assert_index_equal(result, expected) + + +class TestToDatetimeMisc: + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object) + + with pytest.raises(OutOfBoundsDatetime): + to_datetime(arr) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_iso8601(self, cache): + result = to_datetime(["2012-01-01 00:00:00"], cache=cache) + exp = Timestamp("2012-01-01 00:00:00") + assert result[0] == exp + + result = to_datetime(["20121001"], cache=cache) # bad iso 8601 + exp = 
Timestamp("2012-10-01") + assert result[0] == exp + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_default(self, cache): + rs = to_datetime("2001", cache=cache) + xp = datetime(2001, 1, 1) + assert rs == xp + + # dayfirst is essentially broken + + # to_datetime('01-13-2012', dayfirst=True) + # pytest.raises(ValueError, to_datetime('01-13-2012', + # dayfirst=True)) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_on_datetime64_series(self, cache): + # #2699 + s = Series(date_range("1/1/2000", periods=10)) + + result = to_datetime(s, cache=cache) + assert result[0] == s[0] + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_with_space_in_series(self, cache): + # GH 6428 + s = Series(["10/18/2006", "10/18/2008", " "]) + msg = r"(\(')?String does not contain a date(:', ' '\))?" + with pytest.raises(ValueError, match=msg): + to_datetime(s, errors="raise", cache=cache) + result_coerce = to_datetime(s, errors="coerce", cache=cache) + expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) + tm.assert_series_equal(result_coerce, expected_coerce) + result_ignore = to_datetime(s, errors="ignore", cache=cache) + tm.assert_series_equal(result_ignore, s) + + @td.skip_if_has_locale + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_with_apply(self, cache): + # this is only locale tested with US/None locales + # GH 5195 + # with a format and coerce a single item to_datetime fails + td = Series(["May 04", "Jun 02", "Dec 11"], index=[1, 2, 3]) + expected = pd.to_datetime(td, format="%b %y", cache=cache) + result = td.apply(pd.to_datetime, format="%b %y", cache=cache) + tm.assert_series_equal(result, expected) + + td = pd.Series(["May 04", "Jun 02", ""], index=[1, 2, 3]) + msg = r"time data '' does not match format '%b %y' \(match\)" + with pytest.raises(ValueError, match=msg): + pd.to_datetime(td, format="%b %y", errors="raise", cache=cache) + with 
pytest.raises(ValueError, match=msg): + td.apply(pd.to_datetime, format="%b %y", errors="raise", cache=cache) + expected = pd.to_datetime(td, format="%b %y", errors="coerce", cache=cache) + + result = td.apply( + lambda x: pd.to_datetime(x, format="%b %y", errors="coerce", cache=cache) + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_types(self, cache): + + # empty string + result = to_datetime("", cache=cache) + assert result is NaT + + result = to_datetime(["", ""], cache=cache) + assert isna(result).all() + + # ints + result = Timestamp(0) + expected = to_datetime(0, cache=cache) + assert result == expected + + # GH 3888 (strings) + expected = to_datetime(["2012"], cache=cache)[0] + result = to_datetime("2012", cache=cache) + assert result == expected + + # array = ['2012','20120101','20120101 12:01:01'] + array = ["20120101", "20120101 12:01:01"] + expected = list(to_datetime(array, cache=cache)) + result = [Timestamp(date_str) for date_str in array] + tm.assert_almost_equal(result, expected) + + # currently fails ### + # result = Timestamp('2012') + # expected = to_datetime('2012') + # assert result == expected + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_unprocessable_input(self, cache): + # GH 4928 + # GH 21864 + result = to_datetime([1, "1"], errors="ignore", cache=cache) + + expected = Index(np.array([1, "1"], dtype="O")) + tm.assert_equal(result, expected) + msg = "invalid string coercion to datetime" + with pytest.raises(TypeError, match=msg): + to_datetime([1, "1"], errors="raise", cache=cache) + + def test_to_datetime_other_datetime64_units(self): + # 5/25/2012 + scalar = np.int64(1337904000000000).view("M8[us]") + as_obj = scalar.astype("O") + + index = DatetimeIndex([scalar]) + assert index[0] == scalar.astype("O") + + value = Timestamp(scalar) + assert value == as_obj + + def test_to_datetime_list_of_integers(self): + rng = date_range("1/1/2000", 
periods=20) + rng = DatetimeIndex(rng.values) + + ints = list(rng.asi8) + + result = DatetimeIndex(ints) + + tm.assert_index_equal(rng, result) + + def test_to_datetime_overflow(self): + # gh-17637 + # we are overflowing Timedelta range here + + with pytest.raises(OverflowError): + date_range(start="1/1/1700", freq="B", periods=100000) + + @pytest.mark.parametrize("cache", [True, False]) + def test_string_na_nat_conversion(self, cache): + # GH #999, #858 + + strings = np.array( + ["1/1/2000", "1/2/2000", np.nan, "1/4/2000, 12:34:56"], dtype=object + ) + + expected = np.empty(4, dtype="M8[ns]") + for i, val in enumerate(strings): + if isna(val): + expected[i] = iNaT + else: + expected[i] = parse(val) + + result = tslib.array_to_datetime(strings)[0] + tm.assert_almost_equal(result, expected) + + result2 = to_datetime(strings, cache=cache) + assert isinstance(result2, DatetimeIndex) + tm.assert_numpy_array_equal(result, result2.values) + + malformed = np.array(["1/100/2000", np.nan], dtype=object) + + # GH 10636, default is now 'raise' + msg = r"Unknown string format:|day is out of range for month" + with pytest.raises(ValueError, match=msg): + to_datetime(malformed, errors="raise", cache=cache) + + result = to_datetime(malformed, errors="ignore", cache=cache) + # GH 21864 + expected = Index(malformed) + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError, match=msg): + to_datetime(malformed, errors="raise", cache=cache) + + idx = ["a", "b", "c", "d", "e"] + series = Series( + ["1/1/2000", np.nan, "1/3/2000", np.nan, "1/5/2000"], index=idx, name="foo" + ) + dseries = Series( + [ + to_datetime("1/1/2000", cache=cache), + np.nan, + to_datetime("1/3/2000", cache=cache), + np.nan, + to_datetime("1/5/2000", cache=cache), + ], + index=idx, + name="foo", + ) + + result = to_datetime(series, cache=cache) + dresult = to_datetime(dseries, cache=cache) + + expected = Series(np.empty(5, dtype="M8[ns]"), index=idx) + for i in range(5): + x = series[i] + if 
class TestGuessDatetimeFormat:
    """Tests for ``tools._guess_datetime_format_for_array``."""

    @td.skip_if_not_us_locale
    def test_guess_datetime_format_for_array(self):
        """The format is guessed from an array, or ``None`` if it is all NaN.

        Leading NaNs and a trailing unparseable string must not prevent the
        guess, according to the cases asserted here.
        """
        fmt = "%Y-%m-%d %H:%M:%S.%f"
        stamp = datetime(2011, 12, 30).strftime(fmt)
        guess = tools._guess_datetime_format_for_array

        guessable = (
            [stamp, stamp, stamp],
            [np.nan, np.nan, stamp],
            [stamp, "random_string"],
        )
        for values in guessable:
            assert guess(np.array(values, dtype="O")) == fmt

        # nothing to guess from when every element is NaN
        all_missing = np.array([np.nan] * 3, dtype="O")
        assert guess(all_missing) is None
infer_datetime_format=False, cache=cache), + pd.to_datetime(s, infer_datetime_format=True, cache=cache), + ) + + s = pd.Series(np.array(["Jan/01/2011", "Feb/01/2011", "Mar/01/2011"])) + + tm.assert_series_equal( + pd.to_datetime(s, infer_datetime_format=False, cache=cache), + pd.to_datetime(s, infer_datetime_format=True, cache=cache), + ) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): + s = pd.Series( + np.array(["01/01/2011 00:00:00", np.nan, "01/03/2011 00:00:00", np.nan]) + ) + tm.assert_series_equal( + pd.to_datetime(s, infer_datetime_format=False, cache=cache), + pd.to_datetime(s, infer_datetime_format=True, cache=cache), + ) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache): + s = pd.Series( + np.array( + [ + np.nan, + np.nan, + "01/01/2011 00:00:00", + "01/02/2011 00:00:00", + "01/03/2011 00:00:00", + ] + ) + ) + + tm.assert_series_equal( + pd.to_datetime(s, infer_datetime_format=False, cache=cache), + pd.to_datetime(s, infer_datetime_format=True, cache=cache), + ) + + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_iso8601_noleading_0s(self, cache): + # GH 11871 + s = pd.Series(["2014-1-1", "2014-2-2", "2015-3-3"]) + expected = pd.Series( + [ + pd.Timestamp("2014-01-01"), + pd.Timestamp("2014-02-02"), + pd.Timestamp("2015-03-03"), + ] + ) + tm.assert_series_equal(pd.to_datetime(s, cache=cache), expected) + tm.assert_series_equal( + pd.to_datetime(s, format="%Y-%m-%d", cache=cache), expected + ) + + +class TestDaysInMonth: + # tests for issue #10154 + + @pytest.mark.parametrize("cache", [True, False]) + def test_day_not_in_month_coerce(self, cache): + assert isna(to_datetime("2015-02-29", errors="coerce", cache=cache)) + assert isna( + to_datetime("2015-02-29", format="%Y-%m-%d", errors="coerce", cache=cache) + ) + assert isna( + to_datetime("2015-02-32", 
format="%Y-%m-%d", errors="coerce", cache=cache) + ) + assert isna( + to_datetime("2015-04-31", format="%Y-%m-%d", errors="coerce", cache=cache) + ) + + @pytest.mark.parametrize("cache", [True, False]) + def test_day_not_in_month_raise(self, cache): + msg = "day is out of range for month" + with pytest.raises(ValueError, match=msg): + to_datetime("2015-02-29", errors="raise", cache=cache) + + msg = "time data 2015-02-29 doesn't match format specified" + with pytest.raises(ValueError, match=msg): + to_datetime("2015-02-29", errors="raise", format="%Y-%m-%d", cache=cache) + + msg = "time data 2015-02-32 doesn't match format specified" + with pytest.raises(ValueError, match=msg): + to_datetime("2015-02-32", errors="raise", format="%Y-%m-%d", cache=cache) + + msg = "time data 2015-04-31 doesn't match format specified" + with pytest.raises(ValueError, match=msg): + to_datetime("2015-04-31", errors="raise", format="%Y-%m-%d", cache=cache) + + @pytest.mark.parametrize("cache", [True, False]) + def test_day_not_in_month_ignore(self, cache): + assert to_datetime("2015-02-29", errors="ignore", cache=cache) == "2015-02-29" + assert ( + to_datetime("2015-02-29", errors="ignore", format="%Y-%m-%d", cache=cache) + == "2015-02-29" + ) + assert ( + to_datetime("2015-02-32", errors="ignore", format="%Y-%m-%d", cache=cache) + == "2015-02-32" + ) + assert ( + to_datetime("2015-04-31", errors="ignore", format="%Y-%m-%d", cache=cache) + == "2015-04-31" + ) + + +class TestDatetimeParsingWrappers: + @pytest.mark.parametrize( + "date_str,expected", + list( + { + "2011-01-01": datetime(2011, 1, 1), + "2Q2005": datetime(2005, 4, 1), + "2Q05": datetime(2005, 4, 1), + "2005Q1": datetime(2005, 1, 1), + "05Q1": datetime(2005, 1, 1), + "2011Q3": datetime(2011, 7, 1), + "11Q3": datetime(2011, 7, 1), + "3Q2011": datetime(2011, 7, 1), + "3Q11": datetime(2011, 7, 1), + # quarterly without space + "2000Q4": datetime(2000, 10, 1), + "00Q4": datetime(2000, 10, 1), + "4Q2000": datetime(2000, 10, 1), + 
"4Q00": datetime(2000, 10, 1), + "2000q4": datetime(2000, 10, 1), + "2000-Q4": datetime(2000, 10, 1), + "00-Q4": datetime(2000, 10, 1), + "4Q-2000": datetime(2000, 10, 1), + "4Q-00": datetime(2000, 10, 1), + "00q4": datetime(2000, 10, 1), + "2005": datetime(2005, 1, 1), + "2005-11": datetime(2005, 11, 1), + "2005 11": datetime(2005, 11, 1), + "11-2005": datetime(2005, 11, 1), + "11 2005": datetime(2005, 11, 1), + "200511": datetime(2020, 5, 11), + "20051109": datetime(2005, 11, 9), + "20051109 10:15": datetime(2005, 11, 9, 10, 15), + "20051109 08H": datetime(2005, 11, 9, 8, 0), + "2005-11-09 10:15": datetime(2005, 11, 9, 10, 15), + "2005-11-09 08H": datetime(2005, 11, 9, 8, 0), + "2005/11/09 10:15": datetime(2005, 11, 9, 10, 15), + "2005/11/09 08H": datetime(2005, 11, 9, 8, 0), + "Thu Sep 25 10:36:28 2003": datetime(2003, 9, 25, 10, 36, 28), + "Thu Sep 25 2003": datetime(2003, 9, 25), + "Sep 25 2003": datetime(2003, 9, 25), + "January 1 2014": datetime(2014, 1, 1), + # GHE10537 + "2014-06": datetime(2014, 6, 1), + "06-2014": datetime(2014, 6, 1), + "2014-6": datetime(2014, 6, 1), + "6-2014": datetime(2014, 6, 1), + "20010101 12": datetime(2001, 1, 1, 12), + "20010101 1234": datetime(2001, 1, 1, 12, 34), + "20010101 123456": datetime(2001, 1, 1, 12, 34, 56), + }.items() + ), + ) + @pytest.mark.parametrize("cache", [True, False]) + def test_parsers(self, date_str, expected, cache): + + # dateutil >= 2.5.0 defaults to yearfirst=True + # https://github.com/dateutil/dateutil/issues/217 + yearfirst = True + + result1, _, _ = parsing.parse_time_string(date_str, yearfirst=yearfirst) + result2 = to_datetime(date_str, yearfirst=yearfirst) + result3 = to_datetime([date_str], yearfirst=yearfirst) + # result5 is used below + result4 = to_datetime( + np.array([date_str], dtype=object), yearfirst=yearfirst, cache=cache + ) + result6 = DatetimeIndex([date_str], yearfirst=yearfirst) + # result7 is used below + result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) + 
result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst) + + for res in [result1, result2]: + assert res == expected + for res in [result3, result4, result6, result8, result9]: + exp = DatetimeIndex([pd.Timestamp(expected)]) + tm.assert_index_equal(res, exp) + + # these really need to have yearfirst, but we don't support + if not yearfirst: + result5 = Timestamp(date_str) + assert result5 == expected + result7 = date_range(date_str, freq="S", periods=1, yearfirst=yearfirst) + assert result7 == expected + + @pytest.mark.parametrize("cache", [True, False]) + def test_na_values_with_cache( + self, cache, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH22305 + expected = Index([NaT, NaT], dtype="datetime64[ns]") + result = to_datetime([unique_nulls_fixture, unique_nulls_fixture2], cache=cache) + tm.assert_index_equal(result, expected) + + def test_parsers_nat(self): + # Test that each of several string-accepting methods return pd.NaT + result1, _, _ = parsing.parse_time_string("NaT") + result2 = to_datetime("NaT") + result3 = Timestamp("NaT") + result4 = DatetimeIndex(["NaT"])[0] + assert result1 is NaT + assert result2 is NaT + assert result3 is NaT + assert result4 is NaT + + @pytest.mark.parametrize("cache", [True, False]) + def test_parsers_dayfirst_yearfirst(self, cache): + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 + + # OK + # 2.5.1 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.3 10-11-12 [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00 + + # bug fix in 2.5.2 + # 2.5.1 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00 + + # OK + # 2.5.1 10-11-12 
[dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.2 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + # 2.5.3 10-11-12 [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.3 20/12/21 [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00 + + # revert of bug in 2.5.2 + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=1] -> month must be in 1..12 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00 + + # OK + # 2.5.1 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.2 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + # 2.5.3 20/12/21 [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00 + + # str : dayfirst, yearfirst, expected + cases = { + "10-11-12": [ + (False, False, datetime(2012, 10, 11)), + (True, False, datetime(2012, 11, 10)), + (False, True, datetime(2010, 11, 12)), + (True, True, datetime(2010, 12, 11)), + ], + "20/12/21": [ + (False, False, datetime(2021, 12, 20)), + (True, False, datetime(2021, 12, 20)), + (False, True, datetime(2020, 12, 21)), + (True, True, datetime(2020, 12, 21)), + ], + } + + for date_str, values in cases.items(): + for dayfirst, yearfirst, expected in values: + + # compare with dateutil result + dateutil_result = parse( + date_str, dayfirst=dayfirst, yearfirst=yearfirst + ) + assert dateutil_result == expected + + result1, _, _ = parsing.parse_time_string( + date_str, dayfirst=dayfirst, yearfirst=yearfirst + ) + + # we don't support dayfirst/yearfirst here: + if not dayfirst and not yearfirst: + result2 = Timestamp(date_str) + assert result2 == expected + + result3 = 
to_datetime( + date_str, dayfirst=dayfirst, yearfirst=yearfirst, cache=cache + ) + + result4 = DatetimeIndex( + [date_str], dayfirst=dayfirst, yearfirst=yearfirst + )[0] + + assert result1 == expected + assert result3 == expected + assert result4 == expected + + @pytest.mark.parametrize("cache", [True, False]) + def test_parsers_timestring(self, cache): + # must be the same as dateutil result + cases = { + "10:15": (parse("10:15"), datetime(1, 1, 1, 10, 15)), + "9:05": (parse("9:05"), datetime(1, 1, 1, 9, 5)), + } + + for date_str, (exp_now, exp_def) in cases.items(): + result1, _, _ = parsing.parse_time_string(date_str) + result2 = to_datetime(date_str) + result3 = to_datetime([date_str]) + result4 = Timestamp(date_str) + result5 = DatetimeIndex([date_str])[0] + # parse time string return time string based on default date + # others are not, and can't be changed because it is used in + # time series plot + assert result1 == exp_def + assert result2 == exp_now + assert result3 == exp_now + assert result4 == exp_now + assert result5 == exp_now + + @td.skip_if_has_locale + def test_parsers_time(self): + # GH11818 + strings = [ + "14:15", + "1415", + "2:15pm", + "0215pm", + "14:15:00", + "141500", + "2:15:00pm", + "021500pm", + time(14, 15), + ] + expected = time(14, 15) + + for time_string in strings: + assert tools.to_time(time_string) == expected + + new_string = "14.15" + msg = r"Cannot convert arg \['14\.15'\] to a time" + with pytest.raises(ValueError, match=msg): + tools.to_time(new_string) + assert tools.to_time(new_string, format="%H.%M") == expected + + arg = ["14:15", "20:20"] + expected_arr = [time(14, 15), time(20, 20)] + assert tools.to_time(arg) == expected_arr + assert tools.to_time(arg, format="%H:%M") == expected_arr + assert tools.to_time(arg, infer_time_format=True) == expected_arr + assert tools.to_time(arg, format="%I:%M%p", errors="coerce") == [None, None] + + res = tools.to_time(arg, format="%I:%M%p", errors="ignore") + 
tm.assert_numpy_array_equal(res, np.array(arg, dtype=np.object_)) + + with pytest.raises(ValueError): + tools.to_time(arg, format="%I:%M%p", errors="raise") + + tm.assert_series_equal( + tools.to_time(Series(arg, name="test")), Series(expected_arr, name="test") + ) + + res = tools.to_time(np.array(arg)) + assert isinstance(res, list) + assert res == expected_arr + + @pytest.mark.parametrize("cache", [True, False]) + @pytest.mark.parametrize( + "dt_string, tz, dt_string_repr", + [ + ( + "2013-01-01 05:45+0545", + pytz.FixedOffset(345), + "Timestamp('2013-01-01 05:45:00+0545', tz='pytz.FixedOffset(345)')", + ), + ( + "2013-01-01 05:30+0530", + pytz.FixedOffset(330), + "Timestamp('2013-01-01 05:30:00+0530', tz='pytz.FixedOffset(330)')", + ), + ], + ) + def test_parsers_timezone_minute_offsets_roundtrip( + self, cache, dt_string, tz, dt_string_repr + ): + # GH11708 + base = to_datetime("2013-01-01 00:00:00", cache=cache) + base = base.tz_localize("UTC").tz_convert(tz) + dt_time = to_datetime(dt_string, cache=cache) + assert base == dt_time + assert dt_string_repr == repr(dt_time) + + +@pytest.fixture(params=["D", "s", "ms", "us", "ns"]) +def units(request): + """Day and some time units. + + * D + * s + * ms + * us + * ns + """ + return request.param + + +@pytest.fixture +def epoch_1960(): + """Timestamp at 1960-01-01.""" + return Timestamp("1960-01-01") + + +@pytest.fixture +def units_from_epochs(): + return list(range(5)) + + +@pytest.fixture(params=["timestamp", "pydatetime", "datetime64", "str_1960"]) +def epochs(epoch_1960, request): + """Timestamp at 1960-01-01 in various forms. 
+ + * pd.Timestamp + * datetime.datetime + * numpy.datetime64 + * str + """ + assert request.param in {"timestamp", "pydatetime", "datetime64", "str_1960"} + if request.param == "timestamp": + return epoch_1960 + elif request.param == "pydatetime": + return epoch_1960.to_pydatetime() + elif request.param == "datetime64": + return epoch_1960.to_datetime64() + else: + return str(epoch_1960) + + +@pytest.fixture +def julian_dates(): + return pd.date_range("2014-1-1", periods=10).to_julian_date().values + + +class TestOrigin: + def test_to_basic(self, julian_dates): + # gh-11276, gh-11745 + # for origin as julian + + result = Series(pd.to_datetime(julian_dates, unit="D", origin="julian")) + expected = Series( + pd.to_datetime(julian_dates - pd.Timestamp(0).to_julian_date(), unit="D") + ) + tm.assert_series_equal(result, expected) + + result = Series(pd.to_datetime([0, 1, 2], unit="D", origin="unix")) + expected = Series( + [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ) + tm.assert_series_equal(result, expected) + + # default + result = Series(pd.to_datetime([0, 1, 2], unit="D")) + expected = Series( + [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ) + tm.assert_series_equal(result, expected) + + def test_julian_round_trip(self): + result = pd.to_datetime(2456658, origin="julian", unit="D") + assert result.to_julian_date() == 2456658 + + # out-of-bounds + with pytest.raises(ValueError): + pd.to_datetime(1, origin="julian", unit="D") + + def test_invalid_unit(self, units, julian_dates): + + # checking for invalid combination of origin='julian' and unit != D + if units != "D": + with pytest.raises(ValueError): + pd.to_datetime(julian_dates, unit=units, origin="julian") + + def test_invalid_origin(self): + + # need to have a numeric specified + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", origin="1960-01-01") + + with pytest.raises(ValueError): + pd.to_datetime("2005-01-01", 
origin="1960-01-01", unit="D") + + def test_epoch(self, units, epochs, epoch_1960, units_from_epochs): + + expected = Series( + [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs] + ) + + result = Series(pd.to_datetime(units_from_epochs, unit=units, origin=epochs)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "origin, exc", + [ + ("random_string", ValueError), + ("epoch", ValueError), + ("13-24-1990", ValueError), + (datetime(1, 1, 1), tslib.OutOfBoundsDatetime), + ], + ) + def test_invalid_origins(self, origin, exc, units, units_from_epochs): + + with pytest.raises(exc): + pd.to_datetime(units_from_epochs, unit=units, origin=origin) + + def test_invalid_origins_tzinfo(self): + # GH16842 + with pytest.raises(ValueError): + pd.to_datetime(1, unit="D", origin=datetime(2000, 1, 1, tzinfo=pytz.utc)) + + @pytest.mark.parametrize("format", [None, "%Y-%m-%d %H:%M:%S"]) + def test_to_datetime_out_of_bounds_with_format_arg(self, format): + # see gh-23830 + msg = "Out of bounds nanosecond timestamp" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime("2417-10-27 00:00:00", format=format) + + def test_processing_order(self): + # make sure we handle out-of-bounds *before* + # constructing the dates + + result = pd.to_datetime(200 * 365, unit="D") + expected = Timestamp("2169-11-13 00:00:00") + assert result == expected + + result = pd.to_datetime(200 * 365, unit="D", origin="1870-01-01") + expected = Timestamp("2069-11-13 00:00:00") + assert result == expected + + result = pd.to_datetime(300 * 365, unit="D", origin="1870-01-01") + expected = Timestamp("2169-10-20 00:00:00") + assert result == expected + + @pytest.mark.parametrize( + "offset,utc,exp", + [ + ["Z", True, "2019-01-01T00:00:00.000Z"], + ["Z", None, "2019-01-01T00:00:00.000Z"], + ["-01:00", True, "2019-01-01T01:00:00.000Z"], + ["-01:00", None, "2019-01-01T00:00:00.000-01:00"], + ], + ) + def test_arg_tz_ns_unit(self, offset, utc, exp): + # GH 25546 + 
arg = "2019-01-01T00:00:00.000" + offset + result = to_datetime([arg], unit="ns", utc=utc) + expected = to_datetime([exp]) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "listlike,do_caching", + [([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], False), ([1, 1, 1, 1, 4, 5, 6, 7, 8, 9], True)], +) +def test_should_cache(listlike, do_caching): + assert ( + tools.should_cache(listlike, check_count=len(listlike), unique_share=0.7) + == do_caching + ) + + +@pytest.mark.parametrize( + "unique_share,check_count, err_message", + [ + (0.5, 11, r"check_count must be in next bounds: \[0; len\(arg\)\]"), + (10, 2, r"unique_share must be in next bounds: \(0; 1\)"), + ], +) +def test_should_cache_errors(unique_share, check_count, err_message): + arg = [5] * 10 + + with pytest.raises(AssertionError, match=err_message): + tools.should_cache(arg, unique_share, check_count) + + +def test_nullable_integer_to_datetime(): + # Test for #30050 + ser = pd.Series([1, 2, None, 2 ** 61, None]) + ser = ser.astype("Int64") + ser_copy = ser.copy() + + res = pd.to_datetime(ser, unit="ns") + + expected = pd.Series( + [ + np.datetime64("1970-01-01 00:00:00.000000001"), + np.datetime64("1970-01-01 00:00:00.000000002"), + np.datetime64("NaT"), + np.datetime64("2043-01-25 23:56:49.213693952"), + np.datetime64("NaT"), + ] + ) + tm.assert_series_equal(res, expected) + # Check that ser isn't mutated + tm.assert_series_equal(ser, ser_copy) + + +@pytest.mark.parametrize("klass", [np.array, list]) +def test_na_to_datetime(nulls_fixture, klass): + result = pd.to_datetime(klass([nulls_fixture])) + + assert result[0] is pd.NaT diff --git a/pandas/tests/indexes/interval/__init__.py b/pandas/tests/indexes/interval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py new file mode 100644 index 00000000..c94af6c0 --- /dev/null +++ b/pandas/tests/indexes/interval/test_astype.py @@ -0,0 +1,223 @@ 
+import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype, IntervalDtype + +from pandas import ( + CategoricalIndex, + Index, + IntervalIndex, + NaT, + Timedelta, + Timestamp, + interval_range, +) +import pandas._testing as tm + + +class Base: + """Tests common to IntervalIndex with any subtype""" + + def test_astype_idempotent(self, index): + result = index.astype("interval") + tm.assert_index_equal(result, index) + + result = index.astype(index.dtype) + tm.assert_index_equal(result, index) + + def test_astype_object(self, index): + result = index.astype(object) + expected = Index(index.values, dtype="object") + tm.assert_index_equal(result, expected) + assert not result.equals(index) + + def test_astype_category(self, index): + result = index.astype("category") + expected = CategoricalIndex(index.values) + tm.assert_index_equal(result, expected) + + result = index.astype(CategoricalDtype()) + tm.assert_index_equal(result, expected) + + # non-default params + categories = index.dropna().unique().values[:-1] + dtype = CategoricalDtype(categories=categories, ordered=True) + result = index.astype(dtype) + expected = CategoricalIndex(index.values, categories=categories, ordered=True) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [ + "int64", + "uint64", + "float64", + "complex128", + "period[M]", + "timedelta64", + "timedelta64[ns]", + "datetime64", + "datetime64[ns]", + "datetime64[ns, US/Eastern]", + ], + ) + def test_astype_cannot_cast(self, index, dtype): + msg = "Cannot cast IntervalIndex to dtype" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_astype_invalid_dtype(self, index): + msg = "data type [\"']fake_dtype[\"'] not understood" + with pytest.raises(TypeError, match=msg): + index.astype("fake_dtype") + + +class TestIntSubtype(Base): + """Tests specific to IntervalIndex with integer-like subtype""" + + indexes = [ + 
IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")), + IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize( + "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"] + ) + def test_subtype_conversion(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")] + ) + def test_subtype_integer(self, subtype_start, subtype_end): + index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start)) + dtype = IntervalDtype(subtype_end) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype_end), + index.right.astype(subtype_end), + closed=index.closed, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="GH#15832") + def test_subtype_integer_errors(self): + # int64 -> uint64 fails with negative values + index = interval_range(-10, 10) + dtype = IntervalDtype("uint64") + with pytest.raises(ValueError): + index.astype(dtype) + + +class TestFloatSubtype(Base): + """Tests specific to IntervalIndex with float subtype""" + + indexes = [ + interval_range(-10.0, 10.0, closed="neither"), + IntervalIndex.from_arrays( + [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both" + ), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, subtype): + index = interval_range(0.0, 10.0) + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), 
index.right.astype(subtype), closed=index.closed + ) + tm.assert_index_equal(result, expected) + + # raises with NA + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + index.insert(0, np.nan).astype(dtype) + + @pytest.mark.xfail(reason="GH#15832") + def test_subtype_integer_errors(self): + # float64 -> uint64 fails with negative values + index = interval_range(-10.0, 10.0) + dtype = IntervalDtype("uint64") + with pytest.raises(ValueError): + index.astype(dtype) + + # float64 -> integer-like fails with non-integer valued floats + index = interval_range(0.0, 10.0, freq=0.25) + dtype = IntervalDtype("int64") + with pytest.raises(ValueError): + index.astype(dtype) + + dtype = IntervalDtype("uint64") + with pytest.raises(ValueError): + index.astype(dtype) + + @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_subtype_datetimelike(self, index, subtype): + dtype = IntervalDtype(subtype) + msg = "Cannot convert .* to .*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + +class TestDatetimelikeSubtype(Base): + """Tests specific to IntervalIndex with datetime-like subtype""" + + indexes = [ + interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"), + interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT), + interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10), + interval_range(Timedelta("0 days"), periods=10, closed="both"), + interval_range(Timedelta("0 days"), periods=10).insert(2, NaT), + ] + + @pytest.fixture(params=indexes) + def index(self, request): + return request.param + + @pytest.mark.parametrize("subtype", ["int64", "uint64"]) + def test_subtype_integer(self, index, subtype): + dtype = IntervalDtype(subtype) + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), index.right.astype(subtype), closed=index.closed + ) + 
tm.assert_index_equal(result, expected) + + def test_subtype_float(self, index): + dtype = IntervalDtype("float64") + msg = "Cannot convert .* to .*; subtypes are incompatible" + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + def test_subtype_datetimelike(self): + # datetime -> timedelta raises + dtype = IntervalDtype("timedelta64[ns]") + msg = "Cannot convert .* to .*; subtypes are incompatible" + + index = interval_range(Timestamp("2018-01-01"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) + + # timedelta -> datetime raises + dtype = IntervalDtype("datetime64[ns]") + index = interval_range(Timedelta("0 days"), periods=10) + with pytest.raises(TypeError, match=msg): + index.astype(dtype) diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py new file mode 100644 index 00000000..d8c2ba84 --- /dev/null +++ b/pandas/tests/indexes/interval/test_base.py @@ -0,0 +1,88 @@ +import numpy as np +import pytest + +from pandas import IntervalIndex, Series, date_range +import pandas._testing as tm +from pandas.tests.indexes.common import Base + + +class TestBase(Base): + """ + Tests specific to the shared common index tests; unrelated tests should be placed + in test_interval.py or the specific test file (e.g. 
test_astype.py) + """ + + _holder = IntervalIndex + + @pytest.fixture + def indices(self): + return tm.makeIntervalIndex(10) + + def create_index(self, closed="right"): + return IntervalIndex.from_breaks(range(11), closed=closed) + + def test_equals(self, closed): + expected = IntervalIndex.from_breaks(np.arange(5), closed=closed) + assert expected.equals(expected) + assert expected.equals(expected.copy()) + + assert not expected.equals(expected.astype(object)) + assert not expected.equals(np.array(expected)) + assert not expected.equals(list(expected)) + + assert not expected.equals([1, 2]) + assert not expected.equals(np.array([1, 2])) + assert not expected.equals(date_range("20130101", periods=2)) + + expected_name1 = IntervalIndex.from_breaks( + np.arange(5), closed=closed, name="foo" + ) + expected_name2 = IntervalIndex.from_breaks( + np.arange(5), closed=closed, name="bar" + ) + assert expected.equals(expected_name1) + assert expected_name1.equals(expected_name2) + + for other_closed in {"left", "right", "both", "neither"} - {closed}: + expected_other_closed = IntervalIndex.from_breaks( + np.arange(5), closed=other_closed + ) + assert not expected.equals(expected_other_closed) + + def test_repr_max_seq_item_setting(self): + # override base test: not a valid repr as we use interval notation + pass + + def test_repr_roundtrip(self): + # override base test: not a valid repr as we use interval notation + pass + + def test_take(self, closed): + index = self.create_index(closed=closed) + + result = index.take(range(10)) + tm.assert_index_equal(result, index) + + result = index.take([0, 0, 1]) + expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [list, tuple, np.array, Series]) + def test_where(self, closed, klass): + idx = self.create_index(closed=closed) + cond = [True] * len(idx) + expected = idx + result = expected.where(klass(cond)) + 
tm.assert_index_equal(result, expected) + + cond = [False] + [True] * len(idx[1:]) + expected = IntervalIndex([np.nan] + idx[1:].tolist()) + result = idx.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_getitem_2d_deprecated(self): + # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable + idx = self.create_index() + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + idx[:, None] diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py new file mode 100644 index 00000000..837c124d --- /dev/null +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -0,0 +1,423 @@ +from functools import partial + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype +from pandas.core.dtypes.dtypes import IntervalDtype + +from pandas import ( + Categorical, + CategoricalIndex, + Float64Index, + Index, + Int64Index, + Interval, + IntervalIndex, + date_range, + notna, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray +import pandas.core.common as com + + +@pytest.fixture(params=[None, "foo"]) +def name(request): + return request.param + + +class Base: + """ + Common tests for all variations of IntervalIndex construction. Input data + to be supplied in breaks format, then converted by the subclass method + get_kwargs_from_breaks to the expected format. 
+ """ + + @pytest.mark.parametrize( + "breaks", + [ + [3, 14, 15, 92, 653], + np.arange(10, dtype="int64"), + Int64Index(range(-10, 11)), + Float64Index(np.arange(20, 30, 0.5)), + date_range("20180101", periods=10), + date_range("20180101", periods=10, tz="US/Eastern"), + timedelta_range("1 day", periods=10), + ], + ) + def test_constructor(self, constructor, breaks, closed, name): + result_kwargs = self.get_kwargs_from_breaks(breaks, closed) + result = constructor(closed=closed, name=name, **result_kwargs) + + assert result.closed == closed + assert result.name == name + assert result.dtype.subtype == getattr(breaks, "dtype", "int64") + tm.assert_index_equal(result.left, Index(breaks[:-1])) + tm.assert_index_equal(result.right, Index(breaks[1:])) + + @pytest.mark.parametrize( + "breaks, subtype", + [ + (Int64Index([0, 1, 2, 3, 4]), "float64"), + (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"), + (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"), + (Float64Index([0, 1, 2, 3, 4]), "int64"), + (date_range("2017-01-01", periods=5), "int64"), + (timedelta_range("1 day", periods=5), "int64"), + ], + ) + def test_constructor_dtype(self, constructor, breaks, subtype): + # GH 19262: conversion via dtype parameter + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + expected = constructor(**expected_kwargs) + + result_kwargs = self.get_kwargs_from_breaks(breaks) + iv_dtype = IntervalDtype(subtype) + for dtype in (iv_dtype, str(iv_dtype)): + result = constructor(dtype=dtype, **result_kwargs) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) + def test_constructor_nan(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_subtype = np.float64 + expected_values = np.array(breaks[:-1], dtype=object) + + assert result.closed == closed + assert result.dtype.subtype == 
expected_subtype + tm.assert_numpy_array_equal(result._ndarray_values, expected_values) + + @pytest.mark.parametrize( + "breaks", + [ + [], + np.array([], dtype="int64"), + np.array([], dtype="float64"), + np.array([], dtype="datetime64[ns]"), + np.array([], dtype="timedelta64[ns]"), + ], + ) + def test_constructor_empty(self, constructor, breaks, closed): + # GH 18421 + result_kwargs = self.get_kwargs_from_breaks(breaks) + result = constructor(closed=closed, **result_kwargs) + + expected_values = np.array([], dtype=object) + expected_subtype = getattr(breaks, "dtype", np.int64) + + assert result.empty + assert result.closed == closed + assert result.dtype.subtype == expected_subtype + tm.assert_numpy_array_equal(result._ndarray_values, expected_values) + + @pytest.mark.parametrize( + "breaks", + [ + tuple("0123456789"), + list("abcdefghij"), + np.array(list("abcdefghij"), dtype=object), + np.array(list("abcdefghij"), dtype=" Interval(0.5, 1.5) + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index + expected = np.array([True, True]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index <= self.index + tm.assert_numpy_array_equal(actual, expected) + actual = self.index >= self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index < self.index + expected = np.array([False, False]) + tm.assert_numpy_array_equal(actual, expected) + actual = self.index > self.index + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") + tm.assert_numpy_array_equal(actual, expected) + + actual = self.index == self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index.values == self.index + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index <= self.index.values + tm.assert_numpy_array_equal(actual, np.array([True, True])) + actual = self.index != self.index.values + 
tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index > self.index.values + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index.values > self.index + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + # invalid comparisons + actual = self.index == 0 + tm.assert_numpy_array_equal(actual, np.array([False, False])) + actual = self.index == self.index.left + tm.assert_numpy_array_equal(actual, np.array([False, False])) + + with pytest.raises(TypeError, match="unorderable types"): + self.index > 0 + with pytest.raises(TypeError, match="unorderable types"): + self.index <= 0 + msg = r"unorderable types: Interval\(\) > int\(\)" + with pytest.raises(TypeError, match=msg): + self.index > np.arange(2) + msg = "Lengths must match to compare" + with pytest.raises(ValueError, match=msg): + self.index > np.arange(3) + + def test_missing_values(self, closed): + idx = Index( + [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)] + ) + idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) + assert idx.equals(idx2) + + msg = ( + "missing values must be missing in the same location both left " + "and right sides" + ) + with pytest.raises(ValueError, match=msg): + IntervalIndex.from_arrays( + [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed + ) + + tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) + + def test_sort_values(self, closed): + index = self.create_index(closed=closed) + + result = index.sort_values() + tm.assert_index_equal(result, index) + + result = index.sort_values(ascending=False) + tm.assert_index_equal(result, index[::-1]) + + # with nan + index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) + + result = index.sort_values() + expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) + tm.assert_index_equal(result, expected) + + result = index.sort_values(ascending=False) + expected = IntervalIndex([np.nan, 
Interval(1, 2), Interval(0, 1)]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + def test_datetime(self, tz): + start = Timestamp("2000-01-01", tz=tz) + dates = date_range(start=start, periods=10) + index = IntervalIndex.from_breaks(dates) + + # test mid + start = Timestamp("2000-01-01T12:00", tz=tz) + expected = date_range(start=start, periods=9) + tm.assert_index_equal(index.mid, expected) + + # __contains__ doesn't check individual points + assert Timestamp("2000-01-01", tz=tz) not in index + assert Timestamp("2000-01-01T12", tz=tz) not in index + assert Timestamp("2000-01-02", tz=tz) not in index + iv_true = Interval( + Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz) + ) + iv_false = Interval( + Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz) + ) + assert iv_true in index + assert iv_false not in index + + # .contains does check individual points + assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() + assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() + assert index.contains(Timestamp("2000-01-02", tz=tz)).any() + + # test get_indexer + start = Timestamp("1999-12-31T12:00", tz=tz) + target = date_range(start=start, periods=7, freq="12H") + actual = index.get_indexer(target) + expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + start = Timestamp("2000-01-08T18:00", tz=tz) + target = date_range(start=start, periods=7, freq="6H") + actual = index.get_indexer(target) + expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") + tm.assert_numpy_array_equal(actual, expected) + + def test_append(self, closed): + + index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) + index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) + + result = index1.append(index2) + expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) + tm.assert_index_equal(result, expected) + + 
result = index1.append([index1, index2]) + expected = IntervalIndex.from_arrays( + [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed + ) + tm.assert_index_equal(result, expected) + + msg = ( + "can only append two IntervalIndex objects that are closed " + "on the same side" + ) + for other_closed in {"left", "right", "both", "neither"} - {closed}: + index_other_closed = IntervalIndex.from_arrays( + [0, 1], [1, 2], closed=other_closed + ) + with pytest.raises(ValueError, match=msg): + index1.append(index_other_closed) + + def test_is_non_overlapping_monotonic(self, closed): + # Should be True in all cases + tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is True + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is True + + # Should be False in all cases (overlapping) + tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False in all cases (non-monotonic) + tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] + idx = IntervalIndex.from_tuples(tpls, closed=closed) + assert idx.is_non_overlapping_monotonic is False + + idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) + assert idx.is_non_overlapping_monotonic is False + + # Should be False for closed='both', otherwise True (GH16560) + if closed == "both": + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is False + else: + idx = IntervalIndex.from_breaks(range(4), closed=closed) + assert idx.is_non_overlapping_monotonic is True + + @pytest.mark.parametrize( + "start, shift, na_value", + [ + (0, 1, np.nan), + (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), + (Timedelta("0 days"), Timedelta("1 day"), 
pd.NaT), + ], + ) + def test_is_overlapping(self, start, shift, na_value, closed): + # GH 23309 + # see test_interval_tree.py for extensive tests; interface tests here + + # non-overlapping + tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # non-overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is False + + # overlapping + tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # overlapping with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + assert index.is_overlapping is True + + # common endpoints + tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + expected = closed == "both" + assert result is expected + + # common endpoints with NA + tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.is_overlapping + assert result is expected + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))), + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ), + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ), + ], + ) + def test_to_tuples(self, tuples): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples() + expected = Index(com.asarray_tuplesafe(tuples)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "tuples", + [ + list(zip(range(10), range(1, 11))) 
+ [np.nan], + list( + zip( + date_range("20170101", periods=10), + date_range("20170101", periods=10), + ) + ) + + [np.nan], + list( + zip( + timedelta_range("0 days", periods=10), + timedelta_range("1 day", periods=10), + ) + ) + + [np.nan], + ], + ) + @pytest.mark.parametrize("na_tuple", [True, False]) + def test_to_tuples_na(self, tuples, na_tuple): + # GH 18756 + idx = IntervalIndex.from_tuples(tuples) + result = idx.to_tuples(na_tuple=na_tuple) + + # check the non-NA portion + expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) + result_notna = result[:-1] + tm.assert_index_equal(result_notna, expected_notna) + + # check the NA portion + result_na = result[-1] + if na_tuple: + assert isinstance(result_na, tuple) + assert len(result_na) == 2 + assert all(isna(x) for x in result_na) + else: + assert isna(result_na) + + def test_nbytes(self): + # GH 19209 + left = np.arange(0, 4, dtype="i8") + right = np.arange(1, 5, dtype="i8") + + result = IntervalIndex.from_arrays(left, right).nbytes + expected = 64 # 4 * 8 * 2 + assert result == expected + + @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) + def test_set_closed(self, name, closed, new_closed): + # GH 21670 + index = interval_range(0, 5, closed=closed, name=name) + result = index.set_closed(new_closed) + expected = interval_range(0, 5, closed=new_closed, name=name) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) + def test_set_closed_errors(self, bad_closed): + # GH 21670 + index = interval_range(0, 5) + msg = "invalid option for 'closed': {closed}".format(closed=bad_closed) + with pytest.raises(ValueError, match=msg): + index.set_closed(bad_closed) + + def test_is_all_dates(self): + # GH 23576 + year_2017 = pd.Interval( + pd.Timestamp("2017-01-01 00:00:00"), pd.Timestamp("2018-01-01 00:00:00") + ) + year_2017_index = pd.IntervalIndex([year_2017]) + assert not year_2017_index.is_all_dates + + +def 
test_dir(): + # GH#27571 dir(interval_index) should not raise + index = IntervalIndex.from_arrays([0, 1], [1, 2]) + result = dir(index) + assert "str" not in result diff --git a/pandas/tests/indexes/interval/test_interval_range.py b/pandas/tests/indexes/interval/test_interval_range.py new file mode 100644 index 00000000..2f28c33a --- /dev/null +++ b/pandas/tests/indexes/interval/test_interval_range.py @@ -0,0 +1,355 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +from pandas import ( + DateOffset, + Interval, + IntervalIndex, + Timedelta, + Timestamp, + date_range, + interval_range, + timedelta_range, +) +import pandas._testing as tm + +from pandas.tseries.offsets import Day + + +@pytest.fixture(scope="class", params=[None, "foo"]) +def name(request): + return request.param + + +class TestIntervalRange: + @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)]) + def test_constructor_numeric(self, closed, name, freq, periods): + start, end = 0, 100 + breaks = np.arange(101, step=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Eastern"]) + @pytest.mark.parametrize( + "freq, 
periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)] + ) + def test_constructor_timestamp(self, closed, name, freq, periods, tz): + start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz) + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + if not breaks.freq.is_anchored() and tz is None: + # matches expected only for non-anchored offsets and tz naive + # (anchored/DST transitions cause unequal spacing in expected) + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "freq, periods", [("D", 100), ("2D12H", 40), ("5D", 20), ("25D", 4)] + ) + def test_constructor_timedelta(self, closed, name, freq, periods): + start, end = Timedelta("0 days"), Timedelta("100 days") + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed) + + # defined from start/end/freq + result = interval_range( + start=start, end=end, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from start/periods/freq + result = interval_range( + start=start, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # defined from end/periods/freq + 
result = interval_range( + end=end, periods=periods, freq=freq, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + # GH 20976: linspace behavior defined from start/end/periods + result = interval_range( + start=start, end=end, periods=periods, name=name, closed=closed + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, end, freq, expected_endpoint", + [ + (0, 10, 3, 9), + (0, 10, 1.5, 9), + (0.5, 10, 3, 9.5), + (Timedelta("0D"), Timedelta("10D"), "2D4H", Timedelta("8D16H")), + ( + Timestamp("2018-01-01"), + Timestamp("2018-02-09"), + "MS", + Timestamp("2018-02-01"), + ), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-20", tz="US/Eastern"), + "5D12H", + Timestamp("2018-01-17 12:00:00", tz="US/Eastern"), + ), + ], + ) + def test_early_truncation(self, start, end, freq, expected_endpoint): + # index truncates early if freq causes end to be skipped + result = interval_range(start=start, end=end, freq=freq) + result_endpoint = result.right[-1] + assert result_endpoint == expected_endpoint + + @pytest.mark.parametrize( + "start, end, freq", + [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)], + ) + def test_no_invalid_float_truncation(self, start, end, freq): + # GH 21161 + if freq is None: + breaks = [0.5, 1.5, 2.5, 3.5, 4.5] + else: + breaks = [0.5, 2.0, 3.5, 5.0, 6.5] + expected = IntervalIndex.from_breaks(breaks) + + result = interval_range(start=start, end=end, periods=4, freq=freq) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "start, mid, end", + [ + ( + Timestamp("2018-03-10", tz="US/Eastern"), + Timestamp("2018-03-10 23:30:00", tz="US/Eastern"), + Timestamp("2018-03-12", tz="US/Eastern"), + ), + ( + Timestamp("2018-11-03", tz="US/Eastern"), + Timestamp("2018-11-04 00:30:00", tz="US/Eastern"), + Timestamp("2018-11-05", tz="US/Eastern"), + ), + ], + ) + def test_linspace_dst_transition(self, start, mid, end): + # GH 20976: linspace 
behavior defined from start/end/periods + # accounts for the hour gained/lost during DST transition + result = interval_range(start=start, end=end, periods=2) + expected = IntervalIndex.from_breaks([start, mid, end]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", [2, 2.0]) + @pytest.mark.parametrize("end", [10, 10.0]) + @pytest.mark.parametrize("start", [0, 0.0]) + def test_float_subtype(self, start, end, freq): + # Has float subtype if any of start/end/freq are float, even if all + # resulting endpoints can safely be upcast to integers + + # defined from start/end/freq + index = interval_range(start=start, end=end, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end + freq) else "float64" + assert result == expected + + # defined from start/periods/freq + index = interval_range(start=start, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(start + freq) else "float64" + assert result == expected + + # defined from end/periods/freq + index = interval_range(end=end, periods=5, freq=freq) + result = index.dtype.subtype + expected = "int64" if is_integer(end + freq) else "float64" + assert result == expected + + # GH 20976: linspace behavior defined from start/end/periods + index = interval_range(start=start, end=end, periods=5) + result = index.dtype.subtype + expected = "int64" if is_integer(start + end) else "float64" + assert result == expected + + def test_constructor_coverage(self): + # float value for periods + expected = interval_range(start=0, periods=10) + result = interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15") + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = 
interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = [ + "D", + Day(), + Timedelta(days=1), + timedelta(days=1), + DateOffset(days=1), + ] + for freq in equiv_freq: + result = interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = interval_range(start=start, end=end) + + result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the four parameters: start, end, periods, and freq, " + "exactly three must be specified" + ) + + with pytest.raises(ValueError, match=msg): + interval_range(start=0) + + with pytest.raises(ValueError, match=msg): + interval_range(end=5) + + with pytest.raises(ValueError, match=msg): + interval_range(periods=2) + + with pytest.raises(ValueError, match=msg): + interval_range() + + # too many params + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=5, periods=6, freq=1.5) + + # mixed units + msg = "start, end, freq need to be type compatible" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timestamp("20130101"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=Timedelta("1 day"), freq=2) + + with pytest.raises(TypeError, match=msg): + interval_range(start=0, end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timestamp("20130101"), end=10, 
freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2 + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=10, freq="D") + + with pytest.raises(TypeError, match=msg): + interval_range( + start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D" + ) + + with pytest.raises(TypeError, match=msg): + interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2) + + # invalid periods + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + interval_range(start=0, periods="foo") + + # invalid start + msg = "start must be numeric or datetime-like, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start="foo", periods=10) + + # invalid end + msg = r"end must be numeric or datetime-like, got \(0, 1\]" + with pytest.raises(ValueError, match=msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = "freq must be numeric or convertible to DateOffset, got foo" + with pytest.raises(ValueError, match=msg): + interval_range(start=0, end=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(start=Timestamp("20130101"), periods=10, freq="foo") + + with pytest.raises(ValueError, match=msg): + interval_range(end=Timedelta("1 day"), periods=10, freq="foo") + + # mixed tz + start = Timestamp("2017-01-01", tz="US/Eastern") + end = Timestamp("2017-01-07", tz="US/Pacific") + msg = "Start and end cannot both be tz-aware with different timezones" + with pytest.raises(TypeError, match=msg): + interval_range(start=start, end=end) diff --git a/pandas/tests/indexes/interval/test_interval_tree.py b/pandas/tests/indexes/interval/test_interval_tree.py new file mode 100644 index 00000000..476ec1dd --- 
/dev/null +++ b/pandas/tests/indexes/interval/test_interval_tree.py @@ -0,0 +1,193 @@ +from itertools import permutations + +import numpy as np +import pytest + +from pandas._libs.interval import IntervalTree + +from pandas import compat +import pandas._testing as tm + + +def skipif_32bit(param): + """ + Skip parameters in a parametrize on 32bit systems. Specifically used + here to skip leaf_size parameters related to GH 23440. + """ + marks = pytest.mark.skipif( + compat.is_platform_32bit(), reason="GH 23440: int type mismatch on 32bit" + ) + return pytest.param(param, marks=marks) + + +@pytest.fixture(scope="class", params=["int64", "float64", "uint64"]) +def dtype(request): + return request.param + + +@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10]) +def leaf_size(request): + """ + Fixture to specify IntervalTree leaf_size parameter; to be used with the + tree fixture. + """ + return request.param + + +@pytest.fixture( + params=[ + np.arange(5, dtype="int64"), + np.arange(5, dtype="uint64"), + np.arange(5, dtype="float64"), + np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"), + ] +) +def tree(request, leaf_size): + left = request.param + return IntervalTree(left, left + 2, leaf_size=leaf_size) + + +class TestIntervalTree: + def test_get_indexer(self, tree): + result = tree.get_indexer(np.array([1.0, 5.5, 6.5])) + expected = np.array([0, 4, -1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([3.0])) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype) + tree = IntervalTree(left, right) + + result = tree.get_indexer(np.array([target_value], dtype=target_dtype)) + expected = 
np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_non_unique(self, tree): + indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5])) + + result = indexer[:1] + expected = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[1:3]) + expected = np.array([0, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = np.sort(indexer[3:]) + expected = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "dtype, target_value, target_dtype", + [("int64", 2 ** 63 + 1, "uint64"), ("uint64", -1, "int64")], + ) + def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype): + left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype) + tree = IntervalTree(left, right) + target = np.array([target_value], dtype=target_dtype) + + result_indexer, result_missing = tree.get_indexer_non_unique(target) + expected_indexer = np.array([-1], dtype="intp") + tm.assert_numpy_array_equal(result_indexer, expected_indexer) + + expected_missing = np.array([0], dtype="intp") + tm.assert_numpy_array_equal(result_missing, expected_missing) + + def test_duplicates(self, dtype): + left = np.array([0, 0, 0], dtype=dtype) + tree = IntervalTree(left, left + 1) + + with pytest.raises( + KeyError, match="'indexer does not intersect a unique set of intervals'" + ): + tree.get_indexer(np.array([0.5])) + + indexer, missing = tree.get_indexer_non_unique(np.array([0.5])) + result = np.sort(indexer) + expected = np.array([0, 1, 2], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + result = missing + expected = np.array([], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "leaf_size", [skipif_32bit(1), 
skipif_32bit(10), skipif_32bit(100), 10000] + ) + def test_get_indexer_closed(self, closed, leaf_size): + x = np.arange(1000, dtype="float64") + found = x.astype("intp") + not_found = (-1 * np.ones(1000)).astype("intp") + + tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size) + tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25)) + + expected = found if tree.closed_left else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0)) + + expected = found if tree.closed_right else not_found + tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5)) + + @pytest.mark.parametrize( + "left, right, expected", + [ + (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True), + (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True), + (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True), + (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False), + (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False), + ], + ) + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping(self, closed, order, left, right, expected): + # GH 23309 + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + assert result is expected + + @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3)))) + def test_is_overlapping_endpoints(self, closed, order): + """shared endpoints are marked as overlapping""" + # GH 23309 + left, right = np.arange(3, dtype="int64"), np.arange(1, 4) + tree = IntervalTree(left[order], right[order], closed=closed) + result = tree.is_overlapping + expected = closed == "both" + assert result is expected + + @pytest.mark.parametrize( + "left, right", + [ + (np.array([], dtype="int64"), np.array([], dtype="int64")), + (np.array([0], dtype="int64"), np.array([1], dtype="int64")), + (np.array([np.nan]), np.array([np.nan])), + (np.array([np.nan] * 3), np.array([np.nan] * 3)), + ], + ) + def 
test_is_overlapping_trivial(self, closed, left, right): + # GH 23309 + tree = IntervalTree(left, right, closed=closed) + assert tree.is_overlapping is False + + @pytest.mark.skipif(compat.is_platform_32bit(), reason="GH 23440") + def test_construction_overflow(self): + # GH 25485 + left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101 + tree = IntervalTree(left, right) + + # pivot should be average of left/right medians + result = tree.root.pivot + expected = (50 + np.iinfo(np.int64).max) / 2 + assert result == expected diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py new file mode 100644 index 00000000..3246ac6b --- /dev/null +++ b/pandas/tests/indexes/interval/test_setops.py @@ -0,0 +1,187 @@ +import numpy as np +import pytest + +from pandas import Index, IntervalIndex, Timestamp, interval_range +import pandas._testing as tm + + +@pytest.fixture(scope="class", params=[None, "foo"]) +def name(request): + return request.param + + +@pytest.fixture(params=[None, False]) +def sort(request): + return request.param + + +def monotonic_index(start, end, dtype="int64", closed="right"): + return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed) + + +def empty_index(dtype="int64", closed="right"): + return IntervalIndex(np.array([], dtype=dtype), closed=closed) + + +class TestIntervalIndex: + def test_union(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(0, 13, closed=closed) + result = index[::-1].union(other, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + result = other[::-1].union(index, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + tm.assert_index_equal(index.union(index, sort=sort), index) + 
tm.assert_index_equal(index.union(index[:1], sort=sort), index) + + # GH 19101: empty result, same dtype + index = empty_index(dtype="int64", closed=closed) + result = index.union(index, sort=sort) + tm.assert_index_equal(result, index) + + # GH 19101: empty result, different dtypes + other = empty_index(dtype="float64", closed=closed) + result = index.union(other, sort=sort) + tm.assert_index_equal(result, index) + + def test_intersection(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + other = monotonic_index(5, 13, closed=closed) + + expected = monotonic_index(5, 11, closed=closed) + result = index[::-1].intersection(other, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + result = other[::-1].intersection(index, sort=sort) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + tm.assert_index_equal(index.intersection(index, sort=sort), index) + + # GH 19101: empty result, same dtype + other = monotonic_index(300, 314, closed=closed) + expected = empty_index(dtype="int64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + other = monotonic_index(300, 314, dtype="float64", closed=closed) + result = index.intersection(other, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 26225: nested intervals + index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)]) + other = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (1, 3)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225: duplicate element + index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)]) + other = IntervalIndex.from_tuples([(1, 2), (2, 3)]) + expected = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3)]) + result = index.intersection(other) 
+ tm.assert_index_equal(result, expected) + + # GH 26225 + index = IntervalIndex.from_tuples([(0, 3), (0, 2)]) + other = IntervalIndex.from_tuples([(0, 2), (1, 3)]) + expected = IntervalIndex.from_tuples([(0, 2)]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + # GH 26225: duplicate nan element + index = IntervalIndex([np.nan, np.nan]) + other = IntervalIndex([np.nan]) + expected = IntervalIndex([np.nan]) + result = index.intersection(other) + tm.assert_index_equal(result, expected) + + def test_difference(self, closed, sort): + index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) + result = index.difference(index[:1], sort=sort) + expected = index[1:] + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, same dtype + result = index.difference(index, sort=sort) + expected = empty_index(dtype="int64", closed=closed) + tm.assert_index_equal(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + index.left.astype("float64"), index.right, closed=closed + ) + result = index.difference(other, sort=sort) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self, closed, sort): + index = monotonic_index(0, 11, closed=closed) + result = index[1:].symmetric_difference(index[:-1], sort=sort) + expected = IntervalIndex([index[0], index[-1]]) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + # GH 19101: empty result, same dtype + result = index.symmetric_difference(index, sort=sort) + expected = empty_index(dtype="int64", closed=closed) + if sort is None: + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + # GH 19101: empty result, different dtypes + other = IntervalIndex.from_arrays( + index.left.astype("float64"), index.right, closed=closed + ) + result = 
index.symmetric_difference(other, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "op_name", ["union", "intersection", "difference", "symmetric_difference"] + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_set_incompatible_types(self, closed, op_name, sort): + index = monotonic_index(0, 11, closed=closed) + set_op = getattr(index, op_name) + + # TODO: standardize return type of non-union setops type(self vs other) + # non-IntervalIndex + if op_name == "difference": + expected = index + else: + expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3])) + result = set_op(Index([1, 2, 3]), sort=sort) + tm.assert_index_equal(result, expected) + + # mixed closed + msg = ( + "can only do set operations between two IntervalIndex objects " + "that are closed on the same side" + ) + for other_closed in {"right", "left", "both", "neither"} - {closed}: + other = monotonic_index(0, 11, closed=other_closed) + with pytest.raises(ValueError, match=msg): + set_op(other, sort=sort) + + # GH 19016: incompatible dtypes + other = interval_range(Timestamp("20180101"), periods=9, closed=closed) + msg = ( + "can only do {op} between two IntervalIndex objects that have " + "compatible dtypes" + ).format(op=op_name) + with pytest.raises(TypeError, match=msg): + set_op(other, sort=sort) diff --git a/pandas/tests/indexes/multi/__init__.py b/pandas/tests/indexes/multi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/multi/conftest.py b/pandas/tests/indexes/multi/conftest.py new file mode 100644 index 00000000..acaea4ff --- /dev/null +++ b/pandas/tests/indexes/multi/conftest.py @@ -0,0 +1,85 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, MultiIndex + + +@pytest.fixture +def idx(): + # a MultiIndex used to test the general functionality of the + # general functionality of this object + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = 
Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi + + +@pytest.fixture +def idx_dup(): + # compare tests/indexes/multi/conftest.py + major_axis = Index(["foo", "bar", "baz", "qux"]) + minor_axis = Index(["one", "two"]) + + major_codes = np.array([0, 0, 1, 0, 1, 1]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + index_names = ["first", "second"] + mi = MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=index_names, + verify_integrity=False, + ) + return mi + + +@pytest.fixture +def index_names(): + # names that match those in the idx fixture for testing equality of + # names assigned to the idx + return ["first", "second"] + + +@pytest.fixture +def holder(): + # the MultiIndex constructor used to base compatibility with pickle + return MultiIndex + + +@pytest.fixture +def compat_props(): + # a MultiIndex must have these properties associated with it + return ["shape", "ndim", "size"] + + +@pytest.fixture +def narrow_multi_index(): + """ + Return a MultiIndex that is narrower than the display (<80 characters). + """ + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + return pd.MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"]) + + +@pytest.fixture +def wide_multi_index(): + """ + Return a MultiIndex that is wider than the display (>80 characters). 
+ """ + n = 1000 + ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n)) + dti = pd.date_range("2000-01-01", freq="s", periods=n * 2) + levels = [ci, ci.codes + 9, dti, dti, dti] + names = ["a", "b", "dti_1", "dti_2", "dti_3"] + return pd.MultiIndex.from_arrays(levels, names=names) diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py new file mode 100644 index 00000000..f04776e5 --- /dev/null +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -0,0 +1,356 @@ +import numpy as np +import pytest + +from pandas.compat.numpy import _np_version_under1p17 + +import pandas as pd +from pandas import Index, MultiIndex, date_range, period_range +import pandas._testing as tm + + +def test_shift(idx): + + # GH8083 test the base class for shift + msg = "Not supported for type MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1) + with pytest.raises(NotImplementedError, match=msg): + idx.shift(1, 2) + + +def test_groupby(idx): + groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2])) + labels = idx.tolist() + exp = {1: labels[:3], 2: labels[3:]} + tm.assert_dict_equal(groups, exp) + + # GH5620 + groups = idx.groupby(idx) + exp = {key: [key] for key in idx} + tm.assert_dict_equal(groups, exp) + + +def test_truncate(): + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 0, 1]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + result = index.truncate(before=1) + assert "foo" not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(after=1) + assert 2 not in result.levels[0] + assert 1 in result.levels[0] + + result = index.truncate(before=1, after=2) + assert len(result.levels[0]) == 2 + + msg = "after < before" + with pytest.raises(ValueError, match=msg): + index.truncate(3, 1) + + +def test_where(): + i = 
MultiIndex.from_tuples([("A", 1), ("A", 2)]) + + msg = r"\.where is not supported for MultiIndex operations" + with pytest.raises(NotImplementedError, match=msg): + i.where(True) + + +@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) +def test_where_array_like(klass): + i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + cond = [False, True] + msg = r"\.where is not supported for MultiIndex operations" + with pytest.raises(NotImplementedError, match=msg): + i.where(klass(cond)) + + +# TODO: reshape + + +def test_reorder_levels(idx): + # this blows up + with pytest.raises(IndexError, match="^Too many levels"): + idx.reorder_levels([2, 1, 0]) + + +def test_numpy_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(np.repeat(m, reps), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(m, reps, axis=1) + + +def test_append_mixed_dtypes(): + # GH 13660 + dti = date_range("2011-01-01", freq="M", periods=3) + dti_tz = date_range("2011-01-01", freq="M", periods=3, tz="US/Eastern") + pi = period_range("2011-01", freq="M", periods=3) + + mi = MultiIndex.from_arrays( + [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi] + ) + assert mi.nlevels == 6 + + res = mi.append(mi) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, 1, 2, 3], + [1.1, np.nan, 3.3, 1.1, np.nan, 3.3], + ["a", "b", "c", "a", "b", "c"], + dti.append(dti), + dti_tz.append(dti_tz), + pi.append(pi), + ] + ) + tm.assert_index_equal(res, exp) + + other = MultiIndex.from_arrays( + [ + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ["x", "y", "z"], + ] + ) + + res = mi.append(other) + exp = MultiIndex.from_arrays( + [ + [1, 2, 3, "x", "y", "z"], + [1.1, np.nan, 3.3, "x", "y", "z"], + 
["a", "b", "c", "x", "y", "z"], + dti.append(pd.Index(["x", "y", "z"])), + dti_tz.append(pd.Index(["x", "y", "z"])), + pi.append(pd.Index(["x", "y", "z"])), + ] + ) + tm.assert_index_equal(res, exp) + + +def test_take(idx): + indexer = [4, 3, 0, 2] + result = idx.take(indexer) + expected = idx[indexer] + assert result.equals(expected) + + # TODO: Remove Commented Code + # if not isinstance(idx, + # (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + # GH 10791 + msg = "'MultiIndex' object has no attribute 'freq'" + with pytest.raises(AttributeError, match=msg): + idx.freq + + +def test_take_invalid_kwargs(idx): + idx = idx + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_take_fill_value(): + # GH 12631 + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = pd.MultiIndex.from_product(vals, names=["str", "dt"]) + + result = idx.take(np.array([1, 0, -1])) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), + (np.nan, pd.NaT), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + exp_vals = [ + ("A", pd.Timestamp("2011-01-02")), + ("A", pd.Timestamp("2011-01-01")), 
+ ("B", pd.Timestamp("2011-01-02")), + ] + expected = pd.MultiIndex.from_tuples(exp_vals, names=["str", "dt"]) + tm.assert_index_equal(result, expected) + + msg = "When allow_fill=True and fill_value is not None, all indices must be >= -1" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for( axis 0 with)? size 4" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +def test_iter(idx): + result = list(idx) + expected = [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ] + assert result == expected + + +def test_sub(idx): + + first = idx + + # - now raises (previously was set op difference) + msg = "cannot perform __sub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first - idx[-3:] + with pytest.raises(TypeError, match=msg): + idx[-3:] - first + with pytest.raises(TypeError, match=msg): + idx[-3:] - first.tolist() + msg = "cannot perform __rsub__ with this index type: MultiIndex" + with pytest.raises(TypeError, match=msg): + first.tolist() - idx[-3:] + + +def test_map(idx): + # callable + index = idx + + # we don't infer UInt64 + if isinstance(index, pd.UInt64Index): + expected = index.astype("int64") + else: + expected = index + + result = index.map(lambda x: x) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "mapper", + [ + lambda values, idx: {i: e for e, i in zip(values, idx)}, + lambda values, idx: pd.Series(values, idx), + ], +) +def test_map_dictlike(idx, mapper): + + if isinstance(idx, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip(f"skipping tests for {type(idx)}") + + identity = mapper(idx.values, idx) + + # we don't infer to UInt64 for a dict + if isinstance(idx, pd.UInt64Index) and isinstance(identity, dict): + expected = 
idx.astype("int64") + else: + expected = idx + + result = idx.map(identity) + tm.assert_index_equal(result, expected) + + # empty mappable + expected = pd.Index([np.nan] * len(idx)) + result = idx.map(mapper(expected, idx)) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "func", + [ + np.exp, + np.exp2, + np.expm1, + np.log, + np.log2, + np.log10, + np.log1p, + np.sqrt, + np.sin, + np.cos, + np.tan, + np.arcsin, + np.arccos, + np.arctan, + np.sinh, + np.cosh, + np.tanh, + np.arcsinh, + np.arccosh, + np.arctanh, + np.deg2rad, + np.rad2deg, + ], + ids=lambda func: func.__name__, +) +def test_numpy_ufuncs(idx, func): + # test ufuncs of numpy. see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + if _np_version_under1p17: + expected_exception = AttributeError + msg = f"'tuple' object has no attribute '{func.__name__}'" + else: + expected_exception = TypeError + msg = ( + "loop of ufunc does not support argument 0 of type tuple which" + f" has no callable {func.__name__} method" + ) + with pytest.raises(expected_exception, match=msg): + func(idx) + + +@pytest.mark.parametrize( + "func", + [np.isfinite, np.isinf, np.isnan, np.signbit], + ids=lambda func: func.__name__, +) +def test_numpy_type_funcs(idx, func): + msg = ( + f"ufunc '{func.__name__}' not supported for the input types, and the inputs " + "could not be safely coerced to any supported types according to " + "the casting rule ''safe''" + ) + with pytest.raises(TypeError, match=msg): + func(idx) diff --git a/pandas/tests/indexes/multi/test_astype.py b/pandas/tests/indexes/multi/test_astype.py new file mode 100644 index 00000000..29908537 --- /dev/null +++ b/pandas/tests/indexes/multi/test_astype.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas._testing as tm + + +def test_astype(idx): + expected = idx.copy() + actual = idx.astype("O") + tm.assert_copy(actual.levels, expected.levels) + 
tm.assert_copy(actual.codes, expected.codes) + assert actual.names == list(expected.names) + + with pytest.raises(TypeError, match="^Setting.*dtype.*object"): + idx.astype(np.dtype(int)) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_astype_category(idx, ordered): + # GH 18630 + msg = "> 1 ndim Categorical are not supported at this time" + with pytest.raises(NotImplementedError, match=msg): + idx.astype(CategoricalDtype(ordered=ordered)) + + if ordered is False: + # dtype='category' defaults to ordered=False, so only test once + with pytest.raises(NotImplementedError, match=msg): + idx.astype("category") diff --git a/pandas/tests/indexes/multi/test_compat.py b/pandas/tests/indexes/multi/test_compat.py new file mode 100644 index 00000000..d92cff1e --- /dev/null +++ b/pandas/tests/indexes/multi/test_compat.py @@ -0,0 +1,123 @@ +import numpy as np +import pytest + +from pandas import MultiIndex +import pandas._testing as tm + + +def test_numeric_compat(idx): + with pytest.raises(TypeError, match="cannot perform __mul__"): + idx * 1 + + with pytest.raises(TypeError, match="cannot perform __rmul__"): + 1 * idx + + div_err = "cannot perform __truediv__" + with pytest.raises(TypeError, match=div_err): + idx / 1 + + div_err = div_err.replace(" __", " __r") + with pytest.raises(TypeError, match=div_err): + 1 / idx + + with pytest.raises(TypeError, match="cannot perform __floordiv__"): + idx // 1 + + with pytest.raises(TypeError, match="cannot perform __rfloordiv__"): + 1 // idx + + +@pytest.mark.parametrize("method", ["all", "any"]) +def test_logical_compat(idx, method): + msg = "cannot perform {method}".format(method=method) + + with pytest.raises(TypeError, match=msg): + getattr(idx, method)() + + +def test_boolean_context_compat(idx): + + with pytest.raises(ValueError): + bool(idx) + + +def test_boolean_context_compat2(): + + # boolean context compat + # GH7897 + i1 = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + i2 = MultiIndex.from_tuples([("A", 1), 
("A", 3)]) + common = i1.intersection(i2) + + with pytest.raises(ValueError): + bool(common) + + +def test_inplace_mutation_resets_values(): + levels = [["a", "b", "c"], [4]] + levels2 = [[1, 2, 3], ["a"]] + codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]] + + mi1 = MultiIndex(levels=levels, codes=codes) + mi2 = MultiIndex(levels=levels2, codes=codes) + vals = mi1.values.copy() + vals2 = mi2.values.copy() + + assert mi1._tuples is not None + + # Make sure level setting works + new_vals = mi1.set_levels(levels2).values + tm.assert_almost_equal(vals2, new_vals) + + # Non-inplace doesn't kill _tuples [implementation detail] + tm.assert_almost_equal(mi1._tuples, vals) + + # ...and values is still same too + tm.assert_almost_equal(mi1.values, vals) + + # Inplace should kill _tuples + mi1.set_levels(levels2, inplace=True) + tm.assert_almost_equal(mi1.values, vals2) + + # Make sure label setting works too + codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]] + exp_values = np.empty((6,), dtype=object) + exp_values[:] = [(1, "a")] * 6 + + # Must be 1d array of tuples + assert exp_values.shape == (6,) + new_values = mi2.set_codes(codes2).values + + # Not inplace shouldn't change + tm.assert_almost_equal(mi2._tuples, vals2) + + # Should have correct values + tm.assert_almost_equal(exp_values, new_values) + + # ...and again setting inplace should kill _tuples, etc + mi2.set_codes(codes2, inplace=True) + tm.assert_almost_equal(mi2.values, new_values) + + +def test_ndarray_compat_properties(idx, compat_props): + assert idx.T.equals(idx) + assert idx.transpose().equals(idx) + + values = idx.values + for prop in compat_props: + assert getattr(idx, prop) == getattr(values, prop) + + # test for validity + idx.nbytes + idx.values.nbytes + + +def test_compat(indices): + assert indices.tolist() == list(indices) + + +def test_pickle_compat_construction(holder): + # this is testing for pickle compat + # need an object to create with + with pytest.raises(TypeError, match="Must pass both 
levels and codes"): + holder() diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py new file mode 100644 index 00000000..2c4b3ce0 --- /dev/null +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -0,0 +1,725 @@ +import numpy as np +import pytest + +from pandas._libs.tslib import Timestamp + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import Index, MultiIndex, date_range +import pandas._testing as tm + + +def test_constructor_single_level(): + result = MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + assert isinstance(result, MultiIndex) + expected = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["first"] + + +def test_constructor_no_levels(): + msg = "non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=[], codes=[]) + + msg = "Must pass both levels and codes" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=[]) + with pytest.raises(TypeError, match=msg): + MultiIndex(codes=[]) + + +def test_constructor_nonhashable_names(): + # GH 20527 + levels = [[1, 2], ["one", "two"]] + codes = [[0, 0, 1, 1], [0, 1, 0, 1]] + names = (["foo"], ["bar"]) + msg = r"MultiIndex\.name must be a hashable type" + with pytest.raises(TypeError, match=msg): + MultiIndex(levels=levels, codes=codes, names=names) + + # With .rename() + mi = MultiIndex( + levels=[[1, 2], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=("foo", "bar"), + ) + renamed = [["foor"], ["barr"]] + with pytest.raises(TypeError, match=msg): + mi.rename(names=renamed) + + # With .set_names() + with pytest.raises(TypeError, match=msg): + mi.set_names(names=renamed) + + +def test_constructor_mismatched_codes_levels(idx): + codes = [np.array([1]), np.array([2]), np.array([3])] + levels = ["a"] + 
+ + msg = "Length of levels and codes must be the same" + with pytest.raises(ValueError, match=msg): + MultiIndex(levels=levels, codes=codes) + + length_error = ( + r"On level 0, code max \(3\) >= length of level \(1\)\. " + "NOTE: this index is in an inconsistent state" + ) + label_error = r"Unequal code lengths: \[4, 2\]" + code_value_error = r"On level 0, code value \(-2\) < -1" + + # important to check that it's looking at the right thing. + with pytest.raises(ValueError, match=length_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]]) + + with pytest.raises(ValueError, match=label_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]]) + + # external API + with pytest.raises(ValueError, match=length_error): + idx.copy().set_levels([["a"], ["b"]]) + + with pytest.raises(ValueError, match=label_error): + idx.copy().set_codes([[0, 0, 0, 0], [0, 0]]) + + # test set_codes with verify_integrity=False + # the setting should not raise any value error + idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False) + + # code value smaller than -1 + with pytest.raises(ValueError, match=code_value_error): + MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]]) + + +def test_na_levels(): + # GH26408 + # test if codes are re-assigned value -1 for levels + # with missing values (NaN, NaT, None) + result = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]] + ) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]] + ) + expected = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]] + ) + tm.assert_index_equal(result, expected) + + # verify set_levels and set_codes + result = MultiIndex( + levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]] +
).set_levels([[np.nan, "s", pd.NaT, 128, None]]) + tm.assert_index_equal(result, expected) + + result = MultiIndex( + levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]] + ).set_codes([[0, -1, 1, 2, 3, 4]]) + tm.assert_index_equal(result, expected) + + +def test_copy_in_constructor(): + levels = np.array(["a", "b", "c"]) + codes = np.array([1, 1, 2, 0, 0, 1, 1]) + val = codes[0] + mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True) + assert mi.codes[0][0] == val + codes[0] = 15 + assert mi.codes[0][0] == val + val = levels[0] + levels[0] = "PANDA" + assert mi.levels[0][0] == val + + +# ---------------------------------------------------------------------------- +# from_arrays +# ---------------------------------------------------------------------------- +def test_from_arrays(idx): + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes) + ] + + # list of arrays as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + # infer correctly + result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]]) + assert result.levels[0].equals(Index([Timestamp("20130101")])) + assert result.levels[1].equals(Index(["a", "b"])) + + +def test_from_arrays_iterator(idx): + # GH 18434 + arrays = [ + np.asarray(lev).take(level_codes) + for lev, level_codes in zip(idx.levels, idx.codes) + ] + + # iterator as input + result = MultiIndex.from_arrays(iter(arrays), names=idx.names) + tm.assert_index_equal(result, idx) + + # invalid iterator input + msg = "Input must be a list / sequence of array-likes." 
+ with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(0) + + +def test_from_arrays_tuples(idx): + arrays = tuple( + tuple(np.asarray(lev).take(level_codes)) + for lev, level_codes in zip(idx.levels, idx.codes) + ) + + # tuple of tuples as input + result = MultiIndex.from_arrays(arrays, names=idx.names) + tm.assert_index_equal(result, idx) + + +def test_from_arrays_index_series_datetimetz(): + idx1 = pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + idx2 = pd.date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo") + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_timedelta(): + idx1 = pd.timedelta_range("1 days", freq="D", periods=3) + idx2 = pd.timedelta_range("2 hours", freq="H", periods=3) + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_period(): + idx1 = pd.period_range("2011-01-01", freq="D", periods=3) + idx2 = pd.period_range("2015-01-01", freq="H", periods=3) + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + 
tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_datetimelike_mixed(): + idx1 = pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") + idx2 = pd.date_range("2015-01-01 10:00", freq="H", periods=3) + idx3 = pd.timedelta_range("1 days", freq="D", periods=3) + idx4 = pd.period_range("2011-01-01", freq="D", periods=3) + + result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + tm.assert_index_equal(result.get_level_values(2), idx3) + tm.assert_index_equal(result.get_level_values(3), idx4) + + result2 = pd.MultiIndex.from_arrays( + [pd.Series(idx1), pd.Series(idx2), pd.Series(idx3), pd.Series(idx4)] + ) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + tm.assert_index_equal(result2.get_level_values(2), idx3) + tm.assert_index_equal(result2.get_level_values(3), idx4) + + tm.assert_index_equal(result, result2) + + +def test_from_arrays_index_series_categorical(): + # GH13743 + idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False) + idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True) + + result = pd.MultiIndex.from_arrays([idx1, idx2]) + tm.assert_index_equal(result.get_level_values(0), idx1) + tm.assert_index_equal(result.get_level_values(1), idx2) + + result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) + tm.assert_index_equal(result2.get_level_values(0), idx1) + tm.assert_index_equal(result2.get_level_values(1), idx2) + + result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values]) + tm.assert_index_equal(result3.get_level_values(0), idx1) + tm.assert_index_equal(result3.get_level_values(1), idx2) + + +def test_from_arrays_empty(): + # 0 levels 
+ msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays(arrays=[]) + + # 1 level + result = MultiIndex.from_arrays(arrays=[[]], names=["A"]) + assert isinstance(result, MultiIndex) + expected = Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + # N levels + for N in [2, 3]: + arrays = [[]] * N + names = list("ABC")[:N] + result = MultiIndex.from_arrays(arrays=arrays, names=names) + expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_sequence_of_arrays", + [ + 1, + [1], + [1, 2], + [[1], 2], + [1, [2]], + "a", + ["a"], + ["a", "b"], + [["a"], "b"], + (1,), + (1, 2), + ([1], 2), + (1, [2]), + "a", + ("a",), + ("a", "b"), + (["a"], "b"), + [(1,), 2], + [1, (2,)], + [("a",), "b"], + ((1,), 2), + (1, (2,)), + (("a",), "b"), + ], +) +def test_from_arrays_invalid_input(invalid_sequence_of_arrays): + msg = "Input must be a list / sequence of array-likes" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays) + + +@pytest.mark.parametrize( + "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])] +) +def test_from_arrays_different_lengths(idx1, idx2): + # see gh-13599 + msg = "^all arrays must be same length$" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_arrays([idx1, idx2]) + + +def test_from_arrays_respects_none_names(): + # GH27292 + a = pd.Series([1, 2, 3], name="foo") + b = pd.Series(["a", "b", "c"], name="bar") + + result = MultiIndex.from_arrays([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None + ) + + tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_tuples +# 
---------------------------------------------------------------------------- +def test_from_tuples(): + msg = "Cannot infer number of levels from empty list" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples([]) + + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + # input tuples + result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_iterator(): + # GH 18434 + # input iterator for tuples + expected = MultiIndex( + levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"] + ) + + result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"]) + tm.assert_index_equal(result, expected) + + # input non-iterables + msg = "Input must be a list / sequence of tuple-likes." + with pytest.raises(TypeError, match=msg): + MultiIndex.from_tuples(0) + + +def test_from_tuples_empty(): + # GH 16777 + result = MultiIndex.from_tuples([], names=["a", "b"]) + expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"]) + tm.assert_index_equal(result, expected) + + +def test_from_tuples_index_values(idx): + result = MultiIndex.from_tuples(idx) + assert (result.values == idx.values).all() + + +def test_tuples_with_name_string(): + # GH 15110 and GH 14848 + + li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] + msg = "Names should be list-like for a MultiIndex" + with pytest.raises(ValueError, match=msg): + pd.Index(li, name="abc") + with pytest.raises(ValueError, match=msg): + pd.Index(li, name="a") + + +def test_from_tuples_with_tuple_label(): + # GH 15457 + expected = pd.DataFrame( + [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"] + ).set_index(["a", "b"]) + idx = pd.MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b")) + result = pd.DataFrame([2, 3], columns=["c"], index=idx) + tm.assert_frame_equal(expected, result) + + +# ---------------------------------------------------------------------------- +# 
from_product +# ---------------------------------------------------------------------------- +def test_from_product_empty_zero_levels(): + # 0 levels + msg = "Must pass non-zero number of levels/codes" + with pytest.raises(ValueError, match=msg): + MultiIndex.from_product([]) + + +def test_from_product_empty_one_level(): + result = MultiIndex.from_product([[]], names=["A"]) + expected = pd.Index([], name="A") + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ["A"] + + +@pytest.mark.parametrize( + "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])] +) +def test_from_product_empty_two_levels(first, second): + names = ["A", "B"] + result = MultiIndex.from_product([first, second], names=names) + expected = MultiIndex(levels=[first, second], codes=[[], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("N", list(range(4))) +def test_from_product_empty_three_levels(N): + # GH12258 + names = ["A", "B", "C"] + lvl2 = list(range(N)) + result = MultiIndex.from_product([[], lvl2, []], names=names) + expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]] +) +def test_from_product_invalid_input(invalid_input): + msg = r"Input must be a list / sequence of iterables|Input must be list-like" + with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(iterables=invalid_input) + + +def test_from_product_datetimeindex(): + dt_index = date_range("2000-01-01", periods=2) + mi = pd.MultiIndex.from_product([[1, 2], dt_index]) + etalon = construct_1d_object_array_from_listlike( + [ + (1, pd.Timestamp("2000-01-01")), + (1, pd.Timestamp("2000-01-02")), + (2, pd.Timestamp("2000-01-01")), + (2, pd.Timestamp("2000-01-02")), + ] + ) + tm.assert_numpy_array_equal(mi.values, etalon) + + 
+@pytest.mark.parametrize("ordered", [False, True]) +@pytest.mark.parametrize("f", [lambda x: x, lambda x: pd.Series(x), lambda x: x.values]) +def test_from_product_index_series_categorical(ordered, f): + # GH13743 + first = ["foo", "bar"] + + idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered) + expected = pd.CategoricalIndex( + list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered + ) + + result = pd.MultiIndex.from_product([first, f(idx)]) + tm.assert_index_equal(result.get_level_values(1), expected) + + +def test_from_product(): + + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + result = MultiIndex.from_product([first, second], names=names) + + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + tm.assert_index_equal(result, expected) + + +def test_from_product_iterator(): + # GH 18434 + first = ["foo", "bar", "buz"] + second = ["a", "b", "c"] + names = ["first", "second"] + tuples = [ + ("foo", "a"), + ("foo", "b"), + ("foo", "c"), + ("bar", "a"), + ("bar", "b"), + ("bar", "c"), + ("buz", "a"), + ("buz", "b"), + ("buz", "c"), + ] + expected = MultiIndex.from_tuples(tuples, names=names) + + # iterator as input + result = MultiIndex.from_product(iter([first, second]), names=names) + tm.assert_index_equal(result, expected) + + # Invalid non-iterable input + msg = "Input must be a list / sequence of iterables." 
+ with pytest.raises(TypeError, match=msg): + MultiIndex.from_product(0) + + +@pytest.mark.parametrize( + "a, b, expected_names", + [ + ( + pd.Series([1, 2, 3], name="foo"), + pd.Series(["a", "b"], name="bar"), + ["foo", "bar"], + ), + (pd.Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), + ([1, 2, 3], ["a", "b"], None), + ], +) +def test_from_product_infer_names(a, b, expected_names): + # GH27292 + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=expected_names, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_respects_none_names(): + # GH27292 + a = pd.Series([1, 2, 3], name="foo") + b = pd.Series(["a", "b"], name="bar") + + result = MultiIndex.from_product([a, b], names=None) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=None, + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_readonly(): + # GH#15286 passing read-only array to from_product + a = np.array(range(3)) + b = ["a", "b"] + expected = MultiIndex.from_product([a, b]) + + a.setflags(write=False) + result = MultiIndex.from_product([a, b]) + tm.assert_index_equal(result, expected) + + +def test_create_index_existing_name(idx): + + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name + index = idx + index.names = ["foo", "bar"] + result = pd.Index(index) + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ) + ) + tm.assert_index_equal(result, expected) + + result = pd.Index(index, name="A") + expected = Index( + Index( + [ + ("foo", "one"), + ("foo", "two"), + ("bar", "one"), + ("baz", "two"), + ("qux", "one"), + ("qux", "two"), + ], + dtype="object", + ), + name="A", + ) + 
tm.assert_index_equal(result, expected) + + +# ---------------------------------------------------------------------------- +# from_frame +# ---------------------------------------------------------------------------- +def test_from_frame(): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"] + ) + expected = pd.MultiIndex.from_tuples( + [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"] + ) + result = pd.MultiIndex.from_frame(df) + tm.assert_index_equal(expected, result) + + +@pytest.mark.parametrize( + "non_frame", + [ + pd.Series([1, 2, 3, 4]), + [1, 2, 3, 4], + [[1, 2], [3, 4], [5, 6]], + pd.Index([1, 2, 3, 4]), + np.array([[1, 2], [3, 4], [5, 6]]), + 27, + ], +) +def test_from_frame_error(non_frame): + # GH 22420 + with pytest.raises(TypeError, match="Input must be a DataFrame"): + pd.MultiIndex.from_frame(non_frame) + + +def test_from_frame_dtype_fidelity(): + # GH 22420 + df = pd.DataFrame( + { + "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"), + "a": [1, 1, 1, 2, 2, 2], + "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + "c": ["x", "x", "y", "z", "x", "y"], + } + ) + original_dtypes = df.dtypes.to_dict() + + expected_mi = pd.MultiIndex.from_arrays( + [ + pd.date_range("19910905", periods=6, tz="US/Eastern"), + [1, 1, 1, 2, 2, 2], + pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True), + ["x", "x", "y", "z", "x", "y"], + ], + names=["dates", "a", "b", "c"], + ) + mi = pd.MultiIndex.from_frame(df) + mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)} + + tm.assert_index_equal(expected_mi, mi) + assert original_dtypes == mi_dtypes + + +@pytest.mark.parametrize( + "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])] +) +def test_from_frame_valid_names(names_in, names_out): + # GH 22420 + df = pd.DataFrame( + [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], + 
def test_contains_with_nat():
    # MI with a NaT: the -1 code on the second level marks a missing date.
    dates = pd.date_range("2012-01-01", periods=5)
    mi = MultiIndex(
        levels=[["C"], dates],
        codes=[[0] * 6, list(range(-1, 5))],
        names=[None, "B"],
    )

    assert ("C", pd.Timestamp("2012-01-01")) in mi
    # Every stored entry, including the one holding NaT, must be found again.
    assert all(entry in mi for entry in mi.values)
def test_isin():
    # Tuples to look up; only the "foo" and "bar" rows exist in the index.
    candidates = [("foo", 2), ("bar", 3), ("quux", 4)]

    populated = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    tm.assert_numpy_array_equal(
        populated.isin(candidates), np.array([False, False, True, True])
    )

    # empty, return dtype bool
    empty_mask = MultiIndex.from_arrays([[], []]).isin(candidates)
    assert len(empty_mask) == 0
    assert empty_mask.dtype == np.bool_
def test_contains_with_missing_value():
    # issue 19132
    # Single level: a bare NaN is a valid key.
    one_level = MultiIndex.from_arrays([[1, np.nan, 2]])
    assert np.nan in one_level

    # Two levels: NaN lives only in the second level, so the bare scalar is
    # not a key, while the full tuple containing it is.
    two_levels = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
    assert np.nan not in two_levels
    assert (1, np.nan) in two_levels
(2, "one"), (2, "two")] + index = MultiIndex.from_tuples(tuples, names=["first", "second"]) + result = index.to_frame(index=False) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame(tuples) + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=["first", "second"]) + expected.index = index + expected.columns = ["first", "second"] + tm.assert_frame_equal(result, expected) + + msg = "'name' must be a list / sequence of column names." + with pytest.raises(TypeError, match=msg): + index.to_frame(name="first") + + msg = "'name' should have same length as number of levels on index." + with pytest.raises(ValueError, match=msg): + index.to_frame(name=["first"]) + + # Tests for datetime index + index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)]) + result = index.to_frame(index=False) + expected = DataFrame( + { + 0: np.repeat(np.arange(5, dtype="int64"), 3), + 1: np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame() + expected.index = index + tm.assert_frame_equal(result, expected) + + # See GH-22580 + result = index.to_frame(index=False, name=["first", "second"]) + expected = DataFrame( + { + "first": np.repeat(np.arange(5, dtype="int64"), 3), + "second": np.tile(pd.date_range("20130101", periods=3), 5), + } + ) + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=["first", "second"]) + expected.index = index + tm.assert_frame_equal(result, expected) + + +def test_to_frame_dtype_fidelity(): + # GH 22420 + mi = pd.MultiIndex.from_arrays( + [ + pd.date_range("19910905", periods=6, 
def test_roundtrip_pickle_with_tz():
    """Pickle round-trip keeps the levels of a tz-aware MultiIndex (GH 8367).

    The body used to sit behind an unconditional ``return``, so nothing was
    ever exercised and the test passed vacuously.  The early exit is removed
    so that the round-trip is really performed and checked.
    """
    # round-trip of timezone
    index = MultiIndex.from_product(
        [[1, 2], ["a", "b"], date_range("20130101", periods=3, tz="US/Eastern")],
        names=["one", "two", "three"],
    )
    unpickled = tm.round_trip_pickle(index)
    # equal_levels compares level by level, so the tz-aware third level has
    # to survive pickling for this to hold.
    assert index.equal_levels(unpickled)
def assert_multiindex_copied(copy, original):
    """Assert that ``copy`` is an equal but distinct copy of ``original``.

    Parameters
    ----------
    copy : MultiIndex
        Index produced by the copy operation under test.  Inside this helper
        the name shadows the ``copy`` function imported at module level.
    original : MultiIndex
        Index that was copied.

    Raises
    ------
    AssertionError
        If levels, codes, names or sortorder differ, or if the levels, the
        codes container or the names container are the same objects as the
        original's.
    """
    # Levels should be (at least, shallow copied)
    tm.assert_copy(copy.levels, original.levels)

    # Codes: it doesn't matter which way they were copied, as long as the
    # values match and the container is a new object.
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names: it doesn't matter which way they were copied either.
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder
@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    # Both stdlib copy helpers must hand back a new object that still
    # compares equal to the source index.
    source = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    duplicate = func(source)

    assert duplicate is not source
    assert duplicate.equals(source)
+ + index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")]) + dropped2 = idx.drop(index) + + expected = idx[[0, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + tm.assert_index_equal(dropped2, expected) + + dropped = idx.drop(["bar"]) + expected = idx[[0, 1, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop("foo") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + index = MultiIndex.from_tuples([("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop([("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop(index) + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(["foo", "two"]) + + # partially correct argument + mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")]) + with pytest.raises(KeyError, match=r"^10$"): + idx.drop(mixed_index) + + # error='ignore' + dropped = idx.drop(index, errors="ignore") + expected = idx[[0, 1, 2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[0, 1, 2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + dropped = idx.drop(["foo", "two"], errors="ignore") + expected = idx[[2, 3, 4, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop + dropped = idx.drop(["foo", ("qux", "one")]) + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + # mixed partial / full drop / error='ignore' + mixed_index = ["foo", ("qux", "one"), "two"] + with pytest.raises(KeyError, match=r"^'two'$"): + idx.drop(mixed_index) + dropped = idx.drop(mixed_index, errors="ignore") + expected = idx[[2, 3, 5]] + tm.assert_index_equal(dropped, expected) + + +def test_droplevel_with_names(idx): + index = idx[idx.get_loc("foo")] + dropped = index.droplevel(0) + assert dropped.name == "second" + + index = MultiIndex( + levels=[Index(range(4)), Index(range(4)), Index(range(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), 
def test_droplevel_list():
    # Three identical integer levels named "one", "two", "three".
    index = MultiIndex(
        levels=[Index(range(4)) for _ in range(3)],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    head = index[:2]

    # Dropping by a list of names equals dropping the levels one by one.
    by_names = head.droplevel(["three", "one"])
    one_by_one = head.droplevel(2).droplevel(0)
    assert by_names.equals(one_by_one)

    # An empty list drops nothing.
    assert head.droplevel([]).equals(head)

    # Removing every level is rejected.
    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        head.droplevel(["one", "two", "three"])

    # An unknown level name is rejected.
    with pytest.raises(KeyError, match="'Level four not found'"):
        head.droplevel(["one", "four"])
@pytest.mark.parametrize("labels,level", [(4, "a"), (7, "b")])
def test_drop_errors_ignore(labels, level):
    # GH 8594
    # The requested label is absent from the given level; with
    # errors="ignore" the drop must leave the object untouched.
    mi = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"])
    data = [10, 20, 30]
    series = pd.Series(data, index=mi)
    frame = pd.DataFrame(data, index=mi)

    series_after = series.drop(labels, level=level, errors="ignore")
    tm.assert_series_equal(series, series_after)

    frame_after = frame.drop(labels, level=level, errors="ignore")
    tm.assert_frame_equal(frame, frame_after)
@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
    """``MultiIndex.unique(level=...)`` agrees with ``get_level_values``.

    Parameters
    ----------
    idx : MultiIndex
        Fixture supplied from outside this module; the parametrized names
        assume its levels are called "first" and "second".
    level : int or str
        Level position or level name handed to ``unique``.
    """
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    # Compare the two; without this line the empty case is computed but
    # never verified.
    tm.assert_index_equal(result, expected)
def test_duplicate_meta_data():
    # GH 10115
    # The rows (0, 0) appear twice, so the index has duplicates regardless
    # of how its levels are named.
    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    name_variants = ([None, None], [None, "Num"], ["Upper", "Num"])
    candidates = [mi] + [mi.set_names(names) for names in name_variants]

    for candidate in candidates:
        assert candidate.has_duplicates
        # Dropping duplicates must not lose or alter the level names.
        assert candidate.drop_duplicates().names == candidate.names
def test_has_duplicates_from_tuples():
    # GH 9075
    # Every row shares the same six string fields; only the two integer
    # fields vary, and no (first, second) pair repeats.
    varying = [
        (5, 169),
        (7, 119),
        (9, 135),
        (13, 145),
        (14, 158),
        (16, 122),
        (17, 160),
        (18, 180),
        (20, 143),
        (21, 128),
        (22, 129),
        (25, 111),
        (28, 114),
        (29, 121),
        (31, 126),
        (32, 155),
        (33, 123),
        (12, 144),
    ]
    rows = [("x", "out", "z", a, "y", "in", "z", b) for a, b in varying]

    mi = MultiIndex.from_tuples(rows)
    assert not mi.has_duplicates
def test_duplicated2():
    # TODO: more informative test name
    # GH5873
    for first_value in (101, 102):
        mi = MultiIndex.from_arrays([[101, first_value], [3.5, np.nan]])
        assert not mi.has_duplicates

        tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))

    first_pool = list("abcde")
    second_pool = list("WXYZ")
    # n is the size of the 1st level, m the size of the 2nd level
    for n, m in product(range(1, 6), range(1, 5)):
        # all possible unique combinations, including nan (code -1)
        code_pairs = list(product(range(-1, n), range(-1, m)))
        mi = MultiIndex(
            levels=[first_pool[:n], second_pool[:m]],
            codes=np.random.permutation(code_pairs).T,
        )
        assert len(mi) == (n + 1) * (m + 1)
        assert not mi.has_duplicates

        tm.assert_numpy_array_equal(
            mi.duplicated(), np.zeros(len(mi), dtype="bool")
        )
def test_equals(idx):
    # Things that must compare equal: the index itself, a copy of it, and
    # an object-dtype cast of it.
    for same in (idx, idx.copy(), idx.astype(object)):
        assert idx.equals(same)

    # Plain containers holding the same values are not equal to the index.
    for container in (list(idx), np.array(idx)):
        assert not idx.equals(container)

    # An object Index built from the same values is equal, in both directions.
    as_object_index = Index(idx, dtype=object)
    assert idx.equals(as_object_index)
    assert as_object_index.equals(idx)

    if idx.nlevels == 1:
        # do not test MultiIndex
        assert not idx.equals(pd.Series(idx))
index_a == index_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + index_a == array_d + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + series_a == series_d + with pytest.raises(ValueError, match="Lengths must match"): + series_a == array_d + + # comparing with a scalar should broadcast; note that we are excluding + # MultiIndex because in this case each item in the index is a tuple of + # length 2, and therefore is considered an array of length 2 in the + # comparison instead of a scalar + if not isinstance(index_a, MultiIndex): + expected3 = np.array([False] * (len(index_a) - 2) + [True, False]) + # assuming the 2nd to last item is unique in the data + item = index_a[-2] + tm.assert_numpy_array_equal(index_a == item, expected3) + tm.assert_series_equal(series_a == item, Series(expected3)) + + +def test_equals_multi(idx): + assert idx.equals(idx) + assert not idx.equals(idx.values) + assert idx.equals(Index(idx.values)) + + assert idx.equal_levels(idx) + assert not idx.equals(idx[:-1]) + assert not idx.equals(idx[-1]) + + # different number of levels + index = MultiIndex( + levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + + index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1]) + assert not index.equals(index2) + assert not index.equal_levels(index2) + + # levels are different + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) + + major_codes = np.array([0, 0, 1, 2, 2, 3]) + minor_codes = np.array([0, 1, 0, 0, 1, 0]) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + assert not idx.equals(index) + assert not idx.equal_levels(index) + + # some of the labels are 
def test_equals_missing_values():
    # make sure take is not using -1
    mi = pd.MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])

    # A one-row slice is never equal to the bare tuple stored at that
    # position, whether or not the row contains a missing value.
    for pos in range(2):
        one_row = mi[pos : pos + 1]
        assert not one_row.equals(mi[pos])
mi4.is_(mi3) + mi5 = mi.view() + mi5.set_levels(mi5.levels, inplace=True) + assert not mi5.is_(mi) + + +def test_is_all_dates(idx): + assert not idx.is_all_dates + + +def test_is_numeric(idx): + # MultiIndex is never numeric + assert not idx.is_numeric() + + +def test_multiindex_compare(): + # GH 21149 + # Ensure comparison operations for MultiIndex with nlevels == 1 + # behave consistently with those for MultiIndex with nlevels > 1 + + midx = pd.MultiIndex.from_product([[0, 1]]) + + # Equality self-test: MultiIndex object vs self + expected = pd.Series([True, True]) + result = pd.Series(midx == midx) + tm.assert_series_equal(result, expected) + + # Greater than comparison: MultiIndex object vs self + expected = pd.Series([False, False]) + result = pd.Series(midx > midx) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py new file mode 100644 index 00000000..75f23fb2 --- /dev/null +++ b/pandas/tests/indexes/multi/test_format.py @@ -0,0 +1,197 @@ +import warnings + +import pytest + +import pandas as pd +from pandas import MultiIndex +import pandas._testing as tm + + +def test_format(idx): + idx.format() + idx[:0].format() + + +def test_format_integer_names(): + index = MultiIndex( + levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1] + ) + index.format(names=True) + + +def test_format_sparse_config(idx): + warn_filters = warnings.filters + warnings.filterwarnings("ignore", category=FutureWarning, module=".*format") + # GH1538 + pd.set_option("display.multi_sparse", False) + + result = idx.format() + assert result[1] == "foo two" + + tm.reset_display_options() + + warnings.filters = warn_filters + + +def test_format_sparse_display(): + index = MultiIndex( + levels=[[0, 1], [0, 1], [0, 1], [0]], + codes=[ + [0, 0, 0, 1, 1, 1], + [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0], + ], + ) + + result = index.format() + assert result[3] == "1 0 0 
0" + + +def test_repr_with_unicode_data(): + with pd.option_context("display.encoding", "UTF-8"): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + index = pd.DataFrame(d).set_index(["a", "b"]).index + assert "\\" not in repr(index) # we don't want unicode-escaped + + +def test_repr_roundtrip_raises(): + mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"]) + with pytest.raises(TypeError): + eval(repr(mi)) + + +def test_unicode_string_with_unicode(): + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + idx = pd.DataFrame(d).set_index(["a", "b"]).index + str(idx) + + +def test_repr_max_seq_item_setting(idx): + # GH10182 + idx = idx.repeat(50) + with pd.option_context("display.max_seq_items", None): + repr(idx) + assert "..." not in str(idx) + + +class TestRepr: + def test_repr(self, idx): + result = idx[:1].__repr__() + expected = """\ +MultiIndex([('foo', 'one')], + names=['first', 'second'])""" + assert result == expected + + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ('bar', 'one'), + ('baz', 'two'), + ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'])""" + assert result == expected + + with pd.option_context("display.max_seq_items", 5): + result = idx.__repr__() + expected = """\ +MultiIndex([('foo', 'one'), + ('foo', 'two'), + ... 
+ ('qux', 'one'), + ('qux', 'two')], + names=['first', 'second'], length=6)""" + assert result == expected + + def test_rjust(self, narrow_multi_index): + mi = narrow_multi_index + result = mi[:1].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi[::500].__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:08:20'), + ('abc', 10, '2000-01-01 00:16:40'), + ('abc', 10, '2000-01-01 00:25:00')], + names=['a', 'b', 'dti'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00'), + ( 'a', 9, '2000-01-01 00:00:01'), + ( 'a', 9, '2000-01-01 00:00:02'), + ( 'a', 9, '2000-01-01 00:00:03'), + ( 'a', 9, '2000-01-01 00:00:04'), + ( 'a', 9, '2000-01-01 00:00:05'), + ( 'a', 9, '2000-01-01 00:00:06'), + ( 'a', 9, '2000-01-01 00:00:07'), + ( 'a', 9, '2000-01-01 00:00:08'), + ( 'a', 9, '2000-01-01 00:00:09'), + ... 
+ ('abc', 10, '2000-01-01 00:33:10'), + ('abc', 10, '2000-01-01 00:33:11'), + ('abc', 10, '2000-01-01 00:33:12'), + ('abc', 10, '2000-01-01 00:33:13'), + ('abc', 10, '2000-01-01 00:33:14'), + ('abc', 10, '2000-01-01 00:33:15'), + ('abc', 10, '2000-01-01 00:33:16'), + ('abc', 10, '2000-01-01 00:33:17'), + ('abc', 10, '2000-01-01 00:33:18'), + ('abc', 10, '2000-01-01 00:33:19')], + names=['a', 'b', 'dti'], length=2000)""" + assert result == expected + + def test_tuple_width(self, wide_multi_index): + mi = wide_multi_index + result = mi[:1].__repr__() + expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi[:10].__repr__() + expected = """\ +MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])""" + assert result == expected + + result = mi.__repr__() + expected = """\ +MultiIndex([( 'a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...), + ( 'a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...), + ( 'a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...), + ( 'a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...), + ( 'a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...), + ( 'a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...), + ( 'a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...), + 
( 'a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...), + ( 'a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...), + ( 'a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...), + ... + ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...), + ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...), + ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...), + ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...), + ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...), + ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...), + ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...), + ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...), + ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...), + ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)], + names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)""" # noqa + assert result == expected diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py new file mode 100644 index 00000000..57d16a73 --- /dev/null +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -0,0 +1,417 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import CategoricalIndex, Index, MultiIndex +import pandas._testing as tm + + +def assert_matching(actual, expected, check_dtype=False): + # avoid specifying internal representation + # as much as possible + assert len(actual) == len(expected) + for act, exp in zip(actual, expected): + act = np.asarray(act) + exp = np.asarray(exp) + tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype) + + +def test_get_level_number_integer(idx): + idx.names = [1, 0] + assert idx._get_level_number(1) == 0 + assert idx._get_level_number(0) == 1 + msg = "Too many levels: Index has only 2 levels, not 3" + with pytest.raises(IndexError, match=msg): + idx._get_level_number(2) + with pytest.raises(KeyError, match="Level fourth not found"): + 
idx._get_level_number("fourth") + + +def test_get_level_values(idx): + result = idx.get_level_values(0) + expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first") + tm.assert_index_equal(result, expected) + assert result.name == "first" + + result = idx.get_level_values("first") + expected = idx.get_level_values(0) + tm.assert_index_equal(result, expected) + + # GH 10460 + index = MultiIndex( + levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])], + codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])], + ) + + exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"]) + tm.assert_index_equal(index.get_level_values(0), exp) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) + tm.assert_index_equal(index.get_level_values(1), exp) + + +def test_get_value_duplicates(): + index = MultiIndex( + levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=["tag", "day"], + ) + + assert index.get_loc("D") == slice(0, 3) + with pytest.raises(KeyError, match=r"^'D'$"): + index._engine.get_value(np.array([]), "D") + + +def test_get_level_values_all_na(): + # GH 17924 when level entirely consists of nan + arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = pd.Index(["a", np.nan, 1], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_int_with_na(): + # GH 17924 + arrays = [["a", "b", "b"], [1, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = Index([1, np.nan, 2]) + tm.assert_index_equal(result, expected) + + arrays = [["a", "b", "b"], [np.nan, np.nan, 2]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) 
+ expected = Index([np.nan, np.nan, 2]) + tm.assert_index_equal(result, expected) + + +def test_get_level_values_na(): + arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([np.nan, np.nan, np.nan]) + tm.assert_index_equal(result, expected) + + result = index.get_level_values(1) + expected = pd.Index(["a", np.nan, 1]) + tm.assert_index_equal(result, expected) + + arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(1) + expected = pd.DatetimeIndex([0, 1, pd.NaT]) + tm.assert_index_equal(result, expected) + + arrays = [[], []] + index = pd.MultiIndex.from_arrays(arrays) + result = index.get_level_values(0) + expected = pd.Index([], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_set_name_methods(idx, index_names): + # so long as these are synonyms, we don't need to test set_names + assert idx.rename == idx.set_names + new_names = [name + "SUFFIX" for name in index_names] + ind = idx.set_names(new_names) + assert idx.names == index_names + assert ind.names == new_names + msg = "Length of names must match number of levels in MultiIndex" + with pytest.raises(ValueError, match=msg): + ind.set_names(new_names + new_names) + new_names2 = [name + "SUFFIX2" for name in new_names] + res = ind.set_names(new_names2, inplace=True) + assert res is None + assert ind.names == new_names2 + + # set names for specific level (# GH7792) + ind = idx.set_names(new_names[0], level=0) + assert idx.names == index_names + assert ind.names == [new_names[0], index_names[1]] + + res = ind.set_names(new_names2[0], level=0, inplace=True) + assert res is None + assert ind.names == [new_names2[0], index_names[1]] + + # set names for multiple levels + ind = idx.set_names(new_names, level=[0, 1]) + assert idx.names == index_names + assert ind.names == new_names + + res = 
ind.set_names(new_names2, level=[0, 1], inplace=True) + assert res is None + assert ind.names == new_names2 + + +def test_set_levels_codes_directly(idx): + # setting levels/codes directly raises AttributeError + + levels = idx.levels + new_levels = [[lev + "a" for lev in level] for level in levels] + + codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + msg = "[Cc]an't set attribute" + with pytest.raises(AttributeError, match=msg): + idx.levels = new_levels + with pytest.raises(AttributeError, match=msg): + idx.codes = new_codes + + +def test_set_levels(idx): + # side note - you probably wouldn't want to use levels and codes + # directly like this - but it is possible. + levels = idx.levels + new_levels = [[lev + "a" for lev in level] for level in levels] + + # level changing [w/o mutation] + ind2 = idx.set_levels(new_levels) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + + # level changing specific level [w/o mutation] + ind2 = idx.set_levels(new_levels[0], level=0) + assert_matching(ind2.levels, [new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.set_levels(new_levels[1], level=1) + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/o mutation] + ind2 = idx.set_levels(new_levels, level=[0, 1]) + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # level changing specific level [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, 
[new_levels[0], levels[1]]) + assert_matching(idx.levels, levels) + + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, [levels[0], new_levels[1]]) + assert_matching(idx.levels, levels) + + # level changing multiple levels [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) + assert inplace_return is None + assert_matching(ind2.levels, new_levels) + assert_matching(idx.levels, levels) + + # illegal level changing should not change levels + # GH 13754 + original_index = idx.copy() + for inplace in [True, False]: + with pytest.raises(ValueError, match="^On"): + idx.set_levels(["c"], level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) + + with pytest.raises(ValueError, match="^On"): + idx.set_codes([0, 1, 2, 3, 4, 5], level=0, inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) + + with pytest.raises(TypeError, match="^Levels"): + idx.set_levels("c", level=0, inplace=inplace) + assert_matching(idx.levels, original_index.levels, check_dtype=True) + + with pytest.raises(TypeError, match="^Codes"): + idx.set_codes(1, level=0, inplace=inplace) + assert_matching(idx.codes, original_index.codes, check_dtype=True) + + +def test_set_codes(idx): + # side note - you probably wouldn't want to use levels and codes + # directly like this - but it is possible. 
+ codes = idx.codes + major_codes, minor_codes = codes + major_codes = [(x + 1) % 3 for x in major_codes] + minor_codes = [(x + 1) % 1 for x in minor_codes] + new_codes = [major_codes, minor_codes] + + # changing codes w/o mutation + ind2 = idx.set_codes(new_codes) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # changing label w/ mutation + ind2 = idx.copy() + inplace_return = ind2.set_codes(new_codes, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, new_codes) + + # codes changing specific level w/o mutation + ind2 = idx.set_codes(new_codes[0], level=0) + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) + + ind2 = idx.set_codes(new_codes[1], level=1) + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) + + # codes changing multiple levels w/o mutation + ind2 = idx.set_codes(new_codes, level=[0, 1]) + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # label changing specific level w/ mutation + ind2 = idx.copy() + inplace_return = ind2.set_codes(new_codes[0], level=0, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, [new_codes[0], codes[1]]) + assert_matching(idx.codes, codes) + + ind2 = idx.copy() + inplace_return = ind2.set_codes(new_codes[1], level=1, inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, [codes[0], new_codes[1]]) + assert_matching(idx.codes, codes) + + # codes changing multiple levels [w/ mutation] + ind2 = idx.copy() + inplace_return = ind2.set_codes(new_codes, level=[0, 1], inplace=True) + assert inplace_return is None + assert_matching(ind2.codes, new_codes) + assert_matching(idx.codes, codes) + + # label changing for levels of different magnitude of categories + ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)]) + new_codes = range(129, -1, -1) + expected = pd.MultiIndex.from_tuples([(0, i) for i in new_codes]) + 
+ # [w/o mutation] + result = ind.set_codes(codes=new_codes, level=1) + assert result.equals(expected) + + # [w/ mutation] + result = ind.copy() + result.set_codes(codes=new_codes, level=1, inplace=True) + assert result.equals(expected) + + +def test_set_levels_codes_names_bad_input(idx): + levels, codes = idx.levels, idx.codes + names = idx.names + + with pytest.raises(ValueError, match="Length of levels"): + idx.set_levels([levels[0]]) + + with pytest.raises(ValueError, match="Length of codes"): + idx.set_codes([codes[0]]) + + with pytest.raises(ValueError, match="Length of names"): + idx.set_names([names[0]]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_levels(levels[0]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_codes(codes[0]) + + # shouldn't scalar data error, instead should demand list-like + with pytest.raises(TypeError, match="list-like"): + idx.set_names(names[0]) + + # should have equal lengths + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_levels(levels[0], level=[0, 1]) + + with pytest.raises(TypeError, match="list-like"): + idx.set_levels(levels, level=0) + + # should have equal lengths + with pytest.raises(TypeError, match="list of lists-like"): + idx.set_codes(codes[0], level=[0, 1]) + + with pytest.raises(TypeError, match="list-like"): + idx.set_codes(codes, level=0) + + # should have equal lengths + with pytest.raises(ValueError, match="Length of names"): + idx.set_names(names[0], level=[0, 1]) + + with pytest.raises(TypeError, match="Names must be a"): + idx.set_names(names, level=0) + + +@pytest.mark.parametrize("inplace", [True, False]) +def test_set_names_with_nlevel_1(inplace): + # GH 21149 + # Ensure that .set_names for MultiIndex with + # nlevels == 1 does not raise any errors + expected = pd.MultiIndex(levels=[[0, 1]], codes=[[0, 
1]], names=["first"]) + m = pd.MultiIndex.from_product([[0, 1]]) + result = m.set_names("first", level=0, inplace=inplace) + + if inplace: + result = m + + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_set_levels_categorical(ordered): + # GH13854 + index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]]) + + cidx = CategoricalIndex(list("bac"), ordered=ordered) + result = index.set_levels(cidx, 0) + expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes) + tm.assert_index_equal(result, expected) + + result_lvl = result.get_level_values(0) + expected_lvl = CategoricalIndex( + list("bacb"), categories=cidx.categories, ordered=cidx.ordered + ) + tm.assert_index_equal(result_lvl, expected_lvl) + + +def test_set_value_keeps_names(): + # motivating example from #3742 + lev1 = ["hans", "hans", "hans", "grethe", "grethe", "grethe"] + lev2 = ["1", "2", "3"] * 2 + idx = pd.MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"]) + df = pd.DataFrame( + np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=idx + ) + df = df.sort_index() + assert df._is_copy is None + assert df.index.names == ("Name", "Number") + df.at[("grethe", "4"), "one"] = 99.34 + assert df._is_copy is None + assert df.index.names == ("Name", "Number") + + +def test_set_levels_with_iterable(): + # GH23273 + sizes = [1, 2, 3] + colors = ["black"] * 3 + index = pd.MultiIndex.from_arrays([sizes, colors], names=["size", "color"]) + + result = index.set_levels(map(int, ["3", "2", "1"]), level="size") + + expected_sizes = [3, 2, 1] + expected = pd.MultiIndex.from_arrays( + [expected_sizes, colors], names=["size", "color"] + ) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py new file mode 100644 index 00000000..b08280a7 --- /dev/null +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -0,0 +1,528 @@ +from datetime 
import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + Index, + IntervalIndex, + MultiIndex, + date_range, +) +import pandas._testing as tm +from pandas.core.indexes.base import InvalidIndexError + + +def test_slice_locs_partial(idx): + sorted_idx, _ = idx.sortlevel(0) + + result = sorted_idx.slice_locs(("foo", "two"), ("qux", "one")) + assert result == (1, 5) + + result = sorted_idx.slice_locs(None, ("qux", "one")) + assert result == (0, 5) + + result = sorted_idx.slice_locs(("foo", "two"), None) + assert result == (1, len(sorted_idx)) + + result = sorted_idx.slice_locs("bar", "baz") + assert result == (2, 4) + + +def test_slice_locs(): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + + slob = slice(*idx.slice_locs(df.index[5], df.index[15])) + sliced = stacked[slob] + expected = df[5:16].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + slob = slice( + *idx.slice_locs( + df.index[5] + timedelta(seconds=30), df.index[15] - timedelta(seconds=30) + ) + ) + sliced = stacked[slob] + expected = df[6:15].stack() + tm.assert_almost_equal(sliced.values, expected.values) + + +def test_slice_locs_with_type_mismatch(): + df = tm.makeTimeDataFrame() + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs((1, 3)) + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[5] + timedelta(seconds=30), (5, 2)) + df = tm.makeCustomDataframe(5, 5) + stacked = df.stack() + idx = stacked.index + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(timedelta(seconds=30)) + # TODO: Try creating a UnicodeDecodeError in exception message + with pytest.raises(TypeError, match="^Level type mismatch"): + idx.slice_locs(df.index[1], (16, "a")) + + +def test_slice_locs_not_sorted(): + index = MultiIndex( + levels=[Index(np.arange(4)), 
Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + msg = "[Kk]ey length.*greater than MultiIndex lexsort depth" + with pytest.raises(KeyError, match=msg): + index.slice_locs((1, 0, 1), (2, 1, 0)) + + # works + sorted_index, _ = index.sortlevel(0) + # should there be a test case here??? + sorted_index.slice_locs((1, 0, 1), (2, 1, 0)) + + +def test_slice_locs_not_contained(): + # some searchsorted action + + index = MultiIndex( + levels=[[0, 2, 4, 6], [0, 2, 4]], + codes=[[0, 0, 0, 1, 1, 2, 3, 3, 3], [0, 1, 2, 1, 2, 2, 0, 1, 2]], + ) + + result = index.slice_locs((1, 0), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(1, 5) + assert result == (3, 6) + + result = index.slice_locs((2, 2), (5, 2)) + assert result == (3, 6) + + result = index.slice_locs(2, 5) + assert result == (3, 6) + + result = index.slice_locs((1, 0), (6, 3)) + assert result == (3, 8) + + result = index.slice_locs(-1, 10) + assert result == (0, len(index)) + + +def test_putmask_with_wrong_mask(idx): + # GH18368 + + msg = "putmask: mask and data must be the same size" + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) + 1, np.bool), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask(np.ones(len(idx) - 1, np.bool), 1) + + with pytest.raises(ValueError, match=msg): + idx.putmask("foo", 1) + + +def test_get_indexer(): + major_axis = Index(np.arange(4)) + minor_axis = Index(np.arange(2)) + + major_codes = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp) + minor_codes = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp) + + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + idx1 = index[:5] + idx2 = index[[1, 3, 5]] + + r1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp)) + + r1 = idx2.get_indexer(idx1, method="pad") + e1 = np.array([-1, 0, 0, 1, 1], 
dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method="pad") + tm.assert_almost_equal(r2, e1[::-1]) + + rffill1 = idx2.get_indexer(idx1, method="ffill") + tm.assert_almost_equal(r1, rffill1) + + r1 = idx2.get_indexer(idx1, method="backfill") + e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + r2 = idx2.get_indexer(idx1[::-1], method="backfill") + tm.assert_almost_equal(r2, e1[::-1]) + + rbfill1 = idx2.get_indexer(idx1, method="bfill") + tm.assert_almost_equal(r1, rbfill1) + + # pass non-MultiIndex + r1 = idx1.get_indexer(idx2.values) + rexp1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, rexp1) + + r1 = idx1.get_indexer([1, 2, 3]) + assert (r1 == [-1, -1, -1]).all() + + # create index with duplicates + idx1 = Index(list(range(10)) + list(range(10))) + idx2 = Index(list(range(20))) + + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + idx1.get_indexer(idx2) + + +def test_get_indexer_nearest(): + midx = MultiIndex.from_tuples([("a", 1), ("b", 2)]) + msg = "method='nearest' not implemented yet for MultiIndex; see GitHub issue 9365" + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="nearest") + msg = "tolerance not implemented yet for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + midx.get_indexer(["a"], method="pad", tolerance=2) + + +def test_getitem(idx): + # scalar + assert idx[2] == ("bar", "one") + + # slice + result = idx[2:5] + expected = idx[[2, 3, 4]] + assert result.equals(expected) + + # boolean + result = idx[[True, False, True, False, True, True]] + result2 = idx[np.array([True, False, True, False, True, True])] + expected = idx[[0, 2, 4, 5]] + assert result.equals(expected) + assert result2.equals(expected) + + +def test_getitem_group_select(idx): + sorted_idx, _ = idx.sortlevel(0) + assert sorted_idx.get_loc("baz") == slice(3, 4) + assert 
sorted_idx.get_loc("foo") == slice(0, 2) + + +def test_get_indexer_consistency(idx): + # See GH 16819 + if isinstance(idx, IntervalIndex): + pass + + if idx.is_unique or isinstance(idx, CategoricalIndex): + indexer = idx.get_indexer(idx[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=e): + idx.get_indexer(idx[0:2]) + + indexer, _ = idx.get_indexer_non_unique(idx[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + + +@pytest.mark.parametrize("ind1", [[True] * 5, pd.Index([True] * 5)]) +@pytest.mark.parametrize( + "ind2", + [[True, False, True, False, False], pd.Index([True, False, True, False, False])], +) +def test_getitem_bool_index_all(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1), (20, 2), (30, 3), (40, 4), (50, 5)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = MultiIndex.from_tuples([(10, 1), (30, 3)]) + tm.assert_index_equal(idx[ind2], expected) + + +@pytest.mark.parametrize("ind1", [[True], pd.Index([True])]) +@pytest.mark.parametrize("ind2", [[False], pd.Index([False])]) +def test_getitem_bool_index_single(ind1, ind2): + # GH#22533 + idx = MultiIndex.from_tuples([(10, 1)]) + tm.assert_index_equal(idx[ind1], idx) + + expected = pd.MultiIndex( + levels=[np.array([], dtype=np.int64), np.array([], dtype=np.int64)], + codes=[[], []], + ) + tm.assert_index_equal(idx[ind2], expected) + + +def test_get_loc(idx): + assert idx.get_loc(("foo", "two")) == 1 + assert idx.get_loc(("baz", "two")) == 3 + with pytest.raises(KeyError, match=r"^10$"): + idx.get_loc(("bar", "two")) + with pytest.raises(KeyError, match=r"^'quux'$"): + idx.get_loc("quux") + + msg = "only the default get_loc method is currently supported for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.get_loc("foo", method="nearest") + + # 3 levels + index = MultiIndex( + 
levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + with pytest.raises(KeyError, match=r"^\(1, 1\)$"): + index.get_loc((1, 1)) + assert index.get_loc((2, 0)) == slice(3, 5) + + +def test_get_loc_duplicates(): + index = Index([2, 2, 2, 2]) + result = index.get_loc(2) + expected = slice(0, 4) + assert result == expected + # pytest.raises(Exception, index.get_loc, 2) + + index = Index(["c", "a", "a", "b", "b"]) + rs = index.get_loc("c") + xp = 0 + assert rs == xp + + +def test_get_loc_level(): + index = MultiIndex( + levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], + codes=[ + np.array([0, 0, 1, 2, 2, 2, 3, 3]), + np.array([0, 1, 0, 0, 0, 1, 0, 1]), + np.array([1, 0, 1, 1, 0, 0, 1, 0]), + ], + ) + loc, new_index = index.get_loc_level((0, 1)) + expected = slice(1, 2) + exp_index = index[expected].droplevel(0).droplevel(0) + assert loc == expected + assert new_index.equals(exp_index) + + loc, new_index = index.get_loc_level((0, 1, 0)) + expected = 1 + assert loc == expected + assert new_index is None + + with pytest.raises(KeyError, match=r"^\(2, 2\)$"): + index.get_loc_level((2, 2)) + # GH 22221: unused label + with pytest.raises(KeyError, match=r"^2$"): + index.drop(2).get_loc_level(2) + # Unused label on unsorted level: + with pytest.raises(KeyError, match=r"^2$"): + index.drop(1, level=2).get_loc_level(2, level=2) + + index = MultiIndex( + levels=[[2000], list(range(4))], + codes=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])], + ) + result, new_index = index.get_loc_level((2000, slice(None, None))) + expected = slice(None, None) + assert result == expected + assert new_index.equals(index.droplevel(0)) + + +@pytest.mark.parametrize("dtype1", [int, float, bool, str]) +@pytest.mark.parametrize("dtype2", [int, float, bool, str]) +def test_get_loc_multiple_dtypes(dtype1, dtype2): + # GH 18520 + 
levels = [np.array([0, 1]).astype(dtype1), np.array([0, 1]).astype(dtype2)] + idx = pd.MultiIndex.from_product(levels) + assert idx.get_loc(idx[2]) == 2 + + +@pytest.mark.parametrize("level", [0, 1]) +@pytest.mark.parametrize("dtypes", [[int, float], [float, int]]) +def test_get_loc_implicit_cast(level, dtypes): + # GH 18818, GH 15994 : as flat index, cast int to float and vice-versa + levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + lev_dtype, key_dtype = dtypes + levels[level] = np.array([0, 1], dtype=lev_dtype) + key[level] = key_dtype(1) + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_cast_bool(): + # GH 19086 : int is casted to bool, but not vice-versa + levels = [[False, True], np.arange(2, dtype="int64")] + idx = MultiIndex.from_product(levels) + + assert idx.get_loc((0, 1)) == 1 + assert idx.get_loc((1, 0)) == 2 + + with pytest.raises(KeyError, match=r"^\(False, True\)$"): + idx.get_loc((False, True)) + with pytest.raises(KeyError, match=r"^\(True, False\)$"): + idx.get_loc((True, False)) + + +@pytest.mark.parametrize("level", [0, 1]) +def test_get_loc_nan(level, nulls_fixture): + # GH 18485 : NaN in MultiIndex + levels = [["a", "b"], ["c", "d"]] + key = ["b", "d"] + levels[level] = np.array([0, nulls_fixture], dtype=type(nulls_fixture)) + key[level] = nulls_fixture + idx = MultiIndex.from_product(levels) + assert idx.get_loc(tuple(key)) == 3 + + +def test_get_loc_missing_nan(): + # GH 8569 + idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]]) + assert isinstance(idx.get_loc(1), slice) + with pytest.raises(KeyError, match=r"^3\.0$"): + idx.get_loc(3) + with pytest.raises(KeyError, match=r"^nan$"): + idx.get_loc(np.nan) + with pytest.raises(TypeError, match="unhashable type: 'list'"): + # listlike/non-hashable raises TypeError + idx.get_loc([np.nan]) + + +def test_get_indexer_categorical_time(): + # https://github.com/pandas-dev/pandas/issues/21390 + midx = MultiIndex.from_product( + [ + 
Categorical(["a", "b", "c"]), + Categorical(date_range("2012-01-01", periods=3, freq="H")), + ] + ) + result = midx.get_indexer(midx) + tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp)) + + +def test_timestamp_multiindex_indexer(): + # https://github.com/pandas-dev/pandas/issues/26944 + idx = pd.MultiIndex.from_product( + [ + pd.date_range("2019-01-01T00:15:33", periods=100, freq="H", name="date"), + ["x"], + [3], + ] + ) + df = pd.DataFrame({"foo": np.arange(len(idx))}, idx) + result = df.loc[pd.IndexSlice["2019-1-2":, "x", :], "foo"] + qidx = pd.MultiIndex.from_product( + [ + pd.date_range( + start="2019-01-02T00:15:33", + end="2019-01-05T02:15:33", + freq="H", + name="date", + ), + ["x"], + [3], + ] + ) + should_be = pd.Series(data=np.arange(24, len(qidx) + 24), index=qidx, name="foo") + tm.assert_series_equal(result, should_be) + + +def test_get_loc_with_values_including_missing_values(): + # issue 19132 + idx = MultiIndex.from_product([[np.nan, 1]] * 2) + expected = slice(0, 2, None) + assert idx.get_loc(np.nan) == expected + + idx = MultiIndex.from_arrays([[np.nan, 1, 2, np.nan]]) + expected = np.array([True, False, False, True]) + tm.assert_numpy_array_equal(idx.get_loc(np.nan), expected) + + idx = MultiIndex.from_product([[np.nan, 1]] * 3) + expected = slice(2, 4, None) + assert idx.get_loc((np.nan, 1)) == expected + + +@pytest.mark.parametrize( + "index_arr,labels,expected", + [ + ( + [[1, np.nan, 2], [3, 4, 5]], + [1, np.nan, 2], + np.array([-1, -1, -1], dtype=np.intp), + ), + ([[1, np.nan, 2], [3, 4, 5]], [(np.nan, 4)], np.array([1], dtype=np.intp)), + ([[1, 2, 3], [np.nan, 4, 5]], [(1, np.nan)], np.array([0], dtype=np.intp)), + ( + [[1, 2, 3], [np.nan, 4, 5]], + [np.nan, 4, 5], + np.array([-1, -1, -1], dtype=np.intp), + ), + ], +) +def test_get_indexer_with_missing_value(index_arr, labels, expected): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.get_indexer(labels) + tm.assert_numpy_array_equal(result, expected) 
+ + +@pytest.mark.parametrize( + "index_arr,expected,target,algo", + [ + ([[np.nan, "a", "b"], ["c", "d", "e"]], 0, np.nan, "left"), + ([[np.nan, "a", "b"], ["c", "d", "e"]], 1, (np.nan, "c"), "right"), + ([["a", "b", "c"], ["d", np.nan, "d"]], 1, ("b", np.nan), "left"), + ], +) +def test_get_slice_bound_with_missing_value(index_arr, expected, target, algo): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.get_slice_bound(target, side=algo, kind="loc") + assert result == expected + + +@pytest.mark.parametrize( + "index_arr,expected,start_idx,end_idx", + [ + ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 2, None), np.nan, 1), + ([[np.nan, 1, 2], [3, 4, 5]], slice(0, 3, None), np.nan, (2, 5)), + ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), 3), + ([[1, 2, 3], [4, np.nan, 5]], slice(1, 3, None), (2, np.nan), (3, 5)), + ], +) +def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_idx): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.slice_indexer(start=start_idx, end=end_idx) + assert result == expected + + +@pytest.mark.parametrize( + "index_arr,expected,start_idx,end_idx", + [ + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, None), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, "b"), + ([[np.nan, "a", "b"], ["c", "d", "e"]], (0, 3), np.nan, ("b", "e")), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), None), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), "c"), + ([["a", "b", "c"], ["d", np.nan, "e"]], (1, 3), ("b", np.nan), ("c", "e")), + ], +) +def test_slice_locs_with_missing_value(index_arr, expected, start_idx, end_idx): + # issue 19132 + idx = MultiIndex.from_arrays(index_arr) + result = idx.slice_locs(start=start_idx, end=end_idx) + assert result == expected diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py new file mode 100644 index 00000000..f2ec15e0 --- /dev/null +++ 
b/pandas/tests/indexes/multi/test_integrity.py @@ -0,0 +1,294 @@ +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + +import pandas as pd +from pandas import IntervalIndex, MultiIndex, RangeIndex +import pandas._testing as tm + + +def test_labels_dtypes(): + + # GH 8456 + i = MultiIndex.from_tuples([("A", 1), ("A", 2)]) + assert i.codes[0].dtype == "int8" + assert i.codes[1].dtype == "int8" + + i = MultiIndex.from_product([["a"], range(40)]) + assert i.codes[1].dtype == "int8" + i = MultiIndex.from_product([["a"], range(400)]) + assert i.codes[1].dtype == "int16" + i = MultiIndex.from_product([["a"], range(40000)]) + assert i.codes[1].dtype == "int32" + + i = pd.MultiIndex.from_product([["a"], range(1000)]) + assert (i.codes[0] >= 0).all() + assert (i.codes[1] >= 0).all() + + +def test_values_boxed(): + tuples = [ + (1, pd.Timestamp("2000-01-01")), + (2, pd.NaT), + (3, pd.Timestamp("2000-01-03")), + (1, pd.Timestamp("2000-01-04")), + (2, pd.Timestamp("2000-01-02")), + (3, pd.Timestamp("2000-01-03")), + ] + result = pd.MultiIndex.from_tuples(tuples) + expected = construct_1d_object_array_from_listlike(tuples) + tm.assert_numpy_array_equal(result.values, expected) + # Check that code branches for boxed values produce identical results + tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + + +def test_values_multiindex_datetimeindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10 ** 18, 10 ** 18 + 5) + naive = pd.DatetimeIndex(ints) + + aware = pd.DatetimeIndex(ints, tz="US/Central") + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + 
tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq="D") + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + + +def test_consistency(): + # need to construct an overflow + major_axis = list(range(70000)) + minor_axis = list(range(10)) + + major_codes = np.arange(70000) + minor_codes = np.repeat(range(10), 7000) + + # the fact that is works means it's consistent + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + # inconsistent + major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3]) + minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1]) + index = MultiIndex( + levels=[major_axis, minor_axis], codes=[major_codes, minor_codes] + ) + + assert index.is_unique is False + + +def test_hash_collisions(): + # non-smoke test that we don't get hash collisions + + index = MultiIndex.from_product( + [np.arange(1000), np.arange(1000)], names=["one", "two"] + ) + result = index.get_indexer(index.values) + tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp")) + + for i in [0, 1, len(index) - 2, len(index) - 1]: + result = index.get_loc(index[i]) + assert result == i + + +def test_dims(): + pass + + +def take_invalid_kwargs(): + vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]] + idx = 
pd.MultiIndex.from_product(vals, names=["str", "dt"]) + indices = [1, 2] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +def test_isna_behavior(idx): + # should not segfault GH5123 + # NOTE: if MI representation changes, may make sense to allow + # isna(MI) + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + pd.isna(idx) + + +def test_large_multiindex_error(): + # GH12527 + df_below_1000000 = pd.DataFrame( + 1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]), columns=["dest"] + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_below_1000000.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_below_1000000.loc[(3, 0), "dest"] + df_above_1000000 = pd.DataFrame( + 1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]), columns=["dest"] + ) + with pytest.raises(KeyError, match=r"^\(-1, 0\)$"): + df_above_1000000.loc[(-1, 0), "dest"] + with pytest.raises(KeyError, match=r"^\(3, 0\)$"): + df_above_1000000.loc[(3, 0), "dest"] + + +def test_million_record_attribute_error(): + # GH 18165 + r = list(range(1000000)) + df = pd.DataFrame( + {"a": r, "b": r}, index=pd.MultiIndex.from_tuples([(x, x) for x in r]) + ) + + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + df["a"].foo() + + +def test_can_hold_identifiers(idx): + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is True + + +def test_metadata_immutable(idx): + levels, codes = idx.levels, idx.codes + # shouldn't be able to set at either the top level or base level + mutable_regex = re.compile("does not support 
mutable operations") + with pytest.raises(TypeError, match=mutable_regex): + levels[0] = levels[0] + with pytest.raises(TypeError, match=mutable_regex): + levels[0][0] = levels[0][0] + # ditto for labels + with pytest.raises(TypeError, match=mutable_regex): + codes[0] = codes[0] + with pytest.raises(ValueError, match="assignment destination is read-only"): + codes[0][0] = codes[0][0] + # and for names + names = idx.names + with pytest.raises(TypeError, match=mutable_regex): + names[0] = names[0] + + +def test_level_setting_resets_attributes(): + ind = pd.MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert ind.is_monotonic + ind.set_levels([["A", "B"], [1, 3, 2]], inplace=True) + # if this fails, probably didn't reset the cache correctly. + assert not ind.is_monotonic + + +def test_rangeindex_fallback_coercion_bug(): + # GH 12893 + foo = pd.DataFrame(np.arange(100).reshape((10, 10))) + bar = pd.DataFrame(np.arange(100).reshape((10, 10))) + df = pd.concat({"foo": foo.stack(), "bar": bar.stack()}, axis=1) + df.index.names = ["fizz", "buzz"] + + str(df) + expected = pd.DataFrame( + {"bar": np.arange(100), "foo": np.arange(100)}, + index=pd.MultiIndex.from_product( + [range(10), range(10)], names=["fizz", "buzz"] + ), + ) + tm.assert_frame_equal(df, expected, check_like=True) + + result = df.index.get_level_values("fizz") + expected = pd.Int64Index(np.arange(10), name="fizz").repeat(10) + tm.assert_index_equal(result, expected) + + result = df.index.get_level_values("buzz") + expected = pd.Int64Index(np.tile(np.arange(10), 10), name="buzz") + tm.assert_index_equal(result, expected) + + +def test_hash_error(indices): + index = indices + with pytest.raises(TypeError, match=f"unhashable type: '{type(index).__name__}'"): + hash(indices) + + +def test_mutability(indices): + if not len(indices): + return + msg = "Index does not support mutable operations" + with pytest.raises(TypeError, match=msg): + indices[0] = indices[0] + + +def 
test_wrong_number_names(indices): + with pytest.raises(ValueError, match="^Length"): + indices.names = ["apple", "banana", "carrot"] + + +def test_memory_usage(idx): + result = idx.memory_usage() + if len(idx): + idx.get_loc(idx[0]) + result2 = idx.memory_usage() + result3 = idx.memory_usage(deep=True) + + # RangeIndex, IntervalIndex + # don't have engines + if not isinstance(idx, (RangeIndex, IntervalIndex)): + assert result2 > result + + if idx.inferred_type == "object": + assert result3 > result2 + + else: + + # we report 0 for no-length + assert result == 0 + + +def test_nlevels(idx): + assert idx.nlevels == 2 diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py new file mode 100644 index 00000000..062fb92c --- /dev/null +++ b/pandas/tests/indexes/multi/test_join.py @@ -0,0 +1,105 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, MultiIndex +import pandas._testing as tm + + +@pytest.mark.parametrize( + "other", [Index(["three", "one", "two"]), Index(["one"]), Index(["one", "three"])] +) +def test_join_level(idx, other, join_type): + join_index, lidx, ridx = other.join( + idx, how=join_type, level="second", return_indexers=True + ) + + exp_level = other.join(idx.levels[1], how=join_type) + assert join_index.levels[0].equals(idx.levels[0]) + assert join_index.levels[1].equals(exp_level) + + # pare down levels + mask = np.array([x[1] in exp_level for x in idx], dtype=bool) + exp_values = idx.values[mask] + tm.assert_numpy_array_equal(join_index.values, exp_values) + + if join_type in ("outer", "inner"): + join_index2, ridx2, lidx2 = idx.join( + other, how=join_type, level="second", return_indexers=True + ) + + assert join_index.equals(join_index2) + tm.assert_numpy_array_equal(lidx, lidx2) + tm.assert_numpy_array_equal(ridx, ridx2) + tm.assert_numpy_array_equal(join_index2.values, exp_values) + + +def test_join_level_corner_case(idx): + # some corner cases + index = Index(["three", 
"one", "two"]) + result = index.join(idx, level="second") + assert isinstance(result, MultiIndex) + + with pytest.raises(TypeError, match="Join.*MultiIndex.*ambiguous"): + idx.join(idx, level=1) + + +def test_join_self(idx, join_type): + joined = idx.join(idx, how=join_type) + assert idx is joined + + +def test_join_multi(): + # GH 10665 + midx = pd.MultiIndex.from_product([np.arange(4), np.arange(4)], names=["a", "b"]) + idx = pd.Index([1, 2, 5], name="b") + + # inner + jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True) + exp_idx = pd.MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"]) + exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp) + exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True) + tm.assert_index_equal(jidx, exp_idx) + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + # keep MultiIndex + jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True) + exp_ridx = np.array( + [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp + ) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + # flip + jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True) + tm.assert_index_equal(jidx, midx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_join_self_unique(idx, join_type): + if idx.is_unique: + joined = idx.join(idx, how=join_type) + assert (idx == joined).all() + + +def test_join_multi_wrong_order(): + # GH 25760 + # GH 28956 + + midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["b", "a"]) + + join_idx, lidx, ridx = midx1.join(midx2, return_indexers=False) 
+ + exp_ridx = np.array([-1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(midx1, join_idx) + assert lidx is None + tm.assert_numpy_array_equal(ridx, exp_ridx) diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py new file mode 100644 index 00000000..a17e1e99 --- /dev/null +++ b/pandas/tests/indexes/multi/test_missing.py @@ -0,0 +1,143 @@ +import numpy as np +import pytest + +from pandas._libs.tslib import iNaT + +import pandas as pd +from pandas import Int64Index, MultiIndex, PeriodIndex, UInt64Index +import pandas._testing as tm +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +def test_fillna(idx): + # GH 11343 + + # TODO: Remove or Refactor. Not Implemented for MultiIndex + for name, index in [("idx", idx)]: + if len(index) == 0: + pass + elif isinstance(index, MultiIndex): + idx = index.copy() + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.fillna(idx[0]) + else: + idx = index.copy() + result = idx.fillna(idx[0]) + tm.assert_index_equal(result, idx) + assert result is not idx + + msg = "'value' must be a scalar, passed: " + with pytest.raises(TypeError, match=msg): + idx.fillna([idx[0]]) + + idx = index.copy() + values = idx.values + + if isinstance(index, DatetimeIndexOpsMixin): + values[1] = iNaT + elif isinstance(index, (Int64Index, UInt64Index)): + continue + else: + values[1] = np.nan + + if isinstance(index, PeriodIndex): + idx = type(index)(values, freq=index.freq) + else: + idx = type(index)(values) + + expected = np.array([False] * len(idx), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(idx._isnan, expected) + assert idx.hasnans is True + + +def test_dropna(): + # GH 6194 + idx = pd.MultiIndex.from_arrays( + [ + [1, np.nan, 3, np.nan, 5], + [1, 2, np.nan, np.nan, 5], + ["a", "b", "c", np.nan, "e"], + ] + ) + + exp = pd.MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]]) + 
tm.assert_index_equal(idx.dropna(), exp) + tm.assert_index_equal(idx.dropna(how="any"), exp) + + exp = pd.MultiIndex.from_arrays( + [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]] + ) + tm.assert_index_equal(idx.dropna(how="all"), exp) + + msg = "invalid how option: xxx" + with pytest.raises(ValueError, match=msg): + idx.dropna(how="xxx") + + # GH26408 + # test if missing values are dropped for multiindex constructed + # from codes and values + idx = MultiIndex( + levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]], + codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]], + ) + expected = MultiIndex.from_arrays([["128", 2], ["128", 2]]) + tm.assert_index_equal(idx.dropna(), expected) + tm.assert_index_equal(idx.dropna(how="any"), expected) + + expected = MultiIndex.from_arrays( + [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]] + ) + tm.assert_index_equal(idx.dropna(how="all"), expected) + + +def test_nulls(idx): + # this is really a smoke test for the methods + # as these are adequately tested for function elsewhere + + msg = "isna is not defined for MultiIndex" + with pytest.raises(NotImplementedError, match=msg): + idx.isna() + + +@pytest.mark.xfail(reason="isna is not defined for MultiIndex") +def test_hasnans_isnans(idx): + # GH 11343, added tests for hasnans / isnans + index = idx.copy() + + # cases in indices doesn't include NaN + expected = np.array([False] * len(index), dtype=bool) + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans is False + + index = idx.copy() + values = index.values + values[1] = np.nan + + index = type(idx)(values) + + expected = np.array([False] * len(index), dtype=bool) + expected[1] = True + tm.assert_numpy_array_equal(index._isnan, expected) + assert index.hasnans is True + + +def test_nan_stays_float(): + + # GH 7031 + idx0 = pd.MultiIndex( + levels=[["A", "B"], []], codes=[[1, 0], [-1, -1]], names=[0, 1] + ) + idx1 = pd.MultiIndex(levels=[["C"], ["D"]], codes=[[0], 
[0]], names=[0, 1]) + idxm = idx0.join(idx1, how="outer") + assert pd.isna(idx0.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(idxm.get_level_values(1)[:-1]).all() + + df0 = pd.DataFrame([[1, 2]], index=idx0) + df1 = pd.DataFrame([[3, 4]], index=idx1) + dfm = df0 - df1 + assert pd.isna(df0.index.get_level_values(1)).all() + # the following failed in 0.14.1 + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() diff --git a/pandas/tests/indexes/multi/test_monotonic.py b/pandas/tests/indexes/multi/test_monotonic.py new file mode 100644 index 00000000..b5c73d5e --- /dev/null +++ b/pandas/tests/indexes/multi/test_monotonic.py @@ -0,0 +1,230 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, IntervalIndex, MultiIndex +from pandas.api.types import is_scalar + + +def test_is_monotonic_increasing(): + i = MultiIndex.from_product([np.arange(10), np.arange(10)], names=["one", "two"]) + assert i.is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values).is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex.from_product([[1.0, np.nan, 2.0], ["a", "b", "c"]]) + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values).is_monotonic is False + assert 
Index(i.values)._is_strictly_monotonic_increasing is False + + # string ordering + i = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic is False + assert Index(i.values).is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + assert Index(i.values)._is_strictly_monotonic_increasing is False + + i = MultiIndex( + levels=[["bar", "baz", "foo", "qux"], ["mom", "next", "zenith"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic is True + assert Index(i.values).is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [1, 2, 3, 4], + [ + "gb00b03mlx29", + "lu0197800237", + "nl0000289783", + "nl0000289965", + "nl0000301109", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic is False + assert i._is_strictly_monotonic_increasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic is True + assert Index(i.values).is_monotonic is True + assert i._is_strictly_monotonic_increasing is True + assert Index(i.values)._is_strictly_monotonic_increasing is True + + +def test_is_monotonic_decreasing(): + i = MultiIndex.from_product( + [np.arange(9, -1, -1), np.arange(9, -1, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + + i = MultiIndex.from_product( + [np.arange(10), np.arange(10, 0, -1)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert 
i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product( + [np.arange(10, 0, -1), np.arange(10)], names=["one", "two"] + ) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex.from_product([[2.0, np.nan, 1.0], ["c", "b", "a"]]) + assert i.is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + # string ordering + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["three", "two", "one"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is False + assert Index(i.values).is_monotonic_decreasing is False + assert i._is_strictly_monotonic_decreasing is False + assert Index(i.values)._is_strictly_monotonic_decreasing is False + + i = MultiIndex( + levels=[["qux", "foo", "baz", "bar"], ["zenith", "next", "mom"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + # mixed levels, hits the TypeError + i = MultiIndex( + levels=[ + [4, 3, 2, 1], + [ + "nl0000301109", + "nl0000289965", + "nl0000289783", + "lu0197800237", + "gb00b03mlx29", + ], + ], + codes=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]], + names=["household_id", "asset_id"], + ) + + assert i.is_monotonic_decreasing is False + assert 
i._is_strictly_monotonic_decreasing is False + + # empty + i = MultiIndex.from_arrays([[], []]) + assert i.is_monotonic_decreasing is True + assert Index(i.values).is_monotonic_decreasing is True + assert i._is_strictly_monotonic_decreasing is True + assert Index(i.values)._is_strictly_monotonic_decreasing is True + + +def test_is_strictly_monotonic_increasing(): + idx = pd.MultiIndex( + levels=[["bar", "baz"], ["mom", "next"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_increasing is True + assert idx._is_strictly_monotonic_increasing is False + + +def test_is_strictly_monotonic_decreasing(): + idx = pd.MultiIndex( + levels=[["baz", "bar"], ["next", "mom"]], codes=[[0, 0, 1, 1], [0, 0, 0, 1]] + ) + assert idx.is_monotonic_decreasing is True + assert idx._is_strictly_monotonic_decreasing is False + + +def test_searchsorted_monotonic(indices): + # GH17271 + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(indices, (MultiIndex, IntervalIndex)): + return + + # nothing to test if the index is empty + if indices.empty: + return + value = indices[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (indices == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(indices) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if indices.is_monotonic_increasing: + ssm_left = indices._searchsorted_monotonic(value, side="left") + assert is_scalar(ssm_left) + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side="right") + assert is_scalar(ssm_right) + assert expected_right == ssm_right + + ss_left = indices.searchsorted(value, side="left") + assert is_scalar(ss_left) + assert expected_left == ss_left + + ss_right = indices.searchsorted(value, side="right") + assert is_scalar(ss_right) + assert 
expected_right == ss_right + + elif indices.is_monotonic_decreasing: + ssm_left = indices._searchsorted_monotonic(value, side="left") + assert is_scalar(ssm_left) + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side="right") + assert is_scalar(ssm_right) + assert expected_right == ssm_right + + else: + # non-monotonic should raise. + with pytest.raises(ValueError): + indices._searchsorted_monotonic(value, side="left") diff --git a/pandas/tests/indexes/multi/test_names.py b/pandas/tests/indexes/multi/test_names.py new file mode 100644 index 00000000..479b5ef0 --- /dev/null +++ b/pandas/tests/indexes/multi/test_names.py @@ -0,0 +1,143 @@ +import pytest + +import pandas as pd +from pandas import MultiIndex +import pandas._testing as tm + + +def check_level_names(index, names): + assert [level.name for level in index.levels] == list(names) + + +def test_slice_keep_name(): + x = MultiIndex.from_tuples([("a", "b"), (1, 2), ("c", "d")], names=["x", "y"]) + assert x[1:].names == x.names + + +def test_index_name_retained(): + # GH9857 + result = pd.DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]}) + result = result.set_index("z") + result.loc[10] = [9, 10] + df_expected = pd.DataFrame( + {"x": [1, 2, 6, 9], "y": [2, 2, 8, 10], "z": [-5, 0, 5, 10]} + ) + df_expected = df_expected.set_index("z") + tm.assert_frame_equal(result, df_expected) + + +def test_changing_names(idx): + assert [level.name for level in idx.levels] == ["first", "second"] + + view = idx.view() + copy = idx.copy() + shallow_copy = idx._shallow_copy() + + # changing names should not change level names on object + new_names = [name + "a" for name in idx.names] + idx.names = new_names + check_level_names(idx, ["firsta", "seconda"]) + + # and not on copies + check_level_names(view, ["first", "second"]) + check_level_names(copy, ["first", "second"]) + check_level_names(shallow_copy, ["first", "second"]) + + # and copies shouldn't change original + 
shallow_copy.names = [name + "c" for name in shallow_copy.names] + check_level_names(idx, ["firsta", "seconda"]) + + +def test_take_preserve_name(idx): + taken = idx.take([3, 0, 1]) + assert taken.names == idx.names + + +def test_copy_names(): + # Check that adding a "names" parameter to the copy is honored + # GH14302 + multi_idx = pd.Index([(1, 2), (3, 4)], names=["MyName1", "MyName2"]) + multi_idx1 = multi_idx.copy() + + assert multi_idx.equals(multi_idx1) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx1.names == ["MyName1", "MyName2"] + + multi_idx2 = multi_idx.copy(names=["NewName1", "NewName2"]) + + assert multi_idx.equals(multi_idx2) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx2.names == ["NewName1", "NewName2"] + + multi_idx3 = multi_idx.copy(name=["NewName1", "NewName2"]) + + assert multi_idx.equals(multi_idx3) + assert multi_idx.names == ["MyName1", "MyName2"] + assert multi_idx3.names == ["NewName1", "NewName2"] + + +def test_names(idx, index_names): + + # names are assigned in setup + assert index_names == ["first", "second"] + level_names = [level.name for level in idx.levels] + assert level_names == index_names + + # setting bad names on existing + index = idx + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", list(index.names) + ["third"]) + with pytest.raises(ValueError, match="^Length of names"): + setattr(index, "names", []) + + # initializing with bad names (should always be equivalent) + major_axis, minor_axis = idx.levels + major_codes, minor_codes = idx.codes + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["first"], + ) + with pytest.raises(ValueError, match="^Length of names"): + MultiIndex( + levels=[major_axis, minor_axis], + codes=[major_codes, minor_codes], + names=["first", "second", "third"], + ) + + # names are assigned on index, but not transferred 
to the levels + index.names = ["a", "b"] + level_names = [level.name for level in index.levels] + assert level_names == ["a", "b"] + + +def test_duplicate_level_names_access_raises(idx): + # GH19029 + idx.names = ["foo", "foo"] + with pytest.raises(ValueError, match="name foo occurs multiple times"): + idx._get_level_number("foo") + + +def test_get_names_from_levels(): + idx = pd.MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + + assert idx.levels[0].name == "a" + assert idx.levels[1].name == "b" + + +def test_setting_names_from_levels_raises(): + idx = pd.MultiIndex.from_product([["a"], [1, 2]], names=["a", "b"]) + with pytest.raises(RuntimeError, match="set_names"): + idx.levels[0].name = "foo" + + with pytest.raises(RuntimeError, match="set_names"): + idx.levels[1].name = "foo" + + new = pd.Series(1, index=idx.levels[0]) + with pytest.raises(RuntimeError, match="set_names"): + new.index.name = "bar" + + assert pd.Index._no_setting_name is False + assert pd.Int64Index._no_setting_name is False + assert pd.RangeIndex._no_setting_name is False diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py new file mode 100644 index 00000000..b00018d2 --- /dev/null +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -0,0 +1,96 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, date_range +import pandas._testing as tm + + +def test_partial_string_timestamp_multiindex(): + # GH10331 + dr = pd.date_range("2016-01-01", "2016-01-03", freq="12H") + abc = ["a", "b", "c"] + ix = pd.MultiIndex.from_product([dr, abc]) + df = pd.DataFrame({"c1": range(0, 15)}, index=ix) + idx = pd.IndexSlice + + # c1 + # 2016-01-01 00:00:00 a 0 + # b 1 + # c 2 + # 2016-01-01 12:00:00 a 3 + # b 4 + # c 5 + # 2016-01-02 00:00:00 a 6 + # b 7 + # c 8 + # 2016-01-02 12:00:00 a 9 + # b 10 + # c 11 + # 2016-01-03 00:00:00 a 12 + # b 13 + # c 14 + + # partial string 
matching on a single index + for df_swap in (df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)): + df_swap = df_swap.sort_index() + just_a = df_swap.loc["a"] + result = just_a.loc["2016-01-01"] + expected = df.loc[idx[:, "a"], :].iloc[0:2] + expected.index = expected.index.droplevel(1) + tm.assert_frame_equal(result, expected) + + # indexing with IndexSlice + result = df.loc[idx["2016-01-01":"2016-02-01", :], :] + expected = df + tm.assert_frame_equal(result, expected) + + # match on secondary index + result = df_swap.loc[idx[:, "2016-01-01":"2016-01-01"], :] + expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # Even though this syntax works on a single index, this is somewhat + # ambiguous and we don't want to extend this behavior forward to work + # in multi-indexes. This would amount to selecting a scalar from a + # column. + with pytest.raises(KeyError, match="'2016-01-01'"): + df["2016-01-01"] + + # partial string match on year only + result = df.loc["2016"] + expected = df + tm.assert_frame_equal(result, expected) + + # partial string match on date + result = df.loc["2016-01-01"] + expected = df.iloc[0:6] + tm.assert_frame_equal(result, expected) + + # partial string match on date and hour, from middle + result = df.loc["2016-01-02 12"] + expected = df.iloc[9:12] + tm.assert_frame_equal(result, expected) + + # partial string match on secondary index + result = df_swap.loc[idx[:, "2016-01-02"], :] + expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]] + tm.assert_frame_equal(result, expected) + + # tuple selector with partial string match on date + result = df.loc[("2016-01-01", "a"), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # Slicing date on first level should break (of course) + with pytest.raises(KeyError, match="'2016-01-01'"): + df_swap.loc["2016-01-01"] + + # GH12685 (partial string with daily resolution or below) + dr = date_range("2013-01-01", periods=100, freq="D") + ix = 
MultiIndex.from_product([dr, ["a", "b"]]) + df = DataFrame(np.random.randn(200, 1), columns=["A"], index=ix) + + result = df.loc[idx["2013-03":"2013-03", :], :] + expected = df.iloc[118:180] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/multi/test_reindex.py b/pandas/tests/indexes/multi/test_reindex.py new file mode 100644 index 00000000..ceb14aa8 --- /dev/null +++ b/pandas/tests/indexes/multi/test_reindex.py @@ -0,0 +1,103 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, MultiIndex +import pandas._testing as tm + + +def test_reindex(idx): + result, indexer = idx.reindex(list(idx[:4])) + assert isinstance(result, MultiIndex) + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + result, indexer = idx.reindex(list(idx)) + assert isinstance(result, MultiIndex) + assert indexer is None + assert result.names == ["first", "second"] + assert [level.name for level in result.levels] == ["first", "second"] + + +def test_reindex_level(idx): + index = Index(["one"]) + + target, indexer = idx.reindex(index, level="second") + target2, indexer2 = index.reindex(idx, level="second") + + exp_index = idx.join(index, level="second", how="right") + exp_index2 = idx.join(index, level="second", how="left") + + assert target.equals(exp_index) + exp_indexer = np.array([0, 2, 4]) + tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) + + assert target2.equals(exp_index2) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) + + with pytest.raises(TypeError, match="Fill method not supported"): + idx.reindex(idx, method="pad", level="second") + + with pytest.raises(TypeError, match="Fill method not supported"): + index.reindex(index, method="bfill", level="first") + + +def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx): + # GH6552 + idx = idx.copy() + target 
= idx.copy() + idx.names = target.names = [None, None] + + other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]]) + + # list & ndarray cases + assert idx.reindex([])[0].names == [None, None] + assert idx.reindex(np.array([]))[0].names == [None, None] + assert idx.reindex(target.tolist())[0].names == [None, None] + assert idx.reindex(target.values)[0].names == [None, None] + assert idx.reindex(other_dtype.tolist())[0].names == [None, None] + assert idx.reindex(other_dtype.values)[0].names == [None, None] + + idx.names = ["foo", "bar"] + assert idx.reindex([])[0].names == ["foo", "bar"] + assert idx.reindex(np.array([]))[0].names == ["foo", "bar"] + assert idx.reindex(target.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(target.values)[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"] + assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_names_when_target_is_list_or_array(): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"]) + assert idx.reindex([], level=0)[0].names == ["foo", "bar"] + assert idx.reindex([], level=1)[0].names == ["foo", "bar"] + + +def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(): + # GH7774 + idx = pd.MultiIndex.from_product([[0, 1], ["a", "b"]]) + assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64 + assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_ + + +def test_reindex_base(idx): + idx = idx + expected = np.arange(idx.size, dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + +def test_reindex_non_unique(): + idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)]) + a = pd.Series(np.arange(4), index=idx) + new_idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + msg = 
"cannot handle a non-unique multi-index!" + with pytest.raises(ValueError, match=msg): + a.reindex(new_idx) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py new file mode 100644 index 00000000..2e39c714 --- /dev/null +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -0,0 +1,130 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, MultiIndex +import pandas._testing as tm + + +def test_insert(idx): + # key contained in all levels + new_index = idx.insert(0, ("bar", "two")) + assert new_index.equal_levels(idx) + assert new_index[0] == ("bar", "two") + + # key not contained in all levels + new_index = idx.insert(0, ("abc", "three")) + + exp0 = Index(list(idx.levels[0]) + ["abc"], name="first") + tm.assert_index_equal(new_index.levels[0], exp0) + assert new_index.names == ["first", "second"] + + exp1 = Index(list(idx.levels[1]) + ["three"], name="second") + tm.assert_index_equal(new_index.levels[1], exp1) + assert new_index[0] == ("abc", "three") + + # key wrong length + msg = "Item must have length equal to number of levels" + with pytest.raises(ValueError, match=msg): + idx.insert(0, ("foo2",)) + + left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"]) + left.set_index(["1st", "2nd"], inplace=True) + ts = left["3rd"].copy(deep=True) + + left.loc[("b", "x"), "3rd"] = 2 + left.loc[("b", "a"), "3rd"] = -1 + left.loc[("b", "b"), "3rd"] = 3 + left.loc[("a", "x"), "3rd"] = 4 + left.loc[("a", "w"), "3rd"] = 5 + left.loc[("a", "a"), "3rd"] = 6 + + ts.loc[("b", "x")] = 2 + ts.loc["b", "a"] = -1 + ts.loc[("b", "b")] = 3 + ts.loc["a", "x"] = 4 + ts.loc[("a", "w")] = 5 + ts.loc["a", "a"] = 6 + + right = pd.DataFrame( + [ + ["a", "b", 0], + ["b", "d", 1], + ["b", "x", 2], + ["b", "a", -1], + ["b", "b", 3], + ["a", "x", 4], + ["a", "w", 5], + ["a", "a", 6], + ], + columns=["1st", "2nd", "3rd"], + ) + right.set_index(["1st", "2nd"], inplace=True) + # FIXME data types 
changes to float because + # of intermediate nan insertion; + tm.assert_frame_equal(left, right, check_dtype=False) + tm.assert_series_equal(ts, right["3rd"]) + + # GH9250 + idx = ( + [("test1", i) for i in range(5)] + + [("test2", i) for i in range(6)] + + [("test", 17), ("test", 18)] + ) + + left = pd.Series(np.linspace(0, 10, 11), pd.MultiIndex.from_tuples(idx[:-2])) + + left.loc[("test", 17)] = 11 + left.loc[("test", 18)] = 12 + + right = pd.Series(np.linspace(0, 12, 13), pd.MultiIndex.from_tuples(idx)) + + tm.assert_series_equal(left, right) + + +def test_append(idx): + result = idx[:3].append(idx[3:]) + assert result.equals(idx) + + foos = [idx[:1], idx[1:3], idx[3:]] + result = foos[0].append(foos[1:]) + assert result.equals(idx) + + # empty + result = idx.append([]) + assert result.equals(idx) + + +def test_repeat(): + reps = 2 + numbers = [1, 2, 3] + names = np.array(["foo", "bar"]) + + m = MultiIndex.from_product([numbers, names], names=names) + expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names) + tm.assert_index_equal(m.repeat(reps), expected) + + +def test_insert_base(idx): + + result = idx[1:4] + + # test 0th element + assert idx[0:4].equals(result.insert(0, idx[0])) + + +def test_delete_base(idx): + + expected = idx[1:] + result = idx.delete(0) + assert result.equals(expected) + assert result.name == expected.name + + expected = idx[:-1] + result = idx.delete(-1) + assert result.equals(expected) + assert result.name == expected.name + + with pytest.raises((IndexError, ValueError)): + # Exception raised depends on NumPy version. 
+ idx.delete(len(idx)) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py new file mode 100644 index 00000000..841e3b3f --- /dev/null +++ b/pandas/tests/indexes/multi/test_setops.py @@ -0,0 +1,363 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import MultiIndex, Series +import pandas._testing as tm + + +@pytest.mark.parametrize("case", [0.5, "xxx"]) +@pytest.mark.parametrize("sort", [None, False]) +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) +def test_set_ops_error_cases(idx, case, sort, method): + # non-iterable input + msg = "Input must be Index or array-like" + with pytest.raises(TypeError, match=msg): + getattr(idx, method)(case, sort=sort) + + +@pytest.mark.parametrize("sort", [None, False]) +def test_intersection_base(idx, sort): + first = idx[:5] + second = idx[:3] + intersect = first.intersection(second, sort=sort) + + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.intersection(case, sort=sort) + if sort is None: + tm.assert_index_equal(result, second.sort_values()) + assert tm.equalContents(result, second) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.intersection([1, 2, 3], sort=sort) + + +@pytest.mark.parametrize("sort", [None, False]) +def test_union_base(idx, sort): + first = idx[3:] + second = idx[:5] + everything = idx + union = first.union(second, sort=sort) + if sort is None: + tm.assert_index_equal(union, everything.sort_values()) + assert tm.equalContents(union, everything) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.union(case, sort=sort) + if sort is None: + 
tm.assert_index_equal(result, everything.sort_values()) + assert tm.equalContents(result, everything) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.union([1, 2, 3], sort=sort) + + +@pytest.mark.parametrize("sort", [None, False]) +def test_difference_base(idx, sort): + second = idx[4:] + answer = idx[:4] + result = idx.difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + assert result.equals(answer) + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = idx.difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + idx.difference([1, 2, 3], sort=sort) + + +@pytest.mark.parametrize("sort", [None, False]) +def test_symmetric_difference(idx, sort): + first = idx[1:] + second = idx[:-1] + answer = idx[[-1, 0]] + result = first.symmetric_difference(second, sort=sort) + + if sort is None: + answer = answer.sort_values() + + tm.assert_index_equal(result, answer) + + # GH 10149 + cases = [klass(second.values) for klass in [np.array, Series, list]] + for case in cases: + result = first.symmetric_difference(case, sort=sort) + tm.assert_index_equal(result, answer) + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.symmetric_difference([1, 2, 3], sort=sort) + + +def test_empty(idx): + # GH 15270 + assert not idx.empty + assert idx[:0].empty + + +@pytest.mark.parametrize("sort", [None, False]) +def test_difference(idx, sort): + + first = idx + result = first.difference(idx[-3:], sort=sort) + vals = idx[:-3].values + + if sort is None: + vals = sorted(vals) + + expected = MultiIndex.from_tuples(vals, sortorder=0, names=idx.names) + + assert isinstance(result, MultiIndex) + assert result.equals(expected) + 
assert result.names == idx.names + tm.assert_index_equal(result, expected) + + # empty difference: reflexive + result = idx.difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: superset + result = idx[-3:].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # empty difference: degenerate + result = idx[:0].difference(idx, sort=sort) + expected = idx[:0] + assert result.equals(expected) + assert result.names == idx.names + + # names not the same + chunklet = idx[-3:] + chunklet.names = ["foo", "baz"] + result = first.difference(chunklet, sort=sort) + assert result.names == (None, None) + + # empty, but non-equal + result = idx.difference(idx.sortlevel(1)[0], sort=sort) + assert len(result) == 0 + + # raise Exception called with non-MultiIndex + result = first.difference(first.values, sort=sort) + assert result.equals(first[:0]) + + # name from empty array + result = first.difference([], sort=sort) + assert first.equals(result) + assert first.names == result.names + + # name from non-empty array + result = first.difference([("foo", "one")], sort=sort) + expected = pd.MultiIndex.from_tuples( + [("bar", "one"), ("baz", "two"), ("foo", "two"), ("qux", "one"), ("qux", "two")] + ) + expected.names = first.names + assert first.names == result.names + + msg = "other must be a MultiIndex or a list of tuples" + with pytest.raises(TypeError, match=msg): + first.difference([1, 2, 3, 4, 5], sort=sort) + + +def test_difference_sort_special(): + # GH-24959 + idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) + # sort=None, the default + result = idx.difference([]) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_special_true(): + # TODO decide on True behaviour + idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) + result = idx.difference([], sort=True) + 
expected = pd.MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_difference_sort_incomparable(): + # GH-24959 + idx = pd.MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + + other = pd.MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + # sort=None, the default + # MultiIndex.difference deviates here from other difference + # implementations in not catching the TypeError + with pytest.raises(TypeError): + result = idx.difference(other) + + # sort=False + result = idx.difference(other, sort=False) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_incomparable_true(): + # TODO decide on True behaviour + # # sort=True, raises + idx = pd.MultiIndex.from_product([[1, pd.Timestamp("2000"), 2], ["a", "b"]]) + other = pd.MultiIndex.from_product([[3, pd.Timestamp("2000"), 4], ["c", "d"]]) + + with pytest.raises(TypeError): + idx.difference(other, sort=True) + + +@pytest.mark.parametrize("sort", [None, False]) +def test_union(idx, sort): + piece1 = idx[:5][::-1] + piece2 = idx[3:] + + the_union = piece1.union(piece2, sort=sort) + + if sort is None: + tm.assert_index_equal(the_union, idx.sort_values()) + + assert tm.equalContents(the_union, idx) + + # corner case, pass self or empty thing: + the_union = idx.union(idx, sort=sort) + assert the_union is idx + + the_union = idx.union(idx[:0], sort=sort) + assert the_union is idx + + # won't work in python 3 + # tuples = _index.values + # result = _index[:4] | tuples[4:] + # assert result.equals(tuples) + + # not valid for python 3 + # def test_union_with_regular_index(self): + # other = Index(['A', 'B', 'C']) + + # result = other.union(idx) + # assert ('foo', 'one') in result + # assert 'B' in result + + # result2 = _index.union(other) + # assert result.equals(result2) + + +@pytest.mark.parametrize("sort", [None, False]) +def test_intersection(idx, sort): + piece1 = idx[:5][::-1] 
+ piece2 = idx[3:] + + the_int = piece1.intersection(piece2, sort=sort) + + if sort is None: + tm.assert_index_equal(the_int, idx[3:5]) + assert tm.equalContents(the_int, idx[3:5]) + + # corner case, pass self + the_int = idx.intersection(idx, sort=sort) + assert the_int is idx + + # empty intersection: disjoint + empty = idx[:2].intersection(idx[2:], sort=sort) + expected = idx[:0] + assert empty.equals(expected) + + # can't do in python 3 + # tuples = _index.values + # result = _index & tuples + # assert result.equals(tuples) + + +def test_intersect_equal_sort(): + # GH-24959 + idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_intersect_equal_sort_true(): + # TODO decide on True behaviour + idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) + sorted_ = pd.MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + +@pytest.mark.parametrize("slice_", [slice(None), slice(0)]) +def test_union_sort_other_empty(slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) + + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + # MultiIndex does not special case empty.union(idx) + # tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_empty_sort(slice_): + # TODO decide on True behaviour + # # sort=True + idx = pd.MultiIndex.from_product([[1, 0], ["a", "b"]]) + other = idx[:0] + result = idx.union(other, sort=True) + expected = pd.MultiIndex.from_product([[0, 1], ["a", "b"]]) + tm.assert_index_equal(result, expected) + + +def test_union_sort_other_incomparable(): + # 
https://github.com/pandas-dev/pandas/issues/24959 + idx = pd.MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + + # default, sort=None + result = idx.union(idx[:1]) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_incomparable_sort(): + # TODO decide on True behaviour + # # sort=True + idx = pd.MultiIndex.from_product([[1, pd.Timestamp("2000")], ["a", "b"]]) + with pytest.raises(TypeError, match="Cannot compare"): + idx.union(idx[:1], sort=True) + + +@pytest.mark.parametrize( + "method", ["union", "intersection", "difference", "symmetric_difference"] +) +def test_setops_disallow_true(method): + idx1 = pd.MultiIndex.from_product([["a", "b"], [1, 2]]) + idx2 = pd.MultiIndex.from_product([["b", "c"], [1, 2]]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py new file mode 100644 index 00000000..277bd79c --- /dev/null +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -0,0 +1,276 @@ +import numpy as np +import pytest + +from pandas.errors import PerformanceWarning, UnsortedIndexError + +import pandas as pd +from pandas import CategoricalIndex, DataFrame, Index, MultiIndex, RangeIndex +import pandas._testing as tm + + +def test_sortlevel(idx): + import random + + tuples = list(idx) + random.shuffle(tuples) + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert 
sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sortlevel_not_sort_remaining(): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + sorted_idx, _ = mi.sortlevel("A", sort_remaining=False) + assert sorted_idx.equals(mi) + + +def test_sortlevel_deterministic(): + tuples = [ + ("bar", "one"), + ("foo", "two"), + ("qux", "two"), + ("foo", "one"), + ("baz", "two"), + ("qux", "one"), + ] + + index = MultiIndex.from_tuples(tuples) + + sorted_idx, _ = index.sortlevel(0) + expected = MultiIndex.from_tuples(sorted(tuples)) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(0, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + sorted_idx, _ = index.sortlevel(1) + by1 = sorted(tuples, key=lambda x: (x[1], x[0])) + expected = MultiIndex.from_tuples(by1) + assert sorted_idx.equals(expected) + + sorted_idx, _ = index.sortlevel(1, ascending=False) + assert sorted_idx.equals(expected[::-1]) + + +def test_sort(indices): + with pytest.raises(TypeError): + indices.sort() + + +def test_numpy_argsort(idx): + result = np.argsort(idx) + expected = idx.argsort() + tm.assert_numpy_array_equal(result, expected) + + # these are the only two types that perform + # pandas compatibility input validation - the + # rest already perform separate (or no) such + # validation via their 'values' attribute as + # defined in pandas.core.indexes/base.py - they + # cannot be changed at the moment due to + # backwards compatibility concerns + if isinstance(type(idx), (CategoricalIndex, RangeIndex)): + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, axis=1) + + msg = "the 'kind' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, kind="mergesort") + + msg = "the 'order' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argsort(idx, 
order=("a", "b")) + + +def test_unsortedindex(): + # GH 11897 + mi = pd.MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + df = pd.DataFrame([[i, 10 * i] for i in range(6)], index=mi, columns=["one", "two"]) + + # GH 16734: not sorted, but no real slicing + result = df.loc(axis=0)["z", "a"] + expected = df.iloc[0] + tm.assert_series_equal(result, expected) + + with pytest.raises(UnsortedIndexError): + df.loc(axis=0)["z", slice("a")] + df.sort_index(inplace=True) + assert len(df.loc(axis=0)["z", :]) == 2 + + with pytest.raises(KeyError, match="'q'"): + df.loc(axis=0)["q", :] + + +def test_unsortedindex_doc_examples(): + # https://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa + dfm = DataFrame( + {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)} + ) + + dfm = dfm.set_index(["jim", "joe"]) + with tm.assert_produces_warning(PerformanceWarning): + dfm.loc[(1, "z")] + + with pytest.raises(UnsortedIndexError): + dfm.loc[(0, "y"):(1, "z")] + + assert not dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 1 + + # sort it + dfm = dfm.sort_index() + dfm.loc[(1, "z")] + dfm.loc[(0, "y"):(1, "z")] + + assert dfm.index.is_lexsorted() + assert dfm.index.lexsort_depth == 2 + + +def test_reconstruct_sort(): + + # starts off lexsorted & monotonic + mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]]) + assert mi.is_lexsorted() + assert mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert recons.is_lexsorted() + assert recons.is_monotonic + assert mi is recons + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = pd.MultiIndex.from_tuples( + [("z", "a"), ("x", "a"), ("y", "b"), ("x", "b"), ("y", "a"), ("z", "b")], + names=["one", "two"], + ) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = 
mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # cannot convert to lexsorted + mi = MultiIndex( + levels=[["b", "d", "a"], [1, 2, 3]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=["col1", "col2"], + ) + assert not mi.is_lexsorted() + assert not mi.is_monotonic + + recons = mi._sort_levels_monotonic() + assert not recons.is_lexsorted() + assert not recons.is_monotonic + + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + +def test_reconstruct_remove_unused(): + # xref to GH 2770 + df = DataFrame( + [["deleteMe", 1, 9], ["keepMe", 2, 9], ["keepMeToo", 3, 9]], + columns=["first", "second", "third"], + ) + df2 = df.set_index(["first", "second"], drop=False) + df2 = df2[df2["first"] != "deleteMe"] + + # removed levels are there + expected = MultiIndex( + levels=[["deleteMe", "keepMe", "keepMeToo"], [1, 2, 3]], + codes=[[1, 2], [1, 2]], + names=["first", "second"], + ) + result = df2.index + tm.assert_index_equal(result, expected) + + expected = MultiIndex( + levels=[["keepMe", "keepMeToo"], [2, 3]], + codes=[[0, 1], [0, 1]], + names=["first", "second"], + ) + result = df2.index.remove_unused_levels() + tm.assert_index_equal(result, expected) + + # idempotent + result2 = result.remove_unused_levels() + tm.assert_index_equal(result2, expected) + assert result2.is_(result) + + +@pytest.mark.parametrize( + "first_type,second_type", [("int64", "int64"), ("datetime64[D]", "str")] +) +def test_remove_unused_levels_large(first_type, second_type): + # GH16556 + + # because tests should be deterministic (and this test in particular + # checks that levels are removed, which is not the case for every + # random input): + rng = np.random.RandomState(4) # seed is arbitrary value that works + + size = 1 << 16 + df = DataFrame( + dict( + first=rng.randint(0, 1 << 13, size).astype(first_type), + second=rng.randint(0, 
1 << 10, size).astype(second_type), + third=rng.rand(size), + ) + ) + df = df.groupby(["first", "second"]).sum() + df = df[df.third < 0.1] + + result = df.index.remove_unused_levels() + assert len(result.levels[0]) < len(df.index.levels[0]) + assert len(result.levels[1]) < len(df.index.levels[1]) + assert result.equals(df.index) + + expected = df.reset_index().set_index(["first", "second"]).index + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("level0", [["a", "d", "b"], ["a", "d", "b", "unused"]]) +@pytest.mark.parametrize( + "level1", [["w", "x", "y", "z"], ["w", "x", "y", "z", "unused"]] +) +def test_remove_unused_nan(level0, level1): + # GH 18417 + mi = pd.MultiIndex( + levels=[level0, level1], codes=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]] + ) + + result = mi.remove_unused_levels() + tm.assert_index_equal(result, mi) + for level in 0, 1: + assert "unused" not in result.levels[level] + + +def test_argsort(idx): + result = idx.argsort() + expected = idx.values.argsort() + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/period/__init__.py b/pandas/tests/indexes/period/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py new file mode 100644 index 00000000..88e800d6 --- /dev/null +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -0,0 +1,149 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, PeriodIndex, Series, period_range +import pandas._testing as tm + + +class TestPeriodIndex: + def test_asfreq(self): + pi1 = period_range(freq="A", start="1/1/2001", end="1/1/2001") + pi2 = period_range(freq="Q", start="1/1/2001", end="1/1/2001") + pi3 = period_range(freq="M", start="1/1/2001", end="1/1/2001") + pi4 = period_range(freq="D", start="1/1/2001", end="1/1/2001") + pi5 = period_range(freq="H", start="1/1/2001", end="1/1/2001 00:00") + pi6 = period_range(freq="Min", 
start="1/1/2001", end="1/1/2001 00:00") + pi7 = period_range(freq="S", start="1/1/2001", end="1/1/2001 00:00:00") + + assert pi1.asfreq("Q", "S") == pi2 + assert pi1.asfreq("Q", "s") == pi2 + assert pi1.asfreq("M", "start") == pi3 + assert pi1.asfreq("D", "StarT") == pi4 + assert pi1.asfreq("H", "beGIN") == pi5 + assert pi1.asfreq("Min", "S") == pi6 + assert pi1.asfreq("S", "S") == pi7 + + assert pi2.asfreq("A", "S") == pi1 + assert pi2.asfreq("M", "S") == pi3 + assert pi2.asfreq("D", "S") == pi4 + assert pi2.asfreq("H", "S") == pi5 + assert pi2.asfreq("Min", "S") == pi6 + assert pi2.asfreq("S", "S") == pi7 + + assert pi3.asfreq("A", "S") == pi1 + assert pi3.asfreq("Q", "S") == pi2 + assert pi3.asfreq("D", "S") == pi4 + assert pi3.asfreq("H", "S") == pi5 + assert pi3.asfreq("Min", "S") == pi6 + assert pi3.asfreq("S", "S") == pi7 + + assert pi4.asfreq("A", "S") == pi1 + assert pi4.asfreq("Q", "S") == pi2 + assert pi4.asfreq("M", "S") == pi3 + assert pi4.asfreq("H", "S") == pi5 + assert pi4.asfreq("Min", "S") == pi6 + assert pi4.asfreq("S", "S") == pi7 + + assert pi5.asfreq("A", "S") == pi1 + assert pi5.asfreq("Q", "S") == pi2 + assert pi5.asfreq("M", "S") == pi3 + assert pi5.asfreq("D", "S") == pi4 + assert pi5.asfreq("Min", "S") == pi6 + assert pi5.asfreq("S", "S") == pi7 + + assert pi6.asfreq("A", "S") == pi1 + assert pi6.asfreq("Q", "S") == pi2 + assert pi6.asfreq("M", "S") == pi3 + assert pi6.asfreq("D", "S") == pi4 + assert pi6.asfreq("H", "S") == pi5 + assert pi6.asfreq("S", "S") == pi7 + + assert pi7.asfreq("A", "S") == pi1 + assert pi7.asfreq("Q", "S") == pi2 + assert pi7.asfreq("M", "S") == pi3 + assert pi7.asfreq("D", "S") == pi4 + assert pi7.asfreq("H", "S") == pi5 + assert pi7.asfreq("Min", "S") == pi6 + + msg = "How must be one of S or E" + with pytest.raises(ValueError, match=msg): + pi7.asfreq("T", "foo") + result1 = pi1.asfreq("3M") + result2 = pi1.asfreq("M") + expected = period_range(freq="M", start="2001-12", end="2001-12") + 
tm.assert_numpy_array_equal(result1.asi8, expected.asi8) + assert result1.freqstr == "3M" + tm.assert_numpy_array_equal(result2.asi8, expected.asi8) + assert result2.freqstr == "M" + + def test_asfreq_nat(self): + idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2011-04"], freq="M") + result = idx.asfreq(freq="Q") + expected = PeriodIndex(["2011Q1", "2011Q1", "NaT", "2011Q2"], freq="Q") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("freq", ["D", "3D"]) + def test_asfreq_mult_pi(self, freq): + pi = PeriodIndex(["2001-01", "2001-02", "NaT", "2001-03"], freq="2M") + + result = pi.asfreq(freq) + exp = PeriodIndex(["2001-02-28", "2001-03-31", "NaT", "2001-04-30"], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + result = pi.asfreq(freq, how="S") + exp = PeriodIndex(["2001-01-01", "2001-02-01", "NaT", "2001-03-01"], freq=freq) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_asfreq_combined_pi(self): + pi = pd.PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H") + exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="25H") + for freq, how in zip(["1D1H", "1H1D"], ["S", "E"]): + result = pi.asfreq(freq, how=how) + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + for freq in ["1D1H", "1H1D"]: + pi = pd.PeriodIndex( + ["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq + ) + result = pi.asfreq("H") + exp = PeriodIndex(["2001-01-02 00:00", "2001-01-03 02:00", "NaT"], freq="H") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + pi = pd.PeriodIndex( + ["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq=freq + ) + result = pi.asfreq("H", how="S") + exp = PeriodIndex(["2001-01-01 00:00", "2001-01-02 02:00", "NaT"], freq="H") + tm.assert_index_equal(result, exp) + assert result.freq == exp.freq + + def test_asfreq_ts(self): + index = period_range(freq="A", start="1/1/2001", end="12/31/2010") + ts 
= Series(np.random.randn(len(index)), index=index) + df = DataFrame(np.random.randn(len(index), 3), index=index) + + result = ts.asfreq("D", how="end") + df_result = df.asfreq("D", how="end") + exp_index = index.asfreq("D", how="end") + assert len(result) == len(ts) + tm.assert_index_equal(result.index, exp_index) + tm.assert_index_equal(df_result.index, exp_index) + + result = ts.asfreq("D", how="start") + assert len(result) == len(ts) + tm.assert_index_equal(result.index, index.asfreq("D", how="start")) + + def test_astype_asfreq(self): + pi1 = PeriodIndex(["2011-01-01", "2011-02-01", "2011-03-01"], freq="D") + exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="M") + tm.assert_index_equal(pi1.asfreq("M"), exp) + tm.assert_index_equal(pi1.astype("period[M]"), exp) + + exp = PeriodIndex(["2011-01", "2011-02", "2011-03"], freq="3M") + tm.assert_index_equal(pi1.asfreq("3M"), exp) + tm.assert_index_equal(pi1.astype("period[3M]"), exp) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py new file mode 100644 index 00000000..ec386dd9 --- /dev/null +++ b/pandas/tests/indexes/period/test_astype.py @@ -0,0 +1,128 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, Int64Index, NaT, Period, PeriodIndex, period_range +import pandas._testing as tm + + +class TestPeriodIndexAsType: + @pytest.mark.parametrize("dtype", [float, "timedelta64", "timedelta64[ns]"]) + def test_astype_raises(self, dtype): + # GH#13149, GH#13209 + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D") + msg = "Cannot cast PeriodArray to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_astype_conversion(self): + # GH#13149, GH#13209 + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq="D") + + result = idx.astype(object) + expected = Index( + [Period("2016-05-16", freq="D")] + [Period(NaT, freq="D")] * 3, + dtype="object", + ) + tm.assert_index_equal(result, 
expected) + + result = idx.astype(np.int64) + expected = Int64Index([16937] + [-9223372036854775808] * 3, dtype=np.int64) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index(str(x) for x in idx) + tm.assert_index_equal(result, expected) + + idx = period_range("1990", "2009", freq="A") + result = idx.astype("i8") + tm.assert_index_equal(result, Index(idx.asi8)) + tm.assert_numpy_array_equal(result.values, idx.asi8) + + def test_astype_uint(self): + arr = period_range("2000", periods=2) + expected = pd.UInt64Index(np.array([10957, 10958], dtype="uint64")) + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + + def test_astype_object(self): + idx = pd.PeriodIndex([], freq="M") + + exp = np.array([], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M") + + exp = np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + exp = np.array([pd.Period("2011-01-01", freq="D"), pd.NaT], dtype=object) + idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D") + tm.assert_numpy_array_equal(idx.astype(object).values, exp) + tm.assert_numpy_array_equal(idx._mpl_repr(), exp) + + # TODO: de-duplicate this version (from test_ops) with the one above + # (from test_period) + def test_astype_object2(self): + idx = pd.period_range(start="2013-01-01", periods=4, freq="M", name="idx") + expected_list = [ + pd.Period("2013-01-31", freq="M"), + pd.Period("2013-02-28", freq="M"), + pd.Period("2013-03-31", freq="M"), + pd.Period("2013-04-30", freq="M"), + ] + expected = pd.Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, 
expected) + assert result.name == expected.name + assert idx.tolist() == expected_list + + idx = PeriodIndex( + ["2013-01-01", "2013-01-02", "NaT", "2013-01-04"], freq="D", name="idx" + ) + expected_list = [ + pd.Period("2013-01-01", freq="D"), + pd.Period("2013-01-02", freq="D"), + pd.Period("NaT", freq="D"), + pd.Period("2013-01-04", freq="D"), + ] + expected = pd.Index(expected_list, dtype=object, name="idx") + result = idx.astype(object) + assert isinstance(result, Index) + assert result.dtype == object + tm.assert_index_equal(result, expected) + for i in [0, 1, 3]: + assert result[i] == expected[i] + assert result[2] is pd.NaT + assert result.name == expected.name + + result_list = idx.tolist() + for i in [0, 1, 3]: + assert result_list[i] == expected_list[i] + assert result_list[2] is pd.NaT + + def test_astype_category(self): + obj = pd.period_range("2000", periods=2) + result = obj.astype("category") + expected = pd.CategoricalIndex( + [pd.Period("2000-01-01", freq="D"), pd.Period("2000-01-02", freq="D")] + ) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + def test_astype_array_fallback(self): + obj = pd.period_range("2000", periods=2) + result = obj.astype(bool) + expected = pd.Index(np.array([True, True])) + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py new file mode 100644 index 00000000..27ee915e --- /dev/null +++ b/pandas/tests/indexes/period/test_constructors.py @@ -0,0 +1,520 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs.period import IncompatibleFrequency + +from pandas.core.dtypes.dtypes import PeriodDtype + +import pandas as pd +from pandas import Index, Period, PeriodIndex, Series, 
date_range, offsets, period_range +import pandas._testing as tm +from pandas.core.arrays import PeriodArray + + +class TestPeriodIndex: + def test_construction_base_constructor(self): + # GH 13664 + arr = [pd.Period("2011-01", freq="M"), pd.NaT, pd.Period("2011-03", freq="M")] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), pd.PeriodIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Period("2011-03", freq="M")] + tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), pd.PeriodIndex(np.array(arr))) + + arr = [pd.Period("2011-01", freq="M"), pd.NaT, pd.Period("2011-03", freq="D")] + tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object)) + + tm.assert_index_equal( + pd.Index(np.array(arr)), pd.Index(np.array(arr), dtype=object) + ) + + def test_base_constructor_with_period_dtype(self): + dtype = PeriodDtype("D") + values = ["2011-01-01", "2012-03-04", "2014-05-01"] + result = pd.Index(values, dtype=dtype) + + expected = pd.PeriodIndex(values, dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "values_constructor", [list, np.array, PeriodIndex, PeriodArray._from_sequence] + ) + def test_index_object_dtype(self, values_constructor): + # Index(periods, dtype=object) is an Index (not an PeriodIndex) + periods = [ + pd.Period("2011-01", freq="M"), + pd.NaT, + pd.Period("2011-03", freq="M"), + ] + values = values_constructor(periods) + result = Index(values, dtype=object) + + assert type(result) is Index + tm.assert_numpy_array_equal(result.values, np.array(values)) + + def test_constructor_use_start_freq(self): + # GH #1118 + p = Period("4/2/2012", freq="B") + expected = period_range(start="4/2/2012", periods=10, freq="B") + + index = period_range(start=p, periods=10) + tm.assert_index_equal(index, expected) + + def test_constructor_field_arrays(self): + # GH #1264 + + years = np.arange(1990, 2010).repeat(4)[2:-2] 
+ quarters = np.tile(np.arange(1, 5), 20)[2:-2] + + index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC") + expected = period_range("1990Q3", "2009Q2", freq="Q-DEC") + tm.assert_index_equal(index, expected) + + index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC") + tm.assert_numpy_array_equal(index.asi8, index2.asi8) + + index = PeriodIndex(year=years, quarter=quarters) + tm.assert_index_equal(index, expected) + + years = [2007, 2007, 2007] + months = [1, 2] + + msg = "Mismatched Period array lengths" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq="M") + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq="2M") + + years = [2007, 2007, 2007] + months = [1, 2, 3] + idx = PeriodIndex(year=years, month=months, freq="M") + exp = period_range("2007-01", periods=3, freq="M") + tm.assert_index_equal(idx, exp) + + def test_constructor_U(self): + # U was used as undefined period + with pytest.raises(ValueError, match="Invalid frequency: X"): + period_range("2007-1-1", periods=500, freq="X") + + def test_constructor_nano(self): + idx = period_range( + start=Period(ordinal=1, freq="N"), end=Period(ordinal=4, freq="N"), freq="N" + ) + exp = PeriodIndex( + [ + Period(ordinal=1, freq="N"), + Period(ordinal=2, freq="N"), + Period(ordinal=3, freq="N"), + Period(ordinal=4, freq="N"), + ], + freq="N", + ) + tm.assert_index_equal(idx, exp) + + def test_constructor_arrays_negative_year(self): + years = np.arange(1960, 2000, dtype=np.int64).repeat(4) + quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + tm.assert_index_equal(pindex.year, pd.Index(years)) + tm.assert_index_equal(pindex.quarter, pd.Index(quarters)) + + def test_constructor_invalid_quarters(self): + msg = "Quarter must be 1 <= q <= 4" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC") 
+ + def test_constructor_corner(self): + result = period_range("2007-01", periods=10.5, freq="M") + exp = period_range("2007-01", periods=10, freq="M") + tm.assert_index_equal(result, exp) + + def test_constructor_fromarraylike(self): + idx = period_range("2007-01", periods=20, freq="M") + + # values is an array of Period, thus can retrieve freq + tm.assert_index_equal(PeriodIndex(idx.values), idx) + tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) + + msg = "freq not specified and cannot be inferred" + with pytest.raises(ValueError, match=msg): + PeriodIndex(idx._ndarray_values) + with pytest.raises(ValueError, match=msg): + PeriodIndex(list(idx._ndarray_values)) + + msg = "'Period' object is not iterable" + with pytest.raises(TypeError, match=msg): + PeriodIndex(data=Period("2007", freq="A")) + + result = PeriodIndex(iter(idx)) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx) + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq="M") + tm.assert_index_equal(result, idx) + + result = PeriodIndex(idx, freq=offsets.MonthEnd()) + tm.assert_index_equal(result, idx) + assert result.freq == "M" + + result = PeriodIndex(idx, freq="2M") + tm.assert_index_equal(result, idx.asfreq("2M")) + assert result.freq == "2M" + + result = PeriodIndex(idx, freq=offsets.MonthEnd(2)) + tm.assert_index_equal(result, idx.asfreq("2M")) + assert result.freq == "2M" + + result = PeriodIndex(idx, freq="D") + exp = idx.asfreq("D", "e") + tm.assert_index_equal(result, exp) + + def test_constructor_datetime64arr(self): + vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) + vals = vals.view(np.dtype("M8[us]")) + + msg = r"Wrong dtype: datetime64\[us\]" + with pytest.raises(ValueError, match=msg): + PeriodIndex(vals, freq="D") + + @pytest.mark.parametrize("box", [None, "series", "index"]) + def test_constructor_datetime64arr_ok(self, box): + # https://github.com/pandas-dev/pandas/issues/23438 + data = pd.date_range("2017", periods=4, 
freq="M") + if box is None: + data = data._values + elif box == "series": + data = pd.Series(data) + + result = PeriodIndex(data, freq="D") + expected = PeriodIndex( + ["2017-01-31", "2017-02-28", "2017-03-31", "2017-04-30"], freq="D" + ) + tm.assert_index_equal(result, expected) + + def test_constructor_dtype(self): + # passing a dtype with a tz should localize + idx = PeriodIndex(["2013-01", "2013-03"], dtype="period[M]") + exp = PeriodIndex(["2013-01", "2013-03"], freq="M") + tm.assert_index_equal(idx, exp) + assert idx.dtype == "period[M]" + + idx = PeriodIndex(["2013-01-05", "2013-03-05"], dtype="period[3D]") + exp = PeriodIndex(["2013-01-05", "2013-03-05"], freq="3D") + tm.assert_index_equal(idx, exp) + assert idx.dtype == "period[3D]" + + # if we already have a freq and its not the same, then asfreq + # (not changed) + idx = PeriodIndex(["2013-01-01", "2013-01-02"], freq="D") + + res = PeriodIndex(idx, dtype="period[M]") + exp = PeriodIndex(["2013-01", "2013-01"], freq="M") + tm.assert_index_equal(res, exp) + assert res.dtype == "period[M]" + + res = PeriodIndex(idx, freq="M") + tm.assert_index_equal(res, exp) + assert res.dtype == "period[M]" + + msg = "specified freq and dtype are different" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(["2011-01"], freq="M", dtype="period[D]") + + def test_constructor_empty(self): + idx = pd.PeriodIndex([], freq="M") + assert isinstance(idx, PeriodIndex) + assert len(idx) == 0 + assert idx.freq == "M" + + with pytest.raises(ValueError, match="freq not specified"): + pd.PeriodIndex([]) + + def test_constructor_pi_nat(self): + idx = PeriodIndex( + [Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="M")] + ) + exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + np.array([Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="M")]) + ) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + [pd.NaT, pd.NaT, 
Period("2011-01", freq="M"), Period("2011-01", freq="M")] + ) + exp = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex( + np.array( + [ + pd.NaT, + pd.NaT, + Period("2011-01", freq="M"), + Period("2011-01", freq="M"), + ] + ) + ) + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([pd.NaT, pd.NaT, "2011-01", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex([pd.NaT, pd.NaT]) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(np.array([pd.NaT, pd.NaT])) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(["NaT", "NaT"]) + + with pytest.raises(ValueError, match="freq not specified"): + PeriodIndex(np.array(["NaT", "NaT"])) + + def test_constructor_incompat_freq(self): + msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)" + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + [Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="D")] + ) + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + np.array( + [Period("2011-01", freq="M"), pd.NaT, Period("2011-01", freq="D")] + ) + ) + + # first element is pd.NaT + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + [pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")] + ) + + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex( + np.array( + [pd.NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")] + ) + ) + + def test_constructor_mixed(self): + idx = PeriodIndex(["2011-01", pd.NaT, Period("2011-01", freq="M")]) + exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex(["NaT", pd.NaT, Period("2011-01", freq="M")]) + exp = PeriodIndex(["NaT", "NaT", "2011-01"], freq="M") + tm.assert_index_equal(idx, exp) + + idx = PeriodIndex([Period("2011-01-01", 
freq="D"), pd.NaT, "2012-01-01"]) + exp = PeriodIndex(["2011-01-01", "NaT", "2012-01-01"], freq="D") + tm.assert_index_equal(idx, exp) + + def test_constructor_simple_new(self): + idx = period_range("2007-01", name="p", periods=2, freq="M") + result = idx._simple_new(idx, name="p", freq=idx.freq) + tm.assert_index_equal(result, idx) + + result = idx._simple_new(idx.astype("i8"), name="p", freq=idx.freq) + tm.assert_index_equal(result, idx) + + def test_constructor_simple_new_empty(self): + # GH13079 + idx = PeriodIndex([], freq="M", name="p") + result = idx._simple_new(idx, name="p", freq="M") + tm.assert_index_equal(result, idx) + + @pytest.mark.parametrize("floats", [[1.1, 2.1], np.array([1.1, 2.1])]) + def test_constructor_floats(self, floats): + msg = r"PeriodIndex\._simple_new does not accept floats" + with pytest.raises(TypeError, match=msg): + pd.PeriodIndex._simple_new(floats, freq="M") + + msg = "PeriodIndex does not allow floating point in construction" + with pytest.raises(TypeError, match=msg): + pd.PeriodIndex(floats, freq="M") + + def test_constructor_nat(self): + msg = "start and end must not be NaT" + with pytest.raises(ValueError, match=msg): + period_range(start="NaT", end="2011-01-01", freq="M") + with pytest.raises(ValueError, match=msg): + period_range(start="2011-01-01", end="NaT", freq="M") + + def test_constructor_year_and_quarter(self): + year = pd.Series([2001, 2002, 2003]) + quarter = year - 2000 + idx = PeriodIndex(year=year, quarter=quarter) + strs = ["{t[0]:d}Q{t[1]:d}".format(t=t) for t in zip(quarter, year)] + lops = list(map(Period, strs)) + p = PeriodIndex(lops) + tm.assert_index_equal(p, idx) + + def test_constructor_freq_mult(self): + # GH #7811 + pidx = period_range(start="2014-01", freq="2M", periods=4) + expected = PeriodIndex(["2014-01", "2014-03", "2014-05", "2014-07"], freq="2M") + tm.assert_index_equal(pidx, expected) + + pidx = period_range(start="2014-01-02", end="2014-01-15", freq="3D") + expected = PeriodIndex( + 
["2014-01-02", "2014-01-05", "2014-01-08", "2014-01-11", "2014-01-14"], + freq="3D", + ) + tm.assert_index_equal(pidx, expected) + + pidx = period_range(end="2014-01-01 17:00", freq="4H", periods=3) + expected = PeriodIndex( + ["2014-01-01 09:00", "2014-01-01 13:00", "2014-01-01 17:00"], freq="4H" + ) + tm.assert_index_equal(pidx, expected) + + msg = "Frequency must be positive, because it represents span: -1M" + with pytest.raises(ValueError, match=msg): + PeriodIndex(["2011-01"], freq="-1M") + + msg = "Frequency must be positive, because it represents span: 0M" + with pytest.raises(ValueError, match=msg): + PeriodIndex(["2011-01"], freq="0M") + + msg = "Frequency must be positive, because it represents span: 0M" + with pytest.raises(ValueError, match=msg): + period_range("2011-01", periods=3, freq="0M") + + @pytest.mark.parametrize("freq", ["A", "M", "D", "T", "S"]) + @pytest.mark.parametrize("mult", [1, 2, 3, 4, 5]) + def test_constructor_freq_mult_dti_compat(self, mult, freq): + freqstr = str(mult) + freq + pidx = period_range(start="2014-04-01", freq=freqstr, periods=10) + expected = date_range(start="2014-04-01", freq=freqstr, periods=10).to_period( + freqstr + ) + tm.assert_index_equal(pidx, expected) + + def test_constructor_freq_combined(self): + for freq in ["1D1H", "1H1D"]: + pidx = PeriodIndex(["2016-01-01", "2016-01-02"], freq=freq) + expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 00:00"], freq="25H") + for freq in ["1D1H", "1H1D"]: + pidx = period_range(start="2016-01-01", periods=2, freq=freq) + expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 01:00"], freq="25H") + tm.assert_index_equal(pidx, expected) + + def test_constructor(self): + pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") + assert len(pi) == 9 + + pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") + assert len(pi) == 4 * 9 + + pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") + assert len(pi) == 12 * 9 + + pi = period_range(freq="D", 
start="1/1/2001", end="12/31/2009") + assert len(pi) == 365 * 9 + 2 + + pi = period_range(freq="B", start="1/1/2001", end="12/31/2009") + assert len(pi) == 261 * 9 + + pi = period_range(freq="H", start="1/1/2001", end="12/31/2001 23:00") + assert len(pi) == 365 * 24 + + pi = period_range(freq="Min", start="1/1/2001", end="1/1/2001 23:59") + assert len(pi) == 24 * 60 + + pi = period_range(freq="S", start="1/1/2001", end="1/1/2001 23:59:59") + assert len(pi) == 24 * 60 * 60 + + start = Period("02-Apr-2005", "B") + i1 = period_range(start=start, periods=20) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start + + end_intv = Period("2006-12-31", "W") + i1 = period_range(end=end_intv, periods=10) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv + + end_intv = Period("2006-12-31", "1w") + i2 = period_range(end=end_intv, periods=10) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq + + end_intv = Period("2006-12-31", ("w", 1)) + i2 = period_range(end=end_intv, periods=10) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq + + end_intv = Period("2005-05-01", "B") + i1 = period_range(start=start, end=end_intv) + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) + assert len(i2) == 2 + assert i2[0] == end_intv + + i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) + assert len(i2) == 2 + assert i2[0] == end_intv + + # Mixed freq should fail + vals = [end_intv, Period("2006-12-31", "w")] + msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) + vals = np.array(vals) + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) + + @pytest.mark.parametrize( + "freq", ["M", "Q", "A", "D", "B", "T", "S", "L", "U", "N", "H"] + ) + def test_recreate_from_data(self, freq): + org = 
period_range(start="2001/04/01", freq=freq, periods=1) + idx = PeriodIndex(org.values, freq=freq) + tm.assert_index_equal(idx, org) + + def test_map_with_string_constructor(self): + raw = [2005, 2007, 2009] + index = PeriodIndex(raw, freq="A") + + expected = Index([str(num) for num in raw]) + res = index.map(str) + + # should return an Index + assert isinstance(res, Index) + + # preserve element types + assert all(isinstance(resi, str) for resi in res) + + # lastly, values should compare equal + tm.assert_index_equal(res, expected) + + +class TestSeriesPeriod: + def setup_method(self, method): + self.series = Series(period_range("2000-01-01", periods=10, freq="D")) + + def test_constructor_cant_cast_period(self): + msg = "Cannot cast PeriodArray to dtype float64" + with pytest.raises(TypeError, match=msg): + Series(period_range("2000-01-01", periods=10, freq="D"), dtype=float) + + def test_constructor_cast_object(self): + s = Series(period_range("1/1/2000", periods=10), dtype=PeriodDtype("D")) + exp = Series(period_range("1/1/2000", periods=10)) + tm.assert_series_equal(s, exp) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py new file mode 100644 index 00000000..5db373a9 --- /dev/null +++ b/pandas/tests/indexes/period/test_formats.py @@ -0,0 +1,211 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import PeriodIndex +import pandas._testing as tm + + +def test_to_native_types(): + index = PeriodIndex(["2017-01-01", "2017-01-02", "2017-01-03"], freq="D") + + # First, with no arguments. 
+ expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype="=U10") + + result = index.to_native_types() + tm.assert_numpy_array_equal(result, expected) + + # No NaN values, so na_rep has no effect + result = index.to_native_types(na_rep="pandas") + tm.assert_numpy_array_equal(result, expected) + + # Make sure slicing works + expected = np.array(["2017-01-01", "2017-01-03"], dtype="=U10") + + result = index.to_native_types([0, 2]) + tm.assert_numpy_array_equal(result, expected) + + # Make sure date formatting works + expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype="=U10") + + result = index.to_native_types(date_format="%m-%Y-%d") + tm.assert_numpy_array_equal(result, expected) + + # NULL object handling should work + index = PeriodIndex(["2017-01-01", pd.NaT, "2017-01-03"], freq="D") + expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object) + + result = index.to_native_types() + tm.assert_numpy_array_equal(result, expected) + + expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object) + + result = index.to_native_types(na_rep="pandas") + tm.assert_numpy_array_equal(result, expected) + + +class TestPeriodIndexRendering: + def test_frame_repr(self): + df = pd.DataFrame({"A": [1, 2, 3]}, index=pd.date_range("2000", periods=3)) + result = repr(df) + expected = " A\n2000-01-01 1\n2000-01-02 2\n2000-01-03 3" + assert result == expected + + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) + def test_representation(self, method): + # GH#7601 + idx1 = PeriodIndex([], freq="D") + idx2 = PeriodIndex(["2011-01-01"], freq="D") + idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") + idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A") + idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H") + idx7 = pd.period_range("2013Q1", periods=1, freq="Q") + idx8 = pd.period_range("2013Q1", periods=2, 
freq="Q") + idx9 = pd.period_range("2013Q1", periods=3, freq="Q") + idx10 = PeriodIndex(["2011-01-01", "2011-02-01"], freq="3D") + + exp1 = "PeriodIndex([], dtype='period[D]', freq='D')" + + exp2 = "PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')" + + exp3 = "PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', freq='D')" + + exp4 = ( + "PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " + "dtype='period[D]', freq='D')" + ) + + exp5 = ( + "PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', " + "freq='A-DEC')" + ) + + exp6 = ( + "PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], " + "dtype='period[H]', freq='H')" + ) + + exp7 = "PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', freq='Q-DEC')" + + exp8 = "PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', freq='Q-DEC')" + + exp9 = ( + "PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], " + "dtype='period[Q-DEC]', freq='Q-DEC')" + ) + + exp10 = ( + "PeriodIndex(['2011-01-01', '2011-02-01'], " + "dtype='period[3D]', freq='3D')" + ) + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9, idx10], + [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9, exp10], + ): + result = getattr(idx, method)() + assert result == expected + + def test_representation_to_series(self): + # GH#10971 + idx1 = PeriodIndex([], freq="D") + idx2 = PeriodIndex(["2011-01-01"], freq="D") + idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") + idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A") + idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H") + + idx7 = pd.period_range("2013Q1", periods=1, freq="Q") + idx8 = pd.period_range("2013Q1", periods=2, freq="Q") + idx9 = pd.period_range("2013Q1", periods=3, freq="Q") + + exp1 = """Series([], dtype: period[D])""" + + exp2 = """0 2011-01-01 +dtype: period[D]""" + + exp3 = """0 2011-01-01 +1 2011-01-02 +dtype: period[D]""" 
+ + exp4 = """0 2011-01-01 +1 2011-01-02 +2 2011-01-03 +dtype: period[D]""" + + exp5 = """0 2011 +1 2012 +2 2013 +dtype: period[A-DEC]""" + + exp6 = """0 2011-01-01 09:00 +1 2012-02-01 10:00 +2 NaT +dtype: period[H]""" + + exp7 = """0 2013Q1 +dtype: period[Q-DEC]""" + + exp8 = """0 2013Q1 +1 2013Q2 +dtype: period[Q-DEC]""" + + exp9 = """0 2013Q1 +1 2013Q2 +2 2013Q3 +dtype: period[Q-DEC]""" + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9], + [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9], + ): + result = repr(pd.Series(idx)) + assert result == expected + + def test_summary(self): + # GH#9116 + idx1 = PeriodIndex([], freq="D") + idx2 = PeriodIndex(["2011-01-01"], freq="D") + idx3 = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") + idx4 = PeriodIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + idx5 = PeriodIndex(["2011", "2012", "2013"], freq="A") + idx6 = PeriodIndex(["2011-01-01 09:00", "2012-02-01 10:00", "NaT"], freq="H") + + idx7 = pd.period_range("2013Q1", periods=1, freq="Q") + idx8 = pd.period_range("2013Q1", periods=2, freq="Q") + idx9 = pd.period_range("2013Q1", periods=3, freq="Q") + + exp1 = """PeriodIndex: 0 entries +Freq: D""" + + exp2 = """PeriodIndex: 1 entries, 2011-01-01 to 2011-01-01 +Freq: D""" + + exp3 = """PeriodIndex: 2 entries, 2011-01-01 to 2011-01-02 +Freq: D""" + + exp4 = """PeriodIndex: 3 entries, 2011-01-01 to 2011-01-03 +Freq: D""" + + exp5 = """PeriodIndex: 3 entries, 2011 to 2013 +Freq: A-DEC""" + + exp6 = """PeriodIndex: 3 entries, 2011-01-01 09:00 to NaT +Freq: H""" + + exp7 = """PeriodIndex: 1 entries, 2013Q1 to 2013Q1 +Freq: Q-DEC""" + + exp8 = """PeriodIndex: 2 entries, 2013Q1 to 2013Q2 +Freq: Q-DEC""" + + exp9 = """PeriodIndex: 3 entries, 2013Q1 to 2013Q3 +Freq: Q-DEC""" + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9], + [exp1, exp2, exp3, exp4, exp5, exp6, exp7, exp8, exp9], + ): + result = idx._summary() + assert result == 
expected diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py new file mode 100644 index 00000000..8a5bb2bf --- /dev/null +++ b/pandas/tests/indexes/period/test_indexing.py @@ -0,0 +1,731 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas._libs.tslibs import period as libperiod + +import pandas as pd +from pandas import DatetimeIndex, Period, PeriodIndex, Series, notna, period_range +import pandas._testing as tm + + +class TestGetItem: + def test_ellipsis(self): + # GH#21282 + idx = period_range("2011-01-01", "2011-01-31", freq="D", name="idx") + + result = idx[...] + assert result.equals(idx) + assert result is not idx + + def test_getitem(self): + idx1 = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") + + for idx in [idx1]: + result = idx[0] + assert result == pd.Period("2011-01-01", freq="D") + + result = idx[-1] + assert result == pd.Period("2011-01-31", freq="D") + + result = idx[0:5] + expected = pd.period_range("2011-01-01", "2011-01-05", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == "D" + + result = idx[0:10:2] + expected = pd.PeriodIndex( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-07", "2011-01-09"], + freq="D", + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == "D" + + result = idx[-20:-5:3] + expected = pd.PeriodIndex( + ["2011-01-12", "2011-01-15", "2011-01-18", "2011-01-21", "2011-01-24"], + freq="D", + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == "D" + + result = idx[4::-1] + expected = PeriodIndex( + ["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"], + freq="D", + name="idx", + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == 
"D" + + def test_getitem_index(self): + idx = period_range("2007-01", periods=10, freq="M", name="x") + + result = idx[[1, 3, 5]] + exp = pd.PeriodIndex(["2007-02", "2007-04", "2007-06"], freq="M", name="x") + tm.assert_index_equal(result, exp) + + result = idx[[True, True, False, False, False, True, True, False, False, False]] + exp = pd.PeriodIndex( + ["2007-01", "2007-02", "2007-06", "2007-07"], freq="M", name="x" + ) + tm.assert_index_equal(result, exp) + + def test_getitem_partial(self): + rng = period_range("2007-01", periods=50, freq="M") + ts = Series(np.random.randn(len(rng)), rng) + + with pytest.raises(KeyError, match=r"^'2006'$"): + ts["2006"] + + result = ts["2008"] + assert (result.index.year == 2008).all() + + result = ts["2008":"2009"] + assert len(result) == 24 + + result = ts["2008-1":"2009-12"] + assert len(result) == 24 + + result = ts["2008Q1":"2009Q4"] + assert len(result) == 24 + + result = ts[:"2009"] + assert len(result) == 36 + + result = ts["2009":] + assert len(result) == 50 - 24 + + exp = result + result = ts[24:] + tm.assert_series_equal(exp, result) + + ts = ts[10:].append(ts[10:]) + msg = "left slice bound for non-unique label: '2008'" + with pytest.raises(KeyError, match=msg): + ts[slice("2008", "2009")] + + def test_getitem_datetime(self): + rng = period_range(start="2012-01-01", periods=10, freq="W-MON") + ts = Series(range(len(rng)), index=rng) + + dt1 = datetime(2011, 10, 2) + dt4 = datetime(2012, 4, 20) + + rs = ts[dt1:dt4] + tm.assert_series_equal(rs, ts) + + def test_getitem_nat(self): + idx = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M") + assert idx[0] == pd.Period("2011-01", freq="M") + assert idx[1] is pd.NaT + + s = pd.Series([0, 1, 2], index=idx) + assert s[pd.NaT] == 1 + + s = pd.Series(idx, index=idx) + assert s[pd.Period("2011-01", freq="M")] == pd.Period("2011-01", freq="M") + assert s[pd.NaT] is pd.NaT + + def test_getitem_list_periods(self): + # GH 7710 + rng = period_range(start="2012-01-01", 
periods=10, freq="D") + ts = Series(range(len(rng)), index=rng) + exp = ts.iloc[[1]] + tm.assert_series_equal(ts[[Period("2012-01-02", freq="D")]], exp) + + def test_getitem_seconds(self): + # GH#6716 + didx = pd.date_range(start="2013/01/01 09:00:00", freq="S", periods=4000) + pidx = period_range(start="2013/01/01 09:00:00", freq="S", periods=4000) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for v in values: + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s["2013/01/01 10:00"], s[3600:3660]) + tm.assert_series_equal(s["2013/01/01 9H"], s[:3600]) + for d in ["2013/01/01", "2013/01", "2013"]: + tm.assert_series_equal(s[d], s) + + def test_getitem_day(self): + # GH#6716 + # Confirm DatetimeIndex and PeriodIndex works identically + didx = pd.date_range(start="2013/01/01", freq="D", periods=400) + pidx = period_range(start="2013/01/01", freq="D", periods=400) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for v in values: + + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue + + s = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(s["2013/01"], s[0:31]) + tm.assert_series_equal(s["2013/02"], s[31:59]) + tm.assert_series_equal(s["2014"], s[365:]) + + invalid = ["2013/02/01 9H", "2013/02/01 09:00"] + for v in invalid: + with pytest.raises(KeyError, match=v): + s[v] + + +class TestWhere: + @pytest.mark.parametrize("klass", [list, tuple, np.array, Series]) + def test_where(self, klass): + i = 
period_range("20130101", periods=5, freq="D") + cond = [True] * len(i) + expected = i + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + cond = [False] + [True] * (len(i) - 1) + expected = PeriodIndex([pd.NaT] + i[1:].tolist(), freq="D") + result = i.where(klass(cond)) + tm.assert_index_equal(result, expected) + + def test_where_other(self): + i = period_range("20130101", periods=5, freq="D") + for arr in [np.nan, pd.NaT]: + result = i.where(notna(i), other=np.nan) + expected = i + tm.assert_index_equal(result, expected) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq="D") + result = i.where(notna(i2), i2) + tm.assert_index_equal(result, i2) + + i2 = i.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq="D") + result = i.where(notna(i2), i2.values) + tm.assert_index_equal(result, i2) + + def test_where_invalid_dtypes(self): + pi = period_range("20130101", periods=5, freq="D") + + i2 = pi.copy() + i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + pi[2:].tolist(), freq="D") + + with pytest.raises(TypeError, match="Where requires matching dtype"): + pi.where(notna(i2), i2.asi8) + + with pytest.raises(TypeError, match="Where requires matching dtype"): + pi.where(notna(i2), i2.asi8.view("timedelta64[ns]")) + + with pytest.raises(TypeError, match="Where requires matching dtype"): + pi.where(notna(i2), i2.to_timestamp("S")) + + +class TestTake: + def test_take(self): + # GH#10295 + idx1 = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") + + for idx in [idx1]: + result = idx.take([0]) + assert result == pd.Period("2011-01-01", freq="D") + + result = idx.take([5]) + assert result == pd.Period("2011-01-06", freq="D") + + result = idx.take([0, 1, 2]) + expected = pd.period_range("2011-01-01", "2011-01-03", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == "D" + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = pd.PeriodIndex( 
+ ["2011-01-01", "2011-01-03", "2011-01-05"], freq="D", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == "D" + + result = idx.take([7, 4, 1]) + expected = pd.PeriodIndex( + ["2011-01-08", "2011-01-05", "2011-01-02"], freq="D", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == "D" + + result = idx.take([3, 2, 5]) + expected = PeriodIndex( + ["2011-01-04", "2011-01-03", "2011-01-06"], freq="D", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == "D" + + result = idx.take([-3, 2, 5]) + expected = PeriodIndex( + ["2011-01-29", "2011-01-03", "2011-01-06"], freq="D", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + assert result.freq == "D" + + def test_take_misc(self): + index = period_range(start="1/1/10", end="12/31/12", freq="D", name="idx") + expected = PeriodIndex( + [ + datetime(2010, 1, 6), + datetime(2010, 1, 7), + datetime(2010, 1, 9), + datetime(2010, 1, 13), + ], + freq="D", + name="idx", + ) + + taken1 = index.take([5, 6, 8, 12]) + taken2 = index[[5, 6, 8, 12]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, PeriodIndex) + assert taken.freq == index.freq + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH#12631 + idx = pd.PeriodIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", freq="D" + ) + result = idx.take(np.array([1, 0, -1])) + expected = pd.PeriodIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", freq="D" + ) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.PeriodIndex( + ["2011-02-01", "2011-01-01", "NaT"], name="xxx", freq="D" + ) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = 
idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.PeriodIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", freq="D" + ) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + msg = "index -5 is out of bounds for( axis 0 with)? size 3" + with pytest.raises(IndexError, match=msg): + idx.take(np.array([1, -5])) + + +class TestIndexing: + def test_get_loc_msg(self): + idx = period_range("2000-1-1", freq="A", periods=10) + bad_period = Period("2012", "A") + with pytest.raises(KeyError, match=r"^Period\('2012', 'A-DEC'\)$"): + idx.get_loc(bad_period) + + try: + idx.get_loc(bad_period) + except KeyError as inst: + assert inst.args[0] == bad_period + + def test_get_loc_nat(self): + didx = DatetimeIndex(["2011-01-01", "NaT", "2011-01-03"]) + pidx = PeriodIndex(["2011-01-01", "NaT", "2011-01-03"], freq="M") + + # check DatetimeIndex compat + for idx in [didx, pidx]: + assert idx.get_loc(pd.NaT) == 1 + assert idx.get_loc(None) == 1 + assert idx.get_loc(float("nan")) == 1 + assert idx.get_loc(np.nan) == 1 + + def test_get_loc(self): + # GH 17717 + p0 = pd.Period("2017-09-01") + p1 = pd.Period("2017-09-02") + p2 = pd.Period("2017-09-03") + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with non-duplicate + idx0 = pd.PeriodIndex([p0, p1, p2]) + expected_idx1_p1 = 1 + expected_idx1_p2 = 2 + + assert idx0.get_loc(p1) == expected_idx1_p1 + assert idx0.get_loc(str(p1)) == expected_idx1_p1 + assert idx0.get_loc(p2) == expected_idx1_p2 + assert idx0.get_loc(str(p2)) == expected_idx1_p2 + + msg = "Cannot interpret 'foo' as period" + with pytest.raises(KeyError, match=msg): + idx0.get_loc("foo") + with pytest.raises(KeyError, 
match=r"^1\.1$"): + idx0.get_loc(1.1) + + msg = ( + r"'PeriodIndex\(\['2017-09-01', '2017-09-02', '2017-09-03'\]," + r" dtype='period\[D\]', freq='D'\)' is an invalid key" + ) + with pytest.raises(TypeError, match=msg): + idx0.get_loc(idx0) + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with duplicate + idx1 = pd.PeriodIndex([p1, p1, p2]) + expected_idx1_p1 = slice(0, 2) + expected_idx1_p2 = 2 + + assert idx1.get_loc(p1) == expected_idx1_p1 + assert idx1.get_loc(str(p1)) == expected_idx1_p1 + assert idx1.get_loc(p2) == expected_idx1_p2 + assert idx1.get_loc(str(p2)) == expected_idx1_p2 + + msg = "Cannot interpret 'foo' as period" + with pytest.raises(KeyError, match=msg): + idx1.get_loc("foo") + + with pytest.raises(KeyError, match=r"^1\.1$"): + idx1.get_loc(1.1) + + msg = ( + r"'PeriodIndex\(\['2017-09-02', '2017-09-02', '2017-09-03'\]," + r" dtype='period\[D\]', freq='D'\)' is an invalid key" + ) + with pytest.raises(TypeError, match=msg): + idx1.get_loc(idx1) + + # get the location of p1/p2 from + # non-monotonic increasing/decreasing PeriodIndex with duplicate + idx2 = pd.PeriodIndex([p2, p1, p2]) + expected_idx2_p1 = 1 + expected_idx2_p2 = np.array([True, False, True]) + + assert idx2.get_loc(p1) == expected_idx2_p1 + assert idx2.get_loc(str(p1)) == expected_idx2_p1 + tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2) + tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) + + def test_is_monotonic_increasing(self): + # GH 17717 + p0 = pd.Period("2017-09-01") + p1 = pd.Period("2017-09-02") + p2 = pd.Period("2017-09-03") + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = pd.PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_increasing is True + assert idx_inc1.is_monotonic_increasing is True + assert idx_dec0.is_monotonic_increasing is False + assert 
idx_dec1.is_monotonic_increasing is False + assert idx.is_monotonic_increasing is False + + def test_is_monotonic_decreasing(self): + # GH 17717 + p0 = pd.Period("2017-09-01") + p1 = pd.Period("2017-09-02") + p2 = pd.Period("2017-09-03") + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = pd.PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_decreasing is False + assert idx_inc1.is_monotonic_decreasing is False + assert idx_dec0.is_monotonic_decreasing is True + assert idx_dec1.is_monotonic_decreasing is True + assert idx.is_monotonic_decreasing is False + + def test_contains(self): + # GH 17717 + p0 = pd.Period("2017-09-01") + p1 = pd.Period("2017-09-02") + p2 = pd.Period("2017-09-03") + p3 = pd.Period("2017-09-04") + + ps0 = [p0, p1, p2] + idx0 = pd.PeriodIndex(ps0) + + for p in ps0: + assert p in idx0 + assert str(p) in idx0 + + assert "2017-09-01 00:00:01" in idx0 + + assert "2017-09" in idx0 + + assert p3 not in idx0 + + def test_get_value(self): + # GH 17717 + p0 = pd.Period("2017-09-01") + p1 = pd.Period("2017-09-02") + p2 = pd.Period("2017-09-03") + + idx0 = pd.PeriodIndex([p0, p1, p2]) + input0 = np.array([1, 2, 3]) + expected0 = 2 + + result0 = idx0.get_value(input0, p1) + assert result0 == expected0 + + idx1 = pd.PeriodIndex([p1, p1, p2]) + input1 = np.array([1, 2, 3]) + expected1 = np.array([1, 2]) + + result1 = idx1.get_value(input1, p1) + tm.assert_numpy_array_equal(result1, expected1) + + idx2 = pd.PeriodIndex([p1, p2, p1]) + input2 = np.array([1, 2, 3]) + expected2 = np.array([1, 3]) + + result2 = idx2.get_value(input2, p1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_get_indexer(self): + # GH 17717 + p1 = pd.Period("2017-09-01") + p2 = pd.Period("2017-09-04") + p3 = pd.Period("2017-09-07") + + tp0 = pd.Period("2017-08-31") + tp1 = pd.Period("2017-09-02") + tp2 = pd.Period("2017-09-05") + tp3 = 
pd.Period("2017-09-09") + + idx = pd.PeriodIndex([p1, p2, p3]) + + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = pd.PeriodIndex([tp0, tp1, tp2, tp3]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2, -1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 0, 1, 2], dtype=np.intp) + ) + + res = idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 day")) + tm.assert_numpy_array_equal(res, np.array([0, 0, 1, -1], dtype=np.intp)) + + def test_get_indexer_mismatched_dtype(self): + # Check that we return all -1s and do not raise or cast incorrectly + + dti = pd.date_range("2016-01-01", periods=3) + pi = dti.to_period("D") + pi2 = dti.to_period("W") + + expected = np.array([-1, -1, -1], dtype=np.intp) + + result = pi.get_indexer(dti) + tm.assert_numpy_array_equal(result, expected) + + # This should work in both directions + result = dti.get_indexer(pi) + tm.assert_numpy_array_equal(result, expected) + + result = pi.get_indexer(pi2) + tm.assert_numpy_array_equal(result, expected) + + # We expect the same from get_indexer_non_unique + result = pi.get_indexer_non_unique(dti)[0] + tm.assert_numpy_array_equal(result, expected) + + result = dti.get_indexer_non_unique(pi)[0] + tm.assert_numpy_array_equal(result, expected) + + result = pi.get_indexer_non_unique(pi2)[0] + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_non_unique(self): + # GH 17717 + p1 = pd.Period("2017-09-02") + p2 = pd.Period("2017-09-03") + p3 = pd.Period("2017-09-04") + p4 = pd.Period("2017-09-05") + + idx1 = pd.PeriodIndex([p1, p2, p1]) + idx2 = pd.PeriodIndex([p2, p1, p3, p4]) + + result = idx1.get_indexer_non_unique(idx2) + expected_indexer = np.array([1, 0, 2, -1, -1], dtype=np.intp) + expected_missing = np.array([2, 3], 
dtype=np.int64) + + tm.assert_numpy_array_equal(result[0], expected_indexer) + tm.assert_numpy_array_equal(result[1], expected_missing) + + # TODO: This method came from test_period; de-dup with version above + def test_get_loc2(self): + idx = pd.period_range("2000-01-01", periods=3) + + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].asfreq("H", how="start"), method) == 1 + assert idx.get_loc(idx[1].to_timestamp(), method) == 1 + assert idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + idx = pd.period_range("2000-01-01", periods=5)[::2] + assert idx.get_loc("2000-01-02T12", method="nearest", tolerance="1 day") == 1 + assert ( + idx.get_loc("2000-01-02T12", method="nearest", tolerance=pd.Timedelta("1D")) + == 1 + ) + assert ( + idx.get_loc( + "2000-01-02T12", method="nearest", tolerance=np.timedelta64(1, "D") + ) + == 1 + ) + assert ( + idx.get_loc("2000-01-02T12", method="nearest", tolerance=timedelta(1)) == 1 + ) + + msg = "unit abbreviation w/o a number" + with pytest.raises(ValueError, match=msg): + idx.get_loc("2000-01-10", method="nearest", tolerance="foo") + + msg = "Input has different freq=None from PeriodArray\\(freq=D\\)" + with pytest.raises(ValueError, match=msg): + idx.get_loc("2000-01-10", method="nearest", tolerance="1 hour") + with pytest.raises(KeyError, match=r"^Period\('2000-01-10', 'D'\)$"): + idx.get_loc("2000-01-10", method="nearest", tolerance="1 day") + with pytest.raises( + ValueError, match="list-like tolerance size must match target index size" + ): + idx.get_loc( + "2000-01-10", + method="nearest", + tolerance=[ + pd.Timedelta("1 day").to_timedelta64(), + pd.Timedelta("1 day").to_timedelta64(), + ], + ) + + # TODO: This method came from test_period; de-dup with version above + def test_get_indexer2(self): + idx = pd.period_range("2000-01-01", periods=3).asfreq("H", how="start") + 
tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = pd.PeriodIndex( + ["1999-12-31T23", "2000-01-01T12", "2000-01-02T01"], freq="H" + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest", tolerance="1 hour"), + np.array([0, -1, 1], dtype=np.intp), + ) + + msg = "Input has different freq=None from PeriodArray\\(freq=H\\)" + with pytest.raises(ValueError, match=msg): + idx.get_indexer(target, "nearest", tolerance="1 minute") + + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest", tolerance="1 day"), + np.array([0, 1, 1], dtype=np.intp), + ) + tol_raw = [ + pd.Timedelta("1 hour"), + pd.Timedelta("1 hour"), + np.timedelta64(1, "D"), + ] + tm.assert_numpy_array_equal( + idx.get_indexer( + target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw] + ), + np.array([0, -1, 1], dtype=np.intp), + ) + tol_bad = [ + pd.Timedelta("2 hour").to_timedelta64(), + pd.Timedelta("1 hour").to_timedelta64(), + np.timedelta64(1, "M"), + ] + with pytest.raises( + libperiod.IncompatibleFrequency, match="Input has different freq=None from" + ): + idx.get_indexer(target, "nearest", tolerance=tol_bad) + + def test_indexing(self): + # GH 4390, iat incorrectly indexing + index = period_range("1/1/2001", periods=10) + s = Series(np.random.randn(10), index=index) + expected = s[index[0]] + result = s.iat[0] + assert expected == result + + def test_period_index_indexer(self): + # GH4125 + idx = pd.period_range("2002-01", "2003-12", freq="M") + df = pd.DataFrame(np.random.randn(24, 10), index=idx) + tm.assert_frame_equal(df, df.loc[idx]) + tm.assert_frame_equal(df, df.loc[list(idx)]) + 
tm.assert_frame_equal(df, df.loc[list(idx)]) + tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]]) + tm.assert_frame_equal(df, df.loc[list(idx)]) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py new file mode 100644 index 00000000..427d9ab7 --- /dev/null +++ b/pandas/tests/indexes/period/test_ops.py @@ -0,0 +1,347 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DatetimeIndex, Index, NaT, PeriodIndex, Series +import pandas._testing as tm +from pandas.core.arrays import PeriodArray +from pandas.tests.base.test_ops import Ops + + +class TestPeriodIndexOps(Ops): + def setup_method(self, method): + super().setup_method(method) + mask = lambda x: (isinstance(x, DatetimeIndex) or isinstance(x, PeriodIndex)) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [o for o in self.objs if not mask(o)] + + def test_ops_properties(self): + f = lambda x: isinstance(x, PeriodIndex) + self.check_ops_properties(PeriodArray._field_ops, f) + self.check_ops_properties(PeriodArray._object_ops, f) + self.check_ops_properties(PeriodArray._bool_ops, f) + + def test_resolution(self): + for freq, expected in zip( + ["A", "Q", "M", "D", "H", "T", "S", "L", "U"], + [ + "day", + "day", + "day", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + ], + ): + + idx = pd.period_range(start="2013-04-01", periods=30, freq=freq) + assert idx.resolution == expected + + def test_value_counts_unique(self): + # GH 7735 + idx = pd.period_range("2011-01-01 09:00", freq="H", periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq="H") + + exp_idx = PeriodIndex( + [ + "2011-01-01 18:00", + "2011-01-01 17:00", + "2011-01-01 16:00", + "2011-01-01 15:00", + "2011-01-01 14:00", + "2011-01-01 13:00", + "2011-01-01 12:00", + "2011-01-01 11:00", + "2011-01-01 10:00", + "2011-01-01 09:00", 
+ ], + freq="H", + ) + expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + expected = pd.period_range("2011-01-01 09:00", freq="H", periods=10) + tm.assert_index_equal(idx.unique(), expected) + + idx = PeriodIndex( + [ + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 09:00", + "2013-01-01 08:00", + "2013-01-01 08:00", + NaT, + ], + freq="H", + ) + + exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H") + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H") + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") + result = idx.drop_duplicates() + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq + + idx_dup = idx.append(idx) # freq will not be reset + result = idx_dup.drop_duplicates() + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.period_range("2011-01-01", "2011-01-31", freq="D", name="idx") + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep="last") + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep="last") + tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = 
Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + def test_order_compat(self): + def _check_freq(index, expected_index): + if isinstance(index, PeriodIndex): + assert index.freq == expected_index.freq + + pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A") + # for compatibility check + iidx = Index([2011, 2012, 2013], name="idx") + for idx in [pidx, iidx]: + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + _check_freq(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, idx[::-1]) + _check_freq(ordered, idx[::-1]) + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) + _check_freq(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, idx[::-1]) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) + _check_freq(ordered, idx[::-1]) + + pidx = PeriodIndex( + ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" + ) + pexpected = PeriodIndex( + ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" + ) + # for compatibility check + iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") + iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx") + for idx, expected in [(pidx, pexpected), (iidx, iexpected)]: + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + _check_freq(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + _check_freq(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + _check_freq(ordered, idx) + + ordered, indexer = 
idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + _check_freq(ordered, idx) + + pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") + + result = pidx.sort_values() + expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") + tm.assert_index_equal(result, expected) + assert result.freq == "D" + + result = pidx.sort_values(ascending=False) + expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D") + tm.assert_index_equal(result, expected) + assert result.freq == "D" + + def test_order(self): + for freq in ["D", "2D", "4D"]: + idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" + ) + + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + assert ordered.freq == idx.freq + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + assert ordered.freq == expected.freq + assert ordered.freq == freq + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) + assert ordered.freq == idx.freq + assert ordered.freq == freq + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) + assert ordered.freq == expected.freq + assert ordered.freq == freq + + idx1 = PeriodIndex( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + freq="D", + name="idx1", + ) + exp1 = PeriodIndex( + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + freq="D", + name="idx1", + ) + + idx2 = PeriodIndex( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", 
"2011-01-01"], + freq="D", + name="idx2", + ) + exp2 = PeriodIndex( + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + freq="D", + name="idx2", + ) + + idx3 = PeriodIndex( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3" + ) + exp3 = PeriodIndex( + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3" + ) + + for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + assert ordered.freq == "D" + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + assert ordered.freq == "D" + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq == "D" + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq == "D" + + def test_shift(self): + # This is tested in test_arithmetic + pass + + def test_nat(self): + assert pd.PeriodIndex._na_value is NaT + assert pd.PeriodIndex([], freq="M")._na_value is NaT + + idx = pd.PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) + + idx = pd.PeriodIndex(["2011-01-01", "NaT"], freq="D") + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True + tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) + + @pytest.mark.parametrize("freq", ["D", "M"]) + def test_equals(self, freq): + # GH#13107 + idx = 
pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) + assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = pd.PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="H") + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # same internal, different tz + idx3 = pd.PeriodIndex._simple_new( + idx._values._simple_new(idx._values.asi8, freq="H") + ) + tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) + assert not idx.equals(idx3) + assert not idx.equals(idx3.copy()) + assert not idx.equals(idx3.astype(object)) + assert not idx.astype(object).equals(idx3) + assert not idx.equals(list(idx3)) + assert not idx.equals(pd.Series(idx3)) + + def test_freq_setter_deprecated(self): + # GH 20678 + idx = pd.period_range("2018Q1", periods=4, freq="Q") + + # no warning for getter + with tm.assert_produces_warning(None): + idx.freq + + # warning for setter + with pytest.raises(AttributeError, match="can't set attribute"): + idx.freq = pd.offsets.Day() diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py new file mode 100644 index 00000000..9ca2dd16 --- /dev/null +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -0,0 +1,135 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Period, Series, period_range +import pandas._testing as tm + + +class TestPeriodIndex: + def setup_method(self, method): + pass + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M")) + SLC = pd.IndexSlice + + def 
assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + + assert_slices_equivalent(SLC[Period("2014-10") :: -1], SLC[9::-1]) + assert_slices_equivalent(SLC["2014-10"::-1], SLC[9::-1]) + + assert_slices_equivalent(SLC[: Period("2014-10") : -1], SLC[:8:-1]) + assert_slices_equivalent(SLC[:"2014-10":-1], SLC[:8:-1]) + + assert_slices_equivalent(SLC["2015-02":"2014-10":-1], SLC[13:8:-1]) + assert_slices_equivalent( + SLC[Period("2015-02") : Period("2014-10") : -1], SLC[13:8:-1] + ) + assert_slices_equivalent(SLC["2015-02" : Period("2014-10") : -1], SLC[13:8:-1]) + assert_slices_equivalent(SLC[Period("2015-02") : "2014-10" : -1], SLC[13:8:-1]) + + assert_slices_equivalent(SLC["2014-10":"2015-02":-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M")) + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts[::0] + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts.loc[::0] + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts.loc[::0] + + def test_slice_keep_name(self): + idx = period_range("20010101", periods=10, freq="D", name="bob") + assert idx.name == idx[1:].name + + def test_pindex_slice_index(self): + pi = period_range(start="1/1/10", end="12/31/12", freq="M") + s = Series(np.random.rand(len(pi)), index=pi) + res = s["2010"] + exp = s[0:12] + tm.assert_series_equal(res, exp) + res = s["2011"] + exp = s[12:24] + tm.assert_series_equal(res, exp) + + def test_range_slice_day(self): + # GH#6716 + didx = pd.date_range(start="2013/01/01", freq="D", periods=400) + pidx = period_range(start="2013/01/01", freq="D", periods=400) + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 
09:00", + ] + for v in values: + with pytest.raises(TypeError): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s["2013/01/02":], s[1:]) + tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5]) + tm.assert_series_equal(s["2013/02":], s[31:]) + tm.assert_series_equal(s["2014":], s[365:]) + + invalid = ["2013/02/01 9H", "2013/02/01 09:00"] + for v in invalid: + with pytest.raises(TypeError): + idx[v:] + + def test_range_slice_seconds(self): + # GH#6716 + didx = pd.date_range(start="2013/01/01 09:00:00", freq="S", periods=4000) + pidx = period_range(start="2013/01/01 09:00:00", freq="S", periods=4000) + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for v in values: + with pytest.raises(TypeError): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660]) + tm.assert_series_equal( + s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960] + ) + tm.assert_series_equal(s["2013/01/01 10H":], s[3600:]) + tm.assert_series_equal(s[:"2013/01/01 09:30"], s[:1860]) + for d in ["2013/01/01", "2013/01", "2013"]: + tm.assert_series_equal(s[d:], s) + + def test_range_slice_outofbounds(self): + # GH#5407 + didx = pd.date_range(start="2013/10/01", freq="D", periods=10) + pidx = period_range(start="2013/10/01", freq="D", periods=10) + + for idx in [didx, pidx]: + df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) + empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"]) + empty["units"] = empty["units"].astype("int64") + + tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty) + tm.assert_frame_equal(df["2013/09/30":"2013/10/02"], df.iloc[:2]) + tm.assert_frame_equal(df["2013/10/01":"2013/10/02"], df.iloc[:2]) + tm.assert_frame_equal(df["2013/10/02":"2013/09/30"], empty) + 
tm.assert_frame_equal(df["2013/10/15":"2013/10/17"], empty) + tm.assert_frame_equal(df["2013-06":"2013-09"], empty) + tm.assert_frame_equal(df["2013-11":"2013-12"], empty) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py new file mode 100644 index 00000000..16fa0b0c --- /dev/null +++ b/pandas/tests/indexes/period/test_period.py @@ -0,0 +1,664 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs.period import IncompatibleFrequency +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + NaT, + Period, + PeriodIndex, + Series, + date_range, + offsets, + period_range, +) +import pandas._testing as tm + +from ..datetimelike import DatetimeLike + + +class TestPeriodIndex(DatetimeLike): + _holder = PeriodIndex + + @pytest.fixture( + params=[ + tm.makePeriodIndex(10), + period_range("20130101", periods=10, freq="D")[::-1], + ], + ids=["index_inc", "index_dec"], + ) + def indices(self, request): + return request.param + + def create_index(self): + return period_range("20130101", periods=5, freq="D") + + def test_pickle_compat_construction(self): + pass + + @pytest.mark.parametrize("freq", ["D", "M", "A"]) + def test_pickle_round_trip(self, freq): + idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq) + result = tm.round_trip_pickle(idx) + tm.assert_index_equal(result, idx) + + def test_where(self): + # This is handled in test_indexing + pass + + @pytest.mark.parametrize("use_numpy", [True, False]) + @pytest.mark.parametrize( + "index", + [ + pd.period_range("2000-01-01", periods=3, freq="D"), + pd.period_range("2001-01-01", periods=3, freq="2D"), + pd.PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), + ], + ) + def test_repeat_freqstr(self, index, use_numpy): + # GH10183 + expected = PeriodIndex([p for p in index for _ in range(3)]) + result = np.repeat(index, 3) if use_numpy else index.repeat(3) + 
tm.assert_index_equal(result, expected) + assert result.freqstr == index.freqstr + + def test_fillna_period(self): + # GH 11343 + idx = pd.PeriodIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], freq="H") + + exp = pd.PeriodIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H" + ) + tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01 10:00", freq="H")), exp) + + exp = pd.Index( + [ + pd.Period("2011-01-01 09:00", freq="H"), + "x", + pd.Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna("x"), exp) + + exp = pd.Index( + [ + pd.Period("2011-01-01 09:00", freq="H"), + pd.Period("2011-01-01", freq="D"), + pd.Period("2011-01-01 11:00", freq="H"), + ], + dtype=object, + ) + tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01", freq="D")), exp) + + def test_no_millisecond_field(self): + msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + DatetimeIndex.millisecond + + msg = "'DatetimeIndex' object has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + DatetimeIndex([]).millisecond + + def test_hash_error(self): + index = period_range("20010101", periods=10) + msg = f"unhashable type: '{type(index).__name__}'" + with pytest.raises(TypeError, match=msg): + hash(index) + + def test_make_time_series(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + series = Series(1, index=index) + assert isinstance(series, Series) + + def test_shallow_copy_empty(self): + + # GH13067 + idx = PeriodIndex([], freq="M") + result = idx._shallow_copy() + expected = idx + + tm.assert_index_equal(result, expected) + + def test_shallow_copy_i8(self): + # GH-24391 + pi = period_range("2018-01-01", periods=3, freq="2D") + result = pi._shallow_copy(pi.asi8, freq=pi.freq) + tm.assert_index_equal(result, pi) + + def test_shallow_copy_changing_freq_raises(self): + pi = period_range("2018-01-01", periods=3, 
freq="2D") + msg = "specified freq and dtype are different" + with pytest.raises(IncompatibleFrequency, match=msg): + pi._shallow_copy(pi, freq="H") + + def test_view_asi8(self): + idx = pd.PeriodIndex([], freq="M") + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M") + + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D") + tm.assert_numpy_array_equal(idx.view("i8"), exp) + tm.assert_numpy_array_equal(idx.asi8, exp) + + def test_values(self): + idx = pd.PeriodIndex([], freq="M") + + exp = np.array([], dtype=np.object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + + exp = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) + + idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M") + + exp = np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + exp = np.array([492, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) + + idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D") + + exp = np.array([pd.Period("2011-01-01", freq="D"), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.to_numpy(), exp) + exp = np.array([14975, -9223372036854775808], dtype=np.int64) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) + + def test_period_index_length(self): + pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") + assert len(pi) == 9 + + pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") + assert len(pi) == 
4 * 9 + + pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") + assert len(pi) == 12 * 9 + + start = Period("02-Apr-2005", "B") + i1 = period_range(start=start, periods=20) + assert len(i1) == 20 + assert i1.freq == start.freq + assert i1[0] == start + + end_intv = Period("2006-12-31", "W") + i1 = period_range(end=end_intv, periods=10) + assert len(i1) == 10 + assert i1.freq == end_intv.freq + assert i1[-1] == end_intv + + end_intv = Period("2006-12-31", "1w") + i2 = period_range(end=end_intv, periods=10) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq + + end_intv = Period("2006-12-31", ("w", 1)) + i2 = period_range(end=end_intv, periods=10) + assert len(i1) == len(i2) + assert (i1 == i2).all() + assert i1.freq == i2.freq + + msg = "start and end must have same freq" + with pytest.raises(ValueError, match=msg): + period_range(start=start, end=end_intv) + + end_intv = Period("2005-05-01", "B") + i1 = period_range(start=start, end=end_intv) + + msg = ( + "Of the three parameters: start, end, and periods, exactly two " + "must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range(start=start) + + # infer freq from first element + i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) + assert len(i2) == 2 + assert i2[0] == end_intv + + i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) + assert len(i2) == 2 + assert i2[0] == end_intv + + # Mixed freq should fail + vals = [end_intv, Period("2006-12-31", "w")] + msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) + vals = np.array(vals) + with pytest.raises(ValueError, match=msg): + PeriodIndex(vals) + + def test_fields(self): + # year, month, day, hour, minute + # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter + # qyear + pi = period_range(freq="A", start="1/1/2001", end="12/1/2005") + self._check_all_fields(pi) + + pi = 
period_range(freq="Q", start="1/1/2001", end="12/1/2002") + self._check_all_fields(pi) + + pi = period_range(freq="M", start="1/1/2001", end="1/1/2002") + self._check_all_fields(pi) + + pi = period_range(freq="D", start="12/1/2001", end="6/1/2001") + self._check_all_fields(pi) + + pi = period_range(freq="B", start="12/1/2001", end="6/1/2001") + self._check_all_fields(pi) + + pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00") + self._check_all_fields(pi) + + pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20") + self._check_all_fields(pi) + + pi = period_range( + freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00" + ) + self._check_all_fields(pi) + + end_intv = Period("2006-12-31", "W") + i1 = period_range(end=end_intv, periods=10) + self._check_all_fields(i1) + + def _check_all_fields(self, periodindex): + fields = [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "weekofyear", + "week", + "dayofweek", + "dayofyear", + "quarter", + "qyear", + "days_in_month", + ] + + periods = list(periodindex) + s = pd.Series(periodindex) + + for field in fields: + field_idx = getattr(periodindex, field) + assert len(periodindex) == len(field_idx) + for x, val in zip(periods, field_idx): + assert getattr(x, field) == val + + if len(s) == 0: + continue + + field_s = getattr(s.dt, field) + assert len(periodindex) == len(field_s) + for x, val in zip(periods, field_s): + assert getattr(x, field) == val + + def test_period_set_index_reindex(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = period_range("2011/01/01", periods=6, freq="M") + idx2 = period_range("2013", periods=6, freq="A") + + df = df.set_index(idx1) + tm.assert_index_equal(df.index, idx1) + df = df.set_index(idx2) + tm.assert_index_equal(df.index, idx2) + + @pytest.mark.parametrize( + "p_values, o_values, values, expected_values", + [ + ( + [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], + [Period("2019Q1", "Q-DEC"), 
Period("2019Q2", "Q-DEC"), "All"], + [1.0, 1.0], + [1.0, 1.0, np.nan], + ), + ( + [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], + [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], + [1.0, 1.0], + [1.0, 1.0], + ), + ], + ) + def test_period_reindex_with_object( + self, p_values, o_values, values, expected_values + ): + # GH 28337 + period_index = PeriodIndex(p_values) + object_index = Index(o_values) + + s = pd.Series(values, index=period_index) + result = s.reindex(object_index) + expected = pd.Series(expected_values, index=object_index) + tm.assert_series_equal(result, expected) + + def test_factorize(self): + idx1 = PeriodIndex( + ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M" + ) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + idx2 = pd.PeriodIndex( + ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M" + ) + + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) + arr, idx = idx2.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) + exp_idx = PeriodIndex(["2014-03", "2014-02", "2014-01"], freq="M") + arr, idx = idx2.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + def test_is_(self): + create_index = lambda: period_range(freq="A", start="1/1/2001", end="12/1/2009") + index = create_index() + assert index.is_(index) + assert not index.is_(create_index()) + assert index.is_(index.view()) + assert index.is_(index.view().view().view().view().view()) + assert index.view().is_(index) + ind2 = index.view() + index.name = "Apple" 
+ assert ind2.is_(index) + assert not index.is_(index[:]) + assert not index.is_(index.asfreq("M")) + assert not index.is_(index.asfreq("A")) + + assert not index.is_(index - 2) + assert not index.is_(index - 0) + + def test_contains(self): + rng = period_range("2007-01", freq="M", periods=10) + + assert Period("2007-01", freq="M") in rng + assert not Period("2007-01", freq="D") in rng + assert not Period("2007-01", freq="2M") in rng + + def test_contains_nat(self): + # see gh-13582 + idx = period_range("2007-01", freq="M", periods=10) + assert pd.NaT not in idx + assert None not in idx + assert float("nan") not in idx + assert np.nan not in idx + + idx = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M") + assert pd.NaT in idx + assert None in idx + assert float("nan") in idx + assert np.nan in idx + + def test_periods_number_check(self): + msg = ( + "Of the three parameters: start, end, and periods, exactly two " + "must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range("2011-1-1", "2012-1-1", "B") + + def test_start_time(self): + # GH 17157 + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="MS") + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + # GH 17157 + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="M") + expected_index = expected_index.shift(1, freq="D").shift(-1, freq="ns") + tm.assert_index_equal(index.end_time, expected_index) + + def test_index_duplicate_periods(self): + # monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[1:3] + tm.assert_series_equal(result, expected) + result[:] = 1 + assert (ts[1:3] == 1).all() + + # not monotonic + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], 
freq="A-JUN") + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["2007"] + expected = ts[idx == "2007"] + tm.assert_series_equal(result, expected) + + def test_index_unique(self): + idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") + expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") + tm.assert_index_equal(idx.unique(), expected) + assert idx.nunique() == 3 + + idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN", tz="US/Eastern") + expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN", tz="US/Eastern") + tm.assert_index_equal(idx.unique(), expected) + assert idx.nunique() == 3 + + def test_shift(self): + # This is tested in test_arithmetic + pass + + @td.skip_if_32bit + def test_ndarray_compat_properties(self): + super().test_ndarray_compat_properties() + + def test_negative_ordinals(self): + Period(ordinal=-1000, freq="A") + Period(ordinal=0, freq="A") + + idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A") + idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A") + tm.assert_index_equal(idx1, idx2) + + def test_pindex_fieldaccessor_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name" + ) + + exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name") + tm.assert_index_equal(idx.year, exp) + exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") + tm.assert_index_equal(idx.month, exp) + + def test_pindex_qaccess(self): + pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") + s = Series(np.random.rand(len(pi)), index=pi).cumsum() + # Todo: fix these accessors! 
+ assert s["05Q4"] == s[2] + + def test_pindex_multiples(self): + expected = PeriodIndex( + ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], + freq="2M", + ) + + pi = period_range(start="1/1/11", end="12/31/11", freq="2M") + tm.assert_index_equal(pi, expected) + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == "2M" + + pi = period_range(start="1/1/11", periods=6, freq="2M") + tm.assert_index_equal(pi, expected) + assert pi.freq == offsets.MonthEnd(2) + assert pi.freqstr == "2M" + + def test_iteration(self): + index = period_range(start="1/1/10", periods=4, freq="B") + + result = list(index) + assert isinstance(result[0], Period) + assert result[0].freq == index.freq + + def test_is_full(self): + index = PeriodIndex([2005, 2007, 2009], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2006, 2007], freq="A") + assert index.is_full + + index = PeriodIndex([2005, 2005, 2007], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2005, 2006], freq="A") + assert index.is_full + + index = PeriodIndex([2006, 2005, 2005], freq="A") + with pytest.raises(ValueError, match="Index is not monotonic"): + index.is_full + + assert index[:0].is_full + + def test_with_multi_index(self): + # #1705 + index = date_range("1/1/2012", periods=4, freq="12H") + index_as_arrays = [index.to_period(freq="D"), index.hour] + + s = Series([0, 1, 2, 3], index_as_arrays) + + assert isinstance(s.index.levels[0], PeriodIndex) + + assert isinstance(s.index.values[0][0], Period) + + def test_convert_array_of_periods(self): + rng = period_range("1/1/2000", periods=20, freq="D") + periods = list(rng) + + result = pd.Index(periods) + assert isinstance(result, PeriodIndex) + + def test_append_concat(self): + # #1815 + d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") + d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") + + s1 = Series(np.random.randn(10), d1) + s2 = Series(np.random.randn(10), d2) + + s1 = s1.to_period() + s2 = 
s2.to_period() + + # drops index + result = pd.concat([s1, s2]) + assert isinstance(result.index, PeriodIndex) + assert result.index[0] == s1.index[0] + + def test_pickle_freq(self): + # GH2891 + prng = period_range("1/1/2011", "1/1/2012", freq="M") + new_prng = tm.round_trip_pickle(prng) + assert new_prng.freq == offsets.MonthEnd() + assert new_prng.freqstr == "M" + + def test_map(self): + # test_map_dictlike generally tests + + index = PeriodIndex([2005, 2007, 2009], freq="A") + result = index.map(lambda x: x.ordinal) + exp = Index([x.ordinal for x in index]) + tm.assert_index_equal(result, exp) + + def test_join_self(self, join_type): + index = period_range("1/1/2000", periods=10) + joined = index.join(index, how=join_type) + assert index is joined + + def test_insert(self): + # GH 18295 (test missing) + expected = PeriodIndex( + ["2017Q1", pd.NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q" + ) + for na in (np.nan, pd.NaT, None): + result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "msg, key", + [ + (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), + (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), + (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), + ( + r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", + (Period(2018), Period(2016), "bar"), + ), + (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), + ( + r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", + (Period(2017), "foo", Period(2015)), + ), + (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), + ], + ) + def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): + # issue 20684 + """ + parse_time_string return parameter if type not matched. + PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. 
+ If first argument is Period and a tuple has 3 items, + process go on not raise exception + """ + df = DataFrame( + { + "A": [Period(2019), "x1", "x2"], + "B": [Period(2018), Period(2016), "y1"], + "C": [Period(2017), "z1", Period(2015)], + "V1": [1, 2, 3], + "V2": [10, 20, 30], + } + ).set_index(["A", "B", "C"]) + with pytest.raises(KeyError, match=msg): + df.loc[key] + + +def test_maybe_convert_timedelta(): + pi = PeriodIndex(["2000", "2001"], freq="D") + offset = offsets.Day(2) + assert pi._maybe_convert_timedelta(offset) == 2 + assert pi._maybe_convert_timedelta(2) == 2 + + offset = offsets.BusinessDay() + msg = r"Input has different freq=B from PeriodIndex\(freq=D\)" + with pytest.raises(ValueError, match=msg): + pi._maybe_convert_timedelta(offset) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 00000000..2c3d2219 --- /dev/null +++ b/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,99 @@ +import pytest + +from pandas import NaT, Period, PeriodIndex, date_range, period_range +import pandas._testing as tm + + +class TestPeriodRange: + @pytest.mark.parametrize("freq", ["D", "W", "M", "Q", "A"]) + def test_construction_from_string(self, freq): + # non-empty + expected = date_range( + start="2017-01-01", periods=5, freq=freq, name="foo" + ).to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name="foo") + + result = period_range(start=start, periods=0, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, 
freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name="foo") + tm.assert_index_equal(result, expected) + + def test_construction_from_period(self): + # upsampling + start, end = Period("2017Q1", freq="Q"), Period("2018Q1", freq="Q") + expected = date_range( + start="2017-03-31", end="2018-03-31", freq="M", name="foo" + ).to_period() + result = period_range(start=start, end=end, freq="M", name="foo") + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period("2017-1", freq="M"), Period("2019-12", freq="M") + expected = date_range( + start="2017-01-31", end="2019-12-31", freq="Q", name="foo" + ).to_period() + result = period_range(start=start, end=end, freq="Q", name="foo") + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq="W", name="foo") + + result = period_range(start=start, periods=0, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq="W", name="foo") + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the three parameters: start, end, and periods, " + "exactly two must be specified" + ) + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1") + + with pytest.raises(ValueError, match=msg): + period_range(end="2017Q1") + + with pytest.raises(ValueError, match=msg): + period_range(periods=5) + + with pytest.raises(ValueError, match=msg): + period_range() + + # too many params + with pytest.raises(ValueError, match=msg): + period_range(start="2017Q1", end="2018Q1", periods=8, freq="Q") + + # start/end NaT + msg = "start and end must not be NaT" + with pytest.raises(ValueError, match=msg): + period_range(start=NaT, end="2018Q1") + + with pytest.raises(ValueError, match=msg): + 
period_range(start="2017Q1", end=NaT) + + # invalid periods param + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + period_range(start="2017Q1", periods="foo") diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py new file mode 100644 index 00000000..d9809f0f --- /dev/null +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -0,0 +1,17 @@ +"""Tests for PeriodIndex behaving like a vectorized Period scalar""" + +from pandas import Timedelta, date_range, period_range +import pandas._testing as tm + + +class TestPeriodIndexOps: + def test_start_time(self): + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="MS") + tm.assert_index_equal(index.start_time, expected_index) + + def test_end_time(self): + index = period_range(freq="M", start="2016-01-01", end="2016-05-31") + expected_index = date_range("2016-01-01", end="2016-05-31", freq="M") + expected_index += Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(index.end_time, expected_index) diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py new file mode 100644 index 00000000..dc780588 --- /dev/null +++ b/pandas/tests/indexes/period/test_setops.py @@ -0,0 +1,375 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +import pandas as pd +from pandas import Index, PeriodIndex, date_range, period_range +import pandas._testing as tm + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestPeriodIndex: + def test_joins(self, join_type): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + joined = index.join(index[:-5], how=join_type) + + assert isinstance(joined, PeriodIndex) + assert joined.freq == index.freq + + def test_join_self(self, join_type): + index = period_range("1/1/2000", 
"1/20/2000", freq="D") + + res = index.join(index, how=join_type) + assert index is res + + def test_join_does_not_recur(self): + df = tm.makeCustomDataframe( + 3, + 2, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + s = df.iloc[:2, 0] + + res = s.index.join(df.columns, how="outer") + expected = Index([s.index[0], s.index[1], df.columns[0], df.columns[1]], object) + tm.assert_index_equal(res, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, sort): + # union + other1 = pd.period_range("1/1/2000", freq="D", periods=5) + rng1 = pd.period_range("1/6/2000", freq="D", periods=5) + expected1 = pd.PeriodIndex( + [ + "2000-01-06", + "2000-01-07", + "2000-01-08", + "2000-01-09", + "2000-01-10", + "2000-01-01", + "2000-01-02", + "2000-01-03", + "2000-01-04", + "2000-01-05", + ], + freq="D", + ) + + rng2 = pd.period_range("1/1/2000", freq="D", periods=5) + other2 = pd.period_range("1/4/2000", freq="D", periods=5) + expected2 = pd.period_range("1/1/2000", freq="D", periods=8) + + rng3 = pd.period_range("1/1/2000", freq="D", periods=5) + other3 = pd.PeriodIndex([], freq="D") + expected3 = pd.period_range("1/1/2000", freq="D", periods=5) + + rng4 = pd.period_range("2000-01-01 09:00", freq="H", periods=5) + other4 = pd.period_range("2000-01-02 09:00", freq="H", periods=5) + expected4 = pd.PeriodIndex( + [ + "2000-01-01 09:00", + "2000-01-01 10:00", + "2000-01-01 11:00", + "2000-01-01 12:00", + "2000-01-01 13:00", + "2000-01-02 09:00", + "2000-01-02 10:00", + "2000-01-02 11:00", + "2000-01-02 12:00", + "2000-01-02 13:00", + ], + freq="H", + ) + + rng5 = pd.PeriodIndex( + ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"], freq="T" + ) + other5 = pd.PeriodIndex( + ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"], freq="T" + ) + expected5 = pd.PeriodIndex( + [ + "2000-01-01 09:01", + "2000-01-01 09:03", + "2000-01-01 09:05", + "2000-01-01 09:08", + ], + freq="T", + ) + + rng6 = 
pd.period_range("2000-01-01", freq="M", periods=7) + other6 = pd.period_range("2000-04-01", freq="M", periods=7) + expected6 = pd.period_range("2000-01-01", freq="M", periods=10) + + rng7 = pd.period_range("2003-01-01", freq="A", periods=5) + other7 = pd.period_range("1998-01-01", freq="A", periods=8) + expected7 = pd.PeriodIndex( + [ + "2003", + "2004", + "2005", + "2006", + "2007", + "1998", + "1999", + "2000", + "2001", + "2002", + ], + freq="A", + ) + + rng8 = pd.PeriodIndex( + ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"], freq="D" + ) + other8 = pd.period_range("1/6/2000", freq="D", periods=5) + expected8 = pd.PeriodIndex( + [ + "1/3/2000", + "1/2/2000", + "1/1/2000", + "1/5/2000", + "1/4/2000", + "1/6/2000", + "1/7/2000", + "1/8/2000", + "1/9/2000", + "1/10/2000", + ], + freq="D", + ) + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), + (rng8, other8, expected8), + ]: + + result_union = rng.union(other, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result_union, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_misc(self, sort): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + result = index[:-5].union(index[10:], sort=sort) + tm.assert_index_equal(result, index) + + # not in order + result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort) + if sort is None: + tm.assert_index_equal(result, index) + assert tm.equalContents(result, index) + + # raise if different frequencies + index = period_range("1/1/2000", "1/20/2000", freq="D") + index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") + with pytest.raises(IncompatibleFrequency): + index.union(index2, sort=sort) + + index3 = period_range("1/1/2000", "1/20/2000", freq="2D") + with 
pytest.raises(IncompatibleFrequency): + index.join(index3) + + def test_union_dataframe_index(self): + rng1 = pd.period_range("1/1/1999", "1/1/2012", freq="M") + s1 = pd.Series(np.random.randn(len(rng1)), rng1) + + rng2 = pd.period_range("1/1/1980", "12/1/2001", freq="M") + s2 = pd.Series(np.random.randn(len(rng2)), rng2) + df = pd.DataFrame({"s1": s1, "s2": s2}) + + exp = pd.period_range("1/1/1980", "1/1/2012", freq="M") + tm.assert_index_equal(df.index, exp) + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection(self, sort): + index = period_range("1/1/2000", "1/20/2000", freq="D") + + result = index[:-5].intersection(index[10:], sort=sort) + tm.assert_index_equal(result, index[10:-5]) + + # not in order + left = _permute(index[:-5]) + right = _permute(index[10:]) + result = left.intersection(right, sort=sort) + if sort is None: + tm.assert_index_equal(result, index[10:-5]) + assert tm.equalContents(result, index[10:-5]) + + # raise if different frequencies + index = period_range("1/1/2000", "1/20/2000", freq="D") + index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") + with pytest.raises(IncompatibleFrequency): + index.intersection(index2, sort=sort) + + index3 = period_range("1/1/2000", "1/20/2000", freq="2D") + with pytest.raises(IncompatibleFrequency): + index.intersection(index3, sort=sort) + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_cases(self, sort): + base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx") + + # if target has the same name, it is preserved + rng2 = period_range("5/15/2000", "6/20/2000", freq="D", name="idx") + expected2 = period_range("6/1/2000", "6/20/2000", freq="D", name="idx") + + # if target name is different, it will be reset + rng3 = period_range("5/15/2000", "6/20/2000", freq="D", name="other") + expected3 = period_range("6/1/2000", "6/20/2000", freq="D", name=None) + + rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = 
PeriodIndex([], name="idx", freq="D") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + # non-monotonic + base = PeriodIndex( + ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], + freq="D", + name="idx", + ) + + rng2 = PeriodIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + freq="D", + name="idx", + ) + expected2 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx") + + rng3 = PeriodIndex( + ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], + freq="D", + name="other", + ) + expected3 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None) + + rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") + expected4 = PeriodIndex([], freq="D", name="idx") + + for (rng, expected) in [ + (rng2, expected2), + (rng3, expected3), + (rng4, expected4), + ]: + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == "D" + + # empty same freq + rng = date_range("6/1/2000", "6/15/2000", freq="T") + result = rng[0:0].intersection(rng) + assert len(result) == 0 + + result = rng.intersection(rng[0:0]) + assert len(result) == 0 + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference(self, sort): + # diff + period_rng = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"] + rng1 = pd.PeriodIndex(period_rng, freq="D") + other1 = pd.period_range("1/6/2000", freq="D", periods=5) + expected1 = rng1 + + rng2 = pd.PeriodIndex(period_rng, freq="D") + other2 = pd.period_range("1/4/2000", freq="D", periods=5) + expected2 = pd.PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D") + + rng3 = pd.PeriodIndex(period_rng, freq="D") + other3 = 
pd.PeriodIndex([], freq="D") + expected3 = rng3 + + period_rng = [ + "2000-01-01 10:00", + "2000-01-01 09:00", + "2000-01-01 12:00", + "2000-01-01 11:00", + "2000-01-01 13:00", + ] + rng4 = pd.PeriodIndex(period_rng, freq="H") + other4 = pd.period_range("2000-01-02 09:00", freq="H", periods=5) + expected4 = rng4 + + rng5 = pd.PeriodIndex( + ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"], freq="T" + ) + other5 = pd.PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T") + expected5 = pd.PeriodIndex(["2000-01-01 09:03"], freq="T") + + period_rng = [ + "2000-02-01", + "2000-01-01", + "2000-06-01", + "2000-07-01", + "2000-05-01", + "2000-03-01", + "2000-04-01", + ] + rng6 = pd.PeriodIndex(period_rng, freq="M") + other6 = pd.period_range("2000-04-01", freq="M", periods=7) + expected6 = pd.PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M") + + period_rng = ["2003", "2007", "2006", "2005", "2004"] + rng7 = pd.PeriodIndex(period_rng, freq="A") + other7 = pd.period_range("1998-01-01", freq="A", periods=8) + expected7 = pd.PeriodIndex(["2007", "2006"], freq="A") + + for rng, other, expected in [ + (rng1, other1, expected1), + (rng2, other2, expected2), + (rng3, other3, expected3), + (rng4, other4, expected4), + (rng5, other5, expected5), + (rng6, other6, expected6), + (rng7, other7, expected7), + ]: + result_difference = rng.difference(other, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result_difference, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_freq(self, sort): + # GH14323: difference of Period MUST preserve frequency + # but the ability to union results must be preserved + + index = period_range("20160920", "20160925", freq="D") + + other = period_range("20160921", "20160924", freq="D") + expected = PeriodIndex(["20160920", "20160925"], freq="D") + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + 
tm.assert_attr_equal("freq", idx_diff, expected) + + other = period_range("20160922", "20160925", freq="D") + idx_diff = index.difference(other, sort) + expected = PeriodIndex(["20160920", "20160921"], freq="D") + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) diff --git a/pandas/tests/indexes/period/test_shift.py b/pandas/tests/indexes/period/test_shift.py new file mode 100644 index 00000000..5689e98c --- /dev/null +++ b/pandas/tests/indexes/period/test_shift.py @@ -0,0 +1,119 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import PeriodIndex, period_range +import pandas._testing as tm + + +class TestPeriodIndexShift: + # --------------------------------------------------------------- + # PeriodIndex.shift is used by __add__ and __sub__ + + def test_pi_shift_ndarray(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + result = idx.shift(np.array([1, 2, 3, 4])) + expected = PeriodIndex( + ["2011-02", "2011-04", "NaT", "2011-08"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + + result = idx.shift(np.array([1, -2, 3, -4])) + expected = PeriodIndex( + ["2011-02", "2010-12", "NaT", "2010-12"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + + def test_shift(self): + pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="A", start="1/1/2002", end="12/1/2010") + + tm.assert_index_equal(pi1.shift(0), pi1) + + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="A", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="A", start="1/1/2000", end="12/1/2008") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="M", start="2/1/2001", end="1/1/2010") + assert len(pi1) == len(pi2) + 
tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="M", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="M", start="12/1/2000", end="11/1/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="D", start="1/2/2001", end="12/2/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(1), pi2) + + pi1 = period_range(freq="D", start="1/1/2001", end="12/1/2009") + pi2 = period_range(freq="D", start="12/31/2000", end="11/30/2009") + assert len(pi1) == len(pi2) + tm.assert_index_equal(pi1.shift(-1), pi2) + + def test_shift_corner_cases(self): + # GH#9903 + idx = pd.PeriodIndex([], name="xxx", freq="H") + + with pytest.raises(TypeError): + # period shift doesn't accept freq + idx.shift(1, freq="H") + + tm.assert_index_equal(idx.shift(0), idx) + tm.assert_index_equal(idx.shift(3), idx) + + idx = pd.PeriodIndex( + ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(0), idx) + exp = pd.PeriodIndex( + ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(3), exp) + exp = pd.PeriodIndex( + ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"], + name="xxx", + freq="H", + ) + tm.assert_index_equal(idx.shift(-3), exp) + + def test_shift_nat(self): + idx = PeriodIndex( + ["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx" + ) + result = idx.shift(1) + expected = PeriodIndex( + ["2011-02", "2011-03", "NaT", "2011-05"], freq="M", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_shift_gh8083(self): + # test shift for PeriodIndex + # GH#8083 + drange = pd.period_range("20130101", periods=5, freq="D") + result = drange.shift(1) + expected = PeriodIndex( + ["2013-01-02", "2013-01-03", "2013-01-04", 
"2013-01-05", "2013-01-06"], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_shift_periods(self): + # GH #22458 : argument 'n' was deprecated in favor of 'periods' + idx = period_range(freq="A", start="1/1/2001", end="12/1/2009") + tm.assert_index_equal(idx.shift(periods=0), idx) + tm.assert_index_equal(idx.shift(0), idx) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py new file mode 100644 index 00000000..28ab14af --- /dev/null +++ b/pandas/tests/indexes/period/test_tools.py @@ -0,0 +1,381 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency +from pandas._libs.tslibs.ccalendar import MONTHS + +import pandas as pd +from pandas import ( + DatetimeIndex, + Period, + PeriodIndex, + Series, + Timedelta, + Timestamp, + date_range, + period_range, + to_datetime, +) +import pandas._testing as tm + + +class TestPeriodRepresentation: + """ + Wish to match NumPy units + """ + + def _check_freq(self, freq, base_date): + rng = period_range(start=base_date, periods=10, freq=freq) + exp = np.arange(10, dtype=np.int64) + + tm.assert_numpy_array_equal(rng.asi8, exp) + + def test_annual(self): + self._check_freq("A", 1970) + + def test_monthly(self): + self._check_freq("M", "1970-01") + + @pytest.mark.parametrize("freq", ["W-THU", "D", "B", "H", "T", "S", "L", "U", "N"]) + def test_freq(self, freq): + self._check_freq(freq, "1970-01-01") + + def test_negone_ordinals(self): + freqs = ["A", "M", "Q", "D", "H", "T", "S"] + + period = Period(ordinal=-1, freq="D") + for freq in freqs: + repr(period.asfreq(freq)) + + for freq in freqs: + period = Period(ordinal=-1, freq=freq) + repr(period) + assert period.year == 1969 + + period = Period(ordinal=-1, freq="B") + repr(period) + period = Period(ordinal=-1, freq="W") + repr(period) + + +class TestPeriodIndex: + def test_to_timestamp(self): + index = period_range(freq="A", 
start="1/1/2001", end="12/1/2009") + series = Series(1, index=index, name="foo") + + exp_index = date_range("1/1/2001", end="12/31/2009", freq="A-DEC") + result = series.to_timestamp(how="end") + exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + assert result.name == "foo" + + exp_index = date_range("1/1/2001", end="1/1/2009", freq="AS-JAN") + result = series.to_timestamp(how="start") + tm.assert_index_equal(result.index, exp_index) + + def _get_with_delta(delta, freq="A-DEC"): + return date_range( + to_datetime("1/1/2001") + delta, + to_datetime("12/31/2009") + delta, + freq=freq, + ) + + delta = timedelta(hours=23) + result = series.to_timestamp("H", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + delta = timedelta(hours=23, minutes=59) + result = series.to_timestamp("T", "end") + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + result = series.to_timestamp("S", "end") + delta = timedelta(hours=23, minutes=59, seconds=59) + exp_index = _get_with_delta(delta) + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + + index = period_range(freq="H", start="1/1/2001", end="1/2/2001") + series = Series(1, index=index, name="foo") + + exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="H") + result = series.to_timestamp(how="end") + exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result.index, exp_index) + assert result.name == "foo" + + def test_to_timestamp_freq(self): + idx = pd.period_range("2017", periods=12, freq="A-DEC") + result = idx.to_timestamp() + expected = pd.date_range("2017", periods=12, freq="AS-JAN") + tm.assert_index_equal(result, expected) + + def 
test_to_timestamp_repr_is_code(self): + zs = [ + Timestamp("99-04-17 00:00:00", tz="UTC"), + Timestamp("2001-04-17 00:00:00", tz="UTC"), + Timestamp("2001-04-17 00:00:00", tz="America/Los_Angeles"), + Timestamp("2001-04-17 00:00:00", tz=None), + ] + for z in zs: + assert eval(repr(z)) == z + + def test_to_timestamp_to_period_astype(self): + idx = DatetimeIndex([pd.NaT, "2011-01-01", "2011-02-01"], name="idx") + + res = idx.astype("period[M]") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx") + tm.assert_index_equal(res, exp) + + res = idx.astype("period[3M]") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx") + tm.assert_index_equal(res, exp) + + def test_dti_to_period(self): + dti = pd.date_range(start="1/1/2005", end="12/1/2005", freq="M") + pi1 = dti.to_period() + pi2 = dti.to_period(freq="D") + pi3 = dti.to_period(freq="3D") + + assert pi1[0] == Period("Jan 2005", freq="M") + assert pi2[0] == Period("1/31/2005", freq="D") + assert pi3[0] == Period("1/31/2005", freq="3D") + + assert pi1[-1] == Period("Nov 2005", freq="M") + assert pi2[-1] == Period("11/30/2005", freq="D") + assert pi3[-1] == Period("11/30/2005", freq="3D") + + tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M")) + tm.assert_index_equal( + pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D") + ) + tm.assert_index_equal( + pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D") + ) + + @pytest.mark.parametrize("month", MONTHS) + def test_to_period_quarterly(self, month): + # make sure we can make the round trip + freq = "Q-{month}".format(month=month) + rng = period_range("1989Q3", "1991Q3", freq=freq) + stamps = rng.to_timestamp() + result = stamps.to_period(freq) + tm.assert_index_equal(rng, result) + + @pytest.mark.parametrize("off", ["BQ", "QS", "BQS"]) + def test_to_period_quarterlyish(self, off): + rng = date_range("01-Jan-2012", periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == "Q-DEC" 
+ + @pytest.mark.parametrize("off", ["BA", "AS", "BAS"]) + def test_to_period_annualish(self, off): + rng = date_range("01-Jan-2012", periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == "A-DEC" + + def test_to_period_monthish(self): + offsets = ["MS", "BM"] + for off in offsets: + rng = date_range("01-Jan-2012", periods=8, freq=off) + prng = rng.to_period() + assert prng.freq == "M" + + rng = date_range("01-Jan-2012", periods=8, freq="M") + prng = rng.to_period() + assert prng.freq == "M" + + msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + date_range("01-Jan-2012", periods=8, freq="EOM") + + def test_period_dt64_round_trip(self): + dti = date_range("1/1/2000", "1/7/2002", freq="B") + pi = dti.to_period() + tm.assert_index_equal(pi.to_timestamp(), dti) + + dti = date_range("1/1/2000", "1/7/2002", freq="B") + pi = dti.to_period(freq="H") + tm.assert_index_equal(pi.to_timestamp(), dti) + + def test_combine_first(self): + # GH#3367 + didx = pd.date_range(start="1950-01-31", end="1950-07-31", freq="M") + pidx = pd.period_range( + start=pd.Period("1950-1"), end=pd.Period("1950-7"), freq="M" + ) + # check to be consistent with DatetimeIndex + for idx in [didx, pidx]: + a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) + b = pd.Series([9, 9, 9, 9, 9, 9, 9], index=idx) + + result = a.combine_first(b) + expected = pd.Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("freq", ["D", "2D"]) + def test_searchsorted(self, freq): + pidx = pd.PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq=freq, + ) + + p1 = pd.Period("2014-01-01", freq=freq) + assert pidx.searchsorted(p1) == 0 + + p2 = pd.Period("2014-01-04", freq=freq) + assert pidx.searchsorted(p2) == 3 + + assert pidx.searchsorted(pd.NaT) == 0 + + msg = "Input has different freq=H from PeriodArray" + with 
pytest.raises(IncompatibleFrequency, match=msg): + pidx.searchsorted(pd.Period("2014-01-01", freq="H")) + + msg = "Input has different freq=5D from PeriodArray" + with pytest.raises(IncompatibleFrequency, match=msg): + pidx.searchsorted(pd.Period("2014-01-01", freq="5D")) + + def test_searchsorted_invalid(self): + pidx = pd.PeriodIndex( + ["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"], + freq="D", + ) + + other = np.array([0, 1], dtype=np.int64) + + msg = "requires either a Period or PeriodArray" + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(other.astype("timedelta64[ns]")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64(4)) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.timedelta64("NaT", "ms")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64(4, "ns")) + + with pytest.raises(TypeError, match=msg): + pidx.searchsorted(np.datetime64("NaT", "ns")) + + +class TestPeriodIndexConversion: + def test_tolist(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009") + rs = index.tolist() + for x in rs: + assert isinstance(x, Period) + + recon = PeriodIndex(rs) + tm.assert_index_equal(index, recon) + + def test_to_timestamp_pi_nat(self): + # GH#7228 + index = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx") + + result = index.to_timestamp("D") + expected = DatetimeIndex( + [pd.NaT, datetime(2011, 1, 1), datetime(2011, 2, 1)], name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.name == "idx" + + result2 = result.to_period(freq="M") + tm.assert_index_equal(result2, index) + assert result2.name == "idx" + + result3 = result.to_period(freq="3M") + exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx") + tm.assert_index_equal(result3, exp) + assert result3.freqstr == "3M" + + msg = "Frequency must be positive, 
because it represents span: -2A" + with pytest.raises(ValueError, match=msg): + result.to_period(freq="-2A") + + def test_to_timestamp_preserve_name(self): + index = period_range(freq="A", start="1/1/2001", end="12/1/2009", name="foo") + assert index.name == "foo" + + conv = index.to_timestamp("D") + assert conv.name == "foo" + + def test_to_timestamp_quarterly_bug(self): + years = np.arange(1960, 2000).repeat(4) + quarters = np.tile(list(range(1, 5)), 40) + + pindex = PeriodIndex(year=years, quarter=quarters) + + stamps = pindex.to_timestamp("D", "end") + expected = DatetimeIndex([x.to_timestamp("D", "end") for x in pindex]) + tm.assert_index_equal(stamps, expected) + + def test_to_timestamp_pi_mult(self): + idx = PeriodIndex(["2011-01", "NaT", "2011-02"], freq="2M", name="idx") + + result = idx.to_timestamp() + expected = DatetimeIndex(["2011-01-01", "NaT", "2011-02-01"], name="idx") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E") + expected = DatetimeIndex(["2011-02-28", "NaT", "2011-03-31"], name="idx") + expected = expected + Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + def test_to_timestamp_pi_combined(self): + idx = period_range(start="2011", periods=2, freq="1D1H", name="idx") + + result = idx.to_timestamp() + expected = DatetimeIndex(["2011-01-01 00:00", "2011-01-02 01:00"], name="idx") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E") + expected = DatetimeIndex( + ["2011-01-02 00:59:59", "2011-01-03 01:59:59"], name="idx" + ) + expected = expected + Timedelta(1, "s") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + result = idx.to_timestamp(how="E", freq="H") + expected = DatetimeIndex(["2011-01-02 00:00", "2011-01-03 01:00"], name="idx") + expected = expected + Timedelta(1, "h") - Timedelta(1, "ns") + tm.assert_index_equal(result, expected) + + def test_period_astype_to_timestamp(self): + pi = pd.PeriodIndex(["2011-01", "2011-02", 
"2011-03"], freq="M") + + exp = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) + tm.assert_index_equal(pi.astype("datetime64[ns]"), exp) + + exp = pd.DatetimeIndex(["2011-01-31", "2011-02-28", "2011-03-31"]) + exp = exp + Timedelta(1, "D") - Timedelta(1, "ns") + tm.assert_index_equal(pi.astype("datetime64[ns]", how="end"), exp) + + exp = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-03-01"], tz="US/Eastern" + ) + res = pi.astype("datetime64[ns, US/Eastern]") + tm.assert_index_equal(pi.astype("datetime64[ns, US/Eastern]"), exp) + + exp = pd.DatetimeIndex( + ["2011-01-31", "2011-02-28", "2011-03-31"], tz="US/Eastern" + ) + exp = exp + Timedelta(1, "D") - Timedelta(1, "ns") + res = pi.astype("datetime64[ns, US/Eastern]", how="end") + tm.assert_index_equal(res, exp) + + def test_to_timestamp_1703(self): + index = period_range("1/1/2012", periods=4, freq="D") + + result = index.to_timestamp() + assert result[0] == Timestamp("1/1/2012") diff --git a/pandas/tests/indexes/ranges/__init__.py b/pandas/tests/indexes/ranges/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py new file mode 100644 index 00000000..ba1de6d5 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_constructors.py @@ -0,0 +1,154 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import Index, RangeIndex, Series +import pandas._testing as tm + + +class TestRangeIndexConstructors: + @pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize( + "args, kwargs, start, stop, step", + [ + ((5,), dict(), 0, 5, 1), + ((1, 5), dict(), 1, 5, 1), + ((1, 5, 2), dict(), 1, 5, 2), + ((0,), dict(), 0, 0, 1), + ((0, 0), dict(), 0, 0, 1), + (tuple(), dict(start=0), 0, 0, 1), + (tuple(), dict(stop=0), 0, 0, 1), + ], + ) + def test_constructor(self, args, kwargs, start, stop, step, name): + result = RangeIndex(*args, name=name, 
**kwargs) + expected = Index(np.arange(start, stop, step, dtype=np.int64), name=name) + assert isinstance(result, RangeIndex) + assert result.name is name + assert result._range == range(start, stop, step) + tm.assert_index_equal(result, expected) + + def test_constructor_invalid_args(self): + msg = "RangeIndex\\(\\.\\.\\.\\) must be called with integers" + with pytest.raises(TypeError, match=msg): + RangeIndex() + + with pytest.raises(TypeError, match=msg): + RangeIndex(name="Foo") + + # invalid args + for i in [ + Index(["a", "b"]), + Series(["a", "b"]), + np.array(["a", "b"]), + [], + "foo", + datetime(2000, 1, 1, 0, 0), + np.arange(0, 10), + np.array([1]), + [1], + ]: + with pytest.raises(TypeError): + RangeIndex(i) + + # we don't allow on a bare Index + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, 0 was passed" + ) + with pytest.raises(TypeError, match=msg): + Index(0, 1000) + + def test_constructor_same(self): + + # pass thru w and w/o copy + index = RangeIndex(1, 5, 2) + result = RangeIndex(index, copy=False) + assert result.identical(index) + + result = RangeIndex(index, copy=True) + tm.assert_index_equal(result, index, exact=True) + + result = RangeIndex(index) + tm.assert_index_equal(result, index, exact=True) + + with pytest.raises( + ValueError, + match="Incorrect `dtype` passed: expected signed integer, received float64", + ): + RangeIndex(index, dtype="float64") + + def test_constructor_range(self): + + msg = "Value needs to be a scalar value, was type " + with pytest.raises(TypeError, match=msg): + result = RangeIndex(range(1, 5, 2)) + + result = RangeIndex.from_range(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + result = RangeIndex.from_range(range(5, 6)) + expected = RangeIndex(5, 6, 1) + tm.assert_index_equal(result, expected, exact=True) + + # an invalid range + result = RangeIndex.from_range(range(5, 1)) + expected = RangeIndex(0, 0, 1) + 
tm.assert_index_equal(result, expected, exact=True) + + result = RangeIndex.from_range(range(5)) + expected = RangeIndex(0, 5, 1) + tm.assert_index_equal(result, expected, exact=True) + + result = Index(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + tm.assert_index_equal(result, expected, exact=True) + + with pytest.raises( + ValueError, + match="Incorrect `dtype` passed: expected signed integer, received float64", + ): + Index(range(1, 5, 2), dtype="float64") + msg = r"^from_range\(\) got an unexpected keyword argument" + with pytest.raises(TypeError, match=msg): + RangeIndex.from_range(range(10), copy=True) + + def test_constructor_name(self): + # GH#12288 + orig = RangeIndex(10) + orig.name = "original" + + copy = RangeIndex(orig) + copy.name = "copy" + + assert orig.name == "original" + assert copy.name == "copy" + + new = Index(copy) + assert new.name == "copy" + + new.name = "new" + assert orig.name == "original" + assert copy.name == "copy" + assert new.name == "new" + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + assert index.values.dtype == np.int64 + tm.assert_index_equal(index, Index(arr)) + + # non-int raise Exception + with pytest.raises(TypeError): + RangeIndex("1", "10", "1") + with pytest.raises(TypeError): + RangeIndex(1.1, 10.2, 1.3) + + # invalid passed type + with pytest.raises( + ValueError, + match="Incorrect `dtype` passed: expected signed integer, received float64", + ): + RangeIndex(1, 5, dtype="float64") diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py new file mode 100644 index 00000000..8d98ab18 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_range.py @@ -0,0 +1,742 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import ensure_platform_int + +import pandas as pd +from pandas import Float64Index, Index, Int64Index, RangeIndex +import pandas._testing as tm + +from ..test_numeric import Numeric 
+ +# aliases to make some tests easier to read +RI = RangeIndex +I64 = Int64Index +F64 = Float64Index +OI = Index + + +class TestRangeIndex(Numeric): + _holder = RangeIndex + _compat_props = ["shape", "ndim", "size"] + + @pytest.fixture( + params=[ + RangeIndex(start=0, stop=20, step=2, name="foo"), + RangeIndex(start=18, stop=-1, step=-2, name="bar"), + ], + ids=["index_inc", "index_dec"], + ) + def indices(self, request): + return request.param + + def create_index(self): + return RangeIndex(start=0, stop=20, step=2) + + def test_can_hold_identifiers(self): + idx = self.create_index() + key = idx[0] + assert idx._can_hold_identifiers_and_holds_name(key) is False + + def test_too_many_names(self): + index = self.create_index() + with pytest.raises(ValueError, match="^Length"): + index.names = ["roger", "harold"] + + @pytest.mark.parametrize( + "index, start, stop, step", + [ + (RangeIndex(5), 0, 5, 1), + (RangeIndex(0, 5), 0, 5, 1), + (RangeIndex(5, step=2), 0, 5, 2), + (RangeIndex(1, 5, 2), 1, 5, 2), + ], + ) + def test_start_stop_step_attrs(self, index, start, stop, step): + # GH 25710 + assert index.start == start + assert index.stop == stop + assert index.step == step + + @pytest.mark.parametrize("attr_name", ["_start", "_stop", "_step"]) + def test_deprecated_start_stop_step_attrs(self, attr_name): + # GH 26581 + idx = self.create_index() + with tm.assert_produces_warning(FutureWarning): + getattr(idx, attr_name) + + def test_copy(self): + i = RangeIndex(5, name="Foo") + i_copy = i.copy() + assert i_copy is not i + assert i_copy.identical(i) + assert i_copy._range == range(0, 5, 1) + assert i_copy.name == "Foo" + + def test_repr(self): + i = RangeIndex(5, name="Foo") + result = repr(i) + expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" + assert result == expected + + result = eval(result) + tm.assert_index_equal(result, i, exact=True) + + i = RangeIndex(5, 0, -1) + result = repr(i) + expected = "RangeIndex(start=5, stop=0, step=-1)" + assert 
result == expected + + result = eval(result) + tm.assert_index_equal(result, i, exact=True) + + def test_insert(self): + + idx = RangeIndex(5, name="Foo") + result = idx[1:4] + + # test 0th element + tm.assert_index_equal(idx[0:4], result.insert(0, idx[0])) + + # GH 18295 (test missing) + expected = Float64Index([0, np.nan, 1, 2, 3, 4]) + for na in (np.nan, pd.NaT, None): + result = RangeIndex(5).insert(1, na) + tm.assert_index_equal(result, expected) + + def test_delete(self): + + idx = RangeIndex(5, name="Foo") + expected = idx[1:].astype(int) + result = idx.delete(0) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + expected = idx[:-1].astype(int) + result = idx.delete(-1) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) + + def test_view(self): + i = RangeIndex(0, name="Foo") + i_view = i.view() + assert i_view.name == "Foo" + + i_view = i.view("i8") + tm.assert_numpy_array_equal(i.values, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_dtype(self): + index = self.create_index() + assert index.dtype == np.int64 + + def test_cached_data(self): + # GH 26565, GH26617 + # Calling RangeIndex._data caches an int64 array of the same length at + # self._cached_data. 
This test checks whether _cached_data has been set + idx = RangeIndex(0, 100, 10) + + assert idx._cached_data is None + + repr(idx) + assert idx._cached_data is None + + str(idx) + assert idx._cached_data is None + + idx.get_loc(20) + assert idx._cached_data is None + + 90 in idx + assert idx._cached_data is None + + 91 in idx + assert idx._cached_data is None + + idx.all() + assert idx._cached_data is None + + idx.any() + assert idx._cached_data is None + + df = pd.DataFrame({"a": range(10)}, index=idx) + + df.loc[50] + assert idx._cached_data is None + + with pytest.raises(KeyError, match="51"): + df.loc[51] + assert idx._cached_data is None + + df.loc[10:50] + assert idx._cached_data is None + + df.iloc[5:10] + assert idx._cached_data is None + + # actually calling idx._data + assert isinstance(idx._data, np.ndarray) + assert isinstance(idx._cached_data, np.ndarray) + + def test_is_monotonic(self): + index = RangeIndex(0, 20, 2) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is False + + index = RangeIndex(4, 0, -1) + assert index.is_monotonic is False + assert index._is_strictly_monotonic_increasing is False + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(1, 2) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(2, 1) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + index = RangeIndex(1, 1) 
+ assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + def test_equals_range(self): + equiv_pairs = [ + (RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), + (RangeIndex(0), RangeIndex(1, -1, 3)), + (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), + (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2)), + ] + for left, right in equiv_pairs: + assert left.equals(right) + assert right.equals(left) + + def test_logical_compat(self): + idx = self.create_index() + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + def test_identical(self): + index = self.create_index() + i = Index(index.copy()) + assert i.identical(index) + + # we don't allow object dtype for RangeIndex + if isinstance(index, RangeIndex): + return + + same_values_different_type = Index(i, dtype=object) + assert not i.identical(same_values_different_type) + + i = index.copy(dtype=object) + i = i.rename("foo") + same_values = Index(i, dtype=object) + assert same_values.identical(index.copy(dtype=object)) + + assert not i.identical(index) + assert Index(same_values, name="foo", dtype=object).identical(i) + + assert not index.copy(dtype=object).identical(index.copy(dtype="int64")) + + def test_get_indexer(self): + index = self.create_index() + target = RangeIndex(10) + indexer = index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + index = self.create_index() + target = RangeIndex(10) + indexer = index.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + index = self.create_index() + target = RangeIndex(10) + indexer = 
index.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_limit(self): + # GH 28631 + idx = RangeIndex(4) + target = RangeIndex(6) + result = idx.get_indexer(target, method="pad", limit=1) + expected = np.array([0, 1, 2, 3, 3, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("stop", [0, -1, -2]) + def test_get_indexer_decreasing(self, stop): + # GH 28678 + index = RangeIndex(7, stop, -3) + result = index.get_indexer(range(9)) + expected = np.array([-1, 2, -1, -1, 1, -1, -1, 0, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_join_outer(self): + # join with Int64Index + index = self.create_index() + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Int64Index( + [0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25] + ) + elidx = np.array( + [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], + dtype=np.intp, + ) + eridx = np.array( + [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], + dtype=np.intp, + ) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + assert isinstance(res, Int64Index) + assert not isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + # 
Join with non-RangeIndex + index = self.create_index() + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9], dtype=np.intp) + eridx = np.array([9, 7], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + # Join with Int64Index + index = self.create_index() + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # Join withRangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + + assert isinstance(res, RangeIndex) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + # Join with Int64Index + index = self.create_index() + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp) + + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + 
tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # Join withRangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + + assert isinstance(other, RangeIndex) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + def test_join_non_int_index(self): + index = self.create_index() + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_non_unique(self): + index = self.create_index() + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self, join_type): + index = self.create_index() + joined = index.join(index, how=join_type) + assert index is joined + + def test_nbytes(self): + + # memory savings vs int index + i = RangeIndex(0, 1000) + assert i.nbytes < 
i._int64index.nbytes / 10 + + # constant memory usage + i2 = RangeIndex(0, 10) + assert i.nbytes == i2.nbytes + + def test_cant_or_shouldnt_cast(self): + # can't + with pytest.raises(TypeError): + RangeIndex("foo", "bar", "baz") + + # shouldn't + with pytest.raises(TypeError): + RangeIndex("0", "1", "2") + + def test_view_index(self): + index = self.create_index() + index.view(Index) + + def test_prevent_casting(self): + index = self.create_index() + result = index.astype("O") + assert result.dtype == np.object_ + + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name="foo") + taken = index.take([3, 0, 1]) + assert index.name == taken.name + + def test_take_fill_value(self): + # GH 12631 + idx = pd.RangeIndex(1, 4, name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = pd.Int64Index([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + msg = "Unable to fill values because RangeIndex cannot contain NA" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.Int64Index([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + msg = "Unable to fill values because RangeIndex cannot contain NA" + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_print_unicode_columns(self): + df = pd.DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_repr_roundtrip(self): + index = self.create_index() + tm.assert_index_equal(eval(repr(index)), index) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name="asdf") + assert idx.name == idx[1:].name + + def 
test_explicit_conversions(self): + + # GH 8608 + # add/sub are overridden explicitly for Float/Int Index + idx = RangeIndex(5) + + # float conversions + arr = np.arange(5, dtype="int64") * 3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx, expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx, expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5, dtype="float64") + result = fidx - a + tm.assert_index_equal(result, expected) + + expected = Float64Index(-arr) + a = np.zeros(5, dtype="float64") + result = a - fidx + tm.assert_index_equal(result, expected) + + def test_has_duplicates(self, indices): + assert indices.is_unique + assert not indices.has_duplicates + + def test_extended_gcd(self): + index = self.create_index() + result = index._extended_gcd(6, 10) + assert result[0] == result[1] * 6 + result[2] * 10 + assert 2 == result[0] + + result = index._extended_gcd(10, 6) + assert 2 == result[1] * 10 + result[2] * 6 + assert 2 == result[0] + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + assert 2 == result + + result = RangeIndex(1, 6)._min_fitting_element(1) + assert 1 == result + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + assert 2 == result + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + assert 1 == result + + big_num = 500000000000000000000000 + + result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num) + assert big_num == result + + def test_max_fitting_element(self): + result = RangeIndex(0, 20, 2)._max_fitting_element(17) + assert 16 == result + + result = RangeIndex(1, 6)._max_fitting_element(4) + assert 4 == result + + result = RangeIndex(18, -2, -2)._max_fitting_element(17) + assert 16 == result + + result = RangeIndex(5, 0, -1)._max_fitting_element(4) + assert 4 == result + + big_num = 500000000000000000000000 + + result = RangeIndex(5, big_num * 2, 1)._max_fitting_element(big_num) + assert 
big_num == result + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid constructor + pass + + def test_slice_specialised(self): + index = self.create_index() + index.name = "foo" + + # scalar indexing + res = index[1] + expected = 2 + assert res == expected + + res = index[-1] + expected = 18 + assert res == expected + + # slicing + # slice value completion + index_slice = index[:] + expected = index + tm.assert_index_equal(index_slice, expected) + + # positive slice values + index_slice = index[7:10:2] + expected = Index(np.array([14, 18]), name="foo") + tm.assert_index_equal(index_slice, expected) + + # negative slice values + index_slice = index[-1:-5:-2] + expected = Index(np.array([18, 14]), name="foo") + tm.assert_index_equal(index_slice, expected) + + # stop overshoot + index_slice = index[2:100:4] + expected = Index(np.array([4, 12]), name="foo") + tm.assert_index_equal(index_slice, expected) + + # reverse + index_slice = index[::-1] + expected = Index(index.values[::-1], name="foo") + tm.assert_index_equal(index_slice, expected) + + index_slice = index[-8::-1] + expected = Index(np.array([4, 2, 0]), name="foo") + tm.assert_index_equal(index_slice, expected) + + index_slice = index[-40::-1] + expected = Index(np.array([], dtype=np.int64), name="foo") + tm.assert_index_equal(index_slice, expected) + + index_slice = index[40::-1] + expected = Index(index.values[40::-1], name="foo") + tm.assert_index_equal(index_slice, expected) + + index_slice = index[10::-1] + expected = Index(index.values[::-1], name="foo") + tm.assert_index_equal(index_slice, expected) + + @pytest.mark.parametrize("step", set(range(-5, 6)) - {0}) + def test_len_specialised(self, step): + # make sure that our len is the same as np.arange calc + start, stop = (0, 5) if step > 0 else (5, 0) + + arr = np.arange(start, stop, step) + index = RangeIndex(start, stop, step) + assert len(index) == len(arr) + + index = RangeIndex(stop, start, step) + assert len(index) == 0 + + 
@pytest.fixture( + params=[ + ([RI(1, 12, 5)], RI(1, 12, 5)), + ([RI(0, 6, 4)], RI(0, 6, 4)), + ([RI(1, 3), RI(3, 7)], RI(1, 7)), + ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)), + ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)), + ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)), + ([RI(-4, -8), RI(-8, -12)], RI(0, 0)), + ([RI(-4, -8), RI(3, -4)], RI(0, 0)), + ([RI(-4, -8), RI(3, 5)], RI(3, 5)), + ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])), + ([RI(-2), RI(3, 5)], RI(3, 5)), + ([RI(2), RI(2)], I64([0, 1, 0, 1])), + ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)), + ([RI(2), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])), + ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)), + ([RI(3), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])), + ([RI(3), F64([-1, 3.1, 15.0])], F64([0, 1, 2, -1, 3.1, 15.0])), + ([RI(3), OI(["a", None, 14])], OI([0, 1, 2, "a", None, 14])), + ([RI(3, 1), OI(["a", None, 14])], OI(["a", None, 14])), + ] + ) + def appends(self, request): + """Inputs and expected outputs for RangeIndex.append test""" + + return request.param + + def test_append(self, appends): + # GH16212 + + indices, expected = appends + + result = indices[0].append(indices[1:]) + tm.assert_index_equal(result, expected, exact=True) + + if len(indices) == 2: + # Append single item rather than list + result2 = indices[0].append(indices[1]) + tm.assert_index_equal(result2, expected, exact=True) + + def test_engineless_lookup(self): + # GH 16685 + # Standard lookup on RangeIndex should not require the engine to be + # created + idx = RangeIndex(2, 10, 3) + + assert idx.get_loc(5) == 1 + tm.assert_numpy_array_equal( + idx.get_indexer([2, 8]), ensure_platform_int(np.array([0, 2])) + ) + with pytest.raises(KeyError, match="3"): + idx.get_loc(3) + + assert "_engine" not in idx._cache + + # The engine is still required for lookup of a different dtype scalar: + with pytest.raises(KeyError, match="'a'"): + assert idx.get_loc("a") == -1 + + assert "_engine" in idx._cache diff --git 
a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py new file mode 100644 index 00000000..5bedc408 --- /dev/null +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -0,0 +1,244 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas import Index, Int64Index, RangeIndex +import pandas._testing as tm + + +class TestRangeIndexSetOps: + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection(self, sort): + # intersect with Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Index(np.arange(1, 6)) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index, sort=sort) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + # intersect with increasing RangeIndex + other = RangeIndex(1, 6) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + # intersect with decreasing RangeIndex + other = RangeIndex(5, 0, -1) + result = index.intersection(other, sort=sort) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + # reversed (GH 17296) + result = other.intersection(index, sort=sort) + tm.assert_index_equal(result, expected) + + # GH 17296: intersect two decreasing RangeIndexes + first = RangeIndex(10, -2, -2) + other = RangeIndex(5, -4, -1) + expected = first.astype(int).intersection(other.astype(int), sort=sort) + result = first.intersection(other, sort=sort).astype(int) + tm.assert_index_equal(result, expected) + + # reversed + result = other.intersection(first, sort=sort).astype(int) + tm.assert_index_equal(result, expected) + + index = RangeIndex(5) + + # intersect of 
non-overlapping indices + other = RangeIndex(5, 10, 1) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + other = RangeIndex(-1, -5, -1) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + # intersection of empty indices + other = RangeIndex(0, 0, 1) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + result = other.intersection(index, sort=sort) + tm.assert_index_equal(result, expected) + + # intersection of non-overlapping values based on start value and gcd + index = RangeIndex(1, 10, 2) + other = RangeIndex(0, 10, 4) + result = index.intersection(other, sort=sort) + expected = RangeIndex(0, 0, 1) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [False, None]) + def test_union_noncomparable(self, sort): + # corner case, non-Int64Index + index = RangeIndex(start=0, stop=20, step=2) + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other, sort=sort) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index, sort=sort) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + @pytest.fixture( + params=[ + ( + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + ), + ( + RangeIndex(0, 10, 1), + RangeIndex(5, 20, 1), + RangeIndex(0, 20, 1), + Int64Index(range(20)), + ), + ( + RangeIndex(0, 10, 1), + RangeIndex(10, 20, 1), + RangeIndex(0, 20, 1), + Int64Index(range(20)), + ), + ( + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + RangeIndex(0, -10, -1), + ), + ( + RangeIndex(0, -10, -1), + RangeIndex(-10, -20, -1), + RangeIndex(-19, 1, 1), + Int64Index(range(0, -20, -1)), + ), + ( + 
RangeIndex(0, 10, 2), + RangeIndex(1, 10, 2), + RangeIndex(0, 10, 1), + Int64Index(list(range(0, 10, 2)) + list(range(1, 10, 2))), + ), + ( + RangeIndex(0, 11, 2), + RangeIndex(1, 12, 2), + RangeIndex(0, 12, 1), + Int64Index(list(range(0, 11, 2)) + list(range(1, 12, 2))), + ), + ( + RangeIndex(0, 21, 4), + RangeIndex(-2, 24, 4), + RangeIndex(-2, 24, 2), + Int64Index(list(range(0, 21, 4)) + list(range(-2, 24, 4))), + ), + ( + RangeIndex(0, -20, -2), + RangeIndex(-1, -21, -2), + RangeIndex(-19, 1, 1), + Int64Index(list(range(0, -20, -2)) + list(range(-1, -21, -2))), + ), + ( + RangeIndex(0, 100, 5), + RangeIndex(0, 100, 20), + RangeIndex(0, 100, 5), + Int64Index(range(0, 100, 5)), + ), + ( + RangeIndex(0, -100, -5), + RangeIndex(5, -100, -20), + RangeIndex(-95, 10, 5), + Int64Index(list(range(0, -100, -5)) + [5]), + ), + ( + RangeIndex(0, -11, -1), + RangeIndex(1, -12, -4), + RangeIndex(-11, 2, 1), + Int64Index(list(range(0, -11, -1)) + [1, -11]), + ), + (RangeIndex(0), RangeIndex(0), RangeIndex(0), RangeIndex(0)), + ( + RangeIndex(0, -10, -2), + RangeIndex(0), + RangeIndex(0, -10, -2), + RangeIndex(0, -10, -2), + ), + ( + RangeIndex(0, 100, 2), + RangeIndex(100, 150, 200), + RangeIndex(0, 102, 2), + Int64Index(range(0, 102, 2)), + ), + ( + RangeIndex(0, -100, -2), + RangeIndex(-100, 50, 102), + RangeIndex(-100, 4, 2), + Int64Index(list(range(0, -100, -2)) + [-100, 2]), + ), + ( + RangeIndex(0, -100, -1), + RangeIndex(0, -50, -3), + RangeIndex(-99, 1, 1), + Int64Index(list(range(0, -100, -1))), + ), + ( + RangeIndex(0, 1, 1), + RangeIndex(5, 6, 10), + RangeIndex(0, 6, 5), + Int64Index([0, 5]), + ), + ( + RangeIndex(0, 10, 5), + RangeIndex(-5, -6, -20), + RangeIndex(-5, 10, 5), + Int64Index([0, 5, -5]), + ), + ( + RangeIndex(0, 3, 1), + RangeIndex(4, 5, 1), + Int64Index([0, 1, 2, 4]), + Int64Index([0, 1, 2, 4]), + ), + ( + RangeIndex(0, 10, 1), + Int64Index([]), + RangeIndex(0, 10, 1), + RangeIndex(0, 10, 1), + ), + ( + RangeIndex(0), + Int64Index([1, 5, 6]), + 
Int64Index([1, 5, 6]), + Int64Index([1, 5, 6]), + ), + ] + ) + def unions(self, request): + """Inputs and expected outputs for RangeIndex.union tests""" + + return request.param + + def test_union_sorted(self, unions): + + idx1, idx2, expected_sorted, expected_notsorted = unions + + res1 = idx1.union(idx2, sort=None) + tm.assert_index_equal(res1, expected_sorted, exact=True) + + res1 = idx1.union(idx2, sort=False) + tm.assert_index_equal(res1, expected_notsorted, exact=True) + + res2 = idx2.union(idx1, sort=None) + res3 = idx1._int64index.union(idx2, sort=None) + tm.assert_index_equal(res2, expected_sorted, exact=True) + tm.assert_index_equal(res3, expected_sorted) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py new file mode 100644 index 00000000..7b2a7328 --- /dev/null +++ b/pandas/tests/indexes/test_base.py @@ -0,0 +1,2841 @@ +from collections import defaultdict +from datetime import datetime, timedelta +from io import StringIO +import math +import operator +import re + +import numpy as np +import pytest + +import pandas._config.config as cf + +from pandas._libs.tslib import Timestamp +from pandas.compat.numpy import np_datetime64_compat +from pandas.util._test_decorators import async_mark + +from pandas.core.dtypes.common import is_unsigned_integer_dtype +from pandas.core.dtypes.generic import ABCIndex + +import pandas as pd +from pandas import ( + CategoricalIndex, + DataFrame, + DatetimeIndex, + Float64Index, + Int64Index, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + UInt64Index, + date_range, + isna, + period_range, +) +import pandas._testing as tm +from pandas.core.algorithms import safe_sort +from pandas.core.indexes.api import ( + Index, + MultiIndex, + _get_combined_index, + ensure_index, + ensure_index_from_sequences, +) +from pandas.tests.indexes.common import Base +from pandas.tests.indexes.conftest import indices_dict + + +class TestIndex(Base): + _holder = Index + + @pytest.fixture + def index(self, 
request): + """ + Fixture for selectively parametrizing indices_dict via indirect parametrization + (parametrize over indices_dict keys with indirect=True). Defaults to string + index if no keys are provided. + """ + key = getattr(request, "param", "string") + + # copy to avoid mutation, e.g. setting .name + return indices_dict[key].copy() + + def create_index(self): + return Index(list("abcde")) + + def test_can_hold_identifiers(self): + index = self.create_index() + key = index[0] + assert index._can_hold_identifiers_and_holds_name(key) is True + + @pytest.mark.parametrize("index", ["datetime"], indirect=True) + def test_new_axis(self, index): + with tm.assert_produces_warning(DeprecationWarning): + # GH#30588 multi-dimensional indexing deprecated + new_index = index[None, :] + assert new_index.ndim == 2 + assert isinstance(new_index, np.ndarray) + + @pytest.mark.parametrize("index", ["int", "uint", "float"], indirect=True) + def test_copy_and_deepcopy(self, index): + new_copy2 = index.copy(dtype=int) + assert new_copy2.dtype.kind == "i" + + def test_constructor_regular(self, indices): + tm.assert_contains_all(indices, indices) + + def test_constructor_casting(self, index): + # casting + arr = np.array(index) + new_index = Index(arr) + tm.assert_contains_all(arr, new_index) + tm.assert_index_equal(index, new_index) + + def test_constructor_copy(self, index): + # copy + # index = self.create_index() + arr = np.array(index) + new_index = Index(arr, copy=True, name="name") + assert isinstance(new_index, Index) + assert new_index.name == "name" + tm.assert_numpy_array_equal(arr, new_index.values) + arr[0] = "SOMEBIGLONGSTRING" + assert new_index[0] != "SOMEBIGLONGSTRING" + + # FIXME: dont leave commented-out + # what to do here? + # arr = np.array(5.) 
+ # pytest.raises(Exception, arr.view, Index) + + def test_constructor_corner(self): + # corner case + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + "kind, 0 was passed" + ) + with pytest.raises(TypeError, match=msg): + Index(0) + + @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]]) + def test_construction_list_mixed_tuples(self, index_vals): + # see gh-10697: if we are constructing from a mixed list of tuples, + # make sure that we are independent of the sorting order. + index = Index(index_vals) + assert isinstance(index, Index) + assert not isinstance(index, MultiIndex) + + @pytest.mark.parametrize("na_value", [None, np.nan]) + @pytest.mark.parametrize("vtype", [list, tuple, iter]) + def test_construction_list_tuples_nan(self, na_value, vtype): + # GH 18505 : valid tuples containing NaN + values = [(1, "two"), (3.0, na_value)] + result = Index(vtype(values)) + expected = MultiIndex.from_tuples(values) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("cast_as_obj", [True, False]) + @pytest.mark.parametrize( + "index", + [ + pd.date_range( + "2015-01-01 10:00", + freq="D", + periods=3, + tz="US/Eastern", + name="Green Eggs & Ham", + ), # DTI with tz + pd.date_range("2015-01-01 10:00", freq="D", periods=3), # DTI no tz + pd.timedelta_range("1 days", freq="D", periods=3), # td + pd.period_range("2015-01-01", freq="D", periods=3), # period + ], + ) + def test_constructor_from_index_dtlike(self, cast_as_obj, index): + if cast_as_obj: + result = pd.Index(index.astype(object)) + else: + result = pd.Index(index) + + tm.assert_index_equal(result, index) + + if isinstance(index, pd.DatetimeIndex): + assert result.tz == index.tz + if cast_as_obj: + # GH#23524 check that Index(dti, dtype=object) does not + # incorrectly raise ValueError, and that nanoseconds are not + # dropped + index += pd.Timedelta(nanoseconds=50) + result = pd.Index(index, dtype=object) + assert result.dtype == np.object_ + assert 
list(result) == list(index) + + @pytest.mark.parametrize( + "index,has_tz", + [ + ( + pd.date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"), + True, + ), # datetimetz + (pd.timedelta_range("1 days", freq="D", periods=3), False), # td + (pd.period_range("2015-01-01", freq="D", periods=3), False), # period + ], + ) + def test_constructor_from_series_dtlike(self, index, has_tz): + result = pd.Index(pd.Series(index)) + tm.assert_index_equal(result, index) + + if has_tz: + assert result.tz == index.tz + + @pytest.mark.parametrize("klass", [Index, DatetimeIndex]) + def test_constructor_from_series(self, klass): + expected = DatetimeIndex( + [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")] + ) + s = Series( + [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")] + ) + result = klass(s) + tm.assert_index_equal(result, expected) + + def test_constructor_from_series_freq(self): + # GH 6273 + # create from a series, passing a freq + dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] + expected = DatetimeIndex(dts, freq="MS") + + s = Series(pd.to_datetime(dts)) + result = DatetimeIndex(s, freq="MS") + + tm.assert_index_equal(result, expected) + + def test_constructor_from_frame_series_freq(self): + # GH 6273 + # create from a series, passing a freq + dts = ["1-1-1990", "2-1-1990", "3-1-1990", "4-1-1990", "5-1-1990"] + expected = DatetimeIndex(dts, freq="MS") + + df = pd.DataFrame(np.random.rand(5, 3)) + df["date"] = dts + result = DatetimeIndex(df["date"], freq="MS") + + assert df["date"].dtype == object + expected.name = "date" + tm.assert_index_equal(result, expected) + + expected = pd.Series(dts, name="date") + tm.assert_series_equal(df["date"], expected) + + # GH 6274 + # infer freq of same + freq = pd.infer_freq(df["date"]) + assert freq == "MS" + + @pytest.mark.parametrize( + "array", + [ + np.arange(5), + np.array(["a", "b", "c"]), + date_range("2000-01-01", periods=3).values, + ], + ) + def 
test_constructor_ndarray_like(self, array): + # GH 5460#issuecomment-44474502 + # it should be possible to convert any object that satisfies the numpy + # ndarray interface directly into an Index + class ArrayLike: + def __init__(self, array): + self.array = array + + def __array__(self, dtype=None) -> np.ndarray: + return self.array + + expected = pd.Index(array) + result = pd.Index(ArrayLike(array)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "dtype", + [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"], + ) + def test_constructor_int_dtype_float(self, dtype): + # GH 18400 + if is_unsigned_integer_dtype(dtype): + index_type = UInt64Index + else: + index_type = Int64Index + + expected = index_type([0, 1, 2, 3]) + result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) + tm.assert_index_equal(result, expected) + + def test_constructor_int_dtype_nan(self): + # see gh-15187 + data = [np.nan] + expected = Float64Index(data) + result = Index(data, dtype="float") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "uint64"]) + def test_constructor_int_dtype_nan_raises(self, dtype): + # see gh-15187 + data = [np.nan] + msg = "cannot convert" + with pytest.raises(ValueError, match=msg): + Index(data, dtype=dtype) + + def test_constructor_no_pandas_array(self): + ser = pd.Series([1, 2, 3]) + result = pd.Index(ser.array) + expected = pd.Index([1, 2, 3]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "klass,dtype,na_val", + [ + (pd.Float64Index, np.float64, np.nan), + (pd.DatetimeIndex, "datetime64[ns]", pd.NaT), + ], + ) + def test_index_ctor_infer_nan_nat(self, klass, dtype, na_val): + # GH 13467 + na_list = [na_val, na_val] + expected = klass(na_list) + assert expected.dtype == dtype + + result = Index(na_list) + tm.assert_index_equal(result, expected) + + result = Index(np.array(na_list)) + tm.assert_index_equal(result, expected) + + 
@pytest.mark.parametrize("pos", [0, 1]) + @pytest.mark.parametrize( + "klass,dtype,ctor", + [ + (pd.DatetimeIndex, "datetime64[ns]", np.datetime64("nat")), + (pd.TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")), + ], + ) + def test_index_ctor_infer_nat_dt_like(self, pos, klass, dtype, ctor, nulls_fixture): + expected = klass([pd.NaT, pd.NaT]) + assert expected.dtype == dtype + data = [ctor] + data.insert(pos, nulls_fixture) + + result = Index(data) + tm.assert_index_equal(result, expected) + + result = Index(np.array(data, dtype=object)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("swap_objs", [True, False]) + def test_index_ctor_nat_result(self, swap_objs): + # mixed np.datetime64/timedelta64 nat results in object + data = [np.datetime64("nat"), np.timedelta64("nat")] + if swap_objs: + data = data[::-1] + + expected = pd.Index(data, dtype=object) + tm.assert_index_equal(Index(data), expected) + tm.assert_index_equal(Index(np.array(data, dtype=object)), expected) + + def test_index_ctor_infer_periodindex(self): + xp = period_range("2012-1-1", freq="M", periods=3) + rs = Index(xp) + tm.assert_index_equal(rs, xp) + assert isinstance(rs, PeriodIndex) + + @pytest.mark.parametrize( + "vals,dtype", + [ + ([1, 2, 3, 4, 5], "int"), + ([1.1, np.nan, 2.2, 3.0], "float"), + (["A", "B", "C", np.nan], "obj"), + ], + ) + def test_constructor_simple_new(self, vals, dtype): + index = Index(vals, name=dtype) + result = index._simple_new(index.values, dtype) + tm.assert_index_equal(result, index) + + def test_constructor_wrong_kwargs(self): + # GH #19348 + with pytest.raises(TypeError, match="Unexpected keyword arguments {'foo'}"): + Index([], foo="bar") + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + np.array([1, 2, 3]), + np.array([1, 2, 3], dtype=int), + # below should coerce + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0], dtype=float), + ], + ) + def test_constructor_dtypes_to_int64(self, vals): + index = Index(vals, dtype=int) + 
assert isinstance(index, Int64Index) + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + [1.0, 2.0, 3.0], + np.array([1.0, 2.0, 3.0]), + np.array([1, 2, 3], dtype=int), + np.array([1.0, 2.0, 3.0], dtype=float), + ], + ) + def test_constructor_dtypes_to_float64(self, vals): + index = Index(vals, dtype=float) + assert isinstance(index, Float64Index) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", [[True, False, True], np.array([True, False, True], dtype=bool)] + ) + def test_constructor_dtypes_to_object(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=bool) + else: + index = Index(vals) + + assert isinstance(index, Index) + assert index.dtype == object + + @pytest.mark.parametrize( + "vals", + [ + [1, 2, 3], + np.array([1, 2, 3], dtype=int), + np.array( + [np_datetime64_compat("2011-01-01"), np_datetime64_compat("2011-01-02")] + ), + [datetime(2011, 1, 1), datetime(2011, 1, 2)], + ], + ) + def test_constructor_dtypes_to_categorical(self, vals): + index = Index(vals, dtype="category") + assert isinstance(index, CategoricalIndex) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", + [ + Index( + np.array( + [ + np_datetime64_compat("2011-01-01"), + np_datetime64_compat("2011-01-02"), + ] + ) + ), + Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]), + ], + ) + def test_constructor_dtypes_to_datetime(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert isinstance(index, Index) + assert index.dtype == object + else: + index = Index(vals) + assert isinstance(index, DatetimeIndex) + + @pytest.mark.parametrize("cast_index", [True, False]) + @pytest.mark.parametrize( + "vals", + [ + np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]), + [timedelta(1), timedelta(1)], + ], + ) + def test_constructor_dtypes_to_timedelta(self, cast_index, vals): + if cast_index: + index = Index(vals, dtype=object) + assert 
isinstance(index, Index) + assert index.dtype == object + else: + index = Index(vals) + assert isinstance(index, TimedeltaIndex) + + @pytest.mark.parametrize("attr", ["values", "asi8"]) + @pytest.mark.parametrize("klass", [pd.Index, pd.DatetimeIndex]) + def test_constructor_dtypes_datetime(self, tz_naive_fixture, attr, klass): + # Test constructing with a datetimetz dtype + # .values produces numpy datetimes, so these are considered naive + # .asi8 produces integers, so these are considered epoch timestamps + # ^the above will be true in a later version. Right now we `.view` + # the i8 values as NS_DTYPE, effectively treating them as wall times. + index = pd.date_range("2011-01-01", periods=5) + arg = getattr(index, attr) + index = index.tz_localize(tz_naive_fixture) + dtype = index.dtype + + if attr == "asi8": + result = pd.DatetimeIndex(arg).tz_localize(tz_naive_fixture) + else: + result = klass(arg, tz=tz_naive_fixture) + tm.assert_index_equal(result, index) + + if attr == "asi8": + result = pd.DatetimeIndex(arg).astype(dtype) + else: + result = klass(arg, dtype=dtype) + tm.assert_index_equal(result, index) + + if attr == "asi8": + result = pd.DatetimeIndex(list(arg)).tz_localize(tz_naive_fixture) + else: + result = klass(list(arg), tz=tz_naive_fixture) + tm.assert_index_equal(result, index) + + if attr == "asi8": + result = pd.DatetimeIndex(list(arg)).astype(dtype) + else: + result = klass(list(arg), dtype=dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.parametrize("attr", ["values", "asi8"]) + @pytest.mark.parametrize("klass", [pd.Index, pd.TimedeltaIndex]) + def test_constructor_dtypes_timedelta(self, attr, klass): + index = pd.timedelta_range("1 days", periods=5) + dtype = index.dtype + + values = getattr(index, attr) + + result = klass(values, dtype=dtype) + tm.assert_index_equal(result, index) + + result = klass(list(values), dtype=dtype) + tm.assert_index_equal(result, index) + + @pytest.mark.parametrize("value", [[], iter([]), (_ for _ in 
[])]) + @pytest.mark.parametrize( + "klass", + [ + Index, + Float64Index, + Int64Index, + UInt64Index, + CategoricalIndex, + DatetimeIndex, + TimedeltaIndex, + ], + ) + def test_constructor_empty(self, value, klass): + empty = klass(value) + assert isinstance(empty, klass) + assert not len(empty) + + @pytest.mark.parametrize( + "empty,klass", + [ + (PeriodIndex([], freq="B"), PeriodIndex), + (PeriodIndex(iter([]), freq="B"), PeriodIndex), + (PeriodIndex((_ for _ in []), freq="B"), PeriodIndex), + (RangeIndex(step=1), pd.RangeIndex), + (MultiIndex(levels=[[1, 2], ["blue", "red"]], codes=[[], []]), MultiIndex), + ], + ) + def test_constructor_empty_special(self, empty, klass): + assert isinstance(empty, klass) + assert not len(empty) + + def test_constructor_overflow_int64(self): + # see gh-15832 + msg = ( + "The elements provided in the data cannot " + "all be casted to the dtype int64" + ) + with pytest.raises(OverflowError, match=msg): + Index([np.iinfo(np.uint64).max - 1], dtype="int64") + + @pytest.mark.xfail(reason="see GH#21311: Index doesn't enforce dtype argument") + def test_constructor_cast(self): + msg = "could not convert string to float" + with pytest.raises(ValueError, match=msg): + Index(["a", "b", "c"], dtype=float) + + @pytest.mark.parametrize( + "index", + [ + "datetime", + "float", + "int", + "period", + "range", + "repeats", + "timedelta", + "tuples", + "uint", + ], + indirect=True, + ) + def test_view_with_args(self, index): + index.view("i8") + + @pytest.mark.parametrize( + "index", + [ + "unicode", + "string", + pytest.param("categorical", marks=pytest.mark.xfail(reason="gh-25464")), + "bool", + "empty", + ], + indirect=True, + ) + def test_view_with_args_object_array_raises(self, index): + msg = "Cannot change data-type for object array" + with pytest.raises(TypeError, match=msg): + index.view("i8") + + @pytest.mark.parametrize("index", ["int", "range"], indirect=True) + def test_astype(self, index): + casted = index.astype("i8") + + # it 
works! + casted.get_loc(5) + + # pass on name + index.name = "foobar" + casted = index.astype("i8") + assert casted.name == "foobar" + + def test_equals_object(self): + # same + assert Index(["a", "b", "c"]).equals(Index(["a", "b", "c"])) + + @pytest.mark.parametrize( + "comp", [Index(["a", "b"]), Index(["a", "b", "d"]), ["a", "b", "c"]] + ) + def test_not_equals_object(self, comp): + assert not Index(["a", "b", "c"]).equals(comp) + + def test_insert(self): + + # GH 7256 + # validate neg/pos inserts + result = Index(["b", "c", "d"]) + + # test 0th element + tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a")) + + # test Nth element that follows Python list behavior + tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e")) + + # test loc +/- neq (0, -1) + tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z")) + + # test empty + null_index = Index([]) + tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a")) + + def test_insert_missing(self, nulls_fixture): + # GH 22295 + # test there is no mangling of NA values + expected = Index(["a", nulls_fixture, "b", "c"]) + result = Index(list("abc")).insert(1, nulls_fixture) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "pos,expected", + [ + (0, Index(["b", "c", "d"], name="index")), + (-1, Index(["a", "b", "c"], name="index")), + ], + ) + def test_delete(self, pos, expected): + index = Index(["a", "b", "c", "d"], name="index") + result = index.delete(pos) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + def test_delete_raises(self): + index = Index(["a", "b", "c", "d"], name="index") + msg = "index 5 is out of bounds for axis 0 with size 4" + with pytest.raises(IndexError, match=msg): + index.delete(5) + + def test_identical(self): + + # index + i1 = Index(["a", "b", "c"]) + i2 = Index(["a", "b", "c"]) + + assert i1.identical(i2) + + i1 = i1.rename("foo") + assert i1.equals(i2) + assert not 
i1.identical(i2) + + i2 = i2.rename("foo") + assert i1.identical(i2) + + i3 = Index([("a", "a"), ("a", "b"), ("b", "a")]) + i4 = Index([("a", "a"), ("a", "b"), ("b", "a")], tupleize_cols=False) + assert not i3.identical(i4) + + def test_is_(self): + ind = Index(range(10)) + assert ind.is_(ind) + assert ind.is_(ind.view().view().view().view()) + assert not ind.is_(Index(range(10))) + assert not ind.is_(ind.copy()) + assert not ind.is_(ind.copy(deep=False)) + assert not ind.is_(ind[:]) + assert not ind.is_(np.array(range(10))) + + # quasi-implementation dependent + assert ind.is_(ind.view()) + ind2 = ind.view() + ind2.name = "bob" + assert ind.is_(ind2) + assert ind2.is_(ind) + # doesn't matter if Indices are *actually* views of underlying data, + assert not ind.is_(Index(ind.values)) + arr = np.array(range(1, 11)) + ind1 = Index(arr, copy=False) + ind2 = Index(arr, copy=False) + assert not ind1.is_(ind2) + + @pytest.mark.parametrize("index", ["datetime"], indirect=True) + def test_asof(self, index): + d = index[0] + assert index.asof(d) == d + assert isna(index.asof(d - timedelta(1))) + + d = index[-1] + assert index.asof(d + timedelta(1)) == d + + d = index[0].to_pydatetime() + assert isinstance(index.asof(d), Timestamp) + + def test_asof_datetime_partial(self): + index = pd.date_range("2010-01-01", periods=2, freq="m") + expected = Timestamp("2010-02-28") + result = index.asof("2010-02") + assert result == expected + assert not isinstance(result, Index) + + def test_nanosecond_index_access(self): + s = Series([Timestamp("20130101")]).values.view("i8")[0] + r = DatetimeIndex([s + 50 + i for i in range(100)]) + x = Series(np.random.randn(100), index=r) + + first_value = x.asof(x.index[0]) + + # this does not yet work, as parsing strings is done via dateutil + # assert first_value == x['2013-01-01 00:00:00.000000050+0000'] + + expected_ts = np_datetime64_compat("2013-01-01 00:00:00.000000050+0000", "ns") + assert first_value == x[Timestamp(expected_ts)] + + def 
test_booleanindex(self, index): + bool_index = np.ones(len(index), dtype=bool) + bool_index[5:30:2] = False + + sub_index = index[bool_index] + + for i, val in enumerate(sub_index): + assert sub_index.get_loc(val) == i + + sub_index = index[list(bool_index)] + for i, val in enumerate(sub_index): + assert sub_index.get_loc(val) == i + + def test_fancy(self): + index = self.create_index() + sl = index[[1, 2, 3]] + for i in sl: + assert i == sl[sl.get_loc(i)] + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + @pytest.mark.parametrize("dtype", [np.int_, np.bool_]) + def test_empty_fancy(self, index, dtype): + empty_arr = np.array([], dtype=dtype) + empty_index = type(index)([]) + + assert index[[]].identical(empty_index) + assert index[empty_arr].identical(empty_index) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_empty_fancy_raises(self, index): + # pd.DatetimeIndex is excluded, because it overrides getitem and should + # be tested separately. 
+ empty_farr = np.array([], dtype=np.float_) + empty_index = type(index)([]) + + assert index[[]].identical(empty_index) + # np.ndarray only accepts ndarray of int & bool dtypes, so should Index + msg = r"arrays used as indices must be of integer \(or boolean\) type" + with pytest.raises(IndexError, match=msg): + index[empty_farr] + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection(self, index, sort): + first = index[:20] + second = index[:10] + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name + (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names + (Index([3, 4, 5, 6, 7]), False), + ], + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_name_preservation(self, index2, keeps_name, sort): + index1 = Index([1, 2, 3, 4, 5], name="index") + expected = Index([3, 4, 5]) + result = index1.intersection(index2, sort) + + if keeps_name: + expected.name = "index" + + assert result.name == expected.name + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "first_name,second_name,expected_name", + [("A", "A", "A"), ("A", "B", None), (None, "B", None)], + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_name_preservation2( + self, index, first_name, second_name, expected_name, sort + ): + first = index[5:20] + second = index[:10] + first.name = first_name + second.name = second_name + intersect = first.intersection(second, sort=sort) + assert intersect.name == expected_name + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([4, 7, 6, 5, 3], name="index"), True), + (Index([4, 7, 6, 5, 3], name="other"), False), + ], + ) + 
@pytest.mark.parametrize("sort", [None, False]) + def test_intersection_monotonic(self, index2, keeps_name, sort): + index1 = Index([5, 3, 2, 4, 1], name="index") + expected = Index([5, 3, 4]) + + if keeps_name: + expected.name = "index" + + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index2,expected_arr", + [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])], + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): + # non-monotonic non-unique + index1 = Index(["A", "B", "A", "C"]) + expected = Index(expected_arr, dtype="object") + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersect_str_dates(self, sort): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(["aa"], dtype=object) + result = i2.intersection(i1, sort=sort) + + assert len(result) == 0 + + def test_intersect_nosort(self): + result = pd.Index(["c", "b", "a"]).intersection(["b", "a"]) + expected = pd.Index(["b", "a"]) + tm.assert_index_equal(result, expected) + + def test_intersection_equal_sort(self): + idx = pd.Index(["c", "a", "b"]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + @pytest.mark.xfail(reason="Not implemented") + def test_intersection_equal_sort_true(self): + # TODO decide on True behaviour + idx = pd.Index(["c", "a", "b"]) + sorted_ = pd.Index(["a", "b", "c"]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + @pytest.mark.parametrize("sort", [None, False]) + def test_chained_union(self, sort): + # Chained unions handles names correctly 
+ i1 = Index([1, 2], name="i1") + i2 = Index([5, 6], name="i2") + i3 = Index([3, 4], name="i3") + union = i1.union(i2.union(i3, sort=sort), sort=sort) + expected = i1.union(i2, sort=sort).union(i3, sort=sort) + tm.assert_index_equal(union, expected) + + j1 = Index([1, 2], name="j1") + j2 = Index([], name="j2") + j3 = Index([], name="j3") + union = j1.union(j2.union(j3, sort=sort), sort=sort) + expected = j1.union(j2, sort=sort).union(j3, sort=sort) + tm.assert_index_equal(union, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union(self, index, sort): + first = index[5:20] + second = index[:10] + everything = index[:20] + + union = first.union(second, sort=sort) + if sort is None: + tm.assert_index_equal(union, everything.sort_values()) + assert tm.equalContents(union, everything) + + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_other_special(self, slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + + idx = pd.Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_special_true(self, slice_): + # TODO decide on True behaviour + # sort=True + idx = pd.Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + + result = idx.union(other, sort=True) + expected = pd.Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + def test_union_sort_other_incomparable(self): + # https://github.com/pandas-dev/pandas/issues/24959 + idx = pd.Index([1, pd.Timestamp("2000")]) + # default (sort=None) + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + + tm.assert_index_equal(result, idx) + + # sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = 
idx.union(idx[:1], sort=None) + tm.assert_index_equal(result, idx) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) + + @pytest.mark.xfail(reason="Not implemented") + def test_union_sort_other_incomparable_true(self): + # TODO decide on True behaviour + # sort=True + idx = pd.Index([1, pd.Timestamp("2000")]) + with pytest.raises(TypeError, match=".*"): + idx.union(idx[:1], sort=True) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + @pytest.mark.parametrize("sort", [None, False]) + def test_union_from_iterables(self, index, klass, sort): + # GH 10149 + first = index[5:20] + second = index[:10] + everything = index[:20] + + case = klass(second.values) + result = first.union(case, sort=sort) + if sort is None: + tm.assert_index_equal(result, everything.sort_values()) + assert tm.equalContents(result, everything) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_identity(self, index, sort): + first = index[5:20] + + union = first.union(first, sort=sort) + # i.e. 
identity is not preserved when sort is True + assert (union is first) is (not sort) + + # This should no longer be the same object, since [] is not consistent, + # both objects will be recast to dtype('O') + union = first.union([], sort=sort) + assert (union is first) is (not sort) + + union = Index([]).union(first, sort=sort) + assert (union is first) is (not sort) + + @pytest.mark.parametrize("first_list", [list("ba"), list()]) + @pytest.mark.parametrize("second_list", [list("ab"), list()]) + @pytest.mark.parametrize( + "first_name, second_name, expected_name", + [("A", "B", None), (None, "B", None), ("A", None, None)], + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_union_name_preservation( + self, first_list, second_list, first_name, second_name, expected_name, sort + ): + first = Index(first_list, name=first_name) + second = Index(second_list, name=second_name) + union = first.union(second, sort=sort) + + vals = set(first_list).union(second_list) + + if sort is None and len(first_list) > 0 and len(second_list) > 0: + expected = Index(sorted(vals), name=expected_name) + tm.assert_index_equal(union, expected) + else: + expected = Index(vals, name=expected_name) + assert tm.equalContents(union, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_union_dt_as_obj(self, sort): + # TODO: Replace with fixturesult + index = self.create_index() + date_index = pd.date_range("2019-01-01", periods=10) + first_cat = index.union(date_index) + second_cat = index.union(index) + + if date_index.dtype == np.object_: + appended = np.append(index, date_index) + else: + appended = np.append(index, date_index.astype("O")) + + assert tm.equalContents(first_cat, appended) + assert tm.equalContents(second_cat, index) + tm.assert_contains_all(index, first_cat) + tm.assert_contains_all(index, second_cat) + tm.assert_contains_all(date_index, first_cat) + + @pytest.mark.parametrize( + "method", ["union", "intersection", "difference", 
"symmetric_difference"] + ) + def test_setops_disallow_true(self, method): + idx1 = pd.Index(["a", "b"]) + idx2 = pd.Index(["b", "c"]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) + + def test_map_identity_mapping(self, indices): + # GH 12766 + tm.assert_index_equal(indices, indices.map(lambda x: x)) + + def test_map_with_tuples(self): + # GH 12766 + + # Test that returning a single tuple from an Index + # returns an Index. + index = tm.makeIntIndex(3) + result = tm.makeIntIndex(3).map(lambda x: (x,)) + expected = Index([(i,) for i in index]) + tm.assert_index_equal(result, expected) + + # Test that returning a tuple from a map of a single index + # returns a MultiIndex object. + result = index.map(lambda x: (x, x == 1)) + expected = MultiIndex.from_tuples([(i, i == 1) for i in index]) + tm.assert_index_equal(result, expected) + + def test_map_with_tuples_mi(self): + # Test that returning a single object from a MultiIndex + # returns an Index. 
+ first_level = ["foo", "bar", "baz"] + multi_index = MultiIndex.from_tuples(zip(first_level, [1, 2, 3])) + reduced_index = multi_index.map(lambda x: x[0]) + tm.assert_index_equal(reduced_index, Index(first_level)) + + @pytest.mark.parametrize( + "attr", ["makeDateIndex", "makePeriodIndex", "makeTimedeltaIndex"] + ) + def test_map_tseries_indices_return_index(self, attr): + index = getattr(tm, attr)(10) + expected = Index([1] * 10) + result = index.map(lambda x: 1) + tm.assert_index_equal(expected, result) + + def test_map_tseries_indices_accsr_return_index(self): + date_index = tm.makeDateIndex(24, freq="h", name="hourly") + expected = Index(range(24), name="hourly") + tm.assert_index_equal(expected, date_index.map(lambda x: x.hour)) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index), + ], + ) + def test_map_dictlike_simple(self, mapper): + # GH 12756 + expected = Index(["foo", "bar", "baz"]) + index = tm.makeIntIndex(3) + result = index.map(mapper(expected.values, index)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [ + lambda values, index: {i: e for e, i in zip(values, index)}, + lambda values, index: pd.Series(values, index), + ], + ) + def test_map_dictlike(self, indices, mapper): + # GH 12756 + if isinstance(indices, CategoricalIndex): + # Tested in test_categorical + return + elif not indices.is_unique: + # Cannot map duplicated index + return + + if indices.empty: + # to match proper result coercion for uints + expected = Index([]) + else: + expected = Index(np.arange(len(indices), 0, -1)) + + result = indices.map(mapper(expected, indices)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "mapper", + [Series(["foo", 2.0, "baz"], index=[0, 2, -1]), {0: "foo", 2: 2.0, -1: "baz"}], + ) + def test_map_with_non_function_missing_values(self, mapper): + # GH 12756 + expected = Index([2.0, 
np.nan, "foo"]) + result = Index([2, 1, 0]).map(mapper) + + tm.assert_index_equal(expected, result) + + def test_map_na_exclusion(self): + index = Index([1.5, np.nan, 3, np.nan, 5]) + + result = index.map(lambda x: x * 2, na_action="ignore") + expected = index * 2 + tm.assert_index_equal(result, expected) + + def test_map_defaultdict(self): + index = Index([1, 2, 3]) + default_dict = defaultdict(lambda: "blank") + default_dict[1] = "stuff" + result = index.map(default_dict) + expected = Index(["stuff", "blank", "blank"]) + tm.assert_index_equal(result, expected) + + def test_append_multiple(self): + index = Index(["a", "b", "c", "d", "e", "f"]) + + foos = [index[:2], index[2:4], index[4:]] + result = foos[0].append(foos[1:]) + tm.assert_index_equal(result, index) + + # empty + result = index.append([]) + tm.assert_index_equal(result, index) + + @pytest.mark.parametrize("name,expected", [("foo", "foo"), ("bar", None)]) + def test_append_empty_preserve_name(self, name, expected): + left = Index([], name="foo") + right = Index([1, 2, 3], name=name) + + result = left.append(right) + assert result.name == expected + + @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")]) + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_name_preservation(self, index, second_name, expected, sort): + first = index[5:20] + second = index[:10] + answer = index[10:20] + + first.name = "name" + second.name = second_name + result = first.difference(second, sort=sort) + + assert tm.equalContents(result, answer) + + if expected is None: + assert result.name is None + else: + assert result.name == expected + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_empty_arg(self, index, sort): + first = index[5:20] + first.name == "name" + result = first.difference([], sort) + + assert tm.equalContents(result, first) + assert result.name == first.name + + @pytest.mark.parametrize("sort", [None, False]) + def 
test_difference_identity(self, index, sort): + first = index[5:20] + first.name == "name" + result = first.difference(first, sort) + + assert len(result) == 0 + assert result.name == first.name + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_sort(self, index, sort): + first = index[5:20] + second = index[:10] + + result = first.difference(second, sort) + expected = index[10:20] + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_symmetric_difference(self, sort): + # smoke + index1 = Index([5, 2, 3, 4], name="index1") + index2 = Index([2, 3, 4, 1]) + result = index1.symmetric_difference(index2, sort=sort) + expected = Index([5, 1]) + assert tm.equalContents(result, expected) + assert result.name is None + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + # __xor__ syntax + expected = index1 ^ index2 + assert tm.equalContents(result, expected) + assert result.name is None + + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable(self, opname): + a = pd.Index([3, pd.Timestamp("2000"), 1]) + b = pd.Index([2, pd.Timestamp("1999"), 1]) + op = operator.methodcaller(opname, b) + + # sort=None, the default + result = op(a) + expected = pd.Index([3, pd.Timestamp("2000"), 2, pd.Timestamp("1999")]) + if opname == "difference": + expected = expected[:2] + tm.assert_index_equal(result, expected) + + # sort=False + op = operator.methodcaller(opname, b, sort=False) + result = op(a) + tm.assert_index_equal(result, expected) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"]) + def test_difference_incomparable_true(self, opname): + # TODO decide on True behaviour + # # sort=True, raises + a = pd.Index([3, pd.Timestamp("2000"), 1]) + b = pd.Index([2, pd.Timestamp("1999"), 
1]) + op = operator.methodcaller(opname, b, sort=True) + + with pytest.raises(TypeError, match="Cannot compare"): + op(a) + + @pytest.mark.parametrize("sort", [None, False]) + def test_symmetric_difference_mi(self, sort): + index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])) + index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)]) + result = index1.symmetric_difference(index2, sort=sort) + expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)]) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert tm.equalContents(result, expected) + + @pytest.mark.parametrize( + "index2,expected", + [ + (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), + (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])), + ], + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_symmetric_difference_missing(self, index2, expected, sort): + # GH 13514 change: {nan} - {nan} == {} + # (GH 6444, sorting of nans, is no longer an issue) + index1 = Index([1, np.nan, 2, 3]) + + result = index1.symmetric_difference(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_symmetric_difference_non_index(self, sort): + index1 = Index([1, 2, 3, 4], name="index1") + index2 = np.array([2, 3, 4, 5]) + expected = Index([1, 5]) + result = index1.symmetric_difference(index2, sort=sort) + assert tm.equalContents(result, expected) + assert result.name == "index1" + + result = index1.symmetric_difference(index2, result_name="new_name", sort=sort) + assert tm.equalContents(result, expected) + assert result.name == "new_name" + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_type(self, indices, sort): + # GH 20040 + # If taking difference of a set and itself, it + # needs to preserve the type of the index + if not indices.is_unique: + return + result = indices.difference(indices, 
sort=sort) + expected = indices.drop(indices) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_difference(self, indices, sort): + # GH 20040 + # Test that the intersection of an index with an + # empty index produces the same index as the difference + # of an index with itself. Test for all types + if not indices.is_unique: + return + inter = indices.intersection(indices.drop(indices)) + diff = indices.difference(indices, sort=sort) + tm.assert_index_equal(inter, diff) + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", False), + ("bool", False), + ("categorical", False), + ("int", True), + ("datetime", False), + ("float", True), + ], + indirect=["index"], + ) + def test_is_numeric(self, index, expected): + assert index.is_numeric() is expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", True), + ("bool", True), + ("categorical", False), + ("int", False), + ("datetime", False), + ("float", False), + ], + indirect=["index"], + ) + def test_is_object(self, index, expected): + assert index.is_object() is expected + + @pytest.mark.parametrize( + "index, expected", + [ + ("string", False), + ("bool", False), + ("categorical", False), + ("int", False), + ("datetime", True), + ("float", False), + ], + indirect=["index"], + ) + def test_is_all_dates(self, index, expected): + assert index.is_all_dates is expected + + def test_summary(self, indices): + self._check_method_works(Index._summary, indices) + + def test_summary_bug(self): + # GH3869` + ind = Index(["{other}%s", "~:{range}:0"], name="A") + result = ind._summary() + # shouldn't be formatted accidentally. 
+ assert "~:{range}:0" in result + assert "{other}%s" in result + + def test_format(self, indices): + self._check_method_works(Index.format, indices) + + def test_format_bug(self): + # GH 14626 + # windows has different precision on datetime.datetime.now (it doesn't + # include us since the default for Timestamp shows these but Index + # formatting does not we are skipping) + now = datetime.now() + if not str(now).endswith("000"): + index = Index([now]) + formatted = index.format() + expected = [str(index[0])] + assert formatted == expected + + Index([]).format() + + @pytest.mark.parametrize("vals", [[1, 2.0 + 3.0j, 4.0], ["a", "b", "c"]]) + def test_format_missing(self, vals, nulls_fixture): + # 2845 + vals = list(vals) # Copy for each iteration + vals.append(nulls_fixture) + index = Index(vals) + + formatted = index.format() + expected = [str(index[0]), str(index[1]), str(index[2]), "NaN"] + + assert formatted == expected + assert index[3] is nulls_fixture + + def test_format_with_name_time_info(self): + # bug I fixed 12/20/2011 + dates = date_range("2011-01-01 04:00:00", periods=10, name="something") + + formatted = dates.format(name=True) + assert formatted[0] == "something" + + def test_format_datetime_with_time(self): + t = Index([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)]) + + result = t.format() + expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"] + assert len(result) == 2 + assert result == expected + + @pytest.mark.parametrize("op", ["any", "all"]) + def test_logical_compat(self, op): + index = self.create_index() + assert getattr(index, op)() == getattr(index.values, op)() + + def _check_method_works(self, method, index): + method(index) + + def test_get_indexer(self): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + r1 = index1.get_indexer(index2) + e1 = np.array([1, 3, -1], dtype=np.intp) + tm.assert_almost_equal(r1, e1) + + @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize( + "expected,method", 
+ [ + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "pad"), + (np.array([-1, 0, 0, 1, 1], dtype=np.intp), "ffill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "backfill"), + (np.array([0, 0, 1, 1, 2], dtype=np.intp), "bfill"), + ], + ) + def test_get_indexer_methods(self, reverse, expected, method): + index1 = Index([1, 2, 3, 4, 5]) + index2 = Index([2, 4, 6]) + + if reverse: + index1 = index1[::-1] + expected = expected[::-1] + + result = index2.get_indexer(index1, method=method) + tm.assert_almost_equal(result, expected) + + def test_get_indexer_invalid(self): + # GH10411 + index = Index(np.arange(10)) + + with pytest.raises(ValueError, match="tolerance argument"): + index.get_indexer([1, 0], tolerance=1) + + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], limit=1) + + @pytest.mark.parametrize( + "method, tolerance, indexer, expected", + [ + ("pad", None, [0, 5, 9], [0, 5, 9]), + ("backfill", None, [0, 5, 9], [0, 5, 9]), + ("nearest", None, [0, 5, 9], [0, 5, 9]), + ("pad", 0, [0, 5, 9], [0, 5, 9]), + ("backfill", 0, [0, 5, 9], [0, 5, 9]), + ("nearest", 0, [0, 5, 9], [0, 5, 9]), + ("pad", None, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", None, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", None, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 1, [0.2, 1.8, 8.5], [0, 1, 8]), + ("backfill", 1, [0.2, 1.8, 8.5], [1, 2, 9]), + ("nearest", 1, [0.2, 1.8, 8.5], [0, 2, 9]), + ("pad", 0.2, [0.2, 1.8, 8.5], [0, -1, -1]), + ("backfill", 0.2, [0.2, 1.8, 8.5], [-1, 2, -1]), + ("nearest", 0.2, [0.2, 1.8, 8.5], [0, 2, -1]), + ], + ) + def test_get_indexer_nearest(self, method, tolerance, indexer, expected): + index = Index(np.arange(10)) + + actual = index.get_indexer(indexer, method=method, tolerance=tolerance) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize("listtype", [list, tuple, Series, np.array]) + @pytest.mark.parametrize( + "tolerance, expected", + list( + zip( + [[0.3, 0.3, 0.1], [0.2, 0.1, 
0.1], [0.1, 0.5, 0.5]], + [[0, 2, -1], [0, -1, -1], [-1, 2, 9]], + ) + ), + ) + def test_get_indexer_nearest_listlike_tolerance( + self, tolerance, expected, listtype + ): + index = Index(np.arange(10)) + + actual = index.get_indexer( + [0.2, 1.8, 8.5], method="nearest", tolerance=listtype(tolerance) + ) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + def test_get_indexer_nearest_error(self): + index = Index(np.arange(10)) + with pytest.raises(ValueError, match="limit argument"): + index.get_indexer([1, 0], method="nearest", limit=1) + + with pytest.raises(ValueError, match="tolerance size must match"): + index.get_indexer([1, 0], method="nearest", tolerance=[1, 2, 3]) + + @pytest.mark.parametrize( + "method,expected", + [("pad", [8, 7, 0]), ("backfill", [9, 8, 1]), ("nearest", [9, 7, 0])], + ) + def test_get_indexer_nearest_decreasing(self, method, expected): + index = Index(np.arange(10))[::-1] + + actual = index.get_indexer([0, 5, 9], method=method) + tm.assert_numpy_array_equal(actual, np.array([9, 4, 0], dtype=np.intp)) + + actual = index.get_indexer([0.2, 1.8, 8.5], method=method) + tm.assert_numpy_array_equal(actual, np.array(expected, dtype=np.intp)) + + @pytest.mark.parametrize( + "method,expected", + [ + ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)), + ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)), + ], + ) + def test_get_indexer_strings(self, method, expected): + index = pd.Index(["b", "c"]) + actual = index.get_indexer(["a", "b", "c", "d"], method=method) + + tm.assert_numpy_array_equal(actual, expected) + + def test_get_indexer_strings_raises(self): + index = pd.Index(["b", "c"]) + + msg = r"unsupported operand type\(s\) for -: 'str' and 'str'" + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="nearest") + + with pytest.raises(TypeError, match=msg): + index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2) + + with pytest.raises(TypeError, match=msg): + 
index.get_indexer( + ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2] + ) + + @pytest.mark.parametrize("idx_class", [Int64Index, RangeIndex, Float64Index]) + def test_get_indexer_numeric_index_boolean_target(self, idx_class): + # GH 16877 + + numeric_index = idx_class(RangeIndex((4))) + result = numeric_index.get_indexer([True, False, True]) + expected = np.array([-1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_with_NA_values( + self, unique_nulls_fixture, unique_nulls_fixture2 + ): + # GH 22332 + # check pairwise, that no pair of na values + # is mangled + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values are not unique + arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=np.object) + index = pd.Index(arr, dtype=np.object) + result = index.get_indexer( + [unique_nulls_fixture, unique_nulls_fixture2, "Unknown"] + ) + expected = np.array([0, 1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + def test_get_loc(self, method): + index = pd.Index([0, 1, 2]) + assert index.get_loc(1, method=method) == 1 + + if method: + assert index.get_loc(1, method=method, tolerance=0) == 1 + + @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) + def test_get_loc_raises_bad_label(self, method): + index = pd.Index([0, 1, 2]) + if method: + msg = "not supported between" + else: + msg = "invalid key" + + with pytest.raises(TypeError, match=msg): + index.get_loc([1, 2], method=method) + + @pytest.mark.parametrize( + "method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)] + ) + def test_get_loc_tolerance(self, method, loc): + index = pd.Index([0, 1, 2]) + assert index.get_loc(1.1, method) == loc + assert index.get_loc(1.1, method, tolerance=1) == loc + + @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) + def 
test_get_loc_outside_tolerance_raises(self, method): + index = pd.Index([0, 1, 2]) + with pytest.raises(KeyError, match="1.1"): + index.get_loc(1.1, method, tolerance=0.05) + + def test_get_loc_bad_tolerance_raises(self): + index = pd.Index([0, 1, 2]) + with pytest.raises(ValueError, match="must be numeric"): + index.get_loc(1.1, "nearest", tolerance="invalid") + + def test_get_loc_tolerance_no_method_raises(self): + index = pd.Index([0, 1, 2]) + with pytest.raises(ValueError, match="tolerance .* valid if"): + index.get_loc(1.1, tolerance=1) + + def test_get_loc_raises_missized_tolerance(self): + index = pd.Index([0, 1, 2]) + with pytest.raises(ValueError, match="tolerance size must match"): + index.get_loc(1.1, "nearest", tolerance=[1, 1]) + + def test_get_loc_raises_object_nearest(self): + index = pd.Index(["a", "c"]) + with pytest.raises(TypeError, match="unsupported operand type"): + index.get_loc("a", method="nearest") + + def test_get_loc_raises_object_tolerance(self): + index = pd.Index(["a", "c"]) + with pytest.raises(TypeError, match="unsupported operand type"): + index.get_loc("a", method="pad", tolerance="invalid") + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + + assert index.slice_locs(start=2) == (2, n) + assert index.slice_locs(start=3) == (3, n) + assert index.slice_locs(3, 8) == (3, 6) + assert index.slice_locs(5, 10) == (3, n) + assert index.slice_locs(end=8) == (0, 6) + assert index.slice_locs(end=9) == (0, 7) + + # reversed + index2 = index[::-1] + assert index2.slice_locs(8, 2) == (2, 6) + assert index2.slice_locs(7, 3) == (2, 5) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_float_locs(self, dtype): + index = Index(np.array([0, 1, 2, 5, 6, 7, 9, 10], dtype=dtype)) + n = len(index) + assert index.slice_locs(5.0, 10.0) == (3, n) + assert index.slice_locs(4.5, 10.5) == (3, 8) + + index2 = index[::-1] + 
assert index2.slice_locs(8.5, 1.5) == (2, 6) + assert index2.slice_locs(10.5, -1) == (0, n) + + def test_slice_locs_dup(self): + index = Index(["a", "a", "b", "c", "d", "d"]) + assert index.slice_locs("a", "d") == (0, 6) + assert index.slice_locs(end="d") == (0, 6) + assert index.slice_locs("a", "c") == (0, 4) + assert index.slice_locs("b", "d") == (2, 6) + + index2 = index[::-1] + assert index2.slice_locs("d", "a") == (0, 6) + assert index2.slice_locs(end="a") == (0, 6) + assert index2.slice_locs("d", "b") == (0, 4) + assert index2.slice_locs("c", "a") == (2, 6) + + @pytest.mark.parametrize("dtype", [int, float]) + def test_slice_locs_dup_numeric(self, dtype): + index = Index(np.array([10, 12, 12, 14], dtype=dtype)) + assert index.slice_locs(12, 12) == (1, 3) + assert index.slice_locs(11, 13) == (1, 3) + + index2 = index[::-1] + assert index2.slice_locs(12, 12) == (1, 3) + assert index2.slice_locs(13, 11) == (1, 3) + + def test_slice_locs_na(self): + index = Index([np.nan, 1, 2]) + assert index.slice_locs(1) == (1, 3) + assert index.slice_locs(np.nan) == (0, 3) + + index = Index([0, np.nan, np.nan, 1, 2]) + assert index.slice_locs(np.nan) == (1, 5) + + def test_slice_locs_na_raises(self): + index = Index([np.nan, 1, 2]) + with pytest.raises(KeyError, match=""): + index.slice_locs(start=1.5) + + with pytest.raises(KeyError, match=""): + index.slice_locs(end=1.5) + + @pytest.mark.parametrize( + "in_slice,expected", + [ + (pd.IndexSlice[::-1], "yxdcb"), + (pd.IndexSlice["b":"y":-1], ""), # type: ignore + (pd.IndexSlice["b"::-1], "b"), # type: ignore + (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore + (pd.IndexSlice[:"y":-1], "y"), # type: ignore + (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore + (pd.IndexSlice["y"::-4], "yb"), # type: ignore + # absent labels + (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore + (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore + (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore + (pd.IndexSlice["z"::-3], "yc"), # type: ignore + 
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore + (pd.IndexSlice[:"m":-1], "yx"), # type: ignore + (pd.IndexSlice["a":"a":-1], ""), # type: ignore + (pd.IndexSlice["z":"z":-1], ""), # type: ignore + (pd.IndexSlice["m":"m":-1], ""), # type: ignore + ], + ) + def test_slice_locs_negative_step(self, in_slice, expected): + index = Index(list("bcdxy")) + + s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step) + result = index[s_start : s_stop : in_slice.step] + expected = pd.Index(list(expected)) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_drop_by_str_label(self, index): + n = len(index) + drop = index[list(range(5, 10))] + dropped = index.drop(drop) + + expected = index[list(range(5)) + list(range(10, n))] + tm.assert_index_equal(dropped, expected) + + dropped = index.drop(index[0]) + expected = index[1:] + tm.assert_index_equal(dropped, expected) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + @pytest.mark.parametrize("keys", [["foo", "bar"], ["1", "bar"]]) + def test_drop_by_str_label_raises_missing_keys(self, index, keys): + with pytest.raises(KeyError, match=""): + index.drop(keys) + + @pytest.mark.parametrize("index", ["string", "int", "float"], indirect=True) + def test_drop_by_str_label_errors_ignore(self, index): + n = len(index) + drop = index[list(range(5, 10))] + mixed = drop.tolist() + ["foo"] + dropped = index.drop(mixed, errors="ignore") + + expected = index[list(range(5)) + list(range(10, n))] + tm.assert_index_equal(dropped, expected) + + dropped = index.drop(["foo", "bar"], errors="ignore") + expected = index[list(range(n))] + tm.assert_index_equal(dropped, expected) + + def test_drop_by_numeric_label_loc(self): + # TODO: Parametrize numeric and str tests after self.strIndex fixture + index = Index([1, 2, 3]) + dropped = index.drop(1) + expected = Index([2, 3]) + + tm.assert_index_equal(dropped, 
expected) + + def test_drop_by_numeric_label_raises_missing_keys(self): + index = Index([1, 2, 3]) + with pytest.raises(KeyError, match=""): + index.drop([3, 4]) + + @pytest.mark.parametrize( + "key,expected", [(4, Index([1, 2, 3])), ([3, 4, 5], Index([1, 2]))] + ) + def test_drop_by_numeric_label_errors_ignore(self, key, expected): + index = Index([1, 2, 3]) + dropped = index.drop(key, errors="ignore") + + tm.assert_index_equal(dropped, expected) + + @pytest.mark.parametrize( + "values", + [["a", "b", ("c", "d")], ["a", ("c", "d"), "b"], [("c", "d"), "a", "b"]], + ) + @pytest.mark.parametrize("to_drop", [[("c", "d"), "a"], ["a", ("c", "d")]]) + def test_drop_tuple(self, values, to_drop): + # GH 18304 + index = pd.Index(values) + expected = pd.Index(["b"]) + + result = index.drop(to_drop) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[0]) + for drop_me in to_drop[1], [to_drop[1]]: + result = removed.drop(drop_me) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[1]) + msg = fr"\"\[{re.escape(to_drop[1].__repr__())}\] not found in axis\"" + for drop_me in to_drop[1], [to_drop[1]]: + with pytest.raises(KeyError, match=msg): + removed.drop(drop_me) + + @pytest.mark.parametrize( + "method,expected,sort", + [ + ( + "intersection", + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + False, + ), + ( + "intersection", + np.array( + [(1, "A"), (1, "B"), (2, "A"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ( + "union", + np.array( + [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ], + ) + def test_tuple_union_bug(self, method, expected, sort): + index1 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ) + ) + index2 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")], + dtype=[("num", 
int), ("let", "a1")], + ) + ) + + result = getattr(index1, method)(index2, sort=sort) + assert result.ndim == 1 + + expected = Index(expected) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "attr", + [ + "is_monotonic_increasing", + "is_monotonic_decreasing", + "_is_strictly_monotonic_increasing", + "_is_strictly_monotonic_decreasing", + ], + ) + def test_is_monotonic_incomparable(self, attr): + index = Index([5, datetime.now(), 7]) + assert not getattr(index, attr) + + def test_set_value_deprecated(self): + # GH 28621 + idx = self.create_index() + arr = np.array([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + idx.set_value(arr, idx[1], 80) + assert arr[1] == 80 + + @pytest.mark.parametrize( + "index", ["string", "int", "datetime", "timedelta"], indirect=True + ) + def test_get_value(self, index): + # TODO: Remove function? GH 19728 + values = np.random.randn(100) + value = index[67] + + tm.assert_almost_equal(index.get_value(values, value), values[67]) + + @pytest.mark.parametrize("values", [["foo", "bar", "quux"], {"foo", "bar", "quux"}]) + @pytest.mark.parametrize( + "index,expected", + [ + (Index(["qux", "baz", "foo", "bar"]), np.array([False, False, True, True])), + (Index([]), np.array([], dtype=bool)), # empty + ], + ) + def test_isin(self, values, index, expected): + result = index.isin(values) + tm.assert_numpy_array_equal(result, expected) + + def test_isin_nan_common_object(self, nulls_fixture, nulls_fixture2): + # Test cartesian product of null fixtures and ensure that we don't + # mangle the various types (save a corner case with PyPy) + + # all nans are the same + if ( + isinstance(nulls_fixture, float) + and isinstance(nulls_fixture2, float) + and math.isnan(nulls_fixture) + and math.isnan(nulls_fixture2) + ): + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, True]), + ) + + elif nulls_fixture is nulls_fixture2: # should preserve NA type + 
tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, True]), + ) + + else: + tm.assert_numpy_array_equal( + Index(["a", nulls_fixture]).isin([nulls_fixture2]), + np.array([False, False]), + ) + + def test_isin_nan_common_float64(self, nulls_fixture): + if nulls_fixture is pd.NaT: + pytest.skip("pd.NaT not compatible with Float64Index") + + # Float64Index overrides isin, so must be checked separately + tm.assert_numpy_array_equal( + Float64Index([1.0, nulls_fixture]).isin([np.nan]), np.array([False, True]) + ) + + # we cannot compare NaT with NaN + tm.assert_numpy_array_equal( + Float64Index([1.0, nulls_fixture]).isin([pd.NaT]), np.array([False, False]) + ) + + @pytest.mark.parametrize("level", [0, -1]) + @pytest.mark.parametrize( + "index", + [ + Index(["qux", "baz", "foo", "bar"]), + # Float64Index overrides isin, so must be checked separately + Float64Index([1.0, 2.0, 3.0, 4.0]), + ], + ) + def test_isin_level_kwarg(self, level, index): + values = index.tolist()[-2:] + ["nonexisting"] + + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(expected, index.isin(values, level=level)) + + index.name = "foobar" + tm.assert_numpy_array_equal(expected, index.isin(values, level="foobar")) + + @pytest.mark.parametrize("level", [2, 10, -3]) + def test_isin_level_kwarg_bad_level_raises(self, level, indices): + index = indices + with pytest.raises(IndexError, match="Too many levels"): + index.isin([], level=level) + + @pytest.mark.parametrize("label", [1.0, "foobar", "xyzzy", np.nan]) + def test_isin_level_kwarg_bad_label_raises(self, label, indices): + index = indices + if isinstance(index, MultiIndex): + index = index.rename(["foo", "bar"]) + msg = f"'Level {label} not found'" + else: + index = index.rename("foo") + msg = fr"Requested level \({label}\) does not match index name \(foo\)" + with pytest.raises(KeyError, match=msg): + index.isin([], level=label) + + @pytest.mark.parametrize("empty", 
[[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # see gh-16991 + index = Index(["a", "b"]) + expected = np.array([False, False]) + + result = index.isin(empty) + tm.assert_numpy_array_equal(expected, result) + + @pytest.mark.parametrize( + "values", + [ + [1, 2, 3, 4], + [1.0, 2.0, 3.0, 4.0], + [True, True, True, True], + ["foo", "bar", "baz", "qux"], + pd.date_range("2018-01-01", freq="D", periods=4), + ], + ) + def test_boolean_cmp(self, values): + index = Index(values) + result = index == values + expected = np.array([True, True, True, True], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("name,level", [(None, 0), ("a", "a")]) + def test_get_level_values(self, index, name, level): + expected = index.copy() + if name: + expected.name = name + + result = expected.get_level_values(level) + tm.assert_index_equal(result, expected) + + def test_slice_keep_name(self): + index = Index(["a", "b"], name="asdf") + assert index.name == index[1:].name + + @pytest.mark.parametrize( + "index", ["unicode", "string", "datetime", "int", "float"], indirect=True + ) + def test_join_self(self, index, join_type): + joined = index.join(index, how=join_type) + assert index is joined + + @pytest.mark.parametrize("method", ["strip", "rstrip", "lstrip"]) + def test_str_attribute(self, method): + # GH9068 + index = Index([" jack", "jill ", " jesse ", "frank"]) + expected = Index([getattr(str, method)(x) for x in index.values]) + + result = getattr(index.str, method)() + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + Index(range(5)), + tm.makeDateIndex(10), + MultiIndex.from_tuples([("foo", "1"), ("bar", "3")]), + period_range(start="2000", end="2010", freq="A"), + ], + ) + def test_str_attribute_raises(self, index): + with pytest.raises(AttributeError, match="only use .str accessor"): + index.str.repeat(2) + + @pytest.mark.parametrize( + "expand,expected", + [ + (None, 
Index([["a", "b", "c"], ["d", "e"], ["f"]])), + (False, Index([["a", "b", "c"], ["d", "e"], ["f"]])), + ( + True, + MultiIndex.from_tuples( + [("a", "b", "c"), ("d", "e", np.nan), ("f", np.nan, np.nan)] + ), + ), + ], + ) + def test_str_split(self, expand, expected): + index = Index(["a b c", "d e", "f"]) + if expand is not None: + result = index.str.split(expand=expand) + else: + result = index.str.split() + + tm.assert_index_equal(result, expected) + + def test_str_bool_return(self): + # test boolean case, should return np.array instead of boolean Index + index = Index(["a1", "a2", "b1", "b2"]) + result = index.str.startswith("a") + expected = np.array([True, True, False, False]) + + tm.assert_numpy_array_equal(result, expected) + assert isinstance(result, np.ndarray) + + def test_str_bool_series_indexing(self): + index = Index(["a1", "a2", "b1", "b2"]) + s = Series(range(4), index=index) + + result = s[s.index.str.startswith("a")] + expected = Series(range(2), index=["a1", "a2"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index,expected", [(Index(list("abcd")), True), (Index(range(4)), False)] + ) + def test_tab_completion(self, index, expected): + # GH 9910 + result = "str" in dir(index) + assert result == expected + + def test_indexing_doesnt_change_class(self): + index = Index([1, 2, 3, "a", "b", "c"]) + + assert index[1:3].identical(pd.Index([2, 3], dtype=np.object_)) + assert index[[0, 1]].identical(pd.Index([1, 2], dtype=np.object_)) + + def test_outer_join_sort(self): + left_index = Index(np.random.permutation(15)) + right_index = tm.makeDateIndex(10) + + with tm.assert_produces_warning(RuntimeWarning): + result = left_index.join(right_index, how="outer") + + # right_index in this case because DatetimeIndex has join precedence + # over Int64Index + with tm.assert_produces_warning(RuntimeWarning): + expected = right_index.astype(object).union(left_index.astype(object)) + + tm.assert_index_equal(result, expected) + + def 
test_nan_first_take_datetime(self): + index = Index([pd.NaT, Timestamp("20130101"), Timestamp("20130102")]) + result = index.take([-1, 0, 1]) + expected = Index([index[-1], index[0], index[1]]) + tm.assert_index_equal(result, expected) + + def test_take_fill_value(self): + # GH 12631 + index = pd.Index(list("ABC"), name="xxx") + result = index.take(np.array([1, 0, -1])) + expected = pd.Index(list("BAC"), name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = index.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.Index(["B", "A", np.nan], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = index.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.Index(["B", "A", "C"], name="xxx") + tm.assert_index_equal(result, expected) + + def test_take_fill_value_none_raises(self): + index = pd.Index(list("ABC"), name="xxx") + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + + with pytest.raises(ValueError, match=msg): + index.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + index.take(np.array([1, 0, -5]), fill_value=True) + + def test_take_bad_bounds_raises(self): + index = pd.Index(list("ABC"), name="xxx") + with pytest.raises(IndexError, match="out of bounds"): + index.take(np.array([1, -5])) + + @pytest.mark.parametrize("name", [None, "foobar"]) + @pytest.mark.parametrize( + "labels", + [ + [], + np.array([]), + ["A", "B", "C"], + ["C", "B", "A"], + np.array(["A", "B", "C"]), + np.array(["C", "B", "A"]), + # Must preserve name even if dtype changes + pd.date_range("20130101", periods=3).values, + pd.date_range("20130101", periods=3).tolist(), + ], + ) + def test_reindex_preserves_name_if_target_is_list_or_ndarray(self, name, labels): + # GH6552 + index = pd.Index([0, 1, 2]) + index.name = name + assert index.reindex(labels)[0].name == name + + @pytest.mark.parametrize("labels", [[], 
np.array([]), np.array([], dtype=np.int64)]) + def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels): + # GH7774 + index = pd.Index(list("abc")) + assert index.reindex(labels)[0].dtype.type == np.object_ + + @pytest.mark.parametrize( + "labels,dtype", + [ + (pd.Int64Index([]), np.int64), + (pd.Float64Index([]), np.float64), + (pd.DatetimeIndex([]), np.datetime64), + ], + ) + def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dtype): + # GH7774 + index = pd.Index(list("abc")) + assert index.reindex(labels)[0].dtype.type == dtype + + def test_reindex_no_type_preserve_target_empty_mi(self): + index = pd.Index(list("abc")) + result = index.reindex( + pd.MultiIndex([pd.Int64Index([]), pd.Float64Index([])], [[], []]) + )[0] + assert result.levels[0].dtype.type == np.int64 + assert result.levels[1].dtype.type == np.float64 + + def test_groupby(self): + index = Index(range(5)) + result = index.groupby(np.array([1, 1, 2, 2, 2])) + expected = {1: pd.Index([0, 1]), 2: pd.Index([2, 3, 4])} + + tm.assert_dict_equal(result, expected) + + @pytest.mark.parametrize( + "mi,expected", + [ + (MultiIndex.from_tuples([(1, 2), (4, 5)]), np.array([True, True])), + (MultiIndex.from_tuples([(1, 2), (4, 6)]), np.array([True, False])), + ], + ) + def test_equals_op_multiindex(self, mi, expected): + # GH9785 + # test comparisons of multiindex + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + result = df.index == mi + tm.assert_numpy_array_equal(result, expected) + + def test_equals_op_multiindex_identify(self): + df = pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + result = df.index == df.index + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]), + Index(["foo", "bar", "baz"]), + ], + ) + def test_equals_op_mismatched_multiindex_raises(self, index): + df = 
pd.read_csv(StringIO("a,b,c\n1,2,3\n4,5,6"), index_col=[0, 1]) + + with pytest.raises(ValueError, match="Lengths must match"): + df.index == index + + def test_equals_op_index_vs_mi_same_length(self): + mi = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)]) + index = Index(["foo", "bar", "baz"]) + + result = mi == index + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("dt_conv", [pd.to_datetime, pd.to_timedelta]) + def test_dt_conversion_preserves_name(self, dt_conv): + # GH 10875 + index = pd.Index(["01:02:03", "01:02:04"], name="label") + assert index.name == dt_conv(index).name + + @pytest.mark.parametrize( + "index,expected", + [ + # ASCII + # short + ( + pd.Index(["a", "bb", "ccc"]), + """Index(['a', 'bb', 'ccc'], dtype='object')""", + ), + # multiple lines + ( + pd.Index(["a", "bb", "ccc"] * 10), + """\ +Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', + 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', + 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + dtype='object')""", + ), + # truncated + ( + pd.Index(["a", "bb", "ccc"] * 100), + """\ +Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + ... 
+ 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], + dtype='object', length=300)""", + ), + # Non-ASCII + # short + ( + pd.Index(["あ", "いい", "ううう"]), + """Index(['あ', 'いい', 'ううう'], dtype='object')""", + ), + # multiple lines + ( + pd.Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう'],\n" + " dtype='object')" + ), + ), + # truncated + ( + pd.Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', " + "'あ', 'いい', 'ううう', 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr(self, index, expected): + result = repr(index) + assert result == expected + + @pytest.mark.parametrize( + "index,expected", + [ + # short + ( + pd.Index(["あ", "いい", "ううう"]), + ("Index(['あ', 'いい', 'ううう'], dtype='object')"), + ), + # multiple lines + ( + pd.Index(["あ", "いい", "ううう"] * 10), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ', 'いい', 'ううう'],\n" + " dtype='object')" + "" + ), + ), + # truncated + ( + pd.Index(["あ", "いい", "ううう"] * 100), + ( + "Index(['あ', 'いい', 'ううう', 'あ', 'いい', " + "'ううう', 'あ', 'いい', 'ううう',\n" + " 'あ',\n" + " ...\n" + " 'ううう', 'あ', 'いい', 'ううう', 'あ', " + "'いい', 'ううう', 'あ', 'いい',\n" + " 'ううう'],\n" + " dtype='object', length=300)" + ), + ), + ], + ) + def test_string_index_repr_with_unicode_option(self, index, expected): + # Enable Unicode option ----------------------------------------- + with cf.option_context("display.unicode.east_asian_width", True): + result = repr(index) + assert result == expected + + def test_cached_properties_not_settable(self): + 
index = pd.Index([1, 2, 3]) + with pytest.raises(AttributeError, match="Can't set attribute"): + index.is_unique = False + + @async_mark() + async def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; idx = pd.Index([1, 2])" + await ip.run_code(code) + + # GH 31324 newer jedi version raises Deprecation warning + import jedi + + if jedi.__version__ < "0.16.0": + warning = tm.assert_produces_warning(None) + else: + warning = tm.assert_produces_warning( + DeprecationWarning, check_stacklevel=False + ) + with warning: + with provisionalcompleter("ignore"): + list(ip.Completer.completions("idx.", 4)) + + def test_contains_method_removed(self, indices): + # GH#30103 method removed for all types except IntervalIndex + if isinstance(indices, pd.IntervalIndex): + indices.contains(1) + else: + with pytest.raises(AttributeError): + indices.contains(1) + + +class TestMixedIntIndex(Base): + # Mostly the tests from common.py for which the results differ + # in py2 and py3 because ints and strings are uncomparable in py3 + # (GH 13514) + + _holder = Index + + @pytest.fixture(params=[[0, "a", 1, "b", 2, "c"]], ids=["mixedIndex"]) + def indices(self, request): + return Index(request.param) + + def create_index(self): + return Index([0, "a", 1, "b", 2, "c"]) + + def test_argsort(self): + index = self.create_index() + with pytest.raises(TypeError, match="'>|<' not supported"): + index.argsort() + + def test_numpy_argsort(self): + index = self.create_index() + with pytest.raises(TypeError, match="'>|<' not supported"): + np.argsort(index) + + def test_copy_name(self): + # Check that "name" argument passed at initialization is honoured + # GH12309 + index = self.create_index() + + first = type(index)(index, copy=True, name="mario") + second = type(first)(first, copy=False) + + # Even though "copy=False", 
we want a new object. + assert first is not second + tm.assert_index_equal(first, second) + + assert first.name == "mario" + assert second.name == "mario" + + s1 = Series(2, index=first) + s2 = Series(3, index=second[:-1]) + + s3 = s1 * s2 + + assert s3.index.name == "mario" + + def test_copy_name2(self): + # Check that adding a "name" parameter to the copy is honored + # GH14302 + index = pd.Index([1, 2], name="MyName") + index1 = index.copy() + + tm.assert_index_equal(index, index1) + + index2 = index.copy(name="NewName") + tm.assert_index_equal(index, index2, check_names=False) + assert index.name == "MyName" + assert index2.name == "NewName" + + index3 = index.copy(names=["NewName"]) + tm.assert_index_equal(index, index3, check_names=False) + assert index.name == "MyName" + assert index.names == ["MyName"] + assert index3.name == "NewName" + assert index3.names == ["NewName"] + + def test_union_base(self): + index = self.create_index() + first = index[3:] + second = index[:5] + + result = first.union(second) + + expected = Index([0, 1, 2, "a", "b", "c"]) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + def test_union_different_type_base(self, klass): + # GH 10149 + index = self.create_index() + first = index[3:] + second = index[:5] + + result = first.union(klass(second.values)) + + assert tm.equalContents(result, index) + + def test_unique_na(self): + idx = pd.Index([2, np.nan, 2, 1], name="my_index") + expected = pd.Index([2, np.nan, 1], name="my_index") + result = idx.unique() + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_base(self, sort): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = self.create_index() + first = index[:5] + second = index[:3] + + expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1]) + result = first.intersection(second, sort=sort) + 
tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("klass", [np.array, Series, list]) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_different_type_base(self, klass, sort): + # GH 10149 + index = self.create_index() + first = index[:5] + second = index[:3] + + result = first.intersection(klass(second.values), sort=sort) + assert tm.equalContents(result, second) + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_base(self, sort): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = self.create_index() + first = index[:4] + second = index[3:] + + result = first.difference(second, sort) + expected = Index([0, "a", 1]) + if sort is None: + expected = Index(safe_sort(expected)) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self): + # (same results for py2 and py3 but sortedness not tested elsewhere) + index = self.create_index() + first = index[:4] + second = index[3:] + + result = first.symmetric_difference(second) + expected = Index([0, 1, 2, "a", "c"]) + tm.assert_index_equal(result, expected) + + def test_logical_compat(self): + index = self.create_index() + assert index.all() == index.values.all() + assert index.any() == index.values.any() + + @pytest.mark.parametrize("how", ["any", "all"]) + @pytest.mark.parametrize("dtype", [None, object, "category"]) + @pytest.mark.parametrize( + "vals,expected", + [ + ([1, 2, 3], [1, 2, 3]), + ([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]), + ([1.0, 2.0, np.nan, 3.0], [1.0, 2.0, 3.0]), + (["A", "B", "C"], ["A", "B", "C"]), + (["A", np.nan, "B", "C"], ["A", "B", "C"]), + ], + ) + def test_dropna(self, how, dtype, vals, expected): + # GH 6194 + index = pd.Index(vals, dtype=dtype) + result = index.dropna(how=how) + expected = pd.Index(expected, dtype=dtype) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("how", ["any", "all"]) + @pytest.mark.parametrize( + "index,expected", + [ + ( + 
pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + ), + ( + pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", pd.NaT]), + pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"]), + ), + ( + pd.TimedeltaIndex(["1 days", "2 days", "3 days"]), + pd.TimedeltaIndex(["1 days", "2 days", "3 days"]), + ), + ( + pd.TimedeltaIndex([pd.NaT, "1 days", "2 days", "3 days", pd.NaT]), + pd.TimedeltaIndex(["1 days", "2 days", "3 days"]), + ), + ( + pd.PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + pd.PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + ), + ( + pd.PeriodIndex(["2012-02", "2012-04", "NaT", "2012-05"], freq="M"), + pd.PeriodIndex(["2012-02", "2012-04", "2012-05"], freq="M"), + ), + ], + ) + def test_dropna_dt_like(self, how, index, expected): + result = index.dropna(how=how) + tm.assert_index_equal(result, expected) + + def test_dropna_invalid_how_raises(self): + msg = "invalid how option: xxx" + with pytest.raises(ValueError, match=msg): + pd.Index([1, 2, 3]).dropna(how="xxx") + + def test_get_combined_index(self): + result = _get_combined_index([]) + expected = Index([]) + tm.assert_index_equal(result, expected) + + def test_repeat(self): + repeats = 2 + index = pd.Index([1, 2, 3]) + expected = pd.Index([1, 1, 2, 2, 3, 3]) + + result = index.repeat(repeats) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + pd.Index([np.nan]), + pd.Index([np.nan, 1]), + pd.Index([1, 2, np.nan]), + pd.Index(["a", "b", np.nan]), + pd.to_datetime(["NaT"]), + pd.to_datetime(["NaT", "2000-01-01"]), + pd.to_datetime(["2000-01-01", "NaT", "2000-01-02"]), + pd.to_timedelta(["1 day", "NaT"]), + ], + ) + def test_is_monotonic_na(self, index): + assert index.is_monotonic_increasing is False + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_increasing is False + assert index._is_strictly_monotonic_decreasing is 
False + + def test_repr_summary(self): + with cf.option_context("display.max_seq_items", 10): + result = repr(pd.Index(np.arange(1000))) + assert len(result) < 200 + assert "..." in result + + @pytest.mark.parametrize("klass", [Series, DataFrame]) + def test_int_name_format(self, klass): + index = Index(["a", "b", "c"], name=0) + result = klass(list(range(3)), index=index) + assert "0" in repr(result) + + def test_print_unicode_columns(self): + df = pd.DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + def test_str_to_bytes_raises(self): + # GH 26447 + index = Index([str(x) for x in range(10)]) + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(index) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + index1 = Index(dt_dates, dtype=object) + index2 = Index(["aa"], dtype=object) + result = index2.intersection(index1) + + expected = Index([], dtype=object) + tm.assert_index_equal(result, expected) + + def test_index_repr_bool_nan(self): + # GH32146 + arr = Index([True, False, np.nan], dtype=object) + exp1 = arr.format() + out1 = ["True", "False", "NaN"] + assert out1 == exp1 + + exp2 = repr(arr) + out2 = "Index([True, False, nan], dtype='object')" + assert out2 == exp2 + + +class TestIndexUtils: + @pytest.mark.parametrize( + "data, names, expected", + [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ["name"], Index([1, 2, 3], name="name")), + ( + [["a", "a"], ["c", "d"]], + None, + MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]]), + ), + ( + [["a", "a"], ["c", "d"]], + ["L1", "L2"], + MultiIndex([["a"], ["c", "d"]], [[0, 0], [0, 1]], names=["L1", "L2"]), + ), + ], + ) + def test_ensure_index_from_sequences(self, data, names, expected): + result = ensure_index_from_sequences(data, names) + tm.assert_index_equal(result, expected) + + def 
test_ensure_index_mixed_closed_intervals(self): + # GH27172 + intervals = [ + pd.Interval(0, 1, closed="left"), + pd.Interval(1, 2, closed="right"), + pd.Interval(2, 3, closed="neither"), + pd.Interval(3, 4, closed="both"), + ] + result = ensure_index(intervals) + expected = Index(intervals, dtype=object) + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize( + "opname", + [ + "eq", + "ne", + "le", + "lt", + "ge", + "gt", + "add", + "radd", + "sub", + "rsub", + "mul", + "rmul", + "truediv", + "rtruediv", + "floordiv", + "rfloordiv", + "pow", + "rpow", + "mod", + "divmod", + ], +) +def test_generated_op_names(opname, indices): + if isinstance(indices, ABCIndex) and opname == "rsub": + # pd.Index.__rsub__ does not exist; though the method does exist + # for subclasses. see GH#19723 + return + opname = f"__{opname}__" + method = getattr(indices, opname) + assert method.__name__ == opname + + +@pytest.mark.parametrize("index_maker", tm.index_subclass_makers_generator()) +def test_index_subclass_constructor_wrong_kwargs(index_maker): + # GH #19348 + with pytest.raises(TypeError, match="unexpected keyword argument"): + index_maker(foo="bar") + + +def test_deprecated_fastpath(): + msg = "[Uu]nexpected keyword argument" + with pytest.raises(TypeError, match=msg): + pd.Index(np.array(["a", "b"], dtype=object), name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + pd.Int64Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + pd.RangeIndex(0, 5, 2, name="test", fastpath=True) + + with pytest.raises(TypeError, match=msg): + pd.CategoricalIndex(["a", "b", "c"], name="test", fastpath=True) + + +def test_shape_of_invalid_index(): + # Currently, it is possible to create "invalid" index objects backed by + # a multi-dimensional array (see https://github.com/pandas-dev/pandas/issues/27125 + # about this). 
However, as long as this is not solved in general,this test ensures + # that the returned shape is consistent with this underlying array for + # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775) + idx = pd.Index([0, 1, 2, 3]) + with tm.assert_produces_warning(DeprecationWarning): + # GH#30588 multi-dimensional indexing deprecated + assert idx[:, None].shape == (4, 1) + + +def test_validate_1d_input(): + # GH#27125 check that we do not have >1-dimensional input + msg = "Index data must be 1-dimensional" + + arr = np.arange(8).reshape(2, 2, 2) + with pytest.raises(ValueError, match=msg): + pd.Index(arr) + + with pytest.raises(ValueError, match=msg): + pd.Float64Index(arr.astype(np.float64)) + + with pytest.raises(ValueError, match=msg): + pd.Int64Index(arr.astype(np.int64)) + + with pytest.raises(ValueError, match=msg): + pd.UInt64Index(arr.astype(np.uint64)) + + df = pd.DataFrame(arr.reshape(4, 2)) + with pytest.raises(ValueError, match=msg): + pd.Index(df) + + # GH#13601 trying to assign a multi-dimensional array to an index is not + # allowed + ser = pd.Series(0, range(4)) + with pytest.raises(ValueError, match=msg): + ser.index = np.array([[2, 3]] * 4) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py new file mode 100644 index 00000000..7e302333 --- /dev/null +++ b/pandas/tests/indexes/test_common.py @@ -0,0 +1,354 @@ +""" +Collection of tests asserting things that should be true for +any index subclass. Makes use of the `indices` fixture defined +in pandas/tests/indexes/conftest.py. 
+""" +import re + +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT + +from pandas.core.dtypes.common import needs_i8_conversion + +import pandas as pd +from pandas import CategoricalIndex, MultiIndex, RangeIndex +import pandas._testing as tm + + +class TestCommon: + def test_droplevel(self, indices): + # GH 21115 + if isinstance(indices, MultiIndex): + # Tested separately in test_multi.py + return + + assert indices.droplevel([]).equals(indices) + + for level in indices.name, [indices.name]: + if isinstance(indices.name, tuple) and level is indices.name: + # GH 21121 : droplevel with tuple name + continue + with pytest.raises(ValueError): + indices.droplevel(level) + + for level in "wrong", ["wrong"]: + with pytest.raises( + KeyError, + match=r"'Requested level \(wrong\) does not match index name \(None\)'", + ): + indices.droplevel(level) + + def test_constructor_non_hashable_name(self, indices): + # GH 20527 + + if isinstance(indices, MultiIndex): + pytest.skip("multiindex handled in test_multi.py") + + message = "Index.name must be a hashable type" + renamed = [["1"]] + + # With .rename() + with pytest.raises(TypeError, match=message): + indices.rename(name=renamed) + + # With .set_names() + with pytest.raises(TypeError, match=message): + indices.set_names(names=renamed) + + def test_constructor_unwraps_index(self, indices): + if isinstance(indices, pd.MultiIndex): + raise pytest.skip("MultiIndex has no ._data") + a = indices + b = type(a)(a) + tm.assert_equal(a._data, b._data) + + @pytest.mark.parametrize("itm", [101, "no_int"]) + # FutureWarning from non-tuple sequence of nd indexing + @pytest.mark.filterwarnings("ignore::FutureWarning") + def test_getitem_error(self, indices, itm): + with pytest.raises(IndexError): + indices[itm] + + @pytest.mark.parametrize( + "fname, sname, expected_name", + [ + ("A", "A", "A"), + ("A", "B", None), + ("A", None, None), + (None, "B", None), + (None, None, None), + ], + ) + def 
test_corner_union(self, indices, fname, sname, expected_name): + # GH 9943 9862 + # Test unions with various name combinations + # Do not test MultiIndex or repeats + + if isinstance(indices, MultiIndex) or not indices.is_unique: + pytest.skip("Not for MultiIndex or repeated indices") + + # Test copy.union(copy) + first = indices.copy().set_names(fname) + second = indices.copy().set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test copy.union(empty) + first = indices.copy().set_names(fname) + second = indices.drop(indices).set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(copy) + first = indices.drop(indices).set_names(fname) + second = indices.copy().set_names(sname) + union = first.union(second) + expected = indices.copy().set_names(expected_name) + tm.assert_index_equal(union, expected) + + # Test empty.union(empty) + first = indices.drop(indices).set_names(fname) + second = indices.drop(indices).set_names(sname) + union = first.union(second) + expected = indices.drop(indices).set_names(expected_name) + tm.assert_index_equal(union, expected) + + def test_to_flat_index(self, indices): + # 22866 + if isinstance(indices, MultiIndex): + pytest.skip("Separate expectation for MultiIndex") + + result = indices.to_flat_index() + tm.assert_index_equal(result, indices) + + def test_wrong_number_names(self, indices): + with pytest.raises(ValueError, match="^Length"): + indices.names = ["apple", "banana", "carrot"] + + def test_set_name_methods(self, indices): + new_name = "This is the new name for this index" + + # don't tests a MultiIndex here (as its tested separated) + if isinstance(indices, MultiIndex): + pytest.skip("Skip check for MultiIndex") + original_name = indices.name + new_ind = indices.set_names([new_name]) + assert new_ind.name == new_name + assert 
indices.name == original_name + res = indices.rename(new_name, inplace=True) + + # should return None + assert res is None + assert indices.name == new_name + assert indices.names == [new_name] + # FIXME: dont leave commented-out + # with pytest.raises(TypeError, match="list-like"): + # # should still fail even if it would be the right length + # ind.set_names("a") + with pytest.raises(ValueError, match="Level must be None"): + indices.set_names("a", level=0) + + # rename in place just leaves tuples and other containers alone + name = ("A", "B") + indices.rename(name, inplace=True) + assert indices.name == name + assert indices.names == [name] + + def test_hash_error(self, indices): + index = indices + with pytest.raises( + TypeError, match=f"unhashable type: '{type(index).__name__}'" + ): + hash(indices) + + def test_copy_and_deepcopy(self, indices): + from copy import copy, deepcopy + + if isinstance(indices, MultiIndex): + pytest.skip("Skip check for MultiIndex") + + for func in (copy, deepcopy): + idx_copy = func(indices) + assert idx_copy is not indices + assert idx_copy.equals(indices) + + new_copy = indices.copy(deep=True, name="banana") + assert new_copy.name == "banana" + + def test_unique(self, indices): + # don't test a MultiIndex here (as its tested separated) + # don't test a CategoricalIndex because categories change (GH 18291) + if isinstance(indices, (MultiIndex, CategoricalIndex)): + pytest.skip("Skip check for MultiIndex/CategoricalIndex") + + # GH 17896 + expected = indices.drop_duplicates() + for level in 0, indices.name, None: + result = indices.unique(level=level) + tm.assert_index_equal(result, expected) + + msg = "Too many levels: Index has only 1 level, not 4" + with pytest.raises(IndexError, match=msg): + indices.unique(level=3) + + msg = ( + fr"Requested level \(wrong\) does not match index name " + fr"\({re.escape(indices.name.__repr__())}\)" + ) + with pytest.raises(KeyError, match=msg): + indices.unique(level="wrong") + + def 
test_get_unique_index(self, indices): + # MultiIndex tested separately + if not len(indices) or isinstance(indices, MultiIndex): + pytest.skip("Skip check for empty Index and MultiIndex") + + idx = indices[[0] * 5] + idx_unique = indices[[0]] + + # We test against `idx_unique`, so first we make sure it's unique + # and doesn't contain nans. + assert idx_unique.is_unique is True + try: + assert idx_unique.hasnans is False + except NotImplementedError: + pass + + for dropna in [False, True]: + result = idx._get_unique_index(dropna=dropna) + tm.assert_index_equal(result, idx_unique) + + # nans: + if not indices._can_hold_na: + pytest.skip("Skip na-check if index cannot hold na") + + if needs_i8_conversion(indices): + vals = indices.asi8[[0] * 5] + vals[0] = iNaT + else: + vals = indices.values[[0] * 5] + vals[0] = np.nan + + vals_unique = vals[:2] + idx_nan = indices._shallow_copy(vals) + idx_unique_nan = indices._shallow_copy(vals_unique) + assert idx_unique_nan.is_unique is True + + assert idx_nan.dtype == indices.dtype + assert idx_unique_nan.dtype == indices.dtype + + for dropna, expected in zip([False, True], [idx_unique_nan, idx_unique]): + for i in [idx_nan, idx_unique_nan]: + result = i._get_unique_index(dropna=dropna) + tm.assert_index_equal(result, expected) + + def test_sort(self, indices): + msg = "cannot sort an Index object in-place, use sort_values instead" + with pytest.raises(TypeError, match=msg): + indices.sort() + + def test_mutability(self, indices): + if not len(indices): + pytest.skip("Skip check for empty Index") + msg = "Index does not support mutable operations" + with pytest.raises(TypeError, match=msg): + indices[0] = indices[0] + + def test_view(self, indices): + assert indices.view().name == indices.name + + def test_compat(self, indices): + assert indices.tolist() == list(indices) + + def test_searchsorted_monotonic(self, indices): + # GH17271 + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex 
+ if isinstance(indices, (MultiIndex, pd.IntervalIndex)): + pytest.skip("Skip check for MultiIndex/IntervalIndex") + + # nothing to test if the index is empty + if indices.empty: + pytest.skip("Skip check for empty Index") + value = indices[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (indices == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(indices) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if indices.is_monotonic_increasing: + ssm_left = indices._searchsorted_monotonic(value, side="left") + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side="right") + assert expected_right == ssm_right + + ss_left = indices.searchsorted(value, side="left") + assert expected_left == ss_left + + ss_right = indices.searchsorted(value, side="right") + assert expected_right == ss_right + + elif indices.is_monotonic_decreasing: + ssm_left = indices._searchsorted_monotonic(value, side="left") + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side="right") + assert expected_right == ssm_right + else: + # non-monotonic should raise. 
+ with pytest.raises(ValueError): + indices._searchsorted_monotonic(value, side="left") + + def test_pickle(self, indices): + original_name, indices.name = indices.name, "foo" + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) + indices.name = original_name + + @pytest.mark.parametrize("keep", ["first", "last", False]) + def test_duplicated(self, indices, keep): + if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates + pytest.skip("Skip check for empty Index, MultiIndex, RangeIndex") + + holder = type(indices) + + idx = holder(indices) + if idx.has_duplicates: + # We are testing the duplicated-method here, so we need to know + # exactly which indices are duplicate and how (for the result). + # This is not possible if "idx" has duplicates already, which we + # therefore remove. This is seemingly circular, as drop_duplicates + # invokes duplicated, but in the end, it all works out because we + # cross-check with Series.duplicated, which is tested separately. + idx = idx.drop_duplicates() + + n, k = len(idx), 10 + duplicated_selection = np.random.choice(n, k * n) + expected = pd.Series(duplicated_selection).duplicated(keep=keep).values + idx = holder(idx.values[duplicated_selection]) + + result = idx.duplicated(keep=keep) + tm.assert_numpy_array_equal(result, expected) + + def test_has_duplicates(self, indices): + holder = type(indices) + if not len(indices) or isinstance(indices, (MultiIndex, RangeIndex)): + # MultiIndex tested separately in: + # tests/indexes/multi/test_unique_and_duplicates. + # RangeIndex is unique by definition. 
class TestFrozenList:
    """Tests for ``FrozenList`` from ``pandas.core.indexes.frozen``."""

    # Container with non-ASCII strings, shared by the repr/str/bytes tests.
    unicode_container = FrozenList(["\u05d0", "\u05d1", "c"])

    def setup_method(self, _):
        """Create a plain list and a ``FrozenList`` built from it before each test."""
        self.lst = [1, 2, 3, 4, 5]
        self.container = FrozenList(self.lst)

    def check_mutable_error(self, *args, **kwargs):
        """Assert that a mutating call raises the "mutable operations" TypeError.

        Pass whatever you would normally pass to ``pytest.raises`` after the
        exception kind: the callable first, followed by its positional and
        keyword arguments.
        """
        func, *func_args = args
        # Call the supplied function itself and match the message, so the
        # check fails if the mutation succeeds or raises an unrelated TypeError.
        with pytest.raises(TypeError, match="does not support mutable operations"):
            func(*func_args, **kwargs)

    def test_no_mutable_funcs(self):
        """Item/slice assignment, deletion and mutating methods must all raise."""

        def setitem():
            self.container[0] = 5

        self.check_mutable_error(setitem)

        def setslice():
            self.container[1:2] = 3

        self.check_mutable_error(setslice)

        def delitem():
            del self.container[0]

        self.check_mutable_error(delitem)

        def delslice():
            del self.container[0:3]

        self.check_mutable_error(delslice)

        mutable_methods = ("extend", "pop", "remove", "insert")

        for meth in mutable_methods:
            self.check_mutable_error(getattr(self.container, meth))

    def test_slicing_maintains_type(self):
        """Slicing returns a ``FrozenList`` equal to the same slice of the list."""
        result = self.container[1:2]
        expected = self.lst[1:2]
        self.check_result(result, expected)

    def check_result(self, result, expected):
        """Assert ``result`` is a ``FrozenList`` that compares equal to ``expected``."""
        assert isinstance(result, FrozenList)
        assert result == expected

    def test_string_methods_dont_fail(self):
        """``repr``, ``str`` and ``bytes`` work on an integer container."""
        repr(self.container)
        str(self.container)
        bytes(self.container)

    def test_tricky_container(self):
        """``repr`` and ``str`` work on the non-ASCII container."""
        repr(self.unicode_container)
        str(self.unicode_container)

    def test_add(self):
        """Adding a tuple on either side yields a new ``FrozenList``."""
        result = self.container + (1, 2, 3)
        expected = FrozenList(self.lst + [1, 2, 3])
        self.check_result(result, expected)

        result = (1, 2, 3) + self.container
        expected = FrozenList([1, 2, 3] + self.lst)
        self.check_result(result, expected)

    def test_iadd(self):
        """``+=`` rebinds to a new object and leaves the original untouched."""
        q = r = self.container

        q += [5]
        self.check_result(q, self.lst + [5])

        # Other shouldn't be mutated.
        self.check_result(r, self.lst)

    def test_union(self):
        """``union`` appends the other sequence's items."""
        result = self.container.union((1, 2, 3))
        expected = FrozenList(self.lst + [1, 2, 3])
        self.check_result(result, expected)

    def test_difference(self):
        """``difference`` drops the items present in the other sequence."""
        result = self.container.difference([2])
        expected = FrozenList([1, 3, 4, 5])
        self.check_result(result, expected)

    def test_difference_dupe(self):
        """``difference`` removes every occurrence of a duplicated item."""
        result = FrozenList([1, 2, 3, 2]).difference([2])
        expected = FrozenList([1, 3])
        self.check_result(result, expected)

    def test_tricky_container_to_bytes_raises(self):
        """``bytes`` on a container of strings raises the built-in TypeError."""
        # GH 26447
        msg = "^'str' object cannot be interpreted as an integer$"
        with pytest.raises(TypeError, match=msg):
            bytes(self.unicode_container)
@pytest.mark.parametrize("klass", [list, tuple, np.array, Series])
def test_where(self, klass):
    """Check ``where`` with the mask supplied as each container type in ``klass``.

    Covers an all-True mask and a mask that hides only the first element.
    """
    i = self.create_index()

    # All-True mask: the result is compared against the index itself.
    cond = [True] * len(i)
    expected = i
    result = i.where(klass(cond))
    tm.assert_index_equal(result, expected)

    # First element masked: expected is a Float64Index whose first entry is
    # the index's NA value, followed by the remaining original values.
    cond = [False] + [True] * (len(i) - 1)
    expected = Float64Index([i._na_value] + i[1:].tolist())
    result = i.where(klass(cond))
    tm.assert_index_equal(result, expected)
5], + [0.0, 2.5, 5.0, 7.5, 10.0], + [5, 4, 3, 2, 1.5], + [10.0, 7.5, 5.0, 2.5, 0.0], + ], + ids=["mixed", "float", "mixed_dec", "float_dec"], + ) + def indices(self, request): + return Float64Index(request.param) + + @pytest.fixture + def mixed_index(self): + return Float64Index([1.5, 2, 3, 4, 5]) + + @pytest.fixture + def float_index(self): + return Float64Index([0.0, 2.5, 5.0, 7.5, 10.0]) + + def create_index(self): + return Float64Index(np.arange(5, dtype="float64")) + + def test_repr_roundtrip(self, indices): + tm.assert_index_equal(eval(repr(indices)), indices) + + def check_is_index(self, i): + assert isinstance(i, Index) + assert not isinstance(i, Float64Index) + + def check_coerce(self, a, b, is_float_index=True): + assert a.equals(b) + tm.assert_index_equal(a, b, exact=False) + if is_float_index: + assert isinstance(b, Float64Index) + else: + self.check_is_index(b) + + def test_constructor(self): + + # explicit construction + index = Float64Index([1, 2, 3, 4, 5]) + assert isinstance(index, Float64Index) + expected = np.array([1, 2, 3, 4, 5], dtype="float64") + tm.assert_numpy_array_equal(index.values, expected) + index = Float64Index(np.array([1, 2, 3, 4, 5])) + assert isinstance(index, Float64Index) + index = Float64Index([1.0, 2, 3, 4, 5]) + assert isinstance(index, Float64Index) + index = Float64Index(np.array([1.0, 2, 3, 4, 5])) + assert isinstance(index, Float64Index) + assert index.dtype == float + + index = Float64Index(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, Float64Index) + assert index.dtype == np.float64 + + index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, Float64Index) + assert index.dtype == np.float64 + + # nan handling + result = Float64Index([np.nan, np.nan]) + assert pd.isna(result.values).all() + result = Float64Index(np.array([np.nan])) + assert pd.isna(result.values).all() + result = Index(np.array([np.nan])) + assert pd.isna(result.values).all() + + 
@pytest.mark.parametrize( + "index, dtype", + [ + (pd.Int64Index, "float64"), + (pd.UInt64Index, "categorical"), + (pd.Float64Index, "datetime64"), + (pd.RangeIndex, "float64"), + ], + ) + def test_invalid_dtype(self, index, dtype): + # GH 29539 + with pytest.raises( + ValueError, + match=rf"Incorrect `dtype` passed: expected \w+(?: \w+)?, received {dtype}", + ): + index([1, 2, 3], dtype=dtype) + + def test_constructor_invalid(self): + + # invalid + msg = ( + r"Float64Index\(\.\.\.\) must be called with a collection of" + r" some kind, 0\.0 was passed" + ) + with pytest.raises(TypeError, match=msg): + Float64Index(0.0) + msg = ( + "String dtype not supported, " + "you may need to explicitly cast to a numeric type" + ) + with pytest.raises(TypeError, match=msg): + Float64Index(["a", "b", 0.0]) + msg = r"float\(\) argument must be a string or a number, not 'Timestamp'" + with pytest.raises(TypeError, match=msg): + Float64Index([Timestamp("20130101")]) + + def test_constructor_coerce(self, mixed_index, float_index): + + self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5])) + self.check_coerce(float_index, Index(np.arange(5) * 2.5)) + self.check_coerce( + float_index, Index(np.array(np.arange(5) * 2.5, dtype=object)) + ) + + def test_constructor_explicit(self, mixed_index, float_index): + + # these don't auto convert + self.check_coerce( + float_index, Index((np.arange(5) * 2.5), dtype=object), is_float_index=False + ) + self.check_coerce( + mixed_index, Index([1.5, 2, 3, 4, 5], dtype=object), is_float_index=False + ) + + def test_astype(self, mixed_index, float_index): + + result = float_index.astype(object) + assert result.equals(float_index) + assert float_index.equals(result) + self.check_is_index(result) + + i = mixed_index.copy() + i.name = "foo" + result = i.astype(object) + assert result.equals(i) + assert i.equals(result) + self.check_is_index(result) + + # GH 12881 + # a float astype int + for dtype in ["int16", "int32", "int64"]: + i = Float64Index([0, 1, 
2]) + result = i.astype(dtype) + expected = Int64Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + i = Float64Index([0, 1.1, 2]) + result = i.astype(dtype) + expected = Int64Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + for dtype in ["float32", "float64"]: + i = Float64Index([0, 1, 2]) + result = i.astype(dtype) + expected = i + tm.assert_index_equal(result, expected) + + i = Float64Index([0, 1.1, 2]) + result = i.astype(dtype) + expected = Index(i.values.astype(dtype)) + tm.assert_index_equal(result, expected) + + # invalid + for dtype in ["M8[ns]", "m8[ns]"]: + msg = ( + f"Cannot convert Float64Index to dtype {pandas_dtype(dtype)}; " + f"integer values are required for conversion" + ) + with pytest.raises(TypeError, match=re.escape(msg)): + i.astype(dtype) + + # GH 13149 + for dtype in ["int16", "int32", "int64"]: + i = Float64Index([0, 1.1, np.NAN]) + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + i.astype(dtype) + + def test_cannot_cast_inf_to_int(self): + idx = pd.Float64Index([1, 2, np.inf]) + + msg = r"Cannot convert non-finite values \(NA or inf\) to integer" + with pytest.raises(ValueError, match=msg): + idx.astype(int) + + def test_type_coercion_fail(self, any_int_dtype): + # see gh-15832 + msg = "Trying to coerce float values to integers" + with pytest.raises(ValueError, match=msg): + Index([1, 2, 3.5], dtype=any_int_dtype) + + def test_type_coercion_valid(self, float_dtype): + # There is no Float32Index, so we always + # generate Float64Index. 
+ i = Index([1, 2, 3.5], dtype=float_dtype) + tm.assert_index_equal(i, Index([1, 2, 3.5])) + + def test_equals_numeric(self): + + i = Float64Index([1.0, 2.0]) + assert i.equals(i) + assert i.identical(i) + + i2 = Float64Index([1.0, 2.0]) + assert i.equals(i2) + + i = Float64Index([1.0, np.nan]) + assert i.equals(i) + assert i.identical(i) + + i2 = Float64Index([1.0, np.nan]) + assert i.equals(i2) + + def test_get_indexer(self): + idx = Float64Index([0.0, 1.0, 2.0]) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = [-0.1, 0.5, 1.1] + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + def test_get_loc(self): + idx = Float64Index([0.0, 1.0, 2.0]) + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(1, method) == 1 + if method is not None: + assert idx.get_loc(1, method, tolerance=0) == 1 + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc(1.1, method) == loc + assert idx.get_loc(1.1, method, tolerance=0.9) == loc + + with pytest.raises(KeyError, match="^'foo'$"): + idx.get_loc("foo") + with pytest.raises(KeyError, match=r"^1\.5$"): + idx.get_loc(1.5) + with pytest.raises(KeyError, match=r"^1\.5$"): + idx.get_loc(1.5, method="pad", tolerance=0.1) + with pytest.raises(KeyError, match="^True$"): + idx.get_loc(True) + with pytest.raises(KeyError, match="^False$"): + idx.get_loc(False) + + with pytest.raises(ValueError, match="must be numeric"): + idx.get_loc(1.4, method="nearest", tolerance="foo") + + with pytest.raises(ValueError, match="must contain numeric elements"): + idx.get_loc(1.4, method="nearest", tolerance=np.array(["foo"])) + + with pytest.raises( + ValueError, 
match="tolerance size must match target index size" + ): + idx.get_loc(1.4, method="nearest", tolerance=np.array([1, 2])) + + def test_get_loc_na(self): + idx = Float64Index([np.nan, 1, 2]) + assert idx.get_loc(1) == 1 + assert idx.get_loc(np.nan) == 0 + + idx = Float64Index([np.nan, 1, np.nan]) + assert idx.get_loc(1) == 1 + + # representable by slice [0:2:2] + # pytest.raises(KeyError, idx.slice_locs, np.nan) + sliced = idx.slice_locs(np.nan) + assert isinstance(sliced, tuple) + assert sliced == (0, 3) + + # not representable by slice + idx = Float64Index([np.nan, 1, np.nan, np.nan]) + assert idx.get_loc(1) == 1 + msg = "'Cannot get left slice bound for non-unique label: nan" + with pytest.raises(KeyError, match=msg): + idx.slice_locs(np.nan) + + def test_get_loc_missing_nan(self): + # GH 8569 + idx = Float64Index([1, 2]) + assert idx.get_loc(1) == 0 + with pytest.raises(KeyError, match=r"^3\.0$"): + idx.get_loc(3) + with pytest.raises(KeyError, match="^nan$"): + idx.get_loc(np.nan) + with pytest.raises(KeyError, match=r"^\[nan\]$"): + idx.get_loc([np.nan]) + + def test_contains_nans(self): + i = Float64Index([1.0, 2.0, np.nan]) + assert np.nan in i + + def test_contains_not_nans(self): + i = Float64Index([1.0, 2.0, np.nan]) + assert 1.0 in i + + def test_doesnt_contain_all_the_things(self): + i = Float64Index([np.nan]) + assert not i.isin([0]).item() + assert not i.isin([1]).item() + assert i.isin([np.nan]).item() + + def test_nan_multiple_containment(self): + i = Float64Index([1.0, np.nan]) + tm.assert_numpy_array_equal(i.isin([1.0]), np.array([True, False])) + tm.assert_numpy_array_equal(i.isin([2.0, np.pi]), np.array([False, False])) + tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, True])) + tm.assert_numpy_array_equal(i.isin([1.0, np.nan]), np.array([True, True])) + i = Float64Index([1.0, 2.0]) + tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, False])) + + def test_astype_from_object(self): + index = Index([1.0, np.nan, 0.2], 
dtype="object") + result = index.astype(float) + expected = Float64Index([1.0, np.nan, 0.2]) + assert result.dtype == expected.dtype + tm.assert_index_equal(result, expected) + + def test_fillna_float64(self): + # GH 11343 + idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") + # can't downcast + exp = Index([1.0, 0.1, 3.0], name="x") + tm.assert_index_equal(idx.fillna(0.1), exp) + + # downcast + exp = Float64Index([1.0, 2.0, 3.0], name="x") + tm.assert_index_equal(idx.fillna(2), exp) + + # object + exp = Index([1.0, "obj", 3.0], name="x") + tm.assert_index_equal(idx.fillna("obj"), exp) + + def test_take_fill_value(self): + # GH 12631 + idx = pd.Float64Index([1.0, 2.0, 3.0], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = pd.Float64Index([2.0, 1.0, 3.0], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.Float64Index([2.0, 1.0, np.nan], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.Float64Index([2.0, 1.0, 3.0], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +class NumericInt(Numeric): + def test_view(self): + i = self._holder([], name="Foo") + i_view = i.view() + assert i_view.name == "Foo" + + i_view = i.view(self._dtype) + tm.assert_index_equal(i, self._holder(i_view, name="Foo")) + + i_view = i.view(self._holder) + tm.assert_index_equal(i, self._holder(i_view, name="Foo")) + + def test_is_monotonic(self): + index = self._holder([1, 2, 3, 4]) + assert 
index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is True + assert index.is_monotonic_decreasing is False + assert index._is_strictly_monotonic_decreasing is False + + index = self._holder([4, 3, 2, 1]) + assert index.is_monotonic is False + assert index._is_strictly_monotonic_increasing is False + assert index._is_strictly_monotonic_decreasing is True + + index = self._holder([1]) + assert index.is_monotonic is True + assert index.is_monotonic_increasing is True + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_increasing is True + assert index._is_strictly_monotonic_decreasing is True + + def test_is_strictly_monotonic(self): + index = self._holder([1, 1, 2, 3]) + assert index.is_monotonic_increasing is True + assert index._is_strictly_monotonic_increasing is False + + index = self._holder([3, 2, 1, 1]) + assert index.is_monotonic_decreasing is True + assert index._is_strictly_monotonic_decreasing is False + + index = self._holder([1, 1]) + assert index.is_monotonic_increasing + assert index.is_monotonic_decreasing + assert not index._is_strictly_monotonic_increasing + assert not index._is_strictly_monotonic_decreasing + + def test_logical_compat(self): + idx = self.create_index() + assert idx.all() == idx.values.all() + assert idx.any() == idx.values.any() + + def test_identical(self): + index = self.create_index() + i = Index(index.copy()) + assert i.identical(index) + + same_values_different_type = Index(i, dtype=object) + assert not i.identical(same_values_different_type) + + i = index.copy(dtype=object) + i = i.rename("foo") + same_values = Index(i, dtype=object) + assert same_values.identical(i) + + assert not i.identical(index) + assert Index(same_values, name="foo", dtype=object).identical(i) + + assert not index.copy(dtype=object).identical(index.copy(dtype=self._dtype)) + + def test_join_non_unique(self): + left = Index([4, 4, 3, 3]) + + joined, 
lidx, ridx = left.join(left, return_indexers=True) + + exp_joined = Index([3, 3, 3, 3, 4, 4, 4, 4]) + tm.assert_index_equal(joined, exp_joined) + + exp_lidx = np.array([2, 2, 3, 3, 0, 0, 1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(lidx, exp_lidx) + + exp_ridx = np.array([2, 3, 2, 3, 0, 1, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + def test_join_self(self, join_type): + index = self.create_index() + joined = index.join(index, how=join_type) + assert index is joined + + def test_union_noncomparable(self): + # corner case, non-Int64Index + index = self.create_index() + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + def test_cant_or_shouldnt_cast(self): + msg = ( + "String dtype not supported, " + "you may need to explicitly cast to a numeric type" + ) + # can't + data = ["foo", "bar", "baz"] + with pytest.raises(TypeError, match=msg): + self._holder(data) + + # shouldn't + data = ["0", "1", "2"] + with pytest.raises(TypeError, match=msg): + self._holder(data) + + def test_view_index(self): + index = self.create_index() + index.view(Index) + + def test_prevent_casting(self): + index = self.create_index() + result = index.astype("O") + assert result.dtype == np.object_ + + def test_take_preserve_name(self): + index = self._holder([1, 2, 3, 4], name="foo") + taken = index.take([3, 0, 1]) + assert index.name == taken.name + + def test_take_fill_value(self): + # see gh-12631 + idx = self._holder([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = self._holder([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + name = self._holder.__name__ + msg = f"Unable to fill values because {name} cannot contain NA" + + # 
fill_value=True + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -1]), fill_value=True) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = self._holder([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_slice_keep_name(self): + idx = self._holder([1, 2], name="asdf") + assert idx.name == idx[1:].name + + +class TestInt64Index(NumericInt): + _dtype = "int64" + _holder = Int64Index + + @pytest.fixture( + params=[range(0, 20, 2), range(19, -1, -1)], ids=["index_inc", "index_dec"] + ) + def indices(self, request): + return Int64Index(request.param) + + def create_index(self): + # return Int64Index(np.arange(5, dtype="int64")) + return Int64Index(range(0, 20, 2)) + + def test_constructor(self): + # pass list, coerce fine + index = Int64Index([-5, 0, 1, 2]) + expected = Index([-5, 0, 1, 2], dtype=np.int64) + tm.assert_index_equal(index, expected) + + # from iterable + index = Int64Index(iter([-5, 0, 1, 2])) + tm.assert_index_equal(index, expected) + + # scalar raise Exception + msg = ( + r"Int64Index\(\.\.\.\) must be called with a collection of some " + "kind, 5 was passed" + ) + with pytest.raises(TypeError, match=msg): + Int64Index(5) + + # copy + arr = index.values + new_index = Int64Index(arr, copy=True) + tm.assert_index_equal(new_index, index) + val = arr[0] + 3000 + + # this should not change index + arr[0] = val + assert new_index[0] != val + + # interpret list-like + expected = Int64Index([5, 0]) + for cls in [Index, Int64Index]: + for idx in [ + cls([5, 0], dtype="int64"), + cls(np.array([5, 0]), dtype="int64"), + cls(Series([5, 0]), dtype="int64"), + ]: + tm.assert_index_equal(idx, expected) + + 
def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = Int64Index(arr) + assert index.values.dtype == np.int64 + tm.assert_index_equal(index, Index(arr)) + + # preventing casting + arr = np.array([1, "2", 3, "4"], dtype=object) + with pytest.raises(TypeError, match="casting"): + Int64Index(arr) + + arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] + with pytest.raises(TypeError, match="casting"): + Int64Index(arr_with_floats) + + def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): + + # see gh-15832 + msg = "Trying to coerce negative values to unsigned integers" + + with pytest.raises(OverflowError, match=msg): + Index([-1], dtype=uint_dtype) + + def test_constructor_unwraps_index(self): + idx = pd.Index([1, 2]) + result = pd.Int64Index(idx) + expected = np.array([1, 2], dtype="int64") + tm.assert_numpy_array_equal(result._data, expected) + + def test_coerce_list(self): + # coerce things + arr = Index([1, 2, 3, 4]) + assert isinstance(arr, Int64Index) + + # but not if explicit dtype passed + arr = Index([1, 2, 3, 4], dtype=object) + assert isinstance(arr, Index) + + def test_get_indexer(self): + index = self.create_index() + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = Int64Index(np.arange(10)) + indexer = index.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_nan(self): + # GH 7820 + result = Index([1, 2, np.nan]).get_indexer([np.nan]) + expected = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(result, 
expected) + + def test_intersection(self): + index = self.create_index() + other = Index([1, 2, 3, 4, 5]) + result = index.intersection(other) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + def test_join_inner(self): + index = self.create_index() + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([2, 12]) + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([4, 1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="inner", return_indexers=True) + + res2 = index.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 6], dtype=np.intp) + eridx = np.array([1, 4], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + index = self.create_index() + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="left", return_indexers=True) + eres = index + eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.intp) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # 
monotonic + res, lidx, ridx = index.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.intp) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + index = self.create_index() + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + res, lidx, ridx = index.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.intp) + + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # monotonic + res, lidx, ridx = index.join(other_mono, how="right", return_indexers=True) + eres = other_mono + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.intp) + assert isinstance(other, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + assert ridx is None + + # non-unique + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + eres = Index([1, 1, 2, 5, 7, 9]) # 1 is in idx2, so it should be x2 + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def 
test_join_non_int_index(self): + index = self.create_index() + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = index.join(other, how="outer") + outer2 = other.join(index, how="outer") + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index.join(other, how="inner") + inner2 = other.join(index, how="inner") + expected = Index([6, 8, 10]) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index.join(other, how="left") + tm.assert_index_equal(left, index.astype(object)) + + left2 = other.join(index, how="left") + tm.assert_index_equal(left2, other) + + right = index.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index, how="right") + tm.assert_index_equal(right2, index.astype(object)) + + def test_join_outer(self): + index = self.create_index() + other = Int64Index([7, 12, 25, 1, 2, 5]) + other_mono = Int64Index([1, 2, 5, 7, 12, 25]) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index.join(other, how="outer", return_indexers=True) + noidx_res = index.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = Int64Index([0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 25]) + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.intp + ) + + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index.join(other_mono, how="outer", return_indexers=True) + noidx_res = index.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, 1, 2, -1, 3, -1, 4, 5, 6, 7, 8, 9, -1], dtype=np.intp) + eridx = np.array( + [-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], 
dtype=np.intp + ) + assert isinstance(res, Int64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +class TestUInt64Index(NumericInt): + + _dtype = "uint64" + _holder = UInt64Index + + @pytest.fixture( + params=[ + [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25], + [2 ** 63 + 25, 2 ** 63 + 20, 2 ** 63 + 15, 2 ** 63 + 10, 2 ** 63], + ], + ids=["index_inc", "index_dec"], + ) + def indices(self, request): + return UInt64Index(request.param) + + @pytest.fixture + def index_large(self): + # large values used in TestUInt64Index where no compat needed with Int64/Float64 + large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] + return UInt64Index(large) + + def create_index(self): + # compat with shared Int64/Float64 tests; use index_large for UInt64 only tests + return UInt64Index(np.arange(5, dtype="uint64")) + + def test_constructor(self): + idx = UInt64Index([1, 2, 3]) + res = Index([1, 2, 3], dtype=np.uint64) + tm.assert_index_equal(res, idx) + + idx = UInt64Index([1, 2 ** 63]) + res = Index([1, 2 ** 63], dtype=np.uint64) + tm.assert_index_equal(res, idx) + + idx = UInt64Index([1, 2 ** 63]) + res = Index([1, 2 ** 63]) + tm.assert_index_equal(res, idx) + + idx = Index([-1, 2 ** 63], dtype=object) + res = Index(np.array([-1, 2 ** 63], dtype=object)) + tm.assert_index_equal(res, idx) + + # https://github.com/pandas-dev/pandas/issues/29526 + idx = UInt64Index([1, 2 ** 63 + 1], dtype=np.uint64) + res = Index([1, 2 ** 63 + 1], dtype=np.uint64) + tm.assert_index_equal(res, idx) + + def test_get_indexer(self, index_large): + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2 ** 63) + indexer = index_large.get_indexer(target) + expected = np.array([0, -1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2 ** 63) + indexer = 
index_large.get_indexer(target, method="pad") + expected = np.array([0, 0, 1, 2, 3, 4, 4, 4, 4, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + target = UInt64Index(np.arange(10).astype("uint64") * 5 + 2 ** 63) + indexer = index_large.get_indexer(target, method="backfill") + expected = np.array([0, 1, 1, 2, 3, 4, -1, -1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, expected) + + def test_intersection(self, index_large): + other = Index([2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20]) + result = index_large.intersection(other) + expected = Index(np.sort(np.intersect1d(index_large.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index_large) + expected = Index( + np.sort(np.asarray(np.intersect1d(index_large.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + def test_join_inner(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="inner", return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = UInt64Index(2 ** 63 + np.array([10, 25], dtype="uint64")) + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([5, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="inner", return_indexers=True + ) + + res2 = index_large.intersection(other_mono) + tm.assert_index_equal(res, res2) + + elidx = np.array([1, 4], dtype=np.intp) + eridx = np.array([3, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + 
tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="left", return_indexers=True) + eres = index_large + eridx = np.array([-1, 5, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join(other_mono, how="left", return_indexers=True) + eridx = np.array([-1, 3, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + assert lidx is None + tm.assert_numpy_array_equal(ridx, eridx) + + # non-unique + idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx2.join(idx, how="left", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + eridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + elidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + res, lidx, ridx = index_large.join(other, how="right", return_indexers=True) + eres = other + elidx = np.array([-1, -1, 4, -1, -1, 1], dtype=np.intp) + + tm.assert_numpy_array_equal(lidx, elidx) + assert isinstance(other, UInt64Index) + 
tm.assert_index_equal(res, eres) + assert ridx is None + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="right", return_indexers=True + ) + eres = other_mono + elidx = np.array([-1, -1, -1, 1, -1, 4], dtype=np.intp) + + assert isinstance(other, UInt64Index) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_index_equal(res, eres) + assert ridx is None + + # non-unique + idx = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5], dtype="uint64")) + idx2 = UInt64Index(2 ** 63 + np.array([1, 2, 5, 7, 9], dtype="uint64")) + res, lidx, ridx = idx.join(idx2, how="right", return_indexers=True) + + # 1 is in idx2, so it should be x2 + eres = UInt64Index(2 ** 63 + np.array([1, 1, 2, 5, 7, 9], dtype="uint64")) + elidx = np.array([0, 1, 2, 3, -1, -1], dtype=np.intp) + eridx = np.array([0, 0, 1, 2, 3, 4], dtype=np.intp) + + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + def test_join_non_int_index(self, index_large): + other = Index( + 2 ** 63 + np.array([1, 5, 7, 10, 20], dtype="uint64"), dtype=object + ) + + outer = index_large.join(other, how="outer") + outer2 = other.join(index_large, how="outer") + expected = Index( + 2 ** 63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64") + ) + tm.assert_index_equal(outer, outer2) + tm.assert_index_equal(outer, expected) + + inner = index_large.join(other, how="inner") + inner2 = other.join(index_large, how="inner") + expected = Index(2 ** 63 + np.array([10, 20], dtype="uint64")) + tm.assert_index_equal(inner, inner2) + tm.assert_index_equal(inner, expected) + + left = index_large.join(other, how="left") + tm.assert_index_equal(left, index_large.astype(object)) + + left2 = other.join(index_large, how="left") + tm.assert_index_equal(left2, other) + + right = index_large.join(other, how="right") + tm.assert_index_equal(right, other) + + right2 = other.join(index_large, how="right") + tm.assert_index_equal(right2, 
index_large.astype(object)) + + def test_join_outer(self, index_large): + other = UInt64Index(2 ** 63 + np.array([7, 12, 25, 1, 2, 10], dtype="uint64")) + other_mono = UInt64Index( + 2 ** 63 + np.array([1, 2, 7, 10, 12, 25], dtype="uint64") + ) + + # not monotonic + # guarantee of sortedness + res, lidx, ridx = index_large.join(other, how="outer", return_indexers=True) + noidx_res = index_large.join(other, how="outer") + tm.assert_index_equal(res, noidx_res) + + eres = UInt64Index( + 2 ** 63 + np.array([0, 1, 2, 7, 10, 12, 15, 20, 25], dtype="uint64") + ) + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 3, 4, 0, 5, 1, -1, -1, 2], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + # monotonic + res, lidx, ridx = index_large.join( + other_mono, how="outer", return_indexers=True + ) + noidx_res = index_large.join(other_mono, how="outer") + tm.assert_index_equal(res, noidx_res) + + elidx = np.array([0, -1, -1, -1, 1, -1, 2, 3, 4], dtype=np.intp) + eridx = np.array([-1, 0, 1, 2, 3, 4, -1, -1, 5], dtype=np.intp) + + assert isinstance(res, UInt64Index) + tm.assert_index_equal(res, eres) + tm.assert_numpy_array_equal(lidx, elidx) + tm.assert_numpy_array_equal(ridx, eridx) + + +@pytest.mark.parametrize("dtype", ["int64", "uint64"]) +def test_int_float_union_dtype(dtype): + # https://github.com/pandas-dev/pandas/issues/26778 + # [u]int | float -> float + index = pd.Index([0, 2, 3], dtype=dtype) + other = pd.Float64Index([0.5, 1.5]) + expected = pd.Float64Index([0.0, 0.5, 1.5, 2.0, 3.0]) + result = index.union(other) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + +def test_range_float_union_dtype(): + # https://github.com/pandas-dev/pandas/issues/26778 + index = pd.RangeIndex(start=0, stop=3) + other = pd.Float64Index([0.5, 1.5]) + 
result = index.union(other) + expected = pd.Float64Index([0.0, 0.5, 1, 1.5, 2.0]) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + +def test_uint_index_does_not_convert_to_float64(): + # https://github.com/pandas-dev/pandas/issues/28279 + # https://github.com/pandas-dev/pandas/issues/28023 + series = pd.Series( + [0, 1, 2, 3, 4, 5], + index=[ + 7606741985629028552, + 17876870360202815256, + 17876870360202815256, + 13106359306506049338, + 8991270399732411471, + 8991270399732411472, + ], + ) + + result = series.loc[[7606741985629028552, 17876870360202815256]] + + expected = UInt64Index( + [7606741985629028552, 17876870360202815256, 17876870360202815256], + dtype="uint64", + ) + tm.assert_index_equal(result.index, expected) + + tm.assert_equal(result, series[:3]) diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py new file mode 100644 index 00000000..58355665 --- /dev/null +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -0,0 +1,130 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Float64Index, + Index, + Int64Index, + PeriodIndex, + TimedeltaIndex, + UInt64Index, + _np_version_under1p17, + _np_version_under1p18, +) +import pandas._testing as tm +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin + + +@pytest.mark.parametrize( + "func", + [ + np.exp, + np.exp2, + np.expm1, + np.log, + np.log2, + np.log10, + np.log1p, + np.sqrt, + np.sin, + np.cos, + np.tan, + np.arcsin, + np.arccos, + np.arctan, + np.sinh, + np.cosh, + np.tanh, + np.arcsinh, + np.arccosh, + np.arctanh, + np.deg2rad, + np.rad2deg, + ], + ids=lambda x: x.__name__, +) +def test_numpy_ufuncs_basic(indices, func): + # test ufuncs of numpy, see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + idx = indices + if isinstance(idx, DatetimeIndexOpsMixin): + # raise TypeError or ValueError (PeriodIndex) + with 
pytest.raises(Exception): + with np.errstate(all="ignore"): + func(idx) + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # coerces to float (e.g. np.sin) + with np.errstate(all="ignore"): + result = func(idx) + exp = Index(func(idx.values), name=idx.name) + + tm.assert_index_equal(result, exp) + assert isinstance(result, Float64Index) + else: + # raise AttributeError or TypeError + if len(idx) == 0: + pass + else: + with pytest.raises(Exception): + with np.errstate(all="ignore"): + func(idx) + + +@pytest.mark.parametrize( + "func", [np.isfinite, np.isinf, np.isnan, np.signbit], ids=lambda x: x.__name__ +) +def test_numpy_ufuncs_other(indices, func): + # test ufuncs of numpy, see: + # http://docs.scipy.org/doc/numpy/reference/ufuncs.html + + idx = indices + if isinstance(idx, (DatetimeIndex, TimedeltaIndex)): + + if not _np_version_under1p18 and func in [np.isfinite, np.isinf, np.isnan]: + # numpy 1.18(dev) changed isinf and isnan to not raise on dt64/tfd64 + result = func(idx) + assert isinstance(result, np.ndarray) + + elif not _np_version_under1p17 and func in [np.isfinite]: + # ok under numpy >= 1.17 + # Results in bool array + result = func(idx) + assert isinstance(result, np.ndarray) + else: + # raise TypeError or ValueError (PeriodIndex) + with pytest.raises(Exception): + func(idx) + + elif isinstance(idx, PeriodIndex): + # raise TypeError or ValueError (PeriodIndex) + with pytest.raises(Exception): + func(idx) + + elif isinstance(idx, (Float64Index, Int64Index, UInt64Index)): + # Results in bool array + result = func(idx) + assert isinstance(result, np.ndarray) + assert not isinstance(result, Index) + else: + if len(idx) == 0: + pass + else: + with pytest.raises(Exception): + func(idx) + + +def test_elementwise_comparison_warning(): + # https://github.com/pandas-dev/pandas/issues/22698#issuecomment-458968300 + # np.array([1, 2]) == 'a' returns False, and produces a + # FutureWarning that it'll be [False, False] in the future. 
+ # We just want to ensure that comes through. + # When NumPy dev actually enforces this change, we'll need to skip + # this test. + idx = Index([1, 2]) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = idx == "a" + + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py new file mode 100644 index 00000000..abfa413d --- /dev/null +++ b/pandas/tests/indexes/test_setops.py @@ -0,0 +1,107 @@ +""" +The tests in this package are to ensure the proper resultant dtypes of +set operations. +""" +import itertools as it + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_dtype_equal + +import pandas as pd +from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index +import pandas._testing as tm +from pandas.api.types import pandas_dtype +from pandas.tests.indexes.conftest import indices_dict + +COMPATIBLE_INCONSISTENT_PAIRS = { + (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex), + (Float64Index, Int64Index): (tm.makeFloatIndex, tm.makeIntIndex), + (Float64Index, RangeIndex): (tm.makeFloatIndex, tm.makeIntIndex), + (Float64Index, UInt64Index): (tm.makeFloatIndex, tm.makeUIntIndex), +} + + +@pytest.fixture(params=it.combinations(indices_dict, 2), ids="-".join) +def index_pair(request): + """ + Create all combinations of 2 index types. 
+ """ + return indices_dict[request.param[0]], indices_dict[request.param[1]] + + +def test_union_same_types(indices): + # Union with a non-unique, non-monotonic index raises error + # Only needed for bool index factory + idx1 = indices.sort_values() + idx2 = indices.sort_values() + assert idx1.union(idx2).dtype == idx1.dtype + + +def test_union_different_types(index_pair): + # GH 23525 + idx1, idx2 = index_pair + type_pair = tuple(sorted([type(idx1), type(idx2)], key=lambda x: str(x))) + if type_pair in COMPATIBLE_INCONSISTENT_PAIRS: + pytest.xfail("This test only considers non compatible indexes.") + + if any(isinstance(idx, pd.MultiIndex) for idx in index_pair): + pytest.xfail("This test doesn't consider multiindixes.") + + if is_dtype_equal(idx1.dtype, idx2.dtype): + pytest.xfail("This test only considers non matching dtypes.") + + # A union with a CategoricalIndex (even as dtype('O')) and a + # non-CategoricalIndex can only be made if both indices are monotonic. + # This is true before this PR as well. 
+ + # Union with a non-unique, non-monotonic index raises error + # This applies to the boolean index + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() + + assert idx1.union(idx2).dtype == np.dtype("O") + assert idx2.union(idx1).dtype == np.dtype("O") + + +@pytest.mark.parametrize("idx_fact1,idx_fact2", COMPATIBLE_INCONSISTENT_PAIRS.values()) +def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2): + # GH 23525 + idx1 = idx_fact1(10) + idx2 = idx_fact2(20) + + res1 = idx1.union(idx2) + res2 = idx2.union(idx1) + + assert res1.dtype in (idx1.dtype, idx2.dtype) + assert res2.dtype in (idx1.dtype, idx2.dtype) + + +@pytest.mark.parametrize( + "left, right, expected", + [ + ("int64", "int64", "int64"), + ("int64", "uint64", "object"), + ("int64", "float64", "float64"), + ("uint64", "float64", "float64"), + ("uint64", "uint64", "uint64"), + ("float64", "float64", "float64"), + ("datetime64[ns]", "int64", "object"), + ("datetime64[ns]", "uint64", "object"), + ("datetime64[ns]", "float64", "object"), + ("datetime64[ns, CET]", "int64", "object"), + ("datetime64[ns, CET]", "uint64", "object"), + ("datetime64[ns, CET]", "float64", "object"), + ("Period[D]", "int64", "object"), + ("Period[D]", "uint64", "object"), + ("Period[D]", "float64", "object"), + ], +) +def test_union_dtypes(left, right, expected): + left = pandas_dtype(left) + right = pandas_dtype(right) + a = pd.Index([], dtype=left) + b = pd.Index([], dtype=right) + result = (a | b).dtype + assert result == expected diff --git a/pandas/tests/indexes/timedeltas/__init__.py b/pandas/tests/indexes/timedeltas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexes/timedeltas/test_astype.py b/pandas/tests/indexes/timedeltas/test_astype.py new file mode 100644 index 00000000..82c9d995 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_astype.py @@ -0,0 +1,123 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas 
import ( + Float64Index, + Index, + Int64Index, + NaT, + Timedelta, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestTimedeltaIndex: + def test_astype_object(self): + idx = timedelta_range(start="1 days", periods=4, freq="D", name="idx") + expected_list = [ + Timedelta("1 days"), + Timedelta("2 days"), + Timedelta("3 days"), + Timedelta("4 days"), + ] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name="idx") + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype_object_with_nat(self): + idx = TimedeltaIndex( + [timedelta(days=1), timedelta(days=2), NaT, timedelta(days=4)], name="idx" + ) + expected_list = [ + Timedelta("1 days"), + Timedelta("2 days"), + NaT, + Timedelta("4 days"), + ] + result = idx.astype(object) + expected = Index(expected_list, dtype=object, name="idx") + tm.assert_index_equal(result, expected) + assert idx.tolist() == expected_list + + def test_astype(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + + result = idx.astype(object) + expected = Index([Timedelta("1 days 03:46:40")] + [NaT] * 3, dtype=object) + tm.assert_index_equal(result, expected) + + result = idx.astype(int) + expected = Int64Index( + [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64 + ) + tm.assert_index_equal(result, expected) + + result = idx.astype(str) + expected = Index(str(x) for x in idx) + tm.assert_index_equal(result, expected) + + rng = timedelta_range("1 days", periods=10) + result = rng.astype("i8") + tm.assert_index_equal(result, Index(rng.asi8)) + tm.assert_numpy_array_equal(rng.asi8, result.values) + + def test_astype_uint(self): + arr = timedelta_range("1H", periods=2) + expected = pd.UInt64Index( + np.array([3600000000000, 90000000000000], dtype="uint64") + ) + + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) + + def 
test_astype_timedelta64(self): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + + result = idx.astype("timedelta64") + expected = Float64Index([1e14] + [np.NaN] * 3, dtype="float64") + tm.assert_index_equal(result, expected) + + result = idx.astype("timedelta64[ns]") + tm.assert_index_equal(result, idx) + assert result is not idx + + result = idx.astype("timedelta64[ns]", copy=False) + tm.assert_index_equal(result, idx) + assert result is idx + + @pytest.mark.parametrize("dtype", [float, "datetime64", "datetime64[ns]"]) + def test_astype_raises(self, dtype): + # GH 13149, GH 13209 + idx = TimedeltaIndex([1e14, "NaT", NaT, np.NaN]) + msg = "Cannot cast TimedeltaArray to dtype" + with pytest.raises(TypeError, match=msg): + idx.astype(dtype) + + def test_astype_category(self): + obj = pd.timedelta_range("1H", periods=2, freq="H") + + result = obj.astype("category") + expected = pd.CategoricalIndex([pd.Timedelta("1H"), pd.Timedelta("2H")]) + tm.assert_index_equal(result, expected) + + result = obj._data.astype("category") + expected = expected.values + tm.assert_categorical_equal(result, expected) + + def test_astype_array_fallback(self): + obj = pd.timedelta_range("1H", periods=2) + result = obj.astype(bool) + expected = pd.Index(np.array([True, True])) + tm.assert_index_equal(result, expected) + + result = obj._data.astype(bool) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py new file mode 100644 index 00000000..39abbf59 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -0,0 +1,212 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import Timedelta, TimedeltaIndex, timedelta_range, to_timedelta +import pandas._testing as tm +from pandas.core.arrays import TimedeltaArray + + +class TestTimedeltaIndex: + 
def test_int64_nocopy(self): + # GH#23539 check that a copy isn't made when we pass int64 data + # and copy=False + arr = np.arange(10, dtype=np.int64) + tdi = TimedeltaIndex(arr, copy=False) + assert tdi._data._data.base is arr + + def test_infer_from_tdi(self): + # GH#23539 + # fast-path for inferring a frequency if the passed data already + # has one + tdi = pd.timedelta_range("1 second", periods=10 ** 7, freq="1s") + + result = pd.TimedeltaIndex(tdi, freq="infer") + assert result.freq == tdi.freq + + # check that inferred_freq was not called by checking that the + # value has not been cached + assert "inferred_freq" not in getattr(result, "_cache", {}) + + def test_infer_from_tdi_mismatch(self): + # GH#23539 + # fast-path for invalidating a frequency if the passed data already + # has one and it does not match the `freq` input + tdi = pd.timedelta_range("1 second", periods=100, freq="1s") + + msg = ( + "Inferred frequency .* from passed values does " + "not conform to passed frequency" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(tdi, freq="D") + + with pytest.raises(ValueError, match=msg): + # GH#23789 + TimedeltaArray(tdi, freq="D") + + def test_dt64_data_invalid(self): + # GH#23539 + # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64] + # raise as of GH#29794 + dti = pd.date_range("2016-01-01", periods=3) + + msg = "cannot be converted to timedelta64" + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(dti.tz_localize("Europe/Brussels")) + + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(dti) + + with pytest.raises(TypeError, match=msg): + TimedeltaIndex(np.asarray(dti)) + + def test_float64_ns_rounded(self): + # GH#23539 without specifying a unit, floats are regarded as nanos, + # and fractional portions are truncated + tdi = TimedeltaIndex([2.3, 9.7]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # integral floats are non-lossy + tdi = TimedeltaIndex([2.0, 9.0]) + 
expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # NaNs get converted to NaT + tdi = TimedeltaIndex([2.0, np.nan]) + expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT]) + tm.assert_index_equal(tdi, expected) + + def test_float64_unit_conversion(self): + # GH#23539 + tdi = TimedeltaIndex([1.5, 2.25], unit="D") + expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) + tm.assert_index_equal(tdi, expected) + + def test_construction_base_constructor(self): + arr = [pd.Timedelta("1 days"), pd.NaT, pd.Timedelta("3 days")] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), pd.TimedeltaIndex(np.array(arr))) + + arr = [np.nan, pd.NaT, pd.Timedelta("1 days")] + tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) + tm.assert_index_equal(pd.Index(np.array(arr)), pd.TimedeltaIndex(np.array(arr))) + + def test_constructor(self): + expected = TimedeltaIndex( + [ + "1 days", + "1 days 00:00:05", + "2 days", + "2 days 00:00:02", + "0 days 00:00:03", + ] + ) + result = TimedeltaIndex( + [ + "1 days", + "1 days, 00:00:05", + np.timedelta64(2, "D"), + timedelta(days=2, seconds=2), + pd.offsets.Second(3), + ] + ) + tm.assert_index_equal(result, expected) + + # unicode + result = TimedeltaIndex( + [ + "1 days", + "1 days, 00:00:05", + np.timedelta64(2, "D"), + timedelta(days=2, seconds=2), + pd.offsets.Second(3), + ] + ) + + expected = TimedeltaIndex( + ["0 days 00:00:00", "0 days 00:00:01", "0 days 00:00:02"] + ) + tm.assert_index_equal(TimedeltaIndex(range(3), unit="s"), expected) + expected = TimedeltaIndex( + ["0 days 00:00:00", "0 days 00:00:05", "0 days 00:00:09"] + ) + tm.assert_index_equal(TimedeltaIndex([0, 5, 9], unit="s"), expected) + expected = TimedeltaIndex( + ["0 days 00:00:00.400", "0 days 00:00:00.450", "0 days 00:00:01.200"] + ) + tm.assert_index_equal(TimedeltaIndex([400, 450, 1200], unit="ms"), expected) + + def test_constructor_iso(self): + 
# GH #21877 + expected = timedelta_range("1s", periods=9, freq="s") + durations = ["P0DT0H0M{}S".format(i) for i in range(1, 10)] + result = to_timedelta(durations) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + rng = timedelta_range("1 days", periods=10.5) + exp = timedelta_range("1 days", periods=10) + tm.assert_index_equal(rng, exp) + + msg = "periods must be a number, got foo" + with pytest.raises(TypeError, match=msg): + timedelta_range(start="1 days", periods="foo", freq="D") + + with pytest.raises(TypeError): + TimedeltaIndex("1 days") + + # generator expression + gen = (timedelta(i) for i in range(10)) + result = TimedeltaIndex(gen) + expected = TimedeltaIndex([timedelta(i) for i in range(10)]) + tm.assert_index_equal(result, expected) + + # NumPy string array + strings = np.array(["1 days", "2 days", "3 days"]) + result = TimedeltaIndex(strings) + expected = to_timedelta([1, 2, 3], unit="d") + tm.assert_index_equal(result, expected) + + from_ints = TimedeltaIndex(expected.asi8) + tm.assert_index_equal(from_ints, expected) + + # non-conforming freq + msg = ( + "Inferred frequency None from passed values does not conform to " + "passed frequency D" + ) + with pytest.raises(ValueError, match=msg): + TimedeltaIndex(["1 days", "2 days", "4 days"], freq="D") + + msg = ( + "Of the four parameters: start, end, periods, and freq, exactly " + "three must be specified" + ) + with pytest.raises(ValueError, match=msg): + timedelta_range(periods=10, freq="D") + + def test_constructor_name(self): + idx = timedelta_range(start="1 days", periods=1, freq="D", name="TEST") + assert idx.name == "TEST" + + # GH10025 + idx2 = TimedeltaIndex(idx, name="something else") + assert idx2.name == "something else" + + def test_constructor_no_precision_raises(self): + # GH-24753, GH-24739 + + msg = "with no precision is not allowed" + with pytest.raises(ValueError, match=msg): + pd.TimedeltaIndex(["2000"], dtype="timedelta64") + + with 
pytest.raises(ValueError, match=msg): + pd.Index(["2000"], dtype="timedelta64") + + def test_constructor_wrong_precision_raises(self): + with pytest.raises(ValueError): + pd.TimedeltaIndex(["2000"], dtype="timedelta64[us]") diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py new file mode 100644 index 00000000..1dfc5b53 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -0,0 +1,90 @@ +import pytest + +import pandas as pd +from pandas import TimedeltaIndex + + +class TestTimedeltaIndexRendering: + @pytest.mark.parametrize("method", ["__repr__", "__str__"]) + def test_representation(self, method): + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = "TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')" + + exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')" + + exp3 = "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')" + + exp4 = ( + "TimedeltaIndex(['1 days', '2 days', '3 days'], " + "dtype='timedelta64[ns]', freq='D')" + ) + + exp5 = ( + "TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', " + "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = getattr(idx, method)() + assert result == expected + + def test_representation_to_series(self): + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = """Series([], dtype: 
timedelta64[ns])""" + + exp2 = "0 1 days\ndtype: timedelta64[ns]" + + exp3 = "0 1 days\n1 2 days\ndtype: timedelta64[ns]" + + exp4 = "0 1 days\n1 2 days\n2 3 days\ndtype: timedelta64[ns]" + + exp5 = ( + "0 1 days 00:00:01\n" + "1 2 days 00:00:00\n" + "2 3 days 00:00:00\n" + "dtype: timedelta64[ns]" + ) + + with pd.option_context("display.width", 300): + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = repr(pd.Series(idx)) + assert result == expected + + def test_summary(self): + # GH#9116 + idx1 = TimedeltaIndex([], freq="D") + idx2 = TimedeltaIndex(["1 days"], freq="D") + idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D") + idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") + idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) + + exp1 = "TimedeltaIndex: 0 entries\nFreq: D" + + exp2 = "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: D" + + exp3 = "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: D" + + exp4 = "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: D" + + exp5 = "TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days 00:00:00" + + for idx, expected in zip( + [idx1, idx2, idx3, idx4, idx5], [exp1, exp2, exp3, exp4, exp5] + ): + result = idx._summary() + assert result == expected diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py new file mode 100644 index 00000000..e8665ee1 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -0,0 +1,384 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, Timedelta, TimedeltaIndex, notna, timedelta_range +import pandas._testing as tm + + +class TestGetItem: + def test_ellipsis(self): + # GH#21282 + idx = timedelta_range("1 day", "31 day", freq="D", name="idx") + + result = idx[...] 
+ assert result.equals(idx) + assert result is not idx + + def test_getitem(self): + idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx") + + for idx in [idx1]: + result = idx[0] + assert result == Timedelta("1 day") + + result = idx[0:5] + expected = timedelta_range("1 day", "5 day", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[0:10:2] + expected = timedelta_range("1 day", "9 day", freq="2D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[-20:-5:3] + expected = timedelta_range("12 day", "24 day", freq="3D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx[4::-1] + expected = TimedeltaIndex( + ["5 day", "4 day", "3 day", "2 day", "1 day"], freq="-1D", name="idx" + ) + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "key", + [pd.Timestamp("1970-01-01"), pd.Timestamp("1970-01-02"), datetime(1970, 1, 1)], + ) + def test_timestamp_invalid_key(self, key): + # GH#20464 + tdi = pd.timedelta_range(0, periods=10) + with pytest.raises(TypeError): + tdi.get_loc(key) + + +class TestWhere: + def test_where_invalid_dtypes(self): + tdi = timedelta_range("1 day", periods=3, freq="D", name="idx") + + i2 = tdi.copy() + i2 = Index([pd.NaT, pd.NaT] + tdi[2:].tolist()) + + with pytest.raises(TypeError, match="Where requires matching dtype"): + tdi.where(notna(i2), i2.asi8) + + with pytest.raises(TypeError, match="Where requires matching dtype"): + tdi.where(notna(i2), i2 + pd.Timestamp.now()) + + with pytest.raises(TypeError, match="Where requires matching dtype"): + tdi.where(notna(i2), (i2 + pd.Timestamp.now()).to_period("D")) + + +class TestTake: + def test_take(self): + # GH 10295 + idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx") + + for idx in [idx1]: + result = idx.take([0]) + assert result == 
Timedelta("1 day") + + result = idx.take([-1]) + assert result == Timedelta("31 day") + + result = idx.take([0, 1, 2]) + expected = timedelta_range("1 day", "3 day", freq="D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([0, 2, 4]) + expected = timedelta_range("1 day", "5 day", freq="2D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([7, 4, 1]) + expected = timedelta_range("8 day", "2 day", freq="-3D", name="idx") + tm.assert_index_equal(result, expected) + assert result.freq == expected.freq + + result = idx.take([3, 2, 5]) + expected = TimedeltaIndex(["4 day", "3 day", "6 day"], name="idx") + tm.assert_index_equal(result, expected) + assert result.freq is None + + result = idx.take([-3, 2, 5]) + expected = TimedeltaIndex(["29 day", "3 day", "6 day"], name="idx") + tm.assert_index_equal(result, expected) + assert result.freq is None + + def test_take_invalid_kwargs(self): + idx = timedelta_range("1 day", "31 day", freq="D", name="idx") + indices = [1, 6, 5, 9, 10, 13, 15, 3] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + # TODO: This method came from test_timedelta; de-dup with version above + def test_take2(self): + tds = ["1day 02:00:00", "1 day 04:00:00", "1 day 10:00:00"] + idx = timedelta_range(start="1d", end="2d", freq="H", name="idx") + expected = TimedeltaIndex(tds, freq=None, name="idx") + + taken1 = idx.take([2, 4, 10]) + taken2 = idx[[2, 4, 10]] + + for taken in [taken1, taken2]: + tm.assert_index_equal(taken, expected) + assert isinstance(taken, TimedeltaIndex) + assert 
taken.freq is None + assert taken.name == expected.name + + def test_take_fill_value(self): + # GH 12631 + idx = TimedeltaIndex(["1 days", "2 days", "3 days"], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = TimedeltaIndex(["2 days", "1 days", "NaT"], name="xxx") + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + +class TestTimedeltaIndex: + def test_insert_empty(self): + # Corner case inserting with length zero doesnt raise IndexError + idx = timedelta_range("1 Day", periods=3) + td = idx[0] + + idx[:0].insert(0, td) + idx[:0].insert(1, td) + idx[:0].insert(-1, td) + + def test_insert(self): + + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + + result = idx.insert(2, timedelta(days=5)) + exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx") + tm.assert_index_equal(result, exp) + + # insertion of non-datetime should coerce to object index + result = idx.insert(1, "inserted") + expected = Index( + [Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day")], + name="idx", + ) + assert not isinstance(result, TimedeltaIndex) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + idx = timedelta_range("1day 00:00:01", periods=3, freq="s", 
name="idx") + + # preserve freq + expected_0 = TimedeltaIndex( + ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq="s", + ) + expected_3 = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04"], + name="idx", + freq="s", + ) + + # reset freq to None + expected_1_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], + name="idx", + freq=None, + ) + expected_3_nofreq = TimedeltaIndex( + ["1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05"], + name="idx", + freq=None, + ) + + cases = [ + (0, Timedelta("1day"), expected_0), + (-3, Timedelta("1day"), expected_0), + (3, Timedelta("1day 00:00:04"), expected_3), + (1, Timedelta("1day 00:00:01"), expected_1_nofreq), + (3, Timedelta("1day 00:00:05"), expected_3_nofreq), + ] + + for n, d, expected in cases: + result = idx.insert(n, d) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "null", [None, np.nan, np.timedelta64("NaT"), pd.NaT, pd.NA] + ) + def test_insert_nat(self, null): + # GH 18295 (test missing) + idx = timedelta_range("1day", "3day") + result = idx.insert(1, null) + expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"]) + tm.assert_index_equal(result, expected) + + def test_insert_invalid_na(self): + idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") + with pytest.raises(TypeError, match="incompatible label"): + idx.insert(0, np.datetime64("NaT")) + + def test_insert_dont_cast_strings(self): + # To match DatetimeIndex and PeriodIndex behavior, dont try to + # parse strings to Timedelta + idx = timedelta_range("1day", "3day") + + result = idx.insert(0, "1 Day") + assert result.dtype == object + assert result[0] == "1 Day" + + def test_delete(self): + idx = timedelta_range(start="1 Days", periods=5, freq="D", name="idx") + + # preserve freq + expected_0 = timedelta_range(start="2 
Days", periods=4, freq="D", name="idx") + expected_4 = timedelta_range(start="1 Days", periods=4, freq="D", name="idx") + + # reset freq to None + expected_1 = TimedeltaIndex( + ["1 day", "3 day", "4 day", "5 day"], freq=None, name="idx" + ) + + cases = { + 0: expected_0, + -5: expected_0, + -1: expected_4, + 4: expected_4, + 1: expected_1, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + with pytest.raises((IndexError, ValueError)): + # either depending on numpy version + idx.delete(5) + + def test_delete_slice(self): + idx = timedelta_range(start="1 days", periods=10, freq="D", name="idx") + + # preserve freq + expected_0_2 = timedelta_range(start="4 days", periods=7, freq="D", name="idx") + expected_7_9 = timedelta_range(start="1 days", periods=7, freq="D", name="idx") + + # reset freq to None + expected_3_5 = TimedeltaIndex( + ["1 d", "2 d", "3 d", "7 d", "8 d", "9 d", "10d"], freq=None, name="idx" + ) + + cases = { + (0, 1, 2): expected_0_2, + (7, 8, 9): expected_7_9, + (3, 4, 5): expected_3_5, + } + for n, expected in cases.items(): + result = idx.delete(n) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + result = idx.delete(slice(n[0], n[-1] + 1)) + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + def test_get_loc(self): + idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + + for method in [None, "pad", "backfill", "nearest"]: + assert idx.get_loc(idx[1], method) == 1 + assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 + assert idx.get_loc(str(idx[1]), method) == 1 + + assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 + assert idx.get_loc(idx[1], "pad", tolerance=timedelta(0)) == 1 + 
+ with pytest.raises(ValueError, match="unit abbreviation w/o a number"): + idx.get_loc(idx[1], method="nearest", tolerance="foo") + + with pytest.raises(ValueError, match="tolerance size must match"): + idx.get_loc( + idx[1], + method="nearest", + tolerance=[ + Timedelta(0).to_timedelta64(), + Timedelta(0).to_timedelta64(), + ], + ) + + for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: + assert idx.get_loc("1 day 1 hour", method) == loc + + # GH 16909 + assert idx.get_loc(idx[1].to_timedelta64()) == 1 + + # GH 16896 + assert idx.get_loc("0 days") == 0 + + def test_get_loc_nat(self): + tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"]) + + assert tidx.get_loc(pd.NaT) == 1 + assert tidx.get_loc(None) == 1 + assert tidx.get_loc(float("nan")) == 1 + assert tidx.get_loc(np.nan) == 1 + + def test_get_indexer(self): + idx = pd.to_timedelta(["0 days", "1 days", "2 days"]) + tm.assert_numpy_array_equal( + idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp) + ) + + target = pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp) + ) + tm.assert_numpy_array_equal( + idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp) + ) + + res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour")) + tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp)) diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py new file mode 100644 index 00000000..aaf4ef29 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -0,0 +1,49 @@ +import numpy as np + +from pandas import Index, Timedelta, timedelta_range +import pandas._testing as tm + + +class TestJoin: + def test_append_join_nondatetimeindex(self): + rng = timedelta_range("1 days", periods=10) 
+ idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timedelta) + + # it works + rng.join(idx, how="outer") + + def test_join_self(self, join_type): + index = timedelta_range("1 day", periods=10) + joined = index.join(index, how=join_type) + tm.assert_index_equal(index, joined) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="td", + ) + str(df) + + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_index_equal(cols, joined) + + def test_join_preserves_freq(self): + # GH#32157 + tdi = timedelta_range("1 day", periods=10) + result = tdi[:5].join(tdi[5:], how="outer") + assert result.freq == tdi.freq + tm.assert_index_equal(result, tdi) + + result = tdi[:5].join(tdi[6:], how="outer") + assert result.freq is None + expected = tdi.delete(5) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py new file mode 100644 index 00000000..25f27da7 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -0,0 +1,316 @@ +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.generic import ABCDateOffset + +import pandas as pd +from pandas import Series, TimedeltaIndex, timedelta_range +import pandas._testing as tm +from pandas.tests.base.test_ops import Ops + +from pandas.tseries.offsets import Day, Hour + + +class TestTimedeltaIndexOps(Ops): + def setup_method(self, method): + super().setup_method(method) + mask = lambda x: isinstance(x, TimedeltaIndex) + self.is_valid_objs = [o for o in self.objs if mask(o)] + self.not_valid_objs = [] + + def test_ops_properties(self): + f = lambda x: isinstance(x, TimedeltaIndex) + 
self.check_ops_properties(TimedeltaIndex._field_ops, f) + self.check_ops_properties(TimedeltaIndex._object_ops, f) + + def test_value_counts_unique(self): + # GH 7735 + + idx = timedelta_range("1 days 09:00:00", freq="H", periods=10) + # create repeated values, 'n'th element is repeated by n+1 times + idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) + + exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10) + expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + expected = timedelta_range("1 days 09:00:00", freq="H", periods=10) + tm.assert_index_equal(idx.unique(), expected) + + idx = TimedeltaIndex( + [ + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 09:00:00", + "1 days 08:00:00", + "1 days 08:00:00", + pd.NaT, + ] + ) + + exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"]) + expected = Series([3, 2], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(), expected) + + exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT]) + expected = Series([3, 2, 1], index=exp_idx) + + for obj in [idx, Series(idx)]: + tm.assert_series_equal(obj.value_counts(dropna=False), expected) + + tm.assert_index_equal(idx.unique(), exp_idx) + + def test_nonunique_contains(self): + # GH 9512 + for idx in map( + TimedeltaIndex, + ( + [0, 1, 0], + [0, 0, -1], + [0, -1, -1], + ["00:01:00", "00:01:00", "00:02:00"], + ["00:01:00", "00:01:00", "00:00:01"], + ), + ): + assert idx[0] in idx + + def test_unknown_attribute(self): + # see gh-9680 + tdi = pd.timedelta_range(start=0, periods=10, freq="1s") + ts = pd.Series(np.random.normal(size=10), index=tdi) + assert "foo" not in ts.__dict__.keys() + msg = "'Series' object has no attribute 'foo'" + with pytest.raises(AttributeError, match=msg): + ts.foo + + def test_order(self): + # GH 10295 + idx1 = TimedeltaIndex(["1 day", "2 day", "3 
day"], freq="D", name="idx") + idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx") + + for idx in [idx1, idx2]: + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + assert ordered.freq == idx.freq + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) + assert ordered.freq == idx.freq + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, idx[::-1]) + assert ordered.freq == expected.freq + assert ordered.freq.n == -1 + + idx1 = TimedeltaIndex( + ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" + ) + exp1 = TimedeltaIndex( + ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" + ) + + idx2 = TimedeltaIndex( + ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2" + ) + + # TODO(wesm): unused? 
+ # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', + # '3 day', '5 day'], name='idx2') + + # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', + # '2 minute', pd.NaT], name='idx3') + # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', + # '5 minute'], name='idx3') + + for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + assert ordered.freq is None + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + assert ordered.freq is None + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 4, 0]) + tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) + assert ordered.freq is None + + def test_drop_duplicates_metadata(self): + # GH 10115 + idx = pd.timedelta_range("1 day", "31 day", freq="D", name="idx") + result = idx.drop_duplicates() + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq + + idx_dup = idx.append(idx) + assert idx_dup.freq is None # freq is reset + result = idx_dup.drop_duplicates() + tm.assert_index_equal(idx, result) + assert result.freq is None + + def test_drop_duplicates(self): + # to check Index/Series compat + base = pd.timedelta_range("1 day", "31 day", freq="D", name="idx") + idx = base.append(base[:5]) + + res = idx.drop_duplicates() + tm.assert_index_equal(res, base) + res = Series(idx).drop_duplicates() + tm.assert_series_equal(res, Series(base)) + + res = idx.drop_duplicates(keep="last") + exp = base[5:].append(base[:5]) + tm.assert_index_equal(res, exp) + res = Series(idx).drop_duplicates(keep="last") + tm.assert_series_equal(res, Series(exp, 
index=np.arange(5, 36))) + + res = idx.drop_duplicates(keep=False) + tm.assert_index_equal(res, base[5:]) + res = Series(idx).drop_duplicates(keep=False) + tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31))) + + @pytest.mark.parametrize( + "freq", ["D", "3D", "-3D", "H", "2H", "-2H", "T", "2T", "S", "-3S"] + ) + def test_infer_freq(self, freq): + # GH#11018 + idx = pd.timedelta_range("1", freq=freq, periods=10) + result = pd.TimedeltaIndex(idx.asi8, freq="infer") + tm.assert_index_equal(idx, result) + assert result.freq == freq + + def test_shift(self): + pass # handled in test_arithmetic.py + + def test_repeat(self): + index = pd.timedelta_range("1 days", periods=2, freq="D") + exp = pd.TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"]) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = TimedeltaIndex(["1 days", "NaT", "3 days"]) + exp = TimedeltaIndex( + [ + "1 days", + "1 days", + "1 days", + "NaT", + "NaT", + "NaT", + "3 days", + "3 days", + "3 days", + ] + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + def test_nat(self): + assert pd.TimedeltaIndex._na_value is pd.NaT + assert pd.TimedeltaIndex([])._na_value is pd.NaT + + idx = pd.TimedeltaIndex(["1 days", "2 days"]) + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) + + idx = pd.TimedeltaIndex(["1 days", "NaT"]) + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True + tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) + + def test_equals(self): + # GH 13107 + idx = pd.TimedeltaIndex(["1 days", "2 days", "NaT"]) + assert idx.equals(idx) + assert idx.equals(idx.copy()) + assert idx.equals(idx.astype(object)) 
+ assert idx.astype(object).equals(idx) + assert idx.astype(object).equals(idx.astype(object)) + assert not idx.equals(list(idx)) + assert not idx.equals(pd.Series(idx)) + + idx2 = pd.TimedeltaIndex(["2 days", "1 days", "NaT"]) + assert not idx.equals(idx2) + assert not idx.equals(idx2.copy()) + assert not idx.equals(idx2.astype(object)) + assert not idx.astype(object).equals(idx2) + assert not idx.astype(object).equals(idx2.astype(object)) + assert not idx.equals(list(idx2)) + assert not idx.equals(pd.Series(idx2)) + + # Check that we dont raise OverflowError on comparisons outside the + # implementation range + oob = pd.Index([timedelta(days=10 ** 6)] * 3, dtype=object) + assert not idx.equals(oob) + assert not idx2.equals(oob) + + # FIXME: oob.apply(np.timedelta64) incorrectly overflows + oob2 = pd.Index([np.timedelta64(x) for x in oob], dtype=object) + assert not idx.equals(oob2) + assert not idx2.equals(oob2) + + @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) + @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)]) + def test_freq_setter(self, values, freq): + # GH 20678 + idx = TimedeltaIndex(values) + + # can set to an offset, converting from string if necessary + idx._data.freq = freq + assert idx.freq == freq + assert isinstance(idx.freq, ABCDateOffset) + + # can reset to None + idx._data.freq = None + assert idx.freq is None + + def test_freq_setter_errors(self): + # GH 20678 + idx = TimedeltaIndex(["0 days", "2 days", "4 days"]) + + # setting with an incompatible freq + msg = ( + "Inferred frequency 2D from passed values does not conform to " + "passed frequency 5D" + ) + with pytest.raises(ValueError, match=msg): + idx._data.freq = "5D" + + # setting with a non-fixed frequency + msg = r"<2 \* BusinessDays> is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + idx._data.freq = "2B" + + # setting with non-freq string + with pytest.raises(ValueError, match="Invalid frequency"): + idx._data.freq = 
"foo" diff --git a/pandas/tests/indexes/timedeltas/test_partial_slicing.py b/pandas/tests/indexes/timedeltas/test_partial_slicing.py new file mode 100644 index 00000000..29e2c7dd --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_partial_slicing.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Series, Timedelta, timedelta_range +import pandas._testing as tm + + +class TestSlicing: + def test_slice_keeps_name(self): + # GH4226 + dr = pd.timedelta_range("1d", "5d", freq="H", name="timebucket") + assert dr[1:].name == dr.name + + def test_partial_slice(self): + rng = timedelta_range("1 day 10:11:12", freq="h", periods=500) + s = Series(np.arange(len(rng)), index=rng) + + result = s["5 day":"6 day"] + expected = s.iloc[86:134] + tm.assert_series_equal(result, expected) + + result = s["5 day":] + expected = s.iloc[86:] + tm.assert_series_equal(result, expected) + + result = s[:"6 day"] + expected = s.iloc[:134] + tm.assert_series_equal(result, expected) + + result = s["6 days, 23:11:12"] + assert result == s.iloc[133] + + msg = r"^Timedelta\('50 days 00:00:00'\)$" + with pytest.raises(KeyError, match=msg): + s["50 days"] + + def test_partial_slice_high_reso(self): + + # higher reso + rng = timedelta_range("1 day 10:11:12", freq="us", periods=2000) + s = Series(np.arange(len(rng)), index=rng) + + result = s["1 day 10:11:12":] + expected = s.iloc[0:] + tm.assert_series_equal(result, expected) + + result = s["1 day 10:11:12.001":] + expected = s.iloc[1000:] + tm.assert_series_equal(result, expected) + + result = s["1 days, 10:11:12.001001"] + assert result == s.iloc[1001] + + def test_slice_with_negative_step(self): + ts = Series(np.arange(20), timedelta_range("0", periods=20, freq="H")) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(ts[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + tm.assert_series_equal(ts.loc[l_slc], ts.iloc[i_slc]) + 
+ assert_slices_equivalent(SLC[Timedelta(hours=7) :: -1], SLC[7::-1]) + assert_slices_equivalent(SLC["7 hours"::-1], SLC[7::-1]) + + assert_slices_equivalent(SLC[: Timedelta(hours=7) : -1], SLC[:6:-1]) + assert_slices_equivalent(SLC[:"7 hours":-1], SLC[:6:-1]) + + assert_slices_equivalent(SLC["15 hours":"7 hours":-1], SLC[15:6:-1]) + assert_slices_equivalent( + SLC[Timedelta(hours=15) : Timedelta(hours=7) : -1], SLC[15:6:-1] + ) + assert_slices_equivalent( + SLC["15 hours" : Timedelta(hours=7) : -1], SLC[15:6:-1] + ) + assert_slices_equivalent( + SLC[Timedelta(hours=15) : "7 hours" : -1], SLC[15:6:-1] + ) + + assert_slices_equivalent(SLC["7 hours":"15 hours":-1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + ts = Series(np.arange(20), timedelta_range("0", periods=20, freq="H")) + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts[::0] + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts.loc[::0] + with pytest.raises(ValueError, match="slice step cannot be zero"): + ts.loc[::0] diff --git a/pandas/tests/indexes/timedeltas/test_scalar_compat.py b/pandas/tests/indexes/timedeltas/test_scalar_compat.py new file mode 100644 index 00000000..44f4a2ad --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_scalar_compat.py @@ -0,0 +1,71 @@ +""" +Tests for TimedeltaIndex methods behaving like their Timedelta counterparts +""" + +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, Series, Timedelta, TimedeltaIndex, timedelta_range +import pandas._testing as tm + + +class TestVectorizedTimedelta: + def test_tdi_total_seconds(self): + # GH#10939 + # test index + rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") + expt = [ + 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9, + 1 * 86400 + 10 * 3600 + 11 * 60 + 13 + 100123456.0 / 1e9, + ] + tm.assert_almost_equal(rng.total_seconds(), Index(expt)) + + # test Series + ser = Series(rng) + s_expt = Series(expt, 
index=[0, 1]) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with nat + ser[1] = np.nan + s_expt = Series( + [1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9, np.nan], + index=[0, 1], + ) + tm.assert_series_equal(ser.dt.total_seconds(), s_expt) + + # with both nat + ser = Series([np.nan, np.nan], dtype="timedelta64[ns]") + tm.assert_series_equal( + ser.dt.total_seconds(), Series([np.nan, np.nan], index=[0, 1]) + ) + + def test_tdi_round(self): + td = pd.timedelta_range(start="16801 days", periods=5, freq="30Min") + elt = td[1] + + expected_rng = TimedeltaIndex( + [ + Timedelta("16801 days 00:00:00"), + Timedelta("16801 days 00:00:00"), + Timedelta("16801 days 01:00:00"), + Timedelta("16801 days 02:00:00"), + Timedelta("16801 days 02:00:00"), + ] + ) + expected_elt = expected_rng[1] + + tm.assert_index_equal(td.round(freq="H"), expected_rng) + assert elt.round(freq="H") == expected_elt + + msg = pd._libs.tslibs.frequencies.INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + td.round(freq="foo") + with pytest.raises(ValueError, match=msg): + elt.round(freq="foo") + + msg = " is a non-fixed frequency" + with pytest.raises(ValueError, match=msg): + td.round(freq="M") + with pytest.raises(ValueError, match=msg): + elt.round(freq="M") diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py new file mode 100644 index 00000000..0aa784cb --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -0,0 +1,260 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Int64Index, TimedeltaIndex, timedelta_range +import pandas._testing as tm + +from pandas.tseries.offsets import Hour + + +class TestTimedeltaIndex: + def test_union(self): + + i1 = timedelta_range("1day", periods=5) + i2 = timedelta_range("3day", periods=5) + result = i1.union(i2) + expected = timedelta_range("1day", periods=7) + tm.assert_index_equal(result, expected) + + i1 = 
Int64Index(np.arange(0, 20, 2)) + i2 = timedelta_range(start="1 day", periods=10, freq="D") + i1.union(i2) # Works + i2.union(i1) # Fails with "AttributeError: can't set attribute" + + def test_union_sort_false(self): + tdi = timedelta_range("1day", periods=5) + + left = tdi[3:] + right = tdi[:3] + + # Check that we are testing the desired code path + assert left._can_fast_union(right) + + result = left.union(right) + tm.assert_index_equal(result, tdi) + + result = left.union(right, sort=False) + expected = pd.TimedeltaIndex(["4 Days", "5 Days", "1 Days", "2 Day", "3 Days"]) + tm.assert_index_equal(result, expected) + + def test_union_coverage(self): + + idx = TimedeltaIndex(["3d", "1d", "2d"]) + ordered = TimedeltaIndex(idx.sort_values(), freq="infer") + result = ordered.union(idx) + tm.assert_index_equal(result, ordered) + + result = ordered[:0].union(ordered) + tm.assert_index_equal(result, ordered) + assert result.freq == ordered.freq + + def test_union_bug_1730(self): + + rng_a = timedelta_range("1 day", periods=4, freq="3H") + rng_b = timedelta_range("1 day", periods=4, freq="4H") + + result = rng_a.union(rng_b) + exp = TimedeltaIndex(sorted(set(rng_a) | set(rng_b))) + tm.assert_index_equal(result, exp) + + def test_union_bug_1745(self): + + left = TimedeltaIndex(["1 day 15:19:49.695000"]) + right = TimedeltaIndex( + ["2 day 13:04:21.322000", "1 day 15:27:24.873000", "1 day 15:31:05.350000"] + ) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(left) | set(right))) + tm.assert_index_equal(result, exp) + + def test_union_bug_4564(self): + + left = timedelta_range("1 day", "30d") + right = left + pd.offsets.Minute(15) + + result = left.union(right) + exp = TimedeltaIndex(sorted(set(left) | set(right))) + tm.assert_index_equal(result, exp) + + def test_union_freq_infer(self): + # When taking the union of two TimedeltaIndexes, we infer + # a freq even if the arguments don't have freq. This matches + # DatetimeIndex behavior. 
+ tdi = pd.timedelta_range("1 Day", periods=5) + left = tdi[[0, 1, 3, 4]] + right = tdi[[2, 3, 1]] + + assert left.freq is None + assert right.freq is None + + result = left.union(right) + tm.assert_index_equal(result, tdi) + assert result.freq == "D" + + def test_intersection_bug_1708(self): + index_1 = timedelta_range("1 day", periods=4, freq="h") + index_2 = index_1 + pd.offsets.Hour(5) + + result = index_1 & index_2 + assert len(result) == 0 + + index_1 = timedelta_range("1 day", periods=4, freq="h") + index_2 = index_1 + pd.offsets.Hour(1) + + result = index_1 & index_2 + expected = timedelta_range("1 day 01:00:00", periods=3, freq="h") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_equal(self, sort): + # GH 24471 Test intersection outcome given the sort keyword + # for equal indicies intersection should return the original index + first = timedelta_range("1 day", periods=4, freq="h") + second = timedelta_range("1 day", periods=4, freq="h") + intersect = first.intersection(second, sort=sort) + if sort is None: + tm.assert_index_equal(intersect, second.sort_values()) + assert tm.equalContents(intersect, second) + + # Corner cases + inter = first.intersection(first, sort=sort) + assert inter is first + + @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)]) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_zero_length(self, period_1, period_2, sort): + # GH 24471 test for non overlap the intersection should be zero length + index_1 = timedelta_range("1 day", periods=period_1, freq="h") + index_2 = timedelta_range("1 day", periods=period_2, freq="h") + expected = timedelta_range("1 day", periods=0, freq="h") + result = index_1.intersection(index_2, sort=sort) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_zero_length_input_index(self, sort): + # GH 24966 test for 0-len intersections are copied + 
index_1 = timedelta_range("1 day", periods=0, freq="h") + index_2 = timedelta_range("1 day", periods=3, freq="h") + result = index_1.intersection(index_2, sort=sort) + assert index_1 is not result + assert index_2 is not result + tm.assert_copy(result, index_1) + + @pytest.mark.parametrize( + "rng, expected", + # if target has the same name, it is preserved + [ + ( + timedelta_range("1 day", periods=5, freq="h", name="idx"), + timedelta_range("1 day", periods=4, freq="h", name="idx"), + ), + # if target name is different, it will be reset + ( + timedelta_range("1 day", periods=5, freq="h", name="other"), + timedelta_range("1 day", periods=4, freq="h", name=None), + ), + # if no overlap exists return empty index + ( + timedelta_range("1 day", periods=10, freq="h", name="idx")[5:], + TimedeltaIndex([], name="idx"), + ), + ], + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection(self, rng, expected, sort): + # GH 4690 (with tz) + base = timedelta_range("1 day", periods=4, freq="h", name="idx") + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + assert result.freq == expected.freq + + @pytest.mark.parametrize( + "rng, expected", + # part intersection works + [ + ( + TimedeltaIndex(["5 hour", "2 hour", "4 hour", "9 hour"], name="idx"), + TimedeltaIndex(["2 hour", "4 hour"], name="idx"), + ), + # reordered part intersection + ( + TimedeltaIndex(["2 hour", "5 hour", "5 hour", "1 hour"], name="other"), + TimedeltaIndex(["1 hour", "2 hour"], name=None), + ), + # reveresed index + ( + TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx")[ + ::-1 + ], + TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx"), + ), + ], + ) + @pytest.mark.parametrize("sort", [None, False]) + def test_intersection_non_monotonic(self, rng, expected, sort): + # 24471 non-monotonic + base = TimedeltaIndex(["1 hour", "2 
hour", "4 hour", "3 hour"], name="idx") + result = base.intersection(rng, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + assert result.name == expected.name + + # if reveresed order, frequency is still the same + if all(base == rng[::-1]) and sort is None: + assert isinstance(result.freq, Hour) + else: + assert result.freq is None + + +class TestTimedeltaIndexDifference: + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_freq(self, sort): + # GH14323: Difference of TimedeltaIndex should not preserve frequency + + index = timedelta_range("0 days", "5 days", freq="D") + + other = timedelta_range("1 days", "4 days", freq="D") + expected = TimedeltaIndex(["0 days", "5 days"], freq=None) + idx_diff = index.difference(other, sort) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other, sort) + expected = TimedeltaIndex(["0 days", "1 days"], freq=None) + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + @pytest.mark.parametrize("sort", [None, False]) + def test_difference_sort(self, sort): + + index = pd.TimedeltaIndex( + ["5 days", "3 days", "2 days", "4 days", "1 days", "0 days"] + ) + + other = timedelta_range("1 days", "4 days", freq="D") + idx_diff = index.difference(other, sort) + + expected = TimedeltaIndex(["5 days", "0 days"], freq=None) + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) + + other = timedelta_range("2 days", "5 days", freq="D") + idx_diff = index.difference(other, sort) + expected = TimedeltaIndex(["1 days", "0 days"], freq=None) + + if sort is None: + expected = expected.sort_values() + + tm.assert_index_equal(idx_diff, expected) + tm.assert_attr_equal("freq", idx_diff, expected) diff 
--git a/pandas/tests/indexes/timedeltas/test_shift.py b/pandas/tests/indexes/timedeltas/test_shift.py new file mode 100644 index 00000000..98933ff0 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_shift.py @@ -0,0 +1,75 @@ +import pytest + +from pandas.errors import NullFrequencyError + +import pandas as pd +from pandas import TimedeltaIndex +import pandas._testing as tm + + +class TestTimedeltaIndexShift: + + # ------------------------------------------------------------- + # TimedeltaIndex.shift is used by __add__/__sub__ + + def test_tdi_shift_empty(self): + # GH#9903 + idx = pd.TimedeltaIndex([], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + tm.assert_index_equal(idx.shift(3, freq="H"), idx) + + def test_tdi_shift_hours(self): + # GH#9903 + idx = pd.TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="H"), idx) + exp = pd.TimedeltaIndex(["8 hours", "9 hours", "12 hours"], name="xxx") + tm.assert_index_equal(idx.shift(3, freq="H"), exp) + exp = pd.TimedeltaIndex(["2 hours", "3 hours", "6 hours"], name="xxx") + tm.assert_index_equal(idx.shift(-3, freq="H"), exp) + + def test_tdi_shift_minutes(self): + # GH#9903 + idx = pd.TimedeltaIndex(["5 hours", "6 hours", "9 hours"], name="xxx") + tm.assert_index_equal(idx.shift(0, freq="T"), idx) + exp = pd.TimedeltaIndex(["05:03:00", "06:03:00", "9:03:00"], name="xxx") + tm.assert_index_equal(idx.shift(3, freq="T"), exp) + exp = pd.TimedeltaIndex(["04:57:00", "05:57:00", "8:57:00"], name="xxx") + tm.assert_index_equal(idx.shift(-3, freq="T"), exp) + + def test_tdi_shift_int(self): + # GH#8083 + trange = pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + result = trange.shift(1) + expected = TimedeltaIndex( + [ + "1 days 01:00:00", + "2 days 01:00:00", + "3 days 01:00:00", + "4 days 01:00:00", + "5 days 01:00:00", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_tdi_shift_nonstandard_freq(self): + # GH#8083 + 
trange = pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + result = trange.shift(3, freq="2D 1s") + expected = TimedeltaIndex( + [ + "6 days 01:00:03", + "7 days 01:00:03", + "8 days 01:00:03", + "9 days 01:00:03", + "10 days 01:00:03", + ], + freq="D", + ) + tm.assert_index_equal(result, expected) + + def test_shift_no_freq(self): + # GH#19147 + tdi = TimedeltaIndex(["1 days 01:00:00", "2 days 01:00:00"], freq=None) + with pytest.raises(NullFrequencyError): + tdi.shift(2) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py new file mode 100644 index 00000000..3b52b93f --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -0,0 +1,300 @@ +from datetime import timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + Int64Index, + Series, + Timedelta, + TimedeltaIndex, + date_range, + timedelta_range, +) +import pandas._testing as tm + +from ..datetimelike import DatetimeLike + +randn = np.random.randn + + +class TestTimedeltaIndex(DatetimeLike): + _holder = TimedeltaIndex + + @pytest.fixture + def indices(self): + return tm.makeTimedeltaIndex(10) + + def create_index(self): + return pd.to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + + def test_numeric_compat(self): + # Dummy method to override super's version; this test is now done + # in test_arithmetic.py + pass + + def test_shift(self): + pass # this is handled in test_arithmetic.py + + def test_pickle_compat_construction(self): + pass + + def test_fillna_timedelta(self): + # GH 11343 + idx = pd.TimedeltaIndex(["1 day", pd.NaT, "3 day"]) + + exp = pd.TimedeltaIndex(["1 day", "2 day", "3 day"]) + tm.assert_index_equal(idx.fillna(pd.Timedelta("2 day")), exp) + + exp = pd.TimedeltaIndex(["1 day", "3 hour", "3 day"]) + idx.fillna(pd.Timedelta("3 hour")) + + exp = pd.Index( + [pd.Timedelta("1 day"), "x", pd.Timedelta("3 day")], dtype=object + ) + 
tm.assert_index_equal(idx.fillna("x"), exp) + + def test_isin(self): + + index = tm.makeTimedeltaIndex(4) + result = index.isin(index) + assert result.all() + + result = index.isin(list(index)) + assert result.all() + + tm.assert_almost_equal( + index.isin([index[2], 5]), np.array([False, False, True, False]) + ) + + def test_factorize(self): + idx1 = TimedeltaIndex(["1 day", "1 day", "2 day", "2 day", "3 day", "3 day"]) + + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) + exp_idx = TimedeltaIndex(["1 day", "2 day", "3 day"]) + + arr, idx = idx1.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + arr, idx = idx1.factorize(sort=True) + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, exp_idx) + + # freq must be preserved + idx3 = timedelta_range("1 day", periods=4, freq="s") + exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) + arr, idx = idx3.factorize() + tm.assert_numpy_array_equal(arr, exp_arr) + tm.assert_index_equal(idx, idx3) + + def test_join_self(self, join_type): + index = timedelta_range("1 day", periods=10) + joined = index.join(index, how=join_type) + tm.assert_index_equal(index, joined) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: randn(), + r_idx_type="i", + c_idx_type="td", + ) + str(df) + + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_index_equal(cols, joined) + + def test_sort_values(self): + + idx = TimedeltaIndex(["4d", "1d", "2d"]) + + ordered = idx.sort_values() + assert ordered.is_monotonic + + ordered = idx.sort_values(ascending=False) + assert ordered[::-1].is_monotonic + + ordered, dexer = idx.sort_values(return_indexer=True) + assert ordered.is_monotonic + + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0]), check_dtype=False) + + ordered, dexer = 
idx.sort_values(return_indexer=True, ascending=False) + assert ordered[::-1].is_monotonic + + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), check_dtype=False) + + def test_argmin_argmax(self): + idx = TimedeltaIndex(["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]) + assert idx.argmin() == 1 + assert idx.argmax() == 0 + + def test_misc_coverage(self): + + rng = timedelta_range("1 day", periods=5) + result = rng.groupby(rng.days) + assert isinstance(list(result.values())[0][0], Timedelta) + + idx = TimedeltaIndex(["3d", "1d", "2d"]) + assert not idx.equals(list(idx)) + + non_td = Index(list("abc")) + assert not idx.equals(list(non_td)) + + def test_map(self): + # test_map_dictlike generally tests + + rng = timedelta_range("1 day", periods=10) + + f = lambda x: x.days + result = rng.map(f) + exp = Int64Index([f(x) for x in rng]) + tm.assert_index_equal(result, exp) + + def test_pass_TimedeltaIndex_to_index(self): + + rng = timedelta_range("1 days", "10 days") + idx = Index(rng, dtype=object) + + expected = Index(rng.to_pytimedelta(), dtype=object) + + tm.assert_numpy_array_equal(idx.values, expected.values) + + def test_pickle(self): + + rng = timedelta_range("1 days", periods=10) + rng_p = tm.round_trip_pickle(rng) + tm.assert_index_equal(rng, rng_p) + + def test_hash_error(self): + index = timedelta_range("1 days", periods=10) + with pytest.raises( + TypeError, match=(f"unhashable type: {repr(type(index).__name__)}") + ): + hash(index) + + def test_append_join_nondatetimeindex(self): + rng = timedelta_range("1 days", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timedelta) + + # it works + rng.join(idx, how="outer") + + def test_append_numpy_bug_1681(self): + + td = timedelta_range("1 days", "10 days", freq="2D") + a = DataFrame() + c = DataFrame({"A": "foo", "B": td}, index=td) + str(c) + + result = a.append(c) + assert (result["B"] == td).all() + + def 
test_delete_doesnt_infer_freq(self): + # GH#30655 behavior matches DatetimeIndex + + tdi = pd.TimedeltaIndex(["1 Day", "2 Days", None, "3 Days", "4 Days"]) + result = tdi.delete(2) + assert result.freq is None + + def test_fields(self): + rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") + tm.assert_index_equal(rng.days, Index([1, 1], dtype="int64")) + tm.assert_index_equal( + rng.seconds, + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype="int64"), + ) + tm.assert_index_equal( + rng.microseconds, Index([100 * 1000 + 123, 100 * 1000 + 123], dtype="int64") + ) + tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype="int64")) + + msg = "'TimedeltaIndex' object has no attribute '{}'" + with pytest.raises(AttributeError, match=msg.format("hours")): + rng.hours + with pytest.raises(AttributeError, match=msg.format("minutes")): + rng.minutes + with pytest.raises(AttributeError, match=msg.format("milliseconds")): + rng.milliseconds + + # with nat + s = Series(rng) + s[1] = np.nan + + tm.assert_series_equal(s.dt.days, Series([1, np.nan], index=[0, 1])) + tm.assert_series_equal( + s.dt.seconds, Series([10 * 3600 + 11 * 60 + 12, np.nan], index=[0, 1]) + ) + + # preserve name (GH15589) + rng.name = "name" + assert rng.days.name == "name" + + def test_freq_conversion(self): + + # doc example + + # series + td = Series(date_range("20130101", periods=4)) - Series( + date_range("20121201", periods=4) + ) + td[2] += timedelta(minutes=5, seconds=3) + td[3] = np.nan + + result = td / np.timedelta64(1, "D") + expected = Series([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) + tm.assert_series_equal(result, expected) + + result = td.astype("timedelta64[D]") + expected = Series([31, 31, 31, np.nan]) + tm.assert_series_equal(result, expected) + + result = td / np.timedelta64(1, "s") + expected = Series([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan]) + tm.assert_series_equal(result, expected) + + result = 
td.astype("timedelta64[s]") + tm.assert_series_equal(result, expected) + + # tdi + td = TimedeltaIndex(td) + + result = td / np.timedelta64(1, "D") + expected = Index([31, 31, (31 * 86400 + 5 * 60 + 3) / 86400.0, np.nan]) + tm.assert_index_equal(result, expected) + + result = td.astype("timedelta64[D]") + expected = Index([31, 31, 31, np.nan]) + tm.assert_index_equal(result, expected) + + result = td / np.timedelta64(1, "s") + expected = Index([31 * 86400, 31 * 86400, 31 * 86400 + 5 * 60 + 3, np.nan]) + tm.assert_index_equal(result, expected) + + result = td.astype("timedelta64[s]") + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("unit", ["Y", "y", "M"]) + def test_unit_m_y_raises(self, unit): + msg = "Units 'M' and 'Y' are no longer supported" + with pytest.raises(ValueError, match=msg): + TimedeltaIndex([1, 3, 7], unit) + + +class TestTimeSeries: + def test_series_box_timedelta(self): + rng = timedelta_range("1 day 1 s", periods=5, freq="h") + s = Series(rng) + assert isinstance(s[1], Timedelta) + assert isinstance(s.iat[2], Timedelta) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py new file mode 100644 index 00000000..1cef9de6 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import timedelta_range, to_timedelta +import pandas._testing as tm + +from pandas.tseries.offsets import Day, Second + + +class TestTimedeltas: + def test_timedelta_range(self): + + expected = to_timedelta(np.arange(5), unit="D") + result = timedelta_range("0 days", periods=5, freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(11), unit="D") + result = timedelta_range("0 days", "10 days", freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day() + result = timedelta_range("1 
days, 00:00:02", "5 days, 00:00:02", freq="D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2) + result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D") + tm.assert_index_equal(result, expected) + + expected = to_timedelta(np.arange(50), unit="T") * 30 + result = timedelta_range("0 days", freq="30T", periods=50) + tm.assert_index_equal(result, expected) + + # GH 11776 + arr = np.arange(10).reshape(2, 5) + df = pd.DataFrame(np.arange(10).reshape(2, 5)) + for arg in (arr, df): + with pytest.raises(TypeError, match="1-d array"): + to_timedelta(arg) + for errors in ["ignore", "raise", "coerce"]: + with pytest.raises(TypeError, match="1-d array"): + to_timedelta(arg, errors=errors) + + # issue10583 + df = pd.DataFrame(np.random.normal(size=(10, 4))) + df.index = pd.timedelta_range(start="0s", periods=10, freq="s") + expected = df.loc[pd.Timedelta("0s") :, :] + result = df.loc["0s":, :] + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "periods, freq", [(3, "2D"), (5, "D"), (6, "19H12T"), (7, "16H"), (9, "12H")] + ) + def test_linspace_behavior(self, periods, freq): + # GH 20976 + result = timedelta_range(start="0 days", end="4 days", periods=periods) + expected = timedelta_range(start="0 days", end="4 days", freq=freq) + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ( + "Of the four parameters: start, end, periods, and freq, " + "exactly three must be specified" + ) + with pytest.raises(ValueError, match=msg): + timedelta_range(start="0 days") + + with pytest.raises(ValueError, match=msg): + timedelta_range(end="5 days") + + with pytest.raises(ValueError, match=msg): + timedelta_range(periods=2) + + with pytest.raises(ValueError, match=msg): + timedelta_range() + + # too many params + with pytest.raises(ValueError, match=msg): + timedelta_range(start="0 days", end="5 days", periods=10, freq="H") diff --git 
a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py new file mode 100644 index 00000000..477fc092 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -0,0 +1,146 @@ +from datetime import time, timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import Series, TimedeltaIndex, isna, to_timedelta +import pandas._testing as tm + + +class TestTimedeltas: + def test_to_timedelta(self): + + result = to_timedelta(["", ""]) + assert isna(result).all() + + # pass thru + result = to_timedelta(np.array([np.timedelta64(1, "s")])) + expected = pd.Index(np.array([np.timedelta64(1, "s")])) + tm.assert_index_equal(result, expected) + + # Series + expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) + result = to_timedelta(Series(["1d", "1days 00:00:01"])) + tm.assert_series_equal(result, expected) + + # with units + result = TimedeltaIndex( + [np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")] + ) + expected = to_timedelta([0, 10], unit="s") + tm.assert_index_equal(result, expected) + + # arrays of various dtypes + arr = np.array([1] * 5, dtype="int64") + result = to_timedelta(arr, unit="s") + expected = TimedeltaIndex([np.timedelta64(1, "s")] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype="int64") + result = to_timedelta(arr, unit="m") + expected = TimedeltaIndex([np.timedelta64(1, "m")] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype="int64") + result = to_timedelta(arr, unit="h") + expected = TimedeltaIndex([np.timedelta64(1, "h")] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype="timedelta64[s]") + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, "s")] * 5) + tm.assert_index_equal(result, expected) + + arr = np.array([1] * 5, dtype="timedelta64[D]") + result = to_timedelta(arr) + expected = TimedeltaIndex([np.timedelta64(1, "D")] * 5) 
+ tm.assert_index_equal(result, expected) + + def test_to_timedelta_invalid(self): + + # bad value for errors parameter + msg = "errors must be one of" + with pytest.raises(ValueError, match=msg): + to_timedelta(["foo"], errors="never") + + # these will error + msg = "invalid unit abbreviation: foo" + with pytest.raises(ValueError, match=msg): + to_timedelta([1, 2], unit="foo") + with pytest.raises(ValueError, match=msg): + to_timedelta(1, unit="foo") + + # time not supported ATM + msg = ( + "Value must be Timedelta, string, integer, float, timedelta or convertible" + ) + with pytest.raises(ValueError, match=msg): + to_timedelta(time(second=1)) + assert to_timedelta(time(second=1), errors="coerce") is pd.NaT + + msg = "unit abbreviation w/o a number" + with pytest.raises(ValueError, match=msg): + to_timedelta(["foo", "bar"]) + tm.assert_index_equal( + TimedeltaIndex([pd.NaT, pd.NaT]), + to_timedelta(["foo", "bar"], errors="coerce"), + ) + + tm.assert_index_equal( + TimedeltaIndex(["1 day", pd.NaT, "1 min"]), + to_timedelta(["1 day", "bar", "1 min"], errors="coerce"), + ) + + # gh-13613: these should not error because errors='ignore' + invalid_data = "apple" + assert invalid_data == to_timedelta(invalid_data, errors="ignore") + + invalid_data = ["apple", "1 days"] + tm.assert_numpy_array_equal( + np.array(invalid_data, dtype=object), + to_timedelta(invalid_data, errors="ignore"), + ) + + invalid_data = pd.Index(["apple", "1 days"]) + tm.assert_index_equal(invalid_data, to_timedelta(invalid_data, errors="ignore")) + + invalid_data = Series(["apple", "1 days"]) + tm.assert_series_equal( + invalid_data, to_timedelta(invalid_data, errors="ignore") + ) + + def test_to_timedelta_via_apply(self): + # GH 5458 + expected = Series([np.timedelta64(1, "s")]) + result = Series(["00:00:01"]).apply(to_timedelta) + tm.assert_series_equal(result, expected) + + result = Series([to_timedelta("00:00:01")]) + tm.assert_series_equal(result, expected) + + def 
test_to_timedelta_on_missing_values(self): + # GH5438 + timedelta_NaT = np.timedelta64("NaT") + + actual = pd.to_timedelta(Series(["00:00:01", np.nan])) + expected = Series( + [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype=" obj.ndim - 1: + return + + try: + rs = getattr(obj, method1).__getitem__(_axify(obj, key1, axis)) + + try: + xp = self.get_result(obj=obj, method=method2, key=key2, axis=axis) + except (KeyError, IndexError): + # TODO: why is this allowed? + return + + if is_scalar(rs) and is_scalar(xp): + assert rs == xp + else: + tm.assert_equal(rs, xp) + + except (IndexError, TypeError, KeyError) as detail: + + # if we are in fails, the ok, otherwise raise it + if fails is not None: + if isinstance(detail, fails): + result = f"ok ({type(detail).__name__})" + return + + result = type(detail).__name__ + raise AssertionError(result, detail) + + if typs is None: + typs = self._typs + + if axes is None: + axes = [0, 1] + elif not isinstance(axes, (tuple, list)): + assert isinstance(axes, int) + axes = [axes] + + # check + for kind in self._kinds: + + d = getattr(self, kind) + for ax in axes: + for typ in typs: + if typ not in self._typs: + continue + + obj = d[typ] + _eq(axis=ax, obj=obj, key1=key1, key2=key2) diff --git a/pandas/tests/indexing/conftest.py b/pandas/tests/indexing/conftest.py new file mode 100644 index 00000000..142bedaa --- /dev/null +++ b/pandas/tests/indexing/conftest.py @@ -0,0 +1,23 @@ +import numpy as np +import pytest + +from pandas._libs import index as libindex + + +@pytest.fixture( + params=[ + (libindex.Int64Engine, np.int64), + (libindex.Int32Engine, np.int32), + (libindex.Int16Engine, np.int16), + (libindex.Int8Engine, np.int8), + (libindex.UInt64Engine, np.uint64), + (libindex.UInt32Engine, np.uint32), + (libindex.UInt16Engine, np.uint16), + (libindex.UInt8Engine, np.uint8), + (libindex.Float64Engine, np.float64), + (libindex.Float32Engine, np.float32), + ], + ids=lambda x: x[0].__name__, +) +def 
numeric_indexing_engine_type_and_dtype(request): + return request.param diff --git a/pandas/tests/indexing/interval/__init__.py b/pandas/tests/indexing/interval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py new file mode 100644 index 00000000..63402098 --- /dev/null +++ b/pandas/tests/indexing/interval/test_interval.py @@ -0,0 +1,149 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, IntervalIndex, Series +import pandas._testing as tm + + +class TestIntervalIndex: + def setup_method(self, method): + self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_getitem_with_scalar(self): + + s = self.s + + expected = s.iloc[:3] + tm.assert_series_equal(expected, s[:3]) + tm.assert_series_equal(expected, s[:2.5]) + tm.assert_series_equal(expected, s[0.1:2.5]) + + expected = s.iloc[1:4] + tm.assert_series_equal(expected, s[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, s[[2, 3, 4]]) + tm.assert_series_equal(expected, s[[1.5, 3, 4]]) + + expected = s.iloc[2:5] + tm.assert_series_equal(expected, s[s >= 2]) + + @pytest.mark.parametrize("direction", ["increasing", "decreasing"]) + def test_nonoverlapping_monotonic(self, direction, closed): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == "decreasing": + tpls = tpls[::-1] + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + s = Series(list("abc"), idx) + + for key, expected in zip(idx.left, s): + if idx.closed_left: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError, match=str(key)): + s[key] + with pytest.raises(KeyError, match=str(key)): + s.loc[key] + + for key, expected in zip(idx.right, s): + if idx.closed_right: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError, match=str(key)): + s[key] + with pytest.raises(KeyError, 
match=str(key)): + s.loc[key] + + for key, expected in zip(idx.mid, s): + assert s[key] == expected + assert s.loc[key] == expected + + def test_non_matching(self): + s = self.s + + # this is a departure from our current + # indexing scheme, but simpler + with pytest.raises(KeyError, match="^$"): + s.loc[[-1, 3, 4, 5]] + + with pytest.raises(KeyError, match="^$"): + s.loc[[-1, 3]] + + def test_large_series(self): + s = Series( + np.arange(1000000), index=IntervalIndex.from_breaks(np.arange(1000001)) + ) + + result1 = s.loc[:80000] + result2 = s.loc[0:80000] + result3 = s.loc[0:80000:1] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + def test_loc_getitem_frame(self): + + df = DataFrame({"A": range(10)}) + s = pd.cut(df.A, 5) + df["B"] = s + df = df.set_index("B") + + result = df.loc[4] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match="10"): + df.loc[10] + + # single list-like + result = df.loc[[4]] + expected = df.iloc[4:6] + tm.assert_frame_equal(result, expected) + + # non-unique + result = df.loc[[4, 5]] + expected = df.take([4, 5, 4, 5]) + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match="^$"): + df.loc[[10]] + + # partial missing + with pytest.raises(KeyError, match="^$"): + df.loc[[10, 4]] + + +class TestIntervalIndexInsideMultiIndex: + def test_mi_intervalindex_slicing_with_scalar(self): + # GH#27456 + idx = pd.MultiIndex.from_arrays( + [ + pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]), + pd.Index( + ["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"] + ), + pd.IntervalIndex.from_arrays( + [0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12] + ), + ] + ) + + idx.names = ["Item", "RID", "MP"] + df = pd.DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]}) + df.index = idx + query_df = pd.DataFrame( + { + "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"], + "RID": ["RID1", "RID1", "RID1", "RID2", 
"RID2"], + "MP": [0.2, 1.5, 1.6, 11.1, 10.9], + } + ) + + query_df = query_df.sort_index() + + idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP]) + query_df.index = idx + result = df.value.loc[query_df.index] + expected = pd.Series([1, 6, 2, 8, 7], index=idx, name="value") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/interval/test_interval_new.py b/pandas/tests/indexing/interval/test_interval_new.py new file mode 100644 index 00000000..43036fbb --- /dev/null +++ b/pandas/tests/indexing/interval/test_interval_new.py @@ -0,0 +1,248 @@ +import re + +import numpy as np +import pytest + +from pandas import Interval, IntervalIndex, Series +import pandas._testing as tm + + +class TestIntervalIndex: + def setup_method(self, method): + self.s = Series(np.arange(5), IntervalIndex.from_breaks(np.arange(6))) + + def test_loc_with_interval(self): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + s = self.s + + expected = 0 + result = s.loc[Interval(0, 1)] + assert result == expected + result = s[Interval(0, 1)] + assert result == expected + + expected = s.iloc[3:5] + result = s.loc[[Interval(3, 4), Interval(4, 5)]] + tm.assert_series_equal(expected, result) + result = s[[Interval(3, 4), Interval(4, 5)]] + tm.assert_series_equal(expected, result) + + # missing or not exact + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): + s.loc[Interval(3, 5, closed="left")] + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='left')")): + s[Interval(3, 5, closed="left")] + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + s[Interval(3, 5)] + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + s.loc[Interval(3, 5)] + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + s[Interval(3, 5)] + + with pytest.raises( + 
KeyError, match=re.escape("Interval(-2, 0, closed='right')") + ): + s.loc[Interval(-2, 0)] + + with pytest.raises( + KeyError, match=re.escape("Interval(-2, 0, closed='right')") + ): + s[Interval(-2, 0)] + + with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): + s.loc[Interval(5, 6)] + + with pytest.raises(KeyError, match=re.escape("Interval(5, 6, closed='right')")): + s[Interval(5, 6)] + + def test_loc_with_scalar(self): + + # loc with single label / list of labels: + # - Intervals: only exact matches + # - scalars: those that contain it + + s = self.s + + assert s.loc[1] == 0 + assert s.loc[1.5] == 1 + assert s.loc[2] == 1 + + assert s[1] == 0 + assert s[1.5] == 1 + assert s[2] == 1 + + expected = s.iloc[1:4] + tm.assert_series_equal(expected, s.loc[[1.5, 2.5, 3.5]]) + tm.assert_series_equal(expected, s.loc[[2, 3, 4]]) + tm.assert_series_equal(expected, s.loc[[1.5, 3, 4]]) + + expected = s.iloc[[1, 1, 2, 1]] + tm.assert_series_equal(expected, s.loc[[1.5, 2, 2.5, 1.5]]) + + expected = s.iloc[2:5] + tm.assert_series_equal(expected, s.loc[s >= 2]) + + def test_loc_with_slices(self): + + # loc with slices: + # - Interval objects: only works with exact matches + # - scalars: only works for non-overlapping, monotonic intervals, + # and start/stop select location based on the interval that + # contains them: + # (slice_loc(start, stop) == (idx.get_loc(start), idx.get_loc(stop)) + + s = self.s + + # slice of interval + + expected = s.iloc[:3] + result = s.loc[Interval(0, 1) : Interval(2, 3)] + tm.assert_series_equal(expected, result) + result = s[Interval(0, 1) : Interval(2, 3)] + tm.assert_series_equal(expected, result) + + expected = s.iloc[3:] + result = s.loc[Interval(3, 4) :] + tm.assert_series_equal(expected, result) + result = s[Interval(3, 4) :] + tm.assert_series_equal(expected, result) + + msg = "Interval objects are not currently supported" + with pytest.raises(NotImplementedError, match=msg): + s.loc[Interval(3, 6) :] + + with 
pytest.raises(NotImplementedError, match=msg): + s[Interval(3, 6) :] + + with pytest.raises(NotImplementedError, match=msg): + s.loc[Interval(3, 4, closed="left") :] + + with pytest.raises(NotImplementedError, match=msg): + s[Interval(3, 4, closed="left") :] + + # TODO with non-existing intervals ? + # s.loc[Interval(-1, 0):Interval(2, 3)] + + # slice of scalar + + expected = s.iloc[:3] + tm.assert_series_equal(expected, s.loc[:3]) + tm.assert_series_equal(expected, s.loc[:2.5]) + tm.assert_series_equal(expected, s.loc[0.1:2.5]) + tm.assert_series_equal(expected, s.loc[-1:3]) + + tm.assert_series_equal(expected, s[:3]) + tm.assert_series_equal(expected, s[:2.5]) + tm.assert_series_equal(expected, s[0.1:2.5]) + + # slice of scalar with step != 1 + with pytest.raises(ValueError): + s[0:4:2] + + def test_loc_with_overlap(self): + + idx = IntervalIndex.from_tuples([(1, 5), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + # scalar + expected = s + result = s.loc[4] + tm.assert_series_equal(expected, result) + + result = s[4] + tm.assert_series_equal(expected, result) + + result = s.loc[[4]] + tm.assert_series_equal(expected, result) + + result = s[[4]] + tm.assert_series_equal(expected, result) + + # interval + expected = 0 + result = s.loc[Interval(1, 5)] + assert result == expected + + result = s[Interval(1, 5)] + assert result == expected + + expected = s + result = s.loc[[Interval(1, 5), Interval(3, 7)]] + tm.assert_series_equal(expected, result) + + result = s[[Interval(1, 5), Interval(3, 7)]] + tm.assert_series_equal(expected, result) + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + s.loc[Interval(3, 5)] + + with pytest.raises(KeyError, match="^$"): + s.loc[[Interval(3, 5)]] + + with pytest.raises(KeyError, match=re.escape("Interval(3, 5, closed='right')")): + s[Interval(3, 5)] + + with pytest.raises(KeyError, match="^$"): + s[[Interval(3, 5)]] + + # slices with interval (only exact matches) + expected = s + result = s.loc[Interval(1, 
5) : Interval(3, 7)] + tm.assert_series_equal(expected, result) + + result = s[Interval(1, 5) : Interval(3, 7)] + tm.assert_series_equal(expected, result) + + msg = ("'can only get slices from an IntervalIndex if bounds are" + " non-overlapping and all monotonic increasing or decreasing'") + with pytest.raises(KeyError, match=msg): + s.loc[Interval(1, 6) : Interval(3, 8)] + + with pytest.raises(KeyError, match=msg): + s[Interval(1, 6) : Interval(3, 8)] + + # slices with scalar raise for overlapping intervals + # TODO KeyError is the appropriate error? + with pytest.raises(KeyError, match=msg): + s.loc[1:4] + + def test_non_unique(self): + + idx = IntervalIndex.from_tuples([(1, 3), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + result = s.loc[Interval(1, 3)] + assert result == 0 + + result = s.loc[[Interval(1, 3)]] + expected = s.iloc[0:1] + tm.assert_series_equal(expected, result) + + def test_non_unique_moar(self): + + idx = IntervalIndex.from_tuples([(1, 3), (1, 3), (3, 7)]) + s = Series(range(len(idx)), index=idx) + + expected = s.iloc[[0, 1]] + result = s.loc[Interval(1, 3)] + tm.assert_series_equal(expected, result) + + expected = s + result = s.loc[Interval(1, 3) :] + tm.assert_series_equal(expected, result) + + expected = s + result = s[Interval(1, 3) :] + tm.assert_series_equal(expected, result) + + expected = s.iloc[[0, 1]] + result = s[[Interval(1, 3)]] + tm.assert_series_equal(expected, result) diff --git a/pandas/tests/indexing/multiindex/__init__.py b/pandas/tests/indexing/multiindex/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/indexing/multiindex/conftest.py b/pandas/tests/indexing/multiindex/conftest.py new file mode 100644 index 00000000..e6d5a9eb --- /dev/null +++ b/pandas/tests/indexing/multiindex/conftest.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +@pytest.fixture +def multiindex_dataframe_random_data(): + 
"""DataFrame with 2 level MultiIndex with random data""" + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + return DataFrame( + np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp") + ) + + +@pytest.fixture +def multiindex_year_month_day_dataframe_random_data(): + """DataFrame with 3 level MultiIndex (year, month, day) covering + first 100 business days from 2000-01-01 with random data""" + tdf = tm.makeTimeDataFrame(100) + ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() + # use Int64Index, to make sure things work + ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True) + ymd.index.set_names(["year", "month", "day"], inplace=True) + return ymd diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py new file mode 100644 index 00000000..8bfba8c1 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -0,0 +1,64 @@ +import numpy as np +import pytest + +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm +import pandas.core.common as com + + +def test_detect_chained_assignment(): + # Inplace ops, originally from: + # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + a = [12, 23] + b = [123, None] + c = [1234, 2345] + d = [12345, 23456] + tuples = [("eyes", "left"), ("eyes", "right"), ("ears", "left"), ("ears", "right")] + events = { + ("eyes", "left"): a, + ("eyes", "right"): b, + ("ears", "left"): c, + ("ears", "right"): d, + } + multiind = MultiIndex.from_tuples(tuples, names=["part", "side"]) + zed = DataFrame(events, index=["a", "b"], columns=multiind) + + with pytest.raises(com.SettingWithCopyError): + 
zed["eyes"]["right"].fillna(value=555, inplace=True) + + +def test_cache_updating(): + # 5216 + # make sure that we don't try to set a dead cache + a = np.random.rand(10, 3) + df = DataFrame(a, columns=["x", "y", "z"]) + tuples = [(i, j) for i in range(5) for j in range(2)] + index = MultiIndex.from_tuples(tuples) + df.index = index + + # setting via chained assignment + # but actually works, since everything is a view + df.loc[0]["z"].iloc[0] = 1.0 + result = df.loc[(0, 0), "z"] + assert result == 1 + + # correct setting + df.loc[(0, 0), "z"] = 2 + result = df.loc[(0, 0), "z"] + assert result == 2 + + +def test_indexer_caching(): + # GH5727 + # make sure that indexers are in the _internal_names_set + n = 1000001 + arrays = (range(n), range(n)) + index = MultiIndex.from_tuples(zip(*arrays)) + s = Series(np.zeros(n), index=index) + str(s) + + # setitem + expected = Series(np.ones(n), index=index) + s = Series(np.zeros(n), index=index) + s[s == 0] = 1 + tm.assert_series_equal(s, expected) diff --git a/pandas/tests/indexing/multiindex/test_datetime.py b/pandas/tests/indexing/multiindex/test_datetime.py new file mode 100644 index 00000000..907d20cd --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_datetime.py @@ -0,0 +1,22 @@ +from datetime import datetime + +import numpy as np + +from pandas import Index, Period, Series, period_range + + +def test_multiindex_period_datetime(): + # GH4861, using datetime in period of multiindex raises exception + + idx1 = Index(["a", "a", "a", "b", "b"]) + idx2 = period_range("2012-01", periods=len(idx1), freq="M") + s = Series(np.random.randn(len(idx1)), [idx1, idx2]) + + # try Period as index + expected = s.iloc[0] + result = s.loc["a", Period("2012-01")] + assert result == expected + + # try datetime as index + result = s.loc["a", datetime(2012, 1, 1)] + assert result == expected diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py new file mode 100644 index 
00000000..8ea825da --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -0,0 +1,252 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm +from pandas.core.indexing import IndexingError + +# ---------------------------------------------------------------------------- +# test indexing of Series with multi-level Index +# ---------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "access_method", + [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)], +) +@pytest.mark.parametrize( + "level1_value, expected", + [(0, Series([1], index=[0])), (1, Series([2, 3], index=[1, 2]))], +) +def test_series_getitem_multiindex(access_method, level1_value, expected): + + # GH 6018 + # series regression getitem with a multi-index + + s = Series([1, 2, 3]) + s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) + result = access_method(s, level1_value) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("level0_value", ["D", "A"]) +def test_series_getitem_duplicates_multiindex(level0_value): + # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise + # the appropriate error, only in PY3 of course! 
+ + index = MultiIndex( + levels=[[level0_value, "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=["tag", "day"], + ) + arr = np.random.randn(len(index), 1) + df = DataFrame(arr, index=index, columns=["val"]) + + # confirm indexing on missing value raises KeyError + if level0_value != "A": + with pytest.raises(KeyError, match=r"^'A'$"): + df.val["A"] + + with pytest.raises(KeyError, match=r"^'X'$"): + df.val["X"] + + result = df.val[level0_value] + expected = Series( + arr.ravel()[0:3], name="val", index=Index([26, 37, 57], name="day") + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("indexer", [lambda s: s[2000, 3], lambda s: s.loc[2000, 3]]) +def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer): + s = multiindex_year_month_day_dataframe_random_data["A"] + expected = s.reindex(s.index[42:65]) + expected.index = expected.index.droplevel(0).droplevel(0) + + result = indexer(s) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", [lambda s: s[2000, 3, 10], lambda s: s.loc[2000, 3, 10]] +) +def test_series_getitem_returns_scalar( + multiindex_year_month_day_dataframe_random_data, indexer +): + s = multiindex_year_month_day_dataframe_random_data["A"] + expected = s.iloc[49] + + result = indexer(s) + assert result == expected + + +@pytest.mark.parametrize( + "indexer,expected_error,expected_error_msg", + [ + (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), + (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), + (lambda s: s.__getitem__(len(s)), IndexError, "index out of bounds"), + (lambda s: s[len(s)], IndexError, "index out of bounds"), + ( + lambda s: s.iloc[len(s)], + IndexError, + "single positional indexer is out-of-bounds", + ), 
def test_frame_getitem_multicolumn_empty_level():
    """Select a top-level column label when the middle level is empty.

    The frame is given three-level columns whose second level is ``""`` for
    the first column.  ``frame["level1 item1"]`` is expected to come back as
    a one-column frame labelled only by the third-level value.
    """
    top_level = ["level1 item1", "level1 item2"]
    middle_level = ["", "level2 item2"]
    bottom_level = ["level3 item1", "level3 item2"]

    frame = DataFrame({"a": ["1", "2", "3"], "b": ["2", "3", "4"]})
    frame.columns = [top_level, middle_level, bottom_level]

    selected = frame["level1 item1"]
    wanted = DataFrame(
        [["1"], ["2"], ["3"]], index=frame.index, columns=["level3 item1"]
    )
    tm.assert_frame_equal(selected, wanted)
def test_frame_mixed_depth_get():
    """Partial column keys match the full key padded with empty labels.

    Columns are a three-level MultiIndex in which some entries use ``""``
    for the deeper levels.  Looking a column up by its non-empty prefix must
    return the same Series as the fully spelled-out key, named after the
    prefix that was used.
    """
    level_values = [
        ["a", "top", "top", "routine1", "routine1", "routine2"],
        ["", "OD", "OD", "result1", "result2", "result1"],
        ["", "wx", "wy", "", "", ""],
    ]
    columns = MultiIndex.from_tuples(sorted(zip(*level_values)))
    frame = DataFrame(np.random.randn(4, 6), columns=columns)

    # (prefix key, equivalent full key, name expected on the result)
    cases = [
        ("a", ("a", "", ""), "a"),
        (
            ("routine1", "result1"),
            ("routine1", "result1", ""),
            ("routine1", "result1"),
        ),
    ]
    for prefix_key, full_key, name in cases:
        selected = frame[prefix_key]
        wanted = frame[full_key].rename(name)
        tm.assert_series_equal(selected, wanted)
columns=index).T + + result = indexer(df) + tm.assert_frame_equal(result, expected) + + +def test_frame_mi_access_returns_series(dataframe_with_duplicate_index): + # GH 4146, not returning a block manager when selecting a unique index + # from a duplicate index + # as of 4879, this returns a Series (which is similar to what happens + # with a non-unique) + df = dataframe_with_duplicate_index + expected = Series(["a", 1, 1], index=["h1", "h3", "h5"], name="A1") + result = df["A"]["A1"] + tm.assert_series_equal(result, expected) + + +def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index): + # selecting a non_unique from the 2nd level + df = dataframe_with_duplicate_index + expected = DataFrame( + [["d", 4, 4], ["e", 5, 5]], + index=Index(["B2", "B2"], name="sub"), + columns=["h1", "h3", "h5"], + ).T + result = df["A"]["B2"] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py new file mode 100644 index 00000000..9859c723 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -0,0 +1,171 @@ +import numpy as np +import pytest + +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + + +@pytest.fixture +def simple_multiindex_dataframe(): + """ + Factory function to create simple 3 x 3 dataframe with + both columns and row MultiIndex using supplied data or + random data by default. 
def test_iloc_getitem_multiple_items():
    """``iloc`` with a list of positions matches ``xs`` on the outer label.

    GH 5528: rows 2 and 3 are the rows whose outer label is ``"b"``, so the
    positional selection must equal ``xs("b", drop_level=False)``.
    """
    outer_labels = ["a", "a", "b", "b"]
    inner_labels = ["x", "y", "x", "y"]
    row_index = MultiIndex.from_tuples(zip(outer_labels, inner_labels))
    frame = DataFrame(np.random.randn(4, 4), index=row_index)

    by_position = frame.iloc[[2, 3]]
    by_label = frame.xs("b", drop_level=False)
    tm.assert_frame_equal(by_position, by_label)
def test_iloc_integer_locations():
    """Scalar ``iloc[row, col]`` reads cells purely by position.

    GH 13797: the row MultiIndex contains a repeated label and is not
    sorted.  Reading every cell through ``iloc`` and reassembling them must
    reproduce the original table.
    """
    cells = [
        ["str00", "str01"],
        ["str10", "str11"],
        ["str20", "srt21"],
        ["str30", "str31"],
        ["str40", "str41"],
    ]
    row_labels = [("CC", "A"), ("CC", "B"), ("CC", "B"), ("BB", "a"), ("BB", "b")]
    labelled = DataFrame(cells, index=MultiIndex.from_tuples(row_labels))

    gathered = []
    for row in range(5):
        gathered.append([labelled.iloc[row, col] for col in range(2)])

    tm.assert_frame_equal(DataFrame(gathered), DataFrame(cells))
df.set_index(["i", "j"]) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df["k"] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) + + +def test_getitem_iloc(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[2] + expected = df.xs(df.index[2]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py new file mode 100644 index 00000000..8ea1cebd --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py @@ -0,0 +1,92 @@ +import warnings + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + + +@pytest.mark.slow +@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") +def test_multiindex_get_loc(): # GH7724, GH2646 + + with warnings.catch_warnings(record=True): + + # test indexing into a multi-index before & past the lexsort depth + from numpy.random import randint, choice, randn + + cols = ["jim", "joe", "jolie", "joline", "jolia"] + + def validate(mi, df, key): + mask = np.ones(len(df)).astype("bool") + + # test for all partials of this key + for i, k in enumerate(key): + mask &= df.iloc[:, i] == k + + if not mask.any(): + assert key[: i + 1] not in mi.index + continue + + assert key[: i + 1] in mi.index + right = df[mask].copy() + + if i + 1 != len(key): # partial key + right.drop(cols[: i + 1], axis=1, inplace=True) + right.set_index(cols[i + 1 : -1], inplace=True) + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) + + else: # full key + right.set_index(cols[:-1], inplace=True) + if len(right) == 1: # single hit + right = Series( + right["jolia"].values, name=right.index[0], index=["jolia"] + ) + tm.assert_series_equal(mi.loc[key[: i + 1]], right) + else: # multi hit + tm.assert_frame_equal(mi.loc[key[: i + 1]], right) + + def 
@pytest.mark.slow
def test_large_mi_dataframe_indexing():
    """Membership test on a one-million-entry MultiIndex (GH10645).

    Both levels run over ``range(10 ** 6)``, so the largest first-level
    label is ``10 ** 6 - 1`` and the key ``(10 ** 6, 0)`` must be reported
    as absent.
    """
    size = 10 ** 6
    result = MultiIndex.from_arrays([range(size), range(size)])
    # ``not in`` states the membership check directly instead of negating
    # an ``in`` expression.
    assert (size, 0) not in result
333, 4: 444}, + "col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, + "year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}, + } + df = DataFrame(data).set_index(keys=["col", "year"]) + key = 4.0, 2012 + + # emits a PerformanceWarning, ok + with tm.assert_produces_warning(PerformanceWarning): + tm.assert_frame_equal(df.loc[key], df.iloc[2:]) + + # this is ok + df.sort_index(inplace=True) + res = df.loc[key] + + # col has float dtype, result should be Float64Index + index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"]) + expected = DataFrame({"amount": [222, 333, 444]}, index=index) + tm.assert_frame_equal(res, expected) + + def test_loc_multiindex_missing_label_raises(self): + # GH 21593 + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + def test_series_loc_getitem_fancy( + self, multiindex_year_month_day_dataframe_random_data + ): + s = multiindex_year_month_day_dataframe_random_data["A"] + expected = s.reindex(s.index[49:51]) + result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py new file mode 100644 index 00000000..3b8aa963 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -0,0 +1,470 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm +from pandas.core.indexing import IndexingError + + +@pytest.fixture +def single_level_multiindex(): + """single level MultiIndex""" + return MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + + +@pytest.fixture +def frame_random_data_integer_multi_index(): + levels = [[0, 1], [0, 1, 2]] + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) 
+ return DataFrame(np.random.randn(6, 2), index=index) + + +class TestMultiIndexLoc: + def test_loc_getitem_series(self): + # GH14730 + # passing a series as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = Series([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), + dtype=np.float64, + ) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + result = x.loc[[1, 3]] + tm.assert_series_equal(result, expected) + + # GH15424 + y1 = Series([1, 3], index=[1, 2]) + result = x.loc[y1] + tm.assert_series_equal(result, expected) + + empty = Series(data=[], dtype=np.float64) + expected = Series( + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype=np.float64, + ) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_array(self): + # GH15434 + # passing an array as a key with a MultiIndex + index = MultiIndex.from_product([[1, 2, 3], ["A", "B", "C"]]) + x = Series(index=index, data=range(9), dtype=np.float64) + y = np.array([1, 3]) + expected = Series( + data=[0, 1, 2, 6, 7, 8], + index=MultiIndex.from_product([[1, 3], ["A", "B", "C"]]), + dtype=np.float64, + ) + result = x.loc[y] + tm.assert_series_equal(result, expected) + + # empty array: + empty = np.array([]) + expected = Series( + [], + index=MultiIndex(levels=index.levels, codes=[[], []], dtype=np.float64), + dtype="float64", + ) + result = x.loc[empty] + tm.assert_series_equal(result, expected) + + # 0-dim array (scalar): + scalar = np.int64(1) + expected = Series(data=[0, 1, 2], index=["A", "B", "C"], dtype=np.float64) + result = x.loc[scalar] + tm.assert_series_equal(result, expected) + + def test_loc_multiindex_labels(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[["i", "i", "j"], ["A", "A", "B"]], + index=[["i", "i", "j"], ["X", "X", "Y"]], + ) + + # 
the first 2 rows + expected = df.iloc[[0, 1]].droplevel(0) + result = df.loc["i"] + tm.assert_frame_equal(result, expected) + + # 2nd (last) column + expected = df.iloc[:, [2]].droplevel(0, axis=1) + result = df.loc[:, "j"] + tm.assert_frame_equal(result, expected) + + # bottom right corner + expected = df.iloc[[2], [2]].droplevel(0).droplevel(0, axis=1) + result = df.loc["j"].loc[:, "j"] + tm.assert_frame_equal(result, expected) + + # with a tuple + expected = df.iloc[[0, 1]] + result = df.loc[("i", "X")] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_ints(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + expected = df.iloc[[0, 1]].droplevel(0) + result = df.loc[4] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_missing_label_raises(self): + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + with pytest.raises(KeyError, match=r"^2$"): + df.loc[2] + + @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])]) + def test_loc_multiindex_list_missing_label(self, key, pos): + # GH 27148 - lists with missing labels do not raise: + df = DataFrame( + np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]], + ) + + expected = df.iloc[pos] + result = df.loc[key] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_too_many_dims_raises(self): + # GH 14885 + s = Series( + range(8), + index=MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]), + ) + + with pytest.raises(KeyError, match=r"^\('a', 'b'\)$"): + s.loc["a", "b"] + with pytest.raises(KeyError, match=r"^\('a', 'd', 'g'\)$"): + s.loc["a", "d", "g"] + with pytest.raises(IndexingError, match="Too many indexers"): + s.loc["a", "d", "g", "j"] + + def test_loc_multiindex_indexer_none(self): + + # GH6788 + # multi-index indexer is None (meaning take all) + 
attributes = ["Attribute" + str(i) for i in range(1)] + attribute_values = ["Value" + str(i) for i in range(5)] + + index = MultiIndex.from_product([attributes, attribute_values]) + df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 + df = DataFrame(df, columns=index) + result = df[attributes] + tm.assert_frame_equal(result, df) + + # GH 7349 + # loc with a multi-index seems to be doing fallback + df = DataFrame( + np.arange(12).reshape(-1, 1), + index=MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]]), + ) + + expected = df.loc[([1, 2],), :] + result = df.loc[[1, 2]] + tm.assert_frame_equal(result, expected) + + def test_loc_multiindex_incomplete(self): + + # GH 7399 + # incomplete indexers + s = Series( + np.arange(15, dtype="int64"), + MultiIndex.from_product([range(5), ["a", "b", "c"]]), + ) + expected = s.loc[:, "a":"c"] + + result = s.loc[0:4, "a":"c"] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + result = s.loc[:4, "a":"c"] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + result = s.loc[0:, "a":"c"] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + # GH 7400 + # multiindexer gettitem with list of indexers skips wrong element + s = Series( + np.arange(15, dtype="int64"), + MultiIndex.from_product([range(5), ["a", "b", "c"]]), + ) + expected = s.iloc[[6, 7, 8, 12, 13, 14]] + result = s.loc[2:4:2, "a":"c"] + tm.assert_series_equal(result, expected) + + def test_get_loc_single_level(self, single_level_multiindex): + single_level = single_level_multiindex + s = Series(np.random.randn(len(single_level)), index=single_level) + for k in single_level.values: + s[k] + + def test_loc_getitem_int_slice(self): + # GH 3053 + # loc should treat integer slices like label slices + + index = MultiIndex.from_product([[6, 7, 8], ["a", "b"]]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[6:8, :] + expected = df + 
tm.assert_frame_equal(result, expected) + + index = MultiIndex.from_product([[10, 20, 30], ["a", "b"]]) + df = DataFrame(np.random.randn(6, 6), index, index) + result = df.loc[20:30, :] + expected = df.iloc[2:] + tm.assert_frame_equal(result, expected) + + # doc examples + result = df.loc[10, :] + expected = df.iloc[0:2] + expected.index = ["a", "b"] + tm.assert_frame_equal(result, expected) + + result = df.loc[:, 10] + expected = df[10] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "indexer_type_1", (list, tuple, set, slice, np.ndarray, Series, Index) + ) + @pytest.mark.parametrize( + "indexer_type_2", (list, tuple, set, slice, np.ndarray, Series, Index) + ) + def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2): + # GH #19686 + # .loc should work with nested indexers which can be + # any list-like objects (see `pandas.api.types.is_list_like`) or slices + + def convert_nested_indexer(indexer_type, keys): + if indexer_type == np.ndarray: + return np.array(keys) + if indexer_type == slice: + return slice(*keys) + return indexer_type(keys) + + a = [10, 20, 30] + b = [1, 2, 3] + index = MultiIndex.from_product([a, b]) + df = DataFrame( + np.arange(len(index), dtype="int64"), index=index, columns=["Data"] + ) + + keys = ([10, 20], [2, 3]) + types = (indexer_type_1, indexer_type_2) + + # check indexers with all the combinations of nested objects + # of all the valid types + indexer = tuple( + convert_nested_indexer(indexer_type, k) + for indexer_type, k in zip(types, keys) + ) + + result = df.loc[indexer, "Data"] + expected = Series( + [1, 2, 4, 5], name="Data", index=MultiIndex.from_product(keys) + ) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer, pos", + [ + ([], []), # empty ok + (["A"], slice(3)), + (["A", "D"], slice(3)), + (["D", "E"], []), # no values found - fine + (["D"], []), # same, with single item list: GH 27148 + (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), + 
def test_loc_getitem_tuple_plus_slice():
    """``loc[(0, 0), :]`` matches ``loc[0, 0]`` on a two-level row index.

    GH 671: adding an explicit full column slice after a complete row key
    must not change the selected row.
    """
    size = 10
    columns = {
        "a": np.arange(size),
        "b": np.arange(size),
        "c": np.random.randn(size),
        "d": np.random.randn(size),
    }
    frame = DataFrame(columns).set_index(["a", "b"])

    without_slice = frame.loc[0, 0]
    with_slice = frame.loc[(0, 0), :]
    tm.assert_series_equal(with_slice, without_slice)
def test_loc_setitem_single_column_slice():
    """Assign through ``.loc[:, label]`` on frames with MultiIndex columns.

    Case from https://github.com/pandas-dev/pandas/issues/27841.  Two
    frames are checked: one filled with strings and one filled with NaN.
    """
    # Frame filled with strings: overwrite a newly added column with the index.
    text_frame = DataFrame(
        "string",
        index=list("abcd"),
        columns=MultiIndex.from_product([["Main"], ("another", "one")]),
    )
    text_frame["labels"] = "a"
    text_frame.loc[:, "labels"] = text_frame.index
    stored = np.asarray(text_frame["labels"])
    tm.assert_numpy_array_equal(stored, np.asarray(text_frame.index))

    # test with non-object block
    column_labels = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")])
    nan_frame = DataFrame(np.nan, index=range(4), columns=column_labels)
    wanted = nan_frame.copy()
    # "B" labels only the third column, so label and positional assignment
    # must give the same frame.
    nan_frame.loc[:, "B"] = np.arange(4)
    wanted.iloc[:, 2] = np.arange(4)
    tm.assert_frame_equal(nan_frame, wanted)
df.loc["Good Things"].loc["C"] + expected = DataFrame( + np.ones((1, 4)), + index=Index([np.nan], dtype="object", name="u3"), + columns=Index(["d1", "d2", "d3", "d4"], dtype="object"), + ) + tm.assert_frame_equal(result, expected) + + +def test_loc_period_string_indexing(): + # GH 9892 + a = pd.period_range("2013Q1", "2013Q4", freq="Q") + i = (1111, 2222, 3333) + idx = pd.MultiIndex.from_product((a, i), names=("Periode", "CVR")) + df = pd.DataFrame( + index=idx, + columns=( + "OMS", + "OMK", + "RES", + "DRIFT_IND", + "OEVRIG_IND", + "FIN_IND", + "VARE_UD", + "LOEN_UD", + "FIN_UD", + ), + ) + result = df.loc[("2013Q1", 1111), "OMS"] + expected = pd.Series( + [np.nan], + dtype=object, + name="OMS", + index=pd.MultiIndex.from_tuples( + [(pd.Period("2013Q1"), 1111)], names=["Periode", "CVR"] + ), + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py new file mode 100644 index 00000000..0064187a --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_multiindex.py @@ -0,0 +1,113 @@ +import numpy as np + +import pandas._libs.index as _index +from pandas.errors import PerformanceWarning + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm + + +class TestMultiIndexBasic: + def test_multiindex_perf_warn(self): + + df = DataFrame( + { + "jim": [0, 0, 1, 1], + "joe": ["x", "x", "z", "y"], + "jolie": np.random.rand(4), + } + ).set_index(["jim", "joe"]) + + with tm.assert_produces_warning(PerformanceWarning): + df.loc[(1, "z")] + + df = df.iloc[[2, 1, 3, 0]] + with tm.assert_produces_warning(PerformanceWarning): + df.loc[(0,)] + + def test_multiindex_contains_dropped(self): + # GH 19027 + # test that dropped MultiIndex levels are not in the MultiIndex + # despite continuing to be in the MultiIndex's levels + idx = MultiIndex.from_product([[1, 2], [3, 4]]) + assert 2 in idx + idx = idx.drop(2) + + # drop 
implementation keeps 2 in the levels + assert 2 in idx.levels[0] + # but it should no longer be in the index itself + assert 2 not in idx + + # also applies to strings + idx = MultiIndex.from_product([["a", "b"], ["c", "d"]]) + assert "a" in idx + idx = idx.drop("a") + assert "a" in idx.levels[0] + assert "a" not in idx + + def test_indexing_over_hashtable_size_cutoff(self): + n = 10000 + + old_cutoff = _index._SIZE_CUTOFF + _index._SIZE_CUTOFF = 20000 + + s = Series(np.arange(n), MultiIndex.from_arrays((["a"] * n, np.arange(n)))) + + # hai it works! + assert s[("a", 5)] == 5 + assert s[("a", 6)] == 6 + assert s[("a", 7)] == 7 + + _index._SIZE_CUTOFF = old_cutoff + + def test_multi_nan_indexing(self): + + # GH 3588 + df = DataFrame( + { + "a": ["R1", "R2", np.nan, "R4"], + "b": ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20], + } + ) + result = df.set_index(["a", "b"], drop=False) + expected = DataFrame( + { + "a": ["R1", "R2", np.nan, "R4"], + "b": ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20], + }, + index=[ + Index(["R1", "R2", np.nan, "R4"], name="a"), + Index(["C1", "C2", "C3", "C4"], name="b"), + ], + ) + tm.assert_frame_equal(result, expected) + + def test_contains(self): + # GH 24570 + tx = pd.timedelta_range("09:30:00", "16:00:00", freq="30 min") + idx = MultiIndex.from_arrays([tx, np.arange(len(tx))]) + assert tx[0] in idx + assert "element_not_exit" not in idx + assert "0 day 09:30:00" in idx + + def test_nested_tuples_duplicates(self): + # GH#30892 + + dti = pd.to_datetime(["20190101", "20190101", "20190102"]) + idx = pd.Index(["a", "a", "c"]) + mi = pd.MultiIndex.from_arrays([dti, idx], names=["index1", "index2"]) + + df = pd.DataFrame({"c1": [1, 2, 3], "c2": [np.nan, np.nan, np.nan]}, index=mi) + + expected = pd.DataFrame({"c1": df["c1"], "c2": [1.0, 1.0, np.nan]}, index=mi) + + df2 = df.copy(deep=True) + df2.loc[(dti[0], "a"), "c2"] = 1.0 + tm.assert_frame_equal(df2, expected) + + df3 = df.copy(deep=True) + df3.loc[[(dti[0], "a")], 
"c2"] = 1.0 + tm.assert_frame_equal(df3, expected) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py new file mode 100644 index 00000000..9d181bdc --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -0,0 +1,194 @@ +import numpy as np +import pytest + +from pandas import DataFrame, MultiIndex +import pandas._testing as tm + + +class TestMultiIndexPartial: + def test_getitem_partial_int(self): + # GH 12416 + # with single item + l1 = [10, 20] + l2 = ["a", "b"] + df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2])) + expected = DataFrame(index=range(2), columns=l2) + result = df[20] + tm.assert_frame_equal(result, expected) + + # with list + expected = DataFrame( + index=range(2), columns=MultiIndex.from_product([l1[1:], l2]) + ) + result = df[[20]] + tm.assert_frame_equal(result, expected) + + # missing item: + with pytest.raises(KeyError, match="1"): + df[1] + with pytest.raises(KeyError, match=r"'\[1\] not in index'"): + df[[1]] + + def test_series_slice_partial(self): + pass + + def test_xs_partial( + self, + multiindex_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data, + ): + frame = multiindex_dataframe_random_data + ymd = multiindex_year_month_day_dataframe_random_data + result = frame.xs("foo") + result2 = frame.loc["foo"] + expected = frame.T["foo"].T + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) + + result = ymd.xs((2000, 4)) + expected = ymd.loc[2000, 4] + tm.assert_frame_equal(result, expected) + + # ex from #1796 + index = MultiIndex( + levels=[["foo", "bar"], ["one", "two"], [-1, 1]], + codes=[ + [0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1], + [0, 1, 0, 1, 0, 1, 0, 1], + ], + ) + df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd")) + + result = df.xs(["foo", "one"]) + expected = df.loc["foo", "one"] + tm.assert_frame_equal(result, expected) + + def 
test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + ymd = ymd.T + result = ymd[2000, 2] + + expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1]) + expected.columns = expected.columns.droplevel(0).droplevel(0) + tm.assert_frame_equal(result, expected) + + def test_fancy_slice_partial( + self, + multiindex_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data, + ): + frame = multiindex_dataframe_random_data + result = frame.loc["bar":"baz"] + expected = frame[3:7] + tm.assert_frame_equal(result, expected) + + ymd = multiindex_year_month_day_dataframe_random_data + result = ymd.loc[(2000, 2):(2000, 4)] + lev = ymd.index.codes[1] + expected = ymd[(lev >= 1) & (lev <= 3)] + tm.assert_frame_equal(result, expected) + + def test_getitem_partial_column_select(self): + idx = MultiIndex( + codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]], + levels=[["a", "b"], ["x", "y"], ["p", "q"]], + ) + df = DataFrame(np.random.rand(3, 2), index=idx) + + result = df.loc[("a", "y"), :] + expected = df.loc[("a", "y")] + tm.assert_frame_equal(result, expected) + + result = df.loc[("a", "y"), [1, 0]] + expected = df.loc[("a", "y")][[1, 0]] + tm.assert_frame_equal(result, expected) + + with pytest.raises(KeyError, match=r"\('a', 'foo'\)"): + df.loc[("a", "foo"), :] + + def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): + # GH #397 + ymd = multiindex_year_month_day_dataframe_random_data + df = ymd.copy() + exp = ymd.copy() + df.loc[2000, 4] = 0 + exp.loc[2000, 4].values[:] = 0 + tm.assert_frame_equal(df, exp) + + df["A"].loc[2000, 4] = 1 + exp["A"].loc[2000, 4].values[:] = 1 + tm.assert_frame_equal(df, exp) + + df.loc[2000] = 5 + exp.loc[2000].values[:] = 5 + tm.assert_frame_equal(df, exp) + + # this works...for now + df["A"].iloc[14] = 5 + assert df["A"][14] == 5 + + # --------------------------------------------------------------------- + # AMBIGUOUS CASES! 
+ + def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_data): + pytest.skip("skipping for now") + + ymd = multiindex_year_month_day_dataframe_random_data + result = ymd.loc[2000, 0] + expected = ymd.loc[2000]["A"] + tm.assert_series_equal(result, expected) + + # need to put in some work here + + # self.ymd.loc[2000, 0] = 0 + # assert (self.ymd.loc[2000]['A'] == 0).all() + + # Pretty sure the second (and maybe even the first) is already wrong. + with pytest.raises(Exception): + ymd.loc[(2000, 6)] + with pytest.raises(Exception): + ymd.loc[(2000, 6), 0] + + # --------------------------------------------------------------------- + + def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + expected = frame.copy() + result = frame.copy() + result.loc[["foo", "bar"]] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_frame_equal(result, expected) + + expected = frame.copy() + result = frame.copy() + result.loc["foo":"bar"] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_frame_equal(result, expected) + + expected = frame["A"].copy() + result = frame["A"].copy() + result.loc[["foo", "bar"]] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_series_equal(result, expected) + + expected = frame["A"].copy() + result = frame["A"].copy() + result.loc["foo":"bar"] = 0 + expected.loc["foo"] = 0 + expected.loc["bar"] = 0 + tm.assert_series_equal(result, expected) + + +def test_loc_getitem_partial_both_axis(): + # gh-12660 + iterables = [["a", "b"], [2, 1]] + columns = MultiIndex.from_product(iterables, names=["col1", "col2"]) + rows = MultiIndex.from_product(iterables, names=["row1", "row2"]) + df = DataFrame(np.random.randn(4, 4), index=rows, columns=columns) + expected = df.iloc[:2, 2:].droplevel("row1").droplevel("col1", axis=1) + result = df.loc["a", "b"] + tm.assert_frame_equal(result, expected) diff --git 
a/pandas/tests/indexing/multiindex/test_set_ops.py b/pandas/tests/indexing/multiindex/test_set_ops.py new file mode 100644 index 00000000..f2cbfadb --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_set_ops.py @@ -0,0 +1,41 @@ +from numpy.random import randn + +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + + +class TestMultiIndexSetOps: + def test_multiindex_symmetric_difference(self): + # GH 13490 + idx = MultiIndex.from_product([["a", "b"], ["A", "B"]], names=["a", "b"]) + result = idx ^ idx + assert result.names == idx.names + + idx2 = idx.copy().rename(["A", "B"]) + result = idx ^ idx2 + assert result.names == [None, None] + + def test_mixed_depth_insert(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(randn(4, 6), columns=index) + + result = df.copy() + expected = df.copy() + result["b"] = [1, 2, 3, 4] + expected["b", "", ""] = [1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + def test_dataframe_insert_column_all_na(self): + # GH #1534 + mix = MultiIndex.from_tuples([("1a", "2a"), ("1a", "2b"), ("1a", "2c")]) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) + s = Series({(1, 1): 1, (1, 2): 2}) + df["new"] = s + assert df["new"].isna().all() diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py new file mode 100644 index 00000000..1e641760 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -0,0 +1,453 @@ +import numpy as np +from numpy.random import randn +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna +import pandas._testing as tm +import pandas.core.common as com + + +class TestMultiIndexSetItem: + def test_setitem_multiindex(self): + for 
index_fn in ("loc",): + + def assert_equal(a, b): + assert a == b + + def check(target, indexers, value, compare_fn, expected=None): + fn = getattr(target, index_fn) + fn.__setitem__(indexers, value) + result = fn.__getitem__(indexers) + if expected is None: + expected = value + compare_fn(result, expected) + + # GH7190 + index = MultiIndex.from_product( + [np.arange(0, 100), np.arange(0, 80)], names=["time", "firm"] + ) + t, n = 0, 2 + df = DataFrame( + np.nan, + columns=["A", "w", "l", "a", "x", "X", "d", "profit"], + index=index, + ) + check(target=df, indexers=((t, n), "X"), value=0, compare_fn=assert_equal) + + df = DataFrame( + -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index + ) + check(target=df, indexers=((t, n), "X"), value=1, compare_fn=assert_equal) + + df = DataFrame( + columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index + ) + check(target=df, indexers=((t, n), "X"), value=2, compare_fn=assert_equal) + + # gh-7218: assigning with 0-dim arrays + df = DataFrame( + -999, columns=["A", "w", "l", "a", "x", "X", "d", "profit"], index=index + ) + check( + target=df, + indexers=((t, n), "X"), + value=np.array(3), + compare_fn=assert_equal, + expected=3, + ) + + # GH5206 + df = DataFrame( + np.arange(25).reshape(5, 5), columns="A,B,C,D,E".split(","), dtype=float + ) + df["F"] = 99 + row_selection = df["A"] % 2 == 0 + col_selection = ["B", "C"] + df.loc[row_selection, col_selection] = df["F"] + output = DataFrame(99.0, index=[0, 2, 4], columns=["B", "C"]) + tm.assert_frame_equal(df.loc[row_selection, col_selection], output) + check( + target=df, + indexers=(row_selection, col_selection), + value=df["F"], + compare_fn=tm.assert_frame_equal, + expected=output, + ) + + # GH11372 + idx = MultiIndex.from_product( + [["A", "B", "C"], date_range("2015-01-01", "2015-04-01", freq="MS")] + ) + cols = MultiIndex.from_product( + [["foo", "bar"], date_range("2016-01-01", "2016-02-01", freq="MS")] + ) + + df = 
DataFrame(np.random.random((12, 4)), index=idx, columns=cols) + + subidx = MultiIndex.from_tuples( + [("A", Timestamp("2015-01-01")), ("A", Timestamp("2015-02-01"))] + ) + subcols = MultiIndex.from_tuples( + [("foo", Timestamp("2016-01-01")), ("foo", Timestamp("2016-02-01"))] + ) + + vals = DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) + check( + target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # set all columns + vals = DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) + check( + target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=tm.assert_frame_equal, + ) + # identity + copy = df.copy() + check( + target=df, + indexers=(df.index, df.columns), + value=df, + compare_fn=tm.assert_frame_equal, + expected=copy, + ) + + def test_multiindex_setitem(self): + + # GH 3738 + # setting with a multi-index right hand side + arrays = [ + np.array(["bar", "bar", "baz", "qux", "qux", "bar"]), + np.array(["one", "two", "one", "one", "two", "one"]), + np.arange(0, 6, 1), + ] + + df_orig = DataFrame( + np.random.randn(6, 3), index=arrays, columns=["A", "B", "C"] + ).sort_index() + + expected = df_orig.loc[["bar"]] * 2 + df = df_orig.copy() + df.loc[["bar"]] *= 2 + tm.assert_frame_equal(df.loc[["bar"]], expected) + + # raise because these have differing levels + with pytest.raises(TypeError): + df.loc["bar"] *= 2 + + # from SO + # https://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation + df_orig = DataFrame.from_dict( + { + "price": { + ("DE", "Coal", "Stock"): 2, + ("DE", "Gas", "Stock"): 4, + ("DE", "Elec", "Demand"): 1, + ("FR", "Gas", "Stock"): 5, + ("FR", "Solar", "SupIm"): 0, + ("FR", "Wind", "SupIm"): 0, + } + } + ) + df_orig.index = MultiIndex.from_tuples( + df_orig.index, names=["Sit", "Com", "Type"] + ) + + expected = df_orig.copy() + expected.iloc[[0, 2, 3]] *= 2 + + idx = pd.IndexSlice + df = df_orig.copy() + 
df.loc[idx[:, :, "Stock"], :] *= 2 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, :, "Stock"], "price"] *= 2 + tm.assert_frame_equal(df, expected) + + def test_multiindex_assignment(self): + + # GH3777 part 2 + + # mixed dtype + df = DataFrame( + np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list("abc"), + index=[[4, 4, 8], [8, 10, 12]], + ) + df["d"] = np.nan + arr = np.array([0.0, 1.0]) + + df.loc[4, "d"] = arr + tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) + + # single dtype + df = DataFrame( + np.random.randint(5, 10, size=9).reshape(3, 3), + columns=list("abc"), + index=[[4, 4, 8], [8, 10, 12]], + ) + + df.loc[4, "c"] = arr + exp = Series(arr, index=[8, 10], name="c", dtype="float64") + tm.assert_series_equal(df.loc[4, "c"], exp) + + # scalar ok + df.loc[4, "c"] = 10 + exp = Series(10, index=[8, 10], name="c", dtype="float64") + tm.assert_series_equal(df.loc[4, "c"], exp) + + # invalid assignments + with pytest.raises(ValueError): + df.loc[4, "c"] = [0, 1, 2, 3] + + with pytest.raises(ValueError): + df.loc[4, "c"] = [0] + + # groupby example + NUM_ROWS = 100 + NUM_COLS = 10 + col_names = ["A" + num for num in map(str, np.arange(NUM_COLS).tolist())] + index_cols = col_names[:5] + + df = DataFrame( + np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), + dtype=np.int64, + columns=col_names, + ) + df = df.set_index(index_cols).sort_index() + grp = df.groupby(level=index_cols[:4]) + df["new_col"] = np.nan + + f_index = np.arange(5) + + def f(name, df2): + return Series(np.arange(df2.shape[0]), name=df2.index.values[0]).reindex( + f_index + ) + + # TODO(wesm): unused? 
+ # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T + + # we are actually operating on a copy here + # but in this case, that's ok + for name, df2 in grp: + new_vals = np.arange(df2.shape[0]) + df.loc[name, "new_col"] = new_vals + + def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd["A"] + + s[2000, 3] = np.nan + assert isna(s.values[42:65]).all() + assert notna(s.values[:42]).all() + assert notna(s.values[65:]).all() + + s[2000, 3, 10] = np.nan + assert isna(s[49]) + + def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T.copy() + values = df.values + + result = df[df > 0] + expected = df.where(df > 0) + tm.assert_frame_equal(result, expected) + + df[df > 0] = 5 + values[values > 0] = 5 + tm.assert_almost_equal(df.values, values) + + df[df == 5] = 0 + values[values == 5] = 0 + tm.assert_almost_equal(df.values, values) + + # a df that needs alignment first + df[df[:-1] < 0] = 2 + np.putmask(values[:-1], values[:-1] < 0, 2) + tm.assert_almost_equal(df.values, values) + + with pytest.raises(TypeError, match="boolean values only"): + df[df * 0] = 2 + + def test_frame_getitem_setitem_multislice(self): + levels = [["t1", "t2"], ["a", "b", "c"]] + codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] + midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"]) + df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx) + + result = df.loc[:, "value"] + tm.assert_series_equal(df["value"], result) + + result = df.loc[df.index[1:3], "value"] + tm.assert_series_equal(df["value"][1:3], result) + + result = df.loc[:, :] + tm.assert_frame_equal(df, result) + + result = df + df.loc[:, "value"] = 10 + result["value"] = 10 + tm.assert_frame_equal(df, result) + + df.loc[:, :] = 10 + tm.assert_frame_equal(df, result) + + def test_frame_setitem_multi_column(self): + df = DataFrame(randn(10, 4), 
columns=[["a", "a", "b", "b"], [0, 1, 0, 1]]) + + cp = df.copy() + cp["a"] = cp["b"] + tm.assert_frame_equal(cp["a"], cp["b"]) + + # set with ndarray + cp = df.copy() + cp["a"] = cp["b"].values + tm.assert_frame_equal(cp["a"], cp["b"]) + + # --------------------------------------- + # #1803 + columns = MultiIndex.from_tuples([("A", "1"), ("A", "2"), ("B", "1")]) + df = DataFrame(index=[1, 3, 5], columns=columns) + + # Works, but adds a column instead of updating the two existing ones + df["A"] = 0.0 # Doesn't work + assert (df["A"].values == 0).all() + + # it broadcasts + df["B", "1"] = [1, 2, 3] + df["A"] = df["B", "1"] + + sliced_a1 = df["A", "1"] + sliced_a2 = df["A", "2"] + sliced_b1 = df["B", "1"] + tm.assert_series_equal(sliced_a1, sliced_b1, check_names=False) + tm.assert_series_equal(sliced_a2, sliced_b1, check_names=False) + assert sliced_a1.name == ("A", "1") + assert sliced_a2.name == ("A", "2") + assert sliced_b1.name == ("B", "1") + + def test_getitem_setitem_tuple_plus_columns( + self, multiindex_year_month_day_dataframe_random_data + ): + # GH #1013 + ymd = multiindex_year_month_day_dataframe_random_data + df = ymd[:5] + + result = df.loc[(2000, 1, 6), ["A", "B", "C"]] + expected = df.loc[2000, 1, 6][["A", "B", "C"]] + tm.assert_series_equal(result, expected) + + def test_getitem_setitem_slice_integers(self): + index = MultiIndex( + levels=[[0, 1, 2], [0, 2]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]] + ) + + frame = DataFrame( + np.random.randn(len(index), 4), index=index, columns=["a", "b", "c", "d"] + ) + res = frame.loc[1:2] + exp = frame.reindex(frame.index[2:]) + tm.assert_frame_equal(res, exp) + + frame.loc[1:2] = 7 + assert (frame.loc[1:2] == 7).values.all() + + series = Series(np.random.randn(len(index)), index=index) + + res = series.loc[1:2] + exp = series.reindex(series.index[2:]) + tm.assert_series_equal(res, exp) + + series.loc[1:2] = 7 + assert (series.loc[1:2] == 7).values.all() + + def test_setitem_change_dtype(self, 
multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + dft = frame.T + s = dft["foo", "two"] + dft["foo", "two"] = s > s.median() + tm.assert_series_equal(dft["foo", "two"], s > s.median()) + # assert isinstance(dft._data.blocks[1].items, MultiIndex) + + reindexed = dft.reindex(columns=[("foo", "two")]) + tm.assert_series_equal(reindexed["foo", "two"], s > s.median()) + + def test_set_column_scalar_with_loc(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + subset = frame.index[[1, 4, 5]] + + frame.loc[subset] = 99 + assert (frame.loc[subset].values == 99).all() + + col = frame["B"] + col[subset] = 97 + assert (frame.loc[subset, "B"] == 97).all() + + def test_nonunique_assignment_1750(self): + df = DataFrame( + [[1, 1, "x", "X"], [1, 1, "y", "Y"], [1, 2, "z", "Z"]], columns=list("ABCD") + ) + + df = df.set_index(["A", "B"]) + ix = MultiIndex.from_tuples([(1, 1)]) + + df.loc[ix, "C"] = "_" + + assert (df.xs((1, 1))["C"] == "_").all() + + def test_astype_assignment_with_dups(self): + + # GH 4686 + # assignment with dups that has a dtype change + cols = MultiIndex.from_tuples([("A", "1"), ("B", "1"), ("A", "2")]) + df = DataFrame(np.arange(3).reshape((1, 3)), columns=cols, dtype=object) + index = df.index.copy() + + df["A"] = df["A"].astype(np.float64) + tm.assert_index_equal(df.index, index) + + def test_setitem_nonmonotonic(self): + # https://github.com/pandas-dev/pandas/issues/31449 + index = pd.MultiIndex.from_tuples( + [("a", "c"), ("b", "x"), ("a", "d")], names=["l1", "l2"] + ) + df = pd.DataFrame(data=[0, 1, 2], index=index, columns=["e"]) + df.loc["a", "e"] = np.arange(99, 101, dtype="int64") + expected = pd.DataFrame({"e": [99, 1, 100]}, index=index) + tm.assert_frame_equal(df, expected) + + +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): + # this works because we are modifying the underlying array + # really a no-no + df = multiindex_dataframe_random_data.T + 
df["foo"].values[:] = 0 + assert (df["foo"].values == 0).all() + + +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): + # will raise/warn as its chained assignment + df = multiindex_dataframe_random_data.T + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df["foo"]["one"] = 2 + + +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data.T + expected = frame + df = frame.copy() + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df["foo"]["one"] = 2 + + result = df + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py new file mode 100644 index 00000000..6fa9d3bd --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -0,0 +1,740 @@ +import numpy as np +import pytest + +from pandas.errors import UnsortedIndexError + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp +import pandas._testing as tm +from pandas.core.indexing import _non_reducing_slice +from pandas.tests.indexing.common import _mklbl + + +class TestMultiIndexSlicers: + def test_per_axis_per_level_getitem(self): + + # GH6134 + # example test case + ix = MultiIndex.from_product( + [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)] + ) + df = DataFrame(np.arange(len(ix.to_numpy())), index=ix) + + result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + expected = df.loc[ + [ + tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") + and (c == 
"C1" or c == "C2" or c == "C3") + ] + ] + result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :] + tm.assert_frame_equal(result, expected) + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples( + [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + + df = DataFrame( + np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns + ) + df = df.sort_index(axis=0).sort_index(axis=1) + + # identity + result = df.loc[(slice(None), slice(None)), :] + tm.assert_frame_equal(result, df) + result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + result = df.loc[:, (slice(None), slice(None))] + tm.assert_frame_equal(result, df) + + # index + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), 1), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # columns + result = df.loc[:, (slice(None), ["foo"])] + expected = df.iloc[:, [1, 3]] + tm.assert_frame_equal(result, expected) + + # both + result = df.loc[(slice(None), 1), (slice(None), ["foo"])] + expected = df.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(result, expected) + + result = df.loc["A", "a"] + expected = DataFrame( + dict(bar=[1, 5, 9], foo=[0, 4, 8]), + index=Index([1, 2, 3], name="two"), + columns=Index(["bar", "foo"], name="lvl1"), + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), [1, 2]), :] + expected = df.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # multi-level series + s = Series(np.arange(len(ix.to_numpy())), index=ix) + result = s.loc["A1":"A3", :, ["C1", "C3"]] + expected = s.loc[ + [ + tuple([a, b, c, d]) + for a, b, c, d in s.index.values + if (a == "A1" or a == "A2" or a == 
"A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_series_equal(result, expected) + + # boolean indexers + result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] + expected = df.iloc[[2, 3]] + tm.assert_frame_equal(result, expected) + + with pytest.raises(ValueError): + df.loc[(slice(None), np.array([True, False])), :] + + # ambiguous notation + # this is interpreted as slicing on both axes (GH #16396) + result = df.loc[slice(None), [1]] + expected = df.iloc[:, []] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] + tm.assert_frame_equal(result, expected) + + # not lexsorted + assert df.index.lexsort_depth == 2 + df = df.sort_index(level=1, axis=0) + assert df.index.lexsort_depth == 0 + + msg = ( + "MultiIndex slicing requires the index to be " + r"lexsorted: slicing on levels \[1\], lexsort depth 0" + ) + with pytest.raises(UnsortedIndexError, match=msg): + df.loc[(slice(None), slice("bar")), :] + + # GH 16734: not sorted, but no real slicing + result = df.loc[(slice(None), df.loc[:, ("a", "bar")] > 5), :] + tm.assert_frame_equal(result, df.iloc[[1, 3], :]) + + def test_multiindex_slicers_non_unique(self): + + # GH 7106 + # non-unique mi index support + df = ( + DataFrame( + dict( + A=["foo", "foo", "foo", "foo"], + B=["a", "a", "a", "a"], + C=[1, 2, 1, 3], + D=[1, 2, 3, 4], + ) + ) + .set_index(["A", "B", "C"]) + .sort_index() + ) + assert not df.index.is_unique + expected = ( + DataFrame(dict(A=["foo", "foo"], B=["a", "a"], C=[1, 1], D=[1, 3])) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.loc[(slice(None), slice(None), 1), :] + tm.assert_frame_equal(result, expected) + + # this is equivalent of an xs expression + result = df.xs(1, level=2, drop_level=False) + tm.assert_frame_equal(result, expected) + + df = ( + DataFrame( + dict( + A=["foo", "foo", "foo", "foo"], + B=["a", "a", "a", "a"], + C=[1, 2, 1, 2], + D=[1, 2, 3, 4], + ) + ) + .set_index(["A", "B", "C"]) + 
.sort_index() + ) + assert not df.index.is_unique + expected = ( + DataFrame(dict(A=["foo", "foo"], B=["a", "a"], C=[1, 1], D=[1, 3])) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.loc[(slice(None), slice(None), 1), :] + assert not result.index.is_unique + tm.assert_frame_equal(result, expected) + + # GH12896 + # numpy-implementation dependent bug + ints = [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 12, + 13, + 14, + 14, + 16, + 17, + 18, + 19, + 200000, + 200000, + ] + n = len(ints) + idx = MultiIndex.from_arrays([["a"] * n, ints]) + result = Series([1] * n, index=idx) + result = result.sort_index() + result = result.loc[(slice(None), slice(100000))] + expected = Series([1] * (n - 2), index=idx[:-2]).sort_index() + tm.assert_series_equal(result, expected) + + def test_multiindex_slicers_datetimelike(self): + + # GH 7429 + # buggy/inconsistent behavior when slicing with datetime-like + import datetime + + dates = [ + datetime.datetime(2012, 1, 1, 12, 12, 12) + datetime.timedelta(days=i) + for i in range(6) + ] + freq = [1, 2] + index = MultiIndex.from_product([dates, freq], names=["date", "frequency"]) + + df = DataFrame( + np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4), + index=index, + columns=list("ABCD"), + ) + + # multi-axis slicing + idx = pd.IndexSlice + expected = df.iloc[[0, 2, 4], [0, 1]] + result = df.loc[ + ( + slice( + Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") + ), + slice(1, 1), + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + ( + idx[ + Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12") + ], + idx[1:1], + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + ( + slice( + Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12") + ), + 1, + ), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + # with strings + result = df.loc[ + (slice("2012-01-01 12:12:12", 
"2012-01-03 12:12:12"), slice(1, 1)), + slice("A", "B"), + ] + tm.assert_frame_equal(result, expected) + + result = df.loc[ + (idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"] + ] + tm.assert_frame_equal(result, expected) + + def test_multiindex_slicers_edges(self): + # GH 8132 + # various edge cases + df = DataFrame( + { + "A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5, + "B": ["B0", "B0", "B1", "B1", "B2"] * 3, + "DATE": [ + "2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-06-11", + "2013-07-02", + "2013-07-09", + "2013-07-30", + "2013-08-06", + "2013-09-03", + "2013-10-01", + "2013-07-09", + "2013-08-06", + "2013-09-03", + ], + "VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2], + } + ) + + df["DATE"] = pd.to_datetime(df["DATE"]) + df1 = df.set_index(["A", "B", "DATE"]) + df1 = df1.sort_index() + + # A1 - Get all values under "A0" and "A1" + result = df1.loc[(slice("A1")), :] + expected = df1.iloc[0:10] + tm.assert_frame_equal(result, expected) + + # A2 - Get all values from the start to "A2" + result = df1.loc[(slice("A2")), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # A3 - Get all values under "B1" or "B2" + result = df1.loc[(slice(None), slice("B1", "B2")), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] + tm.assert_frame_equal(result, expected) + + # A4 - Get all values between 2013-07-02 and 2013-07-09 + result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + tm.assert_frame_equal(result, expected) + + # B1 - Get all values in B0 that are also under A0, A1 and A2 + result = df1.loc[(slice("A2"), slice("B0")), :] + expected = df1.iloc[[0, 1, 5, 6, 10, 11]] + tm.assert_frame_equal(result, expected) + + # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for + # the As) + result = df1.loc[(slice(None), slice("B2")), :] + expected = df1 + tm.assert_frame_equal(result, expected) + + # B3 - 
Get all values from B1 to B2 and up to 2013-08-06 + result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] + tm.assert_frame_equal(result, expected) + + # B4 - Same as A4 but the start of the date slice is not a key. + # shows indexing on a partial selection slice + result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] + tm.assert_frame_equal(result, expected) + + def test_per_axis_per_level_doc_examples(self): + + # test index maker + idx = pd.IndexSlice + + # from indexing.rst / advanced + index = MultiIndex.from_product( + [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + df = DataFrame( + np.arange(len(index) * len(columns), dtype="int64").reshape( + (len(index), len(columns)) + ), + index=index, + columns=columns, + ) + result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :] + tm.assert_frame_equal(result, expected) + + result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :] + expected = df.loc[ + [ + tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + result = df.loc[idx[:, :, ["C1", "C3"]], :] + tm.assert_frame_equal(result, expected) + + # not sorted + with pytest.raises(UnsortedIndexError): + df.loc["A1", ("a", slice("foo"))] + + # GH 16734: not sorted, but no real slicing + tm.assert_frame_equal( + df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]] + ) + + df = df.sort_index(axis=1) + + # slicing + df.loc["A1", 
(slice(None), "foo")] + df.loc[(slice(None), slice(None), ["C1", "C3"]), (slice(None), "foo")] + + # setitem + df.loc(axis=0)[:, :, ["C1", "C3"]] = -10 + + def test_loc_axis_arguments(self): + + index = MultiIndex.from_product( + [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + df = ( + DataFrame( + np.arange(len(index) * len(columns), dtype="int64").reshape( + (len(index), len(columns)) + ), + index=index, + columns=columns, + ) + .sort_index() + .sort_index(axis=1) + ) + + # axis 0 + result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]] + expected = df.loc[ + [ + tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == "A1" or a == "A2" or a == "A3") and (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis="index")[:, :, ["C1", "C3"]] + expected = df.loc[ + [ + tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (c == "C1" or c == "C3") + ] + ] + tm.assert_frame_equal(result, expected) + + # axis 1 + result = df.loc(axis=1)[:, "foo"] + expected = df.loc[:, (slice(None), "foo")] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis="columns")[:, "foo"] + expected = df.loc[:, (slice(None), "foo")] + tm.assert_frame_equal(result, expected) + + # invalid axis + with pytest.raises(ValueError): + df.loc(axis=-1)[:, :, ["C1", "C3"]] + + with pytest.raises(ValueError): + df.loc(axis=2)[:, :, ["C1", "C3"]] + + with pytest.raises(ValueError): + df.loc(axis="foo")[:, :, ["C1", "C3"]] + + def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self): + + # GH29519 + df = pd.DataFrame( + np.arange(27).reshape(3, 9), + columns=pd.MultiIndex.from_product( + [["a1", "a2", "a3"], ["b1", "b2", "b3"]] + ), + ) + result = df.loc(axis=1)["a1":"a2"] + expected = df.iloc[:, :-3] + + tm.assert_frame_equal(result, expected) + + def 
test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self): + + # GH29519 + df = pd.DataFrame( + np.arange(27).reshape(3, 9), + columns=pd.MultiIndex.from_product( + [["a1", "a2", "a3"], ["b1", "b2", "b3"]] + ), + ) + result = df.loc(axis=1)["a1"] + expected = df.iloc[:, :3] + expected.columns = ["b1", "b2", "b3"] + + tm.assert_frame_equal(result, expected) + + def test_loc_ax_single_level_indexer_simple_df(self): + + # GH29519 + # test single level indexing on single index column data frame + df = pd.DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"]) + result = df.loc(axis=1)["a"] + expected = pd.Series(np.array([0, 3, 6]), name="a") + tm.assert_series_equal(result, expected) + + def test_per_axis_per_level_setitem(self): + + # test index maker + idx = pd.IndexSlice + + # test multi-index slicing with per axis and per index controls + index = MultiIndex.from_tuples( + [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"] + ) + columns = MultiIndex.from_tuples( + [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], + names=["lvl0", "lvl1"], + ) + + df_orig = DataFrame( + np.arange(16, dtype="int64").reshape(4, 4), index=index, columns=columns + ) + df_orig = df_orig.sort_index(axis=0).sort_index(axis=1) + + # identity + df = df_orig.copy() + df.loc[(slice(None), slice(None)), :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, :] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[:, (slice(None), slice(None))] = 100 + expected = df_orig.copy() + expected.iloc[:, :] = 100 + tm.assert_frame_equal(df, expected) + + # index + df = df_orig.copy() + df.loc[(slice(None), 
[1]), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[(slice(None), 1), :] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc(axis=0)[:, 1] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # columns + df = df_orig.copy() + df.loc[:, (slice(None), ["foo"])] = 100 + expected = df_orig.copy() + expected.iloc[:, [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # both + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[idx[:, 1], idx[:, ["foo"]]] = 100 + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc["A", "a"] = 100 + expected = df_orig.copy() + expected.iloc[0:3, 0:2] = 100 + tm.assert_frame_equal(df, expected) + + # setting with a list-like + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [[100, 100], [100, 100]], dtype="int64" + ) + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = 100 + tm.assert_frame_equal(df, expected) + + # not enough values + df = df_orig.copy() + + with pytest.raises(ValueError): + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [[100], [100, 100]], dtype="int64" + ) + + with pytest.raises(ValueError): + df.loc[(slice(None), 1), (slice(None), ["foo"])] = np.array( + [100, 100, 100, 100], dtype="int64" + ) + + # with an alignable rhs + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] = ( + df.loc[(slice(None), 1), (slice(None), ["foo"])] * 5 + ) + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 + tm.assert_frame_equal(df, expected) + + 
df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] *= df.loc[ + (slice(None), 1), (slice(None), ["foo"]) + ] + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + rhs = df_orig.loc[(slice(None), 1), (slice(None), ["foo"])].copy() + rhs.loc[:, ("c", "bah")] = 10 + df = df_orig.copy() + df.loc[(slice(None), 1), (slice(None), ["foo"])] *= rhs + expected = df_orig.copy() + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] + tm.assert_frame_equal(df, expected) + + def test_multiindex_label_slicing_with_negative_step(self): + s = Series( + np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)]) + ) + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) + + assert_slices_equivalent(SLC[::-1], SLC[::-1]) + + assert_slices_equivalent(SLC["d"::-1], SLC[15::-1]) + assert_slices_equivalent(SLC[("d",)::-1], SLC[15::-1]) + + assert_slices_equivalent(SLC[:"d":-1], SLC[:11:-1]) + assert_slices_equivalent(SLC[:("d",):-1], SLC[:11:-1]) + + assert_slices_equivalent(SLC["d":"b":-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[("d",):"b":-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC["d":("b",):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[("d",):("b",):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC["b":"d":-1], SLC[:0]) + + assert_slices_equivalent(SLC[("c", 2)::-1], SLC[10::-1]) + assert_slices_equivalent(SLC[:("c", 2):-1], SLC[:9:-1]) + assert_slices_equivalent(SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]) + + def test_multiindex_slice_first_level(self): + # GH 12697 + freq = ["a", "b", "c", "d"] + idx = MultiIndex.from_product([freq, np.arange(500)]) + df = DataFrame(list(range(2000)), index=idx, columns=["Test"]) + df_slice = df.loc[pd.IndexSlice[:, 30:70], :] + result = df_slice.loc["a"] + expected = DataFrame(list(range(30, 71)), 
columns=["Test"], index=range(30, 71)) + tm.assert_frame_equal(result, expected) + result = df_slice.loc["d"] + expected = DataFrame( + list(range(1530, 1571)), columns=["Test"], index=range(30, 71) + ) + tm.assert_frame_equal(result, expected) + + def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd["A"] + result = s[5:] + expected = s.reindex(s.index[5:]) + tm.assert_series_equal(result, expected) + + exp = ymd["A"].copy() + s[5:] = 0 + exp.values[5:] = 0 + tm.assert_numpy_array_equal(s.values, exp.values) + + result = ymd[5:] + expected = ymd.reindex(s.index[5:]) + tm.assert_frame_equal(result, expected) + + def test_non_reducing_slice_on_multiindex(self): + # GH 19861 + dic = { + ("a", "d"): [1, 4], + ("a", "c"): [2, 3], + ("b", "c"): [3, 2], + ("b", "d"): [4, 1], + } + df = pd.DataFrame(dic, index=[0, 1]) + idx = pd.IndexSlice + slice_ = idx[:, idx["b", "d"]] + tslice_ = _non_reducing_slice(slice_) + + result = df.loc[tslice_] + expected = pd.DataFrame({("b", "d"): [4, 1]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_sorted.py b/pandas/tests/indexing/multiindex/test_sorted.py new file mode 100644 index 00000000..4bec0f42 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_sorted.py @@ -0,0 +1,97 @@ +import numpy as np +from numpy.random import randn + +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + + +class TestMultiIndexSorted: + def test_getitem_multilevel_index_tuple_not_sorted(self): + index_columns = list("abc") + df = DataFrame( + [[0, 1, 0, "x"], [0, 0, 1, "y"]], columns=index_columns + ["data"] + ) + df = df.set_index(index_columns) + query_index = df.index[:1] + rs = df.loc[query_index, "data"] + + xp_idx = MultiIndex.from_tuples([(0, 1, 0)], names=["a", "b", "c"]) + xp = Series(["x"], index=xp_idx, name="data") + tm.assert_series_equal(rs, xp) + + def 
test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.sort_index(level=1).T + + # buglet with int typechecking + result = df.iloc[:, : np.int32(3)] + expected = df.reindex(columns=df.columns[:3]) + tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted2(self): + # 13431 + df = DataFrame( + { + "col1": ["b", "d", "b", "a"], + "col2": [3, 1, 1, 2], + "data": ["one", "two", "three", "four"], + } + ) + + df2 = df.set_index(["col1", "col2"]) + df2_original = df2.copy() + + df2.index.set_levels(["b", "d", "a"], level="col1", inplace=True) + df2.index.set_codes([0, 1, 0, 2], level="col1", inplace=True) + assert not df2.index.is_lexsorted() + assert not df2.index.is_monotonic + + assert df2_original.index.equals(df2.index) + expected = df2.sort_index() + assert expected.index.is_lexsorted() + assert expected.index.is_monotonic + + result = df2.sort_index(level=0) + assert result.index.is_lexsorted() + assert result.index.is_monotonic + tm.assert_frame_equal(result, expected) + + def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T + df["foo", "four"] = "foo" + + arrays = [np.array(x) for x in zip(*df.columns.values)] + + result = df["foo"] + result2 = df.loc[:, "foo"] + expected = df.reindex(columns=df.columns[arrays[0] == "foo"]) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + df = df.T + result = df.xs("foo") + result2 = df.loc["foo"] + expected = df.reindex(df.index[arrays[0] == "foo"]) + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_series_getitem_not_sorted(self): + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = 
zip(*arrays) + index = MultiIndex.from_tuples(tuples) + s = Series(randn(8), index=index) + + arrays = [np.array(x) for x in zip(*index.values)] + + result = s["qux"] + result2 = s.loc["qux"] + expected = s[arrays[0] == "qux"] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py new file mode 100644 index 00000000..db8c0c64 --- /dev/null +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -0,0 +1,245 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range +import pandas._testing as tm +import pandas.core.common as com + + +@pytest.fixture +def four_level_index_dataframe(): + arr = np.array( + [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + [-0.6662, -0.5243, -0.358, 0.89145, 2.5838], + ] + ) + index = MultiIndex( + levels=[["a", "x"], ["b", "q"], [10.0032, 20.0, 30.0], [3, 4, 5]], + codes=[[0, 0, 1], [0, 1, 1], [0, 1, 2], [2, 1, 0]], + names=["one", "two", "three", "four"], + ) + return DataFrame(arr, index=index, columns=list("ABCDE")) + + +@pytest.mark.parametrize( + "key, level, exp_arr, exp_index", + [ + ("a", "lvl0", lambda x: x[:, 0:2], Index(["bar", "foo"], name="lvl1")), + ("foo", "lvl1", lambda x: x[:, 1:2], Index(["a"], name="lvl0")), + ], +) +def test_xs_named_levels_axis_eq_1(key, level, exp_arr, exp_index): + # see gh-2903 + arr = np.random.randn(4, 4) + index = MultiIndex( + levels=[["a", "b"], ["bar", "foo", "hello", "world"]], + codes=[[0, 0, 1, 1], [0, 1, 2, 3]], + names=["lvl0", "lvl1"], + ) + df = DataFrame(arr, columns=index) + result = df.xs(key, level=level, axis=1) + expected = DataFrame(exp_arr(arr), columns=exp_index) + tm.assert_frame_equal(result, expected) + + +def test_xs_values(multiindex_dataframe_random_data): + df = 
multiindex_dataframe_random_data + result = df.xs(("bar", "two")).values + expected = df.values[4] + tm.assert_almost_equal(result, expected) + + +def test_xs_loc_equality(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs(("bar", "two")) + expected = df.loc[("bar", "two")] + tm.assert_series_equal(result, expected) + + +def test_xs_missing_values_in_index(): + # see gh-6574 + # missing values in returned index should be preserved + acc = [ + ("a", "abcde", 1), + ("b", "bbcde", 2), + ("y", "yzcde", 25), + ("z", "xbcde", 24), + ("z", None, 26), + ("z", "zbcde", 25), + ("z", "ybcde", 26), + ] + df = DataFrame(acc, columns=["a1", "a2", "cnt"]).set_index(["a1", "a2"]) + expected = DataFrame( + {"cnt": [24, 26, 25, 26]}, + index=Index(["xbcde", np.nan, "zbcde", "ybcde"], name="a2"), + ) + + result = df.xs("z", level="a1") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) +def test_xs_with_duplicates(key, level, multiindex_dataframe_random_data): + # see gh-13719 + frame = multiindex_dataframe_random_data + df = concat([frame] * 2) + assert df.index.is_unique is False + expected = concat([frame.xs("one", level="second")] * 2) + + result = df.xs(key, level=level) + tm.assert_frame_equal(result, expected) + + +def test_xs_level(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.xs("two", level="second") + expected = df[df.index.get_level_values(1) == "two"] + expected.index = Index(["foo", "bar", "baz", "qux"], name="first") + tm.assert_frame_equal(result, expected) + + +def test_xs_level_eq_2(): + arr = np.random.randn(3, 5) + index = MultiIndex( + levels=[["a", "p", "x"], ["b", "q", "y"], ["c", "r", "z"]], + codes=[[2, 0, 1], [2, 0, 1], [2, 0, 1]], + ) + df = DataFrame(arr, index=index) + expected = DataFrame(arr[1:2], index=[["a"], ["b"]]) + result = df.xs("c", level=2) + tm.assert_frame_equal(result, expected) + 
+ +@pytest.mark.parametrize( + "indexer", + [ + lambda df: df.xs(("a", 4), level=["one", "four"]), + lambda df: df.xs("a").xs(4, level="four"), + ], +) +def test_xs_level_multiple(indexer, four_level_index_dataframe): + df = four_level_index_dataframe + expected_values = [[0.4473, 1.4152, 0.2834, 1.00661, 0.1744]] + expected_index = MultiIndex( + levels=[["q"], [20.0]], codes=[[0], [0]], names=["two", "three"] + ) + expected = DataFrame(expected_values, index=expected_index, columns=list("ABCDE")) + result = indexer(df) + tm.assert_frame_equal(result, expected) + + +def test_xs_setting_with_copy_error(multiindex_dataframe_random_data): + # this is a copy in 0.14 + df = multiindex_dataframe_random_data + result = df.xs("two", level="second") + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + result[:] = 10 + + +def test_xs_setting_with_copy_error_multiple(four_level_index_dataframe): + # this is a copy in 0.14 + df = four_level_index_dataframe + result = df.xs(("a", 4), level=["one", "four"]) + + # setting this will give a SettingWithCopyError + # as we are trying to write a view + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + result[:] = 10 + + +def test_xs_integer_key(): + # see gh-2107 + dates = range(20111201, 20111205) + ids = list("abcde") + index = MultiIndex.from_product([dates, ids], names=["date", "secid"]) + df = DataFrame(np.random.randn(len(index), 3), index, ["X", "Y", "Z"]) + + result = df.xs(20111201, level="date") + expected = df.loc[20111201, :] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", [lambda df: df.xs("a", level=0), lambda df: df.xs("a")] +) +def test_xs_level0(indexer, four_level_index_dataframe): + df = four_level_index_dataframe + 
expected_values = [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.364], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + ] + expected_index = MultiIndex( + levels=[["b", "q"], [10.0032, 20.0], [4, 5]], + codes=[[0, 1], [0, 1], [1, 0]], + names=["two", "three", "four"], + ) + expected = DataFrame(expected_values, index=expected_index, columns=list("ABCDE")) + + result = indexer(df) + tm.assert_frame_equal(result, expected) + + +def test_xs_level_series(multiindex_dataframe_random_data): + # this test is not explicitly testing .xs functionality + # TODO: move to another module or refactor + df = multiindex_dataframe_random_data + s = df["A"] + result = s[:, "two"] + expected = df.xs("two", level=1)["A"] + tm.assert_series_equal(result, expected) + + +def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data): + # this test is not explicitly testing .xs functionality + # TODO: move to another module or refactor + df = multiindex_year_month_day_dataframe_random_data + s = df["A"] + result = s[2000, 5] + expected = df.loc[2000, 5]["A"] + tm.assert_series_equal(result, expected) + + +def test_xs_level_series_slice_not_implemented( + multiindex_year_month_day_dataframe_random_data, +): + # this test is not explicitly testing .xs functionality + # TODO: move to another module or refactor + # not implementing this for now + df = multiindex_year_month_day_dataframe_random_data + s = df["A"] + + msg = r"\(2000, slice\(3, 4, None\)\)" + with pytest.raises(TypeError, match=msg): + s[2000, 3:4] + + +def test_series_getitem_multiindex_xs(): + # GH6258 + dt = list(date_range("20130903", periods=3)) + idx = MultiIndex.from_product([list("AB"), dt]) + s = Series([1, 3, 4, 1, 3, 4], index=idx) + expected = Series([1, 1], index=list("AB")) + + result = s.xs("20130903", level=1) + tm.assert_series_equal(result, expected) + + +def test_series_getitem_multiindex_xs_by_label(): + # GH5684 + idx = MultiIndex.from_tuples( + [("a", "one"), ("a", "two"), ("b", "one"), ("b", 
"two")] + ) + s = Series([1, 2, 3, 4], index=idx) + s.index.set_names(["L1", "L2"], inplace=True) + expected = Series([1, 3], index=["a", "b"]) + expected.index.set_names(["L1"], inplace=True) + + result = s.xs("one", level="L2") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py new file mode 100644 index 00000000..621417eb --- /dev/null +++ b/pandas/tests/indexing/test_callable.py @@ -0,0 +1,260 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm + + +class TestIndexingCallable: + def test_frame_loc_callable(self): + # GH 11485 + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + # iloc cannot use boolean Series (see GH3635) + + # return bool indexer + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[lambda x: x.A > 2] + tm.assert_frame_equal(res, df.loc[df.A > 2]) + + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 + tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 + + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 + tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.B == "b", :] + tm.assert_frame_equal(res, df.loc[df.B == "b", :]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"] + tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: "B"] + tm.assert_series_equal(res, df.loc[df.A > 2, "B"]) + + res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = 
df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]]) + + # scalar + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + res = df.loc[lambda x: 1, lambda x: "A"] + assert res == df.loc[1, "A"] + + def test_frame_loc_callable_mixture(self): + # GH 11485 + df = pd.DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]}) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[lambda x: x.A > 2, ["A", "B"]] + tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[[2, 3], lambda x: ["A", "B"]] + tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + res = df.loc[3, lambda x: ["A", "B"]] + tm.assert_series_equal(res, df.loc[3, ["A", "B"]]) + + def test_frame_loc_callable_labels(self): + # GH 11485 + df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.loc[lambda x: ["A", "C"]] + tm.assert_frame_equal(res, df.loc[["A", "C"]]) + + res = df.loc[ + lambda x: ["A", "C"], + ] # noqa: E231 + tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231 + + res = df.loc[lambda x: ["A", "C"], :] + tm.assert_frame_equal(res, df.loc[["A", "C"], :]) + + res = df.loc[lambda x: ["A", "C"], lambda x: "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + # mixture + res = df.loc[["A", "C"], lambda x: "X"] + 
tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[["A", "C"], lambda x: ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + res = df.loc[lambda x: ["A", "C"], "X"] + tm.assert_series_equal(res, df.loc[["A", "C"], "X"]) + + res = df.loc[lambda x: ["A", "C"], ["X"]] + tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]]) + + def test_frame_loc_callable_setitem(self): + # GH 11485 + df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return label + res = df.copy() + res.loc[lambda x: ["A", "C"]] = -20 + exp = df.copy() + exp.loc[["A", "C"]] = -20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], :] = 20 + exp = df.copy() + exp.loc[["A", "C"], :] = 20 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10] + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = [5, 10] + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2]) + exp = df.copy() + exp.loc[["A", "C"], "X"] = np.array([-1, -2]) + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[["A", "C"], lambda x: ["X"]] = 10 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], "X"] = -2 + exp = df.copy() + exp.loc[["A", "C"], "X"] = -2 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.loc[lambda x: ["A", "C"], ["X"]] = -4 + exp = df.copy() + exp.loc[["A", "C"], ["X"]] = -4 + tm.assert_frame_equal(res, exp) + + def test_frame_iloc_callable(self): + # GH 11485 + df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.iloc[lambda x: [1, 3]] + tm.assert_frame_equal(res, df.iloc[[1, 3]]) + + res = 
df.iloc[lambda x: [1, 3], :] + tm.assert_frame_equal(res, df.iloc[[1, 3], :]) + + res = df.iloc[lambda x: [1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + # mixture + res = df.iloc[[1, 3], lambda x: 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[[1, 3], lambda x: [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + res = df.iloc[lambda x: [1, 3], 0] + tm.assert_series_equal(res, df.iloc[[1, 3], 0]) + + res = df.iloc[lambda x: [1, 3], [0]] + tm.assert_frame_equal(res, df.iloc[[1, 3], [0]]) + + def test_frame_iloc_callable_setitem(self): + # GH 11485 + df = pd.DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD")) + + # return location + res = df.copy() + res.iloc[lambda x: [1, 3]] = 0 + exp = df.copy() + exp.iloc[[1, 3]] = 0 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], :] = -1 + exp = df.copy() + exp.iloc[[1, 3], :] = -1 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: 0] = 5 + exp = df.copy() + exp.iloc[[1, 3], 0] = 5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], lambda x: [0]] = 25 + exp = df.copy() + exp.iloc[[1, 3], [0]] = 25 + tm.assert_frame_equal(res, exp) + + # mixture + res = df.copy() + res.iloc[[1, 3], lambda x: 0] = -3 + exp = df.copy() + exp.iloc[[1, 3], 0] = -3 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[[1, 3], lambda x: [0]] = -5 + exp = df.copy() + exp.iloc[[1, 3], [0]] = -5 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], 0] = 10 + exp = df.copy() + exp.iloc[[1, 3], 0] = 10 + tm.assert_frame_equal(res, exp) + + res = df.copy() + res.iloc[lambda x: [1, 3], [0]] = [-5, -5] + exp = df.copy() + exp.iloc[[1, 3], [0]] = [-5, -5] + tm.assert_frame_equal(res, exp) diff --git 
a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py new file mode 100644 index 00000000..8c8dece5 --- /dev/null +++ b/pandas/tests/indexing/test_categorical.py @@ -0,0 +1,822 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + Index, + Interval, + Series, + Timedelta, + Timestamp, + conftest, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT + + +class TestCategoricalIndex: + def setup_method(self, method): + + self.df = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": Series(list("aabbca")).astype(CDT(list("cab"))), + } + ).set_index("B") + self.df2 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": Series(list("aabbca")).astype(CDT(list("cabe"))), + } + ).set_index("B") + self.df3 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=True))), + } + ).set_index("B") + self.df4 = DataFrame( + { + "A": np.arange(6, dtype="int64"), + "B": (Series([1, 1, 2, 1, 3, 2]).astype(CDT([3, 2, 1], ordered=False))), + } + ).set_index("B") + + def test_loc_scalar(self): + result = self.df.loc["a"] + expected = DataFrame( + {"A": [0, 1, 5], "B": (Series(list("aaa")).astype(CDT(list("cab"))))} + ).set_index("B") + tm.assert_frame_equal(result, expected) + + df = self.df.copy() + df.loc["a"] = 20 + expected = DataFrame( + { + "A": [20, 20, 2, 3, 4, 20], + "B": (Series(list("aabbca")).astype(CDT(list("cab")))), + } + ).set_index("B") + tm.assert_frame_equal(df, expected) + + # value not in the categories + with pytest.raises(KeyError, match=r"^'d'$"): + df.loc["d"] + + msg = "cannot append a non-category item to a CategoricalIndex" + with pytest.raises(TypeError, match=msg): + df.loc["d"] = 10 + + msg = ( + "cannot insert an item into 
a CategoricalIndex that is not " + "already an existing category" + ) + with pytest.raises(TypeError, match=msg): + df.loc["d", "A"] = 10 + with pytest.raises(TypeError, match=msg): + df.loc["d", "C"] = 10 + + msg = ( + r"cannot do label indexing on with these indexers \[1\] of " + ) + with pytest.raises(TypeError, match=msg): + df.loc[1] + + def test_getitem_scalar(self): + + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + s = Series([1, 2], index=cats) + + expected = s.iloc[0] + result = s[cats[0]] + assert result == expected + + def test_slicing_directly(self): + cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) + sliced = cat[3] + assert sliced == "d" + sliced = cat[3:5] + expected = Categorical(["d", "a"], categories=["a", "b", "c", "d"]) + tm.assert_numpy_array_equal(sliced._codes, expected._codes) + tm.assert_index_equal(sliced.categories, expected.categories) + + def test_slicing(self): + cat = Series(Categorical([1, 2, 3, 4])) + reversed = cat[::-1] + exp = np.array([4, 3, 2, 1], dtype=np.int64) + tm.assert_numpy_array_equal(reversed.__array__(), exp) + + df = DataFrame({"value": (np.arange(100) + 1).astype("int64")}) + df["D"] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) + + expected = Series([11, Interval(0, 25)], index=["value", "D"], name=10) + result = df.iloc[10] + tm.assert_series_equal(result, expected) + + expected = DataFrame( + {"value": np.arange(11, 21).astype("int64")}, + index=np.arange(10, 20).astype("int64"), + ) + expected["D"] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) + result = df.iloc[10:20] + tm.assert_frame_equal(result, expected) + + expected = Series([9, Interval(0, 25)], index=["value", "D"], name=8) + result = df.loc[8] + tm.assert_series_equal(result, expected) + + def test_slicing_and_getting_ops(self): + + # systematically test the slicing operations: + # for all slicing ops: + # - returning a dataframe + # - returning a column + # - returning a row + # - returning a single value + + 
cats = Categorical( + ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"] + ) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 3, 4, 5, 6, 7] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + # the expected values + cats2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + idx2 = Index(["j", "k"]) + values2 = [3, 4] + + # 2:4,: | "j":"k",: + exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2) + + # :,"cats" | :,0 + exp_col = Series(cats, index=idx, name="cats") + + # "j",: | 2,: + exp_row = Series(["b", 3], index=["cats", "values"], dtype="object", name="j") + + # "j","cats | 2,0 + exp_val = "b" + + # iloc + # frame + res_df = df.iloc[2:4, :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + # row + res_row = df.iloc[2, :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + # col + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + # single value + res_val = df.iloc[2, 0] + assert res_val == exp_val + + # loc + # frame + res_df = df.loc["j":"k", :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + # row + res_row = df.loc["j", :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + # col + res_col = df.loc[:, "cats"] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + # single value + res_val = df.loc["j", "cats"] + assert res_val == exp_val + + # ix + # frame + # res_df = df.loc["j":"k",[0,1]] # doesn't work? 
+ res_df = df.loc["j":"k", :] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + # row + res_row = df.loc["j", :] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + # col + res_col = df.loc[:, "cats"] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + # single value + res_val = df.loc["j", df.columns[0]] + assert res_val == exp_val + + # iat + res_val = df.iat[2, 0] + assert res_val == exp_val + + # at + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # fancy indexing + exp_fancy = df.iloc[[2]] + + res_fancy = df[df["cats"] == "b"] + tm.assert_frame_equal(res_fancy, exp_fancy) + res_fancy = df[df["values"] == 3] + tm.assert_frame_equal(res_fancy, exp_fancy) + + # get_value + res_val = df.at["j", "cats"] + assert res_val == exp_val + + # i : int, slice, or sequence of integers + res_row = df.iloc[2] + tm.assert_series_equal(res_row, exp_row) + assert isinstance(res_row["cats"], str) + + res_df = df.iloc[slice(2, 4)] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + res_df = df.iloc[[2, 3]] + tm.assert_frame_equal(res_df, exp_df) + assert is_categorical_dtype(res_df["cats"]) + + res_col = df.iloc[:, 0] + tm.assert_series_equal(res_col, exp_col) + assert is_categorical_dtype(res_col) + + res_df = df.iloc[:, slice(0, 2)] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"]) + + res_df = df.iloc[:, [0, 1]] + tm.assert_frame_equal(res_df, df) + assert is_categorical_dtype(res_df["cats"]) + + def test_slicing_doc_examples(self): + + # GH 7918 + cats = Categorical( + ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"] + ) + idx = Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 2, 2, 3, 4, 5] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + result = df.iloc[2:4, :] + expected = DataFrame( + { + "cats": Categorical(["b", "b"], categories=["a", 
"b", "c"]), + "values": [2, 2], + }, + index=["j", "k"], + ) + tm.assert_frame_equal(result, expected) + + result = df.iloc[2:4, :].dtypes + expected = Series(["category", "int64"], ["cats", "values"]) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", "cats"] + expected = Series( + Categorical(["a", "b", "b"], categories=["a", "b", "c"]), + index=["h", "i", "j"], + name="cats", + ) + tm.assert_series_equal(result, expected) + + result = df.loc["h":"j", df.columns[0:1]] + expected = DataFrame( + {"cats": Categorical(["a", "b", "b"], categories=["a", "b", "c"])}, + index=["h", "i", "j"], + ) + tm.assert_frame_equal(result, expected) + + def test_getitem_category_type(self): + # GH 14580 + # test iloc() on Series with Categorical data + + s = Series([1, 2, 3]).astype("category") + + # get slice + result = s.iloc[0:2] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get list of indexes + result = s.iloc[[0, 1]] + expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # get boolean array + result = s.iloc[[True, False, False]] + expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) + tm.assert_series_equal(result, expected) + + def test_loc_listlike(self): + + # list of labels + result = self.df.loc[["c", "a"]] + expected = self.df.iloc[[4, 0, 1, 5]] + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = self.df2.loc[["a", "b", "e"]] + exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B") + expected = DataFrame({"A": [0, 1, 5, 2, 3, np.nan]}, index=exp_index) + tm.assert_frame_equal(result, expected, check_index_type=True) + + # element in the categories but not in the values + with pytest.raises(KeyError, match=r"^'e'$"): + self.df2.loc["e"] + + # assign is ok + df = self.df2.copy() + df.loc["e"] = 20 + result = df.loc[["a", "b", "e"]] + exp_index = 
CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B") + expected = DataFrame({"A": [0, 1, 5, 2, 3, 20]}, index=exp_index) + tm.assert_frame_equal(result, expected) + + df = self.df2.copy() + result = df.loc[["a", "b", "e"]] + exp_index = CategoricalIndex(list("aaabbe"), categories=list("cabe"), name="B") + expected = DataFrame({"A": [0, 1, 5, 2, 3, np.nan]}, index=exp_index) + tm.assert_frame_equal(result, expected, check_index_type=True) + + # not all labels in the categories + with pytest.raises( + KeyError, + match=( + "'a list-indexer must only include values that are in the categories'" + ), + ): + self.df2.loc[["a", "d"]] + + def test_loc_listlike_dtypes(self): + # GH 11586 + + # unique categories and codes + index = CategoricalIndex(["a", "b", "c"]) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[["a", "b"]] + exp_index = CategoricalIndex(["a", "b"], categories=index.categories) + exp = DataFrame({"A": [1, 2], "B": [4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + + exp_index = CategoricalIndex(["a", "a", "b"], categories=index.categories) + exp = DataFrame({"A": [1, 1, 2], "B": [4, 4, 5]}, index=exp_index) + tm.assert_frame_equal(res, exp, check_index_type=True) + + msg = "a list-indexer must only include values that are in the categories" + with pytest.raises(KeyError, match=msg): + df.loc[["a", "x"]] + + # duplicated categories and codes + index = CategoricalIndex(["a", "b", "a"]) + df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=index) + + # unique slice + res = df.loc[["a", "b"]] + exp = DataFrame( + {"A": [1, 3, 2], "B": [4, 6, 5]}, index=CategoricalIndex(["a", "a", "b"]) + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + exp = DataFrame( + {"A": [1, 3, 1, 3, 2], "B": [4, 6, 4, 6, 5]}, + index=CategoricalIndex(["a", "a", "a", 
"a", "b"]), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + msg = "a list-indexer must only include values that are in the categories" + with pytest.raises(KeyError, match=msg): + df.loc[["a", "x"]] + + # contains unused category + index = CategoricalIndex(["a", "b", "a", "c"], categories=list("abcde")) + df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]}, index=index) + + res = df.loc[["a", "b"]] + exp = DataFrame( + {"A": [1, 3, 2], "B": [5, 7, 6]}, + index=CategoricalIndex(["a", "a", "b"], categories=list("abcde")), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + res = df.loc[["a", "e"]] + exp = DataFrame( + {"A": [1, 3, np.nan], "B": [5, 7, np.nan]}, + index=CategoricalIndex(["a", "a", "e"], categories=list("abcde")), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + # duplicated slice + res = df.loc[["a", "a", "b"]] + exp = DataFrame( + {"A": [1, 3, 1, 3, 2], "B": [5, 7, 5, 7, 6]}, + index=CategoricalIndex(["a", "a", "a", "a", "b"], categories=list("abcde")), + ) + tm.assert_frame_equal(res, exp, check_index_type=True) + + msg = "a list-indexer must only include values that are in the categories" + with pytest.raises(KeyError, match=msg): + df.loc[["a", "x"]] + + def test_get_indexer_array(self): + arr = np.array( + [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")], + dtype=object, + ) + cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")] + ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category") + result = ci.get_indexer(arr) + expected = np.array([0, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_get_indexer_same_categories_same_order(self): + ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) + + result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"])) + expected = np.array([1, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def 
test_get_indexer_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19551 + ci = CategoricalIndex(["a", "b"], categories=["a", "b"]) + + result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"])) + expected = np.array([1, 1], dtype="intp") + tm.assert_numpy_array_equal(result, expected) + + def test_getitem_with_listlike(self): + # GH 16115 + cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")]) + + expected = DataFrame( + [[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats + ) + dummies = pd.get_dummies(cats) + result = dummies[list(dummies.columns)] + tm.assert_frame_equal(result, expected) + + def test_setitem_listlike(self): + + # GH 9469 + # properly coerce the input indexers + np.random.seed(1) + c = Categorical( + np.random.randint(0, 5, size=150000).astype(np.int8) + ).add_categories([-1000]) + indexer = np.array([100000]).astype(np.int64) + c[indexer] = -1000 + + # we are asserting the code result here + # which maps to the -1000 category + result = c.codes[np.array([100000]).astype(np.int64)] + tm.assert_numpy_array_equal(result, np.array([5], dtype="int8")) + + def test_ix_categorical_index(self): + # GH 12531 + df = DataFrame(np.random.randn(3, 3), index=list("ABC"), columns=list("XYZ")) + cdf = df.copy() + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) + + expect = Series(df.loc["A", :], index=cdf.columns, name="A") + tm.assert_series_equal(cdf.loc["A", :], expect) + + expect = Series(df.loc[:, "X"], index=cdf.index, name="X") + tm.assert_series_equal(cdf.loc[:, "X"], expect) + + exp_index = CategoricalIndex(list("AB"), categories=["A", "B", "C"]) + expect = DataFrame(df.loc[["A", "B"], :], columns=cdf.columns, index=exp_index) + tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) + + exp_columns = CategoricalIndex(list("XY"), categories=["X", "Y", "Z"]) + expect = DataFrame(df.loc[:, ["X", "Y"]], index=cdf.index, columns=exp_columns) + 
tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + + # non-unique + df = DataFrame(np.random.randn(3, 3), index=list("ABA"), columns=list("XYX")) + cdf = df.copy() + cdf.index = CategoricalIndex(df.index) + cdf.columns = CategoricalIndex(df.columns) + + exp_index = CategoricalIndex(list("AA"), categories=["A", "B"]) + expect = DataFrame(df.loc["A", :], columns=cdf.columns, index=exp_index) + tm.assert_frame_equal(cdf.loc["A", :], expect) + + exp_columns = CategoricalIndex(list("XX"), categories=["X", "Y"]) + expect = DataFrame(df.loc[:, "X"], index=cdf.index, columns=exp_columns) + tm.assert_frame_equal(cdf.loc[:, "X"], expect) + + expect = DataFrame( + df.loc[["A", "B"], :], + columns=cdf.columns, + index=CategoricalIndex(list("AAB")), + ) + tm.assert_frame_equal(cdf.loc[["A", "B"], :], expect) + + expect = DataFrame( + df.loc[:, ["X", "Y"]], + index=cdf.index, + columns=CategoricalIndex(list("XXY")), + ) + tm.assert_frame_equal(cdf.loc[:, ["X", "Y"]], expect) + + def test_read_only_source(self): + # GH 10043 + rw_array = np.eye(10) + rw_df = DataFrame(rw_array) + + ro_array = np.eye(10) + ro_array.setflags(write=False) + ro_df = DataFrame(ro_array) + + tm.assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]]) + tm.assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]]) + tm.assert_series_equal(rw_df.iloc[1], ro_df.iloc[1]) + tm.assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3]) + + tm.assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]]) + tm.assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]]) + tm.assert_series_equal(rw_df.loc[1], ro_df.loc[1]) + tm.assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3]) + + def test_reindexing(self): + df = DataFrame( + { + "A": np.arange(3, dtype="int64"), + "B": Series(list("abc")).astype(CDT(list("cabe"))), + } + ).set_index("B") + + # reindexing + # convert to a regular index + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) + 
tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["e"]) + expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["d"]) + expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + # since we are actually reindexing with a Categorical + # then return a Categorical + cats = list("cabe") + + result = df.reindex(Categorical(["a", "e"], categories=cats)) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(Categorical(["a"], categories=cats)) + expected = DataFrame( + {"A": [0], "B": Series(list("a")).astype(CDT(cats))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b", "e"]) + expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index( + "B" + ) + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["a", "b"]) + expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + result = df.reindex(["e"]) + expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + # give back the type of categorical that we received + result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True)) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))} + ).set_index("B") + tm.assert_frame_equal(result, 
expected, check_index_type=True) + + result = df.reindex(Categorical(["a", "d"], categories=["a", "d"])) + expected = DataFrame( + {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))} + ).set_index("B") + tm.assert_frame_equal(result, expected, check_index_type=True) + + # passed duplicate indexers are not allowed + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + self.df2.reindex(["a", "b"]) + + # args NotImplemented ATM + msg = r"argument {} is not implemented for CategoricalIndex\.reindex" + with pytest.raises(NotImplementedError, match=msg.format("method")): + df.reindex(["a"], method="ffill") + with pytest.raises(NotImplementedError, match=msg.format("level")): + df.reindex(["a"], level=1) + with pytest.raises(NotImplementedError, match=msg.format("limit")): + df.reindex(["a"], limit=2) + + def test_loc_slice(self): + # GH9748 + with pytest.raises(KeyError, match="1"): + self.df.loc[1:5] + + result = self.df.loc["b":"c"] + expected = self.df.iloc[[2, 3, 4]] + tm.assert_frame_equal(result, expected) + + def test_loc_and_at_with_categorical_index(self): + # GH 20629 + s = Series([1, 2, 3], index=pd.CategoricalIndex(["A", "B", "C"])) + assert s.loc["A"] == 1 + assert s.at["A"] == 1 + df = DataFrame( + [[1, 2], [3, 4], [5, 6]], index=pd.CategoricalIndex(["A", "B", "C"]) + ) + assert df.loc["B", 1] == 4 + assert df.at["B", 1] == 4 + + def test_boolean_selection(self): + + df3 = self.df3 + df4 = self.df4 + + result = df3[df3.index == "a"] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == "a"] + expected = df4.iloc[[]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index == 1] + expected = df3.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df4[df4.index == 1] + expected = df4.iloc[[0, 1, 3]] + tm.assert_frame_equal(result, expected) + + # since we have an ordered categorical + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # 
categories=[3, 2, 1], + # ordered=True, + # name='B') + result = df3[df3.index < 2] + expected = df3.iloc[[4]] + tm.assert_frame_equal(result, expected) + + result = df3[df3.index > 1] + expected = df3.iloc[[]] + tm.assert_frame_equal(result, expected) + + # unordered + # cannot be compared + + # CategoricalIndex([1, 1, 2, 1, 3, 2], + # categories=[3, 2, 1], + # ordered=False, + # name='B') + msg = "Unordered Categoricals can only compare equality or not" + with pytest.raises(TypeError, match=msg): + df4[df4.index < 2] + with pytest.raises(TypeError, match=msg): + df4[df4.index > 1] + + def test_indexing_with_category(self): + + # https://github.com/pandas-dev/pandas/issues/12564 + # consistent result if comparing as Dataframe + + cat = DataFrame({"A": ["foo", "bar", "baz"]}) + exp = DataFrame({"A": [True, False, False]}) + + res = cat[["A"]] == "foo" + tm.assert_frame_equal(res, exp) + + cat["A"] = cat["A"].astype("category") + + res = cat[["A"]] == "foo" + tm.assert_frame_equal(res, exp) + + def test_map_with_dict_or_series(self): + orig_values = ["a", "B", 1, "a"] + new_values = ["one", 2, 3.0, "one"] + cur_index = pd.CategoricalIndex(orig_values, name="XXX") + expected = pd.CategoricalIndex( + new_values, name="XXX", categories=[3.0, 2, "one"] + ) + + mapper = pd.Series(new_values[:-1], index=orig_values[:-1]) + output = cur_index.map(mapper) + # Order of categories in output can be different + tm.assert_index_equal(expected, output) + + mapper = {o: n for o, n in zip(orig_values[:-1], new_values[:-1])} + output = cur_index.map(mapper) + # Order of categories in output can be different + tm.assert_index_equal(expected, output) + + @pytest.mark.parametrize( + "idx_values", + [ + # python types + [1, 2, 3], + [-1, -2, -3], + [1.5, 2.5, 3.5], + [-1.5, -2.5, -3.5], + # numpy int/uint + *[np.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_INT_DTYPES], + # numpy floats + *[np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in conftest.FLOAT_DTYPES], + # numpy 
object + np.array([1, "b", 3.5], dtype=object), + # pandas scalars + [Interval(1, 4), Interval(4, 6), Interval(6, 9)], + [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], + [Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")], + # pandas Integer arrays + *[pd.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_EA_INT_DTYPES], + # other pandas arrays + pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array, + pd.date_range("2019-01-01", periods=3).array, + pd.timedelta_range(start="1d", periods=3).array, + ], + ) + def test_loc_with_non_string_categories(self, idx_values, ordered_fixture): + # GH-17569 + cat_idx = CategoricalIndex(idx_values, ordered=ordered_fixture) + df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx) + sl = slice(idx_values[0], idx_values[1]) + + # scalar selection + result = df.loc[idx_values[0]] + expected = Series(["foo"], index=["A"], name=idx_values[0]) + tm.assert_series_equal(result, expected) + + # list selection + result = df.loc[idx_values[:2]] + expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) + tm.assert_frame_equal(result, expected) + + # slice selection + result = df.loc[sl] + expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"]) + tm.assert_frame_equal(result, expected) + + # scalar assignment + result = df.copy() + result.loc[idx_values[0]] = "qux" + expected = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + # list assignment + result = df.copy() + result.loc[idx_values[:2], "A"] = ["qux", "qux2"] + expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) + + # slice assignment + result = df.copy() + result.loc[sl, "A"] = ["qux", "qux2"] + expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py 
b/pandas/tests/indexing/test_chaining_and_caching.py new file mode 100644 index 00000000..e845487f --- /dev/null +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -0,0 +1,394 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, Timestamp, date_range, option_context +import pandas._testing as tm +import pandas.core.common as com + + +class TestCaching: + def test_slice_consolidate_invalidate_item_cache(self): + + # this is chained assignment, but will 'work' + with option_context("chained_assignment", None): + + # #3970 + df = DataFrame({"aa": np.arange(5), "bb": [2.2] * 5}) + + # Creates a second float block + df["cc"] = 0.0 + + # caches a reference to the 'bb' series + df["bb"] + + # repr machinery triggers consolidation + repr(df) + + # Assignment to wrong series + df["bb"].iloc[0] = 0.17 + df._clear_item_cache() + tm.assert_almost_equal(df["bb"][0], 0.17) + + def test_setitem_cache_updating(self): + # GH 5424: exercise both the cached-reference and the uncached path + cont = ["one", "two", "three", "four", "five", "six", "seven"] + + for do_ref in [True, False]: + df = DataFrame({"a": cont, "b": cont[3:] + cont[:3], "c": np.arange(7)}) + + # ref the cache + if do_ref: + df.loc[0, "c"] + + # set it + df.loc[7, "c"] = 1 + + assert df.loc[0, "c"] == 0.0 + assert df.loc[7, "c"] == 1.0 + + # GH 7084 + # not updating cache on series setting with slices + expected = DataFrame( + {"A": [600, 600, 600]}, index=date_range("5/7/2014", "5/9/2014") + ) + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + df = DataFrame({"C": ["A", "A", "A"], "D": [100, 200, 300]}) + + # loop through df to update out + six = Timestamp("5/7/2014") + eix = Timestamp("5/9/2014") + for ix, row in df.iterrows(): + out.loc[six:eix, row["C"]] = out.loc[six:eix, row["C"]] + row["D"] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + # try via a chain indexing + # this actually works + out = DataFrame({"A": [0, 0, 0]}, 
index=date_range("5/7/2014", "5/9/2014")) + for ix, row in df.iterrows(): + v = out[row["C"]][six:eix] + row["D"] + out[row["C"]][six:eix] = v + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) + for ix, row in df.iterrows(): + out.loc[six:eix, row["C"]] += row["D"] + + tm.assert_frame_equal(out, expected) + tm.assert_series_equal(out["A"], expected["A"]) + + +class TestChaining: + def test_setitem_chained_setfault(self): + + # GH6026 + data = ["right", "left", "left", "left", "right", "left", "timeout"] + mdata = ["right", "left", "left", "left", "right", "left", "none"] + + df = DataFrame({"response": np.array(data)}) + mask = df.response == "timeout" + df.response[mask] = "none" + tm.assert_frame_equal(df, DataFrame({"response": mdata})) + + recarray = np.rec.fromarrays([data], names=["response"]) + df = DataFrame(recarray) + mask = df.response == "timeout" + df.response[mask] = "none" + tm.assert_frame_equal(df, DataFrame({"response": mdata})) + + df = DataFrame({"response": data, "response1": data}) + mask = df.response == "timeout" + df.response[mask] = "none" + tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) + + # GH 6056 + expected = DataFrame(dict(A=[np.nan, "bar", "bah", "foo", "bar"])) + df = DataFrame(dict(A=np.array(["foo", "bar", "bah", "foo", "bar"]))) + df["A"].iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + df = DataFrame(dict(A=np.array(["foo", "bar", "bah", "foo", "bar"]))) + df.A.iloc[0] = np.nan + result = df.head() + tm.assert_frame_equal(result, expected) + + def test_detect_chained_assignment(self): + + pd.set_option("chained_assignment", "raise") + + # work with the chain + expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB")) + df = DataFrame(np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64") + assert df._is_copy is None + + df["A"][0] = 
-5 + df["A"][1] = -6 + tm.assert_frame_equal(df, expected) + + # test with the chaining + df = DataFrame( + { + "A": Series(range(2), dtype="int64"), + "B": np.array(np.arange(2, 4), dtype=np.float64), + } + ) + assert df._is_copy is None + + with pytest.raises(com.SettingWithCopyError): + df["A"][0] = -5 + + with pytest.raises(com.SettingWithCopyError): + df["A"][1] = np.nan + + assert df["A"]._is_copy is None + + # Using a copy (the chain), fails + df = DataFrame( + { + "A": Series(range(2), dtype="int64"), + "B": np.array(np.arange(2, 4), dtype=np.float64), + } + ) + + with pytest.raises(com.SettingWithCopyError): + df.loc[0]["A"] = -5 + + # Doc example + df = DataFrame( + { + "a": ["one", "one", "two", "three", "two", "one", "six"], + "c": Series(range(7), dtype="int64"), + } + ) + assert df._is_copy is None + + with pytest.raises(com.SettingWithCopyError): + indexer = df.a.str.startswith("o") + df[indexer]["c"] = 42 + + expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) + df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) + + with pytest.raises(com.SettingWithCopyError): + df["A"][0] = 111 + + with pytest.raises(com.SettingWithCopyError): + df.loc[0]["A"] = 111 + + df.loc[0, "A"] = 111 + tm.assert_frame_equal(df, expected) + + # gh-5475: Make sure that is_copy is picked up reconstruction + df = DataFrame({"A": [1, 2]}) + assert df._is_copy is None + + with tm.ensure_clean("__tmp__pickle") as path: + df.to_pickle(path) + df2 = pd.read_pickle(path) + df2["B"] = df2["A"] + df2["B"] = df2["A"] + + # gh-5597: a spurious raise as we are setting the entire column here + from string import ascii_letters as letters + + def random_text(nobs=100): + df = [] + for i in range(nobs): + idx = np.random.randint(len(letters), size=2) + idx.sort() + + df.append([letters[idx[0] : idx[1]]]) + + return DataFrame(df, columns=["letters"]) + + df = random_text(100000) + + # Always a copy + x = df.iloc[[0, 1, 2]] + assert x._is_copy is not None + + x = 
df.iloc[[0, 1, 2, 4]] + assert x._is_copy is not None + + # Explicitly copy + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.loc[indexer].copy() + + assert df._is_copy is None + df["letters"] = df["letters"].apply(str.lower) + + # Implicitly take + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df = df.loc[indexer] + + assert df._is_copy is not None + df["letters"] = df["letters"].apply(str.lower) + + # Implicitly take 2 + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + + df = df.loc[indexer] + assert df._is_copy is not None + df.loc[:, "letters"] = df["letters"].apply(str.lower) + + # Should be ok even though it's a copy! + assert df._is_copy is None + + df["letters"] = df["letters"].apply(str.lower) + assert df._is_copy is None + + df = random_text(100000) + indexer = df.letters.apply(lambda x: len(x) > 10) + df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower) + + # an identical take, so no copy + df = DataFrame({"a": [1]}).dropna() + assert df._is_copy is None + df["a"] += 1 + + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0].sort_values() + + tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) + tm.assert_series_equal(s, df[0].sort_values()) + + # see gh-6025: false positives + df = DataFrame({"column1": ["a", "a", "a"], "column2": [4, 8, 9]}) + str(df) + + df["column1"] = df["column1"] + "b" + str(df) + + df = df[df["column2"] != 8] + str(df) + + df["column1"] = df["column1"] + "c" + str(df) + + # from SO: + # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc + df = DataFrame(np.arange(0, 9), columns=["count"]) + df["group"] = "b" + + with pytest.raises(com.SettingWithCopyError): + df.iloc[0:5]["group"] = "a" + + # Mixed type setting but same dtype & changing dtype + df = DataFrame( + dict( + A=date_range("20130101", periods=5), + B=np.random.randn(5), + C=np.arange(5, dtype="int64"), + 
D=list("abcde"), + ) + ) + + with pytest.raises(com.SettingWithCopyError): + df.loc[2]["D"] = "foo" + + with pytest.raises(com.SettingWithCopyError): + df.loc[2]["C"] = "foo" + + with pytest.raises(com.SettingWithCopyError): + df["C"][2] = "foo" + + def test_setting_with_copy_bug(self): + + # operating on a copy + df = DataFrame( + {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} + ) + mask = pd.isna(df.c) + + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df[["c"]][mask] = df[["b"]][mask] + + # invalid warning as we are returning a new object + # GH 8730 + df1 = DataFrame({"x": Series(["a", "b", "c"]), "y": Series(["d", "e", "f"])}) + df2 = df1[["x"]] + + # this should not raise + df2["y"] = ["g", "h", "i"] + + def test_detect_chained_assignment_warnings(self): + with option_context("chained_assignment", "warn"): + df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) + + with tm.assert_produces_warning(com.SettingWithCopyWarning): + df.loc[0]["A"] = 111 + + def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self): + # xref gh-13017. 
+ with option_context("chained_assignment", "warn"): + df = pd.DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"] + ) + + with tm.assert_produces_warning(com.SettingWithCopyWarning): + df.c.loc[df.c > 0] = None + + expected = pd.DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, -9]], columns=["a", "a", "c"] + ) + tm.assert_frame_equal(df, expected) + + def test_chained_getitem_with_lists(self): + + # GH6394 + # Regression in chained getitem indexing with embedded list-like from + # 0.12 + def check(result, expected): + tm.assert_numpy_array_equal(result, expected) + assert isinstance(result, np.ndarray) + + df = DataFrame({"A": 5 * [np.zeros(3)], "B": 5 * [np.ones(3)]}) + expected = df["A"].iloc[2] + result = df.loc[2, "A"] + check(result, expected) + result2 = df.iloc[2]["A"] + check(result2, expected) + result3 = df["A"].loc[2] + check(result3, expected) + result4 = df["A"].iloc[2] + check(result4, expected) + + def test_cache_updating(self): + # GH 4939, make sure to update the cache on setitem + + df = tm.makeDataFrame() + df["A"] # cache series + df.loc["Hello Friend"] = df.iloc[0] + assert "Hello Friend" in df["A"].index + assert "Hello Friend" in df["B"].index + + # 10264 + df = DataFrame( + np.zeros((5, 5), dtype="int64"), + columns=["a", "b", "c", "d", "e"], + index=range(5), + ) + df["f"] = 0 + df.f.values[3] = 1 + + # TODO(wesm): unused? 
+ # y = df.iloc[np.arange(2, len(df))] + + df.f.values[3] = 2 + expected = DataFrame( + np.zeros((5, 6), dtype="int64"), + columns=["a", "b", "c", "d", "e", "f"], + index=range(5), + ) + expected.at[3, "f"] = 2 + tm.assert_frame_equal(df, expected) + expected = Series([0, 0, 0, 2, 0], name="f") + tm.assert_series_equal(df.f, expected) diff --git a/pandas/tests/indexing/test_check_indexer.py b/pandas/tests/indexing/test_check_indexer.py new file mode 100644 index 00000000..69d40652 --- /dev/null +++ b/pandas/tests/indexing/test_check_indexer.py @@ -0,0 +1,99 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.api.indexers import check_array_indexer + + +@pytest.mark.parametrize( + "indexer, expected", + [ + # integer + ([1, 2], np.array([1, 2], dtype=np.intp)), + (np.array([1, 2], dtype="int64"), np.array([1, 2], dtype=np.intp)), + (pd.array([1, 2], dtype="Int32"), np.array([1, 2], dtype=np.intp)), + (pd.Index([1, 2]), np.array([1, 2], dtype=np.intp)), + # boolean + ([True, False, True], np.array([True, False, True], dtype=np.bool_)), + (np.array([True, False, True]), np.array([True, False, True], dtype=np.bool_)), + ( + pd.array([True, False, True], dtype="boolean"), + np.array([True, False, True], dtype=np.bool_), + ), + # other + ([], np.array([], dtype=np.intp)), + ], +) +def test_valid_input(indexer, expected): + array = np.array([1, 2, 3]) + result = check_array_indexer(array, indexer) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", [[True, False, None], pd.array([True, False, None], dtype="boolean")], +) +def test_boolean_na_returns_indexer(indexer): + # https://github.com/pandas-dev/pandas/issues/31503 + arr = np.array([1, 2, 3]) + + result = check_array_indexer(arr, indexer) + expected = np.array([True, False, False], dtype=bool) + + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "indexer", + [ + [True, False], + pd.array([True, 
False], dtype="boolean"), + np.array([True, False], dtype=np.bool_), + ], +) +def test_bool_raise_length(indexer): + array = np.array([1, 2, 3]) + + msg = "Boolean index has wrong length" + with pytest.raises(IndexError, match=msg): + check_array_indexer(array, indexer) + + +@pytest.mark.parametrize( + "indexer", [[0, 1, None], pd.array([0, 1, pd.NA], dtype="Int64")], +) +def test_int_raise_missing_values(indexer): + array = np.array([1, 2, 3]) + + msg = "Cannot index with an integer indexer containing NA values" + with pytest.raises(ValueError, match=msg): + check_array_indexer(array, indexer) + + +@pytest.mark.parametrize( + "indexer", + [ + [0.0, 1.0], + np.array([1.0, 2.0], dtype="float64"), + np.array([True, False], dtype=object), + pd.Index([True, False], dtype=object), + pd.array(["a", "b"], dtype="string"), + ], +) +def test_raise_invalid_array_dtypes(indexer): + array = np.array([1, 2, 3]) + + msg = "arrays used as indices must be of integer or boolean type" + with pytest.raises(IndexError, match=msg): + check_array_indexer(array, indexer) + + +@pytest.mark.parametrize( + "indexer", [None, Ellipsis, slice(0, 3), (None,)], +) +def test_pass_through_non_array_likes(indexer): + array = np.array([1, 2, 3]) + + result = check_array_indexer(array, indexer) + assert result == indexer diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py new file mode 100644 index 00000000..b904755b --- /dev/null +++ b/pandas/tests/indexing/test_coercion.py @@ -0,0 +1,1087 @@ +import itertools +from typing import Dict, List + +import numpy as np +import pytest + +import pandas.compat as compat + +import pandas as pd +import pandas._testing as tm + +############################################################### +# Index / Series common tests which may trigger dtype coercions +############################################################### + + +@pytest.fixture(autouse=True, scope="class") +def check_comprehensiveness(request): + # Iterate over 
combination of dtype, method and klass + # and ensure that each are contained within a collected test + cls = request.cls + combos = itertools.product(cls.klasses, cls.dtypes, [cls.method]) + + def has_test(combo): + klass, dtype, method = combo + cls_funcs = request.node.session.items + return any( + klass in x.name and dtype in x.name and method in x.name for x in cls_funcs + ) + + for combo in combos: + if not has_test(combo): + msg = "test method is not defined: {0}, {1}" + raise AssertionError(msg.format(cls.__name__, combo)) + + yield + + +class CoercionBase: + + klasses = ["index", "series"] + dtypes = [ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64", + "datetime64tz", + "timedelta64", + "period", + ] + + @property + def method(self): + raise NotImplementedError(self) + + def _assert(self, left, right, dtype): + # explicitly check dtype to avoid any unexpected result + if isinstance(left, pd.Series): + tm.assert_series_equal(left, right) + elif isinstance(left, pd.Index): + tm.assert_index_equal(left, right) + else: + raise NotImplementedError + assert left.dtype == dtype + assert right.dtype == dtype + + +class TestSetitemCoercion(CoercionBase): + + method = "setitem" + + def _assert_setitem_series_conversion( + self, original_series, loc_value, expected_series, expected_dtype + ): + """ test series value's coercion triggered by assignment """ + temp = original_series.copy() + temp[1] = loc_value + tm.assert_series_equal(temp, expected_series) + # check dtype explicitly for sure + assert temp.dtype == expected_dtype + + # .loc works different rule, temporary disable + # temp = original_series.copy() + # temp.loc[1] = loc_value + # tm.assert_series_equal(temp, expected_series) + + @pytest.mark.parametrize( + "val,exp_dtype", + [(1, np.object), (1.1, np.object), (1 + 1j, np.object), (True, np.object)], + ) + def test_setitem_series_object(self, val, exp_dtype): + obj = pd.Series(list("abcd")) + assert obj.dtype == np.object + + 
exp = pd.Series(["a", val, "c", "d"]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, np.object)], + ) + def test_setitem_series_int64(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4]) + assert obj.dtype == np.int64 + + if exp_dtype is np.float64: + exp = pd.Series([1, 1, 3, 4]) + self._assert_setitem_series_conversion(obj, 1.1, exp, np.int64) + pytest.xfail("GH12747 The result must be float") + + exp = pd.Series([1, val, 3, 4]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(np.int32(1), np.int8), (np.int16(2 ** 9), np.int16)] + ) + def test_setitem_series_int8(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4], dtype=np.int8) + assert obj.dtype == np.int8 + + if exp_dtype is np.int16: + exp = pd.Series([1, 0, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, val, exp, np.int8) + pytest.xfail("BUG: it must be Series([1, 1, 3, 4], dtype=np.int16") + + exp = pd.Series([1, val, 3, 4], dtype=np.int8) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (1, np.float64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.object), + ], + ) + def test_setitem_series_float64(self, val, exp_dtype): + obj = pd.Series([1.1, 2.2, 3.3, 4.4]) + assert obj.dtype == np.float64 + + exp = pd.Series([1.1, val, 3.3, 4.4]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, np.object), + ], + ) + def test_setitem_series_complex128(self, val, exp_dtype): + obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) + assert obj.dtype == np.complex128 + + exp = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j]) + 
self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (1, np.int64), + (3, np.int64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.bool), + ], + ) + def test_setitem_series_bool(self, val, exp_dtype): + obj = pd.Series([True, False, True, False]) + assert obj.dtype == np.bool + + if exp_dtype is np.int64: + exp = pd.Series([True, True, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, np.bool) + pytest.xfail("TODO_GH12747 The result must be int") + elif exp_dtype is np.float64: + exp = pd.Series([True, True, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, np.bool) + pytest.xfail("TODO_GH12747 The result must be float") + elif exp_dtype is np.complex128: + exp = pd.Series([True, True, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, np.bool) + pytest.xfail("TODO_GH12747 The result must be complex") + + exp = pd.Series([True, val, True, False]) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (1, np.object), + ("x", np.object), + ], + ) + def test_setitem_series_datetime64(self, val, exp_dtype): + obj = pd.Series( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + + exp = pd.Series( + [ + pd.Timestamp("2011-01-01"), + val, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [ + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + (pd.Timestamp("2012-01-01", tz="US/Pacific"), np.object), + (pd.Timestamp("2012-01-01"), np.object), + (1, np.object), + ], + ) + def test_setitem_series_datetime64tz(self, val, exp_dtype): + tz = 
"US/Eastern" + obj = pd.Series( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-02", tz=tz), + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + assert obj.dtype == "datetime64[ns, US/Eastern]" + + exp = pd.Series( + [ + pd.Timestamp("2011-01-01", tz=tz), + val, + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", + [(pd.Timedelta("12 day"), "timedelta64[ns]"), (1, np.object), ("x", np.object)], + ) + def test_setitem_series_timedelta64(self, val, exp_dtype): + obj = pd.Series( + [ + pd.Timedelta("1 day"), + pd.Timedelta("2 day"), + pd.Timedelta("3 day"), + pd.Timedelta("4 day"), + ] + ) + assert obj.dtype == "timedelta64[ns]" + + exp = pd.Series( + [pd.Timedelta("1 day"), val, pd.Timedelta("3 day"), pd.Timedelta("4 day")] + ) + self._assert_setitem_series_conversion(obj, val, exp, exp_dtype) + + def _assert_setitem_index_conversion( + self, original_series, loc_key, expected_index, expected_dtype + ): + """ test index's coercion triggered by assign key """ + temp = original_series.copy() + temp[loc_key] = 5 + exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) + tm.assert_series_equal(temp, exp) + # check dtype explicitly for sure + assert temp.index.dtype == expected_dtype + + temp = original_series.copy() + temp.loc[loc_key] = 5 + exp = pd.Series([1, 2, 3, 4, 5], index=expected_index) + tm.assert_series_equal(temp, exp) + # check dtype explicitly for sure + assert temp.index.dtype == expected_dtype + + @pytest.mark.parametrize( + "val,exp_dtype", [("x", np.object), (5, IndexError), (1.1, np.object)] + ) + def test_setitem_index_object(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4], index=list("abcd")) + assert obj.index.dtype == np.object + + if exp_dtype is IndexError: + temp = obj.copy() + with pytest.raises(exp_dtype): + temp[5] = 5 + else: + exp_index = 
pd.Index(list("abcd") + [val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", np.object)] + ) + def test_setitem_index_int64(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4]) + assert obj.index.dtype == np.int64 + + exp_index = pd.Index([0, 1, 2, 3, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + @pytest.mark.parametrize( + "val,exp_dtype", [(5, IndexError), (5.1, np.float64), ("x", np.object)] + ) + def test_setitem_index_float64(self, val, exp_dtype): + obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1]) + assert obj.index.dtype == np.float64 + + if exp_dtype is IndexError: + # float + int -> int + temp = obj.copy() + with pytest.raises(exp_dtype): + temp[5] = 5 + pytest.xfail("TODO_GH12747 The result must be float") + + exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val]) + self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype) + + def test_setitem_series_period(self): + pass + + def test_setitem_index_complex128(self): + pass + + def test_setitem_index_bool(self): + pass + + def test_setitem_index_datetime64(self): + pass + + def test_setitem_index_datetime64tz(self): + pass + + def test_setitem_index_timedelta64(self): + pass + + def test_setitem_index_period(self): + pass + + +class TestInsertIndexCoercion(CoercionBase): + + klasses = ["index"] + method = "insert" + + def _assert_insert_conversion(self, original, value, expected, expected_dtype): + """ test coercion triggered by insert """ + target = original.copy() + res = target.insert(1, value) + tm.assert_index_equal(res, expected) + assert res.dtype == expected_dtype + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1, np.object), + (1.1, 1.1, np.object), + (False, False, np.object), + ("x", "x", np.object), + ], + ) + def test_insert_index_object(self, insert, coerced_val, coerced_dtype): + obj = 
pd.Index(list("abcd")) + assert obj.dtype == np.object + + exp = pd.Index(["a", coerced_val, "b", "c", "d"]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1, np.int64), + (1.1, 1.1, np.float64), + (False, 0, np.int64), + ("x", "x", np.object), + ], + ) + def test_insert_index_int64(self, insert, coerced_val, coerced_dtype): + obj = pd.Int64Index([1, 2, 3, 4]) + assert obj.dtype == np.int64 + + exp = pd.Index([1, coerced_val, 2, 3, 4]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (1, 1.0, np.float64), + (1.1, 1.1, np.float64), + (False, 0.0, np.float64), + ("x", "x", np.object), + ], + ) + def test_insert_index_float64(self, insert, coerced_val, coerced_dtype): + obj = pd.Float64Index([1.0, 2.0, 3.0, 4.0]) + assert obj.dtype == np.float64 + + exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0]) + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + ], + ids=["datetime64", "datetime64tz"], + ) + def test_insert_index_datetimes(self, fill_val, exp_dtype): + obj = pd.DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz + ) + assert obj.dtype == exp_dtype + + exp = pd.DatetimeIndex( + ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"], + tz=fill_val.tz, + ) + self._assert_insert_conversion(obj, fill_val, exp, exp_dtype) + + if fill_val.tz: + msg = "Cannot compare tz-naive and tz-aware" + with pytest.raises(TypeError, match=msg): + obj.insert(1, pd.Timestamp("2012-01-01")) + + msg = "Timezones don't match" + with pytest.raises(ValueError, match=msg): + obj.insert(1, pd.Timestamp("2012-01-01", tz="Asia/Tokyo")) + + else: + msg 
= "Cannot compare tz-naive and tz-aware" + with pytest.raises(TypeError, match=msg): + obj.insert(1, pd.Timestamp("2012-01-01", tz="Asia/Tokyo")) + + msg = "cannot insert DatetimeIndex with incompatible label" + with pytest.raises(TypeError, match=msg): + obj.insert(1, 1) + + pytest.xfail("ToDo: must coerce to object") + + def test_insert_index_timedelta64(self): + obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"]) + assert obj.dtype == "timedelta64[ns]" + + # timedelta64 + timedelta64 => timedelta64 + exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"]) + self._assert_insert_conversion( + obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]" + ) + + # ToDo: must coerce to object + msg = "cannot insert TimedeltaIndex with incompatible label" + with pytest.raises(TypeError, match=msg): + obj.insert(1, pd.Timestamp("2012-01-01")) + + # ToDo: must coerce to object + msg = "cannot insert TimedeltaIndex with incompatible label" + with pytest.raises(TypeError, match=msg): + obj.insert(1, 1) + + @pytest.mark.parametrize( + "insert, coerced_val, coerced_dtype", + [ + (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"), + (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), np.object), + (1, 1, np.object), + ("x", "x", np.object), + ], + ) + def test_insert_index_period(self, insert, coerced_val, coerced_dtype): + obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M") + assert obj.dtype == "period[M]" + + data = [ + pd.Period("2011-01", freq="M"), + coerced_val, + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-04", freq="M"), + ] + if isinstance(insert, pd.Period): + exp = pd.PeriodIndex(data, freq="M") + self._assert_insert_conversion(obj, insert, exp, coerced_dtype) + else: + msg = r"Unexpected keyword arguments {'freq'}" + with pytest.raises(TypeError, match=msg): + pd.Index(data, freq="M") + + def test_insert_index_complex128(self): + pass + + def 
test_insert_index_bool(self): + pass + + +class TestWhereCoercion(CoercionBase): + + method = "where" + + def _assert_where_conversion( + self, original, cond, values, expected, expected_dtype + ): + """ test coercion triggered by where """ + target = original.copy() + res = target.where(cond, values) + self._assert(res, expected, expected_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, np.object), (1.1, np.object), (1 + 1j, np.object), (True, np.object)], + ) + def test_where_object(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series + obj = klass(list("abcd")) + assert obj.dtype == np.object + cond = klass([True, False, True, False]) + + if fill_val is True and klass is pd.Series: + ret_val = 1 + else: + ret_val = fill_val + + exp = klass(["a", ret_val, "c", ret_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(fill_val * x for x in [5, 6, 7, 8]) + + exp = klass(["a", values[1], "c", values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, np.object)], + ) + def test_where_int64(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series + if klass is pd.Index and exp_dtype is np.complex128: + pytest.skip("Complex Index not supported") + obj = klass([1, 2, 3, 4]) + assert obj.dtype == np.int64 + cond = klass([True, False, True, False]) + + exp = klass([1, fill_val, 3, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = klass([1, values[1], 3, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val, exp_dtype", + [ + (1, 
np.float64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.object), + ], + ) + def test_where_float64(self, index_or_series, fill_val, exp_dtype): + klass = index_or_series + if klass is pd.Index and exp_dtype is np.complex128: + pytest.skip("Complex Index not supported") + obj = klass([1.1, 2.2, 3.3, 4.4]) + assert obj.dtype == np.float64 + cond = klass([True, False, True, False]) + + exp = klass([1.1, fill_val, 3.3, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = klass([True, False, True, True]) + else: + values = klass(x * fill_val for x in [5, 6, 7, 8]) + exp = klass([1.1, values[1], 3.3, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, np.object), + ], + ) + def test_where_series_complex128(self, fill_val, exp_dtype): + obj = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j]) + assert obj.dtype == np.complex128 + cond = pd.Series([True, False, True, False]) + + exp = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = pd.Series([True, False, True, True]) + else: + values = pd.Series(x * fill_val for x in [5, 6, 7, 8]) + exp = pd.Series([1 + 1j, values[1], 3 + 3j, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [(1, np.object), (1.1, np.object), (1 + 1j, np.object), (True, np.bool)], + ) + def test_where_series_bool(self, fill_val, exp_dtype): + + obj = pd.Series([True, False, True, False]) + assert obj.dtype == np.bool + cond = pd.Series([True, False, True, False]) + + exp = pd.Series([True, fill_val, True, fill_val]) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + if fill_val is True: + values = 
pd.Series([True, False, True, True]) + else: + values = pd.Series(x * fill_val for x in [5, 6, 7, 8]) + exp = pd.Series([True, values[1], True, values[3]]) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.parametrize( + "fill_val,exp_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), np.object), + ], + ids=["datetime64", "datetime64tz"], + ) + def test_where_series_datetime64(self, fill_val, exp_dtype): + obj = pd.Series( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + cond = pd.Series([True, False, True, False]) + + exp = pd.Series( + [pd.Timestamp("2011-01-01"), fill_val, pd.Timestamp("2011-01-03"), fill_val] + ) + self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype) + + values = pd.Series(pd.date_range(fill_val, periods=4)) + if fill_val.tz: + exp = pd.Series( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2012-01-02 00:00", tz="US/Eastern"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2012-01-04 00:00", tz="US/Eastern"), + ] + ) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + exp = pd.Series( + [ + pd.Timestamp("2011-01-01"), + values[1], + pd.Timestamp("2011-01-03"), + values[3], + ] + ) + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + def test_where_index_datetime(self): + fill_val = pd.Timestamp("2012-01-01") + exp_dtype = "datetime64[ns]" + obj = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + cond = pd.Index([True, False, True, False]) + + msg = "Index\\(\\.\\.\\.\\) must be called with a collection of some kind" + with pytest.raises(TypeError, match=msg): + obj.where(cond, fill_val) + + values = pd.Index(pd.date_range(fill_val, periods=4)) + exp 
= pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2012-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2012-01-04"), + ] + ) + + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + @pytest.mark.xfail(reason="GH 22839: do not ignore timezone, must be object") + def test_where_index_datetimetz(self): + fill_val = pd.Timestamp("2012-01-01", tz="US/Eastern") + exp_dtype = np.object + obj = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + cond = pd.Index([True, False, True, False]) + + msg = "Index\\(\\.\\.\\.\\) must be called with a collection of some kind" + with pytest.raises(TypeError, match=msg): + obj.where(cond, fill_val) + + values = pd.Index(pd.date_range(fill_val, periods=4)) + exp = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2012-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03"), + pd.Timestamp("2012-01-04", tz="US/Eastern"), + ], + dtype=exp_dtype, + ) + + self._assert_where_conversion(obj, cond, values, exp, exp_dtype) + + def test_where_index_complex128(self): + pass + + def test_where_index_bool(self): + pass + + def test_where_series_datetime64tz(self): + pass + + def test_where_series_timedelta64(self): + pass + + def test_where_series_period(self): + pass + + def test_where_index_datetime64tz(self): + pass + + def test_where_index_timedelta64(self): + pass + + def test_where_index_period(self): + pass + + +class TestFillnaSeriesCoercion(CoercionBase): + + # not indexing, but place here for consistency + + method = "fillna" + + def test_has_comprehensive_tests(self): + pass + + def _assert_fillna_conversion(self, original, value, expected, expected_dtype): + """ test coercion triggered by fillna """ + target = original.copy() + res = target.fillna(value) + self._assert(res, expected, expected_dtype) + + @pytest.mark.parametrize( + "fill_val, fill_dtype", + [(1, 
np.object), (1.1, np.object), (1 + 1j, np.object), (True, np.object)], + ) + def test_fillna_object(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass(["a", np.nan, "c", "d"]) + assert obj.dtype == np.object + + exp = klass(["a", fill_val, "c", "d"]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (1, np.float64), + (1.1, np.float64), + (1 + 1j, np.complex128), + (True, np.object), + ], + ) + def test_fillna_float64(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass([1.1, np.nan, 3.3, 4.4]) + assert obj.dtype == np.float64 + + exp = klass([1.1, fill_val, 3.3, 4.4]) + # float + complex -> we don't support a complex Index + # complex for Series, + # object for Index + if fill_dtype == np.complex128 and klass == pd.Index: + fill_dtype = np.object + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (1, np.complex128), + (1.1, np.complex128), + (1 + 1j, np.complex128), + (True, np.object), + ], + ) + def test_fillna_series_complex128(self, fill_val, fill_dtype): + obj = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j]) + assert obj.dtype == np.complex128 + + exp = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j]) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (pd.Timestamp("2012-01-01"), "datetime64[ns]"), + (pd.Timestamp("2012-01-01", tz="US/Eastern"), np.object), + (1, np.object), + ("x", np.object), + ], + ids=["datetime64", "datetime64tz", "object", "object"], + ) + def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + obj = klass( + [ + pd.Timestamp("2011-01-01"), + pd.NaT, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + assert obj.dtype == "datetime64[ns]" + + exp = klass( + [ + pd.Timestamp("2011-01-01"), + 
fill_val, + pd.Timestamp("2011-01-03"), + pd.Timestamp("2011-01-04"), + ] + ) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + @pytest.mark.parametrize( + "fill_val,fill_dtype", + [ + (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), + (pd.Timestamp("2012-01-01"), np.object), + (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), np.object), + (1, np.object), + ("x", np.object), + ], + ) + def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): + klass = index_or_series + tz = "US/Eastern" + + obj = klass( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.NaT, + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + assert obj.dtype == "datetime64[ns, US/Eastern]" + + exp = klass( + [ + pd.Timestamp("2011-01-01", tz=tz), + fill_val, + pd.Timestamp("2011-01-03", tz=tz), + pd.Timestamp("2011-01-04", tz=tz), + ] + ) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + + def test_fillna_series_int64(self): + pass + + def test_fillna_index_int64(self): + pass + + def test_fillna_series_bool(self): + pass + + def test_fillna_index_bool(self): + pass + + def test_fillna_series_timedelta64(self): + pass + + def test_fillna_series_period(self): + pass + + def test_fillna_index_timedelta64(self): + pass + + def test_fillna_index_period(self): + pass + + +class TestReplaceSeriesCoercion(CoercionBase): + + klasses = ["series"] + method = "replace" + + rep: Dict[str, List] = {} + rep["object"] = ["a", "b"] + rep["int64"] = [4, 5] + rep["float64"] = [1.1, 2.2] + rep["complex128"] = [1 + 1j, 2 + 2j] + rep["bool"] = [True, False] + rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")] + + for tz in ["UTC", "US/Eastern"]: + # to test tz => different tz replacement + key = "datetime64[ns, {0}]".format(tz) + rep[key] = [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-03", tz=tz), + ] + + rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), 
pd.Timedelta("2 day")] + + @pytest.mark.parametrize("how", ["dict", "series"]) + @pytest.mark.parametrize( + "to_key", + [ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64[ns]", + "datetime64[ns, UTC]", + "datetime64[ns, US/Eastern]", + "timedelta64[ns]", + ], + ids=[ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64", + "datetime64tz", + "datetime64tz", + "timedelta64", + ], + ) + @pytest.mark.parametrize( + "from_key", + [ + "object", + "int64", + "float64", + "complex128", + "bool", + "datetime64[ns]", + "datetime64[ns, UTC]", + "datetime64[ns, US/Eastern]", + "timedelta64[ns]", + ], + ) + def test_replace_series(self, how, to_key, from_key): + index = pd.Index([3, 4], name="xxx") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + if from_key.startswith("datetime") and to_key.startswith("datetime"): + # tested below + return + elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]: + # tested below + return + + if how == "dict": + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == "series": + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError + + result = obj.replace(replacer) + + if (from_key == "float64" and to_key in ("int64")) or ( + from_key == "complex128" and to_key in ("int64", "float64") + ): + + if compat.is_platform_32bit() or compat.is_platform_windows(): + pytest.skip( + "32-bit platform buggy: {0} -> {1}".format(from_key, to_key) + ) + + # Expected: do not downcast by replacement + exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key) + + else: + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize("how", ["dict", "series"]) + @pytest.mark.parametrize( + "to_key", + ["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"], + ) + 
@pytest.mark.parametrize( + "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"] + ) + def test_replace_series_datetime_tz(self, how, to_key, from_key): + index = pd.Index([3, 4], name="xyz") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + if how == "dict": + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == "series": + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError + + result = obj.replace(replacer) + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize("how", ["dict", "series"]) + @pytest.mark.parametrize( + "to_key", + ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], + ) + @pytest.mark.parametrize( + "from_key", + ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], + ) + def test_replace_series_datetime_datetime(self, how, to_key, from_key): + index = pd.Index([3, 4], name="xyz") + obj = pd.Series(self.rep[from_key], index=index, name="yyy") + assert obj.dtype == from_key + + if how == "dict": + replacer = dict(zip(self.rep[from_key], self.rep[to_key])) + elif how == "series": + replacer = pd.Series(self.rep[to_key], index=self.rep[from_key]) + else: + raise ValueError + + result = obj.replace(replacer) + exp = pd.Series(self.rep[to_key], index=index, name="yyy") + assert exp.dtype == to_key + + tm.assert_series_equal(result, exp) + + def test_replace_series_period(self): + pass diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py new file mode 100644 index 00000000..c8c2d1ed --- /dev/null +++ b/pandas/tests/indexing/test_datetime.py @@ -0,0 +1,372 @@ +from datetime import date, datetime, timedelta + +from dateutil import tz +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series, Timestamp, date_range 
+import pandas._testing as tm + + +class TestDatetimeIndex: + def test_setitem_with_datetime_tz(self): + # 16889 + # support .loc with alignment and tz-aware DatetimeIndex + mask = np.array([True, False, True, False]) + + idx = date_range("20010101", periods=4, tz="UTC") + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + idx = date_range("20010101", periods=4) + df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64") + + result = df.copy() + result.loc[mask, :] = df.loc[mask, :] + tm.assert_frame_equal(result, df) + + result = df.copy() + result.loc[mask] = df.loc[mask] + tm.assert_frame_equal(result, df) + + def test_indexing_with_datetime_tz(self): + + # GH#8260 + # support datetime64 with tz + + idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo") + dr = date_range("20130110", periods=3) + df = DataFrame({"A": idx, "B": dr}) + df["C"] = idx + df.iloc[1, 1] = pd.NaT + df.iloc[1, 2] = pd.NaT + + # indexing + result = df.iloc[1] + expected = Series( + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], + index=list("ABC"), + dtype="object", + name=1, + ) + tm.assert_series_equal(result, expected) + result = df.loc[1] + expected = Series( + [Timestamp("2013-01-02 00:00:00-0500", tz="US/Eastern"), pd.NaT, pd.NaT], + index=list("ABC"), + dtype="object", + name=1, + ) + tm.assert_series_equal(result, expected) + + # indexing - fast_xs + df = DataFrame({"a": date_range("2014-01-01", periods=10, tz="UTC")}) + result = df.iloc[5] + expected = Series( + [Timestamp("2014-01-06 00:00:00+0000", tz="UTC")], index=["a"], name=5 + ) + tm.assert_series_equal(result, expected) + + result = df.loc[5] + tm.assert_series_equal(result, expected) + + # indexing - boolean + result = df[df.a > df.a[3]] + expected = df.iloc[4:] 
+ tm.assert_frame_equal(result, expected) + + # indexing - setting an element + df = DataFrame( + data=pd.to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]), + columns=["time"], + ) + df["new_col"] = ["new", "old"] + df.time = df.set_index("time").index.tz_localize("UTC") + v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific") + + # trying to set a single element on a part of a different timezone + # this converts to object + df2 = df.copy() + df2.loc[df2.new_col == "new", "time"] = v + + expected = Series([v[0], df.loc[1, "time"]], name="time") + tm.assert_series_equal(df2.time, expected) + + v = df.loc[df.new_col == "new", "time"] + pd.Timedelta("1s") + df.loc[df.new_col == "new", "time"] = v + tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v) + + def test_consistency_with_tz_aware_scalar(self): + # xef gh-12938 + # various ways of indexing the same tz-aware scalar + df = Series([Timestamp("2016-03-30 14:35:25", tz="Europe/Brussels")]).to_frame() + + df = pd.concat([df, df]).reset_index(drop=True) + expected = Timestamp("2016-03-30 14:35:25+0200", tz="Europe/Brussels") + + result = df[0][0] + assert result == expected + + result = df.iloc[0, 0] + assert result == expected + + result = df.loc[0, 0] + assert result == expected + + result = df.iat[0, 0] + assert result == expected + + result = df.at[0, 0] + assert result == expected + + result = df[0].loc[0] + assert result == expected + + result = df[0].at[0] + assert result == expected + + def test_indexing_with_datetimeindex_tz(self): + + # GH 12050 + # indexing on a series with a datetimeindex with tz + index = date_range("2015-01-01", periods=2, tz="utc") + + ser = Series(range(2), index=index, dtype="int64") + + # list-like indexing + + for sel in (index, list(index)): + # getitem + tm.assert_series_equal(ser[sel], ser) + + # setitem + result = ser.copy() + result[sel] = 1 + expected = Series(1, index=index) + tm.assert_series_equal(result, expected) + + # .loc 
getitem + tm.assert_series_equal(ser.loc[sel], ser) + + # .loc setitem + result = ser.copy() + result.loc[sel] = 1 + expected = Series(1, index=index) + tm.assert_series_equal(result, expected) + + # single element indexing + + # getitem + assert ser[index[1]] == 1 + + # setitem + result = ser.copy() + result[index[1]] = 5 + expected = Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + # .loc getitem + assert ser.loc[index[1]] == 1 + + # .loc setitem + result = ser.copy() + result.loc[index[1]] = 5 + expected = Series([0, 5], index=index) + tm.assert_series_equal(result, expected) + + def test_partial_setting_with_datetimelike_dtype(self): + + # GH9478 + # a datetimeindex alignment issue with partial setting + df = DataFrame( + np.arange(6.0).reshape(3, 2), + columns=list("AB"), + index=date_range("1/1/2000", periods=3, freq="1H"), + ) + expected = df.copy() + expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT] + + mask = df.A < 1 + df.loc[mask, "C"] = df.loc[mask].index + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_datetime(self): + + # GH 9516 + dt1 = Timestamp("20130101 09:00:00") + dt2 = Timestamp("20130101 10:00:00") + + for conv in [ + lambda x: x, + lambda x: x.to_datetime64(), + lambda x: x.to_pydatetime(), + lambda x: np.datetime64(x), + ]: + + df = DataFrame() + df.loc[conv(dt1), "one"] = 100 + df.loc[conv(dt2), "one"] = 200 + + expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2]) + tm.assert_frame_equal(df, expected) + + def test_series_partial_set_datetime(self): + # GH 11497 + + idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx") + ser = Series([0.1, 0.2], index=idx, name="s") + + result = ser.loc[[Timestamp("2011-01-01"), Timestamp("2011-01-02")]] + exp = Series([0.1, 0.2], index=idx, name="s") + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-02"), + Timestamp("2011-01-02"), + Timestamp("2011-01-01"), + ] + exp = Series( + [0.2, 0.2, 
0.1], index=pd.DatetimeIndex(keys, name="idx"), name="s" + ) + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + keys = [ + Timestamp("2011-01-03"), + Timestamp("2011-01-02"), + Timestamp("2011-01-03"), + ] + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[keys] + + def test_series_partial_set_period(self): + # GH 11497 + + idx = pd.period_range("2011-01-01", "2011-01-02", freq="D", name="idx") + ser = Series([0.1, 0.2], index=idx, name="s") + + result = ser.loc[ + [pd.Period("2011-01-01", freq="D"), pd.Period("2011-01-02", freq="D")] + ] + exp = Series([0.1, 0.2], index=idx, name="s") + tm.assert_series_equal(result, exp, check_index_type=True) + + keys = [ + pd.Period("2011-01-02", freq="D"), + pd.Period("2011-01-02", freq="D"), + pd.Period("2011-01-01", freq="D"), + ] + exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name="idx"), name="s") + tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + + keys = [ + pd.Period("2011-01-03", freq="D"), + pd.Period("2011-01-02", freq="D"), + pd.Period("2011-01-03", freq="D"), + ] + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[keys] + + def test_nanosecond_getitem_setitem_with_tz(self): + # GH 11679 + data = ["2016-06-28 08:30:00.123456789"] + index = pd.DatetimeIndex(data, dtype="datetime64[ns, America/Chicago]") + df = DataFrame({"a": [10]}, index=index) + result = df.loc[df.index[0]] + expected = Series(10, index=["a"], name=df.index[0]) + tm.assert_series_equal(result, expected) + + result = df.copy() + result.loc[df.index[0], "a"] = -1 + expected = DataFrame(-1, index=index, columns=["a"]) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_across_dst(self): + # GH 21846 + idx = pd.date_range( + "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min" + ) + series2 = pd.Series([0, 1, 2, 3, 4], index=idx) + + t_1 = pd.Timestamp( + "2017-10-29 02:30:00+02:00", tz="Europe/Berlin", freq="30min" + ) + t_2 
= pd.Timestamp( + "2017-10-29 02:00:00+01:00", tz="Europe/Berlin", freq="30min" + ) + result = series2.loc[t_1:t_2] + expected = pd.Series([2, 3], index=idx[2:4]) + tm.assert_series_equal(result, expected) + + result = series2[t_1] + expected = 2 + assert result == expected + + def test_loc_incremental_setitem_with_dst(self): + # GH 20724 + base = datetime(2015, 11, 1, tzinfo=tz.gettz("US/Pacific")) + idxs = [base + timedelta(seconds=i * 900) for i in range(16)] + result = pd.Series([0], index=[idxs[0]]) + for ts in idxs: + result.loc[ts] = 1 + expected = pd.Series(1, index=idxs) + tm.assert_series_equal(result, expected) + + def test_loc_setitem_with_existing_dst(self): + # GH 18308 + start = pd.Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid") + end = pd.Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid") + ts = pd.Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid") + idx = pd.date_range(start, end, closed="left", freq="H") + result = pd.DataFrame(index=idx, columns=["value"]) + result.loc[ts, "value"] = 12 + expected = pd.DataFrame( + [np.nan] * len(idx) + [12], + index=idx.append(pd.DatetimeIndex([ts])), + columns=["value"], + dtype=object, + ) + tm.assert_frame_equal(result, expected) + + def test_loc_str_slicing(self): + ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = ix.to_series() + result = ser.loc[:"2017-12"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_label_slicing(self): + ix = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") + ser = ix.to_series() + result = ser.loc[: ix[-2]] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "slice_, positions", + [ + [slice(date(2018, 1, 1), None), [0, 1, 2]], + [slice(date(2019, 1, 2), None), [2]], + [slice(date(2020, 1, 1), None), []], + [slice(None, date(2020, 1, 1)), [0, 1, 2]], + [slice(None, date(2019, 1, 1)), [0]], + ], + ) + def 
test_getitem_slice_date(self, slice_, positions): + # https://github.com/pandas-dev/pandas/issues/31501 + s = pd.Series( + [0, 1, 2], + pd.DatetimeIndex(["2019-01-01", "2019-01-01T06:00:00", "2019-01-02"]), + ) + result = s[slice_] + expected = s.take(positions) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py new file mode 100644 index 00000000..2cc82325 --- /dev/null +++ b/pandas/tests/indexing/test_floats.py @@ -0,0 +1,925 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series +import pandas._testing as tm + + +class TestFloatIndexers: + def check(self, result, original, indexer, getitem): + """ + comparator for results + we need to take care if we are indexing on a + Series or a frame + """ + if isinstance(original, Series): + expected = original.iloc[indexer] + else: + if getitem: + expected = original.iloc[:, indexer] + else: + expected = original.iloc[indexer] + + tm.assert_almost_equal(result, expected) + + def test_scalar_error(self): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + # this duplicates the code below + # but is specifically testing for the error + # message + + for index in [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + tm.makeIntIndex, + tm.makeRangeIndex, + ]: + + i = index(5) + + s = Series(np.arange(len(i)), index=i) + + msg = "Cannot index by location index" + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] + + msg = ( + "cannot do positional indexing on {klass} with these " + r"indexers \[3\.0\] of {kind}".format(klass=type(i), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] = 0 + + def test_scalar_non_numeric(self): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types 
& accessors + + for index in [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeCategoricalIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ]: + + i = index(5) + + for s in [ + Series(np.arange(len(i)), index=i), + DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), + ]: + + # getting + for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]: + + # gettitem on a DataFrame is a KeyError as it is indexing + # via labels on the columns + if getitem and isinstance(s, DataFrame): + error = KeyError + msg = r"^3(\.0)?$" + else: + error = TypeError + msg = ( + r"cannot do (label|index|positional) indexing " + r"on {klass} with these indexers \[3\.0\] of " + r"{kind}|" + "Cannot index by location index with a " + "non-integer key".format(klass=type(i), kind=str(float)) + ) + with pytest.raises(error, match=msg): + idxr(s)[3.0] + + # label based can be a TypeError or KeyError + if s.index.inferred_type in { + "categorical", + "string", + "unicode", + "mixed", + }: + error = KeyError + msg = r"^3$" + else: + error = TypeError + msg = ( + r"cannot do (label|index) indexing " + r"on {klass} with these indexers \[3\.0\] of " + r"{kind}".format(klass=type(i), kind=str(float)) + ) + with pytest.raises(error, match=msg): + s.loc[3.0] + + # contains + assert 3.0 not in s + + # setting with a float fails with iloc + msg = ( + r"cannot do (label|index|positional) indexing" + r" on {klass} with these indexers \[3\.0\] of" + r" {kind}".format(klass=type(i), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] = 0 + + # setting with an indexer + if s.index.inferred_type in ["categorical"]: + # Value or Type Error + pass + elif s.index.inferred_type in ["datetime64", "timedelta64", "period"]: + + # these should prob work + # and are inconsistent between series/dataframe ATM + # for idxr in [lambda x: x]: + # s2 = s.copy() + # + # with pytest.raises(TypeError): + # idxr(s2)[3.0] = 0 + pass + + else: + + s2 
= s.copy() + s2.loc[3.0] = 10 + assert s2.index.is_object() + + for idxr in [lambda x: x]: + s2 = s.copy() + idxr(s2)[3.0] = 0 + assert s2.index.is_object() + + # fallsback to position selection, series only + s = Series(np.arange(len(i)), index=i) + s[3] + msg = ( + r"cannot do (label|index) indexing" + r" on {klass} with these indexers \[3\.0\] of" + r" {kind}".format(klass=type(i), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[3.0] + + def test_scalar_with_mixed(self): + + s2 = Series([1, 2, 3], index=["a", "b", "c"]) + s3 = Series([1, 2, 3], index=["a", "b", 1.5]) + + # lookup in a pure stringstr + # with an invalid indexer + for idxr in [lambda x: x, lambda x: x.iloc]: + + msg = ( + r"cannot do label indexing" + r" on {klass} with these indexers \[1\.0\] of" + r" {kind}|" + "Cannot index by location index with a non-integer key".format( + klass=str(Index), kind=str(float) + ) + ) + with pytest.raises(TypeError, match=msg): + idxr(s2)[1.0] + + with pytest.raises(KeyError, match=r"^1$"): + s2.loc[1.0] + + result = s2.loc["b"] + expected = 2 + assert result == expected + + # mixed index so we have label + # indexing + for idxr in [lambda x: x]: + + msg = ( + r"cannot do label indexing" + r" on {klass} with these indexers \[1\.0\] of" + r" {kind}".format(klass=str(Index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + idxr(s3)[1.0] + + result = idxr(s3)[1] + expected = 2 + assert result == expected + + msg = "Cannot index by location index with a non-integer key" + with pytest.raises(TypeError, match=msg): + s3.iloc[1.0] + with pytest.raises(KeyError, match=r"^1$"): + s3.loc[1.0] + + result = s3.loc[1.5] + expected = 3 + assert result == expected + + def test_scalar_integer(self): + + # test how scalar float indexers work on int indexes + + # integer index + for i in [Int64Index(range(5)), RangeIndex(5)]: + + for s in [ + Series(np.arange(len(i))), + DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), + ]: + + 
# coerce to equal int + for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: + + result = idxr(s)[3.0] + self.check(result, s, 3, getitem) + + # coerce to equal int + for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: + + if isinstance(s, Series): + + def compare(x, y): + assert x == y + + expected = 100 + else: + compare = tm.assert_series_equal + if getitem: + expected = Series(100, index=range(len(s)), name=3) + else: + expected = Series(100.0, index=range(len(s)), name=3) + + s2 = s.copy() + idxr(s2)[3.0] = 100 + + result = idxr(s2)[3.0] + compare(result, expected) + + result = idxr(s2)[3] + compare(result, expected) + + # contains + # coerce to equal int + assert 3.0 in s + + def test_scalar_float(self): + + # scalar float indexers work on a float index + index = Index(np.arange(5.0)) + for s in [ + Series(np.arange(len(index)), index=index), + DataFrame( + np.random.randn(len(index), len(index)), index=index, columns=index + ), + ]: + + # assert all operations except for iloc are ok + indexer = index[3] + for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]: + + # getting + result = idxr(s)[indexer] + self.check(result, s, 3, getitem) + + # setting + s2 = s.copy() + + result = idxr(s2)[indexer] + self.check(result, s, 3, getitem) + + # random integer is a KeyError + with pytest.raises(KeyError, match=r"^3\.5$"): + idxr(s)[3.5] + + # contains + assert 3.0 in s + + # iloc succeeds with an integer + expected = s.iloc[3] + s2 = s.copy() + + s2.iloc[3] = expected + result = s2.iloc[3] + self.check(result, s, 3, False) + + # iloc raises with a float + msg = "Cannot index by location index with a non-integer key" + with pytest.raises(TypeError, match=msg): + s.iloc[3.0] + + msg = ( + r"cannot do positional indexing" + r" on {klass} with these indexers \[3\.0\] of" + r" {kind}".format(klass=str(Float64Index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s2.iloc[3.0] = 0 + + def 
test_slice_non_numeric(self): + + # GH 4892 + # float_indexers should raise exceptions + # on appropriate Index types & accessors + + for index in [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ]: + + index = index(5) + for s in [ + Series(range(5), index=index), + DataFrame(np.random.randn(5, 2), index=index), + ]: + + # getitem + for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(3|4)\.0\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s.iloc[l] + + for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: + + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers " + r"\[(3|4)(\.0)?\] " + r"of ({kind_float}|{kind_int})".format( + klass=type(index), + kind_float=str(float), + kind_int=str(int), + ) + ) + with pytest.raises(TypeError, match=msg): + idxr(s)[l] + + # setitem + for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(3|4)\.0\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s.iloc[l] = 0 + + for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]: + msg = ( + "cannot do slice indexing" + r" on {klass} with these indexers" + r" \[(3|4)(\.0)?\]" + r" of ({kind_float}|{kind_int})".format( + klass=type(index), + kind_float=str(float), + kind_int=str(int), + ) + ) + with pytest.raises(TypeError, match=msg): + idxr(s)[l] = 0 + + def test_slice_integer(self): + + # same as above, but for Integer based indexes + # these coerce to a like integer + # oob indicates if we are out of bounds + # of positional indexing + for index, oob in [ + (Int64Index(range(5)), False), + (RangeIndex(5), False), + (Int64Index(range(5)) + 10, True), + ]: + + # s is an in-range index + s = 
Series(range(5), index=index) + + # getitem + for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + for idxr in [lambda x: x.loc]: + + result = idxr(s)[l] + + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(3, 5) + self.check(result, s, indexer, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(3|4)\.0\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[l] + + # getitem out-of-bounds + for l in [slice(-6, 6), slice(-6.0, 6.0)]: + + for idxr in [lambda x: x.loc]: + result = idxr(s)[l] + + # these are all label indexing + # except getitem which is positional + # empty + if oob: + indexer = slice(0, 0) + else: + indexer = slice(-6, 6) + self.check(result, s, indexer, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[-6\.0\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[slice(-6.0, 6.0)] + + # getitem odd floats + for l, res1 in [ + (slice(2.5, 4), slice(3, 5)), + (slice(2, 3.5), slice(2, 4)), + (slice(2.5, 3.5), slice(3, 4)), + ]: + + for idxr in [lambda x: x.loc]: + + result = idxr(s)[l] + if oob: + res = slice(0, 0) + else: + res = res1 + + self.check(result, s, res, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(2|3)\.5\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[l] + + # setitem + for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + for idxr in [lambda x: x.loc]: + sc = s.copy() + idxr(sc)[l] = 0 + result = idxr(sc)[l].values.ravel() + assert (result == 0).all() + + # positional indexing + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(3|4)\.0\] of " + 
"{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[l] = 0 + + def test_integer_positional_indexing(self): + """ make sure that we are raising on positional indexing + w.r.t. an integer index """ + + s = Series(range(2, 6), index=range(2, 6)) + + result = s[2:4] + expected = s.iloc[2:4] + tm.assert_series_equal(result, expected) + + for idxr in [lambda x: x, lambda x: x.iloc]: + + for l in [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]: + + klass = RangeIndex + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(2|4)\.0\] of " + "{kind}".format(klass=str(klass), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + idxr(s)[l] + + def test_slice_integer_frame_getitem(self): + + # similar to above, but on the getitem dim (of a DataFrame) + for index in [Int64Index(range(5)), RangeIndex(5)]: + + s = DataFrame(np.random.randn(5, 2), index=index) + + def f(idxr): + + # getitem + for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]: + + result = idxr(s)[l] + indexer = slice(0, 2) + self.check(result, s, indexer, False) + + # positional indexing + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(0|1)\.0\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[l] + + # getitem out-of-bounds + for l in [slice(-10, 10), slice(-10.0, 10.0)]: + + result = idxr(s)[l] + self.check(result, s, slice(-10, 10), True) + + # positional indexing + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[-10\.0\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[slice(-10.0, 10.0)] + + # getitem odd floats + for l, res in [ + (slice(0.5, 1), slice(1, 2)), + (slice(0, 0.5), slice(0, 1)), + (slice(0.5, 1.5), slice(1, 2)), + ]: + + result = idxr(s)[l] + self.check(result, s, res, False) + + # positional indexing + msg = ( + "cannot do 
slice indexing " + r"on {klass} with these indexers \[0\.5\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[l] + + # setitem + for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + sc = s.copy() + idxr(sc)[l] = 0 + result = idxr(sc)[l].values.ravel() + assert (result == 0).all() + + # positional indexing + msg = ( + "cannot do slice indexing " + r"on {klass} with these indexers \[(3|4)\.0\] of " + "{kind}".format(klass=type(index), kind=str(float)) + ) + with pytest.raises(TypeError, match=msg): + s[l] = 0 + + f(lambda x: x.loc) + + def test_slice_float(self): + + # same as above, but for floats + index = Index(np.arange(5.0)) + 0.1 + for s in [ + Series(range(5), index=index), + DataFrame(np.random.randn(5, 2), index=index), + ]: + + for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]: + + expected = s.iloc[3:4] + for idxr in [lambda x: x.loc, lambda x: x]: + + # getitem + result = idxr(s)[l] + if isinstance(s, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + # setitem + s2 = s.copy() + idxr(s2)[l] = 0 + result = idxr(s2)[l].values.ravel() + assert (result == 0).all() + + def test_floating_index_doc_example(self): + + index = Index([1.5, 2, 3, 4.5, 5]) + s = Series(range(5), index=index) + assert s[3] == 2 + assert s.loc[3] == 2 + assert s.loc[3] == 2 + assert s.iloc[3] == 3 + + def test_floating_misc(self): + + # related 236 + # scalar/slicing of a float index + s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64) + + # label based slicing + result1 = s[1.0:3.0] + result2 = s.loc[1.0:3.0] + result3 = s.loc[1.0:3.0] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + # exact indexing when found + result1 = s[5.0] + result2 = s.loc[5.0] + result3 = s.loc[5.0] + assert result1 == result2 + assert result1 == result3 + + result1 = s[5] + result2 = s.loc[5] + result3 = s.loc[5] + assert 
result1 == result2 + assert result1 == result3 + + assert s[5.0] == s[5] + + # value not found (and no fallbacking at all) + + # scalar integers + with pytest.raises(KeyError, match=r"^4\.0$"): + s.loc[4] + with pytest.raises(KeyError, match=r"^4\.0$"): + s.loc[4] + with pytest.raises(KeyError, match=r"^4\.0$"): + s[4] + + # fancy floats/integers create the correct entry (as nan) + # fancy tests + expected = Series([2, 0], index=Float64Index([5.0, 0.0])) + for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float + tm.assert_series_equal(s[fancy_idx], expected) + tm.assert_series_equal(s.loc[fancy_idx], expected) + tm.assert_series_equal(s.loc[fancy_idx], expected) + + expected = Series([2, 0], index=Index([5, 0], dtype="int64")) + for fancy_idx in [[5, 0], np.array([5, 0])]: # int + tm.assert_series_equal(s[fancy_idx], expected) + tm.assert_series_equal(s.loc[fancy_idx], expected) + tm.assert_series_equal(s.loc[fancy_idx], expected) + + # all should return the same as we are slicing 'the same' + result1 = s.loc[2:5] + result2 = s.loc[2.0:5.0] + result3 = s.loc[2.0:5] + result4 = s.loc[2.1:5] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + tm.assert_series_equal(result1, result4) + + # previously this did fallback indexing + result1 = s[2:5] + result2 = s[2.0:5.0] + result3 = s[2.0:5] + result4 = s[2.1:5] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + tm.assert_series_equal(result1, result4) + + result1 = s.loc[2:5] + result2 = s.loc[2.0:5.0] + result3 = s.loc[2.0:5] + result4 = s.loc[2.1:5] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + tm.assert_series_equal(result1, result4) + + # combined test + result1 = s.loc[2:5] + result2 = s.loc[2:5] + result3 = s[2:5] + + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + + # list selection + result1 = s[[0.0, 5, 10]] + result2 = s.loc[[0.0, 5, 10]] + result3 = 
s.loc[[0.0, 5, 10]] + result4 = s.iloc[[0, 2, 4]] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + tm.assert_series_equal(result1, result4) + + with pytest.raises(KeyError, match="with any missing labels"): + s[[1.6, 5, 10]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[1.6, 5, 10]] + + with pytest.raises(KeyError, match="with any missing labels"): + s[[0, 1, 2]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[0, 1, 2]] + + result1 = s.loc[[2.5, 5]] + result2 = s.loc[[2.5, 5]] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0])) + + result1 = s[[2.5]] + result2 = s.loc[[2.5]] + result3 = s.loc[[2.5]] + tm.assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result3) + tm.assert_series_equal(result1, Series([1], index=[2.5])) + + def test_floating_tuples(self): + # see gh-13509 + s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo") + + result = s[0.0] + assert result == (1, 1) + + expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo") + s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo") + + result = s[0.0] + tm.assert_series_equal(result, expected) + + def test_float64index_slicing_bug(self): + # GH 5557, related to slicing a float index + ser = { + 256: 2321.0, + 1: 78.0, + 2: 2716.0, + 3: 0.0, + 4: 369.0, + 5: 0.0, + 6: 269.0, + 7: 0.0, + 8: 0.0, + 9: 0.0, + 10: 3536.0, + 11: 0.0, + 12: 24.0, + 13: 0.0, + 14: 931.0, + 15: 0.0, + 16: 101.0, + 17: 78.0, + 18: 9643.0, + 19: 0.0, + 20: 0.0, + 21: 0.0, + 22: 63761.0, + 23: 0.0, + 24: 446.0, + 25: 0.0, + 26: 34773.0, + 27: 0.0, + 28: 729.0, + 29: 78.0, + 30: 0.0, + 31: 0.0, + 32: 3374.0, + 33: 0.0, + 34: 1391.0, + 35: 0.0, + 36: 361.0, + 37: 0.0, + 38: 61808.0, + 39: 0.0, + 40: 0.0, + 41: 0.0, + 42: 6677.0, + 43: 0.0, + 44: 802.0, + 45: 0.0, + 46: 2691.0, + 47: 0.0, + 48: 3582.0, + 49: 0.0, + 50: 734.0, 
+ 51: 0.0, + 52: 627.0, + 53: 70.0, + 54: 2584.0, + 55: 0.0, + 56: 324.0, + 57: 0.0, + 58: 605.0, + 59: 0.0, + 60: 0.0, + 61: 0.0, + 62: 3989.0, + 63: 10.0, + 64: 42.0, + 65: 0.0, + 66: 904.0, + 67: 0.0, + 68: 88.0, + 69: 70.0, + 70: 8172.0, + 71: 0.0, + 72: 0.0, + 73: 0.0, + 74: 64902.0, + 75: 0.0, + 76: 347.0, + 77: 0.0, + 78: 36605.0, + 79: 0.0, + 80: 379.0, + 81: 70.0, + 82: 0.0, + 83: 0.0, + 84: 3001.0, + 85: 0.0, + 86: 1630.0, + 87: 7.0, + 88: 364.0, + 89: 0.0, + 90: 67404.0, + 91: 9.0, + 92: 0.0, + 93: 0.0, + 94: 7685.0, + 95: 0.0, + 96: 1017.0, + 97: 0.0, + 98: 2831.0, + 99: 0.0, + 100: 2963.0, + 101: 0.0, + 102: 854.0, + 103: 0.0, + 104: 0.0, + 105: 0.0, + 106: 0.0, + 107: 0.0, + 108: 0.0, + 109: 0.0, + 110: 0.0, + 111: 0.0, + 112: 0.0, + 113: 0.0, + 114: 0.0, + 115: 0.0, + 116: 0.0, + 117: 0.0, + 118: 0.0, + 119: 0.0, + 120: 0.0, + 121: 0.0, + 122: 0.0, + 123: 0.0, + 124: 0.0, + 125: 0.0, + 126: 67744.0, + 127: 22.0, + 128: 264.0, + 129: 0.0, + 260: 197.0, + 268: 0.0, + 265: 0.0, + 269: 0.0, + 261: 0.0, + 266: 1198.0, + 267: 0.0, + 262: 2629.0, + 258: 775.0, + 257: 0.0, + 263: 0.0, + 259: 0.0, + 264: 163.0, + 250: 10326.0, + 251: 0.0, + 252: 1228.0, + 253: 0.0, + 254: 2769.0, + 255: 0.0, + } + + # smoke test for the repr + s = Series(ser) + result = s.value_counts() + str(result) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py new file mode 100644 index 00000000..a6bf0ef2 --- /dev/null +++ b/pandas/tests/indexing/test_iloc.py @@ -0,0 +1,690 @@ +""" test positional based indexing with iloc """ + +from datetime import datetime +from warnings import catch_warnings, simplefilter + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, concat, date_range, isna +import pandas._testing as tm +from pandas.api.types import is_scalar +from pandas.core.indexing import IndexingError +from pandas.tests.indexing.common import Base + + +class TestiLoc(Base): + def 
test_iloc_exceeds_bounds(self): + + # GH6296 + # iloc should allow indexers that exceed the bounds + df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE")) + + # lists of positions should raise IndexError! + msg = "positional indexers are out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.iloc[:, [0, 1, 2, 3, 4, 5]] + with pytest.raises(IndexError, match=msg): + df.iloc[[1, 30]] + with pytest.raises(IndexError, match=msg): + df.iloc[[1, -30]] + with pytest.raises(IndexError, match=msg): + df.iloc[[100]] + + s = df["A"] + with pytest.raises(IndexError, match=msg): + s.iloc[[100]] + with pytest.raises(IndexError, match=msg): + s.iloc[[-100]] + + # still raise on a single indexer + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.iloc[30] + with pytest.raises(IndexError, match=msg): + df.iloc[-30] + + # GH10779 + # single positive/negative indexer exceeding Series bounds should raise + # an IndexError + with pytest.raises(IndexError, match=msg): + s.iloc[30] + with pytest.raises(IndexError, match=msg): + s.iloc[-30] + + # slices are ok + result = df.iloc[:, 4:10] # 0 < start < len < stop + expected = df.iloc[:, 4:] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -4:-10] # stop < 0 < start < len + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:, :4:-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:, 4::-1] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:4] # start < 0 < stop < len + expected = df.iloc[:, :4] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:4] # 0 < stop < len < start + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) + expected = 
df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 10:11] # 0 < len < start < stop + expected = df.iloc[:, :0] + tm.assert_frame_equal(result, expected) + + # slice bounds exceeding is ok + result = s.iloc[18:30] + expected = s.iloc[18:] + tm.assert_series_equal(result, expected) + + result = s.iloc[30:] + expected = s.iloc[:0] + tm.assert_series_equal(result, expected) + + result = s.iloc[30::-1] + expected = s.iloc[::-1] + tm.assert_series_equal(result, expected) + + # doc example + def check(result, expected): + str(result) + result.dtypes + tm.assert_frame_equal(result, expected) + + dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) + check(dfl.iloc[4:6], dfl.iloc[[4]]) + + msg = "positional indexers are out-of-bounds" + with pytest.raises(IndexError, match=msg): + dfl.iloc[[4, 5, 6]] + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + dfl.iloc[:, 4] + + @pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))]) + @pytest.mark.parametrize( + "index_vals,column_vals", + [ + ([slice(None), ["A", "D"]]), + (["1", "2"], slice(None)), + ([datetime(2019, 1, 1)], slice(None)), + ], + ) + def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): + # GH 25753 + df = DataFrame( + np.random.randn(len(index), len(columns)), index=index, columns=columns + ) + msg = ".iloc requires numeric indexers, got" + with pytest.raises(IndexError, match=msg): + df.iloc[index_vals, column_vals] + + def test_iloc_getitem_int(self): + # integer + self.check_result( + "iloc", + 2, + "iloc", + 2, + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + def test_iloc_getitem_neg_int(self): + # neg integer + self.check_result( + "iloc", + -1, + "iloc", + -1, + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + 
@pytest.mark.parametrize("dims", [1, 2]) + def test_iloc_getitem_invalid_scalar(self, dims): + # GH 21982 + + if dims == 1: + s = Series(np.arange(10)) + else: + s = DataFrame(np.arange(100).reshape(10, 10)) + + with pytest.raises(TypeError, match="Cannot index by location index"): + s.iloc["a"] + + def test_iloc_array_not_mutating_negative_indices(self): + + # GH 21867 + array_with_neg_numbers = np.array([1, 2, -1]) + array_copy = array_with_neg_numbers.copy() + df = pd.DataFrame( + {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]}, + index=[1, 2, 3], + ) + df.iloc[array_with_neg_numbers] + tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) + df.iloc[:, array_with_neg_numbers] + tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) + + def test_iloc_getitem_list_int(self): + self.check_result( + "iloc", + [0, 1, 2], + "iloc", + [0, 1, 2], + typs=["labels", "mixed", "ts", "floats", "empty"], + fails=IndexError, + ) + + # array of ints (GH5006), make sure that a single indexer is returning + # the correct type + + def test_iloc_getitem_neg_int_can_reach_first_index(self): + # GH10547 and GH10779 + # negative integers should be able to reach index 0 + df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]}) + s = df["A"] + + expected = df.iloc[0] + result = df.iloc[-3] + tm.assert_series_equal(result, expected) + + expected = df.iloc[[0]] + result = df.iloc[[-3]] + tm.assert_frame_equal(result, expected) + + expected = s.iloc[0] + result = s.iloc[-3] + assert result == expected + + expected = s.iloc[[0]] + result = s.iloc[[-3]] + tm.assert_series_equal(result, expected) + + # check the length 1 Series case highlighted in GH10547 + expected = Series(["a"], index=["A"]) + result = expected.iloc[[-1]] + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_dups(self): + # GH 6766 + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], 
axis=1) + + # cross-sectional indexing + result = df.iloc[0, 0] + assert isna(result) + + result = df.iloc[0, :] + expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0) + tm.assert_series_equal(result, expected) + + def test_iloc_getitem_array(self): + # TODO: test something here? + pass + + def test_iloc_getitem_bool(self): + # TODO: test something here? + pass + + @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) + def test_iloc_getitem_bool_diff_len(self, index): + # GH26658 + s = Series([1, 2, 3]) + msg = "Boolean index has wrong length: {} instead of {}".format( + len(index), len(s) + ) + with pytest.raises(IndexError, match=msg): + _ = s.iloc[index] + + def test_iloc_getitem_slice(self): + # TODO: test something here? + pass + + def test_iloc_getitem_slice_dups(self): + + df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) + df2 = DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ) + + # axis=1 + df = concat([df1, df2], axis=1) + tm.assert_frame_equal(df.iloc[:, :4], df1) + tm.assert_frame_equal(df.iloc[:, 4:], df2) + + df = concat([df2, df1], axis=1) + tm.assert_frame_equal(df.iloc[:, :2], df2) + tm.assert_frame_equal(df.iloc[:, 2:], df1) + + exp = concat([df2, df1.iloc[:, [0]]], axis=1) + tm.assert_frame_equal(df.iloc[:, 0:3], exp) + + # axis=0 + df = concat([df, df], axis=0) + tm.assert_frame_equal(df.iloc[0:10, :2], df2) + tm.assert_frame_equal(df.iloc[0:10, 2:], df1) + tm.assert_frame_equal(df.iloc[10:, :2], df2) + tm.assert_frame_equal(df.iloc[10:, 2:], df1) + + def test_iloc_setitem(self): + df = self.frame_ints + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + # GH5771 + s = Series(0, index=[4, 5, 6]) + s.iloc[1:2] += 1 + expected = Series([0, 1, 0], index=[4, 5, 6]) + tm.assert_series_equal(s, expected) + + def 
test_iloc_setitem_list(self): + + # setitem with an iloc list + df = DataFrame( + np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"] + ) + df.iloc[[0, 1], [1, 2]] + df.iloc[[0, 1], [1, 2]] += 100 + + expected = DataFrame( + np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), + index=["A", "B", "C"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(df, expected) + + def test_iloc_setitem_pandas_object(self): + # GH 17193 + s_orig = Series([0, 1, 2, 3]) + expected = Series([0, -1, -2, 3]) + + s = s_orig.copy() + s.iloc[Series([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.iloc[pd.Index([1, 2])] = [-1, -2] + tm.assert_series_equal(s, expected) + + def test_iloc_setitem_dups(self): + + # GH 6766 + # iloc with a mask aligning from another iloc + df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) + df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) + df = concat([df1, df2], axis=1) + + expected = df.fillna(3) + expected["A"] = expected["A"].astype("float64") + inds = np.isnan(df.iloc[:, 0]) + mask = inds[inds].index + df.iloc[mask, 0] = df.iloc[mask, 2] + tm.assert_frame_equal(df, expected) + + # del a dup column across blocks + expected = DataFrame({0: [1, 2], 1: [3, 4]}) + expected.columns = ["B", "B"] + del df["A"] + tm.assert_frame_equal(df, expected) + + # assign back to self + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] + tm.assert_frame_equal(df, expected) + + # reversed x 2 + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) + tm.assert_frame_equal(df, expected) + + # TODO: GH#27620 this test used to compare iloc against ix; check if this + # is redundant with another test comparing iloc against loc + def test_iloc_getitem_frame(self): + df = DataFrame( + np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2) + ) + + result = df.iloc[2] + exp = df.loc[4] + 
tm.assert_series_equal(result, exp) + + result = df.iloc[2, 2] + exp = df.loc[4, 4] + assert result == exp + + # slice + result = df.iloc[4:8] + expected = df.loc[8:14] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:, 2:3] + expected = df.loc[:, 4:5] + tm.assert_frame_equal(result, expected) + + # list of integers + result = df.iloc[[0, 1, 3]] + expected = df.loc[[0, 2, 6]] + tm.assert_frame_equal(result, expected) + + result = df.iloc[[0, 1, 3], [0, 1]] + expected = df.loc[[0, 2, 6], [0, 2]] + tm.assert_frame_equal(result, expected) + + # neg indices + result = df.iloc[[-1, 1, 3], [-1, 1]] + expected = df.loc[[18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # dups indices + result = df.iloc[[-1, -1, 1, 3], [-1, 1]] + expected = df.loc[[18, 18, 2, 6], [6, 2]] + tm.assert_frame_equal(result, expected) + + # with index-like + s = Series(index=range(1, 5), dtype=object) + result = df.iloc[s.index] + expected = df.loc[[2, 4, 6, 8]] + tm.assert_frame_equal(result, expected) + + def test_iloc_getitem_labelled_frame(self): + # try with labelled frame + df = DataFrame( + np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + ) + + result = df.iloc[1, 1] + exp = df.loc["b", "B"] + assert result == exp + + result = df.iloc[:, 2:3] + expected = df.loc[:, ["C"]] + tm.assert_frame_equal(result, expected) + + # negative indexing + result = df.iloc[-1, -1] + exp = df.loc["j", "D"] + assert result == exp + + # out-of-bounds exception + msg = "single positional indexer is out-of-bounds" + with pytest.raises(IndexError, match=msg): + df.iloc[10, 5] + + # trying to use a label + msg = ( + r"Location based indexing can only have \[integer, integer" + r" slice \(START point is INCLUDED, END point is EXCLUDED\)," + r" listlike of integers, boolean array\] types" + ) + with pytest.raises(ValueError, match=msg): + df.iloc["j", "D"] + + def test_iloc_getitem_doc_issue(self): + + # multi axis slicing issue with single block + # surfaced in 
GH 6059 + + arr = np.random.randn(6, 4) + index = date_range("20130101", periods=6) + columns = list("ABCD") + df = DataFrame(arr, index=index, columns=columns) + + # defines ref_locs + df.describe() + + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2]) + tm.assert_frame_equal(result, expected) + + # for dups + df.columns = list("aaaa") + result = df.iloc[3:5, 0:2] + str(result) + result.dtypes + + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa")) + tm.assert_frame_equal(result, expected) + + # related + arr = np.random.randn(6, 4) + index = list(range(0, 12, 2)) + columns = list(range(0, 8, 2)) + df = DataFrame(arr, index=index, columns=columns) + + df._data.blocks[0].mgr_locs + result = df.iloc[1:5, 2:4] + str(result) + result.dtypes + expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4]) + tm.assert_frame_equal(result, expected) + + def test_iloc_setitem_series(self): + df = DataFrame( + np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") + ) + + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] + assert result == 1 + + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] + tm.assert_frame_equal(result, expected) + + s = Series(np.random.randn(10), index=range(0, 20, 2)) + + s.iloc[1] = 1 + result = s.iloc[1] + assert result == 1 + + s.iloc[:4] = 0 + expected = s.iloc[:4] + result = s.iloc[:4] + tm.assert_series_equal(result, expected) + + s = Series([-1] * 6) + s.iloc[0::2] = [0, 2, 4] + s.iloc[1::2] = [1, 3, 5] + result = s + expected = Series([0, 1, 2, 3, 4, 5]) + tm.assert_series_equal(result, expected) + + def test_iloc_setitem_list_of_lists(self): + + # GH 7551 + # list-of-list is set incorrectly in mixed vs. 
single dtyped frames + df = DataFrame( + dict(A=np.arange(5, dtype="int64"), B=np.arange(5, 10, dtype="int64")) + ) + df.iloc[2:4] = [[10, 11], [12, 13]] + expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + df = DataFrame(dict(A=list("abcde"), B=np.arange(5, 10, dtype="int64"))) + df.iloc[2:4] = [["x", 11], ["y", 13]] + expected = DataFrame(dict(A=["a", "b", "x", "y", "e"], B=[5, 6, 11, 13, 9])) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_iloc_setitem_with_scalar_index(self, indexer, value): + # GH #19474 + # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". + + df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df.iloc[0, indexer] = value + result = df.iloc[0, 0] + + assert is_scalar(result) and result == "Z" + + def test_iloc_mask(self): + + # GH 3631, iloc with a mask (of a series) should raise + df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) + mask = df.a % 2 == 0 + msg = "iLocation based boolean indexing cannot use an indexable as a mask" + with pytest.raises(ValueError, match=msg): + df.iloc[mask] + mask.index = range(len(mask)) + msg = "iLocation based boolean indexing on an integer type is not available" + with pytest.raises(NotImplementedError, match=msg): + df.iloc[mask] + + # ndarray ok + result = df.iloc[np.array([True] * len(mask), dtype=bool)] + tm.assert_frame_equal(result, df) + + # the possibilities + locs = np.arange(4) + nums = 2 ** locs + reps = [bin(num) for num in nums] + df = DataFrame({"locs": locs, "nums": nums}, reps) + + expected = { + (None, ""): "0b1100", + (None, ".loc"): "0b1100", + (None, ".iloc"): "0b1100", + ("index", ""): "0b11", + ("index", ".loc"): "0b11", + ("index", ".iloc"): ( + "iLocation based boolean indexing cannot use an 
indexable as a mask" + ), + ("locs", ""): "Unalignable boolean Series provided as indexer " + "(index of the boolean Series and of the indexed " + "object do not match).", + ("locs", ".loc"): "Unalignable boolean Series provided as indexer " + "(index of the boolean Series and of the " + "indexed object do not match).", + ("locs", ".iloc"): ( + "iLocation based boolean indexing on an " + "integer type is not available" + ), + } + + # UserWarnings from reindex of a boolean mask + with catch_warnings(record=True): + simplefilter("ignore", UserWarning) + result = dict() + for idx in [None, "index", "locs"]: + mask = (df.nums > 2).values + if idx: + mask = Series(mask, list(reversed(getattr(df, idx)))) + for method in ["", ".loc", ".iloc"]: + try: + if method: + accessor = getattr(df, method[1:]) + else: + accessor = df + ans = str(bin(accessor[mask]["nums"].sum())) + except (ValueError, IndexingError, NotImplementedError) as e: + ans = str(e) + + key = tuple([idx, method]) + r = expected.get(key) + if r != ans: + raise AssertionError( + "[{key}] does not match [{ans}], received [{r}]".format( + key=key, ans=ans, r=r + ) + ) + + def test_iloc_non_unique_indexing(self): + + # GH 4017, non-unique indexing (on the axis) + df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000}) + idx = np.arange(30) * 99 + expected = df.iloc[idx] + + df3 = concat([df, 2 * df, 3 * df]) + result = df3.iloc[idx] + + tm.assert_frame_equal(result, expected) + + df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000}) + df2 = concat([df2, 2 * df2, 3 * df2]) + + with pytest.raises(KeyError, match="with any missing labels"): + df2.loc[idx] + + def test_iloc_empty_list_indexer_is_ok(self): + + df = tm.makeCustomDataframe(5, 2) + # vertical empty + tm.assert_frame_equal( + df.iloc[:, []], + df.iloc[:, :0], + check_index_type=True, + check_column_type=True, + ) + # horizontal empty + tm.assert_frame_equal( + df.iloc[[], :], + df.iloc[:0, :], + check_index_type=True, + check_column_type=True, + ) + # 
horizontal empty + tm.assert_frame_equal( + df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + + def test_identity_slice_returns_new_object(self): + # GH13873 + original_df = DataFrame({"a": [1, 2, 3]}) + sliced_df = original_df.iloc[:] + assert sliced_df is not original_df + + # should be a shallow copy + original_df["a"] = [4, 4, 4] + assert (sliced_df["a"] == 4).all() + + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.iloc[:] + assert sliced_series is not original_series + + # should also be a shallow copy + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) + + def test_indexing_zerodim_np_array(self): + # GH24919 + df = DataFrame([[1, 2], [3, 4]]) + result = df.iloc[np.array(0)] + s = pd.Series([1, 2], name=0) + tm.assert_series_equal(result, s) + + def test_series_indexing_zerodim_np_array(self): + # GH24919 + s = Series([1, 2]) + result = s.iloc[np.array(0)] + assert result == 1 diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py new file mode 100644 index 00000000..448a0607 --- /dev/null +++ b/pandas/tests/indexing/test_indexing.py @@ -0,0 +1,1186 @@ +""" test fancy indexing & misc """ + +from datetime import datetime +import re +import weakref + +import numpy as np +import pytest + +from pandas.errors import AbstractMethodError + +from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype + +import pandas as pd +from pandas import DataFrame, Index, NaT, Series +import pandas._testing as tm +from pandas.core.generic import NDFrame +from pandas.core.indexers import validate_indices +from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice +from pandas.tests.indexing.common import Base, _mklbl + +# ------------------------------------------------------------------------ +# Indexing test cases + + +class TestFancy(Base): + """ pure get/set item & fancy indexing """ + + def test_setitem_ndarray_1d(self): 
+ # GH5508 + + # len of indexer vs length of the 1d ndarray + df = DataFrame(index=Index(np.arange(1, 11))) + df["foo"] = np.zeros(10, dtype=np.float64) + df["bar"] = np.zeros(10, dtype=np.complex) + + # invalid + with pytest.raises(ValueError): + df.loc[df.index[2:5], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) + + # valid + df.loc[df.index[2:6], "bar"] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) + + result = df.loc[df.index[2:6], "bar"] + expected = Series( + [2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[3, 4, 5, 6], name="bar" + ) + tm.assert_series_equal(result, expected) + + # dtype getting changed? + df = DataFrame(index=Index(np.arange(1, 11))) + df["foo"] = np.zeros(10, dtype=np.float64) + df["bar"] = np.zeros(10, dtype=np.complex) + + with pytest.raises(ValueError): + df[2:5] = np.arange(1, 4) * 1j + + @pytest.mark.parametrize( + "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__ + ) + @pytest.mark.parametrize( + "obj", + [ + lambda i: Series(np.arange(len(i)), index=i), + lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), + ], + ids=["Series", "DataFrame"], + ) + @pytest.mark.parametrize( + "idxr, idxr_id", + [ + (lambda x: x, "getitem"), + (lambda x: x.loc, "loc"), + (lambda x: x.iloc, "iloc"), + ], + ) + def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): + # GH 25567 + obj = obj(index) + idxr = idxr(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = ( + r"Buffer has wrong number of dimensions \(expected 1," + r" got 3\)|" + "Cannot index with multidimensional key|" + r"Wrong number of dimensions. 
values.ndim != ndim \[3 != 1\]|" + "Index data must be 1-dimensional" + ) + + if ( + isinstance(obj, Series) + and idxr_id == "getitem" + and index.inferred_type + in [ + "string", + "datetime64", + "period", + "timedelta64", + "boolean", + "categorical", + ] + ): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + idxr[nd3] + else: + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(DeprecationWarning): + idxr[nd3] + + @pytest.mark.parametrize( + "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__ + ) + @pytest.mark.parametrize( + "obj", + [ + lambda i: Series(np.arange(len(i)), index=i), + lambda i: DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i), + ], + ids=["Series", "DataFrame"], + ) + @pytest.mark.parametrize( + "idxr, idxr_id", + [ + (lambda x: x, "setitem"), + (lambda x: x.loc, "loc"), + (lambda x: x.iloc, "iloc"), + ], + ) + def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): + # GH 25567 + obj = obj(index) + idxr = idxr(obj) + nd3 = np.random.randint(5, size=(2, 2, 2)) + + msg = ( + r"Buffer has wrong number of dimensions \(expected 1," + r" got 3\)|" + "'pandas._libs.interval.IntervalTree' object has no attribute " + "'set_value'|" # AttributeError + "unhashable type: 'numpy.ndarray'|" # TypeError + "No matching signature found|" # TypeError + r"^\[\[\[|" # pandas.core.indexing.IndexingError + "Index data must be 1-dimensional" + ) + + if (idxr_id == "iloc") or ( + ( + isinstance(obj, Series) + and idxr_id == "setitem" + and index.inferred_type + in [ + "floating", + "string", + "datetime64", + "period", + "timedelta64", + "boolean", + "categorical", + ] + ) + ): + idxr[nd3] = 0 + else: + err = (ValueError, AttributeError) + with pytest.raises(err, match=msg): + idxr[nd3] = 0 + + def test_inf_upcast(self): + # GH 16957 + # We should be able to use np.inf as a key + # np.inf should cause an index to convert to float + + # Test with np.inf in rows + df = 
DataFrame(columns=[0]) + df.loc[1] = 1 + df.loc[2] = 2 + df.loc[np.inf] = 3 + + # make sure we can look up the value + assert df.loc[np.inf, 0] == 3 + + result = df.index + expected = pd.Float64Index([1, 2, np.inf]) + tm.assert_index_equal(result, expected) + + # Test with np.inf in columns + df = DataFrame() + df.loc[0, 0] = 1 + df.loc[1, 1] = 2 + df.loc[0, np.inf] = 3 + + result = df.columns + expected = pd.Float64Index([0, 1, np.inf]) + tm.assert_index_equal(result, expected) + + def test_setitem_dtype_upcast(self): + + # GH3216 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df["c"] = np.nan + assert df["c"].dtype == np.float64 + + df.loc[0, "c"] = "foo" + expected = DataFrame( + [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}] + ) + tm.assert_frame_equal(df, expected) + + # GH10280 + df = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3), + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + for val in [3.14, "wxyz"]: + left = df.copy() + left.loc["a", "bar"] = val + right = DataFrame( + [[0, val, 2], [3, 4, 5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + tm.assert_frame_equal(left, right) + assert is_integer_dtype(left["foo"]) + assert is_integer_dtype(left["baz"]) + + left = DataFrame( + np.arange(6, dtype="int64").reshape(2, 3) / 10.0, + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + left.loc["a", "bar"] = "wxyz" + + right = DataFrame( + [[0, "wxyz", 0.2], [0.3, 0.4, 0.5]], + index=list("ab"), + columns=["foo", "bar", "baz"], + ) + + tm.assert_frame_equal(left, right) + assert is_float_dtype(left["foo"]) + assert is_float_dtype(left["baz"]) + + def test_dups_fancy_indexing(self): + + # GH 3455 + + df = tm.makeCustomDataframe(10, 3) + df.columns = ["a", "a", "b"] + result = df[["b", "a"]].columns + expected = Index(["b", "a", "a"]) + tm.assert_index_equal(result, expected) + + # across dtypes + df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) + df.head() + str(df) + 
result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]]) + result.columns = list("aaaaaaa") + + # TODO(wesm): unused? + df_v = df.iloc[:, 4] # noqa + res_v = result.iloc[:, 4] # noqa + + tm.assert_frame_equal(df, result) + + # GH 3561, dups not in selected order + df = DataFrame( + {"test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd")}, + index=["A", "A", "B", "C"], + ) + rows = ["C", "B"] + expected = DataFrame( + {"test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"]}, index=rows + ) + result = df.loc[rows] + tm.assert_frame_equal(result, expected) + + result = df.loc[Index(rows)] + tm.assert_frame_equal(result, expected) + + rows = ["C", "B", "E"] + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[rows] + + # see GH5553, make sure we use the right indexer + rows = ["F", "G", "H", "C", "B", "E"] + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[rows] + + # List containing only missing label + dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Index(['E'], dtype='object')] are in the [index]\"" + ), + ): + dfnu.loc[["E"]] + + # ToDo: check_index_type can be True after GH 11497 + + # GH 4619; duplicate indexer with missing label + df = DataFrame({"A": [0, 1, 2]}) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[[0, 8, 0]] + + df = DataFrame({"A": list("abc")}) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[[0, 8, 0]] + + # non unique with non unique selector + df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[["A", "A", "E"]] + + def test_dups_fancy_indexing2(self): + # GH 5835 + # dups on index and missing values + df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]) + + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[:, ["A", "B", "C"]] 
+ + # GH 6504, multi-axis indexing + df = DataFrame( + np.random.randn(9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=["a", "b"] + ) + + expected = df.iloc[0:6] + result = df.loc[[1, 2]] + tm.assert_frame_equal(result, expected) + + expected = df + result = df.loc[:, ["a", "b"]] + tm.assert_frame_equal(result, expected) + + expected = df.iloc[0:6, :] + result = df.loc[[1, 2], ["a", "b"]] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("case", [lambda s: s, lambda s: s.loc]) + def test_duplicate_int_indexing(self, case): + # GH 17347 + s = pd.Series(range(3), index=[1, 1, 3]) + expected = s[1] + result = case(s)[[1]] + tm.assert_series_equal(result, expected) + + def test_indexing_mixed_frame_bug(self): + + # GH3492 + df = DataFrame( + {"a": {1: "aaa", 2: "bbb", 3: "ccc"}, "b": {1: 111, 2: 222, 3: 333}} + ) + + # this works, new column is created correctly + df["test"] = df["a"].apply(lambda x: "_" if x == "aaa" else x) + + # this does not work, ie column test is not changed + idx = df["test"] == "_" + temp = df.loc[idx, "a"].apply(lambda x: "-----" if x == "aaa" else x) + df.loc[idx, "test"] = temp + assert df.iloc[0, 2] == "-----" + + def test_multitype_list_index_access(self): + # GH 10610 + df = DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) + + with pytest.raises(KeyError, match=re.escape("'[-8, 26] not in index'")): + df[[22, 26, -8]] + assert df[21].shape[0] == df.shape[0] + + def test_set_index_nan(self): + + # GH 3586 + df = DataFrame( + { + "PRuid": { + 17: "nonQC", + 18: "nonQC", + 19: "nonQC", + 20: "10", + 21: "11", + 22: "12", + 23: "13", + 24: "24", + 25: "35", + 26: "46", + 27: "47", + 28: "48", + 29: "59", + 30: "10", + }, + "QC": { + 17: 0.0, + 18: 0.0, + 19: 0.0, + 20: np.nan, + 21: np.nan, + 22: np.nan, + 23: np.nan, + 24: 1.0, + 25: np.nan, + 26: np.nan, + 27: np.nan, + 28: np.nan, + 29: np.nan, + 30: np.nan, + }, + "data": { + 17: 7.9544899999999998, + 18: 8.0142609999999994, + 19: 
7.8591520000000008, + 20: 0.86140349999999999, + 21: 0.87853110000000001, + 22: 0.8427041999999999, + 23: 0.78587700000000005, + 24: 0.73062459999999996, + 25: 0.81668560000000001, + 26: 0.81927080000000008, + 27: 0.80705009999999999, + 28: 0.81440240000000008, + 29: 0.80140849999999997, + 30: 0.81307740000000006, + }, + "year": { + 17: 2006, + 18: 2007, + 19: 2008, + 20: 1985, + 21: 1985, + 22: 1985, + 23: 1985, + 24: 1985, + 25: 1985, + 26: 1985, + 27: 1985, + 28: 1985, + 29: 1985, + 30: 1986, + }, + } + ).reset_index() + + result = ( + df.set_index(["year", "PRuid", "QC"]) + .reset_index() + .reindex(columns=df.columns) + ) + tm.assert_frame_equal(result, df) + + def test_multi_assign(self): + + # GH 3626, an assignment of a sub-df to a df + df = DataFrame( + { + "FC": ["a", "b", "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": list(range(6)), + "col2": list(range(6, 12)), + } + ) + df.iloc[1, 0] = np.nan + df2 = df.copy() + + mask = ~df2.FC.isna() + cols = ["col1", "col2"] + + dft = df2 * 2 + dft.iloc[3, 3] = np.nan + + expected = DataFrame( + { + "FC": ["a", np.nan, "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": Series([0, 1, 4, 6, 8, 10]), + "col2": [12, 7, 16, np.nan, 20, 22], + } + ) + + # frame on rhs + df2.loc[mask, cols] = dft.loc[mask, cols] + tm.assert_frame_equal(df2, expected) + + df2.loc[mask, cols] = dft.loc[mask, cols] + tm.assert_frame_equal(df2, expected) + + # with an ndarray on rhs + # coerces to float64 because values has float64 dtype + # GH 14001 + expected = DataFrame( + { + "FC": ["a", np.nan, "a", "b", "a", "b"], + "PF": [0, 0, 0, 0, 1, 1], + "col1": [0.0, 1.0, 4.0, 6.0, 8.0, 10.0], + "col2": [12, 7, 16, np.nan, 20, 22], + } + ) + df2 = df.copy() + df2.loc[mask, cols] = dft.loc[mask, cols].values + tm.assert_frame_equal(df2, expected) + df2.loc[mask, cols] = dft.loc[mask, cols].values + tm.assert_frame_equal(df2, expected) + + # broadcasting on the rhs is required + df = DataFrame( + dict( + A=[1, 2, 0, 0, 0], + 
B=[0, 0, 0, 10, 11], + C=[0, 0, 0, 10, 11], + D=[3, 4, 5, 6, 7], + ) + ) + + expected = df.copy() + mask = expected["A"] == 0 + for col in ["A", "B"]: + expected.loc[mask, col] = df["D"] + + df.loc[df["A"] == 0, ["A", "B"]] = df["D"] + tm.assert_frame_equal(df, expected) + + def test_setitem_list(self): + + # GH 6043 + # iloc with a list + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = [1, 2, 3] + df.iloc[1, 0] = [1, 2] + + result = DataFrame(index=[0, 1], columns=[0]) + result.iloc[1, 0] = [1, 2] + + tm.assert_frame_equal(result, df) + + # iloc with an object + class TO: + def __init__(self, value): + self.value = value + + def __str__(self) -> str: + return "[{0}]".format(self.value) + + __repr__ = __str__ + + def __eq__(self, other) -> bool: + return self.value == other.value + + def view(self): + return self + + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = TO(2) + + result = DataFrame(index=[0, 1], columns=[0]) + result.iloc[1, 0] = TO(2) + + tm.assert_frame_equal(result, df) + + # remains object dtype even after setting it back + df = DataFrame(index=[0, 1], columns=[0]) + df.iloc[1, 0] = TO(1) + df.iloc[1, 0] = np.nan + result = DataFrame(index=[0, 1], columns=[0]) + + tm.assert_frame_equal(result, df) + + def test_string_slice(self): + # GH 14424 + # string indexing against datetimelike with object + # dtype should properly raises KeyError + df = DataFrame([1], Index([pd.Timestamp("2011-01-01")], dtype=object)) + assert df.index.is_all_dates + with pytest.raises(KeyError, match="'2011'"): + df["2011"] + + with pytest.raises(KeyError, match="'2011'"): + df.loc["2011", 0] + + df = DataFrame() + assert not df.index.is_all_dates + with pytest.raises(KeyError, match="'2011'"): + df["2011"] + + with pytest.raises(KeyError, match="'2011'"): + df.loc["2011", 0] + + def test_astype_assignment(self): + + # GH4312 (iloc) + df_orig = DataFrame( + [["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + + 
df = df_orig.copy() + df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) + expected = DataFrame( + [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) + expected = DataFrame( + [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + # GH5702 (loc) + df = df_orig.copy() + df.loc[:, "A"] = df.loc[:, "A"].astype(np.int64) + expected = DataFrame( + [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + df = df_orig.copy() + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64) + expected = DataFrame( + [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") + ) + tm.assert_frame_equal(df, expected) + + # full replacements / no nans + df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) + df.iloc[:, 0] = df["A"].astype(np.int64) + expected = DataFrame({"A": [1, 2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) + df.loc[:, "A"] = df["A"].astype(np.int64) + expected = DataFrame({"A": [1, 2, 3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "index,val", + [ + (Index([0, 1, 2]), 2), + (Index([0, 1, "2"]), "2"), + (Index([0, 1, 2, np.inf, 4]), 4), + (Index([0, 1, 2, np.nan, 4]), 4), + (Index([0, 1, 2, np.inf]), np.inf), + (Index([0, 1, 2, np.nan]), np.nan), + ], + ) + def test_index_contains(self, index, val): + assert val in index + + @pytest.mark.parametrize( + "index,val", + [ + (Index([0, 1, 2]), "2"), + (Index([0, 1, "2"]), 2), + (Index([0, 1, 2, np.inf]), 4), + (Index([0, 1, 2, np.nan]), 4), + (Index([0, 1, 2, np.inf]), np.nan), + (Index([0, 1, 2, np.nan]), np.inf), + # Checking if np.inf in Int64Index should not cause an OverflowError + # Related to GH 16957 + (pd.Int64Index([0, 1, 2]), np.inf), + (pd.Int64Index([0, 1, 2]), np.nan), + 
(pd.UInt64Index([0, 1, 2]), np.inf), + (pd.UInt64Index([0, 1, 2]), np.nan), + ], + ) + def test_index_not_contains(self, index, val): + assert val not in index + + @pytest.mark.parametrize( + "index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")] + ) + def test_mixed_index_contains(self, index, val): + # GH 19860 + assert val in index + + @pytest.mark.parametrize( + "index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)] + ) + def test_mixed_index_not_contains(self, index, val): + # GH 19860 + assert val not in index + + def test_contains_with_float_index(self): + # GH#22085 + integer_index = pd.Int64Index([0, 1, 2, 3]) + uinteger_index = pd.UInt64Index([0, 1, 2, 3]) + float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3]) + + for index in (integer_index, uinteger_index): + assert 1.1 not in index + assert 1.0 in index + assert 1 in index + + assert 1.1 in float_index + assert 1.0 not in float_index + assert 1 not in float_index + + def test_index_type_coercion(self): + + # GH 11836 + # if we have an index type and set it with something that looks + # to numpy like the same, but is actually, not + # (e.g. 
setting with a float or string '0') + # then we need to coerce to object + + # integer indexes + for s in [Series(range(5)), Series(range(5), index=range(1, 6))]: + + assert s.index.is_integer() + + for indexer in [lambda x: x.loc, lambda x: x]: + s2 = s.copy() + indexer(s2)[0.1] = 0 + assert s2.index.is_floating() + assert indexer(s2)[0.1] == 0 + + s2 = s.copy() + indexer(s2)[0.0] = 0 + exp = s.index + if 0 not in s: + exp = Index(s.index.tolist() + [0]) + tm.assert_index_equal(s2.index, exp) + + s2 = s.copy() + indexer(s2)["0"] = 0 + assert s2.index.is_object() + + for s in [Series(range(5), index=np.arange(5.0))]: + + assert s.index.is_floating() + + for idxr in [lambda x: x.loc, lambda x: x]: + + s2 = s.copy() + idxr(s2)[0.1] = 0 + assert s2.index.is_floating() + assert idxr(s2)[0.1] == 0 + + s2 = s.copy() + idxr(s2)[0.0] = 0 + tm.assert_index_equal(s2.index, s.index) + + s2 = s.copy() + idxr(s2)["0"] = 0 + assert s2.index.is_object() + + +class TestMisc(Base): + def test_float_index_to_mixed(self): + df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) + df["a"] = 10 + tm.assert_frame_equal( + DataFrame({0.0: df[0.0], 1.0: df[1.0], "a": [10] * 10}), df + ) + + def test_float_index_non_scalar_assignment(self): + df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) + df.loc[df.index[:2]] = 1 + expected = DataFrame({"a": [1, 1, 3], "b": [1, 1, 5]}, index=df.index) + tm.assert_frame_equal(expected, df) + + df = DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}, index=[1.0, 2.0, 3.0]) + df2 = df.copy() + df.loc[df.index] = df.loc[df.index] + tm.assert_frame_equal(df, df2) + + def test_float_index_at_iat(self): + s = Series([1, 2, 3], index=[0.1, 0.2, 0.3]) + for el, item in s.items(): + assert s.at[el] == item + for i in range(len(s)): + assert s.iat[i] == i + 1 + + def test_mixed_index_assignment(self): + # GH 19860 + s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + s.at["a"] = 11 + assert s.iat[0] == 11 + s.at[1] = 22 + assert 
s.iat[3] == 22 + + def test_mixed_index_no_fallback(self): + # GH 19860 + s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + with pytest.raises(KeyError, match="^0$"): + s.at[0] + with pytest.raises(KeyError, match="^4$"): + s.at[4] + + def test_rhs_alignment(self): + # GH8258, tests that both rows & columns are aligned to what is + # assigned to. covers both uniform data-type & multi-type cases + def run_tests(df, rhs, right): + # label, index, slice + lbl_one, idx_one, slice_one = list("bcd"), [1, 2, 3], slice(1, 4) + lbl_two, idx_two, slice_two = ["joe", "jolie"], [1, 2], slice(1, 3) + + left = df.copy() + left.loc[lbl_one, lbl_two] = rhs + tm.assert_frame_equal(left, right) + + left = df.copy() + left.iloc[idx_one, idx_two] = rhs + tm.assert_frame_equal(left, right) + + left = df.copy() + left.iloc[slice_one, slice_two] = rhs + tm.assert_frame_equal(left, right) + + xs = np.arange(20).reshape(5, 4) + cols = ["jim", "joe", "jolie", "joline"] + df = DataFrame(xs, columns=cols, index=list("abcde")) + + # right hand side; permute the indices and multiplpy by -2 + rhs = -2 * df.iloc[3:0:-1, 2:0:-1] + + # expected `right` result; just multiply by -2 + right = df.copy() + right.iloc[1:4, 1:3] *= -2 + + # run tests with uniform dtypes + run_tests(df, rhs, right) + + # make frames multi-type & re-run tests + for frame in [df, rhs, right]: + frame["joe"] = frame["joe"].astype("float64") + frame["jolie"] = frame["jolie"].map("@{0}".format) + + run_tests(df, rhs, right) + + def test_str_label_slicing_with_negative_step(self): + SLC = pd.IndexSlice + + def assert_slices_equivalent(l_slc, i_slc): + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) + + if not idx.is_integer: + # For integer indices, .loc and plain getitem are position-based. 
+ tm.assert_series_equal(s[l_slc], s.iloc[i_slc]) + tm.assert_series_equal(s.loc[l_slc], s.iloc[i_slc]) + + for idx in [_mklbl("A", 20), np.arange(20) + 100, np.linspace(100, 150, 20)]: + idx = Index(idx) + s = Series(np.arange(20), index=idx) + assert_slices_equivalent(SLC[idx[9] :: -1], SLC[9::-1]) + assert_slices_equivalent(SLC[: idx[9] : -1], SLC[:8:-1]) + assert_slices_equivalent(SLC[idx[13] : idx[9] : -1], SLC[13:8:-1]) + assert_slices_equivalent(SLC[idx[9] : idx[13] : -1], SLC[:0]) + + def test_slice_with_zero_step_raises(self): + s = Series(np.arange(20), index=_mklbl("A", 20)) + with pytest.raises(ValueError, match="slice step cannot be zero"): + s[::0] + with pytest.raises(ValueError, match="slice step cannot be zero"): + s.loc[::0] + + def test_indexing_assignment_dict_already_exists(self): + df = DataFrame({"x": [1, 2, 6], "y": [2, 2, 8], "z": [-5, 0, 5]}).set_index("z") + expected = df.copy() + rhs = dict(x=9, y=99) + df.loc[5] = rhs + expected.loc[5] = [9, 99] + tm.assert_frame_equal(df, expected) + + def test_indexing_dtypes_on_empty(self): + # Check that .iloc returns correct dtypes GH9983 + df = DataFrame({"a": [1, 2, 3], "b": ["b", "b2", "b3"]}) + df2 = df.iloc[[], :] + + assert df2.loc[:, "a"].dtype == np.int64 + tm.assert_series_equal(df2.loc[:, "a"], df2.iloc[:, 0]) + + def test_range_in_series_indexing(self): + # range can cause an indexing error + # GH 11652 + for x in [5, 999999, 1000000]: + s = Series(index=range(x), dtype=np.float64) + s.loc[range(1)] = 42 + tm.assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) + + s.loc[range(2)] = 43 + tm.assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) + + def test_non_reducing_slice(self): + df = DataFrame([[0, 1], [2, 3]]) + + slices = [ + # pd.IndexSlice[:, :], + pd.IndexSlice[:, 1], + pd.IndexSlice[1, :], + pd.IndexSlice[[1], [1]], + pd.IndexSlice[1, [1]], + pd.IndexSlice[[1], 1], + pd.IndexSlice[1], + pd.IndexSlice[1, 1], + slice(None, None, None), + [0, 1], + 
np.array([0, 1]), + Series([0, 1]), + ] + for slice_ in slices: + tslice_ = _non_reducing_slice(slice_) + assert isinstance(df.loc[tslice_], DataFrame) + + def test_list_slice(self): + # like dataframe getitem + slices = [["A"], Series(["A"]), np.array(["A"])] + df = DataFrame({"A": [1, 2], "B": [3, 4]}, index=["A", "B"]) + expected = pd.IndexSlice[:, ["A"]] + for subset in slices: + result = _non_reducing_slice(subset) + tm.assert_frame_equal(df.loc[result], df.loc[expected]) + + def test_maybe_numeric_slice(self): + df = DataFrame({"A": [1, 2], "B": ["c", "d"], "C": [True, False]}) + result = _maybe_numeric_slice(df, slice_=None) + expected = pd.IndexSlice[:, ["A"]] + assert result == expected + + result = _maybe_numeric_slice(df, None, include_bool=True) + expected = pd.IndexSlice[:, ["A", "C"]] + result = _maybe_numeric_slice(df, [1]) + expected = [1] + assert result == expected + + def test_partial_boolean_frame_indexing(self): + # GH 17170 + df = DataFrame( + np.arange(9.0).reshape(3, 3), index=list("abc"), columns=list("ABC") + ) + index_df = DataFrame(1, index=list("ab"), columns=list("AB")) + result = df[index_df.notnull()] + expected = DataFrame( + np.array([[0.0, 1.0, np.nan], [3.0, 4.0, np.nan], [np.nan] * 3]), + index=list("abc"), + columns=list("ABC"), + ) + tm.assert_frame_equal(result, expected) + + def test_no_reference_cycle(self): + df = DataFrame({"a": [0, 1], "b": [2, 3]}) + for name in ("loc", "iloc", "at", "iat"): + getattr(df, name) + wr = weakref.ref(df) + del df + assert wr() is None + + +class TestSeriesNoneCoercion: + EXPECTED_RESULTS = [ + # For numeric series, we should coerce to NaN. + ([1, 2, 3], [np.nan, 2, 3]), + ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), + # For datetime series, we should coerce to NaT. + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + # For objects, we should preserve the None value. 
+ (["foo", "bar", "baz"], [None, "bar", "baz"]), + ] + + def test_coercion_with_setitem(self): + for start_data, expected_result in self.EXPECTED_RESULTS: + start_series = Series(start_data) + start_series[0] = None + + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) + + def test_coercion_with_loc_setitem(self): + for start_data, expected_result in self.EXPECTED_RESULTS: + start_series = Series(start_data) + start_series.loc[0] = None + + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) + + def test_coercion_with_setitem_and_series(self): + for start_data, expected_result in self.EXPECTED_RESULTS: + start_series = Series(start_data) + start_series[start_series == start_series[0]] = None + + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) + + def test_coercion_with_loc_and_series(self): + for start_data, expected_result in self.EXPECTED_RESULTS: + start_series = Series(start_data) + start_series.loc[start_series == start_series[0]] = None + + expected_series = Series(expected_result) + tm.assert_series_equal(start_series, expected_series) + + +class TestDataframeNoneCoercion: + EXPECTED_SINGLE_ROW_RESULTS = [ + # For numeric series, we should coerce to NaN. + ([1, 2, 3], [np.nan, 2, 3]), + ([1.0, 2.0, 3.0], [np.nan, 2.0, 3.0]), + # For datetime series, we should coerce to NaT. + ( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + ), + # For objects, we should preserve the None value. 
+ (["foo", "bar", "baz"], [None, "bar", "baz"]), + ] + + def test_coercion_with_loc(self): + for start_data, expected_result in self.EXPECTED_SINGLE_ROW_RESULTS: + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[0, ["foo"]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + def test_coercion_with_setitem_and_dataframe(self): + for start_data, expected_result in self.EXPECTED_SINGLE_ROW_RESULTS: + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + def test_none_coercion_loc_and_dataframe(self): + for start_data, expected_result in self.EXPECTED_SINGLE_ROW_RESULTS: + start_dataframe = DataFrame({"foo": start_data}) + start_dataframe.loc[ + start_dataframe["foo"] == start_dataframe["foo"][0] + ] = None + + expected_dataframe = DataFrame({"foo": expected_result}) + tm.assert_frame_equal(start_dataframe, expected_dataframe) + + def test_none_coercion_mixed_dtypes(self): + start_dataframe = DataFrame( + { + "a": [1, 2, 3], + "b": [1.0, 2.0, 3.0], + "c": [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], + "d": ["a", "b", "c"], + } + ) + start_dataframe.iloc[0] = None + + exp = DataFrame( + { + "a": [np.nan, 2, 3], + "b": [np.nan, 2.0, 3.0], + "c": [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], + "d": [None, "b", "c"], + } + ) + tm.assert_frame_equal(start_dataframe, exp) + + +def test_validate_indices_ok(): + indices = np.asarray([0, 1]) + validate_indices(indices, 2) + validate_indices(indices[:0], 0) + validate_indices(np.array([-1, -1]), 0) + + +def test_validate_indices_low(): + indices = np.asarray([0, -2]) + with pytest.raises(ValueError, match="'indices' contains"): + validate_indices(indices, 2) + + +def test_validate_indices_high(): + 
indices = np.asarray([0, 1, 2]) + with pytest.raises(IndexError, match="indices are out"): + validate_indices(indices, 2) + + +def test_validate_indices_empty(): + with pytest.raises(IndexError, match="indices are out"): + validate_indices(np.array([0, 1]), 0) + + +def test_extension_array_cross_section(): + # A cross-section of a homogeneous EA should be an EA + df = pd.DataFrame( + { + "A": pd.core.arrays.integer_array([1, 2]), + "B": pd.core.arrays.integer_array([3, 4]), + }, + index=["a", "b"], + ) + expected = pd.Series( + pd.core.arrays.integer_array([1, 3]), index=["A", "B"], name="a" + ) + result = df.loc["a"] + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + +def test_extension_array_cross_section_converts(): + df = pd.DataFrame( + {"A": pd.core.arrays.integer_array([1, 2]), "B": np.array([1, 2])}, + index=["a", "b"], + ) + result = df.loc["a"] + expected = pd.Series([1, 1], dtype=object, index=["A", "B"], name="a") + tm.assert_series_equal(result, expected) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "idxr, error, error_message", + [ + (lambda x: x, AbstractMethodError, None), + ( + lambda x: x.loc, + AttributeError, + "type object 'NDFrame' has no attribute '_AXIS_NAMES'", + ), + ( + lambda x: x.iloc, + AttributeError, + "type object 'NDFrame' has no attribute '_AXIS_NAMES'", + ), + ], +) +def test_ndframe_indexing_raises(idxr, error, error_message): + # GH 25567 + frame = NDFrame(np.random.randint(5, size=(2, 2, 2))) + with pytest.raises(error, match=error_message): + idxr(frame)[0] + + +def test_readonly_indices(): + # GH#17192 iloc with read-only array raising TypeError + df = pd.DataFrame({"data": np.ones(100, dtype="float64")}) + indices = np.array([1, 3, 6]) + indices.flags.writeable = False + + result = df.iloc[indices] + expected = df.loc[[1, 3, 6]] + tm.assert_frame_equal(result, expected) + + result = df["data"].iloc[indices] 
+ expected = df["data"].loc[[1, 3, 6]] + tm.assert_series_equal(result, expected) + + +def test_1tuple_without_multiindex(): + ser = pd.Series(range(5)) + key = (slice(3),) + + result = ser[key] + expected = ser[key[0]] + tm.assert_series_equal(result, expected) + + +def test_duplicate_index_mistyped_key_raises_keyerror(): + # GH#29189 float_index.get_loc(None) should raise KeyError, not TypeError + ser = pd.Series([2, 5, 6, 8], index=[2.0, 4.0, 4.0, 5.0]) + with pytest.raises(KeyError): + ser[None] + + with pytest.raises(KeyError): + ser.index.get_loc(None) + + with pytest.raises(KeyError): + ser.index._engine.get_loc(None) + + +def test_setitem_with_bool_mask_and_values_matching_n_trues_in_length(): + # GH 30567 + ser = pd.Series([None] * 10) + mask = [False] * 3 + [True] * 5 + [False] * 2 + ser[mask] = range(5) + result = ser + expected = pd.Series([None] * 3 + list(range(5)) + [None] * 2).astype("object") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing_engines.py b/pandas/tests/indexing/test_indexing_engines.py new file mode 100644 index 00000000..edb5d7d7 --- /dev/null +++ b/pandas/tests/indexing/test_indexing_engines.py @@ -0,0 +1,163 @@ +import numpy as np + +from pandas._libs import algos as libalgos, index as libindex + +import pandas._testing as tm + + +class TestNumericEngine: + def test_is_monotonic(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + num = 1000 + arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) + + # monotonic increasing + engine = engine_type(lambda: arr, len(arr)) + assert engine.is_monotonic_increasing is True + assert engine.is_monotonic_decreasing is False + + # monotonic decreasing + engine = engine_type(lambda: arr[::-1], len(arr)) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is True + + # neither monotonic increasing or decreasing + arr = np.array([1] * num + [2] * num + 
[1] * num, dtype=dtype) + engine = engine_type(lambda: arr[::-1], len(arr)) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is False + + def test_is_unique(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + # unique + arr = np.array([1, 3, 2], dtype=dtype) + engine = engine_type(lambda: arr, len(arr)) + assert engine.is_unique is True + + # not unique + arr = np.array([1, 2, 1], dtype=dtype) + engine = engine_type(lambda: arr, len(arr)) + assert engine.is_unique is False + + def test_get_loc(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + # unique + arr = np.array([1, 2, 3], dtype=dtype) + engine = engine_type(lambda: arr, len(arr)) + assert engine.get_loc(2) == 1 + + # monotonic + num = 1000 + arr = np.array([1] * num + [2] * num + [3] * num, dtype=dtype) + engine = engine_type(lambda: arr, len(arr)) + assert engine.get_loc(2) == slice(1000, 2000) + + # not monotonic + arr = np.array([1, 2, 3] * num, dtype=dtype) + engine = engine_type(lambda: arr, len(arr)) + expected = np.array([False, True, False] * num, dtype=bool) + result = engine.get_loc(2) + assert (result == expected).all() + + def test_get_backfill_indexer(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + arr = np.array([1, 5, 10], dtype=dtype) + engine = engine_type(lambda: arr, len(arr)) + + new = np.arange(12, dtype=dtype) + result = engine.get_backfill_indexer(new) + + expected = libalgos.backfill(arr, new) + tm.assert_numpy_array_equal(result, expected) + + def test_get_pad_indexer(self, numeric_indexing_engine_type_and_dtype): + engine_type, dtype = numeric_indexing_engine_type_and_dtype + + arr = np.array([1, 5, 10], dtype=dtype) + engine = engine_type(lambda: arr, len(arr)) + + new = np.arange(12, dtype=dtype) + result = engine.get_pad_indexer(new) + + expected = 
libalgos.pad(arr, new) + tm.assert_numpy_array_equal(result, expected) + + +class TestObjectEngine: + engine_type = libindex.ObjectEngine + dtype = np.object_ + values = list("abc") + + def test_is_monotonic(self): + + num = 1000 + arr = np.array(["a"] * num + ["a"] * num + ["c"] * num, dtype=self.dtype) + + # monotonic increasing + engine = self.engine_type(lambda: arr, len(arr)) + assert engine.is_monotonic_increasing is True + assert engine.is_monotonic_decreasing is False + + # monotonic decreasing + engine = self.engine_type(lambda: arr[::-1], len(arr)) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is True + + # neither monotonic increasing or decreasing + arr = np.array(["a"] * num + ["b"] * num + ["a"] * num, dtype=self.dtype) + engine = self.engine_type(lambda: arr[::-1], len(arr)) + assert engine.is_monotonic_increasing is False + assert engine.is_monotonic_decreasing is False + + def test_is_unique(self): + # unique + arr = np.array(self.values, dtype=self.dtype) + engine = self.engine_type(lambda: arr, len(arr)) + assert engine.is_unique is True + + # not unique + arr = np.array(["a", "b", "a"], dtype=self.dtype) + engine = self.engine_type(lambda: arr, len(arr)) + assert engine.is_unique is False + + def test_get_loc(self): + # unique + arr = np.array(self.values, dtype=self.dtype) + engine = self.engine_type(lambda: arr, len(arr)) + assert engine.get_loc("b") == 1 + + # monotonic + num = 1000 + arr = np.array(["a"] * num + ["b"] * num + ["c"] * num, dtype=self.dtype) + engine = self.engine_type(lambda: arr, len(arr)) + assert engine.get_loc("b") == slice(1000, 2000) + + # not monotonic + arr = np.array(self.values * num, dtype=self.dtype) + engine = self.engine_type(lambda: arr, len(arr)) + expected = np.array([False, True, False] * num, dtype=bool) + result = engine.get_loc("b") + assert (result == expected).all() + + def test_get_backfill_indexer(self): + arr = np.array(["a", "e", "j"], dtype=self.dtype) + 
engine = self.engine_type(lambda: arr, len(arr)) + + new = np.array(list("abcdefghij"), dtype=self.dtype) + result = engine.get_backfill_indexer(new) + + expected = libalgos.backfill["object"](arr, new) + tm.assert_numpy_array_equal(result, expected) + + def test_get_pad_indexer(self): + arr = np.array(["a", "e", "j"], dtype=self.dtype) + engine = self.engine_type(lambda: arr, len(arr)) + + new = np.array(list("abcdefghij"), dtype=self.dtype) + result = engine.get_pad_indexer(new) + + expected = libalgos.pad["object"](arr, new) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py new file mode 100644 index 00000000..2ffa44be --- /dev/null +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -0,0 +1,14 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +class TestIndexingSlow: + @pytest.mark.slow + def test_large_dataframe_indexing(self): + # GH10692 + result = DataFrame({"x": range(10 ** 6)}, dtype="int64") + result.loc[len(result)] = len(result) + 1 + expected = DataFrame({"x": range(10 ** 6 + 1)}, dtype="int64") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py new file mode 100644 index 00000000..e5930b25 --- /dev/null +++ b/pandas/tests/indexing/test_loc.py @@ -0,0 +1,1004 @@ +""" test label based indexing with loc """ +from io import StringIO +import re + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series, Timestamp, date_range +import pandas._testing as tm +from pandas.api.types import is_scalar +from pandas.tests.indexing.common import Base + + +class TestLoc(Base): + def test_loc_getitem_dups(self): + # GH 5678 + # repeated getitems on a dup index returning a ndarray + df = DataFrame( + np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)] + ) + expected = df.loc["A", 0] + result = 
df.loc[:, 0].loc["A"] + tm.assert_series_equal(result, expected) + + def test_loc_getitem_dups2(self): + + # GH4726 + # dup indexing with iloc/loc + df = DataFrame( + [[1, 2, "foo", "bar", Timestamp("20130101")]], + columns=["a", "a", "a", "a", "a"], + index=[1], + ) + expected = Series( + [1, 2, "foo", "bar", Timestamp("20130101")], + index=["a", "a", "a", "a", "a"], + name=1, + ) + + result = df.iloc[0] + tm.assert_series_equal(result, expected) + + result = df.loc[1] + tm.assert_series_equal(result, expected) + + def test_loc_setitem_dups(self): + + # GH 6541 + df_orig = DataFrame( + { + "me": list("rttti"), + "foo": list("aaade"), + "bar": np.arange(5, dtype="float64") * 1.34 + 2, + "bar2": np.arange(5, dtype="float64") * -0.34 + 2, + } + ).set_index("me") + + indexer = tuple(["r", ["bar", "bar2"]]) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + indexer = tuple(["r", "bar"]) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + assert df.loc[indexer] == 2.0 * df_orig.loc[indexer] + + indexer = tuple(["t", ["bar", "bar2"]]) + df = df_orig.copy() + df.loc[indexer] *= 2.0 + tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) + + def test_loc_setitem_slice(self): + # GH10503 + + # assigning the same type should not change the type + df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")}) + ix = df1["a"] == 1 + newb1 = df1.loc[ix, "b"] + 1 + df1.loc[ix, "b"] = newb1 + expected = DataFrame( + {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")} + ) + tm.assert_frame_equal(df1, expected) + + # assigning a new type should get the inferred type + df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") + ix = df1["a"] == 1 + newb2 = df2.loc[ix, "b"] + df1.loc[ix, "b"] = newb2 + expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") + tm.assert_frame_equal(df2, expected) + + def test_loc_getitem_int(self): + + # int 
label + self.check_result("loc", 2, "loc", 2, typs=["label"], fails=KeyError) + + def test_loc_getitem_label(self): + + # label + self.check_result("loc", "c", "loc", "c", typs=["empty"], fails=KeyError) + + def test_loc_getitem_label_out_of_range(self): + + # out of range label + self.check_result( + "loc", + "f", + "loc", + "f", + typs=["ints", "uints", "labels", "mixed", "ts"], + fails=KeyError, + ) + self.check_result("loc", "f", "ix", "f", typs=["floats"], fails=KeyError) + self.check_result("loc", "f", "loc", "f", typs=["floats"], fails=KeyError) + self.check_result( + "loc", 20, "loc", 20, typs=["ints", "uints", "mixed"], fails=KeyError, + ) + self.check_result("loc", 20, "loc", 20, typs=["labels"], fails=TypeError) + self.check_result("loc", 20, "loc", 20, typs=["ts"], axes=0, fails=TypeError) + self.check_result("loc", 20, "loc", 20, typs=["floats"], axes=0, fails=KeyError) + + def test_loc_getitem_label_list(self): + # TODO: test something here? + # list of labels + pass + + def test_loc_getitem_label_list_with_missing(self): + self.check_result( + "loc", [0, 1, 2], "loc", [0, 1, 2], typs=["empty"], fails=KeyError, + ) + self.check_result( + "loc", + [0, 2, 10], + "ix", + [0, 2, 10], + typs=["ints", "uints", "floats"], + axes=0, + fails=KeyError, + ) + + self.check_result( + "loc", + [3, 6, 7], + "ix", + [3, 6, 7], + typs=["ints", "uints", "floats"], + axes=1, + fails=KeyError, + ) + + # GH 17758 - MultiIndex and missing keys + self.check_result( + "loc", + [(1, 3), (1, 4), (2, 5)], + "ix", + [(1, 3), (1, 4), (2, 5)], + typs=["multi"], + axes=0, + fails=KeyError, + ) + + def test_getitem_label_list_with_missing(self): + s = Series(range(3), index=["a", "b", "c"]) + + # consistency + with pytest.raises(KeyError, match="with any missing labels"): + s[["a", "d"]] + + s = Series(range(3)) + with pytest.raises(KeyError, match="with any missing labels"): + s[[0, 3]] + + def test_loc_getitem_label_list_fails(self): + # fails + self.check_result( + "loc", + [20, 
30, 40], + "loc", + [20, 30, 40], + typs=["ints", "uints"], + axes=1, + fails=KeyError, + ) + + def test_loc_getitem_label_array_like(self): + # TODO: test something? + # array like + pass + + def test_loc_getitem_bool(self): + # boolean indexers + b = [True, False, True, False] + + self.check_result("loc", b, "loc", b, typs=["empty"], fails=IndexError) + + @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) + def test_loc_getitem_bool_diff_len(self, index): + # GH26658 + s = Series([1, 2, 3]) + msg = "Boolean index has wrong length: {} instead of {}".format( + len(index), len(s) + ) + with pytest.raises(IndexError, match=msg): + _ = s.loc[index] + + def test_loc_getitem_int_slice(self): + # TODO: test something here? + pass + + def test_loc_to_fail(self): + + # GH3449 + df = DataFrame( + np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"] + ) + + # raise a KeyError? + msg = ( + r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[1, 2], [1, 2]] + + # GH 7496 + # loc should not fallback + + s = Series(dtype=object) + s.loc[1] = 1 + s.loc["a"] = 2 + + with pytest.raises(KeyError, match=r"^-1$"): + s.loc[-1] + + msg = ( + r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc[[-1, -2]] + + msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\"" + with pytest.raises(KeyError, match=msg): + s.loc[["4"]] + + s.loc[-1] = 3 + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[-1, -2]] + + s["a"] = 2 + msg = ( + r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + s.loc[[-2]] + + del s["a"] + + with pytest.raises(KeyError, match=msg): + s.loc[[-2]] = 0 + + # inconsistency between .loc[values] and .loc[values,:] + # GH 7999 + df = 
DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) + + msg = ( + r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[3], :] + + with pytest.raises(KeyError, match=msg): + df.loc[[3]] + + def test_loc_getitem_list_with_fail(self): + # 15747 + # should KeyError if *any* missing labels + + s = Series([1, 2, 3]) + + s.loc[[2]] + + with pytest.raises( + KeyError, + match=re.escape( + "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" + ), + ): + s.loc[[3]] + + # a non-match and a match + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[2, 3]] + + def test_loc_getitem_label_slice(self): + + # label slices (with ints) + + # real label slices + + # GH 14316 + + self.check_result( + "loc", + slice(1, 3), + "loc", + slice(1, 3), + typs=["labels", "mixed", "empty", "ts", "floats"], + fails=TypeError, + ) + + self.check_result( + "loc", + slice("20130102", "20130104"), + "loc", + slice("20130102", "20130104"), + typs=["ts"], + axes=1, + fails=TypeError, + ) + + self.check_result( + "loc", + slice(2, 8), + "loc", + slice(2, 8), + typs=["mixed"], + axes=0, + fails=TypeError, + ) + self.check_result( + "loc", + slice(2, 8), + "loc", + slice(2, 8), + typs=["mixed"], + axes=1, + fails=KeyError, + ) + + self.check_result( + "loc", + slice(2, 4, 2), + "loc", + slice(2, 4, 2), + typs=["mixed"], + axes=0, + fails=TypeError, + ) + + def test_loc_index(self): + # gh-17131 + # a boolean index should index like a boolean numpy array + + df = DataFrame( + np.random.random(size=(5, 10)), + index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], + ) + + mask = df.index.map(lambda x: "alpha" in x) + expected = df.loc[np.array(mask)] + + result = df.loc[mask] + tm.assert_frame_equal(result, expected) + + result = df.loc[mask.values] + tm.assert_frame_equal(result, expected) + + result = df.loc[pd.array(mask, dtype="boolean")] + tm.assert_frame_equal(result, 
expected) + + def test_loc_general(self): + + df = DataFrame( + np.random.rand(4, 4), + columns=["A", "B", "C", "D"], + index=["A", "B", "C", "D"], + ) + + # want this to work + result = df.loc[:, "A":"B"].iloc[0:2, :] + assert (result.columns == ["A", "B"]).all() + assert (result.index == ["A", "B"]).all() + + # mixed type + result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0] + expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0) + tm.assert_series_equal(result, expected) + assert result.dtype == object + + def test_loc_setitem_consistency(self): + # GH 6149 + # coerce similarly for setitem and loc when rows have a null-slice + expected = DataFrame( + { + "date": Series(0, index=range(5), dtype=np.int64), + "val": Series(range(5), dtype=np.int64), + } + ) + + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc[:, "date"] = 0 + tm.assert_frame_equal(df, expected) + + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc[:, "date"] = np.array(0, dtype=np.int64) + tm.assert_frame_equal(df, expected) + + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc[:, "date"] = np.array([0, 0, 0, 0, 0], dtype=np.int64) + tm.assert_frame_equal(df, expected) + + expected = DataFrame( + { + "date": Series("foo", index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc[:, "date"] = "foo" + tm.assert_frame_equal(df, expected) + + expected = DataFrame( + { + "date": Series(1.0, index=range(5)), + "val": Series(range(5), dtype=np.int64), + } + ) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + df.loc[:, "date"] 
= 1.0 + tm.assert_frame_equal(df, expected) + + # GH 15494 + # setting on frame with single row + df = DataFrame({"date": Series([Timestamp("20180101")])}) + df.loc[:, "date"] = "string" + expected = DataFrame({"date": Series(["string"])}) + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_empty(self): + # empty (essentially noops) + expected = DataFrame(columns=["x", "y"]) + expected["x"] = expected["x"].astype(np.int64) + df = DataFrame(columns=["x", "y"]) + df.loc[:, "x"] = 1 + tm.assert_frame_equal(df, expected) + + df = DataFrame(columns=["x", "y"]) + df["x"] = 1 + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_consistency_slice_column_len(self): + # .loc[:,column] setting with slice == len of the column + # GH10408 + data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat +Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse +Region,Site,RespondentID,,,,, +Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes, +Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes +Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, +Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" + + df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) + df.loc[:, ("Respondent", "StartDate")] = pd.to_datetime( + df.loc[:, ("Respondent", "StartDate")] + ) + df.loc[:, ("Respondent", "EndDate")] = pd.to_datetime( + df.loc[:, ("Respondent", "EndDate")] + ) + df.loc[:, ("Respondent", "Duration")] = ( + df.loc[:, ("Respondent", "EndDate")] + - df.loc[:, ("Respondent", "StartDate")] + ) + + df.loc[:, ("Respondent", "Duration")] = df.loc[ + :, ("Respondent", "Duration") + ].astype("timedelta64[s]") + expected = Series( + [1380, 720, 840, 2160.0], index=df.index, name=("Respondent", "Duration") + ) + tm.assert_series_equal(df[("Respondent", "Duration")], expected) + + @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"]) + def 
test_loc_assign_non_ns_datetime(self, unit): + # GH 27395, non-ns dtype assignment via .loc should work + # and return the same result when using simple assignment + df = DataFrame( + { + "timestamp": [ + np.datetime64("2017-02-11 12:41:29"), + np.datetime64("1991-11-07 04:22:37"), + ] + } + ) + + df.loc[:, unit] = df.loc[:, "timestamp"].values.astype( + "datetime64[{unit}]".format(unit=unit) + ) + df["expected"] = df.loc[:, "timestamp"].values.astype( + "datetime64[{unit}]".format(unit=unit) + ) + expected = Series(df.loc[:, "expected"], name=unit) + tm.assert_series_equal(df.loc[:, unit], expected) + + def test_loc_modify_datetime(self): + # see gh-28837 + df = DataFrame.from_dict( + {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} + ) + + df["date_dt"] = pd.to_datetime(df["date"], unit="ms", cache=True) + + df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] + df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] + + expected = DataFrame( + [ + [1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"], + [1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"], + [1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"], + [1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"], + ], + columns=["date", "date_dt", "date_dt_cp"], + ) + + columns = ["date_dt", "date_dt_cp"] + expected[columns] = expected[columns].apply(pd.to_datetime) + + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame(self): + df = self.frame_labels + + result = df.iloc[0, 0] + + df.loc["a", "A"] = 1 + result = df.loc["a", "A"] + assert result == 1 + + result = df.iloc[0, 0] + assert result == 1 + + df.loc[:, "B":"D"] = 0 + expected = df.loc[:, "B":"D"] + result = df.iloc[:, 1:] + tm.assert_frame_equal(result, expected) + + # GH 6254 + # setting issue + df = DataFrame(index=[3, 5, 4], columns=["A"]) + df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") + expected = DataFrame(dict(A=Series([1, 2, 3], 
index=[4, 3, 5]))).reindex( + index=[3, 5, 4] + ) + tm.assert_frame_equal(df, expected) + + # GH 6252 + # setting with an empty frame + keys1 = ["@" + str(i) for i in range(5)] + val1 = np.arange(5, dtype="int64") + + keys2 = ["@" + str(i) for i in range(4)] + val2 = np.arange(4, dtype="int64") + + index = list(set(keys1).union(keys2)) + df = DataFrame(index=index) + df["A"] = np.nan + df.loc[keys1, "A"] = val1 + + df["B"] = np.nan + df.loc[keys2, "B"] = val2 + + expected = DataFrame( + dict(A=Series(val1, index=keys1), B=Series(val2, index=keys2)) + ).reindex(index=index) + tm.assert_frame_equal(df, expected) + + # GH 8669 + # invalid coercion of nan -> int + df = DataFrame({"A": [1, 2, 3], "B": np.nan}) + df.loc[df.B > df.A, "B"] = df.A + expected = DataFrame({"A": [1, 2, 3], "B": np.nan}) + tm.assert_frame_equal(df, expected) + + # GH 6546 + # setting with mixed labels + df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]}) + + result = df.loc[0, [1, 2]] + expected = Series([1, 3], index=[1, 2], dtype=object, name=0) + tm.assert_series_equal(result, expected) + + expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]}) + df.loc[0, [1, 2]] = [5, 6] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_frame_multiples(self): + # multiple setting + df = DataFrame( + {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)} + ) + rhs = df.loc[1:2] + rhs.index = df.index[0:2] + df.loc[0:1] = rhs + expected = DataFrame( + {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)} + ) + tm.assert_frame_equal(df, expected) + + # multiple setting with frame on rhs (with M8) + df = DataFrame( + { + "date": date_range("2000-01-01", "2000-01-5"), + "val": Series(range(5), dtype=np.int64), + } + ) + expected = DataFrame( + { + "date": [ + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000101"), + Timestamp("20000102"), + Timestamp("20000103"), + ], + "val": Series([0, 1, 0, 1, 2], dtype=np.int64), + } + ) + rhs = 
df.loc[0:2] + rhs.index = df.index[2:5] + df.loc[2:4] = rhs + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "indexer", [["A"], slice(None, "A", None), np.array(["A"])] + ) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_loc_setitem_with_scalar_index(self, indexer, value): + # GH #19474 + # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". + + df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + df.loc[0, indexer] = value + result = df.loc[0, "A"] + + assert is_scalar(result) and result == "Z" + + def test_loc_coercion(self): + + # 12411 + df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 12045 + import datetime + + df = DataFrame( + {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]} + ) + expected = df.dtypes + + result = df.iloc[[0]] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[[1]] + tm.assert_series_equal(result.dtypes, expected) + + # 11594 + df = DataFrame({"text": ["some words"] + [None] * 9}) + expected = df.dtypes + + result = df.iloc[0:2] + tm.assert_series_equal(result.dtypes, expected) + + result = df.iloc[3:] + tm.assert_series_equal(result.dtypes, expected) + + def test_setitem_new_key_tz(self): + # GH#12862 should not raise on assigning the second value + vals = [ + pd.to_datetime(42).tz_localize("UTC"), + pd.to_datetime(666).tz_localize("UTC"), + ] + expected = pd.Series(vals, index=["foo", "bar"]) + + ser = pd.Series(dtype=object) + ser["foo"] = vals[0] + ser["bar"] = vals[1] + + tm.assert_series_equal(ser, expected) + + ser = pd.Series(dtype=object) + ser.loc["foo"] = vals[0] + ser.loc["bar"] = vals[1] + + tm.assert_series_equal(ser, expected) + + def test_loc_non_unique(self): + # 
GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + + # these are going to raise because the we are non monotonic + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ) + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc[1:] + msg = "'Cannot get left slice bound for non-unique label: 0'" + with pytest.raises(KeyError, match=msg): + df.loc[0:] + msg = "'Cannot get left slice bound for non-unique label: 1'" + with pytest.raises(KeyError, match=msg): + df.loc[1:2] + + # monotonic are ok + df = DataFrame( + {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3] + ).sort_index(axis=0) + result = df.loc[1:] + expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3]) + tm.assert_frame_equal(result, expected) + + result = df.loc[0:] + tm.assert_frame_equal(result, df) + + result = df.loc[1:2] + expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2]) + tm.assert_frame_equal(result, expected) + + def test_loc_non_unique_memory_error(self): + + # GH 4280 + # non_unique index with a large selection triggers a memory error + + columns = list("ABCDEFG") + + def gen_test(l, l2): + return pd.concat( + [ + DataFrame( + np.random.randn(l, len(columns)), + index=np.arange(l), + columns=columns, + ), + DataFrame( + np.ones((l2, len(columns))), index=[0] * l2, columns=columns + ), + ] + ) + + def gen_expected(df, mask): + len_mask = len(mask) + return pd.concat( + [ + df.take([0]), + DataFrame( + np.ones((len_mask, len(columns))), + index=[0] * len_mask, + columns=columns, + ), + df.take(mask[1:]), + ] + ) + + df = gen_test(900, 100) + assert df.index.is_unique is False + + mask = np.arange(100) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + df = gen_test(900000, 100000) + assert 
df.index.is_unique is False + + mask = np.arange(100000) + result = df.loc[mask] + expected = gen_expected(df, mask) + tm.assert_frame_equal(result, expected) + + def test_loc_name(self): + # GH 3880 + df = DataFrame([[1, 1], [1, 1]]) + df.index.name = "index_name" + result = df.iloc[[0, 1]].index.name + assert result == "index_name" + + result = df.loc[[0, 1]].index.name + assert result == "index_name" + + def test_loc_empty_list_indexer_is_ok(self): + + df = tm.makeCustomDataframe(5, 2) + # vertical empty + tm.assert_frame_equal( + df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True + ) + # horizontal empty + tm.assert_frame_equal( + df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + # horizontal empty + tm.assert_frame_equal( + df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True + ) + + def test_identity_slice_returns_new_object(self): + # GH13873 + original_df = DataFrame({"a": [1, 2, 3]}) + sliced_df = original_df.loc[:] + assert sliced_df is not original_df + assert original_df[:] is not original_df + + # should be a shallow copy + original_df["a"] = [4, 4, 4] + assert (sliced_df["a"] == 4).all() + + # These should not return copies + assert original_df is original_df.loc[:, :] + df = DataFrame(np.random.randn(10, 4)) + assert df[0] is df.loc[:, 0] + + # Same tests for Series + original_series = Series([1, 2, 3, 4, 5, 6]) + sliced_series = original_series.loc[:] + assert sliced_series is not original_series + assert original_series[:] is not original_series + + original_series[:3] = [7, 8, 9] + assert all(sliced_series[:3] == [7, 8, 9]) + + def test_loc_uint64(self): + # GH20722 + # Test whether loc accept uint64 max value as index. 
+ s = pd.Series( + [1, 2], index=[np.iinfo("uint64").max - 1, np.iinfo("uint64").max] + ) + + result = s.loc[np.iinfo("uint64").max - 1] + expected = s.iloc[0] + assert result == expected + + result = s.loc[[np.iinfo("uint64").max - 1]] + expected = s.iloc[[0]] + tm.assert_series_equal(result, expected) + + result = s.loc[[np.iinfo("uint64").max - 1, np.iinfo("uint64").max]] + tm.assert_series_equal(result, s) + + def test_loc_setitem_empty_append(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2, 3] + expected = DataFrame({"x": data, "y": [None] * len(data)}) + + # appends to fit length of data + df = DataFrame(columns=["x", "y"]) + df.loc[:, "x"] = data + tm.assert_frame_equal(df, expected) + + # only appends one value + expected = DataFrame({"x": [1.0], "y": [np.nan]}) + df = DataFrame(columns=["x", "y"], dtype=np.float) + df.loc[0, "x"] = expected.loc[0, "x"] + tm.assert_frame_equal(df, expected) + + def test_loc_setitem_empty_append_raises(self): + # GH6173, various appends to an empty dataframe + + data = [1, 2] + df = DataFrame(columns=["x", "y"]) + msg = ( + r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " + r"are in the \[index\]" + ) + with pytest.raises(KeyError, match=msg): + df.loc[[0, 1], "x"] = data + + msg = "cannot copy sequence with size 2 to array axis with dimension 0" + with pytest.raises(ValueError, match=msg): + df.loc[0:2, "x"] = data + + def test_indexing_zerodim_np_array(self): + # GH24924 + df = DataFrame([[1, 2], [3, 4]]) + result = df.loc[np.array(0)] + s = pd.Series([1, 2], name=0) + tm.assert_series_equal(result, s) + + def test_series_indexing_zerodim_np_array(self): + # GH24924 + s = Series([1, 2]) + result = s.loc[np.array(0)] + assert result == 1 + + def test_loc_reverse_assignment(self): + # GH26939 + data = [1, 2, 3, 4, 5, 6] + [None] * 4 + expected = Series(data, index=range(2010, 2020)) + + result = pd.Series(index=range(2010, 2020), dtype=np.float64) + result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1] 
+ + tm.assert_series_equal(result, expected) + + +def test_series_loc_getitem_label_list_missing_values(): + # gh-11428 + key = np.array( + ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64" + ) + s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[key] + + +@pytest.mark.parametrize( + "columns, column_key, expected_columns, check_column_type", + [ + ([2011, 2012, 2013], [2011, 2012], [0, 1], True), + ([2011, 2012, "All"], [2011, 2012], [0, 1], False), + ([2011, 2012, "All"], [2011, "All"], [0, 2], True), + ], +) +def test_loc_getitem_label_list_integer_labels( + columns, column_key, expected_columns, check_column_type +): + # gh-14836 + df = DataFrame(np.random.rand(3, 3), columns=columns, index=list("ABC")) + expected = df.iloc[:, expected_columns] + result = df.loc[["A", "B", "C"], column_key] + tm.assert_frame_equal(result, expected, check_column_type=check_column_type) + + +def test_loc_setitem_float_intindex(): + # GH 8720 + rand_data = np.random.randn(8, 4) + result = pd.DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1))) + expected = pd.DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5]) + tm.assert_frame_equal(result, expected) + + result = pd.DataFrame(rand_data) + result.loc[:, 0.5] = np.nan + tm.assert_frame_equal(result, expected) + + +def test_loc_axis_1_slice(): + # GH 10586 + cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]] + df = pd.DataFrame( + np.ones((10, 8)), + index=tuple("ABCDEFGHIJ"), + columns=pd.MultiIndex.from_tuples(cols), + ) + result = df.loc(axis=1)[(2014, 9):(2015, 8)] + expected = pd.DataFrame( + np.ones((10, 4)), + index=tuple("ABCDEFGHIJ"), + columns=pd.MultiIndex.from_tuples( + [(2014, 9), (2014, 10), (2015, 7), (2015, 8)] + ), + ) + tm.assert_frame_equal(result, expected) diff --git 
a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py new file mode 100644 index 00000000..345ca30e --- /dev/null +++ b/pandas/tests/indexing/test_na_indexing.py @@ -0,0 +1,90 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.mark.parametrize( + "values, dtype", + [ + ([1, 2, 3], "int64"), + ([1.0, 2.0, 3.0], "float64"), + (["a", "b", "c"], "object"), + (["a", "b", "c"], "string"), + ([1, 2, 3], "datetime64[ns]"), + ([1, 2, 3], "datetime64[ns, CET]"), + ([1, 2, 3], "timedelta64[ns]"), + (["2000", "2001", "2002"], "Period[D]"), + ([1, 0, 3], "Sparse"), + ([pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(3, 4)], "interval"), + ], +) +@pytest.mark.parametrize( + "mask", [[True, False, False], [True, True, True], [False, False, False]] +) +@pytest.mark.parametrize("box_mask", [True, False]) +@pytest.mark.parametrize("frame", [True, False]) +def test_series_mask_boolean(values, dtype, mask, box_mask, frame): + ser = pd.Series(values, dtype=dtype, index=["a", "b", "c"]) + if frame: + ser = ser.to_frame() + mask = pd.array(mask, dtype="boolean") + if box_mask: + mask = pd.Series(mask, index=ser.index) + + expected = ser[mask.astype("bool")] + + result = ser[mask] + tm.assert_equal(result, expected) + + if not box_mask: + # Series.iloc[Series[bool]] isn't allowed + result = ser.iloc[mask] + tm.assert_equal(result, expected) + + result = ser.loc[mask] + tm.assert_equal(result, expected) + + # empty + mask = mask[:0] + ser = ser.iloc[:0] + expected = ser[mask.astype("bool")] + result = ser[mask] + tm.assert_equal(result, expected) + + if not box_mask: + # Series.iloc[Series[bool]] isn't allowed + result = ser.iloc[mask] + tm.assert_equal(result, expected) + + result = ser.loc[mask] + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("frame", [True, False]) +def test_na_treated_as_false(frame): + # https://github.com/pandas-dev/pandas/issues/31503 + s = pd.Series([1, 2, 3], name="name") + + if 
frame: + s = s.to_frame() + + mask = pd.array([True, False, None], dtype="boolean") + + result = s[mask] + expected = s[mask.fillna(False)] + + result_loc = s.loc[mask] + expected_loc = s.loc[mask.fillna(False)] + + result_iloc = s.iloc[mask] + expected_iloc = s.iloc[mask.fillna(False)] + + if frame: + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result_loc, expected_loc) + tm.assert_frame_equal(result_iloc, expected_iloc) + else: + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result_loc, expected_loc) + tm.assert_series_equal(result_iloc, expected_iloc) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py new file mode 100644 index 00000000..5fda7590 --- /dev/null +++ b/pandas/tests/indexing/test_partial.py @@ -0,0 +1,527 @@ +""" +test setting *parts* of objects both positionally and label based + +TODO: these should be split among the indexer tests +""" + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series, date_range +import pandas._testing as tm + + +class TestPartialSetting: + def test_partial_setting(self): + + # GH2578, allow ix and friends to partially set + + # series + s_orig = Series([1, 2, 3]) + + s = s_orig.copy() + s[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5 + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s[5] = 5.0 + expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + s = s_orig.copy() + s.loc[5] = 5.0 + expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) + tm.assert_series_equal(s, expected) + + # iloc/iat raise + s = s_orig.copy() + + with pytest.raises(IndexError): + s.iloc[3] = 5.0 + + with pytest.raises(IndexError): + s.iat[3] = 5.0 + + # ## frame ## + + df_orig = DataFrame( + np.arange(6).reshape(3, 2), 
columns=["A", "B"], dtype="int64" + ) + + # iloc/iat raise + df = df_orig.copy() + + with pytest.raises(IndexError): + df.iloc[4, 2] = 5.0 + + with pytest.raises(IndexError): + df.iat[4, 2] = 5.0 + + # row setting where it exists + expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) + df = df_orig.copy() + df.iloc[1] = df.iloc[2] + tm.assert_frame_equal(df, expected) + + expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) + df = df_orig.copy() + df.loc[1] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # like 2578, partial setting with dtype preservation + expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})) + df = df_orig.copy() + df.loc[3] = df.loc[2] + tm.assert_frame_equal(df, expected) + + # single dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]})) + df = df_orig.copy() + df.loc[:, "B"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed dtype frame, overwrite + expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) + df = df_orig.copy() + df["B"] = df["B"].astype(np.float64) + df.loc[:, "B"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # single dtype frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + df.loc[:, "C"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # mixed frame, partial setting + expected = df_orig.copy() + expected["C"] = df["A"] + df = df_orig.copy() + df.loc[:, "C"] = df.loc[:, "A"] + tm.assert_frame_equal(df, expected) + + # GH 8473 + dates = date_range("1/1/2000", periods=8) + df_orig = DataFrame( + np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"] + ) + + expected = pd.concat( + [df_orig, DataFrame({"A": 7}, index=[dates[-1] + dates.freq])], sort=True + ) + df = df_orig.copy() + df.loc[dates[-1] + dates.freq, "A"] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + dates.freq, "A"] = 7 + tm.assert_frame_equal(df, 
expected) + + exp_other = DataFrame({0: 7}, index=[dates[-1] + dates.freq]) + expected = pd.concat([df_orig, exp_other], axis=1) + + df = df_orig.copy() + df.loc[dates[-1] + dates.freq, 0] = 7 + tm.assert_frame_equal(df, expected) + df = df_orig.copy() + df.at[dates[-1] + dates.freq, 0] = 7 + tm.assert_frame_equal(df, expected) + + def test_partial_setting_mixed_dtype(self): + + # in a mixed dtype environment, try to preserve dtypes + # by appending + df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) + + s = df.loc[1].copy() + s.name = 2 + expected = df.append(s) + + df.loc[2] = df.loc[1] + tm.assert_frame_equal(df, expected) + + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=range(4)) + tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0])) + + # columns will align + df = DataFrame(columns=["A", "B"]) + df.loc[0] = Series(1, index=["B"]) + + exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") + tm.assert_frame_equal(df, exp) + + # list-like must conform + df = DataFrame(columns=["A", "B"]) + + with pytest.raises(ValueError): + df.loc[0] = [1, 2, 3] + + # TODO: #15657, these are left as object and not coerced + df = DataFrame(columns=["A", "B"]) + df.loc[3] = [6, 7] + + exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object") + tm.assert_frame_equal(df, exp) + + def test_series_partial_set(self): + # partial set with new index + # Regression from GH4825 + ser = Series([0.1, 0.2], index=[1, 2]) + + # loc equiv to .reindex + expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) + with pytest.raises(KeyError, match="with any missing labels"): + result = ser.loc[[3, 2, 3]] + + result = ser.reindex([3, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) + with pytest.raises(KeyError, match="with any missing labels"): + result = ser.loc[[3, 2, 3, "x"]] + + 
result = ser.reindex([3, 2, 3, "x"]) + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) + with pytest.raises(KeyError, match="with any missing labels"): + result = ser.loc[[2, 2, "x", 1]] + + result = ser.reindex([2, 2, "x", 1]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # raises as nothing in in the index + msg = ( + r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are" + r" in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + ser.loc[[3, 3, 3]] + + expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, 3]] + + result = ser.reindex([2, 2, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) + expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[3, 4, 4]] + + result = s.reindex([3, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[5, 3, 3]] + + result = s.reindex([5, 3, 3]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[5, 4, 4]] + + result = s.reindex([5, 4, 4]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) + expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) + with 
pytest.raises(KeyError, match="with any missing labels"): + s.loc[[7, 2, 2]] + + result = s.reindex([7, 2, 2]) + tm.assert_series_equal(result, expected, check_index_type=True) + + s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) + expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[4, 5, 5]] + + result = s.reindex([4, 5, 5]) + tm.assert_series_equal(result, expected, check_index_type=True) + + # iloc + expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_series_partial_set_with_name(self): + # GH 11497 + + idx = Index([1, 2], dtype="int64", name="idx") + ser = Series([0.1, 0.2], index=idx, name="s") + + # loc + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[3, 2, 3]] + + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[3, 2, 3, "x"]] + + exp_idx = Index([2, 2, 1], dtype="int64", name="idx") + expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") + result = ser.loc[[2, 2, 1]] + tm.assert_series_equal(result, expected, check_index_type=True) + + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, "x", 1]] + + # raises as nothing in in the index + msg = ( + r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'," + r" name='idx'\)\] are in the \[index\]\"" + ) + with pytest.raises(KeyError, match=msg): + ser.loc[[3, 3, 3]] + + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, 3]] + + idx = Index([1, 2, 3], dtype="int64", name="idx") + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] + + idx = 
Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] + + idx = Index([4, 5, 6, 7], dtype="int64", name="idx") + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] + + idx = Index([1, 2, 3, 4], dtype="int64", name="idx") + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] + + # iloc + exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") + expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s") + result = ser.iloc[[1, 1, 0, 0]] + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_partial_set_invalid(self): + + # GH 4940 + # allow only setting of 'valid' values + + orig = tm.makeTimeDataFrame() + df = orig.copy() + + # don't allow not string inserts + with pytest.raises(TypeError): + df.loc[100.0, :] = df.iloc[0] + + with pytest.raises(TypeError): + df.loc[100, :] = df.iloc[0] + + # allow object conversion here + df = orig.copy() + df.loc["a", :] = df.iloc[0] + exp = orig.append(Series(df.iloc[0], name="a")) + tm.assert_frame_equal(df, exp) + tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) + assert df.index.dtype == "object" + + def test_partial_set_empty_series(self): + + # GH5226 + + # partially set with an empty object series + s = Series(dtype=object) + s.loc[1] = 1 + tm.assert_series_equal(s, Series([1], index=[1])) + s.loc[3] = 3 + tm.assert_series_equal(s, Series([1, 3], index=[1, 3])) + + s = Series(dtype=object) + s.loc[1] = 1.0 + tm.assert_series_equal(s, Series([1.0], index=[1])) + s.loc[3] = 3.0 + tm.assert_series_equal(s, Series([1.0, 3.0], index=[1, 3])) + + s = Series(dtype=object) + s.loc["foo"] = 1 + tm.assert_series_equal(s, Series([1], index=["foo"])) + s.loc["bar"] = 3 + tm.assert_series_equal(s, Series([1, 3], 
def test_partial_set_empty_frame(self):
    """
    Enlarging a completely empty DataFrame.

    Setting through ``loc`` on a frame with no rows and no columns raises
    ``ValueError``; adding a column built from empty data succeeds, and a
    Series with its own index can seed both the index and the new column.
    """

    def _frame_with_foo(make_values, **frame_kwargs):
        # Build a fresh frame and add column "foo" from values derived from it.
        frame = DataFrame(**frame_kwargs)
        frame["foo"] = make_values(frame)
        return frame

    # partially set with an empty object frame
    empty = DataFrame()
    rejected_assignments = [
        (1, 1),
        (1, Series([1], index=["foo"])),
        ((slice(None), 1), 1),
    ]
    for key, value in rejected_assignments:
        with pytest.raises(ValueError):
            empty.loc[key] = value

    # these work as they don't really change anything but the index
    # GH5632
    expected = DataFrame(columns=["foo"], index=Index([], dtype="object"))
    object_cases = [
        ({"index": Index([], dtype="object")}, lambda fr: Series([], dtype="object")),
        ({}, lambda fr: Series(fr.index)),
        ({}, lambda fr: fr.index),
    ]
    for frame_kwargs, make_values in object_cases:
        tm.assert_frame_equal(_frame_with_foo(make_values, **frame_kwargs), expected)

    expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
    expected["foo"] = expected["foo"].astype("float64")
    int_cases = [
        lambda fr: [],
        lambda fr: Series(np.arange(len(fr)), dtype="float64"),
        lambda fr: range(len(fr)),
    ]
    for make_values in int_cases:
        tm.assert_frame_equal(
            _frame_with_foo(make_values, index=Index([], dtype="int64")), expected
        )

    # a Series value brings its own index: __setitem__ and loc must agree
    via_loc = DataFrame()
    tm.assert_index_equal(via_loc.columns, Index([], dtype=object))
    via_setitem = DataFrame()
    via_setitem[1] = Series([1], index=["foo"])
    via_loc.loc[:, 1] = Series([1], index=["foo"])
    tm.assert_frame_equal(via_loc, DataFrame([[1]], index=["foo"], columns=[1]))
    tm.assert_frame_equal(via_loc, via_setitem)

    # no index to start
    expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])
    for use_loc in (False, True):
        frame = DataFrame(columns=["A", "B"])
        new_column = Series(1, index=range(4))
        if use_loc:
            frame.loc[:, 0] = new_column
        else:
            frame[0] = new_column
        # smoke check: evaluate dtypes and the repr (results are unused)
        frame.dtypes
        str(frame)
        tm.assert_frame_equal(frame, expected)
def test_at_and_iat_get(self):
    """
    Scalar getters ``iat`` and ``at`` agree with ``self.get_value``.

    Runs over every container kind listed in ``self._kinds``; ``iat`` on the
    label/timestamp/float-indexed fixtures is expected to raise ValueError.
    """

    def _assert_matches_reference(obj, accessor, values=False):
        # Fixture entries may be None; those are skipped.
        if obj is None:
            return
        for key in self.generate_indices(obj, values):
            tm.assert_almost_equal(
                getattr(obj, accessor)[key],
                self.get_value(accessor, obj, key, values),
            )

    iat_msg = "iAt based indexing can only have integer indexers"

    for kind in self._kinds:
        fixtures = getattr(self, kind)

        # iat
        for name in ("ints", "uints"):
            _assert_matches_reference(fixtures[name], "iat", values=True)

        for name in ("labels", "ts", "floats"):
            obj = fixtures[name]
            if obj is None:
                continue
            with pytest.raises(ValueError, match=iat_msg):
                self.check_values(obj, "iat")

        # at
        for name in ("ints", "uints", "labels", "ts", "floats"):
            _assert_matches_reference(fixtures[name], "at")
values=False): + + if f is not None: + indicies = self.generate_indices(f, values) + for i in indicies: + getattr(f, func)[i] = 1 + expected = self.get_value(func, f, i, values) + tm.assert_almost_equal(expected, 1) + + for kind in self._kinds: + + d = getattr(self, kind) + + # iat + for f in [d["ints"], d["uints"]]: + _check(f, "iat", values=True) + + for f in [d["labels"], d["ts"], d["floats"]]: + if f is not None: + msg = "iAt based indexing can only have integer indexers" + with pytest.raises(ValueError, match=msg): + _check(f, "iat") + + # at + for f in [d["ints"], d["uints"], d["labels"], d["ts"], d["floats"]]: + _check(f, "at") + + def test_at_iat_coercion(self): + + # as timestamp is not a tuple! + dates = date_range("1/1/2000", periods=8) + df = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]) + s = df["A"] + + result = s.at[dates[5]] + xp = s.values[5] + assert result == xp + + # GH 7729 + # make sure we are boxing the returns + s = Series(["2014-01-01", "2014-02-02"], dtype="datetime64[ns]") + expected = Timestamp("2014-02-02") + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + assert result == expected + + s = Series(["1 days", "2 days"], dtype="timedelta64[ns]") + expected = Timedelta("2 days") + + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: + result = r() + assert result == expected + + def test_iat_invalid_args(self): + pass + + def test_imethods_with_dups(self): + + # GH6493 + # iat/iloc with dups + + s = Series(range(5), index=[1, 1, 2, 2, 3], dtype="int64") + result = s.iloc[2] + assert result == 2 + result = s.iat[2] + assert result == 2 + + msg = "index 10 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s.iat[10] + msg = "index -10 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s.iat[-10] + + result = s.iloc[[2, 3]] + expected = Series([2, 3], [2, 2], dtype="int64") + tm.assert_series_equal(result, expected) + + df = 
s.to_frame() + result = df.iloc[2] + expected = Series(2, index=[0], name=2) + tm.assert_series_equal(result, expected) + + result = df.iat[2, 0] + assert result == 2 + + def test_at_to_fail(self): + # at should not fallback + # GH 7814 + s = Series([1, 2, 3], index=list("abc")) + result = s.at["a"] + assert result == 1 + msg = ( + "At based indexing on an non-integer index can only have " + "non-integer indexers" + ) + with pytest.raises(ValueError, match=msg): + s.at[0] + + df = DataFrame({"A": [1, 2, 3]}, index=list("abc")) + result = df.at["a", "A"] + assert result == 1 + with pytest.raises(ValueError, match=msg): + df.at["a", 0] + + s = Series([1, 2, 3], index=[3, 2, 1]) + result = s.at[1] + assert result == 3 + msg = "At based indexing on an integer index can only have integer indexers" + with pytest.raises(ValueError, match=msg): + s.at["a"] + + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + result = df.at[1, 0] + assert result == 3 + with pytest.raises(ValueError, match=msg): + df.at["a", 0] + + # GH 13822, incorrect error string with non-unique columns when missing + # column is accessed + df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]}) + df.columns = ["x", "x", "z"] + + # Check that we get the correct value in the KeyError + with pytest.raises(KeyError, match=r"\['y'\] not in index"): + df[["x", "y", "z"]] + + def test_at_with_tz(self): + # gh-15822 + df = DataFrame( + { + "name": ["John", "Anderson"], + "date": [ + Timestamp(2017, 3, 13, 13, 32, 56), + Timestamp(2017, 2, 16, 12, 10, 3), + ], + } + ) + df["date"] = df["date"].dt.tz_localize("Asia/Shanghai") + + expected = Timestamp("2017-03-13 13:32:56+0800", tz="Asia/Shanghai") + + result = df.loc[0, "date"] + assert result == expected + + result = df.at[0, "date"] + assert result == expected + + def test_series_set_tz_timestamp(self, tz_naive_fixture): + # GH 25506 + ts = Timestamp("2017-08-05 00:00:00+0100", tz=tz_naive_fixture) + result = Series(ts) + result.at[1] = ts + expected = Series([ts, 
ts]) + tm.assert_series_equal(result, expected) + + def test_mixed_index_at_iat_loc_iloc_series(self): + # GH 19860 + s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) + for el, item in s.items(): + assert s.at[el] == s.loc[el] == item + for i in range(len(s)): + assert s.iat[i] == s.iloc[i] == i + 1 + + with pytest.raises(KeyError, match="^4$"): + s.at[4] + with pytest.raises(KeyError, match="^4$"): + s.loc[4] + + def test_mixed_index_at_iat_loc_iloc_dataframe(self): + # GH 19860 + df = DataFrame( + [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], columns=["a", "b", "c", 1, 2] + ) + for rowIdx, row in df.iterrows(): + for el, item in row.items(): + assert df.at[rowIdx, el] == df.loc[rowIdx, el] == item + + for row in range(2): + for i in range(5): + assert df.iat[row, i] == df.iloc[row, i] == row * 5 + i + + with pytest.raises(KeyError, match="^3$"): + df.at[0, 3] + with pytest.raises(KeyError, match="^3$"): + df.loc[0, 3] + + def test_iat_setter_incompatible_assignment(self): + # GH 23236 + result = DataFrame({"a": [0, 1], "b": [4, 5]}) + result.iat[0, 0] = None + expected = DataFrame({"a": [None, 1], "b": [4, 5]}) + tm.assert_frame_equal(result, expected) + + def test_getitem_zerodim_np_array(self): + # GH24924 + # dataframe __getitem__ + df = DataFrame([[1, 2], [3, 4]]) + result = df[np.array(0)] + expected = Series([1, 3], name=0) + tm.assert_series_equal(result, expected) + + # series __getitem__ + s = Series([1, 2]) + result = s[np.array(0)] + assert result == 1 diff --git a/pandas/tests/indexing/test_timedelta.py b/pandas/tests/indexing/test_timedelta.py new file mode 100644 index 00000000..dd475012 --- /dev/null +++ b/pandas/tests/indexing/test_timedelta.py @@ -0,0 +1,120 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestTimedeltaIndexing: + def test_boolean_indexing(self): + # GH 14946 + df = pd.DataFrame({"x": range(10)}) + df.index = pd.to_timedelta(range(10), unit="s") + conditions = [df["x"] > 3, 
df["x"] == 3, df["x"] < 3] + expected_data = [ + [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], + [0, 1, 2, 10, 4, 5, 6, 7, 8, 9], + [10, 10, 10, 3, 4, 5, 6, 7, 8, 9], + ] + for cond, data in zip(conditions, expected_data): + result = df.assign(x=df.mask(cond, 10).astype("int64")) + expected = pd.DataFrame( + data, + index=pd.to_timedelta(range(10), unit="s"), + columns=["x"], + dtype="int64", + ) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "indexer, expected", + [ + (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]), + ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]), + ], + ) + def test_list_like_indexing(self, indexer, expected): + # GH 16637 + df = pd.DataFrame({"x": range(10)}, dtype="int64") + df.index = pd.to_timedelta(range(10), unit="s") + + df.loc[df.index[indexer], "x"] = 20 + + expected = pd.DataFrame( + expected, + index=pd.to_timedelta(range(10), unit="s"), + columns=["x"], + dtype="int64", + ) + + tm.assert_frame_equal(expected, df) + + def test_string_indexing(self): + # GH 16896 + df = pd.DataFrame({"x": range(3)}, index=pd.to_timedelta(range(3), unit="days")) + expected = df.iloc[0] + sliced = df.loc["0 days"] + tm.assert_series_equal(sliced, expected) + + @pytest.mark.parametrize("value", [None, pd.NaT, np.nan]) + def test_masked_setitem(self, value): + # issue (#18586) + series = pd.Series([0, 1, 2], dtype="timedelta64[ns]") + series[series == series[0]] = value + expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]") + tm.assert_series_equal(series, expected) + + @pytest.mark.parametrize("value", [None, pd.NaT, np.nan]) + def test_listlike_setitem(self, value): + # issue (#18586) + series = pd.Series([0, 1, 2], dtype="timedelta64[ns]") + series.iloc[0] = value + expected = pd.Series([pd.NaT, 1, 2], dtype="timedelta64[ns]") + tm.assert_series_equal(series, expected) + + @pytest.mark.parametrize( + "start,stop, expected_slice", + [ + [np.timedelta64(0, "ns"), None, slice(0, 11)], + 
[np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)], + [None, np.timedelta64(4, "D"), slice(0, 5)], + ], + ) + def test_numpy_timedelta_scalar_indexing(self, start, stop, expected_slice): + # GH 20393 + s = pd.Series(range(11), pd.timedelta_range("0 days", "10 days")) + result = s.loc[slice(start, stop)] + expected = s.iloc[expected_slice] + tm.assert_series_equal(result, expected) + + def test_roundtrip_thru_setitem(self): + # PR 23462 + dt1 = pd.Timedelta(0) + dt2 = pd.Timedelta(28767471428571405) + df = pd.DataFrame({"dt": pd.Series([dt1, dt2])}) + df_copy = df.copy() + s = pd.Series([dt1]) + + expected = df["dt"].iloc[1].value + df.loc[[True, False]] = s + result = df["dt"].iloc[1].value + + assert expected == result + tm.assert_frame_equal(df, df_copy) + + def test_loc_str_slicing(self): + ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[:"1 days"] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) + + def test_loc_slicing(self): + ix = pd.timedelta_range(start="1 day", end="2 days", freq="1H") + ser = ix.to_series() + result = ser.loc[: ix[-2]] + expected = ser.iloc[:-1] + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/internals/__init__.py b/pandas/tests/internals/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py new file mode 100644 index 00000000..15b1434f --- /dev/null +++ b/pandas/tests/internals/test_internals.py @@ -0,0 +1,1285 @@ +from collections import OrderedDict +from datetime import date, datetime +import itertools +import operator +import re + +import numpy as np +import pytest + +from pandas._libs.internals import BlockPlacement + +import pandas as pd +from pandas import Categorical, DataFrame, DatetimeIndex, Index, MultiIndex, Series +import pandas._testing as tm +import pandas.core.algorithms as algos +from pandas.core.arrays 
def get_numeric_mat(shape):
    """
    Return a new integer array of ``shape`` whose entries equal their index
    along axis 0.

    Every element ``out[i, ...]`` is ``i``; all trailing axes simply repeat
    that value. The result is an independent, writable copy.
    """
    counter = np.arange(shape[0])
    # One singleton axis per trailing dimension lets the counter broadcast
    # across them; broadcast_to yields a read-only view, hence the copy.
    column = counter.reshape((shape[0],) + (1,) * (len(shape) - 1))
    return np.broadcast_to(column, shape).copy()
def create_mgr(descr, item_shape=None):
    """
    Build a BlockManager from a compact string description.

    The description resembles an np.matrix initializer::

        a,b,c: f8; d,e,f: i8

    Rules:

    * components are separated by semicolons; empty components are skipped
    * each component is ``NAME,NAME,NAME: DTYPE_ID``
    * whitespace around colons and semicolons is removed
    * ``DTYPE_ID`` is one of the type strings accepted by ``create_block``
    * components sharing a DTYPE_ID are combined into a single block
    * to force several blocks of the same dtype, append ``-SUFFIX``; only the
      part before the first ``-`` is used as the type string::

        'a:f8-1; b:f8-2; c:f8-foobar'

    ``item_shape`` is the shape of each item and defaults to ``(N,)``.
    """
    if item_shape is None:
        item_shape = (N,)

    item_names = []
    placements_by_block = OrderedDict()
    next_loc = 0
    for component in descr.split(";"):
        component = component.strip()
        if not component:
            continue
        raw_names, _, blockstr = component.partition(":")
        blockstr = blockstr.strip()
        names = raw_names.strip().split(",")

        item_names.extend(names)
        # consecutive manager locations for the items of this component
        locs = list(np.arange(len(names)) + next_loc)
        placements_by_block.setdefault(blockstr, []).extend(locs)
        next_loc += len(names)

    blocks = []
    num_offset = 0
    for blockstr, locs in placements_by_block.items():
        typestr = blockstr.split("-")[0]
        blocks.append(
            create_block(typestr, locs, item_shape=item_shape, num_offset=num_offset)
        )
        num_offset += len(locs)

    # blocks are handed over ordered by their first manager location
    blocks.sort(key=lambda blk: blk.mgr_locs[0])
    axes = [Index(item_names)]
    axes.extend(np.arange(n) for n in item_shape)
    return BlockManager(blocks, axes)
def test_merge(self):
    """Merging two blocks orders the combined rows by manager location."""
    left_vals, right_vals = tm.randn(2, 10), tm.randn(2, 10)
    columns = Index(["e", "a", "b", "d", "f"])

    # left occupies locations 0 and 2, right occupies 1 and 3
    left = make_block(left_vals, columns.get_indexer(["e", "b"]))
    right = make_block(right_vals, columns.get_indexer(["a", "d"]))
    merged = left.merge(right)

    tm.assert_numpy_array_equal(
        merged.mgr_locs.as_array, np.arange(4, dtype=np.int64)
    )
    for rows, source in (([0, 2], left_vals), ([1, 3], right_vals)):
        tm.assert_numpy_array_equal(merged.values[rows], np.array(source))

    # TODO: merge with mixed type?
class TestDatetimeBlock:
    def test_can_hold_element(self):
        """
        ``block._can_hold_element`` agrees with ``__setitem__`` on the
        array built from the block's values.
        """
        block = create_block("datetime", [0])

        # We will check that block._can_hold_element iff arr.__setitem__ works
        arr = pd.array(block.values.ravel())

        # None is accepted and stored as NaT
        assert block._can_hold_element(None)
        arr[0] = None
        assert arr[0] is pd.NaT

        # different kinds of datetime objects are accepted
        for accepted in (np.datetime64("2010-10-10"), datetime(2010, 10, 10)):
            assert block._can_hold_element(accepted)
            arr[0] = accepted

        # a plain date is rejected by both
        rejected = date(2010, 10, 10)
        assert not block._can_hold_element(rejected)
        with pytest.raises(TypeError):
            arr[0] = rejected
def test_duplicate_ref_loc_failure(self):
    """BlockManager construction asserts when two blocks claim the same location."""
    source = create_mgr("a:bool; a: f8")
    axes = source.axes
    blocks = source.blocks

    def _place(*locs):
        # assign one single-element placement to each of the leading blocks
        for blk, loc in zip(blocks, locs):
            blk.mgr_locs = np.array([loc])

    # test trying to create block manager with overlapping ref locs
    _place(0, 0)
    with pytest.raises(AssertionError):
        BlockManager(blocks, axes)

    # distinct locations construct fine, and the second item can be fetched
    _place(0, 1)
    rebuilt = BlockManager(blocks, axes)
    rebuilt.iget(1)
def test_set(self):
    """``set`` adds a new item and replaces an existing one; other items keep their values."""
    mgr = create_mgr("a,b,c: int", item_shape=(3,))

    mgr.set("d", np.array(["foo"] * 3))
    mgr.set("b", np.array(["bar"] * 3))

    expected_by_item = [
        ("a", np.array([0] * 3)),
        ("b", np.array(["bar"] * 3, dtype=np.object_)),
        ("c", np.array([2] * 3)),
        ("d", np.array(["foo"] * 3, dtype=np.object_)),
    ]
    for item, expected in expected_by_item:
        tm.assert_numpy_array_equal(mgr.get(item).internal_values(), expected)
def test_as_array_int_bool(self):
    """``as_array`` keeps bool for all-bool managers and widens mixed integers."""
    cases = [
        ("a: bool-1; b: bool-2", np.bool_),
        ("a: i8-1; b: i8-2; c: i4; d: i2; e: u1", np.int64),
        ("c: i4; d: i2; e: u1", np.int32),
    ]
    for descr, expected_dtype in cases:
        mgr = create_mgr(descr)
        assert mgr.as_array().dtype == expected_dtype
def test_convert(self):
    """
    ``BlockManager.convert`` behaviour.

    * Without arguments, a purely numeric manager comes back with blocks
      whose values match the originals.
    * With ``numeric=True``, object columns holding numeric strings become
      int64/float64, non-numeric strings stay object, and every other dtype
      (including datetime) is left untouched.
    """

    def _has_match(block, candidates):
        # True if some candidate block holds element-wise equal values.
        return any((block.values == other.values).all() for other in candidates)

    def _compare(old_mgr, new_mgr):
        """ compare the blocks, numeric compare ==, object don't """
        old_blocks = set(old_mgr.blocks)
        new_blocks = set(new_mgr.blocks)
        assert len(old_blocks) == len(new_blocks)

        # every old block has an equal new block, and vice versa
        for blk in old_blocks:
            assert _has_match(blk, new_blocks)
        for blk in new_blocks:
            assert _has_match(blk, old_blocks)

    def _set_string_columns(mgr):
        # "a" parses as int, "b" as float, "foo" is not numeric at all
        mgr.set("a", np.array(["1"] * N, dtype=np.object_))
        mgr.set("b", np.array(["2."] * N, dtype=np.object_))
        mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))

    # noops
    mgr = create_mgr("f: i8; g: f8")
    new_mgr = mgr.convert()
    _compare(mgr, new_mgr)

    # convert
    mgr = create_mgr("a,b,foo: object; f: i8; g: f8")
    _set_string_columns(mgr)
    new_mgr = mgr.convert(numeric=True)
    assert new_mgr.get("a").dtype == np.int64
    assert new_mgr.get("b").dtype == np.float64
    assert new_mgr.get("foo").dtype == np.object_
    assert new_mgr.get("f").dtype == np.int64
    assert new_mgr.get("g").dtype == np.float64

    mgr = create_mgr(
        "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
    )
    _set_string_columns(mgr)
    new_mgr = mgr.convert(numeric=True)
    assert new_mgr.get("a").dtype == np.int64
    assert new_mgr.get("b").dtype == np.float64
    assert new_mgr.get("foo").dtype == np.object_
    assert new_mgr.get("f").dtype == np.int32
    assert new_mgr.get("bool").dtype == np.bool_
    # This used to read ``assert <type>, np.datetime64``: the comma made
    # np.datetime64 the assertion *message*, so the check could never fail.
    assert new_mgr.get("dt").dtype.type == np.datetime64
    assert new_mgr.get("i").dtype == np.int64
    assert new_mgr.get("g").dtype == np.float64
    assert new_mgr.get("h").dtype == np.float16
mgr.as_array().dtype == "object" + mgr = create_mgr("a: M8[ns]; b: m8[ns]") + assert mgr.as_array().dtype == "object" + + def test_interleave_non_unique_cols(self): + df = DataFrame( + [[pd.Timestamp("20130101"), 3.5], [pd.Timestamp("20130102"), 4.5]], + columns=["x", "x"], + index=[1, 2], + ) + + df_unique = df.copy() + df_unique.columns = ["x", "y"] + assert df_unique.values.shape == df.values.shape + tm.assert_numpy_array_equal(df_unique.values[0], df.values[0]) + tm.assert_numpy_array_equal(df_unique.values[1], df.values[1]) + + def test_consolidate(self): + pass + + def test_consolidate_ordering_issues(self, mgr): + mgr.set("f", tm.randn(N)) + mgr.set("d", tm.randn(N)) + mgr.set("b", tm.randn(N)) + mgr.set("g", tm.randn(N)) + mgr.set("h", tm.randn(N)) + + # we have datetime/tz blocks in mgr + cons = mgr.consolidate() + assert cons.nblocks == 4 + cons = mgr.consolidate().get_numeric_data() + assert cons.nblocks == 1 + assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) + tm.assert_numpy_array_equal( + cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64) + ) + + def test_reindex_index(self): + # TODO: should this be pytest.skip? 
+ pass + + def test_reindex_items(self): + # mgr is not consolidated, f8 & f8-2 blocks + mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2") + + reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0) + assert reindexed.nblocks == 2 + tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"])) + tm.assert_almost_equal( + mgr.get("g").internal_values(), reindexed.get("g").internal_values() + ) + tm.assert_almost_equal( + mgr.get("c").internal_values(), reindexed.get("c").internal_values() + ) + tm.assert_almost_equal( + mgr.get("a").internal_values(), reindexed.get("a").internal_values() + ) + tm.assert_almost_equal( + mgr.get("d").internal_values(), reindexed.get("d").internal_values() + ) + + def test_get_numeric_data(self): + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.set("obj", np.array([1, 2, 3], dtype=np.object_)) + + numeric = mgr.get_numeric_data() + tm.assert_index_equal( + numeric.items, pd.Index(["int", "float", "complex", "bool"]) + ) + tm.assert_almost_equal( + mgr.get("float").internal_values(), numeric.get("float").internal_values() + ) + + # Check sharing + numeric.set("float", np.array([100.0, 200.0, 300.0])) + tm.assert_almost_equal( + mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0]) + ) + + numeric2 = mgr.get_numeric_data(copy=True) + tm.assert_index_equal( + numeric.items, pd.Index(["int", "float", "complex", "bool"]) + ) + numeric2.set("float", np.array([1000.0, 2000.0, 3000.0])) + tm.assert_almost_equal( + mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0]) + ) + + def test_get_bool_data(self): + mgr = create_mgr( + "int: int; float: float; complex: complex;" + "str: object; bool: bool; obj: object; dt: datetime", + item_shape=(3,), + ) + mgr.set("obj", np.array([True, False, True], dtype=np.object_)) + + bools = mgr.get_bool_data() + tm.assert_index_equal(bools.items, 
pd.Index(["bool"])) + tm.assert_almost_equal( + mgr.get("bool").internal_values(), bools.get("bool").internal_values() + ) + + bools.set("bool", np.array([True, False, True])) + tm.assert_numpy_array_equal( + mgr.get("bool").internal_values(), np.array([True, False, True]) + ) + + # Check sharing + bools2 = mgr.get_bool_data(copy=True) + bools2.set("bool", np.array([False, True, False])) + tm.assert_numpy_array_equal( + mgr.get("bool").internal_values(), np.array([True, False, True]) + ) + + def test_unicode_repr_doesnt_raise(self): + repr(create_mgr("b,\u05d0: object")) + + def test_missing_unicode_key(self): + df = DataFrame({"a": [1]}) + try: + df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError + except KeyError: + pass # this is the expected exception + + def test_equals(self): + # unique items + bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2") + bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) + assert bm1.equals(bm2) + + bm1 = create_mgr("a,a,a: i8-1; b,b,b: i8-2") + bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) + assert bm1.equals(bm2) + + def test_equals_block_order_different_dtypes(self): + # GH 9330 + + mgr_strings = [ + "a:i8;b:f8", # basic case + "a:i8;b:f8;c:c8;d:b", # many types + "a:i8;e:dt;f:td;g:string", # more types + "a:i8;b:category;c:category2;d:category2", # categories + "c:sparse;d:sparse_na;b:f8", # sparse + ] + + for mgr_string in mgr_strings: + bm = create_mgr(mgr_string) + block_perms = itertools.permutations(bm.blocks) + for bm_perm in block_perms: + bm_this = BlockManager(bm_perm, bm.axes) + assert bm.equals(bm_this) + assert bm_this.equals(bm) + + def test_single_mgr_ctor(self): + mgr = create_single_mgr("f8", num_rows=5) + assert mgr.as_array().tolist() == [0.0, 1.0, 2.0, 3.0, 4.0] + + def test_validate_bool_args(self): + invalid_values = [1, "True", [1, 2, 3], 5.0] + bm1 = create_mgr("a,b,c: i8-1; d,e,f: i8-2") + + for value in invalid_values: + with pytest.raises(ValueError): + bm1.replace_list([1], [2], inplace=value) + + +class 
TestIndexing: + # Nosetests-style data-driven tests. + # + # This test applies different indexing routines to block managers and + # compares the outcome to the result of same operations on np.ndarray. + # + # NOTE: sparse (SparseBlock with fill_value != np.nan) fail a lot of tests + # and are disabled. + + MANAGERS = [ + create_single_mgr("f8", N), + create_single_mgr("i8", N), + # 2-dim + create_mgr("a,b,c,d,e,f: f8", item_shape=(N,)), + create_mgr("a,b,c,d,e,f: i8", item_shape=(N,)), + create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N,)), + create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N,)), + # 3-dim + create_mgr("a,b,c,d,e,f: f8", item_shape=(N, N)), + create_mgr("a,b,c,d,e,f: i8", item_shape=(N, N)), + create_mgr("a,b: f8; c,d: i8; e,f: string", item_shape=(N, N)), + create_mgr("a,b: f8; c,d: i8; e,f: f8", item_shape=(N, N)), + ] + + # MANAGERS = [MANAGERS[6]] + + def test_get_slice(self): + def assert_slice_ok(mgr, axis, slobj): + mat = mgr.as_array() + + # we maybe using an ndarray to test slicing and + # might not be the full length of the axis + if isinstance(slobj, np.ndarray): + ax = mgr.axes[axis] + if len(ax) and len(slobj) and len(slobj) != len(ax): + slobj = np.concatenate( + [slobj, np.zeros(len(ax) - len(slobj), dtype=bool)] + ) + sliced = mgr.get_slice(slobj, axis=axis) + mat_slobj = (slice(None),) * axis + (slobj,) + tm.assert_numpy_array_equal( + mat[mat_slobj], sliced.as_array(), check_dtype=False + ) + tm.assert_index_equal(mgr.axes[axis][slobj], sliced.axes[axis]) + + for mgr in self.MANAGERS: + for ax in range(mgr.ndim): + # slice + assert_slice_ok(mgr, ax, slice(None)) + assert_slice_ok(mgr, ax, slice(3)) + assert_slice_ok(mgr, ax, slice(100)) + assert_slice_ok(mgr, ax, slice(1, 4)) + assert_slice_ok(mgr, ax, slice(3, 0, -2)) + + # boolean mask + assert_slice_ok(mgr, ax, np.array([], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.ones(mgr.shape[ax], dtype=np.bool_)) + assert_slice_ok(mgr, ax, np.zeros(mgr.shape[ax], 
dtype=np.bool_)) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) + assert_slice_ok( + mgr, ax, np.array([True, True, False], dtype=np.bool_) + ) + + # fancy indexer + assert_slice_ok(mgr, ax, []) + assert_slice_ok(mgr, ax, list(range(mgr.shape[ax]))) + + if mgr.shape[ax] >= 3: + assert_slice_ok(mgr, ax, [0, 1, 2]) + assert_slice_ok(mgr, ax, [-1, -2, -3]) + + def test_take(self): + def assert_take_ok(mgr, axis, indexer): + mat = mgr.as_array() + taken = mgr.take(indexer, axis) + tm.assert_numpy_array_equal( + np.take(mat, indexer, axis), taken.as_array(), check_dtype=False + ) + tm.assert_index_equal(mgr.axes[axis].take(indexer), taken.axes[axis]) + + for mgr in self.MANAGERS: + for ax in range(mgr.ndim): + # take/fancy indexer + assert_take_ok(mgr, ax, indexer=[]) + assert_take_ok(mgr, ax, indexer=[0, 0, 0]) + assert_take_ok(mgr, ax, indexer=list(range(mgr.shape[ax]))) + + if mgr.shape[ax] >= 3: + assert_take_ok(mgr, ax, indexer=[0, 1, 2]) + assert_take_ok(mgr, ax, indexer=[-1, -2, -3]) + + def test_reindex_axis(self): + def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): + mat = mgr.as_array() + indexer = mgr.axes[axis].get_indexer_for(new_labels) + + reindexed = mgr.reindex_axis(new_labels, axis, fill_value=fill_value) + tm.assert_numpy_array_equal( + algos.take_nd(mat, indexer, axis, fill_value=fill_value), + reindexed.as_array(), + check_dtype=False, + ) + tm.assert_index_equal(reindexed.axes[axis], new_labels) + + for mgr in self.MANAGERS: + for ax in range(mgr.ndim): + for fill_value in (None, np.nan, 100.0): + assert_reindex_axis_is_ok(mgr, ax, pd.Index([]), fill_value) + assert_reindex_axis_is_ok(mgr, ax, mgr.axes[ax], fill_value) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax][[0, 0, 0]], fill_value + ) + assert_reindex_axis_is_ok( + mgr, ax, pd.Index(["foo", "bar", "baz"]), fill_value + ) + assert_reindex_axis_is_ok( + mgr, ax, pd.Index(["foo", mgr.axes[ax][0], "baz"]), fill_value + ) + + if 
mgr.shape[ax] >= 3: + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax][:-3], fill_value + ) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax][-3::-1], fill_value + ) + assert_reindex_axis_is_ok( + mgr, ax, mgr.axes[ax][[0, 1, 2, 0, 1, 2]], fill_value + ) + + def test_reindex_indexer(self): + def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): + mat = mgr.as_array() + reindexed_mat = algos.take_nd(mat, indexer, axis, fill_value=fill_value) + reindexed = mgr.reindex_indexer( + new_labels, indexer, axis, fill_value=fill_value + ) + tm.assert_numpy_array_equal( + reindexed_mat, reindexed.as_array(), check_dtype=False + ) + tm.assert_index_equal(reindexed.axes[axis], new_labels) + + for mgr in self.MANAGERS: + for ax in range(mgr.ndim): + for fill_value in (None, np.nan, 100.0): + assert_reindex_indexer_is_ok(mgr, ax, pd.Index([]), [], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + pd.Index(["foo"] * mgr.shape[ax]), + np.arange(mgr.shape[ax]), + fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + mgr.axes[ax][::-1], + np.arange(mgr.shape[ax]), + fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + mgr.axes[ax], + np.arange(mgr.shape[ax])[::-1], + fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, ax, pd.Index(["foo", "bar", "baz"]), [0, 0, 0], fill_value + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + pd.Index(["foo", "bar", "baz"]), + [-1, 0, -1], + fill_value, + ) + assert_reindex_indexer_is_ok( + mgr, + ax, + pd.Index(["foo", mgr.axes[ax][0], "baz"]), + [-1, -1, -1], + fill_value, + ) + + if mgr.shape[ax] >= 3: + assert_reindex_indexer_is_ok( + mgr, + ax, + pd.Index(["foo", "bar", "baz"]), + [0, 1, 2], + fill_value, + ) + + # test_get_slice(slice_like, axis) + # take(indexer, axis) + # reindex_axis(new_labels, axis) + # reindex_indexer(new_labels, indexer, axis) + + +class TestBlockPlacement: + def 
test_slice_len(self): + assert len(BlockPlacement(slice(0, 4))) == 4 + assert len(BlockPlacement(slice(0, 4, 2))) == 2 + assert len(BlockPlacement(slice(0, 3, 2))) == 2 + + assert len(BlockPlacement(slice(0, 1, 2))) == 1 + assert len(BlockPlacement(slice(1, 0, -1))) == 1 + + def test_zero_step_raises(self): + with pytest.raises(ValueError): + BlockPlacement(slice(1, 1, 0)) + with pytest.raises(ValueError): + BlockPlacement(slice(1, 2, 0)) + + def test_unbounded_slice_raises(self): + def assert_unbounded_slice_error(slc): + with pytest.raises(ValueError, match="unbounded slice"): + BlockPlacement(slc) + + assert_unbounded_slice_error(slice(None, None)) + assert_unbounded_slice_error(slice(10, None)) + assert_unbounded_slice_error(slice(None, None, -1)) + assert_unbounded_slice_error(slice(None, 10, -1)) + + # These are "unbounded" because negative index will change depending on + # container shape. + assert_unbounded_slice_error(slice(-1, None)) + assert_unbounded_slice_error(slice(None, -1)) + assert_unbounded_slice_error(slice(-1, -1)) + assert_unbounded_slice_error(slice(-1, None, -1)) + assert_unbounded_slice_error(slice(None, -1, -1)) + assert_unbounded_slice_error(slice(-1, -1, -1)) + + def test_not_slice_like_slices(self): + def assert_not_slice_like(slc): + assert not BlockPlacement(slc).is_slice_like + + assert_not_slice_like(slice(0, 0)) + assert_not_slice_like(slice(100, 0)) + + assert_not_slice_like(slice(100, 100, -1)) + assert_not_slice_like(slice(0, 100, -1)) + + assert not BlockPlacement(slice(0, 0)).is_slice_like + assert not BlockPlacement(slice(100, 100)).is_slice_like + + def test_array_to_slice_conversion(self): + def assert_as_slice_equals(arr, slc): + assert BlockPlacement(arr).as_slice == slc + + assert_as_slice_equals([0], slice(0, 1, 1)) + assert_as_slice_equals([100], slice(100, 101, 1)) + + assert_as_slice_equals([0, 1, 2], slice(0, 3, 1)) + assert_as_slice_equals([0, 5, 10], slice(0, 15, 5)) + assert_as_slice_equals([0, 100], slice(0, 
200, 100)) + + assert_as_slice_equals([2, 1], slice(2, 0, -1)) + + def test_not_slice_like_arrays(self): + def assert_not_slice_like(arr): + assert not BlockPlacement(arr).is_slice_like + + assert_not_slice_like([]) + assert_not_slice_like([-1]) + assert_not_slice_like([-1, -2, -3]) + assert_not_slice_like([-10]) + assert_not_slice_like([-1]) + assert_not_slice_like([-1, 0, 1, 2]) + assert_not_slice_like([-2, 0, 2, 4]) + assert_not_slice_like([1, 0, -1]) + assert_not_slice_like([1, 1, 1]) + + def test_slice_iter(self): + assert list(BlockPlacement(slice(0, 3))) == [0, 1, 2] + assert list(BlockPlacement(slice(0, 0))) == [] + assert list(BlockPlacement(slice(3, 0))) == [] + + def test_slice_to_array_conversion(self): + def assert_as_array_equals(slc, asarray): + tm.assert_numpy_array_equal( + BlockPlacement(slc).as_array, np.asarray(asarray, dtype=np.int64) + ) + + assert_as_array_equals(slice(0, 3), [0, 1, 2]) + assert_as_array_equals(slice(0, 0), []) + assert_as_array_equals(slice(3, 0), []) + + assert_as_array_equals(slice(3, 0, -1), [3, 2, 1]) + + def test_blockplacement_add(self): + bpl = BlockPlacement(slice(0, 5)) + assert bpl.add(1).as_slice == slice(1, 6, 1) + assert bpl.add(np.arange(5)).as_slice == slice(0, 10, 2) + assert list(bpl.add(np.arange(5, 0, -1))) == [5, 5, 5, 5, 5] + + def test_blockplacement_add_int(self): + def assert_add_equals(val, inc, result): + assert list(BlockPlacement(val).add(inc)) == result + + assert_add_equals(slice(0, 0), 0, []) + assert_add_equals(slice(1, 4), 0, [1, 2, 3]) + assert_add_equals(slice(3, 0, -1), 0, [3, 2, 1]) + assert_add_equals([1, 2, 4], 0, [1, 2, 4]) + + assert_add_equals(slice(0, 0), 10, []) + assert_add_equals(slice(1, 4), 10, [11, 12, 13]) + assert_add_equals(slice(3, 0, -1), 10, [13, 12, 11]) + assert_add_equals([1, 2, 4], 10, [11, 12, 14]) + + assert_add_equals(slice(0, 0), -1, []) + assert_add_equals(slice(1, 4), -1, [0, 1, 2]) + assert_add_equals([1, 2, 4], -1, [0, 1, 3]) + + with 
pytest.raises(ValueError): + BlockPlacement(slice(1, 4)).add(-10) + with pytest.raises(ValueError): + BlockPlacement([1, 2, 4]).add(-10) + + +class DummyElement: + def __init__(self, value, dtype): + self.value = value + self.dtype = np.dtype(dtype) + + def __array__(self): + return np.array(self.value, dtype=self.dtype) + + def __str__(self) -> str: + return "DummyElement({}, {})".format(self.value, self.dtype) + + def __repr__(self) -> str: + return str(self) + + def astype(self, dtype, copy=False): + self.dtype = dtype + return self + + def view(self, dtype): + return type(self)(self.value.view(dtype), dtype) + + def any(self, axis=None): + return bool(self.value) + + +class TestCanHoldElement: + @pytest.mark.parametrize( + "value, dtype", + [ + (1, "i8"), + (1.0, "f8"), + (2 ** 63, "f8"), + (1j, "complex128"), + (2 ** 63, "complex128"), + (True, "bool"), + (np.timedelta64(20, "ns"), "= 1.11 otherwise, + # see https://github.com/spulec/moto/issues/1924 & 1952 + os.environ.setdefault("AWS_ACCESS_KEY_ID", "foobar_key") + os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "foobar_secret") + + moto = pytest.importorskip("moto") + + test_s3_files = [ + ("tips#1.csv", tips_file), + ("tips.csv", tips_file), + ("tips.csv.gz", tips_file + ".gz"), + ("tips.csv.bz2", tips_file + ".bz2"), + ("items.jsonl", jsonl_file), + ] + + def add_tips_files(bucket_name): + for s3_key, file_name in test_s3_files: + with open(file_name, "rb") as f: + conn.Bucket(bucket_name).put_object(Key=s3_key, Body=f) + + try: + s3 = moto.mock_s3() + s3.start() + + # see gh-16135 + bucket = "pandas-test" + conn = boto3.resource("s3", region_name="us-east-1") + + conn.create_bucket(Bucket=bucket) + add_tips_files(bucket) + + conn.create_bucket(Bucket="cant_get_it", ACL="private") + add_tips_files("cant_get_it") + s3fs.S3FileSystem.clear_instance_cache() + yield conn + finally: + s3.stop() diff --git a/pandas/tests/io/data/csv/banklist.csv b/pandas/tests/io/data/csv/banklist.csv new file mode 100644 index 
00000000..e7900830 --- /dev/null +++ b/pandas/tests/io/data/csv/banklist.csv @@ -0,0 +1,507 @@ +Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date +Banks of Wisconsin d/b/a Bank of Kenosha,Kenosha,WI,35386,"North Shore Bank, FSB",31-May-13,31-May-13 +Central Arizona Bank,Scottsdale,AZ,34527,Western State Bank,14-May-13,20-May-13 +Sunrise Bank,Valdosta,GA,58185,Synovus Bank,10-May-13,21-May-13 +Pisgah Community Bank,Asheville,NC,58701,"Capital Bank, N.A.",10-May-13,14-May-13 +Douglas County Bank,Douglasville,GA,21649,Hamilton State Bank,26-Apr-13,16-May-13 +Parkway Bank,Lenoir,NC,57158,"CertusBank, National Association",26-Apr-13,17-May-13 +Chipola Community Bank,Marianna,FL,58034,First Federal Bank of Florida,19-Apr-13,16-May-13 +Heritage Bank of North Florida,Orange Park,FL,26680,FirstAtlantic Bank,19-Apr-13,16-May-13 +First Federal Bank,Lexington,KY,29594,Your Community Bank,19-Apr-13,23-Apr-13 +Gold Canyon Bank,Gold Canyon,AZ,58066,"First Scottsdale Bank, National Association",5-Apr-13,9-Apr-13 +Frontier Bank,LaGrange,GA,16431,HeritageBank of the South,8-Mar-13,26-Mar-13 +Covenant Bank,Chicago,IL,22476,Liberty Bank and Trust Company,15-Feb-13,4-Mar-13 +1st Regents Bank,Andover,MN,57157,First Minnesota Bank,18-Jan-13,28-Feb-13 +Westside Community Bank,University Place,WA,33997,Sunwest Bank,11-Jan-13,24-Jan-13 +Community Bank of the Ozarks,Sunrise Beach,MO,27331,Bank of Sullivan,14-Dec-12,24-Jan-13 +Hometown Community Bank,Braselton,GA,57928,"CertusBank, National Association",16-Nov-12,24-Jan-13 +Citizens First National Bank,Princeton,IL,3731,Heartland Bank and Trust Company,2-Nov-12,24-Jan-13 +Heritage Bank of Florida,Lutz,FL,35009,Centennial Bank,2-Nov-12,24-Jan-13 +NOVA Bank,Berwyn,PA,27148,No Acquirer,26-Oct-12,24-Jan-13 +Excel Bank,Sedalia,MO,19189,Simmons First National Bank,19-Oct-12,24-Jan-13 +First East Side Savings Bank,Tamarac,FL,28144,Stearns Bank N.A.,19-Oct-12,24-Jan-13 +GulfSouth Private 
Bank,Destin,FL,58073,SmartBank,19-Oct-12,24-Jan-13 +First United Bank,Crete,IL,20685,"Old Plank Trail Community Bank, National Association",28-Sep-12,15-Nov-12 +Truman Bank,St. Louis,MO,27316,Simmons First National Bank,14-Sep-12,17-Dec-12 +First Commercial Bank,Bloomington,MN,35246,Republic Bank & Trust Company,7-Sep-12,17-Dec-12 +Waukegan Savings Bank,Waukegan,IL,28243,First Midwest Bank,3-Aug-12,11-Oct-12 +Jasper Banking Company,Jasper,GA,16240,Stearns Bank N.A.,27-Jul-12,17-Dec-12 +Second Federal Savings and Loan Association of Chicago,Chicago,IL,27986,Hinsdale Bank & Trust Company,20-Jul-12,14-Jan-13 +Heartland Bank,Leawood,KS,1361,Metcalf Bank,20-Jul-12,17-Dec-12 +First Cherokee State Bank,Woodstock,GA,32711,Community & Southern Bank,20-Jul-12,31-Oct-12 +Georgia Trust Bank,Buford,GA,57847,Community & Southern Bank,20-Jul-12,17-Dec-12 +The Royal Palm Bank of Florida,Naples,FL,57096,First National Bank of the Gulf Coast,20-Jul-12,7-Jan-13 +Glasgow Savings Bank,Glasgow,MO,1056,Regional Missouri Bank,13-Jul-12,11-Oct-12 +Montgomery Bank & Trust,Ailey,GA,19498,Ameris Bank,6-Jul-12,31-Oct-12 +The Farmers Bank of Lynchburg,Lynchburg,TN,1690,Clayton Bank and Trust,15-Jun-12,31-Oct-12 +Security Exchange Bank,Marietta,GA,35299,Fidelity Bank,15-Jun-12,10-Oct-12 +Putnam State Bank,Palatka,FL,27405,Harbor Community Bank,15-Jun-12,10-Oct-12 +Waccamaw Bank,Whiteville,NC,34515,First Community Bank,8-Jun-12,8-Nov-12 +Farmers' and Traders' State Bank,Shabbona,IL,9257,First State Bank,8-Jun-12,10-Oct-12 +Carolina Federal Savings Bank,Charleston,SC,35372,Bank of North Carolina,8-Jun-12,31-Oct-12 +First Capital Bank,Kingfisher,OK,416,F & M Bank,8-Jun-12,10-Oct-12 +"Alabama Trust Bank, National Association",Sylacauga,AL,35224,Southern States Bank,18-May-12,20-May-13 +"Security Bank, National Association",North Lauderdale,FL,23156,Banesco USA,4-May-12,31-Oct-12 +Palm Desert National Bank,Palm Desert,CA,23632,Pacific Premier Bank,27-Apr-12,17-May-13 +Plantation Federal Bank,Pawleys 
Island,SC,32503,First Federal Bank,27-Apr-12,17-May-13 +"Inter Savings Bank, fsb D/B/A InterBank, fsb",Maple Grove,MN,31495,Great Southern Bank,27-Apr-12,17-May-13 +HarVest Bank of Maryland,Gaithersburg,MD,57766,Sonabank,27-Apr-12,17-May-13 +Bank of the Eastern Shore,Cambridge,MD,26759,No Acquirer,27-Apr-12,17-Oct-12 +"Fort Lee Federal Savings Bank, FSB",Fort Lee,NJ,35527,Alma Bank,20-Apr-12,17-May-13 +Fidelity Bank,Dearborn,MI,33883,The Huntington National Bank,30-Mar-12,16-May-13 +Premier Bank,Wilmette,IL,35419,International Bank of Chicago,23-Mar-12,17-Oct-12 +Covenant Bank & Trust,Rock Spring,GA,58068,"Stearns Bank, N.A.",23-Mar-12,31-Oct-12 +New City Bank,Chicago,IL,57597,No Acquirer,9-Mar-12,29-Oct-12 +Global Commerce Bank,Doraville,GA,34046,Metro City Bank,2-Mar-12,31-Oct-12 +Home Savings of America,Little Falls,MN,29178,No Acquirer,24-Feb-12,17-Dec-12 +Central Bank of Georgia,Ellaville,GA,5687,Ameris Bank,24-Feb-12,9-Aug-12 +SCB Bank,Shelbyville,IN,29761,"First Merchants Bank, National Association",10-Feb-12,25-Mar-13 +Charter National Bank and Trust,Hoffman Estates,IL,23187,"Barrington Bank & Trust Company, National Association",10-Feb-12,25-Mar-13 +BankEast,Knoxville,TN,19869,U.S.Bank National Association,27-Jan-12,8-Mar-13 +Patriot Bank Minnesota,Forest Lake,MN,34823,First Resource Bank,27-Jan-12,12-Sep-12 +Tennessee Commerce Bank,Franklin,TN,35296,Republic Bank & Trust Company,27-Jan-12,20-Nov-12 +First Guaranty Bank and Trust Company of Jacksonville,Jacksonville,FL,16579,"CenterState Bank of Florida, N.A.",27-Jan-12,12-Sep-12 +American Eagle Savings Bank,Boothwyn,PA,31581,"Capital Bank, N.A.",20-Jan-12,25-Jan-13 +The First State Bank,Stockbridge,GA,19252,Hamilton State Bank,20-Jan-12,25-Jan-13 +Central Florida State Bank,Belleview,FL,57186,"CenterState Bank of Florida, N.A.",20-Jan-12,25-Jan-13 +Western National Bank,Phoenix,AZ,57917,Washington Federal,16-Dec-11,13-Aug-12 +Premier Community Bank of the Emerald Coast,Crestview,FL,58343,Summit 
Bank,16-Dec-11,12-Sep-12 +Central Progressive Bank,Lacombe,LA,19657,First NBC Bank,18-Nov-11,13-Aug-12 +Polk County Bank,Johnston,IA,14194,Grinnell State Bank,18-Nov-11,15-Aug-12 +Community Bank of Rockmart,Rockmart,GA,57860,Century Bank of Georgia,10-Nov-11,13-Aug-12 +SunFirst Bank,Saint George,UT,57087,Cache Valley Bank,4-Nov-11,16-Nov-12 +"Mid City Bank, Inc.",Omaha,NE,19397,Premier Bank,4-Nov-11,15-Aug-12 +All American Bank,Des Plaines,IL,57759,International Bank of Chicago,28-Oct-11,15-Aug-12 +Community Banks of Colorado,Greenwood Village,CO,21132,"Bank Midwest, N.A.",21-Oct-11,2-Jan-13 +Community Capital Bank,Jonesboro,GA,57036,State Bank and Trust Company,21-Oct-11,8-Nov-12 +Decatur First Bank,Decatur,GA,34392,Fidelity Bank,21-Oct-11,8-Nov-12 +Old Harbor Bank,Clearwater,FL,57537,1st United Bank,21-Oct-11,8-Nov-12 +Country Bank,Aledo,IL,35395,Blackhawk Bank & Trust,14-Oct-11,15-Aug-12 +First State Bank,Cranford,NJ,58046,Northfield Bank,14-Oct-11,8-Nov-12 +"Blue Ridge Savings Bank, Inc.",Asheville,NC,32347,Bank of North Carolina,14-Oct-11,8-Nov-12 +Piedmont Community Bank,Gray,GA,57256,State Bank and Trust Company,14-Oct-11,22-Jan-13 +Sun Security Bank,Ellington,MO,20115,Great Southern Bank,7-Oct-11,7-Nov-12 +The RiverBank,Wyoming,MN,10216,Central Bank,7-Oct-11,7-Nov-12 +First International Bank,Plano,TX,33513,American First National Bank,30-Sep-11,9-Oct-12 +Citizens Bank of Northern California,Nevada City,CA,33983,Tri Counties Bank,23-Sep-11,9-Oct-12 +Bank of the Commonwealth,Norfolk,VA,20408,Southern Bank and Trust Company,23-Sep-11,9-Oct-12 +The First National Bank of Florida,Milton,FL,25155,CharterBank,9-Sep-11,6-Sep-12 +CreekSide Bank,Woodstock,GA,58226,Georgia Commerce Bank,2-Sep-11,6-Sep-12 +Patriot Bank of Georgia,Cumming,GA,58273,Georgia Commerce Bank,2-Sep-11,2-Nov-12 +First Choice Bank,Geneva,IL,57212,Inland Bank & Trust,19-Aug-11,15-Aug-12 +First Southern National Bank,Statesboro,GA,57239,Heritage Bank of the South,19-Aug-11,2-Nov-12 +Lydian 
Private Bank,Palm Beach,FL,35356,"Sabadell United Bank, N.A.",19-Aug-11,2-Nov-12 +Public Savings Bank,Huntingdon Valley,PA,34130,"Capital Bank, N.A.",18-Aug-11,15-Aug-12 +The First National Bank of Olathe,Olathe,KS,4744,Enterprise Bank & Trust,12-Aug-11,23-Aug-12 +Bank of Whitman,Colfax,WA,22528,Columbia State Bank,5-Aug-11,16-Aug-12 +Bank of Shorewood,Shorewood,IL,22637,Heartland Bank and Trust Company,5-Aug-11,16-Aug-12 +Integra Bank National Association,Evansville,IN,4392,Old National Bank,29-Jul-11,16-Aug-12 +"BankMeridian, N.A.",Columbia,SC,58222,SCBT National Association,29-Jul-11,2-Nov-12 +Virginia Business Bank,Richmond,VA,58283,Xenith Bank,29-Jul-11,9-Oct-12 +Bank of Choice,Greeley,CO,2994,"Bank Midwest, N.A.",22-Jul-11,12-Sep-12 +LandMark Bank of Florida,Sarasota,FL,35244,American Momentum Bank,22-Jul-11,2-Nov-12 +Southshore Community Bank,Apollo Beach,FL,58056,American Momentum Bank,22-Jul-11,2-Nov-12 +Summit Bank,Prescott,AZ,57442,The Foothills Bank,15-Jul-11,16-Aug-12 +First Peoples Bank,Port St. 
Lucie,FL,34870,"Premier American Bank, N.A.",15-Jul-11,2-Nov-12 +High Trust Bank,Stockbridge,GA,19554,Ameris Bank,15-Jul-11,2-Nov-12 +One Georgia Bank,Atlanta,GA,58238,Ameris Bank,15-Jul-11,2-Nov-12 +Signature Bank,Windsor,CO,57835,Points West Community Bank,8-Jul-11,26-Oct-12 +Colorado Capital Bank,Castle Rock,CO,34522,First-Citizens Bank & Trust Company,8-Jul-11,15-Jan-13 +First Chicago Bank & Trust,Chicago,IL,27935,Northbrook Bank & Trust Company,8-Jul-11,9-Sep-12 +Mountain Heritage Bank,Clayton,GA,57593,First American Bank and Trust Company,24-Jun-11,2-Nov-12 +First Commercial Bank of Tampa Bay,Tampa,FL,27583,Stonegate Bank,17-Jun-11,2-Nov-12 +McIntosh State Bank,Jackson,GA,19237,Hamilton State Bank,17-Jun-11,2-Nov-12 +Atlantic Bank and Trust,Charleston,SC,58420,"First Citizens Bank and Trust Company, Inc.",3-Jun-11,31-Oct-12 +First Heritage Bank,Snohomish,WA,23626,Columbia State Bank,27-May-11,28-Jan-13 +Summit Bank,Burlington,WA,513,Columbia State Bank,20-May-11,22-Jan-13 +First Georgia Banking Company,Franklin,GA,57647,"CertusBank, National Association",20-May-11,13-Nov-12 +Atlantic Southern Bank,Macon,GA,57213,"CertusBank, National Association",20-May-11,31-Oct-12 +Coastal Bank,Cocoa Beach,FL,34898,"Florida Community Bank, a division of Premier American Bank, N.A.",6-May-11,30-Nov-12 +Community Central Bank,Mount Clemens,MI,34234,Talmer Bank & Trust,29-Apr-11,16-Aug-12 +The Park Avenue Bank,Valdosta,GA,19797,Bank of the Ozarks,29-Apr-11,30-Nov-12 +First Choice Community Bank,Dallas,GA,58539,Bank of the Ozarks,29-Apr-11,22-Jan-13 +Cortez Community Bank,Brooksville,FL,57625,"Florida Community Bank, a division of Premier American Bank, N.A.",29-Apr-11,30-Nov-12 +First National Bank of Central Florida,Winter Park,FL,26297,"Florida Community Bank, a division of Premier American Bank, N.A.",29-Apr-11,30-Nov-12 +Heritage Banking Group,Carthage,MS,14273,Trustmark National Bank,15-Apr-11,30-Nov-12 +Rosemount National Bank,Rosemount,MN,24099,Central 
Bank,15-Apr-11,16-Aug-12 +Superior Bank,Birmingham,AL,17750,"Superior Bank, National Association",15-Apr-11,30-Nov-12 +Nexity Bank,Birmingham,AL,19794,AloStar Bank of Commerce,15-Apr-11,4-Sep-12 +New Horizons Bank,East Ellijay,GA,57705,Citizens South Bank,15-Apr-11,16-Aug-12 +Bartow County Bank,Cartersville,GA,21495,Hamilton State Bank,15-Apr-11,22-Jan-13 +Nevada Commerce Bank,Las Vegas,NV,35418,City National Bank,8-Apr-11,9-Sep-12 +Western Springs National Bank and Trust,Western Springs,IL,10086,Heartland Bank and Trust Company,8-Apr-11,22-Jan-13 +The Bank of Commerce,Wood Dale,IL,34292,Advantage National Bank Group,25-Mar-11,22-Jan-13 +Legacy Bank,Milwaukee,WI,34818,Seaway Bank and Trust Company,11-Mar-11,12-Sep-12 +First National Bank of Davis,Davis,OK,4077,The Pauls Valley National Bank,11-Mar-11,20-Aug-12 +Valley Community Bank,St. Charles,IL,34187,First State Bank,25-Feb-11,12-Sep-12 +"San Luis Trust Bank, FSB",San Luis Obispo,CA,34783,First California Bank,18-Feb-11,20-Aug-12 +Charter Oak Bank,Napa,CA,57855,Bank of Marin,18-Feb-11,12-Sep-12 +Citizens Bank of Effingham,Springfield,GA,34601,Heritage Bank of the South,18-Feb-11,2-Nov-12 +Habersham Bank,Clarkesville,GA,151,SCBT National Association,18-Feb-11,2-Nov-12 +Canyon National Bank,Palm Springs,CA,34692,Pacific Premier Bank,11-Feb-11,12-Sep-12 +Badger State Bank,Cassville,WI,13272,Royal Bank,11-Feb-11,12-Sep-12 +Peoples State Bank,Hamtramck,MI,14939,First Michigan Bank,11-Feb-11,22-Jan-13 +Sunshine State Community Bank,Port Orange,FL,35478,"Premier American Bank, N.A.",11-Feb-11,2-Nov-12 +Community First Bank Chicago,Chicago,IL,57948,Northbrook Bank & Trust Company,4-Feb-11,20-Aug-12 +North Georgia Bank,Watkinsville,GA,35242,BankSouth,4-Feb-11,2-Nov-12 +American Trust Bank,Roswell,GA,57432,Renasant Bank,4-Feb-11,31-Oct-12 +First Community Bank,Taos,NM,12261,"U.S. 
Bank, N.A.",28-Jan-11,12-Sep-12 +FirsTier Bank,Louisville,CO,57646,No Acquirer,28-Jan-11,12-Sep-12 +Evergreen State Bank,Stoughton,WI,5328,McFarland State Bank,28-Jan-11,12-Sep-12 +The First State Bank,Camargo,OK,2303,Bank 7,28-Jan-11,12-Sep-12 +United Western Bank,Denver,CO,31293,First-Citizens Bank & Trust Company,21-Jan-11,12-Sep-12 +The Bank of Asheville,Asheville,NC,34516,First Bank,21-Jan-11,2-Nov-12 +CommunitySouth Bank & Trust,Easley,SC,57868,"CertusBank, National Association",21-Jan-11,2-Nov-12 +Enterprise Banking Company,McDonough,GA,19758,No Acquirer,21-Jan-11,2-Nov-12 +Oglethorpe Bank,Brunswick,GA,57440,Bank of the Ozarks,14-Jan-11,2-Nov-12 +Legacy Bank,Scottsdale,AZ,57820,Enterprise Bank & Trust,7-Jan-11,12-Sep-12 +First Commercial Bank of Florida,Orlando,FL,34965,First Southern Bank,7-Jan-11,2-Nov-12 +Community National Bank,Lino Lakes,MN,23306,Farmers & Merchants Savings Bank,17-Dec-10,20-Aug-12 +First Southern Bank,Batesville,AR,58052,Southern Bank,17-Dec-10,20-Aug-12 +"United Americas Bank, N.A.",Atlanta,GA,35065,State Bank and Trust Company,17-Dec-10,2-Nov-12 +"Appalachian Community Bank, FSB",McCaysville,GA,58495,Peoples Bank of East Tennessee,17-Dec-10,31-Oct-12 +Chestatee State Bank,Dawsonville,GA,34578,Bank of the Ozarks,17-Dec-10,2-Nov-12 +"The Bank of Miami,N.A.",Coral Gables,FL,19040,1st United Bank,17-Dec-10,2-Nov-12 +Earthstar Bank,Southampton,PA,35561,Polonia Bank,10-Dec-10,20-Aug-12 +Paramount Bank,Farmington Hills,MI,34673,Level One Bank,10-Dec-10,20-Aug-12 +First Banking Center,Burlington,WI,5287,First Michigan Bank,19-Nov-10,20-Aug-12 +Allegiance Bank of North America,Bala Cynwyd,PA,35078,VIST Bank,19-Nov-10,20-Aug-12 +Gulf State Community Bank,Carrabelle,FL,20340,Centennial Bank,19-Nov-10,2-Nov-12 +Copper Star Bank,Scottsdale,AZ,35463,"Stearns Bank, N.A.",12-Nov-10,20-Aug-12 +Darby Bank & Trust Co.,Vidalia,GA,14580,Ameris Bank,12-Nov-10,15-Jan-13 +Tifton Banking Company,Tifton,GA,57831,Ameris Bank,12-Nov-10,2-Nov-12 +First 
Vietnamese American Bank,Westminster,CA,57885,Grandpoint Bank,5-Nov-10,12-Sep-12 +Pierce Commercial Bank,Tacoma,WA,34411,Heritage Bank,5-Nov-10,20-Aug-12 +Western Commercial Bank,Woodland Hills,CA,58087,First California Bank,5-Nov-10,12-Sep-12 +K Bank,Randallstown,MD,31263,Manufacturers and Traders Trust Company (M&T Bank),5-Nov-10,20-Aug-12 +"First Arizona Savings, A FSB",Scottsdale,AZ,32582,No Acquirer,22-Oct-10,20-Aug-12 +Hillcrest Bank,Overland Park,KS,22173,"Hillcrest Bank, N.A.",22-Oct-10,20-Aug-12 +First Suburban National Bank,Maywood,IL,16089,Seaway Bank and Trust Company,22-Oct-10,20-Aug-12 +The First National Bank of Barnesville,Barnesville,GA,2119,United Bank,22-Oct-10,2-Nov-12 +The Gordon Bank,Gordon,GA,33904,Morris Bank,22-Oct-10,2-Nov-12 +Progress Bank of Florida,Tampa,FL,32251,Bay Cities Bank,22-Oct-10,2-Nov-12 +First Bank of Jacksonville,Jacksonville,FL,27573,Ameris Bank,22-Oct-10,2-Nov-12 +Premier Bank,Jefferson City,MO,34016,Providence Bank,15-Oct-10,20-Aug-12 +WestBridge Bank and Trust Company,Chesterfield,MO,58205,Midland States Bank,15-Oct-10,20-Aug-12 +"Security Savings Bank, F.S.B.",Olathe,KS,30898,Simmons First National Bank,15-Oct-10,20-Aug-12 +Shoreline Bank,Shoreline,WA,35250,GBC International Bank,1-Oct-10,20-Aug-12 +Wakulla Bank,Crawfordville,FL,21777,Centennial Bank,1-Oct-10,2-Nov-12 +North County Bank,Arlington,WA,35053,Whidbey Island Bank,24-Sep-10,20-Aug-12 +Haven Trust Bank Florida,Ponte Vedra Beach,FL,58308,First Southern Bank,24-Sep-10,5-Nov-12 +Maritime Savings Bank,West Allis,WI,28612,"North Shore Bank, FSB",17-Sep-10,20-Aug-12 +Bramble Savings Bank,Milford,OH,27808,Foundation Bank,17-Sep-10,20-Aug-12 +The Peoples Bank,Winder,GA,182,Community & Southern Bank,17-Sep-10,5-Nov-12 +First Commerce Community Bank,Douglasville,GA,57448,Community & Southern Bank,17-Sep-10,15-Jan-13 +Bank of Ellijay,Ellijay,GA,58197,Community & Southern Bank,17-Sep-10,15-Jan-13 +ISN Bank,Cherry Hill,NJ,57107,Customers Bank,17-Sep-10,22-Aug-12 +Horizon 
Bank,Bradenton,FL,35061,Bank of the Ozarks,10-Sep-10,5-Nov-12 +Sonoma Valley Bank,Sonoma,CA,27259,Westamerica Bank,20-Aug-10,12-Sep-12 +Los Padres Bank,Solvang,CA,32165,Pacific Western Bank,20-Aug-10,12-Sep-12 +Butte Community Bank,Chico,CA,33219,"Rabobank, N.A.",20-Aug-10,12-Sep-12 +Pacific State Bank,Stockton,CA,27090,"Rabobank, N.A.",20-Aug-10,12-Sep-12 +ShoreBank,Chicago,IL,15640,Urban Partnership Bank,20-Aug-10,16-May-13 +Imperial Savings and Loan Association,Martinsville,VA,31623,"River Community Bank, N.A.",20-Aug-10,24-Aug-12 +Independent National Bank,Ocala,FL,27344,"CenterState Bank of Florida, N.A.",20-Aug-10,5-Nov-12 +Community National Bank at Bartow,Bartow,FL,25266,"CenterState Bank of Florida, N.A.",20-Aug-10,5-Nov-12 +Palos Bank and Trust Company,Palos Heights,IL,17599,First Midwest Bank,13-Aug-10,22-Aug-12 +Ravenswood Bank,Chicago,IL,34231,Northbrook Bank & Trust Company,6-Aug-10,22-Aug-12 +LibertyBank,Eugene,OR,31964,Home Federal Bank,30-Jul-10,22-Aug-12 +The Cowlitz Bank,Longview,WA,22643,Heritage Bank,30-Jul-10,22-Aug-12 +Coastal Community Bank,Panama City Beach,FL,9619,Centennial Bank,30-Jul-10,5-Nov-12 +Bayside Savings Bank,Port Saint Joe,FL,57669,Centennial Bank,30-Jul-10,5-Nov-12 +Northwest Bank & Trust,Acworth,GA,57658,State Bank and Trust Company,30-Jul-10,5-Nov-12 +Home Valley Bank,Cave Junction,OR,23181,South Valley Bank & Trust,23-Jul-10,12-Sep-12 +SouthwestUSA Bank,Las Vegas,NV,35434,Plaza Bank,23-Jul-10,22-Aug-12 +Community Security Bank,New Prague,MN,34486,Roundbank,23-Jul-10,12-Sep-12 +Thunder Bank,Sylvan Grove,KS,10506,The Bennington State Bank,23-Jul-10,13-Sep-12 +Williamsburg First National Bank,Kingstree,SC,17837,"First Citizens Bank and Trust Company, Inc.",23-Jul-10,5-Nov-12 +Crescent Bank and Trust Company,Jasper,GA,27559,Renasant Bank,23-Jul-10,5-Nov-12 +Sterling Bank,Lantana,FL,32536,IBERIABANK,23-Jul-10,5-Nov-12 +"Mainstreet Savings Bank, FSB",Hastings,MI,28136,Commercial Bank,16-Jul-10,13-Sep-12 +Olde Cypress Community 
Bank,Clewiston,FL,28864,"CenterState Bank of Florida, N.A.",16-Jul-10,5-Nov-12 +Turnberry Bank,Aventura,FL,32280,NAFH National Bank,16-Jul-10,5-Nov-12 +Metro Bank of Dade County,Miami,FL,25172,NAFH National Bank,16-Jul-10,5-Nov-12 +First National Bank of the South,Spartanburg,SC,35383,NAFH National Bank,16-Jul-10,5-Nov-12 +Woodlands Bank,Bluffton,SC,32571,Bank of the Ozarks,16-Jul-10,5-Nov-12 +Home National Bank,Blackwell,OK,11636,RCB Bank,9-Jul-10,10-Dec-12 +USA Bank,Port Chester,NY,58072,New Century Bank,9-Jul-10,14-Sep-12 +Ideal Federal Savings Bank,Baltimore,MD,32456,No Acquirer,9-Jul-10,14-Sep-12 +Bay National Bank,Baltimore,MD,35462,"Bay Bank, FSB",9-Jul-10,15-Jan-13 +High Desert State Bank,Albuquerque,NM,35279,First American Bank,25-Jun-10,14-Sep-12 +First National Bank,Savannah,GA,34152,"The Savannah Bank, N.A.",25-Jun-10,5-Nov-12 +Peninsula Bank,Englewood,FL,26563,"Premier American Bank, N.A.",25-Jun-10,5-Nov-12 +Nevada Security Bank,Reno,NV,57110,Umpqua Bank,18-Jun-10,23-Aug-12 +Washington First International Bank,Seattle,WA,32955,East West Bank,11-Jun-10,14-Sep-12 +TierOne Bank,Lincoln,NE,29341,Great Western Bank,4-Jun-10,14-Sep-12 +Arcola Homestead Savings Bank,Arcola,IL,31813,No Acquirer,4-Jun-10,14-Sep-12 +First National Bank,Rosedale,MS,15814,The Jefferson Bank,4-Jun-10,5-Nov-12 +Sun West Bank,Las Vegas,NV,34785,City National Bank,28-May-10,14-Sep-12 +"Granite Community Bank, NA",Granite Bay,CA,57315,Tri Counties Bank,28-May-10,14-Sep-12 +Bank of Florida - Tampa,Tampa,FL,57814,EverBank,28-May-10,5-Nov-12 +Bank of Florida - Southwest,Naples,FL,35106,EverBank,28-May-10,5-Nov-12 +Bank of Florida - Southeast,Fort Lauderdale,FL,57360,EverBank,28-May-10,5-Nov-12 +Pinehurst Bank,Saint Paul,MN,57735,Coulee Bank,21-May-10,26-Oct-12 +Midwest Bank and Trust Company,Elmwood Park,IL,18117,"FirstMerit Bank, N.A.",14-May-10,23-Aug-12 +Southwest Community Bank,Springfield,MO,34255,Simmons First National Bank,14-May-10,23-Aug-12 +New Liberty 
Bank,Plymouth,MI,35586,Bank of Ann Arbor,14-May-10,23-Aug-12 +Satilla Community Bank,Saint Marys,GA,35114,Ameris Bank,14-May-10,5-Nov-12 +1st Pacific Bank of California,San Diego,CA,35517,City National Bank,7-May-10,13-Dec-12 +Towne Bank of Arizona,Mesa,AZ,57697,Commerce Bank of Arizona,7-May-10,23-Aug-12 +Access Bank,Champlin,MN,16476,PrinsBank,7-May-10,23-Aug-12 +The Bank of Bonifay,Bonifay,FL,14246,First Federal Bank of Florida,7-May-10,5-Nov-12 +Frontier Bank,Everett,WA,22710,"Union Bank, N.A.",30-Apr-10,15-Jan-13 +BC National Banks,Butler,MO,17792,Community First Bank,30-Apr-10,23-Aug-12 +Champion Bank,Creve Coeur,MO,58362,BankLiberty,30-Apr-10,23-Aug-12 +CF Bancorp,Port Huron,MI,30005,First Michigan Bank,30-Apr-10,15-Jan-13 +Westernbank Puerto Rico,Mayaguez,PR,31027,Banco Popular de Puerto Rico,30-Apr-10,5-Nov-12 +R-G Premier Bank of Puerto Rico,Hato Rey,PR,32185,Scotiabank de Puerto Rico,30-Apr-10,5-Nov-12 +Eurobank,San Juan,PR,27150,Oriental Bank and Trust,30-Apr-10,5-Nov-12 +Wheatland Bank,Naperville,IL,58429,Wheaton Bank & Trust,23-Apr-10,23-Aug-12 +Peotone Bank and Trust Company,Peotone,IL,10888,First Midwest Bank,23-Apr-10,23-Aug-12 +Lincoln Park Savings Bank,Chicago,IL,30600,Northbrook Bank & Trust Company,23-Apr-10,23-Aug-12 +New Century Bank,Chicago,IL,34821,"MB Financial Bank, N.A.",23-Apr-10,23-Aug-12 +Citizens Bank and Trust Company of Chicago,Chicago,IL,34658,Republic Bank of Chicago,23-Apr-10,23-Aug-12 +Broadway Bank,Chicago,IL,22853,"MB Financial Bank, N.A.",23-Apr-10,23-Aug-12 +"Amcore Bank, National Association",Rockford,IL,3735,Harris N.A.,23-Apr-10,23-Aug-12 +City Bank,Lynnwood,WA,21521,Whidbey Island Bank,16-Apr-10,14-Sep-12 +Tamalpais Bank,San Rafael,CA,33493,"Union Bank, N.A.",16-Apr-10,23-Aug-12 +Innovative Bank,Oakland,CA,23876,Center Bank,16-Apr-10,23-Aug-12 +Butler Bank,Lowell,MA,26619,People's United Bank,16-Apr-10,23-Aug-12 +Riverside National Bank of Florida,Fort Pierce,FL,24067,"TD Bank, N.A.",16-Apr-10,5-Nov-12 +AmericanFirst 
Bank,Clermont,FL,57724,"TD Bank, N.A.",16-Apr-10,31-Oct-12 +First Federal Bank of North Florida,Palatka,FL,28886,"TD Bank, N.A.",16-Apr-10,15-Jan-13 +Lakeside Community Bank,Sterling Heights,MI,34878,No Acquirer,16-Apr-10,23-Aug-12 +Beach First National Bank,Myrtle Beach,SC,34242,Bank of North Carolina,9-Apr-10,5-Nov-12 +Desert Hills Bank,Phoenix,AZ,57060,New York Community Bank,26-Mar-10,23-Aug-12 +Unity National Bank,Cartersville,GA,34678,Bank of the Ozarks,26-Mar-10,14-Sep-12 +Key West Bank,Key West,FL,34684,Centennial Bank,26-Mar-10,23-Aug-12 +McIntosh Commercial Bank,Carrollton,GA,57399,CharterBank,26-Mar-10,23-Aug-12 +State Bank of Aurora,Aurora,MN,8221,Northern State Bank,19-Mar-10,23-Aug-12 +First Lowndes Bank,Fort Deposit,AL,24957,First Citizens Bank,19-Mar-10,23-Aug-12 +Bank of Hiawassee,Hiawassee,GA,10054,Citizens South Bank,19-Mar-10,23-Aug-12 +Appalachian Community Bank,Ellijay,GA,33989,Community & Southern Bank,19-Mar-10,31-Oct-12 +Advanta Bank Corp.,Draper,UT,33535,No Acquirer,19-Mar-10,14-Sep-12 +Century Security Bank,Duluth,GA,58104,Bank of Upson,19-Mar-10,23-Aug-12 +American National Bank,Parma,OH,18806,The National Bank and Trust Company,19-Mar-10,23-Aug-12 +Statewide Bank,Covington,LA,29561,Home Bank,12-Mar-10,23-Aug-12 +Old Southern Bank,Orlando,FL,58182,Centennial Bank,12-Mar-10,23-Aug-12 +The Park Avenue Bank,New York,NY,27096,Valley National Bank,12-Mar-10,23-Aug-12 +LibertyPointe Bank,New York,NY,58071,Valley National Bank,11-Mar-10,23-Aug-12 +Centennial Bank,Ogden,UT,34430,No Acquirer,5-Mar-10,14-Sep-12 +Waterfield Bank,Germantown,MD,34976,No Acquirer,5-Mar-10,23-Aug-12 +Bank of Illinois,Normal,IL,9268,Heartland Bank and Trust Company,5-Mar-10,23-Aug-12 +Sun American Bank,Boca Raton,FL,27126,First-Citizens Bank & Trust Company,5-Mar-10,23-Aug-12 +Rainier Pacific Bank,Tacoma,WA,38129,Umpqua Bank,26-Feb-10,23-Aug-12 +Carson River Community Bank,Carson City,NV,58352,Heritage Bank of Nevada,26-Feb-10,15-Jan-13 +"La Jolla Bank, FSB",La 
Jolla,CA,32423,"OneWest Bank, FSB",19-Feb-10,24-Aug-12 +George Washington Savings Bank,Orland Park,IL,29952,"FirstMerit Bank, N.A.",19-Feb-10,24-Aug-12 +The La Coste National Bank,La Coste,TX,3287,Community National Bank,19-Feb-10,14-Sep-12 +Marco Community Bank,Marco Island,FL,57586,Mutual of Omaha Bank,19-Feb-10,24-Aug-12 +1st American State Bank of Minnesota,Hancock,MN,15448,"Community Development Bank, FSB",5-Feb-10,24-Aug-12 +American Marine Bank,Bainbridge Island,WA,16730,Columbia State Bank,29-Jan-10,24-Aug-12 +First Regional Bank,Los Angeles,CA,23011,First-Citizens Bank & Trust Company,29-Jan-10,24-Aug-12 +Community Bank and Trust,Cornelia,GA,5702,SCBT National Association,29-Jan-10,15-Jan-13 +"Marshall Bank, N.A.",Hallock,MN,16133,United Valley Bank,29-Jan-10,23-Aug-12 +Florida Community Bank,Immokalee,FL,5672,"Premier American Bank, N.A.",29-Jan-10,15-Jan-13 +First National Bank of Georgia,Carrollton,GA,16480,Community & Southern Bank,29-Jan-10,13-Dec-12 +Columbia River Bank,The Dalles,OR,22469,Columbia State Bank,22-Jan-10,14-Sep-12 +Evergreen Bank,Seattle,WA,20501,Umpqua Bank,22-Jan-10,15-Jan-13 +Charter Bank,Santa Fe,NM,32498,Charter Bank,22-Jan-10,23-Aug-12 +Bank of Leeton,Leeton,MO,8265,"Sunflower Bank, N.A.",22-Jan-10,15-Jan-13 +Premier American Bank,Miami,FL,57147,"Premier American Bank, N.A.",22-Jan-10,13-Dec-12 +Barnes Banking Company,Kaysville,UT,1252,No Acquirer,15-Jan-10,23-Aug-12 +St. Stephen State Bank,St. Stephen,MN,17522,First State Bank of St. 
Joseph,15-Jan-10,23-Aug-12 +Town Community Bank & Trust,Antioch,IL,34705,First American Bank,15-Jan-10,23-Aug-12 +Horizon Bank,Bellingham,WA,22977,Washington Federal Savings and Loan Association,8-Jan-10,23-Aug-12 +"First Federal Bank of California, F.S.B.",Santa Monica,CA,28536,"OneWest Bank, FSB",18-Dec-09,23-Aug-12 +Imperial Capital Bank,La Jolla,CA,26348,City National Bank,18-Dec-09,5-Sep-12 +Independent Bankers' Bank,Springfield,IL,26820,The Independent BankersBank (TIB),18-Dec-09,23-Aug-12 +New South Federal Savings Bank,Irondale,AL,32276,Beal Bank,18-Dec-09,23-Aug-12 +Citizens State Bank,New Baltimore,MI,1006,No Acquirer,18-Dec-09,5-Nov-12 +Peoples First Community Bank,Panama City,FL,32167,Hancock Bank,18-Dec-09,5-Nov-12 +RockBridge Commercial Bank,Atlanta,GA,58315,No Acquirer,18-Dec-09,5-Nov-12 +SolutionsBank,Overland Park,KS,4731,Arvest Bank,11-Dec-09,23-Aug-12 +"Valley Capital Bank, N.A.",Mesa,AZ,58399,Enterprise Bank & Trust,11-Dec-09,23-Aug-12 +"Republic Federal Bank, N.A.",Miami,FL,22846,1st United Bank,11-Dec-09,5-Nov-12 +Greater Atlantic Bank,Reston,VA,32583,Sonabank,4-Dec-09,5-Nov-12 +Benchmark Bank,Aurora,IL,10440,"MB Financial Bank, N.A.",4-Dec-09,23-Aug-12 +AmTrust Bank,Cleveland,OH,29776,New York Community Bank,4-Dec-09,5-Nov-12 +The Tattnall Bank,Reidsville,GA,12080,Heritage Bank of the South,4-Dec-09,5-Nov-12 +First Security National Bank,Norcross,GA,26290,State Bank and Trust Company,4-Dec-09,5-Nov-12 +The Buckhead Community Bank,Atlanta,GA,34663,State Bank and Trust Company,4-Dec-09,5-Nov-12 +Commerce Bank of Southwest Florida,Fort Myers,FL,58016,Central Bank,20-Nov-09,5-Nov-12 +Pacific Coast National Bank,San Clemente,CA,57914,Sunwest Bank,13-Nov-09,22-Aug-12 +Orion Bank,Naples,FL,22427,IBERIABANK,13-Nov-09,5-Nov-12 +"Century Bank, F.S.B.",Sarasota,FL,32267,IBERIABANK,13-Nov-09,22-Aug-12 +United Commercial Bank,San Francisco,CA,32469,East West Bank,6-Nov-09,5-Nov-12 +Gateway Bank of St. Louis,St. 
Louis,MO,19450,Central Bank of Kansas City,6-Nov-09,22-Aug-12 +Prosperan Bank,Oakdale,MN,35074,"Alerus Financial, N.A.",6-Nov-09,22-Aug-12 +Home Federal Savings Bank,Detroit,MI,30329,Liberty Bank and Trust Company,6-Nov-09,22-Aug-12 +United Security Bank,Sparta,GA,22286,Ameris Bank,6-Nov-09,15-Jan-13 +North Houston Bank,Houston,TX,18776,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Madisonville State Bank,Madisonville,TX,33782,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Citizens National Bank,Teague,TX,25222,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Park National Bank,Chicago,IL,11677,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Pacific National Bank,San Francisco,CA,30006,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +California National Bank,Los Angeles,CA,34659,U.S. Bank N.A.,30-Oct-09,5-Sep-12 +San Diego National Bank,San Diego,CA,23594,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +Community Bank of Lemont,Lemont,IL,35291,U.S. Bank N.A.,30-Oct-09,15-Jan-13 +"Bank USA, N.A.",Phoenix,AZ,32218,U.S. Bank N.A.,30-Oct-09,22-Aug-12 +First DuPage Bank,Westmont,IL,35038,First Midwest Bank,23-Oct-09,22-Aug-12 +Riverview Community Bank,Otsego,MN,57525,Central Bank,23-Oct-09,22-Aug-12 +Bank of Elmwood,Racine,WI,18321,Tri City National Bank,23-Oct-09,22-Aug-12 +Flagship National Bank,Bradenton,FL,35044,First Federal Bank of Florida,23-Oct-09,22-Aug-12 +Hillcrest Bank Florida,Naples,FL,58336,Stonegate Bank,23-Oct-09,22-Aug-12 +American United Bank,Lawrenceville,GA,57794,Ameris Bank,23-Oct-09,5-Sep-12 +Partners Bank,Naples,FL,57959,Stonegate Bank,23-Oct-09,15-Jan-13 +San Joaquin Bank,Bakersfield,CA,23266,Citizens Business Bank,16-Oct-09,22-Aug-12 +Southern Colorado National Bank,Pueblo,CO,57263,Legacy Bank,2-Oct-09,5-Sep-12 +Jennings State Bank,Spring Grove,MN,11416,Central Bank,2-Oct-09,21-Aug-12 +Warren Bank,Warren,MI,34824,The Huntington National Bank,2-Oct-09,21-Aug-12 +Georgian Bank,Atlanta,GA,57151,"First Citizens Bank and Trust Company, Inc.",25-Sep-09,21-Aug-12 +"Irwin Union Bank, F.S.B.",Louisville,KY,57068,"First 
Financial Bank, N.A.",18-Sep-09,5-Sep-12 +Irwin Union Bank and Trust Company,Columbus,IN,10100,"First Financial Bank, N.A.",18-Sep-09,21-Aug-12 +Venture Bank,Lacey,WA,22868,First-Citizens Bank & Trust Company,11-Sep-09,21-Aug-12 +Brickwell Community Bank,Woodbury,MN,57736,CorTrust Bank N.A.,11-Sep-09,15-Jan-13 +"Corus Bank, N.A.",Chicago,IL,13693,"MB Financial Bank, N.A.",11-Sep-09,21-Aug-12 +First State Bank,Flagstaff,AZ,34875,Sunwest Bank,4-Sep-09,15-Jan-13 +Platinum Community Bank,Rolling Meadows,IL,35030,No Acquirer,4-Sep-09,21-Aug-12 +Vantus Bank,Sioux City,IN,27732,Great Southern Bank,4-Sep-09,21-Aug-12 +InBank,Oak Forest,IL,20203,"MB Financial Bank, N.A.",4-Sep-09,21-Aug-12 +First Bank of Kansas City,Kansas City,MO,25231,Great American Bank,4-Sep-09,21-Aug-12 +Affinity Bank,Ventura,CA,27197,Pacific Western Bank,28-Aug-09,21-Aug-12 +Mainstreet Bank,Forest Lake,MN,1909,Central Bank,28-Aug-09,21-Aug-12 +Bradford Bank,Baltimore,MD,28312,Manufacturers and Traders Trust Company (M&T Bank),28-Aug-09,15-Jan-13 +Guaranty Bank,Austin,TX,32618,BBVA Compass,21-Aug-09,21-Aug-12 +CapitalSouth Bank,Birmingham,AL,22130,IBERIABANK,21-Aug-09,15-Jan-13 +First Coweta Bank,Newnan,GA,57702,United Bank,21-Aug-09,15-Jan-13 +ebank,Atlanta,GA,34682,"Stearns Bank, N.A.",21-Aug-09,21-Aug-12 +Community Bank of Nevada,Las Vegas,NV,34043,No Acquirer,14-Aug-09,21-Aug-12 +Community Bank of Arizona,Phoenix,AZ,57645,MidFirst Bank,14-Aug-09,21-Aug-12 +"Union Bank, National Association",Gilbert,AZ,34485,MidFirst Bank,14-Aug-09,21-Aug-12 +Colonial Bank,Montgomery,AL,9609,"Branch Banking & Trust Company, (BB&T)",14-Aug-09,5-Sep-12 +Dwelling House Savings and Loan Association,Pittsburgh,PA,31559,"PNC Bank, N.A.",14-Aug-09,15-Jan-13 +Community First Bank,Prineville,OR,23268,Home Federal Bank,7-Aug-09,15-Jan-13 +Community National Bank of Sarasota County,Venice,FL,27183,"Stearns Bank, N.A.",7-Aug-09,20-Aug-12 +First State Bank,Sarasota,FL,27364,"Stearns Bank, N.A.",7-Aug-09,20-Aug-12 +Mutual 
Bank,Harvey,IL,18659,United Central Bank,31-Jul-09,20-Aug-12 +First BankAmericano,Elizabeth,NJ,34270,Crown Bank,31-Jul-09,20-Aug-12 +Peoples Community Bank,West Chester,OH,32288,"First Financial Bank, N.A.",31-Jul-09,20-Aug-12 +Integrity Bank,Jupiter,FL,57604,Stonegate Bank,31-Jul-09,20-Aug-12 +First State Bank of Altus,Altus,OK,9873,Herring Bank,31-Jul-09,20-Aug-12 +Security Bank of Jones County,Gray,GA,8486,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Houston County,Perry,GA,27048,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Bibb County,Macon,GA,27367,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of North Metro,Woodstock,GA,57105,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of North Fulton,Alpharetta,GA,57430,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Security Bank of Gwinnett County,Suwanee,GA,57346,State Bank and Trust Company,24-Jul-09,20-Aug-12 +Waterford Village Bank,Williamsville,NY,58065,"Evans Bank, N.A.",24-Jul-09,20-Aug-12 +Temecula Valley Bank,Temecula,CA,34341,First-Citizens Bank & Trust Company,17-Jul-09,20-Aug-12 +Vineyard Bank,Rancho Cucamonga,CA,23556,California Bank & Trust,17-Jul-09,20-Aug-12 +BankFirst,Sioux Falls,SD,34103,"Alerus Financial, N.A.",17-Jul-09,20-Aug-12 +First Piedmont Bank,Winder,GA,34594,First American Bank and Trust Company,17-Jul-09,15-Jan-13 +Bank of Wyoming,Thermopolis,WY,22754,Central Bank & Trust,10-Jul-09,20-Aug-12 +Founders Bank,Worth,IL,18390,The PrivateBank and Trust Company,2-Jul-09,20-Aug-12 +Millennium State Bank of Texas,Dallas,TX,57667,State Bank of Texas,2-Jul-09,26-Oct-12 +First National Bank of Danville,Danville,IL,3644,"First Financial Bank, N.A.",2-Jul-09,20-Aug-12 +Elizabeth State Bank,Elizabeth,IL,9262,Galena State Bank and Trust Company,2-Jul-09,20-Aug-12 +Rock River Bank,Oregon,IL,15302,The Harvard State Bank,2-Jul-09,20-Aug-12 +First State Bank of Winchester,Winchester,IL,11710,The First National Bank of 
Beardstown,2-Jul-09,20-Aug-12 +John Warner Bank,Clinton,IL,12093,State Bank of Lincoln,2-Jul-09,20-Aug-12 +Mirae Bank,Los Angeles,CA,57332,Wilshire State Bank,26-Jun-09,20-Aug-12 +MetroPacific Bank,Irvine,CA,57893,Sunwest Bank,26-Jun-09,20-Aug-12 +Horizon Bank,Pine City,MN,9744,"Stearns Bank, N.A.",26-Jun-09,20-Aug-12 +Neighborhood Community Bank,Newnan,GA,35285,CharterBank,26-Jun-09,20-Aug-12 +Community Bank of West Georgia,Villa Rica,GA,57436,No Acquirer,26-Jun-09,17-Aug-12 +First National Bank of Anthony,Anthony,KS,4614,Bank of Kansas,19-Jun-09,17-Aug-12 +Cooperative Bank,Wilmington,NC,27837,First Bank,19-Jun-09,17-Aug-12 +Southern Community Bank,Fayetteville,GA,35251,United Community Bank,19-Jun-09,17-Aug-12 +Bank of Lincolnwood,Lincolnwood,IL,17309,Republic Bank of Chicago,5-Jun-09,17-Aug-12 +Citizens National Bank,Macomb,IL,5757,Morton Community Bank,22-May-09,4-Sep-12 +Strategic Capital Bank,Champaign,IL,35175,Midland States Bank,22-May-09,4-Sep-12 +"BankUnited, FSB",Coral Gables,FL,32247,BankUnited,21-May-09,17-Aug-12 +Westsound Bank,Bremerton,WA,34843,Kitsap Bank,8-May-09,4-Sep-12 +America West Bank,Layton,UT,35461,Cache Valley Bank,1-May-09,17-Aug-12 +Citizens Community Bank,Ridgewood,NJ,57563,North Jersey Community Bank,1-May-09,4-Sep-12 +"Silverton Bank, NA",Atlanta,GA,26535,No Acquirer,1-May-09,17-Aug-12 +First Bank of Idaho,Ketchum,ID,34396,"U.S. 
Bank, N.A.",24-Apr-09,17-Aug-12 +First Bank of Beverly Hills,Calabasas,CA,32069,No Acquirer,24-Apr-09,4-Sep-12 +Michigan Heritage Bank,Farmington Hills,MI,34369,Level One Bank,24-Apr-09,17-Aug-12 +American Southern Bank,Kennesaw,GA,57943,Bank of North Georgia,24-Apr-09,17-Aug-12 +Great Basin Bank of Nevada,Elko,NV,33824,Nevada State Bank,17-Apr-09,4-Sep-12 +American Sterling Bank,Sugar Creek,MO,8266,Metcalf Bank,17-Apr-09,31-Aug-12 +New Frontier Bank,Greeley,CO,34881,No Acquirer,10-Apr-09,4-Sep-12 +Cape Fear Bank,Wilmington,NC,34639,First Federal Savings and Loan Association,10-Apr-09,17-Aug-12 +Omni National Bank,Atlanta,GA,22238,No Acquirer,27-Mar-09,17-Aug-12 +"TeamBank, NA",Paola,KS,4754,Great Southern Bank,20-Mar-09,17-Aug-12 +Colorado National Bank,Colorado Springs,CO,18896,Herring Bank,20-Mar-09,17-Aug-12 +FirstCity Bank,Stockbridge,GA,18243,No Acquirer,20-Mar-09,17-Aug-12 +Freedom Bank of Georgia,Commerce,GA,57558,Northeast Georgia Bank,6-Mar-09,17-Aug-12 +Security Savings Bank,Henderson,NV,34820,Bank of Nevada,27-Feb-09,7-Sep-12 +Heritage Community Bank,Glenwood,IL,20078,"MB Financial Bank, N.A.",27-Feb-09,17-Aug-12 +Silver Falls Bank,Silverton,OR,35399,Citizens Bank,20-Feb-09,17-Aug-12 +Pinnacle Bank of Oregon,Beaverton,OR,57342,Washington Trust Bank of Spokane,13-Feb-09,17-Aug-12 +Corn Belt Bank & Trust Co.,Pittsfield,IL,16500,The Carlinville National Bank,13-Feb-09,17-Aug-12 +Riverside Bank of the Gulf Coast,Cape Coral,FL,34563,TIB Bank,13-Feb-09,17-Aug-12 +Sherman County Bank,Loup City,NE,5431,Heritage Bank,13-Feb-09,17-Aug-12 +County Bank,Merced,CA,22574,Westamerica Bank,6-Feb-09,4-Sep-12 +Alliance Bank,Culver City,CA,23124,California Bank & Trust,6-Feb-09,16-Aug-12 +FirstBank Financial Services,McDonough,GA,57017,Regions Bank,6-Feb-09,16-Aug-12 +Ocala National Bank,Ocala,FL,26538,"CenterState Bank of Florida, N.A.",30-Jan-09,4-Sep-12 +Suburban FSB,Crofton,MD,30763,Bank of Essex,30-Jan-09,16-Aug-12 +MagnetBank,Salt Lake City,UT,58001,No 
Acquirer,30-Jan-09,16-Aug-12 +1st Centennial Bank,Redlands,CA,33025,First California Bank,23-Jan-09,16-Aug-12 +Bank of Clark County,Vancouver,WA,34959,Umpqua Bank,16-Jan-09,16-Aug-12 +National Bank of Commerce,Berkeley,IL,19733,Republic Bank of Chicago,16-Jan-09,16-Aug-12 +Sanderson State Bank,Sanderson,TX,11568,The Pecos County State Bank,12-Dec-08,4-Sep-12 +Haven Trust Bank,Duluth,GA,35379,"Branch Banking & Trust Company, (BB&T)",12-Dec-08,16-Aug-12 +First Georgia Community Bank,Jackson,GA,34301,United Bank,5-Dec-08,16-Aug-12 +PFF Bank & Trust,Pomona,CA,28344,"U.S. Bank, N.A.",21-Nov-08,4-Jan-13 +Downey Savings & Loan,Newport Beach,CA,30968,"U.S. Bank, N.A.",21-Nov-08,4-Jan-13 +Community Bank,Loganville,GA,16490,Bank of Essex,21-Nov-08,4-Sep-12 +Security Pacific Bank,Los Angeles,CA,23595,Pacific Western Bank,7-Nov-08,28-Aug-12 +"Franklin Bank, SSB",Houston,TX,26870,Prosperity Bank,7-Nov-08,16-Aug-12 +Freedom Bank,Bradenton,FL,57930,Fifth Third Bank,31-Oct-08,16-Aug-12 +Alpha Bank & Trust,Alpharetta,GA,58241,"Stearns Bank, N.A.",24-Oct-08,16-Aug-12 +Meridian Bank,Eldred,IL,13789,National Bank,10-Oct-08,31-May-12 +Main Street Bank,Northville,MI,57654,Monroe Bank & Trust,10-Oct-08,16-Aug-12 +Washington Mutual Bank,Henderson,NV,32633,JP Morgan Chase Bank,25-Sep-08,16-Aug-12 +Ameribank,Northfork,WV,6782,The Citizens Savings Bank,19-Sep-08,16-Aug-12 +Silver State Bank,Henderson,NV,34194,Nevada State Bank,5-Sep-08,16-Aug-12 +Integrity Bank,Alpharetta,GA,35469,Regions Bank,29-Aug-08,16-Aug-12 +Columbian Bank & Trust,Topeka,KS,22728,Citizens Bank & Trust,22-Aug-08,16-Aug-12 +First Priority Bank,Bradenton,FL,57523,SunTrust Bank,1-Aug-08,16-Aug-12 +"First Heritage Bank, NA",Newport Beach,CA,57961,Mutual of Omaha Bank,25-Jul-08,28-Aug-12 +First National Bank of Nevada,Reno,NV,27011,Mutual of Omaha Bank,25-Jul-08,28-Aug-12 +IndyMac Bank,Pasadena,CA,29730,"OneWest Bank, FSB",11-Jul-08,28-Aug-12 +"First Integrity Bank, NA",Staples,MN,12736,First International Bank and 
Trust,30-May-08,28-Aug-12 +"ANB Financial, NA",Bentonville,AR,33901,Pulaski Bank and Trust Company,9-May-08,28-Aug-12 +Hume Bank,Hume,MO,1971,Security Bank,7-Mar-08,28-Aug-12 +Douglass National Bank,Kansas City,MO,24660,Liberty Bank and Trust Company,25-Jan-08,26-Oct-12 +Miami Valley Bank,Lakeview,OH,16848,The Citizens Banking Company,4-Oct-07,28-Aug-12 +NetBank,Alpharetta,GA,32575,ING DIRECT,28-Sep-07,28-Aug-12 +Metropolitan Savings Bank,Pittsburgh,PA,35353,Allegheny Valley Bank of Pittsburgh,2-Feb-07,27-Oct-10 +Bank of Ephraim,Ephraim,UT,1249,Far West Bank,25-Jun-04,9-Apr-08 +Reliance Bank,White Plains,NY,26778,Union State Bank,19-Mar-04,9-Apr-08 +Guaranty National Bank of Tallahassee,Tallahassee,FL,26838,Hancock Bank of Florida,12-Mar-04,5-Jun-12 +Dollar Savings Bank,Newark,NJ,31330,No Acquirer,14-Feb-04,9-Apr-08 +Pulaski Savings Bank,Philadelphia,PA,27203,Earthstar Bank,14-Nov-03,22-Jul-05 +First National Bank of Blanchardville,Blanchardville,WI,11639,The Park Bank,9-May-03,5-Jun-12 +Southern Pacific Bank,Torrance,CA,27094,Beal Bank,7-Feb-03,20-Oct-08 +Farmers Bank of Cheneyville,Cheneyville,LA,16445,Sabine State Bank & Trust,17-Dec-02,20-Oct-04 +Bank of Alamo,Alamo,TN,9961,No Acquirer,8-Nov-02,18-Mar-05 +AmTrade International Bank,Atlanta,GA,33784,No Acquirer,30-Sep-02,11-Sep-06 +Universal Federal Savings Bank,Chicago,IL,29355,Chicago Community Bank,27-Jun-02,9-Apr-08 +Connecticut Bank of Commerce,Stamford,CT,19183,Hudson United Bank,26-Jun-02,14-Feb-12 +New Century Bank,Shelby Township,MI,34979,No Acquirer,28-Mar-02,18-Mar-05 +Net 1st National Bank,Boca Raton,FL,26652,Bank Leumi USA,1-Mar-02,9-Apr-08 +"NextBank, NA",Phoenix,AZ,22314,No Acquirer,7-Feb-02,27-Aug-10 +Oakwood Deposit Bank Co.,Oakwood,OH,8966,The State Bank & Trust Company,1-Feb-02,25-Oct-12 +Bank of Sierra Blanca,Sierra Blanca,TX,22002,The Security State Bank of Pecos,18-Jan-02,6-Nov-03 +"Hamilton Bank, NA",Miami,FL,24382,Israel Discount Bank of New York,11-Jan-02,5-Jun-12 +Sinclair National 
Bank,Gravette,AR,34248,Delta Trust & Bank,7-Sep-01,10-Feb-04 +"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB",27-Jul-01,5-Jun-12 +Malta National Bank,Malta,OH,6629,North Valley Bank,3-May-01,18-Nov-02 +First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,2-Feb-01,18-Feb-03 +National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,14-Dec-00,17-Mar-05 +Bank of Honolulu,Honolulu,HI,21029,Bank of the Orient,13-Oct-00,17-Mar-05 diff --git a/pandas/tests/io/data/csv/iris.csv b/pandas/tests/io/data/csv/iris.csv new file mode 100644 index 00000000..c19b9c36 --- /dev/null +++ b/pandas/tests/io/data/csv/iris.csv @@ -0,0 +1,151 @@ +SepalLength,SepalWidth,PetalLength,PetalWidth,Name +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa 
+5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor +6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica 
+6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica \ No newline at end of file diff --git a/pandas/tests/io/data/csv/test1.csv b/pandas/tests/io/data/csv/test1.csv new file mode 100644 index 00000000..4bdb6294 --- /dev/null +++ b/pandas/tests/io/data/csv/test1.csv @@ -0,0 +1,8 @@ +index,A,B,C,D +2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169 +2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967 +2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952 +2000-01-06 
00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227 +2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917 +2000-01-10 00:00:00,0.836648671666,0.246461918642,0.588542635376,1.0627820613 +2000-01-11 00:00:00,-0.157160753327,1.34030689438,1.19577795622,-1.09700699751 \ No newline at end of file diff --git a/pandas/tests/io/data/csv/test_mmap.csv b/pandas/tests/io/data/csv/test_mmap.csv new file mode 100644 index 00000000..cc2cd7c3 --- /dev/null +++ b/pandas/tests/io/data/csv/test_mmap.csv @@ -0,0 +1,5 @@ +a,b,c +1,one,I +2,two,II + +3,three,III diff --git a/pandas/tests/io/data/csv/tips.csv b/pandas/tests/io/data/csv/tips.csv new file mode 100644 index 00000000..856a65a6 --- /dev/null +++ b/pandas/tests/io/data/csv/tips.csv @@ -0,0 +1,245 @@ +total_bill,tip,sex,smoker,day,time,size +16.99,1.01,Female,No,Sun,Dinner,2 +10.34,1.66,Male,No,Sun,Dinner,3 +21.01,3.5,Male,No,Sun,Dinner,3 +23.68,3.31,Male,No,Sun,Dinner,2 +24.59,3.61,Female,No,Sun,Dinner,4 +25.29,4.71,Male,No,Sun,Dinner,4 +8.77,2.0,Male,No,Sun,Dinner,2 +26.88,3.12,Male,No,Sun,Dinner,4 +15.04,1.96,Male,No,Sun,Dinner,2 +14.78,3.23,Male,No,Sun,Dinner,2 +10.27,1.71,Male,No,Sun,Dinner,2 +35.26,5.0,Female,No,Sun,Dinner,4 +15.42,1.57,Male,No,Sun,Dinner,2 +18.43,3.0,Male,No,Sun,Dinner,4 +14.83,3.02,Female,No,Sun,Dinner,2 +21.58,3.92,Male,No,Sun,Dinner,2 +10.33,1.67,Female,No,Sun,Dinner,3 +16.29,3.71,Male,No,Sun,Dinner,3 +16.97,3.5,Female,No,Sun,Dinner,3 +20.65,3.35,Male,No,Sat,Dinner,3 +17.92,4.08,Male,No,Sat,Dinner,2 +20.29,2.75,Female,No,Sat,Dinner,2 +15.77,2.23,Female,No,Sat,Dinner,2 +39.42,7.58,Male,No,Sat,Dinner,4 +19.82,3.18,Male,No,Sat,Dinner,2 +17.81,2.34,Male,No,Sat,Dinner,4 +13.37,2.0,Male,No,Sat,Dinner,2 +12.69,2.0,Male,No,Sat,Dinner,2 +21.7,4.3,Male,No,Sat,Dinner,2 +19.65,3.0,Female,No,Sat,Dinner,2 +9.55,1.45,Male,No,Sat,Dinner,2 +18.35,2.5,Male,No,Sat,Dinner,4 +15.06,3.0,Female,No,Sat,Dinner,2 +20.69,2.45,Female,No,Sat,Dinner,4 +17.78,3.27,Male,No,Sat,Dinner,2 
+24.06,3.6,Male,No,Sat,Dinner,3 +16.31,2.0,Male,No,Sat,Dinner,3 +16.93,3.07,Female,No,Sat,Dinner,3 +18.69,2.31,Male,No,Sat,Dinner,3 +31.27,5.0,Male,No,Sat,Dinner,3 +16.04,2.24,Male,No,Sat,Dinner,3 +17.46,2.54,Male,No,Sun,Dinner,2 +13.94,3.06,Male,No,Sun,Dinner,2 +9.68,1.32,Male,No,Sun,Dinner,2 +30.4,5.6,Male,No,Sun,Dinner,4 +18.29,3.0,Male,No,Sun,Dinner,2 +22.23,5.0,Male,No,Sun,Dinner,2 +32.4,6.0,Male,No,Sun,Dinner,4 +28.55,2.05,Male,No,Sun,Dinner,3 +18.04,3.0,Male,No,Sun,Dinner,2 +12.54,2.5,Male,No,Sun,Dinner,2 +10.29,2.6,Female,No,Sun,Dinner,2 +34.81,5.2,Female,No,Sun,Dinner,4 +9.94,1.56,Male,No,Sun,Dinner,2 +25.56,4.34,Male,No,Sun,Dinner,4 +19.49,3.51,Male,No,Sun,Dinner,2 +38.01,3.0,Male,Yes,Sat,Dinner,4 +26.41,1.5,Female,No,Sat,Dinner,2 +11.24,1.76,Male,Yes,Sat,Dinner,2 +48.27,6.73,Male,No,Sat,Dinner,4 +20.29,3.21,Male,Yes,Sat,Dinner,2 +13.81,2.0,Male,Yes,Sat,Dinner,2 +11.02,1.98,Male,Yes,Sat,Dinner,2 +18.29,3.76,Male,Yes,Sat,Dinner,4 +17.59,2.64,Male,No,Sat,Dinner,3 +20.08,3.15,Male,No,Sat,Dinner,3 +16.45,2.47,Female,No,Sat,Dinner,2 +3.07,1.0,Female,Yes,Sat,Dinner,1 +20.23,2.01,Male,No,Sat,Dinner,2 +15.01,2.09,Male,Yes,Sat,Dinner,2 +12.02,1.97,Male,No,Sat,Dinner,2 +17.07,3.0,Female,No,Sat,Dinner,3 +26.86,3.14,Female,Yes,Sat,Dinner,2 +25.28,5.0,Female,Yes,Sat,Dinner,2 +14.73,2.2,Female,No,Sat,Dinner,2 +10.51,1.25,Male,No,Sat,Dinner,2 +17.92,3.08,Male,Yes,Sat,Dinner,2 +27.2,4.0,Male,No,Thur,Lunch,4 +22.76,3.0,Male,No,Thur,Lunch,2 +17.29,2.71,Male,No,Thur,Lunch,2 +19.44,3.0,Male,Yes,Thur,Lunch,2 +16.66,3.4,Male,No,Thur,Lunch,2 +10.07,1.83,Female,No,Thur,Lunch,1 +32.68,5.0,Male,Yes,Thur,Lunch,2 +15.98,2.03,Male,No,Thur,Lunch,2 +34.83,5.17,Female,No,Thur,Lunch,4 +13.03,2.0,Male,No,Thur,Lunch,2 +18.28,4.0,Male,No,Thur,Lunch,2 +24.71,5.85,Male,No,Thur,Lunch,2 +21.16,3.0,Male,No,Thur,Lunch,2 +28.97,3.0,Male,Yes,Fri,Dinner,2 +22.49,3.5,Male,No,Fri,Dinner,2 +5.75,1.0,Female,Yes,Fri,Dinner,2 +16.32,4.3,Female,Yes,Fri,Dinner,2 +22.75,3.25,Female,No,Fri,Dinner,2 
+40.17,4.73,Male,Yes,Fri,Dinner,4 +27.28,4.0,Male,Yes,Fri,Dinner,2 +12.03,1.5,Male,Yes,Fri,Dinner,2 +21.01,3.0,Male,Yes,Fri,Dinner,2 +12.46,1.5,Male,No,Fri,Dinner,2 +11.35,2.5,Female,Yes,Fri,Dinner,2 +15.38,3.0,Female,Yes,Fri,Dinner,2 +44.3,2.5,Female,Yes,Sat,Dinner,3 +22.42,3.48,Female,Yes,Sat,Dinner,2 +20.92,4.08,Female,No,Sat,Dinner,2 +15.36,1.64,Male,Yes,Sat,Dinner,2 +20.49,4.06,Male,Yes,Sat,Dinner,2 +25.21,4.29,Male,Yes,Sat,Dinner,2 +18.24,3.76,Male,No,Sat,Dinner,2 +14.31,4.0,Female,Yes,Sat,Dinner,2 +14.0,3.0,Male,No,Sat,Dinner,2 +7.25,1.0,Female,No,Sat,Dinner,1 +38.07,4.0,Male,No,Sun,Dinner,3 +23.95,2.55,Male,No,Sun,Dinner,2 +25.71,4.0,Female,No,Sun,Dinner,3 +17.31,3.5,Female,No,Sun,Dinner,2 +29.93,5.07,Male,No,Sun,Dinner,4 +10.65,1.5,Female,No,Thur,Lunch,2 +12.43,1.8,Female,No,Thur,Lunch,2 +24.08,2.92,Female,No,Thur,Lunch,4 +11.69,2.31,Male,No,Thur,Lunch,2 +13.42,1.68,Female,No,Thur,Lunch,2 +14.26,2.5,Male,No,Thur,Lunch,2 +15.95,2.0,Male,No,Thur,Lunch,2 +12.48,2.52,Female,No,Thur,Lunch,2 +29.8,4.2,Female,No,Thur,Lunch,6 +8.52,1.48,Male,No,Thur,Lunch,2 +14.52,2.0,Female,No,Thur,Lunch,2 +11.38,2.0,Female,No,Thur,Lunch,2 +22.82,2.18,Male,No,Thur,Lunch,3 +19.08,1.5,Male,No,Thur,Lunch,2 +20.27,2.83,Female,No,Thur,Lunch,2 +11.17,1.5,Female,No,Thur,Lunch,2 +12.26,2.0,Female,No,Thur,Lunch,2 +18.26,3.25,Female,No,Thur,Lunch,2 +8.51,1.25,Female,No,Thur,Lunch,2 +10.33,2.0,Female,No,Thur,Lunch,2 +14.15,2.0,Female,No,Thur,Lunch,2 +16.0,2.0,Male,Yes,Thur,Lunch,2 +13.16,2.75,Female,No,Thur,Lunch,2 +17.47,3.5,Female,No,Thur,Lunch,2 +34.3,6.7,Male,No,Thur,Lunch,6 +41.19,5.0,Male,No,Thur,Lunch,5 +27.05,5.0,Female,No,Thur,Lunch,6 +16.43,2.3,Female,No,Thur,Lunch,2 +8.35,1.5,Female,No,Thur,Lunch,2 +18.64,1.36,Female,No,Thur,Lunch,3 +11.87,1.63,Female,No,Thur,Lunch,2 +9.78,1.73,Male,No,Thur,Lunch,2 +7.51,2.0,Male,No,Thur,Lunch,2 +14.07,2.5,Male,No,Sun,Dinner,2 +13.13,2.0,Male,No,Sun,Dinner,2 +17.26,2.74,Male,No,Sun,Dinner,3 +24.55,2.0,Male,No,Sun,Dinner,4 
+19.77,2.0,Male,No,Sun,Dinner,4 +29.85,5.14,Female,No,Sun,Dinner,5 +48.17,5.0,Male,No,Sun,Dinner,6 +25.0,3.75,Female,No,Sun,Dinner,4 +13.39,2.61,Female,No,Sun,Dinner,2 +16.49,2.0,Male,No,Sun,Dinner,4 +21.5,3.5,Male,No,Sun,Dinner,4 +12.66,2.5,Male,No,Sun,Dinner,2 +16.21,2.0,Female,No,Sun,Dinner,3 +13.81,2.0,Male,No,Sun,Dinner,2 +17.51,3.0,Female,Yes,Sun,Dinner,2 +24.52,3.48,Male,No,Sun,Dinner,3 +20.76,2.24,Male,No,Sun,Dinner,2 +31.71,4.5,Male,No,Sun,Dinner,4 +10.59,1.61,Female,Yes,Sat,Dinner,2 +10.63,2.0,Female,Yes,Sat,Dinner,2 +50.81,10.0,Male,Yes,Sat,Dinner,3 +15.81,3.16,Male,Yes,Sat,Dinner,2 +7.25,5.15,Male,Yes,Sun,Dinner,2 +31.85,3.18,Male,Yes,Sun,Dinner,2 +16.82,4.0,Male,Yes,Sun,Dinner,2 +32.9,3.11,Male,Yes,Sun,Dinner,2 +17.89,2.0,Male,Yes,Sun,Dinner,2 +14.48,2.0,Male,Yes,Sun,Dinner,2 +9.6,4.0,Female,Yes,Sun,Dinner,2 +34.63,3.55,Male,Yes,Sun,Dinner,2 +34.65,3.68,Male,Yes,Sun,Dinner,4 +23.33,5.65,Male,Yes,Sun,Dinner,2 +45.35,3.5,Male,Yes,Sun,Dinner,3 +23.17,6.5,Male,Yes,Sun,Dinner,4 +40.55,3.0,Male,Yes,Sun,Dinner,2 +20.69,5.0,Male,No,Sun,Dinner,5 +20.9,3.5,Female,Yes,Sun,Dinner,3 +30.46,2.0,Male,Yes,Sun,Dinner,5 +18.15,3.5,Female,Yes,Sun,Dinner,3 +23.1,4.0,Male,Yes,Sun,Dinner,3 +15.69,1.5,Male,Yes,Sun,Dinner,2 +19.81,4.19,Female,Yes,Thur,Lunch,2 +28.44,2.56,Male,Yes,Thur,Lunch,2 +15.48,2.02,Male,Yes,Thur,Lunch,2 +16.58,4.0,Male,Yes,Thur,Lunch,2 +7.56,1.44,Male,No,Thur,Lunch,2 +10.34,2.0,Male,Yes,Thur,Lunch,2 +43.11,5.0,Female,Yes,Thur,Lunch,4 +13.0,2.0,Female,Yes,Thur,Lunch,2 +13.51,2.0,Male,Yes,Thur,Lunch,2 +18.71,4.0,Male,Yes,Thur,Lunch,3 +12.74,2.01,Female,Yes,Thur,Lunch,2 +13.0,2.0,Female,Yes,Thur,Lunch,2 +16.4,2.5,Female,Yes,Thur,Lunch,2 +20.53,4.0,Male,Yes,Thur,Lunch,4 +16.47,3.23,Female,Yes,Thur,Lunch,3 +26.59,3.41,Male,Yes,Sat,Dinner,3 +38.73,3.0,Male,Yes,Sat,Dinner,4 +24.27,2.03,Male,Yes,Sat,Dinner,2 +12.76,2.23,Female,Yes,Sat,Dinner,2 +30.06,2.0,Male,Yes,Sat,Dinner,3 +25.89,5.16,Male,Yes,Sat,Dinner,4 +48.33,9.0,Male,No,Sat,Dinner,4 
+13.27,2.5,Female,Yes,Sat,Dinner,2 +28.17,6.5,Female,Yes,Sat,Dinner,3 +12.9,1.1,Female,Yes,Sat,Dinner,2 +28.15,3.0,Male,Yes,Sat,Dinner,5 +11.59,1.5,Male,Yes,Sat,Dinner,2 +7.74,1.44,Male,Yes,Sat,Dinner,2 +30.14,3.09,Female,Yes,Sat,Dinner,4 +12.16,2.2,Male,Yes,Fri,Lunch,2 +13.42,3.48,Female,Yes,Fri,Lunch,2 +8.58,1.92,Male,Yes,Fri,Lunch,1 +15.98,3.0,Female,No,Fri,Lunch,3 +13.42,1.58,Male,Yes,Fri,Lunch,2 +16.27,2.5,Female,Yes,Fri,Lunch,2 +10.09,2.0,Female,Yes,Fri,Lunch,2 +20.45,3.0,Male,No,Sat,Dinner,4 +13.28,2.72,Male,No,Sat,Dinner,2 +22.12,2.88,Female,Yes,Sat,Dinner,2 +24.01,2.0,Male,Yes,Sat,Dinner,4 +15.69,3.0,Male,Yes,Sat,Dinner,3 +11.61,3.39,Male,No,Sat,Dinner,2 +10.77,1.47,Male,No,Sat,Dinner,2 +15.53,3.0,Male,Yes,Sat,Dinner,2 +10.07,1.25,Male,No,Sat,Dinner,2 +12.6,1.0,Male,Yes,Sat,Dinner,2 +32.83,1.17,Male,Yes,Sat,Dinner,2 +35.83,4.67,Female,No,Sat,Dinner,3 +29.03,5.92,Male,No,Sat,Dinner,3 +27.18,2.0,Female,Yes,Sat,Dinner,2 +22.67,2.0,Male,Yes,Sat,Dinner,2 +17.82,1.75,Male,No,Sat,Dinner,2 +18.78,3.0,Female,No,Thur,Dinner,2 diff --git a/pandas/tests/io/data/excel/blank.ods b/pandas/tests/io/data/excel/blank.ods new file mode 100644 index 0000000000000000000000000000000000000000..7ded3c3c1d688242b6af3b2bdd3afe617a23c33c GIT binary patch literal 2813 zcmZ{m2Q*yU8plUmJzCV!LR_6ON*G26!YCo5n-B~pj3^T$dW#x;)Cf09Mv2iv^pa$V z62Vn+wP8j=gdlioZW7+eU2o;RynFXKd!4iQS^saZb=LR$zCYZQ_7`TrUqcKK@2!yx zqApG+BX#VdZg+f8z5yUC#vg@r z$9ker0dQ(eZUFuF2x5T6=%h6iEdZcQJ?8WzY(R)N3JVJM@qTIh%9jM@=;mA1VLTg} zBZ1NtBKor_gl<%Q0~9=RSL2q!S$iL@L+6`*H7|9*-g}D>@gR4Jb%$vOX@!)VsL04# zd;90Wb=^mlButVnPL{tArc;qYT{9S#@gSB%=LEI(=Jw!&@|3Zsh(aH?4<4OrQ<9yFA%V5q zXrIP3y1Wl-X^w^AQ(8XmChpU2z0n&Q!w*z;EKBi95f4z6lW1%Q$>@)peUp<`D_zoO z36gG(I?|sWd1YCcZ^G}`dMC^GCznFu%`H6wf?w0me`6+GuViNYR&r^!rVqzD=AV)! 
zn)zVZZ(LipEwhA7&s8v$gXyMxCcsn%&*yA%JC@exKaF*L!d34B)YI%&%$6c;HDbt& zoSCr;G(EaSV-rzFbk~Ir#1m!tbd8Uulroi(o~Aa}pXLEsJ5x5sQ!p7osQ&X80)H--H80n&d7AsD!4i8-Y_Q~8_Fv*d zV`?+c;V6N03c~1`m187gh48Wd2|zkKt3H?8>$48(Pfn1R;}q8$waK2c=EHZ)g!M(; zl;Mml7Q>V#oZ~d#lDm{m!aBANc8wfNq62L@o^cupU@xeH$F^5wbA-d(Ts^molS(%c zBUU88j+6}huMNEFn~GG_PEKe6{|h6_u2UU|3ZpR#06@I~ZhpQ2r}R>3+^{vvn4{8Y z_c7{ndm+aG-t?D^Qk#tKrf)R;MbXXa!n165qbLo+PnqN`%bbGPJ6*NT`;ogIpuU9? z-fuAWGuNsil$_mF;8(Gw2ZbA3#l_1Kg$Ni=rKQ}AyT_B+^@H{M&2_TGlI4Qti_)-s zTeg!?aB*Wf9-2h1wJ5K!ov|Dj7#lWOh&IuE|DyeDGhS?^NPsL3d`5KF z0n5Y?{t?#A3fqHcM!Vpy39Hy^wpk|P>v!}YUT#c8n_E6^H4DVl$?SHmcZ3{*r*9VX zRsSK&UXRd`9tB03-E!Z4C(pnXNl;Tif&Ks~dd@3K$OK@^`y?SOP6KJ?-DRhY1vSbN~ zjC$3&-c=uDoT+>mIH!4IX@;MvN>QS>(n5nio}%OY{859sy+Y6Bp}h(Ck~+AJ^SNi? z27`4|E-2&(kxGtIP<{#Iih|ovstG6uxiXO90X!yCmEMj7p$W8I^Y@5;mMLsKCn`k2 z^z=LmlWKz=rR=uZgY5dSUqAPIvCE$qHSP077KDM*KGlIAp%}pxhyAqozR_eD?7VTD z@lxPefLK@=Cuom0;TF)o%APQt6}54zrHN=U)&+f?*Bvr-9YDCSA>1D+XlxP3>FQmt z((^V#ta$};27QmT;9po4D)-8%8and(eVYc>^js2cG^x%FZ76X|A{im)O8lH0m{)`| zVi~%SF0S|Zag&itc++K8C`W=Qksxv~+4>uQCMqp9LMj#g!N^9D+oYxHI^|Mu?A@$#P6pC_-^dZjAW1ux#6 z3}AuTfq+ZGSBodq?Hw1ln}fcv{~cC|L+m>8!zFJee4*6Zlk4vS=eT<^a$$rAqIhCT=!3uWgRZ3bZgPg&Om2MTA5_ z?$?+@u!QCP(Js$;Zm+9*Yx@IVNZSM+7I>T^Q@2y*J~BLVj;};jEQQ4Uhw;nSsdbd3 z-CZtdOw7iwZF4wRR#V+`hgiyug(YEVd8)p1pZf2$^bMf*Z>ONJm~D%Csmqw|{>kA~?FI+yh&)?mjf%_T2ljl#an z*Yf{;h50frigTg?01~Oc;=6@F1t3q$OKR4Lc`ukFZp)}ke_17IDX$_ew@i-_#CW?T z`sGMtYi+z6tYxyv?^;RJ{}G-=du-3yHonu_}})ZAjzk8~3JDD8jp_mxuW` zkLzHR>hkI@Y_%z*!eQ^$ELxs~X$jBsd5eg#0yHeqHr=bb3TduMKrp3;nw!yu>aCU+ z9N-g8JocSCCY>5%K6q|L7%0_wRbOxmvd5fd?obex;U&)_7f5kyH@Ej#D+e zO)($_e=xYBVtsWwXfmaSf@Bbo-|gqE&vIBAo_fB|iOjlk8w1i7){y=>$X(B~>+`W# z|41w;8#1z;JL>{!-pi`OIjq9Ewn_>4JV8Vjp)!%GTQ-uAhxZh#thX`e5QY8iko?1} zz)U!py-!{;1#GU@IFHkiP`ZKJ8b7Ijf-Sz+Bz|)(;n%%LW4`LV{0^W9mr!GY)1=hu ziP;Xdi2qgPY0fBow^!8c1N^^{JKg;;{$uOlrl+cZwrW2V*3{P@#_ea_&t2t*?k%;~ z{8x|pS@<(IKZFp5f1v2+5I^VJj}ZFQ^rPC>@7V`8rK3OnLPxzsso6Qebh`RCg-*K5 literal 0 HcmV?d00001 diff --git 
a/pandas/tests/io/data/excel/blank.xls b/pandas/tests/io/data/excel/blank.xls new file mode 100644 index 0000000000000000000000000000000000000000..952c76f045e8af5f100b8c2690ecf8d604770a64 GIT binary patch literal 23040 zcmeHPeQ;FO6+dsYn`{Ul2_NB8cmx7T2oZJxQ9hOrMI4ro5gc^H!6n&*3?X5bjSVw0 z#Mb^(snvwyRMJ|jPPJOU6+5*BoR0R7wRKt;D`hNpsAI=YJJNPq9d-LV_wC!g@4k=h zR_&Dbz07_4?s@nA&bjBFd)~e0-Iq6hS#$ouXQsSE%CVBl=+kT^l}mID*YnJLHQ|0X z%UScc3D+R`$0ekJ$}l>zJVYIl*DFr-YgCR7e}@9}bnq0;M88McLHlS7(bnC`eftur zwq|`p^0v-y{R=(T2pJL@%Wx8gw?Zo?4YV>E!nuYf3+cv+^Eh#SN|b$BoLA6TI53k*NO9JoHaTw>YNpng*2Vcp?sL`VRH}A*$QnfC8&!!X&0q% z{%`=nmmf)}@&Ot{i7b^y9tY%n^jUTqe@?lG?9HxXu zF-0G$q#7+6rUapcckgRdIV{GAVK4$TahMVstkBxGYFh|$xJf!aIE_N!L3Ok1W;M^6 zwP0uCvK=GQ^)oeisYf=dfE;ty5jdQG}qsY$ogh*2|LWo2fx zz{*dECb5jg3QU*anYU14YSz#5H7g6&g_`GSJVpi4-?O^-HBKjWrn74i^uL+6yy-l^ zoyxn>8P%!$|1w=8ottMI0Yjf5sjXKXjr0z=imc2{V5mv6E5^` zyU_1)p}WcBhX0(vnPAh;cUGTyE;-+b6?ecRTMlb%Gc!S?d>OKoR~quwN6uqCPWl;d z(obWGM=SGs7;Dm{cHJHxPq(vz^2bW**l;#;Fg%|QTIgILK%?O2%W%cMo*$BUyflr* z6FFe7z(q=`U`+T*;J|{E#35K}1$L<=ao|2FsRA2qNfo$LN~*wSUQz`fUnNywhf-1n z9vLN7;GtJi1s+Q!Rbb~=QU#6~lvJU))C%)UtUPb4;rh0^yuH;elz)hjrZ!f{FF;AlaBVNVFJ`NYo0VI>AK%DRjE6Id;((QLUA%bFCR8jI zvk4{BOre;TlZi~BWSU(ltmq`6K04^4ZYQB+t$a33siScTgfd1WYu_f6$t>Hi)oW8QJlTw~?`TvI*kuw|HV)0I^BOIxeGRyDXhulG zka22`>ipHwGpiU7!erR%S;mX0B6Q*ng80|)Aep!~2;yH72r|w(@dlY)3!Assv>ma4KlSD zkWAbgWTpiK8E2iyAcKLiRE@_MMHbZS^YzuOt;z26!h!Hc8erAs4lOu6I||YD1yoNu z%|}{It@uk)H>Ej>4Fza4k}879_H*xkkPY!QKW#7zsOQ)Ov#C+pMC@$pJlGg(5Wyyt zY`=K!x6W+Fs%)ZmHgi1K7^@V)raamH{s(V6vl*wdS!8F^&c;$d;Cg|K`HW&TJ;D zY#c0Yo(CJ*(lYVP$+OOErl@QjEbVd+HnOE<;>X{9&za3sm5qa?MLpQamX?WseCBy) zHq)5R5^qb>!DdMjmKKSA^SL(-Htf)FfZ_W~yu) zENzho8`;t#(T`4i!->sEyq?CqEC1YFtvoj`%T~BPD~UGW@e^mzSp`s7lnOdi0hO%? 
zuRgYHbOxPW0EPXip!Eu!N3^os=(WbwY!t8d-S!* zuC9UbAw{l<*%6RyHztFvvHDrx*LRQey7gN(b^aTKX$cqeZDvksWEPVdXV}F2@Xv13?Zz=i4AoAMAmHS7z^XXXnhT5^Y0x=GvYf80s=B zt~Mnbq-&wH9>r8SB5q=VnTa(T^L?x!)+ZBfo!uQe)(VQCW!2E^dI-d}-w8oGAfT~E z5GH^DGvm601{|AW1tc>Z;b34i78w#ae4h$l;{a~>KNs+9?E5Ep4$t6G85}-P1t04G zZumhLaARr^5a5esaQH&SA(V}C0B3(_^jL+gmchUXTFhar>$>;#!W4R~T^1OR_FM|I~#XGzAB-?W1e}f{-B>1Jc=;%G@^frqJD9c-8 z-WQqPheO=uP~1qm0`k+QWN$i^fO}CKvo;Nmc@u!Sr774HI~HR_Bm5~|mYD4w!W)&G z!Na8b3BxgJl~p6K+yqNc2?xPYIqhq>EG$+lu`dx2n|jj8f$%OXMqmWS3uB}q6vrA( z&&z>o)1`5cx!`Dxoaxexoaxex++d)JuH+cq=H9gE@h>S%Cjff0(1E?rWtYz$($eQl zC`e-DOi7HKDT$F|hj~?h2)dhX6(JNtM{GfxQ~8BItBg%`8Jp_1&U6>Fwa{9tYz(?* z8>}OZj$?ht(_}d1MU81#HA@Tun zwGnM+XV?q7i$IH=bepiCLJ1y09e9dzIAsalDz*-P6|H)aUn#>fuuD3bUdWW?{6^EH zM40jldsmYZy+1sO?w8~;OxxHn@({LVKP27bdWT`^8vdTtZpOgE`(;gAyCv1rkxJ~< z*L8L!_v;OPOP4g3ZQT?{zB|#~+L`F$Twm^dedn%SJ>4yRU5UN0U_(&zB4S=dZ{iV! z!}!&5zX7lf@^}tTMCg&l;Nz4QMj|@1G(aDVkfb2vtx^EF)^IJ7LE&?4?lM3}(Y&M5 zoi%^Z=oo0nBpP!#4BBCd#sIwM!q~N5ZP~S672uaBh2!UVMKlU>@URb{Q^$zw8k`Bw z@i2Y-*ayGbxNA#nr=T|p`aE&|vY_-!?WxCLBWl*}z@1aU@wlFV^c7M1PH~=t((U4S zH^!S`&Fh9d%6qX!>H0z5E(=7~qy~+%2BPBlb7eW8n+x1*uyvsmo9Cm@F3$F{{|@0?vQ+ zfr}q*-d+9pye=7Zp4PAFYfW|u8TwDW5Y9gOhPDcLbfor`vd7-g za-NJS{^4uD*MP49Ujx1dd=2;-@HOCTz}JAU0bc{Y2LA6g;I;k_oO%7sP|MWn!TYfO zZ~pK}UjOq4th_Ig*Z*6P_+zjek$9i=ok;wF+hHVL+aE#Vbw98FIrjT762}9cK;rno z86-I#FdO-8yOa72En8dktCRR&Bk8@^<*4_x>uHd;rILyLy{QAaDCKCu3sqcyr$Y#i z18}^;h;|n)BZbQ-jYL5fgo6^?fbBi0J^MM1zhBh%V+qiSfA|{kHQ;N&*MP49Ujx1d zd=2;-@HOCTz}LVftpQ%g^U{~sxOgqkyZd;1qgWmDJAGd3^NuTC7W0~&*YCW?h}ZqR z#;-x*cmBN2H}^&HTA$+q{GBm>SIhq&mcN_iPs{mJYF=i~K+=(BBJurM_~rF~J<=Sc zxkwF2jYxd|6D0oikIz>F*JFDl@7UC_Wu3nfMwj({vXQGqhOzVsv z>zVt}q-+QG6h24TCFIy+l8gSue}K*NXBmpExIVXYE<{_!W>5YOf!ofzwcCYEOeuW0 t;fC7){yc_w{1nTdzpEC52R|XY8r=9pk-gY@$~{n;JWKzL_SZ@L{{?{WA}{~| literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.xlsb b/pandas/tests/io/data/excel/blank.xlsb new file mode 100644 index 
0000000000000000000000000000000000000000..d72fd68ab3dbf214b3b0c4b5ace56b34ce627b13 GIT binary patch literal 8908 zcmeHMWmJ?~+a4H77*aq$X^D|e36Vw|P(TDhKx$?j7)lzY8|e@!Q9??(L-+J2YrW@LYi9p=p8Hz+?t5QbRSAei4!{B60RR9-fVZI_MgRr? z5QPl@oCn}x>d0DJ*upGq_3t`a!EE%n9nH-clCdz^UjZ;t-~aRc7h9kr>Xv0KFUWT- za+f|!H=VUQ)g+Gnk}&I+!bb+fqNzAnx-UNkB2SM;HyJ(HJ88>`O(O^L=v(aFv}VNa zdo{JjdD}Vp$s0u75)e=s+?$bbg_Gi#+{w)G@P6iQCQ|lcSp|Yi6+UcGOUYPvfl-x< z;dyfoP?k+#*@k+dr^6@G6g=r0MuDnYP!MCzq_; zEFV1TcQXkAlYM`G+iT#Um!WdRy}0n;Li4n_)cqk{Y%+wLbZ-Z@#)Ri%AMP^N2okKJp}?(|CGV9p3@C`sIXKqKJyr9N4#R}P(!zBwvyrhzHhSV{VE5ws578gr`Q!Y#G?Lyfy1WRd; z*S0!dR*bdvWK@aBapZd1sd*ZuH)suLtanx9CQ3EPwRf_IaEMrY>$$kSb(s&pL-(|D zM-4`Cfnz~^v%_C)@0uECeNoFZ&00CIUy`iK<5wY%)T2YGPE4gLJMTVGmmZAARc-5< zZyP9iZJslCq3h-Oa$7j|X2Kw~Fb6%qj-s%B3JpGOI)2c?+k_Cs@e9-!*ROTW*(aao zH3XcehIrq{)SnZz)5W|$L1a6Y3~S00-}0Xz^Yv<0y_78;>lH|!;f7NE=RE1dP6*vW zy}1Dd0B8Vs7>;H9xB`ZyPt}PHq$y@67MyiDzCvO zTq7n*=fZ8kKDtVqfUJsAd9#~{x00@hSwyLXSbJpF+A(^9TORO|6Pm{x9^u&z_)j@; zJ=*t&-)q)b99r*Td1{s4-y2WzBt6&P_oZ!#;z`>gshx0`B6kS6ZL1-eC6);HRxI&C z_v?*Eoe5AgSSLJBCkmD*`DuRum~~#IAd{m0RwbZ-Ru*zHF>*CnXn&>c&^@F>u33eC zub0u9zpp@E(@Hlyfje-eO)PBQ@DoQ}lnrmgGsCB%F@8$5B%v-#LmQz)kGW4yA~q6+ z13xTAM)hzO-kpDG-5mM#-UQ0mKL^c=h6SA|)EjkFFc70wRM7kwE~aoO!a&eS3aBjQ ziY-e7WDXMsMy7$XrAisS&XLgYQSr7yXVd@?5eaOtp@AyQ7-}&05D~*|z(P3lm`{tZ zo;Qj=71)*m^?%rGpa@ljH^bR+o>BsaoIqfyLMnFk+A#c7rD}dt(3c?478R_96*GXK z9|U6J2Z2Z#=B1?NahYW$nd>eoP=o=RttlC}jn=W^5Y5#%P+Xw75_T;a1AP`Kl|HEz z?^Z0SGPS@{eq%vcBj`L8^&CG$ufzazBp&nb@x$3BrK+14y9^FczE(ICiZFzJi~OI> zkt`;V5r%rhf-(oynK^6_Fqo|k&-aD@TN;@{p)sxOt`R-~H69qC6Il#@0UtF0r>|Tw zKff*ir#3?cGR$RVX|kA_3{NKFC?mNDU%qlG>R~GCyF*meG5b_BY1X_2d>MS#U~?Fv z?q1Ru*)S&Ny2}c_i=4MfZZYtgDbxgccP*Go9hJ8#V}cxF2)nB%Vq#K(qVnM{n2+BQr})}0ax=Gmhvd`VE~bs}1oq}cny zf;VeS*72l>oky3;`3TgZN8mo~epNE~*U6cr#v1dtEb!f%e^>xj(m*(qhp*NQZ1oei zc^m!J2MxoJi*!;&&*}xEgJF=(-5v8UgB#EHC6i8yeKf|{14GSTA@8f!3yXCWd91Yz zEQ_;)n~4URp$2RoJywp39mPhOEZWXw8Wfa3CGeswvVZ_Pij{TheXJ3YmT7CzZA6|( zd6~$!#wQoQ 
zG&%1uC?w=g_xO0b#|rmX`FH@GzRQa8zYc2lBm6A}Y;2v(U^d^vH#MRK0+c3^?@+$_ zF}t{kWb_b|HhA4ayq_Rdq{o^$MwRPPU7$;c;E@v{N&JP=jZpO_}BIj zCZ+Edr8^dpCJM{=PvS-`VvJfg;n@!-2~=p))je|(BOEsK z8}tfKERDtOX>QnM4HjHP^e7bQ^iDfUjK6@}KNJ$Uy>!tuR8i62vy9=2Enqv>yy|pZqasO$j1+6TV6>W+=kApuE$#5@oW@SEY?pl< z)VZ0mC)Kbx&U=KqTh?pRlnf6;yiHb;sBTp6?%G0JJQd?dp@r`J0!dK_t~KisLXM7|n2(d9V!=^pYdXj5fnLo;FsZr{g1{EI71{k7 zE7Hq19PIgsyZAbyG|z1Qh|oQgKX+nn@9R#&OALbvT%{at`NX!G$CdFLobD@4{A&+# zG?4H2F;Dv<+P_%59WhORua$-al@wOm-xn_|lfKDl2 zis0}#Ht18A<|(?6Mo%MR7LhqJO4H!sSEcxFJC;4prluQ?UXz$F^fqB8H<6yHJcwkm zalh(54ygwcrzd?|=?i^ZY)!;3X(f*q_q$T2{Uq-{Ao@%c;>Gm2#ZvNcYdN%wUgd++ z!G54=1?QC8@#jc=*OLkwyq%K#(}w3Nu1|Jn8t%P3IT@SA-;b`IW^$HeX6oXy_TF{E zDy42Ji;FA}p>JTX?@g*^T+cNayO#v45NgWCA7IJdTE$0jX;d#49fsuAskV~mggG+@ zU7F;c&u)=_4YWXLV-ttMAE|QbwmvO$2_k+2-LqQpE|pYzl`ius+GToL-lpJz4du@nPiOINWG}48Yx!tEh~{f~roP^hP6gJAEUEZMX$Zc> z9@N46N(dUu=3gl{qkE^Av>;8<5g)U~WF1exEpv%QV~{bpj%gR@1OPwnqPwTDuD7b` z@9HZu>1a~6c4x}_a9hF34<`f?h}C5YPiBf9IOnkGAo5UMS^2q#@7nf;f1}$#FSSpu zYjWn>aUr{Mju&Q2I{6ry&-&W~G^;>+^k$PIj+ASp8L`&qT?Rf(>{zbltWC6-9!!<* zCacL352Pf&=xOVvCANu%C@rAzNSc^}4}!#)9G5^W_y;&x=wZ z?TrYEm@O+-goc2~oBTcwuzLO%JMX@dI}f8ZuV%%jEhx6!>gN`I_}I$Hf`)T$-1{AF zO)=Jk2VNXNE}Gl2G2sEb<-ux{l;?c{LE)ec?^M2S`{%}YuS$Qiehu|6tz-4N?vOZD z;#ph@Kg8*7CreE(nZ+TR`+H(40}#`jf6F-{NdsPtz4dBqX}9co`dYaN-?DlH}V%}8a{Xpt~TQO zxHN(A(Bb*y{?x)&m`ihjk@`f4t2)#I@zECItO2q&ls|00o|Y5UK~7p{W$oQ)#=6Vr zt)G5nP#}qHRaW^%C$UvYdZqb;cWt6v@27m?YKH7uBS_M}Xg)`1ex$UZxs&VVbR`3~ zRAb2^Nc;mhZHA{g%82H)L%HkloUUs2ZCfnoGHpt#MaZI~Zbq2w1&GWNF&2m@%|WWP zdWW~dH`Vjdd`mw}_IRQ{Vb<^yO?#~AOB_Yj!O*+xctc*L(84ux_>S#bA*c4IX6RjM zy;!Z1rkSw(@Kq1wW6M2NyiAMtH-~XRtI(5o9VXbpK#3Xh<+erW!8j5*`7+VjjL%EW zf?gPSYwlFH!TFljN>=oE*uy85T_ndwd~l(nEP$a;&Z`&E$H*+J_MCqE?1+Y7&9qB$ z1!jDlb3irB0cET5Yz8*xE6(r_oLv`}{MTeH<=4mj;5a5gZI_Qkbc$CM%SC)^K5`F3 zyR);AJzY0Z2jpxX$L_Y-M61&N70!+8=|x+gTPfyyr{u-Nck-u^<)yuSBtwxb_)0x1 z491woRWJ@$(#EDe#?F!TXLUhflZbOwb7WNTg``X%yDLXmn8gW-l>fOyS9v8M$Wc9w z7WJe1t+#zUlm31PLk)h0)=oDqpfJbpN3re*0{|vAi3d&^KOgGjU*I_?*<@s+rXf=l 
zF+zeOMxd6)s@9fPHavz_R=@NA|Ii^Q%OylgBd2*ueAWmK>AhD=5};tEFpX&FbGlNq z>f28=XCHBzD#^sflzni8#S6AuJXtgsT-(@BrQRr49||`X zEYFFvV7vyyj0vH|eo|}o13b?2IlxdF%m7kWXt9DVve7du=^0}{VE_c?!of#(>@k$p zjbExstHawIZSuHf(%S&SZaJ#M@PcfB)>XK;*mRO0RDO*qe^N35SsX!S38pLF6duI? z*b(XJTlB7(S&wLGL1>q0*35P0T?C<-QGN#WynB(7*M!$4I&-!XVo!&%RjH(%%L@bh zW=fDmm^%hr%8Tv$G(lQk=3+s1)0mGppvOuX7`#@Lgz7e&^W#oA9|HOi{tYFM@M5Hr zt%9`>W(-G}_TAIs^>0Ksnt3;FiyTzHjoGRU59!`CHeq@(Vd|XGyhq&CSb96|0l83^ zO9UH~fV+_BjjWkGZz%yW|0S1>p`lhc zJ&&tmaM*o^OqX17Sk0ml@uf0}O1uiEL6_{3n19RgmASgE+ z8q-W7SeK2wkFeaXNw$?|74&||6?s)il3-2HI#8OBS3?DFV2G>HT*BNdwhyJSIT6iv z21}2wwP>5WTs9!7{B+C1Z5d-sNOWCVj`5;|peT54sBYTm=_fuN=4wg0KWszAcO)Ude5ItYG}6Vh<^4e%C7&53jY11^`9&F-|h(C ziI898VPJ9r{_0xzkMQ_o{lz7*s?whU{(LL-hv2uh7S(2cxT8WB{4-7equ>(iJone6 zJvt6Lb9;tFf9@w97hM?r5PT-gjsGvI|?7MM|MucE(Aei=y)_XQIR?y8U;y{dYtEpxhbz zf)4oCo+Bke2mFrv9SR)rbnjfV9=!EFvf31ieYX1DQgMLutmsb62tcHJw^?O!eqtgFd`z8e> NVF3UX-zHzc{{Z37J9_{C literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.xlsm b/pandas/tests/io/data/excel/blank.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..c3c2074276d125fdd862bdfc423c3f40855bef1f GIT binary patch literal 8418 zcmeHMXH-+^)(*W%uZG^6K#(d;dhbO+dLYt!DAJ^NBowI;1cD$6g7j_x0Vz_HE?q#1 z1Ox%8U!1vfopHQt-Cy6IJLkuFSJuhiIeGWuX_QQvq-R_y7Qa6(A~-p6G@F z0AS((0F(fHOmigewo@v5)l-LpN#Ba$K8qJ56X05R(gQ|+09pe>FtRl^S++)M1R4*7aI31r44jGTq$nX~YxGhnt<= zJ0H^WX*Mi?*9xg0^NIsffr1fvrgD=ou*Gy+7bOB7f34$WLB4(AVr;yQg5{BEPVEL& z2tcqVd$iVE<3jFT?_$Am;{`jju!&UF+1a;L`y3$>z=|r?#TVy>qSC&nu1}Env}NfR zgHPIdTeiozS@8{!g#szm=i%8W=fR7y;Mg72RF;$~vv#Da2O40PmskM(zp%8<1jPOo z{X`p0LLxLvt-T?xK0<=u@Bgs#f0&Yg`s+z)!)SJ1=|QQtDt$cNbXUhOENRhJZ(%iN zdd0EBT$fzTA%Aojg%zP`H#>T`K77-BerL_en3(}Aq}$cZqfjjZ4w`fhs3^Ea`WVdS z;6&96e86CPdv4y&o_5BRMr-6eWDJX@BlIG_

    &d}~H*mKMQg-dtXj9MiBea6&+pSp9J05hpRb@OI+(+|I*5_C4~3M-J4M*IXP` z&S+8xGI6`&_w82@ezqVAh~r4!7kgl*j1IH#+1Ie;K_f;!;w61Ic}6wn^vfeZ-?@he z+xCx(bpwJK%D`Xu7Sqtl?>rljF2z7mwh*H(mgSH?4!wi1>R}eU>KcV2j3pH#)15qJeDNtK7UUGzO z@ub}_x%6ABF>r!gO<8m(vCIn<`9ztBnW==3MMO%~(frtJ@o?ze$_YNtX`pB(mmTb@ ztjK#P^?YRBjB$UNcRl^Vv`Y1@krPyklsXAL1mPxV_hQ|A%TGrH%d#PyPa2LUN`NayzP|A!RsJH;$>#lyM#rz$XkEAQ#4pny zSyWXsPlaedr|o@6SV%Nj&VNp^5zu)$JN?c7O$U0f{~nD~aEV(6E&xzU|6Pgx77eH) z#0?_!+gt8X@#E zD`6O2&9+$>EuTTI{3Dnx&;l;=*b1x@>O*zSmvjSKW4QDa zeva1U%zKz{U`sbJAj2v=sM8o%6>*!~xCfINV=m5QqI~==XQ$|)eiA!NP|)>4IRz!1 zvDyB(Bf8ce%}tDg_;4;AI5n64U&AbP7uSAoh%@+7^O>3d9xl~&pkCIyxcT#H$?CRFd`pAot&D>Z66U5w=DQTH3B>Y@Sw2Bu2yQd0FJf74sdjgXHP*$ z7&%eki~ReSo8OEMHov8>#121TzJQ*68r|z+77%b)5_}N3+?llydbmExTzoS>bpOj9 z?P`Eo_vJ|!cgNY;y&GiL9a%>cg$m9Nx8EebkUKwb>E+yY3s+M)hN&tSnSaD*+{K5A z#CUOXvwO8ucaa%6;E;s-5p3iWGA>r!xC?0{@k;i;Ex*TuWmPceZ_gM7e&R^5PH+1W za3tx#D5y#7skF~$8X%b%>AeQN)y-%VY}(V3XxCfUW`=1ZLZsPyI0usi?}#w)Gr+QW z5bMVoFpAGR2Afm9DJKzHat5E6R2a3jbgm=VxnVSkeook-IawQ5D*aq*{Aqn9`QHV` zGBm5J;}2>^F;lE%?@8tJGCWLWSdOD64yAlnrQL_LI99RHF>G zfrnx#wek6$GUcfW8uCa96yKxtu~OdIb&}{dFpOawk-P@J2{XbsNIGodi$r-zC>=;2 zWAyURg=A6m&Xv+eNQ-(Z-@%T{&xxn7w&MN}zkr16fP_tOHJgo|vI8$Ir|(j2*01hX zx;C`AO?CIDIUFfhTScFeW`1InNjxI3fBl8#Yo%hxvypjo>%!ZsUtUkHs_@3}RnSw+ zy5zl;zB3)5$76>j=J4@}Xj?-RvEzqWg|@d0QW~LTT8aVhbFN%ruwbb&i^RUoY(Pz- z2$x^5+80fge}ce@rx09fb&Sz+a*;W>=+76YKVbm#$96lXqD{y6PvfS zSBrgh7M~AJyg(=`G)&swdZ+j($9yR6ZbDI)z-_a{A~f~NUN%B*EKumQ>@Gaxb$=S$9a8!$C))uDULFR)fg&^duyA=A;IS;N0a&ae)8qB(gO#qp`c#R!$c^b#`@?gBsn$rf; zyb4Kw5k-7zls;|$tuwDeTUdg?+4I!)b33BS2#28JKhQj3dn`B_m75&Mkap_o8P@RK;Ys1Wd4L3KDRho6tjf5;Pu0yY|mLfdU{85n^mw6+vdefD)3KphMxA@(2huE&tQyV-fjgEE8zIQ zU+Z9rJ?R*_q1A;DgL8~y5W^l0`B3 zvc$>@^f=PTFLW>0xPyCT$NsDd<%FAzIroujf9R?Gw|N(z#e$t+*Xxmn?u_EZI(IKE zx?5#Ux6A$7%<@0Csi+)u7Y&s=m|+YOnk(?@oK8|L!ObfwOtpY7#I7&BN*n-o+02u1 zdZtDlFi%~U?dF^>7D~Kly%{!>uy`8Hg=>pt5b{QxNu5?*S~8;N4atnn7~;9py9o_* zf!XImjqirO<%8-ox368ul~(Vs4MNsGE~@~g>#FQrr0d7tFHWcP5-O!{v)D1%wP#S@ 
zyt_9V;(0(4ES@dQsRKfm@`V@1GG)tzeQaPD8mstq5er2otlFR@1r#kQ=zd5FA4doT z>Lc{qa{YTHOG-nch1ZoBRAgJ^UYplU#Vjx_LxFe87SvPUvcePZ2D|>L zM1nx0;QqOHvs_N*dBi@@t!Ld}@TzHaNxg4RM@M}P?b~2jeTKlDzT>Mh*6T<98{~8I z5`b`WRX zdPANyZqAWd-Iu(j$VuvU+`U5oshI#e#p#5)t=_+n+scN*rxeJ{Qj@r@UgY;O#(%@v zP1dCl*qndEhn?#I56RTmn&GHxAZ$^ii4ouMN;)-CxJjI?W?)^`W!fcr@Bc0>{)?!H z{1cV`@2ISql&v_T;}!Lvx$>LpOG+E~en?R{ioS>(k;_b%qg;+s#2+fsKbaZ$GDVXr zs+uc0wBWxDGwkD%P8RiPJbJJ;z>Mma=(ZQ8@`kS(%7aX&N2tE++sOuIu%(hb^rL$W z!&CQeYHZI%3A-B+$wv^dQorQfysOFiE@G0miv+XdJzX1@pk1*ct-|dtn7?z{`cc0N z{l`>LViu^XvLc*1_ff38nmTrA{MXV6zYXR6H46BoSId?vZb)z3n3Ps524A#9jSeMf zbF3Owuva0@eW(v#Y7--;oX8N?WsJ@b$}fKk9s^sM`Hl1nvb~QE25Qx1lP(3DO_knv zP-qUu!;5*m%&0NyV9n-!+Cyx;dR``#XGuEUp=zK|;s0RX^YTi)Q#`Bxc1eTiGZKFF zm>PR+@upMC>M=yl%m%I%)YVnJQ$ZFgt4^MhEwdtGJ2@-|da2BIP+Gp~hUFnP?2afS z-!MLKzPuP-ru`LXMh4Y0)X|aI#s&b0f6rt-(0i^BpYKFLWauQ$g2-Y}(HD4yE$qEI z^%_-eFneO6SwnTgHop4pxbfK3kMEr}?xXtr;8C)*RkSXmK-oYCQSY-U&2Zg8(@K+w z(sVNRC*GExCY;Boazr$AW@;u1(YLN49q#Vgo$M*O&Y4xsr3%La3SA>o-j4Gtx`x`P z&BRycL_9R4*I?AvQE_#n6L`MBffL`k8?N3@y+gpCb4jXfxIgn=p%S9Qc+(R(*JNkQ zu|MupQd@|JVK}>yv_|VEaW67UMq*}jOb4I_HTwc@Gil<`!bFG))_K#cHXD#Q<2c^$ z+%(xJ;M>=lpH?fWK9ANl4s%G*0IuJ0jASszjq+tu;oM>uH{Ok0l6#IwOY}iKXf5Vn zBjdh-SbNmHMTbk2*GN`e-bVyb11kA4i10-c0JqPZVpgav>{B+DFUT#g2Ii~o=Mf{M zLCK^uR3nm;ffg1{*GxC$%a+DRQ*dF$wtH`;UdySSpC|YLNolg z>y^t>bD+a25rsM-!^y5xDG6C-%IynWJBIQ zeZBDh09k;%a|AAIvr$f7BDgrOy=eU)2QjF^pG>qlj30_z4Q?Y|s~HMx8OpS!a=-W> zQ|m8Nv*Qoz*{!;D>y@3T(o3a;mS;jpiFB*b$zm4z%JVer>J<;ZbE02C?g1Dp+wxz< zlN{Z@vG=gk_xAAg5wi2}hJ1$|y4U$n+Xl_Ws5EQ6P7u}W1)VbztpY|%1>;;d4$S}q z3if@y;Afw4fDM&S{E;3W4M>L3?%qTA>DCR2{+r_0ly;5?GK&ZoiVwpuJtf zo4v1A)04ZWSC1=`RK#x)W*2cI#>J*gnoWuY2L(GB_(&}8dF2MxWB6-(`Q4PsaNg|# ztmH8tnz7GyC}`ZQ1JQiKqaX)}!}D@y{p4IaxtmCj#8`-tSVs9gHC{$fgpqq#(->la z*l{SS5r4;)-y|vMPFpYBtYMemT7#_N6xvg7h^@C8yWp~miR--uKAT-Ha0J$&!tjGt zXT+uWUKF>OiK}gZ*j7Nzgl~LRRWp>K+Vfj<-21+3heeW6-?AhLioMpn>=lJ#WtYie z;!S#&8Yk#1_jl<4R;`CUM`O|%-9aMzGbU|4J^z6u8jJtB;OPo3DGB4cXgTljg)I;f2@{+t&_puPr`PFUg$i+By)qNk9;${1t=F|v 
zKN}@WbXg$-g)1)st4F5fkBvSfdi~>W`z)v0EmF5vf4YQEY&f7te=Xy zS(NPdNVJW|ndOv2uaCyUp7B=fRDz8wEt9?ENq$XD&gH}!=eDJ71I3a|=@!?p%SK^R zvFsN5MN`fo>AsXq+AuTRjl`O~oKXT!gC?Eh>yi0%ab=l=cAetz!c{_u2$ z9&`WH*ZtY}=f>j?;{?KAwjqCZ@bmHFhXXzGfB*X5j)4EYmsN^idim|h^0SwpMfHz& z38MO?m)|7z&t85Oe?PojrTNXv4=MPw>0gWZ4?_ST34I{`TS@=f{IB`;&*m)XzVN@z a|CN39wQ$fl`92dR0`#NBDkjtSPyYuKH&N{X literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank.xlsx b/pandas/tests/io/data/excel/blank.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..73f6ba6d29af3c8764f2aa9bb3291a43d0650074 GIT binary patch literal 8379 zcmeHMXH-*bvko9#dX*|2K_C?AB2}6oy^ACy(n1eKs`M_Qw+I5^pdd(>ju8ZQtHom08jx4u+5d6o#0R>_QP-g5Hj>t9h5Od7lEXG5`PP_z(U9-N{|rtw3^> z1GUqT1p$qZw1jL!Rm3cs%Xxl}qIjYVJ8EQ|>@KpSPD~7PpXE`)z;CnbmX3u*vrEle zjmULm%(p>upDx2zl50HBGfP6_H*M$(3t6IvlEI~=V# z_-gm2m-TMD@fjI0>sBzc6?RjH#O-@4iPtW3!TD!!JRds4#T59hM6b*h!`_FRFz&3p znQN_62{1!-6cqE_3wB$mD4=>bfR7+zd^DCR8~2dEdg??DeHYC;2y84%a0*mEyu+Fw zI5)o^@2Bi@adq(Ez&H^SPz#7<#POb;a#FdQuyWQLysMVN^vUnI*`Q&)JBd8>*{seB z|M+uxl%R{;_OsQeYobI3Wag-5@goeYE-r8YI)5Q$wIPt>D+XzIFjt8&q`dD2g}Dm} zemnm|#{Xaj{`BgxiG3I{UT)t~YgGDtvg)KpkXz7jN3DU)faNvkCTn$EKBxS_eke|` zhV|sY{!-s9^Xbh+I|EiGu#k3ZJ+DH!DA;$@-n%5ni0mnt-Nuf(gW)j~$arcRcHbM7 zoCO{|c~%hfbWZ6 zBzG)(RPjno%h_9K(I1?q9V?$rCDY}(p!9(FdEex-OvNts5Sm@&2a5M6?AAEh+!Gde z_bzHAUD-9G`qe(|%FaZ)%df2k3Fg>7KWyriohuBT)q)&?&~M2M?qgp3?<{EuQl8kx zTv5dV02lxSSl%$9e-XvY+06m$?CkKZc>PHlEKCK%l)3-xqcO3^9+SaCx564j#(W!V zKBQ8zFX<80rS4&cHl4H8a8^ySw)lq;JK><4LM<-ktBA4|k8iYuTl(vK>NO=XO$=r? 
zuP>^M3x74=bxcSdQ=YQ&go}hjcs*u#YBR-$V~66^6C0X)*BxwCPH7XmQt?`&b|Ld9 zPY{q2YTKXn1;X$~R*O~m^y{O!ZhdAxl35)`d1h7Cq>BSjkExWs_0EH!7Z#DP@6u_1 zq*NEXk;^s!yf3Vsq(vEpCDi&^BK&=tRRv=!L=#9h8Vb5{GFEvMAF!u)dUj);t0pbr z15eZksh5Dh)%$)JMEo5ueZgNKU$gx9~x;6Ctq%_k= z@zF9ao9;wqlti3i$75JTRbXGct|kSO{; zJ^x*}L_r%q2-QC#uW|R&iv2_1O4}DQekQ(G)vWw9V#tf6tmXKhQa9W118d)x@TmYO@i;t4altr?7{@N2`&c7DLjo{^<xU!A7n7ao{4&$4IDDJpS*KWfV2hkZ2LHxIvW8xYoh9=sTshi`1VF0@Qf74w<* z8kKbGiwR1V%c|Ba&NfRk&?Aht{ChY~zy*%UcmO~l<2U8`Jsfabs3TP9`&sxKdDl(v zI!}sIw~&o_U4=V4^3*Wmdqj_wXO(C%wmC(x*kz=e=;z4~s{1oe%s;|vtp}xHHEaN3 z{QcMs@EmUQh!UJ*nte5m3i>Y1K|IEh;Rt2+K);Kbc@B@S6CshgO6;5qu9L<0N_`^R z@p0FG~ZR{KwCYruOCtMA71l^lC z9rRcU-huaQQ=@rp%+^NDjXm9t@m>#!l#Z@>_*Nscv8-nBwMwq5WPUxrU6M3#{n7fo z$c9t3Zb`!-_Ssji4D%=2g##MUa%GZtawBQmjKdH7eS6T1h_@1{+h2PLi2KeP?zMXH85 z>QT~sMBii9bNK1$t(S*n71 zywU>s52@TOl{dHTB-(WK!kPOeuY+$P^a*rh_v`pVwp=BY_GAvRI{2sj(h+&i6}^WtE?;J{xLRf!a>HeA!rL3Z!FGh3YTmI#RURd3amd=ITpFSa6o!2U0Y??KO>_UI8vj?V|Q%N&kIwLo!BkTz1Vd#}fPbjZx_^P`| zJ0BJUPq@63j*=Vn5juJH0Ga%@Gm+hljPdgEx_9(2(MPk)bw(GNn55|SRsJ#Pv@ewq zl%jE)Qb#7^@pVCNo|pmMNrxLF3@l$<2lu&M48l8jv0iFU-2v0S_D^~lN^+u~G!8j> zlT~s@Sc1^r z@`T^hxf09>fc@(CICGl`S3@ z+;X`QmhD+j6DyQx!G>$DBB>!S;qw`~e#!I+tO0D}Mj(j-uGi3F%hibEmL4h1R@4n7 z=P(ut;%OP{a~<*uGU$8dnQLwKE7MT*ieyC8v*22ha&Y`RHjvUioGc&b108}~ry_MT zyvv)A)4DA?yg2Ns&(ym;&ml+C4(^|FHvM2CA$m^C;v`xRE-cy_Z z?6u|f6xoM8D8EHnSCA8#n5UsP0r<#5W2@YcJXXG9UVLzhdecENQ`%sJ|Dn0Ce<%6-HCQF`9z@ho*41g+k%(Lf;pWZv}PGHFgRU!ly5-gt5&*wn?Hi3n4Cg8 zR`I2mWmL*lz3kLX@nuIF(xTXw-nw@m&`4Gi)eE5qO{2JT%|h#)u^s-!wX0L4mVuH+ zidAL;i9Dy3H?LmEbcG<>SlE)p*=-~Rs;714j9^8X!`MZ;(FwwQQ@ z{(G)`SADUGUEg*oDhFZbA^mcxNpe(kp^5}Oc{;}vU0=p%6U1(3iuKHRtt0f_am&Ps zxz`>%UhHDsYLjS#2vfTu7xd(TCgc6oUv{l;`XsX_kfwOjKSkiHxz*J+XKo2Q=@ZKb z6SC1%aIHSj;QA0edaacdyJd*JiCfS*Uyn|~xE0}LpSX0;>A?6o0T`18EGsPuqRD&` zA+M@-sW9qm;fUvo^6nxfa@4h9?KYl&N93TiW&{>rm_(%(6>xR1d@E0Y-#G*-yz686z0dES)EQc>t{Mv7-*wV?}v{c{&bF6eZb~E zyVFTK$^C`1XVO{s$i`c4>nfCZJ)U;CxLjiw#pbnM@Jg(hlwU2p5^_hp?u4p*5S1~p 
zf@cYb!PMR;+=Sm$qlizJofid-_Q?S&l-c(RixwPloNx3wp`KB!80^_!ocGPq{f;yJ z-D=5dn8>VO0su(QYsE|g$-}q8&hc{_I6Aay)XSO>5E9}^JvE{x zzMA#O;fRFKLv|~HTkkxPp*O3_=p4itZu;1Wxt)$_1Zj7hlo|#XCXsXe;dal(kn8Y7 zj+mC-RMk)+%;-AW=E08j@s1K~%Cu}MK{x`C3k#0#9p+br1waxfqDnJ@Q}h_sneS++ zz#QoXUd(XfM!ne%QtPDIB;?PyAXC=cofuLmg=#V1azRhkS%WxthusURa`CbBCRbt? z={zMKhNQ_#Oso!S0aW3pUyx0Pb)1^mC^5lmH`;}IT~d2o+rT%ghATOIyPDJEss-g| zVcG_dY@*c}mdtEJm<;ejJy=w@);PoswnJv+UZ4_V+_xS#=JPL-^GKl_e+lYSXkIy zH(8N?Hak2JuRXeb({|};r=5k1nVkx5Iw~R-50xa!=Gj~qknFWyqg<4b0q;`@&eig# z5ai|*Nx%6r$NOBM#HNKYX5vdWSsUZHS6;__Ld({rz^^YAxn?2jJW$ zE9JOZ!t=AL^Tv;IP~9?u(HN`!sGg7ozb2B!${wGFo>UOE)A>i)Dlgf}O)o_IcA1gU zYilv33Z>|VVj;9dl4ZbX{#C}(v&2i~^Ui!{#J_`_!je+oZA?ON#=wpO)4+i^TkE(v zySNKkJG()@!4A{x{HJe&;bLr}u4XHcI&22_Lg{^_V`iKq6x8+u4Dm3RyE!9_L1ubAx z<#DmK{5TEysAFmBU7dop##PIUsLKEN*>ilx26X>@ zAd?gv=7Hp4Tf`@k{{{T3@suC{CCzCWw@{ku0gFyAXO3aTWzC4eoG}d@eJ5e z6x@)i+i-{W;G*0Vwy26I4X8iAFw0rcc+ntHjS;gs@Lj6iaU7ZuovbrmJ1Q^DRVs4| zv$Y$3O~(>5tzv}^lpM9?5}wwRwOw#5qe;x+!6mVAPf)FA8X9m|SLqx-edo@_IM!yR z`<@$MlZMxVFC{ynZO?jX%VjD^aZSBHaK2LD+QU>VJY=lrpo1L{Oj5G~9El@dew*}F zC>QVbYg4g1J>^K>rRrvy;sNp)hk0^fkn$`;dHSRj(e*_D+U7bA;Y z>-RvfC}(90*SPx}R&rzWZ?0h;G@j#24|&D&0x9{2`;9c=B|gaJ$0i@SSNP(HMb}(C zk2vR8F|qoW3dX_~0Q|e*?w=9;bN$VzS4Z=&4*oib`qS{+nu4LtPh+WH4gcDk|Jkqs z)AIZGo%&z>{Mxhq;b{#szy76z`>XM<9mXHVK19FmH~#A2*WKL@2e&Bx=k9;o>;3P& z3{(Er%lEy>uU>vt&p)2Uf%>;zzH8}Uz5J^3et03I{qE(5=KIz3uhsg8Apqcq+4=sh ncK>Sr*L?f4`4PiU=Kr02bu@7?IQcgHBnEV1Fc@3LHj@k@dl|}3sLZt{yFu35SRygE z5JDsq5y~}%RPN|L-_!T?-FyDedCvL&&v}36dH&D)p7({@Fq~ik{N1DgDZ#qwAll1u zXQp*@AR2|mg`tpPVZnhuNNiwe2skpt7Zi%b1Y$s;VW<$_P@hX^R0tM?35!4>eK7$j z6c$da$p>KkUO@`*>gx4%6axUDPCMrKBn%c8jKYAT(7}UFk3;5Fxq1cO8Z)y+=gFZ= zL`e}GD$$$wz5#p zCOPQ!UnVObiUBngQFi8IivFZZ{W%c>>Kqk5qP+C96;XorS@ZAKnm*nA3Kv=bArRe? z2`pG!&vY+|oi;%GLVag^2Ci=Ej@{DOIY%g0yK)OvJsF7UnzwrHH#9kUeziw|t@gB! 
zyRpLdZ$UZswp;K^PQmHQkJ1VH@V54TVUaI4B)+oLx>mC=e=Re7Q8$#u{xkxgBbj|` zEcBVtnFrZri;TR*(|KXNO9!=<8Y2>UTYT<>#-i3F?A~Sb( z(o4F2lRHl*;`f2BqMu~a&Ip)T?@y~`t0Mz!99>%rgxI_Bo6qoJH-+>ghCv+r79s{= zKH3ZIvtCQ-^NV?Ka``aFWKv~ht`Y^hi#U&+Q*sGK2`(@!GG&NZsM;m8^KT0n2);=0 zItdj|IjiDcAmDIrWarJ<++973;YjAKdb(+@(g6T<-Y5 znBHGaEA}qN8=TvMEOoZBAqVAU6pU<*Uujk0cqS8bwutT}Kkl2H*~M}SD^-xIZm?I; zuz5~YDKGTT`Qc#|UXLS^P+EA$m70#I*kd%`bFAt3@D*KToGdc%qQ-;)eMn|$v)pF{R($0hQ{Q14EbSJf=7FFscY2R;EN5{8JgKuPmn+uN$2;J|-Rp!c#Q6F7 z&@TK*Z=@l+76%45bUvfX=>bb{^RDH9#@}p z^by18%M$j(@^0>@rgx(^E4Mn;Kb@%4Ye(se?7!D<_t8R>K`Wu}#YX9_8QwWqEeU

    F#(eioN{k% z0KPdnIl8h&|5edBrYk?-CBpIIM)LhrXu3i~J$p@TjkwT^TBNI0-FCcK+Tp#;SFGpVI(FC^3_Ek<~{5}!Ih;Y z>SgV@?a_^cgU*cZA-DqBHnOT!-8I}PF&XaY?4<2@3+*@)LL!@w+^hEg@<1T(xq)NZ zdOj7aF_RXORe)+1lvSTNXqojrsa@kf0cT8R$PHq6t&ZSGsa2}W}#>byI!ei zb%GU&X;6aQX&8rZdy-wAoXTdZO63MYTS=5Y=lc;+{ov&)yS*#P2g6A!@!(mu=)z|9 zM-Q{R6}O$$(;&{J3vq+6XguFj29#wE&}$A%T7<%y+@oWtjvqsIi&vr7V)7j3Zl>PT$##PW%Mu zpVnk_(~^OWEEw4$b0Y1n2>RCi@2}wEbqecjbuhJ*kK~?!6uzLd)b)?Uhw~q5PqD(2 z-B0#^lzlLl<;`wm!n222y6|#EOusV2 z)U(97O=y8gG0@B^pK)3m2KpqLOi%BcT+{_-#oaKp*YuAt1qUMU#_nZ*)#JiAk13K&1F{y$t~6ODL$6%lfz57{*GPV>$hUI$J}*2 z#gtBMlJo+x308dN08MSk20FoL`j2HEAX!7DNL`o(OP}WSW(yXgCGt3dWuSzG$f@)} zIbfVvZhHUM%;!_8@?y{H=q(Mas~8QXsOMmZ&rB;@KZ9Qef)ONE;%;A+;7q1tcC(wZ z4kPeo0Y~mNFU@V7!?~ldSlU7eG#dLAeKk^U}-2^_rekkyc?IM63{dz;?@TZ z$YSRese*(MBzF3ey8N6L zfZ(RW=!yo%&R1CJzKV0C0c#3ccYKCSx&t*bj=1jQBc&SE6yw0QtUA)|Mm!WKr@BEU4xEZ<$GMCu@CV7!SML&kM^IK z2)8*#{d@HLfpDNre+0pwaX-7%58N`%yZ+0weg^)mn;*byO#h;ypG*AAw;v^PY4oGT X;qUB&+W;AlSAeuflEzMf?-}|pl<4e` literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank_with_header.xls b/pandas/tests/io/data/excel/blank_with_header.xls new file mode 100644 index 0000000000000000000000000000000000000000..a2e6350808147046455deb333b1f74cfd19c9c58 GIT binary patch literal 23040 zcmeHPYiwM_6+U;pYp>%x>^z(&$&C{yw&R#&H!sLz^WdpC50RjN2ue2gIoow+-^bLT$1 z3q>l`z1G~lbMDM{&YW}R+?ly^{pzpl-hSwrDesVWtfC6~G*?ZP3f;ih3ka&||HTA!)D-r{H)ijY={=tDqsA>u9o&Zmc+u6X&Ny*%!rmC5@#$#(hS| z&dAbbKp@n;F0@*dyHlJS#CbH%2AvRf&I-yxnoj3XK1z49yNBq{RmM6>QV(_0E=uG4 zzf@w6VbC&qk(E@CbYEW1q>!}LrG&jL;bBXhgN}4Ab_~{7-LM)PKWvE=!l;)(EXIgoFhVqOm=XrO&~973Ed)8dB%L0fMiKC!hS?3XT4&8# zu%mhT_9mIx-aI`#6VZK|?fwndq&jc37iyp?x~N#FZGjyJd4`$`Q5*D98d|>SQfi?X z(GC`+#gvlev)h{lVSDrPJfUHJ1LWS(w0Z})v{5tuP*tthVycx|Ok0fvwbG?_W>yRA z{Df!<%S58cbP1k$3l*laeu1y7ELaz7zD?uND~SG?GsUlUI%P7QLyMsQL*DYIa|d@T z??q=+xAOnbbc0N8o^^yQeMXQIdU!rh4?igM@1+0GgZ={#`jZ~?;~w;Wa{_0ALqFg4pj+}d%Ukj|>5q8eJmW!s-h=*g5Bk?V=s)qGmn+YY1Wt`Z zKbD_hG-UbJy>)f>*W6!2;bWqFU4h=!b#lhZ85BMt=n=#a;fMc50dYwGZ1$kHdeB>3 
z>EnRu*55}+dcprKf3A}B*J(2@XJCvaOrY?B-=v;Qm!s1r8`>Rp6dc zRt4^QWmVw5R8|EJeq~kQHG{G$w3b_8ez_Ie%B_GwWO;2^ZmYokPYnXb;piGIYG90s zcp>|=ULh3ChZF9Y)_B-7ogf%k4-b+{_=Cht0g1)!AbJp71dyW99nEoyzM#OBqZ{IJ zPE}YajP~=5m;UyvoOk=1Y#>8a=?;>6*$-r?9VF!;N$&k2MUs0ob1nu*l`tnuok-l| zAxZX|1(2$I6OSA@k~8xMo1FnvTD3yMEpd!+p&gFuLS2Hgs!$1cp)Opw;1DX2NH~O2 zX;LVrKFu>LhXNVeS{1nXZ42r}+E@due*3JBKXQOw&(A&_XnA713|`JC;lLfrGRks@zV)UeoN4aKgi@#K(g)rAecWYk|5)*6Mv97 zrGW4Y3O}9j#I^*T_=8}2K0HXa-51ti<< z4>HpRf{eRPRFI>gu~du47sVDdn)A&y9UZCO%)-Iw1{!45kFunOq!3h zhC1+D4>zoCYVi~&L-w$)8NC#nu7>7kyO`( zbH8(EGgfC4ce0t|!^WDW2sV|euJ=E9%bm?Qoy{UAn-(87JdepMtt!=Z{`Y@zXER=B zv)IX|&4&$77xHYPsjjns{kuDx2|Al4PBt+gHrB9KD6Klx^~s;!bZ1k~Y!>-@+6=H+ zRD!2v+h53@cV{zEXXD~&vwYa7o|bKY_k|PgY$oY!Ts&>A4;$6fvhBY;|Drpa$vPVs zPn+k%M)kC8d-l{>cQ#XWHZGoau@4*7)3WU+-g?iS%~YL@i>JkX*r=YCZU6Yp^X_b> zF`Fg+o@Rp0k`g>E7QgSgH!Lb;jp}K!_~0w&+}X_3*|>Px zA|E!Yr^VtQo&1Iyn~``ujd54uxw%GrZeEnD@_bejZMx-W?x3@ZpztUibfyNXdJ$fI zY~J7wI=cu8|ItAkHBi-Oh>rjCdUw#qA}G8?2W{3sRSzK===`ob=$s-bd_xDFuYs!m zK(zjem)t?;7D3?&I_N?TRJA_Qzm8qu1{$R%JY8vRvU4zciQV$X)LaFv#d2e3;Rzh! 
zMG>qJ^5}@Kc^NSuNoukG0BiW`l06;$J;_XWU+-Y_7CT_5K37B5mZ1*n$Ht)~hF;y& zhanixWcb<3puC(V8SwIwU{FbTLxh{Wo?)P{nv2?U)wbNx&=~Bhz}tv*dr}>H&2_1s zp26r5O|FT#5s+&S27?{67em8!xaz^5Ditq9i9w+nK&4c?{#03z%F0Qptelj}${od` z?Yx1C++ftSixQ~-lXXzZKKv!Yiipd$4pgFSlUBA8yAX?IO}YV3f*T8U<`0BXXO31U zoQtAPKlW>-#n`ul4i##&4OF2#f@D@8JQ^B}NrnQB*r$Wnxqw^o&jUP{_})n#!!vkX1xF0j!N+!`8$1o$Eq9I;Rf2o>X8z&RdT7OPU#ax^r87V|RJ)xG=s;R^kBlZD2kJy%1s z`{9dh^kVnVVclOEjb9FY&hB4}0TSX;dw1{NRA;{buh)c`gt*i$480dd@3cjLih?&5 zVv&qKyu@7z#f_w;ke@cD`ZMVy!iyG|jcEwX8v)EMO~a=+uvjx15l`{5M7DPbZ&dOQ z50@GsbjP?|R)fHDBRoAV0t7?lwQu0EI9aX6zC=K5?8~GEqr2=Fp%LgWjFEv*yw)fq zFE3Qfkj9J5#XxK2WJt4eGNf6#qoEqQgxBac^=E{|zoIdn0O(CZ2Tq%-A)h0p&E`xf zN@C@tBvwvJV&ynsUOEtg?q=IXh(utB&1iGFF!5)Vv8yg;SKZp3>4CNu+G|yefqAyV zJ2EgF>&xlw21Be65Pcc;0B!A|VzPi>g%m6Yr-yQs0NXG8#tuVuVmD63#xCiaVQ4Y8}5)hG$@xbSkruDOLPNQ&M6~ zxs#)-q{JVHPND}Cxh&VVHjF%sZP^bi_qg62aCHNJr;R8Q(V1l-`j{rcQfw`;n%>dZ zvtuD_%)hZ51_V+1CEkrgr_>RxN5Vz^sL%g~=ook%Q|_$&qd~_(JFd`JwmU$(L!mJM zZ-B72kk{I_kk^F9^KEf(-Rtqb<{_j}kd*sj2wEN^uIq&CwUG4V$3FPYhFzNzI|RK& z(C3NsR|RE2ta)0PjqrY=R^t}jIUQzR6Og_pO5ZBZb5OcV9PcK0cdEQ@DxkcVXjZNt z;{B^YWDOh8G;1L)j@J_MH$y`hY7A*QK()fLf}cX-xE4~NV2HO@ST?n+6bk1 z3~r2}PlO)DzAD_Yat8Fn1>S(a2uGq-SeyGXucy;$KIMNO(r%>~%)k@mip%_LXh|wLO@{Y=dF;mxly;N97|MS}`$-*OlNeh;WLFo7uz-gZ ztTL9R2^RR=R(KGDsK5$eq!kpi{{LToipON7c!=5C${zdnZ4X}faMPaJ$M1>Kyt&W4 z$zkYeBz}Lz_g5ov7`_ULL(5L2u}BAz_>prr5?1#xeA?|bAkaXdfj|R+1_BKP z8VEEHXduu)pn*UGfd&E%WB%Xz;gdZ7=MPwUUn0-{HzV=K zVAmn>KI>bN_ye~)ka%u?FA~rDdH&DK&5t1QdcYG%ygqOSNnH<^jr`UD49Hv4spNtF^dWwek7Kmp#aGOe5B+yKgy6EgUSTcE7cXPQ%Q%fh zK^BA;B^YE|UwZEWUdKNm>IZ8H&`EFv8VEEHXduu)pn*UGfd&E%1R4l55NIIKz?WJB zJdfw8FVAuDTAp|JVZ)I%XXbbMJlE$iGPFR zzpH_3u)UFYY?|1z&fkdgHuZ9jdWwjRWGof?ks0MP(J67JE36;;ne8Yk+rT}I&k=SB zIrd6&$-npyum%1sL#Y+V@&2#LE--&Gm#6N literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank_with_header.xlsb b/pandas/tests/io/data/excel/blank_with_header.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..3c241513d221abd0276082638a7e20f1a0c00a6a 
GIT binary patch literal 9129 zcmeHMWmr^e*B%OmYA=02crNFaWF)9F^nH0Dx^Q z0DugDi>@zkZwH6k!S89gIY6BZd0lN`^y!%B?5O~B;vro(kw{;_Ae%#IK5tN<S)>cXWq5WPkF;Ph>=|Nf$Lupg!Eck$nL48(TWv0vb zWlRPB%g`-Kih9+ur})5(x=A^93#OsC<^bOt%~wIsS6Nw0LX2K>x^Z!<7T%c>on>$}N~f3y+|MhTajqRekez-lVxs4OQwb=Hj60BQ$~&#-e3LL-9#J*pu3- zHCH8Jaq@&1I~ga1!Q`OvU**T$maFgK?xt{TWuxbpL`0zs#ez(TC*sR{U9ri^LIMV3uIrui0dujYTMj$NZA8jB*_Vg43p#GPDmiL)%+C_S)8Zut-kpX?r z5ejqS()QuVp>Q%;SD0#~8W0}UV3>?Yko?X{k&!WS}43SA%B!?{Ep{R}-G zzx9}px=kBWy{!Sgc!_I4bE7**V^>0hyRoGGnNGa|M_`)#t%s@M_cdc9C{L_qsy}Ky z(v%%a#8L0;neQAddudxRcc~|ZtP*aHwUIPLCCo*~udgh8FM}HIN;Y2T!rP>96R=R9Ax3^!vFxN0k~+cFuvbH#l_yy#?;>4hS$`}?vJoRL)Jg!xBuH$RZP1z zGXE>?`k(m5dGL%{$zO;n;qZrzZeeJ-9nmA01wRFz*5qBfPFVLj4u&bq z$;1+m{pd#UrsZ^|W7;51ySnP@PK`nKO+mx~yhRA4#7t|8rn82OKMqaPi}+8knOENCi&p(f*rB~OIG6e)}mlSPm(gJAHz0Hg*}f;u2G z8UPg$Nh}T%V|Az*#CXoaGLAQu4R7WlSQp#~Vg_fTceYxznzdq1!2?VK_*0chB5#_v z0zzg1{W;)L0saoNr&x*T%)Gh+prBR|NWm)!fOYXoD8X?kffJ>+3_3l&o>zHLT?_-( z0;4d!IHjc#ComSUuMVX@#)T?beh4-G5U25!DuYA7Jg&AC^ju>p4~uMQoIG_&noHVf z;|5+4P6g8w6JaJMd3+{hwEQBN(!~UFB9U)ckXE2Pw}O);6bg6ZJNx7R9#7T~NL&ZI zXS9ECoj0-s%cBJe_-g>T0~AX6`QiK@I!%;F(U;X^N#p8rd>DzJnkqyGfGa7fMkuMY zhAFAy_9&^d96`n49Iym*4o%d{R~9WF%E;8ftQ4@weG_`+4| zaET-At(}aQ3=LRtGa=3htDAfy8ER(=K`!wZS_JQj)Yc*2&>*#7`bmr5vctJ9XSo5^ zU8m9r0%A{APKx_gj4rOoDkSi_0v|rxMr($QU?;{$Vp>g%kHMR!r5jEO1q^n|G?4IuT@Gn zk-oV5jRkksjI{ev2|J$wkNXipw;{gQwAWSXu!&zE=N0me&EM}M_#gHWn}yOF z&c!6pmN9n49C9Cro(=1|F?I%?GHoV}pDdtRDeFIM<^x7KSqud>xY{{*asT zZTf^{96dWyrUp0#TaZKtNo!tlvB`mnBmIe?IV)O#cn7wX@mdG1l6+rPQD9HAvBNN} z(p4}&Qj1?~vG4}wV^-uF5b`ER-pFuuvNUmoLNwuyR(2Ln-<3<~Zmnyqtp(KAA0Y#k zGoB%ZA&}uk9ni2g$OP?g0$?99t;LEonm)4YBK#c}?goRN0cJ+Gt6<0i6}#20w&j@tbvkad03X-pSjv#dk&i9XmG@4pt&5t^V+iVTu!r!EuS7xzB(AF z)-;sN245Xes%_Mjcb#gWeB{Ifn#o8S*Dk(z!!3sY0g-%ZK1`l$;yD7Cr}BYl zMrSqmZtpn7=t!bT_^pP>{_T%yuGgEzcV->Dm-+KOm^^M+vJj}+9()k5fBYQJ(XXr!56SxOP>V94QIrR`FQv6+SJ_VO_l_DQ+{j zcsAw$7foKq{7DT=E;8>3FZ)Iv>avj$6+f%5X_VJ$cXr?^9zIG5FW<(4R|Q-{pY`EO zk;!)^h>&B8i{{w)CAHI%EdWcxaU5BXEEQ7LJ*-Y#=l1&A 
z!oO-%pl$PR5B;=1y6c1-QQ7qM0_Jd@gn&@b*-)ju_>e%{A?5zP3m8C%(5&drro~(KB zVgo+P1Y8V@N??P8c(hK@hBy0|60?ZRky2QPjie&*9&g)!a<{f#clDh_Pd40uT3xsC z$>g(47aI?(?dOs)k~}@>-^^a<-(+hcPPrm|w7A!kF&!v<*NEr~QMfPTmv(#U!_DQ0 z9=coa-S+pMSXXgRJwE;tbI!>AH!$;TUHDcoGav*gjPT`oBcFKkfn zATNk?X9}g6a%~>Ej*{5`50*|5^#7tx!%|kiFQQ7`Msjm`JxJN5i5$#1PiNxP##a;0WoV zy`#NmxS|u}86Yz0YE`~^d&=){OUWS+J6z=nW{nSDfTLzYg<<`UAXRO8mH>`UG~HA*P-;m9 z8;yt;IP{UeuK${re*=&WG&LyM$sEbCER*i18h6y`$YLKB=;Zr`tv^<(VW^9tcx0BW zvMTR+Nrp;Sv*pFOO$Sy>Z2^&2Mg3eHnnm9p`1P0Fwusfanirq7pxl0AfLGY!p@W+p zHTT@OUjsNkK-FbxU{GXEz8Q3UILnc!aM=Vn@0Wj{E+gajcP zSbe2jUQCtwlp@Rzv3t8nGtH%@p3ZK2o66E-!Uc zY@9rNcnz#VzjQkyAr8yo&McS7r;|QYVeOhAb8&$V9v`u>B;EJV37ijP7&Awj`Ac(6 zj9zlon)0+QO;~#C^L_9Nv4acq=nOJYoe1&NM%Y=l!ByO~37kz754)tY3Z8b8lQcLu z`ZdE?cffx4vN?wY(nwe2)vkRcb|}lPwl%8g6y{q`;j3@B z^**#hv}6tAb<0|2B8#F{ylo!Z@2cbG+P%9zf=#djIjQKj!V1HXoFQNCT!ie8+t^H| zyl{tseKqXpgfVW+ofx`ii}QU`ielp1MbkEwh`xT{ za10Bc>Zh;tX6R-$P%clB=9XQCkE3hP8bUd&qA%3Uky4s3q~&6;dvf(e+MWE8qA5(i zB3p;7Y5%d$6!>!nMd>r)7ZoxWeQ?t5=+POgW!VgGbF{ql*x_6p1~Lr{s2OR_Sc_ZEIbj2^=Eqnj+p6G!=#?X4wl*ayrU3qw_nQ zwoY@MCyW@)L|KVSuj04M_1Cp@ucg_beH9scmC|gF)br=+{$I=sXS)CYo)^+ISmXYy zeLU-#|Ckq$Gc*DA`x+KsWx}j8->NXOT1Ux2baa!Ia-MZTOr15-k%&?7rRSMb6NDuEU512&yl5XDZ+&w3<@8(Qn`HUDjlP1sX_5Eu z)9>$dAF`J=adf+G2Z6etO@F=7#sG9Ipf`3FKN$J(FZ?2I!(jI=vLA#Y+h`IbDFd-L zQ+Kp?aN;wwcZB|a7V$3^gEXCc^u1dR^yEzwH;#x1yO-IMlU3gFYLq;$Xe_4=v&UQ! 
zeiKVtd%VBX>-@@4P^w;Zw?*`7<|lbQOmUEoj<`!+LELbga$(G zhp8QC6wM5L%zQ)xY>acn8O0c|RwEDE$q8i~2(X@}CN)8n*CGbJIliS}g!MYb_FgGl zg@=q<;E#tS9~wg~##VP5j&p*)Y?2)nyBY~=%jYf`-j~ti6q&`2d!>dL^BU*~k_81O zYjLbtZPVhLf3AP+&LS&8Q{o0Spf?6qeXtFAyiEI9&}yF^JtYirqwB-r!a7Gv8h5xd zsnT@h*xXD&^Mx>_l_VY=%GmD6&G*?Umo=u`m7R*NMtS z)hQ<*>5$e)-UIlfLz*}^{Ogb3gZQ^ADMr?28U*xT#XqF;TPaI|aHvLV$3mXdB4D*Q zAL-2A=eAarONc9f?+Hy5?6P~bXe+q7zL!b0Ua2`6N#y=<00NK7eCnlv=$nh|@mOD; z6K6qt$pJMZgp>x#tTqnvxz871L{Os#ldwWcmEbnbK6z=+7=qsft3W-tz_iES!+E`U z2%RhXpw?KchwYPo#wzUgWBT;Tq=Q#Hg-c7VC-EZ`R~d^YrITz*qbcnh90-|LiK_2}FAc3NTN$yBOZkdS+BWmT;2um> zn?SuuCJ|F=!0qWhwG4I)f_aC0|lX`6ZibMTfzI_e#@YyvirDkGU*Os zx|T5dqg?0SY4OHaqU)`o^_wF5wQu7#tE0kuH_WUUlP9d*b6R(adzuk96O71(B0Zwn zAo#p*h+fIV6hR1lVtyKr?&0B%$2Oa!JB3C1!45AzJ&2hv+fr}td9jULXbZ`{Iqq;@ zFHNe%qmxSk1rL*I!ga@37dgZ51(xqS?=w&FA-C%1Qv6@1@Uw#apHn!=4fLAtuP00RKdR<8T=@?k7O$T{r~|L2bD8EN0PblXAS-12m)0YwRJid zR>J#J_$T5O6$O>*JVznGM@9K1@reqE${d~p5)q&Ro~`a|`%zI)>A7>1)!$HlBI{5Q zP>Hy6glkA%?mON0CoP96ib^@1i^dV1iJ}rvr~s&a{W$>qBB~93FzR`ij|%wrd+kht z3V3GJGbmIP)Hd~eSrV5}QGQs~dDDsth?+9a0i}@S|3CMDs91mP_|B04fN&(%PyHXN z_+RVukK%Ml;_y%L->WsM^xrFjhD=?5tH>{{jGFh(*P`)HSigq>7BYbV0Hny@G$sI0 J^gVF`{trvud>{Y- literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/blank_with_header.xlsm b/pandas/tests/io/data/excel/blank_with_header.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..5b2ed0055b243b198bbc43d7ea3e7ce5391eda46 GIT binary patch literal 8813 zcmeHMXH-+$)(us9lO~}lRS*b8snU^-bfrq@y@uXt_y7U`0B{*_Ie5n)5DNfE zzykm-0tm286x>`PHm;C+`u^@VUS|A$F3zkaxY*ne0N9xK|7rijEAUExR_g_bxfd48D|)wgT46lgEK(%-BoB3!x6$~L5@v*bLyf=C5&(d z!lA@k9^^1imkB9_QVIpcCGZqUy^K^b#niNfbyMz+>~!=T-Vmrw_V>3(^b5ZQoTveF4H8_proU!ZOHHF7!WfY zsh#?C(x@%%Ah;xDPG-mBR9PCk0P=3<@R%=NMEM4wOAV*czrktLYOrkkfq6u^hFDv8 z%G=l=)DBbNI<&5T#==r2=GN_(x<1tRI)QfXKC zaQx9#g`l{sO+%&avOe>3_Dz~vA zYK1KWryTt%iwwyhSYEMnpzdS1&j`LVJLi1Q51x^4Id%M~E-{gzHRFWMQiSO?HSx}d zXIUypXAo>Zl78ut&D)Hnn=zft+I&(INJoMTffp*IkE9OknnX0+`~;SREwi+9kC 
zMfQ&m5W`XndAs+n=Pt!W?jzcmKCrd-`j~A5IgAh#U#_NOQHxkU%8KRhxVTaOy0ex^HCdbGgZ+ zE40cE@E6lXvY!OYpvq6*mv42`;X4)>Ksl}wJS%%*Ji~n;hLDzuWSwjrEvSEFAseCK z5!w{mrR8;^`0xvRNdgC6%$qGZeAJ@)st27bNmlhDaTnB~K( z_cJ5hpWa~AicA?kb82gKRS)u5vP;RTzT59cH*8HIp70UH%9~@%-Z#J*-_2Ux^+7ad zAlsr2*k!E_nz;_{Uk#g(jLng6ed;)A9^r3H6BElH6GB9i{3Y#lF>jyoHle_#)zv;< zbS!Mva)*0x^!+HJs_-~ZUp{OV z@3IcMK3+80pzpqO_OnjA#foAAGMLYl%l%$ao4iK%r(l{zq8Kth9*HK46 zYp(8s)Kmm->E&`|cZiPZy&*{}UzXkt0xJ9LG4!!dZw{}MeKRnZR>YOZ3b^U<%w*Y^ z*tD79xoAR061~UjDS;e;Y%f0FoH<75HzU+bqUtO`hmP@K30s->iV?z+%QWR4urK}6 z2@O%h!J*?RA8JyKbNI_uV@8-?>oF9E@jtzxdmvHnkvB_b_nNh-LHV0Vo>oAB50Ztd zJ5ZiOVmfeLk7-fwi+4O(O_OkZvRQ)1>uqADdk<<^P&-AdAFz{8!^i|{W$&k{RjIwI z5Rg-p7+7$Zlsx0_p~Ecp-wAzeS>}>~4*-+{zf090+X1n+aj_Bj@f7?{=MAIVZZkKi zJISYgSs`vNT#djB-U-vS`IVZ$Zr505huo~YdL^=>s=+|?Vi;CeD>xgg?GwnE7lqvh zDdIGVt;9K^*;i4kqkpMBiVvKeh*i828gw?l$mab89T``waD{!zW9ErjRX|*~G|QNn zeV}j5sXCB3ji}G+m7fNB4yLk&o;=<%>&fR~rayS~ z&E~k8h&z<}z1S>RvvCHK6%)1`FGTjWlh{}T1BHsE9dG9(dH8;?SqpOL~|N+f(@1~V6*v?QSY^g z9mvo&HIm!Tczw#`j*sUN{_}C6s;PDFzy??$mc{%9&B|*^d0)QWs!Sib7Phe{^vShB zr?Tw;`}7M(u1SGb`G}fLts-f^^knunaKbh?a0m&6_K0R}e;M{^nt^QNTki~U1f8ip zG4kEPrxs$kosB{lpN6R2jMWgE;frBf7;T5{;c4p69D|ThO5%X(!qBsihkAP-57QT8 zU*Bi>202+B+38~8l_fsZ3$W?&lr4B|oF?v!gDq&XW^=aS9>( z%Oi;bMJM|k1Bunrr>AXw9NRA8%1Q^&TZ$zn9|)MX2_V8T9vob39vw7Y6nb`eWZ^!9 zYlTEi?<&P@ZJNnEl6~*U?r`In7tQ)wGeucGv?p8vf@|`qALn{sI&U%Fv4B{{RuvV3S87o#RLeO#5`|Y8 zGgBEC;%G=iEzs#Nu#RY9Avg*M9`9r3d}V%JZVA3p z|BGJciksUGqTM>WF-$12YnC!lJp!Gi{T80cEe}zJJ;?*CKHk~jY^uK5a=Hjf5qCvn z+_=Krc&dBmTyNv&kuXh=paH&GtKMTahBLD_w$vXRm$s{%n-DJ3-Gga%UlnW3qmRk6 zR+*#{zY<#ae5U zXo@1We;X@@7-p1E4WUq%_Z!b8Az?IStuczky~CnILnaTCoj2bVNtJyF$BC!nf8TB& zqwe4&wfAkX@CNXR(ULc|+c72O({?NK@O_~8B{l<#;b)ghy*1xGeKlDPSCng-0vnFW z7v!1@#n~p5bn)FW%Abb4M1E*-1m&1qlmOzMQrvAJ>#HO|`rx=SXeCv1G=owk_^yeMEp*}hGs zEtz}&8b2pj;)u?S)AdOP=Fc9Z`<(9jF@4-v)#|evmbA}<)2pLMkM+{uSRcN~uhb9} zC3JK@27l^+*XUiKx;#OaRZL1MoEZa1yw1gYW{mSHK!+$pzqDSpwgU^fU9F1c12I$Q 
zytweBq@I`!?X!QKuPo?c9DURxn~0^bM;P){LSL>(Fw=bz%o7n3zAof_8hg|^B(B~CzYb%cz#@e{ zs9|}mO;JZ~GoUzswcBxR&PKH^0~-G*qFJccGNu1ASiuY@Kfvv)HbJp#g{m<=$)@#5 z%a#LvGWP5T>fNEo)`xRWUhj%FgPesTbzPZmkZRhVnRd6|yt`51gD@)mgiuo2>n<6p zurtDXMPwq!t9d*{{T^mgS$S8Tq58Ve`{#)-8M-XyC^+0xqxM*)&mL{(p1vy<9ltCS zhE8~Q9LCj0C&ykpQRf|GTJzJ%7vrZ#+8C-f- zbY5M$8(r4u-P_sOSWh<`1Z~XVGwwgQBz14)tM3}+?3^edoDzAIBzhSVPho!D0-jQX zqn=4rkQ%Oghj?Bbx&*Q|u8mvd0>c}F*x5H}@eMN#YvjB^7F`4$s6pAI;8IDMk)&2U zJNOJsYp(nlWeKX1Yz1pDS7)HAan*WcbgulcRR3b2O4ar)-dN@*6qH&?^3}fP@tLf; zg;{wwR$c7KDv~;fTl&Y4FcwnfGXYz~6y8FIz{X%wU+|OWkF#Xvp<;&e9ONzs-HX80 zRzk=$hr`w#mBC&7_A6Ti7x}WXl|_YAN_^^KeAgUZZaNh+v=)kcv2ot#CY%0J|2pa# z2v;Qj+w zeyBbSS6pYla|LIgEMl`^!`bXkm(K`4_heTX*u*#622FbDi(HCbeB z>rAzx)xR!=iB&PVYFR1E3ttOdKC8Xkcke4*k$9qNs}=)9d-tMZ$XI*m9VWqz1p+8N zrRmedoHT*xI7eJws~BcRg8&jkPPH!95^~nILoTOo&et-8ybX)Q#UyTEi+LwJ6a7G9 zem*GR7UyRcl5# zy~8Qrf~#ZDgWKYii*G@cWok!&d(U(7=3Q1JLTyQjRApf_g?&%Zr$IGa;ohj12vwQw z1DZV$XBq;IrxD2AHbz@gf&+DLY>}Fo8)VPtd6zmOIAF8q8Ni^V%4~jC|9E^nP<;qD zQKYAMfY*c7>EpfR$rndFx#8MJD&)jdFa3Q1JqwS&KBh1}U{zRY1a@6>bcp98xw5F^M22Jg+Tc=Z){^|+F)U5>NZXc*nD0^Uf>-ZYQNjDwq-O2WN~rv$t2^BoNciJv+quLl|3uI zfTcULmb6UgBkCWSEhUQnII0Oyh8TT@Aq-mB)v@6s{0*M8ORYL&j(GN=FFqQq74ht< z&%IGDt38d@(hsvsP-R##wvS}g$B*)6R^nJ^yP>}w`Cj@dJT1{{>wbGF?=l6KIDEOF zdz~JiIKP>qw4$FFpv<7)%_z(hNyxBq+7h!!V``nUw(yP8>{3ACt=)W5xFjf<98HZ9 zn+h;Bb+~qSP4?0IiIEhosqLHgD-Q-8Ox=wgl<;!ku}S#wbm7ZB9W5alz8j5-6{)$9 z*Gdt^n!%L(oa{n5H>->MzVTJsbpjL7p9{&mfp2_E`W90=w{SzhRLjY(kPtxZo83)k zaoYC6jh;@3JGmb1dyWX<{?w1tiY<%G~C)nH=fhq<}EC5HA0_a;-(`eZc<<} zU)F9`+MaIiUIJiu_wN+KB#HmYcVVaqOP6z61W`n95g)TeZ8pH**VLf~S=QOijn2=t z0u7fut~sj9=jHf?o(|q{fVl*&xrl7-9KbK_wCT-2$%0Um){s1x%w;)cv5f#Omm*eZ=)og-!1({F7-&Bke zH5xLRfcmo>j*^jt$PDed<|$bTjw;C$XM2Z<=XA`8b4nJ-P_Zd{4#7EHDf=at8k)2s zE<92@uTlclxsiy*P%SKQrJ;l7$p}TF(;@{ZT=6|a zEoxf!K<^X0&o}Ol*Fq|Ip}_rfxSQZgAO};~Qs1beMCZhTcIuW)NwQ0U2!h*@^_cy3 zKdq@XlVR+1f`v03v$fb!VSRn>+2k_Eh8eht>b+F?I_GPrwc@1Go9mc?rm%e;qNsaZ zCNAs0-%b-#8360JfA=lt8c#B@@VbZ3A^QT$Z-Z)RNz<4)#>2m>Xv$wb+{(?<<{uZw 
ztl7U>RGQiCE>`;JEzJ{h(kUO=mGRU@b;z4!e!;4cZpkQLU~L($JIk3l+QmDO)BJ__ zR+qS&igJEMlA%RGg?Eg7$^(LDkIF_9Np8}mAK)tud-3@jIrXDx85x#V>>h(O#%Y;d zT%HHu@e{Fs$^l(64sfII)w))^P({Zm5x0vrd>dP8X3^Edbq}CL(DH)DkH2N58LRgp zTggq`1Tu#X=Qr5!=i2gkXi1Gg>NCHa(}SntC?4fuwZlrnf_;yaH@X|$!w8QGVNB*~ zu64eb;P7flzVs_}MeXQN748G6%m_l_w^KKY_>lCjOG*%O-ZH%)`1QbRExThipIfl~ z+lKmr%B=VFL!%J41T2an(mkEQ@8cUsHkqJ0{rx`4WP?s;uBIB=g%lET0|pw^!~p|o zSZt?XIeQ0!(*1pPK6Ra_Gxl0*Yrmj+vAP}S2l(K!ITo50iV3WLob*`Od>Ezj`(pZ^ zd+=xfo6@?r`d+|Jx zVgSYsLm3xmu$42|RL95F%EOrRk&`1`A<7NbCx9D>_y1}C!z(ZVF;?#6CW+idy^`rG z=YPi>-P>Y(>h>IY9Y1<-6PV6S&yq9Oal(Ht33J9l_Z=TugPsJW@b9{|*{d{!2hY9> zJ@QGkGE*+8%O%}*I)g0M;1ty<0c{G*93%y3j7@M#9Pl$W8@od%dCo-XtI7(Jdvnp1 z`O>fx;tzq=uiv_J+R&l3FC?($CYV{!7~%>5#U+`@nXMDq&)VoQ&62E6a$ZlpFdOqL zQl^N}xl?|&mf}J!R^P+^+N^NAAouG#v$+##L*lwn$6GS~>A2?6X@s<#1BJ~vNUz``d^*x8KsAq8LR<3eP80rV35djQBcB3hY)T}aY_X>t0Y0qvBdGl9>_ZE51rE2H%0^bfgIf90-XG`e|uEblA=)qfjD*6-jL|!q~EI zzL|v{L$gzyzlC#gA$e>A-09LtJKLuHM5Ilwe|nS!5noqV$N-JMfV5tho8buYiwXjR z*a(oCx?4GVaB+Tr{sYecVMP8i_4t%wwN7rFH@gaLG9S-BIV)fk6t}7WG_AC@p&*jcmFp``+GsdcunN>%fL{HE<6mFMf>KLOpcu_NgL zKBh8%Fu&kv`UnQe0ZpGjtBH#Pwm>czK>XC|B-s1g?#0R6n*HXsqp1|ntlmRb??rad zXmE&4!tF88{m@Ir&&0mgH1eyuJmOjl0A;AfOXd-azV%v_vBrKrJohYH`mJIZ=>c?{ zu$?+r`}hLp8YDzUL%jRH-P%%z?SBMK zSbNxLfLn^Q6-1;xUaHdY1i-AhJklkQe$3(`kt)+7*~eKx9>#bPATd&Mxm2>-MUG~l ztDD4h8>6oHg~1%_sZhJPSQy<@!&q+ZGc(Ch8MmOupiVW93)!bfjD@jGsFCk>VX!f? 
z%G+)%BxsrWMrH;w9V{yC3do~I>58B869nbQjRIz{%QQ?l@S$srj zAR_43*ooJYLST%bjbR`Ct@kH>MVaT>I?}-#sPvk%wK0MTx;>8_GwM2QR!R#n@StAP zq7=jNe5O=DIdQm%9aycWD4J-bm{7~bR*sY@;UNE5X=qO-Oa5vZ^``-#IOwwr)*0 zy7ms`$TwM#lw0B#V-7?4I3Fg`GWEw8E_37yPjJ{xH<R_M!uK?<5;)h zJ#1>zC)Kng`}rFmZzNm>7Hr21<0#*qQ2_a#hMM=O`Mx ztn`Q4g_2{60hF^V!APAg<{3z>pSc~`M{cx&^O+5!%8}1Vj}??^ZoX9>L!+FUjFP<- zQ71IPJKQt)F+~r7|T=3RaKBz$2B}1*pRA?9}V{c~kt-Lsl7>;As6hWO0i?enxyq z)Xj7(=2?@;i?;r$Nvs7Ga-MpSlezKY#hB-2=stLGj|9$YW3V-C_`u8k4DHnfZ^iVM zr(ZoZ56Ns1UA6plvR0Q3@k}xab9*Pfms=w zI2B%wwcg~OLhR$dQB*Ao3K#=Q;j~0+nfoGnx2e*Ssg|QjaRZ58RH^jBjZWo^)YZ}&XcWpjCzr~G9zjfd*K_mFFJV>M=tT6Bc{kZJ1HPJA5S^l0#p;(x@V!b!BrmYBGBfe=i^5uLhFP!UyBx*-Por;!uQ2d~ z_}qfwmm)e+`8;W^E9PvWfgxMMPELHQWb6E!u^AvVn;srzX=u7(ZlON2Vbg7eW}LNh zI+_hVwpPuZi_%O;2c2%L`GthG+%AhNYPI+j&EAsNLo*m(@n^RJX*^w4%;$^bBSM+8 zNTW9%v~lA~p?XiObWi@VVvJ1qY80`8)oO zykT0-)B?Dw)>lSdZTB8-mwL79=Y4LMlRNAx94xibM;gE~lwwyspC(y?8kUzoR0dYw zurj;xDMdq$xiYS%@xiZo+zsj5cfN_2R3EGjSgn6tmII2`S6MiSHjGVtc$az` zOD1)j&VtI~C4^MMdVe&)^$^crAd`nll^b5d7E%yJlPMPbv5{(Uti0p5Nh}Z^yK08e z6kvp=xcO64c-UH5fjzi>biCg)S$qooyB3SwMQE}8cDLE`IbaGz>h_j7_nU^+71kI# zN7OmvxyJmIB;Ji;K}^N`piB1_@vQXA&;y{;n{~|riiZ)!4W4g0IvQ%phy0ToARGq0 zrxaqQ>nGlug!2o6fDl6XS-c=UIEKLFt{E(`3RyV~D>per>jCznaMCKbr9pM{3X3_c z!JmHr@+_c8RAMx~1=R*NN86GmJx*9SQjWKd)Sso>7J%?koTwRq`DZaX6F~SN@92 zx_ugT`6btOe|%5Ci>6QWcqTzYdeThzPN!Wflp8IW;29>n-3JQ&2WV}#b}@)K(lg`* zc@+x1Y9hTi?Vav96aZWDggqFTAG6}k9Muko-{D5#*Pa^j45_#&j}O(2Hdpek&$vpt zLahDYrNw`-D}4X#%K!JSxF@}~XhrN7H{t~MAF%R6^&zn0JoggrL_&0a26iZutY9guWWD_dZpJlo3{NjC|N%D!f%a79Ghqp({<(opI8;^;9e zb$KQ>XTd|TKjq|pF1ex@Ye6~uGqzUJV@5j~-k`xNtO zeJvnh&?W9m#fQS@YnK;OuJOa5bjx#MoM$N1xiPhNmmJH^_Wn=Qg$Y;Qa}yRTodHZ= zWo9orZG;9{<6+IiX@{Yz__4N*lNA;sC%HB> zi_I~805;^5pz$AX_t4^k3X#=}=-g$6X`CtGV^6>J@dk7+KRVB^@->=Ir5hMOPwTuiXKok&Uzl{O_;Qnh) zU>`@TACrMWRO9Bj2_knRzM&ViGW4i6C|0#6S>j^PX(?c}vo&l-PevtwoUq#r+U@m% zhTp5JB6r{i-t)ELcfXiX3Q-$)SfLwQl1jkv)ZN%sm+ACe9GmQ>zPzqfgx(#vjrG38 z*}jb9ynfYuGEWqsz%ev&Xp&vpG0-w)Hl`vgG);?Aky=Gn&e7>6N7*7HYRv1s5QToy 
z9ZdGDD|}h4gV_nG3M*A=30L@hvxPb1!K6oVT>(0h*4$?N8o8ICPgsVS;OwU{Re(HL z{|mHTx0zA-28^Gx-kofR#0&;r!}LQZBz-pqTI`H#MFl-qPr z#?5}o!|$Se=Uv#dlH9S25_D84CAp4+0k&;&HJV3mJq*#WnfesLRy*k1I+$)w;{5HsSe>_6?T&ZSo4qPMy;m0eGBq->tuMIXf~h8f z(?xWY6_+U})hjM+m%puBfbKUWl_7Z39>JRgh{~6xi-m@}i>n8hxvT4U2qB8Z|J1t> zP=uyRIjnFKMC@Xp(}wTVLt%H6lXTN9GiVwdU#a=&t-9T@SC-Dsd=zxqFJK3C^4oOc z-`zijQS7&B&n4mck4#yDlVOlx&4!7!q|t!=tu;|5q~hCF7PyvmQIfk|(_H@R<;aP^ z8!?2;mUZ{R4sSwB3)1LgYGRbE0@!(IE<@gxj$t+EQ5$mirrVt*z%k(vjfJLZNfD+B z(F;dgyUADNG;s@ZX7C`PX9!!)o zU1pkn%mAAVv<`G(u~{{HT9jSa`4H(X#gU+uT8`U3=}2hMc&{-BJ8lqe!zTAs0`^9C z>Je7~+N)Rk{3?UhP`~y1m!vO73E~`92)ILJmw?qHGm@v;pJ6@T(GNV9lg*cNUB865 z@T~hWQ5UcFjLC|0OrB~a?@AOVIOX!Uv)a?0GphHJ8Cg>6Ma{&TIg-;@3Z3QE)@EHz ztubvIn>P|IiIr?I4?AoY#24M$LJTyK;maUF>YmNuT33(#UdO=*!0s-!pHrC1CSy zxp{qMT&Ddksp1%9Hpn3&+yz0bguh0(g^Rn@KOv4-vwt;6il%xe9ZAHZ>IFXTa06s; zFrv=5(3-2I%aRYci1FU3OB(Y-%cY-P>ty!VWd^8+#85MR4y_KwXSj&|CRH+{9HFfV z=JQ5jUt_Liidd+PK~>FIQW1j?KUYO1uQO}qowk|&;3Fa<5`|e!XZN~;1&w&>#SjoD zCLVQ{suk(Ml5OJ*_qV==V0k-{ZY(k40lxPDi*e#n%-fF=`Ar)fkwPt4r9zJJQo}#c zR-(UnpO0M5>&mv7$P5|nBZg%HFmKo*TrI4xZYRe(6&VE6nuFL(gKs(IC29*<(Xx6SGb@0~`!AT)^Dq_*Bx9xZT)m$PWbPC|J#Z9zxQ%N^jj}KPH@+~Tt8#{ z97_U1f&Q!F|La(OoH(v~xh`CPdI==^;pJ~K_PU4bGV!MeEkrdOF%FM^i^l7we@*v4 u4FP~|3IO1G^1p8VS1SG0x`gT%>pzKALm3qjX#fDu_YWdi8NL%V;Qs(&aoDl| literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/invalid_value_type.ods b/pandas/tests/io/data/excel/invalid_value_type.ods new file mode 100644 index 0000000000000000000000000000000000000000..75a7a40b25d79366d8f99fd4945e8e6e7bbffae9 GIT binary patch literal 8502 zcmb`Mby$?!_Q!`9q(wqPKtNg=kyL4rp#=u%p<@^j29Q)*K|(sETT+@sH&TL>G>DWS z-SHca_a4E^ImdJF?|q(ko_GHF?6vn=d#$~{OA&;EiU~M-ka9HSfB*Q`4<3LVAZ7(K zHZ!rafxykIV76TBswx-&lnqLIt+UteDHZ?;bpZtc`29!m57H*H<^>Y@B_xP}ff3XQ z0k*RFPSpsq*J7vo2UN04sMc22cGi|gFuVUpCfv#jY7K!ILH|o8$t8B4nWzu>)tUe7 z8RAQbW|j~WBisMdK{x*#C~5|`gjoLz{EvNf+1$_8^VjG8b1Yp(G=^G1;6}f?nm-dS 
ziz8b&1a4>h-&=WE#9P}~nb;WF{t}fCfDAwfU?XguOMalG7C_xZ#gxC&io=6_zO9JvgyC2W1rxcsTNZ8i)8cRh61qBs{<9h8s&oNKY2%91#d}Y>|e9Vhi zQh?)H+@zOn`d;u8Z7mSlOa$RIRNR_5U(ja)2m_(ooflUMh(gm(9Ua}~oEe8nSx#@r z8k$ku?uq8CK@WX>B8j8&shpZ~|7hBOKm%uNCr`0Ir4i)3(2br%k6e+#C%J|n>gn|* z@&mMb{Ay4!uz7PUy^|aU%OFU!uv__FYE7TBfTyX%{ev{Dn$U4sJ+nZeh*NAeCPh0f) zZ<}I$Jj;ErE5{Em=D+RRX#}#7r?i$@ujb92h7sw6Us5fuYYM2&uEq_>&wdJt zE4KHUDN9HhF4w{Bt|a*s{ajOhKq7i4W}P~#AhMnh<8HfMDNmhSFsq(sipaOhvf=t0 zOh;(P$Hz0U#}yIJ#7$yiqi2lawj$UH`f4JdukKPgIRB>geqg3vuvV_TR2V*ISWoz} zBt8pr->NllbM-2g3(TFGXcP&9?3r-4`5c>vA2OzuF12OaaMo=9<4r*_-O|)vWL74z zu^m=bff15;y`1JnAeq00`TReC>&T}lHMEt*9BTB4)8`MIETHOyvID|nSsU6D z&j)}m>`D8p3Vo@Q)tnsyp5EK^)v2FU@Pom7&t!WKD_Z#PFGf4&(Xct+sh7-^l zm?sCG*Eb<=qwZm0b|BD(v7^E~p^nE#9mu{BgFjId7^+Ni*%;nLeTV<^_mJ<}4HD;;3W{JIg|ti0IJo_EQN zPh&QP6n>+rAK-f;jonflh>k%SAZoV#*@cY1dj`W8tOQ?940hb5y5~PGkd;`32`N>s z_ARA)?JgXEA=}{v_av{B2Q_m?Y%1~BC#}r1>Q(o9CJX7U-45&-o_K+sKBqHPU9hU< zIq4E^>a=R$WY^!kK6xulFOF+fVW~>c@pS%#t>=_eXY+VsONJ!>C$BEu&Qn6 z5%sp!$92s?4XIQ|^DEho>t6PQ5m?oyEzDeZaT^VlIm-0WQV4(Y7V`+L(snRyN#7OW zmC-SjkfTv{sfqvA7sbm@HFBQ?(?t|&l(^x`EQ zqp!j0GsVY|sB$360_W%Y&I9fwOa59rlmRO?z@;uZn5)kLLCLc>c%>GeZfjf zPE(+{Yt4!}siP=ak^c0ReA4+Q)o<1_y9lNu^UCHHgjG(q-mTqO|JG{t4C&&2NHCf7 z%|&=18L=PG&-Q~69P&pE{*%2B5DE&4yh5d^b48J<`+u>Ob3y!Bis}BbI zNd@feGJr_6Vg?S%ggUTNL5xpO8ijfMbg8*Yqnw?Gk8}MBX(x0V6m!*6axOW1zeFs5 zB7vV;d#G8POcP8HGtW<&&CT2#E?6diHH;AT&K3cgsDX@eslr}ul9N4YyM{Bx$612~ z2stPOhjfTrTaan{6B=@CNN~Xhk=vu*@g&nvCo^@V%=7T&3lB9F0X@%B`j>{cA z*<5%l@s_S4YA-+Vz7;hg_YrN!kD2v?M68g`vEb;&6%41CpgMX>|3Wsvf@8`jKqxE4Grv*$HB0(>Yt;AyrhXmV z2c;v_qvBXo*7l{}q)cs2vs5+o@1~uO=2P(!D-607ZCt(RZf_{R9}D6h#D;i(cefeB z;f~fuf0g=x_3jt|Ds?FB|u@)q< zeV|FN>gV)DgSXmAHkK7ilS1(X?dmEQ$G2YAN7QKN)@ZxlPKeJJ9aQ%SCUhLK3Di(S zDGx%L+sv~Qtbm@F#qXZ*HIBSGm_l1kO#eKgePd zKY}pE2MdOhT9j!p*q=bJeA82r5+YB!1q{ZJ9NbCMkUF57>sX2}4ea?E$zkPD;}gx?0Hb(y6o6)~ggOqZ zb$-TifVbeHw6NbUn8EDCO{)~T0Z_yHs^O9PG_;66Mn_+}SywvGgMuLqKL$ySeXem- zNKVAzC=QPzD1raL#jgyYC=W!&)u&c^rKp^?f9epx;uz?nD;;ti?8~@t%SIxKm^1sN 
z0bt))EBpjdE5f)GD>}LEb|X<>b2IA{ZSAlo8PwL>>eT~kcsKtAcAQ&bYzb*KBAOE( z3t4idS^_eALjvk1uQk4u%nJX&v_JvjB;;Qz-U&d87SqtwmH74*W0jR-pLPK@@`i?s zYHW$=TH~AcdRGcrbov5iUJbQQ68a$yhLg(9!B-rQ^*h~;O^b8|#j?LVv9Amx*WKr| zv5wyaa^0m%R$=9nZ{3$$5W|QkEAK5@gwRqEQ99%0`9*)D;@H$3Ot{B%0#->+>`Ha< z6A<=E4rW{FF1+3tPgXi0tmq?d-ZL8<#7T;b0l#6`B&{Y5Pvd;dsKJMV^3a~{o_sP!;7b)MY?3zj?U)U<_fg)Q(sFryEe&S1 zRs$%{tdJ-CGmx~EA1}GdP3`h{=(dyB)F$_7S}@TB_eyVt_!^QJhGcd(G2UnpCqCVA=b1sI6K2ntKTb|~s@ zC+HLDAf?>AO^)}a^%Z00vy!2(2V;`%Eo`+o{3qVPHl7f@xAm{zl^}`ECshzu61a14 zy#uCu?>f0qnpg^p5t6@NIcZV}h)cU2?Lln1I zOT|~1HIvFGkqc?q+nC$!`^w(X+Py)uNLS$MG0MdZ@ujz+1&Rg$Y|sJ#n27RY3wMMX z{cV02*RZrgTmnyObg47z3XnpX-dmD67kcJu?hc}f_3Pinib*z14r5d-j^7&kCM{EB z_&9rJP@b1Tnr5S7cHGv%)1QOc8QaWP(tO97c&>vKL!IZAY$maC!)9}M{Cc)=>wSw`^Q*Vf`ZJzPR5UZu zGRcDI?m&**R^9x$U!Ruyz6;3fSIk}4k$tpA=EpEuBxFU!m5wgUeuE{t9p|IfC(uJP zU%8SzN|aSwiO``RK&NpdqJa(yH&S4vvGT>#<+-Pb8AKE1SJMEoXMdTrq0(kKG}0 z68W-~7Y_*^#4OZ5bjrzl0F!--iwb#S#LLoLlXfkdX5Pi&>Ssa)3{cn6HBYTJ{d5~f z7w%r9cq6S(C`bA$PpUSN?66Md3n%h-(FIl6RAU|T`K`enQG2#Xu@ww8`%z&Wh3z9! zig-fauSQEoa$PV-F*_Z)G&^R9I+H;4jT(gaMP#Oj4(pyhVAFoC(!*D?ONMUZ684SG zO6u@_jmz_I5{H}?5Z+UbP?g+{U2j=aE|UL{{s^STupA(zO`M zUNL+x>e|v|K_`HxElB~IW+m?*+RtJ>;-zj#ffE~3Jeu{2B9ro@aegv=Xu{fjqf*T# zl4hPJEo#pvm@EEGd|1Mhr*5L?q|)frSHRu=@fiW|#)L*NM{chqB)7NjS}r0)*Hy2=_vVJX6HSzlr1Tz9gg zWEQ*CcbA{nGHJhN0_c-TuGOI@@%8|K1_BRbRiA;}m#&Sl{SUplP;*4wXy)H?!Y3E!} zQkdI=HJ8r{E8e6_ce4uSu63%=2s?Be5-%r_JM@v^%H|e+~nQrtJgndK6udCT$RWfQmfS5Wa)_kkBguU7G z?x+qYd&}I@lF|r_v~0A+YWl?MK?%t_a_dFKz9WgvZ(T#~7&jR2h_#pEn6AekgE*U) z6%UVG{exRx1kgWv=&a_Y9$pi}Td=)-fMzd6)HbSDGdMJR@^oq6omi_JYf|dV=q}dn zJJ#!-kI1lf_D(uoz>+s*Qqo<6A~oq|GuiYes#wzdE1z2mDzPOu4g}$zzVDmSJ*!uj z_YuxiOBGW)OMMu`4Epy9^^aSx@I8y1b;JQY5^?W3lYP%Y;Q!{aS;I{JIFw(~lvu`I zvz?y{nbFtNo7#z5|KcHet&g*!M-=+QkQX@20IS9xVcg}0ZNxVShi0|PM;(ZIk! zNlA&FgM-2wA;<1GeozF=QMvhbyCHy0vb{yWeiM)t#O? 
z#NF0ts;gt1TAU$KezLrxDwSjmL;|-)00Z4S98U5Dnaa&pvjw(^Yb#A+7!{ta1Ri1;zdv|@6_K;A z|LpGozxQ{3(hts4f7Z{*FHmJJpXqkr_z=@(;j;S&1!UeZ73U))_n2$kUH*tytO`l}JXr`G@M zDt*ri2;Txy-9r!~T<9?&fM-3XOB2G6Tzye5J`cn|2mGWS|CNcCCoYT&QHbb4pBGUP zxIYc*9|m5OP|q6}c^-FBQ2i@Qf1odFgXhttSctj*p%nfa?V|K_o`#Ntp#334{Tl6} zv~-?kkDy%?ntqLV@hWNQ595hUn2_^xn-dqPJm;9yJM)AVQ+|E*LdhF#0Hw=mgPY zv}g$letEzDUs+%N?>lGhb-c4=-- zZU`TLPXx@<)78li=HukyE{t-w7xI81osdEvo(Ok)4?ABsgu9Or($gCOvqw525I(w> zF@XS*e3B~v?KP2dR znOxTyoo2oi7XEt7IbZZ-OkjDYh$fsipfd1|{i?^;@tq@7J$|n3S?lmTdi%3;&@_I_ zm0-0_+8{8aQgsEos!YvO&(?1J_#K7N0_&KC^n{AnJ4>3B#}OZ`^jce5fYz}~+2*3N z&t5Xlm+}%bj8TsR$8-1-E7GII#sdXhtGneF?dHRbewq>#X4Mu=%`?Xk+)+zXi79ss zQ7-DTEqv5UMW5P`M1~BYHX4Cv%io<9YgBx1*D;cc)o$_g1+z3Z3Z^a{ns)^up8%x> zf)&((fC{XRX|@*T{exm}yQJ=-FkHH(rgZ5+vfmu*l+}v9$Dq=n6iHmeVnfTLXI$oCeSF@@;oRa5N<5h!nRK(r2wT6Lhy?H|+}1%cux?%A?m3WX}y7dSFgc zL*pQK+w%xf8FKtiItYw~WuHf{Mp7hzpMNBHKlizY386ArO13jKtI;T(odg?50PC@k`gR-TNKkX3q*(s-vlWTDyquKJ|6ni(9jKaFJ4a zSWbB)=t`X-RVb}R17T6(%G&&FMR(YQdE_Hk_2uIJ^0=4fp0{3xq)KaLx>A66tx;s# zuZuBzkZLuTQ>t3gw<$u0ehIls5wBZy4wg?ySa{K~7_-Exv;7}Gd~UqZ%Tt9mMr~IN z>^Ja<8J9seL{vRv*_BVC8tOfC{rSosuQLf@8J-M)<9L1s1UZ1l*Uh{e#}1p9?#6~$ zltSFK)%K>c5A0>>xZ$AqWS8Zz$g*oM2B8?hTF|8G+^FSZTI!YqWlv6Oj@5Me7h)PJ zQ$NqZ@^H)mHUev#kok%jzMw}ouoz{@o>_mK>UYNd0_a9M3(MOi|zanMV6B{Eh6#5pm*xHbgf|f>gR&*L8qBoX~!Iw)V;VM1pGU>$u05M zw8#N~Ze0N2lHUM34|kv6r@!2!cj;Zr{; zg{(5T((=7Bh)NLmp5*k0MBp{2Jb;I!+c$+lx0reF+(pProk+Js75{dVzu0;@VcC=u zgAMeym?>>dOj}W5U!|}6_HFeu!yYY?PU9l~p_-+U?foh6szYm=rBGRHP3}1CSstJH z!6>4v82oi{0Qm}83^se2ez#)6rKQt_+#+2^TK|pr>mIEj-mFvN!GS@3z+un+#=H+r z6KVAnPQ`^gCSw-&(5W-9%F>17R@VcK#K{qzaQu$Ql>BR=hQ;muNy!nE5sCIU@2OhO zqC25@wTTfOe6lqpD*atBdlo3*N*ym_RufOZJar#0k=TXZ;rHrntQGEodL}MG^zCa> z@4K;5GpG~Lbnkjg0xgs&;#+7Q@80d36MDpNv=ql_c3%1xa>Xi4!=+mH$}!=(hT<$G zOo2_)qx^{a&ulety-zZlOc%<(Uf7ezk%Af58BN;>;0Ps6DZ_-sE-SJ*q3>SSFGC8l zvwrAyuYS9(#Tc;SO70kMC`-cu!t_;?44+eOb~kSM%*xhte(1UL#D&x>zpuZHEt(hE zMcGfIMwTp{TI4M?@F@LY!V@AuYFk)2l>|Jk3k!Z)YV!CFWhTC}umX(&2jeN;HbuTF 
zI!cINJJE=bZb?l?@;!BgjpxIF_$an3f#n){gdD=H_}GrrE+g;SYqrCbN5JMp2PZ<2WBp;@mJHpv7O^pp z|4Pqns$3dTarHCnlV7+`O=r|0ux7(*lU44qsza~)7pHoWA!nBcQroE3Af~&@TguvQ z9c|8xP+)cIm>}J3K|KntvVJ}4nj&HNow1YAyN##z%f(?2wmzn!f3dDc)v=?!xBotN zO0GyGsb#Yd_0(AJiL3l!6+MyM)84iX;cL{qo}x{ElF*#cqI%k)~61-^;_Q(K@{8{4Rx zunE1XL?pM{lupua3Wmmoo_soGG9;70&>eejs0}(mMN^cY7K*Z54Z^9?5v}$mZ{ERNNl|#j zPgds6oZaV}OO~5uHlv@=Q_@#^Ba({Fc9QTp8^n|2CMj_cYsj6JeXbYx5-8h-k=W(4 ztQawB6I~DPKC>Ls80^i}Y$6V`zEHZ}PRBSq7`WnL4uh*>5af_hG7%ypy5XhD2wV$6 zG`HcG$r6ik+M!e!zAbL^^rsj;v@me!?Y4CJ9eb#(5Gg~tQxGMpRhKGSf(F#@2uTlsX2foj#78U+;-av^N-w|a z7A}#a#1v_j<9XsB;@1bjGKz3zI>QLhdJuXFs!K84XZ7`=w8<<-xY&szlI&f4^q1Uh z&La1>yAlCAs5Hx97lrLgv`@-h0s`Rg)gdSi>$TyrxlYTGqREsBQ|V?@Xl zBlHX>FZseJbr{-+QGGk_$)|d zG?#CAf0#pTdrUc5mF>PBwlZFo$Tc>$!ujlAI{QiA2SNPIc2n*>Sh(~zJ)px@6J`3I zI~Z2rJ!Fcn9VhSf?-05)mpH=MgzKN);ZFJLMy!n8wa=QF(L0!d$v5uqoNGe(<=O%t z=!Juov0LRueMgi zoIkRCMZV(vUPp!iYuK^#v<-_)ZG**o`#stL-L#^tF__0}hFct?fX>jvuSdN69(%HUxuym0u)yl;yZ%>R*7& zCgNNJHf_ejA68GU%PT1HepNdctIKYWP!Ie$SHUd&9|h^P5iieuCy; zFP7*vR*|>O05_@L(9k1F1#umbISQ%YMqe=FLYv7!+5#foyK zR}jre?JzS+UaI#{TzpVNP28nJZ=^)(k@bzkL@n&5lc@QbiLAG9UHIe%?k8dQj}KWr*Rfs_&qr5u#A%vVYc)r*RgF~MTNyVdG^I@8o&Ki1PQLC zdMI{fLbB$dGKU))bK}`Cwupx`isQ{v20X;^ESO*Zrc**PLz_t2U_r9ACSxXl3_I&2o~n+ zC+QA1M!$9EcajpUwcI{>_m;z#{7Z!kE89{GD5WLSSevdn5mwX}X%kB)@lv OFDK_^CUVpM_WlQNTI$OH literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test1.xls b/pandas/tests/io/data/excel/test1.xls new file mode 100644 index 0000000000000000000000000000000000000000..faf5dc84700c9342356499cb8de5751577fcecca GIT binary patch literal 28672 zcmeHw2UrwIw|32th9n?}Vq%D*vBwV7- zBcuwhdsR=tnUIGN#*hS}Mu@jCEH0k^Z#ujw8NxUyLopa$Zw`-efTluJNiJLshzTWM zOX_S$oeQYc$EkB~(vplIqS?tw$${M<7lGU#Ii6IyWz^Xm&K%N$Iv=L;+@k2tq$BwQ z(u-zipurRJMS~MWgd~wfkT4Plflk4J6vt6Kg($`cugmcPEyR)-G9I)NL-@oG&QTClkK!ATki;t16V1Tkl)VxftXdXO%}g^*Abr3+sonK(Meh9ZpV z+*3?2w>Jm5Lv1`mfr|s_41bW7nXe0Hm}I z9Po|}2dz&NAW(WZAbz5{h|<3-y+8q7QTm+<(r;Hl-=KiLO95SxJgXF>&rv{Egs(`R 
zeF|{?x8zx^0H2npjZA%=q|(dM4=SJ^RzTmcfKHcJ7N3@Xpm&i=HwH`!hvgq&h)F); z%z;JMm87q47dxnOX-Vj1WC+}v!ibQfYfIC0By_l;`IcehO9eI3>Fen%z8a8To;)xz zrg7@^N!y#DgkDC*!EIX!zMh0$FF$SnFm|SKat#cOwT+=(=yqx#p#wf$FLb+BgPvZV z{xv1(S@J_~Dk*OmA${ug(@H{Tr6SP8l=>;J9W;<8 zfGDpW^lD82QC>UPRW<=cdF?>MZ3>9;+G*D`5aqQ4DV+7 z<+TGIs40>tuN{8VK$O>xS<^t2*A7e}7{!5zJkLgZQ^gI?qG=$?Ysa!_Aj)gUs%ap~ zYsb22Aj)f}Q`10{*N#m+2+liliMmLp-@yl>3xA3lYG(zj^Xd#KG^oxC)Bck}AT2Ev_NaKsEm?kkzKl=;fj~wm zCQTv~(r9QxBtkK1DLi|YrW_$Tw_a0NcqyS|yL6bPidzqA3l@%~GvK%YADpjY)(?Ad z0aJ5E6GU}LaYel&QMrS3Z3>7;pd7@lDIjcD%~m3=l+#K?sbz{IYYHBatie77f+_Nq zC@NheV+#L!_Q|f4ZuP2=+h`<>L6pj=7fRMVx=VrN1}iWR(aT&0$TXHw!eR=HF(sxX z5|!4gN770Q3kyX=3?dPXqKVZbX%I}Bq(RivfQxL^^aLuiY6SJX>&LyQ9CIQ$7=0UsBsvC(m0SzJR^7+G2MUZjTi$P47N!s!aKF<`TC zk!549gbnSqC^l-s$ghuY$Y;}%&Bj%hjkOXsv|FRts0$-szkV&BjUk(jn=G3yO4!hj zk7A=Cj1;{umCweA&8Dj?8wVw9&^3zdh9``?`?5|xn^tT#?y_uLl(50~m|`}X!pMg; zx8$=iMmBEBEzK0zxHZAjO8pL;emS&-Z4b##}{rk}i*p4!jYAsa<`)>BZ_dJBTzC_LYS6O*~73Vn+#w zxUbY977d_aQEX5%7AVt-2zj02EFaXe0Tk?q4Qj;#W!elOI4h!hb+)8d4WM8xY|zdu zP^N`Ik&B6ZQ0oRzunjh-Jqwg+4}^?fTP`28Qv)bi0vpts1U3lqNV~x_W_kmg1;~jfDlY;m#PkFZCGj5 zVLMhsT0R*KJwZr)9{W^P$YagQqblM-p2@IHBas^WhLh>_IUG_oAcq4hhlWT;CWkby zR>ToT;H2NQgxIjSu;lo}>HHDGX~~&9Uuh_ASCKyHN2bBB0UWCGkL0yPF0O%#_= z6Gr+loA-^62XpT!4aI3IQYF3s63;StT!@fJ1R_f$Y5}teScn=R42y_~i{itKgH>D= zeW;%SAO*G{lp+cwqh}fjrvc5b?}!OB92&F+NJhKUmE0D@l?e_Wlnrhm2b}hz3cy8z zO}|$`K@i-H2@al=4c<}?IPFapfYSpEE(P9|2@W2Wb;=E@8JrHIw)I*bO-rKfN*khVi-6! 
zR+qqO4_!h40AooL!4A=-(UTm?lfx29-WR!d?y`~_)a>ulB-R6 z5?Ij=oSaNm@KhGlRsbDH6QD3qRg0qsVMfy+>@yS@> ztVIqL6AA(qPS=nsWkXU68K`%OtD#FPiJ?Uqz}7W*Egl?}7>8bs_TvI+LAlz{B?JS9 zM6g?lW0U8 zKp3On0>^1^U@7q!M{QA^2emmJS399a438EHlbw+=N5G)CAf>V|gW`&m^8O5pTMn-c z$zjwG2Oq+L*Uhjccr)V}(w9P`=fK}7*oJ!w68RtFlCR_-?tOrbwp2bZAbUgbfzTJW z>EeEX@c3Bt0qWqPB?p&&GF8ZapiVzXijJQg8=>G~OjKNaqD)R;*bhQ~Rf?fAeQ|;1 z#u#THhe4Qz5f``_4`I3p#O4uSJA$T9&cbyOTwlR;A6&zE#tY~I zxS+X;r>VP&LDOBzB|`?Qs2V|`k!(hu-_t9?Dr^tQ4dFW4Cq6D2)+|FuOivIdg*Z%` z6uWE1rGPU|`u*w#d`){|KPb1*lV|mO&V}>~n`RdOvBuK4nuzbDMACI0qZsV~a zb>O0$VfizjCPeua_D;`8G|kv@?VoPkOI@c}r+!Oqae4C4J8fPrTG8!MiBH87l|y?w zjOzX7$>-IU4>JxN6}SiN8sx)E+h3en_G;Vb{w2oV4i}Gfs&&|6yJ+L(A5M?>Y_hj{ ztg!3VuG{YM-)k>zd1kN9qR_vh_$KyGi+ZkK^y>DFA7YQ+O)pHJ={~kJ;FQQ{T&ns1p)@Q|_CwHhS+~E_ND2z>Vbini6<$p~$ za4EWKO7EiCb4uoI*=ofbJuPAW<{7sSh8`bsb#t%F&h7FC9CG@#rQX8Iz~jUAct>0< z(Z4pR-)*<`21 zC}AR6h)$`$))~IOXI09^sTq|X;m2(HjW4{k=h!FWliSwoLf*9vw=14JO7r;hzESzw z!MZ=#)x=9?cI^i6t+T&6tFN{E;`?gni=e5$jwp;$-ye}}a_UORKPQ5Vey%B>5jfH# zUa(~Q)cHsFcF+GVZ`bPnnUbu_0}8AEUfyZ*uuFAw`V2A#A`+u@#L@k^$b9L1D zKRf-s=40>0bLTqtnAK18`N?RHOVxbCO^M5QnR#5By=IKHQ_tPcyMKOYxxtNhI`8V` z+KJ@XW50(b-E8BYn9?@&x8b8=E}pwlxvMr!XJYC#-vw{Hl7{i`}V4$;@0p1i&DQ@a&k#@qJ~G%@9Payn;PzI*)hqG!=sL+|)z zHKC=I>J-!BS@D#)>-=wlsC+3{?>V@mf&RsAZ9M7egUi^c5FVy12>Ds-MPIqcwxKw*uRBhSA9_fD%YI&iw zZqrR}N7w!{miE+52)ryDHLY-v@tcy*7ti+gt}g95)zoTL+Jc={TY?`Bt6aXf-v_gP zi~4Hkj(fhn=4R*f>E$T_3xyToE>HaS9{Jd((^x%&tplwepVivpU7d0ELH5BV{NHaC zUVdj-aADiTa+R5p7Gb0LFZhB}WiF389LThpvOPC%b&k5g+a+;D`-0D9XLaYP20A(k z?)O1I-P;Z0|Nt zzu!F{>yWqMkv00YEoOO^jGZblyO~(G!0=S~fIZgv=ALC9wr}1(=}@}s?+yje+lK|O zdj9_A#XTVbxmT}_w%>K7&*{~hlHAVRobh#gn%8oB`?amN)b!67*JKD6MkcR$;{+cmVM#mT@ADQ}O)d@vi)J*rEJ=hm&(Z~Of6 zN$=sgm@Ex$a+^~pf?uBD{cv#b3GJ!J_Z)drwB(iZ&Fnz?wykew^V1ydRTkaNnqDx; zdf|_TMjThKE8~8@X1{RM8^^vQLdwV6ZoPBo-hp}T@|+9*S-0BKG)9x(Zoi({6C@OSN_d633K2?3!xShlK(Mr#GMCRSx$#=`gOzrwzloGr@-Z8Y! 
z z=61adPU%(I^(e3I7d_@i#p~1g#=V7Fy-nu^d2O!z*>uUyf7(7il%(Qc>gQ!xbNrFV z3$wYUM)p0ww+~oaN}S$GTXYQRUiMmw>uv`Q8IaSiuYKn;s-~* zwI5l>RXddzKc{q`Ge3IQj7+yl#m3XE_wVKe-dnsf zU39$Vl(bp)ud=)BP8g;2!YjW_>sakD51p6gVWW2I{Cu$WkWmpv!Dl`mouwMwCV5@R zt(WiCE%`DnX>d~b#fL7}TYnJxj|sgKJn@U;o8H`k&!%s7pHOl8ME~s}HR*#+n2$yBdFR@a4Rqm+F_}Cbmfq zjhLjftCzp&8vEkoITgEcz$rW8(7l?m;SfA&2jh`WtPux2%%Zw+#<)W~?DE$-?PN5! zZq8I*eEjvJ`YWvOF1)h4Ol9uluDb35%b~Ac>+Z4RIV=b}xhCdV_PP?oaF=_|(L37m zdOZL2r<_xhet(tjIm|dS{o`ArFZfgRwE#M$mw<1lG-&(kL;BA%oS_F{2v% zg+S@&<4B#Xg$dGH#$*T{kflId%AjF7;6vyip6W~e@Kl?NGkN;JEX!eU&2!k9q6=#y zI3wi3N`W4AZvbZktI$01b^E(({^N%TLMeI|if&7t=TMYXY8pmkTEPkqfqCmM@Z=Os z(Fkb;A&p8smpWV5r>+*@ugB=4T3}Ou|5nhMalaAXrg1mStr5@^MCv|f?X+<-b;W5AP0i{S84*}fM5jT-Dx0|3 z1JmCZAu4LYlZ+FiqCM%6H!V`G`J8=Ap>iRG9tp1)dnc%UK81llHr6Av^nNn;K6F-0 zLDW_Yvz}gYyCgtVl-ZM{icxW`Rgd^S$h%xreV;;Ap-^2CRZbrfJ`+T%FY;M;5;Qy3FoAB?s(!0?Jx)-);23hZKey zrGWtVlTrOkK8w>J>V~w(lU~d`N}-}PcoIIJt}S&6H7hMF{`$EW6oxv5+CR=BR)6VV zVhYMycQh&_xSZw#OFuwa_|Ef8C(b(x746BBc#2VR&0df6Ui9EYMF+YjG$_>Y#Lz8I zeyXKVH7HrnRejK}Hv1r^Agby5+^)gP-W63qSKtEGwG>tlgBX8(V51lIiO3I+2ok)E zkR{-?mPobOVaqxInhF{xE_dNlDp6enVpMC@Kp#UdaYgZ3lPE?uFf)8huF{u~KX?WZ zFtaC~@TE!993duL?Fql@0gz@#NIODGdEjvV-*<%QsIk_W4)b4ij+l>jhv*>a7&ckd z10BPLg$(q>x&=h0Q>ESCI4SqXbgH!bV>(q8 z8K+9SKc-Wq-CwGd`}1WuRoeYAovJE@%5U@rO@nhUo+hRz2a0{sjd3K|?8Q-c`FUIFeY z8m`ctvHD$#D5u}$!gn61eiz#>tp+tUCH-tu<`yXBOJ70?2K-;DdmKzi>RfXD*Xtg2 zfV%@4>RyHLX@g+QCD=5qZ6=MbT@LZ0Y(-)fUet>lj3J;GHJhPhXg{#eRop?L{a}ZV z&k)%Q;*uDq#5oTA3{&FU+zLp!3$8Zs#-s~vLukIl6HnqpcI+m$A@t5@LpWOFa2l~8 z3EF@7^*c;gY2`~76gBl=-iG&|1EckKuHX^dPWvmJfOY@^_D=ZR69V?t(2A(0J`S$< zGB^POzFEzJfR&vG0pEOL#bGr+hD`W7Q>@-#VG`dzd~i%$oRHd?hIDir)W@daigA27 z<{uND7@rg$nauYO3y+CQj!%l__nQ_jjD=^O5bhhNWLI=8<=}m4*|w-3)5vIcKR!`Vg7IT zc@NJ2A49-}Je>azfq)A(qafg?!6G2wJ^-AP+O0?zwqLcm?rb0Oe*z)}c05Q-r% z*8^Z#fI8@TyFP`51Zr=AoNGs5f_ZQ~g5C_@aO=`=>)LSZMsy$#^2Buo#0`#5oS1|g z>656nFo_88r1@ypK(hv#HPEbqW(_oJpjiXW8fex)vj&IAI3R1 zPJVHh4eqGGIW@kA$GJXkslmB2&e?JPj$1Zx-j8#9+?0Xu{BfQy*-(#jeOwQ~FMXRp 
zz#oa@XR`4m_x9DULFEMMlc7k97p)-Uo5NsjXLBQt@aCL;> z1i={seg_42IJrT<=ieaUwfSgV1H<5B3;6vAK76SMKaGiB=lSm(#zc+U;*6Zoj`%!0 z(`!3$=}$#iC#)L@DHymX!q-s7Q*um{kehsk|NpC=Kg!V5j3xu=lxk!+_2~)x#tD`k zKNS;6$%K@K?^P+nwE_OvhOqrKmA@5&VdH^Ajr0X>_(lq!Ezr5&`KBNdb`#i|!UdxaJt*sLUPeiSh4{qh~NM3`Y-lCePWAp8#gF)OTJzz zk=?oqH<&=L3mDpt&IDYbB62I0k7vPq({SBhfQjGEY}O=MTj}%h;d1Zm?SeAJ&*;FL zmb_!j*OUCUl#^Z4D#w$L_re*$k4J|EaVXM(Rp~>kJ=^e)Kw@?MeqF-6CNdN)V50P< zsP0scL6Ld>AUCODF}@HwbY-72#EC!fp_Q#^#n3jM(~*@aPnng+qaH7-TTUY4gWQ`KFJIz9Zg_h8H-QxL z%@*q-k;#}c(uViGVjSJ`f{;vGJQN{uJbC8yAQ)#9>Tr(*SB!i$hV=3J?(D!=O_!cP zohTRSokB78H0gTW-RXXPPmB-!09V%W+m(6OD`J3q&LBSfI+<_mGZrwH_<7OcdfYb+ z(oLx4YK4zbB*GZ4%FPw9aZp?7x(VDqW_~k+LAi(FFf=EYM}uJH*%>N8^(Sdd1%Bu_ zMBGtCC>IAIZ6hbRtuq(r_wWBn`oCDmKfHQm+~9XfcYc@j;_)YYd90k;I!OL30UQ22 zqy6RDhj|RJ!vk~XBEDzTr(3&&S@p96Z;KiQrP<2@@2Lfv7Ix^4Yi$qJmpy`YNWXr= z62&g)6l~z*_uOYWR{ctN!@e4vl$?D<^qc6{Ilt+n8U5FjI z@;oC-evX{t^0w&EN7vl5@{R|@6gPtPP>enb-MfvV_YTixHW%JiF0vc;jwm#!Ta~Ir zBsD04rpWKNIx%NvWu-wZfhK4G03`qm=x)pPhg94goNP=T9BepEt?d6L8z5pOMEv%@ z`>IdqL(qmm=1~45=$h980|$DJ@GKHhwM75rwzg3Y?kaWCs!?ookbFfSC)cy+i5qL2YO{Oo z8R2xm+S*g<<%IDGg>aYXUZop$!d3=WY;%1lEbNO`R1PnWo&YBm<2P`C7Dgrh)S+`i zXQicmRXsa~%jMK;UY&tWHqQB!L&9oSY&9s}5nlvv$#6YH@xAhqi2*b^Y`kOsaQd1O ztondRk&>vh+yrH(^sU7(W6ENdNgQK}l03LdYE5zrbytgP(&&E8^G(+Gi(*OMqVe4g zrSIZi;q}bNbKm_k#!9ue6|z0q72|^lw7-U7Y0hcwPYAwvB4~q;_>Z91kHoUJFu!BW zX9`1Ag!!UN;i1yU@T10m1eL&^(FR>2poD_C`^^{C05@SS#Z^!q?pXiBM<+R$V}Uq-4I-%o9ReG^#xt@`r^#&`JuuNdcAumvv<4@)&b zD$Jx7K>vn@BNFsoE5EuWe&!%W9^o)6bN~Q+;TGpRaJY*z*ZDu5@4~Y-H&5wj@r?_4 z&=LRyeUk!)@rI}Yr0@D;L5zV5+Ona_vM+`Dje#uh0r?s+XcpYX+1;p*!8ya-viG1o zHBg?S?{3?H@+mGEo0_Sbx%c*)_2X6zqm;nuBqdO#fk6!YOcmHEEs*DxyUp$p#&uCe z)kKJz>y?yvSnj$gWFG<`3K0U@qxOSE`C;oplg-$C=q9L_Q!uLosMB$|AAa5PbODV~1f2qidMJHCZ(>?%Ie{q%7C zxQ3?fJ~xN^x(=-JjM`{}ahaYbW?iN6iW{?{1?wCB6q9==Avct#qac%9Q_!VJ^!T?2 z;Da`*ur6OrlH*yLtAcfB9X<@l55rUNHxzyFNCGC`4OzOmrS?cM9CIn3xbhaQu@{_X zW1#{?516wj{5l$i1r`S67>>0Xy$z2w7MbHtJtjMEQgbb%$QaLkXlx;^_dL*0JeE*E 
zL))Hs=WlxUItt&<^WG-vDMrNEBe`GdZ}bTi7iiQK?KyE3x!_D)l*CRtywp*{rUJh!3dx2-$yMNEV2VVr6 zTM)OX5Td02g?)d7{|gSD2Y=hamK*Kd1Q_#m^CH17r&l*6q|-Rv2|N?`ft}`480o1o zXjbp$W?VWxh_{{L^4>mpk;?U&u}D|&-gPipp;))+`_r#qx6ukTiB!_D^#xxv!|R(a z&$;JK-GJjhNhy%8tQbpsL>Qqjq`t~mSu;=~q;3c0O6m3)Zgt;0mg{fA1v@D6df<3` zX67EJukG_&2j3pcb>7I39r3{16em60V8mK7BO3IoX5qTc;e7%cG{Euy;4dKl@Ezj) ze$^kLmb;%CLVvV>uRrKN=&v&92mOisq(A>f5C0S#wtlRowszQpK_tj75>#v(2~zY1 z;c5t_On^tgk9c;$&b-`?+`9fX+%-_%39JAx#1Vr9h++%c=m!xK>RpEDQ83UBg9sXo zPho^HPbWQ$MPrCuuP_myqn(S>@&|F^eHUlNnlV)j9#hO<^GE1yua#ytK>RN87cLUw z!?{OV0E-k#E(2@GPzS9e<$+z4mq2Uok*a0)19saOL9f!X|oCZw}F4FM2fNBD6s<2YUYO$ zeyX$n)R}(HTe`|cP!RAd_lZ`IVm6NgQ14JG2rlYJP;0KDBFs=oicJwDj~W6`o$!w| z4FzRKsb@bTu^mnYt%5vY=rSv>XgQriCbv2v{e8_Sd_dDCTDI{piIjNYVwfUEHb~lr zE>bNZ2CXi;IY>Q^`P_3B*Q%N!(cKf`TAu zluZlVMXfBq_gPU`Sth-s#}&|zVf#Xo4py9l*GI^qVno+UtpjepH+&OSQy5yCuwx&%Oynw9XD8kj zH9s0|@$TxJ-K|o-L!N4_*B;cQd=*=zGM+XWN1QLwjkF4$%hk#njiQ)co-ntito~8~ zCVWl3K`{8f{keCCUyc^`m|u2d@JcY4aT*U$h^M5ZdXVi<)=5@#?KJEz+cKVI6dN;8 z4B^;c!&VD9L1O?W0MH5fUavaOrd;m8?ciMJ-_Y;%s`Xm~5trr2nz$DI?tJB3d#X

    ghjW4ZOG zpS0Z)HLEN!s?C!rTHY9ZZP01*+#2;#(GqYFk9gu1`Vw+=^~>mG(syk=2?O#~D_^3S z^VItegQa^wYDx1^O`WYxK5Ow!Gt!tmtC+CPUr(P zZo8L!i9&8UTcBV`@9{88R~SQMayEEGEM>5V#Ebaxa8s?CSpIgJd=5rILUf|Ft-_u4 z^_pS=Q;x3*UY235-YJ%yubIE3?p+TL4(1Z}l{q_F(iiAkfe`09MVHVD4Ev?tX5tnp zT@^Z7V94UbYRw)R*tY>B*1a%so&fVZ`B3^7Xy{t?c8pDy><58t{D!us)Q?6E0_tO^AU5C&w8W7t{re6V~6 zTWv-zA;WQjI#vcP;JNfAV(!~dv}6;7Pjs?i;#SeQsF5J64xE&*OxlhvVh!b_st@nb zOB;b6$6xo;lf@f6j&{>^%IE4&boifZ!+WT6%}shwcV^$6oeq%CZHfCHcO{?67tVF! z5T9*k`S~37m2YX_vvVr$96Y6&JKbxWEa!C9pdt?BAZcEZwq=8 zeaI(>c*_XS2kM$Q7S|^k*(qrl;*va0u~g+pBki~$XewOX@HxlO5XTxELh7iciNii90n^|lT?Bn^udJNsf zX(yZx?-RF}$(J!brh|^miD zMD&d+yFS~mtafQNg)G^8V1{k|9A{9Hl>st7;Tx>ua4tsP(vHN3QYSMYHh>#L!w%3DtRX&aJR zRrXjclYKGVOw|e_)C_6q5^x5e0vuWAm^bXlELW%S$T%MReki6UgK->UpDZ5C-}mgo zM3~} zk#|{?um%79RpHSUnLD?~#y46YlU;{nU#AJymdKm*q=|U?m2&Z!GpbU*q6G0){Re?h zw88x(93tPSlq26m4JFsH_MhSmq2I1kWKKr z^WDTqTcOH*sbv#G@U3QXDn|R8?PBxlv-lnFsKQ~FI%w|I%tpyj73Yv$eGcDy6>;Y5 zj)#J2nO@8;9W=C9l@+(NFfs^%?1tt@Ei4v}f^gSo#^h(qaFM@$}&X+QL*)F{RmQ*5?g9_w;=v4H=u~U!GEF9ipJOWNRy0sA)vH zy{+h-oGdaB;m0`v)JBk8qF)o>w|>5!G>*0G!tMUJ7vE`G4M%EM$><43X?g#EzMbo5 zlDICws~OTn$vUFfu0nyKcS^kC`0Sd=JeaCWfhu$1@lj{>M6N-;9P7z!rH;DTpClfS zVAt)d3xL)}O>|2^p0(tvy5=N0ZF{eL(R9ErnFdp9JDCy3?#?%z&3YGj-jeUam2Ml& z=-3={ZC*!FSoX-{w;rEvkbPpX6?LN}dz`KI_@SZIn-u}fJPC2zS~)K7EybW9`vw}m z&{FlhsjcKdS(zb++M5H}+G`jC_BLP_BXc`YiKr4yDo1mD+IZ>NZt_}MtWp!FXg|M#8z~ET15o?ZX+=FS0 zq_~P_x@74?ehZQZjaxK!M|REb@HAFI77f9GmsBqezL^!SL=^Sc?#7k$JMW!6aeZe< zXC}miS0jMaCpp^EHMpH)16&uJc$(Ge@LLZQiM7VGBktrOGMc|Bb)3Jutx6qsD~uqc zq5XEd)bN9ciF!Cz>qIAHJ0iK+*_m85+xJWCYBf)A2JEZA9RYPOCsQwP76fzDzHXGj zZh49pK`q0>{#@_1b8DP1bGfjJGqem8HFEmdIf9L!=B2faE`cYog)eqPia0yEZg~NN zz?4O2EDNTM_GXMt;SQ8suaKS)k+!qmQgX5u&+j4YE#8bBv zzil;9j0p@%zwiutFdxf8jyotVM0T5KiQ+vX|NS*vn=D3m{Ez0I?{DW`=KuHHH_kfY zFCpfh7_mk8o4G%KcW>o7dlU%})fO$xF(yxvPcw${LFgvIyTH%X3`Fv8c8Ew$RIT+` zh?lqmyWGU+63iL4*88WbeMxN}pBiF=+qN@?B zp00I&wep>E!FaJ}8|km3F+eH>l$imdukc3TD*K;lMBLN!+Mn?8V^A0NOrL2slx97G zZqbWO6^D2fn9eV|Oa_MG)jLL42+tK7W>m|Ez%`cD^P~ 
z9t!s6aQE{{=y04d00kYAgaVE64}I$=ixGOHN+^rrQiPMRin$OCyN#uJL>PvX>d!|& z&VyijRhpOR;sGAn?yUJ!Oe-AvU!4~z`Vx#iohv4 zr92X|GiymZCbp?b`V}*zgzjp53cTeuqJJCOXsO?9LeTFBOuA%2VISaojbMn7^^1Rr z*u6O*T33t|u$NMi3GK|CW2N=PBS%B_wd7QRQ)blNF54EbyN)!GRFU@#R6Z3&y#Gvm z(&*{AQT$cCwr;_HSl6-l8n^x@7mnSTIy>n|xi;4TAzs%d*M?ZgX4+om$QZHXQbV^- zq~BW9q}ECcM3g-IfK<6x_?2IG5(_S>(+5mm@eE7z8->BLZ=(;6s*mlueO0NXPUINc z@ymI6@`f_vjHN|^!<>glUGhx`!u_g%a?ZrqB*Yz4M1&ytk%}gcj{jrgclrKtWyH7G zPk%pqfqH-!y7@4Jk0lhQ4b8voIZ**nsHDU@y;(;qKUe)#aY7FFUab zBd0k#w=gdmfI?CRonj>_DqT#B!U|`{(^crCcyjo}Fj(3NRcF?sX-1ej$cTiRBRNpW za+$1k)N@O7pn_&X73v)$;03-7cnO>3d)ldS43r+3NADB_34)1w(+_M}WV^KF&TRB# zr+44)Zcgx2XlG52iP(e{j=alMA~chkOb$XXU)0o8e2#vcMn!$EY>`h(-$t}#Qdg&w zp2dT;NRIn-n7@;8q&QCtGs(2~&FRNl4(WqF$ev)xilP$}f!V&D8Bcqo%#E*>id?#= z1Z+LHW_Pg)8{STE>^t3=pMO;;0G7~wAv+`V*i!cu~}G1x;c!){O@>vN_8 zrqxcP%W)$g9+cXBf;R6Ay)H`0rZqOid2b+u+}ql1r-dM6)7e0CdWwPND4+RMT{YS} zZP6?Ko?YveUX4Nh96zPO56MHjb+>%eccoHC66(G2PJbKCMOY{o*$~rp9r5|cXnwxn z{}j!sL}CPJ*jl>*fv;eOPy!mvg(r%C(>EhjLTUWov4H-&r<|vhKRtzj{*R|*nMnNv{U!eFCoTL0w?ng#IuCiYsTqpbk;jfxIvMF-4_`!c8m;wym`F~+UWkDBGh==j)m#|ZU-wP-{RQvt4`;(T zyXH81jahS5%~dkuz#zx~U;q#R008&^npd4jo`3)Vh+qH!NB|H(>Vno*4u)0^+6t~V zhW45?E|wN}IUqn}SpY!q&;R%Of4l;t@l#UWbjX3{lI}tI4Mvn%^!;>=GOY@Tmw;kl z`ca$f56K~{&s70)!T5nfRxOT~f4pf+FL$hH&vK;r4GgEI*VUkkcZ!x!Fb*xP9FVu^ zLxSnXPtNm=CwiA|w6q>XCc@EWoB6R*3fkPI=Tc>(CaZ!68bKYKBbXeJu0pb-AZ=zS z9Vd#L10_cX3j5tON67@Izzyl}cPJC#Fjarot*g8oPO*_vmIEQ!wa@iz%rg%Pr`W-u zq6|{@@48nCGTL+Yb6kV2>sN+~&A!-Do%QcROYKnS#a8DWa|&aMXD+CW&SInj6i|Xb zJnamw0!u|oSb{&Z2~8N>DR}`OIX{)QlvO~CPpC;v&CLFRr+{4IIDbOqi90yOjud&! zxYhVT8p+2B=i-jz#gsFe>F2Fm1Ss9Ql5oJYFXtjt(IwB4fwft3O9R&|ENG1_7wF2? 
z3Y`{hRxB?fuqg=omA;M|ga@1aEz*jU39^LG@^X)U>I?2W}Z?~CB!w``<3G1E90+SwX9l=K1-OSgIvoaS!kZc{|WT*;i;q9{rlzh#RLZ4e62TnbgePtYi1KqBR11)_4N_^9lrVVyAN96-#lO_%A2s8 zR_bNiaS*s_8(4Jy5K8MndU2%bg`iQSDrXq+nMWGTbuvf-TufK!1pcg{n`KD zS8IHi*)PucwfRkX*v2}op*y;f`)M_t0ZQcbW`|%?IjXOFl2khR#A*uAfPKDy<0V>J zS#w?qmcCZtEWr!WgEz5Po~@U=uyyxv0pl;V%*Hg9<&OesIeE2u3XIsZ36WTffuKl4 zM&CR8BsL6NpGvTh4Vy6ipkxo>LCl%))ggc(VkhF-iy05oG*mFV6E_qkSbqa_|E*3_ zb)Nl9)6;6|3Ksfd_c;WH09^Kz(f~9Gc__O&6o`Isg-+VOJ9-Ky%=tEZ!2uK_zffm7 zfHO|2iwL@hNS-cDb)eS+De<+Mj;UAvHp+h4-bKvC5m$gcJ&sz1mMiMUTJZaU{-EbO z1O85>?@&@(e!u_#;qO(`hxaGmsq|M8l`3jlr?Vk@Xq&zPZ)Ys-nF9nU^w60Q)+xpp z9PSJQN>>6U*GU@Xe0ytK80CwVsui1wMEBB)oM_{@vwPHCV43HR3k{>G8-lE-o=?cx zUb8gwYNJRWtdB3y{De7GHS4J}{AzQ1r(LLye{SX9SObtHU!-ZiMha0>ZA7g|nq!Q8 zV&QL56j;>ysW>TIsn|%O0oINlPFCF2_#6T80Trfd=^}PF2TN`KkUQ7_c8*Vbu2mUH zO}!noJpan-AVaQ}n{V|Z@Hhxzwtks`z2nqBj}^ADjnJnq?!y0q6dl8v1F}9c(#(}S z01?vzXwpRbanQ>b0iRZ&k~6YvCEA^6iJR4#ttNJhQ#F85=^jM*!a5frdj4Aii0_?L zt9g6ysK3}Xyi%r-QM6B&aVdI)~ z+C0T7SZoXrP=UK$!T@@R1tHEu>pk%Af25+z$45Cz% zX!lUm)JSf*G~de0_*se~+9_;sEL~@8mZyWD5=cZ)jwGAMGr%@bwt*BC_uGv0$ z@;B5nb@9PRaPPd_?+*glrbsNePEPw?y1pWu?L)T(tpG_X1R3xI9S6U9gol1=bqGs9 zGFo3AV&t{SAyeb{gbwbCwaXr|eZCn06DK#xzW@%3K015brd-c6wN@)=!*FqQ_$=an z&tC9WlycjSDJ@ynvS(3}S*uqte};KZQ3V=6C2+B)n;Kbj+ISbC+$?43jPR&arTr9i zxq<-^2ajyR))J^<#w)Cavo-i3BOuDY&?e&Tb=mm>Xy2zngL=)72T@=GJ>nBeazxu) zM94)SvKr9b$~8$NU1A8kV5V|!qk{n~vt@nYrNwEx8EkfDnJ$<5mkPC~iPHB!o^r!U zz{Qs1qkRi7$wfd~sw@+7D8b3`=aee-4S=tgq`VZ|PVn*BdL!>$nmXx0%uhihareTADs|-1S%9o^7|*Ipv9^fv(z}(`<4#W8QzvSE^BcO!4>H$Qu*@0R7K= zwKp*|bg-xWbz}Oe{^{{6k!f`Bf#;Mr*i;*&d+^noAf<@0ig!73iZ?c=$@nUO5|*S( zigV8A$xJ7EHV9=P-TghRZ!Vs3TwJWoOFk@eb>ue#IK-1sii1uW)3_VBB45eq6B|G) z6rssI00nWYqi|!}rZMFKReb&D^J=T>!#*gWoeL|uctm&uhClBTEdDiQFB@&l*W?=g`vaMpRAR0xn|e6O zUD&SP+5Xn_*VlYiInk5Kl28*ed5#}DTzORDZg0==m8{nNy-T+Fv;EXux)>iI!OY;t z(fxPy!P#dW#q7O&6JOT^*U)1ublS0TAEMd@Vgt%9N;hT6v%aq^d&)me z;4LKdHON!Vxf0W8q3VF7BKL*+@jv*gXA-NOLf)>35Mw(R4n(ctXg<-2`$K26EXM1p z!<$q#Ywmr4T*=nEi#l}?iR^&8ZLLI#yo`El2__56#J@wz*j9E1E) 
z#W7o@t*>yEmh0#<31Uk-y-Sw&&PH$lNWd;M_;x990DykHpMCYO4a332(9)3h*C+i? z?wzQPg<-NHcc4A;!q_`MGw(zZuWwFR#H|q-WW;0D)t|^JF*3xqVj=+%v){;(<>e;{ zSaZiu^8v%QT}MF@)gIzbk&Z1AmhBRyS%|4Q!G?*vwiOoMWqP{(aJZaqOLh}UhVG6{ z)NYr#;!Y1=OeUI1^fG57V{(@kjSojKgtU5qrg|1%`RZwhU^xUaun7DhWRabS` zNQ8F5R0XZF-WZxBblg>@?5_^H%%<@m{M5eWYXg7N7`QopQR}FmE(A1T@=YkHU*tGW zu!`1&m3f@YZc#p096?>5)YhzN!Y+;qSjbZFstm-1Y2N$MA`)2P7@eBl_bs!>%PNM=jteE0`1&_~b0byu89YbD% zrcQ_orrfQIUhj}Z{WE3O94j7eCij=ui^Cs59o$~m2M;~k)lGF2;#T0q^73bMQQB`W zFKFx99j_Nhm#l}gq?g^j9nTLVNgZ#O95a~acCPJH~Z32*vl(e*vS|8 zAypUUTl(iDVY)tWPnUqa8NGA@y|H`<2TtG4Mh1bj)bWnG9V<_}SFlw2L0h>x%$J&R zFPn%WQ!0e3Kv(s{ozR^*Hj7h7E-q{`CD!78G=;Q;Ovad>(TJ4--0d@FDR0XyMN}H@ z!GhGUns2nO#ia5HR6REAui!B_YN&kzgR=@84&)C%eak8~U2`7d9M zty@;~d^4Zpb~hl@eg7SFp20=C{UatcvL|xvLiEho?O_Jr8SK2eL`ceMiGmj)htl}TO3k^QDAWdfZ9>=Ut4%E58Pn{*vO!`Nc;d2+iX2uj1d z%XrI5V{2Pq7gVwkgpIrh452kKO}5>11-FbyILS?Bpk3VWVKd|lpa*oZEgheaBOp|b zliX0x+%;_|J*{g7bhOvGPuoF0x|@B(yKT4~gO@e;<ni>JbH+&txnTK~=M4Zm4RtGmxI$@$emrgUr7E+c;Y*$8M~ zi-5BSNb6KMINi4u*=EuvGr)0z`&8y+s?~Tx>K$7WUQ&kTs7V+#LBqAq^iYW#1s6t= zEd9J7M@p2hlVwj*KU|_BW~7XpN}E%Qph06FT*x&d1e3%H4%E2M*UM&)Jr5L|Rg=KT zmS)g(C5$j-9nodA;RKMijQzEiQdFiKRHkH(lCsr=ZFiyQEKyKgc5gR7we1G+FY^Z) zVa4z3qZHPrw1$^Mji(5^#W5D80LtjmGU7!!$S*b9x|%tf7Po!Mqb3Y@sEMGPrt?MH zqT(D&d{XUiP6?W2kZ#gz!n9*NiuizF{)vFJ(T9XYezKR->Y>gIqJe^t$@~H70KI3y z1+0m1Rvj%zzR~I9=S-`6XWhh)Xw^YN@f;d)Qo~^MG09)zd#Saly%J!Vrm%7>DV4q9 zdDM1N`6zMIY~dX^2VEGt2u%am&}B2FaDBGq%WYnZ`!=vkiI%Xs+i)YI+JHIsSrPJ7 zRA?!|BSXBy{Opd#=zY>EQDSxA)i-B+3q}Nz+=Z5XUO!tfn`Y4WO9pVmyIKRV|TnWxrQ{-E{NKxCBo6|JG zAIto>kzJ`}q=JBvo@KeWbQEUezAPu@9n2V2bL68s z7R$JQ9A0gmGXi(^;c~O_Vk?w8kP)y>A|!j9#vF`i(Ft3PQ;mSwXk{m*e)A-^0ACYZ zq>jhX?SNPClgmb^F9~o$R~mV#)Hrga_^_!HBdY;7bNmRXp*#7>$+Ti%f#z&ymwGG! 
zAuLQndlap1>v6kXW69n2904!msctw+)zZl!OVi7H)aYO6Hgqs?M%K9m0&*&@)%{8+d{k=eGvTm#h~&N`0MOau0)cfIG96`nP{< z{fn$CXt3Ywznb^f|3B1!jDOUBD|r8^|KRUf`q@wVtJ}m_s+!Rfii?BHeXAn$!lA$C z6`!wH&9H^!h)s#rgFIi*ztO>WU@1g6b*mJmfyncAiZW^JSCb<>zo_qV>lYp7ZU{^KkULv$31y5+Uhv%t2G@of2%IhmXYEfS{P-g~Mm00I6fD;6(lgI|&ofECbD*;cy@i|6a5!-8Bo~Wm>A2;N7bib@cWZ1QjE*aM_lny+Nl&jw z+5+5~)qNKrL<&sV+9`bQFA1)aM1y2$n7O&p{z~`hw z9qc2Xsk`Y{A64xdZ5TC1N-f71`Mq-A5kO}`G|-j#vyyc$#OhM4aH%eJWh_}w4XBZ9 z3SDPjECEf1f$7BqZeQR6n>4j~O*D)bYh8>VR!3NSK*PQ)M zsOx4U=9Ug6W#x7I>aYx6A%-j_AQ~A+F4arba_&Y2@e!Zu$Q~8AbPc4MpxjM3*PI~T zQgv$Q$4wp26ff8y=y2|3XaeTXR(%9Xd+g(0--4DpqNo1u*z67fUp4u@W zL*zfB9{I#oa~rO4CM`(IQ)(>o4R6B$k7|CF%>t#h9m~oIj}6^ILfQmnG*P#rqC#vB zF9%JPAFbPkl6WBl8^+qC7F{~>J2=wzf%;HxL&4VgGTn9cV09T*xF9^UBswy(Iy_SI z-H?)T^f_PBiNnWhO>#e)-8&nwJMwRG7ty|{kGM=u^g=c_HE1H;7qeu+K0RFh2BIii z)<}47YxkxLjZL{l*7FuN)Unz(fdjQMJprLt(50d&>fa)t!yZ z@<0nri`P3yHbF2s9#I2OPPWC_3** z8*7D-8hlS?W88;*)3O9h*&eUZ;g8?JpA#OGYkE+ z8fAMXIA6fNJW;63E*L)>=joy6AD4~~AXsA9KCTXNGxpiMM5w)x%|L8w49)J^mGV}-LE9BXn`usSI|#8yHt`Oft3|rKY)#o3xg%A& zl=8*PUHJO^c?FQ5C+R%OYB482tjq1GDge?)d-h}Ime+ZM^+L-N0i4p76AG1;Dq?Be zDUB$q^U7MeN|3F~o#n08bH6z~m-XcKl->-Ki~MNviRkN=-pGZ;*5QX@fK&E6$5xmP zQdxB523=&TLo@M6#{wH1&6%V@4ZEE8G37JEw25Imbrk-KACAb&tIhcE_M1MA9tsb`%5c+i>1hB0i&Fwb_NC&8 zOsjLrXGrQv<+}#g=M9fG>v@RAidYWIUka;5Ap{7>XIh~+13!Cw2wFS;FrHKwH@kqKtl;_5+WiDL~l>j|ZX30lki zp7yoj8qJlI zuvfH%#^AHVTwI|$EA*Qx4eJQF4;ligI0pGJPEKE-B|#p2AgP30!7J?kj3yl17e#){ z+nNhN>3Ir?y%`o>ZTlb7Fy+8WR&R6xL+L&kY%uOz4f8y1o5n8w-c_c6ne<~CuNH{~ zWJX&+f_rHlD+I3?Vnj(8J;&|q1rgWQj+K&_HMM=(598R`WRoYOJRTIQ=eN6Pak!Ci znNZPZHH=GbWfF1xCfO;;_Y5i2-bk-thG%HF1~eY0&Ajs4Dzd(0BwR4{1-5Cc@`3!f zvux_O0V|1|)%BN(N7E0r^|XLOP?KZiH_*t^-n&M$lm0<80MO5{5_3PWB;$-_zGe(0 z_Rza?*JDJ+99P}gglqL99-K`R2%WB@;&RQ0T~W(*g*4MN;A#G-QVNFXqfd}CxgU&4 zhZE876b+4^4gQjhLw>A-5L%=I0v*6Mbp>YQ0k(~9PhBJc)Va_|IaG;vmUvPnIFyTn zR+|8$&KidS-5reaP;&y05JsP^B8!h8+|FJXDeP1>E`zM^gF+%I%aq#e2<`(TNR?8c zVsZ)BBsbJ9m_GogFfIFvdSXE}6H1xa$l4ARZEkxoYnov%ed$Wkn?lTMTEZe$!aL3(S{pK+I%gb8!u)pPz;w 
zL*k?PDLlMWU|yaYRKUU%MC}TO+1fZK`80ht5z6ci|Ja;zw0 zG4tpRwLK@E3km&>x6h1kgwjb0^Z?LvVQQCuty6^3SaV@FM7Sc>(bn7Ns%N--98Lue zs2PDwd6+O6I$|Q!?fLea6I*{sl1i}SkFB&G7oKxFY*Y}!AUC@0DBE!0UqB305Ee5I z8d`78-JLT(es~9<62`1PYU|TU<%^XU>rb-{x)g_z1$iRzgS`%1!~qkR5i9DdnG&;5 zW@#n9@4*ME!a#&+>G*!H6x|}<&PgSG*!7$`AJl5*+kLXah|yZwB8b&zb9yrLb#)rI zZ@%ktWdnOFf4=h)GDryhq|EUp!Yi(^m1N@m;Q1ef=h<=u9_w9r(*LvYF#M{n--Tye z7~UhQ_D$zu&1r)cLIrkH_7-0bnRv-qAEOeiCFHb;e7K`Lri^?xr=J{+1rwV<;`n0_Wvo!pi=wPVC%Y~D>nkXV#MFaQgX_*+-kKwEP ziemlvJox5|uWBMVEwej>`(dTT-BU}#FhPZ-euE`UN?iNi-L+#*92Kvz+r(P|Q!;U# zD@D4ubfsz@${2ufj>p@ z+4alIRfi4F8gf>pKFX{SC-nIR(VQ;BSEtv97P^VC0}dM}4fUr@pbedmARjU8>6#dY zC!g(82-!(fq&Lcg{O>Z1y(;`1=acif-Dz1)aZ-WZ^!4kPcCuf3c;3uH*V+TLL2c~! zk}b593x>K9D%~W;^UJGQ(xlCwu)I6bn-7zloVt)+yKwhsR{7Dq0~BE4T~l*SuR!wm z3K#9LdXlQ<*o0~H57~q#sz8c4lOVb`GzfGih#4w+ZNtA1N9uP|+G52mh=XvhscmQU zG@oc6~)mS6Fo% z{IcPoO38SiVj3JxKlo&J`ZG#aO>P70W)oYX&g+U$6)g}zdm<*9_Myn(Vd`OnPZR=6 z$%O|X%TbIXw5~7bGIkaZ_*AEJyo*YGKOnb#A@?s)G1nWr{NNvOY0GFBge`w-pdK7o zi3iM|tWR7^IjQXIZJQ+?4JPYe&J8p=9v!bBo^6BSnpF#)ts*M5Sq53zrg7HYb%iAK z{tmN@YXVL}{LlqN)8S^YUpypNaUfDZkFx2I#jU5SW!P=0c)uzzE=EhyRG3fWqX^@b zZdc;GcJ`eOI_yMu*e}w{Pe?#J+R-y*>xhX`_0~1jVA=cll--oS5S;%85bY*m9|EG`06XzXILS+i?j zRe}e47D}WE2*I>{p>d#Xg%KMbcfjkyvik;6$;FAeP;!i{Smk1issPo9mPqrVssN=1 zT;j8fOS@Whv@ouzlwCauFMAKfsVbI>vd}55I9@k$4C%OWxv|GBf##3wS2)bQ<&z$} zQqjp2(z(5>GJ)rolH&LazpoY`?&FM!*SgO)#s0`$#$bc~vevIb{~ zRl>IBV8YT9zE#H96f@xUsRXAK0-y5T2U%R{?}2YgWIMve=aG*X%rjsJAHq-nvR|cn ze4``Um0v|m=S#GOc&W35?@{3d|A5d>wAa$cTV$eQSI{o96xzC3=saxX?<%u!~c@{oz4Gy zg~v*p_u!!io%?wNZF19_3YZdqUcz-#)2)n*Toc-;uQm}{n|6On$s1#6?umSSdbq#W zYv&lExChezR0b68whe(HC+hu^^k*Z`qec{nnaRZqvR!Ryjh;bSdgt=S}DA*z%+;Cl)2T* zcY{Tm_C;PWyIGZ<&v5*sF4(sc$tEO4l+`+}fw(a%Z0S<*{0AQBf%ZF5$Bh_SNP>O{EqT_yYMFpGr>Qi{AwP4NBQ>>;ZF?{-j literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test1.xlsx b/pandas/tests/io/data/excel/test1.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..862574e05a114240e403a653b7fe22d2feb458b3 GIT 
binary patch literal 12074 zcmeHNWmjC;w#D5&xVyWCAi>=wxVsb{+}+)R6A13^?(R--C%8V6`}%d?bl+d_?)gw> z)Tp!OsI$jhbFVe`l9L1lLj!^Uf&u~pA_9^COf6Ue0|8M&00E%_L4jxr*;qRmSv%+` zx!M{5wCG)|ED3YKKq#|-K;EAJ@Adz91xDhvEV>xc0?!~H5y|RvBbu1{D%>ighNY7q zfz<)#er724In!S4(|4U%%E$q((-XVtw;pa{YcIC)M}`Ra32aBuF9=U^K&Vp_rWFFO?(j(b%~*vvYIDYpP(_K2Ip36?@$bY;_0r^ju5wB}!8FURF)D(Iu1lU} z^?}QPB&q~~svX-(ofS-hzr}x53+JU@W$|Nu#SDo0zOMz%a_8^K;*L4oA{ElX>a8A-0*evx`hLfE$~)v-6S1TfJ5 z{QPfr{y$F1zr1=$th8)5BV6E_*i+EJ?d)nalAxrsfLJq;lBc)$3Sw`jWO zge#>}YZUdjhOgO@gX<(B(-*>(NaOUX*wCo?xPcgaDLxu~(&}sacV)n{f{G`lffe-} zS%-1sXf#H=WH%;Tc{WLei<8a1pJ!x^qU9MIk02F?e4!w9~vy=_>3PZA%^hC$f3 zU-ZC$fUtp}fL$yZ{*@=rHue_!HZ~SNH?u!-2Ka49dwcf(_tg^LY5t2deyx6!9(J)# zt5}Y1RDRm^r@&Gw1>r#p5v*8n_ z?o|L#9%MWjUmOCMBeo;1yx0hF%tD2-I|xG2gALZf_g-tY)aSTPwLGmSFX3VDcb-Dv zh#}=qXbi!V&<3-sLP3}YmKkLMUD1>HVa_+%^A6zH`GvYu0X%Usoushcq>7Ab>is<) zsEIFCjI2G1H&K8kfQy8SBY_}SdK{e^18>x|jnMafg8|Ps2K=2$P}%e|exN`=;crC} z;@cB%RQfB4N|dc^(zwxHJo6t3kk*T=82z&f6rlv_r`VOX<~>^MJE>HF0anv_#W(BD zQXp1Ihs8%2$nE&}{Cjs?LDPuM1x}}pQ-XD{N$I-tn^r`UlWX)V<*JRC&5`USbn0}P zZMFmrPW|_kx z^)#guz#3k%on`J;tzygS_S0$q)(HGrSzN9GF!*{DuobPA#c+9G?@NU{drj%gkzkEcay|H+;wbKOFL-7HjGg(K`Y>2Xp z?&U0q;mGCuVgDsiTWWr$d&3=zR#nm~1>S#xMMOv*+*s(WMu^zGG9dB8bzLhweYmpz zxZECDB|V;`cEy3}T`g<=RhbFP`2I-diP2^sz1TBEK)#s$NBbXdnW(2D6 z+=L}Og*5SfH)8gLp}PpB0_3aNK@vlBV6 zu$r&`TwYC8UD!L89N)mS%EPSQ1cie@&GbVymJA5(D*`q)SYDeUY7d|*l6;SY?08mUPmOwsAIcGq{Ew*g z1J!*r9Xz}R2kSb9=6MrCU#B8tMi5dNhTBbU!^3}F$*`kG#0eZQyFkV(vS<fq}VSm&rzSbk7q(5v%^FqA7CX#7Ilh*EM=;dtv3x3je zlr2OeH~>NjGQeUhC5%rJH=R85%U76jr3_k`vZKx|o4IZM@hf2yH^%RdNd+cuG8kuL zSGXm%rV9P@=K)8xmheT{$2hK{A@3cQaJCnU$7*lsa}4uKlt=AUHoqWLSgm1ACrB)A z_bgi7IUB$JBLO=xk=kS+fq?o5f40%THVg+-BP%0@U!P1rxp%BS8ivDx){gnW4+n65 zV%v@)TiY17j9Vo&%!tRWtvi-iVPTGK!9fKfwdKCptT5~`! 
zNin)WQo2K$W+|cJ1Ro~)(pp$}o9XHH!{K78HQ7xx8MZ4nQKwDrk}o}cA(?bK(aVC9 zlGR;SEIu6B2-^A{mi9?<`HQDLveh6|{{rZ}uw`~4fjHMPHAXK~U`#`YF{jTud3e8A z7s(fleq21iwPXyMe1%dhlGr6)1NIe8@raf#^)i`w@V7os-BO~Uc$(gmJ?~-GGhj@rkPGiXs0&$Vy)rjS>AI^; z0xl0a&8G;V{4_r2>wtd!)PH^StkGUKRS0a#>YGqdx4?arU>&UoFZVE;-K=`1JdCk6 zp`%sZh*umHFrTI5RS`%4*R=5wwp@Vf{ip7r4C6wRqxmc}9nJ1}$rEk4`YQ`LVR8G0 zK>C1MbLYk-bi=OwNYphrKY`X*s&V;f_;1Gwic)IC96K7}r_Aqxyg?L5#kU4~;bbSE zsv5#5xi)&1$)=%DYywfpbsLnZ;3bsr%HR9E6p)10F*4^RXz7Ni;V9g===Tgt)jiQ< z&2kVjWb%D}IY0Oj)XwL1wSV8OQ`J~YEolu&rl@#28>RF5{EWG#)BbXPc)@WnLvhj7 z)BbcnoYek$!F^q^wSvyj=5cv>5Qn+$alI!Sg}1bfil>F@xEwkF|Wmt)0A*D|hJA9xFIyTxJ?!9^o!WJVu! z%|gLD^L)%(QWYUrr%2^~!P8_y_PS&W^ff~I!NXlfg#JsHqia^>-Cr$c`P>aj^xpml zqi1l@PT#O8z5KBPmoO6>UR#*qcV>I9PEqnQ22x?SP^upf2DJW?yWBz7*aGb^aN7uv z_*y;ev6A-tQ;lI*EHQje3lVa>fF-HefgVqC1fq{I&VJ&U?oo?8!hAxwa#c(#_0V-) z&&I?J;NQ(t<0*n4jK=UPTf$RX2{0XeE5eMHcaJx=<7rdfudB1h zchUy-&fWYS;Z6O`D3ZK|FRxx86bu~Zs&6p*%O~NCZyJ`T@o5Wb?1-RfA}Xz@ao;0) z$3v)wpKcGfrT*2L26r(TZCN0ls;-h+BQ%cYbvuvY& zBaS`GVFV(w#YbX_1|M>A#fcss>-$=BsCsG=R*QSoeXQn&Uqbg!ONmXNCw3v}_=dhJL`ehS&hb>9M< zuvCVhD?S}MQX_8Y#>js{m_B*{{-ihY(aEg5f1dtydxvf`068p7N@oPKcJpDY?bD*W z-5D}r#$)YJmb#Ubd4O>(A0=3;Ozy<-9Cv_rspc8H+iJxmF7J6O2|U8mthL+`2TZMf zy(W}7nUQC%`d7#V37)U@IL}$ngfs@aJ5(NqZ9uo?&2_K;*!maQl+)wA)qmA*t^a?h z|5*O0|CR~=RsSK~viETv_f@q@uva!=CKMM3S@>2)=!e68&nrG#tDNQx$&r|pr~`XC zXL@BsYR6TIaOzSkN&{2m?+|1Cv{yxi`t+>1%V%JSXJM~$Btm*eN$V-j?-_i%WNAs} zFZ99G(wAm$DHv&(COi4skp^21#tIEE!A4eDicEWkS(NH8z(^K42=N6f#??PNOwhh^ z#{HY{whR)9`7WWS=HOhWSt!4+;)rEkX@9LbaAjhxLqC3K%+{plqG|UW+ui#KrtN** z4cJYBjQWHAGbe>u94p67cfvTuk=q*+LlG)+T_TbjpkE zl<8(f5`OdC{A!&KNrQ?7D`}vVw)%kIcaU1^Q!T=4L#DR(%aQs|YeqiuR^NnwI+Yb- zcwb^7`jv3qkdSt6hSL(gr485GiI5Y^QcBhoeI!w@yu4gumoNuYT>!Jog@$ZC0}sx| zv<6Ey@;fBz*1qOoZhgV#*b?Ja)j(A#ZnzKQZ&CxQFGDphSAv+8NPrrd(u7s<7&mgPcm@5874k&h6()t zu9kzVrO~fts!@@)S>{A)#awv(a9$>-Ks(P)itdOD48ElAW9ZX5g<^!d6ZAn|y)o?N z7B4imG|NmYZ$Xk4A;@NJB#kJp?I{0)>`2Y0Z*ZaiZbUewI%P^D@-Py2KP>ezYSf-a 
zqmd$Xlf}f-93!(=J)t9sk0zBtOVHt%-)F*F%YtHvRIRvkQd;e61I@nIm_WInoa*_? zp%~rypoKdqnX9R_jMG8ZROUL^R@h8#Avg?+LfDQ#B_W2zKitYuF06sno!JnFxMx>-s>j-XdlQ$eNk)#!Ma*pH)g<>X*iso0J&7j=J+9sd&&sKB} z8ks8T_%%6Q38Q|z{Ls^oeu;XY{RL7Sax!LfrFuG}_EkL~5kLdJm3;u{yUxy2{II7t zP#}~Z^mnlxeA4=I^nIzaJm%1l=p=}`+xE3#H|4!R{KfQqAH%28T~qwe5T75Z)n?{R zo=o!evGR{f#`=-1u5j6Lxc&`sFpdRgNb*kHGRfsV;q|L5<4OY3p zOFo%tIZvNu{4R9+)LrLU@PV}y_mor}Ii_XHjgWcMng*@dVC}-YI+RjYcV_i4JnsRh zMU1AOG)|_vRE#m&QplfJU+)VK!b%C!a+&CX0*g}uME7LkNX@Epsiw*6C=@#f*5-^3 zH|pM#jux>Wlsy+#i9rbxQ%$$P@C1r`AO@|T{TL~+yirOWawM&#Qh_>ILqmzEgA)!!n+p|^UtB|V$xsAhSZNtRH-@DQbIFo7g(~D(d0j2RKh|q3Y`!ewhwghPs zcK1=+T0z8>jbnv0PIXPM&ixo(Hs!?e$a@d!m9v{2%s7HbgiM&|(`uH*)>5fB0n_Z1 z0z-O_lO}$}Ej4*xN^)Mfx&phj6~#b-n;A~cn}Fp+o~pVF<-@7_ znmPtxVVH?gs%uy@S??WVh6(>5dLY;*c&XVRxYBVZa$hp~6T6w*`RcGEV~#4XZNs(u zQ1(x!h=otqFbH_(!Y=6)IzyW1>j|}fRH_6+^)e+WnBEPc zMGh^}1%nOXoV!D2!*&H@-&Y?a zA%`(#tH~1~i?ngoMv6F(}J_@I-E$+M<5IYRot3DKq$sF_|MG%5_X3FY@AC{4+~ zU>sZ0PKVOub+Gqnz>^q!PS3-FrOH!ECb?@dDIa|(?)IrpcS9pWE9e8gr|iH(>W_y8 zVXNG|!|CfnX86quK%0TVDk z2~~6Yff6GY^==YuMmwqb662<}Il%2$ZlBTtb`l1hAvsozrI>BxnhwB2=t9o4?d>z| z8=-QX0^1MrRG8Z7U*i;^GTKzw1r@H0d${>3UipNOkI$pT4Kpp6sR$P)$4Ewsu{GCL zeQf6sO4otQVq2iF7<^U=&6j@$5}5D`Izh`fYVTicg%Ano~E+wvQ_x8#$Cytii#BKbwfHj${)|EQl+oot>Ji-5-3)59n z#EUtFlLD};EQ=pDO8ey8sU(4vguIg~EZKKTOg8l>R^U%ve0uf#eA#aMeibdN!T^27 zmH z@k+2_o+PNQ^-sjQ<7CX`J$B)r$s!H9XzXy~<|V;+RyDRVx|=+lo5jE>4(%JkPi{Hp zV34yp3lw1B%j@%b*b@07`@Cb&sEUxC8Bdsw^!}6;7)Cx|yYR+DB_iMr$&~==W7K#& zC#l76cAeZqj7KRmYDgmSCfP<&!D`{qy`eB6jP|`nte3d8+ye69;3~<4A7knr%@BPu zJN#LsD<`%<^|HyVG3N9{X^ZBGVLee2&3e%l2yyiBAjby0g76e?3^jw}DhNRZk`2geXeLyhz3s9j zqaozo%lJS>#-ihuB(rU?U9)N+v(?08HcFu@T0fn3bzY(hzrDi@%c`K0umEfU=~TEm z{AUm86?~{v@exj4%DA<3jSTxuHSZTCmW5~;`f`gYB6JbLlC28-myX`k0f+7AcEAFY z;rTd`)=j%|yJ?Tajsky4)0(BByjgh&H29r7Zk z`3&8Mz+-$o`Wk>=(@XeEP@eQw3_gVsh3Z-}V`ZrhATXK=P~}66w#XcF<3ysLZXyAz z!y^P9FmSaQe}BoF;2)1#9D7QfZO$?G^;>ircek}#C4iBGn=j>Siu3jtsI9ZI7~Zr- zEYt;+b#W2%@0?q5YVetswtufW{`zK$|GkvqChDZgeCy;b-}DCQ?Wo?+#z4;A#umV! 
zXKVXYN#2H9{(C_GRs^QUby_d~9O?kQK?ppJNN1%C{Gh^=hvTMJ2_)XkLNQ$_r_L-q zCw6lcXEozkcU^_k`wDp-6F+lOWTze!V|gB+MsFXJ&5>R6q88lWJ^xLXm;^${7ZxAZ zP6Va?VH>hGEW39AgG!Q&7d^+=nnNMBs1jI%bde$-rV?1D-z7e~xTLdLR~!3^R@K#$ z7BIh9i(R;;+fjmRz@Ij*V&wG_xN7uSyo%xjvjJ~9sC>Oem zL>}c%NcRLiM_d;jjF*v|kC`@Hdud{jX4h>F&uy&iOg1jbdV;T@PB^I;Ph8N7SECOm zZkC9f2v|C|fE1-iY~n<-{OOkh|0daQA!Q6zrJV$o?$=LrS7(Q2PrPof)dS@}uH=ll z$ky9JUfw#~zjJ!h2whm<=lIE+VSxNsP8-?U=l4@P6} z^dP=|lWs&+MqjDr?T;I^_C~L@-Zp8)>^NH%KjZ$YvDI1im3ti;0q2tT+)H0%YRBINIG(HR0d0rlZ2|wqJ75sHH&g1L zXL$cu=0EQLFwrX~`R@S#K3@1o;Lm%(n?V0%*zotje;>K}YvAzPJn?@X!1^8M_j#K? zkuaeDHhuGZ@b8EJe+Ku#{a^6kMge|D`TYd5qvyTkBj=r_E7e1%_r!|y1+ zw+nxwkP`nR%CF|(ca(oG5&pyg0&*e;0{XpR_&xgH#rUt$5>$VQ{zIDOB*EYK`qR3E O2U2*Wd>75nZ~p^@q!D`n literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.ods b/pandas/tests/io/data/excel/test2.ods new file mode 100644 index 0000000000000000000000000000000000000000..2a90db839026b57eb35a1f5c2120b8a257ea416f GIT binary patch literal 2877 zcmZ{m2{@G7AIHZQh9p}=WmjQ_OiE*?EW;QVW2eE4#xjGMm>4_NWx^GbqAU?1#Gv8I zUdWQ&jFBax;i`zP;!gda`}F_!zxSN?Jm-Di^L&5jdCvR$yuU93#>EW;{HMtQ5(9LS zKdtVMj$8(fFYn|ZyJw?k!+iM4DhVh~ zlTor4p41p}csoAu$1;~?ZEHcR8_*?RYxM2s<&Pz1lT^xJ&2K)eQjJn=U-)It*#yx3 zN*&2m%M20!Vpwbh(Ce3;*rd01APVf(lmPi{#aez^VoSctlTrE0@xQ9l%0oi~7#h8S{EyJ=L4iJ-1)(I9d%&5 z-*cZclqBe*t3DZaQ2ShQntU?Vi{)QNMNV9dpP4P3iE*GiaTFh`RiRgC`Uq6H6)GZp zj6V8rh)FRlFXEH=t)(p+qQW;EY`pFpXwWqqj6F4d>%6K&@$OPq;avkylq16XjDJ62 zN|9Lr$tr$`$=YoP51yP%l__(RQkr!gFzrdmKjMIGKZk*^KsBX~i~1PW^9Sxq+ZSAQ zL%h#0s1sf@NpxlgqQ9_9XqXqas_ghYbFmroj) zTg9zZq2_Pp0aY>^dRElP?I)F(c(jUhZ4zh9=`@7Cv@W>l*tnDIaIPsb^5b2yZ)g|t zX4~mD#Pe(6G~2s*5)x_sJY%dg5D&3-%B>rIoti7?H)5|!SJTLzYtaEU?+cx(W%~>5 zNM`|DbmdX|#pH?cF<1gI@pL2*}V(QQ5Mf0Bu%pfNoHQ|N@ zgDq3bHLn5R(3%>qOlZx1K9&(IS>G91eeDJ4i++E4+l>NndaGJE=AlcY^O07Q#Fe45 z^4uLr*_RQEF$s~tcc*{ri`NO>dds0 zAO@?xUw9`jZdf9(fE?CBbu5SWeJ~{KVA{r7T~xRJ1d@Uj>Uf1?-A8@bE4hy1hLv!2e=-M7qSun~kN8006+g0N!{U z@f*i%E}M|8Df7pLn;%2=gg?V+xy!egsp)+vvj|3Se-z+qNZdXyP6m88NC0X~7^it`Zy&uoc9B1Wr 
z)b>UB_`t)N}>YVea25)1~T8R}ie!P=9znLU>}o%uzK9o(Ju?9o(*$nhBW-ZVR# z#~W-Y%r&Ej!`{g z=(SIAYJ;9du}IeAV8Fft*Rt~Tm#k>U~vX|bx96U@q_iN+YPL?DIf zn!Rz+NI(z;cDxl~Hbepb62;}R+_Xb~r#L2Uk*a99D{;eYCGZNl*n zyz^Q4L$KErQ}>49!Al%Fq5a6?aK%7FxVBRVx2Saf*|MpmGfBPai0KfV~~BW+6vas8TxQkdZ?{= zKGZ_ukH{<0XK$6m&l1WOHv5|{C5ri3Z7gqg@6gxF#03xuZv4$2zipz(QB#NRL*$a_ zz)_2i`muLtWye@iXj~jiv0*CBYd_3LWQ#E4K0SW~`ha5MdB?F%ic!0u3rQ;+N6vKf zc*hxyjeRuo*WpQvO3|~0cyjcGJ)YH8Fx8h&qNYkak5K}`S3F1Je5=Qo3=B<_md$px z>h3>_H;>q1R7isV@hE1hUu1i7000#BQG9nGXd>#{KZ%mw3vbsDKEGxDCH_tGd_$UH zxuW=7wTbz|n2!F5VXxV#%+-zeTMN#al}_P{c~C7!(9%wwk?Xn<)D@LgBpI6t$I{By z>2tp_yu=EsEG2#O+j#j%9-bZ!`3xLBMr|OL%XeX6Q+CJ~!07IV*$muayvUNHy#;XJQ*|yhe<6uMz}J4&=?SB` z*U(ltkYYTmG|3xbd8Ux|O=9GkX3)+N?-9Mrp&W%+@BXRzx& zE&e3kJf#z5493>WS(-ldJ<)5Pb1(}$LGkp}eQ98}*V5J>DGy9v^g*gQF%M2!!>p#$ zQ#6HQtsgI|Y}_l^<@vidaB!-Bw`c6H0{DL;`0eeF_O~rWz`m*e-Rk{N*t2hc7{8x& zKi8rkx_9gf^9(E6h8OOOnK9mcBZAMV9QtnEwmaN7h(){_^C<)LQ0K|F_Gyoh00(_852z; z4k|lIG%VbZn8t35#-EtLLKCMu7a9!-3tbg9hG1kc5<^1!{J!(vv(L#)nK2CpGiQ44 z&w1y*bMCq4=f3{G zU3rFK9AX5>GhYEDE$a1m(U$6})?k5QnZz$A2^o@?(ND>1IPvOXH9RILu9}s>d{GVm z?Re_)7cFo*Tk(0&{>`x{u_Z!z-Hh|U<mRp>|^S!z!(z$PCTUrhm}ag7}Cu6`9$;!JXfhlLnQ* zE|?_Rq;y@U4N(hh)C3#3-u@Hi;FfIU%X}Qk9M9o8$!-AT6adpPf`gi3%P6R!a%z_KfWtd{OCnP<+?`QyOvPB%aKM zH|vT%ZQoR0zi_>!w70e0i1pWgBm6Sh6Bzeme4*zA{;myf3XG?v8^!VC$k49g#x=mATD!ObEBE-r}K z+!$h4TX}LV`aKirsmbG$sAeB3P??wXog?7hd?2OfT?iHSb91Coy-M)pF7mjWEs4Jn`dVg4ahvx<3Xm&a%zH!GQHmX+mc zgnU=B+m>Z2A$DTUl1GThet8A^KZ;u~zx{XNzX0QUCZyzW8=eHEGW2@#neeOKKXcG0 o-YX-r6B5#@Lq`7tQJX$n{>Au8Z?H~O9{;%c#1!{I7VE_OC literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.xlsb b/pandas/tests/io/data/excel/test2.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e19a0f1e067c8b871dd21b6ba5ccee17adbee404 GIT binary patch literal 7579 zcmeHMg}=O(&%HgLbKg%LEmaImasV~}2LJ#t113V9tpYCq09Kd)02u%W zXe96AjI?$}n(6ttS|dz&y`A7p*_c4iOaKu5{r`@C@eU;Db!jy7gX&m!h+|)HdUjo4 z9xlJgNlKNkKw>*KBdksTezyj4>I+?!*Q$8#{@~HLP?^w7kDZP_`-Dux;6w!BQ2@mJ 
z(NM05&AQ`R^3_tzoN_}NTV^XG87O#mCSuu$I0-aq+4hk7G#-?Xm(S+D(m1HGDFtvY|Ei)Uz%)z84j(OK*9%07k#`+Vmp&Q&m^cQaqGd0 zmMNZT@$l2(2ejidH7DxpV2M_^TLeTYTawrwVNlnxY|kpzcs1ZoPzEZl?nJ>&wd_23 zCB_%7avp0rDBB3Ocp1cMYSZp4G!;~YW}DoLtS74U_n~W4w;03nY^*mR^xy+GRb;xX z_dgg0F-@+-cySDRi#?cH@_8;JQBjg_;;o8MJ_!8OSlCO%rZX71K-1Rw9W*Z&mzFP7;)T#tzBLo1e`ZA-aMe(}pE zXJwqMidv?p(;#l*IfyfDMPv?}4C(;JkuO|Hf4a5Tmr*y}GnD^EOo68)$U{5GGOzX4 z*p2Ohx{`F6R)yYv&L|!+_Yf05|0=(gSRICk_50e^6jVGPb$9#1v{91Ums-lY9^GtG z;JTkBuX#H&%1t*hn)=K^roLa#S66mC<)T*4z)H_pMXpovGSxr^SuN5AYd3Wq4CP@I zG*X6|<3+n z*bAE_y%*fBMOP&8BsiQg-yiMzuR8g(zxVhMy;6hL2@L?}f;XJ+52<*$xI0?9xH$4! z+B^Rxn+xbfhgRFa`zX?B{6?GlZ-IVGctZv!E4NlnxmNBdAm+7t1@yUP*}z*4?jVoB z09C%k^viEiyYu_YIa@6kSSuQNI7{g;Zy6QyLJm9OX96T9Q(MP&nXQ$ zS4D|i;~1itH#?YjJKkxAFA8&=;Vn8}7g|Ird}$I$dw*iIyx}6WyFTBkiN4*x@}|3j zcd{G3f`#S-G5U(;%|99AU<0!=7q*naP?NcfB~OIG8UqcEE0U>X4#ox12vGBP!xpsx zD&?Vp34;9jr2>NddqTo$RQ+UF?)B6~)Rxq7)H;DSi$aS6FreENc>#H_)>9m14j~wz z)jhyrn0gWnUKZ#KQolf=O+Wy-O+Y{bLNqo}&_@uQMi3yVZ^#(Oq}&R49o87njP;QP z$|ON;XUYvq1RRl3gO>$WHi&J2g=!Z%&kT-s;#_nBq?Ge0o8d}I*w&PpR^XfX{J==K z1?(Hme|7u_wxGw3cG?In7{WgsM<9LR)`)LHc^cQHf*}i1>{Ao%C@L=lO??4EuClzo z*{uTs`8VX-$&kpAPpvY(9-Q6Y5w}^v5H{fAd?+}x850wkEyiC}csyFKYa*2=AUdkl z&~i)Od%l^v^lqqdFCLYF)qQRXho)FHg{gM~ZcUt3iH?pCCbQ2#p{>~AOE{lH)`?^I zmi11}9^YFk@hj*sm{6R?zDqT2ls>u3HRg-CGB>f?|DmQ5PZDKkUs=5>+qddyue65e zn>^*5WZAWAQ)D|!$oOnACw1n=D++a=ctH;$`SK#TJlX8yN>G7HC(+`~t>=b=GnDTq zQ!Ju1n`1`y`_;U!w@Dl}`{Q7uX$ogbjzW; zOvCPlSp3z>jg;PKYy!<^G--j7HAKVKn)|zq65c5}EzgCa*z|cF@4xER7iXjkY_UpA z7KHjdvz%M;8W#}>V7PtydC2wR?^N0!h@d2SuQl$UX1qCE~o9yP%$GrC`L{Eo8$Xf5+@Kk)m-!Xrhshzq1?4on!;1 zqz8XBvVQ!DudIup?HYxMt)Nsl$G+rjVFltVo=huyiT>8e?TV=kVCDeM&1ClhzvDEU zyfxcfgb56Wcf8i79VQwa&kAW%-@iEldt<4b`qCcWl@8#fLBHQQ!=Bmu^r%5-8!utr zk@oQR^XdIh*rtJUD)Tvn$c`5`f|nc+?I8nFJawzSr?(-9H*&HfizAS{q?fngg%vo{ zR@WH;q{^{OuUILCBxPCi=Ke2 z)}Qs?K~-g3@t69tITHV3-MeJL|Lbbm?A_BrqL9{-?CssPJby|5R;UHG7Ty=APg){mr+g$M7{f-m^0(k`jeDWZd@U?Ut!nu8{Y^d?Yl*ZclfQIn^ 
z{S_2JNihEte9I?ChScB&j5fRbkWqkfJi?xYp?BfA-!80V!I>EePmz#$6+1a|2fI@R z&KnG(6?V}f#}{rDpp2?(i{h^=+T>+cGA3^4v`AGCK|B_tS14iz$I0i!+*I;q3#z*W z$7CK+f)j0u(JMI>Zq8eg;fi8rQ|`*EMe>^}A_7@bx|kp4Gi>{@$5> zay{!_jTBgcrqJRR>8l=#fT(;Yi9fOk#y()W2F>^qCO|)7eN8qj`w5{0lUu%(^!zog z?Dp!85W>3+-4Y7v@(fyaBGXZj4fw zd>@5*pqpQjf31dEJBHa7*FHG(CF=Op^zHQ5`nxW* zk7wUFya=HYHg={Mny#6wy=GLkNhY@{bBS|i#R1iuB^CLFpL1i_il>)x$%dJ(Qv%G+ z?>dCZMay9^PsY+D8)oyem9KWNBf+*LZ|aEvk%YepeE;$el7arE+i{B_Z&%)so}?IQ z;Z9sebo03p9PQut96oe}HiE{=&P48j0qa<~(b_rD+ zYRYDw=F_cc1m-Up1fIrY)RrW!`I-ZjwzWbz8M-8MpPSm~L-w3(ZX5AO;sx}ylmc%) zaaJiYdd#8!IB`;3&?%#}tZauYp%5da<95~CME3F}3nodb5pPm<0AExnPe?pmUmC&EW`U%7s zNkTjnz%y`tgoy>nK?Kq6deqf7Bx9A!{LrWx7_{9R=NME}=eWqUY7@5u3%Vw!Sqo)h zO-m1I$}_a6FX-kYN=$?ktullxkgde@#v9?Nbia6P4R{yQjGyUF(<2Zlnp_=I{K=(Q zz`C^Xt&W~ZLKN#2eC<(fq=4oJl}^N!PnV(tJrj$;z=BXV$B_HF6XyGwG}FRcG+g(K zO;B)Fu(-OgzSNKw#e~QPw@vp;IELe^x?AyMw=T;QBtP>y)Tq3RV~JUql0?Z``gDEpK1v%I>vu$#H{@UdzbEBzNmC7pdz`8Ut%?j&aa-^YfpC;5$90$*|m~B-wIkZOqd3pWq+JU`E;xZ?KBFRV5mncD| zt&S*0bQ=4$c8WNtH)uw$yh5jX>R(d*k0g)Oti@}R0j1XC3n<1QEm%^^Mn5R>Gu~P# zH!RAQ@B<6RD65wmWRD9mlJA1$*vLE2U1OSy9xzjJ(;?M8?Y+F=d!N&bi92s;aRYnU$5-QL;ud+nIq*WTFF&;V#^sx#*f zVSbdAlocYIlo3e5iMK%0v`7YK0&!=w0D|$U!Dxiv>~1zAFQF-VXWh`1DG9ojgSl8~ zxx2U`_^e#qt^XWV{@1~wx9(1yiDoksb?BD*2{GYVWkN4B9%+)@<&1nVa7?lVnsI_% z77qAwh#K@vF%mXy65nbTcWHn3z6Nj9ylB`VM43DXKt3i{(27%^FytBrTuUM?8pKnw zyGR)s-W8rIn&5~Ltt4umib?AoSDGNoT8=EqqBeRHl04BakwCRFIoy4eV=#Abhr$S; zF)!9EBF3I%^&0D5Pi)wMa$|J+a+e;xj4J`faAs;N8_VIlwT9t?zCaCex6#23npt~B zRKjI%K89%{p6Iq78!{(s6M~G4In+cmLSH1IV4C8h-eQohbAY(hLtGAsFiy)7J)2C{5lus2)DtRviHUm~Zt zt~SW8As_dMEo+P`vMl=GsO$*p>aV>ef1+83i;Ejy&IYC@u#Zaz5eH;<`JG*V=<-`q z8VJ-*bD~#7(I*4?{IgH|v${vOOcw4w*PUV3-ruYH!8mgO5Q`}hC=e5H4^t>cl##os zy`eo5A4Y|-qsNY}k$*>g5ga#U1e*BdXzhXiNqh@e*Z+9`ZO{HVQsZPb7x+P;TgqP; zS!SQ>%HZ6HB5^Qq132YATb~e%Z)p*LycjQFjgv8RUS8 z^URoz<=IHpBTMAxkB*%TD4L=e31+5PX}fBUdA&*8(!L#dRxrjrC>^$obp{>(s|1EjSHDmP)kiuqMV%LL%6Su@s*0Ue@FD9jarLKfQEy z>TK`snxxbQjQwy`qDvZmEB;7~7l4=1x#0Itn*O?De_em`+(}FIXMmpv-hUZ>yVjs( 
z@sIKMdBdM4fd4d{N9*nXo(G=CIX@BlfkcD*x9}uM^>c{+fdl|NLu38@SN;sx XTB=yzLp=e206>psuN`_Y0s#CEceHL? literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.xlsm b/pandas/tests/io/data/excel/test2.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..31cfba7ede0823740e16aad889b6b6c2b025049d GIT binary patch literal 8086 zcmeHMgn8K~g%TyF^l?5u|Yl9Xcf>MEJ&i zci-K0{r-Y?&-|X}%sl70@86truDhEkH$P%L5=I-v77#53fLF>WF$DFM<5I(zP7?UJR^W z8dGQ=d6?z&cE6b@uMpaJa^; zgseS6Vsa{u3EG2?0^)}j_o->=J)g6+Kfc8uJlvlMG?>L|)CYmoHpv;3V9^GBOV2$f zSOb_A4G3Uo0oXaCckY%nA-z7b6D}wnd>)s#qFsbz^s!ri-$D}7!7_(Z`Dud8l*^9Xy|bB@xJiyk(R+=yijhDu2Xp ziZ5iMf_1S{6X;Tgp(qA-23-#?!NlWt2I;?k60D3RBoj!$xoXvdAP*m|{^Xg3GsAw(A2Mw(-Dt!3zu#RAYTc7d{Q89HSsYFJI;eeXKviU_d z(u}OmepO^`i*Vjf%0yQ1Tw2LCu2ht$>fTfa$)LNr%|dm+pacEcC8eGYMAWv{JlE|3 zgRhCDL-)zUtS+MO0r%9i2elal2`~636yGuDodp{<3Vs>M@$DDB(^oNgG!z<3SGJ4@ zS^qSW5Xsc^6U2(NBaDO+fQjVe!1EtF@pg8zH+Ocn{}Imquo)yoOhah--)@!KYM_2z zf==ukD6e;x7YT5|gNI>HZxFq=KfMaq*{cd|Y-zch$!@)V+a|a475!zXFWuw` z#T{jFTwlQC)-*lmMUVVf%;VSB)@r`v_C1AHIP26EAlg?#5&@AOccFudbiwS&?&}2^ z9eRgr9HuOW>ye8{ZGh90#UafAmEaL$baw=Zy+G^SFlv-syQ$5 z;&kHA2NQT`z$XOnn3JhxnIT2yZ`FUT9Q5FNm?p05q>c0KuyEgZcNPTQH`gXSQ+=ao z=jmHb;)}XlG)aM5io~23!CoY3d3tgHvs8~hF(Nafz@-R8R^&NpaR#$mB>Gz<&swkm6a3>x9l*8!8Ucy!Ul!|eY zvXM@ji|v3fP=9D39@6B4Yc#fB=g87KjVrrXoP1M`QCvhivM)n5a*j133Ehs0JHXyD z#Inz3|4y7gp9GKf>w6lprs?*Eqd4?ih*xjc24hV}7n=&PcaV+}zPy4>t03!zV_)-F z`*SRfKBj2o;p3Ou9}TsohaRtcv-M&ohS*ZeYmr6EKVioQyW`I(_`{DdzTFP%Ck8&D z(1m{NNe3Htmux{s^e@)Uht3buzQr_xaj!)k(#;LY*Gg0*dv(KXz8O4Av!X6tV~8Q6 zo)?(C1S#K!dUHQOCg?Hymgjh9#Z7zP&{ zg_8latCzLswDA06rC@5D2X!{{-^o{{{6HQ&x1~;kve_-;PGS-4b)e`37^&II8y__n zX3c}~(-7O#$UxNW?--jWB!-rT)~EI_Q1!`r7&1_^L~@u;e?16 zKRDmEv}ql!Ur2fwfInX4tbdBC8Kz0KKd4=RhVp9YnvZ72w;Uh-5@!7d2KUPmBgcq~ z7;zzB<_bt+vxta@;|N-7^|5ouMU;Egi>?e&0()LD^zNZmnen za>ys6z&CVXvN$y=29^u9q!y-!URL{#ZiyWLneqH|=EP_M?`k?`Dsef;s1ef=(FX6~ zVA%BMJbMdqRS3R3oORy@Z5&*Und}@d1pabrv7t_nQqTc_1j-*3#BWpcfLJ+N@%(n@ 
z`!TaUgNaxwVd6JrS5i3c-q-fl4m@Ab{T*zU_E_k5i{NEVR*ChQ&nSjE_~I`-bppU=s^7M4OQ+OpskcT(avDlFV1Y)#K(mPz@{LSv-I;3u{eBtyHO9nTFU8@0G_SNJx&10Zfj5>-D%i!2{h?K;Hco| zf~C`rDyNBwI!d;V#`yywzq3~%b9-}hBHjU)0!LV>3+J;(VKSJ;f{5>Hfw3BtP49m^ zUJDZnD4g2%#YL%+1~Lz=QMP}=ICBwQBC+h%fQ>?1)|OMhIg#^xw+yO+wir%6bvcr=C}kgwu~Jb zlYnmvFHM5?uD`&u$P(Ywer`*+J%eflU0%A25wJY=S-VV64EDdexYU1na|2_+|Jr!p|M|{D>Q#;9vn|_bb&wk{>b2h|g3 z5B*};-LEW0+o~p-h&L+GzGFqjO6>W20J8>lljO3-p7g&+*sJT~mHje0oWb-g;rmlM z*%*e+2SoQTXa!aXeDPhShX!0$80In07i3+CJoHL+nW6&tHSX2Dj?1`|(3Ms)kF`gFr955s9O_PRHQVx>g^L?;2X8+og3 zj4mCk)h9-Vg727PrqL#PRFmuzY8q_<7S!OIR|byJU)Y!IZg~^W=S7z4z!h%Znb8=E z1(2GH3a@#`C>$EWm?!hYuNUXkXPkVMrN4e>WoqnECG^CO9;gQv9C zkMYPg#eEtkk#-~@c_bxTyJOe!COrqWy97&1Jk50&4=ru-d5`FvlCvAji~xX?Xat9i z0hVketgbQ64QkSlYLt-$38#doMFJ^5i;JBVrkuoiR$!LPXtbMO-X|8M91{lGvon-7 zhOo42zwZ}y>X3m!Khyy;n_zeLuGjc;We!~04&*(s-q|odv*x+pWgFpIFM#@_iW)_$ zm!R_jMLgM}m#VXGIH({6f)8CQW~B+14c+b~J#UGpMup+<=c*nlZJEs(7L_(U*1S;V ztklpupTii*b--I&P8@hQj+9d>Oi=VkA=8lA4_M5e&SEpQf6Tp3k?&pWfYfzOMFG}%Trjb7zfL6vHF zHl7DRYai3*#Az=!Cb;hxX1pc*IP6=sdr`M>DXpJOmjTu*vq#6FF-0p5&nbEy(kpvB+BX&frb&;?r;ZO^dH4?#k==Sx^ zr%r)W^0z}<$lJ&p+u$rL^iA!=6m|`?3K{~TfR^*p54Zs>i=t(2x!~-B#)96`sjzG` zrBX}TkKfj=z+K&v(@IVCNe9itym93`y~^Pa(d*vrk|atg?AEQ|#NwTS0;H)YgJiC# zX)l9pE*e(Qkx~St^BhMaKJN&JBQ-oQQX#m)6OmeY;ySLCM@Uj(%lWbI(k7DUm=*j5 zJYZA9bFJKg$IwT1IquDoc}2drdcYM&mR%+!-X|qIf`4*W5*239nvIrXpN4v3*@l-q zWY=a)cEXFN-=z@yHBx9dH%vU@t>Gi25lEyZl=zUR87DC{H%Pcp+J7{VR--RbnHc@8 zCplUQdk$K|wkIL6GI335$~p^sGVmOJ+p0((U2t>n1fCM{Ir&btdglXAh(iQR$Q6`n zu$%~fJi9UROT1!Hyzw1H6tCGR008-a@cN@_afeu0dAK7oOGNiu;uV+jTlo@si1L*V z6=pAb(9RvXU*M=&2}P1ScTKliBxQ)sEn*t$qI2X?cyBK4RG=)ub2-IvU|eP9eW9gi zsh7T5dAbC1DSnR^Ue+}EMMOrlfg|{U+RuP0^PA_H@+%^LtmU;TFfB?mAAR-pwdmv&b@6)b>o=aI8>ArHAr~m#5o$}X69P+@y ztk3A71!zew$uKTQXUkN*wJ+U}jA<)!_=SDt@m{cSg=~D&xvpxhw9Z~dBZ0r2+V>HO z>`gF8J!!N+{(7XT%x7|V8IrSEy#g5<7!$y&4|AUKUPQCb%ZY1UADvJyzLUdk#b%XFr|EybSxs88=XQ#tYGz`%EdBG69Jf;{eK#VQ-bhC8gu9Z&&@^0Cu#wjc;4?yh3 z`nvqMU2kKESfM?y`cqtSWrN-wCwCTA^K?cyWzgcpk%p>=9zqZ$9uHycjd! 
z!8_34M7Q`LsFmW$7jT2xRp~_6{wiN{znVOce>093W!hGUd&~G})wt_Y9!x>6WWP!lFYs zVZ1~xwXwANJwiP4l9i8fJKQn__yv{CJ>aXsFwZaDPz=DSGadm#VuTwa_$vs_TwMOe zAY#dWZ`sLzx?vm`l_(616H;lebRS-ukMya?My;M)R}&ZLghpIv!&Q zCT7~is<8)Akk_2^0cj05*AX!0B8!ZQE4?*qiSGKR(n&?Bogy(%baNSq za6OVECe1E%%F`18Ii^2oN~|hK9Y8tvbQG+79pu;p?3y^iz*9ulm+>Uyj&$%a@;EjA zFua?Kf4~!ZaS7T9Voomp~h+ws2mE&Uw+uP6#7CwHNeAZd?7J>(EqRt2$5* zCY3p|m#LIF11+%;X~iUv(m;;+8i;Yxg za?#Gy0srl#ypPJ&!#S&)53dCe(7f|cIptlJw;Z(`s7Ufv6k8pxwF!0`M?ce?tof@26i<^LzIf7bIehxr#xcmBa>{w(}6Gxevi4PwE5 z#Z&#P;b#KqPYtp-zodhHR`7H7{igzS1flffFZ?$b|5^0sMCeb^W|IF-{U2%3&szQ- zO#jpY0JM?;0RITBKa2m}4gOVJ0>O#>SNwOUs0l(t>@EO+h4?&0L~j7ik6-@>=LpN4 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test2.xlsx b/pandas/tests/io/data/excel/test2.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..94dd951e0bb84dd566dc9aa58c89067771f49a8d GIT binary patch literal 8067 zcmeHMg3ZDe3MGDM7lWVJHE~p;O`;_uYMW z*Y*1g-aYeso-^~D=e~b)&bjWoj+!C@B0c~afC>NrXaI)LbaOp803Z$#0KfyF!s|%e z**crpI_qnA*qb=LS z{lH#a)~8|}-3dL#MmiZ57WWz9-*Kd}2rLDva7P!tc#ek6t8H!S1H*b+)G;Ch`Z_fA z$OJe)_wF&Y6OqI_YHJ;2;*&{mlj!S5o2CNZIW%;s^F3abR464iCma z%hXHbl~udAFhUOGZbJJmrXX=RQi^VpaxF9(zW(xy*mco4^kk5BCUP7N(jbH(trQ`THjd9?Y(-v8*MH zg;|RI%XG3|jviOxjB-3DeSnuo9zrPkK0v!iS!>DgvK($o>e+r-NM(aS=1#(Bs^3gf z{x+s~7)WVv;ypp1lcCvMxp$v6&Dj;H<}-7UMWtbeqY$l!zOi-3$>Y>^yj$;wDrtS{ zw0yXiJflx~88XlOv}^f}2Gc#d1s-&j^d0sGMp6|n!6MW@jU+%UG5G|xBF!)(AqAkq zxm$Do$4=br9IXuP?5uu7uRm-C4i>^-TK>0Nsk*XaHy2JT`duiOTdFGoaL$>Nc29E` z8@0cdWr2}~*Y|XpjJ5gYi)lqp1iKKAgW+D+3x2FM1iZ6Ow!#QpL{B{H17Re);gewm zRKxu;%D^W`c({8zyCwVRSSe0;O=9tV>>2oio0rTCB%wL^w2x1lCPMfuN4Q*gES<(E z24I*OtX=oNe*J8(EU>nrfjWi7bmP8Bdf^=TL!bxM_zB4ac@azx!1&fA4cld>%sJ}u z#~V{+j}fcRoNJ5?@)AYLVt!HY5NGPpz9&?EEb&eo+3#C44_2L}z7MdpFC4QEo*tO4 z_xDaM!*=?gkP!nSZVAEQ@*bA%V8h;kLFTVuDb-N6Tj0WI#h&%UaaLU)<$GXAq?~F1 z7m~GAeO}t<%>Fn@MBY{%7i0C>+fanx79B&6HQg zf6-WrvPp{m(VoD+f*>;RRgWbHKLt+GIX`hed!9cT-8^Y6l>|F8l-uWde^0b|oja!P 
z@P3sIW7=(qN#S#hrGe2gc=)QK`!%@C|u?x3pUsZdq&o4{Sf8*{lN2BpO+*Wp^Kf#VC|0lE%U+Wmm7xt z7to~d5w&2<8<2Ifp*HbqzJk~%jUcn{T5pm}$O~3!BZ$and8e-wU`iR7gHQ@bKLHaJ`SBb;+7D?oGLJRe3?Vo^>qeM~)#> zWj1?Dydv(Y=*)RvV9O_!)-Yl#9L!Rs7#;&5H284mu58bs1|2yKut*H?K}@?v`F28} zZLDp2YV{8BIZ-F=d&E@1bovt+zV}QHd`&90*~*?(O>?DR?Cu+yo*r&oiaBe67q8Pd z*1{?VC=#upm2;tiT&fy|Ln+Y>#|OWJS+|zfqaxVIG3>&JT`-tAnHxBo7(a7%w6HaE z`r(*#V9ifC!$bJ08Tw%`nf=OuWH-WP4{N23F;-41-jyuW;0%glHO zqOYzjw^Wm}*@1^V)Ez09qQs9Z7{Lx7`$R@U+Mr8p`qV_xI*W(|Tia>D=u|fkSj5+m zn3EiMRqipgB@6{JV0k{95vK5=R7lirsm&wfZZX_i)hk;( z>uVol8lI&q+ptZsrGBW@#glV&db-^zu$;h0#0iNma(ewt<;8?+2@z{~Py&!F!Fb-) z4htnOf7d=Bo;#bp)5a~J$;;yV?FC2nL*Od0@R<0jPT{_+)KL9^iCCa%H)kb@%F5j9 zJZ@b3RF%Q6_!L2qcS%-n71UyY^14it-0xZ+y^jOBhD_wPwDK(#*r%d$kje5vlJl#^ zr44#RHN#iXDJLdDQnmY^1dUupy4`|z)w+>H%v%V~z9ltqCXIua!Xq$!>U5(jlMZlr$KK;ktA|}LwsMe7fxHsY)40{NSRw|XQBL_ zXT_H0z#-Fxg2$qgpC3BLKx^fc6VbpP7y-JSt^SZvgdj8?=4nl|iF(Dg)LX4h!b41g7Z}AoPkg`Xo zaSZl^-?R}ijp3;TaJK#(G_nt%_mbMO%cW}jqYevd6sEg}Z|_PoRi2Y1JDq;GAwpv> z@qYE>jz60t`*2s62TvV##)!)z=Ia=1qflmUsf7=t5?bNI#>56en=TQ!0r`TRpWckl z()1+FK^VIacPUBoqH4nJZSe{s!=@Od9beI_k6VOx<0J=1Q@@@+9A0`@Fj#0aE$VGd zyBaJpDdihhwAwRh&Y@<_3wq2wMbQ@d*qE-VUG9lc`6$w3&j=Q$Yonp2ve7#H%@U+r zw6I9gJuhcqYM(}&bn5WS?sqYJRb5EFcM`cp|o(0&ubdvr<3yNu(3?I6+L zW4}y0i+V99Wsm2qS)f55=FOw}uapXf@SEvUb{f+;J1qTV6~tSq7s)n*H{4# z2-E8cpQ=pvhP;@eND1) zmQiKZCSD_Q@LjQ%P52SZg5^C|?8U6$LKV2g(Jdt$@A#bt@>tW6jO1z*I=*@2Jq zGb&TI9`X|Bw@mc4ol3Ya=;1xp!0bae`pKb@V41;B*(}4H(sfa50|XK_1OzX{L6tj} zEp5r^h#mQ8Peqa(2C$Hl#@}{=X5{P~8K-ywgm{A(%(RerT>%x<5e~(==V7&yQi=iz zuaiQ6q+dmZ&vFt@qFhQ)izHQ>4X+;I^N|h<0IgVP3u*%xo7Km0TTqPfw8z|_)9KKMVcE?I)p3V2z zp=4SDXGz=fVcgq>XmUgd22Y04q1=|ijCO88jgH!-5?iUN=EV%kV1_l;>QZb^?+9Fa zfdEeKyX8U!l$Zyh2$`0f!`uOQ!*AmVuH1^bHmXagSc=I9=7H%l%oWVd@67BOve|@o zY&Q`0IX>_TIzxg{o@u?hcbO7x(NG+<$kv)vlGZeys*T)EWjiJ<$qpke<=F-=AnTdG zv?_9mE1YI^ke%`g4$OSYhRj%5p=JM+~vt+omC*v}AM>2I4KK|j! 
zPi%=~**SR60`f}3O0c=6<~Omqu;9jis>92k+iZ2gv0uWb5=G~IN0cX2eCo9jeL%u(Opi=bk^ z{g*LpiSGDIFf0qKLIMCVe-F>j9@Zwm#pPTL9lKO6eBTeX_ph$fNzMD>Q45pM)$?pq zCROH35E!d=(5sb`t>^s?xZ_?Ie|c2Kd70LAhgJt zqf4JE;#J5S7Y$7f^Uplq#)Ql32g`ZW6i6-0yqh=@%&DubPQ!sXI?(sI=L!N!ogcKx zQY&Le58?GOlua~<_Z%icQd;C zgcu7wxe><s}9}1xm#=HJth9!zXiVyzqt)gMMM6k z9-F|Tic~^@!|&a2QSb%RyI~$w=$HXcgVtt$DwqgLLy{{nmRkJ2dJS&x5Sx^%tB!-# z4{${laek72{TR8bcb6bmTz0o=86y(wOwn6{eB4*^nw;{=*X*)p85u5tS0dA9F!<|^ zz-zb~AzcNWYb-(Wxt9(jPcv}|N-WqGyROVaIFFgYN8lc_3eFq(7A)E>qN@=%)|6}F zy%i{X6j53szetz3z##U?S$jdYiE$HFe7|Lr9?=OG*7J7R$ny~X-HafS z;LqAG;0Db@j6?AcIO{QD6El1TawNQld?;1BV&(CXKf4ekC9tF;)oi=q;>+V#6eMgg zvcv-~*6$mi(1d5-Jv>=Y2>u%Xpj@R@$i>_`m@(iwl)kSBZ~b_BbM%*ZMI&kR7=jhA zX$SxS@qh68qiS(7H!*Q`f@PMl?zhA%D&e>CCFB6%oC*v zKW&yJbyhp3L-c*_bE8H+W+#n}Lju%g_MrM2hYr-h28SS`*e6yWUVrlSU-%qb<8 z0<}_n3Bi`VW>Qd2gI5cdNMstq=o}+B*s`)a;-{XdOevblE2XCk5c|9_tk;Eg<41y$ zpc+;`D7mK=S;}{pGx=gXFSMoAGB71VJvU9-J2D^f6YWx@H^tfGe4XaPCu2!BzHh~i zbu~V(kJ@2|+Pj)Qofhx#j~E+E5hUBM05%w{^7<%*6XyrMVb+}Vaz`{$xNFSEynD~L zB?oW#R8aiGWJZ3l&qZkf7^>vm+4X$vC6)Yn2nMlFU+P!nz-**A`*;Yujh%6#=IT+0 z`TI!|;`K|b(&IfpffA|cx(f}ZN{MHCCABzSmddw-qG{j2iYjqK*)lhSb%pNZ154)V z-^!QGhkJ&3v8sdYCfw$cOf%D?8aIYURq`IB(_!U^Hmqw$b7zanh_=FKW_?*tsUhUf z2y_4qwiMsN{L(+`*3aWe#7Zy(c)&)1Fsj4Y&PdJC&fbaB$j;H^car{Jh683AQSthU zU0nE|myz0~`@K^0Yow(X0%Al#IO%?e4;kHbDwP#*XZ9CT>vhVMI(ygCz7K+ujca@~ zk%hUymT&2BKQjY9*Tq>TGIxnKJrfx%veJs#~b2agzxgC&Zbj8gwyg% zsYXDJ$$E{JSo7{NqMsMVyMa5QF%~*A21wR<2Kp3Y59cgjoVnZlOd3B0&KY%^0y5`#g%Kkck7Ihmworcke(K zS8mPPw?@?PREg(xDg%S-u5H?xwBhokJr;8LZ1H0yi9KH$9`(dtJ#zPZO3`(b`i?PC zH#Uq(;>~K!sJizr*|rFPZEG}a1Rv&vaQ+HG1AF^_5eS3c?=3Cz!1Ck#_#p zAvAD*fXYo@DHwAhTW8vfx4pPp$ZzXg9#-Mtt3k&&lDG!@t#J+5K@>Q zb4#BvmF;Jkv=d0hKC4ub$suPb)qBJ!ZH=IwI4aim3PFO|9XIMTO`2sv)fM}5<)d}FC*qL2iv?IG{KCFHTMIB)r*NnjUB28*v?B(!p7wZcFJvytmk*?`e0zMu z2SswrI%SiwU)r*HYE4FvrSPQD`bHgRw|2iTCrKQowrYC3?O=>EiK{w(}6FZHJ|7`9-)VyS-C@H74Mrv`3}Uy?yT zEBHD0{!_sj?tlKme>3r)MSo6%{uIq4`0v#Jkre%`;C{!?aJ!_ literal 0 HcmV?d00001 diff --git 
a/pandas/tests/io/data/excel/test3.ods b/pandas/tests/io/data/excel/test3.ods new file mode 100644 index 0000000000000000000000000000000000000000..dc78781caa6e9a2e8d8d3ba8afae8815d155266a GIT binary patch literal 2889 zcmZ{m2Q*yU8pnrWh!$5TqJ;?&y+^blIzx~!O2(LIqXc7=h!PSlN^V4#8RhC+E&9C( zqDF6HWTN*Vqa-5S;jQ=bUhcd5ti8@T`<(x`*V*U$?e7mU08-EZ{?+&akw~Q&anj9M zrzUlG7k7kjfENPh<%M*C!+c#lJtX`*9K}6hJ}y4uo?ZwKM^8A)9pT|C?&IZ+fI0d& zBM`n2QcM;A=sW^HVE$o#s|OGOFd({iha6}(^Rd*eE*I&bOWFzl{FZ@;gYpkt(rooBB}ADH9tQ|uZ>pt-yvF;!|w zp4w7YdHfjDAxlS|B`~?Cz>Z^56~!1a1f7NR(}oxf`jymG+b$otBT)xk2HTbkqn1ui zw89dzX}I-A@&ZoRt`_tDz63as?zi&#I)=%u)wE?P7 z(S+^+%~YQxHozcS@(_((*Q48`dJqIsSiQt{j8WR`@VTq=Yj$jk)g3&6Rf@nOpdIhc z%g98jzOnNQn{)D}(*=w2xT-H@E7B_?Bk}9aOPA|Fe(VinqC`d+)Z%D4)1v~qg!V>y zw3sxv_3lgt^EIuJdf3zZ4Ks_NuOs1;$gGQ86Hq7vnr{{Gu5Q?H`zO@vgtk zLGu-|Ew4XBY$qT2+WPb~tgFN|b=TW8OHn)6UMX_VRUvOE<@9wr@~yDtbzS1BEKhu> zRn>DsN@_jaOh#PmNgPwN_Ozhhl%0*O?RJmFSRom+M_8SO)#GSscx_-f51c}0m#^2P zx%M3>xv8qLxnW6qNA23E0#R8k{lPal$~j3OaPdI13U^zc2y_#DeN z5|(r=7pK%HK3Jhf{oLF-IG@rhf2!I7vP2t4^t|BfL47{V73{0cppg9it+%ejTJ$49 zUJ*}KUT8xg*+zsVs`8HpF-uJ9@~y3n`NN9s#iDykCaeW!qJ^={_4CAo?@iA=xY{uD z=_oN_lT=Hdkh{(%4ZWjTTE0#e`9;j-W;K*H50d3EpRumwL$5sp z6RORzI%2_6{}unhbSO?@pZoXJ<+a`05-N{;U&wM*f8biD1P8e;3r8N^VW-+QvUn9t zAnY9~;6C;36Nx^Y=vXWy^SR=v6{l%tG9SPBc=ARDDy_2E4ShdFdU0&D`naC`<+!v3 z-=4mIqm`?sLh7p~2RU8G9AcA0h7ca(t*6P!$Y9vG1+L^tF+CNIF8tW|ML&PaD$HoN zssf}dy10faznZDqu$KYvAeU}P zw(C0AG=2fmZ{-a}L1L#*5@yH)z;D@`*WK)DH?1R!46agLdzi!#Tjt&ZsB`tLtyVux zyex|o$YS(bZf9y{$Md##1k|@Q1=)q@zt))mIV?q3`S}@-+U2^SPM=;WH@pBQ!gc`; zA+4~=6Xz}@iXlqguEzX@NY@NQjA)w}cx!D5zr0s7C3`;_8^qx{oxY#p3@(ZnDy;Xq zZN>u2Np0UK+flUFEm3-RT!-!Qz7^Hmp9W_&8xMx&2=A8_k=($#snwTsrbSOVfjvS{+x zA{XPVKfSNJzL^v&ZwP)~{Ywn99-2>wj!mi|ngARggQ#it-)jhawAzim ziN#d=DAwLeu?el>n{wnI7$IAaq6Cfsb@(4Qh6HvCXX6yX4ikUE#QUc~`F38Mfs6FY zncNHNc~!u96Hv)Qb0-clXnIRCqKy+r&7MDfLcXG~v=bWAuKl|FTleSi3Q9794$Rmc 
zChK$Ol*{glTqnbzf(CW7_>TKkrB12w9Pf2<&$y$v37-u<)TciK9Q_%={j6_4&GK}AB7;8qru_&z_?hZrmG#)Rg9AboQ%a2$eTo1lOUROvvS3D2*CltW1IONkY~RdS?-e@Co>aj$tIDzkYesc7 z(GcTc6vhGW_}|(kN~o$jgu!byH6Ju06Bu_b+31hqI8*(z z?faoHCtdz9fj{ehE=)gk>!fn^UuEiN;m_Rs5Jph_14TcF_&MKxgh(KzAIS=zXCK6X S5_I;2l63HqvXlATKmP^ORQ{9z literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test3.xls b/pandas/tests/io/data/excel/test3.xls new file mode 100644 index 0000000000000000000000000000000000000000..f73943d6779517e5448aec76fe1b7cfd526f37ae GIT binary patch literal 23040 zcmeHPYiu0V6+ScGwa0NDcH%fs?6ni;;n*SBbwWZOo5VbY^C$!*70_g3uM;cAj+`}3 zB%&Bv{uD~Vra>icDW#|nDDM(bX%i#_t>m;7C8DGeg(^ir44tNpQ9^@{x^1gI3qcM) zNoymsDGDCca7q8-hB?i1<}6utZA=F$v4YuMCUAo!>&`g}5bD{`+}|2F+6KJ*{^&>!}pAN8R>;zNJPhyHCJ z`mH{6KY9G{pAa}TP>b+GTMysypuXs`+>9&0>e>NojW!i$%I*h`EoaJshs$2ANIeHAIlk({Af+~_#ej?6= zJhpx;|F4qtmuV+XXG)w2l5WfSU{-HgoRO-knu;3e`5r-s05-m*XG{O3=o7v4Tqesa za!!@>moW&WwYDCnNV-x!3c83h4m;jxRMU;DP&*~*rKfJGJ7l!bAs3ws=rjTGzXWGI z02O@<`i+U`t@Zp0))=*j3f5q-hzcx7Q6fc(t-vZPiUaqJMO9$q7FB_bQd9+Q7DZLy z_EJ;@Zm307;K)=|1#V(RRp2&UR0VF7ffd#(?SZ3ciGO>Dh=!8xVwaP{szZhf!Xc#q z5Oza<5RTggfUs-&gK*F&0EFGuAEdf45O!gI5DsevNWyOI55j%=m>{F=!PdbPq!5r` z>r5{UB-lDL3IhqY4mzwt9w*p3vkC(VwhrcQ4oPs~?n|t;6Hf$N2XnhZK!UAPTNp^N zbB`D70~@IanW$z(E5C?`z{#WV|=NDAeo z<&R?dr9$AiKpiJH7h9pF*b0k_t$^j9>@%>n*?=y^<_}AjpqSNras|vT+!dscz3_LZ zkEvsur8Wqgle3E}`wkO>!$`ak5HlGJk|+d3?QT@NwgM#Q2zif_@6O9mA=k!|$s4?t zXpVl%lnwRMZ)2)amfNcEZ1}Q4n}eDyRK9y$-~t&*`?!bs_J{*SZS5;;^{_7U=9ALL zOmox*k?MK;@yGL8yOSncJJWzIl5FixTE5LzqO8+qNpG8Z#l_@j7J@E>P&v=b(^P)U zTIH&sx~g`_xLWBBR-;11e=fs%SDUJ{`O5AfUy?PpBZJ_c6>)IU@D{KvlI}Y5<~wGT z&sj}i*T`!V%%)0Z6VGSU5WvO?M1oB;-F5EFAH3O2R@o%-*~|-IV}&ZgrZnC4_B*e6 zvzel@Y0YQT6u^dqSypLf>8`VX{Hr&cYL(5hd^U>%*l-BUvN6(Kr~md3Z#Gj^Hp}zb z!~@t^K}{&FJl*x-UtaZQQ^RapgFUSdY+4KOw9)pbN6&h*nWnPw@U%GrY@D7p+WzL# z$Gq80SJ`-Y+WY`EPEQ+c|NYr#z1hrA*?4%`q5w8dPaAC?J#pHb%}kYzho_w%z{crm zqwU9Dd&`^6ER~IirzHZ|I6ZB&{ex3adb63$Y?cRmS`2KK7vO2}#5bRK#bUz&4atsa 
z=c#NwJZ)|O8>gqm6Q_^d<;|v6W#i##3j)|UJuRMi{*|NNZ0b}t9-h`5z{crm@x)`_ zebJjuOl9NYX-fjwI6W<%7&>{zn@zpS#>3NE1K2n{EuMJ)_&2=RjK`~M%v>&PWu zpaw1AVcCXM=a8||ZFy~$Swb5yaqF~u;3$v2uqeQzBR=PqgKQ+J#Lfh);kTuFItF@D zneM*cA>$@Dpk8BEkhR&Ug9fn8CxxL`H}zo%#v^9#dl{4m#*zWADhLLpgf~RE$rDcv zg`HQ1>#E#!HLOpGfkGk{8|c-&rUOJ!Y2sjMq0m31|&M`+Pti8*A%+@eHFz@!Z-xgMVsSP^mA=D|{w zU7(aL#s0-y*%)1ep5QvWPRn2fb>=B`BBp^l1K4TRFUGzdbkMG`c(4pL7ArN%%<*|O z+`Oh@ZwTIGTzp_(PpUVS>FYlj+nPR*88R+)L+Q)RO1hX1V7ndLzE{}44}u(o&bLCG z1Mml)rbz3vv9s<~j%ULJ{nEZZIO-}lt~S$*(4|mXpW>>#wji;5&Kl| zDi3fg{`r8L$sZi&F+77OoZyIoD)?j%a4Qb_fLlWYU4XYb!4V6UfKW2U1DxZbWwA1+ zT88y;v;OYG$KrmER`x;JjlhtzU zSp>wUzD#<^*yYC1$DzG2Mg~F=o=-+zUMGn%L>z}fZi;0kZ*Hm$ma;@vN;oSl2}(#66;DzVqI~-yl^lI-OY825RJkRTky>N z_Qao6#;&@GU3F`BrU%+u>aJBX3Fg@f@5sP#tgn1;*BD~8fauGx2PkU~B{KvBE8Mbv zL^zdeeS!5ZHQfK;{b!A5C^KH4x(5v~jNW6^oHLRzfMV4}=Pn{em32=khX{1s99tKv8h zrMpCWGszoP<@o{|<*npG$N7D{r4@)8IB8HJYat=h?~?L6Ln9brjO4^X4R>12osURY zLkbi`AI6EF(K3#qW9%T#n^~zKP9mmR#>>$@6uK2#uTUte8=}NoYSLZ-?gr4R;Ye$- zO!pMO%cFIC$fjFisasuA>e$vhiAtmxx)5l$F^zURib2UdS1!3I$gY<3lGB&q$e$mZ z@&ig+%5RM7Kc-Ek4zQ1mtspMf<%qC=j}@#kmh}l1_{3H?i}6%wh0oFo4zvDGpMHeS zu2ghIZZX-}UccqubMI~5Q~BVX1}&QZ#H-vZJ&Me4@%jEbWR9wrA#+sNi98wk05bO= zcOYYp4zpmQDab!W=74e*m3U7GM+WDQif}n5ePee=f8YMTu1xIW10CreD~dI(cxLnY zC$)8`KX~2l#xLL;W9=}78VEHIY9Q1=sDV%ep$0+?gc=An5NaUQK&XLG16d6O&HwdN zCr^zmo>e(~H|GD%?>)@(|L2i;uFvy-p7-<5ceWt&&(pRe^PGMsGSBz-AoHs6K4e}8 zkm~~X;asi*So8FWqCUvJ-?aedvb^?SEtKa@KD$Vcoc^=O@l=!b)c&=ZC%`!}4ER@IO-U zA4>4lybd{rT#wB6=iqxTaw9VT55Ron1;`7L`TmE<{2iuGR|8jKlOpfCjA7$B|I(8; zn-`O6C@S`Vu~h6wF@ujpC&ZC1vA*0#wxgtM1NVOXs9=|nW3MC^{EdHrwfVCQg;w02 z-FFwItHhQ~{xtzVJMX#f5;8F*_wx%s+y&szZHU`Xq5SzjVnpM?SClRSH~zWD_1Hkl Q7AQ`h6(8gI8yx)q1OB=L6aWAK literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test3.xlsb b/pandas/tests/io/data/excel/test3.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..617d27630e8a06a5eca901b92f91e896fbb4ca43 GIT binary patch literal 7553 
zcmeHsg2LK3AMttLf8hJfbQV;t<|=xfCv4OEyebB6Z&n=p>3{WY4DZ9^l`GHj;DzDLaQ6et;EkFgX!CK?53~-+FQ+t`~|=r zhMpB32#a$o35BXeXH1N0HgWGnJ%B zv_8Lm!vyI6qFkkzm2MQ8j@oEp5}=i9v% zT0QyXp^jUCXl8yq58@$Nf_Ts&l8e~oQD0!3rJ~Om&UOw)9<|I5jhDWZQ0A?Q@i&OE zFX=X&y1F~sQk5s)tvvjOD}`6WC(bH7vN3%9jv?c%)hN(cyRA4`b-mAC$s=uj3BtcA5sbQ^l`KI z^mOC5zwYsuY%tIZ9a?Sw+gpWU`*+&3eoyo}qFeH~1;x$U>dlHrQEAV05YRIF`q5XM zJTd-bQCb37d6!?I_LmPo6zz0jup!!cx$5Y#LdfA8blBCC<3pIZ_FI+PF>2UsWbpot z@Q${mtcmJ@n)L zoi~FB{@FqF8EiBkNYF<#Z~n;`S0|W@t*E^`rnYDP#&ibXksdWR7SML`zjjhL31ZS z$Q9P-*9HwCpvbmT7kN@eia8|H^RdOrR-Su3>fOQBnC0dMVYc}U6xq2m!H4@f zZj&TU;Dhm*&Al6IRpC$j%w|;PaU!VaE%Ijfxu-(0)|Y1X-@I>nhA)G1x&G`0Qegz? zc3pJ?KlH(z$9?;OeWwcNMIxprt3|mBS8FMCf-{8uiIo~E;7a6+_nv{CYW5SaUfU@% zA6uY$H=ARZqSu)=dH6;q=cE{=|VzJDg8A3uctPDSvMc2~3n zah6WO9x&RCvCd zxqcT^YNS4TXJBeWnv1)ac&Zm>%NafH6@(mVurGs{g^^vQq{7tVMk=}05@2U?a4a`v z+Y_6*4%X|sR7+%BdA5}^oQgxJ_k{Lgv`iE6grnZ!K9h7%PEl8xC=`dGr0?A~PBrrF5tcd8UGsgLDw=NZ1GpE}$WpRRl8UY`>;tvW*#)72SojKZi?3L5IOlbTmLAXfz8aQ-ro=@^I#Le{aC~6_>U|}4kI=04(V*5nD z5=TzeIJaM%zOfY4)x)0mZ0=oWM8SJ0%lx1uuG`)NR<_&LGiST811@TD5*@_Tufr}O z=JXS1bZdLn1e3l(pq)aNIW_ExK;4Y%-nseYyL6-W#l}x5*_7?4ZW>^DcxrRN;vN?G zA-DM-qPTKz7)E+{ zqkKrWV5-LO+R4r1PXhG=ggt7MV$Q;{gPeyli{%L4Z}{@v991SelOc$?N5K41+-nbf zM#GOEI+biVn-XO*nui5$%)8FCxt*8O<-U7)3J${7JPW1!8X*_OMT`EvcaF1g@ab-w z$S!{7vK!skkh1y1PdL`mikizsM1FnMS7X;)eS70ZWqDhWp=Tiw-QYT=`z5;=RU~!k+u?73-}f|;k{w;guvDJuTFzMsjFaZ z**-SYvTlp>B-FS<%mjgPm~X#cnw(^=wmJQJ+%2+xc)O$|HSf`tHH-oBHBEKQ)7bDKbh~v7e3U~K6N+M+Wa5VE$-VVg|@=46Z&$@2$hrx2`FMbZ*Sz`5-lBK7eXyB<+L|v zM72#vnXID-t6~MS3GB0v%xS=Fm_05DkhcKK4BzXdjKeEs;rp;tW-o7f|eP;K|t);v1#%KY33J_;y!HJQKaVzBOX~P*7xSE62_Z$JQ<2niQeQ1Y-qQxJ)NUQwh zSt;aLSN_V-SS&MzO^U$at%0AA-h0h{U#U-gsnG#hm0;l0cy_nA1fv<-!+hF#(H&au zgi0$EoDD3gBWfZ$u1`55w#DN#SOdp&t8FpOm@>VrMEKxI;8)|ODM;gsi6%t2;sS(u zDfpDn3Pn$(Q1tAuqC6TXmmC^wq-uI&uI6M7)M~@sNJL0 z)y}1E&g7|jxgOK;6kb3ONmmiUPV~C5*Imu8U(%STK|~XI?01Qtt8Cxid>h5)oR^k$ zlE2#6n!kK0-mr6rvgN4>kLYOt;?0*c*B9&ewB~i!SVXBvRUPHdHpdcB27Y(K!wgpF 
zm~0EzcpJ6Ys3^0K+YF7nCNJIhaXh(1lMwY$ru6FZkZZjWuW0#JMEgbnZM>8I21OGh ze_<;S^VR64oZmwS>@TyEZRLlw@#d1a_|}!|Nbv{pO(taxts%{$2eTyg?t~RGcT@SO zK-S%EC?@nW_IvGg<&v;yGx|(Ada0-RZK?mc$isA-@jK)}xkK2=AVsNKQUXK`*=ZEk zE0xm}E-sgKf-{KQJ5!$Xc&WffT0h%Bj{s*>0LQe(0X`uVMcyF*FBtn)Qk^CdfR*3+5jCLErfjXJaH^{ z3+@-h*4pcQ|cFUkPqHU*NQAL$fLs?cGk~^J}p1rEg zb@FsIx?tHZQC&gcAkJ(0KGc^Ih?hI2(@(q_v<$s#KWH&OWY^HZ%hTGHEQ>bqXI;iLyhUDj zT&IGW79)=VaoNF5=04gBsVP43%{Lg%E#5!j@ssM3b=Dfa#fcg&%kXZV?|J7z&bC^z z8h-qB&Nw-a z&N?#z^o`t;O|wWhQrGA$Lya{mDF|y-XL~>|Jrd2z-$_|{A2Gv)j=4B`XGHIRc7uP` z@aV?K&L>#I1Lhd?qlO<#w*>&PnX`aGX;C+@MAF2WcshF9dh-cjCBQvn4s<2_Z`TmR z_4Z3b6Q2To2cUlv-_FbHKi+?j*&kbOx`N({ASixE{e+2ivCK#w_i75MtC=^zz4*!I zj6_CHNA2Yi5aRUMpUgj}Bkv1TWCwY4FufeHI9m~$*wY*{-17N20)SKn)=%qv$pq}D=jV+OZsaHx<^tJbVef->r)XKt%9tM=ZyJ$tr zxl23gtNsHE;!vQ8f`g=;Q(ZnX`?i-7sOmAR$N{qp->6An3>zoQ&j zpioRTs?k*`qK8y0K%q+Rn1-3J!oyzUMCtFsXcCkp4|Uyq+16uODA#+g*kWmPNZMcT z1)e{h;u(`mTsK;dwp(TJhRQAmeaQ5D?jwcN`ZCt2@FLdou0?xlau9Tzp|RjIuZq{) z$0L0>EoFgbs@H@b=3ZtN*qx~kp7~C}|8p4iF)ETl?nUn6@cxF3>=umUD^hwu4*e+p zPzeU`GCK4B`}?H7p4eZaZzprtXB=Mkb~Jg2@XR>?c-Z^@w*TQ7C`}$#>*2zcJy*DqUVj(`t(HLL z+lL>*V|^^%S;wTR-LdYhe08oe84gGxnsPX<-pbx}E=S4lOSeM;+8(0MY>A}?X&KHs z7*?L|xCV^#s7zX0w}u3l!OdHF`SZOlM=+L~#>N0c2&i64=mez<_ygqWA@N{FpIBF5 zf!=^n#G>J$8uKAuV{#EV1Peu7V+f)+-z6sU3h zFi6W>0Hs8D6>Hx3Gfgh#C1quqFJz`yPn!P~cbkGTzl>>bczpDmbBq9k0(WcfaJith zr*I4okf?yY^$|eq;KDuy6|H*;bF=nV!-^= z$i>{wm6PMg^<3ccK?Z!%T5;~Y=O(C{G?VMJnYg7o@Sb(f!Cl*3I+ zY3-MVRW=CZd`%k92%JqV+`)PjA*8Z5`G#=7^||GIx!-^t?b#)nj+T{>b>;JH7hyVY zBQv|s6Pb(-{9C_=YMBF?bRfJ7o^kp92RUbfdbOZ$!&%5MWbf@7HX;b_0dIUn|C1pKou zw&Ex}L?3*+LlGp$(UVaG)aUzURKfB{_;`C?cT4s$u+v@fo5T|b*s}=)H!qkUkcQ_M z(#f1QO@@JN#<<*hY+OH54#6-pT)Y0_>ZR6RS#WJb15G-M#m0S;^x}8qx53`j6DOo- zilSKFfQjuXTDFTW+3%>wtv42`-eb01`B#`56eY@3@A<_1!rW-W2jr;(SrT10^4_%Q z9Im-ZT@SHzEFQBDpB`GQ5B5*5z%2bw$cO_Gw}oMFc>}9|aA0r1AoEwSlxnLwE^=YE z;mif%x~Z>^gV3H6t7e$Mh2?Hne=i+yW0y%4RdmqAygtm|_uidR4&Q&SiFc;bE^p)R zT~6qYxLYtmid6*nASaZiK-}!~7I|VoY(R=d>JKl^dD7qrW-^WUHH|}rs~xu5 
z@UJl&iA1+KC@h4FWr9@koj2B^Y?5KLIuX98AdC(+?z7?Gqr^@9&PP(n4tYUNzd%+? zEy2!w!0rEJurJoC&I`+6bic};v3m+jY7dfl_YMV8Ks3BBNj`jzJ}e%-6A^QOv8{)0 zo5^xZkUN*K9_`cfB6Ll|rJWTLd>!P`ov}$@(bB=JMBo{qrGO(VXW0m1y0Gu5A8k%S zSMOnngdaM7tGT47DKV(O;mO>M8Xsg$A^Vs(QuY}O4%iiER?c_*2<7^I=t;c)Gg9sF zrLHuvUT5L9)$o&xjpu{s2dURlwP36pA-lBadL(OwO5)wxA(q#=uTsq^iq`0&h$-fH zXD*c$?}I%#gyBhexXgqu0;^wDG-tP`Ik|Lr?WeBa(72nZk$yn)_St-UwU45E!J0qe zN40ucc}}&Sd#n&hfhk;NId@C4`p8Gwjq|?90VI{#FyhwV4 zVC*!=IyuZAG4mE>>x59xOwZ!f_BG-Y;x4*3h#7)e3@5UnH%!i;CN+mVRiCP+`O;60 z_YF;tk2WsE-E_fAS6LgMBPxa{lWh+w=fi`!)U}_Fq{lWKAN~?%2GKGm8L%g{u#*6G z!eHiVW#VFPrsd{h?O^HpBQSwRCDFUNa6{MHRw=vp_O*%W5(_Z8@TU-7dejD4X!2@J zERqv*JfezU@i+u!5EDkeq1?+L!u*!dKY)YMaMvsn1jG^z6XF%RNRZfSAZN=f4A7-e zc|T10I4n{#fswPZ`x8Bt)L-kH%a2NNCa@H7b-0Ei>|v1a&fFm6`n_U4?wUAcdZWlwiJnp}XOo%rez zrmN?Tt%V)uH)LNs%f&rL+TDWn;yUyA>hxEngDu>&8i}U48tJKq=xt{j3&?oe4{mMi zRjsuKIzBRu&e4}`IHWt!Jk;&s$-g{3-Dwk8NdggbLSx^#zSL5CI_XhD%vv6j1SC%~ zTkvqiMu8OWIt3+i=dpL$dj>W6T3_FubL2e)u91j*e6*%tye}s;Qa@xa9&FLeSxKt4 zI{y;Fjpvl1HvENvG6ecM)iy;*BMzu&z$D52x{d!$0?;FDvY@36v|QwrfyzNH2ZE** zR!vBo^oQ$2u3}J4P69)+5YE!h zhecv=Y;8u|d@9MNPW(8xOZV6Sgu9Ia`P8Q&DPl+x`M?p-m%PQ(mNJL&@hURrmfE=k zKA*GqL9=_avx1&}m%K;l$@AwkM z@$@te=Ue~#<9Yn8jhcQYoa}7Ti=Fkgo+Ok<7WFZ4BFt~TQX2VP&%ZSa+`IXc@vRRot&-)rqMEk4lq>f-Xr+q*j`Jw|I%9s;E}1NY{bj^EyrG53h7 zcb8cl&Sc<>DGBZKQwiW){p*A10f6B%8k-*Xs-0GS7StFlFYkc<)n;mlt0X(UQRH)k z#(t9h>Zz|U*j(60dwRTi>Tt5h+?TLk#@U&MGjmHVz8#m)jT|*2F$vjpkHQNo6zuAL zH9kk%mpTt+>^<70B89}%L^?U(7ez-*Kah4YX3(6l4)4WH3yr0Dxo|YP{IF=a*nURL z&x~#@RANdhAmZIx->?;jh8?ev4EHo;d$f!heN%^mym0wAl8jFji|du?NK@H(9l>S^ z(k*&KwAh}n8!%%)J3%^Q^jYugxV@?#E~#%LLvI*f#ofN3mWra=62|AhpyFM@^~Q0Q z80>Rep_@ZJpOatsrK(*|q^Q6T|KJhxsu)PU+|zTD!*kwUScG z0!c4Z!+>O8L`BZ>lTKpXOHkiQsy9Es!4_RH)}lEk_S#r2eNMH{g(8vEI4m=nTOdo^1{86g$yFq3X!>ig6_yJM6XP_`d!z zxU3=p+=ACD#Y!k~XyFJsHk+f|LHMI7351uP@3}UrOQ>1iQw%Ktvt*eon44c)Iz7l^ z6aMP3fw0fVqIy_3OqOjeQ?K~~DM171Wn zw0dUy&OM=chSgba+CMZn=P?^HZ?R(EE)P4Oh$G*-tS`Y5Co+{?jwNI!XT7`NfNf7< zTl1i&X}ud0*QRrps4o7j%|=nRBTl53&9 
zgdJZ(`5v+Bqz81YPq!((+wuGgi94Jv@$b;vako9st&CtUznZs9gUghkkiGjRTUX)# zA~H?hfmn{6gZC^5QW{Z$!!^CIiNl2rH_=2qg};#O3!1 zG29l=54kRPE$B2XqVkdK&_#c5^7J@3s$j(~h0L&Q<{Md-#mer|cp5j?iHf+3(FWmx~~ce0Kob^A-j3ong5oF3$*neGq?x>-qzk5UuKb64J4u#r($SA z9MY%M7D^BptG;4XtESm41RioHynO$Ozl`%Dv*(*K(WR@M(!ATyLmEi1g6xDw16Yi+Kh!MOSdZwkIE38-|v z)gxD=EVhuhGbJf2?O;HP>~T_J*g!u=#n8ROh$z^svJciA4ij$Q2yQVA4}HZ>+#mwM zf}-d6L@?Hhns^Sec?~M(mz6^aJAuz;Tg@tT#|AS2n%OgYLeWZ4R+Kt;D{7N(#iyP+ z7e>o-z8mjugR+YO$}bhOQ}wJuD729y+=*l#?23W4kIL4T%L)^tO&#Ofa@jUlnS=^9 zV|!0XurbmaaV^1BOn@-a(653P+HmM(>*1ydYlNWc@SU5x&uzS?Bp(L1;dkIScfc9w z$XlB6Ni6C}C6u^)ehueEpRoKI7KDmjvcZ`LwRzn|lOdT%3Pom8OV?{x;Eqo5DTTV~ zgoFAau9$b6-HI<|kgNK43F9Bh?N+T|Mq{5T`$Tkz5RGv~3#Ie3I6)@(~XmzH6i$4uaF;6BR=&Kt!RY`Pxe%P~*Z z^ed9R)dThz;>=<`(Vj;F!#F2rg%KgTjhRSEwyB6GW=+_MgEmcu#3x+XPdenHzlZVd zW`~G|e$aagH*6JV7EW-;S&tc?oE;#LFX21lPo>@yuSkIW!JPyti6slEX2%_mK#`!L zC~1R{B@uYOe%~lh8<}_a@MJwH^h+XIxmugByOmuiW6)JN!@xWI_2Ze%@n7P#wMqr6 z7q(orMgRav{)5*a>l9Zjb8|OWSS<^$fdLIwObjTHoTiqx=Dfrw4!Ecy#rsOXW`=)N% zz0}!mveHMm>u}Sk)W6iNJky(hGA_Km(bend)r=w*^NI|gHIlRC1Uop7P-E9FsTi#} z=`nvwKYqUZWTr{t$p*7h)V+W8yD5Vf2z)Toq_f?z5`{HL7AKdZb@cwI__bGpmt%Xx zzR|#k5c4HP>?7SP$(BycQ^ikwmI1&lO9J^!3&^aQL^OBavzk}sppQ4+-&1R-xXTCk zPBxgV?|x}{+4sZksxZrY!=e=sMrxQjnrgT>I=OP1I=Yzu&d>kLWx%E`CecW_hl}9D z3Q~vkpl?QDjkMHaP@I?$ZdTyYLq<>iDn)6$+5N?gdi^q$uKxAR>tUfZvzoTjU`i7G z8CH3QsKDuH;DmgHryw>CPM&g>3aq6Jy%6UIbDc@a5Q8nN z-ONG~{dS~5yLHN;=-mZ{&6^3kN{gEidjONO=m$Is{1NUYcxf@*2_VbGi>bGTUjf0U z#S+0TL`e~=jui;>`xOXTJT60%l(mO$F(MWi%h4gej>BDZAk`_AW)ks7T@}ME{pB`K z-+Mfv(0GNW)&#d{qct*LPS7QNdq7Mm8WJ?rj2Vx)&% zm_en_75LXHbKyQ0SgID9=2kOAl)l=VRj+^5yW{aSmYUO0jNt)QeA%dQgjFlsoL_1U zX6E-4=kVbDA!<(}-LL`!NU5hrB|eRBGWDYf)l)b+(oS|=LH4K81Vown4mmo4%Jyl( zb@63+$$bdt?)IS4)&To1V8{3g3bs7_6G?Ysjxakn1GiJdPeZ%eIOiKI48|<&p~kV` z_wq0FyrAl8lm`L)-RI@gP>Tx=#gk_fe(_WABC<}4|e4`r=x z$=rYrkUVow*<_uTx9uO>krU=B$v4{FXyWeHj#&97nddLTy|39^kqf-Py%^lM8D_sv zF8ZWq6KvE`bA#~*gyG;H!q}EyZ*~4*vOoG?Yp#BuLgiYA52&tbn{LK6OsR4xfOBU#7 
z1wR+ve=4}Z`_Cu*w*>!L^ym8LPtjt+{|@~h)zHsc{+>(!)BynG5d#4K$gDq$|2+); hRh$tKGS zzPs!C`wQN^^Ld^-@tpISd(Zd8{T>Zv6jWjWIsg*@0MG+W_A;#ukpO^rQ~-bofQhUp z>*(NS?%-ymL7hRqGY+_0&esUwD7>jhkE9b^;O|aamv{D&`6^KOg`IpT3J_vBR zw+W|2w^*!yp>stIvIGHGP#v&BZ8Xk0c6 zgRG3W^|GXb=%Ev{01exyI+Ah$hcW-2`=C6Pp@S|u*cJ0C5mnDJrCD=UT)deY8G}97 zNHhx)E5m7hc5IK~>P{P`{MH{e$Dafb}SKnFdOf7tFy|CYG# z&PTXj@ZMp{R-nKQIskBcivrO2n_Jdv^D&+vu%?3WI&6en8o8L;x$^S-xc}>p|HD4` zm!+2_sH%7K;e_r#ybkL>n}Wp?$S8YCKWw4b3V5!#h+iL*$3zKhXQd+0q6kJ&2y6|w z9$bJ)#C+|i|MnSN7DY%b&e-5t9-4aT?2f^~;*#>fxoouu=rMIV_3gESk~f=2YaB=E zyW$+>fn~-ApAO}!3C4JyP~0IZpb90q_c}(72rd8HqfAp(xrdLS75gTP&okWv1e|l@W!TZ) z#>X6}V_#sQ7YsOArsQZgcsi}ji{cpSy+6|DaVCWK8HMPyle0LI5Y>msZeJYDapZUe z1=D1=oCd0ZMnt&tb-Q#I8!ye3s7W%RpDT-4bp4!-nLI47kU{RGX(AMCGs@>KVB`9c zdJut`p}Mu_moFagltb$p8|l*6E#S9JvJ2nP-$1=-$B)VHC`#aZ1I9Ne={e6k<-cJb zy}!0l^&YkD%)7*aQsO!+WKVE~=e};y-d}Z-xf*0|UpV3#I@z~a z8|a%_Mp*ivkdXwUZr(+}VH^R2AkjY}>?LeW7J=W0K1*L!<-V0ZM zWVhO$rE3ybW~U(G<}F4+KFQFoH0974){tb_R(SLt_NFeDZ3g>y;+)y|wJ4wN=OL>a zE^QnI(5oPiuJm=r%9eIEC1THj#}D!4A6UKvv!2^`H;gnVW2tvDMIa9zz0q9M)s!00 zgL|@dVa5enQ^`LfjgU8B$A`G$&phy5JHWWQ9efh!Z$PdUw%C~p(d{VQv>JMH4mTM% z+e^8Mtb^cQi`k``=#s4#DoJ)}1zTR}yi75tDq3ZTB&C`aoW4+2yoGx5+(jl6;4>3D z53G4v*__pu>g3YywVSebP3LZ`M*iWBx6k^U%UukebB?@mKbn<`sxz9koTG<$rmhp<;GN5sNNjNjaNsX8%h?2VoTSto`1qh@@^*f<91 zn(11c*uFx2LfXmj8Z})slj&F<{F>Dn+@$7^tLjtTG*|Y?@wTz)(Em9dMt*<&{sYX?i$ zAAt!xB8}C>M-Z~wxP_!lN$8LR0YZ_yQ#<&wZM>WsiMuIw>gP zHT6z93C>}BUq3!Z<4v=85D-@)R7_CpJYH&}k&-jFFhGY&rDTZwQD}r_JPU7+d(%b& z6q155sL2*B0JKg!{MewNLND-&zOt+y+X8;Bzjf5uNZ6{qoLDS>N)p(sdusH{#Y90J zo+cuylvs)%M}*%N<7Q=UZ_fMMo&U#Lc67#~C`E|dNH6c>xO!gOTG;U(qWjueF6^+- zZ|AQS*PF-Hq`f2`XyK>VNHERONJ}xqYCYAMMT4zyANcB8wSlKp%y*7Rpe?bb1w?`37yi_hQ1I}2g6>a z*d{A!!~zxdS*7`3wFlVUP; zyIrED9uhsCA=?@~XcAT}pwo?%M&6Wh$WlZku77<1%4zD^ka#q{tK~;}QUCGB*Q>!oet8pH-nb~0Qb6W`Rf^`%7^hBR3m~(u%~;UV zQLA)b3MH>bJZ|Yuf~?9@24T2v^j?)40B|S&VSEoTiaaYDXyzQwCiXC*R=E(!w1mSP zE$dnUA=mnPV00tlULXN&;G6JKHtcv&DWoWgM_XXVGQSu0Y-);*_l^JU(HzkRyta=O 
zKPyY(d~0pBI}zi)MMHF~INR&5)JA?+b8n0ScdifD(n;gms=hSE-kyf32V7jZiW9Kt zd97Zg#s&Ibo?kq9b8`b@#D1Tci$X2Q#J@hO?YFb|jDJ|wyVEQdeg}W7FXmUQ0?5>xl!%gL5^~CF?Xy38Iqwekax&hPswc=&d zM+|yi#qLyh^T`|z558u48TA0&je0sexXnWrkVIvpE?jA~)?K zEv9fk0d?-`_tCE}BzZPbjSB?mu0qrLn8Y5>t4B27s!i6Gxam%cvsbt2iWlYk5$)Y) zTe$~TuQ2tpoW`CN{~l1ZymxEZF48)*Q8^!?5gbZd>R#^=C&B|`eKFx*o$ZxcIwc-< z1g4S7n}-y)!fiN3qE-Z!*~s2mM{3csntx_w$o-Bv@+`tgn{u3eOjW%}(3A>tQ=(%Z zamc=4bITWZHY>VN4Jmc;OpCx!$hE3GFZGzU567VfM;goyzMr2_n{x10l=}9am8q^% zh0q;4qPGT^d*I15IXoI7Kh%}WKEf+oAN_d{BxMf*J-shh_0^`OEj1IhqY&$nM2hnu z9$LzHa;MnLLq`{uDM0`U(GU(B11v#bL`{8^N4fqT0QK}&h@H5mh_%e)1JH= zR-Yx~OAFrXZMGpUcy5?il9+ykS|K_=P}H3*Vxc1AhJ(^77cA#oIxR)8r0c>RpWGNj zg$lzF$Wl3YxM@72n_pC`r*W>rS*EUiHiI#gWrw%A6xZ7~ij-L-LXiJzxmXD!_D&c| zw$1toe-P0~ay;n5vxE;`Q%cKTLNz!K%#>%VWNUt9>BOANdH1UW9A%g1ji9I-ECl1R z&a+$hse3J2%462KI+H50+GbOAQCsPpM--*G;S^;8TaX2GLn{N@x9;)9(;UtZru;*o z*^fBU1&bAXw*|O_#2tm+W`2Q69m`d9I+juxpZ07+0yaJG+L{MFt>D!DT&hlcIdjLu z38Nb`0; z008&*gzV;RXZ~9z&ezg&Oy?sGcvE-#>>`uGsy_jS7B8J5#dqvUVo4 zh;An(COFm^CbrHEc6k1JwLMgGDD-X{9NJ4#V4NH=SEAjzVNr-gR+VN$}beN zQgp3?skG3;-AUw`x9>r;4$4=T$_o>sOdVreb2!&mSjFA?+QKlMm}_;`bT``J&(Qb}7D)L$B`J z2F2ZfuwA{36NPuG>?cJv9w2>5MRO5gd0xAWj+7`Um2E#1^5v_@3#8h+`bq?sc%t{` z44g+FWfOu*tvMIFFDyfOk60mxkY3A5-fP7cJce%4i&0OGv`ey`l|8O#(u`suiSGL% zL-@z1h2g&6@BM25g!PNssyPp0q!R`W7m*ofRw*@6<_-L33GSb{FYnEKxmtsPCTkNpy_@2gdCdl1J}YZL&0>_2$@aZYiy zGB9@iwDN)rigO4C|KlzfD>x>F#Ar6cg4l$4)$kCpvY~e?FYU^9P3mdt) z!h2AbTNrlo9E|eCa^1gAt1)i6it#*qeCmC_%V-DtP93Gq&2b-WN?}wcsUi`bdh!}4 zXj!<@NSpQKiVs=t0V=7DWUpwl45|<`nZElDmH*bb9y$AIFlHjgT!HL#xN24)0mP<{ zRA$SltD|iO%cB^NL|>ndCOR4%NOnpLNz>--)Y&=<+fvHO#2mND`ywVSR$JmnM&&a{ zG4cKB`$QY_qBZg*8#wh}*O_U<{PhH>H0odK zmko61o{Zhygm?BhdNpI*i+)K##1X;Ua*PMf1!-*CB^KXlPJASs+((q>K9OOZa13Wt zioEr&c{{1!Qh*Fan{c)}Qlhd3%j4(pydSweD1PM?@8#GQzH8L~A=rFT5${0fQo5xB z=S1<7kYxZc(~?*r!=hluOe%^$*P!-g1^DB&_qUW<8vcrbonttw_02CWpV)Y)s)De* zHzHaA5u}Ehqp60AqmwJIsiTYe?{xgXTn1w6q7#giyZMMeETgr{4)~@Q*2>E)1jXJH 
zBghOq;AZjEt5#GZoY`GS_s}a>>Fis}xEc~mUHg<-7R*hiXRS>~_<;@Zp}u&sSxSA# z+bafn`xRl?G!|AJMpPjjE7{vla7 zK`an#yU2IbIsH6QbhHY!57iaNv!jYUd^4QbPfGYr;T{syv{)MILXsH1;#i5oxLb+R zD&R6WxJ$XbMdmq7a!rS~msyT(y3p`Qe9QFw8nvBgkLIexxS-j)Q9%4th6t%kR2(ynj-U7Z-%-r{PoVu4%~9SPQ30 zR#chbQ#3PEEhr+*QMfZp#TBjGo;D0Cm?8!ZluQVa&c2`V#rD7;pcgY-GyQnk=Dr-muK z?B&jkdc(_}Esw7;w7iD*n3!qe%17>oTfOI;^-HP6$@rG+90uJTr1dn?3C-6pDD%{) zBBJw6qP-ukdV<72-od3K%Jo#1m?R_5AzNEi**^7deO!5NQZLGxyFIwIKNG8^L((AjmkiDo_F_^isA$JYlL-Wiz;gok;+O&UUM+wSN zQg~-~tx2$5H*DpbXr8x-R8qUX{2=i5`}qL;dWh>bspylM4b-T;_8R*So(~C`8$q`G zy0Q6($^Mvs(dN`p{=0&|w_yGd{4u8@{NgVSnV$v!PD}q)@I8V>`Tt4kpY{CARsN}I zAJM!0iMjk)_-6*{PhkjR!+ym_{jA|<+UHLVj5xm}fqqu-bItvyf(^p|{DuEk;6IE0 zT>ktinhN^w(*IEm{jBBhsq{}B0KiL90N@{q^=I+Fhrz#!6C;?g|BC+}6g8C55Y`0% Qun@23h}=Cx_2bk30jZ|Q(f|Me literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.ods b/pandas/tests/io/data/excel/test4.ods new file mode 100644 index 0000000000000000000000000000000000000000..c73a20d8b05621dfdd301f16a975576ad6116d21 GIT binary patch literal 2992 zcmZ{m2{c=28^>b_jcB!YHL8{>MWSRZliEgwN)eQ*JtBxogjkBA4IS%PTGTe^)KY6v zTNS0Wwo+>Ah$WOpNXnGd*6J6X?>jxy`R0Akd+vMhd;ZTo=icY{Jbx6Di<=Md*N_3+ z^3_Rzux}35A@;)K@YujmA{I>~`rEAcRB= zz@j}!-dJoPiXBrJ!1FbN48Z5v)BB@b0DvnP062IPDKOL*OM+a(`*zz65-8e7+r{35 z9JSw6LKDodUmhiQP?><_AW` znZY7&RFI+-0+=QxWE9>0c`sx?Mjc>Wlp!h7a|ddj6Nr(7+I zAfEY{>ca#b@mFSJf>%qf-ZbHJw;qU3ShhL+q5;VS>TSAgp-dP2mIoQE#zq`r@`a0+ zS6)~MpX=67j*bR8L2FAcwA^>0P?Bs9NRr2`6ivvBzsuvCl)dtMv&+NS)h@&Gdk^vp z3c#htXq40yM#&X_ukkc;fKP0B;tEP+#O@^qIX=q=_g%N#UcT%$u4i|H4xPI5c~-oR z*FIShEPTN`@;t4V>cSZwe$ji@>KSWivBIU>Aj+)~LyX?<&TB~Mv}ipNq$VK|<@n?C zol7y-k#XWWz0L-viyT#-^`9E#dVEPoExk|#&mj~76!@hc^usmP?xRD}%-haxoyC|y zmWfVseQ3;d1_zWMVV|G z>I?Do^8HcANsj;4Cu={ggi%L#4!V=uV7t-Vaes$D>#VA@5fQGBV*-8P@^g3hlFWl6 zg(XG1L;3kE#X3}`epype_?R`O`?l4vkeLNkbBqa1zePtkf3PU~^)_@kBQq~3f6OJ_ z^@x?NE@Pu?lPk6LFTE6#%PUiOdb{{iIy2q1~c4s55d1o=- z0d%bfcDODAEwa+(UQ@SH<|Ol?|9oWB_2mLHPSYpL>w=I3TIQt)=_jy>9!T>rC@XQa 
zP2sUlwOF(2{Fn-u7T@611QLMJw#|n;NykcR9lSH&6kUpr5U)K><&*%8st6qUWIJWX zh1^djkDkb1`mnwg}T~B33rHlWT2JR@9Dsd~H z9Pq!_91W)DEqvg<0zGxUeGQz`GHgHOoDCm{;yrYd*1}pF7f#qISlPr(3qMv>oUVuw zSe4w;=;Yi$T>C8d5zHhHBc_H4dF9wfWsR-|GadSfWBxCI5-Ibz$+Nl~4<*Mi)pRr9 z+{l9X@lRD4Z+KZKey{q;_Ck+qqdz5*4fdBWT!6o$U#K=G9mPhUb{GI)(+|M-5dsft z1)Kfp%bf@8w}~z5i??Vb!JuEluSUW1!2*Qyh>-4w9%Hl|y_bRF@-{rYbz;`7J42~s zQBUQB0YgAhbX<&7ZMSyA`>nCbsqCq16eEAR$IlCO+gvD4`A1@TRL%wpV4!fDwmwhm zl{e%Zu$R2CphBEdTPI)2?$19B)&gG(&N)qJ+OTflmb)xDXi2m3=b`k^aCEF`D#7mG zW(oFHiuhBF&NuZt+MDwB%Os1%3|rG{Zs0~a8W;xRWNu|edKeA#BO*U;(nH%Dl(wi- z8hEDO^XP32pT|$8>uO9?GCPjlY$2B38&P3g&96e`nNHm}M{yIf$=VL#&f!@x>6L(M zkp+Zw8c)LlgOAuMU+fdH5j~W9PF_{YT&mXXMi}*GoWQG;KN3pMCf~{$^c%jVGu1Js zEYp?LY8cz^FM9q=_{n{b8u*YS!#Vi}C77o;`CbaqDrd%$7-^F%ur24bNpDF}eAw-f zx^5)@ZcNNmBZLJl-ug@lzgV4-^ZWIstB^pE?9G(6%2`Y`ICNJ}^0C_E!aM8wg#2Q2 zxqVEwgvjwAFNIfIDX$6&)>W0_$1f@PnKC(d!doFxHvSQxO9O&=X{+#=*E>p2UFTVX zuKmFWH2#y**nWXO->~@<<^%vl*jIDoONOc!ED%sUnt=1flD-mKnQG|6b(Y_F>(*@N zHT~d6$_n|6F!D#hvk4fFqo$mB@3fu;`gX9D={@)b(Y1)sDDAtI7TTopMb=QO_bp+c zi<@*-&mLvHTm+1YaRRluq_NP~Bd5d)VKRvnzVQp)O%tnFh08aE;AApGzIiIu{V>u{ zaEmnOJo8oxUYum)ntQoHV!D3u94z(zq}^N(5JNVcnAkA9ssl`oOxCr6xpKS=c{r~r zcV166p)p1B%0!ZH*ox~I*{gPvrf*<$if*#6(NNeCZ5qBeT_q0vza=Yc^=`K{2LN!L z{VBdWAZ#G|pk5_en_G6B6mQSnmOlPo*xuRs0_C~5Zk9MSQhxSO1Uz0?v@m#Y>mw-} z9=aIO?sB5HDHdU(Pi?I$F;FQ{afxM1SarBS z&B(xWDNuRHabX&7ceq6!1pVSTpFu;wsG+IT?>kilBZuB$HenK#`xPgeX;WsR%gQmj zr#t{a!#ta@s>ghANkEG<6sRlke?o`Pi_ zc@cN|!D@CGXenb6%rwGD%}7mro_-tu zbD}8Zf$HDg2{3>7o literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.xls b/pandas/tests/io/data/excel/test4.xls new file mode 100644 index 0000000000000000000000000000000000000000..10a6ab1cca6a4d8e7bbfef77d8bfa7d9caf7b2bc GIT binary patch literal 25600 zcmeHQcU%<7^6y>JN=6U`6qbyVqX7^V6$2t7Cn_ew5@ZDh%yOW@sb@m)OkkD+6Grq@ zR20K;tfzpeD40DDv)*|qC%3<2 zlT@9%6vdeslUk_3BAICFOyaG=@Fih{OJYelNrdx%q{No5a*IX}l+<<%29z#Fbux(*JY%Bx)e^O!FZmd#J)IVrWVK80t0la?TB83~N-&6z 
zu(b^7axjRSu!K~7U|Zb2%B9m+F;u^T!6aRUC5S9ZSwb!{N)XaXCWf&hnlIGQFS`Q6Z+{1s$B|ct9~$w1t6WC0SAm zDplN<^dkKUnaXbF*h?VkRJNxGsaaleqM5xJMHn!ZZLH`(dcq&Th}Q4L7OmKeEmmVB zu_Hqz8G}&bY?>h?=m6&9(<*=mv4s)WDuA$jiwvyNH8;2 zG$eVX3)~1!oXvqd%|AoKxJewit4Dq$T5xxCJlqqUqN0F4ndC#u7@@?19AMiM7cv<< zIh&XR(H#Dqhyx@iG8NS2NkT~w;gCqGj1A=E*O>%$9uhqmA%uM7gWa?oC2XXFYZB1^ z6yA_eN4G@b(8|lABdWCW|DWj$9G}92DbS}9Y7+W6NRQsE!LGd(?Au$xp}iIQwYP$&SOpw^ zqE3L3!Wu~P8ZajYe<2*x^a@N?vpd0oDV%;vg1EE=gzqCC#I-FTF0SGrbSLNrAgxv+ z(Hu$9om#k}cLRUS7gZQj5!#P8uDtz~FT4FVVj#IhSvm;+syvY1;vgIulJH;Vib%4K z)}8JEqC#~iO6}t_M}{Q4%`G4*%}q>AP2~$C5bSmYKpMOG=p;geThyT{g_gpi3)L6O ziVEc;U8oNqK1c}VHIA+1>^A{0uLCMgs|qas2zrpQpYG@<@|nxclw`|#wX}?~f5=HS>AbwW77(Eg{QJ~RADG@Cg5sZ9uXW zeV~}YXs{I$(;4Bw|5j>8-6mI}lTC3hNu% zc=Un_iY@L{q7|rjY@Vo76Lq=5@hIuH%%E1Q--e82C)0r?L2zQ+3L+sL>R5qrqC};W z1c7P!*Fkvx@NA+MpBpAQ0w!9fapEAH-M;1TxZfA|J%EEg?j`Q_;Z7oFOm6Zs(Up5W^sJb(EhreYwFk**U_kZfjWSo?5x=R(;TM4hN8a-!;9Ht?13ntIw0 zz9p_rAR8Sz8y877W^&jFVi1ar5+~}*(_7Nnbf&X$m1JWjhm9agq1Y&MqF%p#EuD=n zosF9$n_hC*U>wt|G!;%1{{v4t8$CK3cS$x5a@b(F(9DL#iK=<@K{}f*bT%H6Y+U59 z5lm|-rFG&&J$`#vIvagt<0jwIOn{AB8!V0IfA;np>1?{v*+{W8b2)58Esf{@sr;aH zHr?oKq*$7@95$ks#`C{-@1Arv26Q%3EX`I98&ON+`SX9&c=w&MvA4m%3&jFX*~Zc506V{(;eA($hR~$u<>YvrMbA~T>MSI21XNZGHLN? 
zJ?LztSek_#Hlmj1;`-;|bm?r2>1?D}nvEPbqL$|3T2@vjos9{djTB3>lfy>T(p+5m zHBr*pu<2~1Sela@Hlmj1;<{#U^SGh+jVYat6iaiH!$#E7TwM3wSR<`9>ckAEkuA^7 zfwbpl559`bXC+K3a;l|+nzw?2MbSY`X`rH3M9AwDXX&68t)O5(bWlqgsHn{lf@=xX zQ(auk>M$W_X+BsB9keG6RMbLXD3uWDpt#wgS*KtdbWnR5sHi;zwD$>_dC*T9D2v#@)0GSgi_Bv674Lb55nq7}g5}1@mM5?hE{ebkA?gvYaT&4M zlW4<>eOSZy4dX`4=7uH5Cd6m47K;Ni_4#TdIFI zxnf{47(wD-YUm4BT=)}}N~-w?rNT8S;$_ogmvuG6rqk4t&Sp}1$Ab_p0q@2 z?i)eox72V*Q-K-|v>GaWb%`3{yt)ua+CV?C zL&!YX6o`EvPxXHS2r?gZ9t`5l1AD;5D`D?rXE&QyCukci&-f)IfT4Pd<1!5Sio_3; zmOwLAToD&y0W%?%ChW3-2x1r~EHXAenhmi6O;81G(Cjb}2yH(W1dRp(1#ck`P6YpEk#ND}&LbcGP;UaUZqZQy9o2f?n;u_uP*~wIo&!sW#0??x=9Z2@M zsLMwWDc0vmXq7~e6G|e;2_+Ha&|&sVQv%&th>M`41RW6wZBA?n{843Ss-9@7!Li9) z(3Z1!Ed?#;o?x(!Wav25m!!2b5WL78gcsY8H6A{esJYe{pf_zG3k@7oFefP) zss{`NTG-5)Tn<=aVpw>1Y%(97{4BBz-f4sKNCZnC5?nMN`pv&gvVk=~eS^rI@5V7n z;7A47AgN3(Cz_J_`&#EgZ=^dg(uLnp--P&NSZJCWJbxx9X|lt-8QcTw$^%NBw1+ed z`?BzORlcDx@%F%&zmju%kCV_|K{%o|H|M&1#Eq;U5#tQHqR%CXH8U! zN;l=^k2v&Ukgdf_BfrUc4|4c{ycrI*rxZgwBakZ1m!LKZDzFp zQSUXGi6)tQDjWMS%iZT#r8T8#UYk9pw%f}U>-sz?^S%8{;l$w{X+=IEM59#eP$7x;S9KKlHwTg6KaO$CkcVWudq zue@9N&r3j09?)gDwPPx7`ge4|bNbrr(@UMym%Yt6yR;%@Ol4^|%Q=pDnZAy@Q^+Hm z)EG0PdqywsjlQa^dWMz0=WWAY?|%wcetxw6Ia`0rKIPKKzCPtKkLGxvPhC=$u_xb> z6*_O`ce@wfE1FsySh3sdnzK>fuoF%{bk&6;}K zWYac-haUb_AN7N7_nua`wxEoEtkj9OZS;lILw3)GD8~0*?;ra#d}o2GQ27Xo_S*o;5dq$N5PyZnv;kYqRtzoTFTZQ@F->H z+{~KZ5oc_MgdZ(0Jo6Xv%{3f&bw}m)d$*rGS=#&82YS^xBQ+-2<*I0W&$;HX{N%eu zcKJpHcZYSqY{z|cwaahy^SYHCU#=SQ5vr9pgS~i4<@u<4 z>pe2QsHM1OJu_HXTW8mIk7eBWJrxc27yWU+s$pp5GusC97u}zv>*}AZ{g9XS^5MKo zS1h)jsJ&PE?aoL2CheHF%4X%P^2yWQOsQ#lx3oOQFTKx2%{x~YsT9=)F0TnV^0S}Q zs!x_4rOuOb4qFUj24u(1%H5&+&Zr_hFL+-U^TFY_SBz1NPw=;jOy9aQx&DpA)&`s0 zD@)23>x@#aboa2+o3ki*1MA#F#;Dwg!J!U3*C=b>MN@y$95SgSV?$_AYW%E*M4i`> zLxZ_Ts$Vs}a6DG7e#Nhgd$fx;&-$a+gZ)KGeANM$tqMclRxc>3-Ok)m!5X(Eu_C2$ zYgGQr==iPiaKcLZeUL`lQ(y!u9)ql<)uX;)fE~ zcdF|j#~FQEu($5x3*{+`2aR~VTO+yb&^Jq;O;7qcy~5kJxPD>N0?VXxg9f@kt+veH zba*^>e-W!LVz>$EJZv5Nh`GM2*Ff`v$7D?6Chh3${=PQF2HzVjdlZy+)A5@}=fBK< 
zF>Hw9>uGx?jGR)Eo}fOwzov<0q}9YXS07eavky)E@~QE%>+_E*^Yhnk+upzN*`M#9 zRQdVjpHnaO)*C&$G~_|*`-YR-`=xsS{iZtm(5rccPg42y#~ys!RH>PMxp3FJ4=?jm zy;2)L zjfWqVc6m%v$+>VQJFVnSTFCPT^}?l2o7KWMf1cRQphV$H%reVcWKwogtd8-oPxhvK zh%9^(Z<@@_@f>hu-QD|(lz$n)by%ux`A=FEDq)I;b=m^{K1c1^HN6LY--rGZBE|oF$va} z7R0?iIQ+9mZE9f2nHb{-mu6n)dK_+OY?^Yuj(_&sgfkl-oSS|q-Oz1Pe3z=a(|6DK zg`Rw)JE(quoz4B{(;I%>>Am`IC!5zdBW!;)c@|~b{TELIE2o)Niam|2!yfCJT^zVP z>hNX9GnHlQqYLwEH#=*3rn+-K+P_aW?s0fl37}t$*82dNqIMSSo`k$ZE5Aiih|z%tiC)X zNn_5-y8|L7_l$X!d%SjH^|CC3ofFEpmSp>MAL?4r!=d5q@_*ud4SH%@<|`#kg4>`>MS-l2^gay7kFZAYTvHRr{>x17p6 z_#)w;Gw103^FhJx>(`j~QyX(@OtSOnMi5EK?czBb4jM4`Wmv?eFB@Ak?c)x^ zI=8omJ(nGv+`u(A=sU_S#Jei6OO5;MUQxWmT!pwJ#pAPYkGJ<#-}2+!Pr;{eTb?|1 z^K?XF}F>v>ZExa981_{3$zWIuK^Hq;>rELRWdyN`2w&`x=>xs1= zjKj8SDm&i%d?Ehq^stztwITb4?H_H@c)Qb_d0LlttG!+PJb300BR>6=$-bkladVr+ z4;nuWd9(7>tu*uKZJ8VX-tqM0*&UBnGJ;Cqb}8N$(#!43oT3|lHBCSDeB;QvfM=#| z`Z_~7K25JGmo$wW&D_;2|Z;ep;>uz<){*Q(k6Z^7MryR7X z@4Y4CbjaT0XBV8Q_~WkDu^H+Iyna|~r>{Nba$JK6Jjo;<)EX6;+fTq9BY2Ek)1GyL z*$MUZ6M0-W&sIjMUUbq+Z&)&ym5@+%T6>*U-SQg;Zz`leb=UClu^98}wML;G%VAmA zxy`X>Hf}A`jc~c|9J9}m)vx}CSz9j5`03S}0YUl!D{4F=4^LaNCZYH1F45~o+KjTY z2(oc--jt|mJt8~z#`nRW(+d0SUOVoo+3p_){(R=fu!h+wJKpCo7G1VD8?L>uniy;d z+`>QB)W>i|w4;gskq!LNSp; z%49Zhq&NV5KG#~?3G^v4yQ5d6&yQLw8oji43d zA}rd7N+ZW40!J#roCyPGB{CUWe-<}8lN<6o5{i;W zMK1zOO9+bzv12TTI~O3bA^2e!SxS{or_NSTI*K}e^RX7WwrN56-KVF>^-g$mfQSr` zj{$)gMoq5NQRO52)jFRGYmm7#9YD2~!wZ+EsN)-u0t&(^7ao*~0T(uOFmV$YZm_e% zEL><&G|bRxU#4{@qvqKsc;S1^MF0DA>b{?H@jmj?vz~jzw&(kJUHlErF;52_bQL0jq^1B*rD-j)f!&6-##<9UAD+K!*l8 zG|-`e4h?i@phE*48tBkKhXy({(4m3>d=C2FS%N`Q96X5m?|PXvC3Qkmvc+GLXjs~xHt!v8M<$sxc!5kA-o zr{steE#9VU_+J28_@fMMssx38ggse!D3P($$1wPvCbk{FW{aX^LQ3n0t%A~$XB*&; qV+hAjTlt$H3=SSRl*nM;Hp^IlmJ`+9smGf1B^#FTcIuh09M!l z00jUKXasX{_Oy2PG}G~Owe~O(@O5%z&A|q8X90ki|NnRV4{IP%r(3;6h_s%4ha~P9 zw|Dmiw&9l-xyfk?YAE!nspm(Aj zP2qOl0+M43*SL@1_R2QGtv&{E8k!6{3(ZB95jiHeA2fifLi`w;)GS7Eyqg*fh`j{q znk%zh*5BP7CS{#iiS^+c^cBB5x#U*}m8>i)F!5FOP(BHTL>Xa?ag{OUv|v#SBhR8x 
zHaRSNX0zZ6?pFx=*ohU8XJkyZ<+Ayik*uxc(LsI!dAXO=bV8aEBvCeq%O6E1a!wVZ zLNNUN`V|WR|B1P<(1o@`j5*%G5R(wYTr+oTM-M@PZ|DCQ`#((6e=R*Kz8}L_qV_H2 zdf4Les(tqX2>i%BJXUt43h9~p@ybHdLf`3xQ$Hm>j z(#6F=!19*!-?X`a33M2?{cj&7T20@~rs12T-xA$`;$`R6-B7NR`+|&prB*3cXjw7v zhKo1UYY?d_n4I~?o5S7t{pH-P)(h;FO?=$tjM)AZj%y4!&qjuNvG6Q6N;gB5aM;Nm zyVi>LMZD97$ZNb=q0-i#=8hD`ArA;Lm?BBD2F-cjJVH_(>QTGGbTYN2cvMHV3lpxad#1V#sc zf~U+Sf&jF+BkhN2C+O&wAzh(F5*J>UWg}5@_pq>F*saE~LNJAk>@XSj7Dh!XQj$eZ zYE}WuZG`fL+DPIK5N=uuK{yzChOKzJCQ9o#EVYqW8(AKr8(hDbPH9duSlA|%9v7D# z5%XzlThBTwSHRj_1975Z;n=`49!K{|O11Yf)K zO1DP~Od8TyU+O;UORlIj;gTTsAT>KMk*d?t+@u?tz*L$vt+)thChaS6uzQWW zv3N~JTi|blbhakv!zPR(88Nf$Ka9x3&f413<1at-{RiY9FXY(YWRGDV?(A{ambs@{ z#wVfnNbNC;SO+vSv?8fhH2HzG%I?9w)5h3N%4eDEvzHM!r?~IOIA%E*!du0z_E(^` zx<}U}xp_K3qa6ryF4T~#FS`GwWg*xgko+bUHI^z58s_kV5GRq7bG|m!5~Oe6Q=xA6 zOg!OQ)kb<>3@(wzQ~F0JsanvmwZ{G~i==OQZfl{a7%o$O=ZCL44W*AWAzSQ{6Gahz zPc3Ixe8w(cMl$=Kej0MU_&bxjAL^|C3Ksx~rvFx8e5aA8owbv-;P*4+TY<4|(0~7f zBIRqEBN@CYrmyiQR(G$Al|OQ-NDm*o-s$$9*(ibOR+LRW0|gK)K~0c%3!V4Qh$PR6 z6=gd3#^yt!Si}iokVyOzUBKiNzr+);gZ`6&!|A}i>7hf#!6m*!_EpA(a0=?`$=$s8 zwOQZRc8<)d$qy~T*>hKovwWkt@4I!Im~WbnpKgbB+bM;MH-pCB241Y3gh!66zvxgB ziuw!|Yk?T2Kj)AGswdoX%gCbGW*D%{(QdHhC6s!jGxjwP$wDh_3Q<;3@dpffgTYXE?A)1E0BV5V??r%R*19Pp*z_ILQ%)@ z#mM^6s$fMoQM(e=Wm{qC9Fn?m8YT6j? 
zLUjEahy{$`G~9eUJ2Jxh%{EDR~h$YW<^EbG9`WoxPL7!I#q_4S1Z3kFQg7vu zs4PqOR8>_1rz!6%{?}*Jd zqUZ?P6lYR&D&Cy8qQJj`okP6~yYWECRKWvvDZQKRUIFuV5QiK?XVWD*^ebkXSTxVm z9`pmqidZ@WeuU*B8+#<>Ghxi|`pZ*3cw&P=Riu-+CL=th1oEb-T*=0!Z9N4uO`++G0kT3!1c#hxD?otnOx`q~ieQkOFG z+WuKMy{NG>)zDP!M4ggR^(KWJ8p_W-y<&gZmo5F^ScrRL*ov=@Wyyw(u}hMUQ^<7) zn}>nRVxHnsvuvcz^ESb{!Ole6vi!+Z2uL#Ivg7_AJD$u;&)tq%4F$UMhjgUG$%}X5 zAIG!|-Buo`JX+=9_jxv^J~Ff$k9-+KK|X=tyjv6@A0)vY_9U~H&5oMf-r6NXVW>HW zZAwtPvI$tQWPmzNz^W@tUJEb>DsICgxS6}9@(N9D^uT*gHvUFJ4+xOGt>r-7RA-ek zqZBT^l;jBsVW-%4UA-LHmva4SNd{3No74R{;gLYU=F-wQ!qNvnx1U3DpiSeF;S z(bBn`7|ni_Q1iW}Cq!dTrOV^$Dt`>hJGqn&SQNqG5PnB{+JRtCvUgIQ<&PWVj@ z8Hr_ctI~S1IO$xvo)51(4zDwH<7CQiNU6zbpH2*J7*6goTRZ~c3J!m1a&EASB3l)=MgX*5YP0b2lygI8I_BS zl5?v&N>8Lmt{qvV=IHJgNLa`fi_dAUPA*937uR~F`?q506B z-^$ZmGtr45nMzf_#(i*}JK05D@j*U^6rG2$mw1~o6eNyH+{Zl&Yk4kykI-E&M*bMoBaABxshT%bUtz$qlI5} zbRdp@pn%cM4a}hYQ%4?l7Vg#vZBO@G&bHsv%(VWFrmn`uMqq1my*Y0~PtK z2`GH-)wi=9tUdhzt;7yEAY20KXCaF#mIWn)bK~QQpt@*;gfW=Exowu&lND1;2Ihu& zC^Ae5h;XrjySuo02wJ(gTmRMK{IBxEs4g(xOru$dvTa7~1f=?Ssy3AUT~r+RoWgaY zKAD};DjK`7iM&#x*9}v+cmtouGCe}1SDvB~delwSQAvgm$nIKFBIga2E9rAn0;xfo z=1kh5REABo8%;Uay@Qf=@3}k8c77y?N-)RL&`RF5k9yyClhUet6fVr*cV~BAD?MH! 
zM3m1M^4fy0u|b@9mYk8I2Qmq0S(TT+_+*v~v2T5knpONZPzBy+NltdM;AO6IyaFxG zkWfV*ByD*{2J}(aQ)p~HO5(CI8P%L}vHE<1{idJg@rR;po!&C>EPkpCcdVXU32NiI z_sp|pPIE%_gy8xq^CGZ{o=}4;hXH0Pq&~;%vH{PK>E>)#c+N@#f{pS6u;pj52Fv<1 z{nlnQ_UO9&A=jJ5@jF?$r2W(1`ECzLg+OMoHwC(2kzeoT#>PHRSwujOVQEo)kxJ0_ z%dby3#;^R=D7P5jw~JtAzEz*hnDJMA`B!OxskbcL{jNJBtbMbmv88;>a%rYe8;W-`n#|t74d9gbbbVYrp}qOVAN{12Cr4i7 zUg^!52Vx>y=r4O?^T9I{C1H{6b)mhs65Wlr%;>85#;Q*I{N4(4Eb&@5goQEz#v4xD z*xnCZj6?t`DCeG=6Km?bM%SWr=lDAXcq(H`bB5cQ4!xX2OX|<;n#kUC?HPdrfO@iC zbZ5Bb5VwJWueWtg4+B3NGRpyz=h?6wUgmhJezElY^wFV<`H;S3R$G&!miS|g-A7iB zru@<|J8O&7Q0r3u(Aj*HFK|TAsq}_C&s9Uutj@PYy|#uQsJf>qJ1~iTbq5djl`uA{mG*1q*w>pSEN^uH&x%HQ2W28xwC7P4 zi%f1}(lfrxi7v0)ucB2C2CHRj!i$?wXYJ27>c5TC!UPvlV{^j|;#*FoELtFRT9E zvA?gsd36O>{Tblrp84N`->$V7TKubneqQkBCgQ&Z=P|YI|GTAl9_M^h=?4-${y#3@ z@22E=;q%>@AHpDl@4`QNHRnao_aT0Wni2kU-~V4%;ymE_vi=941<`lF^F{u7fb;3n z4*(TRo8V_>_#=5bk8(bt_<`b#$uBS{zoZxE5q{1Me#ip=OBjS-GKKTv=e_t30DqeE t%Q)}T&tv`UqJJO(0B8)>KacX4!-lKkU`!7HAi{i@FxFd)X|MnQ{|D+kn<)SQ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.xlsm b/pandas/tests/io/data/excel/test4.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..a5a2ff8caadd65345e9ee5f8a40aa1b53a25609a GIT binary patch literal 8360 zcmeHMg2?6=4hiX@q;V)gK!$Ea`Nn(S z`+BeUet*Gx&-~`>nK@@Yzd3uawV!9Nt)YU9LIOYoU;qFBdH{V@v#&D}0N{rL01yK( z?ik8BJ9$_+d6;PXx>&hC;r4NKpesPR1Iz>5LEQiE@ju)H6{#cOUS0x)3#A*m4X#*N zEeJzs@6Iq0`y;7$4XkR~T^p_{*B7c&QGm3&(@w{=TRFR~Rp^ENuN+Vb?GLbKx9+Eh zXc^Br8CPHIxCf2%t4`V4wugmQA}!eX1PZ;ZLN-;L!Nb-qCZP!j>4c;W1_B;17ANYm z_&s+gEHW4ri(WE500R$*no~=4!*J2nHHM3o7n<|OsaOedYT>Q|HCCjmk*0Rc#A2iz zrNJ74{dLyLRVH1wWnz*fUs{K130@j(4qtop|)Dr#Hg3`(#V{obWC zk8#!jrbT@M*jWH}_9!M@Ndr>*p`A!U>0nxX-il@sj?u?=kM}I3Y}!~R(TghNFN4Xv z%xoYhAXx$}+^?e4*9+(J!wUbn=GD!#B&8-)z0tOxE&#owSv~0v|tIc!V z#2I`|EFIpR$Y*sCe+%FOzaG?N5G1wdOZAD|Kw!N2T6_OS6@xf8cYB@c4ukcfW11Ts@Z z6_zl_pSDaz3fhhFdh*-3Puv?uaAu@_HPvvn;UbB>%IJlz-?m{~!L^xr9iO8WY64MCSWGynh}aR-7je!yg>OE8yx={gW4@XI&wG(_nfogWae 
zPJk}>W?PRTKQ*UFElBJ2^NJnp4RvI_m|k!AYS`FVeF2PkR`~O%4hcDT)fpFwo0eKaey-{ zKVRWT4uMqbjP?N0`%iXIQ&(R}fn-2SL*MXjoJrs{@hvyYm{`V9pKt>!}GCu2a2BjrH0k}73Bxa;7agLJcADFGMe9bN-UlROdi zyRWBC!IRtPi&7RS%Zf{e1*R3UNXSC!N?{XD7J?SooAXur+ui|pAeo$c#U#<5cW9`% zs7LJG^VL3t70HLw3!Pbe z75EYUd0Ly!9dh`TW>I+A5WjzBtlIih+QVK*xtdkiR}<_{OOa*XS|VNzG&s+Ylw#clDrtH8W_zp8xIu(!id zjhSpDFUFpy%6ky8CneFr!zS!1lM??U7Sx`!H@EYNFh5i%~2o5adJ1+E7s95 zRfK#3*VGq12`HpSygRifKMt5yw$jx3w6>&x(=eIN84;sVu$K$9(NIJ=`rE%Q7 zZ|t8s@EoD}J6JElS?G6*Hp&{T5^G;2Q4F>5(Q71I7D* zHP|)53Y}-H+;n>7M8~Dy%U^hPdb-mgvXUZ5#shm^?jEiM)|>Kzkg->Vr4Ul3ST1@w zw-_}hN{cEMf1MYu*TF(JKXShlAqJK8vGB^CO#kEfafyt)ux z%tz#s1s?fCaxV<_GTlDyp+*9svJtBc-^&i+%ol`SkyAx&9fHfHE?F4dREmPIjFOs3 zIkSN%otRZ@nyG0q+4{X6Q4250KJSQKjXqRKn>Nz3t@I|I^hw?2m{{Dvh9Km#jEfP8 zczk=iQ4har%IT8;p6v=a2Y__9IjE5KG%W2tDyWcfRPa;5(rH_z)A)D|C9ti2eqYG% z>`llVd~QzEJK##-5G!@zV)igh2E$ko@p&yUPMxx0;>XWxVL}0gQ#-!6$ZtV}%tLFG zt?TG#E@DfhmOa}Eq|3)0GKJ|>JQ^=>A@F25RXqmbO@sLTY7YPavq%KWz+&jLc9oGB?0&tp0P@Qy`D8Y?S^a^@wh&|K?AF;w8ic+oQx(h=E%(h zSJxSy9 z^!@1zFqXc(4QjuT%Yr`Xdzk>Q>E)F@8XeALN=o?lj8-<5VN0A?_>x9og}@iz6*ScE zvcfQraj_ulLhPYas?8J~z^~3#(;lCBCB?msVphyg_cij>0F#*3qIyj0EqJ=_jfcUs z1Y1p~fkbIh0P(&waP_{RdX{?v^+4VqSEqMi=7d{VE3iL4nQNONiIJV>He^!PqDB!|jW6 z;29@hWzhLIR;K!HRU%L9nEqPAfCv$Wg^`{DHYkr=L;U(MDaetOR8Ly0`ios# zXGS*4yArHNlIgC)c&O=6cPm2ukw{b^M>9m|>exScGHV@gOtG-U<~AclkMmB%FoHvpnLP6+eY^M1 zMc6)`ay%}g2-`qdvgALPBm(Y_e)~SI)yCItRwAK+DlJHazs|Q<~ zcp=Y9j+1Vx-auP&Q)^g!&bz6Bn{REWdC*y5KmuKU9fjlTUE=-<8;~p2HjQ|ImTh5k zZ)JJr9`2#rrBVYYUN5tjk(UP((`TZ{Dt0#0!uMdAEyd`Nbctzas)ePPl0Ih9_fW67 z6^u$hd6L?+eL)c0BL?3p2}hsF^_Ij*E;3^O`SxxOw)nCA>;OSl_?L4xaW==Yh z`l=%<+MD>QLd%XKPIdI)P8HF4>Dj)cb&h&E>Qqsdhwa}YW(`d);}d0>7W3Z`?~Tu? 
z3d^0GUijG^3XX2q(W5pU$}~w^?oh%*^?~{8ZL@)sQHgZB{c9g2Ji4=zQcH35LbTrP z%fO4VVGP6v^2)g^f=RRjlq+114l$mfLKLXE3&{D*t+REtMDR@+%{1E961K`G(L=jI zse3Jn`l+ke6#zI~W$u!OSzpjHH;Qe^pVL_BmFp3yXhpW`r0>UdtdHgsXZRC)q;wVE za=A}0&F8AU`rmIQ#%Q&ZM>PmI9g#Vyu8OYY%pJyL8F;1FYvAg)miEP)VIvz!z`lmV z3VUQFR7dOW51sFb9`U|I(l%(b{gfhjn|o~Xk9B-AVZ%Tb;>r=?P4YX1+-=O=T$PfH4#n${JKIxl<=q30A5 zXYYFGlOh)C??h;qvP?jGt~_2=$kh1Q1!Q1wwZAai?Pu)ew6lrfs%P*m6q@5w9RHA- z6uJ58;VHjqi|rU{U@uoeA9x&MS6qmX@&A4!TL#%G4!U(;_KWv6u8P5z zy9sTqT5ZOzk_?{M{(-7&oH654`7v z7ma5MfmDsbKy{?=tYb#fZejZ1p7|U$3c~Sc6(fOHGPTZ#ZvDHD}k`-xAo&d7)S9!GC7kWq+#;@HKDn9neA?u?EnH$tuS){7=i zZl0r+E^|CZVl*t`)E8x!@1@q&Ot!5?HYjRT$zB}56D>ZX3w&Z2M&-OL?!oDNasysZ zn$*#Qg~#Q;q<(z=_O0zE^xXrVO*SUeN*Y<4N+j^x>G4^4_0G?5ig-_W`4CManj!NR zf10Sb&2qtJ%2?X#QakIs_nwsb_BNPl0X$`_;bwt`)k*bVd}j*HXa#D!rtUZ^oD*Z9 zSIeFfaEXGSg*iOzZ^rF=bbEM|Mk&w2oMOtJux2EYxM$r#jmp-17Y~hC$9VA z;Os8LZJ3W3D-8w9GN!f2p_;%}z^9ZCMbhnao!F~jKdt>VN!Bk}o^R{%i&p@jd<*9k z8tm!J)1=mFk5pvt7hCmv@#>4L;^}1z1&c(g_+*Yf;W@Z_RVo>{S5dO@)ok~tBZ_o~ z3y~-Oc~AI{k^7_lhaF-KmA?k~>t@v-z#nZHLLPtGy80FPR|EggKqz8S|Nq%)fcOzk+`?!G41AaDD^-w8Vae{%Sw`gpT9=qZ#pQfM09Bp8--x|NF=PRtf$Z z<=6E8Gs-O4?@#b+G4N}Izb4q9cmSXgQIh^G-Tn&y>umUExDq0t{}=wBxluy}6*0O1 Q02bo)43W`^XnuVAKkIH+w*UYD literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test4.xlsx b/pandas/tests/io/data/excel/test4.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6fb62272caf091e4294d9b397023b81f8ae7d706 GIT binary patch literal 8344 zcmeHMgObGUxs>^PKbC`O#>fTP~uKEwW?Cq%XknS<%0`9@D|9ATzegYLKBPzX|xN_$TH!`2tA1~EO z0{M1>2k=;)h!#QG0SvHga#M8S5-8ZL4w1jA8F{(TaJ`AQ=PJyg(Z`Zav=z7;aFmhAWhGZkVQ zd$!fWfoxf@kN`_?K=I&ElVBHk*Oc7Pp=c8I6O!D;9l;ssYs+z$H11FrHJyLN?&{!I z5ntHErf$gIVcOR~?r$gnz}+1p0P;6kHfnIvp1`@L1XmpfT$Y9|=60?e55DjJmE-@g z2LE#F6-mnAUQW#LeTAEd!Sfkt0X2dU52c`IV@2t;V#cvgm|Upc#@vM{)$$vao9^?^KQPG`P2nStFEI(HH4cbwkUXp64SqWL!m}I*M(|8-2*>xSuW_9A<`mw9L z98{&@#k=4he>gy&dm5nvqOUcp$bsk>?!7-c;Bn4}y^e@~+Ra)LjfdoeZ?`Xk>^OQn 
ziU>5>t)vKkh>VX1+ukYP#lX&V#cvi*8f42R5d3_>L{ActUqmB&(mWN;Ycs~_&TZp5 z@!K$*Gb0V3o?V5i!776r-oB;EWVYD6YnE9&L&*vDrkp$`L3<>MkoG$h7Sr-R>1)MT}B};*ZzT*NXfqzcJ9qy%=J0_jvbc4+^ zD}}|-g+~9Ev8ItTbYphKNE)~Mt=ORf5M}+CF3R(Tf%9x%JC*S)8qdAvDd{v24I_n% z*N_>@uzP4#ApIUtXkmi8u$j#}^o?HraecL+ihwGVy|B#{kUI?;jxCjrl-bJVmok<-rdxFrX`l-iG7xIaK?PaxM=@;&Z2C{XL=fREuiF(JMD$20OVGG#1_pb;|IsPCGWl+(kb1y88+ zBlGBMjnm}&Y4hYlLNgX==)E6+SjH>@H(^pU8hSD0ZG9MRTs@Ykr|bkux#~pCYh?xD zU^8n;lItr!O=R{ny5wA;t)#osuyrc;65#?wc5`a|ZDhU-wQ^DE3musNFWMu$iC_n{ z_pr*zraGc86C?^?Q)=hOSntLgWaH(oDQ)uJbB6>y*Y9gRw6&meBJ_yXCf@Aw62HZ! zZ^}D|;7)k7bQ-uC`JG&MlYtou=FXS2xtpCf){H*)7i={LOmu!3*!bv_7E3{mK^Uqu zwP1Elqx&9+b>-L;Fi~`j%TXbB<@PP)?OoHGz!|rWhgqfm{tiehUl@hvOFt|{bEjtU zr`iipASKKfxi3Xen}N2U%P)7IJ&$omnM|Sr2_*|QHGu!W{R610{c;r{@8`|m7 z`&kG$A;&2VKq$Yw>l;Yi?m}3=IAcxbP}0ofAT7q#+R2{UHm;RMYu|*F$O#!(yh*jh zUUNt&-QKfUF@d;{pdD~>hXmb+q4!|6>oY=ZGT*`NkU96zMd-^+QZom_2H&0T zrUjBSyEcfmmr!FV%=6CH*yRJ>>1QG0l4sm*p{sj$pJ5!oltWJBj(r+j4oGl0VErKn zS2u4vbJy=PXMwt|?II^Z03}QU*xp(P6;uq~t>fh*XyLL#_0_Nw0?oWrc)Vo?>)anq z?<+JV+l_G_JDl|Fa8>hx-(B{LTD@}2heXu!N~taDHtB2CIC3>zgTiGjEDa_k;@mOS zN9X&hJ8{9#eG*q7Rm$kWaTCNs_$#EqE&F^^8#%3dfGQ!v6{1nNx^-_f{aNn zVZNcMYfbd!Kx$S!X2Bzt)>xSiEK~biWFi&(Q_B%d35slGqb#6Co&9>65Dte zwiomPOq1@Tif2hXNo;8B;|hRl;Q}UV<0Z!I;zrG}JXOk+kR!wv#&#d|t5%j94o6t~ z(eyV-u=+Fl9y+ihdxI2qG*+|6eL4Cav8vKl-QIRxWqP84WpZDcxtwmWOG%ml-ngkC zo8G$gORaa=0OBJ8^t~5PkeRF!DC4j~pK_9kDQ%tO?7w+Y;UPJF0A0>e5jcR_Y~`AJ>o~W3h(^zOjN^hw^(qO?d4KCv_ZG9YGEsO?x@D22 zm-Dg%m=BD?G>M;Mr3j3kHSHw1-+1ckC;4(BK>mt=;xfSUqTY}bA(=-qcQ*)QV1^Qi zXfczmhB?9Lf;9QK6$54_{MumVBcdLOT|2yv3-W|i_O8?$54-Td5 z9+S$7XQTWwb%>YO_3XX)g~cnQOcU8IhW*C}rg&je&Lr~Pb}B^OCIJ~pa(M5uF75o2 z35G&_kA|Pp&^_Vpz_U-OAWv6v;ybf}O-!RJ-#g`alu(QR>BeuX^Oq$+Ot6DiGMv0| zq~DjXzt87xR_6BR9KY|mzS9_{Jsv|QK+r*SC4uScd1Gr~$8m__YiGF#W1!wC_*Bwp z9$%N4NHWyQMGZ+ZeGSP>GeB=Yg)E@peWAa#eWPrxHrP4AI66;Tx#^JUK*g@z%bkCD za`LrZU?rKChy(h()HPI1MQh5VoQS0=BpF1OY_{Oxh>coUwBr<%#FfX^ZSNV>>}!2} zd;TDg9kfO)G9j_1Te2%J{r=6cxp=TeA4d&|%Ie3^LM}X~ER~TBg5N@*FVbvNA4B3m 
zkMtO&xL&mLXC#0;!lw#a+j*CZow9%r$mDsU=|#1ZGR6ZD8d0kl6jRee(ha*kf~Fp# zeV$=EkUnHlt5(9(t+clsX_K1EQPEibjRAE|ONu{gFi@7;WANT-keIKEWCSOJ7P zO#%6oCn2d~$ddV>_q-c4&&psq)e?1^Lu%wPrpl^1N4-S=d%YP zQa}S<_~$kM7%*w$#P`!{A$)%MQ(wKY5UV9Y^h0Z;E$gVKPC|=>W<6ix2$zrArSj9r zI3NjF<*+0fB`q5MO`X`i8aDs{tw1564-`X^oeMH^j${&2h^kjC0?{pF(#OiU7HX2H zZyy+b6Ll{X4SvHp<)diO6rNF1B8(2QYWFT!YMhKeJ{|L*7`{?=yw03%Lzw&=yz zPiwu&s1g=$V&g=ZGPZv+^t=9;V;Bg#IsBAG6yH&^(HwVo8UYTtymS@8Wzh9nyG)M{ z^u4;ce46v^8Xuq7j|_sZVgW^p)EfwQK>)FvmAp!qj1_F@JB z2KrPsz3#PN+jNCAFhQ&4`UdKD$Tb1r-T)_au(bQ}?HRgfjFU>`;&t#@0tUIp7z^ zM9$F5xERr?PFhFw;iiW@rwUy-7+q#B9x1V(74tKrSqqb#mJWz4UF#pQdH}KG5t8Ma z`K=>H){M5fQ{ka-)i|=OPc*aZmFfHD%JD{m&*jLs=#epEFkd%N)}VTVOxCD=--|d{ zZ7-+v;rrnXy2QBKXOz;>G+V;>{1+5FE4bb`&XPm@PAfF?!1IsNPWWyb#p-mCe%xU8 z+P2t?OYsL^kcX3QS=8iM| zd|q&|RMY+ekeI({@ywUpe!)*(5Dl-n=k0j4-8R;6jmGIm#qWbGV zc?X_!)9=T^rNAvF zm;400q@w~LTV|T#h9HI()rmeKhgK|K43|kXu)x_ z!!*LSnHS-eBBU3kQiQ?<5_D&ZTC95cjfKoAk5|^Yd{z>7S;vJvA@%KZawI4wcec`j z!WZK?or2&hva%gBcpK(FMOs+n3|SUS8p)k$z$1?ZpU&`*Zj|t2s0eC)B=t)U7U8 zmL_z1KVJn9+f`liCnf$>!e-VZJ{mnCc%23HCG6Y?Cqt4gugd!jDYeNuvwIJiHBqwP z?{r+@0xt0Cx}O8;j4wuyvlyoa@@=iqw{TWKf#Ov?JNa#i*h_dck+29&;hN#-Xs z-c=&@UX$z1@ssRC_`dUP-K@;9Z(3Xs*5{c{b~7mF5oJ`f)9}En_KYZHu~qqI?FDRL z)W8l!!Fh?2yYP3%=al$mj!({gYz}zef2pTN zetRJGR>JHnDJ)o<>GgW+tp8+0Jk?JB+6NK0?ySU=Vl1s7wXQuWSRn?K27h1nQ4Rxd zA|(&$3j31-pgwZB9QlJhVlETQSDI=9I7YO_kk+-htM{X)C6iAsDl4;2!9j&K_OQwV;6HXH8&S)2g~2fdA&F(^q%kb zZLlY>eop-f=n)M~lUpHbJ=BR|9~Sq8HV7=E%8$$AZ_^S{V&5oktz!Es0BBw=BP#l zGH8UreCJi)cBtyUc!D27Sydi7v8 z^cat)1dwd*ObBw{G>c(^nB|z-tufJNTX3*y7C}7bF1KRd=NenS^okqDauze$|_=&|selEXkeuVgP? 
zKkJlu$meO~CJhmZ@CNG+cG!Hnp<_p6EU_MPrKeUTep1sMr9_GS2?DKd0h_iUi)=5s zrfQOP4WdrL8%22E2wSl5kjh_QH-yY_S=f!u@%Tn%J#kV)3mO{p`UUw@vAb&P&C#xh z9GlE^MwJxOR+R`U)zjm%vfz%-Fp^kzSm_W&Jc=&;7I&(kr`2-aX7X6-%VHbL+;{Gz zuWhZ+QvEne7(H61S!&62EKNPL9mnbRQc9U%^I?HZ4uJVE^5F-T%!!nwsz=1N) zmfuG5hXRSVxekm~6(6;|R8hw5EcfaLoWd2rhIjs)T%#?ONvil--JznCSfP2p2dB2c zDz?@uzCeL+MX!u8eUAP6m&Ky~yA?&Bu8ntoSfU8`*x`BN*FE4rNAAz#FFL{y#lHvm zd$;OO;P+!XTpoYvUHuIFyMzB%U>kf<|Nr~>Kgapm*8LG_AKvEv$>jYR{Id!61N;r+ zckmBO>}TlD_QMZoC-yJRh@S)eT>Je95JLFhKmLzO@aHH$r~e;O`icIygP)6mpCkM| z!T!Jl07dYU^dIT=XZYV|!@t4>;raZ(@c+z>5JhD8=mG%f@YgeVM!!e#{oDTm69r*W literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.ods b/pandas/tests/io/data/excel/test5.ods new file mode 100644 index 0000000000000000000000000000000000000000..5872e2624d0330b308defb8fcf5af335e4cc92f7 GIT binary patch literal 2906 zcmZ{m2UJtp7KTHKNCJq`2LUO;f7hwUs8Xec1QMi#mPiS`gdkl8VWdfD3WAD2 zKt&~B5J4FfsiBA<(h(6s>I=?#%lCZq&Ru8S`tP&WzWY1-w?eS7g8+XINdU=DJw<_e zb1)AxC(av(A%q8FP=SGd-X17|cYwcQh`*;o01EGoR|p8i_002woG6yT+3E_Siyh0ex@45X;e+HEIiO?+UFc&cg zg3&`u!g=J0i&eXTlu}Pw{?l2uej5v#qYWw+6lZ+$HNBFLuSH%-SfVA@WARk+ZeSlq}LS%E|hE#)ty`u_{$VbE+b=>~qka z`bJ<*-g52eeB@g#90u+Qb8iPMV*cdw`p~G);gO`=o$+F7IjsJV>h6@?=$q>Dpah>z z&+}<1ns<^sb8Iwiiv%7^rBcz0zSUIwA@_ud$+C%9JBA}mr9{I?My--3_`XY-jFqR} zn(tSBkx{sNKq{xXD7-nE_|?u5T>^tLl$!OCN?wiV`+NbeB4)&nzv^Wph_CdYo;FN7 zKCN&<=1XkuUb|xNnc>Xi)h;5CN$0-|yAmm4cHZq8Sd~SE2UQL;I@ra|3RlRck+W8? 
z$N5p}5PBkoF`i@9Q`W)L$AMUqb?Cf4^8~v5Fmq z*&(%Oea+l7A;xc7%OA4)h>%F)_gR;R{q%w9|!K@zOz~XroH~#jtymTMB(mduP|J$mitVrZ?jNgORGRL|TJ!Ljw zY5?pT)kq{7ooPC(+#^>G@#2Qdnm+$Fe|LQFwQFxD3m-PN=|XV~r!p=lk|b`xjtEkO zXVxf@K_4HtG=~qJoBS@36C~V3kE$cTRM^q!$!<$8Rm`THC1P(oJ-j4FGa${sQIleS zZhzu+#7t~r6lmf1Yp)X2abIIK^MfOE%P!$)y+V8q-~vcXYn=EU9%_%tDY=O;M=q?l zJU8!le0-l^acN}eGpa+4=BqEd_-$<40L@8mV+#}(DBZ}x8<$XnJf)-|$x-uOT1%2R zew~fZJ(?)c7+-V!nAHB5Km(zqE}!xE%8i2RXR)k?H@u^(%B{NhPxoa8+D$zwZlwZ+ zuy>!FbZ~b)a?jN;y3~bt#6+%Z-9X7a1dC~|Md%PeSK0{QZ@_b|{#MEHoOk|KQe|>E6a#JWaz;`pUdrC>gF~>V!Fyv-BRGzMY$otlY#WX zF)~v15e}G;?>i7$qJ?q_-*KbBKKfXSULAO1*H~%M?DU2w@J9bgY!`X{YLJ-6Ez)$v z-0IPn(=FsOBvE6?!b3C0DMRZwOG`Y0TyZTyJ~y1$(vLVIQ6y8_Q5~8@jH4GnQioHX z2BaozR4R0%ga_K&ofuJG59(We-qxmi$*XzUf>#l4er}Ztg zpWM!jxZou30n8sav1~#_%?fr0BWsLO%)+9Pi^?QtPX+JOY;AF-O$?cLpD@#~i&DyA z&O1gGmup%nVPk`3rYEQ8aJNQrT}tD_7rtrT?HgaR_QN2|9O7B~@Ex)5gB}U9cIbN^eNJ#Zl0lV&}Hja-7!HLI&uGXoI z41fdg*bk-yw~ADHeNqGU+eE;KnS}Ryy!@Y5m-qG_ZWZbv9fe}A^LeTdLChk+mHq9S zY0b*OKS#-Qw&YzFdg+#DPyf6aFRo?wwv3HvK*TA3nI_?eTZ6s@-F=h2v5=%?)FQ^K zI&|0!F6L~7pG+$^X=7KaIPW-VjWoHXPSth#P|!Vst#-Q@Jnd8VeNO94kGu0jO~B@u zED3A-z)k8#qZChM@~nM07(Az|{(Los*XfLyAQY!jk}G!DAj=&Hab3}(bae8q=5uk# zOJ-|*%CR5~aOnpd$%p1S3w}$cA5)XE5?WSK$tOj1*9)ZZnTlq4iTa^IeES)?eY(2> z!5uy6@7oPCmN>bKiVXTeHo^29V4Lg_Q-raN@mb*#akXQsYwdj=)DPZi9+KEKQ35JM z6}k@Zgi5dUQi2PC*Sf@$VIqqeYl0DZ(#D{4ktFw7X_p-A26y2^&J^3;VWFr;%hFwP zb%HWGyjRuNNyD#Pca7)l7BXX=?reRL#ty;Ft=qr23OS_`wg!{zVd%Tym({nouGWKn zIAKB2xvJ@#H|f}S-nKVym`rHvL~X^dz)c@iqQeaWg_7S-T{&6^Ut~PXtuIhRewe%4 zWBFYktaxM7hq}%2Pq>o1j;szbG393k00fw4^9#0m@}?F>ia3O` zN3zh|2)$z)_zBmsDKX88Gy`OTL!-!O!?c!4R@tEa#0#KDyxzdTnx3yZFe^Gk+gb(5 z@+$P+q^h)`j#LUYQ}oh6njdi<`7Yk8ZtxRK*8nnSymzj#q&2}LVtce!Sn(fsE{*w( zJ_!)NY54Ts6Q}D(QrDJhwF;l^nH~A|3jZ zy7y$wVyO$VQ9P(kaS?Aiv_#`r@*`c!Mzh;%R4i4t-MeEPoM4^Yb_P{Q0`2Rzc5i(t zB`h@NwqF0^wVyK1+(&)-uOr1Ss{Rj4&DHn#|dW6WPTXxe*r(Q@e}|6 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.xls b/pandas/tests/io/data/excel/test5.xls new file mode 100644 index 
0000000000000000000000000000000000000000..f484f54a60c6195fd9899e7ad9436d99c32a17b6 GIT binary patch literal 20480 zcmeHP2V4``*1rh>0;qr@q9}%r0+A-Nq9RgkG!=V6N@yY=0^+VbScMgP!9_t-EU2Ku zuD#c_J+q1h6cOv%*0t+%t@+NGAz?BkF?{c9`~7zEyOWtS=l;*R=bn3RnVVEyFuc9? z=MHxW>*zx?$QOwY(PGd!aE*yN3<$A=3%vg#kw|DFA#nNk@fWhdXVBFeDo+Cv50WM% ztaEKhd`LQwbRp?MY5}Pwq*jphA%P1JLrAS5wSi;=sVyX9NbMk*Kxz-E10+*O9U*ms zWCp1-BmpFINEVRLhcMYNuKz-2{L8936zu))99-}iY2mp5B;ezjkfYR@_|sqXd9ji9 zq6NZgjuqTy(`;lUNg^p^5{ZECB=~Ob(VB2M=(kcG)X9&ct^3lPnq(MBBa`714th){ z14$x@rgTC|Dv5*ZL=~WRARl8Owjb0%1*_bn7Y%os#1|w)QaXr0Qff2x@c{YKva2f} z?e(TXz78K+v6d$J93$2~(^rFNLF2zexa23^F*p-)7t%PAjIm&3Ttb4tdwP`Mf8yvz z=@8CA9eTr=LyCc72=N3dQ0s7RL!B!ro`W2#8Rtk9*o zXhSv!cKq*fJpGDr>jhRG4p%1G!N zCNSd(Ng31&z%cW76)-c>^&-nLn3UsKfFXsl2`D8mrU9Y}9mIpRWpM6f3q-a?1*H5k z9U2L&oNdWg9JgAG7EQJAY&ZucS(x!UkZm}4wFpf~6Ty>)FaPj_sUmo4L@IE|YZ06X z&*)QhG|8`*JGfOH(GQ*>!pPX4CTCi>6W0rPUOeM%253ukA;174ZD{blq@ zCG9jsg>9jsg>9jsg z=|3yMSEi3L{#{CN_9&t6P(nYfgih;&GiQ1GY@_&b22IhI85%CvTdqfVv>k9pP1B>J zk90oLneb?RG$H0u^gn@tI;8$tri5OuguYoJ9cSbIQIUt3DQLPB6Z z2LlOOA6zw{_$fLr2GDd|Nob%8;1U)SSagAW%i-{2cowWXChp};sT9>s}u5i zvi!N&(9l@V80<{9J6y$(;?s7PwhJAa&ayL1jA%MbPa`S)3QQNs&oVpUstAS8;Ge-a zA~VoHvZD{F(q1df@Ti6&ibj`{4y-cyY!D2sYJi{{DhI)Ktp*6Xrg9JrwrYT&yDA64 z$fE`by0CH(>|WIXLAO>8f-AmX1rZ0Rt{q%5Rs%$J?Lhcd2Sj!4;G(q}ny9WFT=iB1 zM0M@pvbh={s%r<=-5WvRM!qkh+m4H?>e|7Ddo@5**AA}ys{x|AcJS4K8X&4`N1z^v z>e?|^4@7nCSf~R+znoFt0P$7aS8YPS-BAu=r5=dt+Obv-M0M@hs0X6Dc5KxHQC&NB zG7uau6%#Fq+&B!|4nB~aJ}+rB>=IVz)de`Tsm=>|KV%ZcAg^A%l7VP90B2=oNu(aOf~-fSnD=M3+I^y|siug-yuhvz6)!th7=%u%iy<2vb_J($dlc`X4Q0UE#tM{knWn=MCt#x+ zFJ^#QgnR~RO`Lkp^fC6>2A0DZ5rC>vW_6dxIELt`8B@=8~1M2Wbti97$-wJ~<3xXVnTu;EJ^v?QA&Q>v+Z^pFE77AbKL z$&+FR2)ng=$jpPbY%-se`j|u_mVrn^(Dv=yo3wVMv0FRR06QC!thFPpv9+_6`UY5; zbq3Lb>9bO$6vf5=z7W(Clxp<6CYEYC^Y)a>z;;zmCVx1qJLm-&c*^H8Xm@r~Wj0@F zchFRm-ZD+l-Jmk$?U~PQ4Qt;pxpQ%{7B~vLBO^tLsm@FJfn+K6qzwm$XC`S0E7?xO znh1y^ByV^>9SJMf_;#8OGaOjH(dLPw>z}-nXyP?xCIUN2U~K}$7_!B< 
zC^4F%=uf}ZC>GP2EylH}7#kI0=ukz8(Go>Jefm_fm^N%NZcW8>RUrn0nFcgEFqF0^ zTJlP)Sd0-{O!uZ@get^f2y76;7e&|Jd8JrPTecYYrea)Fh@pcTRT>>p^xfyz6^k)O zF>b0otuu&mQ^V850f(+WQ!J((TZ{rvvs595>1pDCKhJGbEXIT_MuDf1pDCou?iu7Gug5qrlT#Rfu7F znmFLp?VXCnbVM=ksy$5rV%*j6G#A&E#~;vQpf%x+8d*&1#1^B#)2vj8VS1X2>zki) z6pJxqi&5Zdb}Ga$J$vJk3po7^bJWxRzBdR8*Q4!~%vZMAmP5v--_W5^be@C9IyUtX2$a z*$4_A#RfHJfik^_kf)i>ib1U!LBW66pw=u6o198-w~=?4?HC|M7ziK8!A6hJ%THLe^qL=ppdu@7tbKH&+GX$j$}aY>0w z`QOU}a*ZW=gx-FJTLTjSx^XL_HZ_t!P#hRb1>mXzB|r=ISm8sQ)o{QJ%sal^osQgH zK35-h>A>3vpIA}kB!Q18Az>*$i=|gPi7x0B3xmN(*sLjlYY}Ld0L5<9N}>BmwZd_` zmP{-9PO24sC)J9+%jfcleYS>VDPJJ3ilzn>-3Rtg(t9^S1A@E`*;>GB$Kut3o$QUg z0$9m*f?XKWJdW8s$YaCG!;|nKPa5p~OQFWTk+3^dnnRea4LO9Y9NLl=O>)S~Y72Xd z;7x{KMsh-UVt8s&%5*`fC?j<#zrQ>bx4XoE_>m0Qc!_nNNYy`Xl${PX4+V8Hz#nj$ zBCUO_><0JJfwI8_y?;^?II5>SE~kTpNBV=&l31>aYYS2=5GKWH1>=2K9~&SFkBUo- z5x`u5rKpAhSatv?guWjKipGF~^c(@WLIAPDZt1cw;N25+qZoQ{J^!0Dj@mjZWVf5jX31|}w_fh(lRR~gp`%7eR-lff6U(iH2UBIhZY#F1?d6$lOR!AYDzeS7gk{A zbcDbh1i)C*6!0kwEcA?qil?x`AuVq)EQ2&SJh)Ugf#&EcuPcXu$RP0a6l!xfLN!>Q z1D6n;W$ncS+e=VVs%R-cLLP&w3+)9lQb8#K<-=JIr3$Q*NJAQ~aWn>6`c4|s=sRgh zqwn&$dgL2ilnqWxrD}W{OK4kw4yJ6-w9c6!A0wn(og<-96Z%f734JHkgucUo*)v-c zY-c5}f~F=^L1}#v;jLD4ijELIEU#D(x^(c{+6!2xZ4$h(9ZE0V~Fl zbjpJoIhfnC-N{neWRJ^pAPas!P|w#Qo%a~Q-4*ibwI+GR&mm;X?*t{1D13vxm@J;s z6LwRUg5PJ8qeqXzS2igul#oK#XuuE?G2leQM*?&b4fkRw2!Td|AH0N9HuXU=@Zeez z@L)bM=z3L!7xLZBh8(Gub+76To)F%?3J2KeYgQczGpfPuIc0s&&EE2Kgy z0nHWi0%4p%D;z$dEJ_7JCQ{q)bD=gj;KCKOh+(lJQL1wh--Hw~Y>m(G;6YjsKUY}K zdNv=PaNu){!-oVvU$Eek8sMv8@ZoycC+HZ#cc}zep`xj&a2uYSoFIb6m=Ycl5tjsKFE-Z9;zRGm zo5yuN<^9yz=3&{TEeES~&we;HWaQ%uzYb2X93D}kr4>Btv8BzSipt7={^~q&=(>%e zk5_-w%OpQxk>!RP+E!&d`z$<~vS5|Y#64AO_c_k>jO^{)I;Pk3NBd_SH%`~s@^DtK zH3NJrs_&I0%`JD!aq`e}D2XmF+Oa>+XTRXau-Eg1W6z`vIsc$+nt8p0&hza>k2-{Q zHhACC|BjP`&)o~J+Ya!b_g?hC_2S{lt4^M(tG9b{Cg$GBKZ3~?$(%3e-Cp0S2?J9| zV3aLytkRlI50AnVcd$2ZiwR2+B~0xm#Bz1m}9#iGq;B|A-^MM(PU9K#yJqilQE(cbdMI|K;07p zgT8k%*G^kp+avOTonOTEa}@{vMSO}oc%Lr0w5IOr{d==|oWEsMy>f8N(GJDhE$51U 
z73S@EF~eb#>E`PLI-Yb$xO2MggU1;rXLl}Wb8Fz4sY5a?s~6{P+f^^9*=1jC{@kH{ z!al3`-t+p8e02F;m%DyX>}_WWuq6VU3`1?Y2+pYXAEPfUGVN}3 z*z%><{0FNaToqf5?mpH!e}i8A@`ip{+WdPtKW&a% z-tET(z5bT&HWzos-7|WfIj_cdJFufE}8v1Ikxy6#nes;V)J#5;z9IxrS zcjvp2=MNZ}S+36e!EnLH-laEn9*VE={)zzk7C3@T7UN0<2bi_sd8J=gInJnj=wpP} z8PoKhd+eI~JT^mT(94^~heyPx6&L-myd-;R?CxCQz6sS~3*2W1nDll!Zd!49%W#*6 zlZ?ba%t)Ujv~jq5;K7)@XFu`fI{4;mYeY3j6P#n>Dh|ByrX#+NL+SQlzp1izLqb$+GzPv4W0FPCGEeEv|{p8q8F*GKoS21_!&yt&_F zjpY4_HW~-pu3xL$d9i_QP1=lz`Uvy+fm;4C3unJvIl2Yc*G%XMn`m=~u*9Qg&Ka)vrNkfgMxSce7P>^vt+Qig<+HwQOivCeXJ8}Z|rY#>e z*>}Eq`^9VBm(=)cTjd;d2;`Ypk4aegY}91`!1d2=M|RTD;||ey&)GP}q;j2R|FviD zlibjjf12O@eojxyT4?|<>UvG4P0gFzo> z415`GG3~aFQOOkj4!Ns(eS7zlhr9mmO{30MegDJCQvKfa!8;7Di zf3&^XmJ|Nb#%kI5)Yii_=3Fly!W+1{(mYnHbxKrHuHT8H>x4HdhweJb-F$!a_nnq% zek|YkwC6;we{w=UqeXtJFNIcax6gm??s~|6#q}0;$2KiU(Aw%+Z!m6JtKj*jdvm94^0%|s-0Aor$Rd4rUeKL3*YZw3o_Qp1yZBztw2;;tILp`N zmwONJSm-$D_(sPc)@=ITf2fbuo)k^qomJLX$>`}<&v;*YYjy7|Z_(bO^in_Ho|(Md z;9bZ698nb3(ZQjXd#PUklx@vo;oY(PjVs35&m#*5a@S=1GtTWa@9>}p{dTX;GR^Ih zKmP5Gl474Y&vmbfsI6by*G9t9UYjh}yfU#1u3ceXGHAf|ejdehxm&b`Z`nA#dcZe- zY~3GYI_FH6b^9s`i^}uL<3>1dn?1b!wY>W$=N>#B-!{T-d3373YOzpSTU9}##(9?XfmeyYxBUiODxFS;J` z-*xd{x(njBj7;wN^xL&}zI)p5^y^8hCY+x3WWi0Jx}F6SHw}N=sd8+$4AHG_9uo%6 zUw_l?O7Wo5`&Y)it+@Q{il;FGUfIGZ@9#YZ&-_S_o7p3fmKPVV*$sV~DRkSmtiFvN zjZnQEqkC=`odoJ0cQVR(H**?4De3ZlgC#cgc~#r4Xyp9Xy`{UC)sR1)wybdA3ulKP zE{r>{eAU@DkuEo#W6L}6dp`bgO3|^&d;eJ2cc^jDoLbMQpC-;+nAGEG+n6PT?E-DA zhS~|8SERJE9h6^OH8=Eqc7@0KMI(N*SijNx%z>%_@6s|$UasWKIB9h#!r;4V(!L<1 zNV4OzM~68v-8vhO|1!jTYHhBj-MUla+m*Q+6Kx(onLt2GnArFO#RzgYmi7kv=@2mgNsy%OCH7DA|d zh+otFQ~tihKnD8agixWQw{i8B`tu2kZ4}aHZmz?(o|ekol2o1xBS6rcrTg^ZyX{>y!=Bo?(m;jn5{yM zER?TAsKclxPjls~39B~SUs*l}qC1Wfu?+10Zn6RkG`2OWg$cgt0Xs)soSfVQGS{N| d2M_;&{cwy%aS17J6w)s>)HdVajQ&@9~uXnYhuZ4>T0uTTQ0RR95FcIltkHi50 z?C}5q8UP{CO4-xH59Z-#V;tlK^Sv$@=niK^;{myI06^^h|BnA*4I~@)>9z?|Hn43{ zB$jde_u)X^R-NOfrhBYHX259eQZ{N#3$ez^iKuga3a31V-hC`msv9lK~nkw!TQ?Ti5*Cs95oe$4K( 
z)H0;|`3eVHV^sczVaYso`?2i?YjPzOT-&~_r8AGNdttKO!E$)%I>()%ius^`(!}1h zo#|vvHC|(Q@gi-3KA$iXNmw@#Yg>w$A-+_#k6l=tI+V)k02kbFabrfcKHw$M#CfFZ z%4QtgTHo1UPws2Z?2| zN%XO5Y$%qWr>D38{hyet9R8tW4{MIvSYnc3nQP+%gZl~zemnom*#BXg{$uH}_Xe?y zCGA{SZ&02;{OF-hm|OjV^$~`WmtvN~gR%N<9=qb+7YDA#;x)|2>)V6b4VYKMkDH`b z_)5bO2I0`c4)ZaSjW-RYS&AJh1FyN`_@sSqUB4On{N_@kAxli-t^tgej&Ip$YY=6y zCuhLlUfFlwv{i-YcCNBsa88`JQA#}c#8t8JwebxjrSXh&`mf$By&9{|cQ09_dy`G` z!p{+ZD|4J)lJA13mAa%&9s@CRA#v>T^UOH)X*znw4Y@aq{^--Pj%XTs&0Cg0o5f3r zYe35xaz6|j)>S6Ejhdm3is;s7Dwcf^ap%J0Q0&rw<;ln0?bLnjOg)w-3;;r$K)BF% zsswoYxIsNV-2|a79)HsY2OH?HZ2R9H#fB~4%%<_1qhA+aQzXpIf1$1Z;>tl-Lak1< zkm~ zReUyTc<<`dT`~XkVd^@6)W&!fO<^lYRe!&;cj@c!Zy;NWY_l zgR`9jb}heI?l6f*-WROtb72!fvhU0F^$UW-e817-kx`#6trDetKz(Ds<|1c$tuIm& z-EuCe)+;~p*%f7=0$?sD`j-0Zc1+qnP5?qDG>lK`G7!Fgu7Bum3?K4V7cq4#-cV){ z10iEyuK<{e-=6fw@Q4B_uChSzsb?}Q-ehWIH}gG17yc0j{>px8UaBI= zs$0~y%l;T8OWqgl#mJJ;iD_VEs_fw0gg6HIhy}-3u{1xV^B=UEx!YUZ5t{jlUGG4o zQ#jsC=Elnrklqn~b;L)B-VrUe42Aa+SEj_XUi*_g_nzGtu|tkQEeL(cfc+QM&_0e; zCXY-m;cgcr6ov}C=u!tBe7^JYWPl#IwsV(Z#*?sWmDET;0$tL%6o41Cse2d3^Lou6 zU(+fw$3@v1CDN&xQ&B{c_D0SCY&L>$eO4b>$MbRApMjzGK0>3qVY;AamGu6?9TAiu zm@a~&q8FNf&Dwdn!7z^HVvZ#Q3?f#|4c9GXV}jcUy)Fq8(6INQj8m+Wd#=JxC*O2w zC+M;SykK0X%!h9h&;Bd;LiwddU6%+Oo*Khz=*eB+1<{aR0@a&Yr_xg}wJGhaoosZ- zz=nu@@^d2c-{u>+2ApXvSi@k#&a!@$E6AbsAczBMRrGC4l z%wQ#Qy>Dz)mYb)We5~8SmJ2!T6}U231%1L{5khT3dmdMdXGPhqoCH6alXI^A0hHX_ zwZBr=xlB6ga?M)CKs*7dUID{Hq+C7uTbSPN)&<$XjJ)i7=MnoMG-Tp+C&r(r|c1TzmU3oS=CVopDvUrOOSk!ZlnKY1t+QnsWF`u06AfMi{|I%eHDkVtcL z`h1hADsodrzWCHZlM0C5dUaFPX4(UiaNxTggq+`-%HWRwJ_dMG&RQ~YuNK#q< zzDmdG$*@!N@z;EM!d2&2%L`+Ar-3Is3sTvq2W>RC4YYZ=#1|4MxkeEoPz)3jt3#mX z6nA?w!-5p3B2EGd^Nd&(q!6Fo@V_a={K@)K+EzFE=PrUS))!GZXd|*lG`jV~F zIisT!aE<0B4;2eP`bph(VD6j;#1H;)>S|)j-a)mP64>Pk zW>gDEr4%T=>Hw;|rBjPYHFsBuh0rJDN+yn~onyb%z^{V`8L@=NeKvPj)5-fhYtKwX z0yMxqz$vf<7-my6IOT%`ai~b-%EpgvpKtgj(TKkyB>TuM)!q z5RMMrtf72Iuwcwn!i5T~M3L1s#5|A;J=Bs>GA|+}d{Hn4i5>v0SKZ#Lz*Jd@hzMpy zkd;qI9#UxfofAN3fK5C6ql3#{uhUMR%<%1$Rv&#q1POHH3_-=xKG!as$iAyP&Z5*j 
z4AI!MyBIJ_1j_NIV2FGu7d&?)ElDw3Xx{T08I{qgK4~uUA)AtA2Uq+y=ElT%-2#op z;B<4}l$TEoBpGj#$X&m66`q+T(+Bfz)$U+Z3c*gie^^woCSwa! z^MYRi#j9ow8JAV?B^^kLHS(oRl5)2c-h~qsi(x3Q1(55-3O;mQ>aJ3|^1wreh@Re} z?H=H+x1_AI8EQ%~!KVQS9P zRa{=dx3k|fE$UwMuR-a)rx{0et%NtDD*8X%3ecy1gp-O+DpFFHjuX&qEf9#xE<~ys zDziWL!N+_5kYfyB5j57VG_=e9p$9L%Hr+p8RYm3|UZh@i>%M8a4HG~A1aJsL(aANM zAUJ15SdeDLcLxxs0n85UbrJ8vYtmf2Qj-&u_402R`7rX8OpSBRn*f1XU4#0FK?6#V>*|2GYxrgjDaoq=%iYOx zhFSWz14itikv&rnskxLfLi>Q%fw{dD{u!0$(>b5s%#W>VXwvrtk8iM`uAXaJ5aiZ# z;C+9>s`d=>u}@xum+_g(=WV#lOCdL^q=Ms|`5LC?xv!Cin6>knMMk`DF{ah8eK4+d zt0`I!{TCph^DMVuQjIbzEp%M<{et=qhoc;twO=3myR^4nOiE=})wpJjPUd?dWWWzZ zZ@+xH!!V*Y#V1}=V%@S*wHg_;g}0WwrNhB#?)J9xPeL5Z!!|Un4^Of2>en2%NglE_g`GiQYa{S4G5tq0 z4>9N?$5Emz&bt0dsa@wpEbe@9ZE8R*FIER=7#%e@f zUY5G}kq;qR{j)%6VRNkq$sZPlsrn!J6N_^O*gPGKXwzjl${I!88Lq3d<#k@fFSzxd zQ#7c`izw!h5T##;e<$8HjZhz!^^EwbCKMF&*;a$3`6lnHjm_Y!!P=s%!AD;)_Jf6$ z!A*nmZ8pbg?4lt|9jk@SJZ)VDDdFzp_PAk)9Siu};Fl7+5OQt$!W{W$wVk~h(N@Lp z%B4Gd?OU)B{8xq05H19pVHIJIJwV?T;_GbZ19LF)^KtQT`ZF13in&e@rVmyb91+sk z63E?iCUR#*lf7Gp_~cLO2~_jglR6EoZQ<>^a+Etl-_N%eWe5AiXsXhUPmxZhqk|%% zxp65`iAd|}aCPBs61h0K-#ti8#|-c~Hq-LP?hF;SYIX3m*Z1-C@)ff8^nv}=mHoF^ z#qLnZJsZ7NVNl16&Jnp*Hm05&G8CK0{a#fgc0ggXq=wFUd@{d;w5bt8AdTKEmtQ&w z9?%(HDdi6r0P|I+vr>0(13Ib`TqTr*4&)k?kO*xeQ)hgds&jV=CKgnC`Fg97SM&IU zb;HTzG%R|b5?RWc8N>?MDEGkUnS1YOxoa*Sy(#wU*pDg!aCytMa!7Ng+1KJkjYOl! 
z)m!wsKlXtH6un6ClXEgV*e~vPt-ehDGKe%?xumu7<^H-KYRZXZI+|;5pf{%fu}NyJ5)Mg5XAWI)z+ondugO?f#7=vy41n)k2aU{ zf==K~MDMOA-=;jJ>4HG)MEiSC6UZob)}_dYTH&L$^)zjBLVRsOABX8BHz ze^db2GSSW_NW;Sc7WiicFm%rr0K{ia0g5DqMIq%Aq%QEbc87)4E0!CxW|6d;A=6S5 zku9{SLQxSc)^E5dLFnxli=_Yv7XYCACk5=hy#8gtH=TVSnfH|RJ_u7ruB#tjxH$8~ zNRiMaj>^@-8{nQ_@M%Iisk60QV34x<=qrL6k*>b$|GOKS(Vad2N%YI_GxPJ@9X?gBcgqL*a0c33&3Lc3dbpd(DfI3aD&WaeN{F2hy03eo%nEgxGXqsrr25GvrVNR!bHS>3jb&^ofSRXZulX}OZwmc z{r{Q|X93Sv?>_+fvHdQrLeAFmX93P8OFsZku(`xv?ZLmE=tt^w7UgV)@dJf|_Pa{X z<{M`beohU3$O8arScG2^g|p&kz4#9ReryW(^D@r*^s`t$yXYTC06+#7>%Wikm&4ZA W!pE8(06>cUFk`LvIyTAx0RI7F?$F=> literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.xlsm b/pandas/tests/io/data/excel/test5.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..7b8d489f243219ca0dbeb5f8212ccd08eab765ca GIT binary patch literal 8642 zcmeHsg;!k3_I2YB0yM6{-5Lq*65I(c!GlY13lKB}*PuZf3vR)k;1Z-ExTlfE0)Yhm zb>_YKW+pS=U+|`Sty{NO-E-EhTYJ~3vyX-{5;6e*6@U%^0B8U-Rn5LG2mpW|G5~-N zKu0u^adGyrcJ_Go)YsM8-GtM}$&tDM8Id^;fCzv7-}XQJ1u9ZT)%v)APc9X1WVSh? zSL!6u`3@0B@K`m)yBnEQpLT7#DPLWxOh*DziDsR@)l_uOAj@~BMP*|mqhszg|@^$FyATZLpM2f@bFEh3-{lhg@G9SQ`<(-p<* zGWx~3m~B$$Kl<^grqaJ?uHR6GKasmbL789HvOhBZO~xhWOJlI7 z9e1Q$$c8=wy>n6YOW5`pAa;21kb;uNGnKj3=pJY2_;51NVD3SKp0ctkl!R7c<+VZI zQmV%!Qvm&<0dVD80A|h@26b^GLi>rmKtah+YE0gm7Wkp@a<|^0rMPVy;}jaWLiT$Q zv6s1R`I)3NP#f!7km73LLUv?f!6}k|nCu+k!>(G~1``}&_xDHujlb};QHP7}96q7~ zry(Ysr_WwkJGz57e_sFP=l?J%|1|ZABvti3u7?rF3OA8MmyneNptQ2LltLTL)0e@D z%Q%g(h4kbrT}%|fr(~f>@U^5TX0vT^W&n<>raT%J?Ep z&aGmjAJ+@=4RVnoujI?@)e+BH-dvimJiJCHH*+dm3!LQCBg4QiqKF_8%Lp+XP&HV! 
z_+Etok=8z}jHqc6$lFhz%nF)ME8fGBh!RqPO=l1exm!G6s0tWzr1|!POh?;R$gajB z_k}R6?=vgM?lak}F8td74z=tdE!xL;-+3nG2O09d1sOIxJ{`^R?H9o4D<3);e)Wc` zbQK=5{!Wq*@s#v4_!nt~lY|U_j^N`6`VUXMU0yg?xVSj{3}?S{1_2(^;9mZ>TZNXY zaz7Wa1M@DD%R9@95O=`?L<`e7z(F5wU|C|M;eC0&M$X!5qCck$LUM`lJsuzQy5z&& zM8f~p!&VxNhwO*%cr1eAGJZCWgl=(IN%=}11s@N#e^7piiJj?=-y)tg#GXqa_~kn@ z18HPoF|F)*%XGwJ`w1>j9((ssk4E5_8Ex1Oz6#TZRlaIyYNF0$vDvwAky*MxefP?j zYU+#>Ls1mV7cjLuOT+fP=g9^7*Y+D5Ro@ARp2Dk#I~3*0l<)b(0wO%9BZuUvf>@H= zcM3AvbdEPXq_0O#P_VEde++OWiaX?n2 zM?}y7tT_eCZ-smG>$B6JQVnP7)zqcD2cs6-hkI5Ej(beBuwVwjd}%n#NGQcue43%fV~kqf@~x|*oU5a!h(&HW|K5B*br^Mj^IrEsb- z)@93Zb+i8K$7IZ2esu&WT?@kF`JT7QX}X^gg-{?iNf>?g1c9Mt#%II`kl8*tf*Pgv zPj7I=SK3^v%n*x&K@ar^`2y(KkS0Dup!J-)HS#UUlS1h?>U0EXXtNaa#$ZH+huW7uc@31##lq8Vx0z- zP93bN-BQovGRSwjefT?r@rmvwT~D>^!Zu@Z z2`YEe__8iENBFR^l&9sW)bXm83bEF+$(4;Kxnx%`x;+pkLR?N!$6$^w#kR$Jel+sR zq;<7Sc+GUdA*Or9)}M_aMu*lc&xUjlcr{PL@!44DLp@G!u8MAj=7Rv?S9w@HaqWD> ziymUlir`&y5?L7+AXVF4ii*j*{#+4?h)(jDlF)T>(s;(6*#|D5b#UYNQJmaabZ-}T zdotH%K5qeaN0F5$mpwcDHTaw1QpuyWMRoqy&v4XSgkx8i(LxC{LQH|n_mJ#6lz(CZ zNa8EPFcJXJ@DKpN`VSM_J$xOl-G6SHU_Gb!LVn<#Y1uu(Y0gJUGa5uS@;U`3lX9iD z)GG)=a^u8jT)`!<&i(bcIy%+%y@SezN;Lcnh z`l;h1T0eRH8cCq$u~{ePrJkr6A}bVm!|}q{mJsZ>gd9kY%^ujl3B| z#(a{it==~##f=}C(E7nG;rWbZniPBe!)YgqEGMs^IftYq7TaS>;B5J|nfXOZLSv$~ zo~LYa=mkvyjiNT@V&DF0511*7$La&M!PqkfZ8cuYmpm`1WaY%(ge9OALXcl2lTER{ ziXY}r;TpB#8p{~1V^)eW=pNHaRaDnm*r!Gaa?#4Gtozn5y^6mQHaKR#TnofEYt;yo(~3j zQ~l6ZMj^%**#*%x$eegxAc2;RtV;g4&t0`ezO;1=vwuy646V8V_A8J3Gd7D}5<%lO(;ldzu-|{gxX;G8zw$1; zfycMMM?V51E{Y)dh!)w;?Ekmu=V5E@WDWZ5%Kel1FoVf>az27ix>JdmFS%dl$LJ0C zj2M!^m@7`@&W5vv0&YA_4v;}VqJBTnWj z4y+0`q*OlD@CK1iLZOq#VnLG^#_b+>+EDb8>>BW)F6gN-8WSBq<8Z^_Dh0NZ>ih;G zgQcv&tED4E_si*v>thpHBideWue_k%CXJ=ou(wXBqXEd{QP0eHv%A|clr8DkxpUG6 zS%gvBxH;xoQ}Zb3lA6_CS#bF~@zBu09u_Hj``fjvZ*b!p^T*h>f>bRFBYWd;%JF)L zMZsn6VYW-*ZUZo%DD|)djeeAN+cv^)(6lB{+LZ3<>u9XN#+OLn(l19vVsISn$2|OM z$Y#$1K)V$%HUQy4^UFf2^Uzc=6v;x|vBz5lOXqEs&Xbe1WX#ugwJQ$i`#MO~3)R(b 
zN8i7$b+c*MtQqy;pwjqE!S5q3ImGQ|r6+&h;>?}IDwRO_9=0PsNuiH)*o~%|?b~tG zOTZ`;(e)Xg(uH$DA||be(hb@*eO581IyqL^h2+aX5rny({SSy!LE!uW;sCUG4bZ<7&NU z@0N&_Nr^bW#SA9%fH#aSh&<{$@&S9hSU1GlH zuyXaF`=fl6RmG_gY^o#@cPOj4mvartmtqP_o{$XMBtAp7AkS0}HZ(O|NI3^zSFp z2yx++?Ka3*M2+^!0i~V3+Z?COSkLkwP>S;;)=&c*6R0fgn@@Q%F$tDbur*aYBadlu z7Zi>_(}{5h?g)vw79-hrB<{`!?i}Gi?4`O8znbG*Q=0TM@(^x+vT-NoE~fK`am8qe z*1up>1Y1h1MLzbq|NJUVUv@hPdqt6TDux{jicm&GoE}vqWjVBwO`4nV}ixo zHMaBO#(=w4V+HO~kJ>HW>tmm-3x8?m6=KY2@0P_7cV@{ggvaj-LxicS;wEsb&zzMN8$KQEcE*U9DL zJzZBII3x^$U^Ag6=7-9;B^hSuJ)bEPKIOz1>bwsNG%5ziiVB_~nVfCR8Em#%VuE+Bk`|0Awimr)&usfR(}i|U zOLoE*N!6n$@3s4~S7ZR)$cwH*OnJv$>E9p|_G+iDs-Pe(I}gEIlAW z0fbLB^j_(D1_jo@hQ9!hZfyf#6rX(kkffr2#J=}ff9u-3S!T0Jf!~~JjA4)%^!$M~ zXWLa{-bjAMqbY;1uh9i>d{OmiTMG(v>lDjb(XbyIn~6iiPs~^`VO>?NAP{8QJ=2zm zZ$^XAfbY=a>PvL{z!Bl`D^O6}nNPHCx6rG%lRv-De@q-mKs zO_{S3ZZdhkw4c|TbYg4#Ha*!S&PB2s>m=DPQ|(DhCyB3!QN3Kz`U}y#TijWFy!`zU zn*?k2Mx8@>7lIkwX}=aBbY=T0Egr2zubuX0Ql+t~c0K#LZ(1~aF7i!l@bUoC`K!}oEIUKb|lq&t6Ykc)vu_+@&UC{!elXpmg&a*0e#O8&x%&N6+ge+DxPyM{ zehXSDDQobw4LG>QbQn}~WBQO@SzETWKn`-nWoOuQwD14*18hcX01}zSJZ% zu*mgI3)LF_kK1xxE1U^s2i?c7EtI;^jTC1pcB|jT3n(iEtQT*Nkm*m+`S?8xxJ#j` zSh^yj2GWys6*!@r47?@gusxz%3hBvO6dp%vtupU)?=Ig+)Mby|LYXlp6UO85#^{@C zB&eyiDCr5q-IP2H{&ubFbtgNsiI@Gwx$VF?4DyIpsmw|cTSKW~D3{Fg5s2{OHgl{+ zuoeya$W(cwqRBG~I5;4^b{Q~uM3NNUp7dEt!fP|nbjmZSk~oF%2iH4d47B?J#*p;M zJW1qI4MwG_)ATv#>ux5|=)~Q~;M}1n3I0BSBA}=|%6*K`mq0{*YafSqbz<1B0=>c8 zl#~@$9Oa)Zl#F8@*mr7gtZncZPE7lxfu9wRQeB)7TFsyayejDf*X=hWfHXiAyh%GHE%1F9SlE_;8qtJaKy>%|H-lBUQfvV4?E`#LX2TbTNF7|lc+ALdy7h(16C(m!*BWj!qN+Z}i^bPOL`&=Or2K}Qflzb}E)doik z27Hv@Kd5oHHGg4krS0*;&iVOo3652Kk3C%6mOT!+3;mq!AQX~@M<1@W%MDbJFtc>A z2x-)6Jea-J^G_7o+C`9-{?&DRqd z#HEuOS6cfo9B(3}8XYTAo%6z`x1~jyN{sRMW4<{SJ?%9`rBy97X*#S|B_UNwXVS${ zH)D)@hu^UxDEPX^PB5Xgo-F%zr@T)~t~H%7e(-yBt9{z_%E3|R*ig{OgkJAIrg83= zxQd4Gi8{e)f(zHctXwQLUbwirgDhQMSpT=2^RM&;M?pZ6rg|tBLHL^L4G#ENV}zm= zRe#o6os&Cy;=ArrTP5xTyMxbn9VqY^s&}L)2+-?^$(~N;pQ>QH7Z=Au9)WsAyL>0J!n?9MQ^aF1tSoZH0I3kfHzx> 
zdCF%{$&zeAWxLD91CJ>G5u$ctw9AR1p3jd}KgG0?a7y&JyorQXy$9*Ev0*bSBbOK> z)u&F_II@LbJ`HSs;#-}U&&A)v6ycSD!t1qh(V0Yjbg`gC6z<(djB!^f1PFun zPIZoG8pItv|3P7`sW9+T@Io;=Im$HFj-;ze{oQzz`SqKx!1bWbejH1Q%xxwa-az4xzDK&CdbmO^ zw4(7aq@qAU_`JpfVbkNeTCrvR2l}XrMA*Fghs4;{6YvV9b~T1j>_NQU4K z$~lXJ*n&l;)PW{Oq2w;B!XE4{XE$n_iBmfID`9W)-%{v_-B;V~jCad}c39}mDk-IH zD-qPHXD8=m)jPkyNMk%o>W0^+tq@d>IR&mHNci{;rx?E2WpE{@r}AuWht>D>j5t=1A%pH{T#j^fe2-v zj0qFaG0~3_(ZIus;_YkmgFm!l1Vj#a%=qVO`R^NU=-wuMmdinJz@Q0Ue_<8c@41PTr{OaMa@%0aT u0APv~0Qg(9{nh-hyWyYBEhzqE{@=Y(Lm36Wx&Xif_%9e9(AlYe{`)^MG3}cG literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test5.xlsx b/pandas/tests/io/data/excel/test5.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8660913c7481289e2a1fcbdbfb55744d1112d11d GIT binary patch literal 8626 zcmeHtgOiGaqLQHzO`@2YcEAWJH!c03!VQ|F!?(5vWWaRqN*lKDku5mHEo4 zv{WyNE^rt!gwOU=yk{V3upDfVYhwe;eSHIz&L^@JtizvN^5Z=jx2|_|coz~m*sh87 zCV03<`vrv{==~7%VK)g`qN~2%aULOsBoCRfag1daAl;?8Uz1;UT}r8v+?t43JZLlL z%a9J<>R>BYxqhke@M7|ISq0pgXu!1KLa%E-ZTo7~uupUfXGpH1ImO<#zN$Or8?>#C20D~a`H2y-%CT(tpGdR{%;H<-h)6&@0!rl$U_4D{Y zI{p`P@K3K^nW(DX&y5v+tZ*AKd^xuq50qB+l2T}=d-gI&aS696rjU_hxtp01_>4Rh zNj|XiwuG7&&e?eiYUX0A7=#X531^|m|ayP%t>n<8B=?XkT{hcJi;>l^J@DXW) zlY|_Aj^J$%`WH{UoLue9oSf`_Mz8;H1_2(z;9mZ_TcxI|@&Grm6Z1ZT+bheH2yek1 zL=V+Iz(xPi$hycx$M^DVm4dC!NcWR62+1kj=XiX`^HKn31Bu|gm%TI!AK90{{#X>n zY5a5?3Ek|liaJCdg#aJAe^7CViIeF@&?=rd%#lkdw0*_$kSwCGm|pg*bvm5ic7of3 z*VgSL)d(Cjqm5sKuEVsTRUwVd&9s@UmRqn^nZ*m#w;?_>Q>SDYielJ4fT`VCI`*sH zCl~17J8msieJ1RB3$L-ZC@Yky%LN|$hr82849n95vL?E16=bw)AFsPh-;A(!FMj73 zJv+Ai`eA5p6+Y8{hYZ3t3Sb!ymumQ`iwnO34w*lL1)^=_l*LUL@U{^abgPDqL5H03 zOqE3+;t=(vDIlA26S*QN!-2e@92PK-JRSE;YhLcPMD_Yru3KErOhfT-l^+ukiAC5F z0h(SVgnny_NqW8;UsN)68ls}er9_-hrL}8+eLlLUGu)m6^dbf%vq@)s(vGYvAY+dd z!6RZfwcs?D;#b*1tx)1kERW_f`s%$VB+i8nnxdvlKJEn`2$XOTH*)EgUD76Q$;w=_ zZKmu`J-DWB2^ns0t1n|=3(`HwJ}|n3po-46Au$o2!1`2eYI;cb{WBd)UNr0N}xo+2z%?*|$O3}ehgJO(vNRefH} zO3<%F4^sXzu@60qft^{&7-HDKFAYwQ;`GCd_YkP?!KtX;6eS2NLA&I9sFL?=r`58a 
zF?0M$SudGUfcQZOv0pC6w``9O_p^q)wIa``b|saq6?AMB-7V*MA zMp0$L0uP>V1E@r>&j_~>OJoUd$CST*ZiKBfNyqS#&VIJ)py}2@#ZNOmPA^I&jwU(r zV(BGnjR*;9o;mX7lss9x4Bf#-+Z=ei80t;A$gqkDRpd_pj0C$~I!W?@s&vSk(A~v9 z&^)MQukVeH7xV}3q1jv$&12Z*iXXz(+-VliYBFCQPH_t~PhwjcoSUQZHDwySC^lQv zTsQ|$qM0pvuBVoWy!l9ro-G5V*v!JJ_XwZ~ksGwyJxoDqZ|u~5*VV~8pN{EOxEG_r zok|xGYzT3D^RZh_Q7vtnH^&Ef0V#kqC9lm0Bdel&n#bH%yDx@jd52M37Mg5hF(WN1 zFAZEQy#`NOc3H)3m}<|b~7%y z;yLeFAKwVlqVR7>BIKR7 zY;$kx<%scjj4Z_Ow>mTtl6%faJDy5o$A+7P)(||I&K(zJW4p3kYpE)HOfr-~NpCtb zX=6M|r>}4}6+2^xOvg3wz8r`Rkj{|l5uh-~#@yi#6hD&e4EvI9S{wkY^Ar6D1`Y}$ z-a~4~1Z}PJiEB=Tl12<`)k5=;R=d^=DyVK9lRw4R^EwGHvnisq@rD{Qt*P~gdH4C= z75aq+>{7NKAG`>3#3W&(33bj5VJO*)76bud#14b^rn);HJKv)XP&BNP20T4B>B78x zE@pzrwu8KGe_?3Ng^h}2>DLk-WP-|1oysai!U=qx>fsJLJAI|sGXDcqxphO0GiNjJ zbgR9GQQ%y23Q8}dZB1rl*{Qcc`=q1WT1)vZ%Vxg^c_WaVr6h{Nrv;$?9gJ<9;yTn9R>tir*w&KRb^ddRFDM9PGhiq}^1ziE1q88?t{{53) z2y+&%`8!&@G2@3?YJA|Aysk8|a*yAH#iJF@A%`T9PqBr>eR!12J!;N9mN8n-q7<#y zGp3!QsII-RPm2&}Z~bUdqpuiEU}3)pTGr`1WqI?1=Rh$ZXUsPqEYsj;E1~yD{jg3h zR@iaZCY9_!q+w4A!+_YnDfD{Wx3+i;FQ(u@`1qqmlCpC9Y^>PwA2;Dm)*Fo698-$C z-ec#Mq7R=YTWm)$Ge(UO%W{uVip6-w>fshF)CY;34F!79{LoTHAt4am1u-5H3VBUuCb zJ|54!z`kaU#h9>khm=u&JY04YC8|-JX=a0B|~D1T8J69 zp93!)0~D)B(aX=KLw%hG&+t*SbsI<(To}<8i(7%;OCkm-a|^RxeC09-^^Q~zJJ1+F z>9B4mI^Ric2Bl8vth|oG4rqFbbe?uODjJPzXFKNZTT4EB>JQqjgt7yO4q9Fo(wv2+ zJVuc$#2e$^ELc2iuX3E6tRrW+v8h|OJKNVrs#&P1aXGsBzS_gCVYzD1kBds@Jq14x zzvL9RnU$XWd5Zh#TZ~f4jsR8%;*(^?2)o@Vn%Vv>`{(f(g`zs%qnD^L`i>ovQjaR@ zm2r4XUw=-v<7w-kMgTCz(@WvYb69$Z$i=^>EBH$LaDppCP8uil9ryH#l`2EmCq{X9 z_y&w`%2OhnPM74>9Eo~&;K^Ct>Be5+%#FI*G0gpXt#|K^hR;B5h$>m-Ja)4C4mw#Q z;d$?UJAN8r{Q1Oi2zK4i+?{plDh4% ziF-QMl5Q7Mf_aC~_9PB?qJccVdFb{ADwj{p0X1B}ruDYZ?kiP&{+H1!>kBUEYmcXcw!r1^)FR(UxCmb}rmQl}zM zUQ23ao2(CeL4n{=s3U>dl}B8(=HP;?Q1k1VCj6`ZFk9udd%cnb)k7#bw3W|L!{amSMmNOj)(zVWtjvs%B%HDup5iyCIWwXVXz2Ibu736{pEI60Ft0;!u}>i#a~voktUxEx>B*v zExgsb$LeJn-Cy=ppaRS(4mQX^*gm1Auqv6b)z0PSJ6Tg8JR}O7!(m2E$PblsNz~7H zZZ%UTa>9i%+yx5@dz7wN;7B*6`$(GbJ~k|Kp|#dpPnW;whec`XQ^Quo(y9VPQAV^2 
zZHM{G`WZ3GEZ}=ESqapnJGvTcqE*f}iLWddfvDxnFkfDzyRz%D&|~3tj=2|f)2JAn z%PRQ#RrCMO%PA98V1M6W^!K5KTXved4hYw;?{znqk?Tl@4T;p-)tm&r~T-o ztWD&GPE{~%N7Gk*T6QN2Ve~dKbFqxIgpd8zO{E8YE80CbYY{4FsB-cFVLT7MRf>(i1o|yWWoK7%XQgK&?<| zo2v^Z(Vy41rD7~^QtE*0cS%cqEF}shn`PYeAWO~6Y0jLTaFNOLq5r(rtQ}M9yWznu zaVDD8R4>VKnPN*;I!SU(g6ipvHc*J>)#}FP?dj)-*eq1LH|iM5w-EHujsANPLU*>0 z(wC#0veFd2x`GOiUgbq40-lWiSNIZ)=fIees^Ogp_(!miOd@QzVyIQBT&S|m;PVPa|p z^>jwfUl{*mw)SwCjq0cFJO~%3Nq=Q)Zq^nS?rxwzx<7(ea`GxXZ37OiG9L!k-oC(M zRMwI$Es&eL=C;vqK3Yges=PvL$btMg_6q7N7f4A&gNWYTw$iK;{J5*oF~^-yw$tH% zZKl+NZlE|*xm)u#PEc9Nf30|9gj{!u!Q0o^|2~a4tER3Pf~qmpRrdREM@h^+I{Qve_dNI5a4|dg(uO zM4A}Yk@#6k!gC|<#gs>46-hGD5AL@l7-+CTrr@;6JW1qI4JM`QleABcH$BW^Q3<;d zLAk?E;{Ch94=L$~)bF<-fHXjrd`cxuf@0t3y)Gm*e4!t_I*cKVGB?nE zdXj)$@?*TMx~%}b9s9Td^xJs;^=WozQqgtG9hU}n=Z5_1uVSSno70bYJj?A~Qb#a! zkw(-J9EI)^KVRea>??w43s!$FbsD0$8Z=^vVScc0iss1bvY9x5_odhK&T`4@RS|aY ze$xB~3{m4mNE*S`uK&Z{y!SN{(LZN4NhhM+?QpbU!1Er$f1$?B+SJv;T+7|n#?k7J z1jjtC*A}j2%N_^chknks6An(rXMCl(%L7!9FabN61vhCn9ZcT`7)rB1hAu6)xgBqE z4Ob^; zHxSs#)#BgPoBv5)a1{6_8msql6TV+ny~R;pm7k}>lfi0x!t8q&U%W&H4;xS< zBCRebMjGmDGNt&SW>>e!-JI09?dAE;4y%Ro#E7z8k;dkj-quwS_G;a&lMr=;1W&gS zUSb{cDqAl19S1y=S~@%};uu}?1ZmzHoIy-T-oZnrTrlYAJ)H1Y@XsJ zy~%JyQp3ds;GYp`>g@bKK*ERY-z_^))oJmkevfoPgSAW{ysUv0Tv?zXa#m}Gu;Ff{ zRt(O6#~4|e0G(HVmoU5F`6Y%1^g_I#^eE0K>zwq1@AJqldJ-~`P4)wvSD(|2ow_2U zNexk|ymTW85?khJBuX955Eu*~9C|`*vi+1gnu#{GNcxa>%J7OBj!@AFZGe$MD23CC zh&zYV>8;vE!j!h|a@d>vbjs(CVKp{e<2~}AEmlU8Dr#x#Dg?Ef*~w3`>RsDVvS<%z z*$3)4RD*{*yeUFn)++^DNfRmACAL<1?>)%#?W{3U{6VG6VWt5FwF!;;K68bpG<@~l z(}+$_E(p-kYNgMBoI+|rq4r9HE!YE3Vc$+t$Yq%xCYi9st{aNR9a?o#qOi6QMcRQY!NH&baMm(1;drSGbW5c$HYHM!~za0i@)BO z9{duE5fC}yLF1pR}KGX8=2hw(4P@ORVSrG{Uohj85;e&64PhuJ^0^tg1>wDeJ}9K%M5%&34e^g?F@eR@YewQ%N_veAp-#Z7H)qx|Lbb_ gXLC);Kbik~ZPZXkfzK`g@Bsb^f=6^pnxEhP4^`sj;Q#;t literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.ods b/pandas/tests/io/data/excel/test_converters.ods new file mode 100644 index 
0000000000000000000000000000000000000000..0216fb16311d8817a64e1dcd2c648fa07c6cc0db GIT binary patch literal 3287 zcmZ`+2{_bS8=veT6$Tl)}j2XKOvdeY}MM9YAV&YcfB7+baCTSvM ztqDm+WQoZ(!Z-S!@9F!x_dDk~&pH49d4BIX&pGe=dw<8lmXV1A@b4lGNDtD_0?|K? z8XLU@`v)T;V!{xxu&^M1I4r_HG(%&oQU_l;v2@Y=Ls7T7lCQNDUP+#mj$Yw5{N_aE=_5`LWxEFiN8ay*irU_| zyD{44wDWXya>=~#KBdVkAb!~JA;R334C^VMbS?%WKQ>q@z}sXl4lYn>@hwHwHq`yZ zG6N-!8`XW~6PsCQYqKf~oXL>8lrM%i+Hj${5S1JC-7HBiQu|Cb9=#k63DQRHs?YPZ z3lOQU$uj(ijJ2epE5k7dkjM=nD z%T~-5eR3B%9Nj*1DZ2rSP=sW}D3j=!R2rt=NFA)p(OEyo*4ip6%O0<&#@rshsx*w(mma^$EIclv$D1Fdr+!nRIwVPvfSX~weAufu(A2gV}uZ?qynrv!cLAf z?w;5});uOCmEItNp(QpxLgMap$bQ`|FOy64#`imDYc^`6wDU>C&1LeO)=_@*gaIlI z8%rP1&FdFJyERLMpt{#xw)}LMp@G{OYcDO49^M~Jvt1a0!{^zQJ!cknLG4nKp0^wq z6gh{!yJ*`5J~8wC(z1<4KQA4z{J@y)%(Ca!8hHB}#z{7j)!Iiow@gVr*2>O}aCv9z z%awdPflKxMNFor~RB$aGqbVDYQ^&4^+*xZ`Z)J~#X$%Um=Sv|!wz65E4znL5>>;HG zdw>`2enov+oFE?037+NS-_sb<7Wb$Wfs#Q|G-C~ec!xi|wwlDpQHr&}kgi9?t3Hxo z2>h?b^ZaGQKEiz8FZJYY$x_r;Fv}mG%XtsY6&%om{qT?x@V_)rh86jGl?ec-76Jf( zbQ*w%hD03k#t{#=k@N5A@Fdu~91xzj=d1?@k+xhBw`)nZ-a5MgcQ%YivZBrLxo7r3 zX|W*n-#5LYB>j4a-Yl-7+r~C1uWU)@1|fZENKaK^`=`7EY`OW%^_vHK+hQIuryho# zfc0v&ZQ4O%FGoE)Y@WS>$YU6S5Yq;QBJX_EW=i9*g zMLABYaVZB;yOyV}#;n-DJ8-JiYXQMzV$ss*43+ex)qwBGa&}Fy^uWNp`da&rm4*Pi zFSZn3X!;mKep)ZP*RXjLJYTVP2cZu_!a-b#_1<;+OSPBy1#dM%z1dvdt%&z?0$i)4 zau{dc`B}By$JJ%4=d*$eu-qq8qJR|RDPsae;(fi^;S@%uF%*Mu% zeM`>3n?t4(Wxcr#goj`tFR3`;HoQW)Uc;hosCl|ftC!h%+#Ed|dpJmBG#TgxLHgb* zl)pA+jyy1a&^Z4{i2@~tyH<)k82R+fuRS(1{MeN9gEW$3e@{pG4(q#-VxW&#Ac@61 zC#J-5=>dio-s)wfa=uz(L#)!1CxapS4#PB|W7pRJ%KD-5AV&1#pPxy%5A!Y@vo@MZ z@<7_`E%}U7;z9%slL=dCrH`#(r+$Z#bJtU1C0f_+dZtf}dTu$PFpzR_DHL?TU((BJ z#KF8OPYvw*S`9Bfhhwi3xSqn#Qf0`Hu9iG{;L6bKKBZID=D89RVW4YUlDZ@lQwdqo zVJY=F$HB6agFNq4#XH~EnPLepwfQJI|ErCl0O6WPH#dOq>Vz1;|WzHHQkYXo?yCBa3 ztuNZ{{{?5E1S~|_|Ft-r);vyVt;(K+f$=^ATokR^i*kHg_uYos()3C|HIBZt=)w$F z#tBvn^4kg1isH3okY9FFlV}RZ{A{MP5XA-jq06AGr&v(np)*&09{8HineZyIL9K!1 zq{Sd|cXjPT!hF!&gw(8*5v)|dk@OmaOMr_f$xcgQ8@Z3KwezJ8uP!3>JzX@dvW0o9 
zF?X!T_yT_eQL*E^0{zfUCT`#hPQy+}!2NI-0Q!{w>DdsYcjJhS==XJn3(NPD6JhFi zJrN2OBe0-Sx4NtCt5XtF!z%49l&SfT%arJ0&_Op*`iapK27hrGlunL8TyT*1r4aMp zM|&TR33qTC#v|uAo({9hb2Dx)Gcum~&ZOU}5qTEOdHTgVEpddc3nk1BvR-(Ni|E5E zK&fR{9>#_36Gh7Jv4}oDjONq}Oj7>?pX8TR@u5=}7_5C}kE`^qWMq!Tji|3MIH8NM zP)jzeg$Nsvw;$~WtbeL}F!%0FPhGV}C~}5!`_*HS43h?$Cw-8t1@SctzL0{w%jYVyni7}bwkZKqEYqD`V2 zmF53fj!oGDbdPJXcQ9@mpQ6$Q0xo=8 z+vbV&1J@9@6035!pBCseK_olWK#vmv&Oe?iD!O6EVClCmuOa}r>iM|6_qD>R|9BLdJtV!zV*mh| zeiT1cAR+>G#M3!WQ2U;f0tve7CR$IOQD3U%f`#DU2t?G!QKBwG%k4fNrP1I+Z(38o z(QcoS)OJ|8*Q%}oEqPoi@N@xDz{)4BkW{1mtvR+!S26q#v6oHS4i$`g_!?DJFj)%> zxa}GtrY0HMMO7>od2#N6IW(|%-cH|dhee#4@>ZqTz1!|>keiWR?4!f#xYdWQSsV#5 z&k$)r?XEAHq5Mj0sL##@`9yXz=hq#sl)dvaD^cqpF(w;F0bf%`tto0HPGF3>vbPec z@9UPDTogwhw+G|L-}aht3*M5=^^XvrUSmduzi9fbwOz5f-4Kr*!k&=R*AyElbo9P* z!wT4wWsNGm9?MneR9~`Hfxj8T{rI?`c*easQ+9I0C&{G(d3UyJnr&dsB9L424)*&Y zU);94qfhA^-c3%wz0J>EQb5yS6(Hh-(LTXiK^Th&HxxUZCxHrt!^c#WX|eKpzsZ$& zBbJkl2c!abvNwNT%`pa*AHtb_SAf4u^rOE&%YOvBgY6OO|BLXS2nc=nQtp0oYKLTndNAPnO;xQownjg HTmb$B5394} literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.xls b/pandas/tests/io/data/excel/test_converters.xls new file mode 100644 index 0000000000000000000000000000000000000000..c0aa9d903adadade636e3abc1420a2e563b23527 GIT binary patch literal 6144 zcmeHLU2IfE6h5=vEw?{(w}logP}eH7w1wE<4~9UuEnCG@F>Q&)kVsp$P+7WI*90Hb zMd5)&5(qKAkSIyn`0|VuI2Dwmf zArD-0#>zGHViA1+*L)FR0pu4u^({25g`wu@fi8*RFD^j|%Mj{ec^<2sf2l&mxJ~7w z={_&=k)KUw!Tz)dX7d%>-TvqK&2zi}$hQA^-gEz#0HwfUpbYQ>NC8p~Q~*nWr9dT6 z1uO%W1GfRyzzSd`unMRFZU=(E9RSC3r&E(&1G)~V2i7|LMyH;h=>l~#JWym|6$^dx z>Ef?>9ba!Wq2C-Sap6>PBY>#33*G7>n1R=$h1qZL}gI=a1IVi zGa~mZNTgm;2~a-Msx6e7Piu|0R7RL%9}GVl?rQC<@m9+j?w#76t{vgpP3fwmwX-{1 zJE1#kM1D^NF@|&*4U*5HEC09r`1EHs`CNSVnoa)y%O8PHgPfjI(td8V{lJjXlIE|? 
z!EegJZ^*$f%E}kSiH?Vdv&-%jpO>^5{4@WMKk0#TLQ<>)i2|+56NOq=B)nQLNi5QO zX~LJvl-azd^kvE07!uQ0zf}ohoolP3Op0 zly_L4Yo_!swbZ%j`YAfuverePmRgKvH|%gVJh0YeO3v=W|3pi=HnIaP#qxqqTg=g7 zW^5D>v$uWD+Zo^T(DS65;4c*87xMU$$>XYK^6Z<#OkKuFrW`}b&AX=e3i~D@V!s)9 zns)6skNQ3RR)9C!`W*KP%J?-``DGF>l}dY5&p@gQmPB5}s&d4BwPe3}99iQjL>nd9 zAN%#oA4sTv`3WkOs%Q6Cpi&$?d*IA44k)k(FXWb~Rj*3%mlLHZ&6vTx3jX$~&THq_ zTq;>_w%G0Iad4(S)n)ptUs+OKUuyxu-*sr0$_21AwCzdhKFdt7{M1>QW^5qHh}ioxx# z0Nt?1fJKjqjOtT5cxQZMuzxuAY-}JN3?J-|M1v39zpkgZFA|MLbjjNW2L|l4Z|ltY z3GdoQ=d5WfT!$aH2Qv#-ZQ{rqzh2$GH}K90OB(Aw`hlw#Km+g4Y(D@{^B}+oJPa^y zj{{t}Hvul(y8xr(6M*aRCBTLG9^eX00KA`H0eHy$3DAbuncUCU>^nDYvhe=xrfKWp z$iP6hHn*Ic&eY*NIKL1(aY`snpeCJoX7a?@@pDG$p(oxtdj6crnO;_LB3Z5^6w-dbO0LMlI0P1qix%g;aD2@ki=I?*?f5slp=4i&c1}v(_@%3R? z_Tk3O?-2X4iQkY{fz{@EbKj&-r5h?Zjcv|>W~3c<_hT<^#~7Z@M|Fk_?Q=e~%`&4g z$4f2v+nhE7yHSbcker>(3};)mpR;R-+yx;X?kqm(mgg}4{rKg>Z$I1c6=*=$gq%2j z1;xP0UFseCJP-1n|7ysFoWsN?TVbJJ#}LskP;(p4#(y#Y2H211p3v K%c3wJ`+oz*DjZS( literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.xlsb b/pandas/tests/io/data/excel/test_converters.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..c39c33d8dd94f0a1928bd1658333625825d32d77 GIT binary patch literal 7810 zcmeHMhd*2G+YgGUh)wMTv8qOqnzcuViqV?2gA%l&w%SGQUA3vbsZx7{+G>y1s8O|5 zt5l6Q?K3>}^!*Ed&&lULC+BnL`sUo%eSNQW5I6A7Q2_`5KmY&$1xyCmnfc%V0A}X^ z07?K5S6|-V&Jk(nXr$$OABn!p=VFV3WS+z2N(bO#fB)a{KdgaRt)5$L0%Y}UpGc!h zxtw}%pd&8`xhQD!6v!>dXN1+6-tN@Gj@;oZ@`$Q@hv2vgp-Q2dJ}V7vj!Bv3p~+B^ zZ{9HDxZ&)(7V9?0&#qRS%X*6e+< z>qaODSHA2lR#h#pmKX^i3v-c=LQ|PX zilJUuex97*0T911SKfD_eGhAnH?hPd#xmE)0f|EM^Zhvg$Jqa2n*L+yq0s|a#)3Mw zRO;myzpvV<05huUAkU}Ccu419b_`WvS?n@?5ZQFX)k`%+!+W`-;-45 z#Z>mh-Dy?e4$P2Ob4w3@pcx-=;n-TH;f@7euD8ysw)!?Nvv`zX?A`C&EpIYFXyB)RYRwq?bdGk$$mA&{;XBaSgd`<6%f!QH3R4VJz&grdP* z1ETHyJV6rf#F_Mg&kv=Y^VY`Z;7|9MrHmpYF=h$O5FRE5b9^IFcN?kBo}EJ2l32Sh z=PprqK8$O_QqA>UI`9-^HLW_)&2|Fjr`@TS5R6RHkr!tv)iyw~Y!vLZn=Lh7sWvd%kM zA&u%r(dy)Ic(A+kN%#rIZoB^SG+JCbbvgNS{Z_Y*o{tv|ncVjL7hrZZGh4~u 
zh7z1Rfr#)l!$ByO7N(@-}RO&8@_DndAQ8nQA-%FWz-qStw7NY+OdaHGC%rn(Ln?F6`s`b^{pt0iS#+Hn0dM}OwT;sWF$JUp>g)eo^HAf=V-eYz9Z#&CS z=XlzJRXQVf1K3XO3~hx(I->t{RX=Y8|8`WUffj2lK?&x*r*9cM8YO=b=m^rBFp0ET zVV0?k!wAQRA+PUz-L>7A_>{0OopJmk;Py0ENCYa~Mh}63iw{)#Z1s$3UGyrl!HW!M!4HQ%;N@%fT^Un`jEwvF@$BWmPH~bM@s$Qhx*i zNG*px#YeIhJc3l)-C>b%Ny@_H3&RPRa=YH1Xf+h4r3!AbNlX<4xaOG7Ejv$$hMD+Jm8Sn zQz&|g&i&mquh?^#jn4DIy%~?sGsAmILrWL;*j5=A{3&Uw-|b{aug$q&I@nWRzI)sD zDC51jLApyQSIC2&yT+S_lSkWrJyy#8qOIVG*B*pb?+`(gx5_(}1w!{>@HRn%q%w9n z+*>jC9wetzZZiy;W@{BtvniO42dkg6v%kq^YEcm@8?a%174^R{1M?Kpo1| zB9@n>dO3F`XA7&)C+B6_IZCv*M%}93rQxOz0`EL?81y_yvB+Js)FFvw*7I;)o3@^8 zwmB|jNPgRRNauoo{m7kR|B*Vfkpj~KrHzYSVA~o&m!P1vc3&YCNxs9MUDJr#xZQ8S5pZ2QL#J!}xZ;r;4 zuDg^hc%7_N&OSOC0{gd@WNz=Q<$6ha9qc3xoE*Ln#-F+MCIie84MZ=CUwvR#4jjwL zG`{>guq#OyB(DG>rt-B9SxW_y%gO zbyx+$#sCH}=zHYM{R{b?JLV+|c2GxDl7vio)YOazLHBhOpC1{6u)PKqv2eQ}O?Z7r zxWLPzO+KiS0cj_fNwTUxIzg04p$JMBEuR&6N6DStx1JYu4mzrY0&j{kDcKfo&YMvZ zT{)LYvm<{qOu$eP?ZcYX1AUUmyzR*@$I#WnO1DzOOdGkvozuG#Cb$ewVjwEgzHuNz zM(aLB`jPc=R4_ytp7uRRkZBUBEE|-WNFo7wkY^@6uZ+m-eAVSo@~F92LLpV28Bs4X z?F@tudA=ljM{swPyZF3^;53=1WNnoVvM6ik zuN&%$wJgbfmnaC9@Df4oUi#$7%vAQ^0Hep(lRK;>DN0fJDLO5pZTPXuVAa7YH?MQ) z#I4ccooMeDo|F_*<{ZHV0ScaCTz=0}`=C}d6xK-l0L9_fOz1SfW>pJr-jc4*Q4C&P zN&K3-F|N`!B7lpzM>0F#&_Wyb+1A2MUm)zfcORw#_fDeS^%DIAPVI#FDY478k#D;D z*fUtO+%Av~Ua%bHRvwUFP(@E0WRIkC%4dA=8`2xhyK)@Xu81aaTAPwFyUT60p8VpS z-e_PnU50K?N^^`fqmob_6W?A#(5?5olPc^hBwpmmM3{jiIq9(X#ldT%5LR4HFigEC zu4iCa#w;89ME@18?{t@A5(V} zRC|BD8!f)d8{y*=UrdKv5WsHZAE-HLyqiuxExbj~9awyK55-0&rYfv0HH@I16xrah z=q*Fx*_7Ao#Ek1)k_SD@ao*Re9fwuF8mX=Fl$(J;=3EbX@9r^@$Y#BK)|<{jXWv~m zvW^;AXX?REmEDk3mD4<$8rsl%x5I3b0w&-e`PO3BU~OVm@lLxyrBIFKV4TRwg%p!T zyydg3bwBR<{?|yz1R2RlDtjDBjpAm=`j|JHWol&n_w>c;hV*&D0FAaS>iPn09^pf$ zsyAPctY0lB+?c+#1Qn(sS3*i3tq%q6sXIn_dZ;fjuo!19U97&rMnj!=(5#__86|w` zfc#E)A<+AyWZvzAKI=-&i^7E)k6PB8=mRVeYgDyW=`$P7cuj-r(vB%+_+O_+n+td8 z1N6jz=M9V5SFSt@sMXGY(co4$_<4%7(iT)C85hBe&Fk82_E@lC?BAiDuEEY3ferJq 
zcmTkKe}($fAn&2s@T1exSoxWgJ|hUfTtHR{)9I~(iE)$(dJR;eYUxnJR~t%M$9+2JiX|w#qP^o&-o0K_iLDy_GMO3vE#Ew2vaZrfggHWpGN|1~}3W_Z(9!sc3 zFukZiOvO0(QD4&u*5ro1&L4?f(5g~g6s2D4H0|dGzR^V3oPU^+j5}DW>oY00*2-f{ zsJcys=~cq#`JSBId?}qd&7)tbaj^m28`8sE_vPGr4N)rYRXo9T$cAYxj~EW#x+7Ci zZ}&pX8ob#uybdU2lJ@9B&mei)uKQ3vhTt>i3w^S_zga zC_TPW%a=LnBFw;Va6I-(YBj=?)9Sd1AxHYRyFjfFIerH_)t)+l3ARAHkb+Gy<2gfASk^K;?Tru)|tI3O4H|#}=mM z_GSnN`}=5qGkXW*pN-f5>QSufJfe-%`~;}lXH^fu&j%_rfFTsd!Ox`jIR))PI;wLT z$}A8nZ^Jg;>erI!3Sa#?gZ7eIez}4&(w+gw(uDPrC7$cV>!Rl~Ps|u~*qmimW8e`h zrYE8k_g1PrJm5jvw$kI<7u=Jj!2|O& zk|JCwrLkj^?}F7HBEDVut&u*4N~;TDXMWVk%-Hc~75ry)f-MhC99*y2nIm0(RwqNz z#sFM=NIb4!r1xVVsmN+dp4Lup?^>DiN04J6COi}b3h~(&B2z<}VzGY12_nKvkSAD9 z&|p0z**`g9a{vB+l=z{uKaS*RS^KvFpnxsBuPmTx2C-=b$fqdN>dKcg6P$S`%R4z{ z85QP1MN#YHY86rKlTK4%i|#H53fHEY-Lyk5bP*T@?-o$uyHV1w?ZvqiG#PkO;0x{( z7;NhVvqjg8Zq4S~g7B1Hswh6$)(Q@Holv=!)5dHSXPy=d$1salJ>ZvrtLI1BQos9A z*Dzrz413Z&LfJE!n2v=zsGoqJ$+!Q){-uZ?-AMH#cCkfPv zJW3pMz4U|@()%>-0#t81`F^FXUCtB5h{#bk=#7>1vN|IE%YJciFJa@z z?_a3>wO@Z-fAe|`apPBjU;Fxh3I4d&VoC9jF8^7dy-q*G!1^FM`88%t+J&o)9%MSZcl{?+mPp@!3p>1Y4;Ea2I`!YQCX z=qKRWF2h-Xv-#R7fGxI9@#_B*G%D59sqEs1_1t* zH=Gqe>%&h0AlNMQ*JYgb=x4Ehb+eVT(_JHO;A2e>003b>OjzqxWB76R Ef8hk^1^@s6 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_converters.xlsm b/pandas/tests/io/data/excel/test_converters.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..eaf0b1d0219c56b646556c52b931bf78c74fc8c0 GIT binary patch literal 8467 zcmeHMg68!<0VPF{9*Gf7u$kMgOF+uFVk4yr<#wR!3%m{E;tsyK;F8p;?Bx+%%cFaVg zB;e8jtwXLr9V=mAh1e4AtmzlJ7u4s}wN*aFQ@w`rLW#T`%Bn&N7QNx|(V3^%{*Q{> zZF$2L0}&5Vn4rZ`{@W|VfY^b#9V!}n_f)nvqZ|DGy`51%gDI@W2db*-Yh(<{^N|KU z@Km=^R$rz$1A_SzU+mmrOuCXLl-K)q!Ud)MsWJJB+C?~zKEHXeV`WnWV1S!-EsHr+gXSe>ypzKp>~;DXZL0uj~Kp z-e>%#*g_`C`7Tx}0$qw=RHc9p|I0!6yhQ9~KmE5a0+rE(#Nv$2o>gJ#7cTBV4i?um zMVCrMFUVu^Wb#|4l8P6bM@KwIMN8QW)qzDu#ZLzcbp)f_4=6B+imAfxNM;5azEd|? 
zFh8qCnUvGnsS2xoC7i#RJenOaomR4eD;*)KwmqIn(hoC#GF$E2??8WYPNAn`BWhb~ zp64pY;ALXz@a9M%yNl?`mrEn3UzkksZ!L&+mV{NXgOm_~Dc)1l1|bYj5Pe$EhIR z7gVB?Zg5!?A(}Ul!=CtUr;(!(R1EW-Dw-gr+eC!hn_Cq-*mzkmqE@M-e$G5%k*{ZL z%;e#PB@7D3t>a+=b{}}$`RrgHsRxmm8ERa4b`hepT@}>$>J?oUyY=c#t33Q0`tu+! z+OZ>Y%zF~JUVyRn33~8Z_uX$8hp#WK)xAF0cNbpZtWs5|(!3Ou^bK>P3-4E=4PZ}# ztrldq>+K=jh+&Dm8(~8M=jeJEjXA|XuZFT=CK?SiHE4}7qIny2`86zvciHE_L1?uv z3LeK?Mb*)X@km%!1EVviym}bh{DZ|s4`9yO02SOf>|4fAi3C-QOo@$B#yZj-o@0`aVA~Xi zcy>4O%CiZPc#SzpS$Ih`J*qJ(M%WhQ?swk2RrkKDQ{WXrTO36PZj+hbLBv0!y*LdX z&lstxi6*x?fw1pNCVo7WeXm4}K#M0`v${{b!6I~`p|qc-)B+P?2$!C{G3iv?ZvXH? zYz3tge-IV#Zb6mta(Xn0(_6`SYVy~4XV@a#e90OiY89mYoC;H%E3X}gT=O8&t^fE9dVDNlrK@0tRejn+u=czH&d>w2~ zJkotq!E+vU1;Xcq7Ky_6wrXI*w8A^v+r=*I`+WyY9#mm9w)b5>ce+877E;bq<$9bf zrG#J?>dstRmu^MT%UZRcf~KT@363&s%2LP8)fV~$_G3G7 z_y|`)`ja0pGOmMLv@O-@9Xkm?sR-Q2v&RR3N=s~#fl zik#N8GOnrU)N*qwONEJ~o$4(bhsek`#}dijHcHkid;ByvuFgq8pZ3+fSGFtBp9HKt z(9B%UqRlu``5rw*GSB^Lfg1tEJiRf6qrMgX<$R-qpbwML0f0D)AH~3LWN@>waXi8z?PSOi$9GK^&y|n}60Nuyo34EJ{eyeDutjQ|AJ}Z%Ypq-aqE6L)8 zR#uuZR>z6f96I4T^Ob$Gx~)!s*GJZo8OEwrXcm-?%bRw;OizuBDhb@`bEdZi4|NTP^;Dw8z1zlDB0FLBj&e z1@PncD(L8F9R*u^Fw!h5l`Q9{(Y>J+0&{0U|9@f0pxdtU$iDg)5jk_ zBZ38e3&%ISa8YYyK+FROincGn6K7F4iRIh%IFg0K4%xyqN{ChhZpC(zyxKzsp;g0} z-C8#Q0JBIrp%)ZQo|g}@bctXSRgP>>Edent;4sI?!;1CEbvO4-t|i=yC4!oH#=TXI z--Of%-jl+kEw*Ht+YL9JoTP(1_q#cqC0bi;c+ZNTmnU(yv4ZGH21;8u$Ha-VWo}ZN z_}%I8Toc@VPiUfAIYJdY%#cb#ehJ zwGv188;-(!6KZER}TJsRK_53 z#FES`_^W#qVPJ_!_uItL8Txl=v-2#y`&%^R#W4+$&QPM#=!i*Xc~?^=?J?W%UV`+{ zSh|q8{gDN((xEcPDM?>T21KaLgq(jwIpW=r4Y!s9zo-K5Bz0%Bf+b^Xm$H&r_2_K{ z?8wtqx&9~ z^`1#oXHGM+?M7pw;ia45ggAR$r=fUhkuTA%G~1G-fM&JD(9`VB5^Q&$!w&^$WD4i>%Q{x=z{1f>e2Z*kEw)j* zw5(QN7#Rw#Fh)!xP4pHuAW(uK&1kk znzIUz8OI15YJsT7GlQ?^rZpy^UiW0aU9mDXcB>J(V@JNL2NmplGEEGB2)#S>wt#&E zBHtAAWspS1k%Z)-v}o<7U3+JGF4~(CtossaE`xZt)5cP}MW>aWTv;ah0e6UoaM&2; zlO73cXnx>UZF&&VC@ZHboE(xC2BKJ&5I-qQK8kU#z$lm1Y%@O>A{L++5eC__Gn6(4 zvb1S`>=lK!%g%>?ssm*;&0}s~BKY!Tcb!{z@48{Vf5Mn(4Y}N68{%9o2!EO)`Y2MP z1f3To;?5QcugULRN7#qb*2Wc 
z)YLnj1`g#p;2{>`-@X5Ul3OZFP?WJ)rUHz^3`fnk`#QoKNHmg~KyvQ+l4rHPf|mUy z)!-Z`_byuvTU*8xXXXO1*d}xpb%*;ozlhs>C{V}1^u~QsvRzko)V9E2LQP)Ja}g01>u z_DMQQj?&oO>w~*ZHGa<`)0Lp4ik#g1CxOM45f%76lXGA3dGJuiT1h9+tGp{HQw&c& zcZxPQcKH!laP--30o-T)MiOy@|~ zr3oaCOKc)n=i-X!;uz^r?UWM1q?I3E-cJ=p5i!vt0k6Z-bOJU} zwbT0bSKbPgX;lemRRv?WW^NJYEQ1ueH&pd*#R@++wED0s_1Q#pYnwIh@{9E>q}xC3 z?8@3en_cuXk$d?tP0OnfzBM$cf}S2jK*n6;iP^ot5(hjFpUXz*?w^d6e~5I5!% z@JM9?L(oAktbGZ047Y4+UrYsx-^GXqiaCUGGUkYrRKrWt51D;G+xQ3QU{8q0^dpwd=Y(xL+9p+n!#)>mqIOX zt`QfT)dmD$su2pj^qk$~9i2xse(W?jR9Cw=w|3}HZ3XbSzwqoIexf^8wBS2nS^hLL zw9I1|qjXdu5)b^m+_g-D?oZYh5W-A5ibIsb-#Tqq^YJ4#L-#)Ey)z%?%VlZK?|8o| zZ?qxnZC6M(_#xRq_B$JXls7OND=Rk`_`f!pZN=MM8 zV#ln)rlH+4#w8{ymQ;EC+w>kleX}Jt<-!z&|FzcTENDU=*lw`C1H!tH2Creh8pexkAq%3&C4rvIk)?h~Tbl%VfoA&9?s zI^jE%X5=k3lft4ybTgM!6BoSsx}3`qj;>^caU4hW-2^e?xh;ls$EhO!QFvBgGsjGd zShluP@D1+uqF|;c1}B0R$eHheanszRNwF2_qlhR?uQvC@*${_eyI zrHf0|q@I=rb2e*s>yD0t52yz_1J8$M6L7!Zn+Thxz|?P1b@B;3%%yjY?e2d!zof-EWRjtB@{9mQFT5ai$N)whxh;gE}lOXbV}6oz3`+UY+E9DmjvW zEb5aGCCnfnxW))Xb0j0R3KQBY_KBt{jx;T@L$h4Rz#2K)sz_$1iT!!^nf0hj_g&B%@#lXDu@$^Xxx;ohy+NsPd>d5sFOE-mP zaRUQDAsI)RtW78?t6U_HtqC_$DCCkwo-%T;j%)!Rn-pzpY=kc$#o(q_X)zH1r z7VGPBzbXrn+*<^nR+&p=iC6U@%uVAA#K<}G#GS^j%sUjk)`5wXXV8_J9yZKnrYPwP z63AB{BKa3mTo=;39#=EC)?04~o(!mq(C48DrAI}$J~5UCBA~_&d=>*@ZI=(ve_gIO zq*+RWEB26w3dAMP) zXSc-Zx`A=}s!}9SanTeen1j=|-9mmIpq_2YM=Lq?o*uK>oa^EoKCXtF;W zgd6@a;VSpb>pOiAy5WpeGJ3=P9OniHXvrLI@BJH`gGez27UpDA8jV%$_R1Zfjy$yT z7GmpQawaZrko^$ehEjWI{|%inB$72O)AQY|Rpk}`*ZVX?(EMVOQ$ohZLW=1(*1L~T8#r9rtS?MM?_~Pt>IVQAeZ8I z@fGD37g9_6b^H0x1^aXSMKfAU^`8#@*%$lM@W)t&WX0dQWIr4JvsdtU!x5wc_WySd ze)jXTfc!U4nE#TKe>VPEVEoIN8v8fnf60wMd-z$h`^y6^&MyVLpB?;MFa70UjPUor z@Sp1GXVagP%fCz)N&YkSf2Eo~d--R){mTacKu8V%{44tYZ2r$p@bBiYDgS2v$Cjw2 VdKG?|nOSUC%Smbv@_IoZq?6xqrVKYyco*CIJG0Bp+ag+9W58hVb3i z+Y#m?B}u%NCHLq9WEi9I_hJ)+{cBmSI#i)zu$iEHVdaTt~?z*?X(q}r5mdQFR>JlPdH!?&d;XbS>!^wnRN+tL-Yk1pq(XOTVb2$l08T( zxugfJjnU?}RHy6;x!*-Qj5-A0cek+x96QW!#}sdmIgjl>f<*7!e^ODrs1xn~{njZR 
zmEI|{yhMo|5gjSH2BzyhS4~WFIE~)Jr2O}SjesHpR&OR^yq>+iSrwn&Zs|FcRKyfJ4FF za8Dnp+n%110q(G8Sar7+X-0|(R1-SpRqf4=bVi4$v@fDqdXFDY%s;YTn)8C@CJy#X zB;+&Hc<^hhV{4=KMN_mg9C%f4TbUbSCbzi!mp)n@TYG?#z^W3$_#6Q?kc05buG=Ol zKPO0=)q3G0l?+VOaDfc+LaaklI3EeI=b29W=_OD7yy2;Ya)AO=B8t4(1?#vAz?8pL zpAI!%?)Pun9JAHhCc~?D7AvumO>e5MMl%ZK3(iirjdAlg95`1OFO-xGl%l*gWbeX; zbKPSzqxk9$$)c)@GaO4hhOX6H`vfg|1sx{so+1KNY2E{xCZz97`Zs`>P67mldpi;l zib}S1iaaI_ev~r{dCvMzx%UECJ;&`t7pF zxHiu@W?)5!Rf;&;Axk-viVJ5;*gg%&Jqe8P60aw2f_ z=@Lh$x94vAxdCg_{Uake@FYU6vr(Y|(p6@)gSXTd;W0Wh4Iih3zZh%6OeeOg@Wpp? z6q;eA!o2FkBe@&Rb=q?qxl6NoMfZPI1JD$_b)8U&)W0rQPC_*TU{e3IvOnD0%^nVS zBUa>}woWo6EL_?L_(K+*oi;s6XuL8i{&M-@E@`{*6t-0{d7$PvM3T#j+Hnb+g8jD9 z2-;eZf3y(OdWDvvB#$hrxeWnCg&(dgPjW!I^2R2d&m${%PnR~F84P37ld`PZP_~e? zM<=S;6kzm?ioap%BbSY(KCSEZWs!EVn3P!osy)QP?|sn670_>V^tSoI+0q~BsJgs) zlH_`CAnhl_rDQ2L-S=kHXEgcG4hnRZM`qtp^rz~#b>hJX(ZH!j%D^E@2Z9^xbs5@a zybuZ`(A@B2lMTYqwWPF@&dpkOq_j-+B6oFKz>O8op6f&&{{q&;zyH!AngVh=Gb9wXtG7Xm@P+p9;2 z;foPWkK|iF-{*NP;t5nTx@xF)eoeZ>==y-Zrv1ap8LDChUTcG|tcydPH)ZH$`CT!H zaX*II)~j|w&-K)+dF2XwIFO+#%6iQ%0etG-2uNg==6Y$-XRog8j1B50SDW6^(;iPe z>+bJ|R=9_)(h=Kpvi8}%8bVSUe{E0pf6^u{1s`WeM_(VQUpEL`dYd`g)yq{)_P z4<$&7Mu^+hM0p3_@GnkSDf5L3A!1BBx`*gt`-^axFrT6FvU)gUWyf^JUgMiJrQ^-2 ziRL7+t^)I%0Os7V*qf(?=xH_&1ATnrPPtc{OE$O z8`}9^`m2i(%`4r?f>RLLVrrOO34N|4OIxeESb%auo`t!cP$JT^=;%6ahU0}ow+sk5 zOFN};DikoYnDv-bVotIpg<7_q&eZmNc%|HDg(!u{9dS${_1D`?mok4wqGuXfDeLoR zgMT2N-q|C2;3V)W+HY_sOZubx9yN0p>%7n1y5|-QbE|Cm!lN9N!a^|@)kR&LUAZwD z4mm!7!z9(Q(8KQC_x9hDsHh?NCBwT#7cWoRhFzLB^`C^7IJkkA60iI~%FDTf!W#0q zlTE&i)|Jsl3%>tmFBQlYhNU&b4W8z{{^$@0W(5rzc>+ZZGVQfh>UND2Eyp#_9l>HF zltklfVpS^GbdB#Eb-ClMBzlm$rjUg4pnjfmRI1c~!?aFqIepX&s&!jrz%+*>atm}% zfx*jvrHA9%H!c@|e}`d@;z~s`$IA?ou_o{*G3K-!C*A3qEP(fNhy*3~li^Bh zxokPD`FE4#M=rgO&ale{4=r^C)yP|Xh)>5!B9qrTEcX4#_qiq4IJp|K!~>~QX%ZpB zAJI>Ix)MPMaYr)8V zEc9Rus=k?C&BHh0(Tpso4#9~USVtg2o1yYXLAxlEgYnw~V^CbPCMrJE+w>rXwEosK zB*j)M^D7q3ll;Rp=^7=-LkI<-x0&j7?{QH$JF#pNz2%GT{O&ED;&$3I+xpVZn{h+D 
z{2SD|T{3C{-Gf0p@$r$Hj)fEzW_YQzUW0M`I7TQ~AkA@i(8ANbD zcW1mgcjw46WNM!kd#)=#)1YmvKsvqr(h|5o(DKq7jx>5USH2CFQb1%Yjw;-Fldrhu zvRrswU+$;@VDtU$mFg%z(A2J?{xAczEY-ry)E5h7)AJWUoi|n6);Sy*NFw1d86J#2 zvNBz=5&W(_fc!aBTC{8ucZv&9YOS4<-Rfi1#tLqk1k+xEmKW+5$D?VlUMl>NGLuzO z6Z)onaLhA{(iuPiMKgezwx$Ntko1%rekw*o}k^aI|1`H)jzzLyA5~ znba{kab9FD`3^1sNJ!QQT<^d09lHM=lfHp4$6xbtIlb3tM4I^leup_t3ZayoR{H@I zK`qcHc%PBk9}90(eNyam!29FNCr;FQ0c2&*{of?9r3cYB1vP75)v1#>y;;Oo+vM8)<)BDDRt_d z$5-ezR|5}D)qD3vQ8K|ZZj~WHW>=nI`f9p56UHLigZrxsN+M$;UTb-r)4$+(J}Zsy ztc$*0r*{KW!1=f6B(b_6QA(Z?z~y%Gz^~47;i69F^MML*3T=LYp}D9_lZExO)E2SZ zo9DnXefB0-(NnT-8inO9501u17rZO`bW>iDWTy9Qf?Ep5!!0bNpz4~U*v#ui`N3MF z$30zIH8Y691;FNzt zoUTUVx24BbJCL>N?NTX4-6J7K*q>HveLIf(bB_DL``?#M-EmU2Y>&H&Z~2!Y*}F>N zr>w-Jvm@^YxM;GQU-jP~JknJTX%(et}Z00 z&ff_Pf`RpK?ZNPSJN*~4dUV4OJ|azv!kYo>m1nksqgKyI6s4Q=Gv4D*^gd+1O zP$N5a)@2R=Nj@(3mesPOsHcNW%ED2k=!MFQ+4@Jhz}CmO2Px1g%v9ph6f~U!uT|X? zDeTFhN->vO@K)0gn(MH2oEYr|MuLk(_ts%3s>tMqyl?(He74zTxoKdEz4qT;QF~NwvP941--u&2(V36Q=wg0&RT|>z}aR{laATBKDw?vv55S{?_M6 zxE0$hWTgh-HcJZ+fX`-l<=uY&bPi4>@Xa>*(QYdo$3}(Z}W&LrQHnXpv@a z$M1{lzy9)K;~f7og7M8*poz8jZK?c zQo%(Vi5J?UvYx|Sivu+Q78>8{>@_lPesU-2d^=z)@sz#gS!w~-JPV6P*z~i<9k;ZP z+G=R^>s;y9>o33ywk0Ft0#|vW0#H$64PtIe7=y< zkB68?a!t;A38|@2It4{63mb4x2@`<2#0#qQN?I^N@AY@O+97wIpC_k%C2zO>AuX&I z?FCzAGK$9EF=j^&o`+ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_index_name_pre17.ods b/pandas/tests/io/data/excel/test_index_name_pre17.ods new file mode 100644 index 0000000000000000000000000000000000000000..56638c983d94409f5726581d30478cc2d70a6eae GIT binary patch literal 3699 zcmZ`+1yq#V+8*E#Qc}_&-3=1b5`%P!q%Z?R4&e-49zu|k4&jI(A|asE&@mt(3=Knf z!vNBVB6sxM|M&mB=k9Oq^?mRD-uK;W?X{ovJddFc-XFw(zXv z;tuf+@PdH6yr3@jAa55>4^dwa2N6#Y+yyS;=>_p{@U-`Fhj@64z`bA)kOSNq0`WG) z_M`z2{OrIESo$+&8-fP_NMN`5u@l@o01APN_`5?tntk$^6{BdO+fpVX4NT*Ps4^pA z{v=cA`L6t24VQkt75v$<1*_9SGkZ&rlAl6Ap|&KyfGHG~W~k~g zpl+3KtK3#(mF66wkbC0fc(Mu?!rn7~PgtuvyX%0!zT^!kbors&v;e_1U 
zF?Yra+gb1>`5^`xD1Ikoq5KS!!pY2uVXx5QaJ5B_yZxq9lQf#AX(_?%w(gPkF=D>-ddS& z|Fvt1k^Y{cj~O&ps5>@a-mn(Y!N~OO`Ryy>Qp+M@qN_Z`>9VdQvSCg5+@CWMm?hN}BQ< zQ+A87vkPg4g9Sb0V-bbEse)_T#}+2uGlJ%x5c)a11;ThHO)-No1noYZ0{wKD-3@I< z3s$iJmvm-zveXT#HxL@y#vbAtr=}Z(?XiRT?2?g-Yx|;R+ZSi2dhnx{(+pMy#C4QW z_moa%6IMv!09pL!xAmwPoUQ)2_{u5}`S%G;m+hD+M zePPT*^a$(8we})~A{YF+dKZKo2if8qI!m8+Nl^9m$@n1#J_RNb)C6@QUGU!PwFX#s z3!+(hgAA)eXhD^B@bn-%{N@s)7R4+!Rn|}wu=l;^=18DF_el*oFxJid>(jvS@?@%{ zMPGbj7MHS(?;wkf(w+Ao0DP$_@6%~qzbHd4DMehZ(bVJ8V;yg5&sT^`cb8P-P>Crd znsGHr){}Ir4!jSdcHtF3g9ZOte33nC+(!f6H?u1YBO2bOQUn9-!Oovw#pLf<44BM% zHpV5If2*XG*%QS=?fM!I@c+T6JLBnGL;wKDSOWkg*bIA55APr7#i9{ojWk}BrD*Y- z_^#FmmyStA0r*qo9NZFv@_dKV!2L094*lHxT;Hw`T!Ou_D)E(U@v?wQh|jyPK01N> zTN|57yb(btf6cf63Y++F91(D~LsuZ{xATL=5S3F%-`hd^KBX+>b2S{>^$ay={F1FoSkQAXM>+k zmoV~ZDD7NoNnW~IAMFGc5zUJ{~L6}Jeo|((uCM3aIrb%LY%hwGGx9^~nvE1#1 zv@5IJWA|qjqWokg6p>eNJ5@i4Ky?z09H4jxX!J0y_2frvJ*ScG~O?I>OC*XwxPVH3Y#U4f~x+a;P? zt9%_Ps`_>uoxw??)3H_R;kwW&a6A8}6l7iUrMN&C9n$+*W~mC^Z*fI!{87h*vr!@?+q{t%r&QN@b8p4%jh zm$?)&p#tuPMTw764h?U9R{10?3K(z9>1*CuNkvxBYqoXV(&xv^)8b=1wVw~pNH6$?HoYO$l~Z z%zP(Ognm_{xGZ($;s)f~q}3nP-K;e%9%&kA+T&<6*kw30a}kY3m>J#l?2*8Ep^S3D z6=4&idH2>`kIIE7P9Uz#`N&HP+#H$A8yrz-KIc3_!Eu74mIoZlKWBOv$W(Rb@^Tl zkdbxUXb>m5ge+N@yi4uI!diHkUi@=nSh_G+Xj8Fvji0VxUZT+|(IaZY=>k|Wb26hx zw*3t3$P@CezPG%-(&{mF-OAg&k1fwJ{etPyP-XyhSIqh8YF+0#f?Le?_T0ppzgDqU zIq15x8^gx&pfNcp7xq}*5+McC@k2vSnJ#@SuDU~P%-U3KGfxXSALTEyXmdnwC1!~9 ztdn1jjIvx7tWN^l5T?$H*o^{<3fJ(JoeqX(us<_m!+`Z+YM};6G=x8-w zs zzPNu!2C!4rs&r{%thN6PWRT#K=xUHKP10T0Ojiq!De7+Igkijb)=DbR)!VR9!J>(M z?cEdviJ6-d{2lsAc+xkQF+zaNLd?7PwBdBz{MN)l%Wg;*V^btJPHh6mvpuljQOTSJ?#pEZ8E1seTFF}VNr7Ian_-^#&-$lUO)`9stQ_n zwf9N}9#;C;et6F8F{!E8Ja7p`qD3D}5+OpnrjZ+Q8;yjs4Riw`2s=1JVnouaZ3Sa@ ztnP4bC-28`RE?ig?bAU1cWS`UKGJu;dDDpmkfddvdB*H^z4(0J&keL5G*nqSy%hs&YA(Xm+4aHZ-F2!qeM&EP3hTIh zmunR|^UQ+h_YEWT`>8X5xBLmL!n)~lSjj0%+H}uJ5&8n2z?hdq0pLW>s8{re;sF#E_21`ONfhlAmqV+}Xn=+I*!TO@{&);C%nC7*(VLucsnq}bzTi6`!VtV8cYcpipP%ZGp=yOFQ 
zDO((r-}_~0PIV=YSVSU|1v(xYxMMpW?o>IssjQ;Ly$$>>S)11$p%wIXuAKg#sr?0U zyQ>ur01$~iil5pY;tl%2>+vZD26*fgT?ag&gdH#O5eVN;`_eO81O6r1{+udS#&4c=xZ{8K_7rbRogJMa4$%V;wyxPhiwYYjX zUpZ;XPh6jP*ej-lUf>!$_&{UMFyYQ1rpFhxn#)_Tw;V2`mnv2LNTph+GlVYuaPZwT zV{`pUzp?)2MNm=Mk-}ZuR+nWPui1+QVB|Ij`;u|6+zrU*Y$hWn*4c68=+BhgBq{3` z-B7VfGvELj)uU5RbkKV|{|%+1PQC^W82?n{6BqmTF^dg%n#`4@InF&=t)t?;v|%|8 z299GgJqC7Y>o?Y`>+qLf-#8!QYgd!me24~S3|e-L4;<1jQjDKuhpJq=^x6Kew!*;` z{%LBk3xNN$Pd`?FjsG!IhB`m0{<}f?Rbh(#{AHJZulrrg{i<8WV&(rRyWb0c2hFd- tN~}TqM+p7i;`ht;t3@+bR$@)#&#PysgOAmk002JrVZ&ZcMY12!zW~+rm5TrX literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_index_name_pre17.xls b/pandas/tests/io/data/excel/test_index_name_pre17.xls new file mode 100644 index 0000000000000000000000000000000000000000..2ab13105e792518433c4c6770786827d091cbbca GIT binary patch literal 26624 zcmeHQdvI09dH>G6S65enk;Fszf#D($2q8eyl`#aPE07)A6Jax$49o>$;OpThn&hWbEAj ze!FMS`ySovxb`@?M{~}e-Lw1qzWw&w-`U->7cTr=<11fy{K}t7;OLR0{IXalX-~R^ z`z{l25Iipy88u-i?vYeI-bWUw%b+3CHL`E(^R?%nvm{+B@>5C3H&W+u75N982jrO4 zi1Zzv96g?WmyTUZhe3-vKrPu}WyC^SQc`jbVWV83^joB^i`Dh(D)%$$xa^8KN@HmmDGTrGK8m3djEbjk|3i2ND(G#x%6zh7(h$)JqMs2r3@ zT;I(c7W8k4g?eYP? 
zFkJ(`Jf)gPWIpc--69*%Ph6p9zgC$vQ zVqOy(pW_#0|EKInsYjIk!|7iwk^UDY(!W+B{cMT!ua`*wN{RG8DUp7EiS$zXl)`^f z;Vcc==W`{}wLaneTAy(GpO(ORyhQp_CDQ-8MEW0;NdH!e^h)*lONFx_WFPG(7&U3X zda$waq4^KZm(&@Rf02_uGV<)oXIDz0f8x2UK9`%6Uy z*ikDgKxV3_0J~U41=xoxD!@)zQGuRH3v8&gKzpSHHdb0-Q>6tk8m-Jq+kFMtVZE$j zoyzPC&q_kgl8`B*>cG0CC+aRGqNu z9gbAeA&9-`HCQ5r`t!ke|NU=^rR(47134$@XprLf;y|wTgA__=QvBIDk0zf9H0Lz{ zsa4HMt@HVhm(XPPvkpkDUB$cazPo5vY}o7s@TS#y((p(f^CHqtWr9j=Ltd{^`Dmps zUAh!fDxc4Xl=7w-rI=>xBt|K3T39Je4LwTz;#3K9M=9mC%CBarU~k+4rF2K4^+QS- zVZ~|7u~fKbZBEVT|2!gH`q4)pbwG>{cui}BdDF@_cd%pbfEv1EvSw&2zYm0K!Ie1e zG^|Bv{9`(1u{ay5#S>3F5po)DnyCfTfUOSWG~TrE7;7`~y0xGm_)L2$^Wi{$ybc9Y z4!wqILA$ehUQ(moO`7SCVcu#+DVK5)2{*C6mvJXB}nOBpw6{X>)_j_Q!)@A0h#TO2U+0*K_uEH@gObbfRKISY{FSz88(RrxuP79+5UKt zl|B$eqHPinvbG!$e(8v_$r2w(Nj`}O!90I%!_M}{gJgXmh(y~Y9%NlPAY4_Cvk6zb z%J7qTkgLi8neC4US>*#kB-$okkkg4p(ty{Obak|3H)eMZ4Hd?xI%hJwWrn7+4=c5z zco~-IJETRjvJvM_8Ny#d#$}4j;V^(D3#2wx7`gb`8^s#l(>pK7bC{& zX|w%5c=efRF;@h|M0nbU7%^T?o9&-{?&WAPR|dsIc-rO|FoT6c^Xucvk8e(~(5qQoq~cW#WkocHF=zw>~Jf}k4%pk99v`OsIs8x6Y71%)RBK|2GW-qsiSpEDnf0?kM(-mbE1 zaCj!O&EN9YRmG(2!gAxV^9HWrq6k(9IXdD!ml16yX~0GXEc*8ijt!j{8=M-Q7@x`9 z;|EMM73YiIdo&~`umfojL$6Vpzz~cx8NPc7%H=F0fXhotKw9ug2@koRVIi|Omhr{v ze6iDsdTh_Y=Zn6>g`p$azQWkpOy=%@UdxK}px0px28ZMbhK7B(8^fPhD=tN;L7|sG zjaFQLN;_KVn9)kdj8-~!8jH3Yrjx~)OxCYTO%g)-V3MQw8-yqk=53!&BX4UUZyFmK z-Mm@(DBc7gbINR-PNB@&K$%oAgEA+u@oQ3zeTU>ur^Ke|T9nunC{bHn5Gvu9wNy4^ z1THt6JT^8sJ~%Zod1rQC;pEgz=0-nM;+kTE+#n~h|Bh`xuG)VBirfjC?}Iug;SXHA zGOf?Xw%w}^b;I(^jS~}a)a`y;>&jwEZiLY$07w zP(Yc>!At$Bp^u=^hkXo?biC2wBEvph;!eYG3*=hpFSisU$1fRBVTbOHB*9W6rOuB@lTFJbh9T1fgu}Tex*6t997E2#8xI zrV2BegMN&}JoFc0OhGBGH5%sSLbahZE;74Bt7C@Jbj(nij-5`-mu+05yY;>yCG%-k-Y^emEm z8ntW0-#&mGRJFYgD&MK@cBAej!xQkkZLs2~d|Y{un}a>*5ZcG|;lRjY{HOX!KvI2t~Jux;lIxy}$SuBt6YXv&A z{e)N#XD*kAJ+;ssL4S|=683|C$@7ebA3(>j@OK`2u+Jm&q1P;VQ}$b*#WVd~O8N%I zItC7Ppi}F+PUo)6xyw0s-S}LzmznoE#q=GE={pwFcPyswy3s%PcJy^H=+r@%4str^ z)3c7SGr-N=CboBAFGz6)jO9wd}bnBp_PpQdvBfNm{j}d0@x#|MK 
zUW6Gm#=Qvp5e{XVaN>b|G}&Q7&Y(%pc%JO~Cri#E?W`w_F7N=-9`K|QfZJH~PXro# zKN0vXbZU52VhJ;;1C^CLUim-5%kwhzbOElza93`yZ&WD_sMni+{>JxrAKa54P$_Nd zxiGM-2?xf`VH9{SUj%?qY^rnM2uSK zSPC)9*RdMJD48*q=pk#X7)$g(l8ISpYwluV&IJDIImWePNyON8`dJDwei6{I8pKxG zG0N>xcH}d@^XpYF&@p22Q=X0ylU}Z4#N>*)juA6!$Ebae@&P_0mDO{ZK;=v+Jr_AK zV)8qm&Phyu#MCijuCimaK@SIi>KO+F^NGT^clvyT+JlW7;xi*%(z7nRO@KH?Q{}G#G-akyisv3qc zA?EXMlPOdKs1|>(n1a5-hJB}4fi)r~dDZ0sZk2U;z(S99d4Nvk@_>cD;qrjy7V;@E z`Cf2&Ky!=X0sbV+<$;94G(5n8r^^GHTj;&S)ZAitAnD_Qq{1{jz@MDCJizIV%LAHQ z3=e2-F+8BT#qdB%VHzG_cXD}vqg$5;G`AQY(A;8p0N>S+)9^rz!ZbXfxyA56jl$GC z&>#n4&|&a^j|E@}DUStIPBel(-Z2)K*e0?Wo<^@X#@i1}Im@I2xct9ph+#vUDts7^N`A67Fb#EYKrsoM-e}Gd9pH zV8#ZT1e8Jvq#tlhThgCL8m@j%{o%4(mT=t*@mpxbqmpv%K zj3QD>LbHt$W{)1MSZljz_Hem`5?n5!1eZ%F!Q~QK$#BV+(GcX4gZ7^qQ9#0b$0h7R z-`*keSnqExm(T|6+wUEhki$3%`2XY=dbeg9_Wt#DOf!QSfoNth+@qPnaF1pN!##Sm zV7Ny!gW;Zpk9#yT(7Ke)v4hJ!dhB4hM~@v0_h@D?+@r@1hI=$KWNl2%3}$q~k%Y@V zdL&`EM~@^7_vn#?;T}DbFx;b=!Elde1~XRSxWeTg%?yTn^ti%sj~-VT?$P54!#y=V z?qLh~XD}}Jq`}JG`HYu)(rPqhxF@YfE{1#3N`mH|Me+kQHRm(S-xc;jb$;ECUt(=D zonQy@AAFs^a^EADetzrWhQ~gWkqzsf{9y;uzKoOIfX{nz(rs_TNyohdC*ATGPIiY+ z;H1+&h?CCrd7PB=DhjcA*$wqSDzR&Da%%7hqAHzOD&3AV-|o?&$%*3=BU9NMP7W2u zlx+FdCw}ky|9rvf#q;UT+aFB7e!*f#tsYeiR4q`oK-B_O3sfyowLsMZRSQ%tP_;nS z0#yrCE#S96-26ZB;`1+_+jLdK>4z}?Z~yt%IREFwpG!!b|L?)c#pK&@asg@>Cl}hr zaB|K*g_HCCyKr&|>|UH)4|o74*9R`*^sWbRErExg!|M+Qj1!YbFkAs4EJIAQCyM)< zlyH+^Z-r8d`gcbu9X#8jWC8G~57#CrYyZULk>iI8g{k98C8HjmRFA3!surkPplX4t z1*#UPTA*ryss*YRs9KN!s)QLFQRwj8rq)*~E;9woy&zkl PGJUqbjrw!joRR;33Ir}f literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_index_name_pre17.xlsb b/pandas/tests/io/data/excel/test_index_name_pre17.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..5251b8f3b31941aeeedd0a45df83d2018d75afe0 GIT binary patch literal 11097 zcmeHNby!s0)~7+bMH=ZCx<$G{8U*R?MnbxgW)P5;2Bk}okPeYKl0j9ewY; z|9;QC=XuV|Jaf+aoxRuot=MZF1sP}r1@DLBkLnBlqzJL=q{i7RJV4% zQFcEwj?P;&9$1$36UIOXPQM6qD}&O(eN>09<_4_A=5A@-?&cZ}j|9fmjb&fhN>h?k 
z(lr*Y;^ZpOGZP~DbV-r6hCfw;<*GiMfgp;(7PAT`X_^;+dxyQ@=M!801S)K3>4CnD zy_s`07K4+#$~OfINh;Ph2@QC+eZeZP#3{@9#ES zF2-qC@Ztw_&2y8cTA@Pm2%t(*C;JOBMnVsDn=nO7R=|l~Y#+_^j#YL((4QtoZ_N=`(~g?YLH1*5lG>$YV!jcAGk}Yx*xHf=;pXd_(mp0}Y}dp-U9= z(x8MwC>bPXbLjv?=ISn7S{KvjT z1Tt-12V*NoCdTid|1;_TVmtm}>fylt?x-K3W&^2&y8o zC z8+5TV$xD9@_eBZ-YzFTPX|_#@NR@ z_=b!Oae7E8Y-o|VtxUx@pS9N3Q2=wf5>ELjA}q3GX%8b)b<~9P7Ng?Kaa&60L&&PC zE26bm;}cS$PEntpNL%xoYn#)}_FU4?FPjtE)_g61oRp2-L4q{VE%YG@n&r7ED(Wfk zKG0b!A)<5d@cU@#n2kTkt7uMF3FQ%X&aENF6anQ$yh=rZJUXOzU=%UU@kFrV6jK%- zv!g^G>Y!-YWQg+ha=Jc{^7Ru*ymFB(;XUXhRi;VZ;L7*AvlTYG`~lbz9?AV>SF8`i{JqP+z1B^N{+G|)GQUOy7cDoQ{`SRcE?oBEwt zcYHjlU76yxv%T59DMTm<3^LaCP)on@7Dbx;qH6Sa4JE@a`3(&jYhREL#@yWI&Mw{i zj#4ZQjUZ_?75FGxj-LX#jHIq{Gvovl%T^HSwj11$uoEI=(*~C;%%3bkK4Lc;8Ve`B zpR(Z`Qmd3(aucAu^66xI`^oA}nCMfZvXR+nz1Fu^-mXNxyeXkHb znA5Bqj`X{q0Zu#{yc&r(-_geD_42gf?pT}FczVV&6_ZHS>~Hop#CZVo9&uYzlo`DBSDMMu z+?KSmu*k$%6;6qvJ$0(?M!A;vDfS#QNM&ky7o}t=C>6G1#l0c&OoU^>%WvEhq+IiD@QB{V8 z?F2b#ENs_bz0uAeKY4ZLjKEw z{NW?-mgUw%TJ<+ew=tu`&DYI83aFjl*cB2@Vsu4+5qARFVKfDooEQybJ~#WtseMVH z^#+Aa>$E13sgE*Wo%{J?Jls6~E`x=uZ{PM|a#b+plMyw!YnqJfKN8HkW=%;OqZGW( zl`JbAOG?8C)8tXwU@xoeE#y(MW?_2Wx+FPKx-58|tnA&e;wn!iphgzk3A@lNP|^61iU+$!E!jOCVzIx0DPfeMD>mn!U(B zQ^iL_hgC_O5n&i=uavsz5asC;<+4g zqL0n4r*6QJ1SzFX>!|mmx`i^pZ;ivArE49ueTz>*To7bgzCqP zEh^9~YBr>3UFiYM7Uj>`I=NXH-%c}IsYe=G2)!`X9Qe4sTWEh5 z=@lLcscYElENRkfAH_kRhXFLM!vg!+A2SX%(_bf}4J>EBp^+|3JBSGjTWlF|I#U@v z9&$9YqcwmCN9B4MjNxnk%;k;YP#X_}A(pC&0y!N^ORWX)rGEhemH(ALo=6R*RzR48 z*z;;wF{f=5pg)}UN1Cwh9nz@>XTY^(+6;-_YZzo*Q&m5O~PX62})>1AQvT!Hi0v&^-MGNN2d zhOfx(ttCxXGOMdD>A!dd4htWFqqrxIX|EhbkOTC%8+{Tp#d>!QJr^YvW zYG&#eLf^2nhzxx!rlC`~*}r7Jb@y~st$tIqQ6b~Nc6cd1xs!p-HY&#bTMVGsoW(iu;XhE@62B6eR*7L-FP#r zTAv||i2-w5_N9r4Nri4wT_c8{!O$&{nj*+Wnek!aydn%emT#1gZJQc?)zp-9fcbU? 
zosgty)pLkNv`$U#KHrMdD5hQ*rk=`V3Y`8}}`IZ4GJ;Ye+=oOLh4pf(BqQo?4 zEDSjH>#iI}OGQ>X+RX&C7|JqcNKVj4u|qj9vHn3Zw0`!CmHcMaC01*fC_V+O#dCXS z-%e7}A@%0qWr`TZ(t4X;G$bD|_-uEw?&%b%T7Ec(x)}xb?by7Zw#fUSo(pFrz$tTn zGWE>y-NHOd*PH=z{|HFkITY-zCt*Ai~`Hya2&%-S085X^-7Gib1uIup;PO3_8+7slx z+)tO=pQc}5&nzLH$2ToK^bjR~I0$eEICX=me$ZKy6kEnk(oWMhlF>xAU#vItGy}Sx zqq7ikoU-_67tsu$+O$=15n0@-@EN-(+JiitaGr6kutzKh+Qv)+7A?xeUIC!>Ikd(z z9PORanca3kwSdftN(9@0}UErd|Dlsr?H-=h&XI=ws~CM=|q0pF$*+$d zIwX@E3lmbRPLM^kK0JkXgP;u^Bz~&8uf3}l;uXX_?`mGNr?ePwaV%~Z3>PUK1~X`D zlKC)x{GQ993%9AVtZagB(BAPuNQd|M$b-OQugrq?vmDRs=u@pWHA^AY;>P-3sx=~? zkyy=7yW;L)~{d5(XaJF(p>+MDaK__|nknyDy)HlCH4I@~)#3?Q1ydsUW^#O9!! zRIA-`%Ko4!GnWls4M?&GMCad>hJ!?9M4uQSZ61|S32a5DW?meB+)EKnyCs+zAoKdN z*MZVDBG}QtoO(2#zjdOItZZrptFHcSLPeHzUxyja>mxfVGgUV3ccr8Bw92JB&jUuQ zluYB*xZWn_u1oej8Dr!$ePQQjgU_%!8&C`1TnVG2<4+F_z?Tzw9rN8^{W_D)HDyF^0@hpY31{_l+ z%on+M^02V>ZUd*0gQ+6%9ua;y-dXI($$b`S(oLnPq}Q-^Pgm&mhp$HrI8_@%Odhd) z2Ke!QNm2ipfb9HD@L~2;2Ww@4zD^EplL4T6bI#0Hlj)OBsErdRKy94t!8He!|?4pl9x9C+rHb$$RLqLZW)hSDWZXVqPTL$K?h^)i_nq5_1gSAd=FAw0u zVqjA9Il1T(d6TpN6)Vt+TfeE4eBO`GsXA8B)!nYvyX;NSGi4qn{TQYe&j zg7K))z{T5;-f-2*j~2NvExd^agN&N%B3RvY!dxGe?RQ~)q#G@AH8+;BqW=lMFVP|$ zPEuhaN`(et(!biMd=J~?#A&abLE}@Gk&2LZqIy;5a&&3TuCL__+cO1(0-Fy~Q*g+; zM%T6d=CBdakC(BxdN+)|&RSZ|r>A*XvG^<6kZ?jjS-sI}_uy0Cej7grn4XnqBqSHp2mb zUd7SEXPmW>MKRt-C#6f4b=4!I=##OOh%&?5q=rz2jmGp|7#*EwWCPRtajoIB=D>T6 ztC)Bu>lp>mG+y+B(Kgqxzl&|fsYF7}pqTb=b9%PFs~%uEy%UaT_J$vP##0RvJ=t9A z(!_g-q&w5e$;Gahrsp5^gO1WTkHo~XO8u3`{mD)-A2OlNGL^W<>s3C4{}RyMEQGjr zaBAY3gK=)EX>+PFtz{nfh3)i{Zk zn?{-2x0bM0``R#XJuLro)e*4pv*Ynif%BZsLqi@a)JiU-9^ujE&i?%jOUP~RiFfH8 zzu#s)1xRJtfHEn?f6nP|=k5WVW`tJ64jNA@kSkLq+A1{b$!UYGJYZPlkrr-ARj%0= zE}n6as)yaW$@{2AWkPwezf*OZRFcYrtlnY%w;IqsvpP+g z7&))8gxU~ZKI4jduYfsz?38!HxKxfqUFm(Z2%zBEIR6lWZ&H=(iirwy7m}6gnMOv3 zU5~+Hgwy!eigvzynQfFuPPi*WWE;;Yx{V_#5{!YfC%jUOfIPpjsaG{4 z6Jm_oKrItV3s!lcOe{qL3Q)^A1(ETC`XXyLqr)V1ca9ve2TBU?RMKG*1TbDOpE-n? 
zK%5K9yOnWeXL_lT8|gJwYv zN=;;?MVEGqH>DlGoglOKQTtvF@dK4ZT<_s(g>Y!9`_;gnyR4dqiihX4eZ`#qnrhyk z@}+i61`w0++H(!A#^p&3axDI3U@ZN(mz*_udeEkP2WOhrS${2YZJ8lsNvam_hOrzl zD4cjxipuk=Bs9!Ojv+0VSQ8Cg7u^xSFEFDwSk!d)90G$BnZ)8NA`8FlasVDlbOcSC z!oZ#i?~5*nX?0-b7eRcR<)8NAwth21uS`LMXhST4p$OT1HCq@bmE zKJ4_6;!q1K&a?Ve9HNu(38<;al6OdnY?<-KO1+FQOSP(`P1D;|}Cnc(2? zA%1anJZ$${0|v$bR9Qf}kq4c`p!3h7=byDa(9T@n!A;7>$k_F^qBjK8gMfl1jfY~1 z_6hQR9L+^S*V5(V^FgqzR*<2$tBDyaq&*Q2FISlPx9UEzzLr)6*@`)6Z;Syd;2YT* zDmd8MIWigAIvD@i!Tw*}9;7-CprTAGGj`w{?2Slwg>_P71fY`0s{B4th#dBuu*3Vn zDk-VVnd=7gx>3~j&gGPgl@ytwuVoB=Ow4K)ab%e7G!X4o(H03AgY+|`c>}Y;K!wpPpaY*<>5W?XsYyfH<*i4a z;} z7GUxcw!&uTtRapRH}6u2;66ePQd-X0_1NS0MT5;kF#LEs7Fw^~4<33|2)kj%+T-=N zxYm>1i!kI(^J4GU?d1}x3p*rS^j>73ppfOW7nZ=120|V{bxnU6Pba?tRS14HeLzhE zN*aiZUZ9#4`j1@Hx3l{n6+s;Q^GJ!Evz%r|@0^3bAPJu(sMV$+@1i50S#RxtlA7*x zi0v=SXLvBvPI0kC-APea$I>yo+nG?ppBFna(`c^|9AmW~Sd+H$P{dZ){Hhu~Dw*ji ziI_NYBg}n1+(jRX-Qx=v3mlQK7=Thd10PPNiOqc_U_lvVQckseDqR~KcaA;1h^mJF zS@SmjF@x>lrP^ZJV1l|%m8l2|HzV)0?l9}?w5T-BU`|NOlLG5hjwW;ItKK9$8kH2! 
zSpp~N$Yi>~qQULV75~0C!aNJ>rUljqTZn`izy!Y+g6mAyu$(|OA|_sTLId$r(+|}I zF;@^R#ChmMqDX02YD@$`!|i8_`RO7aIruSB-4uACopyypozBH;5<-Z%Ewg$MQW3O^ z9pxh5bc#q1NkY~P7s*?j9?dJ@k1m7-!%@F7osA{E6dT?qoW6g927KQs+wp>}iH&?8xn9Lw?%yUy0ag(eF@L|?S!+*}eZRm1#U3Y4DKlS)z zhv6nVPlEj|t=m`bZ}&l~^DC!Mn%#?eKvz^j=f8C3w;9P#&RpX!XO8^bH`CGgoCST2 zjg|QlZqbGjs^$l%D;^ux5@fX4+V=Lh233Er8pr(pa{|j zb)Z58vfJN1@SzZ`A{Xe&zn$f6(EjOx;IpJdhX>743&y~k3NENZkT32f$RqqYM@T3D zs9gQ~*RQ@0?8oCbFJmdl{0#8(OCCQ8PlG(r53hWH3;x{o{!tJb6w&{=2M&${-rl@J zGPw75-2bbK4lWGd^1BlbL;PF#mj)m>3V5ID4uudzuAftn-*&CQ0m0iocYws8+WXId zx1+nI1UL$KgW?Y51L`fxFYO9&1n{E&9YQ|Z9|(W90Ki4TYuI<9*ci8>;1z9f0PsS= z9l$=wOZ=>ZAC!7mHUI|%-*?{u9)h~PKLg$>^%e>o1$<|ChZ2tuj`G8>?)Hh`fZ!Xg zJHXeV^8as!bqfWK0-o{Tp{Nppqx>+eyZj#<5Iljt1AGc{!9NcRJe>x|`Z+(oLxO;4 z17ZD=ErW~yoYMR#u0ZyK_@4<5xHNe1xRa-#_!krTt(L)K!W~xPzry-0k-**k9Tp3ySoPu?(QzZ2`<6i-Q8`1O9<{BJU9V@L$C=6?h<$>+1)q0Z0=uh zZ-40iO`oc#XL_D`PMxZwEC&gN3HA&O77PrG4D97;4~8E&7#R99Ffa@-SO`5a2YVND zdly4>Pe*g-m&_h^wxn;MAZQD~AVBv2JN}E`z*x$(VjnAJ*pY%ycyaS9h61)h))wUs zb@UT(S?@vYwx%z1untE$;DygbAR6U8Y(SyFB^V*kr_NRnw8U7k1f|DT}jVA zvbYSS>okRXW}GrLFFcVJP_^FP0gOvSW(8OW^D>AzUgZ=r0tqFF%s$4q9o?1+cs{jIT_7#RMb+1BHwW?^lK@y z36G)Q!e?X%*9z&q)(n5O>mKa7iqJTy1)l^s-qiUR(uY-y)>hMgXw{^;ubYfHP8c%u3h!ZWgI&x#m!Bz+$~d^NY4fGR5I zE+XAdrtTLYvx3qXTS!T;+D$`YKW@f!e6(6l=+#= z^`j8w;_|Bda)i>kI$tGeqzS5}v)0EZ)#KMbz8=p+s_Xy+7Ahj~Q0?Cy?C-*2cO4_; zR}*3B(=c&&4<%$rk(#o|7XV19GdG&fu)b7$x%n=k&ttb@&;2<`39O>T06ip8SP`b{ z(0h-KFLma54`2G(PoE+o-0a*(B9gY^anhL>5Docj`{iKE>w0ShkB z_{F70GogIRioKKw?R37Y)h^?mmz+IhJzPmdd2^DPv|0G$FC4_S zfu_TLAO`%KN)vmTVnQIT9S2bf1!MuD(%(r`q^=OZ#EROfweMf!6P{p;jU}$#*r37N z&~AOTg>J=330ZqAWzC0he;1eh3P*&&ZVut{Q(k)J)f#L262nN|)C*&m!L7{*_K%@- zp=)OMhnHs?L>3dl%p&-)ctQnRT4&7;ulP)x8BCjEz#2Yy?Dz`Al544#^&lz4X4%gV z(jQ|kDQsIuE4RTcocjc-N0I=6LB{TA>jQCSlf{%Q<~&dRrC3l@8~NmItdt6& zJJY0v4cSRoVXnwae@Lt@a^B>P`%+a8Ms~rHsc*+Sq&s6=7vYl9ZLzZ)cqf(*j03Ue zRMmjQhct2E<`?56>JgrfqLMYac+KFU(CIv$`|*dz(>W6sda?bQqw9J!>=DesrkfZ) 
zKt|u_YQQ^fQ}aQb+4~8VyPG$4eLI~6%;b}d=Azunwm)CJ?VIM1^+5#3>*&;Z2AM!m62#@EGYE(k+R3PI z*rsLTRZ!a|J}H~5XO635en@gvHFUHwG+dfl;noUZ9DZwTMIbZt!J}{ax{}Tc9TWJ> z62C7RTA{SxjWpu3qtjde9*oZY;jfR8yWPL#nyM2~XFiB)h;U$FxWBmOY-w)p;>_~% z!tq17Qqm0L->_m1taC3O344&-lx|rjL{fa7cu$!+6 z-0mam!I7qrE}>2~8J5Q7K$R|f)d{E%JFug?tD@t1787HEvML!^Q-M*dpE(Ra$vJdo zE-7L*-P|()brAjZ=nt7pVW>JR5SBH{OlxEau9WP}IcSDdA58X08a^FReLH|kd)Jm> zVaSF>*AHVZ`LO-MN}e)ObYcSAHWcP=4Yjd$&ByBlUTC?A7Q8)^vZ}B{=`d0*1|#ct zrGDzoFa1lDKDpBJqVX<>O-oIPVbsKch*xFZk^wVfW3V^c>HdxkR1KIgW> zTY}17DSh7fq;k6Bk=%!IcR1{-Nt4tulB8tvM&HR% zFggO+z1Rw{xXwEV>S~*s@9FVwBhT%NTcd?#kZt#-PRH48S#Mmvl_2Z6imQbezPMBi zuYB>j+|+@x?Z+feZ3ueD0gMP$1SRis<0MlInqgPJ>tRGCZ^i!lHZ&1 z3d_0JILNf;V_UtNI1^s1gw#>x{~P281=Eew?^cGv#eu8c_5TJU1mXTTbw#>9s&$^V zm(2+Is;GUMPvT?Jw6;sbwgFnB89(+a%OSOV_7GdK&Ok%sbUpNkp4O?|8PK>{QG~9O z5cW$ZxMUxXQa;K%w@cIZ{3fP}5v8%yx=l%mI(y1@3B9+OcHG*JZwsX#*lw^X$`!98G2{EXa3dH8MfECp-|-vj|wxq z<~w-{lk8`2P17)4o1Q=3$K@Hwi%F}}6q#|p%*3G|TiCecegui{c8V+{{YMk?T~#i* z59qHI8OW)t0GzaRTUd!T+nH4OiZU}<6*EqD-u4^K}e; zRx*^|47VjHs^==M^PI3Y!%fdH>nj4$@7^$O7c*JoYjpTbg0!~_q`kRxk?$Ip2g-aj z`g7pYM&VNG+hXIq4YUyu=%M{og<0*N#^o@G2+L)Th)W-=@`koHm^e!HAXkVe1G$wlIU zhneUk+c(PHIn$cxiN|VJakL3cg7D&lZgR*q3O?~CC&Y+Tv>ghm#F)-Fb}`|mF5&JA z8nj<+MR94?tXW&t_di_DI7Y1l-^Pv$_Cf)PZL`?4#Dp|RY`HNz*UG6j1(JS9UY-iN zX`k5evjjmnmq7qE`JNkX+x5Ye`1T5t#5;Fu8Tmg%nT0lzZIjFlQaT^XD_u-MeBwaa zC6RsR|IK<7USBnLYl74kf{KlrKg1U7OER!~@zM?M$&*4xdaWlz5 zm?Q&;FAINsySLQj795xY+abNPLbG3USLLP^GVOUdC<{qKEF_p>l1dJs0>AXMqnRpg zG@Hd{F-)Q!<$=WZD}DH;lCkb5jB9+g!EILn_2R*i_%SFK{dcnZ=)$D)BZ2Vw&m3m^ z$ze@Bhb2}_zx;+r)Sb?4Nx5jz3Xmd(%govz4f29l%H%9EH$3_cO_0{=PC#YC>$J!NDnTDk|>C-2dAATH7;@riC9Xi!Z|s4?5z^>)#EO&daa>`7 z>7-~#V5#9iUO)}M$KqaE|&hOtilvCkj5+ThRg$YicU^04DP(n8=@lg5pz5`lfh$QyH4dKKba&&bs5no5;y)wTQSG)CLMb)PXUyYwTciOR)bfesi0!V+HGjpeDmu zm@kF_9G2(XId{Kn2-|iwZR}k6bI+T+;G!0AZ4Yz;Ez>Lvi8}S;;PC9o`V^vRDT=Ih z-=ZJ$`?4H&rhRZkF z2>oKz1d%#;CMq)h?ldFIcF4!kgxoZOcSUfPM)Qdk$;$O2CmZKt93C%{U1SQ`-W1Ah 
zjw!2KN-GHE0#}}ps*6=@y#GMt`}?V@<2c(*1~k2S*>_>19A22w4^*%;OA+pE-zSeF zY+F=2c;nozM~+@A1$cNl&EQTN}ZCa>3p_zoJ01|<+ke`LkZE}pjL z&Oeykpt0_d&5HVi$>2A+_?E*-uw_{#(y47$s(e=OkTd!#{7FsQie?|K^4KWV57NqK z4bazb?pNKfauu9vF;={b+KgpOX!#Kmq|5=b)ZBEpZuxiY+niy;Nsd%jQI`!OIEzCl zGNzX~Yj$*;#KObW5_+cl(1QxK#X%ev6(#6y^u^F@M*B$i)(SFjnrJGxX7wQv~i}07Q!;y_=I#0C3Bg{(ugFLYCQQ$n#V&lq`pX5-)d6W zB0zgDQCu14wFHlz&8zqH^r-hp)Q`rz)tUJ5X5e$#;khu2n z)-6*Oeg{y86WgMFKR*JDrM_d0l7eweN3BqA3n3G@^m84_dju*)Oy3c;BND6U)Rm01&dG_5q~M)(4rMl5i* z=hUokdvQ9^o;@0t=Z6PEzdfvR#tBFs12{>O}sQx*>0E4*I@^9xZ}q0 zMuqOZUhVq?7{H`rJOlr?5Cc=P-^O znya+dL6c^k48c-kCaPg&g@jAZMc==K%4Xmm^&(Y(epaLi*wWQ_MV_-n=we|{fcxtD zN_SlK{LJgds;g~+!XKN_;$b^y@PXf;`rgB-QUC3!Cn~g7x1ne zD-|x5=62>RKhHmP|C`RZJpngr7Y>jQ)mw>iG07A!5mGMgGuLaAPqI;jmW&3-k|jZRSKAu$?so{1S0mt5dl^FN|D(4rfG z4Z3O79~!6CsL(dlO=3ed^~}Vf47U(teqOavDV_EVf+TPyhKWgwzETda#52SxC-Bqq zd$L%#eXGRjL2-}p-UWRb0aW(dcM=#5VZF%675HRBb>mPk`m6AIUqJeOq(D&*!A*lF z<|vH1iA>|FX5PGTLRUlBeR0;dTbiS#h)c+r%_%XH&#ZeekFV8Ka*7mC?x|loYvo>{ z=J_mO3)jus^|kqYo6X$3cnM@3$)0&L!-eel5Z|fRu8Vzx?pv?Ij|1?Wq<6Jk;f9q} z^IYiy9pE}XzQ~3)c1_Eb#A^YJey+&toio~RZ#*Dn@y1bhrcMWMctBZphYsMVH= zbzSLiv#X}S62AcFwz~+>o^Wzh4pqR-8Ng}>i-w%e^Q+$<#iXoGS%k6%&p?@!P6j7hf_Y#&*=d`_3^ zX;`{$&##V%VK%ZMu3mb?zY>3=w+(x6f_V7~amz;W_OlPW7hTC3VouPv0AQS~z{Xp) zS7iFp*Msd00lK0+UkmT@s_)8i>F~90B(pqwh$eKRqYJ?8fsKR+SCXFS2Kq7jE5w80 zfW~E>EB$3Wte$ezy0a#S(tb7P^iBQEkZp0Y?=d8(=oxPDs*}}x9N7A&0MRR2Yh!#4 znfS|2;Vgxv61;13LRD1#gT+^S%+{~5moBh*&l3|+GoVwB&~`IVUJ%pego%Sep0s>;p=yDt{T-@&VLh=;AXPExmYOgC@Nr2{WGCA3Q50gmuh1u5 zcw#IW$oJqkv_9>h*NwAtI#7QZQx^E%!nLh@sXkQVF5!98yTO>1loI=NM}DL>R!e;N zSq;VP7V^+|^XQFD^uTdK%9h)R3KyXP=#*=X*&ymV%v@=8N4V5(Jg#w8Ub5weW=weR z+46Z;6w^#Glg9TjZXN>%8l@)kds!KN9(7Yu*pBl9n$;bvCK{D)lJHESO*DmtZCGHKg??rc7pDO$|)l-WCobTMnq&S8|28ZT2Z ztd^o3lwijf%|;@rhv;B-EsxZJaM!3s=lZ6k@Cu`v-GQsmSSK_a!{(|L$(vspw!EoE z-p(>Z${e<4MIRpbd-_+S`RJv9*mLeF6LX}XO@~>CZ@~66#BfT@>iS$XX9g26s?^g& zauXpULVzGaqyXaK0s#IFPZ{otEDY@&r)6n&o-5wRi8* 
zL?vXmssn5|-xHxSISL@gla+1wE#Kp8D?c{#T9{`vgbAu0OhduJ?op(u9XxaKKdW*9(Z!g|rzbCp|+vG`qi9|A4qp(>8olswv2o zIjKhs-0w-#c6-3HNz4+volu#A&GRiO$(rf4LB4b*ZcMA)b?eX;lx;BW{8N) zh{{+@r@9qBI1$*;ggO$m^$aDgU|H;gv>%`gOdSsSJy3neytpWcas~#YCt)el~PF=b3Aoy%54&BkTi@?C zfJ`1ctR}Kc>2N3;s%UM9cQX)4>n|4MSFaI%m*u7HR2O1{+Ax@)Iy>f{S)Qe_lew9i zi<6bT#m{8Vc2+^LpA-ywqchUo@)O^^bqruEs2@_^Wi)GW^sW8f{ze*VFFp{pcZENq~BHK!5BW694 zads2`8+Jtf5FX}@V8_lbvrLt}I6_4|VcCIp-IRzW3wCgMRD4kN@_YkrEj>J( z1v5IiqEM+uk*iJIN>mvti(7QxH`1W#t4AF9UM6;xJx2-<5t*rCIXj48lmJ1giOr}J z3I{sLEhQVBsDm5RbO@y&LrzpI+;KsBz(;#YfGdV%k=Rjkm`HOY(kyf*^*VSU7=%j;9MN@}7DcJf`uSd*0`9H9Qj+x=+nX2S84a{BV4OwKGvsg59- zA7sVa-L00o;crvWq?$MVc_8xnfvAc4ck&rKI{uG)pjY@+uaj z%M`=(6sy32K?s@l4{t_>YReemaxs#>Eoj9>nl_uJtmVe1YfVlk_K$bf>L8%Sx@Y%>`dY0t8C_)`;TUR%73r5(xhvN|lC~$32t{a|gOlU+FzWCyYAax3=M+{0RCwg< z(qE*;$G%A!WV+HY=UDOTwCO?X9zC-`_J+_B^+bCnHqXbb>-Em+i|bH0O17AEkE#z+ zFaBFv1v-CxB7xheItOng&EwZ{AmmDq$ts7hFRgHBu@`H7%*doWyA}fLW%yz}{ z;&i&Y4Fw}Zh~w)nc3O;gg{Uubf*R@ZcW8}W>jZfRxkx2$mWODnz+s`4g1S5qhn8O3 z?mGEl78%ihL-b1cm_crjsMBLOCo6FabK)cKV+Qo}CX_GK^>C>Ogxl%yM!$Zv%*v%Y z1Fn&JdXAY@;RG}KVx;gDd4>bSUn_Z#a6M?E<+yP(MY+BuPqA8kZyz4iB4-ob#I7?x zY6sxAGT?4Yznt=?@x1X&^B@+Z7O{7X>5A(%ju(C>H3{q-OIb^Xo0Ic2$j z0{nB&&R>o{uFar8`P&Yjr;h*J$MAQ@70^QOKksUIiu1II{R@c()M|Xv(thgvv@7|` zIT5i^nZ6(r~I4ypSrIs2MyxukMbQ7m@0_!v@}0H{U434%lQBR literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_index_name_pre17.xlsx b/pandas/tests/io/data/excel/test_index_name_pre17.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ce66c40cda1413be7fda819598f5185a24e4fcc0 GIT binary patch literal 10879 zcmeHN^!#=uYX7mhK!<8fgTio1wcKq-z9ekVZgKQW^xLK~lP;h41LS_v^h} z-@o8}&kyH(=IpheGjrxy&t7}2t)c)62Y`A6g$M-&MF}PEJn`8U8VZX35fl^-6e5hC zguR`Mxt)vQGmwM1^K(`YTN{eka4__FP%set{~iCuZ(u0-xpfyiAoLL727|USKc<Gh;v`{ZROfd*CxXmV^b>%zxVcInPRb=M4?>=ifv?hDcs ze}?jBFA{4KzSp>A?BiR3 z>#;~UbZxB^hK5h$x+5bH<2WK4;tqg{~QYex&RE^-EO-Q$PEt#3a^?As4A+1s%Gi>bwUOmPKwG1(T%XTJV&3mEk= 
z`833f?T<+?pAiMY%J{YV-hP~46pY#Fr#%0{RUU;65TI^wuLw=Oc63Exq;pDLlTN7lpdhl3)ERMxvYeq z5?9}^2(4=5&D%*D&Gef|Dg25i63(xS+a$LT2* z$k5ED<5V)U9p}3@i&9p<8Wk7z<&#mFKH9u*e!6vB$Aj6RZeFAhW&KA3fl=heOOOfc z-$4?f_ig|MGGYt|1w{mf2<>6R_D7t!+dEmC*xOtGn9Tl)8ED9i2C@C`KHevHzWm9V zpw^&qAIC(uMFLk(h9HB+18Bw9-QWmPW>>u>U%D!{fW+tGtdHa_??q^4=NH}Q!ZC#W zK%>r{sngf(gOhSs!_L*MejevM=jooZ5T3%kPVOw$2z*Zm0 zyc9sWy@^eFfiK2nJB4!oIVUy!Vu`(Fj%grg?5VL!@7iiO=cf>ckR`L*gY(lBQj3uw zRxzR&LXo^RtDqSw=0aDUaLov&EkK z!Q!@2@h7d^7O8nKa|#!Z?Hz4UYz*;y3zL>@j+x{lJhr@J?ujv{tpq3BrAThCel<>{ z8{m6iP`IQJry0~2GM>YCJA8M4GG)TXD6vy@cv*{uHvkA|xQg}#r*(Z?^na^uYTk=K zc{`$dbM?BWYoj%f^({u*6Hk>d>IMii<8qCNNnR#h8?+w%aozsTZ|=p=>0#KguqJVr z_m$-`Mgyk~;SUy27Aehc5WotxgC2Kf{iCG4CHas=>^A#Y^2rJt<$sc1J&2bj*CZY zSz~Kh?-E_r3>~Zu4d*5nc(weQ`zwsCh~*{*J-X&E%NVS%0o#u(iMpcTfkoYJ6yaYS zoGSb}a9Vfzckd%M+keY7H7C;6TnN`tk)WUmesRs&(%js|neFF=>xXhBrx?b)W(V{v z^UfZMdXQfgty#uLP=6WuK$AIq(56K%4<|oY%DL=oZ;b0njN?|R2;YVWQ!ewrUQITuC}AXBcFoO>1k^8Tq@8-btP*5XBdR^q z9@lk~;$w*j{ubpk9j z#v)?mqTdF?#yoF1CEvCyF=}mz5I)S5i@rIpiwKgT)2tFc=FIp^0~1^h)pQUUCLYI) z#Xlo4E;AyDli9?{=K7AY*8k`bPj2_`sG z+E?rd16>p~Jgds-Mv?FOiT5>0lw9>sODDR&ELkG9WD->%W~j8r*wEsd{%3M$sZw4Q zJrILk@HVx@YX~-BWM>Qta^=F!XmH>+8bO!)w)B1iN+!{3`Y|+o3@LK{8Hn|O3Tix# z!F%74Ja@OS*IrA_S|yYM*J78I#UUCK?|qs(hFThjNlV1bRWarO=yNG@1EqtO823-< z1=SA_58h;PfC!5ppIwX4T_gNXK+RI%s+=ICUWbhS2U0)zN$OV$ahdFZkc(H>lHR@@2ZLcMQhwG8 zlSdmvzWM5#^-Vlrwe2EW%zjf|f&rMi>Q;}4p|Qh}Ekik0?=zeJHDMLc5odO9nxaJI&~3KHPKG&c*somn3eojkCDp_7pPs9Sl|B7ZV(ROj2IW@d zoRc>%TPxrAY`F!#FMDCS!!5FyHotn?a-fy!<$wosvxHiwOKxZK3tZ{akDHGDHdA&ru#P#!6>&!uNGu$Bb%n#J2{NdFA7@51*ASD zj%&Nrts7v~n+f7AvK`PVX7+Ks(dntHAFqWU>}VR>oPdm*6@=?LiQqkFK}z!WC=#H# zaXU9{$!%a68BiWNsachls)m}EYxFiiot zHaxk%jmTwuoiJRP5LXlCVx_Z}j?>&f!4jGs)*H`RC)2C*HLn5gK=!QAu= zYq$wj>*=&aN^%n!r4vrJUUnJ<#lw|diOcFiZ>+E;<9qrdr)!wHtmJ6+4A-ToE2l~? 
zbDVHj!%R;BwWR^rH?Nu3->|$S(s=JZ3eny+i1ubPM7*t^?ycv|W zjyT*FRR`;UL>Ny2_y+rIqo89yDiWMnC7ZtBGMw?WBNr21xCJdKG&8nqE zeZPbGv?I(K=yklQ2DDwOy`@^VzMO}ixeAH@qJx%7hR zsJ7hbn=kjrB-ag9f2Tx`@h`6Gbqt{mM5}R3>CrGD)V3PNsKF|O0?JDS~ zEVuwd>%pCqB9pt>iwfhwbUQvSntbwL3n`Z9#KL{JfUg}bSfK*McsyxJagMqWFbI{}N&=7f+Z=wi9iwahRz93(nH z$|b4ffxtqAi;t$t2bdRigORn81d?_ed*E{g*ZUr_8bb|CW-yOW<`z|!Tr8`Nxz!JQKV1+#^;>Gtj|avyEx%>}%h+@B)!Gx_>YvC1 z$5^)?d*)nGdLWK5C@6#?oz-~3l!n{`hNIHS%}R7~mr>E3&h{B?J zJKVjF7`;>vkDb7*qXtLr8^gQC^ftZ=^1CK#Y!-DWWxo^FWEu+f!7)I>1yxMB`(DD> zw5V(2Whs%Z#W^CLNcd&RF&-7*Z>3QULN`*CnT! z=Z08S^&w68MNjcl*AQOuQ#TOqI!l{yMmqPNbtAHWW_a!`)%+OPFu8cQ%-z(hkz_j~ zATW$gSoAGAYeQEjFMp`dXpepdZ<{`fBMnvF`nYhW`;(c#=1@dLG-YA!788m~@)be! z;-pDHYz;xsKwh26PNw@;H1?TKv7p0l$rj2&X9q2a`zZ&3z{h&{ryosF>4GLABT{cp z(lTuNyd6xaOvCvXgeM z5ZIdcMV}Pm`k4Q_eg7SVy8k9MRjn?}cgRQ-B!NKtBP(`x0oj;4|6p>R#)Vp4MIJM`7ArFnM>8!$I?GKc!^S0DJlBcG3MJ8|YMx zv*1pbfC40Jg*bOpY6ktGd<5*vSr{V6YZyy z(lgzG?*&%B3FNXUEyQ+Xd;`y6w1Z~%5-2zOSt4+a9ByNP#76J?8ziF~6y|bo z3as9X*H0AC!&qh-A5%KT@;Q^K=r}V2 z;Viw}JKIcH5C}Vzc#}`+1&TA@wxn*pXL?o0;ka_I8_84T#}WaX9VcexOq=_<*RMr0 z0-by3s1c_vp6L6ytV#5r3pUYQJKmS zB)`6}*ZM~4pCJ?DGd?X5B@DfM_{1*FJO7Qp&Os~{JWo-zz2@U(N)$_t ziO9Nz1#%vB7k$4%TI-%$%x|xR7$?OFw%@hYUr=Suk+@jc5fi+)ywDw1J3IBfvTAD{ zq4vXLwzyl*>b(;*sJ!)Xs@JbL0b#;xwM#2+iwNc|N?=S9DB+=nS!iDLoYO9m`Y;d? z#`+c^kC8d&%1o|2L5CF)!4UU*Ld)sLBNX>t5Iuj&rRII?sYkbvFZl&~hw*lsT8j63 zc14wu^}MOPvCk+}0asyEM|p6$aU>J(eb+N zH=_1J09#lZQMRv>?||clq6T0IC)xx&-1&%ryPiij5jT!}S-EkX3uQB8W~ijtlnkSp zs^6Sw#cATM{5Rz|nDF>GuuOYm{mm z>Lzlanu5~tX~HZdSf7-yl*y(%f*}uBh-P8YVk}c2ECdBRWd(em|3Ddov}2Vp-Ye-5 z);XguCxppa{Z&g7Pw$Ys^tpC;03C_F*)F9GQnOT4@WcnsQ+$V6NwdrD4r(5Bf|EUL=o8qRLC(N*{e3~a!=smQKw-36Z zwQa+E8QGFQv#%@qa_fY4#gzxFJmE0L=GYu~(T>3fnb4_$H25vFcZFJ)1$}s^mCIM! 
zP(iII6P6DbP1`V{u!NdiGB5SxDPt)k=lOO~BB`IPWGN67Gix=!!o4i>v))t(vc=6H zxoysZwMWpbfbr4rEYCQvF$<*8*QZVo4Tbz74TSuhZjWY=k3JEXG zNz^d)_h(;hvA%S~oBM{xf0huBnFgPHh_#u1{FIC#D@5|#p;td>xtkGc-tGjkX&e&>L7^L%x+19JgK3S`Hm_i&OB;m z6+2*4}9{a#$+H}4Io?gnFG?Rc0!(Ec7DqFx<&x9*JR1GD~4&hgh~Ae z1UHYKeT^a$#jT7qUyquxNIZw>9?i=4izXUnQ{uH(dSygYs0^NZnRbNUeC^x>S-Lno zWN=g1+92FY)VhN)G-K&Ua|aD^x$mUde88-U3}o>WGM>w)!lguyfhOkI($M5z+fD)G z{AJvApBuIJnU$PU%UHDSzO*(TPZZ4NfaJFJBV5dyGP9UvWQL1X4J&2nd!;yu#52*z zYhm8Ax|T%fz_@EvWAp4O17F}&a@zBB8S8{(;#glap?L|aAeJ;#DcV}5$(SQnE$AZ? zd{5mqnvRySn8maVTwhKt6?8?4e-XJaX zSx?2iw&~Qqr{EUewg+wPepd4g1cMJ?g53`wuuFlLerlMBh0nOrAuc7qR_Sle{ecvd z#X$%)ju{3e;f`8c`HF4p0Y($f$mxfOOs`kl_~L6prR!I z&d09O#m8a6-?$JecWQ;i8G@UQlM)}H9+=%;Ef1pZJZ|p4D$*3@NgvfC+urF&(RRBd zv`)y7xE@g*L(K6hEXOR zq6v2>Yy%3RC}o@Ng0%{)JNq2gHGn>6H1XP|Q%$oG-$1$(4W#o`X(oi(CrSPyt2YPF z;?Bu{wXN-1Ft=I(#jC-MY0KWt+uggWW`aE)@x5 zMGdPN^=1Mlarx_`!*5ZRs14Ebr?B61NV{4rL71{`&wjERK>~O!I{m}-pTw&#ti8a|F7c(Ay{;xnDPe-g3v>{ zyRc9%lzlbVg8jtdy>)CH3$7(Z@A-aYT7S2jiwUK^GXvS_!H@nS1LxXF_7YaHGCuLWKMc;1M8=bZ!9pBro^lsscra=%dJDYv+RF@#&hwBEdB9P*+T8;X-?>}U^y+WU^c7*0V*A=JeDJNb+q9R9~W2$VI|6&Y-CXJD7F&n6-fbysZ36uqvN|ZR*okaWSuxds!|t z%()gkTdg~=+CQFJqkF+UzGl`s&&jhLI^D+o9%-)=5-B4`lEoA{Dxh ztg-h((>!uir)lQie$M3*-?Xy!th~OaO0l;_4NvUIS!^L-ht>9tq9lW^Ze89$AL{V3 zi>(&(O+Mz+tiXCkq78au*BW8|ULFdmtNFgi<E%ZzN=)9ob+5NOD(96yNJ z2Pave02H6!7eQDoq}~#1Eu*a>iL-8 zW6%{S#qZ-JHPIBDh#$!n>R+6OhGBtBLciY__t$^;*Y!8s;5n=Rn9z{-^W5dXx`Q z9=3sgp=hE17UgGC=po9(BKy{e|)YvT***FZ#K7eu(n0F7*o~ z4euw)uPW6;*N02>U#=a5KV2U#+#dow%$9xu_)`4w?1#D2Lxg|k3BUZIpn55xp#GIL mJaqr3(*NCEg641Tf9k%90z8DTKgxG#P-+m$<3IlK>Hh!~7seX^ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.ods b/pandas/tests/io/data/excel/test_multisheet.ods new file mode 100644 index 0000000000000000000000000000000000000000..39058e67b4d5bd7b32a66018a2c78dc9aa256328 GIT binary patch literal 3797 zcmZ`+2UHW=77e}kB1MW+ktTv5y(olE=3n*12F zqydA%Ajq%)2sj|X59$d{}c 
zh!m3(K>bIAa{xA{j_q6u0APi*&5xZB$S^+$LM{~M*J0O}TA;={<0kbz`s|E2lh-9< zKBhu_tW0gw9z}{mo#O8X^$#}igpy%rcgzk}Osv9SBh%0Bw&3wsb~ZR$*MjTo zql1Va`daYz{adb|ucdM*Gq#z(bynpVU{d2(9Mune<;wukHf+)bCRzi77=T zm}j)NQ10&L4mLlETOG-)MzK+x#7v~NGaaU=-r$NY4CN)dU%C|Obh&e-jP@X#RUs(- zb#!_+#cxMKZ*=m#W{pdOJnzSsWF!{QU8O1imdfq1qd3V zn84dQ=TcqLtY5?hr&$u_rnKJjcU~xi$%=Zc119QD<99&~d*j(cT#(@9 zZ12{R=?}Tos)Nl0-$Dy|=Cn(Ua`y>jXBb6umo5&H$q5zRJqymF?L|2@PB{WLq_f{X;wI1H8;pr9^^yVsX0tP6$D@MS?pW zPc^niqvUEkl9Y4Tw^M<5Ymt4pZ2eAz+eVaM&DNuFR!O`kC{&HYdq`QMBzIiVelKN< z8rtKSP@~WO;?~i);M9hK&q5rX;&V$|$5Vc<$@2${nzv~~4w}!ud5$mOw-M|m&dq1g zy?LO^oA9;u5F`MuNt|7E3~DUee(iNJIfJ)c8Eyys@*WeSoJvdfg2`}|DnJMKG@;YY z@N>H}yg!a64-t`BJK@)^WXDjKXhts|;NIaI9qul6-&RmihPt9Ii~7zjU^|pWzUSK) z@Qv#60N2Hs&$}>YCmI^yt4(sUhvXTP9CzAu>u04D)I_gWew1vU-fohyIiI7QK558uxN9F z=r@K%^q|2c2wqSQsgag`3o|=dZg(;Vw+-XPJ63axqDPpB3IwD$debM6k?C|up|0?K zbNz|Q&95{3ZxrUAo=^e+-@j7;{#U;;DiFqLY5)KW1OOOF`t|gOBYy~kX94Qr&bP-lJfLI_6u0=ID@+OVr9>J8Ay<~%S(O3ZlO6q+%{ z-nqBGKPP%{dA(wQg--Leq-vUD6S8?L17R>69Uq8lK%O4#o@qkj8{43Fq)d8-!^EZ! znh$q!q`CD2b-2$l)YBPJ(K}rDIICDHzsL|n;57E%LbhK_I}quli|K1eL&R)ZWb1Ir zxR9{o%BkJ2y&UUdx`J`*BQ=%q{A6JGKr-%d1e|)aJ{#glTYDPX;U^UNkyD=$Tjb@? z!RptNN5fa5osBoR<2TpzdS)X+MfLhY;cEX9qM>osMu3Yt_?2J&2-b$DF^+wrVtNO! 
zX`f}*(37`NFL>MGm{wt3-W+*ju$WYFnCDu^KAvKt98b!4fCE zzLtaKgp1Bb4M)bAsCN$Pkrms_^aRKO-7Oh=V(O8t_h|jgK;m9*N*mubgW3jaJ}FnQ z)@oIvw$Ailvi9Vik&xPUSh41c`6SXC)P4Td9XjpgagRcdV~Q%Hj-Wf}lv-ay5ch0n z@p$*Or2)xFi}cbUCv(=mYX$D~`$n%zQ3eZ6A~QE%K9|b*&7<2?VXatUWGGZ)@A%si zIiKgcH#{i}9}-TMAdRZ)JKvqm&przio`ea(zmREiS1n~eA^7qKf=oo^ z2-86HP`W=YTSdh%G8+2)Njj@T(iOI05pC;+XrQ?jbL4a^1KZPrE2C!S0fK%YfcZU* z-_K73#ROyz0mXPGVLamcSP<+&#eJ9BWGK%^jfDaK#RcYw9=0$5@Rjo!o#NXQB_RfZ zd?HDNxG@NZ!$h%jvv=r{)Qn$%`Ml&NbR#A0;XCA~qV{f!e3f_l^Cz2jKJ*Xo&&ZfHIo7m2$#v(~ zB$H86|MW7T+qIgY{Eez9XIZB3Yj-|&h%&6!|IrY3zk(H=s1P&Pf^>)09Um@EJ$%!H zdnoa+d1ABNltb~ARe5(_rMmB$%P5u>LC8bIGo24sii=B!BhfPTnGFIfik|)oDPiep*7i8ifnL zQbM*2Z3lfGufuZ?d%HXQK_>AB_4BFS9tFn{@$avSZZ0Mgjn3_8g(Xl~4a95Tk}aF6(TlxZ;f(c7*nO4>mpi!w7o!*+tchK*bEd$Bi#G0>3eon)<&3W zcacQ`TYt^cWwu!23Xjqfx0S5z>pAD;Rs3kA;poUF26d=hF8R)?W&JV{fjo59EG;@h zWMpwpS(d6Lo=~b?%Y~nLH+#+f!i*Q)=?RzYH9A#@Ee0XWxPd`sS8&$GJ!R7>E!qU`^OqI1s+Sgt&)JdB zV^CU>$K%Z5_r4Z7SZ?(9-BSb23|15$Dy)0kHNnbe00RS%2 z(@ft+SJuefKpqB$L%ku0KgwZcx~?yU0;B$3{D<~X?T}h&iJ~+2X zoIHb3I+r{}KWfy`Ft!Sju#e$TN2Axo8Yj~|m`rrp_Yku#GxGxKPg1VB<=emHovvHZ zP)dJ9u$%3o@7VOH|RDH5ZAeDxEoPIdT4ME*SPOZEr3n0^x()roc$6K9Y&g}*O5-(yW@j?g_Biu$-5+#NY{!Hzhaci=p=8cW&FT=)$WGQ9SL0;LVM=hy4m418xrINCN{l_ic3(6Lya#ja_20N+7T$^e z>;;DteN<)Bv`=%so%uTp$;f5@uuY_Y0r;mu`?31-`rEF7On$KbzXkirupzzvG-SW> zer^4J@;FJY;(s-QzY>3y%TFRNsfGMk@%$R%*EIVXLX44QO#ezckO>v3H3a~uNVhO4 KDIYTZnEeNEl)i8P literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xls b/pandas/tests/io/data/excel/test_multisheet.xls new file mode 100644 index 0000000000000000000000000000000000000000..bb6cb35e34a578f62038993e982f83a3df7f361a GIT binary patch literal 24576 zcmeHP2V4}%((hdsSb_?Qh@!A$B&;NnQxuS30x@6?NLU0!P?G4$lT$b`2Rs!-#ee}$ z@XYxPXFB)Hm@&um)H6NLQ`xU-X2a}k0N#7w?|b+CZlGspYP$bbU0q#W-P610&zas` z^Fy0^gmrWyI;2iwK=c@N4qRiR4iiFL;R5gLBoYZtBm^%1KK?=)_yV#Dq49Je@gV6! 
z!Zz23#D`=6$q53=)_CF@@9|QVU3CkXk}Ahtvv^1*F!H+CZ{|)D}`Z zNLG-nAqgPaK(d8o2gx3i10+XCPLP}-wTI*a32h9MJLCE72S`Aju-*;V%Z{m`r++RFXi+gp^E@1lOrrK%Id<`atYID1(-)N{^mO?{rBB zaEPO15CNymM#|#{{AG1-D1X$K2?`B|0Uvs?o-X-zoLK)j5l-dof)R!DUl|fQhFJ8Sl zZh^kEW1#j-V!iEVX1X=2nhMk9&sXug5$NHf(Nv0n38;CTz5l%GJ)&-J6 zBd8^&&IF({SuVEjH0QJ>?x1x|Sb-Fx74K`RMYISJ4Go9P>k~mWpRxeT&t>cvffJv#dH?1G0K+1V!;t0rkjAw1f zr3-CG3hdnM>^!qa)b>okA(2(1VQ!6f2%9*@&Sr zlP%^{gGyx&CY!*TNwh#(z=f7HTF`|-lFF-0v~{ z!IcF{KksCQMcWmwNGR*Ag2KU735veN)O4BAG9$vH^@A%F6#X(81DCKMLF3?RhH`#d z&$!y5jK77#*9Bih(f@!9)FHF;%^K(x8t9wU(s6}FnXk;Q4AkbA`>|3}Q*$G8s4v|w zaD_%zuOF>{w3g`sS7|6ZOP?_5Q%l#E(JzyJgyg}34qabd8KQ6~IxhIobX+Zx(gkn{ z)nU{j*qKKwEPJPwEVPP;oO1pGdP9C1N_WXz8RT{0?Cg65|-jMJ(urJ za+T7>IN-8Dqc~v28&v>Jq)`RX78+Fm zjigZpuu&RS0L`jV1<*e)OIz5yWGTL;{7LqN2*4!#IzfF#;m2j3(#07QH1;H!uRfM{M@Pv1u5H_SUg& z2nhYcOLGUrS5o!rgnnbD8N|L}Alh5Up~>88B!9Se(N3HbW&^AW#J$5L>9Oa-lG=!$WP&%FD}DgbD}H4#VY3fpX+1nMsby^LT$&Fb!bT*M=q4Ad3Y&-l!`Q%hva!F1BBh%edOvvUE*X> z$vk8>CXtBcATk%UZQC}L-i|bOZ$}!tx6?FPZ)XeDQ7#m!2R(yYq15?B%*^(SNMj2{ zyBlS{NYi9Ou`SOVL{V6til$U&-kp>RuuYYd%OAuV4*a12PxD*`_0I09%ti2Z6gFC@xNvn(4WiA4wMD*eg9aJS$04 z;svkA%3x2707fMN34(nBaqy-McL|^e#dqSyM4u;0sD1uHqKnt4S`g6z*E5ilsR}1A zRZg~AIniB(;-o7|`26sSdQQz%IC-mba@5KRtwdTUJyF7o7cbOvYN5i(N0pO{R!-P+ zrJVFd36eKr^_mJRpsQRl@kt;Qceb%?WHDcR$JfUON>|YHZJ5D<`Jy zd3nFvny;Rdl?o>{w&$#s6Vvv*yicD#t)7#$3MVzT=cbht)Aqc)CD#+wa}uaiW_Po3|pP#R;PR1~?gXyt+unl4jw(TVP8i!Xx zMy#k&4{E0X1^Z%y+OR;G7DdR5Tu=3&_6kriB{rx73zTU*gy77Q3IvoDZ45Dly!v1@ zY|!>BP^O(g&pJ%hgE}fe!9duc?krHINe~i}{*!u8Cj}_j0vptm1~Dhg_n>T1dS+5ahG;xQTq@99BS{mmfouq{(y5J~86*R)$4dh)3}8{Sx)=h&ztq+6 zVRHn;dEL`e7xTYTHY!dVi4N&5-9Ji?%vczwoH8Jsmr8+*V9yb(0tLsU#AT%{=1)?_ z;+jkJU<(ZHMZ~?LDbT$7RP=|47|Y8LaUq04)PtUd4{;*Kfy~T1zS5n6(p?eP6n5Ld z>kZuGnUx|Eq$E8PE#^nC;p^I*l} z982&*xH~ulS^@V3XTUz;80s^Il2aCaQ3e7_S2784eM{QLJR;1~ku2srDl1c0N5Ur& z0ENm-0tKc6I37h@9_f&$2PK585_+J1b_oF)0mJ4fc}b5v9+Y%umE^&fIwB~edGIiB z4D6IeyG125fyfsoVgp3uW6~#PK~<8K0l0?HTDakn-ZDNBfUu~pygTB2U)K9@uCC~P z^qs8t(RZ@mN8e$;=!kk8myO2~n>ML&R76DU)TllGMa 
zkW=A4(IrXS*obt(+D0XT(Uhq)wN5jrZ4{U&Z8K$Z6n${HAyLT`3a5x`O#E=rpil%b zl@3-GP{#xY4F!xqI(bHuV*+u}Md`yj96j74>{kj{7~k2jrYu-`eAfUg3HV<8E0{UL zc2>Z`2+xLX&Vr>!cnz?049ulqyC`5`jPJ-&LJJlwj`7lt!_X$I8W<9Pux1jKmKg&B z5zF!Gw1zrFfdQiLz`n<$T)k4?L*tj}kv2Km9XkHDq$_?qXz;W2qy);M(ozNSB0(HI z+$`o7E6dGoC22(l!x(`3?-L-&Yzn`ZA%I=SXmPkIq8N#oeL`(OB3X0vdTgZc zn0-PIUIj3p&F2p#^BIdY3!(m|@D~NThy?*e&^0sROC)#|0Vy8D#V?n5fx}6H$O5&d z$idv8?@JcLS1oY88A1#A7D+8%k67 zzAj-b#ew^to_O-Y6TZ%kkI58C;B^q6SVP?~UmV{Y5?n|RbLl6StN`>Cj3>a9%mvz9 z1`RVW<+~d%WzqWe(czhaZ$z>US@Cp2IKS&!`ab9KYiHLk7-c=@osQn*lsVVzY$Ce9`OvP5a7lA+K`QT} z_}~Zerjgbsf?jw!KG}S+^1yk+(|?}mKjhiDU;1QM4~i|*(~BJX%+B#(Rdw~xzj%%r zuy#Y#vsGXDTNI@%uv>pi-+uFsZu1YP&s%9QX7~9ud);SuiR@&+d-xf-l_gY^Y~NQHyiagz;M+OE5vS7opMAV}l1;6f!K-a0 zPuoOUn|y2%cF)5t`2M-KEqjK|`6znqec{mfmB&xqtaW~VYV3pKzeSQt$*j7wK5y?_ zi3U|jAj(!&R{52UPY%NqU(h#h?1@enrDSvv;yHi8IZ%@E%vS{mXI#kbfAN@wLw1Mq zg~1)x<&t~O(-LhhZ(2UT8F^CAa6dnP{i_d~yVh0B`FL;16M=b2h2F9I!2xFy?`3!W zaoY6L1?x9D@JHlK_;%gbH+M!K=yzeA|1X}F+G$C6(!sugcyQq+QN?Ae^E>x^yz&LAqFT{$lb4#$fC|orgc}kv6Jcu8fpLzANsSpRN=NfB z7DpH!6PN_v%Uv^R;q^{&`<+8$x1Fil|4$NJ+9v2^*~Qg2uReM(qtn?tW;HANG#Tbr zs^4U`=oew(?%$`nZM58Uqi5UWZYlRpwtW07$Kv#kc`ferI+f8s*REz^!Iqu1f-5^+ zYiwS*)sEh4pWJm$k0DPleQ19_^tr2}A=gj6@HcYrr*DF>e*|pmz%IiOo34TfHw_QN`ka41@DR6D-X(f!v8y-Md=p7Sf2m~A=eyMAAXbnkBV zlkO}_eO^?ugYZ%HFz4yE@1yTt_b$FR>sacLu4b2O``1y}_0BNsW^cdQCknUmJH2q|`wipW{o*y5d5!zY z`Fe0fm%CFB4RLd{OAI^h;Cs^az>2MYA#JT5Bu#9)uEp<`7h}t#-psbKT{J$_nYX)> zV+*gNR|5C!_%P(`elsh(RfXT0&iiz<;}tjiY=2jykZs{p+l)9_=sW)DSaDyyKf3L9 zpWVD^hm!1Gb=Qx7ITN~LS?m6Xjt&0d`RO0ULT`(6F5AxKbQX8XYJ2td@V(Ov%BEUg z-I8^%YT2hE;?w3x&w;{<$lbMxlWrMT=3QK^cW34Bjl-|}K3npu`IDutbsZ1z%Ij9T zPpy6uY=+l4lZ;;Z?VSB8F~^|y>)Yms1}A5gmVCRcEN^k*t^(oS(KXTYd}oGRboDrD zS#_y$kk^xOX5w$BX3r8jy4~OZczEH5mwZUNgwpvfBbpe`}zAh8#gXkwYuA%kKVn0 zaH&VY#zQ{Gbj%_K9UF3I+UpOyR}Y`o_0!9mqH6IUA;%?km&X41<DHZ z&3x{@cUbE<((?I{{VitXFV7p&>Vr?!7s2QWu}?n^H%UHhcOr4c@e^cZi`@36R&HxI z?|qxI=6xFB47hMU{x6eP|7QwTr8J;r#<~wG8v-TUrzs{eyczH_dPadNko?Q*>CQb@i 
zd!}EH6Q$ztZp}>NT(%uc_($H+y#_a(`j^E9^iJ>yU6fTcvBl@YXRaas!B+itc-Kzi zU%8mO%jA~cqp0>9H2Mw(-=I`Hd)n?XNvsXK2e%MyKV}z%t zaMg?^Kin?()U0==zWdr07K4vx&ze$w{#DBxb$49fRqg!z&#cMr$IC9pdM|&u=TTjk zTYJB(GU@$kYOmKZwv+A}n3YXz)}~;k|CIZm{d}9<-8l60`ER~maiZsqp`H&`n^?Wm z@tIls^>y>U8+tvSQZh7S>EX@&eJyj1{72>YNGV;_`HX1d(eUGTK~rl-r@zhi^ZecH zMa>nb&9@H5clplgc1upoCrA4wXEU1*(wTLmqCc3vu2y)Y&Kb{j|7= z7e>yg-Qat^=ODv2!<-^+R_^leWKJIIqy{oYAy%yIBPd=OeW8pUO zgZxSTny=?9TU%5S)YEUid+(zg+`nDD@td##!S=h;b$RzzI$R~gCSN@jbn(6YgVVeP zdrGp)Lqj^}@(Ln%9{po*NpxE`x9i-CwardAU0Eo+KZ3tu`3ToJWPUI1>YSgGd`|KX z^?uxa*Q#lj1?`JQz29C|8l2Q+?OP&h8QSu#nXue{quuH^7S55^m)n%}?zyeIU+HXa zrQV>*4U=nne)Ze;`^H+%I@Nyd-m2n~io%Md!Jb=Y4r+a^@X_(v2aYDUjCEd?To76Q z^}uvX*DW`%IXxW2<*qt;u3y%<{w8M0&(HcKPk+73*yxwD!{6Wdq0Qaam(LEgH>>$D zIJMLLrC0fJlg{U?`MFv4itXklj+MFPb9Wp#)Gy8A+V56{J@gMf5;{l5yWFc2gl)C? z^-R}ZE|yzEUcOC9I^1ELY0-u+We<0?{qg>v1p{hdn3q-zbh%tRc<1@_+T^{@m-bni z{m915+$vwx>0sw)M_M`U6ASyrf4bt(d(WSnvXg!~YFo6-KD5`!(QB8uUU!&%AV2BG ziJN9~^8WPy-Q{4|&I|uEoR?fVWJ2c`Q`X%3`bGDXZ^x}1eR9(CdAEaac3wJW7VF0njL{Oxv+5c zE*R4+Vc5nkQ_QiAjz%a7V|bQEClQ86Jb@Cr}Y~mYW z-~YE4O{(1Z!kICLij(#)TY0)goY!s7#ELfj&dw1^?tz)Lo zPwVue<=91ioFg3V2RI8om!~&%>RnWNesWy?m)%p zn1sW1hxhB6VpYh-IMhV0{rV^au;VvHX?M^kBbs9uWXD5_g60r6GaG@eel`+2I8h*7 zQinu^Dr^k45hyUKVSgM`8Z9Q#ua7RoNH_=5C8MDB6}YvX`b7@xmZe%IX?&haqO!fR7F<1cvd$>+_lFGj@n3y7xZE)?CBI!~u4dDIky z#&m!MN%&n4&NuMn2ux4t-}{(B<<6(hj*vTnIvxi&F|M8EC{F|0Gp^Ubiy00ea^TE? z3EIL|c~fAb2*;%W*=OaGL4a*Ya;fPS>Yy1I8;`mNm2hhKxb+<*o%m$m{{G`x6&b2%}7uX`3Ef9paxMB_JOc? 
z0N02|AC6FlLvHv}>rwt{Oq3o@Y(XUM`M96AeEo&~i+a}K09gY0w`!pI`42K;mj zpuobeG9ycnL?MfFARq2@U>ON!{#T+!H?B`BgDdfd{X+7`2(GKl8*b%~Jt*FmU!)C)^+sE#c^{^DgNI@A21(_mX~~s% zL*vUI)8KOJpurWqJt@)TI=>3P^>G&(JO`G}rjQk+h)jjQqa1%g1REy?lr^AU>1!;A zYD7BP2Ud&D!+|@vhSi)}{chamWEWP61ma10oZ_33Q`90&AVr}r+kxOZh=QhuK3Yeo z7`WB&$MRc|v5X}h)Ce3pA+X^Y-v5*`9!c@#AO5heL?7tu8{+2Yg%Pr{_g~4)WBp|A zUG_=H-)>&kjmV&ffs~tv_k8H)IRyS$ZvOWGwwp(L1+%Zy;^z4jEX&QKtCevU0B3Z2 zl=DEc%s~S(I%uXZ#ZD&kDFjM^8^msprLf~Ny*{6MEbrGc(0aVS2_=dKuOCQBD)ss@ zo=o?SJav>sf?K)7G`L(Olo-8&tU_i`8V5v%n^U;=F%)m9dzbNMx_1Nzzr$1q)4kK+ zO1zow-2zGjUnXybd(WeIgMVUzLvAI>!JjKXR@9pYSK`fd?`?rMYiL1VC>>g)yrtfs zZfB-@M{w}xOx{fQPJ=7)M!zf5xd2K-tRt&8y7y~x_g+CccXaO-kO{*Z107U755)yH z4wn+jb;AWAfz+8kX?ESn9QzDbg#2H)?w%0wjOq8c>t;sEf9(kU?pUeomPY7$jd9(| zfinj2iq3(qTWf?)5|J~yZfXFK#k|(gQxwhtJ4QVb`oHe0Sw0K<7VS3#AlknYp`$2T zBJ>8haQT?Tj?e=sk+ekUG&tqTup&4F}X=+(;L$ zAwtiicw;=u;H|-h%Xl}^h06yYE~z&{2hE}brrbG11E_8^S6PIPV=5h?WBXBVUKXKK zB?GB5ebVga4WJ})F@Qe~=RX{wGbH%yU7}ua5XF|p-&aK&1-qoyP;icdG@ORu=ikTw zKMmBu0=AAJT7mL`l)DA1-+YWrG^t#~C$3K0uX_M(4pJFeL5?32kL4q~?$^ zA>qb|<&f}aFV;iCO$%Egp{<#~lDHLoVwv8egoJd_Wa=K*69>+ZjmdzWZ?Yd51rqfP zUvEbw#igfZq$OkuB4Xl_QZv&s5(S|-uyF{U1wzW}lzjDK@OchiPY7;Rv$Nwx%DKX= z#zR~Exi!DklnO0Pik3nB-9A=f{qI7;Z#QH8A#9@}SFAVw&_X;U+;4#Lm_R~3V7;e8 z!Z+OckWe3UA)$V@Lt^R!b%_V|Mf{~3#KJ?iL&6AT;cayan}{K-Izk5xx({C9USls=&!M7uz&m44UnYF6ubnAR;1eQ z=dU`!x8S@yJbWl0;}_PeJFenV&x8XLMWRfhU!x@YJ9A?+z=2)dX|P3Y0*QnzY{`u& gO6A30;bnToh5%syX*?&*$c^Q%8R=hp@>k~nKkY6z=Kufz literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xlsb b/pandas/tests/io/data/excel/test_multisheet.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..39b15568a7121f8f5f1ffd850aa958b19203f858 GIT binary patch literal 10707 zcmeHtbyQUC`u0e74oD;2jUyo_jRQy{B}fe2-69QwNK2P=HwZ|V#L(#gBHc)cAn+SK z?|FIU^_=gl_5FFythHxP?EBj5xu3hQtt5wt1O%V}&;S4c4FFjph9Tz`0Pq+E03ZaQ zA?Qfh+BiXMob=V)?I4a%Io+(S?qne$FlPV|;Mf1}^z*EODn~M8MlD)Y-0fDO2Gh8+4S#>nHlb#e{Ph&YeqDXmPXPKjw-Gc48q}&5EFXyN-3R!6vDJ3mrA;V;0 
z;A|WWN26ZH#=98H#1(H0dwg5F5mY1T3heS;R{Jtm0N(;01fR)yEpX>7!@^ zT~v>(g$*ZZ=_{kR2?Ocj6jzZ^db};X(WfC$OaJ`9fr> zL=lChBSzb4kIknDDohLW1433sr5+df45YvCMB>-V(~ad?|{zrr&wl ztdFYnatM*oK%U5k)=cO8UKmB%1#*kGmDSix#!ObpyOy?PL;&FG3K5|6i+4--eeU`S zH(7bO^J2oiTi*d<<;cbPMm&nKg}f-oo%$ zaTghE$ZI09=^uVQF=oo=ucf@$JA|e;!3M|j-wQ~xmHIg=`x)kSX-=u{k2IC0J?xSk z8fJdQCg2e8)cbjz_ga(+Rao<}GKA#D3Pl#mo0ppBrvD(I|>puK?O1j~Z-lnEb(ED!|w_bO2o z3n%)&q|Sd1U;5wuq@V4!{2h1})ItIP$N*@!+^o3%;VQ1S4wi z21VI;7>aeh2JCo0hYGSpq`vJuF>8NYSsEi6Te{k;hN?ve9djCl>0>j$;-4u6F1oB> zO#7_+AG%xXs7JKR6wM3sUj`swRqs9|8$ngxusA|6e0eIM#b6t8%ZF^$k99l5s{3}oekS6;f(VDQ6+ls$1)O_WIcfHYU9SX^(CN$LM@_uiq?;MXg=!=z3tY~4R?Z}+c6>jF&*JE%d zYn^ysVfJYGpcm0KOcWaa7OUE&K8`r-5f5Ly%hFR_i&k#z>pvnwlSKl)KUl zTd|+Q)*m`Q^5I~~)b&JA%=u<%+t&IZ?mEgf4Sd@P zqHkA+UvW~nej~ zyHaZ|Nl)Y1qZ6Jj40O*mTwHUVzI)e~%H!f_%ntQ;8sL!6=YqmluMXD_$=@_^y>@tv z26sHf>uyo77&B8kpw$!4%nb=qYr=Til?}Z^)w!1M7dHPrm`Y?z?uaq1SwoTaz0^S6 zoYowFEvc-F=}B_p0=s9iT{f{k8x?d(}WpqObMNxmB>tLtoUI z0mt0X$H(5GJU#_>xv$lTnzWpDE%b;KyuOXdVdrNK0aLzBYcB?!@LNx@4teWdE_(Pg5X|l!Wd&4nOw(f)mJ*F@TP#}(y^}bR?v?6u*sh}5^6%@^g$(>8unXSg%2t$5CZ*xO)DcwK;IF72 zEEZI;=HYtN>pj};wtFr!(1J~3E6?YS>HdX@dy1~6|M@nF?qrtZPKxxnJI?L{qOUs) zXe&kpP|qr6E?o|&h4eRS+scS+-h>qZoQ0RQ0zj%Ef95Mxy*2Xh-! 
z#~`Pl1L0FVr%L(!#aeBER31$Mm@2(wQMuGdBnzFd zUNLaEJ*X#FO2}0nX;smB$@$}2UCP+ryOo1McV$<~laFr)guLGJ&o|BwS?dxwHxdMKUso1sL;obj6-=Frk;Tuuz#E=PtUaK^$Ordn;7@>q8k zkT*v=qgFM6d(JwB{dV)1HxI+=HH-Z-B_AFS*(QVryYN@brY zB+3cmw3w)uz;a@f^jFDbpBUacmh>mmkmNwNQi%=}`;J~n6v&WuI`rAU109- zk1YImUj*uKQ)I$}u?pFbX~@q!?PLbAhH(9S=lL-W+17$alHUcsr#pBMzM8f=H9)U* zSBD|yEhfymh>>G+Lgq-Mduxl*RZ>_>a7zc|B} zI33zy^F#zV4wLIXY+i%z%`8;)B$R!`EpQJ198jKbp1>7ng(*V^VjQV9U!Xd|aGnDT zgms5hqTSnJc)ZPaPVHkyx^n;so<=xlo{>-xUi5nZgq@MjOuCN&)PvuYTrMCXwIz~M@Ad2~6`bnl@= zS8kU_EBDf`zI6~HHW6iKVo<-vW14jKGK3k@ge#y(o4yJ@NY=takmSb%`r7($apLkX z9y4Sy>XL7@H4^#ea5MwX{O^?xGPTIwpcL;X z>{FVh7oo<_Nh8&C?|pi`!IC?$`N}@}jg?I&w;sdB)I-q$^9Nb|N;2Ik1M;G{_(rUi z9VR}$?G5O2Ov)LC55OkeQc=z^_JrRe)~{R&e{8r&!vmjtJvuO;bzMXUX4vDB1*YEj zSo$si!d=YM>>GX&l-w;ltI6{@9gpe=kv|ybIYaz7_tC0HlBQ#PYk@L|+#B=$(*PN( zlr*7!h<&?!7oC_FOwZxaMSy1?xHB=yRPeFcSU8Spp6HgrIU{}SkQ0GAV(4t4GSZ{8 zG8!`!0taK;K&g(_EY6Q*Dm7gQ`72s4D5H_j3v+jbo*_uvS&0K9rP4;#$|~7nzJd6f z+1|`zF}LMKSfLd2!SHllaTLQjQ!UndtEI&gZG_NCNwqqn0TK2fVW};5vnKOW@~v2l zBT42;O=fuT+pzrItG~*lfqolJ8p=y*?cvVokwu`CyLd_v@W$Fk3SJk zhxBatwS-g-d=79`B6xc%AuFa(Ojh6(2e>_#BPc!Zg|vzUeVqd`(x+60DS(#yRIk|R zG5zO0q{#XtmmDccA#bEW#hUh0jU;^vcJ>*BQ5bGF)8uQ;B^|WfH#%%D0k0k*q`Q4E z=kGy!4_+0oO?yrr$P)APxGEz>!dy7(5C9>PB02vp2KD_4C$pk4>!-^*(&aRp{UVQ8$;%~o z53S5wxh(NxaxXHLYbjcm&H`0nJH#o4s;uKMjhbNxm_@@@yj-fw3Js8$Dgbh?S!ko} zHq*NSP9@{>Nd|EJS-y*TXFF|70=?8DUA?SWwni>xc7&|M)}kY_3F$dD{=yQywvFno zz@WVvOiE~(vj^ou$vqET>n%;cRE-JOCY$EPIPaE4HwLkmdFJsHaL&rO_HpT$#~W*- zXUhd%#udK%V7>FJ-)6MZR7$b`sJi6)SJkBxk>uMAKh(@8TGY?vbFIr0(G&0N%q1t6 zx_vi2Z7~QsOamQCNoAM&t4{gToDk4*;mmWDxhg)ZrbYc2(AOr0v377`;+BnV$+kgO zm6Tg9q-i+_oh;O6$#jVhoimA#F0a)jP9Ax_B!2RKkJ|d1b*mdBiA8`}Rp3PfMZ?oe zqrCN(`2#hF(ZvIfM^^w<`(K)@S+ikUKMVp4o;2h#an*x$2{*iBp2BBRwwDsPqA%VD+&kD-@A40mS-7)X zw^3|OFo#9o$ag+CU>h%@RIhZXco!A@XO~rwq{T$4*Sj_&M6(QtiYMo!>G@r|dx<;uDH;XN<#tR8 
zG=Z6Yag&k-s1C924&7!Vc4xuilZd1d$}g%Y=_`iY^bPO@eHPAT0U+#!&CFn>%&|JksFN9iyJ|oXS6qUw63U@^KX2Vm&6qR-BawgNasSQBc<2dPV_mr1^}qy$y$7-@w#2G6c)F7rtv62m^D6J6@JroK?`qrnhvvWRR}C(G zd#)#OHQ}7(X8*)B!n+1C=+H7(SxV|@)9;bLkNLZ*PKH5d;T_xpl>c>`{j;|Uj}ejU zcu3N7l`sw($&f-KdJ8m6MShZ5An@*;a7cPKhKAE2HYG*Mv0Qy zr|a;2de^TX4^JA`q9p?5YED%!U$@DM0$HCF_7x0?$>lE?1*%TG`&=0-Uv?sgU9VnL z%_skTeE5FSro2JMZ~+@q)w;fn7^USYM$8&o$S{b;A20bZ`pn>+=7c@L()S0|`T>So z6bi+$^x|~*s+pk?jS=)DWDRq&NQsRIPc&XX%oA_wrZ?=~HAN{m+sYnM8CEQlK}CAB z;lE(>F>1CO&A2sO&t~kA$Og9rp(|3M-Aieh-{@dw+%l*22TAc!P_^nu1IY*wN2CE* zy)=B@c&_hIFoT^eFIswznRM}Om;$2_2oaZiwNsq#IR8s;ZdzK@8iZ@~K#!kKku{Hy zfG?9Uv4!x2l+ufK*7V4)i}=UpHbkKxc}}cTa#J_o9>~=%l!U&_mCx9FlI`01W_I`I zZ+(Scf?vKVX0((^@A-aPZ+-^yo|Ac=X{S18IJC4^>oeH?ETR_o_$#;B5{K>s?k}*o z2DFQf4^0NkHEf4b?bN^Z_#u`@RYLH8)Zqse{P<(q_Qzle-b@-exIeNnhPeIQ_0i~O z00iVa@d!MxeS@?_Ue^(_w)gsCIlsudcxct%+qyW^+1mPHpw*5fgj|b+B$t;54*8qj z!8lprl^Wd2jNx1hK7593Y-^jBVl?iWv9-FrqE{H$1nv}<7=>fc ziAfC(2)-r6%we;DsoUBs%%s`e3KlDGCDQy*kW-I%b}TC?LvKn~c{vWjUBr?=Cz7?~ zPQ?HIxh8|{&{U#KJ|(W)ddEblJGD}fymQ2x+mIimz-O5y8q}}2f4+KlE6a6 zD8OPnvh(!4v=+(h2I>GGyVMwnyq`db=({!OS!1|Frclsndi@dlL5KTeV*6#BL_9^` zVyO2eSLN?h-bEWGPYm1?rQoC>-d|18z|QV}miWVc|M5zR5wo4+#tPg+JfX&dkv9w~ zVZA8MlDl_QZg4Bw#Z)2JD6^72q&RAOD)LoS=Z42@#ImQ`sr<;M&D6K7;Bd|$ur3s= z{II;j#Xu)CZ?LWin)Q%36C{d~-$|wPaU+s?oV5ojk>Xh!{w=l_^6~f+k0ByiS{Dm{2A+MH%*r zX)eJR9WxX>AT=+T);{>LRF{)wdD+Z}UIO^BPEBohN3_4Q(S5(@wHF>peC*Z`UFi}J z?W1Qbs|YY^y}1ZxAM$`(c?8XULr8|jZ6HwaEYsWK=Uw0ZH~^W^RwTi}4x6cyyD@3h zSh~7o!M^wbfpVP@bhxKgT8Z*|c+k8~)SxKWPOVx-&mM9C#I!=sbnU~^FUafS^%-MTh)JER?$~v&s)}$LVa-3j_lWI*2#3*Yphp-mD#z z4NuioIp}@ikT(IR?de}3|1PFtzJD{e4Bsn!_}mT;euhVKBLgcV2{QvY9S0BOy3slW z?RW5yc(R_S^|5V6#*@c4u{F3vn$)oilY$OlA@+Q29b{?T~?xz%F%c46esnMWJqZ7rA+D50N9AV{WX zmoE!Hlow(V#ew_8`s549jJDApF`-B6Iw&beN719ot;+OJsa3v8W$i2UBkC&7@?$_{ zU<=zMJ>J>*EoI;$M^N$~<+h(S?f)ycWqg3=wmx`nqX^OYce(99zUxn_=Ktrew-7ks zGvMDpo%Dy-{(1V%vr0;GzXJUF0K}h?@DKCw*!Y*nA#Mu($_W2e5Cy(3a69}L{P0bj 
zo6O%ek_q~smB~LD=9|Jd8M13(PmDi>{~cd;6YwVCbPebYubh9aldkEfnz@OrO)HpY#Z>ExK z`83*}3;89n+?2i<0I#LX;nel7%KVEOe!DUM8jF$s-(dX=tx9sp@Fw;LIY9`Zfv3zA IxC8+3e@VK02LJ#7 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xlsm b/pandas/tests/io/data/excel/test_multisheet.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..eaee98a801cf09b128d3a732b818f49ed96e878b GIT binary patch literal 11313 zcmeHNgL_>2)}Gk58ry2)G`5Y#b~CZns6oTVW@FoKY&C4`G;H`yd++&rPS3f2!M*#L znZ4)v%~~6?-gi+^1_BZj01bcz005+b@9!)~L%;w4cxV6s0{{!IBVupoWNPQ6uj=k# z3Vg%lW@|%|0|`!(1po*A{(swl@e>${AC~W7K^41^d=S}Ul-3OoLbX~F^QRy}SDLt(RwDGusdWG+p{=pXnSLoc6YpWnq&W5r5t* zvTRuOz_-}Is@Ld&Pv5odYtJO%G;}_Tq2+cY#H#vZDemxTzesLW6xy0+Nq8}XSU}T~ zK31nXCSJ=L85z8HbK(FF#E8335oDGE$44J>;I2CV4CQ7o4HYA`ojSXwJCz{PpKwe_ zC#`@JLAs?LVzy--sJ4R9KoyT4XAXOgM%VwLF6NYJq zf5C5^8VmU)XhjwTPXrKt^&L%ZfXqxkKL3Z?|6;KI>DEhQ-F- z3d^_(Nw$!x`uItGL8*((r65@8pe96B#Ses#@NM&X_`JNr7kMy9dcDa}`W_vVm%QG! zEcoNyTW1(LD#t|ex1}4sXfCr?v)9QI((W`aZP9ckjYZiqLu=&X)90d9sN+l;_;48c zgu&SS$^P2?a$2iKx8-27!YapQ!IcfKvJT?LQ+?+X3-*x&L%C&-rjoG+fkx(w zfqWh4xBuN+Yy7ws$Y%Oa1KR>8z2C;#y}@(#Sirq(c1AI=r~-75G{i*zrnP*($9O#v0G&i!Utx^rOv$_^$XpjP|kSdQNdIOM+L(I+GV zyiH0rX9ARw+$EOY@wv#()xy|ECrz`33BLR{`$f0x)|a^;`{_! z%W!70j6z!)Cz9=uac}UbbOIL3;Y7qeW=^{X)Uqr^YcA18dOopa+i&hfRwM?y4L$kD z-J{;%QQwem^>B{`jtc~B;{20Iv2%_&K7u$G2?hW_0oe-3r2cG7d1|)yODw2Q0+!FD z-)?5!f`jD*hrgcu&bWN;!LR<#N15=$gguzp8cMSiosr28r0b+K_g zfr=RJA%{8Cta-H&rCXvh)=Z~zS!Fq9Zt@r&`@7Ee-FJ;yt)e|Odt77F_Pz? 
z=tr?dS%r}DDu8M}|A$6`aLiuui`XK>cFiFj=6F##m z^v05-XyMFj2&!d;ZZuo%DiFKA5}Z503Yh%1ryBRlfOh`qozSeBG7wB}^W7?7V?)Rr z2!&uVD~VemZMC482pwN_IBu>sVH6F7O97TIW~R_XD~FUr5V=ip~GXg(6x>i&{?4H;vr??C;*vf`|bxdCa8eL;j zj}@3pm^4nMF%gh?35E~8H0Cm34UKjB>~j<%#C0w*e$}G485J>Sk^=b}-65k~9AoqTFrh?~-A&F&*{+sZ)Q*OGzN8>VdSwbbUS)Hf6D zs=|$}JJ#MLmtmf%qJ@N|nDz=2hD3`dOy?RnVjguM%O`P1T}2xOzHIOorp4OV<#wsW z`v7+S0OqKyRZyqwfY`rY_Qp;M~3keDNYuqwx-NKKUsfJ z?~&GcGyx}OJNda_#7@S}{0M~>rw(OY9>R)kiJkUL?yI+KCm#Ki3yUj7pMsXi(b02A zm?Xb&kz^^;zoqpZ6-L7>eg+@Ukm_IMs*NvGRdi*>8{gd>Kjrrwzt(MaLRZ;^EljJ- zJ62~_)rFxZe@Qh|d%Q}BEG;*`0ZwTwsui$&0uH>Hy1qYsBdSBz&FYfn+ufkJ92t~k zn=tGJIU1^Oz@FCG0w-fkvCf*FI6%t--NMQ^PnVEINFLiLA7I4dY0E}ReuS7Wl6RdC#MQ@}VcrN7YpW3_k4pTR3oquq(j$5R)?uShF7ahx0zcI2T(l96uQ|(? zEoFA&<5l=H_m)*F)|Urr5EY9R6>m>&zpZuBE1Iq8^q@eKx=n(PgKroGEN6tre;lIB zokdC)?s6iwf{Vpdgjnx|6VLQ)+h}}%%jH#f8@_>#)V6DVE%dUqS_YZbFl-^-nzgxS z3JgH;flLTpg2Bu!K>R}=Y0ef29EF6_Ee$36q>Er0sV2_5 zSsaB9-}B3=i_QJq>3aqF(`To<^{)L#Vs;(z0it->tElmg$D{EIL6;}DhtZ1={e^Sg zf#W+tpCL-9wB7@7LVTlfY_sgRa1>fhBY^YVKNq=PGSX|&Poq&s7CGJiEp)uP4 z%)a0uYXmB*M;5G&j_i$?37~=rwQi$%!D+P&lcE>8m9N9tVI_L1i6k+mLbM9(i*baD zmX&iS7I)&##s**V5{$ww&k3}!M17S=JZ|>!xOrB3uNhlJrO*YBSlDZoG=q&n*CSNp z;UhZ?qg&_b2<84oQLcutX|djeSW<2l^y0l*5u?!IZgEs$+gr2K#A(x+mxnI|*kUS4 zQ0qPr8(B4;v!x(lF3Tb-$vTIelA$e1o-j|vL>&UbVls?Ar#;~U?GAyA@4j0Jb%1{} zM7e+Qrk9EhYiq&`h{IYy?s>!#p~zR`NF|kLEVv%>A!o2@EC{K6U<8XSlN6a|5%)v~ zl3>H93H1x)_6#i+9NF54iVOp@v03>5R60qiDs<4w#92lMc3X{(``~aLh_AgN6NkT@v#c%Xb!;8agIBQ2cGIcCv=~bTQ946Wn0? 
zb5PLBB&i%b(n-yi!kAA{K`Dz(l@?l>9Qof(ixQP|o4||8a=>{hV6N3{=WnW~`3O@{ z`;74lj|@5@$`Qw!#69BJi!;Em8kYvyvqGI@99Ovbxmy@!^+>0n;TTtB(Y5i#Eg~+C zW-0{7*yqZ~@}T+m!?&$yEI3l^$>x+r$1EV^obt26fL~k!n^9irl5<$MU~skdjJrWM z^=yw}Ptxl8M~bE}-p(r}YJC<%Ii~?OqNmFu8Id_0D55vK;eJ$mIU#Bpx~X2~Ptsbq zrArQ(jkehzkXHH(uL)r|#_QU3oOLgL?94;dMkf;qCz;6#hN( zlhb;VL*r(#*(xDMV}dT6R*bJXyb4pxU0v4a>{6mht)Oq=Iq%(}HOQKCaxO6Xva zIdlyKcHf;D&>s&xsmGZ#N;cUBp*=2K`r=q4PrpDvZ^bo)eKsbyXK z^W4e?6)GrgS!)bXvn&kmLSj?)_0mWc>W6NT@-?T-oN$ylsP0XEb;q%w}AaC?{ zf5n{|BC!tv0Kg%EqPu_jE1;9RjVbVlr^?f?jZWo4eezL$hS=nN@82nmscbt)rgor~ z;6i^@0zqXxyMV^!Ih20V;R-!Ce6rN0XPsv-?v~-snZdA1ti8ckQ@$&`W9J~E8!{w4 zt7`|m88+U}FQ2xiv!;tTY4S8_yZ8Bi5-nB{9l+1WT0rFY!TUtOvMu%AN1wkc@%ok3 z0EGkoY@KRP_43_gxpD1@QW2Jqq~vBpyo{@8Qp4=HUv2Gu{hE7%m8lRiYJGmMhc|i*3SyJOl-N=qUn9#tNR<67e-aQAJ9y#!*>Wa_0BiW)iTY3R0*l zh3?7o?HncL8~OZeWszGjqR0&2xC3S_GK8?N<=Z*W%kBl>H2#OoI0@*VfEtHgQF5p~8vy6nHa+SddOi@D^yG2d!ZZUeiwjdGjut1vsLMzbC^HiOm!WZm-4*lb!xyEP`G#LX?EmtQ?W!84@}mmazhKNQ9#C4I`I)U z1l#6X+_t&awO~S00*u@_gwc1^TJiad^p0zjv~pVS>Fo`$v%4~12x^VqFs98|&YM4# zUIJoTK7N;w_5F}`NE{E;s(~m$;6tuFs!ugM=!A^rLBez!r42FM$w~?nkv`g}0qe%qNR%UNc!r7^}) z1Rg1-{mo-G9*W(GP&&t1J~b~&X@R1-MSg}gqt{||@C4L>xmHi@hKwm(aELNav4}5# zZHw|!n5M~1n{Z)dHBe}X>@W(s>Be(`7WE7yD39m*D-ur=g~Llmdapf~HlYLc=#}OH2?aNW%dDxWBSJ zpoOWa6Oj4mj{OHc#H+utPi4XM&a8d*d#JT!)R&UNM=Jo7%!Noz&)AM5CmCR|)THM< z-n6_VRTQ4Sq^$CeM%mTX2cCBbB)qF4*Hu(!j4@ssBvI>D*0xRDyXjfEBE%Ekr0at! 
z8ARvD@Nb7E7QFTBCl-?r8%w;>Iu;A?y8QUqA(k(r%14otvMm_Wf}OM)MG*jlWuY zs@NSNMocH1zw)#iKDv4*#Kt^L@h1*R7N4o-`)l`tT{&9?rHZf*uBxnLra8O{(SzcMA=&LOkK5gHi^nsr2oJtkeS zVc!^^u$uWd=ZfACp<}5h!#^;DYM`tMG}W`WeJf8JZ%?3IQ89Pnoj`3~nl#{4F4015 z)+fnoaQWuMvk8sr?8ijSe5tUSt#7FFSQ4dG{DMy)tu>o@3gB(Ty+a{3njc1WFohd% zfPf|+bq!68pJ`)X|N4zE_J9IB{NARK@XW%?Kk|K1D?p9>?g$K8+}h$F1@}At!&Gb+VA1*$C8K)a+Qv{Y|OJ*J&_M1 zQ?FJH)eYBP@}gpSn2#V=PSRW()U!_{@rR^GrmD@l?OQAVT!p%N^1ws+^D@fFOSy`D z6wcKT#7XuRlo0_Y&Gi24tF?^FK~*Mo2WsrI-bq1G(_@aeW~39WHW;hsc*fP~Mg1NR z@bFI{y!_p}s%@{IkbqX~{xgz(riAh8a`vgOQQI^Qd|#(&b8ajXSFyw(MW%@4f0A3i zz{jqEuXJ)XrhaZ`H$d!<`l`#Y;C_GImaNtPbPSBKCZMDEx~h679(k_qY^4q zg^}%NX~o!?)bKA(4AwT-K?xo`EEpdh&Nxh2Bi$ZUCzLrU0(M0GeUhuB>utom;NA%J zepPWMy2t?!#~M!dD3)?A3pKwvj76ya+;vlP9Mia6t{=6BfQ`W9{s}RQogjN4!+wMS zBMDs4a2+sR4wo`^^^X5NRWshLv(FPtb3&WBT+A$U%CzPreN(5X} zq}Sr;^!3@KqZducr6~`tZoy506lX2F*PSiRW`aTWWU;|~&%@{mUBwdwaB@Q`<;{J? ztjsK+B4uKA4l zu1i^!D~eQI5%HWEePY9L%4qV~2+EgKMKR&Lx*G5=F|tg>DtV8SZ^AJr{3f6b{Pl%j zyz)OxCCFRpOMFan#znYiy=TF`G9D8!3utZ=-6cXtdtU%9h?LXUGp`V3>Zx&(z<+m? 
zN(+B;6vmHvI+J*mZKo?An12B!I`4U$eFu$pjauR6u(knKRTd&95!u})G`G@#8O)i( zq$3U~cez9Nj!BPFuKG@!VP(v)u%q1FskUPgg0T+H!<~-?_j0n2|J_3RB6$LWef^i> zpl~Lhgrx9E608>mIST&5wnyHmJPueHkUG3)S-d$l>*ZuHw0;QgZ{blr$)NZj_gwkT zczJeR4M_Rm>>iO*=y(uLrhL|2ACNmjy_Zi-)?K~EPCJF&(7)UfImtU=f60IBWUk-N z&y?$JKX&CgtejrSaPG(;kZc+>^3w!+z#{ z^o05tgp#^ls8(xixB4Q6p_%OFQ$Qp{vQ55PUZaD%3>VI#KtL?GhbXYypB0pWh3T+FT@#xTWOq)z#2| ztl?*Y9xs(OkqSGm>S$G=nC+|&DGMIWst!CSI0T9eQQGqo-S9646_>w{u)LRU=*4m( z>ruD1<3Wt7;UdZ3Askqz=sVAG$DOSELMpxIoV;sIlt!&6mnJ#2hCU z)IxQ+Tc=QCLplO{&Jby}g!)}h1~V*!{gWm6CvS7?<%J}!u|DXX?G<-3bAIFw0ge2s z0%0~i1-W@g*}ys4am}{0ln&z4ih%-BtYUWMZz=MlICP^GlLIIl!SAWH6h9$46m8Ei z#9L9-GfoGni)y!KbBa1L(dDeGGuSS|O0zvGt1V@Gg>(_qw5>O9B7wBb4>lSeZ0)RY|CuHs?HT1%9PwWo$8ytrfl+kGtla?gufrFXJbndA#=C zKqLJsRfi#lw%y)PY;0!IK76P2+ClpPb$22on-+T7NU+5BOoVBQM{k!~Pwc;5(-)vv92{T**tqoe9mHS}AH+1YEg7;QXcII=uoGiaBYa}$NB^j=XZNn+^e ze3^RutptcH?zwA$Bc5BrT^>!IDMcT(A`~txPQ3m7eClKrK9XBPy`|o>a*XeIj(SjP z8P%U}CJ8Xxsb6~>BIHfj*UNA|BvWC&Z;DA(qA$&TA?cMfD?xgF?zX?3l>zkC z4_Td~4wPwHa8^9oV0B+ga zU3l7P;f7c~_JaXW+y+FK~7AIuC%!2st6yll~afP61R1wj?G)L~$Wu*~Vtc$r?fpK=V zNoZ-z(R_52(!i?6QskDW+mo!+!Op~OXo~nKM*<*y!Ay5#-?5IaF2}$~k4(tc?cTmCAiHxSKDJO-t#_Hw2a%puvYq z_&O3DNMI@BagK3mf_v2@o(Z(YmoGLhP8QfI)f;RN(>Z&6i;2_a(2T_vbE}vXdwK3% zD%TUzmN0>KUG0<-66y5ZwzG5iDArFwxyBubDO`r@`C!Ry8owrUZN&;_IGE46Ldvl`Hxgc`m=vfpT zz4PLgK2MP|AIOXBB{tl6FRQO;kq}p%%jQ{Bsyth!DPE3NmO$s2>LXv#DTgXk#p5VQ6Xh!=O##-~5oQkD`A`39%Sbmcqi?wXz;?byJgQkx(JcB&oER) zdSdX#Lpfx~C7qB+O8$NdZ@BuX`*E=78^wZet ztOg}ug56fXcOSy^4C;CQ-Ay+xKXfR9{t^RF7YOao9Aj+nX!<{r0U7DPH?PlY`?+M;qm+U_o$qHM4$zG>Xq3_Jx331V=7OO&zWKJoM6N1ueF?F%gxbe@FB z>r{LV_ZeRnBHXtb+sX2YZ-P3`EP1)!#hzzheo+~5V?iLri{Ygl21kuv&v%uFdT)>Y z%!A!esR@91QDv5F|7B_F=ntm&ORx&p&EK)UER+#ACkD1!$Gy?RT(zYqf8Yf+uKE}R zT&AHA9E}gzxjcWcQ$$DSxslq7GnLkeKo>VVU75w%VNG2VZc#Q=C2C=I#czB;E#PL$bbH6 z!s>Uyzs{NbS#TTF0R7KXC%^0YeYoJ4CN5A6{M)F(@4~+oo7r=ih zKnL}2L1+I}4*y;B_d3}xQ7X_B#lJ87Z&kD3wftTq_@%`G)Iz57yV22a{_&pN)UHW%N_e;Kk;^#H| q#qIqr{Z~%@B@3!#QvK=Ne=xM73>3&tf3!W20MZ~+pQQQm?f(Gb0HLb@ literal 0 
HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_multisheet.xlsx b/pandas/tests/io/data/excel/test_multisheet.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c22771232961fc9c9a91f4aab55990461dab22cc GIT binary patch literal 11296 zcmeHNgG8SX{5VDK#&-^WvHRMLsD8=knZjl>F!XvB_+Snd(ZV8&$)lW zclR^1_ssK~wRX&U-<2xzFt9iPc)$|?06+ogcYpWR843U(hX(+#0Z*V`iQC&bnc6uS zXn-9|LHew2wl?HBu+a2b0BFeX|F``=egdBp^yRzRae|NG9>oVsYvyS~2b%O1TBrQZ z5PH5kvB<~2jUPT-^+@$1S9{)vd!XenLI6!6H3 z`RJ~YJo}T}z&d!xIw+Gb;}_Eob#HtTHHkm#5c+$pk(eUZc6{b%=T_4;h-=;`&LD_TF2;(jpJ}4N)q2+QqP_mm`6orZ`Fr1_Q#2ZklC2 z6Ai*`Lmj>r3Y5NAq^Ys1s4mXsH9T!j*>Y%6+Gg%1Z+b&&BMus5kn>r><@~E0 zbtchFb_AaU(Yz>h##=1GOPmLhoyNq+_7|*j$W*t{d(!r=y?0o8z?9CqMHlpD6N#e- zZ2;J9xD97#*~2p(S%VNR@C0cjw*jlQ$__Hwn|XP3lc%&KPBGsn1v1_D-(n4Se+S$S zSbbEN7F!dC3mLQWxI>VZ>yCJdqPW7S!m=W`b-RNM06adz095{h)>=(=s&j}5DnP)9 z3_+`bqp1yujrGU=e<=Nb49!2?dP$t3ayL6l@UhHY$iUU~@>>isc~?=HW(o~&U)d$} z+NfM=;^lTa5)2KZ02pbXR`0t{i_3yhdjk}g8{8!kSU3Vyb*`ns?{3~WBQVlBCP}?1 zS?|GgnZB65Op%rY)4R0BFcvoyX3Gz*Qb|poN>pNuvc4oj#Lg!P#uG~M)9q8#SuwsY zgPImoKP(NdsDGZd7eAWjGn-Vfiz*Vzr*JTlf;#{*HlHu^9I&Cd_)es$Zoy|+VVvp6 zPYE_KvFSLINNdNw_heB@A5f#@#=7PlmF}m>y71Ah;XWP80QWpc>@FTS8T5}JD_ns% zuD^ri`~pD$8?vGU3jh!SoA`suJ(@B#`gBsKb+bhn1O(4X3j-^G9tgL$RGq zUOLl?#DoSk+meKldd{A)mo4#o%uHXwsdkNz#ZbKp@ual?pDAL)4{Z@6kJimqCAe+8 z7xOu&_{`V!XXc!t6yb6PMi}e`q3DTGn!nvpj0026~Rv;$zXJg9Kw6$Mg$9NF7e5CkxHN}C*Ac2Ytl|$i- zy*G3D#RN;dg#V410G!4}`!UIDIe8vu7XP`nX2G`Yqsqnxy?0$;LphL6Z)iGrz(|u8 z^qK|ebjIZDF)$8!sJKu~C=Tvo*HrD6s^ZLw z)b|lC_Q&(kkQqw{4L@+}alGYsBHihTH^a=>C*%p5q{pQ4%=IIC{f2^ei@!ZxY^;!-{y~Zmxn%v zG3L@H@LIjl?fB{oJ1<@5*qOdadU|s_`N{G_kG#GQPNcvf_4yz#7#fc;l2evg{UFtb z1Bi2N-_SOBu_E45xc>d@A!g<1%fjh2@YthD;qVYNG%qJI)XAE0L0azEXjNo$c}5Ov zTs-rpAEe$>IJpl-SbX014$efr3nQ{FQhMhw5w5u=SG~506juXrWX-YWDy07Ln=OLKv({6-)|yZn0N`+YQV9Soh&#WGXz`TOol%{;E+Yb0Gr!SKVbJZ0BV9F)WG8i;qYO_KRYzjp)H@F9NRM zI&XcdIt?H|_&fXJykaNufg}@ZL_ZRWpHOkKFts&h`?=@%!Mz7MqcOxhIBis?B9U8} zTeF|3b$DLU#OEO|+ZNmDPUSv-!+GT1H$FGNT=+3?feH&Nhn!VriI+S}jp+@8&#)LK 
zPSGRuXr^r63U5t9sfLOx8{z2o_UN&Y&*-IIixZam_LC3k6?unRY#Mq9bX3o12Wt*j zNYLaIXV;-=fD$_Xi$~C)tBK3oV||HNlwBMySw3C$DvMEp$+n3@p0LBA28LYe9nFaH zKe0d6ib@f%*|iw zZ7)U6S8|#W))Xl*w17JHi4}82sXE3)H$Xk>8&plwEB7rP3AtpXAtXFtNO$Ua9T28Vmr6Ps5G zSCS@xa#oDm^`<$el~Nq)9{3i)cA&#@%tRx3Qf#?ICDaly(nZJ0xdWFkX={C5Fl7Nj zd7J+TUQD{KQaphm`*74eE2GDZGqOVTj9)VBg?hT-`he>J25{)W4%_(JIVMuAZ(f48 zK5SC5XD^O|j~%OMr$*d3bf`-TL(KNt>^N!Cbn4muQ(?~73UZ9vw`9gv4X2!`$T*7% zXsQa%A;*-M^D;+l6S2|zAgI_(<4@@i1R%S85KGW^E75l7Z${|1PxX6fIdQ*^nSt;* z3aC5|*dtX0s~u@&^ME32A#ZaA8b<OwERS>{``YVwRDhjXyd@HH}DF(6#17Br8ZJj

    * z57B|C^Nkf2I@;X%-%SgXRP`F63rlmL1*j1&HEn0FswM?V(lB~~gdYwJ+at?RMjEBu zP`(2Dq|9o#Y*td4>3znWtYr-|K>9|hdxA2?@F#Jo(rkvc&D%oO21T=0vL*eb96GKMigZ8o} zHn!;rP^twU&!h^j4$qQkpi-iT*`Dd*PeHKQurH=LP7&IQtHt6nf)C0Y>{j^f|o!@D>j^yDJA zHi8)4Ts%CW>v=17hwK8l=6z{El;859+S9>u3rEZ0tEXM5WJ!#Q?FQfWUguAp3tXu6 zU+qt~8I+zk)q1FCOW?e`KbRZ@*ro4KKKkY?%nIl949V9bQ{lZ!xsg%R>Y0{Ku(Caq z#tHG=Swgi6&}yAVl`C~OSNZ0?Y*vH|zpTdggt%3d?L4itQ)re?&9Gb@y)|o<8E%@L z87wL}VP7YQTCTP47Nt~u?0mGjNRSMsrEO&EXbqpe{AYi~mlh(u3j+WkqC&#EfB7qr z6WGQS^utr-y|j%<x+sRBwVs~CUPck|08tr@Ku6HFRC zOj>WfzaPa&mdE%D333#W`o8r#61HqjyY)5@s!Y0kZq-liKr~&e(OtE8b6*CmIZ`de z^_G#@s85i0l}N6i9`&uMxvg6TCtA5`oT1|Dks&b8S{8o~Db*lde!_l%@!4XN#6J&N znFxM@n1-dCf4W#|6;N278vOF0v^3@Y_nIbhsDp9}xJu=&@zc#5RkbUn{7W_QYbesF zOrQ8YHXTajurFnsIgg9r0%&@_eK!0=tdAgWSYl67`nWoqB~_Kq=cN6%sJ4x~(O)qT z#k~u>SVAl)#-#-cUZRWXg;r})5CH-=p`puk=;XC|C+|)hU@ow%fPsXU_x(q<(lMe0 z28h=IU7NX0h@Kley#f+dfdFMB6F7+%}QB?t-Zydsy zI%}*1{lt4lb;(N6msaq3o4!w$0Me<3 zDMl7Vt2n4jGurEbjpIkfaT{g`G1|&X4ilHrS>PS0-sKn~Ptt+keHK0ZtXX8AM|KdS zW|rg0r)id{WD5R5Q`@fzl7zAA=WSsYG80q&F7-|AT)Tt8C@Cy5W?w#xBusuzejm96 zAFDdbO!0NZg2!(yPpZa$4OoRr^O=(n1+-Pg9F7IVV+TnZO{nA4@ym3O5vLA6OQ_ft zRjlJ@_pmB|T{G|P{n=fu5T^R+*SWES%mwA7VRes^SYR}>uxtHUhs z>Zc_PAc@>UU!&^bOG!o~V!D7_tA{otmQ-Fu6nUpOl&7H9c_mpK)0D;y#4yTgI7}2S z1m)Ze;7ou;9Wy!l{i(sS^uySP&fXrh8z8z6UQ32oKj`ZH?X+!O8=E*AP4QHkB=cpJ zTkYkBPNi=7P-I}<_0>n``=~x=i|TAHM6(l3+q`ldejn>FLtu9-!5D- zE3AQtFwl?uJJM*lFD9tNTRG9XoH<`D_nUf}dfY{na-IgPO)--ByOn{5VBdXM*_cw* zQ_n8)-9%46QBFBwdo)T~{l^MI{Woh%90(`KAp!sdztTOBg{i3%i0$Wr>jym~XzAOh zvEz8XuX*&ntFdG;kd-CEEC3YGgvd@#*^Z(m8{)E8XXM>qH9wx7DXYQ#{x{SCT0t0HNs z%?9hJ6K!O887g4!DL>R&%zyfd)r57{Z|jv&Q3W5)!&2xulQ%#g-+NzROj3?~a{pD~ z&4lCrAflHn_6Y1y9-@;L)%ny}sk1ed`q~E^lENC|bQAd+N4!%M9l!Fkvyiyfoha1A zBOYK3qr-dFSX2~$9BS1*R1_Zptk@jwoH0KxoJZ~=BA9&VuLm)&9t6B3L>epm{3S}R z<<(QfSpbT1oGmOS^}Nrwv1c=RkA^v}BM#fKn+9oY{4_JtByUKtV>=N2)Fw5Fu(Ud0 z)@SL9K61;jdrv&xUV;m@6>XJO%fsHfYH(0e%_9wd(n_eCbg)$CNa8)=TO?Ky_Qm7T z>fK>3D3^gkMQVa(K0%iG!kTJ~iM}e_SjW}+tt@@CEsl4DSO=rYfIO?-<(m`#20VtdFDo6} 
zx$;W3fzhk`;%J?sr-H)iE!k`n054;{Eo#Z({4m_cr!*FZ9Lm z`jwH8b~cRF<^m?Mql$Ma1uX(|^_+<`p9UvB*97mV(`Q68-o!ITNmbsoT?z5@d!Tu6 z9g&r-_dejd^!O;Za4yp5Mj+XbB$FEEsvdOOm}hZ#pzTMcU91>s8Ld7Oz`%7k|BO~K zPJd}w$2FEL6p|5@raA4lYpwQk74G7}oe=%c%jln41(fV@F=S8|M&-x-+C3`GtRH}IXN5#c6BD`v(3MW?}-D4YOx!q}usqs+zZ%O!hr&P+-*+_aK>WlV#QFkUi%YhEZ9ZGR8l65W( zHNV=AL$3PNc~yNF+pt+?5WRzphs+B8h?2!coZX*k|Cty&8Ct|>4KP`Tm^yM1Bosm0 zM0oA&{lMOo*lMmAJI$6lsXY$+DbG6j0e3s*NlOR~GJz@T3n?t7x@?N!vqrR%)H_$V z;6@SZljiNqj^-vak-$32xM0D@A*{sCqA_Ab#X8wMx)B^3 zziED*>aR38oWXH^8r`DrIY!rCTu&nfmm(0-jYid93IYW$OE}cZ3pHF(2%TBHGL>=%N8^wa1cE^>xs^>vrHu`1P)WK!g0oY$KVY848)#3_uEe+&RgzHx=(f{K)&U; zWyiY!jtHCiH?>M^lVV{;6hMoh=Ja;YDo2}oygW)2x;aQ=K)N~z6T&&3N;=54(^Cq_ zKZBE)^*GGFfycbWD0g#MU5BbH4Uv?N>S`68S+2(k=E-4wB?YT^zQq{C`kF?u>PDA& zdBo^Ldl}fNrhOiUr54d0EJ#mqKHe)7G?y_?m56L#w^S4u&dQ&d96nBt`?MfO*-y;& zzzc)l0XGx&mB2}sKu+~q86^UPFEaQI5{3sQoY4J_tKf+M|F)|kg&?BcJz6RwKl0Ip z_nPY+T6?J1;<3q^tLMmZhnO4I(haGTk`vxi{(T2q-DduK#h$i9SN;PbBd|V% zrA_!Z*F5)5o1cx03@_PrrJN~A&{djrSMlQ;SKu7-HSc92F~BB~(d)#pTIIYp5H||d zhWA`i8_~S=@3wqYX1In_8~=`~)V(acVu)YbhrDtA9kW^(IhvZNJ2_g~ng0x3O-4kK zy4W$`##+}%yAEs@7%AflkVOT3pOV+)qH@PCq=c`mg!*RxQ4q&3rY`O=6H*9$-E=I@LQY9U^8Bi25G+g2uU8z?A%L5oY`mUIZ{m`&}g zVg5kdK=5s~@-1lAD7_z|E52h)MS%g{moT>4eMNJ_<>jRJIWB2E` zYiw=bmFBBdPLKqrds9r<5EB_}F!QXy1Bu_T@B)={jpwXXjJ}n$KX1M$At&I2(q${u zxI!USq|%q|E!J1FFs*-0#7Ge?k-bI?bAvEdMKKq3WO6RY_!dUaJPXbT(OFO0+oF8j zg*c6@Fa3+QCjlr1IB4{FvEOFE?tRWxG>&S7ifXd-;KAu4-RK)>2wB|n)qp;Gd==|- zZ}dnle5Vtsd}eXv<>%{NE3f>H$`bAk-Hw%GLi=O%ooe&2!E6(`zu8vZ>ir;zK;rJ} zOy_+{b++5an0YeYnsQO5lH8{iM#I-xhpNREBT4q|T5YUxCDiQo!jN)~T!hJN^@l1vp1*Y3!- zZRHwpX8E{HfAf$0kduacnFfh$l_4;~fONo2?13td_6{I6BL{~c(1G-d|JV0|@Me0v zsQn^4O3*RPB^k;xapSNGieE{N;`8$gW2iV6bIk%^c9ltJN$kOFOtfnMiu*#;R}Z%b z1=+o=vFnn6K-OR}Jyyq9(TYkZBZJTn%U2QQl;WJ(+`{nmJ(wy>JEQNXXnXOn_&Oq~ zpz(5OtzN3-lxA}@81hx9^-d9NJUcWkVKiSCUZ{tM9xN7YPjVoCLX&`hNI)OlqbdDJ ztShy6wtjXr$629OZ@Ztt)8kV_mZ69Vl$_74WL4?mzjditOH5zD0oir7(MU^YF!9+= 
z&k&(oKLnOVN~?SmR6#eqxv-6tFVvJSMq}p0$C~SxwjGKq-isORKS>Nbo|$+Ma#V&;wRp)XW0v!1PPv2 zUofL!E;<&?trnG$idJJT^4p3j>iGoK?e?5Cd2HG-s84c;ps=iS*Xji2XM6W;46 zhuobSttPh>=}z+1((}Go6&QVoUg|Zu(0M)VR%!Qrn_DR$<=y*9o!cRk;qm&J5%Ka# z>iJzG%q-8b(fDJPzO!g; zI`n*1Ce=YCh7(mORbm+Rq=7P%;iW`}I%T21^SHmSD<(^4vW2KhFcl8V6DwF#UXyYE zEGd;>Ym6UXNm?~`U_11YoOH8dpg)}4EPCuPMvAw7LZB^tEr|pi%%Map?tn#A_48HM z=k2BI!^dT6kPJkNdJ`eT+4czu${^vyk1fsK;JQP4Ajf!76HeF!ueEOHHiY#N()9eh zo6hm^8B~G%C5Di;DCVCz259eS`ahC^80o(^&p3VSfFEO12)F2Q9*Q}(Isv+uqHKLg z0et)}_;Em{NqX65`|SobrPVeEoZwYKu%k>1zefHQ+yYLm90ux>Z!f}h@vPou&DtEU zl8Wg+fErSwmuex8%44b8kkSgRK^hIq$jW2$1p14n#`eNruOHbYj{0bO$z+ zp&kBMq5oyYX(FR4pEnqIYZzWRS7$pW6%c&(;oA1>oKtX`O+$nUiasjePz^YQ1`CkbVs5R8GsT`D{$kA&cmlIgMEKm{lX5qR1c|4~fKj|0ltOf@Xn) zz5hJH_{UTKy z!7oj;kQn&4S%cq&fA6>c5>`d|$L{Oz3Vttu|55-2>EA-m{;M4RyXfzAvR|UOkTHsX zU-;juX1{Csy+-g$ivgsE{0~}wRt$dE@_SD7OG_>3zi#9A4C!|bzb78QG{imq$Lsi+ zhWxJOud&xJ9RR>I1px4SB=)=X?~d-5d;#^(Yxs-X`(664ocv1`0N|tj)4Bg(Xcc)l Uh@Jjud!Pd3Ag10y|Kr>L0c8%FX8-^I literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_squeeze.ods b/pandas/tests/io/data/excel/test_squeeze.ods new file mode 100644 index 0000000000000000000000000000000000000000..10ccf0da2693e7b612ece6b9ae28b4c2fd811b05 GIT binary patch literal 3218 zcmZ{n2UHVX8iqp&NC`-9B2}bFlSomJrbv+v2BgLi3=jg6&>{pydQ}kVhy+BcH0dBk zdh?eblpbk;rFRht8{Fcq`|q7|&z-q5?>FbneE0d@Yhpk}O%M1-69R-HRib3b7pMIU zxx2f#!!iDzaHyvz(gg;^xOkvseNav^9#FIkTE@c@j&kyVdAq|=7#Xyu7aZz@M!?}1 z6LL&;0L{+`LICTG#L$b>0Dub<0C4&yG{zqZN6YxRBl{tJs0n5EDQ3A7$Fe1}>UmLw zu+{oKn#zokep(au3uvuplzLr8@0*ilBh50jQgRSY5nqpR8R)!o*Bu@<`@hmxkdU-M zqN`p7cZWrmYMqdFAo~%Do$U&Vwau=hhPrl6@M+P1)3q9v1qK_QrJMTEbe(hUFlGAJCyjos0`8-#XjsX!Sew;8 zf=w`?B_KYIrL}~5w>srx@vD%PxmJi;6b0!KGMqqv{8*)kgUYB81D9Lo3CaXF%t5)f zV}#)u^+i~k) zb(iR&Nvp64PMcMm2de{T7F1nVF9(+OU#`1RpM6p}Z>>$wTCzbZ=h;DB&|)UUAP86V*$gd!a0X3Ph{fsrFl4Jn!SiXyRyzEX2jchlmU9 zRG%!m_d_k;Fh>veMs)@7%HI*2J!-c*N?}W>ie@ryGu^UC%yEY$cYe1DX2@hjWA3^p zq|ad=Pp}#d=qfW8Z`Ffp@jP8`w~@|V(4}Y==SV~PD+KL$OIogO-7n6~-t8}Q-+?u@ 
z`RI*o{4NnEj2UP<>dkXDCn6v~=j_Rbh)189TN3d*0O8H2dqK~+n}jZMC;KP>&DHGU z*g8PAxxz}`9K88=g<`rgZ*0T4fYx49B}4`-H%i>(wU0z)4r5rmi& zSfm}D1j?Y+Y=6F(%W7R3^!nDhTg}i>8(w!DA}n=K5JYiy>V0^Qhh-3q-jO6Czncjs z$)4%>oLi$LdG76bdqI)Fu0Dwp%cY=<~YeR!{?dz)*4kJZ{_Y;qa2CWcTK}(rl?Xy!; zv{K@!x;xvLGtIcebXA~Ko=d8kjJ`58#H5utC-8D#xX+8`fuxn4MgbjLUgr?UI;7BQ zI0Wx4-DhQzR8*S_B*;bI1m-!gAH=|-*JviE(aAMgyQ6NkOLaxD?gBQ;D*;0#4}!cU zT@<*+z6cK-FQle4e9KYpp{}sEupsLC1f??!^=*U`ioH3)p~GO`fkmTkb8a80W z1Y1(YU}@onnmZBfRc+nD=W)yOb$eZq*-;g55dZhM4QH#c>;`Z0}V7!gj8?M4i| ztC6eY?ms~bWWPksu~^85O8VJ7*UhvUtG`F9ccJ`U-k@g5WtNMqOTGyjVxsjh%Kc`! z`_uCU*ZDg9C)Y=ri{(fmYT>G#DZVMD_Yb2IPg)w*jV3#Qp1dDn$}xfy!>P)w7cInM(9&Z-!PNzY)2 zP_iIeKkBZFR#B8gBxkM)1JfM|)jd>FLADrC0)N@kBVRP*BbOc_I8G8RuYspnjaRO0 zyiJ!+w>e1THqiRzVAM2Uzn*2r_NYjP67TLVkvhwFN#|GzJNINQ_42SSQ)i}rNr5n{ zWHztaFgQKGq((ne@U9|_K7!q6sbaa1MR$1UZKF15J&v>2P4N3&Xw>r;DH`!ex6;^; zkG`H?>^0?IzK(qVl?(0QRU@`CYxp<;uAzXio4B)fEj9xbNU~&AkP&yyQm+Fwg=ZTO zMGGe2<@+@rl}hw6UUNB_)b_Geq!v;2M!uEy3M*nlqd%8lCF3`$#Kwn8qf@6d0Xv-a zS9(ga@HoZ~KX6C8o66&1Y;CITZ`W#L_D1{!T}dCxA;^GpDCWv?i!{Do$F-rs3Yp@Q zK5Io0rSz@I+NAyCZk5gDJ7P*iVfZew2gvkY!pQdyC<(3K@)sTbzfc6p6Z9i`A8JNd;9`)+ zY}bN=S4C^bpWXu*XtExlXY8it_^;)~Xgj1s-tkOU&8vc+;y>S+=>o!nHAhExG~HBy zPjPW-reFt(e&4rW6-91b7mliq=d&J-L1O>r@MrM-^3Ozd4ebldw@3=_vRj_$K0KN% z;g(}}|LNp<>m^p);38}n z%u`G%I$uj1*XLZ43Q@c0G;@{TrbZ-5G#mQm6C0;O@S+;?-P8@Xrj`0y109OKikKMj zsKtj_mQ`8{UPnSr{ayS%jxCXnc8A5emD8hN;l&YU;Mae+eDi#o-P823VnleiFlreFRyK4aH{(M{`r@}f_(bRP5-X@y-@zr;mPImKg#Cs!r#}*FX0$D oU;c9C{2t=>9Qzewo{qe$_-DqM7yxNb?*Pe%AUPv#PwP40?~_=RNdN!< literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_squeeze.xls b/pandas/tests/io/data/excel/test_squeeze.xls new file mode 100644 index 0000000000000000000000000000000000000000..2524b975bdce6f7e11e9a88e6641efafe2694c6f GIT binary patch literal 26112 zcmeHQ2UHZv)~*@SkOUMFM06yBB*`ww5X_PcYghwI7{Y>}hzhF+iUGqz6cd7J)itc5 zqL>gdyRNVzqKFv4yoNQ)Vw(R}H$C+9^o;P{f8II&f2QVCPuHEQ@7}s~>vrAhZm7Df z*RXki*G9xhK1d0D6{#at8l3~@y{WP;g5O0VT%^98;2cPb^nXYL>O2@oRTVVSvr_HU 
zNe)s~L#PpP(LUuYHFMS6pj*59Ew26aKtrkwXepaZ(^|4kdTA5Ml8taNlDsLV^WKp z;e@1i91hetj+MwotQgAQtZ@r;Bp(CoLysYcw8r$mS;PCAHGIBNgM$QAZ#nE*=O9PzP+2o@8gCGqb5jJGDT|0q_vWRlOzt5F_aZd7*l2~23kv14`hp+5embi z*zzgKT8iVX3Bq_AuNH!-y(!2YX6+jWTpW-Me4r`4oh_f<$d)hBLV)bhcaqG=9!T;d zM5D132wIuCqh9!$CuT~u7xSeW6RcZXHXwblB!u3J_{2APILTMgu_pfa^}s=VQpNbl zCGoKfC-i@?pclxX@0CH{ErY&Z27QSPx-5BQ;U6GyI!ozi zgA6(;k92)f9%=ek894i8&=1O>pO8UcEQ5Yj2ED!Vln^+YQu-n7M2DiU)YDs|xkeKy zuO{m2iRsbNOOL7{NkgPM=9#v^e^I&`0uQ@xeWhg0Z2gU;ZqP1P6chqQlKX;SzuYi|Sl_BL>6Zv$9Fwy)D(5)JVDqdNgkI?s>cHSoTQlGtl7 z8*q6o?gURvay`uFOM}3|`r9BPfqW2`wtzUhNP;k(peum1T8UJ5q(yfUbH&~b{IQ6x zFs?H6AOCRv<)0$i{kN6?DMYI5AfgNMKs+Qtgfb)%Ju9S1vYauTZUCZ23@4UaAeb&g zl8SsWh+0b*Gcqzn)D8jK2f*9n)#65$tlC%NoUf>Nh(r9omq?R+( z#1x4tqAt0upLOKHl6o9V?KIA17)#$mFfl7 zJMJFcsbRfSG-^Id`z=#w)#|sQAU((wh-|-w7sjn1Qqo}^Dw=4B7%x5hx!7 zHiT%BK!L40Oy|#>J!WkIL9iH>cP+!gR2y_69|ZQlz70|lC?5p&zi1#(VAqL!5c9Tx zz)n0=lWiwBFlmEM75Gb(gL_UZ`TR^b)k*5IF=Vn~ zu{0|=Z0MF&5qRUladtLFOg1c*)=Lf>x}{YFR-C-a&ZaAq4U45Y%3(vdw2HvPcb>4b zF=n!1u{0MsZ0MF&5%~GqL3TFXFdKLImc|D*?rpF%XO~q6YDqTOp+OS+G!rHp7E3di z!-j5Y&Mr4suViP_oyms9(yZmMp<9}>%h}ojb~ZhjY*;MKP7WKor8&Fo-*kbU4WG$| z#nPPQu%TO;v&)LgTkLFlGTE?LnyVZ(bW3w~`B=V;jg1!Eo`!Xo_`2!GxNe$=)MQ>O z5ek`8#tv%M3JMm*1ntQHrCSl)`UnkT2Q_a61^Z!wS};KAHiJ+>=~#A9i&jvu7AB|- z1C(wd2xUh7!VYTL3JSKt1hr>?((M7Ep}WtsgIcwMf+a9Pdow`kqmNM2>H%z^JY)@5 zSL7cawSw12a^yM2A|>Px+l^7;E3gXQ6oDN=ygI_?cpI_BljyJp_qt`0m^0e2$;Vug|M`~YD>!U|pnL#{3&4Uj7q7K4!}9+rjyaFzfc zx>R^8idYoV9VjIg-hWaROGTC_smKx~6Z-ysh^=+`XdGM#CWzbEXDUM*OGX=I5f9o-gXgu9iM4MeN)xwm$W((C z4vZFRA}y&FlDs-2M_7TQ{;87_!V|+&l9JQ-BZa9cD|iDXp}1}$UDO|?!XtBd>=TLc zPXa;GK<6VtoK&y}ym>{9J|65A^HPVt!S>9+q$Ds@FG*ZZSCKLr2ue$0m@3{8r?7w- zh1CJp`w&445{5^`CC2a}R$vIKqzjrI1Oj2(j{`wtKtS>?1YrU&VR{TsC*iOus)1yr zBh2G=fFOeg4!(~GuEzpS`ac=qBEj}@yoSf%E_874flTm@Ea0RclmSjI4Y&ljD;*qs zA;Tdkbz%X>{*W9oHM&~zxEja}Z(|KkoIDLoVVdNSarL1;gF&;C!4~nLw@!Z%?E6!L zhHno1htppd7D(Wi0^<_ng;6c@e<(v3L-0$1#6ZWxphrn~041?CihU8~eef2yDkx41 zd4T*VSU4>uIUL*z!!dKZfnyE^VBFJWuqo_V$cTpUr*N}`>Te<3QE4%FFsV!g)6qpz 
zRt^G|!C>jhgoD6PE!yY6X=|{m4$mb5L~v4ya0M?y5`(J&^95t1fKYg^k@CEFqndJQ zc$2x+(UK*~rI97drIDpPt|sb(_vk{Vr4S=NgTb^jK!*@IkREfo%f}v4V$3n2RT8p9 zNkWz=Nyrj*nBQfpfbPsCMNmUf5Je#-${Hwt7ppQqqCp841>r z0t1KjC2j2-4B<^cB&A>rVC+3884(az;pFHm6GkPmzku~l`4O_WR0*4y9_kBMM>IM- zIT5=o(!B~K1?6gjrydO)l3`v^UW?7G;)^(;;T+x=ltbGj$%WqO!Dl3ZL=e3l0V1an zXFCdxrd;$Hu0-I6V33G* z;XsZM0T&)R3Ae&Qg(#9|02xkNk^yysR`c?;R?~jLKZFidQq~8v#^j#puRA*Bpgq(L z;5)(hxPYX@6xePG8<{p)ICX+U>ZF9-%P$3=ankMIH0X2IL;E3x#lAd?zvf=Zx$w)Z zlfUPi8(h3my#4!6O@Y-T{de#U671?+nl^adPcG}BR&r)-;r1bW-}v`3e`Y*z!nV5} zA$5C3Et=ES&^vsa&f0g!P8_%PSeQ9@(YoQ=W}r(=9W< zWOk^WHtd$+vqj52?_Uh4eW-M3pUIfMe?M$qYkn{H;4y)F@a`c2ysQH!SJXV;(LCUy zfxp8aN3GsFZ0)sZbLH66Bb$x(J((zUyW+Ov4*!kj(vD~LX)Ox79K$!Ve^lthp+-_9w{ndLsQBKVX@|Ht=&C!MZVWQ@xIjfh~qD5&r&weaQqvaBoV!>Z31T1B^@Z(Qo+1zTp;mxLV;y|Tr- zvbXWJL5G}Hb=1kO3pqZ#$Uo}JMcwKl{p(#L%rmwxpAuHvW9fiq4O8$wQH?6rH?UpdiZWnkQch|N13?~f?U-67mn)4i(XNb~hC&=u*9#;2_| z$bt?ohK%nMrNjy+2_0$2AXpj22$QjeXjKGhoZ;*GHl}Z$kz3~xS!&%sqWDr#=?4^0 z*wydCrs|FLwGZ!SdHi`-|60LN?Xh-+YTAp1l@8g5U(K@HZoK2>pl;{v5*jabe)LzW z;l+dh>U4MT?^B1Rn_bJDzo+Cj|9VNUYdv4u-JW>V{HMN429ADmwaMmQ|7X1{HMpMa z!aM0?$Lqlb><-+`!LxxTHra*VSnTIxLz0a9>zA9oHW@H8Dbi|s*MZY^-Pk`a_UeHB z<7)1w_Wcm-QhH#Wk=1MMl7Z&t{rtw~9+~3nyKU6H;XXl|c#CWHo-D|}{XW<`v9P1B z@6_0`1LaMR%It0jA~;2fOT;Sfre7-m^z_)Ge*QKa{R<=bCp8Ua)NjbPf?mCx=Y`ujzFlelD)DUprJcPOzy2e% z!K}*oz^ZFQgy|ONUDZS9MuyIeas8#IU-};B8-J{tVd!l+^BhmN;_U(LX!>{QLN_P!XLl-o~1)3iV%Bk{McFH4QX?!1} z?6-s8nFX4tcbk*A>}k&8ZVfMNqJ73MIr89krz-DRC!gi1O})d4ccPqh}U z^vU<)ezsTnV!2@UL+wuv1?L@xTmRf}HBEEs!~uW5%9=j!e$B!cSIliA@8+rH{$luI z%EPRe$%p;lxt!%#X`V@6*3s%#jK|#4l;>+*hw6TbHuc_rJordnd6|uIc*HTSgDKlb zTyRaBP#>-T@zDClN%e`U_4~4$7e79~GrEk|fRW;qiW3qk}DBJBlvp+0E`>{#I@1buKBUdgw*ROG}`mjeO)67o? zLeXa<^AlKU1wOd#NQ&}`i8MfejOCk-Q)Av@8@4yqM~wt z$eRJ69U?oS>Eqg+il+p3ki z{&MudvawbVI`!}8*tPp+PC<6MR-;;Qi6FqX@$_l^gR?X1-6{{IJ^XNe_tljz4{H6H z6S;bT=d;xxr(5pz)b#kWb>AuPRTWBij~_CA{O8NXMgJ^G-d1>CHrZntSKWIuhY7(oEhVI_wrRZEf*_tW7t! 
zN19vPS=n}+_uOxsbKQ&u#XlW-H)fUHh$!b>QAL4!>XecPjR|WyIqi0qlD^-rg2WXi zoGh>P8Glb5<(IYp&X$$WD*uW)R~;AM@bb=)8xgA8gI`#re0}t}i>1MDpAz05cr(B| z%9s(C_#hUy>Uy{;Et`0f2I&+L20FF1{P9y9!in*YZh zV?;Z6`Cle(?o#7(cj2AI4R5m^>iCTctDd0kJji3vr+DbuCf*^Tzhh@&V+|a z!)Cog+%l!ZUpqyY2Vd0R5NNV>=IN-~Or7m@A(k)Z#vcgY81A~l;#1VTC$BsO8Tx@Y zIu?fwdShSg7vFn*?EHpxFRL0fqEqdE8hyz>^?gF4|LF=X`%+!^E2IF)bkKhGz^obKsX#5Hcau8FCg; zdmeGL6xV(tz;DHo>50HveEv#cLp$G$-#5VWbg{i=R2eA*2^*9}-80py2|OhU+tFWcbj}-eo(P!Mg*)j&q^knz8*XrX!@9ij-YStu z>yor4JhGHEaagR0Ny9_Hq~Q^#(kCvWN!tmRv=bYmDu;N@l*9Rkg-~e!$G^YARu8FA$&O%!H_YGora2^5_qXLd z)XVBXofFVDL546|tW@k$20_MCX$<6!kYgcZmy`(^?(d_eknv8^TF7|l9zY}f03mKO zL>L_nU*d8=Y;xSxmNIrxL2;4ENmG-eQ~3Q;BZUdb7igI)uc>TE`;&vu(U2X-lgC}* zU?Wf@g$5KFP-sA*0fhz>8c=9Jp#g;k6dF)yK%oJJ1{4}#*MPkEpL?zHTA_onZr&P* z|LvNKaQuG*GJZ1&?+=7R#@jGB?#FLhMnT5=xCxMP?2ey#!*TyC$oQW7Jjm330Nx)s z0GYlY0NVmYB4c?S0t*v}yPuw7_mTGv*0Yi;WN{*izD-@g$5KFP-sA*0fhz>8c=B9|FH&e9FJpY9CPFN8(tB@ zFE|nR`0@RD9P8r;VsPM$<7^zic{?{AZf*J%1eMQx8PpSRd~P7(&K>SqJ|m z8T^-1@Jj>uk163mya#0beq41Q6yJ;?y65D!ja?%0m_JbXmxBymIo$S?93 zYW?+xe>q&^;PIm9qk(%eypSA0$PrJG+x!jx147In%h1+_eFS7p6*Pi)-3z~ojQft? 
z*Ni4)!j#rmw`Jj41Ajb+@cd~je?A7olLwLt>IdBLtKyU3#TF1>yCYG4P_u22>=6t1pol(0p&TKRnaH_04F*C zKn%b_)t9libAj2p7;Aevz?==aJ#200GSN`k(gCQ*`~SKAi#<@U-KEybOIXjiO%VHv z&21bgoqwM|gNa)soT0a*!8TL0>a(lz@uAH?X!^=^GVX99Zwt4BkF+{DiaLa)4nk&z zQzqDYpIuCrC{9x9vk5qx*@_c7pq(R>F?g77gwNiO0a=2o63fKpFWAruuPKF3c$T{G zJ7>USC0Y$U3P_#sGl!Qaa6j9iQ4Q)jmr7kwSUBA`bso$p9UT?`P3(oX(wf43gIbSd%C;`@SmMTsZeblIo zqc;E^x2(EQGB#ZB4Ja*gFvf~f$?mT0?B*0lI`)a#30)_#%1dD0ziDsfR_vT9O+JiK zs#+@)25xUM!04mp>vMQxc8({`4B6;t_RLmS?iw(P)sgXT z4tx%2@D}*OIF>o;8c2&|;Kc%LL4~A0k&&kWq?@3}a&_n2j?J_qX4F!uP*8 zX8-o;QE>=l_<}k%73yUcPFCy`urey^=&~mXIS6K$?WigrXE8wz56oHf1*)jeHg^$e z^^-k=`ENyJua^Y4-VQL$Yu6jTv(;Z;k_u^;?d@ZWzAoYvWa#_ojqg&dCQU@c{%zP* zvg=>8b`W8=55;eDwpDaJ)oGDs56zHK_ezg;)Jk|le*OT`(5LOKB{h#glvSf=Zglir~?@=SWVOO{2q)&%ozncL9CYF=aK?VeRrD#XKo&QIHyqr5twe&hYf z`}EZp8yX3o4;kkXpnQuY(?Pbd2y!PaAp4blWNAmPR!^AIj36qXryanGsy)=D=q03zJ`kIbl8>CX+k8RlQ`U!$&ZcfeAFm-qorGmn-Qa8mgSj~Ke4qHg#gR`80!K=Uq@xkcXjh{sR9+@M%1(vrYM&~X3j7@L z;!)X_^oQOAW~di%vAbK!?&!f{vh(7JYc9nV9c>A75>vgK4b^&Dv8ua^g7n?_wQsm~ z8b&;((?;F2=TCB>L?%mjy$lf8*cqwJ`KTeu5LkcHozQi0!gRBK*(`m2sCbVkh7T%9 zE@dQvs0$m%24(cq{)z5jr*>|}=@i*@6?Tz=x7D}cb0n5g=T2c-DL!9}xx=0X+2$_E zZxy{YUaVa{6K>s?D`M!a6)dfln~Q#XYtFVvi%Ei|{*P|i95%EYMXucZ&u-!Sms@&p z0yGGN5e9O>Oa^ElbMI8zjrJ-d1_JdoH0sXtbyuiC6KGuCfwj)ADqza3=%wzOv?$yb z|5{yLsn$C1i!S8j))PB7rzKwJ%d^;Lo+Ho6g`m1 z^mmqoe-&wJM!Mq~GFTXYW68g=?oY0K&$%{;W~DY>LX4TZ8L^<6@#PIE+2`CIgkDeg zQ98`WFp`sE(C$x9O}MnrOSWF%fej9-lX!ZW^YuhrZ<3SdNpzado*f@=q2+25sVC#; zi&Qtk>Km!1JhH}=V0ZGh=-=BSLsru<;mh`J93fhPZz7 ze!`NEr~gQZUd1OD-`X`gbxx3hHqMuz8z&gSl>rY5wumNifm zgyTwwf~j6pC-Uw;xYYfzqY?|!o%%?I;r#{Z;%Ni>j+7lIrVNxK32vOEh%0u{#5#Bj z3E%8cfK0aSZcgm`B<=n;oOzZ~E8I8M z9J?zkukiBkv5D`PfS`}0!M!+3P?Ja;SYeC5itI#tvq=k`aDuWmGoA4kDtI+EH;LFP zc#R;2XF>bS_}TR?Td$&i_EEWYjE7{C&{WN%#MSx$=PjEWdq-P+^i=QCAuYT5Vdvw! 
z;4J5GYE?Til`<1g@TZE+5CW~e+iKoW8>p61p6uf@LM{&d!8Z4t7ua{?VBStezFLAk zc^QZm&wSQ;rdF;KO^>EK6gpzrxaH&bwA_g6Sp2&i=@amygO$qp#0(h*rq7Dte4`SyFeZB10rLsa z#zEaPAylTTY9Wc+K>~s7vCWmm?$b39!gpj%10WKy$<-9HN;%2buk#0)aPsWJnYQfO z_BQ7Z9!=@STeRt7!>U18FHGh9(~7;JORf@ATZ(xSEs6^WTrhtzXki)OnsVJXtby zU69Oppi#)uIqTztk5oxqIt3`BX_YrllU9#pCe-&QLuCNytbEk&Nc#k#Ea#eW)atoXNxm;F;_7 zqZQm&k{1c$w43*!WBWurqZ*)Wzox;q6tZWa5E77ENKv0=nje<*Ru|FeyMM~=wSW5l zeXAua!6OmRy)O&1hdR#~J%Ft9R{S8@-Ocf+dX-wk01O_Nz-iT)~t^ z@8R^?K8iO^z3I8#{;u)i?g((2)I%fW@V*LDt!i|i#FJ)WnJDWU6k)rHdyFd7*w_5+ zo~%SiT$@>Ew~#Q1tX`NT?i2TET8+nyE^bgzA=&Tt0Y^~Ja4)@V4}RvC>{UY4dZx&K zpw7(#n~>-TeOogw;wU|yl-(z|o9$92-$+Z-PpZUC#y?d_yJ@Gz*&flb{;{M(>D& z&C)(dL_?Fd^!4N<7`?TwBsfwR&<&Wih^&CZ;Sxz=sHa@$XDlHPKjWQnCwFjbvU$x6 zdg3}8$!}~0x&_S-g|7EAahOfVW5#5XuRDGY+;n;)y$5bXVa09reL}=KVJRF?=dzY4 zpiFpX!605%uT~x3*uYL@Vpcy<>xE;oLY1mQ3Qb&PB1J)sr zO~05&+}f4=h$H3|+t7#O8G#o*p+?$WWYfJR!xf%l{Gg|Rhy zGo{~xDzMg)o&XM2)4bL;fc0=iDFpl4gx)>IKr~%ijjQ_%DJ^dsD6N>yk_u>);ZE2~ za^SU(#oMKax(BbJ!%KwYH<)zle5^u;_Z%KhunmzHbu6kRtG~{@$|*KF_Ed%a>j6vr zoyyzF2pz%Pq}(svI@K@Pg_fMs>mBegaD_}P1Mf+qalUFTGgil2qVDe0*tIi*hqlYk zzQVq}Sn0~tB4aAE0D3m^^7+VTc7dfdH^gG+(Fms=aG+PHCA)p@6dJc+&>Uq>C~m0A z$v#Y~+;z+ly!kvPL_v~CIclo%oj9^sa7c|!!g^^Lh|~d-?(5;mFMS!PcH}Io3BB=(Pjzq zBC83Ci z&r!%Y=K6vM!a@vDR^KjD$8b5w;J)(rsWuh5z;(*Mcq3@(nphqZm_C+g#>B?S%0U#g zCZ*3m>K;=5plVO=DW{e>f1!DaO7SSrNkPx}u*r2Y5J)0bm~0nUi4rLG;?ah)j#OaT zR2F=-XW6`c;J)?D=2Z2FDMp^!IX$j3@LGs++vl9&#iikR6A z{7fLFV?1Dp$FCr)aAFNOp%Jr+S1c+YvJj)~x%*bncuT%A@sMP6tQP3 zA3@%zI?H;1b%k1*i9j6liFweMEfL8_Bq}3B<;Ds-T;)>Md@rnmXP$5N&EQyga6QoN zlJ?lQnU!wo265F93XwgKK;QD^w4OM&!t@w7FCMO~cFNV;v1HS4@sNZ~#Usk$qk(hQ zzmslD>J*#W-b;wpkeMP_ud^5-(A;p4|Hc1Eiiy`NChk7Jmq)zQ02&=8>G$Rv;jRL7n%0 z{6f2-I4zZLlTmD}FxWHKbY{tYL{QL=#_Mcv&;i*g{O*BWGnYC}qz7b>g9bVN+7$k^ z8$z~tCQhD;cIGgT?|Y(wxO)Ipbh-poz8Jp{9LbnB#2hW10RLLZLk>a&`dkr2{AoC{ zRr$knOy<;8;K(IGWVcO-lrGHe%`}|s9h`Yg931|V>HiTdke*A4m$Dz{1qE*c4`@M? 
zR5ha-Ape)93c`D(CMa=k@1*0G_3}9>80MKz_OsSm96!nUPb$0(T~dd?v^#u^5LNhM z%GS2eNk)N2rA$^8>@nigaLmNdM65J#<|lm(&|duTLakWShCUY9Ey8e*{z0Z2J@XhP zWgs{Kc>RvbV|u$mosL2$(v8eXK|+Pz-BbCNR64kZQDq*4kA++Gc)E{&ER}#(I5fe( z@_3?Z3M%8HbT&Ck!lwiG9o64bQj=?Uu`aD!_>%6-oL78Tf8OMMb1grO)cak?WZ?3% zDt_(^9cnJoQWC9@4T4v;+~vj?8SmI-E#tuco{K7;J zckp!g3}$ne7@sqx(4Jd-UZyVGJ!GbOrz_*`FpJdtH!ed6petfr?JWWH6N^oTxR(*` z8PN0E1*)$vxJz=u=P*7gZt1Y(0=ZH7Jz|n|1f$iEOg2HTUimwd&Fr0E|4nD zv@UE5y+f&(UpA8*J-wP#vlzTrdp8J7qW2DdFGnXP1E?o};Nj6U(kj6dr!Wk3B(j-N z{Ge%MX*F-4%2(wByjl@!&qhl+rrmv%1D$`q&7}{dY2QZEok_eGJ&Qs9>E;#c*nq>% z<3TtV!~w*tou1Op#PBt5wMqK`0kq|}(ixzBaFLs}5yRy{cCg;?)YtLNxLjTK$E|a; zMjl$@vAhsX!HPM*9c_mXc)WS44+-RA=bGig^>NTQd(PZUVaYP*>5p;I4A2oy$#c^+ zOO?meB(gKXR(X$*3B1!*BCDWG(pHQO{+=U)Aov8*XBJFI4|Q`@W2GS@-;j z#D)Fug7-r`zifP2E&ORLfb(zT-?YQaD3`^xpD4<>e?0S%A>&7TCH%Ltb>ou3HBe~x1RtI z(*MOBKU=xWfR}rrpMcV2mjN$#MwhXE?RS160RV+atjpccW$RyS%-^kDDF0^tqvBjP jznmR@0vuiYUtZ)d(xD@x>)QzdvSmg?mN}%j1OWUG0vzP8 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_squeeze.xlsm b/pandas/tests/io/data/excel/test_squeeze.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..fb19b5a40efb73d5511ca0631dd0c3922cdeeebe GIT binary patch literal 9122 zcmeHNg;!Mj+8)ZGgrTInyK86=q>=7!8FD~cQV@`kZt3n+VWhiDNqK3!~f@NG%3c%7GgF?8& zUWTlxD|AynT?|?puXac0xL=;%qNS?4)s zMo`TdjY^(^vkY@vmA+_XlVA#LOLS^+C=bf3a74y*x#>_6lJo(U*s=Klbl3DIeHt84Ty@ zml`(x)VuBkS0n??dRE35xMk>ddKjuvJ*KME>Y(wq<(O3yT3;QJpGHaHfnRxp28WZ z48Mv2cUDt3YY3E+ZE-uWl1NqzV{)?HVIFPiGr<8mPwbqM3 zh0oZH(fM?wOKpr~AT5#*q$dHe7q^3pOQO-c{nY1cd=*i+_#(7To|R!K*RCEw76!Lu zIoFESZk*>cXEWz%PZYhGp2K2U%HNgbDh;gA%1s?Tt_6*8=#rq}6_SNL5=#p*=vC2Q zwz#Z9n32}}UKv)?ESR&KFqRQ8n_RT>P~w%a^4??`K|j>OcD~B5A3}Y0MWU@~Cv0D1 zk?jVi@iw)Bw4XfAXv4enV^_`W*P!9Uz2q5t(np_j7GTi8cQlmc-7SdLRo;I%5FABW zvJB5#e|Hka(jxO~_!D`81OSi#P!YT!od5C@PiHp=3ukACpE>Q{K7#;HY;ctS?5#C^ zVLh;dfmpKlWrfGr4I7^O49uTbKuI^afQz~ zI`-oW((xl=jDZM@gq#@mx?uZTlSlf9DBjt*MhUB3eMtI`#a+pC>57%!BWvFB4qNav 
zJ9Ng);EZ7OX^Ddic?~(~q4g}7vHrW)wXA$266bC@_q!PX#2i=0Fn&@vYuJDQ05+Tw zaOV7(GX>g?&I??iAL91+)W;Xo3aHGFLnBl2s6Fs@Kb>1zlgY5*iqvNzmR+xSVxbj& z=ks6>zT9*hTN1mN!{m4_yD!RB_#PkacuaJIOUin%2P`GnX5)k{ya0L=Z-kzlBQ~{d zad|dm#yVreyxEHY$i+(}*6by6NUsgv=GEsxNKVp1;jYngs8H&WFXQc1ezV?wLSqEt z7)PMiIp~NhCn$!n*lTl{V^KwJc1vD^*9)B{(61@p1(&F3f6#hFFUjUTI`Z*>tg6)d zm4Uo$vT!2oq-9jKLooO)HrP3PKwKwo1}!7wf>F`u8a}D5nnc5M z3mNUrcWc6W{+!3xsMxW)hD=Xv-9!j8-hd{j!SbNfa4k92oXSXh*q3Q7uG$BOFGpkC zx)Q0vR0_DL3W~Dc5~xIr4baI?zqe^)H36c$kvu!3<6}^QQ87SSk{D4}>~fHty|bYj zasH8|Vpb$6=dI*GB#7NGy=nyYO{;Oqmd$i&*nHaoiG* zAlZ_7vK$CqvrL zwqF@s*{~jz4wp-}7iyET1&O<94V&kEEp;|mHI^ic9*TJ4vW7Dqv}l*Ofsgfy{Mq*S zaS#~`N@XJAyjuGt8F~}1?gA!|D_F=on4)iKdG6aOl3?Z)ui)uTwg|@Delt z{C;l~LizoS@&FPy13ZYJsDcrfs+Eedo`IXj^y_^;8<7i;;F@lWeav3CCOrs%3AI#4 zTsx3q;!9o_NW0uT5xj+a%iklg9|AvZGQ31VegFVG{Ffg=-Mt~!(4UdFKx@${lMCcu zJsnVUtBPqg&%sKyYZHRcT**S8biIpG@6bbNm67N;6L3IuE}d9b8BZw6`+6+%y>QEJ zp`_ZdUX^xCeiVqdh{YJ%IC3-_&ZB2$MXt?HU{FhTkk+HybK{xj;oGtA`C1qmH2BqP z%czB}TStVU6gJMTd00FlrvWUR$6X|mbNI%oAhpJoiKp6vq9 zh|$bU!}00qUiWoJRZI-(Ywf5CGfCw5P7XaOY%8Cq!^cs4YKvuR1d$r{%#AdW6ovGK84IqldMBh(U zr)S!xlGrmutu_;oOYJzITsD-sP|sB6h@&=pC_PfSrWZlIt63xfywvHL zpB0i!pg*#s!^+~vLffwp=+45>4SPH5Ji@#8QK~GV(7o9s#{AeHCx(X34fLc{{peD& z0q16V4&@9kYMT52W|@`AgOP}L7fBCc;gjDp8K+pnyNBcYLq zNhSI+BC8at5>UTniQ05e9x-qKRkekOAYt*L{Mq3?GgwPKmx)W>=posCtG>Vdte`a+ zTjvZ-s&O5ie8LP^-e$J^?1OTbQ0ou=`31qfy~9p?Vzy6{=yMRL{CV8k-F)(TgxeO- zQI8d|q4@KZ7sU(ZJT4~AE~eCx9U8r1&V(H0JTI}gl}_wZc^6PEL$(;S~Klfw{0>Iedp-StkG46#tP3-0iF#tvUbL zbN{6Ap1zVZsUYYh;dTJkdA431eV+ggFqw_sajm;1313*FWtzis-%U!R$%8fCG6+$2 zDlY{QcJ{R4VH^Nf(AMi1DS&cJ2&U65FOi2!k%@kORzcKDv@>{RiLhz8XJh zPK&>m>hG?yQ6WPQi9X*Yq3)~eLSE>rAZQ2UNNgv_5Kx5`1`}51*u3cIyq-a6)d6wy zhc`*j^0oz6QN22UG?Gq3kr?&KH+qIOgFXPs?AfE)@6Sb+d{_!a?XXz z*@qQ@dz)+rs+IZ3NCzFy!VJ!!;eH0FHZV&`# zSp%IVAGL{w92n~xJH7KlQY0EVw;5l^g_(4~LNSF~R$o>F{0ep2i8PLTlAO9XG-$0jZ%&b9{uTHz^ad z`-$~TY~5VjJ0fjY5u$^ePy1hnT^&6WWo9yw3oUO{rOCLPiAV8c6y)$H4=yRF zDZcM(?#_8cg#{~#Yi*0mIx52$cBX>xmk#d9_;aVs21)dD- 
z_=A5`v?d`l{?f@&>r3Hk8o?_Q#t!@aq%<1^f}5bvA)p?+3|30Ns21Y9gAQz=jBoqC zi9-A0Fwp11TG~lI$uIaeixUrTgUDP(HDAXm?_wzF0kt(m zH=6i8y~+W*IZ~w0;W#FW4G$ZzRy1fRZ^YB3wms|tT9tSn%e;yUmcQ@P>o>J*dVRQ? zMxCv86d6IuWb2M{TIiK6X#yEOShYzq7L7f$H(sS+=VZw6DRD;H^SbeP8<$T#$yKe( z79OlKM)mK@W}@win80O8EFsvo+9BbE{JHf~Cg`BPnc*iuWkN^{i7an=6$>zj&z@VxiC9 ztQUr5(D4jfOPmARsx(7Q;4mr@7E;+Y0o4=5L`l;_rnA$d7#501s;~3Qu)+xi-q_>~ zDy$pHH%3@n_}iHfR9U2@4MpEZjf7z}3FZeroW_u^Z9t?xZdO2e?ry9UCWBj6R?n(7 zT>BCMKPfI2AE!mZJ@5g9?m5>xN=VsRL`f(TM#A{@1J&7v#u3)$rbix?K1J3p=fXDL z!BNjj)odAvZC4pq6p4qkl0Llxe(<&y9LzE$$U1UXOnOFiKc+RYh>%IeCcA({O}3Bz z+Wlh_B8OIIF8U_gLtn&EnDX~PyC6IRuZO+iv^FQEasGEJUo$t&CrKViHOg4hF+mtv za6%z6`aBb!==DCeXL`vMW>z$1G*!cub4Yb+dZx4O*2W=7iIOE5&omMdLPRJ6_Wj>T zh3H){-B0)SFr0XMdcxi>Vhv2btxdAiihN=WHDmbI+xjU zvaM6OFt*`>6?#fJcVB{UL zd4kBVr|i$AHb2A_654Bx+3(dwxTQedjy67i_3Pze+nf79@`OST6L#3_~m`pWh7 z)wuTEqaGl3`$UGzu7t(;x5VMq(G97O?8@#Bz4$md(i%vsqQPQaa}3tum^jf`j3<4y zQu&v2ngRlW0<^fu1`6*sNJ)&O$x~5Em)MoodZr?j(Cc{;_u}WsO0J07FKVT_($ zJa}yzKk4Er)$VyHKYy^Q|CKk*a}-=Yu$SYNm(O9w{c4k-cGlMJP|iONf27F7#383eE|Ba6$~89f z1N(p`6?NJOE=m@K#iJEZ`JSrPg1{D9Te+>#;X>nOaR=F^FRcwW z3XJ-yXhi0u_8;i)j_!_jY3apXrd+Havh7fa!YY^BSUo>g`eTDoabKHGiihT!4M$P% zU8Iq>n-w*xo&@U7a!Uw(QMs096&N`|nKB{)-I!qg zkeph>&HU;FBd!XaL85t9YBk5mL~nqvmQ@=wi@xPl~)m??OGZMAyr(9-=} z;KdKhOYct3(L>AA1LbwM-YqU4o#{q7&-7@oac;~Y3kI^jS3f3XE>XYOWgrRnZw?_~Q&Pw+TK z3cd3uQ-VEF>zhjn9@8lh1~ZN$bw6(iveDqxoLD5?>D=^v3tVY95Go_I!HQsml#}3V zq+Cm4W6TbDLjgT&r5Bbo4Tn-;pDK03^tJ3G*fD9Q)E)L8yv{D?qfXk)c01MJlHgM# z_8&va>sA-^fXz+-5oZe1x;wF^l~;w=?N=XUu6}tfXA5imZ(RBN`!~Y-es5hxb z3kB{OivK_Vd486NiHSxr?X?l83gpByh}XxeAA~W zWd3&IFG^y1_df%^py4)@?|iw_D=J+_V)}~98{8|HRU7GTv>@)0kMgNkMzKgXd8sfG zO@&d~zS=ys3Xe=6?0~>dLW)iPD|dk4tBDc5p6U)}8_%1gFj9UR3)X@|P8`LEwM}Ss zw6$S`$J=B2bzTvU_x$_%uZTcuo6-8d( zK5}udhOCWT5`gbfHS(OL6vhJ_hB>^%oB2mTJCcalilDGqB<1C|Dck!$dYL+UiFSnh z9elAa@qN#i;@P2xb{muv*9hTAw)rn(z8Bt`$CyfS50oRvp)_Gr*sx;dEkN_1%h2&+idslpcQGCF;Q95Q7XvQFpR3 z-#fUmBZ>7-)kS(y9|*QVdo)YDz9pNQ 
ze`1<*1=o=N-dH>{fx*<^q%wxn2M4ZAS~*)jb#r!sa#}jOS^wk=T*m*eBngiae(}Z* zAzb)j2f!Qbc;Dzx3yo6jH#vlsFFVM{TQCM{A5JkT2z+ZZo&KaR@I0;6Ia^ei&V+KP z^9r$0mL3n^OWaMeE)wIt+PFiLYfAq_n4)TwqI9sqxLuZsX-@@1g~nk;8A&2e4!Kf% zdJR|#X!$I-)&|H_dJd6U;HuVAh&C=0t75*%@onc_G5vyzxjFsxGs2P&T^?S1J&t}r zSsu_OEH@ChU_$5+@pg(oNAYeeNIZ>C$$Vkq<4%EyHB=zf@}{Im50c*N&}q_j4ZL)` zIh~Z4A*jnBKuYY*#3>3)S1gQ;+v3iKxOcIZ2pEcN83EcHrm~pOx0*J&e`M+FREq`W4<|x7rV3b^~gGVnj;v0kG+e>T>8y&y&enq z0};CG`+nI69let;{i*joO*zFQFb6^>FNtzMjFPUED{Yz?_=fTu0A44Y8W6vFOFXXQ z!{`o_p`av|qFzw3J*TC%Foxg+1qmu$iuVipRe6vi6X!x{E|EwT*vDatXCWJ#@_BJ` zP~-LIre%E@KPCSyNF)*K;QnimAev9}DfuYX8=s3(p%~|JWLfT28=~5V4x$xTsK&&x zIU7)^rQn^uW^RsL9r8+W4ZddBJ~p-sbDimfTp;UX+j!m}qMGLD0np7O<{e6IF^h+i zC3K{{YoJls$(;Z6P)B>m`&wtN;2M{hchC)lHeJ$$ZT__&^9^#>ZO>c#dRYlRr&be>eQ=yy?$|lW+;?|DQhnj`REU;TIAn+TUgozZ?H9 zx&AWN!u-SdmjL^_>F;9KFH-{e5Cp#d-=(kL0ex~vL4)-$zX$fg1bY|KyZRfa3?r}y9Kx49tgpLdxC`E5FA3V;7)LN-sIeO zU(Vs2_Y3abYfbMpYo@;5-Tl?Bs;~Nm90DRP02zP^001ZfV95NODI5R*K>`490I2XU zCG72-&Fq{F)I1!_pn5Fswl);+5aH=^0PwK?|9AWc-+|7=VY_xV%#8RpLc9 z1)SF%cO<;;t5AB0B&f$|BCvuQ-Vz(87hngA&g3f-Utjw=ngDx#cAGPXSvE6Dwr+-F z>aNu}9;-Afy*(CuLrGtlbc!uo1&6ybdOG3Ih~`gdA;=`DfV-F9`5{EgXbFLDKJ9Xf zZh4X~MNA!o8-;)K)Xr>;G$1ci2aBCZxLux^*-Xj$u9-I6P}vkX`{jEHE(4-9gQPrz zrsBO*pBpxkMFFH#;R|s$sCqni?sC>J<@{@YxReBs@*91%4v3TOT48J?aG#*d=nb~4 zNJbiZZACJcX3iIom{loUQI(`lq!eMphVbIsK;?pbKcl!aZ<&i{F>NA_pasFWEe~J% z_<=V;)V55E`F?)jr|Y^Us8`WcZkt_jzOTra7q11wg(=*eg2ziL?I(ByflP>RFui%L zC-d)JSLq#Gr*pG7iJU_l3rV?hbU|Tw?366!Si02t$aUg$;wAVR7p3P9j@eU~~n$ zBhL{403rY?oVyL{zx>3_-pSh7-ro9G4Ewjwz`+6;4COz2G{+CV>SV(V-hcf$Xtc#K z{_e$B0WbR3E%mb~Y*pY;1sjuM5s&Ni#66zM_bDTJG+f8*1LoKKC(ESNF7xzs)g*|G zGZ{m5==$eMn&%W@b)1do*mzOWm1QifJi}Agib|$UqEy9vDu!ttL5XTFNw~~}bL3LG zd&HnbCX7Mlz(z^!^~g1tB5RRklQ*@r!QvTrM>L}mtJ)2XJ6uxLqd$caEyeU z80P9gs~dfMZFnS)>|EW1rH&p1?PtP{B$_mZa-R@YZ#V~xIU20n<0c*rV)Up93*>XY zWTl1HFkr^|Y+qF|at(@{IceN)WBe0yhJyVRiD9f^LID7PFiOCf^JmVySGTpFVFTR> zTisJ0oqv--rGFM2(O@Y(r{-dPUBwYYi65Rdt(g1aYQYUVV2ndSFOtId(+{ZI@Kc_k 
zNLy__pu81}WZ(@akYD~qnq!sh$Y8~{UCQUgR2(YOZQzq$u11LOEyt8}oxb!ZSAggy z;##*Fdw2SJDb1CY4xX(i#`RA6jK0sZWZ;j=;4E^X7_|1#!RdAAuGAoy|(vtW{i7ls1LmF+o zfp38{-0D4*KH!2?H5ulVbxgxUmZgLlAK>*}$x%b$X)BPvGvcYkY^#I@!*0F z+;NLi9-K)>TRg#zYav&t+5}KV#~5Q3_gRK^H&#E`8C&j?Q3a2hUbMBo|HL^}MeM;r z&S%E8Ot!Btz%|m@dO(}l1w@R$JCL(Li=a%l;xN+yq@w1>l_UtOQfv5K< z==>RwcRq@%@LP70Iq9s$%pMs}N>g-lr>19YMPdEj_iF>zf(9&Qzv!R<_|5rjQ1g8r z@!db}z}|4&)XA_E1rZql!1|XTL7hEp%%Hz^-uG&=cA0D-pNemOl{bo*rqe8p6x-%O zxb$TVv`JUnNHx}71g05@wiAB)kTdbb4`uNLLYz^xL{ip2S!KkQ*)nltKDAS~t7zOKP!)UZ%;bz!=f~K8T(lE0G@>b4+tsj= z;Ds+b8;#N^id}y!ECrEg4V2a=v~^iw0VHk~Ea7?eansW0P7tCQ8ieA~(nR%iMwX6# z)mGb5req!8J8iCSUEBbC~= zPd@)r;#@gXo+Xab@QrP4u@V08DPMYod}TMBaz}%ppTJzZTV7UBGM@I}mIfn(Hv@IA zjK4F(l`xyF^4OIFpZuxdPc_rLw1I1R05x%zY$GAJ0ulzeU zI@S)l)WIz(tpV1A9Qk+dPv4f=#dD-NDCu9y2Gc~uR6HZ6mFuC!(IxeA*%|qUt^j|^ zDs!P0q8N94tak&W=-*QaHei_bF6@_i(qBdJAA!Kx(#+P3^^YU_FBut5>u&nVT?BkfR`M9mjd5%`l1#q4$$(x zwc9p=2kD4FfJU<9COagz z>lQgixOjAnN9ID*Wrp&m4z{-$u5xUUZAHv5>NTr@=$+Q>g~*7cAZ(&DegdOOR_6-dy7 zqR+O8D0`|q5NCQy@xBN=64^|Uz@rGs4nb?s**6SufCLt3Br^<4+2_w^AC&sa-%r^bQ1emt;%IjG?&iwmq-We-U-+&)HbwoWLd5&1fB5ZnK%AIr zxW4!0YJ-K6zW3?Y{=(C zA?l8Tkp1iLdy4~(wr+_M6PbwlCfADMBp-DU6w$w9O@t^@<4Low@3D)APDHM(l9%ncMYC(s(jFv8$3KjO)6nuic9U?2~N=>^C@iWgK8i#k>a#A-?GyqAI}sJRb1tLzz!wgd1LgQR168l#aFgO=6Y=2>9oaK>-$5g7TZSjplsp~ceU z1zEy@tfcQ@D2*OwykD~n@Ujl=Ws_b(?uXSzXW=p_m?UQqC`tCvqntn2!?UQh=c2En zVR^w1waD-KTL$3hxMOvPQkx$e#QEGVtY@wnjS(S>)k&Dp(Ahj;cob|Sq0KSshF;@Y zb*hzIYG_LJfvS9!LZWC<#w(RXxFA6?zdfHfVnJHxbobx7yB;5V zxVg1_n#JxLds~%csTSdD#;A4L!1;t1qWWm48m@oKsor?dvUw`A>3CD4Y-V`X0Xz7F zeCoakHSJbEm-n0SLcmPK;&BijnV6cvm^v1fG=a1ixBurJ;o7$M+S7Ou>nF^vVrz|Y z`2<#K!&W=h;Z7+~r^D6H>)zcgOgmGJM9;}ogDheTi|k?_L&F@SE=SaF@w-rfUq&+= zwndE3ekKkq4XuiOW|nuxa_3@UNvkC;j}~~^G4B5{=2~-^C)$c=5F=u% zM%vrM+rw>YT49HA2Qw_DEi$2&viVj>KoBZ8%3w@5IM;9>l8oy-jr5CQL7n2U zzvd*n2;ZW@l{6#I;4#v;E|CBj-3_gyz6`gr+BmN*_|dZHq5ti*KK7mH_%b+i-L8eO zJa__u>Q%An)Dt>d9bBcXs+dXi4d3j6Pkfc*Yk2x{OQj9Ay`Y|MDT86}4m(Kvhw3+c 
zV!DEkBP?5U??1j()kv}u5ux3==}SY@_d`IU5)I-}22UEiY>IiU1o`@AQ&o({>Cq}a znJ7Kt@JQQ2UN;kZWnTN%WzJbD=Jm4l;8P*3Q#qd8=Q7?9tXl89b{P<{2bynPlM+Rf zu;CW`Iqsx7*EODdfUuF@^aWc7*vsi3;)MmC*lbNZivy+vb#UC}nvlub-_Fqg1bVIFa zD8_q6BSR4QWCWqpwKl+9g;RBOmUydu&Fd}7LhU}^2Lf~Ka3-4)B3zY}D^VcEWWNU# z(6wBAZbDUiAQtkyOfy7V%_^K3lWJVqdJj1&yM&7}X)W97M1@U+ONr2D7~x%~GOtU^ z=^qjCLd>K5xCHBgBs24m~l< zh%YAZ6S2|+KB4V{E5(tT*OqK8-f9&TuOQH^gE<4cd9$h_JalJ-UGk8=cT30?NG8waN1!P@5x1%^N-f7B z;R)HtvlJ0yl3r)`;|&`f(&Bf%@k>O%s~da==L(zU!3#Wr zdsLNo_F^(4e%1pl?!pb+gP<)@c%UpOBo;w_{%y+U-d#6cTQ_8jzt`Fe`vTYNY%ZP| zdSJOqK6(WgieQoVI%YTj-YCXE^dzWfPEd_DOtqes*ny-eu#bivlNX%k{zNu-OI&%n zxS)$Un_qE6y;=u%nu^tqe#vViIz6vT=!y7%Ylo0EgLMod3X-y&snO06csCNwaCNb< z`#d#X!UtPHBOiq+ZRPcQP8L*W=JK$d8xG4u>=CNKi2j^4LD7Cxog0*NPoZL-YO>Gl zvlzQ!o$TnOSI@$;bRf1Ql^-L0rifyFQZ*5LYy1Vw(eNh;S2iS5^NtO2E@2wd-?z@- znP3tn7^(DN^mzo+CQa>4UO3r1Kv_-foy>l51}5YGSCWM765jE8)Z<-#pX1d&%EN6f{CNUPYQ`iAaBwd| z7bE2ai&(|I)$f>^916u1t|O`;5Yrz2eVmyQO?N zT=>j#^ETC$pP( zlrQs~!STI7`x}SkH~i7~T$wEmng*HnYgf@nGzTJ%TcJ?D>Ve*c~QMhLp&r(Q|q zw(halK9qZI2CTy2n0>)x7mys#6H&*qg;v!J+?Ud;08YC{l^|~Arg*R|R&*Oue?SsL zK{u$-iq%A&AA@(4j0lw`#lx3&Ng5TB!6ARgW+>XB8?o7r52a zz|NAZL0TqIiK`m22Lw9MR~sPbqA)(Qi02G|R5nESfv)lCx5(L_8e@qT(GYhpgN9m; zr+mKkw|!~*RAtW=Sm_Y+5xNT3s!5!%$-Nw4v`Xr@>2_n4C&IOa>D0cad938;7;3~M zf4p${bE@`nrdH}ZdlsQ(pQcFOkVUK24avGPg~0d*t};`GqLwy-Jiw7YqtW?=AEiFf<7ULIA1KiEh*g1?l7K;^2&Qb8+!A zAlPI9$bMS5476yf;}l2$pqK^#eyoJ_LwmR&uLO8{ben4@7s)WRY79JZL)RJk=7&Ab z-K4o2NFVEKy^``BaH82tc=5>d+atTDd(=AK-rbVOEXKpis#dYAuFQ>ge|R}0?O*CE ztv40LVmuyyMjT8%ZG%_2mC~|Feli;6TzIB#m!xv$lVa0HQ32B|38PALN8(I&cymTG zI%vspnz34r!+wIhH=#OW@VMO$_#JKPOon!81I;U z95jO?s6|S*bLZECCX_au)tAAJUBD(}C`0;-ZDHC~r;OFiL6-;UMeT{zmAN^q?OG9< za>1GfIniw_EY(_)gkNkju%ql*-P=J zW8^1qqNa#d%Fj5*vAbn#B8W^Y6Hp)B?fINKNAt?DE;pn(QS3A0;>b(LBqIKT^qiBiHsNe?i$W(?!tcT*}?vchs>+PyK;Nk`YAIY)50pcI1&x^Ma#bFZHMb6@7 zy&T8sFshAQS&Red@#zeU(wb^KjBUfi#77{*!AG}+E&##Np#<7H^eO)0ybK`pM=R4z zBleF%&ASU38BDp|N)V)o%G z3o|D5dD1;w{?Ym&;>PSK%XHG0_PnXacMF#l{fY(QlI;|w9q})z5yNHbJ*^_~`uaN9 
zbjCCNuhO1RGPIJiz##KtiMV8O+8mVDu0^5iwHI>1ofVPCBahwjVlk%j?qsqW#%UcA zRN3#@UmHO=MRwPUgPLSxSk=R^IZm-}q%JM+;O}9De3aypW3u(P%Q8aAr&vdb4x&t- z(Z??i3`{wOafmmsCcVFu*E|&tj9pO((jSUzc$jX(-VB-S%hN1i2E$7+i7Vp^QFO`s zooL75ofyVr2=1$paLp?UoWTf0+ry~5Lz%|n(5Ejk(wz-Me#Yln6ejo}|3hs=a<=37 z$q4ktS-ME)*xU}d5gHoiYv)I-ycDYMm8-!J7VU9X(*2oRp2&LEfvP01>7sC)Rlpl8 zo8+GgqL=rA?MBCAdwsVD<}J=ZyrDURlPcw+Ec+@GYkiGkzBUGp=9HJ%T7$K#+g}s4 z7VsTOZ-tMl!`^yv45I6)rMv31npMA!Db}F*|f}Ql1g1*lkq9m}V)ydg}}z*9o7rcg`+3IhO4eiXtPvIA>dDn9jz5 zam{-ScU{7&+z|HIyB(E)esynm94Q2$4d5unC+?qq;aQMpF1y3Y)NB0eo}P)q)B3ja z`cH-|VrFCIgnS(EhDii{Mt; z6M9iY=iS~D9gEk@S5ubUcdQ->xRz!LZ=<$g!K;!3xm&@h;(0fxiA_9$%`B}a#z|Qa zXYQwrR|Bc;7KC0iQ|^x$rIF}P5t{8iQRPpSHr4uUF`jphleaIOY19haZA+Xy z88(HZxw>;!G^x;zDb}Ii@@kF%ni0e#~8F+5TN|Z!rV*%e!3`t_c7xA-1=rFou=IIDMBv zwVlMMS=&R~)^_g=6SA-CUDK66X*xLvK*e{PWy(RF;Wto zqhxeo0wDB)f_!rMKtd6pkVt>As=(khrD#$T`G{M`=P}_Lt>{l2HDF%8^vqZtTw7v30k-7oI6*C%jLJk6t;-iPPLfYeqqd(vuacJTnb+o80!700nB$nrOK^k zm$-2fc;b2@9#k;;6wmG;ueP2J4bc?j#cK_dQhFpnoU5br5HGjv%DxTF%+_V@Cg%+e z1)1fa8rLmI>n$3;f0|R{@3#YuJ)V%kAnxyH>YNa#U&K~Myno33%Esg!ol#rfeWxad zwOY*MJ&1OG|9xLja*0~a(%Po=6bw!2wB}p;jk`!iaBT*ONz9`mE>YoJwxUIUKRVrT zyH&}`f5UxlU7BYX5hrGvoNz=TA zr>ofj)lj$c`Uy;WWL2@P^WFE1M>tPm>6>du-43}o%XMKD2d~a?JJJ5QDK03T-JpWw z4R95g!+E$;mYSp~AhO_*NGH^dpM`mrA357^yWHSS1!C=S;ixCrh!EV{@| zyM28Csu{$?9~l)y-Lt6ZaK+G<|Xn=7q;rMh~qvRUn(j2B6~e(-^v zN-_F`^*M^pmZu;)ZEvsqJCuFIHQp|);hgyI=rVeM{>wChCRqrhtc0honh8`=Q%_yW z)4|K#)dl&JII$_vyP&HyYR6HF9RVt+8gZe#&A{*@pmP#JW+lU~`{%r}o~R!QVIO{Ox15#QAkvz$#rDymLr};pdp_iH{2bMU zW_QEnhKrDS7_(w{_@+SPY>FeTHk9cY`QBy$$E{eLaLX>+yzat$?XnUirEtpZeIKb) zICOGyABvDCO$kZ5VFe{*wmh|ic30YDJJ6+iP`7e9v|*G7Vou6_iVqc!{Mr8_&tEElyv z61|^0x&t0R2|&zd&REPwpAI+|etV3-$K-xJA@qJX7xd`Y%-le5(6$v3gy75r8gF zBO7M;bo3kEM7cyWk43NvoIR9bXnQYFjohI}8ZxZY5~(FRt%^5*X&h@<(y`_|OmoVo znR~I!vkobkVF2L$Lcb|rhE>+wSm#THCyVNz`Id^iiT#^|6Z+YJL`Fq>BxpVH!IcLr`XXcKa zI$6JJ<(6jG3FitXR^+|fj@anIdGL&dxts`bg$Jy^SF6<&NC-UsbNvTN;2rSl3(B*C zB7kBI1$AxKqM04osfOZokY+ddjIR}mFr6@%6V>3&DR8y!9-57NIDq_zjFAk 
z?bU$rd1$vR50>;t4*Q4Fmeyn-8AnFLf62hdMADCBl2nE#gk+OZ@SJG|sRz)fus|5sNS!$UF;dFTmu9w`P$eTXLr0jl% zwAn*j!kZQ3CpAe4_^2DS!1?JpFO+HbFXrBsxz~j|kL+OjTw)-;5S1Q7dm)*J>g1D? zw!Cn#fDEvJkzg64$iG_)yAN_&mu*3ekA+79$Q~3V6jLqiKG1>>kzOM)6F6{=FxSEp z{HL2)DuNHTxB}WVj{|iZY?cpf5%9qlJwDKaM?%bwCC$iKMr$lASz}{7utM{C!8zHK zOj`+BKvaSGx2;=SS5H@0pQH}mhkLM<;T>8D+Q1ACb1@5o_rt`}lo5uF+CmBdlQrAO z+s6Ec`6_6dqxzN!#PPlxeT! zbE!sd8hCO2`i&hLgPb=oa9qQ(_^F(!{H-@FgDbxgKcd)pi*i@0o}b; z9neo})d3y2Rvpl@Yt;c=zE&MD7SyT(1_QGldU5;#qlA@de-JQ^B27h;qdBKYlq=*S z!H{AB5;Q~8NHES=fCNp`G!hIx79c@$HH`!#l?6!9giRyCux0@gG;7mHa4Y14NXnwj zYX>(REkI&kJGf(M0TT1t!7WbL__H`~Xj^ydnr&=Vo8pL^dd1{g75*mTo>A{vRot%IA-ClcpJy=j`YF8hU3k-3Y z!f!H^DNO9}GCH!=t5?@xDkLPN22=VlmMMgBnL?P3DQ?=^3D_{37jQe^7o}Qtz|T*$ z>VV&>YSjThY|##=ZB(FvU~8r&KG2K?H0%JY4#N(tjU9QUZ)4ot1~$6%!U(F=fGXM! z0?h|`*xmau5_O1qB>t8l;Z8SPy#@dhoz!a#r=~e~fetz!3kex-T!`wjq8jG>Kj&jy zrF7Mb!l>i34z14TX_=~N9)Si(ilrvZLw&zk4++=X@mq?{+iR>RSsPQU%d|+?5L8l9 zQp4NzVXU_!jO*qq zfQa;h@23(%zn;=S#)Gp8wgPGDmD^9$*7%If!GPM~?gx^o&n4qmlT2GPWN4^k$XH8L zpI*6aoJ<2Q8ULDO+|7`o!H^+iBTc>kq|!KAkGBj*5qp_2wzWei{aWajOOn`Yy zYYk)qEU>h)sGViEjFV}?C1b?WT+NWtx3sdT+dFp{C*#5;W5m+hn<1lbX=PCtZtXEn zrYV<<5lic2hK#8g zdV6l0aWXBCOjq-kCIT{DEwD5{|Akx6Q!*IP;L^slmRvGMEX~af8GTFh^S`vH&^Vb^ zTrx&1&BF{CeM|H6KXiVXaWbvBWQke(^Yfp7 z;)-!HZMbBNSXzJ?GWwS0=l^=|Tw`SFz^8av8EfawUYvQerP|iyTuDgmWR-EGt~#V( zQCy^LI7s!a2tLNdMH@%zrb7z$!$sPTgH+#U2wAp!sBxt2bV$KkxJWy2km_3qA^9oa z8As}_LkhORMe4;ts&5Z)%IwD@#*wzyAq7j|BK74U)z>~D)r%sGAr-=TNyx*(l2hgj zgA6s#Yp%8;VX!nz(aylunBT#+17=6~jC&KBNa6%%E#NCbcyfkhTt;&CD0$|5;bcRE z{KjerLXSt`(X|Xftvf1b%VZLW6jN`O0Pb5b1Z?2^D89sX6A#3oUlSVC>tkQ5x<-h6vPLLo$T#6c% znJ!Jy_va#(H_@l_sRJ)_84jPr60n+j{>}6FC)H4XIBO4Tl@~Ua=JVeohK_SmZ z3*hWMST$u(punm21&mP{>@T3)**HQ+%hu4u>XV+JBQiKSD-%N&4X+`FjPe~IQV#|S zS>RV})G`)UaS~@FJV!W$%+cE^%siIEsvC zEXa6^05TsAf#U`s(1P>rSA{mD^$usKoh5IdS(B&2sf28}N01^(!EfaA$h@&1!O8#C zVE6fC-@bkDlTRe;-;=9eenkD^D-v^KFpB)bKx+GT@_# z6hU2GR7N1i5C}BHKUGcq%gO)_d}1krl1fNwY7tqqXc2)1#J>mt5JUZ;6fp&0{?Hcy z!=374`UDq_60c90nG-{A#LO1_8V^xt#deFblL 
z@ZWx(5DFnxZTX}E+EwVa!yUJ`FgfAL1}xxx{W7z|B14{xj}Vx`fMTLYLPDTm*1_+2Di zRA-SMQvf1y{jCJfzz%}jkY#tr#J+{67Qk;q_#GB5&&-BRx}-r9$4C_;yeEv#`0?xG zpB?mZ>Qg=KeZi^i7qWJFtiDkG-P{qaXFs>HnV2#Af@_=Tu&326yL%Tl;7`jGoGDXP zm#rP%`be*bzV3I{o!PMGl>M)-j|>?6=gAZO$8Y{*l?dIcbr=Jv)8QdlAgW|WIZO{-8JbzeJ1^b4BXfOnEgt6+yC@0qrW+PxFkjjhC*sId~&HYY0{p!)8F?v;oE$5-|apN8`PV5F?Nr5Q&`IBU!BfG z_PHD|(k*Yz*JG2;w_fsX)2gmfzrScKJ|Fn$rnzf>Rc}4$Q}%7_{=ChefAkS#1{Ow* zx;1j;+Rirzs^+bgu0PxARO!xF74KjuH3ynLaZ%#}7~m`@cv>JkO*&fIS??YV$)rr0 zh2~MGEZY8{$SL%C?#i5b7Xu}`J^G9+IlgK4OA=n(tk=;MXO>?+|HsXO!18O(=a%(% z9O_wY>o`k#!h7b9$CEtQG+%qEZ;Qj88P|_Cy8Y(_mtVL2tKqeN#}or{UC+&%wz2f8 zsG_vfxi*hHuO|NN_F2!_9}m9!dv%8!eeQR1x94{=&b&{(nd$YxJ1XIbLStjh`y$&9 zhQ`X9_i^6q`lMxqQZ8vfzS+m))?e5%EbaG*EyK>dWCS-+0FZw6*CuWf%yeth`b^4|I)A;~y|^pitx?SMB~PXX6m_z$!~F0uBV!BU3e~1n%nM3fL+`aNt{v^ z@Lii;xf}g1{I)R1rH6auy~*>w{5C$ig+qJQCKFN1k zWSikn z-^A=5==HoQsT=?Lu<(EFK#hW%XL;Mn`(^H(>W?bM}7(wV(oWs2|5eRi&SkyrdW z-o@pmL2o0Zs_m+KRgTAQ+*kaakiPqBV$NfmlqXR6YvGa4PaJL@cPK5(XGR6@;duXt z)+vWBI(D99Rn>f9o#60ZU7atkUgPqlx7Tl_E+JLlIS|K`A*TiFMny(+o9dZ)bArgD zqG46^l>YJuaUOT3KOf$A`jj8mH{G**z$Uz?*07L-~J(qi=Kq!RiBOx`Qn~!nDq(I&!4#hU-tQ?1_$5u z^LrWV?Jm6%d}PKiKVR-PHtcfIs#9H_t;*gh|M-xvV(#_c*M0lV413)&M0Dup)!fLI zi8-S@cHVNG_Se9(2hR5Ix4dp~@1=HESNI;1-h0+G?BSTL%6g^Uo=sGR*B#;JEJ$s2 z^Uc(tfW)H>r%9Io7CdKY(c@u5`UJZw2i9LSw&gm}8Oece_v(CE7+lny|JKX;o%{4J z{&0Nbz3hm$*yHo6-zPdK5+nY4Trhs>&9gHeo_6afxwgP|-ghn!$No|9C~HU9bN@rU z_6`Sg=Qe16MHV<^clLuN0sWocrMB&{Wlzk`1$$K;nkSFkRcBlFnt?|HCXTqA>il~9 zH`hmB&a}Dw)67@1?i^W{suEUr&N$tD!u$vI*Uoy??%wDbUtC^Upq?}%anSK5*A~k* zy6oCkIq!mM`^AYz9Re#|e;X1T-Sp>nCn|>>;0HW7uws$_j2Ll?LuZ6v+>4xU*W${M zPhATHgImWndeOG{z%Qz1Y3(FM>ss|M%(A=F$N#~(jZc269{h6K(DQekE(f{A@1B%4 z$2g;e(Ap8lhOhGPsVroq3DOZW$`1q?(s6DF!bgVRsl(Ph)@uyz?0-wvJqLtj~|z_&v`nRa}(we`)yu^m@k z+V5B58?d<~ZcFczDG!r2p3g2B@s-uLsU^jY(wom~t6tK`>9fFXDFJO)cC_6c@ZHZ_ z=MHUut6`tsotw2<$y+uvx6XCjn9`8&j@J(yaNhPs{^c$wwom-yWyO!b7e3lnr+kiN zQAD@*UCfzY^4a$1Ll$}tOz~TvvMFlgMXRj7Lz1fZjk{W4<=kuivdsCV zyn^oE(huH 
zNv_A|+->h+3GIy1u2uWw-AH|gX}tL|wF`fVsVo7%ngiG*K< zBt4EOT~h6Q@luKOMs7^W?C8So-wd4Hq~G`>KOHP>IjnD&-Y>JiJaEb*WX6(xOX`jI zLq1^AW5QQi?ReiXb#KhC&fiA0T%|maaz4M_nv1dS52vJWjai-?Fu&cKl&N!8_B-z?-A|pCid|5U6NI|edL1TQ?mxW z%HP!O`?>MA+J65-uVcGU^{pP4yW+_*-lW5Bs*z60bEN5_xTWf?@47YnQr5Y3;}P!% z^io`$X6^Cak+Le+W$U8^zYW{9>(Djr(0P0>_{wf7DTZcxmyr}XuD>AyZlb8`?FI7G zOB=ZzxrvvqV`E6r^%5|ejOmG=i;0^B=`j6n3=@+RadB{sK!?Kxc5U^!F=G;O{jFhj zgIgKaWCV=9A!g3F>#O;f%1#FVQrUrz3sHJuw^TU8a47`;N&)`+-a2qkf#noF*M~d) z#R4IDyY9)E=#g`#n}!vAmHx~@Fv5X{jd(Af8U^hX+4#> zuY_+dJZOOnJRTA7bo^8SA9z)T{Ql)9(AFN#(ZS{n{CCbCUkP{wb^-BX2F%=reAsfEsL+YHHT-FJR$&CsDx~6Z>PtcpQTF=OH{|%@+a`vGJJpJirsoES(V-ND$tj zw!eUV7-$W+MA5fG5f_dZv5-)_^Ft{vK7)(W<}I6BmVmLxQAxF4{2)dhB%y5K~MO;pyIGQvKE`c5{0fP%x7Yr@| zg9{BBapBHU2-zpT6^b?F8gapXB3K1(xlJtz}_KqhQLWXwz26TAgKKwI^u4oZRl-2bB_fF6pTg?@p5O9kx^fB(7u|B%3Y zHR#@+PAs5$?Zw!|xfAD#y_-8NSS%!+JYQ|{fwzaCV64aYaZoU34S|CH{3ZnoMz;(o zIFDsR!Ck^hP;hHyDioXnfl?$)Nd;wf_z|n zrCYP44uB9PqQh>3s+VZEbV$+;9d=2_cSN7QFBcUaer5_W8VgUYm!nEgfM3Gg`HEtRi=y~%1`e&1mHZr) z#D!@IEx2{)$CoSF9_b0d;s`+gT2Tz$WI@mk4p>NBVifQTaJci+6U2oQm%jn7tOP>v zn^-MZzGU7*%}QCBP$6+)TF@T6_)>~Tu7pO(z{MN6(ke%lr%q2`UhrcSF$&;KkA%b} zzEqz?LIszEEXa^VjtW_2NXUT1Te(u8C`Os5Com0xFS$}#DGSn(Q51^{8M5q1_{vIg zVL}BRE3{+k@uj-4BU^OL3?Z*K=?U)>Dqw&Tm-`I&aA%NdYajxIG0i=y(-W``x>6Lw z9W=-M(wYW_Oq0zdNA(`MDT)&+-sPzsa#V7qT!|t}td<2OR49t^IiUiiUJsvQ6mn%^ zH9qC2l$EHD_|o(Q+U8ZB+7ZVkF3eLSate(vO@v|Ps1hn*oPa;E8g-_u#HWM`)GXow z{Elc|D1M|bSH>uERN_Ke5Yl1#!XSX*jll3b_!Sqz9nhsG#Fs{jV-#Udk>WfxiY>lW zT*$H;RGyv?qlgqoiqW*foFaTtQCJrjQg*|fg39Ad6Dnd9$T;3{kcg3G7i)55&qpx| z6hvY*Dvq+NDDL@4;*wC&^HGi}Mv(|Y&r?VEB5vqw$Zm{6$F3|0Nkxhk#VAdDX4#D| zRaQodkr|36p+Xjf5=OQtyJ*1Jv*#mp3d0 zd4BrmvWbd454AqP*n5)3-sMVJ5Smer>LGX^+F*|AX`Wh$?t^B8_KzKum2UtlaY0K* zksxb0yw}jH>qhSl>7PS?z=E#Fup@&CAO=rmPvXVY)?{xScI8~%4 z+|N;A{K-*4V1dX3HF~Eo8qxp+lSECVAXodgiYrFsN&^++O3)1zmspKffpS$8M~YEB z7>>|EG=sH^@Ws%EUP^-(N)}Yg@84ip*9c!q1m%k@v-k7g9Oe|}gmOmTjPQl&hrxp}QAY@UkRhZ|AVh3^Q>5V(M}&TY>cfzTHqVZTdJ6g) 
z&P?bJ%!oj3SZ7p7r2}K4p&9g+*!$)GFJM|5nhbSvChCj+vTjLWJ4LdLi zF;)czL4Bf?Eaxg6t$|Ox2fI~PhB;A`XlBYvovF%|VNMuFB78A=YfV)cDen0w!WW|~ z^@{jXB+|b@Qj?%^AOcFtQ_Gd`M6Hh%DI%eSfMWrMkqBQ_dlY$xNSqkZD5(3PPeh7? z%4I?NgwQJ(LX1zKpJ{zU4nl%4K~AGH6C{QK0w*@s1<}Xptc13zBPR>Oxg3e%j3se_ zaRCvW_E;yx=~s_nCyu|MbW!$N2Ly(+67Q%<02s%+$GRUs;)(kxh*)eLoD)15BNmPX zBdor`V@#$7FAG9-<6ChdN}P%e^+de}`Nok@0|*9ffkt37*h?0q6`7P0f-H`;FH|Aw ziV91vq*FMe!nsRB$d*Pg%p>p(GZXTY1}lnvzoKN&ki>;rGU=M}P;OylP-pE8Cq)eN zXaqP@q4c$}LP(7u%n5x9Ct4;3!#Ko+y3mL*I7Xo*#*PUmC~9#JKrc9z(O{3W2%^PU zf$0Gi9nLm%&cUe-$A_Xrfhmev(P2+58Ium26d2o3zKk=;u~P$%Ltmqq(&eaLz^qOq zM`p_CpKcrm*MP0<$%K0Goi5>-69Y&Vy$keE^}1R z^VIgJIpm)Dk6tvf!(^$;#-MT<0wR3T*C0eQmWrdMtfN8dkz^em9adkMkz%xC93jr8 zSWuWW(bUEjJDRLu06>YL!6j7CSpgGLIz~(h^mN?;X3oE+}bn0RQ(+AbX zju@?(buNoL;mlQQTf+IvLc{QYChbb*AQbt?0|&Wez( zfy9Ni?1qxaQ(N4Pl>_{hp0yY literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_types.xlsb b/pandas/tests/io/data/excel/test_types.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e7403aa288263456f3468a721206ad1ab786b586 GIT binary patch literal 8053 zcmeHMgFa2mq)+ z{rgUiZcs-zV@+>osH-8DmxDb`9uklx2LObB|KIC>*aOL$11cRnxJ~qjIEhs(9s>xp zlXWO8_{7CBcvjO({A!f*M~#eEz5?6#RqIP!BA?Ch)$%P3S*zM3q9dNG5`CTv{gvos@p zb6CNFdc_)k=atC;O>!-sy^?tctUI5$cYUtYLT_TzkTH6^X3g8PGO<7HWGPwUA)BUs z`No|>RdyZ<4W{CnHY#+wi8>w1* zaEZ>WinDxEwUx8v?eu(<2vH+HTx~C{56v?Si*3ekc;Za~Q#PFjdBB<9HOJT;owT)<_k-h@k1jkMsBDD#)i()_ekcDjH~Mad(SvF7N6=%o8|2 zZ*C9)s=qMze$Z;yIXoPd;Kam$GuPM!YVXR;_5JxTWB-R~`j@H4B#posi`l&|*K~jF z;*+BsYHs~InwN{XY&gq|j^y>R`3#chXBN!G{0)>>`^O{MO^ZVl#VtZI?3F?8YC&d2 zUAog+2V+f@FC@EUhDTZA*o9m|4E>+H_TNlYr;2JmRfFCkX5Z2{8i`Ol7gOWttQ~l! z-7do#o_inSlN0wJjg)wjYg@_YQB7YBshJEE)uFM?q3QYphl&m2v222OZkC{<%o$Pv zc4}UIIRWE*GIYu!^q8&JnQ?MU#H8d0Vq+T~c{f#Ekp!d)A$maL4Pkc!pxy^;x5YeY zPnGC##0UJa;9gaVa?$6((bUCH;8XuQPd=RnYo%Gj$gg46d0wrd(W39|= zLGI%LirgtLI9tz;R!%qa_d5~j>tXCHH55ob1ok`RpsJ~fAw*QOy^7C4vLJeV`~ID> zQ$COM3H-MnG>N)mtj>cS`N3;J*1tB#xMH)62z3p! 
zMTq)h#@mhK-Wk0%R=YYj>}A;HZpoUAQZV`4p|c zgy>QKwjfSd5>*`S=RVq_z8*;Q8b9kb`kJFW-kaLK?V zJcAZ%Y5+w^x6&FgFV8U_zY_5%0Vv7~tPAesVF14b4)t2~n)f2DxP_U8@#e_kC8${T z0;1LdQ^nvqKHfp|}Crdw6w5LwL}m}F|7yC zkM#suu zqc}2qc-+HWQF7*x1}2d*rq$^oS%!tNi(SDO_|Woh%3SD#;vMe_$O{)uwe9C&zzk*1 z9WL&agFb_%>B6GKA=!#{=_Rk#--{T7wU!@_M%4BA686Uhkz4@tx#GsrHHdsYfja5u)-|%Et=BDsT_!qG{2ab zR|oacTj$;ZLXIc*{ZB?uMj;K`z%x#42Zh;1Q`ZL#IThPgM^F0`y8;UohPufA(e1fv zOldIq2nF2r^uM|Nd%U??L!oZ2zo)DpAHaVmEc+3dEu5&x%NIHOCT_-=Cp_H|8Z)K| zcH2~vwa+^FQ(~bCM`x!FdozcrUnFv`>q4~_S)$|ZbL{j~I|W2WY6JHNrgubHSbMRj zdo4_u11FrlwnysBN*Evc;cML?LR4hkzHj#m1C-3fw9@$83|rTBuvW#oO6aM0!(PU4 zJTfMvkSr@utPy(>3OPNZ7WK-=?=0aLK&C9}o4?U)uE>4?-lrFxD-HE7G+W;EoVj~9 zfXe6UWWpKcp8^j?jN1|jAL*v~-i!QToSQY&0m}X33I5)T>^>TbCYL2@g7fE^qka6PKD+`YWC=lkBsa>IAH}NG=1k9OQB|OsSV($a=V|ML3<_^M_)Jn952bijR-Fr{9|{gNWpJ!!R9Su} zyb4Y_=qFw$mEFt{EL_(s7-#8;U}#CrA(<3U8|fqTB`6qgtydF&d5|ubi(Hr-|IF52 z-g#hHQS+_MEYyQ#-)3cW+ExXGjEG{K*0eHFRzIyWY>1p9W*v1&qsvf0lAp zt=YJ7WE8CK_a+#}cW?)6hk!f(`(CmVT<9dT!JO+V71y5R&G^XEN{0G)(wut3p37#5K?LcNd(p;hQl?V z$W3c>wFTMaOy?EK#0N-$uVv5(cnnIk9zGMf)XA2Vu!+w@jKQ?&!bpqAqV4J-&`?RK zS^WU2XhHD4_;#9+D$(ry^*BSPYN_c`N8q(Kw2v~+!nFVDaPh;<)e!O0zQp5;p41z; zqNQ#Of}79TPyEjZs`fQ-*twJr&&sHmu1-4Ut8UKb&y7XuJcUKT9|`cyKQ8ucF3}Tp z04XmI(ONS*8KgUtB=NR9imCd0flJ2ppW08W9om^GYGN?FtWGCaMB=_E;P(uk=gk1A z_#XPkh+;WIhM>A$H3~$Z`@Z&f-jm)Myz5ELm=YXb#`scaMSV8D`oS^ngGd>SOq1=B z{TbHoxG8^?2f}qSW|b3VWprspyD=KVM-F}1 z56ji?>02Pj{s)aV$F!QlR&4%~MMX>p;(=#4D_u6pC_pz7ShLPf2kN7G#byl8rpmYL ztD)Z&_&!)b@`X=3X4MPMisOboAJGEkso$yZ32Z87l}c*gBtmyhfQ%N6=%$0c$2L zX1Xs<8F=2kzVX+ob>cQY=e-b#+cD}6r&?`H?NiYfSK-#E;~t$Yo`^e1md*a!=^O92 z5i3h=I zHIAq(vjYh{Ottcplnm(^Qcwo}LX3y538U;6EF5##M4SbWS4%00BstH~FW1hN!+m;i z5OEP`g<^uw{0c9V8+}#{_Xgv)xBa{8=m2`V%1DGS9N8;$;RrypU9m|Xke9jUn+M@UP zGjjSCMc!L2y9C_py0y6k?Q8qBw(W+ehkkKI<8JT3c^p|S(qRzS&|`hh$L`fh7VOUF zLg`t)%x+!Ow5YGDA88?H;vo>DOiiZq0R>IH>DX7C%6OnQ(@jz1;N&>^#u!y?wLyy) z?e1(N+gq;}kh}F8EP0Aelge>laA*eJjkVR9XtI4tai+b9gTXpLDqmX+cfFd{L{?Wh 
zsQ}buAVr7$2)QgalD6pi9dYHwdDhp>eeU}H(ngGJ%WtnJw9bJbSdO-mrJ6>J=R|e? z>};{2r~t+#pgx)qjebW^!1ncS$~5Yx8;@5(KaR`18pi!&W#bagimJgOeFu*t!lWKR z`TXV+#k&axrD<=bUX#>C+WA@csDjrJ`)D>stqSy@Cx_ z4@v%3v;3@9{E}L~OGn-NnYcxhnzOhVW&v@iB^U&eGmpHGH4jNZXbdJFjX{Wr@#D`( zBTM8kLGBhNC{I)>J#3Pnu0TA+KkN@5qD@d%wRA?IyPWBhX8I1(DC(9=jihmT`BS`= ztS8it4{D=#GLvYa)-h%Ai-fgWh*-@jq-lmDcW5$B7L4?s%*n9sHRcP7o2oL6>S@&s z;sSgo9ya96^-^h)YdJ{QCnDs`<#3W{l25ji7S{>pE2>Rr3-xLuT#4%NiHt$0UWW`b zk~x}5#J{f$$q~e6>H_N7^5>PHj+Gv4+1hH#Qz{iUwA^FZ%ERWIo{CVBr5d8X0#GU~ zrf2wgJK8or*2~muw3o4hO)B-AT3=KKVf5@DX~bjOaZ%_1?PTUXllnwSTHK5F9G1$O zPwwdV-R#Gmid}!TRbSw}*Lku0Yy=XOlKAcP3HIf=9F#xK8=Fi%J#syPXaq?_j{AC+SA(G z!pU6K#mU)~+uX?o`nv}GZ)*-;y^thhNIMT<*9YZGY{l%wM(nh4d-`iMZUr+pC% zqa4$n^agZ4-**TEd6DtWnv2|T3F7z%>zM3f*AvXW!#;x{B+f^diaJ*P22 zu(0pKN4~29RPg)vzW?_=4Bi5px_HYwT0p&i^kL&kCIBFaCItvi2nY+jmmo;Z*4`Ts z&?xz8O0vAWx2+mC0-8obQiyn{$@E|PZBuzP+(>xv;~@e7xPJ|PQ)lOYV;{Z(zh9Y2 zQcm+cn4$ZKXVjRBGSQw zsK{I(PPS)HXPchCe4nUYgi}eYXa;?ESX6j0sA|nXQ|UG6B88IDy=1LPOW#hcTv%7I zo1Vp+s#un%V_cvcI$2tzg_B~&HF~vDZy3E&Bt_!PHkPrgMAJ=Xufg|ToECb8f6U$h z2vMjpC5u~M5UpBdZl=I8Fdz#LKnn>~?98Ia`I0}IrgTGNG#n`#7U|hOi|se@+_mMf z-*23Hc4vl>AdaI+=YxS)S5aN`dDwE@@IdZd1JnH>Zs+k4#M9o4!N;hNUNi0HihExZ zh4nmn@%W9=5?6ml(g$||jibxdQz!UB{KgjqASZk?`R6mlKbGr{_dlE{sw(~p@as{= zABNxGjc`u<6)ircoGw{_g*$0Nn<>Eus7bjKTZ?cw11p4RE`n`w1WpuO5EQg+J@N+bFk- zj-MzW@BHSH+a<_tgkLLypY{Mi7!d&QxBB3=`RyeB69Av|_B3v%^xIg!=I5VC06;Jt c>(9UPd$v|p1pUbKm;g)wB|LhiDZX3$AOEjL!THE~}zFkjMb=6grfqH=ffC0b(000sINkyZlJp=&Y^#TAu2f#t>IE2tAwp__i37d>NwR`ibxN-oXS96n z0q*c6y%TxrlUI!ih3dh`unLMp1#cG`bH?zf(2!~l9a*bQF=ayytSHdAG3g5Z70>Ad zRZKZyOL^9i7YsIta|rGTs>;0!W_z_oIpUbxq+~e6jC;dkqf&1W{dDtPEt$h40@pPm z$n6Ru{SLRj0b&LhkMIdeTob9Ab)H`ioE(k$YRw|lYRJgQ?ck6~fg-eemJ(gYsC>v5 zwNOD3H1};2$0e&bJe`=t1pPbf;(QO69jEKrn zd1zGwXZC*lSi1jQ(&s&7!EkQb!^u?40cRugg$kbm8^-+AJ0?__OcZ*Tq6o&CWX2(V8BOZo3^Wy*3gy-cWWh)-cm z?&)rrXbUckq=#w;FX0AjX_hESSp6-#M=#p|%V&!l3P zO_QN)R-;U=ELP6r1Vdn*8Lr(5xDQb|EDx$}Xdq6bG24D_5?#86`4r?yG;#47{w*J} 
zCtzZCnuPAQOZ*z{y!F9M&U4hdEAJj@8^2VBu!No8C)9;FY(SF8p9bi>otxUCcCzjw z@?(gmW9j_O@a2iw)?ok4DtM>=sWPlz+ktve;1O5=;3fDDu*&>ZS<2Lu?DM!#{RH=( zpf)lWDc1nB?$$nQ2&`6bsKtMl-dE$_;kL8Z#+XDMT=-+YYa z1rM8NTXv)o7j^`=yW0DINYCbWj69BM91x%J+iL26QNQN9A!@u9%y@iS+EXuS!J|3h z1MIpUbM_&<7HPKf=)a7>_js8Vqe{SPP@fHeR>B1757b4)fzm&hD2>QAD80lLCN7meR3qI7WMi= z?}~g^Y`0(+Q^?fYH7{3HDq4!Vam!;&v20bUYl;=Hh9k5UUqYCz4#QQemt@Da zX(6P7s%W_ zBAXIX+8zE-+`|Pt6%fBH_Xle{^{w6L^4jdooM$(YMXNu4uvXDg9JyBxipkep$Dy{hl0kq-P?U zCPioGfz)bh%a5yY!tv-OgB_lMEOl2stExrn_7mAxR^3Up*J9$DFV<;uyGXU-13q7v zL=1bNt#>nvj8Qk~U=}qk9K56Qso|s#JXjj#XgMuo@ebvwj3`~X)c(4NkYG>Mz)Zp= z>udfDQb-IUKh3-=ib)p^hMWh&F@EQYfz%z{AeNqFlFHS>DGTs@JvN-H+gGo9O&0e^ zq)}k~-sb*Za}rtQyL9zFO1oCF&leNikh<8Hw*pix(ImmF**N8cJmFH2!P`IyVcjzG zvkZ(<7CphZx`p}Hi(KiC-s!BYBIrv`u5M=iSB6)OjSGzUL)W>mUxvG}_D`>4IVvjO ziFbITIC)m(l@g}axL*_fD;0AY4ZP36mRl4|MdaV9=iCExBl>AU2NT>R}b497vFJal1hRQ$~A*-%KCQpmN*@4Z-PL}%w}lBs+nuN z)}<6Au73UGB{wx3$%$ z&*`z3k9n~qSoHlCX>51Z14?a~3b?I;Q{k1YQ6zZw(0PG75_8S9=A9JHZ#+k{np=nw zMJ~|c2N=nq(aw4#s~f(*&!=HN3~V`HXH$>)mS3(^foGbNA&IuHAb?4nA;-L5S~Cm1 z^%fX(xPI{jMQJGuBY~T&9i#N|?ZD2t-rfnVvn2m2NP6=g)+-y5JjZxT+ambQCBzyO zm6B@BQ(qcFJNq)c3~kf=&4zrXb>@23mg2x@&I;t-G6V%fFLNgp?e#9FlLYmbz*se4vB z53-)ka_gsT+c2q9OW(a_niOulvbtn;K9*bYn05Fk&uXuRd0mqn&KnJ9dmh8MhH!N} z#x`1MfP=hd3d-<9du^eqbK)jF*{q2n1sn5&^ zK8)$Nft;BWO`M-VwGrFx->F-NPSb#3m)lP&V>R?UbfHl1N;@+3w=16WXGG7ZkWVG~ zTXRvf`6x&wM5CF)+ub3)d5~H9nmJnTw|vhSKumT zbiY(L*(a=ie1hPW6fC*|<#HS{;H_Sek)pRR3klgFa-y`qcX6x2I&+)|ay0d-MoBeA zPW5I_jY#Q}G)Ebb^Ccd#26u_n|h#kOIzMD4PlS(;=G zd^9JV&-c2lCW+yKwY>}Up^9|-uMQryJL)G3AuQN}5Sp)z9kjmt5HwM2V zDU8F_G)7T#uk|ibOu@af4}C>x(5Om-EUk81DG>PdnJluNi6uYrok4^qp7OPaQSYE^ z{R3V03@0&T7Qgq?>E1w%-&sEjNGA3iY@A>&&Joc*Z)s8|e!Qv7=LCPsfMD=OKy738Rlwlys zk246t%w8suUZ4=#u3ORZ@BpNq4)K1sYt>Qr624|1bSqzn^+GezSrhq()JlnJ=w;(* zHyn4KwUV^q(^H2mSy22%Q$$O|c%1bygLE0-W`{LfV?%i%rpoj?a^$Os&s<%TDjn6L z(FCO}qT$2XQ5HDR>qm(~A|B@+{1)3Mc7a!nUld=lhIzRG zzP|g>jImv6r)ffi)F6@IP`Zq6U{viY!vMk*SYka(meH(ZaPDp?2d8HU0kAYp@o!ZDw8#WG8^ZH^Pt=hHb}LSaa#x1q;x 
z#e5x$sG)jrA0l*zaE5`7)j9i-z>`jGx+50&1^$NUjkT9W)^eu9lvlgd9C>k-c634ozEHpcMTDe-5w29}_lSR9HiD8;#3c_B0!#k4nL#wYjHEfzi zp~BcrUNrU7ZzN11%Yw^&R>yPS2bNOcU9(4tB0equcAZ zS$4PXhDoonYr@15cy#0ChhUiFQoIv->GkRT6Oq{_@N;eHGy+fsv^Ud)=!nuCQC)Zk zJXpHOEJHy!N?Gzmf$J(24o@XtR|(3<7x25=h+<;fpm=sT(egDl8R=j@LSm9q_%@+HM}(uWkk*xRz`&PM0(IS{ z0OBN?8FGfP{BHjFEA9TYOZ;9K?-p|Dx#)BaZGqjRJiSweHA551s~Jz}*7a%MD5x$b z`cL0jng#;e^x;yIFR{G+g0ezfpq5W{m^pjj2WzZ zAfK0mKUbmhuA+#2G8y&#B;G&*he6bGyjBd{YAZJt{j(p94J=4{o<0HBume>!SUCr+ zzVy8tj{5LpmF~VP?aMWTI3-<_se^k6UBhv4H_OWYIfmoSZTgY-Xi-tJ`rokY*6%mk zbr*abPtb@nf7A_SYuUP4y*I1lr+(2UpErIu%ky5ZT;~MY3sg0M&v)8JhK#Z}W2bn) ziBRX%paW+`Vd|HsRRo(T&0Exf_n7@iOlNGcP2+3Q4!E&usek_0%tAiOSup|ln8*qb z0O0)Yet#Yloh?jFU7Q(z>wfdW8A+?LxlE}2tBg(0ftT;Hsi`HFsio|Q7@C&Hb`aDC zA-5OzkH0%T|Bx%ron}9qErWOASxr?i=Q%iBFQ6@l+Rim_4XHXJjhcNu9OZ?VL$o^8 z^+HPo$U{X@Y8j$eEcEgsmdI=@S}=m3U=s4ZwwMK#DXS}f+>wQN$Ox`>uxXEnIC<-N zIT^0}+Z7(Qg3TtoHP!s0IQ5isZ?<_e6%#hYh4$RKY0``&F_9-D4YrMZb<>*xwGhFh zbm6`d81h+ht${gp3@tSd-{slpcvtvkMh`KTgZDk3U{}{^WLx|&i)mR#Uo1)CB@kp@ zu>0+XpbYX9SU~W6Ai3Om$cY3?mj*<<@Y4#4G{}Edw&{%8ovLg7(G29!nxW zu&>Qe%}0eIld0QDtEQ?uLrcL%!NcEX=5o@w|yWZ0e&;j z;MY;~mC|7o15xm`ZF7AxwK{;j4oU!&^eks`!%DI9E+!>&EH`{j=BTunVX*CKw{^9| zyRt1FY8h!C9yoF1>YRlTQ2-(D)>%m04{w$nWs=qIparvu>33`=6jh;3@?J(j!+;8G zZN@F_muX|G1O=A7ZN@j;zaZ-{Z{8;N&scMOJf4r%rlPg^WNTDqsg;*ie9fzp+2*C1 z_neJerwTYq-mb-h*$|6~tGTF?jE+|bv~rgVqgp}cay>g~vv05HaGbiN ztryKpVwPGpQ8D~sGjDN`=qA|xda?LgFp=SO^jdGvAZS?*yOmq=vc$gSam<08=_OyN z0jG{Ch0$r+lz0n~=g3*^V{B`dw=2X|>uh{2iOzlMsjU)XX@0uf;)l4eS6ptk9!X_N ztR6`Hjb_R<`L{u1NrPe*$^GuWqjQjo~H_qly;Ka&5GcRxFFi9uDRO$s24&!%9 zIa?SynVP7$I9b}6|90xLnNX5}Vn%{Jb?-V+yg&Bh<&2v+=p5`czk*6=vC*)I*NI1e zymo^_xFWoe5qUywd6rf375XkTzjswKflyUQveq4BJfJ2up{7V1+=1R zreuoUG?h0{%NqIv4Y-3T7f|4wo6(xU@(Eet^+P9FK)%J|XHTX<*y)nO*9FTUD|X|? 
zZw|N6=G12#3_UZEdnPd7#R)0WUA4V9wenarF7Y&@SG_f~Ubh3AFW;C{UGW}2O?SFYn zuyRBLbrgDtaY9ez9$q5f&=;hK#nnfJ9S%`Yj(+oyTw9WOlK@z}T9s)jpZ$i&xbExe zaVsLQT1tBld0F2!T6#82d{6%l7Frz89x2${Y zL4sZO!-dID#nscGGOlbE>NQ) zH@_$-`TeWjb?a%teFWuX<in7(>LUCy-lojFL&`C2JB~+3Tbf3E2i; zclAS>?}U&+Fe(Vg2bsa6tQYO=@?MmJRlG=VFIMpYRu=69OMha5iS}UtR@0GlA$&2^ zsO94DINR)+FOxliSCWRQC*4`+u5}sX=AfeNhtjO)xmpynwY+2yvzbA8d+%-;#AxNT z!fd`!&iV#?^c8*)eLEo%55*2kB6owu3UE7*Au*%S%DQu%Cw3f}ON2rYVIR#yRbRe@^9M>KZNt<%iK)wl<>{sRt>z(Sh ziD9LW!OrUjOo3Z{+#91cqW3E{#Akxx$<7;bh8vj<^-!y9&ZC?H+zR(S-UQ8XPbqkX zyXjYzm-z1MggC2no_eZFSg&U9VC4@4FsF@IOq_xTo1r`U@84ax8*>FG@6BGFyD8}< z-|3sPQwUaQKg0ZCJs}|J!QSHEzh3xb*Z%1L58u2Um*No|(;3%Zuz<=gXe}(>15$1W<$K!bM|06+l$0>D1ImH6ks{{vj7)*}D_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/test_types.xlsx b/pandas/tests/io/data/excel/test_types.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..561bb2c5d6714cac96aea28a2463a4795aea6427 GIT binary patch literal 9010 zcmeHN1y@|j*2NuyLxQ`zLvRaj2@oJOZjFT|5L^-*f~0XzaED-xO9yv%C%8K#@O5V1 zdoz=n?-#sRz1FSO>)w5C)vdE@SDjOuDu_sga42wSaBy%maAt?;HpcL9a4|@5a0GB@ z2!^swj;@xDu7Kwr&X%B8oNpcMsXrkhFlWIbz@GnK`7izgCE$MbPHtTJt7qS3H#n3R zs-(~a4g$OJS+ykEpdY)6%ndVaZ67ki?r>$YiR^?b@rM_@cuxjxsvI3^14FwSwXq@s zdfHwXlL>Ks?cQf-C3zg@V({`Pi;zr;=P>{fWt9q-^t!H7n_q5KTDjzj4H2<~|9bjd zw=Ujt7X+)wpg^Q&zI{bqcL`6puW%f9eUxCr&X&c?_-vg+2`PfsYk}C!I9HQKUx&p^ z8{%@CsL}#ruIHOHF-KV94vuNYSsYqMX>1XqRO5eARTzF=WY84@WNY;{^7d?H{t$7l zdD|dJWCP$Z%#aG82^^b&(*#CT6PNNj4*NhQi9W0LA9qkM-Z88akVBWrEE+PRV=UB3 zX&u;Ca=SC+G(?101AX&*dh3PTG!D$ky&dyLQ8$p}uOA36@pc-{+GKEh+i2*0LJn4X zcEr5)M>Y-L9iAlY`0?JOz`;E{Ai`<>g_bohxarPezNQLe9VU#H02fPp5Etjq>;LHZ zU(CTjb-g4`O{0?=EBNTy_mG~e$;B9485MWwXN@$^ef^ad9@RvBrYBo$Wg^FY{^T8^ zqF=M`_rCeXr%}5-G?#1qB@y_9Vsy3crNN1}uiv1uGP)!vye?UV;<-&;OkO4{Dtj=y zHAk}+*B4}~^e)pWjGxF=;tq4_JwYePB@ZSRPYy8XQhT{%c3lQPDWh{x8eCB)l(qYD zIMr`DA#Vp;GE_u$e=M1(2V`bFTjt$kPjhke;$9|JI)Ccb>RAxd6?aoPa;o>))J#he;UB%71q$(NH}8tsop9$&R2f6Cr} 
zeq^=L+da7q8|lBh%u4Mx)hk5U9aK2DN3bVgF7sz+DS4sglqQVpE4g!zxRyR+)sD%W zGZ!RJQs_>SYkT|Fr45PAUu51wDExNWX@D~|r?7~>*~XlY^6Vxx*=b-WKIQYe!n>QIW7_G_;0q$4<*?3G0Pw1F_NX=8F}I;H_H+o|-#FjVd?1E8 zr$5+THM1w+YNarDHwj*6kye~}h;`A~v4Qx8RA+)`w9)ImTr;xlR1LESXMg2-^Xxn3 zC0@kyH*_fCpWkd6u88IxcD1uv3c*36Y=S(Jq+aE-eLR~tW_25vnGwSm6+@Q9ctGl0 z8D$8akL9Si2bvG{V?V@H<|`Y^G1LO*GkAU*It zfwxsNP@?uYlEaK%@V?rwCgx~eQkgdGJSN$*&o!8~O%_a5CN<7I#8-uEauH7mmne<~ zCYpqslcZB@dS|DFPutrM5}|nnheUz9d~MtVxphldi>!mKtAw&TWsO>kAn0O1dgcYl z#={4h*EZ}UBv1@GHUhWLTuHWRS~Z0c5VvDJ7L0ULZGq6G zBOUb*rjHQ!QzJ1ZMh&W?eYx^Ag4sv`8m%XPO!P^UpM`-;4W#M+Rdvzigkn0=UdRhg zT|rAc{UP*`Uq*WCW2-ZVI2@mrcD zV-l}kc)a`ZV8g~miY@Mxgv82S+P>&}EKYwj&6p0%XOYbpDJaHCBoWR^r}hNI8Ws+M zyY9x%@luw)4MdJWYyFtSD$QT(i*ruh1J%?hC(o zG&n1E2P4|wnfQsz#Oo9`am&J(i2XYgL9QP5mY|=Lc22yZ(>OPw|8ng0ql9HXCR?4k zIExXt*qAPUTiLZqwX~rWuJ+do7;CMdv>b;#8Wck+gS(k6F}=*Lty046faDZl{L0Ee zcW9vziO~W6_f5gGZ={n6MS&Ql`hiy^U0XZzLa!aJ0v8u}tnjFxr>}?siy3I%_;%Bm z4!eFOOasq7voi&{<&kQ0TBYa(uGBU+4Rj|?IY<@`#fTMn z0lsrA4%@o^#SYxLB6jbR(Y1g$3Zgp%VsTpv(+$D(=Sa|jW8c$dGb)yDyO0~+!EVI)T@pd&Nh^W)lcy^1crjR$3;sWUB%{L5e-m#y-4CaCF83~X}wIfWN(<%9YU zq|NmwVptiWQC`Y`e#QM-@^~!D+=!(Y!yGg*rn6_aQR~sR;$H95rK;|!BIZte{Bp$4 zM?sDh7kH(tm92Ig3CSg77_v@H9Zct?qY1CbbWh!~-iY`YC>HvFXUxN6_Ii;3B6>(o zJ2a}*4)qmAKWGZu*ynVTy{8$b22;xX71=aD1;$t7U0%LTS{c9PV|z>2xn2sq&vFQP zc5Lgj)4-A>LRM0fDDaf`T0UwO{obR}vu?)7B3tN659B0j8e12tOU9K_rkAQ#do1hs z1IT}3%yZNLj~zvznpopsXcKG_g6pJSg%mB=e77iuP}kkU)sg(bgW=7UdL1B?J_-@a zRHNmJ2z2I4E#v&e{^W+T*_rc}9_>yt z=pSXOodmZ<9Y%~U>Yuyv-($fY0nB5WnOmBa$Me-~JfW>{Jo zw*R`X3oh?lyMRVk4Yh7H=jqN(^v0MjWP|+(3Nh| zr`jtUxM4>q-Z$sxUuF1up1GckH79v0CSi8OB>-A9PsP(BXOig06a0X}%>3RO&%lv* zmgtT*nC#zG<}-a=@Em&4x@Qn?!8ad?k1;r zx0FQ8maAP#N*8x+9B8KL<-f%fCqg?DOeY=C*6%m{iLDPO zq8Alw3q)KkKBuQUA4!&dF19qHDdy(MDUyz9!YYr;R?UwXBTNHu?oQw()1lll&^Rf) zZMTBM{vHHlL+ag*Hx&mR^W^$n$W2dMfwK)%ClLA%$>j=_$P4C?Zlvy_D@7^&$H&eY zs*4}bAYqMRBeB3kPURA~t1Z4Py*2IGs0zz(*x~qLdBW{uI<1wmkrc&^vZ0&P;=F6B z6=COxxaIFc#~EZ@Oc=B|t7XkX`#UVD-_M;6E;*EU762#3y)EcWVeR1d4K3X4>bK$4 
z+SeA5;}M`}iLltdS~jBOmMjPKm@jdR9zMT$Wt3if5q?y|&|r6>UB8hFuDn zDfHHIh@!G7GP#+G#5J%Y!gB6u?M%&J7@wHvSz-KKBdLySWEZJdodigK#FVdp3o@!l zYZ6bfuUx{`GxXvj%>>>ut_V2A;*yBQW_fi2cBrs+dL8(dm^ln@Ip^ky+@kU;;mx$? zp6MTO)K&x#HmNou##yG3x4GjQTb1^r&|X?6dJ-Rczj9{tb^6k60$371XhC}HZTFb^ zvhH#aR|^>U)HE0k1B+xaFqG)tNG`qD!2S@NI+H5+2$4`hx0&GMM%35guqvkS?tPT* z@E{cAXv32`uxKi?<(6E)7xXJCcA%$C#zLEOTnT8(;|0ig^7`Sf$ZFhJqTF%_n0#i? zb#ZwiZI|5S9ni;kbh@3P(Nwv~M*}|WNB-I<7Dx7+vuoqb!$jTLrNhg za@@^Z7NaQHMCH6~1M#D!(uqCa?dR?<6H%CF$4O09t;n?AkZJ*NWC)F|-y6>+>yNtX zk818FW*aEH-o)T`AR>CZdATCrycr@rCmn1>4&E{+%CAmtiY&tzN>=dv$XA#QuW81` zM;+~|Gu!am!|si3QL}sw2VJD=mniNr?m&`NcCo>PyO&)J$srTG8W1xGnG4@;)FV8g zJS|gw03)N$2vp4Rrp6Afj+KrdctfyF+By9e1wuDrNb*Ui-tDnchU2xnX~JWYst`G_ zsL@CDJ`|qVBrhgz&&mUL9U_C{OjVkpuuB%wjTi$j$+u%h~tqgUl zfK{C`=li0s%M>N_v*aDkR8i5*h@x9Ucscs|Tx_Tx!u%uNx$F+|1f*6F#ctlXubK4O zOgDv(iGV-QiAHF;eM+*(=nx1i8yMAKqaxLqO=-{G<9wP_gm~Gku_#0}*#|>}XgT6N1+cZxn>ZPs*4id(sCC-bk_UDW_lG_AN9`TA{iJJoR+;6VBJ()|7Ed zq+`AvBkKVRnnWyod>Mtd+$6%p@!-p1kGiNl!vTI`+KT)9opv@}ZE=trsc!#|3ZvaO ztdCYqVzrEL#`kXFjZ8=6-E7Odr#TPTH#r7_@FF5q0Rtp8tG8<{Mzh|pkMO9|?`rz8 zUOKqh1zFXIGb1&te;(PN5)CpgH9W%hT&x%+e|p?ZhmA8g>8QCUgi+&CXNYFUVCnn$ zWdUluvRFYK*^jIr)NJM^n=C#SEpS(MjkOQ|7+Bb{S&+en?GhQ$;owMrm*1beM39Z8 zr7MW*x9T?yPD^lz{=|*jy$o23(^sjjZc)1YT}) zz2AE@+Wlyiz%trfBXmS6`x~YYmmFUmd&Z$s5JZVg`yE4mNe6=Q?9duEo$y{-(U^<< zd>yONM8or>FJAmM86+%Www?vgIC9YF7>Hvemr1ZCi(l-ByAWALhjcx&o%Dy+GGm^M zh3y)|4@n-`^&QtD9a?X!J@>LY-fDAq!5J?F$QOl!wbVhTfmIBkmaa!7Z5_7du7iGw zSS}n2ALI?&r29Mo$~}8EY0~zm>O8F^!03p z%YQNcnAUZx62&&O#b=Pkv6IskBAby^KJscV4 zmd{ExSnJqP)R}hM^98B(v0kX=EK3QzjVf3W(u7dNXUiG%T3QSdc0Q?gN+^jE>%t&p z-{I&F1M4W|H0H?iZK0%Hp$KbC0U)+6M9Bf#{Q}RHEr6XLW@E;_k?r^f>Qoc#_;IEm zVO?tNm;^2?#@rW!kab# zu>KFOv~9_MT*N9(6VZ;r?m%H2BmA93ARAK`OA8%W7h6Z`-!^*oqgpD6JXom5?(IjK zw}+l$Lb0QJZM|*Q7l`0Sdp(NPUx<*|{hh#i+=q zS%HinIdwNGmg+ynk-(S9Z}q=eT3ph%(y}ChjDPN_<_o^Vi`ybn^Uno+N^1i1W@2kR z{@zCCpJOwZ=fT~JI#HDWICo*uPQbi=!1)^4T11%b5P4H-^vUZfqfN!=*za+Ycwd7~ 
z^m_Z}IFG+1#;q^+zDaMN9+`3m$O1a^g0q7iu#ZvmyxV<#MwR{+$Nz4KlqUu2zbR~| ziw6r)S~!_&x;Qz5xXhhgEdP~U`(I=d<{Xi68XBF{q#;L)z9GS0Dm$*Pa}L%r_jAI+ z`1qF4n&v};@TD4ky=)sNQ#_8*TB#mAa_UJ$eIXApNiTU5@GR~V8h?4vL?c3TncfL@ zc$&3AMbVUI7BRU&LL!NX5O|t6!@;4=6P*)-{(xBsdCtp;x$DD`G*9CDV4T(3+VGo- zYB77#2NSWjmnlP)2*XHcSnh2zk#eqb7!tID`K6xzE2ONNS>vaYGXs=!J+wwsZX>{8KAF+)fmp33`?uY$t8oz!I+YUQN{x z#TgIN)i_owpA#2(tV~K z<9OU+g?FfEnzbW(ax92dzw)F3wU$DI6Xr`6 zF7J99kXyTNU!A#|3%^U;nYuW2(=txH0ayz#NR}Bq{3i(tkH7&Fi+?_y@b6Lkcl{4% z6*N`;YT&PV)PDni)`>7`{3)yYEAX$mr9T6kU~T>XGfltR`8DD2hovLf0gPYL55Izc zjgkHV_Q3iL{71y}SLm-1q(7huuy8Ewy}!njel_sx8u$+bL?r)d_}^B+m@Xzomia){sH7{zaAj3u%4h{qM^M^@x J8uibA{|8S^yfgp+ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testdateoverflow.ods b/pandas/tests/io/data/excel/testdateoverflow.ods new file mode 100644 index 0000000000000000000000000000000000000000..bb05267865303511e1e472b4f0ee48ba5a0279c8 GIT binary patch literal 3422 zcmZ{n2{=@38^_0BY$My)MuS4OtVttThO(3Gor#hxV`ngSk{Meu)?`=85<>Q!EJ>D9 zWMtQCi?O`+EqtT*`mVmO_xql6J=ZzUIoJO_*SXJq|L*5D)TN}N2mC$G0m5Arq9nQ@@qW@Qg8qyMBlVTOR*-QQQdk2y7$X8q9;24TnS`w*gXES zzWhk+LvFz*r}FFE?AM8URqO|Q3v2sO^yob!8%gStPianD!G}k_JR@?1PZi0w-wUMc zMnv)KCr&Knm!bEH;O zb4y8CnZu1eTw};~i$v3zh0>?*DReE8L`MI?a=-J_N6)fLI(QUfcm{rHEBizI zb_K}$>|8<(YuLt@BA^ z^K-*Ke3$(fn(??-;Lz^nVO7cqmOgBs83P+1lZKD98f_2MY*kvJiXMsz-KnE0=LrvA zEyp|F^8oVznjD=Tf>$xgt5V3tx@A(JK2`rXwoXpkbk`r@1N96zsFf}t4&{vqx4Kp@ z3D>QNl+D+jY{Q)e&22w0`;P7|4NKRh{lZla?j9q078^K0+n zc4I({y)I{?Ai{mY5PwL&0-7mcRZaUQt*;Qtu2(5Nt^o~bdM7&GVcM;r+}qaYHQ@V- zrTMM!4d&5ttD*M3ORPQSlB+8+!XyJ|_VE*H10Fx+i=0f@2c4rww!5%3_P)V8VxSEG z%ZRY46uU!2Oy-7UsYanw-~(tS4_$u}NN7a;Kmm0Qc-9t}kA%Mll^es=T#BdQI%o40 zk^m`1A;V^1kK0JC)#Dd$zY~_r@l3sZ?n93WyAX8*@48m!J?Gr8z7%(0av;>o@6au_ zU%$)($5)}p+JH1JKU zopyW!67{-mAyh`M@66p{qo$cUutska_gq8p;bD_JzYkexGU5%p46MoxV|GDi=!AqQHFXZrss&U4tB~+%beVSuX8;vJ|@|`UA2%CO`lC;|TsAexX|)t6)dp&A!^nJ9;53g+~2k%b6R`z=%@ie>H zUXq>X2^x>30d2PF(c9;LxbKSBpP_Uu&=O4na(p}Ewdc8id3s=|1mg*5k z%~e99M_5N(wNwB;=aM zD&8eK?h(e%#+}WFU 
z>bvpW1FVqySBDx4o zm%G^W{j=j$Pj6X+fjcRg*0^5D&emZItYMRO*m>9yH)q;mdoY=a(!FX9F}+ zU1fsIx6+;J5*OVp+r1xgXD%^@n0-`ICwL+~E~XeWGrnWZ3H70$DM2{ol6cd4M25NS zRSI!pHRr(fR8LHdO-Gr{lY=w~g4-IN?&zw#$=4P1McAjZMaD;7I&IK$5g-$-=~yEN zbbCBl**-Zs$-M#CQi~|@zp~m)v81~r<#c=TWhmuQ2bW_LwVL{lwWJo$lDFY z+!mU;OFL02pfr;p8-;7xVX-+lZh`Z z>*YaXQ@mXzgIhVrkKPJQIt_5qJGOi4V8dxjv7rgJ2&5eFSSr1H)5)gRd`ApDHaX5I zGd09?zGAm(vfaeO@78c;O_ajt!)IGd{ozND!b%^~3Aiv&r{UqTaP=$QLs-Go@OokQ z$)tWGY|D3jr|`&|DSR=Q_0BQf|KeF>57YWrGRGi50Dzr**1utjkkB^JkaD$hbF@Qa zf0C^@9^phO1HQ2vKH27{;!`Pp;puV!eh1J_lmp$C7tnt{u54hH=wqbyTv3~2?Y{p5 zcuKJW99uNI{jtd*9O9(6wYc4NIQCZ&JA+}^9lGYb3ENgd!K0kna_6GQ=!dl5))GnR z3n(0$A|AgiST`DP&8UlD-NjB>Ow7O(^9bse8K`RR@v7Nta`8FC$f+)B8$5zY+(bAl zP{#+yDjCUHQuO)0{w6PQUHN=eO&pIIk>C=rWcdYeUp~B`qN*;msCj&;I=eMgEAVi< zgiDJ2q5nj?HCLdkF$Dl{pX`dC`&G1;)hX8r#`-rq__^>|IXQgpcg;-BbC^JI2{p)h!xyy^ms&IvnA*Rn z-KG?}iqjlh+Q~0FY)))-7H6|p=gg9=Bho89 zF`cw}tNk%ScyZy+lN5Nv>!`DXVB!5(bWr8D#fj`&pYa!)KQNZO++KV+Wqd^T?B9N; z080GazmtCi;Gdns>D^!B-+hCj?y2hk_X58ZrsT_Cox$(A-%I!}-4MC3|3^XpUHE&h u{1WDobL)R*&hI9EkFj4S%E;{uxj*_DafZ6opwlPRQ} zzTNlU-F;e()lNu#HTS)D&%5X6o^$TGKku%-|Bq#t9)D)#PsEezq)0wU&5{xa+`==n zN()3*%is12T`VeX0eaN~7#w$V&A{Qgm=B3CX?=KMZN}k9`bx- zxPX))FGOC1ycl^2@>1kw$jgyeAg@He7Wq2la^wo+Rmfpv`VRA2$Mc6seII&J+hq`c z!?49ZN#LH8ldweLX(>F$Rzd)1;SjGP7t~eu>zlq_Du3zkKK;ZiRwasKx|-Y$?n&vF zBT9~bXZ3=f7fQgQ-`X-1f<)FEZ_zc2r9(#K82+NF?FXQRxRQyGVd=wj!iVSv@MjEU z|Dg;%Rb8*XpDq_}gtSlt1AqHWr%TMD0(6`5dJ?(jY@{7MHwzY!M9i?kS+i{pT%_n;tJou`nWDq z*K?}wi|Tp{-dwV7ldrel6yAv%9H+Vh4XWNBt81mY&coG`Zz`N0DM+>4DsP~ENFJtH z1HU|fowZj+VIl1a{qTw~{2~UQJ&yYbN<*+FBFMj#`%N zlox4M=)M%XkD_lLLEr4d6E(WURpZL66P!U>Z@`nXl3$rz^uV$i^uWb4NNe{D(yE&= zT9(vjyyi-EZ%OTp(Gt*_sYqgBHfHyXD{UiZbu%Sb0JFWBqGciS&RiR3&464L!5cC9 ztq^OCY>_qitHOX7mP!l?jaH@HjM7#>bG@qw?!jcbB0uUK(vGpBNAB_^vKO=QRZb$? 
zAcMaYW{rwq3sk#yDl+*b%0=*mY?57K-HrBdf<|)dfN!;qrmADpsRTx)OUIf9uMNU$ zMeu}d%+LC6nC5O6N2Ad^CTn*c)?}S5YxBL$`qMD=o?NXqRg_PFd^O!-)FY%hJxx>N zH7m;IsYG$N!_-Aq6nD+sqL_7TQP#)CyAM*C|lSqzmDza4KWw^mTmKbL+H{F2hY z3ID4+@W0Fh|5_gSV|n1`^1#2E2mXaT@Q3oi^U0Hs{_cz@7`C=ZpmdC8GH6HC(tP7_&G(y%Z>bIcp|s%< z+flkc_q@_{2*u8vw^MLW zdl4#azEP37jGWwWQ}sQ3xfhoU&ON+f^&aR z!87gE@j);ys5rw-2PVXs`$LQ6ekwYX>gDvu`-e}3HpnR#oE0qWIWAWFD;7#&4ip&N z&ANPBXk9KZ3$4qYokHufk%iW!<_oP$>lIp;o?U2ND!b6S>;Z+=Wv?!@E^4V3$0r>jk=qsQFqHU>fSt!y1S=Qce_vBt)70o3v~mTe#{>wZJm*xs$Dx%erT!#K2FzJPPKKN5S&SQ|prFx2H_~=8|XT^yBSY6g2BEPI+uQq{gg_ zeoXyt%e04n&E)mhr=EH$gKyh==Jf0BYuw=1eT_bZFEANpVPDWidA}oto0wM@yA~~1wMNVQr@wL z%Z)7mT} zI9xp*Iw<2xPb7fBG+2?EClP#NSO>|sB1iEZw;Nw7ctRwMudo!_!g-+5U@ZRQx8F$> z^Vw3!DKaTDm_$q_mA*_kgHAIkj>Rv(`G&z{p~0laWOBVP6Fv1+Eh&k`|LvVO4JL~W zCbcG$O}sY6rNJbJcirI2#OYm=&EI(Q z=LVB&4JJ9ftHzg!)4L{{-+T2rgUNMdvSSMG3WLdxg1jqI^SNhV(@fylJUzwORcGQuF$wpEH~48hrf47`P`hCf0ez z#PU=KA6xT{gCeaDe%m0q#z7KZ%*#Nh)nlQUbU~Usu=k`7(cUbzYA5Za(hd$x?a~}fR zUYr^6ncHF|V8>@J^DUg|aF#-@(kxeLBCs0A9Xz6*l&M5r;t|!0;S`lSP+3vyGoo_W zCM&8g*`n%_Evha}1ZLv{5!RedgGZ9F@z74U`mB}U!UL~7&=IwPmaXSP>6aiPDCN6_z_NSlk6! 
zL?#03G2JZ_>o$mZOx{%|lcVqqp73n6is2cfcuGLN@ld;)L11~ROs2-;1j=qEx87_o z;Fenn1Xw29c%J^Y!DH_x`=jB}=x8F^Jvx|(j)y+uVYn={&|o+U(MFYkSy7#K=FS}1 zb5+Oz&vr>#uO#D=y5zKT2|iD1ch&DE0xRSO93is~fnhIn^RDVW{c3eE9%^!nTC@Nr z%T?3XMk#ihRvkE{v{8y3A011{q$)QPTGz{@EGH8kQB|8=Oj7lKe&JuB3o@zB&7>v= zlZ82%=oo7<(KC#IViL*Cq!vtec+|BhClf|pOF56Nu8F`rso_sF4#h@>lhJ_$dR&8> zmUSI|#B&HeWduvNB#box?{N1#?(W+zc^13>>_ajpW*@QyG4>ETKvEr-Cnff}v`m@h z-+WB&@d2>)$UtBBXe`kO*Bx~W6UZ<-%LlS$6?W)s{o6WZh%`9^CzRP9mWd^lb9y*Co9BRWTuoA4lqPeglR zg&sGFz-*8>43jYg(GEbdhhFUB+YZAW`0N;_OLlD2B|En168-}UX**#VYjBOr_($ri7!#(26kz3$D)<>Zu&Nmni(2{hrtZP$6qTks|Z`zd~ z^fFx*AN)QW>XOeo@E!(!8U8vUEPGiD&N>WR@Tmp42Y$w1z3hVj_sPBRz+p#?X25#o zC}7p-1NT_`tnw)wd5JO1VUXC9>>oKkT1`+nKd59t5rUrJ2j>i^=1ge0oNvhtY~}V?DErKd{p~ECCp4zIrahl|Qx%$$NkC&JS9SwAJ6O;F}bDv$}plLB=>m zPN#K@;!b8BwOGd-707v zhug&ABw^`-8+G9B{V7uzzRw?S@aHngv|RYLR>>RERa68ww(xgmPA&Sig|>DD*m7R6 z48s^QbBVTL;+@oa=j;bJS86(N=y0Vhu$qhJ%HRLsKW&j+b?k!LS5*a@y8Y(r{r`&w zoR(jJLPtyILAD)VMXf98CkGNkBg2h-3EAJ;-qO)}xP9+FY2Vk;)Z8qG6Mf0paXuk_ z#gYTD=)DMj(z<`Y95@#3jdh+JipjyYeIIFw^+;>74;%c^;l9Cyw6(T(w(mL6DTsgr z2!slyJ$8I>U_`xd9pbH8+M+|TWJlj7VNDyCr8a-k!|{FoV%%!ntbY{;q>$Om7iPEL|n`e@SZrY zp_SjoeK&qi&A+<9LivXJ$k_lWQ~5cOhcFU7=W%lHsulh>t>7^0@6)3WeO$uo|HZ&*HlqnQi1>dQIL+bxYWn@u zzz3KFm#7sU4DCqti%XyS(&gW@9$oPCpM+%d`e$FS0_@Ak^bNjmK<40jD>4U>F64#C zN0B-Fk0E1Ch9#Z;{~&*cJBP5J0*MEAh@<|g0PeZU9B5klx|4&)2jj!xefM?81{BY+ zi?2NR3o>^ zTZryO*rQ+@y#Kpp3mT|>Cp?rK>`g|GjfAuPu{9p=>yGJe8IDv{)u?f!u`eFqi49N@ c)nB)D9F4_>x9yZ)?K=N)TVGf2zdIWE9~>dzSpWb4 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testdateoverflow.xlsb b/pandas/tests/io/data/excel/testdateoverflow.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..3d279396924b998804dd307eaee93225ea08260f GIT binary patch literal 9856 zcmeHNbyQSa+aDO(p@&8oNTTU$X=(NLLR0#K39|9kxpTcB30(PokhRENJR zN?wf@8-+*iPp5#djk)`p#4O;2T2N3}R}|#+i=BhC7(PuzOPK3wt3O-a!69>$o(`z) z6%CyMnH$5h-IZ%wSH}AaskV>zZ-13im6_1~=o27q1!!&kwB9`LNQ}ZN?P?_{2Yz$S 
zFVjyi4MZrknRn~M^=q^UxU5!r8>hHzcr#@9mp=4T87z|uRV67G@UBZ~QL&)hIc*^H?&v{V!w7usJE5IC zcy%GDIZkYHkf<)&b+;XLq4Yr(LF9_5&zjSLPy=^$xI9{rz<5}Z#8`#O;+&u^yigNz z&*{>>SNS#1=p|94v!0#;0g69FyoAp}>j82`9vRS=kP-jD&e+PHljGa(zmfhw9Nyo( zdT3O?LNgbrZBwRJV)6KcjSN;wSq&upJ;62nStc8*vamEpv4efs)okAK%U?Hl`jczl z_YP&(^GmW8_&iebG0bY!98=pGs4YkmYnAMK%N)+iZ|AG)?pfi!5~)lRT(_rWOhU%G zs`9BnQ0YKOiLIri`>A@9BuhYw#J$Ha!yl@|J|RD`5UYEu>Z&3>o`|W~JFwC_R+eF% zzf3leOkCq&g8u3GI0YXoEw{D|--9$toXc4_p{o_o!)2z)D5$oC29_ODPYYXvh$&=! z@1s6gzWGQ8_5KW=!~0a@_Cmp(z!@TcM28}Eo?tv8gf`prmvOSto+*Zjtc=4z0DuyJ zh2m_*`Fp51+1gne+S*!j7@FJsCu~rV1rqt}|MpcI-EGm#MId$HcY=s>XP-2ez>X|o z@w1xT1**Dyfs`}y4h5Xn=aSvQZG0bRg~o%}5ax0!ui308PA1QOq%Izt4tYzdQC_aT z<>*}RiYA_Cog-+f)aZp)Grwt$-;*rcHof| zX?3_oB=yx-i*;oX&gMl8|IQWt*R0Jjd#>M@46+AMx;5~iehzvg>B5*hMa&?4UC>t+ zF5Xf5{R6oPdm$#FnkO05Gi*T>dx@$yt6JwVf0b@2zzeS?k$!O(=^psVf213I??4tN zFf)B#LouMdmR7JrR{Y zm7H95mzYL)!mls09Qi!jDZ%r)9P?0@Z;~FybTDqA^bl>fHj=U@;OO zc`;_*CPgzem5$KVx8xEj&}KAg|Ef^s*M$^~T{n&TgQ8#+7eGzibN^LBOWZCc(1J;D)#) zpDqF6fSLvCwCFC$}GsjaQpmlCrFZ`!!E(b0oSbREN=Lb#CyO-Lc1l^!l?MfowA?6+f%1I>( z(OX7*a&y$H_|!_AZQZ$#-#Kd?&V;6>IDeI*fubPc#ttjP%$*Cn9xLoQG@99%dC4q ze7D5zU_~HSVW`30Fme7ceEQaww|zZagv|8Vb39@o02QiQS06A6HK+&x6~uo{jbIGL zEGKJ28IXrt;|M16LrPIHyvm{8ENta;rR-t@7hqLN${N`{N zvo*7VzEzWJNRLb4a3*;CbPuHsHi3~CAAx2*Gd<ok(e|T9kaJanskQA~{ZYS3@Sl(L!lLb%EkY<|_{CiI}HXd;h?T3O-Us z--x3P`3Hh6&wE_lSKJP!ZrWSTDYRfh$k~k|9ML7<1g+qv7`1fDC@}(Tb*mdKhZvU2 za$Adf@Fi$Cn_7nxtRw%b1J$KrsF-koL>@zk?z%ql9p$BnOee8K)6(0FOVW41Yh*zt zW`*d_=t(J_2O6mGNT(H;~`qIZiOScx}qnBEmZXgZ*^O7J6KP

    O1s(`2|7~Vev~_2Ol3ACU93I4 z;4D1-!o<<^I*-B!DvKvFGJ!sokcSIiUb6Wu8DA2_2*Ej?-cBZtV}kFl-vXhdv#?Btk6Fo4^mo5WJaf>HEB!hIo~rBplQ3$`><9 z8fJVb8%gb|^YxvDd@DWOMk*FU6*;p@4#1u0!3?x$gnuN9&qIz<5wr3_tCcUfUOC(a zGn)rTd(V?m^k*>T@;RiEdK(|tC2q5Mt+#V;>gB6hRv)6Cjzsl+w5gc1$g0-N#DIzN z$sO)Z+_!%f{!M4#>%%f44|qo1sKYh}n46L|=B>a>gm{;5fc(wL z9N$8qTU~4u8FPofP)%td%af7UIw4!`92kJNV*VsykqK8}!R{UNG~sPNhWJeU%S5CW zVG}RQaXj~IhukbIww)2Ps4sLs8Jpj+^hxJ5OBI+7Xc%D?(-S^D8QIBN9of0sj-Py4 z^vl}eK-xlp=zTrBIlOQL{am-L=<&{PPX=i3y>a<`7-CV&Ht+d$F8YDTNi8MTURlv; z>$7_vuKSCv8p$Uo?-y_mV;dIe-6R?42iWa=_g&D+DcUO&qKo-yTbWyiQyS>D3iRJ= zqyTHLx98!EF%|4=;Fz(iHT+g`9A40@*aOawaAOFip5<7{>z2v@+L&pf<3BNZsK~C< z6I|&YivJ3B0AKel7nOUNb*n4Zedbb7+A>XZaT7{V(5<7(qL6f-b()Ou2TnPhm5VBV z8%o2irGrci0**xH;-wM{f!(#T;#g%1iSKK&yS1K;088)yjfUEm?uL4xhd=+Uvw7vF^1Scyt~5LVBV0BF zZNSzfl|FV1+v$@Nzp0A6{4;O=&E4(5HqWtP3cmu6)SQax>yK(!Us!$6E<#a%I@%Yc zUJp8;wVIuDCfy{=j<+LrAA2*iXSpRY?MWEG_lLQ137e`{jwOnBtTK z9y^tUD&3}iUW$g)Odd@2DBAfb0+A207$|rg1mkZZjU%^J{hA4`a?Otk_cBJX{3e#_ zD;M{r*N(|HEWjS|`szrmNb`6fUGc;caZPRRvywE~zBV(GxE=TvGc_LmS4AVNEGk7G zANh`yDVxTs-^h*6T$Sm*JIcXl`WWtFL&>&0?OTP}Sc;~nhhPP=Qz}TrMF#EHgej4d z68nXKB0<}}>Ci#PXGW?w#JlV=V1eb$R}dmjN%LhsrR65a7=wL8>8X{=L=Uxgrx|6n zFgR1urW9fsLPAI!J_1wEHQzh~IUb48r%$wTm*yDgWw10DvUh%%G4s~u?D7h>ao}TDAETo!;-nk*%4z%dD<^OaEz^^ zn5W=?=2odiO1>t$=B$$)Awed4>jOR-2oG6+${Y5$YW>rFj;(hdL`Zy{8GXKF&_&r7 zZ;^~4qd5LVl^N?Dq8#>m6Kt~Qu=$!ztE&U1Dy|!^S=PQ7Q53o1ZTZ;tKoKj)ruxnV z2519zQq^yc9tIR%1pn5%2Kzj1X*rvma@BEX|CtSPDhx!+71wW zHH4a6FUbWM_~pNR5&P9L7v7gY>X;YR8m68}Em>>@Wm^U+p^nMJYw{ph-LAQrykQ%l z`VhD&VJo%uKEMRS9H`~qiAN)IL#Bq`zp;~J0ydbJXE`)*$CZ6DkJEXucPR;8KDy4f zEs|BTGuK11GCVIOAh=hwU|CZ>JVNj;nh8g4XdPmNYE*B`>Ot7nen9tja_ebxD2sU% zcKtFDxyfou4v^V{bs)m#1pQaFt>DFF@qg4M^88ho=s+TwzUG6OqM=2rFP~{$6ptMb z`8b)FSm6A{^ze;=|4tI$j+9hd5kh4QLAOst&xtq9S?F|6zmy(x%D1yo9A{;F-^4i$ z*OGOOvLqq1NI=uFw|_ME0ZWQwRM4bJY*bOXCTZe;=lrex`b}Ev&(;mj#tAI^%qsle zRn%3wCq`MTA=y1;J5hN(_PeJ!jx&1nMmMkEmEO42eQTt#y?-mk5@nr#{8e(B?Jui4 
zUV+(&H*$s)*>wHIj^pgzf4$>i^Or2;=62xj>NSuiLj6r%oG7j_+lL-M#!xcKA6i3B zF+om|#J%HXwWv>k@+!WggDWV~Kp=!zl0*iSs24}UMX8G$==m)Ej@KMsi8v*>P)Wc5 zf$(|X|CjCkKT!LuvHyRjc0J;`*|%Q&GIG!$#~*aSAKPGLLvCQ_a>oW{?0lyDV3a-p z6`jBvBa<77eE1i=8GnHKq9StT43X_JA(Ewn*%~R@*~0BPjco0V|2Vz*mz+YH&Miv( z9)gS9fAh)-=3t2;j%+!GW~vF9t|2&Wd$e?hkDK|fw(;=+>3D?HLb3adi2G5-n&AVD zX)-evK5hv{tVqnL8>umgpgK}HPk$k!cyOKtBWx9b!TzZ`B!;z%3in>M(o1XXHhV^M zW(|so`>&YW{B2WAuE25EnjWQDn!H!GEi!a#=krSAl858QVw+HGc-2z|tpd)wXC?T{4>GAktS2$7r!4tBeP%SC zaDG+!s_a-Y@AXL)-gTX8a#g38G#7l&v3gCdXZ$JEZZ+CN*A7}rYhe_PY5OX0khoAr*ifCyFFJl*d&w-R7VAw$7v#TyG2b6Y21L`AbnB>YSO$b z7ADWr3JlWVU}LhUT>)XK6c6LHuRZaowi_)zil|aF z-)C{TqGrTiO>p3;xG;G|+ltBBCb-dHElZ*}Tg{7mdPp3piTjjOk#2`|{uE9ZV&&Ca z;mBvZ_=|^jG52A}2c0Kiq`Vj&YV9LbV67~82BJ2-r! zOD^7@5tZxFy0Xn;1nsBhlHg1GSo63sIxqQx(qVVU8F>qfFL&Rn#x}pUsw5QkM8p;E z!EaVp613>JtdDvjU8yH+;pItE3OOi0w$yRTPF z9pgV8@_5}pO?C2A)DAV^hUokq`oSC)_m+F|UuKihs;5hD$TmMomr0dZ5R&11*h~qaFBkK>x;h>u`iyJ-j_Med^{W)d)<4Sne zef;-II56rK#D-ir3FP?O%6hhLzvDq-QK1oD{y3r$75LYhIso2wVjj!{2Y_t@NK#kJ zw~+l^ZxmE^B!l+rZzjGy_5JmWFDMk{egydO2=;s7PssfF-GS^y!5`10z89oK9zGyl z_AgGTF5+A~lsHE+!2Yu*`Og{GMd6D~{<*L@&Y!}6$Ln7NyvTW<14dlB2zbtbUqrdc z_nxCnAQ|Ex`>fy1bcS`#{$2#UNS~eqP9bX`67VN7^&-MWF7q5A67P3}Kbg&oq8B;0 zb5TZuGtr9-+(m$k8=G?g0NLM|<9wTQ5%6NeeGZ6<+&27Z-!sdeLH%=-s$Zj=tJD=~xZ&QCK{zo5lQTk%lIhV(# zJ$sRVQiCpn{5Zv8@Lm&_|K+xbp8ixdj;1=8mf&>U|!RRR?pycwdsDr-YE@UA-BqXd{*HX5tN;g(3qS-Q0{{RjK!4c`VIm9waDxB8yFPZ{X9MvL~6LQ zpo$lo5lQQpSE{DKK9beIFx3)3=kd2Qi?hZEf1?8FP7)SCqQb`!6>Af_fwJ$93~-0FGC>;lDj70Pw{ zYiM%h`D(>&@Ygz=E&=QT99l5SI}ypuocWI_O~CYDEzWxKA`3%o?+3Uek|-lxv>b=&&ay zrsoq+I;&l&&Pt_5RAo~-S&8unc-^b!2Ldi>w-3p}_FBHI2E36s%6K^xIg@EWFf{G! 
zJmaPo1}{p{^ZWx(u^=(S+?e-w3;^Kv77n2N7me0vJfS&;5>F8-K{UW;7(*vB8)r7w zyESEY-#^;?KcmUWxFM(*A9U>j8zsM9ZrTBnv+^1gfelpZe7MPtH6_7oTGVB0A#_K8ZLvBgL{(R;Js5>|ynTga7JgeKaokdeHR zibO7i&MQA~f$Tll)_hHplp@749t!rmtx7&eD;%18pCZvO7le0|Hiq4MNeza}cr>f` z^!sfspCh$G_%LwsNa#z1v)I;F0Bfd2;aSsXq2+?$C6MWvaa$**x*_zj{hjLzDA)9` z(0DWs2LKQQkYPM**nX$GyS3yV#FNMNqVbaTV!frnWl`%My+op}P zeop}t5pTG9PNMe-fj)3tP57JSGrWEi=upB#sT36b`PRCj-cG^Can(FJu}rJSp5U`7 z7JtO%FeOv64sCbuqwo&Ovi7gMp}k2)l_c$^3Qy*^jeEZNPY8x3OVpQGO&SDx>fnWj zv4*}teGqjVcfFEwLi!YiZ5PbY?LHfpHg9r(_A3UJo#AJiKw04o9W61T)POSR&)kVr z6~7C3jWh>H{q8B_wzEZ6ssVrzv}M4~Oh+m-;>p%Z>KF_OsK{fJ@_9}Pk`F~T=}<69 zi?j+@y2y9uB;yR%VXN+`U7TEaRK4r)EJb)GBPsuM@tZ(HvrbJH`9c&w7VR)=Uf$_3 zM}(e%MCW|Z_&Y~cLS4sb$h>T6ZEFYi zv1 zTOu;ON6r<~EXg!G8;_X{ehgIvBzV%70+oe-Hy1x&x(`9do4DRnLT(P^rh3Ma$DGlH z{OvSDY*!(36hUEGj9kCm{d|dKUf#ugcaog=l)ozpb;)iRFzLonQBJ~+f!ngQ4g30- z=<`kJQ`SYT%%ymx#dnPBjNRp*MthBV)9A=d&e&!Mpw9lgpbrt?kE0L)00)x01oOw^ zaIrA6HDmj+X1|m3wl>I~M4GUf=72)R+aCPbjab&TXu3kzVoat&cYMV|cp=@(&77N4 zX_Le%8VGzD_>t0a#ou2Z(vf$SB8H7y=ojGr+--{UAe5F47G{ucVP^gMCfWUYnSpom z6Bn`ITp?SeW&jhO0^OC-@#Il<(IM0#^xr|HEWYMr!H$luiJ z!soT*q39PGo?K&3i!3%$65>e2uw)&|9Qa09b2FUgMDsXX7q zc9g5{XOoQ&7?xMMx+2)GNhzj_Z4Dm}8Y(3{j4UXY$$u%6H|*6iQeUZd$P36%bZjyx zn=4MbdH@KcBD8UJ^yIl9IaX>LgPFz&vxpc zcmd5S{Yz-Lrjl<>bvl9zHWLyQ_TgAB{Kq^(@j$UD=B9y?Jv*#QGJz&6NcK3}jTH)6 zdOD-ju@4Xi)df97+m#=qy(lyExxDD){C?3nmLoe9O@8h0{p?^QXOD#~dLL1VCTV|S zD)Z`QHSF65bHgUDLv3z@`m@ZI%Wnk~q7Xz0Di$uU^Ybyo3{l_n&hC~MN4c=gEc-nn zn3fW>^&pH9k*BRZD};v5960Magk8yAxO|8UcqQJ9C;lJSlQotEI@}k8VT5y0J$z4E z$6T>qz0Jmpv+oK=`fWAM_2YY&RuD`7!M;PH%*3b8fzBPUk1}1uD>b(2CT&FFYM}hk z>3p(>)h>G)BWY{`eeRb71}ePC`GWDIEmm}dB%kbI-w1(Z&3aRytN}I4OS@tCWsb5+ zB8i>z=cnYG4w`1i82tOO%2>%7^rEx_&MU4$YJ7zz{2eIHWG~Q)Kg##E=>@ZvKE{(0 zppML0NCs+4JInevDGl43Sm?en{$h62A&V+qpdn;${8|4iMy4A$%~WF=1_UO!%t(`n zFsbg=!1GDE#LLd4!r&8T|LxSs-5nuq0eN?{N6j*p@?uc&o0de&$%Vbdn5cDEqy%p* z|9&mehalVOlp^`%t_pk~oL$3d5BGiMd77B5ev+ZN=F8eW#M|>`Z&jAenmJZt=I;i= zLemG+5Bhs8s}6%Xc$A%G?BqCE6{b16e6_~Eu6+mg(Wv9M4>fjse*_vy2Ejk0P{zYF 
zNo#wrkI5AeTq~go=U`pRHBV&nv=)lo>dSbTdI4`d7uaEy6uonpV!(=lN=H(4B(e`Z<>RRm`t#dNHb8_jSdmW_S6%s9tVbNA z+9D%szU+wT1$RmysNTwX3N|M)2J9FTx z9A%W$lLGvz!b52;-4Np!*yBqJpDuiRjmj`hC5p(H!KuV+8X2(66LBRs#UO_*X(A)e zYxHWZIe*i$?rMPOMHXE=3d$Q28*}N)&mwD(gdtq)AP=9HGB%G+*nSaaxMY<-4#=B z%G=xD1n4>jI^;2O=sp_q`^;p4yFt|r;VlbG@GgS{w0Iv@bUu(d_nm&g6>_V9$Kp#?97R9Dv0i)`-2kV zO2O*E11~CuNj78lB<-d5SH(Glssl=zO_4BjNpkf`+ZIg;Y&k47Cu`2Xsq@ddSs5xtU9c{IkMk(UNXw?%wZ-%gDo^UAP$9{4j9BLJdf1S!U;>NseeYS87V!$c&EIfQScGDX$5@+F;FOf)!@Fa& z@X#tkRA{dW+h26dRJ=whj1gIzkqh^H(tG}jWVocr%J$F@shUlxW{>%+@3SBQVO8y$ z?bp5(LUuVVpMi%%!E*dDMk3ij{vPR_Lw^(TE%%cQYhMibeA|KkqZ5>0#qTIt+Fb(Z z-Pc3Egn#qEk9+613*wQ=ZFjZHk4C~C*SwK`e(l3it~O{xVKJjdiG(G*xFcu(G#A1{ z<+!wFE-HH2WOB4xa@K)`evX5$jRETmiaM}G*|Tpb_Qr~RW1m15P3Hp+2Dy5wJix`M zkTi|O^jo_v&!%H%ODU`X#;;Nk9u74P1STWYr3cIz4nauBE znUD?3Fpu@ubi5FC%a2yZi;}Avi9vp^;GyMT60xxz)<$(x)cMWisS6>}QxM8I@QvTc zhTcvEEOaZ^>MQwnnv~iV+!;`EX-a&|4->ENP<#?MN+bx(h0LS!YRA|j6Ib_)eNQLMCHAB(*}i?EI8}&f z2dW}5>Oih|N2hvS`Ye9Mb%t^DDj+$ebeVU?O5Z^xiRuUw0qN9aP}z=LOfqO#6^`!+ zX^!yd8`)lO0?r5m($fU=B4Hl@1VvBT{zWG#RX{T5_^PLGpBsVBL1!htK;78c1gS{C z>o^;6VahK(+9*hcIBiOVa~1t|>TO7nH$4`cn$w*k z^qeJ?TYQ`d`T`Y@aVSu~77~=)Hc%Z}BBq1v;?d{ywldTNpc;nX0)sS=yO9-|5gax=kLQ9ZH?RJ!LOUxx9W6viE$*B`snNna#Jo9t{tF<1tJBK`Rf{%$CtlugH zp`g?dTZlVe$QL7w83*jxpCrbASf-F98-GpBk;T0XIbXOfe_RMzOFU$lE|9>9E-L$e zFf<#o1Ujx8RNG+%GQ}9ib%=m4>HU!Mo3>hhIU;GiU($esiii^`BJ@AJ;Nodx_T%;w z;z6(Go?wOUh1?)fd2N@bSEE~2G=+tTW$B})N@Q(_zc!UE5sBpb+;zDn7^gSK&oV(p z6!n~Zu#uR+odD9K&Bq2|*x2MLkV?*39j&IjuKO}?!*J00z`T!GWmMe5>sgbpng?Nm zhY$)3Ja$*oclcr7UJgGYl))>=PHqiYkx#tkBXkJr*j75? 
z)7Q1U{;rCwaZ1fjE_NcSaSo@6V3w9I^V_waH(@IRjiOjlTVip-i`m*5rZjbo#MY&3 z$ghj`*}e|ojy|KhvUuYPUk>+GHriMx_vK|0O~m&OumwPqHHkW(_1pT9*ey&3D31#4_@*~PwQW41- z2NOmb4&q&oGB04uGP_V@a3xRoSUfVAg};eQoJO<3+PbayI;wb8yzThWfXTY=amm*B z{_vL0M}5)*M+1`yt#ye=CWiccU~);ENU*6HqCn1;0b|T_Mi+e8!YdAi)UYH3&6O}h zsmxt*?K%ssG9yv4M3>qYG*<6x&xi}EA$;oW+^TC9#a63_1?D%i#lL3DRh7*Z6I3r; zP_{h&oh=SfvACEyshPRB+^L4`-ab34iKX3kyuMP97*_W!E|`lamWI&&-4bk#vLu`Y ze#&8lnGH`C8jMx8>;X{ZVw>LwQU#Qw0DT9US6!y2zfIrXmIZ|4StVF}6n)o;pept9 zErzbqBG9w3X{;H==Yy6`lFAHUjU>l-u(FuKVKUzBg&N2l>O$tq@>MNhnPY^HJAlk=67(9!Rk9wNW>1DMPRBb2w zRH;y_v?J18vOehMUVv)lZN_R7kU&M+6^_e2P-?-J2o<6ONW@%fq;($&+))0Aksboi zKCKzU6u$sp0Yg_Jg%}5kXV~F#DSI6MEthu*(XEkpD+-k}a~pdbX*=1IaT@IO*uZGu zwdE!`C0{5pExoxTY3Et@Rx9w*T(V3ttBB4_?^*k|-E z$S+iCH`>8r8@%*6CQ@h==8`fFnnDvcY9CNC0h01qTbfCnC%LvM* zXPbh!U!V^BJNHs;WPc02(>k;hkM(En89O-qL%loF{l28d4=Qav!3sIOo3s&j)xnZ@ z0}ejJe)Y;OuXa0%(~?C`@K7-)v7jYhDvyDnqP6%fe@@QON=5y=t$7=p;cjiEvCB ziX@V0`EMdMR+)vQ8s^0)`FoG7Q3L~(B|KI9h$9vpXNT=U{T8|+KKk3%h?AS8rA<|# zXNeb`Gq~)U3BtR~9&};ZCiVQ&wocVkb#d+LgOCGAf%WtY|5MeZx#sD$tOUSF1x8meZW)jLe{Mb5uz-X`Yb zv$a9@Ld91)$A2xf_{Hiuj&G1Rf5#4?*EH3l2l83?Z2Qv}!(P)%o%Q1E%LMO5SsPa> z4LXn{9g@#1RR*UXiT6gk3`Zjz540;abJrQBmIodVAGHkcc3a<~eD~n!J9n{me9z#i z=Y9=s@&C?U7+4l)9{20_8vgaH{+{q_HRq z!26}|Pe3Z1`+z^Y3jaNd9dvTx52yTbSwA*E%IW(k_bbPr*U|v31b%_?qmI0fa=(E1 ziE@Ge1Lbe!#C?SO$@fo$yH*SII-LKVir+WApNai6WhMS$dM^+AWnOk4^snj0Pe=eD r16o4;E%~@_{?~}{XLDlmKbij#Ka>>^pn3oRFrc4GsH7$-@Am!&z9{IU literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testdateoverflow.xlsx b/pandas/tests/io/data/excel/testdateoverflow.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..15f0e8825339e4531b1f25ea1b6dc1c70f893dad GIT binary patch literal 9351 zcmeHNby!sUx*oby8l(}C4yAKYh8P;8Ye1Tz8$nW$?ndbv5DBHbrBm1-AtBuzcXaP_ z+?(yW&-we_z2;%ovsm-Jv(`87`}=As%OfEZ0Z;+x004jvkRN}R9Ebn_5TOD9cL1{p zx*CUln(Y#~ zMMZ=H&VtpTAv~@<2{yu+eQcuZ##m+L!VwT3m8Wc62Q<~3dC68=niAb24ctMy{Tkh7 zrLw$Fy?)TZ^MqbXRkOHN8`OcT940gQw8v%@fx*C*n^-W~H;S-=N-n7U!04>6HAs~} 
zJUI98fo>sFw{ep%Gp{)Y+fhL)e)d-k5-WY>!}{e(FD56*Oun0$bPt%VGcq!olnxf!%zBd-(Zr&%!kr- z!d@qw;khmGeqPe~%NJC~_EoiSqu+~- z%4EXHbaR6QQ2vV|t2DS6j^Jk$;DW;f3?LXfn%OvUa(sWM&FcL}iT`I57#1_A*v5t3 zxdW;Pe!WE^jy$GkTUK~>nR=HR0}xJ%fA`|%^!$-33Za# z*m%K_X;FCEFd(v+AGiQEJvDCWz*9H;CD-RLuI?kkqt6%;06+miM{u{{{GIM@P)BPM zDAf9U{P~0W$ls|C|Lp(otv+_p>IZ288UluW9AfOu;)EK)B+GP90QXcDhuR+{95X}3 zV}Z32JsfXI0~jxT#E0^)7V>x6Nl~rRHKLeU&?|F`b!P6L3N(m`1kg@YkEB-=7)kg8 z9eh9dwkbMYO6MIhW`#1L1b^Pi$r&*!V{xFiO&jAlOo5P7d~@-b!08nMf8eNJ6&X@`T{VF zwFuby+@9K51d*u*>KF_PsmKvf33yBjQxAqU=+Lk}7Hbx=be8MNP9hwtAyC~>J3l^m zuXxk$QHt{P2~bXHeod&ZQKzbtdM;cLpK*vIH}~X-CsfZss$;f$?2UsenXW@5Y*wbU zx;cmy!8@0pq^qJ}QFS!K*z%^$I8dmcnh~b2FU|A_ zs*MaK>`3>;9|-_`Bhnphjl`K5a|>5DPHA_dFf)(kOhCWyo_)zWO*zHQ`N+(MB#JH+ z7C7NUgUK$qokNoMxEJM#7e%dyliC||U=7QZ2V8WH9{2e7XE>Y9MCSt@9`M`Cs634t&=0dFE{F?`>54zs9 zj`SGyq%qw!IpzFJ3U~J3gV#O^@=-WEm9(e)o=*PwIh-xbY|S`-ymNn-^QJZ!O8JY>`EcyQZ!YrYccwyTz72AU34zp)7AVTui`qTRU`=X(*Gl^!;+t$9IQR} zGDU)bsL=O?+cVcm-o0Q(CPajOrn%3nSJz2y&&mwElDM2D0&_%c(T@760MUG3At_I` z=C|MZS~5teE;xsgE6EBc-67YT@jOW{c310Qq(fg*t9dmeS)yKD^PV24)w$0z2*TXC zr^l$(lKoP%6Ojh-OCYRd!fJyRBhS)LH4QMMU%C={XV&aO9{{c!Tt58sX*LgEl&k0>t)r>bqHY$37Xpd+*|c0$q_- zNSD|cI_fu6OuQ48U;HHR<&)eY&!*wp_iFq6fV>2U27|Jh;>1gAKnNX~jf;cFqjSn5 z#guM22&F4l#BZ1+Q$$^T)@p^0O;FAhQ_uOaHP!?|ovZ6+ zUGSh1Bu$NhJBF4pe(!pOjaF~MMwPw=EFx22vZ+pcVE%es zoct~l$GP9AdoVFLI>p@7AK1Ocq4>n#gdGh^xLIE=|5i_DggzP@rC(jxUA#@18S8nO zq4&jk2k+r|$7r_9U?laG{o(1}aP|&6XXGxbB17Wt_+;kg^>WDC2Xn&)&wcHO2DPV| zO&4qVG~zH+DLQsO&$F{p!whksvyQH&=Lb26jqJPKL3ox@jJ06gVKJrVM@wXePCSIG z*<_tbo@V$xkQaH%-jhphojp6DUbojYJHi#*jaJ4_ z(zq|q_{nL>MMO=Y&_u8u!|CpGtm2PyJuP~H9Hq>}(n9oM*>g!C?Z-|sehrF4P!kK? 
zc;hc-2kkPLkMlJ|pvD9GUvV>CA!()>QwU%P=|x7GRH#X1mjX>;h?Bi(A2(X{9N3Ts+YO~>a@sZnw3&M+zdYQf!V@(%&FQz=Dqi=E{p-h|tR zQ|@lNY_kke8-0|6GmRJ3JE%8jjb5tknN>3!6l{kEq9Rj!Q`mhymKFPfJdc!}p4iFq za>!5dcKT?IeO);O^)jfFv<=pGd3^*K0Rxbq(kK(-nWVK8=;QIlf>ufxUh!}&V;9(1 za_)GzRo;@vh+HgPTwh*`$u}_B6Z|l)`DGV@ChK8)>6WmtsxMn&dVw9%3-mxnh{!#U zu*%s!?xIv$;u;e>bXvCuCOJwz`R@JE6C*Y^{*@KTJ;aj&l6nLUO@z7h0i-G;%U&f^ z+Uq3D^25f$77w4MB49d9;NG*Z4a$^7RY~-`Ze#O*ovn4R=`O?})oZNnXs`yZJ=akl74tnbwk_4YW3R z5nVdz+8h)r==_!~&)Bf}(#R9d6iaWQ)f)8Rqs^lm6v3ez+kS28ypo08{A0^NMPtpu6OO)^+8J~`*GTpPPP{z&rXv58xfTV)zCcfJ*C8-#w9{A~b`4HuL)D95(cz0Qn zH=y!GNuwzmUJhlBK2^)SDXA@wrRGG{*;-D6rp*MY$hQQ?NI_j|*97;>(a+2FE3Xf% z_UO~oy`~PT^o57V@jovyVamrYia-z{VjeYy*{5p=$ew6+=jIBsu?oA@@9Jr*`8GGtOe$q zJP|$T8zxFyL*|C`MqKQ<$wz7pHuM71i18u5f8@1 z@sd%F7CGqd?R?puYh3;GmP)sD#L$(jDNTpq-fXE89^^00V|pkpvFVX2=fK?(X!&5e}oEDaGLw ziFyNI7<$Uk=N(jZFOqo2mOXrWT}gHJI^L5A)r_8wQ;EHpJ_cF7W;8q}q|EL~Z?sW< zPMwCc&ytXrhQqq>WbHe(x0EVPP^7^DI@dwAF*B{8|JQ2-lzIn zjnUORM)#C$OEWyea_#;-aH&AWo`2xO6c#PZV9Ef#)6vp=a}>U0zPAwP&Us59f3Fh_ z!0isN=5Qkl7jx8U`?Z~7rE?4l<L z!r0NwRL$AZ($3uJyADkwTjY?r;neZpQTB8b=D1I9OZx1weM>BBMxR0&$&6`rPr=2p zrgY#D;pxG0fVk^{_IuBe-d984`ZV&1=OrMmJ>OQZt~$as*$}#-Fj~Gbyqh~c;O%28 zs_9mgi9VYkqo$8*W=%n$^vKomuhe30a|+^eM2?E-uiYpGV_;NKSV%gY%N3)H8o$_r z9w)?pSfl~o9gCpndHZk?b~bnOj=2!FlCaM*l`lmYSyXnoH#i-%06wbeSKHzMu|^rj zw2Oi9?)#$UHEcBfaz)Z8SkOR%i-;F4BAh?G;Ot>z_T%>BV!^LxxbTB_g09i%JU2_z zD{(B#8$yC4-s)qfO1=Fi8DT0@A{NFs(0Q>T9HTcQ$UaU-9{!BFzn+5BjTF|cEx-w5 z`L_NjUpgs!d8Cr*s^-hA4a;6Lws|jw%7~=9=hFrsHFvT&cM%K(WP;AbL*yZ!9v)v2 zjQ&g5R$xnPr%*rOLutSFD)N#Tqh&noz&qCR9u5q1bR!*ADWaO+&qaYFMzPV+*-lI~ z#{MJ$!rt^{cC*^!I%G+xUK~GsLn20WK1*A}l%eJUg>@+>dPLDK=hsg}BTwlrE#h5} z-ywaKi8R*9d3lk@5PH~-{*oC_^c$OuS;kg{g>AQOQi2tzd;?vWdsQW~+t7Ea({lu- zU4rVAqpCJ(y(uJ6lm!8_Cf~K|w%7^5mFGD{$GEd(@XGEkrRIjx>p(UGE#fK{DjJ_c zH%E;iLL4K0#p?WByy~_u(}-H(sEH+2!7O|gzU0{pDq?TPAY@^N{lpg|Y;y!MY|b
    +
    +
    + + +
    + + +

    Federal Deposit
    Insurance Corporation

    +

    Each depositor insured to at least $250,000 per insured bank

    +
    + +
    +
    + + + + + + +
    + +

    Failed Bank List

    + +

    The FDIC is often appointed as receiver for failed banks. This page contains useful information for the customers and vendors of these banks. This includes information on the acquiring bank (if applicable), how your accounts and loans are affected, and how vendors can file claims against the receivership. Failed Financial Institution Contact Search displays point of contact information related to failed banks.

    + +

    This list includes banks which have failed since October 1, 2000. To search for banks that failed prior to those on this page, visit this link: Failures and Assistance Transactions

    + +

    Failed Bank List - CSV file (Updated on Mondays. Also opens in Excel - Excel Help)

    + +

    Due to the small screen size some information is no longer visible.
    Full information available when viewed on a larger screen.

    + + + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Bank NameCitySTCERTAcquiring InstitutionClosing DateUpdated Date
    Banks of Wisconsin d/b/a Bank of KenoshaKenoshaWI35386North Shore Bank, FSBMay 31, 2013May 31, 2013
    Central Arizona BankScottsdaleAZ34527Western State BankMay 14, 2013May 20, 2013
    Sunrise BankValdostaGA58185Synovus BankMay 10, 2013May 21, 2013
    Pisgah Community BankAshevilleNC58701Capital Bank, N.A.May 10, 2013May 14, 2013
    Douglas County BankDouglasvilleGA21649Hamilton State BankApril 26, 2013May 16, 2013
    Parkway BankLenoirNC57158CertusBank, National AssociationApril 26, 2013May 17, 2013
    Chipola Community BankMariannaFL58034First Federal Bank of FloridaApril 19, 2013May 16, 2013
    Heritage Bank of North FloridaOrange ParkFL26680FirstAtlantic BankApril 19, 2013May 16, 2013
    First Federal BankLexingtonKY29594Your Community BankApril 19, 2013April 23, 2013
    Gold Canyon BankGold CanyonAZ58066First Scottsdale Bank, National AssociationApril 5, 2013April 9, 2013
    Frontier BankLaGrangeGA16431HeritageBank of the SouthMarch 8, 2013March 26, 2013
    Covenant BankChicagoIL22476Liberty Bank and Trust CompanyFebruary 15, 2013March 4, 2013
    1st Regents BankAndoverMN57157First Minnesota BankJanuary 18, 2013February 28, 2013
    Westside Community BankUniversity PlaceWA33997Sunwest BankJanuary 11, 2013January 24, 2013
    Community Bank of the OzarksSunrise BeachMO27331Bank of SullivanDecember 14, 2012January 24, 2013
    Hometown Community BankBraseltonGA57928CertusBank, National AssociationNovember 16, 2012January 24, 2013
    Citizens First National BankPrincetonIL3731Heartland Bank and Trust CompanyNovember 2, 2012January 24, 2013
    Heritage Bank of FloridaLutzFL35009Centennial BankNovember 2, 2012January 24, 2013
    NOVA BankBerwynPA27148No AcquirerOctober 26, 2012January 24, 2013
    Excel BankSedaliaMO19189Simmons First National BankOctober 19, 2012January 24, 2013
    First East Side Savings BankTamaracFL28144Stearns Bank N.A.October 19, 2012January 24, 2013
    GulfSouth Private BankDestinFL58073SmartBankOctober 19, 2012January 24, 2013
    First United BankCreteIL20685Old Plank Trail Community Bank, National AssociationSeptember 28, 2012November 15, 2012
    Truman BankSt. LouisMO27316Simmons First National BankSeptember 14, 2012December 17, 2012
    First Commercial BankBloomingtonMN35246Republic Bank & Trust CompanySeptember 7, 2012December 17, 2012
    Waukegan Savings BankWaukeganIL28243First Midwest BankAugust 3, 2012October 11, 2012
    Jasper Banking CompanyJasperGA16240Stearns Bank N.A.July 27, 2012December 17, 2012
    Second Federal Savings and Loan Association of ChicagoChicagoIL27986Hinsdale Bank & Trust CompanyJuly 20, 2012January 14, 2013
    Heartland BankLeawoodKS1361Metcalf BankJuly 20, 2012December 17, 2012
    First Cherokee State BankWoodstockGA32711Community & Southern BankJuly 20, 2012October 31, 2012
    Georgia Trust BankBufordGA57847Community & Southern BankJuly 20, 2012December 17, 2012
    The Royal Palm Bank of FloridaNaplesFL57096First National Bank of the Gulf CoastJuly 20, 2012January 7, 2013
    Glasgow Savings BankGlasgowMO1056Regional Missouri BankJuly 13, 2012October 11, 2012
    Montgomery Bank & TrustAileyGA19498Ameris BankJuly 6, 2012October 31, 2012
    The Farmers Bank of LynchburgLynchburgTN1690Clayton Bank and TrustJune 15, 2012October 31, 2012
    Security Exchange BankMariettaGA35299Fidelity BankJune 15, 2012October 10, 2012
    Putnam State BankPalatkaFL27405Harbor Community BankJune 15, 2012October 10, 2012
    Waccamaw BankWhitevilleNC34515First Community BankJune 8, 2012November 8, 2012
    Farmers' and Traders' State BankShabbonaIL9257First State BankJune 8, 2012October 10, 2012
    Carolina Federal Savings BankCharlestonSC35372Bank of North CarolinaJune 8, 2012October 31, 2012
    First Capital BankKingfisherOK416F & M BankJune 8, 2012October 10, 2012
    Alabama Trust Bank, National AssociationSylacaugaAL35224Southern States BankMay 18, 2012May 20, 2013
    Security Bank, National AssociationNorth LauderdaleFL23156Banesco USAMay 4, 2012October 31, 2012
    Palm Desert National BankPalm DesertCA23632Pacific Premier BankApril 27, 2012May 17, 2013
    Plantation Federal BankPawleys IslandSC32503First Federal BankApril 27, 2012May 17, 2013
    Inter Savings Bank, fsb D/B/A InterBank, fsbMaple GroveMN31495Great Southern BankApril 27, 2012May 17, 2013
    HarVest Bank of MarylandGaithersburgMD57766SonabankApril 27, 2012May 17, 2013
    Bank of the Eastern ShoreCambridgeMD26759No AcquirerApril 27, 2012October 17, 2012
    Fort Lee Federal Savings Bank, FSBFort LeeNJ35527Alma BankApril 20, 2012May 17, 2013
    Fidelity BankDearbornMI33883The Huntington National BankMarch 30, 2012May 16, 2013
    Premier BankWilmetteIL35419International Bank of ChicagoMarch 23, 2012October 17, 2012
    Covenant Bank & TrustRock SpringGA58068Stearns Bank, N.A.March 23, 2012October 31, 2012
    New City BankChicagoIL57597No AcquirerMarch 9, 2012October 29, 2012
    Global Commerce BankDoravilleGA34046Metro City BankMarch 2, 2012October 31, 2012
    Home Savings of AmericaLittle FallsMN29178No AcquirerFebruary 24, 2012December 17, 2012
    Central Bank of GeorgiaEllavilleGA5687Ameris BankFebruary 24, 2012August 9, 2012
    SCB BankShelbyvilleIN29761First Merchants Bank, National AssociationFebruary 10, 2012March 25, 2013
    Charter National Bank and TrustHoffman EstatesIL23187Barrington Bank & Trust Company, National AssociationFebruary 10, 2012March 25, 2013
    BankEastKnoxvilleTN19869U.S.Bank National AssociationJanuary 27, 2012March 8, 2013
    Patriot Bank MinnesotaForest LakeMN34823First Resource BankJanuary 27, 2012September 12, 2012
    Tennessee Commerce BankFranklinTN35296Republic Bank & Trust CompanyJanuary 27, 2012November 20, 2012
    First Guaranty Bank and Trust Company of JacksonvilleJacksonvilleFL16579CenterState Bank of Florida, N.A.January 27, 2012September 12, 2012
    American Eagle Savings BankBoothwynPA31581Capital Bank, N.A.January 20, 2012January 25, 2013
    The First State BankStockbridgeGA19252Hamilton State BankJanuary 20, 2012January 25, 2013
    Central Florida State BankBelleviewFL57186CenterState Bank of Florida, N.A.January 20, 2012January 25, 2013
    Western National BankPhoenixAZ57917Washington FederalDecember 16, 2011August 13, 2012
    Premier Community Bank of the Emerald CoastCrestviewFL58343Summit BankDecember 16, 2011September 12, 2012
    Central Progressive BankLacombeLA19657First NBC BankNovember 18, 2011August 13, 2012
    Polk County BankJohnstonIA14194Grinnell State BankNovember 18, 2011August 15, 2012
    Community Bank of RockmartRockmartGA57860Century Bank of GeorgiaNovember 10, 2011August 13, 2012
    SunFirst BankSaint GeorgeUT57087Cache Valley BankNovember 4, 2011November 16, 2012
    Mid City Bank, Inc.OmahaNE19397Premier BankNovember 4, 2011August 15, 2012
    All American BankDes PlainesIL57759International Bank of ChicagoOctober 28, 2011August 15, 2012
    Community Banks of ColoradoGreenwood VillageCO21132Bank Midwest, N.A.October 21, 2011January 2, 2013
    Community Capital BankJonesboroGA57036State Bank and Trust CompanyOctober 21, 2011November 8, 2012
    Decatur First BankDecaturGA34392Fidelity BankOctober 21, 2011November 8, 2012
    Old Harbor BankClearwaterFL575371st United BankOctober 21, 2011November 8, 2012
    Country BankAledoIL35395Blackhawk Bank & TrustOctober 14, 2011August 15, 2012
    First State BankCranfordNJ58046Northfield BankOctober 14, 2011November 8, 2012
    Blue Ridge Savings Bank, Inc.AshevilleNC32347Bank of North CarolinaOctober 14, 2011November 8, 2012
    Piedmont Community BankGrayGA57256State Bank and Trust CompanyOctober 14, 2011January 22, 2013
    Sun Security BankEllingtonMO20115Great Southern BankOctober 7, 2011November 7, 2012
    The RiverBankWyomingMN10216Central BankOctober 7, 2011November 7, 2012
    First International BankPlanoTX33513American First National BankSeptember 30, 2011October 9, 2012
    Citizens Bank of Northern CaliforniaNevada CityCA33983Tri Counties BankSeptember 23, 2011October 9, 2012
    Bank of the CommonwealthNorfolkVA20408Southern Bank and Trust CompanySeptember 23, 2011October 9, 2012
    The First National Bank of FloridaMiltonFL25155CharterBankSeptember 9, 2011September 6, 2012
    CreekSide BankWoodstockGA58226Georgia Commerce BankSeptember 2, 2011September 6, 2012
    Patriot Bank of GeorgiaCummingGA58273Georgia Commerce BankSeptember 2, 2011November 2, 2012
    First Choice BankGenevaIL57212Inland Bank & TrustAugust 19, 2011August 15, 2012
    First Southern National BankStatesboroGA57239Heritage Bank of the SouthAugust 19, 2011November 2, 2012
    Lydian Private BankPalm BeachFL35356Sabadell United Bank, N.A.August 19, 2011November 2, 2012
    Public Savings BankHuntingdon ValleyPA34130Capital Bank, N.A.August 18, 2011August 15, 2012
    The First National Bank of OlatheOlatheKS4744Enterprise Bank & TrustAugust 12, 2011August 23, 2012
    Bank of WhitmanColfaxWA22528Columbia State BankAugust 5, 2011August 16, 2012
    Bank of ShorewoodShorewoodIL22637Heartland Bank and Trust CompanyAugust 5, 2011August 16, 2012
    Integra Bank National AssociationEvansvilleIN4392Old National BankJuly 29, 2011August 16, 2012
    BankMeridian, N.A.ColumbiaSC58222SCBT National AssociationJuly 29, 2011November 2, 2012
    Virginia Business BankRichmondVA58283Xenith BankJuly 29, 2011October 9, 2012
    Bank of ChoiceGreeleyCO2994Bank Midwest, N.A.July 22, 2011September 12, 2012
    LandMark Bank of FloridaSarasotaFL35244American Momentum BankJuly 22, 2011November 2, 2012
    Southshore Community BankApollo BeachFL58056American Momentum BankJuly 22, 2011November 2, 2012
    Summit BankPrescottAZ57442The Foothills BankJuly 15, 2011August 16, 2012
    First Peoples BankPort St. LucieFL34870Premier American Bank, N.A.July 15, 2011November 2, 2012
    High Trust BankStockbridgeGA19554Ameris BankJuly 15, 2011November 2, 2012
    One Georgia BankAtlantaGA58238Ameris BankJuly 15, 2011November 2, 2012
    Signature BankWindsorCO57835Points West Community BankJuly 8, 2011October 26, 2012
    Colorado Capital BankCastle RockCO34522First-Citizens Bank & Trust CompanyJuly 8, 2011January 15, 2013
    First Chicago Bank & TrustChicagoIL27935Northbrook Bank & Trust CompanyJuly 8, 2011September 9, 2012
    Mountain Heritage BankClaytonGA57593First American Bank and Trust CompanyJune 24, 2011November 2, 2012
    First Commercial Bank of Tampa BayTampaFL27583Stonegate BankJune 17, 2011November 2, 2012
    McIntosh State BankJacksonGA19237Hamilton State BankJune 17, 2011November 2, 2012
    Atlantic Bank and TrustCharlestonSC58420First Citizens Bank and Trust Company, Inc.June 3, 2011October 31, 2012
    First Heritage BankSnohomishWA23626Columbia State BankMay 27, 2011January 28, 2013
    Summit BankBurlingtonWA513Columbia State BankMay 20, 2011January 22, 2013
    First Georgia Banking CompanyFranklinGA57647CertusBank, National AssociationMay 20, 2011November 13, 2012
    Atlantic Southern BankMaconGA57213CertusBank, National AssociationMay 20, 2011October 31, 2012
    Coastal BankCocoa BeachFL34898Florida Community Bank, a division of Premier American Bank, N.A.May 6, 2011November 30, 2012
    Community Central BankMount ClemensMI34234Talmer Bank & TrustApril 29, 2011August 16, 2012
    The Park Avenue BankValdostaGA19797Bank of the OzarksApril 29, 2011November 30, 2012
    First Choice Community BankDallasGA58539Bank of the OzarksApril 29, 2011January 22, 2013
    Cortez Community BankBrooksvilleFL57625Florida Community Bank, a division of Premier American Bank, N.A.April 29, 2011November 30, 2012
    First National Bank of Central FloridaWinter ParkFL26297Florida Community Bank, a division of Premier American Bank, N.A.April 29, 2011November 30, 2012
    Heritage Banking GroupCarthageMS14273Trustmark National BankApril 15, 2011November 30, 2012
    Rosemount National BankRosemountMN24099Central BankApril 15, 2011August 16, 2012
    Superior BankBirminghamAL17750Superior Bank, National AssociationApril 15, 2011November 30, 2012
    Nexity BankBirminghamAL19794AloStar Bank of CommerceApril 15, 2011September 4, 2012
    New Horizons BankEast EllijayGA57705Citizens South BankApril 15, 2011August 16, 2012
    Bartow County BankCartersvilleGA21495Hamilton State BankApril 15, 2011January 22, 2013
    Nevada Commerce BankLas VegasNV35418City National BankApril 8, 2011September 9, 2012
    Western Springs National Bank and TrustWestern SpringsIL10086Heartland Bank and Trust CompanyApril 8, 2011January 22, 2013
    The Bank of CommerceWood DaleIL34292Advantage National Bank GroupMarch 25, 2011January 22, 2013
    Legacy BankMilwaukeeWI34818Seaway Bank and Trust CompanyMarch 11, 2011September 12, 2012
    First National Bank of DavisDavisOK4077The Pauls Valley National BankMarch 11, 2011August 20, 2012
    Valley Community BankSt. CharlesIL34187First State BankFebruary 25, 2011September 12, 2012
    San Luis Trust Bank, FSBSan Luis ObispoCA34783First California BankFebruary 18, 2011August 20, 2012
    Charter Oak BankNapaCA57855Bank of MarinFebruary 18, 2011September 12, 2012
    Citizens Bank of EffinghamSpringfieldGA34601Heritage Bank of the SouthFebruary 18, 2011November 2, 2012
    Habersham BankClarkesvilleGA151SCBT National AssociationFebruary 18, 2011November 2, 2012
    Canyon National BankPalm SpringsCA34692Pacific Premier BankFebruary 11, 2011September 12, 2012
    Badger State BankCassvilleWI13272Royal BankFebruary 11, 2011September 12, 2012
    Peoples State BankHamtramckMI14939First Michigan BankFebruary 11, 2011January 22, 2013
    Sunshine State Community BankPort OrangeFL35478Premier American Bank, N.A.February 11, 2011November 2, 2012
    Community First Bank ChicagoChicagoIL57948Northbrook Bank & Trust CompanyFebruary 4, 2011August 20, 2012
    North Georgia BankWatkinsvilleGA35242BankSouthFebruary 4, 2011November 2, 2012
    American Trust BankRoswellGA57432Renasant BankFebruary 4, 2011October 31, 2012
    First Community BankTaosNM12261U.S. Bank, N.A.January 28, 2011September 12, 2012
    FirsTier BankLouisvilleCO57646No AcquirerJanuary 28, 2011September 12, 2012
    Evergreen State BankStoughtonWI5328McFarland State BankJanuary 28, 2011September 12, 2012
    The First State BankCamargoOK2303Bank 7January 28, 2011September 12, 2012
    United Western BankDenverCO31293First-Citizens Bank & Trust CompanyJanuary 21, 2011September 12, 2012
    The Bank of AshevilleAshevilleNC34516First BankJanuary 21, 2011November 2, 2012
    CommunitySouth Bank & TrustEasleySC57868CertusBank, National AssociationJanuary 21, 2011November 2, 2012
    Enterprise Banking CompanyMcDonoughGA19758No AcquirerJanuary 21, 2011November 2, 2012
    Oglethorpe BankBrunswickGA57440Bank of the OzarksJanuary 14, 2011November 2, 2012
    Legacy BankScottsdaleAZ57820Enterprise Bank & TrustJanuary 7, 2011September 12, 2012
    First Commercial Bank of FloridaOrlandoFL34965First Southern BankJanuary 7, 2011November 2, 2012
    Community National BankLino LakesMN23306Farmers & Merchants Savings BankDecember 17, 2010August 20, 2012
    First Southern BankBatesvilleAR58052Southern BankDecember 17, 2010August 20, 2012
    United Americas Bank, N.A.AtlantaGA35065State Bank and Trust CompanyDecember 17, 2010November 2, 2012
    Appalachian Community Bank, FSBMcCaysvilleGA58495Peoples Bank of East TennesseeDecember 17, 2010October 31, 2012
    Chestatee State BankDawsonvilleGA34578Bank of the OzarksDecember 17, 2010November 2, 2012
    The Bank of Miami,N.A.Coral GablesFL190401st United BankDecember 17, 2010November 2, 2012
    Earthstar BankSouthamptonPA35561Polonia BankDecember 10, 2010August 20, 2012
    Paramount BankFarmington HillsMI34673Level One BankDecember 10, 2010August 20, 2012
    First Banking CenterBurlingtonWI5287First Michigan BankNovember 19, 2010August 20, 2012
    Allegiance Bank of North AmericaBala CynwydPA35078VIST BankNovember 19, 2010August 20, 2012
    Gulf State Community BankCarrabelleFL20340Centennial BankNovember 19, 2010November 2, 2012
    Copper Star BankScottsdaleAZ35463Stearns Bank, N.A.November 12, 2010August 20, 2012
    Darby Bank & Trust Co.VidaliaGA14580Ameris BankNovember 12, 2010January 15, 2013
    Tifton Banking CompanyTiftonGA57831Ameris BankNovember 12, 2010November 2, 2012
    First Vietnamese American Bank
    In Vietnamese
    WestminsterCA57885Grandpoint BankNovember 5, 2010September 12, 2012
    Pierce Commercial BankTacomaWA34411Heritage BankNovember 5, 2010August 20, 2012
    Western Commercial BankWoodland HillsCA58087First California BankNovember 5, 2010September 12, 2012
    K BankRandallstownMD31263Manufacturers and Traders Trust Company (M&T Bank)November 5, 2010August 20, 2012
    First Arizona Savings, A FSBScottsdaleAZ32582No AcquirerOctober 22, 2010August 20, 2012
    Hillcrest BankOverland ParkKS22173Hillcrest Bank, N.A.October 22, 2010August 20, 2012
    First Suburban National BankMaywoodIL16089Seaway Bank and Trust CompanyOctober 22, 2010August 20, 2012
    The First National Bank of BarnesvilleBarnesvilleGA2119United BankOctober 22, 2010November 2, 2012
    The Gordon BankGordonGA33904Morris BankOctober 22, 2010November 2, 2012
    Progress Bank of FloridaTampaFL32251Bay Cities BankOctober 22, 2010November 2, 2012
    First Bank of JacksonvilleJacksonvilleFL27573Ameris BankOctober 22, 2010November 2, 2012
    Premier BankJefferson CityMO34016Providence BankOctober 15, 2010August 20, 2012
    WestBridge Bank and Trust CompanyChesterfieldMO58205Midland States BankOctober 15, 2010August 20, 2012
    Security Savings Bank, F.S.B.OlatheKS30898Simmons First National BankOctober 15, 2010August 20, 2012
    Shoreline BankShorelineWA35250GBC International BankOctober 1, 2010August 20, 2012
    Wakulla BankCrawfordvilleFL21777Centennial BankOctober 1, 2010November 2, 2012
    North County BankArlingtonWA35053Whidbey Island BankSeptember 24, 2010August 20, 2012
    Haven Trust Bank FloridaPonte Vedra BeachFL58308First Southern BankSeptember 24, 2010November 5, 2012
    Maritime Savings BankWest AllisWI28612North Shore Bank, FSBSeptember 17, 2010August 20, 2012
    Bramble Savings BankMilfordOH27808Foundation BankSeptember 17, 2010August 20, 2012
    The Peoples BankWinderGA182Community & Southern BankSeptember 17, 2010November 5, 2012
    First Commerce Community BankDouglasvilleGA57448Community & Southern BankSeptember 17, 2010January 15, 2013
    Bank of EllijayEllijayGA58197Community & Southern BankSeptember 17, 2010January 15, 2013
    ISN BankCherry HillNJ57107Customers BankSeptember 17, 2010August 22, 2012
    Horizon BankBradentonFL35061Bank of the OzarksSeptember 10, 2010November 5, 2012
    Sonoma Valley BankSonomaCA27259Westamerica BankAugust 20, 2010September 12, 2012
    Los Padres BankSolvangCA32165Pacific Western BankAugust 20, 2010September 12, 2012
    Butte Community BankChicoCA33219Rabobank, N.A.August 20, 2010September 12, 2012
    Pacific State BankStocktonCA27090Rabobank, N.A.August 20, 2010September 12, 2012
    ShoreBankChicagoIL15640Urban Partnership BankAugust 20, 2010May 16, 2013
    Imperial Savings and Loan AssociationMartinsvilleVA31623River Community Bank, N.A.August 20, 2010August 24, 2012
    Independent National BankOcalaFL27344CenterState Bank of Florida, N.A.August 20, 2010November 5, 2012
    Community National Bank at BartowBartowFL25266CenterState Bank of Florida, N.A.August 20, 2010November 5, 2012
    Palos Bank and Trust CompanyPalos HeightsIL17599First Midwest BankAugust 13, 2010August 22, 2012
    Ravenswood BankChicagoIL34231Northbrook Bank & Trust CompanyAugust 6, 2010August 22, 2012
    LibertyBankEugeneOR31964Home Federal BankJuly 30, 2010August 22, 2012
    The Cowlitz BankLongviewWA22643Heritage BankJuly 30, 2010August 22, 2012
    Coastal Community BankPanama City BeachFL9619Centennial BankJuly 30, 2010November 5, 2012
    Bayside Savings BankPort Saint JoeFL57669Centennial BankJuly 30, 2010November 5, 2012
    Northwest Bank & TrustAcworthGA57658State Bank and Trust CompanyJuly 30, 2010November 5, 2012
    Home Valley BankCave JunctionOR23181South Valley Bank & TrustJuly 23, 2010September 12, 2012
    SouthwestUSA BankLas VegasNV35434Plaza BankJuly 23, 2010August 22, 2012
    Community Security BankNew PragueMN34486RoundbankJuly 23, 2010September 12, 2012
    Thunder BankSylvan GroveKS10506The Bennington State BankJuly 23, 2010September 13, 2012
    Williamsburg First National BankKingstreeSC17837First Citizens Bank and Trust Company, Inc.July 23, 2010November 5, 2012
    Crescent Bank and Trust CompanyJasperGA27559Renasant BankJuly 23, 2010November 5, 2012
    Sterling BankLantanaFL32536IBERIABANKJuly 23, 2010November 5, 2012
    Mainstreet Savings Bank, FSBHastingsMI28136Commercial BankJuly 16, 2010September 13, 2012
    Olde Cypress Community BankClewistonFL28864CenterState Bank of Florida, N.A.July 16, 2010November 5, 2012
    Turnberry BankAventuraFL32280NAFH National BankJuly 16, 2010November 5, 2012
    Metro Bank of Dade CountyMiamiFL25172NAFH National BankJuly 16, 2010November 5, 2012
    First National Bank of the SouthSpartanburgSC35383NAFH National BankJuly 16, 2010November 5, 2012
    Woodlands BankBlufftonSC32571Bank of the OzarksJuly 16, 2010November 5, 2012
    Home National BankBlackwellOK11636RCB BankJuly 9, 2010December 10, 2012
    USA BankPort ChesterNY58072New Century BankJuly 9, 2010September 14, 2012
    Ideal Federal Savings BankBaltimoreMD32456No AcquirerJuly 9, 2010September 14, 2012
    Bay National BankBaltimoreMD35462Bay Bank, FSBJuly 9, 2010January 15, 2013
    High Desert State BankAlbuquerqueNM35279First American BankJune 25, 2010September 14, 2012
    First National BankSavannahGA34152The Savannah Bank, N.A.June 25, 2010November 5, 2012
    Peninsula BankEnglewoodFL26563Premier American Bank, N.A.June 25, 2010November 5, 2012
    Nevada Security BankRenoNV57110Umpqua BankJune 18, 2010August 23, 2012
    Washington First International BankSeattleWA32955East West BankJune 11, 2010September 14, 2012
    TierOne BankLincolnNE29341Great Western BankJune 4, 2010September 14, 2012
    Arcola Homestead Savings BankArcolaIL31813No AcquirerJune 4, 2010September 14, 2012
    First National BankRosedaleMS15814The Jefferson BankJune 4, 2010November 5, 2012
    Sun West BankLas VegasNV34785City National BankMay 28, 2010September 14, 2012
    Granite Community Bank, NAGranite BayCA57315Tri Counties BankMay 28, 2010September 14, 2012
    Bank of Florida - TampaTampaFL57814EverBankMay 28, 2010November 5, 2012
    Bank of Florida - SouthwestNaplesFL35106EverBankMay 28, 2010November 5, 2012
    Bank of Florida - SoutheastFort LauderdaleFL57360EverBankMay 28, 2010November 5, 2012
    Pinehurst BankSaint PaulMN57735Coulee BankMay 21, 2010October 26, 2012
    Midwest Bank and Trust CompanyElmwood ParkIL18117FirstMerit Bank, N.A.May 14, 2010August 23, 2012
    Southwest Community BankSpringfieldMO34255Simmons First National BankMay 14, 2010August 23, 2012
    New Liberty BankPlymouthMI35586Bank of Ann ArborMay 14, 2010August 23, 2012
    Satilla Community BankSaint MarysGA35114Ameris BankMay 14, 2010November 5, 2012
    1st Pacific Bank of CaliforniaSan DiegoCA35517City National BankMay 7, 2010December 13, 2012
    Towne Bank of ArizonaMesaAZ57697Commerce Bank of ArizonaMay 7, 2010August 23, 2012
    Access BankChamplinMN16476PrinsBankMay 7, 2010August 23, 2012
    The Bank of BonifayBonifayFL14246First Federal Bank of FloridaMay 7, 2010November 5, 2012
    Frontier BankEverettWA22710Union Bank, N.A.April 30, 2010January 15, 2013
    BC National BanksButlerMO17792Community First BankApril 30, 2010August 23, 2012
    Champion BankCreve CoeurMO58362BankLibertyApril 30, 2010August 23, 2012
    CF BancorpPort HuronMI30005First Michigan BankApril 30, 2010January 15, 2013
    Westernbank Puerto Rico
    En Espanol
    MayaguezPR31027Banco Popular de Puerto RicoApril 30, 2010November 5, 2012
    R-G Premier Bank of Puerto Rico
    En Espanol
    Hato ReyPR32185Scotiabank de Puerto RicoApril 30, 2010November 5, 2012
    Eurobank
    En Espanol
    San JuanPR27150Oriental Bank and TrustApril 30, 2010November 5, 2012
    Wheatland BankNapervilleIL58429Wheaton Bank & TrustApril 23, 2010August 23, 2012
    Peotone Bank and Trust CompanyPeotoneIL10888First Midwest BankApril 23, 2010August 23, 2012
    Lincoln Park Savings BankChicagoIL30600Northbrook Bank & Trust CompanyApril 23, 2010August 23, 2012
    New Century BankChicagoIL34821MB Financial Bank, N.A.April 23, 2010August 23, 2012
    Citizens Bank and Trust Company of ChicagoChicagoIL34658Republic Bank of ChicagoApril 23, 2010August 23, 2012
    Broadway BankChicagoIL22853MB Financial Bank, N.A.April 23, 2010August 23, 2012
    Amcore Bank, National AssociationRockfordIL3735Harris N.A.April 23, 2010August 23, 2012
    City BankLynnwoodWA21521Whidbey Island BankApril 16, 2010September 14, 2012
    Tamalpais BankSan RafaelCA33493Union Bank, N.A.April 16, 2010August 23, 2012
    Innovative BankOaklandCA23876Center BankApril 16, 2010August 23, 2012
    Butler BankLowellMA26619People's United BankApril 16, 2010August 23, 2012
    Riverside National Bank of FloridaFort PierceFL24067TD Bank, N.A.April 16, 2010November 5, 2012
    AmericanFirst BankClermontFL57724TD Bank, N.A.April 16, 2010October 31, 2012
    First Federal Bank of North FloridaPalatkaFL28886TD Bank, N.A.April 16, 2010January 15, 2013
    Lakeside Community BankSterling HeightsMI34878No AcquirerApril 16, 2010August 23, 2012
    Beach First National BankMyrtle BeachSC34242Bank of North CarolinaApril 9, 2010November 5, 2012
    Desert Hills BankPhoenixAZ57060New York Community BankMarch 26, 2010August 23, 2012
    Unity National BankCartersvilleGA34678Bank of the OzarksMarch 26, 2010September 14, 2012
    Key West BankKey WestFL34684Centennial BankMarch 26, 2010August 23, 2012
    McIntosh Commercial BankCarrolltonGA57399CharterBankMarch 26, 2010August 23, 2012
    State Bank of AuroraAuroraMN8221Northern State BankMarch 19, 2010August 23, 2012
    First Lowndes BankFort DepositAL24957First Citizens BankMarch 19, 2010August 23, 2012
    Bank of HiawasseeHiawasseeGA10054Citizens South BankMarch 19, 2010August 23, 2012
    Appalachian Community BankEllijayGA33989Community & Southern BankMarch 19, 2010October 31, 2012
    Advanta Bank Corp.DraperUT33535No AcquirerMarch 19, 2010September 14, 2012
    Century Security BankDuluthGA58104Bank of UpsonMarch 19, 2010August 23, 2012
    American National BankParmaOH18806The National Bank and Trust CompanyMarch 19, 2010August 23, 2012
    Statewide BankCovingtonLA29561Home BankMarch 12, 2010August 23, 2012
    Old Southern BankOrlandoFL58182Centennial BankMarch 12, 2010August 23, 2012
    The Park Avenue BankNew YorkNY27096Valley National BankMarch 12, 2010August 23, 2012
    LibertyPointe BankNew YorkNY58071Valley National BankMarch 11, 2010August 23, 2012
    Centennial BankOgdenUT34430No AcquirerMarch 5, 2010September 14, 2012
    Waterfield BankGermantownMD34976No AcquirerMarch 5, 2010August 23, 2012
    Bank of IllinoisNormalIL9268Heartland Bank and Trust CompanyMarch 5, 2010August 23, 2012
    Sun American BankBoca RatonFL27126First-Citizens Bank & Trust CompanyMarch 5, 2010August 23, 2012
    Rainier Pacific BankTacomaWA38129Umpqua BankFebruary 26, 2010August 23, 2012
    Carson River Community BankCarson CityNV58352Heritage Bank of NevadaFebruary 26, 2010January 15, 2013
    La Jolla Bank, FSBLa JollaCA32423OneWest Bank, FSBFebruary 19, 2010August 24, 2012
    George Washington Savings BankOrland ParkIL29952FirstMerit Bank, N.A.February 19, 2010August 24, 2012
    The La Coste National BankLa CosteTX3287Community National BankFebruary 19, 2010September 14, 2012
    Marco Community BankMarco IslandFL57586Mutual of Omaha BankFebruary 19, 2010August 24, 2012
    1st American State Bank of MinnesotaHancockMN15448Community Development Bank, FSBFebruary 5, 2010August 24, 2012
    American Marine BankBainbridge IslandWA16730Columbia State BankJanuary 29, 2010August 24, 2012
    First Regional BankLos AngelesCA23011First-Citizens Bank & Trust CompanyJanuary 29, 2010August 24, 2012
    Community Bank and TrustCorneliaGA5702SCBT National AssociationJanuary 29, 2010January 15, 2013
    Marshall Bank, N.A.HallockMN16133United Valley BankJanuary 29, 2010August 23, 2012
    Florida Community BankImmokaleeFL5672Premier American Bank, N.A.January 29, 2010January 15, 2013
    First National Bank of GeorgiaCarrolltonGA16480Community & Southern BankJanuary 29, 2010December 13, 2012
    Columbia River BankThe DallesOR22469Columbia State BankJanuary 22, 2010September 14, 2012
    Evergreen BankSeattleWA20501Umpqua BankJanuary 22, 2010January 15, 2013
    Charter BankSanta FeNM32498Charter BankJanuary 22, 2010August 23, 2012
    Bank of LeetonLeetonMO8265Sunflower Bank, N.A.January 22, 2010January 15, 2013
    Premier American BankMiamiFL57147Premier American Bank, N.A.January 22, 2010December 13, 2012
    Barnes Banking CompanyKaysvilleUT1252No AcquirerJanuary 15, 2010August 23, 2012
    St. Stephen State BankSt. StephenMN17522First State Bank of St. JosephJanuary 15, 2010August 23, 2012
    Town Community Bank & TrustAntiochIL34705First American BankJanuary 15, 2010August 23, 2012
    Horizon BankBellinghamWA22977Washington Federal Savings and Loan AssociationJanuary 8, 2010August 23, 2012
    First Federal Bank of California, F.S.B.Santa MonicaCA28536OneWest Bank, FSBDecember 18, 2009August 23, 2012
    Imperial Capital BankLa JollaCA26348City National BankDecember 18, 2009September 5, 2012
    Independent Bankers' BankSpringfieldIL26820The Independent BankersBank (TIB)December 18, 2009August 23, 2012
    New South Federal Savings BankIrondaleAL32276Beal BankDecember 18, 2009August 23, 2012
    Citizens State BankNew BaltimoreMI1006No AcquirerDecember 18, 2009November 5, 2012
    Peoples First Community BankPanama CityFL32167Hancock BankDecember 18, 2009November 5, 2012
    RockBridge Commercial BankAtlantaGA58315No AcquirerDecember 18, 2009November 5, 2012
    SolutionsBankOverland ParkKS4731Arvest BankDecember 11, 2009August 23, 2012
    Valley Capital Bank, N.A.MesaAZ58399Enterprise Bank & TrustDecember 11, 2009August 23, 2012
    Republic Federal Bank, N.A.MiamiFL228461st United BankDecember 11, 2009November 5, 2012
    Greater Atlantic BankRestonVA32583SonabankDecember 4, 2009November 5, 2012
    Benchmark BankAuroraIL10440MB Financial Bank, N.A.December 4, 2009August 23, 2012
    AmTrust BankClevelandOH29776New York Community BankDecember 4, 2009November 5, 2012
    The Tattnall BankReidsvilleGA12080Heritage Bank of the SouthDecember 4, 2009November 5, 2012
    First Security National BankNorcrossGA26290State Bank and Trust CompanyDecember 4, 2009November 5, 2012
    The Buckhead Community BankAtlantaGA34663State Bank and Trust CompanyDecember 4, 2009November 5, 2012
    Commerce Bank of Southwest FloridaFort MyersFL58016Central BankNovember 20, 2009November 5, 2012
    Pacific Coast National BankSan ClementeCA57914Sunwest BankNovember 13, 2009August 22, 2012
    Orion BankNaplesFL22427IBERIABANKNovember 13, 2009November 5, 2012
    Century Bank, F.S.B.SarasotaFL32267IBERIABANKNovember 13, 2009August 22, 2012
    United Commercial BankSan FranciscoCA32469East West BankNovember 6, 2009November 5, 2012
    Gateway Bank of St. LouisSt. LouisMO19450Central Bank of Kansas CityNovember 6, 2009August 22, 2012
    Prosperan BankOakdaleMN35074Alerus Financial, N.A.November 6, 2009August 22, 2012
    Home Federal Savings BankDetroitMI30329Liberty Bank and Trust CompanyNovember 6, 2009August 22, 2012
    United Security BankSpartaGA22286Ameris BankNovember 6, 2009January 15, 2013
    North Houston BankHoustonTX18776U.S. Bank N.A.October 30, 2009August 22, 2012
    Madisonville State BankMadisonvilleTX33782U.S. Bank N.A.October 30, 2009August 22, 2012
    Citizens National BankTeagueTX25222U.S. Bank N.A.October 30, 2009August 22, 2012
    Park National BankChicagoIL11677U.S. Bank N.A.October 30, 2009August 22, 2012
    Pacific National BankSan FranciscoCA30006U.S. Bank N.A.October 30, 2009August 22, 2012
    California National BankLos AngelesCA34659U.S. Bank N.A.October 30, 2009September 5, 2012
    San Diego National BankSan DiegoCA23594U.S. Bank N.A.October 30, 2009August 22, 2012
    Community Bank of LemontLemontIL35291U.S. Bank N.A.October 30, 2009January 15, 2013
    Bank USA, N.A.PhoenixAZ32218U.S. Bank N.A.October 30, 2009August 22, 2012
    First DuPage BankWestmontIL35038First Midwest BankOctober 23, 2009August 22, 2012
    Riverview Community BankOtsegoMN57525Central BankOctober 23, 2009August 22, 2012
    Bank of ElmwoodRacineWI18321Tri City National BankOctober 23, 2009August 22, 2012
    Flagship National BankBradentonFL35044First Federal Bank of FloridaOctober 23, 2009August 22, 2012
    Hillcrest Bank FloridaNaplesFL58336Stonegate BankOctober 23, 2009August 22, 2012
    American United BankLawrencevilleGA57794Ameris BankOctober 23, 2009September 5, 2012
    Partners BankNaplesFL57959Stonegate BankOctober 23, 2009January 15, 2013
    San Joaquin BankBakersfieldCA23266Citizens Business BankOctober 16, 2009August 22, 2012
    Southern Colorado National BankPuebloCO57263Legacy BankOctober 2, 2009September 5, 2012
    Jennings State BankSpring GroveMN11416Central BankOctober 2, 2009August 21, 2012
    Warren BankWarrenMI34824The Huntington National BankOctober 2, 2009August 21, 2012
    Georgian BankAtlantaGA57151First Citizens Bank and Trust Company, Inc.September 25, 2009August 21, 2012
    Irwin Union Bank, F.S.B.LouisvilleKY57068First Financial Bank, N.A.September 18, 2009September 5, 2012
    Irwin Union Bank and Trust CompanyColumbusIN10100First Financial Bank, N.A.September 18, 2009August 21, 2012
    Venture BankLaceyWA22868First-Citizens Bank & Trust CompanySeptember 11, 2009August 21, 2012
    Brickwell Community BankWoodburyMN57736CorTrust Bank N.A.September 11, 2009January 15, 2013
    Corus Bank, N.A.ChicagoIL13693MB Financial Bank, N.A.September 11, 2009August 21, 2012
    First State BankFlagstaffAZ34875Sunwest BankSeptember 4, 2009January 15, 2013
    Platinum Community BankRolling MeadowsIL35030No AcquirerSeptember 4, 2009August 21, 2012
    Vantus BankSioux CityIN27732Great Southern BankSeptember 4, 2009August 21, 2012
    InBankOak ForestIL20203MB Financial Bank, N.A.September 4, 2009August 21, 2012
    First Bank of Kansas CityKansas CityMO25231Great American BankSeptember 4, 2009August 21, 2012
    Affinity BankVenturaCA27197Pacific Western BankAugust 28, 2009August 21, 2012
    Mainstreet BankForest LakeMN1909Central BankAugust 28, 2009August 21, 2012
    Bradford BankBaltimoreMD28312Manufacturers and Traders Trust Company (M&T Bank)August 28, 2009January 15, 2013
    Guaranty BankAustinTX32618BBVA CompassAugust 21, 2009August 21, 2012
    CapitalSouth BankBirminghamAL22130IBERIABANKAugust 21, 2009January 15, 2013
    First Coweta BankNewnanGA57702United BankAugust 21, 2009January 15, 2013
    ebankAtlantaGA34682Stearns Bank, N.A.August 21, 2009August 21, 2012
    Community Bank of NevadaLas VegasNV34043No AcquirerAugust 14, 2009August 21, 2012
    Community Bank of ArizonaPhoenixAZ57645MidFirst BankAugust 14, 2009August 21, 2012
    Union Bank, National AssociationGilbertAZ34485MidFirst BankAugust 14, 2009August 21, 2012
    Colonial BankMontgomeryAL9609Branch Banking & Trust Company, (BB&T)August 14, 2009September 5, 2012
    Dwelling House Savings and Loan AssociationPittsburghPA31559PNC Bank, N.A.August 14, 2009January 15, 2013
    Community First BankPrinevilleOR23268Home Federal BankAugust 7, 2009January 15, 2013
    Community National Bank of Sarasota CountyVeniceFL27183Stearns Bank, N.A.August 7, 2009August 20, 2012
    First State BankSarasotaFL27364Stearns Bank, N.A.August 7, 2009August 20, 2012
    Mutual BankHarveyIL18659United Central BankJuly 31, 2009August 20, 2012
    First BankAmericanoElizabethNJ34270Crown BankJuly 31, 2009August 20, 2012
    Peoples Community BankWest ChesterOH32288First Financial Bank, N.A.July 31, 2009August 20, 2012
    Integrity BankJupiterFL57604Stonegate BankJuly 31, 2009August 20, 2012
    First State Bank of AltusAltusOK9873Herring BankJuly 31, 2009August 20, 2012
    Security Bank of Jones CountyGrayGA8486State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of Houston CountyPerryGA27048State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of Bibb CountyMaconGA27367State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of North MetroWoodstockGA57105State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of North FultonAlpharettaGA57430State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Security Bank of Gwinnett CountySuwaneeGA57346State Bank and Trust CompanyJuly 24, 2009August 20, 2012
    Waterford Village BankWilliamsvilleNY58065Evans Bank, N.A.July 24, 2009August 20, 2012
    Temecula Valley BankTemeculaCA34341First-Citizens Bank & Trust CompanyJuly 17, 2009August 20, 2012
    Vineyard BankRancho CucamongaCA23556California Bank & TrustJuly 17, 2009August 20, 2012
    BankFirstSioux FallsSD34103Alerus Financial, N.A.July 17, 2009August 20, 2012
    First Piedmont BankWinderGA34594First American Bank and Trust CompanyJuly 17, 2009January 15, 2013
    Bank of WyomingThermopolisWY22754Central Bank & TrustJuly 10, 2009August 20, 2012
    Founders BankWorthIL18390The PrivateBank and Trust CompanyJuly 2, 2009August 20, 2012
    Millennium State Bank of TexasDallasTX57667State Bank of TexasJuly 2, 2009October 26, 2012
    First National Bank of DanvilleDanvilleIL3644First Financial Bank, N.A.July 2, 2009August 20, 2012
    Elizabeth State BankElizabethIL9262Galena State Bank and Trust CompanyJuly 2, 2009August 20, 2012
    Rock River BankOregonIL15302The Harvard State BankJuly 2, 2009August 20, 2012
    First State Bank of WinchesterWinchesterIL11710The First National Bank of BeardstownJuly 2, 2009August 20, 2012
    John Warner BankClintonIL12093State Bank of LincolnJuly 2, 2009August 20, 2012
    Mirae BankLos AngelesCA57332Wilshire State BankJune 26, 2009August 20, 2012
    MetroPacific BankIrvineCA57893Sunwest BankJune 26, 2009August 20, 2012
    Horizon BankPine CityMN9744Stearns Bank, N.A.June 26, 2009August 20, 2012
    Neighborhood Community BankNewnanGA35285CharterBankJune 26, 2009August 20, 2012
    Community Bank of West GeorgiaVilla RicaGA57436No AcquirerJune 26, 2009August 17, 2012
    First National Bank of AnthonyAnthonyKS4614Bank of KansasJune 19, 2009August 17, 2012
    Cooperative BankWilmingtonNC27837First BankJune 19, 2009August 17, 2012
    Southern Community BankFayettevilleGA35251United Community BankJune 19, 2009August 17, 2012
    Bank of LincolnwoodLincolnwoodIL17309Republic Bank of ChicagoJune 5, 2009August 17, 2012
    Citizens National BankMacombIL5757Morton Community BankMay 22, 2009September 4, 2012
    Strategic Capital BankChampaignIL35175Midland States BankMay 22, 2009September 4, 2012
    BankUnited, FSBCoral GablesFL32247BankUnitedMay 21, 2009August 17, 2012
    Westsound BankBremertonWA34843Kitsap BankMay 8, 2009September 4, 2012
    America West BankLaytonUT35461Cache Valley BankMay 1, 2009August 17, 2012
    Citizens Community BankRidgewoodNJ57563North Jersey Community BankMay 1, 2009September 4, 2012
    Silverton Bank, NAAtlantaGA26535No AcquirerMay 1, 2009August 17, 2012
    First Bank of IdahoKetchumID34396U.S. Bank, N.A.April 24, 2009August 17, 2012
    First Bank of Beverly HillsCalabasasCA32069No AcquirerApril 24, 2009September 4, 2012
    Michigan Heritage BankFarmington HillsMI34369Level One BankApril 24, 2009August 17, 2012
    American Southern BankKennesawGA57943Bank of North GeorgiaApril 24, 2009August 17, 2012
    Great Basin Bank of NevadaElkoNV33824Nevada State BankApril 17, 2009September 4, 2012
    American Sterling BankSugar CreekMO8266Metcalf BankApril 17, 2009August 31, 2012
    New Frontier BankGreeleyCO34881No AcquirerApril 10, 2009September 4, 2012
    Cape Fear BankWilmingtonNC34639First Federal Savings and Loan AssociationApril 10, 2009August 17, 2012
    Omni National BankAtlantaGA22238No AcquirerMarch 27, 2009August 17, 2012
    TeamBank, NAPaolaKS4754Great Southern BankMarch 20, 2009August 17, 2012
    Colorado National BankColorado SpringsCO18896Herring BankMarch 20, 2009August 17, 2012
    FirstCity BankStockbridgeGA18243No AcquirerMarch 20, 2009August 17, 2012
    Freedom Bank of GeorgiaCommerceGA57558Northeast Georgia BankMarch 6, 2009August 17, 2012
    Security Savings BankHendersonNV34820Bank of NevadaFebruary 27, 2009September 7, 2012
    Heritage Community BankGlenwoodIL20078MB Financial Bank, N.A.February 27, 2009August 17, 2012
    Silver Falls BankSilvertonOR35399Citizens BankFebruary 20, 2009August 17, 2012
    Pinnacle Bank of OregonBeavertonOR57342Washington Trust Bank of SpokaneFebruary 13, 2009August 17, 2012
    Corn Belt Bank & Trust Co.PittsfieldIL16500The Carlinville National BankFebruary 13, 2009August 17, 2012
    Riverside Bank of the Gulf CoastCape CoralFL34563TIB BankFebruary 13, 2009August 17, 2012
    Sherman County BankLoup CityNE5431Heritage BankFebruary 13, 2009August 17, 2012
    County BankMercedCA22574Westamerica BankFebruary 6, 2009September 4, 2012
    Alliance BankCulver CityCA23124California Bank & TrustFebruary 6, 2009August 16, 2012
    FirstBank Financial ServicesMcDonoughGA57017Regions BankFebruary 6, 2009August 16, 2012
    Ocala National BankOcalaFL26538CenterState Bank of Florida, N.A.January 30, 2009September 4, 2012
    Suburban FSBCroftonMD30763Bank of EssexJanuary 30, 2009August 16, 2012
    MagnetBankSalt Lake CityUT58001No AcquirerJanuary 30, 2009August 16, 2012
    1st Centennial BankRedlandsCA33025First California BankJanuary 23, 2009August 16, 2012
    Bank of Clark CountyVancouverWA34959Umpqua BankJanuary 16, 2009August 16, 2012
    National Bank of CommerceBerkeleyIL19733Republic Bank of ChicagoJanuary 16, 2009August 16, 2012
    Sanderson State Bank
    En Espanol
    SandersonTX11568The Pecos County State BankDecember 12, 2008September 4, 2012
    Haven Trust BankDuluthGA35379Branch Banking & Trust Company, (BB&T)December 12, 2008August 16, 2012
    First Georgia Community BankJacksonGA34301United BankDecember 5, 2008August 16, 2012
    PFF Bank & TrustPomonaCA28344U.S. Bank, N.A.November 21, 2008January 4, 2013
    Downey Savings & LoanNewport BeachCA30968U.S. Bank, N.A.November 21, 2008January 4, 2013
    Community BankLoganvilleGA16490Bank of EssexNovember 21, 2008September 4, 2012
    Security Pacific BankLos AngelesCA23595Pacific Western BankNovember 7, 2008August 28, 2012
    Franklin Bank, SSBHoustonTX26870Prosperity BankNovember 7, 2008August 16, 2012
    Freedom BankBradentonFL57930Fifth Third BankOctober 31, 2008August 16, 2012
    Alpha Bank & TrustAlpharettaGA58241Stearns Bank, N.A.October 24, 2008August 16, 2012
    Meridian BankEldredIL13789National BankOctober 10, 2008May 31, 2012
    Main Street BankNorthvilleMI57654Monroe Bank & TrustOctober 10, 2008August 16, 2012
    Washington Mutual Bank
    (Including its subsidiary Washington Mutual Bank FSB)
    HendersonNV32633JP Morgan Chase BankSeptember 25, 2008August 16, 2012
    AmeribankNorthforkWV6782The Citizens Savings Bank

    Pioneer Community Bank, Inc.
    September 19, 2008August 16, 2012
    Silver State Bank
    En Espanol
    HendersonNV34194Nevada State BankSeptember 5, 2008August 16, 2012
    Integrity BankAlpharettaGA35469Regions BankAugust 29, 2008August 16, 2012
    Columbian Bank & TrustTopekaKS22728Citizens Bank & TrustAugust 22, 2008August 16, 2012
    First Priority BankBradentonFL57523SunTrust BankAugust 1, 2008August 16, 2012
    First Heritage Bank, NANewport BeachCA57961Mutual of Omaha BankJuly 25, 2008August 28, 2012
    First National Bank of NevadaRenoNV27011Mutual of Omaha BankJuly 25, 2008August 28, 2012
    IndyMac BankPasadenaCA29730OneWest Bank, FSBJuly 11, 2008August 28, 2012
    First Integrity Bank, NAStaplesMN12736First International Bank and TrustMay 30, 2008August 28, 2012
    ANB Financial, NABentonvilleAR33901Pulaski Bank and Trust CompanyMay 9, 2008August 28, 2012
    Hume BankHumeMO1971Security BankMarch 7, 2008August 28, 2012
    Douglass National BankKansas CityMO24660Liberty Bank and Trust CompanyJanuary 25, 2008October 26, 2012
    Miami Valley BankLakeviewOH16848The Citizens Banking CompanyOctober 4, 2007August 28, 2012
    NetBankAlpharettaGA32575ING DIRECTSeptember 28, 2007August 28, 2012
    Metropolitan Savings BankPittsburghPA35353Allegheny Valley Bank of PittsburghFebruary 2, 2007October 27, 2010
    Bank of EphraimEphraimUT1249Far West BankJune 25, 2004April 9, 2008
    Reliance BankWhite PlainsNY26778Union State BankMarch 19, 2004April 9, 2008
    Guaranty National Bank of TallahasseeTallahasseeFL26838Hancock Bank of FloridaMarch 12, 2004June 5, 2012
    Dollar Savings BankNewarkNJ31330No AcquirerFebruary 14, 2004April 9, 2008
    Pulaski Savings BankPhiladelphiaPA27203Earthstar BankNovember 14, 2003July 22, 2005
    First National Bank of BlanchardvilleBlanchardvilleWI11639The Park BankMay 9, 2003June 5, 2012
    Southern Pacific BankTorranceCA27094Beal BankFebruary 7, 2003October 20, 2008
    Farmers Bank of CheneyvilleCheneyvilleLA16445Sabine State Bank & TrustDecember 17, 2002October 20, 2004
    Bank of AlamoAlamoTN9961No AcquirerNovember 8, 2002March 18, 2005
    AmTrade International Bank
    En Espanol
    AtlantaGA33784No AcquirerSeptember 30, 2002September 11, 2006
    Universal Federal Savings BankChicagoIL29355Chicago Community BankJune 27, 2002April 9, 2008
    Connecticut Bank of CommerceStamfordCT19183Hudson United BankJune 26, 2002February 14, 2012
    New Century BankShelby TownshipMI34979No AcquirerMarch 28, 2002March 18, 2005
    Net 1st National BankBoca RatonFL26652Bank Leumi USAMarch 1, 2002April 9, 2008
    NextBank, NAPhoenixAZ22314No AcquirerFebruary 7, 2002August 27, 2010
    Oakwood Deposit Bank Co.OakwoodOH8966The State Bank & Trust CompanyFebruary 1, 2002October 25, 2012
    Bank of Sierra BlancaSierra BlancaTX22002The Security State Bank of PecosJanuary 18, 2002November 6, 2003
    Hamilton Bank, NA
    En Espanol
    MiamiFL24382Israel Discount Bank of New YorkJanuary 11, 2002June 5, 2012
    Sinclair National BankGravetteAR34248Delta Trust & BankSeptember 7, 2001February 10, 2004
    Superior Bank, FSBHinsdaleIL32646Superior Federal, FSBJuly 27, 2001June 5, 2012
    Malta National BankMaltaOH6629North Valley BankMay 3, 2001November 18, 2002
    First Alliance Bank & Trust Co.ManchesterNH34264Southern New Hampshire Bank & TrustFebruary 2, 2001February 18, 2003
    National State Bank of MetropolisMetropolisIL3815Banterra Bank of MarionDecember 14, 2000March 17, 2005
    Bank of HonoluluHonoluluHI21029Bank of the OrientOctober 13, 2000March 17, 2005
    +
    + +
    + + + + + + + + + + + + + + + + + + diff --git a/pandas/tests/io/data/html/spam.html b/pandas/tests/io/data/html/spam.html new file mode 100644 index 00000000..a8e445ff --- /dev/null +++ b/pandas/tests/io/data/html/spam.html @@ -0,0 +1,797 @@ + + + + + + + + + + + + + Show Foods + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + National Nutrient Database + + + + + + + + + +
    + + + +
    +
    + National Nutrient Database for Standard Reference
    Release 25 +
    +
    + + + + + + + +
    Basic Report
    + +
    +

    Nutrient data for 07908, Luncheon meat, pork with ham, minced, canned, includes SPAM (Hormel) + + +

    + + + +
    + + +
    +
    +
    Modifying household measures
    +
    + +
    + +
    +
    +
    + + + + +
    + + + + + + + + + + + +
    + + +

    Nutrient values and weights are for edible portion

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Help
    NutrientUnit
    Value per 100.0g
    + +
    + + oz 1 NLEA serving +
    56g + +
    Proximates
    Water + + + g51.7028.95
    Energy + + + kcal315176
    Protein + + + g13.407.50
    Total lipid (fat) + + + g26.6014.90
    Carbohydrate, by difference + + + g4.602.58
    Fiber, total dietary + + + g0.00.0
    Sugars, total + + + g0.000.00
    Minerals
    Calcium, Ca + + + mg00
    Iron, Fe + + + mg0.640.36
    Magnesium, Mg + + + mg148
    Phosphorus, P + + + mg15185
    Potassium, K + + + mg409229
    Sodium, Na + + + mg1411790
    Zinc, Zn + + + mg1.590.89
    Vitamins
    Vitamin C, total ascorbic acid + + + mg0.00.0
    Thiamin + + + mg0.3170.178
    Riboflavin + + + mg0.1760.099
    Niacin + + + mg3.5301.977
    Vitamin B-6 + + + mg0.2180.122
    Folate, DFE + + + µg32
    Vitamin B-12 + + + µg0.450.25
    Vitamin A, RAE + + + µg00
    Vitamin A, IU + + + IU00
    Vitamin E (alpha-tocopherol) + + + mg0.420.24
    Vitamin D (D2 + D3) + + + µg0.60.3
    Vitamin D + + + IU2615
    Vitamin K (phylloquinone) + + + µg0.00.0
    Lipids
    Fatty acids, total saturated + + + g9.9875.593
    Fatty acids, total monounsaturated + + + g13.5057.563
    Fatty acids, total polyunsaturated + + + g2.0191.131
    Cholesterol + + + mg7140
    Other
    Caffeine + + + mg00
    + +
    +
    + + + + + +
    + +
    + + + + +
    + + + \ No newline at end of file diff --git a/pandas/tests/io/data/html/valid_markup.html b/pandas/tests/io/data/html/valid_markup.html new file mode 100644 index 00000000..0130e9ed --- /dev/null +++ b/pandas/tests/io/data/html/valid_markup.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ab
    067
    140
    294
    370
    + + + + + + + + + + + + + + + + + + + + +
    ab
    067
    140
    + + diff --git a/pandas/tests/io/data/html/wikipedia_states.html b/pandas/tests/io/data/html/wikipedia_states.html new file mode 100644 index 00000000..f1a4c4d2 --- /dev/null +++ b/pandas/tests/io/data/html/wikipedia_states.html @@ -0,0 +1,1756 @@ + + + + +List of U.S. states and territories by area - Wikipedia, the free encyclopedia + + + + + + + + + + + + + + + + + + + + + + +
    +
    +
    + + + +
    +

    List of U.S. states and territories by area

    +
    +
    From Wikipedia, the free encyclopedia
    +
    +
    + Jump to: navigation, search +
    +
    +
    +
    +
    +Image shows the 50 states by area. Check the legend for more details.
    +
    +
    +

    This is a complete list of the states of the United States and its major territories ordered by total area, land area, and water area. The water area figures include inland, coastal, Great Lakes, and territorial waters. Glaciers and intermittent water features are counted as land area.[1]

    +

    + +

    +
    +

    Area by state/territory[edit]

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Total area[2]Land area[2]Water[2]
    State/territoryRanksq mikm²Ranksq mikm² % landsq mikm² % water
     Alaska!C 1665,384.041,723,337!C 1570,640.951,477,95385.76%94,743.10245,38414.24%
     Texas!B9993068528194 2268,596.46695,662!B9993068528194 2261,231.71676,58797.26%7,364.7519,0752.74%
     California!B9989013877113 3163,694.74423,967!B9989013877113 3155,779.22403,46695.16%7,915.5220,5014.84%
     Montana!B9986137056388 4147,039.71380,831!B9986137056388 4145,545.80376,96298.98%1,493.913,8691.02%
     New Mexico!B9983905620875 5121,590.30314,917!B9983905620875 5121,298.15314,16199.76%292.157570.24%
     Arizona!B9982082405307 6113,990.30295,234!B9982082405307 6113,594.08294,20799.65%396.221,0260.35%
     Nevada!B9980540898509 7110,571.82286,380!B9980540898509 7109,781.18284,33299.28%790.652,0480.72%
     Colorado!B9979205584583 8104,093.67269,601!B9979205584583 8103,641.89268,43199.57%451.781,1700.43%
     Oregon!B9978027754226 998,378.54254,799!B9976974149070 1095,988.01248,60897.57%2,390.536,1912.43%
     Wyoming!B9976974149070 1097,813.01253,335!B9978027754226 997,093.14251,47099.26%719.871,8640.74%
     Michigan!B9976021047272 1196,713.51250,487!B9969089575466 2256,538.90146,43558.46%40,174.61104,05241.54%
     Minnesota!B9975150933502 1286,935.83225,163!B9973609426703 1479,626.74206,23291.59%7,309.0918,9308.41%
     Utah!B9974350506425 1384,896.88219,882!B9975150933502 1282,169.62212,81896.79%2,727.267,0643.21%
     Idaho!B9973609426703 1483,568.95216,443!B9976021047272 1182,643.12214,04598.89%925.832,3981.11%
     Kansas!B9972919497988 1582,278.36213,100!B9974350506425 1381,758.72211,75499.37%519.641,3460.63%
     Nebraska!B9972274112777 1677,347.81200,330!B9972919497988 1576,824.17198,97499.32%523.641,3560.68%
     South Dakota!B9971667866559 1777,115.68199,729!B9972274112777 1675,811.00196,35098.31%1,304.683,3791.69%
     Washington!B9971096282421 1871,297.95184,661!B9970042677264 2066,455.52172,11993.21%4,842.4312,5426.79%
     North Dakota!B9970555610208 1970,698.32183,108!B9971667866559 1769,000.80178,71197.60%1,697.524,3972.40%
     Oklahoma!B9970042677264 2069,898.87181,037!B9970555610208 1968,594.92177,66098.13%1,303.953,3771.87%
     Missouri!B9969554775622 2169,706.99180,540!B9971096282421 1868,741.52178,04098.61%965.472,5011.39%
     Florida!B9969089575466 2265,757.70170,312!B9967419034619 2653,624.76138,88781.55%12,132.9431,42418.45%
     Wisconsin!B9968645057840 2365,496.38169,635!B9967811241751 2554,157.80140,26882.69%11,338.5729,36717.31%
     Georgia!B9968219461696 2459,425.15153,910!B9969554775622 2157,513.49148,95996.78%1,911.664,9513.22%
     Illinois!B9967811241751 2557,913.55149,995!B9968219461696 2455,518.93143,79395.87%2,394.626,2024.13%
     Iowa!B9967419034619 2656,272.81145,746!B9968645057840 2355,857.13144,66999.26%415.681,0770.74%
     New York!B9967041631339 2754,554.98141,297!B9965988026183 3047,126.40122,05786.38%7,428.5819,24013.62%
     North Carolina!B9966677954898 2853,819.16139,391!B9966327041700 2948,617.91125,92090.34%5,201.2513,4719.66%
     Arkansas!B9966327041700 2953,178.55137,732!B9967041631339 2752,035.48134,77197.85%1,143.072,9612.15%
     Alabama!B9965988026183 3052,420.07135,767!B9966677954898 2850,645.33131,17196.61%1,774.744,5973.39%
     Louisiana!B9965660127955 3152,378.13135,659!B9965034924385 3343,203.90111,89882.48%9,174.2323,76117.52%
     Mississippi!B9965342640972 3248,431.78125,438!B9965660127955 3146,923.27121,53196.89%1,508.513,9073.11%
     Pennsylvania!B9965034924385 3346,054.35119,280!B9965342640972 3244,742.70115,88397.15%1,311.643,3972.85%
     Ohio!B9964736394753 3444,825.58116,098!B9964446519385 3540,860.69105,82991.15%3,964.8910,2698.85%
     Virginia!B9964446519385 3542,774.93110,787!B9964164810615 3639,490.09102,27992.32%3,284.848,5087.68%
     Tennessee!B9964164810615 3642,144.25109,153!B9964736394753 3441,234.90106,79897.84%909.362,3552.16%
     Kentucky!B9963890820873 3740,407.80104,656!B9963890820873 3739,486.34102,26997.72%921.462,3872.28%
     Indiana!B9963624138402 3836,419.5594,326!B9963624138402 3835,826.1192,78998.37%593.441,5371.63%
     Maine!B9963364383538 3935,379.7491,633!B9963364383538 3930,842.9279,88387.18%4,536.8211,75012.82%
     South Carolina!B9963111205458 4032,020.4982,933!B9963111205458 4030,060.7077,85793.88%1,959.795,0766.12%
     West Virginia!B9962864279332 4124,230.0462,756!B9962864279332 4124,038.2162,25999.21%191.834970.79%
     Maryland!B9962623303817 4212,405.9332,131!B9962623303817 429,707.2425,14278.25%2,698.696,99021.75%
     Hawaii!B9962387998843 4310,931.7228,313!B9961498523982 476,422.6316,63558.75%4,509.0911,67841.25%
     Massachusetts!B9962158103660 4410,554.3927,336!B9961933375102 457,800.0620,20273.90%2,754.337,13426.10%
     Vermont!B9961933375102 459,616.3624,906!B9962387998843 439,216.6623,87195.84%399.711,0354.16%
     New Hampshire!B9961713586035 469,349.1624,214!B9962158103660 448,952.6523,18795.76%396.511,0274.24%
     New Jersey!B9961498523982 478,722.5822,591!B9961713586035 467,354.2219,04784.31%1,368.363,54415.69%
     Connecticut!B9961287989890 485,543.4114,357!B9961287989890 484,842.3612,54287.35%701.061,81612.65%
     Delaware!B9961081797018 492,488.726,446!B9961081797018 491,948.545,04778.29%540.181,39921.71%
     Rhode Island!B9960879769945 501,544.894,001!B9960879769945 501,033.812,67866.92%511.071,32433.08%
     District of Columbia68.3417761.0515889.33%7.291910.67%
     Puerto Rico5,324.8413,7913,423.788,86864.30%1,901.074,92435.70%
     Northern Mariana Islands1,975.575,117182.334729.23%1,793.244,64490.77%
     United States Virgin Islands732.931,898134.3234818.33%598.611,55081.67%
     American Samoa581.051,50576.4619813.16%504.601,30786.84%
     Guam570.621,478209.8054336.77%360.8293563.23%
    United States Minor Outlying Islands[3][a]16.04116.041————
    United States Contiguous United StatesTotal3,120,426.478,081,8672,954,841.427,653,00494.69%165,584.6428,8625.31%
    United States 50 states and D.C.Total3,796,742.239,833,5173,531,905.439,147,59393.02%264,836.79685,9246.98%
    United States All U.S. territoryTotal3,805,943.269,857,3483,535,948.129,158,06492.91%269,995.13699,2847.09%
    +

    Area by division[edit]

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Total area[2]Land area[2]Water[2]
    DivisionRanksq mikm²Ranksq mikm² % landRanksq mikm² % water
    East North Central!B9983905620875 5301,368.57780,541!B9982082405307 6242,902.44629,114!B9993068528194 258,466.13151,427
    East South Central!B9980540898509 7183,403.89475,014!B9980540898509 7178,289.83461,769!B9978027754226 95,114.6013,247
    Middle Atlantic!B9979205584583 8109,331.89283,168!B9979205584583 899,223.32256,987!B9982082405307 610,108.5726,181
    Mountain!B9993068528194 2863,564.632,236,622!B9993068528194 2855,766.982,216,426!B9979205584583 87,797.6520,196
    New England!B9978027754226 971,987.96186,448!B9978027754226 962,668.46162,311!B9980540898509 79,299.5024,086
    Pacific!C 11,009,687.002,615,077!C 1895,286.332,318,781!C 1114,400.67296,296
    South Atlantic!B9982082405307 6292,990.46758,842!B9983905620875 5265,061.97686,507!B9989013877113 327,928.4972,334
    West North Central!B9989013877113 3520,355.801,347,715!B9989013877113 3507,620.081,314,730!B9983905620875 512,735.7232,985
    West South Central!B9986137056388 4444,052.011,150,089!B9986137056388 4425,066.011,100,916!B9986137056388 418,986.0049,174
    +

    Area by region[edit]

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Total area[2]Land area[2]Water[2]
    RegionRanksq mikm²Ranksq mikm² % landRanksq mikm² % water
    Midwest!B9989013877113 3821,724.382,128,256!B9989013877113 3750,522.521,943,844!B9993068528194 271,201.86184,412
    Northeast!B9986137056388 4181,319.85469,616!B9986137056388 4161,911.78419,350!B9986137056388 419,408.0750,267
    South!B9993068528194 2920,446.372,383,945!B9993068528194 2868,417.822,249,192!B9989013877113 352,028.55134,753
    West!C 11,873,251.634,851,699!C 11,751,053.314,535,207!C 1122,198.32316,492
    + +

    See also[edit]

    +
    + + + + + +
    Portal iconUnited States portal
    +
    + +

    Notes[edit]

    +
    +
      +
    1. ^ Areas were not published in the 2010 census, unlike previous years, as the U.S. Census Bureau no longer collects data on the Minor Outlying Islands.[2]
    2. +
    +
    +

    References[edit]

    +
    +
      +
    1. ^ Census 2000 Geographic Terms and Concepts, Census 2000 Geography Glossary, U.S. Census Bureau. Accessed 2007-07-10
    2. +
    3. ^ a b c d e f g h i j "United States Summary: 2010, Population and Housing Unit Counts, 2010 Census of Population and Housing" (PDF). United States Census Bureau. September 2012. pp. V–2, 1 & 41 (Tables 1 & 18). Retrieved February 7, 2014. 
    4. +
    5. ^ "United States Summary: 2010, Population and Housing Unit Counts, 2000 Census of Population and Housing" (PDF). United States Census Bureau. April 2004. p. 1 (Table 1). Retrieved February 10, 2014. 
    6. +
    +
    +

    External links[edit]

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    +
    +
    + + + + + + + + diff --git a/pandas/tests/io/data/html_encoding/chinese_utf-16.html b/pandas/tests/io/data/html_encoding/chinese_utf-16.html new file mode 100644 index 0000000000000000000000000000000000000000..59fffc0d19c5731ad445d2b1c15b68ef5337e8ab GIT binary patch literal 824 zcmb7DNeaS15UjJW7`%%{@Z$CY?}8T@jT%7>PCO`nKtCYfe3cLJ18TLyXc7_z2}wHD z)z#HK7=wp$7w}W@1jnIM7DQ6-|#YmLWb2f zUtQNHJC#f-+X^{P+JWG|+W$Ps&J-%~aXUzC*X$PE^gdpG0#{e=t9TQx&i1>VuBZFi k@2i~mzprND31xjfhONC?4{`%n6K0~fk)=czo17f}C$`Vc17Z4c6m zM^#4s{23V;S>Im~yAg3O8gVSH#hECA@j%>(Q$d{fJ+UG-+Go-8L@*bhQHyoVCGK_3 z^^Om*8-Xu=`=TdGi*viw7!7*7YF$wbtjC)%d9dcA@V=ZAaXcRc@2diTbXVd++=^{c z1fGtQ4|8@zSKdYOjPqh1xiel9csQ2#T@=sM^Vju#Q4h{;T92%w4t4$W_rJ~xE&FzF zah{w{?O7B9>$}#q#!qWq(?#m!KEu1GOYk&|I8a07y;?KP*;zZUUtV`oEnI_1nsN1- zr6+2SyubJ7h&S=^@;~*O$8Q<++Ej1kb=-He_QPjCV(T#C_2{>sYq|`-ypOKl$=}D_ P-K=R;8+88fa=(87IBImA literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/html_encoding/chinese_utf-8.html b/pandas/tests/io/data/html_encoding/chinese_utf-8.html new file mode 100644 index 00000000..ad1ca33a --- /dev/null +++ b/pandas/tests/io/data/html_encoding/chinese_utf-8.html @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +
    01
    0 漊煻獌 漊煻獌
    1 袟袘觕 袟袘觕
    2 埱娵徖 埱娵徖
    \ No newline at end of file diff --git a/pandas/tests/io/data/html_encoding/letz_latin1.html b/pandas/tests/io/data/html_encoding/letz_latin1.html new file mode 100644 index 00000000..7b4b99cb --- /dev/null +++ b/pandas/tests/io/data/html_encoding/letz_latin1.html @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +
    01
    0 Gét Gét
    1
    2 iech iech
    \ No newline at end of file diff --git a/pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 b/pandas/tests/io/data/legacy_hdf/datetimetz_object.h5 new file mode 100644 index 0000000000000000000000000000000000000000..8cb4eda470398cf22278829cc91b531dbdca661a GIT binary patch literal 106271 zcmeI5U2I%O6~|}SakAMsjgyGth8AuMt=$%!wbQ1CN@&;drZzaX@j7W*R5qLJb>eot zUU}DPT2z`)DpFd&2vJ29rFq~XKpzZJC5sm$=!#~0oR=qL%x z{X_PHxvSJ^8$K5_9|CH<;(IMNzS-vQG?<9@Z2ZV@Jg!Jm@|UZ)L8Pa%oG^`;`w z8;=cfgAdwPi>vxF$jr5K5lLD*lR531F z%9mpO@_YJYi3H2jPoqcvk>SDp+|ZId=N?iA;=_r)!2vDLLr3!OXpc~c<-7DXKqHOi zhaMY=b@#^;$-RTa`(h)>YUw-MJ8ZhGI#2&NZMFl>S*OL+V&imZvyrpOOaSMbhJb&Z zI=1CGYrh3VA$2(8QXn~&l#lwB=(W*ikzV|o7#QpckOqt1g{;0xei0iU^3`MUoNq1( zv3Gu6C42N~vv*C}-sMu+bSj@LJdPRbQq4%&le!8`s+aEaeUpAHn|4nm zk4$9pI$EdyoW5_uZJAt0xUDY|PPA&#)^M&fT*&PTr`okZlZxtVfMx);`^Qt6DK}rr zkmr|e^ds%i`6XIot>ILnwVTc+8%izpGCZ;q*`%pk>V!*m<9r-{K5czew+vOto1v*( zsa~6Pz0#>difc7Bp4BZ~dcE4HRIb072eYYca?;I}&4U!LV*5oOHfWL=_qfY_!t)>frz5J*l1{Gv^2XglircD%>BQxE^+@xlGMm(l z&970{b1XYL!4WJ!O+S)RIYnx!$2r$86?+E>E^sXRrJ5^y1r@KMfnPke*~Iyxx7lE!s( zYLQ*xd^@i%xNgn1g0h}Ja|=b%F>lxP4eFehyOqMcrqH^iC2F6%ykl0>@UNcEo2J`} z@ffUic+;>P_X0O5*kI6yY&#o*!9zTL==d!?B+2rK#+0cQr@7k{obl0<35H8ZexAjm(EU7y9QT?|Rxh-t;P%(v{?m9r3P* zd1)a1)7kzI7vqul&39H-F@2n%eaQ4ohPZ3T7 zhBQsxn(XR%~juSvKZ z-ei$-XpV3TW8?1b2Pl#bfBm{^*RMEp`|H=8tgn9Eb=Pmb)|-Lh!ABER?`rxY>SmXnrgtoiVx!AP(eRoe&L98+AOHd&00K860cU3XukPQsskxVvw_R@7 zF6$=ci|4hmK78@^F;w~G?KWUpykvtb2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?x zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=9 z00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?x zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=9 z00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?x zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfWRk1AfOsmfQoLh#W$;F 
z{V93}$0{|i&q|F9$K#45ulf#qTg^L0J|q#i-c$s7Z=|0TSZ$}DyXi;D>*M;k zdFRMPwvZajx|7c6#H3S59nQGU&h+R`{fXq8R2!W+oyl)ce=L!pFz2^k$J2?9k-mYR zI6)}MbLk;Sv8ywK`4J0S!ceF=zx`4Vx$F=B?qE?x4*nF3f%mvL2GY*4Q$Kmwd zMm}ouaUL+?db!%KO=_8kgF&MW@S8<=n}`nTaD$<;R|N*$!4tuM}&g zmrGcu_axQ#tEV>xwN$=IpU;dPaZS<1^rj?7hlR6JmD3+NHkGwiubtjxN_Ccg-Kgt7 zkt<}}<8DSLrc*V3Jm>IHN^j~v>87WS6vif~%k@z$y|g2sHW}Tq5doR!f+Arvvh9`S zpR`}}_53|Xe%6XNQm=DNu)$=%iQ38GfSt#J`hGy=k4=q^W?ZWytRI{%pwjMWCRK2C z8qP9gSEvoa2C0HBkT7cX+Gca$$U9zqb$>}8%k_N|*Ml>qLr>SS;o;b0CT#3jqt#}m zdfP?P-1nc`Y{Z!_*Op^F+i)6RiZX3EV|d@0=qJ+9&hM|%49nfg>7&jwLIg>{}YHsgB7L$TeI zowuBN$AdKwRF}HPgHF0!=^vHyJYR=xda=DfP>8P9cGjq9>!dt!TN30Sgc_4i*ycR{88+e~+) z|Atf9qi$)qU3Y(-F7{W8Dc`OSbbwA`^jE)sf>N|{WGO=RDIZ^ECJ=7k=fe#oFWjVW zma|{Hc5t7Y9!q7NN$#HIu!D<@qvxq@d{z(4=h|qdDjD8J44&ZBg2D_dRA=3EOk5NdDPJ%I^8c8O(GUv z&FV?1p?hn~Nhv44R9mjsX5YN4wp^E$ySTHq+``(fUP$z$*>;{hXP3rL`96K+`6H?; zE$ck*^Qc32r1kx&e8Cx>c-qOE2h-#F!PL25jF;^9O9H>1%l?X7rT#g`lpy_5ub%6N zLtW46&y`#P>G`FoZ49N6WGklURHQx(F2(POcq2~9LU!DK5CfC4AD*?>aeFoC`-dez zn@=%9`bYSQT)kK6k7H%}WJW?I{n$!{NclzmkcQd{R9iRu_xZAJ#vOOF1-&?-wEle- zDTnuV`6%W~pG{{v98K#Qsjt~TtT$hNAI1B1vi_HAROeoPnt5GnX75?e5_?ph0x=)} z0w4eaAOHeuoj|qq!o5`*cB#sGp|~Kek`B=z00JNY0w4eaYnOod_xk<)O8d*}X%8c$ zdisV(*oBApLA?7LzTF1esi^E{8|gQaOZBsz88q@yi#GZl6PA6@wM!f|UkZV0ezt8X zqX)7-c;1Gub-&qP7s)qj{bYx%KxrSZ{JrKjk}e7b*Z!fgqHic?QQ8MDe~$$J zQ052!P}Y-r{ozDkuM8OtoaIdDsQ0~v>d7}7q$7F`F3qNE4J#F;`<2d9n-g_#w9zn)c0`<`_cP8 z&ij5E{-Ia3BGB4MFOQaMJk;M`rk^XlAA^5rO@DXzhmw^le@|{qm%MkZW>;2jzJY%z z_dvCEf%t<;|44ZC`w9M`JORKz6#k+0{l3+w-@bX*>+laXt7#tV=J(*t6Ns$0yncu` zlYi*xq$Ukso~SL?C3Shcww#Eq_Eq*NBDVA7IeR&uZyairw7%&7x6J;b=k4#;OTW~s z=VtGm>$#|ZX!BFHG4f2weYR41&Uy2(*8D?Hm+6xk31W}E`xQ5nf9T_;JIl_u{YplN zy=4_r>`_?)M1TMYfB*=900^vk0`>Zb=Bun1WF5QafuI2(00JNY0w4eaWdy`O)c^gG z%k3XJblmJqEY&~s?PrX9)YkZreeqV@2Q6y}B5ouC)%-(yUp9In`^{Hu_*(ay{mdla zL-zNkhO9vS{-NFQ4<&Z&muL1Kqnc!CchY=gT!}0ELm5ytKM?sI3-JSyu;^>ccMaKD zO?hSB&vDkP{r#b*zhBZA+q4CjHXAyBvbG%SAs=sBP9{`-7CX}QquTjJZPM8nYs;0F 
zusSi-{DbO$9~aRTuJpbI{-G7egWfu+=RKa+|HAJtU4MT?tM(WCL*XClw^s6fz3^+h zcE2{S9mMyCYCm~=f2i+!_hi#}mr4{FPm*LZbe zWS?T_6;nTX&R)*v8;6=AtzRks&@1-uDoMZ8tLO4wz1DoqZ6H05{K7Vd(nzuj^Yy~# zbMJiZolh70o?Ov_wdNmsu1ufINRagl{6nQ@&Gna6{5`}TEtEzG1V8`;KmY_l;6@-& zuYc(Os;n1^3(^|_3fceyAOHd&00JPefPnai`v0!@wfcvi2>kc$jkGZpT6gxtf4}kW z?b+s2BYzuxA<(eCsU>n~?qszOm!7_;F1)L@aQXmmZ2kG{<-zeS8@9Ij(w(||D$q>( zfGkLV+xJd&GP>)`+~iM=?|SJUmp?ky5^c!7@(m~OyScmm^Suij&o#FHX7g)@QV%B6n(Sry61D*G^+bk4B4Z*P6bl%R)n|ZTue(%kj_i=u9?&8qckb=iFRj*3n zs}w)^Q8%e2s58SdQ-kRTrk@h@VyJ!veWTo-;`zsWsNp-eSIfDa(ws8CQ0`W{{q$x^ zLrl+=2Ih0w5`6G8{%S$#F5^s;v2O3+tKe~7I{uJeX1qpmom6R-)2~eZt1eXY8o=tR z{)23hk4(CV@;}OFD;3=i{C)Z_{Ey4Ur|_X6Kg@2b#ay|vP%I!nn2zvIW+rqE`0pb% zw50)mX{nlhn9o&ebH(zLY_-;H{8VO=$E9%2sGX+R;J&cHh4^hamK-;v)l^vb~BwJf)8`L*^=%f|Ia{pzY` zHJ~S{WZZD)xGmfBou*6v7$6ehCNp_AGu1h65Xaf>O0({`YbZwdvY+W| zC`RoQ3CbQlJFvohU?P27Pb3mJgQ-0^y!069q$c`dL!yeh!e0SA$$?wXqd4hy&SlHx z>=KL#&e5aKxv9)FN=&M8q!3p^v>H8kwf;nf$VYymmshFO-XJ!2EmGX`lJVb-G0h$6 zv2GmHF6Zftyd2GbW_#;a+fvK#etiDAIr5fQiEU?g#qq*o4a6FVH4tkc);hzIeH3Ha-vc6q4TI}Q6D zT{k-JCp7Lj+=Td`d#Q${yzv6qOh1gXFAw_A3|w`~Vr@KX|W%8=R@y zZ-ATl;SMAG*@o>oE7qNY{kpc)^wtH}dpD3h$ft(uHqjnkOvv~L@F(+zbO>d==LSRW zb98+S))DfPT&bTN=hHFHQH6*5Mafr3^If689YMTsz76wh9q#Lhyh0N9ayQuLqtAoi a&nrCmuks3^ot~E8O5<)Dn3?(a=iD!5Y$rGX literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_fixed_py2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..540251d9fae86f0d691e0e7748e0249af55e1d54 GIT binary patch literal 1064200 zcmeI(&2HSr0RUjGWIJ0~t<|PMe-F77FnSOyqXB#gnvEq#u4Kv3kD&l9l$Eqe)mSTO zwMyfnhaCEoyg?tK$38-jKK9s0=rM25a{k2ax{~YGMvUa6AlST<`~L%yZ*OvcVoFOz}2huU)Xoqe~eVm#^c{!?yRrZ>qGp@^uhYFm;UDLTu%3D%yaxcoyj-c=Xr-uyH97X#_aOZq~GRW9f9%tw3O4MTz?md?#KHl zq2z`zl)#;bhr1u%*#4;d@U(Z*Ebk9{2VpU7pBvjgw|%yKePwmNRhQsgT&wn_kgB;) zZsg_rW5L-Tq$pWPAANs27iSyk32O z&^-uSrmK?f+Gx6udV`bw!+xrdLlG(E_Qveo8?!e@(|s+b>-BdJ_xAfA#GTc%u)pvq 
zD(O~c)0W2Vm6UEUI2_a^I>aB{&&hDGzdIb=&*gY#{k{EzkW;9KQ`vqiES~MBJpP*V z`bOMOyua$(Ww75HKC0t~{k<}hZqWPS^kH{U=W{LPl@GjJ|E+J=k?ZO66r9Z`>)b=s zkY5Nd=jDlsiI_t>xAZ1kThl2~zVXsR^{spsA@c8T|M=&PPbydEJr_G`Yn^xNG)?@A zpNltVe-igNUA-Q|^qju*#B;j#D}*s#sz<`HL+i@T{pb2aTLwcJX>qlC@=LW{LseYoF`DgRSDJhbY^aQs?;B0KJiNv}ymYp3(x$5P(%JO90&l%Q@zh|Cbj<$0-DsL?51eBwHzrMfB?`O^R zy%i(0s@GyYl>5D6Kh{XKaKZa}suv!gUFYOq>e=RcrsEi`7ebQrujjAT3z6)Ld47a` ztZ;qa%Gb5j5A9rj-iyc9%qHvSYls@EkI)AZ{!-F({AbsBGOa%AmzkLO`w#25acbqk zje6l(?%UCRqzB8XXQrvAFYcF_d&Yj5=J|Ow-Y@sp7dGctI~%L_(kYBTzYlkQ)eHS- zVVolL2bXcg5+EPMX2hM1TMR0t5(rIR)Oiy}BHq`1ygKp8oZ?Nj}|A zCU#?2#O&_jpf`JP`f#}49SpjUrnU#Y-NSx8e0RD#oH}l$SX1?2WZZf@u~hwjv$gZb zwd2X_ORezl4>`(S7^|E*zO*^9dTXg#YSmFLUtgNsZ2fj+W#wm~y%J|jKOOFT_KV{y zo2~ioa3_x{*5AQg`*L0fXGMSj0RjXF5FkK+009C7UZlWzGO3S4#ZnpYGfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk 
z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ 
zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 
z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly iK!5-N0t5&UAV7cs0RjXF5FkK+009C72oQL>z`p<-NVALp literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_py2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..3863d714a315b79de100e8446db766ec7dc2e82c GIT binary patch literal 72279 zcmeI#%}yIv6u|K@5O5q*2$iaeZdlb`Axyqlh89yS8G|rwQmN8(j3>yd8C$l4(p{JB zTXdNR=^~HPw`k9~cMcu~oF-KlRq`M3jOWh%IP;r(?_B?Ou(!WlS}z5<6^p?_uA=CK^3k@feV0jg`TSouciU}A*Zg1Ir>&_bedmlF9zS$Puh*6 zmdme0$MV-Ux8zLo@3><@Mw;e7{{D3Lt7g3&?jM~T?w*FT%5QJh(sFCA&ipuCO$XfS zg3A{C(>UE(_W9Cc{)GKUBQR^64%7MDApYxd+HM`~6{H7SzbmHw)yBzIdVZXX!{XuS z1#G^{drjw~|J!_5cJtjZO1jZFocui0+27Br_vC2L|HUY}h(qgjI$!#Evk zDm)wX$1d5G|H57O=hgnOR;o6(O6`ivS}6^;OOxS)QncwZl!K@4Sdcr?dH*u%zl_Jz z3ibW6CLfE()z_{>l~UBMbY$U26AKWyq zWfxaBnnW7a%ZtR_>AS~Ol~y%={~k=DBpk)V8}C6$cloBz^{y)sCuf6hFFAJ?WvA`x zlEFM$SNCu2EctSy(O@KzYW{3L7*9sMvq`r9EIIjpd>QLA)c2+Q%l6dmtsOh&#62X04muYFEll^ZWT? 
z_PY0pHY52r+|jS&7jCTMWv>je2u0+zMN}hq~-{<1o99xBh>7x!SdI zdguP`gCNq%WV^lG_RMlN7N)-gmZgqjU~%p@noX@YI~MfZ+STv6-QI33wDLCgZkDs( zAK7-dugbl9yV>{RPr~Tq(sq~RW9{~mNqioUGV%I(I|_Xn&96h!@@L+AZpNSX6Dy}P zv44*Gz^&ZP_k-_OQP_2BjeK1Cen`%DZp77jdt6s<;(T14@crWIWa7HsA199=Iuq;1 zixOioXq_BAYfJj9=i@rRT-V=yy1uq?IX%k7Tej!x*AsP8)lJ_GYzQEL00Iag@L>x? zwt%n&RVV$y_+j5RcZUE12q1s}0tg_000IagfB*srAbX?KG8I__OO;nmP@5^ zUy$DVj2`=h9CPVR9*}l=%rQSmfnC5o?_HDe!S! zu^_6Z%gtMQe)-H|c+B&A)Nu$tBlzo_?KtA9z`0KeU#0aI=Plwj;Rn5TTL2}m&yc3Y zi>tQhGGYcVl?FE2t!;ea9eQiF(idcBtc-*@Y>yi}@ju~hve%+rS47RMi1zKm-=H(t z1OpZy!4GA#d_x3Hk^Y;_R=*E;q`v|`rvJ9reS|NX)8p!{c--#wJKZfqkFLY?&3Xey zNPpiz2Rseww|57vhnwxbx8Ci2*BW?b{LA$w@m0;5ML(^Q!#yW~|K_KE*+_EY?`H8? zb7?eC^iyk#lca-d*Y8KO6TJ+Q)3*S*!v<5&UpG;-8BcO z$XtcHV2RjI8Z_Q6?Bk53E;WkJ=jt7+28>1c@*sNW1PXbYHfAZ5HcShXF1<3BhUB?xJMeQ>=-yu-Zl6!U#bo#BR_@lO^c%m zJP+`39E)IjhWx#29~(09+5$L}T(i#H+?*kGVIAzuJGF`>VNbK859w`?=G7;Uc8m5> zcHXsmz1A-9%PKcM+4O?kbrI|gl|1!cF zP5gGZjKbpiJblic^T{v#?5of17XKjlCqavS^MGDp)k2pbVP{~bftdzo8klKdrh%CT z-YE^t!#xP*cE!9k&)cqIEZTYh%S|m(%rRqS7!1 zi>-(^X5KNCmMacY^#dZ1{&)DX-3LLO9D7jRNoayN(pfqQvN)AVQ`Q1o_0}^Dt14ZO z4-T}-w0t)P9k52!|56)QKy%3C5V_q&am-Y}T7{KvoohEf@Dx4P4mh!S74oFC$Nz*qZRBsxGL^5$?yQN$RAu(W-IFaK> z?TH=`f{y_Q#E~O#;D|VK0zLsp)C)%x=9w3Jv+Jf2AP(d|RlK`1^UUu2JUct~^~EmcLy^aGwf1MEW7_MT-OYDf z{b#8@qYg?M8?Lppx@jMI7Y+(AW!k2?Y$t&ha>tD3nug#w{mf1DS1OLBe&km&L_j;ryd%gol^$aoj(8c zzWC$jIzoTLyfQrQ(nI6IoH9Ia=HNIpuMF?!<$ZDKb#ZoX8Gha?`{K?0+V5wWUxuIe z>b|)2d9(9OI(|I${mw5|tNQ)w*a-8z9tUUL*EBqSY9$@-V%)@$k1yMHA~_{b^v^%t zj~_PwYe2po_hzH*4Ek}Y-%fg7KYaYmKa9V;FFrgQhMzy&pXqUar5~@)3GH*f-(_vi zZd#GtHexKL_)dz6V9-?BVx{u>F0{)ysZvpWmOO z40`lGUVdIGO5G zBKL1d!T$!aZGZm=@2a}ZRG&80%QE7Ae)#;`vhL`6geHA;&Q!l-Rzis!^Dom6%DNMN z$xYVKu{FP7h)arKKKgYA(L zSg0h++TlCC*RkLJaKViWe<<7C*3Qmuw-t^%tDAN|)VdZ54!NE1zr&A;YivuXygoH2hB<}aLY*GgW;mUetlU(Z&Q+m(7_v9@IEkC^gszNyKJ8fK19x;$SnYGQP^P$<}(5pzfC&P05} zlxQl;%t@Kq)5c7Se7_?f@3T!^7dH}p|GX*`AAQ0+di?(E$FI9z{AJ01J$?_GN1n9(a95^NM?A!G yaKGdo)A!f*z1|0X{LtC|&GvfPb!FCV?=O^>SG&FT+D7Zu*rv;w<13lttA7Cz{LOp- 
literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_hdf/pytables_native2.h5 b/pandas/tests/io/data/legacy_hdf/pytables_native2.h5 new file mode 100644 index 0000000000000000000000000000000000000000..4786eea077533929868e41b46764b039b2c10ed7 GIT binary patch literal 12336 zcmeHMeNa?Y6n_h->qiK+f`g8($`sZlj};NrX%~fp8G{uVL@QiAmW0>^m&HI0cIXfA zqx=I6kx-FI)GReKKTvcOO!*MeX&5C$bHIvELCPm9bnmOt22n~$K$dKQYv5b^=fiHu;7#t!dS{)M~p;!0+=G$WbGM1gqy3J-~tlM06 zE+`Fvf9P3cWyO1bFdE+gJdP{hRpEhWD^Og)r$=qqsAqdWOZ5)F_wfCb_#ko%&S&Ae zSr@?w{$WAyUT}@76ak6=MSvne5ugZI1OkLts_<$^IS@8u=;aN^E6+|q^GQ87v^RRy z(hxAmD~0@!tXss4F@U$Y@LeZhpacixC}=a2VeF-H%o7xh`&h=k4{%fw<~IX8 z2zef!_aI)yVnPcwc)xKB%R7M2B?f;X{vnIOh92_@IADp4W&vBBIw>hxrjj4 zj;C*k4_Wu~Y_a3eKBf3FOFCxENj@IJyxokM#S9OQE40sM2Y^772mvOH9};U%8b1ac zKX4xO`=s#`;b!c7PN-?_?rdN7$RoZhJF9AcU2nMWnTBfD zYuj_WtA8u<3atNQL79rZ=$gH~{ReK@Wo>cmdCB>R6K9V%b$Q6Ei)xLYWh>pw{Zq?Z zj&i=GORI8bUbv&vahb9nOM8*0{`5J807K zi`P6{wBh=JZH_&%cQhrN9<+4c-xe1$O<@4rKQ>}JDeX`YXe9ibEHHk0c03p0H5q)- zXrl=H_YfFdC&B+CnAW?n>IHQ#E`0xKeYDr4K7yWxs*lFO`@oxl77(E^sjcCN3Z9-@ z(?>+ctHIsK^GMCy*_yeHA zsEdtW5sPZB!IlFfru8pBCl1wR0~UjOrgLJWwaJ=DwT@VgB^@&o&V@A}$e8Z$C7&MlNkmz1^?T(S#N?%7q~ zZ6z5EsIK*cWhvVi*zS(WySLbW-P--_el@4>8J#wltSU{5={~%3~XxIs_UEHmBnY?Jsh^>Yp%Yt)0+D;6j;@| zHZN%?3Y(R~H7>2_%AefCg|`2+E-5$i=<>#6JKwAxmzjF=_Nk`g&HL7;P!>(M66k)) ZQ)NM4;4Rlvxhp{xOB+RiBJeyA_zS3>0-yi@ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_msgpack/0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack b/pandas/tests/io/data/legacy_msgpack/0.20.3/0.20.3_x86_64_darwin_3.5.2.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..7a546a82ae766c99af5056604eaa720c321b4abb GIT binary patch literal 118654 zcmXuk(-I&GU_`;aYumPM+qP}nwr$(CZQHhOZ&H=cy!8CR8MdfVgU*ANv}oHoQ^sK{ z|Mxd()~;iVCY^>ZZrh+ull_YuH|X5pM1cRY_Yd&@`_Bo;Kn%9LixF&Ji5RQ5?-N9LsSW&k3B!Nu10noXTmO z&KaD^S)9!|oXdHf&jnn_MO@4!T*_r!&J|qARb0(AT+4M_&kfwjP29{a+{$g-&K=yz zUEIw*+{=C3&jUQjLp;nQJj!D{&J#SzQ#{QxJj-)D&kMZBOT5f0yvl35&KtbRTfEIX 
zyvuvM&j)iSA5Mke9L!y&ky{_PyEa;{K{|q&L8~AU;NEK{L6m~ z@ZbNP{}_;g7??pAl))IBAsCXO7@A=imf;wl5g3t?7@1KRmC+cTF&LAv7@Khzm+=^% z37C+Hn3zeJl*yQ!DVUO}n3`#rmg$(D8JLlon3-9amD!k`Ihd2Vn45W+m-(2V1z3=U zSeQjvl*L$_C0LTBSej*6mgQKU6k7 zBQY|gFe;-lI%6; zFe|e$J9986b1^sbFfa2lKMSxR3$ZYZuqcbMI7_f3OR+S|uq?~5JS(swE3q=Guqvyu zI%}{dYq2)#urBMdJ{zzh8?iB)uqm6dIa{zLTd_6Uur1rMJv*=?JFzpnuq(TCi2XQcma43gyI7e_KM{zXAa4g4hJST7>Cvh^Ta4M&9I%jYuXK^;? za4zR@J{NEy7jZF{a4DB@IahEcS8+Aha4pwyJvVS8H*qt!a4WZQJ9ls=cX2oOa4+|9 zKM(LA5AiUM@FV|*rHLMCEjCSg)0 zV{)coN~U6JreRv9V|r#_MrLAWW?@!lV|M0XPUd26=3!puV}2H3K^9_R7GY5qV{w*X zNtR-1mSI_zV|i9!MOI>ER$*0EV|CVGP1a&<)?r=NV|_MYLpEY#HepjXV{^7(OSWQb zwqaYgV|#XBM|NUoc41d`V|VsoPxfMO_F-T4V}B0dKn~(y4&hJ^<8Y4PNRHxYj^S92 z<9JTsL{8#lPT^Ee<8;p8OwQtL&f#3n<9sgQLN4NBF5yxx<8rRxO0ME+uHjm)<9cr3 zMsDI}ZsAsL<96=gPVVAv?%`hU<9;6CK_22^9^p|Q<8hwgNuJ_qp5a-Z<9S}-MPA}% zUg1?<<8|KPP2S>d-r-%|<9$BhLq6hTKH*b7<8!{?OTOZ3zTsQG<9mMKM}FdGe&JVs z<9GhxPyXU>{^4K#V}QW=&wvcXzzo8m494IL!H^8a&Lhq%*?{9%*O1@!JN#+ z+|0wg%*XsJz=ABq!Ysm~EXLw2!ICV+(k#QWEXVS!z>2KI%B;ewtj6lB!J4ea+N{I6 ztjGFnz=mwZ#%#i-Y{uqn!Io^r)@;MJY{&NOz>e(1&g{aj?8ffw!Jh2J-t5D^?8p8b zz=0gZ!5qS&9LC`s!I2!r(Hz6E9LMpTz=@p1$(+KeoW|*#!I_-J*_^|G!IfOa)m+21T*vj?z>VC*&D_GR+{W$P!JXX2-Q2^y+{gVqz=J%*!#u*H zJjUZZ!IM12(>%koJje6Az>B=Z%e=y?yvFOi!JE9r+q}cOyvO@|z=wRq$9%%4e8%T| z!Iyl+*L=gbe8>0vz>oaI&-}u#{KoJ6!Jqua-~7YB{Ko)6^q&D4h=Cb|K^cs}8G<1h zilG^XVHu9$8G#WQiIEwFQ5lWV8G|tyi?JDpaT$;CnScqIh>4kmNtukvnSv>qim91~ zX_=1cnSmLZiJ6&&S(%O5nS(i*i@BMHd6|#-S%3vuh=o~%MOlo+S%M{5ilteGWm%5p zS%DQh8VP1%gi*@7+Eimlm(ZP||P*?}F| ziJjSnUD=J@*@HdVi@n*0ec6xwIe-H>h=VzVLphAYIf5fOilaG(V>yoFIe`;7iIX{n zQ#p;(IfFAfi?cb0b2*Rmxqu6~h>N*|OSz28xq>UXimSPXYq^f=xq%zGiJQ5FTe*$f zxq~~oi@Ujpd%2JMd4LCbh=+NEM|q6Ld4eZ-il=#oXL*k2d4U&siI;hWS9y)sd4o53 zi??})cX^NZ`G61kh>!V%Px*|``GPO`im&;GZ~2bz`GFt#iJ$p}U-^yS`GY_Ci@*7Y zfBBCAg6cm5G7tkZ2!k>hgEIs}G898I48t-U!!rUSG7=**3ZpU_qca9$G8SVq4&yQ& z<1+yhG7%Fq36nAzlQRWVG8I!Z4bw6m(=!7zG7~d13$rpCvoi;CG8c0*5A!k~^Roa8 zvJeZi2#c~9i?akvvJ^|R49l_{%d-M2vJxw^3ahdjtFs1cvKDKz4(qZW>$3qHvJo4z 
z37fJRo3jO5vK3pi4coFE+p_~ZvJ*SA3%jx#yR!#-vKM=^5Bsto`*Q#Xau5e|2#0bQ zhjRo+aui2%499XD$8!QFauO$V3a4@!r*j5pau#QE4(DU62#@j@kMjgi@)S?= z4A1f$&+`H=@)9re3a|1Suk!|P@)mFN4)5|F@ACm4@(~~N37_&ApYsJ@@)ck64d3z| z-}3`M@)JMv3%~Lkzw-xw@)v*e5C8HX0|e8524o-xW)KEtFa~D`hGZy)W*CNLIEH5g zMr0&LW)wzcG)89(#$+tUW*o+4JjQ1NCS)QeW)dc4GA3sVrerFnW*VktI;Lj^W@IL2 zW)@~;HfCoI=43ABW*+8cKIUfu7Gxn7W)T);F&1YDmSicGW*L@cIhJPyR%9hsW))Ut zHCAU0)?_W#W*ydLJ=SLfHe@3<{6&lIiBYQUgRZS<`rJ$HD2cp-sCOb<{jSUJ>KU7 zKI9`l<`X{UGd|}FzT_*u<{Q4{JHF=!e&i>9<`;hDH-6_2{^T$I<{$p$KL!Y{{|v}L z49p-5%3uu65Ddvs49zeM%Ww?O2#m-`jLayE%4m$v7>vnSjLkTV%Xo~>1Wd?8Ow1%q z%4AH=6imrfOwBY*%XCc749v((%*-sz%52Qe9L&jF%*{N^%Y4kw0xZZvEX*P-%3>_e z5-iD5EX^`3%W^Ew3arRVtjsE`%4)368m!4$tj#*C%X+NO25iViY|JKX%4TfN7Hr8@ zY|S=o%XVzf4(!NI?949g%5Ln=9_-0p?9D#x%YN+70UXFd9Lymc%3&PN5gf@;9L+Ht z%W)jf37p7DoXjbl%4wX=8Jx*koXt6$%Xys71zgBQT+Ah0%4J;6613bt>Jj^3J%40mv6FkXNJk2va%X2)>3%tln zyv!@S%4@vN8@$O|yv;kj%X_@f2Ykp!e9R|&%4dAe7ktTAe9bp}%XfUw5B$ha{LC-> z%5VJ6ANojI73 zxtN=In3wsOp9NTug;tLmw1_1c$L?9oi})sw|JX(c$fEhpAYzukNB8R_>|B1 zoG@KzxbPf_?Q0}Af)~?AOkTlgD@zAF*rjo zBttPY!!RtvF+3wMA|o*}qcAF?F*;)~CSx%+<1jAcF+LM8Armn%lQ1chF*#E(B~vjq z(=aX5F+DRdBQr5GvoI^OF*|cGCv!13^Dr;-F+U5iAPccDi?Aq*u{cYxBulY0%djlV zu{##2Cu|6BHAsewVo3JUHu{m3?C0nsI+psO$u{}Gm zBRjD(yRa*}u{(RPCws9s`>-$ju|EfJAO~?Uhj1u|aX3eCBu8;H$8apiaXcq*A}4V& zr*JB#aXM#kCTDRr=Ws6PaXuGtAs2BmmvAYUaXD9TC0B7Z*KjS@aXmM1BR6p~w{R=B zaXWW#CwFl-_i!)waX%06AP?~{kMJmu@iV$^He++PU`w`QYqnuqwqtvCU`KXhXLey%c4K$;U{Cg9Z}wqd_G5nz;6M)IU=HC> z4&!i+;7E?*XpZ4nj^lVv;6zU1WKQ8!PUCdW;7rcqY|i0a&f|P8;6g6qVlLrQF5_~p z;7YFIYOdj0uH$-c;6`rZW^UnDZsT_D;7;!1Ztme;?&E$Q;6WbZVIJX89^-MI;7Ok1 zX`bO(p5u95;6+~IWnSS`UgLG%;7#7*ZQkKs-s62f;6py*V?N{)#nep0v`okJ%)pGy z#LUdXtjxyj%)y+@#oWxpyv)b^EWm;+#KJ7XqAbSZEWwg2#nLRpvMk5)tiXz_#LBF~ zs;tK9tihVB#oDaHx~#|gY`}(W#KvsGrfkOMY{8an#nx=Ywrt1t?7)uf#Ln!(uI$F{ z?7^Pw#op}0zU;^T9KeAb#K9cGp&Z8H9Kn$s#nBwYu^h+ooWO~k#L1k(shq~?oWYr# z#o3(0xtz!OT)>4~#Kl~~rCi44T)~xG#noKHwOq&b+`x_8#Le8ot=z`#+`*mP#ogS) 
zz1+wBJivoI#KSzoqddmrJi(JZ#nU{)vpmQ1yugdR#LK+GtGveRyuq8i#oN5YyS&Hy ze87i%#K(NXr+miee8HD|#n*hpw|vL<{J@X=#LxV~ul&aE{K236#ozqHzx>AlVf3E? z8Hj-ygh3gM!5M-f8H%A9hG7|w;TeGu8Hte@g;5!e(HVm=8H=$QhjAH?@tJ@LnTUy* zgh`o<$(e#FnTn~IhH06O>6w8UnTeU1g;|-6*_nemnTxrZhk2Qg`B{JkS%`&Mghg45 z#aV(SS&F4uhGkifOmg zhGRL7<2iv7If;`wg;P0=(>a4PIg7J7hjTfP^SOWvxrmFogiE=M%ejIpxr(c~hHJTw z>$!m&xrv*(g=Xrq_ zd5M>Kg;#lv*Lj0Cd5gDshj)38_xXSi`G}ACgira5&-sEc`HHXkhHv?f@A-ir`H7$T zgrGYX?J z8ly7?V=@+FGY;c29^*3s6EYDKGYOM28Iv;wQ!*7(GY!)+9n&)dGcpr1GYhja8?!S9 zb21lmGY|7JAM>*S3$hRkvj~f_7>lz6OR^M8vkc3!9Luu;E3y(RvkI%S8mqGgYqAz= zvkvRB9_zCK8?q4_vk9BB8Jn{OTe1~fvklv_9ow@5JF*iyvkSYj8@sayd$JdMvk&{S zANz9v2XYVxa|nlW7>9ENM{*QLa}39F9LIA4Cvp-ea|)+&8mDsxXL1&2a}MWn9_Mob z7jh97a|xGn8JBYfS8^3sa}C#W9oKUMH*ym8n5#PZ}Jvz^A7Lw9`Ex3AMz0&^9i5w z8K3h7U-A`S^9|qf9pCc>9|MHbe+FbA24)ZjWiSS3 z2!>=RhGrOsWjKas1V&^eMrIU7Wi&=-48~+E#%3JGWjw}b0w!c4CT0>QWilpb3Z`T# zre+$ZWjdy324-X?W@Z* z9LixF&Ji5RQ5?-N9LsSW&k3B!Nu10noXTmO&KaD^S)9!|oXdHf&jnn_MO@4!T*_r! z&J|qARb0(AT+4M_&kfwjP29{a+{$g-&K=yzUEIw*+{=C3&jUQjLp;nQJj!D{&J#Sz zQ#{QxJj-)D&kMZBOT5f0yvl35&KtbRTfEIXyvuvM&j)iSA5Mk ze9L!y&ky{_PyEa;{K{|q&L8~AU;NEK{L6m~5JCSLkbxMOK^T<57@Q#(lA#!yVHlR- z7@iRrk&zggQ5coc7@aW~ld%|^aTu5J7@rB4kcpU>Ntl$$n4Bq?lBt-QX_%Jjn4TG! 
zk(rp8S(ugCn4LM8lew6id6<{^n4bk$kcC*7MOc)@SezwTlBHOhWmuNwSe_MFk(F4P zRalkPSe-RkleJizby%16Sf35pkd4@wP1uyp*qklclC9X9ZP=FW*q$BOk)7C?UD%b~ z*quGtlfBrReb|@%*q;M9kb^jwLpYSfIGiImlA}19V>p)MIGz(Yk&`%?Q#h5=IGr;% zle0LRb2yjtIG+o+kc+sOOSqKFxST7vlB>9yYq*x{xSkuhk(;=gTey|mxScz=le@T^ zd$^bTxSt1jkcW7fM|hOSc$_DAlBal@XLy$9c%Bz{k(YRxS9q1zc%3(Rlec)AcX*fg zc%KjWkdOG7PxzG2_?$2JlCSuhZ}^t)_?{p5k)QaPU-*^Z_?dG|R9o%dtEwup%q5GOMsEtFbz3uqJD#;r?upt|LMGrO=WyRkcauqS)5H~X+J`>{U)0 z*Ks{Ja3eQyGq-Rnw{bgna3^@Fs8ZHt+B*@9{n#@F5@ZF`w`$pYb_g@FidIHQ(?p-|;;^@FPF* zGr#aFzwtYN@F#!qH~;W2|1m%${bxW1VqgYgPzGaghG0mBVrYh8ScYSGMqornVq`{P zR7PWT#$ZgwVr<4?T*hO3CSXD)Vqzv?QYK?^reI2@Vrr&gTBc)qW?)8UVrFJxR%T;% z=3q|dVs7SPUgl$d7GOaZVqq3xQ5IuymS9PiVriCPS(amYR$xU|Vr5ogRaRql)?iK6 zVr|x8UDjiLHef?GVq-R8Q#NCBwqQ%PVr#ZxTef3+c3?+#VrOdpRbJzD-r!B%;%(mHUEbq;KHx(>;$uGH zQ$FK!zTiu~;%mO)TfXCae&9!b;%9#0SAOGn{@_pk;&1-pU;bl&$okKK48*_;!k`Ss z;0(c#48_n4!>|m;@QlESjKs){!l;bK=#0UbjK$cD!?=vc_)NfrOvJ=Y!lX>ba4+1Y{k}W!?tY4 z_Uyop?8MIO!mjMb?(D&y?8V;f!@lgt{v5!89K^vK!l4|-;T*w{9L3Qb!?7I4@tnYk zoW#kT!l|6b>72otoWfJjBC1!lOLK<2=EWJjK&I!?Qfc^Sr=|yu{1A!mGT- z>%766yv5tR!@Io4`+UHMe8k6m!l!)3=X}AJe8ty%!?%3L_x!+*{KU`v!ms?s@BG1^ z{Ken=!@vB;08#Xx0U3ya8H7O@jKLX#AsLFH8HQmQj^P=B5gCb*8HG_9jnNr{F&T@o z8HaHhkMWs+37LqAnS@E1jLDgTDVd6?nTBbZj_H|!8JUThnT1)IjoF!lIhl*OnTL6q zkNH`E1zCuNS%gJdjKx`kC0UB4S%zgjng@UGdYX1IfrvOkMp^J3%Q7kxr9r( zjLW%#E4hlRxrS@Gj_bLB8@Y*_xrJM~joZ0{JGqOyxrckXkNbIm2YHBxd4xxKjK_I` zCwYped4^|sj^}xS7kP=7d4*Sbjn{dDH+hS(_ANh%&`GsHkjo1rpG9KeI0TVJ26Eg{uG8vOI1yeE=Q!@?IG9A-1 z12ZxcGcyabG8?lq2XitPb2AU~G9UA^01L7Z3$qA|vKWiA1WU3MOS25ivK-5^0xPl- zE3*o#vKp(i25YhwYqJjPvL5TR0UNRr8?yXLAncavtY%0T*%+7jp@hav7I%1y^zvS91;5avj%m12=LLH**WOavQgE2X}H8 zcXJQ-av%5e01xsI5Az6*@)(cv1W)o5PxB1V@*L0e0x$9sFY^ko@*1!625<5fZ}SfC z@*eN=0Uz=aAM**H@)@7=1z++NU-J#$@*Usv13&T;Kl2N}@*BVN2Y>PxfAbIj@*e|4 z(|-nJAO>a-24ye?X9$L5D28SjhGjU0X9PxMBt~WwMrAZcXAH(JXAb6MF6L$)=4C$SX8{&uAr@v4 z7G*IOX9<>MDVAm#mSs7XX9ZSdC01q?R%JC-XARb5E!Jio)@41`X9G55BQ|CeHf1w5 
zXA8DuE4F4Ewq-lEX9sp49jL!s2$V5!cBuvU=OwJTc$y7|uG)&8MOwSC=$V|-4EX>Mm z%+4Il$z06MJj}~{%+CTW$U-d4A}q>cEY1=v$xM$W7eLE!@g&+|C``$z9ydJ>1KE+|L6% z$U{8LBRtAuJkAq5$x}SdGd#<4JkJZf$Vb5JG{$#yw3-G$VYt4 zCw$6he9jkq$ya>MH+;)?e9sU3$WQ#tFZ{}H{LUZz$zS}8n2?E>m`RwF$(Woe zn3AcOnrWDp>6o4wn30*7nOT^X*_fRR?oIFqwDn{zmq^EjUixR8sum`k{n%eb5?xRR^5nrpb0>$sj9xRIN< znOnG(+qj)OxRblMn|rvI`?#M6c#wy9m`8Y&$9SA4c#@}hnrC>H=XjnMc#)TQnOAs~ z*La;bc$2qyn|FAZ_jsQV_>hnIm{0hW&-k1#_>!;qns4})@A#e{_>rIZnP2#o-}s$B z_>;f*n}7J1{}>>q{xcv0F))KLD1$LLLog&mF*L(4EWbQGcY4FF*CC;E3+{>b1)}!F*oxt zFY_@!3$P#yu`r9UD2uT;ORywMu{6uDEX%PxE3hIfu`;W$Dyy+NYp^D3u{P_lF6*&A z8?Yf8u`!#lDVwo5Td*Ztu{GPUE!(j@JFp`=u`|1{E4#5fd$1>au{Zm$FZ;1S2XG(< zaWIE)D2H)4M{p!ZaWuzpEXQ#?CvYMsaWbcHDyMNeXK*HGaW?00F6VJR7jPjLaWR*0 zDVK3MS8yd)aW&U)E!S~9H*h02aWl7YE4OhwcW@_naX0sHFZXdj5AYxl@i33@D39?t zPw*s9@ifoyEYI;gFYqES@iMRQDzEW6Z}28>@iy=9F7NR^AMha`@iCw9DWCBEd6Id24Y|aVNeERaE4$=hGJ-j zVOWM^ct&7EMq*?}VN^zAbjDyz#$s&7VO+*zd?sK*CSqbHVNxbza;9KPrebQQVOpkR zdS+loW@2V$VOC~icIIGC=3;K84j-r{ZE z;a%S2eLmnrKH_6O;Zr{2bH3n9zT#`X;ak4rdw$?Ye&T0-;a7g+cmCi{{^D=`;a~n^ zfY|!afDFXI48ouc#^4OWkPOAp48yPt$MB56h>XO@jKZjl#^{W}n2g2PjKjE$$M{UZ zgiOT5Ov0p0#^g-FluX6cOvAKH$Mnp=jLgK$%)+e9#_Y_&oXo}C%)`9Q$NVh7f-JNj_kzF?82_>#_sIFp6tcm?8Cn7$Nn6^fgHra9KxX-#^D^n zksQU*9K*33$MKxNiJZjAoWiM`#_62FnViMhoWr@C$N5~qgJnVE%I znT^?*gE^UtxtWJ~nUDEdfCX8Ig;|6}S&YS5f+bmsrCEk$S&rpdffZSam05*VS&h|M zgEd);wONOCS&#MEfDPG*joE}v*^JHEf-TvKt=Wcc*^cemfgRb2o!Nz5*^S-VgFV@c zz1fF-*^m7>fCD**gE@plIgGOTWA5Cby^ zgEAO{GXz626hku%!!jJhGXf(r5+gGTqcR$!GX`Ta7GpCG<1!xOGXWDa5fd{BlQJ2T zGX+yJ6;m?}(=r{?GXpa+6EialvoagAGY4}r7jrWY^D-avvj7XS5DT*ii?SGtvjj`B z6ic%V%d#BHvjQu!5-YO`tFjuavj%Ij7HhK(>#`o}vjH2j5gW4!o3a_3vjtnS65D)VRkMbCg^8`=w6i@RE&+;74^8zpO5-;-# zuksqN^9FD77H{(o@A4k+^8p|75g+pjpYj=>^95h>6<_lW-|`*b^8-Kf6F>6{zw#Tu z^9O(O7k~2)|MDLL#M6HUWFQ7+5C&y124@I{WGIGa7=~pyhGzsuWF$sr6h>t?WG&Wa9oA(% z)@K7YWFt0a6E?yQj^_kUZs!i}!9`5Bn?&kp> z_!_=}Z{a)m9)5ry;V1YRet}=%H~1a?fIs0c_#6I#f8jq!B}e}u16jyH9tu!|5|p6= 
zRj5H78jwH}TF{0LbfE`*7{CxFfC*tDm>4F3NntXW9HxLNVJes!rh#c;I+z}2fEi&X zm>Fh)Sz$Jq9p->JVJ?^(=7D)(KA0aCfCXV8SQr+8MPV^m9F~A3VJTP|mVsqqIanT6 zfE8gSSQ%D3>*u`!SQecoCqhu$#4ps3a7#8a0Z+SXTjNU4x9_;!TE3jTnHDz z#c&CXhfCoyxE!v4E8!}*8m@tB;X1e;Zh#x%Cb$`Hfm`7=xE=0*JK-+48}5O7;Xb$@ z9)JhoA$S-bfk)vncpRR9C*di08lHh?;W>C7UVsFp#J}(Ap=>+K^_WFgc6jY0#&F%9U7286I#%Q4s@XheHg$HCV&ZHBA6H^ zfk|O9m>i~nDPby@8m571VLF%|W`G%CCYTv!fmvZTm>uSTIbklC8|Hy|VLq527Jvm| zAy^m|fkk04SR9ssC1EL88kT`&VL4bHR)7^@C0H3&fmLBOSRK}YHDN7S8`gn!VLezM zHh>LbBiI-=flXmE*c?W{7O*9Zgi){+Yz^DMwy+&+4?Dn)FdD|dPOvlV0%Ktu>GG>2L;|31`9Ca1NXc=fU}K0bB?d!NqV1jE76%GPoSBfGgoDxEij3YvDS$9&Uge z;U>5lZh>3jHn<(`fIHzXxEt<)d*ME~A0B`Q;URb!9)U;UF?bxFfG6Q8cp9F8XW=<` z9$tVK;U#z(UV&HPHFzD~fH&bScpKh4F3NntXW9HxLNVJes!rh#c;I+z}2fEi&Xm>Fh) zSz$Jq9p->JVJ?^(=7D)(KA0aCfCXV8SQr+8MPV^m9F~A3VJTP|mVsqqIanT6fE8gS zSQ%D3>*u`!SQecoCqhu$#4ps3a7#8a0Z+SXTjNU4x9_;!TE3jTnHDz#c&CX zhfCoyxE!v4E8!}*8m@tB;X1e;Zh#x%Cb$`Hfm`7=xE=0*JK-+48}5O7;Xb$@9)Jho zA$S-bfk)vncpRR9C*di08lHh?;W>C7UVsFLjNHHS;#>i3Q&X+l%WDus6ibXkU$e!(1s3lp$B~!zz`;Y31K3b7$$*9VKSH; zrhqA7DwrCkfoWknm>y<;8DS=v8D@c5VK$f@=72e2E|?qUfq7v*m>(8^1z{mr7#4v= zVKG=7mVhN;DOehofn{MiSRPh@6=5Y<8CHQ+VKrDC)_^r(Em#}YfpuX$SRXck4PhhL z7&d`TVKdkqM!*)ZC5(houoY|#+rYN49c&Lfz>Y8)#=uUnGwcFmVI1rVyTR_T2kZ%Z z!QQYB>9KoeTf zh7NS02Ynd85GH^LVIr6qCV@#|GMF5ufGJ@rm>Q;mX<<5;9%g_UVJ4UvW`S8@Hkcje zfH`3cGSd0{@79~OWGVIf!;7J)@!F<2ayfF)rmSQ?grWnnp39#()AVI^1@R)JMv zHCP?ifHh$)SR2-XbzwbNA2xsuVI$ZWHi1oHGuRwPz!tD2jD%6J6>JULz_zd*Y!5rY zjxZX=z)r9;>;hw99PA3a!S1jJ>Db=3+KW4Z~&5foI`4cphGW7vUv%8D4=`;Wc<2-hemZEqEK=fp_6OcppB158)&D z7(Rhd;WPLgzJM>`EBG3|fp6hE_#S?MAK@qX8GeCZ;Wzjl{(wK>FZdh&fq&sYNTosl zAp=>+K^_WFgc6jY0#&F%9U7286I#%Q4s@XheHg$HCV&ZHBA6H^fk|O9m>i~nDPby@ z8m571VLF%|W`G%CCYTv!fmvZTm>uSTIbklC8|Hy|VLq527Jvm|Ay^m|fkk04SR9ss zC1EL88kT`&VL4bHR)7^@C0H3&fmLBOSRK}YHDN7S8`gn!VLezMHh>LbBiI-=flXmE z*c?W{7O*9Zgi){+Yz^DMwy+&+4?Dn)FdD|dPOvlV0%Ktu>GG>2L;|31`9C za1NXc=fU}K0bB?d!NqV1jE76%GPoSBfGgoDxEij3YvDS$9&Uge;U>5lZh>3jHn<(` 
zfIHzXxEt<)d*ME~A0B`Q;URb!9)U;UF?bxFfG6Q8cp9F8XW=<`9$tVK;U#z(UV&HP zHFzD~fH&bScpKh4F3NntXW9HxLNVJes!rh#c;I+z}2fEi&Xm>Fh)Sz$Jq9p->JVJ?^( z=7D)(KA0aCfCXV8SQr+8MPV^m9F~A3VJTP|mVsqqIanT6fE8gSSQ%D3>*u` z!SQecoCqhu$#4ps3a7#8a0Z+SXTjNU4x9_;!TE3jTnHDz#c&CXhfCoyxE!v4E8!}* z8m@tB;X1e;Zh#x%Cb$`Hfm`7=xE=0*JK-+48}5O7;Xb$@9)JhoA$S-bfk)vncpRR9 zC*di08lHh?;W>C7UVsFL;oQIS;#>i z3Q&X+l%WDus6ibXkihuJxSpM&;@d{Y<;@-6zC%Q8?8*xN_g^78GOEY&mN6YWb%~0N z-M{$1S^rJ?Z){0)M8~N8%VN4jMs6Aec%A_pHrX0$pJj$m6Dx?xn38_R>Vk!xhluAY= zr&3TUsZ>;IDh-wPzimoSWuP)rnW)TE7Ah;1jml2tpmI{VsN7T@Dle6f%1;%b3Q~or z!c-BeC{>IqPL-faQl+TUR2ix)RgNl8RiG+Tm8i;86{;##jjB%7plVXJsM=H=sxDQJ zs!uhb8d8m@##9rkDbPDM~HsFqYD6-Bk8T2pPPwp2T+J=KBgNJUdIR41x4)rE?s z;;61vH>x|;gX&53qIy$(sJ>J`sy{V=8b}SI22(?*q0}&HI5mPANsXdLQ)8&H)HrH9 zHG!H)O`;}KQ>dxbG-^6EgPKXrqGnTbsJYZUYCg4qT1YLT7E?>8cxoxNj9N~upjJ|= zsMXXOYAv;nT2F1DHd33Y&D0iZE47WMV7RI!|4oE>f4M%hVO>Ds_#zPTinxQn#qv)E(+Bb&tAFJ)j;^ zkEqAg6Y44TjCxMJpk7k1sMpjR>Miw-dQW|zK2o2k&(s&{EA@@~PW_;MQopF*)F0|E z_3yvqr)h>}X^!S;ffi|rmT84nX^qxtgC?{|TeM9(v`c%mPX}~JC!iD3iRi?15;`fJ zj80Ccpi|PR=+tx?IxU@!PETi`Gt!yp%ybqyE1iwbPUoO=(z)o|bRIe{osZ5>7oZE$ zh3LX`5xOW{j4n=>pi9!F=+blBmWE7Fzd%5)XFDqW4PPS>Do(zWQ?bRD`b zU5~C$H=rBRjp)X76S^tgjBZXx&@JeebR-=`x1w9qZRoaiJGwpHf$m60(=l`>x-;E{ zj-})1u5>rLJKclsN%x|A(|zc^bU(U3J%Aoa526RtL+GLOFnTyWf*whaqDRwX=&|%T zdOSUWo=8ukC(~2tsq{2@Iz5A)NzbBZ({t##^gMb#y?|awFQOOIOXzrdDZPwdPOqR> z(yQpz^cs3Cy^dZ_Z=g5Qo9NB-7J4hajowc0pm)-{=-u=ldM~|?-cKK(57LL|!}JmQ zD1D4RPM@Gp(x>Rt^cngreU3g)U!X72m*~s%75XZDjlNFbpl{N*=-c!i`YwHszE3}( zAJUKL$Mh5WDgBIoPQRdE(y!>(^c(su{f>T5f1p3opXkr@7y2vxjs8ympnuZ8=->1o z`Y-+OzspZE48t-U!!rUSG7=**3ZpU_qca9W7?ZIWn{gPI@fe>8n2<@pBxDjXiJ2r! 
zQYIOboJql?WKuDynKVpVCLNQW$-rb}GBKH%EKF7=8GcyoQYssFfEx#CW>jrv}W2cZJBmVd!_@^k%?wvm`+S* zrVA6x#4%l&ZcKNk2h)@3#q?(SFnyVROn+toGmsg?3}%KfLz!XBaApKEk{QK}X2vjM znQ_c`W&$&jnZ!(HrZ7{PY0PwH1~ZeH#mr{rFmsuC%zS16vyfTDEM}H4@yt?Y8MB;O z!K`FfF{_z1%vxq0v!2<&Y-Bbuo0%=lR%RQso!Po>WNtCHnLEr~<{opO zdB8km9x;!ZC(Kjk8S|WZ!MtQ%F|V07%v_m1FJDHurPGzUD)7cs9Om-GKo1MeXW#_T;*#+!Eb`iUnUBbq*OW9@Y za&`r~l3m5FX4kN5*>&uCb_2VS-NbHYx3F8;ZR~b-2fLHq#qMVJuzT5k?0)tDdyqZE z9%hfQN7-ZSarOjzl0C(qX3wx^*>mi9_5yp6y~JK-udr9yYwUIQ278me#olJ`uy@&e z?0xnD`;dLaK4zb=PuXYebM^)Ml6}R#X5X-H*>~)F_5=Ho{ltD|zp!7~Z|ryW2m6!# z#r|ghuz%Tq|An9C7>?yQj^_kUt53VQIi|ftx;rep@xc=M#ZXh>^8_W&ihH}HW;oJyrBsYp1 z&5hy4a^tx1+yrhSH;J3fP2r|;)41u}3~nYji<`~O;pTGlxcS@yZXvgbTg)xt;<=^V zGHyAyf?LV0;#PBOxV79mZauew+sJL=Hgj9Jt=u+lJGX<|$?f8Hb9=bG+&*qUcYr&{ z9pVmiN4TThG442bf;-8b;!bmCxU<|j?mTyayU1PQE^}A7tK2p2I(LJ+$=%{^b9cDA z+&%6-_kerIJ>nj7Pq?StGwwO}f_urm;$CxaxVPLp?mhQ``^bIbK678VuiQ88JNJY8 z$^GJfbAPzM+&_-uX`bO(p5u95;6+~IWnSS`UgLG%;0bT?7H{(o@A4k+^8p|73HXG3 zB0e#ngip#R_DqJ~f|)Ps^v{)AJeljC>|OGoOXe%4g%V^EvpOd@epWpNG%O z=i~G91^9w|A-*tQgfGe$z1nzBFHkFUyzX%kvfZihL!$GGB$S%2(s7^ELRI zd@a5H+4fuw9Bfc@;gm20>_6q-;eLl58wy#gZRPx5Pm2>j33UA;79VK z_|g0rek?zZAJ0$VC-Rf{$@~<4DnE^%&d=ay^0WBa{2YERKaZc!FW?vQi}=O-5Fn@$U${*v8^C$R|{3-r4e}+HHpX1N-7x;_(CH^vhg}=&Q^r%0J_u^Dp?9{44%7|Av3dzvJKYANY^_C;l`4h5yQbS_+Xul+a3OEwmBZ3hjjULI-N6S@lBgziEQp{LMG=q>aS`U?Go{=xuZpfE@nEDRBb3d4lq!U$oc zFiIFLj1k5P>=JehdxX8hK4HIb zKsYEI5)KPTgrmYS;ka-@I4PVGP77y*v%)#yyl_FdC|nXQ3s;1z!ZqQ#a6`B$+!AgJ zcZ9pbJ>kCaKzJxT5*`argr~wY;kocacqzORUJGx8x57K&z3@T!D0~t=3txnVkR-Om_^JgW)riEImDb|E-|;5 zN6ahc6Z4A&#DZcWv9MS~EGiZgi;E@1l42>bv{*(gE0z<>ixtF*VkNP%SVgQVRuij> zHN=`?EwQ#(N31K>6YGl&#D-!cv9Z`hY$`Srn~M=*3$djbDMpE{#MWXPv8~unY%g{Y zJBra_jMz!+EOrrN#W=C6*iGy%_7HoDy~N&PAF;34PwX!a5C@8b#KGbaai};<94?L! 
zM~b7w(c&0!tT;{_FHR6Aij&02;uLYJI8B@`&Jbscv&7lr9C5BVPn<6<5EqJz#Kqzg zFAMqDed6W5Cy#Es%6akIEZ+$wGpw~IT(o#HNWx41{#EAA8b ziwDGm;vw;{ctkuZ9utp?C&ZKDDe<&;Mm#H?6VHnm#Ear3@v?YDyeeK3uZuUto8m3; zws=RpE8Y|Dix0$y;v?~~_(XgvJ`usk~G{swh>GDoa(Qs!}zn zx>Q4|DbCy~orZh{MEzObUO7o=o(gJCrv`AVk zEs^4-rP4BKxwJxBDXo%LOKYUH(mH9qv_aY^ZIU)iTcoYhHfg)GL)t0rl6Fgbq`lHU zX}@$pIw&2I4ogR*qtY?yxO74~DV>r|OJ}6B(mCn8bV0f(U6L+KSEQ@bHR-x^L%J#5 zl5R_Pq`T5R>Av(pdMG`T9!pQ8r_wX&x%5JMDZP?jOK+sN(mUzB^g;S4eUd&)U!&p$~hH@jhvD`#%DmRmx%Mo%5xuqN_N6D?^)^Z!Ut=vv- zFL#hT%F%L++)3^%cadY|IJv9bP3|uDkbBC#Z^eostMGDY9ckUnnX>iCR3BEDb$o|DmAs5Mop`xQ`4&%)QoB-HM5#U&8lWov#UAO zoN6vLx0*-ItL9Vls|D18Y9Y0-T0||X7E_C>CDf8?DYdj(MlGwBQ_HIr)QV~)wX#}8 zt*Ta2tE)BCnrbbzwpvH6tJYKNs}0nKY9qC=+C*)tHdC9c5o!yyr5dS5sjbx3Y8$n! z+D>h+c2GO2(Q1s^N$sq5QDfCOwX51q?XLDvd#b(E-fADUui8)TuMSWLs)N+Q>JW9P zI!qm|j!;Lcqtwyr7Jl|xU8*iqm#Zt(mFg;WwYo-KtFBYms~gmf>Lzuwx<%cpZd13bJJg-(E_JuM zN8PLLQ}?R})Pw3F^{{$GJ*pm4kELvBEdPTjeUQ@5D zH`JTzE%ml~N4=}wQ}3$})Q9RL^|AUyeX2fFpQ|s_m+C9^wfaVVtG-j;s~^;l>L>NH z`bGV!epA1zKh&S(69jn@QC)Fe&T6iwAMP1g*KXr^Xqw&rNA z=4rkbXrY!sOQuS~;z}Rza(%RnjVJ zRkW&FHLbc z+G_2z_F4z6qZX~jXq~jqS{E%=i_^Ml-L&pn53Q%xOY5!m(fVrrwEo%vZJ;(t8>|h{ zhHAsK;o1moq&7+$t&P#fYU8x=+5~N)Hc6YTP0^-m)3oW@3~i=1OPj6D(dKINwE5Zs zZK1YETdXb7;1`= zK5JjJui7{5yY@r-sr}M^Yk#!A+CPobX`Rtoozr<;&_!L+WnIx#UDI{l(1~v9mTv2g z?&_ZI>wzBX3G{?|B0aI5L{F+G)068d^ptukJ++=jPphZX)9V@ZjCv+Lvz|rIs%O)) z>pAqCdM-V;o=4BC=hO4+1@wY?A-%9(L@%lr(~IjR^pbihy|i9NFRPc+%j*^Nih3oz zvR*~6s#nvi>oxS6dM&-SUPrI1*VF6k4fKY3BfYWSL~p7$)0^uNdJDa!9;rv^t@PG< z8@;XGPH(Sw&^zkUdW_yl@2q#xWA!+_tKLoTuJ_P;>b>;ddLO;7-cRqZ56}ndgY?1r z5PhgVOdqa~&`0W{^wIhleXKrCAFof)C+d^*$@&z1sya+CO`W$_(K2M*o zFVGk2i}c0%5nrq?`YL_3zD8fGuhZA-8}yC(CVjKMMc=A#)3@t8^qu-H zeYd_x->dJ__v;7rgZd%;uzo~8svpyj>nHS+`YHXienvm5pVQCl7xatzCH=B~MZco4?|`YZjl{ziYRzti9AAM}s< zC;hYjMgOXQ)4%IK^q=}K{kQ%{|EvGgDT6i`gEcsVHv~g8Btte7Lp3x*Hw=RqrePVj z;TW#r8NLx1p^?ByXe2Tc8%d0$MlvJ0k-|u6q%u+)X^gZ+IwQT2!N_Q2GBO)kjI2gB zBfF8q$Z6yO+Ml++i5n;42S{ji?l+ns) 
zZL~4k8tshsMhBy#5pBd6os7;#7bDh)GrAhxjP6Dcqo>i!=xy{d`WpR={>A`fpfSi8 zYz#4m8pDj?#t37iG0GTij4{R<RjOoSRvT-KwZ=MQy|KaAXlybz8(WO6#x`TSvBTJD z>@s#6dyKutK4ZUez&L0eG7cL@jHAXe)*al^Q2+%j$(cZ|EnJ>$Odz<6jpG9DXGjHkvk1SBCzL=uxEBq>Qol9Lo9B}qk6lQbkPNk`I?3?w7TL^6{sBrC~AvXdMn zC&@)}lRP9Z$w%^&0;C`*L<*B4q$nvyijxwgBq>EolQN_%DM!ka3Zx>bL@JXiq$;UK zs*@U|CaFbglRBg>sYmLQ2BaZrL>iMOq$z1env)39g0v)&B#N{mtw|fwmb4@7Ne9xA zM3WfOiF77iNGypXT}e06o%A3*NiWiy^dWsoKhmEJAOp!DGMEe@L&-2QoQxnN$tW_K zj3HymI5M70AQQc|;zQ zC*&!4MxK)wev?1sFZo9(lQtQX zH93a^n6BxWz8RRInZQhFCNdM7Nz9~XGBdfE!c1wV zGEn|aKo47BUN)Ma-gR zF|)W?!YpZ)GE19f%(7-Vv%FcstY}s;E1Ol!s%AB_x>>`lY1T4pn{~{(W<9gM*}!aQ zHZmKVP0XfdGqbrFVYV<^nvrId*~)BfwlUk9?acOO2eYFYZN`|L%+6*PGuDhVyPDn1 z?q(0Or`gNwZT2zyn*GfF<^Xe`ImjGr4l#$C!_4942y>)4${cNuF~^$Y%<<*~bD}xP zoNP`pr<&8u>E;Y`ra8-;ZO$?0n)A&0<^pq}xyW2>E-~ZHrRFkoxw*nzX|6I?n`_Lq z<~nn|xxw6MZZbEUTg_>P0f>t4`uvNq= zY8A7JTP3WLRw=8rRmLi7m9xrQ6|9O@C9ASk#j0vmv#MJ)teRFWtF~3gs%zD=>RS!0 zhE^l1vDL(CYBjT(TMa#TeCO23mux!PXFKs5Q(QZjG=;TBEGd));H7HO?AuO|T|f zldQ?s6l|8_jn*b>v$e(AYHhQ&TRW_s)-G$ewa40P?X&h<2dsnEA?vVp#5!snvyNLQ ztdrI$>$G*oI%}P?&RZ9(i`FIUvUSC}YF)FgTQ{tm)-CI{b;r7E-Lvjn53GmQBkQsC z#CmEyvz}Wote4g+>$Ua9dTYJ2-di86kJcyav-QRLYJIc5TR*Iy)-UU~^~d^a{j(^W zwi%nXIh(fyTeKxxwiR2oHCwk0o7kpp*|zQ2uI<^r9oV6rz)ol3mw01f>y`90%XlJrB+ga?ab~Zb^ox{#)=dyF#dF;G)K0Ci%z%FPPvJ2Zq z?4outySQD#E@_vtOWS4avUWMUyj{VrXjif;+g0qUb~U@YUBj+v*RpHdb?mx!J-fc$ zz;0+avK!k??51`zySW`~b%5H79vD@11?DlpCyQ3X#$Jm|h&UP0&){e8g z+THB#b`QI!-OKK6_p$rh{p|ks0DGW4$R2DDv4`5j?BVtZd!#+e9&L}Y$J*oU@%99J zqCLr;Y)`SL+SBam_6&QbJgGZ`*h5yY@Z%zWu;{Xg{(a z+fVGL_A~pr{lb1}zp`K3Z|t}BJNv!;!TxA}vOn8j?63AW`@8+a{%QZRf7^fTzxF?y za%hKfSch|XM{q<(a%4wwR7Z1k$8d;aI+kNQj^jF><2!*9ItiSFP9i6bJ9B*oQzH;C$p2q$?9ZtvO77PoK7w$x0A=o>*RCtI|ZDAP9dkT zQ^YCi6myC@C7hB@DW|kk#wqKRbILmvoQh5*r?OMUsp?d7syj8DnocdJwo}Kc>(q1V zI}MzMP9vwW)5K}&G;^9e5l#!Ir4#8yIjx-5P8+AK)6Qw{bZ|O4(N2uh$?5ELable~ zr>oP=>F)G!dOE$F-cBE?<{Z@I*Xje&JriyS?VlvmOCq)mCh<> zwX?=q>#TFuI~$yh&L(HGv&Gr!Y;(3dJDi=)E@!v1$Jy)bbM`w2oP*9G=dg3cIqDp9 
zjyorulg=sUv~$Ke>zs4WI~Sab&L!uvbH%ypTyw5FH=LW!E$6m#$GPj=bM8A2oQKXM z=dts|dFniKo;xp`m(DBawe!Y#>%4Q`J0F~n&L`)y^Tqk$$!gxS^ZCP3R_a6T3;=q;4`d zxtqdG>85g1yJ_6CZaO!;o59WKW^yyTS=_8{HaEMQ!_DdDa&x6UU!yJg(6ZaKHSTfwd9R&p!5Rotp>HMhE3!>#Goa%;PF+`4W( zx4zrJZRj>~8@o;1rfxI0xf|iOa9g^OZj{@~ZSA&k+q&)C_HGBaqZ{qUxSib2ZWlM! zjdQ!Y-Q4bO54We=%kAyaE za#y=++_mmHcfGs8-RN#|H@jQht?o8=ySu~P>F#oOyL;Td?mlE3d0yLa5X?mhRu`@ntZ zK5`$sPu!>OGxxds!hPw!a$mb|+_&yK_r3eU{pfyjKf7PtukJVZyZgia>HczmyMNrj z?mw6EXpiw&kMnp>@I+7YWKZ!_PxExo@Q7!6mS=m8=X##!dw~~v3A}_}A}_I*#7pWW z^OAchyp&!lFSVD(OY5cc(t8=aj9w-$vzNuo>Sgn?dpW$EUM?@Um&eQN<@54;1-yb@ zA+NAk#4G9*^NM>Vypmoiue4XjE9;f>%6k>Oie4qJvRB2c>Q(cqdo{e8UM;V-SI4XC z)${6m4ZMb4Bd@X7#B1s`^O}1RUJI|K7wJWLt-RJ=8?UX`&TH>=@H%?YUX0ht>+E&$ zV!b%8tJlrz?)C6`dcC~fULUWo*U#(k4e$nfgS^4s5O1hA%p2~F@J4#0ywTnmZ>%@Y z8}Ci+$=(!isyEG>?#=LKdb7OQ-W+ePH_w~zE$|k4i@e3&5-;9c>Miq@dn>$^ z-YRdkx5iuRt@GA<8@!F)CU3L1#oOv_^R|0Cyq(@IZ@0I{+w1M~_In4sgWe(Uuy@2e z>K*frdnde;-YM_2cg8#Go%7Cn7rcw!CGWC##k=ZV^R9b0yqn%F@3wcxyX)Qa?t2fs zhu$OavG>G#>OJ$GdoR3~-Yf65_r`ncz4P9CAH0v=C-1ZO#rx`g^S*mOyr14L@3;5I z`|JJlD4+HjpY=JP_XS_{C13UxU-dO#_YI%;rf>PS@A$6o`Mw|cp`XA{=qK_M`$_zy zelkC~pTbY+r}9(#Y5cT)IzPRi!O!St@-zEc{H%U9Kf9m9&*|s#bNhMxyna4EzhA&F z=oj(}`$hbselfqeU&1fxm-0*dW&E;!IlsJL!LR67@+T`}+O-{{8@epg+hT><{sW`osL;{s@1hKgu8NkMYO) z@V@-{iXggf4RTH zU+J&%SNm)Hwf;JPy}!ZV=x_2j`&<02{x*NRzr)|@@A7y1d;GorK7YS|z(438@(=q* z{Gb&w`V8>9=;2N{BlL8c&ckR`|(WDBwfIf9%)t{``iC&(M*3-SjA zf`UPzpm0zmC>j(CiU%cvl0m7UbWkQJ8doM3J+FPI-J2o?s5g2lm-AU;?cEDM$g zD}t55s$g}nCRiJ+3)Tl4f{nqZU~{k~*cxmLwg)?cox!ePcd#ee8|(}A2M2QCO8|M3(f}@f{VeW;Bs&!xEfpwt_L@Qo58K%c5o-S8{7-- z2M>aW!K2`D@FaK|JPV!&FM^lBtKfC;CU_gX3*HAGf{($c;B)XL_!@i*z6U>opTV!- zckn0p8~h8XkPexU4Y`mHg-{HoP!5$)4Yg1YjgW+9XoYs@gl_1Cei(#dm>^6TCJGaW zNy4OIvM_m=B1{>k3R8z^!n9$!FnyRI%ot`0GlyBitYNk=dzd548RiOehk3%hVZJbb zSRgDI777c8MZ%(Cv9NepA}kq}3QLD&!m?qxuzXk{tQb}bD~DCWs$sRTdRQZ@8P*DG zhjqfbVZE?^*dS~eHVPYuO~R&Ov#@y>5w-|hhLK@Z*eYxtwh7yY?ZWn9hp=N99ma&6 z!p>orFgA<}yN2Du?qQFxXV@$39rg+PhW*0+;ec>pI4B$(4he^b!@}X=h;U>$DjXe- 
z3CD)x!tvpRaAG(qoE%OGr-swQ>EVoUW;iRH9nJ~ohV#Pt;ev2sxF}p4E(znqrQx!0 zdAK568LkRfhik&M;ks~rxFOsaZVEStTf(j3w*R$v&*73J>B7LBJx5c;Bx6QZR zx5Ky7x68NNx5u~Fx6ilVcffbhcgT0xcf@zpcg%O(cfxnlcglC#cgA$YVlndoXc~D-I59LP%P(f4(6-Gr+QB({SMSAQR14Kcbx>X8hw7pFr~&dv4N)W17&Spn zQ8UyWwLmRVE7TgbL2Xex6oA^J4yYsQggT=xs4MD*x}zSbC+dZIqdurF>WBKH0cao^ zga)G_Xeb(nhNBT^BpQWAqcLbK8i&TC31}jkgeIdYXeye9rlT2XCYpt2qd90Unuq42 z1!y5!gchSEXenBTmZKGDC0d16qcvzPT8Gx74QM0Ugf^osXe-)=wxb|1Cc1@g zqdVv>x`*zg2k0SsgdU?O=qY-Jo}(A&C3=Nkqc`X+dWXJ4-=p{F1Nw+Qq0i_G`ij1x zA5a(^7Kg*(aReL@N5YYD6dV;t!_jdJ923XFv2h$67stc#aRQtWC&G!b4TDh7x%;c@c=v!55j}-5IhtQ!^80iJQ9z>qwyF#7LUW@@dP{( zPr{S&6g(AA!_)B$JQL5tv+*1}7th1<@dCUMFT#uQ61)^I!^`msyb`a%tMMAV7O%tW z@dmsRZ^E1L7Q7X2!`tx=yc6%jyYU{p7w^OS@d11gAHs+65quOM!^iOnd=j6+r|}tl z7N5iC@dbPlU&5F16?_$6!`JZ*d=uZoxA7f(7vID8@dNx2Kf;gk6Z{lE!_V;x{1U&y zukjoF7Qe&a;qUQ#`~iQ&pYUh=1%Jig@DDf)2}{C}@FW6>NFtHQBnpX2qLJt%28l^x zk=P^-iA&;<_#^>IND`66#77Xpgb+#?;Y1Kg6w$;ZhFIcA5|Wf8Bgsh$l9Hq%sYx1= zmZT%;Nd}UUWFnbK7Lt`@BiTs~l9S{jxk(<9m*gY)NdZ!j6e5L55mJ;CBgIJxQj(M+ zrAZl5mXssqNd;1oR3ep06;hQ{Bh^U_Qj^powMiXPm-vx-q&{gt{7FO7h%_cmNK?{` zG$$=cOVWz8CT&Pt(vAd>_M`*pNIH?uqzmavx{>ar2kA+Ak=~>a=}Y>N{$v0dNCuI? 
zWC$5bhLPc91Q|(2kx8`D6iE zNEVUBWC>YHmXYOT1zAZ}k=0}kSxeTD^<)FtNH&qpWDD6!wvp{*2iZw>k=EwWEzD=rO{|~8iU59v1n`>hsLGxXndN0CZvgIV(O!aVoE5bjB+Zd zq>5_lQ9~_tGzm>glhNcf1x-m)(bO~zO-s|!^fUv_NHfvQGz-m2v(fA{2hB-y(cCl- z%}evq{ImcqNDI-zvCbTJSMw`I(ht8$*=zO|>E~Ja-V!DJbrOW7Yx`M8xtLSRFhOVXS=z6+= zZls&&X1axLrQ7Isx`XbdyXbDZhwi2O=ze;D9;AopVS0ofrN`)TdV-#$r|4;VhMuM8 z=y`g9UZj`kWqO5PrPt_ndV}7ix9Dwphu)?4=zaQtKBSN6WBPkTqhBSrgWjHDk?L z3)YggVy#&l)|Rzn0jxdiz&f%{tTXGvy0UJpJL|!EvR%;o8eyl$mzy`8GY%m+b zhO%L7I2*x6vQca_8^gx3acn%Bz$UUuY%-g|rm|^lI-9{}vRQ04o5SX^d2Bvgz!tJa zY%yEHma=7RIa|S2vQ=y~Tf^3}b!@YjRj33z%H^&>@vH;uCi@j=7p0a1`IeWogvRCXid&AzcckDa%J$uhSu#fB$`^>(uuk0K9fra5= zc{m=PN8k~8Bp#VZ;Zb=s9-YVFF?lQ=o5$gCc|0DUC*TQrBA%H0IO3QSPC4V83og0h zntR-E%ND!EPs7vlbUZ!Jz%%kpJTuS2v+`^_JI}#$@?1PO&%^Wb zd^|rdzzgz1yf81qi}GT;I4{9V@>0AsFT>07a=bjRz$@}fyfUxCtMY2RI;w$ zufywdKVFa5=MA_&Z^#?*#=Hq{%A4`#yajK`Tk+Ps4R6ca@c`bQci;c|YEt58wm&AU>E6;Y0Z_KAex>Bl##knvdaQ`8YnFPv8^zBtDr> z;ZylEKAq3tGx;n&o6q5M`8+kDup*oYFCvJDB9e$KqKK#>nuso9h?pXlh%MrX zxFVj2FA|7^B9TZed;$q9h@gTAE`*Rm2`xNfgcVLC5lKZdkzAw@DMc!gTBH$aMLLmQ zWDpreCXrcW5m`kxkzM2vIYlm!TjUXWMLv;V6c7bPAyHTq5k*BYQCyS|B}FMwT9grG zMLAJkR1g(KB~e*a5miMsQC-v!HAO8^ThtMCg`cP=>Wc=#Uo;erL}SrJG!@N6bJ0Sy z6s<&S(MGft?L>fRFFJ^hqLb(>x`?i#o9Hfjh@PUC=q>t)zM`M#F9wK#VvrathKQkJ zm>4cbh>>EH7%j$#v0|JUFD8hIVv?9FriiIxnwTzTh?!!Rm@VdrxniD}FBXV}Vv$%Z zmWZWdnOH7Xh?QcMSS{9wwPKxEFE)scVw2b`wur4_o7gUPh@E1W*e&*my<(r(FAj)< z;*dBjj) zFCK`8;*oePo`|R7nRqTWm&mQiF>8BIo)F=R{`OU9OQWLz0f#+M0XLYYV=mOhCjmPAs?B$q-;sic;k zG}1~ZlgOkpnM^KI$dodbOfA#Mv@)GcFEhxDGLy_Kv&gJ6o6IhA$ec2l%q{cCyfUB6 zFAKcZsE8_(imaljs4AL@u41T|Dwc|^;;6VP zo{Fy$sDvt!O00YeDXfU1iYcyyl1eG9JY|$sP9;%ERWg-arBEqVDwSHLQE62=m0o2~ z8C52gS!GdKRW_Afp@fNHNgsE(?W>a4n`uBw~ru6n4Rs+a1m`l!CDpX#p$sDWyb8mxw>p=y{K zu12VlYLptS#;CDsoEontsEKNlnyjX%scM>-~sD)~gTCA3+ zrD~a4u2!g(YL!~8)~K~=om#IpsEulq+N`#yt!kUvu6C%MYM0ur_NcvTpW3eusDtW| zI;@VUqw1JCu1=_v>XbUI&Zx8MoI0;AsEg{7x~#6KtLmD%u5PHC>Xy2#?x?%!p1Q9d zsE6v2daRzPr|Ow{u3o5@>Xmw}-l(_go%&9FuimQ<>ZAIkKC3V4tNNw_=Qq<~bvPYf 
zN6-;wTxlW-|>Qp+lPNUQ6bUMAxpfl=BIRdXv&ZG0{d^*1_ zpbP3ky09*yi|S&!xGtef>QcJ2E~Crpa=N^(peyQ1y0WgKtLkdHx~`#X>RP(CuA}Q} zKV47P*A2A4Zm1jS#=41as+;NNx`l43Tj|!ijc%*k=>XkcchDVmC*4_h(Oq>n-Cg(4 zJ#{bLTldj@bwAx-56}bkAU#+Q(L?nxJzS5_BlRdfT946V^*B9VPtX(fBt2PA(Npy_ zJzdYxGxaPzThGyR^*lXaFVGA1BE48I(M$C*ydC0>a@uwN%F~|b8P9soOX4NCO|O<$+pFW%_58egUVX2D=kGQ28hMSq zCSFsonb+KF;kEQyd9A%RUR$r77vQz`I(QwuPF`oPi`Uib=5_aacs;#dUT?3D*VpUk z_4fvN1HD1sU~h;w)Enjv_eOXly;0t1Z;UtA8|RJpCU_IQN#0~{iZ|7p=1uozcr(3O z-fVAZK6oF!Pu^$mi}%(0<^|4gV8WVkCcKGYBAQ4hvWa4%nrJ4viD6=zSSGfK zW8#{4Cca5v5}HINvGEyXupx#TX1EbX8fCQcj4{?Ylf)!7$xL#S!lX2*Olp(Hq&4YG zdXvFqG?`3hlf`5;*-Unm!{ju%Om36M0x@BUZ%I{WBQtYroS0r2AV-;uo+^8 znqg+R8DU16QD(FmW5$|sX1tkTCYniRvYBG0nrUXbnPFy{S!TAGW9FK9X1-Zq7Mew7 zu~}l4nq_9WSz%V1Rc5tWW7e8=X1&>9HkwUlv)N*{nr&vg*VTX1_UL z4w^&eusLFmnq%g;IblwkQ|7ceW6qj$=DfLJE}Bc`vbkcenrr5|xnXXaTjsX8WA2)J z=DvAg9-2qyv3X*inrG&@d0}3fSLU^OW8Ru~<~#Gfd2c?LkLHv4Y`&PU=9~G!gt1|5 zI2+zZun}z}8`(y&QEfCE-NvvnZ7dtx#<6j2JR9F8unBDW zW9wQ!ThG?F4XnRyXdBtawux7kD+19p=ZEM@v0NdVnupMnD+u3%pU2Qkp z-S)6OZ7Wp#?I=6ijuoLYhJK0XL zQ|&Z6-OjKx?JPUn&ardtJUibmunX-XyVx$VOYJhd+^(=I?JB$4uCZ(FI=kL(up8|r zyV-8BTkSTx-R`hE?Jm39?y-CAKD*x@um|lSd)OYaN9{3t+@7!}?J0ZOp0Q``IeXq- zuovwmd)Z#GSM4=>-QKV_?JaxT-m!P>J$v6iun+Ac``A9QPwg}N+`h0c?JN7*zOirZ zJNupe-oCdV>__{_ezsriSNqNWV8ghuE}RSRBDjbyl8fx3xTr3gi|%5$m@bx!?c%t& zE}o0;61apekxT4+4ms?IqmDW5gp*D=?L23kb+X8Ep01bc?fSUBuAl4g2DpK4kQ?lVxS?*C z8}3H9k#3Y5?Z&vVZk!wMCb)@ilAG+NxT$WMo9po?e4g{?w-5v z9=M0@k$dc(xTo%!d+uJim+qB&?cTVz?w$M2eed485ALJ;?A*3c`jY%c`X|>X%Y69mz!JIvVmXQ zwtk&Mu1dGGfM0-rqgHL3)c0%t%l9p+AM#4)po`e0%RF!iJZ}rL?8wU zNI?d2P=FHr)!f5@7ugHB{c28vz{~E1+~8V)ftTP5xg`UWgDJq2U@9;*mrUTQ1 z8NiHSCNMLY1q^}zaI1JECA2sQ#6gH6Du zU^B2e*aB<`wgOv&ZNRo*J1_uj4|V`Mf}Oz5U>C3}*bVFs_5gc=y};gJAFwaj59|*P z00)ADz`@`Ua40wo91e~EM}nik(cl;1qBwI1QW*&H!hEv%uNl z9B?i;51bD!02hLbz{TJaa4EP9Tn?@PSAwg+)!-U%Ew~O`4{iWAf}6n2;1+NzxDDJ6 z?f`d!yTIMx9&j(X58MwP01twPz{B7X@F;i;JPw`!PlBhw)8HBKEO-t)4_*K-f|tO{ 
z;1%#Hcn!P`-T-fcx4_%r9q=xA54;aP03U*nz{lVd@G1BVd=9<K_znC447?(3@GUGD4h#=Q03(8tz{p?}Fe(@gj1I;CV}h~3 z*kBwmE*KAt4<-N;f{DPypbtbK1_?+(269k<5>%iDJYzjm<~)2W&ksSnZV3o7BDNA4a^Sa0CR%5z}#RSFfW)7%nud-3xb8f!e9}wC|C?E z4we8*f~COHU>UG1SPm=?Rsbu4mB7ki6|gE;4Xh5<0BeG^z}jFPurBBa)&uK<4M2ae zA=n6P3^oCqg3Z9@U<j!8Bl6Fddj4%m8KtGl7}GEMQhJ8<-u;0p(O7D}j~4DqvNx8dx2y0oDX-fwjRpU|rA; ztOwQy8-V^`L$DFp7;FMI1)G7*!4_akuoc)EYy-9h+kpXKd$0r85$ptZ2D^Y=!ERu8 zum{)^>;?7)`+$AHeqeua05}jF1P%s=fJ4Dy;BasRI1(HMjt0koW5IFYcyIzZ5u5~0 z2B&~i!D--ha0WONoCVGX=YVs;dEk6-0k{xc1TF@bfJ?z;;Bs&UxDs3it_IhDYr%Ek zdT;}{5!?iB2DgA)!ENAna0j>(+y(9i_kerBec*oZ0C*5Q1Re&DfJeb&;BoK-coIAX zo(9i=XTfvedGG>w5xfLm2Cslu!E4}k@CJAjyanC{?|^r~d*FTW0r(Jn1U?3zfKS0^ z;B)W=_!4{tz6RfbZ^3uqci{Knd+-DJ5&Q&x2ETw`!EfLXVBl;tA=`g292g#q07e8O zfsw%|U{o*~7#)lO#sp)5vB5ZCTreIOA4~uy1QUUYK_7@f3=)un4CJ5yC8$6RdY}O< z=)fdkQZN~q983YG1XF>j!8Bl6Fddj4%m8KtGl7}GEMQhJ8<-u;0p(O7D}j~4DqvNx8dx2y0oDX- zfwjRpU|rA;tOwQy8-V^`L$DFp7;FMI1)G7*!4_akuoc)EYy-9h+kpXKd$0r85$ptZ z2D^Y=!ERu8um{)^>;?7)`+$AHeqeua05}jF1P%s=fJ4Dy;BasRI1(HMjt0koW5IFY zcyIzZ5u5~02B&~i!D--ha0WONoCVGX=YVs;dEk6-0k{xc1TF@bfJ?z;;Bs&UxDs3i zt_IhDYr%EkdT;}{5!?iB2DgA)!ENAna0j>(+y(9i_kerBec*oZ0C*5Q1Re&DfJeb& z;BoK-coIAXo(9i=XTfvedGG>w5xfLm2Cslu!E4}k@CJAjyanC{?|^r~d*FTW0r(L7 z>5%=uWU{#Mk3x>jM-DnJ5;jb@Fya3@S=@goiwhemY~=q=7Wd!D;{H2X+SKfA$f|Je;^Hwsz#f>!mK`qvK#Iel>c z-=3RIXEr>*t|^Goi2l`uRM+PX6u2{QO+bF|+f{&i|K6IrP)*0{mNto!2g) zRqKdB*}1f>U&}`Rb%G~0hS`tH2Q3M%E%O7*qisN#kmZXD22N{SI4H}5KDRtL!|Sy0 zYw6d>zwP`MO*;8E2rY$ip`{S^m!%LkxD-N9I{!xM9L%Pe0;jkc0Q?)CmKasAd@^O zaWrAvKS^_F75{5F`->iJX_?^e?YC1peqO^?tAxaAj3IHjyqAx$B_zz)7`OK8q3Lz{EO>c8Dg4hD8Ya#f2H{^#TUnScBPLc6fP z)}Gh?>IW1kRPnE*eAm!A=zaM3VgAHFPWj(!Ex%5ArHTdqO3K50-8KJfV9Ni*->dEg z{&TAPoEA+&>-d;I>GQ+D?XT65ztAuJv_%bBQ)s^v+OLH6Q*rYCNlO1-ZT{Qa{m?2H zx*?AM8TF6F|99fPz&7SLe$3o@er@NrYUv;HAfRKbpa)$-mvosMx@3Mp$bJ#-7E}7C|rkJgoe~@G&^F3#~>!Ztwmr zLHu8+PCo_me_WkD^!(%3LpOULdj4w)G^jXs1%~kdhCqH<=*H{k+pf@d{oidD{2woP zZ}8V!fPc@n>p#B6EDnx!>ijHT{qOEy%O7`B|G(e;`<*^Kbc+|drTc3k!9TwnI3;M8 
z?>Bb?L;tZ9ChR|sH^uEY<=Bb9zpnjqylHOWR~^m!yPM|mp?i_D|ATf$0;99u^V<41 zNN2305m@3*oJ4_Wc z5E%OLpqS@32I4}i-LC_!h)}ALz`)_o;9C?hD){qi3K3A?IAG?CzYzfi27?X!JI!nV zzm=#3L2C*mfN{|WRSDUR=yGB%iB*+Q-5M)&Tyka5*jVuDem+39qGIUM-$-ys znJ;MV-?mAs1_R-*_rY0PDl}^&KoyJ#Mgk*)QNXBRG%z|C1B?lVWbNXRRn{m4`=Cq9 z1}EV+lDI77oe-G^$KAh|n}Hb;n1#PHbQ&jb@XKLBhiXEHenN+KB)qQkhNSS9VfV7& z(c3?0H$RQ?_YbV4K|S=6p9VL6acm&&yLI1Q9+SWBt6Ta7^bYDc<_11k(6UwA7Jkit zqo02_Z|0YDzwYOQzy7B;2M!B{9EFJc+wZQ|Dxh(20`s+N;+Ljkt7e^B1%JcrQa^rf zVA4Ws>;5W*f%{Qy1251JeE0&Y%1?(9LTbxTyR)JDqak_zj|Ue1v0qwHA^7VzYFKj0VJ^?$=JnA0HOx04?<4E9gbQ%25+UT5scE6Lc|9&v^i%Ru>Exh=7nFo%71a$eGj1255q~En{i37u+A4krA_*QRS g(DTxx-iHeegXDBItlR|%d;WBg{ujCSuj#7)ANK+`SO5S3 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.2.pickle new file mode 100644 index 0000000000000000000000000000000000000000..9777319465de6a012f3d071e436bc07600ee66ae GIT binary patch literal 127923 zcmb^42bfgFx+v@w$T{bnbIv&^l`x=^R1_5xD2gIcQ52CN5=0Rt znRCQ^d*F4Sz4zVs-2XiPt$FCTS9kSVRkdo>S9I4*&q}j=^YAc8O*2v|Bu68Mrp!o5 zPBSiPaIKW&0%Q7(8rW}qwE?5YB~=?bYGBfor191I_Zy#-lANi}sHDk#I;1|9vMeRJ z;D5?Fa^i>yLlarMr22Q()T5*u(XW5fi18`C68H0tnmBUI)XRss^n!ll#`T+;lANW_ zxTFE2M~$B_ZsLFmsfQ@^pAIo-+~{dZqf(Nyw!3tvYnzsrUcD@(Q!qEBb=W^;eoFV0 z3Z24E8Rn)Wr@eHjgaao`9g~#0&uD1PM9xm()cI6b9n*RCD^N~B9Soq`D|{TH;( z5CmZuMRA;x*c_65NUqeq56SaS`yNs>HCK4~CpA}6a>nta$4wYLZeY^5l&*;yCrwi0 zA?b#sNlyFE&5>qQzmbV1O*?4lxbYJbjX$v8gnlVQvj3yJ=@Td6pQ`m=2TqeHMWN)V zTZ4bD#E@eDT7~ARRY+~=xPQNKDXF*961Rg!k51f+W%VFC~?e2m-jb%RHD@Jgvq1-YadC8LMJM7>C_IXKBR5Zr9F>N z&MUFOl?E^O5bPv#Qv8a z&2sr5N|Kzm&!v`5&ik)DYeLevQT-CVGdWAE5u*nT@6vBnzrl%tAQj0NZ$drHF|KCnorpw)=%RhIMG|6F;fA8ywJ+zKSjZaBjE{UHBDf3c0 z@`Bb^2En0X`&#~(GYE=p>a{r4kuASFo$B@@^Q!)oF!R8|(^I{e|K#AG6Sm(la!aZ! 
z8bxU@CcLnJMUPbL^s2n>V$RgvCPx#dC9aD!gT^I|P3d~M|1In~WKnWHjWc0<(zv0C z;b!!pLF1DqBw8hNtBK=>j!GIozD;6y_(yk0Gko&E@u{6E-Q|oa$r=7`$wV(mZP0;9 zBNCTW>WGl~lX}f2hocg8OwKrJ^th4zMhu;nl(JyrbHOQn@*Yj_lr2ZT+pF){$=3!j|EK=CrXrh#{TnOwk&9!CcPY~Z96aV zlbUtizm6KIEwf=k>;EUU9vCJB-c3Epsp(P!C(L+h>r&zW(I5Z+Ry9*%pQ(-UkE&j}9{&zY|8clYZO7E% zcF4#?MESQk_TRr?)c^DaWB#WvNDXqvmDB;MMzxxCs@1#{#M1RojVza}b|yYVSQ)=x~la9G{MSsd0N zag)dpUCuJBabk1G-hX>K?v&=ApFZ_(Pe+~Ne}4MAiAl+6`XwfNDMJQLOiW4oCujXf z)Ez!FF)>I^d#M5`6Z?-(jt3;FnOcjqLq|=hSuZ6y|38Pf#FI&bM~@pipx=nZ5Z?Nq zw*E1MXYTWNrqlzbOD_D+uTLE9(yTKvbxXv90g1t_!>9@M>ij*pC5}6!+J8FFT|S8u zJ>#bZty&bX>hr1nG&Nh8V zP1%h97F2_l9&g3gY{Rx}$M(E}9oUhb*qL3}mEHLFpnH|=?(D&72n^ zIFqwDn{#+8Z{u9f<9y!E1zgBQOy(WDlXr13mvAYUF@?*yf_L*C-piF-#noKHwY-n_ z^8v2ogM5hV`7j^hqujv9_&A^7lYELBxrtBn8E)pYe2!cAJYV2ezQ}Fd&K=yzUEIx= z_%iqK6~4;7e2uSjAK&19zR3eT$U{8Lw|Ina^C*w;I8X2$p5(iHkEi%PPxAwQ$TK|4 zb3D%r{D>d(6Mo9i_&LAem;8!f^BaE4@Ay4`;E()?Kl37g;jjFSzw;0N$-nq_;19#g z4@8WahH06O>6w8UnTeU1g;|-6*_nemnTxrZhk2Qg`B{JkS%`&Mghg45#aV(SS&F4u zhGkifSxr9r(j452s6}+4G@LsOuDz4@luH}8apAT>yALK(^ z&xiR4ALRx<#>e>tpX5{A$W45j&u}xJ<#XJ^=lKG+@ZuuH-7N<{GZ$eY~F!a2+4yLtM{?`3N8720q5e`2?TjQ{2c+ ze45X2GoR&i+`{Mi0=M!-ZsT_D;7;!1Zob5qxreXtRqo|$e4YFF2KVz#9^gS9;$gnU zBYc}jd5p(-g75Go-{pHe#rJudAMit-;aQ&Jd0ya0{FtBcQ+~$J`31k^SNxja@LPVz z@A(6NM_d`aEdFeCGM3%I+(=k0WFe5WDGqW%&voSk! 
zFeh^{H}fzr^D#dQupkSuFpID#i?KLMup~>dG|R9o%dtEwup%q5GOMsEtFbz3uqJD< zHtVo1>#;r?upt|=M#LAPjMqR@o7H8&3u;6aSNa43*5>V zxsBVogFCs4yZI7d<{rMnSGkw3@pbOw8{E$~d4LCbh==(WkMM0C z6yN7*e!vfThG%(>=Xrr2@nfd0$+)ycD>+@C0iy?|eoX)Gi_9*6-kw;DIJDOPbxl#L z!|7AwaTr8FoLFy?HnCnOePUfn#voJT6ZI@X)*xGuJ;)K{3~~jzgFHdrAYYI_C=e73 z3I&COB0AmCPCAnS8g8spPU|^6G35h7CI?f3sll}1=3sg-Be*4)8O#c12Xlg3gWH0+!MtF8 zaC@*ISQsn{l7l;fJA=D|#lezbX|OCv36=*dg1duzf_sCN!Kz?&uqId=+!x#*JP@o4 z9t<7|)&~yEM}QbMS2NT(Bj0K6oM68oU^63$_P4 zf}O#xV0Z9R@N%#xcqMo>*c-eSydLZe-U#*wZw3c~gTbNTaPU@eBzQYG8XOCb2PcAe zf|J3!!F$1};Qioq@Imlla3(k#oD0qe7lMz1kAqKwPlL~b&x0?5FN3dwuY+%bZ-eiG z?}HzLAA_HQpM#6RFTtQj!!lvnuv}O^tPoZV zD}|NADq+>IT39`-5!MWAg|)*vVcoD^SU+qKHVhkujl(8k)390CJZurR3|ob*!!}{t zuwB?bydvxnc1$d@=^ShPNI+VHyY`tXME#;|92Q`jr)9rg+P zhW*0+;ec>pm=q2Q2Zuw#q2aJ_csL>)8IB4^hhxIA;ka;oI3b)EP6{W7Q^Kj?wD9I| zdN?DzC7c<~3TKCN!dt`J!nxtRaDI4uxFB2@E((*wJHk7|yTZlcl5lCbEKCWPhbzLn z!+XMe!10i7;X!uid^Ow~z81b7?hD@t_lIwW z2f~Blq403{R(K?QJ3Ja53y+5Tb@O1b=_+fY^JR6=1&xaSnkHU|` zPr^^b&%)2cFTyXwufngxZ^CcG@51lHAHpBQpTeKRi{UTfuiujJ*pAajA})-qdHODs9sb*Y7jMy z8bytxCQ;LJW8|Iz^qME>YL0TXbb~Rn$G|5nUZ! z6I~l!7hNCS5ZxH{jBbj0MZKdwQQxRv)IS;!4UCeaLDAr7NHjDW77dR^L?fe7(dcMQ zG&UL+jgKZo6QfDd zb96EKCHgh`E&4tBBl%7H5xh#5v-_-;kZa#G%gkwk4wZQ<5F?yxJ+C&E*F=NE5sG!N^#}5N?bLr z7FUmJ#5LnuaqYNHTsN*4*N+>-4dX^}Jh%#W%;(;~DWS@yvKu zJUgBf-x}W*&yDBB^W)p&1@XdoQJfs#5#Jf#6)%pL#7pC4aZ0>AUJ>6N-xJ>(uZ&m4 ztK&8C+W5Zs{`i4-UHoADP`o~VIDRC4G~N(D7C#<85kDC}6>p3;#ZSl2#GB)1Nu@z(gocw4+Z-VyJNcg4Hom*SV>J@G5?tMT6WwfOaTU;IYAKYlYl5Fd;W#fRg! 
z;v@0f@zMBLd^|o8zZ0K~-;LjkPsQ)Yr{fRe592fO+4x+1KE4ot6n}i_S0j=$^ttp4 z4@m=4S5zmbNh}C#kTR@m;$8U5i}#0JmAL!gFP~3cC~WzBzr;fA{)vByMSo#p$zhoI zWrJ`~;@{x^T1r2xcj^xReJ;`|btj2m$VeZAQJf}iy42~*u%U^Zsf9{irC7Vxe<;5a8`pvHtFws|MoV+|9A~;Ci~0GEX>Mm%+4Il$z06MJj}~{%+CTW z$U-d4A}q>cEY1=v$xCvh^Ta4M(qW=`h}-olxj z#o3(0TX`GjavtaNb}ryTE@Cq8;GMjSi@AhLxr`}X&K10y_wZh>kJTYksy`2&CCPyCq|`3ryLZ~UEq@K655#9J4aQpkug z(=aX5F+DRdBQr5GvoI^OF*|cGCv!13^Dr;-F+U5iAPccDi?Aq*u{cYxBulY0%djlV zu{##2Cu|EHv!Zozrh>h8VP1%gi*@7+Eimlm(ZP||P zc?CPLBRjD(yD)VIotnDwN~TVyQ&V^LVCqaZHC@ANc^$9k4ZM*(c@ujvb(Wo)`miti zu|EfJAd@(VgE@q$bMn+QjKevCBRPtrIfi37j^jCj6FG^KIfYX>jW=^TXYdxz)0@8kV^fa~}m zAL4pG%t!brH}Ek&&L{XJpW;St;?sPFoB1rC;}$;87r2!#avQgE2X}H8ck?B_%sqUC zuW~P6Y}ipYk()&M){Szv9>YhTrl#e$OBHBY)!0yvSepD}Uqf{DXh;FD5>?xRgRhjG2aM znU3k1ff<>JnVE%InT^?*gE^UtxtWJ~nUDEdfCX8Ig;|6}S&YS5f+bmsrCEk$S&rpd zffZSam05*VS&h|MgEd);wONOCS&#MEfDPG*joE}v*^JHEf-TvKt=Wcc*^cdb1v{`K zJFzpnuq(UqN?yh8?7^#f4X@>Oyq-7kM)u@Q?8V;f!@lgt{v5!8OyVF8<`53$Fb?Mk zj^rqg<`|CUIF9E8PUIv`<`holG~UeVoWWZ-le0LRb9gIn<6O?;eBRClT*yUC<{i9~ zcX2V7a4DBDh0D2uck>?J%avTk)m+21ypQ+u0j}eNe2DA$FdyNg+`z~9IG^B?e2N>n ziBIzxZsxOmj$8OVU*J~0$Zg!t9o)%X+|8HxGWYNmzRJCPjjwYb-{5|}$pbvdLp;p4 zc!Y2BD39?tPw*X{_e5-iD5EX^`3%W^Ew3arRVtjsE`%4)368m!4$tj#*C z%X+NO25iViY|JKX%4TfN7Hr8@Y|S=o%XVzfE7*Y@*@>Omgyo-yugiE=MDO}DKyqov%UasUS zuI3u9<$b)L4{#kHggm$`?p@Kx^RYkZyi_y+g$O&;Ju9^zrX#Up&1M|q6Ld4lioB;VzGJjM5U znji2(p5a-Z<9S}-NBo$d@Kb)q&-n$v8sP;1APWo(DvXnTBbZj_H|!8JUThnT1)IjoF!lIhl*OnTL6qkNH`E1zCuNS%gJd zjKx`kC0UB4S%zg9=w{@@LFES>v;ojWKZ72UhK_2 z?8|=a&jB3BBo5+W4&hJ^<8Y4PNRHxYj^S92<9JTsL{8#lPT^Ee1 z_%T1>r~Hhc^9z2-ulO~;;kW#b-}49l$e;K#FY*`u%HQ}q|KOkei+}%d%rLF^&zNbL zmg$(D8JLlon3-9amD!k`Ihd2Vn45W+m-(2V1z3=USeQjvl*L$_C0LTBSej*6mgQKU z64&!i+ z;7E?*XpZ4nj^lVv;6zU1WKQ8!PUFp-&KbOgGdYX1Ifu9MHqPZd&gbo1z=d4IWZuC$ zc^4OR372viQ@ETfcsK9iy^963@i`>TT+`*mP#oc^~FLMuH;j7%s*Z4a3@eS_hn>@gSJjBC% zi%0l2kMbCg^90}FNxsYXc#7}yG(X^nJj1g*$Md|vkN7b^;ivqJpYscT$*=e|zu~w1 zj^FbK{>Y#BGcWQN{>tC@JOALH{EL_Vz<46`hv~$B#!SPsOvm)hz>Lhq%*?{9%*O1@ 
z!JN#++|0wg%*XsJz=ABq!Ysm~EXLw2!ICV+(k#QWEXVS!z>2KI%B;ewtj6lB!J4ea z+N{I6tjGFnz=mwZ#%#i-Y{uqn!Io^r)@;MJY{&Mzf*shAo!FUO*p=OQC9h(4_TbgL zhS%~sUe6nNBYW~D_F`}LVPE!Re-7Y4CUFo4a|nlW7>9ENM{*QLa}39F9LIA4Cvp-e za|)+&8gJ%w&fqPa$yuDuIlPs(aW3a^K5yp&F61I6^A6t0ySSK3xRlG7!sT4SyLk`q zHSN_J| z`3L{xU;O*aFv9fWKVx3{MwG-;X>F%tdS+loW@2V$VOC~icIIGC=3;K&1-lqujBQ+fj6=z zZ(=X@W*_!tKlbMU4rCGsaWIE)D2H)4M{p!ZaWuzpEXQ#?CvYMsaWbcHDyQ*gPUj5X z!kL`K*_^{$c^l_)9_RCRF5p5gVlwaGoxF>Sxr9r(j452s6}+4G@LsOuDz4@luH}8a zpAT>yALK(^&xiR4ALRx<#>e>tpX5{A$W45j&u}xJ<#XJ^=lKG+@?WG&Wa9oA(%)@K7YWFt0a6E;{FdMGd;Y*5`4fNUMgGEH`5S-dAN-Sl@$WCc3^R)VjG2aMnU3k1ff<>J znVE%InT^?*gE^UtxtWJ~nUDEdfCX8Ig;|6}S&YS5f+bmsrCEk$S&rpdffZSam05*V zS&h|MgEd);wONOCS&#MEfDPG*joE}v*^JHEf-TvKt=Wcc*^cdb1v{`KJFzpnuq(Uq zN?yh8?7^#f4X@>Oyq-7kM)u@Q?8V;f!@lgt{v5!8OyVF8<`53$Fb?Mkj^rqg<`|CU zIF9E8PUIv`<`holG~UeVoWWZ-le0LRb9gIn<6O?;eBRClT*yUC<{i9~cX2V7a4DBD zh0D2uck>?J%avTk)m+21ypQ+u0j}eNe2DA$FdyNg+`z~9IG^B?e2N>niBIzxZsxOm zj$8OVU*J~0$Zg!t9o)%X+|8HxGWYNmzRJCPjjwYb-{5|}$pbvdLp;p4c!Y2BD39?t zPw*X{bQGcY4FF*CC;E3+{>b1)}!F*oxtFY_@! z3$P#yu`r9UD2uT;ORywMu{6uDEX%PxE3hIfu`;W$Dyy+NYp^D3u{P_lF6*&A8?Yf8 zu`!#lDVwo5Td*Ztu{GPUE!(j@uV4puWG8lJ7j|VgUdgN2ojrIpui>@4j@R=B-pHQ3 ziM`mHeb|@%*q;M9kVzcG!5qS&9LC`s!I2!r(Hz6E9LMpTz=@p1$(+KeoW`3uoilg~ zXL1&2a}ICiZJf(_oX^|2fD5^Z$-IMi@-8ms5-#O3rf@k|@NVA2d%2RUxSDIYmiO^~ zKEQQ+kPmS^ALb)`lpFXMALkQ%l236XH}Ppc!_9n_&v6T%=L_7*7rBkwxq~~oi@W&} zU*;aZ!dJPMukm&6;~U)1H+g^ud5DMk7LV|49_29}=Lx>UlYE!&@f6?ZX@0;Dd4^|s zj^}xSAMs;;!cX}bKj#&1-lqujBQ+fj6=zZ(=X@W*_!tKlbMU4rCGsaWIE) zD2H)4M{p!ZaWuzpEXQ#?CvYMsaWbcHDyQ*gPUj5X!kL`K*_^{$c^l_)9_RCRF5p5g zVlwaGoxF>Sxr9r(j452s6}+4G@LsOuDz4@luH}8apAT>yALK(^&xiR4ALRx<#>e>t zpX5{A$W45j&u}xJ<#XJ^=lKG+@$3qHvJo4z37fJRo3jO5vK3pi4coFE+w%%`U`KXhXLey%cH@=2irv|R zSMwTP%jfCHJtK^)8>9LixF&Ji5RQ5?-N9LsSW&k3B! 
zNu10noXTmunbSFgw{RwBaW?1hR^GI<=2mj<>OngD{<@nEtG1D+D z(=k0WFe5WDGqW%&voSk!Feh^{H}fzr^D#dQupkSuFpID#i?KLMup~>dG|R9o%dtEw zup%q5GOMsEtFbz3uqJD#;r?upt|=M#LAPjMqR z@o7H8&3u;6aSNa43*5>VxsBVogFCs4yZI7d<{rMnSGkw3@pbOw8{E$~d4LCbh==(W zkMM0C6yN7*e!vfThG%(>=Xrr2@ne3%Px%=?=NJ5vU-4^x!*BT= zzvmD9kw5WgUgR(QmA~a4+1Y{k}W!?tY4_Pl}}*pZ#snO)eG-FPLhVt4l7)x3t+@;YA6 z8+apo@+S6TZ}wqd_G5nz;6NsE5C?MzhjJK)a|B0n6i0Im$8sFUa{?!F5+`#Cr*ax^ z=5)^BEu6_&oXt7BmA7#&=W#x7=K?O|A|~?=-pRYTm`k{n%b3FDT*13}5AWqluHtI0 z;ac9u`}qLZ@j*Vs^?aC*@KJ8yV|<)X@JT+!joiei`3yJnSw6=te4a0GD_`U`Zs!i} z!OMIDo_zGX;UcScHxsPvfKi}j59^@e&=36|%w|SJuc$_Er4o~u3zQ%qg78X}p=!IfJ)wCTDRr=kQkE#<`rw`MjMA zxR8sO%sY4|@8V)E;ZiPR3YT*Q@8&(cmn*r7tGR}2c^~iR16;=k`4HFhVLrk~xq*-I zaX!H(`4l&D6QAZY+{|bB9Jla!zQC<~k=wYPJGhg(xSKEWW$xiCe3g6o8eiu=zQO%` zlLvT^hj^H8@d)4MQ6A%Qp5Qw?$#?l4Pw{=8<_G+cXLy$9c%B#d5kKZ9{FI;ZbAG`u z`4zwBH~g00@q7NjANdo1=0*O(U-=t<=O6r&e=*1@{xf3CG)&8MOwSC=$V|-4EX>Mm z%+4Il$z06MJj}~{%+CTW$U-d4A}q>cEY1=v$x zCvh^Ta4M(qW=`h}-olxj#o3(0TX`GjavtaNb}ryTE@Cq8;GMjSi@AhLxr`}X&K10y z_wZh>kJTYksy`2&CCPyCq|`3ryL zZ~UEq@K655AeZ>hh%wVJEz>bQGcY4FF*CC;E3+{>b1)}!F*oxtFY_@!3$P#yu`r9U zD2uT;ORywMu{6uDEX%PxE3hIfu`;W$Dyy+NYp^D3u{P_lF6*&A8?Yf8u`!#lDVwo5 zTd*Ztu{GPUE!(j@uV4puWG8lJ7j|VgUdgN2ojrIpui>@4j@R=B-pHQ3iM`mHeb|@% z*q;M9kVzcG!5qS&9LC`s!I2!r(Hz6E9LMpTz=@p1$(+KeoW`3uoilg~XL1&2a}ICi zZJf(_oX^|2fD5^Z$-IMi@-8ms5-#O3rf@k|@NVA2d%2RUxSDIYmiO^~KEQQ+kPmS^ zALb)`lpFXMALkQ%l236XH}Ppc!_9n_&v6T%=L_7*7rBkwxq~~oi@W&}U*;aZ!dJPM zukm&6;~U)1H+g^ud5DMk7LV|49_29}=Lx>UlYE!&@f6?ZX@0;Dd4^|sj^}xSAMs;; z!cX}bKj#I<=2mj<>4DyKoj2JTw(=r{?GXpa+6EialvoagAGY4}r7jrWY^D-av zvj7XS5DT*ii?SGtvjj`B6ic%V%d#BHvjQu!5-YO`tFjuavj%Ij7HhK(>#`o}vjH2j z5gW4!o3a_3vjtnS6v%nH;En9b zo7juJ*@u1EkNr7-1DV7@9Lymc%3&PN5gf@;9L+Ht%W)jf37p7DoXjbl%4xir(>a5; za3*JQHs|nG-p09{$N9XS3%HPrn9MtPC-357F5yxxV+xmZ1@GoPyq7DvimSPXYk42< z=L1~F2l)`!^I<;1N4bHI@o_%EC;1dNauc8CGu+H)`5d?KdA`7{e39F@ojbUbySSS# z@n!DeD}0rE`5Is6KEA>Ie3J)wkcW7fZ}AA<=20Hwah~8iJjr+Y9#8Rop5_PqkY{+7 
z=XjnM_z^$mC;XJ3@pFE`FZmU}<~RJ7-|>6?z#sV&f96I0!e99tf9D_klYcSDEB-TL z%rs2PbWG0-%*ag4%q+~xY|PFa%*kBL%{ZuuH-7N<{GZ$eY~F!a2+4yLtM{?`3N8720q5e`2?Tj zQ{2c+e45X2GoR&i+`{Mi0=M!-ZsT_D;7;!1Zob5qxreXtRqo|$e4YFF2KVz#9^gS9 z;$gnUBYc}jd5p(-g75Go-{pHe#rJudAMit-;aQ&Jd0ya0{FtBcQ+~$J`31k^SNxja z@LPVz@A(6NFe|e$J9986 zb1^sbFfa2lKMSxR3$ZYZuqcbMI7_f3OR+S|uq?~5JS(swE3q=GuqvyuI%}{dYq2)# zurBMdJ{zzh8?iB)uqm6dIa{#h|6-}0q6Fyy1PYg{Qb><&+qTtX+vu@v^z@iLwr$(C zZQIuVyLYR>I0y1*>8;td2FXCf35* zSO@E3J*iI08rFC>)Jra4e3)@i+k|;v}4mkvIjX;xwF& zGjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^ol zJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XE zH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf_y*tNJA98H@FRZ0&-ewu;y3(` zKkz61!r%A@|KdMXV$*-rP)7qzw9rNe1-j^=j{!;y!6+CNqhWN6fiW=_#>O}p7vo`k zOn?b75hlhYm=u#?a!i3KF%_o9G?*6CVS3Df88H)P#w?f>vtf43fjKc3=Egjj7xQ6$ zEPw^E5EjNFSQLw4aV&u)u@siZGFTSNVJMcz3Rn>Rk0dY#~N4@Yhi7ygLSbU z*2f0e5F24*Y=TX(88*ij*b-Y|7`DbX*cRJid+dN6u@iR2F4z^jVR!6-J+T+|#y;2= z`(b|^fCF(54#puk6o+9r4#x-_fg^Dgj>a)K7RTXuoPZN?5>CcQoPtwv8cxR3IVV;qc&@i0Cnz=W6x z6JrugipelJrofb#3R7bmOpEC-J!Zg+myhEV*_l6 zjj%B`!KT;@n_~-XiLEdUTVoq+i|w#IcEFC<2|HsK?26s6JNCey*b94OAMA_$us;sK zfj9^U;}9H*!!R6&V+4-CkvIxR;}{%^<8VAqz==2sCu1Z|!KpY6r{fHqiL-Dv&cV4j z59i|oT!@QsF)qQSxD1!$3S5b+a5b*MwYUz~;|AP_n{YF3!L7Irx8n}niMwz&?!mpd z5BK8%Jcx(zFdo69cnpu@2|S6X@HC#mvv>~A;|08km+&%P!K-);uj388iMQ}J-od+g z5AWjxe29&yZK`exYu?QB$ zVptqYU`Z^6rLhc_#c~*m<*@=*#7bBht6){EhSjkK*2G#^8|z?QtcUfn0XD=&*ch8& zQ*4IKu?4ooRv3n@u?@DxcGw;}U`OnPov{mc#ctRgdtguOg}t#4_QihK9|zz-9E5{$ z2oA+z7>>g+0!QFT9EGEC435QdI36e9M4W_^F%qZXRGfy>aR$!BSvVW#;9Q)C^Kk(# z#6`Fmm*7%dhRbmUuEbTi8rR@jT!-s%18&4kxEZ(LR@{c$aR=_iUAPR1D7VlAwVb+9hh!}{0&8)74Dj7_j9HpAxF z0$XA$48zvg2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!ofHMhvF~{ z$Ke=(BXA^+!qGSe$Kp5~j}verPQu9;iBoVYPQ&Rq183qaoQ-pEF3!XGxBwU8B3z71 za49as<+uV@;woH?Yj7>D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$rcmNOLAv}yn z@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY_y8Z`BYccc z@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`FaAR%KK(}xbu`dK 
z3vF~zpo<>*7@))ujDk@y8b-$$7!zY*Y>b0(F&@Up1eg#LVPZ^zNii8F#}t?nQ(mq=6{}%&tbsML7S_f(SQqPIeQbaYu@N@LCfF34VRLMOEwL4b zVQXxIZLuA;#}3#LJ7H(+f?cs2cE=vr6MJEA?1O!=ANI!qI1mTnU>t%&aTtc88#yz+f_u+m#fCupq9>ybh6p!I? zJb@?i6rRR2coxs$dAxuZ@e*FfD|i*J;dQ)$H}MwU#yfZy@8NxXfDiEzKE@~b6rbU9 ze1R|V6~4wd_!i&cd;EYO@e_W=FZdO|;dlIjKk*m-#y|KM|DlqA{-cIE8fc=0HaaNK zMGt)pP+|y1!KfGwqhkz=iLo#?#=*E4594D3Oo)jvF($#Jm<*F+3QUQqFg2#Zw3rUl zV+PEKnJ_bE!K|1Kvttg-iMcR0=E1y}5A$OIEQp1$Fc!h0SPY9}2`q`Fur!vzvRDp7 zu{>75idYFNV->85)v!9&z?xVKYhxX(i}kQRHo%712peM)Y>LgWIkv!-*b2k2HMYUF z*bduc2keNQurqeSuGkH`V-M_!y|6d-!M@lJ`{Mu{h=Xu24#A-~48w6aM&Jk>iKB2d zj=`}w4#(pJoQRWfGDhMQoQl(MI?lkEI16Xv9Gr{ua6T@;g}4Y8;}Tqo%Wyfaz?HZP zSK}I7i|cSbZorMW2{+>w+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH$M86wz>|0i zPvaRpi|6n>UcifZ2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*}&+s|Ez?b+6 zU*j8mi|_C~e!!3T2|wc({EFZ3JO03*_zQpIAN-5|P)SJtQ9~UKG|@sE9Te!Ihdu@< zF$AMvRE&nvF$TuOSQs1QU|fuc@i74=#6*}FlVDOzhRHDnro>d38q;7}Oo!<)17^fb zm>IKRR?LRkF$dLkg}ZSN?!|q$9}nO`JcNhw2p+{_cpOjQNj!z8@eH2D zb9f#v;6=QIm+=Z-#cOySZ{SV5g}3nz-o<-(A0OaDe1wnj2|mSV_#9v0OMHc|@eRJk zclaJZ;79y~pYaQR#c%i>f8bC2g}?C+{>6W&B%=SQp^gTcXrYY`3Utv!9|M#af>AIk zM#JbB17l(=jE!+HF2=+7m;e)EB20`)FexU(!!pc|$ zt70{*jy13**23CY2kT-ztd9+_AvVIs*aVwmGi;76uqC#_Fl>!&ur0R3_SgYCVkhj3 zU9c;5!|vDvdtxu_jeW2$_QU=-00-hA9E?M7C=SDL9F7q<0!QK~9F1deERMtRH~}Z( zB%F+qI0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#02G`;`T#p-Y zBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|32G8O-JdYRf zB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh|2H)a4e2*XS zBYwiq_yxb>H~fx2@F)Jl-}ndr;y+Xp(|^=ZM*~f?&_)LZy6B;g0ZI(PC>Rx^VRVdv zF)Wvqf#u^Lv# z8dwu+VQs8~b+I1S#|GFC8)0K?f=#g*Hpdp&5?f&yw#GKt7TaNa?0_Ay6L!Wf*cH2B zckF>Zu^0BnKG+xgVSgNe191=z#vwQqhhaDl#|RvOBXJat#xXb+$KiOKfD>^NPR2-_ zf>UuCPRAKI6KCOUoP%?59?r)FxDXfNVqAhtaTzYh6}S>t;c8riYjGW}#|^j-exUdJ1F6K~;dyn}b~9^S_X_z)lAV|;>7@fkkH7x)ri;cI+@Z}AfhJmLqk{rn^w7ruC5B)WjEd1PI>x}57z<-# z9E^+cFg_;0gqR2uV-ie?$uK#lz?7H@Q)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm;O(V-YNh#jrS*z>-)BOJf-7)R4Xa}ftckU- 
zHrBzqSP$!C18j(murW5lrq~RdV+(AFtuPE*V;gLX?XW#|z>e4nJ7X8@irug~_Q0Ol z3wvW9?2G-dKMufwI0y&h5FCobFdTrgh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h z;||=3yKpz|!M(T-_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s z;|;utx9~RJ!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW z;}86azwkHy!N2$qm88o54HY%i(LfU|w9!FqLqPRxb5F%Ra& ze3%~#U_mT|g|P@0#bQ_-OJGSXg{83!mc?=yisi8aR>VqJ8LMDbtcKOG2G+z{SR3nL zU95-ou>m&3M%WmeU{h>{&9Mcx#8w!Ft+5TZ#dg>pJ77obgq^VqcExVk9eZF;?1jCt z5B9}=*dGVrKpcdFaR?5@VHl3XF#<>6NF0TuaSV>daX20);6$8+lQ9yf;8dK3({TpQ z#925S=ipqNhx2g(F2qH+7?_uyXKhx_pW9>ha<7?0plJch^d1fIlGcpA^(Sv-g5@d94NOL!Tt;8nba*YO74 z#9Me9@8Dg$hxhRTKEy}(7@y!%e1^~Q1-`^r_!{5fTYQJ_@dJLuPxu+X;8*;H-|+|j z#9#Ou|KMNzhe|T~j~eP|potdR=%7FsJ@hd^i6IyTqhd6SjxjJM#=_Vb2jgNqjE@O0 zAtu7am;{qzGE9ysFeRqK)R+d-VmeHZ889Pe!pxWjvtl;PjyW(V=EB^V2lHY+%#Q`I zAQr;HSOkk=F)WTHuq2kk(pUz|VmS=O@>l^YVkNAMRj?{n!|GTAYho>|jdidt*2DVP z02^W>Y>Z8?DK^9A*aBN(D-6Td*aq8TJ8X{~up@TD&e#RJVmIuLJ+LSC!rs^i`(i)r zj{|TZ4#L4W1c%}<49DRZfg^Avj>6G62FKz!9FG%lB2L1|7>QGGDo(@cI0I+mES!yV za4ycn`M3ZV;v!s(OK>SJ!{xXFSK=yMjcaf%uEX`X0XO0%+>BdrD{jN>xC3|MF5HcK za4+t|{dfQm;vqbYNAM^f!{c}YPvR*&jc4#Ip2PEa0Wabuyo^`yDqh3ucmr?ZExe6) z@GjoN`}hDK;v;;FPw*)|!{_({U*ao#jc@QRzQgzU0YBm={ET1lD}KZ8_yd39FZ_*v z@Gt&DB{}^^4RtioLsJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9EM_ftbi4<5?014SQV>bb*zCku@=_GI#?I$VSQ|X4Y3h6 z#wOSln_+Wofi1BWhGA=LgKe=Lw#N?G5j$aL?1Ejf8+OMY*b{qUZ|sA8u^;xw0XPr` z;b0tsLva{}<8X|?5jYY@;b@fE(tH~1Fc;d}gmAMq1@#xM94zu|ZMfj{vV{>DG}7yqG> zg8rk1IvQxAg*G}U&_xe@3{YYSM!~2U4WnZWjES)@HpaoY7!TuP0!)aBFfk^*1(!r3u|K?tc&%qJ~qIH*a#bA6Ksmj zusOECme>lzur;>9w%88aV+ZVrov<@@!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_? 
zI1IyaI7Z+I9EqcFG>*ZsI1b0-1e}PIa56^X6r76Fa5~PwnK%n);~boe^Kd>cz=gO7 z7vmCKipy|0uE3SJ3RmMAT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%Vz=L=Q z591L$ipTIcp1_lM3Qyx1Jd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVGz=!w< zALA2ziqG&lzQC9G3SZ+Je2ee!J$}HC_z6Gb7yOFf@H_s%pZE)Z;~)Hs|4>Ou|4~C7 z4K&e08yytrqK7^PC@}=1U{s8T(J=SbyT1i(0EQZCg1eU~7 zSQ^VU`?!rwXqJ?#d=sD8(>3hgpIKYHpOPx99v*Z zY=vRi8rxu7Y=`Z!19rqt*crQESL}w}u?P0VUf3J^U|;Nq{c!*e#6dV1hu}~ghT%9I zBX9(c#8EgJ$KY5ThvRVqPQ*z#86$BDPQ__B9cSQ7oQ1P-4$j4SI3E|_LR^H4aS1NP zWw;zy;7VMDt8opk#dWwIH{eFxgqv{-ZpCf59e3bP+=aVw5AMZ%xE~MTK|F+q@dzHp zV|W}-;7L4%r|}G)#dCNbFW^PIgqQIOUd3y89dF=GyoI;%4&KFkcpo3&Lwtmf@d-Y~ zXZRdn;7fdkukj7O#dr7~Kj26FgrD&Xe#LM29e?0Y{Dr^q5B|k}sHCF*sG*JqnrNYo z4hnS9LmvZ_7=lqSDn`TT7z1NsER2nDFfPW!_?Q3_Vj@h8NiZoU!{nF(Q(`JijcG6~ zro;4@0W)GI%#2wuD`vy&m;-ZSF3gR2FfZoA{8#`BVj(PyMX)Fq!{S&1OJXT3jb*Sb zmcvjij}@>YR>I0y1*>8;td2FXCf35*SO@E3J*iI08rF zC>)Jra4e3)@i+k|;v}4mkvIjX;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSn zC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*u zB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>h zCBDMf_y*tNJA98H@FRZ0&-ewu;y3(`Kkz61!r%A@|KdMXQqzCbP)7qzw9rNe1-j^= zj{!;y!N_irO}h_|tQtAxb;SSgq)kIrQNlvPvWF_+kzp|+{+mk^s#b`SIy|yXi~puZ z7E4m`e^;uCrs#^Hn2M#?ilYd{RXoL40!1nzN)#ok5>1JV@?v6R?K93`$2Pl>N2 zP!cMMl*CFBC8?53Nv@<&QYxwbJA^b!S|y#5UdfQblQd}valvGM7rIj*DS*4s3s+3nMC>51TN@b;rQdOy@ zR99*!HI-UQZKaM7sO1x+&e29!gK8m(pA5qx4n!DgBiJ%0Ok1GFTa+3{{3H;mU9&LK&fqR7NSI zl`+a#Wt=iznV?KmCMlDZNM(vLRhgztS7sY4LyjMObAC*taXXT6XRr#iTSAHlz zm0!wl<&W}L`S;)TtE#5zs-c>yrP`{a3e{CT)mH;msv&9=HL4m-jjqN}W2&*#*lHX# zt{P8`uO?6vs)^LZY7#Z6noLcurchI=snpbJ8a1t&PED_7P&2BT)XZuYHLIFU&93H9 zbE>)2+-e>*ubNNIuNF`Xs)f|TY7w=lT1+jjmQYKorPR`D8MUlhP7PJds}Hu}1I!GO?4pE1y!_;tfxEi63 zP)Dkx)Y0k~b*ws09j{JMC#sXw$!erJMV+cnQ>Uvl)S2omb+$T3ovY4M=c^0Uh3X=8 zvARTEsxDKPt1Hx%>MC`$x<*~Au2a{m8`O>JCUvvAMct}yQ@5)-)Sc=sb+@`l-K*|X z_p1lggX$smuzEy2svc91t0&Zx>M8ZKdPY5~o>R}O7u1XDCH1m;MZKzCQ?IKx)SK!p z^|pFPy{q0+@2d~ghw3BsvHC=PsyMQlN`bK@LzEj_;AJmWPC-t-XMg6LN 
zQ@^V})Sv1v^|$&*{j2`_@A0ders|h{hHAsK zaBa92p^eZ+YNNE#+8AxDHclI_P0%K4leEcNq&7vHs!h|TYcsT&+AM9hHbhsL_4Y-(~fH=w3FH??X-4AJFA`3&TAL6i`pgavUWwgs$J8r zYd5r;+AZz2c1OFb-P7)C544BcBki&FM0=_|)1GTDw3pf|?X~tsd#k6!H`dR9G~o?XwO=hSoQx%E7HUOk_lUoW5+)C=i_^&)yvy_jBHFQJ#zOX;QcGJ09P zoF1x|*DL51^-6kWy^3B{uclYmYv?ugT6%50j$T)FxCndPlvJ-dXRWch$S;-Sr-NPraAkTkoUy)%)rF^#S@oeULs_ zAEFP{hw0(^a6LjFp^wx@>7(^A`dEFOK3<=oPt+&rll4e_iau4Jrcc*r=ri?M`fPoU zK3AWo&({~|3-v|%Vtt9eR9~hq*H`E(^;P<6eT}|WU#G9vH|QJnP5Neii@sIgrf=7G z=sWdY`fh!XzE|I;@7E9L2lYexVf~1HR6nL4*H7pt^;7z3{fvHAKc}D9FX$KbOZsK~ zihfnUreD`@=r{FS`fdG=epkPz-`5}L5A{d-WBrN#RDY&F*I(!_^;i09{f+)sf2Y6K zKj_LBsWqRDUDP{Y9o!2)<|ch zH!>I*jZ8*nBa4yM$Yx|Wau_*{Tt;pqkCE5NXXG~u7zK?&Mq#6fQPe1A6gNs3C5=)> zX`_r$)+lF$8s&`&Mn$8NQQ4?sR5hv@)r}fPO{11k+o)sIHR>7ljRrl@VsNHrg0%jdn(Rql3}W=wx&@x)@!JZbo;bhtbpMW%M@s7=4X?Mt@^~ zG0+%f3^s-sLyciZxG~&_Fh&?7jZwyEV~jD@7-x((CKwZqNycO&(wJgQHKrNUjTy#F zW0o=7m}AT}<{9&i1;#>Sk+IlVVk|Y58Ox0o#!6$AvD#Q;tTomd>x~V@Mq`t)+1O%i zHMSYsjUC2LW0$ep*kkN9_8I$)1I9t)ka5^JVjMM&8OMzi#!2IpaoRXzoHfoF=Zy=- zMdOlj*|=g{HLe-gjT^>I#nZQhFCNdM7Nz9~XGBdfE!c1wVGEn|aKo47BUN)Ma-gRF|)W? z!YpZ)GE19f%(7-VGt?|^Rxm4?mCVX!6|<^Y&8%+LFl(B%%-Uuhv#wdstZz0j8=8&G z#%2?y%**B#^Qw8xyl&nwZ<@Ev+vXkfu6fVAZ$2;|nvcxK<`eU&`OJK7zA#^! 
zugurx8}qIC&U|lvFh81~%+KZ*^Q-yI{BHg*f11C{-{v3luldhZEY;F1-7+lGvMk$j zEMd8pXZcoONh`#PVnwy0S<$T+R!l3F72Aqq#kJyD@vQ__LMxG#*h*q0wUSxMtrS*D zE0vYnN@Jz9(pl-P3|2-fla<-ZVr8|mS=p@|R!%FImD|c=<+buz`Kj|(W+!swyIcFt!h?vtAR5HHdRBd_ zfz{AzWHq*$SWT^FR&%R`)zWHZg;}kwHdb4!oz>pzV0E-QS)HveR#&T=)!pi0^|X3f zy{$f0U#p+h-x^>Iv<6v&ts&M>YnT;o4Ywk!5!Og+lr`EKV~w@OS>vq<)chqcq%W$m{1SbME~)_&`Nbw^)_v=N_0W1`J+_`$PpxOx zbL)lm(t2gRw%%B8t#{UY>x1>t`ec2!zF1$aZ`OC~hxOC?W&O7PSbwd5mSU^6X6v?L zo3>@!wqpz1wLROn16$f5b`(3R9nFqz$FO7CvFzA(96PQZ&yH^=uoK#e?8J5wJE@(_ zPHv~LQ`)KQ)OH#>t)0$JZ)dPG+L`Rkb{0FUoz2c}=dg3yx$N9_9y_m{&(3cbunXFS z?80^tyQp2vE^e2wOWLLE(smiUtX<9ywaeQT?22|JyRu!yu4-4atJ^i~nszO_wq3`r zYuB^u+YRi7b|bs7-NbHcH?y1DE$o(dD?7|?ZMU)8+U@N2b_cto-O283cd@(L-R$mm z54)$`%kFLWvHRNn?Edxud!RkY9&8V>huXvJaC^8NVUMs!+N12z_85DtJx+YEQGL+cWH$_AGn0J;$DF&$H*-3+#pVB73pD#9nGIvzOZ|?3MN^d$qmB zUTd$j*V`NHjrJycv%SUMYHzc*+dJ%?_AYz3y~o~b@3Z&Y2ke9PA^Wg>#6D^tvya;+ z?34B>`?P(=K5L(|&)XO5i}oe^vVFzAYG1Rj+c)f+_AUFieaF6Q-?Q)A5A28bBm1%a z#C~c&v!B~9?3eZ{`?dYXervz8-`gMTkM<|~v;D>XYJao8+du4|_AmRl{m1@m|FacG zbu>qJ499dV$95b?IIiP4z7sgo32~x0QJrW`bSH)r(~0H8cH%g3op?@sCxMgDN#rDU zk~m47WKMD?g_F`r<)n7fIBA`9PI@PUlhMiKWOlMRS)FW7b|;6E)5+!JcJerRoqSGy zr+`z?DdZG(ia14`Voq_Vgj3Qf<&<{HIAxu3PN-Afso+#}Dmj&%Do$0Wnp54W;nZ|$ zIklZSPF<&-Q{QReG;|s{jh!Y=Q>U5J+-c#obXqxKPHU%))7EL{w0Al<9i2{2XQzwP z)#>JRcX~KIonB6Fr;pRu>F4x!1~>zqLC#=jh%?j~=7c-Lod{=yGtwF5jCRI2W1Vr% zcxQq$(V65-b|RfA&QxcbGu@ft%yecsvz_oh8mvXPL9yS>dd7 zRynJkHO^XRowMHA;B0g@Ih&m=&Q@oev)$R@>~wZHyPZAGUT2@P-#OqMbPhR(og>as z=a_TcIpLgiPC2KYGtOD(oO9l};9PVrIhUO)&Q<4{bKSY&+;na^x1BrAUFV*2-+ACX zbRId6ohQyy=b7`|dEvZtUOBIwH_ltq#Dz=I3Vu#o%c8T3$kJu~riT&b$I4BN@!{UfIDvpWc;)FOUPKndvj5sUKiSy!u zxF{}(%i@Z-Dz1s^;)b{>Zi(CCj<_rCiTmP#cqkr;$Kr{2DxQhw;)QrAUWwP@jd&~G ziTC1z_$WS!&*F>tD!z&D;)nPteu>}WkN7M83B^@i&DCAQHC@ZKUB?x!>w2#52Cj5N z+$e5TH<}yWjp4?0W4W>2IBr}wo*Umy;3jkvxryB*Zc;ayo7_#|rgT%esogYgS~s1W z-p$}XvsaxE0+>Ze_QMTh*=RR(ET-HQicnZMTkF*RAK)cN@43-9~O>w~5=- 
zZRR$2TevOVR&JQv+HK>ub=$e^-41R?x0Bo1?c#QIySd%n9&S&!m)qOz2L+)YshS>q%j{+GvU=IP>|PEprj4l@@ji^yt-aJufEs7Yv?ud z8hcH=rd~6zx!1yL>9z90yw+YDudUb4YwvaNI(nVF&R!R_vK0ys6$a zZ@M?bo9WH+W_xqIx!yc)zPG?z=q>UVdrQ2f-ZF2wx58WLt@2iTYrM7II&ZzV!Q1F< z@-};0ysh3gZ@ag{+v)A{c6)ogz1}`=zjwep=pFJ7dq=#Z-ZAgEcfvdAo$^k5XS}oC zIq$r8!Mo^P@-BNs!Taca@;-ZCyszFj@4NTI`|17ietUnszurGj@l{{*b>Hw!-|}tW z@rCdDp6~mCFZ~cdiXYXF=12Eq_%Z!her!LEAJ>oP$M+NX3H?NVVn2zW)KBIo_fz;O z{ZxKxKaHQ(Pv@uiGx!<(OnzoRi=Wlc=4bbF_&NPter`XHpV!al=l2Wv1^q&PVZVr9 z)Gy{2_e=OC{Zf8uzl>kjFXxB)<^2kNMZc0?*{|YP^{e^S{ThBvzm{LyujAMC>-qKl z27W`ok>A*F;y3l1`OW!Rv_J{aG{b7E%KirS-NBATCQT}Luj6c>N=a2U%_!Ip}{$xMW zpW;vTr}@+U8U9RvmOtB{%zv5r@uld*g8~#oImVev7}|C9gO|Kfl3zxm(&AO27Om;c-UK2m%>|1W|&hL9`%x5F>~g#0p{uae}x(ydZv%AV?S_3K9oNf}}yR zAbF4?NExIGQU_^*v_ZNceUKr@7-R}E2U&uwLAD@!kR!+$fLrN(W_vvO&2ZG$^bPKu% zJ%XM=ub_9(C+HjW3;G8Gf`P%HU~n)b7#a)2V;V*2 zObR9kk-?N;YA`LB9?S@42D5_M!JJ@jFfW)NEC?0`i-N_$l3;1DELa|_2v!EGg4MyA zU~RB2SRZT%HU^u5&B2ynYp^ZY9_$Eq2D^gY!Jc4murJsj90(2uhl0bwk>F@>EI1yV z2u=p4g44m7;B0U%I3HXHE(Vu^%fXf4YH%&M9^43S2DgIS!JXi4a4)zYJO~~JkAla+ zli+FaEO;Kg2wn!Sg4e;D;BD|OcprQSJ_etH&%u}AYw#`j9{dP?2ET&e!Jpu7@GnrL zDmAG~Lz>c(wsfSBuJoiY11V*Qj3T4TXfnEtA!Eu|GPaB(%*z04pp%1kn|%p$YOY%;sdA#=)HGPlel^U8cOzbqgN%0jZR zEFz1_VzRg_Axp|qva~EC%gS;xRF;<&WJOs?R+d#{Ras3|mo;QfSxeTIb!1&xPu7JIT(ni|i`9$?md;>?wQ6-m;JE zEBnd*a)2Bt2g$*5h#V@1$#6MbM#vFzq#Pwj%Q14S94E)i338&GBqz&AIYmyD)8uqH zL(Y`5l#k?N`9waI&*XFYLcWx*NO1=WV?Ky{&dP<^NY z)DUU}HHMl%O`%|@859CFhgv|PP#Dw_Y6aP#aL5j|h9aOyC<=;(VxTrqEEEUDLkUn@ zs2$WE>Hu|wIzgSGE>Ksf8`K@@0riA>LA{|qP+zDY)E^oE4TJ_k4rnkm1R4qrgN8#R zppj4_GzuCGje*8O!9_}252L+3EB*8fwn^1pzY8OXeYD_+70c2_Couh z{m=pEAan>i3>|@vLdT%v&fwRKd;OuY?I47J7&JE{*^TPSy{BQxdAY2G83>Sfm!o}d? 
za0$31Tna7?mx0T|<>2yg1-K$y39bwW!d2j^a1dM#t`66LYr?hQ+Hf7XE?f_;4>y1t z!j0g@a1*#G91J&uL*V9c3pf-GgImI_U>h6`+u_!51RM!R!O?IG+y;(?A9i!K2|Z@K|^pJRY6^PlPAIli?}wRCpRZ9i9QtglECC;W_YJcpf|-UH~tI7r~3+ zCGb*s8N3``0k4Et!K>jl@LG5sydK^FZ-h6&o8c|+R(Kn{9o_-&gm=Na;XUwPcptnU zJ^&wt55b4wBk)o97+04UxY8gm*Fe$Rrnfw9linIgm1yO z;XCkM_#S*8egHp&AHk2|C-77F8T=f60l$P_!LQ*r@LTvD{2u-Qe}q55pW!dC8*m36 zz!P`@Z{P#cfV99Dqyy=JAIJbQ0tmnW00f`_12`Z62`E4V2C#qwJP?2gBp?F?s6Ycc zFn~V@0GU8$kOgD~*+6!X1LOp`KyHu+Hb* z&;#@Yy+Ci!2lNH~Kz}d*3 z5`{z~F-RLE7KuaRkp!eI(hg~lbU->HosiB*7o;oF4e5^bKzbs*klsiiq%YDB>5mLR z1|owH2QnBLf(%85A;Xao$Vem+8HJ2S#vo&namaXN0x}VqgiJ=JAXAZP$aG`|G837F z%tq!QbCG$-d}IN#5LtvQMwTE;k!8qoWCgMkS%s`d)*x$T1F{j>gltB(AX|}b z$aZ80vJ=^b>_+w=dy##}e&hgh5IKY#Mvfpykz>el1M(61gnUN6Aa1BT>VbNqUZ^+fgQh{#qP}Q4G(GBvW4Eigl0ywpjpvuXm&IQniI{1=0@|NdC`1m zezX8u5G{ljMvI_D(PC(Ev;0*yqY z&}cLUZG*<5acDf6fVM^3q3zKQXh*aY+8OPFc163P-O(OsPqY`>8|{PkMf;)s(E;c{ zbP(!52ctvKq3AGlI649yi6)|>(9!4^bSyd!9gj{xC!&+k$>)+&FB_%E4mHc zj_yErqPx)D=pJ-0x)0rt9zYMGhtR|55%ef}3_XsXKu@Bl(9`G{^elP~J&#^MFQS*w z%jgyKDtZmQj^03TqPNi7=pFPfdJnyiK0qI$kI={H6Z9$i41JEiKwqM-(AVf2^ey@h zeUE-XKcb(|&*&G_4RgmlFi*@2^TvFzG+0{97fXkw$NaDiSVj!OU<_aghGH0oV+2NG z6h>nV#$p`CV*(~(5+-8`reYeVV+Q7r1z?%5%vcsIE0zt*j^)5|V!5!~SRO1dmJiF1 z6~GE&g|Na{5v(Xy3@eV6z)E7Ju+mr=tSnXzE00ycDq@we%2*&)1*?h$Vb!qeSPiTu zRtu|*)xqjw^|1O_1FRv|2y2Wr!J1;hSTigHYmT+RLa{KcCDsbFVd0n^YmG%2%eXzb*Kde7C02_!6 z!W`IOYzQ_K8-@+XMqnecL~Il`8XJR+#l~Udu?g5jY!WsZn}SWnreV{u8Q4s07B(B3 zgU!X}Ve_#C*g|X(wisK2Eyb2$%dr*MN^BLj8e4;{#nxf#u?^TpY!kK_+k$Pywqe_` z9oSB67q%PQgYCukVf(QI*g@=Jev zyMkTCu3^`)8`w?k7IquEgWbjMVfV2I*hB0Q_85DDJ;k13&#@QSOY9Z)8heAi#ol4> zu@Bfs>=X7G`+~XQ?zji;iF@JRxDTENPmBBF>G1TpAD#ivh(kDx102Co9K&&(z)76K zX`I1XoWprsz(ribWn95kT*GzT!2R(6JQJQ7&w^*gv*FqC9C%JV7oHo>gXhKb;ra0b zctN}nUKlTe7sZR=#qkn&NxT$Z8ZU#F#mnL4@d|iFyb@j+55%kBRq-Ia8eSc*f!D-q z;kEHPcwM|6ULS9OH^dv^jqxUUQ#=@NhKJzI@fLU}9)`EXTj4f59Jk}G@d!K;kHVwz z7`zQ0i^t*dcmm!QZ-=+XJK!DhPIza$3*Hs)hIhw%;63qPcyGK9-WTtO_s0j|1Mxw) 
z10ReJ!H43*@ZtCfd?cQTkHSaeWAL%~ID9-l0iTFZ!YAWX@TvGTd^$b@pNY@HXXA75 zx%fPMKE427h%dqy<4f?R_%eJsz5-u~ufkX3Yw)%BI($980pEyk!Z+hv@U8eZd^^4a z--++ScjJ5Tz4$(SKYjo|h#$fa<45qL_%ZxAegZ#SW{5pOEzlq<%Z{v6HyZAl)KK=lIh(E#~<4^FX_%r-D{sMoAzrtVRZ}7MHJN!NV z0sn}9!aw6*a5uu8@E|-1FT$JfA<__O311=|k)H4)G7uRFh=2(|AOuQa1WphHNl*k$ zFa%3*1WyQrNJxZCD1=IAgiaWQKM_D=A~F+Mh^$05B0G_T$VucPaua!oyhJ`CKT&`v zNE9Lp6Ge!kL@}Z`QGzH*lp;zKWr(swIifsKfv8ASA}SMsL=~bc5kyoYsuMMcnnW$4 zHc^MDOVlIk6Ag%lL?fax(S&G91QX4O5TZHJf(RwTh?Ybv!bXG>cA_;AK|~T!L^KgY zv>{@NI3k`%Alee`i1tJWq9f6X=uC7Wx)R-p?nDowC((=OP4pr968(t&!~kL-F^F&w zgNY%;P+}M{oESljBoc{H#Asp+F_sudj3*`#6NyR0WMT?2m6%3MCuR^ciCM&KVh%Bv zm`BVf77z=GMZ{ua39*z|Ml2^*5G#pQ#A;#4L&Ra?2yv7+MjR(j5GRRK#A)ITah5nooF^_27l}*6W#S5P zmAFP+CvFfoiCe^N;tp|_xJTS49uNW#B1UW@s@Z;yeB>o zABj)IXW|RtM!J(8q$lY`dXqk68Zs^EOQs{!lYV3dG9w9*FbPP6L`jUqNrEIvilj+~ zWJ!+XNr4neiIho&R7s80NrUt!1ISEdW-<$zmCQzFCv%WF$y{V^G7p)T%tz)Y3y=lL zLS$jG2w9XYMiwVakR{1dWNEStS(YqEmM1Ha70F6uWipVgLRKY%$ZBMDvIbd`tVPx) z>yUNHdSrdF0ojmjL^dXykWI;8vKbjdHYZz_PS~av`~hTud$@my*lK<>U%-CAo@RO|Bu=lIzIz;#3K$BvpzkO_ia_Qst=fR0XOcRf(!h1yWV0s#FkFjjB%7plVXJsM=H= zsxDQJs!uhb8d8m@##9rkDHTjLqe7_WR0}GU3Zq(5ttcB6PT8r}R0I`CMN!dI4Aq8; zrQ)b~DuHTCwWHco9jJ~}C#o~mh3ZOmqqPz*b`cng_fz%+%K@Fyc zP(!I<)NpD9HIhoCMp2`wG1ORU95tSrKux43QIn}D)KqF3HJzG4&7@{gv#B}MTxuRQ zpISgIq!v+&sU_4>Y8kbhT0yO(R#B^|HPl*a9krg?Ky9QpQJbkP)K+R6wVm2Q?WA^5 zyQw|YUTPn;pE^Jtqz+MssUy@;>KJvLIzgSJPEn_+Gt^n?9Ce<$KwYFRQJ1MJ)K%&l zb)C9F-K1_&x2ZeSUFsfnpL#$&q#jX^sVCG^>KXN%dO^LUUQw^9H`H6|9rd32Kz*b> zQJ<+VlpF0%d(fV=7wt{^&}rzjv@e~GPEY&M8R(2OM8h=cIGdx#>J~UOFG0pDsWbqzlo7 z=^}Jdx)@!YE~K|x)I%&ZbCPugXv~;2;H1+L5I>|bW6GwZKK0!JKdU&pd;xhI+~85+t9Ie z934+5&~523(#7dH_9;9z;9n!SoP% zC_RiGPLH5R(uwpadNe(T9!rm-$I}z&iS#6TGChT!N>8Jw(=+Iq^elQdJ%^r4&!gwl z3+RRPB6=~sgkDN7qnFbw=#}&;dNsXv&$^e%cg zy@%dQ@1ytA2k3+JA^I?Vgg#0iqmR=k=#%s*`ZRrpK1-jY&(jy^i}WS>GJS=Lgw^ey@}eTTkF-=pu-59o*VBl`ZfKAeoMcj-_sxHkMt+{ zGyR2jW84`J#*^`4ycr)R4U?AfWzsR}89ycilaYZKm;nsJpbW;~48f2L#n24HunfoW zjKGMD#K?@osEo$wjKTOb0Zb+)Gn0kM%4B1*GdY-?OfDujlZVO6 
zgel4tV~R5+n37B>rZiKADa({&$}<(1icBS@G84#DVX87gOf{xDQ-i6=)M9Egb(p$L zJ*Ga>fN97yVj43|n5IlH(~JpWnlmk!P$rCN$+Ti@OgLj_S~C$$BooC%GcimXCYFg~ z;+X`dEz^!^&valqGM$*tOc$mr(~arQ^k8~2y_nuiAEqzUkLk}0UHapnYbk~zhkX3j8YnRCo}<^pq(xx`#%t}s`bYs_`# z26L0S#oT7@Fn5`I%zfqo^N@MOJZ7FSPnl=TbLIu}l6l3vX5KJwnRm>4<^%JQ`NVu? zzA$dAJL|!EvR%*pD)3Uy7IyOD)$7WzNvJeZifJIo8#aNsrSdyh!nq^p)Y@owgOv`t;AMl1KBEURW^vN##U!*ur=9QY;CpjvdcVU?;MZ*vae^b}BoKozBi+XR@={+3Xy4E<2B%&n{pW zvWwWo>=Je=E`TdyGBKo?uV1r`Xf%8TKrDjy=y_U@x+l*vsq{_9}agz0TfX zZ?d=8+w2|oE_;u?&pu!uvX9uu>=X7W`;2|gzF=Rnuh`e@8}=>xj(yL5U_Y{-*w5@2 z){S%LJUCCzi}U7uxHMc^&X-HarRV&(3|vMI;$RMN2#0bQhjRo+aui2%499XD$8!QF zauO$V3a4@!r*j79&joOqxXfG@E-ROf%g*KCa&o!2+*}?mFPD$Y&lTVba)r3UToJA) zSBxvpmEcNprMS{u8Lli>jw{bq;3{&JxXN50SB0y}1##85>Rb)3CRdBA&DG)Ra`m|S zTm!Bl*NAJ(HQ}0a!CW&gglo>V;6k}Dt|ixsvvJ{^oomfSaFJXT7tO_RZMax2j*I6K zxVBt7u07X*>&SKDI&)pPu3R^+JJ*Bj$@SuTbA7nJTtBWqH-H<+4dNW!U~ULElpDqk z=SFZNxkPRhH<}y6jpfF18bz z5x1CI!Y$>Nam%?C+)8d0x0+kSt>xBn>$wfwMs5?gncKo`<+gF#xgFe2ZWp(k+r#bU z_Hp~U1KdIG5OdpRbJzD-r)WD06r6+na{#!<+JhG`5b&sJ{O;x&%@{C^YQul0(?Qf5MP)t!WZR> z@x}QPd`Z3(Uz#t&m*vax<@pMHMZOYWnGfWv@KyOBz8YVhuff;kYw@-DI(%Ke9$%kt zz&GR@@s0T=d{aJ{Z^no4&G{C5C?CeR=cIDdja$)Dm+^Jn<8{5k$Se}TWqU*a$GSNN;^HU2t(gTKk& z;&1bJ_`Cc){yzVJf5<=LAM;Q6r~EViIsbxx$-m-X^KbaK{5$?V|AGI=f8sy$UwAjc zUGNY*1uwx{@Db7oX$4;)oseGe6EX-H1xSDeARq!NU;-`>0x3`eEieKrZ~`v~f+$FW zEGU91Xo4;ng1-I)5o zhC(BuvCu?lDg+D7gb<;*&_W0m!i1JWE5Rm&3wEKk5FtbgQ9`s3BeW4>g*YKzND$fz z?S%G12ce_TN$4ze5xNT9gziEQp{LMG=q>aS`U?Go{=xuZpfE^q2!n+o!cbwDFkBcR zj1&@uQNn0pj4)OhCyW;+2or@#!en8JFjbf)Oc!PdGlf~gY+;TtSC}Wv7ZwN$g+;<* zVTrI*SSBnNRtPJFRl;gvjj&c&C#)AX2pfe>!e(KMuvOS5Y!`M2JB3}sZefqGSJ)@) z7Y+yqg+sz&;fQclI3^qyP6#K3Q^INCjBr*sC!7~92p5G*!e!x#a8-P{7VC(0#d>0Wv4Pl7 zY$P@on}|)tV6mAPA~qLWh@oPb*ivjI+Qe|tF18jU#7Hqpj22_WHe#$8C&r5jVq3AD z*k0@)b`(2_oy9Iir2*J;tlbpcuTx3 z-VyJL_r&|+1M#8wNPH|l5ub|B#OLA*@um1md@a5a--_?V_u>cfqxebuEPfH)BzMU} z@|3(JZ^=hWBc+vmrF2qy$xq54Wt1QZmVksvsDw$lL`bAWNwmaBti(yYBuJtpNwTC! 
zs-#J}WJvx}fRstfEM<|hO4+3BQVuDnluODj<&pAA`K0_(0jZ!=NGdE9k%~&iq~cNu zsiag&DlL_f%1Y&=@=^t4ENDMd-qQjF9_ik0G|cqu_@E47o_ zOC6++QYWdi)J5tlb(6YFJ*1veFR8cGN9rr}lln^oq=C{P$srAvhDbxDVbXAEgfvo0 zltxLTr7_Z2X`D1(njlS-CP|Z}DbiGFnlxRSA6~<4x*%PYE=iZAE7Dcznsi;dA>EX2Nw=jt z(p~AEbYFTPJ(M0vkEJKlQ|X!XTzVnBlwL`%r8m-B>7Ddm`XGIjK1rV?^S@Wh?y`sM zDSOG@vX7ibPAmJ$>E!gXpPWI?C_^$V0~wJ~8Iy6DkV%=6X_=8(nUi^0kVRRNWm%C` zS(A0ykp1NVIg^}O&LU@(v&q@z9CA)Mmz-P9Bj=U#$@%31azVL}Tv#q57nO_2#pM!m zNx76CA1LZ-oLmn&-k%!8|^FR`FHRDd`nIC4-VtffQH)3ZkG2rr-*pkP4;H3Zt+Jr|^oPh>E1hilV5B zrs#^H_$vWQCMC0yMaimUQ?e^Ll$=T~CAX4C$*bg3@+$?Df=VH!uu?=RsuWX-Dsj1XbYAbb=x=KBzzS2Nxs5DX< zD@~N9O0d#Q2~nCWEtF6tOlhgKQfx}NVpm!#5lW;Ir9>++N*g6siBsa01f{LgPHC@n zP&z7|l+H>QrK{3S>8|updMdq?-bx>(uhLKHuMAKIDuWb)U~Q@N$wR_-Ww zm3zv4<$>~0d89m6o+wY1XUcQsh4NB)rMy<&C~uW_%6sL5@=^Jud{)f=UZc9J9;&D6 zrFyGAY8o}I>Z_(x)2n`J1~sD!sjv!EL`79h#Z^KjRZ68*MrBn_<5lIn`WhZZ(gZSIwv9R|}{G)k11vwTN0&Ev6P%OQ~@eR_myB)p}}uwSn4DZKO6< zo2X6IV6~YVqBd7ssG(|@+EQ($+SG8>uC`Vq)JQc-jaFmSHfpRIr^c%ZYFo9P+FtFT zc2qm5oz*UCSGAkkUG1UvRC}qt)jn!pwV&Ew9iR?W2dNHqusTE?st!|!t0UBrYN9$y z9j%U0$ExGh@#+M1qB=>PtWHsI`+JI!m3c&Qa&8^VIq30(GIfNL{QhQJ1RA z)aB|5b)~vWU9GNB*Q)E(_38$7qq<4mtZq@as@v4<>JD|Mx=Y=y?os!u`_%pF0rjAI zNIk3`QID#})Z^+2^`v@AJ*}Qm&#LFt^XdilqIyZatX@&Cs@K%(>J9a#dP}{n-cj$W z_tg991NEW$NPVn6QJ<>M)aU98^`-hseXYJx->UD__v#1rqxwnxteXG5Ky%kTG*8V- z^VWQ{G+J8CS4*d**Zi~$T1E}hU=3)9hH99GYlKE>ltyce#%i3#Yl0?fk|t}4rfQm| zYlh~p1!$SH%vu&LtCmg6uI12jYPq!BS{^O0mQTyC70?Q5g|xz25v{0JOe?OH&`N5h zw9;A`t*ll~E3Z}1Dr%Lq%37dSMXRa>Y1OpqS`DqHR!gg`)zRu|^|bn01FfOfNNcP$ z(VA+(S~D#~Yp%7>LbWihrPfNbY2lh(Ypq3Sky?}%t;J|KdrwuKpUtH(j3}gZHP8h8>S7{Mrb3oL~WEd zS{tK{)y8S#wF%lpZIU)wo1#tCrfJi)8QM&3mNr|Pqs`UkY4f!O+CpuSwpd%DE!CE3 z%e58SN^O<4T3e&7)z)e2wGG-vZIiZH+oEmNwrSh79okN9m$qBmqwUr9Y5TPU+ClA* zc33;29o3F$$F&pMN$r$&T05hi)y`?>wF}xs?UHs`yP{pyu4&h`8`@3nmUdgaqutf+ zY4^1U+C%M;_E>wOJ=LCR&$SoYOYN2RT6?3t)!u3EwGY}y?UVLd`=Yt&?z)HWse9?( zx{sblPpkXt>GbrvpPoU_s6#re10B&(9n*20&`F)rX`Rtoozr<;&_!L+WnIx#UDI{l 
z(EarQJ(HeU&!T75v+3FO9C}VYm!4bCqvzH0>G|~ndO^LAURW=p7uAdD#q|<;NxhU_ zS}&uQ)ywJS^$L1Ly^>y857evZRrMgfnqFP6q1V)F>9zGbdR@JqUSDsZH`E*HjrAsa zQ$1L3ribXw^%i=l9;Ua{Tj@4ET(|43^$0yukJ6*{7`=@itHP7{JLnzt zPI_m(i{4f5rgztS=sopbdT+gt-dFFZ_tyvL1NA|=Lm#XU(TD28^x^sleWaeKkJ3l$ zWAw55IDNc6L7%8k(kJUv^r`wZeY!qFpQ+E%XX|tHx%xbPzP>r3>d`Z9gF zzCvHAuhLiRYxK4HI(@yqLEorv(l_f{^sV|feY?Ix->L7?ck6rfz4|_VzkWbJs2|b~ z>qqpX`Z4{uenLN~pVCk3XY{lBIsLqTLBFV9(l6^*^sD+c{kncbzp3BSZ|isTyZSx- zzWzXes6Wyl>reEj`ZN8x{z8ALztUgpZ}hkNJN>=>LI0?K(m(58bT`A@@Gv|LFT>mL zG13@m4PPUjk>2n#G8h>R$bb!CAO>n+25t}rX;21jFa~RI25$(4Xh?=^D28fihHe;! zzY$<$GBO)kjI2gBBfF8q$Z6yW z7%Poc#%g1YvDR2;tT#3o8;woIW@C%7)!1fiH+C31ja|lWV~?@d*k|lF4j2cGL&jm_ zh;h_7W*j$87$=QW#%be>an?9zoHs5Q7mZ8CW#fu*)wpI{H*Od=ja$ZTfNescyKSP`jH++l zovD1|!z07u;zJ^1%pBh|AwJG*NutBI_SXa>69=YsbK8CQyw52lD9FlgcE(k;t261! zhQxB zB!tAqhsAzv?Dg-<$Y3f#Y>_3KpGozge8*ZaB<%M`jrL4xX6{t^|0ttR`3CuN=gVW& zT_{yvhi}`E*l=^`h>l8hctw~1P69wNGm*Wxb-H5;BruIr(=7Ysx^?&^4MxEc?V=;i zQXNngGqa_-)mpk#>l}iEEPK+A4xd`CR9|J-5GQD&{b5K8iLUc{@MU8)BCE{nvQ|z0FVc79P;V?8tbtBSH2C z-!jw5aKAVgVEw+bAh5mV(b#*NiN8CUo>g$TG5Hfa+lOY@mm_JZcg9HG1eoxYd7i1n@GWdf2NYd1H%V|1NUh?`qzdas!lT`_U&bBlUp&sOR*VkQ4? z1LO8oY~1iuHhM>fw+{>bAK;`@fh#z@3iD+8wDU}iTJw(~hYybL4P5gNTZsxKlalYNaNFf&sktRf>rICljEc)eh^OY72 z8urHHv`Ht)jv6lL*akYhVmg>Db$A5Fm_|0hk*-dy94-enDzWm^^)e<2GN5uj!kZC&-3;^J#bI;9=S< zZ#Or0cc<0*MG|*6cMnS+!)yUTwoF!WKK9xsOr27*1^&#AdYC5h^?)+4naX*Xop2sj zF;3onQ$&@Y6=7Lbvk1!Mh4b)AqGpqw$Je65Y}Ktc1=^~FeHF`32g`X*`;(O2NuHKS zx-03TX_F{i(kk6)fBt`E;%^70Z-)0usSqn$*WV9Jwz_|U*xwB5zZsSOT)uC+eUW?|%x{hR3{HOc>R*p6qED{f&w9qB#pAklvglwhZ2uXjPiGq#WE&LZ z8Un1#|LVB=vvym$znStusYd_l-+DW~rY~WR4i%GRn?(M3^miq|O1dbOL<5p&X3IOM(Cw}3 zSKE?6^GibC{p*}S%K=(Fpo|kuer7@oM*p130-ps`E0r}p?`!`3(ex*&&%c!?lvGU6y4$5JU;HH zqD!qXsjq)v8)UNWzdX5Y@8)(y*jMTmaB~xuHW_Bo$WqTQTXf^`zS&-zG<_)B*`jaB zbFJQ(w8Ff|)fOFH*u(p+NpBq(UCW|*n`D`8Q5oDD{LZ8?)4N=>=$1T4miH#jxNCL? 
zi#DlSWbb?66z+G1?GNI>{}19dwQy5k|4h*K@+O1^l=n}>6%nQ9)H)ErxmpUnVM+G7UeQn$zD zd)dnUEK^DZFVNu~Z;K5JOEf2jrh#*6Thd%Hhpn7zt{7%97lAUMDTm_2m>*9_b_ zW5vz@?hXsK-==U6^hljzpOqra72+M9401ChnW05Ue6YFdAKWq`+N|F0Fn>!PIg&tX zA8J1Pk8%2&shlg~`cD$~G$n3IigHQX42vXPBumPY%)b*$cK8Ih2x(!n6x!hx6&fDt z+$qr1tnf!9eivF!-d*t*5J{2vA3`JrQ@@4CcL4Zo3H@7$m=Vp?5b-dBH^~rroC+eo z&Twn6a|-w$!{b-euRn~}{S!$3&Mdg13_l9>pUuMe;7h*$g?afJ9d&y!_1!Sc^hpZm z9&*K^19m9cag&~{y{5B8{ci^zz+Jw|4;IhEY4M!K-{I9VIy#pLO_StykvuMv*G2Lr zrjA$NH5KH$`1D_(GX-pah|a%;bIezZ_v;!`2Ak+qL4)6_z;^(zU;+GF75Gj`s{WY5 zGteuwlGOS!g-4)Ipht==Zjefi3Ru^c18-VdKQlV-(xmu~W>nxm)}$1keCrVW#?ZRL zbWRlf(g*w|wx1l+|21_g!a#d^a|ZG?jPS>GY#kI#zwt!fenXS$NMqI!;P5O{u6ANo zIHR>%U|qUa3tRz={~4VBEd-bS2%qmDxH%PkCZvK-hGh8UHrIIm4g93|V$N{fS1;^u ztneR8_v@&QR?<->#R>EN`FI>Q0i@>)ZrCer*;{0l`E6AlJ!4{&0m|; zuU@C+_9gk>f9Z9;{w6LoYWQCmn zODO+gK>ph)$p0c=!__s{&XvZdJnT)ZQ&Z~{Y@M2!CyR;yGv@UVA^fLq#J_;~iWXjf z=|cqlnBqGhqV|s|zSD|^skEY?b?x#Y>ld~5kr7GeI}^8e^Id%9-j{jG>m z>f@hh<*roUhjF=*uIEw`Rg$QdM2#ex67F?#9$BrhZ_=XhFIIp4sp}y>n7(giY;IRe z?FG)ZWRkqqCq+KyI_iHcoS(aO_HL%+O`ZFZ@vg~%89)EJxZ+~SKQaDow_pDbmvsG} zY8CeH!&1Lz=QqKL1F6`#Ifb3}5&sI@|AC|9e$COJEmyfXlM;UT{ysSz@qKtA`Tma} z`t#=gB}bp8;^>VOjylp=;hyLy^M!?oe_;yp)t;uFDgI{u;o%DPHAzezugPGJDy9ZP zCb3@3_<6X+VvM;f#>3sY67vW91yeX#?1$Zg3BR`$Kk41?rSMHeg*2CrlFfv#b?q|a z3HI40B1{A0@M<1yzDJV0t*qIKasmBbg1_8lZg#Hz!(T2rFR zwOmsoYi{cBv?k~FIcCLja~On8gd5n-lp8sywuKQOr=(!nR6SH1*6uA=Q} zO--r1be(HgR=HG#nk&S~JK}PEoHEmF`rmETa=yRg@QI3!jSMmI_-$FqZ})gRH|RQd z;(ELG=>B$`O6SCwa|^M#+m}flluzF6oBVQ9de_TMsrLK+!{$?S>D{@6ZM|w>9o|XQ zCy83`BUne;Bx=2XU>)g_X!;~-y^>%Z8Iq`Tsxas;W$x^st**(4nwCKQ%3HzP{CWSZ z7k@rhB##BZAEd2bxR`F)Y_nZox1jxWCvP${U93nks7X9=Gs|!uW)#wSn0CQ=n3l|W zSWB|j@w5E^&Sp5g60A);$qY(gr&81(^T%A`Oh~rFYgscUnVw*dGQt8~u-V97Qi4hwk04t2<30q$z15y!0!C^`GiPe^al?$@_d i8BBV#_=OGTt4c`=(()~UF literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle b/pandas/tests/io/data/legacy_pickle/0.20.3/0.20.3_x86_64_darwin_3.5.6.pickle new file mode 100644 index 
0000000000000000000000000000000000000000..88bb6989f5b088e55cdfde2423eb9f0b6d393bb7 GIT binary patch literal 127244 zcmeF&1#}e4-YEKZcMa|kEVw(t1HoNFlu3vY0fM^@9^BpC-QC^Y-Q8}_t>T8W5N@Yf@9)SJ-b9iLU^ZC zw-z598y1}C7Ze>ClIWL|=#c7WoBR=UT$6o$e_f8FT`@#2as|> z!jpoXQVC9No9ZTFgPo!Yc9TKgBW@GQ*PCW!tmlG*qn+Z_8b`)2&`l&J#W_diC{eSV5&bF;w^Dsr|z%=Q{ANZ zG>&0ThI-1|jYX7pLI$9odf^*@yV3Hgfq{W_ema}?PXLmX8Zy+smbU@^@#j?~_*gkj z22?-;^Z|Xre3hL_ShM#h#Jjn5$jY>4n|3!&CVETgS0)C?uGW8CH|@0Y-)_&32IsZbO zqH+bg8af5*YV5SXCf?F?HT_kPJ)JsQ*sn-&zarjBPK9`?TB1`&JvD)e>5vfQ9B^tP z6OtV1lyR){k#zd1P)|uXh2XSsZ}X%&2S+D|C#JeWl7rnuuu~ILT>;67p3AN7rbD0= zIL#h&dM#JQwEoIf#Z}ei@2cji;i~N%@zm)|e^*m~^<~bvHE+eP2A$il40p|Ti+?@l zxnbPtnE0y6iIK74iHU#eDJkeEoi57h57Tfv8C39tBOjKb zhsRI$_3?<$j`4GPUh}xKn3n&GqqKiP zipRFi${qTmdMi<(K&MfA+p_B)A3b%YwX5BqYnRgqU6KC}H78fRr>*>}`s`HY|C3t( ztLR*dpt>#nm!<6#?yu1QvY$K$JVx1==p427oR}w$GNrXE$5*+gDMq0bm|6Oktkr3A{JT}!W*6@A^ ztoHlU?H`>slXi=dMSk!8qXg#?{>2jjMKKJ=)1ClPxtwr+Zl+FpQ3X6V;Ljjv+CNX& zaPNgDy6Im-M$i4HcV&Pk@n4S7Tnqowbl89DC|ygVsyZi4bb5BqI|8g4-XStJ)kQ~D za}NI+nEp3IK`L#G=7|;X@5iEBh6g8j2Aoc`M*mebHyPs@o2CW`f9c_C#U&?rVz>IQ zV*fb~*D%=`5{3UdZ}9IiouREW!gY(@QIj)n^X5lt0G>ABbuljX-^p}0=XsLV_YUwq z7Z?5eDFnCWJeInlDcGBhC$ns1<+PW$AIg*#!KNWyO+yvHeTzIr~oemF5=B4d-2oc_Q$50&;gH884$ zbNSBT(dEf%Crp>{@Q!Jzu|clj#31h!#fdP4crKjvL=0-;9Uswd!n5a$p3*9VGf{|)4NY~PO!TjD zNLozPxd;rVOK`-Kmi&*}5#$PTMio&p&Sm_nSWyXo>_jDdt{;i&>=2lii%3-0v|}mW zW8Knrx_fter0w+d;uL38ue1Zbz2ACEF{+Q}5aqp?z^Hzn9W<)HH`rfG%@@MD`PkrC zs8orm^>$gF+-E%v_m8Zf?e5m{&?=1s^@JUo+iE5IrJ}Y`7ig{I6NlOTjQapJMvpxx8U5D zGvyERw!_o_pWos}op1sM_;{*}^hX+hWcsP!Qb`NR{!?7m6PK#dvP97m#XMJi{=6z|8n4st`aWSb;W1Qe=)iSO2QooB&pg-x<%u#r&!V)jXw3N8S4E0$qQu28=5%I?~Ck#MHcQ z#?wW_1SfV(b<;LLEH)q~JfH{Y~&Y*NYM|IOftOB*>P z|7mdaPqPm19HW7^;yQnv>-BD)>7i%C(?AH21S!xnmGo@hnWtk~3xEmeuZcrBS76i| zCuO%#!?QH|F`qg z^DIsZPyJ&i^6!lo{=2CQ)RzCfIjU1s2OeFa&T9x_c8hE3F^nhXd0abf(i@MMU%@ID z92@T3J<8*(2F-4Ge0lpxfs2k66MJ{}ID#;aT-5&c+|x~_ElB9nQH zf7h1&SnPBb3#2B$Rz81kQFzq8KN|8sJ#d^RocWJ6%DoW! 
zUft?Ec>YxLsd%RsiF3xofqy?Sfv??s18=xL<;lM}A^4j$5GO;TY^Pgs+BE^a3kgyn z4Kg4Lav%>1pa}kccgfC(G&DHcGir4@)TpLTkiV?O{klCW?e?4zR^Y#PSM@%5*RWiu zcl{VD{I5k|Jk(k9^>pD*Z=O`7WUAZ#z1BHjhIfc_`tIOpr}y^%ZSU9f-Ax6B#)Wy7 z!~VL=QST8BQ^2Cmxc86c%)jl(tDf?9$f&cthdX^XiBJgb^vx{7A-uChA|etZBMPD- z8locxVj>n|BR)tvBt4P=$%te^G9y`#tVlK_JCXy*iR40ZBY6;CBrlQ=$&VC33L<_; zA*3);1SyIXLy99MkdjC#q%=|nDT|at$|Dt!iby4-GExPpiufbdkm^Vcq$W}esg2Ze zhEa8qdPpErA8CLzL>eKDktRq}q#4p2X@RsvS|P2GHb`5f9TJ4JM}ml8NkrBv9WE3(Q8H0>P#v!T5cw_=H5t)QcMy4QBk!i?uWCk)5 znT5~0CEsHgd9eWAV-m7$Z_NZauPX(oJP(dXOVNrdE^3e z5xIn1My?=Nk!#3x}U=&Cz=b*jpjjp(Y$CrG(TDZEr|M| zh0wxi5ws{;3@wh9Kue;f(9&obv@BWhoD2zVd!wwjgCM^qNC8!=ooY?Iu1=m$Da%dI!CW-b3%B5739`BlI!)1bvD=L!YBB(3j{d^fmeh zeT%+B-=iPUkLV}#Gx`Pnihe`Cqd(A}PUCl0fG`{*FcPCM8e=dP<1ii*FcFh58B;J7 z(=Z(~FcY&d8}q@^Vd=39SVk-pmKn=}WyP{#*|8j0PAnIe8_R?FVtKKASbnSkRuJ>U z3Sot@B3MzZ7*-rBftAEcVWqJ$SXrzbRvxQ>Rm3V`m9Z*VRm>l&hE>OEU^TH?SZ%Bh z7J${o>S2LceXIf25Nm`r#+qPFv1V9vtOeE*YlXGO+F)(5c32SB9t*}ouuv=v3&$d` z4wwsz#Gk zHUJxl4Z;RvL$IOPFl;#H#ztTxu~FD)Yz#IQ8;7N0#+^kMr;$d8QX$w z#kOJFu^re>Y!|j0+k@@J_F?<61K2_A5Ox?lf*r+!=dlae zMeGuG8M}gA#jau3u^ZS;>=t$#yMx`u?qT<_2iQaG5%w5+f<48aVb8G_*h}mc_8NPG zy~W;P@39ZqN9+^!8T*2L#lB(Ru^-q^r|U;?X9W-^a1y6*8fS18=Wreua1obq8CP%> z*Ki#-a1*z18~4G};py=Vct$)Eo*B=AXT`JO+3_5BPCOT$8_$FL;(76Wcz(P9UJ&=g z3*m+FB6v}}7+xGNftSQf;id61cv-w0ULLQ2SHvsfmGLTgRoox1hF8aH;5G4Dcx}86 z9)Q=y>*0ZTeY^qQ5O0Jx#+%?x@n(2)yanD8Z-uwU+u&{Sc6bop9uLMt@K8Jq562_$ z4!8@C#G~+zcr+e^$Kr8#Jl+XUz!UK#JQ?qdcfq^jDR?)$JKh8DiTA>L<9+bHct5;9 zJ^&wx55foIL-3*aFnl=f#z){I@lp6_d<;GoABU&n+ucvMtl>#8Q+3$ z#kb+x@g4Y1d>6hO--GYP_u>2T1NcGw5Pldxf*-|?;m7e4_(}W}ei}c6pT*DN=kW{p zMf?(e8NY&G#joMl@f-L}{1$#2zk}b!@8S3H2lzwpQm&f`@~)AGhdD3W`Ji9p3D0;T zFiLg?IiBT&e;##t#|~-3uBw%@IO9RsJVQ#)1_P-7?P(sDmW(sFNZUN`9zC09=;_(K zYaWiWcMoxf9%-BNj>)?@<9+X@g9d1V7HES$U^*~8m;uZPW&$&VzivG%>}LbBgE_#Q zU@kB>mil3=9V&zz(1bj0B^=j$kwx1IB`JU_96fOaK$XBrqB540ZvX z7mMC41?&cP2YY}$!Cqi*un*W5><9J-2Y>^?LEvC;2sjiR1`Y?^;0SOeI0_sMjseGl zA>`01~4O- 
z3Cs*;0keYH!0ccSFejJ`%njxNeZjn7J}^J%d9m)<3W9!MA+Ru51S|>`1B-(tz>;7o zuryc(EDM$c%YzlbieM$MGFSzy3i^Z9!0KQPuqIdwtPR!yJ+I?ETV1do7zow}8-NYL zMqp#G3D^{D1~vyh?-xB=ORyE#8f*i$1>1o^V0$na3;{#IFfbg906Ty#FcORcJA%=m z=UuC3iv{Drc(4;iTLQ^0PZ=iRJl^Sqt)Y(2qVU~jMw*ca>v_6G-m z1HnPyU~mXH6dVQ)2i@QZa3nYi91V^E$AaU)RB${v0h|a<0w;r0z^ULga5^{xoC(eX zXM=OVx!^o-KDYo}2rdE_gG<1r;4*MIxB^@Wt^!wsYrwVOI&eL>0o({~0yl$Oz^&jm za67mI+zIXicY}Mtz2H7@KX?E<2p$3tgGa!l;4$zxcmg~Lo&ryUXTY=IIq*Dq0lWxa z0xyGCz^mXj@H%({yb0a{Z-aNhyWlYxFd zpaptf9Cwp1Z zU9cV)2-XK1fDOS$U}LZe=y`4D*_wgP!4_akuoc)EYy-9h+krt~doUOb0YkwsFdU2k zJAf`Q5{v>ng3(|M7z@UM@n9z~0Zaswz+|v9*ahqgrhwhR?qCnFC)f+@4fX;1g8jh$ z-~ezSI0zgJ4grUP!@%L78yo?S1V@3R!7<=ia2%Kljt3`z6TwN~WN->N6`Tf62WNmY z!CByJa1J;ZoCnSa7k~@FMc`s^3Ahwo1}+CzfGfdO;A(IUxE5Rot_L@O8^KNBW^fC* z72F1H2X}xw!Cl~Pa1Xc_+z0Ll4}b^3L*QZX2zV4c1|A1bfG5FI;A!v-cosYdo(C_0 z7r{&5W$+4k6}$#s2XBBk!CT;M@D6wvya(O~AAk?RN8n@d3HTIz20jO0fG@#U;A`*= z_!fKzz6U>mAHh%HXYdR775oN%2Y-M+!CwpgD4y28VjvC@APG_+4Kg4Lav%>1pa@E! z3@V@sYM>4ppb1)_4f=rT!1Q1SFe8`=%nW7$vx3>c>|hQsCzuP&4dwxT!MtETFh5uT zEC~96g}}mK5wIv&3@i?o084_Uz|vqDuq;>(EDu%yD}t54%3u|+D(DYZ1FM7H-yAtr zr6%mx0&9bHzyPo=SPu*Y>w^uzhF~MGG1vrb3N{0qgDt?8U@NdS*amD1wgZE}_Fym= z0)~QNU^o~7b^u*qBp3yD1f#(iFcyphh!2#eva1b~c90Cpnhk?UEH#h0=l zDmV?C4$c5)g0sNc;2dx+I1ii;E&vyTi@?R;5^yQF3|tPb09S&mz}4Uya4onFTn}yl zH-ekM&EOVrE4U5Z4(nW#ckCH#qMM0KJDQIn`e)F$c>0YqJ*9uY{?CmIk9iAF?Yq6yKI zXht+AS`aOXRzz!}4bhfpM+6b=iC`jx2qnUZa3X@}K)8rVB8uopL=!PYED=Y<6P<_z zB9TZUl8Mel7osbXLUbd#6FrEYL@%N@(TC_u^dtHc1BijdAYw2vgcwQ;BZfQwTkZ&A zBr%E@O^hMN661(eVmvW{m`F?_CKFSLsl+s5Ix&NoNz5W<6LW~U#5`g?v4B`eEFu;Y zONgb!GGaNgf>=qcB32V?h_%EzVm+~e*hp+5HWOQjt;9BBJF$bB2E)$h_l2w;yiJIxJX1B3=`3h_}Q$;yv+!_(*&rJ`-Pvuf#Xv zJMn|~NgyOjVkAxyBuP>vO)?})awJa*q)1AnOe&;GYNSpYq)A$&P5O}O$n<0eG9#IZ z%uHq>vy$1!>|_oyCz*@PP39qe$-HDfGCx^>EJ*s1g~-BW5wa**j4V!;AWM>^$kJpP zvMgDSEKgP-E0UGS%48L?D(O#FBde1&$eLs=vNl0^~gZ7KG}e5NH!uHlTFB` zWHYik*@A3Iwjx`TZOFD{J2HrDPX?1AWGER%hLaIw2hv4Gl2K$wGMbDbW63x&p6o;> zkcngxnM`&jyO3SU6tWxHo$NvOBzuv)$v$LXvLD%>96$~v2a$uxA>>eU7&)ADlOxEH 
zwA)k`Z$miq>@+J9-d`-R~-;(dh_v8oiBl(H^OnxE1lHbVh zT(o-3zj8rBn zGnIwPN@b(6Q#q)dR4ytvm51`B@>2Pz{8Ry|Amv9Dq6$+*sG?LcsyJ1GDoK^1N>gR1 zvQ#;$JXL|JNL8XLQ&p&{ls{FCs!r9QYEreR+Eg7XfT~N?qXMb=R0FCZ)re|LHKCeP z&8X&73#uj6ifT=@q1saIs35966-Iq;MNu88Xex$^rQ)b~suPt! zB~nRLGS!*tLUpB5sBTnust47R>P7XY`cQqTepG*I05y;rL=C2fP(!I<)Nsm8ji5$S zqo~o;7-}pvj!LD*Qxm9()Ff&$HHDf=O{1n$GpL!=ENV72hnh>xqvlf!sD;!bYB9Bh zT1qXWmQyRJmDDO~HMNFXORb~UQyZv_)Fx^(wT0SBZKJkRJE)!1E^0TmhuTZ+qxMq= zsDsoY>M(VLI!Ya*j#DS7lhi5dGjJ`E$TLP zhq_DMqwZ4=sE5=e>M`|%dP+T`o>MQVm((ljHT8yiOTDAsQy-|0)F$E|ev_;#r51o!qPiLSr(wXSY zbQU@*osG^;=b&@ax#-+<9@>}AOXs8W(*@{)v>#oFE=(7pi_*pD;&chRBwdOwO_!m| z(&gy#bOpL1U5TztSD~xY{&Y3EI$eXVN!Oxl({<eej-%u0PILmD zNGH+BbZ5E?-IY$EyV2e09&}H-7u}ogL-(co(f#QG^gwzLJ(wOs52c6E!)Z4?f*wha zqDRwX=&|%TI+Y$zPoO8#ljzCx6nZK>jh;@=pl8yv=-KofdM-VWo=-2J7t)L9#q<(- zDZPwdPOqR>(yQpz^cs3Cy^dZ_Z=g5Qo9NB-7J4hajowc0pm)-{=-u=ldM~|?-cKK( z57LL|!}JmQD1D4RPM@Gp(x>Rt^cngreU3g)U!X72m*~s%75XZDjlNFbpl{N*=-c!i z`YwHszE3}(AJUKL$Mh5WDgBIoPQRdE(y!>(^c(su{f>T5f1p3opXkr@7y2vxjs8ym zpnuW`gEAO{GXz626hku%!!jJhGXf(r5+gGTqcR$!GX`Ta7GpC$Ogbh#lYz;|WMVQi zS(vO$HYPihgUQL{VsbNi7+)qYlaI;I6krN6eoP^zFjIsn$`oUYGbNakOev-`Q-&$a zlw-;>6_|=lC8jb{g{jK;Gu4>tObwY)wn8r*KrYX~m zY0k7@S~9Je)=V3wEz^z(V%jsoOb8RogfZbv1k-_WF_BCZ(~*g0VwhMaj)`YFF$qi} zlf)!5otZ97S0;t&#&lopn6Jz?<~#F) z`N<$G%3>_e5-iD5EX^`3%W^Ew3arRVtjsE`%4)368m!4$tj+qc>Dcsa1~wy`iOtMr zVY9N?*z9Z$HYb~l&CTXvec8NhJ~lsFfGx=Sv4z;eY!S97TZ}EvmS9V=rP$JJ8MZ82 zjxEntU@Nkf*vf1bwkqq-R%5HPHQ1VLEw(mWhYeusvh~d*`VPn}iHlFRoCa{TY z5}V9+X1lOm*%Y=L+nw#f_GEjpz1co&U$!6HpB=ysWCyW>*&*yub{IRHb+aSbk?bgT zG&_bJ%Z_7H+41ZIb|O28oy<;Qr?S)7>Ff-4COeCr&CX%xvh&#a>;iTnyNF%PE@79l z%h=`Y3U(#Cie1gFVb`+j*!AoNb|brq-OO%bx3b&V?d%SAC%cQ?&F*3MvisQm>;d*5 zdx$;E9$}BN$JpcS3HBs=iapJqVb8MX*z@cK_9A(J>+B8oCVPv$&E8?} zviI2g>;v{8`-pwaK4G7-&)Dbe3-%@Ziha$#Vc)Xv*!S!Q_9Od={mg!0zp~%h@9You zCyQ_>hjBPZa3n`@G{PpjjPVp;A(QUxY}GDE`Y1c)#C!W`dkC9A=ij&%r)Vfa?QBr zTnnxx*NSV+wc*-w?YJPWJr~S{aG_in7tTd+9XJ;k$whG;xo9qii{;|Dc&-zdz$J1? 
zTr$_0>%w*AQn+qhcdiH5lk3Ix=K64bxqe)KZU8rs8^jIfhHyi z+!$^wH;zl?#&Z+6iQFV^GB<^r%1z^@b2GS^+$?T3H;0?c&Ew{C3%G^cB5pCagj>oj z-DH&&%iI^YaDxg1jGJh%d|+;fwOc_~LvCz9e6YFU^3I3K}x;9YzqAH{d%qxl#k7!Vl$#@xysHKY}00kK#x3 zWB9TBI6jph&rjed@{{<<{1kpFKaHQx&){eBv-sKk9DXi8kDt#k;1}|X_{IDZeks3< zU(T=KSMsa))%+TMEx(Rm&u`#2@|*b0{1$#Izm4C{@8EaxyZGJw9)2&skKfN9;1BYL z_{01W{wRNpKhB@vPx7bu)BG9!EPswa&tKp#@|XC_{1yHxe~rJ+-{5cZxA@!q9sVwV zkH619;2-jj_{aPc{we>Af6l+)U-GZ`*Zdp)E&q;x&wt=Q@}Kz6{1^T!|Be67|KNY} zh=2;1fD42`3Y0(#jKB(Ou{nrcg_$Ez}VLgt|gKAyB9wP2ol;0!9s`-DufB)LWIyka0!t@l+aO#7Gi{0Ax?-FItd9vqL3sc z3!Q~7LRTS0=q7X*dI&v*UP5o7kI+}>C-fHv2m^&d!eC*DFjN>O3>VzO2w|i!N*FDS z5ylGRgj8X?FhQ6oOcEvwQ-rC)G-0|hLzpSd5@ri?gt@{zVZN|HSSTzK77I&+rNS~{ zxv)Z5DXbD!3u}b6!a8BSutC@;Y!WsLTZFB`HetK4L)agOUy0i5q-tHVm>jySU@Z&`iX_a!eSAzs8~!aE|w5WilxNTVi~clSWYZ2 zRuC(SmBh+o6|t)5FIE$)i#5cWVlA<@SVs&H>x%WnK(W5qKx`;B5*v$6#HM01vANhn zY$>)9TZ?VPwqiRmNNg_#iy>mD7$$~`5n>0?B}R%-Vn;Drj1gnSI5A%ABqoT7Vv?9F zb{4ycUBwi!o7i3KA@&q|iM_=>VqdYJ*k2qV4ipE8gT*1@P;rg#F64CakMx_ z94n3!Q^oP(1aYD`Nt`TB5vPjN#OdM;ai%y+oGs1~=Zf>h`Qid`p}0s~EG`k3ip#|1 z;tFx4xJq0tt`XOY>%{fq263afN!%=M5x0ul#O>k^ai_RT+%4`A_lo<({o(=fpm<0; zEFKY$ipRv`;tBDjcuG7io)OQA=fv~k1@WSINxUpx5wD8Z#OvY>@uql7ye-}l?~3=t z`{D!fq4-FAEItvRiqFL7;tTPm_)2^&z7gMw@5J}w2l1o$N&GB+5xTKk}6A8q^gp?R86Wb)sSjRwWQio9VtMnE7g+%rTS6>siD+JYAiL8no7;2=28o( zrPNAlEwz!_O6{Z|sl5~|g-D@Nm=rEWNF5}X6e&eX9i?a~Mv9f5lprNaNm8=Z zS?VHnl~SZ`Qg^9`)Klsu^_KcbeWiX0ibX|gm$nkr3`rb{!VnbIt2wlqhYE6tPUOADlh(jsZGv_x7eEt8f@ zE2NduDrvQ}Mp`Salh#Wcq>a)hX|uFN+A3|6wo5ytozgC8x3ovvEA5l^O9!Nb(jn=v zbVNET9g~hrC!~|oDe1IyMmj5*Qq>Iue>9TZ1x+-0hu1hzho6;@mwsc3jE8Uas zOAn-n(j)1y^hA0pJ(HeGFQk{!E9tfLMtUo~lio`oq>s`k>9h1j`YL^szDqx(pAsUY zGA83PA(JvC(=sEoGAHx0Ad9jj%d#S?vL@@YA)B%#+p>?GPEIdpkTc4e|3;l55L#&p$~hH@jhvD`#%DmRmx%Pr)V zax1yD+(vFIx08e9_HwWsB8SRha=08JcaU9jq#Px8l%wSsIaZF7&}~DUXsz%VXrR z@;EtF9xqRjC(4uL$?_C=syt1eF3*r>%CqFz@*H`tJWrl4FOV0?i{!=f5_ze-OkOUp zkXOp9tV%W| 
zyOKl6spL{}D|r-OC9jfC$*&Yp3Mzg|A*HZVL@BBiQ;I7kl#)s*rLOw%P6<-lE5S;L5~_qL;Yx(kL2)UON|e%3iB@8iSS3!0S2`&PN}`gaBrBbj zE=pG=Md_w=S9&Ntm0n73rH|5A>8JEp1}FoSLCRodh%!_erVLly$_QnoGD;b(j8Voa zitWx29K zS*fg2Rx4|iwaPkWy|O{ssBBU;D_fMU$~I-YvP0Ra>{50sdz8J(K4rghKsl%!QVuIe zl%vWq<+yS}IjNjdPAg}Wv&uQ;ymCRgs9aJmD_4}O$~EP>aznYP+){2Uca*!zJ>|ag zKzXPosQRge)WT{JwWwN5Ev}YOORA;R(rOvCtXfVjuU1ei zs+H8rY8ADr>aSK)tE)BCnrbbzwpvFGQ0uDo)Ihbq+CXinHc}g_P1L4pGqt(eLT#zG zQd_HS)V69nHAro*2CE@zs2Zk*s}X7k)ul$NQEEpuT8&X-)i^a??W88CiE5IXtaetr zs9n_*wVT>q?VQHrI8M7I!T?ZPEn_-)70te40WbDOP#IGQRk}j)cNWHb)mXQU92uqm#WLuIQYAx=G!vZc(?Y+tlsq4t1xxOWm#RQTM9*)cxuK^`Lr4J**y4 zkE+MiILIe0s`bqt)eo?=w-_-Bw5A~;tXsCv1 zxJGECMrpLhXspI*ye4R(CTX&!XsV`Zx@KsmW@)zOqovc*YZuS~;z}Rza(% zRnjVJRkW&_zgA7FuGP?LYPGc5S{*GwtE<)10=4>D1FfOfNNcP$(VA+_wB}k1t)+G_2zAg#R?tc7TyT9_8DMQ9x~mlml-X&tp_Ek=vg;-SK zb=6X|Zd!M(ht^Z;rS;bOXnnPQT7PYTHc%U+4c3NeL$zVraLuia&_-&bw9(oaZLBs< zOV!3}6SRriByF-bMVqQk)23@Pw3*s0ZMHT?o2$*!=4%VIh1w!*v9?58sx8x&Yb&&s z+A3|ewnkg4t<%17qpAoCGE0yMZ2n9)2?ebw42&3?Y4GDyQ|&P?rRUU zhuS0UvGzoJsy)-5YcI5y+AHm~_C|ZFz0=-nAGD9!C+)NLMf<9K)4ppzw4WNHqdKPJ zI-!#~rPDg2vpT2qx}b}?q|3UZtGcG^x}lr8rQ5oXo=#7%XV5e1ne@zh7Coz;P0z09 z&~xg!^xS$L-B-`6=hO4+1@wZtpI%5WtQXOX>c#ZpdI`OxUP>>mm(k1W<@EA;1-+tP zNw2I|(W~nIdNsYeUPG^`*V1e2b@Tweu3k?M)a&aF^oDvPy|LazZ>l%bo9iv~mU=6_ zwcbW=tGCmG^!9qN9-@cpVS2b8p?A<-dZZquchsZx7(G^x)8q9{dV-#)C+W$0XT6Kw zRZr2o>D~1ndQZKV-dpdZ_tpF9{q+I*Kz)!tSRbMf)raZBb+-`+4>xPu0BtnuP@LS>WlQn`VxJqzD!@Puh3WO ztMt|S8hx$4PG7HY&^PLv^v(JfeXG7r->&b_cj~+J-TEGVuf9*;uOHA4>WB2h`Vsx8 zeoQ~EpU_Y0r}WeM8U3t&PCu_-&@bwj^vn7c{i=RVzpmfVZ|b-7+xi{-u6|FyuRqWq z>W}ot`V;-B{!D+aztCUmuk_dY8~v^RPJgd|&_C**^w0Vi{j2^>|E~Ygf9i;V8km6_ zgh3jVK^u(08l1r!f*~4`AsdRJ8k(UShG80(VH-Y1IwQT2!N_Q2GBO)kjI2gBBfF8q z$Z6yTHwG93jX}m>V~8=-7-kGN+{OrFq%q1EZHzI-8sm&q zW4tlJm}pEgCL2?Xsm3&8x-r9;Y0NTa8*_}g#yn%bvA|epEHV}wON^z)GGn>1!dPjn zGFBUFjJ3u(W4*D#*l27rHXB=vt;RNEyRpOAY3wp~8+(ks#y(@ealkle95N0YM~tJ! zG2^&#!Z>N1GEN(3jI+i$jtHw3sx^cs}Y1}ey8+VMm#y#V{@xXX! 
zJTe{|PmHI=Gvm4O!gy)CGF}^RjJL)+cvzpn=>}C!# zr?g z6tkPz-RxoZG<%u7%|2#dv!B`D9AFMK2bqJ-A?8qXm^s{ZneGxy#&b?lJe8`^^330rQ}F$UJNwF^`(Z z%;V+>^Q3voJZ+va&zk4V^X3KfqIt=@Y+f<1n%B(h<_+_vdCR=<<{8j<0pyg*3vI<*8tfE#itGHFdDruFnN?T>DvQ{~(yj8)fXjQT* zTUD&8mcLcas&3V=YFf3d+EyJaz^ZH2vjVO9Rs*Y{)yQgWHL;pn&8+5D3#+Bo%4%)3 zvD#YgtRSns6>NoAp;nj`ZbeufESD8&MOhuKXe-8wwc@OJtCN*rC0a>Vvent@Vs*7r ztZr6!tB2Lo>Sgt|`dEFfepY{LfHlw>WDT~4SVOI0)^N*hjj%>qqpZ=^7;CIG&Puh$ zTNA8_)+B4PHN~20O|zz3Gpw1`ENiwk$C_)+v*ue1tcBJhYq7P&T52t`mRl>VmDVb2 zwYA1tYpt`^TN|v6)+TGSwZ+#%jiI%*xW zj$0?Jlh!Hgv~|WhYn`*sTNkX0)+Ot*b;Y`BU9+xRH>{i1E$g;*$GU6Xv+i3DtcTVk z>#_C3dTKqho?9=hm)0xmwe`k&YrV7HTOX{C)+g(;^~L&XeY3t>Kdhe?Vxu-@<2GTF zHf7T`W3x7A^R{4%wq(n;Vym`h>$YK=wq@J4kDbm=Z)dPG+L`Rkb{0FUoz2c}=dg3y zx$N9_9^2Q>Yv;4`+Xd``wx3b_KhlUCFL& zSFx+w{&qFHx?RJrY1guA+jZ;!yRKc&4z%mr4eW+?BfGKP#BORgvzyy3?3Q*byS3fM zZfm!*gY5QpupMHD+F^FM9btE{U3R1$Wp}ir?HD`Oj3C9(GT=m)+a$WB0ZD+5PPS_CR}(J=h*%54DHc!)>=c!X9alvPauv?6LMZJJlX< zPp~K2lkCa%6nm;Y&7N-0uxHw{?Ai7ld#*jto^LO(7ut*L#r6_=slCizZm+Oc+Nn zZlADE+NbQ(_8I%Eea=2_U$8IQm+Z^-75l1v&Ax8muy5M8?A!Jo`>uV@zHdLUAKH)X z$MzHZsr}4;ZojZ!+OO=__8a@H{my=Gf3QE=pX|@}7yGOI&Hirxuz%W!59)*Y;68*8 z=|lO@K8z3R!};(&f{*AU`N%$skLsiO=st#z>0|lWK0ZF_eA4@5@X6?t$tSZ<7N4v> z*?hA5itWx29K zS*fg2Rx4|iwaPkWy|O{ssBBU;D_fMU$~I-YvP0Ra>{50sdz8J(K4rghKsl%!QVuIe zl%vWq<+yS}IjNjdPAg}Wv&uQ;ymCRgs9aJmD_4}O$~EP>aznYP+){2Uca*!zJ>|ag zKzXPUky~LMykox6lzK}m6}>jqo!5Usp-`WYDP7anpw@FW>vGP z+0`6sPBoXBTg{{9Rr9I&)dFfkwUAm^Eut1xi>bxc5^71clv-LXqn1_6spZuQYDKk@ zT3M~4R#mI1)zunmO|_OO^&tI$52fPF1I=)72U3Om&tzTb-lMRp+Vm)dlK8b&PB^wx>?<#ZdJFb+tnTFPIZ^MTiv7XRrjg;)dT85 z^^kg4J)#~}kEzGi6Y5FzlzLh{qn=gIspr)T>P7XEdRe`qURAHD*VP;9P4$*~TfL*+ zRqv_y)d%WB^^y8meWE^9pQ+E)7wSv(mHJwJqrO$&sqfVf>PPjH`dR&=epSDz-_;-L zPxY7jTm7T{RsX3`w5VD%ExHy%i>bxZVry}Rd7v~Vp#i?1cn5^9OG#99(9 zsg_JrG*#0yq3N2TnVO~9nxnazr}omP0Oz3&~j?IwA@-AEw7eO%dZvC3TlP4!delns8&oXu9eVAYNfQ&S{bdZR!%Fg zRnRJGm9)xQ6|JgPO{=ce&}wS6wAxx7t*%y2tFJZC8fuNS##$4tsn$$uuC>rwYOS=^ zS{tpc)=q1$b#q&a25N(}!P*dQs5VR+ 
zu8q(}YNNE#+8AxDHclI_P0%K4leEd&6m6+8S-GwoY5GZO}Gqo3zc^7HzAxP1~;R&~|FOwB6bsZLhXZ z+pita4r+(A!`cz;sCG;{uAR_MYNxc*+8OPvc1}C5UC=ISm$b{;7452aO}nn$&~9qC zwA(0*#awBOnv?XUJviz1?mXd=3ZA!3SHBDRPl;))OvD&mPS5iTM`e33vT6p2J) zkwhdF$%G$VF*)L!WNEjg(rLw2q_{(a*;x$6sbgNkw&By=|p;wL1Yw}L}rmi zWEI&&c9BEm6uCrhkw@ee`9ywEKok^(L}5`x6cxopaZy5)6s1IIQAU&%Rg zL39+IL}$@ObQRr1chN)i6um@m(MR+Z{X~B;KnxUv#9%Q*3>Cw~a4|xR6r;pwF-D9P z%@AoL2MM8#AdNYY!%zYcCkb36uZQ3u}AC``^0{6KpYf@#9?tn92LjJadASN z6sN>#aYmdK=frt&L0lA<#AR_sTou>Eb#X)76t~1}aYx)0_r!hiKs*$W#AES9JQdHx zbMZpF6tBc<@kYEA@5FoYL3|XS#AoqEd==lsckx5~6u-o8@kjg>|3nl$svb>`uE)@0 z>aq0LdK^8j9-@cp@$@h~T#wM>>k0IPdLlisogOkJOXvDfEjm_JdLg~AUPLdd7t@RDCG?VdDZR8_MlY+E)644>^on{Vy|P|Kuc}wmtLruNntCn0 zwq8fCtJl-(>kagVdLzBD-b8PzH`ANzE%cUpE4{VeMsKUP)7$GE^p1Kby|dm$@2Ypx zyX!slo_a66x86tZtM}9U>jU(G`XGI)5q%*^ojZ; zeX>49pQ=yOr|UEHnffe!wmwIntIyNt>kIUS`XYU?zC>TDFVmOnEA*B6Dt)!SMqjJ1 z)7R@8^o{x^eY3tr->PrZx9dCfo%$|)x4uW;tMAkI>j(6M`XT+Wenda2AJdQPC-js0 zDgCs5Mn9{c)6eS{^o#l>{jz>Vzp7u;uj@DToBA#Nwth#ytKZY_>kssY`Xl|Z{zQMO zKhvMf>|Ehn}zw1BrpZYKTxBf@}tN+uZ7*UOA zMsy>F5z~le#5Up>ag7io)QD$<8R15R5#LB)Bs3BkiH#&iQX`q67^4m_6f_DMg^eOcQKOhq+$dp`G)ftzjWR}AqnuIRs9;nyDjAiHDn?bKno-@T zVbnBg8MTc%MqQ(xQQv4_G&C9+jg2NoQ=^&D+-PC6G+G(0jW$MGqn**-=wNg-IvJgf zE=E_Qo6+6qVe~Y58NH1@Mqi_!(cc(g3^WEAgN-4^P-B=e+!$euG)5VtjWNbpW1KPG zm|#paCK;2BDaKS|nlas&VazmU8MBQ!#$02bG2d8VEHoAwi;X45Qe&C1+*o0(G*%g_ zjWxzvW1X?y*kEilHW{0ZEyh-3o3Y*4VeB+^8M}==#$IEevEMjg95fCYhm9k~QRA3# z+&E#JG)@_(jWfns1gJTx8| zkBukBQ{$QO+<0NUG+r66jW@eGxy#&b?lJe8`^^330rQ}F$UJNw zF^`(Z%;V+>^Q3voJZ+va&zk4V^X3KfqIt=@Y+f<1n%B(h<_+_vdCR>R4bYl-HKtwv|?GYtvFU(E5r)5;#pxw0J*{3=Z>x{h*Xn2Ww+2`PtwGjcYlt<}8fFc*Mpz@Q zQPyZ{j5XF8XN|WeSQD*D)?{mnHPxDCO}A!PGp$+HY-^4+*P3U|w-#6ntwq*iYl*eg zT4pV`R#+>oRn}^2jkVTVXRWt3SR1WP)@Eyqwbj~YZMSwhb ztwYvf>xgyKI%XZWPFN?cQ`TwgjCIyJXPvh$SQo8J)@AF8b=A6NUAJynH?3RNZR?J8 z*Scrjw;osztw+{l>xuQ$dS*ShURW=!SJrFmjrG=gXT7&RSRbuV)@SRB_0{@jeYbvC zKdoQZZ|jfs*ZOBgv7_42?C5q3JEk4Wj%~-W(!9vTNIQ?7DV6yT0APZfG~M8{19nrgk&Cx!uBUX}7Xl 
z+imQ&c00Sh-NEi?cd|R%UF@!QH@myt!|rMKvU}To?7ntCyT3ia9%v7;2irsJq4qF) zxIMxiX^*l;+hgpp_BeaIJ;9!6PqHW5Q|zhsG<&)|!=7o+vS-_K?78+ld%nHEUT80} z7u!qhrS>v=xxK<(X|J+Z+iUE#_BwmLy}{mSZ?ZSrTkNg&Hha6h!`^A{vUl5i?7j9r zd%u0aK4>4Z58FrVqxLcTxP8JtX`ixB+h^>v_Bs2!eZjtHU$QUTSM00yHT$}K!@g75Kt zMkkY#*~#K$b+S3xog7Y1Czq4k$>Zd8@;UjP0!~4vkW<(x;uLj?ImMk4PD!VfQ`#xx zly%BE<(&#nMW>Qe*{R}Gb*eelof=L}rN)kD22Mk#k<-{|;xu)dInA9G zPD`hi)7oj{v~}7!?VS!zN2in1+3DhRb-Fp-ogPk4r)>ErZu`Z@ib0nR{YkTcjB z;tX|$Im4Y1&PZpJGuj#BjCIC2zxhGMrV_=+1cW3b+$R%ogL0jXP2|v z+2ibW_Bs2V1I|I`kaO5M;v993Imew7&PnH#bJ{uMoORAQ=ba1AMdy-p*}39eb*?$r zog2@ogdCm=a=)_`Q!X`{y9Be$nyK&sOZipM|#&g5ma5ut@ z?Q)zw_#>aO9MuI1XU;b?&Fuba=!?-p2B7x{ch%ZWFhu z+stk5ws2dzt=!gb8@H|7&Ta2@a67u4+|F(nx2xOD?e6w)d%C^c-fkbauiMYVga3{Kx+{x|~cd9$ho$k(XXS%c8+3p;7 zt~<}2?=El`x{KV!?h<#YyUbngu5eentK8M@8h5R`&Ry?ra5uV}+|BM5cdNV2-R|yi zce=aW-R>TDue;CP?;daux`*7u?h*H>d(1uVo^VgPr`*%-8TYJv&OPs5a4))-+{^A2 z_o{o%z3$#{Z@RbK+wL9ru6xhD?>=xJx{ut)?i2T^`^D#q~nGP%oYr z=7oC^UVJZsm(WY(CH9hdNxfvA;;EkI2~YP7&-5(M_8iakJkR$6PkNDFaxaCK(o5x~ z_R@H1y>woBFN2rS%j9MDvUpj&Y+iORhnLgK<>mJBczL~iUVg8DSI{fu750jFMZIEP zaj%3|(ktba_R4r=y>ec8uYy<6tK?Pos(4ksYF>4(hF8<8<<<7;cy+ycUVX2D*U)R^ zHTIf#O}%DbbFYQh(re|l_S$%Dy>?!EuY=do>*RIzx_Di^ZeDkZ@nZ-O_`o8(RQrg&4mY2I{ihBwoj z<<0iycyqmZ-h6L?x6oVUE%ugpOTA^@a&LvV(p%-N_SSf7y>;GtZ-ckd+vIKbws>2; zZQgcohqu$)s4q?}B&H zyX0N=u6S3yYuPrYZ}bMJ-s(tG8-_TG4J zy?5Sw?}PWz`{aH0zIb1~Z{BzBhxgO_<^A^lcz?ZrUKBs7AI*>M$M9qNvHaM696zof z;)nY2{4hVSyz_`#Joael9<^pU2PZ=kxRX1^j}3A-}L+ z#4qX>^Naf>{E~htzqDV*FYA}{%lj4lihd=(vR}oo>R0ot`!)QUel5SYU&pWO*YoT9 z4g7|FBfqiV#Bb_1^PBrE{FZ(zzqQ}SZ|k@7+xs2-j(#VUZQD2h z`!oER{w#mCKgXZz&-3T|3;c!tB7d>J#9!(!^OyT8{FVMHf3?5HU+b^)*ZUj%js7No zv%kgP>TmP6`#b!d{w{yFzsKL}@ALQj2mFKnA^)&{#6RjE^N;%{{FDAE|FnO`KkJ|K z&-)kri~c45vVXRV-}@i@kNzkBv;W2a>VNaU`#=1j{xAQx|HuF9|MQ~+QG;kf^dLqMGl&($ z4&nrHgODIJh!=zf;XygCarEpjc2mC=rwlN(H5ZGC|p(Tu?r!5L65*1(ky;LDisIP(7#-)C_6`wSziA z-Jo7jKWGp%3>pQEgC;@Kpjps7Xc4pwS_Q3xHbL8*gCW7tU|29b7!iyNMg^mTF~QhiTrfVE5KIgv1(Sm* 
z!PH<{Fg=(N%nW7)vx7Oo++bcXKUfef3>F28gC)VzU|Fy{SP`rYRt2krHNo0oU9djb z5Nr%K1)GB{!Pa0~uszrj>!PVeea6PyY+zf67w}U&u-QZquKX?#43?2oKgD1h$;92lI zcoDn|UInj%H^JNBUGP5m5PS?i1)qa2!Pnqh@ICku{0x2tzk@%)-{4;mMMjm;WONxr z#+0#SY#B$!l_4@z#*<+(Tt>+FGJ#Ae6UoFfiA*Y!NkytslS1m!kfyYxEgk7fPx>;D zQbx+;GKEYjQ_0jajZ7=k$@DUV%qTO-%rcA2DznM#GKb75bIIH?kIXCc$^5c_EGP@f z!m@}gDvQbDvV<%tOUcr*j4Uh5$?~#-tSBqV%Cd^ADyzxrvWBcFYsuQOj;t%|$@;Q^ zY$zMa#?k|Q&a#W_D!a+D8a)cZyN6FE0j2tV+$? zD!<9^@`wB>f63qSkNhkD$taOgBcnw|kBku+Gcs0W?8rEgaU(+_LnGrwhDC-)MnwLn zX0GLbx|T<@{7=*J|M!Zw|8~nI`){{g|Lp(Q^B(1Y8s`3Y{^yJT4h?qyzc+b@@O&u7 z!!QiT2#k*jFd-(w#Fzw=Vlq@vMGXb&XrPG}+UTH*9{L!d#7InzDKI6b!qk`s(_%VI zj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0Vlga^C9oux!qQj<|66DH zKiB2BFOLyqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V?!ofHMhvG0Cjw5g+j>6G6 z2FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW!o|1*m*O&9jw^5_uEN#0 z2G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s!ozq3kK!>rjwkRWp2E|3 z2G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A!pHaopW-uojxX>fzQWh| z2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y?Vq;Ex*he?N$Z(J=Eu8Vc0WKoc#r(LonI^f5q*k(eA)U`kAdsWAqxm>ct8Ud)I2u>cmtLRc7!U{NfF#jymI#8Oxq z%V1e7hvl&XR>VqJ8LMDbtcKOG2G+z{SR3nLU95-ou>m&3M%WmeU{h>{&9Mcx#8%iE z+hAL4hwZTgcEnED8M|Ot?1tU32lm8X*cY>oQBhJ2F}D;I2-5ST%3pVaRDyGMYtH3;8I+M%W(y+#8tQ& z*Wg-QhwE_zZp2Nv8Mok8+=kn62kyjOxEuH2UfhTK@cNB9_@;8T2t&+!Gm#8>zl z-{4z(hwt$Ne#B4s8Nc9H{D$B02mZug_#6M=U;KyvQ!M}AiHgxMI>x}57z<-#9E^)0 z7>e;Q48t)3<6{C$h>0*UCc&impId3 z8q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFHhRv}Bw!~K0 z8rxu7Y=`Z!19rqt*crQESL}w}u?PNVx5R&@7xu>gESmVw^u>PI9|zz-9E5{$2oA+z zI2=ddNF0TuaSV>daX20);6$8+lW_`8#c4PlXW&eng|l%E&c%5+9~a<4T!f2p2`Lkg}ZSN?!|q$9}nO`JcNhw2p+{_ zcpOjQNj!z8@eH2Db9f#v;6=QIm+=Z-#cOySZ{SV5g}3nz-o<-(A0OaDe1wnj2|mSV z_#9v0OMHc|@eRJkclaJZ;79y~pYaQR#c%i>f8bC2g}?C+{>6XzKWkJeU{rVSX%t1+fqo#v)i0i(zpr zfhDmNmc}wz7RzCItbi4<5?014SQV>bb*zCku@=_GI#?I$VSQ|X4Y3h6#wOSln_+Wo zfi1BWw#GKt7TaNa?0_Ay6L!Wf*cH2BckF>Zu^0BnKG+xgVSgNe191=z#vwQqhv9G> 
zfg^Dgj>a)K7RTXuoPZN?5>Cb`I2EVibew@RaTdWJh5EEfyOoB-<87iovh5~go&_oMubkIc)eGE`yBqqlc zm=aTAYD|M^F&(DI444r!VP?#NSuq=C#~hdwb75}GgLyF@=Enk95DQ^pEP_R`7#7D8 zSQ1NNX)J?fu^g7i3Rn>Rk0dY#~N4@Yhi7ygLSbU*2f0e5F24*Y=TX(88*ij z*b-Y|YixsUu^qO@4%iVpVQ1`uU9lT>#~#=ddtq{5Fg=Ve1cE$89v7s z_!3{?YkY%m@g2U$5BL#3;b;7UU-27$#~=6;f8lTZgMaZKMv2M$KSsmo7z1NsER2nD zFfN8*D8|Du495tJj|ng#Cc?y+1e0PiR8U0?1?p&^i5A-Epo<>*7@)*ROpYlqC8omE zmta2uj}5RPHp0f(1e;q9kCAPxW z*aq8TJ8X{~up@TD&e#RJVmIuLJ+LSC!rs^i`(i)rj{|TZ4#L4W1c%}<9F8M!B#y$- zI0nb!I2?}?a3W5^$v6e4;xwF&GjJx(!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBN zxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4 zcm~hnIXsUS@FHHq%XkH^;x)XEH}EFj!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf z_y*tNJA98H@FRZ0&-ewu;y3(`Kkz61!r%A@|KdN45{vxDXc!%1U`&jKu`v$D#Sjd| zco>G^7=iII0Vc#mm>82_QcQ*ls;Hqr9St8}ndZ%!m2002ahTSQv|7Q7neVu>_XHQdkv02a#7(#vx8PRXhTCxm?!;ZV8~5N|+=u(|03O6cco>i1Q9Opn@dTd4Q+OKB;8{F} z=kWqw#7lS?ui#a@hS%{1-o#sY8}Hy_!ytyQ+$Tc@ddubSNIy=;9Go$ z@9_hE#83Dczu;H=hTriA{={GS8~@;6{D)Culm8eEqhkz=iLo#?#=*E4f}t1>!!R5p zFg_;0gqR2uV-ie?$xuNRH590$fhJmLqk}Gb=wpBqBQZIqz?7H@Q)3!Ti|H^uX26V? 
z2{U6B%!=7CJLbTgm;O(V-YNh#jrS*z>-)BOJf-us$}xhS&%jV-swO&9FJPz?RqwTVoq+i|w#IcEFC< z2|HsK?26s6JNCey*b94OAMA_$us;sKfj9^U;}9H*!*Do`z>zo#N8=bAi{o%SPQZyc z2`A$eoQl(MI?lkEI16Xv9Gr{ua6T@;g}4Y8;}Tqo%Wyfaz?HZPSK}I7i|cSbZorMW z2{+>w+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH$M86wz>|0iPvaRpi|6n>UcifZ z2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*}&+s|Ez?b+6U*j8mi|_C~e!!3T z2|wc({EFZ3JO03*_zQpIAN-5|FiITqAERM(jDayR7RJUn7#Bk@6ysqShGPWA#{`%V z6JcUZf=MwMDyX7{0(CUdLsJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf# zu^Lv#8dwu+VQs8~b+I1S#|GFC8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jq zu^V>B9@rCmVQ=h%eX$?*#{oDH2jO5GfxDhwuX54~X zaT{*O9k>&B;cnc6dvPD`#{+l}58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{ z@fu#o8+a3M;cdKwckv$H#|QWjAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+ z@f&`}ANUi0;cxtdfAJqiiA(-tG>nchFeb*r*cb=nVhDy}JPgBdjKKJq025*&OpHk| zDJDY&Rn$F!wSOQC8DJ+d;uq>9t@>l^YVkNAMRj?{n!|GTA zYho>|jdidt*2DVP02^W>Y>Z8?DK^9A*aBN(D{PHzur0R3_SgYCVkhj3U9c;5!|vDv zdtxu_jeW2$_QU=-00-hA9E?M7C=SEnI08rFC>)Jra4e3)@i+k|;v}4mQ*bIy!|6B! zXW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdTTW~9G!|k{O zcj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{SMVxc!|Qkh zZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_!|(V5 zf8sCvjeqbj{=+CC!!pc|$t70{*jy13**23CY z2kT-ztd9+_AvVIs*aVwmGi;76uqC#_*4PHyVmoY)9k3&I!p_(QyJ9!&jyZzFARfZQcm$8)F+7eZ@FbqX(|88Y;yFBz7w{rp!pnFCui`bljyLco-oo2> z2k+uNypIp?AwI&#_ynKgGklIO@Fl*&*Z2nC;yZkgAMhi7!q4~xzv4Iijz91x{=(n* z2mj(fj1o%zV>FD8F)${^!q^xG<6;PgVmu7PaE!qCm;e)EB20`)FexTO1y$5gppFKb zXrYY`y6B;g0ZNR-SI818ZU}tc`WBF4n{P z*Z>=1BW#RKuqigf=GX#TVk>NoZLlr2!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR! 
zH~D!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$r zcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ!}E9nFXAP8n18?Fjyp4D8F5biY z_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U?!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`FaE7|{8{=SH48c&0hhZ3w5f~p6U_wlUi7^Q##bl_UiW&;k(LfU|w9!Eq zJ@hd^iIJEbQ(#I=g{d(Orp0ua9y4G@%!HXS3ueV^m>qLqPRxb5F%Ra&e3%~#U_mT| zg|P@0#bQ_-OJGSXg{83!mc?>d9xGr)tb~=Z3RcBxSRHF%O{|5ru@2V7dRQMDU_)$# zjj;(f#b($XTVP9Ug{`p-w#9bX9y?%1?1Y`M3wFhB*d2RdPwa)gu@Cmee%K!e;6NON zgK-EB#bG!cN8m^tg`;r{j>T~}9w*>LoP?8c3QomoI2~u;Oq_+YaSqPKc{m>z;6hx4 zi*X4q#bvl0SKvxqg{yH5uElk@9yj1d+=QEP3vR`2xE*)kPTYmNaS!greYhVF;6Xfu zhw%s=#bbCJPvA*Bg{Schp2c%`9xvcUyo8tW3SPx)cpY!xO}vG-@eba_dw3ro;6r?b zkMRjU#b@{&U*Jo8g|G1qzQuR=9zWnm{DhzJ3x36K_#J=XPyB_y@elsRe;6f<{Kse* z9b;fjjD@i=4#ve048?dDhT#~2@i74=#6*}FlVDOzh6<{vp+FrCG|@sE9dyw{9|M#a ziODeqro>d38q;7}Oo!<)17^fbm>IKRR?LRkF$d0#?LISQ)EeRjh{9u?E(}T38$FU|p<-^|1jq#75W{n_yFH zhRv}Bw!~K08rxu7Y=`Z!19rqt*crQESL}w}u?P0VUf3J^U|;Nq{c!*e#6dV1hu}~g zhQo0Lj>J(o8pq&R9Eam^0#3w9I2otlRGfy>aR$!BSvVW#;9Q)C^Kk(##6`Fmm*7%d zhRbmUuEbTi8rR@jT!-s%18&4kxEZ(LR@{c$aR=_iUAPx4=M$CknF$-qJY?vK$U{1`1xiJss#eA3_3t&MkgoUvP7R6#%97|wH zEQO`943@=mSRN~2MXZFCu?kkjYFHg>U`?!rwXqJ?#d=sD8(>3hgpIKYHpOPx99v*Z zY=y0{4YtL0*d9AzN9=^1u?u#^ZrB}rU{CCYy|EAW#eUcy2jD;)goAMi4#irsL98cg$ zJcXz644%bvcpfj{MZAQU@d{qWYj_=R;7z=RxA6|%#d~-kAK*iLgpctFKE-GF9ADr| ze1)&^4Zg*9_#QvtNBo4J@e6*%Z}=U5;7|O8zwr3~(f$=c`Cd5RT7?WU9Ooj@osG&d|4K&e08y$4fLmvZ_7>UU-1*XJQm>Sby zT1i(0EQZCg1eU~7SQ^V< zSuBU;u>w}aN>~}IU{$P!)v*TF#9CMz>tJ21hxM@mHpE8Q7@J^IY=+IT1-8Ui*c#hl zTWp8zu>*F*PS_c{U{~yh-LVJu#9r7N`(R(}hy8H?4#Yt?7>D3c9EQVj1dhZ}I2y;` zSR9AraRN@nNjMp&;8dK3({TpQ#925S=ipqNhx2g(F2qH+7?_uyXKhx_pW9>ha<7?0plJch^d1fIlGcpA^( zSv-g5@d94NOL!Tt;8nba*YO74#9Me9@8Dg$hxhRTKEy}(7@y!%e1^~Q1-`^r_!{5f zTYQJ_@dJLuPxu+X;8*;H-|+|j#9#Ou|KMNzhf(5_{}>IUV+@Rmu`o8q!MGTLp%@Rt zFdQQ=J|@6~mJs)Gh-IairFwb=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+*1(!r3u|K?tc&%qJ~qIH*a#bA6KsmjusOECme>kgV;gLX?XW#| zz>e4nJ7X8@irug~_Q0Ol3wvW9?2G-dKMufwI0y&h5FCoba5#>@kvIxR;}{%^<8VAq 
zz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18 zz>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpU zz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f- zz>oL|KjRntir?@%{=lF33xDGu{EPoEN&@m9qhWN6fiW=_#>O}p7eg=<<6#(vV+6*> z1eg#LVPZ`3|Jt<=Fe$2aQP?9n=bUrSL89cGBrB5hFhmK81OWvE9P)sGs3@W+A}9!g zC?GiqNY03gB8UWuBJg()F9LfD9{1Vj?El{9);!a1&s2AH)e7IY)~aR-rerFnW*Vkt zI;Lj^W@IL2W)@~;HfCoI=43ABW*+8cKIUfu7Gxn7W)T);F&5{eEWwg2#nLRpvV4r? zSe_MFk(F4PRalkPSe-RkleJizby%16_&Dpc0UNRr8?y49jL!s2$V5!cBuvU=OwJTc$y7|uG)&8MOwSC=$V|-4EX>Mm%+4Il$z06MJj}~{ z%+CTW$U-d4A}q>cEY3$+f+bmsrCEk$`54QwJS(swE3q=GuqvyuI%}{dYq2)#urBNI zan@%8He@372n=Ig_vPb-uw_e3Nf+Hs9tPzQcDpm+$d?&f^E1&kwnP3;7X0<{~cU zCtSj%{FKYMoS$(8Kj#-*$yHpojI73xtN=In3wsOp9NTug;?h12~X_IGE3K2#0bQ zU*L-z&Ji5RmpF=}IfgGYoMSnT<2iv7If;`Q!6}@|S2&H+IfJiqCST+0e1o(2Cg0+0 zzRfv&hwpMO-{bq7#}7E4A94W~@*{rCMO@5JxP(jjDVK3MKjR91&M&xzl$z9ydJ>1KE+|L6%$U{8LBRtAu{D#N*Ex+UUJi#CM zBTw=tp5kf#%rpFjXL*k2d4a$3A}{eWukb3b@i$)Q@4UgAyv2}Y@;{?6Dx)zvV=yLT zF*f5cF5@vi6EGnYF)@=cDU&fdQ!ph{F*VaLEz>bQGcY4FF*CC;E3+{>b1)}!F*oxt zFY_@!3$P#yu`r9UD2uT;A7u%aWGR+r8J6W^EXVS!z>2KI%B;ewtj6lB!J4ea+N{I6 ztjEV$pAFcMjo6q?*pyH3NjBqCY|a*J$yRL5Hf+mwY|jqt$WH9cP7g{rMaRa3BY9FrViT4&^Yuz!y23BRG;TaTG^$3}0qA$8sFUa{?!F z5+^f)Q#h5ca2ls`24Ce&zQ))2250e2zQx&mn{)UM-{oAs$M-ppA88m{F!uIC1B z%3*wgFLF3Xa3o*iD30bBzRYlrqEI#LK+GtGveFc%8rV25<5fLsH29jKZjl#^{W} zn2g2PjKjE$$M{UZgiOT5Ov0p0#^g-FluX6cOvAKH$Mnp=jLgK$%)+e9#_Y_&oXo}C z%)`9Q$NVh7f-JV$^KEWs1j8CySTd*Ztu{GPUE!(j@JFp`=u`@&2g<%m_~5RKCJ#oX#11l{5JoU*{W~#W(pDXY*~&;X8bnbNL?M=RAJE`TURzxR4+5 zV=m%ie!?YO%1^nB%lR2s@N<5_m0ZQu{E}<9mg~5l8@Q31xS3nHm0xiiw{r)-=1%V7 zZtme;?&E$Q;6WbZVIJX89^*GW&Tsi0zvl`5z#n;%Kk*b#^JkvnFFeb0JkJaKl^1!5 zmwAO(d5ypEI)CR4-sCNYq?G>|g;5!e(HVm=8H=$QhjAH?@tJ@LnTUy*gh`o<$(e#F znTn~IhH06O>6w8UnTeU1g;|-6*_nemnTxrZhk2Qg`B{JkS%`&Mghg45#rY^pup~>d zG|R9oA7eR|X9ZSdC01q?R%JC-XARb5E!Jio)@40D&iZV?hHS*fY{I5|f={v;pJH>i zU`w`QYqnuqwqtvCU`KXhXNIy1!`PMG*quGtlfC#fd$SMwvLBz}v+U33IDi8=h=ch& 
zhj1u|@dduf;T*w{e2Jqtnq&Af!#S4YIGz(Yk&`%?5uC!Qe1+3Eoiq3-|{8h_(;{>~e`$y*FbCI2%DqcR$! zGX`Ta7GpCG<1!xOGXWDa5fd{BlQJ2TGX+yJ6;m?}(=r{?GXpa+6EialvoagAGY4}r z7jrWY^D-avvj7XS5DT*ii?SGt^HG*yNtR-1mSI^w#&Rsr3arRVtjsE`%4)368m!4$ ztj#*C%X)mA_1S<8*@%tVgiZMbpJX#W#pZ0mmTbk=Y{Rx}$M)>Nj_kzF3}qLFu`9c= zJA1Gvd+}-ZW*_!tKR&}}*`LpG00(jq2lIIj;ZP3a3w)8oIf5hk5=U_~$M9u_b1cVk zJST7>Cvh?(IE7RB3a4>8XYf_dgLJ@LkU3dwies_yOnhLoVP# ze#DQth>Q6NmvAXRJP4oi})sw-}OI{$~_MWi&=-48~+E#%3JGWjw}b0w!c4CT0>QWilpb z3Z`T#re+$ZWjdy324-X?W@Z*MQ$}S9JS9W7}_Fzx;;?wNSKJ3eWe1^}mKcC|O4&)#X z=JOoFp&Z5+_#%gM1V{2Ej^b#J;mZu?SdQa(PT)jN;$%i}3a9cFPUCdW;H#X;*Z4Z$ z;4Hq$w>X<`a}M9(yPV7S_&(?H1J37%T)>6=h#zwi7xNP?;ZlCeWn9kBxPqVa3$ElU zuI86q!?j$;_1wUX+{De?!ma#@+qj)O_%(NO7k6_H_i`Wi^8gR>5D)VRkMbD5;cF(i%r&nS$_ zXpGJnjLBGx%{Yw9c#O{kOvpq`%p^?8WK7N!OvzMC%`{BQbWG0-%*ag4%q+~xY|PFa z%*kBL%{##2C@p0B?12$wMHf9qxPUS0{#_62FS2>ff@pZnzS$vakaW>!P9KOSMIhXJ8ea_Ntl$$ zn4Bq?lBt-QX_%Jjn4TG!k(rp8S(ugCn4LM8lew6id6<{^n4bk$kcC*7MOc)@Se%ct z1WU3MOS25i@-dcUc~)RWR$^sVVO3URb=F`_)?#heVO`eaIv9L^CO$(J~aqdA5zGn`{Nj^jCj6FG^K8Nn%>%2zmz(>a5$awcEn z>wJT=_$J@tY`)Doe24FHF5lz(oW~D1pC57o7xE*1%tc(xPq>6j`6-uiIX~kHe$Fqr zlB>9yUvdrCavj%m12=LLH**WO@+)rRcJAQU+{sl%p*L?WBi84 z`7OWW_dLNL_#;p9C!XSI{>(G{g=cw==Xrs@@**$sGOzF|ukklt=kL71o4m!4bn-u= zFe;-lI%6;Fe|e$ zJ9986b1^sbFfa2lKMSxR3$ZYZuqcbMI3HyRmSicGW*L^{V=Tw=tiXz_#LBF~s;tK9 ztihVB#oDaHx~#{?S)UEqkd4@wP1uxA@JTk~Q*6!_Y{^z^%{FYyc5KfM?8r{+%use= z7`w6?yR!#-vKOCbZ}wqd_Tw{rmi_r02XG(St^uW%Zta|U1KOuoj~`37h4O}@q1e4BIl4&UWmzQ^}Dj~{S8 zKjZ=~P5&+;74 z^8$b6MPA}%Ug1?<<8Qpq-+6;Kd5al ze2uU34bI}5e2cUBHs|mizRS6MkMDCHKj3_R$OT-;kN7bcaWOyP5-#PZT*l@6j4Sv# zzu-!);%a`$HC)SeT+a>M$W7eLE!@hlxQ*MngI{wecX2oOa4+|9KM(LA5AiUM@FLhq%*?{9 z%*O1@!JN#++|0wg%*XsJz=ABq!Ysm~EXLw|lqFb_rC6F}SeB2m9Luu;E3y(RvkI%S z8mqGgYqAz=vkvRB9v^3YHef?GVq-R8Q$E2b*^E!IIa{zLTd_6Uur1rMJv*=?JFzoE z*@a>3%5Ln=9_-0pe44%4hke?WG&Wa9oA(%KF<1Vz=mwZ#%#i- ze1cE18J}WvwqQ%PVr#ZxTef3+c3?+#VrPc33&Yrz-PoNy*pt2ZG<&lT`?4RO;j`?| z=Qw}^If#S#Jcn>7hw%lz$l)Bpk$j1xIGSVlGQ&BR<2arZIFXY$nGu}AseFagIGr>2 
zDrfRFzRou|i*NER&gR>k!*}>D=kh(i&w2cS^Z6kca3Me9$6Un4{De!ml%H}Lm-92O z;OG2;E4hlR`6btIE!S~9H*h02aWl7YE5G74Zs!hu&7IuE-Q2^y+{gVqz=J%*!#u*H zJjQQ$oZs?0e$Ny9fj{yjf8r^g=FdFCUwD@1c%B#dD=+dAFY^ko@*02Rb^gv9yvbV( z$t3?X3ZpU_qca9$G8SVq4&yQ&<1+yhG7%Fq36nAzlQRWVG8I!Z4bw6m(=!7zG7~d1 z3$rpCvoi;CG8c0*5A!k~^Roa8vJeZi2#c~9i}O*IU`du@X_jGGKE`q^&kC%_O03K( ztjcPv&Kj)ATCB}Ftjl_Qob}m&4cUl|*@R8`1fOIxKE>v2!Io^r)@;MJY{&NOz>e(1 z&J1N2hOsNVu{(RPCwuW}_GTaUWj{W{XW5_6aR3K$5C`*l4&hJ^;|qL|!#RQ@`4UHQ zG{^8|hI1^(aXcq*A}4V&BRGXq`3k3TI%n`z&g5%+oo{d!-{f1I&9^y+@9-kJGqOyxrckXkNbIm2YHBxd4xxKjNkA$zvXxQo+tPNf8IV5s>gi#ok(HNaE7?ZIWn{gPI@fe>8 zn2?E>m`RwF$(Woen3AcOnrWDp>6o4wn30*7nOT^X*_fRER$*0EV|CVGP1a&<)?r=N13bt> zJj^3J%47V7$N4S4a4+4G3a(>1Y{G4BKC0B7ZzvLRO=C5VN^zAbjDyz#$s&7VO+*z zd?sK*CSqbHVNxbza;9KPrebQQVOpkRdS+loW@2V$VOC~icIIGC=3;K{)#nep0v`okJ z%)pGy#LUdXtjxyj%)y+@#oWxpyv)b^EWm;+#KJ7XqAbSZe3T_vlBHOhWmuMvu^h{@ z0xPl-E3*o#vKp(i25YhwYqJjPvK}92eKuf2HezEoVN*WAC)tcou{m3?C0nsI+psO$ zu{}GmBRjD(L)nF4?8A`4(sMZO-94 ze3x_i9^dCYe!%(skPEnwAMs-@;$nWnC0xo+xs1#C8CURge!-Po#nt?hYq*x{xSkuh zk(;=gTey{9aT~XD2fyY{?&5Cl;a=|JejeaK9^zph;ZYvrH$2X7`5nLK3I4zzd6GZz z6i@SKp5ZS%%X2)>3;dNAd5M>Kg;#lvzwtVM=MCQEErw*5{~3i*8I92ygE1M4u^ESP z8ISRqfC-t1iJ62+nT*Mqf+?AbshNgpnU3k1ff<>JnVE%InT^?*gE^UtxtWJ~nUDEd zfCX8Ig;|6}S&YT`C`+&;OR+S|uq+>AIhJPyR%9hsW))UtHCAU0)?_W#W*ydLJwDF* zY`}(W#KvsGrhI}=vKgOZbGBehwqk3xVOzFidv;()c4B9SvJ1o5mEG8#J=l}I_%wU7 z5BstopW(CY&*wOR138F;`8R?o_$p`eHNMU_IE!!cEzaiKoWpncF6Z(+zR!95fb;nw7jPjz;>TRX#r%Xz zxRjrA8JF`juHfhVf-AX-tNA6@a4pwyJvVS8H*qt!a4Wy!Hg4w*e$AcS#ogS)z1+wB zJivoI#KSzoqddlMc%0wzJATg-{DD96B!A*5p61Uy!(Vuo=XjnM_$x2+5-;-#ukspy z<8}Vd8@$O|49Ow?GYX?J8ly7?V=@+FGY;c29^*3s6EYDKGYOM28Iv;wQ!*7(GY!)+ z9n&)dGcpr1GYhja8?!S9b21lmGY|7JAM>*S3$hRkvj~f_7>n~!mS9PiVriCPSw6;c zEYAw8$V#ltDy+(Ctj-#&$y%(!fDPG*joE}v`2?S2Gd{)UY{8an#nx=Y zwrt1t?7)uf#Lf(57lyGbyRkcauqS)*Y4&Cx_GLdl!)Mu_&v5_;au5gec@E)F4&w`a zk;6HHBl!|XaWu#9WrlMs$8kI-a3UvhG9x&JQ~3&~aXM%4RnFvVe4TG_7T@GsoXxj6 zhwt!R&gFZ2pY!+u=kr4@;6i@HkGY78`3aYBDL>^hF6U=l!O!^xS8^3s^GmMbTCU@I 
zZs104;%08)R({28+|C{Rnmf6RySayZxsUsKfCqVqhk1lYd5quiIKSn0{GKQH1ApX6 z{=`!}&7XOOzwj*2@jNf^S6<{LUgi~E-?QJc$2pnl2iU?6h>tV-uO><=6vbitp$J#O&rEhBP;KhicL{E>En-}Zsu4uRi}f!|Jn-_8*o0|&i(cKyPm zck11?d&l5;g~#vEw{`a}{X2Fj(z@?}J`v%Gd$sM+p>3Z6?R)m_Sm5@{!eduy+qZ4C z$hS<6xP3jL6~kk92>kcmR!6<<_aML`@p<7yj{~;ZZABZ5&ZIQG(#d2A}?G9~*LG)tnoFkNpdMZ07K2t(!Ei z9IUmw*BDv24{d{w+^xN^oqAJ3QTaJXhYc3b&2LL`=q1Ovg;j!fedJT+G9K6@m?`Z6Go{wEf)M`yX=k zZSSepJCzr5PaTyD)=}F)9Tf;Zxm`!07ym8w6Xk9T&vo|<|DkHG1yAaF%fQ2p(3>4Y zZ`BJ62?XZg-w(a|`;MJ3@Q$!Vfrs124!sc>-yVK|`-(4J;Jx9|`-b-J*fAn3{oMoI zyd5aRG6((@Jf-Gg*#pn+Pes8Xc{>$F4+(kXkw+qBhjc}Qw_G8 zV2HXm5{(P)2nL*{cN=qf^vE>*?+H24{1tS>lk8(nZD{3p#pDmIBEG?r*$wP|%z|)ZUUj@xu=F$8aTDr=C3v zM}!9{a+{*JDR!IUk!AUxYJm?A9rxzn<$;FwuVwDPoqyLpsMT-2cSqFP(f-K4JMP_a zZ(`mX+!5?2i-J!i=6H8Tw2=ea|Gf5<4=Kw*I$`;DWjHPDr>rLWm5fVZomDH^*{;1pTMUFSN@&neTSk zdr%VnKP#w42@VnNmf(S4ksS;^911=h4n7T5p|8e>eXkdSfPw$2!yxL&>B z$UZD_AQx1x;3dHk)PJh6{!c~S`^JjFu=`KV>My%Vmj`u|p7-v!H}>|scSn@kF=|IK zjT#u-5$ssaf=>*%%l!W$-2QX-iELoO?oU& zR^)bP|3Cg69GU$ul==BPWq$l_nTN-3-?ne-z!F*OPThJ222B6S;*a{!NmtZBM7>@9 zsTAiQgXKHr87#+p)+obcv~J(FeQ0o9Fg#k14qduO)PAJT0~RiW)6Uzo#i&t2BBy}= zV&!4Y9u0tDMs~&t&4u)G5U1u z-KArn$ffTn9r{Lumb8}zO@ela8}03&i)K!{5Hw1G{1x{m`6G@!aBcg~FEaWcGoy)jE^^GBiww>N z+J)r_jFujDX%D-p@V5uE4=RqoF;gGL7atidP2Bz!w2F8GU> z_~0P?pm2X@%74$iG;Bzq5B~F1^uKpSHSSzd#rv)(JbGZItM7n+J%`=7X&M?^jl~O1 z7+8l6+!%-meGI`q!z z54!L4BPVwcxq>qfxq=7XB#{;Vm#1cPB;G3?>@u{q>$S;J_6J5|JtxL zB&5O0)#C#_He^=)rbmOGJ$@?RzJTTW3>g@7Y_w#X_eBn0LkrgXWBvK=(?9K6!83Wl zg20^t(1XS?fx?U2VluGw7rDjeod$fTB>qn>x<74EasC+bAHUP0w%*sG`h~~p)2sEJ zul0ofd)DC|a+ff2S^M58-rZ(9S2CFwNG5?)BAEn60{3Q;J2my7HXGV0JWAb?5y3BR z1ancY+R;M8>eZ`P>sDZke5f4~7++V8(xXpcks|ohH)3S46-2J^ZO^c&;-$cSybKFl zP6*n);)SC@SL_^_|8l^%9nua8dNt*N&Q}6ff3N$Jpi@gniGDTUi7itb1ufPx?^{=I z-v_+gw(n{2kqgYtLtEW{BR%k)u3!o&scqsxD~*XN+3moEq@G8Oz20r*<=NUMi$fOjE1Ap^8W85z7Lcv4a-Tu0? 
z?=9sAo?r0bfpUyoKnqURg6DT{nf^r?#{MJX)d;>jxcwUfC|W`p5F#z3YBu@7;0lO_DnI?zr~`b&KGR;DTV(;FH@6f_?fG G`2PT0b>sN} literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/orc/TestOrcFile.decimal.orc b/pandas/tests/io/data/orc/TestOrcFile.decimal.orc new file mode 100644 index 0000000000000000000000000000000000000000..cb0f7b9d767a37159c5509da1a47877d1c8b411e GIT binary patch literal 16337 zcmeHu`BP%ox^^}*Z063;>CBy}lXNwVTQ?|S!os`gV)y=$*m zUVjn;Cqq&3Q<&Q^h>QO7AzzQGy5`K)8077Do<4p0)IDUxooCOUJ$2?7_$S_f*XcV? zoj!Z^ABVpk{^O$GF8U9wmjI;_AWs5pl>_tTKzBJ%Ta2A6!j^Hc<7{kU0d|0i&0%0W z>DV0_cDD>z$-&l8u(MgP^Z*>&2Mf|*e+oQIf*m_xiwI~+fJYydSl$Ir7{Q_raIhUL;{n||a90bqw1BJ4V67UQ zYXr+0!0~#pPzjE6ft*^fvj*I$2AiwE6)9L#0nV0#rDAYQ1Qr$n{Q~f?1nl5}TU@ZI z2wW-ztJ&Z*3luQHwnAV~{o}s$$EKnm=MH};j{ndX{9(uSLtXC=Q!PJ?Br)B*A2z5z zRBwGhzVLmX=lgc+_ba;Z%WJ#Q`Tgm=*Eb~q7`ZsN}-z*GZ%1qx3slPdpe$!m^&HUll zCGoHOgJ18uzOL{6T42Bw%D(R5eZ5Kjx@POEiG{E7JzsTLzgpFORZ;uZfD)5K|Ee|l z<(%ltV&<27BFxUp7j^zGrW{`sbbis@@Wn>?7uD=9#xtMi zt$*IY#H@^bUT*q)ME&`p^z)XY&le6a2;vt8f*1B&7iRa+vn>~jWEXmQ7q+MuYPUX{ zT=iwTgJ3cAw{G_Mhlg;u^YS^DlWIo<kmB@$X zlOK)@e0bRYVTi9Ul~Re$d9wHMQ?f2;R-7zuS>~XEpLp#pFAq1MlP*-Wi)kEsEYLWxg}G_xAqE+fDwr z=NxYrcfQ@%@b*sm+hspaW!@@Sf2(`;t&Nels!eZ=tKZ72e5<|at(C(!+f1mD;G2i8 zH(Pq&TxfYyAbWFw_vRk;&4#TvW*6RAQlomUZ*1w_sI7ftQt$?o{zhl=_4UZ>m6NZJ z4ZL1fiE2~6z9f3RjQRS|-fIUduQmH$n|Hic()n6{!)rrDsCxEm)0tNb*I(_KeRXr> z)f&^Q6Y5vdiCHCSrLC_H26x+*ek8QuPnB_QYw38koU?y^_8ZrbIoyNvFBW$ z_1uo`TwU$CDZ#k{`nm4p%NvoGt0!Nc4<%|8j$o0b)D&sGV1z*S=d!eoO zg{77k%49DL@m@HfzA$ute&P8N&-4A(=XZ6_*VjHjEqK0={(MjJxy{IP^)lqdz;pSA z=Q@A~>^h(Rq<+7(ocu)6iAzHScT3C2W;CX7m`qZB8 zsfOC8W(7|b(Vyx~KDoJosGWRra^Oj(;mJ*1nUnC%s#X?@=ygGF{^&4 zsPduSqKCE)AFPc(I2nA9Ird;@?}O_t4~{0`W4s4*b02KmdSGecfilknL)HflbPqJw zJ}@tMAZH2QpFF!8Ia@z@c6#7!q2X+g^6aMQYz_15#NPd_lkg7z{i}}qD?0BVZMZ*2 za(^rP{>99FrR(<%&fd2;0BbX@xbnWfqWg9Z@2!j9I~BaQVC>%R-g`?8@M_t; z5x@TqKo^sDUBi4Hkb@#N?-ZQ9#3+VR@B=6pf+}$vF_w2ykMTWb3m3ME6 z?yhCty)S|@qjz=s?^<`V!{tb&NQ3O%&X6oRG#TC zIENA(V|O+`y4iAPjqJ_|-ktfmcXn)@UR^j{;W<5OJ)P5bx;P436r3)l 
zpB_x!u^+jkY4VP_fjf!~cl0Um*b&{)=Z8)0ohpc)>h_=7aGa{{JT=~MDo=8%oqcL0 zbNh}1HZptr;mGYRrrQ_Pw+kw7A1Jzg@9<UYl-33e&wwN5?DR+*6F=l z3Zu95_;1;C+)~qd%S6L1`I1{Y*tc}EVHN8qMrTjtI8U^iPAsZVlvbV?EIP4&cym+y z=8X)rczCBmmsMUrRCN8p;qi78bUt{zWbAl<@A2K1$C6j%E?vTWa6_uBlT$@9=yfuDwF?h6e?C4 zURKa{S$FMa8-mNK>6eWsFU?!Jv_S@08Mw6EaOsHh(nHauEzC<7_AU`bFB$M(vgf#D zmIs+_xTHvONiX}7t<1%>>laVXUd(h}+-bUaU43y;E@Z6e;#|tbZSf;Z!6Rj3M}~Tj z9JCy1mK~Yr9qHWy`nSlt3*>qadD==YY$Ny7k~an98ajC*N!nTfIwnc01EdNAX;ewd zDI>KqNsD{L(kOAzPptI-O`XKK24b;<*vBUBWC(TZgsE9Vfs@c}B1~F=>Po_R5h0I4 zXpiGpg81?={75hUum#^D!!z3eK`wq^3%9p`Yw+M^t+=8#TyHIIOMt7TjL1w0XBDn^9^8$1ng&nyBSRVI%awnQ|QE0(t%Ai zrlt}zQH05-U^?RH)gZcJ3_aS5&e5XBeqZFFOLNhKTd4g7RFelaXGImaq55i3I|5Yh zS5rx3!4k5261g#etTrIWmB_p@WIGeNvWIAUXT*;PEl3ha>G2Eb3qGyqaN#2ki(C~*HPiVmcG0u|jvvpJwzg>~6*i4dND zNYIy)GOIv({D>rev59?YPYd!+XiBXEZ*eR^1UCZ@fMJ9@Wv^`4L!o~*fFWIk)0 zd0-~>V3z-IRm&rmkw?QTkJ0H*&}I=1(^G5yr#ZXNXiA=QH$T5U^n!5tMFZ_+f7Lm~ z9#T5>s(I(NQ1Kfy^_vRETlUD?QR=(w%JF}qLoC~Ik z&x0LbP`zJ9TQD}xH_NJTSp(lSE`0Ai{2^Zc6VLFo&hr;<{1+!55ZC|K)&F~7{trs_ zZ!+=!vFiUN;{I30#=ofw{-KUyV|_UQH%JAvnOwjl$^()*Iv^U$2aM|sV1~&8vb6<( zi;lMh3W0E%1JFykfU>n1aE$VRwbc@U!w>)(xe#!hOM!%pAlxeh41#jN-y#7rBNc!& zDh15+Dj+1Q253DpKrvkd@Oy~S5(U84)B);YCE!}F2NJXffM4AR=)0Q$-;^3i?=%DY zX_85;1%i%NfV!ju`Xbatdr+yqr_Im({ z<`QGp3(SapKvq8mRE_%q%f>ViE|>-A$^f9W&H;{K5HJm2$|+j_G;LwPJ+=sJuSWnO zYZ)*oRsg>x3SiUW$ZO~B6B0;2M5fZe+TsArRaYi}1w2=)MK z-sO7dKH!UHfHXY|NNNrNQx63aoX&wzQ&fnYPlMPrc@TN^G1f{xq%n^PaaOY+@$Lc$ z&(DVFl7$d2uLzP-b0OlPVu-E#ia;(OLa7o!WL-jt)mI9MY>Oa_VlhP3R1O&)lt5yO z6_DkrE49)ph{sqBNqS`v(N+z_SX2v{X;47219gz9uo7aSs35H5RrHQVh;pI{;@DI} z)(SNcj;aNsv1uXhP%C6RtAhy3^^iv1HU4n}B(u>0k+M4>W@Q&7WbKB~=1maAK@Y@U z)(eTYSs+gJwd$A^;$qn#3B>?}-)D#D=LR9ZeFr36It-EMMj$4a6B2h^M`eyd
    y zt#=%Ava}44c+JGdN;t&xn0Wnr>LA;eWW_>%5 zs+}anQk;T>oAw}dM;fAx>_Z&X1IU^*3*mGgLQTm ztrSBg{pC>8yaXCNsDM(%l~B393Tks#LziPRD62pY4RCLD_9>w8xjHB>qlD^2DyUc2 z08P0Xq2l!>Xcw~?8mQGkDNVP@0$Qjw-3pBe+n|hAJybQ?4jo=KKx6qvC|BMI)tbAY zvcZ$dlnE*ln4!j&Ug*q-1)5#ygI3Y|p%$498a53;>C<+ob@6sb$q;m{c^JwW9)W6> zolrN;1>LS1g9^LdP{Y(X)W73_Msn_uswbglhYuQxOhIY6(@;g_4AkB^3yn?&pzQ5A zsG2(u)vdr1_5~<^F$~q`M4-NkC1|>187lFvKuw7#G{{+lQXAHxD&w8Dg$?NPVI0bm zB%qCkEvVD84UNZlpuECesIEQ*_4e;UhrMUS*?nl2_y8KvXQ7nwL#S+n0<#uSVUfCA z7{f|~se*a1SmLgjhymlaF=5&<7R<9=0826pVIl*hI@t1DwrW!4fE4$U>TVlChe((nWq)7 z&~6=!#;=DdG%A>9;J)Z`BaEG=hN-KYVXkfsEHTvr<0rK+Jx>SosoP*_haQ$(xZjl9 z0Si_dVbrcpnB3O|vu$_7mbpDJR+AalIM@qwE?QtB$^$%UKTKz|!Mxr9SSn$MiHjVt zu7)94U|<+V36H>Jlu?*b@?gX;24hUPVXDn>*l^(lELQJ@acz?@ZO8}nWT#+B@ifev zhcJ%M!e%xCuxvpPR;8SWS*#&gIJf|#AB17bvIxx4wgk(19_GYWU>epc%&l01ZClo1 z!nqjCu)hKGmu|u`tqGXawFRr%d?dt3!f3U-Fhy?)W}n@IMfcJ$wlD)zYY$*9XBL)- z9>OezkLhcua9>X@JUvZ=OHz4o6F(mw)G*-G5hh%|!h+lK*zj=u<17;g-smraJCj^^ zoX3OnnoHoiAwJx@B!H)KOX1=w5u9#&BH$ClDLdtGS#bs2+9ZWX1}ou=NEKW~mBELl zHSm~G4p)Yr)NU!@o+2eY*-#G`*;R03xB)&xX@X}ZYIs#gGu$$vfjhEK(K)SfrAh~P z*xKN0Aw8UP*bdi}cfj54M)>x4CtMisg0G36@hf}a8LJsCo$rO44=nIdnH5gc_rn!# z8{8fnfJa$_a8CQP>b@bkYi<~x*dKxOMWb-N&IR|m#^C8SH(bK>z)iIi@KiOD8kmI3 z(>}OO=!Y+Br{S#88F=IBEZmtt2aneT;XLy^+`aLS zQMgRD3b&fp;1T~ie7oR9RY@E^+`I{o4JF{*}gUASl}1vl>O!G+3~ zvP~IymE!1;SZiuEbtNbLjfXMUW#BFLg!LP4C==-IJ zOy-sJK@~zGmLW{~8br`7M^HCv5%PjMgso1ASnjJwu!1UtwCpvfs0k6*sS&)fW`u5C zgYYu7h*WJWLTu3?x&m#8Kw6J5x4kZF?Lb&Zjflu>UV3~E2Z@0dm?SZ_;n0|;~F93s>iM9_Tm2*q{?VdsVs(Z)pt zdoY4fFD@bMigywfQ3T((iqL!45Wd7ZBF))ANE+e@)4(PoxR5|lDcgvs~BB-^h;s*`O<7q1e$%ypjb|KT1-AMh~ z$0lD7GPrFmdK;X`)W9fG z9CjhQC~jmxGLEElc#yJ*31nLSS)|a1WT>W)Dw`iU9GXVPvNK3-c>t+xpF?`agUIB@ zJW|qo!Ke%)XRM3JY%qeXI#@zl%2tr!wkVSBUPUTnYe)wxhBVE7&gqLIHFKLt_kIGo zUAm1F>UNL@R}$%8+eKy=fBNy-G%~pNMQApIq@@p#3gIEruBD)&&Kwkbm5Ng5(@?IO zJXE5Gj-m>`(x(_GAD@LvYYI@35jM)SQiuxX6``myE=q1HM%ny4lwA8Yt3-fmY!;%N zL#3$rk_g44iBY<$a+J4Af=c-+P~sga%I5qg&{U10IAkbUqy}ZB)}kU(1&YyGhf+-{ 
zQNvsHs8~@0YB~C?*4~8jgw?1dMS~Jmw4jU~TGY%$D=NFGLsfC~D2u8c#mfJlzR-bE z9vV@O@-Ea`dpC;XF`+c^9+aEii`rINP{RH`R3M4*AM~R#WdkUw-i|W62T`Gz14Szs zMk(q>Q1(72Dmpica`ybF7P(O_-8d@Y@}T(Z6DU1%66LG)q0+rmC`rJNGNq?c@#&wa ztpSvLbPi=(4WgFwLnxMf0o7;@qntB~sCX)Z;t7^fywuO$ktiy)vWgPZ*HK-v7%E`e zKvAaSDB12N%3898iZpMdbo^gb%SqHQErp6z?V-5cX_R(qALZG}ppwN|lt_JuGCC+| zuja2=YA(8}l7_Z)=ApxrbToZCAFbpv(T+wIdd*&d<}9+&so`JU6-DUn4lY{gEk+v> zJhY#~M`u(5v~)m-HZPQ-Lx&=?c;z>Rp&V`ZNYK%E1)5!0iB{KFp9qkPm(5Z9>T3p(R?rQBq2S&Tm=5j#BFr%&VUUbB4K{ICi(2%32o_hahIv@eG zfF6htG^8}ydNJox-(|AP*N*A0q(B|l@UIITSKg4gvFm2~Ev{R|Cmk^L9jVinGwi#) z_f*{%y^_vp-HiYMYv8lR>;>wHf1Lg>W+GI`GYD(iL68`8Iy-`2jHdC&HN z674@P|0MRQ;R5xG;V*?>2ftB$m%x~QWc=*@i{!77Up2p{|FHg@^DkXk*}t#-L!Sds zasdYo;M0LXKA>a*2^K)d(>Z{v2oM(oVIH9711SMuDFxVNfL9Ed2rLP9rU9$QT^ zZa-EY#U}T#b|y|-hBK)ElL5!I<3f`--4c$zi}TU(Dj_~oi+8r+xke!2!CS+4;WmCb zmtZU*uxkhbEkR=;P~C*kAfVVJq$orOmnf+s#+r%d9wKj)7!DBiF`{k?@E4LAD@fT! zlBh+OOHtFj`&i5#dd`6 zJrY?wV%Ryt%DZ@mf3aGAF-3Q=yYFHZ1Ck6~Y)@P)re3mEe2J<05^l>SA@e0V*Cq7e zC7Du)>hO{b=TfKi(kk_(iSA3SBbN$iFI`@{)R?){SqBM}U#4lejM{mb$8nj$e_3kf zGRNL!66VpE=qUFZp5bVC;HZA`C}ZiUKY6r~etA}KxvTbaS=;3a8^q?hTok@Mx^;P1 z?lDfuv7qdjR(p(QIp!TZX7xhS@ngf3E2P{j;+0oeny=vZToG|zVF+Boid`X$K-7g- zQYx-=H(n|4x-vO*rG5HJ@#>Xp=_^gFtCn{lq57+I9aqr@uk!h>QY~MV*}ck{e|1&q z)d|Jb#yqIdcJ=ba)yBoE**jMUXxC`?*HGoxcv`Pf^j*UiKpmlLB#CQcIoFztujN%= z8`fN_H($$eUF)B_HXw&)v)8&f*U6;UZ8u$K>%LAja$R)hx~{eBIGO7-I;gh%IIZEh z*LYm%IG&z5KD=^Vx_3OzxZWbVo@#|g+OIbZT+f=kekO9gI(a>XeuG|J~$y#}nu=%EC(@jR_ zP3*u;f%ThG2`IJjW{>1%MdQt>&YK-WH%q2(jzw=ar*G!5PB^GAefczb5>cIVXXRV%kAQnyv^XcveW*S(~Z5Sv#!&+M`ZES+lQxZMR$rS z?~JPN>@wZSao!o6y;Hk>r~eS&ZHa840oQ94xNen&sd^o_~|nd=3R|Na8~_Y zGlsj=_PZ#)yWC56$#?Hc((kql?-ncW&Pw5?{=2yocZb4v>$dNv)9&$=+@q?wC!@W` zX}QOxh9~Clv2NZY%(-`&d#|zTUbg1mK+nCJ(R-!ewE_!H9`jAQe5ZCljXyhT?>_hbRhv?^2?1wYu4?7zkuIhX^;dt2Ue^?lO zczN$(Bl8iC1QAd^qA@%|wLjvSd_=MINGkb=gZ`*Q_-L&5(O?6D*Z*kP^Qb=jC}aCk zf9_+AC68rgkGZsu$t;g)I}x_|$3&ZtMJbPWaUbVYJsxa+T-)P?hdEZA`BhL>fpO?~Khznk@)V^Rb 
zAS2cn44xNQ;TLAMUQp-0NGW;IEqhU}eKFblBHxY_&%d}9f6+vFiOYQ{RQZyw`6YVK zOFrjIs=!N;Nu=}OrK-Z06Oxy$jV}wkUS1x0**N_&d-df&`enlsk}5vusXwRaIG3`Y zbNJ3lme0j@&zbXI;g!C^+C}R1uP|({_$OXzTznmifsHpqZ zuF$KT#H+!a*R;j2(W+nbYF<;CUrUd^rWT^48?VK)uUR;+^QEsxnqD__zs?$YeP-r$ z_1fzc1Ib9sEw<;az`|Rat+%MTZ+m!eD`an{THbc_zMb@-V!^k~@wa)D zcfv*Q=qul0G{56Fz0>G?Cp-I&J&ckaytB=I*Cu&a)c9_+^WCnYcRADV2BYt4)9;G6 zQQorml=bhW4et%x-;?^@i!Z%r*?o_n|9(XH{#q`YrGI~>|9$nu`;^7^-P`ZWX&)p@ zKCst(Aa4D@RDw3SKj4Nw2yK3#%lVLA{GqSvLzU*kOwWhT(GR&b=tS&8YxYAS=cDC{ zkBm(pvAaJC41c7V`G~sqQAmqc6r4|q&pTA-CC2ly!Sm*+^SqVw;naCO<71r#?XUZ| zvHjz$?PHhsV_D?m?VXQpd7p>`pG4)K(A{W`^^@TEC)$NiXj`9nsh=u&pQfun9d7wl z+WTqdUoF8;`SDL9ho2gXK4VpWHlzMbZTgJj{LDT3nQ9ZA%zS2NUl2iJ4G`c8zV@zuZSBFNS>FtczmclGiFbTs8T^Jn^-W~? z8$;?F7UNstC`MiPEv5Zix9wZG_uJ&+xAvWH#d+VY@xL?4zgrGqLVe%q#=oO4eCJDi zr=otJDgNGB{e4x-_X+d&#u!Ez{C;`kd*k8v?4ln6(jPSHAE>4uJR?6SW`A(90Y~Ns z3H!%b`H$v?A91P{(N8YrPcp+#+XFw@CVvtw{S;09)J6Z9 zBm7xY0czWRruF~q_57?1|D4|Xc{ukk(vrW#Wq+|~|3Ym7BV&It%>RY8`Ii~WFKX^D zl&W9c&A-Tden~oi@pOUW*e`1bznBXD%B}cosPV75uD{ZU|LU9mt7`SHieb>n`n5{@ zYoh*FYsas`!C#kszZ#c+Wv6}()@ese7TCfWHdM*H2&|D7lQ z-LVSl`+jGP|Lza{-kA73oAZaO_zzk2AKRKgZ00{C`(SkLkFJeBIEQ}(Ie*hi|3*{) z&D;Gq<;dUCGk=S*u+q%m;st-Vl>eRI@b`%E?*_-;S^mGzto&WQ_jis2aIaX=+j%y$ zP&R-znCxi3e-NEXI?&RRVYIn<1RWZ3{^`vBML`=w+pFB@XxBKJJ>@~GcP7xT;z@L( z$%p1UrqKF`|4;M&uW9KlTGAOnny=$ zo&z|T9L!p65r$*oVl;tbj62Q4Yzz4qVXFXR7!_jttEHHTnF>hdVvN~bjtR|3Ftoi2 zj6zU}vA0xVq9fH9c2tH@)8!b(Y%Y*6DKPx$I*fi-iSd=FFzMz7jAXbGV_I&)1Zm9} zYPANV+M@xsDJ^DsrxnBEwP707dW_T2j)_MM7+$UsqpR%1cssi=>NcEs+l1-jnlXXK zUJPZ>f{`uuVXQg*m`H^U!!QnDR9-tK){+mzI71k2!!SlWFoN+cI5Ek?QH)43hA|r4 zm>JJFCL8x)xK0LOsh`Az`+XRCXbPju`Y{gi3}#I~i{Xq1Fq(}yjJseSqm42Fp>+Xc z2!=8K%pxWuTEa-%mNDkB6-;P7ilMRAFbc&w#zQXvqJa$zdq0j*mnJZ-)-6n8bQ{B8 z+rj7=yBME5g-M(DFi9C3FzxMQf`S7KRhz}gorf4(lmaf(sUWK+7i{dIfzIhXP}EZh z@c0Z+r(uHLVHTKLE&#=Og%zN{bgwb{V**Z?Sr*+CP_0R|OAAhmB8l+TTTwtXiU zUgiTV-5A*Da)ZvbaWKx90C}}uP}e&NdS`uLYHtb@3#UPPo)8E)XFniL2J!C z80iUtjF|;cl?sEy{0JD+EP=}EQb4=10($aR!DRIsC^D^sMt=;PNp65y-X>VpoB%CD 
zTcD#`1khb{Z0<7dv zh&7dquo|8OpnAkudAuBJV^?68l~OFLzY^OxUxjrZRAb|1HCUcrj&-Xm0B=lzO|g_% zaa}#Ot51au%r#&snMSNkq{dox&De-bgH5axRLoZFaIFp->utkw1A44B-H!DLJFrQu z5i1()#2Qz-u)?KEAY0RetumXjmYH5`IAy`o1y-!Gr621Uv0>L%2Cy9ZAl8su4Y*B1 z*lqtXR=7KYHI$5E{mm|HW@rp6U3O#5G!HgZHG%b4{&`S4HHo$F_^{DpKbEbY#;P4N zSXX2go1o5N`ISMezH=U%>8t_LTMJkTcM)r9j9`QIB`kGu87t3;Vr>lM~0rRhwAeKmw~<*ur`bx3Q`6Bvx$L#ddj8*g$*_Yu>H}Wc3-Wwf_Jcna^Sw z*+Z;KoP!(IQ*kkOE{?lF!)XiXxDdAvNcJ&sq97Az%&>4XA~r6oE5ucead4LPB3zhR zjH4@fI9ihua0K|cwX^`oDJ{imT17bbXc=yMRg4odBshb-0_QhNaf-ouK)P3jGYe$6 zP)iMt=9J?U(OR6HUWbdylsI-zJx)EX!tq(8L`f5luTkUl!_7F~vIdu?X>pS3R-CC@ zhYL=%;iySHE}GK_*wh`kWrq>RTI$3#=62(pl_p%gvj@lXnQ^-9UYwWPhhs~d0P$cy zu4~bT3*^{w6zL#NW^~}J-XUBhF^pprIdQ6nQJmVS24V|iI4)%zrWAK>G9%3 zo0B+W;S_GB-jB=nPvczPX26o2#f8OlIC^^!ryQThIW|JLwSq8?qg=#ktPz|$xP(h2 zG=Q)yiZisW;{0Q4xXk)GPRiQAnH6zd$g+u}%_VS({Vg27NDD+;lQ_0(7pGoJ;arR~ zE+OB?@q05k{p;vcw0|CetDXKXQi0rY)FJpFSG%X+;TjlN`hB) zRp5txQhaQ?63;EJ#%r5oc+X%BKDj8z>lI|9v<^RGRN}MVdVJNE3U4WD#D^Q2@N~Nx zuM9Wi9h4TlNn!vv9j$oHgbwfCY{PFCw&R5=1Kwck!23f+d?wq8mzH=e+AEIiQ-iwtN7uSHGC{DhF`8X0a{ZW@9}TqlgR{LRI-gX zHt*nPhLZT~(k{M=wuiS=rSU9V4?v&F;FUWEct`Oeeyxc@;5c#!nh2HPrqT%8(maB& zlTK(1nF0S6gODj=5u}X;1hbt@2!#s?w45S>qJm4XcN7z%6Fh=5+Y6{U0)k5=BqVI5 z1pb1Epg$}l_{z%(>2?W0;;A5*;!=Xoa)er6O_2A?2)6ke!t#Nfz!ED6jrux*)2$@L zWAy}HK?8x;ZUwx3O@!2(njp?J6S_n#gn&*|JMASmg> z1cz*dux4@+IMbtqRDm6Em$(Vr&Eo{&u!mq+o*?*XlY~r_k09-yBABQAgwW13L9G0< zk*E$3?2b7?G!i7Rb3+7m)9uy;u(UtFiYsFKO_YD zDMZS_Fd)lPiB>U<7}4hu8RK-KY9pUGT)-s8>R3dswScG%vWc=XCy*2s5k+lWqH(O4 zIJ3?pW|@3ql|n$YScHG}J4=c5eGxGoxY*HJPFx$65IJiVL=B^o=$2Ozx6Rc=;jD~k z*sCG>1+~P;mxky4k{F_^h_sppqN1mfXrFE(Mt9XjHeW+jYg&km*cgylZYA>b z+KBpUJ<-?QPE1c3h>~On(ZuT{2Gw0e>QFaP#To}}xn|;WWiOG{Wg#~D`iRbLD>2Ts z5qV7mMBSjB=v^En4l6u>SUODXGL8@fUMG>VHA<8fjS;O4ZenC$oX7}!h$@Pg7_&?O zF@ulDotPqOH~mCU;S4cZKT8zZ0z_kIjyRJI60_nEkvr!FEaPEfcw>=BXD<Ogrie7%9#P>+ z6FsFfS@5YnDPv?B$U7!d#MGOC$N5d8BldPD-}=0aFc=6zpMk4SNkXE~ zq$F8YCCRup14Mk)B*u=6q$-w^hMQ_hu|Wli8>u5{sr4j}R7FY}8%Q&Z0ASo|BFz*v 
zld=sOQkA`hWC?3YVTz7KuV^DFJM<*SL^~-fp944?BT1v`B)M%}r0q~QNqA@?8OqHh ze|s+}Gj1VC<9(#6-XIWC+DNqi0g__gPO={ilA>ipB({E-q;`*xTrnpp!E%u-v-5zy z&rR~pjg!**9+E`lC7E=Sq@c@3qOMPoa{bZ@LmXD1p=hT^c=}4oF~P#ArfzN zfuvgvlf3y6QmST&BsMRT=)y1%NJU8$!5T@{vQDyL2q4HARCNm9uYq+!z*DdyiM zDYc7$wq%#&X-<)nLwh9Aa++kMWk@qs2c&FQmQ*!$NV4ou$PQ-&psRDqN(YVXh~$yi zsQF}0C4;Q#WRl&JEb{hN0a?f`B%4Yv^V^HanJ||u&Eb*F6(!_Q2cJyy3do9tkZk9O z$Wc`pnUlW)s29q~u0shqQ7$F(4V7fQr;6;0SCiB18nUEbPB!(|k~K9^fSRo%%fx>;2;ZA>p*s7 zm|PVdAzSF9&AL5aqHKZgAi?masCZ`YnV5kGYti^7rt;28Yl3qO5-`ZxlG rKY=+7KOH~yz<;dzz(2FEnJmx{S{5I3LY$gwPl(Ki0VfCx86O*nAn33yj{0RALJfJ%g= z-yY9=v&depmn0*ZJW?2kTu94yT=M+Mv7+18fd!+++rKoYVb8*(6m+$6w2JQ%VP zoBWuE$qYVmaJZY`s{`<^gga2tKpn+nltV!*rn5eyFL-h~g3j5(B6iCb91v J6y2AX#Fs$e5wL~=SI1j7^ppUN+?iL7eMX!KP?|U(2PGykr zMnDEm3tYcYD8Tt@fkL4XffW78WSpFmVtur#7q0|w$w2R*$@rIbh!<9^<^mL0Lk+rG z7y%GVJw2ukQ|}5C{s@8!l@@f?W3G^{))X}OTNY*dP!`yK*kA&C*IDdcYs=phkq172 zl;I+!ixKH$=mn9X%L9%Oe|Hf##E4cjDk@BfpcCUH;z))m5B!%Ri3irhN|VJ(Ba8$c zuJ~RTmmMt)WsJ?{hw@(UIzZdC(#W!?aRcQS*!7#jZVTou4o-nHTCV#KW zY{|-Mw5FuLKI&`p5VZ497v}k2IwSz*xk~_eg@|YXgdiBe7i^Rw1OVVq2jF7(tUfX? 
zU@yP`$Tq>iWdIN^0LUFwkm0NbKnG$70VP5mcZ3H|krHSfJh>oc+L%Xy3~Fr+4_uU| zO=If=0FxHnT})gBzyTr@BEyC04BZ6)rNPT61?B*VqoYN_Fj08eeu*?PY(Kogo#?uj zr7g5DhRsBVNhN#3;zc5fP^%uOjrnh-!L9pc6=7+7%vM9Q$0swq zYndaP)cv+kU8bw-28{y;gN3drVcdBz9t7dhuMmb;CnuK$&m)N*ABg3G{t2zI3h)7JHQ`w$?;Yul6&u ziwZDH8s)Zzzn)e#+n)4aslGibo1Dd}Z!(O7*z|E%ly3M1cAf zYt1z)_kqG1Uqzcg?G&q_?Z#etp>J!{dl`+{7n_4>OPBj?lslX5vp~G+Qn9goge**% z^tY~Wx04^}8SVY?Oi#J%6t%R_-}~zcW<%i-&9}MC=;kgDljiWlR`QBJa-XMfic6|J zaZJ9)>Wb5cGce^ly-Ls2ct8(Y?Re=hf9M( zLo**0tb4TQEc>Otekshx*T)2S*OK9$LOJF>!JUR*2#VMT+3 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc b/pandas/tests/io/data/orc/TestOrcFile.testDate1900.orc new file mode 100644 index 0000000000000000000000000000000000000000..f51ffdbd03a43fadbedce302ffa8e5967a30ad59 GIT binary patch literal 30941 zcmeI51yq!6yYFY1p=;6DUCQt4DAB&1V9K%}IV7?2VLBo!ov29;7` zK;jJWhPdA^&v(|^XK&V0x!3*F%`9hK*Zu$fuj|p&kz)n`u+RxnF=92e&IS)ywE$57 zz+C*2r4<0gpdEwhZ|Y0^$;CI|xXKhhSJ^aOw}{T`T6|~omgRWf_uxH&b-{8>HEA1~ z&-69|wFhE}mp3-YR-3*LqL2e@k#{G>0s#O3LVV;S#4#{7BS4VXqbSISQDC9v17D&n zgOG4rb^e(|N#XkWa*vfFZn=GcBF8k-doL zdUxG5y@)$fL5*g@Y-!l5WNh)8$2NB;-b9edZ|fjsv@i|6|Hacd*q)BT9gf!s#8N^o zFTR2t^E=_!A-6rF;MF-&S@bm+)o%-ojN|2L-Mvko47YE_djj%;<7!FE5*~$xnnBM# zvD4-CfD`Gb*t$Q*$iW!>iNGkBX#@~*G;>r!;#^ETPG50A)`8U;Kx930LipKgJ&8EN z4}Ix}b2i@4z^sYWO?3>KdXUR+A0nqhTO;_DEoBn?-d)^39*F#SJCstA;l=B$)66L` z-y!0z=u-gHlaEl@AS(1OCe%6E0<+B2jF|PQv7zlc-e9srY_v`W?`8(T~E+{q`PMBnLfU%6#gJ=NC}oggCw= zLp@Luj++C(jDujty{EujCFD&0Y!rNB0;3go8R!B)ReBJL{L`momkbxoS~Ll^C&T=& zNBEvAAnyWL9lKmLHwr}GkeJJhe9V43g@1Hz*>4MakB;JQYLb}R3bRU_&O9-@yvE57 z`$+1iHh9-kUK20c7%uXLiK(T2@(=rzYRz9P)v8gHj zn?X&<+%uz0y~EFqlzPb{^&9#_pKH@@zy`m@+Zc4`%6AfazFz#A#Pu=0^dpTCk7*^Y zkGauKx98W0)iD7HPSYwz9~;)4&OU0mo%0*M$VX!B7BQ|=+)K5_t%=4ph|rP-$q%mR zWeYLJU77X%o9rU4WX|(t6YZ1rspwal5K*??-Hf!~L#H>yS{5SjOMNG4nRE2-7dZ%W z*<+mMYKHq9y~95UquaYIwJC!9rnA3q(=i9L(F3use#F?Dz5E2C@`TV+mg{Snf+)u7 ztm6|R4OyOyFqwLcJXeyZ9aI+KfI(MG3Jiu|^7sxB-up%dFhV_7bjSMpy|Y}mW(Yht 
zz&_sLphnp3O|A?$Zr>${{cU#sOT+`S5^)cx_{i!k8ImlS*V^}Icq|!d+x4zI%=9Mo z+2q8Fq-)o1e3;-(WQ(a(UHv$vLjw7~>(Zb?PeJK*rGo2RQ>r!9$&@SfHG|rp^Am=7 z{2!yOT#6plP0LRgd@KEwOzKS(jt!GlYB!Zwa^0Ou#Er7~Oyjpc4RQE)F4HYIZ$?hM zwY?k5^Nr1HLAm*jKn}6t4UQ-jl6>H$&9Bj%3dt~hCj-I*qD6b?| z@w1WHTrcYZLv?PF6ji^xPC@case=QUn^s~}qorD-5ztDd1ei;aa3XT znfkbTx<}5?zK$)dOMI|WV=FrAi&4{i)h3ke<(lRXREIfr``YVU2~$!B>U3L?(9imQ zgD(3~fZSHw2QmM zR__dD;jdJNiQIH{?rr1j?U_ieX%L~>7`@e-HW_{6m>fIO$sS_7h zXD}(s^o{wIv2lZjni{N9xti zpsBI$PT@)sPiL3aHttod#yUqI7m=M-=heK)G1gb;QsW_hm5M(cGZ@P5Q;@yv z?%=*H;;i8ZCv{%YTk9*GZ2DA>URMBhm)M=(SsSC7h?ID#@0AO^V~g)yUAIM!f3N#B z?USoKyS1h>iKYvyjpLtuaBaLxy|%>H=eMWEs2U?hbecu(inw$FhP)#vaQJfh)i%(U zyd$xVXR+2@)tlOIp>R^V+xIo=yKJv$m2W;M_ihup@&i_QSkQmvU`P?V6VbBi7Vw44 ze?@8AP01Iy4fPs0Xfp|(4nBUB)h=-SC zNWKP(e?(Vz#TbW^yAPAe&}D=$)Pt>Dai79T-wh`-&~VGKgfd{=R%%mExn>q7ZxF6q zkNw4!PI!jlR&n`j?kiz_-gx&n2xDf1GKxfIsDmEFBoPp`V#teL!asia&5nvhn?iSh z`&pjfu$D`nu(*PW`@{WZYTN;4%3_y1_LuT*IF{n3ooYB%SN+J4-`=H7VeVP}vRM=3 zYtvooz7IYj$h|h%*XG==Xi2qO`9>EaNc%CBmaS8HhhKr2MH$|(yh(7!=TRhash<8s|ToLA?J<8Nu;VvEW z8_&OAt^UYf@i2a8MT}-pRs87(cNLeF10{kqPE0v^88DHoFjK%mAaOnu$yi+4S7kZ8 ze7}QO_xY>$?y!IwV&jb8Y#|6z1!4!r_^tR;p5WNdYrp$;E1l#$;^rn(om&RTk;dP&WF?<4|WHJ8S*v zw0cYWQ5WA~L9UeK$=F}y@R8NNs{FAFn&`C8ep4axpY@u;mf@}Bheun!>npoi{_86( z(^*G4LuMTNL*M;;T3pt5R}bqxO$9jp&8b7pmgALOsl$Ndp#YbsE!VdSmwsgX9}gXy z9d8{2)=om6MAesJ3N*OnT!Bf1vXwaBsLe{VyV*;g2L8-twZeqS2iL`m)i`8qbRw>^ zBr4g652R|iKPAdsaTm`%8C1PC`}hu)p?tvFyyP>z%V(6ZeKWJX?u(^KS;@AuiPV8j zVtH+erEb}?5og`QUWQF<4 z!p-w7>8&?zi=w6(Ufly1uogR&^(V1effr)r4QaZN>uc>E*jr*J$f-g5XY(D%Y z24bF%f!@6WSLO*G-EDJ0WK4hScx|@#aC%dyWr^wtR(BA$PMW5eQTb*SfN;(JWiCuDtX^v0pu^;ko* zj(;x-8hvbI{0hh1*}y^f4w{>l5N)N(kf$G<)v>gRa9_D$Iux_&S5ysltv-3q}Gcj32C{eK|*Xl4N>eP5MjtrVQ zAGZ=(KoW#vPQWEGQ9F7O_tXQsePYqM+ze>g10Txx?+UV7VD-{6?Z7Y4;8z z-Mk*OXL44*A-Vq>z1h5Wb5DYAf~_=r^^5D|s2obISlYrWcvIJ~3a)e56b|>8+CL5% zgdUizJWd$AY-^*%-BVJYUK&mKK_%avjRI}*0&NP3JmDP$E9xjxUmN$%H_OpgeB@+S zf@PiHX5pJeDb)>v6hvv=?JU6Ql!gW&>?*;_ZnmR~Q@3DsHyvqv`>v!m-4#avZW)80 
zot|)4z!uw}x~7^O+YrOog}~8jVRV$^`QWqXnYtl#&pX8pNA(L{p`kk0Y6vHWTu3%eXAs_<3b~Bv1PP-;K#OQ%m79a@x?o?ceP-TJkXh!Gm_&%%ZH1-JYJI(_ zbVn%?JT4E0;2|rI?r9_IogS&d(*2E37=lsdb#qfEt^h8?86RM zQM_S!O$A-v@8xKBFqrQ()NNyxZ*jORjZ95Bvb}a}%KLDawS2d43U1OE_pRy8tGfbd zDV?hC%pVXWS_F%Tb{QGN3H812JARg%fPE;o=5<4OL46n#1aU^ za+MBNk468KlLN(lW==NT3bhNCdym$YlLy7#hg0LUGLwfm@}kz6KW)ZXai!pAz|npn zN9f#^vCq%h#{ay{#Nhsq3QT+pRX-BX8TPNQ-9$r^U2)21uKLmR5jyrRlZ3)Ju3#p%;Y2r-)EP>)KGe% z{LMO@SUL6jaC*MRj&cHZys#LH$sLt@pPl%blC9Os-aMX&7ei%NKg{4~SJQrYE#7(X z%H-3I*8zhZlLa0xHPg3ZYghPIi_C_gl7({jl)JXlG*-AW>DXlxm@P1v9+=B^htEG+ zj|+aBe@Q|oLDIfEsQS@*LQIB|W}h`Z8GrocR9Y!|H#B~6qnm1I0*_~+t~|VvcW3eVLHV68jeog9ita$q2tx#3CqpknkKHY=-m^G5J#;L!yLF86?H@)y z){oIj%sQ#p#ZRMNyh1*IriQI}W}kbm05UGJiVsDM&G6j1Y2L?lL^S=qY5m8t?KRK0 zi$VrNUl!7CycW9mZq&Nz#?qJ7mj=mS9GZ+i;0N4TC|P}GkXYhSV>D87`0NIGiKmCL zN6FALvrPThpI=at9XS|I>Iq;Qq?x?H!M;)5ZP4cYF=O%ts?iIQS5{v-e%yLZ`!PxK z1&xoT&(1r%Nc@j+O)rRitZeaI1?%QhI_^`?@+#KjCc7Dx!Bxr(ecMMSvK;F{AMPlY z&1o37AVR6?u|@9aJI^{#yaQ#K`u3cUgk8QJ>xi`J@3;YQqoZ!GNPXY_?7g6t4aR^4 z)}zk;NFK1E0ag@pYi}NeLB(L^%T6rIe!WejD6Hu{2Ko~dUuO3<<>jy{UJNn|;>(t(0pPRo=Kp-k~&CT4}Rp06eb7zSykZAypVx z4UHd0VaQi|GZNlrT%6V8Y}a|-o@pR;pnI!5d5=P9(X zN;ydsg0OTdWFJSzhj=}fWlvNYW3PFkPq~m&jURALd?A0U=LW^&NPZ4|8LBi|}>nM(3Q-MuHt z_fVmOj(+7L`!X)uc~dK_O#z5k&}A~e;qv|b=`9N2)ey~dk+5IuTqGnpzaO+XzaJd@ zWk1M&em`hNRfVZGZk!f{scvnV5a{x5Vj?ZFuZ0gO|eQq^QKEyZ);=9 z-0P+D`#~2@OQ+`(I(h{K?B965|>Yr58 zfl?pO?FJ9*KI}|6e+`)&SA12a>51?E0{w^Oqn(kguj!H!uqUnkcyrR|Vjmg$I^SfL zUJEqK=ou5Msq&&+f>-wqtiZ}O)-y6&8$|QmJXn?%-{Bv-+Iy|=bGyeynjpU_8TY!l zCI(93^c zO+BCP*3|7%_G}ogFMCYA`O;J+;!Wm~@9Xl-c`6F{bdykk-14eT3Q`5xBqghFCz+Z_ zyGJL22sH9-r-9DUW^YRS1uFV>5H-(?p+i@B{1#`jq2DSs52CYw7`1Guw&X&;IR>nY z9JRU}=1uRhwiH4)(T`^z9gSokq)VaG=M-!H5(lx*$3aun7L&mLWF##AB@&7|R?lwX4ZIbobw0&;pS-FYMuKGyL z+jQA;LHkhJImas14%vaD}IpZF_j1rx>JioY_(BCo?jvTdcq`XgT%PdS}Bfm@7O za{JnUG!n+1kA(5(BH=XSzY+;Io%biVrqxb^+t1POw|!s#ii?XzK=E0`I*oH#L(@|k zzi;y-3#Z-on|zMHzuc7?|7|ZgwLbadFQ)u7j}LwJKixdpMXsWYp2j}^>7ju2tfSgg 
zWF(b3+C84jIx@Qp0HAS6W=Q5sK9{VPY?N%5d?z_9Ie{i5`BidD@&F2k;y{U^)KDfU zCz>Sm3iK*e4SF4F47G;dg1SR}Ws%zLFjD*m5X);1Fg&O3=#kN>e+bEd^xe2iLxlDh55A1LBaJCo^Ey6$(HpLwPcCNy;^q5Cx+i zE3_|rW*t;0#t;}-BE7uVhfl343jpc|Y3=pkQ*)dRN9NIYgicQ=((^a}KyZ59OGSTk z2?j94l?0=7eJ`jt3WD$o;u!{02l-8S?SvD7g#O_-cs| zX;?|1-Q+5W5xEc};6QE12Jjw|k66x`6Pd?C7ElJF2(!%7l-r^rGeZcv16on6D^Msd zSHmu;5fY__rihXUl3x{2z8!}41)Yh-_NfgE2tf1`bdf`j>9Ej{h~$KlkRU0E3LZvX z{a{;_HDt4PVR!AC7J%=+V3dMt6v%cUl9jNX=;}?l!RAOXL5@<~93wOl_EEJfXHSJMrTo7%_Z_I(05;Nd%Zu@9nS)d>} z7dr-L3x;#T7LZc^fKt70aGlR9zu={76z6e?>mIn}!)Aql@Q8qf_#cF&jYd@zr0^4v z5WlM2ONKKjq=f;-490X-Cy58E>qIXDm|`EZLx#;?%5VuvSzsRVc?G8LYB-zTe8oc6 zg7}W8r;l-P`YhyUoWjCrA(HWEorSw~`wqeR12ffN zx_)u)){zy;KMo+s2>pSB0Kg~ax8wqMKd|vpq9KM_TbJBgGJG!eXzgY5&01D`J(bdS)8;)rM=zr3!WvuiZ+(Z_Jf3N9aOiJkVHucqCSlUf68eynO#9IH}n*cj(v&m%M_ z<4}+{tjt<$)|mJUa({asxy%28+)Ms0a$jfv1-U={1-W1U&ms4?Q9Ja~kr>0m%xY5! zM-ak*s-{HPUc_lMNaKrYCNO(hsY`r7RBo-;Ae??;X?+F`@kb=zd#LzLeHd1i;H^dbJq_m>UnMu!>|AMSNkWIE70(pKF z0>1p}dP}u?@Z)m#&~7@psRd`(hekL{>}Sf!4&|eVPcn&4rID%r16VQBsWjq;ltx~j zsU$)l4q#6hPiH-n4`5}ir^9WhKV%fAn7tqeg;TNS-q-J-FQ{jCzP5m_rz#7dM)ju! 
z036wz$iu9|V697LR4L>D?>`g}_{@m{!huvkWYbO+5VJQpCklvw^@#%Vi9GB?0ZG9O zEvT0b*#;3N2n_Kj0Dl)iPRyp#KbuYQJb0int2ACmg6NpOjiZh0FZBVS^ZLiBS2X$pN!xplWXT%#~<#X3_Y~mDupj6G@}G zovw1k_2dugq~;SKof>gHXNanBwK22ylvORVn9bZo3uibYZaJP?*_+WU@vSi-DY>It z>REEH#!Lv?YNRZt#M#4eyX3orfr;K+o0)paCx@Fr)hg8_3x-Hi3e}Ie=~YIx=Bl-p zUbmXOJA}UmIgK%Xs${LT?f+VVh@~03EP*sGqsA)7JL{q&CR>=gSY;rxABI!=$lQg9vg0!JH5a#`#E|Fj=-Rm5-P; z6=ak%^wp8&p&Fu?O|GP9Gnb;;bW=#pum!-e;i}!z7zFLHoXtUE6KyC-6~Ph)uO)Ah zPPdWLTA*uI+_x2lVN;V$w_;AgIZ@(BlP@vK5^yz>Mbs0x-M&m}$*wo6@tp4HlJKm| zt&Y5p5v-MD{j(L`j4`vi8Au&lRXVqzpq+C{MX8N2PFRQAsX&&az+qG4SwYjV5_)lB zj1FsNzW-*Ppc0w=g)w$2!H2Gj%Te6lmkA{MgVh zxTc{}o{=ciz?OGMmt`Y}zeKuVq@brrOP)gy6Eb2v`7F%vS>nrUjvnR0FfF6)BG1pt zt0Pt7MPwe}X7OSXkI_H{N7d+ooN(!<1WtJi<9e5?bxNu;^+NOw^JuE|Bpfn52k)&u z$A!x|4VR7QPgdVR7cb@UK=aLKr1^4ndby!_1iNYY@t8|n#k^*8;Pdl@5hGhifXnlNmxc)* zRX3?VRyS2sY>b|FWMrOqWFQ7;zeY-q>-)TL+4%~%k0vD+1)`ZZzb{I~>Q$=&^!Mbj zT^jD2vd;(^5?3q)wTL#kd7>@NM)i%PW~2*99ZVKX7QC&0FVF26@#?nQXURHO)w(-g z&Z}#aos#uj+s&UESnIs{*WjiT15Gc9wu2;m{zHBY66XzI9pZt6Z<{0(iDDp+5D!^> zUsvJ#j+^bjdoATRZ*}60+O6}SN%oib`}+6&7};oQ01z=e5S&hrUn?WyM|!=LyC}r( zJ@|eOZj6j#6W)I`Mf#OO{a^Yq)UR}Y@P6pBnGqPOk%TGNq<)@4aTh1}4Km%&SC3In z&WO+2=c9;MXM9Blz)Cf0b0?r)(YUNdHxq^U&n#q`|{%2sO{3N zZtgL}KXNCW8YWIi)vJ{N>NAtY$=GY?SoFj;cziSP`>7|fsqom;=MS&Oi6?OvX|Pyb z;SoBsGS=<>*zup93V5}8_(Aw@P91NZxe$@g#N88K6|moR>Qp=~57-X?up{%OzW`5- zze62z)R}xwO4c5{V2ofQQQ4=ip>>fP^wEil@=R_AT}~S$`up0LKyWTr_x9W3EfXYb z(nhsznggYuP5O7Dk48YG>7NDq)@}ybuz(>CIOxs1G^Bn_V|xBfV_MoVPSTi3Uug=3 z9JO2s5jqGW5L7{|Z&%?jZF%RW;Z6R3g5wnJw|!NDY$m~aibVg4<9wI`LW)a%&trb8 zc}W2t-lu26kQK*Oh3phr0Z9Ro3OOk`xli#zKyBoQU5!BJHOafHsm(9Ne<^G^Sba2| z#k`uTH$?hpT5?+1asxcyle$=DN1-Nm2y}5En)Gt`)Omhn?hF989YTe?QmCF1zWf zs&!MQ{_iuM4?<89YrqA(xu_Jms1&)V6uGDrxu_KRUt1}10dFEN;LYEPf*0@xSugpo z`C~8O%|BNuashAt8&-;3z?*Z~Lok9Nt|%o@a4?U$M`>X8n6TQjq`kCHP{5$kUD(!fU zj|lN`0|5sg0?__~-t-m#A#*WE)#K+*ALM`_yI>1!PDb;wFp_85%-$?TB`~Dyp+u;a z8AR9#r*lKEzq>mxq?o~_%Z==p@jHwmj|!ceYVp5DDWrJNU2D7yqxr+tN{-pD>1ula 
z3z6EFGGb(xir<@q;2@iW95=S~BM*${{&YYjX#?LM01&9Ona{N3SAn&`;oqBhxEl$~ ziw^p`@0z~#d`ylTGT_dehN}=6jN~@IBOHQjgeXSA`a2m!=70-;a{+KJ0L}%#xd1r- zY(eb>!1ovcFhmq_@Kr}49C3VH+fn&=#dc$z$Nvkwb>!!)wwDfj-~ZsBwb^AmqgiNO#2? zQ~~Y3q%%KLoSvU4PTIdoaR$da3%DSgu=vtxWl?L5#JXdqvqCm{lSj<#jDx%uM0x`O`T7{N^+46Geq7`V@VWDc>p=v47 zn4kRX#BaT=P|Mzye2B*z2*fc5fbR0~8lw~*x``X3tbfTPq(M%R!vJXCEX@&k9_edd zn2l|h9^xIP2k~f~pq)z05NYyo3sxnTdIo;vZ)V~Q!Y-+HHjl%7E)OOU;WfcXQK`R_ z8x|Zaf9Hb@YC|1@6yg0)zeMPL6#mH*4=C*3H8qhe(eL-^GoMW%pXg6vr>5&%d^w1{ zFKiQtcDghe52zL#ep_mc5HX+13q`K!WEx=uOM-nLX&~fPW&7#!3g*}=4*DpE3tV46 z(j5R~0RSIM@BW=PY_!P4OOQ7WO<@RTM@i3sOYPh2)xR0$dr?_)(Z(zGZ^=>@ZM-hp zcwMydx@hC|KfaCE1)RBnGk-G*Uci|PIP+5*uZzl>|KV-CF5t{5oH^NzpR_GGg){A@ z5U>nH2I7ttX>OI5h_sH5nAw08l@fu@GZ@9xa1xDJ4(Nvs+CDV1+s`cYk-b2SQHcwp z6h|tY&c?lSyFu!TCq*(j%WDlm9-3!EmxiE`P-Kq~UFprnMYJTJlg1^$jxX_41{p|} zgSVJ*v93Y&G%WrM_1w4$l{t;r5--S@uVo|+0%G7e*@1v zJT0y{?gal?TqE&s;hBV|CPysH8=I|D?1iBqasU@n&LBRMGjLBDv7=!GX+e_3v;Zu> zJ3z_9w25P!2WWVY1((tG(2C@(T9as0f@Sq%W6^>batxj{mbFUEa=W^h4%-U^ zqLG^i2ED*Bo;s{^DUna3Eo6Czj>8KBfN&n$&?mz_Ht98h*RpL=@R8>sPiA{7%^ftk zDV_`~g@*ol`8=WM`l|+Tc(3?xS0CHM(b)iis*0wRTYL{ILy`SDe5AJAzw=)}my4>I zi>jH6s+o(bnSZ*A=>>H8p9)=?PoPVAt0%GIABowPLabP$7UYHNZx{kE+3H? 
z#gTX=cekJrCv6Uba7 z=Wf*+Ei=dON1&G$eAFGm+6>f|&=|J`K9Q>Q;BB7ngG%ww0BEzed6m%VB z{dTr0yfMAa_awLB-N;7sSNRjJ%nYO5ca04Im~>;Ju-8<&g1Lx^3GpsnjeNYq$!yZ& zSca2wyw3?)<7!5QX3f^{Cng;t;783)_+D4$U)|061EA1zf(%hQs`m-&+avzqC!sdd zs^+xi&EW8N7;~T#07MRxHbAQe8(;fDxUA81?nJn*-zBafPvx^)1|17-pmGBMMt&cM zJhzc#1;jsyByK*e+f#Fv7sW0YJmqgHNf$ijf~Q>Ylnb76!BhVA*P>i>D!b@Z_OHJj z?4ncIf4A47Ty!e?-}YLR3!ZYyQ%*WUobVJa;;@;w1jdvH+#dX$Y%%hiT z8SS*)`}-g9BCg)ZQWj)$c<54N(2nwHOS1j#yP#Ld?j0w$@VE==l|7v`9B&MYJ|#1< z4n0l&y=3Fbf8EU^_*9l?Y5j?tVE;8Y!9fAc4K7(Oq-ayZV9Z$gjR?`&X~fZyZ4<1^ zwr;mi6@>L26jmxK3iag2KA3(3CI;B2z|lu}Tk-QPx(ejduYsY?KKq9zyaunqQaRxT zMk&a*MZpB!i$a8ue+Atq_J*jLHZ~2vH-9lT$EEo!jawBUfOX5a?LluiNef=nCIlh6 z3M(cd#h@yiwjwrYNFzwc{kfXPFBI1{nIG%Q8p!^=T;0rIua&9?DwH-QVC@NxRtml~ zNJ7z;H4nC`MQ=tn8LT|S9gskXK$x#TzSbsfVH%5ii#i&JGQ<^ROyfTHv&tep`aLkp zcHyHFCyWXP09*$G=FCZ#eY)5>-nMe`hG|mAFCObdkIVLhl;dqzWm5+zfNz;rd>4e$Ml})U{+;AP`(E5)AB6HI2Oo z_gOU{(ZEFpTucb+7YRA=q!JPKyR4*+w{Gy@ik^o1!DztUtho54=%ug;=psRsQYbnJ z1@?VbB{=rG{;0U65Hxsj$XriSG;pIi@M`3^P~gQ#h=7+8!N%@*0EPROL4ntx!o^HR zdyPH;1)jRk3Ot3Zg2LZ-15YudL1sFh5L9VU`f1`%dz#Y-V9Q}B>P*1dXg+$`E_Mn? z59u*0-hwojzpNA;=CXHqtD{=iDe(~tp9cYf;*Xjo(T*4jO=+Kse;L9#3spj{mDp(EJgjE<{*pVE z#+VAIH-+n?L3!6C!myspxG2^5A`iY?>D>tYduh4luL?IG!}30H3RaWL!IvcasY4g< z&S6R<%pX;BsV0j;5sxPtzA*&qPSr$0Io1#r?P~GWF5MQD3^E%d%vPhNe{dnrErE9trY5Q z5_)OU)S3`PFPc%D$qT^|^ip-e!gW9kpQn)cl09oL=a)ahy4VQkSai0d394 zMD9HHu5aC3Y>VzDDpvgXalGICakx2;k;Gh>+W%Q2BPjt7*{{d&b&O27ElJ#=r@H+p>a@xvi&B`@ zC>i@1wcY<%2GFFN5uS^wCL1%;9pmvkt@5bX-LLo26~1SZHFG=8cidN7IgD&{YM$$B zXZAlgoYDDIb+I6G! 
ztS(wTZo^yx7h}C|@0_L|%yjdMavJ6_xtOtjt8Jh`TG-a}285h`YZ7cv%{5-AUm2-i zuFhE&2}5pRV{Ea8Gja zNX3Y?d!x{%m)#oE;K_2N%l1d2O+Wgz6Os!;?uM@HO;uXPFg6A>Jh?9Hwc!&_;7U(O zoMcW;`^gJq&kS2XAyt#%j`Nc)#!j(6l3qh?au?EKk3)sc=uH`3BgAu0?;?Waf<2aX z@x4#OC$9|=JI%vfobN(Q5!V+^#vutBj=?Rjv-2GzYaSL2yHCwaQFNIx>N3mh+w~#t z%S?1{FNpcaJCnF9a1sX4ztt@Dk8mb6$5F`7e;89Eilcb(Sd0Aow5;P{M zADv_{t$EWSWt_0Z0ciy4^rtAlJ$Z3X3VCK-cg^9F1nGsdgTp+4-06VG>$bk37wPeV*#bbA=iXwB*=Ky#fv80mT;tJS=cb-quB z)!D38HQns^0<_W&`I+acJ*tl#lUmoc%n@}WExBq-K?zfOWlhRu=!p{r<(<@9Nk!|L zb4w8elAEgZO92vJblh6()&n8QmFD-Q;f%h-($o)API4f|pUQ}yPZ76jLMxcKFEWqu zrJ0Ig!dw348PeBJs9Y_ZYFmu)Ef}EGZ@VZ@ufAssYQRS1u?ra%HKu zn=&wf_o$93x?Ny5?6?h=YOU{azFb9w&{bR8PgR_s-uFE!C=sHb@3{6UrTY1eM zvEj#sp}Dau10TE5$!$)ato0^GAG|a=+1+!oq@VydGoQoSydCM1nM-)Vnl`IJhT^F)db3>BbPF|R8$m%caEygNJlej6mGWu<{o3<}N z^tH~dWQiN*M9%p|OO!-+-c{2*k>g$|G#rXF{5(_I)9XTD=A3nFNn~zxd7zuRG(bqJ zT5`2iI^Z!(|CJm$D_0G|FZ?wq}9SrvfWeYMzPJ-vl8DPR4h1nOi{Q`>a?-C&&+MrO0H&kEQ#)nuC!%rBaJqjE7l`C7RMnl zf#pjG7>^Vhp+XMMS7U0jwsGkr)7~OoSgQ8eJxI!%Z4}b1!9%Cwo(c`oc-;?< zA||cGJ|ldLXvhDn1@#e4D(z;j=P7P&8g?myBJu_|{nsaG2%1@rKC??(_CCsKbRMSVJKC&xVe#v;K zXyUIG7k)EhY*O$f-FEziNv^C`x1Q8{`Klq7XSc$?O`fK0Q5H$>ys2b2xv4-L%ZVe) zAPpmx5oGq*^dw7VKGGc)JFO^7nCi9}dUI6n&W+>HlF%^y`lVIkNBp79-F&8eF;DQ# zM>Riwz4rLXrAOM`T$y=p9b!i>J*Lg$5;b#kYPdTc9u(p(XjXL1(C1W{;mb#-^i%4t zsdhYeEFX9o;8^mq7AD#pCTd-mSD`N%7ss?m}B3rt8XQcI+^P2VK_7 zGJxOa3Nk9%mZJT)mt5y2R}$RkCMyOLwzXP~IM!Pi-CQc{=2m95ih2h;Z2sk?E%l0> z$rZ^hkDXQzyY!0lOBv%EiS9eCJ4QQ8JCNDkBTqDMs*MU0DL#^B@IcX2gtxBM+k3U?p!m~> z=Du)Ka(ajR)igtb`VBTE-X}_|7W{uX191+YfzF@!#YTxkZdci@#tnY0sWw{kA6z(9 zF;2aWDB290BTrlq_ZY+4t_sA=IgqUrh%QNfg}2a z{F@9Djjz)}WD^eS6N-i^27O=6V%*kY>pz`>dRMWm_Y|S)ab6aDA~iTC}(IwjGKUm1)>HOt>exJH;uvFE=! zPl>^yP>HK4s6;q$4ma&QhgnA3dqc~IJ}nZP29pmXT27doX>h%Nsvn8tn)H!F!_x5? 
zAIJGHe{FQRQ#wUfPok{SX4`c8GYNRwE{H#mzoX@{{DSby8CtJwwJ)mir;?g`v`ht_ zGKN~p^;(n^w&P~CaoA+EBW1Ii33W4R3)Dm{%bYqe=fec8bZlB|dtN}}JEBFP5*=Z| zj>&dQ`h#Em48I(4PU~DZv|I8S_;R+aRsGJ+Jds-y_xe+#WyOj~e)Rj8*~C5!GT2Ps zf^28?&U>nB`Z+z7cWFz4S~U|pBKg}=_>VSW1WouKzbVVVT9p)(R!P|;g(-MHm-ohe znkHqMCY5N4;5T_wnm}?_vzxW`6G3^rloX}{FKc1tf>%jn@=FA$NMqlnC`-yH8^enCDZo-Vel9kI3P=Z+4J z7cI|U#B2M|zo-?tnB|x+GIeFSqx&c#q{G4>tN!(+Jc4UIhCTSJ_40y+56W^KT&yXa zEOn4U2E8tCa*3R*>1P@wy2YKXiN4j)&18yq3%NLX%K9YI)_M`%$dQc;iV)SyfK&)a z<~dlI&j~bCTE?fedKtwJoEG(QxGq2bMWx}>Bl_*hmf03F{fT1~Z-8;}^m2k;ahhI< zXpz&3?Fe08)SbbZzKGs8=C|y*XD1B%AFLy`X3(7xDP?JO&P}p(->_NkloT!FW-oEr zjkgaBShH1Im!)*xX3bt{7(g17hJ7uoe{-83Gp1Jgqw#&>NE2@%;W|BiB#DmmJ?k&B zeMqS70`aX?Ul-R}=rS?g!V&e=yXN$(`4vl#?s=@etCIlc(OJ8#D()@+JD)qZ2kbU| z2G`G4EULS!yA2a<*U)ceO0Efc5K2j%%Z48JM8mb0@>UJS>Ph;-m9zk5kpYDMrwQ3!Xl1 za6{?N7aKn2=qoBO>mT-opT=NU-HPL5SJCu87jD}k)}LNe?a{)~pXT^l{qa(0;Uw?O zQ=?W1@eJ8Jigin|YLh49>Dgr>SWK{)?;Fd!yE_^(7v}vi^_Zwugt+B9ul$g?h~PK{ z^@b}96nx<)9??lMIAHRL=v`I87t7{0Cd|({eHc4+v32Tm?4{~ccRqGpDZ4a2 zHS_vn^psVZUMG>qrO%l&&o4%1S{3NEXKp>eM49R2sPCBB`rIg9r~1pQqZHd#y8RdU z@h-+1yu!!3l>hExm2FpC|0@i=S4ZBMPStE&tETIU5`RVOa@pnEN5TN2uCTIKq%NlB zg!TeOqcJu2Xoh*t7867}=)FWLz0`Gm+uoO8T@3BKarWhintsKqFLg1V&1>|`?R=d0A7`js_dV?8l&Ww}|UIN_JagH2&V_OdE}RX2vwQ$Sw^Q|3Oy-75FWCjVxP%(W+J)HG&X#J z^R3p4xA7MxH?^<5_383In@g&GnLDAzWT8yhU*|gT4~Y})K~xk;-m0_=N!0!Pup5>q zj%cxK2hgYK_9zG44Aq@vb8J!kl9)gtspxYvOn#C>wM8}7jG;9k(MUwE#{Xi(e- zy^@QY$8^4`z5HC(_YSKhGUP_ls`l*XJh68aw;yJIWgZ-*(!kB)JR%06KPTlAd7KmZoVxV>64k>1J`No-^f~rC-1E8y&jWF_Q!htCG(WXx z={`-w4 zP8pbFE%R7?BQZhY)|S1VI=DoICg-Dm=p<~^ICK&oIsAFhXhfZd zqtc@v8;GNN<#L3l-N(MZSieiLktO`r4t9dbg6`O}dHm{E?Gip!md1=!k01U#XvcZk z=0#tRuAjDUWI>+Qe0=*wtjvPo3+C}J21N9rs{==~GEZF+vM%&e?Nxq^CfRl9)8M8> z=eGgdX`lX{vv2a$orv6DVQpLv`PQB={a9QS@uaema6}60bQfboZGC*!IZvaw_uZ!p z@@^d+N9H#)Pa?9_=HlWjON3J#9FLEWeI(kkx4bpU$8|F-zySEC5_hNWWM4~GU%cKM zm!j)Lw#sEwPP6qh{duCr6t`|>&&rjJH_**n*iG`%Ers3_mw8|~Yg1NrqU?R|?Si6} zqwnW+=O!En>Zi)`gq$htQeEx7+71Mwr7w{1Q(l%civd)S1#*hww?_tsV(-ukt$OOY 
zzNM8IqDx$ma`QavT0>es)Qu%E!|gUUlqm5vSA8=maYL_SUb!M!;;XgCoX~cq-B!xr z8f!&{!~)jNaL9Ih;^t#XEQX|K>c5j&Q_?g|>-q~X`X#k!B5(K>!)*;I>lBUO=5@i9#V*J&2<<#Yel zlQ8t~Nf>_UBphV=pPYmXw(I>%gDR-Q?cnM6{c~UAq@A6b$Fsc?3w6#Vvxo6gh(>j5_<=2U|Vn61EE;0Vk>C z_R7vk!nV#tQ^HjOka^p3ajJ~T&X`o5jH^{`IsG6Ap7To4%q&bG_tmmD z19lbBWjIWE5Yv*$4!`7HUZqZ))oXlT2>MHOe-r*h?bZTT7~KAd#!~MlBI`KnVcVNc zgxeWx1|&z)s^FT}D`wQyxRo`pI&ZiXLR+et<}0)@4E2!LY0XQA5p}~uQ+H%P2|9K$ zE?dtzcVymi*tP)@s||vBhn_resb;AQ_D7?eP58W9vB}L@*sr~~=Iw2d9e(7YUgb9l z4?{+@nU&%qII+2e!rb4o% zBBMfsFNL4{D79F|XBkQo!QaXkfkyhBgzN$kIpC$VMlYK}pp4gp2_3o7f=OLG^k)+4 z*_u|is~*zgQ9>ypya%}k3SOiRLgB&R0C;h1SAYlpW&n86oZnq_W?Gh9DW1bP4W0u} zrcNykGcu5cpTNq=37pFs60I>n-tf=XE1c4aAwtmyO12M(W!e=xBb|P}3#vaaN^y7@ zc)VxHsra-HFOoO`c#-f}naiu%o^TOuSAvH?&8QSu<{#0Oz=CHuey|H978?X|PMBv< z*@Vm#$H%9Uh(>=QAOa0Cyf=T1wC#!QdV9*lcd@@G>Y1(u21hb)XlFTP-YGCTg=gCSGC0%L zQFl3W(dweXg^?;F)y!V2t7yu3%26ha0pwK5T?CKw^a_oY3oqZgR_0tIIPGQA&D5LA zT4>%lowFKB+kHY5&>Y7gXC0J-FS)*DBy{lYwLe|-8TaMr^O`#SPctQVMeR@C8Wov+ zyBwfi?DfW;>DK4tv$dbZkdrkTV%4&A3o)#lcN^DE8i{Q_%3h^4T68HNB69sezh=7- zJGaI!IZxp}n!WjUaP{_rpA??57jcp&j1z9cG#bF|C&T8a)P+-!LveJ7AwMA7S4|kF z%$^)`=vZKtb_}@@9zQJfuJSu6Y~r_}oaJ7p`>N2Ra=b+^R*PRFAFLv$Gr>~Nxo0kn zz@woUti%~Wa-xTkM;~L7AwF47@t~O4;ra>k%j_41)n3qV9}^swzE+dc^?)^(qH#FK znJIW!I}Z5Yv$9ljTH0IA0VRbhk}yFnE}JwNjx?(UwdZMNZ3YWmy4g-UtFu4UdiF40>x4_1CZW>CyiVFVHKpvhNa+%` zlpET|=e_tcrPA8d-apki!*Kxz-mc&O+)wv;3PG1yf+Ve!7QaHlO zMV<*cc6iELD+e_v-IYowwn_QibKyk3R%U+u1s@&V6x#d?qE_)vEq7*K5Fll3+Ftgg z_UB*163OCr#B@z%qMec(oG2-8$17_)F`sKV5y`6O{ zy%C$?sH;CEj97+}IA8j7PcpjeU5E}yPodeGAq^H%6avX7Vw(s)A@O_! 
zqcBaSpj2H9wYBdmqvcH=%9~9G_H0_9y>DX8Y=f{>8ALB1QcF9881n3dvsi0*?pUa+{2VS9SEfzHNGcI&3da)4b9z&!+gB0VZbuZ zr&Z)^2DC!B%)tqBd^oV7{ZZUwe#y=LwEnaY#Z70poF2Tn?(jvt$X>bV#x2{K+5TGb z;*-ney(O$gw;E@WhJDRtuSu7^L|y(%GQ$zud9ET+D>PGyM;JUU$rP4*)r9`!xh)F2)7pId#S3yiqUc53G2L6 z;mUl+JFh6&*2j52Om|m$M3&B0!w$%b3AU}nLxYx)vn?Y18LD>_dY59H+PEzf6IvO| zQcdruW-LWDgpfU{59fiA$nh{1sF|3onI5UW%M~CDLpDDJ86+n|JEfxeoL3Ywx(aSO49I zZj=tNJAl2J^q|?(4t5{kYTXguRl|3#dM=`*_Ogr}Lzf>C#IDr70_X-alia8F6#Al- zjc@LQ10HW?wmJp><)xjaJ#ifn+*fwTFpu>*lmNez?Xm6wVF%`}-^^2Q4^At0GOkm; zxi*Zg7DlX(EX>IxCTyWZ_h+CiIX$~5J@9S)`NwVzW1dtkZt;vOb2IM_wP< zK=1?Okn<6^L8cp$78?y#ihG~y7QW=qDFr#SJEc4arWBc2)RbcM343=+>35Z)l21~3 z4kyKe!uh>&$aE}n+@&txxT<@V53)a{?D{nhO({6;hUhe`V*_7|O7CkX*=;=x9)J9o zDFx-enAm{Aqq1R#EfxYMo@y69P|~7Z@_NQ9%>X|V#YHD z?0)v6TbTzY62#gOo57P5pGqsr4_N@j?wtch5+~e4BS}fW0}joU=){PNt_k=)0u(8bn1NSUObhk0IJ_%4Um_SN@u54ODh?Q=1l(-JFt^rz z&r;G^OfFVe)wr`kTdPTGxo*>#Wi{h1YE?fy2{&-^v(M?Eca8`g@FYYdTyD=PCB@Pn zy7By!xeoerG5x+11eZVba-I=^15RIy80w1=9!FQvxgZeHuVUkfFio+em{(H0fN`oX z?oHf1G|;;MdKWzN*FM ztM$}3UIWkN2{EC}n^mO=--sqWa}zjM6`Q`FZ=RCIQVD}JNR6FaqwmT>foz6?BG);3D1#B4SJ}o_bgQDnQm#Al_mG77HO-r zlr}}yXGW!wzc~0l9Cq-X`NhGP`9}xe9Q!X0zVu%leCPjr4!*Ds3#_d6VBL)Pd_z%d z=;}r4f=oe6A)5{_wJGIzw8RO8I+13fJ1^~)Ww@7dOaBeA={)4%Ge7L$JN^G5vC+#r z6>`-);p2eVW1ri+b@x1C{(tLTSV#QgU1<6*dKZ`udlz{BXWoS`fA%i?!fYgn{;Q(G z|6cC`edr-#Bgyri zkXzy1a7WyhfuO$fY8yNi-TZ*OKD=@iQ>R$T2I9x_*9>tzl0dOkXyn_MuWhZtxiOq zl01_*!OWnWhyF-$h4UyLxNXAF*S+V3#TuE22?AS{eOawrtLITRpY@~Nr4B_GI_4Ei@j)d2aD zLmURBaD2^&!ILHcg@bKZO!yum!SpnF4P891Wl&FsFGb+(n>vTKh zqLq+aqW9KN(GpLuQv6gB%7lzG*xlY!-=&8sfq5*M-M3YS<0}P#JL)FfYc)!Rp#=B@ zoCt)He20{CRbkFGV+IxEuzE4J{K(@z%qwd4)WxoqykFg@oED^aBHEp;88%ZBqWpcq}SvYMJ*O8Y~tya zgKvTJQF}z$n4Ny_PJ}o?93XMHw9~mE+u7&{pnNTe%5kj1#v&ix(f&%2T^+EOE{I1)31rdUxv6M_k*J6z-uX&xPNWIDY#q zO35I^kjwrgq0xpC6_9{-F)#x)`*G)p3H?(#l>TAQ#;_ju&Fd-)N|yj$WXgu#f>j2Y zflgn(Cvc#Ar!cW?bNzc8DY zol_H}Gbfwq-AQ-rKV1vKJS=jc%%g#*+{%fG{0cL5`iKM+wQG9whGu_04Xy2W2o2q#E`n9^5=drb_#QVmF-3pAPH*Igu 
zqbJ^?I&{$^E}CRjFk`<60#&Nf+3CRUHE2S+cO%g7zyk&Rrlw-^LV?%6`%MJ~oogB) zpx+eqn}U8*&~FO*P5;waE9f`<-|9E*J2<7>WRZr$%ySz_E<8GZqK}YDy7>M3_X4ba zM^}c_=VOgHxqWu&3k)6iez3Vx&DS0rfB;D`BzanRY(e#yc1IvDPxR!OMWSgS9J6=h zg+(G7R@6KLBZHS^pWga)XgTOTtvl#Fee!R;r@u!tGt1tOngBF2H~gs|`CBwo?vDYV zEd!<&x;-@Gqd|tzw4Iuqj9xDskKy;y)W_D#6ph?j^Rseccoy@HF(w-W zBaWg%)n(>B$x2@JI#y>px}kScL?3|^E$~X}oI{vm`>==%L*olE3wErrsDil(M9lwu z$14b-3I?s`C@-4qd3btK;&IMO$uUCv#esU8qsgE-bLHKX+OW_wIgbW z$9I0){GpJulkr1J0-xraJh24w0p88w_W8^-CvnayR}r5hLD%y(C)$A(!6M7ug?WH4 zue1=M{DhfurqV)DSeGO?$~T2&=sr7+*U8Fzof&ulQUgo4z!EO7gbOU;0!z5S60U!0 z2^VN&4FHX-|H|tCQXY`<94O%ejjaF6OSnMFgQ7en-zkq293m~5gA>3@DhG#EHj!Ve zdiUkXbX8K&dB)C!V@4omjJb z$_fqc)Am9Mh97?jN~nVUDwrIgJVppqrO2L__3u$0hru}E1t6?Dw;A>=o15yll_Kw| zG2pLy0KMest~!y6v#XbAIKpo6`1#4)8Mi0Tdl+fV(kNnfT;YfUYbt>}4bzT*o#%(L zdqUG|X0z2{UUV`0!U7der%qtwvyXShv+b0H`I?~o-$jb!OhXGe?t>wVq}N_^98^&v z!sb_DbBQyXu|hN+K-Yrb;Rys*uP<=!)oZY<@tA5D@2^ zL9p>2#Y~wr7N8C!#{iUteUX7Fof%dL>r~Ta{RomEuz3V*9s!$2z~+(vxZo5dLH}Dx z(C`5gl=@Ca&GZDeH^U`_C7(Anjpb!iGe$0ZWo%;GppAJ}=TD8|YC$N(LxX$2Fji*v zI@kUPl@Hiq%z#F>yS?sx!DD zEynsjzDMp$-^Aas9DoR|B$V2InED`kcv!KIGc6PU@S^u?y7)aNs5lRlEHo70L?(PB zvx!L4i|&cY+?cAv6VZzL{GB1HFP#CWZj=4AnV`PNDc zLCTZ3^2${|kdTTZ_B9;a%~GsBlEC01$v^X;9S_?Fd>{`3m$HQZDLaMGZRX`@z0zu&v7i3Bfb(i{vbZri-_{~(>HJf!VM{* zpTsAhXbEV42MR}o0VAvJ20{bA|02t|M1iMKFz8^Zi64@OE1K%qG2PVnF$aBkh z$)@9x5}P=Nizyz*7(Ee+QcLQU{jK8xC?y3S_x{*xNexWY_W(*wXy>-2rckO1(4CU+ z@z+w5gCOYR0T9&q=Teh@5%+Onb)VX(+wGSB9Mzwtf`9^GKF8zCR$_N_G)tr9uG`&?+LY#Q3T zR}%+>tAUtw7xn%$WuuP!r%L*Y_XkN(*xw{U@r&8%_t4?N*;!A&M5F(%v8}5B6MlCR zm~O~-NzfAll+7i^+bkx<H+KRf6h z8dIooDwID~XL7V?3eKJVIj8JKafa4#zL|Qsc|Y6oJ&1<{NfHu~5(J&BUF{Y2SCX)< z?jaeWsCgohJ=4uD1kz|N_EsB$afOC)qNANT2h%F?4pJ`l3}t^&)1!p_)(*k4nNb4O z9}w+h^tTedZ-lkQ^t#ibpiA|PxQ)!O1jw0QeQJ(GY-#*Qzy}B?#RU0U;uwZR-^LN# z;QdR8Xl_=JyRuOK?ym&MR20!QQZM*35j4OO!c57r!>@*H5dSmT>9wY2mOfcoZ=Ov< z$s%8EEI=E0)u%bxct1A;@S?X5eGyT}_cM#kRA3?q?0o|X5ZL<$_P&9=Z(#2m*!u?d 
zzJa}Ol&4q!@0JmRy>DReo4}bOi(qM$x(_iAOahX;-dI_>^$E4o`0rHP6YdhATpUkSA}AL(GxsPzbuPLl947sd zE-yzG)H98f7|$moW+E{_rubF(4a&T;wmXPhQ>=wxx2c_ z>|U2&V&W4LhfxWzFhm_YChV048|HfSFwEsD-sph?c5HkA%aXp8x%Xv@q{Jm zLpKgJq+Dgzm#myRU#MKTy&2KPhk}!FNg^yfB;Np0N@+IIF+Kyr#j|y*- z_xO6G^fSeWsXgR_28>7<$~9jFQTu8g{|-X? zM}J)W#uD_`+2&vt=;J{Gl>0XcP}Ya+nPh+fRro#4&HF6@N)Nz-x0M6K&A}|tMW~X{ z4QO4G5cByqLaED~3T&Jcbo?5{Xk-`EAQP0^t)*zTv*Negqa?XH4~!(tD)Lp9LFowr zFM7UiFCCh0t}u&9RO)Q3xp`L~9(zzv%yvWpS)kkW2Z$1o1rin9gHk9AB!U$Wskw}g zfVIDXVEXuka>tfpl3843xM=kS+3L%(9#R7BE4_8&-1!VQk{!a==1aZRu7%s@Egf+V zw!hUV!~l7o@(|@51Fl1Kh?iuHW%Yh%F>IvlFJFM1?P#lDaTX{1(HB>xsIZCw#DF%g zdoH|-qEC5iL5Wny!b+rHb-w)!U-q*{EvdH3pXaZx^Jnb9=b5Rf91g zFb4E54bWf=2#f)NF(5Do1jc|A%tV@Z_a2kRPu>(s+-SpuU9gHQY_*yP_ZxR~A4P<8SQup0zrK`5aIMF%2YA~*X2zvk&`w3OoK$XxU)6Uw;H;cO!00Z7bj0ypG4YPFTxu+vT;EXqIwyS z3gO5+2P^YAfrd)U_>@*JqxgZ-qCO7S<;TCMG<0vg-qc%eFa^8j7=EV!nn`ji+Y(Srhw;vu%Uxv{% z#^*8eluo$`6-ti)O4uee(iGS3p4!`73>3U|bzv2H?{k=eEH~rE{kpb^wr;H8!2PE> z%G{8yKVF2t2kYe{`06tR7OEER`+DOneIA!C(Lm}{r3fYAVq!ULnAQNHB}vY$JrdL@ z{17f>%)*fFg1Z)=v%g8`@^TD)@sBK$OXHfCIjUhFsIa?SmNx$vx#S=UsymPd`g^r- zb{pi4-S3&3gm%`dNnK*lDSwj&difR$9@D#zf^zqoQ@WtzNret9Ryd+vF$)@w_dtsnc&uEHt4w4|i7n}3eR8_k<^{aT>LEr=UQ8!puE$1=@S%n?P z56UY*S;-b@>A?C9tFX~QMnuFpL;;{6JwS~ptn=4IC&vV&h++T>aqdbH8-1I9mLh~b zJ`r64Q4p8~0<%D176{A&fmxt`d=}`(h9EM`p;Z6YV%-jVUzH(jFRTlK7cMxFtV!G7 z#lAE`+XpjpD5-Nf{=NAv-BFowWmV<7TLzIoJp#4b|3h2wf>+W0q;*cb4y#<|yTwN3 zv(gaOe++{D?uMXG^L=^c00?UA&sc5vEeQIG#DXV3#Qq)y`rddOU)2!yB96tFLMP`P zbGk>N7rcpplMKBcmm_#c@)I@k#@x^;J&6jtL-R|ZZ}riN@sy9TCyida92|L2g|~&- z+ncb8<)c4)Z9{$Q6`WnY6D`FgD?K#*erAM9b9_Y)ZU+*}G%0PpLsi z>l}%z&B|Y%nSOKUqW8>Yy1&%HopYp#Lb?><{vb-*e!lbo1_BC#2|O9#enYJ}Sj=jZ zu=7p27kej2#(C~GwI&avb0G#x(F_ONN|I#b(WS9=M1><>^6EWG)O?u?qs@UfKyX9h zIAL&{H(@z$-nSQKH(;OKPe@};d-8jtGt0>^%T?JjUrRI=7i8rIw*!IOfxzuR;C3Kz zI}o@X2;2_TAFaQOds^#lqr{>N)}2VYaWaufI$V{?G&slP8nrvYl3^zkW-ujQDQ4Zti=-d*b@XCXKD zXA|m;w_4}Y2UT|ldAHJ%cfLXUR4X343zfO)IIRCaodv2SkdlH+X-*{{X}36&3}bmJ zgX0p9c^-+p!bSLsI<}eU 
zB`^NHUV9((WE%zgVtG_DhlhhwY$pj*9)ZW@dc{2uyA`bM(|E!l6YD{%Qp z)V8--8CVo#V2<<0JW%xk2$cP|@<1MmC-1m=DJI!%ZQt7x`aJ~Ne1r`j%K?-UR37L% z1X?_SM~X{s55={v&AkbcH-Vc@G6^sj!;X~UxnlpIK#r#kj2aIj9ZvZwlb|*60d51e zqA`ktnn;a8v4{88tDG;I% z8$LS;%r=yJ6>PhOa81UKiDA+KDnsWdeeqXS#jyYl%I$b6`^^*cwM|J)h1#%DU zT;k&X6nmuNR|>R^;~u@X3d2=&sZncz7;(`xfkwDT z6MXVKl(6b)>Ao<$oTD&aJsYT?iSzLkB)NM|PvFQcgzBn#30`sOtz!1)lgA$zVRf4K z5su%;*#Qgha~jvJ@8^{WT{vTNp7%a?Q!{ z$0DJ5Zstu$3j}gb1aU<~(eme7ptI^PG_L}MyjQ4NZvVNEH-bAJy$EJgBk^xK;i`MN zAA+55U>pdH1GRvia9}4K*a-)A!hxM|U?&{d2?ut(kx4kW$X2`8o59YIAV2ZvR{<#Q{ec>IV3Jz0qw>LVB^FCTR_Fu5#D@9uqTjrZQ2 zWBZGX>n@P<-b?@S$0=KS3#0E)hGE^`E$PjVIFMTp*7}di`&Ki0O7i{tUWhN}PL*^%5&k zPQKX!JMb<7^&2+6mrrAc7+NRv_}pKv50;G@LmsVLsG>o%$ zk!$~S+E(Ey7w@v5R!PKY$_vTw%Yjy0=$`YO-!}j1Ve)UZh(wA_9TRXl5Euvo13_RQ z2n+=M$2Obn|E|qORjjtt+23v<&eK;d3P-j~Rl}P!{93qJ_wh4Y$B!&?J&SN_VZN8D z8mt%{7oM=rOBJrncf9k8l5KsQ_rr8|rAK7xY&GnPLY4`(t;0iumXWh9BK;YvcNBV; zVw~EzEfW)38Ou^l@2F-hMKy$wJ*f}pfsx4ZFkX@UTDPJgt)vx3`>Z}eU{tE-A0G&s zIItWj^!+pw5UVcs)w=@Cy%8Q5it+Lj%+@|FGRTiOuhwmqv~;ZQ%e}pL4{suwfTr0%@zyHMvo7wbynCGaY?-zd#Zze`Y>6Qbzpbkx2!-7@Erv8!c z3W_s}>_y{$UHH>ERJ_SmX^=;FkOy@u#6%&7oMNMq?(IKU0Vyg}U1L^Jws&4yBzfVl zd=Ll4L4aIPue&FEh{*;U?@`Rv)?GW!fRLNU?taGe&KGC@T>G~2ZRwehu6J5i)}OQ6 zUE(%DB2aKg5V#`<+z|xwAdm-vJP71LAP)k05Xgf-9t83rkOwhP@P(gvL?_AY@Q>s{ zaNtw>?v9|{dbr%Y>m=iFX*gLW?=@N(*ohM-U+6P=z@p#LSE_w)hZ8Edd_;;46q9%Y zb#P`4;R~vn3j3RYMy%+c?)BOWlfeldyCt38(=>n{x!hi?o(Cx1f(G}#M21_^+>fW+ zy0PwhquSwK?@1^K1^`Xv`oAewXMpi}TMMfwm@#4B$n5H)#QpXNDY1!TxR@fzgjy3c zv4IhtUO|XLOF-l(_QN(pd|yYIc)9^q-z$d$qDfg;MSo#I2egvJgXM7lCKnW{rU5w% zf#5RYVnR^ANGPyF)s*1a@3KZ4=kGy*ZOYBD1u@XyZBve_!GYJV0%FO*E`8Ypa* z3BTi3;k;i)%Hzwzc>DMT!8R3$UtzvyVC8O$%Gf|e^w_cZibJAEkZ(5Q`nrQiYDqC& z#(|sJoW(4HnP*1*8~f~sXF{hy z`z&am1?{t-eU=N9OF+25E$Ar$y#s=d4d^OCA#TP4*j|aauHWSnphrc1D3mMUbYKvWKqd^cd|6Se z?T+1*s+6%?qf+@8X`S%7A5cZMVfyLeg+lauT`B0e#b5X(?O&qmXW<%%_VrCf0cyju zloVR0xYGJx%b&bj`J5>XofJ;f)9*dFuc#nRymfU9rtR+d?IB@?+-0#*B 
zaRMJ>AlvIr{kk3f4v(j&lJQ9#SyD<9V>gS5i)aeQqZxdw4PRk>eJ)?Wh)fA_+Iph4iKc(Fz_mNLBH6wd>q}+n zk3e^8!>6eu@7Nr0jXs~*DJ@2<1XfzfV9@+p*HmHKQ zC#X^TKPMPQoV>94RaoNG#wP4u)~~Kv;R73$wd&w1 z5EmZOV7QD71~pj?{;4cyILur8d1w~Xyk!f^YF!_i?SN>BBiYOw@^e_7T!ykmaW)cX z-X1$hdamx0o=@L$)=yS$*wX){GT-zzRW#Dn?Pp~^P_D9TrZsjf4^o~#J$IM^?XFb5 z<2i{x-l=(60DnHi$ET5~)Nots-jlDAt!wwqHt$xG;se=gE*xYqTe4lGXr%TvJe6tFx6EKdQ;Q~vSg zDaeE6DSXh^@^>;6Phfi^A)Qyy7>&)CXcUg;Jg~R!yC+#`?M`=na(^t!Ph=k@?o`@0 z%1oKjcj4~OPbZ7qj1zzK(XDi01^);j6$<(NWl7M1Fpy;3K?s!lZ;MlY9|r0l+T1D! zAkf_7<;uq2LZF;^T=+l=-uE!jcL;P>Qv~wSbCyGx?5MVvjLt3Z^*Zc}0ysE|t>@35 zdr!GlSBhpkD}MV^RMxo2L8cQCW`FT=N$}dU?pNbthlU$IlG#LLiU(~Xa>MZM&(+Dy zCc94>|%{KP;_={IqrJZGD=DIql2_;@^WN3vwaN$~HUR5*n$ zErVXc%vmW3WPuh3>TQx;Igc((AqC!UH#G#bzj1AN4HvP;8h~KzcC{$R1o>Lx34SvAP(>dZyy9ttw-$v+7tbp1k$rOR6eD=b=CvXi5a}QLFisQs7;*?VI-V zYhKSn$fA7VrgD}t5m$p4!(P;K@K0VN26KoA6iAP@wBAP59OAP53M z5D0=m5CnoC5CnlB=pPP(c7eohc?z(~SX1%Y2jK^qUB#Y}3~Hh5oEXG6u)SSiFALz* zcr}4TsM6QYkjF~!{=F}matXSDJI?I~e5ZNp9US(JsPpP#p!epM5Rrm2tkR;z&czE@dt zT?puueF2l3qUu{%d$m{S0`$v{$I@F+qXZO6GPvr>rW8F&X))H2x;ct;|J65pFc9?a z0129-^otRu^o+srO)Hl1S%#8C@VD|sppgQdDOe;J*!Nlg+MjZtb+Uf-n0pB<^Ix3+|2F~K+x z7zYC5KwumQj01sjpnrNCX!sxt64Vn8YK<}D^NNoQI+OGDH9oLo+4xrc3Ay4^v(<^n zQ<5vr7xv@Ty3u4eLUb7ScNLix5cQGn!=Np}4FvyQjXuy@SJ;a+;Ds8YfHt~@v$&Ff zm~12)55k}ae}qB*da{`**?ijrOg4$(Zx)7rJK1d4;K3*Q_F+&i4ys1~|Fn13e@!oZ z8y_%ogTY2BsK@}xfr!*01W`&UNs(@lM!KY>8XCS`n?R!=K z6@2Dlj0S5M(h<~8iv8L1is-LHK^5Z7C0?x$68(d}TIziz-7VU%tjm}~>A~+=mp#7? 
zesdh_f@589tP75H!Lcql*5zN%x{UtR)BNc==)ya9#m+&t=uV+=9Onggtnrq}F6v;5 z-MzP;OCsS%ND&XNH@5$Qy)f+ssD4LVEX6kn?0=-5#d3)Ew+096ieYm5ZM13X`Z?M( z{?BOh%Rmr?D*`VXW?^Apx09-qjXGIEFqQrP_-@M_h_VEOO*W^`IyaDhwYUuU3z*_* zk2s zJ6``FxzbRquH_SH^yJbZuzQ;8S1<Q%=UjulAt6O(EhDUauaO zH2mgt!M@nohD6>zWqYwN4(89J&wT&Y{oH5219+QyehxMPzYR9OvH2gNcGj_-O;h!9 z_0fNKHmB2vAtxZp%5w1Ee!9Oq8>DL+rwj>)+A^PGh|(N@1Kz9dK-8URwNdi$kqKIP zhp1-4%Y7qR%MAAOL8?0b*N3mo8wjUbz-y z?8M=gqvRrMjd1iox)wOxh##8ATC@naQW3jsgG9|8cnC7RRU#f$$#@2$Y>9%kR^dqJ=$vHLyaM2l89j-qub$$Q+ELcX9MCj^4%5yEu9mNAKe3T^zlO zqjz!iE{@*C(YrW$_g_x$o=zXuk@{mbE~+9_Z4i2$sP<8$AT!Yow#TV=gFEvyjjC2$ z9p;i|DU=8i4Cn8qe}{0F8P|H=PDFi&V|UXYqX^oiPOAzc!s}(MKH2L(zS@`vx$_M+ z0h5J=@c3jm{P@V{8N`L1c}qQj?iAkTCvC=p#Dh{J;9x0ciK09Zz6rrWK-L$cG$k;B~&FLx#ja)z3+D*-G(LNz3&I30v{jWY{u{Rq75H4`rjZf zzl_8$ELNWls z+Wcc{z?7>E*tVnu04P4}PxtD>{ARK55A>nbL7$sBIO!w}#9kuGhwTZFmZJ2%M7gp^ ztW5vAS3it{G=K+L9O~J_0N4d%D=4nv-FH}ZtDCv9K24svqtVrrgoDnb1>bUOMu*w=b2C7GvUozy{5>g|AwI3yEM?{S_9?Gt_R|tpE zIZ~I+!Kd0hz692UK^$qa=GgGUllPz%2ddK0_br@~l;!uZumWi>8WHAKeqYp>;U zU6S%9`-iwEpUZm674IfqW$4)5q)Ur?8fnT6!@!rEm{aNu7Ip0wG&dLYCAzW4m$#Gl z+wkcHoyP8n!&og0hrPd=rCAvt*`3&jG&$*sjP<#R3eVpu2$DosJyqw3y!Ma6By2Vi+i!weE$S}Q&5G~SjRkL3$ znR-u%UM-S%e~aMEiw(gWfoJg3Zd4{D+XQP`Cu!P5jXeDFN?*(-@WJ4XjiOs0Z#_X0 zf6NiD)bO%?LgAu2y#_|B+!P%N_Z@O>7*K2&ve}B8TFsbRv+x-}>=zh(URku?FPZ)< z>$hBd{7wi+NiJ~>{E&n>l0@_=BTWG?E9Z=0fh^PNAOrdG!y8)+j|a$mm!oNSrMNtE z$i3|b-c0@d2Va;$1l@8es;eOCjb&8da#RJsKO$?oSavgs$ICMADpdr6{~jZ-;`=ti{5;nG4VvK&dts7{H8*~lHIv2OM4Xmx!u@M zap`dVO&a|b4gC#4{WVv8WrnGld-a2Lo(XO{=BQ1cMAVC^;c>p^D^n&!GLZ#fn0Jc^ zuR}PO-aJwDyMvmy7Jtlu&V8a$Mu%9e64cKS>wkjTOKe&Bp#%^oyKyl-b6JNIh{K_> z!^is__D6lFqY6nho!@TR@pYf|{^Lo5!_cOEWj~OVlzJ9W2|z$S;}6Wg0C|-COoDth zoAhHgXn%$gjAW%R?0?8nL+I=PNuHr9K?;l(^vj4&gesB9P|w8%<@4n?l61c$TYE`5 z{t|Ss#YpikWtLvQm7_k`*&ULyMOFGvYnC=3EP9U$h9r>{zKA$^v|SL^Rq$xFApB!N zgy-rL%P`(S`g^$?uVtNGVrW;#Eu%6AX#-QEzwpCa=~buO;S%j0dTnNU?XDATJfD3g 
z-Y&U^AwJO?%yTrh3w)NTZ;Nq{%KDV-A9Q<G5wkc2HLUG8|FffoaP>yre>OEt|O-RN6e0jhu7~?DC`qP#4&KZ*i0!f!IQIg0Do9pfKj5wU@8{SM$>-Y9s zO24dnop$5iVh+A~xRZ=7002ZPtF^_u$m{$z)aiY3mph*O8?d)EfW zNEiZ98ZI3}gnA;zd)oG4Pw6@%SV$dGPQnYDZYdpIOp~YgI13FObEtBXQ96=y@`Lkj z#tzBxL67F7ovzTInP%+>RwNrqPrN!BhROYl0P3gOM17H{j#qwm;m=4-j`u{|RE~%- zL(W@wyaO)_f9bMJO3S%m8W%*cubWvwB6e9@UWF@aMrxlpery@vfSt3|ioyNH;yW2% z&rZR1m^Y-={v4$7C?!zG zkS({3jeyWAis8AGU0IxeOq(s&cKg1$Pyi5-nstFA`q&1!e8)|;{EBYnV7 zadr_#c%XaNMjBg@InRX_EH(`~F+e~uDP`zVq0+@;~t=@6(MK)DlqOH1~ z1L9xK&D4x^a&K89R0>;xk^qv|_>_3`v9~Yc1HhL` Q3Hk6I?h+6vtI5#+2fmTVNdN!< literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc b/pandas/tests/io/data/orc/TestOrcFile.testSnappy.orc new file mode 100644 index 0000000000000000000000000000000000000000..aa6cc9c9ba1a7284bee1d70f186d212d8659b3ef GIT binary patch literal 126370 zcmWjKV{j!)6ae7Zb~bi4wr$%sHg-0)lNZ~zZQHhO8}IJFsj05|s%vKYpnK$%gi(Qj zU?5Szp~5H>fW!gUL@=O0KtL$J05?!jp^ma1eh`tq@1A}Tv9`9t6FR{afK&maC}F?> zqX6Rw0ON?UV4xTo7#K}W7@1I@%*@P8O<9fEutF}u6oAHnfZTo}q`h6DmbUJng*O8T z)<8tjg8tOc9_@jYWSzSnoQ!W~d_@qjR}X#!$#Sy$ObV}1z3Db-!_-Ok2|F*h&57^1 z;%X{_aQWoIAlH721+2yFgLtZBaOz=5KRzqkK-NYOyaAZcmxm`18i>4;k`{TLxD%~K z!qd5E?l>c|$cPZT-H;R_ujVE0AdD2MknNj^ab^b|Z_YH_eb7%$3`KZyqiLW zBz%`&q90b_q;VtSte5AUC}lGmP&V_a^Cd;l0WD+xyUGk5lqV9K$GDYzRC)ch3LDc_c|bQfpOlETngzg-thBzzbs z_Tm>C?4=MGgH$FDd%?^|?4Vooa+G7R17OOz-a?8^ae4e7{I|jfU7L7QbuSKE?=8td znyQ@0t$*2ANf<0?T#qtLA_vk3<}9~vBbQm&x-FFR2MscQ)#T4AM)qUy`@d7Nc`~vL zk-5orJCAeIf1aE>U@v<<+EvbIM9c!1rbiGwZ;ru_-$J6UE)t7LI06@F82WcWSH6#N zD~DL2rpczilRIP!kt?LN8XpXO%Cg-*Ad5v6oYjA0o|3N~)v(*&n<>b3#Mh_#w-7QH zpf`B@by*A@NJlPBd9kni?_l9CCxDR~$1mL6IeH66Q%-7RSgBs3m;~moNEL{%X=9_n zCL8uHj_ftqc4JL_6+0?YFZHE8NGxW)$X%$^9uJy4%mg>7jC_ z=vPih%x=RS#fRPT=i?s#<<1Mzzj{Gjrw`btY1QFm9+ZX>%bmBf;{gSM*!>bIUh6G67SUKI127a=%%W|iW$JgTU4*y&#QGz+1(gz(^+!MaPnn|Dp)4^ta`JtbxrUvoxf%)X zbhtZ;8Z!6zsgRGx#@YaeTy6W~P$Bt+rYW?zihFG(D}O239Yi}coRM5UyIR4kf}L3) z;YGFIu;^U}+eg3b64vEgP}!HrYu~_h5?iJgf2+A6Nnmt1CBNS18f$0d4rz<;e9y0( 
zV~>}j=QpQBt3-PmfYB+9Pb%3mxh*yl4LCS4Gt8900hZ|(;~I!rWE#j^zN(EztLf6( zw`8aVehRA%$xq70!6~+@Rx>K3ZFvC>{iRY9?hdU`rY}Pa_oepy*ynX6U9H6YlIOT` z-7-FZ&g35{nhjvSymc8B4yKSEV*Po<(OMg`*?wE3SK`KLfPYSm#8cm(J}xu<(hG)W z&n6rdNe3sKxEXA|5DHim`ECCChm6fHex=OB1NHOPjrZ`6-&NgC{odlkYoKyZw-oc# z@{c+QG2sLPfZe7RQ6f8(ula%#k}>?d(a*V&zU$d-1nu~#_%EIMMHjCKsCR;Ik9^QQ#d1g^}DO4SA@lr1NY;Ef6Pxp7i zRZ7(WOnq8u)RMIM%f`8WlRhQB9(fqy79luC;7XZ(p&ER1hk3y7mK1$^3NGeg&bqw< zCOY#(FM|@EJ_5%+)Xr7LKhhVzzYwqDmfm28*oJWBR*EEw*oPlVm_eL=%7V%bFSKJk z4gm}ctDaTnV=(PsDDVZ%mKBc}jJ6g+SJ(sjzeQ7j3AjZEpM(V9w;gH@doKS=W({lO z0ew>Z4yxV*z2Y$S_0fN&v-(XCCIapGxA+)%+`H;LLlBEtEG)ZL-nLbGMr-URrIO_m zz=eIf#JRG-s9)Kh8UQP_pr zHHZ$tk`6s4hq)JOjnMeTU_5d!gy$$HbPZ22u<(3^{E98vM}Q@$Bhnp}CIf~6%!MjA z37j~Bu(Z9b(sdJhTUdIR*VnL6#sOsTt4L8bc+Tu^>uj zf{BhIWk>#_Lfo_>QgG;%sX+&vziY~5d68MaxN{l`-FWu9>vJ8n&7ds&H37_m=)UVl z*DGv$XkCSocEqa~icq(o!1v*f5ank?+viYDboy}<;vQ^v1lf~y81)~&0k@ykbz1m8 zd5FU287{RG70I|Q@!Sc1sfceV@{C}pT2fGkxK&Tuvn&RbB}J=g(zF8rMr_6Z?bUrS z0$&Cvg!A={$LP%K01Tf3VG!f$zbc-7YQyAwo4I@IffOrUAg8vAqindyvGp4vngBA5 z^3_o>vvFQSgW2CZou^-In9y_KIsH!?*O|D7mNki-Lp=*!k80=q9RTK&NQ;K(J#it* z#a=D#V7TfLCAd*(|F42c1lSL+ICM-A7|bp7w80{BYjQn0yopFvG4Ic-@gQEWP~E-P>VF`m?+y-9n)GmC!JpfxC7DRs9v?CrqeH2!?++1|VlUtsd~ zzIlRV3_XrCf{e*s|Fc&HRRdnLAzOh2QZ<4DV9>L7d7$-%#Pm4ZphMo~9Sa_#er3B{ zyz{}9BE|V*qI_qG9EqWQqWB)0A$>3tMl9K3$x_#g2~*__TU&G!R2$3H84Am$DE=Bz z%D{LSFEPBh*=-eOB}ZmZ_#*hWvKAe!83i!o{GI2eKmusTv{SQUIw_~K0<9AF5U&#n zWjcW>{v!vD-UC<$FS7l4aU&+ST&phr1){Awc(KQuKt!>N=30iSxIPGZ)J*Y-Z~_p% z$l5Tptfe91*is%w$B64-@I%Zq;LRrhhH}SZ%T%YC93#+crI$HT#GWQ*zOX$UAt#GS z)h!9~L6ko>b(CX8wAnyw?V*C}W0ZNkF3wuKpvIA>efuxe0Gd0$mDBj>OuTsds5rrH znQmL%yb{uztNGmFw{(D&`*4&C2Y|WV5%h??oT|ego>fpcX6aspe2u}%v3h=oQ$lMO zXIoncaJ$1MuM6B8u?g^r9DgQko7c#Tb5Vb2%H1s@`V0;^AxFN~+f> zb`Wttrg^5BDqZ)IJVMJdrqYhtK&tFJAv)TZK>QL;;pC1waOmhpZN{*-JT#fi(484w zj-NP(>8sZb=i|nWh<2Z)07gISK)f_eE$@A^3R-;cZiQkr(b=nbQs_#IQ_dbArgXB6 zlm%u~boGccQ7yQ*r-RDVUF`xY+opsu{o!MF+e0NXi|7!ON~EIxKhU&ui|R#?mN z2P2fo9Zj>j2u?0K|EYHH;j%8hH`$pMwhD 
z9QVq%_?N2l(DGXk#Wd8Vn9lP`jeIVxed|3IkXpvBF7J|5x#yNYww5is2o=@p%k2Y8 z9ZiLoZlMBm$L{?H^y@)|WtV*9je355!##uUa07o-z(}`gt}Jc@Q7a@r_k1fJHy}D+U0Sh)Y{|xM47(H z(aF-rozTNnk)IyV@RPhE$oW1vW45yWPR(}AHj-KO5n9es&xFu3-`*^$Bl)r>Bv|HZ z{BJhb>9lY>-vyYB?p75)$$GZuz-|4F&K~{vtXQqBr+rTvOKMxm&{3R9Pc##EK5(=} zX_Vr`HKUNqi;)y&sD0O#9k)e#o8;L45mS2b7gfdA(WPw3QLC3ydxLhm#Zf|MGq6Ph z7`GqoRWA}G9P_vP5~u?7YZu^EV}#;lmV&Ttm|GNerg3Q#+UjP|w0)id*=RGI)eL$B z=08H%Bftbq$VHNTEyL@07e6miMg^ar+{Rfw^?k-l`1T6%6v{ZgpB9TwQX4CjRsco| z8+9dI*|QPf|CmjS;I6QD=OSB?&2h|`%O@n9=<4^)$|K0Q{aAQZ_FDb?Uy%e6 zNEwd^-aftJ(ajYUFm%c+3UBiy3-SO_Zibt@TVRLe$*I| z^%!@rnyugVTh)Mr+#MBVXcTuqOZ#_Oq;JI7N4j-o1kiXv(qM)00e9);d1WuQ?Wd0-lWiwRu%IjO>Lm~D2$t>@o<1ssCoe~O3+rwX&7^2 z*t~ylrD8mKJ4SqU^ht?G4CmT(bv@Nxd(cx(7)j-DViCPvP+M4w>Kn4u$rF!QQL6G; zZGYS~w=aLWAOLFUw$gyA3;uvK6+=O0vOUHC7f(wq3;!K zVF|ZqW|14JZoTA(-&2ja^=)*OjdazcZ18{Z$Fbp6(ev-(Cn^{p&vwp^Ye{wt=^>@j zl>&YaVITvSy!f=#{!ZzSeQt-ARJ$%(r~OW1@*SJ)Nk-F&pUVU=P+wct;>Oi|PJ}Vm zp&4I@E=dU=-Ml++3TdJ6LzU zm@j*TfhF9OH<`kwX-jvI-pVR&fG(7rF}CrVGL_xy05hE4CtRFiVJza@+Y1WC9mVZk zSz{oWD2yVSEXV27%uy~ftW=jW)s%vl^T}!#1$qH{(ZAXg4&ecaE!og0^wrMy-8SgR z4^QN@@o3wt&thFTtF1nIC@G@0dT~|$wUk(P zpVD>#D+33_&1V@$PHa2V4hooY29?>u(XIQCav)gn-)z=}nF3ZUhX55DINPf-j+QU? 
z3GIWgpW7rN4)`4FLUqc8TVh@IFNUpTl`x7eHF_sV=L&mq3OFQad)mIT(+vxBu?6v> zh!)91>r+g9ADpieb@(x{%j6DGY)?hg#O`z1e{WjrFzQqVHjMjzIqHAbEE%{KVEG+# zo42u#UBg;GzF;#t1#`-=8SgA^JB<`d@XK=^@9ES$qr1-)Lp(rNJb&K@$X*aK@-VJC zCozhLWBFCnMV&{j{e@JGG{V-06xW>UNkjVqjD5tdfM)d^Dhp8|Ol_JIYr`eyp7pL( z?HZ~q$AYm+b@quYY+yrD^Cg2tPonM@FF^?XE|Ar6O=`#p1N>HYa6@~&mZr8zUW{5> z_>t2LqIqBX+L7$K16!}vu8 zCR*x3mPBhBB${DP^3AZ@!HU3%^94D#Bs$Vr8*Y5Zn{{Hqx_L`p87Ohc41OX z(b*PpADeAC5g9i0J_@aOS~{rPhL#v(-pOkBc{%0ZGt3ZV}z6{}%ZL3zTC@ag&{R1nuHV^&}U zNv4;NIu&omltd81Y4Blz%~Oq!Ejk)pG^E^&(0eAt*Y*`R0qS6IKoK-xU>BkXp_=3G zna)8#L|y$R8;L^mYjbI#0A{|!C&(P(52`Nd{#sp9lLj#kg&onekmg;7$qHyVc$oujB#D1d*0k)Y?Rt@m)+I`ENz4Cv$WSjApY~Z1X=qzxFuUJVT}U zMkO!74xw^cky_Jg*?_#Ek~^g?re2t~tKLJoxVqBuhz_{Iwi_PznEkVvWN1xKQC_oY z?wovjMOHkKTItK00nFM6O~*!zM0^=}Dz+7#Qk$03V1wHU2awr0!&MPWgLRaxJ~=ul zf^F*roM69SCnhT{-9aRlNi88*O6d*|ReQ1_`EjaTVM3xR2kD-F?w}Rt!sv&myPvB# z^@Lvw)F&#RR?M(ky-{f!*wSdmil|eOQ<>0qAyS;CTAvq&>3~M2p{MW2c9F~>$#PL{&f!@2h`LIn$PI0ABmi4>CYWRsNfk&4 z2p3CmItDPpv1!18U0;o`@Kb4APK2*t8~fqp_Ap86)^hXJlpFh@mJKm@TPjbdf9KbZ zb#_}P8B@_tRt%J1((32EI{Q}jKWz*SP)F$ALCO^T3r_)UQ}{aUvEw06Qg%{-<5AXl zZ&(Qcllj@z726R?20J^}$@p@?(H<5VNP;)?!q1M;`IqVh4=c^7-7gAp#MVrXvBn)rdmy$ zqaiIE6i2|BBMJKELgb*0_@N;nP8Z|?emm#71n zm-e9qo#>wZboErwyK^%%_L1chOuA?&viy=cl^#3tmy*D~!fk)H8UX-fT+C?% zZXZ9rs+_sEROZW{HgSnRPEN6=m96V&og9GERgnJvYtfe12q;QDB_|Q_5FEK>H3{vm zB-E7NTh>%MfW4LwR;AseshLlzWUngdl%Ln2v2i_sx2d=FM>dGh$`hjzzz`wnYVPCh zzeMwh$VB+ty4mCVM&}Qd*cJ!-n6oODVQq@bhdzZ|o6Nd}rX%AZWw4^0R#5GU$ftGl zRw31AGhKoYX-l+^=Qy{vb_L$BUo8O@oq6xQBo`a$9M$(Z2Yd{Z%uQ}b%M3<}FrKLA5G{4s#38XSroNINg} z(|Lz8PJqwBBTs;FYE!&~>>EmUAYSJD-@e?zvYlk}&zr(sy1byfffzjQ2G^0SsGKs{ zzBTmU+a9Q_=TWYD?5>rq|CZA(ikay7nL_)&H3?8zs>t{NOlehZs?H;xrcZ1{v9l0F zI5hiewL)72;f!UYQ!(lAY(WdVtpx3tq%~)$4!c`C?RwDYz&94x#O^kQPOxInv>_+8 z{}QEdEm+~3&b98$CJ)|nrC4XU=W=6zZFF-cnNIF+E&wx8_+AmM7{rmF8SD&?sZzVt z-gD=uu3duXF?k|Dc4`&1({P+Ab@KC6$nYJ!v>Oc6E}>DptD>IJ2EL|0ix1qjB|zTq zYb_PXJ*NZBk<_h@T$Dq+&0s&-0l$QyC2e8)7gYnmY#j-bXMEx#lM|CyLL0uTY9^C_ 
zl^79lBmO7RW`1SGvjXS0Mv3Qf|(k8fqt&8GvFR8 zdPkBAQbWqSz`dY!< zj24{6$^wC{r)jTSpMp&F8&|A}2^dM5>{|dZH~7_bWhd-B7)~%(-DydrK8SV4qPx(_ z<_Y^SADWQp>dI4m3~PQa3xz>T;`PHeW7QBDx8voNn9OhAbg+K6p{TTOMz_9H|9NE^ z>UHpF_8HtBWAilFhT6qwq9iEcj_@1B9R{eI{$g1!F9ruWro|`vrQ?}-A zipS9DkGWMo13KJY;k?D5kb3kI9DFvH&(fi(NGPqJ5)3TpmLD=1#F+)1;+uQ`9%FLp zW_#b}ycgEWbk{b9c2o3_P9ET&7QMlTPTHvvyFt=a?H1JPp{SeT0 zm1_HWFB;R9op$Le36Os~h3wiQN-H2Wm41bMpVuuYpd)q*WNmM%s)_{TN`eZ%Hes!E z>Nwnlkv6AWff8c}Nih}z*>pwz!s;Q`}To(5* zoVw-&5k1ZwSDImY2`G1gPm%vYF03&$4iBD}1dHn1{4&R&;~TUj*U2M8P`C_H{PH}9 zxjIZDPH|O;!8e`{x@BM#&(n>}ZZ55+&8r>um+ihFd>7ON$h;^o0p_nYfS(D@UR zi-2J*Ua3dSrQ1*MF0v^T7#ecnr&=F0tw77SkFEOpe0{0c(BCDq7{;`FmSV{?`n_DA zrI-ZLe*#?r3~%jBA%bGe)a(;@_mS9_J+oR4@jw%1!KuYZ&iPBv1{e6BQw2On9atJN zY#J=~GEgU+wdN-fd~n@YRoI87>rhC2J9PbdGu_;VV@?St*g2l$QX&Dh<0cHywzq_>T(pdQcMltiV5;I}FkSEq}sq(&o@t^D^ zE%b92$sVM?2aOwEddhcAiK36@s(F+?+8Oh7yM)M0R5jJ)w;z_L(FouvG#d(Rb&-(B z+OPkL!aCEKkRYNk7FHhf1@om-AL4c&gx;J@xnE;b(FHS)h*6)zQL%@;rvpVK=#j(C>@;RL^ETn5aK~OqnVT1v4^?9Da4V*R zf&mQcO;zJE5La~U4=h%I_vX!bFPtZKT>cXmo`P~uhJ6;tyAT#c@{wB3x+JrP!YjO3 z8m2%sD3nJb?fNLPV!LX4WR7|8%9PX8&juFGpCnz-g1#_6`6phq-DzOjZ?az(J1xin z#(Ci!Qg&?h6T>WG{yZ!S^O#SBKM_H%>zWc6mW^0d3@z3ZJ6pIYH&2BeNL@m&?|~YD zQDECnL&9H@o$W|`*Opz$w5-e@W`KK(rrsyoKxK)_J+&&+hLxV$Ife4Ejc+hQ7QmQx z{M^?~zJaXRxrDJLP`Q+%C&CNbCtr-g4+62jPs+$h)%s6^juZFmTzl>+oDhUos)x|B zys9snnm?PCDBkrH*Hwh25yJ$FTxq<4DHqD0-FmfJO!R_v+oKjJX((h8CVc{!!*nDn z>xH4#DFKnq8ycDx(fxQepMcprdI}K<5Li=gZACG&rf_pctQnch6G#1k*vCW|Js0vC zV1rb>q|^Ss2`0`HGq7}`!j*()Pxsmp1Cb#B?xviKX9stAwQF2khn7fYkQPOX$jR(lhTY! 
zE9OC%w-GAU^!|9UL^iVc-xfICe}4|?IMa2`5nySekd0gyZfFnd0E`kWn}jP(CG^b4 zp{fbA4-@C)>y(6F5ChYEAUUUVaWnJf!Fq&&^@S`x2B z>wh{_Tbw!%eDHO{fgYY9_zZU)j5k6#tr4TLf6(4x1NK$V&&mNzzTk-sTGB8X+9{D_ zd~CDBc$o>BxLGzsQf-xa@a>Kn+HQs02j=)C1bJFd`Hi?~xusab0<>39`~sMSTHi~r zkq~6Gq|qp^Q>6;>9gN+A0Yh4Q(_ksOk*N|WUxlpAD~Cu0fVl}g2g$$Y*;f;wxagIf zl3|hcp+2pgn`Gg*bLdO#2PY~sD8t?^Nj<%j;~W{vVss-BG$L4kU#++9GxMo>VkV6g z>iO#oy0cLT3isP6?gN7AZaZoQ=zVcQ#Al+uijiy^!1VSm_cTz(O<4ErsGB>XP8!Hs zXXswT%S7k%1uB$lsFJha-=_S6<(J_fBzQ@k>PO5&5_$Q!5~K)6F=V1Q^*lKGW+-!0 zR;;(>HK_T$SA2Ea>Pd$nm*;q-&c4V}d5-vgEd*dvJRMt(6>qSH0`*|Xryapda~4E% zW>T~nTgutysRUwaa%FVrvo>W1hdeK)G3sZ%n6n6@$>5cXKWlB6PuF!|u3Zv2FE^3T zg)pc#_{!d-^KF=-Me)kdhM%U_-vz}JK_Y4ZOxePa3Zg{D;8N2(%TFUEQ`^0S<9?$4 zP`2)^S?)O~+g=EUUB4I2s)?{CFW0a{KL|FX`%lJ_HhMMM|8NY!Cp0fa$_7bmQ=lncHi zB-Bo1K9XI*x^pS1Pxi)%GR>_5_T-tw)@6mVk`In+vrI6!H-Do8(_IDY}o%Lq3d{C*d=Q2FYYYw>`vO zw9wAV0q7IP!8!EqhjMb#0`o(tqfW$bKdO3b$&YnVhXLG38Qi2+nn*=g>`lYlV=O5EV>T^ZGMHl#GH16$$`YA_`)bK5Qnrd)ZTx8vjd6-# zSa>p+Z#yeQeEyvwmm;}84j+2Jvj{itIkby6JS93q2}0SQzrAqfM`-y*#iAuTs(@g) zwEw_7?7d2Kgiu4*Q!YpS3}A?5fN#XMNrGDx36z;iX|LI~u{P3yKO)&#>7}cThzF0A zIM`WKDZDO@FFb)x&rnG{W9f^1+LRdo`2V&nNGiY3@jLUipod&mR>e*L6MYk_$qPh< zmTbLp^$1?5%4NJhFZmbN&J@9@2&kTR50jiTBTg4}nw7*Ufz~w<@dhkrYx2u`MG$%@ z;v*1WZ18&HiH!>g4R}imliP_N);kC1?NDI?$ZtJ`7GZU)CBsytMv|@H*Zq#FgYf+K zjE<>8drxfhRT6prHSdu_kMoBjg$na31~Xyc2_Mn%6dzXcogj%VQ z4nJmk>W2inhhYD!@4ZhenW-HL;BU(PIsE8~krmNceQx4>CG&JW_^M}c!udT|g9Fi- zWHeFSR|Bm8x%h8Qqnm*tbA6AkoH}NOn!%QVoP0fYgFrKClSKBmO>a%|pB2)wOnCZ8 z!$75j&9gmNc(jU86CMtUj&8oGjSAkO-P9*1xXUXdFxPnoCo$gSCW{-silBO23T|r* zR8DyyXmb6}9Ke7cCex=KGrqtjnYX?p^hnbt3)%%Tr0zdKi60PdFRj(wg?23!$@p<{!asx5gC%&!7+ zd&muy4`8m(8*(*~&Ht;mD_nn;U&qGZ+8?|f6nflEC=NTi0~2IqET>>Xhe`W|eg7=o zCs(6e9?XN)FN|#N8bEqoa`u9z*uD7J9OLQ8fsjYmrLW0xrL#@u}i$B8wc-Mn;A`GFk zhVm=GbER*@ZqrV^fzM`&2QtBbHEhC5cf(p&6-B>}-(M~6-@BeV77$a6-PTx6a!w@7 z|8>=W!oQ`V$~r)iX2m`kt?lMS!N{1Y&eA2k6GOrN()I6eV~Iy4(-~I7Z%{ibT5Vb2I9vK zN-4~#W7%w`o81iaj-w(cuV$6O2*UZC@N6LVRW_N1(X9M7mT<@xJ6px;{^!at3AbMAV$) 
zlv*A*y~{kkJrk9sLedC0jtb+7;HJarNe)9HO>wOlUrRmfP$#1S%<=h4DU=@@2gwL$ z?)GC$NVx+>;w=W4|BWKA9(FE$6LNy3R!o2nvCWQdp@!D@x@va=HYa`XakS@Cc9cxk zHkLNmu_;N1&3TuPV9Cu+&{KB$ymyf(IJ|`~?>gjBKoOZNI)G^p=6+8-j8sI%pu=d3 z78{8W6n2En0-er=!+1fe1B-$Q`%k8KnzK1cMqiuiM%T;dpd>i)2eYHN2Bg>PBsL)j zqT+SEnlc@KgH0GYMAMJg5tp6*0d%8NYRIROH&=B*u=n4gzDh6nbd!;fXr5&e?Vx!B zCrgmHN3uxV`1+6N%Mv`zN)+YhP&9@_jd&VlX6bA;cf${T=^t9FX{?joiVrfk6PZYa z&_FsaZ~RiSqQa$2!TGU+Ls<5ju-8>C-!1g_;yE3pA^_v|^W+HUvlH?)$AW%&6>tXT zrdVt2)Pz7}b^E2)1j|cg1b4QUgW0I5{1xk~UH9X(5p&ExHP4yELXIip>a^>jpl3BA zEWdf!T>q&_t^kuKd8)79*535vtIWZ4Fa)`sX7z6u7eSwxc&T!&kDR&C&X;$0U<03F zT;vzQFk1DZqD_jaxrRO(OO0bm@~uewWJc3Zjd+yLS}keAgevZbvXJGELkN8cD^7|7 zZV`8p`Ooy87TW%e)1E-i8R0s9E=40=hh29%B!CfiTv;CZ)`PiLskui-hCd*%!#9dp z7YK&|iJUa^6${3^7pI2W@!|bX=MDPlg_o*R{P%sElMa1!7q>cn;cvNS_Qg_4g4LZh zY!1mJP#QIvWuhSt>~eCW)*{*lf#sxtl9f#WQ*N7}{N)-1vqMa0`DBOq+gPo26@H_3 z41Rz5j5(;Zc-KEr)DKB1Zl)#2I`oYmB%VHJ%4&S0wl4Sc=B$#3!<~=5FK23it%u(( z$(RgBNM{gVKQ<{yr47nXn6q(lx7Z}*Uw2T3Ac%@uuR&gEZu#Mul1B8pO+FlwVcGKS z`&b}h$GH$aQzy{a5%FYMyZm-1Y10jSEAu5)^SMd)PwS2sIf^THTA$kb0ZB@EI z0h9D#pP#|Nmn`2gJ;rKPyyn^>D=?yjWKU?}DjT1u*pZYrRV*V{*9L%d(#O$Mai$4`rB(e?wssgCRfSIUH|7Gn^fKhV;Dj+Dz0;tQk_CG|;lAU%+T~yIs^NcYbM75SQ%&;6 zzn+IwAW0cY(hwZ_zIbZ~0nECWFy-tQ%ykB3P_JmxKqb+mbW{ktu~fHLg2KemjFE)N z51)o4e}Ci2A5S@y24iw8Qu8DSS^Trnc)3&(cHR-qU*UJUv9~ZALnFpc1hb!*dlTLxwov%#bu*VVa4V^_h5U^5P zV1@$0+kKnFk}yfc)&*K_WrbGS8nt^@k%vW6$9VrixUa{(rOUQF z=UVqt_e>g{FM96(s|b{f|HNjZL6lM^KyR6%;ek%Evjl88i4lxbKAK1o7fg+L#v2u! 
z=!w^r6W8Icdz}KkAxiIn*i~bl-XNJCZ}2BIsX$v~rsosy`AqivS78MsNN7m*$(zhE zlE*X}`H^I75?BHl|2k{>`cbkSon<}=YmnD)*Rkie0etTXICXQGNaH3;^b8lFSvuCF z<+-q`)W4Cw#U-e_d+ALHE=_p}qDdfAs&;ZE>~g{*u6cd=rQ+Kh7OB^&Prk?~8bup1 zv8qfg(LkvG4w2hd_HTFA=h|EHg(GO1*}o_ySAAur?9s#ZY3VEck+jZ}AuaI)c_d)v zKoXa0UJ(q$N2eooF-j#2;cDUm&I1)xF5J8@_VwPA_{n_)T~r*Vu#+jQaaoQ{3uedq zZ{#$6HoP0a#4CN~_u5I{p_3L}LqiCLJ5Cx9U-Y#sz+C%R+>J;WExE%_U)R?!GiuK6 zr&vp^AKLwgu?znLj5J4akA6;lC_xl~5JDf^lVuMI*DG}^PpOD*Ykv(0df=T9D3v#=jxh#-I#%vmt{h15K__-Ax=yB($66P)-dxb&y zS;%+8(3pvExD{oDyvUdvyl&>vLC3fJb5QUmwL@4myaUF)7|O$r^4R&-Z0g)D)8`%K zV*Zs;d56C=JBTHAbmBebL`LAHB4Lav_%;(!;L^T1f>+nTEtzrS6-6zO1Au@|V$$bZ z_EKDG`_qOFBBM0t_ml@RpO;#cs+VUmAf$WZh~K(QJu;lrIK`$sOzRJDdn~RZ_iW+* zmAHS0K}q?8+8|a*v|~q(Y`5XIz*siz&2ROSts+veZ@L}5=$I>R>kjP z^+WCY16ub=+a25?braz9R$Kkb!DE|gEu*V_=i-P1E5AkNNi#!kxh~3bU0zp+yZ-e$ z4VS{JZASJ7-_JofOh@hEk10beJIZOOFkdM~5GM2=%UGp!MDA|WS*$I-K%%|mtSt!= z2_VA-K;T735-D~jlrrH8Q-vEAmbmg#Hi1ZV$Q&sVaROf|$-h?@7Z|67vZglvF{^|G z1VAh?i~z`3st5u zJwQWLso~C**tBOaB3ranaj~8)L>z1-za@yj2k2rVqKTW!V`%!%N1ss_{~hXc?9OYK ztK^Nwx&<>&qpoBS(0SoSbV*klJWyK72kC5*N|=|j>85kfbfj@*y$e|npFl19Wo%dk* zKU?4G0b`jRI=5F$$LoMx+z)tW!B9$F9v1&{c4Cb8;mF%x{V{u|IJr0l6aE`2`)nvSD=g zpTxw9g)z{HWRCPspSYGk9&Pi41O(T3d%$PBRKP)a=Ve`PIpM#QBM%+Rg@H#U%1obd z53lB(xDu@nA5U3}qLK)Pb4l?qDeSVxQ#B99IplNR<4iDB1hfssVd0<~*Ojam8F#M} zmS}7jgDNR!-b@f#@3q!HVCVG(7;RDMcuNU3am~cJ^LvGx3BVv0_?}O9&~DRpRSuYr zn&czpkBf?@Q6r%nFBx6epGZC0{PAkGGzvQs8&ON@zFLaQ%uIJh!k9tzYll*cYL`o| zN*E`Pkd3CMq@G#{hhtx}>3Yc$S`$MS8BluNIOgth{GaK(#%{W!j23NuB&ynn$W*6Q#q^4stOG5uCy1K}!hn{)>lT3kTu( zy{o2`t(fU=7>ljO@da%L1)O!jyk?7rq|%}AaIvbE1YlUMgH&>2Ya{sLArU2TmuI5- zOW?;U1});YXDp&H*RW(Wzot;Sk6(`5bh~Q%V=NSUT21b+Fes4tpJV!0ORw4CmvTks z_gm})Ih0EuNxnv0+qj*)vgWO(UizYlPQ7JVaJ>M`nmH{fRQ|E-Y4;s~eK1nAXB0 zB;7YV>nQdWa9_7P-XCLidv(7zn#XMlb6N0?9( zBW@rN42;HtW(IH+H{gsE85{~$p9`}1-=_l4`Pai68s`Sfb>P$F zYu@ScIcb&Mscpf#EI7aQPC6%l*IX$87lOI8-)yXG2hYRAXqcwmH zc^9i(+6>m4ImYKIhKJ>p;)sxqwTfg!lD{#*crjFG`^U|plQKkqkIS)YLS~3iyK&yw 
zlvLKhjq}X%IN6P#w(Yr062QRw#DX|$vE6~}PaHLcjaN<)zHRBn@Q*gKk&onb^c$J8 zi#!D+odX>>vs^kcwR}kv{3(EI+ck1{mjV;xPyK4NpO?7$4gE*5PDzeO`+n~*xXzP{ z6-MJEKc(0W1Xjzw9qHeirh=rwki3+DSTMy7WUobS4ZLMB8oS?m@hiT(9EhWlt_c;i z@%{|@qhV@w&TW*>Vn9r$?BWzeI2qqcT7@w&ST96%wdMSFpj1Fdy-0Un0!nq7d^a;M zUZO3!^D>iC1RH+w-=-HjTlr+{D>U)~w@c1J8U{t0Zphml#C1Ro8}=TbPR7@Aa)k** z6%trafVXe8gpPv(g*1YN>7dkiR@pBYtgM>N@Q<+v8{LE(`8d#lf(`VNSzj!wFhuHk zgsgE`aTab$0t#3F!$r}yWc($p$xSDM@Nm(8LiyR0<&iVoQ+{5;OG#=(o0E~!e&|{| zy&YGO>5uN{%eT_e+B~F*Ak}TSE<1B%ybD{bb-5(v0xzb1C zpfTbDvRs{NI6M2a_Xql?SMly;d_S~~AY@$b708SI@$IbA6Fl+=GqXnnYDg{JX@~KS zd-n4=wEz86_1UV^fquLP{en>JWc&)-HR^CawuKvn%3xdFP(MRd4?{y$510ccrSZvh+} zN{6nIR=PbM^nlU~jR-27eS!gLeUyN7BMn10gGl3$N~cH&JW3=+KtKkN5K-d&*5>)1 z!rZy{?7hG5A8W1u`u4qU+XoI<^R2r#ob&MOFaPAJqd#_;Ie+=(54LZ)9vhLz=ickc81-u|00x4iz)b9=6G%76C1?$)i=dt|x({ySEd*m{W>*IE8o(@y*K z&%US~vcsV#7k6HG_cd$Hd&DotjO(57<@9fTHN)a-y?^Rk%ii(gxa*I)aOO?MUi5<{ z&OOv+u3hbzSHJn%_ML|(e{$2{mnXb&>7=)3-+KI$2WJ_X8f`D?{Vk(%hY<)AF^yM0ln{6yFPN-lV=`$wO!-B*B?1{i$A=0_9yjMufE~A|J~=8KOO(hTyS$38LBeZO3^|J0`^ z@AuJ+tK2ZlxQ8b#x8;|s>|VL~qnTf=tntN3$KSlc2A|d@eKhXOe_z56Df{LeKOyrDMNMPHn@&%HZ; zW2e)XoxIP)Z7-emkh$L;zwLClKl_hGKil-w1AjAl#W~+T@#%ZU-1y3gOTDz!!8hG^ z!d`POIs0CRZ}V04k_8`I^!k$@-@9?yUnXt3@$oLxU3ry1&T!Y{->v+>efN)lar%uv z+2%WMy*SxoLruHoMei zE?8mPSH>N-{O|rTIBUE8$4-9w8;`I2=EaXbd&MFPe0AiQ4|ciwjInD>eC3??mbv)g zofmrSfV0OAe*WR2jVDf@bn-s`dHVHRADy>Y^40$QV;=lw@8xfdAG^}^M~(LS?S%DC z82{P@KmYTD<3F!1a@?D@Om><7{QKbVzPb2^M;?F5y%(Oc$AWWz@ZB>nTIAESK3i$m zH|GBEfx%9Do2b_N3 zox3znymR}dSHEk_CZ|8}{ublE|A&RHpZdz*w)*J}vploFv^5skaqsJPJ9W-&=URF8 zDSx?c$G^`y=9=lh@#y-q9`>DoPniE37mYo8!WnG5&Gp`1lfO9rslyjO zZLcFHY&GxU(bG0P;oklJb<^`F|MvC^elYRXZH`^;k~dd>rv1dxyWf5O)W7`n>IeUH z^tE>_f8HbCyW!fSzBu=-9j-WG_4!`8!;#5Z7k+*2DHn8qu=YcHF238Cqc$Es_tL4a zOuT3L=X)Ei`C0!Po6opU>z^xryy~r2-nq{aF7u}w|2FN~&b7yWXUz-Ws-3aYU4Q-j zyZelNWu^-!e|F>^yB#_9U;7;L@f{cLKK9lZR~S9_lV=W`>5=^xd4K13KDy+=Z{G9P zxE&9gzJJ*ovu(TBfd{o`So5`F?*0FFgIjmo=Dp{poHn&s{F;xa-nstw7uY>V7q@=x 
z($SR0Dc_mrnb}5DCM|z^ee0)JJ#qJ+Ub^gucTQXAurCh2_JL>4Ikfxe^M~DV_b>X} z-!|j2tKa>=YU5`(ZQ56>9lr5BE6-NBfBj>s{r5IJd*w;XuCmz0@4UJ0eWx9;=Q_{s z`Nerxo^|Qi{a$}+zFA!6$MZkm+54(xZ(FCj&2e`eblAB1@_!n>dUowcAD=VF4Udmq zaHd~=d(z9dpE!2PKX?D==d;}K_<4W(=%V|^EV|=AW?JLO2VcBz-P`W_;@JNAx3`vl ze%)Ee{pDYCE&07$+b>_f!|!`Lertng9=-46=YFx?!}qw%ezmP1`PMd{ZvF6Pv;6p~ zYi2m_wx!;Ae5QMzJ?bYj{qPr8_ZOOa{zt$0({Rx_H@)iG%SXRDdivjdbFGgSnA+cP zvvrqWclxt`z0z7c&3MOum%r?t3BP%D%0_>lFztqyR(>c!(1zk0{ne*Dri zPdvD=%Y1kA%=SN6<*7M0I{Nfg_Iq~oHH-6q^tUg!_{(}TUbW0d<9@u+-!?kyf>n>- zYl$mAfBWe>ezxoHzxkW`x0X5L-_LKpz}v50xa;qZ95c^~mt6bs@~6GN_#ZD^X!@~> z?Kbg@r%ylP52vm+(;JIyvCBIfU%C13UfarL_FZ7N?u#3&f54~3H`lKoyvvt2uD$iD z8^68ojX!yM(kjOsw!{ygpMB=T=R17q3ub!ZFJs3(ICb;${^u`?pF7ttCeKsZ_o$C2 z9rbzT$aA*4@1Ey&+UAjOAA0A$Up;>9)4Oi_;~6%;VAG|i%{p$MIeHiGw`~B_Y&*<& z>toaH^S^)C|Alkjxc%7qzp=`@-#zQznVx=b@r~|VZ>j1B@2$SuS7#jk_>r@IHF|cd zb^o>T7a!cW)O3#)^IfpusPp{CliKfm|CvR`{d1E88tZMh+^xm0Pu=2Mw>@yoikIzh z#JO9az51EQRJ$85`d62EdcB#>`s{->)_CNVZ!LGwTkCH4gQ?few)VkG&2h)Pr~GUE z1KvD&t%WB3{E*Wouf5>hkN1mH7FzJ|FCM)0x4Ry`>v!+^{bsX$xYaK6@4w%C*POD- z@Usm^=Nqi(wQ4?jM4rsgK&U%Km(y}vqYt)uSy!?IglINj-MU9$a$=dQcyLTg@d+X9E( z^7!X3-mu~Jf4zOBl`neXnic;v&yVV@r`G*=#j%gvJ?-;57CH03uTJZZJ>i0X&a~Qf zx4ini%PjxB|9)qQHTS)4@yRnSedfK-ZF=(uvtO~?k{hmh-<5mb`iHyrAU=J-&wkgK z$_~rje!-JZ>@zHWJ@=w-?{s$S&CL$k`oWj3?oYXNznxCl@bjsa1=xo2zqTH{Zh zxW+#7KiJytXPf@yt35V3ckyRkX3Kxva?z#h{CBg%PG5S%*Dswq{Z8W-|MTi6JU8jo z_iul#_uEa@n*Qcn{`=g1la_kpd#`SD{!CAv^yUNW{PyO{uN=Sba=oQq-u$~~9=b(i z;`Z~Nx$xgkyy0&X=iX(V1D|~It(|vU?C}{NJ^zUPe|5*iwKiVrSeNc<+gBgBb`$_cWk!6@%MlJ`P^Imb<&UboA|RM zW}j!9JD%A3=eUHoj+Wzm1TaG#C)Akn+ zjDK|1KmYEmPh93Fo3Atb2XkEX%J`FR?oa#IgRAa7;j&+Bzt2(qzklPn17F->bk+Ue zy!MtuH(zy?+oqj5{)ZrJN)P;{^@0gC$78s zm{*s7=jN#wU%Soxzue-hzx~g|BThS|`u#hPKj*pWpZj>`tzSQU{>|54e(j@EI$!_A z=!ki*oc8jjM}0Y4cgc^&y*umED;)5nTRz!q!p94J_nz0T{`2!Iy*$UCgo%@c1wpi;s%e{2lv$L)9(i#gMztTm=ukhrX z>!17n`HwGf#6hh`o;qo>>%RQfMjzkxNbi8pS6^b6-8NX~>`#e!zkm7uTW{&Vvd*n* zZ*$+v`)#`0&P(k2-W&f|>o4Q}^X#u&=A%Vl|I^eHXFKDJ12*Z)@Pe$9$P 
z=cPM7IsNq4zHsv~mmYP}a(}3P@9bk|op-mxmt1d_FYbE(vL%1{{WmUjnH49$y!84z z9lCyZ!i)3oecQVyoO#8357n2yw^;ayC1<()!s}~CKXv@~r&MkNPZ!E&Rg;p5N!*m*$=F(P}q7 zdcq15m)`D$g%`W&*x5h&=**>8o^8x|i|=#kzRN{SnYiqY+y3K-hwA@5_49?+UgMXq zKfcp^Yt=XJZgj{0RL`8_XGa&i9R9(ir+N#1a>b@s9Cdwj%=|ZQRv*9ncb@p>KknP* z_KCv@d%t_nzx!h^TXC254|w{atAAfxdf|1yIB(CzK6vBI$7cS=`8(ZHd3~lWZg!a+ z+w1?&`kTLd)>)IUSYgZ+m!0ukbDb~FICkvh^=^9bf(^%C{`<x#yH0?fRc-j~=k_Ay@xow-ped@hSmj6WZ}z9&hj-on$>{i#&pB`I)n?oO zzbia`$kw~>@$Ru3O`LU)!CRfZ*B*U)Om*reZ@jX^zkYsOb>>K+YbHjPv9qxZ@seO-n_JkQ8nrXhbk2~noOQ){! z*@Aa1_voBkTsO~B%kMj7pTj==(f%Lq@bM2nJ#(u&$8Es-K5dr|rn~>3$rFxR`TYaW zb(u&1@q?3|y#JWqb!YGPXQ zaOIUxT72S@v%YZCjFWzM#gC4T(Dl#f*IjYv)n5B>?qw!i`|ysJ&GPRjKW>eiX|-i0 zEc?rG^DVOZ)=$2D{^Zm#S4{cppI@!A=XD>gxZ|t;eCq0Dx196WADl2|hbx|V^rr2O+H%%! zF0uDQVfP3$)gE+wPjArnro;BocXw)) zR;$x$x10TDb1-N$+wD=WSM&${>d0jV&7$ZQ&0e!P>{W)1cCSC|7bD+UbO$wG?2d;0 zYSFJ%o3;L^+AXU6VbL138-qc!Qt35EZFktJ4r-&;sNbn{n(bQAZPW(cX0_2^l-VZGX^)Qe%K)*2SWQLk1XRI9a4d)V)H zI*rz_)~eR~jn2Sb^}D@#Z8#cq+r81K*DSijcDLJ=}f9CU}3K|OoZw2Zyhu-}cMWh&Kft7|WM z6=Bfp3xh_Z=(PN6x7}zqIz_w4n$`NXO0!mJ+j9HS8d#T6e^_f)dp)tx@tD#=$n=Kp z-iq`qy_%nGHG92bp2k8}hRvZ5t2X9mSb{-@b$bQ%J%UTbwL-F|1( z6Myw)qn{Y-wL7DJZ_pq4;a00P8p)rc-K&op!@;oK8Mb>KJx^2V3`d<-t1tcPRa@p- z!`7%e>bmP%!|L>H#%NG)6^%xWIXv3moL*z9+^jiMrcU8Ybu`l7!#>baBlz-`sL^_Dwrv_)k{8rDYL zT6?4*wTC{dTQsWHrP6DRDnh#6s0>Dxk*eW?I!%=%DP z)RZO9S{0MUP;$4rtxm7fmMeo{r`<2Q^`SLs3~QZcr`NIdwQ4TX6r=U}&~9|a?6B3W z3@f#&@9apJGH(jSaoB7XcB4N~`1*C_rBksQBf&ec9lctk(^ij$qfx8TtrVSpHM`j9 zRYx_S(yDok_Mjy(n?nd`UYE{`2TJ>hNRamrsbJR>kR;oTn;jqZf+Ne>fH~S-( z?pT*53T?CBZqz%4E$NEbL8VjJqC8u_A@i#qw>A{4!&bd6b=1i`O{3Cj z^@WRl?+q%s?Ye?CtaKH@s;cCM6tq^mHSCQnWT!6r-1DH`A9kw6sNIvtva8jz6YYUb z?{@n|f<}3&>kGqPD|^!@dYw_F(jBUJZOf$Uw1Mfz%6$8yyvnf4mBk#^_ zHrw4seHMz_#{-S|{7rX*Okjzw4`Qmr=!KM5WL>icyjK?NlvK$6}5~ zN^-ZZbX8?lr`HlFt$N!h_gn4SU{LE;(?gni(a2WI6k$;4A;XTlv!~ToLpQAVTt)?# zT>ZWpB)w|tY;`DO{73((w~L{oCP-^);BZu}c9qbZVpEEhDz0ITh6|q z*AqGwiQF40K2;$zEJL6n;D@7DuTkr_t0{Tf=&;}GR8-Qwjn^tgT~i|;cDtIj#?Y>- 
zzW%1Dxw}*rMO>N-XI-(=?(`boQ7rdbZB?u?%AyTZ_S-ef)@kYX62d3+J-DA43=7F_ z+51IHm-VqD#j7>y+4z2IB&Ulkqjqg$bcl9aUv!79#;`qVG>6K!t~AVL>gu-EuJb9? z)lRL^YinmJV!I{D`<7aD5FL$vyWXk_pN=-!ANA|%0E|&r0V_)KP;d(PTCb^t>pVJt z*G6^w&2Bf5W}|zO%YF?T{--)<*gLJEQPk@ibw|{-{Zw;MYYAkcgVyVk$>#SRLx> z1u!uxQhr9tmRJNBAcK^9pE|7TVD(Bd5`~^i>`8PPDivL((rhYM_Omh^$gSp}s$|+2 zUDQJLG$8>$s27z+y#QE;DGg+@h_9sH|lPAbPcyJTE+SQ5#s?YBwM1Qk7A)(`{Ek zwq{3Nki>-wR-2|ms?rq)or>qH)->d9E~8LOwn4QCZdlBs)`CfERgk7x zu{Sz<(b8cciy`PUESk_t$8I&n+AvYp9$3X{$9H!D+J-XJtkhbgiiadI}3N$XIT&89xl6Qp`2B>&u6$}Iiq#L@wc!tg@s%W*|ZNS)VTio!SMW;~+FaV@l z6(x@^qErW-v`}SoFGCYRg=Ja?;IT zHRPe(X)A_qyrv04KA3>6S*>{7R;^xbG&^c@rR8s7dzS%p+>lz70JLBw3vKN>P=Br0(-Ims5JIW>*>=BPmQJ~zq=&2| zWDC;mG@4>bm3Hx3QncRfb-^Eepx*9Fk~(xc=(Oq`A2yN>F4*b#w^0zK`p|L#KUo2E z@7cg_b?=oFzre7Hib^qb8(X-C&fQ+tX8jdO;9txqit5l7CO9&1d zzPCMWYBS0kUQ?3;BWW+3<$S~DbQH**a9p9V8E_OQFSPPEbV9*N|5S^ zJb_zj1j>?K8HEVp1EZobOcnE(Dz+w29TXOBkhIX`t6+zY6+D|rvnUma)y`_+!%-K+ zQId6#zTAdB6~z==3{UN?7Gkb7(8Y(~wE8w|Hkz%96{>?>ojyPZ;#G&XvE9|PyP#CB zyi9jA0wQZ@CCDQTG_4LsI1omOvQ}eM>&U2L*l2=_UBL+G0%k~JJB+mhVYj+HsaCa2 zs0R*OX;=D+u+Qje;JuVi*Hb@KpYn0lA_=A7_q)RpBo55N9ni9bRpAh`9j_Lw&az0ELIPYv5%? zAoiPLSPjtgbQuUlAn771NrJBf54H47Rj&p13(!Uz?h_3GTTrNCgFw&^RQozejXSG6 z7RqHRntH2SlVNV9IaJ8nf>zD8uS1Jf8EM)|mENpf(;kWv-b(q3=F%Dtt8TMvm7vX< zOz#5eqh8k!Sin&Jb}?|7kVW6wE~;*o=FqC9qo|ZhR~?#7eFDh=Y#`VYU@uVv@@K68 zk^(}jkv0a983wBgB+Re@ZDwj3tPdN*;!1a*)xe?;kiSq$Sw%W! zv27dqEH_PJ>UdQ8n1;}(0WDBN9qM(NPPKvw(LJO>(E(6;!!*NwQKh*l?h>7zWjW|6 zHAMD1c5fs2Qa4fUB$S8t4X)J{O_Gc>M>&L*%lQQ{LoV^fs? 
z+SX7(1a4X=2D0F0Au^1sNkzoIY7{!pCFN*+9ny=(kuCjxZ7A}A>z)uCq!A1RBjS`Y z4!a1Y=t)CQLq_mnaIBhVTZA>_hDHDjo*+fs4Y>|21V)5$Y*SNTj^5J)G6gp@1JRYJ zlkj1!l`N6i?H08*bWg6RRe?^>40;u5w!h!$)oRkZWbAan8r=%bP#iUZx?pXM#QuQF zlQ0QLw%Ji9R4GrB`&2Gr2E{E}OA|&7WCCs^u5#NHv?K6CEQJ#y@@ln*qE@<>c1>QP zfMRAu7DKkc8~6y()|!d}iU=d{*`exMP8E394s;%3Ut9muggT&Gub`G_QEquu!LtY1 z79a_}bw4m$g9;Fyqh29WQA>uPZXen2QAUbpbOEAc*G*z(fu!^s`qIcA%4=yrwE|F7 z%uY3;q1};^1w^h6MD!vD)v1>w(TIOYO=O7jgc0?1s21W1nx+a22oDj9g3v%=Yla3i zD4G(Q?4etRJ2eTg!c83_90h2)c>p>Fo)1pYlW?Kh*V&0 z`l1J-HKFKHgOsTxK8Y3aZ-P{S6$Ox*N!D(4Jw^}f)>BpIw(QNKg9|EJw~2C5WatY} zR|`bX(;%=iPve!^@CWP?u;Jr_uAlEXjG&aNrsyCpfOlVuk@KnXMLou112OQvYJ9)(>r?P zDMTn5O+`xf>m!ZHrrRX7JP23)vFM#)DmST+W*THu?g2ssXo$n+oGJED`j|45Z7Zw| z$->k^epIkGC`C*YY!mWUy&8RgR5Iz3FOqPo;IoxCq|jcXa*k|S!{o>Wg4{+D*NbFk zur!pfm8Q+QR>Mf7RU={3FfnXx${fb;;?k_~^*&kD({y2H#Z)m;^8>L04s46IGs#M5-vp( z+``wP709bA4s4c>h0zG)1Upp&_^h zL-gTm9U5oUo&`kOV0OZo$RDbf4yR$*Z?6;L=TtlkCO zlA|h#SHwwsk?~1S#ESwcvDLlwF|Zhh?JKsUs;-D^)!bW;G$La?G%yT75^#F7SFmuH zHdqJ*x;@;fmbzhzqv@rYi*>D4($>UIg-oQ{*j|AQ++&E4VAUYVD%HYylwws)RA|}~ z%48BeZD`tntmqUBwVvn1K*^FuqLd{PIh>`zSIWzf-L>loA&L>$tPwXPgsc{Qpi$@q ztmPt7M|zWoGgI;pWsOR%gev9szSU7fN#xcpS}4LrT-95KC5G1Os2Uy`f`onyF`58d z#k$s&A;p(yuZtp$8HSKFD|BB{Yi54#PI5*bHD?HUqnk?$x|_nT=#eJqc+IpmaEnsj zR1@GXjKF$ZfF7XQOuFG{)F~;igsWN3{)=#l}5wfRsife~?0 zs0>ai$T$y-6%x><(*dTYhqBBRHBAXD#9N~#r*v8?a*gOOjNTAFwLXIsanvw2P)Z4t zsXcXk0Q$lNv_$KsyBJOz;uc00Lbd1_%9>dn0C*ltcxLlhK?y zDjz_=?2WKe^Qb&9VpCNG?=xZ1N^61>3&*dFYN};PCpx2$c$gd$&-AGDCFB`mCituE z=3(Fr9M-lQ*`sDJ#irh*4YN65JN5{i$grQR6pcoh>c)mQ7NbX9)q724g5eq==aQUC zlo<`wA;T~l)LQ{@$0Ip|)5}D{w#(Q#?T^HdMJY8vAc4;Gw5rpXXz3(v-5-25jBvX~ z!TlHsdYFg717StH1Yb5OglW7ZimIkU%!?{Uln1z7#@JS=@GXjyl4ODCKUFo!K#^pf z$rq)R(12JNJ{o+8{tUnfy!y7Oo4Z7h%%j@+SSGGrWC#K=8KeYIU&B2WVCBf5tcW#3 zj0q5WT-T7poZgn#RDZE2%7rvCpN`yzk_^P8rbMuFhmTbl+9g(5G7SsI8*QHV`M#|YC7yDZgZ`A=DmH$lJW69GA-x^jaa zA*2)vftG+rKQ!9NX2H-VWy4f+Lz$|Tgl(a+Qdk>lBz8U1RcuE}>dD|iR|CZCN&bMA zJc=j 
z-b$B!^$E6I7s5R-Y%Nm7aKW-Mi%}Cz@V9Oj1chsAxMD?>9FcuJ4X)~MEWc7~3ZnvX z10V?v0=%t6O(W!bkZ9*VE z9yJ!IMEPmU;CiEhr$!924p3c|$N&Hrhs>2S*b)yjcTx-?JgemIiE-3{zHkGeNy)KK za3PtL^wQSg%7zjkP~bUIK(AgIM)|-z@poC{h75SMO#*Hok48vSWsQ6wSt6=^07)Xj z6cDxIB<%vG$fOEObdc-~@>j!?Ad`$83~{BXl&0k|AjQuVDMI1Vf>1RYmeC;WY}`ZA z7x@~Ff=({5oeHN+vv z2Y?6r2AzTamvJF(CQWcNyxG`L81^k|1U^+$p%f08u6r&s8g)V0%HDVo>#HJnh}(9d zqe>9x05br|@SRIYpr^JVs2a1@V4scg4C{3ZaXZvTNu}~#?N>7F4f2{Pc&1?lygUt* zOTj=F^&B^>#Zj+S#sYDKOK`_je1#NXv>VaVjME7c>V~|>EX;(-Y1kqKfO7!`!bx?` zs8nJOMRyt4$`99CoF@2{YOE4~XMhP?fy%ET=;OFrr=59@764W8(kapg_J(+xKk5t8 zVAqzAjPG9KL>yMMY*cEPCY`Ad{aaNtO=B+W8$?DFha&7Xj$)M+)q&nD)-7iWQ?1Nn z<2;?Xsc@QuiFlZp9v|BW2nv8@;6$Y>8-dAqhG?jeG5}5sX|^7sr-@`lI9UsLYHT)* z5k*c!{*@%rt*{U=t))OH7C?`sa}d3hXzQ&e8&i-x%0;uU9X^kNS%qqf1?ZT%rVd2R zP!?27DAS05enw%@D+xs!V;2z7*^~w{qO1~#T1i_|!gDAEbsf-O*VTZoLL^2*{$(uu@& zgbX?yZ6%9Fp2YkxRjZFOw3_)O;t6FE^g#jx-r*%48l5SwG!qp?6fCtjl8ePtOE1Ub zdK8c!lLR>3ux_dl-kpG^@k>vj1A;Lxg{{=v_#Fn(M0p)z0_4E7Bia%@So9Ke)oGSg z%#AnzFvF#>X8}uQTnGpsi#ib~>|V2If>X-G+NKy`oGdmvji@*FBND~oX?BPOLZJqB zYL042$jH{xsXZLgf>O<*VF*P>HCz?~w86IT;Fm+qT_#CCs?p?up6Hou2I?JDYi`x> z#JVLx4S{P76K~c>%_FnItXi3%pe7l$`oUB;ZPPzIRE96WER7JCav80djfYShMNy}< zlUB%dnlT0@Qnad(PC32wZ8J};kzPj^OA5CURRbyPHqw+r3$SDw#dZ)yPNNl&rPhhv zkQy@g)%qm`IO{%5K;d_*3A-E8m|D6RnhY))s1ooX5HKeb1CLyl2Xtz+U4fEyfgC^? 
zNEP5aD3x1m6+`4zl?EC*4;V26I+1pQh^q?r2kPK-jk=aha|^FnVFu+37rgSH%K!Aq%BZ% zh;vA2dtlat@np;{tNd_0$|@OU$KL3pv^7HW(5!&CFIQ7N_0qPwky0!^P}ahXAJ5mq z>A`s*wgu6y)QeD%Os(O@;bbw6Y8xR%?uiRA=+k9%O=Us=%S-A>4a_?f6A>14$!CnzG44u(t z{JE%vMj67!l^BxH(}Yv+DAMtQyL%8&cZ@A9KD8t{ilCnJKWO-pjpzlp>m--_?H1r@+uRB2l=wLmV1%Hb=-m7qoZVWy!D@j%=~On( zRcS_ybd+%WZ4^k)HYpL9qz}r8SzVQJjeiln<%I1UV(Ec6Dzu%aDb4PYECm~g7>9r~ zU_s+Z;5Fj*3d3j6^@jHH8s#>qMceM zdn}mc2|IL8{I~RLynxqTfznJ-j(`-jHtxE3@oMaFL?u)o*pKD5LI;bP5A%*07@5nF z2Ei6AAnaIwM1ko&1G()FO{{WLBQ46DMT<94xWy}o2;&vJX^gg1uM~5DrinCX>@(ty zOItBFQ3LT{xvXNB;Fc|4@j=r_3dZJ|gv~(lwIALmKnF}Eb}3G3I<0_3gOtj3@9r5A zhCJr{q!NJEqY6^Zn*=Zu!Ahno?z&!fPZ=L2nQA48wO%Be{1cr9?iRQ%z~}<{Q6TwN zqs(Gsuw5N}CF^MYkgwuxgn`(`9;957Lzqf!w#i00la|;X#4hXVRhYJ)Dkbb|BvO}V zTh^FjanO0xOPNXb)4n7a>mS{)6fuEfb8JX}zXqp{e-L;kCGxbSNyBM<^X`5nKb$72 z)PPel8Ji8nl`d>kOmHJ45pybQp+-y;_w1W;usY? zU@E1Bn|+kM;YLFQoiA~F2EMun8lp@>ed-)3ny>|_4>RZhUX=w&&qVDZlka3B_0SH2 zeo>Xso|ro5V$y*CZ>Ck`GC;S~(wW^0JBhXiC&K*L4;KRMMfzKJ9axYyttnKfqL?Zp zVhSAxaLNyA8{dN3=5I$)_T43~7-(2pMl_zXZV#Xub9dq}r!UkbvCIdpUIg;UHSzMs z5&-e|6lzyV2~3NIBE@u`_NxtL@&dQ8hG71l24?Rh1b}2AZc9uTnMl}_h(HnD1bkpx zoPBTt?qaOf=eIE2ca|Z-U6v7uFc1V`tX&M1dNCCfNVofBTz%ZAK8uz*Or2E$VMRQH zpgsZ%0hlwiv3pLTuDzx`f+U4y*T=|EsjW!^27$ATX0zQ!p z;4SoB<=ACXoopFtSBNS)>`QhvZZ$ud)>4ZyX-pl^0hy4m(1cix(tcI39EbuBJOVUL z>5XiX?@)ALeENN9=izAxX*7)UQP_4BHaAK4_!$W3cSg=CQJzdz7*!!;D!DWzWEw`b z6f%23MY6#01=R(G)`K#o582D29D30-GHwGGsB=2z!09S~>9#3T&E#^Q!b*H zBRy%B%6tY%nw&4#>{>FOs7t-akmL~KkhLbg!akj7VTxlyKzi~?Xw7D5pof8FYAdwQ zNdsy?w18OPbcxi<(-MLUr64=oWBn_q95)3p1f$zUq(Y<>P{FI>lQttM zX)GizsbxHxqj%I5%Vcz0jV(b{F{uOoIFKWqxFdso%1`J6Xkx($@oZfhw{RgZA-pP& zk-E92f@jhg)R{f-NL|u}_*0fm12h(Wb`&O^DH;ikVkVsGTp`$!q;Y~tnPH1{)fjnX z7jy)qGy8;wLK(DQU`4_j_W}NcOgbzH0f2@`N(+L6ZwZ(#gNW*c@Qci*{lek)25b}< z%Bc8ZW^>mbhfE_J1!WE-m^sGLL%^esMPqT|jRjr75CQgsl*ZnV11LA4Q7R+5pf>F} zL~Ssalt5)NJ)r}r5)#IU7}-xU*a;ayFq4we5BLmV6UZ0Tl(M8zfrEs=rty(;;+B!t zp=)5ch7nk$C7_Oy6VZy872|ReKR8J|h5lAcqvbmrDcRWzh~8vo9oNI#Udc&%j0B9V 
z=_;TVIk!wtQG;_d4G6nd(jV#~7ctO;#Pf}&CeWHz6yqan+k@WO&@VR6AGJcm3H zlOVDeg)cUCghTzXASjweiwo2Zqyno3u{C2GY!m_)AXlylGozfECDL?(f((=lR@wI` z;^NJOC;MchpdqoQUYHu&7C1-Dp(&*q76?+A;Y{ER-!jV)Q3SKp8}eM=gqi`~F%QKW z<3>I7e3i?l5jUJf(nUyKU`#-0R1L*V?SfK4%P2G5vI^&oC`L8*^*B8#S;Dd${h=H;ep zP&nkL#fJ+Gxq3q9AvKoJgQm-H33!zaS zS0@b7Fb7ZyDyCd&eXL5{3FT33JJ0&zkfNuN69#X9xswlYFvlZ6i<5HP2`LZ{`{6xL zE0;PnXYC-61NjClcpuOeh5%T?)D5>lXlVoP7z859h4jWIMboq$lycBl$kE)8Xe0NK z{6Vx&jn6}1qSPsY??%e`4T;e+#P4m7^B+c3(2$!cb0#W&P-0F9!V~xr!itIO>46l4 zFKJC%_gxG&YNKgcgo6YD{TG9UW=mbbELai6VeO~VDCTs2V+avRtd@TMUT|lq5!XPn zN3SLG7;V!CH6EqYG%OlpOsCF#MqiBw4E3_7^3_)=m1hAiih)dIhYQ@8Eut1boNMv zbC@M)54PgF;!(mp0kD`vlp>0&gBj=|mxpFc2URl_5&i@)m}8=D3U4KGi%<>i7QifV zjtL0N?rC&X^+Dsv!kL2$??jA;N;Cs9CTM(aKp35Y*ad|se$>zGBV37W%2fUx>Jy1W zVye_Rs2Vt@ec6M6TT8E@z~1_s1HsBeOeutiG<)bQ+#aAZh=OUUngT%8QyppN&;zpS z!ys;JPF%+AWv=Bj;?7fx>3Tdjaa)_5#20V(5)_#|Qg>Vi(i9-kFXSL#V^RV%rH00T z8R|{X2RA59ghB~21@JK-skILAInC)ZmWk3CkF+q- za59iL!HWE%BJq#tBOqFwLsS_v49+kxfbHGR$7TS^8wlAO@^31KFajn6ZVX4@HY$nB z0Jb!Fk`Erpvo0`vo06fuVmoeE+iJ&?KRCzk3^XWAyN2Zs3iR- zYGdYDN#|TfonX`>ce^3CO^yx;r8t*dF=lfjB>1oLD?8+1iE11~EkXSG)SwRq2Xv*d zkuJb8Akvgi1Beq12ZfE$1BGT~WKnk~Kp&q+c=)JJ+c6~2@BLes^C98ZeI zN@&j%3LP4&GicyERmANQ*+${Ois{fQxBj3J=$eT%@ouAZUHsoeN*U!1&~SoIVE|yG zf>SYLtk4P#M1(UPA=+p8wuzh_CR!_dIqa^T<|tzh6vzmvV;6IoINPBft{j_0yoyhj z|AnuZYA!_SH?UO977m*U1=*tbF>nR!LQ-O$=AfN(=V3O`SnkGkPzlJIX3$E7#=+x& ziu;FJK)jVQ^d3Fg00cC^3e;qX7^z0Il5_BY>{RIB-q84{);`?_nL<&kh)mL)%n(14 zS{h`d{MDaw?g@nE3>>^7wbV~_ z*OTOQS~Nzl5^y{g?o6fA69E!xw>DB0yE!P1KoXPG1>u-!rG4kR)G&;=RDboqn(hgQ z!F^c)_>dpzYh6pxCf1pGp}p~7<$Y8?hE$KyaLfMU}S z39eXUly<5BtED3y;r=p-#1uChJg)8x)BPO;WZeYRh}Y?mv=a*tUN}?+*<>Py*5vd7 zS|}rAjgIcv0!AT;Z`S!bs?_*I03Qw^c&Su&S*vw%-kN)4$u*8Y5#V|Sp0wkre50DMM%&}HWi6&`Waw3aFyQEpE4m#L} z1!hVZ!)hGR(t#^fjh5hyLS{tD$|R|7~We!Sj_B1mu?W8o&3vDM;;CI&<=xMPN5*O>=^Mef&O4}D8>WO;mcEzI*HEX zWAeEPaO3Vc?t}-ChoIm@m!1k<`l=%>E)&FMP)A_S8A07mmC}&oSW}BhkReWlS&D&s 
zk%G#Sh>aG+$t;Dbhpcr0JVGXJhs-W$;h-p_(&MO)4Ci)dh=Svb9&>M70ougME5YQ13N`rQj&|oxaIID~__{Lb<%tazAyrg0c z&v-sruD4s zIfC99U6e(*qHP%_n;J3#nF@I?h?xmeV#P(auJ5@FHgpu(M@-O2EU(ik781xT-#sLS zV@)aN=jEcs7f61{CkfV=%##S_RY8(wp+@a?Rx6@I>H%@qI7t4BFbZ#-ekt$!x>qU<-oquqI_yA(SMYqs4514 z!Y)gg!BE~Uq*jPm8DJP-E8TNSr}Z*Sk7qSvj+vy{g)_j&ip?@k!oan~oYgbwR~)!8 z^TsFv<}(B&j6i^)@etM`Ir$uOeWIdPWrqR9uNy%$M)~Qi4(T?%nJT8!@z#tfQ~NZr}-bk*O?U3Wz6*62bnTdwTnYDYvq(OKP}O z4)TiAoT<9v)c^%?%SjyGRc;4Mr7Re?c}}J;{1~XEsaecOF3PR#&~aT$aKAnd9MJIpV`) zl|REt(bMv(G7SRmUZ0A9R;;^zB6@R8GYS&DFx9Oc=?hs&Pel2*l5v*t1eIpjrlVb}IL* z6VPl$Cty<5k#tt55-iP2cgm?sppv~2gq|kvHL*OK6P=Wlw$_ThiaW{~Y2mpveD0|9RzKznP;U#@k<(14Puap+dOX&oW zm6FV06*b4TOkSeLVQykQ_z}I$&??Sudxo|p_O(2f)D57ldAW=Jp%S`G<~Y<)fx~Ad z^Q?|V$MBO$p$1Yf=Qhv<9RhekU+5h2C*vyONlwrKsFn`zaF3=9#=j2eBQP*S-aX|q zh5(APu$7@in%E}jz}${1J{YfI!N-cY7R}g__vSDd;*DlAOkZ(gvyLkXXafOFA6GY= zM2>AVQ{y;4Qw3Bj>OOC$S%S^reOGxz54QzGOOr!QwV|Bu*XzB{O%UfbadCX6lLOC2 z?s4QQyrQB<$;yjK@_rcNok3kWUNw}@!CdVado)m{1V)P1*E1?2&Bg3VHt`=LChS2? zG3C_?J-K@{bxkKEqVNkOPGvc0g(ic?FsrvCq!H*Sj`M4>LRVyNca zzn+mUuY@vxqF*4SCk)Sll~5K0KaPQ@x-lkOP{j!s03-=SSu_z7&4!&FL?-9tsGP}3 z+)SlKQ#%41fRP+YiSC3T49EnatPvkB%O9HaBzDuMva{$F4Cr~?8?26$^M>SU)HChZ z$_$KHJWi}IL$k`(n2(OiY-q&c?Rr|>Y>=0zt0#pH`& zH-qhHR3>nQ;-o%w=V@r7*k?RC<+&Y!65ojL5l!+Frl17klJ1w(Yb?jb#GFXe6|-?B z=rr+Tlf%b&l*<@s$nhwrd6y1_N*(plN@-3iC}T!f%+l1oE>T}#6i^kA2j9? 
z*VIBi)YItIQq5)Tk-m(U2D70S5XAcPSS0{|UqB01#dn*zGOKY1|U8i%g@3 zxe(4CIDrx5kVXo)V~^du=QV1-kWqfI($wCB048nJ3k=}(=s(XUET#pRl$tt|(%eFt zE97ErRK4^DB1d!_@>Jg9Hi(Cs9KIS3cbt3^g{Wk{DNO~9LL(KUt|JxASRU@AIa3$$W&K0SJN-3^vafYqmTM_CrG`2 zQXR-rVy+o9#jT39>${^=0Tc3=q+%R_>R3np1A9>tR77?O%Wu_mG#lt-v_fw}93zJK zH)`SgWp%?>oCP2jz^kwg<(76uQE|T++q;ZT=!39gR9Wrg5a6$*-{#bFIdmjUqn)va z!dY4$EP*Xvl_(Ohf9T?iyj4^Iqpum2fc)s=N&F9iM7yTog-_W^xZNnh_K^ z2z<}^I-%u7R>W77mUOHqo0XTtJH-qV4i+)0$r;qV?anwug3vZI4i51!d*Z?3?BpjR zr$R_VhTs$DCbAbH!)a0C9b71Fbqq5>C{$r{$jvKKyiU#REc7s;;4;G6K!}+H+yWh)G=jaeX!eZHk@teWpVNL$U-0I^Nio3!Qh7T|)Ked> zEg~EclCig=P|!I&DQ6jSTu}$0t5b&MU+I34wO|>Jl)W(~M*HP_o|6J#GGgMYMCJ&c z?vZn3qErk+4O9zVMF<(<0=cX{Hj&z|e`+K~TH!kcM$}ci=D?wM;|nfReiab{txHWS zhT@#&gZv!+Bdh_?s$2}64)xQToz*dtRY!7aKq%)8-?Uu01;_yy3OOZJ(gilM|B?u@ zH&7vF5ywAN#vh}^8%*na%Ab77QD!zX?tR=R{Fv0iF`3Hb*_H3qC(4SM0+$rIkF%8& zrbPiokUrX&NTa1{8MuO}h`EK)y)G&;RaZ;_zcGuB-^27KZy>Naq)lF&`2Xdh2lv`i z^$7LV6!k2WYj+35L7SkAJ1RXZ2 zgh;VXj@^dNLB}+qpof46gASv8^`P_za08WZyN>gWKJ3%w6bp!s17b!lGdd$7y|PR1(nci&zRtE7Oj3qvqZU zn8CgZOWIU0W7myHrH{sb$d{{x#RL~OgjZsKk2sxV*OR4OtZxyvAzPd(NAnT_dDE9~ zGQXC#nUbMI5-lo4PXGE=3lo16_yHs@Zl!ouIRFsF>Z* zHgkG`H0#&`vQ}D*To2nVcYpcRZ*%3Ue9- zc}|i-N;Fh%@p3dKV|DT_+Hbe{EX$#nVUT4r7c*Uih~WiI#ARW_SOpAE0XdUV+Gak- zTMr{SrvO86?gQ7bv~hX^lVAi0VWO>v`YDiCaFvTj^MI+DJ~U+37CWzU2~>HeCr75y ztf`Ig8`O_4lg?or_BvA+7`mgixF&GOs@x{6o0Nx{m?XJ~TE|yl(G2|rQ{EN2n;CKT?x+&17RwwFYHwuA)D!r)^}IxH_fNXT`tFR~GO0O;W> zYGgjM+#8SX>|`3eBF-U_cdiLLwNs7()tZ<$bLMC`j%5=KZJg;cT9qAKmkK9_l|~+( zk`W*+LrEe}2s`s?L6t@GmH>xzBSL3H5wDLr&^nV+eff@u^T z->?Qwju#$uqZ|puuiR($CAh_uP4g>XW59Po%=Qu*>W_SMW`JTUc*GL#pb;w!Aq67% zP|QlRPmWM&)3MRg3{B2~P;M+hD^<|T;-FVO2`_eF$B3b9($b1zYM~524a{_^*v$2K z#@U>Qi!okvgC2}PpkSj;${Fa9*?}0#+}Q-qf!YvGyOXzrGRvt|)C{e@bP)3glKnw$ zpRC#Q`1oel6enJsVhN#^X+l7(nN6h(V2xaHIdlvA(00p74*f_5;*adAh$2bMB!Rh$ z##T+gVQeHXVsoaSCXeJeEP@^l=L=z+r6o+({UL1+C}bCpjXKV&mT3^6Nf6&r;oCl-6Pw9E7~`N2fXO5usjc~&X6}1nj(9x zu@8@@1Fz9BWt?@&Ipr=Q#^@%Oa6uIdG)oEMOyJU>%F=T4k_aR&Y|TgzvJ40oFJx{Z 
zON2!meSqAvNU1FHuD5xFMmT2Vg?>|92h zH&e2s)RdGNPn$Otc)ZNe;bCg5J%zMw54@bSIYuHm)k`c0JFtO>LKtIaz?@ofO5p1e zz|>;7=rVa1h<=aqzU%lz%O!Q`7Z4O0B&DXCD z5}f$049;Ok2UaLY^g?Hx5NNX%STz!-%j88E(25T+kIvY__vM9LK8?7_)0!jh<#0P& zE6P0_5+>#SBd5LdT0gmDFX#l|3wB}0xbBfgV_^g=r86o=j>vjl4KY;175-6d?23$~ zvt;6=>cMfIlxfZ=0?Wu@K$sIKfE$>>(rBO*fW^6gN1e!nhW+XZDqS8ZOL53dSd5j{ zY|YD?xyV?sm<*%gBM-tbZNPw*|1AP$j^R^KA`1dUQA0y6tuS9Dvlx;~0w6}<#pR35 zGDMGjNrASy#&%>I$rJstESLkU2C(GST}Ij@kO*c-KJk&+0MllN zhya=QOq=dwdukE_tn{`M0_X;%&oG70(4!+JIZbPAbq6A;B(g3D5khN&h-=`rs-i$d zla}ui4-5t2%rzk>V@l1(@Hip{qmD6oz^!>K4+2?FoayhReX@&zw-^27@JPS~Vg)i8 zHR`i5T#I}znW(g)0`%t_!7&UKAPR6%18@lu>4uGRMS;tpjOjrXC4uaGaOjqf=_OQd z8c9m2HJ7DgnX)_EWELXxpcV~;;un7h=pcS#3378ewy6+vy_p+JHXe)go3fJvpS<_hQAAe`N+RqUE%^>Z zF=wabSYAki&__tlo9Hn|EzizCR4zlx75Y>kJyIPskOqg`~lqZ=8 zHslnLS(8ee7gqqY#32=nr;3%AaF@5KMJdT=dj482L+R6ylnMzK6$}AXTFHE-52hBf zUi*n91=7)zu`>e+Hcrb57V7BAju#;i5_1Sxo6s%jh0sz6f+hdj3F&}>ZYmQQY+`jq z(8vU6Wmqdnh-fG=j?K$}G}k!F4W0=n#<}>sFJRd+4&LHY#Oi0yi9@+?IKuMkLr=~FD zVyM!@T0?d@g#(!4O=K7%ip>VInCu-oL}Zbz615sXQOnA_0Y(8E<;-`OGsNkGHLH*G z7o8!}1-1lTnajw#WK$030S`p>yys5Kq|o8LlzKsChj|NO!kGQb-(gY>7647qd5l2y zEN$XBev(4e=51C=n@1sqNBj2z1*R;`)!-!c+(5^vF#xAj=D}6+FQAck5Aqd9 zig6jks;YCwf9B^}E^_ds@{d&j_Q zMy|V*$6OSbk*-N3hL~_rTa}=583BiVI0TnBPq-Drwpeu;UB&d( znQ}sA(h)}_MClzqn@Lrg8VrNb7G5#HondDng{Oy%pBH3-9~RF3dtO9GGl)GlE*f}= z=jZF12C`^KBt6W`Sq(T_TL&)O7}P-I($k2#q^SsKI*Aoj5W*{|L0AdD23cHmqCh5K zKE%FCe^aVB=#`}GO&Gbf(AVv@m|_X`XhtzPBtpPXc;upd!V|B4$bApMWt)DAQ8|GBJDNafZuDSq zmb2aeXTWcrX%Lnfi$rtiM)V>!1#6Wt9Z|)(eMn3(ne;eluVr;SdM1l^DK; ziE3gnnZm_@L3e}x2q;A6v{Ld!^nLtII+zBE&AghQst8guua^@K6j2}Y`hIp5BQyWZ zh|4UMr6FuFgT!om6yNM>&i63A(govGMSe-BEDUH<-shtb(9?24HAkBWqUN|B}q!~kG9Ux@8W%Z%t`xQHb)meyhlhG@!JnAjscwIZV{x(dO8 za$>{N9%9+H%1$fW_tI_0Ap&b4#ZG)W38Q84F{s#35|o}%Ug_$$28k@KLS|5RWTPG% zcYY53f;gEuJ1mwrNQLTO@suKh66@`Zjg9h(u*xR`6k$vTx;mffj1H~pnkTt(N?RJbo|zC3CGNxVS!avo zkCQ%e3d2jdj^Kpx8RmRF^JbwCERU%0g~iwj=D=i%x}qk?^-nP;jMRo00xX+mnnhEd 
zvQ1LVhvpUPkc?{p=n*(^6tyb{f`O7?(s=grI%30g`jR{$sIh#o6dEBJyFtDg2oVWj za(q0$HXy}@^Z)C4PhN|sO*%T1*FNZE3|i{A`G-K~vsJYyA~~Y%jue+DUipQi8PDcT zCz>+XfpP%K=}&$XG0?!YM3>2V5Uozk#5*m2Ff^UOqEplaU3nV@6b7#`AnG@ERr*AO z)?s+16vIGc_{V9w9PEI+VuZq_I5G0~c^dngQzQzk!6d_`niQ4^>k391S(Ns?4lclq z+7Zu1tUmz?;Nr&+CHKXUHS=WgnEIrg<%g@9g9<3>S2EpJ89! z4)?KvGJ#8Q8!15lIo$;BB@@Ejoc@ioI22St%?Q9mKht;zM{@Q&>p}4@=RjJaaRyIIhEdspXiI5XV?HC@>l&CC%4GA^aS4oN}xWW#Ho#A68@@u`O7#yECgLTl72BAoWsF0tjJeU|%2&2}>q5vaBqD?u z!k-MH0;9_C$Sj}NRfLj{%bFGo zoGldS>SG+B+*5RKg_EQuD^!>?9I;3`2YFI76it(F!H>n{?61|Y)1fWhb7lys0swR*CoJGqP&@BS)9i2*<8wKthiUQ| z@;NJpcA{0R8TM5MIuPiUa4uu|mdx_RerJNGXBTp~iHAoyj%F0SRSN*eB5<=F3LQ{A z(PboI9uyH&aGE3tObp#em~*hK#!zUxoTN&mS&f|2R6#X(gfVaEfMCVBL+fUzJj!`3 zTz)|sdD09BuYztgA_}W>XBp%Hltg%~1A-+j;bYO&MH~w-5SFJgT{PQKxA8U#uglVO zNOAOif*@TZqu-42kPn8=913Haay<^B6njtMO={E}6ZU95+CyxEL@%R9wHg@B)3_sW zE;64!VW}vh6iE|$ohJt!O;)s&6aug&9%nU1H}P-&6_w#8Kuj@a<=wo6+Dyl9{x5L|z zx~J985Xq=GbZG*M%jAs{4&F*bGw!SpsAi}~%ZhMEj&e+bH`Nbj;%vyJER-*<0V>gq zoEdV&FjQUX^GLJ~cMkX(*Te8Wv7AYpaS^Uro}k}&C@hbH7I#2CO_=0mHNb>?gg+?h znieh-<4{cbc@v7EZGJC?H(6L2qYcnM8&7q$_=Ma9v{>*}cWoenxY`}#anb0+lnEOr zAO+ObNd&qfgkezxg$^jI#G3iMKTx3}=n;?9Xc?Jf|F8=3Eqlr{>`IDIjrak|!n??s z{ebn|ty1C56OahL4kzU|4MVGe;wn&HiGbib!;2AfF+n$|j?r9>DYGV74y7Pw(m-_( zEWdyRsOI7U_w!yxAW4;Wu+Y(Bd90_Y{p4-v1i@+V9~%H*~9BvUGmgf%r}(xTzI2mMqlFDrD6&6u6|wDN$FL-O&b zqTjrcaIIJB>uC;B@W85sRto~9$s2ws#VP_F8jcIR;0A!Z1i5sRhbn)3T>mc(>4oX0 zbPU>pS-DJ&yGBLCRf#H?rXmc(gMzR@tWBm-w01b}mdJ~K@pfVcYz1#(|9KT_9>L(u z@GR9>KgQT1Y)AtCXG+KqB;x6@Y(?e!Y@GX0cj0gaOb#lSl7lCY#3c9}V#`T%s!;?j z7gO8yQv0Tr=+Go5zaJ(H+7iRstUqXpt<_UgoZWyJlb$5;oaa&U)d_Y7>O@>Edc9ZF zMb*USSdi$9_8JVr2jW#tz(qj6Hu(mMG*L4ec=Co2W;QGWQ7C_h zPOyZqDNwkh-f+Ys@Zr_5)dQpkMl`{^x?L}cZ6`OEBLKPpc%qh(r%ku85Z*MNH$k4# zX@FwhY1zPLKv{u;V7yH^=VT$I$a$K9=OpwQ@1^l#zr@6hR6tr)QmwPS@EYG-Y`~c@ zs#f%=ypZev&*E~(!!@D#<|_~`QK9qz#R+%K;R)}oAkSnS zgaiHsrGT$^f64rEE-}+GPeXpAZ6~P}4{<^jQ2mHn#*3yjN`wF8@LO89ZkMRi&ht(q zr)^{uZ7O(0jfc}?Uoz3~-ajg-unPS7+MgD8ys?)cYe`ttDM 
z=XpmL@j-LS5dh#&bmmQCjJ=33gA$mLu_IBW0a%>EEgADuO#;et5G9vUuxxqk|A%Y+?qmsZ96fW#>`?Rp)7>$!~FUv->|Bd;}Ez94KTliF<{)p16(t_ z#~O7|z|G#0B*7-G1@jzs8TMyp%6_Rs`wUpotJ|DSOLeijKr+Upe*i%8Q+C>3Pbc!# z8c>hTEY?zSkck8u2njgCUJ_cNoq1bBykdb;gZ1PU^O-^kK$9g$3?f&h3K%9tmeZy@cVuUE z>i=lE?s%-zwy_pZB?Lq)kE9X8>UUg2Ai68bfRWRR91!I6u&z3UofeKHv}l z_k)H72^zug0AYhh2L1d19iS8fyB4qq+5(U)!NCNmgnmv5k`FKlB|)gqqtl8CVGwR0 z#YSz!fS7?U0i`hnPJrd$QX%gF1P|~4u?v8*ppXm49hfSBkg&~x>4Fsh1eO6!R@0p%vBtN>FAupgimq%vsIK%oIxA9ez4GtkT;brvKz zKs%uOaNz_1`-d<-99dL>2Tuu9;E84h92}q)APZ5w0r7sQoCA&k_8D|4pgW{Nw*X=0 zMw>fr>fM?gY3Aq*Xyu1kPz1rP_= zXQbChZ7cwL&=0gh9E9bC;v48Bfdm4zA*AjAnjz9dTtbQv$X@}>K;CnrOhFzEij@F! zkeCiJJdl<^GzFR)RANv=1T+rvPPpn8d^n_`kTn4T0y`WUoDmX$a0?AwK$pYtz@x$U z;1lQ$4Y+3%AP$^|lZG-NePL%J!XHExux6-Y0gwy`3CN&96b1<>lpujTK&m(hQLrws z6(M;-i3OY#D8isR5g;DuE(9pf z^8_c~Rb&9;FzJXt1D+q` zP~e%MRE}<EE^4{#%}p&@7iwE`O&I3g(X zA_4++Sin?6Jr9z5Btb>@%>z9NWbBFZ3-&%-o(rwLC<%cG3o{HQNvLXp8Q8E;_5nWw zaT5|6bQ=j2!(k^v4HtX^@W`;Ha0-Bzg@Xng7fQ`g_J>#koC~V)!*~Ibfvbib2U0(X zNWcg{wgIf&31tkl4_X`4Ap&p@3P%v@K`4ScXaEg?(}jtKVi4q?FdRTHh(`k-4apMJ z$e_#$n--!U^b!N80c9%UevzsSDJI}b3wS3;O;GRI$&YP;`~#pXDz*X%1vi?Y*@6j# z(14_Upq+*pKu8PeN+{{UWWtAFuyB|_7Iji=gjyYNy{L~BQ5g;=B|M~Kg^V7`$FSZI za#7Pe3=oRcClzN1X|SUJegcmGZvv*zLh%pgAN3c)WWkRI0HWz{6;FKmi}Z8#o&v=3&J^ zI|^hEN`N2@gS!!+q=~KtLZV4%41^~DTZ23_x-JYcm9SAE3ZkA)XtaRc3{3|ROitQ- zPP*R_OAWLsaJ>-Y00u!F7yKaLS8!!WRSWJC*jb zg}ToH?jjW^z!5;z;DF$qKt&ct1GpUE2xt+|MKnZU?w0s7E1s21JZ-%L-sPSZWv!fK14eVGAKv4KW^2A34#d!5bhJ9NaeQ5=Xj0 zl#_#I0tO6z4saU82_R5`0f8+B7y;e|-Nu6&-GIS>;y3Wdh-?I=7v>#64!{UNt}v%i zECG%Nl&UZqIA-AJ0PaJw2YUl)9{_iQItgyC1eA`hp@!uGI0D^O0C2$hLpcCO4#5

    q;3M4>@!$Z)dq5F(MCJJ8y?JK%+0}1Mp69qc!8TjcPYc;Kzj#FIci6QSA>o{m|~Dv0BZo$J75IB!%!}U&q0qqn1P%h z;2{v+aMpo*h29-VZ-90}2NF)nNz*Wl3f;GgZZZZx4rDdL9w6{Tzt#)$2B;nwOQf0v z2nC=T1zq5&APt32LF7TBK|RgT1p}Ef>LUPujA|$-UITv(pasMhkmkaQ!=Isu0a*fS zpM~2iAOZs*I#I~O`9V#5pzsD_3n>ACf&m=>{0B}LWSl4v0Cfn69HJ$-K~RGrxd>ok zP!NHf2V58|5bEzo3Sm@DhLj721$B4$fhRy|ATNL;3_chUWiSxX2{R~7;G;@e;M7KYn3<-<@V1_VsQlEvnhUQ+#h9I^4uQ3Ky z2T&_KFpubd6VRYRKLI31u!)d#4CploERbJ=Yy_IUz(2rYgtq|t8bCKfBe2%sb-}%z zNU;ENLM{u51OOsnr_ddkDBgho1_TC38(n|`)D5T#AbErJgpWYZ0TT*X5#AD@9c(>F z-+@5^FABU7EZ#{69JB(%)`DsZ93#l=p%@F(g4&os-~k~M#I#@r&@?bQP=JMzzy}~) zzzaaXCK3gsdu+iboKVmJfvOxL9TZETOblBN)kUGE2x%7-BY^aS;EGPfe>5YyFAm-T zy$3{nv;`n7LAUdwd=8|SunJI)1|I-R12-_hhC~h4NTGqy8EVFc91ErXuzY|JA#}ma zLV!i0ZRn%|=#08jP{kIGA?T3+t3%ueGjM-3%IqQc0j3vfkT4Fw3BZp7wTyr&`hjz( zFT>~2!+{<|*Z7?%6@lAE`a@L81@-`4XNpQna0-x=6e=9IKL1`0cBS4l3 z6;7BPSZ!D$Ae{cI-vC2}Za@O%32>AEWFbdEKob@dRf!-zK*a#vP7T!uR2>Gg3aPdM z$s?!>xeOd12v9)OfoBEed2&qv5Qj*F3zLKR3j}VV(FiW4LOnI`Z|IlykXQ?HdDNu^ zs!-IB1;s$r!waGylpg|t3#xLcOu%5F1d3E8z(RmKgoFa34N?jL_XMB~AUFaJkh*{w zbfp#=2k-$PkcE^RT`UFE7Hn8354pI&v-+{{j!2!sIpiB+a7%T_qPGJ&I zO9|9BfTjYP7v>PW4IFql*ARQaZ^CMT4+fPfWJ%yDU}!MG;02Mq7;OQ_k721`z7VW} zy#(hEw$3cvMg9OS2 z0EmF?VBqMk6=@U*U={&k!45}^Db%|G82~+xWWcD+2uh@=`2@=0P&onM4PqEjD*}@M zxDSMHAYlcR10@3B$k1QTBT@&Z8-N^;Jy4iJEWv+GOW=G#vjZCyFaZ=VAd7?O0Uv`V z5IFyc=!2bwI&i@G!qEhwz)3d~lF&er6E*a~E`@ah0166Z(Ck1Z6)vYiT?A0P1W$4D zi!l)4fu)CY2RtIGse@1%^kEPyK-mRY2WnDq9MSJo1CEEdhpuRax(mP}@GwZu4{RTj z^FU6Dy6l1BLYh_3NWejcT?xeya1dzH5S#>-8&&y`(h*&=0Rx9h8F(`oKe`qc$|*2@ z&|tx6Aqs#qfP@|VHPqx`Z^D59Di75ZPz(eA4e$^?0*>HBj0-0Z%s?>=3NC+=HWqo**y-hylVT?0oc#bKp+kwc*`?tN?e7%G^*VgaeIuXV@JOX5fS(c^h~N zU^*cY23(460)SHi8yX3`VH*I0577j|A7C)lM+Rmf&w>mJU?KntK-uu?aNz!dxTl@Y?9gBh&~D^)n>sfwzM?EASFPen8O@-H-uE8EidBUV#}0Y5)j! 
z;7Duh}%>^c}S#AP_vh){+4FPsl>zrZ&GU`99HKy~y)Fb;z}xmpHo97t?n zQNTKYNXUiY6oSMQ@_1-tK!tiR11ArTCLAoF#=#UET2P%rg%{m|4%7;;`B2k65t5?b zd!Y28#S-Qjh6nH!g-zH%C~bkSz{(;*9XJuRH$grQumyO+{{%2VA|NUOK^UO1U}r*2 z8OqX-0f0gi>G^;ZhJq!~35d}_*Kb1h2T&XWDKzCkFofd+a1@Fep!WmD2GB8l1S)y( z?vO%&mIEOhU}eDVLxBTo-Jm=~m2WtA5aHleAOQjY1uzk{M8Q@>x10eq0v-)EA*=%2 zIfe)TpaFqC1Xl%q5XE#Po=4Sn#IAzSP};T-Z2`!jkggToA%Wf)RIgBb0?H74oV!sNLJSj@7!fTN4ITj&0Uiw&F&-&y3qSk?3;rUGAyu+!5rL0l z-}S%{!uPQV)zLGhu{`k6Ut5?@o{EJ>ibaXONCHo1f-hrZ7_qQ0c(^V4cvQ?dgve40 z1s)j|9^PpzYV<*AJOX|Kc!E_6FM2`|Jk!pjh3w>|BJoZhP4l1G7Ggpy3v`)U9hF zfk%iT#o{KS;YJ_Fy$k;zDG?vrqmUI6-5Z*#P2OcMk<^X02 zy&?vK$A0q8V2}#l1icMB0}B_AgN%`Z9KM0$8-;h46z^_S6do0rZDGd~I*W!4fBNsS z$?(XtXxRThp)fn_Fw7!fQ_L)v)oX$P_f_;p6Dp|9$J^ z%USR&ZnWI+XfT5zz#>Pp51T>)kCYiL3VZ_({>nr|gEkZfzU+aE$B1XHix|U1=;VP1+eDJ;WUL%`qzu^b)hjLZ z=-p4@5nyP9|9ixL&nbatYX3J0Fk9%ERww@lJ_b*^1?_N*FZ>I^&)*ed8fsVFeQl<>d!gcy8WIG#fMCrA9hsV##y zzKT{E9!rNF>kBi7>lEeDatcESub~Jp151J7z#_vVoEOavATX6VDiKt-asbKfRgj4vh(P|ot%8=p>SU;RSXfwu ziC9=V%*6PF_>@=-q6XNcj?lo*DD=NLTVtJn-B&Dg{QgxHkW3@?bO zu?ex@KWq%HFeC9bL(wbNw$2XDwlr8+#01oMgb(TpuzA@exLD2=K^OO!y}1sT_Peg&fB5) z*}zO+7JK*SqBK*MW?{1WtUvx;mj)|06Sneo@AG}w$cpWIn0*-c?c=jmrYn=_#Yb8< z30C6C)N^7bnp8yUuhy1l?XE8eBqR{}R;nx;=x zeD4P{+C=A^bB(?dYBb^faa&A#Z+TU9_|#>6)~l?w)*bTP0fa*xJp*X_H4*ZrLb9Q)bk_6*dXdA%DK6|po^g-esSvc}%qP>*k)glf8OcqfYce#IS;TaTfF3lzMvLr}+Y!d^L04IR97TCix4PxG}SA>mXLg zeJ~S((dpv}q_Z}EgmaHA6O-wz_E*Gut~vaGB;PUBrD$*UN(a_E9}6Gb^s)S<3@6Xh zy{WfFoDOR@l-P;YHz|3KFZW)*rRqLBsHKv&n8cw%rS&1UU^?qXm8T?;B0h_8eRb;u znCWP&)hD)kOX6mlcNj-KuRjuBQeSz)X^EOQ@A$0hN4N37mn||Wdh}->B&qoI_+Bg@ zWcgX8H{$$x)bvI6xYW6T8`vgs1^emZg0{f#!r?-abm|fDxpmySo(+k$ATJ~KuIofqE8bq z*IC4l$-Nyg9_kJPibK2=FV!rz9thI5=K4uRxQqs#e_bGJR_ol5QSM9BA}4I}a^Dop zXndNl&+e}I*x1zQUZu-LnEheBr@xRBcP#Ejn)}}6hKhQXPjsQazbI>nS3D=ZZz>F( zK6Txk#(Qw6yfv3gLV?9zaV_K>PDhSGtAE4(PGjfL&l<^#{bYss`aPp^p}Ai?YLCE7 znYqZ=#YhQ}#1_Yv>eQ^r_%EN5yO|&5rBXfG8K~Lv;c5@)&tVwTkn^Oa?f=N*>Gz#+ zVrQ6>V3E@pN33bqu#T(DltEQg&BI6d9#80-tL3uP4iUW$_dIPk3>>-a8X}Lykr!aI 
zs549@;(Jv+&N>`GDa6s6=yUDH=;C~XL^5OB+V6W<{ttX;*Zy+w-Hp(`?~~E}W*v8n zpQweXWo)w{z=9={&Ezt|K(7&PotS!cr{mPZ=Hs$6_=2F#3` zgh?BaK3O0AY{Yxk;ZK9!Ji|ut~?;|MpbFDfigh z<}s!57@Na09=0tAQTqDjQ^$`-24k+NJ!%VL9?POWqOMH?JH|#z*iJZqz4nD}ff>Ch zqZFJ(SEf?cuD1gA#D?=jKY|SsslHL9icDN0+w2rlKQf;g)mkf>*QabYYa&)<)lJfx zZFPO)7!n^6!w~b+_vW8fi6-r{uU_AzZ@=Q&GpT*+y>vFMAZwkYy!doTH9005%uq7L z3h*7h?K!U%&r=epl+(9fvV@yw^@OE$hw+ABWZ=fI%Eb55GuoufE?6d>x+aH<6J>kY z4_0T!I!`a_sS|UEY;)!~w~2pP9(-f@^MbZyQ3^lr4Y#!-%G_&7 zDqE!DpY5x}e^uhiiyp8H=7<8yPp_Bvo@rj`0k z$`apTP&vQpXFs!sT$NZmr_M|iR1r!MMNx71-6zFFbH`Yf5`)K!9 zzoJQe$9r_yk3~W~yuaI9`c`J@(+K;!c}-EaWZ7c|*bPsU9k)q>&urDEZiAUIDn+&z zk7rD`zludL7bx}asnhjG6m{b3jH^a}zyHP;myWc0M8f8c+Qr)1V)~@T#8-}lsehFV zyxV78#IJGbE^`aI^H4d>eD{45U_x(L8dzmW`G-)o;J~Q4?*2Pp!?ZvpLNGI}%_A!8 z9F9E{*f_TzW%+pSW08{ozGAq6_{*&|{$@Yr;c z_K8A?7}?`QpE59H5dSm_qHR^m9My|BYurQMlIPvx&yn29+(?rC@DT%;VYPH-k74*( zJtE0!Tttgu`Yc9DHY}nT%vmB)bEEdl@6B6(8v~UEN%gLBYGEaX&OV~fr>W9&&$wtI zJ$U&7b~x_ z4iIa5M#)W%Ii4JlPQYAt@HWPTr*_}}PFPVI9mRLSq<+-l>)GBLw%lNbIa-0(mnT$8 zcRYu96C-}^=ff3(6(2R@OV!R3sj1NyEkZ12R{oegOv;s;SBMCWXH+?F%!l{!dAwQy zP0}6Jz>to~)Yg=+*9opH?}y9Y8lE=OJO9VHzVg7(Ssy3O^KRkQeJU`sD*n6F>e{Cq z?rrx!&eiLOEW^Emt@H*v6CWl^O|rArKCWEx(qUr#se(^TxT#Tbw%AAHVJ#~o?MvJI zvRALoqhFkJ;r7X7xYF{WDW+wbcRx{KTzFi9l1p3eOW9A)U1EHlKR3V(%cr6SPq_{< zvK#Yn8f*0izZrWxeKkC`vs88F;*20a9>Z}}46CWj&(?ms?az->qgnjPd!L;df1zwU zNF74`NItzON;3Xl-Q7$9(aoYS?lJbn)?6)R&d()ctQU>lwVC=4?76^<>Rp?$KDK1@ z%Pmq%^109aAN^5otoCfW0~$Jc4OvQjRbyWwZHK-3?yPxh^; zVDkVwN9dc5>enw0y5ElQU8kC_N)%TSre-t@g~i>;8Y{d^|GE1UiES2`DaYJWON%p@ zl71lnT75W9)NXib+b8@+&p*t9C9Ho?$=>z8?(yW=m@BeFI~=o%`v*tTu-ueDy9X}=`kd+?{7!QEBZ_issN=?2Aq z1lyU0fBj1hN(X)xUt1QgrhU=Num3)C`-#QtykXqYNPnII+8zn7#W67RN_<~;a)!xI z_Clm7iHqR}zGyM8H_tmJCNB;ugvgJDUyZ4cHpR=2HEC{MG*sr{^PkELbG%-2*yop) z-pnVewm^L?waR`7-%g3JKhDs_odied{i4=jdu~4YOUYWZAj3GV5HQmd&5*xQl19z_ z3;Uy*$}N`96N)YkdP540y#KCyWcmb=Tr*wr-mG8KYIj;l>g*~BF=AALHg0&X0-Ky$O@Av zuT`&CdGkpfzcz7a+E^7|?L;JXM{vWuN6wKLqxQUMv!ER5yPw;0yDL*+r^yRQs6(B@ 
z()ce}FLB?PU6jM)(vjJHp3=bO>s`L-+ntBgIAx4c;zaZ zE#A6J8@l?-yo>+FV9x3zwa(-f>d>BkhW?@_^h!Ph&E#ia=hcE4wL!s$Nn;DeRL{?S zDf4~vQlUNm_1g|E&ybz#QGRdiKibUqNV(jkv>O{4H0jl2=^*^TXSeBbyhPY_D(mAJ z6DKW+&!0Ol80%|%{Lz+o%tUvawf{AV`OII#uKgz~OqH|R+~@qkjKvjs7uthsGvsw` zU5s*;L{Hdv$68$;U-5lTg!iVmI@r`Q1Fw>&^xy2xvdMen9xV0ap~-C{=8J~CqcleL zM$;F*i8bbBMseS9#*&&2!J7(Vd~D|Q(6XK)M?9q9?OG>?flDQriBdBg_z=sIW%l#S z2=6ZvT^d|l%haeYvMP~@$V2KgA^7Yo)d|6-PxsZnrJNT?2`s^_y7?gZ;qLHv)<=C$ zKWcL9#C>`c8#&1PMyvjEq(gCmpfWLmmE)m&U(~_y9i{7Z%?3kY#xXQnE>KD~g^4si zO7?>k1^XRo3#Qk`7aZl!sNhRa$*fS1rED7wXkjeu@uTM@Hp+{Qg{5MiekaI%&4tac zRrhS?{M#%#Dg8jawp>M)Q(UZ@#Lt$3;*@7~uyN;C$W58glcBaL+vmIv+&@oyEcD{4 z>wi!RAbdsa==JqC>r!}{goiziyj^A2=gnC@%IcsT#VU*B(uVxeO~T8wTN`(EeV)F1yIJ0tj z-FNfHlaNSzcfMJtYcUQv+;66B^B3QlSxHdc@z!(nR$;0gx~+HZT0oxeXt#*H;LW#5 zMmw?-zk64m2p+m_)$k2G(A@8QapCkO4bOSqHR+oD@GKaZf=7MrfR{vIGCn1gZ*Gpo& zmHH8U-=gb^jO8}pPLawP?Uxb`n_kx*)9O)9c=l<>ctGff#&X4+9uE$glVy71;NISE z+o-Sp0xCsQ%Re;mgf^KK{n$B1K)3F313L|aR|Z-Zr<7?q#g;n zE5-fntzUBWSEg<6J_wc^Ms;L2r82c6jXtdg)x6;BsV2uB zJPXle$=fSIXXg}5Jb0_}1xxx*xpgqklN-J&FLu)oH=#>BzpTcyHK6g5_>dCXk zX)#Cds~ozIgT}Ktrdl4KWq5TxROjBP^MZ29QK*uwc{`p;-+9>??aKM5j9lk9oj#FO zeO%T!3XMs(>c5EnoUCoKlSG(>r%OR&eUesyTY`1*517$NSZ{Axe8n71X5ifXIlu+) zhV(-x+&>N&sq2c_X)Z$c4p;xEa{l}6EdDgcEGSSzr=>>~^DY1K-n|$87YG>a`X9+1 znZ1fWh;j}(lZG>2=p|`WPjvlx!cgDi+O4%mY2Qm+Tfhuw)Ro7}x18?&&`9zL;k1;& zmb-r1;5m_}pXB7Gp0bFs+|K0Pk74Z=TYJ-{lbN-+@!!;xy_U>Z<=aMurKOhFug(N+ z8(OD#?XZi;={(z1RK4~^L;OtTe)yBGJoE08!Yze~sO;9FdDs)9dyyi7pd_APzw)-N zvz2?leSEOAD0K$eQwbp~!oTs;Xc-U`{bnR<}!h3AX?rFbf-@6tr0q%Kcvh7c5_(>Dr}8@GX(vTxUh6lQKLdmsExcY4#+M?1|h zBvPH8Z)-}!Tx!MZQkF~KbE~u&oB8I`&Ys-rgGqCGj~=;4p09o$XS_6hU++0@Rz!&J z8FMc7^sKqz)W`$NjeRzS0Zx{b;iik&Wh7=EM~7gBDgQSA&YyuP3?*UZU%B}e+@&C2 zyA-a!rw1d#M%=Q(&-_lUFUj4sb952B_tbc!D!B9XQvxB9RIeYV(ge)p*9vHCO4kNl zv3l-pNasF!mOj06SN$HJWLEAiE*Z(}IObHUjyN!*NPpP-GngRyuA|^hyyk(RBmP{m z&wqz)e@k>mlQK*;{W2zsB%(P*T0Cz(eeJg3w%eWreN@FfJ!m-SU%B*9b5Z9)>;Aq3HDfu5EU=Mxo8DJ=Vl+ 
zuQJ@3MRQh-1ZstQLb>}ks_SC}RRX<{X5Z+2(kdgboLx+R9uYB{t;FnQSS%T3*B3MP zD>~p>RaYF2WFDW#-b{@ub`TBo%6P4KjugSuS77EHZ%j3N=!;L@jWl&V7NJxIW(^cq zzw0Dv^&2+P3WutV_aE0Ye6boC9Z%qK>s>Vbqven^+RZ5Vhe{XEc6|J?sJ&O20?ee#taeJ4)Dpj1krvETpdn|VG*XBi z@}#;G_Chr>#87BfC8Cqhd(GW#=@>Gr!<3f37;QAu|7c;+0KR z#omLLVii_5#%Z<1v2*pD6&{vg-wr(3897wn{zdDKi>d#^7W`Viu+?XpjyZ~)?V<9~ zop~MaLTB01*K_7~7)0B=2qn)op1Z7YgEbJ$ypZ{{Aj+&vZ1UjV^(d(+bq?XQwIAIy z|DKRY#cZ_e{#7{RR9c&)`;9A7@kLkp`GS|#$Po#sK~=dP~j$rfyqnJtGrcFi$hM*b3eq<^eZ;M_!B3;nF~yuxm5bd1YW zmhYlbnjiYME6r*1xQa7nw?upReom1Jo z@o#R|*p}2Q>~EBA{nih!wRslpx}X2}D*#DwvfCV9@967H+gako?|nM=bqDXvMC!8dOnn_*(h9}uEG1#y{%Ys>88J#=ZOA2 zz5voXTg`*2b?)h-F?-$h&dIM4=QRD7>f1b(I15Bt1%q3@=qpH8tmz#}Y;F6@RQLqw zdg7>ch@I&c5lUPd^p)SVX|FsjXswgqj8s@ovYpa-T6l_DKP<;3LzJQo2K6?G+QYfp z2&;tpHJzP4wZ9jZ+V)C%yIrvL_xauDA3m5mwcIx}Kcb`Y7F<34!ml>!A@+-2Z921# zjNXo|P>H@!FPP(>v{2$s3+11H%T5)pc3_6D8uv*}pwjjIizDxh-|@w;u`)7cN99E@ z$Qh%#u_{;sNu@w8j zukLA>ISooO-9M*fUKgEy7}j4vzf+&b`_uHnEQ9IN z4?=BHtgiD3x_5nCc`7=S3)Rk?y06D@Wu+|>r=(mw;&1DYQ@W5}*lR}DKgI%!Uc+pW z)R7`sX7ZCGzp|$WU)FNZP<&^NF2^!>1ZLt!Ro%bTL^1S9El%G_TFvy^kond2O4{NY zZGTN7W;o)OsI&d0p1;>P_UT@4yY4(=8fytX{Zet+tK(&7j_1J|Eym}a1~)ESX8$#& zF?k=pagQ)5rSa?i%Tnv}#XC8z8>Zz&U}nMN3t!YlHix@IXVxNf%GM)0Ge4gH_B8qV zUADVI4fz!zH#gesZpOSi&p|4hfhQgl)%eLiLPBz#;Lf%TwpfhW%>mD;bLMg;XV-~K z+d4!`Hu+{%-DbH)0*yCc%}vt|4y%MA71m*BMEhfA^8DG_v$I2s|Mpq#ac-)SDGlL` zdNT66>wEe=yZeuBw!P2&NJpcV_A=YRrJk^wtA>g#bGI%Cv2gP^Pu>uhNW1yKlkr{p z$_-Nfe-S)ker>NF4n*r%Pih~u**v5`)KHp>xt?H5##K7^(Z(=J>G8RQ-Vb4|kB%wn zoYgtXSJiEeJ)UrwD_Bfy$NqX@ar>ub%5v4b#P@fH>`DVn!|r)^ZhBIPXr8kotfQhx z;T68-d;7-)qSw*fGOQ}0YkLtgq>Ox9V5Z?q3^#?(=y`ho+a9{*R0Q5SWV;7F8bFJssbp5pupaW|1H(iGyNuT!I;tFxs2m%{bx=Dz@w9-P2Q*`AvdgJ1sE zhprV%{d|Lc!4fu~?9dkv0*f->p=DtSzd!CgA-n11I$QhpN|sa#Qugu*~t@B=gE z{&2oDD)%lWmf4Rs#~q)=f1ESPX1CAY_9_|WD&aSN(;LzfxVLB)_d9ItNJk<(VVVX-MlGN zpIu6eu1Y@HG3@&x^Wlu#Rk{x}l<`+STbC>Kwz&jm4p&|-FI~XGU*L+3HLD9wDbTAtU}+Y0sE>)Y zE8eysnW@LEy!n7lE5y(HY!bORtJW`7o;9O;cZvS|4zq9-pqMiGBbbsu8{8pIu|Ca3 
zcEMt0xyI$JR+Gco+V`EYJ3Z`8U%rExY58lqQKwhfnTq5Y!|P@yJRAsS0uLF?1EQ(* z(w=26)tnv^Qoin7dtelp&S8;WltG>PTVSA)*fz73Ael8tYg+T1=7@vnkioI$P2yHc z*<29@UMX5ov+@PL&!bjzSF6;nJfbQu*}g>@7TWmDSMO`KOn&QX(q)mOYs#t3424=B zyY#7~+o^06T_Y^mo|GDi%xVcBmc#eBp;Z}c9*3;t<%Q|>*k;_F8i}s$ZSvWCpYIG- zs@DEP@o(Jn+vQmTaVOMHg`Clf6EQiw*US^0T_dQtu6XWRr-SI5DF7GjTsZ`@SaZf< z{{1Ss1Zos%(Q9wjD`CirGtaV8z8m&w^$RT%kmi1C%y82g(*HyKhNc?Npj3g7dLJu`xxYXO$r z&Pwdd(z(QE{ff=Y=T%xo5kYLxKhg2l_<^BlreA%n@9D}GyMTvJ|9vu**vwI0wNw$9 zeY2Oj`YpZ3KulEoY2V|B%#eL^_3(wVj~VQ-^QH|lZDN$ddPK5D1E(#R7$zNPy{Lb8 zKRuva;bR;bOg%F1ZsK2l3uX+%C>(+V);B7Ab{8faW{5SOKf=k;xmZJyVJ{wd&~=9) zSyxfr*Yan;`+>cp*M$a$jLI095sU5V5B*)-w`A{6{62T3U@z$k2kFbn`{6isQ=>7iOqc8L70?D12 zBDpr&5Vz6lYRaltH)C^tU)J)ueCdoIJ!ACeCku{5U!QO;KB)6sn%FfDA10j2c+5+f ze*U9PKw03tF-w^Gj6@dI9GLlL>a`+Vw&|4Fq^?`8eC^x!OV;NI;yg03rWo#twYSaR z-x!{B;KHhD``h95xixIbqN+m`rUrz(rJ1D}MFZyHl*P$_kI)!}O3amw10q2t%Swg=PNlS_-vcG*dMoO=?# zrAWkLr5LW*&HOPY+r9J^%%mBPT|J|0%kbt4zMF=a%=SAxJ+n+JUlUGsXE}4lHJSqt z3xAed?kh4k1;+k3vgA;7{FP~qSW%Xdh16tC}CMyK4uQlVx})C=Vnu6-8#)#cAO z1fHk9Xd>ymVEM(k(z?W_^S!kKuQ>a-_WS3j$mvXp20~naS!vFGjI_NS|K#nd*9JL` ztA=yb_%(htrlxT?Ha=Bg=9YT*IUnmS9j81w`o&}HXtoVs;X9F_9V**JJ1^oy2r{s~p($&dam@R6eCm9hMorLTU-}Hv z2fi=6+NQ&Y2RgMrBz3$7GavqOJ}p#T+F$EfWl%mko?^Q&I7HCDSZ%->hmKy8xf*MM}%sKaqzugPR4&Pc772T_V^fY*|>Z|9e^U)*Ht`P#m!dJ+68@Xe+ zcf2FEI6qEiCWegLM1IdCY88lSC8qR|+xzxh7r$2gj&QHn!2-19Y-yp>a zu6GXVxA$C+8=}k&`4eofU%mK(mO8NDK&DOjK>cr>FkJ^sA^RXKrUwM0;nDo4x+Qh0CLc4s8WZA{ftIuC5u?U=WXgrvBjW zPQ|cUgXRAHCvDD^zpP;9DHXPd6-hxS@ss<#68-5sVgVl&@|0PBQWtH^{A6utZSWqw z{P$wE)Ghhw`F&^i%Ha=+_=hUC zb>lMc`MUVpgx*yQP(R|iU}kt_NokCOYhsLB)rDb+-!D}1Z>&PTgyMK+HsM}Qdt1ZM z9LX|$8N=Xov09kLt;=90-1ut-u`y&Ksx^VlQ&-nU*jgv zRc_5vEv=ntYUXReFWLk%7B=gLoWU1w<;$IoH)V1?AMjj5w2pTBro&)>T;ou>iZCl% zO^eC3jXQLNbFl_@qVFq`x*bIYJtz+e;1kkkVu`X#F}Tk7F`6+$Xy>mwx#?+1Q4^Nu zJTJQ26~c81lE+jW7}2ERKYO$Il>0i#fmQjfD3O93mPiLfs<+9Xwde#gt-dod-iW|m z&CX}=t(Bwn8_Xi`PVMqNX884Q>+;Us`I}CX6NZf)%cCFS5}h7dQf6vVl7+P08o?^< 
zDtbqI>#OjeXxVpC^vI_rC4X`>p3{BOx~lqfoWb1!H~f)^G2_+`#obrn`hgi3&LI;O#1PpRnA#Tq`j)zQ+rlxt<~d7^3`X!kBw!;xwK=f`^{dy`OOrqt;*6h-@Oz|A{H_H^gPoe2G#L1ja?;C{YAX@Vu*D{ zZ=?#&wUkR-*P0IlGewP8C)B7lzc0V-o|Q1Da%?4L3~Y*2s#kpfep`RLd2%eOON35j zQiG^OtVGj|>$rnlpY{RIC%rnk$m%gn({h&^<#Ytg&qX`-Ya-=@r^a~P>ck)EbXWb6 z|5P#>FJS8Z+ddx5OkzbO_l@Ildg;>j+*rNwF#3h~M97%qaV$sO!;l*eb!P_u7+!aB zxKu6>94JjanfLu-=?|T@Be`2ml{<#5IJM$FbyUE5W<-kPTr8_K+GzdGx! z;?LVh)a;nYc}W<|?DsyP=b-$Qc)ICSBKW&x`N+Ozhk#AGtMgMOU%+#a)&*-%l^ zG4Gnz@l_|kWMRNeJxx!%hL2o{LW#j+`p4PVRnP`KM+h zo8)huV6OG0d7kzuUv}laGlfUC*f;g~T;J62GI^|s)l0eRlX!0PG#q>=G#V>4Rou6#N@$1t3 zlkq5$$ld3NGr&wlSSQ_){dTe1_rkyjsYVvXu1aoa+l_oM9<;?aB%`*BjKL|~dicc6 z(&Khj3}AK}BIn{R+P(9h_t?-n{ib-g3GaC8c2j4;$F~cSlj==u zaU#y~y(wx(U`8KzvaqI#FXBz=e9hArjvFk-f}aW}QZ$St^_8A92t*ea1&|0`TB)J& z{3q+(_@DQ_%oBW;kdHA?zFl4z-RXAtu}gv`B6+Bd z>BX{`@>sT|(b~~JtAV?^?9BJ_S*>p!e~@Micr#K|PT4dpRQXYE-}2^&Gez-zU)_W! zD&o_FJ-7D2Oy|zmmw#=K1`J&ig)GXXHS@Jz1;xxIO&(qGZo#enwQs zUrNbSd7p2#%zh=L&b>jR%Xn{-$|oz?V$N3B3x z&R-ewOS1D|#=UFlTk#W))Yd>-%-a>q^Bb{Q0gUHa4z?qeSV$e;XT2XUYp~_ zds^;U*ecoY31)ErkWp33Hd23Q(BojLSf{A3t+G;*kIlWPsA$8{I-rvB%36 zrP1TBZi(1I(>>BhtnE>@rNT0npIOMub^1JPq&?s3a~4~=y{j43&&j#jZ$%{#F3Y%_ zGw{6cl)}GBxrPd%S=Y(!N7u?Ce^q3qytymb{d|L6^#$8_$3vDwwOX&u#L9EKr%A?l z4E65w%_5P-zVE;{yto=h!X+tv65<-s+%uM<0#6K})K;B(#vrJq{WsxX=8EZ^ThWdj zw`(R@gKp4vG~g(`%_xmjc~apt?fEG>xRj{OXffuoOvEvpM7G=k=iOUdc-qOzZ@P~i zc)Y)02ZNd2;Yi+pWr_uPze~77w8JG_zm&3?gw#Gv9OIr3Z0)xcap0NADtOv_7}*&d zcwuMpjg&&VOH*!GY|=}CSgysRDlPhtMKw41?qX6+2djB(kF4bJnbNuqeu{iO(xXal zj?c3~DQ=3!m)=id`wijR)DmGYkIPCD9_%?+6`wV*)Qjv_^?PRZQ7@9yjqQre_911* znd8KlI?ZHPJhR_@l2>{tp71rIcdRw%*9CG)@}MWQBVLwP%0%%BFD({hmE0fk8hW0) zQ+oCZg6h7<_t|Qlc|+53&6`}S|8V_&cC`4y@MBn7-R)mi*ppg=W^4C~z3XP3?ZaLN z{V|dbGR_z>>a6+sZSz7h#Tjv{A8tB-98|C6j6XAc1eZx%T6)EA`Rdsht<>{Q@g*Kd zq~ElckRE#Y*lL4bXg6x7St(63udiwqJLc#9@Ap(h=dTW|t?Aq!Dt&0qpSKsUV9Q0) zw-|^iNLgJ?B~Ku*EL|CJED-0iP+tx`*I)nO+O3i&g2h<3a8ukBniY)Ie;xI^V{3cs z-liKtLgjZaPOn_7>^i+RN9>hI3$T?$Xd!TWW-QtWyZ 
zO~y64s|y8SM*X2JjRb=LuiNHh_cCSekFw?798yvlA>HkwUKA@=<)^uM;wqfv+b+iZ zxIh>3Ah}HL_RkJK{YSrvLp6@hYOlS}qBZyYf{C167JK+F+k!@h@g9-v((P&!w<&Cw z>#nZsERtei=7a9Fur-M}#vaqK3l7e#|6;vpWFO1@^RIsAN*I|i!8Et8%HNK!RP}<# zgv6A5Y7B)58CSa_8o&Q~cJx~IMzy$uZ0IWg z`4#i@iH~4Lm~FYdiE2Hyp8F%ml*-HJhF!KM_pM&C{VwjYEcy^R#k!ajbJ1e;2y1M` z_tMulMq;0td~ZzZrc6KAq!4BI9B{{2%bJaV)im&@(Gu;d^OV-47Nh%4Aiuoq*<7M92 zh|NuW7t9=+uKSk=Na3{U?n)c>=!nJnw&)P*-1$(}xqqnc)%y5x&+S0F*ZKkhT3ami zDc!p<8#?;&pXjt!s9n4JV$R}z*2@~`c)|R{GfyngQzGY_W@daarTHH|icjk{pCxc# z_%s)zqSuztVsauO{eJPJ*KyX!Q_o*_{Dm_gfAy?jI7j5D?4tb)Ptwo-ig!R)WALTE zy9*0*uYWEn(bSCI)n0qT;yKwM_Eu9xq>sC`87ZMj- zG+bxAz>IdQ!~LlBy_eVmq4$WiZGBb>rRtXL3_f5F`Q?vf@$T+s#y%2*R4(1bf5LDs zLd8x);JfSkyG|;_TgI686u=N55au3$60;6qjoz|KZQeK62X4?t7>T zC+%l2;~B`A^spz7?5F=>$2d)&oLlk5wvQk8<~Ap;N3Y5r;^7tsjrZy|645RxrR_R3bGV~ZpShpZ`tNM-%KuW$eV>-^5| z&@!LT=UMLix~}_vp6jNIy6{$=J-1EZk zo?MYZ`}V(k^tiQVWYsqZ%zbRmfO;FxY1;CVJ1%Y1xz1Okzn%Tbg8xk%@>tO^{Rhr} zW6&`xp8fLk<6keS_ifqArFH+)vS`Ll{pu`#W%o;lEZ_37rO%&w_1vqLEZpj-!wxy8 z+4)nu)H!7EF(2A?;-m}T%b2mM+&x8jS?L4l|;nUw8cKXW` zPwKz(g0KJS{m|vlcHjEnXTJEYc-qv3YtCGE)AB>MY1RMC2lw0Y>kB@dd}HmQe;w0n z*a)!YH#OeVV}`AJYxHIJ52<_6k~bC( z{q)dHR=v>S@>MsD-lY4S6HmPT`Y8`~cPg}_ie@WuH%*;Fk;OuNA5mj;Z-9h?EB1%yAR%RR^Jor{yzWio4&hrb?5(e z-}b!ve_#IW!H0I}a>Pkzd(4eLOgV1!mYXkmdB8m#j+xY_{wc?|+GEOBuNJ?4$`6Ac z81wSF4+hT}Hf`96-}M{xz>J$8t9iK9qgV8~_KqK$HM{VjHYdMUI$*bpKK^>o2PThe zG_!91-)C>L_r7)JbXl?A%;rZwbo;Cq9z1`-C9l@bf_`n|5qsU*>cDr_{k`kR`7?K) z-0iidFP!s0(ehsp9ks>GvE}oYT($Dc7wg^g%$0Tad%DBA&#wF6%?tm1VR+psmHVId z*$IO;f3@jn#gF`WY5T4tUweDgX-5sXb>de~oOizLq?d|`r*?9HwcNWjw?){g4_;CMPrFEK*xO4O)SHAV~m!~!x(Ww17 zn?CZ`g4Mqqd--2my}sQEt@b{C#_P8~|8v8s_4eNKi+U}7Ja}oxy}thN!2i~a?smYk zgP%P7`*o)+n6}Ty=LB_L&}8gZ-45t{*7bi}a@Nw<=TF*w$6e0q*mKMom+XG}>_eI# zGN$Cl@i#r$Vb$|3ulQxeBZHT0^hD=LZ`bNQe$7iCt!_WI%i&iXv1!ZB5A62c^2UcA zwO!fB&G#9-#r|J^vdh`MpEz&m;^K~HJiJ@=ekXX$%$7s1yzBl?emJ>Lk43}xX!4(C z<8MEA&O=@D`)m&%SEj z4x3zCb61_IFLrtA-4=aoUGU#Mo-Dqn^H!f7HfBM`MRPj#_~!SuxAfls?DVRaG(2YH 
zw;z68)wo;JfzN)|r}EDUbDx`d=1zTEpZ)n;PxL#Z%SCIpyL;O;o8CP2{;?Yk-|6gT z+pIe5r2#WnzP;j^{+G4yas78w7rZg;)8BVkc*EOmS6y1WQ?2>Wmfbt;wH@y||J&XJ z9v^mE*VPwvOBAU7YFU;}?E8 zta;yBr>wf>yh|V0?&aA#-Lc0OMQ0yg`dIOu-wpY5*(>`VbmH?TAJg;mH##&zWw0Q5yg8<>#+SjodzE~tK0N% z1`S-g#q;%k{OQ`acE7(QcVx zPfu_3{KSJcz3KY*YEOP?*fIT94;j>KW}Pv&&hC2DIafB>=!zYt{Q8^6jD5TG^<&RG z@V~>REPeChliSxCKK!rpefJw!f3sshKYZ;D`^{T8b3{q68xI;fXhP*JCvRN;!>8MS zHtWX&4?KMLNsCTuzxI^tyWhF`*YSN{-|ed6_LEQC>&CzL|FY!kGya+M!;@Q|-|N>- zueEsQvCq<}44 z;wKNBT<3zxt*8-;nSENxeGRG(*?ic}hqw80#o+HxKk9_~mksW6>#%dSX?oAB&zg>X?}Gd0w!dz_ zE4S*l=+bf5c3HXp@Ui#xJa_Nu&A+JiRD+wod2g$?1}*M3^XZj2j@{wUp)IF%X!+Ba z?iU}upiaFtwf=1P$*rTucD&)Uxt%7Dom%hmt~*_JQbqeukNW0@(vL@VKW@OWhs++l zcEKml9eZf2%MN>H%|8Q%?)5d@zkYhsro(<((X;cG zbEnq!n7LJ*>+G`qgO3l{b;=HtuerO!FP;7=x#PUM4%+ULj|QzAx#;+9pXsw;(AJ|? zE_i0l2d$<9d0_Oe3$h*_W5PpbK4)k+qs>7UNi2r z_ja0DzyHGrob}0~T6gVu{f?Ihs;_Ky)IVPiUfy@}#mkPZv+4E!Tz~VMPaia=){riB zpMGt`rvK~x+NZsT&3wF3-N~IksF}6XZN0MS<<}MyGuva9C6F3GjDsmrgh`; z<6bzS-5Em;SUL9e)m6K9Ui;g72OW9B!p)ZNwS4}~%l7W>F)u$h>cSr$f4lp3FZONx z%!gw-mN)t5vB7(eKcw#3euE#L^ZWP<7rr`RM)MJiDqkr(b*Eik`u4shPmjN7mo1+@ zVP(5hwtVU5=O0}6_tq!0_hrS9MN=ApWk<^d*!mz=MH+q zW7d8#t>o5^cRPCEMO*gi{l%OWPd?xO(GCZmbo{FWYMt8S$Q#-Y0DOUeeS?U zop+hnV{zSSojVS_dj9y9m3w#GZsNI5ZvXaHotodfQID=Woj$I`8&^NHdf3|456v2K z+>h;!xvFU0i$i|CV#_ds;+?O&_u{8p59)X64dd&Synbz?la9To{z0QEc5ia`**|=+ zwsm>K;Xke4e~W|WFWdO}i;Iq%`d8Ohzx`QxMZckc4*L3o$7;@f>DWh}Ieh=~T71#( zi4{9f#+f%R{jkMDJ;rw1cgePI{j}8!uN{5o&ri*HuGTZRJb(W5Q--W;QS(Q$ zfw%rM|IWS_?6hCE%NOms{kxa^@!{pSUAcGZ8SPKM`p(W9{dM-AAO16G{=W^Q3%u3# zub&6jySmNyzyI;}6Vq4DpRnxRcH3XF>+eN-EqP^Gzlk+NYwdJO$3tcv+U(>5e;oDx ziBAuFz4l`RYrXgQ-s^ka{rLPF_Z_i*UB^>z-L}Kgt)@LNWs}99uUv9bi!DE`{%r9- zy^kMvV#hWemK~BB!8fH79)029U4AS-{-T#(s41TN-|g-gvERn;J$mVW4X=Kq(Uduh z54-YzgZ?w;ovoWxo-nWXk~2%ctGf5S9e*0N#ZMj1UwhKHi{?yy=$SgVzPY&JEpvAp zG41GucYo1lm(gvv{Hx&upWQj?fF>6l_Q&E?9y3Uh;mmiw_Hh{<(dxcKP-zd!lftUbQ1F7H`Yy61z#|9E2hzEdBcyZYFKXEx&Dhq zJsN&<{VT&7uXttlzlWFpu*qJ{-+A++OV2#@m^cLc}3HKy?%b>r>)*ULM8z4LKynET%zn@xt-QS;&7~Juu 
z6?@;+?U~9Q7Vh1#MT-;LeZS53JG^+sfNQUQeo38USO49s!TqhjS=4vr`$s=Hx$c)u zXa6#7w}bB5XOogq|4wY(WBNJQMt0^ENE&H;=gw03xYqevuZw{*SV7&u&{&m81Wr+{2UGnp$wYzk$>3-XPAH8ME9s3Sn z@$oKmXI+2clRJG}T6*c;@10WdWitoHSHISNc)y8zMY;WP=pODMn7ypix7!_dbBlI$ z9&9vc_s1`o-lurr>C5KTZd>=OiaophaCrX?$6bAG{TVHH=rQS;5gVN~?3)Sow|Mvd zHLp)E>Q-5A%-Yp`FKfB(%bE^jW(_@LX}u2pht;02=>O1S)+~@W_9lrc#%IumKS~l$S z)feOT?=xuUFZz4To7Z<5+4`T6-`#oryyeY?Ogy6b$a6ke`s1rJ>Ye%g`}@^&ey-Q4 zqfc+w>-EDH{CAi3YX*I_{P|Yh&g$~$xI6l7+UM10J5O1>Zt?ajm+w)3TBqXgkGgeM zNyk;c3_kzE0bT2F)8~c{Zfo<;2^%kcHHG{OI!@p7;90dl{QCFbk2rJebyGUt`T8BF zY|>%=`S)%7P4{mOey3ioNB&s*?hYS3*6Hd$u@ z)#S^OU%okT<;wYUpE>`v!R=rC>W;3>Hag(cnLQ4C@ZZ-yT~cS;?tlMLcb9j5SlW1p z1_M9-bmo%t_Ncl3z+Yb3tr-yV1WT zPkqt2{p=c#`QM65YbJlSc7CgEZ~ow@V~?m+I-uw0Pu_WW=L4!1@AmS%VgKIO@2CFd z-5Tuu%#0qp?Dg-MfoorRWYt!)ADH{&xf?gEnswEzD<6MzMA`7I2OPOkgQ{)=Cav1& z+q<{lW#7BT-qy42C3ieE`HvezdE&XL7fpQO;%O(3eQLz;8`c%i-sPQ#Hg9mn zgx9Nw|91Bo%XZv*cGr)Zj~RACld=8&IOyBMp8j*<)pu+>tk=S^GoO3!k#(b**MIfF zzUTeew%^jH=3Fp!>RVf=R@M6Pt$+S^^o`ZE8`Zt-tG&)XCxf>CF-z}%aqv$UZoc_D z4Gw#9d?ETJpJ@(#cmrr}W z`rCp_Dw1B6!nswa5ult|>V##rhzut47w^zvEB&q2Aezhh22zuia06 z<<7cozj>n*E}3`xS#MR1+WFtrs(m%eXwe*M;;pPhN`U1jxen>%U0H^w%9rKrR4 zL+kE->Ov;~f4u630X=H>KIP9f{~3G8VefA@>GNxQd(4gBG`seO*ZPjRVe}a{-f-|{ zL!T~xr2EE$F1`BHYp$sO-}+C#aM85$FRpxe=&C~+_5S$N4}LrIjQ;oT(YWunv$sF) z^Tju{Z~oZlAB?|z>hc3`s2Kh5%ukMMwxCY=gTFpEyG^%CyFB;z;yY*m+-32?dLGmE z!xa^iI!);H(VgM(-X2=RMtf{R7Qj*kSIvs*4|- zvi5>`4Xa1DzN34$dEE!S+p_iZyKKGwhF#`gw(i+q7c_e0f?j_N8QFM5qvl8a_`;GS z228%W?+W+W4j`t7S?1|0a-S5aDw=bUZ>A=1-?>lFk@y`$3t@^AfJ3n6czxApg zd3i>~;BgIpn>cVn#o!|+|Ge$;y;_w$watz1zPDYsiFcR(dGnG>o;m-!ozMGh=`{=P zIibzmmNTy@y=U^Z4JUO!cb}_{e(=c`z8{ferEU6NKBHUn2RirOYS=98Rc%{?(z0dimd9Bxfn*W!_T;F_N>kIZg z=&VipUv=-ZN4@&Uh?XN>{Ap3!GwQbaVRq@8pPb!p`nFq~J*d;euXcQP%FFc{HLZE7 z!->b=_{zbD%=z#8zn^;LFBdfEcIm242leV-lbKd6fEL(leYt?_8*>cTk zJ^#7?)VrwHZ_i!2;Pn6WIpU4=T{dmC?%Ikz?_JVkM(tboyRUuM zb`}3?GxWKwri>YL^KJW2*njJr=C|;euV-Am)3}-xV(uiNv><^!5;zWL0%UO0Nc>Jx9Cu;puAFYdE;8M$fmO?K^Z 
zN~`S$E!^?7*B4wc_55jvzCG^VW3S!j;U^#Nc;BJBeRkAor)~1X0cY*ll?}FD^u!tU`#p1OuiY1H@pIEFM>MW!aZufn zU5~l>@e3Yle#O>zUUj;HPn)U)k*4 z6^&nA-KyK!{kk6V+NpzDj=FZc2E!W7|9OvHKDu$EQ-&<=+WGFQ=C^9O$?zMW8*swe zC;!%X_FIdVUi;Or^Y35t>ze)V9&paT&tKlHxO2rdzx3YQV-DT({)<1{rP)`P{c*r` zt6%tg-o>v}?~=?=ReatJ79@zoh?VL-uZc;DF8kKI_ecm+rDh*P}jL z{8yLOpU;|o#kkW~S6=Gq+L&*iIcbk2KfZC-Q_XsRe%qZ-G`O$2!E7Mo(yRFCkbY?}~`OeSRY`(?*tF9k; z(Xv7Rbegv3Gml@j$-K|^IsC8le?0x+X;qJId(Wa>H%(1?c*#kZben(wlbg+b<)?$* z_~HI#d%bO)e|qokVMEVu-}J(J))uWe|Cip+|JC5#-41)} z?WrTa+~Ua7H+$;d8=gC>$BDi7+W3sBdoEab`pdu1I_*D4Z}!WDJ;qlrs(J5^(PbSy z=JxSFAN4}B%^FX6@q;5qP3$%Pw_f!|)|z+05mycE{=^k?Hv0F~QyWbG&*Gyt`}^bj z`?Yy=<$aUSzy76pKaIGpqV9;lw*T*sr(E;UO{Z+~;>9Dk?pg2L)?4aDeV{xkl*cORHjtNxTLUthKF@cY-?)-7!bAFgTl#!k<^bl`Cp{b$CX11h!| zJ!Q)|M}Kqnfuoju+^g*Kvm}_U)7%>&*OOp9Z(T_1Fg=4(|QY|Nhhe<5k}*t24Z6>#gRlyX=jl zDt=$ue(0p$ix+v!#b4ZV?64NKs(v4F)<&}$tz7;@!^XYN>GaCPpVznh^qFy^I;yZfe$_U{DX6*cKYn*uRcEgz-HZs54`b+ z`9C}|diy=vHY}gp|DW*_8XfcKu!p~X;ueqj^OBkSbZEHvsN;A0=IEhE@6fzz|E7D+ z7}oTsk7|G1dgSEtE(dM+1hJ zYh8^3+d{@3nN>!_;rySJP=@2pKX{dm&RbN1*n;hsAiy!Pd+0nwq?pSka}FV4Kb zecSIw-G6+myH}08=E;4ZtUO`QrZ@L4d3BrjKI;C|ziVE6?BKh$E#GL+i^u(3w*N<$ zbzM01oGFbe4)}Aylfyf;I&_~7I}cvIa^=pS?%IA*gJ0G+t~+YGS~c~*xwg3Lkh+^~ z)9t}~HuIQ9GoLOyxpiraPUAlKyl#Wh-~F@0gBwqt^wiQjw!UlpZLNFU+qJm+T|b=u zV*AVY8h`Ke_rAZ~=vRv7UU9*w4y|^0;;zB}ENI!{lLL-B`l@#-KHYvq$B%dGd{^1+ zTmNs;nAzR;ob>f29Zwm)Ytil9ZuFQ2!@h4gdCy(nIIHKF>yA9^_X{@La&3#Rwr+Cn zphG)d{6MeDQG*5y-s-X$FD!hfd7qI@ZoKNIQD2uo|Nf6pTrlU>uj`yy-g5IJYu0~x zS%XHWt@-+vg`>KBaM;bK*7^1HZg*_5^x)C^mhDjMq`G}R9Og0G-tpYfXGhl`((=&X zT7Gxh6`kL{{<^Kp_N%k7Nvp43n$@6Thr9Mar_b7!+wL*z);3Faf2iyF`Kw-gd*0Xc zT6Q?M?~(Ukd&~M&#bwiOX@CF9x8{6s?dYFc4K6rQ%fr=<^+elqm5 zQ}^Czqg8z`o6z=^DbKb0^}yYl>~qPvjccZ-HHtfW4d)F$9c)|}d&GXuJ>Tx9W|itX z?3m5*yW%CopU4&dPHr#_^J3Kd?OP zim_V}M;jYT&vIY ziU%gY1EyHKr*iLijT>}XM7gB7#$~?|KUj%#X{u=*9}PFUGi`O{xJ%$&_}$ih>^1!h zwit}~+%sR~HhJC)d`>F)j__jQT;Qj;hdviO$2qXPIKC;dwsP0FJO9}!ahu4X#e;!U zmFs|+;l%radm-CZ4$%dZ1lRx8aM|Hw&6U7~)NWGuhCIdMsmXhQR|!`M*NXEoa%Xd8 
z?t6DdI6s99x6AkWSa}Th2=*GW24HpMg8w$GsF{khp2%I#{ybNqHkD;1jBNOFa0~0t zx`$hi3-R6ioU8oZvCg!P1A=S6UEj@V#t86K;Uee$VPH%_sl&o0R+p~BXLiDrllNeMx2ae8yD<}S*S@=+ z*&OiL&7kovVe!O<&t=~(i+9O4=TlCf<*rp`^5_O@L++AlbHg%+*%rSG_mA_VVJ}wB z#3yBv$QwdhMJnr1j9M?Tg4KA&Y`y4w#UR?ggee~`acOQMuXS!{@ zt*hW`3_*W4n=_nry}j$wnMCnHa4WZ4++EGhyu~HlvG=NAF<8Z6g3}`VV?)g*!UNq) z9uvZRk9)ki^K$HQ;X2p1ifEIpDcCl&E{gRDe`9_FC2ow4!3cLLZg8$IXXRMw%JtlM z&L6-f=>C`?ce#GKoA2#GHg)WxU2V=VIKCZzUNuU9O1b zOBt75h7oR86;Mi9lxCK#Nb$x8FtPF&&T>p^SjKWyhzqtq$1{i@h+DgDDL3D9EafS{ zP$7m7wi3%a7uItjWVpqlp^O7@5ho5tH6=_(;ylA#$x5*3Sjq@HSpwpb8~yzyU(8a^ zweY-~Sen|+`Ejm+=ZeB=#2T{H;&s8G%gy$2x^&fkC7VWO!hEdaXULG25eVB$SLDYU zrJXRzA<$#u66n|1(l9zIaFAKwzl*C=d4fP2L?q)A7%G{W0(U4z;yWPD4FKXes)KoGpa0xI|Ri-NyoRRqH zSde_J_$jzfpLa%oInB!BV8W1t>y!j)U&T8z!_4o1*#*;=>>Iv(QW#GUX|9~9aAuXI zK(T;ps$<8_d%Kh)0v|LlBbGCNiOn@T46)Bfb=^DrsV%=m%7J3sf=-%yXmXI1EwW4rRH2{EJ>VWonzPBF z0FDSOAYwPX!45XIV)iSqMcy6uN5OefvTBpiS+IR!KgFxCTCS<#3d|6JEdVPS_OrHM z+=1AyGcY$hf6IG*bd2k)K%5IJnbB1>@M zd&H}(v=ka*cje!Nt#~uB_sL@f+IUYD%mCSz#3Y4Rjtnh;GW3g;c^jrgGPT*aC0KYd zYG4BgMDb;?Lt1-p##}y~t3ow*s2IevgW#LQ1i{FAOnfWiI4e~PZDtVw>t76y@Izql z!l9yq-6gcb!lQ(_T~S_%=>jmp)Sb_l*I{1ETt@Ko*$nNA+E|yojbdfVnXsnJV|a4$ zlN6rWidc;sZsO)grK z(pu-t!gPe&UXjN!Fk%U0y)p4%MI2zvYKwCv%)`#H zEkk*>9ndcCFBvi4l>7sO$2NvfQB~{A4d5#t3Vu)?1D9fvm0dck+bZFPK@Xo0Se^|X z2T^d3=Mt>Iz%-_oF)X#;;E#5(2IIF^1l>S_9A}sTF$-r~0uN`<7>BUG0!`TqbFQf)$!Sw+FW?hqD3G|F3Nmm>4{Fa^vr%}f`$?Q>6D_65cFCST zUkDO%#1Gayyf7HvG09-`X{8!Tc2||wuTW&Ya~}mhOm1smALk^Cr7Q-#G65Tsc)eLl zFqYlQT*g1}NlAoe`dY#xlS_+tjzJkCG#FQu=A*-OrI?E}n@;I;u;6QKq_HC8er&yImG0P2T5$rIf%o;9)mJi0P++<#=VedB{@4iGbaFADKQ@g_SNVbRKaFb=|4|0&hSpPfJNXd?y@auq;3jrJ=WrV5^E`&n zl~44Oxp#VBUQFCtge~r08$4Io+6yoS4BBHM#XODSn>mroc*(X#F0#>v93&kh#C*d$I~ zPwc@HV{A@Vd=gB21!YEQ88hU940HoMR#F>48TBnrl9q4S{n$I4$aw5n+HBYGB0m)X zjN6hpRd68m2YApi@NQGYL`dReNyxCKh~up2#@-FvuH*n}C4zqZ4Oz@o7Bf6f6p);a zQs%jc8CTrgSVUEE+F>`qe?D&D{L&%f3IUgAp3jI^8-Rmluz{6843XCpn;2pl3NOZi z8--!h&MVJK^cZ8DC5~^DSV<n}*>oGu69D{gbNdb6~*@lAI*|Ym%)&xn(QVfL=hPEgz 
zmej643r|nWAR(p@19M_|pp*%o6^9bqrg$2&Tsz4>2HfI-F{fl!n~j~A6pv9{MAnM= zAmna=3em=;%xX4r)v}4{^lO%uM=&RXAGw1Gc2Z8Hha_BJ7Vrc|lcYnPE4cGXrnZl< z1JHp&q+JfQi~ARGlC_d6O&xzULtjBCVGwWVfD2d^UPgn$e5#6N$WS`DJqF*H1#{(s zfuiML)L`f~SRoTwsH8voo3NdQZV$p9q6AmX?5wtJL{59WTviBK68IPgwh=h_C{b6S z0w{;WX#Dzkd9`i*{|(}+?KKvj;0>2H+mG2CCwe)WCle4Uxxu)8(N|X42VkfC&PJLG z7|<`Y^WB2hOAszhPF{+R01C*^W|rAe<_5H+rw}ZPFk7(L%NuF#T!H!=rTF4mJQ5pf z4f~Z!X_OW}tew$~GdgG3f^=cAAr!InGsb)vc_~WF^iE;J2>*HKt71U8fWCygxJ&g2cFfg61d+p2QE z#$uQSBRgQGYUBz2o1#q|t12y7_R`V=D26_{m`2^AtU-Qah&r`0E#WY2jSpeZi}H>jMHG^1fUSa{mE@2W!sc2I!_Zl`u?n!8N>wOhAKzStCXyHA#*cDB$%to z4fp}I1xFypcrixnXkGkB1!zIZW0I2MdPr3uY49k+(?L4kxz-E}L>;Nv@dOVE{O}l1&q|3jj5#8owm+f`XVYcOk?-H2 zaq)_Sq>w9O1teWuL5xPwrNQ_{bB+{0t9^B`=!3zDBV?jDF$(z}1O9M)6vo<=1Co4y z3u-2rL;w`T*&j+MaxGC2Pm_~2z%)_FtnnEozMwQfVo@@a1;fKrx=T2bk=i7-B?_>a zORiuv7aiQI1@oYFVzkW#Be(VXL`86sUJ;9AQy#^|8=ELlrV5PX#tf75Ko0e;gw8~- zHlP($R8}yzl=+JYDfX|F5dqyK@QWmKi0Ukl$!&n5-6vYd6&~e|&n&-l$ekaoJS188 zi6wHqjS_6+i!G8e0)pUS__GA~3E~J46UT5Qg@nrhHR`Wu9iho5qK9}kRZn{4jf;Jv zl>lhuTRDZmz|xL{iV;|>P=`DY@T%V#N(0tvU}Py+c>QkR9*4VZco^58I=)#fsk)tMWjBKqw6ECT7BJEjeGi@gj1}T;KQI!6SI4hFZFJh5Q6Cv$(Y} z<#_oPku)guDY8#F`PXBtTS~s%Ut;iyTQ!CCkEHd-q1c`Z>m1sVWm_9sGkOK1dz7Q# zwb{QIz>aOarH<12euWIZE&+yZ;WG_|DQ5I+?m#e$$t01}$CxH*$2339i@hvP{J zM+=Q+3!08MJJ(SQHrg`ZQt^%BJy1&tVDsqXT1%tlbTq^S#1nocNBcv<%@R<<;>;d* zWBbMbM7hPFSX+y&fC;?_`4_DcHLXz)pSI6C9!(C~lzFXbGBMS&OfZq5iD)tRgtUvP z7wC}3@NdNlQpc!~YPeOa9e7(ZH0`Xv_(=yw7KpzXx&36&zrESm;QbUs5Q)a04 zNZA@q!4%5rW#WQG(>6=M%PQP(ss`!!O2a_Nr$A*IY!4X^?SR}Fyz&_4PYO{($P$)` zl$9jIGH>s{L=Xie7SbX;ED0_%=cO0f7#^btP-KaUOTp6MTxtI>Fn|SIP-Kc7y^LTS zDiJ0|%M?f=6B1#un{4`EDw=Uz7PU>kKMD0vS6EWBS1+>tJq91(-mHixKPm}uU*>QM z*#ntA5FQ1#6boqCIDIOK@)NsJ%&-hM6LA3YP$HmOR1%W$?*Lpvj+s(Q1k5(fO?-@{ z6_KFY@&TQ3Q2Pa76aA!FS*5UVkSl}-46kkt36PV-3#-P+T9?sTNbN-iM%jU{Eq6KY zrGBU2b#{2tzWv=@D*PBifKdHndt%+~3isDzFa^MiR=*g+h=F zs3OKU++LMRh-YSp!)YNi_m52EQEErbVly|^RC$Svj(=e!<<_yER0+AL&81F8`1Ds| z;_V&i2EdvZh2kVoC`l!@4YX8@ob#|HG0`1GB^3gBRF(@i_ZX2Rd)ls4GqromqugTI 
z5fmW~mbEEqfGLnaR3t-VEb6Ja2XNtA+u^ptZ*?hNVs$}yNrnmq;D;TC9|S<9%*diF>`AHwW!74211P^72F4mWy9^f36H=qcc~ ziv&v>CCNf)ei_cG$P%D^S(skQ-mz!!y^eQjd9@Ak*i?Ke2@i@&nPR(Z7vgQ@6 zXliny27(HHB@I#JD{T>!RFw%&03dy(8nsAm9om6a(HRSlPs28FfvnonDguOM8BL1hLiGqmLgK0ztOJ_~ono*hkopns z55+hy%pcKS=$&lb}Uz7JmBHvqN_1N61?r5rdF7s|8GO}7OAo!0s=lg`` z{1_T1-jQNH;0NQNYStS|ZC`I@>HKzmzQ!`aWGoE15$j{R>=`hb-DvYb+R$>OZU*m2 zF@~b9Z%dXTzXnGMR2XIFZBrqif{wBxvW#En@1bY(bB{r^$_k@Feyj*?t&X@MfKae9 zQHCX&roDtd+1#E>C^WvRtR|uY%N6$Jv_Mo9jbJ@&UQhuI6?DSIJO;#xk$i%UWI-Cj zx&&$XN^UHUlxuD4{D@vhf2Lzgz4)3Mu&Q#LUq1YG()N6M#{JM1$_G>6)RN+a3MPcpHhgBS_%z20#@oi-qhz1IK@D)1~yJN zMv?7JGZRq#b!`x~s$~cddJKAD?-Dqv7lyCO%05SrIvsV7H9S1%uRn%8^i!6SWSX^dQlJ?IoPgN!_L|c zT`09g007`qQnB=`{u++(S_V?j7b~n3g+rOd)X-v!Bt2a=M7pAcPOqyrWDN>o)~=P{ zD@k!=_$o=tL!nstUU9*8@t@L~Xwv{k6;9Ju@D2nZa7a!ON)*>j3DixwpgxGF{X=n+ zRFrv8ObE}AQ)x>x1X-?PoA{KPJfYN9fwT;$NLyUU=GY0)32$LaB6^HXSL~;u)_S)vW@=qK>`Yk3`Q_3oFCWPdC6{$=V9-j%&*CVdlwnn1n|h)L zgG1K1_a}x6iD&yU#h0+8qw+u6@|!H zWaUDUKHVN-<1QSk?4}4wLk0tUe>iQSU%)aF7P5>e(uIB~e^ieF+(91BGHsnmso<^W z3-^Msg(p2IQaLCM0jCTMU_?XG<`i9C^~QSB+m7@GEJjVL41WY%U zs{n%t!LaHZ%1&wU1O3uT0lAU|0bG%g?5qsMuwIN(;fG4qyK79n-oE5Sg=%}IZE#|n zcBvx-K&hm8?KCn$Wf6iSIBS`qZ%O->ryPjHfkdhYW(>>0D3$QimZkPAqga-*jp#!+ z20saELbD;V&n#f$o3BxQ(6~jFYA4_VE9#1PAc&x72V}vu>Cv!6F#4nlwX!r37(fNi zHUfs_j#d#+((H$~k%*uvpjk2zU~o5&5qpJslVvIiz2Wo z&J;uCYpDsdOnKXs$#__molsUOq*JZ!cZ&zH$lN4@hn(6@ zMkOG!AL9+c5fH-aB7aKH%0J#!MG96AtO91#fM7=;7u(HyDIy0X3s;3Q7*1e!s}8^% zs8$9oxl;{AyueyzRiKqhKx|Qk6*I*gdnYxTR8#D3psLt!K#(ddwv@dNagf_85W{K} zIKbP~p)aVbj*ZgtgwD){a6%HSGS;+Z`BAnO_4H5CY~Upf6oO0r3ecvAnGq-nctxr$ zFG)wv_ScC%suqx)fR|b!-4NWbDoK*Y!Yn7jH!w_rHyvmYTk@#H4^*K`_yhe0)WLXE zAqox6p(t6>))VCj%&hXHph@h>`ln$^onV!%lE(mPLbqj(-rMT5cat3y1?6OQra~94 zZB9WZ&BJo9&j>UOW;?OG~W!PsL69F`j!J4dz z6pPZBU_ga4Q;~)}-;bLCZdNnV3f9Ft05h^Z#z$DCAwYcwBDHWCvsyxk_M?qT2a&WR zK$+eQn&nuLDMK`jmAxf*8w6PtfytgtE(Rzf=#!o7w#d(l#gr1uE8QAmD}H0B$tNkp zcsVVA{$guvl-yaE8vuaLDsDqyQMLh#NJkV6e^KDF5@AGOD###pz=RCqp^gg;6ac6i zmd;;u97JEJ^_ofzbr69dESIVz4hr0GSnq!oqdAwM=2 
z6^|CC!wG(+;zz);YQQUrMt;yWmA-D2glGftc??*mT?5d8i*z%nhrx#00n$upvyxC# z$b$9#z9I}co)DHUcq^fQO0YUkb!TSo1TtSEe9JM^gEYCJ1_HasfJe%zzy)X@nr;X7 z50FYXN}7^vR|`zBM20cPUwdU<@V`iqo$H0v5|>5;H(sl-j3#7gm{!VNDWE(pUz~ zR)~WThj!426*|Zvze4vzYO&xk1}@x|r==?j8XY96s|`Zj(~wi6uPU`rN>P7N-;$JM z#WRFfPh=I<0?LVitC~Y*p)tzpnFRk% z$+7^jPz`_)y+IgML+YeXMu~*3b3TCv2{7oDf*-WP2`mQ9Um@8>>Bhyg)lpW$bSebI z+cYAI%~mortEy=jQ(o0T1DL50YmW?Kl2}sf0XN`9&4o04*@hyC8VhwEW5F77na(h# zZ&c-jxC(gMa6ZSMv+^s+L;$NZCYrf~x*RCMvwVkShe@F<)L#t}HkPM}YKpeZmAV3KIPIV}UVe}TKtKY7)s!-AtM+}~I3^xIkURa$s zE%a^a`ofnf)}@iJ0lt(xiFRtAlC9{7YgV-uetJb`AS< z2??^;xM;@^eMw@`29l2Gtokb@S&;{_v1CLkkF$jeLYES*4pm}7x($gDIFap1%D`Cj z?bsf`CMrZJ@;l|q0(c;07L-aoTEUBuYI%lakP-CmDib3`y4TPRtv3cO$4gY6Q6pm< zJ=TC%U?4`tKCxPTi0x*t0%r2KpqhGWjyDkAbzs<@qACp)SFpNNMdTVqeM?VNv@yz) z2($svth^ktK4oxAO5Q-5&6%HqBOvT)TbE`HrG6h#%8s@TQuU-PMkSZ8gd9R1ATQbr zgg}5=f?w7H;xTj)InL1QbK-SuQNdz zLSYeuO@Dt8+(0S|cTC3snbkjejICjS)5Vmv;CcR{W(a6WZ(sUL6wwgBtQ<;(u$CGQ z+Lk3E6_lb>x_U52^+<|vKp(EBLkgh^APL6y7)(*@HXu69j210cZi@5;lThZB_E2g8 zD7!u7I1-B?!!yakR;Z>25*OvwSE1Bsziny?EdhmB>Xv4w;ygyhRVq&FQ(-TPz!q_! 
zkmgj&QWA)Wo(ZH807WViFlxjhkPvK(P?;`Y2(MbXIkhEBv0>VR{@iXJ-jQ*A7aOcDmrg|<;OusJo{ z@`7v!G^CRreq&5lu031s#`=m>!-qP}tfKcxpvd zG>x{F2+*J7VDL)@O)&#W)}E+2!i>;ctQ*}TbQcsOX9iUu5VlN6p_CjjBk3f42bPK~ zDar=dsbW~i^l*etK?0bsN&|AfM)^pq6v`hJ|G?c;Q+K&NmHf6d3uqEr0?w)o@KPn5_R#!w(6?C}AUlZWbfA~V`r+mVP zIe8~%NN+^WbP?N}`XFUkD_lxVLMIjIh}9KG0G4^EQ__(_O7u4%$YUrXV2y?3KjcVZ z0BVh@mh%{1-CJvKQw~7kp7NR2%nj#$>?&drDJ~h4z^wa}E{L0uh{`vV zB5Y=5M% zl&*%zCngly63S9brtC>c##dsAEa%tkBcg`(Lx+nz!a*nC%EpF_0Yd8{Dq2xwtDHkp zoe>fRd=RE&Gy09zFI`ws zz^OcKAyVOlgl6+8BSWZ2sTvq!Al0e>l@w{!3$U;xRT#)GI^BFtDm=2JlM|FZ^-**_ z-ByMl)%2%57qY^nB93XbkPJZuXxU_0+Lj#Q3s4q#EDZcC?KT7z&>F)_iKH})fV!-$ z9@T?c=`TW*CF9v`#0zp|N>c@uOF7?m)K4rEN6XF1wTqOz36nxpn(G9S?A$^vQ{ZuC z$%@CqJVtO$p+_hXwspk(m*ikoDVVT zaPS=aQ)6_yBB~|HA^+&`R%>uU$}Y%)(h#O4y+`wNOd)flOrf_&Aw_J~HEIJ}w9dwe z*R*Kp!XnMnWgxweiW3xjWhx6Y{K@x0>}g|6aT4_?f{(E+I*{Mgh6fuY-^lJxtLbaS zRRmQWnZO4tTa+>wIl~f-xZ>!B?o~Vv6DPDOASog)doIY0da?xF~ z0D-Mf@hI&{^b+(h*vHqTvf&5kBrI~^gtVM%b4c|foV@0Ys+i-Sonj$!(okT0jfuLc zo#{kFaRngS;NiizJGO7-;4WrAYO91AFtx&)V9+Yj z$z5r`Q+$VI+kzu0O6HW5wUD-H2W|*JhDu(`u~54YsRAKsG02KpBXSj~KTDS$V_c{< zScj;H=PKvh;i#O|PdS5DH4qc{#UH|N1f^oM#WA6rj}ZcdOtO>0p2Lm4BeZ~CfIQ+! 
z=|PL0pSe+I!kG)FIyB*7Wfrye8k^k@(6FD50OL!umD&xzRBRiAFM4kTQveBA&;?B@ zw*16rSW_IKa{G21LZA$W#yN6O9*2^oP&vz68v=%?4rUhVCVu{gWP$*we9N$QlT#K> zYr|Ypwu2T5*=czMjQm1D2|?K80P734@|g57(qaT(rInORXuz~6f>}=FlPQ2fE!9x5 z4g|6tbz5{#KM}bsd>vwkP1BF+P=#u{JO)_gK)OKV-(*;i5#GH3!H(`>HxOVV{pZ+f zPKnBK3d!1M#4AlyG7L>4XBVYPJsCnWU?Lu9_ofu!afl#APam*C0MsvROY0sZZOZzN z-fH^D$q0l=aDls3MoIhxl}b?rIjHDQ(h?7d&gMKNEK5$L!~{10V>=aguyL#zOEp+m z%!(xBF*)F#qdBG(dayg>EO9a&@oAwIR2-U&N|k+!3m0T)4V8H&3S{>D@jxMomJpiL zMd~-+Oei7iv)W2Z<$i9d zEazvD64GIh=4h1;D3Hhj2}xHeMNezmbNnIE!s^PPX`PUy#f_Y%)=s1+L)<}^kSowt z!A@jEO)Mk9LX$7P=5QQJnpQw#FRlaPb}?|SchF;l2802j3UE=J@)#kEW|uA-1riir zSBJ=@k;W!tRwm>hRXlYgbh!*^+PCS zA}G}Y$p!^2allzE1~Em9VGleXmZG*$qA`fH%sVY(UHTf$7b;Ft6Uu8Im7$awKrTnq zlzb!(LxS`W0s<~<3wuZnCCzuzrnY1|fL==>CxNO-i%_t$E+p~WK{kz(rFo2+ZjNUP z9KlSgDLHa4!MJoGT#x7i?>P4cSSmb|71e{KUkW$EEm~Wk&MW|3`Y%+E!u1?S5erOd zfgZue9s_Qr&q{pI3M5nbe8C*mwSJ?irUX<_%&ipNSs~=mE|o#Le-1CI$5M{jbNz* zgXt*{a06^7<%%8FfQ%jxI%1K!m zPz)m=sIW^(>48(jK>slrSq?z6|FxmxHuzy$SM7CyHGR`+jKm~jYlwRWFTEv%t9*^k zWIzEIGGl=~i8CAOlTnqCTTKJRBJybzW^W|+z-mr>BI~fIpRNf6aImbXEVeQ#g{bCo z4ZUrCbV23F!5uj{)yhg_!UE7ASe}!0%Ku;}5DCdpI0?owQm(}SuH~AvA_&hBr735y z6gV$8Bkz98uC}jb7&XZ-0^was6!0A=H;EJ)7|afksnW57z=WuX`>>zx4SOr~Fav4w?_dp7iA!JOR-X^PNrv=sLxUiBsNAwk#xR<7W$zo%B;X1)_H;fQocb zNRU(_;85w+%%>Xw=L0ePhoUXK` zZ_DZeRmBCUO^AbCAvI}JssXOC5lcZGDPjXMmdXj;6>@_<{I8NCvZB+3rmj~(F4htf zQMf<~-A$C11ludEAUTa}?vM%F5pRj~piv30mX(vm4V!bU-d24@=2Kv6t?H5Jr|lfs z)NM?I^1~Ij5q$};&;UbrkO-Vaz(zEIrMp}oIxdbj(1T$QYP|U3P*$$R(0c*-0X#m) z;?TL}70aEcREG5cEYGfsG;I5j1gJquR0d662gfxUA_X#Z+g#)$1LFwf+w=tZ9Ou&0 zi$d?V{9xx)@3cZ|Bd4S)87Z?AP9}#PQUm-+%vu`-m?wZ2pgK9k3bg2vWC$XsIFvle zvG`#S4w&LCAq(B%YG1-$eg#0!p$6lQMNv337iz}_kbQ(N0xola7(z|6c4C^~L;}U7 z;U?2Wr|{nTwt(iOHbMcF2$_KP-vF4ak#>$yDL`(Y( z(V}f|nzNdw*oq)VXT6es1z*J?qL4=XP!*sCsD_T=E(HX&(ByjOZGa0G7$9p_zq7eX z%(-Vkgi%lX)lppz$Tnp+83q-8mN_6mJ=&!=XY!+_n7I$pmrt++Xd-- z1BL8sEp@qo!d+C-OU_IX9jtL^0^2rf1yV>P6JmAGskVx4M&WMFC??vXaFV7*aRkNuh!a!_8>v{WS*X@zQ)&R+7Z$ABNXB+x{` 
z8ww$*`D$TADy1vA#G);fu^he6!2n4dc1r%k>C;yW#nB|7dd-stNr}{vv?x)oUdVZE z(6>-qlZ2FoZ5^quN{loW0UVaQP3HYThcqn+x?r$inDV@?b7T#i@m*>`)&{@}34m(p zI}@Z61#{7ne&y;6T8cN`Q;s71`9A;{)gp`rWcft}qZD>janu`VLbw8HK*cLzn>r^G z_+{Zd9U(c)t2B}XZzQn~@lcxg7}YBfBt4a3Sg?yYK!sJeh0(1mM7ls(6Bt2Xd6#TT z@CIZ=EA6ttmOXD7M=z83Xt(J$kCcPLgS(ZP8*2;uOffTB8aAl}rt=#QG=j7W6?Mdj zG^y)o`;tbZ-r4Ba#o3F(itsl`WLX2?Wq+6DT~U1J}6WoA8>`R zm(o(&^Yb&K+`vnztF$JPdvG?XJ(&-lP#n;IdSJa)~yV7Utjvo-7V) zXoH~(M@a;p+`~n~0TP4@X+9c+*x?YLEovvK*aIW!@u#=B^aUS5KK+Z;;Fzp)AudI; z?9)O|fQFs}w#}d81$awopn94!Q1>*W7!G4Wwve~d(5I0Ig;jRK?#MY#prDlSIvIiP zL|fDQqjgoN6qbQH>=4i_i@jG&$XhMi7^f{qRa(L)X%m>JtORvx%I; zbp3-NKy;K`5LetVXYd5ZbQl9@fH6@42N6Nt$AJIB?}UGCK*pR9AI2b|LHrjK~g|*p29&uTT;9cE-ZK`F$FXybv9g{AshJ`fl-u6jm=p& z3`6pSLbB3OmY~~0{lGs4rQ}pVE-8$1lT~G%srfj_QD_Pf?v)qt9o*IQsx1*C3!M|7 zsSX!!mfKL!G4HD9K--VUq^KROK|J)^CPRLf9J5Mn)YNBmYv?hu)FN{QBbzX?DBVlH z7zU91hjL2W;uV~#4M0?M;5S$}$Bzm$O8iLsfvE2uLCG$6*mx72)$pP()exhI2L8eA zY48?9ohf$WD97BC0TS^PtnCp0vDb1EkOog9O1GRkZ%LMbii1!ld&7Xpwqb={K;@|U zdP1G3aoikBr-^%8o0^7_DolD(=t62A@=k()FdvneQe(A=IGThcEwrsB`$inmY?T9X zUJL=(rI1UV;KC4j5IpLKa;_>L_$oiA2Sue!Cn8J%6@p(j8R1#Ij+Uk*7AaC#hfwT& zgf9683`OFFB6H-3kdj%LbdH_mw3D@~Bdc)R65?xOmG-1c*KY~4WFqa~;=a!p4ed5X zt~C1s4X}cGpE9Y2|ICdBYm@>ZPg90iWOqrniki7fMgd*kc2FT2X71jzacp_y8bk-I z3|M3ycNdDbDky$CB|4?R%#sx&1PeGGqb>;3oPH~f41Pv>xwOyZ1|s?kg#xSU-TmXxs+Lwvvcc%7Tl=NL_2PLO9MQ*)f`i4zfkNp`ScjZToX1S1o6 z6_G19p}5A-jSw=rr}_a7VTplm&yCcGf~$ons)&@)UA;tf(45r_CRAk7$W&-c@L4&n zjH9MbBup00#ikSvr;1H#A!*_THbrW3FeqSfp)0nD?f{t0ghE!o+il^Z^ zhpEM)aAJk{k;C=ybrQeZ6|LC~>b(h~-sXNDkt0nzL^+=ef=Bzx_-V#T2b6|U>{_nR zWg00XNSI_PjXCy#7?yogDA6Z_=Sr%a6caAx#GJ(p?Z8qUaR7O9D+NJFI|H?vh$aF{ z8gdZeZr;+_Z!nx3&V@0WbrMq2sOB*R46W^j3J5e>aaN0#6TQ?>N@fcP^uwbQV+c7o zk-~HUy&?%(q#e$pkv9D%{nU~~>0toBAK5QzE9hPpJq?*D)PYqTt=*gnPxmwVg0@Ie zb5hPq&{YQ4N~-vqIxm(&MXp3NQxn6WbQ!6^T6!rV!_ap3w-BXf; za$l3Wxr~w<;v&nCv8W}L8&qVW1r-y zl4+z5^|h=wFlj2SASqWVp!!M^>4cd|Q!Y}1rc+0uoTbw|Wn6hY$9zdieka$|DR${p z&>KfWv11%dA_)sI1Pyc&sJ7fg*!60G8-S2|Uho)k%j(jTso<6#8+=~k(hTB{QLG_< 
zBSAFQ9gC!6hiVe|OhT2e8BvOG4+*A4l3Ha|Td{=&ZL?a8M#B!vv1*csi6JFLg#Lsax}w<#@F@@X%rR|5B|nOYfbh$s zR!bx&+fcR3u_VDs;oW3;cO z&VyqC(HK{>M%dfF@*)Z*-WuG|qeaQb|3o^cT&WYv+bX!x21!+lsVPiEgCKtBS ztx>$yF8`n0ZPrj$OvZW(!G_jEVMmoIrn%unDc{-)drxV?(Ks#QHlBkAV7U$`m3cA; z{He$cF&Z2DfZmLfV*a@c!q;T8!p?*Qd^XCPl4H(yfIncD&I@XpbYm4Ns%!A^c$>w3 zOmY_vt#<-xlxON9f&d9&D1Z~SLrl^AS=gS+Fcy%>1Qfv#lutF1m_jS0*pvUFwMt)d zs&8^`n^Kh~5iQR^kY9(c^qHu5N_`d?_JPE!v?cE<8%X{68evMn2$J!_&fDZPxJs!4 zfD$&al+$t4xDD6PnOz+?xgbI?5jns^Dg#}QI7|fq-7}?7RC>#za#_-FmdBunDTb?o z5r{SP=_df(-A?3<3fq`6s=Ps+)eUE%sjsIlrwVI9N~AO&e_y(Qe;_^BT8Bznms9LX zcMaj+W0WE=uUzA*V~TW(xj?Xm6EI#}bFMcg3M(E1rU-@`A!JFF2SiJ456>Z@tHD@n zfd!BkFs_?G!Xx7pj;W@$Ms7*=l2F5DQQh2;=%c|Jh2vacpcZWM(!yn?2`MHIkjL3T zivUZoPh^ios4THr(vc?}+siz30m zx{|J36cUp%h~lpN?o@_^neIv;gKnXdO)HhakbEuY_!^aL2-*U+1hRyExcY{gwz4(V zNP7w?O)I{E&%uG597xR>(zZo%VBJw>JJ3=#tV)JzYG5sXOt8)gB=9~0ao{xYBEu_L zco?-eKpQ%T?JXBYZX19&XX`=$POK;qWIJh~$W2bRvjeaxUZ(LR&2~j9DykHC8MV49 z9Zp#SNFLoeZ~>G9ptP7d=;}|w_9VshbXhkk)FyqZD%po}YGJf;B!Da@OsQ4KQgA(T zCHJIi4`$O^a(KjJG|}h{%RTn;rw&bBY{Fb{E?i|U^Hc3h91Zv`YA(#7af zBp@_aYvrqzqq0bhZK<%T_c14neHg7I2#5YH(&-N2p13?el^jm#Bn4-BN1RH>0tDy*e1%%{^c05B_@ z%8|BqsUnrNv_WbT3U-mA(l~`b7$FQ*e-s>+ZjFfj>DTeOkU*M`d<~8%_Uen1y>av? 
z44|22A;8|=QPL6-#e92wGuD!2(T=3W=B2&}57c~5Vfdx3MK zh{+_i%P|r5G%3@4J;GkXoeSL$w8b2y^On|c0n}=RF{WC=2w2W|1z{k!SurL6LzA&% zyO0pzPCg4-Z?5vHmlQW?n;Jj73vzB@l5OZMzDA&uA(L^VBo&HXF6eMPBmDv-PbG9B z0&zWOywlorIz6o)a3dh+!WY~#e@EjcCTQVNr$n3$Jy3F- zvRY(1=l=y#y%-_)V1F8XU^BA2S&~@`eG2%e;ZBE-n4NBEu+$HLSCIqRkNllmSaP+8 z)=iPkulgEQJ( arjJP^!_;@trCp$t9n5BUfxx2(gguD+}M^h!E~p#5;ATloEzRO zMv>f21Zo~S@gf~SA?YSf_N2Syem`tFdY(+;>Le|!!WAOzs-A#Y#RWel6%++>)oj2} z;habeqem4L!!Q8#%$(jylLBeVzVO%67hkvs%4rvIN39%HFNetQ-eO$Bx7%vn*}s8i2(pOn4AtOwFkUP4-%nPApx^WOiRNOcI(=VV2T`4 zRF5Xlr6o9Pm28zeBh@&~fpF{Guk9;j%cHr#g02Niip4e_i9phhvZ!1UDd_^jXw%=~ia4<-E%S&$uIDgsNZE7?+NDu;g5nL4!PeIADyJSuQEZNACxN#0#unJtgh8 zXbnmwo)&ce!YK|fAO_?PC8d9`!88??Jp3oagUeYk(K8ZkLtVhPLlpeahe7;yf7%4L#E9+T--tM;jy z;KEqpG8&BDY#D*TgEy-criqynmy1+VRkOi}kBJC++mfvuR>(28+)Ruhk2kQ6w_o8k$1^lW%DzQDC8r44lUxJYuP0gNQ9|Y3PIWF&M4F1RNSm_G+>Zwy&W} zDAm4*cI!Pe%6d>uQtI!nQt^lUEdf_Eh>uo}%>_B|gU6(ThKr!`8YW?JjZnGN(t~p> zhKgw*rEdQi^};r!;^1qx0~$>s3K7sUOd7TdQ?pUu_Gr?Kep1~OHhVBMz-qG)bip2R zg3=Ghq}LS;bdVMxLzL5EAhfHrlOq%@ML5Ex$(6<(DKF$oa{~@iF0>Tn;u75x(pthm zFmyUp#X)*JNR!46{8+CAWTKA>fk8uaJR~76e4Kzv^bWtUe{|tr!q;3j7#l8Xhd@iNQS3z7H5;G?ny{o|kI7x6 z<{Da7_mKF>7HN1l!O>?$8k#CDtDGfniIUKa$Ph6#SMzH3!p_nw25r&~?bFz@kTDYU zl({5_!liSjUlx=Nb=@Lu7;TQC8Cd}~cc$i`h2fytfKm_G?K@fV*FJMm(wCnVR0f>S{~d) z6j?~gjQ&zEmmn6TEG5&V6CIU^u^s$J#$b&6Zo8_hF!P0(e9b1l)7Msi`{(mCRt%YFeRAD>su;YY)1 z-}xGs1`(b>USuIx9@Fua5vio8oODIXmaO%c8p?B#N5KsXe#y}Ri5-2^2~2Q;Qg!5ugD8N|O-O^? zsLIqHl7z=V)@oXsvxA!fDRR329-o<<@(RUEhgQz<2;kO))n-Tt+RpXlz6gpyp*m$l zxx(1D>AbcZ6mLN4+%@3}-YjTRu6?QKrLv4}h5SSU(cqLllr~uVlU_p; z^;cDDwcdM-mFUx)uOpfHTszVUH|yF-PD>~4Hn}Z42swMjYK-g(fZ<`jcWg%Q! 
z58)L_VIT6T#HU=NNF{WJ;wHG}z8En@K%fz7P%KmrF?UdkByF{NH`lhIm||>dph6r9 zprJ>z4ImW`n7i`q0p(onf9eHFkyZ(vot0STMt^DqSidKQsf7d=LIlo^`+5sOh{|F& z^(0{VD?LO0Y^vf!DddD+h`VW54y7?CH0jhLO`oGgB2QV`66lZ4G+0|XW@h$skz^cO zv{6}H!m$`^)QN67NHmlePPcfspf?IOtqTs!{X~Tp07XraEyy1i6N@Y%Da2#KmHjC_ z0Xl}Pm)Mi|ufdXo#84y(bPRCO;sHtEza}y#adO6(`m_<{M;Ohburw{X^n0s@ zs&tv*@R1-z)wn{0wzr}s_{kI@Z734C6mI(iw#a4SD(Yw_L9c20w8Dfz$|DE|WlBRn zPc9cn3Pl*Teoag{+M0TqfF!%a634)1RtB`{w;RbY-U7tt`#LSYy(8pYC zmSbMwM7mw6+4M&6KE0imx>REaIAxbtV9(4!@)@Gog$pmA$ z9)r6<{t|X>p;k&!ycaGb+n_-{K==lXl`4ff30J*J4_X8a*+?eUIHlJt=bOmHDX&5~ zn#)LDlA8i3Dqflz0j#W)B6nN=RxP1V)(0F>GRz;p_`k01CRFqDtmF9qf6n>mJm<`F zGVhsZPEL|(k|sl)G?Oq1HbxC7LqLr24PEJIO4ERpQ45o`lujyQ1G=b+ML|T|3W8wu zaiMELP&Zk+F%c^2LKm%{?|o)AZJEw=<~;wG`@UX&*YEmW_d`o6rJ;`XB9g@@O&h-` z{Kx{Gx|C*oK@zWS}vnir5-{7>kw^2h|Cu7~Sf~Yc7bfNS&d{PY~hHOb!9- zgiPaJNCGGXzs9Kub=73r`LH5VQxb4k6hjFwb~LX&tw-!_YR^ibkh6w1uv!XHuv4FK z6-9gm2ctsgu-%4e`8cTOI-L+?FQjZwrl$gx>ZEB_0(TDRa=`>WfeyEcv`{#VV=l3= z%557bWW#e@Rgta~1K|xX3ZSm${6PvAoC(glZ?Zyg@zcbE5gm9D&>5k`be*A%CPI@9 z)G1I}8E+uGLjE{C%oDyrYLmGwir@{X;oBrrz+D$Yg$kB)1FsLk@txYJz6+q?K&?R~ zrL`oq0e1qT-iA5&lOU87R^A(w7E2YMOY{07kiR~Jihy~tJ(Yu)UNqKAfMCc|yrre| z4)S|(FD+HQ251|jGUnDq8l{r#*OW-=13Ry9g3d&8xxuZ>%w${oq|m1-u(-jR2vXwB z^^pElH%_y2_3y8t&wYL$!_!dw?EGSVl@r`Jdw5=F4R2hQTqZzS^HBwO> zWTNRRRPF`~(m+snbAGrLTu|=}4gz5W5vcdrlG~&*Fg~D-VTPN#17@}s-%uY2$O;uY z9RY$id?U;kd*oX@l4{OIaR==!yuY$1klO)-{M_m3gFm8H9pl&;A#%+?ko%zIu+W;z z0w#tu`an&r4NfGf(GV3c2}m%g6LDDap&5n~299i&tge7nJYd`#T+-MOOKq07(IvNz z;zcM`U1;khV7kPtsY9s(IU0U}(Q z2C7AiXwasj7FMb&mzh_ZE?kNbX#2=WfqSJ}ss*JJ4Q*w#>WdXqf+habjzH!FBjj#> zk9dr|Q!h(f0*>3HrSrh(XN_}BHeGZN5=%hB2L_3PA`n6d(5~Ms;JPN~R{0?f?10c> z+^GU0{HIHx_1~fOD0haA;Ti4Kn(>f|HBdbFr8D3nH*fxkkBY(sfM{*GnH4dAGFApb zDALqf6PqB5cnQatPxa+!`um7et()PWx|r|uHc%fyB|xce&Qn`Hd^1t0(h?g)WCDo` zPkAar!+c-AYD+UObAv>rYD%XfjQS2EL=uSa0(uCxJHGdGAsa3WA@#6wdKwwbd#Z87 zpJG#`F2QtEBSV)Aa9WbJwbu(ln;2sTQq)fRMKY${vCSAr!*vvWoTdSxK@CA3mb793l8rt&J(Cz5-PNDKq1F?M@0QWjEXDY(+H;cfs}c6&U3+oP-)a?p%Y) 
z!jLJ*HGZh$Qfr6zMdxs?7GhHT_%z~f8-gq4a@Zn#Vhv35B?1BxgYwPZ%iwZjIn_GJ zYM>ncUT{*bx;$ZRhZNdPZJW;1*M>#QP)M_9QO9kxpqRIssXDpNWx12GoB#T+g^FUd z?{FG>6F_*R!;b_MtymNt$!f!r_DHK*6LVzNC)M;d0odBlBQ%6LX*%#t9ATkiNPV>fRA~EYvAM zjGsd+gwQFRWLQQfA9zB?1@H|Fp8`arbbx}^20qJj)6I4=A7nWOcJ!Edd5cOVCPY(D zG}UY|Bv_EGw~;sk{Q`-4zR?EXg2>a+l_I)PG!Z?9mSZq`1K{YQbVP_TsZ1nxSOF(W z@v9n!t|hJJDTFB%GK$?>x=qX=rcf3h916$sWQ^SqPo0}OC5=N0yA}xDqNwla09jgk zqVj@B$S`yk+BA|K#Wbr^aMW#Mj2cK( zmVxoD>Rm5E(yesYUCo%mdv>Eq*YVweLE;vFNd%ZqALEKW0DD087LC1nKnsu4qfnyv zwM~`Bo)s!Ycqy|TPEkd|`4t7CI3xu*>Hhf`qXDRRHNrwxQx0nZgVwSpm~OGhDI;87 zwP(?Y=7ff(nXzhssKr#%AV=_>p2b2pmV)YzY=d*;suLubuF5FVQ$=&o1qXe=0G0O=tLwPHbj_1YBT5}`b<#?si)(}mr6nBt(S3(T{X3W4<7 z`h*~_trt{fUzA8E{V5DZ7A;b>q`m1FYuQ9LQe6oYR?2}JLU{dHBQ^F`n+U^D6)juy z20PuH5ir+uP?$6MXkANPDq#e}DB}>5@Ouw9&QAmK#Q^McB`*9X(3{`=zjq6SGx@p5|QaU z5J2?JzC1ZwD-tp{a3s?sF zr0l7Aiu%-)wySQ}bRwy?s9&QkQHY#xbjvW0*aHG3Q8U$GH+-kBL=B?PK91_mrPWh+s_e1N0vPMPCVM)s2y+V<7NR_?; zT`(u1v)##(`sVH=2uj^fk7zZ#jg$;#yayu^6J`jGEhf}h`554w*mcCUJ@DbjkyrX)kMn1 z6ky7DdH{jN)T(Hp;bVtI8~Wl(dw^1_eYBVL^tb_efSz(qQs7lvDTo6`#Bz9P#X_8F zYyip$Ua`=?KTVC=XPjZtyc9~oHjG+=sER{13Wa7*TqZ&)2tWcqkLgGhRLN*iKojru zG&tOe;v9_v)=AGM7y3p@Xi4ga{d6G}%rVy;%0V#S%#f%at?rl)OvFIBa8sj<+G_Mm zp-EoG2E{LN-P_dZS5zS$uz#cxT2YWRzZBIWJ_{*C=tv_$&_>O!_Vx9`r`^O9b!3Yk z7jwm(NVUAAVng#>hY*!c0P@Sx-87DBA}wU;s7X)#bz7&%)twMU6+(K2$RXnuY*)_& z8fy-u>J~>v<1SgM^66lx!T=`C&d!C^R1v;92x)Mg7|GFe)QEM=Cz*bt4fR438rokX zW0L^j0t9N5j4oC&2~*J-6;dpp6fY91K43?O!&EP6bHk0AQBk4e6h&dsG1MrMlz*@t zR$D;`HeiH1QKm{MK!(szUl@yY@JHKV2?eye2G|w7tcscRwz2^+0yAcfZrKkOd|*b5_s0W?1^Cm^Xi}uVnIuxj^#RfL)C|J2bx2;QX13yX!hx6 zK*%H>T}lLW0tmS@s}#M^p9G~6sm^(ehKR!fDQ_W5B2-h4m_n0jlxI4Jbqsky>ISt7 zVA!D_MxZgssegErqxf;~{z~$dDXD>!eTtwGc*BljTq1Iy(fUAxh$<=<+u)OF!A*+j zZsa4s>AW1rX{WF~}ZqQrDLDM!atg*?(428ZkRjXWq| zkXc0)AufW;{nD3a3mQ|2Wr1ilo$AaTbb%2pibey5h-%7Id^xcxrM5s(b6zO+H27lw z*V`~bu`c2xGE6E`{6b~uEkMSOV$3Xbr|ly+_>!H=%JG3>0KTyO?=GTIs@I8dR1_ z!fi%{aMzp&mI&q&4>$^=MRPQ*cLjIYFfzND$3hBXN1aYLTRkW?sN3W@%&$7a(`o`~ 
zvB>-OFkPF%Mn@*!tV%bMiEBsML7+Ik54OCcEJba zh{ClLL9hdE_q`QXOU7uVo#iNsMZ-ixO@-+vD40*r8bZU=d4*2VfMUCwL1==9Lx7Be z;D$PUrSGRpu0dym;t+nt0eQ1wVW_b)5VWb|U@ab;q1$*7 z5Jl`KgrhI657gJ9(?u&;0noH63419YFUGE)Cf*rkWzw2<96)1w1?h8R|sR3{3G(pq4ruY_R&4NWKT$FZqqaS?t4#{D!+ zfmkKO3tak=3LXeV{O58_EWH0-QD-i;IbVL;WOv zRGYD;C?}K6DzfCy-li6^CJ+Zhp^N5XgGHiTaaBcLGGN(~K9|6gFrgBg)wqpq5yEk{ z>C#YdHUKGOGm$A;BPvIDzluYkGOA!;h(*^IO~sJfDkUg*;v-y?ng|v%ozwRYyB|=b zVd6FnM$e5R2MX>}I#SKNY}E4~1) zs8p>sRjNVJ$rpPY+!YYmqUrc(q_9+ldi#P|5mq^gWTOrzcd4Afg#|K^Ii40x6z)Nf zaVf!D){|4~3Cl%Td^$fd$ZZJsEIA3QroH($x9B>+pC^VoZKNd-xI8_~q1_&(m0{H`j}7O)mipxqf#-Kw+Y%h~vP=Mn!bwzX@TG9U=ioghiN1IO(hi2?0dC_KOinSTlq5BSVm%p> zyrf&x4@H?FC(O~Q6r}Xdb(Q3Djwb%vv-J1E0snRNVG;ldbDEXpBNx zmKs7y@S`QA|7cL108RTFQOpH8pd*eD@CVSnw|DOAio`)i!uK@6JQHui(Wg*@2{BAV zoP-0#yKzSA51i+x9n!_m;Hf}0FHCPC?1Pzj49zMnBW(*0gY5+UC?NHMxU7WR$f$%bPTm5?{4+c*iejx?EZt1gICAd@ z_{nto>b=eq7_=q#{BqY#mzy&}jQQSvlt4ZO||HG>9MW2QCmK46ER) z4FsBs7p%qn0=~Tgo+cK{@wzams}7*X^?3&f+oA)@CS;QqnfyZ*7A}Q%%bQfyqV>gv)*?jG3xbtsj|z8*1| z3(*s|lamt8A}^}lc%809O&TcKX(Eg>ORFQND3UbMHxL`X%&?fYfwalkluA1AqjM_h zkdcRWRQWwsUv%1}lYP{1v|0S2z8hu)o)MCml9(6KMQ{Xifrf#fbb!sNDgRV@eHp6D zspZODNOOH3?kyO=HAWfXB*{Q%=Ie|Gp%-U`jM4G}T5XK&L1)z?z{xphuyb};0Ii?j zRQNQOg0`eVgQr?Ud7%ER9jsen0&q9h&0XX#Og&vQZv?!|^r%UdJiuaA7zz;vcN+p7 zfr--t#~>FhAWmpPaZ3#jPKKWsqqvh&nDL^H00IF+c%=`Lj5DD+QDqF61on2ZRkU0tLj-B13x)7_l`o{6hxYQ5H6!yrLgXlwlQW<0N>| zYU!I#l8O=l$r0Mt<6D05rN%Y@U@aN}S#XEAfDDImSpzy1LIyjDdcls+l1B1%7zuBp z!PH@Pb`~v>7LLHnGqlJ(0wbw|Y1|5vRFYIJZ7h#yfh`d3ZFT}~(U9|^RSOni3L<94kzk#9)meX z2#R7-q^c6#)S10%dpZYN8$3f?nNUhl0$M3K6e%Da4ogYGF`P`|6Ea;VC3a{l>Sm~h zY`5Y{FEm{$??}A`VHqLQ7(-|3Oqc zJW$!b=q8Y&(>XAC%0iZI9UYF-Y<`gjHdi@oDOn)gWi1kPk8q0+fS{g+w;5INBuV4-5xIcmv&bE^vJwS)4c3$&&C<%RjVTf6Vy-<}zt-u1!XchSw=sY@*La&7zk z>2*(*{pI4cJ8vcz-rJsC9$P+p;-_|h{=R!&Sgvoj7yRVT`v*q{7p`9)AMzZ#@Ar_u za({dA^fFKRt@|C1pS{}+4_xkW`PAOLnxC3><}1r`bYiAUeJw~)!L6n@w^iJzG79jsqpj~^Wi-77q~oLye>lQ-t8 ztLe_}ygui_=6lQIH(X=){jctgr|L;h9=!bV!T!yjc)oY~Z)fL=)18C$bZOam@y(|9 
zxU=2NPuQOCTlNp0J!xAW`QrT#&Oi37f1K}af52aS!zHhK^mC`rb9vd{n^yCBI{#n+ zrpImVv>Z>HWqo3Wd)UL}Xm7hd*q?43>>s>&V|)Jevyb`hn=bdJOSjSe=;hOYI`qSz z_3Ow7E?>7m)7^PxfA1b$-P>C&N7JX5=cd_~S%O>J)_fY<$#j0p6*p~($DjA76IXfO zL+-7-?{humS$}ik!9!l`FV^1n;&iyIr=81BUGp%f57FY?+CKj9+W!9Bvc5EIbDo!T z+4-I2aE~oNzPEkRMbG%vgP&ja=NGKv@v^=(>~gw0oML+>bG-5Jf*$eJZvUBlVZOL& z1K(wKY|j;@oukd#ip=vFUbHYu&&SH%c<5{GjtyVk?xJ>N*v|Hc$4j4C_D`%=3;O7C zwB9nFZqnOwndUvbfE+r#na?ZsBi zFD5iuy`yJtwkmdGd}1$`Trby_^Y;Dxt1nF_S57~6`L|tUbNc7!eDs=MPudE;c5K1- zSq(csciP8?ap6+um5#M;8yi zU)WkNm)-2Oc6;5E-#m5uTc>aLr>=K$boytewej$_l5yo#4CmIeUJrXbo-Q9eI&3m4 zyteG0^FBN8&#%0>ckctc;Syuh#-)A%M7#CC7*7p|jrN#UUcfnG5?60NxND!B?jB6j z_RZDw;WYjD$>AfbN6t-~^|D&co9C9*&7I8)hyHwS-hBO!=jHHcb~a!C+nvMx)y}z{ z%^#e(u(O){+L`xm93TGdPrvx~FTL~L7v6j4FTL5fZy)~X;j;SR`M0OTW4~Rvxcaw0 z{IBnxJ@=je{Gach-F)=X*M8@(7ytH@e|vg!xJn+crSbBGmFiHR`khxdU;Sjy5B$*W z>E^fo<|{t=$v@ry&e_d3esY@r;^W`@zxjJUoVLCXLzoE3%|CwZ!~ftxKEK*M_aFbd ze|+TkU)+53@4MH2?D_}KAD>+Oglh=T-jx=ceh;zxls^x9gvu^3M<7=HUZ9 qc$2xc>&d$uv{ARslI7fYsPSGX0*rwBTYLB9r zRDybu_>iRHv9*|Kihe~i1?&W$bdK3sT9}>0!Y4z$-Jk{I%hxzuI+5IZP38YemH#!Bqj#5VJYl=f{FKQ5ww3>7E4Q|BL;7U)Lxp0}zIJ2S z3Y`L;H%TawqIdDkzoFVW$d^fHbJ@ZdJciG!BJ(Fa5uk(l6-8wWOxjf(UGbTpQxB}^ z=*HcZHnT$8@!9wLKRmEg(1FByiZeFY!anOLwgCz!v@BTpf#LTy}YO&e9JyQfmx?u5#GKA5v!gtm`-qfM9yH1V5NQnfRE>~033^=SpL5$ zi0WC2Pb-sTAxG>PN-*>iZ{oFyK}|!W)V((Yk0mnbFj8l}G12iTdTNib@^ocC z?cx2fhC!kbyMSO5j}Xix2+T{T5lDPA5-Zb@)B!ny{ro$@wnDivJd(=YTeUe1RHtK} znY?PuTb=^G!ellUoYswM>y}_fW5KT6sNyB|33c(#Z`Y8i9rkZDrw2L|E!MYMO*j@# zI=)bA-Rq5EgyVE1PRI69x$5g87fR(gw=Au4h9i-)u&?S2eYJ-3$*wHkO{MaNyp1}r z*YBmneno3wCS>Yj#fA7&c_852z{o|$uv#)Cp2QXGs;St+iC!c2K$iM95Ti620B_Sb zWwA0*%Y9e1H4<+)>^IsMWApkMyEX^^dI0CQ_={u3|ghvk5N>EZr>fm??s z`$4+I{Q~{h#Qg(f>4g1iEv(^5sJP$o8vIX4pJ|8$Av$PZJh1C2as7^B;LYHD42|GG lbKU*L>tUZOR*Kxgpo`82&9jSa0eHB6S$yBbGmQLtNt}fIg=aRS87F?ipZ(i@g<$l5*+4PY z2C=+~au2Db7!Q)X(QFlfjl3>nO-X#ggEYyc$OB0*QK*ROMZX6GlaA5BRMljd?qbHp ztmT+arGvSFPy-UodnI+z{S_7*!!f&9bg^VPWzSUAS1i|RwYyI>^yFMitgzk#RxQ0x 
zv8E%#x{d)hbPTb{CAOGmNN9s&yOw`o$HlHUqI^>ka=}CF^}Hpu^apGIQKSt&)KX{h>p5KTlH>M&A7M=p*BL{?Nl#FscJAWmJJDTwIL^9_k})~i+wvsA`~ Jgw1khe*xOWy9xjR literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz b/pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz new file mode 100644 index 0000000000000000000000000000000000000000..f4ff0dbaa1ff92bce1b6be92058373060b67c571 GIT binary patch literal 625 zcmV-%0*?J3iwFoa=zv`U|8sC*a&u*7a$#*{Uob8*FfKDLaA{*}Y-IqQRNre8K@h%6 z@6!6Slu(F>AP5qb9v1rGL!q=Oiam_9KFhL~+jNg!E@5|14OrBNR)htGjDLy$oNqe2 zx6x~i2yP(z<7VdjzMYwUAAJ25cy{Ram=-COQGX;@B*#=p77YX)GSI6p>~^0j`q-XK z;ob*0g^pe(vP{%u4Ek#2LY&JrlCl9Ub45i zSuJV*E%bW5gC98TU%aCr`DC>CyP*9&V7S*oVXmuJo}(9E(>E5$6tp-tI*-g;*A_}e zg+f#vNHrQm9Po{vSunocLP_d?^h0dDT-Mm8sHdrq2Xq;+rm zRVjH5H!r!_m6!(#p1k|+>aFD{kvvPJb$w(&&_pp{NQiY5C4yr&EpzxVcPlnV!yjsT>L&YaW`fK z^>2p(Y!QYcJ#{dSP=Y(y}Oc>8Fp5I z%TVT7pV@LUU%ACcc}{HnTCoJgJ~Ed~GgmK6nM6MU LLm3IRz61aOFitCK literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz b/pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz new file mode 100644 index 0000000000000000000000000000000000000000..b299e7d85808e540bff2ea5a9d688a677677289f GIT binary patch literal 521 zcmV+k0`~nMiwFoU=zv`U|8sC*a&u*KWpZg{b6+qnGB7SPE^ujMYiwlzZBk8d)G!d; zQ17+^Dj}++%cVj>NZSJum0HAs9%v5;B68@SE6dKBbwt)1`2*NPRXDT=$+r&wl^cwm z1Z_(lDU+GJ@w|C6-^j0nwO%~_an00}nY37Fk(!!mgGT6581VNe(VsN#73_Y8GZ_1I zY89+EStX9DMe*qa zQ*0)5Q*3++esn!)D!vMna%Kcpqe^_C??Mdm*E{KunyFcUo&?)LLlb>tnYNJiJTZ1r zLqB2E*0@ZDw9Dye^?)soza>vuZ LQ#LY-Kmz~(w%YyQ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/test_mi_py27.pkl b/pandas/tests/io/data/pickle/test_mi_py27.pkl new file mode 100644 index 0000000000000000000000000000000000000000..89021dd82810861dbfeef234dea91197396e20b8 GIT binary patch literal 1395 zcmZ`(Z)_Ar6u-T@p6!;Z6PwD7TRsEv2h?Ktee@rxtj}jd-wJa zQspX|v;t~k1mZV8_@U9n1S9GfW459J8?Eu1HYO^XXdpEYX6 ziP3#UVrvyxC$Z{9@oMLn|V&Pu?$ 
zDXz0}rkyhr)C9?M*7-jiO=6^|I9^*H@MqGxslCR_HOb-z6JL`Q1PB3c=Yd~EeyMwgCEb}A=6M=wq{M$qYbwD z7k-*N?>3ybM|Pk1#a(;!LgKA6pSj!S-yR$2o^T%{Soc!b<JE34+1007t#D! zQ*UtYXSapDa_Z&QbMDUB*V)O>$rEL==} z>FkD;ofl}YP-_GLagA#d_WaAY6JINZ-D Ni#f!xY|YW4{{SjH`hoxe literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/pickle/test_py27.pkl b/pandas/tests/io/data/pickle/test_py27.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5308b864bc0c728db604bac7b5007c1c0c916e4a GIT binary patch literal 943 zcmZ`%Ye*DP6yBGnCWuvZ0(MF=b>%5pv9W@ zqaV>jg4zh`Ps&0>7A7M}Yy{F9N-fH==p9jrf2!;3Mr_pmak%%K@0|OcbG~Mq5Ja*U zsfkAaXy`D>WDBw$2!;|326VsXLyCez6sL$ny{u}AE@%|aNuVe)3p0vy zSxW?3`9n2$D$QD1dn5_)lD0((PlLAVwXB7;62NqtwL@!@+wHFUNseh)pz-YX@7L9L%U(PnRRE#`)2FkSS z)8{NTOM#_Hm#_4C_?z;2i8b)WB}vF?4e{N$0# zAhsbtTJre2F?_YEzqi`b2V+x*kH5HUoIllB%{={R+-XAbXnP;n7r*Z}P#g>oI(Odw zI%>IL9bm={EKPb)H<%F&8z}OXA&q(_iIJd^zxh+0mTs+v4 zUH;$=c$sbMJ^i>J#P_eTP?nfenyR1xagUM%Gmrt&2LT`KxkEBCixq$fq!tKRfD$ls*@0}3 zdXN|g5Q_mZ3|s&@0;Ue+o|k_a7(nV_YC-z`gTZ?U_5XeSKPVrhjvJ~Lqz(i?=7Y=w z$?-zjfw86p26; W0mF_VG{jBU;yKhUskuNAAOHZej8AF+ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/spss/labelled-num.sav b/pandas/tests/io/data/spss/labelled-num.sav new file mode 100755 index 0000000000000000000000000000000000000000..bfab052089d7e62d2e9747c51434cb3a42156278 GIT binary patch literal 507 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiVV4OR%x%uC5HFIF%z(lY=-1vJ3K zz`(!=#Xt(o2GI-*kq!oC3T~-M3WlbJCRWDAR>o#%dP(L2O$FJ90ytP17=Sp;F~|T2 zjF^FvAooDP2Yc?2jLc#MAj;261=+^}lz_RH9moc$2Z?b2u^14;zy+X1Fm)h1U;bra z0I7$m1?l?_2Ja!%|M&I(pnQ-zZm3$2IuHPv4>AuV#|vc(0%@4LZD9T$&(7B2Y!Z@L>oIanrSU4s}Z^Pyhq~H&;mv literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/spss/labelled-str.sav b/pandas/tests/io/data/spss/labelled-str.sav new file mode 100755 index 0000000000000000000000000000000000000000..b96a9c00fcec10b33cf35b2d3d87bef396fcd3ad GIT binary patch literal 525 zcmbVIT}uK%6dlV-j3CMfA1^4V55|6&#DcJCt}L_>&PMcDID}vt8Tja5^k0Ne{sOam 
z+PTnkXE@xsbI-kJW~$Mx7uG8cin|Hvd#y>Q*J-TNxTmSzYQBs=Dbe&eo{naVIeD!M z2a5!IN~xSB2ZcPtQ|S7n%{#em*AF}=xb&szzmW%vpSY+TyE6yv0&F zx95qW#OC<~8Bv~fa_=MFqYq~VW|=8iv7zYTz1}JXy=c+5`^6>;yUp_3=FlBmEp(WJ z`2cDsOq?NR_wT%#>BxMbc*=zM?}M=iP(Nd$`J9<`1=Vmko0xjdsTCWLl&s`{<3k!X RufA{##+Dxe$fw9>{Qxb9P$&QZ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/spss/umlauts.sav b/pandas/tests/io/data/spss/umlauts.sav new file mode 100755 index 0000000000000000000000000000000000000000..e99cf1267bebebd16bdfe881579e6e319aa10986 GIT binary patch literal 567 zcmY#!^D%PJP}WrNbn;aQ4hRlb2o7-!@eB^}bPiVV4OR%x%uC5HFIF%z(lY=-1vJ3K zz`(!=#XyRI1w?>Bq=S*Mf?H~mf}yFQk(G(Dm5CXeUXr;$b3yij0LV@-;9zB70OBym zAVZLOB0#~AjLc#MAWFj2vPtHTZYgOH(iV8Q2SHUQqzHA3;+QaUGV?_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/S4_EDUC1.dta b/pandas/tests/io/data/stata/S4_EDUC1.dta new file mode 100644 index 0000000000000000000000000000000000000000..2d5533b7e621ce311de87989c9b7368be4c86bb5 GIT binary patch literal 2997 zcmbW2XK++i7>3VXLPCH92%rQImJ*26kfkI*_M979kPvzgq&E%8nxbO8I+h<6u#E)U z+OXHCSSPWgu3blG)UkIRdl?IO?suP2e!w5{BhS9y_kQm==kC3G8$ymsQYDT$jK7A; z%G#>N_SUwh_N^+`(ooh~Ro|#$_J5gF{14R)tp_A$U{{Nl>Mi2Q>c2Lt2 zwo6vlv|tOVRT+Eip-MBH31L7WlWx2I^U zXsW6?xI;P0>0v{=L&^#HOj3@2LA!gZ&_1O+oopRS#Z3zJUQ$;k74{B%U3~(r`zmL@ zKc8mxONruw^q{Z%(eH1$t0PknATI+B4Ekn}WqIvxJy^7-kFfM}j>I=a>M=J{sPs@c z%rZC986A%ADCMO1`xqg8PmdIK>oEOMLA`4<=N=9O>);5ey&XvQ<2E!^u)pg&P` z+)P4GhEt@z)18XWfzwzo7e=U0hcn4?3Az})6kUQ|Ci@Dz%Y`v_g;2RGx!+caKklxUdfcp$xo)$T`E^n^ zX1y@%Zct8N|NGpioFt!{B*)XG_&2lu7V#GqmI+G>%azl|_gB!bg4NQGyR|~EtB!g- zY#{eo_In)lMz|F=!Dh)ZCWby9wvf|`ZiDUcgkXJrVz3``67_BD>ty<;z*8;#=ixN; z>ExdwT4|5x?@ZBga~A!xsh>msx$wLo$DGgn1@J=4y@^D@MS(WkEfb02=nnM7LSrr= z|5C}@;a*1W<$+IM0e9l-pufx3vrD%ZT#3F)e4f4C!v>){Q2KQ}UW6Wk4N zhPNpDwoqQzt$|=>HA>VQ!zS7dHDX{pZxi%lp)E{P(FbA)(Ses3*am^pnvku$L43 zex=#~fBiHkcweW}?+5$SAArt)1K}VicpnbNHw0$V9}0(&Kb-yu^hk6V-zYfR?%V%e z7-Q-GUW~Qe)zJ~nl6oRK4&QjImFn+ilXEn=6VMa!O@fo*6gU;;z-cfSM&NWf1I`qB 
zdKNkl=EK==4x9@MU=(U-pbHD(JUAaN5E`>k&K-6a1$9+Ke=+A?f-Z(jxh^H>WpFw5 z70j)KtKe$6Jv_Ze)>CFJTqlg1_2g`Dg6DQ4{Y|ix{LT2bpv%zZtW$xmv~!V2L|37! P@z>z14RUoIzIymCo1Z8n literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata10_115.dta b/pandas/tests/io/data/stata/stata10_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..b917dde5ad47dfecfc872b5a5ea756ce603eda59 GIT binary patch literal 2298 zcmeHJy>1jS5O$8A1PUZ1puUEdy$cahqLH)WuL-&aD5zH6TZgQ)KPTH9L_#7GFF;Fo zqT*?I0-k{)4TvVpc)ZzF?v@&f>}hVspJ!*rW6yZOI^|T7IVIsxt4tdSMi*LHEyv?+ zh?)VUM^4v<6rMeQ3Q;?ReYq|m+LthqSA;-uBB4;JE+oniKYU zG~ODHc6PT$5Vf-qHBppmCefOIJ_mkljfF>n?+tjI1^(?4kH24L{$QQ?tH4hJU)x~( zN8rB$hlsqwT$x;@g-+bjOk4T$wEg`$h3MQgsaFb@Cw+Q!#Qz&WRkgR# zuW(S*%H`GTUJb91l)1G%T2<}0q}PDRD|derExhYniO&G97;J4^kz0pbbt7y7TZZUt z3Jz*hDre>0oqMDH$X7&k@xqFvEI|-83C5#XywQo@;DmQ5RW_e4z0v{x1%*yHtHr9W z{$5;kCeJUutz(}ME5+OLD%lU+Lp-_t@gd3a$&0wbX#Q)^3*F*O|M4BxpM7fdFAV+% zIqY=&aFAensEnFAZLsy_Q!61_LNdHpu`3?$39*Rrj3JTC6iypqNFor+0|Zz7RevT4&QmC-{2^3(A*;Cd)bEs)|rAL4!l!0h!$xfM`*EIaM!}IfBEIT zFO#M`$YHoR#Q`W9ZkK%;H=vgc+t)0&L2ctMtT4M literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata10_117.dta b/pandas/tests/io/data/stata/stata10_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..b917dde5ad47dfecfc872b5a5ea756ce603eda59 GIT binary patch literal 2298 zcmeHJy>1jS5O$8A1PUZ1puUEdy$cahqLH)WuL-&aD5zH6TZgQ)KPTH9L_#7GFF;Fo zqT*?I0-k{)4TvVpc)ZzF?v@&f>}hVspJ!*rW6yZOI^|T7IVIsxt4tdSMi*LHEyv?+ zh?)VUM^4v<6rMeQ3Q;?ReYq|m+LthqSA;-uBB4;JE+oniKYU zG~ODHc6PT$5Vf-qHBppmCefOIJ_mkljfF>n?+tjI1^(?4kH24L{$QQ?tH4hJU)x~( zN8rB$hlsqwT$x;@g-+bjOk4T$wEg`$h3MQgsaFb@Cw+Q!#Qz&WRkgR# zuW(S*%H`GTUJb91l)1G%T2<}0q}PDRD|derExhYniO&G97;J4^kz0pbbt7y7TZZUt z3Jz*hDre>0oqMDH$X7&k@xqFvEI|-83C5#XywQo@;DmQ5RW_e4z0v{x1%*yHtHr9W z{$5;kCeJUutz(}ME5+OLD%lU+Lp-_t@gd3a$&0wbX#Q)^3*F*O|M4BxpM7fdFAV+% zIqY=&aFAensEnFAZLsy_Q!61_LNdHpu`3?$39*Rrj3JTC6iypqNFor+0|Zz7RevT4&QmC-{2^3(A*;Cd)bEs)|rAL4!l!0h!$xfM`*EIaM!}IfBEIT 
zFO#M`$YHoR#Q`W9ZkK%;H=vgc+t)0&L2ctMtT4M literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata11_115.dta b/pandas/tests/io/data/stata/stata11_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..cfcd250f1cd9fd5b3c2a77f1414fea73d407d955 GIT binary patch literal 810 zcmbVK%}N7749?b{gC_+qg2;dr6l~ckUBn)AQT(|mLcxe3rmOGBFa+pinzxAmYO5Glt_&dPzw6_kJKMl=8vKP literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata11_117.dta b/pandas/tests/io/data/stata/stata11_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..79dfffd94483f30bcf6f155711caa392d849f970 GIT binary patch literal 1268 zcmbtUPiqrF6d$94coTZ@A_FC$V4ZA9(a2+55bL25gn-v&lDsy7Nj9CG26Jk`leb>I z_3p)Q(6isdqaQ#Jyu|lrXPdRgia0R){>*RQ|5+%!@g^I3qhW+*h)#n;i>5%GB#%K= z0`wOibH{*Jy`i^YNZa$aWt)wlJ zCr?`)P^(i=RTL95(_}5I50P$ME16EDr;&b)R5r@<_mOt5m-1nxpCUbv^y`iC{9B}; z*elG98=Jnx%r9nG=%IiYt^@?*r7e)Ld=2_*SGz8#>oIQ4Q%%UPbg z2XcqmL~I@k9)~ZV&$Hz*F=y`@ET8_n!GsO&{Lx@-CSuGm7#U{<9vyXXJSa4%-!wyd zZ`9nAlxH-h!Anz`#*=3^qbQD%xbWfAvy_4-2&ixo3hyl2jkH!s(%Qb&J-S`*0Nbsv zTTx*BX5{(VZqU9<`yOqs7l+5sgr5duUsMGbZ_q8|L7orAOaAPdu@kSU1N_=4fM24h b2IX&2uvhlZQE)W8{x6F9jFkneesn(pwkR(F literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata12_117.dta b/pandas/tests/io/data/stata/stata12_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..7d1d6181f53bf2b443798b2842cdb95f258b56a1 GIT binary patch literal 1285 zcmd5+%}yIJ5Oxdnz>Nwa^%hm)S|>;dZ5g?!2M_|mfd`1Zn+aKILl)bm4Qdg}od@W1 zaN{XBaYDQX;zT|6EJ;{6RO+ECt-qOX{_NTDf_2*IcE@Q2J=7gE3JeygEvi;)4P>Q2 zI&?TNF>LSZBOsykK6;8Ct zby{HC4NJ7@{_&j<=OCZa2?HX|=9}>aiNwfpJA{-BWu&K2gve zn7(#aEhnFvhxI(KGs}s6-T0E_uLD3t_N*3(934vinPJhKX9jp_LR zhM&OmFFMV*U-1@hY-Fhx*riUy`*`(L2sCa;480&jFj!{_tF{jIiKO7sykC624t5WF pxnK(=8NK(*@psA+mMB&p6hERnOc{Opn0Ru^=m-;EpVaZ6?+@SE-rE2G literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata13_dates.dta b/pandas/tests/io/data/stata/stata13_dates.dta new file mode 100644 index 
0000000000000000000000000000000000000000..87b857559e501e555b74131bce5abc7b2b1d98ff GIT binary patch literal 3386 zcmeHKO=uHA6n?g1#Xl&Bco706;HBM75vy@nAq`TfjY{jGc-T&}lkVbfwwax@1`qAg zqZbd_i+F1f;=zMntXFS}XQ3bz5%Hi#4y9Q z!?1;1f*nFFM2SyuM666^2pM$)P1hI+sUTb(zk-a0iAJpxfQ(TK6|4gQyj!r4kEe-m z@xJOH<^_aH1+!M~;Q7?GB1;XLnW2<LIc&PED$j^@(8qakr zdq(5gL&`pLSmD!S3g2n$I0bzJIf=t2MXQ(g=cCi|< zFn~(6jRYsicxqG9mf&K)1em9{=dkZNn0ev9qYwm{!T~S@WyJmet%=uYueF|95%8_u4DozI6c{JqU1%w+Fm^;Oz_d!JdSLIR5@4 z{xc_@V>vc(JQd%O&!p;G3>|`~^Lx*OsBf(+-1s4s>mD`g-KXc>qD;JfvucZa2aoP| z342l7@5d^#jMVK*eaha(mrErsorIk^gwD2s&YRLGCQaYgy8fV|izzNLLiyL1SBxxV zZ1eEXL(B`+;^*AGYOap!x%J#8Y{C-}$`x&U|KYB&eW? zVL=HVV|18>1c%ILG-4zkZ^y2!;N*m0Jd_Ik1HITSRPlLIhp>B|;0t6o(h0j42=-}` z`Q!*19P=Uv{;A_+=K?RqhnX`BRvo2)U9E?OmuBEiIz#rgv^ys=q0t5*6RqG! zL5f}LXqsk-F^HB;p>3BgjF`O2xOCLyFKe~@`d*#W{W_mDdDj75e`fLr2X%eXh*)FSA1>Syjp?pN-64!i5bQazwBJOWq*qmk~iEYBWKc&wZb4Cxt= z(2gVqMCk7<5N87sp;#=wvsR0&_4=Gj7m8l(Y_S_c8SIx2fqpK7AXYvERD|k?#1RjL z??nQZlBY9rJ?X2_Id=0RJi@~?d^FPBQDW?rqv5gGXekq`xuevE@wU=hmTVEb_QSvu z>tAIFwU;FHmn~)Nl{OUwwiN-YX5_YZ`JZ(r*V^@=S>R6!o{03ZkU1275l=HmJ5vVvuEwteylyiM)UTpUsF}uj`jQX?~VOI zYXMBVJHEr;-kJQrvP zVGXPRxMcw4orJ~@RREMhYBsk`Zz4XN%PIh((s$van;%1Lu-r9hO0W=)m|iL(*!|~? 
zybL2fRKOSO=A&HDYNYpj`JC!Bd;eRJs78w4pwB>t&?JKgqz{vqd>Sxx z8AudD)$q4Lx&#IUALQc`929Dl#-AJ*>e6M@*oE$Vk?cl4^Ii1ce@M`fN^d){tGvc) znIdNfE=pULrA*gDMGYl;7a5s$7oq6}kPY3q8jNS3WWwySm0Mp`+rf~hqwJEE8OPAH zq)Dg!?81$eM=Q7C(otJ4KKlW{e$pq$LD5ieR(!u?cdeV#O^OfNI;g8;>sH=qc6(cY zaqf-Mvn-(N&e*yCg Br&0g_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata15.dta b/pandas/tests/io/data/stata/stata15.dta new file mode 100644 index 0000000000000000000000000000000000000000..d13e2fa337db39f73c2fc2a252126a1a73396180 GIT binary patch literal 3183 zcmc(iO^g&p6vy8JA;cJh9~VWS6GS1-FzgzUk4~#>R0w1f116e1tkpd=GsVxfv2M#2b3h;8*+tF2X?%qM{<|`mgTl*{^ujNq+shzFxh0 z?^RWgA15qfL)C-@euJ|rH-T@s!7bi8?~%0b^{we$>-9dcp|97kOt$)!!ju+k zLqYyK?&KXk`HS!5c_+Vj^1(TI`$Z@F=jG*^lW#luo|DnddAoMfuh?1rVP*siB_B#U zZ5{{LC;ZBcD%(XdEGghgLy0I>xuXB!nc0@x;>X$~Qp8E-IKR>@`ZaA@EQx~^>sGI+ z6Rqg&%jNn^Ott#bLwV^^{|0YIqub(F3KdwoO!-h2*P(pgM|W%<*wVj!pxgVuez887 zY1CvCGgFI5iKJ8VVy<|dNoVGduPn0p<2|ePP8=Y*_v!Bz+VbtiDprpldlU7iPEX)R zbfo=!*5BnE(b&1;wtdm@V^7%e{ZHlNd)&CO6M6l^u6^LO3z`0Rm)@G`DaO-`{^Z7e zboHC8|N6onCyV(Ddy07q`)?}xJ3ar_znK5ynfAMJgzxp=W0VXwhIEphC~G!l5o<$c;O* z^kfzwa`ko}>~kl~0=(Y_7K7#BKF|j?f=yr>coGbP5ZE1O&yob}1p0v!60B@9J~OIg45tDa1Go53+BLHungP-N?;?{ z0zyy+2J8l7;3e=fI14U=&2zB^upb-+C%{|aL+~Y-55Ft~OThiW2lkX61RSJR!uA@# zL6!MKK<)%hVE1ebSRZN72D^X(F|dA1!7vyByFmwxf<3_cbRXCcuvcjejDv&V5O@)s z1D}9T!Drxe@CEo5TmTorC2$2?1wVnG!7tz%u)hBd{0^>zKfn#}C-@8e4JH7=-#uUs zm<#3sn@IMSyAv!1cY!5fIk+3F0A;WmRKQwr7+isGLnf3?n~+t3ZK;|Zuac?6%2tRf z$T-I3Mj@|?s7aAlg4I(&k>Of$!nkH58;Cclt%j=5l2I*%9zi2o>KaL=Lr#*1I;9qq z!ggyh6|uU?UaF{Jwq_@*Ysl1y1f!XvHqGPPGdkA zR?W;wbQfzGj0%ntw=y(rbd(JXDfud@vk=YsR8#as92%J{^e|b+ zNrxYJ9_IL9BQdP8;z(n?SVJp>adiY3&r~SR%2+FFnody+1=1s(UaDAiUarnY zdrCk_2`o5&LUSy*G0c-(BM%ad*iSX9aI#iN{D{Oa z5V0cip@=7vwkVqPFyP6Q#H-O{yy#J{UT|h-X8S=rP)!<>ZkoP%Z{C~tn>TOv%`7x6 zWT9Z#LK^JXQCK%LXy{QLnY!ln`k^Wr*gI_L0|rw#8#o2kA`81T1wge+gWcL5z#ph~ zYcPs>^{57DA8$b`64R-_SVC)T^}5fYVYkoY^}Bto$B(r^wb%<)2_=RSbO8QL3wn45 
zZ*&U!Wf$i+1yvQ!e-dC$9N>JPpy`90|J=ywFF|)7;yhHDS2%=>2byDotidi_4$F94^xx>D6%5)S^_|3C=vml+CgAT5Hu|#T9f!wr*;Ht ziNMZJo{!KRFsx|Aw46=U)5G?oBZhqgMhsb|)_l}wRu!>kpQlR0w(9JF4fSFbV4JB& zPc?>6m0_Y=Jvyid?e|%b$G8e0b(4Ao{%r)sP*U{wpig^sphv zYdOjF$Cp2LFrT8H18FbuN<5W(CBBB4a(r^Uy%qQ*IEiClMZYsi=~x;_0QCU!h|~jE zu|w(^D7H#{3;*x$z>?dKC@V13j|>#Dbb}r=BOz0R>J~|uUNW_ftA*vlN+DBN-bjJ` z3dz4F`8OoLO7d?>{+)c8GI{}zba&#B0Jx5C;G6gslOnIy@K zk<2J<1i&oG&XMdq$u5xWbCP{QvWq19l4O@~8cgBGIE|m+Y5bIXiMs&5eky{x?FaNn zE?wptfBYAkW&L3nuuChQET`_W?(H&kvb^XivY3u-cZgDb=ish@-CX`x!@1?y3gB0E zsM`4g*)`78P6y7j7|5cV+E1CXRH6!Oc8&sD*8~t|e31dL&B{_nm^lipfux!EmH<-h zKzgRnG^=aCeXx|x(o%A__fCK2%lwyr64dDy5+ XJ_5bq*^w)~t*~ubazndct7U>d{*0flEL9KZ}H!49QO7KDPu zGb=e97!)iO{F6%*j0_Bo6bucm42&55{r~;vZ(2@%Vo7{%W^pmH2}tae{L-YHR6Ht@ zDodc!IE={5!)pNpLr#8PIvxv<90O#kTIw03qYB~YsTvxPrbE?&H1!OmYcMn-(F*)t z93--J&H|w1s&JOo8RQ~#8tlpk^{G$mpZ@<3RL8)^0#*kW$3ieNj2cLJ7(xRU6y87p G4}Jh1&UZfm literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata1_117.dta b/pandas/tests/io/data/stata/stata1_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..18014cec77a9109d4fec7f0d3b90ec389f7c8ab9 GIT binary patch literal 1569 zcmeHIJx;?g6gHqDF(SdjfC^Gq+>}aC^kl@+rKk%Bh@93fk({QoT_jk_HMjr=VPjz6 z1}q)80JJQ@j-9p&YNd*W0T227e*Av?-g7`Is;J*ql)yEkZ6*i^#u=52c-{%%jDX-) zu~-;|%d0xz83_$iK!6(rnq&3VRuW9;j7?r`F8Z{6|_H!Nn~$AaBGJA8LIcDTD~h-pPZ& z(wTg}W$El(reFRy`1?DYdu3dipmj|JZ7C-7z=kbJ0RP~$^*e56d1xE(ZOBH$QIZS+ VrxK7#aC6Y%Tyq$1j-34!z5zE}`_upc literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata1_119.dta.gz b/pandas/tests/io/data/stata/stata1_119.dta.gz new file mode 100644 index 0000000000000000000000000000000000000000..0f75d8b92db1481715ba9df65d1fb7382a2b7e25 GIT binary patch literal 269559 zcmeFY2UJu^(=dDoL>!5N3L*%*;9x+4WDpop3Bmv(f=CvWoFqw{aUC-Z0+J<35CK8S zK^O#O1W6Jk2?HWIXBe2dU&B6O_X+#$_x|ZU|NETN=ichBs;;V@zTMSTJ^WAh>>*6u zHKU{V@U-%@lCY4FkQcS_w1TGlP25t)%t#Ae$Az3<2x08@YVxRAWiXFi@A7MQo$N7w 
zZPffYawa6lshIEai;&2EOhC6?<)LEsx|#i-Ql6onMakH|9O}s6zIU3Lxlz2J_JZx; zUOz^oqq59ZKe=6sgE-9-0-hcOro?w(8EKTKG|#XJ`{)phyf zW8`HH7U^)R+Y&G$2F*rX1ZmO2s`#sAg;z0Zrtbo#bG4r*W}E9|FZ?(h>HDetQEGgb z4)4tVf~#c#In<|LWc$X}N*K#l0$;lZ4HFBRmO|kco24z+sZiM#UHJV}(~|$Kh^E8$ zS2rBVjtTJMn#mZkPZ?VosO(-SG5Mh&vNye9NbksGq_s+btK^qSHnP8u$-qQI zsCRt@$D{KA*v2utnB-HlB=x~yKXu(Qhc+hpWjgc?M zjwHs-n%U~(o_Kc{o0;Z1S3cJ$!_P9+goo)f?{uH@DcX z1yQ5i_0i+G9-~e{qP@fRdI{>@#`NxUi@H%-=PFN(HFzX(UoTpFRdES@b4xAC{gtVl zyI_CnC3Nb6X60*gzE9hgBgI@yR@2WiWZI=RZ%jr+2DaGx=8G#vpPWl?p16td8@}aU zenzn)V1QUab$aa)q_-BZc}F%i)ae`X60XN%(=QobH<{usBpmL_GjM34^DC~I)K`ee z6`8rA9FeP3(B%_y6CrwHdefJzyGeNool9u^Me}1taCl-#!O4C!Tc8mNaQd_;wj~ntoWfw*HRG<8mcs z&fHu*e$93N)1I}ZL|*s@%W8`n;;?^6dfJ`oG2f(9&l~5~ygH7dg-_awEyc!v3)(c5 z_i@SW`f@vz)_JVDFjplqewO&!Z!<(TwUNaIHprv(9IKw4@Fkj>1ZB;!oY3ORT2fzK ztOBhpD^)$xNfVBrjn?{A#7>!MSd>aP5ckt#MOCk?3E<}Cs)g|==%qU}=?GmDq#M_QM2)_DpCPv~&BCx~Y#TK$TMU2reQ zF`MCDFtvIj?u%BpG02rex?Jqzbk|F9%>777PvOSa1o3k|HUFYw;3@nErNdMB4~mqh z@E?>yPvJi(o}R*gP?kM~|DZ5?3jaa*$Km9 z|0#cV1lsC8{HOfM5%^N~;Xmb1jzD|ehyRp6Hv&Sfb^j@Ub_6m)G_?;$#K@o|oVBm_ z#(XH0cw}$Q;d3}5^uFLQ7loV-cW#zh7vR`K+&<(w#xZ4!ZIEmOtAeJomoY1_xK&N$+0{&2@`Zhu->N&VauJm`FDf3&+%_ zCFlc#>+KQ=HxY*%5CKw!d-VWEBQE10`XzMk6ix?m@ER1ND8uBo)BF-NLm zyb=BqC`M)ZCc4wxVv;i%T92dETepR8gVi-A`KRjeJ>fH-V|af=ImubydT5fbTxa8H zl24F$Uu@$qi(-6t^l(iK?`%{~w8Wu_s2qFkPp5j0gw(%xt@oBhJ*Ya`XQi!Hc~lZH z5I*xUhPNTgDOuvs$0(-?2}H4t;d_((l#qHW=X%QFh<(oW#oU}XwTi{8Z#^~19}2Gj z$)%pc9r4AvUZHMRUwHpRiCCh~v=L0Z_o{+aM;p!ot6Qh3dCh+sPl0B~4?iqTX8CTBx`dEvs{n%XR7sOjwA%#xSm2 zGIazdUxxKUT^<{T!Z$Zdu`j)Xwx-NT*w0!`pVKp+2Nrr}!RwtL4H25crhay73$XNx z&&qh07JYu~*~`ae>bv={uPNHukYdLIF~YCp2=(e|c*a+S zcFply^GHbh^wTBmkm~E+6V)t|G;9Vf`v`S?biIq%X1i7Pb-nDnjkl<*<<`c=a>K1` zNae%2SlZXAFHQAZ#NNcUW(sWDwS{%~G@NH@tZ7mC?S#OzGR;=swKLVExDg(s@WMS- zB#RuED&}D`66`SPxH3+yr?`gtZ{_upU@kKfKZ`sqJ@Q@?olJ-(alt~TQa@DrX)t-# zIux7Q{~)NsaqIFIw_%mGtR>Y7T%+hPG0gAnk^b@aKY*r#egi!`nCqI3E+lhT8o z*BCP24-*q9PD=9mO$)>?wp;2Cp2H3+6E^crhTmjR)H-;ogWQp7kWV9i4Wg-lI7pEr 
z4f|G_W<5e0-A%x{`)*v6u6$<@B5HJvA?EBbR(54c>wKO5{(}I{6A3U+Tq;i-qwpoYu zHf<%~%8eR^*WGJz^ct@FB2{_>6MT&vT7^Do-h>SdI&WiKc`S-)wzoukZ0Nb4<3@ zE+^%s>2#zfX_#YY+!X`o{gQ^sm9U7RV{~G7*-^*C5wi<<>B?rI%Fi0HV=Lqy_zYQ^ z)>ubLs=q)_Q0aM@mXvGmZ)c0h>RN9}b66QqJhcRGC@*1`v}*ZaVgKgK1yH~B7F$|k zmO8)AD4g9F3R6CVIX(4sc&$fRnn_%d#IKc*r>+tk47aTYjxC7IdTb4l$fA~GM9zWu z^1LI|j;Iazwepe$?=7m5$?Gek2C4TNibk?enz(j}Sdv^xKIyZ$>T4)q;_QMBys6r? zfVKNHOb&p{+NQDbC5{JMseKD1{gSV#oA>2tj1PFR_R5u8HWcl}pG_0z%FPOJxnu&r zcE3UBEcrAodN&bfCc@+H6*m!maDupwl0m3)cn)#Z-2$7g1ouzDZ@uC8=9#b97atw` zX!;|nuk*VDE7TBuA)2W4$uIQvOJv`EuT^r(S`ZvBGIMLI0^WpvyM(vb3H^3$KRqmT z+Wx@P6ec}SM4V`KE5p@tNS$kNg08r*<`SHDAscraFF3o4HFn`Al}|tPP%p2bZ{h`4 zMbKxNEK?ztQ@9X1=0Zqj@0(V-?qk&wOaf)r6%ZR}KD1}MMd1GSx-Cth9yhMQZ4=)7 z4Wst??PiE+yA$5+PUh#MoUr^hw{Rx;+tTKXZKzM{KNZ^?{)F!hpBcWZ&FZZs)5sXh zI~kQDeEQHxRL(u^>Qg=DfVR5CrsC&_TCgggM+pvuPjCZh0x(!>$#BO8TWWp1r>#aj zD#@!**J}@vP(3}w8c%Y%)D8;m~`ONc^TsnnJA%fF?^(%q{8GZ$X>p9}CjaLHpFeKZzPuJ%xSZNa1Ok!Dm@q|d@ZmvM#&>f5+PZTe(NW!jFAh~^R#07y5{gh?=7Wo zn5pQmFI0LmVVm5iNzKu{t3yu3F)fM5e)KX(DGbFbCN{VP2FG^+Ev_YP>LwgdI*w|2 zZyMyC5x6EdCE^sQcXk53CL=JjdX8Mz?u|va?MeCQJ%VHyD==?lulFYG|r?S+u z)1P#DzNUP* zb4B#vXoGM;x;H{`4bN{*v^_;Bu=BdLFvc$$&)N2q`f^^{n zm^`-A8|go?p9d%<23!fxTdFr2&dWcH<)pwL5&|ElcbSorkeFlA_oaIE$qgrFRnl|T z-odOJFC+SUY{sV>6%;Wqu*=?kP6HYdrOxMGzZOX^E_(Rv=M(zQ#ogDYGT&ASTzZ`O z@S#T7&xYq6*u$(OE;!o%!rhWE);!&qSmrTT=0CeJ{eyJ#=338~VoTucSmI_Q_1j!+ z>ZVB?sr}5CkY`1?{(c{5QmN66$`PYfd*!F2cT-8va=F%(pN|UKD=U0AjMJTFZ0#3r zhp{J`#L_lCN)exr8np!|H07oH$~4)wZ~4WwyxdTY6U*q)k>r4qw3gDPuun%r+qT@} z#@e^+ekw&54Y)wX34 zht*x;xA$%{)mlojuL>I_wgpHwm9zyYHhs3I$u_}WO3AL8OIcDS>AqfZU1?>3yq4!{ zD%!@#38O3a15Zc&+XJw1vKjIcySDS83zt>F??-Akv71E|dEP4FRpfoJ`V@iYF5Cyx^0c_bLh13dZI84zXl$?oc80~M z!qtq7P1scDM-xi1Tc&YIx&?!vCNR1dO2jPr*jMEbZjxM}LqFC+rF_!LJnSjd;RkJ7 z^KDH}^~Sq6_fq|8E*yw`x7^SX>JBM>CYyaC$>FAwsdIb?^7tO@GmPWXDXosGxu1SG z2IAz#DPv2*1C+OYj|AE6In?6tVL>;%(kzVmCTvMzLEgMOZTo_}bL>dxzYre1;N!fY zh7#9(7fF(F<~(`AcL!?iz!Ap z9jq>98O(=rouMR#d1rdO2-@y2Iv;A2K{qCe%d)ru>aJJL^qE^jDb7#=`Ghll%$Alj 
zy^WB8v!>l?j;vxKsYA#sCLD3W_^eY%jGAg^6dom*;-IA(AEsAwG^I%+voE~$rcehU zm*ki%7Aic1^fKY-4aS$BLOQFd_DA7m1XEmoPkuv4%2{*#G)G*qkmn&J#)P9h7@u(p zX{M&y9)%YbOtIHejSg!qJeu-Z!>%{H^*7|phmg)D9F$;u@hN1WnrdGZURf~3SxYr3 zthM-PN{faaP{vlM5cDp|(OWF^Tk>;is$Eg|p9E7JwQ8P*wH6(<`lK=58!m1m?iS|jK_ zs398h5X+GW3goR1bsodT??=n-9Ta8|523?rBAim8)5md+`vr9N4)*P8jnFt2qLC>a zrpFVer~7k?)Xyo2=WDtj;{7;~)`A?9nnJ-lXmOc;g|I_~Z6jiLIBxE6#36;4@2Uw& zdF$iA(`DxwwDYvud7eIw7UXDNoP3{GRkpbr3J(SLZ=`eikX5F7DwI9^i=cz6grX~+ z5Qb6Q6VhUI^MrI6_jp1n^wo3K$$S`9^@!FlLY`8{e*)WumZgwN7**Yf)^?#Ze{HFL z>lYJ8I5BH6cIBk=?Mf(xRv~67Bt51E9U=ZZaDN=$GxSl0zjk4(5P(%9^z1m=lu&WA z5?@p!#M}P4v*;@Q)X~k2RYVOhLc0k%tqz$;qMr-mvbn0kF8M2}gBPLN1o;sleMwxb zAUfwzb(E{Y6g6NIav?xjH*xYBnVwCM$5Xx7IK-$xO1?CG<5|@!5qJkyj;y$Ck<9(p z*T;`=o?DCIE+>6xRYKjLm5=~>ru+?+YR};ihu>KTGXDUj1;qRkTQZ)YwEVRCE z2l^sBO5x)?r--`mt<5|W!`&+(@!t~>hO$c23p18il``WZIPLntGyC${{gYeP5PnL-!|t;tuIrIo~3oq zZl)2pF7|^Jy{mHZR(ojls&XA7V6E+BBS{WDzO%Tegrf1GxI@+I)yF7%>?nJ1=o@iQAQel0X$cMd=YT-jn^MCt&Rz&yt-JUr=xm#haqE! z0ds}pqCE1SpUyk&|J!HXEsowSq2LjrLLlt3rd@WpcvM8|m7`W7!YSDoYp&|38b#vY zKF8;tKw8Tq&8~0&hVmT-Pif6e#t|XU6bIZb4uw}d$x4#xmak}L0ysNdFEXOl=xE9b;gr0Ks#kPs^dj+D&+#cIkXPi9 z=dN&M-Qu{PC1fxn#Jt16owdy%4#rMY*edg)s@V=h`3{4ZJTmZ24?ElLH%;`@fCcI$m0@KN5jni`i995(1mgommOw|8Nv1*m?1@68nHR0oJ9zSFS0bT+19MYAgbL zM~bwx#0G9D;XFfw#)dy@O)M?>EE9F9UivyLN9u`N3k$1Smt{;90(O0G^cQK8|7mjN zIklpXb#E2Pcumf_SsAC`7up^=)^s6oIW3re+{Kzt-3Xr5hmAz49czetIf!725&ce(U}s7Ylm0C&ImR{u+{(1E1GG15gLl9 zFmLUjXJXFxN<1pG=CCITvC{FO#~~@?0gUQ}2)!>t20I{_-fe_`!|^`{vCmt38S>WF zhb}O-dP3(I-*`gmjPss2G$XqL^tW^54Dv9q^yr`rkPFfA#A3VpLN!k4Yi#p;G?h>I3&aSPChKsnOW> z^nU}U{qvFh-=5X~gZN$e;tL30*n=Y%;ZX)3Cq@w!=dCR@6Jysa@#heg;v^e`{xJeJ z)_@;d(NBsb9Gw_P>p6;JguMme|M=g%(RRUhr~ z?RDd=KFAm|x45_uEbh5Bwcrfz6RJM2t7;oe1mNyH6^9S@@5-n_iub-@)4O#sX_~&9 zN4T1{@8MCf#G8h2T8dS(=q6@3bu-RO>)@zR*2(TSZ0rutp-qHm{ry1bl{?)uqxijX zy>`cZTP3+b$mAib*Llq9b6Iu%>H{t>)TDb3Z1YvI^JV3r{P!AN#_2s|l}<-h3=S?M z(%AVX>*A)%TnX;DMd=i8vwPbDMF{mjd1n3+H$7pBK^Xj`(BUrKa|85ex%N!Crzx1# 
zD-*Rj8f}vjfRQ7QjA&}*zanT>3B@lgE(UAmf0KwU7mD}v^{qN;JpWNEpZfXpl_;AF z1Wlq){P5zUgjW7AUxZ+^O@XO-VxXq?UF*}I!>2!e{_G)EZ=?54Xi7}1>solzY|OM+ z{Y5b`exi_KokVP*P`saSnVqJ0t3<53W|W0_RqWB&XDW)tcjL735Aj9Z)yjYI`Sayy zn+VnIzNg}JC%_>3Z|K&;-S^j$!N*6HI!9XscyqdmYj7OngqF|n3>A;mTc`5;a2 z#*q>ED4R-a{X!wd+rAsXvbS3bLjkTTaJY{^--zHY+`I*M0(q(3>bBb_Tl_ad7*BbjPLnk4O0mky7Si;KT8%7x>7 zrr-HjY+m;n8E6_=TASg+t%STD>Yb#XtfghYjP6pmI25Sf-!`z1JE^T?`b=>@6{Kw? zPda9g8J6frU7FvFmNkl=Gd4N2icovmw4r#!!d&4E+~+Oo?9<0ttl}SZJ4mvBHDz2RFBkJNqRHD&H@TYr`MUm|$wae-Je9H4*hZ-oc9)UPPjrF<&{_feJ zmlgRd`KC>yU{ty&CQn{YUsjIv)z&ZEMbE2Meu57(A{ejMi@X+ktO|$+r zCm*8!`uf}~wWeWlbYk;`#fkeQ1?t>Ma!32op+mU z{$k2o-JH=n@7zY(RixKjnT&!~7z>~Zm0bbtIwh6M{tt-orcqZ(RC}4^#artxR_B7K@jAe2NG|_jPluw zX8791`K0?EYpS(Bf?2v9C){=>c64G2;jKHd_HqLq$3{4Exy@8_X-JA+bLq=IkQ}(K zf}2o|rNNi&X-TENtO|P6Ai3)g{X^{J@^2t`q=%wZzEQ4wlkSjx`a*hzY!3=F zdU)G*yOuC&8=RQk$Jj5yWXTlppC0n~rX_}2_VUzIFx34wuf1 zmqP1*(lB%l)$}GMf1C1^X0n_l*z_p!?U&dwJ>%8=4N8Ovx}!(;uLy2tyPbJyMc5eX z>haCl#XUNun-96$s{Hh7SLQpK&22IvRfcRQ3iF<{UfR(@<8QqlMK>L8D!Tm_Jw(v0 zJ-Qkvc61WKP5iB2+?%DaM%HZ!xXiyoP~V2e%?H~ffSdX|Vd>w{^7zG zQLJpibz|-ZVt3F%Y~Y(kIby&>XQ!BG5G@q8o5#-cpFq^KWmwGky*2><;{exyy51%w zj`7bDHyO4#%3zYLxb*w3UG+CkoJ?c!P}a!*Czy z#%orhg7@yQsr|~9`aMou6!d&dR2&U(Kn=k5Y(IChIcG0WV^TVWWiPW{9`f}~^1@76 zI;j(iT)K5#btUllUaG}G^t(B^d_%tKSM(nyA#=X%>a81II5b~%COv);L4GAZ@tP#8*vF_?%Lyp&_0Sz zV`4X?yK08Z%NG)%2j=Z?zS1OltbVP!@NB%Ifv<~0o|^AD5DL3Cl`rHN79>d z#$DvxU5=8PjBU-dRZUGqhWP!aa&DzySdTg_XP~0U3areE7dMc#%5&K^*BhPtp zssSrIFx;==#r0*a(p+jZ`tf1qNWs4%>^d=jE1BNfIUqyfK>Coy$C>@#q~Z?LdBw;1 z(p&pEi01!`3K#e#^kl7)U24$%_&eoDdN0o1lPG3j^9~~AlI&j!U_U@gOTXf>LQ9WY zU6(4imc}_R5PA+4S}A?cn>vE%`p#D{a<10@LpkF6!MT{3E$#t9R!Vd}RMkP@W2~a{ z?tUrA^tA9c$#4x<;_h|=af@V=sd&>>q^}ZeTkHTaP35id0o#@KtB;C?18T4RxzI-h zrbjP1&k?|4@)6MTv^8aRLd5{g4KiZWIapnEo1`5!Gdb~z?!#;H7bz@*Qy-%+)1b*a zYZTsdud})pO}3k zl3~wBL1Ru*+Uk?9sDA!>3ibgX*k6|Sb`J&HNE0**!bE@sF7?w@wZ%c|w&mW4AA-h$ zq_j6-A~0)TgNEVSfj70h^}7{N=Ynl|PFhwdhA;3HuY@(0&6+{Jkl>N}{JA%;2i!jl 
zs@Tu%V;p$PliZZVDf4Y$rjKZSu-be6;R>D53|WF{@FHIzvPdqkm|bA@wuxK6Z&8Bc zO~jVC7_JJ+paL-5${c##lD$IDOkwR?Se@0GjbYexv9LlX1xP;EoPCZ#+^iJZ-JJazSRud*NUvsmuYSX8&>PZw3vXNOECgP!eUnRWA z(P>1`xPqXW8rE2`ItK=En~mY&_U|Du*{m@d_(0J3`nFy-oL2bjdv9q2%{9xm2jcrI zfLgb^2mZ~``g!$yow=Bhr4junDyL>@1ix;|n6q9C3A)xxRnIwG{cJHQYISSh)b2f6 zxm2GY6FMCtXak3lm+4CKa>kHJ)@FO#&pPS{t0VK1q7DXvkKnt01?cBmhX_uLWuv{# zobR~^T0e_wZ-4fs;AR;eW;|el3r*HI$S7r{2ncU>h(MU`hHgKCF78C;>D$4UH*Q9=R{xf#m+ogla<*gBeR*W1)1 z;1E~9BQUL&E=A#uA8VwO@;}hjO7d7I2kVp45{8ZE_L(%v{~Wn@H&)WYDpb=i@MdVY zOY@yej<>*%pdhcPkky%&>PiS6EvE3KRM*$u`pkEe?U zC=u*#?>$YjIsd@I@a|^8-6S>xk!Jc%#bS>$eUEy@Y{^(bcmu8IuiZ9P5UI}lrMZO> zc!euO{vs^GfJnHuwSn$f&9>tSB0EV=-f_UCm|u{5QR|9msv@qz8`XOL+PjzaBLL$> z3**n^>CKy;)xF%XGUdY$d!bX6k8h6qzADOOtc#?KG@ASC&s~ucyyjBo?eUcskUWt6 zeG03R75HJ_%>G*m&y8`-z?k{X(PTw}T`zs{5U?dCB3{jTi>k!LUgUGe@yOds-zsmR zDgP^5>>e7lhcnGKfj-lpjVGfenOX+`x$nT2-V*D>Ym>gD)+POe$?Zm83gZjDXhhMz z7OJ->UNp(_ae5}{zwcXX-V6INU-eDFSf-1Wegj{YH~p2v%S7Kj&S(eIyoQed?dH&>WR3o76-_w7ClnFR{|u6`4e!~?fN_?g+A*x z-;TJjoR3By*Jh1mLd*|XbmuuO8hy+*ztH~vNQBje*DO0-2<$V10ulM-xV^yU+3fEX z%Hb>*)ql7?U|Y0I_=cLl_G6yE!vta|0STf6*82y}hP(+WLcFEK=*W7-jmS z#?2S{9LGg*XK<&E8p?;WbV&@Ua9jo7qZs15%Px)k`>K6@LYp2@!=)WY{L)O%+$907 zk6G>7Q@&4tR}E?(Bwm}PQkq22-Qs8iZCs`mWTFlAiQ`V9aE{ypqA2t?ahw?_OQ7}?>^6$Q>1BMhx28XwhJ;@`}+f6YxKh@fjxjfuspw=Oo({Yo-70oInx zl0H$Ur^}0UpU8Tr9J=vEQ?eC-3MUXwWmdT4F5S!d&{?W&$S!ek*jj{Ey!&n-YRpx1 zo9LvTBQYIQ+v?nx{7Kiuf9s4ilir-!Rd7x2C76)OR-7`vy}b!ZSB&TZ>86VF%#g;- z!_D{vC1+=Up#ref=X`wmx|Xlrv=+sD_lOjFW}h32ZB6U`)8U;S+WBE3%>I600j^OK zH0E9b+Nq!JdV@UIH=~{dA1T=;Ja9T(zROOCX*kgw^2L>Y(KtvkH;2F(hDXLKW_f!c zfZWtuKM5L}x=4jzIIAm$JA-bu6MCY(1J&A+E_Jtq@2Axb^~2tApP;b{$);7~V9!o> z_c7dMM@*q2LE~?3sNnCvwnfs%+yF%1X zI|4Ltp}>B2zw#Zl2xh30>S3?VeNzORsv&JV=WP{m&QjsVUE3-Q@`TvP%#JWT1=HS& z^BVatY8BRtn+tWqSeV%M@MV%IE&hujEswZ)7Tmpj&_;8>r7atYox&c7B9>2nJb36Veb0m$_MoYRPtMW|i4DEa8N%KJa`W8c@yEcf)7JMEVe&CTV`3)XB ziGAy^k}dZ7brU?fXt6b;m2GaeQb071qH=ALv5i!C@!Lb#KaR*FaICU$(pHSR*lCe- zttP>cc723sO1pkA*@cr(0p~SGHb2t@Q5q~n 
z4R#m^9b#OAX{HnKrFsh%%Zn-|O+RVf&GU8dY%w=|x0P;G0NGz)34$MATC-3zQi#UV zw9K#33X;awEXo%q>jKrHSMvrIdzTaPsQs?qE|kp~Vq(>Z5$s1%v_` zKWY(XiRU?tOy^HLNhM}==}MywzJI}Pv^~kDIrL;_N5PAoN5yi3jx*^g=3vUzJd%E?h5l4dQ+^W)a6oP9T5^f(G#8!Il#wNK!< z>~rLBaty?R)ww!nKjnA(fJ}9srQjBy(=SaEU5B0SIltlaPU~l@XoHVU-&cAgOHI^g zd8*tPKX2G;;wH|<9>uz}uupF9Ty=#TGojYsjZUZLxEzb^pvd#POKu3!nxm!6MuR6z z?@k4(<7xtv`bq}r*V&aD=nUfKFPE|;y&rR9tip*6iGI7AH|V&IdVTFSM6U6fS2}}N zpXW}jzI;0>>6c8Y@Ngc6R4e9SzAy%jcsIHd;ToYB=F65RwC_%I{$jD3yxi9Y%Us89 zB+=ZR{5ET*gim1SDyZ*174#8ri?5ZW%fn7sX|#E%FBV?HHqtR?WvXP+0?$913pi=+ z=k<}ewQ;=)D{8iEx$LUNwy8uT@~y99FO?ZXtd^aK=i4y_aHhZi$7gzQ*l*4H!vYBYefhU> zl3x+3c`koY{!N_ZF9DQdWbLi899H&BdTuYA9-n|d*BC68tiq#X2t_~{H$nCMC~npo5A-E>4l zm;Dwut!SO!u~y0Lag2Mk%%qJ?D2`}cX2+H2l62zkU2n1k*+KZZbKf`V2gz{0-zaerm4%p?CtW2jCxNB0ziX5=jvJPiv ztB576!&|c0rvp{q*DLFWafva)Bt0YE>(htyLca-~g(={2kP*KT zd|2RE^zS5Rfo=OYcwT{He+hu0w;Cn~%dBgfO1sGdHxSb}$Z7%c4F_FXKzv6aoE8vU zI4E}=s>ea@Lx@C0R3ucj4!y@gnhOYR82U)teHHJ@PF=6aJD>GJ%-ZgH;S)LpTfi~M~$X2j9S)D`xIie0n(QNUpJ%)5Z zn&KwQS)U-{GlqwXtnA*xxH&m=M*^ftLO2=Gd9EJ*Wp)c}n(Usu0<$_$yE{ZqfUHP} z@Y!7t6-z0$N{3Ow743L;5{BF7B{J}jU*mwX zY8bWLZ?8&r=cLR^rOcHg1AXFJeZJwV6fxt9E~?}Nl(xbjzG~7GF)^Os@R~kx;uEFF z2%t-wJ0}Y0k~WOeE^o(ck^nv(i75kBQy;o*(r*b6pRCpQ3eIHE222_+LuMcu>*#;C!nPGWK$+|IO7QMtv~{Xgs}ti z?(#(}GcaY$YR$I4wC)d-#~c*MrZ;zpY^({B}{fR~WT5+)bbZq58*PSV8#69J;&{>wmKy>)(#}YgR(VrO+i1 zT{qo{uJ7mCj;^a!RYM>&e{m%wd9 zv{~s}ass-T{oVK~b57JhqEJAM<2zZ6?%5|GxZpMKKO+JX7*1;!+9M&W2isYU3bD9c z@SQR1y+qb}TS%E9Eftw9rx^Aq>={&G9-L$l=L6G|nZ1jN-J8AIhr1d8UhD!B;%I#_ zoU;m=G0z=C-$5KaIfv-j?A!}ZtX7bPHq;LA+PF8Da07XUpXkoB(C_-V>kSKi%yqib z`-qzBbRSsg&wt!C&qCjQolb~_zEqWN7jQR@Wgzc@4fgq5z>2x;p?4~`t2mRY1Pc6m>M5MR*qMc2Bsy%Z_m#l4dBvC{s;l{CXET-> zaRyC0{ONrtyk>2oY!N3uNZGtQwtaR_qpXYNQf}B!v>3?-Z8;M+D&Sv;RHM7OcdE!} z)}Sqm5VL@)<)mk;Yf7^(;Br4c1N!x|p(;N%;0M^1-ew|P5%<&SSG3)?=7-_zCp0MOaCq4EpNa0{r={VoNm5mpBZ2 zudaV zZ*@%svUR!sBne)cQ-RCdur1r9?_1smo%U|rXnS@)32OnFP~4w{5(C@8TuiJA7poyV 
z@hs0>p<|}psS7311_LJ4sYl@PWK!O4w{$}y4O`^V<*1m1Wt@iR(h<9<8;x4+35!6v8LhM9@Y-`>!C`z4s?8>R&mE?f}b+ zBS_>Uw7WnKpAK)v#O?#K2G<}mxIa(?Y1$%8S6*J1IT5hoc~GF4{(Kyy%w{Q#2tR|1 z*p(@VGR9ry6!=7c8v{i!YDgh!s-WT=B>M=04gYL@k?h3b?8!+0p&L#5~(>(5A!S-XS?tkxyl< zI2^W{!7%x9q6`6D&3Ul$4&WbwMHY}?ICq2gI21D_t~F8`!+qWt0!=b($@VqXBM2!B z_lKPZs8_yzMs=^4;zT(T;x0C%y zG9Ija(jdw{)lvnksjEv@Dw~uIKfHs;H{vgHBT>kqy`k5r3f~Ex(AjKKL6N%CdSF2_ z$Z2L6>)evUe%QwsRC3Y7kUrD4Mo$SuPGZFkTJR~99`{V}pZBaJKJ`iP_jgcD1J!?d zNuqEP^#|o2Zkqf<_5aev6Fu;U(IV^XorFhM6j6r%T>0KXcyw72r33!wSY-X@E1Fzn z@v8P;qb+tFm_I0gyG^3{f71VdMW_GIc;14);&c);bG)^$7l8~gU&Pg!m=6vlAwxyf ze@m3f<@unVILPY#=8XdQ3b-=y@P0#2%~-cX$60w0r>sm>n5O1SzqGXETn!>RVQ{5` z)chXrdJ7e{L9LIo(XZv{>RZ)|vNvxqGP;fYvi5m(6NK$sM;CZUgL=k%crUS7g8pvO zGZ$GDkM5U>LHZY4tok-P;4(o!((2V9&8!$g2_QPbAg}sOPhP9PU(Cc~*LOc9as@Tt zxVi0#;38Btld6y$9rw-s1G`0NC#3nARM5m^>4yC}0IvMmk<#7^vMPzO9Fk^Yf3|zg zwjZR2vq!Fdj%5%(#n%ZwXNS4&$+c17myX61DqvX;6b-F4mHRZ(opTDjQ>P~S7|`!Z*y%MzEx(*l@0R4Vtn{57jHK0Ai{gF%5gWt{ zkUSpH_!@5AmU=gW!ENU%>)UaC%k+_I;l%c7kRL z$OCsv9QPAiI-3TF4ihwW)lQQew=a<{(erY?*BDjw)Gn6|FPe;5b`o+NkJ&FBuH>VA zUeH*WplPI@AH^GCPOw?wi=b1_&$9k`o$!q-d8v!@7FH$4hYeg)sx8Bw-htMK81~%f z6F6iJNq&QB-L%2wL@UNU_qhakydlYVkasttW{AP9mX)1F41H!A^1h7-F!?Aq4+83y zK{upBIizkgR*9fZ(YW1n)zJ*|7k2H5+okb%k0lS|c$u|6l3fPLzJH&914s4#eboRy zxUX6o$$ns;fD1=8fMt<-=Q$}TaKG>iUNZvqzu3DHc&PLDZ`_lRRLZ^4v~6;fD}+vD zq`Oo?Y~@zD&k3ciu&7KFDn~^~h03k6N|7reMv5qm+{c*z^Bt69sQvB!e!KtOZ?EZV z=6ip>$LI4r@8|uzpR=bA=R|7pZdeZxQY6-rM~5hDx@PY>UYrg99JDp=PP2$b_R10k z(={K@uouC`Th|eAI@fEA--asUn%Mus8t?c;UZHP}o5SuJd`QbgL8Z$Yw#qTf?6Zu(cK}hNDHoVmMkc z>=lk?3VUsYD+YV*z~Ms_uC#%n3MUB%rET`c`}zJ~O5gS9znNJCvyK)r$y(=#e{do+ z=n#ptHfmLLFu+lNaTbn%O_LijA%M{h|2RThI}$DeY()EGxn>)j9&5A{0Zw6-JaS`5rI63ljrp`31S0u7F+$CgfSvb@OC*BZf87$I)&0*VU z*d<(-5?B{jgeI+q>jH;_9Ug|W`MTN3;jQ6M5p12?pmP|uHiEsvbxDN1!bw%YUg6Ld zu-8W9NeH~&Llg(D&@aFhy87KecPLz;`>B-Cy7T~hd5Vwm7-}k=SWIU#Zko?mYAPMh z>fuo7pMsw?(>F4wB2a#{D}5her}+%WrDoD4q5gjjSb-(5Ss8b{QU+Vf}%mGqD5rDY85 
zMVNvJhU8exU=b!~HD=!~Ch@gwr&w7|JzNmc7t5Q(V85B6wu~XW2vZ%wptXyMc!lW^ z6O*kj+E49egS$ID+4d4L5Yev2}011Q^oMGtoS z-|L4KFu%dZpM6s(2%HXYc(k`Ge#QjiFrGDRr}$Swm^S(p!`odi*THkK z6*LUMWrE%?(jLDM9+?}UYtTac&lfe{Gfq03bIM@I=c5#*{S*(iovo>bZ!qn1Lf-(S zneE{uX^ZXYr_J?uJ2REGaF;&U!;-6I zOgmR5JK9vfc+Dp||EEJAV~np;EpOD^1~4P*t{diVNRXPxl~M@^{zhZ(s~Qvt-@a9U zQDYh9K^ZVsujgKSu;`(x&yn}rnJTsdnsAN7PFeQyU)tJM8r9JD? zpc~P^KkCALe@^JH~$%v!?eO<~-KL~Nw2gx0h0CZ z?=ySv==Xulc->v749V;`{Qi2fbB)GbyITh_1%w0yO>F@*)jR5FXx<9w!WMKn5e|Q- zN3fjMLO$0UNR|gMf0@cOEc((?7T9Ua5tdrE3|QORZZCmU0XSP*=C|E`hhnzG8fY*9 zutZ+_-lB;|01^f4^C0A#*%+$!_7qnpi%Oy1+qUxz_l=8RY!NEb)`8CW#KN16#F;Wa zcLgC=fGY#?fM%}pk#&4@r7;RYdvmtFcb%!Y->yyZ>a7a`zuwud2Eeje`}K0FJM^qO zGZg@HNG+5xwHp%r$t<@m)DDhrV<|KO_DJV#XL|I!wW2((q|NUZG_e;a_Ifm503emq z#>)#Dh1M2ocZEA_XOwloE4kNfkuuYH0l3zkMxkY^6wdlCkPWnJbJFl?)9CB)16;cF zcUwgG$?vF($W^ak-9<4$@M#8X>!E7%kdi;2po+m4h>teJ}ORkyl8X zSgcgeI#(WhNDNS}1#;r*SO;buYL%*%|mI1n#vKc3OUSL2%h15sp`it=LW*~?*7cy`POOm zvAjq^BMY48TaD$1Ae9WzBkK)GI6o~JF+GpGb TK(sv~ZvIlH>5XRjZ!$|P#GeWA zmT8tBH!gk?5C;i%F=BdfIetUZlf1<)g0ZmF4=ciw+?#@jTsAk%X^&^68wsb416kN* zF`g=fm(0>mgw4F71xR#pi*S}1V}%N#gxLmUM^<}1-=z)@j_IWhx>#PK#jyzVpZEJWvEj%kSW zZ=uq~vPQ6%NC?td=V;Obd;zoopBOE`H+F-=a!APTF2wL?q0kUoC{%|Q3dJEo&^18{ zSIT6sZX`tTXK}WliV{xa zMo4^1!;NG8D-OKCU|2Jq@0{xSVZkiXC3d#ktDHmvLxPQZxQj?yeH}eNEa@V-X|gi2 zZ;TR7ry7~RQh%@IWL-l|#2p$M@?m29EhJG2;kWL<;N?bhXVn*S%^J@-Ov25K1p+L@ zyo&UA?bkoc2b#-5-0B{HXLU83+hpqeXKeO|7sJad$a|F<)H_B$1PAQ*lR1ODwC6@21slwaHQ(vlM0&4WzU@W9(&hm|0od_d0upEbN|Z zQ&G-~LL=cz8-a?&%i3k{Zr@`Ixg$U96S#xCq7j>nm@F@f?DtnT!N^!M0KiNhjDY7~d zD{weVUpPN7$$WkmAFO59P28@L)GbHav>{2{aq47E9y4)mMk(R^Fohu!h%j(EI*MHcuLW8V?`INShp*6O zYJIP{ljFZ*21A~kh!1SP*rX-#zz(~GqpEV>gAS0|u` zpxGYvK5sczj&!=q(%aWcX(aW-j!2q2-Rjw(nUe~_O2!-8U&rgm1J z$a0w__Fiib+=5AQxqw=yA^aVL{bs@{olm$=Sleh?Tzy0O_3qFUZmCWaSo#jG#B0*8 zogVdUk6{NZ@*-WH*$2%N3JP0Rr2`JvdP^J4qj~G(0=pdWL)ZIzc%oKhE?XsVpK4%5 zEqh{_pi|qG?M2C?3{cxnQTVzq&9CYTh%1+=6A3-8wz{=RbLU6moQ{tnD(Vf8E}Z>~ zY2Pg`AGN?TCGg~q7>ViK0&JbcA&_q-6y&Lqq9;<0vsS$pPeNZJFA 
zbQWYtKJH30E*XMlDuIvXG6Sv)9>3m*yh%4)W>}2wX$GW=ZEj>+9Q{0nOBnJ^Kg!o_ zwic>>jGWR9W=&WO2a;9aE@>g&VNV8s$TQ3Ul8zoguIDW;$S^A3$o<;^!fRL z4^|k}KAbaGI&e`u1Z;+2Lr7{pk`5Xl4ojNLH7}I9+3#!9KkY0F@5!N@r+eApjd^|4 zBbMB!S>6CmR1M^T+#0T~@@##9eD=)_xHzYer2W1o%-5B9D48;W?O_Ar+#>cDPMu4P71(LPMV-xjO_%^c1*N72K zLnZ4RlDUWki=y~$(iBkq)u9CJXNCa8`q=%PB`bXUsD8>Z$nn0|PJ5@cw2op1A4sXU zShJr|Mif|(WqOC#@u|BruW5&@LY3?z_L$B{3+;@42%doY`@a} zPRL|{jy%F@O{~?LnCU)r)EdS?;^_~t(|jhXHH?GA-{L+Q9@~*JrFO#v9f7Yf`0xVR zW>8_6@-rn2KC*zW76asMfT#NZ30eM&f!x0YJ^s@V`aefm|0ZDXfA2K@5F+`fL!95< zVTclr1}LQTEw-$N!C)T9X8Ehrw6rKk=4H`-5{5hEuVPqNIY=zwEFcWfN#YT>d<_!@5|!>|j3=N(WB0|UWg8@RS0>|~2SEs}3B>#)Z1sw7}_1Qf{I<4-Thw}7KA z4}9$>7zA1o$#Qv-4sa|O_2dIztLCkf54_igFfYurWtv(vcNWSlu@#S1#61m&)5(h* z5PJ>zNMK6y=S6-9C{S$IEcXNKARz9B@zzJ0@M_*rAw(R)1eP-r z_p=<%28)^91<<`^>ZQP7yvLAy2$NIJm>R}%_!h7|2UjIhoYRB0TC^rOkEV#^qgBWCLS zVKWt>NvSb{;d(tj#O_}0&!@pMWr(@YPz+Y^qZ789-oRhS=PX4%VcV^KHQxH6Nt5Z( z`=eT5x?()nC^f$Fy{RY}O6#n=D+45~8i<|!mzyDfHw&Kih&xUXmjW_(1mL?Kc1Of_ z)C6nSfUpVryY8`SsH}E~Zx|^U~MEQW^I~I3Mb)AfE3L{AoB)jd5|;Hd1Mf=+1CPsj$;k zpnKwV!;u{nL$=-KKrT+!shGF&Apb!jnP5sYmF=5rg&ul(IPBwBnXBB@=6AW@A#W4q zVSOs8Rw$TN_wjA`{jLA|DG!Yk`rq9C>N6$ZT~T{AOAY(>ZOxjM9He3fBPxT|ZOmKH5tLFtVtwTjD5o)0si|+aAc%94 z`axQlRr9GSA=QJBSi!KGKdXm()?9(CNKD`m#^N^U=e=+}vu0k_Tme!3UR}@B)RfIH z7%M^;9?RD;1j%r0w&3MLm~8mlI*Nw=TJs_*aDpVF31)`n8a3p%sD!*app`$#6K-G) z&v4EvXSR9FdfjvlFP{KJ6n&-S)pdA7A}bgGd%JLkb90He5u6r4h-WNwV0rF=Q0eQY zIf)gxnP#A;myGqAcG20>w5_fUZ*{!_CkwlsTjd=cjfHtdrLsobEqFr!>^lvNDggCv z>l}vN{*;W>(c#_jSpLclV8GE*k{38 z32i!Kx#lFff0x@*DGhuxMKLD*)m)d8>#BeI5WDrj%|Zh|;@!UbOG?R? 
z53xJ6)ZDJ+Ii;LhpT#hXe%UgLlcE74{5>~*?4`eW%X9uzT7D_Iv zZ*Qsv7%0X?zNJ^BB$JxaWx$ZRkPUg#Nl#&YN#u7CUko>Kpv)ea=!IZa~ zT5BK@r}9F|z}c?r3}h_TtMLzt4Ek&&B;IY>^>F{vvLP2SR?0`}#|quT4(^^>O5vTn zOqXaW^}RZZ1x%`k%{R^iBA>mo`aXdOuZuSGF|L+`odeU7+LB;@2&kUY_b^K4Ih&Zg?_tailO%&0w_O|ZR z2^2J-o{h*WLh|qXBqj9-F2hpH8aslfP}WT*MLWW7p{!V@>2?0VKVNfgy3bchL|~)Q z@z2FZB@yXsCElf%24GQlKAUO(?U2)o$&X{UtxYeL;~-T%gaDXrHwfQ~w;;%lefKG* zYX>ovbJFio ztFQ+rg8cbcprcfy*`UKFrq^wI6V&aj_d4k?J))hWW0`b>JYUgSh!CnWlcta!;DzUT=mTw4h5c=&F6_&zQG?X7}~w2vV(xsu1Eas3_j> z(u+TWpP>S11IkcyFqIZo8c5~5WKLCBW8dRQ6*+OMe~TxARf`e2XLY9g4NLg zgX5RTAV7#nY;cf$*nx@yFz}M?2#zO4g1{R1C!LxHpVG76@#5f2!T2l}C})o!*7XGu zGEiv@71J~K)8f`ZbbF78>D8XOY9gjL;HMP~qQZhfHGU>WG)O!dXy0oP+eK^^a)tyx z1j+-SVExZD?0k4YM^3#WG~km%rZst&eO~7^z~pN)_x8Kx4p3Ad*0;IWb{iCm94RsY zwaVJ?^UedOyoPQ<&f)>#+jVlYmhwk810npQ{wOWB;5e!Td@forA9Odz!|v)0XWo#V(dr+B1VV#Xh>JP4GU)&{J#u zlkmUOJ;YS?v^?+sPh{A?6*~A=GC2P%46alOR1`$bKG)~AN^V~cf}it5KRgyIU&R>R zEy8zybN+V0AdEwSpL)d^4ME2|cte82jXNOq#RA+31;SD?s9~PEr!1+X3`Vy7zL01? zEsV0{b9K-H6$-|2-8IkC0bIks+%FV>4H@_a7((<4gTZY;_9a~F({pli-TmeUYzeGO-2 z3z9EdIt;ZCJDq8!`ewEDwz6CUG05<#WRT0;V9|A6^72`e^9HjUVH^Y-Id|6sTO;um z_dp_Idu|8(@DN`SEw8@)SYF})46lad80B8S^J25?kl}b8P)a_r!&Ven@zBUtYhjmg zozKQ{lGtjF92#hgu6%;vI{B@;w{fn`ERF&l@ZOMsf>$%O?DjVm-wP-x1x<+?Cp*te zf;fCm<;BE)`5j1JP>s$#h#>(EqY&5^f)jlEX1kAE;E8)WAIwiiX2V{e2S~(S#=7aq zr2-&i)Pg5w=#+t%=i0Fth{lGdhc-h1I58RP_WV~_(wprMRbVHG2$djm zFzFi$^7ptYltB;}$T2qJ+GZh%bq{aY8;PGW3zfcc<76Woi4q`j^D>q#O=pL_MhOLR zg7?hN-{Y)M#-*`5%!wO<$|1Nos@2Um3-Yg4;_zlSuk>#W4m)eIr3K$RXn7N~c4hU{|g2LZ~JHzBq(ZA48b|z!E5%X-dskPML{+ z1aH(qBWb7mkHsH&`nY8cm`f1(W}k)2-2F@mhN)n6tqpkLXLh+(`kq?gy@z@4P8`oP zGIQerVMUAIR^fD1^JIZX7Z3+pp&vEmI1PQ20~aCi>^+cj6pPjn72Z$^QCs0Bd`Cf1 z`Iz{br@6grHZh5VI`q|~1?87`?`C2Tk~HU?uj*YREO_s8{hNpmb2*Np*L(wND4Cwx%10t{h+#w-hS;RH%G9WPkW;p@E}2 z8Seris#YvH;7)0d^`>jtF8G@U8o5KPWgB0BqCZVWSGAz_(uRx1#g*+AYR!cGHwQvX zuAf+iE6fzG%GWb%QHa^nOazMDCjaK+%|d&hUu3aaruO#2R-HnJIgo1H0UP`F5YK+F 
zDY{OjTz?4%IY~RnT2)}fu{~Mlv77VPbFAHmLN&VjUb!HwKU38_vuZUi-cZ*1w(kg=E9i$|hZYZchu-0splL=p-`c!alv$~qn8pT4B} zwG@~DoLJs2_)|0RjKDzmOSUH zM=u1ZtqAJ|Bn7wZ1+`cb-#VykJcleP_!1fa9O_P8-8}H*@RnyT;Ju%-AdaWUA+HRH zRwHCsIu}@_=M84v%gl>>=)Sr%2lQ?GS+=CF!mWotk7al=WcHLc2nV$SO4cIN)Tg<# zqm^sDzezbLKPN-cj057w02vzw+Bc=WTypfv7s0&Y9Qj8Am#?PMbmzc{q+15yLz&Jr zpAohHaq;6XWdL{-t}GkRRT@*8fSx4%xKJ{`9q%+B`mX9)ngk%?!_v)I|DyL#Kl|)U zO4>GsEARoue>XoZyI=MWBqWPOlMfFl;|9Iv?YFGeuY1|;fDl52kth<0l|}mT4FBY} zL=}(vVtE5gHP?~lS3MCwN8F3C0={H0P!jq#5HtsV#OQvue_$zcV6N_9NG{@tkWwF1AuOGnnf;`q5emNeX*tMO%pXX@`W(eS@USUn zfaJt-McjIeL>c&od%v|#z=HKGVr=PG85%==C73^*W?sJeVFAEyU4uy4L$2U2;?^A9 zpz^Vob|lClG)8dVi!Q3XxFc5($hlFcBEWe0rvM3e$im$Efo1b5#v_pBxo_uLliP6q zo6njwttbXI2R@rmeV(YZJ2P$9Vus;tr{;+1eF|esJRKnMLSy+Z$-&EemSP4w#5VUT z-`mM34z%l`O}9)L4BUl9U;3HELqG`pX%9$%tZOR;MLbpR;T2V^bJROF-GXyHg|l0_SsF^AJZdX# zKHGbF&t;y>ZLurfI+)pQ7~FL0#i`mGWeeJs?3z<13Mo5eyKT_25*Oe_KG7Rd+E zKWEjzGPp(lCR-$g^DB?wok5h}4u5(+Qo6u1pnw(p>fv%ga2FyFlj|ysrEq&!2Hpc# z-DVu21)%t6K$QcgtHxMDi)AYkzKs6M15dv|YQq7@|JV)rA4odLKl-PAGm?<>*Jv-K zLGf$IOpF!cO|ZxRbABk;;LYSS?><-?eHmQWxBFENEDfh}fd1HsGaI-`_;%@esH%nd zn+PnlnIy*bU~>|Jf7t+8-ROwFVlU{o(Ck_Zgh8gnO_JqP@8?cp04apu{D%Ri$kNLi z68AjvHZmAUnj*WQQ3;~?{3|NAfEI#&zo0f!ucN3KUlCdoz>)}WD2+TY92)a9E(Xu2 zb%|zWrg0uZwTDr^HMJAocm3KUP0%qrr`uP*QpB+&2=!=1>MOTa!8?E3zFNT!KV!FY zE1MGTF)aDzRlHEoG664#Fx(%l0wwG?|5QwZ!Q+p8cx zH!SOha2>J?C!9WZW*{4Ae-CgMZoClmBOLG#z}yD@1{5HJV**Yl$P4&cv~e*dz~aZe zNEn)EAmKcI@C=|YC8@B!zf#kD=e1^esf=lh3dA;`%z!OkNy(pCQ%NL$%`W~{rFIt2s9#JzM z8gYL*;g%>;Lr|z&rpUk+i|KE5WcJHZ28+(Co-BNc5M(oBQ|?9RlFw^5_VqIE^VYUm z5I?5C-A=h?AJ@Sx_RK#SYz)t_?47CVqfBOT#X8jO&n#>Y9h_t0mp@j&y1dQytl*ZP zd_7-Evp(_rjWN<-BX8;|9$dBwk#SqB!zI;)%F54#&$;jIuifxTidv%P*TV5@SjH#& zOsdYa*4{I0$2|6ibQE^?Q0fuQ>ixB9y^U1B3c3<5@kH$krdmAKPKfUfRx=nZzPT${ z(pkDcCCbMQs@lyu=pX*(NSC$;<;)JMUX$8_bBBuRrTQLgYr7}+mqmdRT;Ic2vHpr} z)*i_R$e9XP-@$=;3cINX^31DZK#*-z$vVBQ9U`oA-iK5B*{E+*qQXD-#e=?G>>2H) z`zi0WK~pdGXD<Q% 
zi?!Yy=mH(@W>>c~eYZ!soS7ha9pC*?j4HUdxBoh`W^Ho^#hoqYGfm^#7g;J>m)eVjR?Cz!_lE2U(BwHunSbDl#+PZDCa@#`PZuieDN+VH%RAUc z6`H51Up3+^YB71`Mg(B|XQkBXWamscy4sPOi0? z@0T@Y*-akF?4MH^ch&Ll^AWJ=@F{~ggspmLma3Cv(0uhnHveE zF@7{+vhdbPT#%f#vx2qCi0zZNep4g+moAqWm|?Jbo~UJxSGBtUSGJKNqeh*OBcEUP z5l8M#Sz3xLF?BO9oj7JAESa@Fl{vU>#=aBgHbPceI*uM2 zn@HDX#$EimdB#S9XMM2JBmJ%sKi+47H1EM|3nJ4oBjIz|>dag!1wJf^v*u+V*!}t1 zApfZn{4*n#IPV>a{N-7DcC#g9Iae?U81Y)1T$VUDIU7`el#O=v_hnmEFu53seDYBs zJr{{;+XemFp`4WaZtPsS-&GaChRG^tz~-Yy8AE}$$dLi#US`!3RpbS(Tn;3T-r21= zt9oZ-oU|sX4p&VT7S0iKoT*D%qN45JxBZ3MS??{|Vq>y`wT=56zrT5m+A-bwOsbm4 zZV}EW&e)mDznkMDY3^=r0YfpU`jE(yoFiYsrEIwP(?&Ig3QiY8sZU-MGHV*vT1Tfg z0ZLrH@b*b{`|hWcx5j5LIlI)1;b4yceG1lfMoQg^UcuHZKJudK6E5VruAH@5Zj*{W z2)C)#4d!ajXSqg=Y@4$#>WHg~FfMBykvYg{2L6Ppw9vsUJdx#&5m&(pr_HQPJc?v# zk$G8bb~B$b!b+U@*(1<^N&b2Ax1LRwFV7~;93U@l8YnZt6kjAur}%&8%SUS-#YbP2)mHa|if0~~IV|T+=WgBl;pQ0?+|AfV(ys4wWW;-GNiT@b z+D>FGF=8$7){_=!$l6C_`&oL{aVG8#BaDRi`o#GcvbOGKmC}8da9sR(%JesH@wl6p zjZsuw=9YOC$x1xxUG=@Mo`_X88~F!U*<1XCi{x!qwI?T*^kwR)!?<$PX7`Y131=HS zGV0dpIbWil>);kMnsfG~jC8kdY>cQ(6xLPcyWTxUZKDNgGoMlKw()D`$ktEWwJ?pp zeGF`MmWtas;rZ0KPvJ`A$RjDHu`MQvU-B%mVSSOaQjwdFv~Zv2GaHup9Jy331rjce zd$=5lr$6;B(mweVHLt=tE?0~}j(#eeLG~*$Ct`6db7iZ|UF*o~Ux!sWai~cq>_wkX zfb@(Fxk#rO-2)v=$ZvT;hRgBCZ%A`S=guTz?-sEW!gO#+AUZ|lD?=6OC&WWJCyGUWBu zZ#5u@K7TM}>xAjHt~U=}zn_uUPv1&pj4{4Ik)x5MvX5}a1}m96H+9A8%Y14b}_EFD%dGF>u$#x_%@KWR2=JXcNQ4O1RM9-bQd&V=;&%3#n z8ZL_SEc3Wvz@!z@M|Vu<$fe&pMh&WjTjo<*_j0MKEgP$djUmZiI)2B7>qRb0Dtj<# z`M%?^Hk|RfoT=Eh+-1ECs^0#jkpb^@_zvX$Ww(5(1VpdXB))hz(rL~-d*X;RZ9tH{ z<2-|LTgcPA=?s<$%}YFZuvTuCw`ptFB!etoRv9UC#Mhne?#NW*okC&|^iHXpC3sTI zW=_q;ix~!{^N?4tzWaAIZ-J}o9i)HJwyK?G$SdSiq)N};kbQVQ(^qTd9bf;Z<4bkw z>zkyR4(3P@Igc4i7I-b6e=6&^PhTR#yqtx*XKEWRk?;yOS{X+e8P5mSApJ`>XCe>c z|1$1160q?2!7X#o3$;%hk4<_1z!>W}(Md78!_b6x^sHK3H|NsH6*lu;WILrY@YT)U zcXFxC-1uy}RE$EMTPll4mJgZ7b!K>0jE#UQ^qMaKYbo${Fwm?`xX{5pMfY-a-uSl< zgUbG@Hd9#g6)Dl;ZOL`Hx4sgO>rWH zo|E07aYMscZ|F{kxu}+!|2T{DPR>-l%c!ehZQt)cbe>^u-aFZp!Yn;Sw#GWfNg&NU 
zR|b(B4YG_Y<2Z=s5N(ur-a^-)uGOXU`k4?Jw}G=j)#{q#9W8st{}3H_XkzceTArii z$WoqrGQ3lt$%yDK`PJWxmt8z{_j25!ZoZ&T59yI^aZhX^W8{ju1yz?&6eAtxns-Q`I5e)-e9m@clCD3=XLc{TV<1)m)IoLAAkHuO|=-L3LavYHF&*AWb4jRt>CuK z3MJ1IBzTeK1wC_Zn8zcC%~Hi~b7}8jUQ34!W=s{l36|M$-8y+!*6>`RUd9aXhc?_9 zo(a^e$Q{I_WKM9`4<1`Nd2|?gJZZk{^A=^6EvAwph2I(-#(I0lI?S`8!(Z7~V8ERX z;rvG5GFsnnmBO1{frzKuQJwJ}L&I2u`gt|t30mK$jJG>KN`0es*`G2$DYInA`ujhl zZ&OdcdszA03z-GrC{e11Z=UCO-Mkr_au+CuvH%jpm+Hnd7HApq_1HzS^T_laSxROc z(u4X{jrA99hvZH3;t^;d%O$6>ZH78G3i8I{EK_wXtQG8`hAS+N-|Fa(#|T7+*TR!| z@OLzhqwywUWiL#T%KkZ(an~@4^zFvEIZ7uW&!&r&+Lz*t!ZG1Tr+e>k!vNXP-L^TY zfpzs?FtRBJg)tV{fGJopO^vBbpVM>nn0@Nl1y4AT^c7k2>Q*UYzL+Z%**Ck+BM+x` zb;itHq@SSnhi`HrZMCv^CI2tnCDwfv-1lsT)PjmG%Z| z_4i8+R}fuHp@<*>7-84*LxXI?V5d$dHMeIqGFlj0sU1h~PWgRAHNqFSed72x{2C%y zr>NV@b^U8ULp}E>skqBDME851R<{#jM_#~uB#V~ZG$Bg*W5<2}vimj~D> z!qDR; zX(cuH(=n)?T^#Bws9-xtmuapR+P<&g;D%~GH=*Pchf?R(M^%rda=CyeWTTY^9L&~@6n ztDRL<2LoUYIdIW0_7NdDPMiVr!3bx~ffgWaWXd+vjtX6GeW*&VbqxQxv^x;L?K z5l0q2^-|SR=7D`s*R@k#M5I56wyJoOQ(vzxZT8$e6TuWXZW6hxHe7?8@)#c{n`6s9#vJP^pTeNS?NL}Zm>ds`AhXYq&d>O@eSSl*{Z=3 z>f4o`=Cuu`(pHsUr5jG!J<_tpM@CzAvdWxx;u)P>BALq9Xsc>D$E$|_fE*bH(V+XZ zbI`0U{H5w6t+mmek|H7lcW4*EyQ%L%$06EnXu>O=sG>FfV2)k2`}*Og<@NUV7SL9G z)UY}9BtK#&gwUp7mZx7lHrpz#8qdt_|neBKq)(KF*R+ZT|=#u`^b|5RkaF8 zv@;NOQ`5Dwo<001YN@KqJ*2H_B0u86#h817-`l#hOZLhnO9_Ph)Y#l^SP;r&N z0d?KTg+27;!d|S}3s>tW%+;e!vLfLZx6#%WT@_qgz%z2cUXR`{tGe!;Bljye?-2Fs z$o&fMp{^O(ZKKWT{W@P&MY{e?RjxD6gY_Tr8KiY}Z1@_Wr)FjeW=^ zU76R3!R=-E;6XM((-zY%X-+yc@W zO~g(|@jH+vb6f1!A z;{0$}YHIs-Y%Ko+jXJQ^N@^T@JQg5GWcd;f_Ix#ptWp!(*i(!5y1Ft%KRellVz zehq7kg|4QE4H16HtuGLHR6mr zVMm@3=Q9@dry=6mV|~jG4YwH4pM>7ke~D_0!j8<&K|BPtoJtI9{Ox9zBDd6d=h3K zk7}e-5mI&k9;Vuu$c)+xfSyh?5?;*=T(9m`R5_nR9S4EG8H`R>;e3)_v=~Vzj_w(7$mF_=#=v)}oDe=c|4n9Ra~9msaneOu?J-#z8duMM3kDVbtRr^vN*~C2&bj z79IrpA5OEdD%ggvHG3CX&Xo?Aq`%hX_?XTl8{v`|&Bn(g_s+CBY%1<6J)~4>JAmr( zjos53U0*wsFBVruk;9Cu#V)R{jGJ}#8PoU;NU4(PSjg^ z<5*6^sQ%};&RwiX(C*TUmZ`a+jKlmiqp#FZg;>N^VMwkhT3{a%`dTH8a`9O+d)Q}ORSjHlosG=J*U 
z@S)wo{Fdq1lM|yA?)J$0hqJOi`IX!I?e^8BzjlksI=`vCeIzFHUAGT-ukSx0^x}?Q zy6>GcHGHZ-_TBJEHPxpGFF?YoaQd-syhEUN1A>y(1ueg;b*mKHgPNC|dKy*Y>dN{J z_T)GgiFWD$VsXwWKGsLibarc;s(rFV)x{K^HAu+k&@mS-`pn*mEEyW<{L(gLbU0Ck z5S=EwJYV^flZXfi_wP_|?_^h%ZqG6rrnlbY&RNmvS~HnrK^gL-(`k+xDeZhsCBGpA zz!!Dh`{)yUCq9}qZfGYTVwUZ_pUycTu10kYD! zDKa!))!C|R&hBkWd3nFDbu0#jEe}~O5F~p%hlEPWoXjsA=NohdHz{63Ld#Col4BA+ z=nZ<~@;!!oJ7ekFz5zuu~K2))V$>hZyv|+u+aAm>0yoyjPnkt zttykPmGOjgm;Fjx5&nB(fhE&KNr(KFm{?u)XnR~?%aG0h#^S;1QM47rI9 zQz_s3`i($E&tMQlf-b+0Ny1Qj0b&0Yc6@2zCA}s#>51&uvHYfN3nJ4oqy4|`-;c2W z#N}#=9bSe{2+zkPP`zjz*pSYi%;8t?2TGsFa9T@rhX{-gzzGn^r0?m0!$6Nv0xs+< zV2@dB4DRdw9SKVD(C(J^|PS@=~a9D?0Zm|TL@kBA6$(Jnf9M=|D*CzX?d#iW2X@K z8D4N@m%=n3Bl-eATNGdcQXd&D{rx8wLjSuK#QvMr=brv_{)$4~|GnBRK#A~%ZvHPS zKt9~AKU&oK+ZA8_yaL+)FOIddg}56E-;`hHP^!(2} z&R?S}bpVWqSfQZZEyPFAx9$9^1g`!Yr|}kE>RrmX{VhaCcsS7?|Is!0+c`1ssJdyF z=V;Al!pw@??xeMG=uz}5i2?UirF}WwBXNlkW_xd_dAc&GE)5qe?r75&o zDZ(!61K}D|JeQ-Adbo5ab}ju$7j|2?G@VIeCssS0?xYL5H$0Kf#HJIg5l(MH{=h5j zmUB(lu@mdBTK96XO24;8`0&hFUB@nLf@)pa;z`#2m}>95x*;gMZb<-hphrZrBB4~$!J;P(XYeT zVWU7$@sr5AGD|G+=-1)vuty-M{7K|J8B)iGr4RboR(KF*81TPv(9lai1kVULFT#Lac9o*^_tZ?BQz? 
zCXdq<2#WE|FM0CL)cEsV8K+93iX|SMJ$y|}{*D&qUuobzej=X1y)cN1xx%!S?Ht!( zA!#Mj2SO}U7}svWS^eHubm!BV5c|Dv=+c|)?)Q$;r8lYQ_r9e|Z=%@meNC6%gpL_I zYTHgWV@GY<$!6@RZ5yvz*UUS4@|O3cJno9G)?wq>l|*m!EA2W&%HwYMYMB`aRua*# z!`I=QJTA&t%hY&lB@z8Pd>!t}<8JwCnM2JJBKmdsI-Hfqf#G)8czGoe{W^RdP}6mK za~?Har#0tM({*}t-pn|uk{BnC11AXm3fJL`ua;{CagcEhTO`+yLPbi3wplMRyVz8O zBIp{j^AY2g_)oEBub}oT9XlR1WdkSL@u(@gb)p@QnzF?c?ReCbT|Uu{N0Hg-VQCbZ zofeixk=bcs=_J&Y9dE}UFb))v$7K?VFgKVsvfbsHFJxnzRff@J`@nlr)rkHiTGgnT zm)_)ps!!wDgK;O*xc!Fs&(v5)V*Rb|NHQK`iz0&6g6j zRc|H~V{S2RU=tLwvCAsK&|w(RfLWN(rx^fZOP^-2dn%ei#Z)u{#jh;cQF_5w>g*^> zc9dT5l{!1hk{zWNjG@lbqZu^wnxO=P$>?<8L|_udYPU`VCQ+=mcp@-~VztXB0+T3K zJFO{)vJgH zfY{OpW&y0F56l8sOCOj`o(h-+u=cC0QI zc(s#&NffJ{1Wck>?L=S_WzbFrCQ$}0YOtcT+35{dlr}rP!8$G10<~YqbB53O=A*RP z*{D@J#$rdI*uaVCGz!ITorq4OP;Bu;bQ*n5wYw%T(Ya-WRSGY|VcG6@sD$hknn+%obnxsvJ%5zQ9CPU@9CTWwQ@?4X&$-uxz znX^;VEl_Q)>4nKqZLVpB$^L$VuYaXH7ii$#mbs56fq73RkIw%0!nG)|U=r5?B^FHL zTA)c_6S)>>64+#}1!~GpZOX1hWoEynv5Ug5(=vz0huO!&l>bT+LIp96TOi0l{R5#C z6UDTL?JSp~5~-OG!!%{-gw82SCqOxcDM}}FPEk4m$|=wc*Kuf1|0+dq#6p{`$Ia)>F-&$A!t(!2hXvA>60JLtx*z17SO_*3TVz^!aS~p?L zbwDd7OkXbITh`paU5ys4m@sX*h`$&K{rlB!u)ufcK6%GCOG(sDy@^ncxyIzjm8wK) zVwj3%Fmo!J0e&i)fy7ia1E3zzrx^hCV6v3KXfoIs>vaO>V2t%Tfpb8QW&rc~WzVKH<b8&L1?WYK|X5E8C)#ZAUTbiOA$|ao2zKTojfa3(L|f{7ytB(IU5KIoru)ef`kN zw$oCFr?chmqf7+UVx6|EFIw3aKi+bN%C=}07#1~UM@?CvSD;wzBzPLdVkg1VC>A>j zo<_0QN$~WvrrcrUX-TF30#p|@SkWvnfoTm^Gz;wa@ELB)|$~73Z<0k@>;wV;&La~$3>50H3iq)dDS(G;W!Dc7NwirLls1b+P1#XX7NyP7Gv5C=6x(my=&pWGw__ka{9N+6J;uJ4H=-`w z+a9pB_&E628{rnWO($lJ zh-epPqew&-W`l@&uuao!N&TEC?NnQAuw-5FqJ0m%wMS;&(oVI*rbyP6FRFgvy*JqA z<7_Ma9NJ8O$-0+|R33P11d~6^7S_+XrtN5prOlkFpHsAm-UP3oQ@V)WL_$BOcoDsc zg?>)?B6<@+{hXJJ=uOfdc<%}(H_zU$pA)O?XopRZtSegtGb@P_miP0E({S13WNFSKI zRn?oB&Z(+@WIC%#m|)S4GjqpTwByX&aTe`3Gk5+2?MP|h~rjbRGlmS*OTvuHkX#s2$teqCA{f#PNl^Z}@7V z0Pc7IH_G?haOOAh&VSYlw|uow0Czlq3+i)+jh9zWF6aF3ox4jQ2(@shvTzR=A2k5Q zg_T*wY!&?s8X}^dn5`laotQl$hMkznl!UJUZuJC8!dG*5A|+vrxl5m#Fmozu0)8rL 
zg2YtR1fVO>rzQYhVH_plD}Z|)Wh8uqj)5`~rl({43lP{pB5FF`vO|qsplXb_>`-Gj zu#$LA9*0`ETgP38@s=HG>=vVDEgkcI-yhLu`==9k&3*HoTI9^}sw${~i=wzw(U4IC z7d3Ew|8pV*YT!=7uTcYc5`K*uxRdZ})WDsDU!Rr7(XsLV)pJn;7dJJ8ea1K6nNuLB zkwII;@FSC;8=;6z(~VHfcG!*ZlC5ZLHrZF`b{{Rp>}xjJSLk*)#q4V~*%;`SHggJc zg6$OK1RyL-K~Aupf}8+^1^VO!AS{e#lYND5X@uM>XkOX3lIqaBvS}@R4S}FPf^YuQ zlhXjNL@nI$7B1kGsD(S;!W|D?qQ)*5yMR}s7VdZpcR32-PBibQNv%R_J4?#r22k+? zR6Jo?Lt<8~c-Mg0&y7;8Xj4h?s8a_UJB z4sPr1efDgiu*RAC{(XzJlRKs2*|WS_C#Sx5RUUP_`nlcvVh!B-m|&B|HE!`P`}6X` zG6xzu9BQ3A-oGyicOL5Fm6A$OOZb=`-mp1*fZXrV5~DKIL2S!xH})7R-`kcs@G>Ry zy>CygOUE{k?4IUek5|F_he*%(Y6joM)WA9P)b{%MXSSCQ6uJ*4rKon74_K#nv{{*F6yKUjIg+wzqSjEp4Fx z*vpujzEW$~kJs8V`f9BEOF8T4LoQ0u>8?M4j>AG+Y5-XYYMNqOPY zmf6*ilHNY#pWaz7;x>>QoYvkS+LrE`t9yUwkXxTiX6I+?!Pgy`Lr?ZvyMKP(6L!bd zk!gH<)d$-oNK>tu)n)wOH4+4ibv<-48QP!_uLtspQFMX`gL8KDZ1$cPx5L9 zJ44$Bdwtr%dn>!thLqf22di~N1$%UuTT@@{&yT5f{OIeS*)!B}HIqWIrdHK>)VI7+ z>wBTwQRmS&*I?))?9j&`gIv?&N)9M=BkR{v@3#%r6b>c(yA~dDCy9kqh6+92DXaC8 z?uhgqFB~MX)l#0TJ@2aRe4p8^Ek!NYb*EB4T=VGn_0Je+himXAMU^u2QN*pyH@r2d z|NoeI53nY-t$mzCib&HVU5W};s)|%0Dn}6upi&JGR0NbRLJ&xTqM)LnSU^CEC`w0= z8X!Rgq=QN?p-7PyY6660{(Is%=broBd%WMh|L1{6o2Dj$JqTsG~` zs+ILet&Xcet(cWy0%He>NuiB&Q*jH+J)aOzIXf{n{ zj81pv%*0fuJAsC4(ql|w@f|G3cqn_Ii52HapyraGiqR*I7+;zYieV7PyeR84(TQ4~ zjQ67vm;mBqOQ~8^%zZY%+V%Rs3SC-3ypUYn3JUIG>RUB8potJ zv8o-RFS*ciCp0%1635JHVih5wQb=rx5ssvVP8EMzT|D!iqwMmze$t|X3MOEg zLK@)9VhTZ@RhDMxgl{|qD4ex2M1s7wK<_A|`KOrq%18{;t88XSg_TxDn~0yHtFU&p zGoO&?0xC`WDBbQQ5Y2t{TXo!MlksYB)@q4L+3F`_-&G)#C{}fRKA}#B%1VwTjXiFVx|H3?Ps`7@lGbO?PzP-iz!{BjQ#*EAn6UnHbA zzbvFTR~`>7p_XYWS*xExv+knkOIc-lW%x2Z{BSO2Q6Q^}x#-J4kygf8i#~)!tL5>D zZ>`Kfe!N!Gy3Q2RKd~e;+S)e zq`^+GhH4#2HOwDq8d8dQStxIKMqdAn@R83?hAf4zH&Ry%c%x5THB>%wbyvhIr{F5oU3tUD z*3nxf?!8ltPEd+=aEU%IQT^_6gx%$;OO{>x`P=ueZ3D;6{MD-bbso~-{Mlxyc29vX z9u2toc^znpa8R@9bXiEiFfMFvmX3>~c;QF2AUx017@0Tr*B!;p56+GqQd#kkWQvnF z2YYZkdJ0yEm+!1n4dacimQPk?>K36M$u9r+T{b23F*i)?5UyaoB_&SA9+>q9j^ z$|3F^JG}SI!+UD4iyPWjbzI$hxZ90u1*F#M*Z^IEw|lb4-5IVgnSvR=HT9QYoBJQmw46M 
zqkDoqPxc?!zUTI3Yw{8650|XV&Fbw$+dXuTwAswbq5k%pizV!(&HEyGJk~$|baR8G zVs`lr{ZB^)uRaP15KdTsWam-dB=Xn68@6+X}@4D~Mz=kdK`;Cui2f%3BlDv+Y@-eY9qKZUQlEanQ%?McbzuN{^B z(7)B-UieDK>s9k!2d8&j^yf7;yIXAnFU%pmAw4~0=lpT0PaX`!V6XSLTu6J8_(}&&SFUR?Fcy996 zK01v5W&r;lW?7e7AHMpisk(07W-fQR$=OZLeMXcz)8`$td>{IMKwtv47i=sFj|{ce zYplahqrNS#%=neDoI)LPcLq3QHkbvP4S3y{M!K9|eE&KY+P>N-CbFPxI}s^gsBowV zcdLE*t2Lb}^h=6R_>B0x@69*9!CZ+z+|sXZD&PR$3%%D3 zhBt2h*ecm>shrTaY|LLpR~fV>VdXO>=1V>Lc&UW}k&v$5yfOa^eZuH8wg;N4(iI%8 z#AIrPp)7YZ9waWIU!Yc!Q>J`RIG>x)xQtRzS@Np(8;X0kkV_HA&t){KoWL)5vn;lu zSlEmWm&pQUlUmo4Ef3;cU@-YK$+7O4}){R!Ghl9A(H9gjLembN)_b?nO{BmvO&O^Iu zO~TfhzFgP(wtRz>zS6i!IJeqM-Ug>b3SG@%g0W=7YcKhFHE+u-ycBxV5{}@%u-4H- zbX4{JhWi)zhoZ>WPqIuJr#6J4a{AVhs@W|xaFZRN2ej9YqQCu;FV?gyfHd-2|84nK z`K5vKAj;@c;QyKOl-BUrSfCoxT^K|x3}N^NhWOs9j;h{xpmN{u1GtPU3Ks$}Nq$7` zXELaIZ_oXU$k9D_cbyzUDi8jmJhU$J0(Wj<@I>K_n8MpyJyp^@mAXCEYYu#rJ5Y5X zu76kDc|@q?2d@(HiINY_CFSUn3V-pawD`YP68#Lcjts6o3rhD+S>|jcY%{o*jWT!> zK)5K5vQR-?RYn(m8osFI<2w3?l^?(LVSgC+a1K!nNU1IAQG5eInZ*?@>nUO3A+j&3 za=&%8K+i{|`T_One&y)_J(>P3G6Q=?W;u;#`S~nA{t`FDWjiQhJ0#FIDBV|KQa>f| zpDk$xpVIK#-JAZ};tAJH$x9KUA9rA!ZXb}9kZCfgJ)(T|QKpCgPyKz;njId$ycj#IVU);II#zLc;UY(n_y>vDN-h42oJryp zn%g~O9!GspQ|{lYJg_%2V4K&Ozx8pOsn~U_+Y<8=-SIYs?pOnNqpsT5PxTVb$Z%+PCcPUTFm+$Q3e_s|82IWdI|3UbUl!x;fp0|PrRTT{~Igw?~d0$ zU*Y&FCePOOcX$3O$Nlb({()Z}-jSD=<4QTEue~>0_BX`V)%0y~BUrX@djW15f~^y1 z>?Nu#H|ySz?mxk(5BLi2oG84nP zwZCp%*K+5O;^6vlPWi38HOCI^{`@eEf3g}O_3QQ=yIb6RE*|~sa~=g*f7>Fx-C7(sV?-F_^L%`t`4=7kyx_||5fixTEn{9>!%(=|1jM` zHtpN&UF&Z-%flQKPySC#O#f~N)llccuuX+`(NPty?aQ$q?Th%cfPTSC3faVmS6r+g z;*A@W5Y4``?Lc)X&N++NhTnd@H&lx1voyCSy`BDvITb7dMq3egNZP^$#Hbw|}ofu(4zP>SheCg&W(K)eUU~^)NPy z6sQ#4qFfrcsNeDT-uRyc^)CSBAC?MIU?wa}{3~Pd@2#d}qv2|84?lk|z{@7qhle!`Ui-D85f z+Ae8(s2z%Dig8!`bMW*(B)g~Frl$gTfUJMu!{3QCU&%*>lB!K5l{zJm*vjLip8)Is zEnPDpMSuNgYG~{5EPiMEnyU-XWS5tlsX)Hw=X`CIGedjgLnuT3T7E#EPIh2pYi!)t z2Em@$j_;%qmeBt(*!@Tgj%gp$*dO>a#!hqV! 
z9(b%yj*gF#{;BKBfX+b)-~X@L@!OoWYy?_;Lb>Qu@d^AlM@RfG#f79J|7EV~-|-^a zeP7s@nEZRJ@u(k5;4g^m|9mOf>^L~E=ffYf!+HUXXYmQ;oN2()c0P}1xlJ4I`Gbx7 zcY64jOh~)oM%iGz=r&mLXq}{dBu0`lt@v4xF zm-~L#b?HJpzRrp|ze7ige{iFUGp7cnr_trz*W4F&L5(DJas0bhZJR@nJ^kZ(&dKom zcro&eDp%Y!e1kBPjKDJmCZqY>A=;#lf;CCSdD;cGsc($*zw<;ES85knre+ycUEDD; zZa&^~JmrS>boY&zR?!K5zfgsV9=DYGri7b}{Ok2rNsX>g$BU$0C3^}QDu%ke-AYZN zDcn$hnX61qdxlr&Yh@i}*Y)70&Wi_dO$y=h6I>dlDpu5?oNfWP&xiE0Wt(k=1TQq$ zsbZfwb7^GlD?HO)CWn^hpEJUlslL??+Ueb^(<4&eFD6?TRNhu(m9j~R9~p!`C(p$R zdJz2DEhli6tDrb)p1Z(&bmcYo?94!go+_zTyS?**N4Sn&`qzxMqBhYWe9!ESH(NR` zyhzJ0e9EKPy{nZOSUG^%>|XFyNzd-lM$2O+H-CAOz`1SbA>QLZIi-L0d0%j18BjsbE+d$D6_4^Lr&l8l~DT z-btKVH=0W1I5m(_VH0RI!091ociMLh9L;>QuZr27_4U0bmM|AMsZ37vF?86g-Kt!3 za{G`BCgSwrzeV1i z+Ww4CWiM1#iuvNQ&M}P0LHtlOu1tOx>HaBmgX$)cXW*Y#9v(fQUD%M$bdQlj1M*9_ zYuUR?ybJlVC7$31&+O|Kko>@Nm+ z+tMEHpp|O*pGs-Zan-k>e^tScCpYozuX_f)d*NS9oN@~7avZUt>`Uo9TvRC{hDFd! z+&qU>iq?6&jpi6we{Bkxd-{CJH1gy+o@$%GWFM@PL+*#?LqMuN6dgxgahIOnki9FG zV;VWRPDpwhDHT6{S~)7gnN#uT?bom@N=ac6(c``6`zB884K5odRXAP{31ZXTg~RJYCOCse-PcfQu+75Cd1{ z&|P<8FZ~jwmekoLs=>?cZKW_~tE`>Bp@$~gzMCvjtHe#R?R~A}mTfE0Alfd8IMvFz z7GGNIAC%kYx8I&vQDAGf7tCsh>8ZfY?P;mpJJb&0N>j7HW@K7mOTM0JPun6Pb!wpA zGaRf}>)y|P-WuGFLyK3A`BdjDUKw)_2fbIYjp&pDf}#$0rx)&y2Ix*_9xQ8UAT5({ zcZhJiCk5d#@z`w}p>-cfcEu}L@(!NWwCLrE5>cs<5`DUn32p{lh zh(74(01&0$7TENebu_iX!e3i zpJ^LVCcj*u9YdJk4iTiU?GTnp@8RkYk&)@;dV63qkFnLsd9I0i z>zd&R9&gb4C%sl8k6v6gdK+mk*a~-~m2*b4AIv10wc2QF9ig?~Y~w-M8xP&y0Wth? 
z*`n|uSji@Z^3G>V*V=QQJeqX6^U1uh-5E7R08j<(-;UJ#4Kfxo?XKZ0KYwUTrN8)bTm zqe)rSnZ1B>Q+YT4Q>|Z^R-Ok5{DnQ3p0ew1*^i2sDn0{Go*+N z)wEmVypQqg(X}r*au0PT>Z_1ny0s<(BT>-E8}dbcFVg48MzTeqes9iM6>?EXWE+sd zsIN~`#S&6BNtJTs0)zAJZH(5+TFcf3vr>*2AeD@|nXfMlV~|qgg3>E%HG!<9ABET0 zIM8fo;n9V~FJ1J=#?3CzfbIjMtU%5Lg6zFBn&W(~e6B<*sNU`jSb~><+%}}Ujf~0J zoz}2eM5e=nA9H95EJX|KShE3aS@OqgdQE_|r$*ZQJbw8Xqx-uKv-=d(t?51A2^>$~LH`p*Mf$vZTS*fZgE^&|xH{x>N%w z_)5wd`@G$(XIs z``DE?gGFv$67DagT;pQSo*SUM6lZWf{$7R-Ub~|?er{K%Kkzo(7d&N?;)ihX4$4j2 zp~je^wy|cpZCr7j=?IeZQmll2U_OQNHU`CGd>rwY>vhze+z`1cN}NHuGh_C3=r>SqdCCc5=AZ zC47p5vbN(=(bj)h0;}>t6U-0Ehh$WB+E*+Wr(HcORSNk$g0F@uK(R>0s)uf#i^?;2 zU2~`W!a*@$tHEl*a(+FY_N1si<32Z?%1;8M_2@DK3 zOY4h;$pIg?1#j}Q|h_L166C*eiKSHniBI#*(#qzO`)*gSOU%V?T`U8|^0dv_n|E|;b>k;jSabW|M`B}njnZ8g+3eH#Q&m*kVVJRNUcx|gKDt= z(V>RXzW%0KJ`0dss>Mpf0Q4eRc3Jy<@z^Qdeo~lK z?A~AZvY;Tx9_Xnf>RMI@1l8v;ThZ40Sq4z?{;BqcylG_H(%cwmZR8%~WPTD=7V4QE zX41J7)He~RG=g_PM!A*yD#VD&JsZ4pgycvp>g%r*VQRP(<8+tm+Ny@i);xkH-nDhJ zNTCulZY@lztVEkTJs0w+r11&F^LQU*ohWU@_^xYi>nTGmp{f>)Bwntmf(Vw5EZit1 zy{OV`2%1Pz%BV(5LRHO!akpq97@5)>h;;XR^h-z9lu%YjnZ{D?W;!%G1jRmJ1o_kj z4?%>mqP>^yoM*)^PD0A=9+gWL6Wvf+1!3{aVty76X?Y|o{0;8QwVvjHg;Ah7-QF`H zVXyPMJxm=L?Gg|Y#{mAC`vFAR`y5xTo!$pM96QmSfdnYZ)8O7(l z@1~caFHxzx^kYd-E>v8z$=Y*Og;66m98yQxD@-VwrY&y9u3q-0yk8x6N+WOv7<_oz z4b?^Eq>xW9X6j$%v8X4JW+tI50V~w*7c;$j7D{M{^}xOK!JST`W?=F zCUVb_v%yP&tSk-aKsSb3I8Ic<&(9MY2`ryVDUbAAYc0vg8kd&z-3W10BYtgOxh&R1 zIce=x2wJhIM&p;guHUrznNzgn$=w~6?{u^wIrFPsAH6UwC<$2F-&OZCvc5_;V4@ko z8;exiK&#$@C*mz8iyGa8pczYw+Ch%-8jCw?qii4io1{(GbwAcjJ zuF26uITu$I6wt*jtfJr=!od6m&6}C+sU!}Eu7EQ;? 
zITts00J~X+j2##JG>wcs3e2k1&6TxIKfEF$xLf8oa>@?W4Mo)@gt2N5|K zjY>83N9UgfZBM%B=oapWc$UGn=ru#_iKs6LIP5BO?e@tkX_9vbc6DdL>zJq8qFxlE zVWat`c=#TX1NUY((IA-vSZRQ(PQgQKXI^023XDVyhaUnqW*SI!Zz0(QFjq3!h3iaK z)*H$q-hkpt2lu7=uf7GSfPN?M)Z#^Z9A`Zk#GgAJ=Vx}QKfb&y_IylbTS%&DDTk{W zB6&Uj-UH+_+`UtT<;_os74KI=PXfnHf4|qlhV0DUh^T*eae7rkiwn3F~@fYjD`+ZPBF+hfn{>%fa-fG?pX56??l*e#zE$0F?; z&72Q28MMRs^7Ei^METtA!+R0@6J?!!ZbGf1iyJKSw!haoTqOVHK6q8;*Ny?rx%ue1#E(vZ?~S02JBl+x)eoe-Z}_ zDmNjB{}c}PPjj~~KS(e-KBNX3TG`svqt(y|+|Wu{;yTMw1%IE*q0W;?ss-(qL#P5C za3`WUN^UFE(@mm%T}3)??sU#{Up(x2FKU<~lPhMtVdErP2m1!B+U-xVh)m>rU7W_K zBq9{%WGu~KP9~T2!yDGvypbX5;5vS`kV2=&%(dpVroBqv=d> z05wiHPN5g4_9W^#$DdjRw`STJd#xf|m%t^<2qlOXRaz*|N6UCY%$G;gGhQT~i&&;~ z6r|oz|%^B~qT(n@IZ@@Fx=7D#_dT{>0wb6!qcV2Zswz zedmfYl}Wee+PRv)tCRz{OiwB5uf-B(x|0`>!uW1eU&R_zeD@QeeB5I`#~+{!QPeMa zcd+48Cd`*;0tfjG@XGM_YJa1GV7{dJi4h0Tb_cG(G|G(KFmO_4X2z7s$uz!$8AiVm zLNk4}KnV{pFKPxL-ZnV>#-T(i09_RS@v%)eAUy7y!yaIEiVZmRsU%*kqYM;O0Bzcm z0=N^*`+ce4#zL0=S^A4f^~cFGF!ki+E0w;G+xq;ejLa_J#$bWlU{;_55bXGL2GA+@ zNUWx56L0?o;1y#1zczI`nHE^{hG#)uPHcsoTf8R6Ht{-(Wr_<@{3Z^XS0+(gUd^=);Go>v;d`y=@MS+}K2ycQ zycE*NJhz)z}g#lPo)Y4g|;4NmM8C3Uipo4WWeWV!Ept)kXMktY-XbKzCX^; z;R^um?g$5fw;>CBT9>prK=e9DbvCZI0k(Ml=Hnbk3q|!efK8M@5KqI7J?tNPmG-~7 z1_+R}XpX)O*?=p^2W3EQGzOs+txgVGe7g0^K@pj4_J#U(+t(FFSikYy$Q&Xw>=;bPkNtU)+Ynos~ z>QbIoLo$5GZ^y)Zak=-$p%xR9n*z7RWVI{S;Vk>W=&}xVuz*j0dH|zhGwbXX%|2Yr zBB{U868_`x1f1<1lR;EwHCz^CAM&yqB-BC%+eg&i{OzzE!?F7x9IHR^%&W(4x*DMC zMW13)$99v8BT1&GWf7-fx~-l~w;h?C&OQgYW0<*VGy`K439@OTvw$Yrk5o_k z5cHJD@%TeU5|QI6k3AOQz@DTd1Et_{+6P8;e$qYaBLE*P*uEEVRgrppWd^Wo{sB-I zF!S5jbW@>Q8qHi+APrV|2AiX1vj(~SyFs}SHWeBR3Z@z9Ndd}0A4qm0JgUfgaIwx< zuS5(GJTYuy8L$ST9&-b_#qwT~KX_b#!oKRYF`dp>rcOA&zNuB+(a(RWIA}OKc~w&z$XGG5%6kA)68L@&_n=Xg~@h=>0JOdd=Dmb9xDHn#?F@jjT>Ke=xgJQi@TpPNwyLxD|@L8P%sLHJtdQ(zH1ehfqksC+ls1+a4a zVt{niN3-WK9U$ub$;EBYJK2j69;4^dux8k^8?M+8=zy0~oj>PxR`1+I5KN${U=`Xd z<+CM|6vHVn@w9oUhT3f(%E-4(9S}o!OSYwYJ=V*_jINa9x}{qmRC)2e&P?^ z$Xx1N=j)y1L^qEG^PJv%yz|GbNZcWel3V!1Wydn3~KcDrv-fo}*M 
zktF%9$&S6vA=g-i?44Tfo4I522E${0qe98o`|j8j+AlpAAau81UJZfP4;Ab6st>ZV z=qt6%rNuKN?)*HY*6++OR9{xiFf)kC+msQs%0wSLdsQP70SJZU$|+RkZpWVFw&4TCewnWNJrq)2=0 zacT1tBSWVylU$CcS>I!vVnOXh93iF8@5(dQtUEvI73mUYM7D9?6DijBNX!%FTaRHH zvwX+^jn4$$mdAmqrSYk({i!S#k3VC^?VWc+cZhAE*Mn{Y%)u0N32y1;p&q>+uEMfd zrXRV#=W4GWNH@J%i1Q6<$`4@7#0K@AKQ}}>uljMT8zs6x2qSA#5Pr>mvU_9z*0a5X zhg8VvZbdJ}>^fg40gStexzt#0glId3$99HeLt|Fs&X+1FQ=aJ|)%Dk-dk#OC{;G|( zo=p<7({OncJ`pyqEk};e(BLrz+&$o?as)`RI18Rp*#lfr{3K>Q&YBTQ3hxZsv1-ru zjj}jGi>rZ$2KaPwD_I1w82EEi6*8$X|JP{7rFqyJFwWVld=Wr@ z>U;ZtgM=!sJ?nLLbhqg##>{E(L<^{tM%ZXj-CdxGnR51&Hv*?UU{XF2w zkP2UJdw#K%9vb3nShYqj??g@>33C9((e%(XbVSAyepVM09|FaR@?JPn;pc5u{q#_3 z!?w;^s+iDnME&_3Fo?@#80*&Qjxk0hs%Y1apV{J`P32~?`#vjHB{UwD{u=+CkkF~- z|L_nNku21Ap}%s`xJ%6d{pD= zZ9{V1KRpI$l8yq8q`YXKYuNPpxR6EYWbPFm4Mc!kKY!~JG-J#yS3HQn81El)0+0!= z14o_ng0Fb+JIfYkga4~1mj!X$KF;O9}YzenqBU7NA4*+mi{=3SjTrT?xfkCbc$}RWvu1L^BhPk?U z1}%=8-}FN?3IQ4z`96cEm#gExtR~{amcl+i?_5wQdLZb;VunS5IFMGtQoqZ^Qc07* zO9npG$rG)MH`QYnD;$lw)q?ZT;5Ec;A6qqRK$!8FOYg*$p zQ`RHe#k+oHLOn*moO7mR_M_WpepY1nVJ$=ek?dkQF`WN!Qm~W*xbKgPdF?c;1g>l+ ztCYx*A2qv~tHNcB5TaZl#KU25_!3NweMxXY`WhS@I#$?%6`33XA`-=4N6OlIi>R-1 zu-7K=4XCOdZ&9A9SfdtpOYrec5WWfApQwMfRQ~MysCEOg%m<~+B0gdKrTaj63ZbIC z!UH>n2hVs7(U}X7`us4KKE=Q+A`eh+rb9jzVwqWuN)Nmt?7ULu{H8usUxA@VygO%o zIqfv`VAxwSAAQ9*ogUusY>L*h{#RKhFyYrJQN1NC#LfOa%R+t9oBH@|;x|~EkE$Qd ztB#?_k42+{pJI!${IE|xP(%a41RJR~W2@q$N%gUd~ z+8SMhWv)gX(3A}UPrlGIB9E~hEiQlR<=nc~vP}@Al}wSRJSoK|_A>QnKq^Y^InULe zK{#e4&Fg=F^v?Y;oP&Zuj`PQ&0Uy?H(7GQL)HztTS;$-=(ta^5Y`&YQOZ&);_3wET z9_X5uRP6Htt&+FHT!ma_=t1$JBHPluD(BZAWOtvNr=WeTyJ%CVE;cBl<@ks9qdS! 
zAUl#ZDpOW)HT<9nCZg1z5`Gd9U0mJ*9H3t5?T?T2*0>Q{6IQrV*rmWP+HJIs#~T zKszpMfMd#+t3bs0iyH9R8UT7rwgSPfleut!3i#QP_N*q}$yNa8-ev*|@)!`Eqr_WZ z0d?r$z0W`v5emZ?JCG;gg)=(%9i)Z)5zKS&nFc7Z@)gXOHv3E$UTKjM&8s$RI4`h_ zx88%ye@Lws;p))$*S7`G`@82;#@BGY#Vy`IP9CSMKaBYOIm`O( zaNYH(k@gOc|9B+38iTXr1n)n{E*PlG8V{;WPk|q2U%ZFGsXBuy{!|8T;*Xb-XYa}k z{~Qhe$#MIC>y#+$qSN)#D&0Kq^|^F)Zu?~B18i{&Oa~cASV{}9;H6LXkb>TH09A~<>43tXtkNy9ZuyO%*!^ zcyYif7{Ri5_AHRRXxJ&qhMDsX*&t|IhI9dy84-(l6F^b`h{nqGuRc_oGoDe=$deb15_pYQCZ-*)EBuf0-fXREnTqa(p?LtI&UW$)*Q@r7ky` z=TL%e;Pnh*k(qG?wH7Fh>~t-jSfU=N+s?+hoQjjgW3ga`x@PnTgAyHJ6vAX>fM>f98_i1%fTj6 zUTrAhnEFlJ8a_({?17RQr^XKnXR z`48?%VEv)o`R)7;YGwL~VJ$2ta0mz_CV+5yRBNxy zBH+8jz5z7{uc{xh6btOZHa?F4q;udDcduN(S}NT7SFjJMmPf4krk2J|(||)j>^w3? z?p;7b01BX3KLo5m;8B>xpW>ed-aM?{kHS$Jg@b2_fXN@x1*-o3Z^;Hzpgq|oaYqMA z5pM#aJ`(wUbI=O!Bdi-zJbxyRhq@0PU0LP@OR1ucb}D)ufUrTJcyCt=z{)q7_754Y zoYQRjn5)7=O}E505`-wN-+ zAc1N0vIV+(@E%Y<-XSYv9WCXZ=RAlLW;o7CS=(`+_B1~LVP#{dGC@!@R_C40x%^Vw zJl9oSYa+m}#-&axGeyO{i$KC1mip1v?D8W*BwaWEQZU^)^C4kMy0^#tMtWRY4nu0+ z=aBV@glIJv5OcI-C+?ip(DVbTWW)5!Fr-UR@KK0fq);*&$#4~ekq zEk6#@jizVb0>t8NKW|FGV*vg8|RjjjNJ`4LT*vU|IM!LxVnxSY9_S^Je zgv$%+CxPCGdWLk-IQ?QX^zi5L$8zo<7SqnwzJNR6sMc72KCH*ufF8TlY!j&T1a0li zIk}~0FdC$roZ`S^R1S7>&fEvtM6m3u{;o(5LPpna!_zOy)?#Krb&1&Rd;1(DOMV6E zPcXcC1A@h3cKankyt{pL^UW2Fwyr@Q?!^twhYKtBV{d5PLR;r@gdX~Ca5{fUH&X{H zt8OT(_HL)p3&;Jf^_gV`Y2)E9U55RwvBS9VP55xTgXSWXJ@uWZHZEC_^ih zA<36?UG9MJ2^k#aq6cNDIF#ydJqjwIml;s$g{gqj1!bjKJCCmiH;J-GtR}$CmD9p> zdf{%H2-hi4|9Qjw&e6?2IFPgDwLTFT@QSfxMu)af9HmrmyqRV8nj@{)Kh0z5oY_(G z599q7ePE1BlRk~C(M9Gbl!ZIe8&{0}hEvs({tl=1YJ(upankp=-!sopElIIIpQ3+~ zD}{GzUIf~laW1~fqZF!j;+;nCZ)Y+Zf~Jw?@u4?)L+I5U=`V5w7H)SxV4rR0;(YZb%oEXKZZffQFl%=i z05spj%XaQsF7D}-YodoURN|FbS3l1XJX)y&(E^yK`|SBw2kxoVmaS~fTB@t?Lu6jG zzwA;Wb^|*4ZO{{wvVel>2D9*kbz)Csl$9>Zt}diyoj5?E0PdHsM;dzU2!odMVgbuZ za}}#C7#UU?Nptm*A4AvOt&3@dM zXrIZY+2r>my)K^;b>|mFlI}~aR|C-43l#9$*Px*vRN{<>)vU45DB*P3H0Lqv)xKBf zyPB7i-0{7Z6Mf0fUYAMQeSYt!BOg?MDJdsFLBmBW?h`{E9xbXxRMho+>Wf?I`paU@ 
zIMt6%d?WSdvc(5qaxna=Z{(qf#Gw;8z@>a8*55JJnq*7%!yIvcuZ0DoB-{@WC_2KG z)!8c~4d~%7enh(mP1Dme0Bkn5DIG*hr9c>QyQ6;5n2z-^*MP`*x#REEFi6&KftuHXZ((1I^Zh+^F4;b$`bgCOB9;7 zBs75fSeR*C@SqTyD4p)UkcwwcysoE+3*ythoDHPm;0*LRKJb{5P1Dr1H#1_x3@7_>u(#=!0njgF8nYiYCr0V7{G*9kp@!QAK1I!PuVDoPj&F5Z z8`alO#c%o+PS?-{`O5kGKl>q^nGMehmb!=0)@?fg4sc!o*}P1RxPEu6eJE?g4DB(p z&7a@pn&#mg5YhoMr4_ry3i$YoZox(uK`uzw_)`kV@SJ%uL{(wwIatKCv;qlRSIpP! z!_fM6DsG!sG~hg{c3?a*J)=t+f^I-JTg!(wPM8NFxH|GTSljLnH+BiexOheAWYAnW z*TgI4YWoIUpj=V4x+9s3`)of=a@j>tp9KePoLE^y^JGNJfMB zKeubc8PWl3){(yPYNfKeKT(eva7o`%PgMA7o)37%f#|kH4=d)`Zi440%Bmz($736=)u|V`#zM0BPv`yB;J^&?X{LMjH%ZzYoU>Kx*ar!N*9d^RNqw80;b&dlb^bbg14;9Dd^bY)Rd`;Eh zohSFN0600}2k@oQW01kd$uu_n>0T3YQx65>^VH+bi_^z{>`3}>&Ev4Bn#2PT%Oqha zKepI_gxi|8@pB&&?2%({+D~g*EasluS>inw?_i;cSZMM9F~~957iWC4KcfngK;Fpi zgWnO)kzf13&L_L1$Z{+WXGjhAlev*@jD(zo^zHV>w_~{-T6stc!UjNySj(@e=>nA`3zfhjXO4-jk z`IFXcTe{3uz?7N`({=9kxmKDmQD>rbbkq%H+~`p6kW`s%1h@k?)axuUI~#`k;n}S> z0rBtvh`_lqOul6qUp=w(2ykmAMD03TVK-Rfz5zHv4n2#!{mSIxKIP_$P+BO)k$YA zknjx3`tn)s0}j!p0xA+X2bn~^^a;1wo98J|$|^MXxDW>|Q7>AzQ&?_Evc?{ZltMnc z%&cV%;r`x1YMd;CzEo?km4s2JuU^(xucP%tH&%o{yY8Uy6rEV$+ z^~dmc_gygYB4cyUd2#MMxX%S=su8Jiy}#>;eo9)0|tTKqk-;*4(gdYJTAdPDCClX-GkHK00Vj1s2q%!*74-* za1vB)M`3&-&rbCYVwh?(;iTL-!8BHkJ8Q+aOXF0vK!X9H5jFf?g-GZr7;J~m*5BS$ zQ6TVQek0U~gZ6h*7K#A7v@Gp>D2o`ERaYyp$qkB0ZdpG>Y+GHv$8dksTgC!W)*i1@(>;?UZ>ELpjZ5?u z%pzXqYwD1%GgBikgkQ-ZW+FEy=Kz-`vVf$xWPC9-ncovJ1ZDLCHi9#gm!v$;P3rMj zMTg0{>c*MVPt|3G zaq1L+R)up#f^RAc8<(KH$xgNFsZ^4NW>$D0)bh@e0V1_uO=cd?wRY3FH<6Z?QKKjV z@q(b{NoRM8h9=Vw@`7GF%J2hn&ccQ8g?DBGBkjIC++?A&^@U(V0h8QM`FQQ5#4qAd zZrl`95C_rJiukg9c`|QJF}5FEUKzsBF5$aSD3(S@+b-`*5Om~Z-ID=-#C(=9rEFp) z=lRBEcRh?VZ|N0=*@|WcKC|L`cKSOe>dxh;z4DUUj*vf&*w;Tv0`YF-3i*h8KF)Y% zqOYm_`kjGmmH;SV?rV%m&OjAeeM+-F96$yM3#6U#2F?le^x# zkAFU1v;lvXI7kQllJ7%$P*hV1`fCBI&}9|J)gjTdXKo*dYv}o>!ATvwF8bx@!S}45x?m{G68~c_AQ7;3~ z8s;%zn^7JWu)3{)QFGXr3Nx1{K{e|CxIr%`5VdY8Mp_`#lQ+|pey{i4a+=Wejbuzik_%M`$&GjA>$7W!}0O&Ryj5J&Q`2Wjg0M)B84fMET%Cou`p8SsY5Q 
zU@X#W5vcWb$Lm>=M~avo?{}CL{i1em3=4@b2?k~$ABxZ1I2iUw5 zU~o8OAG?A|bl?~9&#+d-$(yPJx)LjY?aG!HfO(#g=6x{&D#9QHF^FgE6C+Hs?BKo8aE-RkD49Sg0I2ufo%=YT;NZ3AS< zXbOnY@UM)ffL!Ihw?U;xK~t@%UT{|Nm&r0@J$OD-mh$13;ncv{u`pbbyVS6S1SD@FvKb=NE%iWhotyh0NkC0-P|`zIN7K>;I$dJHVRC zn!m+@x+<)=YeR?~J9d?1VzOLSWrMpWKlv< z10qF0f+i6JF(eouBqZP5dlSgL;J*LweV@nGCJwGhBe&O(JPRbKl&Nd8cwhkQHw}|Ai`?=Me~p!9;b|stg8BT` zw_5lG$8U&|0LnfTdm`h90Pcl;t`plV^?GMqFuRE7Bv6VFbNK97MFK_22QLoH+0g(*< z>X_2|hkYO!-w3XR?ughJ>@lQ~<8=MOyQO6z-0>R7jq_dY&E#uiCseeS$y23)$HD_k znLfZcB(rc1stUF@f15wf6-_()il&{N`qu_rqJ*kxmz`bg%Vb00h;cUO+bBr)TL#4O{l)?<;`xkb4-9o#9gKtIt;Lo9cxtL-s<2+wXDrGLD8f z1*{sAo~_Ds8r-F5F-c;c`p6!BQoSyT;+6dton~e6&^*RCQ#ckM?CdzXi7ngxu=pvV z+;E$wa}-)DuDd)?`mmPvho>nnP;Yp*xTjL@fX+(G415jI^TFgW#bt8;q?twq3@i28 zDjGqVs^V6v5)`8A!q^#9@hTl;QpKME88dd^TubAbN!pS(u-mg%r1LxQ&y|l)p zyZGbf+U0we?0@^_SbMJ1=qh3z)XVa&9eHAW)#T3L$L|+=W14@=~l;!Fk|!p>c(K-E_CTTB)9ceLS_>h5fV_4ag>4ZbPe(?)OI~ z!cLc?6?O-7c7(MBREQ~#>hqYAqkAJpRhWi4p~Fis!DIX*CtXF&s2i>!15TX_oxq!{ zEt#=m(m$JJ^>T7#$vk`wYDH^su0b;tp)T7#My+&{~L-TSw311gx| zFXa|$RcTj6GU)}jRy_p>Ux{96+pE41wUK3MR@P2aiPvgV(ZG6Vin65j zMgIFiOWPNm*u4=GN(RR>>94_EN}@jP+3=pT;^;vsR)o^r zmyFg*(I8vAU9ihW-_uQ4b?*1Uv-IlI$)}+r@UeA<&!0@sd?iLJ zbZUMVYBm#!0>=lF$=ABN%Z%6EELb#n$EfU&oz=lz4V`}y%0j&eetaUq!%V$)vq#K` zT=9|H`bWmwI&Lt*SDhbd8oFy!2C;Z;ZQSS1>o#EuNZ%gpb@ajqlln!2E+OY6a5o_B zW!cJ~=ASxx<{n&JK|?B7UO?mYV7p-C_&79pHFEq?2l0{NMN#{&F(A8a!gSP#E3f!n z>334kr2g@LxblzzmhtQGuG8dIP)H|yV1`ZOuKMaBU+U5z7ok?qW;rL%V41abjQf;# zH=n@?{et`4=|u!K%KgXgYpq-Jc?AR@kEt4^OnZ241mT zSh~wTc~l`%?!|ifGMiiEu#7=a8PjncG!{yF(P5 z;lkPVoH}Xz{&lzh1ZNk(x*wCW{{6Sge6?!f?M89`3 zwdRl7k6BGiTgNGSq?;izSTVVoP)wGQzNz`f;3ZXr>iO5%vaLxMn+UB47?S#fQ=0Lm zaFc?`ZN=Y+<@0MNHWPahl(F|GoopJpB8>NOjEv40Cr?nYIP%0vM$NmUoBS1uCttkA zO+YgNxBYy?teRQieCTNEHXn^QF_qWv-~U(eaC;r~NVD^)tx%z7dvNLPmNvL1b)#j- z17Z4iehX5vVE)+<3y)t|EnY|=IJmX;Yh7ShG%eQfa7;4CJKHVrdAnS*S6lpCnmXU50VQWbhgrQKB0)NRF;zx^cTA#Is09m$8)(zvw~x< zxq8?i0z(?9;=TRJ(xm#4@DF>BtS)c;aF?%!?#SN;7>&5s9%Ym!moFR(IOcBG&A~>M 
z_L>8@viI5BrO*{5Lh8crNY2dah_(N4L>9%d3uGs|k>#`94CP&E?>myyvO11k4OJ9Y zpD{|4bQm(Td_z{ARfyo9V?6@at!&KR2nF%S#LY-+YtALrd}_`eQ`6X-t2?0@{;C`O zLDZbfVa=^22j(Q!k~4E&z@NNdNAuYO2G%ls+qrp7$z`cy+&#j@72{LKC~VDApGdvz zTSTJ;?5v#73j2HRhP8Fm(TA)&9*eE(nt%IzcY+4_&6W?HG2&B$cQZ1KP8>ONj}b6T zIC{j@7}}H<84ldXCr<70u!Re1OOd?PPcmJ`ev%P{dO!VMVWKd1(llt1>B2R$EnAm) z9WzzreH-z{zn(MX{kN7F+G@Cv`YY6LY8diarki2B%q5mKdQK%$Qw7F{ zWV|5xhzKlF`t-fT2tBNo>B3>H^Ih2SUI3vZ88ipOc?HMFiad+d#&Z?@|CRh2dA3V4 zx2Hnc*LDGab;@I0@#ME%@-JIfX5brecru&B8?4S9C6?9y)^3$gP143U;m~g2PHiUC znjc-vx|-sy4kU#`=_ce;iDnS3(UNBRy7=aCqR5FC+JqN^eg5mN+#{H|N~D zNwe=wJoZR)2$Os85_xe=>B`IZ4i7xHC1Ok0Sc~nTu%JkrXM&=(oXO;`PE9ky_;B#0 z^)$Y{431bSU5`2}{T8>69HiKG?AmbZGmCzT#jlLUiX0R^4^PUI<>WlEh2bW}hx(71 znjO#X%UYhX43{6d{S&0g>1-QQb7yGr(UjjlT&(^oo;!SBzM?bI{mj+O>;sqbP6Wv- zQZsX2+i|YsHh2Gh*Yi)=!4;|fJ3Gd^w+-s*n&E%$sHqNlm|VIgGw9WI*ENtI^=ZWD zl6mF@jw`8@e+(KrsTchh2af*U9^cXCU{qlhG+Ndk?;A2D#bUu|9`ZM9XExF*My)t4 zBmGS(PoW>#tKor0P~dAZ+A%_Kx4yl@Kv^rFSub!fZN^sz|BKVwzTnJLLWddOkd!ms z<-swRuPq>UVl%PDjOO=;SKA0JW;B8(TrN^cHMejD2@0ZIuMw9BNzNfjh~ z+k)R#knBsT=oOtL>Y4`%CH*5|l}b0MSMWr3sQWo0^=mq$J6?%(E?ytbzpcW44~Ni5 zP0mL64mo|1KQgH`Q6gQ2pMcK10O{HQVk?L&z8nmR&VhG;9H0cDhmq4m`_`Dq1zuibOX+} zi9M30bOkMbTaIhBF!3#iwXcd$TJ(HU>)gg1|0-f{pR?ZutgW^JeD9`tarF2Uma2CX zR`PN}F>72Ep%q8g+6u{Nvz1l7D&vETeXVK|)(2osQ}+E-nihN}?RXJWV9dFt zAAQYCt(-UNl6G<01uP3&T2GrLG;RYMc(oLYXlLEi-tv{UW}?oPzs-m5iIaM z8>pS=O6aUKmM?G^T49Or4+TFy6!!t&i)UOhYimjXz8BA#!!hn<92T(?%0H>}6h~r< zC{i)YB87zOtyFhl1&LEkg=RH^&Q{7EwGfBZd?N2x?w9{Xqa3tN#TBOgmjZ#M<45rw zYoITov~ooTzQ0wgwrzd2c%XATp~cll2sn8L5lZ>OvRFz9Z51eSbh;A--^rzj;t}Is z!C@iy1CwZmSGwhY&Zp}jYs4O5#N!Ev0(38IAP4Jpzbeq>$PpaV&TBUhMjHy2OQ+rIq5DQ}O~h%4k|$hO&_&~|9K+t%-*ey2>}C#KyieRX_`BrIKttE&APV2t2*2CY6^z#Nu7P^@H)-h z?qa=Y;p%-CD3?~5sD}%n)zeGe8$pCtPtF&Z?|w?M#`k)fE|FHQ%E0%hvsyv2D*cA< zPZ!DS%-&8T84x?vWd_{{>eXZkmTA3I*?mA&s(nO5{6}ez${Kmfcj`o;G%4vz&M#GJ zQ8cut9isbI+p_Up?U(|?C&!NBA~q}Frd80PYaj4Udg`S~0(AEi`tP8HqHQY>aTE1e%P z>A$AT#y1BH@2WR&C(=JSs{nRneP_3Ki}nW;29r0vjqUC-gcW{=IQcp$p&87w<#jS% 
z3WL~yYFGo;k@iAv0ljm2DxS;pIjn6}_$Fa|x-7_ajfw7ZTsiQ|Mr#Qk&B%>3-q5#h zgnHn$5K}1C$6v<=uX|k;p{HOo-IkM@IN1GdHnDw{j3jC2E=n>a^f=Q9lFd2MMaT7a zy4fMUwB7`%*xuujJ@+}hi@^Gsg}(zz;`w(UQJ}2^xBkPVC#F>6yPN^=W3&+%|4T)C zqExz!AUGZ$sx7K|br%EAZBdn4AOUHywR;7LB6Vn^;JUMYXC@+a!EN^MEu4?RKKlH_#eFyTRcF z1Dpi)rg%Jxuzf-3QMHCl&=;ZWG~6do@gg(?3kXTMrxqSYx)aC@0P7Jug2B$}*Gy;@ z07nnk&4@MGPa621Y3tQ~QvQNT8k3fdbZWuVzGl3Xgu**iS3>cDP2iCcNzeH>?2+O; zzLQykM7rap7~h2Sb4qXp$vG9@%d9WTmjQNDJHo;|OVILW#6EiA;FcHL5t_*j{=VST zV9@gF4cH`ullY+t+p6%}RFkDGlP);bgzxz&(T1IReRoe#9_9J~8mZceKzpvU`?)q^ zOIO%_iz^JVK5X5sF<>p?g9z=ibc$Jnst6tWq@;nSqb|LXTi}~pUidcj?IuDO#kXBS zudc-qxGuF!BvL+HaCur)FUr3hQBWuFItl$sfy`S$gnp$%8n_dhd-X3IQAsC|9#GLH zKhLC}SNz>Y$8~^|O`(xWogG!@A&7>7ylG8Hj^Wp;r@D*R&i^}Zf_3iKv}X~mW<~Mo^M7g_IrQX=jxYKe#{!sM z%aSyfnsOdbFy1z(qAOtN-J##!4fF^aT`Z&GJ2X{3aaw7M<7So`EV#VHdl=AOWJixPS%eL!gb4i zqYdkqu$$28q6#1rp)+X)I5$>=cDeK8=#I_|=uE2C3t{kgp-wYCk_B-Lvz&|+0kQky z@JhPsj1+AiyZ!2U1X7Ie3OajD58rKAw=2kWg%ml*Le~vDR_r8?&y!ogVei?VIEE|4 z0LNCi;5z-b1(RcMe84wV9b3$LoWdrw$ek2N@6{m}f*Mj{huoLJ#hL_MT-aSXT{^7<{X~#a8Etw3o?( z=EUvBHzsxgxr*OSXcd_|st?+)&?vFoPMpASSFIo!mS(GmK0#mxbo$cUi0zJU26uJz za9u38`W_7(y@Ht6VKaQ=n)y0(l))h3;BT&d+*eGzA(@g9J zZk7|0vIw_7C$k$l^b@v*_^zV&<>d0WN%*G6aMZspgV7v=REvf`4yWg(P2~`%`QJ84O4Rva3z+9sz72lAc9sxfVm{WWJO}8s0mV`hM(7|NY;b9v387U~3?xmx5aZk4|6R&q8CT(3d7Ta>A1(PtY&Z`J zoxt%l-o=U|FOxF(CD8-h-C9b``VUmd>pg4+9BPp}Ugk9(do=#1$90=F1~0wL z>*F3ITB}CcJf!$RdD9iqw0{+w2+jO2+w@Z znLCi5HlE5V+f%EUL;w8mg0v6ey1J{V#(b-5@2uK~)Cm-wQ2CO*1nvdWB3GYkj0@B@ zzcdK$WH;2)BG=9Pui8l$>uP`MfoT&H0H!VL1Ew`kO*^x4dD6)bo=ZAVi1y$ooeO&! 
zjxoNCe9mco{ic0_n@jMBH|=vyX?f>(Q&%6mdkpeaZ5e}7il+W&^_BG=?7{rqqd&IS zM%1-)Z%M6>nf!7weU?SPad1#bKe~;-RzELi&uHIH!69KF+vLH^tWg)a2RrnvL!d^e z>6Kx*b%r7>c-oNd@HcH->h$(()gF_TXso&*}<67np!j|GS2VIJVX?)s>Vt9QCZHzgs zVTU{gO1Z4V`s8`zZjtztgmq-5R)ku+NeHl?VO@ zA!!xIMDxzH1PIo*NEb8=os^?JywEmcO7PjcXB%0|?jD}1-M*(~SjN+4%@I1oC(mG& z%{NJM;|H=uPV>$l^Ivmp(oZM>Y$FQC%Qvxa@4+qVf9(uAZMB_o>%>5xsXaSGfH*LL zAvtperpzZ#TFnG6Mz^os9Cq)ckL?I~7Z9@PSb!1Yz{t!vUqLFKYF5^Du@@GU4LY#o z7i>NLmq1*M+nZRig#W?iE(cu@50WKZsp?*kE$D)nj+GUNZR^vj{{@+Q6=E|)8THLX zqrQ7Rstt^m9yjfnc@fIr~5paF2N`IiR8*84m?nq41L`6*Tv0MABP5wt$Xe(fC_^z@y^ zF`_EGIubR?L5EDx*rsXS6+5s@^FWfin(pI%UQW#L9)TDk|Kpg+_#Rc|iKwb?IdzKH zPZNRpnt_(s7WHmzr+`cQV7Qxvbgu5yK+1xHk2S!p+`=PmH=G3$SJ@!MHkx=t#o%mJti1Aa?Rw{+E z4PA{_$7r)4Ujrp#yjx&)eT&V2`vKVq5yK1H?T_#DNSMB&x@7ca9%<+1*ug`X78>Ba zzpe-2pWaLOZG3NMhy}VAEBYTBj)~n$3ZfOKrkx4_!!WU-o{{S|;(@#s@J(LxAa2r?)C?8q6sZm z1HTCz$Q8pjtGiEk+2Gr*l8)`J>j~N7C6MAWs!w;@QwxWQn*qZ0YKRViII7L8hNX^j zz*)f)ULiQ_-VR-PzYH}m4I{Kf*XMHRgVU(EK*6T7Ou8h6Ksp>Z(Tm9^cfC``#K^pL zVj6Ie7`%`oG2Uza5=^$c9z^IL3J%A-=72Oh9#}#!7EKEmYN7|$)g4O_fF^qihwX&! 
z@|~%@0|n9UQt2hDjD9C(s6)#q2ENDJh|N|xZ30%ftzgZVXP!s|nY0N>*#)0@XI1_D zv+5WPAn_XM!ldrN8rFRp!f*t(Yn*`tpd4{eIn|`9eTdV#|2~txAjLry&O|}+_Z>8n zb}{+y4f}J3^qf`$IPCw+^&u~3tn5B-dOk0gMV9&s6o8vJCdK~KuXajDglZUuIj*C6 z1(v2vdBY@WA2w6V@ycK67Tr5i(6|g_y(Edy{`KRCx8Ly6@?tZv-81O_5Gv>^KJx-R z7B#}wl9bOsh*P|ly~_unzUQ6 zvyx*8zR2O`%s9HCa}VV-Id;XN5rU&|4A(!bnTj+ zM!$?Y4*jw%`LMwhVS84uP`1 z2)8BW%*a#ALqb9{DFmlG2Cg(DboJAQG)=WUFw;1CPlU#uMps5TIB>a%NV&p{Mq1?T zsIC|DrLld6IR^HQ&G)UTiXjW%5y+>=FeIhd?fs{g-f*Q*h0I#Y;k*Wq+!b`le7 zyY5qVF(13z)3C;GqoY9&{Pc`xUaZ>$fK8eIpmLAGU+#Kt@BT%q357TU48Sdb+h>{p zvHTW~3Cl^WpVpy~1Mc$AFQkmzBO@M^qh6Zn*p4Ed;URTl8-o*RoVXfe@*Vb<{MLTe zUmX6o?@?n4FY)NZ_XwUC>#kM0KMjT$Nq~z>B_gQ?7dGkD?22B z+-Cfg)HPdY+h|A>t@D@foGxMgda+*PU%{5(3IG0U*n`@Hh|6*)#K!-21|LIh!<5c~ z61DlDsI3D~o9l=NIS6XQc`KA>ai-coQ|bMWap)pR=;q$=yW`X|8xzjs%+U{2AroFJ z;*$*zsrIKO2OzHDcf>Uep2(nSWKpm@2{@CKDhvtL0P?4-SKh@;17HDe4g=X?9p zh1$I3v$1fL@;1kY_|9+i9acH#_RSAGIYcnp*>Z$z1qCG`e{v8)BH2#-kbEB z1AmG8`L`M7%9PH9FPU%dTnie=+N^e{a7CwOoE0&p+Tl6m9^lE_un(?Vjd8aB_kgYP zM8xcXWX^eIux2|1Y_R{A#prJj`AAL4X(4E)>s>DCQ7OoUw>ro#G#k!%9BU4@tHkNF ztntKf{c|d0g5i%DctLPXN;JW(=}h{Ql%~yewe-j^hz9l!9b2_bXeMAbTUia0l^zb~Zi6cib{+eH|Gn+YdPg^=Z3QMc*9X-x&?oGR<1%L{oB?HpWG<{fh zLOm%J5<@lw;PFGB@p=!VI68slL)lqDH@dpx^-Er2kZqE2Ifu2ZO1M@%8Cwn*pJ>Qd zXB+HTn}I|4w{y<#P*Cr4{msS~tEHn(j-&5R$xw4mUi|z9@c+uxEe)obddXp-V`^_CMA;WdGI&vfXD@IL^nV0e^rr z^0+P>W*f=|xA31fr5_I2sOU-Nnn}iQyi*%u`hBJ0KpREtg+zMkrMKGgF_7;GXc_Jo z;1bE_!cY*m1|Lu>nD znk`(tCtmzW8&k|WWlKBGx}=^s3*PufDI}sn;%ukzlqv`F6_PDs)hls7YbAl@LWQHi zJvebzh0N~A9EZ}S8-D&d)b;OEOV?Mvuq>CwhA-9pxw650|CHBH=h~2$PJBD$yH2{> zYUngpxwv}l72mTPM_q9p=Mb>0snU)<$*$=7*#qN>MC1Q?bSRm>B=W$xe;%D?Ii$~} zOXc+yCV8EgT;4b)(NE1~D!$Q@AD$y|{W!}%FXBkTd@|XeiW=j>q6&+6e9=#SH#65~ zug>0@ZJBLjHzhe^jvIHno_l0!z744MblP&TIB4^Rz zP4n`n`6UI01jd_M*-bROIL4s7Xl|KspqBEk? 
zYjo{PgzF^RB}*jUk`a=PlDQIBiJs(Qpm9lFySMBw*)`dD*+bcB*&Wk{&KJhZDmz~k zj(>J$p8GTMg3Pe43WH5i$=R8b&KH`?_5@uwwRU?-eyE}Kh`eCeG>)fVxcKzxC4a$F zZTTkXPf{K#I$v1uL!ylA8I}D8-?zF5)QH8W*+qu=hJ^+?u7SU?`r^H_s%Qza05|K;RaZ!Su1^l!~0K#$>ac6#4SZuzGBW zi(v-iltaZVyieP7f3;oo!bF;ut8LVyM4DT0hIc0GfjnJ9zg#%7sIz{JuGNhVO^n=K zO{FudJL}DL*Z+QiVUGuBHsx{nRyczrJr zsV6#r8i>3WYB0SDd2^{TwE}rVY^iWeq?mnr$+Ph3mx0z5Fw%Y-MU4}8pPmi!ptrFd zr2wAP`c^KCT?a#a^fp~s@D%xJvC#6Qm_4E;C5#iZe}^L4SC4sVKTuJ%?91;!rsgsm zRt3Vr=P!-z_U%+e?{LYI|6bZz|DfETeG}tU3CbzcYuu=LOYYN>LK3`X-)^EWk$u9( zSjP&7DeBf8(zB<$e;{A}P|L4iKMYcwCk{uas;?)2pSlHG90_~}Oex8WntrvMa_ULB zVAE4zDVx%W@al$^o>lC=9SYmQj!hk%GtpPrE`0e^zT8xCn;Q!A_$aZ^6U4wH(K}8B z9{I<)%S$HX{sQx8k;R8`7TrgO>ac|6crpG%!PbpU441gDWX3@oaAaujka_sZx3F{* z#v?0kW?XAhhOW3p0`U7(J;_`42-X34M6eO)-IN(-ziCM#EsR0Se5IsfxsnQwl3Buq z7_$TifLRE6upsD2q66@xcgt=)rq5-d6Zqae#vSDt?8|>*GbvgOgLFt(&dMI~{)tZH&?Ab=V-V)FxBF@3w?MY`ly>dBnoQG5b_j>RBq3Z_`kwLR zJ;E2|M2YRqrg7#bpRXur;)jZ+`#sM5t=4#vy!PR0R*#~kXIj@9Q054twDakUSLl&0 zkitn(jC0yW7QA@T0tseWE}QFms~MJa_806U?ax~kCKo18tLnHF#<{?WrbFaEuGMn8?d{$~>$Ij(8^+nlW6sWT3jy)OFW zjpF0=vQ)!e&u+&|G;x|Tu`b!n$&X^5>q`j>()`;dd;UM8H+9-i|820lytHs$NlChe zqJC(iNkXWd{ zW@cFPv_a25hS=WQ-w#x3*OPDC7fSqo>I|~boSOX9+uS89M|j%b>#2W;XV!9utenm6 zZ+5tQ*j;yvyHmRd{(M-w#}@f${=2J-%5@LHpB1_r-i=sXu6wrW^|+=HC;h$h{6i|H zA8fqbIAVIE=J=)&dE(D!#hRYNH?4*1cb&yHTt;%6kC*^F8sk(dm8A8ECM@sCb+E{1S7RQW`SCp4&!_ z_l?pfVxPmGFr_5qqp^+1G?q5LzTAYPKX)2@xopvvt!x;l*Yo7_PVs#*C@^R~!k~A+ zEOiED=y(r6Cg4h79K)c>>8{bG+qzCbNd>~<;VAcX#h+jGG2gm7Kj6>5%FjQnz)!dm zrk#WzpM)&fW1S`94Sl`f;}00^Q24Jc@L?+(_!eC5A2OnNm%E3LQcOZ&M2!>>59FJA z9c0Nf|5do{F==A#TtCqFS_p?D?$>;urhnJ#Z z_9a>NlTCIszgB@w&~sk2EQ=Eg^vnQ%6nl5TvfYVE!F-N&USgC%Xj(t z9t^cEc8*onEv?3m-te`kqR1=hboHfD&z18Ey!g62UA~{Fzu&dY9d?sBhd0*b@r~^! 
zC112I@X7Zn^eONua^ZQ4{QcrH?`XM|2>F_p< zIsTi&puD1%?Jy9e2a* z!|kIu=QtM$dQ~?rtPjj6iV|4^EADZa_g=KnzjqdmDdO@hdBVf%W2ah0CdlK{LaiG+ zh;SX5uY5~QUUb;7!|P&ab0%_Tu*1jX597ZS=}1QVIS+UzB>E@F3iCHAZfiCA_QLCk z8HsY@YUoiWQ)ZDaY8TK8=tWlfr}$GuTO>QKhgvmt5Zx2ZirfiFZtNfe_FYtKsufj1 zdYR0%=G4 zwkC&G``!#ozK~F4U9^bj!gJwY7R~W{8kl8j@y#vRnCPAqS@LXnH!Jd7ZLe{x3-9qv zcqaS|UIu@O$k;C{b4%?J*P6UU+fn|y?IIOSL-9TMf zlIO^CU_5Ds|Zl!Lf zuC1LWh-}Xf;0K9vM{H)%ZLKfU-!>T%!9u|rT}Fw3UC83wNtR3eB!eWsNsh}d$R5eo zUk^?G(t-ENutlbnd5(FJd4ahpMVL{@T(lKvduX~TdyT$f z0&ksYzu#Tcxm!;0yCquxuHDA?&_Q}py!1uY_pD3%%@!a0qwwCJPj@EY()y#Z>`+v| zpHFuuPtOdaH?)Jp=30KwIhfthu~XuEReW&#o||^Bn?$;cs_$Ee-%ej`f#~&wJB6=d~foKYD8b*?6gop^y#>a*A zC23RLbKVbXmrR3{eAYL>{vOwY~87g@YxQ0B76wcU?A{?jJhvpQ`78MwGTnXz_ zlkm0Wo-$+VeGb9@M5Ikr0oug+bU>sXO|s7~I`Fiq-KK(YUV&(vU)6okC0~5;jz6MF zcy7x_W!r*$))yM(8x^+l28mYrJhlhdfuekPvcA?BGOhEuiCq}=Cq9nLL#;oidLyi4;lCx8OU7!bMww6}=4wGF?EavS+fBvRkr9*;83q;Ck|0 z=4_?`(~x$~iOVvUuaR$)Zy--$L{JEe+cCJ87U4TnZqNDk-d{hOZfi(~_na^7>3u9O z^=FuGSZ_}b!>(%TP>*zAd&*uu5q6a~ z66xr*ry`l_QWjiEtgz4Cp>JS6nI5%OG}Z5s>Gs&^_6ssVv-i2Si%i@c*0G3Iak61V z)a$V7wBK31wdPa^W4+61HpWMFRL%xNj~Pa9Z0{}17a8kQggfM7 z*@&t|CZd^=6j_oiR+bHcoxQ$wo06G7nr){+AFC_+RWa3#dqmHEF8f^a`Q!-4!pFS# zBC%*fy3HF|cv|Q*J$8iqIroe81;cnJMEm?s2cF5?W;fl?J=oZ;4@rKU$!EM` zgp(rML4n*3BSL@;)ZeMIz)X1ef&fLy`4Bg1s%tW)uBL9KT2gJO7UXG+NKOPNoD)Sk zN0Au_n>tQY6}_doh%>cn?C#{gR@XDAChv=J_O94T_EYR zO}|K%YmU7ht92(=`%ouGBk1}0XZ?;QHLn|Uh8k64W86-Z0xZ0@ZULD)u8Z0O6{gYV z(x%X6)2#2;oIA5xFM->GjS5S7vw5@m_jq4!riE5CbnNoGWoj8a*WQqQK7)TYIvjyU zx^9}kmGpA45o~n#msH`rE^3e2r#X(_0Y&I3Vq3vy?)HV71TA<#U**HBg)7v z0(}dt3atvPirD#JG)K7x+EG_hw~!|>r!wa-Co?YEN7y4k&?#jLOAYplq@wP??dhel zCBh!q8=(r^-VEZWg>sLm>+UOoCjxHQ(R4CnKu!_6-=bCaE+`$eOItNghl1oehHaYKx806|2R1k3&gUG_kuu|wTb z^+au9Lu68Eea&@kiqKV04HOlT@=1lH0#XrUYL%}!OBp7`%#_GTlq&N1^-s57cE9WA z{e!s*&C7-Ab{P$w@GhA8xYgwO8KVJe3;~K%5$Y?tu&jE-z{xE4r~saxcT3;vx{bWd zNNDRRLbUv*=e*glN0yh@_DN;5RW$U76)CPoD_Lh^d?E85`CE z@b6@9u$z~B4#gpWN6>gYPb@O`3(wqHJJ~%Vwa82qh8WaynU=M41?SrfrMy%8E|G?0 
zq-2X^&-GAxQ%4U7!7puCw>N{SGS%@zQ(d(&L6xb3;2Sf;9L3Ul(Q?T!iKS$=L=^;w zYW!m6BscCf;~wz0F?&_^@7e2P=h+9XuE}%pHr*OKjUAaBRiFRiu=$&ULFrT3S=SC% z8Q*46}(3@chO*;1rJacEMXE?<-8%9 zdp&g*)tveVd0zYUs*WCb{~LKGbCwd{zlW2GUO_R;TOH+UdXI|+{M98n;%V2SEV%+s zf%=~9T(**Z+ zrcLb>K}36j`sq?}DA?2ER7&ryd)OUf%ncnXH|C$f->KWF8>zd&FfS^}^EsjhF(=Yy z(5BMn&?eJn(IS2I)F5UNCm#t$ylG{N!8up7f@T&6V9t4u0TPvo&eHT#J#U4n8)oH% z<$g7JgN*HF7)D0rr{-Jo4~P!?oyhzh1tRbJd3XD5-3ACbOVf3I*QYFtu7R6YEZi&k zJ8&C$I%Oth8fC6B_5vnoB@523cu~A4zBMmgbWh|W0^bM$UEVD}SlvR#G&>esEG2H; zz*Zy{!9G*H%#JnF?}F*h*jZNL$>)+UDqZu>FI}VWa8;hF$_Cj=PWnYyrYsopuF0^k z=`J^P6s;t*|7U7rsD3FUu%Yy)ljLSm;Gvo_QwqELYojE*5G+VAuK7Ld-7*`P*&WTQkQ6uBF}w z!>e)+do(k0RC;=8_H7OtXTob{eyWbE=}j&gR#V3p$iXmpgGpRCo5ZOqX2uHC9iBIJ zKkQ+Z2QkThru{UlsN_h9LoEI&t-%*}U;$RwI(ucd=gOMA6UOT26{c7V|Oq{-b4z@}v}n@zdQ8fAP4a3Z&9P10pt1 zYmY_FEed%1{*c8UmSg7T6{1E-ly$Ye;#KPM=flRx$nJUT{oURLvdXmLn<`ZiRfae64&3>a-pT{6sw!)*X;gDinFrqnh7DFGqcn>T$iJ zFBlYr^Y4i8hx31~s>$2g8H5GDddx*gAouu-E-k%2H`pXk=4w_i+o*?&sjJVe)Y&C@aI&oO(ACxFQj`Ruxu`Er_yE%trJ0}K zs;3GB^7_b#>lEBH5 zPm{08i9^l<^XY|y`Kv_^ry(amh=i^2$fuB9z&2_e)X?YZT2ce{$&6#m1!!zj9Ss!1 znIP~lfs022u2*iLZmZEuH;45sEZ)@PvmkEkqC1buykK5fXtT#)XiCieYHy^4o)Jrt z&^2Cv@-wEtbhS<35}>V?UxExkEMzJmkz94ChX&Y$YjnHAO`=lKBGF@!XSy|Y6Ez#+ zw|y#<0(XNd-(9T#>uhO)-x2I1S|+)WXAvCM*WNYHZhPC|9i7O*AzLgcv3q=~=fkkx zU|=T4+qotWk{)nAn(Aa?$qGFpkVnkL2C8XE-jvR+zSb8ewTcDD5=3*sO9c#@$W=S; z?a`kiIFVFFaFy0kCJjfR4{|Os-RP$nh!aAbo*>Slfz3P+Xm2!5udb@%P2CUI~q} z0U9e-s^(8ut}A0O3#Ykpr|Q8u3=ZlImyDNG&GgiTYz`z$Pmtw{g^!}adzmJhEBf8< za;7cr;K}Mv$PboxT2-0_3ysm^?BjGwtP^*n9wFMG4AGx3{?YpKeQv!-=4NzunMr~q z10<^@QzShHF-uGG43EG$TepS0X|`3h!ng8Vqi=ALa4~-SPO&$6@<{9t8yJhSsi|j3 zVaR!x779LXpEC}vPP1dd(Ua&;zcZQJ)sA9LFa1^`B*H!H=m}IR5#3@Zi{k+n|6qZ^ z9WN6G5b+Nr#vQsIYTwYI=9d<7(l>M;2XMwFHTYs7RCNG^9K;w}9*V!qQFBaHF~xas z!m@@%#PUeOM$pchJh5@@4EGC0MK-)d(b05^+DU?oq>H2rKF9_p_%2u`;F{~97GkL? 
zT|QDoVk6Wb()o05z+t@SP8E)76T;191 z=+UV{e?7I>nd(?^!w+XWH~CITlQ$f*Apv7T-s6?g%&-8lv?qGDjXXnn@1&(`EKOw2K?B3-rb)~R zPnWpuqU{6*0Bqu)?kqapnZL2o&;kv58w$44(lxcV(3GpR;hnu(%0uU_0}@2jd=;h1 z4-YtGhy1!be`d$vA5!>^4~}RomG+}0(W7l^tDi1%-`SYXS!R}0kPSZ!^8e(wf2&#hi`M+ojpCZz z)_kiCsbyX?H{^3mGoPgG3;SiQPkwvv`z-W7Q#kjy@eMJ;TVl3&(5o#+j%K=cc~uIw zupo19FVjMg9lZw<5-Z=VWeuBtshq-+nr(UjSy)oDM&Bq%Izyhc+Atr@Go-HQwWJ|wSol) zAYrU6;GJ+LBzisI#D`I=;0c5BNpuF1idePG>$-A;VI#0t**QQGpc6LcG^4N1xZ#swe;H+ z)f0P_MA)amG*g7{rlP5HovrI2?QJix2uYpm()TuP`W+t3Tk2_S=X(bjk;&mAnTW^} zf~86_T5e+RFtj^}%)_@4Nkl{@$cFiy+M5uz730)nWuDNfKGYY>J4M#FdSLqpMzk>D73~QSA7+wW$_oXWn87<~P zE+HiUkv)+tM97v@nzAMUk2q29T9c#loMc5D9yI-=Tx!rVTt)N7m=8REI=U_^di%#**%=&ewyYyYr`JP(3t1W

    }jb9@F@@ln|3iwWfl5f z&N`H;oA`laFQdQpTe&YZ30_+O^F8m%tS|J3m8*KhX&rSMef^{oa~L@sm=KB*1xjsv zTYx?T`|+RHk7weCu8nYt2Yy24IM8f54Rq8aY!%9P*|Y`7P0<-~N4Xr?C^aj3+!*JH zzP7~}rfU}t>-t{cXN@ibKNob&Npuxf>jI15P_+a|t3SQ;N?SnoeNn_LyLgw%9(G*9 zsM@$s?~yap(a%S3@i%n!?JR(WhSO0Y*oKYlb-JSD0lGvetvf;G!V-;{UfuvhM^;QL z-=&K!uS+7ln6wFf@fq~RkyaHeK@vf3K6-<|M6bVJ^5ko~>FkH*6)7+xtsGV(7ybSk zvg~l_0TDhA?7hHN;2$D4=)eq1WMHqxpZ@&ZS3;?V#Yj|u6%*bdV|>B{coFOYV4c<9 z?Ez=et$}C`ljfv7cP zb5x_7gB*^2J5$(-sS1Q232-+76y}H?6}7&bAMl*gg20P8AX1KV%7lmOY3TE8Hppu zpg(48M5n~>=0|Q5vP_7EL0AfEZX>)wpQ2ET0FrcHgC^ZORSG092J&@mH_<;zm~Fj> zB@}aIUTDIv?~DajZqc+b$W^XqU@6IHhI7lO7qYva^5w6R2?@*QSls{lNKy}PRQ)OzPFmp;U&VG6h1m18G3u*@4vjIdk|sXfD)Ulu z*k^280QqWV>wDOP$6rvs}@A zY70O{KmdkPeZy?2bWlWAP!t<%ODneht|;|j_HvZef3fK-!KQ-%3?<1JWEz9MH*gke zHB|6JMR9DzC3`T8ne#EK4lDJbW&1GA4s*ouV%t7y87QUu@OmJ5CV^y} zNlj3qyvyz=6oZR@$nbZtUjl<*B*f5XgB}1T*hOJPnV=*`NIn}o)u3yOVuS>2J}?9< z9()OdfId}m`6aeMO&}hyK(KC@Eq`T%nO`fA2w>z9bVl&4cQJzUvH`P$FgY5aOJRt~ zRMIOW`1c46_@L2&ZPC_ln=0?0N^BVspxt=QY6JhVVvc`$Qf$C zL25xz+z%jH!URz{)KhRN!vx!|0JCA^#ilLEjs@MqO}%b>VuKG5EN5=BB)a2TmKM*Yw${!MC8_o+IID=J$S)S zz8o|GrnXf2BMJ02s55G9p-K~A`UAOT(K9hb5C5lDOweiN^`)f8Oz&F_E&WEh0+&ZV z7bwY;)VCZ4J^lt(2H1dD1ynn!l&ER66QXqQKUs>|29=fz-J1Y+SwJK)C5KH-tm$I_ z1FK9As%|l57K@n-APLVLt~?$YCbkAYjMWc@nFmtWy;z_CRaB~Ubhs{Nky_c4ypc&^ zkFVBER5BeiJib#)%t6-zmcR^*%C70!dxs)I3)jvP5xMc34Nt-H9Rmj77kzr=XGA7O z!BenxKt8g9OM!1KnPmg3WnPzl+5|GA%eJ}ss+BE7MW-b*0tPX4j{u>;=BE05d*M<< z9@oP!FmpSfXXB(G9fVNCrIb})A_yc6UW9;PUF zcEWoQiY}Hj2m+{Ec<-y^Hw&;ASEJt)0kwS(#z%hB1lwMTO2B3J57}Fg0(uLvSnw)P zn6LrC8wOzo*8(nqwFMyP22>=7JbVJ|M1RG_&=3aNzP2Oq1D-6`4TSq;Fb*d4f}YP0 z!835_s(XAO7+hqrra)aGK(q?IIWpd&8vfK$Nd5=ni1s|5YOx zg6W4j_R>#*Gt{O9V6l5Ix07DOl`$2zL| z#A8vzsi>Yq90ZlR9BvJ!!14dAby2165%>dgc4KN~mkSUALJZ8ulxdrN%%7kKaQh9W zl{Q_epL)Hl&fNqIbiwqzN(Ui36m<~D;Lid%y-lr@{u!>H zqRj7BXt1G`-Xepz=HLJ$YEi`n)$k{O;D2O?|6p1xe=se)|A8?B!C&lA;E2GzTIH73 zgPnvyuPdMq;r{8q#3zVng`%_?0eDsbz;*%@u|qIUhu>`Y6U-#2JOryF&!aN{d<0hB 
z)cwGHqYeaP11GReih{0>_mVKcwhJ6abdv(`gBha$&lIsjaG^H^!3k>PTmsd0rJGPS z2nQA)t{7wfv>Fs_u>~s&<6zqb)xIZD9a%B$v-L*MG8pDVKf;u7XqP^seUg!l3*j30 z6V;s_o-il4p^H(fri4wg8@iuR-xK@6B|GE_GeJ44l)wJESK$b&s}SK)8z=fA=<#k! z)iy98QULrwx20^d(71;aYFC&$)t9XIPDOocwX4t`>(nQvb~V@@0-Z2nPs8W%O6&^r zDllZ3*;*(FMd087r$nR5eu-jDI)GAyo5;1};c-uD_Y6oW*Nu+8w0h|qO2rsMxbmC_ zcpg#Mh_oSC3s4L!6Jp^7^ASUfh@46@cWEIypR*SjbPI`h;54(EFo3A6zYNL&2l+tc zx>YmkNT9C5NmNvP)Rhk=JZS6!2wFpgRfuFOm6(#y9t(xIs|cRz55%ea3qMd|_Pv$Z zhHy|^O5l$9FaHUNvFO1p%u=A*S}pnk*P}zhltEMw7D_E@p>h{gz+GwWJa%Ks0hSs? zILKZ{%v9a~WA8n{n#jKX@gNE+0=53s(V3hwD8oD~A^fViePJ=vCE9y8fMuCn2 zAn@-l@X2^E>?u85VA_4uNv3Sa&n`La4*7JP&$a^-;I~irAXuWn*VjnihXBFNv-zWz^tBS!56|=82%P`ADV*~g z$ptU`|4bksgiU%FQ~}jP_@A(}DGM4PGO#XJ+A-BFcm}5|==VoDVO;U!C_mn~l}iK` z^f&VOzh|>fsf3>?%x8{GJt|XT@mnDQItQ}@ba(Ld!J^`I{I`|>7G6j`;+%p<6o7Q# zK^5rtT|l;g^Lnoht1+Kh0<<4l0(t_K`GM4Nur^Mq%=@U8Vd~;POw8q_fYv z<%6tEk%BVsJSf57-}vNAon`2k`}cvfio{Mrahq)9v?SlRRQ(@}Le7YikyX8s+vjE5y(?j`Y!a(Drg3JAeRUgQERMrWo zIEI2beW!hNRB(03iY|0ff{mcZsWbmQ*XOQax&bv~%1LLRD;Omph)m7zzMQY84ea`A zFdr{}JtyC0Do51x34O@2$`_pdU)uds*Y{JJsd`Jn7=b@>R4}GQC;O?9n7dg2Xj&qF zyBsJuBY&vUnRgW2%Np$?xOrL+a89UHs85+!3K|i$6sqB*760HRs%-#+@-6lbU=*e1q!5X0Oao z#sv6$k!a};cdyXU^B@_`f`ytvb^}+i(20ZI?*>u1Iu~V<(36zP-eTunH zca4ikFyavMj|2g^^veDZbq`4~8b3twvK=8h(1`d;@PDKlK9d&%eOt zH6h$`F?uTXi1YcF)jr$63!B!ZBDbBB(yuV@|5bGM_x`Gy^^aO4VqDC6KwNV5uZVE5 zui}y$7T=@^g9&mE0PYV}(jab4!DB>}^as$H$8)Q3W>9jZP*@K&zU__3#N4g`k*Kyg zrUA ziJ1j!!QB73KY!xcu_X<9XTLf1XJ#wEce4L(`*#NZoq>O6;NKbecLx5Qfq!S<-x>IK z2L7Fae`nzT@ENG3iB-KxIsK}`i_{~}B>6wVm0Bru6$C!-@sTadAyzPI#F?b74|`BL zo6{!x7`w8~aJCDJ;}}rA!UKqyD?5(G)UivE8ass{8#64HZ+j>skRFcKW1ly;|BR}r zOCz_ZxZGHUETcNFV6L;^zTej|usuiWJ8X=Mu zxlMy4ul@KibjSDuKSD-PJbrculB`_VQImRi4z;^eXbUCs1tBP?WT!YyPpIQNk%G<&NHX zB*?%5ac-4hj#QGd`GM_tH`)Z}a@!l?&|>QlIt|B(rZce3`Md+@49H{S3v`bT&o)D1 z(y(ty6ZCv_6W04(oBF$6Y?+Y?_eRPtUDb|U%BcXp?HvXq>%@Cjw1yT$m3N|YWi_~I z`-qH62zA$HB=TSdSE92LxAMNBC9}G}u;L-EwCq1XVS%M!PWsMeSmP zq{;L_GRBz`(|HP_;rHuyXG0G%t}yoTM*|_n(N6G=XcmJ|VBcCs_exJLC1^Iqt7Z(a 
zFeo2KcQvgR2-C5Fm-oPj|Ezt4d-w9~v{vj^d_)g9K$Ze|IPY>-+eOJw@oO3_-CrFL zO?QE!NAfXCn;!b`O*ZQBPQGAwhn4Cwp;S7HYSQ~s*2Jxkot=W=&$qr*p643E-$)2y zhqK?;yVdYr4v7V(KcD1hr}z<5+Van@rno0z#tF?{)cu4Se#ni$OwC*O`0iDZYzm*l zNy1n>;KvO^nka18JXQXgC-@!{2y<;@IC9heNoa)$8}kZJaDkFU_foN>-EQNNd8QDw zi^NFb(n2VTq(PQ@aT0cYX`UvVqYdFXQ5A#oWAC_i49IsRi#YTQ%wYCv9`<(As5?0< zxs>VpM_CT9yG+!#L=U>w)nLD%5fX=64 zc+j0>=(Kg)I%p@;?PejLSjwEtsm`afTJP_5pOmDtHHm(aZ>ao7DCo4MbgAkFgSpBV z7Vh|8^{?67x#O3XNGOo>coy(_`M>)@o_BJH&cOjd|U*o{IKv^y}{QH zEgHj+Q0kM{??8qo`XQ3z3sUykDh3CWp~97xM7p2Ukr0%|b>5Y4PAID9Vc8l=ur2cA zr#Fcyk@{L*zXcf@>xaOt4|+NL=`>3GR__Jl_@;OPPBFr7^^65L{TxtbrUJ}DBB(M` z0eY_jRc0tQfU%tOTRllcZlP5B{oaG7{#C*x{&|X8y`+_hUURRSxo2S!bARgn8}EsSHun8*sL+a2U9HP1q8?J4uQV#Jk3)u5`61>|5toQU*Q$iq zazsqeRAVj0l_>K1b;!^hKZF()F+mhs3ibtxnoda6BA48&R<-Myxj0#+YFCF`GGhUb z(gCW>Pz-{j;E+pZEI@N2s4_#b3>@VVa>($K5>(NxHxN$~MYT7dW9%N{QAHsr)@B}MaweVVhQPW9@)P8yWl*8LH zILZ6?cSm>2pKV8Iqf^tKs4|3IgznV8i#X5FKe+(E%?Z)Hd~EFCh)$a&LMuZ|X;Pm! zJZrAd(!@=n3-vt~;JYMq=9qd`Aa+PzOWG7_h!9qET3#8K4tgD)^&n%BejTW-8{-2i z2%681i4KBWJ`x-Rv{hJ?lKwN_^9HoxAsvRJQq^ILXqZ^XTugL#+C-9+LeSmz-Qd2Y*F)y zCCwRvqAF^F$xjunU(($%0S7)j!6;H5vN{IJbIl(Hw#aL4w5+hSsR(?^J%Kt_5;%PB zZadXGA_e1(p^xe>An`h9bOKK{d@xg~rmVSqG{?q+CK=bp zf4UOIf1xN(Wse$iY3?>hK2C(JcAsa)ttG>CT%9M{zj%0Ap zJmn7Tk&!PLE4!WzM&aBt(vw0|!&nULU$^m61^c~J(nF^IBsT1W)F#V7TCQgP;SJYo z&6;dIXxuq_q;m7-m^NN`zQ!#%g{xVY)O|)rn|Q&tDu_PuOQ85^EUWAU9gD@{H2IO) zxEx&to;#h<30)ybZM61oeb&xSSIp-(MjutNEPK~si8$}@I2+G>(s#hU#Bt!BxN4vV z*V_uuMwb&+?OS^|j;_6B_H`BFvSqjj7z$2&G$A<9M3+XWPlu}AAv1gk=QeBoY0IL( zr!NU+Dv4On0~6*a=?8xAW_2ki>`#WDU-OS(B_#K=6jIs0$)I)DG*WH3Y zj})99SlPgDuz5DxpYja6#pXIQVLCmjnJ2a?eIk15g$wIaeiOWLVSmVj*z$g1eJ|BvDIzQW-L;_aWn%j}efU(qpIxQ1gRgwbQD5OKba?BV!#us* zQda%TNq5eLs>@F;Oz?`4IOkY{g1?>_23T$3lpDB9F zMwn>-IF@;{8LUOvT+x^)XA;?wJG4JI5Qw%L@Dh$b;UoBP2!&oP3x1+vvt5X7c?5{I z{_;I!OA43H zQpYuvXAr1Guq_8Nua6eX7i2$iQ2#)_qT9yi zWUlIJx3h6$KRmTSJp?N$?(6em(dIUC<*GKlfKzip&HdTS`eMoqz_E-1q3VvlyA0d| z)Fn0^sqwv~y6*-y%ktib(2mB*2g+JDeGN6|(`X;J;mC`NtCX~M8DoN^-YyuI}! 
zp?6$iWaa6hBU7kMm_z7-#;G?uNyoNiFK8TUr)oa%a>F+t(J(}|?M9r!;|?HdRE+M5 z7A2eQTNRo}F14JegxQW&T(4_7xuA5heq*uMI-zSxwo%@G%$rFbuVZp7T~-CxSZ51w zJwntvbV3Xlzs}10g={-wH~h^PclhCH^4{xvkSDX85z_f$sV%}5^?Q$qE0J%?W()Pp zqq4Gq1_UIk{&7fhd%!hUM`paOg$)-2ui&hkO-ce|dSJ-V!2MSMr~yY)tOya#sqR9BsU=7|A9} z=92h|>re)TuTkCFcE=?bEMGEy%C`nR2QH^8MXm}RAbwB@k57uu&AB$mzi)LX- zHBR(rEU~vJ1yd~VtTUq9iPs}9d;2VaWN31bx=)rF<^ee!m6-&kJMycJ)(1aJtD%p? zDcsy_&K1UCeO%N-k6p!fLR?6fe$w6hK#3zUrGvn8cSezUl_3+{D5wtz!{1vTx9^eKU|sG1^tPg8J3m<* za~M5uzFR32ci%lQ{c<}*4@vR6<4dfgZZ9wGhM2Efi{B(WBcA&H1C+;5J@lX~87gjz zCpsmvA(c@c@d2MnAB9j(#XJ`(X-nT(%4oLGH88mS2Njuy*!UqR*rW%W)k`Cp1rwTb z_pS6T*MV&BNH7iKhS&hdAYyXLp^G7u=pG%Y8d>K0hvU}PTpUEcZ&2OhKFp=-lnpxR zO7cee5X$8SWcErMGcfW@^!;t8 zA-rPnJABuJTyjk*@~y($_yUN9NwjQ{H95c;CA&o!4C;2|^HB3CDPE)8gfKQ213*IQ z;*c^sH#`qDSLj*=m=j{##_EwsG5iFzc^b zl?O)y)3Nz9sEYc)i}*+ak8!*@dPhdzu9k&C`W$ihK5-j@j$Gy2whYqCy$tX@XpNm# z9eh`UFftaCFH;-pUVrG|A#^&agKmbRRvGfqCAOT5(r*3-8kz6RzYN)^hw@2Lm>3z7 ztaZB~2b0l6bGJs>6Y8X&(^tPLt09rHMVhIE_g`a4_Aywq>e3#+
    QC_(Un$OI4V z>%)=!G#jXnledLWM13HWOx9bKVCh`_?x1>y*95?8~_v(C>leP zP@>A~KSGAy_Cu_qA{O62WA3+6TU@EPcUU?cbN=o!{Xp1wk#wj7mJGNGQ+7>1uv(gh z1ArMb+w=@kAB~ZMDE+Ae4Tf?<3!1zX!wlyM6)N`Md z{#+$gtfO2!)rD0;vO6t)3W@zWNu#t|tFjJugs?cy3 zW-0OvNftWde6$rs4#s!oXc1;?I+-Q$+D-lw>k9 zL_vqj)A1*A0^U_IX|DsQiY#5%pUQ{R%&(l2_jnZc@=$uTheMa&`;b~K^Do-EdBljUstX(!i|FBLo4xnl?`mZa~DBEO3OT3s-Tk{B!~}dT396p~b13 z@~$l+9L=){q-E{OcEJR;a!*`G-7i@?GE#5rJxJ_pfn?s=K5!atDkoMQEId5RRvq)S zi3T*Pl8R;etCnm^DG3OfAT9Sh`Q-hO|B{gR6YbBGgy?sEEb}>##{228KRXu5t_e_TH*&vkle3^vA6A zQth*rZ(CjU_QgPbI{CJ)rW^YBbM}bV`iuk*QkcApCb9CwlZa@h980Pf!+oEOVIqSc znk;eQZAM;-^5dD182J?e-OpGRzU!_`2FnKKHw`mv5?U$M8W{w%+rDyUPvU86saEg# z+@^;=G2}a-Nbu(G>-ePsQyHj_HCj!aU8t`C^5B9D)DwLZOD4Ye#)7Gwt{>kP`Bm25 zTuVgzUjM(}0`_^1@uUAK{p@{_pYqrq-qS4A`0)jbS=sPq+t*J0^qL^}Ee(O^Y&|vD zyCyNdlwm#ryZcB8<}|r*1Ybw>_!Y_?i6-D|pvz7S_pwvl_je$d$ zv7NsRUv=-k-Tpj;+A)XCJ*Gn)TZR3jW^W>7%g}{Po>5R$ZcnL3arDa>By8oovXRi^ z9wjFT`(1>VrfZ=d26Wfb;zAZRu_ATL-8$FctF0VgdH7DKEaTh0XH}xv$6S8><#kr_?-eI!LmhQOScoZ$QK3^_bL$y-#)u28K6~ZwUe8y(Smy!nR#n%47m5fBT9alz&@; za9AvLo1t#=q|7^oC4p3^hf)rWk~)Nqjj&aj6PyS|-l=DIMXOZPnEd=+A_wb>hU-*A zF6EzXxCMGrhd1`1p-%5|zxy|KFO9Fk2OQ=*jqRQTFo^{~ODGdsDmvSAIRUX-c-Gn| zH^hLD(6u_XP`H}#+Gsb#@FQrC-GXPGZL=EG#8TA^g&Y#*m@1?rJWXaTKR=;fikN7c zWgL9~aq*;(L*3fY--{7N_Dl8eTOb~8nV}fG^*ohMMp5b6r`zSovlQCfJ0RZtpV=hx9>Xn zz{V*j-;R&X;UHSJG9(2+tvUwQ5-~@$ zvmdi@MGlltP2TYE#UGT$#qoY|_ve^is`FR5#n(0U9Y5<@?j`BaO4N;cKYljHPqM!X ztwik=of?cPm2;f2bk&|ggx6-ud zY1+H=!$NDxjf%=@VPoK-elPJI(+ynqQ{b`#m3pQ^8dUkl*7PQC6Qs;$K-4_D6O*)rFe{x3()P$GVYpj$oPE*y|$u}9%aFGI};-@ z>FK8HI~|e7&B7a<-{DuFGwI9&djPUp)ideUy4;x?q9_B<(iKX7N$(P#$oLNMnsC;d zk|D7t(Jj0JbFdACk(lorUzE5C$Jimyr8y~ejXQc_3VPo=W}7C2oQ|q>3}1!oaCGHA zj0c$Pf_9B$cfFM^TO{T=j%{T+yBT-OxgM@r6ZNx#> zH=NYc*RK^5ix^6z&5x+lYumi1?zu-u=%OTH5UkS>6M?fcu6zC{X2HUR&BIFqK4j@Q z^i><3>_$hwC}JwapP=e)eNIF>+^A+YY(P`5STKkyipM~Ohmb=2#>MtD=+!)P@l?6U zH_{wl+-`(0qd~4+RpO=au(w*6;}|m~8$bszAli-7%Q9No=8Vu1AftM~cPTT4v?6xZ zZwH7-Y+pf zA2gsoKLCi-D3WnDfcz&S&eQc9-&^^qhZglQxw{eFr|aXH&WKZj9_u`M?#k$Qc-+pA 
z(Z_iVR?FxUM`t+!KtdsQ6{BGim@p+qgT@k#J%klrOT!&Zz=>2UTi8eMEmw{Yj()L7 zA-1&8XmCiPQ3O=j8}87qyHz*bp-y+JQHXJebM%V?im~EdE4*^T9Ud0ZVqO#(4Q4KA z412Z0YeSerFtfpXdF((j04U0s4RX247BtmG08h5lP#IMj=>EaF!A{VRP zy~1lO%pp>DYj2pt6YzuQ?nhy(V9NZR?V+nfE%|HALXKHN)M?-t(~jUfj9xgayZQLu z2N~Tgz6L$SeE^#uz%!*YuR=u(YLZSfmBf2qdb(lIq-*n9ivBa+OZNzw3w*GZ_rL3}oyHrt$ka zEEW9IY&q!~cnmdaqwJ1f*WI&9g7AmJ1Jg+_S)&22O)Whfem{2HkH6{wnhZ#UIP{2+ zgJmL=e@HYi{r#VOz$5G=-9<(BXhXs-XkXW%?up{ejKD?9iVadAsr%K{&K5!ZbQ2te zc4N!{B3F~3B3p=b1PSdw3iUIT@z_&@GLGhpt}-8kl3>byaJ9mYUJuV3M!z$ zgBbD^qj0{w>FC+V3&~h5x~sd|Z!HiipNUy|SA@g0dgw|&$I|3wvRElSdTCI-M`0J2 zOFOimF@hS8H^Wy(j)o8m`De>GY-(4D8}C)7BZXn&HTdGbsHt-xpR~WXTh?UXB$RS- zG@oh)zPD)4h|ZksCy(k=jxBT26ilA2^$%VMjfS zRqo0eRvf8UWLApo9)5(!YLupGRsfbDJ3uzzox@c|IS$2i8+yph5H@;x_eClh@JG{# zPPf?D_k8yV41oJ-?xAU9mvYitk7wJqSb~W50ra8 z&rhOfe>=kSd?HkSFae#uz74{(l=Id=Rkob^@&NwozB|qc7Vw1b&K-^9*We&pO&=~m z6sZFxndDt{MqCskLzK27)h*Y-ZD9r|jk^*$NjPy`7i#xHHw#X&o8*`=zuFlgTqD2# zKPHsO5UWmGUKLG-MQsV8q;#C*^x;!D*(?=4x*fsBAo<&!gWSE30N!Sj1|I9fi*S>5 z+|ZeDhQJO1(6m0*y&ikDB2$ws`X>$5(a%M>G69N?Sunu_UNjDF$&Eu^==9+V{s=`c zAd?93@}ZPA8Y%a(gaRXYP zEu*;WqD&b!7S`YUYzh!HRalmSLoXv|&-ofpeufkB7iD%oTUP};O z8|7+QC~7LxZPvE$nvq&)MADE#3!;dDc)0>dDh2(MJB1qv!VOVQh&gD)!$sn$PLjuW z0-{s$Jk=Gq=7}-(-jr_HYhL2`_{AS5AK%tpa?((3$udg(X5{Vo0w?^1?nO-(cA;0l zh;mxJp-0uu7c^~2o>vT-UItC&r<#sUH#JgQa${_&sl!y$RiJ4kXxgo6cN#QpOrGZj zn(hHjWn1S9YQ$0w~K{fp?)A2H(F`R)33#-~1KE%ffXPRd{3G+bkZ(lmLcgWZFQ+n>dAy)V*b z>r<0Gr|X1dwTNY3j#G>&vpmA<4~;VEEG^u2T-VXWCv$xc0p@|$>j(;vrXkjC+hOj3 zuc1p~scHH@)T2jlXQrDnJMm5t#T3&e9vb|}sK8Rs;4)!fpPM-%kWNv!B}TCnXt^)4 zO;DN}r2q?wj(e?-xEpm<$qQMj;$YaXpmkyH$=LY$jzjgJg;~k*kn*fo)F0<@? 
z5KY2dJPw^rw2P*7QqV@QN?lPu1Z&g}sVd7`v_eDO zaIq~~A;s#fuCLE8@9X4W+Cq6Vj3{=c+BKbVI`Fb}a{9#(;Q*aPMvt_n<40GOx=Fi~nZ zI6Ls4o8*;iB~s~2^OBVKy&;g;@vv?C zz&t^qz~4UM>9~d&Ll~x`9A*k(nBF&22*dQg8A2GQ!y9JQ8KEDW}PJKYqK@} z7WQLmfAIvezWo#-$K$B_=C%$G3Ft&Yz1F)3x2IYWrkd^B?GuG@U8fVR<-;r=>cP00 zWv2XJ-3J5|#Sme_+mi5UhT=Z^%;1TabMNc|{6y0)kh1L)K}Imt=3AR5akc$*iV?$f zl$w46pk@I;4By+*_JOCT26%)<`f}u2%#+2=faf?H{LVN6Brze4flv8cvdgc;OMZos zF&w>6e`93LF+H32DNICg(ldw2Cl`;6Y(7lzuL;Q9Sf?yBR>X7`_235vQaIn^EBYmfJVaQJcI&bVg?45&haN!hU%a>583oPT_!sNa@y^U&|cG zgqMtG_h-k@s*bJ~>Q@(N?!WF+FRH_`?d{q^)U!mFCJ8s~XuA!Qb*c$)XJlVsE z-v2y)p+!ZP^(7E$wO+{Ks?at0Sq-aZ8T%n;$Ltr0@tvC*vp}hDq5c+W{SM{OgIhxr z4G;qk2-X(K)Mo!Wm}TpEXyY{h2iR*472g8UUnnuKGV&9B1y)9WqOSmD0XC)c>J25h3*DgcMbKC?3utNDO9lT=(~kIN#sZ7d`P@I6+&gChqqrf`0a51I00`c| zA}-Mlef)<=3^0=A1cveMDZ}_}g?z`!ARv(j5!wkPg_R&wW{wz8CIA;rGnWd+Oh z@3pE*$msu8vFTEUiOP20m77A*GD7BC-}sawJXd%KsRbS%qHf|n0{X?32Y{D$3N%mM zycp(MGLba?ZXk1x>E>VOE9vf#eb_N; zt<0hDSq%|?%~?2Wtu*{KO7K+{{u=q$9Mk97@q1hJR=?;^ztY`U4gz*=YA@Bd10!3g zUk88n5PYRys@UX=^jRDF5(}}nhc6IV|9Nu*xIVgfFw!hQ3;! 
z>`@@)P(YQlr+Wfcz;ZnqqfKp@o^_TmT@eIeH3~?Ff@mu?PIu21SE-;fR))4TbmWV8=qu3xQ(+Gy#r<0EOeQV?ka0w82&m*rT)3 z#ol(XMLt%Xp;LQPqA|4v%4?2h`t&I z2gQ%y)C8Sg(?-i@3oW3X1l@W%=G)U-ox%2XLj8-~eOpJ(r)%9o{#|P?sHGliM;yzX z6GIvE$r0K7A_1I-XHs<(Q172W+i=|}F8N7?A3O7AXn!2TQ=Cn`GvY~A_W`lgw(bT0 zQ4bwyih2_)M~>i>g=riOZ-RG;r?z`hngUCw+Z+4+-98?75`-OihZL_^q+eFt`LT-z zAH9{&B?YGtL^demPMxIpr;We=Jz~?5AmbZ~f(aSGg*#k?PeY`+r6!lI++FbOJ>R!8 z&k_O1VjulU_)q?%4;)O9&*NH!<(m@;tz+Jpgg|Wh1gWzsWbF~o5*`}F7V%f0bWC;e zl*>o+6R}GCzC9f1yyRiw2E)O>+kON+z>!42YYzv5{?J_y_9dy> z-K+dAIbhm zChZO@@wtrL%K9lXRVgSo_STTTQJ5UU!WxGHz=$cjxfK~x32ET)z*IPjC+IkdGy+t-D(Zqzh+H-=oe!D zqDF0q&>f_1U#iOXmRVr=^$x7tF1(iRW{>a*Rz}^^$R2M9iz9`?vtNCt!~(ZnyK61MoLyL z_JsQeLe~sEjgOW@&qF4VP6PQqNI-Bzng_YQ2?j;KKLSg)2DZ05CdXV&vhgvO@yCM3 zo}jvZzslWGCOp8JwZ`}zm}pB_eL9(RH|6=7_7Goo#n==ovrpCqMq_gOlUIRiTH@4YS4Y3ad?w+8AU{n15{59E6`dALqMaFS5ID~lODvh>@d9$xS zAWr$MTB3(OAAB2n97sH|Ek%9>t!mF-v`tMYO_d0;HCw!KQ!Uy0R^6L-ReM@Z>qi2l zl8dqWSihZ%NJzui==zJubdhbp3UGzjwH8~oMgN8gwtlo1y(NGV3Gavj2NbELZAym( zeUP_oTaB+=puVy%Ag&M$3o*p168rx)14-TC&g+BTB-N1ZolT@;ldnlsc$PjG@Ls8b z(B@6NV=%^8jdA;gFG}=9=XhvL6Mn`4rANZIuI#OxVP?We>K2@kqpKzkgf!+JOHprz z<19bk$zO7PPAw?iZzD5x7iaQlju~m=`RD7l5%la=;WFKAM%K1HqjyVm-I3q;2zV35 z(m<5vMi41Ub8f|(*k``y1UmuhPjo1_@iwM08uu84TIUj@<^MW0uN1>~74a>N)6VfI zlOItE2r(_lrfhJGEUq>cg>+=*ma)2Z5UzgQ?saYVQWAKzjQWMa+n;O;XfS0;K3cQA z)qIUmPaUnvzztA+QhoL=s}Wq+55z~zD7z5q^6~4fAlUwJ-jQ0Gs9asi;^{=|(^vrv z8hr-l&58orB|z{z%N;YXBVHyar6YoD>vu~!00zr-glBT6^Qutp9G+p`H&|zHZ$x(+vj|BF*SMdo__n&Kob!4GhdEamGt&jbD_Gr+x)F^D8l z4bEH3hs<6;XPur0-zl@koZp8;z|1kr9UzDn=5pPYZ~9oe>NIT5ofBZ(Lmz7G)I(n`Pbxemc?|@_1|o(} z=Zh(6qZG9EWvJQ*)IG8}mm>(9CHn~NB-gK~ANGE<1+c?*)dN7?bRt;m<#18o^+UsF z_m=mS!bwU%WUT`n@HGaG*&Vats+U6Si3z}`+WQCueEk;HBAvj1Q(D~k&2|Vxfdy{B zZ)?~S(*)q~qHeJOcro|4J9{3?WFkz3`x-8Tr*jJcybb>r01E=&-NJ5%S|2cSr>fLI9U zdN9P>7djeM0}rP-C)n+Bs{+gvJek?CEfQWgR=f*|STJ=<31Ym*ik^ZXZ%va3Hu-MU z4zPw%LapUjXaB<`NIq(wRfIv0#6MYsK(3DQ55-3^fyb3|mJ+SZJ{RUO5tAlCXC==z 
zu}_-qB#YOSgyXzJJGNLLuw)W){0-VDn<@_?M4mGx?&%@o?+MGz+kD&WVE4{ncA*o4|uVCxNY|GUFjT8 zpr5m>Jpou8n?0#tQCUf##Ab`eoR)}Utnu6!8UZ305tYRtSaIm=tGP*q6Zt2$218{V z+w8zYRW}*IlK?cXI>2qLp3N%M={qhUO-f9R{1I+RIXmr9@)2sAb*2g-6An`y`HLM? z!H8Cbjx{8=sI5dwZVII;FW-erGpv#$9(O#iANUK9_wP8fr(Hb2ZJ_kp6cSN-eKRyY zxNq@0uIP%dbcI^lFRvb!WtcX=zA2^A7Onr1E%i-h@3buRK5zPCMegObJ!eL*J)_p& z;~&Y=jeUuN1s9f~q`paLY7Fl<3p6-l8y~?rcDi4J$tT$Y<3-`QDuYAYjd(b|MZ52~ zK@osC54iO;A6E~J0E^1d*{P*P)6mhCq48n>tUq;?shh#CiB0e%6eG&JFOIm6e7*SqUK?_RPo7$+VBGG(!{Mc%EkTC-s6>|RK9I3;Z^szorBus|cXk$d3HpLS>l`%nEiAguUnd+6ZHcgpBaq06O{BZ{chr(=zb* z4l6}NtMa45L#WkzlTqDTpNI4JpeIOeAvlg7uLnv%rI?D6A>y-DO}4e-%w8*TB}F%a zw%_YH%~&3^k?!n1$-J$*CM7ckde3ILpzP1U0d#Sj%qs%lU)qIIxMRItSC&6?c6*pZ z6MH(QKV3I6B*jcr7AxATkqU8=uo6T;`o=6>(}O| zqXa%8&$%6noC@=&NI4htq3JY^kRXkt`n!HHUXNm_4o1Anul|O(@}gfp#`&uk_|~W! zxllP*p$ZZ!Zs(Go-3_R`rvV^y319<8Kf>;lABwD71JVs79K*!%gPK((h9J=|*HKe&$QQrf!Vl2p4lTu%=w1!-jbtumL?NT#&;^YjRG3+h~+Ysl;3>hiM-aUJ+BEKk7IRP0uz8tYQq@;L$ZGCW&R_tQ)|=~LYQdtgC2 z_druY|8nI}e~Md@sW68O{ZfvzeHb{<)betd3EoyVcKAG|uFg%DncKUFso!vchll#w zE)kt}bG0?y_g43i#^4N* zAy=zzJwRYUa!3gCiYl}AuFG+Vk#DkbC~{LP;ByiK=Tr785)R` z5{csgY^j;@!n`P-^1@8Ja0*_3u(&+4wr+}KGdm426Oa{8<+eICH}N2=ngemn?(gF# zz3I4RLAW|zM)^~ze4v5{Z#67E2w#X83!~!IvLd6?)rIBy+au#0xEC zr?#UOUB8AvnUx6=E^_Ar_Qk_g)ZjYZ7#f`5g$O5jSsG*;l^oOp{Hy?mr&;FCHdSa; zwRCJ;s_zr7TF$gz6>h7xh-IKPga0fG+t94d{};KN9rNBIvasd9my?EdzRt{MWf(2Mt z&XMMrilq*1v&j`P^;A&yGmY};-;ulTfDce2A_3Pk(2x+5BcoqOn*~mU4hx+a8Z31B zjV2=og)C~0o#93u1H=%4;-R%q3ETq6eHMrBWgbIa$-YYe7~~)Va#;A@!GD0bn4Nz6 zs$A?RU&ZgBvu>!T^dbj>lGQGo$IhO+tw;$kg)sD%NVWFH&5?xJ1OV{x*8@HxBHqJE zFrWGB5G1iw_Rc7BOb%P;5_y*moO#DF6jso95x}*`eT=E9^?TI|p5g@BhAi`WN~BG)Hzdi6a<0DlK}N*ca)Sqhyk$&w}k1CLau6AVL*s_Xo-Db z4&dtSZ2(~;0{!TfQ&M4XoD%f|%U^HIIA!BfpeTUMJ!EBzw4gMwfS;;21<@euj=-ObzK%GCLW04;w+&!W2jz&ows_}}!F-+8 zBfJZaehGDa1pE=iIf{E=Wwly#_w?-o`vA?($i-gw1e?=EEhFa{p^Y`p*w{sYk;5l= zcmh7;HSoaj27lQETrXOE@D13YE!h~mt5)R&IHA{qI}&WzvFJ>GT>3Fa1;Sa^)G0&c zgOfz#{PP_M#V24!k$uRO>%AS}DV8jeSrRo7lhd4}$ZpLtXACEn&02nSf3}`&Z;)Q@ 
zui<`1#R~5}lsL=Ong?4x74lz|eY#w%_KbMMU`G3Ep$I?`*%(?3tz|k=P>ejOz+tTguH3YWT zyU)PUqxj{!05JENK}V4@NR+2L^McjJI2s^@*YKQ$SW>ub)?E~3+N3L)!hy!r+IS%O zmm|SZZYADtDM-f)tMppoiegYi; zURXwz@6^IjL4L1lfHIM2RQX2LiE8@`d5J^kh!xA5(emTWB_M!IOnxuScm zH-7st&rApZYVUts@-Ok9p3ro^q*u`&?S7vd;yPk`cjyZyr#986m;2PZPvJvuipls0 zyYg}t##Dt<{aF~`DCX3=z&D)HATzOo?DJjOum3U(a;xiJ37AcWPd423C@tam!Y-eQJY_vvo+de*jLOoJ9eCgLIPD_H!T(5a>@a%H09)%D#Af zzGqRZ&9_{Uf4#F2b-nYsZ2B|3tp`dhh`+jDFYko-5&;?9RBlYw9^yvo73MQefA|1z zLWs49b%2!`WE`?L--uW$>we{%10q zcP572$^TD#cOTQl9mjFJ1qIzu)I<>*yTqqdrqueiTd?93|7=6>b}e5V@;k?Z#l>RlrCY>fyNFhEWT>Gq_HFEiP5pv zV4QE5qf1h(sDEnsvetMr`=N|k*z?lR-JuhZ8t$mUl(61n?vF#fkJmh`H`nIf%vMr{ zafg2$+|Q@`2U_1USET#v?#(8Gsq^~FNufgvhQf!I4y_qlJhXBsG<^nJp5v8Ut;$oj z5;=ZB1q{{^iHfactJ$hLqL{EH=a6M&h~N%UmeP|TNN@7o(sn!|Bbbb!EU3kT0$Cu( z%_}!gS*{EwHj%pp`(@!AXRU+QUF)oUHS$tr8h1$+X`;o$*Y?I1%OX27`AT2Hk}yrl zAY=XZbL;CYc5uHcG+2TBI1~-SL^ip|gq)JzU zO*9tuCpQReZoRg&S6`e>xOIf@a;aQTgv-pZ~V6FG58R1{-x{@MK#?teALYP5W~UC@S*_x2r*blBDJys^z8p5{Ga zg2lv*yXq`U{1c0sLxlS@?Y@_&x%{d}rG^n%PC?jE00mG01yBG5Pyhu`00mG01yBG5 zPyhu`;I9R48+ly~6;p(EQ$}f8f6_g^UidM%hGJRG6Di9iC6`GhLmWdXIbBGjq(XaUK?yD~j8y7FsO?PrC5eJ#Q`)kA} z#JvdjA}D|YD1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch z0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2 zD1ZVefC4Ch0w{n2D1ZVefC4Ch0w{n2D1ZVefC4CZClts(jA|?(yEDpLOKf|-m?)7y zXl~!oQgf|)W=&dT#8z?Sy>_4ehDL=V&dy==sioXz}sT*d*HazV5L) O>ysPBW#8OhNdFz%v)ItW#FC*nJ~_WMuLPtC z6)-cX8d?;iic!i7C@L*3&P>cxa0b~~soZLfax(MM|>mV4!jV02;b2 Ap8x;= literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata1_encoding_118.dta b/pandas/tests/io/data/stata/stata1_encoding_118.dta new file mode 100644 index 0000000000000000000000000000000000000000..475f172feff8757337f408f75454add10ed276c9 GIT binary patch literal 5587 zcmeHLQEOa96h5ngwyCnHEmWT_QlCn4vs=NW45Kz{tfa=8RR|QiOm-*Dr8m3Fo!O9m zN=pjOldk~-eQcpGHV=h9_Dvc@MSp-w#0o7`iltck&~xU_*I=j~y+Fu&WgzyRx8~xIx-3 zdwesO4ur>_Jl^&A-b<HM$36xR=?c+KOt9)I>ILON9}sw7ggv98#5Qycj@ 
zUZYP$CUu2ZlIIc~nZoG1V_+)PMyqbq6Q19%^DXs7m_}mb$XG|52s=|T*GLwWHS(Rp zsCg#WM95p<)#H|~xaV3)&wZvH|IbClLBCVor|4WY@>J3?CQ|iaoE}p)hHh09TIN_txDUs*!C+H5lPG za|B1wfEXgEgMt9QgkdOS51@oH0~Qj*Dq;ZRXxXAOY_b^BMLxlcP|qFTfa_Z~@^_Fd zma!gCPBLoukFNOIca0pd5OHuJyI?Xf-fcMu^mLagjT`Hr(< zdsz7xy`;{ptRpKdqZ`qfSaFC$V&zfKyq^`eBixpiwpIyNQ_so3Zs4TyMLD8K0laew zeW%WxOd}^OBTi^coH)cGaq@T1yq^=cBixpgLYI(3yM&OMjxm4n!ZYYNbtdHD=bIfF z5kfj5#32rekiEZm=lz7R9pScw94QQQ<~vN&zY{gPqP}3%G*w-@sNq_|Bwa<4SVknd zdVwT4^d!BQ^#xh6_+{i?KU4>(Gb^W&6_$1C3$jBpYX7M(sG|_!s>^nS96A)D|Ejt@ z^#zMe-@B=XpS!a3#26jd8p#|?90rZ=`Am|rkwrq51z8qkS&(HxmIYZBWLdCNSs+69 Ykt&Q$V*EEH^w{(77D@Z2-T!f)0%MyuzyJUM literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata2_113.dta b/pandas/tests/io/data/stata/stata2_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..09c90dca943d1cdf84bb15b884958ed941705b58 GIT binary patch literal 1490 zcmXS9Vr1Z8U}k`T316L{EFh&|sNkDeq+n!VX!4K|A}j=z-FrN=9K=j;Hgp0>9q{gU z0MPMPq+h|%*viO+;s5_XfByXb{U;@{B()?nH#I&PtQwPmD@e*r$EpaV z08<;1NO@{%c1~qHZgsi&c_kV6R1}sb7L}wH;Z~B7n3EP?nVN`G2|hU_ry?^|4Ndh7 z(jftg!cJj8h?kTzz}1xGBC!jR*cnLdN;n(Yt`W`dh~o$QzhU94H|?cPS?-!#5lmJo zx7b0VGeolwg6L(F%?^rvU;g*V|99V*KmtJUpdLm)tp~x>)TdyPQ3FTAkc?ylNlY3y M-vSdM162|u07r08-T(jq literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata2_114.dta b/pandas/tests/io/data/stata/stata2_114.dta new file mode 100644 index 0000000000000000000000000000000000000000..c60cf480ad5dd82db28475872f08a5280e7a80ed GIT binary patch literal 1786 zcmXS7Vr1Z8U}k`TuZEwM#2Zf-J2W&hG6IDfv<-ps3=9Vt6HsUdi{;E9@ed#O$N#Tq zVAxmxX)=h0=`ZV<+WFtmSiv{3NWsVe$S^dwGB#oO|NqaQKfizfNl7e8Ey>JHjZcPH zi$TH_BxR;!RRmIip$l1{JT)~tr!pS5y4?J{k_>z*3QH4Yn(E6GUANsF&cO~k1L zpB%DVk=Uvw$qWYRNCJeIswK{(YDi%q#R3M^l5$ctvBq-v-nO#`7|AC(^s zfzc44dkARUeA`~?#L%$t)f*t4<*vyU!DN+kiyh>U8KT(-LG-f8W(UQ-FaLYw|GRHY VAORqF0IZuJ^wW9}Oig`S4*($EbN>JU literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata2_115.dta b/pandas/tests/io/data/stata/stata2_115.dta new file mode 
100644 index 0000000000000000000000000000000000000000..ad7dda3fdc4b38fe4a34a2615414ec13bae8389e GIT binary patch literal 1786 zcmXSBVr1Z8U}iuDMnDb|5QAt4a5jd}6N*2xfLI_|WB}5SP|d)g29h+gP;g64QZOE@qDw7+1VLGCaI);Bq@QI(TE8G zf^kMOMm+BZ;6i~Qm#il=;qJXTz?BLeGK~Ot2rQ7}2q|%a0H*=tM9EeFnWRijmG;RQ zuidyc=r*plylV}wJ=4AkxUvE+R*}*^QKOmXxx=%^tZ>I+FlF_}4lkW>LGXp#f*U%=;`O!_jc`!@RqrU4?+h2-;zpstuBeN~SmChkq}m z)Jj5-boV7`Ldo7wSYvF7=l*0~+FO?KL5!6F6_Sa`l$i2GKWB6zjQuN@9CM%+UJ-LA3mu6 zq+-;+Tjd>^XfM?7`7eBD5AVVKIi}&=`~Uh2pFVkk_Tj(#PuSwg3l>fPhtDjSK6%cJ zX`z3IXBhPZQ|HfH__v=>KQMFNlD~al`-U0wmn{0*PpIE8Ep+DJzRyhkr#DsDT+hD; zYws+YGIP#?$&04{?Z+0+n?Cm+vcNE*^Z)T}hM74}n)A0`ufFkTqW(S&{|IK7?t}Vx z%=qgw|I2Id|N8g;2VVal!ua>?|GzTu=Co0DcupmGwjf?OZS_b{ZagA3Rrk$Yb`#l*fm3_Z0qTT>Dsk`9XIo9lb_g%>71Km-bYU>Yosfdk9EV=HD;;H;4byMr)RT zJLP{D2Q@AGSY5(&7pnYUlN=$pBjq`647fMl`VvA5rl0&@0s3Da0%+mcZ!My#7J89q0)eAviZ<4pU&#~zVoC^;Ll{%jlDv+<9&e%eL-qR)tcp` zCXmb!UXs%I3%dS-O;t<2c2uG%oz?sF@T5wITV!#|?E=Tu2;AwHM3QRF9A_(ZigMK; zB?tP$v1Yy;*A-R_6GJ_z?0u6k9@~U9ho0o)P>daD&t>9zRMsv({otL1Lx$zMW znFZ;rvD&3Cm7;Hw3%6Gb8wb`2T*XT=I=9X&J2piuEf9VwA3kh=;D1{-As*9yR_EQ{ zgGx(ok}57r2z>llVC-L$+sjpmo#2q!29WFgi$4ayN?AdspC`Yc)jO=>s9bu7eAOvl zXmELox84z#peJz5PP2lIm%o=aP^j;fZ*mFx7*-59(g4ccy-Vzzz7ZN8)(gte4;VO4 zV8tGFanP)Ej~;}t3NQu5p8~|_?=+Zl3m*`2;#(oh@=jnn>uK$%r^r}yL6UkPW9Rr6 zKHeHcl4?vDtgd>Y4^?V^BUxScpbH zpOCH*>B8>s8xdxc74@-sj!F0@L?ukt=pm>P3y!(MF{8fJOu4Sl@$I{X;I~#p-rtCJ zih6EV0vwR!ftr;uhM{nJxCxf{1FUY^C_<%kFUas42ZXHSdcvG%sZEt?rIjEjc_U+a ze8je)fU~CBQEdUy_bd^?u~#JQ^GxAkw2sKl8^g*=SZRY;%*fbLWXuN?_26$W-{yOO zH3UWtr*imPV%IcB=$U6tq@6aDVd}x;wnFWw@tR|6KY2Ylm8dXjXE4OZ?BZc%uZdyo0Wk)y z980+hD;n|Ql#rOl5J_x9!+hQ%mdnjNV0GP$ku#wm01r3Rr6*J9mL&A7I4={>P#&Yx#y4K_id@dR~ggx6j9Ox93-C6qGP9O%FakA! 
zfa=Hbhgn_KqeWC1)0*C6YJ@b`&P0mt4J$@i>4Q#Or&-DGFGBj-A;~qS!>s<^>qS(a z+?I~)cSDG(b|OlxE38;qWXFCWOqd1{kBWfLfWY?gJbSCm< zcZA=_A}4u)@EbMA&X4`TvFW^z%{W*wMqOG)x%Ta7@vQs8TRS**74t^2M#faROV*Nn z{h}YN?7*-X$EV%-4or%)l3mj`|VHgY4GEIZ{3?GJ6Y(~a0DF7C>Hir3EXW3h8}1VS)`n_;wa9 zTLi~WAeISy{HbA(j!lrh1E%j-ostqsrHkF^eOHEz9odmcM==Sw=79Kv@TWA$K$qdj zUL{1yq3}CapJ|Aq%8eehF5tc3^Fw2E^EAa1@nwF0{CJy&&hcAyTFf zGS&u!8HthfLW7LI{t-A<#%KENG9AGVV}z9^;MI~kp)%J2qT~y;YzN0iBIIj;MDyuV zfN=Wdd2PNNVfBF)Kq`9E-Cj?Gh4%J@d2NQ3_DFIdNK%PdlK8-;BVZ*GR-E}Gtf9q% zXey6#qt*e>g^f>|5-EWI;)Gbn!2dI_lExn|9RVHK!{>I4f@4NUE5vf$mA>Zg2`Rms z68U!lkWQ%nIDG6ISjpo9mX3rz?@~Edkj?6xv({rI^`%u2Iude-CCU{)(6Tce3xY(< z1jq9ERQr!X9tNzgFlDp)-AiJqRNz6|ue2hyc~*c72c!!e3qkma8f4)9k3pWDsEY+w z46*SWsMMn$P32iK-o=V=Cq9B>U6JIekcb3utcV{yJ`mv##p_$JVsv`80aMJ2hFKX% zy|oVH9|4YaLsv?u{!$H6{97Ozd9d1UrW{t+bKXYEtRG0f7-ma`$LWaF=VR2R2l{0; zXgO62zkJFlP5NxAx&orVbbb?+8xNtGHywz$6Z6G+6bRE3(l-yhT8bne$IPNH0_NP{PKA?42=3(~o%DCGb+wi@hZF#nTyIeZMr<7dw8 zxDi&2(s@{^r_^&(FCwF%+5dG@%Bq~27bX_ASlHU5IAUB^3E7T&T zMWZVlH1enX^eHSlnf-6SEj-E^TmllQoJXnN)q&_gsz)r7QT+gm)MGsqMvwWQ%`aX! 
z7IBTo`fw0djJZd4Q00n+CTwj**0n}mib7yzl!be~9<}%dj#cnu>&Bu(oy|TrenrPv zUC>Y4(f5RQ{ia^%JL07v{W1ww#(rLjo2>KYH7y zPIx%>xuA@h1;}`de0?K6b_B85ncKV>hwz_3`d$&n{ZbIiXj+-lmN=4E0(Tdy12YY!+=h>(BmC!lz4b(p|AKn3f)&G7`&}3oW9ayY zZHeucdVy;ahFE5xUv{ASyEH35r%u%3s_&Hyi1Gc9-LNu_hQDznoxEP5lncP*StzB9 zjBNqO-tZOfpFsx>!^-T!JXYt}F%^SvB0U)AOnRE%2+ZpRa4Zy7cB1#UB4bba_J==1 zzu1DwQAK&I-gfdnD$o3kHfr35)Q|r|Q1TZeV+&Er6b!pvh~+Nd*GHAX{g`Hp3-VY) z@T3eXul}6Aj~qzCl=p%%Cmb1Df+X*OW5H5 z_yOUU@<;wqAgHP4u5IPA?wli1lVE{3f6n~F?nMoaM z1Bh)u119oXSc$<7lMRm9qpkA!u%9O($wo7j-*kf2B{n-sIm2u$@Bt)dvVm|PB4K4C z>XM5VYpp??Lqjl<%;^2zMJI3;8*-E~w`NhFSpg(okNILB2`gK`v3v|Z2OQ&$@(y;B zQKL@*Sp$e6bHgzz^_fe*n(t4pUo;ZwM{FBXf<=-FK*gS#6+2}zWY8C}WEGxZjrC1( zapIdxhvyJte;$xKQLvH>9V^%7=V) z-c+=k6%?kbAfGinjm)9Us#$bh$8IERiw%)aM#1WSXmT00qwWa5o-dv?4gJzt)o0&) z)|gp+j4~;6sl&zoWVCe?qO4jENG3|Dz_B@qrIx?>WE!eJSk*E9Bw{&q0#j@rtvD-^ zjrW@p#RGc)cLWMkiGG=+L0o4{NBE7A@yv_d2_i z;!`b&axfN-6r+%6$SMdeNI?j%z~{vJn|OV`2CQsJIT0j^=xp zzd%_#KnHT*m{GqBjukASQ777yw2JnGJG~LHoCcHYK>qjWeOL4R(KF%LsKVM&w&p@s z=Tel1lSMeS@pC3Ok2w+f_SfkBvw+lVS;7Wxep)gUj?LoLjl7W6zopsmg;S zJ7DLpjR&LzwEP21ZUT}xn-fRQfUa z`F$JS7(FO+tvFqK=j28 zWt5u~O)I?Skt1F`2=`8gmCF{{n@2HB zzqH2%-mg&pTl|+ej|)9)&q#-n+nISBmB-J>QT7{v2PoS zSmKH}uCMtNVi|XX%28XW*NQnLq;d#hI;6p|Y74iuCm_X|W9ez3py&{k(jQg~^3q?Z z+-f^@ahOj0f430nZ5piHu*iAtfb7?-bQi_aTSppgd)7bgX-=a#(EZgANGC=eWZg;74Y6q^>=8(-N1;Y5JftGhcOK;TWdu_Z{C=1Xp%g`_P z05L4>b`N(-Ni@nZmn?oRK>7D0{2Gg5>jTHmY2(#>!9vW+Xei8kK#cD852=hTHh0BJ za`$JEDCq|Pd7wdRN9Dr*e7@_ug(&6c*!f%VXK+R)zfo>#G7WbOBTJW@a?hant)PeOSMQn-9XINw1 z;om8@CxsSNMG%)W$k?c{St$9bRK_})m6NBPNl)SC|9_K z#1xwdHyeYHtH(lO!Bw;k6ef+|n!5z#FX3^01Bl-F{7cF`-$fkqDsmhnl7y*vtCeKcjO?Jw-|f`8GDUV?&o_pU#1=3 zuv-FR+)7_ld2TA*S@I<@*2BtRY#Yo6Y~!d=*$dR=AfJ=I41~FYeQdSq9IK1{`7M=> z@1+rBB}sVXN0da!dJTi83d4xHimFz|jtoa#Vi8Lae~#6c-grkPF^$@cTS@Nq@+aIS zY#Xcef)qU*vD9mj3(m`-W8Z4zKZj=))$b_tAdSu%6HY2a;LKBX+prS2=K&-r{x_}q ziFOeP-xf!n) z`qJTGavn-)EifVDNT%O+h~+x(wc;xbi;p!Xm#~JBZivNtAFU3bMqF1S7Te>1G{&Li 
zGxF!n)eSyq{t8Vl)H~P`oRNDqV6>*uM&5IYXQG900v4128;sVe;8-LayUUjjSP2~q z!0Uyk^Q^9}_B~~;?5EG3%^(v~0clf!j5R^VW~2JcP|8|7vsj6pc{mn$FF^F}<%}p- zX3*flOUc<#3t{34Q5Rc;9|{={!8bqW=Py`=-uDN`a(&OUh6UaAqLO=%rk)Qc_tFVb zB8!mZrm(Ucz3;0*yy{lL(Fq{T?Sk{H(LT~3DpxY8<=QIZc!oeOPJ@yM6vGAa&{9_Sd$B>3V-C|XH`kJkJapx>B0!qK${Nhd!J5Fn&sh!W>jzP?@-1cc zCzFh#GUz)x>p>)OvKdOGDW?&Oy})c*hq{=xjHPy7gHpcMp7NBkhWPI*BlR3Vi7*CmGb9pYtYDM48E=>OIc&fW~?Zm%%SN&#*%f7h7cv|9O}{n z=a`Ln@Grnh1;6{{8pQI_gw8bm!0HZ7vKE!1qtvE)GnqfSKT&=?i|V(;g~ZqB{a_7p zHgGM{(E)^61BiZrkB!K5$fZG_$s{>|!*W%MQaT9C<#_Z8}uD7LYntSH;0zoyR^DzU zDo@W*ckw&2^ROF{T3<&gI|+=(I`qqbGzgv{VgENnU>i&d`o+49DF1$*9-sL=i3sXV zI2JoiO(#4ZjRYxlidk(@CK}O79=qW za*+aOQuzEue?rs^R$&cU{o0F+heCJHJwtLXyWyK}qAt#GEDEJmA3!AYc8}G|qCT4R zDLB7O?I0@aFVXx>CFK5AEQ$elkg+a;5)h4rM0G5IcMgmJWG5cjE#oh;y6m5wL^-LP z&TUgpoci@7a@c)jtc$>{jn=HF_ZJV-VxVJheNVsK1c<)FuuiCcC4HE1k(6A(23v&H zf$IuQj>g>o4;+i*bL}=@yKsUiIRj$oG1XaAtglgV%@yJm+?z0iusYNLQlut19^r4| zr>xolD?gdkN1hj1Uo* z{3TX*EVa8Rt@)XjnC_6JHK@zhr?Ao;mn$)l@i>$+hCl8d3y3#1r2^9>R^RVw50U$D zi*8`A-$Q4$YtN9!5#0D`w@Kzcla5Y z+)I#4W1wS+NOCma;1~xckHy-P11mc=Ad9As=2pHLBpi5!ZDDNjt7 z@$;yMeMM1yGNK3xPsxU>XzWZ+VLed_?Vh^ksMtFFN@*mG> z@a}tL<9`MaW{VMy;Rj?zn=vcaM$sOHjhs* zY{HFV34W{4%=bsukn8CsGAZxq`^$GoivyVZR~m^N=Y<_60kK@vtawk`gp+u0m2a?O z3>e}iDv=FTzwZvY`-dNqdSG@Y4G`oBNwAWnS-JcDCT!zWeK1s5!Rq{8d5O}Gf6%q5 zcSx))7R9|RtPF&}CWA1Wv4iFF^YxqI*gg|7SWv<0D`pK8xi*YAw*D?wxKV^T(ij;7 zq#*@^Z!sJ@$2(2jjEu#qzajv{aP6zXqI63qT87pVv1SyJzik4J4Z?ic4TT9csf%KG z-e%OL2gYk995Yhap(2y07wz7>AYSu6A@WRHBzdsF346dXNsA?b-J)qSe&SGch1IQC z;Uh{{4PpdePa?XFB{GZIDGh;+rD;~wy7-UU0xPXy<@>@btX{106_q%n=pW8dm!NTk znPVr)j`(fDrhS+%G91h1Gxu%56q|wDo_4Te=;F(Z++i#6NLM{gZTlIKCt(nl;qhkY z3|P60j2+|`TB|qM)4{hluwrz6XvU_*it~Ro((clDB7fc-5EFhElYyPz7EC_KUk~02 z!dR&*oKGdITXanjnW5HVX(~(YMohrdH`R(6j|lgp_ucT#`}w*1wql2g#1KDTSjpn4g)w2j#Ja3eY} zDHsb$b5!3Ca&Z9CHyOR3!pBe929C8b+f-#2RkFsietsfz$3|?kwh2wUJCP`ZTOi4P z0=M-b$iE7)B=ecaw!uo7FY3f!#oWI%3_4~j#>6(IIh`jH?v(=^^GA{oV(@JOdlMk4 z+qLTB0fgx)tG^yI96L-?@x>=i=@G9iCy 
zi;9h%=svLtb!H|J=51?mY#7#_14weR2DyJ>J7W2vP(7MnWsS#L1c=g-X5!*WEUiw0 zmFI26XInlJxaKQD3STLQ)^V^BGT}B~8eS~?DJIOn+D3)Ql8dr3U)h(%pmDVjp_da@D z-p$4@RUE}3A3lX6hcJ;#z~p4Uv}Om6qWwY3U4_?Jz5TbNuqU?=w+8-!(&5bF<|Mi# zPedbUX=#R{*pdSk*c zk=fQvJkshYeyO&SFiV_7`(qQ1}9OfoV9 z26r7fC9>-P4^NM-1Gu3By_u$W@Bho6$Qe@>nn8d0Ct}Hzg^OqW&1V+Qm@;?f^zgs@ z0>=D-X$$5r`pZw4KQL?l(!YFP{f3zfmM;FwPnf@9dibore4m;2cW;_u^F03=tiH2& z>a4j7r!1cFmmga)f5yDOi2}ofFZkQH8D`deY3^Tsz4?tl1NGNo_**c;^c>R9W9I+x zng90o{~qA|zh(Fb@ZTc-pa0SG|6-K>b144#-+v|J%#`OP5W5w<%8~2I(m(~nD>>i zF7v4%H9jF6_Yja4LO~bGbKD+2>WZh3bh0g<5$C| zLX^tij7+o*SQdk2Kku0}KyV4^&pX8?0HP;t`cY|6E!li%yO3r1Ops^&PENf9gs~Aq zk~V-PkH0*X6C!*z_{Kh3&1bQi{_i{~6ZA8gePgc>>2P0Q!e5a3;CiE++8p+W@sgCu zpVtf&tZUlv^}#8IELQ8&%abadZ;>T!whJ6rCvc};5=p8za-5Ce6zi%&$`1^LVJ&?* zt~*$C6T>~J?0u6k9@~U9ho0o)P8jMn%jo@S5UNy{hotL0gy75u=ImKD5zSgBb zmEvxa^S4(E8wWQCT-8f5x}d=*JG6i=t>JzJA30)>;D1{-z#qeYR^vU;gGx(pk{T{n z2zvZjVC-I#+bhfvTfsi3Js{Wl7k>-_OJ#A6pC`Yc)!IjMRIa!~zHmwsnp~dZt#`yF z}rHVVq{56J!#u-G9l_9{!y zxFPteKto8CzHzl8)Nv{BD$_*23t5erm*uZ+Z(gd+hj{Jx_hJS?%})17$8f zChueR3f`vysnXM=lfOeSt%cY}o;<_k^2g(cLT?tna;giT&Fad6j8rLlLb^v~3A?|u zB+MoY>SO&JviuWb7EJEwVaO2+jk&@wy|%(gx$e*L?Yo80w-!X+Z%LhEpBt4xdjxr~ z$}&bb97YEiz{DS5HPeF$mCn8(0XGf^xyQAHImc4#8Z%2n+$d`oOw zV;3ggf%~IqM7uSEWY-T;kYNt2*gmTSwH2H3kz$NI2ysHgu?DHCs&O(%Zs9G<{cMVX2WVpSe2OzrD z_dkVTEa=*>GGT2rQkl?}o_hHjf^mZTQ^1nW$M537ve&>cc7W)ER*j`xl?9D@aZ*Uh zWQZiTrx8AH;Y(K-7Oq0tdmB-)I^O^GPliLRMt^z|RVuA$(w3{j8Rk8jQU_%J9oj`V z6mk@=RHpH(zcZrt_ZZBAImBv1NKg7~|mcP)W0$^7P# zCe%(hvtMA*C6Q^A$+e-?Lw*qwLJ+ZsT`0%>0hV6>W=Z6SeK3J#8tTOa7QJ`r=TynC zqn!*lg<{r<$j;qh*axtr|Gafg)O8M+CXfaG2HJd%c*-Q##O518xYhwT?uo zcLj@uNp=_j!9=JK@u&#+Of=YjzK2=e;Kxg;?BYPZW9|s1u&zYj><;%WO>(Li1iw** z?EKgd8k@oUSdRmXKK9~r%5~~U%Vysf-rB;jtC%;E6(VMCcWG*nuV3&3%MJ{SA-=h+ z#_Q=9Xt0jdVR60Sd954a?qU+8HUrC02xhP9zQN01oi#}(s|~rxi_>bV4D3qB=o$t8 zcxS>~8wf~q6BF%=kL5ziVSG`(KU7`7yM!ba=d!x)+BK9J(w$aVHV7fTyAoxf7c|zw z#4sjw%S;vG6EFgy*o=tv0*ik97C>Hir^TzD3R(S}Fu?}FeOnWkCqiQ<;L8L)>EsBs 
zj!kHN2Mph`8YMl3N*8+4`>qTbJE{wjj$#sUtpM?d;7_WM!7c%aUNxGMeaW}1Hb)mr zl^eZiL*RSC=Z8*2`f>;$Z2%boDPB||L5BhmvBS{X8nEarYhx*Qpf|N{uO-9o12Sqj zAZ;O-0JwY$_Dg)w$d3^4gXsL10MWM}8cSu33oYy0D9DBFiIn4mh_#1cMqwnqP$A>5 ze*}$H@;QFH3`elT=)uw)x?0*GR2SHzDfyyXc7$Q0;PN#<;`l5nP&oDSoH}2Qu-YIK zAXRRK<1qjIntwF-PLY<$v! zNXY~cNBA-Z_MZkzCV#wQB>KP}zMxAi4AVPUz?bW;^fh-+NblQ%$iEAKI3fGv@UgGJ zQpg7`8-@P7%dD~DJXYhJyB;H{Kdp(Rm#U8ZNDhpCyXaPt7Al+bC7~D@$A%pjS4DswlUQA%oB_wU2Qm+9tgJ;Qj z7Yo9j_y~q|N06tXMI=LGrTpmeL2!RKUf%+X-tpN6OffGSVWA_9RvL(ZBsA6oRVg9+ z%T!3&Z$T*Jq2_Wk_`|&bbJEtPddi&~4k$VN5jZYLj ze1eGC8)F{@mwv};H}{aKbd}Q0FZ&ShnAZZgXDT2bCZ;qRv$76(N#pY$d;<43n+@Z? zV|Dt`sgz3)Y3RgmEoQaK2Qt%iCT%>PtgjvNE=_!$ejYy^v5ItP|UNvOBedXxBUv4bZCr0=;gOeE z81n`Z+r!Te`P96P8?pKTqFsGCmC6qZU1#Y?vf8~A<5~pUhC0ew3z>S{E|gu;n#Sq4+p`bFF3M; zDpyQ2d23s;t{w7H8U~hN6Zd>Qa`6ictK!Etj75by8-1+(N{_Lcke{}r?g{PwRin^< ze4r)QYkrr9(T|X?Hp*#WhUxn60-j#`q(*S-^wT^ zk4Ijr)Sdqr`dH#ulsgefduHm1P3yM;6Fm#QOh$=qf_k%%eZ6sXhY2X;OFSyGq=40o zYWoeEQV>m_+M48_dnGVW=OQm*CV9eExcoH?Yvf(mO@Lok@WqoaV6|KQ(&5W!TAkj3 zIFMHYcNePzGaaeihL2^z{pWn6)kKK@y!l`S7Ts36T^JT)==g^nh|QKpfomQCUuL3S zb|Cw^RFy!oji=wZ9b+d&+k@JPGw;10~0n7P4BKDf_5A zYZA3=){iuf|3grUmLOt_kjiunyIt_*F5lnB+y?h!nk^|VWObpFv#GrLGx|PeFo{s! 
z3(DL`L~JR7ya$FIN04vu^X`YJ`wY%zhCU2wdTIIHn_1W2E$)O5>>c=q{2;B?W5*eb|!Kib6)aKC~-@`oAnOikr+ zR=Y9dTPnSmsO@JaGH(aEWz=f8zZQM$5Smhr+AU9Q4~3Ckj83s1l^$nx($qW@@=V$g z;!lcfv_x6I2JXk9#B$ZaXYR>ibTW+WXsp@80HQB1`wsOoi#pT?5}N@!Oysp-iN_9; z2aVaGtcv)EpC=>8dLuf&;RLHmX?c`#x;a?j14;Z89pOI2fMp}{Qh*X`r$U^=!!VMJ zsQtdBCvX-Uc9b%=W>cTpfh0+b`C=CXmMzd&5eA<k{tfpvB0lsw}4fGR82-}Rv#yF&M7a%1ls}OX{3VvtH zX%NgTyq-`}gnjI%e7OHPZOrl{{*g9B2{XID7w(^dFHK10L%z0f8p_QA9j2zZh}Auf z$)`;8Y`U&X50bmZn#kYBf^|Q7awWE-o^ZdBFPl9b_0rYcpM8s1eNOE$%B0Vu_7?_{ z(N@ig61^Uf9Hdf#VRPY2J%97bbYy?1xsUPR!75af17XOGVj zv0iZh2SYKdYaD%ya-X0Pz35AF#*<j_mx7`9r1qxY%`bdKIPyUDoELlV@Z|O;t>0bl#qe-bZ zL%ORPt9>&UJ#8^>9!sUzEwe9C#Uq|>ELnhA=}MSiccNaZkjm!pMT>|%NIF=qqw}{X zjeUN_tgPc3Ixj%Pd~gzv1&eNsPSPdeQZw3sz4)?o~*hiODh+T(p_>x%4as7=a;mf!i zRF2(3y;ja8Vb#M3(>W7{)tb1iy#Xmx4a>?5heU@VmHuGS$;*DBa@+0H#eN3y|J_8S zx0ztMVUi2o0okvz^b{jdq1_=E4?y%cH{7J$yX~~of*Iu5ZwA8H?}IPDppXX-J(kAcA6bAmt1UVBwVK~u-rDuZ@h_9Y@Rw^*YXSD%QhIc z91yL&+g&Q1+<|Mgxn%SA0%80!AxQ9EXR2r+B zN0vMn(E0bn{W_Ck;{(Ias^ist;Udh+ICPlzfau-r9#R=wY{AM^l8jW;_Aj_5-iE`->eEH3UI~3e27a+(-`TB^( zm}qMun9GLK*kSx0s^%9SDbUuHE)l|KBPNku{C|9zE#FrTeHwS}|Ys5lg z!d0|2I!q?NwO}d4U(VzD1`w_DxtElCzKc2xS&chOf|Gb2e0hWU0`W*%6|!;MG7P>= zNag2%=z5-bMV0hDG)r1VX1}1g?#PEPZ!!1?BK8`o+|T!JwOl>EVYdWCzm>kG^1KYX zv;1E~-w2kW*fy9C*v65g$`{DXK|VigIRtYB``Bv3Syq$q^IIw%-%F#&Dw6!jk0>c< z>vasCDvTiJRn*Kfc2p$tk^o;q__M6G;>J5FiJ8=T+$wUfk3ZoqV%vz;3Q}AEd}&l6 z=bcxekA1C*|16$a)V`z4gG@SmOeCoegE3Fd+lGa}Jr5)yNx!MtPqB@H`!+c894|S` z>K|slp-kFddNXMjPOkn${uU!D&k`eO6qH;I@t@+8??%D><@Ht1XchWcAYLyroMSZ&_3tTj zWj}rPY$lnQ0Z98|M65X?HV4^Xj#Sp;nZ+vX%mG;7y#Ud=UuHzPI-7tvGw*bou)V{9@ z@oI>Mp%WmO+r{TtyRIcPu)3s>gaGIc9oPsQE!4geyg{~U*aO`Szis6EIcm=EZ zy-X*{@rUV{n`=pSA*ynEDIhJuvIetqsM=uP=dVWV8-S){;akCKzfaYR%8+mA><2N# z(Rw(Mrk;W?b^^0$9r9vSBbL#14O019each8>J|iAh;q+7y6932S!X+x$jT|SzE*gs zk^uL4HI+YaTZ2M2V(@kUzJk@aX~~N6_xUvI#{{y@au`u^&mu3aagN!Dr*HyTs`%Y6 z*T9#T22`fu2Uc@vvX!Wm9;Mc`o5_OF1BvqM8Dzf=E+oE0?T4z6GeK(+j?NIw8bGvz ze5^&La{&#RB$Kp24$D;qQfV(Rmy%E~ad7`EfAQE_Y_mb;vlt+{KF-ZWd3zyk+_r-> 
z4jDqYk{?FRq|3 zLzW%!5Hb<9zgP8TOq)0~SU|b^w_o|FK{F+W0FS#T8RjB>WP;MeIpZCqf zqW`=&ut<1hv(He;YQ36Ti%Or%2;Vt*Cg(os-s5+KXnDkQ8r4nw?++F>eL zU5sEaDr3&jyJg>!dHvl9cfSVN?}EITWw}djeW3yC01*IZV3@x8c3V+-dX~D2-;$k& z-H6ogI#TH*FdpkrFW;yTJVV0%Z$yJ_GAO7QtM;P&`#E}i)^{W-q%YxE>@;;wcsd#b zS?)o^GWm4t^$<)to>(pA6;?Y?bP(mT3OejVDUqkR6XxIzuyhq9F&6D29mb^dMT`H0 zm^WC3)#VQ8Br+Zf-97I#$-m@=Z@!7VIK!}5q|*EVB8|6wY`!e&r?x%?=a*@nMP>a( zTC}O0+~0~tG4Ku|)=f|X_%i3fJe`3tNj?`KC*PQ6U$wZBXp2lOU# z#C=4po4~D&Q(4US7Y{Sz(Z}BUo_e_n5N+oXPRM>WeVBZKl%K~2TZ+|z>yDlrhq?a^ z3`^t-Y&T%La70sb21M6unzN`_U8CZfE5s|bFJXpYb*KZR)Ewj_xW9>?8odE5KN-xA zJTI{Nk^Opz%J^DZKCYI0-VDCHXn@9g2(ouP*3m?j#p&J#2~64tUy2GCdv=%XwT74AQ)8T7mes+E_ei;hz^s98d=LX{L2g(1b93C z6031fbVIxNjh4NAM5;!*67B$odR<>+KLPINBVy5fa#bQGayTBQJTY9tZxtT)7p0x` zv>~X8Sh@@#oZ&ruaTAzZ@mP8b;Y%d%@3|3Y*3A0-xN|J}5ge0hWQJbvzB_I#f2dS4?t>zF| z>Yq_%+i&FNOfRB{W{9UCE&UR!!c;Z;?z_zwl6Dvtd%&U_>ES8Le>|t5yYG>W-wYzm z7Cj8ZuQ^ILV^;QqVX3^{G6_Mh^Ud*V#b0LiYcGN2(kp5sT_Y2(3?YoA4ypCT4@0(L zSp0%Vv}j77C7~zJ0i?|EBdduV$B9z*Yq~7^CP^CVgKH>DQ3>Rb%5BL0E!6u_zG;6F z1TzEIgO5sn#LoZGKv8OVLmS55A&#<<$RDvH!wkfg=XR`c*YSw9kWbca!i{1%eyh;Z z_eWM&;OQkY>F?2&;YXxin4PJE1bIR#SW;D%yWefXHcrhChDxefjo<QTp)@x;EnuNwC49xR(XX zU^Lh?2xc>Oup)keb~6myXFvptt5|K-?7%$MEhFyRLCq8M4Y8F}f2@mdYT z^wf2@$fRgR+cz(W*Zfb2Jj(__9x8Cc9%xKbeF_ zQ9Z^InZ@jshM|vTsx0Qb_y=zROFOW9S8|2biZ#BXlBgH`BN^%vGLA5FZAIAuzfIV* z5A#KaVR?MczAczyGjZF~5iGiHzP!jCwh)hW*V2p*lZZSSgRl~hH@jwo4xeqA0~#i~SE<(!r^rSV&qS`+jH_ z2hjSap!U=Gq^aAWvDQZGn!M6#R$n>5Ph{>`i|yAor1ldpEwjPA|qv1;$ zpL1**SSo#yC;lqt{>2gKV>V)ZLJOMTbqe8L*~2h@1ouC?giPm9aDIrycDgE;KN zr!eFYCUQBHoW@tw?Z8oVAY{3#u)0&Ma<@qD$ID6mqWGEzL@< z@<{>2%9!JK9V|L&K#-_huoK;{z9!`EGQt%)h{<=yW4;ua_hdBKQa)*S3i1+AYQ9*x zhWq4kBSr2|OR=}t0}^>TipZKyVuITk{3z@UATNE)k2z}Jq?nJQWg$tn#_Oym!w{AfyJ)QhR+S!uvotmHbY)hCY-nQbk_BW;i3 zmujmBv(!$O7+dB%^XGpy|N8fH|Mk2EuXA28TJxX&{r7*p&8)v^VWGZmGO7Q}YSv%<{J-D# zT@8F!&#ogUFX%eJ!*gI)5AUG^y_mOe-u*BCMNXf*;NO7%_;1AG$qN=u|C`S&m_B*V zjA`Nj5v=~v{DG18z 
zkJVjV>Q3cXKas}NO@jZf=Yo>(h$t1m8JSf!5Vl1Sw*9N%051|!GW2u?AsDx~bdKseUYm*cv_if&@KCzZW#5XNJhux{)_L4Ng|FifLSspAJJiKqwbC1pE0-$9txLd*kC9%nxOc-&C% z&B9ksapALAU1^Y!Dus_o_oz%^_ji_r*MWmh9lwq2{@&%Qqd8$2ve=(`x?|Fo2?#XwbPn?M7icVnsld7;No>e-qnUS z^?3UjTKKN$|O z8vW@{s8V4?6SrIu&M@z=2X#R8-#{+9p^>9_r6QGI{hblLzsF!E%pq1AQXC4ou%^Qj zuL!P7-w8}+M_RMzEy&*;+O-(5B=MU^n$SDl%yGerE|E;7OpXn$9`cJ2AA*e4ccC2j z2dwn^mz4y5*n1PKOhvz#U`6j;@)=dq?Pw>%4WWp&BC>NgIQAY^T;P8^>av!98z-pb z@xOK4kj-k&ESZP8w5G$Ho(Ru+HYZX>cbeZtV^S(y@y%Pou}Hq?y@2X}1A+a%D4W&3 zHO{9@pElGx@1amsV@tT^J&+DS0{fy9x53InJ|vN#E|z%RozG@p721p zn`Tdx&b_E4=}a8w3c1^-Lf#FefT!}Oo@e@Iv-;8J7C@BR(v`~_g!8xB5>D3#gwdI# zI#;ka4N0EE&nl!)n9itv0)Lp*)I3;3l`-w;U8YXRaP3N@*uJo$hn0Tl#C58b!hs^B zuOpINXE@Ai@4j3_<;flBr~%i7m|91oG`PZwg-Lc80K!D55b>x8_zVbaKi|WwZt%k; zRCaNo-qE)OQ&?9bZ+1ucmL@sH3xwaOLUw-W2aZkWeXPg9iazGzGRk%8NK0qk6JFcG zu`8H2k`*#$mb+9n$=5FU!O9K{iy^)_tj6o{7Z6xS>aeIm@VwfMaCa~XQkucaP!MLX z8ot5HU!66HC#wxPsEgBTstoK($LN{_|2SvDTpb8Va}%@57az+3lf(GJJb$n{pLYpK zEXrYZ-L-2dGo(8$w`>$bdUqwtKre8t1#YlRP|FMz;uA0eso0E+^@0`s_$`3E>`seT zJ{B_jIbnhgLin~OE>{G{P9T;EeB#Lwkd94|z5|ADS&fnwO{EJx={;A5j2+d5NJlXV zxK@DpgYYL+$Y7TMWUm^cWMBL(tIgKMQ000r+8Fpw@cE$=k-i)PNE<*#fQlDYNYJ4G zWb81wwgy&ombEdIJJ6e2x7U(k_W&6+9FVpkOaMZ@3I8QNXygY-_(3TDB|!A;hsID@ z<3daOHVJY;dm?4~AY<)8m{Ax>&s50xYaf7P6@0eeF2fP*FnU;N4qh#36sq&>Axgeb z%Z_kt6hgiVNGzWz1q!F0pHt_{5mp;y0;H-h-R<>ASZHTQn3qOa>4YQ)fh5(4C4~=a zF%njyVa1t0!s=Qth^2C{8?_32Dr|hzf=EdO5J$u^2L7Lhl??uP`AF!%9zMTI3>?!t zSRj^buJk2$S4ivIg2=xMfH2E=3WP|IS3pT@_`{s5?)V$qdvRr06(^f4?t8vSp*E=zBuDf7K-PANgF6ewhR-V?fId_~slybjHw$;}Cup8hNts zF;*M*J_(DWAH8MWDAbRADkx)S0W#hsU)zX}9YHL%#`dqqA^b;>zGub9SY6tN6e=YJ z(BlpoqMZ$nxy(Sne1htK2^~9!>RTDbr17XrmAdmEgN`M9MY$7!v}cB%*tC8vFso)F zmPu%_O<->(s;@VW?l1w3e2GVA7U#2?QEk70CFFAcGbrqyX3hy!^caCfjeFw;=VZTMIw!hgy)Sxp4_&zlcc zu%g>)w+q8!3>{zJf!J(m61e6Oh-C))We2LiOSSTI`b0IZraq~F=-&<74J+elA?hN(%bk-U|ueOW8tu} z6TQC`8GFokI{XRx#Rg1{DJfvJHk0>JdFCh7vROaUH2x1kDO`+(kH02+DW3dFJ;kbRK8vnt0NmwyV1-Zm4a_&BTac>qYCDb#e+i+J|= 
zL*R6)kg=7Z<$lP;4+y`UKk|ne@dCNg&i)G}%{!e0v=I|Nax zQPuL)_E0$4#po3CLCJAeCr!ykBhR3XA^xP$MoX0SYY=`cS}aE$d}d7!qm$rdM`O(% z1`vH&>38Utnbe^nkk|~+VIr@El{oA$x!{-`+NzL`_<0hNtT#gW4JTMla?7KX)6K>L zA4uXR>j?Kg8df%nuh8RHR~Av zJz_a@0#j@rtvVx;jrUp*#RGc)cLWMkjeeP=LR@D|NBGT<eH+59HA4y^NhGLS?{;-3r6=1E5kwHxSE*uwr9$+B*ZWIHO;B8H-qLvQ<9iqQY@W z@LH>8>eOKfB z(KF##aB)Mhjj@>3xRey&WD!ZN{hY~-V~#|=^(A`$3?NNvmau^vAD7L9W3zbkMqbQn zpEZ1slf?>pvBra>+GFQ$NCcz|wEP21ZVr+-8j_mx7`9rIT~4m`98H)SC}MG zMj3?nJ*-p_I)LixwHH=Pxdui@LM|d$xQr%owNj0DN0MVAtx{Tvn0$qD`Hi>igB=X)}uu_AJwIS)@ch%kxTx#Akd#j}^ zWp(`vYp`7;(lDQyB&N+^K+<648kE01Y3lPUW@R1U*m*uO=7W=X46NwKPr63AvP4?; z$pVt48BDkjF*_@2(J!5Ff%hwv|0X|X&-{PYh2c-K8rvncRC>LM23(IIjMX3_`KH6M zYY4w9iFr_|LhL$(BbI~`j_YqciCD&6r*g~|>a}7H39BAPn9dn+tk%SB?F~q&>R4t* zI4C*{rSyjtoxJoHD!1KEUF@e5|KCkSdYu6)*G+PPJ0Sa2D?P;sbZB=F#sd)jjSV*_ z_hvioG=DmI@|%G$_WKaaFKFaJ1Xr8L*fl=yTm%N+2DDg$;S_fMb2q6{vYlo{%pn(B z8VT1a6IO1SBLvTL)FXX$wS%^}8ik-hTe;Q|G@*Cx*rqW1< z2(omEktmlAA(r1vxI@9caz2uLly8Vwgo(BmgsC)~#t!565I4PF)B2_*B!7j0NDB_b z%0pCNz+LP`IF`eQSuDnU*@)dz4=cLNRgF|Ry_4E*Tt>26`Vyuh2T6Wp;;IDn%RLqH z*|&?;bD8-ldYaWY9{!zjd(vo8O%!oCjf_n@0>_@1lsN<(GanQ7^W(jjpkLy!j?UxH zu$ro8PpLF?7v+lAkhoF<;bvnHa!puBOt^}+hQegQ}FCdSkRUsS4Eydv5gi?M6h_2^}7gR~xLo=n7WY#l^>yA9c@*0DW zAY(64%Kd!rR?F1m8+J=T^jqmmD$h-)JIg*N`X*Qzifw~=k8K<^s(6OF9OUydmw_;s zv5&1boMkofKfk8Z@x3&PtRzVf{D_haS+8U8RAB@$ucBrvV@E}zF7b#Zgg?t_%dfwo zl9)lQ$E_rH`}h;?BDRfHT0x2pKrBrvl~`+I3bi!gkA1d&YnRCZti84WAP)qY7${~VBHgZUz_ z3_E|9*HoUKL2Vy~lKwOROfEnvtpp})9Le_k7O`C8y;gjIVez5LEw4 zDmWGm$L{dugH}Sv0`YpG;T)@JYMR;MbSXI#ZX!%VG3sK2@WUbFVff~!{QL#0(EI-2SibK$R=1$1 zR#fs2()4qYb>tgOMT9I6WJ zyS&wqz5x&=3*T~9`+bUDREB&@XWfq`j@HA8H02axu@jh0>rfY?nz8h*Yf#FU>QkO_ zRyRM;LX>;v(uJ3z$vWGiL{?5g`dZ9!Qj5 z&!GBka3S#}dOuW!oC#WsbaVz`)&Qa%*L&9l(!eqrfoY&(~u#AEB*nHws@qL2##%6HF;gHXb{E?uNnRa zoc9mfh|;VR)IMV;X>{WV6Rsd*?F6ME5zqR}hxc55cTBWOOY>un^Q@+oMGH~deS+RH zrIDX64?JtnfOG$sSV*!|Na%4h zq#Xp-7Z82N)h$J3UQLMEX1_o}gsX%h>9^}}l&U%_fhKerRP 
z7p2tU*EG^}$sN_NLhpA5a}$X9yl);B{pZHQiiAft`wSJV*6VF+QR#D+c5d!1Z@)xd$1`;M1(vgD`1$Vzq=9F@D zM4s$Un1k10rK=!`F_4QiIFrT~F8UK<-e470mouP~$apAp_uSJY?~)t7`3CCZ498+n zO7jDVRNnTX`Ld{=Dt!vhFH<{<%KD46a8nt%w-t+G;B92Ao1g^7Vj(d*mc%;;#R0Mt z5A2rl7g$a1&yJ#;Qc35wuOyBGdJ{R~9x~QV;MT^fR?PPo^%-%{vDdz*o^Jv~+j)c& zs$Wg(lP-|5^VncZusU$vp~|lPx?WSAMaAkW71vxQUZH&n zGX$$c9UvvBL`THOJC>iqe{&X_?_RSz3p>Y<&zXJ#o1b2N_R5DdYI#-tmBV zV^bG&T-|*{niN?TuFz#y-0@tmraZx`dfXRIXxjYU!mW(9F@^2jyz~r%5d-7mKuXk`0<@!3>bMzw+23`3@ z6Lic4kDwEvFd68PwS42}X2>AG+wqrJjeCL{8tF>70~qRceNp{*grA3u zt>Tla5-^d&@i66);SzoxRo`Efb~ez)ptr=*WdPv}?+}Zdz}$?((p!L7B6)w$jW{F6 z0&)=$-LjS*BKN3~CWpQxjd_SAk`bjkcQyP7pOdPU(a9S@eor|05ggOIck~n)o8PH* z)Jx)3=T4+7OtqB$f;1=&u^d&ca8Ea?8?1fyJwqj{IUMaFO4=vXaqI(fKE;zTyEURD zc?e2KyjmAkWW_H5xvxG*tz@;DL$K2Dgeu#9BR6Jv5k)jZJOydVmsl01sMUAhZN8AS z!?4%`E4q;$o}&E6QyRMaF4_3aAi`|X!!i7TtYkB0Wj{EU!s{&)k>onxY`<1~C97Y1 z305w>pf=J~GV$^d!dU81T2K5IZ5xKgFL*=?QTj9ynmikjQp1m|CUP7nN?9-I(ySXK zai|Zjp)5rukV7fAq53z`???H!`x8N!>9`(zQ2Zly{tpI6@B0^FHwnpOSSuMlRJO-5vdnuXUZT!o{$17DXNt_-)+J+PR$R7 zimOGaTDzKn9DdSZ&p;!6Mh5 z5yv*&!3q~lm?O=QF+kp?Vel=6V`q8CiJOtJc=N9a0MT9jVyGzH)QG0x4MeO9Ci2(K z!LcEjFT0^I;Rf@f7+J6xb?JriS`Eka)OEPXBx^<6SI>yoypM=H(*{W%DsaLca7% zYkWl|K`;78GSnqx9AW0zin0TKo3Lpg=8Fu+a{26iTQJ3D;I^kDtmwM=@*;QGLOjx4 zOVc}iLgYypgcW$a*)I8|>-e+bdYnJJ%brDY4@GAN91SG@i(xwgSX} z-^FBM=eGfq5AxSSw}LPh<`vGTn$;}2Du~Q*E3rJCrM4p{;OU#$iV=?p_oMgS@Xh=A zx%;+ahl$1zKVDqTYI|f8QI2mWelggBR<4;qm~9xhxhB<0%^n`jy9W>J&5#DP*fv8o ztNYqb6s1^evHxL9Iyfa13rQPT2;IwKiJUC_`H#$$kR2^&rT<3bCZ}*~hlQN`)`##9zVOzc>OqW+TSMx1f1lCll_4 zJsk5#k`H3=Z326fAgbHd>f-@~;R>t078igWriJ+IqZagt*CfKxc7Oy3Qm+G8NMsda zGi5t;Yz0y=0T5l*jsQ`ywiVqcHmAmaB0k zS6R)HT3Bh{8Gh19QOWWIC2ORxeQEB<;&}K;3zr}wA@vEmDSpP9gID> zwYW9t50nmP7B@%HC3PYiIY(7XGn1=)k^!+YX8T=(6`eF7NK`J^iSAck5^`rL;R+nY zq&wp=U-Hd+G6c4SPu!i1x&)M%FIKMNK6%_okvr5%-S<^|3cN>Erg`EN9 zxsUlVNA0U*^HH=kB+=G*jn$+-A0aBgwiGX3JV`=ZM-thglUR0Oj37IdnVF0qO=*mJ zF~vM94LFMxUt_iUq!A*st)+OR?NR(vZ6#rrIEr@5#-jQc5Ze&^{;iF%F*OC!Hx6|P xEV;(&K79pRrrC*w6$PYn%^F{$E9G|0B`c?H{+^ 
zLkxC|x7!ed_b|g*hQZ}9gLg#egs=z$uHRpH$A?FTK~!Y4p|MAw_NEz=+xK?&;M#li z8PLbe+wC8J^>+J1i^$Mu1Aa~YFXO|fgXaB3;D^sXy7%{!KYVtx{_x<3k6hdL7v zhkL%i5B~6(_~CQOhfi-ei&XEKp|Rni6C%RK$0tOG#Tj0`d;RXk|M1V-?VldDfTxGT zSVKZsXzX9r`KP;>8KJT9VgKQo*s#!u$&;f03HGO3%L5amrpNv1tG`}a9tfX4^RHk2 z`A!uPH8cLt-~Hv<^1!6%@V|Wbmz#5rBtBR&4|{J-ABPY92Q4UG@`*S~!DX2$fe z$p7F4hKY{)kAG&E@aa;-|M>HkKfHI;|Lg`Z`R{@Cj*E(oj|h*8|2seb{ZD(l{ry32 zDJnKHG(OJIxqmPB$^ZWGfB)_O9^m!AmGED{zjys#zU};f(M#TLfAAJ%En-I4c(t03 ze_!AJ;-3HB{D1BNOMUz22>+uIs&S7qFk-pZVyE37#?Jgq?Yv6;A?i2tKm2{dg!lK% zhrhR-`2PM6+#k=o|Mv%(KYa9uPyXw}zq{nWv|;{W#s9@j`O|Fuq5gPJoDv!vIuS!N zrmygcaRzU@GY^v^>xC0G@8*}>baz;KM z(%D1!qm*rDwdK2AsdVTvc^3Jluq6AjAk{r09Csg(hC*Ij%5&USKJlW5kanyYpB3$6 z+|KIG&vK>ms~<@1#+8EK=I4U4{1H*gelaogZ6PHdQg-kjIlTp!AUD2Uaw;Ht(zF+q z`c{)wd)5g#)=vaE{8w`PIUtO!5R|qIQV#It6F4EpXPF`S!2&*q)wuoUL79Lb$<(Xc zgapUC0u%j=)CAU;?=HQgq{@J*ez;jWF(Og(agN3i7Mxgkfq;(j8mD z$EJmPF#PjtvF%whG_E^bs>e2tvFu|QbSuU6nHo|6%IwI>amhl!!-oQ6|B~F8V}aNS4!JD=xxzpDy)UGc73TVS z@H<$o!+eg)r8mjv?b3udE|2l2--t`lBPeF4N@1Am{Ox`Hgt~48Mwg)BkfJ*%^`+dc zTg1-kOX1D^IzjP%hv+|o6nn(QL6y>Zaew@jzcDE77$AB-rva3UyGO{;uY`T(-vp+; zmevG5hQ}HUlGFtrJH8Yy$(A$gm$P4GGaNQIuJ9s3o8X(A*)@ZcF{AAcle z0QhEQkTYEPovf}Tz(kdTN2Ft7jF3L;8+hRrq`C5 zDA(~RetnA&{Mw4hJFIED+s)VNpxia^DtGUO+KNA=fulb+UGsLCA`B;vI6A7RR1H&HPF%YTLl7_sw48| zrnIixOF-HPi4MVPzNFtWf(erG#+*)o=oZ}l7=*E+i$Y3-Me`BL)MoVf^Oqn@JD48| zDVcoAW*$(jJOMtB@96CSPx~&<}Hd+OGN)Sl#32Xv})@ckiR2p*9_Q_&aWD3M(%X5*aa!NG%}Gg`)p}t|DS}^Ab9M4 zTgq|2LrRwqQkL@r-%|GghI9+k)1n0v3HQ-0{v4Fmqq-W z6hS4A-;E>2Tvl^(=5)lR2_4w(iSVp*10rR2qAy zn9FM4n4&1ttts_7_)sXUvLjrBPH+bx{@sy@Yau0$4@x75i#4u0^0}<8{9-g^E<4bE zV;=~&GaZQ1rVEuMotfi$pxkX&A#eLoz&G;8pXV5IS^bDJu_#K-=)5^K!r2?m2&d}? 
z!syJ>ogQFsCY&6>Pc5LRFl`Y1<@{b&Q}rO8DkGcI+sqvyyGMH>E$$8}dPwPoOkAQ$ zDd;1j^tFPM?-=*8+S@PVsT|sp4(okYNUnAwN=*+)u`QuN8^XH%|qD_S!3 zuJGCpid{m#k?O%?mU6dIb@G*SzL2sW&7!|yAFJ_v{5cA&6LpNQ5j-w;AlxlN;u{Ql@`LT54S@1a)msls=x|SVJ?zm{BdWRfx~vL2$(?c&rPg=*O%E zqct6gZV|69T5yVgYdqCX%_0T~1; zo>w6Odj`W}d%?AZkfO7$PNv+huGFT5mJGZL$S`j}nt?EbVe)n8FYy6Ghrr>xQTZv*Dc1)cYXQOxLrZ$5LdIMf0*;mOxxSl?`!U1lA*BI$HM3Tz z%yU3dGN4+vf?~s9@?}64^Er~gaQyihwZH6VwE<>8D!S7xo{xk$dwasXG(k#hI5_|$ zse~;Vd_cpYkdg!`&isB>*Cci^l>>Xydj3y^6^|McDV+e~1Y1Tz{}Ygs%^xWpiaM~B z&ug0u#q^F=u;oe*`jWdXWOi>zTVJS$R@Zw1I; zKsrFN5SYI~h4kAo9OT)AxR@bDmzuVWN?m%>ES@D}T&xIpbO;pd2q#ZOiAV>>iun;^ z0${#3uCGIi-s#CQbTLmFW2GZ?^)w*=P;jghvQk3yXQ_~qUjmTG11#xgJjiOgPG3Qp zrTyrXLAGS@at)Ds4M$wMAYZ0|mJ?O;%O?b?rO&3SBOu!IXI4_V{y>_0&4GxU&|jPb zL71*6ebd3KS#a_pKIC>F?COuoKL-$Z-HVvc$YUL;IVXg?3m#j zRx>hm17#jlS{?38$h0>CH!TE^Uf|egsFp=AKa)?M_7ObhU`l=vSo{sEUDZjZ(j`h) zJ?}=ml3ohj)(L>Po0;PI=#_U6myP_vdmq94RTjngZ&;mvL@bd zL@4`0u?1i+gZ`hv%LyYv9$!;l+ZB+am(D;+9i<*CyAiQ@tss3N!D9mv{Y5BB&s8RG zs2zzJri&>n+O_x)`hEnY3?ww~oD*4)jA>kNk-{S`$x!AMJhqje8uYPc8aH9|0Ytl? 
zJcG*j30-3CL~@!x7v#pFkV4H;_F`n^8{PA0z_uZs?`Iv4*9RL-YV+rxoq zht$EAP(#1_lOw-piBTJeiF2x~`5@_b0 zE=4SUf?^f?sM=A;P-l~mjc@T`RulBYI^;c}9lxv-+=f0Eq@Tt^%1F?18Gd;mAUad< zxY00wClWc-aG2Gmyi3QR=u2lfISq7ET2h$^t8ls&Bj=4-mzI=k{e}Ou72GOr)64S>ZE*0v`e;9Ra`InSC>Q6go z>xpfX*8(#?9JY){ime2Da}a&KX++DhNaPDVGP5X;)eLL)Hx#7+nmM5fIe6xUz&wsX zTtdw9*flWuD=1dS_gFF(cGZI|9(*3FUE`YxTSm~z%$CHFyb!os7#)~N2<2KlmIL#j z@^$sbf&6DJ3oA&`t+C&XW-*eEx!;o5uC5cf1~IT@GV*0TqQ6;{@?+LG)vmg38v)V3 z9k>NjM$?2>j-;LE3xqNjOrDBR%JA4~aO@Rd;ra>cz+On1R+P_b9NT80@r|RqmphZL zrdI;-p$8S26v#F%_z)gb;09zQhC9r^lef<5~I8ol!yd)Y$lw%6^b2!ldtlT zcZ1Y<25U28K6>S-ofvyUXz8flq+~D>Ic_d&nFGjnl)h6c$DHPT3X0ye5T@t|tMPdN zNVf^peBG0Hbo*W4bo1e{d7$MEl#B0Reks5IcMIf+8pyHCv|ay_bL z;sThz2z6`^ic*zYEsw7YhLUYf?UIKSA7ORUgab(A$+R}ej}+KyiL!Jd%wLQY+ov`@ zOHB@;h0KVX04d?{ zSdZcYR#UJw55F2o{e1-z#MURWX)!{%8IU5RRS>FWDZi=FL=Yw%*JFzcFpvFk5av&z zbvYiyFTs{5Ar|wu!Tgi3XC$ev~4UxZ1 zhSVLX$z_<1I>Y=rzGUhow4(Ixi5%n4nW&Tn?auG@eibcSd z8vfd&Nr?UcOC96Cg)MuIqKi$Z6(>cq;%-BtxML3B_M^g7B45Ub^dlgiDC0Vl zN4Lq~+fqKp=O$msYIfQn`kSKY?e?yu`sMZg=S^>HP|v7Qfy7_woQgD&d8T8 zrb1S`ptZ9+L~@(n1ZmjK^-^<#q`?QP^@q!O+MP1WLLB%-0>B# zSW zGtOZku_AsqEV=)NFTFGktX^l?9|J^N@}`V(;}_Ek&*@~pXBWc#CPT^vv+TtZADecN z@|bTiHUfR$vhP<7h_3MUIm%2~Lj8;4Ncrl{M49vzAm5vn8VjVO%CYKK5vXbLyrnG_ zW47FRfhz7PbVX4VdSws7{JaVIQh`u5fGt{h>^^^fRwUxm7?7iU39I3@d{31m@apnp ztSovGX8tA+<`Uv!hut{63X$$aLdr(ViVqNN$<&KjzAdMVE>9yV&K^YGwi!~Y;IXD8 zEBdyY`~I^mb7ogHlqIart)L3iMH&t92`9--`vH;(DOXVWTadbLKciRP;cMGO!DBvH zi6=vfe$4nQlslb9Pk$0ic53<&Za8{pSvB&dH8${mM&-ZGkK7vdAubGmjMdo9tftcI zm2~jc7{b)+OC&=U6uScR+mqx6s&ivZF!Kfe57^r0C>XKT)~aI_lyOM*My?6X|s}q+B)2`L2NMP^EMh zW00X8K^S*H^w*YMqug)nXzQpj^5hpIVH~!@mY;mMT3aJUqgZ>44$KT=*%1<%# zH{nlUjZA)_+{BGE!7+x+nrR}+g*~w47c=%yu&>+` z_bYn8ZYIf_Ya~+aUPyU}=nL43JrBk9@gY_-&|g+yw$ww4E@yr%RZeW8b}MF++(rh% zl@*+{v@lZc=nV^12$8xXdy`{F%oVX8X;GQfy9ihXd6_RY<^ANOpyOH zkL?>kw9aRqQ|{?z>ezn)_ACij;s;>MEA$tTN7Aa06{BaN@vTHCrvReseDnoXGPlwk zX&#ySjAFavAZ&S!#z)|>mk8w!zH8&z>hcYqzu5c!MwvX zju@3aLtJ+A2XkhFFc&e8Eij&9HK{+orqYpZG?C0B=?{E~vH@lN4ueyLLBukOTBM8` 
zmVmgV!j>TZ6ss-0`WuzRY-%%l9=YAkk8tNPZOqpS(&E9erA~#Mb)JJd_LWNhQ#iAz z{*5yCvgy>338XRv$~?AA8&(4M)Sm>U{h~&HgIyxbx5bj@NYN=)e?R*bWj1c3*V4kV za`hwf*Jx1(tkHsofytF1|8YM3RwB%w4UYBZPqUhUn3q`ZZ=*2{L-6<@BAN53?8pK# z0#c5s`LZ$VGe9;NEgN~KG4r>5P35p`YWFahxY5C2ay~*?PhdhulU(1gVapZXbMEJ8 z7QbOcx*5ePQZZTZ-dr45gbc`Vz>CxzVlGW{Ba#;Ji}^gYu-}k;tu-c$z(Dv3y>Ct z@K^(QY#O3J8=EYEO;)x~z!ib~#Ynsp|D+|D6HNh*eu8$!w)anc$3lYkf>M2httBdlt66MYZXxxP)vczrxk(J{peT{Lbk_z*AHIzTDU5G?B zq49P6wv^R3ZN!T5w+Cs?_o-xw^+2NRJB7G3!8&FIPT>Sdso=LfUkF>C8b!*p?y8itMSM(hq&2Xfa z29B*$Yx0sVNgzy5Tr>Q4Snuz)6{V?1sYCWAQrnXwOtb=zH5ZhcG@SKW7Vii6Ey+nL zEiK0!XIV{StA?Vq`jyt=Ma|`g1jRQ)$*`fEwip$qy(WRHwHwzdqhK# z>G>^9e)=WO#(ERE^c6>P z8g&7RIbz-a1qPCxDkS)b1=1V^)&PjU)q+N%(x;fFmTf{-cE=WUIcV7mhmgyW``c7o zMmAlH0_%%w9bd+3NP_%+$Iw-?-xw9-OT8AN{OcKdB>Wqa7}T9`EM}TJ?QlAp1X^x|$FliMo24L3 zCQhto@(Qc%BRYz5Nhuxpu9(Q7u7ufr6;j#@l9-HgkqKoo`GWZO5X%Isu)2M{TZ@dl zLbpVoAO|n>#4le%T%4g;GD2xNfY`{}J+y2V^-@cpg7wS9HlnihJS|vxn%rH3LDByv zJk~)_{1;;&u_%_#I|rlyvIz%vv-xwZ=D?3mqMT7qBU_Xcr`}zO9CH^Q>mYE87OPS$ z`xp1KQ&7iV8;(C;35d4MpmvCUCB2`1j+{P=3APxc1J@BXc`^F_-=NrXKF@9$rVA$& zC1*f%T_!q%faMP z7<&#vieB&7QQ`gYejVKD}Z&I&w4)Qyv*F;9Z6LZlbiQhSmnWA=WOv z31@r@TY3u2^%M-f`LHE{_w!hRHS%IW&I6*G-N;?!9@Wwf!7oYeLD-VOh|(Qb)%+Nr zW2%%98&-h)9#HZlD5iI9)&0;I0=!Uv`i1P1GY4DcYWX0e55@xj?is5aJ z;#KICy`WeIueVNvlkXUEeH-)TtbWl2NV)KW+DeznxQqP>W35AIJ@8`4S~QEFa72ru z^l2Ju@-#q7jNh}GgwdQR?R-gR?Yu_P2KZnb%34(XIfQa8qJJIveu#gwBMpQJ!}j2V zqVF;D59uRHwXbOHn482&HW7IUD>6(UYkiQCX>4NrJ3B~lZhquUV z(291io)ORK9}zj+7ET@@aKct_Oj2#}XIHB=8Lv1LUt~3N=lY1!C7qba*OA0dqlnC+ zcS-|M$Ffx^mbmx@u7;H6kn&B@MOG_T8AN5dUi3>~s7ugj!bI4KvLoJ2Sh*ejMTTMr z_}uNQ(ZwcXx2F}P=sFm9k=tt}?(e9jSuH;y@_01DG8}KV-w7!f;IZ9&Ts_MUb{P2f z3R3jW_f43TSTX8*J?$)wA@Zk<0Wspen4OsUZNcQ-{FUG}AdHn|g!8FnHSw1Pk@2o4 zmS(ZkZs=H?zFDN0a74HRx!)7Nyn~P2z6LW)5}NptqDofVDVK z%z!xYm(cgm4?-QY6;o0h(u3_o3HQPQiuu9GyV3Yog1zY|s_WF~;{d{ViPc_78H^dG zq4?~hhIGH@c*4=$kxyCTcd^b)H-+{9m$W;POMjAv}MBonW!VL43 zT4CzCrDIh1NUfGttmezTfSj-w3kO+I>#u{b7IPGn9}GoN+6`K6g2z_!cAuwXPF}4J 
z@Kvn#-tb`})2p!<*^#AN_Jk1SJ4bQJC>&|>yD-A7S0R(C(=jTztJ6gltBdXwAWA zX)t*sUwUUfmZE(?%gsfXS*`t7ftZtFFf8Y#fI~-ltYe3zI8+HULu3 zl?e;N zDsp=oiCsPKk%aO@B5PWUsXa&HrLdELJom92b5y_DU|EWm1f|)TuCSV{=YvG$=SJfB z^T$YVlLR6=wiZwC8Y#$*r!7pzOH&$?Ud*ucN+Z@{MORp@K7EkLtZgLjZ*~Z8s?8(J zOefKP_9#UE9Bk{4_urbDYBy$}^o>Sb{EM%!x=&w$mYMcqL0LX2U$_vP_HD$p31d+C z%T>kX3`f&Wjf|D}M9WIN*xPNA<-J4ixcJxz%WH`rKL7MiwdLD?y8Xxhd-eYSAwt?D literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata4_113.dta b/pandas/tests/io/data/stata/stata4_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..9d7d5abb1b92156dfa03b076bc88c20cd9afcf71 GIT binary patch literal 1528 zcmd5+y>1gh5S}Ci5|B_78j2Jvq)2J7Y|904le_eJ26HB3d!_YVxI5b*A>lS9;stmS zBo{nD9w0@U^eLzyi8J5cIxP4HnTnNWc6N8>o7tJ!eY?6s>-3n4G5Y%Xa7DyAe;AOe zsG}%V)pDgaUULd=y88CX>d|;S#ujWa&dzt9SOGYmy(;h5Yc=|Ldvkr;>KXICVWK7* zJ|;I=BgcMIs8XTlE9m%5(mgSpvj^lWIPk_m=bc7ZXPJ)MnXk|?Nt72cAc)0d_f@If zb}GC3o?NNkFXrjsE)CN(#FkGx`m4gP%g#Y?_nUJn9`9XDdoOlEWmB$P+EhPEb<{Lm z!f}AohhU%}ctCUkMxbG^PBa3e%a24~f`^ucaguj9T>!f*;8bjCAP!h%NtcVWOhvMG z$tckM<|{!ySxZCP+DPjN#Q2o!Jxbq%J(olz#oC73QtAyo9b%&$K(T$6QbAO z5A6(#ij|6P0sd2=XF|_~c7$FC?FqdU0tjw~Mnmn;R?<*}Fky>;fSJTh`5>XZlQL5) z>7~@teWpymqm26;6YF*-r&vQ#mUS`u;~mWm7*4l^Fd$C?_5+&^03MJM{~^kYpXuSh GNy#011gh5dIPfe+7!5p#W)x6p02azDOvf$z4N1!7Es27~3nY?;_vXh6=Z-sHovl z+#?#EAPr9eM9g=$C+^}ObOjPCeX~2e^S3j*lkM(c4|mWU-@bm?;m)JxHt@0BueO(7 zuHtX{#itK1f#8{WA$=1Zc8^T=_#`;OkIP>_F9%cS-t}DE7bkDvr^*25hCqN)t}bAc z*Xj7oNiH=*bpda@H@EqyH?~=3lVMgD7`Qaf%NQ8M_Hl6BIUH^mX`UWDZ>!J^+G+gz zkXVGC>pMt6n-?VaSAjoO+F-#KX2WUS#j5vmCo;ubF}~hr7Ef&4chbSDYhHpg5r)@+ z3lb2Ea1U6L;QM>vb9kd5QIh5(nQSx*L@f=j#AfE|Kung7r98__th`S~r-*J_gli1y zE3fo51#T)?2h~OkgT<%NpJL8Zo8+}*wlknbFn@P4FyZE?!xM%F~|10qlLTUi pqa2B)pt2{bwDuoe37AgrDLCo@*8^J(fF78VwO{#)hyNxezX9{t7M%b9 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata4_115.dta b/pandas/tests/io/data/stata/stata4_115.dta new file mode 100644 index 
0000000000000000000000000000000000000000..2c68cfb393b9ec5defdbf78959bb9ea2b174a942 GIT binary patch literal 1713 zcmd5+%}yIJ5FVhEpF@S(13hrCDj{(pN}?PJ7rYP9S0LKJB(db|qV*;LaUfR?eTPcC z2#=BrPtg)FpR))@DTx*d2}|FsXZ_81>=}<+U7>Z_rgCGopx>R|U1PdC_^nq07k;j4 z>Q|hpdbQS2)uZD^gZ@q)|4rI`Yp^S?%M~o^H4za6p!FxBF$4i4y-suwK@T@X zfAnWBq9o0`oGox$*5O=iYA749a%qpJYfi!Skwd<$OZg=5quMT7wids2o3~?0vr-2L!p6> zvXX)#gb7;&1fOZbln+zNyBRaJ(|$&6GhlM&k{k~>AvT?EPKg0g)MGJP`i_KyO}`2- W!hplT<^#YExRRw$SvVi&N}d1$bO#*( literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata4_117.dta b/pandas/tests/io/data/stata/stata4_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..412c2c0d7b3569148266f331338aab76ff4b46fc GIT binary patch literal 2185 zcmd5;y>1jS5H>#n0ul;81wsg|B1H-Xx7iR05?Rh}6Cnkq;Q>~@JKvop@9t^sONan* zZ5{#9Q1KjTDJZCD)9?%snq$WH=9asJ=n5n(jc3Ll&wQS-y-T&>hNo@A8SM$)7A2#l zP=ag0qUb3ND}^@qjVMYqc=2+JhGUg(vpGV;ZAN$4bwWVgVN~&^Pz+=zP$M$|p|U=^ z6|Duypc|~NL{EZfZDVbnhT~ZpRwy#wXW-4Y_nzOq;Dk-jk3GMC(Ty)&@`j#$nsdTu z&(#&DzxMp?HK!kVPD6}JcX=s!Q;F2<_l0J^Mn6VBN5|09aAFEga}HZ$oq-Z=X(qLnc~{pBI;!BtVVI2W%q)I~m+GQ4 zN@{bGWHh1S$uR8{WyXzWOV1iB-FlewN%8hPD2y!MN6Ec<|*tV-_NwgqV)*MzGeCz{e-so+Ztaz#hxMAFZG|-VL&Cy$rX$ z+N;%UI-NX!W3Sl5mec*cB46g6zLMmH>51}8x3fE(!WQg701@aS6K+li9CV+t200Jm zDGuR6RqG)IkTu;P)xZo6?frvG8T1R8n_4nwb69(2e3@vg$4lNaqJ#@8Tbj{uYNQSC zID_{vNbpfa8g4@E77{F=En;RV35BBnh zZ;wXcqB?drK*WZwBP%)Z2;?Tv9iV$a4}cbdmVq7vVKy)YxNqDn?h-!%xF1}L$kAt! 
z3u5*PVtSMt!f&1WyM!s!U4R=z8+;ba2$t*=p1VGA9ieH9hxU+ zK9c>lzH0b6`)kgb`!0nBv3?$8g+Z75mpMn0Hi3 z0{0)818yiwTU!kQ=#i*lcK5{MiDYW&Jh7+z!u)@^2ocp#1IWDOT+o(q5iU9|!rG)w zbdjmZI&)R{X8Ax9V4d%+IFw2=EQM}}%4@=QiyLrtq;tf#F# z$Vwt?s2G`PM1a`PQCuA`<9%@y(-To_pp>4Dn;7dF3Js!mtYX!J{_&*V$h198YowHO zA(0yScsp3mOJTRTn7RK*S`yT$_CKx#I?A~V|(tQD8?jgoP8Z*7z8e_i5`=r~&XUck5UV%saKf~8Y7@iI0d<4GkhnrvQW7lIB zPyGhlvB{!rc+a`mPPiavMZiyF2 zj6aE7{7F3cm*_=L#z;JCLc-Al2NHp+L`_5l)|uHESXx>%CPpZev~S*f-+Mdr?Rz`B zN(l!_!8Txyg_px%4*fi@hKV6*FI)0q1NNtXT;t{fV=1ezv(jn9zR~Az#?4F!3whWg zDA4WVz0l=5;H?Hf=NA@@#cy1oUz08X7tf$MG7{I2zW^Ybh=&Vi2t^YD%8`GFiBuqp zR*`pz0$QXd=Jf+rQzCd3c?SjL-zESRB`c#HKBp^8O}{m_fE*%Nj&*yz>QWvRusHyh zfR&fdr~QL~0wk}xDu-ovs{*o(4&O2X*}hHvj~UPf6v7n>GYkW}w}0)Z#kh&tAwQOn z?bw<-HWTJ@0ya5Zt855qe^EP4W^o=ICB-=iK5W4mIjXZ8FhO?f#^Q-& zb{yMNZey-rE&`}g@Bkq5j&gw|;v!gdTm;2Q9o0p~K4UC;v==pHSKb<+Rct_9h{aYH z;sBSKGz=YaanFHja#h+ep1P!<#+s7Z>t52^{M$$|zW3PeCnmS9XsM`EcM zj7ssaCc~tp#3Gs|i7?O)+cZHFobW$S>JClo)3`@UDHj~-k&ky8%V{g@Wd}3WpKw_Q zYHy~}Jf8tb&ijdFc2}H8b|~MpjgWI|>-23R>+jk7FrYRqllPHZpRbemgX9h~?qT2& z(OKSS-6lR$)vTg$i_*aunmjpK6!O=lr(n_-fk3J6toQ`*e)9|AWA_fhK>a5z^}#MOZL(aAv%K8Ye1A- zBMO^(8sYvm0Le@`u_i-2nUOJvcTq9}(Ij4lXHjIlNThi70}~lKS%qg|0Pk%AP{WmE zB-J6mnD8WArDIbJz$&kA(}7^nTty5|S9Qo--wd8irPkq_+E%<}OU6ETz867k+a|u9 z-@EC#Fq5AM6Z}*a*dlbq+S1U`q2TKsHU+Q(;~#`M61yWmb z6aY1z(DdHEWGbD>t{lhrRGgR_SBohAok#&7_m*ivYOF=9vlfnvT-AcTREu6U zRE)l{L5I?UwNOeeE|f7LH*4D#*5an4#b*m#FV>>pFwwx^(D29&mMt`vR=d15ZjPub z0CQLik{8Y{XDqPiVWcVCe7q&n+IHgPsnhLe&UTzT-`UlD;o_ysJ)2jHIGD4Fw0M1` zRSMJw>kic)4vC;-w2;&}77wntSa=-i(sUC#|6iacn-D*^VN8<^IfhijJP6GUM4F7C zZa@q~nhI0$u$~%7!K9i_APr_!Bc&sxDlj?%JCTfJKm5;01Mzulp5%y+X(8Yo`Sc)C z$x`6}znGc*Tv*{?{x(+gH$ZkVOg;I%X&ik_0!RIRaC@;rDHqZE-35utkt`EmI{B-#FKK8$G@WgJiYzr^S z0w0+c`>+=H?5JTprKCj<&JlKAu-!XaNd7=Aag>r4m4CJ1=LV6$lEhv)ErseuVz~;B NEtiWnH$#s2{{X{R*%$x- literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata5_117.dta b/pandas/tests/io/data/stata/stata5_117.dta new file mode 100644 index 
0000000000000000000000000000000000000000..afbd3b0e0afe31bef669b8e9398c623beff57d74 GIT binary patch literal 5366 zcmeHL%WoS+7@s9b^`SyS$^p1^qtL2IDBg7fA=PMUQ{t4SgqE~C%E64iWACQBUUzrg zCLpoyt&tE9i3=}@1Fr*r00#YQ=e2>$%Zx^`0GwDT;(W)%b50K7{G5a5 zoxKQgb>6`NDbv8=x>7LnL(01f8_xaqxOu!;Hw%_MVcOOqi+9qg#d$r7kTe_~O_ujS z4jW3D0a?CB*()F)z88QXZtYRZNKVObhLyYu@;8t-K;pdU6<;8+Ps#x;vE>F$oWHZb zvd!#fc7y-@i3~oe9)M_2CwzjAV5NjC8~WcW{5s?HI{<>HUfVH3HHZk0v3+r)rWpGU z0Kx~!-0cAgi+wSQZ3_UlpQfZpQG$~wW`UnDzM2PMopfNFZ&%`K6o-T*&i<(}tEK^W zBKx8NUyj{Px-c=3+zC4xvwtetT@N^~SA?f`(4-}~lS^t*b<{zZBF}rd=^P)DvBW_? z9F%zvhZbt0nQ5V0Kb<)V)>{k*8=eKxV{yu15#O+R{k`gX-mj1g6tSrlfcHi|gGSgQ#S7hdYIy$Rt=c=?X zqwnR-ZGerS-745a-_<1k4_y!8b$x4d8a)3o9zb3B^}LLi`zRU`E-m%-bg3&|J1{83 zFp!#a>q+I>5HclYglM(q`HSa*upTvf+vz#=6BlK^PNE>x`WCnll59<)$hPuImB$pIGz z-y=W2TO5I{@QA*c!T!wXae)6ANc|>h{4*^*n-A8d)R*#GLs#UzwP*buDUW_IdQkul zFM0Ak^7ij<$oD~GShf4O{E4)6+%FatzP0b1jH~e`^mF;m6^WOXUH>#5-&FDSJPv-Y z@WMWlW+@+3ihcwx9^zc^v*Q5786+;w@;w6Q1 qZcj~G!s;qIx4K+X?T0-whV$~X1oLw;5MM1IyFSK+x5w=K)Oj2C0b$Aj literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata6.csv b/pandas/tests/io/data/stata/stata6.csv new file mode 100644 index 00000000..27a1dc64 --- /dev/null +++ b/pandas/tests/io/data/stata/stata6.csv @@ -0,0 +1,6 @@ +byte_,int_,long_,float_,double_,date_td,string_,string_1 +0,0,0,0,0,1960-01-01,"a","a" +1,1,1,1,1,3014-12-31,"ab","b" +-1,-1,-1,-1,-1,2014-12-31,"abc","c" +100,32740,-2147483647,-1.7010000002777e+38,-2.000000000000e+307,1970-01-01,"This string has 244 characters, so that ir is the maximum length permitted by Stata. This string has 244 characters, so that ir is the maximum length permitted by Stata. 
This string has 244 characters, so that ir is the maximum length permitted","d" +-127,-32767,2147483620,1.7010000002777e+38,8.000000000000e+307,1970-01-02,"abcdefghijklmnopqrstuvwxyz","e" diff --git a/pandas/tests/io/data/stata/stata6_113.dta b/pandas/tests/io/data/stata/stata6_113.dta new file mode 100644 index 0000000000000000000000000000000000000000..2e4795b167f266cf86afd73033bc1a43ae9afc5e GIT binary patch literal 2752 zcmXS9Vr1Z8U}b=S1A8YsF#?%ZsWs&c3Wf^4iA4%V28Jez%uoeT>cf$VV08@t|NjRG zf%JgDf~Bh5UB>f5omR2Sf48Q;U{rCUh7sjN@lGJzx14FPW2~Zx7f}H%k zbi5{{<>V&<&A<&(@=KF)fUdv^QWAlVEJ?vB&rn=alnHVLP5@Pa+nWpwYD}oQRW0=l z(xE~mvr`xldW}p>is9xIGr-uWwvAxk*QG32e4hvugQaCG(hRWl&i6Ug0i^E1k4|tt zSgOo0Y9KVgM#B&@PP?<<#M!}c3Y^v%p?p{(egNe&pG*g< z{{)kVx&S8M@WTo2CcK#gs%r1!La=!pbMwK~28@Okb&W{o!3~Bo5*Zm8Aa*p^Lxeuq zJE-mPNK9g2NKH%6$jr*l$<50zK-UNJ6J`hx-LOakrWz1nU})G64U7E@ALh6vCMA!! zut-jMQV&cQ4ay(>|1&OGVj<5ELH$kdkV=JiA9OYC8KPJJT!HB#1v!rt78RG2mX%jjRzY(MI4vSl}3&VdPD7`tI;j1s#T!}4{8B2iT%}=H?lmoHk zWTVysf7Y_?{ERX=Kz$eaw=e_%_%szPpJ zMP_bku0l>~UV2G}LP2U#Ze~eIYKlTqr9yB?Vo9Q&!XP&^rIE4W$^ZZLz_6%;gvFhD ihJ+MXV7f>_&Lf3I#U-U>Zp6cc4dZ z9C!c_XOQ{^Jwz%G5S1T++QzKyH8vFvqNh@r%lwaLKh5r#bt%vXy%15qu9Zuw*$z)W|04R8&iJkx7(@COOUY1LLk4bdgCiAp4jA!y&v$g`wg| zPEXXkrT-#9pnShWj}{8-Zfuf>p(4BA7n0ORE}d=n+=2iRw%~opB;WS~-@wUy_>MF$ zHm8F4?Ll}dC_#I|Py&%+*<+Im`cwRSAZMcC9RC^)X9JGHSy!YEvYV#VyASm+{zvSW zjl_QMB3XtVFNe(A|Gpa5>-*w#2d0?EE1{mMCsVE&Q51;2;Y~!w*2LuS-Yii8v?bjz zm)~1<*;%15E_;s5L2eL^ri&tw8yebV&9o~vT3T7>2CitXZE~(r zRI5102`3}?j~AtN#XI)>n!w_7gvGZSEYfK)7c_lGR$ay`A3m;C*X1P%XX&=(^>FWq Fb_Muvm<#{_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata6_117.dta b/pandas/tests/io/data/stata/stata6_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..bf4f3838e8be7d375d83dffe1ab3c06493f791f2 GIT binary patch literal 3490 zcmeHK&1)1f6wg@2UK9lHM*4A*&Td} z(2L;BKcUi-pn_*F9t!pkC@NY#bj;`OvSs^mleU53_g?bLOY-val0l3UPKrJ!7zLF0 zG{q>T5haY`eEu48t%AHdr%5Uaw`Zr2+fvaq?ghx5#%Kng0YHc|7)8XR5f-u*5D#N2 
zhKj5BLVg6^s#V}{Ejs}2d~Fc9a#XZP(vWx&E%JJmGW<9DlWk|evtQ!-2^_Jg4ub4ZA!318 zdZh?*c{BcQ2rFE)??H%?N_kHUb5TNsjqb%yYI5#-5Qvy0_q!7qPeVD2uEh|#FH^Zl zy9BW$oOk-Bl|Gcm7zXeARot7T1!|x$DdpUkyA!rfI`V_z znPeq0jV|2Ld=OjyiNO;en&3D@*tFeSCNfbeASo$vnu`1!mKa9vt{Pvw51JQob5Jc1 z0X9lInzWi_nH0|dxUV98V|!Ia{+qrVTF0N2{tSFlH@9PSVESzsz2{?D@xR-4Gnj2} zY)_h+edc3W@4hW4d%drc9$974UQa7+_L_!`KpY3?H8;j})z~ ziYrOAv!7klQSyZ+_DL218tcXv>tO5U6!A*O&KD(rb6Lbiqi-wQ8R2iyr6+3{xZ@Y@ z1tEj_!2|=spt!hU^N_JCkR^}@gu^g}g2)33W3m*+wHQXUQsx1y(lib^r#|%Ra8^Fo zhv0v1)L(HLn_0FjeDPHK;_EVa;1@((`0Zz8A!WR__-LvA*qjy>OGhhTP~@rs|+wxM4);;*k`~LLur`X7#uRuWF(+^I2^JV08VNl4*&oF literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata7_115.dta b/pandas/tests/io/data/stata/stata7_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..133713b201ba8d5f3c8ff716bfa4e61638979da5 GIT binary patch literal 722 zcmXSBVq{=uU}S)RQ(-XR+Sng9taQ!|FYjQ`3KiwqeI z3=DCa1XN&zQxYTrRA3A=!2pjUaVC(Ys->PmIv!ORQmU543?ynG(FTTiSyl{dsVk6^ zn3S3W3|?5&!A!q+Ef6JYS7bYnqd9}F;2j|uNRbGx1PjAMpk9I2lNNnx2v@fk zKywuutW1D57&KXl5EPpX65bLCLpc*b_fmnW(g9nKqRQnksYF4zhuhKq0npqF)K`)6 z0mEo^y*b=4@o^nH{BZc`aI}*7{l2-HNp*+U4(}X3JA8Er)MSMbmp$G}M690%Lb0E5 ziF}WNPMHnuk>jx#3Ly!C(5Ib2Bt$dDq5%mtj1UxU6SB>oWdqfhI_W8m?b)&i2Kr}$ z`@T%MR;*gz4IU;!%u%X!MHc*E!3;osaoD@7zglc%P4zqQv*Q|(gzJoF$@xkD+nmF>^=TSBnbwI#E>8{BuFF%{~1cz zqh)qH?tA$XsDOlE5PZq4-}~+^@7}!$yG_!^6+f z8Yt9HbR2B9+jLWFS2fmHD(}f|FskRb&G=kAUso+gy^2I*adXf=-Q-nn<8yvOUndw1 z-ULb<=!E(@uljn0{*kd>D$H+=(@O-yPm30O@rLwu!?bB^fxZ}tDk1hXluc%C3 zR$l=QLVo}O(}Z&Jt3J&;N zjNRgvsY@uGFn)(Srf#7O!o)r9nRXq0B20x4Vj369CQOGBW||kuAFo|!g;@(J@VcwyQSDj+Pp;+1JzsF1MuhBu}ip(4Uk1QDh^ mp<=>v6j7#qp%TJM3^Ardp;E$X9C4;2p)vwYm`uk)<%D+)Z-evz literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata8_115.dta b/pandas/tests/io/data/stata/stata8_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..bb78368b3462b142ccd0c8c399e3a18f335a88d3 GIT binary patch literal 1624 
zcmeIzIV=QG7zgm#W!?9EG}{oB%q($LbV@qi5@FreeJy{bQmIrbl}a5YjYRh~?)zT$ z81EaO&Dw;BO2kXvzVH3=e{W{qPBLS5o956;wYTQq%CJjm4f_6KG8diED78NQ30dI9W61n3RJn=+UGlT z4-9u$k(h;%sTHWH{^}y-k%K6!uhcz}rQaz2!h&x+<}ZkJWm^A|E#_8s_gWjHZB25H znB4o6v(|gA{crewCOZKIfdd0eWy5Ost|Gz`pE>uA9 l1rcQ05Go|hhY(`g6e=Ptgb`-i7Ahvdg!%wL+Yu@uyaK&w28I9t literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata8_117.dta b/pandas/tests/io/data/stata/stata8_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..fcfa7abd7b0d995add24f8504e6ae6e7d36194a0 GIT binary patch literal 2063 zcmeI!J4_To7y#e}MDPhHDq=%SG*<2{5o{+Vz7-Ffcjm@gga*xl0MEH0dG;c?~( zS2(`lmes<{e1&xmN|Ev~;r^q$%q&z{J1;kw+0I!9zhD@;*uk0WbPAX2a#Ub4krcYC zWSZAlEe*Fay$y{qt4aUduo{_JXk})86iFw|_1(hHCyKQtQQ;=V9L4(5$S;;rB84yI zQQ;@W;fkm}O0ldms#j5DCicqu9WUW@y26%&Y2owrAQ!9!*FKS_BncLJkcjx zZ6jt;&wGPTu$t|?z9Tx}TBB|3e}^77<%B6amlPGkgyyFGI|;`hjhFc;Pr3P5ax z%ZGcn!>lLeB^~MWOV_W)dW(8Gu=+1tz|8#g^%lDsVTb3~j@ItL+5at#xYxx4FP!Xf zIzBf0KH@Q)gJ{tqcDn8NmQbscMQgms)wlSo&H$`w4Df+?gJ(P0JdG)yD45>+Tj&cF;(8&Rc#zFC+>x<_kRA{nRgj*Cd8Efg#}qtYfCZ!{M8_3)i?E0kCpw|PUxFp1G|@=~av7G928gN@ zWL97W=^atEg8o%lMS4$Equ@mrvPd6@PAM2zgEgd&M5h%D<{*dknW$F5%XL^s8X`KQ z;ME3fAblk|tKjt}Y$AOlI;Y^x7HlDXC#qBMb{n>leh{5k009I@zliD;Ff%*{%$J^v Q$AsdpR{?5KweTO{HzJm*I{*Lx literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata9_115.dta b/pandas/tests/io/data/stata/stata9_115.dta new file mode 100644 index 0000000000000000000000000000000000000000..5ad6cd6a2c8ff90bd34341ce087b1e14822d00a6 GIT binary patch literal 2342 zcmeH}OK4L;6o#ihPICwgGxMK$oOvV=2r6jW z6_h}pQI~N7oOz4}j3kr00f!D`DuNZc5*#=*2)OCO5Ltr&4-pua29oA50gq;wN2F~G z5Cxyfq5KlrkV?k-%eh!8k=z+eruHOKfSav=!&&&WL}YKe+_c=-V~l;4dsi6WwVYXL zg%-Qid}JB1{Ib^gzbv<`Gk&inU^Oa?Q(mAMkEKPW!~*i|ck2gmTeFg%qH=k}r9#GJ zO`l7B8-cr2lc_U;iEaYFn_#S)Am2?;Sze$sU{(x4kqfU72)R|gdJ|U3IAs7{Y=VXZr7x}0Gm1qHg!vxEt=#^Po-?_BeAvB6t zcP93)t5IjNed*16tP>It^z@Rx==SwMWwd`l*@?FYov==2rqAl^tAWBc(PQ+;5B2{$ z{0DN_^5WZr1lc_0G%FaFPhTM$5WpQZ@+qf_KVXsWTkVd6Q1v4}&Ifedm4@_XacYL> 
zjHPRN*wDG_pbM{Ox4rwU;;KA1*e{7rU)-!Ye|16Y>bdut)!eM%N%3d zd%X3TBD%d$*KE8oT>r%Kz9AZM5PB(&j!x=%&VT(WeL3*M8BKI#+VH~-!@2W@)02|l zPkh=@)8k#+{-cg$sZKRC(fE|%z3YaxbB2Al9Hh(K*{B4pw*vyfJ+)hwEpLXZZF8G{ Gjs5^At5M(p literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/stata/stata9_117.dta b/pandas/tests/io/data/stata/stata9_117.dta new file mode 100644 index 0000000000000000000000000000000000000000..5ad6cd6a2c8ff90bd34341ce087b1e14822d00a6 GIT binary patch literal 2342 zcmeH}OK4L;6o#ihPICwgGxMK$oOvV=2r6jW z6_h}pQI~N7oOz4}j3kr00f!D`DuNZc5*#=*2)OCO5Ltr&4-pua29oA50gq;wN2F~G z5Cxyfq5KlrkV?k-%eh!8k=z+eruHOKfSav=!&&&WL}YKe+_c=-V~l;4dsi6WwVYXL zg%-Qid}JB1{Ib^gzbv<`Gk&inU^Oa?Q(mAMkEKPW!~*i|ck2gmTeFg%qH=k}r9#GJ zO`l7B8-cr2lc_U;iEaYFn_#S)Am2?;Sze$sU{(x4kqfU72)R|gdJ|U3IAs7{Y=VXZr7x}0Gm1qHg!vxEt=#^Po-?_BeAvB6t zcP93)t5IjNed*16tP>It^z@Rx==SwMWwd`l*@?FYov==2rqAl^tAWBc(PQ+;5B2{$ z{0DN_^5WZr1lc_0G%FaFPhTM$5WpQZ@+qf_KVXsWTkVd6Q1v4}&Ifedm4@_XacYL> zjHPRN*wDG_pbM{Ox4rwU;;KA1*e{7rU)-!Ye|16Y>bdut)!eM%N%3d zd%X3TBD%d$*KE8oT>r%Kz9AZM5PB(&j!x=%&VT(WeL3*M8BKI#+VH~-!@2W@)02|l zPkh=@)8k#+{-cg$sZKRC(fE|%z3YaxbB2Al9Hh(K*{B4pw*vyfJ+)hwEpLXZZF8G{ Gjs5^At5M(p literal 0 HcmV?d00001 diff --git a/pandas/tests/io/excel/__init__.py b/pandas/tests/io/excel/__init__.py new file mode 100644 index 00000000..55017232 --- /dev/null +++ b/pandas/tests/io/excel/__init__.py @@ -0,0 +1,6 @@ +import pytest + +pytestmark = pytest.mark.filterwarnings( + # Looks like tree.getiterator is deprecated in favor of tree.iter + "ignore:This method will be removed in future versions:PendingDeprecationWarning" +) diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py new file mode 100644 index 00000000..0455e0d6 --- /dev/null +++ b/pandas/tests/io/excel/conftest.py @@ -0,0 +1,65 @@ +import pytest + +import pandas.util._test_decorators as td + +import pandas._testing as tm + +from pandas.io.parsers import read_csv + + +@pytest.fixture +def frame(float_frame): + """ + Returns the first ten items in fixture 
"float_frame". + """ + return float_frame[:10] + + +@pytest.fixture +def tsframe(): + return tm.makeTimeDataFrame()[:5] + + +@pytest.fixture(params=[True, False]) +def merge_cells(request): + return request.param + + +@pytest.fixture +def df_ref(datapath): + """ + Obtain the reference data from read_csv with the Python engine. + """ + filepath = datapath("io", "data", "csv", "test1.csv") + df_ref = read_csv(filepath, index_col=0, parse_dates=True, engine="python") + return df_ref + + +@pytest.fixture(params=[".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"]) +def read_ext(request): + """ + Valid extensions for reading Excel files. + """ + return request.param + + +@pytest.fixture(autouse=True) +def check_for_file_leaks(): + """ + Fixture to run around every test to ensure that we are not leaking files. + + See also + -------- + _test_decorators.check_file_leaks + """ + # GH#30162 + psutil = td.safe_import("psutil") + if not psutil: + yield + + else: + proc = psutil.Process() + flist = proc.open_files() + yield + flist2 = proc.open_files() + assert flist == flist2 diff --git a/pandas/tests/io/excel/test_odf.py b/pandas/tests/io/excel/test_odf.py new file mode 100644 index 00000000..b9a3e8b5 --- /dev/null +++ b/pandas/tests/io/excel/test_odf.py @@ -0,0 +1,46 @@ +import functools + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +pytest.importorskip("odf") + + +@pytest.fixture(autouse=True) +def cd_and_set_engine(monkeypatch, datapath): + func = functools.partial(pd.read_excel, engine="odf") + monkeypatch.setattr(pd, "read_excel", func) + monkeypatch.chdir(datapath("io", "data", "excel")) + + +def test_read_invalid_types_raises(): + # the invalid_value_type.ods required manually editing + # of the included content.xml file + with pytest.raises(ValueError, match="Unrecognized type awesome_new_type"): + pd.read_excel("invalid_value_type.ods") + + +def test_read_writer_table(): + # Also test reading tables from an text OpenDocument file 
+ # (.odt) + index = pd.Index(["Row 1", "Row 2", "Row 3"], name="Header") + expected = pd.DataFrame( + [[1, np.nan, 7], [2, np.nan, 8], [3, np.nan, 9]], + index=index, + columns=["Column 1", "Unnamed: 2", "Column 3"], + ) + + result = pd.read_excel("writertable.odt", "Table1", index_col=0) + + tm.assert_frame_equal(result, expected) + + +def test_nonexistent_sheetname_raises(read_ext): + # GH-27676 + # Specifying a non-existent sheet_name parameter should throw an error + # with the sheet name. + with pytest.raises(ValueError, match="sheet xyz not found"): + pd.read_excel("blank.ods", sheet_name="xyz") diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py new file mode 100644 index 00000000..10ed1920 --- /dev/null +++ b/pandas/tests/io/excel/test_openpyxl.py @@ -0,0 +1,124 @@ +import os + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter, _OpenpyxlWriter + +openpyxl = pytest.importorskip("openpyxl") + +pytestmark = pytest.mark.parametrize("ext", [".xlsx"]) + + +def test_to_excel_styleconverter(ext): + from openpyxl import styles + + hstyle = { + "font": {"color": "00FF0000", "bold": True}, + "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, + "alignment": {"horizontal": "center", "vertical": "top"}, + "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}}, + "number_format": {"format_code": "0.00"}, + "protection": {"locked": True, "hidden": False}, + } + + font_color = styles.Color("00FF0000") + font = styles.Font(bold=True, color=font_color) + side = styles.Side(style=styles.borders.BORDER_THIN) + border = styles.Border(top=side, right=side, bottom=side, left=side) + alignment = styles.Alignment(horizontal="center", vertical="top") + fill_color = styles.Color(rgb="006666FF", tint=0.3) + fill = styles.PatternFill(patternType="solid", fgColor=fill_color) + + 
number_format = "0.00" + + protection = styles.Protection(locked=True, hidden=False) + + kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle) + assert kw["font"] == font + assert kw["border"] == border + assert kw["alignment"] == alignment + assert kw["fill"] == fill + assert kw["number_format"] == number_format + assert kw["protection"] == protection + + +def test_write_cells_merge_styled(ext): + from pandas.io.formats.excel import ExcelCell + + sheet_name = "merge_styled" + + sty_b1 = {"font": {"color": "00FF0000"}} + sty_a2 = {"font": {"color": "0000FF00"}} + + initial_cells = [ + ExcelCell(col=1, row=0, val=42, style=sty_b1), + ExcelCell(col=0, row=1, val=99, style=sty_a2), + ] + + sty_merged = {"font": {"color": "000000FF", "bold": True}} + sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged) + openpyxl_sty_merged = sty_kwargs["font"] + merge_cells = [ + ExcelCell( + col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged + ) + ] + + with tm.ensure_clean(ext) as path: + writer = _OpenpyxlWriter(path) + writer.write_cells(initial_cells, sheet_name=sheet_name) + writer.write_cells(merge_cells, sheet_name=sheet_name) + + wks = writer.sheets[sheet_name] + xcell_b1 = wks["B1"] + xcell_a2 = wks["A2"] + assert xcell_b1.font == openpyxl_sty_merged + assert xcell_a2.font == openpyxl_sty_merged + + +@pytest.mark.parametrize( + "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])] +) +def test_write_append_mode(ext, mode, expected): + df = DataFrame([1], columns=["baz"]) + + with tm.ensure_clean(ext) as f: + wb = openpyxl.Workbook() + wb.worksheets[0].title = "foo" + wb.worksheets[0]["A1"].value = "foo" + wb.create_sheet("bar") + wb.worksheets[1]["A1"].value = "bar" + wb.save(f) + + writer = ExcelWriter(f, engine="openpyxl", mode=mode) + df.to_excel(writer, sheet_name="baz", index=False) + writer.save() + + wb2 = openpyxl.load_workbook(f) + result = [sheet.title for sheet in wb2.worksheets] + assert result == expected + + for index, 
cell_value in enumerate(expected): + assert wb2.worksheets[index]["A1"].value == cell_value + + +def test_to_excel_with_openpyxl_engine(ext, tmpdir): + # GH 29854 + # TODO: Fix this once newer version of openpyxl fixes the bug + df1 = DataFrame({"A": np.linspace(1, 10, 10)}) + df2 = DataFrame({"B": np.linspace(1, 20, 10)}) + df = pd.concat([df1, df2], axis=1) + styled = df.style.applymap( + lambda val: "color: %s" % "red" if val < 0 else "black" + ).highlight_max() + + filename = tmpdir / "styled.xlsx" + styled.to_excel(filename, engine="openpyxl") + + assert filename.exists() + os.remove(filename) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py new file mode 100644 index 00000000..140cb80d --- /dev/null +++ b/pandas/tests/io/excel/test_readers.py @@ -0,0 +1,1056 @@ +from collections import OrderedDict +import contextlib +from datetime import datetime, time +from functools import partial +import os +from urllib.error import URLError +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm + + +@contextlib.contextmanager +def ignore_xlrd_time_clock_warning(): + """ + Context manager to ignore warnings raised by the xlrd library, + regarding the deprecation of `time.clock` in Python 3.7. 
+ """ + with warnings.catch_warnings(): + warnings.filterwarnings( + action="ignore", + message="time.clock has been deprecated", + category=DeprecationWarning, + ) + yield + + +read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] +engine_params = [ + # Add any engines to test here + # When defusedxml is installed it triggers deprecation warnings for + # xlrd and openpyxl, so catch those here + pytest.param( + "xlrd", + marks=[ + td.skip_if_no("xlrd"), + pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"), + ], + ), + pytest.param( + "openpyxl", + marks=[ + td.skip_if_no("openpyxl"), + pytest.mark.filterwarnings("ignore:.*html argument"), + ], + ), + pytest.param( + None, + marks=[ + td.skip_if_no("xlrd"), + pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"), + ], + ), + pytest.param("pyxlsb", marks=td.skip_if_no("pyxlsb")), + pytest.param("odf", marks=td.skip_if_no("odf")), +] + + +def _is_valid_engine_ext_pair(engine, read_ext: str) -> bool: + """ + Filter out invalid (engine, ext) pairs instead of skipping, as that + produces 500+ pytest.skips. + """ + engine = engine.values[0] + if engine == "openpyxl" and read_ext == ".xls": + return False + if engine == "odf" and read_ext != ".ods": + return False + if read_ext == ".ods" and engine != "odf": + return False + if engine == "pyxlsb" and read_ext != ".xlsb": + return False + if read_ext == ".xlsb" and engine != "pyxlsb": + return False + return True + + +def _transfer_marks(engine, read_ext): + """ + engine gives us a pytest.param objec with some marks, read_ext is just + a string. We need to generate a new pytest.param inheriting the marks. 
+ """ + values = engine.values + (read_ext,) + new_param = pytest.param(values, marks=engine.marks) + return new_param + + +@pytest.fixture( + autouse=True, + params=[ + _transfer_marks(eng, ext) + for eng in engine_params + for ext in read_ext_params + if _is_valid_engine_ext_pair(eng, ext) + ], +) +def engine_and_read_ext(request): + """ + Fixture for Excel reader engine and read_ext, only including valid pairs. + """ + return request.param + + +@pytest.fixture +def engine(engine_and_read_ext): + engine, read_ext = engine_and_read_ext + return engine + + +@pytest.fixture +def read_ext(engine_and_read_ext): + engine, read_ext = engine_and_read_ext + return read_ext + + +class TestReaders: + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch): + """ + Change directory and set engine for read_excel calls. + """ + func = partial(pd.read_excel, engine=engine) + monkeypatch.chdir(datapath("io", "data", "excel")) + monkeypatch.setattr(pd, "read_excel", func) + + def test_usecols_int(self, read_ext, df_ref): + df_ref = df_ref.reindex(columns=["A", "B", "C"]) + + # usecols as int + msg = "Passing an integer for `usecols`" + with pytest.raises(ValueError, match=msg): + with ignore_xlrd_time_clock_warning(): + pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols=3) + + # usecols as int + with pytest.raises(ValueError, match=msg): + with ignore_xlrd_time_clock_warning(): + pd.read_excel( + "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols=3 + ) + + def test_usecols_list(self, read_ext, df_ref): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + df_ref = df_ref.reindex(columns=["B", "C"]) + df1 = pd.read_excel( + "test1" + read_ext, "Sheet1", index_col=0, usecols=[0, 2, 3] + ) + df2 = pd.read_excel( + "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols=[0, 2, 3] + ) + + # TODO add index to xls file) + 
tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + def test_usecols_str(self, read_ext, df_ref): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + df1 = df_ref.reindex(columns=["A", "B", "C"]) + df2 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols="A:D") + df3 = pd.read_excel( + "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols="A:D" + ) + + # TODO add index to xls, read xls ignores index name ? + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + df1 = df_ref.reindex(columns=["B", "C"]) + df2 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols="A,C,D") + df3 = pd.read_excel( + "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols="A,C,D" + ) + # TODO add index to xls file + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + df1 = df_ref.reindex(columns=["B", "C"]) + df2 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, usecols="A,C:D") + df3 = pd.read_excel( + "test1" + read_ext, "Sheet2", skiprows=[1], index_col=0, usecols="A,C:D" + ) + tm.assert_frame_equal(df2, df1, check_names=False) + tm.assert_frame_equal(df3, df1, check_names=False) + + @pytest.mark.parametrize( + "usecols", [[0, 1, 3], [0, 3, 1], [1, 0, 3], [1, 3, 0], [3, 0, 1], [3, 1, 0]] + ) + def test_usecols_diff_positional_int_columns_order(self, read_ext, usecols, df_ref): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + expected = df_ref[["A", "C"]] + result = pd.read_excel( + "test1" + read_ext, "Sheet1", index_col=0, usecols=usecols + ) + tm.assert_frame_equal(result, expected, check_names=False) + + @pytest.mark.parametrize("usecols", [["B", "D"], ["D", "B"]]) + def 
test_usecols_diff_positional_str_columns_order(self, read_ext, usecols, df_ref): + expected = df_ref[["B", "D"]] + expected.index = range(len(expected)) + + result = pd.read_excel("test1" + read_ext, "Sheet1", usecols=usecols) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_read_excel_without_slicing(self, read_ext, df_ref): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + expected = df_ref + result = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str(self, read_ext, df_ref): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + expected = df_ref[["C", "D"]] + result = pd.read_excel( + "test1" + read_ext, "Sheet1", index_col=0, usecols="A,D:E" + ) + tm.assert_frame_equal(result, expected, check_names=False) + + def test_usecols_excel_range_str_invalid(self, read_ext): + msg = "Invalid column name: E1" + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, "Sheet1", usecols="D:E1") + + def test_index_col_label_error(self, read_ext): + msg = "list indices must be integers.*, not str" + + with pytest.raises(TypeError, match=msg): + pd.read_excel( + "test1" + read_ext, "Sheet1", index_col=["A"], usecols=["A", "C"] + ) + + def test_index_col_empty(self, read_ext): + # see gh-9208 + result = pd.read_excel("test1" + read_ext, "Sheet3", index_col=["A", "B", "C"]) + expected = DataFrame( + columns=["D", "E", "F"], + index=MultiIndex(levels=[[]] * 3, codes=[[]] * 3, names=["A", "B", "C"]), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("index_col", [None, 2]) + def test_index_col_with_unnamed(self, read_ext, index_col): + # see gh-18792 + result = pd.read_excel("test1" + read_ext, "Sheet4", index_col=index_col) + expected = DataFrame( + [["i1", 
"a", "x"], ["i2", "b", "y"]], columns=["Unnamed: 0", "col1", "col2"] + ) + if index_col: + expected = expected.set_index(expected.columns[index_col]) + + tm.assert_frame_equal(result, expected) + + def test_usecols_pass_non_existent_column(self, read_ext): + msg = ( + "Usecols do not match columns, " + "columns expected but not found: " + r"\['E'\]" + ) + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, usecols=["E"]) + + def test_usecols_wrong_type(self, read_ext): + msg = ( + "'usecols' must either be list-like of " + "all strings, all unicode, all integers or a callable." + ) + + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, usecols=["E1", 0]) + + def test_excel_stop_iterator(self, read_ext): + + parsed = pd.read_excel("test2" + read_ext, "Sheet1") + expected = DataFrame([["aaaa", "bbbbb"]], columns=["Test", "Test1"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_cell_error_na(self, read_ext): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + parsed = pd.read_excel("test3" + read_ext, "Sheet1") + expected = DataFrame([[np.nan]], columns=["Test"]) + tm.assert_frame_equal(parsed, expected) + + def test_excel_table(self, read_ext, df_ref): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + df1 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0) + df2 = pd.read_excel("test1" + read_ext, "Sheet2", skiprows=[1], index_col=0) + # TODO add index to file + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + df3 = pd.read_excel("test1" + read_ext, "Sheet1", index_col=0, skipfooter=1) + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + def test_reader_special_dtypes(self, read_ext): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not 
supported by pyxlsb") + + expected = DataFrame.from_dict( + OrderedDict( + [ + ("IntCol", [1, 2, -3, 4, 0]), + ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]), + ("BoolCol", [True, False, True, True, False]), + ("StrCol", [1, 2, 3, 4, 5]), + # GH5394 - this is why convert_float isn't vectorized + ("Str2Col", ["a", 3, "c", "d", "e"]), + ( + "DateCol", + [ + datetime(2013, 10, 30), + datetime(2013, 10, 31), + datetime(1905, 1, 1), + datetime(2013, 12, 14), + datetime(2015, 3, 14), + ], + ), + ] + ) + ) + basename = "test_types" + + # should read in correctly and infer types + actual = pd.read_excel(basename + read_ext, "Sheet1") + tm.assert_frame_equal(actual, expected) + + # if not coercing number, then int comes in as float + float_expected = expected.copy() + float_expected["IntCol"] = float_expected["IntCol"].astype(float) + float_expected.loc[float_expected.index[1], "Str2Col"] = 3.0 + actual = pd.read_excel(basename + read_ext, "Sheet1", convert_float=False) + tm.assert_frame_equal(actual, float_expected) + + # check setting Index (assuming xls and xlsx are the same here) + for icol, name in enumerate(expected.columns): + actual = pd.read_excel(basename + read_ext, "Sheet1", index_col=icol) + exp = expected.set_index(name) + tm.assert_frame_equal(actual, exp) + + # convert_float and converters should be different but both accepted + expected["StrCol"] = expected["StrCol"].apply(str) + actual = pd.read_excel( + basename + read_ext, "Sheet1", converters={"StrCol": str} + ) + tm.assert_frame_equal(actual, expected) + + no_convert_float = float_expected.copy() + no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str) + actual = pd.read_excel( + basename + read_ext, + "Sheet1", + convert_float=False, + converters={"StrCol": str}, + ) + tm.assert_frame_equal(actual, no_convert_float) + + # GH8212 - support for converters and missing values + def test_reader_converters(self, read_ext): + + basename = "test_converters" + + expected = 
DataFrame.from_dict( + OrderedDict( + [ + ("IntCol", [1, 2, -3, -1000, 0]), + ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]), + ("BoolCol", ["Found", "Found", "Found", "Not found", "Found"]), + ("StrCol", ["1", np.nan, "3", "4", "5"]), + ] + ) + ) + + converters = { + "IntCol": lambda x: int(x) if x != "" else -1000, + "FloatCol": lambda x: 10 * x if x else np.nan, + 2: lambda x: "Found" if x != "" else "Not found", + 3: lambda x: str(x) if x else "", + } + + # should read in correctly and set types of single cells (not array + # dtypes) + actual = pd.read_excel(basename + read_ext, "Sheet1", converters=converters) + tm.assert_frame_equal(actual, expected) + + def test_reader_dtype(self, read_ext): + # GH 8212 + basename = "testdtype" + actual = pd.read_excel(basename + read_ext) + + expected = DataFrame( + { + "a": [1, 2, 3, 4], + "b": [2.5, 3.5, 4.5, 5.5], + "c": [1, 2, 3, 4], + "d": [1.0, 2.0, np.nan, 4.0], + } + ).reindex(columns=["a", "b", "c", "d"]) + + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + basename + read_ext, dtype={"a": "float64", "b": "float32", "c": str} + ) + + expected["a"] = expected["a"].astype("float64") + expected["b"] = expected["b"].astype("float32") + expected["c"] = ["001", "002", "003", "004"] + tm.assert_frame_equal(actual, expected) + + with pytest.raises(ValueError): + pd.read_excel(basename + read_ext, dtype={"d": "int64"}) + + @pytest.mark.parametrize( + "dtype,expected", + [ + ( + None, + DataFrame( + { + "a": [1, 2, 3, 4], + "b": [2.5, 3.5, 4.5, 5.5], + "c": [1, 2, 3, 4], + "d": [1.0, 2.0, np.nan, 4.0], + } + ), + ), + ( + {"a": "float64", "b": "float32", "c": str, "d": str}, + DataFrame( + { + "a": Series([1, 2, 3, 4], dtype="float64"), + "b": Series([2.5, 3.5, 4.5, 5.5], dtype="float32"), + "c": ["001", "002", "003", "004"], + "d": ["1", "2", np.nan, "4"], + } + ), + ), + ], + ) + def test_reader_dtype_str(self, read_ext, dtype, expected): + # see gh-20377 + basename = "testdtype" + + actual = 
pd.read_excel(basename + read_ext, dtype=dtype) + tm.assert_frame_equal(actual, expected) + + def test_reading_all_sheets(self, read_ext): + # Test reading all sheetnames by setting sheetname to None, + # Ensure a dict is returned. + # See PR #9450 + basename = "test_multisheet" + dfs = pd.read_excel(basename + read_ext, sheet_name=None) + # ensure this is not alphabetical to test order preservation + expected_keys = ["Charlie", "Alpha", "Beta"] + tm.assert_contains_all(expected_keys, dfs.keys()) + # Issue 9930 + # Ensure sheet order is preserved + assert expected_keys == list(dfs.keys()) + + def test_reading_multiple_specific_sheets(self, read_ext): + # Test reading specific sheetnames by specifying a mixed list + # of integers and strings, and confirm that duplicated sheet + # references (positions/names) are removed properly. + # Ensure a dict is returned + # See PR #9450 + basename = "test_multisheet" + # Explicitly request duplicates. Only the set should be returned. + expected_keys = [2, "Charlie", "Charlie"] + dfs = pd.read_excel(basename + read_ext, sheet_name=expected_keys) + expected_keys = list(set(expected_keys)) + tm.assert_contains_all(expected_keys, dfs.keys()) + assert len(expected_keys) == len(dfs.keys()) + + def test_reading_all_sheets_with_blank(self, read_ext): + # Test reading all sheetnames by setting sheetname to None, + # In the case where some sheets are blank. 
+ # Issue #11711 + basename = "blank_with_header" + dfs = pd.read_excel(basename + read_ext, sheet_name=None) + expected_keys = ["Sheet1", "Sheet2", "Sheet3"] + tm.assert_contains_all(expected_keys, dfs.keys()) + + # GH6403 + def test_read_excel_blank(self, read_ext): + actual = pd.read_excel("blank" + read_ext, "Sheet1") + tm.assert_frame_equal(actual, DataFrame()) + + def test_read_excel_blank_with_header(self, read_ext): + expected = DataFrame(columns=["col_1", "col_2"]) + actual = pd.read_excel("blank_with_header" + read_ext, "Sheet1") + tm.assert_frame_equal(actual, expected) + + def test_date_conversion_overflow(self, read_ext): + # GH 10001 : pandas.ExcelFile ignore parse_dates=False + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + expected = pd.DataFrame( + [ + [pd.Timestamp("2016-03-12"), "Marc Johnson"], + [pd.Timestamp("2016-03-16"), "Jack Black"], + [1e20, "Timothy Brown"], + ], + columns=["DateColWithBigInt", "StringCol"], + ) + + if pd.read_excel.keywords["engine"] == "openpyxl": + pytest.xfail("Maybe not supported by openpyxl") + + result = pd.read_excel("testdateoverflow" + read_ext) + tm.assert_frame_equal(result, expected) + + def test_sheet_name(self, read_ext, df_ref): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + filename = "test1" + sheet_name = "Sheet1" + + if pd.read_excel.keywords["engine"] == "openpyxl": + pytest.xfail("Maybe not supported by openpyxl") + + df1 = pd.read_excel( + filename + read_ext, sheet_name=sheet_name, index_col=0 + ) # doc + with ignore_xlrd_time_clock_warning(): + df2 = pd.read_excel(filename + read_ext, index_col=0, sheet_name=sheet_name) + + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + def test_excel_read_buffer(self, read_ext): + + pth = "test1" + read_ext + expected = pd.read_excel(pth, 
"Sheet1", index_col=0) + with open(pth, "rb") as f: + actual = pd.read_excel(f, "Sheet1", index_col=0) + tm.assert_frame_equal(expected, actual) + + def test_bad_engine_raises(self, read_ext): + bad_engine = "foo" + with pytest.raises(ValueError, match="Unknown engine: foo"): + pd.read_excel("", engine=bad_engine) + + @tm.network + def test_read_from_http_url(self, read_ext): + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/master/" + "pandas/tests/io/data/excel/test1" + read_ext + ) + url_table = pd.read_excel(url) + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + @td.skip_if_not_us_locale + def test_read_from_s3_url(self, read_ext, s3_resource): + # Bucket "pandas-test" created in tests/io/conftest.py + with open("test1" + read_ext, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test1" + read_ext, Body=f) + + url = "s3://pandas-test/test1" + read_ext + url_table = pd.read_excel(url) + local_table = pd.read_excel("test1" + read_ext) + tm.assert_frame_equal(url_table, local_table) + + @pytest.mark.slow + # ignore warning from old xlrd + @pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning") + def test_read_from_file_url(self, read_ext, datapath): + + # FILE + localtable = os.path.join(datapath("io", "data", "excel"), "test1" + read_ext) + local_table = pd.read_excel(localtable) + + try: + url_table = pd.read_excel("file://localhost/" + localtable) + except URLError: + # fails on some systems + import platform + + pytest.skip("failing on {}".format(" ".join(platform.uname()).strip())) + + tm.assert_frame_equal(url_table, local_table) + + def test_read_from_pathlib_path(self, read_ext): + + # GH12655 + from pathlib import Path + + str_path = "test1" + read_ext + expected = pd.read_excel(str_path, "Sheet1", index_col=0) + + path_obj = Path("test1" + read_ext) + actual = pd.read_excel(path_obj, "Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + 
@td.skip_if_no("py.path") + @td.check_file_leaks + def test_read_from_py_localpath(self, read_ext): + + # GH12655 + from py.path import local as LocalPath + + str_path = os.path.join("test1" + read_ext) + expected = pd.read_excel(str_path, "Sheet1", index_col=0) + + path_obj = LocalPath().join("test1" + read_ext) + actual = pd.read_excel(path_obj, "Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + @td.check_file_leaks + def test_close_from_py_localpath(self, read_ext): + + # GH31467 + str_path = os.path.join("test1" + read_ext) + with open(str_path, "rb") as f: + x = pd.read_excel(f, "Sheet1", index_col=0) + del x + # should not throw an exception because the passed file was closed + f.read() + + def test_reader_seconds(self, read_ext): + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + # Test reading times with and without milliseconds. GH5945. + expected = DataFrame.from_dict( + { + "Time": [ + time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54), + ] + } + ) + + actual = pd.read_excel("times_1900" + read_ext, "Sheet1") + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel("times_1904" + read_ext, "Sheet1") + tm.assert_frame_equal(actual, expected) + + def test_read_excel_multiindex(self, read_ext): + # see gh-4679 + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]]) + mi_file = "testmultiindex" + read_ext + + # "mi_column" sheet + expected = DataFrame( + [ + [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, 
pd.Timestamp("2015-01-04"), True], + ], + columns=mi, + ) + + actual = pd.read_excel(mi_file, "mi_column", header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + # "mi_index" sheet + expected.index = mi + expected.columns = ["a", "b", "c", "d"] + + actual = pd.read_excel(mi_file, "mi_index", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + # "both" sheet + expected.columns = mi + + actual = pd.read_excel(mi_file, "both", index_col=[0, 1], header=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + # "mi_index_name" sheet + expected.columns = ["a", "b", "c", "d"] + expected.index = mi.set_names(["ilvl1", "ilvl2"]) + + actual = pd.read_excel(mi_file, "mi_index_name", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected) + + # "mi_column_name" sheet + expected.index = list(range(4)) + expected.columns = mi.set_names(["c1", "c2"]) + actual = pd.read_excel(mi_file, "mi_column_name", header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + # see gh-11317 + # "name_with_int" sheet + expected.columns = mi.set_levels([1, 2], level=1).set_names(["c1", "c2"]) + + actual = pd.read_excel(mi_file, "name_with_int", index_col=0, header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + # "both_name" sheet + expected.columns = mi.set_names(["c1", "c2"]) + expected.index = mi.set_names(["ilvl1", "ilvl2"]) + + actual = pd.read_excel(mi_file, "both_name", index_col=[0, 1], header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + # "both_skiprows" sheet + actual = pd.read_excel( + mi_file, "both_name_skiprows", index_col=[0, 1], header=[0, 1], skiprows=2 + ) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_multiindex_header_only(self, read_ext): + # see gh-11733. + # + # Don't try to parse a header name if there isn't one. 
+ mi_file = "testmultiindex" + read_ext + result = pd.read_excel(mi_file, "index_col_none", header=[0, 1]) + + exp_columns = MultiIndex.from_product([("A", "B"), ("key", "val")]) + expected = DataFrame([[1, 2, 3, 4]] * 2, columns=exp_columns) + tm.assert_frame_equal(result, expected) + + def test_excel_old_index_format(self, read_ext): + # see gh-4679 + filename = "test_index_name_pre17" + read_ext + + # We detect headers to determine if index names exist, so + # that "index" name in the "names" version of the data will + # now be interpreted as rows that include null data. + data = np.array( + [ + [None, None, None, None, None], + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"], + ] + ) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex( + levels=[ + ["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R1", "R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"], + ], + codes=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4, 5]], + names=[None, None], + ) + si = Index( + ["R0", "R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None + ) + + expected = pd.DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(filename, "single_names", index_col=0) + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(filename, "multi_names", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected) + + # The analogous versions of the "names" version data + # where there are explicitly no names for the indices. 
+ data = np.array( + [ + ["R0C0", "R0C1", "R0C2", "R0C3", "R0C4"], + ["R1C0", "R1C1", "R1C2", "R1C3", "R1C4"], + ["R2C0", "R2C1", "R2C2", "R2C3", "R2C4"], + ["R3C0", "R3C1", "R3C2", "R3C3", "R3C4"], + ["R4C0", "R4C1", "R4C2", "R4C3", "R4C4"], + ] + ) + columns = ["C_l0_g0", "C_l0_g1", "C_l0_g2", "C_l0_g3", "C_l0_g4"] + mi = MultiIndex( + levels=[ + ["R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], + ["R_l1_g0", "R_l1_g1", "R_l1_g2", "R_l1_g3", "R_l1_g4"], + ], + codes=[[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], + names=[None, None], + ) + si = Index(["R_l0_g0", "R_l0_g1", "R_l0_g2", "R_l0_g3", "R_l0_g4"], name=None) + + expected = pd.DataFrame(data, index=si, columns=columns) + + actual = pd.read_excel(filename, "single_no_names", index_col=0) + tm.assert_frame_equal(actual, expected) + + expected.index = mi + + actual = pd.read_excel(filename, "multi_no_names", index_col=[0, 1]) + tm.assert_frame_equal(actual, expected, check_names=False) + + def test_read_excel_bool_header_arg(self, read_ext): + # GH 6114 + for arg in [True, False]: + with pytest.raises(TypeError): + pd.read_excel("test1" + read_ext, header=arg) + + def test_read_excel_chunksize(self, read_ext): + # GH 8011 + with pytest.raises(NotImplementedError): + pd.read_excel("test1" + read_ext, chunksize=100) + + def test_read_excel_skiprows_list(self, read_ext): + # GH 4903 + if pd.read_excel.keywords["engine"] == "pyxlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + actual = pd.read_excel( + "testskiprows" + read_ext, "skiprows_list", skiprows=[0, 2] + ) + expected = DataFrame( + [ + [1, 2.5, pd.Timestamp("2015-01-01"), True], + [2, 3.5, pd.Timestamp("2015-01-02"), False], + [3, 4.5, pd.Timestamp("2015-01-03"), False], + [4, 5.5, pd.Timestamp("2015-01-04"), True], + ], + columns=["a", "b", "c", "d"], + ) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel( + "testskiprows" + read_ext, "skiprows_list", skiprows=np.array([0, 2]) + ) + 
tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows(self, read_ext): + # GH 16645 + num_rows_to_pull = 5 + actual = pd.read_excel("test1" + read_ext, nrows=num_rows_to_pull) + expected = pd.read_excel("test1" + read_ext) + expected = expected[:num_rows_to_pull] + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows_greater_than_nrows_in_file(self, read_ext): + # GH 16645 + expected = pd.read_excel("test1" + read_ext) + num_records_in_file = len(expected) + num_rows_to_pull = num_records_in_file + 10 + actual = pd.read_excel("test1" + read_ext, nrows=num_rows_to_pull) + tm.assert_frame_equal(actual, expected) + + def test_read_excel_nrows_non_integer_parameter(self, read_ext): + # GH 16645 + msg = "'nrows' must be an integer >=0" + with pytest.raises(ValueError, match=msg): + pd.read_excel("test1" + read_ext, nrows="5") + + def test_read_excel_squeeze(self, read_ext): + # GH 12157 + f = "test_squeeze" + read_ext + + actual = pd.read_excel(f, "two_columns", index_col=0, squeeze=True) + expected = pd.Series([2, 3, 4], [4, 5, 6], name="b") + expected.index.name = "a" + tm.assert_series_equal(actual, expected) + + actual = pd.read_excel(f, "two_columns", squeeze=True) + expected = pd.DataFrame({"a": [4, 5, 6], "b": [2, 3, 4]}) + tm.assert_frame_equal(actual, expected) + + actual = pd.read_excel(f, "one_column", squeeze=True) + expected = pd.Series([1, 2, 3], name="a") + tm.assert_series_equal(actual, expected) + + +class TestExcelFileRead: + @pytest.fixture(autouse=True) + def cd_and_set_engine(self, engine, datapath, monkeypatch): + """ + Change directory and set engine for ExcelFile objects. 
+ """ + func = partial(pd.ExcelFile, engine=engine) + monkeypatch.chdir(datapath("io", "data", "excel")) + monkeypatch.setattr(pd, "ExcelFile", func) + + def test_excel_passes_na(self, read_ext): + with pd.ExcelFile("test4" + read_ext) as excel: + parsed = pd.read_excel( + excel, "Sheet1", keep_default_na=False, na_values=["apple"] + ) + expected = DataFrame( + [["NA"], [1], ["NA"], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + with pd.ExcelFile("test4" + read_ext) as excel: + parsed = pd.read_excel( + excel, "Sheet1", keep_default_na=True, na_values=["apple"] + ) + expected = DataFrame( + [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + # 13967 + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, "Sheet1", keep_default_na=False, na_values=["apple"] + ) + expected = DataFrame( + [["1.#QNAN"], [1], ["nan"], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, "Sheet1", keep_default_na=True, na_values=["apple"] + ) + expected = DataFrame( + [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]], columns=["Test"] + ) + tm.assert_frame_equal(parsed, expected) + + @pytest.mark.parametrize("na_filter", [None, True, False]) + def test_excel_passes_na_filter(self, read_ext, na_filter): + # gh-25453 + kwargs = {} + + if na_filter is not None: + kwargs["na_filter"] = na_filter + + with pd.ExcelFile("test5" + read_ext) as excel: + parsed = pd.read_excel( + excel, "Sheet1", keep_default_na=True, na_values=["apple"], **kwargs + ) + + if na_filter is False: + expected = [["1.#QNAN"], [1], ["nan"], ["apple"], ["rabbit"]] + else: + expected = [[np.nan], [1], [np.nan], [np.nan], ["rabbit"]] + + expected = DataFrame(expected, columns=["Test"]) + tm.assert_frame_equal(parsed, expected) + + @pytest.mark.parametrize("arg", ["sheet", 
"sheetname", "parse_cols"]) + @td.check_file_leaks + def test_unexpected_kwargs_raises(self, read_ext, arg): + # gh-17964 + kwarg = {arg: "Sheet1"} + msg = r"unexpected keyword argument `{}`".format(arg) + + with pd.ExcelFile("test1" + read_ext) as excel: + with pytest.raises(TypeError, match=msg): + pd.read_excel(excel, **kwarg) + + def test_excel_table_sheet_by_index(self, read_ext, df_ref): + # For some reason pd.read_excel has no attribute 'keywords' here. + # Skipping based on read_ext instead. + if read_ext == ".xlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + with pd.ExcelFile("test1" + read_ext) as excel: + df1 = pd.read_excel(excel, 0, index_col=0) + df2 = pd.read_excel(excel, 1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + with pd.ExcelFile("test1" + read_ext) as excel: + df1 = excel.parse(0, index_col=0) + df2 = excel.parse(1, skiprows=[1], index_col=0) + tm.assert_frame_equal(df1, df_ref, check_names=False) + tm.assert_frame_equal(df2, df_ref, check_names=False) + + with pd.ExcelFile("test1" + read_ext) as excel: + df3 = pd.read_excel(excel, 0, index_col=0, skipfooter=1) + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + with pd.ExcelFile("test1" + read_ext) as excel: + df3 = excel.parse(0, index_col=0, skipfooter=1) + + tm.assert_frame_equal(df3, df1.iloc[:-1]) + + def test_sheet_name(self, read_ext, df_ref): + # For some reason pd.read_excel has no attribute 'keywords' here. + # Skipping based on read_ext instead. 
+ if read_ext == ".xlsb": + pytest.xfail("Sheets containing datetimes not supported by pyxlsb") + + filename = "test1" + sheet_name = "Sheet1" + + with pd.ExcelFile(filename + read_ext) as excel: + df1_parse = excel.parse(sheet_name=sheet_name, index_col=0) # doc + + with pd.ExcelFile(filename + read_ext) as excel: + df2_parse = excel.parse(index_col=0, sheet_name=sheet_name) + + tm.assert_frame_equal(df1_parse, df_ref, check_names=False) + tm.assert_frame_equal(df2_parse, df_ref, check_names=False) + + def test_excel_read_buffer(self, engine, read_ext): + pth = "test1" + read_ext + expected = pd.read_excel(pth, "Sheet1", index_col=0, engine=engine) + + with open(pth, "rb") as f: + with pd.ExcelFile(f) as xls: + actual = pd.read_excel(xls, "Sheet1", index_col=0) + + tm.assert_frame_equal(expected, actual) + + def test_reader_closes_file(self, engine, read_ext): + with open("test1" + read_ext, "rb") as f: + with pd.ExcelFile(f) as xlsx: + # parses okay + pd.read_excel(xlsx, "Sheet1", index_col=0, engine=engine) + + assert f.closed + + def test_conflicting_excel_engines(self, read_ext): + # GH 26566 + msg = "Engine should not be specified when passing an ExcelFile" + + with pd.ExcelFile("test1" + read_ext) as xl: + with pytest.raises(ValueError, match=msg): + pd.read_excel(xl, engine="foo") + + def test_excel_read_binary(self, engine, read_ext): + # GH 15914 + expected = pd.read_excel("test1" + read_ext, engine=engine) + + with open("test1" + read_ext, "rb") as f: + data = f.read() + + actual = pd.read_excel(data, engine=engine) + tm.assert_frame_equal(expected, actual) diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py new file mode 100644 index 00000000..88f4c373 --- /dev/null +++ b/pandas/tests/io/excel/test_style.py @@ -0,0 +1,169 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter +from pandas.io.formats.excel import ExcelFormatter + + 
+@pytest.mark.parametrize( + "engine", + [ + pytest.param( + "xlwt", + marks=pytest.mark.xfail( + reason="xlwt does not support openpyxl-compatible style dicts" + ), + ), + "xlsxwriter", + "openpyxl", + ], +) +def test_styler_to_excel(engine): + def style(df): + # XXX: RGB colors not supported in xlwt + return DataFrame( + [ + ["font-weight: bold", "", ""], + ["", "color: blue", ""], + ["", "", "text-decoration: underline"], + ["border-style: solid", "", ""], + ["", "font-style: italic", ""], + ["", "", "text-align: right"], + ["background-color: red", "", ""], + ["number-format: 0%", "", ""], + ["", "", ""], + ["", "", ""], + ["", "", ""], + ], + index=df.index, + columns=df.columns, + ) + + def assert_equal_style(cell1, cell2, engine): + if engine in ["xlsxwriter", "openpyxl"]: + pytest.xfail( + reason=( + "GH25351: failing on some attribute " + "comparisons in {}".format(engine) + ) + ) + # XXX: should find a better way to check equality + assert cell1.alignment.__dict__ == cell2.alignment.__dict__ + assert cell1.border.__dict__ == cell2.border.__dict__ + assert cell1.fill.__dict__ == cell2.fill.__dict__ + assert cell1.font.__dict__ == cell2.font.__dict__ + assert cell1.number_format == cell2.number_format + assert cell1.protection.__dict__ == cell2.protection.__dict__ + + def custom_converter(css): + # use bold iff there is custom style attached to the cell + if css.strip(" \n;"): + return {"font": {"bold": True}} + return {} + + pytest.importorskip("jinja2") + pytest.importorskip(engine) + + # Prepare spreadsheets + + df = DataFrame(np.random.randn(11, 3)) + with tm.ensure_clean(".xlsx" if engine != "xlwt" else ".xls") as path: + writer = ExcelWriter(path, engine=engine) + df.to_excel(writer, sheet_name="frame") + df.style.to_excel(writer, sheet_name="unstyled") + styled = df.style.apply(style, axis=None) + styled.to_excel(writer, sheet_name="styled") + ExcelFormatter(styled, style_converter=custom_converter).write( + writer, sheet_name="custom" + ) + 
writer.save() + + if engine not in ("openpyxl", "xlsxwriter"): + # For other engines, we only smoke test + return + openpyxl = pytest.importorskip("openpyxl") + wb = openpyxl.load_workbook(path) + + # (1) compare DataFrame.to_excel and Styler.to_excel when unstyled + n_cells = 0 + for col1, col2 in zip(wb["frame"].columns, wb["unstyled"].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + assert cell1.value == cell2.value + assert_equal_style(cell1, cell2, engine) + n_cells += 1 + + # ensure iteration actually happened: + assert n_cells == (11 + 1) * (3 + 1) + + # (2) check styling with default converter + + # XXX: openpyxl (as at 2.4) prefixes colors with 00, xlsxwriter with FF + alpha = "00" if engine == "openpyxl" else "FF" + + n_cells = 0 + for col1, col2 in zip(wb["frame"].columns, wb["styled"].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + ref = "{cell2.column}{cell2.row:d}".format(cell2=cell2) + # XXX: this isn't as strong a test as ideal; we should + # confirm that differences are exclusive + if ref == "B2": + assert not cell1.font.bold + assert cell2.font.bold + elif ref == "C3": + assert cell1.font.color.rgb != cell2.font.color.rgb + assert cell2.font.color.rgb == alpha + "0000FF" + elif ref == "D4": + assert cell1.font.underline != cell2.font.underline + assert cell2.font.underline == "single" + elif ref == "B5": + assert not cell1.border.left.style + assert ( + cell2.border.top.style + == cell2.border.right.style + == cell2.border.bottom.style + == cell2.border.left.style + == "medium" + ) + elif ref == "C6": + assert not cell1.font.italic + assert cell2.font.italic + elif ref == "D7": + assert cell1.alignment.horizontal != cell2.alignment.horizontal + assert cell2.alignment.horizontal == "right" + elif ref == "B8": + assert cell1.fill.fgColor.rgb != cell2.fill.fgColor.rgb + assert cell1.fill.patternType != cell2.fill.patternType + assert cell2.fill.fgColor.rgb == alpha + "FF0000" + 
assert cell2.fill.patternType == "solid" + elif ref == "B9": + assert cell1.number_format == "General" + assert cell2.number_format == "0%" + else: + assert_equal_style(cell1, cell2, engine) + + assert cell1.value == cell2.value + n_cells += 1 + + assert n_cells == (11 + 1) * (3 + 1) + + # (3) check styling with custom converter + n_cells = 0 + for col1, col2 in zip(wb["frame"].columns, wb["custom"].columns): + assert len(col1) == len(col2) + for cell1, cell2 in zip(col1, col2): + ref = "{cell2.column}{cell2.row:d}".format(cell2=cell2) + if ref in ("B2", "C3", "D4", "B5", "C6", "D7", "B8", "B9"): + assert not cell1.font.bold + assert cell2.font.bold + else: + assert_equal_style(cell1, cell2, engine) + + assert cell1.value == cell2.value + n_cells += 1 + + assert n_cells == (11 + 1) * (3 + 1) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py new file mode 100644 index 00000000..31382d29 --- /dev/null +++ b/pandas/tests/io/excel/test_writers.py @@ -0,0 +1,1294 @@ +from datetime import date, datetime, timedelta +from functools import partial +from io import BytesIO +import os + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, get_option, set_option +import pandas._testing as tm + +from pandas.io.excel import ( + ExcelFile, + ExcelWriter, + _OpenpyxlWriter, + _XlsxWriter, + _XlwtWriter, + register_writer, +) + + +@pytest.fixture +def path(ext): + """ + Fixture to open file for use in each test case. + """ + with tm.ensure_clean(ext) as file_path: + yield file_path + + +@pytest.fixture +def set_engine(engine, ext): + """ + Fixture to set engine for use in each test case. + + Rather than requiring `engine=...` to be provided explicitly as an + argument in each test, this fixture sets a global option to dictate + which engine should be used to write Excel files. 
After executing + the test it rolls back said change to the global option. + """ + option_name = "io.excel.{ext}.writer".format(ext=ext.strip(".")) + prev_engine = get_option(option_name) + set_option(option_name, engine) + yield + set_option(option_name, prev_engine) # Roll back option change + + +@td.skip_if_no("xlrd") +@pytest.mark.parametrize("ext", [".xls", ".xlsx", ".xlsm"]) +class TestRoundTrip: + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize( + "header,expected", + [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))], + ) + def test_read_one_empty_col_no_header(self, ext, header, expected): + # xref gh-12292 + filename = "no_header" + df = pd.DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]) + + with tm.ensure_clean(ext) as path: + df.to_excel(path, filename, index=False, header=False) + result = pd.read_excel(path, filename, usecols=[0], header=header) + + tm.assert_frame_equal(result, expected) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + @pytest.mark.parametrize( + "header,expected", + [(None, DataFrame([0] + [np.nan] * 4)), (0, DataFrame([np.nan] * 4))], + ) + def test_read_one_empty_col_with_header(self, ext, header, expected): + filename = "with_header" + df = pd.DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]]) + + with tm.ensure_clean(ext) as path: + df.to_excel(path, "with_header", index=False, header=True) + result = pd.read_excel(path, filename, usecols=[0], header=header) + + tm.assert_frame_equal(result, expected) + + @td.skip_if_no("openpyxl") + @td.skip_if_no("xlwt") + def test_set_column_names_in_parameter(self, ext): + # GH 12870 : pass down column names associated with + # keyword argument names + refdf = pd.DataFrame([[1, "foo"], [2, "bar"], [3, "baz"]], columns=["a", "b"]) + + with tm.ensure_clean(ext) as pth: + with ExcelWriter(pth) as writer: + refdf.to_excel(writer, "Data_no_head", header=False, index=False) + 
refdf.to_excel(writer, "Data_with_head", index=False) + + refdf.columns = ["A", "B"] + + with ExcelFile(pth) as reader: + xlsdf_no_head = pd.read_excel( + reader, "Data_no_head", header=None, names=["A", "B"] + ) + xlsdf_with_head = pd.read_excel( + reader, "Data_with_head", index_col=None, names=["A", "B"] + ) + + tm.assert_frame_equal(xlsdf_no_head, refdf) + tm.assert_frame_equal(xlsdf_with_head, refdf) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + def test_creating_and_reading_multiple_sheets(self, ext): + # see gh-9450 + # + # Test reading multiple sheets, from a runtime + # created Excel file with multiple sheets. + def tdf(col_sheet_name): + d, i = [11, 22, 33], [1, 2, 3] + return DataFrame(d, i, columns=[col_sheet_name]) + + sheets = ["AAA", "BBB", "CCC"] + + dfs = [tdf(s) for s in sheets] + dfs = dict(zip(sheets, dfs)) + + with tm.ensure_clean(ext) as pth: + with ExcelWriter(pth) as ew: + for sheetname, df in dfs.items(): + df.to_excel(ew, sheetname) + + dfs_returned = pd.read_excel(pth, sheet_name=sheets, index_col=0) + + for s in sheets: + tm.assert_frame_equal(dfs[s], dfs_returned[s]) + + @td.skip_if_no("xlsxwriter") + def test_read_excel_multiindex_empty_level(self, ext): + # see gh-12453 + with tm.ensure_clean(ext) as path: + df = DataFrame( + { + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", ""): {0: 0}, + } + ) + + expected = DataFrame( + { + ("One", "x"): {0: 1}, + ("Two", "X"): {0: 3}, + ("Two", "Y"): {0: 7}, + ("Zero", "Unnamed: 4_level_1"): {0: 0}, + } + ) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + df = pd.DataFrame( + { + ("Beg", ""): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7}, + } + ) + + expected = pd.DataFrame( + { + ("Beg", "Unnamed: 1_level_1"): {0: 0}, + ("Middle", "x"): {0: 1}, + ("Tail", "X"): {0: 3}, + ("Tail", "Y"): {0: 7}, + } + ) + + df.to_excel(path) + actual = 
pd.read_excel(path, header=[0, 1], index_col=0) + tm.assert_frame_equal(actual, expected) + + @td.skip_if_no("xlsxwriter") + @pytest.mark.parametrize("c_idx_names", [True, False]) + @pytest.mark.parametrize("r_idx_names", [True, False]) + @pytest.mark.parametrize("c_idx_levels", [1, 3]) + @pytest.mark.parametrize("r_idx_levels", [1, 3]) + def test_excel_multindex_roundtrip( + self, ext, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels + ): + # see gh-4679 + with tm.ensure_clean(ext) as pth: + if c_idx_levels == 1 and c_idx_names: + pytest.skip( + "Column index name cannot be serialized unless it's a MultiIndex" + ) + + # Empty name case current read in as + # unnamed levels, not Nones. + check_names = r_idx_names or r_idx_levels <= 1 + + df = tm.makeCustomDataframe( + 5, 5, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels + ) + df.to_excel(pth) + + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[0, :] = np.nan + df.to_excel(pth) + + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + df.iloc[-1, :] = np.nan + df.to_excel(pth) + act = pd.read_excel( + pth, + index_col=list(range(r_idx_levels)), + header=list(range(c_idx_levels)), + ) + tm.assert_frame_equal(df, act, check_names=check_names) + + @td.skip_if_no("xlwt") + @td.skip_if_no("openpyxl") + def test_read_excel_parse_dates(self, ext): + # see gh-11544, gh-12051 + df = DataFrame( + {"col": [1, 2, 3], "date_strings": pd.date_range("2012-01-01", periods=3)} + ) + df2 = df.copy() + df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y") + + with tm.ensure_clean(ext) as pth: + df2.to_excel(pth) + + res = pd.read_excel(pth, index_col=0) + tm.assert_frame_equal(df2, res) + + res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0) + 
tm.assert_frame_equal(df, res) + + date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y") + res = pd.read_excel( + pth, parse_dates=["date_strings"], date_parser=date_parser, index_col=0 + ) + tm.assert_frame_equal(df, res) + + +@td.skip_if_no("xlrd") +@pytest.mark.parametrize( + "engine,ext", + [ + pytest.param("openpyxl", ".xlsx", marks=td.skip_if_no("openpyxl")), + pytest.param("openpyxl", ".xlsm", marks=td.skip_if_no("openpyxl")), + pytest.param("xlwt", ".xls", marks=td.skip_if_no("xlwt")), + pytest.param("xlsxwriter", ".xlsx", marks=td.skip_if_no("xlsxwriter")), + ], +) +@pytest.mark.usefixtures("set_engine") +class TestExcelWriter: + def test_excel_sheet_size(self, path): + + # GH 26080 + breaking_row_count = 2 ** 20 + 1 + breaking_col_count = 2 ** 14 + 1 + # purposely using two arrays to prevent memory issues while testing + row_arr = np.zeros(shape=(breaking_row_count, 1)) + col_arr = np.zeros(shape=(1, breaking_col_count)) + row_df = pd.DataFrame(row_arr) + col_df = pd.DataFrame(col_arr) + + msg = "sheet is too large" + with pytest.raises(ValueError, match=msg): + row_df.to_excel(path) + + with pytest.raises(ValueError, match=msg): + col_df.to_excel(path) + + def test_excel_sheet_by_name_raise(self, path): + import xlrd + + gt = DataFrame(np.random.randn(10, 2)) + gt.to_excel(path) + + xl = ExcelFile(path) + df = pd.read_excel(xl, 0, index_col=0) + + tm.assert_frame_equal(gt, df) + + with pytest.raises(xlrd.XLRDError): + pd.read_excel(xl, "0") + + def test_excel_writer_context_manager(self, frame, path): + with ExcelWriter(path) as writer: + frame.to_excel(writer, "Data1") + frame2 = frame.copy() + frame2.columns = frame.columns[::-1] + frame2.to_excel(writer, "Data2") + + with ExcelFile(path) as reader: + found_df = pd.read_excel(reader, "Data1", index_col=0) + found_df2 = pd.read_excel(reader, "Data2", index_col=0) + + tm.assert_frame_equal(found_df, frame) + tm.assert_frame_equal(found_df2, frame2) + + def test_roundtrip(self, frame, path): + frame = 
frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # test roundtrip + frame.to_excel(path, "test1") + recons = pd.read_excel(path, "test1", index_col=0) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", index=False) + recons = pd.read_excel(path, "test1", index_col=None) + recons.index = frame.index + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", na_rep="NA") + recons = pd.read_excel(path, "test1", index_col=0, na_values=["NA"]) + tm.assert_frame_equal(frame, recons) + + # GH 3611 + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel(path, "test1", index_col=0, na_values=["88"]) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "test1", na_rep="88") + recons = pd.read_excel(path, "test1", index_col=0, na_values=[88, 88.0]) + tm.assert_frame_equal(frame, recons) + + # GH 6573 + frame.to_excel(path, "Sheet1") + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(frame, recons) + + frame.to_excel(path, "0") + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(frame, recons) + + # GH 8825 Pandas Series should provide to_excel method + s = frame["A"] + s.to_excel(path) + recons = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(s.to_frame(), recons) + + def test_mixed(self, frame, path): + mixed_frame = frame.copy() + mixed_frame["foo"] = "bar" + + mixed_frame.to_excel(path, "test1") + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(mixed_frame, recons) + + def test_ts_frame(self, tsframe, path): + df = tsframe + + df.to_excel(path, "test1") + reader = ExcelFile(path) + + recons = pd.read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(df, recons) + + def test_basics_with_nan(self, frame, path): + frame = frame.copy() + 
frame["A"][:5] = np.nan + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + @pytest.mark.parametrize("np_type", [np.int8, np.int16, np.int32, np.int64]) + def test_int_types(self, np_type, path): + # Test np.int values read come back as int + # (rather than float which is Excel's format). + df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type) + df.to_excel(path, "test1") + + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0) + + int_frame = df.astype(np.int64) + tm.assert_frame_equal(int_frame, recons) + + recons2 = pd.read_excel(path, "test1", index_col=0) + tm.assert_frame_equal(int_frame, recons2) + + # Test with convert_float=False comes back as float. + float_frame = df.astype(float) + recons = pd.read_excel(path, "test1", convert_float=False, index_col=0) + tm.assert_frame_equal( + recons, float_frame, check_index_type=False, check_column_type=False + ) + + @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64]) + def test_float_types(self, np_type, path): + # Test np.float values read come back as float. + df = DataFrame(np.random.random_sample(10), dtype=np_type) + df.to_excel(path, "test1") + + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) + + tm.assert_frame_equal(df, recons, check_dtype=False) + + @pytest.mark.parametrize("np_type", [np.bool8, np.bool_]) + def test_bool_types(self, np_type, path): + # Test np.bool values read come back as float. 
+ df = DataFrame([1, 0, True, False], dtype=np_type) + df.to_excel(path, "test1") + + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np_type) + + tm.assert_frame_equal(df, recons) + + def test_inf_roundtrip(self, path): + df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)]) + df.to_excel(path, "test1") + + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0) + + tm.assert_frame_equal(df, recons) + + def test_sheets(self, frame, tsframe, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # Test writing to separate sheets + writer = ExcelWriter(path) + frame.to_excel(writer, "test1") + tsframe.to_excel(writer, "test2") + writer.save() + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(frame, recons) + recons = pd.read_excel(reader, "test2", index_col=0) + tm.assert_frame_equal(tsframe, recons) + assert 2 == len(reader.sheet_names) + assert "test1" == reader.sheet_names[0] + assert "test2" == reader.sheet_names[1] + + def test_colaliases(self, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # column aliases + col_aliases = Index(["AA", "X", "Y", "Z"]) + frame.to_excel(path, "test1", header=col_aliases) + reader = ExcelFile(path) + rs = pd.read_excel(reader, "test1", index_col=0) + xp = frame.copy() + xp.columns = col_aliases + tm.assert_frame_equal(xp, rs) + + def test_roundtrip_indexlabels(self, merge_cells, frame, path): + frame = frame.copy() + frame["A"][:5] = np.nan + + frame.to_excel(path, "test1") + frame.to_excel(path, "test1", columns=["A", "B"]) + 
frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", index=False) + + # test index_label + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells) + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) + df.index.names = ["test"] + assert df.index.names == recons.index.names + + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel( + path, + "test1", + index_label=["test", "dummy", "dummy2"], + merge_cells=merge_cells, + ) + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) + df.index.names = ["test"] + assert df.index.names == recons.index.names + + df = DataFrame(np.random.randn(10, 2)) >= 0 + df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells) + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0).astype(np.int64) + df.index.names = ["test"] + tm.assert_frame_equal(df, recons.astype(bool)) + + frame.to_excel( + path, + "test1", + columns=["A", "B", "C", "D"], + index=False, + merge_cells=merge_cells, + ) + # take 'A' and 'B' as indexes (same row as cols 'C', 'D') + df = frame.copy() + df = df.set_index(["A", "B"]) + + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=[0, 1]) + tm.assert_frame_equal(df, recons, check_less_precise=True) + + def test_excel_roundtrip_indexname(self, merge_cells, path): + df = DataFrame(np.random.randn(10, 4)) + df.index.name = "foo" + + df.to_excel(path, merge_cells=merge_cells) + + xf = ExcelFile(path) + result = pd.read_excel(xf, xf.sheet_names[0], index_col=0) + + tm.assert_frame_equal(result, df) + assert result.index.name == "foo" + + def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path): + # datetime.date, not sure what to test here exactly + tsf = tsframe.copy() + + tsf.index = [x.date() for x in tsframe.index] + tsf.to_excel(path, "test1", 
merge_cells=merge_cells) + + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=0) + + tm.assert_frame_equal(tsframe, recons) + + def test_excel_date_datetime_format(self, engine, ext, path): + # see gh-4133 + # + # Excel output format strings + df = DataFrame( + [ + [date(2014, 1, 31), date(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["DATE", "DATETIME"], + columns=["X", "Y"], + ) + df_expected = DataFrame( + [ + [datetime(2014, 1, 31), datetime(1999, 9, 24)], + [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)], + ], + index=["DATE", "DATETIME"], + columns=["X", "Y"], + ) + + with tm.ensure_clean(ext) as filename2: + writer1 = ExcelWriter(path) + writer2 = ExcelWriter( + filename2, + date_format="DD.MM.YYYY", + datetime_format="DD.MM.YYYY HH-MM-SS", + ) + + df.to_excel(writer1, "test1") + df.to_excel(writer2, "test1") + + writer1.close() + writer2.close() + + reader1 = ExcelFile(path) + reader2 = ExcelFile(filename2) + + rs1 = pd.read_excel(reader1, "test1", index_col=0) + rs2 = pd.read_excel(reader2, "test1", index_col=0) + + tm.assert_frame_equal(rs1, rs2) + + # Since the reader returns a datetime object for dates, + # we need to use df_expected to check the result. + tm.assert_frame_equal(rs2, df_expected) + + def test_to_excel_interval_no_labels(self, path): + # see gh-19242 + # + # Test writing Interval without labels. + df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + expected = df.copy() + + df["new"] = pd.cut(df[0], 10) + expected["new"] = pd.cut(expected[0], 10).astype(str) + + df.to_excel(path, "test1") + reader = ExcelFile(path) + + recons = pd.read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_interval_labels(self, path): + # see gh-19242 + # + # Test writing Interval with labels. 
+ df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64) + expected = df.copy() + intervals = pd.cut( + df[0], 10, labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"] + ) + df["new"] = intervals + expected["new"] = pd.Series(list(intervals)) + + df.to_excel(path, "test1") + reader = ExcelFile(path) + + recons = pd.read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_timedelta(self, path): + # see gh-19242, gh-9155 + # + # Test writing timedelta to xls. + df = DataFrame( + np.random.randint(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64 + ) + expected = df.copy() + + df["new"] = df["A"].apply(lambda x: timedelta(seconds=x)) + expected["new"] = expected["A"].apply( + lambda x: timedelta(seconds=x).total_seconds() / float(86400) + ) + + df.to_excel(path, "test1") + reader = ExcelFile(path) + + recons = pd.read_excel(reader, "test1", index_col=0) + tm.assert_frame_equal(expected, recons) + + def test_to_excel_periodindex(self, tsframe, path): + xp = tsframe.resample("M", kind="period").mean() + + xp.to_excel(path, "sht1") + + reader = ExcelFile(path) + rs = pd.read_excel(reader, "sht1", index_col=0) + tm.assert_frame_equal(xp, rs.to_period("M")) + + def test_to_excel_multiindex(self, merge_cells, frame, path): + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + frame.to_excel(path, "test1", header=False) + frame.to_excel(path, "test1", columns=["A", "B"]) + + # round trip + frame.to_excel(path, "test1", merge_cells=merge_cells) + reader = ExcelFile(path) + df = pd.read_excel(reader, "test1", index_col=[0, 1]) + tm.assert_frame_equal(frame, df) + + # GH13511 + def test_to_excel_multiindex_nan_label(self, merge_cells, path): + df = pd.DataFrame( + {"A": [None, 2, 3], "B": [10, 20, 30], "C": np.random.sample(3)} + ) + df = df.set_index(["A", "B"]) + + df.to_excel(path, 
merge_cells=merge_cells) + df1 = pd.read_excel(path, index_col=[0, 1]) + tm.assert_frame_equal(df, df1) + + # Test for Issue 11328. If column indices are integers, make + # sure they are handled correctly for either setting of + # merge_cells + def test_to_excel_multiindex_cols(self, merge_cells, frame, path): + arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) + frame.index = new_index + + new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)]) + frame.columns = new_cols_index + header = [0, 1] + if not merge_cells: + header = 0 + + # round trip + frame.to_excel(path, "test1", merge_cells=merge_cells) + reader = ExcelFile(path) + df = pd.read_excel(reader, "test1", header=header, index_col=[0, 1]) + if not merge_cells: + fm = frame.columns.format(sparsify=False, adjoin=False, names=False) + frame.columns = [".".join(map(str, q)) for q in zip(*fm)] + tm.assert_frame_equal(frame, df) + + def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path): + # try multiindex with dates + new_index = [tsframe.index, np.arange(len(tsframe.index))] + tsframe.index = MultiIndex.from_arrays(new_index) + + tsframe.index.names = ["time", "foo"] + tsframe.to_excel(path, "test1", merge_cells=merge_cells) + reader = ExcelFile(path) + recons = pd.read_excel(reader, "test1", index_col=[0, 1]) + + tm.assert_frame_equal(tsframe, recons) + assert recons.index.names == ("time", "foo") + + def test_to_excel_multiindex_no_write_index(self, path): + # Test writing and re-reading a MI without the index. GH 5616. + + # Initial non-MI frame. + frame1 = DataFrame({"a": [10, 20], "b": [30, 40], "c": [50, 60]}) + + # Add a MI. + frame2 = frame1.copy() + multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)]) + frame2.index = multi_index + + # Write out to Excel without the index. + frame2.to_excel(path, "test1", index=False) + + # Read it back in. 
+ reader = ExcelFile(path) + frame3 = pd.read_excel(reader, "test1") + + # Test that it is the same as the initial frame. + tm.assert_frame_equal(frame1, frame3) + + def test_to_excel_float_format(self, path): + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + df.to_excel(path, "test1", float_format="%.2f") + + reader = ExcelFile(path) + result = pd.read_excel(reader, "test1", index_col=0) + + expected = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(result, expected) + + def test_to_excel_output_encoding(self, ext): + # Avoid mixed inferred_type. + df = DataFrame( + [["\u0192", "\u0193", "\u0194"], ["\u0195", "\u0196", "\u0197"]], + index=["A\u0192", "B"], + columns=["X\u0193", "Y", "Z"], + ) + + with tm.ensure_clean("__tmp_to_excel_float_format__." + ext) as filename: + df.to_excel(filename, sheet_name="TestSheet", encoding="utf8") + result = pd.read_excel(filename, "TestSheet", encoding="utf8", index_col=0) + tm.assert_frame_equal(result, df) + + def test_to_excel_unicode_filename(self, ext, path): + with tm.ensure_clean("\u0192u." 
+ ext) as filename: + try: + f = open(filename, "wb") + except UnicodeEncodeError: + pytest.skip("No unicode file names on this system") + else: + f.close() + + df = DataFrame( + [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + df.to_excel(filename, "test1", float_format="%.2f") + + reader = ExcelFile(filename) + result = pd.read_excel(reader, "test1", index_col=0) + + expected = DataFrame( + [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + tm.assert_frame_equal(result, expected) + + # FIXME: dont leave commented-out + # def test_to_excel_header_styling_xls(self, engine, ext): + + # import StringIO + # s = StringIO( + # """Date,ticker,type,value + # 2001-01-01,x,close,12.2 + # 2001-01-01,x,open ,12.1 + # 2001-01-01,y,close,12.2 + # 2001-01-01,y,open ,12.1 + # 2001-02-01,x,close,12.2 + # 2001-02-01,x,open ,12.1 + # 2001-02-01,y,close,12.2 + # 2001-02-01,y,open ,12.1 + # 2001-03-01,x,close,12.2 + # 2001-03-01,x,open ,12.1 + # 2001-03-01,y,close,12.2 + # 2001-03-01,y,open ,12.1""") + # df = read_csv(s, parse_dates=["Date"]) + # pdf = df.pivot_table(values="value", rows=["ticker"], + # cols=["Date", "type"]) + + # try: + # import xlwt + # import xlrd + # except ImportError: + # pytest.skip + + # filename = '__tmp_to_excel_header_styling_xls__.xls' + # pdf.to_excel(filename, 'test1') + + # wbk = xlrd.open_workbook(filename, + # formatting_info=True) + # assert ["test1"] == wbk.sheet_names() + # ws = wbk.sheet_by_name('test1') + # assert [(0, 1, 5, 7), (0, 1, 3, 5), (0, 1, 1, 3)] == ws.merged_cells + # for i in range(0, 2): + # for j in range(0, 7): + # xfx = ws.cell_xf_index(0, 0) + # cell_xf = wbk.xf_list[xfx] + # font = wbk.font_list + # assert 1 == font[cell_xf.font_index].bold + # assert 1 == cell_xf.border.top_line_style + # assert 1 == cell_xf.border.right_line_style + # assert 1 == cell_xf.border.bottom_line_style + # assert 1 == 
cell_xf.border.left_line_style + # assert 2 == cell_xf.alignment.hor_align + # os.remove(filename) + # def test_to_excel_header_styling_xlsx(self, engine, ext): + # import StringIO + # s = StringIO( + # """Date,ticker,type,value + # 2001-01-01,x,close,12.2 + # 2001-01-01,x,open ,12.1 + # 2001-01-01,y,close,12.2 + # 2001-01-01,y,open ,12.1 + # 2001-02-01,x,close,12.2 + # 2001-02-01,x,open ,12.1 + # 2001-02-01,y,close,12.2 + # 2001-02-01,y,open ,12.1 + # 2001-03-01,x,close,12.2 + # 2001-03-01,x,open ,12.1 + # 2001-03-01,y,close,12.2 + # 2001-03-01,y,open ,12.1""") + # df = read_csv(s, parse_dates=["Date"]) + # pdf = df.pivot_table(values="value", rows=["ticker"], + # cols=["Date", "type"]) + # try: + # import openpyxl + # from openpyxl.cell import get_column_letter + # except ImportError: + # pytest.skip + # if openpyxl.__version__ < '1.6.1': + # pytest.skip + # # test xlsx_styling + # filename = '__tmp_to_excel_header_styling_xlsx__.xlsx' + # pdf.to_excel(filename, 'test1') + # wbk = openpyxl.load_workbook(filename) + # assert ["test1"] == wbk.get_sheet_names() + # ws = wbk.get_sheet_by_name('test1') + # xlsaddrs = ["%s2" % chr(i) for i in range(ord('A'), ord('H'))] + # xlsaddrs += ["A%s" % i for i in range(1, 6)] + # xlsaddrs += ["B1", "D1", "F1"] + # for xlsaddr in xlsaddrs: + # cell = ws.cell(xlsaddr) + # assert cell.style.font.bold + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.top.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.right.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.bottom.border_style) + # assert (openpyxl.style.Border.BORDER_THIN == + # cell.style.borders.left.border_style) + # assert (openpyxl.style.Alignment.HORIZONTAL_CENTER == + # cell.style.alignment.horizontal) + # mergedcells_addrs = ["C1", "E1", "G1"] + # for maddr in mergedcells_addrs: + # assert ws.cell(maddr).merged + # os.remove(filename) + + @pytest.mark.parametrize("use_headers", 
[True, False]) + @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3]) + @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3]) + def test_excel_010_hemstring( + self, merge_cells, c_idx_nlevels, r_idx_nlevels, use_headers, path + ): + def roundtrip(data, header=True, parser_hdr=0, index=True): + data.to_excel(path, header=header, merge_cells=merge_cells, index=index) + + xf = ExcelFile(path) + return pd.read_excel(xf, xf.sheet_names[0], header=parser_hdr) + + # Basic test. + parser_header = 0 if use_headers else None + res = roundtrip(DataFrame([0]), use_headers, parser_header) + + assert res.shape == (1, 2) + assert res.iloc[0, 0] is not np.nan + + # More complex tests with multi-index. + nrows = 5 + ncols = 3 + + # ensure limited functionality in 0.10 + # override of gh-2370 until sorted out in 0.11 + + df = tm.makeCustomDataframe( + nrows, ncols, r_idx_nlevels=r_idx_nlevels, c_idx_nlevels=c_idx_nlevels + ) + + # This if will be removed once multi-column Excel writing + # is implemented. For now fixing gh-9794. + if c_idx_nlevels > 1: + with pytest.raises(NotImplementedError): + roundtrip(df, use_headers, index=False) + else: + res = roundtrip(df, use_headers) + + if use_headers: + assert res.shape == (nrows, ncols + r_idx_nlevels) + else: + # First row taken as columns. + assert res.shape == (nrows - 1, ncols + r_idx_nlevels) + + # No NaNs. + for r in range(len(res.index)): + for c in range(len(res.columns)): + assert res.iloc[r, c] is not np.nan + + def test_duplicated_columns(self, path): + # see gh-5235 + df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"]) + df.to_excel(path, "test1") + expected = DataFrame( + [[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B.1"] + ) + + # By default, we mangle. + result = pd.read_excel(path, "test1", index_col=0) + tm.assert_frame_equal(result, expected) + + # Explicitly, we pass in the parameter. 
+ result = pd.read_excel(path, "test1", index_col=0, mangle_dupe_cols=True) + tm.assert_frame_equal(result, expected) + + # see gh-11007, gh-10970 + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"]) + df.to_excel(path, "test1") + + result = pd.read_excel(path, "test1", index_col=0) + expected = DataFrame( + [[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"] + ) + tm.assert_frame_equal(result, expected) + + # see gh-10982 + df.to_excel(path, "test1", index=False, header=False) + result = pd.read_excel(path, "test1", header=None) + + expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]) + tm.assert_frame_equal(result, expected) + + msg = "Setting mangle_dupe_cols=False is not supported yet" + with pytest.raises(ValueError, match=msg): + pd.read_excel(path, "test1", header=None, mangle_dupe_cols=False) + + def test_swapped_columns(self, path): + # Test for issue #5427. + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) + write_frame.to_excel(path, "test1", columns=["B", "A"]) + + read_frame = pd.read_excel(path, "test1", header=0) + + tm.assert_series_equal(write_frame["A"], read_frame["A"]) + tm.assert_series_equal(write_frame["B"], read_frame["B"]) + + def test_invalid_columns(self, path): + # see gh-10982 + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) + + with pytest.raises(KeyError, match="Not all names specified"): + write_frame.to_excel(path, "test1", columns=["B", "C"]) + + with pytest.raises( + KeyError, match="'passes columns are not ALL present dataframe'" + ): + write_frame.to_excel(path, "test1", columns=["C", "D"]) + + @pytest.mark.parametrize( + "to_excel_index,read_excel_index_col", + [ + (True, 0), # Include index in write to file + (False, None), # Dont include index in write to file + ], + ) + def test_write_subset_columns(self, path, to_excel_index, read_excel_index_col): + # GH 31677 + write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2], "C": [3, 3, 3]}) + write_frame.to_excel( + path, 
"col_subset_bug", columns=["A", "B"], index=to_excel_index + ) + + expected = write_frame[["A", "B"]] + read_frame = pd.read_excel( + path, "col_subset_bug", index_col=read_excel_index_col + ) + + tm.assert_frame_equal(expected, read_frame) + + def test_comment_arg(self, path): + # see gh-18735 + # + # Test the comment argument functionality to pd.read_excel. + + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Read file without comment arg. + result1 = pd.read_excel(path, "test_c", index_col=0) + + result1.iloc[1, 0] = None + result1.iloc[1, 1] = None + result1.iloc[2, 1] = None + + result2 = pd.read_excel(path, "test_c", comment="#", index_col=0) + tm.assert_frame_equal(result1, result2) + + def test_comment_default(self, path): + # Re issue #18735 + # Test the comment argument default to pd.read_excel + + # Create file to read in + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Read file with default and explicit comment=None + result1 = pd.read_excel(path, "test_c") + result2 = pd.read_excel(path, "test_c", comment=None) + tm.assert_frame_equal(result1, result2) + + def test_comment_used(self, path): + # see gh-18735 + # + # Test the comment argument is working as expected when used. + + # Create file to read in. + df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]}) + df.to_excel(path, "test_c") + + # Test read_frame_comment against manually produced expected output. 
+ expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]}) + result = pd.read_excel(path, "test_c", comment="#", index_col=0) + tm.assert_frame_equal(result, expected) + + def test_comment_empty_line(self, path): + # Re issue #18735 + # Test that pd.read_excel ignores commented lines at the end of file + + df = DataFrame({"a": ["1", "#2"], "b": ["2", "3"]}) + df.to_excel(path, index=False) + + # Test that all-comment lines at EoF are ignored + expected = DataFrame({"a": [1], "b": [2]}) + result = pd.read_excel(path, comment="#") + tm.assert_frame_equal(result, expected) + + def test_datetimes(self, path): + + # Test writing and reading datetimes. For issue #9139. (xref #9185) + datetimes = [ + datetime(2013, 1, 13, 1, 2, 3), + datetime(2013, 1, 13, 2, 45, 56), + datetime(2013, 1, 13, 4, 29, 49), + datetime(2013, 1, 13, 6, 13, 42), + datetime(2013, 1, 13, 7, 57, 35), + datetime(2013, 1, 13, 9, 41, 28), + datetime(2013, 1, 13, 11, 25, 21), + datetime(2013, 1, 13, 13, 9, 14), + datetime(2013, 1, 13, 14, 53, 7), + datetime(2013, 1, 13, 16, 37, 0), + datetime(2013, 1, 13, 18, 20, 52), + ] + + write_frame = DataFrame({"A": datetimes}) + write_frame.to_excel(path, "Sheet1") + read_frame = pd.read_excel(path, "Sheet1", header=0) + + tm.assert_series_equal(write_frame["A"], read_frame["A"]) + + def test_bytes_io(self, engine): + # see gh-7074 + bio = BytesIO() + df = DataFrame(np.random.randn(10, 2)) + + # Pass engine explicitly, as there is no file path to infer from. + writer = ExcelWriter(bio, engine=engine) + df.to_excel(writer) + writer.save() + + bio.seek(0) + reread_df = pd.read_excel(bio, index_col=0) + tm.assert_frame_equal(df, reread_df) + + def test_write_lists_dict(self, path): + # see gh-8188. 
+ df = DataFrame( + { + "mixed": ["a", ["b", "c"], {"d": "e", "f": 2}], + "numeric": [1, 2, 3.0], + "str": ["apple", "banana", "cherry"], + } + ) + df.to_excel(path, "Sheet1") + read = pd.read_excel(path, "Sheet1", header=0, index_col=0) + + expected = df.copy() + expected.mixed = expected.mixed.apply(str) + expected.numeric = expected.numeric.astype("int64") + + tm.assert_frame_equal(read, expected) + + def test_true_and_false_value_options(self, path): + # see gh-13347 + df = pd.DataFrame([["foo", "bar"]], columns=["col1", "col2"]) + expected = df.replace({"foo": True, "bar": False}) + + df.to_excel(path) + read_frame = pd.read_excel( + path, true_values=["foo"], false_values=["bar"], index_col=0 + ) + tm.assert_frame_equal(read_frame, expected) + + def test_freeze_panes(self, path): + # see gh-15160 + expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"]) + expected.to_excel(path, "Sheet1", freeze_panes=(1, 1)) + + result = pd.read_excel(path, index_col=0) + tm.assert_frame_equal(result, expected) + + def test_path_path_lib(self, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + + reader = partial(pd.read_excel, index_col=0) + result = tm.round_trip_pathlib(writer, reader, path="foo.{ext}".format(ext=ext)) + tm.assert_frame_equal(result, df) + + def test_path_local_path(self, engine, ext): + df = tm.makeDataFrame() + writer = partial(df.to_excel, engine=engine) + + reader = partial(pd.read_excel, index_col=0) + result = tm.round_trip_pathlib(writer, reader, path="foo.{ext}".format(ext=ext)) + tm.assert_frame_equal(result, df) + + def test_merged_cell_custom_objects(self, merge_cells, path): + # see GH-27006 + mi = MultiIndex.from_tuples( + [ + (pd.Period("2018"), pd.Period("2018Q1")), + (pd.Period("2018"), pd.Period("2018Q2")), + ] + ) + expected = DataFrame(np.ones((2, 2)), columns=mi) + expected.to_excel(path) + result = pd.read_excel(path, header=[0, 1], index_col=0, convert_float=False) + # need to convert 
PeriodIndexes to standard Indexes for assert equal + expected.columns.set_levels( + [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]], + level=[0, 1], + inplace=True, + ) + expected.index = expected.index.astype(np.float64) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize("dtype", [None, object]) + def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path): + # GH 27008, GH 7056 + tz = tz_aware_fixture + data = pd.Timestamp("2019", tz=tz) + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(path) + + data = data.to_pydatetime() + df = DataFrame([data], dtype=dtype) + with pytest.raises(ValueError, match="Excel does not support"): + df.to_excel(path) + + +class TestExcelWriterEngineTests: + @pytest.mark.parametrize( + "klass,ext", + [ + pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")), + pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")), + pytest.param(_XlwtWriter, ".xls", marks=td.skip_if_no("xlwt")), + ], + ) + def test_ExcelWriter_dispatch(self, klass, ext): + with tm.ensure_clean(ext) as path: + writer = ExcelWriter(path) + if ext == ".xlsx" and td.safe_import("xlsxwriter"): + # xlsxwriter has preference over openpyxl if both installed + assert isinstance(writer, _XlsxWriter) + else: + assert isinstance(writer, klass) + + def test_ExcelWriter_dispatch_raises(self): + with pytest.raises(ValueError, match="No engine"): + ExcelWriter("nothing") + + def test_register_writer(self): + # some awkward mocking to test out dispatch and such actually works + called_save = [] + called_write_cells = [] + + class DummyClass(ExcelWriter): + called_save = False + called_write_cells = False + supported_extensions = ["xlsx", "xls"] + engine = "dummy" + + def save(self): + called_save.append(True) + + def write_cells(self, *args, **kwargs): + called_write_cells.append(True) + + def check_called(func): + func() + assert 
len(called_save) >= 1 + assert len(called_write_cells) >= 1 + del called_save[:] + del called_write_cells[:] + + with pd.option_context("io.excel.xlsx.writer", "dummy"): + register_writer(DummyClass) + writer = ExcelWriter("something.xlsx") + assert isinstance(writer, DummyClass) + df = tm.makeCustomDataframe(1, 1) + check_called(lambda: df.to_excel("something.xlsx")) + check_called(lambda: df.to_excel("something.xls", engine="dummy")) + + +@td.skip_if_no("xlrd") +@td.skip_if_no("openpyxl") +class TestFSPath: + def test_excelfile_fspath(self): + with tm.ensure_clean("foo.xlsx") as path: + df = DataFrame({"A": [1, 2]}) + df.to_excel(path) + xl = ExcelFile(path) + result = os.fspath(xl) + assert result == path + + def test_excelwriter_fspath(self): + with tm.ensure_clean("foo.xlsx") as path: + writer = ExcelWriter(path) + assert os.fspath(writer) == str(path) diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py new file mode 100644 index 00000000..cc7e2311 --- /dev/null +++ b/pandas/tests/io/excel/test_xlrd.py @@ -0,0 +1,43 @@ +import pytest + +import pandas as pd +import pandas._testing as tm + +from pandas.io.excel import ExcelFile + +xlrd = pytest.importorskip("xlrd") +xlwt = pytest.importorskip("xlwt") + + +@pytest.fixture(autouse=True) +def skip_ods_and_xlsb_files(read_ext): + if read_ext == ".ods": + pytest.skip("Not valid for xlrd") + if read_ext == ".xlsb": + pytest.skip("Not valid for xlrd") + + +def test_read_xlrd_book(read_ext, frame): + df = frame + + engine = "xlrd" + sheet_name = "SheetA" + + with tm.ensure_clean(read_ext) as pth: + df.to_excel(pth, sheet_name) + book = xlrd.open_workbook(pth) + + with ExcelFile(book, engine=engine) as xl: + result = pd.read_excel(xl, sheet_name, index_col=0) + tm.assert_frame_equal(df, result) + + result = pd.read_excel(book, sheet_name=sheet_name, engine=engine, index_col=0) + tm.assert_frame_equal(df, result) + + +# TODO: test for openpyxl as well +def 
test_excel_table_sheet_by_index(datapath, read_ext): + path = datapath("io", "data", "excel", "test1{}".format(read_ext)) + with pd.ExcelFile(path) as excel: + with pytest.raises(xlrd.XLRDError): + pd.read_excel(excel, "asdf") diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py new file mode 100644 index 00000000..b6f79143 --- /dev/null +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -0,0 +1,64 @@ +import warnings + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter + +xlsxwriter = pytest.importorskip("xlsxwriter") + +pytestmark = pytest.mark.parametrize("ext", [".xlsx"]) + + +def test_column_format(ext): + # Test that column formats are applied to cells. Test for issue #9167. + # Applicable to xlsxwriter only. + with warnings.catch_warnings(): + # Ignore the openpyxl lxml warning. + warnings.simplefilter("ignore") + openpyxl = pytest.importorskip("openpyxl") + + with tm.ensure_clean(ext) as path: + frame = DataFrame({"A": [123456, 123456], "B": [123456, 123456]}) + + writer = ExcelWriter(path) + frame.to_excel(writer) + + # Add a number format to col B and ensure it is applied to cells. + num_format = "#,##0" + write_workbook = writer.book + write_worksheet = write_workbook.worksheets()[0] + col_format = write_workbook.add_format({"num_format": num_format}) + write_worksheet.set_column("B:B", None, col_format) + writer.save() + + read_workbook = openpyxl.load_workbook(path) + try: + read_worksheet = read_workbook["Sheet1"] + except TypeError: + # compat + read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1") + + # Get the number format from the cell. 
+ try: + cell = read_worksheet["B2"] + except TypeError: + # compat + cell = read_worksheet.cell("B2") + + try: + read_num_format = cell.number_format + except AttributeError: + read_num_format = cell.style.number_format._format_code + + assert read_num_format == num_format + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with xlsxwriter!" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="xlsxwriter", mode="a") diff --git a/pandas/tests/io/excel/test_xlwt.py b/pandas/tests/io/excel/test_xlwt.py new file mode 100644 index 00000000..01feab08 --- /dev/null +++ b/pandas/tests/io/excel/test_xlwt.py @@ -0,0 +1,67 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter, _XlwtWriter + +xlwt = pytest.importorskip("xlwt") + +pytestmark = pytest.mark.parametrize("ext,", [".xls"]) + + +def test_excel_raise_error_on_multiindex_columns_and_no_index(ext): + # MultiIndex as columns is not yet implemented 9794 + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(10, 3), columns=cols) + with pytest.raises(NotImplementedError): + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=False) + + +def test_excel_multiindex_columns_and_index_true(ext): + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = pd.DataFrame(np.random.randn(10, 3), columns=cols) + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=True) + + +def test_excel_multiindex_index(ext): + # MultiIndex as index works so assert no error #9794 + cols = MultiIndex.from_tuples( + [("site", ""), ("2014", "height"), ("2014", "weight")] + ) + df = DataFrame(np.random.randn(3, 10), index=cols) + with tm.ensure_clean(ext) as path: + df.to_excel(path, index=False) + + +def 
test_to_excel_styleconverter(ext): + hstyle = { + "font": {"bold": True}, + "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, + "alignment": {"horizontal": "center", "vertical": "top"}, + } + + xls_style = _XlwtWriter._convert_to_style(hstyle) + assert xls_style.font.bold + assert xlwt.Borders.THIN == xls_style.borders.top + assert xlwt.Borders.THIN == xls_style.borders.right + assert xlwt.Borders.THIN == xls_style.borders.bottom + assert xlwt.Borders.THIN == xls_style.borders.left + assert xlwt.Alignment.HORZ_CENTER == xls_style.alignment.horz + assert xlwt.Alignment.VERT_TOP == xls_style.alignment.vert + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with xlwt!" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="xlwt", mode="a") diff --git a/pandas/tests/io/formats/__init__.py b/pandas/tests/io/formats/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/io/formats/data/html/datetime64_hourformatter.html b/pandas/tests/io/formats/data/html/datetime64_hourformatter.html new file mode 100644 index 00000000..c92b7218 --- /dev/null +++ b/pandas/tests/io/formats/data/html/datetime64_hourformatter.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
    hod
    010:10
    112:12
    diff --git a/pandas/tests/io/formats/data/html/datetime64_monthformatter.html b/pandas/tests/io/formats/data/html/datetime64_monthformatter.html new file mode 100644 index 00000000..589c8fba --- /dev/null +++ b/pandas/tests/io/formats/data/html/datetime64_monthformatter.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
    months
    02016-01
    12016-02
    diff --git a/pandas/tests/io/formats/data/html/escape_disabled.html b/pandas/tests/io/formats/data/html/escape_disabled.html new file mode 100644 index 00000000..260a04d2 --- /dev/null +++ b/pandas/tests/io/formats/data/html/escape_disabled.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + +
    co + co>l2
    str + boldbold
    stri>ng2 &boldbold
    diff --git a/pandas/tests/io/formats/data/html/escaped.html b/pandas/tests/io/formats/data/html/escaped.html new file mode 100644 index 00000000..d68bdd3d --- /dev/null +++ b/pandas/tests/io/formats/data/html/escaped.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
    co<l1co>l2
    str<ing1 &amp;<type 'str'><type 'str'>
    stri>ng2 &amp;<type 'str'><type 'str'>
    diff --git a/pandas/tests/io/formats/data/html/gh12031_expected_output.html b/pandas/tests/io/formats/data/html/gh12031_expected_output.html new file mode 100644 index 00000000..896e154a --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh12031_expected_output.html @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + +
    A
    06,0
    13,1
    22,2
    diff --git a/pandas/tests/io/formats/data/html/gh14882_expected_output_1.html b/pandas/tests/io/formats/data/html/gh14882_expected_output_1.html new file mode 100644 index 00000000..4cfd8785 --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh14882_expected_output_1.html @@ -0,0 +1,274 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    n
    abc
    1001010
    21
    32
    43
    54
    65
    76
    2017
    28
    39
    410
    511
    612
    713
    30114
    215
    316
    417
    518
    619
    720
    20010121
    222
    323
    424
    525
    626
    727
    20128
    229
    ......
    633
    734
    30135
    236
    337
    438
    539
    640
    741
    30010142
    243
    344
    445
    546
    647
    748
    20149
    250
    351
    452
    553
    654
    755
    30156
    257
    358
    459
    560
    661
    762
    diff --git a/pandas/tests/io/formats/data/html/gh14882_expected_output_2.html b/pandas/tests/io/formats/data/html/gh14882_expected_output_2.html new file mode 100644 index 00000000..d4e7fd9b --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh14882_expected_output_2.html @@ -0,0 +1,258 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    n
    abc
    1001010
    21
    32
    43
    54
    65
    76
    2017
    28
    39
    410
    511
    612
    713
    30114
    215
    316
    417
    518
    619
    720
    20010121
    222
    323
    424
    525
    626
    727
    .........
    30135
    236
    337
    438
    539
    640
    741
    30010142
    243
    344
    445
    546
    647
    748
    20149
    250
    351
    452
    553
    654
    755
    30156
    257
    358
    459
    560
    661
    762
    diff --git a/pandas/tests/io/formats/data/html/gh14998_expected_output.html b/pandas/tests/io/formats/data/html/gh14998_expected_output.html new file mode 100644 index 00000000..62b96493 --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh14998_expected_output.html @@ -0,0 +1,12 @@ + + + + + + + + + + + +
    A
    1
    diff --git a/pandas/tests/io/formats/data/html/gh15019_expected_output.html b/pandas/tests/io/formats/data/html/gh15019_expected_output.html new file mode 100644 index 00000000..5fb9d960 --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh15019_expected_output.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01
    1.7640520.400157
    0.9787382.240893
    ......
    0.950088-0.151357
    -0.1032190.410599
    diff --git a/pandas/tests/io/formats/data/html/gh21625_expected_output.html b/pandas/tests/io/formats/data/html/gh21625_expected_output.html new file mode 100644 index 00000000..a87e4ca3 --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh21625_expected_output.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +
    x
    00.200
    \ No newline at end of file diff --git a/pandas/tests/io/formats/data/html/gh22270_expected_output.html b/pandas/tests/io/formats/data/html/gh22270_expected_output.html new file mode 100644 index 00000000..6694c43d --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh22270_expected_output.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +
    x
    0100
    \ No newline at end of file diff --git a/pandas/tests/io/formats/data/html/gh22579_expected_output.html b/pandas/tests/io/formats/data/html/gh22579_expected_output.html new file mode 100644 index 00000000..425b0f91 --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh22579_expected_output.html @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ab
    cdcd
    0101010
    1111111
    2121212
    3131313
    4141414
    5151515
    6161616
    7171717
    8181818
    9191919
    diff --git a/pandas/tests/io/formats/data/html/gh22783_expected_output.html b/pandas/tests/io/formats/data/html/gh22783_expected_output.html new file mode 100644 index 00000000..107db43c --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh22783_expected_output.html @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...34
    1.7640520.400157...2.2408931.867558
    -0.9772780.950088...-0.1032190.410599
    diff --git a/pandas/tests/io/formats/data/html/gh22783_named_columns_index.html b/pandas/tests/io/formats/data/html/gh22783_named_columns_index.html new file mode 100644 index 00000000..55ab2909 --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh22783_named_columns_index.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...34
    1.7640520.400157...2.2408931.867558
    -0.9772780.950088...-0.1032190.410599
    diff --git a/pandas/tests/io/formats/data/html/gh6131_expected_output.html b/pandas/tests/io/formats/data/html/gh6131_expected_output.html new file mode 100644 index 00000000..cb3a3363 --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh6131_expected_output.html @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    d
    aaa...ac
    bc
    baca1.0...NaN
    bbcbNaN...NaN
    bcccNaN...3.0
    diff --git a/pandas/tests/io/formats/data/html/gh8452_expected_output.html b/pandas/tests/io/formats/data/html/gh8452_expected_output.html new file mode 100644 index 00000000..81ce397a --- /dev/null +++ b/pandas/tests/io/formats/data/html/gh8452_expected_output.html @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ab
    cdcd
    0353
    1464
    diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html new file mode 100644 index 00000000..4eb3f531 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html @@ -0,0 +1,70 @@ +

    diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html new file mode 100644 index 00000000..2b1d97ae --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html @@ -0,0 +1,46 @@ +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    00
    11
    ......
    5959
    6060
    +

    61 rows × 1 columns

    +
    diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html new file mode 100644 index 00000000..a539e5a4 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html @@ -0,0 +1,78 @@ +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    00
    11
    22
    33
    44
    55
    ......
    5555
    5656
    5757
    5858
    5959
    6060
    +

    61 rows × 1 columns

    +
    diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html new file mode 100644 index 00000000..3e680a50 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html @@ -0,0 +1,269 @@ +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    00
    11
    22
    33
    44
    55
    66
    77
    88
    99
    1010
    1111
    1212
    1313
    1414
    1515
    1616
    1717
    1818
    1919
    2020
    2121
    2222
    2323
    2424
    2525
    2626
    2727
    2828
    2929
    3030
    3131
    3232
    3333
    3434
    3535
    3636
    3737
    3838
    3939
    4040
    4141
    4242
    4343
    4444
    4545
    4646
    4747
    4848
    4949
    5050
    5151
    5252
    5353
    5454
    5555
    5656
    5757
    5858
    5959
    6060
    +
    diff --git a/pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html new file mode 100644 index 00000000..10f6247e --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html @@ -0,0 +1,105 @@ +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    00
    11
    22
    33
    44
    55
    66
    77
    88
    99
    1010
    1111
    1212
    1313
    1414
    1515
    1616
    1717
    1818
    1919
    +
    diff --git a/pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html new file mode 100644 index 00000000..4eb3f531 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html @@ -0,0 +1,70 @@ +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    00
    11
    22
    33
    44
    ......
    5656
    5757
    5858
    5959
    6060
    +

    61 rows × 1 columns

    +
    diff --git a/pandas/tests/io/formats/data/html/index_1.html b/pandas/tests/io/formats/data/html/index_1.html new file mode 100644 index 00000000..41221865 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_1.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ABC
    foo11.2one
    bar23.4two
    baz35.6NaN
    diff --git a/pandas/tests/io/formats/data/html/index_2.html b/pandas/tests/io/formats/data/html/index_2.html new file mode 100644 index 00000000..a86ba80a --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_2.html @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +
    ABC
    11.2one
    23.4two
    35.6NaN
    diff --git a/pandas/tests/io/formats/data/html/index_3.html b/pandas/tests/io/formats/data/html/index_3.html new file mode 100644 index 00000000..02edba49 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_3.html @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ABC
    idx
    foo11.2one
    bar23.4two
    baz35.6NaN
    diff --git a/pandas/tests/io/formats/data/html/index_4.html b/pandas/tests/io/formats/data/html/index_4.html new file mode 100644 index 00000000..0d1bf9ff --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_4.html @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ABC
    foocar11.2one
    bike23.4two
    barcar35.6NaN
    diff --git a/pandas/tests/io/formats/data/html/index_5.html b/pandas/tests/io/formats/data/html/index_5.html new file mode 100644 index 00000000..c5ac12ec --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_5.html @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ABC
    idx1idx2
    foocar11.2one
    bike23.4two
    barcar35.6NaN
    diff --git a/pandas/tests/io/formats/data/html/index_formatter.html b/pandas/tests/io/formats/data/html/index_formatter.html new file mode 100644 index 00000000..7a2f8a9f --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_formatter.html @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooNone
    a01
    b23
    c45
    d67
    diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_named_multi.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_named_multi.html new file mode 100644 index 00000000..817b54d7 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_multi_columns_named_multi.html @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name.0a
    columns.name.1bc
    index.name.0index.name.1
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_named_standard.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_named_standard.html new file mode 100644 index 00000000..e85965f1 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_multi_columns_named_standard.html @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01
    index.name.0index.name.1
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_none.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_none.html new file mode 100644 index 00000000..8c41d2e2 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_multi_columns_none.html @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + +
    index.name.0index.name.1
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_multi.html new file mode 100644 index 00000000..7af63e89 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_multi.html @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    bc
    index.name.0index.name.1
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_standard.html new file mode 100644 index 00000000..2f783786 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_multi_columns_unnamed_standard.html @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01
    index.name.0index.name.1
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_named_standard_columns_named_multi.html b/pandas/tests/io/formats/data/html/index_named_standard_columns_named_multi.html new file mode 100644 index 00000000..ca9b8bd8 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_standard_columns_named_multi.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name.0a
    columns.name.1bc
    index.name
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_named_standard_columns_named_standard.html b/pandas/tests/io/formats/data/html/index_named_standard_columns_named_standard.html new file mode 100644 index 00000000..6478c99a --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_standard_columns_named_standard.html @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01
    index.name
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_named_standard_columns_none.html b/pandas/tests/io/formats/data/html/index_named_standard_columns_none.html new file mode 100644 index 00000000..432d8e06 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_standard_columns_none.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
    index.name
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_multi.html new file mode 100644 index 00000000..d7660872 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_multi.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    bc
    index.name
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_standard.html new file mode 100644 index 00000000..4810f660 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_named_standard_columns_unnamed_standard.html @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + +
    01
    index.name
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_none_columns_named_multi.html b/pandas/tests/io/formats/data/html/index_none_columns_named_multi.html new file mode 100644 index 00000000..e111f55b --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_none_columns_named_multi.html @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name.0a
    columns.name.1bc
    00
    00
    diff --git a/pandas/tests/io/formats/data/html/index_none_columns_named_standard.html b/pandas/tests/io/formats/data/html/index_none_columns_named_standard.html new file mode 100644 index 00000000..d3a9ba01 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_none_columns_named_standard.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
    columns.name01
    00
    00
    diff --git a/pandas/tests/io/formats/data/html/index_none_columns_none.html b/pandas/tests/io/formats/data/html/index_none_columns_none.html new file mode 100644 index 00000000..44899858 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_none_columns_none.html @@ -0,0 +1,12 @@ + + + + + + + + + + + +
    00
    00
    diff --git a/pandas/tests/io/formats/data/html/index_none_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/index_none_columns_unnamed_multi.html new file mode 100644 index 00000000..b21a6183 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_none_columns_unnamed_multi.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
    a
    bc
    00
    00
    diff --git a/pandas/tests/io/formats/data/html/index_none_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/index_none_columns_unnamed_standard.html new file mode 100644 index 00000000..1249fa56 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_none_columns_unnamed_standard.html @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + + +
    01
    00
    00
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_multi.html b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_multi.html new file mode 100644 index 00000000..95c38c9c --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_multi.html @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name.0a
    columns.name.1bc
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_standard.html b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_standard.html new file mode 100644 index 00000000..9583a21f --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_named_standard.html @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_none.html b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_none.html new file mode 100644 index 00000000..81da7c36 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_none.html @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + +
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_multi.html new file mode 100644 index 00000000..f6202590 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_multi.html @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    bc
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_standard.html new file mode 100644 index 00000000..2ca18c28 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_multi_columns_unnamed_standard.html @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + +
    01
    ab00
    c00
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_multi.html b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_multi.html new file mode 100644 index 00000000..ed3360f8 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_multi.html @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name.0a
    columns.name.1bc
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_standard.html b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_standard.html new file mode 100644 index 00000000..54da0385 --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_named_standard.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
    columns.name01
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_none.html b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_none.html new file mode 100644 index 00000000..3d958afe --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_none.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_multi.html new file mode 100644 index 00000000..b57fafbe --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_multi.html @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + +
    a
    bc
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_standard.html new file mode 100644 index 00000000..235ca61a --- /dev/null +++ b/pandas/tests/io/formats/data/html/index_unnamed_standard_columns_unnamed_standard.html @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + +
    01
    000
    100
    diff --git a/pandas/tests/io/formats/data/html/justify.html b/pandas/tests/io/formats/data/html/justify.html new file mode 100644 index 00000000..33e4b571 --- /dev/null +++ b/pandas/tests/io/formats/data/html/justify.html @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ABC
    061223442
    13000020
    22700001
    diff --git a/pandas/tests/io/formats/data/html/multiindex_1.html b/pandas/tests/io/formats/data/html/multiindex_1.html new file mode 100644 index 00000000..88db1775 --- /dev/null +++ b/pandas/tests/io/formats/data/html/multiindex_1.html @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    CL001
    CL10101
    0abcd
    1efgh
    diff --git a/pandas/tests/io/formats/data/html/multiindex_2.html b/pandas/tests/io/formats/data/html/multiindex_2.html new file mode 100644 index 00000000..289ea220 --- /dev/null +++ b/pandas/tests/io/formats/data/html/multiindex_2.html @@ -0,0 +1,34 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    0123
    0101
    0abcd
    1efgh
    diff --git a/pandas/tests/io/formats/data/html/multiindex_sparsify_1.html b/pandas/tests/io/formats/data/html/multiindex_sparsify_1.html new file mode 100644 index 00000000..5b5bcf9c --- /dev/null +++ b/pandas/tests/io/formats/data/html/multiindex_sparsify_1.html @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01
    foo
    0001
    123
    1045
    167
    diff --git a/pandas/tests/io/formats/data/html/multiindex_sparsify_2.html b/pandas/tests/io/formats/data/html/multiindex_sparsify_2.html new file mode 100644 index 00000000..fd4c6bd2 --- /dev/null +++ b/pandas/tests/io/formats/data/html/multiindex_sparsify_2.html @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    foo01
    00
    foo
    0001
    123
    1045
    167
    diff --git a/pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_1.html b/pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_1.html new file mode 100644 index 00000000..42a5ea5e --- /dev/null +++ b/pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_1.html @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01
    foo
    0001
    0123
    1045
    1167
    diff --git a/pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_2.html b/pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_2.html new file mode 100644 index 00000000..2be61392 --- /dev/null +++ b/pandas/tests/io/formats/data/html/multiindex_sparsify_false_multi_sparse_2.html @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    foo01
    00
    foo
    0001
    0123
    1045
    1167
    diff --git a/pandas/tests/io/formats/data/html/render_links_false.html b/pandas/tests/io/formats/data/html/render_links_false.html new file mode 100644 index 00000000..6feb403d --- /dev/null +++ b/pandas/tests/io/formats/data/html/render_links_false.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
    foobarNone
    00https://pandas.pydata.org/?q1=a&q2=bpydata.org
    10www.pydata.orgpydata.org
    diff --git a/pandas/tests/io/formats/data/html/render_links_true.html b/pandas/tests/io/formats/data/html/render_links_true.html new file mode 100644 index 00000000..3eb53f31 --- /dev/null +++ b/pandas/tests/io/formats/data/html/render_links_true.html @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + +
    foobarNone
    00https://pandas.pydata.org/?q1=a&q2=bpydata.org
    10www.pydata.orgpydata.org
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_multi.html new file mode 100644 index 00000000..e66d3c81 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_multi.html @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    foobaz
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_standard.html new file mode 100644 index 00000000..536b3711 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_named_standard.html @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    foobaz
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_none.html new file mode 100644 index 00000000..0f262495 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_none.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    foobaz
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_multi.html new file mode 100644 index 00000000..d472cdec --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_multi.html @@ -0,0 +1,88 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    foobaz
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_standard.html new file mode 100644 index 00000000..31c71ca3 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_multi_columns_unnamed_standard.html @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    foobaz
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_multi.html new file mode 100644 index 00000000..779e84f6 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_multi.html @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    index.name
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_standard.html new file mode 100644 index 00000000..b86454f5 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_named_standard.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    index.name
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_none.html new file mode 100644 index 00000000..d294a507 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_none.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    index.name
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_multi.html new file mode 100644 index 00000000..24b776e1 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_multi.html @@ -0,0 +1,74 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    index.name
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_standard.html new file mode 100644 index 00000000..a0ca9602 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_named_standard_columns_unnamed_standard.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    index.name
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html new file mode 100644 index 00000000..6640db4c --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_multi.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    01...67
    89...1415
    ..................
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html new file mode 100644 index 00000000..364a0b98 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_named_standard.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    01...67
    89...1415
    ..................
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html new file mode 100644 index 00000000..e2af1ba4 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_none.html @@ -0,0 +1,39 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    89...1415
    ...............
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html new file mode 100644 index 00000000..8c9a9e24 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_multi.html @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    01...67
    89...1415
    ...............
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html new file mode 100644 index 00000000..b9dcf526 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_none_columns_unnamed_standard.html @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    01...67
    89...1415
    ...............
    4849...5455
    5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html new file mode 100644 index 00000000..0590d0de --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_multi.html @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html new file mode 100644 index 00000000..28a2d964 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_named_standard.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html new file mode 100644 index 00000000..387ac51b --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_none.html @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html new file mode 100644 index 00000000..30cd8590 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_multi.html @@ -0,0 +1,78 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html new file mode 100644 index 00000000..81edece2 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_multi_columns_unnamed_standard.html @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    ace01...67
    f89...1415
    ........................
    bde4849...5455
    f5657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html new file mode 100644 index 00000000..2acacfed --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_multi.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    fooa...b
    c...d
    bazef...ef
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html new file mode 100644 index 00000000..c9bacdbd --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_named_standard.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    columns.name01...67
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html new file mode 100644 index 00000000..f2696f7d --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_none.html @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html new file mode 100644 index 00000000..37e73152 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_multi.html @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    a...b
    c...d
    ef...ef
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html new file mode 100644 index 00000000..3241ff41 --- /dev/null +++ b/pandas/tests/io/formats/data/html/trunc_df_index_unnamed_standard_columns_unnamed_standard.html @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...67
    001...67
    189...1415
    ..................
    64849...5455
    75657...6263
    diff --git a/pandas/tests/io/formats/data/html/truncate.html b/pandas/tests/io/formats/data/html/truncate.html new file mode 100644 index 00000000..a5eb8c5c --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate.html @@ -0,0 +1,86 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    01...1819
    2001-01-01NaNNaN...NaNNaN
    2001-01-02NaNNaN...NaNNaN
    2001-01-03NaNNaN...NaNNaN
    2001-01-04NaNNaN...NaNNaN
    ..................
    2001-01-17NaNNaN...NaNNaN
    2001-01-18NaNNaN...NaNNaN
    2001-01-19NaNNaN...NaNNaN
    2001-01-20NaNNaN...NaNNaN
    diff --git a/pandas/tests/io/formats/data/html/truncate_formatter.html b/pandas/tests/io/formats/data/html/truncate_formatter.html new file mode 100644 index 00000000..7615ef89 --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate_formatter.html @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    A...D
    01_mod...4
    15_mod...8
    29_mod...12
    313_mod...16
    diff --git a/pandas/tests/io/formats/data/html/truncate_multi_index.html b/pandas/tests/io/formats/data/html/truncate_multi_index.html new file mode 100644 index 00000000..8a295d66 --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate_multi_index.html @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    barbaz...fooqux
    onetwoone...twoonetwo
    baroneNaNNaNNaN...NaNNaNNaN
    twoNaNNaNNaN...NaNNaNNaN
    bazoneNaNNaNNaN...NaNNaNNaN
    ...........................
    footwoNaNNaNNaN...NaNNaNNaN
    quxoneNaNNaNNaN...NaNNaNNaN
    twoNaNNaNNaN...NaNNaNNaN
    diff --git a/pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html b/pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html new file mode 100644 index 00000000..6a7e1b5a --- /dev/null +++ b/pandas/tests/io/formats/data/html/truncate_multi_index_sparse_off.html @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    barbarbaz...fooquxqux
    onetwoone...twoonetwo
    baroneNaNNaNNaN...NaNNaNNaN
    bartwoNaNNaNNaN...NaNNaNNaN
    bazoneNaNNaNNaN...NaNNaNNaN
    ...........................
    footwoNaNNaNNaN...NaNNaNNaN
    quxoneNaNNaNNaN...NaNNaNNaN
    quxtwoNaNNaNNaN...NaNNaNNaN
    diff --git a/pandas/tests/io/formats/data/html/unicode_1.html b/pandas/tests/io/formats/data/html/unicode_1.html new file mode 100644 index 00000000..72b81018 --- /dev/null +++ b/pandas/tests/io/formats/data/html/unicode_1.html @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    σ
    00.0
    11.0
    22.0
    33.0
    44.0
    55.0
    66.0
    77.0
    88.0
    99.0
    diff --git a/pandas/tests/io/formats/data/html/unicode_2.html b/pandas/tests/io/formats/data/html/unicode_2.html new file mode 100644 index 00000000..79c08809 --- /dev/null +++ b/pandas/tests/io/formats/data/html/unicode_2.html @@ -0,0 +1,14 @@ + + + + + + + + + + + + + +
    A
    0σ
    diff --git a/pandas/tests/io/formats/data/html/with_classes.html b/pandas/tests/io/formats/data/html/with_classes.html new file mode 100644 index 00000000..8cee3f0c --- /dev/null +++ b/pandas/tests/io/formats/data/html/with_classes.html @@ -0,0 +1,9 @@ + + + + + + + + +
    diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py new file mode 100644 index 00000000..e56d1488 --- /dev/null +++ b/pandas/tests/io/formats/test_console.py @@ -0,0 +1,72 @@ +import locale + +import pytest + +from pandas._config import detect_console_encoding + + +class MockEncoding: # TODO(py27): replace with mock + """ + Used to add a side effect when accessing the 'encoding' property. If the + side effect is a str in nature, the value will be returned. Otherwise, the + side effect should be an exception that will be raised. + """ + + def __init__(self, encoding): + super().__init__() + self.val = encoding + + @property + def encoding(self): + return self.raise_or_return(self.val) + + @staticmethod + def raise_or_return(val): + if isinstance(val, str): + return val + else: + raise val + + +@pytest.mark.parametrize("empty,filled", [["stdin", "stdout"], ["stdout", "stdin"]]) +def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled): + # Ensures that when sys.stdout.encoding or sys.stdin.encoding is used when + # they have values filled. 
+ # GH 21552 + with monkeypatch.context() as context: + context.setattr("sys.{}".format(empty), MockEncoding("")) + context.setattr("sys.{}".format(filled), MockEncoding(filled)) + assert detect_console_encoding() == filled + + +@pytest.mark.parametrize("encoding", [AttributeError, IOError, "ascii"]) +def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding): + # GH 21552 + with monkeypatch.context() as context: + context.setattr("locale.getpreferredencoding", lambda: "foo") + context.setattr("sys.stdout", MockEncoding(encoding)) + assert detect_console_encoding() == "foo" + + +@pytest.mark.parametrize( + "std,locale", + [ + ["ascii", "ascii"], + ["ascii", locale.Error], + [AttributeError, "ascii"], + [AttributeError, locale.Error], + [IOError, "ascii"], + [IOError, locale.Error], + ], +) +def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale): + # When both the stdout/stdin encoding and locale preferred encoding checks + # fail (or return 'ascii', we should default to the sys default encoding. 
+ # GH 21552 + with monkeypatch.context() as context: + context.setattr( + "locale.getpreferredencoding", lambda: MockEncoding.raise_or_return(locale) + ) + context.setattr("sys.stdout", MockEncoding(std)) + context.setattr("sys.getdefaultencoding", lambda: "sysDefaultEncoding") + assert detect_console_encoding() == "sysDefaultEncoding" diff --git a/pandas/tests/io/formats/test_css.py b/pandas/tests/io/formats/test_css.py new file mode 100644 index 00000000..7008cef7 --- /dev/null +++ b/pandas/tests/io/formats/test_css.py @@ -0,0 +1,232 @@ +import pytest + +import pandas._testing as tm + +from pandas.io.formats.css import CSSResolver, CSSWarning + + +def assert_resolves(css, props, inherited=None): + resolve = CSSResolver() + actual = resolve(css, inherited=inherited) + assert props == actual + + +def assert_same_resolution(css1, css2, inherited=None): + resolve = CSSResolver() + resolved1 = resolve(css1, inherited=inherited) + resolved2 = resolve(css2, inherited=inherited) + assert resolved1 == resolved2 + + +@pytest.mark.parametrize( + "name,norm,abnorm", + [ + ( + "whitespace", + "hello: world; foo: bar", + " \t hello \t :\n world \n ; \n foo: \tbar\n\n", + ), + ("case", "hello: world; foo: bar", "Hello: WORLD; foO: bar"), + ("empty-decl", "hello: world; foo: bar", "; hello: world;; foo: bar;\n; ;"), + ("empty-list", "", ";"), + ], +) +def test_css_parse_normalisation(name, norm, abnorm): + assert_same_resolution(norm, abnorm) + + +@pytest.mark.parametrize( + "invalid_css,remainder", + [ + # No colon + ("hello-world", ""), + ("border-style: solid; hello-world", "border-style: solid"), + ( + "border-style: solid; hello-world; font-weight: bold", + "border-style: solid; font-weight: bold", + ), + # Unclosed string fail + # Invalid size + ("font-size: blah", "font-size: 1em"), + ("font-size: 1a2b", "font-size: 1em"), + ("font-size: 1e5pt", "font-size: 1em"), + ("font-size: 1+6pt", "font-size: 1em"), + ("font-size: 1unknownunit", "font-size: 1em"), + ("font-size: 
10", "font-size: 1em"), + ("font-size: 10 pt", "font-size: 1em"), + ], +) +def test_css_parse_invalid(invalid_css, remainder): + with tm.assert_produces_warning(CSSWarning): + assert_same_resolution(invalid_css, remainder) + + # TODO: we should be checking that in other cases no warnings are raised + + +@pytest.mark.parametrize( + "shorthand,expansions", + [ + ("margin", ["margin-top", "margin-right", "margin-bottom", "margin-left"]), + ("padding", ["padding-top", "padding-right", "padding-bottom", "padding-left"]), + ( + "border-width", + [ + "border-top-width", + "border-right-width", + "border-bottom-width", + "border-left-width", + ], + ), + ( + "border-color", + [ + "border-top-color", + "border-right-color", + "border-bottom-color", + "border-left-color", + ], + ), + ( + "border-style", + [ + "border-top-style", + "border-right-style", + "border-bottom-style", + "border-left-style", + ], + ), + ], +) +def test_css_side_shorthands(shorthand, expansions): + top, right, bottom, left = expansions + + assert_resolves( + f"{shorthand}: 1pt", {top: "1pt", right: "1pt", bottom: "1pt", left: "1pt"}, + ) + + assert_resolves( + f"{shorthand}: 1pt 4pt", {top: "1pt", right: "4pt", bottom: "1pt", left: "4pt"}, + ) + + assert_resolves( + f"{shorthand}: 1pt 4pt 2pt", + {top: "1pt", right: "4pt", bottom: "2pt", left: "4pt"}, + ) + + assert_resolves( + f"{shorthand}: 1pt 4pt 2pt 0pt", + {top: "1pt", right: "4pt", bottom: "2pt", left: "0pt"}, + ) + + with tm.assert_produces_warning(CSSWarning): + assert_resolves(f"{shorthand}: 1pt 1pt 1pt 1pt 1pt", {}) + + +@pytest.mark.parametrize( + "style,inherited,equiv", + [ + ("margin: 1px; margin: 2px", "", "margin: 2px"), + ("margin: 1px", "margin: 2px", "margin: 1px"), + ("margin: 1px; margin: inherit", "margin: 2px", "margin: 2px"), + ( + "margin: 1px; margin-top: 2px", + "", + "margin-left: 1px; margin-right: 1px; " + + "margin-bottom: 1px; margin-top: 2px", + ), + ("margin-top: 2px", "margin: 1px", "margin: 1px; margin-top: 2px"), + 
("margin: 1px", "margin-top: 2px", "margin: 1px"), + ( + "margin: 1px; margin-top: inherit", + "margin: 2px", + "margin: 1px; margin-top: 2px", + ), + ], +) +def test_css_precedence(style, inherited, equiv): + resolve = CSSResolver() + inherited_props = resolve(inherited) + style_props = resolve(style, inherited=inherited_props) + equiv_props = resolve(equiv) + assert style_props == equiv_props + + +@pytest.mark.parametrize( + "style,equiv", + [ + ( + "margin: 1px; margin-top: inherit", + "margin-bottom: 1px; margin-right: 1px; margin-left: 1px", + ), + ("margin-top: inherit", ""), + ("margin-top: initial", ""), + ], +) +def test_css_none_absent(style, equiv): + assert_same_resolution(style, equiv) + + +@pytest.mark.parametrize( + "size,resolved", + [ + ("xx-small", "6pt"), + ("x-small", f"{7.5:f}pt"), + ("small", f"{9.6:f}pt"), + ("medium", "12pt"), + ("large", f"{13.5:f}pt"), + ("x-large", "18pt"), + ("xx-large", "24pt"), + ("8px", "6pt"), + ("1.25pc", "15pt"), + (".25in", "18pt"), + ("02.54cm", "72pt"), + ("25.4mm", "72pt"), + ("101.6q", "72pt"), + ("101.6q", "72pt"), + ], +) +@pytest.mark.parametrize("relative_to", [None, "16pt"]) # invariant to inherited size +def test_css_absolute_font_size(size, relative_to, resolved): + if relative_to is None: + inherited = None + else: + inherited = {"font-size": relative_to} + assert_resolves( + f"font-size: {size}", {"font-size": resolved}, inherited=inherited, + ) + + +@pytest.mark.parametrize( + "size,relative_to,resolved", + [ + ("1em", None, "12pt"), + ("1.0em", None, "12pt"), + ("1.25em", None, "15pt"), + ("1em", "16pt", "16pt"), + ("1.0em", "16pt", "16pt"), + ("1.25em", "16pt", "20pt"), + ("1rem", "16pt", "12pt"), + ("1.0rem", "16pt", "12pt"), + ("1.25rem", "16pt", "15pt"), + ("100%", None, "12pt"), + ("125%", None, "15pt"), + ("100%", "16pt", "16pt"), + ("125%", "16pt", "20pt"), + ("2ex", None, "12pt"), + ("2.0ex", None, "12pt"), + ("2.50ex", None, "15pt"), + ("inherit", "16pt", "16pt"), + ("smaller", None, 
"10pt"), + ("smaller", "18pt", "15pt"), + ("larger", None, f"{14.4:f}pt"), + ("larger", "15pt", "18pt"), + ], +) +def test_css_relative_font_size(size, relative_to, resolved): + if relative_to is None: + inherited = None + else: + inherited = {"font-size": relative_to} + assert_resolves( + f"font-size: {size}", {"font-size": resolved}, inherited=inherited, + ) diff --git a/pandas/tests/io/formats/test_eng_formatting.py b/pandas/tests/io/formats/test_eng_formatting.py new file mode 100644 index 00000000..6801316a --- /dev/null +++ b/pandas/tests/io/formats/test_eng_formatting.py @@ -0,0 +1,235 @@ +import numpy as np + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +import pandas.io.formats.format as fmt + + +class TestEngFormatter: + def test_eng_float_formatter(self): + df = DataFrame({"A": [1.41, 141.0, 14100, 1410000.0]}) + + fmt.set_eng_float_format() + result = df.to_string() + expected = ( + " A\n" + "0 1.410E+00\n" + "1 141.000E+00\n" + "2 14.100E+03\n" + "3 1.410E+06" + ) + assert result == expected + + fmt.set_eng_float_format(use_eng_prefix=True) + result = df.to_string() + expected = " A\n0 1.410\n1 141.000\n2 14.100k\n3 1.410M" + assert result == expected + + fmt.set_eng_float_format(accuracy=0) + result = df.to_string() + expected = " A\n0 1E+00\n1 141E+00\n2 14E+03\n3 1E+06" + assert result == expected + + tm.reset_display_options() + + def compare(self, formatter, input, output): + formatted_input = formatter(input) + assert formatted_input == output + + def compare_all(self, formatter, in_out): + """ + Parameters: + ----------- + formatter: EngFormatter under test + in_out: list of tuples. Each tuple = (number, expected_formatting) + + It is tested if 'formatter(number) == expected_formatting'. + *number* should be >= 0 because formatter(-number) == fmt is also + tested. 
*fmt* is derived from *expected_formatting* + """ + for input, output in in_out: + self.compare(formatter, input, output) + self.compare(formatter, -input, "-" + output[1:]) + + def test_exponents_with_eng_prefix(self): + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + f = np.sqrt(2) + in_out = [ + (f * 10 ** -24, " 1.414y"), + (f * 10 ** -23, " 14.142y"), + (f * 10 ** -22, " 141.421y"), + (f * 10 ** -21, " 1.414z"), + (f * 10 ** -20, " 14.142z"), + (f * 10 ** -19, " 141.421z"), + (f * 10 ** -18, " 1.414a"), + (f * 10 ** -17, " 14.142a"), + (f * 10 ** -16, " 141.421a"), + (f * 10 ** -15, " 1.414f"), + (f * 10 ** -14, " 14.142f"), + (f * 10 ** -13, " 141.421f"), + (f * 10 ** -12, " 1.414p"), + (f * 10 ** -11, " 14.142p"), + (f * 10 ** -10, " 141.421p"), + (f * 10 ** -9, " 1.414n"), + (f * 10 ** -8, " 14.142n"), + (f * 10 ** -7, " 141.421n"), + (f * 10 ** -6, " 1.414u"), + (f * 10 ** -5, " 14.142u"), + (f * 10 ** -4, " 141.421u"), + (f * 10 ** -3, " 1.414m"), + (f * 10 ** -2, " 14.142m"), + (f * 10 ** -1, " 141.421m"), + (f * 10 ** 0, " 1.414"), + (f * 10 ** 1, " 14.142"), + (f * 10 ** 2, " 141.421"), + (f * 10 ** 3, " 1.414k"), + (f * 10 ** 4, " 14.142k"), + (f * 10 ** 5, " 141.421k"), + (f * 10 ** 6, " 1.414M"), + (f * 10 ** 7, " 14.142M"), + (f * 10 ** 8, " 141.421M"), + (f * 10 ** 9, " 1.414G"), + (f * 10 ** 10, " 14.142G"), + (f * 10 ** 11, " 141.421G"), + (f * 10 ** 12, " 1.414T"), + (f * 10 ** 13, " 14.142T"), + (f * 10 ** 14, " 141.421T"), + (f * 10 ** 15, " 1.414P"), + (f * 10 ** 16, " 14.142P"), + (f * 10 ** 17, " 141.421P"), + (f * 10 ** 18, " 1.414E"), + (f * 10 ** 19, " 14.142E"), + (f * 10 ** 20, " 141.421E"), + (f * 10 ** 21, " 1.414Z"), + (f * 10 ** 22, " 14.142Z"), + (f * 10 ** 23, " 141.421Z"), + (f * 10 ** 24, " 1.414Y"), + (f * 10 ** 25, " 14.142Y"), + (f * 10 ** 26, " 141.421Y"), + ] + self.compare_all(formatter, in_out) + + def test_exponents_without_eng_prefix(self): + formatter = fmt.EngFormatter(accuracy=4, 
use_eng_prefix=False) + f = np.pi + in_out = [ + (f * 10 ** -24, " 3.1416E-24"), + (f * 10 ** -23, " 31.4159E-24"), + (f * 10 ** -22, " 314.1593E-24"), + (f * 10 ** -21, " 3.1416E-21"), + (f * 10 ** -20, " 31.4159E-21"), + (f * 10 ** -19, " 314.1593E-21"), + (f * 10 ** -18, " 3.1416E-18"), + (f * 10 ** -17, " 31.4159E-18"), + (f * 10 ** -16, " 314.1593E-18"), + (f * 10 ** -15, " 3.1416E-15"), + (f * 10 ** -14, " 31.4159E-15"), + (f * 10 ** -13, " 314.1593E-15"), + (f * 10 ** -12, " 3.1416E-12"), + (f * 10 ** -11, " 31.4159E-12"), + (f * 10 ** -10, " 314.1593E-12"), + (f * 10 ** -9, " 3.1416E-09"), + (f * 10 ** -8, " 31.4159E-09"), + (f * 10 ** -7, " 314.1593E-09"), + (f * 10 ** -6, " 3.1416E-06"), + (f * 10 ** -5, " 31.4159E-06"), + (f * 10 ** -4, " 314.1593E-06"), + (f * 10 ** -3, " 3.1416E-03"), + (f * 10 ** -2, " 31.4159E-03"), + (f * 10 ** -1, " 314.1593E-03"), + (f * 10 ** 0, " 3.1416E+00"), + (f * 10 ** 1, " 31.4159E+00"), + (f * 10 ** 2, " 314.1593E+00"), + (f * 10 ** 3, " 3.1416E+03"), + (f * 10 ** 4, " 31.4159E+03"), + (f * 10 ** 5, " 314.1593E+03"), + (f * 10 ** 6, " 3.1416E+06"), + (f * 10 ** 7, " 31.4159E+06"), + (f * 10 ** 8, " 314.1593E+06"), + (f * 10 ** 9, " 3.1416E+09"), + (f * 10 ** 10, " 31.4159E+09"), + (f * 10 ** 11, " 314.1593E+09"), + (f * 10 ** 12, " 3.1416E+12"), + (f * 10 ** 13, " 31.4159E+12"), + (f * 10 ** 14, " 314.1593E+12"), + (f * 10 ** 15, " 3.1416E+15"), + (f * 10 ** 16, " 31.4159E+15"), + (f * 10 ** 17, " 314.1593E+15"), + (f * 10 ** 18, " 3.1416E+18"), + (f * 10 ** 19, " 31.4159E+18"), + (f * 10 ** 20, " 314.1593E+18"), + (f * 10 ** 21, " 3.1416E+21"), + (f * 10 ** 22, " 31.4159E+21"), + (f * 10 ** 23, " 314.1593E+21"), + (f * 10 ** 24, " 3.1416E+24"), + (f * 10 ** 25, " 31.4159E+24"), + (f * 10 ** 26, " 314.1593E+24"), + ] + self.compare_all(formatter, in_out) + + def test_rounding(self): + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + in_out = [ + (5.55555, " 5.556"), + (55.5555, " 55.556"), + (555.555, " 
555.555"), + (5555.55, " 5.556k"), + (55555.5, " 55.556k"), + (555555, " 555.555k"), + ] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + in_out = [ + (5.55555, " 5.6"), + (55.5555, " 55.6"), + (555.555, " 555.6"), + (5555.55, " 5.6k"), + (55555.5, " 55.6k"), + (555555, " 555.6k"), + ] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=0, use_eng_prefix=True) + in_out = [ + (5.55555, " 6"), + (55.5555, " 56"), + (555.555, " 556"), + (5555.55, " 6k"), + (55555.5, " 56k"), + (555555, " 556k"), + ] + self.compare_all(formatter, in_out) + + formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) + result = formatter(0) + assert result == " 0.000" + + def test_nan(self): + # Issue #11981 + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + result = formatter(np.nan) + assert result == "NaN" + + df = pd.DataFrame( + { + "a": [1.5, 10.3, 20.5], + "b": [50.3, 60.67, 70.12], + "c": [100.2, 101.33, 120.33], + } + ) + pt = df.pivot_table(values="a", index="b", columns="c") + fmt.set_eng_float_format(accuracy=1) + result = pt.to_string() + assert "NaN" in result + tm.reset_display_options() + + def test_inf(self): + # Issue #11981 + + formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) + result = formatter(np.inf) + assert result == "inf" diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py new file mode 100644 index 00000000..a7338742 --- /dev/null +++ b/pandas/tests/io/formats/test_format.py @@ -0,0 +1,3288 @@ +""" +Test output formatting for Series/DataFrame, including to_string & reprs +""" + +from datetime import datetime +from io import StringIO +import itertools +from operator import methodcaller +import os +from pathlib import Path +import re +from shutil import get_terminal_size +import sys +import textwrap + +import dateutil +import numpy as np +import pytest +import pytz + +from pandas.compat import 
is_platform_32bit, is_platform_windows + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + get_option, + option_context, + read_csv, + reset_option, + set_option, +) +import pandas._testing as tm + +import pandas.io.formats.format as fmt +import pandas.io.formats.printing as printing + +use_32bit_repr = is_platform_windows() or is_platform_32bit() + + +@pytest.fixture(params=["string", "pathlike", "buffer"]) +def filepath_or_buffer_id(request): + """ + A fixture yielding test ids for filepath_or_buffer testing. + """ + return request.param + + +@pytest.fixture +def filepath_or_buffer(filepath_or_buffer_id, tmp_path): + """ + A fixture yielding a string representing a filepath, a path-like object + and a StringIO buffer. Also checks that buffer is not closed. + """ + if filepath_or_buffer_id == "buffer": + buf = StringIO() + yield buf + assert not buf.closed + else: + assert isinstance(tmp_path, Path) + if filepath_or_buffer_id == "pathlike": + yield tmp_path / "foo" + else: + yield str(tmp_path / "foo") + + +@pytest.fixture +def assert_filepath_or_buffer_equals( + filepath_or_buffer, filepath_or_buffer_id, encoding +): + """ + Assertion helper for checking filepath_or_buffer. + """ + + def _assert_filepath_or_buffer_equals(expected): + if filepath_or_buffer_id == "string": + with open(filepath_or_buffer, encoding=encoding) as f: + result = f.read() + elif filepath_or_buffer_id == "pathlike": + result = filepath_or_buffer.read_text(encoding=encoding) + elif filepath_or_buffer_id == "buffer": + result = filepath_or_buffer.getvalue() + assert result == expected + + return _assert_filepath_or_buffer_equals + + +def curpath(): + pth, _ = os.path.split(os.path.abspath(__file__)) + return pth + + +def has_info_repr(df): + r = repr(df) + c1 = r.split("\n")[0].startswith(" + # 2. Index + # 3. Columns + # 4. dtype + # 5. memory usage + # 6. 
trailing newline + nv = len(r.split("\n")) == 6 + return has_info and nv + + +def has_horizontally_truncated_repr(df): + try: # Check header row + fst_line = np.array(repr(df).splitlines()[0].split()) + cand_col = np.where(fst_line == "...")[0][0] + except IndexError: + return False + # Make sure each row has this ... in the same place + r = repr(df) + for ix, l in enumerate(r.splitlines()): + if not r.split()[cand_col] == "...": + return False + return True + + +def has_vertically_truncated_repr(df): + r = repr(df) + only_dot_row = False + for row in r.splitlines(): + if re.match(r"^[\.\ ]+$", row): + only_dot_row = True + return only_dot_row + + +def has_truncated_repr(df): + return has_horizontally_truncated_repr(df) or has_vertically_truncated_repr(df) + + +def has_doubly_truncated_repr(df): + return has_horizontally_truncated_repr(df) and has_vertically_truncated_repr(df) + + +def has_expanded_repr(df): + r = repr(df) + for line in r.split("\n"): + if line.endswith("\\"): + return True + return False + + +@pytest.mark.filterwarnings("ignore::FutureWarning:.*format") +class TestDataFrameFormatting: + def test_repr_embedded_ndarray(self): + arr = np.empty(10, dtype=[("err", object)]) + for i in range(len(arr)): + arr["err"][i] = np.random.randn(i) + + df = DataFrame(arr) + repr(df["err"]) + repr(df) + df.to_string() + + def test_eng_float_formatter(self, float_frame): + df = float_frame + df.loc[5] = 0 + + fmt.set_eng_float_format() + repr(df) + + fmt.set_eng_float_format(use_eng_prefix=True) + repr(df) + + fmt.set_eng_float_format(accuracy=0) + repr(df) + tm.reset_display_options() + + def test_show_null_counts(self): + + df = DataFrame(1, columns=range(10), index=range(10)) + df.iloc[1, 1] = np.nan + + def check(null_counts, result): + buf = StringIO() + df.info(buf=buf, null_counts=null_counts) + assert ("non-null" in buf.getvalue()) is result + + with option_context( + "display.max_info_rows", 20, "display.max_info_columns", 20 + ): + check(None, True) + 
check(True, True) + check(False, False) + + with option_context("display.max_info_rows", 5, "display.max_info_columns", 5): + check(None, False) + check(True, False) + check(False, False) + + def test_repr_tuples(self): + buf = StringIO() + + df = DataFrame({"tups": list(zip(range(10), range(10)))}) + repr(df) + df.to_string(col_space=10, buf=buf) + + def test_repr_truncation(self): + max_len = 20 + with option_context("display.max_colwidth", max_len): + df = DataFrame( + { + "A": np.random.randn(10), + "B": [ + tm.rands(np.random.randint(max_len - 1, max_len + 1)) + for i in range(10) + ], + } + ) + r = repr(df) + r = r[r.find("\n") + 1 :] + + adj = fmt._get_adjustment() + + for line, value in zip(r.split("\n"), df["B"]): + if adj.len(value) + 1 > max_len: + assert "..." in line + else: + assert "..." not in line + + with option_context("display.max_colwidth", 999999): + assert "..." not in repr(df) + + with option_context("display.max_colwidth", max_len + 2): + assert "..." not in repr(df) + + def test_repr_deprecation_negative_int(self): + # FIXME: remove in future version after deprecation cycle + # Non-regression test for: + # https://github.com/pandas-dev/pandas/issues/31532 + width = get_option("display.max_colwidth") + with tm.assert_produces_warning(FutureWarning): + set_option("display.max_colwidth", -1) + set_option("display.max_colwidth", width) + + def test_repr_chop_threshold(self): + df = DataFrame([[0.1, 0.5], [0.5, -0.1]]) + pd.reset_option("display.chop_threshold") # default None + assert repr(df) == " 0 1\n0 0.1 0.5\n1 0.5 -0.1" + + with option_context("display.chop_threshold", 0.2): + assert repr(df) == " 0 1\n0 0.0 0.5\n1 0.5 0.0" + + with option_context("display.chop_threshold", 0.6): + assert repr(df) == " 0 1\n0 0.0 0.0\n1 0.0 0.0" + + with option_context("display.chop_threshold", None): + assert repr(df) == " 0 1\n0 0.1 0.5\n1 0.5 -0.1" + + def test_repr_chop_threshold_column_below(self): + # GH 6839: validation case + + df = 
pd.DataFrame([[10, 20, 30, 40], [8e-10, -1e-11, 2e-9, -2e-11]]).T + + with option_context("display.chop_threshold", 0): + assert repr(df) == ( + " 0 1\n" + "0 10.0 8.000000e-10\n" + "1 20.0 -1.000000e-11\n" + "2 30.0 2.000000e-09\n" + "3 40.0 -2.000000e-11" + ) + + with option_context("display.chop_threshold", 1e-8): + assert repr(df) == ( + " 0 1\n" + "0 10.0 0.000000e+00\n" + "1 20.0 0.000000e+00\n" + "2 30.0 0.000000e+00\n" + "3 40.0 0.000000e+00" + ) + + with option_context("display.chop_threshold", 5e-11): + assert repr(df) == ( + " 0 1\n" + "0 10.0 8.000000e-10\n" + "1 20.0 0.000000e+00\n" + "2 30.0 2.000000e-09\n" + "3 40.0 0.000000e+00" + ) + + def test_repr_obeys_max_seq_limit(self): + with option_context("display.max_seq_items", 2000): + assert len(printing.pprint_thing(list(range(1000)))) > 1000 + + with option_context("display.max_seq_items", 5): + assert len(printing.pprint_thing(list(range(1000)))) < 100 + + def test_repr_set(self): + assert printing.pprint_thing({1}) == "{1}" + + def test_repr_is_valid_construction_code(self): + # for the case of Index, where the repr is traditional rather then + # stylized + idx = Index(["a", "b"]) + res = eval("pd." + repr(idx)) + tm.assert_series_equal(Series(res), Series(idx)) + + def test_repr_should_return_str(self): + # https://docs.python.org/3/reference/datamodel.html#object.__repr__ + # "...The return value must be a string object." 
+ + # (str on py2.x, str (unicode) on py3) + + data = [8, 5, 3, 5] + index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"] + cols = ["\u03c8"] + df = DataFrame(data, columns=cols, index=index1) + assert type(df.__repr__()) == str # both py2 / 3 + + def test_repr_no_backslash(self): + with option_context("mode.sim_interactive", True): + df = DataFrame(np.random.randn(10, 4)) + assert "\\" not in repr(df) + + def test_expand_frame_repr(self): + df_small = DataFrame("hello", index=[0], columns=[0]) + df_wide = DataFrame("hello", index=[0], columns=range(10)) + df_tall = DataFrame("hello", index=range(30), columns=range(5)) + + with option_context("mode.sim_interactive", True): + with option_context( + "display.max_columns", + 10, + "display.width", + 20, + "display.max_rows", + 20, + "display.show_dimensions", + True, + ): + with option_context("display.expand_frame_repr", True): + assert not has_truncated_repr(df_small) + assert not has_expanded_repr(df_small) + assert not has_truncated_repr(df_wide) + assert has_expanded_repr(df_wide) + assert has_vertically_truncated_repr(df_tall) + assert has_expanded_repr(df_tall) + + with option_context("display.expand_frame_repr", False): + assert not has_truncated_repr(df_small) + assert not has_expanded_repr(df_small) + assert not has_horizontally_truncated_repr(df_wide) + assert not has_expanded_repr(df_wide) + assert has_vertically_truncated_repr(df_tall) + assert not has_expanded_repr(df_tall) + + def test_repr_non_interactive(self): + # in non interactive mode, there can be no dependency on the + # result of terminal auto size detection + df = DataFrame("hello", index=range(1000), columns=range(5)) + + with option_context( + "mode.sim_interactive", False, "display.width", 0, "display.max_rows", 5000 + ): + assert not has_truncated_repr(df) + assert not has_expanded_repr(df) + + def test_repr_truncates_terminal_size(self, monkeypatch): + # see gh-21180 + + terminal_size = (118, 96) + monkeypatch.setattr( + 
"pandas.io.formats.format.get_terminal_size", lambda: terminal_size + ) + + index = range(5) + columns = pd.MultiIndex.from_tuples( + [ + ("This is a long title with > 37 chars.", "cat"), + ("This is a loooooonger title with > 43 chars.", "dog"), + ] + ) + df = pd.DataFrame(1, index=index, columns=columns) + + result = repr(df) + + h1, h2 = result.split("\n")[:2] + assert "long" in h1 + assert "loooooonger" in h1 + assert "cat" in h2 + assert "dog" in h2 + + # regular columns + df2 = pd.DataFrame({"A" * 41: [1, 2], "B" * 41: [1, 2]}) + result = repr(df2) + + assert df2.columns[0] in result.split("\n")[0] + + def test_repr_truncates_terminal_size_full(self, monkeypatch): + # GH 22984 ensure entire window is filled + terminal_size = (80, 24) + df = pd.DataFrame(np.random.rand(1, 7)) + + monkeypatch.setattr( + "pandas.io.formats.format.get_terminal_size", lambda: terminal_size + ) + assert "..." not in str(df) + + def test_repr_truncation_column_size(self): + # dataframe with last column very wide -> check it is not used to + # determine size of truncation (...) column + df = pd.DataFrame( + { + "a": [108480, 30830], + "b": [12345, 12345], + "c": [12345, 12345], + "d": [12345, 12345], + "e": ["a" * 50] * 2, + } + ) + assert "..." in str(df) + assert " ... 
" not in str(df) + + def test_repr_max_columns_max_rows(self): + term_width, term_height = get_terminal_size() + if term_width < 10 or term_height < 10: + pytest.skip(f"terminal size too small, {term_width} x {term_height}") + + def mkframe(n): + index = [f"{i:05d}" for i in range(n)] + return DataFrame(0, index, index) + + df6 = mkframe(6) + df10 = mkframe(10) + with option_context("mode.sim_interactive", True): + with option_context("display.width", term_width * 2): + with option_context("display.max_rows", 5, "display.max_columns", 5): + assert not has_expanded_repr(mkframe(4)) + assert not has_expanded_repr(mkframe(5)) + assert not has_expanded_repr(df6) + assert has_doubly_truncated_repr(df6) + + with option_context("display.max_rows", 20, "display.max_columns", 10): + # Out off max_columns boundary, but no extending + # since not exceeding width + assert not has_expanded_repr(df6) + assert not has_truncated_repr(df6) + + with option_context("display.max_rows", 9, "display.max_columns", 10): + # out vertical bounds can not result in expanded repr + assert not has_expanded_repr(df10) + assert has_vertically_truncated_repr(df10) + + # width=None in terminal, auto detection + with option_context( + "display.max_columns", + 100, + "display.max_rows", + term_width * 20, + "display.width", + None, + ): + df = mkframe((term_width // 7) - 2) + assert not has_expanded_repr(df) + df = mkframe((term_width // 7) + 2) + printing.pprint_thing(df._repr_fits_horizontal_()) + assert has_expanded_repr(df) + + def test_repr_min_rows(self): + df = pd.DataFrame({"a": range(20)}) + + # default setting no truncation even if above min_rows + assert ".." not in repr(df) + assert ".." not in df._repr_html_() + + df = pd.DataFrame({"a": range(61)}) + + # default of max_rows 60 triggers truncation if above + assert ".." in repr(df) + assert ".." in df._repr_html_() + + with option_context("display.max_rows", 10, "display.min_rows", 4): + # truncated after first two rows + assert ".." 
in repr(df) + assert "2 " not in repr(df) + assert "..." in df._repr_html_() + assert "

    rVTG7w3MBxOI;%;wQsECj18nLUu$ z`KF*ZtO6uU13mEwzcXc z!P+*$zFL-gbiEIXAZH4@vy2Ww5u6)uK4DBA`)GVSZ)AYrVyc>3?$&t2HTd^6>hVSN zfCSv%y703E@NTFn6r$`1wRhr#KpoA#FHp2u|1?42JOqTrsK~c*5e0ogxdHZgrRJpr zrDk6AW>BFk5`B?!oJ2|GV;r`-nX8<&G7~#Z@*23>u1O>+4zSANe5Y7lE6pgYf}HX} zYQ0vPbH>H{HT`NbNr~mdxHWWKSkXqYKV`2jmQwXYqXuMBlo;2zRKl$Wej-TM$(tm3 ze;}H}q)vyo`!oe*r??i5fcz+v=6teI#sW22>Z_NEpcdwwv&wz|J{1rNv(hbz5&6bl zRBQR#hAb{wP1cvsIA85ILET-IwBOK}nIV%dvWa-yQqTRrJc=A_D=Htmf(g7S!cTL= z`t+firz9Y+xkApjAy}{pBVs|Sn<{z^-RVmtf}s1(*v~Z$4lY8rC^WR?5*%t@ z9yrVMq_A5iG@Us49-Rz#`U`HL0-wDm-@`7z^xnH$1cm$V@3c#`k@+q7yUgKD zc>F)p&e-1mAJ%>6+wWUiY`@|L7k<#m_j!$|iw?e2JS6ax;Pq>}-0ID6UQ2d8;eCbd zg#4yh>0B1l^5)`X!R+k8rSjTYTki=8DN-O^XSSXxz73nc7O_cco0!X+uTuX~KEOXCiLRIMHo3cczdmlUl(k zTgS@DnwU2Ae%PMVV?UPO|Rr)9IN%llK4@IIJ zeQHx|d{|?c`p$oU=%8t6yUY3putlo4zK=f_#EB=|IeeW+c>vJ7(bDk;m>H? z8fe@$zTI&DX{?0#%l7+ifZIKfp8z-n|Ni#>tpjo!@OIVv6A+K^HsH^O!hesV2cIka z;gmlv>&Na#C4C#^cG39rS_qvwf9)(2u zno?te0ES-?M1U@_@4I!h0PvO-01j8e`w}onysSSK(`wa)n}UMIcxN@31bih8F1w2* z93$|^)Gik80i?%ZC`dBN60^Awu-$ihiY)?1(q-d@-jFbB z2b(2#J%mp)3e;+iu(YRR%OP%L&D1nBc%5P=vn0}% z+VxqzwDp_l@R|zRH}YfLr1Cy@uT$70tG4R)Qx#nX)vZG27^e&T#A}Ohi-F&n<&C8L5pS7(u z^2HXQOpkDv@381o*<{M)(T*^9y z2KH$}5L)^>oEJ}apAXl92%#@*j@!hFBgQ`8=)UITlB!XtjJO1jA1OQce)QB|41{*N zk~S3~1qVm0vL&x?Dbqu4>FX|dMh_tqD|T;PrcS#nnev3C3KBSy9>fBrcuM!&Q6SUl zMw|frOQRUxq4o>bgCz}KF^IfE-oAvyZIH#Hg_zlhf%!M1jY}6gk2DipS`I>+i##26 zrcE=HvMJWRNlr_C;setMd7|hOZSL3lR8`_T1aCdWOl{sU8I3Qom(>c7<09u^9zLc7 z?TiW*;rsGAxpAM#mdI^`3aC|)b-ui4br-kRcBI9KV!moJhlz=RD7CnT<8iuYD4 z|6rYjl-Aa?3tb|+ipkJB6)3Vldb)Yrsp(8?H1@06?8iU@-O)0KiFDFp)a;aL&zSwE zVi=RFVCnJ%#g-oZAX?Y@tEie3>u5vnvEoSGiv<7S;cJwQSk?2N8mwH(CbB#amhXNW z&|f?Sr#m|TQ}OhW+vARt>XV5tZHKZwQkxYhrQ)4-Pk`;)S4_>{nL{&QVfsfO>}ALT zoy}hYB-R~*fXF@&Qt;)9MA&) zP4V-7dKq?f03gEy092L(u3k9bLrtJ^yxYbIKdE%A!E11zvGXQ^4b1KETEOpGV!)>{ zA>l93YMm;Fdjc)5#vi)zrKdK5zzWLXR?m%&fRYieg_RDn2?btg2uc?w^@cHn48gW# z<8%jmySxNevCI9U*$$vmX5*}|m67pa`&dZNiB_V^m7|ejhM zt`SN%xQxwez)#rpj>Hu$4I{_Yb@p<>#bJs~LdhPmG=xR{%2Ye}-PT+^7w#j1yX3}a 
zHN|)6C)x=W@>d;SiYx^;c(aN22a5$=b`%(~i=deadh)o>H%sC4(+|+5%%r}UYhH-z z*=Gl8tmjN-{3Xs_{lRC(Tf$U_50nj0=86s*;L8`2SdEx4H9oeemhgl}F7rVI7Yc@l z!Y|i^;kB8V#tiHPYoA+iraNt_^Ic--TV#fv+gfBe$N4e!Sbb+~SM6d;>^B(mI^!q! zS+KcV%<9HD4<`?zda7hSxo4(mc5pAybz2SXl)F5K?#(MgRju1Lk|6y1XX&+)Rl4pr z2chk(9_IO^c3n#~FmnbHOlncE%RMc$Zo9ON>^Uhfu+g|6NB;CNy2$gd2T<846=@KM zae@UWSaME~GTBG6cGWWt9T=MA0L~YeB~osKVluU{9F-UH z_;g@*$-}n!mj_TeM}b>)e%WOlwrxIk#4w5XVmYL(GhjP-_PNBHhE0`-^8Ap?e!^_` z>C(*ZS+k^6M~0X~xx4~RR}U_@e#-Vt3x%QQ{alN-&C77WWp zMaat`FQIxltggX|#9HEZVgX+oet~BqN>CH!<(xZ5W8qG>pjw@RbS=D-Zj*#LvS{LQ zx0(+NpJn&Bu)dqNFFjelH}+ikmScUT)uv@=0~8?5t+EDwpB`!~mn;dHwIJ@8+vV)2 z2013Mxy`nar#odFqkF`Ex*W|}R`LuD^2{2uroZZZT%tzqJ$nIv_Fv&Ti42zT%t&42 z-Rp3ls$I=ToNT?j>F0^dMU@MUAEsO;w%4cKQ56KGt#_m*GA^AbwMV_6H;?uwYivPA znYBmnPlP&STg3?xvBPgr^0&=1Oc=L#dHN^6`rD{>&t#=OJlz@r|VdK7@^p$6KChV7;4 zkt~}Ma+L0u%8Ha%9HJ|o`#%RQS8`@n?ku03Jy;v>o1AV5s;t`+`a21V?&SFX7gc&d zM*!eB^=_nX3763~){(;^aAE4K;3N-4$0O8CMr8@=_YNRX%D`rJmx?#Q>oQ?+RaY$dpp%llNci~!u zkJVexYX~o&@aB@zsmg_VdR>u2OKn7jfF(|dzBXFj+_-QVKkswX!`(yIzPPuNMx&E) z3B!!OBy4J>RIv0sLMYw3B0^XTcSf`lHI2}p2w9@Z5Wr9 z{D}CM-QTll%j>N|wVOuGJ`tO7ABvsTd{qh#UceS*tYf&AT~*J=CB`cf*I?50RysFi zX`eE;d9M?o!-CJh_0J`f-PJ83IiIiHl0p089{Yp)>kidR)toEy-e9lb}k zD7q2ohsV6JjpWGl{yp(1wnXfFrsme2dvRar8@c&gD+kT+;Stt*F&mTx;89KD&vWL> zB9S0WSTNf4b-y+b7WDCJ)~Kc(uik2YVe)%=Q3FUWKdIcd{mo>+zya&xfVfVAUQ}P8 z<^^bGIRwksU-8;CP}9^x;r7J?$l&h-pgAJ*%iU5>1o&SceK`BO{o7TW8XThjzf=B= zu%tfz_RW9d{w%-0af;NU{2#^mPvD=V`3sR2F1 R;TwADB}`3CokN!Z`~&E6gN6VA literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testdtype.xls b/pandas/tests/io/data/excel/testdtype.xls new file mode 100644 index 0000000000000000000000000000000000000000..f63357524324ff6ff97f4b1a15b6a371957ec6e1 GIT binary patch literal 22528 zcmeHP32YqY6@If`+v_8V?Ig}gvhk7FjvXA^3FPuRF;|3h2tq@JX>jayY{ju7$A&~A zjcGxuN=m_@K_za12Gj!z4M#)O&;$vADw!5hC?$GdnZh zZE4l2Gpm1g=FR`U_vXF#zyH|Ze(C4MZ#{5q_S<4Rnk7p(R|dVeRkdt`lqFJ)4_M87HDNPTP1wQ_ky=@46{f#|Ri0AKBr+073SBOX(UOuv 
z4gQ2&gIS3-bIy?mrBq}ej`q4;yV~ozy4u4%Ju7$ZTDfP>TQUvq6ZvZ_tiJWrsW9nC zv_Ab)v;%FcNtj>y5wO|$$v8cDpVj^e`uAPv-*cfq>Ow!_LVwJK{)h|x8!q(QUFdH5 zxZyvcaHhj7Dn1i-_@)b8*C(~Uu1_lcK^L53F7zi|=s$6xf7ylpBNuw6`aGp@3R3K& z$H^3fes^*4Jq7m^Nbs<#U!0(Kcb}Ypa=rwQD|$9kjI#6JQ9&I>d{(>AYhCCyj`XR( zRQP|1!;6leZ_pF|)#EwWpuZxUaXBBuFQq5Og*uK-$Ls6#yd*u0OT4~~Zj(F7sGqQp z9{(7I8u&Nha<+j}XwVaSJ{-4Oy#82macMy*?0m1HPd4fUU$?Vv|7>)ZPIl%5$H_j^ z4Eie=0ZMbi4kZTN_FjU7pT~ZXj69HMZVpY8n`yYj5s_b(-xS;*s+0W|Ix7TZ5}31a z#nC7GR`g0WlhGptQW;n!BL;mGn1KW;l97a9rVQ-rGLk@FWyHW>&WM3d$%ujBpAiGw zT}BK{G#N3l#bm_5CYccf+gL^nOp2ZiYi;d;*&xfkJp@%liFPyO^Z{Zxygy_IVaiPd zgkk6o!uFm92t(5ygh@CJ5QeKe2op;hAPix55O&9DfH17xK`^kL7-YQ7+dG(@qyysZ zotf!@czb77dLZ83L5G#j>v(%-PI@5T-odoaqzP`UeTm+78XCR5gK1wnAl}}YmmY|> zchK{uqlve7!s&r{duKs4A8AXJL9E-riZ1 z9*DPhsuCa^FO5k~%rXvR+u?)Q3qOk`)8k#X&Z`utH_J{0Bap7K9iUT_yj&JtRLl31W6^TSrlrqx{rAX7EMGU3P zwA4}TVpR7W7ud(iwV5*1Wy-K5Q-%u@4D>b~XfbT+u%sT<^e&hTm|a*Ij2L_VPv#i2 zk8Q9jWlAG-sYNPdheYo*f~B~LL1Gbakj8XC?A?vsuT20+213f~q=xe{FeKYpByyv( z5o6mWm6Cg$kdi?RnQ(C*3X0&$FAVnQUYd6zUeYOI1%|44b`^+gXn$wM8Ml;P&%A9Tt zrOdR{6oeIGQ{<&sDwScCg~1-G+Wycrw(ejx7=rF|8QR_6RL#xT=nhgfS(DI&;Ra_| z?U~BZoM529ly^N8D3Ky4Dh>6O;kxkJ&dzB6(9+S+1{r178$Yz*yx1hH;@3;1gryE; zt#slOm3|rGYB}CUlS0W2M!V0Pdq0-VYs!Ltm0ZlAm}0w_hEy?S9%A&&rNm@MyU(Bb zwX>KKyO_pQF$+D!=($aa$%%Hq`~DlwVy4=~G^L8E@esr87dKjNwEOIve{dEv%`RqH zs+c7nVwf}IVnWgG(|`Q4vzY02G0RiMG9IQYiJyO@r<*W1$HqG zk=EoP#vEx4jUS%;s*{*PEdOJoo9H*!+WO54V!5t;rO2jRo^b{(Pl6(%?4S#5pynvT zZz!8LID=LsK@mT8&`KMqIc7wT{P?rZpp{8bM2j7?+6HQl5Rs9tZ#jc5OoAdd?4Wfv zP;)$pT=~fJ&Y+8upoj!J=u#V~+4~}YJA9cFXh^C!j$7B!H5yuB^}IGGmL=;j)9Xs~ zz}XzGVHJU+BVKcXiWu!8pWb-%C)AqzwUFhEM`>1V1U^A*VqaDx2>@mb*O5-J!q~?B2l77p*5{>(-9spl+3|ZVq-jChLafI`jnBCwS^cg5X(b z;|az>;2Flgut7EU?Ua29jwK_x;8g+{4a9(40pU#elS03ICGy2O11`w#L zthn0jSWqs9(FSax%Ebr+3&ISn$r$frLG6lYM^|tEjxgp5Hbt|FVA(665M#d=itd1d z`W!)p02<8v_4{==hGH%>(-YyLz+_A^OyJ0UcJN{ca6SLIfX5==KFKjW!5hus$bok7 z5(jWS54wQsLxX?H>@=0n|Mxn{E*3gpW&ZVOu9zk3*=Fl=>MU>e?M 
zJuJH$u}G&Un?Dyn45x90$5MHnC4Pdk5Grnc4VTHuY997K0%GI9P;@l3-HH*&M|&a05R?+U zpOJaFuwtY%E{Y@*t$t^uH2uyP@|htm zJ|`imiGF8jqTd;s=yyz*D@U?ny9%oc+1YT!X1w!YV&YGeF;rJERJZgF^}$+8tz21C z;GQjrjv+XX_DYR*jS#C9#J~_Ez_#{~HA_Lz;JWvNDyS^$3$(kDNA$Nf?igalvIf1Q zZ0#8AXUfvkYs9K(pa7YAD-R~Eh_bgkTBS|qs>Z&!@Z7JzJ5Z+kUV z-lwiM;N6)FhY@!xV8vd!QAJR)23yb$^rBo&SuQuIEy}myt&VVU8j*2zPc%BT^xn`+ zxz{994_kd*=P0&yA2mHA-$8`2hR?HF2t{PaIZZx7XTWMKAa$T@N9jcALK3R4U#iXx z>fET#O)(4tLQ()-U`Oc;m7?Gxew4<)19BKT95y{!_d89FfOf=0gB&>s+CdYI0Nf>^ zZ%r?G}cg3<4tUufY$rMWZ@?5i!1{ehh<*v7{Q{upX<~|ES{{Xc3bA@IU~c^(9l7FVLVS ztEaSyzV;sqa3=+?YiVjYd5Vz68#|4bwbS776dG+~uJ^I>mfHG-C;FWR`%{K~7f{|X z?y_)4+-I;+pLMu7jdduCb&6D-(s-Sqs>9zdsYfu5TGa9y+Tv%R`et+u4fs?@ zGuAYJs9ey3+B}GMJ0kmbi$C+RpBitERZu+EpfIiiJ3=#Lhp8IEkqZl1m+0+)X#ck3 z6QrQ%}*ucZFo}q9VH(Su48bhV<&DEomh$jU?pnAZcaET9g34qv3ONFXK;`}aHu;W`A@BlIOv zDFlg-TwY-HtpkHQ_i(jzk6H#|ZU68s;9J1AfNufc0=@-&3-}iBE#OB87f%)j5`7UE2fQpr}ERj3)`C>-KhW929y<+k z+TYj^l#ovy(q%l#O~^R1^9J?lMZuf5(?M@JoqMGn9Q-~s>uX24Xavuz*-0APy+ z0FVK2F-;X+oZX?$?v@7L2&kJmzt>8OeDO@Q$(+}6X#Xcq1JTeu1 z`AYzsRmy0AIc&}GEM2w&E3fJ{tsS$ishlXy>`e5MDN!0R(x&qv_17fgqDw_rTtBsp zXl*F4u{MSkZ0VIQkhXud+5#t6k~nGDwzYKTQFSjO+ab3{Kbf;djZ`dndzK~ir0vZl ztKHx>a4K1%c&5W6NRJ=TjR$T^y`={dt9G>xh*hy6vHJp~ZeZD(Rj%`F#+#xHR9@MR zb+XWbd+o};uuC{xcBxvi>ZO^wpuMW*M4~uCeta;!~-=b+f0rF^RG`W0F zfTp!N+huJia*P;^{1orWG2$g2`NyJnv7A(OS&_Myx|_;rkWjQKaFU~pA-4@kc{SiH zIuMr2v}-xXhT$rPKfrofIpBz&+~I2K<6jsDhADW9HPxp15HePG;=Lop-yfB{NK+1G4_9$rvF-cbmAbIu>_r) zDh-MYC#%jXxH;AJ;QVP~ZlXChXS(W`yeo2t2M~@Tks5}to4bQg8>ah5i{6SW@stJK z(+#pI>@c1%+!}5u%aZF*8W`e?xcG534$+V4|>9^=EteO)L870{p{Gg`l)f$XAW|WLk2$j@{<`jI{m|+`X{Oj?vyT34L>ETcZY#? 
zGAC(7d6=%6s)$_570~hl_#tFyN{00Gj(X5&;oEVyqpC7q>87D ztD}vJizB~{z4PC+!9WK(G~52Sj}pC>Z)Vf@&Czd)tjpo%6x3^|)GK@mh_BVG7A>}^ z9B$&^4!SoIpe~S_#n*JWgW6xp+ib^Rt#0Artf0rbPv-QQ4pcrq+7HCFSufoPx(Q+> zb?W*2Vqe%JeU$W#2ROl4nhVj_mX{2nKS-|-%+^klY`1U=dX*3?>NA~fw)mwn+KrHN zP`qP+J3`6_KbJN<|Ma?N(dYMbqG^YWGM4f2?Y55?!Q2dV5Rj#~kE2X?@6HRhMTw3e zj(xnIVoH;q6)~cYM8;U=jd#pD@4B?37DTwtE-g5#3NN@TowN!)`EY8wwC*Cjvxe%? zMsN48b`!0oUf7DB`GOuq=z+GIe_ML2pBICsuI51XPXSlz+}m#rdHddre=}T3J7{gcq7dJMwmo2HyFbi48uUN zU~7kwXGcv2tXXBFBaboWg0>YTt(P<9ESh9(gFJifX$q6T7k1EOaOU$4!Hk52oX~`C z_WkQ}=R_sB_0dx=&@{RH=W^ZLy`7+L-{_IArzxPQODwZFJ^Ys9HK`5Pc1pGy8>u;o zIg6&lRLt`sCD!7^$!(%K6ww~e(UC;mZmD?!vBGMfkAYZ7ZIYC*=Tg=#Y@-24HNip{ zFzf}IuBvBBlqUy0MAEYbTa@UN`ilhe^~B6c%f%5GM)L6_;D?N3UKZ1k35Pm?<9HvA z0hw`}r49~c2v7!gX-umw6aK6zy&6Nb^WTZVOr zAq_J%eM#v>{0H{sF0bR;>7yzIG^@O?KWgdVr*-;>v}}qgR7FiLm{Oj4mf~l{zz;g+ z#`DLb5CTFOi}mF!F{C%2&tQ7S3Qw|oAe&N9_Tf$isiwQprFVzeG^{SUVEeTO<}7_2 zraQ~mK4CRyw%(>&P71FJ_s`YDK|t=>%s<@NiR!Jy;?sN({nS{CFP6`Q(7XZoJMHJ_ zvZuA6wN8)T0oEVJ<_3pC-QE83SKn`h{_$2$gDnndmSWGHWN%uzTW0PFc1Gw=TE{!C zFv?Y?w2P$1K-G2*_V27uZa+G@o^w_eYBh^g}xOcYbKg5bjs9kS+|*P__)|g zdC=ax_k8jd9eni0E7P%gFPWJqFb}(H!dk0z4sj|f5ka-!yYm47YNhQ3r|EJe#aOwu=`N9P4Q(Qd_mb zT#S%ziSEju?~mLoyYu#Z#;_qrfs_Pm@6{j83<3t+Q^qC4auB~j!U zlG)2hwoF_Zi{r|E3Rhy}BogAKXjt~TwmV#A4##%~?k0^CU?l~IC2$4Z z<*$;3*OWVbI=viFe6?(L^YC~-^C^baERK39zj9W8%l*cTZQg*juB)3CrG}1mN0{g1 zi397-uRk~x))_y;hFlj_KiWs$c6;%0?s7NEl+CxYe*AN(gcq>lsG8ItQPB9_eH)wn zDlJp;kYskSe2+}HhQx>BkaD1JV`?iEXLp!%eN_Zk+<1jq)RrGVHeg6Dj3Oo>odfdF zuN%b}+Zkj-#3sH(O6d@T%&xigVBclb*TG@iD|im`iTMuHUez%uUjUvfITPMI&Ld|b zbAC3yTev*F%h5&jl;Qg6%F%G%eE9X-7K9%OV}n^f_PSg@+5H?h%%t7q{pBdip@A3m z;OoaEOaHS5THO8Wm**XiwEcY!7CLS{Jv*D8zjTz^JkR2*#L6KvwbY5nR%!WoaeC{K&i|#O(@aG3Fy;gZuc7fMFl=*gJEHT!#28IznSXBa>zQl! 
z>Uo|xt(v~XFiLqp5NY(5;E>5_?t>TQCP~p_S2DkerrCX$&C<=;UWYHJ`h#3uMWTtk z+$W>`V~l(Z$in1TIqAZ#`sr`XTMtBNnsc8C;}|6}p%RH@SJkjF2>FR8hrli4Hw{AD zh&cpN6Egi*;<-M{<%X!Io%Xx3xx|FK1y^v4r^>cX4lut$E|S$ZJbzS~r#8?Dr%2mH zu)__7#b3M}=i$Sy%+~ z0QqRO6w?wS59(udDJjW9qX-fRwnOrTMm!$b8i>jFxE4SnYTDR?Wj(V{)j?G?uoLW& z0n+^3+9lGvCVMki)J(7iaUd}4KDLc4z0s>Et0g%@9y_%qqy}Eqi%U-f zA#N@5SdX5K*@zmxjetoB_wfZu{*h_i^oYRYSdJwh*?G09*xI6itJ#L{-Rdkn&{Uu& z;Gwg-D4)>;GtJpGzUDY*_&awsUqb>9Yvq#x*=ME6gXAP_2-lEKC-wuO5X(aDN#PvQ z4MmNcLqv$`!p1umZ~7(p>QJHSEmQaU5{V1fjUK^`-cdT!>Xik1a~I*%7_K@;OEqyl zTM+0@wxK;A)EGX&Gt(*2a>w$mHKC+lQCsmcD~ea7Qd3wZ!XhAi<|$Xxyf4@gmLGI- zXV)@b@$2mS%th-S+JVOoPq9^WCgTh^ai@Z7AQhYBuzmN<3SN_*Zis=r`D5eiu7&uQ z2^)cq_gxNkaEqPmRFT*O8<4X%gZ7{ppv(gK=l&JQml;RLxu;paPC~)D&P<{}&82g* z4qs{Gwdbib@pe8g!^B^0gkaYyp98EzOS7M(esz3~7$|-3QIgmZWAuz(>6McZ?-EcK zb3y}AUjpXv<@SX&@eb3iMrczua-ti+LBPAt{qr<8` zd>={#yhi#Lvk*1!*Lb&O3oCa&_ECHqLn%v2?Z2FNtgjgxC!R`TyQDt42DZhteGBFB zC+X}uWFGpkmC_c?WuJ)mc8Qc4wwzN8m;ICJS{P61Kte8sCD0$0aR0VzO0O+^#!lT zZ=RQ0v5RwpU}Q;!$Yk;M@O$*xos8;&mE!h~``q4-fRbx#YESr{xED;Wv@Sd7`|Tl; zd+a03Bw`YHx@NH6xE_mC5Q=fat<$wG!_qaZKlW!6zowD|!Q7HyIsi(4P5E#dL%D7W zvuE*?nOxu!u4Uj&idD8yQGL#HWYif;o=iqTmZ{0015`k>mP(qcaG0T`f2AyG{^_6t z;ku_IZQuH^a%grNudpL%tJTf0Ib`tGjw z&UW8Z*Yv^emX_vbKx^w7L9`qXH6eI-*tQo2aWDEJz+Yt9eS4 zKd8V}3v~=&%b1Evr|y9JIwI>!X~;*6rp_8cqLAUE$t<^~EVog}T~7Q>{HlZ$H4;39 zg+NqFyhBFrgzx%bydAH85JgH5IYUdfvWH*FR{D-wTT+?BkWtV!y(&%5XY%!uQm)Rh z&1?uef^e~2vp^j-t?P2P|0wGY#!YMS4mM-RP0pHA!<&RT7o*`8D4sA)W;pN)3c+k5ySlg2hR=+ zXyjb7qgBnwltW>`cB+PHjrNub{E@MsEz|T{G1B&(wCP*O`wB#*l4u3M z1c6gAh2jIkQl;ZxlX16p1A^=168ynuxa}>`1O&Z-Ufg78*YulbS*zj^N1_d%1U*gs zZ^O4nApWy^w95WEG85%pJ_r(oZUPUO2&U;I)^!L1OKeo6_Fh_JB-YqzKC{iKxDrv4 zur?7_lh8Tkfs9%3@j6ygoo0}<+>0OX(1~2kB?n>AIZ_>@cp?27k9jX$A#<3w4N#-U z=y-B=B!993_XF*O5$EA@7*zK z4Fr$rSQZ@039*&kKb{?uL<2C3c zm4V@2@j`>4siSm>gt1sBE2lSO(M`d&5z$WQ*z-a|qEs9Hp|7ZF^C(oIJT-!QIAcu% z+)3-CFZ^DX8TyrUm!}30qE=x|8@u?2RQWVVBmHGFGurR~ypT}!_9v`FM|o3e8s}h( zfykR-k)EwngubJX-QI5Z_>M45eV$|^i{)*&HEZV8QCJmq7&ccm(3^v-VOQ)IK#U9m 
z_q#Lt{BccQv#;gIdY@5-bv?-PueF%r@5xA7C|9$K4 z?fU!ro3C9u>OTYg+-Cn<@Y}Tx&53_C-7gCM+;{x9APQaP{=d7B7jZ83p?)CI;{8MZ zzx$XMg)eq>ehA}T`Y!yV$8%BiVoTzOXgK~q&;9>3C@unCtnhyTMiP7nyjbsF1h|++ z{Q!WXI|e_y!5>N0MU;z4!w-~3bPj<=`6Y$8i12eZ@IxK|V5a~8e#s3kieL2KKL9GJ uE-vGuH@}GWvw!}91OPCh6U~2KMfk>Z z@44q3&-wm>d)I#UUeA8ke&1)WwSK?#76goe2?SsRZ~*`S6M(6t*2@_U0Pw~H04M;s z==##mP9By{9>$tpE|%_woKQyx#%xSihq-|Kb%WjvG+z0Fla`DO^dfJ_=hX zm%!!UMeiqn@Kn6Did99kWz`jYd8RxT3W&co;dEHOk@3~F1Sh9E#Q}@Veh+_YLo6Xs z!)V&csPt^h-EV|PdCbI~%^B^fvh9)A#=X}LCc;sJ^yezh)Mky)u#yp$@3``oSyC#)P3)K{gef1s z_lF!l3e>O^#3_2bOf+M%!k9&OL05`|kVtd{c`JkvaKMUNqbWd*lK&Fgzg!36S9z`xK{FYd9pkB9%?xNXw z3EHHT#x4?GS|gab9XpchKb?@bMfBvAu=37WGG(v3ne|MGPp<>h$py8RhK;aosab~G zTvNcZX##Oo;P-R}!}!;rs- zGFg8oNuYRK;t^^^>QN-22H>JW9k~9*lNZiz_GZq`_CL+p@0>wH*))`v|L#_-t^)1^ zkv0)rhk{px37|Yj)jTW?|6ykLA{~Q6qpOc8Sb?=)bsp|pLhj> z;-u|iei%8XH-*EV2$u8E(GUi%*)EbkNFIxVd}sS>(JldTiaSM}cuX%x22g1IoQ?TT zXinZ;nd7=KIG^1x$dkv;{UcpJ3Nr(hs{xlG8av3K%9y*aQ|xy^UJRp0 zckmQNiM#-#n-feA&)Z~AaSt1>tW>;)?b~uL3D;UO~Px3v~t{799OFrq7X7-|eGBdty%o0OhlpPwqs&aT1?$F4wS`NGoz89g-yC61D zl%u`+REP`7ZY!X|P6pTI94bM)sCmcY%zl7Ih+`SSwxtO5)@t&r64j|$(rIADHStkQ zC7r#iC~6;q{iF&Bf>uG(Bl?YNPzFE~avxG7S--v_y12kGciM0FVN+;AC-ofekm z6JK!pd5q!l$s&+?E4OvfX#9z?n9? z8eL7?8}dOIGymOdv<;=3VA|HZTzKTXz$ZI5ECqUUMN44U{g$n_Ov2&{9lKeG3qtt~ zKDklO(Cc-{!;hKRM;-?rr@Fm{6iW3;BjeW>)|)?kmLL95Qr>%$D#Uw-*vw%1sW+hk zlk3Km=vSuh8!9%q+$-?A5~bU8eaP|g?#F`SGE0CtVfwx8;pTp(C>*T3Ldzq zJV)yy&nI?d@z|Gv2frKPsK_q|?_`o~o$9$yn&3AM(y1q3t%+l?&lvN+5QC@C$S5L> zXe!3pWzeCI5Ub=c#DM(RC!~Be>^2UDj%D&15+*ddGo0ltU1rNSw8=xqxWCRk0MglV!j6zK+Zi>Ypn^@2Dw? 
zQIX%7*Mr?Or9!p3?la+TYSe{&l`Ph*t}kVjhK8?sW<&!QPwEs(gY?NB9b<~4HRI{) zaKtCvCRZK@{U5!!A7ledhBlAf_Zr!&_k zF-fO?vK1x&K!OmA#kDQjcZEq{FL-<3PPKD_a2DNg?rPOTSaQ#1w3t2n8+6pILA{}j zd)*nJtnx(H(f5Z!*g=ZGT=k`{N8Li4rivgohe*3lko2Hn;lQDy{3~jOI_d z>2hdblqbfsUn$wvl{arL6ZNhXX`CyWH_aG4-^aE%7|=LBjhJ=o>Y{-R+IaK5oFaCt zasOtn@UB9I%Li-uVJ@w>q$FQ>4w!3nqacOS9#4EVn+QN;p)(3BFqOL$3So0vK|Ew5 zhQKvAKTFz{O>jPHySX&(GSE6cgvKwUr~3l?3*`04?@LSaYLR?HpUwn$9UaaIa=ptz z-Ra_fu9xSbyv=ZPIix}2W4P=@wwQ$%Z4;W7>6;PY)D0?GMLb;3+3ZYu3>3m% zq@Q|3DbkwGWF>A8T-yy}%&a^QsKE(+TZ~gi#kQ?`VU#A+>Nu<$Oz;8i+g26+t#5>5 zy)Cs=K-ce74Bw}mRwi5>?V}`BUPh%?`|`59O@a}=$Ctm)jKB5b>J=%;jI+Ockn;fEFZ(D3X`XI=Nyu16|y#? zBR6iXH@=!*mLH;GO@3yT(k;6~=Y`1>HcJ0Y{9ND#ugp>~gUjRb1rj#bd0#)KTD5!f z2*rR+L5$A_YmV&;KD6cHEX@xpILY6amPiww$;z9P3THLR;&!ms%H!7i_uP{VwthRr zDzy}mRp>x&Y_ibhRJ{E?PGK{`gO@iwPhayf6HMZ+xOB$u=j3+YPTQa^j$x5yPgW08 z=c9x%!&~%}fg(U}@mIG$Jbli*&xZNJ{?>F8l}JNEbK*6W4!_6$b{}`|K!?{6xlQjp zALJ94Ln^V=FnVmjmPuH(&)O{o+A}qU<7T^FY@_<-)Jh6L3Bgr9VLX+ksRIx`vFP}L z=7-1hbLHu~P%Ev3Azt5Mj`^0-hMCr2kH}Uyk70dDN@~$M%lU|)V1?U7VQ!tBZHXTB zlKdy*u`%U(DjOFT3VPl>MXR^4%eXQOehUk5%InAwl2jua`jn?}$?|V|{B;Qv^%!(o z+hoycsPETa1{9t%cHbck?YDy7=}Ysx|FoNVFnrLQSB}{ln%c(pGCC^RRr}<4U{=kn zY6y0Epzhi)Y-NrJ8@%sX`7y@IWwa~7sd3KC^5LX%MO|ThgiW5gizZD`v5IKsbGasEdEn0N^YVIowhZwRv2tZRxv!== zu!A#nGQQ-r+wl*b(rGumiVYjkvf>atTx@{a^5vyN6*qR1JuGawg>}PJws#1SiR$7I8QRV! 
ziU~`MvdFd{k2NLV(Z_=n1t}$$TC<|6tG(84ZHR7#$#Y{{zcWwh65^_PEGwVv@Hvk$ z!V>gCTkjtI=6d`Bn_$ZWmNhQLcr&V-5zW!>c;VHF-m@4?X^Qya9(fZOoSK7K_?Fy6 zbLhkt%DVwJfx)s+B1tUPob?*Up39mc;eieWzcSmTctliYDMNt{X*rH%pUjf*VE@IU zrEYmz^e%{HrGGGNfw?Ed(}T!h3I+=>!}ZiqIpi^MvHmjBG{T!tv&FLUZua7nMMjNS z(oSK*bx@N+F65X%UvjA7EIGr@fOxUIY@}{-HjF>_!0e=5-w_)J=)zgR$RhPXii$5~ z>UQJ&=kE}Gvb2=BQ+!EHcf*XggkY5sEGF*vEa`|{&# zPtQ`F&pWl)3T>BTnJQDIJRwm$PKV5 zj0CfG20u$6v1u^GEdybH*Iglj>Z-E~!(WO^2;PcR_f4!5*)|vcz`;GT`P!v4gg6Ic zU~3U5i#w@%pRDq%mN5;<@9soN7QW~0zDh`U5d6+WLOKz`(6%=g2o67zE+YeeCDcV% z)+@9mrr|5zd%+))$XHLGcLeKz<@9leuZ&`~5F|0%=YmNuYKNNG!9qw5iw`qaU;U7U zpJB*itVb4#eZ6vu?S{^0g7BAt+E0h~=B3Z|np&NT$b9z^1}AS7YeJ;pmXXuWwAWO~ z9{RR=s~;j9c10N1Z{~vY7L~rbF!k|*Oa{APap^_FV=&q-15AJW6y3*JcT!yK1A z+qmrbzz2~7Yws_%c_JHiOVhk!HK<`n(7MBwzAm?_{tZc>!4>UL;Z2g}f&o@nNbJkc zppj$94rFCvG+|{lPpt(e5p^nb=EX-e`~|dsIL>&?qQP@yH9r1n;0s7kg>_&+C{n(z zXD=-xkOBUE<`ev&okvnQ7FKdf7=e@^cesIsv{<)4xwf@z`A5Ep2O8d3q9QsX6kn+R z#h0HEox6>trH4D$Z{2SWYh0X4ONwe5YsHIHn#qv{7RPg zef7oV8J!3Lb5{H&mcz(uy0w*Ph8TmT>}h5M#8U}JQU-6rU@dzQas0hzvLl4tTsOT! 
zX)oz?=_BjZ2sk7zgn+V)iL#QsIRU zg>`3HWalBJ>EMRBzhU9Jyv!58=iXx9f_omvE$&QkciqiS84zqtV%&T2{(48~caz?obqwRa( zVxH-@YNEUx|2D>#N4}EQgeIk+?;I&FVS@=RbL-R9lGD|-)m?^jhqD5WpQJh)K;1VT zRIqybwnnjtV1)cS!k3KGE5h5yw~6@9>FV`1k(e&mjTh{3Ba{1>VLAQx@(WuS0 zLer--_ntB;Of*BmlMI!gvT!zsxH-GHbD2B4S^m4!^1rkcYUzApB*7gZD)?T|P1fpS zp6$ZNvFD|Is>GNN!)l^@=Gs&E?vFa%ELJYqg|2XWEs1&^M*FzNdG2F%hsjaClHtae zD1#GJJ^W~2MZEXagib4MDk2Cyy)|3b3l__pUe?`OVarFw+KQclaOn03JRRO5G#@x+ zEQs)tzHphNQcO2ap__`M+az`&?{Vj7d#Xy;MF*O=N0uI)7NPLNcALKS4xIWvn>F`q z-0Q8!ps{POOf?o2_Ibtcg6W>`9fsj94h%@DH;!|1j^X%6-Vz(M^=wc%y@4{tiqs|GKvedkByI`M!O)d*Rp|Fm^Ajc zPvfDV77YC;v10J54!9+l-v+G68T+WG(o$Hs1z^cz7R!L+wAHg6vv_MwgiFUd=kKft z>{&WcTYnaqO(aI|D=KeHaACydpGe#SvgLJZsWA{CbLh_ZU0)~@7Ejj1HCDjS6E5FZ z2%9-liWct&DKJQ?+e78JwF$ z;NcH-e3pIgNQ#}Nbj%_`@v01aZX&!&O+oib*H!oFDH*$K%&=Qjyp~cy`TDVUKm>QGA~`1 zUlaTeUo`YbsKnYo7p#BZuHXBAC|^Use^v0;O4{#&Kl^+XC;n7d`&IC-)rCI`4xwC) z|G(DotDav|&VSN`_qPP}ufo4(8~+fNLM<5TC;pau{8huRxx7C#un_)ZX75)8zXl|K zD9A?T27ms-f5MYrMSu0p{tz9z^`EK#%~ShT%U{jtA36X4$2$PP-;C<7;(t8?|17>w c^C$6tABYez7HW3^0DROHfU?^k=zqTbACj%5p#T5? 
literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testdtype.xlsx b/pandas/tests/io/data/excel/testdtype.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7c65263c373a3a43ee6e7adde9ec0a3abba219a9 GIT binary patch literal 8501 zcmeHMg8uQ5RJrrDr4gas}w+zOLeOI5ZA`*o9S4Ab1=Hd^#SIXDRpXwlU0HFfWkNcFfUO)TKv7 zTc}bpIdIJ9bf?Rx2@Bp0^nH))sS#>X+cu~5cFY~Tv5YBudJQ@z*{C~cks|JCVZ7rL zvOSO75P@xdUeOQwwwJKs$8&`R09;>V03d&(Wu+GQ_8|&u$|%<1p=fFBYH9Dr&GqB@ zFCG7fIrx{M7saZmwS)2DJMxzy$di$oXksa)=aTaEjGDgw3e!ZD5m|R=W|~=Pi8ZN% zFy#CieJ^{bW<(=4k&MTSd_`fTAd%Zu&k^v%a~F>rcUfE$9=Q}PbdWq7IT|@mmQ(a( zd)63rx3DHJQ>ka}_M_oFnNs2bt|!zuWI42O3bEt>y)G5qS+mn(v=J$dZ3MieS}>M%cE*EZy}1lc%wTeanGNN;BDa zZ%)-zq&gEH=_${ETsL#Zk)K`#-(FvuXNMq8dm(bSCoqgYZx(ez{gWgC;;$1AP%~18 zA_+AB8x3mD{U4q@cXG8eb8@o#aeDp188p-phVt^i-HOyzlsdq~jd)if;O8mN$Vn#L zxtX@Kz7k>gRIpF6F!K5y&e7bhGt?PX;>K`-d+zjiKRe+kT*M$dYQ2{iN{Z=4X1^l> zbm~9o$G|q*M$iSy0m(?WHoq2b;}O1bBdZmUMRKHrgjP@4n5jata+zcfYlq-`Pd|Y@ zc%Hg_zSWDuOkc&a|9P;+79y~sx|-n)yVc5dt@P9}*4sc&`oRM#90gGVPr%^%7sh+1 zt+L11`wf>?DxRP0TC>jaS7-~B=nDA7yy5N)AxJrTKlWI+m3PVYT00BwQWw4K%~Sgv zeTO?%%RSvAbEuX6CuH2GjF-t#aA8A*J4C2AP>}g6Sh6+xVu!)R4H}&f^W4=g8B&;w z$MuObkZ9v(#y5>wVo3|%g~TkY?4N|&H*hSMfo>|@j?m|w5$h|=(q4Wn#EoFL6;NR( zf$MVh7iT}Oe#_#-zK4c?!!m+xLjmfg)#zI(s#86uQ_qTR;;oQEJjtXWY8Q<4uo3}= zmRG7uw|w3voqK>bEhvSFh!s^?dxd4Y?hmO+G#TI4=Gkj4Q^h00B4!0)a?A;q9o>Fo z#T_s_e{3jXs$Gp;8lwo;AX0o~)#+tj6co``6*bZMGJJ~HhBLW**0CLw8JmeL_bun^ zDa%}WS{tu8O;kZ0T%)P~3Z&2is^!34dGi(q+Ex*oT#T)+*6yAE;8m%3geH z!3-}Ax0uJTegv6}X&TIi(I8jI)vDzF=M1a^ z_dWMxU8EtIVqMa}@YR|1`gia7d*2n6cfwMHc&P}@48|UN;Ttfztc{9(W$e19WP?9? 
zsq{8^fU69+y~Lf6)x(!W_}S*Z`HQY6?@p+->D8K&X`mfNnZ+*1!neOfP#CsaYZ>YZ z7`2&z{(~Sl8&g+H3k`QyTSsfRAA63)z)V3K7{6r?*7}^Sc5IYrnZcov*c;u)Kb^Ci zeAJyQd3{QsM7YH8%8xO4y#03NW~5lRGNHB-25x?@M|NRQ-&w^eS|@29p#zKit_(cr z?Epu4UYXKX2Fb>;p4*5CZo|i0>dBWY;z0HZWB%u2@HaFv3J4>b@?mxvbgh+u4oP7i zNCN@%vZ|W4k(|DW!s$pvd{+ei6#s}aTvx?bET{i<)pd4X_Z9G0ENRSWZdAGf0Ni5w zkre#K5_cO*2TShXuJ?ZoaZ69hiAIpP>E^m0!*RM!8FM!e(~Sg(K@ZN@1ahE?MJe{!sck{I&jx zOs83|*I;+$;A~OEjg_xCfEd1Iu{V#7=6&D$I5117FI0q3sVRt2l2T3U!LA!qpjw@G z7_rwiYD2$D7HL-1l`u#{!&f}gV?eV z-^B@VHf<7GCtrGowK^1J`x5iGWayHGw3)!mm^bRG%b|f$mKe`|sc2hU z*0erPfLtupI8`)noG^H{i)FFbr*V25G3nabNdx(4 zy`Ph(aBzxHL}>7b=Bwos?uRnXV%M8T!vmL4-Hz+a8M%kwPxI-q&-e$gN7e>>J@>Xo zZqi?$_iGS&8_qkD%x2=m*o35J_@w(gc7cnRv+u2Dt#>3n0108u(v5PGi?pOMT8SG3 z)pWraGAd5}t8auv7TqYNWZTp|GfEX|armSgg!cjM+eRfW#W(yRWOEHA$mN2P{$kW| z@r#RtU9_aii|Di}A6}NoBpCjCT)8{UxEs$eUlJ2dNGsl|cN~T|_gvtLW#49!5RiQw(+xMB3p=PqQ{}|)T{Kxl4Si;X@qKT6{gLjBmllBDY*iuh*o;kOLV$HSP zJ;w0_gG@=rVsS-pk#s#Sl*>LG+`|1_J%0$JEJz;Pp0ZD>k=7d1dB;=$#)7&k(V>O#ApU)~MZ<%U1AT7sOcDTXATc z@un>kXKA`y&PDpZq*$8ZL{`q6SU9s$7Q3CbMh?5qulu%Skac7~tJGXXW`RAavB^xQ zW6|ctYx(sEcV6DKTz$<4j4%l%aq0B!h2%Eg4%@&^j!z=<9<1)BP6r7?h7@$<0U{tT z@s~G0JbuQ!%ZB;fj$*8lQlvhiDe($=3%A?vX3u?OU%Tf4sSR?P4>HbepF(K$2|dn# z!z8rIdxb)t_C!tou<0oh%c!m?rGiXQLU5T+7)NDpbPtS6C_22S`QZWGR9V_K)JiL% zpV#LT$8>W^{X|QUdsGXY$FQ#WO-kV^%jtliV7cpAK~C*c+hRTHIk|D;p&{iuN*iYu zGCJNJ1*=Hd`Rh^)ehUkwH?N|CiBgPc=-xbjoh%pG?Wc>Ms7J5U(khEiLw%>_+`r(I zp^FMXq}K{c)syOR=W!SF$MBElypNczp((9wFJhvTU9^u5`zF=QD*Iu_d+IK|!dB+l zp&##fRD6!LavtnVaBP?|v%EK=TwYrcA7PVg?yN~uSfnCacBR7)6{=$KzJL4l6V^TM z_7dGbLN`8}53tE^qjI;a*K@c>WO?9DZPRjkJGS)k5pj=7d2(KkwqpgQ>!dGbwLRtU zKfa}1|1vJLPs@r!aDTQQYRi|K231(wPIkAjy)UdAs=6tOsM8^Uye#+n9D>Gaz(NHz z+}yeKkx6I;JHEu!&ygp^ISpFAO4P^maT+o69`tNef+c&hg{Lah)M$mVMV(hwV4qvN zwJ0&i|2?&#a%c`YHoRE{wb-7nnNp3Se!HXDi>jub~E1!Wy>aG+e-<#2!4bux35Pg!= zH&e&BlA6Zl5jTc_LkZnne5%Kp1T6i%&x|~u!!{g1sfg9IZ%{+z(8J-4PqtewZmV2t z%ZpR>183oB`s1Y@rNc)_)Un6*p_|7xhm7X5=2zlbr>vj`7S$Zy$7edd71vEmcN~I}y1LFkPj+|D)q1~Gi!0Z5 
zK9s36Rm>F<#R0!7gD}UXfc$C)8mS(2k?N8b-%;6%r1nnmxs0Z)UpjPD%9jN&s)SaL zj^fP;b|&DOscNb)r)nzVnVA*}Hs$j(=eQ(do@6m3od<$n!Gd3~U17-(e;@y1lqsXa zq0jqitGK;Q9u!s>q4}mrIC~OKrk){?x@kk7$=is_q>~x{I9?15Vys1eZ~d#m+SN2` zCkc+pTnMPkm-y&~^(%WBt|G}7PqTsOyR*UPafPJe!#Iz8=(#mFgf*9CZ1ju5K=-wfz zIH_SsMew^hl9Pn*c)2a(-`Wd$Ya$_?2%&G?846GeKaegZ0e!{SMOW4Pdn4nr+SSoj)f#XyV(XukqXtpQgF+tF(=w9N<=qZYn|115ss&Y z7*}CaLAkSvU!CcF>|WS5 zH)3E7yR9s>;|@^>yLj=#;aGk%-`zS(wccmMsQV9T;cfY0lKL=*d5>1^r+i9#QGF}# z&o+6Y8gxriJ>xW}VF%FK{l%V6*UH{CNsz%MZGXXalIDy7uroOB#R7QX5V8eXoEc15 z9L!Z~hDk&p3!QlK5qw$#@9qyX9I|Ne99RtxkM}Ks^;B5<`h=q7YP)w*BLe8*7Zc;~ zy*3_6;W$|FF@6L>g4F&R65MRv_VCKqviX;M5f3oDwnSxg1Sq~x{)aC=GCDUKOG|e* z?%%rK0@l~BRiY-r#PFT?b9z+%gcDH0!KBN{@I_3spvu^KN%^HL?fa^;^OIX5c+8pc z>p=T~K-Ikd)U6qXNTVz6Q)5>vc>~n^0(1rp3ElM4RSZ zR%WP_v(DY7ILMT?jC3c*eu_HZY7xtsm}~6DcCqKYq5xYdMSKY>x8kei;V~5ipZ=4i z6Bn@AWzn{v;J&&uBK)qszpE*pSIU9LDd?TLKp)U*zE_nG#O_0-oQOkEp8R2e2U*2=+gXWIhRnt>@#Z(d{c1{JS{na=pmB3ItxU`-QubXIf>Qsj^=l%g zYGz+CoJ9p5`L1=fQIjo2Ar-~bPddzgO)$?k?T>C;$(9q!g&3_D0kgTL->L|5v;0~a zUL5#HTH~9PfG;@Synua7Xr5XftCAe6s;TNUoZ6ohXc(93s0Vjlw^PFE=vo`ZB7(Bz zR*0^C7+Ig5UUSM3*O=ehE$fO+jxJRGMgp zf+q<|pR#Z=hqyX9yK$R4xmx~SS@~C%LQS1_tfXBKm=eAd^!-MUp}Bj!K;DaIhZSum zkr6Xe`;`PEYzhKXe$9D^a{H~M2B&nqRB`~KRft8(s2{=|%-~4@fNe6-Kr!VcrJti(?{ z%jFcwg_62Ub0nAQ3+|+0mOdbw0bLW)mYmS%F^F2r+_<*er}#i0M$nrT{qfTITX!}C z9yKdps*n50Z0ZhvNW`Y=#dSvqYYhD(S^4$$z(+m7P~dXo$@T|i^Di}0Qw1F2SEwf> zsE~sAuNX9ScK#QFDD3^-Qe!2Zroi|wb}-KA@n>j+XCU|iMP~9(4oghYVymoGa?LZ# z?z}9D*&2(g8vHWhwHdJtgPzMM%+NkCK8zmhguGgL2LfW!*xfvihmOw}`jP`OYq2kNy|sYKYR`9sIqN_J`q*J`u%I<{`HT7Q8zMvrh+16$ Q02lT1M;+X6>3-b(AMX;Yj{pDw literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testmultiindex.ods b/pandas/tests/io/data/excel/testmultiindex.ods new file mode 100644 index 0000000000000000000000000000000000000000..b7f03900e6617408e0296cef406140b1e408146d GIT binary patch literal 5575 zcmZ`-1z1#Fw+0DmkQSsvazKdzrKN{%hK3nR7-A4rphmep)N|8olK!$D* z#0%fO|NZX${(JX%_C9Ctv)^@|wfEZVJnyRo!onuN_|L+E5#}WR4shGKt9ZB7*})m= 
z;q3~wa&>ibu(9%RaDfSU!XN+_E4Tw3;Nl8}L0oKJIYVI{0Jy6=)CvN(heAEHZf#Ox z;QnQS1*17ExaTo828J3b2FBf&;2z#iP&mNL*=fgc6gDqL)*(={&s;N8;Kc0!{6N+c zVRka3dydH!8Q+|z{HD+vsoB{h9qFDa$5pdiB;I@Adb61kb28O`{3)J>m5QoO{u1y4 zf2F~mYhU$bEJbq*lS9>J$&yoydm@bM(N#Y=dkPi4P#r*xJEa_YCn04zsZN@{PH`zXW_ zZS0cCjsUB-KYpK@SMJ|2d@h@!`9fiRnuz}6VZ}`;jm$Suy&yC_D$=upbVAN#PivKh zyz82T0V~)9^QW_$`egW5HHIA1X$0*L(t$=iz=TEWgsRR=WJ=Wm4;C~91qt+9lBuxF5gKqWO1yBvLUE(gJ>-jtcS{XwkOQISWzTaEq_p@gR2w&A z9ACmLALh?UfUu*MA1*I7%%<8rQ>wG*nZ;<|>0EB>N+IqW+?{DpEu6D7Ul5H*)}X^1 zF}#ePTXxM==Jz^I4iQX+;$qs1PCD)dCQxgt(XWaOysUkZ%JHq9^L-cv!UmRG#xg^0 z+$&b0@SK68rs^wYIoz%WFMT?-C}@Mko=Fo-^odPG6|Ak-9I#iI>V-u(fbGWbrx&5BLBX*nC>V0SeEvVY*tc!erpmB8flS_NPVzu6qV- z#W1I4UsfI5Jc3Vpu$iKozw{UIcgq=7DSP0sz>|>TiqF1}-gL4+wBrd>Fy&G8L@^TrbP?J{8+)F5u{~HmeISs0fi!k%_+H$6GicL~giWTk)Qm z%@7N8eeeDG5FP|{w~S5$w2$Xng`M6yXl`AKtPt7I`JQ-Ld?xucbM?1~Ui7Da4#y|D z5SAxR(TF^YFD`VylA_&7wrL})hJqenZbuVC`L3=J!+29jIvuizoZ6MTtJ{R}=MNUf z|4t{|o;9%h#26U(qZk;3x9P;j1?F*=f9~>$839Ilt&=&ej7_HK}_lZi~go{An?_u3GQ(VThtJ)^x1g_U@ zVnZv-yv-Jgqwv=c8b$_>l=ftfl~xTGkypOLa5i7eT8tR-Fm>yK6MEwcNlJ_`AhN(UH9l!q(dMGP>84cC3 z`ZA)-A7)VC8c*pYmwule3GO>q$oxeKx#W3URVPMtUD2sNlT!z(c_;V^_VbfY+nb4@ zwZgW<_|WEVYRQ;nSZbVOp`iAj37WGaT+I<{2Vi!6*`blp8uLRzWJNe1g|Cg`4))hr(uI4gGBOL< zw$O`mvqr#DmMyw63G#!NKVrmmaXyKAm*>&SvHNAST|qHE2Ebs422FS@L_=j0Dq zQ?gov#$g15z?|p~8~+M=KCs>8x~%s3S0g%Gdk1+>VEr}JQ5RIZjs+jpebIvIVBe=? 
zlmRYg&-K2}b&b~N6ej_0mk#pQkkY#yuyX7P*vTfl5hOm07E}GReYi3IY0NN@sx!^h zLRw@0BFEQ7y@gy7yQmyieAqalbb_a{GsB;mw(M_nDU2iT|py0Mt2%-iFN4603?E$xEGw@Zp;=t9sL19yB_{V%^{S zXv37utd`}MC*9P-d(MGuRq?g}q;KL&dLkhH3O{*f7K6{sdT&ZBvk=(S17ilE>jIj7 z;XzX;qAUm9ZtD>C5{d+~q!i0`A#$jo*}kA|<_sUewpYD9X|2mNUf4D3loRWjkV?1a zx$fHKz{ z#&k378R93`X0Z^hp|W|)hxW-aMU)KJgcl#l|yroiWP`Zy^f3t9hg0}GJqikU^ zCXWl)8Afqt(~lh$gjJR%*e`e4cOy{0*$`e{ zqnPkhj(vh|C@u2Gvms|eg0am9kPOpO^27vBo6k)G;{sHC^$h@8*?QztWgLq?7fkBi$iW!+m5+Av(;R(1}SfDGSxPAs@q;5RLv^(O_P?l7P=IOq`IQz znWC15VeX{ucKHD}=VO5l+jyNT01E%7W?C}j5AAhh6RV|hAxM>J{qD4IfK0}h?(^;L zQ_NOZe_^xIFbUO=)BgQGH_PV}xOf%Np_uxWRv|zkE*&0Tnq?uekWjFTa6RUn2X%)9 zz3qMbo)Jf^5cN&Vc4DqL(bRh=bLZwQBIT{OE+YS$1S1&_&nR%{N$XWn=*3Zy=V!}a zkXTp(VKFIqBBcXDC^O>K+VZppe?4v(DHc4>q&2#a!_D0D+xQ66j}@)m^D4nAx5y7# z_vr|m8$OPgf#{89RdvF%h&D7fO#%qiJOb(m)V{YNRyecfI%VnTp-J*==}O4-q9g?F zI#c?6qoXh{i1!)O^}$x9a8_FN=c)yh8QD58u@CSigkd*4ni#~WCq(efk31g^NRP~+ zl%PmA9~N3s$W5-_V1Ia@@0jdr?eVbCp;-v{2OPM)7_eOjAP9W#(RHL&eAkru+3Y~( zn5_+=Bi7!Wx^tG^#d!`P!PoK~cQfj58_8ak)HgC)Pf=-mxy_l?45FtcYiYTbVSWRx z9(}?IPcL>58u}X=(%WP=3Kl!aCKHP3iup zag1A^HQ4yKU{Xck{DY_VIBFblIl=F!O5oVM|)GJqVK|YK2UnI>Imz4 z=ZELdvZn19+1u^J%C~e*PR|~t@ko$E`%<)|riK`Hb476Vr4VnAzX1z`)6;n?#N&*) zJX0TZj`t-1X(_c8*yK!zaNP8h81t{q1@1SBi0)iet-4iS8K}OCt4lEGCFY3nt$kX| z$f8g4+HiJ$&-61u2OA%qz2`tchkkqEDC$9fIHg5miR*}zXDpyA>3~Hq`H5KQtZ}MY zFbwPx;?2*jwDN_b_Q(=+Xm|+C9V$33%3H|dR!{B`RQlEuNs4x%rY7UNnKJ~?oW}G zPVt7K4LXFGba^9gZS_Exl9Z?J{5E3Hr{=eY5+zlOdrF47>; zafFumt|?deIOtzj7+!GWM~jSFzV_u++FVj@MsCLwG)hEfgJR)KGHQLx~XhAmfM>o*h%DZrgQU64>&4aCrpoIghvx_4N6w+XXHs$w&PX z8U{wG`Sc6G@7*R?W*5_vYR0$npyI}{m5v_^5V`JSm+N)!uEZ?U&ogkkcic)Z_FX{iB;$P#A1qtIjwh0Z#c(Movm>+JTeiP`cEKOMF^s0_S*k z%JiBG1*sU3=zW+MpY{tz{pABLKJrBoczITVXoeQ~iSm@T74{QW7E(pu+G0G`G|Xy{ zXj1TgiW54n`**xab|?JNy83L7jPpUsL^XX&S1%_ekt60hpGxGm9k1LGx<_Fr(4h13 zj(FfncsxQK><|CJ{J;28#1QG>%q>r%yoZ57b$eG=(39m?(NGj{wt_j>Lg9Zgr;o9+ zj#y8K6wbqzy1b-45xkt)J3hf@7~RvtxF(`(>H`Z>8dmY1+RAy)RjBvdjG;A!?5AuLjAggaBRVMgXMl4n_$`L>0{ZK;yAGZ 
zA0)MfEigx3SFeb&$xE@mYmA{co__D-gSPk@Y*+hzQ(EQ)$ClEKP($vg5M|%1g-Tk1 zTSxz$I@YO))a>pX=vjbJS*C*RaidHIV6Fy z5LAS^QG-u#F(qFyNefr)H(*y~KKf$GCiQ}Hmdh3^$Aq7*4W)t2&`f2AC!x9rurwqH zls|?@8giEXfojeuRmJoFqFM{1aDUh~m?cflflr&NgWK({I39m&TQ9k|@_k#RGdc1T zx|Opmst)=kf{Lotq$Fnn$q5w;#u3OrUNq!yC#;HDo_vQ_;hw%2l>;Lu@_wL5qS))U zExm!VQRD>!*Ry8$swzK#Y564Cqj1hNja542%p0D}9%nfA9qxZN=9g8p`zRk;mFpC> zMUvt)=4ft^2@3@L3dG9LwugRAHE1q9|G1hF@3I|N>uDFm64X!N<17nxDBi*apP;X| zek^Pa=Tf1|Us#+J%~==}hx-3T$G;JJw~v2IkpIN}Q(gEQS8yv~{tvC; xpTK_}v%i5;5B{G+_fHf549~w!ERo+zvHumUS|A+UyEizu7xQf}$5Gz({sVC~Ab literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testmultiindex.xls b/pandas/tests/io/data/excel/testmultiindex.xls new file mode 100644 index 0000000000000000000000000000000000000000..4329992642c8c4f1fd8383099c56a5d9d8026f4e GIT binary patch literal 39424 zcmeG_2S5~8)9;Q04g>^LL{T^u1ZmPl6s4mmAgCBMsCd9pB1I{dD2lx$iX|#aG`46= ztclTBqDB*A0)F=1TQoK_mRRqf*>`)lw^!u+OujGQ&%)cgoi}f0-n^N4W#8@{`j^e6 zr5hVxCcGnr7?Iaf1UoOW>_OmN|2T<+t57K~{7#h;Vn7p4*WqKIe z+(b;sWg;XSM0?;&$Q4Na$Y5ejNQ6w9moNF7jtE8usQ_iz4TjfIP@kBAe8ymrB4R;I ziI|uZOHu)NHlz_Ft1WY`$DG$QC3i6Aj>MK!6MJ$q#JXV+Lq$OP2*Vh>kC}6G=4=IL z0olToIm1AFi6c1&5{O9|8o!WyW-5pzQj$ZmNh&FTGh$T#Eg0`Yj&o2yG1-Ai)lru$ zfq;bYFme@jl`^&U)PkX|578SjP!2v9bsDHsn>T9R>Wvs3-iQ(M1`GiS<>`YjselB$ z0fPXBehU(SUFvHW)Q9;SXb4HeH(ua49loVTe8$=gSr4qPKCS1{D(d@LR{1!`O3PW%W-K~dZ# ztYnJIer^n)U#nmRpt)CbkUPaKECslDlUDEtMY-kNCEP;p615Vc#FMmFXGUwF&W{m| z!!k5f#Waw3fG4S#vgMV0*}@EKZb=8Sg4hu<0*-!Oxw-wk($o7%va;Ie=C&^^yhQ3l z^AYk`E@3|DX}1L7*o`yrM{L%+g+uqCmHz)K9JMGQ68xZtkf+aY6mZd0b^R;h>vh1F z=z!~@^Mww4UG(P}h_6c?U39+EOCN#IZ^1TQ4ag8U~4*fOMh&gH`4`A}@rJuewJ)nMoFE$V!z;~3mJ{g4# zpgdwp(3E4OMC=7g2~>O$!X%UM1u-oqZm{G!-+u!FwPiM00WId1u*g%Q~*Q0K?N{o8B_p+qTT}G zJbyqZGScl2BF0f<8EA5J04^t5_q%ux_>)?Q=21N9?4hH!e5YgW{jcP_jf9pVpUK2_5 zw~j;2i0E$}nD%lcfeZD%g!+!3zy8*NX>m=6=x-gzni0|8I!-ksqQ7+{H6xP2j!Vsm=x?2tH6xh{_N&s-tPnZJ>b4$3jDgYFDDXvRuQQ z-<^-~8zoSw3ayUMI<&l0pcG2eJld)esgUU~5Bcp14iUb$2PxHqy409YvNk4{S1O6H 
zA!yU4O&Z?Lg|Xg_FrdvGthaMvnr+q&aJg+3s=dv$#YJwoVb#cmu|jdf4J#BErWt}@ z1)XDg8k*895Uf^!Z>oY4aW7tX&;bgFbkAi_?|fI~x-Z)uXiCyiDG8bzG={o9H4B6& z3uv_p%>to@76>!6K)9g=A`C4MX=s5cLkmP3TA-7m1v(pAAjZ%FT?{RNTP#!&Tf=WY z7PlfF5<}ekA9Oag9L(Ye|5DD>u6i(9MPe$@c5#IO$ zkpy@PUnI*ocjJNF7{6mxfS(JlyCEAJJ{vzxHqG_0p`9$l##ol|@Z4GLY;5^#{59FM z)We2$!wee}S;p-L7qqjf$7d6u$;Mp|8|vI)>{X->co6j zI~ylH8!eXRsfP{M()|24ezspb8wsC{7EANd!-i{Ve*Uwo&S_`UjL$}kr3L6=!?iR& z|L41AYGY#ocdB4bM%izM^ZL!Ea#NjsB_Ro8cWNivLPZoTicho|k0{rQ;C5(Yymq3_ zDxzRNe4;KqqFkFHWYPA%+KIZTh=R57iMHYqcu0Aj_^CKiYX$AHQdC9`%{t9oV4N`X;F55-fZz$b%H`Wc^yJ`DyETQ*smysp;tD^ zhaniw2C?r&qB!Ye8Q_W;!@z{V-A~BG8M^?GxmRMfD>JpLIl{WI5ex3LMP|v;21z1i zIXSb%V|a2kl$(QGSuhw(BZFXQ7zy8U;EyX6u7)v#LT&(MrNVioiBc+h#Y#o5SgGjM z9HEGKlp4urizVu!7#jhTNKi>G{7HcoA&Bc)Y67@!JX{ml3a!GGkYwly`YX$Lm5QKD zOI{g~TnuH3VVh_HGxkj*Ba|h)OHHAKH?M@L+(M&-IQX|A0LyM*=><%HKvD|r3*f72vT8-d8`9_%8`9|29AO>O4rb*fp}2@?@nJls z4S;k4qXW%0=Z1WYkZNs?0F@;4ij{<3v69d$44CapjX`(L>LM5$LqjA&oePu`e^eQo zYA~8=Qg%@eXv=l}r4e1D0NP0^P@-Sr4@EWQvsIU%1>fXSi0Q`!LTH3-YcH)eNXNdcd z8JtCuSx`G0_)7vJsZ4Eqg2*G7Z}Cuf42H#EckMun*<>hVK`IRNpiJmRaXBTB%oKY< zLjAG_u1s2xc%avl3;~)^KrsV2;Ygr}l!@&jvAFaTO&SwFkA)-FYs6h|q@KOWvYlmb%qNv{_{UTUa+H&DxR#e!5{P940vhqpkQ4vP zN`Ql$n93n#5R#EmPUg*;TNpOP39w z2n`b)k1!EQ%`eKD2phURyW_Q$s-S{o1M*`M=2#MBkuog1C<}-bO(YFSOdp!Tl2rjnH+O3e_#Rue0sTT+Y?X&Smf zLq;1EQ$eb4&+ZEGDY-Nlbr44pkvftzbP#bGgf!wPM1WWUK{Y8L4FyCZPD7hwQBZZ_ zG?4}bx;7Ta9(Ofrt60zI$b@cDN||*iJE=>&nzAKGOjDD6qr+KE?h%N|9;i+Hb|HC5 zdO);5n_!}X6u_yFuxja0kBt0$IHyVrppt36c=7`UWakXc@l|~K0cJR)2*4X88v*B` z(i}N706f$IlO>3+BnCclES4s=g9Kj`hn?b6NKSy2o#5o4O6Q52Sj^+V5IHf{NQ93# zAh;R1b)oJq^xGzYj%JY@nV(k#>(nVpBL>R~`+E=1&G~B9uif_gSVupNc{%>NSJ#S7 zVPcnCw>cSo?N8T8m8Mmq9jIsi! 
zvK2?32MK>|Gqh#tt5U0~;_m0{Z%>^SboD^w$?HbnZfM%4QdBV(peX!TZV)FfoJI5a`>3(Fdz01(nE6XBVFE1gN z-Ns}!cR1^C`)tB~6Z7rj2`lbD{i5URb<>|+p8K1`Zs8h}y;mYbf6cl)wBxQZ;|@$* zvC2i9Jbdt^>Q%nX3=TzuxOpxj13ZnDw65qeXdb=f-7UOI^CU_0^s` z%T~+Q9CJLhdF!(iub?WL6&*BUp51t;;510MTdOEbmMd$`Img44DpOW~7GhBuZ@yPz z9d@~7>9DfXZPT{9MW=51b=~$SB(kD$`2Nq1EIE7f`qlAm4_>T)d{L}rU(X6t%W1MI z?(9;-}Y!}E)3Ex{EO6;xpPejSp}Qb&_g(4(<=C)*mwFS-+k4W7hw-DLl3>=6@(6UQeTykPgxq--1+*DvtRaK z_u-U%Ax=kwvPKS@^eWcDaklyXw94UeXD(F4+&^>T#r^(2E^>Fa3EvPhZ_(Df6T7-B z`efQ8sr|3Nc4+2X8NYSPhH<@Zl71383C>UHbz;r&43{4Qo+N(MLNaos-_nrq$oL~? zf0^>se#cK;e-|v7I<{qeMFW%I#U}zH4$9`66;0`T)+i)xbbxc+xs&bZnYTPM;P}fO zznnO3ej%=*@8;B8Y5$8aj}+e;_n@e%y8a%w%=d4fOzu6j;JL}}V}9#S-Yy=wxni+k z;6>B;wVu%nAHVLjcFtkX(mPlE?&fXozIOYc!dH)7%9B6sw(4+K*~Tc#pF>W}uXZ?| zQ8Kb>k3-pyZRb@#J~(vC*`jKb^aoWgqvTz`>^tm%NqxV@3G)w~S^8>p*N&6pg3fwP z%qOFkFZ`jQ+muNiwgpT#n|XXqpXt>l-p|RQut^O*p0&_5eO$?uE8a6YjZ3W}`L_KQ zY>fQAU*jV?${rcVC!Kxav!~3a&WGz4kCFfWtmmWOpUt^C`Cl`fUKhQ*dHBIzCoiwe z%ic4_U-@DF#nDfGJL$AMbn)mv9zJMzeR$t}^5=V9cfEW$qMfy0RqB0>+P>eVI(Ka6=s(LkMJ-FdCT?0SF7lr=#KpD6knbDpweFc|Y&CYM*{#LqO)sr& zmm%9~HRR)Ay~7^384Xx+epq_b38|lFnf>M(}!k@MwidOWwCB(&ey{l76*A8{cv)VZ80(8%B~ZfY;P=h z(0*~NCZEh|a%D;b%hC0|+z|Kko_>Eu_e*fEE(){nywi8qrktu=!E?Kc(MFLw-HTs1 zF6!*v!>zs7@)7k42SnYg&ThM}dyh#?4+VLqhL-gdcmL?gckbiw4{00uDELpoevk7d z^KJK^U)6A2aKPZG_eE0{mru1#8Q^+u>cu@3@jkx2OuBCC`9+Jy^8TkY>i_=j_3Cvw zTXyE2+V{)s%%@Qs2TXiXcizpW9i~4uOaJkq-Q#r;yS_YhAvtAf@9PhLJb693)g)43l$I2}%n)H)w=sRB2 z+o^j4tB8uQiv#`$m=*T<3ZtpE-j{b|O^VGP)Zx+VUls>CJa3ZfF?rtyGiTnd-sX64 zhC{nXEqV<+_)Vkb#qpQEsywlNMO65flhXH&MtoYHTs0+Q+OoY~z56aYRq^YdYu|m~ zvmm)%dk4`g>ptI^?H^s$_+apYSsTCI{qY7tOMGuwX^Vkq=wHSe;QZ_s3sm zH-EpbgT?bZ!>1iLKllE^I}U?`r_Y&o=XZ}&vI`^YwU*iJsJ2ZEE}!A)-O_dX;qGS# zwK>t)@!^moPg)L6nzY|dus-&DT&msfk~!-qSia~v=XCnB9~P~R^KEPP!QdCtyWg+a z(9_hm*>S6ROD!$^Kbb_Pw{i85zi(9eVR!SnNz1?PvhjPtp6c~o1VfAZ$F{6@<;!JX z*J*KP=#w^HD@T1k#eTxo76GaLLrV9&YM&Io#P^fISEcUbMvuMSE;rZzy4`O9x2DZc zHObuX8yitEFZt7DeOo4PZ58vK#f`NR$D8lDZ|{EZ<(dU^cS(9plfDe?8GYY%u<^!_ 
z6OPrFhxQq`!hhx6Axk5ydbyc>ak1f{pao|HhlRa&_*jM;4?A{amt^wuT~+O3_pWGD z;e1?RS@zimr!Nw%H`SFLh`QKn-v_rY$hvHrTQ+s|xUGI0Q)m9%Od8qWXwirHPZPyU z^VW2E-^Aix`pfU`XPbO1Jb$hJA5~wZKf5rf@Y0fdqjLPl_sDA+Xc?a&@3(J9$3Az~ z4Bgpx@!g%#(U0~mt6G!US5Rr59PSbS;$;2H70GMY_K9u0zK!4I-P3<Z zCOmRFC0|`#{_4`ww#S-BYpBCQ_sBB?kf>V;aT;2aWsbfYq6SM^=25ee^_^ zWUsSdwcdKP>e*tSZ|B|dd>yiC;~zIFWxrj!65Tj>^M#xgL}x%-{F zmg_vl-jk)@muGLE|LK8xX?_=cv(_{gx4-qtkcE44x89i%*2Avb)YHM~8wQS>k>B=i zgUng6Zt*Rhd$@V~E-0{a?K-F8(6ppyrR##0&wTG%^W~q1|GfQB%+unM&mJrijQY`e zXR7tcH?t` z%!G6eO#zK_Xc?#`#;OzGkJVJAeL$a_ksH4tf9#_w*a!xN8iNUr`uUXj0}&Oa6WS;- z!xBCXz7eQsEQXmCR~kJgkr-BUDXaacXBDMaZ&foQOjmVfG?~^_jYG7QmDWA=WVWc=^SHBk`$; zp(zZ+ojH3j=W%crkWyxT0?2TX^0CyqqpjSe;LXPyx2(FM9y~6ty$Ym$IsRMDMAQ5OiLaIaYED8%HpN+c~2CL6P zF<^)wPrzU!4aBB+Bn+`F0DHF*TgYHj+Q@^U&EScdvn^m>Qeqo%u(6JaP4TQ5Y&*av z<5@{W3Q11FEF34_iLjF9&HRPH{wR#0goH6?TAS%$<k$x82KvGd>zSi7ba0#O`w2cF-d;N3V1uMzA3HvDmS9v$? zf;vHyhUPZFU25VQNDt|9-34LjC|sC8$izx`=?d#UUBJ9xafg8m;rq7H%IUaQm>Uby_V(WH1>KxSZu>K5f zosMVBaXi!mV(WH1e8CWgfRhl`zk;f#!}~rW-p=(fg5V085dNTtVqEV!tb(cFn2%Ne z>l_l*bE+#Lo8tcI4r%hv91?W`uf2lAI;4q~{)-R8R^!9au=y}FUoPx>MgJ8>!oU`& zqY2?V9<1A84tphnJmf|5jNuBoP!}U`h4kr_0JaO^u@hF^(aJQP3vPcgy_zv&(&)@c zIgBZfqmpojFP29e#q#R?94(Lc;V=)mqAX!dkf2vw8E~&L;;Hw3lm_12#XOV&Yd~ed zdj={QM2hn2{UI$6XDE+uX+<`Hvamml0xD6&!2S?;!-49?<_UA#9%@3aUTVK!(NbgG7~Ls{sRK;eSax%CZ#5g#QC!OcesSAV;KFxE-` zK$y-c=|C83tA8NOpHu1|2x}WQ>mCTxVKW^Ff5SLx2=uvcH3x-yz(E_Ci2gGJVQl!z zz2FvF?|-^u8o#ri_`k<7WBx72jA0y;F7XIpKPk=~@l3+F-$lx09VUx(LK~ zK}rMrGxa`|V)I>)@XU$MqG0RMSB|jwMusYLyJ^pjz92 zmJX`v%yc&>A`y0VzSVFR2eRm*ICBshb6u3?(4`_k`3r}yIKVN5M;nS${^>6I?uM^) z{TBO5cKFJjOR~dPJ`CHE4?}n1!_egTFboQO81m=Bs1u@*V-CHl`8<|7EtXadX@v@h zp?O@_M~kG+MDx(~*|321j8=vdblqqsoO}-pH8?Q~SNs_>GNJ}lfkL~Js`trUC)GCd z)pSzzUYeGtPD=HJ+EoP#ocXE(#oNt%RWj6U=4(qjf!c<{;2sCmZxt+1Pz)A09Ty7O zVDX=u`=T`1i$qCbtvRhWc;LNKRwwAVG%6ds zVkDzi+~9$0Cv0ELT?E6GUwHo*s+vfyzXe84LW z&JuVZ1!3|MArBvuEM^{of=Y1?fvB|6ITBE(XKpF8WdpO)p*;xkWF9u5XHJ24@D-`! 
zfuE>rT?a1vpmQ^$PzGBU=HCl11$G^nNjTg}sXb~npws|s*-Aj|m;c8az@CjgqT$g= zd=9i2&g=^iBoMEqq>xA;V&eny*$}7kflF_IBmyozi(Uw43B+xDVFEr$N+UZP^^*Xe z512?J9JZHvv+Qa=|I0LhkH{858@Gq|CbTBQm_Kj0PKU=~yeM{lOBbN|bvuNCLt z)xb*_?H59O=RnKnLkfh^nK$G2vKo_)U?M^ul&?Mhe`+8KCZ#>$r!SM>tqO?{H*oA9 z4zIE339%pFCqxKtzZNhfcpNtiVHEEvVE2)-pMp2uwd=s!e$8&TrlWN{v+Iu{A`vi* zIoqxIFzg5UFkEWj!f+amM>O+EuW$;EPc%(;JVn5~1H6nN2=vZu|H4^43BwZ{ zSXOg8bXRseG_E3`xUhC+tYwAkJoa17J@ruqBX|Gy)<#j!)FOu)?dyN6A#y~ zPx}CDH6&aC!Tc~tXq3z?O!$NiFR(U&6IuF%_Z0ZVMxP4_pTzzQ5^}l*g&g4%%i@9G zadCn9fl>O zF{6$N&~$2#S`E}{pjHF58mQGktp;i}P^*Dj4b*C&Rs*#fsMSEN2Glj6YyOY(bm8%; z;}zZx)^p~={NMB0x+s7d^I)rCAs`(R?)uDLdh+Xj>zo^LlHB7NF_;bOa{2aDzNF-_porDPPaX2+h7g z5jHx&EA8NgjEwk79Oe~@u#t_Zzk-p0utg~ja>erW3i&Iqly6&PXsf?wkywDlSYuoT zK~43}FBnvqC6g5u!d-Y;8TdfGv1=O=_>Q{9Uv|Qh;fibgj=Dq&srmVXIu~cBt0~hB zG6PdH{4)I0+j#k=2gtkvq&|LLQfaEMSDLRZD4?BBU}{>R6wqpqS`E}{pjHF58mQGk ztp;i}P^*Dj4b*C&Rs*#f_}|h1&LeT&h;vK4ACI$O+)IJ?`0<`R&g*ezi!)Q4pX2-z zXTvzh$N4i`2a+!&KS=(N0wCdboIptUd(QY<6*xzJ z4R5v!hR+U=@Yj1oAi1A%`6?X5Ji#dOL0=3|9@gdMQs28 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsb b/pandas/tests/io/data/excel/testmultiindex.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..b66d6dab17ee05027b4770b858491050c0d96555 GIT binary patch literal 18853 zcmeHu1yEhfvNrDS!QGu;Ay^3R?gV#t2n4sF!8HVz;1&`HF2UUin&9rvzr#8AlI%my zJFn{1tNQC*sul%%?fGV=r@N>7n;tnya0m<#C=eJB5D*fOdgPGr*PtLEVF(}~=pZm) znnE_#jz-pwFO=MDjU2QYU9BvMGa77?M+N9^{?M8|zTcBVn z{aBX$CiLL%Q%kl!={DLlu42=-QWwW}s8dg3MQJn6H>l5A^XiI>VZp0(tTn>;6#8qB zx)9Q9tACX15zEtKs8>3*M_jVx;KA;mg5g*x-)@c%wJD3nL$7=vc(bEwuc{vJxxH&4 z4;eF;BGDxVkBO{ui0fdAffGR?@J)%hHGV{5FL$gkx;3$ zb;7#pZXTk^qA=TW%TSR{jiyH#!%^BjC|=#kPlXNIUSRL=FkpNgs2{^yJJBbsTXNPb z1;1#%F2hYGM;LPw_S$?j0zANLys$U2bYNoq@%umC;{ULV_-X1z z(akcg%m{(IV)a6?mmAi^{)5BXl-XZVXpy5RtnsQtvdH~*wqeC!XstGp@C|NnuFqA+ zQ-+YX2!tMggLr{OVX}r{wye=Szp>g*X2K$#_gw%9l3)K@%EpQD^=m&VH)`h&(Rghw zjC#o~e$eM*8FA&rk&G_5a4NZ12n?tduDIEl 
z<7U}PZs-o>pwoeL`pabLoJCDNV?D=J=^4~V(~5EL`VKRENowH0C0+ID?b=nfE{ixr z41_Q!)1e0qBPc^DbaJF=;hD4v{(|4LMyid_&vvUVA8%!`oPtBPq`2mj(_M8B7~abQ zgz;C)86iy}5d&CG6*LG4E(i>$t0mJP#^YjRZ=r8vW5K9zX8kWi0tIZlfbag_{;7}a zF>hr?5k3vL@f&K0|4uHWpl_lGH&wF^oquvBXxU{t8g$#xVzrHn#ty$2t}9}?v&gEo zeUdJ{{n@WY;&mc3KQUpjl5rfh{(-$m{R|2+JNBc}9PdO&Z^QK2xvjVFR;Hh}RG1I3 zDMEB)kAC``hcSYN8_{4Xn3$qf7-*qb=v(uYcOT3Y2Le^u%OuK>b%e)|MLwu^ji_#% z`j%gF#VxMIPih_)n8!ka_rBdtKEp()9>4abcI1q?58D;A?!**CZflj5)` zE;Q+B4%|FuKV-5mgBRt-dVcD4j>d8g2$eO>WrGxm8zmeT{6)PkRwFL7UIC9X5CcCR zEy3X!`-K<>hFCK97O+qI6NYIHCsgq#*(mHOjA^-w>M7CK{V!J z>pj5^l)0Jy2LF&A))7W&ngG1_cSsNrfQttK=;6;A=pQR-p(u~ic^fm1n`53X0wIKA zo*k-S5QYpfG>#@2Gp^Ab+d{!qn22kZX{;_cn6~pgYzLCxhp2V;-OE$o8hQo@;EG%5N zwa}NouMH%Km7~atpmJ@i4uyKFSTXQ9EGTv?-vOx%=GOc1x z-w(buTB?w7VfNe0NVH+5uUADTgu*={kDg2ol!=xSs=mpi?OQD{lbe5PRLh%OXKck? z+MQfk>VYah$J3oGA25pb5mthwD=k~s=~E_hHeV{RnJK$W_^p^T7&pz zmbsVWZm!LfV?ibqro#!32%N(hQ-*ZE3CgPANe>|Jy&lU=5R#j^upO0eF!#h!Ihjo9 zMA)XeR5mF?gi6fFfz1BCU2Qkiy_irvfqbmFTDX)(oRqFK!++Il`=a$pylqTfkkO82 zZQlqlAE)cg8IjW8!Y~(lKc!lxX9ekM zsZY;Sk;FON8qZ%Z^NtRvs(G}k&|at+1LnRea%_2Gfv#S zH9iTV=v_j0*nji5sshx1WHzUlMc38Qh(BRRE(UaHMazT)RW z^80Xxvn?vH6b=(tLDg~P+k4!kFhm*o1vk4sReQYH;}F7Jv*KEa)iuQ+1Y8o&@8V?U zKw|}3ma>$WTkBPk`BK3YN1lh%$@$KM#+xBenb-^v5I#UqWPdBDf6K{opc&vr5ug?> z-|gx;zDPY{?g&<#)QhrMClsiRYkM3YZX|JVa%{Cdxu0;xpK((ap!}6OG}7{&g@#-k z$CH6dpWU8`O&)5RFGv$#40Wk|Mr>Wz2deZ7DAYaBlrgcuC27`$EXv^_V<{<@J|*cR zshRgy%9xgN#qiZ^r}RfcA;{+9rujVmgfwa-dwf8|DyR`8euiXg9R=2q!*ar#@9 zT{52O!T`5i{e@MRNp^N`Lig*V5nI4U`Bzmz3}WC?0nC0190UaCFA}&@64oDc_mAO~ zX=g_0+^(v}qIrf19t_C>!iENI+OCeW>iQG{&C&xF3Mv95)jKW;lN?GQY!>9o#XPzp zlTR{aIr3~ZyN_>#n~DE>1g^rl8Ds~fsvMaV>BlgkTmzlYU86%X;cH*p?D{r?>)P6j z(=KEZBN918q^x~95_0f{C_N1c5o4SKo?cHXBfTg|JMfL; zlo<#>vZmUV=0hg*f5{w*&>}aNoItC<_lh&UiIAu4hG8k1#E&^PvaAAJQ%cu{P4^@L zsYQ;jr4Y%I#%D-y6?Y)9_+7ZVg*5v`uO<_BUH3jM zVWz3^x7{XI8kwXos@JXJR;Q2r*Rda3i=M(4HKu?yC<8ngf2@Ursh+)&p`xR`nYD?- zk9ANAw}2DTMF~hhjqy;pa#d|XnhdcBQFulDUavrgMSSG(0Jj6rY$KFHPNXeuwV=Tu 
zuH%`%02_Gd5^A`Libt<(TKrWzSUPGqLDE$0h9{!*>`Tlag#s3E>!xj#*qxQI2`hL+J_t{8F0JLj>Ea7=vBn^j0`B@?w0N}4$4Dx2VACv|!H6M6S*wr5?SUocRcWC&?kd_h%Y);6@GkFRspTK)1-6jGnFE$rJPL|`D zj);_NjxM|M&ZQYUo3OLVY~YzkiirjL))Lz-dwWY1x8+95P$7=WXPXF^lxU~a)Zg&Y8@MG=kq5dcGLjZ8$V1Sf`$T%%w5i}$ zPfttun{8#_h&G*^I7)c>iX`WJNMhM#a}A3dh37*T>PzOrfa2lKxA0HxAw*w8sf>WO zr@S(KPDqW+LXTO$=E{D!Tx_YM-HbYPir{5UtB=^vZP7jQCT;(S| z1=k81mU;N}yBavJW1A5VbMt*yzXh-~?E@n-aSPvUCezF(b3sjGbOJiOw>M*ld23^b zRGrA_1pHU)XG2-bLHrszNQ+1jex!>%HvE@|n~_6AvW;%%XQAfx^h;jXi!m==-PGg4 z99Ng!wkOEGdU^7#T{ZpY=Ib)tS$xwnsmF6N(jf+W{}VTenn#_r$uZ?zMD5gVqZv&k zdnLMGRWrcr**lBiCMZe{ci>DJl$$myFC$7?<$5uSqddsM@D~_Yi+Y4}z^zTyA(11E z?c^A=dSBOih9SQ-;6n-YCtCDdQ%-k71 zu`jil|N1+OXwlkUidulO1k!2?)6#{4U^aLQ*M@9xtn%XzkdvA)7l;U7i&(enED1k~ zWvmHe4x}U=lG>*b9X-RRP@W_SZzVkecLSk$Jw&Lgyr;dR68tKVYr)m5c2{A^|ME!0 zHV7(0A{1iC#yFESegf9zz=g|1QA#SoCvf*@Ke)qdV)Ri!$*atQ4|D8Jb#!T#8=7UH zDskieAu0_Br$m+uGp<;>sQF3u=$;de^T#&3#k=!8=I2XwCz$P-zaX(N53gbW1J zd9mdg$!zwD$se^_P98sM%FJbhR*5EBibmnvkbnY3Vnmr7ByJuPRSIZDp<-T|;O!%i zqS+M4^p{M$>a(Y?2@i7ctDqW-=WCtpCn=x)j$T(^m{6G|(cfW;nRsYRX{yY|^}cM3 zjz+O;+sS{dTEQeA&7SUHtt!yd$)Rb|XXx6PH}%nE>h^wZ?a0BPGC}g_ zhMl1)(%Q7kQNlwR!C6oEvY#)vICcO7wbj<%zr&L9gvI|w9_=Js2HK90l;|L`ZFSyf zE1i#h+zg+V0+O4joO+{C^0rkHOjWwDtZ@}e{M=~sp=*>ktiyO3X>z|YeTmh_y&aGm zx`fq|E0S@9@TgVC!daJCch$;|62g{vwt)%kl6g9mnA~Kf5;e?}UyWhKE{5^3<8B4LdiNJYB|+^Zwd&4qQDtv-d@S5;PUT<< ztUrlQLm})K-h3P|gA51f{f4pGw{CboXJN6B{?@~i#ZTUvhyz@Dg%9q#-7 z*0yGRi+-OkukvuQ7jt!VN%#rRaoMs(UCrni%2W&moaD$Bu>qJtgAv^;)Q-+mlEIn1 zxYjTlvuN0c6*L^Q{6_QPwwrH2AMkX2=;=(?5!{?_$k6)&>w~|Hwz67U~vw z{6EgD0}_&CMpqQ~xMR=`!)d6Lq$mip`MFug_GSLoTSPXkliDPveu{iGE+<|btUSIi z`is{W7kd!7N@%hvaGG4TO-A*Ncyq2<(-KCA1&O(0 zf=wdco=Hh5alJA*Yt#!oe9LhtESy#5r#Rt9a)L(6gfz!g>LROKMG8IZ-_T_US}6PhZgEY%Pw%jK zPy$nhkhcFw0*`+y0pb6@B|v0FC-ILY!2Od1%4OPqWSaybF2VI9M>r_&a;aE|orF8V z6WgrFaCu&njW+O3EubvhY~Io%uku>0guWKhgU%~~7TThCu7h9yLT8t-*;E**7BBY*83eUF5(LeOjQ|MWh({p6qwgqf?&ht+m%y{p2 zKUTDr%e4h_;I^;3>XaG6mQS)+rBF{=UAtA>ZutDX!(IyC)CMa)pwKTeeNk}&Y|^ew 
z^>VMo1?1SS`|04--{v-h)9RSJr@9V(Z zrvEh^F!*~N7&qoC_(u}p_(=jlI-vHywHKUCo|^oR>%h$b@%}%O0Q>JG;0P)F|Mgz5 z+NVlI4hjM?2O!QL{gLbbAkF_ID1V|R9Tn;T{tqR!I!h8_Q7p(51yU{z#TPkPxYH~3 z45VQ}01Wntj_-@iT;4T-*elSjqNyIi+PJObt=^qUc^SwL6(>p%0p_V#r!;hw#)@_P%7w+sjgW zh!2HYG3__4^Z>C|0|GI?7k->aI5@gl8r`u+*-AY!N`ff7U$qfj1j{-h{9tjj^5q{1 zNQoH+*m2;pkheyM2P30ViQr4khI=(hczGl5l^^pPlZ1r4?l#DQrR6KhyO`^l#d!p^ zEu-q9oc=`l>!}KV%^9&i2tpYBBFJixS-J?UvU8frRGuBghpA3qCxp`}ez>{UFrWKV zwQ{wTNZ}nf5ZY>D4oFKDkye=f+$11z>&f#Gbz{oOpe|$)clsy6T*T#K79r+f zbtnoFx|84IsfJ8yzte@ECVXC?MSgLsj-Y;okSx?u6Z9CYN~vFsqtW`Bt*1g)xb2PI z9)5rVW$|asG=yH)m0@$?0}N3ryolS2K8acPF-Lc%v)HRvA->zidT2g)72(&t`oS8N z>pJ>FBWq<1JZkYZf^#VZ%zZOo>d_?M=6Qxf&F;c87|^f{Biwday4e)bICK%oxY5rN zVU1PqFxnU>NcPit;aT_|f71zi$^P_o32gO!ZI41pV8tAYheKx3{8y4ubx?#MOg_D= zpu<^AULY?>bDxRnhS)Jw&k* zo6fzHvroKf#_aRys=h86D7UJIEny1<#kjn2^nH#wOQxQr#Q8|yXA^fLvu;s!@Jz{R zID^6YY~5Ds?3FHWi?O3dak62+oI|Zl$vFYq8JZs1GyczC`4$Q~9r!cnc<~jFiicfX z;;{(_o~pl9y8>`fe~sWBvw6U502a#x@mrqwE^Z5psr$hI2HsA!#lVC@z_*Lgk813J zqIqELb4Q3`R2>FlVe$E1K-7%az+utGH~927cBNXX?QxYQd^BuKn(Av<(27sD4}ubW z^(4cd1Tz7ZDvp~V$k#yqbC+*M3a+*bS!;FC$S!G&qq`*!Dn2ZS2-pYjeWP`e$u`lI zB!Lk2SH2^By0cKa?GcEmBoYk0(h}ygm~Y=nh?WRR1?ITDv9mu;}mV+S1pv{ODaT`yw>Gzp{M zB~efaN{Tz=WObqu8DtRm5bU9z*m+CWS-?_EHs}W)nJPoA*?a4R4cf2`p2VXcdC`_Y zP3FLa`Uz$YK&YN0RgOuhv zrkOiRy>Us4WKA{px5}D0>xK66tTvzTJ$Rh)&W}Dr2e3$szo%*MHdX%^{vTw1 z2Y+4;11T_>ze^IOyPU48y8zP!tCplLs1MTi1(EoLLSt2FzM6#AOu3iLa2Vuj#X&6Y z=-$ByOgKDE!3r0*_*X6r{GMs)C4xRBp3f8ZT0Kz99f)SLqg7*~m(OjAL&)FzRy)c$Swi_rDQBl!nmkM_cZ`JTFqJ z%a19QIF3kIl{o&*nn+pObE^IQn%r&k{xN-w?Q(#FVh4>6agT@9Q1Hr|G?qZ8U9tA@ zCos{R8(Lag$QEENi><`zQ6z1euJY1mPE^dJ9dZxPQ{Im3!1o)S(;!jn9Vo(w!*>;| z)E2aTLW1n9a7Rzdnv)o-W9Y0da#_NSellzAM9PI`83v&Ty4>?PtSlswdKrXfyy#n% zIF2f;nq9P!t3iLfA;J(FYpCo;#tw4C>NrDu#))=4a--OIWY+Jlo1ik8PU0QUWo20H zXBEqn6Qvb8Up~cBaIa_fWD4}#KZEeVZg!2sOlY194+osE7(JCG6n1BBd?$!AbGbJy zJaZW)Fz(Cita}}E!~&&fIo~vlWLXaR@)CL_eUdnA8&7S{hoM@RYYl%nX@PdGoD8xj z1uJg1)RQCVSSw8#;oa$y;~$BkfnTTLIfcm$6(9Rc+{a}U!I*sB+9s6 
z*Pw}dISTHYMBvTgw+jVVmg&eQMI_ZwN*Rabs`g8nrp(bsLUT<=MOVf$hr?Fo9!F<{ z?{hfAfc7E1nl-n%(3ETTm1S*fF)WKxe0elj%V+V9Hvio?U#vL6r!m5iPb_4PZ6SP` zWJjh`{jP>S9-J??w@BAe7ze-jV5~j1Z2?O+pz&r_!bsQK6`foZML@Su_K`+k?t?{q z9^p)OWFPqI9K86FZtym#_K}_{D}A46D4)rvnwNGgYAH8MYvK@HCqkmXJj*XiA6qEyGtl=+?5fl#-DJ zLII}p;qs}Hx?<;YDT@3k_+8yjXy|=e!?h#gT3xsU!e80VZgMLz9W0&bxiE@Mz&IoK zg%?>Z7kKyU1$>m_m$MmgCLq;}-(u0}$H8&*@{T0Aqhb!^i^!u&3it3biM1!s9%?8R zb@9HGFYhQhmE>H&a36G;WH=R31zoj5ZBkt)O>-+Q*2+G-GCokrAsaSh70`?X)!<_C zbq}Rz2o3O^E?wc73?g*qD*w#hG=520Gj~%J5=!Wv;rA-dXNA%X{K$(EiFZpMI`n!j zQx-B|-C24G;Yj`DL*RwXkqyMG#Jh^pcA4ZQEGzun9Z04Scvl$?mn_d%EH?y?c&rbZ zym44mzC;gd|3-8jZ4_WNa(=&w-%$@?fBGU5navtbv+QzW4t<~MHt?!_`X%lXI`S=# zfPeteCiF>-nP@fQ_&M+Mv=h_!B^J@^9_hCp**vsVpKOVEXxc|iq;c|+dC&`xgL@Na z6}bm7a2SJ_bhP%M**={G#8-ZN+!eJ61!qA*H8T%7#OGCnxS*zB8rCEm$!Wng>!7dGLk^L)Jm6KFc|!SAxFb0cJA$j5N7hV`PQp7< zZ$4dIex&Xl)?{Q=OZbSgn47z#KcQ!;^?_FQP0x20jro%!6$o*)Z=Yf8TStUYzo~tH zaF-8u+qM2LR`HHL35$_tgcJnqC}V7MN0Lui|6E2;<0bkf`~mbowiHxbFn||BgKrx~ zT=P1OO&Z$Gh`!0+C_GnIKHY+mFt*b1Z8486s{0VrF*Rb zSsj+#aH3-CqB}uqxzCGjM?V^bsr8ROeq$5y>H=G4ahX)~r4pBYKFiaJX|{?Z`bD!+ z$~_MpOE&$u(ZE;1MrxXRvvv_YXdfj_-Tf3zjnF4fM)7o?$$kXqqeV9l*U(8BR5f;B ze^qcu(uE;D*c%l6Ec^{ec&id6APah^Ll!yt zg@4rHT{iSzbVvv2a1PuK>+d@B^p!K@0t`hN@cgYb?>sl4Lw#LcIWT=1Vi71@<70sx zk9-s|aMn@C*D{d$DgZ6fCw>LpRv_R<2(tg%Nut`b`u(F7yes z31oZnqnZT;U@ugFt^xL73n;L0G_qGTa&-JrX#@QK-8{GA8v%vbC>>smJ^?;S#mEA! 
z3OFc|a+1(d!J`cVqd3)Q*I8&V286MqdJ8={m}#^SXoda*(Vh)Fm(jaR1LEs;k}3X} z29#1Xd&6*3pL83P8X&ouDqgP7EM;ySrS0t7s&EIfsah{5S(uF<&CtCn@U7 z-*tS=&46Z8LPLgLvQW)Nr*vv^3BC`{Saza_KiV_ni+iNMj|$Y3ECgktdj{?cyAl9{3rGpGMUglsBU#uhZI{cdkF2*0YEtT6jj?h6qIR z3GLwQzr)Kk_#n{o*%F))A^L|U{xuHVrl8PJ1&I7dr7S9-THDaZK+fLA)`7{u#@^`9 zyHWos(gujmBU;{~g&8AY9{LiQWZ=EH5mrflC!&&Tga40HTnlOl(7S-)NF?*tW zT3SLGB+7LBgnSI~6TV}VerDn(ktrd;F2Tm94DM-JX$i{GexvXuTn%(x@d+unYlc5| zHk62wXq#wuG(pAebP*`EII$mon2Y~0&iTH3*~iPnXIZJ-4%^5^YmQ+r`&BA1c~0ln z`_+_{OK~5wJbNX=myT^CJtmgou8L-hM%R@Cxd@-~becfJGHv30Vruv%rg6*pYT0%U zl#`3tTe!_PzIaE4oGjlp ze29IznYc~NA9$9|6@n9b(Ha|m#_&D%Evl?{c9-Wld-TIck#6e5$m0NQd3!WV>uE!)Ws%I z!Rln^8`0@@&G+(2o*U{3wMewNH@nGED6#g6)nsGztD>2$f0)G#P}y4;Z2$|s;)vMg z!bh=0I+y;6SJ7`bv~ZieB2(?L=;lxa%Y~V~Ic-YuMkeXiM0ec+Tv!7>22OKlCh;fs z{Ld4vQU=YPlI1GHP0?W^&yqoWkzw~?Dn3`mjq+t@HVcEprA7MQ2v0coWMV&h*DkI` z_BkF4_C3QUR^H?&sOQDVxi&Q@GmH$R_5ntOMQ8aU9JiyYD$AvP(PqAIRON4Ym1Z3o?4CK6M)Z5OUubJLkr!`p>PdU>0)OY!J4cueEQT#_qO;^_VH&&V&W>g#jZ=efbwOkf<#S! zwV-S^6&50ymuY?q7>Dgu~8U@LvC4vd!c@ubLI!P!f>sn&O1i@$buhe8y4Bt zVt0jM`IED)ll{qqtq*a9$*13WB=HkRqdw!Tn)@C(BMTE~k@W0c65Pm(aNZ9csp%a$ z5NqF56Bhg2G;^#(>vF{8$n`_?MX2?Q!cig(u~x~g+`8eeCYbgj<&+oRuWv_=JzOI! 
z;{FAS6(FqtGVH%2e0~Y!ehC`K1d2KUVLx^-MIsJPE-T9q`TtdI|8Y)pM|l6z+ds`| zK*1OQW!Mkj@4i=xKmK~)wzr(*uK>SZ_52xt1F#MKbnz3o;jcH{es1`qq#qEx{^s5r zFb?odrF*0^*ncO5|GGH`+!**u$$jG=7v6u(DE}DGPp7zd1zo_1z;`e15vu{`mcK{* z_n9~_An;X&d%zD!z<_syyUPiIQGko^?@@$M?oj?xk`IgkTr7W&@bwRbzZ`Y{m=Z7w za5?xriXhtEIDm`8fdPPP?(P8y07ve>mG6B89xxzq3EMp&5$@jr-evg@Y5;IW*gYT# zpa%Z8BfPH<10Er8fz>@A8UEh@-j`be0|M78-2+k({0-oJ^%5{3aQV+YAQd3F|Lv6T zi-CXvfvaim0cilN(r*FpYioc3fs0e_0qKbU2JpUA1sD*x*5e+Kfdm-vC!YZf16M7c9UU=-lPnR^s0x(8A2)DRd2_)y^ykuhZy%@DSrp{;DP~jqW4&EfB@&W1AB1UfH}{5 zEa$(&dT_S`rZexcR{sv`K?4D1D(|t>0CC}O=lGyH05g#HSiOITb;r{IuOl$Sc#p*e zV8wqst_Kwim^!@2djAL3KOqu<2ly*Fb`J?4d;wJRU#K$R=D)HTKQ}J|5Uc-g{wKEq z-1^u2@8{OFPkw4`@b7F87yvl#zlQ+j{TuN<=u3WYayEa5^{1zWyyNU?Kxjb90S9Mu JfEFMi{|~n{oO}QP literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testmultiindex.xlsm b/pandas/tests/io/data/excel/testmultiindex.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..ebbca4856562f92be9757464d7b3e278b2873ebc GIT binary patch literal 19133 zcmeHv19x5Rx^`@%v2Cld8=H-7+l`w>jqRk3ZQDj;+cv({zI&f{@9ysJ51cbI)=Jix z^SMUmv+nEmJ>{i9!O(ypfS`bYfQW#&#zCL!fPsKmVSs>8fuKM>3ftN^8QVDND!bbm zJ8ILrSz8h2fPqkE0f7Ln|G(pZ@eYh6tjKOLA_bj&bqg7&FYckE^(`*Y0`i3ZP=UC< zINewDww+vcvUElYUein+RP(vst| zaNCo5cGS*rV4ox{*m~TY;FW6al$U8AZc>a40w~Mo)@oI2<*B%e(V4z`!R+$KGRuBb z4fyP_q0!V8Cx~SQG|)_KJRJD|9Lk9a(>Tcz(7hs^sidN{@JZhg)n{wpma~S&_M1mp z^VZhDoPd2f^9uF&!p_v-8q+uh(^&Bd@yVPUt$tyriuUUeP5PFMJSt>Mft4H4^5%?6 z&aq85qWY`n_L5w#%oOz5fc*?=)AgClV%?mERc?JsS6%irO4KQ(NO@!_r8o>=4ZfG_VQMMFlwHJ^1!so53} z_XtG~c3JAK&9Ld>xl1m1E@k5dz$Iz7-hfa&wx0Z8#u+i@xQciKNb1`gD3JWW(H&K% z%@_ng_cZ{UVF7g4buhMaWT5}~{(m(8U(D@)x%HA587WXEq~J6EH=>bN?j>sCDJuri zeHo({V5a03J&xq)yr!2sF+r#7I((0`a@P#^ z21w*s2eF9b4_%*#t%{C{Z&)Izce3Yi_`*sya{}qf-uy6~#Z`O#c^kFNIo0DGFg33w z5sS#Q;&Q=J4IOJhZTL$dsE8d)N-S&R+($169bU2J zsAlJrc2@Wd9fS@&5gUTyQ)s?q?w?ssDk_SR1*L$d?>-!Jg4eXn5?J>46 zBYso2VHnK0W2)pe98(+TKp9)Ddi*10z^{(BB*B1y8sUI|5CK=XSuwcUI#?Rm+FJg! 
zj(KX9wws)2FKtt=A(RBFrOGLgL~&LJq2lv7bV2z}P4Usu64HijmDQ0--^H%h?%qeU z^0>s0WwCdX1*QsnNs-a8s?xH?UsS=n5EXEg+f+!FKg>Tk9 zMxx#fkCwMNjZ3JsEdVG4>z-$F)V+-A^3aSVSZ?NArV zHaTo66%Cbl9$r7E#%$+vulYxW~ z@hXd=%4Z(adm$|GVWj0%OnBq&jqzY#;gB!MXJAs|A4Pk%X!U9Ap=TE7_DSC0wOPVj zk*p#OVg{fA!(S*|lpSOheaRdJzK5v$l>mZWcXJw%0`ooTee zdoOsyj0%KQpcRO^K67gsZqlB!Ud*H z3mYdTm@i&RAc;|+B`J~GMJWKkUw*i$P8n|S1O^SFQF@k>QEGzOm9AVuC69xOz7 zkSaEhrx!*}J#JSAcUdsC^`{D7NGcBj+i1qJOcFA*`&mA-R8aZd{dGnZ+&x7R;M6n^NZIkQyH@ZFb;L|nvF|44h*-Z=Nssw=;WWe1>_KuGo$TiLWr$PW z8pDErjuYd~UXJigZx(H5al}A8&0{$k=~GgH4m~q;x01IFzVp62tL9%`*qL^F-0RMp zb{sz(y@Jhk_bH9Pe~o%}XTp>dPuCzbnEo#5#qE>k6Shz?#Dl%cFC@Ze=-kSDckko! zL}k&+d~?;U+iaA<=zS<_V!GtT-<;0lfHC)AOBv?NPZnsE4Tcjh-OgCN^_Z>&%q6KZ zBYdgIe%EN)tss#m&K|FHgS8&8gtK<%`WZ-1lHQKXo(-T=w?I` zy$gB`m~2ZpAsq`CScnspG@RF3ZSg{aVfk@`$70yjkH!}H8 z8OoRxQ&_}e>>73kjMCPcnBXtl;{%yxX{m9{WPfzm!#a9|Ni|}MHiC&sqfynKXz&{tLpkg{!YCOg zgZDAR*tCB)LTX90k#X=@ml1EjPln6Q)tBmA59cUpE}K9%)=!X0&j6#f|Pm;>za}v+5Ad%aAK8R;bZolg0$rtl5t-)&Rf`UWeb{Jq#dIR#i7OkuZ zL&A262KyGXgA;SET;(B*k}f%C`@tljCuBJoY;;IgX2MLypPVhk6WKWz;czNaY;{?p z^ZkMu+_x}PU13~Rvn{s_KVffK2#!VkWkIcxCW$m~t;NX&tOjT7&U8E-VJ{$p#Ce_bwg(2KWE!Y^|Fgy24(r}E-{&ktHJOLAWe zig4j7)r*f>m;JHISoibtrC4@Yy!NzkxuhpOSjNG}$&}o67+MY?ONBc*?t4U|iOt<4 zfqbMUUqt;ujCtahEXb8z?J?U?omdRR?LG zPbl4=v2cjgig?&6OGi?zC#<9|R^+o(_)#d~rOO(=j^NvkSns0=?P`&sXd!-bog zO~R?|+fGskn>>Z@Cgh=$)3g1wkx~~zaS5*MRlX-)PdzL~{^zpHOfufkTu<@%t&~b} zHakRpFL|Vz91-;3fQ_ne95qD(P~9&SuV>fcfoAg&kXpw>ezyg@z4i zHmpClK%pqbBim-MP(#I$2Hw_`sE$_jn0GC7ep-PiwEHsKjM7u>D=m$hqg5VXo&xN( zSmD7WAOJ-~*zAo>W`;Ph-MdrZdO&VmE}fDt+r*m@2vqYe2!u0L^&w-zB^Y?FUaud& zvrzoDpzq|3Ro3B?3O6^#PO*amjg??Iv30G0Z8+O?N&aZKqS-CXVtVVf3B!+sU1-j@ z!G-mqdeC|#H5NS5n%fTbZzOiPtbXIADWB{qg+TdeC!z13@&0j%BRpSiM+6)8C!Bm> zFa1y5_aE24xcbqoAuJ1N(dl@LMmV#Ba7cpQ2Z1AO@!nU|oAk=OF{c+4v`l!*at59# zb10^?&elw+APJZIk?Sw8m{l%DLL{@A+Y7cE=)^_XQKuY1Kyof$3#=)bm8e+T0y|fl zC{{IjE_R*q2Duby=ESm)H$%?4JozFW=>AdF%Wvk?rvmt}`QKGN%Rg29;{T|63SF>8 zF^M9f;JGgn(={inqzLLVn*;0<7K^@XmD(}n$mleiBwRl3Q3RRYL~XuhG){R!%&VmH 
zt~}vpu~+_V&0uejjmawpr0C`_iWkA;_d$|gnLe**nGJ3+o|5PwA!6ii<;pmXWg~Yr zZSt6XqUvU-9ovP?2uMR0==j@mOZlinaz{9{2g+a80|8)nVx)Yq%LJ z%{C04*rJTU+3Yf|^vr!XM&`c9y3;n1Bbrn0&ck>D^G|NW%NrxjNAm#fstSTDy2CvF=q z-znIr=)P%1*PN9kmuGr;K`DptGl)Kqk_$HFS$Dz|f!2rb#e1d1j1k}=#Ku~NQ;_-)1G>Z!i=<-$-b`emw zNuzhlS0+>MrgwptAs`g_vZc*gH255CFrExiuRAexmVyTGfDBo&+Px!@KQdJl-cO%= zgqS0mcG>0v3Ovz)|6Ut#xX|%2V{+_=W2~psb8^HO1w*5 z-oXCH8=iU+eD(HuKNrdcg{c_47qOm5HWZZ_)~#I#5`Bu8y^h+=QhXr}y~Z8Mi$|~e zkT*q`Zc#)^Wrwmc4v%x^x3Pmi`z|eP5t+aM)z1n2Ully-FM(EFiUcTl@A4Udey0cy zByEOL-zmDyR#d6=cEyd&0++U?SWubuJ)74dOj=>pj^Fk4;;)9v2>bdW%o@Fb{K?@ST zg!EfsG`F`V8Um^5rnh|JQ2q90D=Jl_8IHL`nJcZJlKZZEbVO7l?`Y3dYp0CX{f=*;9?{f=LV;{*3R!g|ybe>`x*0OfK~9dWPzJk$(~R0H zhm+8^E(}oc0xML|#7-bX+r7rK4KuNG00r+vVNFj#M7UTH1k#XPdU;Y&3QwmoPP}w? z-i&|bYhYET?^M%Kv{GbT=A~k@rn4VppRDDkewgq{Hag$TxFk0_KA^S}Cpk|@x5m+^ z!HRAqQi#y8T4<*jea^XxF+oc3-OY{&B!Pwm>zut@bf+b_Olc0w*YGTtgZKV=0n4-> z9yF<{6YTW0%JQv>&?D)B`mnFjuv=6&Miim8D&)$k(_TV^c9vw4E48E+j%UgBOJXw! z1Af3XRfBc9k$k5^h-ID@lqsj7TsxAb!R?AHA}A`dEh$K5ctL7HkIOXH6^`{C1U?>3wNQ*V;AV-LN<)DkZyh#B z2vk*OSpqBeCcs01b&Y56&vtL*C$c@H|Yqv(VAuGc! 
zy>*N&_m+M%FXRX05zl6zwHR3LB}?qRf4wJq?|4Cp7)}KS6R*V$nSY36D1lf)>M&-h z+WBEfR054B3~pGHvC5V_9{g6@HApH(Em8rh7Y#n6fXf*ATYrqoP$-lt68Q6F^p7eP zngg`Pn9zPh-rSZFL4!n$Mh_biJmsl#zTyVLW{^#vOAw*SSif?_cF|^OLxNJ}U1hsL zB{oQ7tEB!7-3=OPvu~`CzA@n#dY!V3vU&_V?TCXPT{L|d>fFA~7okosk#!li>2mY0 zB#y(q@_O5w+GH`5^R=gOF78F^ssIY&8hopSolfZkPv z!@|U%haYwcT=vU@x)R&ZtkwIw%8c?s)tFLoy*q^E_cd^Lk+&1@cL%E6H-NeuhmJ)n zLQ+j-cwZApi6ZWwiA*{ZFs+2@&C_D{(AUG6vv?!+#Lglh5AsT-BYF?!cin4vwFwNC zF2(57c%r5E4SQi5F*c!FV(CXLW3=nNJXq{S?HSTrR~Lb2!@HOHZX6HNx0(4@8ABvo zw4LqsnsNkL(_a5oTI^zQ56hC*<;9~O%7@W3AdjqcSj>>^Q`%8r&TU`jhFj}Sx*nMc z+Rcw#jB~wE%ak|ks`fTAD%j^fUm(`)K*$3u*WN7)Ez{zh2Z7s^bpCaPhK+v9(gjC{ z+8c10_eVQe`)X$M`i=-^E`U?Sd!O%YrT;vt$fP}bofGppe5EQUI`lI4R86zGI_0(L zLdXn%f{oS_#Jj$7ONTttBBpX&#K@gYp>wJLT!-^R68*v0hR%g1M<49`DxO+c;>k3y z4hSYI&kC!;P@@Hbp8ayGYESzg(=J{Xqs%)$I2KK+mU~RkjGf0qIbgg*K;1S5O zMpKLrw(w>$PpNC=h(ygHw>QBtV>@X+vl0d|kuE{r?reK{a%~%?8ho)-%B4fC+C0Gx z(eXpJq#7@-OWQhI%Mh!_ln`A^n`tq2eC{J{ljHaK;G)$sz-9>ZNr-08tyCG~sV&cP z4(Z?`vX)WFZW0MRD(Bjm0&_4zN$F{eFzErlP}2(Cz&-+kAuOyad|IYk(^wR&_%n!4 zW7&ImdCPq^2Y6(LR2vcZ=}$dHMKm`iB1{%$&sud_(JJuv?ZL>|`zjr_aBqY9SS&tu zib@KdWDIeTLv6-!5)x2vnlt(R<84rnaYQnEw)Q&bdt2Ps zRS`&x_uCY)ThoKbx*5Z;85CdlQ5x7$7w1HBm_R9lKM#h(+KTrBn4KJtjgtxWnAv>A zh*+q_l|r!{eNsGFYb|%mNKafHR$U$C;^ZJ^lMmtW?OkFW6x~CGIK{U7g<7oTQpU2r zmNfiB4;3aicKX+kg`buSyU0fFJbgZS-4bjs)={`(Vz8vThb%|Rjt`X12>2Yed}H*j z)U=YTUiqei@yj3MXCn&4x=jEz zqW?QJIR0rTeo=$sgj>W1l7eW0T~bkjxrR@=sKENHj}wXQGs}`dvH6;}DGrL=Oc;7? zblThV$=b@!;`VEPSqm>})wWESe`OTUoO~O7s!W*At9qo(R|<6Vxx7tysY#mHeZHq> zDlWSV+WK%RL|p9ToKi*W@XEZ8s;FGMbe4Mg>%Rg9h+$R6Dt17?fcy9g_ao0L+BvkB z4Znp_a>zl}mCZ7a4Y`CEW{jcUm`2Wqt+lU=eAE3$BxU)>3iWuELY)YzOABm%8*KW4 zB9PNWLgE=uGMrNsBROBQ}ul=c~bsi_qx&q+i)HzMSuW=ANcK zD3`;bl|kf(wjG73DxPumxgQ!DRqX^tjUdsR9z`zKbtI>9B0J{Ga1tN=OsqkC-x&Y` zS>Y;my#)xNIoguQl%@7GQ;Fq2WrL-8yQHr3;zr}6BMSGf;@i-{n@ok+$C~ z2uqzMck|9WC0A)QjG&$%n3D&sS+S8c+cVDlh-yv@rEVcMuKA&AZ>x;hFLeeBAO;Rr zmc_n-5+3~k8SUdh@|JXvmj?T*_4R7Ovjg8tp0)@Vu3k6n&+^kz{An~?v7n! 
zDhB>Ztfv4_ysPXQX~fxv=7xD;(xO>CJa7g5re&Y+=H0Z^rRxKsBqJwQzOI#SlXYi* z*Di7ksQ>+E%5>0xh=2jW7<51j&40-&asCx{{7RX6XV$#=J!b$xOK5^AKL{;#12U$! zr<~-1KQpG*KQpF_HiSvEQaC%H^!@#J_xMlb9&YrodmkDHG|)+W_lZJO1Ff$tFmegj zraSQ*0()lPVU*-Bd1Qm|k`nNX$N{oSynw7yuw;)JgE3xH8KI}90h1A1>AFOp+h?-n z%XAtG*$Y)fYFO88ccuCs_aHdo0ox5djRe#tgKhaKNo*8ZSXv_A%5h731DZiPC$inB z5Dx)$4bWb428V|niLXjA=T~J0xVA78FUOEcoa4BwB=_NR)BA3=11^^7A`Bl8%+(8TZ2*MjdvVCiQ_r2*OIn zG2z1Ti#O$oBIoX);l4;Yc99`9!EZg>eCC~ncpXZo8$HjCUYnn$NIAPjagkz#6G(Cp zYw*@k5bTIyys=x}Zu}K_$RQ~I2>EPTavMeC3pou#VI%$tGeh}Lrzr-P&nIlj26;RmsT>Lt!iby+U)t+!%x0fqp>J zn`<&QiuNsezky=lyw<-YBDu?vF&KGFs488R$`I|YGX#jGie3M-yc9TdzBJewhI}WOU&--&koQI8xOGKYxK*M>ux-Cv*ETznROFy*J~T5 z*+65Reu0?UD7CQ7K%f>#@<2@0l z((M3ZAQUVnHGWP2QmyS04{tQe6;Y`KxE%A4B<*^_iM0-kzpI73#ym-X@*>$7O&*rl zTu6Y4Mn(*qOcYsRAh~{C2gv1KNXsrp9rKB>n!&AG2_zMrW!C(zD18PxRn^eUTT3}* z_Y#N`etR<@k|Mvz0I<{;y#)kH0WOixL<`4HQAI3@oHGjoAelXYWQ*`yCpSU7 z3ILLo;0>NvBCYK|pbp+a0yp>Kco0gz?_T3AhnDO72g&+C#DQ~7VT)jkJ1lAM!j9z5 z;KKctg%VM4h(Dl1ahY+6PIvGL`X9g=77L3IAEYpm`CdDoTq(mNuRI!qK^{vX(+&B-OQpSZ?=?0-J(I-c?s8j{r z#u9xHr@#|TIint!FP~69^^~uN^o~vpJW;{fP?e0jthghLA}`{ewK(n`guP|i#M##? 
z@7*dS7}x3ScjTXgKYVYEHsdDOrdKykV}nkr^a6p~?L*_Qw#I6S-p z{OW_ePhTahftA1MK+>#t6+-_A`zJ%zq$o$11#s{*0108N|5E#7{7JIBe=GjUji^mS z0IR^3{>%tBovbpEs{{Wm{&Cy@ihu7k&6Gt;(MXRSpYOf59POMQGUc$* z${qBpP*DmtW)a(D=_c#hyIJ%LTvI^{YV2iHA!iujcR_kQu~r|i`mT|BX)FZV=|zhX zYf{qviwhU6mat--VMF7;2f80;Vb0RFeB@A^Q~_@zSoUMS&RXB=+#;wu*wsBzhAqfT z2ucP`zE06y(v2a@wjyl3^+oA}58dR}M?A;cX_brdWUpv3O?s1^H}G3%mkRp$wg2As zkb$?qy)lJLqbUuXW7VK;SEF;4mHUbA{Q;_Mj#dZsN3{WuN#IGO3TpiA zU~5MhX9E+nyycuMxVLspRXdF_4u+^_dPBfDrzo@vZ}=>OKr%Udcij;6+VH+glzZSS zs=Lk0z8ePG*z;6Lkn5xG#FO|@X0ZE~NO9#{rvk zz&)Pgy;1&37rE=o&y<|#Yp!~09CX@O<;ta^xeC2s%Ie3M6uqCg9NjpoYVlkV=qh9S z1|AuyH|jPfSn6;Ti1NAM)@ckH@0P7TlE(XGoX|&O)+=I(27Q_feBeg`QJD+Fd_q_b zx0A!})EWCe;WIs>eY)sF(Vd_4{KvSZHLJZ<3Lw&W#6Me{f0eYI%#5v#8GgP0s%9T+ zO4;IYAhlua`x4$}s8^BoGZRB5(Dq6^`TJ_0rD!rscQ2R=aVMl@`%5_Z@gu-s& zJI)zj(N=F7KUDaCM5en2;{$Gk{-{nHpMZfPJq?u430&9w!Uqq1XqM2q80!9|)4C|n z2Z`A}O9M5eOV3l>I3ikj!3YG;P!&-x9iRUoTAW${P70jiP;l@W4AznZ~FX9jK& zM0@FZ+lfo?%xtoRD(BQ~(xF+gxsqwu7D!rVOS4q9S_S`#1qe4dt1v!)pn6J&ibISC zme7-Hv{|DlfttRp!`EDLS2K6=`<#OYtrBQeVm#Heq^@hRMCc6m&~y zzU>@%Ab2D7Nwh+Ju}988N}J6*8cGEDU?utuLG#KPdb-ZHN8Mg6mFr8l_;rwUZ?vI8 zaZMCbVL=uZ#8Z$TSwQYrmrEQBnYn;hI-igI<9*09g`6ybudO>*U@B)sSYUbk>-HT3 zzeqdp+u_ah__Lcv&)f5gIRDFiApz6_+vMBq-nftZ#qlf#!Q11g5~7#(x()JbHuP7s zkhCoC3_qJb#H} zhZ+w$X`CI_V;TXl<-n&0wJ`5a;id-K8!*xApD+oY=4>`+?5!=M1!aPw)9buh$Rm?H z;l9C$laj$4xITv>Aub3@;x*e$%i9e;!SLpN|5W&c6mQWP#9XLG0`+`&f`YJNv2yel zbY^|z+YJPl%hzb6B=^8Gq}`+oc6`dTdvzDPt}uwTF6vuocSbaQP4etHFw(tg?7hp0i0L_ANzZ=VBn<6_6UYsCw6t;xaMJ3mYMIu9=82v778ouxVK`Fs7N6 zu5K)fk1#7Rv{S|@E|KA>dR1q0vPuFx1paAY+r_gp-k6DO(P&u5qw zWe&FOS{BzlGbf0Th!v}3&JUgIh6u<|&)CZCvS1s%#Zzq#jSU-CF`?zSTy1tUXDvu~ zlh`{+b}}+&;?f9{Jrpc$Qiqoht@fA*0jAR;$8ys?Ji{9y=G=jbuh93kVNSJ6cdOn- z9A+L_E?xPC=5i=Np10V+T$in{vqRLO$Rf*r$)MUzxLpROBKJsh z>MT|sJzd+)VD|tbI#H2NUIe$N6?MiKv@)vm<-M|CCVo7l1P8j{T1R$tU7h>RyFH$R zFmWab(@evJUQUMk_oCv-RvQI`5yp(Js+uJD`@8WglpO8UY};aSq7$G8Z4_jWX(ala7lLUj17?N zu(SI6$A(Rz95Qf~dVR$FG4|u(fd$EXf;s<@RLCk!iF``YGFiWRhRd|P`|KfM-~HU0 
z4X|b1KC18+!Y0Ev)wXD*@x=RJGmO)lm~M>A!!vjIm4hT(^)BUuJcDlZbwmE0c@6@W zq~klS{-y^ZBW#lM##_wh+p6P5jdJnuh@q*QZ-(ug9C&;F99H_R_JjL081+1p5RS1J$i+6ftV!mU*=LXH4isRgZf<5+CR=fmG@mhHR;qWryb$?Y+zXXGjz45B0 z>G+D9^Tjzd`zT9Cq6Rki{vJF(i*#DEgdWa|dKmR;(kfvVwu_~u*Uti|7Q#wOA5)|D zeVnJXDnAzK-gvyRNJ3U-G?vK0BVmR=or}f$0#3E8BGTg8SCyQJTc~`(FqwWVL7GnF z*}fT?MH-?eVZzJ?3;0hBeA;HYBF+t7qivh35|&Pk)J`$}DYXgdHFHT@*7}?zICb>} z4!1lSBCubQLD`+l+|54j#elNAl(`Sh4SfO%K~wT5#Jh9#q2RQ=Qq>D?*2)ya%e^7N zEjgTgTti@`UtFd=V_#yKAK1Hcu6xkwFf8sCuknBIs0|47KfR96x-b)GaeS9@!9_+h zX6G%~iKiDbrupF#KH+efBHQA{(pi>uJjb5;2i~V5*iy^A(}R2C?gVvm@|D7| zG4AT-pu~P2P*U52WD-^{!{g^EyXV8JI}cnw3gUgBdqS{h2gBhA@xT$+T3rRllMX{i ztgiLK^@7+rH65Y%XJ)#BH~CI93id(uw~^CS-&=>9vKE-PueEV5`DMT~zX!jRqJGZ| zPY(G!Of=&pxN~ytOSCJa2xD$W^P@HP;M7r7Tx}5b?u%@T*%256M<8_uB}lGcx@mq$+jzaUUbAOyYsmFk z6S$j03I(-x&F|1~XapQ_)@7EyE1c24V0*-=zf#%UZNGzr=QV;WJxPrua`2_j{t&Ed zN&>y#l9Ud=_l5KlqSI>qM%MlqersX*izi0)0}mF2g55>yY^0Yc%aoIRwm-CIZj8{d zqN;3Zn$yCF%<^1E(_j|*jLi{W_+zI#ZWnzfMD~xP+)o_)nSDAaA(Jg@sdSs9HV63a z6_c3v`esX+nh$PyP~CCo*s*LVOVvPPn0)$yx}IIOi__C@e^iajVuq7rfHOIOIwaaZ zRHK=`gRzm4lY_a9=`RgwG|_3(#Rvzn{*>1$y#4gyOF?BeDHQn$^gO6hY4#+_w=dHc zExe+*(J@ ztz1U2#B{)~S9IXet7r)$nPTSZMYGZ|crS&Ix|jjkgWICTF`5cDha5^T8^`8lB_O^1 zO8(p;`~)Lb$wG}&Y{V|P+J)yMxi7gd#o4P~JFk3G6JH`_?{ho52wyzv?e0Ojfx$pF zI>-Nko}XRqkN3}Q%`DXkI3onOf6~A1|7UCaKRS*7>UICC-}qO~;a97BU{q&V7bBYJ z9q6mDn`i7CAa||ALENCS2Ws%R2(gqHeTVkqS&eJlv9n`0zQdmS#8Zf4g7B4grU@iy z+YrO7;@oqQu3fN_^KcmrnvN%0f11861yoj6@^gQ*Iy!7+kOJuoX17nzLS-a$VKm?F zg2ZQc#=(a#I1?pB7S@-#kTndlwP&>3-f98wc@;HYk0ZSacx-ap$39$+2TE{7>+$7$ z;2ID;x%4v-JaWCwu=GX%E3h6JzPbgpB>ks#EDL6NPYbwxB*179#a~-s2l(!tj2)DW zot%CO0mE;fndlA7ZH^PTWxEBCg4^MPgZ)z?L6N4xU_=b#qCrS$$uL4UMI%MHfC^i0Xd$1GiWwqAZvX_s*l>klTSL4)g53pF7m(ZyTK_T^sdTt#i? 
z9G(Y_z3ORlg|H}@JkQsX&W_6w@xMFNuF%e*D?UM~Bw?Uf)!e&nPWG~`mDI<8ES|4{ zJ_y9cR{AuWh^L7{?ZKf?p;c=}F#BsIOv`Du zQqzP6-Rw~e%S(E%%jrwv+VvEYe!;!KTQ!Kb2V8^6K&ee=-3kaa$J7m1ZA=SbTmN&z zzdWYeDge&t0kT}kfT0Q_TSIvVTRTSvLt6*qpDqbt2=f1CDFBk_6{BDoz=#&K2Kj^- z;~G(DSt&^8l*?$QUlAG68nit(BppXudeW{tGpE6Ro8&q6a@v?k`X!dOmBdU*nOc+- z>MLZJ8eps~Y_*PtUo4|i8d_BqM$ri(;N8NDc9-Q44Ln0rk>C|Tg5$sJsAsIKt|BTe7N>__i0hJod}?1IVE$&-v5z#ne}L@@H!L96)ot3aV5v%#Q; zu%$u#eOunuu==t)4GmDidX}xoCN>z>t+#SE$Q59Q%S?mG%=FqlK)=qH=r*go2-Vha z`c;Y84Y4tPJUG@MXuav2Czd<^zMN5E)Mv9?>k1ya!}SqFNyXn67U4(!Su!C{V#w`5 z`H<)JeT{au6-rDYL7$IEosn!V7W}u&ljYP`c3s`*#Dh0f!%U=19xDlH3di^95us`V zb31J>s0$o`&HPU}bk`t6oCV0C5WqD?`d2yBx3l{{F$8ScKaMozPMal0xK_k@e#|xw zho)>y5Dn3y-WRTro{#({FM|6@cYUe)esrVjYaG4pU4<}tDr^o}SuQUULlRmd3`U3%DKSNGvW zj9#bh;+z@T*CsUnnoIfynAmbp@Zf5u0f`lqnK?_I(D3SuuMF|5j1&e65~zOFhW=}x z!i7J6I0;ziPCF6EzLJ<|H@8YOzoY~e;c{<^)sgx_N|SICLeFk3z~IBx4$TGxG4?%6 z&Sj&)1@~y&KvJ_XDQ9{p={O$AuE;)rQs7>LF0WsWRBjMPd^9~w$DV-_Fp>zfGg4O> z6b~P<_1z^D`=x#xx5)hANv%^nv3}3*P`kz#IVp?-X08k+U@B%!5 z;$KvIa1wrHo5=$h(E8Gn?e0rr~j zhrZ>xK1OHvPUVqf-o207+8v~&M-lY;HH1hXMkDrbDg#4a5R8#H$7}R3a*d%)57wKS z+G#RMvGQ(Ood^kQXf_6)^n)t5C<>E42C9eTv8XZ;$a?k^9C^^=bc>VK_Qyq|T}`^@ zPMn7XthLN;zkNXq);(Ds_QChJQF-n#ouFF#g5tv}vBC;1fwZ#A7WSb=@PXM^fRrTA z#;kRipgt1`?gw*_01J9SJGQ4y1Q&lqE7ZhbIemBPRh{Hgq>I$otPedXz2Wjb>NrIB zgS2B3iNwGx;6m_jdljQAPmSq(7ki zYVQ9X<@fI9zfnN3|A_Ld*ZFsp-`hg}MnS^+Bg(HP(ce*i@1Xk|g@*8tD8KsYenIVJHqdg?7twq|9unw9M}Gi z_QZLA%QtR3}K z+-!{;bm&~IED3YKKq#_+Kmf=8-{XJr3XCQU$@MTG1)oZ^3hz*xuT*~(mF29&1ui-R zBFkV?LtVa*$@cOf`}salN^w?dCzhgq|Ft7oBIDi4cnQ% zDBH@ocLB>Ka#QrnQ0Z$LaiiqdNowb2mv#BVLMuz7s7Yz?eTDmJTmS3%95;hRx*nSK zpxqKmWKDu?T0`J=4k_+)f~|Ia;C3-7?swYF1E*K$z(x)K@QLTsX^P8%vCvuyV8m7a zoCCe_Aeb!iUMp97Ti?^A77kZ7SLku#C<5S=5BjUe#|M!nL{NqwXHFWN#v+I(=sXhf zjy5F%tdG(sqP6Q9o|TNhG=;})oWTMCy}p71Dg1|Jqw2OBfdDLf9bmYy0L#|1H?nk~ zr~Bje|1A1{u^IpE(o17yr9l}17VZB^G}^|!O!a=+l3w&c*6Yc7(9GfILew11lMOp98#^mAh)Byya+Si}#BZol`I#m6Pr z%n?+(*$dZvVWnF+fpny=zF1D;YJL8^P1hbninpaYY#iZKtyLiFt*a~ATed7{P 
z+zy~Qe0Qz)t!(VF6=oyuHbeQu)X@bZ zskA7C5-=jzfubXd3&Qx3=Bbj9SX73%qIvY=MS&{Ly|%l+vGwOEExoTy9^iB!*w=kL zx?~Hj!+g&)tbv(q0x=B*>ngFRjLAz=u7#Ipgi?bvkQPJdPWtX)tZGZBW5S8SD zC?>-Wj(t*P4B3Ta^bD37?F@rV-Ypg9L?A{i>$pJ^5K-cl!n*`3jmRbF?t_6?s9@4} z_GE$~9vv?#4&b*8TUoYNa_ze+NE9@an0GKAa2s9Y$11~XwR?3~91{`WHFn@4jo#VI zDP<@Qfs-yWsiJGGk~C)0QtAb(ZyCup-qlKZ3pR5ooKF%Fa_73drgi}@2WbsEkOe`! z4&-V{O!0dA1^Ur+d+|mQ5$`okfNZ9`xC~Tt*U$W{#WmQd{qa)wy}6Zvn|1;hj%ES} zVXXccGz~ct$2M)mv;tgoRdj!{Q7(bZRjWKHICbXiW50{Rt67o9wo>`C0zygoZBVSN zvqA*6`EteAAWzVo>aN1?_J%@*3ux4OW)#F{lZq}RqSOg zmu#z&ZAfeuU-oj)nCC^x9N?>h`u3rBD#e%s5keViEw7iK@Z0>#hl)6pNrPL8v{VS4^fiHC%v3P5NC4A@a|XDbmn~cLtE#;xg!~V#(hPzPg*vS;5O;hd5*1oB5+$6IAbW1S8=L2$?Ps5c zmv0?sPsfLz(|UxoIr-7N!=DlL?;hgc(G?hwF9VMK-~DKfA2A2yi=wwd zF9B2S2`41u0fUQQh3c&yfV=VeKhwU8csN^kVPGjD`lf+p1xK9C9pVvk{&Kcj*F}zH zS&Kr0K#bu#>@@XFs`7SbWF{EgG@r__1RaVEs9N~uuF@w+P18NlX~sZ+3I@ES?8GI_ zKv$TYah8{&nX>LMFMnF4HSu0Utuu~7s_DL@t2)%!mIjP@!2cmLID_l8qHLskXj5aK z0FS~W7yNa4FPznvRMCv8(qAVsI*L6t`t!REcvSkmye>^_inUUrnsX@@PUopn7>5v6 zSj^ES2RRE9xuH_d#q9j5j&Qihx{}w@lT!&YyOd4N@L1vy4HfveW_BkjT#;(Ik^hUAXR9-e$Lcaz$8?cGpt=sWg<^vbV5zE`4Em0?KO z&e33nvAa02XDZe1!YJud^EML30lgtB!C+&E2oev#aF1-5 z8mDeH5Vqd`>>RK<39~g5-)6QwX7ShKL8^%H)U|2G8p*#e~}Hy1x>gBKOF1`G|mkcHx{Jog=<;pEm{l0ZIE zvk!y#)LBK!?>%a+v24IRK(&Jbm5BeM+DyMxJ7FE5+DHTIWFQAX|m5qg^x<4w$U*B!DyrIWwuDE-U^m$Uy4q)&dXP5R{38p z8T~A>8f2e=02opW21aAwgodaoOoS&?zXJeMi9zw6>_BhtZwQs6tk?+iw8Hf1L zj@KB}m~tp8L=D-`h3AKp5A10@2N!<3(SwohJFyO2?#XQnYsdP1Lm75OJr}uA5xWx; zgCo2HyxkSD@#3VDws&r;@58LR39)T1i?5#g2E&VaQPPVMm#nN?_G6kY)N+w^9*akb zS$+v&ur9D{ceTmnr#d`%CbQ`+fA79l3Y<+9mhbQgwvC5=?S+%-B};1{N1w*>44rOk zgVsG@PGsst$W3IN5sOc5UJ0jte(UBHwsIO$0k*L9-`za(zuo-O|G0T_J+LJ)$zq}4 z`7e?)wI^#N2GLRB)QgM{jG%rZ;J= zKIZS_kil5@jmMW|_GCwcv5ps7^EH;h(KloEMo4qgd!zsZq;V zJ$k}!N%>qT%st|^>fe{3D$|Cg9Dc;(zQU>QLO_}&wRzcM&z*Z#QepT72}5AnD$t1U5|eR zf~^tyDLM^{uh(r-{kSDd8jxlO{+ThDDRUca;~kLQ(NjJ9i|JDX{Xgq_gVvI(I!Llrg(3CaHG+HnW3gKj>7CvW7Z&x2s&q!cBj< zRSe9Gm#$IO^Y){&qluPwmU$G6X3T;&*^0LWpZDkSue=waMBN~PVFNdUpSc`yBN6jN 
zJY_7Dx+J7DLcw^dM>koCO)JRagSK0Kj#4V>E@BfIHhi398DrE?_N0*{Wq~3FM~UX= z-(j_xl=6ht4qe!;!q?ZF2dWzRqRm6CuuNrl$5}!l+ydgt?*`mGj~bAP_$rAiwg?$B zjLST1M%C~8;~*{M>NMR0audUB@RbaU-WH22s$27g}SdWSUaikF?N=*SgIHcP7$g=86> z3N5K8%wzzaxU`#M$lPc9iD7pBW2eHl6Wl(8FqRFeN5JM2KbF*byWx_(Z1sJKtdLeA zs1SAqv7-r+|2eybf{ViHu~awHoLN)Qq2i6O#&wx0`(-?)P=o&k^A8-;i;6`|!A6C) z$uqst?jUzc4xI&(Vnwlb!h~J)2AJ651h_-y)w3PJXy2>I#+iF1 zvB)m3;SC_EbdEa5s?D@NjyDQ09;dKML8VRjypFr@A?xucfOFXw*(=gk@n17+p`*66 z4kz3_Hna?pib>6wi^D3OlX`E#=#dlf_mUG4%w{-52xKcsw|;Mqgi^GekS;FB*+UP{ z6G~9&$sPS%Oj_7ttfUd$d5Mvi=N42ZL^3gjWR}9*j`2g`LP`sH2)5Wy@8nXm&YYB6 zygct>Q$aq7!XkQMPqaJ}TQOV;W7Q-++EGhjCCXdko%`1__ziZ8M6(~c@xy77*&@Y- zSu!-298c8tq7=tp`_D5;w?jlO46IQWq`^;xFZR>bCcliWQPv;yjyew=M4u+yCEh+I zcXrs&t@Xl=?e^`n%Wdf(ykk=&8k!+(KhGTli6gN-+7};}KkDOU+HFMN>VjM8VZx`N z(6u#&0WHcDhMx{4vGZAnSz~2=6wSetKg^!g>pMi9sA&rZMUfN!+=nJy>!In;mj$C= zcUUT{r7Rn#9){{4_+aa9ZXOvzFs7HB)H~X^hP7@C0Y%lxPxd7V7dV>Nqcnq;kTqSJ z!?Ec^`(q*Y;6w0h&3VKr(rb4Yfrl%?^Fj}Sd+GY@!jrTC5-!^A zV%Ix8)U=^6(6$s-HOQ-Z&48ks?-nI!EDk~ke%5p!absJYSRgkP{7eFH)-eFdf6&-_ zzc^5f^oY(fCB&a-`w;!yXWm~RHf%x0A=uk#8v`0s*f*FVS2TN&8&!|G(yLcG@3c0) zyaN6CrRG8{_r^RIfuB}B{?jyP{U6EDLb)7S zKX~t~mQq*>?HRN39h=819WTuNH|KCOFqk^cjr4h06tJ+L>Y}siETzFQ+Jj-Hu3ts7 zu!jccAykbRpnKdDkaZ0TqX&tw-%oHFQ48Lpm(-!gl#*cI%|)^w<*7{(*`voqJsOLM z&iVERCu;L+DTY&}c+!{ycyC~9X3j6(%kKAj#S&4cH>zR>;Jr=w0rne7^fMjM)*wP! 
z`6Ew|Ln3yBi+-u_Kk~?Igu{%>=JQZf+Z8R>!jgAmhcaw1?@aFO!#A*#l{sKc_8gs` zx;2`2D|^VONi^C!T@j86LpczE6lJ_YEQ9Lr|A{oWfC0&G#Ba5x zT6=@E^BaL0s5%r*CV zB|grBPuDh=@Pmy#2p(T%0ZnRoCz&yHoL}En@Vw0Tkzl&*v~@}P*j_gbUQ@@k$;d#% ztwL{GFf%4)C-%(#3`aaIjjL*?rY2{tVr0DH_u19Qt+(GcyBW;hfQZ9cPgpU~?svwp zF*;3mau@uW$7NrE@tt8U*2Q9$I3$cf#@bWMKFW@>W=FZ=V;XBap^Fk;(}y4ntXItY zC;q_*8|j~nbo!kkxxq=^7>D6=61YePMKjt-8Xze-xg94j99cvEGkAQ z{H%ekx@#)3PBc9xhYZbxVQxYP{lCM+(=^DQ!Vn)meBuc9pv9hnsrcyi2Wd{va6W_Y z%<4;K3$-V@I_#?5x+!`82>-i!P+bpX*Dk;?qW`;Lu>WZoIyV0%3L@>0|b+Psr8UkrYi~EpqKqx*8^Fp5XJAK+xtHp*0u=4G}9aEm6TSpgByK6Nz*i z!N#I&{1)o3Rn=9E9$q?@cmxyD&fmjIiDQWO$u0ttJ}kC6ZANDd+ej9y{6N7Bzw*ScV>!gn%qGf8(J3!3x% zrFFkCau~Gi5xwBG_XIl23BQzB`a{9i(>sB9l$?1bq9{ZOua zHNxL6W{YsuMR3pT#o_TRrcq5t)RQudS}HNc^EpvDu%>r*A}SVdXR)qu#IEnY2H!Q% z%+ofy^pgr^u=qx;39Onbr6y4FlNk@@<`k++fyT66CYgE)wl~UJ3cBHkqKc=p!T#JU}OGD_d1lDv2h6H-t`t3}QNAfXXH{!BO+ApLCObN?wpfi~U3Dog-8 z7{&Hx(aQg}Qovn>2M*mOF|1EvQ;+H*`+cK|CpJ~Lbf5jVBu5yy9lZ9n2aEGMjCyD} zJfEF2t)Y7aPRD4AHX8&*>)cJ(qi9WjZfrpx`~E~{TIzvIe-pNr(-0+?~%`ER@I^V3;3W#YwJ1}u2@9PaaMBTcOZ5@D4vV>?Q_Ezw2xe54 zO%n6~7{^a<_R3-O4`JU6TWFsym=DaGnccznj~qekqFP>2772dKp_yzamuLaCdvw4k z%YUI99Dh=dU#8)eS^J7~j~6T83$ToH^naC;7!gEdw?_Y@&~-W>b`-gyo+Ru81Gjhh zk0`u6i9t1m+B>CTG3&R{!qnaKZgrtk69i_b2%JJ&<`J-JQ&?>Ze)zEynK_`_@}H!O z9DTD<7?-3pLdvNJ6B5*~G-Rn=V;OZnK{t4AhovaD&Ib&4&eO*;Ia7f#_Xt`!ydPY*{U$8MT&XZuCWkbwCh`lb3cQcaqT2D6%1tx4%jr2->;?k#6l2|>ti9;2qx5^&(pzI zUAWY_-dI!cZVml{o7*${-k4gy0qhwlkkTaZI1wBjj0RfyHbgc9mc|an&e=ArnNLRB zpzgyHQfwm!L+EG!aWpnDUl1W+&6^<(``c(jH_=h}?hr24=qz{*79@otvqVgYh^2t( zOF=rrwLC=T)wK5R`k74iLl@+XcrVP&2IP@f`@HWQnp>UO4PmKNmf;&8Eiqoo(-%s= zG&bzuh-8K5P}fv~N^?hNfNK^7K-$RZ=y!R4kYQ$ycG%YlN`_pDQe(s*<2umt*2Hf2 z!$);mZ31CmW@u1mYf8y7m6bbrmX4Qi^7SbU)@vDs^4l;s1x6=akcXq{@qCIaYa{7gd zF#7BOoiO`l&E5+_#%Ih zky67ytv|c{BIKfoH#~$DH-Iw-p33hy^!v0dHIFEt?*DIMoPV0wU##9+-ZyXssq#JM zET2koas4R*BeZ7ctaGf6>ho>n2|Tnhdp^!CNYD8>?nD)N*Z;73q$XiVjXbGCx{4d3 zhjZR{qcMHp<<5;+=+INln+HS>cSIb@H-m_QP_S53_&EVcbvDa9ywNC^L}ilT@=U{0 
zG#d#gR=UjoF6Igv3&aD-OQhq}dG9>uLjsI7GGbX}qezPa$@KF&K`!<~TK6#PnNEa1 ze7bQhg`}jl$O14Fx(sy6>fzbf)(VQAWe`XHCVaUva)?u8&;CR+l~!iu15r=Ye)RYV-qaXnI#}0y>^6E*mucIGa>H$xSXmsVChbxFy%wZrXI)*; zJEz@5X4}1s}uVvWyRRhcx=c;LhQ{0{be?B5e=w-ctt?>Crbwt+y& z!6owR442{!omC;NAKar3-2#YxAC5br z3|G%OZw0h`S1iM|?aEG5KZrPRt_f^0Y)PjD&28AR{5lpEcoAScgH0rw3dw%bKCHmc z%O7k8vsa2(`Xo3y=@n^x`pc+9#~0i1A1zP~ z5erXo#7zTJDC~#ybO{ScU0n+_Pp#<-)nC-HE?B$c#1!K-QvD*{NmzrF&2~w79ivFo zESyG~C1Beh^W-v=6kX&hhSx<#1_x)drP$>4yV@S4dI#)RMrsArum@wwC@- z61N6_S5#qaei5_DkB8?1X4YOpXkb8g){5+?)0{xKsN#_UR@IA5%A8^pvD{^j-@L(+xFCLIdx& zF%522wUoWakmAJXmQlJbaU35%^mQ^cjxeqmEX6{XQNCiHny7xjv<`V*QKBE)7?*Yl_jSycfoN z2LD}#CuumJoCe77pa7-dcmJj1$MDO@{-fn5-zg9W0jvsh^hYhY`DBfe?9Y}T`yVYo z49$ZNDwHvu`@5W{UHcmcTjk}G8Pv3fs0K_fj9c(=O{VpXFXzwZ&XF-}wL@m4IQZKh z<>#Uo=faNBM{0>_IADQd1C1;N@Oa`ivENlDj7D%iqbbeJc9t!1HLIrWtqMEsx|X-t znp0t~pb9y`2xA0&=a$?q5>WyjsUhc8r09qX_M=F34D{ z;o4@Emy-%})J_a+bRgF?5zyfyGgHPtqyk%zmk^W;x_HRTPwTT=iaQK3J@X*n9o?zu zN&f?aFRS%d z!K9d!J20qj2l%Ebzk0`@KUTW4%=KZ%IpK z&5;DDP<#^4&@s_yJee2mxu(x;E(S0Ao)i)41}FR|Vd;=)QyP&Bxn{atdfJ4}f@_~)iBc7?+=Jt8E z!KSWK+WU2jykERaQuUC6qlg>_JOG%x8al$RTDHV%YL9}WjuS@CD$-v$^||)SYjv9T zRcKC)1zWZCz}+V_=4hZNy{9gwEdBHDJ24!y@X{z3Qd|UFu2nf$syvnfoFs{nwoT*j z?~A}jRc_m&?5N`vRj|UP)L&_d6pYF~SDkhXLJXuJ|2PkWIU$$O+0Vnd+=P>U0Zcvo zvx(i7)zKymP-ndNe+(J_YGON@8d(|9|9bt^zy6^qZG*#()Q)-JLwJ*+UQIf{^d2&S zrcd(8zbtPsMpLn+wrK=ha&f9mA`%=1P4xQB3gLS5CqRa z4e?_-KL25~IF$gLG&uc{;LsBotOYyfnYmo=EZh=^&hpcaBd6f0=~O9Y&QI4V`xd2^ zD#kqv&D^PP z^Y$7v%AnPW@sv+eW=+-Y2OIE%TP4bOQU+~{+RkSXhUX*7cQ+AB_I-Ug3Ztg(EJ5=K zRt*k64J0yaWa&M@H>mS*`2+=7!gHnRr}m3d&@G_(c5>i>;0@KM(2DfM9ykIithe&0 zDG(HbmFYGG&8lYUXuDn?^!l__uP$8UH$c+8(1we|HBm@}1({V5e}a6^0&=^&SZ1fs z%mqBt`Mm8O?m}kB<>d%`Y}_~lQ#m5S0xLRRc5dnUMLKw2kFIAXo?JD0U!PXR`JeBK z2%zp+r(WmwC%oOxf6QSLygrO6BYNs=SR=1xL&ungq-A+!_*(Zfl<(wG@8ur!CUK#0 zK&;`<)1q^CrV|s==UhlB-?s<9i;*^SY{YY0x~m}sOVx4=`Ry`*zC ze2TO?b!Xe|Fo^YTsvBrG1~h%-@3AmEc*y;VCE|HkQ$JuP8#b;yu!@tvpDbG@HA-Y3 z#6<05?6rPg*$^MYB2WHcoYF7)1J4bNC~OM<1K%B+3$w_20D&#n%qk*<-HMktQKLMu 
zc%G#10XyjC`L0#>swZwWA6Yw94IN5hWw|i?t*E#G5@&XsC{z!5qc~Kv&j7JtfJx*S zxzKt(ZmguM!6lj?B#I1zIXxL>>a~`Ghl0Hj}%Vd(Hg&XvVoWoX8#Z zZ7vh0bujSJ{kYNG`95P-C_0m#QGmHvV0>@G;%-T3PkeF3gBkF_9AX?C>2bS6F&$iC zQCOh2Ohqsr2Z{4N{1{g6xr-rzdn+$-)owlrq1)V^T8qjqz`n!62`#%mf6f4dz%z5M zQo)74T%CU6YOIoZn<_R<4P? zF20~QJuTCWWok=?iqCv1X&mYC@U;q*)zDBnB{V7!F;y1_KSl9tvUucxj|N<#CV_fq zmnaAhHc8`yZ}A;r|2w#lVPn^KLupPViv6Uc;iCr3Vx%Unsa+I7G119(YS&jIOY)!U z#yoD$mF$K&jScd`MoFA%Cu5Cmr}`4CTbDl>QP0WLG!@53m=+k=s^FBA%JNjdsI$6q z)RTEK65owOP}BERYK$N_u$WeOEd87pCvG^%r=JsL3byH95!XC5BZ!ZP6RTp%|2*FV z5s;ytv7Os(&N_C3r`Ga0E^I{An1=mgt;N-hr6Ao^a{naR(a?;MQzK07NU*F~9bVyc zjr(K>Fs(M(J6D~fQ@qjl9J^5QmHIx`OsTf%t~GmzBTST*g7&{`RGu=9kMW$xyPE*r*R7C={l~T_6{MU6P5TBL~wiC zP-l%mtD?G|xl{x*@#7gJ+0g~pJF}zf>)m!S_IVDFz>G7S>?IOrR=M8%UWHwy?O zj2K+hG>P#K_TpD5*gL4mcIhSKKViL2s7yUVhu0^%FM$%JNy3CX7R-Xusf=dB7%{>x zgCH2nocNmh1&F%B3xbp9?llk&JT#1Pj`ZZQ%23SlMMP&+5)`Q;RYMvLiL7&u4&SdC z0UFaWCk$j;!=quVqyvFYj_}&^9v;4*pq!LtFPJ{snru(BO)!7MIV9W9T)JN{%xK_E z`ca(l;@>8buW$vcB{m=;h8_Tex8`CQy16+d_zT7)(vX3G02ReQ2FM&7-7Jk9 z{zxHzs4rVDb0B%?eSGzsY{`+PVP`W$cfth*U-~qa*U7gNlt7V6L9ZJow}bI~ix(T4 zuiYvar-KIDi(nXoJB~XJH^eNt&X$&qlVeWRH~K|R2oYIvFg9#%^wB>4(FDJk6@%1R zDLT1=*5Uli!Px>CH+FVuF72N#)?G&FsL*^>n24IR;9C4*Q<-X&G@ zf;eoEj5Fp+i$O)SGLj3&j1uGz?tCce&GhWRlKOlV|zx`2gi!BveG ztBFc#Z<=-UY~J9P53}HdP#w?xzD>y$vv265&_{UVL#1Mr8xKq+gGDDJO5%730*YqM zR6SbuGi|9deSSkT$#W@~Q5oEpcRk6ra-A^@msi;Jj+o@FbJg(5A7>kuqJ}l$FuD~L zeHXKNsE#o%?=+W0=YY}~oXs#UO`OQDQ_Z7qY?V?ZQt&?N5>cF#T!va;>A}}=t_s%? 
z+s;iKi^|rq+nPE%0-;~I)F7`}E{t1ppb~#iS$;ZLKE=|3mTNvi9jpi+MMZkq zT5%IiYIl8Y<3aS<6Jdbd2q4~I4G&=8+Z^<~{@#&t6H;;PF>vaDvZ{M)+l8p!i-%BU z0y;Q3pL{!{ZDwhEM&Iz&j0%4fF{{SXUF=g5PAy&{1O%8?1Xnu_LeYtr8gy)l!$|g= zIp^ht=Xt;9$yea}K(Av8sIMX0VwQ@N+@2pHd)#vwIqKM_`VY2$UeMRulJV3s z&d?s#_}`S3MadB5lFiTtfqNk?M-c=of<-Z0IovIXv)^n-Ho-!OModa*b^Ej=gfFN9 z*$=>4HqG?3Pd`}L59iaWo2bt>I8yPA)+M8LM z{BlKxlU>%`3~&$|k9lpvJC71y3aYY6pvX?37eEcmvZqiAzs#7o@`~a{$8Ntaz96c# zyl(V`hw6~PpA-Z8&GxNLY4qHlAMfIUnMk)?$0+oGzjNNZmt-`lT0ybEvd3>wvSw^f^cEER4Z`7@o|S5{9;0hyf_vZq$zM;NgxW-6Q# zLpG_kZaiwU%H+PCnOnTC>d435xezp(4^}*a$ zOwwI|Z-fBXPx9yW|1mZGuOZ$)$H@O2<^4tMe@&4GMs^Djt*NDq)76N)#Kf}SQQCf6wSA{C>iR;Fm(R~XR@@| z-0DIPvX)-1?v!T7>q7t+uad^gk4P^99_!qWafyqGKuON%k9;{2oP(k#7rvhak6msu zEW8lF3amy)E^h!6KmQCv<-yEcG=R%Tf&&5~|8og!0l&SYk-f5!qvIb+K>y}5Bi$!6 z>mLc+ayl09wer1UW%OBQOO4+C+rmHoyN@ibAW zd>M|@-FXpBZ+PULc=I8W2 z=bz7s>sQlA`UQ6auQeb#?r@F9gJssA>sLXb*{82L>tb60%lgj+FNC>r6$5;u1}G&V z1NJKz+88L<+t@nL8`#(z{So^Cn*jfBn*u-+J!AExdl=CA*1 zaE7IsNL8v)Q%phQYN~zz0M7o^0yatwr?MRw6Fk=bYbwe~8#|i8z%602w7)R8T}hcC z-&f}l`0Z0mNY{h{Seh>8E}kAd1~y{k^cepriA()ud_s<33=)b{21%%wbN9;W=fz2C z)I!Urp}_QUyF$w@NIORY9cFCA5Mlel2P`7~u8P!=Bs*p68 zNXTSwu5*9P+07SVx#!+}E)>6zd~#n;GKAzaHMw-;8kz=-Oy<`g^R(%CwcN#$0q4`^ z+hJT>0TH!f5b&P`xzM`KBBA6|63JWiI`^w9u(B}8K-CATjI*O09(*5FjaRcho~;M| z@`Gul9P07WbDT-o27YE(wJs6{@M$=Mwqw?5Afg` z#zDzdl-YR;p3mVm=P~s0EDYqI6eUr8YYhC?{lbN{Bu)Z0xYJGqvM(hkJIpK-%`PZF zML6A><8-CJkWeRFhtRQE3DA3Uc0jWNL5zRPl6T&0bjCfN_#~xSl$0|wob)3e$+p-o ze@fs^gEnvA1Bv_)jQCi3n64c?g+dJk%Ejie$EWl2fwu3Y(c?DpYHbwaYs!Wzby8xu$I+A zf&?G5)Ti36m#>tGxZbRr#nM|GW|NE{yx^idMV9W%k|ckhF3{TeO%6Ajn>(m8%HHK&w>c6L)>3Z{J?aNlZIc%z zX$7i>lz8wnE{|WG)Hw5@Qz!IRP`?uQzycK-AE&4A}ctBDAUpvzN zy-E75@PF=`{7V!Fh!QXa3lRP%bNRJ*<1NbD5&6GR)G>aG@@ruJEy~-W;lEJA0o}vjoXf8f z;%niKYB43ga&Z_ZH@D1MDvtBbr|@Z`)yS0p3>e{sP!! 
z{sr(?P4BJfTk`#vs1e7XqHn4ATZFgt^=}BAZx`YBWcDrMTQ>L?qAAxe#5Y{0JaO3StZ@Omt?JGtQs$)?M$Gwa;DaytB{Q4+dpmWCQ%u!~xM5oj5RU@nf*k zhA-L|i6{6Y;r{*@v=e4t1hBQsB&oWuh09M)4%Z-2) z^I;cwpQZB09zzQs4YhmMHZuaqXIYGH&TonGIa6JOo5Q;D$hs{mUbT%{?qPO`Dq(qf z%6I~KzOrk~QF7N|4ZK5QS10u-8_)2m;b`YQa3NUxGR2GHGmavLg)e|ybe*+5>xaZ9 z59w~gA?61>h9VpBygGwE^u~gzr{i(lZL9dw!1B|u( zx?&n`7kzc?1@U#Pclne8J#j3T#jG7T{mod1TrL!-639*2QqfF6HqCh-rO{Z8Tt>4f zAwWo74D)mE!-`Kg2z*uYcp`4bKH=6!r+Qv=#vvK zJLD80U)DSslQ?y)r=II-A}5GzkE!mx((y#!b(@V>*Pv#z^=^wf+1oK2E3nZy*EiLA ztX`sZ?-%Etna~+TGsks_Gm7l9UB$<%qs#(bUZkV^@G8h!z0``lKupc^wWoAEsl3>bR6 z8YbWGc37a^$Eu}+-Mfu(4S+9AQ&!`uPb}u8=o;98*+AnlfqhP+X&0S@pTEscA`$kQ zaueL8nD-kk^~4+{}iB3z_o*?>O@`$sRNUA006O{+r@a zthoMoMgZWeAOJvPIl#*gi~ms*XdK^mvh-WgJl-X^apM%e3`vztWSWV;)EcG`>fQL3 z$=f{wg-st?!6uB(utCMVV#G zcD=qQu0nfII2y$pAF;k)*oHjmig~rTE~+~lHZD^8&W0Q0ibUYi-6H)Qmk+_mRhp0zSD@`k6TT_=WH}TQC-#0F-ZgHFZOMi`{W&)5RlyvfEn18X!3%LhmkC}nct1`Bc zl7#`?FrPp*xzqMV^gHFxiPf?0HKipds>-zCm5V~WZ`wERI+~Y~Op7g~3T445^N-eA ztSGH$@?N-Ns+}Fyo=(JaGT*k#S;sp>LNy zb+?qw+yz#RZs7XEGS6hWlC)N2$81R;>wc4bl9a!*%T`2uL~*pT7`OBE9j5%h^>x=KM`0mrp-)t??22^(?zm zy(`nNgF$Mph{in+?xrrmjBrA+Tv|J83V@!1~-C3w^py+luR6U4JcaGeVbWW=QN2KY@&OLvV zy|xgi&*<&Gg|=xTIWvA>EYCO`w1fq1mK}_N-2KTW-`D;Awq_BBxIez8odD?p0Djur z(!}0K+1$!h#TSl6qma0t)vG$$$cI6d!-Nt&-y32O+^8s%za1L!8Sr>Yo!M1W((>u7 zz7;$**ybEb&s<<9oNz-sv)W1}Tw%(p27cHk2|; z_5YC)<4PJKF{^Z#wb9+?9$_u5Nzu@Ta<`0P4erf)@S8LSiTGZ+gr~b^hDi*;o$7MOH56hS z(bM_XTyc*;qJ$TGCIFKglx%V%5RdjMi#XJW_q{^x`axXd=LRE!M$?wZLOZyh*GoX+ zfU<%gmY$TDQY`kY8c#JH>Sk~s1N%FZJWy%vY6^0t;#bv7N16=k7V)G``I@wj#`2gw zmBm#7`Lk+3rKd7|2+ohp>j<;+*@D2@h08|;Pkc_f0HvQWjSPY0Hx$aMFfgSFw$TjB zH6@SXszI?5D)Ya)CLO)^mM(uwoZnsio?^dTd_G3|&G=Ws!JtgENC5y$wC4|6Li+v? 
H6M%mJp8}3h literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testskiprows.xls b/pandas/tests/io/data/excel/testskiprows.xls new file mode 100644 index 0000000000000000000000000000000000000000..21ccd30ec62daa28898220a6fdbbd8e061f51f78 GIT binary patch literal 22528 zcmeHPYiwM_6+U;pwyz&aY$tJ^#2Y70Y{w3X?S#D7Nq9v#4?$3EYYdGrB=@@l9EO(IqBXK5(~QPK!Pl}1!lEu^hl5U~Bu%--F(PwxUk zt$MFD_ue^o<~wK3oH=*q+*!Z&o6<86Jw5Y1vK;FuhdxgfP@Y29a9?ZWO9;;s3CyA~eKy4z8t_d`Ya^Du` zN^zcuvqncnol}Ccl;+TBl-KDVHuV7Aov&@A2=!1G?V&7XeQP^m$=G?a$H5nyb zd%tO1E(POa6pn)tplL=4a+Z{+z_rAj;B1W8X+)LtE9ceCo3~_V&Fa=_Bhy+lCpZ_L zeYWZOwO2;luQL~_qiT2@Wt?%t^t#5B{4Rv;|>g`=MICzGpU`P=CGZ7M3c{&mD z>8y#bUJn|Dq>A|*koY{Y5&C~F`6&3H(7&DjQy2PAT-B((Af6@I#6g(u#m!;?(9mgwmvR=S=^@-sve7MkCmQ8b`jPfuYS7=H z>v22Nz?opsQ*u6@)LXJ$W2L3#Mdi@*1A-0#WP5=x^(^(Dhu5W(p7V|J7CABDmF3^S z43O5P^f1|=Tgnq0zVI_a6yO7X<|cg#-NXt_9bw9Seai>KdOZ!B=v*K`6YyTh!5z;& z`Q3`QQtd>%$OHBYTqLUsaweGt2NonN4&3mpDzJ*Ps=&TEs|wt?SykXp$*KYmi>xZ} zc*&{)57ewGuxHAu0uQmQD)1Q2ssazntSYR_w!)%pE7WCMp+4IRi?gkO$!d05T5YPp z16K7K+OWRf;$|?Hg@ZAifVDzEPx>Do$#UKu3fVz0fgc|v5%vaY%mk#N(F|hsnoR&n zo0Y9OcF~ulxUxHhV^63G3k1=Ae(>_&ew}b{f3*o@jPjg860dlHTx14`x=51vU`&zZ zUQ2f_14zE;PL?_xzQIM3_;*tv`N<~k-@iW*N*>(p44~2q6dE3hqtJmitXqXziL$Ct zVP~PvpFeLCDjW{mgi>jSP)tk8M21i*%`Oy&CeuQFGVG#mC!tiWd^t^7N8@q`B}XG` z-zJp7EHR>`G-YoY2V`Z-*#7x&H0ehleKZAP*np~9gPBUpT;0}zxfWu`fypgHUzu&d z-U<%XrCVW}{}=-^k%-$`@ys*N*o;P{8LeO%xQN3r8kJ_BV_k@{=~l2Em{dEgUu_wW zYfvGWa<8^lu->_oRHw#zH)zIqwEL|})JpqpWbB=6fVlfDUYVppY|^ogb12%ON@te@ zF^%y-;w|1F2s|ku$T&5}>in&vXI>^C#DQy%cNwobGSG=P2;ywxgT!0BK@ewCK#+0P zi8sjnOh6FtM=^Ih;gxCzI`IaXo(V|2#T#Uf2?QBuop^&(Wdg$9$4e)?>dQbU-XJqF z0g1PGgJ2=5NP>*BPP{=DWCFq;LcDY`#RTH&PrN~}&R3e_0upcW1__x!ka5JWnd@XAhR+7iMM!z%r$`^8QS$ zMp<>qLkrGHOh6QW2~|;u>XFt{8~&oyM=_3;Ljf8UQ+_boar)dxiCn(tj>RmXih~)< zrqs%&!Oo`AgN*H4>qc$H8ehbMzyqt#?e<#JF}TM$29P_T!dvm?zy?H?*-}PNd zwDsnfoI&TML19r=(76^+)r#;9W!q+F(D`Xl*pC&o$^xp|4AJ2izu^p8l?H{iSV3zn zpsIxsjkN#38FWD!6t-ant+#-x_CR#S6E8c1E=+^M60D$0Eud=e6aDMZrB0waRpaeS 
z8zSwa`bu-pYqJtLv;pD9_S73VmxCgR5c2GZ?>UT^^du$N=YXL9#z;@wP){V*)!#R& z-)sg9lqZTvZcS>VA#6~JVCvOH{g{IBN`~LP49ejwg8_$^1cN-nCnY@Oc!q|;YEC!h z3QW0!fl1iCfzKBkJELvgp^ed=o>6_jMXqUyLdeyL$zU6GV`{h&cRl!1rQ%SOm=vlG z)R2nfPkAY+WX_OE<_xK1?jVA;i$-!1qk70JN^TCAY=laB@fQIrA}(7ul83U@ma=)+ z>6k7XqHFOcxGq(vek6!G3oLbl2_1EYurF*t%zfKvU#iC9k$luxY^jl-D7Muw^O{N* zVg^o^?A_B7>5Ihr2lj=wNB71?^~=mqfn|vjx`g&(V;=W?pXmR72(k}4-wtv1!X7wy zW%NFGcGA2G&^Cl;F6-}yp{_RLYBLi-x(rI|x0otN#0@MkGq5INzK<2e<Q3CE_G56Pq>JQ$dWMTP zD=flHhhJ(D9o>yiZ#Q{>oRl@De39XOIK-U?#TC;4r;#i~Mc{xyRxHJwjryZ@#87@uc43{Qz2Lna4l4EpRhhn0~ zZ?rI-3h1pu2lhT!T|RqAQ=c;-Es4w-lE|DPiOjLXTs4vl-OV?PkeiE+*oHO_q!#|H zGB(xKY^vM4Vm;8-Qgf}GN$8&Ku#Olyj`d}?c8wv{2#Ed|TYx3@kTXL-u)<~U2Zd3Y z;tQ;I!;i?;8aXzxQd*0*BkhO`^s&p5?lo){G*ARjy#pKuFs=-*<>XfRB`!34mwpZ1 zr36XtLF-EKw;dq6L~E~t$os_IX0)B1;SlU@CA8Q@HwX(#m*5e!8*fn#r!1%I#TMn; zX)4{ppOi6^9qjJfGthtI;Le_|!Po=(bb3HxEv;8>B6%3QyB}7barFbRW(|KQH64j) z_c@I|rRi+rB7qb^+J)4H)SkdQU#Bv#hT5o2FGpe=d6Xxw0XhU;hm4}h>^tJ!+@*q2 z0(n3E@kg(3-nA{fQ&4Kfd678Zg0lvXkQSi|W=>S6-HazEgUo9x(ygNOZQ{HDr8~rN zF3j6qjr-~p%KPCO<^Cbw5DP@svj(lQE*i!0yRdPM(HQ0wV`(u^!+fma*Nr%?h7>vt zqXz=`+f+75gzyP(F+(S^@AN>4pkSWE51_J1#5BB5AcdJG``b*z(6yE0=(<|>GxQdN z^PUbOnbVMCn#=`6Zy328c)k<>*#}Z(fq`TW}j|(1>mD_toNW z9Rig<=c?4qCmf7%C+zo5Q?C_blQh~WjgO5GcY)pdG}^5cgVAxcAm?If?E>z<;Pg3M zSRri6dT?)^x&6WOA8+j}dHi0T7A<`CZQeccBP9N|z|WhJxTh{h;+9^IRED$%iG9Vl zkPvyH`;aCh9YJDFXHkhi?O>*x{87n<$UrR8jjW(EmiVzrGSB5UceM@l5B7J&LYM4q zi}ncKBO}jWI{)lzS~H%He0-$$#r)rDY+?T4YrxlluK`~Jz6N{^_!{sv;A_CwfUf~x z1HJ})4VX3Hwf+yh`Rbcvi)WP_ydUfTx{sga^?w|R_l)uSe;X1+uUtHee;-XqR!=bf}2LMEo9H`UA1_H#A(^BCgslPQ1x h9kAH^f=e!43~s!MzZcsAxd*b7XT=w2|6GOte*hMZw}1cu literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testskiprows.xlsb b/pandas/tests/io/data/excel/testskiprows.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..a5ff4ed22e70c9d19b22488024e79dda8144a598 GIT binary patch literal 7699 zcmeHMg6B2qySu*e7FVvn z?;p7L%yZ6p&a-E|GkfjzuKlic)D(dTcmN~-3IG700*nVZSopvJ02T-U04@Lp-cSnS 
z0JU_0nrL}CTDlmqc|5kK%tC-?$N<2@zW?9xFZMv3R+maM2UZ=;E@pHogIgCI)o>*; z0}esHG`98V6puRDx4jzL%cp#+QfgHN&LQz*+!fqYJzx!8x^eM_!SS%Wr{1)t@k6;r zRvUI#DWc^F*_8&w)>IaT;(SEYQ(;Snm`PX@=53*bm$6v+X!-Y@mm3FFwj^n3>H~9k zG>R8+S}sj@DB~)y?UgK=8{4u8Iu<8dKn6q0Mzk*n%NIP|OQJiI4yNK1WSO+=if5N4+yGQ{&!%I*#unv$`8$Z>$D(V-p&i z7iBn8vz52vYH`<(R#hk2oo_0v3dk}F46nzie&I>lsBAWh=+;=Tf7g|rsHrLgve6$h zj72%I9OX_w=)oWIebKW(T&Suf-^fGJMgGEtMFl<$1&F)SRFV8pxwW3Gv_rM(L~+mGvijQtnW^lwuSi|K-+e?M^ zgQsx<{W{c0i-eN>UQ*=0fKC0MJXt&3PdtH*)WCQ`3_yYNuxJ0BD((u&Mu%=(l+`#ZfYIYn9|{B~QJhJ}6i56_{5He5Pmebsh9p zWKVd_`uTWo?rs*#zYoD|^+uKhX*V(G|G4-mzCvv|u_4v_}OzH|NUA-7w~ z5Y9(8%4j`dM#t{v>^KnVQA#;ShH9)}i?NGuS+pPD)5(lSi>LKrE*K1b{H?uA!9q{( zWDy7DT!^{nuYaB4OFAYOF^P(5u}CDRWFjL0A)4?%wG-*++Ivg8DA4Lle~8*yfUn=V z`T(;vhCG65>kHN1mv+^c3p|WhXbTPxxfh_)=S`ex-!2T7HX+=58*`niu;u<|+%SpK zCiTHakYVwF3A@AM=I;?>YXt(E@|cSQmBgPTN?`zLqWFNZbm&Fm)l`0{Sj3!!9NnM= zb$|lD2qL4IshXt)$aKjXoWzzvkGAlHQ;)NS<348&d{2jUheZd%95m28kSjwTJ4(f> z0}#3h7|G|XX zZA@94L`4ENhEron&m`@5iJX##nxjZa$HCQY8K>}~4dqA%O>Hwho`U_94spznw4{|I zH99&oAo@qJ|8wq=;4E4V*w9-TXYT$!cNeIqy`{?!uH&p}`50!z2@Dx}+D3rtv&rvYXyKV>->!+BRUtZ~b?(4wA|&#LCU(ke)MPg-*<>pM!X)Io<@+SoH%XP`OWL{VZ=l;U5nTp z7X!qL-dddRK|B7BA&mwz5{@K%uVz;5Y*c8fbUF->-F;;{c&4&ZpHD~jvzf<2^wFlO z7(Mv1#tHm%W7XN)>RaqzZ+{A9VInPfa_2i@uW9Po>Wofc9$C5eghLN^b(@4PhYw>N z(E%!QU|^6O_woRcXuMRlc zf;Gp?qU=`5#Vg`lcoM=b750t}A8(HBCZ338UR4HYPcpoWw9l|JP;226?5ptE?iyVe zVqomR80`R=()$cKdaU+UnitUOd*Nu`!v`udu1eXJ-9e0_qnoQqHpkGj?XFM(m-5Go zR&S>EMk1lBz9CNY5w5`)wp2abqY&~)&2A~+;X@+J`||BttG+n>HRm>s&_rQ?=Nt2x zW%n^&UT^Xzmj^?R$iMP{U+LQXTO6VHF6AIp!)F5}SJ6lW%lHb@z4fl(1 zx90g!xkXjrDBKvIlrZK9Sy#79HZMi3tUNn~h9DxrU=7sKPT}*_7P($(-BV9%1prcSLcedQ$lD zkc0&C?fhSLk>jplRYV{=(_Mh2$r&&?S?<-Xc<}De7uy>fHIgu5Ybt{8K!N+QgSiN? zeu2@9zD{hFqTuRM`{j$f-UVDG)7!^qhp*D%OsA0*i`ise_cT7KPu*ql-e~9AHZIn- zt2u$c9*ODQba?;GHm^qS4H8IkQKM=y7tu^|Ea#RKn! 
zN2kz(LZ5Zakw>)onq$)PFX7CVnp~+&vrq?&iB_N2PjlOd!C-W zw5?;AdvQ4%Yx4Z6ju_>z>fLo~qU!UfM+>dG=~q{i^Jpgt4fE7q(lpcqtj_*No(R=M z?H^KN%lIi;8Cr%j8>n_lOeb|Sfpxs?MQEe6CHq@wU{>vh^@{U|l4iATyy7S?nlREC zw&kKOnOvX)SRWBH(#lDV^-*`|hi74!Z$Zb78~)W|iWzy5oe9sT?}TJ8kvEq$!G(lK zo{@=B=J;-q=SG;g7qEX=&t$?xI#6EYXA&~=Y;$Kq@j??COk2q$C za&{MevWRP=)}H*MeCCS8y}s0>eQM`aiUUbfTJ15a@Mh{GpeKMabbwq}d&g)?C-`|F z|BQ#thi#2H|MLS`#~`E#g_j5e5UVWegi%!YJ$HUrpHgA>3D{BPflFf(et+jR9GsnW1c(0Ndp{ANYIOZh7spY;<*PX}U_r78c9$c>c< z#>RfkKvrTEsif$TquOwFe0TopV9K)y;H%qVBKfs(dS} zt?)JX*Ee8G1h)+q7wWXeab)?RZ6 z6Z*{fW`VsU&YbwVPif#B^^sbU3KYTXgFZgts=}(rqx>i-0tLx6Oayd{H}2xq4Tl_c zfjPeCkM~WYq%NnwzFIWvB<@YNO-GVf8;jIpKpFR|29^c3^9VqyK z%1hUeTD^qzHVPA_qO6}n2Cy#8Ijwjr3IS#T#TjV{mv)7Yy~SVMieg&Bb>5Ilm)Uc& zECJQwN0l6Fizw;6n7ph$vkZ`|1#e41WOgQltdMMg`p>>#kjp=iujLPH{K7T?8Z0WZ z8ya}{l=WK?yT@S9;%mq1uNy48qInhjv)%WWhv#Gjg%01%+tpSNk6?|*(xNF2ZBSak zTYR!)evaMNeoWQ>Z704tjL{|r_0tj#q18%e0g&N2^FWlt72>bCj$Zi zg#XyxZ}xTnn9m9=;#h5;$HO1`%SfWNF1=H;Yt)h7A@c(63T&we5A?nOuV#-Ke0y!0 zX}(Ib@;p_EjZ^M|^3*LUZMxfE3E@HcE-6tQ%2`3{j1Jc>eh`9beL5-|h-bM>eO0wx z89}&%Z=YpRYW3r0qHnS(J$@2z*tQ0)1p&oZCvQB%OsJJp6ba^xOB`;iXD@3Be(roB zUg`wvux*1n4guP_?i`PtD(N_$h*J%*QIMS=xO}v+QqYUw%_=}T7nN*m-RaC&_AOd8 zx#*sIzJ#!_8Uf(Zo$KMo<2;jsmFAO5zdpm0_b+zN($Q7YU3b=5g;j(omJJ*!$aHXB zCR;lX@fA0=$=XW4f|9<~$7b|N)-zo*4Ild%qa7B^5d6xqwTt}w_hAm@f}ME3JJtnk z=4=VlggV;Zg?Srj;)+71Jkc9KEQpesetHUI7A0-+&R9CX_aj5Zr zyqL`EG9^?UEi?@%yx-FJrgD>AC-Ys^nnDRf)HavUmr>MspYo*LP9kxs z&qs%dU(rUjY8@IO1iCX7QhyExL4-*$8mSyCK3pJtE~PM;DEy?=Q^KFYg8j<*Znuu0 z;Z`Njl&?+K5h^)K56&H{jfu^tG!iT-<5IcB^fSFr6MD}L-M?4MA|ua!G_8E=q&_vb z`0SIUK3+^2h00~p0!Vfq^=>;w&%W-ya{pGAaL`FQ|32z&wv!4S6mJmBLum}qPdz_# z0Zsoh8fPtIH8^1-4`C-c?EIr9{i8I1)n;bSo(~;BmL9*A27@uCAX8H{1an48StMeE zYVj?Vd@LFu`Wn2Fs);T%itBLZFh3c42Y6UJpV0|rkxoF5K8z8$1e4gja?P8P zziKH}y+1PfX;&W^)d)Qxsn0oD;$0ck%rH2wr+Lvjnkre1m;#752VmGbXr-@GC<~Cr z;IMe<9grU~5fBHq1EcrqVH9a`@&@uN8xp#6Vvuc@ zk)z;c&E7oS@pJ(b>Ahutyq?C}@_BeVO&QxlW_+|Bv2;O8OX)q*c?u~Rv}A!t+s;my 
zWn53UjfMdx59By5`*qrAhYPc1kQ2;1`!8q9j6ybADG&GqR+OCSuq+NATX3~}l2JIb zRuU?LLy2DqEaUednzGq&IPe0`%+!@VAe7MKm;27Y=Pgzk|L}lf?ljKBl+aU-^t-^+ zDCA<1Y*46scKKpPOqIoAD8+WVv3tWbm^`Ob zApO*L0efTn#w(B+PhqIY=d{I9fl_x3%{}Pg89Qt_e(z+!!L!14fM37i`g76#y#L~T zmzv_w06#a{|1|t@uYob+Z*BM6hCg>6|86)3D{lXHAM!TN?M~DU5;5vuIpEJu=56EK zt(+TUe6-(;ZyGzdO>Z|MZcJnE{Pny4Ut8if;O)Bp1~49$gJ2H1UFF{fxGkP;0Gwcb zf}gYDjg-2La$70fpmf570}SOKy5Tm$&#K_Y9sr=c2LSv-BiuH>oyKnfN?_9F=V{zd p>bJ3e&eAtX000S0UjFr0{>a&Ciioh#0|3xrpC1ZzhWN+R{{cg%x<3E_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testskiprows.xlsm b/pandas/tests/io/data/excel/testskiprows.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..f5889ded4637aa8d336643b8d42ed34798c5cb5b GIT binary patch literal 8281 zcmeHMg5wkz2I-QLuAv2_!J$Dw8kM18Na?x=NJ)cqH&TP7^l!ZJ zU9aBXU+{f5;u$D?-LKZkg(rKia4o&g9<}gZ z67A7G;wOTKPsBgeGO22}t-C5;o~w+71CmKTJ0HE@db{gdj+)z@;fR9o0K=Hv5=#lz zGMREVsXYJU9x%+KGG=Gj5*kv5Fk|cO|0t~-$xLnn2U9nnh&oJCCpfv+A0YoAKVFy7 zH`X0L&#+f0V$KAn#tb89cwML)ijAtS@hM+%rU5cc!Gw?Xe$SP+%8FP8Y-Z0uC`8O! 
z7^rc?9;{_0fC?2}#-1@-rG-#kP*s+_$e;XZD9xY1-KwC>FZ1kUczksFIc9)yo~Ipm zxLojxJ_5aSepJBT`T!udZ+4%O8t9qK++=iz+Y8$t_BWWssL@kaR^1|{Q&@;J=$uRT z7-sULpEbZ+IQ7GPGk{K8P>axVXfKdc*qa=ay`-6kWjy~uZ~vLNZ8PI2YF??#x4`>e z=C;sdNhv%n>?=Xa%b7EoPct)4;rxB%CkS=hYH=$}aERUAAptc0!P8nDt_LUZUsT{U z#Dw$I)XmD#os;AH^?&^QFDB&=LoZEKRqy1&0>c!p!+Xys7GB{=DL;R#&Yz?^nG`AocyVDCiTjeW_CLj`dQ1`qHoOS-QNE53^Tm{6lC%5k$Ff>4Dt%SD7ljVGccVbi#Z}Fo{@tuwl2V!J@^t za)b#ft^G1^WxYW5PSS8@;8aS%7i@_LA(g$cbmCrj3!9m8zg|b+=>@rtmaULorNvt} zVLBgEOUDn#GMR0JH-7ADS-qNcd<5TkhUI%0vQGmIYxutQzw!AffZhr1J?smKrYTy4 zhpb;o5-k2Y^%y=QO>mNs1JDq>9XbEt$#amKg9QlW@I9RU#Tf*6OoMy*?{1}&a^ z@LDl%!?~VkdJ*H#cyQ9~>Fna7_0_Pr5PL?PhHa*p!ROUnifqh_uJznRJa8{8B zPup3Gq6ms5vosc%=Fi+2VI6~?UjYp)YsEyuvl;0HA>H&-Fp|}Lo<3z zhOQ`z?E@Iy{tRUO)-HR7cGPlht?D!6(4KpVwLuA0rY?CT<_Gqm4eynw31mrh-^fXC z)`6{fNL_tmX`4G@>py{6ulMy#EWua$SIFR_z5`Oj;lc^ee{kVXz#;P^Sn{+~@vn7}yu$k4@;3LCX4Ab!2^TpB% zb^{!yRNF;*46t&a z6`rzsYz9qHtW;j$e#sQePe2K_rkNh%Dst*iOQMVDy|MWDG zMIe}z@x#ioQ@@BvZr{!$7lHu@j3haQN@}lR`9L&8lMd?GtT+2X&1|l4+g`4FAEjnJ zc~n=xIZ~tjq&GSsmn$Ntc*A1BrTu_u7Tse(;MFb3<4C}+_{4mba3KrW*K9a4j9~mZ zjW5R`pHiY=-s7+IU9rGN4%zn)NMzGis<_lv1y7TwKfhQ>d$K$&#~My~Ykn7aL0%pzvfF#HiNJ&h zR4}LVr5PO!LPL*UlDZV1Nc18kZ!t((6!fTHwJAQFT--U&!ahrPrb!$}(yOI*BWDs6 z-V!?UBI`~~Lti{gz-}I7?na z$_N&OcJs=0KHZ5bHg=#*kM`k7sA=~n!Nw?EkC?U{@>a&jr$GV7#1toOX1N=c=WU!p&t*{^m! 
zBipkZ4;4;O6GazxpGS^#V3IwoHzu`yGM+Lm#l%Bdv)R*{YImG$<{% zwH}aLKrWg*m2uo(s*_Zw@wsf)9o$*oU^&{Z@p-eQ^e9PEPV)ApP60 zLMwLGV($R}YZTurgx?wBVQb}N#rgY{`}?T&3lzb%6ILY-|U@fleHl5)x_ zzpKTxQWRw|aczHEv+=9fR0~n-`q`4BHQV`;ATc5*Bzj5CFXIiWqXm*C5NS7t1QO!8 z$l%+qdybJ;u7X+b;(6zxQ`DANR0Kel;mjweZVXcN@fSgxio8wZyC#Owue1ee%(~sj zV!CDIruY{FVGm98So+XHE*gz{2AOqA^i2&Dxv`%4q~E6xw~^-HDP4vtq})TK37(H( zXV-fORVFL&33JN|o}TLj#-i`oC5(2 z$=ZdH0zN;$)ea*~!Jy{Ljk<_T;VN`pNjM|ztc&NYT* zzPX>fFdorfRq=CD_|TjYEEz$ zGwOl%a+We_5u099GVwQPpu?t?Drfu@y4U6mM1Kg|PW4p`3cEJ%C0?E!&iceL%v3Be z%0w*C?fP)$Yv;Q-8ZqI3Z@I}Es`K4Gt7?81`!<0AH{6$NUj#U50`a?*@89j>zPmm; z-ZH0MpK`q8{LSxZu9}JSx66}r7&Yp0CgS;1?|5sO9HVu#{jXSO)>vx}YL}~iyuK_2 zi&$A9dqLZAN@B}p+}1$j=x^Q4tU-oSA9i!E-j-h#ld_QOU&v+ne4rXKh>p%iaNe$^ z#5|YtAuutHF`lRH4u{mv37#9zkr96=#(RHU2P*ui<(|4?yb`u91H6f$!6QiXiq{-2 z7vUo@K7vS{GuxY!^C`xkwMu5F&K8h;v!c|*Gv1%E-r}%#C7C@X5jshDg_nl%8iv1_ z{#BToCF`ZknM=2E=+eiB*es0_BvsM73Mg;Y(AJ80X*I!{^CXp~JzoU4Dh)>VMI5s( z6d>D=6*g4|8l&e|QTK^#WHoFu3OUY@d;q|xQ4T%eH=J``nLt#DWj{zWOmnsI%7d;! znovF8AZVNE0si5JXUB~nWi^lyY!!4UMm=nME39clq<@24Jhhb2B$lj}c*#f^1o=EG z{&*wWMFWp z=3f^$r@Dw@H*wjLV~p#lWj4^gjJJwjULgn`q@-`Sj41Jt=t6n@O=moW@jcNS1#K7f zCzUpov2>kUHdwP7AjjAX#w8vN1{kvCcAXYTNW&I zpc7Kn7e-L>2`m<0qfBQqKf$&~q*;c9JJiS%1o}S2sMa{5C! 
z5?QEUjmi5i9gCNcGpSlw5y&CXeEIE|h(-wdsAjref1gdwEfvbH@4VVt|8*>H=Bnc^8Y}4ee&?@JRF-UJxcg-fFXM%8Baf(PWtb-{C#ub?c`Z*Ace{#QJc6^dQ-p`FAX{c3$ zS5PF*AYnDp*)vLwXQo4yTHTXtskBZ`Y}!cUd3ED4s4MRp#i?QTZ*#0T*{5i8Pu zFsXA2ikz@%CjR}08y}=(mDb9G93FL2;jz1jVZ{TBmILOl@2_jz)eG5JrPYANwf0Am z(J{6Xlu&@plFE#iYWTYB!yK%M$ssB-LOY*2N*0*H!%q27%Xg~9q*%h5%##xDVI5dr zUo7=w?JzaS6rE1fw z#aQC_iP48gg!3G%5s_(8X)~z|my-Km#g=uEVd9QHFKJ6SXF8DVLTXN?!#nzLX3fTO z9NRT@>+hS$wPX7jd{LWv$>yJ=be`4ymc-`P&4lB(-!4VpRVib>sPkf72r35JsTUM+ z#52amxW@SOZ~lh1c5}77&F;OyWuY{iijKRiiQj~EHHMhDBs`_E@D20dq8A4*oy;IS zT${lo81|n**~7=t>i1}zr_<*=2hZ;AZ%g2{uo_x4*3q&Q?W3WM+(Su36kC^4eyti7 zEvZ?&kaM>Pq3_4g@f$D)Rc}-80<)E#-An3$@N^+2czpB(1d8s;*uY%xgU!cxhi0mSkx6n9cEYNFcT# zkB39SMFJzFkrOScL0&=Tf{8X{yQl7}5(Bx#Op2>@JQaytj*7FFrrrAu$y2-`1)q)s ze$HhS)>@vDy8D1FXp=RAf1leIPJhWyS~F2n`k_h_N|^SsjGy1lYnx>abL~W%KtHkE zdxxqXfm%^QJreK^ErB%jM)VZ5MYJqAt0O1Q>4FLnEGchlE&syJ)9m*Ks(=+(SJU511#(j)*Ml zwfhnIZS|LEC=@u&rF_$dV8pODj6GaYOb7v~TW(Ezl(qE0i9Q3yn*EBsL8yIvjEY7L!X-j-BRNI(Otq9nsuSQ z@cZ^tiL7SI6Eg|Y29m>Q!$F?w8{bps#zqc05Pm7i;$C9+B9Dnvi+7!g6Mq*^M967; zU-KbNh19w4V`!&=g~#Ml z2#|gA{E2|KvAjSM*R0yXskWsmEnBpb6cgIB-In8l7^bAcT(&;y8vK%sF%!Heh{Ss7 zMf_TqyIVWvDv?#CpY)3#ri~GjUh-XC+qr4@I$!{lM<6^}hWbW@=C9_HMfTI_pvBE=QgQG$ zU#2A@uZtBE`4NkM-1tTWxH__87=(a2?#q}`7AT26F9mkW`IsD7_IZ-Zue}V7Dpq2d zlC0hW7n8Z4<>`qr=G`Uj0hx4ID(U`~drLBCMnILPcc#2%X<`L4Le-qxC4|8xS)qEI z0IP?@Y@1dsi&c-oaG5g!5vEC_+#7d$N6TZhJhw(m&D3fcvDHT&^Avqc5q*gay8doT z>un|Xah9_6jAPD3@G^EJcWOrC)ub{itey6y$W=@@cToG-p3Nxz{Ij}ioy_{$#o9Vs zhp)ADjt)1`By@`}@f#0pD7~9-+FNE~L-V9JOP}@`%OPcD$NeTl#cT3hv|_vR?0lSI zu~;kAr+Tgp;y&rrWM1f1WV*e0=_)I5jkYi{-s;)@+<$Qo$9X%;Gt>DqMEWMsBR75| zHiN_S;AwT{{D{yl|3ZUw_+|4Or_ILnqztEJ)^Um_WSUY3TZBs!g8=#9<0rM5vk93} zR<1`wP(+F0pin{-!InnNvlf@7n10m2Gk9_RYsoZ1!Dy%m7hx>mNX3T>HkP1g8g3vL zcg|-ZH>*F-9{#7(!08*7_}HP7mJ|#Ny$N3ydqOem1XP4jiIs8b_jEhwm>Svf|Fdv_-_(Ia21a2pg6RGU<@K#7?&#Z?HAw5Pd7f<%5tg;}QH^wI+VL(I ztPHtI@N|Ll4hOOF7*!imQiPb z^c&D!yn>>N4F%7ai02PhQ8c_3r`mF^;rrmP_&ej4){BSZj}%_{;{Av}a~GHY!4HnR 
zKU-F!DroMzw2X8{gS9{*w15hh)aA~j@SUy%C^oNU_j$0%q#!4pCf;0*@@EL|sx^OI z>w?PDPYhNa%i@wf#Y)!aPgz2RWV6 ztepgbb(A1K`L(UlHWo(%Rm1qld9qbLn3W^B%?kf)4-R7A?t4GZ6$L!|qKdH@4V5_P z+uNy{IIrc-@C6p`N?jVQqapHD)18W;&4?<<>iB4gp4+0fNq64g6(9NA^&Y zJRLfycbEqIyIqiQ$055L&9WB->!(BZO5*QoZvP|DARw~CHKu<*#Qkg0{_6kbNLNGo zpAPr-}bF^k4F} zpS}DulK$-j00@Amf4{`lpUwZd5&md?Oa6oTpWC8_G75Zk0RRm6CkP(8JE^|k{U2pW BIAH() literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/testskiprows.xlsx b/pandas/tests/io/data/excel/testskiprows.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2d7ce943a7214fa4ede1cf0b37d371fccc3ef2ec GIT binary patch literal 8258 zcmeHM1y@|zvTod6f#TLE&e^rU+Ew+Hk{ld7J^&Gb3;+Ot0OS4l7Wyy%KrB1}fCoT^)fETX zIh)xz8>qS4n>oE=b+fgh%7TZb{|JDEKL5Y*Uu=Qmcl`<-?AVeQGPmNZEVA>TMUkKF z`S;*5Dhs!ECUh5?=w@14K4ga7V2gnXtavJLhvz*xjs`70+u7Fmhjurrphx)iwyNus z^00OF?9#M6B93>|(LVTyPb&J9*uWs_bvhu$p|(SXOJe!CY%z%i0im$(%KO|^^Vuc za$lV2>o_M2O<)(S$a9f=E(kv@)aj13W@_=$^YUn+kB&H1x^Cbm zurOfJ%@p+m`j1Tml&mAF2unEZhTnFI5M;mXKWwL3xS?6bBkNouHEqa@i8WOqqPAs* zs~e|HVG| zx1|@y%PV%UqX!?z+=ldCOf1A=i^;utF4G89^Y(o)k5wI&{e*O(g^mncjU)h0+Nat3 z_Uqh&VAOUm@ccVhaRe^Dz~h=XCBezp4lYQHw2nzq4#mrzIIa_C6X&VYvhMV*%`uEc zb@^bqzNN=f<3|z|*u$)vBq(^fWWj_&seU@$^4g2Wm!&WhVyb&3!R56)AGZ^R(|x9r z^0qKULit|qj-?XxIvJbKlzR2r0MD*S)Kx9`EX$2E9r>x<4NPrXPbAV?@b0`=6f$~M zsJU=2IfkWsXg;3#=u~kX^}l!TOJi9kD$z7ghr@eoy1T0UGfRkA`MU{Aps!6 zxY@A%?k8_Rj@HH?koAw~^{3CkKtmW*%YXMMR*{$MWXEnsyANT1lkQ4@Gvmxgy{o>1 zh1^%gFh>jI^gdl8Wo&q*H7Umi2MTsS80>Mqc!v2M4)3guDL)(+-UH9(KmY+Wcrplw zY`j-O;V+GVhr7GIQ?!SMndXF7FC5>?oQco7eo0S59FmPl!=o2x7PcMmN6!nJ(KT&ykOt zZePp04_UWmU!$*)70FQ)J`?f^cBTsHm8SGzh<94cN^Mj>Saue>`O45Tcg);>dhmL+ zuV-Qjy3@Zx1_$W_kOB%9HfXwo1$_bvnZJW2M^!#%9BQA(N0iJ2fV;WCJ*gSn)67y;mnK0;xa{**vFhn)`{a5#47D-xz?rU ze9et_O>WX*E+4J1bt*D4Q*)=f=6H6Vr*|mqz&!_;InDA%T_k_>G06W4H^A-G`7=k?vhrtrM7mSmO+ZP!a=c#s-@#Aos)fA2-biDi_1*^*$jWJ#8_}UMolWa8>Hvj#HGkF<7ZD2nEcqP&yN? 
z6v%}_XchX${~bzB7DkR{rmD`4mUiY&KMo(gq35XW?ASre%@FeTUE9LY&?j4BhwvW0 z)6upcQ@;(Uot|q*1hPJW+}QIxy7h(Fon%anUcLJmyZsH`ibZee(*y;6#Ha3yu#t8& zV#ZoMqSwmfNz-C<9As7N4IP{HGH_$g^|@rtFH>~~#if?)L+C4}(m&5{ZNm8#j>{Mu zp;_hFQyyJfoAdHM9wKEK+Enyf<`40}9Hcxw&Gm%)w`B#CZLdT@bCMO(9|gg09^!0a zW^2aw+x6*>RqbjI+L7{Le|fYifbFTsJQHt190wiv#i#`X8`kD-V$o^OL zdOd^m>V;YcI`L1@P25u-Jqa-vXXPk{6v-qZ!czLpN3yVJG8V}Z=eY+uX829dbpWGK zwk_jEyCvL0+NShg2w`(plu6=&g4-}L3rT=Ip9kUjJBx|i_~E?2J;qWCBuYZ1;K~la z3QOWHWnDgV#8pLG=Rav!|CFJrKt#rz{!DW01FO#7B#CBC-Z6%6fxF(PamzP_s_uxs zD@3kVE)Hgs4b~Hr5_xc+DL2e&na*SfdjyU(*PZREbV@w3zjdKLqq?c!4lyVyp5#sz zYJ$=BdW&gbZCf)}M6u}0?CpZN)I6qDa^(gmOFW3RJ~9hluw!_OO6*ue9#{tBRie^v zP9GX<>AXb~ET|b_%+hgDzX>Y}N36jmZKW3rj0T3!@$R67KURZe$Pwi;Yc?bjTo(CQ zudB+l#Y~~NuFrtAhA=GUV&yPuj9E&I<_y@ zrxynlNRV{ci&t(juO+f{SCRLQ(9d6^uUIQwfA`|_V8~lU&+y;%-HetMf|NXc4b+Rc z>~3W8)e&pm$-c=fy(u7KAkn&#N^@@|AJUG9_y}XSSxts^A?1#%uNSE|PthF$uAbw) z(3>MBXf440d{P7Y)T!!}ysWnzxGDj>i=@QHO>vD;87>vzBGfwui(SxL8If?kh&gW- zO_QI^Bfc~vQ^D5Tn|Zy#YUMyUdq&828W)S5g7EGDXFc_ZpMoJHNaEbSTQ6X#lM#cV zUWBkBVn+rcQvrD;pOZ=@czvF*+@NQRhrL{TWKX~*<4Oj+`CMjQexN>L{yWkhzPY55 zdDbYq<{mrJ-Q9GDz|!mZPSuJ97SAo?WuNAVveb1qtf7xgSSlqKY^>|PEd+owLgMkB!xJ;Ula48PLk~{m^Ge(B6dvLW9dY=kc3X&*bG++ zhH~r62|guX(X*=QTCF{11;^wf7A?E)&9z5kIWw8=FE;kVoK5S~Gnl2N2lJH;N~P($ zB{@mN(lID2VVhW83c|Eu->6sMa_=h2zs4)&1aY?;X$Pg_Szpv+cnT^Z7u1x$w6#c) zHbX9-*Fz<`OWrY@jF|O}I^`KLHpB2=293k+ZZ;;Nhdj|Jt$dH;#AFPhNj#3DHWnc# z0}GRe38C*#fl+UWl~68#!8oUn>So6$xnsN>Srdkul{k3?qP63`$J@DtD{#!T%Tp-2 zurC$YNC{2rD!r+!KLK^++#=X^VXQpNXq>vkKEh{$+w&u`OG1$1H%P~M#JJWfCMmm8 z>TCV1gB+X1UI;w~m=E!uyM45(^ia!ZZkAFNEUdaW3Xg)i5hsfPY!sEFy;i`{U>Rm* zicj=^DZ#h>wY_kGE+p`b3#oLwQb>#;u)!!H9ua-t^!940AAOsyq4(;OhA?;Pur9^s zc59NB>pih`T$=dxkn#v}=G(lyw6Qj8%yTD->ZFQw*CsuY6J_1jXL$3hOrc>Z;VCo8 zG}oegM?w${_yb`Z_aLf5wwZP~OaH3V>5%q5%vr;+ESokJjoL^0Qf(OS+FJ@!LCkLx zWG^zhGYQS_9ranie7P3wvI;9CH+RLsj~jiHT-bcl5CU;l!&TMlw#1oa5!@(+JFM{e{&xV{(CoLm!MMveo z*;8{QOG9EjljQI^hMZ6;>!qEmisk1u(KGBJ8Ta;mZZ-%4Q#D6n%_G1@QNwGRw|$OV 
zY;Q%ss?LOqX$8nm$f8+A(SG|duV$Dr%)T9=U-Xp$@dJ`#C{kG=#fbL@$JHa!g zCc%=#jP|T(+hh6(P3^jD;pCIAswWsiIG9NzM(@IzqL^hc3-}|R_@7f)x6f#BZfM}O zT~_c&4~@839^#>M?I52K-$rA?KOO2ZeoQ|}e2i~etJV+8ZK1V9Nh-r;B<7JabU<+M zp0;#pK2biQWU)vNHL{e8`+P$MHz z#;;fJ(`VVvICsT`EGY+CiWXhQSirGHGb?ALAtI7H6?Eka#VsL{As(GRghB(~tgE`+ zB+`N@LAn*W4V(K*?4;z@5I&rhRD2WVUU}tU&@1*30WNIuxg>}FRW?(=z_vXhxBc7d zeQMYE7;VvS;gdOQ=iY(?hwgWA8pQDm4MU(EGMCk&H&NQ$rD=I>N&Uok5P< zJC8HF`g&Gs5Kb}S;%Oc*@^A9>9^GF-8*^EcXsUH1ExKnaxV}rEDQ;ZIt+NvD0jj;Gl~;P=#Wu8IMuRL zp=yC#n50j2zSDFv5J{KtDVw>Eq6()lZA>3q8J0jZHJ@AcdS_$X=w(<%@mH+^#*{HU zqHC_3TT4eJ4{KC_+z6OMRYyzrlaYlROnj>7gc3NOi6ylwcQG(;vX$c@p1FfJJQ!@6 zl@%pkcyOfT&dSS7u=FC|#v}`DN_Zs?v~qGyxTO&2J9X6*v8S=F(uz=-PTug8?_Nc!>BwD02jEs=2WCRe&CET_F09mz4n+vxGN(BR6j zvSAPm(zpk0Qi+c!%Dh;xZB}QZPsz7Qa<8@$Wb!Bx2pMs$F%}wqKf|k2J+y~ws(oVd zz+|Gm4X36=&ieg$yQ58XX8*9PEJ# zrCe|H{R0(;E@WvNePfs%BUeDNMP541D!k#9l z)!$6YAsw_)1qs|lhCKCc8{0J>eKK!SbE}?STfJCaV_|(%U1MW?7ePq97=%;5Z%*dc zfZ5hG6BUpnzFz#QPfrRi<74y}335*TH-crG<>wdUG>ZkQ0q&J^Enuff+XkaguK5O= zi`Nd4Jh#XTBje33ZExN#?qb?)X1Ju=eFKZ%`8a3Cj6|iey6nHIOrIa&+u>fQ6A!s= zd~ds6pPGt9Qz*#1fyaVQC+0Yxefl&~=cnJ77e?48(5K#pdAv>N`$(?H!fFkV@% zgB?F;3E@G!&okZB%p|D>j?UAON=w-bt#5(QW-L~s@y>JHwI~rqcrnAN#dDwLYCC^> z6RkKQiWOTPIb8HaN~W1@M6Nn4ncgwevTa*R+)n{AQrOhoSKsGmvfRgwSey*W5*=e* z$rD97kPog{K~fr^X#)q;Sr;1%zjKYjSfRARa+B(XWjcJxK5UN^IVdAxo&$M2<~7@Z zgd7bjv+9?)(fK{+K=v1xv|;&(3jiSDri$~ATUpeVnKE>pXjIH^&hWm1 zpyvA(dFM~XHDjR2BZAhv*ndZ!k-h!@fCn|%A0s1P9yIrZRE9gJL|-7~TR;jH)p(ji z>M>moc+t4>vCo-VA_+e3EdK6#lsk=oN1^d(rK?E#$;4pAi6j>BEA&JyR#|1y@(NcY zgRq;0jUucpQ4TP-Fk*fuiqib%RM|J$9{5x;!!|+~rcvD7#CI0Do9L`{%DkVdigtx`ge)*TE!Ut_Jsu+YZm?Z8 zv>sXk{yskHuj#m`_Oce?CWZXHbHS>hx~{cg@r)Kv_hb4iJ*78>!C>3S{(N=CA`0d% zy^lA8CXIw$iUI5pwJe^QRME(1y?LgQZ^gb|dk!Gkz#u1@wyBYI)VoqN)_U-v^48&F zBH|(Qu;RRl7KGHkHsQDnaO0z+Af{bJLBfi@I79Ps3ps^#y5$qOoWk3gromH~=an|l z;K(jgf{S%K#Wvkwf44mx);M@)tx@tSZ}n`* zN>=zo)%|~{85md=D7*CU-P%7l?a%pNdbdh)|5WhLzS5t9KjviUEC1GI`dRRwT>IYz zo1iJj|IfVttmkI}_AgBb(1!F+bnMTPz1p^4uw5%p*Be;$N? 
f7vCWHoA@8cqLLf}baw#&ROrta8nlbaf86~ax1}@# literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1900.ods b/pandas/tests/io/data/excel/times_1900.ods new file mode 100644 index 0000000000000000000000000000000000000000..79e031c721ea34fc6097556d7a8d427f72f236aa GIT binary patch literal 3181 zcmZ`+2UHVX8Vx0ibYqaB2m%610I3Es6d_8D^w1On2Bd`&AcjtmAVmb}Rl!2cvrK zpmrAr7qlnd4UKYhb9S&rc{;daB)u_rC0tQh2dspv8ya)h)z-@ejq#Mgx_O{ccd_@- zXiq(AOfcZsuL$P>nw}JnLOKB8A`<{`^d_t)-WiRRxbNcp#-s}~ewBTav*wV$bjctQ zT>VH;eLWkVbeqJd5*OcnCIYN+$@}{*Z{v=}V?|TF-rVC!WkaK9BpdSD zl8SnJPuGz>U588-3r^w-JhH#jSg`ioX|V(*-D0_Fp*RS4ueJ0}IzTKJ=bk%vjewBq)-ZI_2hZm4j&NRLGWm1Gt2iVpt1w2;?~4a-;2dL^f0)+U7#(E zALJ45tHJ>0F%NrMs#0J*t<~Xyj(?kz*_E!{b@dgtQxU^Lk?_buexkv)l#=bMTPVuC z?!AtK!l7H?@(l8?u`?&fi@M7)uYYTQ5%T5+lRbaxPaL52;gcO<71~g#t#grn?3ZOl z{a!=-iltck7fV-GIfv);O>rM4?pmA`UV3bTNM_fZho0q0tfw31SB{;Lvserv7Q7M< zs|#U$Xr~Yi4i@K4H@;>*)S3y3C*>=lJ`Xy`X))4sGRk*4LVAMa3eyWN91D_{%xacN zeJy@q)!9D)`FLeO#^_V-nykWE6m$5#{swEf^++i2!FChz@Igy~WAcv$y$op9lnEl{ z&RPxk%I8DFSlK9>x1F+&5%aUmFzu=>{OBp1+G-+GAOCau9MLqPylU6!>adk~S;wXa z7N#TlkZHJ+Ey;(#BS;vh@OMh4k6}YPZ{u-+fiu#D{zk7*6gIvkni8XuYzX_TvGRHJaBzVk72k)yJ}*xWKC|Px8j=gs6R`SO>=Yq&-&{vIqEhW00B3>oIvg} zVL|)CS&_!GJ!DpU;zi4P4&UrTW5yUOB=ACpS?8yNFt60K^4EPpm6Q}ev6snRhjDgX zvzOYvlww_&B)TnsWLU;}!N~1~iz1!yEH!ieV!nKbav8DtZWL@jL%aSk>Cm(yz zah@|Fy=^H?VaUpUCv|!c*>3tpV6m2h9D0eveZQfb_JzvY!$Ug2|8m+aoIH-A2LO0Z z0|3+q0Jg3e&m$3_l6t~)vUpCOy|H8vr&5apEwb6r`1a+`2^+tJ`2*sh>_w+;R?&r3 zZ$J^yS-^PVSU$)SP}aV^-E+lOaaD3d&MH0GDB+$wtGB;>bE+lS82bSl*myuR>AmhS z29A#;x}-B1_9m^pS1y8^nJ5T`cGRcJ@hoKbZPo+)9|@$cD`?=#&lB>sVu{1dM67`M za8zW6dh$-yTF?*-_ig6g-3Gf2pLIt;MN-iYx^b~#)6?txGQJos$aMKuQj*R39PX|D zMZj*|%R1W99LFjAkl#4;W|74lesOyg7FHZ{oGr&;Ww)X?CRgBfCn@7&gHfz+luVrQ z0*cf3x%0q~ukfOesh;k}roPCtp=%~VOL3d(3dk@~n#3kEW^l^pHMc6kP;G08OAfty z%^t;7POH0KTzhQsOWb7lfGqM@7h}YnCx3{>?X9l%sOsCkAq@o)`rDTgisFtM7STRK z$hd=nN44oF!@#xx&7zFPO1)kvpS}4dUEmj+4)9ppw}gbsd7t>pwKXY+lenf^1$(i^ zO_bNAJi^m*Vwpg(`^YOXQmX?&D#{PdPw>K5BJ$uDJMKt-XIV45DD=$Q?Ht{^qP^Nn 
zJMQ!J5{Ul$9MMn7^3iycz(W|%DjxmB$UjSpfa4BQ3@~L159_o8#5E#VCbYwKtiyPxy-@W znOv7ER=V9Wwaxf}65>wsgQ>!hOiaYL;CmPbCpmDH|L*yz9q|D+*Mq%GrCh^;_v@A+ zx!0eNq8v>CWpQpC(FV#k9Lb0L^oH1{*`Z4QlL4X|T>VL>%Zn=MC8832wOAjfu&ahC zBvy`B=i52C-Vaj#WQt}(!sOYUdl)|q&Kq&6V+&TE5|6LZ)La9H<5fgX?YDiKXW1Y? z_P;|tD#X~DzbEg}Y^jZWJ>XR1{{VMCEh2N7K?58Ib>Hejv9A$iBm23z)P*!OmrZVI1tx*k77d7^CV)C&di^8amZ{U)j4tOgNL`L-+z{8J0a}Aup)gF`4&UQ?9y7>`*<#g6sB#I&(@wV|Z}3O}NC%36$0`%)?E)X0nv<)k{)TvT25uoFmU(YIBxG-T=Op{fKCL$mx4oX7_YHeGb!_p+m;Cbhw<6 z8#f|4R{DinGr?I}$KxS3xv0wQ23BW#cZ7171g(>d0L{_bEhK_vt@@wN-h~ z7Anbp`;eaBD^Ei!{>#o%e+A&L#`);(@Ae-Xt*3p2`v2DWH^PWI{B4x~#Qj-*f8#`{ zMftyq@t?py+437$PR*FV<<6fW{>-x9AsS9lHw^#IIX!LQv7;Fv_28%G(Nj89x1c(R{R^E>ULjuS{c6o@88pKf??0|zI*(E6F2v-jTYjx$bm zJehO%-tV6C`yS^z-+A1Vr=O}ga?{--Ulk)orAR(bl}U+>9>6`bO64M>aKq=1Q>m0D zis1H(^ck|iN04Ar$!N^09;Q~^DT!}msc^L9= z2oXU1=5MXq(o$?bl|s3wxWgdPgU?3%Q!()i-dU< zIjF93rvA~_OK-S6dDfbz0w?3KM;DWo;NB(cWv!B9y}fu|_mvV1&~Ggn@~K4D@6FI9 zi{(P;mJRr8Qo5f5DculLgY%fjfzCB<^4iED*Cn9uzpLN-fxL$KP3VZGwfkX8jzX&ln|8g zZ~XwqM5onFE*vraF@hK${YwPkh5d0U^TenQjX+$+m^}98vEypSjjNl#y4Jd_4voeL zkFnVz%wKguyyX&mfocr@7`Uwh)9dW@@l+ie2k|hJ+e;WIs@N=`j$b4!;LMQ8D+v+ui=_o9=VTlvp3Js=UPr&CbdlapL!hkMiX z(2dIex%4|c=s)+M-|0c$>p{QEgMPaQ{RR*E4iCDQJYM+sC>&1Wo%Z>W2VKjPTVBhP zOTWbf=WY-By&m*md(gk*L4VMLUZ_03R5)A=IqjqUgo`3Y|9(Zqo`HJ?N@%w#&qb1^ zClZg0d1Q=)?pO3;1RX{H4+^Mc`R5c5dW{Etsw&n{pGz(JGjcv| z$6)HE{5Dh+U>vxFioEfZGF zT6jAxSEwltp+4B=VQx2}>{j_?mNI+eYzU=$Bdwn!l*KI76UbO9SF@=Z&FKG3t}eZQ z|Nab!z=0g6 zvC}LqNCUP?mebg2xl<-A?QL4n4;-f57QV>nkEf$TC=*_Dw4mLoo;|41?iNk=Mz@eV z7PYc*8xOgSY=L;kEl!xTAUV>}#zhiO*rm&jp!`hJFGydbKM42ovmkhIQ^yScX73r7 z4~YIYKCN1(H}&APqU`mKyKgf88Bpx98Tb;K$T(I=3 zPCUW6wKvF!d_ejd{XzKMFDnTi+->3yG9e!j{q53Q9WIKql6d+Fmk-_`{H)n8NMEBr z2-cN05Infs#2;j0J|OyAuD3c|@b;@tJi!%`HwZuT_6yS2=noQcfZ)O1CjKCk@&VD` zw!PKi=2gGy#1mWudV}zYM86vRf)s#7OOJ@)h`NJMIp7vbAwv*<6EdPx$%j(2G&p7_(z zcT>f@CofDJCU7%@Y${AP^|@@u`moW97R9DGo;d!}VRtr_CYy#_HWPi==w*#!QxZ?S z_3lgVY=)X_X6CY)>cfT$pCU|?V5rh~;^>jTxw9E&vYD03X1Wg>t}oJT!f2a!{^`zU 
zxXEUAE}MEEHhLbcv?+@xK6>*7cQ#dIGt=MG#(>StJUp$h@qxbA-PxRIvT^aWaXxJ9 zp4QiR?7;`z*^DsRxOmzmA2xPR>uWsp`a|w)PBPiJc-k}{Hg-?zYwSDtqC1t#*-93ji z8wNC_OiUYXvT^aW@jh(qo>t%R!k+8g*;JcsTs&>E4;#Cu)i?a+x$E88j4|1`cv_7Q z8@s2~HypU>33oOTlZ}g~)%mcods=-%?~~~WZMMxRCL0$|o9V;G?rHT69~`{KO`Ab7 z7OP&#jGK#$adUL4)N`y9S$_G$?x5qcpztUY^b`Zs?nU^9a{e-R(D7MN_>T!X!2q@U zjL6VuDUFKW&!cT!6#ZdjFMq|#58RQk!1NrEQEFE>;uNL02uzmXT%A7G z2VTOmH9aLLJJ~2(f>X}fvJqK@k>Cf=kl3b?u zl%mFTqef|JP)-deuixI(wH$%4b64^OOagfFtuvmGsk4+U1E@oT~- z?EdALAR#U_wsowJx1{_3xrQ(!5SJQNL$61pw>Tm|QN|lHvB-)(+!HT>;ReZRkYCP= zcPG1=5MGSH95@+)`8)tq(=PZF1B+hKsCbI6E0(_d@ChdE@NlUfL3eC$${G-`JP)4U zr4IfQDsBA$ZnKNkG8`%c#Ce^`_^$9;Cq{4p`U^3V5Q>|VT=ghXu(NH2H10}g1Fe3t zLYjWELYjW+4GxsKf?crX-AUErUo)5v2lR4fgWNW^hkQmzN1Kz7l|(;TlISN(68*$b zHm|1`wj1vhp|}_gaXxhJ$}Ie8GP>$~y6TFyWIL=i!&$3n2%2XFyd#N*qrGyyJwS-l z6+~x}9$@S}6rH3XXmH*7A>~w#{RP_HiX&QE>xnK_Aq&t(va+eGgCR?Y*O*h#;6Oy` zmEh2Yeq}{1x3J1VTv_<)@WpbqZIk30Xjg&16#!YQw7n1_Z&i28pgV&hkDSki729O9 z@}O)9_MmkbMUx1Xv*l8CJo!p!wVB_b;TadU#^cEu*M~>Q^){Z`ZT07tn{lN3X4^Z~ z+X*)g;Ed#>fiSYjx?=(P5C?Wxj~PzBqHRMwg)u%XrCWQ@y%unhKm#6D(qT87;cEV zufUc6eG--rZ+-U<%hsMBTchYx6@8kzep69;P{v9V;Lsv9fy?pc(GYnJNB)*7eTBMC zMCpV|$6}Lg_meXyZ^Wk9?r*}JpNIj-7k~+Ag$9-W7_)wpvJbO|eOWP3!|?0gZ%!(`x+Vn(lg3jocG^eEXY~IZA?Cv#)HEvbt8V?>+e3aDBZYcKrE`Dx zE3a*TSW&2DRCX`f(f*hJKA|YoF)DLjyyMLj9sL{@ice z{raI76ouMF<=Sr@yX%$imlcJ&MJ08~q?K=f`&C7uW>J~_^;OTk_57a|g?dG$efRe+ zdSlWtMWI$vx%t3vpEU6Hgmpm&E)MAkH3(Sf*IQFMmxec2bf3T;OO{(lxNLblYJ&(w^+U!>2c1wO_w zR;&gYG_)Gx0c^_ZCIvFy61&D(0+`p`@8l`_NlXx>1`JyS0c9}Gg|c^ zWA_b%kN*F_AK;h1_Btx@dm!uZpNjC~`phV|tgX4Lb5my`8CkleIo_^&!}GH%m*W4( z7P8{=*Qoy~OZNa$3QB(Z2cLcN*?rSTmG|y}{x$F4N&O!~)^SWQeCwD*z4_s|1)06P z9htIoMm`kzGGvaOS0G#VpnYycw%Y?{QAz7ENMVscOeXWU8cf(uY!5vxkU=03lICUc zm7QJdH*v@A002?| z4!VIn+|k9#(Z%GhC&J3vh~LA(9-M=L&Y1;3N8SJ5@h|p3!rfl=Rzac$mL0;lGETQ% zG^VjCY)%r&LIq-*$+>G#`uDqaY-e5~pX4FcMNZ*~Q^J+PbNw()UG{02rjhAL{BM42 zrir6@M%J5l=c!T^7`avYG&W2Y1~MYl^K+4_281a@Gv*zSsm`7d72*}LI;}O2sBg)! 
zu{4I{ZEKb+leC?gZi5pliS5-aTAMp^DZ5r?+AQ@)*NoUAMk1tx$sCF{*i&ioxw)qC6YGB@;SFCey!kwn@SNyaSZEp;L zx$#R(ElF`6Lu{3-ueQ1C#c4okca~aSR0ro6g+w(H)cAVRHQzCt#B^(J)Wdfbpl+$o zf^QCmj}d`q)?(c`Mm$8rKdg8b$w*X}78-e|IxC$92t^tIr#MRKb6SBESN+Z-{jGBt z_e>Vp(41~u8DPOz^gAb}ud9$Pcoqh3BZ+?G$)A_YeCMuj^AibwYs>5F!ZSH%ijlr3 zeqLMv0g#`VD<81beuN4~H54(gpqOjoWM%Ix!2kXHA7lT;H2vG7N5&7L7z^q^DmBP2 zpKLfP;bd3WgP+e5aT6}EInq`~<+92g9a?e}UaO%$L+%Y`Hq7>q7QPi#;3*Aog$9@x zwBMW5-X3l!eJ0bcFfha!%_Hg*Wc0}Q^`o^oO@_yf2T&_=N}hGC-N7*E(M>3CTV-#e zPKyFpXtun@!>s5BT1hcf=e9D9LwCKjWT(=wA^pQ^{gc&s4kfFU!MWDk3!5hi3G9N<6-zt~j9@JEMLqgId9rb^m;4nqQ-|US4FCtt!(QNbs<^|Q?9AbC zJAU)~j(^ey4Ykps*!J%}iZz?RhfU-6M321oMFuB3uU<{5UhbP;Z0((Dks|ZT;ddO| z0j?u{ssc&R_}(4uE*`ArBHPeds+)N@E9fvDlG=Z!#Vi{i?FZtRe<|4txQ)p|V&C)m z<$L|$@H*nm&n_P&#*4zY3y2I28!7Pm@;%&yx0p)RlB3`puhAZD1Bb^C22SwWl zxWgp8uH?{!K0lRqFZ?{QAd+&#AZZdC+h&o>0OqErwZt?L^|F)f>fL?Gwj$OZz;S@v zQ$(TH^XUd*dptul(^farZg;0f#PT(+bG&6oW#MHPg_9PcjQ6Jot6$*4yPJzW8mQ&| zXWY!|*M3|;&G4Y&fe>{?#m(O%#xS~ERSrtl;akwh)g?wmH^QX!Fb0O-rpO#=$V_+5 z`zCqjV_Y#jtm;qfW;G|sed+bNxdnGd_=e91=eJ0SwK-?&jxq1^4B#hRNwsg{;kOqV zbOQ$2GTqX(va4MAVl`+(In*O zy_NHK@;ukhR=9`qr4f!NkK_%4^K| zYBB6zF9dJZ``wwdnx3`O+BR3HEW?7cml+4lGgm6kop9q7nooM}Me2wKI+zz2YCjt$ zn111)?fk)Dsp%zEE9)HoaS%|RrAbThRM`ZcbB7XP|G>7BeD+H! zPdTQ;9mWm~UKotX zgUz-m=3*_JWFbvmOw#uSJHoW4%wp|6G00RVwp~k#vQpVSJaG6jwUhi+I{UmTSbLT; zBE~+;P9M@HB0gB@kL;cNEWyduMKIZAY0BY0itzX}SY=+srsqwfO-=z+<@zLVSAGRE zft`J^?x{J!J=?xYby%6`6RDanX#+7>AdP&Q4F8*T1Y=ej2fK_C9%;F4Mb|{I=nJ~v zU)*gh$$Tb+WRaM85$u_7zOd##b^W>@!^5-vQ3Up{C>U6_vGfuP09c~^-kkhKK^K^n zgO$KVrJS68btley`3#)I? 
z+Quj;g$2}tA1wL#sg$(kou@n<;@+WKCK*O0 z&MK$L;NEb&k{YVCxrZAS80~Izt+r~5sR(hiIkNNlQh)e-N%2~EKILEyao6H!E?$6Q@R0h)_`%zBXsTysRD$krW>ej6Iy`hyc@%If zHShIPA*8TJWa8KrDN_0VR8cZ439&-Ep!8k_(siQBXly6;Q6vJz^Vq5- z{EE-|n;$l&?eO|-c3wpqmuTD7eMP?*k01Er`1-wVLEXK4EK6w-)vpIL`pz#uF5q`9 z8nAg+){lQK5%U06e61$&LF6?~xa?q&U8P}67!uD4lyC)Z5l} z$0Kk7J;P|%>b|mFz;gjuj`&RZevE;@1_+G*K_#Q_mVJ5xw>8G#5xl5ta`o;tw z38Dj;KlZ|Q@)-$r?Uhhs*7{ndj%T zOL$+CnwFTo64f=&h%R{CuSIGu2A%BRLWE*Pt3+T7!>x0elO}Q9cily8 zwA9p+{X>xZJ7FEZlVjAur9L@Dujj72*7Ic8Zy3Bn(@C5d2-kTFI%2e6c<(`hBrbgF zMEYp*-TVO@S%RGJwf(kOf0zT6C!Ea9$r$Y)W8kBoDM%>KP8D|2N_}J0a(IopDJNeT zTPL1zF`h_jLj?D6+JA09E=a@Nh}c&?cL@s zV>>e?w|eEJ)Wzq>qNlfoRKOdWF{vq-2;CJPv(fW0a}n*gVb&7DeS87pAJXr=O9r_e z%P>Em>9|@|WM-Vl)nv}sy)h5-HxTIYd+g{U!lyIIM16jpuPMe6*6pI=tqpQBQ#=`v z$}dS6BqMG`I0bdsvmXitnG|qO31^dR$*bKSB1BXdG&&f+=@;XxTMSNZo_6hvCo1@& zlMK`8rf{UuEDiMJF2t_U-f)bRc*m8$EYOu;PIED+Hhh9>2q{)~!SJrtqoDev^2x)n zFjk&YMQ(!-14NMTE>qKVAlMM{JmAD(&m>mstJl+Ic~{ox>AmXC<=9toTmE(r;YSdhBF8%A87$D2<@uYz`$ld*8gb`66j9*2W7yOmt>0WPQEz1;8x0Br7B7 z%*RzX&oIX6m zv5x1MzuMczS-JAfP&@k-l<-FNkKO&pzHV!2nbODU6CWJj?2iUImq!Z|4u%309t7fW z!mKRUEfeZnK}&%!Y_K|-P?jlHE}jyD9xY2k4DUpH*HA}SR!>J)Q(NbdU}9i+lx);T zE@8qhG&&F-m=EO82NID3W8oAp1`nx$zVv}eiS=p0`qZpUBOszi(=!OM8$$|NSco3( zB(@e_4QU5jktI1G4jeajm{>5sD^S@v)Qh2shT*ZlD!|uxQRpL(Z+wq)aaf}Njl|kF zcbT3?I_kq-m^I$qF2Nk3f66N2f zk5BTB1P`2uCQhVvBu^dI_Mo zgqGdp^D>hTk24dGAZ_D^Jd{@thESS9)@@}@>-*HuLGCH|BTWlWB_l&%K4yfuQOye# zl97()mMEY9qUslOpCDpF&CsI;8ET-aerK4Ola-~Gi_?8ao8Ri))WNRiuBIkHOG^WY zSB5)?=?POZQ=%Cf3-Fha7=>j%5Y$SWp=xJhR0C=Ww}3do5zYb@a3`xj1lIrhPL%7s z03BiZRh)uYCm^Ue_ruu{8(~5^^k8 zI%s*BWZ@_0$Uuph2{95QBeeaEU%ru6d{(X?sHuvEIpZ%|xqHf6z@4kv`XmXjYQ^;C zZ4&m}7czEs+9BoHcCav9L-|?+m#ciS!3McbkYg7ff-Z#ggCASuy#R@-*AIICKg46cmf%Vgggqj91&A~@~H^j~} zB#g_XW&AzWIcfI^yA7PYbo-&yq0o)cB1R5vI-C0`d808~$4~Y$7G4c+vYm4npFo#8 zleMpX3hN#x-T`oWAxAZ!KBzeSXFoe@szk4d^0G5(FrdaC{p=s@1gecQbMjPnw6ya0 zt(_Q&HwB<$f|JmNV*PG)`TAgmWI%{-VQoSvIWzIvuK0iDCsMt7RWJyBM*%E#r7c|z 
z1S*yJfYavX@4-#_YaW;>;}V6VctDJrCi*)M%n*qGXnc2vVsqqlsSmAW=5U z(MX4HXh;+4hZ_{E+LpmW_%(MrMePD?JP>|6B;37an!tPXsq@>N9`6yx>CaPaq|v+$ zy7Pt}?FCg4M%KGZJo>vv^;F4v#eU%WYksQwJ_vt<9L;rDADiW7em@Gl$w ztYQA$a1qrW|KEz{Wt_{J)(<2a+`lrypPJ}pR*55k8BN5#YBZ3 S002UL=uy$jjnZfUfd2ts?A??A literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1900.xlsm b/pandas/tests/io/data/excel/times_1900.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..1ffdbe223453b103b4a67c138c3eb0109b9b4863 GIT binary patch literal 8282 zcmeHMgqa|L^)A{sLvmgQ{KJI5H6V8>tPBsKsh=)Q5-g zU+`Ep9(U9+sc3)Na8-grl*hvXDMXV_C)L|o`>quzdA)BPkZ|q652m-pQUkS2W}Hka zA-nE=BfQGvc6P17LFI6>w%)HErdJ@C$xdOR>lP4Dg^23}ru4r8$k7$V>oWSpy5r^> z_6tSKn}AiB!FY|yMY_S5C~E3o3KV7=b4SRTa51V6T=}Z32$jRko-^PJ5waEetDkTL zYFRx(DHUG9oHbjc$)&uatSo<7Fx_n^C6LI|Ca)wQZP^_jAN>Y`?q{6uX~z>T8@Q?u zNAFY+<#(|06%acxcSu16^h{xHHoC{|2Oo~SGMIi)tEZ%-vQ0uOzZhxIHJ{=!!sJUo zXMnSK?u-8JD;iB<9bD`8=a1eO^{2$-ENkXt7%z0_9a=uNZDAZk$uE<>^e6T*w=F#t zm&DP+ycVQ@&R$4=nVoeE7Z@NrgR9?Bjay}cLG1n>0igajp4RDb)1ARalwmYPhw;?Z z&C0=@i}T0*KYsoXlkzW9FH2HU>*B@;1Iyop_d})@6L2JzUP#Ed0JZ%B6c(`SV)N+9 z7e6sk;AoQtBgpx;`Q3b(Ulfhq>jz$}@s~y85s1*$zbFq&gSvX6urj)(%DR@VcH??Y zoljl7ky8XQd$q;0mNpjWDh({t$xa+gSK*9s>XD)07f^%|iM{T&Xvbd~(o08Nz zEDx(}c$BmEdL+YtCbe)E^GSq|^1=8U!hUxPo7oEAeh1+B6`78ft&m-%MV6Z|Ey(nl zL&vFf#wYw+Uk=sGeob0_yi48@xju%RbAQ8H{^P-SpzcR#U8Vis2ZExhiz^bE ze4Lzi3i~3>Fp`h~P~p5CxPIly3uiZb3ukBhAK~nG&cMN98qCZ8_En~-qSVce(}sQ* z&ix|8ix79#gNyb+XCE7Npq6Ex5ya?*JA-avnMzLO``RET;;RL&C>9*xN5XM8|sTj^Fe+sh>TIKyd4lnSnGsuaH*y ztZ6)q|M@VtC+~CjQOYkc%na6U1VBTy4$6aS8yaZdvRH54H%ZN3AZG`GsK-u8(G)~6 zL4dKHNg&(hXPFDslhzw+70|H#=R7FJCPk?d)rW^-zF{6T;r(*d{wzuEoA2MW=zv!} zB(J})e40ODA3OtFZw&NJEyGs&pO6{ot$cb2gTWF40Duj90tT7Cf+hd8iu3#rP9a=S zBQDy{#^JNe9$4%Pu&3qjG+&g-d$3DWXVE_z_(bhE^0dKN_`o6x?^fPd!Kp$`_BGRY z2_d5~b_EtVwoGj=(=jk4`{R-Z9&JOCy-n_W8`QIAziVrEkGo3uBXJxvDVceamZT~g zLvB&}3WR8;*wZjMkK3%!VT80?qzKs>Q5C(fE>^qJ%(wO=6?Vx1@v3LY4w*Dm{PCAJ zqaQfG^3$0F^DR(Aj}!$bfEGoyQQNqRk+_>)&hNHGV(qf4H;3JpSq|hKM2Dhi)mTci 
zm0b5DL&6}!OP09SqR3k^Ss%mXmG|1>Ryg8a)*YlV{Y&mY`NSS%Oq+Z5Q{L>Ppg2TH z)cXXRaw@i?H;C5fLwv@$%v$=Tg>mu9vuLVtW}O()RBH6uYdqWa-OaePx6>GIQ5Ks> z5Y37^&c4{)HjtfL@9d%=^?f&*`OHuxXH_7g%g^N^J6w~-QOOgxK9-kMB~z!j3k8AE zi(iqmHFJ({df%hp2%>op*+x=|tju#$lEy8VHAp4092azkE`M>L|N0570!i1G+tSP= z$JVl2^zFIYzapbrG~6-+Hc|`w6a0z{cUyBet7lpsZgx&K?mzYt{vnA6o!mH~t8FWk zod<{7#I#BI=%4W?5kkCb1Fbdrw8rMii8-H8#V>n-`7?+KBi~RSWDsE-C-n7Wqcq$# ziv;3giiQdC30)?LZ#R&$y)X1LpiurWNU9MQshPmY)$Q4|ofM>-ity@oJ0dtQ)!1i~EJ+g)lv39OTrIvSMp2Xy)(xh&7$+oC zE=FzW=ZKHh?i7eSixD^?(2H~JPBf~F6^fhWO1Uv45)sZv2Hy1`J49Z)3T9@<^DUIl zP(8z-!~?R7WN4hbF-X$KUj=L_@HJ2Dn;1qXXbV!C^}3J8^h(Rl2rT)7Sxxj<22g{p znvDB~m~}qrn;ItZU|53Q5Yva-NOAI(t(3~ABEwS$F2rzf=&_b6krskN+%f~d&36G~ z(e~^T$9ko`!#~d$D~RFnRehAfcMkiEh4q09*jGJ-DBN8}`dJvkZ;}pMJA@?l0Tq8< z)Kz4vKn3UOxf`Aq`j+r%^Hx!&o+=3id&WbV@oY}R!)Y?T`oa^;fD({#(S+TL4_YAP zfOQhD=bo-s)6Mo%)6#_q)zsTo4Q%I%LwzDAdRrb&wT9(Bd6PX49@1P_351)LmQ4$! ziM7HR_`bq2wRfzaFQr-vVE6OHT5cQHFTe6eP$V6~-Wr|DU36l3^?=l^o-*VkoNu{i zrwwyNn4QNiLzt*uv;{{Oq-h6U9D!7qU+%ea0x%93H7~f29!00UlBq;e%&ym*LU>u~ zZ@;Cb!WBP*=Cw7Ot3Qlsr;-qZ#G%ax#mSe&+L$^8n~M2InTYwj-F%-t?#hm%78CZn z%uCr+S?C3=srp_W+W7n3@<7*jA8}FpolJ7#G$U>-MV9HD5j-mck{B%%FpSoj66Ym2w_ypmFqN zZwp(1p=8H?-gQ>RbqNUznf{gRTTln(ut9Wm4xH0Y9R)f>7KCSF9Amsd)f=8$H!lb= zo+l;jD8Z>dt#>Z!)^bl7~m3;1wg#4Du*QV%&HIzdF8yR(* zw?&*62p|A3YK&74c!>q!llDh>vf`WD0@hq>y7r)Ll)_glFbLRTqQm{Z>DhVfOHl)4 z46_ROE=DzCdnc@EL!f_)STeJm*esT!ngnH}2*{nBdn~aT?Q$;(zucMUn9sPH8Tpn9ba_WDh2ER5KcB zL*uQYS61-?hbZV9p%EWEp7bCkU+PQ*F;)}2lh<}ZOZh}9FvKkcoUd&lc<^$fWZDzk z@{N>5T^EX%ch8|tk%jX1+jKwg>d`17m+2neiq=I7ozf}Enk%DHvMCG}AEOK>QeVMO z4@oi&@%N|@r||SagcTUIN28eLvdD8s_3_z_GF-26Ig?nZ62|3xmQNplAY)Rou)>pt zquy;lC7>26eOUXgLI03l)h(@*L*Hqwt>Jh)e>MxGuzeWm@vJE$lU+`3s93|aLXNpt zhL21#1CzQ2zLnFnB+>x>g-#Woz=4{IHGT!3vp}c0L1+fPJ){ZqrKmb;NqwcVqiwpJ z6>8;z@dJ|Ew0*Pb=(&K{vqz&AR+vGn&J*wlJ1wag;q$|(IA0I9sq+bn#2F;6B{_LUsq)Tts!*wU zaxa(F%Zhz7Qh!m?bn4ube}m-MgSmb`t9u55CnjJ+I1D6lN=1?tHqF2#X5H+Nlu=x- 
z2(W+HMTx`VB8CwUEMD=OzaidGzi$w-u}ZHEd#rUhhKTlHJ5dn{*dngP2vx<^Zfhc!k~vtAVzz8(K;k9FZrRhL?|<<)BZ|1!}0M?w@_v8y`W#Jj+#Y^;mOVWxqJJP#hIlV3{f?@s^iakIMR=>pHe4TkG zSc>+Fy3!x#=IC-szVCV5Is}427d4%xj27^?Kt@<MLBeaF25Cuj}td# zIRj1E;1KYTBYnsez6mrbXm|-ET;j>`iP>rtwiH=M5-mzeG8C{zb_sMKGfYfl*Za&0 z$fw1p2ba`z}qCm!!=4Ox#wm1BAGhAOFov_C?fOC9?Y$)i#lOteFg8 zoR<0*+&wi^3QGC+F|iE2>hC=XNFy90@EAyJ^`B%K3Yv5npK)^@rR=nI7BQf#Ln1xHa&et|+>iv}m$39(aYGXbJXk~=^VDqsm(&KC^?AK>x^p%?)M)Lfm-aE~saLIZtT;*)Rr z8!flY7rwR)Lg`BE`t&R(nV|Hs!Tqp)@S%TgWsOwaR&ba-oarRX+n3AE(_r2C4Bm^h zRiE9FkMe6Dlg_8rwldDWj4C<>7jDv9k?)U*8Xut>QVwYit@Nl%RY^rch)s`FVzSyZ zd~?J$+oOnAujwD$p0LK;T)}hQZmzK&57I|9L|;J}Sze<-HkKRZ%yDFHgY5^HN^&!~ zS%=$%U)b)2G6r5{&<43P1r1V59>a36f7T>>RyP%8Fu13n001Pv!u>~0;%;kY<>Ait zOY=*{mYg#0l*f$|2G*N{^AUg+LwC6EJaPtHHI1((Ik{ z^*q{Few3Q!V$0!Bza7d#reUZd@y;0K^poZuA!YU|6MT}ji@1V`VH3Wo^h6GCFxj1m zI&z4=1;JJA2`0nDQMB z6N;_M2~u|g$ZTvafjTP;$2~nh8ob`ji4$dhcZRO@{)|a_~F-n1cmgmT6ma5rqe~- zCpNXjK58N9xV^k&@u>-HHaVw;ByA%kr094%kyZQSY(?o)lU>r8s0tLa9(xVgMI>?? zg0FqriI$s828RV>i^GpmAhPkNiQbj zM!FAAf(YEsjgETdSYoe<*ZQp(DIe9FWhB#A%=nPXR)ujFtV)z3*AkkE$|Z$WGRl$L zlLu9qlNXnuWlA=0D(=2DgiP8ms9z||1gvvsu92~CPHHZ|-CEPXM@Xu{o%P-QfZ`IC z8$_1>Q1AimOjVyhI*o_Y&!Du6waebS?kMZqbYhCmT48+DFDA3t}PF?Mo95v<2Os_w#6|+-<{vB zU+^Fcx{Q$0NyV&hJ#2cHhOXUho(whL>r)`@qsLV6IbWI;Yyrn$1w+lY{MWj%nf&-U z@`=saw~3f?+=V7Sufi8~<&jn&rd-Zj9 z7`{>(5V4PNE&*o_;aW=dK!TAzZWwDTHy~XnQ&M`CY_#ea*^8D5=|AeFW4_{5Z!ilh zaEuu&)=?{^V(&4|xgNA{A?TqF=3dc$|7iA&c+A6Zh1O-SB)>pke2xDGkHb0;W+#~H z4sP|zK6m4Hado1AXT%{I#(fbdy7JN5lDAE&H4{G`10NhxLwQ-qx9z*0a+R+)yZ?;= zKaM@~LpG=A+N-%9a;fjF!>74(^^8(OTrP$XyBsIb>sf5z{&a}g372)Z83}# ze>rUW+3@d@`(F)*V9EObFTnro=Vu-EFP_l;q00Vj{IgW|r!g6^;cukc5~^0VpB>E55F$%KDQ{U6ES&tCo>QvdV;0C>T2 r$bSUbpUwZi2maL@5hfY_VgB2WsIG(rTU`L)0qhq5i{agrKYITMgTg*> literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1900.xlsx b/pandas/tests/io/data/excel/times_1900.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3702289b256fd0768b7edb235c423b0a9f14b15c GIT binary 
patch literal 8266 zcmeHMg;yNO*Bu;!4el}o65NABaCf($!8N!{upq%55}XiRf(3%R1$TG%;11tp<+r=+ zet*Gk&6%##r~19q)pc*Z_iic5z`$Yy-~osL000CqI!HCwg8~4eU;zLu03x)Gh@GvI ziLH~qs=K|3<10ot8*B1Uu+Y>Q0BFeb|FwVd7buGBm2YDPid{(FimWk6&y@=zzS#Hc z!lYFaXl{?~EHKtdv#@wb3%&sgXX03LmSGOgd9oh$o0r?#RQbK{tW!n__3ds}(<9gd{|YQiIkanRN}%)llRWFgSk4>wH)e0Wpcrpzw7EF@h-XpVy`;Io=K)1{8F*ja;8 zpqxvqL-}*(o~@_Qm%2hPLyeM zr2fV>ZeR?(z#JIWfHps{2w&I8Nh-%qSe_qpTAv zO;mxKlh)5Gx4Y*XSF^&tF)`RSXBc4(R_yWt`y6At_M}-D*wYN6d>gdC)V&!n?ui_WSHS zU-(Wp=zN8}C=?T$_i2@DabV*08)pRCXATMCZ;F=NF_Q@n(TsuG3~@ev8T3+j@VO4 zGy4q0=5@MAL|gkxb3%p*#xBtPu)oXY;sx3Y4Axl-U0w(#tOu6$AupU=|4Ba#qS1aa zshh3IL`NFclA0JAfzrQ;CEcL-*B zE7yFk{Z;mg{VJ=g$&+YI*B@#`X3yc%{oKihPY95t_)y&e!&_q@y2}=^bHwBNTT?mr z0jrj*Ym{~3LK)J67yRHrC-R_f2{Lb*7{~QbAM4Z(mz{)fzSA_#9@F=p9-6N8bd4`U zR{HOd>FF$ewGV;80t^6v4tW9snLmT&bF7@*>`zX?oRh)MTTMq`(TndH?Q+m3W^UD< z7fCwNi;|^La`rTl*$lp_*5%nV3cY_hyQ@^OG6kX|30dsiV_s@d`dUjXrymZBgnwEBq3RIG;%QQ+VqXa#Hrl%iEy>#y<9^dhgig$gU5hxkf=o zxs@TC7}CKQ>n?Vwo4nx`>E-LPo$GKj)XT5z<0S>Uhpg!W0>;6Z-}LR z36zxGX^2>2h_qj|7De^We`xXu-%Fk_bnYg(-HJ!B4iT#Ic&E=O-H2SxSM~YAV}!|| zu3MA`1G6}dybL&POO+^Bp-Eri+^Fejz@)mFNOgxWUxN>8kk@?r-QupA@XT~;8v(BC zht6aRRj!0dHm^E6lfC#rMIu8fOT_AMR!o`LSB>p!_e12ow?uTcj3euA56HJ%NN)Y+ z!6dv(v&oV8nvhE1)WOWg26`Fh#_fDbXA1v_jB>sp<7CK4 zCFGC&D>59-4INBgt2jAW*qS;1+)LQ|g`TuB0|S;DmPlIn_EqsHVm>3cV2#1}yHxs` zDzm8!&l2G=z9fxYbU9>C#={BzNV1oVi*gjz)s2o&eP7G#i-F1)$j!!m86~(`O+@!8 z$4iS?wxE|lDKJ<$>KRkJbIoRqpLznqcV+4@Rt$@zqoFDZSrF?7(3hgG$foO;y4%Oq z)tC)xi(R6)<2ttlR^#6PxHG={va=cvNi0^0erEN*z}(5)#Kwf_mn+N9x$J56+Y)gC z8}YVyfu0KV(=o;b(J(RzD;#f(rldoN^KriReyG!piX)j1S<}oA9Io8T7PRBXwt=A( zWZEA6CO4cTsFx|?KoyOPGaKxC-vMtOeDj7YB|VaDu5gm{H3|tPh-NTZ>CAyjm@@Lp zXG4muc63)yJ2Xm_i_DJB9_t*!?3O zWssQ&BWux8p=1I)G@0*Q7z2X_ZJ`W7j=R4@itp5H8z>xU$0B;TQ`9Y}Wl~p)AIMhr zMGVU>umugRfDqJG-Ve*uUPREs1LHOJ6kXLHKj8@}dsfI*Z~{jO2dSHzpIcR^t<*IWXm9 zn9Zz16-;{9#=@%AFa3u2ALurBEqUl@NRLvHWF8I)U zIioE$jA#~Lxxq*i^rLSK&1BBo(!6~_;7~>4{{;$MtlVlw{XWpbX`3pLPb1Wbq3xn( 
z3tA9{K%HH}Qa1_|0ScMr+C>g|s=AaSgP%vQQ5%nQS?F!Gp(4i=If>-5F`cP7fNCKZ z6$Zzk%61L>ERMD|esZYK?;WDY@9l8=WBRBqJ%Ws%$LlgHeqC;^(|tuAe6?@p?RCd; zy}HfGMCOgrDTDX0i=KXae6ndszBXz7z|;sno~?M!)OdY*aY%}=lnj0G$}Q4V^pnmS z;{FlJxhcx3mHhPzn9YMGX8|R}Z_j5dLYjZ6n8g&N8+zGUN9Ut0+`OB0lU8z*k55CW zc_p6Y-b^x}6&jiWWxMs282Lin9aB#?Om~j7Gbr=xEZ2qZECEh)KCt|x$}YEE#W8VN zciDeU6m%Cx2E_c}5~(~`!pn}Udz>kJL2YSB$SxIm-XNGHHSw$ql)_n$Wowgneu?F-5baT*h-4cK|44m{9|CiFUyz z>W%Pn37lDXsJv2h<3#aI1WO@X%V+EPQ;Z+$&aHP~;tJ5SKog%I{G@~C_dLpG*qV2+ z`IC#$wfyn&G1t$CeKN;p1ccT@?H`1p7hAKyiGl-tv0J?NUR{gB-f2yE>&G1x=V5ui zZD!83`7;#(VmDE&RbM*8=HhZ}6Ycp>YnjanpNy)>Cx4>|h%^aZTE_J4C#L*%{l38I zWd~f`rP`?9vvTZINmYBK_$C65er9gaY-Kg}6VK8731@WUk0M52+YnsbI`-9ajbt~I zK6ts64~5{`Pjsl4)Xy8K6^;v6T!CKI+ zYGs%ldkS)nSj*mZoq zg|q2EU458VKXr%3!={7T_rajkH*yoja0#O(@%Nsn>mZqNLOO0&9+u?tSowkPp*|RM&$0NBm3bu!jQH?g8Wl zOw+9}7JikdQ$ejgXfp;QpR8My)vNLJ#9L6^wYKFa1L)tzN?)XOrs101JLoYsHeU0k zm&qV|R=Lp4`{aWx6ms$yBcFwby$N&g-DpHyy}Mc3VsKk$HjA#y3dxhP3B62FIh0wJfxaF?L&}g# z7bZ0k7ven9Hg1}MsR1`QCe2~$Vp=vRE`2o|x-F{&I`4HD<%DkawN`W0Jas}vEBj2_ z%_`W6G#y1?nu5X4S}R8LEi63(|N1{rZr$5NMgd7|36$fQ$* zgD0Y?$g48L`&tC=u8&mPjxaYqy?gVbnXV>d*Qv zZmEVxUFIa`h!Y`a7o}U8sFzITH&Py<6H$?puTDna(z#Uj%op#PwUxoAJ!R38WWf&D z!_K~3c;@(xo1dl9ihxu0UKqZaJ`&Gx-iOHZE@iEahTKw$Zp7h)%R1+>{9aOKB4iwR z7R*n#O0EpnTAaQQLgwW?H9uh2MZWx0Wk_@HRne$zV|ScijWQu)Cy*6g9WtTAlw>zC zYt#57r)8sS>&0bcb#sH((2hN+6&jQHDBf)1rTtO1+iR!O`8Ak*mWy#G4DkEb=3X(i{*rQ^nUyJc3GVIqRtb)8s;l0( z2ovSkX3Oxe@O+|EqpCX_xx3rlW^^VocGox(cQ)E?L|$FMOMkiUnXTV1j=1Y1Y?g4) zIpei_lQZ`|fk@%((>rg=Y#m#}!pX+%uS?@U6lX8PqNeO>_U4ofGwp^Iof%|~^1j>9 z@$iP258ERjC5P|zD-|=?MO)Q)&7ZqwTY5%m_K4412#q<PNUzPtYb%(?MUmWf_Vz8@TO5$fo#0nF1I+Fjr!lN2qnn=sODqOAe9IoNrj~Yc^if z5GrXa38m^HoHQx#;E<#*KZlMnwHK7q)2_kN7ah%DeMflDs|fG!ZG?SQDR7(?m~=NJ zPi5I?vllw!jpfi#28bX>_z)s?c26yf^a4=XMS)VUiYnP$Jt|!<8zpkj0Z&I)=c~3v z757>Qns$9cbNrtF);IEI^+d!xa_*R7((M92PEK+sKYgzSIGG_T|NQ_Kp7_l zAh`_qwb!StPz6q&X+!Q5rPQJ3nC(vN4gtVH$hasXG@*o6E1FAqRNpVRQc;-VBL<~E z2%!+x`m2#r2N9*>0-lFL5|iYt$bncd>>SE)6--oI{fS)WW3x&;4+Z}Z7@e$ykqJ?B 
zW*H~i_zi>j1jv|M!DYK6bh#hK$J#_w5T%I4op!3xbMYnCxMDpTaTja!dIz||^MZ~L zF2o~GqTNcX3c51itJr4K%hs&g!hCrNB@*H`bo5pE{9DVmt4Aj8!#~y|7uPso4akTE z1mzeIrRKGrv7&>Wy(5#corB45XAJ)dG!W{8W9DVrn6Wz+;U0owE;#Op31`rp_JoMv zPoRTCd*;=xV`JK#Zw}>Vi(RM%HWnPcQye9E7ANB?V6tUsSh3v%90ZwrWXgJm9Lt=( zL}j7kq^K(k2BKvQ^EK5=YKeSM{gLq+=6hTQi40~^=`#@>V?w_&Q=Pp0^CID?b?t&A z!W&hhImL4+P9KrQ^kqVn^)bacsCiS$PfueiFs8xz1qk+mnSO-+!(2b0Z?j%xSl(?4 zHGQPs9U&%6-ZpprzG1ByvD3gPVK`f*M|8$f^Z;loK1jdLv!$sm!O!4Z4`YTKlZ;>N zDV98XJru8@WTRfS>5O!{5yiAsvNn))TWZ3?l#wy=tfR>}VYUl@shPwo424BLd2-7| ziC&5HT4?XOj7VGzKW6Ir(YUAC>Ruxo56Vu~$A{qH-RHK&?l4c?QUa3A(|HgnnR<^y{b7XiG8-jx1zD&{JgygqcxHUj$y~bHo3-D{^zd4}x640F z>Ble@>9}|Y_hn62Rkj-LCXR%}Ie)EQO~;C_h+C7h^C|Tet75f5V75(IZ=RY$At`-_ zZpKZoRULK***oSX)lZz$9|gl+Oy!ssy%qj`?b;VP1r4O_3AEr!a6GJcNk4O7w|`?x z4E+pr68S=Z{_Vw z2a(8+`hk`S7Zw?|?y=M8pxp_7{;|Qd>ee5Z=%eV9y@g^|`D?DJ{c+%{ z-fJ?*O8kmnC};+VEcMT)w!g2}@BKfV;VR1f)xlrSmwq?=*(X9s@u$f zNR@np@7?8l-uw1hd!2puKHq=V+UNhi|J!=nc=*JCe>Zjj!u?jf2=?Ks6JooEiwD#< z&l_;VCU-s^Az>>bQFQv!Cl}YFfXX5Bh10i1M2B30{8NP+Bw4SL!rKU*qGD+ zf}auC0h+$MG&Oht06z%;aP=j)Z=gFAF7m*`z29Wmb6$>efwt|Et$x!WgSz!GC-`%T zdgeV8tICtK5yp7xI|BYEN6aq{?=1Wg;MWve9GDlFPrtP{fufz2oQM{jEpjz2Nz^Ri z%e)u)DEKmmRnJK%gPMffJkmN#wJ_j20un^a&d+Vj?`1e&H zT1!f8Bif5|O6H8!8`B*^?StH+sV&beLq{a*T^R*?4kZI`rO`9RZNmKO}?X%EYl!H48M$#zA2icQyX;OP1;=T`Kt@|gFS>m(0i zFHVL~rW?LYHD5c@Q1Hi(!P3Zn4H9RzoQnWJUu^0jj|whAe2*m|m{LrNFSuJM_?bA_ z_(uKq4(;>@eY1e~7)MJcp3P(vNEW5$sw@*jMkn4hn=*1)+HxZ@y1Gm_rX!L(*6CI_ zb+|Bdp0R?(R9^uo4OOLNw?FA3twoGaODr?wDl{4|CQhAEU5OCoiiBiD3_-rejx%#^ z<91)LYH;1ML4Tk49^au`hcVt~Gm)jR)ogSZEZ+;s6FPiu_`F#^_c9lz-O16>Zb{{rCf(kl<8Z{Xo{u36!I z!8F=M?i|f;)k*WPq{f&y#l{rKoo_z0ejekOliS$+4ycly9W3}~*6`&MC%P4ZK|dv= z2Z_jt)oH#J(oeB#&~SryNWECiLjM_Sl}n?f;OY;neQ_yRhkrw+rwCQD4<0Rx3F;-f zKAW((zq?_Hl{@g|HX&^RvN$Zr?woYZ@o`@7O6H-l!1s1gGuhPZGeX zxXC;v9j0h}FtBqxKP~UFfMk;`d~V=3Ds#k+#x}Zh z<&1D(U)e?CdAyu_OS4|DEUUAefG*HN+>zL0=X^+D4z8$`WrZ2JWC>a>Hw=CwV0&z1 z_BeA%T95@O_+Y6yMSNo-EQ}ePAC<)6H#%7YT4SR>k0!4Hf%L>=lf$VYXI7af%|{%p 
zqD{Wc$%Q52NkB;VIm_eOnEAZe+nZ-Qp7zQ_Px<<^znP!M=I&GF800Cd5OEqZF%vs| z?cZJ{vfzhw3T6hqOxg5Z&`T=D$^Dg%t?;r|T=D$0^NRwdia_2i|6Wxi_SbR*fHcZe3G2b9lgVZXfsX4Dl}i+h zs&BeIp#-o}QRc8`&G^=jGlZXC>(_}b>SOZmSkPXm*A-Lfp@4(o*Q66hnJt@eWl)M! zz;~FclkyeS>hKOa;xUCp$J14ANS|Lz^8WXo^UYBaWqc-kkOo)cxfp371wshJ>rx=% zyiAGY;@%n`yc?(1oZP7J+G#l@g^yEn{Wf-@anQQ}Ea_ziVtT^0zgphUzEEDR0C8Tf z=h%wx=g(^^Xi%;>CK`KsM9NgeCl)vT=p;?H#}m0ub@7l%EG#9J@N}5BA1CHP;YZnQ z95o$u;&Q)@A~)$frN;+Tq;#NET##Vsd72p4>4WqbNn5kj2kPjzZwf%sAC-s?d?Cg) zn$)RMKQ1Yr^m*Wqy}xOS6f-X$URx~w-ixvy&{xj3+Nx=5zgS|qSh5LOz#sMykS^5DifeJ5LuUDEy~v)ugDp z;)#>0e?u(xK2Y{=667h^4T?Ahpr@q>EM+)#UtyFWcB%gQcZ%gTZ+r;}43*8SfylzE z){e%y?jxvObq=?V22ba|R?(2?MOhI&w?-b>1wWdjEs|nSm?xgn>~Edkf%2G!Uzd-F z*yU=QOR*=@Rt0^7FWp;MWtRUlQOzdT6h)6|UQ>`tc{*dVGzfHvP@SGWP<6WnOnH=~ zs4r!MGZIj@BEtz*;)riaW;CBpbPw9LnT&95nAuQPQRCjaeIbb|>WRGb@Dx)^C;E?D zab|=TGsFP^g0Wli)89dT?XE;S(LncZ9~<3^-0!RmpQ$%FgDd>g@M8Z2 z;P2M@YW7$A%V_IqU!nfLdH#hk!VZ5~>ECg`7wBI&K5W_kuX6o6@OQHO0#0EQ<{zo^ kdx+nY>{p0oBJ7dkKPjiD4J5c)0b(yUY)YD6`3S(j0PgsQ=>Px# literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xls b/pandas/tests/io/data/excel/times_1904.xls new file mode 100644 index 0000000000000000000000000000000000000000..ac70787c358a56f90ec2fe073e58258c5d063a7b GIT binary patch literal 16384 zcmeHOdvH|M8UJ>(Nj89zgb)!Wth^ryh6Ip@>?){e4d5sacEG`q?3$F2(6FJ``O`7my0%O5`EPRmek; ztC3Ga9)^4}@^EDO2y>0?{xf8z&#kBzOBeo<5|MMI6TjWE0WFk&s)ol{#tEWYB+RSG z0d;knMF@;pLJg!63^a?8AO43@F5>8!0{gyZa8<||37_?YLzZR?Nx5SzKmWcLKA|Nrt9;T!LnbuDUK?(oX z4`57mTHWNr5z`+di1E?CL;zmcAD1#ujGE9e#AS@hqjrxPT|0Vo{hZ}>)@@B_1V(s_ z%@$$KvWw%bm)i@}VED(tZ55bOZ?BK1n$T#7hoRhF!az~QrpqL$7g<5(lOjrz6?Iyc z>|^@vsJc;jx1#Q>6?HYCTA3*0SutB|x$T~GNBOx@3paHzf&d0DP06y0#j*^i$z(a* z#?Y#r=5JM1Z0+@i%Wa(VEE`idvwqg)t*y%=tC5yRHXzl6Dr665O>J$nSFfJEb}dlq zUs2N}k$QJ(0}bg|BeU!ArE??^88Zaa53bxkMFctH|PdmE*@3+jD58KV+v~FT!CAx7c~wSoK89KOxrC@ z-&IjjJ+K;f-l6DRm|FPS&f5OP7_t@Jurn9Bif+hRWznC(>`$T@J8+@vR$dV1Rh;3> znh9~{s&JKDMZ;xMOlC}9cJIb;oosZ_Ss;job(LB~a12nx5{8-+F^C6qE3imG74#gj 
z01jNu6vTmw7gT|vwx9}ZtAZ-fnG34GMk%NQ-M^p;>~{rKV4x|e0((qB71$*Us=z*0 zPz46Xf+|cdv_fs673vDDFs0B6vkR>-$5Da(&mIH<8^dQCQG@#zMVY8!RtSdE;e;KN zQ|bs(E=WT@AgP!?NMk-A4UJ9^GYC!tNY+bo)X5bc%ZjxAjBbe2DZ9afA?VK+kN^Gm zDR2EJI6!tui91N@2|tiCoFH)zNm75^Ws~GaqdBJoq*OI0rH;kcc}UXt!wg7iTE$J9 zHl^6SOl)=n*lA@p8eXY1z=gIkYzj3CW$i-6+=V)R{CJK~u~;leC_Bv(inO#$ST$?m z?X+B>COU-rV55h*-Gs7R<&#;;?2R)Zlxf1U>Tlv8fuXx#ooPpbDYLb zv$P-$*eY30W2fa#nXt6CX+b}5n090MQlmefi3*`ic+Js*cBgvwphmk}G~FBBLhdNk z%EoOx1*-_;dj5RBzSPQi9g8Ld_eTKOK)|!D9%da=_g!1c!Th>X1^ePP5vNQ zSK2`E;BFItka78d=x@2+>Tto^uR8GrS4iF<{LI@gNMDmbNW=kx2X~wJgN)AyM1R}% zR)?Ec{i+jBa1rPY!Xpy>g7h`{gPi67!GpU^oFL{n9!0h61Utv=w*@QlWeuyrhWfFQ zy2#lrE%DCe)UDx#vX!<@r$DGCb%HctPk*dLqz?IPe7kHB{pC_GNdnmME)B&Ke>(bZ zs+jlWg=xcBZf1~8g~_HNm(3_2HhR&b*c8VT$6q|+&Zg32)0oR?TlT3^%sFTUZ<<`k2Si>EdEu(5kuU(<(&_Pet=m277Cds+l+X5`^%4UISLdrq@q zKtsyJv=Js77f&1G!^ZAu4UI3{e3Lty8k3ESr%mu-WB0U%#@{@9FZ#Z94kc@U;T(X=;$mcJjw(;%>cD~5x${Zw9p-NOcoUWV}gz~K4=Mb3_R&wa;qBq_(a z2W1a;2cXe(JU+n}8R;LDvK9){V2tt1gD<( zk{cian8E%Oc083ip0))C<0J<@U(9cdx2%fHk9Txz4R12!Iw>^(an9RqyI(@DW zyo6E{vr<}88BeD!5!R4%?>eTgyP-mP`CzJ}KP7lt6b*s5=OZwC#xlHLT zMU5#&jndShoElDEL*;ays6BW6nvUkq=44m*hRBlm`sCK|0w+{(da4|UHP_=%9^1ZC zwLcG0Zh*~~K%DjP2hR1a)@Ng<-Kz|`VLxXe=$#fvx z7CaG)3>!FNp9x;!0^8+tI5rQ|`!4U&Z@JbhO9S1$Y_0%A!z^B>45etog zP&C8^obgb%SgGAC+kyjRIvP?g=v>nSSLkszS+ENF@WkpG_#zuU+tE_^P+&D0zb1Ub z?q7}x65>)*d*`ZnYr6kmXb3Y5aj8i)^eQxZt0MvwWxO#Hi>&CwJ@FD4ZjhV-`DIbO zC)wSM@L~k!z^Mq#ivUbbyWvv|EP6$w;wiqaSo-e5Cz!Ou!=-u!-LcUrYe2wq5j?$H z9sDI!+WG<9W*4hvI8+9RMP14G*6>OvMsNW73o()qikp*M^(axWvu%Ym?n-9^t$wmX zntrlEnts|A94NB{yI_lZlB&hOW-uKJ=*7wgxovI_`HYZ`HYXt~iGH#q(NC5n`iY@z zc5g9kH^wPKaWNX=BIw+mS@_drbk#X@)g|r84p?idvsTeyG|v)vM-mN3d*yn2fDmUY zh^{0(z}R~zI$1%`;JWog%BdXt3$(ixN3^!q6J4xA=Aw;cX>)fcLzWJ&F{hxxfr!*g z!J!-d%8FWUVU>fpvhelc%jA07Cdm!Zt^$8c0J2hPdkI9|pzanzcLqZqIiCe9w#z!@ zLD>@QL04iFO(ImzkSo;jNbqBY7|oMi#m9SU^6+fgRRkhLbO8UmZ_jj1NmGSiwxv60SzZO*&Ple}l3eEVkR; z%sU#8ouKWs(FnT*v@JFo0eA>PpE@shoH`#Etm12x$bAdG*{sKKaROY0T`LH~4OaIR 
zxbnYG!t&wm@BU%o%8Ozv6n&DSPgd7&DoQWPSZORATBJ5`HQpQyk=IbiGPsOvbC zPN;M&Hr{qWA%pTpY@+S{R?PW{7=U~Mn2=UzROyc~>o+O8FnidQ6$3S#=}ZnRDqRXG z!m?4>UgZ80s4Wu4D<;}RH&!^v??1e+rIB$nSV+^2>7)_CEQb*$heq3&<fZ`}9# zb1x_gwTsG)-#T{h%RMhC3U!N0>hkeR-~RTiibBnzGUMyZo_*{2KPw9Lib}`!?_K)F z_+yGft)jAf?{A+uy6%nGb8@OY1~rbLYgfAbMX^zOKZTLActJY4PGmFs(`JV+y|!)y zdIfbw(%yjH9X<(J(|AP(;%-IJIlA{{U+5~d9TE8dS+oe*Qfod_GyZ;&KA#r&7{gex z8f4JWwnSHY<=R`0zrVPx{LUN0GI{*I7wSR#39|0*+B4aw&PS%VU4~qV+=k3()r*YX zHw-@d{{w%3U;5hXsKoDqtiyjQ!jJ1Squj#wmhP^#U5RAm-1RN-4&57`pHX=({*UY| zyzd=*N1XptmhJ(d6qfw-4<35*(5@+`lyAEk`q#dH5A}ZxS;sNK@U3GK_2!4;R%G__ z4rI#C8Tk<8tB^T%UW07egZ8-v*=`S*MJ27vAcaK&F`3NY8Zcoyu|4#(Kn8(CNSc?$ zmv(ipTFV*3TGhj?0D(8@6iBaq*I-t;Qodr5^Zw<3VcB~|J{8&S-T4&EvrM7<^MuX2 z*k_r40MWe&gB5i!|95j7ERgMgixY|VmbmVQk%s#E#z@Aw)QB;!J&~A+(TrOd=&4hb WM^2e3@6Fo#)x1slMZr(m0{;U(y*23o literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xlsb b/pandas/tests/io/data/excel/times_1904.xlsb new file mode 100644 index 0000000000000000000000000000000000000000..e426dc959da496e4d70b4f7e798c73e141a40dc8 GIT binary patch literal 7734 zcmeHMg;$i@*B%(U8>G8yC=n1rKxu}Q&LN}*X_OwiOF%$6m5>w(rMo*MBn6Q!$#1;n zmFw^O2kw1mo%da{&g^IAoU@-D&wv$xNJIb>02%-QpaVtDPB2^zi1t-N>*412h-Wvs5f2y|mr zsH_B}h0^$zlk)uG0V1M&vGJZaq zXJD~qbCn`qfs|9FOKwSLrY9kEYhgZOO%FF2Z^pDEjO;QVuMne<(Q&u&7C;4zbLH+@|hHdd0 zaXy%XmlD&r3!Sz#@sS$x?P^C8-zZsAe9Lb@vIhFy1(`axCiH0%KbeiaC|g6Yl`FUC z)Ur718Q5CZg0Ia@Cssw3VsE+SRdrytK~Q8PPK}QTWwVmWB(iIBqYkzU&#ji~Oz757 z=olXD%zBI)%ZR&h=*Lx$A_>vz(n14w1t-}He^7)TaEhgrD!Ua(!smMx;b)Oeb6~W{ zgy47^bASO`#`g-Js;)w^AU%Y(jUdX)gF7#WUP;5JIbPJy0wVaCeO9&cR?t!(9Jp;d^0ej#7UYRe#fhcI`>c z-QkAPbcuH9fg#o?4q?Xt123PqUhA=H)M1TBs^-L`9G}(q2SZd(#8f%kDtn)6wMerC zXGy6%&5U}Yo)}GbWi8P-r14B$aw-)S+&{eDKUtk;SF%ProIzOcY=OL=Hg!vggN9E} zR>&xa9D}L=BjWShv?$qm(pwa}V#8~$+1F+5p@g^O19TCM*6z6IBkC^TIL~F9cb17B zge(vQ!Mebd#iB1@;WULl@TLEgC!0qHNvH6cIyg_r0cZ&BwmiR6#SQ9cV+w`ZaGO51 z|C2Td@I(h^+rRrNR%`z5HjUo{{f@v_3AC)ddPUj#``>(HYL%*micBkqKd`X-yNviM 
z@Fb>leK^@)I$F!wX+vPBZsuUEphS8~X#0f%xomv2ABbl9wPf4>Auzp2Vr z@M(t6e1i{M_<;M(-#y0K0%Bz>U@8Grl<-ED!T~bG2*KitC2Htk=y>EHGTuJOiYk)4 z@Lgm!6JxNs8N_(a(khudlLcetDM%aC#>)uGMeOgg>@w>@T5=9D4dTm`#g9?8=mLbT z0>%qLRs4K?W(mlti1gWHWb%h(Wb_iszW!l24}!cO1l@^Xr$vYfS zVAHWxRqGOdN31XXoI%QEf{L{Zb;;UqC|Z5c6rF-KD+J?6q7SwZ+IR_o)e@zJDlBg{R}#l9G*ClQn0t+Ko4U)_|m$SvZKGCVnw zwmqh+H}SNVRDeETF`D`Gn)gAyuhP8v^qh_QuBmio848rS%+P; zLQ5ECXIh}InLdm&{mM?$>7(9q(`&L;#(C_s0H72@lREE(oRI^A0kg>}#b+m93t#VS zB~l05C)SRGE{%dl)5r-x+@2^Y0$!WMyvd z?DR*F`t1$quMlNB*lZ1_Cd%S@=8mzmQQ9GIM~M2ANsP?~wM6CfHi5)QbNT(_BfGCt zdr7DFvaYHEHRo8vqir*7bir*xB7>EFJH3-%L|NIoa3;GT#w>oL4(=O+Ri;HuI-UfY z#3Vokwhbwpa!lj|X6B{37p6Gc)_s-AR%OER;x%7W2cl81RPxDR`ia%yjG3z(?bC?5 zr{=U32?(K36?D&DYc!T*q=R-CL}y+FdgPlfuDeYM3i?t%y*wOsK>ZcRU}c-juTcPi zWs2|J$8Q{SwlcRf=lSgp`rdtP=_o-dWXRq#?9n`U1~n6PC4cDhYO0*VYV<+5-q^DH z-T9Xmu9kxQ%9|9{(Xz53{F^(4-&JGtE!0 zUtT4J7nC0KUr06h{GJ>fH3ji>gQ*gnrS!&^9gd>X{k&fX7=| zWDlE;kDcYc{2!#`y?p`N;dhUCK87hyDAk`LOoSqOr_d%Kt(ThcGhSsBsw4A-l{7Ug zE{K!3;j_Eo!E%YMp+PeR8@{@N6{a)rAbundDIOLS%jW-tyGq=urp$Kz0^7HUuXJJO zD!kuRkqfcP;5Lc~;hse<``+4y-t>Ch&I1Yn*W2Lm}cLr%4zR$%IMv+znQ|Gg}l; z2J~!9Y$0Xs3KFZY3So;Luh0+Q_2NbW3`qnLN5-bGK*GGbmZ+mT{7rG0gx3g2tRrVK zYcPC{prfAF)_d--MZ}i|`{s}3Z2WV1tg=PsgPX=VBn%hw>BP;Tx_7a0 zI-Ii{d{5U9=Mzp8jQ&$E^xpZwm*`;{l@A`@PQ$GmIG21bKgAn)Up0`U9aX=%ZckG2 zes;Xl{wU+>YHk_hG_h%!-cy=^ewfQK;MfDH=2mBIN_@F6O*?DbSXL9=ZmIFyqby*9 zU}rJLBva|ZHii|KX499-^Qh8Ra34`gj3+|`UXkPpCyS0QcG4YsvHyn}=n-5=NPs4WGYA3kshMv0OSM;eA& zWYZ^3qPy+830tZwDkk{_?Hulfbofk;-3l!A&Mta8FX&Rw@zQov?+t?1^NE2_t@l_b zG`5Sg?j$?-g)bZly(T{_96@(Vb{2ZAzb(}tXRAu#PUd949PJ;Y=AxP@NGQ)r;dfL| zd1ugaEO4tSJD(p_D~@I<4o`ej9t8o18*gfewt4)aMqn!*3-8k8-G0Uxwl5Od0Sd_% z{fF>oEq$rlzpBRK9A#vHAUC^qjo^vS;XI?f4hl~; z(nsY)hVbzAZd2#6y_u2+y;9=JBJ)I1)4L#f+D*0Slw@RwN2?qrqgP|5LYnVGEJXSH zxco&vrfGjj!g4*6pwFG@;HxS!G0bCYGUe*tT(I)fzMd<+UODqx@D&m!2CQhYds>rh?LXlMAY{|;B( zQeaB+v`b$cUcpzbBrC0M5_@vBQkVyOA!?20rhSCy2e$kbo~{H_^6Npx;d69-aIvy8 zl4q?B3E77HhP!@Yj1;N-{Y_jXES!8diJGP(-iDxD|8u(oqZp~ng^9FPlOFPc7uFdl 
zvf!y`4OX;iSPi6Nhsff{d8dL?r>6^|A!+bJySj5F=1uIjpUqR~2^g)&zD{li1#25} z^=|MnawPEX3elJT4am278=J+9bWd9lOx2!72&lAnt>5k`roHtlaX!ZK8FU!$(iCJN zSosQI5?GS?GV#*pmBT>EgllnJd!$xArF6M1h;t37ia4q0P+v^T;>qr5@qu%gVl!k% z3M#WZ7i@v@7^vgbjYBPaTee;}sJWYa1~O7yY%@A6_l#?{n8$sje>L5qW@3wTPrRV= z;8P#*`q+|;i0ILqWt;k%v2ncVcqR;m(JfjtM6>ti9NzdHohNidv%Ak*BiJ6tp}$`v zAhYSXWXA zA^d@=0O=<%G*&Bf$Y)4GeJj>7%nFrO83B}OjFN*9CI(K%XAYKj1MyXX_zEjPe7t)g z0mW5gQ!}ucdru$G5r7Iq)K;M2hehHk4ze4fg@P#RBt`QYAbVFeH?2+G^L{`e{<$qY8qFdJS3I1BKq#$xo-zzG&{1&YrHEDJeE?1so~c zde>ZeZ`@UEqzpK^iq^#Snf->R&ckBTXrVbgX*lN|QIr(^WOIcCDVu^fT5s1}9+c8E{X3pw9UIF*v*WMY-U!+1c$sa7j9 z-dbDSoK&(~@V)*;Ie*7(0*?+pLj@m1@BuI9ovch8%^~W}j*smvf2(s-2D_TOnwkJD zEe%+l66^tV@pMUa&rJ~iN6oxGDMAQ_pRftMX2yqimJp~J*b(aB#A60^H2*_6{jZ^f z@3m)~qza6eENF-M3U#Cs+%v_8{zkeVXOkd@Eb?vsWlgN)Y?Wq>57ZA^M}T_vRn=NdWE3@lDJe~Y0hp6&lVf#x^TsmMQTBj`Cc>?P+C4>iwa91IwAl=C zl?X-RF5?Kotzb#)7IH;If1f%MXS*x0A*LJ)gBh43RAe)1I_>Dw(0r;RtB?|*K)iD6 zH^PhePdzu%H@~? zx%+Qz#7LYm01=rs5fK#Q`=HCm8ztx^82}pv2Z#S?06&pGuCzEw=qxW*;12MZ z25XMu&Q~y&U$LpI=;0d^gt!_@rF^rj3dWG)*saOvn%ItM*O|zbXYOaxa&uH7Mu#!O z?cmVWY$9YN3LDbn=k7CJjc1$~jD*(9X1?;22<)qFC@uM9B8|9ryfS5|3o-GWC%=qDExOKKQ z>3Yd^i`s-hi#~{^GIRum1;dcbR`fL#-y)wUQc<}Stu$!r*@zY2(Z1cm!0JI=_>i}C zM5x1j>{Wp#ZlWpo(B)FKLHJUEB$)&IaO##KZ3nrnI{$8eHIG&>~oZj#Cc#hCaf0<$; zjN)u~w4m?aUQiW&60}$~(3>?=!z|U$<1jJ^JnBmA^G4Ho%e<8(?r}vD)ajG%U28bc z-IE%(;3A~Hf01+qg&)N4{6RqEf@gtW{|WZzasB!H#b07zg`WX_*5dy({Qj(ibK-Bx z{7u83#mT=LF2UR4|1DMC#JMR}{Xil||0^Q=DR$m8zN!8EFh;}pJyHIp1l=^fDPjCD z^~C(^d;dQ{<0jxuMc@aZH`Z@}H}!#=05{979{{>=h2iJe_@fBBiE^`A_<^zkFCKnX zJKRM0xj6V?4*-Y}0|5Uh6>gf}4C+4sT1js%<7SAziS=`Q{(%GlNW%-$zkbRe(Hg9P V40k;M01N)2f_pCzuFU`d{|ED|w^{%I literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xlsm b/pandas/tests/io/data/excel/times_1904.xlsm new file mode 100644 index 0000000000000000000000000000000000000000..e884eca1e7c743198dace47a50baf24f57fc7c54 GIT binary patch literal 8260 zcmeHMgWmDJdZx0+NpO zFw}3n@m;Up-(T>3`#k6D=R9Yx^_;!;yY_nbyS3G^ut5M^06qW!fB+z6^>9}V0Kf+u 
z0H6fmW11?sx_H>Qcv$Gc-E3gL33@v_vAo8{uc4q@Q!6P0s? zRe`AaDmnaXyO{kHymxQ3)^KU+yScaF=o$iD~?ua0q z`~po4(lwuQF|Rn;g!vDPYK%EJG=&70Vock6`(1lhhILPAf|SVM4T$-PoPJPBuOC2} z?M=J^r%x=5JlCXGDq_|gp~a1$s87x}2qD3{quu{Tb-F%#n1PF&xN7I2Sfve>M%cXv z9F$U2eE9*|`vO6_Hj;S7(n}=M_f}Z4nNFE1N*}+O>@-o3NEB&SQI}A(?hKEQettsa zZbM;`6^aD1YdBNs{KB4q@Ujgvxn%@5#h`0#IK4(JhsC z5Iz4u)j6`7*c^6-`S)CmKt1{pEaiY^|MULYdD+;lUdZu^cu6z`NQSNUQE6Bj>Y*nd zFQ(4p33=V!(y)p;$yZxXhcg1EQu8)RZbnFH?2J99>V;X^O_%xhIzf(3>GgH( zr5q}(vfQOv;THFuS`QR6-cw%q3TS2a>adDaAVr6jdpKSl1(;Ne?+v_wcS;g;6!(7X z3yuaCETB`?uR#)YBRTB=JtB?hAfX4~V|Y6W{ShaRT-_b5TwNW%r?bCe1_Pbb&|dz# zONoxAdZ#e3ndmZH_)&%z75TJ>5bKWqHW_|jHP0+3M9lwik%71IH={{)AuQK0IAXBJ z>*N~g3Kr#28(%>b1-1{R6G8^Zb?{&i3*TzDlsQ-#hmvAvYrA-th%_BW*>EGNmp==1 zeI3coK^LBr$EtYPFcv2MU`W_g^Z{&ysUMA*f$G&j)DzvE(%|a4I+k=E+qJ6(h1p}= zm%(uG=m8ypsw@c{FuE}g;X}6FKE~f~I=9t?4>`8wporHPi`AKnu0egnJXpeemB9f# zNwBro&)?}ImObRp`gz{Z?(+{EB5YUtdL|apGyN-M7A9M}5702!#sUDy&@IrA`4KF+ zx|*)D!oSq_#IKTRyV%f>8ZoBi!a5Gl98=3_xkAzHtA%WT^3rV8B1k$N}xb& z;g-DL@Gqn_p$Iv#m!Fu4l}xt9iR>HwwjSeYAW2#5Q>cw=)vi4a)53Q=p(mcLP3lk{ zz$4+*U?Bw&A^jHn7KZK9-{WeDJ)%YNxp>9{WeV?KSJqY&w{Sh_maf$`I_9w$>h*34 zoJ^A0c4nY>qLzQhd_2R>A?pr@WQGB+_cMF062yB^sO{Zk?)45X2v$UT*+{iAvST^y zvV(<6>7a^VG+km?XHJ^cD6pwtEk=j+l}!lYNkMNW(Zy|JQ1Kp}0n!}D4R>+Qh%#Zs zLg!ovPJWj6mYb>}1RUZK$Rz6i$;r*echKuHH8hS`OY^?Z4pSojfZyv?N_JfNM$<@} zSwZT7CSGc)ki$^y9i7DhV9;J-Ac*dnztoE>+nnhbl1rB5jew`J>8pp_u@yb()EuNA z;I;cWUVS2A%y00Ss+#+Y>=tPf&IHG+dEeT6xWc>g{HlsQl!V&MqqF{4=!>HAfP)>j&7(Ihq0gijOMUaO(^-E~?Qt=ns6r>@D4G?(2HE zJGj`vzON|agSQCVg@K{V%}Y$}JG*)`tVy{LDyBij%lfWs| z>DlllDcB$tuV05dMwHwkeQ%^zSpy>a3{qZFLu9*#G~3*-tD|VvU+ht&o-jS9bDRkH z*Yz=^eCu;8E&%YE;XB#?9qb%yUE)h#T zbw%Q#)hD$m#zLx^foJc`5|WwbqgIVx$&FTTypeN-f}F9~<%Bjr)@zRD$(d&>xN{^@ zQ_V&OU3TF*MV>vpp7}CfY_52U`93id1%ziflLYqp+ES?i}8ks6lCb)d$PN7S*E`89ro}X!`Ma#&aaqaflOF@&} zNqWQDynT|uLbzG}M~6p6x^UdU&$M0-JRjOjHabpBD&}ETfxp<)@g1oR_Q>oTu6wvt zo0R(GjCT=UV>zpo2)8IMnUqL_Hene1`jJ{VI@it?GcN@4`+JfuHjf#Vo_b@c(G8NV 
zkIZDxyYToC(z(|%Jt@cVE!Ana#dvZguL4PVjnJ>9hn@V^j2ecqH50tb+HtJ7R_Ost;TH_N4K&g9V{Z5VZcX1mk2Uj!dKq@FpVmO4yK06xP>Mv$*R z-;-wM$qZFIcI!3^S?uH`;c2)@T^YTtf|I3%|G7YnMJH^1j=I94XH!zR!gyp?#wqhu zC41wR%DU!M6$mjyQaB4BJ5t^cvgyLij%pRY) zaceEw?MfbVu|3B(O>XGJ(+!zR|DMIsfUNg4Yx`Yqk7EjgI_x|f3uoSj-abnZtu614 znM=-dNpq7WX%x1nC;X_dl~K|DlU)X7P4iaz z#S`*xPfd&ICy04`Ofy{Qe6PR1Mw@9uxy6hPx8}Cwy z!`ThW-o7A*@mT>lQuY&Bt!^?gX5V0lgc5(9$|gLc(5`P*ur%K{y*bua-1+@Qqqkb))u*>jpz?W;N|JDX?KVK;Pl zN3jVAza*;RK;FrzbE34!4FrY-`I1tCH58?``rC`#SQoU1?Uf+lKy;EDj?`B&uFb#>y|uB36fpiMdtgR4@qWD;r^El7GH{AzMT{$j|PWTLmie)12PtV1u|yq0uf8aZRR9c+;*r#Q99`*Zn|y|h<#I}&PmeXy)S2fy%mnV zo*Vz^p3zG)MZtd5eAi4xSh+LmvqMlqwT-YG zt)$y~X+!q(kiJv|yj*LO|1J&@!8>2dVB1lNn?)&-r7A)lx&vBUc%fWa4t zaqA&rju;l>Jn4@YT_+I+vl*htY0EyF!{sU~Q_` z9u0R(JOF_954eBdnZWFAY&>8>zgNFAwq*U;s99m4pU@#VX3<)GAIffwxjE*?o$Mks z9L3*bB`~1Uzl)HmlB7|=+zSi=fnVg%ImFos;Z1wXl*MZA&h=pILl;E#qrAjUuFm~m zthU(kJFCwWqz+%ONnQ@DY^P0|Z<6W4T%h;t2j|L5ck?XmjRrZc1#Ni~x$?f@s1csX z81YE|SY^SUk-(B1mJ~G!rp-gqk?QLQ@1Bx64=SI}qszr;cFh)WAmGmXvLrB1VouS^Qb@O(qwCz#SNl=$s# zU+F&PDBq~9d_R6ELz6Lssfu}8U!Ax1u!|(b2UAmyqNh_2YyaD}>arm2cnCAD<=q*2 zrMn51dWv)v6{VGGmuJ4CuJ?@=UF~iQ{Pv-G=CND5+jIxC}43{BW zvG%c3--_yP)=T<)QwFtuDjItJ8rD!#wd9^}O!88clrt1_1F6RIIQ=@qjQ~cPtAE{b zm`baV8==2Apn*(|R%GtGT5G$zy1|63UEOW|JY4vn5QC0x-=ul<4q;H&BF-Kq3?uI1I^)ONP^>c%rGpoAu+_(QvnQ?k9;t;@pbEt;y`D6D?n zHlC^0lCjelGku_VTxU3knAe)m0*J*W?lgFpXc<~WW|((^Nv~$Ddv5xr{qNg7b#zIk&koxhB4*Euu6w`fSqvW`UONC6|`3=w8RBbc}-r_w$$t zJGq6^mUG5aN%WZhx~4ANqMnXI!;u`V_5go`qoteM|ImmYvOh~^(yi~ZS11DO7)(6R zAT=*d98_YZB6(P0g^}cCrlw8deMfEjXIpOWF{uh!qp~yQ%BhnDI>{lHtthV?YetmZ=7QW z3iR(3GxK+uy*e9kd+(TDg?m_XjXFo)}@FhrBr z%OiJjw}&o_n4CcBA@WCYvP*PH%tWpp4_s zEP5#@yW%&~#h~!5Y3Lw(rj}D-P{_^X#Nm|-{OL3qWP9=nbbL6guKgR5>?7IKK=JL< z!qrDF29teH2T)-2O#Fdg3`_yE^7QZLwSUdmU-iG7-fFA=)4@LvlKwLMUKgN);)i3U zpAG*huK#E_h$iI!ztsM-pPv=iA3PEKqQCxZ{IeYQw=oNP&w&1kU&OheJ^ZYJ{q14* z4-Y>pVm~|hc^mS#gJOz5e&IixlAld~rgVRsrc?c=>%S1ZpS}Dur~d5&J$q=L_)CWU l+5Det;E(2C>3=Z)b4Aov$3f3706>Vo0?|pljOqK^{{f-?9eV%( literal 0 
HcmV?d00001 diff --git a/pandas/tests/io/data/excel/times_1904.xlsx b/pandas/tests/io/data/excel/times_1904.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1a13468e59d1c51d7414a2a33e35994207be491f GIT binary patch literal 8244 zcmeHM2RmF_7aqMc7(_RM=)I2-y%RlJ2%`5I6P+Mhln99$Bs$ST5WPqAUPq4_(MSI# zw|w{B-2454@9y)=+0S`qul1aL_WSO&_PaDy(a=EvOaL|j0009lzGc{&q5uH#=l}pI z02|d<*2US=%GuLQ7v^f^@qpXM$&n!k9hLPB02Mj@-}Zm}1xk|oG}?KA@~29dva4Jw zbCpuqLc1Z|BrtdVt20CmO z+I8;dDXL8#tRDr&A6lX=@P@}X63!36F&mmh=+p(NDhs2Iij2DA9XOi(O#GfSvp$bL z(mbyhCbKo;GR~9=28WDH0W=+=tI13God^9p?vmxI^?h$+n7?3NBBkkoQ$MQDjEjGy zaf{K3bMakwrrbR-5%!RP{GQ&AqAmA!EouCm^M z#yIxkj|JJPz3|%#qx*6dz(MZ&R5+yXXu`>%uY--c)D0Ib5M?OiGCI4Z;pMd=Z?;|x zrUgzW=WP<)eI};1GxD0O$HT&Ay3DV~5qx|`rKe*nW>;>J=`PL)GkfINawwP9OnT+V zrIFsF%_vBM;2%`%W`1)VXjCn@-FrmtW<`NFu>+kkH6(U?lBRy{AD2O6Ke?scG2f=XZ zKkP@tw%9GD3sJ-%CE3~9F5bl>O!XkGlT7U4%mj(9BUqU!BXaW?<&Nq`!UgRIc)j@T zJ%(sMBQeugy&81>RA;9&q`J12A(hQ~?WRt4_5?F41V%r6NQt8?K>z~`Z;XLC5Uuhj z*ax33t<_-z4z0Q8_-i!9s&qv{P`_|bhKL?T`arfskF}iF4SIV^o-!Ao*_vk$IQx$F ztXF%x$KlA8{yStACR(}&u`VB(!+L5qEmk5B3!MJSMqP2pg0b*`QH-32ECe^jC4xQzpg)@A0d&xN^Lm zo;3A-IEf?nxey3FxnQx zv#s~v^2Jg^5Hi@lBsZ*45qc4>f$eZgg+E)9*sj=zMZluQKnTJ^_``b_25nQDF*O7p z-#X`T_KE{S3Ll9oYN`mDxtw;1*XS6Wu$c|?_8n zO`qN8mF=C9y=H!>wN?`AdixzP+OyQMp=w1$`%?III|G@*VI?Pjs_>xpoH(OF(5KHT zG1`o8tU_^53wqM=uH+3t#ru@{2vZDKO!%AuY5b6x_9YLD_#&^0m8>q5KGZXamfwBK z(bdYY-}^c_>^Z)M`lBa1vz$xM{ zxW}s5tE|4hRX7irse zc6BKj6W`*sl8&K0^{x)K))vqio~5SXzDpMe_udmsqacfXO}mpuj=vw@-9v;`dtDC+ zCMJ*w7ZVUe#7ljxrRK=V3oxWnE9#@v3Xjx|XW{Aas{5K4qMwZQS(`P6pV%&Sf2c-L z4b1-v{J!J^p7k2SWb>f5mZVV+-YrKyZhT4UFdq1i`(x<)t(910da*+NBe(wr?4Gt( zPF6g>-1&aYWyi4JnOX$cM6m$@`rhN5PPC*T|!s=*{aiFQ6} z)!>cPaP`JJDHkZn35`jLXLIzU`f#3ay}8QlP1bgpg_fYc~Y0~RzkueDsXpYKe~8F31zD|TEN(CBHgFAl5yz-bED28 zlZDH;myX>@bnw>259`Jj zilLuBp_zkI&1^B=mUwnR4Hz@iLx#a4!@c+tWG-K@HF`y%vRmuX*xeT z-J`==OhY|=;1g#pmt(w&y}OTpVvWDzpmDzJC-8(VZvj6&WG86jxe9c#l+PM$5{>9; z;0Q93Y1z)b$Sk`kyv0UkaHfz7YoQ%5jE;VT;=J*J2Jch>Mq+9bV=_nA6_Nd6R`k?l 
zmXfTc5LkIw$>mX_QJ; zpUI;{SkY($O?IcPzjE8Vkx%QTnNql)R}4H`T?@kX9W9*jBC>of zYw@8S%iE`OS1;c}?Q80*0H4aCD00_{PW`e^^A>u=<1$re#>G_Q_-s#%)0`>&M4N?f zr5llM(V>r%7{JKN@TQp{!Gyxds3Kk{}u6Q=xiH`4P_U0OP)qGEr6mRXr1YnQwL zl}s7|eHH2_Zm+^fL)6E56(qtt_tdRP%LH76+sqBa(nuXn>j-=$G_eb7%GI1~UnyE) zm(Q8t-nvTJemD_56BK(SGGt*z5VGVlin_DWkc=O}q+eE*LF~a{0bqW4kickhmzE}5 zk~$KK|7|iG_kvOr=M)@Hd_?haW^_V0&d;6u#XzH`fGFg-VZw5vvsaV`|8$!=-90Z} zcu9={bjn!saaG-+OXu5545v1q0k5rOI-yL9EX;$jy=X zGDJ;IZ0mDd(fpl=r^kX=Wm{EH8MdeO<}VU4@xMK~JcIY)Z{4ZyIm_3V6n;HuOt-Ps zl5F68P3e%3DtkVlHH4k%pO=?9(&|8X;z9QzxnkY>lgZsfE#np;(m8I9XOXX>UQMSk zpG)oTLl^bY_aq%*VGKn)(`{&WA=O7y5pBJMGY>~{99y;ZYbi_>S_xo=n;Mg0oc=FV zPSd+G$!)IPO}U$z&Ly%cRPlUkygBBB3c+^w@(Q@)SYl(`VqksiP1q|}7mFKQK5M)d zDl;iKM2p(Q_4pSfsEKg#Nws+d-am!ne7M4HCo;3wM@BG$-$Jn`%+czX82nbx$t8^! zcr9sn(|Yh2l?kj4)Z|i5VD}n->pjvW{*a_NKzhovH}C2+g8;0XG12IU({wn$@--6z z={?qRJyN6YPf~Le#o^7V)MS2f0SXNG(#WhZ12iNlV^ZIbwD-$s3{0{0SUy^kDwGC* zy`qS^#wzxP@BKD=;%}^O-8~fx>n`?+^6Ewj?2R1c(lnyQPtCz>stTd%cO`FUF=82n=UB+CGQTMiK=820cnx| z?Av&qtR;^i&KEdnDGRb>b`7k|LZbQasps<@p7S^)-p#Q9fLp)f{m0S7!`8~m(}U-i z?w6D;NpCi4mKW&Hb3`8lw^TiVG8v+7j`*`CIr9ugadum9^(lSc-Gfw$P$;492Ze&@ zGjb{Ip4;$XP5VH~Vl{W?x={|G3;cRf-U6pLmjM~8O*WiPsxt-2gIBjjuKSj^Q>INf ziF7=ip$~2Q=iZm@=9$|X^fLLM`dR0;q@5rOE-LdlXyKl+u>Wke%BU9_ zjNVmkIon;o^7!?v>`Gq#e8&fTm`0sAt==d_f->9i*9=nE6q|Klzqn>@N z&RcWqBnW+isxC#+-Jy$i@O@i(i5qh)ln&EeYlcceE8bjJjWpqa6)Ql_JbV>P`8E(x;*R5L$!^STzbk z(|e`E+VvgsJ?w6zM^e6+%DK|sp=J3V@$T9XMOYGp3~oAMLN=Dzq8r!F(0wY5NdAq; z?Ck?A!x(#e-hPKL&R9~v&Du0h%xD^k!d&X>(5+b8*vaojwRb;?Jo&B&YMCk;c%9=> z_n~spJ>QTZi=U7s40Qvc!seSQN-Y^kLvizuM-BtUQ+@;FKXynU6C)*=M=q9{?k=t# zJeDr*R=*V!{#9a-?Dk8XS8eA7b;2=jA`(x9uV!enccr@NI)ZJlT54a-IkgJ2DbLLX zy39C#klp7y&AeDim#sXz{{RLVQeb_Z%Zi3CR?4g8WVt6@=qOk*YNQJp^K_O1MO#PbCW~$NY81Ni#S4SE25D8jup)d zL8i@}(hqLlaH?IbMpkr*lW z0RId}b63}Y(TIfMZ!JAh`iJfnwug2?k3UZ>HZP7JTwl4WLf?P~Jt|9FghQ zQZYhPdDbQVGM~>Me~Fty1+w>s+leN7?0xdiI(8Ryb#tacWdV{85I=q{vH0HFlD|>* zQ#xrJKIz`mDq2LIz*oPOmuGM9Pj62#lRoE6FUsPUSoWXkq?Y~uY2Yw>riMkfpU2hc 
z)b5Ql?8P(@czfb0bZjuQw(UEDXq0HOuUP)9aP@IUf0Ex>-#I;UC4S8bP*Ay$($hcJ zXMbO>-}`^4)@rK$)xlpYNxvKZ=u?oS_)}f!XT!g$>wh-1}kUt!xlKlD#|2>ub zZ2EIb_lIc|*?*`0w*>EJFMo}xfA|0Z?jiHUzeU)e%@_W%2maZ7f$C4@zwL;csu;-C R1psi7R}eCYXVU(7`VUT^90~vc literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/excel/writertable.odt b/pandas/tests/io/data/excel/writertable.odt new file mode 100644 index 0000000000000000000000000000000000000000..113bd651e8cd0c4018c624b75096ab806e793d59 GIT binary patch literal 10313 zcmd6NWmsHGvo7wIpur`$2G>Ba;1FB`!{9o&TW}}1ySuxF5G28aySuv|*x$`b_CCqJ z_x!sx&swWT>Rqd9y5FkqYB@nI{%#F?UZJj{+ z01(K`7znU6wzOchx6os-1lSncFj#{0E%YpbcINsPwhXrVj<#}t1Cziiiw}hc1A97f z9>Eli?96p70LErEjJ7|T3?K``KslMW$OyOyk3T^c7Za9$ynX=#0|$eHd~7XvUs*q1 z+U2AaMJOpLsi~=1Sy{QbxR8*<5D?TbF_m#}6v4oBz`%@gag9kxKte)7Qc_Y13JMw; z8oIi=rlzJgHa4!Vu3%uE2nast=z;k7p`@e{)YQ=|ED1b3DFOm%!onGnk~wm6d5Ve! z8XCpAy1u%)LAtsh&CO#$pg@|DJeTUyR@`4C#SZm zs;afM)!)A}DCqN}eEs@$baZrjdU|DLWp{V?uFsJ}=;Wvto z(|glf7Bid}Fhk?1ZgfGUd#nY|I``>7qO*R2;z7~#4>h6^Xs5Dd=mFe;BL^m4W@uR9 z3{Lcn2iF|W4%)#Wk(mB@x_sUnc5`@H9up3mvWfK>&Ub{gnMIJjc?&A9GAd=som?-{ z&XnySwu8!UNoC8e%0wj*w^1Cg%|d$E;$;Zmy6awa-E&d+RceS`g=9 z7dD|$Xi9S|q$j9=u0wBB&DS7aR7jRex(5t3ZDyTx*u^jFcpe?3IUQ~JPTvjdzq+4Z z?_y;9zCRJ&T|ZCcFf(`FxmTXVy|-`QHb+?3G&kwY>BtWBuxXhgs9o6!&1-X}Sh*eC zs^(vqpPO?_=HA=$-?IwnbP5>74>>vC25}OEIcyAWEY+lat>+`K9N1X7;4%m)5rVl4AvBnboM+hYhan`UE+&s5 zTS6w(|81y0d5Spule8kckH>27VOG9#gp|0NDjuB3|YgIGWx!& z2_cm<2aFsAxv)yu284>^1KeGv9W+htg<0tsywq*n{Yf28xX?QX+Wt0{9a!dH=dF5 zqVv6`?fiuKCCY`;oe_mr<#1Q2FAC9a0oz|eZ0CQG#*dK6C4KT0h^5P zPSbfVL{Wam?ymfNJmPd{Vd)*N{R);bZWz!~9ye9$0$H0RYi=`EF-)+{+;iU?JzUd&)Gi|LL} z)aLcMqQ|5y5->TjRk2l*93ikd@jJjSJc{IZsTFb)&B`^UZe5v1FM;qe2q6a_hFd}( za7E{Xh8oeLPI?(vSh^?X?|s(Zt8HbmmN3o9R}I&OB6j2quu;>ojuwDyRDsree~^OCTyRaiTy*fMQ%=HRk4xF>ASo0w+DT97Ux~2SRyamM&1<8IY7t!_n=y!p&30*|HeLQ2hVbi~BORQ62MK)8o(&%89o+%)h& zUmKXCQ6=JVXE@$hYuEE`enNE%BEQz@^EK`Q;3Vf&Ry+!E29|-F$QiE>$Nt>jQg|wJwHU=EuOfMk~f&3 z$TOKyqnY!N3K~Uy9b83!9$cgWx0AV%0f)kkWU+7bXF7S#o#F#K_*HGpM3VDao)+|yj^gH;Fhn)avDSMxobi1 
za=vf-y<&VOKx~8&D^0(1ecv8+;)9$X-^+~^ zGOOkU!?fdnSmXsa0P)@jV%L{5UH5#tcx|j4*jiLyHb=zug@X!HXu?asNl^k$%TljY z;UwMXeTYI2w(|3j=p>pR7j%~JX8er-1IY0r^igiU%oMHWG>OA0;Go#_>fDddUbO4B z2__rG5}_sc+IyI#Pmr;C_dm-}!*o=&8W&t%jfZmwMsC@rTrswBbJ%Pk%k*eGXVjyV znWJzc-odO`5INJurzbBLjBFouFC-jlGrgLliR(0JC4E-K9jknu6!(S*$N-P!Yb_&a zm+NT9{z3~)C!H&dR6^Hi80skAtdIAAFu@AR`uk6UNG=lXR3ZtU=p|peB^?6n*BfC@?zKg5A zS_NfuucI>MNT=%<;pN6$!?>iWlnW~?p+FM>OKZ8HpL;L1;RruqWrMMtN4)mp|-!nPJ zFh3{obg+OmbmUuFA)Cx)UD@JbIvd2@Nl(sH-ubZv8&R72TGT%2Mw)R^^X1ev9oi0bnwEW#-u8e(o-zdnF}Offn9FTz zW-EGewuFhHEjPfGyXnJDjOQmQeI}0ZJK>lvip=Y=vVl_KxCpViGjCbLn9i*K5l;A4b$AGqNnNZ0D4G zda5-eJl?#<*B`;9-B^Low1Ael8wkOK1!39{NYRgsXoY>FCp z@1&NZpv^6tVzZr0xszNn+E-xn-e4oS-h=6aF>Lu>7N@w!;PAHyg@4402Z@Wb+eJH1bNl5Z++>y ze0@{qR{;;4jE6@?l$PJ{GI8%MzGHp)oSRHqNN83Mdw-WKG4M%0y_}|*0pYD$Fj@H} zBoDYmiYL8-nyQ{ohBo+6Tkr(KDi$XyAZfhebqk$ zbheO6+Vuij6OZnGv>4=;bI5(*JVUQ~x6VnUxD=&q%2S#2?9A^bGSgJ$t}MDB)>VNo z%UdWr4xN2x zeatM=^BgDfBS%yjbN#AXg_xt%I^yTiXx>nWo}y$A*S3&Q@bcN8or*aMLQw4k=a5-j z?;E`+mK^wBuj?MSN3U1b2QLS!u>wR$72cceXKJ_I3R`3mEj(52FR?Zn*%71p(iz)4lv-tU%e}2_l93^M zT54s@nyY4HdMSIH)-Syauo& zK0{3cH;SZJ%4z__%q(eTuXHwv3C$!Y?g>a)b3VOH4A(KArj=d~M66BCB^1a4TMiXYY49 zoj-_3@R@s+1y4pT>Er$|pj3N(^!ZMd@*pz}19(wAWX0-5x*lWAfD%fVSS>0*GF1+N z1?!Y^p}Aw^>>L}8A5`J%XpL&1HmK*3k@MYSkESt?l0c3*Ped|nmBMS}LIAT4-}Qry zMa_mAWXrQsF0=QG>S$gY&>QKUjHU1d^bfNu2g3oWY5;p0j*HS&}5?>rUV)rvF zU$U=@l-+{pI6!j5pi*7oI94wOf+>lvHt}LM8JpltVsoVSMO9UmmH`z4iBGt^<^2JYqslUOY7700f9NtioPi9z0rb~zo;-YTYf z$aF=lf{;GCk^Wku2c#1(W2hU2QN`3SGOw<$TSR-R0S4N}#0@TqfzAPmb^^`rw)4hI z5Nk$PLbavT5VGa-L59*k^5laom0?m^TsA=IT?OD}2;gBK&GU`Q~NZe85BlHI{ z8o}=O_-r4|Vgg>-Wuds)Ms}hh6nN(Pwg#`4(tzOH?@V2&>Wa%rC_dy;L-lZ*Ao?wG zl})fSqxNZij8DNsLqrh(OBZM#qzt0u6z5V5fuKy#(q!0Bk;bjFDS*mP!gnXT_26F; z>U*WV*vP9$m)A@@8RE=L7*#X7W&U;zdTQ0#H+=*FOTM?|TvgKd%4^n}04Ct2QNhP4 z#*}!fh65=SS#IOon2Jc827p2E0VWgrpftYB=A|2|Q;d9!sQ1`sSXnt_5eS>Y-tv{B z`vD`|)$>KrVY<7%=(QF#lbet) z68lV5@YLR?K23)hu(hQH0?fITdK7OyF^qHKH(jZOrwuf$a=K=jR|qFJMCVOiyOper 
zC2wQUV}^qV7u?e`6zbzRBykHvTTFSh)@f8kshvDb*WQGih62)(MuOgRh`UiF+&qV> z9grUqiV%#FyN!f$v)@O~ojW#o&Azu=0QedqTo2iQ&r6sa4oY+!xR(7+7x~=TY9aMt4JXjQ_$-QnH%-nOm6cwt9p*Z+ME&dK=LS$H~e^|bC8Do z7fAGr7)Wl^wg*|>`*KCud|wsYkZM&uwdszFV)*la4R9h}FayFnq+-6s9TpM?Av={hbp1XXk zy1bCVOC^9i$70$doDN$ks@y9gu%j25Ud}{!96${;4N~{b>)PV#aj4D5n%`)`KvJ6> z43q#q=kG2;i4kREZq<(B0Rkx|A=zWB42?bHvRLfjYPNIj^2Vu;jV6XrsvFwimb+7+ zLcaedd!l;}`*_$Ocp?yE_R z!%j5eG@>kSo*`!oeI!!COn;qmMe&xyCkNm=UP?NN;d`}e}qXoIf|3S4zXf}MR4N+dVdy-}Cuvl)dF8z0f^RGNZwp2< z)p}F&9O-Y)KefFqm)&8cJz88Y90upzl)>!Q$We;VVGgoJC%8!FsN;welHRC5^4gB0 z+R!4i9!PFhTe}pqzE&+mX%l9vL#Bo9m*7wKhPhsvlJ@j&3l$tkS}ccAHm+#idzblQ z{%Ui~SpJw{MEhz`nG_lTqiWDKWv5=!}r1s;cH3>@}x{MaW|Qgl8Qo&yh1@!Ks$l|I1J*7 z)%AH-o81UqGHja9h;xwg+yO-1jDnAJYa$_kgQYXreU6Y z7k)wP0E}F~=_#p_3*X(Ox$Mkm4e-3_&9wHCrV<2xHzR%O4^!(0VOgEWw|F~(?8?@b zL-agxXrMWoozm|_B5D=7&yHSh+Z@=HjnhWbB?5c_XZqCH3OS>q-*l^!gx#%p>vXX2 z-ztbVeBEOh5maE=cir#hJ9?Lzk@Plz(CzXT^{HEMsH8P4frEiDK>b^{Kzd4o^=)m9 zEe!ukgIm-WY^FKU9#cosP8SKmC`~}5EfO_7Jv>rwzvhBZizkZO&c{`>`su8#aV* zD8Eoi3M2xNzYLLG2fUAKj9O?l(UY)`FfQeZBv0p|*w#U?zER(}8YRf1YdfZ@*VJN* z{gRzxVAJxxO|8w%+hmy6ylzI?!QsZp$9e#}Oeu!{P7{;#HFYK9YOqzGIwhrH|EqyKJoE8@XjP3ZzFls1 zQr;pP=bEKg=YA(XO{LosU$wh0{~|sm4zuBtz;t8UD`Xs2g4~ydh-i z*>c>MX(Hf0fuR3%^T4XNqhY}`l|`s|XM@sQOwN*nz4Lr2LoE{B-brvZ$+uDAFLs0Ah41&7ai5}EcZ=7D|R`|O2kJHDOD=>^XE+L5&A zj$cx@1lM!plxk1s43c^m>g`ctnzIa%zy%rGT+PKQuwf|Uo&M$y@e8$Gs z7C~*0FZdvCMWFDU1?bC74(SL=jIy-(67N(%eswKwA16}e`$=kleM^XXi-Zf^w~P-; zKGSM{1~Scg(&3n&CYHfz)%1#?|N7J_jc>jM%J7{}QW;7#hN3p$`G;QnqW+FgZV2H9 zbgEF27_W8>iiGmvM-$|l&G z^smx}`s+D?r&+B-dt=$~#csdK52(kE$qGg!Di6WU;|<)SL=x&f9rN`esDB@uloQnceXsRprlEv=l!VcSei2e`41|J$$zq!bgtAyx6B;v<0tP#@mizBSuI_ zir(+=g6FC?3{sQJw6(cvqy4G2XAm&;U`~qN9ig32j?}`><97~cHtEGhgpJGn#U_2hz>cy&Lq)Sy%)R2t+qL99lVV zGs4t`0hNGe>-tb^m?{AWBKU6APl`I|n0~A(jl_VaQ05YX1hfYdHn$7!g_GEju>%G9 z2~PS|$)>9&ZikO?k=m;TZ2TPR<^EAv;B$bYW;z&7;UZ+A+#oh8QK7c6xH}Ar;&8Z$ZyvKNIplUAak5Qx2 z`M$!RDwtN1TU-PYDhbcy8y81rNIWzTxgKG67)+&6mU;Y@f4sGNxx@$q2IfxvBhdUP 
zqW>tEJY8`g4%Yq9Hq4A5OOPGtv1V?^`1gAGuX;qk(K9v&80yo zllB1kJ;9av9g3_#YVtlYqna&R?k0wYoEh%v$y!>XM1ms|@0>K6Mh6QTF6~%fdJZJB z!KzFu$HWs}*=Ev?F-|Knq%YNBaPWkog2rd7uDDzS%kTy&?`(I{Bb+XX`GV)T3(_kQ z0JZWh{>$Hq|XY%SC(uNIA&IcgnuDxGu zDc{%hUT}>ua^43z+l_V|Es#a)Bs1^JZa48bJ#5@F3_iSAE~~Z|HWL`gGQ5u1s6HlA zdRz9CqE%lQjX*!9Xx@+eJN~>*iTUV0X%WRY^x`t2jQ{P&$27hqO46j03C-_<%Xhd| z)uf8%lMI$uDlwlYO*nxUEAu$j=ejbp$jKZW-9wAi2UlFtuA_A`dtU~eaPphQ13Jj> zt<00ryEnHn9+fKq6;9t=KY?R_UIA+7D81UQoHM3WL_R*0xcW6#`7t-QK%~dc8t2wl zKiK(h`PW>hnu>_L`_i<+9V7F3n#O1;x)Q1{^?ptLpXPl(r3>qEDE^;W;6Jr~`%`8g z+2Yqp!Vk?q@%&Fskw^OeCmsIE`hR!#PdzZOC*J=lZok3#A5q}XU{Cz|QyPB*_G?u5 zGtN_7_$kM~!TEcX_%qTI|Nay@;{WNUpHbt_D1Wz0_YWvPqR5|d{%%*|Z*cw|RsM|h zce}p-2I*&%`F}-0Ao-u(`5ATogYsjn{Yt5S@5BO+DF4l#k5TAHC;p-ND|`se) literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/feather/feather-0_3_1.feather b/pandas/tests/io/data/feather/feather-0_3_1.feather new file mode 100644 index 0000000000000000000000000000000000000000..5a2c7b3dcc684b3676a94287702ea01aefa057af GIT binary patch literal 672 zcmZvaF;2r!42GRH6%j;Hh9YHPs1O4KL)q8}5_IMO2&n~CAXRkf$jr*b0gw=fKs^*E zAbkIOF+@;D_Otzd|8|^AF3ye(Nn|8HPIP-QYtvcl*z0)x_%bv*zkYri?8fZpX?t6H zxz_sISSiy5(TliUEtb!#> zF9YhAU;SJ@TFyNZ>fxdp=8NTY#aTB^!?9gsBtEC}yvO@q#oZA)Lc3hT-$In}#Q8{= z_3vU`Iu7A~UhS%)Ca0=oYemE*>a##cXt$5{aKpsvqrCR3eU%_L>}5wh5`Q0$OWWUs wPyBryFa9V6J>vJ&m(`r=t6g;GlRsN7H9B~0T=zM4NF3)??l1odi3$JaHx?%;)c^nh literal 0 HcmV?d00001 diff --git a/pandas/tests/io/data/fixed_width/fixed_width_format.txt b/pandas/tests/io/data/fixed_width/fixed_width_format.txt new file mode 100644 index 00000000..bb487d8d --- /dev/null +++ b/pandas/tests/io/data/fixed_width/fixed_width_format.txt @@ -0,0 +1,3 @@ +A B C +1 2 3 +4 5 6 diff --git a/pandas/tests/io/data/gbq_fake_job.txt b/pandas/tests/io/data/gbq_fake_job.txt new file mode 100644 index 00000000..b0995222 --- /dev/null +++ b/pandas/tests/io/data/gbq_fake_job.txt @@ -0,0 +1 @@ +{'status': {'state': 'DONE'}, 'kind': 'bigquery#job', 'statistics': {'query': {'cacheHit': 
True, 'totalBytesProcessed': '0'}, 'endTime': '1377668744674', 'totalBytesProcessed': '0', 'startTime': '1377668744466'}, 'jobReference': {'projectId': '57288129629', 'jobId': 'bqjob_r5f956972f0190bdf_00000140c374bf42_2'}, 'etag': '"4PTsVxg68bQkQs1RJ1Ndewqkgg4/oO4VmgFrAku4N6FWci9s7iFIftc"', 'configuration': {'query': {'createDisposition': 'CREATE_IF_NEEDED', 'query': 'SELECT * FROM [publicdata:samples.shakespeare]', 'writeDisposition': 'WRITE_TRUNCATE', 'destinationTable': {'projectId': '57288129629', 'tableId': 'anonb5ec450da88eeeb78a27784ea482ee75a146d442', 'datasetId': '_d0b4f5f0d50dc68a3eb0fa6cba66a9a8687d9253'}}}, 'id': '57288129629:bqjob_r5f956972f0190bdf_00000140c374bf42_2', 'selfLink': 'https://www.googleapis.com/bigquery/v2/projects/57288129629/jobs/bqjob_r5f956972f0190bdf_00000140c374bf42_2'} \ No newline at end of file diff --git a/pandas/tests/io/data/html/banklist.html b/pandas/tests/io/data/html/banklist.html new file mode 100644 index 00000000..a0562989 --- /dev/null +++ b/pandas/tests/io/data/html/banklist.html @@ -0,0 +1,4886 @@ + + + + +FDIC: Failed Bank List + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip Header +

    255{40 + h}31
    '.format(unit=unit) + assert expected in h + + +def test_html_repr_min_rows_default(datapath): + # gh-27991 + + # default setting no truncation even if above min_rows + df = pd.DataFrame({"a": range(20)}) + result = df._repr_html_() + expected = expected_html(datapath, "html_repr_min_rows_default_no_truncation") + assert result == expected + + # default of max_rows 60 triggers truncation if above + df = pd.DataFrame({"a": range(61)}) + result = df._repr_html_() + expected = expected_html(datapath, "html_repr_min_rows_default_truncated") + assert result == expected + + +@pytest.mark.parametrize( + "max_rows,min_rows,expected", + [ + # truncated after first two rows + (10, 4, "html_repr_max_rows_10_min_rows_4"), + # when set to None, follow value of max_rows + (12, None, "html_repr_max_rows_12_min_rows_None"), + # when set value higher as max_rows, use the minimum + (10, 12, "html_repr_max_rows_10_min_rows_12"), + # max_rows of None -> never truncate + (None, 12, "html_repr_max_rows_None_min_rows_12"), + ], +) +def test_html_repr_min_rows(datapath, max_rows, min_rows, expected): + # gh-27991 + + df = pd.DataFrame({"a": range(61)}) + expected = expected_html(datapath, expected) + with option_context("display.max_rows", max_rows, "display.min_rows", min_rows): + result = df._repr_html_() + assert result == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py new file mode 100644 index 00000000..bd681032 --- /dev/null +++ b/pandas/tests/io/formats/test_to_latex.py @@ -0,0 +1,884 @@ +import codecs +from datetime import datetime + +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm + + +class TestToLatex: + def test_to_latex_filename(self, float_frame): + with tm.ensure_clean("test.tex") as path: + float_frame.to_latex(path) + + with open(path, "r") as f: + assert float_frame.to_latex() == f.read() + + # test with utf-8 and encoding option (GH 7061) + df = 
DataFrame([["au\xdfgangen"]]) + with tm.ensure_clean("test.tex") as path: + df.to_latex(path, encoding="utf-8") + with codecs.open(path, "r", encoding="utf-8") as f: + assert df.to_latex() == f.read() + + # test with utf-8 without encoding option + with tm.ensure_clean("test.tex") as path: + df.to_latex(path) + with codecs.open(path, "r", encoding="utf-8") as f: + assert df.to_latex() == f.read() + + def test_to_latex(self, float_frame): + # it works! + float_frame.to_latex() + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex() + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + a & b \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_format(self, float_frame): + # GH Bug #9402 + float_frame.to_latex(column_format="ccc") + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(column_format="ccc") + withindex_expected = r"""\begin{tabular}{ccc} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + def test_to_latex_empty(self): + df = DataFrame() + result = df.to_latex() + expected = r"""\begin{tabular}{l} +\toprule +Empty DataFrame +Columns: Index([], dtype='object') +Index: Index([], dtype='object') \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.to_latex(longtable=True) + expected = r"""\begin{longtable}{l} +\toprule +Empty DataFrame +Columns: Index([], dtype='object') +Index: Index([], dtype='object') \\ +\end{longtable} +""" + assert result == expected + + def test_to_latex_with_formatters(self): + df = DataFrame( + { + 
"datetime64": [ + datetime(2016, 1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3), + ], + "float": [1.0, 2.0, 3.0], + "int": [1, 2, 3], + "object": [(1, 2), True, False], + } + ) + + formatters = { + "datetime64": lambda x: x.strftime("%Y-%m"), + "float": lambda x: "[{x: 4.1f}]".format(x=x), + "int": lambda x: "0x{x:x}".format(x=x), + "object": lambda x: "-{x!s}-".format(x=x), + "__index__": lambda x: "index: {x}".format(x=x), + } + result = df.to_latex(formatters=dict(formatters)) + + expected = r"""\begin{tabular}{llrrl} +\toprule +{} & datetime64 & float & int & object \\ +\midrule +index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ +index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ +index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_multiindex(self): + df = DataFrame({("x", "y"): ["a"]}) + result = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & x \\ +{} & y \\ +\midrule +0 & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.T.to_latex() + expected = r"""\begin{tabular}{lll} +\toprule + & & 0 \\ +\midrule +x & y & a \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + df = DataFrame.from_dict( + { + ("c1", 0): pd.Series({x: x for x in range(4)}), + ("c1", 1): pd.Series({x: x + 4 for x in range(4)}), + ("c2", 0): pd.Series({x: x for x in range(4)}), + ("c2", 1): pd.Series({x: x + 4 for x in range(4)}), + ("c3", 0): pd.Series({x: x for x in range(4)}), + } + ).T + result = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & & 0 & 1 & 2 & 3 \\ +\midrule +c1 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c2 & 0 & 0 & 1 & 2 & 3 \\ + & 1 & 4 & 5 & 6 & 7 \\ +c3 & 0 & 0 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + # GH 14184 + df = df.T + df.columns.names = ["a", "b"] + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +a & \multicolumn{2}{l}{c1} 
& \multicolumn{2}{l}{c2} & c3 \\ +b & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 4 & 0 & 4 & 0 \\ +1 & 1 & 5 & 1 & 5 & 1 \\ +2 & 2 & 6 & 2 & 6 & 2 \\ +3 & 3 & 7 & 3 & 7 & 3 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + # GH 10660 + df = pd.DataFrame({"a": [0, 0, 1, 1], "b": list("abab"), "c": [1, 2, 3, 4]}) + result = df.set_index(["a", "b"]).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & c \\ +a & b & \\ +\midrule +0 & a & 1 \\ + & b & 2 \\ +1 & a & 3 \\ + & b & 4 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + result = df.groupby("a").describe().to_latex() + expected = r"""\begin{tabular}{lrrrrrrrr} +\toprule +{} & \multicolumn{8}{l}{c} \\ +{} & count & mean & std & min & 25\% & 50\% & 75\% & max \\ +a & & & & & & & & \\ +\midrule +0 & 2.0 & 1.5 & 0.707107 & 1.0 & 1.25 & 1.5 & 1.75 & 2.0 \\ +1 & 2.0 & 3.5 & 0.707107 & 3.0 & 3.25 & 3.5 & 3.75 & 4.0 \\ +\bottomrule +\end{tabular} +""" + + assert result == expected + + def test_to_latex_multiindex_dupe_level(self): + # see gh-14484 + # + # If an index is repeated in subsequent rows, it should be + # replaced with a blank in the created table. This should + # ONLY happen if all higher order indices (to the left) are + # equal too. In this test, 'c' has to be printed both times + # because the higher order index 'A' != 'B'. 
+ df = pd.DataFrame( + index=pd.MultiIndex.from_tuples([("A", "c"), ("B", "c")]), columns=["col"] + ) + result = df.to_latex() + expected = r"""\begin{tabular}{lll} +\toprule + & & col \\ +\midrule +A & c & NaN \\ +B & c & NaN \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_multicolumnrow(self): + df = pd.DataFrame( + { + ("c1", 0): {x: x for x in range(5)}, + ("c1", 1): {x: x + 5 for x in range(5)}, + ("c2", 0): {x: x for x in range(5)}, + ("c2", 1): {x: x + 5 for x in range(5)}, + ("c3", 0): {x: x for x in range(5)}, + } + ) + result = df.to_latex() + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & \multicolumn{2}{l}{c1} & \multicolumn{2}{l}{c2} & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.to_latex(multicolumn=False) + expected = r"""\begin{tabular}{lrrrrr} +\toprule +{} & c1 & & c2 & & c3 \\ +{} & 0 & 1 & 0 & 1 & 0 \\ +\midrule +0 & 0 & 5 & 0 & 5 & 0 \\ +1 & 1 & 6 & 1 & 6 & 1 \\ +2 & 2 & 7 & 2 & 7 & 2 \\ +3 & 3 & 8 & 3 & 8 & 3 \\ +4 & 4 & 9 & 4 & 9 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + result = df.T.to_latex(multirow=True) + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & 0 & 1 & 2 & 3 & 4 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + df.index = df.T.index + result = df.T.to_latex(multirow=True, multicolumn=True, multicolumn_format="c") + expected = r"""\begin{tabular}{llrrrrr} +\toprule + & & \multicolumn{2}{c}{c1} & \multicolumn{2}{c}{c2} & c3 \\ + & & 0 & 1 & 0 & 1 & 0 \\ +\midrule +\multirow{2}{*}{c1} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 
1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +\multirow{2}{*}{c2} & 0 & 0 & 1 & 2 & 3 & 4 \\ + & 1 & 5 & 6 & 7 & 8 & 9 \\ +\cline{1-7} +c3 & 0 & 0 & 1 & 2 & 3 & 4 \\ +\bottomrule +\end{tabular} +""" + assert result == expected + + def test_to_latex_escape(self): + a = "a" + b = "b" + + test_dict = {"co$e^x$": {a: "a", b: "b"}, "co^l1": {a: "a", b: "b"}} + + unescaped_result = DataFrame(test_dict).to_latex(escape=False) + escaped_result = DataFrame(test_dict).to_latex() # default: escape=True + + unescaped_expected = r"""\begin{tabular}{lll} +\toprule +{} & co$e^x$ & co^l1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +""" + + escaped_expected = r"""\begin{tabular}{lll} +\toprule +{} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\ +\midrule +a & a & a \\ +b & b & b \\ +\bottomrule +\end{tabular} +""" + + assert unescaped_result == unescaped_expected + assert escaped_result == escaped_expected + + def test_to_latex_special_escape(self): + df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"]) + + escaped_result = df.to_latex() + escaped_expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & a\textbackslash b\textbackslash c \\ +1 & \textasciicircum a\textasciicircum b\textasciicircum c \\ +2 & \textasciitilde a\textasciitilde b\textasciitilde c \\ +\bottomrule +\end{tabular} +""" + assert escaped_result == escaped_expected + + def test_to_latex_longtable(self): + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(longtable=True) + withindex_expected = r"""\begin{longtable}{lrl} +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, longtable=True) + withoutindex_expected = r"""\begin{longtable}{rl} +\toprule + a & b \\ +\midrule +\endhead +\midrule 
+\multicolumn{2}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot + 1 & b1 \\ + 2 & b2 \\ +\end{longtable} +""" + + assert withoutindex_result == withoutindex_expected + + df = DataFrame({"a": [1, 2]}) + with1column_result = df.to_latex(index=False, longtable=True) + assert r"\multicolumn{1}" in with1column_result + + df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}) + with3columns_result = df.to_latex(index=False, longtable=True) + assert r"\multicolumn{3}" in with3columns_result + + def test_to_latex_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{table/tabular} environment" + the_label = "tab:table_tabular" + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(caption=the_caption) + + expected_c = r"""\begin{table} +\centering +\caption{a table in a \texttt{table/tabular} environment} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_c == expected_c + + # test when only the label is provided + result_l = df.to_latex(label=the_label) + + expected_l = r"""\begin{table} +\centering +\label{tab:table_tabular} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_l == expected_l + + # test when the caption and the label are provided + result_cl = df.to_latex(caption=the_caption, label=the_label) + + expected_cl = r"""\begin{table} +\centering +\caption{a table in a \texttt{table/tabular} environment} +\label{tab:table_tabular} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_cl == expected_cl + + def test_to_latex_longtable_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{longtable} environment" + the_label = "tab:longtable" 
+ + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(longtable=True, caption=the_caption) + + expected_c = r"""\begin{longtable}{lrl} +\caption{a table in a \texttt{longtable} environment}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_c == expected_c + + # test when only the label is provided + result_l = df.to_latex(longtable=True, label=the_label) + + expected_l = r"""\begin{longtable}{lrl} +\label{tab:longtable}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_l == expected_l + + # test when the caption and the label are provided + result_cl = df.to_latex(longtable=True, caption=the_caption, label=the_label) + + expected_cl = r"""\begin{longtable}{lrl} +\caption{a table in a \texttt{longtable} environment}\label{tab:longtable}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_cl == expected_cl + + def test_to_latex_escape_special_chars(self): + special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"] + df = DataFrame(data=special_characters) + observed = df.to_latex() + expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & \& \\ +1 & \% \\ +2 & \$ \\ +3 & \# \\ +4 & \_ \\ +5 & \{ \\ +6 & \} \\ +7 & \textasciitilde \\ +8 & \textasciicircum \\ +9 & \textbackslash \\ +\bottomrule +\end{tabular} +""" + + assert observed == expected + + def test_to_latex_no_header(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result 
= df.to_latex(header=False) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(index=False, header=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + def test_to_latex_specified_header(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(header=["AA", "BB"]) + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & AA & BB \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + withoutindex_result = df.to_latex(header=["AA", "BB"], index=False) + withoutindex_expected = r"""\begin{tabular}{rl} +\toprule +AA & BB \\ +\midrule + 1 & b1 \\ + 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutindex_result == withoutindex_expected + + withoutescape_result = df.to_latex(header=["$A$", "$B$"], escape=False) + withoutescape_expected = r"""\begin{tabular}{lrl} +\toprule +{} & $A$ & $B$ \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withoutescape_result == withoutescape_expected + + with pytest.raises(ValueError): + df.to_latex(header=["A"]) + + def test_to_latex_decimal(self, float_frame): + # GH 12031 + float_frame.to_latex() + + df = DataFrame({"a": [1.0, 2.1], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(decimal=",") + + withindex_expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1,0 & b1 \\ +1 & 2,1 & b2 \\ +\bottomrule +\end{tabular} +""" + + assert withindex_result == withindex_expected + + def test_to_latex_series(self): + s = Series(["a", "b", "c"]) + withindex_result = s.to_latex() + withindex_expected = r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & a \\ 
+1 & b \\ +2 & c \\ +\bottomrule +\end{tabular} +""" + assert withindex_result == withindex_expected + + def test_to_latex_bold_rows(self): + # GH 16707 + df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + observed = df.to_latex(bold_rows=True) + expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +\textbf{0} & 1 & b1 \\ +\textbf{1} & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_no_bold_rows(self): + # GH 16707 + df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + observed = df.to_latex(bold_rows=False) + expected = r"""\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + @pytest.mark.parametrize("name0", [None, "named0"]) + @pytest.mark.parametrize("name1", [None, "named1"]) + @pytest.mark.parametrize("axes", [[0], [1], [0, 1]]) + def test_to_latex_multiindex_names(self, name0, name1, axes): + # GH 18667 + names = [name0, name1] + mi = pd.MultiIndex.from_product([[1, 2], [3, 4]]) + df = pd.DataFrame(-1, index=mi.copy(), columns=mi.copy()) + for idx in axes: + df.axes[idx].names = names + + idx_names = tuple(n or "{}" for n in names) + idx_names_row = ( + "{idx_names[0]} & {idx_names[1]} & & & & \\\\\n".format( + idx_names=idx_names + ) + if (0 in axes and any(names)) + else "" + ) + placeholder = "{}" if any(names) and 1 in axes else " " + col_names = [n if (bool(n) and 1 in axes) else placeholder for n in names] + observed = df.to_latex() + expected = r"""\begin{tabular}{llrrrr} +\toprule + & %s & \multicolumn{2}{l}{1} & \multicolumn{2}{l}{2} \\ + & %s & 3 & 4 & 3 & 4 \\ +%s\midrule +1 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +2 & 3 & -1 & -1 & -1 & -1 \\ + & 4 & -1 & -1 & -1 & -1 \\ +\bottomrule +\end{tabular} +""" % tuple( + list(col_names) + [idx_names_row] + ) + assert observed == expected + + @pytest.mark.parametrize("one_row", [True, False]) + def 
test_to_latex_multiindex_nans(self, one_row): + # GH 14249 + df = pd.DataFrame({"a": [None, 1], "b": [2, 3], "c": [4, 5]}) + if one_row: + df = df.iloc[[0]] + observed = df.set_index(["a", "b"]).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & c \\ +a & b & \\ +\midrule +NaN & 2 & 4 \\ +""" + if not one_row: + expected += r"""1.0 & 3 & 5 \\ +""" + expected += r"""\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_non_string_index(self): + # GH 19981 + observed = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & 2 \\ +0 & 1 & \\ +\midrule +1 & 2 & 3 \\ + & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_midrule_location(self): + # GH 18326 + df = pd.DataFrame({"a": [1, 2]}) + df.index.name = "foo" + observed = df.to_latex(index_names=False) + expected = r"""\begin{tabular}{lr} +\toprule +{} & a \\ +\midrule +0 & 1 \\ +1 & 2 \\ +\bottomrule +\end{tabular} +""" + + assert observed == expected + + def test_to_latex_multiindex_empty_name(self): + # GH 18669 + mi = pd.MultiIndex.from_product([[1, 2]], names=[""]) + df = pd.DataFrame(-1, index=mi, columns=range(4)) + observed = df.to_latex() + expected = r"""\begin{tabular}{lrrrr} +\toprule + & 0 & 1 & 2 & 3 \\ +{} & & & & \\ +\midrule +1 & -1 & -1 & -1 & -1 \\ +2 & -1 & -1 & -1 & -1 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected + + def test_to_latex_float_format_no_fixed_width(self): + + # GH 21625 + df = DataFrame({"x": [0.19999]}) + expected = r"""\begin{tabular}{lr} +\toprule +{} & x \\ +\midrule +0 & 0.200 \\ +\bottomrule +\end{tabular} +""" + assert df.to_latex(float_format="%.3f") == expected + + # GH 22270 + df = DataFrame({"x": [100.0]}) + expected = r"""\begin{tabular}{lr} +\toprule +{} & x \\ +\midrule +0 & 100 \\ +\bottomrule +\end{tabular} +""" + assert df.to_latex(float_format="%.0f") == expected + + def 
test_to_latex_multindex_header(self): + # GH 16718 + df = pd.DataFrame({"a": [0], "b": [1], "c": [2], "d": [3]}).set_index( + ["a", "b"] + ) + observed = df.to_latex(header=["r1", "r2"]) + expected = r"""\begin{tabular}{llrr} +\toprule + & & r1 & r2 \\ +a & b & & \\ +\midrule +0 & 1 & 2 & 3 \\ +\bottomrule +\end{tabular} +""" + assert observed == expected diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py new file mode 100644 index 00000000..8893e429 --- /dev/null +++ b/pandas/tests/io/formats/test_to_markdown.py @@ -0,0 +1,55 @@ +from io import StringIO + +import pytest + +import pandas as pd + +pytest.importorskip("tabulate") + + +def test_simple(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf) + result = buf.getvalue() + assert ( + result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_other_tablefmt(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf, tablefmt="jira") + result = buf.getvalue() + assert result == "|| || 0 ||\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + + +def test_other_headers(): + buf = StringIO() + df = pd.DataFrame([1, 2, 3]) + df.to_markdown(buf=buf, headers=["foo", "bar"]) + result = buf.getvalue() + assert result == ( + "| foo | bar |\n|------:|------:|\n| 0 " + "| 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_series(): + buf = StringIO() + s = pd.Series([1, 2, 3], name="foo") + s.to_markdown(buf=buf) + result = buf.getvalue() + assert result == ( + "| | foo |\n|---:|------:|\n| 0 | 1 " + "|\n| 1 | 2 |\n| 2 | 3 |" + ) + + +def test_no_buf(capsys): + df = pd.DataFrame([1, 2, 3]) + result = df.to_markdown() + assert ( + result == "| | 0 |\n|---:|----:|\n| 0 | 1 |\n| 1 | 2 |\n| 2 | 3 |" + ) diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py new file mode 100755 index 00000000..6ef0e045 --- /dev/null +++ b/pandas/tests/io/generate_legacy_storage_files.py 
@@ -0,0 +1,360 @@ +#!/usr/bin/env python + +""" +self-contained to write legacy storage pickle files + +To use this script. Create an environment where you want +generate pickles, say its for 0.20.3, with your pandas clone +in ~/pandas + +. activate pandas_0.20.3 +cd ~/ + +$ python pandas/pandas/tests/io/generate_legacy_storage_files.py \ + pandas/pandas/tests/io/data/legacy_pickle/0.20.3/ pickle + +This script generates a storage file for the current arch, system, +and python version + pandas version: 0.20.3 + output dir : pandas/pandas/tests/io/data/legacy_pickle/0.20.3/ + storage format: pickle +created pickle file: 0.20.3_x86_64_darwin_3.5.2.pickle + +The idea here is you are using the *current* version of the +generate_legacy_storage_files with an *older* version of pandas to +generate a pickle file. We will then check this file into a current +branch, and test using test_pickle.py. This will load the *older* +pickles and test versus the current data that is generated +(with master). These are then compared. + +If we have cases where we changed the signature (e.g. we renamed +offset -> freq in Timestamp). Then we have to conditionally execute +in the generate_legacy_storage_files.py to make it +run under the older AND the newer version. 
+ +""" + +from datetime import timedelta +from distutils.version import LooseVersion +import os +import pickle +import platform as pl +import sys + +import numpy as np + +import pandas +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + NaT, + Period, + RangeIndex, + Series, + Timestamp, + bdate_range, + date_range, + period_range, + timedelta_range, +) + +from pandas.tseries.offsets import ( + FY5253, + BusinessDay, + BusinessHour, + CustomBusinessDay, + DateOffset, + Day, + Easter, + Hour, + LastWeekOfMonth, + Minute, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + SemiMonthBegin, + SemiMonthEnd, + Week, + WeekOfMonth, + YearBegin, + YearEnd, +) + +try: + # TODO: remove try/except when 0.24.0 is the legacy version. + from pandas.arrays import SparseArray +except ImportError: + from pandas.core.sparse.api import SparseArray + + +_loose_version = LooseVersion(pandas.__version__) + + +def _create_sp_series(): + nan = np.nan + + # nan-based + arr = np.arange(15, dtype=np.float64) + arr[7:12] = nan + arr[-1:] = nan + + bseries = Series(SparseArray(arr, kind="block")) + bseries.name = "bseries" + return bseries + + +def _create_sp_tsseries(): + nan = np.nan + + # nan-based + arr = np.arange(15, dtype=np.float64) + arr[7:12] = nan + arr[-1:] = nan + + date_index = bdate_range("1/1/2011", periods=len(arr)) + bseries = Series(SparseArray(arr, kind="block"), index=date_index) + bseries.name = "btsseries" + return bseries + + +def _create_sp_frame(): + nan = np.nan + + data = { + "A": [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + "B": [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + "C": np.arange(10).astype(np.int64), + "D": [0, 1, 2, 3, 4, 5, nan, nan, nan, nan], + } + + dates = bdate_range("1/1/2011", periods=10) + return DataFrame(data, index=dates).apply(SparseArray) + + +def create_data(): + """ create the pickle data """ + + data = { + "A": [0.0, 1.0, 2.0, 3.0, np.nan], + "B": [0, 1, 0, 1, 0], + "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], + "D": 
date_range("1/1/2009", periods=5), + "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], + } + + scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M")) + + index = dict( + int=Index(np.arange(10)), + date=date_range("20130101", periods=10), + period=period_range("2013-01-01", freq="M", periods=10), + float=Index(np.arange(10, dtype=np.float64)), + uint=Index(np.arange(10, dtype=np.uint64)), + timedelta=timedelta_range("00:00:00", freq="30T", periods=10), + ) + + index["range"] = RangeIndex(10) + + if _loose_version >= LooseVersion("0.21"): + from pandas import interval_range + + index["interval"] = interval_range(0, periods=10) + + mi = dict( + reg2=MultiIndex.from_tuples( + tuple( + zip( + *[ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + ) + ), + names=["first", "second"], + ) + ) + + series = dict( + float=Series(data["A"]), + int=Series(data["B"]), + mixed=Series(data["E"]), + ts=Series( + np.arange(10).astype(np.int64), index=date_range("20130101", periods=10) + ), + mi=Series( + np.arange(5).astype(np.float64), + index=MultiIndex.from_tuples( + tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"] + ), + ), + dup=Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]), + cat=Series(Categorical(["foo", "bar", "baz"])), + dt=Series(date_range("20130101", periods=5)), + dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")), + period=Series([Period("2000Q1")] * 5), + ) + + mixed_dup_df = DataFrame(data) + mixed_dup_df.columns = list("ABCDA") + frame = dict( + float=DataFrame({"A": series["float"], "B": series["float"] + 1}), + int=DataFrame({"A": series["int"], "B": series["int"] + 1}), + mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}), + mi=DataFrame( + {"A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64)}, + index=MultiIndex.from_tuples( + tuple( + zip( + *[ + ["bar", "bar", "baz", "baz", 
"baz"], + ["one", "two", "one", "two", "three"], + ] + ) + ), + names=["first", "second"], + ), + ), + dup=DataFrame( + np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"] + ), + cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}), + cat_and_float=DataFrame( + { + "A": Categorical(["foo", "bar", "baz"]), + "B": np.arange(3).astype(np.int64), + } + ), + mixed_dup=mixed_dup_df, + dt_mixed_tzs=DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + }, + index=range(5), + ), + dt_mixed2_tzs=DataFrame( + { + "A": Timestamp("20130102", tz="US/Eastern"), + "B": Timestamp("20130603", tz="CET"), + "C": Timestamp("20130603", tz="UTC"), + }, + index=range(5), + ), + ) + + cat = dict( + int8=Categorical(list("abcdefg")), + int16=Categorical(np.arange(1000)), + int32=Categorical(np.arange(10000)), + ) + + timestamp = dict( + normal=Timestamp("2011-01-01"), + nat=NaT, + tz=Timestamp("2011-01-01", tz="US/Eastern"), + ) + + timestamp["freq"] = Timestamp("2011-01-01", freq="D") + timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M") + + off = { + "DateOffset": DateOffset(years=1), + "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824), + "BusinessDay": BusinessDay(offset=timedelta(seconds=9)), + "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"), + "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"), + "SemiMonthBegin": SemiMonthBegin(day_of_month=9), + "SemiMonthEnd": SemiMonthEnd(day_of_month=24), + "MonthBegin": MonthBegin(1), + "MonthEnd": MonthEnd(1), + "QuarterBegin": QuarterBegin(1), + "QuarterEnd": QuarterEnd(1), + "Day": Day(1), + "YearBegin": YearBegin(1), + "YearEnd": YearEnd(1), + "Week": Week(1), + "Week_Tues": Week(2, normalize=False, weekday=1), + "WeekOfMonth": WeekOfMonth(week=3, weekday=4), + "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3), + "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"), + "Easter": Easter(), + "Hour": Hour(1), 
+ "Minute": Minute(1), + } + + return dict( + series=series, + frame=frame, + index=index, + scalars=scalars, + mi=mi, + sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), + sp_frame=dict(float=_create_sp_frame()), + cat=cat, + timestamp=timestamp, + offsets=off, + ) + + +def create_pickle_data(): + data = create_data() + + return data + + +def platform_name(): + return "_".join( + [ + str(pandas.__version__), + str(pl.machine()), + str(pl.system().lower()), + str(pl.python_version()), + ] + ) + + +def write_legacy_pickles(output_dir): + + version = pandas.__version__ + + print( + "This script generates a storage file for the current arch, system, " + "and python version" + ) + print(" pandas version: {0}".format(version)) + print(" output dir : {0}".format(output_dir)) + print(" storage format: pickle") + + pth = "{0}.pickle".format(platform_name()) + + fh = open(os.path.join(output_dir, pth), "wb") + pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL) + fh.close() + + print("created pickle file: {pth}".format(pth=pth)) + + +def write_legacy_file(): + # force our cwd to be the first searched + sys.path.insert(0, ".") + + if not (3 <= len(sys.argv) <= 4): + exit( + "Specify output directory and storage type: generate_legacy_" + "storage_files.py " + ) + + output_dir = str(sys.argv[1]) + storage_type = str(sys.argv[2]) + + if storage_type == "pickle": + write_legacy_pickles(output_dir=output_dir) + else: + exit("storage_type must be one of {'pickle'}") + + +if __name__ == "__main__": + write_legacy_file() diff --git a/pandas/tests/io/json/__init__.py b/pandas/tests/io/json/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/io/json/conftest.py b/pandas/tests/io/json/conftest.py new file mode 100644 index 00000000..4e848cd4 --- /dev/null +++ b/pandas/tests/io/json/conftest.py @@ -0,0 +1,9 @@ +import pytest + + +@pytest.fixture(params=["split", "records", "index", "columns", "values"]) +def orient(request): 
+ """ + Fixture for orients excluding the table format. + """ + return request.param diff --git a/pandas/tests/io/json/data/tsframe_iso_v012.json b/pandas/tests/io/json/data/tsframe_iso_v012.json new file mode 100644 index 00000000..bd9ff885 --- /dev/null +++ b/pandas/tests/io/json/data/tsframe_iso_v012.json @@ -0,0 +1 @@ +{"A":{"2000-01-03T00:00:00":1.56808523,"2000-01-04T00:00:00":-0.2550111,"2000-01-05T00:00:00":1.51493992,"2000-01-06T00:00:00":-0.02765498,"2000-01-07T00:00:00":0.05951614},"B":{"2000-01-03T00:00:00":0.65727391,"2000-01-04T00:00:00":-0.08072427,"2000-01-05T00:00:00":0.11805825,"2000-01-06T00:00:00":0.44679743,"2000-01-07T00:00:00":-2.69652057},"C":{"2000-01-03T00:00:00":1.81021139,"2000-01-04T00:00:00":-0.03202878,"2000-01-05T00:00:00":1.629455,"2000-01-06T00:00:00":0.33192641,"2000-01-07T00:00:00":1.28163262},"D":{"2000-01-03T00:00:00":-0.17251653,"2000-01-04T00:00:00":-0.17581665,"2000-01-05T00:00:00":-1.31506612,"2000-01-06T00:00:00":-0.27885413,"2000-01-07T00:00:00":0.34703478},"date":{"2000-01-03T00:00:00":"1992-01-06T18:21:32.120000","2000-01-04T00:00:00":"1992-01-06T18:21:32.120000","2000-01-05T00:00:00":"1992-01-06T18:21:32.120000","2000-01-06T00:00:00":"2013-01-01T00:00:00","2000-01-07T00:00:00":"1992-01-06T18:21:32.120000"}} \ No newline at end of file diff --git a/pandas/tests/io/json/data/tsframe_v012.json b/pandas/tests/io/json/data/tsframe_v012.json new file mode 100644 index 00000000..d4474c76 --- /dev/null +++ b/pandas/tests/io/json/data/tsframe_v012.json @@ -0,0 +1 @@ 
+{"A":{"946857600000000000":1.56808523,"946944000000000000":-0.2550111,"947030400000000000":1.51493992,"947116800000000000":-0.02765498,"947203200000000000":0.05951614},"B":{"946857600000000000":0.65727391,"946944000000000000":-0.08072427,"947030400000000000":0.11805825,"947116800000000000":0.44679743,"947203200000000000":-2.69652057},"C":{"946857600000000000":1.81021139,"946944000000000000":-0.03202878,"947030400000000000":1.629455,"947116800000000000":0.33192641,"947203200000000000":1.28163262},"D":{"946857600000000000":-0.17251653,"946944000000000000":-0.17581665,"947030400000000000":-1.31506612,"947116800000000000":-0.27885413,"947203200000000000":0.34703478},"date":{"946857600000000000":694722092120000000,"946944000000000000":694722092120000000,"947030400000000000":694722092120000000,"947116800000000000":1356998400000000000,"947203200000000000":694722092120000000},"modified":{"946857600000000000":694722092120000000,"946944000000000000":null,"947030400000000000":694722092120000000,"947116800000000000":1356998400000000000,"947203200000000000":694722092120000000}} \ No newline at end of file diff --git a/pandas/tests/io/json/data/tsframe_v012.json.zip b/pandas/tests/io/json/data/tsframe_v012.json.zip new file mode 100644 index 0000000000000000000000000000000000000000..100ba0c87b2ba55c169081bb0ed60c5db7391bbb GIT binary patch literal 436 zcmWIWW@Zs#-~d8>PgidSBp}Ejz)(`0R+N~V8ee8$Xrz}_oSzpO!Nb60eJyg=i>r~} z7)2P4PTcFqY$(uj|LLnEw<6!?Th+y}ylfKDbYKphQr@pG)b!*{7t{95#=p{PX2~tP zo9VSN!2DO`Wj2tkn(477rQ0RX7Wsm1^R literal 0 HcmV?d00001 diff --git a/pandas/tests/io/json/test_compression.py b/pandas/tests/io/json/test_compression.py new file mode 100644 index 00000000..182c21ed --- /dev/null +++ b/pandas/tests/io/json/test_compression.py @@ -0,0 +1,113 @@ +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + + +def test_compression_roundtrip(compression): + df = pd.DataFrame( + [[0.123456, 0.234567, 0.567567], 
[12.32112, 123123.2, 321321.2]], + index=["A", "B"], + columns=["X", "Y", "Z"], + ) + + with tm.ensure_clean() as path: + df.to_json(path, compression=compression) + tm.assert_frame_equal(df, pd.read_json(path, compression=compression)) + + # explicitly ensure file was compressed. + with tm.decompress_file(path, compression) as fh: + result = fh.read().decode("utf8") + tm.assert_frame_equal(df, pd.read_json(result)) + + +def test_read_zipped_json(datapath): + uncompressed_path = datapath("io", "json", "data", "tsframe_v012.json") + uncompressed_df = pd.read_json(uncompressed_path) + + compressed_path = datapath("io", "json", "data", "tsframe_v012.json.zip") + compressed_df = pd.read_json(compressed_path, compression="zip") + + tm.assert_frame_equal(uncompressed_df, compressed_df) + + +@td.skip_if_not_us_locale +def test_with_s3_url(compression, s3_resource): + # Bucket "pandas-test" created in tests/io/conftest.py + + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + + with tm.ensure_clean() as path: + df.to_json(path, compression=compression) + with open(path, "rb") as f: + s3_resource.Bucket("pandas-test").put_object(Key="test-1", Body=f) + + roundtripped_df = pd.read_json("s3://pandas-test/test-1", compression=compression) + tm.assert_frame_equal(df, roundtripped_df) + + +def test_lines_with_compression(compression): + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + df.to_json(path, orient="records", lines=True, compression=compression) + roundtripped_df = pd.read_json(path, lines=True, compression=compression) + tm.assert_frame_equal(df, roundtripped_df) + + +def test_chunksize_with_compression(compression): + + with tm.ensure_clean() as path: + df = pd.read_json('{"a": ["foo", "bar", "baz"], "b": [4, 5, 6]}') + df.to_json(path, orient="records", lines=True, compression=compression) + + res = pd.read_json(path, lines=True, chunksize=1, compression=compression) + roundtripped_df = pd.concat(res) + 
tm.assert_frame_equal(df, roundtripped_df) + + +def test_write_unsupported_compression_type(): + df = pd.read_json('{"a": [1, 2, 3], "b": [4, 5, 6]}') + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + with pytest.raises(ValueError, match=msg): + df.to_json(path, compression="unsupported") + + +def test_read_unsupported_compression_type(): + with tm.ensure_clean() as path: + msg = "Unrecognized compression type: unsupported" + with pytest.raises(ValueError, match=msg): + pd.read_json(path, compression="unsupported") + + +@pytest.mark.parametrize("to_infer", [True, False]) +@pytest.mark.parametrize("read_infer", [True, False]) +def test_to_json_compression(compression_only, read_infer, to_infer): + # see gh-15008 + compression = compression_only + + if compression == "zip": + pytest.skip(f"{compression} is not supported for to_csv") + + # We'll complete file extension subsequently. + filename = "test." + + if compression == "gzip": + filename += "gz" + else: + # xz --> .xz + # bz2 --> .bz2 + filename += compression + + df = pd.DataFrame({"A": [1]}) + + to_compression = "infer" if to_infer else compression + read_compression = "infer" if read_infer else compression + + with tm.ensure_clean(filename) as path: + df.to_json(path, compression=to_compression) + result = pd.read_json(path, compression=read_compression) + tm.assert_frame_equal(result, df) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py new file mode 100644 index 00000000..2ac2acc6 --- /dev/null +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -0,0 +1,724 @@ +"""Tests for Table Schema integration.""" +from collections import OrderedDict +import json + +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype, DatetimeTZDtype, PeriodDtype + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.json._table_schema import ( + 
as_json_table_type, + build_table_schema, + convert_json_field_to_pandas_type, + convert_pandas_type_to_json_field, + set_default_names, +) + + +class TestBuildSchema: + def setup_method(self, method): + self.df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "c"], + "C": pd.date_range("2016-01-01", freq="d", periods=4), + "D": pd.timedelta_range("1H", periods=4, freq="T"), + }, + index=pd.Index(range(4), name="idx"), + ) + + def test_build_table_schema(self): + result = build_table_schema(self.df, version=False) + expected = { + "fields": [ + {"name": "idx", "type": "integer"}, + {"name": "A", "type": "integer"}, + {"name": "B", "type": "string"}, + {"name": "C", "type": "datetime"}, + {"name": "D", "type": "duration"}, + ], + "primaryKey": ["idx"], + } + assert result == expected + result = build_table_schema(self.df) + assert "pandas_version" in result + + def test_series(self): + s = pd.Series([1, 2, 3], name="foo") + result = build_table_schema(s, version=False) + expected = { + "fields": [ + {"name": "index", "type": "integer"}, + {"name": "foo", "type": "integer"}, + ], + "primaryKey": ["index"], + } + assert result == expected + result = build_table_schema(s) + assert "pandas_version" in result + + def test_series_unnamed(self): + result = build_table_schema(pd.Series([1, 2, 3]), version=False) + expected = { + "fields": [ + {"name": "index", "type": "integer"}, + {"name": "values", "type": "integer"}, + ], + "primaryKey": ["index"], + } + assert result == expected + + def test_multiindex(self): + df = self.df.copy() + idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)]) + df.index = idx + + result = build_table_schema(df, version=False) + expected = { + "fields": [ + {"name": "level_0", "type": "string"}, + {"name": "level_1", "type": "integer"}, + {"name": "A", "type": "integer"}, + {"name": "B", "type": "string"}, + {"name": "C", "type": "datetime"}, + {"name": "D", "type": "duration"}, + ], + "primaryKey": ["level_0", "level_1"], + } + 
assert result == expected + + df.index.names = ["idx0", None] + expected["fields"][0]["name"] = "idx0" + expected["primaryKey"] = ["idx0", "level_1"] + result = build_table_schema(df, version=False) + assert result == expected + + +class TestTableSchemaType: + @pytest.mark.parametrize("int_type", [np.int, np.int16, np.int32, np.int64]) + def test_as_json_table_type_int_data(self, int_type): + int_data = [1, 2, 3] + assert as_json_table_type(np.array(int_data, dtype=int_type)) == "integer" + + @pytest.mark.parametrize( + "float_type", [np.float, np.float16, np.float32, np.float64] + ) + def test_as_json_table_type_float_data(self, float_type): + float_data = [1.0, 2.0, 3.0] + assert as_json_table_type(np.array(float_data, dtype=float_type)) == "number" + + @pytest.mark.parametrize("bool_type", [bool, np.bool]) + def test_as_json_table_type_bool_data(self, bool_type): + bool_data = [True, False] + assert as_json_table_type(np.array(bool_data, dtype=bool_type)) == "boolean" + + @pytest.mark.parametrize( + "date_data", + [ + pd.to_datetime(["2016"]), + pd.to_datetime(["2016"], utc=True), + pd.Series(pd.to_datetime(["2016"])), + pd.Series(pd.to_datetime(["2016"], utc=True)), + pd.period_range("2016", freq="A", periods=3), + ], + ) + def test_as_json_table_type_date_data(self, date_data): + assert as_json_table_type(date_data) == "datetime" + + @pytest.mark.parametrize("str_data", [pd.Series(["a", "b"]), pd.Index(["a", "b"])]) + def test_as_json_table_type_string_data(self, str_data): + assert as_json_table_type(str_data) == "string" + + @pytest.mark.parametrize( + "cat_data", + [ + pd.Categorical(["a"]), + pd.Categorical([1]), + pd.Series(pd.Categorical([1])), + pd.CategoricalIndex([1]), + pd.Categorical([1]), + ], + ) + def test_as_json_table_type_categorical_data(self, cat_data): + assert as_json_table_type(cat_data) == "any" + + # ------ + # dtypes + # ------ + @pytest.mark.parametrize("int_dtype", [np.int, np.int16, np.int32, np.int64]) + def 
test_as_json_table_type_int_dtypes(self, int_dtype): + assert as_json_table_type(int_dtype) == "integer" + + @pytest.mark.parametrize( + "float_dtype", [np.float, np.float16, np.float32, np.float64] + ) + def test_as_json_table_type_float_dtypes(self, float_dtype): + assert as_json_table_type(float_dtype) == "number" + + @pytest.mark.parametrize("bool_dtype", [bool, np.bool]) + def test_as_json_table_type_bool_dtypes(self, bool_dtype): + assert as_json_table_type(bool_dtype) == "boolean" + + @pytest.mark.parametrize( + "date_dtype", + [ + np.datetime64, + np.dtype(" str: + return self.hexed + + hexed = "574b4454ba8c5eb4f98a8f45" + binthing = BinaryThing(hexed) + + # verify the proper conversion of printable content + df_printable = DataFrame({"A": [binthing.hexed]}) + assert df_printable.to_json() == f'{{"A":{{"0":"{hexed}"}}}}' + + # check if non-printable content throws appropriate Exception + df_nonprintable = DataFrame({"A": [binthing]}) + msg = "Unsupported UTF-8 sequence length when encoding string" + with pytest.raises(OverflowError, match=msg): + df_nonprintable.to_json() + + # the same with multiple columns threw segfaults + df_mixed = DataFrame({"A": [binthing], "B": [1]}, columns=["A", "B"]) + with pytest.raises(OverflowError): + df_mixed.to_json() + + # default_handler should resolve exceptions for non-string types + result = df_nonprintable.to_json(default_handler=str) + expected = f'{{"A":{{"0":"{hexed}"}}}}' + assert result == expected + assert ( + df_mixed.to_json(default_handler=str) + == f'{{"A":{{"0":"{hexed}"}},"B":{{"0":1}}}}' + ) + + def test_label_overflow(self): + # GH14256: buffer length not checked when writing label + result = pd.DataFrame({"bar" * 100000: [1], "foo": [1337]}).to_json() + expected = f'{{"{"bar" * 100000}":{{"0":1}},"foo":{{"0":1337}}}}' + assert result == expected + + def test_series_non_unique_index(self): + s = Series(["a", "b"], index=[1, 1]) + + msg = "Series index must be unique for orient='index'" + with 
pytest.raises(ValueError, match=msg): + s.to_json(orient="index") + + tm.assert_series_equal( + s, read_json(s.to_json(orient="split"), orient="split", typ="series") + ) + unser = read_json(s.to_json(orient="records"), orient="records", typ="series") + tm.assert_numpy_array_equal(s.values, unser.values) + + def test_series_default_orient(self): + assert self.series.to_json() == self.series.to_json(orient="index") + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_simple(self, orient, numpy): + data = self.series.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + expected = self.series.copy() + + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [False, None]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_object(self, orient, numpy, dtype): + data = self.objSeries.to_json(orient=orient) + result = pd.read_json( + data, typ="series", orient=orient, numpy=numpy, dtype=dtype + ) + expected = self.objSeries.copy() + + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_empty(self, orient, numpy): + data = self.empty_series.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + expected = self.empty_series.copy() + + # TODO: see what causes inconsistency + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + else: + expected.index = expected.index.astype(float) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_timeseries(self, orient, numpy): + data = 
self.ts.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + expected = self.ts.copy() + + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + if orient != "split": + expected.name = None + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [np.float64, np.int]) + @pytest.mark.parametrize("numpy", [True, False]) + def test_series_roundtrip_numeric(self, orient, numpy, dtype): + s = Series(range(6), index=["a", "b", "c", "d", "e", "f"]) + data = s.to_json(orient=orient) + result = pd.read_json(data, typ="series", orient=orient, numpy=numpy) + + expected = s.copy() + if orient in ("values", "records"): + expected = expected.reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + def test_series_to_json_except(self): + s = Series([1, 2, 3]) + msg = "Invalid value 'garbage' for option 'orient'" + with pytest.raises(ValueError, match=msg): + s.to_json(orient="garbage") + + def test_series_from_json_precise_float(self): + s = Series([4.56, 4.56, 4.56]) + result = read_json(s.to_json(), typ="series", precise_float=True) + tm.assert_series_equal(result, s, check_index_type=False) + + def test_series_with_dtype(self): + # GH 21986 + s = Series([4.56, 4.56, 4.56]) + result = read_json(s.to_json(), typ="series", dtype=np.int64) + expected = Series([4] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype,expected", + [ + (True, Series(["2000-01-01"], dtype="datetime64[ns]")), + (False, Series([946684800000])), + ], + ) + def test_series_with_dtype_datetime(self, dtype, expected): + s = Series(["2000-01-01"], dtype="datetime64[ns]") + data = s.to_json() + result = pd.read_json(data, typ="series", dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_frame_from_json_precise_float(self): + df = DataFrame([[4.56, 4.56, 4.56], [4.56, 4.56, 4.56]]) + result = read_json(df.to_json(), precise_float=True) + 
tm.assert_frame_equal( + result, df, check_index_type=False, check_column_type=False + ) + + def test_typ(self): + + s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64") + result = read_json(s.to_json(), typ=None) + tm.assert_series_equal(result, s) + + def test_reconstruction_index(self): + + df = DataFrame([[1, 2, 3], [4, 5, 6]]) + result = read_json(df.to_json()) + + tm.assert_frame_equal(result, df) + + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["A", "B", "C"]) + result = read_json(df.to_json()) + tm.assert_frame_equal(result, df) + + def test_path(self): + with tm.ensure_clean("test.json") as path: + for df in [ + self.frame, + self.frame2, + self.intframe, + self.tsframe, + self.mixed_frame, + ]: + df.to_json(path) + read_json(path) + + def test_axis_dates(self): + + # frame + json = self.tsframe.to_json() + result = read_json(json) + tm.assert_frame_equal(result, self.tsframe) + + # series + json = self.ts.to_json() + result = read_json(json, typ="series") + tm.assert_series_equal(result, self.ts, check_names=False) + assert result.name is None + + def test_convert_dates(self): + + # frame + df = self.tsframe.copy() + df["date"] = Timestamp("20130101") + + json = df.to_json() + result = read_json(json) + tm.assert_frame_equal(result, df) + + df["foo"] = 1.0 + json = df.to_json(date_unit="ns") + + result = read_json(json, convert_dates=False) + expected = df.copy() + expected["date"] = expected["date"].values.view("i8") + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(result, expected) + + # series + ts = Series(Timestamp("20130101"), index=self.ts.index) + json = ts.to_json() + result = read_json(json, typ="series") + tm.assert_series_equal(result, ts) + + @pytest.mark.parametrize("date_format", ["epoch", "iso"]) + @pytest.mark.parametrize("as_object", [True, False]) + @pytest.mark.parametrize( + "date_typ", [datetime.date, datetime.datetime, pd.Timestamp] + ) + def test_date_index_and_values(self, 
date_format, as_object, date_typ): + data = [date_typ(year=2020, month=1, day=1), pd.NaT] + if as_object: + data.append("a") + + ser = pd.Series(data, index=data) + result = ser.to_json(date_format=date_format) + + if date_format == "epoch": + expected = '{"1577836800000":1577836800000,"null":null}' + else: + expected = ( + '{"2020-01-01T00:00:00.000Z":"2020-01-01T00:00:00.000Z","null":null}' + ) + + if as_object: + expected = expected.replace("}", ',"a":"a"}') + + assert result == expected + + @pytest.mark.parametrize( + "infer_word", + [ + "trade_time", + "date", + "datetime", + "sold_at", + "modified", + "timestamp", + "timestamps", + ], + ) + def test_convert_dates_infer(self, infer_word): + # GH10747 + from pandas.io.json import dumps + + data = [{"id": 1, infer_word: 1036713600000}, {"id": 2}] + expected = DataFrame( + [[1, Timestamp("2002-11-08")], [2, pd.NaT]], columns=["id", infer_word] + ) + result = read_json(dumps(data))[["id", infer_word]] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "date,date_unit", + [ + ("20130101 20:43:42.123", None), + ("20130101 20:43:42", "s"), + ("20130101 20:43:42.123", "ms"), + ("20130101 20:43:42.123456", "us"), + ("20130101 20:43:42.123456789", "ns"), + ], + ) + def test_date_format_frame(self, date, date_unit): + df = self.tsframe.copy() + + df["date"] = Timestamp(date) + df.iloc[1, df.columns.get_loc("date")] = pd.NaT + df.iloc[5, df.columns.get_loc("date")] = pd.NaT + if date_unit: + json = df.to_json(date_format="iso", date_unit=date_unit) + else: + json = df.to_json(date_format="iso") + result = read_json(json) + expected = df.copy() + expected.index = expected.index.tz_localize("UTC") + expected["date"] = expected["date"].dt.tz_localize("UTC") + tm.assert_frame_equal(result, expected) + + def test_date_format_frame_raises(self): + df = self.tsframe.copy() + msg = "Invalid value 'foo' for option 'date_unit'" + with pytest.raises(ValueError, match=msg): + df.to_json(date_format="iso", 
date_unit="foo") + + @pytest.mark.parametrize( + "date,date_unit", + [ + ("20130101 20:43:42.123", None), + ("20130101 20:43:42", "s"), + ("20130101 20:43:42.123", "ms"), + ("20130101 20:43:42.123456", "us"), + ("20130101 20:43:42.123456789", "ns"), + ], + ) + def test_date_format_series(self, date, date_unit): + ts = Series(Timestamp(date), index=self.ts.index) + ts.iloc[1] = pd.NaT + ts.iloc[5] = pd.NaT + if date_unit: + json = ts.to_json(date_format="iso", date_unit=date_unit) + else: + json = ts.to_json(date_format="iso") + result = read_json(json, typ="series") + expected = ts.copy() + expected.index = expected.index.tz_localize("UTC") + expected = expected.dt.tz_localize("UTC") + tm.assert_series_equal(result, expected) + + def test_date_format_series_raises(self): + ts = Series(Timestamp("20130101 20:43:42.123"), index=self.ts.index) + msg = "Invalid value 'foo' for option 'date_unit'" + with pytest.raises(ValueError, match=msg): + ts.to_json(date_format="iso", date_unit="foo") + + @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"]) + def test_date_unit(self, unit): + df = self.tsframe.copy() + df["date"] = Timestamp("20130101 20:43:42") + dl = df.columns.get_loc("date") + df.iloc[1, dl] = Timestamp("19710101 20:43:42") + df.iloc[2, dl] = Timestamp("21460101 20:43:42") + df.iloc[4, dl] = pd.NaT + + json = df.to_json(date_format="epoch", date_unit=unit) + + # force date unit + result = read_json(json, date_unit=unit) + tm.assert_frame_equal(result, df) + + # detect date unit + result = read_json(json, date_unit=None) + tm.assert_frame_equal(result, df) + + def test_weird_nested_json(self): + # this used to core dump the parser + s = r"""{ + "status": "success", + "data": { + "posts": [ + { + "id": 1, + "title": "A blog post", + "body": "Some useful content" + }, + { + "id": 2, + "title": "Another blog post", + "body": "More content" + } + ] + } + }""" + + read_json(s) + + def test_doc_example(self): + dfj2 = DataFrame(np.random.randn(5, 2), 
columns=list("AB")) + dfj2["date"] = Timestamp("20130101") + dfj2["ints"] = range(5) + dfj2["bools"] = True + dfj2.index = pd.date_range("20130101", periods=5) + + json = dfj2.to_json() + result = read_json(json, dtype={"ints": np.int64, "bools": np.bool_}) + tm.assert_frame_equal(result, result) + + def test_misc_example(self): + + # parsing unordered input fails + result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]', numpy=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + + error_msg = """DataFrame\\.index are different + +DataFrame\\.index values are different \\(100\\.0 %\\) +\\[left\\]: Index\\(\\['a', 'b'\\], dtype='object'\\) +\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)""" + with pytest.raises(AssertionError, match=error_msg): + tm.assert_frame_equal(result, expected, check_index_type=False) + + result = read_json('[{"a": 1, "b": 2}, {"b":2, "a" :1}]') + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + @tm.network + @pytest.mark.single + def test_round_trip_exception_(self): + # GH 3867 + csv = "https://raw.github.com/hayd/lahman2012/master/csvs/Teams.csv" + df = pd.read_csv(csv) + s = df.to_json() + result = pd.read_json(s) + tm.assert_frame_equal(result.reindex(index=df.index, columns=df.columns), df) + + @tm.network + @pytest.mark.single + @pytest.mark.parametrize( + "field,dtype", + [ + ["created_at", pd.DatetimeTZDtype(tz="UTC")], + ["closed_at", "datetime64[ns]"], + ["updated_at", pd.DatetimeTZDtype(tz="UTC")], + ], + ) + def test_url(self, field, dtype): + url = "https://api.github.com/repos/pandas-dev/pandas/issues?per_page=5" # noqa + result = read_json(url, convert_dates=True) + assert result[field].dtype == dtype + + def test_timedelta(self): + converter = lambda x: pd.to_timedelta(x, unit="ms") + + s = Series([timedelta(23), timedelta(seconds=5)]) + assert s.dtype == "timedelta64[ns]" + + result = pd.read_json(s.to_json(), typ="series").apply(converter) + 
tm.assert_series_equal(result, s) + + s = Series([timedelta(23), timedelta(seconds=5)], index=pd.Index([0, 1])) + assert s.dtype == "timedelta64[ns]" + result = pd.read_json(s.to_json(), typ="series").apply(converter) + tm.assert_series_equal(result, s) + + frame = DataFrame([timedelta(23), timedelta(seconds=5)]) + assert frame[0].dtype == "timedelta64[ns]" + tm.assert_frame_equal(frame, pd.read_json(frame.to_json()).apply(converter)) + + frame = DataFrame( + { + "a": [timedelta(days=23), timedelta(seconds=5)], + "b": [1, 2], + "c": pd.date_range(start="20130101", periods=2), + } + ) + + result = pd.read_json(frame.to_json(date_unit="ns")) + result["a"] = pd.to_timedelta(result.a, unit="ns") + result["c"] = pd.to_datetime(result.c) + tm.assert_frame_equal(frame, result) + + def test_mixed_timedelta_datetime(self): + frame = DataFrame( + {"a": [timedelta(23), pd.Timestamp("20130101")]}, dtype=object + ) + + expected = DataFrame( + {"a": [pd.Timedelta(frame.a[0]).value, pd.Timestamp(frame.a[1]).value]} + ) + result = pd.read_json(frame.to_json(date_unit="ns"), dtype={"a": "int64"}) + tm.assert_frame_equal(result, expected, check_index_type=False) + + def test_default_handler(self): + value = object() + frame = DataFrame({"a": [7, value]}) + expected = DataFrame({"a": [7, str(value)]}) + result = pd.read_json(frame.to_json(default_handler=str)) + tm.assert_frame_equal(expected, result, check_index_type=False) + + def test_default_handler_indirect(self): + from pandas.io.json import dumps + + def default(obj): + if isinstance(obj, complex): + return [("mathjs", "Complex"), ("re", obj.real), ("im", obj.imag)] + return str(obj) + + df_list = [ + 9, + DataFrame( + {"a": [1, "STR", complex(4, -5)], "b": [float("nan"), None, "N/A"]}, + columns=["a", "b"], + ), + ] + expected = ( + '[9,[[1,null],["STR",null],[[["mathjs","Complex"],' + '["re",4.0],["im",-5.0]],"N\\/A"]]]' + ) + assert dumps(df_list, default_handler=default, orient="values") == expected + + def 
test_default_handler_numpy_unsupported_dtype(self): + # GH12554 to_json raises 'Unhandled numpy dtype 15' + df = DataFrame( + {"a": [1, 2.3, complex(4, -5)], "b": [float("nan"), None, complex(1.2, 0)]}, + columns=["a", "b"], + ) + expected = ( + '[["(1+0j)","(nan+0j)"],' + '["(2.3+0j)","(nan+0j)"],' + '["(4-5j)","(1.2+0j)"]]' + ) + assert df.to_json(default_handler=str, orient="values") == expected + + def test_default_handler_raises(self): + msg = "raisin" + + def my_handler_raises(obj): + raise TypeError(msg) + + with pytest.raises(TypeError, match=msg): + DataFrame({"a": [1, 2, object()]}).to_json( + default_handler=my_handler_raises + ) + with pytest.raises(TypeError, match=msg): + DataFrame({"a": [1, 2, complex(4, -5)]}).to_json( + default_handler=my_handler_raises + ) + + def test_categorical(self): + # GH4377 df.to_json segfaults with non-ndarray blocks + df = DataFrame({"A": ["a", "b", "c", "a", "b", "b", "a"]}) + df["B"] = df["A"] + expected = df.to_json() + + df["B"] = df["A"].astype("category") + assert expected == df.to_json() + + s = df["A"] + sc = df["B"] + assert s.to_json() == sc.to_json() + + def test_datetime_tz(self): + # GH4377 df.to_json segfaults with non-ndarray blocks + tz_range = pd.date_range("20130101", periods=3, tz="US/Eastern") + tz_naive = tz_range.tz_convert("utc").tz_localize(None) + + df = DataFrame({"A": tz_range, "B": pd.date_range("20130101", periods=3)}) + + df_naive = df.copy() + df_naive["A"] = tz_naive + expected = df_naive.to_json() + assert expected == df.to_json() + + stz = Series(tz_range) + s_naive = Series(tz_naive) + assert stz.to_json() == s_naive.to_json() + + def test_sparse(self): + # GH4377 df.to_json segfaults with non-ndarray blocks + df = pd.DataFrame(np.random.randn(10, 4)) + df.loc[:8] = np.nan + + sdf = df.astype("Sparse") + expected = df.to_json() + assert expected == sdf.to_json() + + s = pd.Series(np.random.randn(10)) + s.loc[:8] = np.nan + ss = s.astype("Sparse") + + expected = s.to_json() + assert 
expected == ss.to_json() + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("2013-01-10 05:00:00Z"), + Timestamp("2013-01-10 00:00:00", tz="US/Eastern"), + Timestamp("2013-01-10 00:00:00-0500"), + ], + ) + def test_tz_is_utc(self, ts): + from pandas.io.json import dumps + + exp = '"2013-01-10T05:00:00.000Z"' + + assert dumps(ts, iso_dates=True) == exp + dt = ts.to_pydatetime() + assert dumps(dt, iso_dates=True) == exp + + @pytest.mark.parametrize( + "tz_range", + [ + pd.date_range("2013-01-01 05:00:00Z", periods=2), + pd.date_range("2013-01-01 00:00:00", periods=2, tz="US/Eastern"), + pd.date_range("2013-01-01 00:00:00-0500", periods=2), + ], + ) + def test_tz_range_is_utc(self, tz_range): + from pandas.io.json import dumps + + exp = '["2013-01-01T05:00:00.000Z","2013-01-02T05:00:00.000Z"]' + dfexp = ( + '{"DT":{' + '"0":"2013-01-01T05:00:00.000Z",' + '"1":"2013-01-02T05:00:00.000Z"}}' + ) + + assert dumps(tz_range, iso_dates=True) == exp + dti = pd.DatetimeIndex(tz_range) + assert dumps(dti, iso_dates=True) == exp + df = DataFrame({"DT": dti}) + result = dumps(df, iso_dates=True) + assert result == dfexp + + def test_read_inline_jsonl(self): + # GH9180 + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + @td.skip_if_not_us_locale + def test_read_s3_jsonl(self, s3_resource): + # GH17200 + + result = read_json("s3n://pandas-test/items.jsonl", lines=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_read_local_jsonl(self): + # GH17200 + with tm.ensure_clean("tmp_items.json") as path: + with open(path, "w") as infile: + infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n') + result = read_json(path, lines=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_read_jsonl_unicode_chars(self): + # GH15132: 
non-ascii unicode characters + # \u201d == RIGHT DOUBLE QUOTATION MARK + + # simulate file handle + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + json = StringIO(json) + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # simulate string + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + def test_read_json_large_numbers(self): + # GH18842 + json = '{"articleId": "1404366058080022500245"}' + json = StringIO(json) + result = read_json(json, typ="series") + expected = Series(1.404366e21, index=["articleId"]) + tm.assert_series_equal(result, expected) + + json = '{"0": {"articleId": "1404366058080022500245"}}' + json = StringIO(json) + result = read_json(json) + expected = DataFrame(1.404366e21, index=["articleId"], columns=[0]) + tm.assert_frame_equal(result, expected) + + def test_to_jsonl(self): + # GH9180 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.to_json(orient="records", lines=True) + expected = '{"a":1,"b":2}\n{"a":1,"b":2}' + assert result == expected + + df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"]) + result = df.to_json(orient="records", lines=True) + expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' + assert result == expected + tm.assert_frame_equal(pd.read_json(result, lines=True), df) + + # GH15096: escaped characters in columns and data + df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) + result = df.to_json(orient="records", lines=True) + expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}' + assert result == expected + tm.assert_frame_equal(pd.read_json(result, lines=True), df) + + # TODO: there is a near-identical test for pytables; 
can we share? + def test_latin_encoding(self): + # GH 13774 + pytest.skip("encoding not implemented in .to_json(), xref #13774") + + values = [ + [b"E\xc9, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"a", b"b", b"c"], + [b"EE, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"], + [b"", b"a", b"b", b"c"], + [b"\xf8\xfc", b"a", b"b", b"c"], + [b"A\xf8\xfc", b"", b"a", b"b", b"c"], + [np.nan, b"", b"b", b"c"], + [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], + ] + + values = [ + [x.decode("latin-1") if isinstance(x, bytes) else x for x in y] + for y in values + ] + + examples = [] + for dtype in ["category", object]: + for val in values: + examples.append(Series(val, dtype=dtype)) + + def roundtrip(s, encoding="latin-1"): + with tm.ensure_clean("test.json") as path: + s.to_json(path, encoding=encoding) + retr = read_json(path, encoding=encoding) + tm.assert_series_equal(s, retr, check_categorical=False) + + for s in examples: + roundtrip(s) + + def test_data_frame_size_after_to_json(self): + # GH15344 + df = DataFrame({"a": [str(1)]}) + + size_before = df.memory_usage(index=True, deep=True).sum() + df.to_json() + size_after = df.memory_usage(index=True, deep=True).sum() + + assert size_before == size_after + + @pytest.mark.parametrize( + "index", [None, [1, 2], [1.0, 2.0], ["a", "b"], ["1", "2"], ["1.", "2."]] + ) + @pytest.mark.parametrize("columns", [["a", "b"], ["1", "2"], ["1.", "2."]]) + def test_from_json_to_json_table_index_and_columns(self, index, columns): + # GH25433 GH25435 + expected = DataFrame([[1, 2], [3, 4]], index=index, columns=columns) + dfjson = expected.to_json(orient="table") + result = pd.read_json(dfjson, orient="table") + tm.assert_frame_equal(result, expected) + + def test_from_json_to_json_table_dtypes(self): + # GH21345 + expected = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) + dfjson = expected.to_json(orient="table") + result = pd.read_json(dfjson, orient="table") + tm.assert_frame_equal(result, 
expected) + + @pytest.mark.parametrize("dtype", [True, {"b": int, "c": int}]) + def test_read_json_table_dtype_raises(self, dtype): + # GH21345 + df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0], "c": ["5", "6"]}) + dfjson = df.to_json(orient="table") + msg = "cannot pass both dtype and orient='table'" + with pytest.raises(ValueError, match=msg): + pd.read_json(dfjson, orient="table", dtype=dtype) + + def test_read_json_table_convert_axes_raises(self): + # GH25433 GH25435 + df = DataFrame([[1, 2], [3, 4]], index=[1.0, 2.0], columns=["1.", "2."]) + dfjson = df.to_json(orient="table") + msg = "cannot pass both convert_axes and orient='table'" + with pytest.raises(ValueError, match=msg): + pd.read_json(dfjson, orient="table", convert_axes=True) + + @pytest.mark.parametrize( + "data, expected", + [ + ( + DataFrame([[1, 2], [4, 5]], columns=["a", "b"]), + {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]}, + ), + ( + DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo"), + {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]}, + ), + ( + DataFrame( + [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]] + ), + {"columns": ["a", "b"], "data": [[1, 2], [4, 5]]}, + ), + (Series([1, 2, 3], name="A"), {"name": "A", "data": [1, 2, 3]}), + ( + Series([1, 2, 3], name="A").rename_axis("foo"), + {"name": "A", "data": [1, 2, 3]}, + ), + ( + Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]]), + {"name": "A", "data": [1, 2]}, + ), + ], + ) + def test_index_false_to_json_split(self, data, expected): + # GH 17394 + # Testing index=False in to_json with orient='split' + + result = data.to_json(orient="split", index=False) + result = json.loads(result) + + assert result == expected + + @pytest.mark.parametrize( + "data", + [ + (DataFrame([[1, 2], [4, 5]], columns=["a", "b"])), + (DataFrame([[1, 2], [4, 5]], columns=["a", "b"]).rename_axis("foo")), + ( + DataFrame( + [[1, 2], [4, 5]], columns=["a", "b"], index=[["a", "b"], ["c", "d"]] + ) + ), + (Series([1, 2, 
3], name="A")), + (Series([1, 2, 3], name="A").rename_axis("foo")), + (Series([1, 2], name="A", index=[["a", "b"], ["c", "d"]])), + ], + ) + def test_index_false_to_json_table(self, data): + # GH 17394 + # Testing index=False in to_json with orient='table' + + result = data.to_json(orient="table", index=False) + result = json.loads(result) + + expected = { + "schema": pd.io.json.build_table_schema(data, index=False), + "data": DataFrame(data).to_dict(orient="records"), + } + + assert result == expected + + @pytest.mark.parametrize("orient", ["records", "index", "columns", "values"]) + def test_index_false_error_to_json(self, orient): + # GH 17394 + # Testing error message from to_json with index=False + + df = pd.DataFrame([[1, 2], [4, 5]], columns=["a", "b"]) + + msg = "'index=False' is only valid when 'orient' is 'split' or 'table'" + with pytest.raises(ValueError, match=msg): + df.to_json(orient=orient, index=False) + + @pytest.mark.parametrize("orient", ["split", "table"]) + @pytest.mark.parametrize("index", [True, False]) + def test_index_false_from_json_to_json(self, orient, index): + # GH25170 + # Test index=False in from_json to_json + expected = DataFrame({"a": [1, 2], "b": [3, 4]}) + dfjson = expected.to_json(orient=orient, index=index) + result = read_json(dfjson, orient=orient) + tm.assert_frame_equal(result, expected) + + def test_read_timezone_information(self): + # GH 25546 + result = read_json( + '{"2019-01-01T11:00:00.000Z":88}', typ="series", orient="index" + ) + expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")] + ) + def test_timedelta_as_label(self, date_format, key): + df = pd.DataFrame([[1]], columns=[pd.Timedelta("1D")]) + expected = f'{{"{key}":{{"0":1}}}}' + result = df.to_json(date_format=date_format) + + assert result == expected + + @pytest.mark.parametrize( + 
"orient,expected", + [ + ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"), + ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"), + # TODO: the below have separate encoding procedures + # They produce JSON but not in a consistent manner + pytest.param("split", "", marks=pytest.mark.skip), + pytest.param("table", "", marks=pytest.mark.skip), + ], + ) + def test_tuple_labels(self, orient, expected): + # GH 20500 + df = pd.DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")]) + result = df.to_json(orient=orient) + assert result == expected + + @pytest.mark.parametrize("indent", [1, 2, 4]) + def test_to_json_indent(self, indent): + # GH 12004 + df = pd.DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + + result = df.to_json(indent=indent) + spaces = " " * indent + expected = f"""{{ +{spaces}"a":{{ +{spaces}{spaces}"0":"foo", +{spaces}{spaces}"1":"baz" +{spaces}}}, +{spaces}"b":{{ +{spaces}{spaces}"0":"bar", +{spaces}{spaces}"1":"qux" +{spaces}}} +}}""" + + assert result == expected + + @pytest.mark.parametrize( + "orient,expected", + [ + ( + "split", + """{ + "columns":[ + "a", + "b" + ], + "index":[ + 0, + 1 + ], + "data":[ + [ + "foo", + "bar" + ], + [ + "baz", + "qux" + ] + ] +}""", + ), + ( + "records", + """[ + { + "a":"foo", + "b":"bar" + }, + { + "a":"baz", + "b":"qux" + } +]""", + ), + ( + "index", + """{ + "0":{ + "a":"foo", + "b":"bar" + }, + "1":{ + "a":"baz", + "b":"qux" + } +}""", + ), + ( + "columns", + """{ + "a":{ + "0":"foo", + "1":"baz" + }, + "b":{ + "0":"bar", + "1":"qux" + } +}""", + ), + ( + "values", + """[ + [ + "foo", + "bar" + ], + [ + "baz", + "qux" + ] +]""", + ), + ( + "table", + """{ + "schema":{ + "fields":[ + { + "name":"index", + "type":"integer" + }, + { + "name":"a", + "type":"string" + }, + { + "name":"b", + "type":"string" + } + ], + "primaryKey":[ + "index" + ], + "pandas_version":"0.20.0" + }, + "data":[ + { + "index":0, + "a":"foo", + "b":"bar" + }, + { + "index":1, + "a":"baz", + "b":"qux" + } + ] +}""", + ), 
+ ], + ) + def test_json_indent_all_orients(self, orient, expected): + # GH 12004 + df = pd.DataFrame([["foo", "bar"], ["baz", "qux"]], columns=["a", "b"]) + result = df.to_json(orient=orient, indent=4) + assert result == expected + + def test_json_negative_indent_raises(self): + with pytest.raises(ValueError, match="must be a nonnegative integer"): + pd.DataFrame().to_json(indent=-1) + + def test_emca_262_nan_inf_support(self): + # GH 12213 + data = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' + result = pd.read_json(data) + expected = pd.DataFrame( + ["a", np.nan, "NaN", np.inf, "Infinity", -np.inf, "-Infinity"] + ) + tm.assert_frame_equal(result, expected) + + def test_deprecate_numpy_argument_read_json(self): + # GH 28512 + expected = DataFrame([1, 2, 3]) + with tm.assert_produces_warning(FutureWarning): + result = read_json(expected.to_json(), numpy=True) + tm.assert_frame_equal(result, expected) + + def test_json_pandas_na(self): + # GH 31615 + result = pd.DataFrame([[pd.NA]]).to_json() + assert result == '{"0":{"0":null}}' + + def test_json_pandas_nulls(self, nulls_fixture): + # GH 31615 + result = pd.DataFrame([[nulls_fixture]]).to_json() + assert result == '{"0":{"0":null}}' diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py new file mode 100644 index 00000000..e5314576 --- /dev/null +++ b/pandas/tests/io/json/test_readlines.py @@ -0,0 +1,181 @@ +from io import StringIO + +import pytest + +import pandas as pd +from pandas import DataFrame, read_json +import pandas._testing as tm + +from pandas.io.json._json import JsonReader + + +@pytest.fixture +def lines_json_df(): + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + return df.to_json(lines=True, orient="records") + + +def test_read_jsonl(): + # GH9180 + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +def 
test_read_jsonl_unicode_chars(): + # GH15132: non-ascii unicode characters + # \u201d == RIGHT DOUBLE QUOTATION MARK + + # simulate file handle + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + json = StringIO(json) + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # simulate string + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + result = read_json(json, lines=True) + expected = DataFrame([["foo\u201d", "bar"], ["foo", "bar"]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +def test_to_jsonl(): + # GH9180 + df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) + result = df.to_json(orient="records", lines=True) + expected = '{"a":1,"b":2}\n{"a":1,"b":2}' + assert result == expected + + df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=["a", "b"]) + result = df.to_json(orient="records", lines=True) + expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' + assert result == expected + tm.assert_frame_equal(read_json(result, lines=True), df) + + # GH15096: escaped characters in columns and data + df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], columns=["a\\", "b"]) + result = df.to_json(orient="records", lines=True) + expected = '{"a\\\\":"foo\\\\","b":"bar"}\n{"a\\\\":"foo\\"","b":"bar"}' + assert result == expected + tm.assert_frame_equal(read_json(result, lines=True), df) + + +@pytest.mark.parametrize("chunksize", [1, 1.0]) +def test_readjson_chunks(lines_json_df, chunksize): + # Basic test that read_json(chunks=True) gives the same result as + # read_json(chunks=False) + # GH17048: memory usage when lines=True + + unchunked = read_json(StringIO(lines_json_df), lines=True) + reader = read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) + chunked = pd.concat(reader) + + tm.assert_frame_equal(chunked, unchunked) + + +def 
test_readjson_chunksize_requires_lines(lines_json_df): + msg = "chunksize can only be passed if lines=True" + with pytest.raises(ValueError, match=msg): + pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2) + + +def test_readjson_chunks_series(): + # Test reading line-format JSON to Series with chunksize param + s = pd.Series({"A": 1, "B": 2}) + + strio = StringIO(s.to_json(lines=True, orient="records")) + unchunked = pd.read_json(strio, lines=True, typ="Series") + + strio = StringIO(s.to_json(lines=True, orient="records")) + chunked = pd.concat(pd.read_json(strio, lines=True, typ="Series", chunksize=1)) + + tm.assert_series_equal(chunked, unchunked) + + +def test_readjson_each_chunk(lines_json_df): + # Other tests check that the final result of read_json(chunksize=True) + # is correct. This checks the intermediate chunks. + chunks = list(pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2)) + assert chunks[0].shape == (2, 2) + assert chunks[1].shape == (1, 2) + + +def test_readjson_chunks_from_file(): + with tm.ensure_clean("test.json") as path: + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df.to_json(path, lines=True, orient="records") + chunked = pd.concat(pd.read_json(path, lines=True, chunksize=1)) + unchunked = pd.read_json(path, lines=True) + tm.assert_frame_equal(unchunked, chunked) + + +@pytest.mark.parametrize("chunksize", [None, 1]) +def test_readjson_chunks_closes(chunksize): + with tm.ensure_clean("test.json") as path: + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df.to_json(path, lines=True, orient="records") + reader = JsonReader( + path, + orient=None, + typ="frame", + dtype=True, + convert_axes=True, + convert_dates=True, + keep_default_dates=True, + numpy=False, + precise_float=False, + date_unit=None, + encoding=None, + lines=True, + chunksize=chunksize, + compression=None, + ) + reader.read() + assert ( + reader.open_stream.closed + ), f"didn't close stream with chunksize = {chunksize}" + + 
+@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"]) +def test_readjson_invalid_chunksize(lines_json_df, chunksize): + msg = r"'chunksize' must be an integer >=1" + + with pytest.raises(ValueError, match=msg): + pd.read_json(StringIO(lines_json_df), lines=True, chunksize=chunksize) + + +@pytest.mark.parametrize("chunksize", [None, 1, 2]) +def test_readjson_chunks_multiple_empty_lines(chunksize): + j = """ + + {"A":1,"B":4} + + + + {"A":2,"B":5} + + + + + + + + {"A":3,"B":6} + """ + orig = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + test = pd.read_json(j, lines=True, chunksize=chunksize) + if chunksize is not None: + test = pd.concat(test) + tm.assert_frame_equal(orig, test, obj=f"chunksize: {chunksize}") + + +def test_readjson_unicode(monkeypatch): + with tm.ensure_clean("test.json") as path: + monkeypatch.setattr("_bootlocale.getpreferredencoding", lambda l: "cp949") + with open(path, "w", encoding="utf-8") as f: + f.write('{"£©µÀÆÖÞßéöÿ":["АБВГДабвгд가"]}') + + result = read_json(path) + expected = pd.DataFrame({"£©µÀÆÖÞßéöÿ": ["АБВГДабвгд가"]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py new file mode 100644 index 00000000..bedd6008 --- /dev/null +++ b/pandas/tests/io/json/test_ujson.py @@ -0,0 +1,1079 @@ +import calendar +import datetime +import decimal +import json +import locale +import math +import re +import time + +import dateutil +import numpy as np +import pytest +import pytz + +import pandas._libs.json as ujson +from pandas._libs.tslib import Timestamp +import pandas.compat as compat + +from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, date_range +import pandas._testing as tm + + +def _clean_dict(d): + """ + Sanitize dictionary for JSON by converting all keys to strings. + + Parameters + ---------- + d : dict + The dictionary to convert. 
+ + Returns + ------- + cleaned_dict : dict + """ + + return {str(k): v for k, v in d.items()} + + +@pytest.fixture( + params=[None, "split", "records", "values", "index"] # Column indexed by default. +) +def orient(request): + return request.param + + +@pytest.fixture(params=[None, True]) +def numpy(request): + return request.param + + +class TestUltraJSONTests: + @pytest.mark.skipif( + compat.is_platform_32bit(), reason="not compliant on 32-bit, xref #15865" + ) + def test_encode_decimal(self): + sut = decimal.Decimal("1337.1337") + encoded = ujson.encode(sut, double_precision=15) + decoded = ujson.decode(encoded) + assert decoded == 1337.1337 + + sut = decimal.Decimal("0.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.94") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "0.9" + + decoded = ujson.decode(encoded) + assert decoded == 0.9 + + sut = decimal.Decimal("1.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "2.0" + + decoded = ujson.decode(encoded) + assert decoded == 2.0 + + sut = decimal.Decimal("-1.95") + encoded = ujson.encode(sut, double_precision=1) + assert encoded == "-2.0" + + decoded = ujson.decode(encoded) + assert decoded == -2.0 + + sut = decimal.Decimal("0.995") + encoded = ujson.encode(sut, double_precision=2) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.9995") + encoded = ujson.encode(sut, double_precision=3) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + sut = decimal.Decimal("0.99999999999999944") + encoded = ujson.encode(sut, double_precision=15) + assert encoded == "1.0" + + decoded = ujson.decode(encoded) + assert decoded == 1.0 + + @pytest.mark.parametrize("ensure_ascii", [True, False]) + def test_encode_string_conversion(self, ensure_ascii): + string_input = 
"A string \\ / \b \f \n \r \t &" + not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"' + html_encoded = ( + '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"' + ) + + def helper(expected_output, **encode_kwargs): + output = ujson.encode( + string_input, ensure_ascii=ensure_ascii, **encode_kwargs + ) + + assert output == expected_output + assert string_input == json.loads(output) + assert string_input == ujson.decode(output) + + # Default behavior assumes encode_html_chars=False. + helper(not_html_encoded) + + # Make sure explicit encode_html_chars=False works. + helper(not_html_encoded, encode_html_chars=False) + + # Make sure explicit encode_html_chars=True does the encoding. + helper(html_encoded, encode_html_chars=True) + + @pytest.mark.parametrize( + "long_number", [-4342969734183514, -12345678901234.56789012, -528656961.4399388] + ) + def test_double_long_numbers(self, long_number): + sut = {"a": long_number} + encoded = ujson.encode(sut, double_precision=15) + + decoded = ujson.decode(encoded) + assert sut == decoded + + def test_encode_non_c_locale(self): + lc_category = locale.LC_NUMERIC + + # We just need one of these locales to work. 
+ for new_locale in ("it_IT.UTF-8", "Italian_Italy"): + if tm.can_set_locale(new_locale, lc_category): + with tm.set_locale(new_locale, lc_category): + assert ujson.loads(ujson.dumps(4.78e60)) == 4.78e60 + assert ujson.loads("4.78", precise_float=True) == 4.78 + break + + def test_decimal_decode_test_precise(self): + sut = {"a": 4.56} + encoded = ujson.encode(sut) + decoded = ujson.decode(encoded, precise_float=True) + assert sut == decoded + + def test_encode_double_tiny_exponential(self): + num = 1e-40 + assert num == ujson.decode(ujson.encode(num)) + num = 1e-100 + assert num == ujson.decode(ujson.encode(num)) + num = -1e-45 + assert num == ujson.decode(ujson.encode(num)) + num = -1e-145 + assert np.allclose(num, ujson.decode(ujson.encode(num))) + + @pytest.mark.parametrize("unicode_key", ["key1", "بن"]) + def test_encode_dict_with_unicode_keys(self, unicode_key): + unicode_dict = {unicode_key: "value1"} + assert unicode_dict == ujson.decode(ujson.encode(unicode_dict)) + + @pytest.mark.parametrize( + "double_input", [math.pi, -math.pi] # Should work with negatives too. 
+ ) + def test_encode_double_conversion(self, double_input): + output = ujson.encode(double_input) + assert round(double_input, 5) == round(json.loads(output), 5) + assert round(double_input, 5) == round(ujson.decode(output), 5) + + def test_encode_with_decimal(self): + decimal_input = 1.0 + output = ujson.encode(decimal_input) + + assert output == "1.0" + + def test_encode_array_of_nested_arrays(self): + nested_input = [[[[]]]] * 20 + output = ujson.encode(nested_input) + + assert nested_input == json.loads(output) + assert nested_input == ujson.decode(output) + + nested_input = np.array(nested_input) + tm.assert_numpy_array_equal( + nested_input, ujson.decode(output, numpy=True, dtype=nested_input.dtype) + ) + + def test_encode_array_of_doubles(self): + doubles_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10 + output = ujson.encode(doubles_input) + + assert doubles_input == json.loads(output) + assert doubles_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(doubles_input), ujson.decode(output, numpy=True) + ) + + def test_double_precision(self): + double_input = 30.012345678901234 + output = ujson.encode(double_input, double_precision=15) + + assert double_input == json.loads(output) + assert double_input == ujson.decode(output) + + for double_precision in (3, 9): + output = ujson.encode(double_input, double_precision=double_precision) + rounded_input = round(double_input, double_precision) + + assert rounded_input == json.loads(output) + assert rounded_input == ujson.decode(output) + + @pytest.mark.parametrize("invalid_val", [20, -1, "9", None]) + def test_invalid_double_precision(self, invalid_val): + double_input = 30.12345678901234567890 + expected_exception = ValueError if isinstance(invalid_val, int) else TypeError + + with pytest.raises(expected_exception): + ujson.encode(double_input, double_precision=invalid_val) + + def test_encode_string_conversion2(self): + string_input = "A string \\ / \b \f \n \r \t" + 
output = ujson.encode(string_input) + + assert string_input == json.loads(output) + assert string_input == ujson.decode(output) + assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"' + + @pytest.mark.parametrize( + "unicode_input", + ["RäksmörgÃ¥s اسامة بن محمد بن عوض بن لادن", "\xe6\x97\xa5\xd1\x88"], + ) + def test_encode_unicode_conversion(self, unicode_input): + enc = ujson.encode(unicode_input) + dec = ujson.decode(enc) + + assert enc == json.dumps(unicode_input) + assert dec == json.loads(enc) + + def test_encode_control_escaping(self): + escaped_input = "\x19" + enc = ujson.encode(escaped_input) + dec = ujson.decode(enc) + + assert escaped_input == dec + assert enc == json.dumps(escaped_input) + + def test_encode_unicode_surrogate_pair(self): + surrogate_input = "\xf0\x90\x8d\x86" + enc = ujson.encode(surrogate_input) + dec = ujson.decode(enc) + + assert enc == json.dumps(surrogate_input) + assert dec == json.loads(enc) + + def test_encode_unicode_4bytes_utf8(self): + four_bytes_input = "\xf0\x91\x80\xb0TRAILINGNORMAL" + enc = ujson.encode(four_bytes_input) + dec = ujson.decode(enc) + + assert enc == json.dumps(four_bytes_input) + assert dec == json.loads(enc) + + def test_encode_unicode_4bytes_utf8highest(self): + four_bytes_input = "\xf3\xbf\xbf\xbfTRAILINGNORMAL" + enc = ujson.encode(four_bytes_input) + + dec = ujson.decode(enc) + + assert enc == json.dumps(four_bytes_input) + assert dec == json.loads(enc) + + def test_encode_array_in_array(self): + arr_in_arr_input = [[[[]]]] + output = ujson.encode(arr_in_arr_input) + + assert arr_in_arr_input == json.loads(output) + assert output == json.dumps(arr_in_arr_input) + assert arr_in_arr_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(arr_in_arr_input), ujson.decode(output, numpy=True) + ) + + @pytest.mark.parametrize( + "num_input", + [ + 31337, + -31337, # Negative number. + -9223372036854775808, # Large negative number. 
+ ], + ) + def test_encode_num_conversion(self, num_input): + output = ujson.encode(num_input) + assert num_input == json.loads(output) + assert output == json.dumps(num_input) + assert num_input == ujson.decode(output) + + def test_encode_list_conversion(self): + list_input = [1, 2, 3, 4] + output = ujson.encode(list_input) + + assert list_input == json.loads(output) + assert list_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(list_input), ujson.decode(output, numpy=True) + ) + + def test_encode_dict_conversion(self): + dict_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4} + output = ujson.encode(dict_input) + + assert dict_input == json.loads(output) + assert dict_input == ujson.decode(output) + + @pytest.mark.parametrize("builtin_value", [None, True, False]) + def test_encode_builtin_values_conversion(self, builtin_value): + output = ujson.encode(builtin_value) + assert builtin_value == json.loads(output) + assert output == json.dumps(builtin_value) + assert builtin_value == ujson.decode(output) + + def test_encode_datetime_conversion(self): + datetime_input = datetime.datetime.fromtimestamp(time.time()) + output = ujson.encode(datetime_input, date_unit="s") + expected = calendar.timegm(datetime_input.utctimetuple()) + + assert int(expected) == json.loads(output) + assert int(expected) == ujson.decode(output) + + def test_encode_date_conversion(self): + date_input = datetime.date.fromtimestamp(time.time()) + output = ujson.encode(date_input, date_unit="s") + + tup = (date_input.year, date_input.month, date_input.day, 0, 0, 0) + expected = calendar.timegm(tup) + + assert int(expected) == json.loads(output) + assert int(expected) == ujson.decode(output) + + @pytest.mark.parametrize( + "test", + [datetime.time(), datetime.time(1, 2, 3), datetime.time(10, 12, 15, 343243)], + ) + def test_encode_time_conversion_basic(self, test): + output = ujson.encode(test) + expected = f'"{test.isoformat()}"' + assert expected == output + + def 
test_encode_time_conversion_pytz(self): + # see gh-11473: to_json segfaults with timezone-aware datetimes + test = datetime.time(10, 12, 15, 343243, pytz.utc) + output = ujson.encode(test) + expected = f'"{test.isoformat()}"' + assert expected == output + + def test_encode_time_conversion_dateutil(self): + # see gh-11473: to_json segfaults with timezone-aware datetimes + test = datetime.time(10, 12, 15, 343243, dateutil.tz.tzutc()) + output = ujson.encode(test) + expected = f'"{test.isoformat()}"' + assert expected == output + + @pytest.mark.parametrize( + "decoded_input", [NaT, np.datetime64("NaT"), np.nan, np.inf, -np.inf] + ) + def test_encode_as_null(self, decoded_input): + assert ujson.encode(decoded_input) == "null", "Expected null" + + def test_datetime_units(self): + val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504) + stamp = Timestamp(val) + + roundtrip = ujson.decode(ujson.encode(val, date_unit="s")) + assert roundtrip == stamp.value // 10 ** 9 + + roundtrip = ujson.decode(ujson.encode(val, date_unit="ms")) + assert roundtrip == stamp.value // 10 ** 6 + + roundtrip = ujson.decode(ujson.encode(val, date_unit="us")) + assert roundtrip == stamp.value // 10 ** 3 + + roundtrip = ujson.decode(ujson.encode(val, date_unit="ns")) + assert roundtrip == stamp.value + + msg = "Invalid value 'foo' for option 'date_unit'" + with pytest.raises(ValueError, match=msg): + ujson.encode(val, date_unit="foo") + + def test_encode_to_utf8(self): + unencoded = "\xe6\x97\xa5\xd1\x88" + + enc = ujson.encode(unencoded, ensure_ascii=False) + dec = ujson.decode(enc) + + assert enc == json.dumps(unencoded, ensure_ascii=False) + assert dec == json.loads(enc) + + def test_decode_from_unicode(self): + unicode_input = '{"obj": 31337}' + + dec1 = ujson.decode(unicode_input) + dec2 = ujson.decode(str(unicode_input)) + + assert dec1 == dec2 + + def test_encode_recursion_max(self): + # 8 is the max recursion depth + + class O2: + member = 0 + pass + + class O1: + member = 0 + pass + + 
decoded_input = O1() + decoded_input.member = O2() + decoded_input.member.member = decoded_input + + with pytest.raises(OverflowError): + ujson.encode(decoded_input) + + def test_decode_jibberish(self): + jibberish = "fdsa sda v9sa fdsa" + + with pytest.raises(ValueError): + ujson.decode(jibberish) + + @pytest.mark.parametrize( + "broken_json", + [ + "[", # Broken array start. + "{", # Broken object start. + "]", # Broken array end. + "}", # Broken object end. + ], + ) + def test_decode_broken_json(self, broken_json): + with pytest.raises(ValueError): + ujson.decode(broken_json) + + @pytest.mark.parametrize("too_big_char", ["[", "{"]) + def test_decode_depth_too_big(self, too_big_char): + with pytest.raises(ValueError): + ujson.decode(too_big_char * (1024 * 1024)) + + @pytest.mark.parametrize( + "bad_string", + [ + '"TESTING', # Unterminated. + '"TESTING\\"', # Unterminated escape. + "tru", # Broken True. + "fa", # Broken False. + "n", # Broken None. + ], + ) + def test_decode_bad_string(self, bad_string): + with pytest.raises(ValueError): + ujson.decode(bad_string) + + @pytest.mark.parametrize("broken_json", ['{{1337:""}}', '{{"key":"}', "[[[true"]) + def test_decode_broken_json_leak(self, broken_json): + for _ in range(1000): + with pytest.raises(ValueError): + ujson.decode(broken_json) + + @pytest.mark.parametrize( + "invalid_dict", + [ + "{{{{31337}}}}", # No key. + '{{{{"key":}}}}', # No value. + '{{{{"key"}}}}', # No colon or value. + ], + ) + def test_decode_invalid_dict(self, invalid_dict): + with pytest.raises(ValueError): + ujson.decode(invalid_dict) + + @pytest.mark.parametrize( + "numeric_int_as_str", ["31337", "-31337"] # Should work with negatives. 
+ ) + def test_decode_numeric_int(self, numeric_int_as_str): + assert int(numeric_int_as_str) == ujson.decode(numeric_int_as_str) + + def test_encode_null_character(self): + wrapped_input = "31337 \x00 1337" + output = ujson.encode(wrapped_input) + + assert wrapped_input == json.loads(output) + assert output == json.dumps(wrapped_input) + assert wrapped_input == ujson.decode(output) + + alone_input = "\x00" + output = ujson.encode(alone_input) + + assert alone_input == json.loads(output) + assert output == json.dumps(alone_input) + assert alone_input == ujson.decode(output) + assert '" \\u0000\\r\\n "' == ujson.dumps(" \u0000\r\n ") + + def test_decode_null_character(self): + wrapped_input = '"31337 \\u0000 31337"' + assert ujson.decode(wrapped_input) == json.loads(wrapped_input) + + def test_encode_list_long_conversion(self): + long_input = [ + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + 9223372036854775807, + ] + output = ujson.encode(long_input) + + assert long_input == json.loads(output) + assert long_input == ujson.decode(output) + + tm.assert_numpy_array_equal( + np.array(long_input), ujson.decode(output, numpy=True, dtype=np.int64) + ) + + def test_encode_long_conversion(self): + long_input = 9223372036854775807 + output = ujson.encode(long_input) + + assert long_input == json.loads(output) + assert output == json.dumps(long_input) + assert long_input == ujson.decode(output) + + @pytest.mark.parametrize( + "int_exp", ["1337E40", "1.337E40", "1337E+9", "1.337e+40", "1.337E-4"] + ) + def test_decode_numeric_int_exp(self, int_exp): + assert ujson.decode(int_exp) == json.loads(int_exp) + + def test_loads_non_str_bytes_raises(self): + msg = "Expected 'str' or 'bytes'" + with pytest.raises(TypeError, match=msg): + ujson.loads(None) + + def test_encode_numeric_overflow(self): + with pytest.raises(OverflowError): + ujson.encode(12839128391289382193812939) + + def 
test_encode_numeric_overflow_nested(self): + class Nested: + x = 12839128391289382193812939 + + for _ in range(0, 100): + with pytest.raises(OverflowError): + ujson.encode(Nested()) + + @pytest.mark.parametrize("val", [3590016419, 2 ** 31, 2 ** 32, (2 ** 32) - 1]) + def test_decode_number_with_32bit_sign_bit(self, val): + # Test that numbers that fit within 32 bits but would have the + # sign bit set (2**31 <= x < 2**32) are decoded properly. + doc = f'{{"id": {val}}}' + assert ujson.decode(doc)["id"] == val + + def test_encode_big_escape(self): + # Make sure no Exception is raised. + for _ in range(10): + base = "\u00e5".encode("utf-8") + escape_input = base * 1024 * 1024 * 2 + ujson.encode(escape_input) + + def test_decode_big_escape(self): + # Make sure no Exception is raised. + for _ in range(10): + base = "\u00e5".encode("utf-8") + quote = b'"' + + escape_input = quote + (base * 1024 * 1024 * 2) + quote + ujson.decode(escape_input) + + def test_to_dict(self): + d = {"key": 31337} + + class DictTest: + def toDict(self): + return d + + o = DictTest() + output = ujson.encode(o) + + dec = ujson.decode(output) + assert dec == d + + def test_default_handler(self): + class _TestObject: + def __init__(self, val): + self.val = val + + @property + def recursive_attr(self): + return _TestObject("recursive_attr") + + def __str__(self) -> str: + return str(self.val) + + msg = "Maximum recursion level reached" + with pytest.raises(OverflowError, match=msg): + ujson.encode(_TestObject("foo")) + assert '"foo"' == ujson.encode(_TestObject("foo"), default_handler=str) + + def my_handler(_): + return "foobar" + + assert '"foobar"' == ujson.encode( + _TestObject("foo"), default_handler=my_handler + ) + + def my_handler_raises(_): + raise TypeError("I raise for anything") + + with pytest.raises(TypeError, match="I raise for anything"): + ujson.encode(_TestObject("foo"), default_handler=my_handler_raises) + + def my_int_handler(_): + return 42 + + assert ( + ujson.decode( + 
ujson.encode(_TestObject("foo"), default_handler=my_int_handler) + ) + == 42 + ) + + def my_obj_handler(_): + return datetime.datetime(2013, 2, 3) + + assert ujson.decode( + ujson.encode(datetime.datetime(2013, 2, 3)) + ) == ujson.decode( + ujson.encode(_TestObject("foo"), default_handler=my_obj_handler) + ) + + obj_list = [_TestObject("foo"), _TestObject("bar")] + assert json.loads(json.dumps(obj_list, default=str)) == ujson.decode( + ujson.encode(obj_list, default_handler=str) + ) + + +class TestNumpyJSONTests: + @pytest.mark.parametrize("bool_input", [True, False]) + def test_bool(self, bool_input): + b = np.bool(bool_input) + assert ujson.decode(ujson.encode(b)) == b + + def test_bool_array(self): + bool_array = np.array( + [True, False, True, True, False, True, False, False], dtype=np.bool + ) + output = np.array(ujson.decode(ujson.encode(bool_array)), dtype=np.bool) + tm.assert_numpy_array_equal(bool_array, output) + + def test_int(self, any_int_dtype): + klass = np.dtype(any_int_dtype).type + num = klass(1) + + assert klass(ujson.decode(ujson.encode(num))) == num + + def test_int_array(self, any_int_dtype): + arr = np.arange(100, dtype=np.int) + arr_input = arr.astype(any_int_dtype) + + arr_output = np.array( + ujson.decode(ujson.encode(arr_input)), dtype=any_int_dtype + ) + tm.assert_numpy_array_equal(arr_input, arr_output) + + def test_int_max(self, any_int_dtype): + if any_int_dtype in ("int64", "uint64") and compat.is_platform_32bit(): + pytest.skip("Cannot test 64-bit integer on 32-bit platform") + + klass = np.dtype(any_int_dtype).type + + # uint64 max will always overflow, + # as it's encoded to signed. 
+ if any_int_dtype == "uint64": + num = np.iinfo("int64").max + else: + num = np.iinfo(any_int_dtype).max + + assert klass(ujson.decode(ujson.encode(num))) == num + + def test_float(self, float_dtype): + klass = np.dtype(float_dtype).type + num = klass(256.2013) + + assert klass(ujson.decode(ujson.encode(num))) == num + + def test_float_array(self, float_dtype): + arr = np.arange(12.5, 185.72, 1.7322, dtype=np.float) + float_input = arr.astype(float_dtype) + + float_output = np.array( + ujson.decode(ujson.encode(float_input, double_precision=15)), + dtype=float_dtype, + ) + tm.assert_almost_equal(float_input, float_output) + + def test_float_max(self, float_dtype): + klass = np.dtype(float_dtype).type + num = klass(np.finfo(float_dtype).max / 10) + + tm.assert_almost_equal( + klass(ujson.decode(ujson.encode(num, double_precision=15))), num + ) + + def test_array_basic(self): + arr = np.arange(96) + arr = arr.reshape((2, 2, 2, 2, 3, 2)) + + tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr) + tm.assert_numpy_array_equal(ujson.decode(ujson.encode(arr), numpy=True), arr) + + @pytest.mark.parametrize("shape", [(10, 10), (5, 5, 4), (100, 1)]) + def test_array_reshaped(self, shape): + arr = np.arange(100) + arr = arr.reshape(shape) + + tm.assert_numpy_array_equal(np.array(ujson.decode(ujson.encode(arr))), arr) + tm.assert_numpy_array_equal(ujson.decode(ujson.encode(arr), numpy=True), arr) + + def test_array_list(self): + arr_list = [ + "a", + list(), + dict(), + dict(), + list(), + 42, + 97.8, + ["a", "b"], + {"key": "val"}, + ] + arr = np.array(arr_list, dtype=object) + result = np.array(ujson.decode(ujson.encode(arr)), dtype=object) + tm.assert_numpy_array_equal(result, arr) + + def test_array_float(self): + dtype = np.float32 + + arr = np.arange(100.202, 200.202, 1, dtype=dtype) + arr = arr.reshape((5, 5, 4)) + + arr_out = np.array(ujson.decode(ujson.encode(arr)), dtype=dtype) + tm.assert_almost_equal(arr, arr_out) + + arr_out = 
ujson.decode(ujson.encode(arr), numpy=True, dtype=dtype) + tm.assert_almost_equal(arr, arr_out) + + def test_0d_array(self): + # gh-18878 + msg = re.escape("array(1) (0d array) is not JSON serializable at the moment") + with pytest.raises(TypeError, match=msg): + ujson.encode(np.array(1)) + + @pytest.mark.parametrize( + "bad_input,exc_type,kwargs", + [ + ([{}, []], ValueError, {}), + ([42, None], TypeError, {}), + ([["a"], 42], ValueError, {}), + ([42, {}, "a"], TypeError, {}), + ([42, ["a"], 42], ValueError, {}), + (["a", "b", [], "c"], ValueError, {}), + ([{"a": "b"}], ValueError, dict(labelled=True)), + ({"a": {"b": {"c": 42}}}, ValueError, dict(labelled=True)), + ([{"a": 42, "b": 23}, {"c": 17}], ValueError, dict(labelled=True)), + ], + ) + def test_array_numpy_except(self, bad_input, exc_type, kwargs): + with pytest.raises(exc_type): + ujson.decode(ujson.dumps(bad_input), numpy=True, **kwargs) + + def test_array_numpy_labelled(self): + labelled_input = {"a": []} + output = ujson.loads(ujson.dumps(labelled_input), numpy=True, labelled=True) + assert (np.empty((1, 0)) == output[0]).all() + assert (np.array(["a"]) == output[1]).all() + assert output[2] is None + + labelled_input = [{"a": 42}] + output = ujson.loads(ujson.dumps(labelled_input), numpy=True, labelled=True) + assert (np.array(["a"]) == output[2]).all() + assert (np.array([42]) == output[0]).all() + assert output[1] is None + + # see gh-10837: write out the dump explicitly + # so there is no dependency on iteration order + input_dumps = '[{"a": 42, "b":31}, {"a": 24, "c": 99}, {"a": 2.4, "b": 78}]' + output = ujson.loads(input_dumps, numpy=True, labelled=True) + expected_vals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) + assert (expected_vals == output[0]).all() + assert output[1] is None + assert (np.array(["a", "b"]) == output[2]).all() + + input_dumps = ( + '{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, ' + '"3": {"a": 2.4, "b": 78}}' + ) + output = ujson.loads(input_dumps, 
numpy=True, labelled=True) + expected_vals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) + assert (expected_vals == output[0]).all() + assert (np.array(["1", "2", "3"]) == output[1]).all() + assert (np.array(["a", "b"]) == output[2]).all() + + +class TestPandasJSONTests: + def test_dataframe(self, orient, numpy): + if orient == "records" and numpy: + pytest.skip("Not idiomatic pandas") + + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"] + ) + encode_kwargs = {} if orient is None else dict(orient=orient) + decode_kwargs = {} if numpy is None else dict(numpy=numpy) + + output = ujson.decode(ujson.encode(df, **encode_kwargs), **decode_kwargs) + + # Ensure proper DataFrame initialization. + if orient == "split": + dec = _clean_dict(output) + output = DataFrame(**dec) + else: + output = DataFrame(output) + + # Corrections to enable DataFrame comparison. + if orient == "values": + df.columns = [0, 1, 2] + df.index = [0, 1] + elif orient == "records": + df.index = [0, 1] + elif orient == "index": + df = df.transpose() + + tm.assert_frame_equal(output, df, check_dtype=False) + + def test_dataframe_nested(self, orient): + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], index=["a", "b"], columns=["x", "y", "z"] + ) + + nested = {"df1": df, "df2": df.copy()} + kwargs = {} if orient is None else dict(orient=orient) + + exp = { + "df1": ujson.decode(ujson.encode(df, **kwargs)), + "df2": ujson.decode(ujson.encode(df, **kwargs)), + } + assert ujson.decode(ujson.encode(nested, **kwargs)) == exp + + def test_dataframe_numpy_labelled(self, orient): + if orient in ("split", "values"): + pytest.skip("Incompatible with labelled=True") + + df = DataFrame( + [[1, 2, 3], [4, 5, 6]], + index=["a", "b"], + columns=["x", "y", "z"], + dtype=np.int, + ) + kwargs = {} if orient is None else dict(orient=orient) + + output = DataFrame( + *ujson.decode(ujson.encode(df, **kwargs), numpy=True, labelled=True) + ) + + if orient is None: + df = df.T + 
elif orient == "records": + df.index = [0, 1] + + tm.assert_frame_equal(output, df) + + def test_series(self, orient, numpy): + s = Series( + [10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15] + ).sort_values() + + encode_kwargs = {} if orient is None else dict(orient=orient) + decode_kwargs = {} if numpy is None else dict(numpy=numpy) + + output = ujson.decode(ujson.encode(s, **encode_kwargs), **decode_kwargs) + + if orient == "split": + dec = _clean_dict(output) + output = Series(**dec) + else: + output = Series(output) + + if orient in (None, "index"): + s.name = None + output = output.sort_values() + s.index = ["6", "7", "8", "9", "10", "15"] + elif orient in ("records", "values"): + s.name = None + s.index = [0, 1, 2, 3, 4, 5] + + tm.assert_series_equal(output, s, check_dtype=False) + + def test_series_nested(self, orient): + s = Series( + [10, 20, 30, 40, 50, 60], name="series", index=[6, 7, 8, 9, 10, 15] + ).sort_values() + nested = {"s1": s, "s2": s.copy()} + kwargs = {} if orient is None else dict(orient=orient) + + exp = { + "s1": ujson.decode(ujson.encode(s, **kwargs)), + "s2": ujson.decode(ujson.encode(s, **kwargs)), + } + assert ujson.decode(ujson.encode(nested, **kwargs)) == exp + + def test_index(self): + i = Index([23, 45, 18, 98, 43, 11], name="index") + + # Column indexed. 
+ output = Index(ujson.decode(ujson.encode(i)), name="index") + tm.assert_index_equal(i, output) + + output = Index(ujson.decode(ujson.encode(i), numpy=True), name="index") + tm.assert_index_equal(i, output) + + dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"))) + output = Index(**dec) + + tm.assert_index_equal(i, output) + assert i.name == output.name + + dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"), numpy=True)) + output = Index(**dec) + + tm.assert_index_equal(i, output) + assert i.name == output.name + + output = Index(ujson.decode(ujson.encode(i, orient="values")), name="index") + tm.assert_index_equal(i, output) + + output = Index( + ujson.decode(ujson.encode(i, orient="values"), numpy=True), name="index" + ) + tm.assert_index_equal(i, output) + + output = Index(ujson.decode(ujson.encode(i, orient="records")), name="index") + tm.assert_index_equal(i, output) + + output = Index( + ujson.decode(ujson.encode(i, orient="records"), numpy=True), name="index" + ) + tm.assert_index_equal(i, output) + + output = Index(ujson.decode(ujson.encode(i, orient="index")), name="index") + tm.assert_index_equal(i, output) + + output = Index( + ujson.decode(ujson.encode(i, orient="index"), numpy=True), name="index" + ) + tm.assert_index_equal(i, output) + + def test_datetime_index(self): + date_unit = "ns" + + rng = date_range("1/1/2000", periods=20) + encoded = ujson.encode(rng, date_unit=date_unit) + + decoded = DatetimeIndex(np.array(ujson.decode(encoded))) + tm.assert_index_equal(rng, decoded) + + ts = Series(np.random.randn(len(rng)), index=rng) + decoded = Series(ujson.decode(ujson.encode(ts, date_unit=date_unit))) + + idx_values = decoded.index.values.astype(np.int64) + decoded.index = DatetimeIndex(idx_values) + tm.assert_series_equal(ts, decoded) + + @pytest.mark.parametrize( + "invalid_arr", + [ + "[31337,]", # Trailing comma. + "[,31337]", # Leading comma. + "[]]", # Unmatched bracket. + "[,]", # Only comma. 
+ ], + ) + def test_decode_invalid_array(self, invalid_arr): + with pytest.raises(ValueError): + ujson.decode(invalid_arr) + + @pytest.mark.parametrize("arr", [[], [31337]]) + def test_decode_array(self, arr): + assert arr == ujson.decode(str(arr)) + + @pytest.mark.parametrize("extreme_num", [9223372036854775807, -9223372036854775808]) + def test_decode_extreme_numbers(self, extreme_num): + assert extreme_num == ujson.decode(str(extreme_num)) + + @pytest.mark.parametrize( + "too_extreme_num", ["9223372036854775808", "-90223372036854775809"] + ) + def test_decode_too_extreme_numbers(self, too_extreme_num): + with pytest.raises(ValueError): + ujson.decode(too_extreme_num) + + def test_decode_with_trailing_whitespaces(self): + assert {} == ujson.decode("{}\n\t ") + + def test_decode_with_trailing_non_whitespaces(self): + with pytest.raises(ValueError): + ujson.decode("{}\n\t a") + + def test_decode_array_with_big_int(self): + with pytest.raises(ValueError): + ujson.loads("[18446098363113800555]") + + @pytest.mark.parametrize( + "float_number", + [ + 1.1234567893, + 1.234567893, + 1.34567893, + 1.4567893, + 1.567893, + 1.67893, + 1.7893, + 1.893, + 1.3, + ], + ) + @pytest.mark.parametrize("sign", [-1, 1]) + def test_decode_floating_point(self, sign, float_number): + float_number *= sign + tm.assert_almost_equal( + float_number, ujson.loads(str(float_number)), check_less_precise=15 + ) + + def test_encode_big_set(self): + s = set() + + for x in range(0, 100000): + s.add(x) + + # Make sure no Exception is raised. 
+ ujson.encode(s) + + def test_encode_empty_set(self): + assert "[]" == ujson.encode(set()) + + def test_encode_set(self): + s = {1, 2, 3, 4, 5, 6, 7, 8, 9} + enc = ujson.encode(s) + dec = ujson.decode(enc) + + for v in dec: + assert v in s diff --git a/pandas/tests/io/parser/__init__.py b/pandas/tests/io/parser/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py new file mode 100644 index 00000000..15967e3b --- /dev/null +++ b/pandas/tests/io/parser/conftest.py @@ -0,0 +1,123 @@ +import os +from typing import List, Optional + +import pytest + +from pandas import read_csv, read_table + + +class BaseParser: + engine: Optional[str] = None + low_memory = True + float_precision_choices: List[Optional[str]] = [] + + def update_kwargs(self, kwargs): + kwargs = kwargs.copy() + kwargs.update(dict(engine=self.engine, low_memory=self.low_memory)) + + return kwargs + + def read_csv(self, *args, **kwargs): + kwargs = self.update_kwargs(kwargs) + return read_csv(*args, **kwargs) + + def read_table(self, *args, **kwargs): + kwargs = self.update_kwargs(kwargs) + return read_table(*args, **kwargs) + + +class CParser(BaseParser): + engine = "c" + float_precision_choices = [None, "high", "round_trip"] + + +class CParserHighMemory(CParser): + low_memory = False + + +class CParserLowMemory(CParser): + low_memory = True + + +class PythonParser(BaseParser): + engine = "python" + float_precision_choices = [None] + + +@pytest.fixture +def csv_dir_path(datapath): + """ + The directory path to the data files needed for parser tests. + """ + return datapath("io", "parser", "data") + + +@pytest.fixture +def csv1(csv_dir_path): + """ + The path to the data file "test1.csv" needed for parser tests. 
+ """ + return os.path.join(csv_dir_path, "test1.csv") + + +_cParserHighMemory = CParserHighMemory() +_cParserLowMemory = CParserLowMemory() +_pythonParser = PythonParser() + +_py_parsers_only = [_pythonParser] +_c_parsers_only = [_cParserHighMemory, _cParserLowMemory] +_all_parsers = [*_c_parsers_only, *_py_parsers_only] + +_py_parser_ids = ["python"] +_c_parser_ids = ["c_high", "c_low"] +_all_parser_ids = [*_c_parser_ids, *_py_parser_ids] + + +@pytest.fixture(params=_all_parsers, ids=_all_parser_ids) +def all_parsers(request): + """ + Fixture all of the CSV parsers. + """ + return request.param + + +@pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids) +def c_parser_only(request): + """ + Fixture all of the CSV parsers using the C engine. + """ + return request.param + + +@pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids) +def python_parser_only(request): + """ + Fixture all of the CSV parsers using the Python engine. + """ + return request.param + + +_utf_values = [8, 16, 32] + +_encoding_seps = ["", "-", "_"] +_encoding_prefixes = ["utf", "UTF"] + +_encoding_fmts = [ + f"{prefix}{sep}" + "{0}" for sep in _encoding_seps for prefix in _encoding_prefixes +] + + +@pytest.fixture(params=_utf_values) +def utf_value(request): + """ + Fixture for all possible integer values for a UTF encoding. + """ + return request.param + + +@pytest.fixture(params=_encoding_fmts) +def encoding_fmt(request): + """ + Fixture for all possible string formats of a UTF encoding. 
+ """ + return request.param diff --git a/pandas/tests/io/parser/data/items.jsonl b/pandas/tests/io/parser/data/items.jsonl new file mode 100644 index 00000000..f784d37b --- /dev/null +++ b/pandas/tests/io/parser/data/items.jsonl @@ -0,0 +1,2 @@ +{"a": 1, "b": 2} +{"b":2, "a" :1} diff --git a/pandas/tests/io/parser/data/salaries.csv b/pandas/tests/io/parser/data/salaries.csv new file mode 100644 index 00000000..85631704 --- /dev/null +++ b/pandas/tests/io/parser/data/salaries.csv @@ -0,0 +1,47 @@ +S X E M +13876 1 1 1 +11608 1 3 0 +18701 1 3 1 +11283 1 2 0 +11767 1 3 0 +20872 2 2 1 +11772 2 2 0 +10535 2 1 0 +12195 2 3 0 +12313 3 2 0 +14975 3 1 1 +21371 3 2 1 +19800 3 3 1 +11417 4 1 0 +20263 4 3 1 +13231 4 3 0 +12884 4 2 0 +13245 5 2 0 +13677 5 3 0 +15965 5 1 1 +12336 6 1 0 +21352 6 3 1 +13839 6 2 0 +22884 6 2 1 +16978 7 1 1 +14803 8 2 0 +17404 8 1 1 +22184 8 3 1 +13548 8 1 0 +14467 10 1 0 +15942 10 2 0 +23174 10 3 1 +23780 10 2 1 +25410 11 2 1 +14861 11 1 0 +16882 12 2 0 +24170 12 3 1 +15990 13 1 0 +26330 13 2 1 +17949 14 2 0 +25685 15 3 1 +27837 16 2 1 +18838 16 2 0 +17483 16 1 0 +19207 17 2 0 +19346 20 1 0 diff --git a/pandas/tests/io/parser/data/salaries.csv.bz2 b/pandas/tests/io/parser/data/salaries.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a68b4e62bf34a64118a776c3575a848d2dee3eea GIT binary patch literal 283 zcmV+$0p$KdT4*^jL0KkKSwin59RLA)-T(kFKmdQ>0s;s?FadjPCWfk?`V564U}|{@ zw3NibCX|?(r>UmYq#z#udx$b-45?apVahHUu7f_`FRN_siIg?f?>dqH$ITaOEQHr9du@O{PIlb?An)*U(TfOTX zbpsCAbm5!s!W(C(!5NmFQ?yeZP^@L8$($VyP+WvF%a>z(y78UnoV7}xCJwa<@d0TE z#Mku-jM^4(+(0?TNRg(UtccGpkw+iwFoFZmv}T19M?)VRCscbYWs_WdKc5yLH4c46A)p+z9-W8X#q=G}(WW zNF>F}doB!s0w~x&&+qf~e7}&x?fqasFGPb#!0!h(4hOtAb&7m@sC0DmfAa+4lp^)K zfAErX(&iCgC#aB=2ENaD_8 zlAWg?Z0?DBUYC{LeG&8S&n6F9De=tzByr!LFbZFP zmAHELCHXyI2wg%&^ai{=izo|X515HVsN{K^B_#}tXj6+g(tK9htwRyLm#%dPo??u- zi8l2myBVD7Ni9yVI*WPmxyz@l2#z7Do%|c8eq03Sh|k1gIpfpEA5VELulWK102wEc A!2kdN literal 0 HcmV?d00001 diff --git 
a/pandas/tests/io/parser/data/salaries.csv.xz b/pandas/tests/io/parser/data/salaries.csv.xz new file mode 100644 index 0000000000000000000000000000000000000000..40df8e8f936dc027143334a54a30d08223afddad GIT binary patch literal 336 zcmV-W0k8i3H+ooF000E$*0e?f03iVu0001VFXf})0{8(BT>vS9M@c>Pbnrp>s&pj? z?{#o?>=k#NICS!%dd2YXQg%@sI>ulS#?Cxh1cpnuCht<_GBz9_iVNOp*glYq|CXA11Wb zPp-Z_P{&*cfI&}IE_@P6UJ!a+r?9`gmV_zs%8-StgMBYn%D83mo{#8c-x{fIE_I}t%QC)7UlykOvN~LyGyJ4luv^P5Df@1 zSoyPi3vu5SCKqWQha7lxYqoESQ@84CB2x0;fz$N16pb5^?-^cbb$+%$5YC*l#Q*?h iBLnR-8?NpE0jmP}1pok6{p&Ze#Ao{g000001X)`D)|(0d literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/salaries.csv.zip b/pandas/tests/io/parser/data/salaries.csv.zip new file mode 100644 index 0000000000000000000000000000000000000000..294f65b36771d2cfba97a51f9fa6df494d511fc2 GIT binary patch literal 445 zcmWIWW@Zs#U|`^2NDD~z^sm%i&(Fxf@RNywfrmkcp*S%ou_!aOSg)kGEHs3Zfq8+m zbi|95G7+T}+zgB?FPIq^z=Utm?$RR$JgaMi?{NH?ETxdT%H(GKBoCFNr>pgN8kjV0 z*q?v@`D@Mo35VbQO|aK@Y4$kqp52U(eUC+HS55Vbl9JE$pIIJGQTn>4{=-c6)LcWu zhgx?kkDTDFikp_*E^qHtw&&N!c*Y4o-l^~SKJm4`!?Dij=duNb&s;qoolBlHce#Ad zyUyyk=~M633VwcfUX4Fe^W$0WxX#o4TIbX1qxb#(_HlAUt&!S&R+Ym0^9{J->!}4{?zXs86LxE@v*6z8>q6LW?aE4 o0Ss;i24E;NENKL>Py?A263A!)9N^8$22#righ4<$6r`O205eCa6aWAK literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv b/pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv new file mode 100644 index 00000000..218ddf33 --- /dev/null +++ b/pandas/tests/io/parser/data/sauron.SHIFT_JIS.csv @@ -0,0 +1,14 @@ +num, text +1,ƒTƒEƒƒ“iSauronAƒAƒCƒkƒA‚Ì‘n‘¢‚ÌŽž - ‘æŽO‹I3019”N3ŒŽ25“új‚́AJEREREƒg[ƒ‹ƒLƒ“‚Ì’†‚‘‚ð•‘‘ä‚Æ‚µ‚½¬àwƒzƒrƒbƒg‚Ì–`Œ¯xwŽw—Ö•¨ŒêxwƒVƒ‹ƒ}ƒŠƒ‹‚Ì•¨Œêx‚Ì“oêl•¨B +2,wƒzƒrƒbƒg‚Ì–`Œ¯x‚ÉŒ¾‹y‚Ì‚ ‚éuŽ€l‚¤‚ç‚È‚¢Žtvi‰f‰æwƒzƒrƒbƒgƒVƒŠ[ƒYx‚ÌŽš–‹‚Å‚ÍuŽ€lŒ­‚¢iƒlƒNƒƒ}ƒ“ƒT[jvj‚Ƃ͔ނ̂±‚Ƃł ‚éB +3,‚»‚Ì‘±•Ò‚Å‚ ‚éwŽw—Ö•¨Œêx‚É‚¨‚¢‚Ắuˆê‚‚̎w—ցithe One Ringjv‚̍ì‚èŽåAu–»‰¤iDark LordjvAu‚©‚̎ҁithe Onej[1]v‚Æ‚µ‚Ä“oê‚·‚éB‘OŽj‚É‚ 
‚½‚éwƒVƒ‹ƒ}ƒŠƒ‹‚Ì•¨Œêx‚ł́A‰‘ã‚Ì–»‰¤ƒ‚ƒ‹ƒSƒX‚̍łà—Í‚ ‚鑤‹ß‚Å‚ ‚Á‚½B +4,ƒTƒEƒƒ“‚ÍŒ³—ˆAƒAƒ‹ƒ_i’n‹…j‚Ì‘n‘¢‚ð’S‚Á‚½“VŽg“IŽí‘°ƒAƒCƒkƒA‚̈êˆõ‚Å‚ ‚Á‚½‚ªAŽåƒƒ‹ƒR[ƒ‹‚Ì”½‹t‚ɉÁ’S‚µ‚Ä‘Â—Ž‚µAƒAƒ‹ƒ_‚ÉŠQ‚ð‚È‚·‘¶Ý‚ƂȂÁ‚½B +5,uƒTƒEƒƒ“v‚Ƃ̓NƒEƒFƒ“ƒ„‚Ług‚̖т̂悾‚‚à‚́v‚Æ‚¢‚¤ˆÓ–¡‚Å‚ ‚èAƒVƒ“ƒ_ƒŠƒ“‚Å“¯—l‚̈Ӗ¡‚Å‚ ‚é–¼‘OuƒSƒ‹ƒTƒEƒAv‚ƌĂ΂ê‚邱‚Æ‚à‚ ‚éB +6,‚±‚ê‚ç‚́AƒTƒEƒƒ“‚ð‹°‚êAŠõ‚ÝŒ™‚Á‚½ƒGƒ‹ƒt‚É‚æ‚é–¼‚Å‚ ‚èAwŽw—Ö•¨Œêxì’†‚É‚¨‚¢‚ăAƒ‰ƒSƒ‹ƒ“‚́u‚©‚êiƒTƒEƒƒ“j‚ÍŽ©•ª‚Ì–{“–‚Ì–¼‚ÍŽg‚í‚È‚¢‚µA‚»‚ê‚ðŽš‚É‘‚¢‚½‚èŒû‚ɏo‚µ‚½‚è‚·‚邱‚Æ‚à‹–‚³‚È‚¢v‚Æ”­Œ¾‚µ‚Ä‚¢‚éB +7,‚»‚̂ق©A‘æ“ñ‹I‚ɃGƒ‹ƒt‚ɑ΂µ‚ÄŽ©Ì‚µ‚½‚Æ‚³‚ê‚é–¼‚ɁAuƒAƒ“ƒiƒ^[ƒ‹i•¨‘¡‚éŒNjvAuƒAƒ‹ƒ^ƒmi‚‹M‚ȍ׍HŽtjvAuƒAƒEƒŒƒ“ƒfƒBƒ‹iƒAƒEƒŒ‚̉º–ljv‚ª‚ ‚éB +8,‘æˆê‹I‚̍ ‚̃TƒEƒƒ“‚́AŽ©Ý‚ɕϐg‚·‚é”\—Í‚ðŽ‚Á‚Ä‚¢‚½B +9,‚»‚Ì”\—Í‚ðŽg‚¦‚ÎŒ©–ڗ킵‚¢—§”h‚ÈŠOŒ©‚ð‘•‚¤‚±‚Æ‚âA‚Ü‚½‹‘å‚ȘT‚â‹zŒŒ‚±‚¤‚à‚è‚Æ‚¢‚Á‚½‰ö•¨‚ɕς¶‚邱‚Æ‚à‚Å‚«AƒGƒ‹ƒt‚©‚ç‹°‚ê‚ç‚ꂽB +10,‘æ“ñ‹I‚Ɉê‚‚̎w—Ö‚ðì‚èã‚°‚½ƒTƒEƒƒ“‚́A‘¼‚̗͂̎w—ւŐ¬‚³‚ê‚鎖•¿‚â‚»‚ÌŠ—LŽÒ‚ðŽx”z‚Å‚«‚邿‚¤‚ɂȂÁ‚½B +11,‚Ü‚½A“÷‘Ì‚ª–łтĂàŽw—Ö‚ª‚ ‚éŒÀ‚艽“x‚Å‚à‘h‚邱‚Æ‚ª‚Å‚«‚½B +12,‚½‚¾‚µƒk[ƒƒm[ƒ‹–v—Ž‚ÌÛ‚É”ü‚µ‚¢“÷‘Ì‚ð”j‰ó‚³‚ꂽŒã‚́A“ñ“x‚Æ”ü‚µ‚­•ϐg‚·‚邱‚Ƃ͂ł«‚È‚­‚È‚èA‚»‚̈«ˆÓ‚̋‚̂悤‚ÈŒ©‚é‚à‹°‚낵‚¢Žp‚µ‚©‚Æ‚ê‚È‚­‚È‚Á‚½‚Æ‚¢‚¤B +13,‚Ü‚½‚µ‚΂µ‚΁u‚܂Ԃ½‚̂Ȃ¢‰Î‚ɉŽæ‚ç‚ꂽ–ځv‚Æ‚¢‚Á‚½SÛ•\Œ»‚Å‘¨‚¦‚ç‚ꂽB diff --git a/pandas/tests/io/parser/data/sub_char.csv b/pandas/tests/io/parser/data/sub_char.csv new file mode 100644 index 00000000..ff1fa777 --- /dev/null +++ b/pandas/tests/io/parser/data/sub_char.csv @@ -0,0 +1,2 @@ +a,"b",c +1,2,3 \ No newline at end of file diff --git a/pandas/tests/io/parser/data/tar_csv.tar b/pandas/tests/io/parser/data/tar_csv.tar new file mode 100644 index 0000000000000000000000000000000000000000..d1819550e0a0064b4d9ad829f120e49760c3ffe2 GIT binary patch literal 10240 zcmeIuK?;O03_#JW1@F)k3{BNsM}nR}J9B-TY7p4K!(9_GO$s`)68z@>0YS zW+wk!;+jjzL^~}framQ!s=W;o;!FQIwf(Nymk>_1JC}X6!*X|eRCwcUqis`RFe4E_ x009ILKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009IZ32f(E6h{C6 literal 0 HcmV?d00001 diff --git 
a/pandas/tests/io/parser/data/tar_csv.tar.gz b/pandas/tests/io/parser/data/tar_csv.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..80505d345f1e2ffd298f35c9881d548b4e686676 GIT binary patch literal 117 zcmb2|=3oE;Cg!*24Y?W&1Y9nzWqm(E*kjs^Cl;GtaGF%rEuI>&TH(+2KS`@J&G;va zzCM;ZdHN5nr6t>TeUET$&f>6aztJ7uYuEkxQ)8*7nf3P*CtvTJzhT?OZGvEvVZ?#+ OZ+7nsQDM+vU;qF|YA}lc literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/test1.csv b/pandas/tests/io/parser/data/test1.csv new file mode 100644 index 00000000..4bdb6294 --- /dev/null +++ b/pandas/tests/io/parser/data/test1.csv @@ -0,0 +1,8 @@ +index,A,B,C,D +2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169 +2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967 +2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952 +2000-01-06 00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227 +2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917 +2000-01-10 00:00:00,0.836648671666,0.246461918642,0.588542635376,1.0627820613 +2000-01-11 00:00:00,-0.157160753327,1.34030689438,1.19577795622,-1.09700699751 \ No newline at end of file diff --git a/pandas/tests/io/parser/data/test1.csv.bz2 b/pandas/tests/io/parser/data/test1.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f96f26a8e741907243c32845024b7277f0680005 GIT binary patch literal 307 zcmV-30nGkFT4*^jL0KkKSW1Ji@9)pArt5lpH4IRO00Iz@ zAlz^!=a51W3H02z!+mSBeK%(2i;3qroLGpgc)ZhAta;6*(Kx-+d8k8stbmZHPIKD0 z%{khavXfep_f#A?g|OEiy6GMb+kPrC)~cTlb6Q1R&R1#Y8j1mNvJ1wwH#o39Ih8E! 
zh)Zn6%{yHy5rH`%)11mfB0$%H6FSJnOFqhY6AzMzlFupWZ6*MkRe{ySGvcT?8Y%YR zy=rhGo@XLhosGo#8?jc!M4$nL1(98A8>(wY4d^qqg(%E!y5l#$-h21{F64@Ep&_5g FVD{@glUM)% literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/test1.csv.gz b/pandas/tests/io/parser/data/test1.csv.gz new file mode 100644 index 0000000000000000000000000000000000000000..1336db6e2af7e99bbcfc1105a34bfc575dc36e39 GIT binary patch literal 294 zcmV+>0onc^iwFSY)EiX*167eZQpG?FL~~BT0miP@){@9rjsgJ*F~>*q6PvOFYh3mE zsptFW^XqrLuDA8RKAsf70XbmLz{}%ZIJ=;%5X;PE=X|E2vcYxWG`b24pMTd1xj8vqwIDPuw&2SO=Plp!ZwMZ?<$$Gm`X7xbwUQ8jm3vxQ8$JTYzrw3Qs-g=ik%a+b{ s*hQ0nb}~LhhMDH09vE2TyUUuE=GJ?BbgQBhV0VW60b<1z@;U+l0HUsoVE_OC literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/test2.csv b/pandas/tests/io/parser/data/test2.csv new file mode 100644 index 00000000..6f914115 --- /dev/null +++ b/pandas/tests/io/parser/data/test2.csv @@ -0,0 +1,6 @@ +A,B,C,D,E +2000-01-03 00:00:00,0.980268513777,3.68573087906,-0.364216805298,-1.15973806169,foo +2000-01-04 00:00:00,1.04791624281,-0.0412318367011,-0.16181208307,0.212549316967,bar +2000-01-05 00:00:00,0.498580885705,0.731167677815,-0.537677223318,1.34627041952,baz +2000-01-06 00:00:00,1.12020151869,1.56762092543,0.00364077397681,0.67525259227,qux +2000-01-07 00:00:00,-0.487094399463,0.571454623474,-1.6116394093,0.103468562917,foo2 diff --git a/pandas/tests/io/parser/data/test_mmap.csv b/pandas/tests/io/parser/data/test_mmap.csv new file mode 100644 index 00000000..2885fc2b --- /dev/null +++ b/pandas/tests/io/parser/data/test_mmap.csv @@ -0,0 +1,4 @@ +a,b,c +1,one,I +2,two,II +3,three,III diff --git a/pandas/tests/io/parser/data/tips.csv b/pandas/tests/io/parser/data/tips.csv new file mode 100644 index 00000000..856a65a6 --- /dev/null +++ b/pandas/tests/io/parser/data/tips.csv @@ -0,0 +1,245 @@ +total_bill,tip,sex,smoker,day,time,size +16.99,1.01,Female,No,Sun,Dinner,2 +10.34,1.66,Male,No,Sun,Dinner,3 +21.01,3.5,Male,No,Sun,Dinner,3 +23.68,3.31,Male,No,Sun,Dinner,2 
+24.59,3.61,Female,No,Sun,Dinner,4 +25.29,4.71,Male,No,Sun,Dinner,4 +8.77,2.0,Male,No,Sun,Dinner,2 +26.88,3.12,Male,No,Sun,Dinner,4 +15.04,1.96,Male,No,Sun,Dinner,2 +14.78,3.23,Male,No,Sun,Dinner,2 +10.27,1.71,Male,No,Sun,Dinner,2 +35.26,5.0,Female,No,Sun,Dinner,4 +15.42,1.57,Male,No,Sun,Dinner,2 +18.43,3.0,Male,No,Sun,Dinner,4 +14.83,3.02,Female,No,Sun,Dinner,2 +21.58,3.92,Male,No,Sun,Dinner,2 +10.33,1.67,Female,No,Sun,Dinner,3 +16.29,3.71,Male,No,Sun,Dinner,3 +16.97,3.5,Female,No,Sun,Dinner,3 +20.65,3.35,Male,No,Sat,Dinner,3 +17.92,4.08,Male,No,Sat,Dinner,2 +20.29,2.75,Female,No,Sat,Dinner,2 +15.77,2.23,Female,No,Sat,Dinner,2 +39.42,7.58,Male,No,Sat,Dinner,4 +19.82,3.18,Male,No,Sat,Dinner,2 +17.81,2.34,Male,No,Sat,Dinner,4 +13.37,2.0,Male,No,Sat,Dinner,2 +12.69,2.0,Male,No,Sat,Dinner,2 +21.7,4.3,Male,No,Sat,Dinner,2 +19.65,3.0,Female,No,Sat,Dinner,2 +9.55,1.45,Male,No,Sat,Dinner,2 +18.35,2.5,Male,No,Sat,Dinner,4 +15.06,3.0,Female,No,Sat,Dinner,2 +20.69,2.45,Female,No,Sat,Dinner,4 +17.78,3.27,Male,No,Sat,Dinner,2 +24.06,3.6,Male,No,Sat,Dinner,3 +16.31,2.0,Male,No,Sat,Dinner,3 +16.93,3.07,Female,No,Sat,Dinner,3 +18.69,2.31,Male,No,Sat,Dinner,3 +31.27,5.0,Male,No,Sat,Dinner,3 +16.04,2.24,Male,No,Sat,Dinner,3 +17.46,2.54,Male,No,Sun,Dinner,2 +13.94,3.06,Male,No,Sun,Dinner,2 +9.68,1.32,Male,No,Sun,Dinner,2 +30.4,5.6,Male,No,Sun,Dinner,4 +18.29,3.0,Male,No,Sun,Dinner,2 +22.23,5.0,Male,No,Sun,Dinner,2 +32.4,6.0,Male,No,Sun,Dinner,4 +28.55,2.05,Male,No,Sun,Dinner,3 +18.04,3.0,Male,No,Sun,Dinner,2 +12.54,2.5,Male,No,Sun,Dinner,2 +10.29,2.6,Female,No,Sun,Dinner,2 +34.81,5.2,Female,No,Sun,Dinner,4 +9.94,1.56,Male,No,Sun,Dinner,2 +25.56,4.34,Male,No,Sun,Dinner,4 +19.49,3.51,Male,No,Sun,Dinner,2 +38.01,3.0,Male,Yes,Sat,Dinner,4 +26.41,1.5,Female,No,Sat,Dinner,2 +11.24,1.76,Male,Yes,Sat,Dinner,2 +48.27,6.73,Male,No,Sat,Dinner,4 +20.29,3.21,Male,Yes,Sat,Dinner,2 +13.81,2.0,Male,Yes,Sat,Dinner,2 +11.02,1.98,Male,Yes,Sat,Dinner,2 +18.29,3.76,Male,Yes,Sat,Dinner,4 
+17.59,2.64,Male,No,Sat,Dinner,3 +20.08,3.15,Male,No,Sat,Dinner,3 +16.45,2.47,Female,No,Sat,Dinner,2 +3.07,1.0,Female,Yes,Sat,Dinner,1 +20.23,2.01,Male,No,Sat,Dinner,2 +15.01,2.09,Male,Yes,Sat,Dinner,2 +12.02,1.97,Male,No,Sat,Dinner,2 +17.07,3.0,Female,No,Sat,Dinner,3 +26.86,3.14,Female,Yes,Sat,Dinner,2 +25.28,5.0,Female,Yes,Sat,Dinner,2 +14.73,2.2,Female,No,Sat,Dinner,2 +10.51,1.25,Male,No,Sat,Dinner,2 +17.92,3.08,Male,Yes,Sat,Dinner,2 +27.2,4.0,Male,No,Thur,Lunch,4 +22.76,3.0,Male,No,Thur,Lunch,2 +17.29,2.71,Male,No,Thur,Lunch,2 +19.44,3.0,Male,Yes,Thur,Lunch,2 +16.66,3.4,Male,No,Thur,Lunch,2 +10.07,1.83,Female,No,Thur,Lunch,1 +32.68,5.0,Male,Yes,Thur,Lunch,2 +15.98,2.03,Male,No,Thur,Lunch,2 +34.83,5.17,Female,No,Thur,Lunch,4 +13.03,2.0,Male,No,Thur,Lunch,2 +18.28,4.0,Male,No,Thur,Lunch,2 +24.71,5.85,Male,No,Thur,Lunch,2 +21.16,3.0,Male,No,Thur,Lunch,2 +28.97,3.0,Male,Yes,Fri,Dinner,2 +22.49,3.5,Male,No,Fri,Dinner,2 +5.75,1.0,Female,Yes,Fri,Dinner,2 +16.32,4.3,Female,Yes,Fri,Dinner,2 +22.75,3.25,Female,No,Fri,Dinner,2 +40.17,4.73,Male,Yes,Fri,Dinner,4 +27.28,4.0,Male,Yes,Fri,Dinner,2 +12.03,1.5,Male,Yes,Fri,Dinner,2 +21.01,3.0,Male,Yes,Fri,Dinner,2 +12.46,1.5,Male,No,Fri,Dinner,2 +11.35,2.5,Female,Yes,Fri,Dinner,2 +15.38,3.0,Female,Yes,Fri,Dinner,2 +44.3,2.5,Female,Yes,Sat,Dinner,3 +22.42,3.48,Female,Yes,Sat,Dinner,2 +20.92,4.08,Female,No,Sat,Dinner,2 +15.36,1.64,Male,Yes,Sat,Dinner,2 +20.49,4.06,Male,Yes,Sat,Dinner,2 +25.21,4.29,Male,Yes,Sat,Dinner,2 +18.24,3.76,Male,No,Sat,Dinner,2 +14.31,4.0,Female,Yes,Sat,Dinner,2 +14.0,3.0,Male,No,Sat,Dinner,2 +7.25,1.0,Female,No,Sat,Dinner,1 +38.07,4.0,Male,No,Sun,Dinner,3 +23.95,2.55,Male,No,Sun,Dinner,2 +25.71,4.0,Female,No,Sun,Dinner,3 +17.31,3.5,Female,No,Sun,Dinner,2 +29.93,5.07,Male,No,Sun,Dinner,4 +10.65,1.5,Female,No,Thur,Lunch,2 +12.43,1.8,Female,No,Thur,Lunch,2 +24.08,2.92,Female,No,Thur,Lunch,4 +11.69,2.31,Male,No,Thur,Lunch,2 +13.42,1.68,Female,No,Thur,Lunch,2 +14.26,2.5,Male,No,Thur,Lunch,2 
+15.95,2.0,Male,No,Thur,Lunch,2 +12.48,2.52,Female,No,Thur,Lunch,2 +29.8,4.2,Female,No,Thur,Lunch,6 +8.52,1.48,Male,No,Thur,Lunch,2 +14.52,2.0,Female,No,Thur,Lunch,2 +11.38,2.0,Female,No,Thur,Lunch,2 +22.82,2.18,Male,No,Thur,Lunch,3 +19.08,1.5,Male,No,Thur,Lunch,2 +20.27,2.83,Female,No,Thur,Lunch,2 +11.17,1.5,Female,No,Thur,Lunch,2 +12.26,2.0,Female,No,Thur,Lunch,2 +18.26,3.25,Female,No,Thur,Lunch,2 +8.51,1.25,Female,No,Thur,Lunch,2 +10.33,2.0,Female,No,Thur,Lunch,2 +14.15,2.0,Female,No,Thur,Lunch,2 +16.0,2.0,Male,Yes,Thur,Lunch,2 +13.16,2.75,Female,No,Thur,Lunch,2 +17.47,3.5,Female,No,Thur,Lunch,2 +34.3,6.7,Male,No,Thur,Lunch,6 +41.19,5.0,Male,No,Thur,Lunch,5 +27.05,5.0,Female,No,Thur,Lunch,6 +16.43,2.3,Female,No,Thur,Lunch,2 +8.35,1.5,Female,No,Thur,Lunch,2 +18.64,1.36,Female,No,Thur,Lunch,3 +11.87,1.63,Female,No,Thur,Lunch,2 +9.78,1.73,Male,No,Thur,Lunch,2 +7.51,2.0,Male,No,Thur,Lunch,2 +14.07,2.5,Male,No,Sun,Dinner,2 +13.13,2.0,Male,No,Sun,Dinner,2 +17.26,2.74,Male,No,Sun,Dinner,3 +24.55,2.0,Male,No,Sun,Dinner,4 +19.77,2.0,Male,No,Sun,Dinner,4 +29.85,5.14,Female,No,Sun,Dinner,5 +48.17,5.0,Male,No,Sun,Dinner,6 +25.0,3.75,Female,No,Sun,Dinner,4 +13.39,2.61,Female,No,Sun,Dinner,2 +16.49,2.0,Male,No,Sun,Dinner,4 +21.5,3.5,Male,No,Sun,Dinner,4 +12.66,2.5,Male,No,Sun,Dinner,2 +16.21,2.0,Female,No,Sun,Dinner,3 +13.81,2.0,Male,No,Sun,Dinner,2 +17.51,3.0,Female,Yes,Sun,Dinner,2 +24.52,3.48,Male,No,Sun,Dinner,3 +20.76,2.24,Male,No,Sun,Dinner,2 +31.71,4.5,Male,No,Sun,Dinner,4 +10.59,1.61,Female,Yes,Sat,Dinner,2 +10.63,2.0,Female,Yes,Sat,Dinner,2 +50.81,10.0,Male,Yes,Sat,Dinner,3 +15.81,3.16,Male,Yes,Sat,Dinner,2 +7.25,5.15,Male,Yes,Sun,Dinner,2 +31.85,3.18,Male,Yes,Sun,Dinner,2 +16.82,4.0,Male,Yes,Sun,Dinner,2 +32.9,3.11,Male,Yes,Sun,Dinner,2 +17.89,2.0,Male,Yes,Sun,Dinner,2 +14.48,2.0,Male,Yes,Sun,Dinner,2 +9.6,4.0,Female,Yes,Sun,Dinner,2 +34.63,3.55,Male,Yes,Sun,Dinner,2 +34.65,3.68,Male,Yes,Sun,Dinner,4 +23.33,5.65,Male,Yes,Sun,Dinner,2 +45.35,3.5,Male,Yes,Sun,Dinner,3 
+23.17,6.5,Male,Yes,Sun,Dinner,4 +40.55,3.0,Male,Yes,Sun,Dinner,2 +20.69,5.0,Male,No,Sun,Dinner,5 +20.9,3.5,Female,Yes,Sun,Dinner,3 +30.46,2.0,Male,Yes,Sun,Dinner,5 +18.15,3.5,Female,Yes,Sun,Dinner,3 +23.1,4.0,Male,Yes,Sun,Dinner,3 +15.69,1.5,Male,Yes,Sun,Dinner,2 +19.81,4.19,Female,Yes,Thur,Lunch,2 +28.44,2.56,Male,Yes,Thur,Lunch,2 +15.48,2.02,Male,Yes,Thur,Lunch,2 +16.58,4.0,Male,Yes,Thur,Lunch,2 +7.56,1.44,Male,No,Thur,Lunch,2 +10.34,2.0,Male,Yes,Thur,Lunch,2 +43.11,5.0,Female,Yes,Thur,Lunch,4 +13.0,2.0,Female,Yes,Thur,Lunch,2 +13.51,2.0,Male,Yes,Thur,Lunch,2 +18.71,4.0,Male,Yes,Thur,Lunch,3 +12.74,2.01,Female,Yes,Thur,Lunch,2 +13.0,2.0,Female,Yes,Thur,Lunch,2 +16.4,2.5,Female,Yes,Thur,Lunch,2 +20.53,4.0,Male,Yes,Thur,Lunch,4 +16.47,3.23,Female,Yes,Thur,Lunch,3 +26.59,3.41,Male,Yes,Sat,Dinner,3 +38.73,3.0,Male,Yes,Sat,Dinner,4 +24.27,2.03,Male,Yes,Sat,Dinner,2 +12.76,2.23,Female,Yes,Sat,Dinner,2 +30.06,2.0,Male,Yes,Sat,Dinner,3 +25.89,5.16,Male,Yes,Sat,Dinner,4 +48.33,9.0,Male,No,Sat,Dinner,4 +13.27,2.5,Female,Yes,Sat,Dinner,2 +28.17,6.5,Female,Yes,Sat,Dinner,3 +12.9,1.1,Female,Yes,Sat,Dinner,2 +28.15,3.0,Male,Yes,Sat,Dinner,5 +11.59,1.5,Male,Yes,Sat,Dinner,2 +7.74,1.44,Male,Yes,Sat,Dinner,2 +30.14,3.09,Female,Yes,Sat,Dinner,4 +12.16,2.2,Male,Yes,Fri,Lunch,2 +13.42,3.48,Female,Yes,Fri,Lunch,2 +8.58,1.92,Male,Yes,Fri,Lunch,1 +15.98,3.0,Female,No,Fri,Lunch,3 +13.42,1.58,Male,Yes,Fri,Lunch,2 +16.27,2.5,Female,Yes,Fri,Lunch,2 +10.09,2.0,Female,Yes,Fri,Lunch,2 +20.45,3.0,Male,No,Sat,Dinner,4 +13.28,2.72,Male,No,Sat,Dinner,2 +22.12,2.88,Female,Yes,Sat,Dinner,2 +24.01,2.0,Male,Yes,Sat,Dinner,4 +15.69,3.0,Male,Yes,Sat,Dinner,3 +11.61,3.39,Male,No,Sat,Dinner,2 +10.77,1.47,Male,No,Sat,Dinner,2 +15.53,3.0,Male,Yes,Sat,Dinner,2 +10.07,1.25,Male,No,Sat,Dinner,2 +12.6,1.0,Male,Yes,Sat,Dinner,2 +32.83,1.17,Male,Yes,Sat,Dinner,2 +35.83,4.67,Female,No,Sat,Dinner,3 +29.03,5.92,Male,No,Sat,Dinner,3 +27.18,2.0,Female,Yes,Sat,Dinner,2 +22.67,2.0,Male,Yes,Sat,Dinner,2 
+17.82,1.75,Male,No,Sat,Dinner,2 +18.78,3.0,Female,No,Thur,Dinner,2 diff --git a/pandas/tests/io/parser/data/tips.csv.bz2 b/pandas/tests/io/parser/data/tips.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..1452896b05e9d41f58ffd816a0459d86796718a6 GIT binary patch literal 1316 zcmV+<1>5>UT4*^jL0KkKS@WgHUjPpp-+%xR00n>G1qTcuzHi=eU zr8L5NfM@_34^ia=nn@4@ngc)pXm>4ki*@VR?6|SRoF#LZ+TkL$)Z)}c<#mBig_KMX zruJeOi&bv;V=*04xP@hDQp(ibF*2pqxW%nuMr@F6Gix?+fsH|aKayy7UwGa_-`dVs zYfM$)R7$k8wpC6gfmM#M!-v|)iP#1h4cPkh|rkJNTD3*02| zUew#%bX<$c*~vCvMH>_%oV^S&6a+#ukskADG3ECrBRBE^v4aChy? zvDazQUv(jtyOFJd%+RitVq;Fo?$ru4tx8y4RWLAw3OQ&r5YZ6QA(|s=%EqEnNvFyDucBxbJ63X0f6|L)lrAb?vZoDHd%^>qwTK z8M-E+R_N`PibFFSF!cCl2Z7}>xeJ`*<3&DX2?dNalnbN*vYZ7QTLis}+CyTbyv{>s zl!hm_!_I4KZE}>uSzBr=*www83fCT-SPZ&+p@dCkFG(R6{D)ETHdAf-8>fnW#-GXdM4pE5VK!{hIp z4{*7H7hK39V*E6-z)7yKmA;#^4 z#PVN7@@@mJL*EhAX#`mH2SAk2lkhNXJBL>BHS&`^r&JS)>z58UjoYiOCqY*zmz*K6 z1SFlk-!Cn`6liVaz=_bPhSWpu1LJ>%Cxlk3T;w2WIQ0LRX3%vrxUPW z8d$X$uIXc_sI{9kN=EXFie6i&h29y!AZcb)r??rFOLu%3R3P<2gpt$oRe1O6gk~8T zu3j+kM{M-PhPbG60sxBGP*RgE)NL!@Yr%+f=+n7l@JL0;84IYj5yo31-0M)BHp<)Q zzkK_6UA}%i|M3mU6cFV&C+q8L8zqA-)xv!>^z@7=Fgi9q_iLEzwg+!G2w0Ts9jf*M z64F>g8RrtB4m-(FnM=?v>|@tRdI1$7H2kMsssN5^GU(*!z`p{ft@Qr;@_OlzdPSq# z=N&m=z8R{dV?dV-Iwe>fL1(0h{JJ}+<6sZ(@ePlLCs;FVmX?rYPxs1DA(^whpU+gQLdb{bOK!0;_ zkQW*TzXUDj{aqJ}zCZT`AFw?MCRq$YLmUun3sPt|TJ|F1y1->qh6EwxZc5srUOK?6 zfIOA24Gq;xs91xZWkXI-kgFkpK@VM+dImzp9WY2eRlGn`2@#FO*RJOK&vl0mX5&x| zsC*~R>SEi53Wfn0JC1s5&DImTC?CmS%t%KJn8SnJ{vz7Tu;z{(oX1Uj?2r-D=FHLg z#Nx)*tqL1*0`$uskSzVPPI~Zw87JK{kHS;|mjvLPazsSBBGTEE(XeUKcA)Oa1!1&{ ziGd~d!Xgpq$A_L=)+{U2btCFAD_NiGHe#QuSj!mhzmK3jN5V2e#ai_;@D^ZS3^-kH z6guhK*S?INWvhtT8n-^y8%I8HZbrKc2koF=btc|VG&cU-G4a~h=kf7qrTv=Ut%I~S zEXzKRMTs`<+xJ_K%nb(}Ie8d~S$W#@BiccQnPiO(+O^Yd9ou<9tf*;o$=WeUAZqAG zyzyj!F_p;rzPQ?Y92;+@To35Y<=xOSTm>@DJ;}6?*Lzr=TgaG9BIbr{y}$`b72TY! 
zqYYtgpVJv*bV|eFpvy$Pm>HFtbh_Na_)b19LfLd-0+3QVd;u1iG1e^0tsmq27&c@f zqhD+!jOz~T@n@5$<6yJqL9iFfH0&B9mSe(Zd*O_H&`()&cv#qX>*83gV@pnS)Uxa6 zh&!W4Kw{zbuyG*bJ30s^kL%1hKc#3Y!TLa1|HGI+q2~|%8;0j+sEAdd#O2^p#_J5{ zqk&o!uGkw*Xq2S)W72nPTLSJR3mF;xQOdr}*By;^C3XK=k7;*$ zylq6O8Vck|96AOM^M;z(GGMh%)?T{?8o*P+jIR3%VPB~S`#)bVj@Hps@zV;k&aoL? zJT_x>_m~9QgT~p5h literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/unicode_series.csv b/pandas/tests/io/parser/data/unicode_series.csv new file mode 100644 index 00000000..2485e149 --- /dev/null +++ b/pandas/tests/io/parser/data/unicode_series.csv @@ -0,0 +1,18 @@ +1617,King of New York (1990) +1618,All Things Fair (1996) +1619,"Sixth Man, The (1997)" +1620,Butterfly Kiss (1995) +1621,"Paris, France (1993)" +1622,"Cérémonie, La (1995)" +1623,Hush (1998) +1624,Nightwatch (1997) +1625,Nobody Loves Me (Keiner liebt mich) (1994) +1626,"Wife, The (1995)" +1627,Lamerica (1994) +1628,Nico Icon (1995) +1629,"Silence of the Palace, The (Saimt el Qusur) (1994)" +1630,"Slingshot, The (1993)" +1631,Land and Freedom (Tierra y libertad) (1995) +1632,Á köldum klaka (Cold Fever) (1994) +1633,Etz Hadomim Tafus (Under the Domin Tree) (1994) +1634,Two Friends (1986) diff --git a/pandas/tests/io/parser/data/utf16_ex.txt b/pandas/tests/io/parser/data/utf16_ex.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0b452a2bd5ff25e752f015f0f9a04da7d4106f3 GIT binary patch literal 11406 zcmds-%~Bgj5Xa}bsmeR-6T|_h@^O^0aBu;kfHD=|WWZRlEQkb7;kWanaP)ygDmNdJ z{D0G0c9z}QmC(e-DQb(=bWcxrPfvf%{`2>mJ9QUs?9Sc9UAoNe>209r*gbG-Zm2g# z_x;;QqnUd=gJ*(+IdCJQ^||jB=%+U+=b*`|c0XKG)x|C?HACZA-EZJv~X=*LvG} z+wyNKofxa`Yxk9VCTe3n@4PNX;w%%D2cnZn;)x(rFA-Sh8e4JS3i^Yd6}PJ2KWpBJ zG?cqt&3&oo*h{|VUg`aW;h|Gw(){y4ByteZd|KtZW?`{DWv_~ zYc%&3cjia1sOd~oN8YNY-tLg)|1s+uzrS6wWJxS7mb8ELS=3x-qXoAElNs z-Qgp{-=$|o8{gg70M*W=C87&Wsc3MVujWppOHd_p6u|Y=i5%> z6X}7-@?j>;JLx3kCiXrLon>$=PEkLlkh(vs9Vas%w66W4Mx+wO3Sv@*<6Shm;3?!F zOSu&1WZQNk@hrdT8OUilz7la>OLF>_WOfpo>nTY`d?=>^u 
z1OIZd(rT>U@1{pVol1XvU7F)0-3P37bwRC1QE*2rU~j3Y+c=DOx$kz;p886VI(E~a zFY~~3I^&S=WG#!8)&fdycBIQcwe!}Ap{6y7u5_ZfL90WXrE0?LUWF6cyh>VmHznbP z_Y=9YYixZaTc?ZMJRZsz!&!7Ztwcqqhn?G!MdAZrc*%-R&$WKlVWDK*H0*YznSL9r zY3;P~Bge6q??RQWb!@B-`_MRA+^8%%ubJBImD9?$tDXBO3#EFujyqAEyYe_B+*hzJ zI_CGhXRXr5TIh|=Gru}BCjWEKhgF^PeIc#}j%)sx@^Mse!ohWF!{l z?Vd8#jGopkIUo*HIUr(Hac*;2h2Ex6g;wXL(({Iw!EVFJ77x|S7)yV#@zA>JIOLzz z9!LzQKF+$Sqze6+-4-u$%WOWYd7k+?gAHYk>{;Pw?+R;LFO??3o2DJ#wY93bc|3RT z=9!Nfcn-ekDLgIw6Ez>} z|43Z|Tj%y%^L>W<>0!%hflfZ%G460D%A~DY8Yy?+H?^eY+vo+$rf*1Yh<}~>_bp-C z*9}%-Hc>3`sblLaeoFXq`r?}eDoeCL@1C!~$XV+*BKBp=M4J34ZP^zJ#mqYP=Yg}w z#TgAHtMj*<)=GbZ-l!<)RFm!7eG>76P6uagNB^-li$K+xC0-WzC}@U|-i`N40f}+2 zoJQa=JY)2&xDDiMNGz-D+tBBuZ}a)^+gI;`E~h7AIF%Y1y`4Ji{#wRY>IwgB5jckZ l`Jl^b0L#WRTWx%xckYudzlx2A)lCd%k#Qu*8vgFv_`m7kfOG%= literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/utf16_ex_small.zip b/pandas/tests/io/parser/data/utf16_ex_small.zip new file mode 100644 index 0000000000000000000000000000000000000000..b0560c1b1f6c41307b575f2a86509021b49649f4 GIT binary patch literal 285 zcmWIWW@Zs#U|`^2c&?n{%@J5Cmki|10Ae8q8HUo5G()ra)Qb4x+{Bz5y^@NO&=5`r zX2v6bB0;#cf}4SnIF&U?+Ut|9%K z*xYyP>aL!jYp@|<#>O`+A~_|u(y}H_2)et1Z)sMQR;tn2-aQ>H(-D~jm`P25a**gJ;0ll4Wxq+ M2qS>>dJu;J0ImR5j{pDw literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/data/utf32_ex_small.zip b/pandas/tests/io/parser/data/utf32_ex_small.zip new file mode 100644 index 0000000000000000000000000000000000000000..9a6d5c08da9db1e064cb3bd1a687b7d658ff0ff5 GIT binary patch literal 251 zcmWIWW@Zs#U|`^2kZe~C*s;8}q7=w8U}Rtr0@9@=X~stJsTJ|XxrsSBdL8lcswfDT&J>R_h%g)9*)k*iJzu`Vo7G7}ZRn7X%9kSx(KK8%%&3Sk6zxz?+dtgaNlRfi49C1u%)t(E;A9Y#>QSAanrIOFZzWtsX;wHzSh>18!S^7Bet1D1ay+z;Hu=H!B-Rf)NPqfpiIo!vFy7 CkUIMS literal 0 HcmV?d00001 diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py new file mode 100644 index 00000000..1737f14e --- /dev/null +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -0,0 +1,610 
@@ +""" +Tests that apply specifically to the CParser. Unless specifically stated +as a CParser-specific issue, the goal is to eventually move as many of +these tests out of this module as soon as the Python parser can accept +further arguments when parsing. +""" + +from io import BytesIO, StringIO, TextIOWrapper +import mmap +import os +import tarfile + +import numpy as np +import pytest + +from pandas.errors import ParserError +import pandas.util._test_decorators as td + +from pandas import DataFrame, concat +import pandas._testing as tm + + +@pytest.mark.parametrize( + "malformed", + ["1\r1\r1\r 1\r 1\r", "1\r1\r1\r 1\r 1\r11\r", "1\r1\r1\r 1\r 1\r11\r1\r"], + ids=["words pointer", "stream pointer", "lines pointer"], +) +def test_buffer_overflow(c_parser_only, malformed): + # see gh-9205: test certain malformed input files that cause + # buffer overflows in tokenizer.c + msg = "Buffer overflow caught - possible malformed input file." + parser = c_parser_only + + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(malformed)) + + +def test_buffer_rd_bytes(c_parser_only): + # see gh-12098: src->buffer in the C parser can be freed twice leading + # to a segfault if a corrupt gzip file is read with 'read_csv', and the + # buffer is filled more than once before gzip raises an Exception. 
+ + data = ( + "\x1F\x8B\x08\x00\x00\x00\x00\x00\x00\x03\xED\xC3\x41\x09" + "\x00\x00\x08\x00\xB1\xB7\xB6\xBA\xFE\xA5\xCC\x21\x6C\xB0" + "\xA6\x4D" + "\x55" * 267 + "\x7D\xF7\x00\x91\xE0\x47\x97\x14\x38\x04\x00" + "\x1f\x8b\x08\x00VT\x97V\x00\x03\xed]\xefO" + ) + parser = c_parser_only + + for _ in range(100): + try: + parser.read_csv(StringIO(data), compression="gzip", delim_whitespace=True) + except Exception: + pass + + +def test_delim_whitespace_custom_terminator(c_parser_only): + # See gh-12912 + data = "a b c~1 2 3~4 5 6~7 8 9" + parser = c_parser_only + + df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True) + expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) + tm.assert_frame_equal(df, expected) + + +def test_dtype_and_names_error(c_parser_only): + # see gh-8833: passing both dtype and names + # resulting in an error reporting issue + parser = c_parser_only + data = """ +1.0 1 +2.0 2 +3.0 3 +""" + # base cases + result = parser.read_csv(StringIO(data), sep=r"\s+", header=None) + expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]]) + tm.assert_frame_equal(result, expected) + + result = parser.read_csv(StringIO(data), sep=r"\s+", header=None, names=["a", "b"]) + expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + # fallback casting + result = parser.read_csv( + StringIO(data), sep=r"\s+", header=None, names=["a", "b"], dtype={"a": np.int32} + ) + expected = DataFrame([[1, 1], [2, 2], [3, 3]], columns=["a", "b"]) + expected["a"] = expected["a"].astype(np.int32) + tm.assert_frame_equal(result, expected) + + data = """ +1.0 1 +nan 2 +3.0 3 +""" + # fallback casting, but not castable + with pytest.raises(ValueError, match="cannot safely convert"): + parser.read_csv( + StringIO(data), + sep=r"\s+", + header=None, + names=["a", "b"], + dtype={"a": np.int32}, + ) + + +@pytest.mark.parametrize( + "match,kwargs", + [ + # For each of these cases, 
all of the dtypes are valid, just unsupported. + ( + ( + "the dtype datetime64 is not supported for parsing, " + "pass this column using parse_dates instead" + ), + dict(dtype={"A": "datetime64", "B": "float64"}), + ), + ( + ( + "the dtype datetime64 is not supported for parsing, " + "pass this column using parse_dates instead" + ), + dict(dtype={"A": "datetime64", "B": "float64"}, parse_dates=["B"]), + ), + ( + "the dtype timedelta64 is not supported for parsing", + dict(dtype={"A": "timedelta64", "B": "float64"}), + ), + ("the dtype 262144b) + parser = c_parser_only + header_narrow = "\t".join(["COL_HEADER_" + str(i) for i in range(10)]) + "\n" + data_narrow = "\t".join(["somedatasomedatasomedata1" for _ in range(10)]) + "\n" + header_wide = "\t".join(["COL_HEADER_" + str(i) for i in range(15)]) + "\n" + data_wide = "\t".join(["somedatasomedatasomedata2" for _ in range(15)]) + "\n" + test_input = header_narrow + data_narrow * 1050 + header_wide + data_wide * 2 + + df = parser.read_csv(StringIO(test_input), sep="\t", nrows=1010) + + assert df.size == 1010 * 10 + + +def test_float_precision_round_trip_with_text(c_parser_only): + # see gh-15140 + parser = c_parser_only + df = parser.read_csv(StringIO("a"), header=None, float_precision="round_trip") + tm.assert_frame_equal(df, DataFrame({0: ["a"]})) + + +def test_large_difference_in_columns(c_parser_only): + # see gh-14125 + parser = c_parser_only + + count = 10000 + large_row = ("X," * count)[:-1] + "\n" + normal_row = "XXXXXX XXXXXX,111111111111111\n" + test_input = (large_row + normal_row * 6)[:-1] + + result = parser.read_csv(StringIO(test_input), header=None, usecols=[0]) + rows = test_input.split("\n") + + expected = DataFrame([row.split(",")[0] for row in rows]) + tm.assert_frame_equal(result, expected) + + +def test_data_after_quote(c_parser_only): + # see gh-15910 + parser = c_parser_only + + data = 'a\n1\n"b"a' + result = parser.read_csv(StringIO(data)) + + expected = DataFrame({"a": ["1", "ba"]}) + 
tm.assert_frame_equal(result, expected) + + +def test_comment_whitespace_delimited(c_parser_only, capsys): + parser = c_parser_only + test_input = """\ +1 2 +2 2 3 +3 2 3 # 3 fields +4 2 3# 3 fields +5 2 # 2 fields +6 2# 2 fields +7 # 1 field, NaN +8# 1 field, NaN +9 2 3 # skipped line +# comment""" + df = parser.read_csv( + StringIO(test_input), + comment="#", + header=None, + delimiter="\\s+", + skiprows=0, + error_bad_lines=False, + ) + captured = capsys.readouterr() + # skipped lines 2, 3, 4, 9 + for line_num in (2, 3, 4, 9): + assert "Skipping line {}".format(line_num) in captured.err + expected = DataFrame([[1, 2], [5, 2], [6, 2], [7, np.nan], [8, np.nan]]) + tm.assert_frame_equal(df, expected) + + +def test_file_like_no_next(c_parser_only): + # gh-16530: the file-like need not have a "next" or "__next__" + # attribute despite having an "__iter__" attribute. + # + # NOTE: This is only true for the C engine, not Python engine. + class NoNextBuffer(StringIO): + def __next__(self): + raise AttributeError("No next method") + + next = __next__ + + parser = c_parser_only + data = "a\n1" + + expected = DataFrame({"a": [1]}) + result = parser.read_csv(NoNextBuffer(data)) + + tm.assert_frame_equal(result, expected) + + +def test_buffer_rd_bytes_bad_unicode(c_parser_only): + # see gh-22748 + t = BytesIO(b"\xB0") + t = TextIOWrapper(t, encoding="ascii", errors="surrogateescape") + msg = "'utf-8' codec can't encode character" + with pytest.raises(UnicodeError, match=msg): + c_parser_only.read_csv(t, encoding="UTF-8") + + +@pytest.mark.parametrize("tar_suffix", [".tar", ".tar.gz"]) +def test_read_tarfile(c_parser_only, csv_dir_path, tar_suffix): + # see gh-16530 + # + # Unfortunately, Python's CSV library can't handle + # tarfile objects (expects string, not bytes when + # iterating through a file-like). 
+ parser = c_parser_only + tar_path = os.path.join(csv_dir_path, "tar_csv" + tar_suffix) + + with tarfile.open(tar_path, "r") as tar: + data_file = tar.extractfile("tar_data.csv") + + out = parser.read_csv(data_file) + expected = DataFrame({"a": [1]}) + tm.assert_frame_equal(out, expected) + + +@pytest.mark.high_memory +def test_bytes_exceed_2gb(c_parser_only): + # see gh-16798 + # + # Read from a "CSV" that has a column larger than 2GB. + parser = c_parser_only + + if parser.low_memory: + pytest.skip("not a high_memory test") + + csv = StringIO("strings\n" + "\n".join(["x" * (1 << 20) for _ in range(2100)])) + df = parser.read_csv(csv) + assert not df.empty + + +def test_chunk_whitespace_on_boundary(c_parser_only): + # see gh-9735: this issue is C parser-specific (bug when + # parsing whitespace and characters at chunk boundary) + # + # This test case has a field too large for the Python parser / CSV library. + parser = c_parser_only + + chunk1 = "a" * (1024 * 256 - 2) + "\na" + chunk2 = "\n a" + result = parser.read_csv(StringIO(chunk1 + chunk2), header=None) + + expected = DataFrame(["a" * (1024 * 256 - 2), "a", " a"]) + tm.assert_frame_equal(result, expected) + + +def test_file_handles_mmap(c_parser_only, csv1): + # gh-14418 + # + # Don't close user provided file handles. 
+ parser = c_parser_only + + with open(csv1, "r") as f: + m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + parser.read_csv(m) + + assert not m.closed + m.close() + + +def test_file_binary_mode(c_parser_only): + # see gh-23779 + parser = c_parser_only + expected = DataFrame([[1, 2, 3], [4, 5, 6]]) + + with tm.ensure_clean() as path: + with open(path, "w") as f: + f.write("1,2,3\n4,5,6") + + with open(path, "rb") as f: + result = parser.read_csv(f, header=None) + tm.assert_frame_equal(result, expected) + + +def test_unix_style_breaks(c_parser_only): + # GH 11020 + parser = c_parser_only + with tm.ensure_clean() as path: + with open(path, "w", newline="\n") as f: + f.write("blah\n\ncol_1,col_2,col_3\n\n") + result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c") + expected = DataFrame(columns=["col_1", "col_2", "col_3"]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_comment.py b/pandas/tests/io/parser/test_comment.py new file mode 100644 index 00000000..60e32d7c --- /dev/null +++ b/pandas/tests/io/parser/test_comment.py @@ -0,0 +1,136 @@ +""" +Tests that comments are properly handled during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.mark.parametrize("na_values", [None, ["NaN"]]) +def test_comment(all_parsers, na_values): + parser = all_parsers + data = """A,B,C +1,2.,4.#hello world +5.,NaN,10.0 +""" + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", na_values=na_values) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "read_kwargs", [dict(), dict(lineterminator="*"), dict(delim_whitespace=True)] +) +def test_line_comment(all_parsers, read_kwargs): + parser = all_parsers + data = """# empty +A,B,C +1,2.,4.#hello world +#ignore this line 
+5.,NaN,10.0 +""" + if read_kwargs.get("delim_whitespace"): + data = data.replace(",", " ") + elif read_kwargs.get("lineterminator"): + if parser.engine != "c": + pytest.skip("Custom terminator not supported with Python engine") + + data = data.replace("\n", read_kwargs.get("lineterminator")) + + read_kwargs["comment"] = "#" + result = parser.read_csv(StringIO(data), **read_kwargs) + + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + tm.assert_frame_equal(result, expected) + + +def test_comment_skiprows(all_parsers): + parser = all_parsers + data = """# empty +random line +# second empty line +1,2,3 +A,B,C +1,2.,4. +5.,NaN,10.0 +""" + # This should ignore the first four lines (including comments). + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", skiprows=4) + tm.assert_frame_equal(result, expected) + + +def test_comment_header(all_parsers): + parser = all_parsers + data = """# empty +# second empty line +1,2,3 +A,B,C +1,2.,4. +5.,NaN,10.0 +""" + # Header should begin at the second non-comment line. + expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", header=1) + tm.assert_frame_equal(result, expected) + + +def test_comment_skiprows_header(all_parsers): + parser = all_parsers + data = """# empty +# second empty line +# third empty line +X,Y,Z +1,2,3 +A,B,C +1,2.,4. +5.,NaN,10.0 +""" + # Skiprows should skip the first 4 lines (including comments), + # while header should start from the second non-commented line, + # starting with line 5. 
+ expected = DataFrame( + [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"] + ) + result = parser.read_csv(StringIO(data), comment="#", skiprows=4, header=1) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("comment_char", ["#", "~", "&", "^", "*", "@"]) +def test_custom_comment_char(all_parsers, comment_char): + parser = all_parsers + data = "a,b,c\n1,2,3#ignore this!\n4,5,6#ignorethistoo" + result = parser.read_csv( + StringIO(data.replace("#", comment_char)), comment=comment_char + ) + + expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("header", ["infer", None]) +def test_comment_first_line(all_parsers, header): + # see gh-4623 + parser = all_parsers + data = "# notes\na,b,c\n# more notes\n1,2,3" + + if header is None: + expected = DataFrame({0: ["a", "1"], 1: ["b", "2"], 2: ["c", "3"]}) + else: + expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) + + result = parser.read_csv(StringIO(data), comment="#", header=header) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py new file mode 100644 index 00000000..4c02a37b --- /dev/null +++ b/pandas/tests/io/parser/test_common.py @@ -0,0 +1,2072 @@ +""" +Tests that work on both the Python and C engines but do not have a +specific classification into the other test modules. 
+""" +import codecs +import csv +from datetime import datetime +from io import StringIO +import os +import platform +from tempfile import TemporaryFile +from urllib.error import URLError + +import numpy as np +import pytest + +from pandas._libs.tslib import Timestamp +from pandas.errors import DtypeWarning, EmptyDataError, ParserError + +from pandas import DataFrame, Index, MultiIndex, Series, compat, concat +import pandas._testing as tm + +from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser + + +def test_override_set_noconvert_columns(): + # see gh-17351 + # + # Usecols needs to be sorted in _set_noconvert_columns based + # on the test_usecols_with_parse_dates test from test_usecols.py + class MyTextFileReader(TextFileReader): + def __init__(self): + self._currow = 0 + self.squeeze = False + + class MyCParserWrapper(CParserWrapper): + def _set_noconvert_columns(self): + if self.usecols_dtype == "integer": + # self.usecols is a set, which is documented as unordered + # but in practice, a CPython set of integers is sorted. + # In other implementations this assumption does not hold. 
+ # The following code simulates a different order, which + # before GH 17351 would cause the wrong columns to be + # converted via the parse_dates parameter + self.usecols = list(self.usecols) + self.usecols.reverse() + return CParserWrapper._set_noconvert_columns(self) + + data = """a,b,c,d,e +0,1,20140101,0900,4 +0,1,20140102,1000,4""" + + parse_dates = [[1, 2]] + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + + parser = MyTextFileReader() + parser.options = { + "usecols": [0, 2, 3], + "parse_dates": parse_dates, + "delimiter": ",", + } + parser._engine = MyCParserWrapper(StringIO(data), **parser.options) + + result = parser.read() + tm.assert_frame_equal(result, expected) + + +def test_empty_decimal_marker(all_parsers): + data = """A|B|C +1|2,334|5 +10|13|10. +""" + # Parsers support only length-1 decimals + msg = "Only length-1 decimal markers supported" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), decimal="") + + +def test_bad_stream_exception(all_parsers, csv_dir_path): + # see gh-13652 + # + # This test validates that both the Python engine and C engine will + # raise UnicodeDecodeError instead of C engine raising ParserError + # and swallowing the exception that caused read to fail. + path = os.path.join(csv_dir_path, "sauron.SHIFT_JIS.csv") + codec = codecs.lookup("utf-8") + utf8 = codecs.lookup("utf-8") + parser = all_parsers + msg = "'utf-8' codec can't decode byte" + + # Stream must be binary UTF8. 
+ with open(path, "rb") as handle, codecs.StreamRecoder( + handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter + ) as stream: + + with pytest.raises(UnicodeDecodeError, match=msg): + parser.read_csv(stream) + + +def test_read_csv_local(all_parsers, csv1): + prefix = "file:///" if compat.is_platform_windows() else "file://" + parser = all_parsers + + fname = prefix + str(os.path.abspath(csv1)) + result = parser.read_csv(fname, index_col=0, parse_dates=True) + + expected = DataFrame( + [ + [0.980269, 3.685731, -0.364216805298, -1.159738], + [1.047916, -0.041232, -0.16181208307, 0.212549], + [0.498581, 0.731168, -0.537677223318, 1.346270], + [1.120202, 1.567621, 0.00364077397681, 0.675253], + [-0.487094, 0.571455, -1.6116394093, 0.103469], + [0.836649, 0.246462, 0.588542635376, 1.062782], + [-0.157161, 1.340307, 1.1957779562, -1.097007], + ], + columns=["A", "B", "C", "D"], + index=Index( + [ + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + datetime(2000, 1, 6), + datetime(2000, 1, 7), + datetime(2000, 1, 10), + datetime(2000, 1, 11), + ], + name="index", + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_1000_sep(all_parsers): + parser = all_parsers + data = """A|B|C +1|2,334|5 +10|13|10. +""" + expected = DataFrame({"A": [1, 10], "B": [2334, 13], "C": [5, 10.0]}) + + result = parser.read_csv(StringIO(data), sep="|", thousands=",") + tm.assert_frame_equal(result, expected) + + +def test_squeeze(all_parsers): + data = """\ +a,1 +b,2 +c,3 +""" + parser = all_parsers + index = Index(["a", "b", "c"], name=0) + expected = Series([1, 2, 3], name=1, index=index) + + result = parser.read_csv(StringIO(data), index_col=0, header=None, squeeze=True) + tm.assert_series_equal(result, expected) + + # see gh-8217 + # + # Series should not be a view. 
+ assert not result._is_view + + +def test_malformed(all_parsers): + # see gh-6607 + parser = all_parsers + data = """ignore +A,B,C +1,2,3 # comment +1,2,3,4,5 +2,3,4 +""" + msg = "Expected 3 fields in line 4, saw 5" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), header=1, comment="#") + + +@pytest.mark.parametrize("nrows", [5, 3, None]) +def test_malformed_chunks(all_parsers, nrows): + data = """ignore +A,B,C +skip +1,2,3 +3,5,10 # comment +1,2,3,4,5 +2,3,4 +""" + parser = all_parsers + msg = "Expected 3 fields in line 6, saw 5" + reader = parser.read_csv( + StringIO(data), header=1, comment="#", iterator=True, chunksize=1, skiprows=[2] + ) + + with pytest.raises(ParserError, match=msg): + reader.read(nrows) + + +def test_unnamed_columns(all_parsers): + data = """A,B,C,, +1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], + dtype=np.int64, + columns=["A", "B", "C", "Unnamed: 3", "Unnamed: 4"], + ) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_csv_mixed_type(all_parsers): + data = """A,B,C +a,1,2 +b,3,4 +c,4,5 +""" + parser = all_parsers + expected = DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_low_memory_no_rows_with_index(all_parsers): + # see gh-21141 + parser = all_parsers + + if not parser.low_memory: + pytest.skip("This is a low-memory specific test") + + data = """A,B,C +1,1,1,2 +2,2,3,4 +3,3,4,5 +""" + result = parser.read_csv(StringIO(data), low_memory=True, index_col=0, nrows=0) + expected = DataFrame(columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_dataframe(all_parsers, csv1): + parser = all_parsers + result = parser.read_csv(csv1, index_col=0, parse_dates=True) + + expected = DataFrame( + [ + [0.980269, 
3.685731, -0.364216805298, -1.159738], + [1.047916, -0.041232, -0.16181208307, 0.212549], + [0.498581, 0.731168, -0.537677223318, 1.346270], + [1.120202, 1.567621, 0.00364077397681, 0.675253], + [-0.487094, 0.571455, -1.6116394093, 0.103469], + [0.836649, 0.246462, 0.588542635376, 1.062782], + [-0.157161, 1.340307, 1.1957779562, -1.097007], + ], + columns=["A", "B", "C", "D"], + index=Index( + [ + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + datetime(2000, 1, 6), + datetime(2000, 1, 7), + datetime(2000, 1, 10), + datetime(2000, 1, 11), + ], + name="index", + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_no_index_name(all_parsers, csv_dir_path): + parser = all_parsers + csv2 = os.path.join(csv_dir_path, "test2.csv") + result = parser.read_csv(csv2, index_col=0, parse_dates=True) + + expected = DataFrame( + [ + [0.980269, 3.685731, -0.364216805298, -1.159738, "foo"], + [1.047916, -0.041232, -0.16181208307, 0.212549, "bar"], + [0.498581, 0.731168, -0.537677223318, 1.346270, "baz"], + [1.120202, 1.567621, 0.00364077397681, 0.675253, "qux"], + [-0.487094, 0.571455, -1.6116394093, 0.103469, "foo2"], + ], + columns=["A", "B", "C", "D", "E"], + index=Index( + [ + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + datetime(2000, 1, 6), + datetime(2000, 1, 7), + ] + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_wrong_num_columns(all_parsers): + # Too few columns. 
+ data = """A,B,C,D,E,F +1,2,3,4,5,6 +6,7,8,9,10,11,12 +11,12,13,14,15,16 +""" + parser = all_parsers + msg = "Expected 6 fields in line 3, saw 7" + + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data)) + + +def test_read_duplicate_index_explicit(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo,12,13,14,15 +bar,12,13,14,15 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=0) + + expected = DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + columns=["A", "B", "C", "D"], + index=Index(["foo", "bar", "baz", "qux", "foo", "bar"], name="index"), + ) + tm.assert_frame_equal(result, expected) + + +def test_read_duplicate_index_implicit(all_parsers): + data = """A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo,12,13,14,15 +bar,12,13,14,15 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + + expected = DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + columns=["A", "B", "C", "D"], + index=Index(["foo", "bar", "baz", "qux", "foo", "bar"]), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + "A,B\nTrue,1\nFalse,2\nTrue,3", + dict(), + DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]), + ), + ( + "A,B\nYES,1\nno,2\nyes,3\nNo,3\nYes,3", + dict(true_values=["yes", "Yes", "YES"], false_values=["no", "NO", "No"]), + DataFrame( + [[True, 1], [False, 2], [True, 3], [False, 3], [True, 3]], + columns=["A", "B"], + ), + ), + ( + "A,B\nTRUE,1\nFALSE,2\nTRUE,3", + dict(), + DataFrame([[True, 1], [False, 2], [True, 3]], columns=["A", "B"]), + ), + ( + "A,B\nfoo,bar\nbar,foo", + dict(true_values=["foo"], false_values=["bar"]), + DataFrame([[True, False], [False, True]], columns=["A", "B"]), 
+ ), + ], +) +def test_parse_bool(all_parsers, data, kwargs, expected): + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_int_conversion(all_parsers): + data = """A,B +1.0,1 +2.0,2 +3.0,3 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + + expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("nrows", [3, 3.0]) +def test_read_nrows(all_parsers, nrows): + # see gh-10476 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + expected = DataFrame( + [["foo", 2, 3, 4, 5], ["bar", 7, 8, 9, 10], ["baz", 12, 13, 14, 15]], + columns=["index", "A", "B", "C", "D"], + ) + parser = all_parsers + + result = parser.read_csv(StringIO(data), nrows=nrows) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("nrows", [1.2, "foo", -1]) +def test_read_nrows_bad(all_parsers, nrows): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + msg = r"'nrows' must be an integer >=0" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), nrows=nrows) + + +@pytest.mark.parametrize("index_col", [0, "index"]) +def test_read_chunksize_with_index(all_parsers, index_col): + parser = all_parsers + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + reader = parser.read_csv(StringIO(data), index_col=0, chunksize=2) + expected = DataFrame( + [ + ["foo", 2, 3, 4, 5], + ["bar", 7, 8, 9, 10], + ["baz", 12, 13, 14, 15], + ["qux", 12, 13, 14, 15], + ["foo2", 12, 13, 14, 15], + ["bar2", 12, 13, 14, 15], + ], + columns=["index", "A", "B", "C", "D"], + ) + expected = expected.set_index("index") + + chunks = list(reader) + 
tm.assert_frame_equal(chunks[0], expected[:2]) + tm.assert_frame_equal(chunks[1], expected[2:4]) + tm.assert_frame_equal(chunks[2], expected[4:]) + + +@pytest.mark.parametrize("chunksize", [1.3, "foo", 0]) +def test_read_chunksize_bad(all_parsers, chunksize): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + msg = r"'chunksize' must be an integer >=1" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), chunksize=chunksize) + + +@pytest.mark.parametrize("chunksize", [2, 8]) +def test_read_chunksize_and_nrows(all_parsers, chunksize): + # see gh-15755 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = dict(index_col=0, nrows=5) + + reader = parser.read_csv(StringIO(data), chunksize=chunksize, **kwargs) + expected = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(concat(reader), expected) + + +def test_read_chunksize_and_nrows_changing_size(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = dict(index_col=0, nrows=5) + + reader = parser.read_csv(StringIO(data), chunksize=8, **kwargs) + expected = parser.read_csv(StringIO(data), **kwargs) + + tm.assert_frame_equal(reader.get_chunk(size=2), expected.iloc[:2]) + tm.assert_frame_equal(reader.get_chunk(size=4), expected.iloc[2:5]) + + with pytest.raises(StopIteration, match=""): + reader.get_chunk(size=3) + + +def test_get_chunk_passed_chunksize(all_parsers): + parser = all_parsers + data = """A,B,C +1,2,3 +4,5,6 +7,8,9 +1,2,3""" + + reader = parser.read_csv(StringIO(data), chunksize=2) + result = reader.get_chunk() + + expected = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + 
+@pytest.mark.parametrize("kwargs", [dict(), dict(index_col=0)]) +def test_read_chunksize_compat(all_parsers, kwargs): + # see gh-12185 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + reader = parser.read_csv(StringIO(data), chunksize=2, **kwargs) + + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(concat(reader), result) + + +def test_read_chunksize_jagged_names(all_parsers): + # see gh-23509 + parser = all_parsers + data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)]) + + expected = DataFrame([[0] + [np.nan] * 9] * 7 + [[0] * 10]) + reader = parser.read_csv(StringIO(data), names=range(10), chunksize=4) + + result = concat(reader) + tm.assert_frame_equal(result, expected) + + +def test_read_data_list(all_parsers): + parser = all_parsers + kwargs = dict(index_col=0) + data = "A,B,C\nfoo,1,2,3\nbar,4,5,6" + + data_list = [["A", "B", "C"], ["foo", "1", "2", "3"], ["bar", "4", "5", "6"]] + expected = parser.read_csv(StringIO(data), **kwargs) + + parser = TextParser(data_list, chunksize=2, **kwargs) + result = parser.read() + + tm.assert_frame_equal(result, expected) + + +def test_iterator(all_parsers): + # see gh-6607 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = dict(index_col=0) + + expected = parser.read_csv(StringIO(data), **kwargs) + reader = parser.read_csv(StringIO(data), iterator=True, **kwargs) + + first_chunk = reader.read(3) + tm.assert_frame_equal(first_chunk, expected[:3]) + + last_chunk = reader.read(5) + tm.assert_frame_equal(last_chunk, expected[3:]) + + +def test_iterator2(all_parsers): + parser = all_parsers + data = """A,B,C +foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + + reader = parser.read_csv(StringIO(data), iterator=True) + result = list(reader) + + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], 
+ index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(result[0], expected) + + +def test_reader_list(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = dict(index_col=0) + + lines = list(csv.reader(StringIO(data))) + reader = TextParser(lines, chunksize=2, **kwargs) + + expected = parser.read_csv(StringIO(data), **kwargs) + chunks = list(reader) + + tm.assert_frame_equal(chunks[0], expected[:2]) + tm.assert_frame_equal(chunks[1], expected[2:4]) + tm.assert_frame_equal(chunks[2], expected[4:]) + + +def test_reader_list_skiprows(all_parsers): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + parser = all_parsers + kwargs = dict(index_col=0) + + lines = list(csv.reader(StringIO(data))) + reader = TextParser(lines, chunksize=2, skiprows=[1], **kwargs) + + expected = parser.read_csv(StringIO(data), **kwargs) + chunks = list(reader) + + tm.assert_frame_equal(chunks[0], expected[1:3]) + + +def test_iterator_stop_on_chunksize(all_parsers): + # gh-3967: stopping iteration when chunksize is specified + parser = all_parsers + data = """A,B,C +foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + + reader = parser.read_csv(StringIO(data), chunksize=1) + result = list(reader) + + assert len(result) == 3 + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(concat(result), expected) + + +@pytest.mark.parametrize( + "kwargs", [dict(iterator=True, chunksize=1), dict(iterator=True), dict(chunksize=1)] +) +def test_iterator_skipfooter_errors(all_parsers, kwargs): + msg = "'skipfooter' not supported for 'iteration'" + parser = all_parsers + data = "a\n1\n2" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), skipfooter=1, **kwargs) + + +def 
test_nrows_skipfooter_errors(all_parsers): + msg = "'skipfooter' not supported with 'nrows'" + data = "a\n1\n2\n3\n4\n5\n6" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), skipfooter=1, nrows=5) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + """foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""", + dict(index_col=0, names=["index", "A", "B", "C", "D"]), + DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + index=Index(["foo", "bar", "baz", "qux", "foo2", "bar2"], name="index"), + columns=["A", "B", "C", "D"], + ), + ), + ( + """foo,one,2,3,4,5 +foo,two,7,8,9,10 +foo,three,12,13,14,15 +bar,one,12,13,14,15 +bar,two,12,13,14,15 +""", + dict(index_col=[0, 1], names=["index1", "index2", "A", "B", "C", "D"]), + DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + index=MultiIndex.from_tuples( + [ + ("foo", "one"), + ("foo", "two"), + ("foo", "three"), + ("bar", "one"), + ("bar", "two"), + ], + names=["index1", "index2"], + ), + columns=["A", "B", "C", "D"], + ), + ), + ], +) +def test_pass_names_with_index(all_parsers, data, kwargs, expected): + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]]) +def test_multi_index_no_level_names(all_parsers, index_col): + data = """index1,index2,A,B,C,D +foo,one,2,3,4,5 +foo,two,7,8,9,10 +foo,three,12,13,14,15 +bar,one,12,13,14,15 +bar,two,12,13,14,15 +""" + headless_data = "\n".join(data.split("\n")[1:]) + + names = ["A", "B", "C", "D"] + parser = all_parsers + + result = parser.read_csv( + StringIO(headless_data), index_col=index_col, header=None, names=names + ) + expected = parser.read_csv(StringIO(data), index_col=index_col) + + # No index 
names in headless data. + expected.index.names = [None] * 2 + tm.assert_frame_equal(result, expected) + + +def test_multi_index_no_level_names_implicit(all_parsers): + parser = all_parsers + data = """A,B,C,D +foo,one,2,3,4,5 +foo,two,7,8,9,10 +foo,three,12,13,14,15 +bar,one,12,13,14,15 +bar,two,12,13,14,15 +""" + + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [ + [2, 3, 4, 5], + [7, 8, 9, 10], + [12, 13, 14, 15], + [12, 13, 14, 15], + [12, 13, 14, 15], + ], + columns=["A", "B", "C", "D"], + index=MultiIndex.from_tuples( + [ + ("foo", "one"), + ("foo", "two"), + ("foo", "three"), + ("bar", "one"), + ("bar", "two"), + ] + ), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,expected,header", + [ + ("a,b", DataFrame(columns=["a", "b"]), [0]), + ( + "a,b\nc,d", + DataFrame(columns=MultiIndex.from_tuples([("a", "c"), ("b", "d")])), + [0, 1], + ), + ], +) +@pytest.mark.parametrize("round_trip", [True, False]) +def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip): + # see gh-14545 + parser = all_parsers + data = expected.to_csv(index=False) if round_trip else data + + result = parser.read_csv(StringIO(data), header=header) + tm.assert_frame_equal(result, expected) + + +def test_no_unnamed_index(all_parsers): + parser = all_parsers + data = """ id c0 c1 c2 +0 1 0 a b +1 2 0 c d +2 2 2 e f +""" + result = parser.read_csv(StringIO(data), sep=" ") + expected = DataFrame( + [[0, 1, 0, "a", "b"], [1, 2, 0, "c", "d"], [2, 2, 2, "e", "f"]], + columns=["Unnamed: 0", "id", "c0", "c1", "c2"], + ) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_parse_simple_list(all_parsers): + parser = all_parsers + data = """foo +bar baz +qux foo +foo +bar""" + + result = parser.read_csv(StringIO(data), header=None) + expected = DataFrame(["foo", "bar baz", "qux foo", "foo", "bar"]) + tm.assert_frame_equal(result, expected) + + +@tm.network +def test_url(all_parsers, csv_dir_path): + # TODO: FTP 
testing + parser = all_parsers + kwargs = dict(sep="\t") + + url = ( + "https://raw.github.com/pandas-dev/pandas/master/" + "pandas/tests/io/parser/data/salaries.csv" + ) + url_result = parser.read_csv(url, **kwargs) + + local_path = os.path.join(csv_dir_path, "salaries.csv") + local_result = parser.read_csv(local_path, **kwargs) + tm.assert_frame_equal(url_result, local_result) + + +@pytest.mark.slow +def test_local_file(all_parsers, csv_dir_path): + parser = all_parsers + kwargs = dict(sep="\t") + + local_path = os.path.join(csv_dir_path, "salaries.csv") + local_result = parser.read_csv(local_path, **kwargs) + url = "file://localhost/" + local_path + + try: + url_result = parser.read_csv(url, **kwargs) + tm.assert_frame_equal(url_result, local_result) + except URLError: + # Fails on some systems. + pytest.skip("Failing on: " + " ".join(platform.uname())) + + +def test_path_path_lib(all_parsers): + parser = all_parsers + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_csv, lambda p: parser.read_csv(p, index_col=0)) + tm.assert_frame_equal(df, result) + + +def test_path_local_path(all_parsers): + parser = all_parsers + df = tm.makeDataFrame() + result = tm.round_trip_localpath( + df.to_csv, lambda p: parser.read_csv(p, index_col=0) + ) + tm.assert_frame_equal(df, result) + + +def test_nonexistent_path(all_parsers): + # gh-2428: pls no segfault + # gh-14086: raise more helpful FileNotFoundError + # GH#29233 "File foo" instead of "File b'foo'" + parser = all_parsers + path = "{}.csv".format(tm.rands(10)) + + msg = f"File {path} does not exist" if parser.engine == "c" else r"\[Errno 2\]" + with pytest.raises(FileNotFoundError, match=msg) as e: + parser.read_csv(path) + + filename = e.value.filename + + assert path == filename + + +def test_missing_trailing_delimiters(all_parsers): + parser = all_parsers + data = """A,B,C,D +1,2,3,4 +1,3,3, +1,4,5""" + + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [[1, 2, 3, 4], [1, 3, 3, np.nan], 
[1, 4, 5, np.nan]], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + +def test_skip_initial_space(all_parsers): + data = ( + '"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' + "1.00361, 1.12551, 330.65659, 0355626618.16711, 73.48821, " + "314.11625, 1917.09447, 179.71425, 80.000, 240.000, -350, " + "70.06056, 344.98370, 1, 1, -0.689265, -0.692787, " + "0.212036, 14.7674, 41.605, -9999.0, -9999.0, " + "-9999.0, -9999.0, -9999.0, -9999.0, 000, 012, 128" + ) + parser = all_parsers + + result = parser.read_csv( + StringIO(data), + names=list(range(33)), + header=None, + na_values=["-9999.0"], + skipinitialspace=True, + ) + expected = DataFrame( + [ + [ + "09-Apr-2012", + "01:10:18.300", + 2456026.548822908, + 12849, + 1.00361, + 1.12551, + 330.65659, + 355626618.16711, + 73.48821, + 314.11625, + 1917.09447, + 179.71425, + 80.0, + 240.0, + -350, + 70.06056, + 344.9837, + 1, + 1, + -0.689265, + -0.692787, + 0.212036, + 14.7674, + 41.605, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + 0, + 12, + 128, + ] + ] + ) + tm.assert_frame_equal(result, expected) + + +def test_trailing_delimiters(all_parsers): + # see gh-2442 + data = """A,B,C +1,2,3, +4,5,6, +7,8,9,""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=False) + + expected = DataFrame({"A": [1, 4, 7], "B": [2, 5, 8], "C": [3, 6, 9]}) + tm.assert_frame_equal(result, expected) + + +def test_escapechar(all_parsers): + # https://stackoverflow.com/questions/13824840/feature-request-for- + # pandas-read-csv + data = '''SEARCH_TERM,ACTUAL_URL +"bra tv bord","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" +"tv p\xc3\xa5 hjul","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord" +"SLAGBORD, \\"Bergslagen\\", IKEA:s 1700-tals 
serie","http://www.ikea.com/se/sv/catalog/categories/departments/living_room/10475/?se%7cps%7cnonbranded%7cvardagsrum%7cgoogle%7ctv_bord"''' # noqa + + parser = all_parsers + result = parser.read_csv( + StringIO(data), escapechar="\\", quotechar='"', encoding="utf-8" + ) + + assert result["SEARCH_TERM"][2] == 'SLAGBORD, "Bergslagen", IKEA:s 1700-tals serie' + + tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"])) + + +def test_int64_min_issues(all_parsers): + # see gh-2599 + parser = all_parsers + data = "A,B\n0,0\n0," + result = parser.read_csv(StringIO(data)) + + expected = DataFrame({"A": [0, 0], "B": [0, np.nan]}) + tm.assert_frame_equal(result, expected) + + +def test_parse_integers_above_fp_precision(all_parsers): + data = """Numbers +17007000002000191 +17007000002000191 +17007000002000191 +17007000002000191 +17007000002000192 +17007000002000192 +17007000002000192 +17007000002000192 +17007000002000192 +17007000002000194""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + { + "Numbers": [ + 17007000002000191, + 17007000002000191, + 17007000002000191, + 17007000002000191, + 17007000002000192, + 17007000002000192, + 17007000002000192, + 17007000002000192, + 17007000002000192, + 17007000002000194, + ] + } + ) + tm.assert_frame_equal(result, expected) + + +def test_chunks_have_consistent_numerical_type(all_parsers): + parser = all_parsers + integers = [str(i) for i in range(499999)] + data = "a\n" + "\n".join(integers + ["1.0", "2.0"] + integers) + + # Coercions should work without warnings. 
+ with tm.assert_produces_warning(None): + result = parser.read_csv(StringIO(data)) + + assert type(result.a[0]) is np.float64 + assert result.a.dtype == np.float + + +def test_warn_if_chunks_have_mismatched_type(all_parsers): + warning_type = None + parser = all_parsers + integers = [str(i) for i in range(499999)] + data = "a\n" + "\n".join(integers + ["a", "b"] + integers) + + # see gh-3866: if chunks are different types and can't + # be coerced using numerical types, then issue warning. + if parser.engine == "c" and parser.low_memory: + warning_type = DtypeWarning + + with tm.assert_produces_warning(warning_type): + df = parser.read_csv(StringIO(data)) + assert df.a.dtype == np.object + + +@pytest.mark.parametrize("sep", [" ", r"\s+"]) +def test_integer_overflow_bug(all_parsers, sep): + # see gh-2601 + data = "65248E10 11\n55555E55 22\n" + parser = all_parsers + + result = parser.read_csv(StringIO(data), header=None, sep=sep) + expected = DataFrame([[6.5248e14, 11], [5.5555e59, 22]]) + tm.assert_frame_equal(result, expected) + + +def test_catch_too_many_names(all_parsers): + # see gh-5156 + data = """\ +1,2,3 +4,,6 +7,8,9 +10,11,12\n""" + parser = all_parsers + msg = ( + "Too many columns specified: expected 4 and found 3" + if parser.engine == "c" + else "Number of passed names did not match " + "number of header fields in the file" + ) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"]) + + +def test_ignore_leading_whitespace(all_parsers): + # see gh-3374, gh-6607 + parser = all_parsers + data = " a b c\n 1 2 3\n 4 5 6\n 7 8 9" + result = parser.read_csv(StringIO(data), sep=r"\s+") + + expected = DataFrame({"a": [1, 4, 7], "b": [2, 5, 8], "c": [3, 6, 9]}) + tm.assert_frame_equal(result, expected) + + +def test_chunk_begins_with_newline_whitespace(all_parsers): + # see gh-10022 + parser = all_parsers + data = "\n hello\nworld\n" + + result = parser.read_csv(StringIO(data), header=None) + 
expected = DataFrame([" hello", "world"]) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_index(all_parsers): + # see gh-10184 + data = "x,y" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=0) + + expected = DataFrame(columns=["y"], index=Index([], name="x")) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_multi_index(all_parsers): + # see gh-10467 + data = "x,y,z" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=["x", "y"]) + + expected = DataFrame( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"]) + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_reversed_multi_index(all_parsers): + data = "x,y,z" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=[1, 0]) + + expected = DataFrame( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"]) + ) + tm.assert_frame_equal(result, expected) + + +def test_float_parser(all_parsers): + # see gh-9565 + parser = all_parsers + data = "45e-1,4.5,45.,inf,-inf" + result = parser.read_csv(StringIO(data), header=None) + + expected = DataFrame([[float(s) for s in data.split(",")]]) + tm.assert_frame_equal(result, expected) + + +def test_scientific_no_exponent(all_parsers): + # see gh-12215 + df = DataFrame.from_dict({"w": ["2e"], "x": ["3E"], "y": ["42e"], "z": ["632E"]}) + data = df.to_csv(index=False) + parser = all_parsers + + for precision in parser.float_precision_choices: + df_roundtrip = parser.read_csv(StringIO(data), float_precision=precision) + tm.assert_frame_equal(df_roundtrip, df) + + +@pytest.mark.parametrize("conv", [None, np.int64, np.uint64]) +def test_int64_overflow(all_parsers, conv): + data = """ID +00013007854817840016671868 +00013007854817840016749251 +00013007854817840016754630 +00013007854817840016781876 +00013007854817840017028824 +00013007854817840017963235 +00013007854817840018860166""" + parser = all_parsers + + if conv is None: 
+ # 13007854817840016671868 > UINT64_MAX, so this + # will overflow and return object as the dtype. + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [ + "00013007854817840016671868", + "00013007854817840016749251", + "00013007854817840016754630", + "00013007854817840016781876", + "00013007854817840017028824", + "00013007854817840017963235", + "00013007854817840018860166", + ], + columns=["ID"], + ) + tm.assert_frame_equal(result, expected) + else: + # 13007854817840016671868 > UINT64_MAX, so attempts + # to cast to either int64 or uint64 will result in + # an OverflowError being raised. + msg = ( + "(Python int too large to convert to C long)|" + "(long too big to convert)|" + "(int too big to convert)" + ) + + with pytest.raises(OverflowError, match=msg): + parser.read_csv(StringIO(data), converters={"ID": conv}) + + +@pytest.mark.parametrize( + "val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min] +) +def test_int64_uint64_range(all_parsers, val): + # These numbers fall right inside the int64-uint64 + # range, so they should be parsed as string. + parser = all_parsers + result = parser.read_csv(StringIO(str(val)), header=None) + + expected = DataFrame([val]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1] +) +def test_outside_int64_uint64_range(all_parsers, val): + # These numbers fall just outside the int64-uint64 + # range, so they should be parsed as string. + parser = all_parsers + result = parser.read_csv(StringIO(str(val)), header=None) + + expected = DataFrame([str(val)]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("exp_data", [[str(-1), str(2 ** 63)], [str(2 ** 63), str(-1)]]) +def test_numeric_range_too_wide(all_parsers, exp_data): + # No numerical dtype can hold both negative and uint64 + # values, so they should be cast as string. 
+ parser = all_parsers + data = "\n".join(exp_data) + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), header=None) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("iterator", [True, False]) +def test_empty_with_nrows_chunksize(all_parsers, iterator): + # see gh-9535 + parser = all_parsers + expected = DataFrame(columns=["foo", "bar"]) + + nrows = 10 + data = StringIO("foo,bar\n") + + if iterator: + result = next(iter(parser.read_csv(data, chunksize=nrows))) + else: + result = parser.read_csv(data, nrows=nrows) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected,msg", + [ + # gh-10728: WHITESPACE_LINE + ( + "a,b,c\n4,5,6\n ", + dict(), + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # gh-10548: EAT_LINE_COMMENT + ( + "a,b,c\n4,5,6\n#comment", + dict(comment="#"), + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # EAT_CRNL_NOP + ( + "a,b,c\n4,5,6\n\r", + dict(), + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # EAT_COMMENT + ( + "a,b,c\n4,5,6#comment", + dict(comment="#"), + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # SKIP_LINE + ( + "a,b,c\n4,5,6\nskipme", + dict(skiprows=[2]), + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # EAT_LINE_COMMENT + ( + "a,b,c\n4,5,6\n#comment", + dict(comment="#", skip_blank_lines=False), + DataFrame([[4, 5, 6]], columns=["a", "b", "c"]), + None, + ), + # IN_FIELD + ( + "a,b,c\n4,5,6\n ", + dict(skip_blank_lines=False), + DataFrame([["4", 5, 6], [" ", None, None]], columns=["a", "b", "c"]), + None, + ), + # EAT_CRNL + ( + "a,b,c\n4,5,6\n\r", + dict(skip_blank_lines=False), + DataFrame([[4, 5, 6], [None, None, None]], columns=["a", "b", "c"]), + None, + ), + # ESCAPED_CHAR + ( + "a,b,c\n4,5,6\n\\", + dict(escapechar="\\"), + None, + "(EOF following escape character)|(unexpected end of data)", + ), + # ESCAPE_IN_QUOTED_FIELD + ( + 
'a,b,c\n4,5,6\n"\\', + dict(escapechar="\\"), + None, + "(EOF inside string starting at row 2)|(unexpected end of data)", + ), + # IN_QUOTED_FIELD + ( + 'a,b,c\n4,5,6\n"', + dict(escapechar="\\"), + None, + "(EOF inside string starting at row 2)|(unexpected end of data)", + ), + ], + ids=[ + "whitespace-line", + "eat-line-comment", + "eat-crnl-nop", + "eat-comment", + "skip-line", + "eat-line-comment", + "in-field", + "eat-crnl", + "escaped-char", + "escape-in-quoted-field", + "in-quoted-field", + ], +) +def test_eof_states(all_parsers, data, kwargs, expected, msg): + # see gh-10728, gh-10548 + parser = all_parsers + + if expected is None: + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]]) +def test_uneven_lines_with_usecols(all_parsers, usecols): + # see gh-12203 + parser = all_parsers + data = r"""a,b,c +0,1,2 +3,4,5,6,7 +8,9,10""" + + if usecols is None: + # Make sure that an error is still raised + # when the "usecols" parameter is not provided. + msg = r"Expected \d+ fields in line \d+, saw \d+" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data)) + else: + expected = DataFrame({"a": [0, 3, 8], "b": [1, 4, 9]}) + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + # First, check to see that the response of parser when faced with no + # provided columns raises the correct error, with or without usecols. 
+ ("", dict(), None), + ("", dict(usecols=["X"]), None), + ( + ",,", + dict(names=["Dummy", "X", "Dummy_2"], usecols=["X"]), + DataFrame(columns=["X"], index=[0], dtype=np.float64), + ), + ( + "", + dict(names=["Dummy", "X", "Dummy_2"], usecols=["X"]), + DataFrame(columns=["X"]), + ), + ], +) +def test_read_empty_with_usecols(all_parsers, data, kwargs, expected): + # see gh-12493 + parser = all_parsers + + if expected is None: + msg = "No columns to parse from file" + with pytest.raises(EmptyDataError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,expected", + [ + # gh-8661, gh-8679: this should ignore six lines, including + # lines with trailing whitespace and blank lines. + ( + dict( + header=None, + delim_whitespace=True, + skiprows=[0, 1, 2, 3, 5, 6], + skip_blank_lines=True, + ), + DataFrame([[1.0, 2.0, 4.0], [5.1, np.nan, 10.0]]), + ), + # gh-8983: test skipping set of rows after a row with trailing spaces. 
+ ( + dict( + delim_whitespace=True, skiprows=[1, 2, 3, 5, 6], skip_blank_lines=True + ), + DataFrame({"A": [1.0, 5.1], "B": [2.0, np.nan], "C": [4.0, 10]}), + ), + ], +) +def test_trailing_spaces(all_parsers, kwargs, expected): + data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa + parser = all_parsers + + result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_raise_on_sep_with_delim_whitespace(all_parsers): + # see gh-6607 + data = "a b c\n1 2 3" + parser = all_parsers + + with pytest.raises(ValueError, match="you can only specify one"): + parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True) + + +@pytest.mark.parametrize("delim_whitespace", [True, False]) +def test_single_char_leading_whitespace(all_parsers, delim_whitespace): + # see gh-9710 + parser = all_parsers + data = """\ +MyColumn +a +b +a +b\n""" + + expected = DataFrame({"MyColumn": list("abab")}) + result = parser.read_csv( + StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "sep,skip_blank_lines,exp_data", + [ + (",", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]), + (r"\s+", True, [[1.0, 2.0, 4.0], [5.0, np.nan, 10.0], [-70.0, 0.4, 1.0]]), + ( + ",", + False, + [ + [1.0, 2.0, 4.0], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + [5.0, np.nan, 10.0], + [np.nan, np.nan, np.nan], + [-70.0, 0.4, 1.0], + ], + ), + ], +) +def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data): + parser = all_parsers + data = """\ +A,B,C +1,2.,4. 
+ + +5.,NaN,10.0 + +-70,.4,1 +""" + + if sep == r"\s+": + data = data.replace(",", " ") + + result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines) + expected = DataFrame(exp_data, columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + +def test_whitespace_lines(all_parsers): + parser = all_parsers + data = """ + +\t \t\t +\t +A,B,C +\t 1,2.,4. +5.,NaN,10.0 +""" + expected = DataFrame([[1, 2.0, 4.0], [5.0, np.nan, 10.0]], columns=["A", "B", "C"]) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,expected", + [ + ( + """ A B C D +a 1 2 3 4 +b 1 2 3 4 +c 1 2 3 4 +""", + DataFrame( + [[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], + columns=["A", "B", "C", "D"], + index=["a", "b", "c"], + ), + ), + ( + " a b c\n1 2 3 \n4 5 6\n 7 8 9", + DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]), + ), + ], +) +def test_whitespace_regex_separator(all_parsers, data, expected): + # see gh-6607 + parser = all_parsers + result = parser.read_csv(StringIO(data), sep=r"\s+") + tm.assert_frame_equal(result, expected) + + +def test_verbose_read(all_parsers, capsys): + parser = all_parsers + data = """a,b,c,d +one,1,2,3 +one,1,2,3 +,1,2,3 +one,1,2,3 +,1,2,3 +,1,2,3 +one,1,2,3 +two,1,2,3""" + + # Engines are verbose in different ways. + parser.read_csv(StringIO(data), verbose=True) + captured = capsys.readouterr() + + if parser.engine == "c": + assert "Tokenization took:" in captured.out + assert "Parser memory cleanup took:" in captured.out + else: # Python engine + assert captured.out == "Filled 3 NA values in column a\n" + + +def test_verbose_read2(all_parsers, capsys): + parser = all_parsers + data = """a,b,c,d +one,1,2,3 +two,1,2,3 +three,1,2,3 +four,1,2,3 +five,1,2,3 +,1,2,3 +seven,1,2,3 +eight,1,2,3""" + + parser.read_csv(StringIO(data), verbose=True, index_col=0) + captured = capsys.readouterr() + + # Engines are verbose in different ways. 
+ if parser.engine == "c": + assert "Tokenization took:" in captured.out + assert "Parser memory cleanup took:" in captured.out + else: # Python engine + assert captured.out == "Filled 1 NA values in column a\n" + + +def test_iteration_open_handle(all_parsers): + parser = all_parsers + kwargs = dict(squeeze=True, header=None) + + with tm.ensure_clean() as path: + with open(path, "w") as f: + f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG") + + with open(path, "r") as f: + for line in f: + if "CCC" in line: + break + + result = parser.read_csv(f, **kwargs) + expected = Series(["DDD", "EEE", "FFF", "GGG"], name=0) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data,thousands,decimal", + [ + ( + """A|B|C +1|2,334.01|5 +10|13|10. +""", + ",", + ".", + ), + ( + """A|B|C +1|2.334,01|5 +10|13|10, +""", + ".", + ",", + ), + ], +) +def test_1000_sep_with_decimal(all_parsers, data, thousands, decimal): + parser = all_parsers + expected = DataFrame({"A": [1, 10], "B": [2334.01, 13], "C": [5, 10.0]}) + + result = parser.read_csv( + StringIO(data), sep="|", thousands=thousands, decimal=decimal + ) + tm.assert_frame_equal(result, expected) + + +def test_euro_decimal_format(all_parsers): + parser = all_parsers + data = """Id;Number1;Number2;Text1;Text2;Number3 +1;1521,1541;187101,9543;ABC;poi;4,738797819 +2;121,12;14897,76;DEF;uyt;0,377320872 +3;878,158;108013,434;GHI;rez;2,735694704""" + + result = parser.read_csv(StringIO(data), sep=";", decimal=",") + expected = DataFrame( + [ + [1, 1521.1541, 187101.9543, "ABC", "poi", 4.738797819], + [2, 121.12, 14897.76, "DEF", "uyt", 0.377320872], + [3, 878.158, 108013.434, "GHI", "rez", 2.735694704], + ], + columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("na_filter", [True, False]) +def test_inf_parsing(all_parsers, na_filter): + parser = all_parsers + data = """\ +,A +a,inf +b,-inf +c,+Inf +d,-Inf +e,INF +f,-INF 
+g,+INf +h,-INf +i,inF +j,-inF""" + expected = DataFrame( + {"A": [float("inf"), float("-inf")] * 5}, + index=["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"], + ) + result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("na_filter", [True, False]) +def test_infinity_parsing(all_parsers, na_filter): + parser = all_parsers + data = """\ +,A +a,Infinity +b,-Infinity +c,+Infinity +""" + expected = DataFrame( + {"A": [float("infinity"), float("-infinity"), float("+infinity")]}, + index=["a", "b", "c"], + ) + result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5]) +def test_raise_on_no_columns(all_parsers, nrows): + parser = all_parsers + data = "\n" * nrows + + msg = "No columns to parse from file" + with pytest.raises(EmptyDataError, match=msg): + parser.read_csv(StringIO(data)) + + +def test_memory_map(all_parsers, csv_dir_path): + mmap_file = os.path.join(csv_dir_path, "test_mmap.csv") + parser = all_parsers + + expected = DataFrame( + {"a": [1, 2, 3], "b": ["one", "two", "three"], "c": ["I", "II", "III"]} + ) + + result = parser.read_csv(mmap_file, memory_map=True) + tm.assert_frame_equal(result, expected) + + +def test_null_byte_char(all_parsers): + # see gh-2741 + data = "\x00,foo" + names = ["a", "b"] + parser = all_parsers + + if parser.engine == "c": + expected = DataFrame([[np.nan, "foo"]], columns=names) + out = parser.read_csv(StringIO(data), names=names) + tm.assert_frame_equal(out, expected) + else: + msg = "NULL byte detected" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), names=names) + + +def test_temporary_file(all_parsers): + # see gh-13398 + parser = all_parsers + data = "0 0" + + new_file = TemporaryFile("w+") + new_file.write(data) + new_file.flush() + new_file.seek(0) + + result = 
parser.read_csv(new_file, sep=r"\s+", header=None) + new_file.close() + + expected = DataFrame([[0, 0]]) + tm.assert_frame_equal(result, expected) + + +def test_internal_eof_byte(all_parsers): + # see gh-5500 + parser = all_parsers + data = "a,b\n1\x1a,2" + + expected = DataFrame([["1\x1a", 2]], columns=["a", "b"]) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_internal_eof_byte_to_file(all_parsers): + # see gh-16559 + parser = all_parsers + data = b'c1,c2\r\n"test \x1a test", test\r\n' + expected = DataFrame([["test \x1a test", " test"]], columns=["c1", "c2"]) + path = "__{}__.csv".format(tm.rands(10)) + + with tm.ensure_clean(path) as path: + with open(path, "wb") as f: + f.write(data) + + result = parser.read_csv(path) + tm.assert_frame_equal(result, expected) + + +def test_sub_character(all_parsers, csv_dir_path): + # see gh-16893 + filename = os.path.join(csv_dir_path, "sub_char.csv") + expected = DataFrame([[1, 2, 3]], columns=["a", "\x1ab", "c"]) + + parser = all_parsers + result = parser.read_csv(filename) + tm.assert_frame_equal(result, expected) + + +def test_file_handle_string_io(all_parsers): + # gh-14418 + # + # Don't close user provided file handles. + parser = all_parsers + data = "a,b\n1,2" + + fh = StringIO(data) + parser.read_csv(fh) + assert not fh.closed + + +def test_file_handles_with_open(all_parsers, csv1): + # gh-14418 + # + # Don't close user provided file handles. 
+ parser = all_parsers + + for mode in ["r", "rb"]: + with open(csv1, mode) as f: + parser.read_csv(f) + assert not f.closed + + +def test_invalid_file_buffer_class(all_parsers): + # see gh-15337 + class InvalidBuffer: + pass + + parser = all_parsers + msg = "Invalid file path or buffer object type" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(InvalidBuffer()) + + +def test_invalid_file_buffer_mock(all_parsers): + # see gh-15337 + parser = all_parsers + msg = "Invalid file path or buffer object type" + + class Foo: + pass + + with pytest.raises(ValueError, match=msg): + parser.read_csv(Foo()) + + +def test_valid_file_buffer_seems_invalid(all_parsers): + # gh-16135: we want to ensure that "tell" and "seek" + # aren't actually being used when we call `read_csv` + # + # Thus, while the object may look "invalid" (these + # methods are attributes of the `StringIO` class), + # it is still a valid file-object for our purposes. + class NoSeekTellBuffer(StringIO): + def tell(self): + raise AttributeError("No tell method") + + def seek(self, pos, whence=0): + raise AttributeError("No seek method") + + data = "a\n1" + parser = all_parsers + expected = DataFrame({"a": [1]}) + + result = parser.read_csv(NoSeekTellBuffer(data)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs", + [dict(), dict(error_bad_lines=True)], # Default is True. # Explicitly pass in. 
+) +@pytest.mark.parametrize( + "warn_kwargs", [dict(), dict(warn_bad_lines=True), dict(warn_bad_lines=False)] +) +def test_error_bad_lines(all_parsers, kwargs, warn_kwargs): + # see gh-15925 + parser = all_parsers + kwargs.update(**warn_kwargs) + data = "a\n1\n1,2,3\n4\n5,6,7" + + msg = "Expected 1 fields in line 3, saw 3" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + + +def test_warn_bad_lines(all_parsers, capsys): + # see gh-15925 + parser = all_parsers + data = "a\n1\n1,2,3\n4\n5,6,7" + expected = DataFrame({"a": [1, 4]}) + + result = parser.read_csv(StringIO(data), error_bad_lines=False, warn_bad_lines=True) + tm.assert_frame_equal(result, expected) + + captured = capsys.readouterr() + assert "Skipping line 3" in captured.err + assert "Skipping line 5" in captured.err + + +def test_suppress_error_output(all_parsers, capsys): + # see gh-15925 + parser = all_parsers + data = "a\n1\n1,2,3\n4\n5,6,7" + expected = DataFrame({"a": [1, 4]}) + + result = parser.read_csv( + StringIO(data), error_bad_lines=False, warn_bad_lines=False + ) + tm.assert_frame_equal(result, expected) + + captured = capsys.readouterr() + assert captured.err == "" + + +@pytest.mark.parametrize("filename", ["sé-es-vé.csv", "ru-sй.csv", "中文文件名.csv"]) +def test_filename_with_special_chars(all_parsers, filename): + # see gh-15086. + parser = all_parsers + df = DataFrame({"a": [1, 2, 3]}) + + with tm.ensure_clean(filename) as path: + df.to_csv(path, index=False) + + result = parser.read_csv(path) + tm.assert_frame_equal(result, df) + + +def test_read_csv_memory_growth_chunksize(all_parsers): + # see gh-24805 + # + # Let's just make sure that we don't crash + # as we iteratively process all chunks. 
+ parser = all_parsers + + with tm.ensure_clean() as path: + with open(path, "w") as f: + for i in range(1000): + f.write(str(i) + "\n") + + result = parser.read_csv(path, chunksize=20) + + for _ in result: + pass + + +def test_read_table_equivalency_to_read_csv(all_parsers): + # see gh-21948 + # As of 0.25.0, read_table is undeprecated + parser = all_parsers + data = "a\tb\n1\t2\n3\t4" + expected = parser.read_csv(StringIO(data), sep="\t") + result = parser.read_table(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_first_row_bom(all_parsers): + # see gh-26545 + parser = all_parsers + data = '''\ufeff"Head1" "Head2" "Head3"''' + + result = parser.read_csv(StringIO(data), delimiter="\t") + expected = DataFrame(columns=["Head1", "Head2", "Head3"]) + tm.assert_frame_equal(result, expected) + + +def test_integer_precision(all_parsers): + # Gh 7072 + s = """1,1;0;0;0;1;1;3844;3844;3844;1;1;1;1;1;1;0;0;1;1;0;0,,,4321583677327450765 +5,1;0;0;0;1;1;843;843;843;1;1;1;1;1;1;0;0;1;1;0;0,64.0,;,4321113141090630389""" + parser = all_parsers + result = parser.read_csv(StringIO(s), header=None)[4] + expected = Series([4321583677327450765, 4321113141090630389], name=4) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py new file mode 100644 index 00000000..dc03370d --- /dev/null +++ b/pandas/tests/io/parser/test_compression.py @@ -0,0 +1,151 @@ +""" +Tests compressed data parsing functionality for all +of the parsers defined in parsers.py +""" + +import os +import zipfile + +import pytest + +import pandas as pd +import pandas._testing as tm + + +@pytest.fixture(params=[True, False]) +def buffer(request): + return request.param + + +@pytest.fixture +def parser_and_data(all_parsers, csv1): + parser = all_parsers + + with open(csv1, "rb") as f: + data = f.read() + expected = parser.read_csv(csv1) + + return parser, data, expected + + 
+@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"]) +def test_zip(parser_and_data, compression): + parser, data, expected = parser_and_data + + with tm.ensure_clean("test_file.zip") as path: + with zipfile.ZipFile(path, mode="w") as tmp: + tmp.writestr("test_file", data) + + if compression == "zip2": + with open(path, "rb") as f: + result = parser.read_csv(f, compression="zip") + else: + result = parser.read_csv(path, compression=compression) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("compression", ["zip", "infer"]) +def test_zip_error_multiple_files(parser_and_data, compression): + parser, data, expected = parser_and_data + + with tm.ensure_clean("combined_zip.zip") as path: + inner_file_names = ["test_file", "second_file"] + + with zipfile.ZipFile(path, mode="w") as tmp: + for file_name in inner_file_names: + tmp.writestr(file_name, data) + + with pytest.raises(ValueError, match="Multiple files"): + parser.read_csv(path, compression=compression) + + +def test_zip_error_no_files(parser_and_data): + parser, _, _ = parser_and_data + + with tm.ensure_clean() as path: + with zipfile.ZipFile(path, mode="w"): + pass + + with pytest.raises(ValueError, match="Zero files"): + parser.read_csv(path, compression="zip") + + +def test_zip_error_invalid_zip(parser_and_data): + parser, _, _ = parser_and_data + + with tm.ensure_clean() as path: + with open(path, "wb") as f: + with pytest.raises(zipfile.BadZipfile, match="File is not a zip file"): + parser.read_csv(f, compression="zip") + + +@pytest.mark.parametrize("filename", [None, "test.{ext}"]) +def test_compression(parser_and_data, compression_only, buffer, filename): + parser, data, expected = parser_and_data + compress_type = compression_only + + ext = "gz" if compress_type == "gzip" else compress_type + filename = filename if filename is None else filename.format(ext=ext) + + if filename and buffer: + pytest.skip("Cannot deduce compression from buffer of compressed data.") + + 
with tm.ensure_clean(filename=filename) as path: + tm.write_to_compressed(compress_type, path, data) + compression = "infer" if filename else compress_type + + if buffer: + with open(path, "rb") as f: + result = parser.read_csv(f, compression=compression) + else: + result = parser.read_csv(path, compression=compression) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("ext", [None, "gz", "bz2"]) +def test_infer_compression(all_parsers, csv1, buffer, ext): + # see gh-9770 + parser = all_parsers + kwargs = dict(index_col=0, parse_dates=True) + + expected = parser.read_csv(csv1, **kwargs) + kwargs["compression"] = "infer" + + if buffer: + with open(csv1) as f: + result = parser.read_csv(f, **kwargs) + else: + ext = "." + ext if ext else "" + result = parser.read_csv(csv1 + ext, **kwargs) + + tm.assert_frame_equal(result, expected) + + +def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding_fmt): + # see gh-18071, gh-24130 + parser = all_parsers + encoding = encoding_fmt.format(utf_value) + path = os.path.join(csv_dir_path, f"utf{utf_value}_ex_small.zip") + + result = parser.read_csv(path, encoding=encoding, compression="zip", sep="\t") + expected = pd.DataFrame( + { + "Country": ["Venezuela", "Venezuela"], + "Twitter": ["Hugo Chávez Frías", "Henrique Capriles R."], + } + ) + + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"]) +def test_invalid_compression(all_parsers, invalid_compression): + parser = all_parsers + compress_kwargs = dict(compression=invalid_compression) + + msg = "Unrecognized compression type: {compression}".format(**compress_kwargs) + + with pytest.raises(ValueError, match=msg): + parser.read_csv("test_file.zip", **compress_kwargs) diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py new file mode 100644 index 00000000..88b400d9 --- /dev/null +++ 
b/pandas/tests/io/parser/test_converters.py @@ -0,0 +1,160 @@ +""" +Tests column conversion functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +from dateutil.parser import parse +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index +import pandas._testing as tm + + +def test_converters_type_must_be_dict(all_parsers): + parser = all_parsers + data = """index,A,B,C,D +foo,2,3,4,5 +""" + + with pytest.raises(TypeError, match="Type converters.+"): + parser.read_csv(StringIO(data), converters=0) + + +@pytest.mark.parametrize("column", [3, "D"]) +@pytest.mark.parametrize( + "converter", [parse, lambda x: int(x.split("/")[2])] # Produce integer. +) +def test_converters(all_parsers, column, converter): + parser = all_parsers + data = """A,B,C,D +a,1,2,01/01/2009 +b,3,4,01/02/2009 +c,4,5,01/03/2009 +""" + result = parser.read_csv(StringIO(data), converters={column: converter}) + + expected = parser.read_csv(StringIO(data)) + expected["D"] = expected["D"].map(converter) + + tm.assert_frame_equal(result, expected) + + +def test_converters_no_implicit_conv(all_parsers): + # see gh-2184 + parser = all_parsers + data = """000102,1.2,A\n001245,2,B""" + + converters = {0: lambda x: x.strip()} + result = parser.read_csv(StringIO(data), header=None, converters=converters) + + # Column 0 should not be casted to numeric and should remain as object. 
+ expected = DataFrame([["000102", 1.2, "A"], ["001245", 2, "B"]]) + tm.assert_frame_equal(result, expected) + + +def test_converters_euro_decimal_format(all_parsers): + # see gh-583 + converters = dict() + parser = all_parsers + + data = """Id;Number1;Number2;Text1;Text2;Number3 +1;1521,1541;187101,9543;ABC;poi;4,7387 +2;121,12;14897,76;DEF;uyt;0,3773 +3;878,158;108013,434;GHI;rez;2,7356""" + converters["Number1"] = converters["Number2"] = converters[ + "Number3" + ] = lambda x: float(x.replace(",", ".")) + + result = parser.read_csv(StringIO(data), sep=";", converters=converters) + expected = DataFrame( + [ + [1, 1521.1541, 187101.9543, "ABC", "poi", 4.7387], + [2, 121.12, 14897.76, "DEF", "uyt", 0.3773], + [3, 878.158, 108013.434, "GHI", "rez", 2.7356], + ], + columns=["Id", "Number1", "Number2", "Text1", "Text2", "Number3"], + ) + tm.assert_frame_equal(result, expected) + + +def test_converters_corner_with_nans(all_parsers): + parser = all_parsers + data = """id,score,days +1,2,12 +2,2-5, +3,,14+ +4,6-12,2""" + + # Example converters. 
+ def convert_days(x): + x = x.strip() + + if not x: + return np.nan + + is_plus = x.endswith("+") + + if is_plus: + x = int(x[:-1]) + 1 + else: + x = int(x) + + return x + + def convert_days_sentinel(x): + x = x.strip() + + if not x: + return np.nan + + is_plus = x.endswith("+") + + if is_plus: + x = int(x[:-1]) + 1 + else: + x = int(x) + + return x + + def convert_score(x): + x = x.strip() + + if not x: + return np.nan + + if x.find("-") > 0: + val_min, val_max = map(int, x.split("-")) + val = 0.5 * (val_min + val_max) + else: + val = float(x) + + return val + + results = [] + + for day_converter in [convert_days, convert_days_sentinel]: + result = parser.read_csv( + StringIO(data), + converters={"score": convert_score, "days": day_converter}, + na_values=["", None], + ) + assert pd.isna(result["days"][1]) + results.append(result) + + tm.assert_frame_equal(results[0], results[1]) + + +def test_converter_index_col_bug(all_parsers): + # see gh-1835 + parser = all_parsers + data = "A;B\n1;2\n3;4" + + rs = parser.read_csv( + StringIO(data), sep=";", index_col="A", converters={"A": lambda x: x} + ) + + xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A")) + tm.assert_frame_equal(rs, xp) diff --git a/pandas/tests/io/parser/test_dialect.py b/pandas/tests/io/parser/test_dialect.py new file mode 100644 index 00000000..cc65def0 --- /dev/null +++ b/pandas/tests/io/parser/test_dialect.py @@ -0,0 +1,144 @@ +""" +Tests that dialects are properly handled during parsing +for all of the parsers defined in parsers.py +""" + +import csv +from io import StringIO + +import pytest + +from pandas.errors import ParserWarning + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.fixture +def custom_dialect(): + dialect_name = "weird" + dialect_kwargs = dict( + doublequote=False, + escapechar="~", + delimiter=":", + skipinitialspace=False, + quotechar="~", + quoting=3, + ) + return dialect_name, dialect_kwargs + + +def test_dialect(all_parsers): + parser = 
all_parsers + data = """\ +label1,label2,label3 +index1,"a,c,e +index2,b,d,f +""" + + dia = csv.excel() + dia.quoting = csv.QUOTE_NONE + df = parser.read_csv(StringIO(data), dialect=dia) + + data = """\ +label1,label2,label3 +index1,a,c,e +index2,b,d,f +""" + exp = parser.read_csv(StringIO(data)) + exp.replace("a", '"a', inplace=True) + tm.assert_frame_equal(df, exp) + + +def test_dialect_str(all_parsers): + dialect_name = "mydialect" + parser = all_parsers + data = """\ +fruit:vegetable +apple:broccoli +pear:tomato +""" + exp = DataFrame({"fruit": ["apple", "pear"], "vegetable": ["broccoli", "tomato"]}) + + with tm.with_csv_dialect(dialect_name, delimiter=":"): + df = parser.read_csv(StringIO(data), dialect=dialect_name) + tm.assert_frame_equal(df, exp) + + +def test_invalid_dialect(all_parsers): + class InvalidDialect: + pass + + data = "a\n1" + parser = all_parsers + msg = "Invalid dialect" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), dialect=InvalidDialect) + + +@pytest.mark.parametrize( + "arg", + [None, "doublequote", "escapechar", "skipinitialspace", "quotechar", "quoting"], +) +@pytest.mark.parametrize("value", ["dialect", "default", "other"]) +def test_dialect_conflict_except_delimiter(all_parsers, custom_dialect, arg, value): + # see gh-23761. + dialect_name, dialect_kwargs = custom_dialect + parser = all_parsers + + expected = DataFrame({"a": [1], "b": [2]}) + data = "a:b\n1:2" + + warning_klass = None + kwds = dict() + + # arg=None tests when we pass in the dialect without any other arguments. + if arg is not None: + if "value" == "dialect": # No conflict --> no warning. + kwds[arg] = dialect_kwargs[arg] + elif "value" == "default": # Default --> no warning. + from pandas.io.parsers import _parser_defaults + + kwds[arg] = _parser_defaults[arg] + else: # Non-default + conflict with dialect --> warning. 
+ warning_klass = ParserWarning + kwds[arg] = "blah" + + with tm.with_csv_dialect(dialect_name, **dialect_kwargs): + with tm.assert_produces_warning(warning_klass): + result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwds) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,warning_klass", + [ + (dict(sep=","), None), # sep is default --> sep_override=True + (dict(sep="."), ParserWarning), # sep isn't default --> sep_override=False + (dict(delimiter=":"), None), # No conflict + (dict(delimiter=None), None), # Default arguments --> sep_override=True + (dict(delimiter=","), ParserWarning), # Conflict + (dict(delimiter="."), ParserWarning), # Conflict + ], + ids=[ + "sep-override-true", + "sep-override-false", + "delimiter-no-conflict", + "delimiter-default-arg", + "delimiter-conflict", + "delimiter-conflict2", + ], +) +def test_dialect_conflict_delimiter(all_parsers, custom_dialect, kwargs, warning_klass): + # see gh-23761. + dialect_name, dialect_kwargs = custom_dialect + parser = all_parsers + + expected = DataFrame({"a": [1], "b": [2]}) + data = "a:b\n1:2" + + with tm.with_csv_dialect(dialect_name, **dialect_kwargs): + with tm.assert_produces_warning(warning_klass): + result = parser.read_csv(StringIO(data), dialect=dialect_name, **kwargs) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_dtypes.py b/pandas/tests/io/parser/test_dtypes.py new file mode 100644 index 00000000..11dcf7f0 --- /dev/null +++ b/pandas/tests/io/parser/test_dtypes.py @@ -0,0 +1,584 @@ +""" +Tests dtype specification during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO +import os + +import numpy as np +import pytest + +from pandas.errors import ParserWarning + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import Categorical, DataFrame, Index, MultiIndex, Series, Timestamp, concat +import pandas._testing as tm + + 
+@pytest.mark.parametrize("dtype", [str, object]) +@pytest.mark.parametrize("check_orig", [True, False]) +def test_dtype_all_columns(all_parsers, dtype, check_orig): + # see gh-3795, gh-6607 + parser = all_parsers + + df = DataFrame( + np.random.rand(5, 2).round(4), + columns=list("AB"), + index=["1A", "1B", "1C", "1D", "1E"], + ) + + with tm.ensure_clean("__passing_str_as_dtype__.csv") as path: + df.to_csv(path) + + result = parser.read_csv(path, dtype=dtype, index_col=0) + + if check_orig: + expected = df.copy() + result = result.astype(float) + else: + expected = df.astype(str) + + tm.assert_frame_equal(result, expected) + + +def test_dtype_all_columns_empty(all_parsers): + # see gh-12048 + parser = all_parsers + result = parser.read_csv(StringIO("A,B"), dtype=str) + + expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) + tm.assert_frame_equal(result, expected) + + +def test_dtype_per_column(all_parsers): + parser = all_parsers + data = """\ +one,two +1,2.5 +2,3.5 +3,4.5 +4,5.5""" + expected = DataFrame( + [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] + ) + expected["one"] = expected["one"].astype(np.float64) + expected["two"] = expected["two"].astype(object) + + result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) + tm.assert_frame_equal(result, expected) + + +def test_invalid_dtype_per_column(all_parsers): + parser = all_parsers + data = """\ +one,two +1,2.5 +2,3.5 +3,4.5 +4,5.5""" + + with pytest.raises(TypeError, match="data type [\"']foo[\"'] not understood"): + parser.read_csv(StringIO(data), dtype={"one": "foo", 1: "int"}) + + +@pytest.mark.parametrize( + "dtype", + [ + "category", + CategoricalDtype(), + {"a": "category", "b": "category", "c": CategoricalDtype()}, + ], +) +def test_categorical_dtype(all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,a,3.4 +1,a,3.4 +2,b,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["a", 
"a", "b"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}]) +def test_categorical_dtype_single(all_parsers, dtype): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,a,3.4 +1,a,3.4 +2,b,4.5""" + expected = DataFrame( + {"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]} + ) + actual = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_unsorted(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,b,3.4 +1,b,3.4 +2,a,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", "b", "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_missing(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b,c +1,b,3.4 +1,nan,3.4 +2,a,4.5""" + expected = DataFrame( + { + "a": Categorical(["1", "1", "2"]), + "b": Categorical(["b", np.nan, "a"]), + "c": Categorical(["3.4", "3.4", "4.5"]), + } + ) + actual = parser.read_csv(StringIO(data), dtype="category") + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.slow +def test_categorical_dtype_high_cardinality_numeric(all_parsers): + # see gh-18186 + parser = all_parsers + data = np.sort([str(i) for i in range(524289)]) + expected = DataFrame({"a": Categorical(data, ordered=True)}) + + actual = parser.read_csv(StringIO("a\n" + "\n".join(data)), dtype="category") + actual["a"] = actual["a"].cat.reorder_categories( + np.sort(actual.a.cat.categories), ordered=True + ) + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_latin1(all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "unicode_series.csv") 
+ parser = all_parsers + encoding = "latin-1" + + expected = parser.read_csv(pth, header=None, encoding=encoding) + expected[1] = Categorical(expected[1]) + + actual = parser.read_csv(pth, header=None, encoding=encoding, dtype={1: "category"}) + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_utf16(all_parsers, csv_dir_path): + # see gh-10153 + pth = os.path.join(csv_dir_path, "utf16_ex.txt") + parser = all_parsers + encoding = "utf-16" + sep = "," + + expected = parser.read_csv(pth, sep=sep, encoding=encoding) + expected = expected.apply(Categorical) + + actual = parser.read_csv(pth, sep=sep, encoding=encoding, dtype="category") + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_chunksize_infer_categories(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"])}), + DataFrame({"a": [1, 2], "b": Categorical(["b", "c"])}, index=[2, 3]), + ] + actuals = parser.read_csv(StringIO(data), dtype={"b": "category"}, chunksize=2) + + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + +def test_categorical_dtype_chunksize_explicit_categories(all_parsers): + # see gh-10153 + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + cats = ["a", "b", "c"] + expecteds = [ + DataFrame({"a": [1, 1], "b": Categorical(["a", "b"], categories=cats)}), + DataFrame( + {"a": [1, 2], "b": Categorical(["b", "c"], categories=cats)}, index=[2, 3] + ), + ] + dtype = CategoricalDtype(cats) + actuals = parser.read_csv(StringIO(data), dtype={"b": dtype}, chunksize=2) + + for actual, expected in zip(actuals, expecteds): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize("ordered", [False, True]) +@pytest.mark.parametrize( + "categories", + [["a", "b", "c"], ["a", "c", "b"], ["a", "b", "c", "d"], ["c", "b", "a"]], +) +def test_categorical_category_dtype(all_parsers, categories, 
ordered): + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical( + ["a", "b", "b", "c"], categories=categories, ordered=ordered + ), + } + ) + + dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)} + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_category_dtype_unsorted(all_parsers): + parser = all_parsers + data = """a,b +1,a +1,b +1,b +2,c""" + dtype = CategoricalDtype(["c", "b", "a"]) + expected = DataFrame( + { + "a": [1, 1, 1, 2], + "b": Categorical(["a", "b", "b", "c"], categories=["c", "b", "a"]), + } + ) + + result = parser.read_csv(StringIO(data), dtype={"b": dtype}) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_numeric(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([1, 2, 3])} + + data = "b\n1\n1\n2\n3" + expected = DataFrame({"b": Categorical([1, 1, 2, 3])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_datetime(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(pd.date_range("2017", "2019", freq="AS"))} + + data = "b\n2017-01-01\n2018-01-01\n2019-01-01" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_timestamp(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype([Timestamp("2014")])} + + data = "b\n2014-01-01\n2014-01-01T00:00:00" + expected = DataFrame({"b": Categorical([Timestamp("2014")] * 2)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_coerces_timedelta(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(pd.to_timedelta(["1H", "2H", "3H"]))} + + data = 
"b\n1H\n2H\n3H" + expected = DataFrame({"b": Categorical(dtype["b"].categories)}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + "b\nTrue\nFalse\nNA\nFalse", + "b\ntrue\nfalse\nNA\nfalse", + "b\nTRUE\nFALSE\nNA\nFALSE", + "b\nTrue\nFalse\nNA\nFALSE", + ], +) +def test_categorical_dtype_coerces_boolean(all_parsers, data): + # see gh-20498 + parser = all_parsers + dtype = {"b": CategoricalDtype([False, True])} + expected = DataFrame({"b": Categorical([True, False, None, False])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_categorical_unexpected_categories(all_parsers): + parser = all_parsers + dtype = {"b": CategoricalDtype(["a", "b", "d", "e"])} + + data = "b\nd\na\nc\nd" # Unexpected c + expected = DataFrame({"b": Categorical(list("dacd"), dtype=dtype["b"])}) + + result = parser.read_csv(StringIO(data), dtype=dtype) + tm.assert_frame_equal(result, expected) + + +def test_empty_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two" + result = parser.read_csv(StringIO(data), dtype={"one": "u1"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=np.object)}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_index_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two" + result = parser.read_csv( + StringIO(data), index_col=["one"], dtype={"one": "u1", 1: "f"} + ) + + expected = DataFrame( + {"two": np.empty(0, dtype="f")}, index=Index([], dtype="u1", name="one") + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_multi_index_pass_dtype(all_parsers): + parser = all_parsers + + data = "one,two,three" + result = parser.read_csv( + StringIO(data), index_col=["one", "two"], dtype={"one": "u1", 1: "f8"} + ) + + exp_idx = MultiIndex.from_arrays( + [np.empty(0, dtype="u1"), 
np.empty(0, dtype=np.float64)], names=["one", "two"] + ) + expected = DataFrame({"three": np.empty(0, dtype=np.object)}, index=exp_idx) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): + parser = all_parsers + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={"one": "u1", "one.1": "f"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): + parser = all_parsers + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + + expected = DataFrame( + {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, + index=Index([], dtype=object), + ) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) + + data = "one,one" + result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): + # see gh-9424 + parser = all_parsers + expected = concat( + [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], + axis=1, + ) + expected.index = expected.index.astype(object) + + with pytest.raises(ValueError, match="Duplicate names"): + data = "" + parser.read_csv(StringIO(data), names=["one", "one"], dtype={0: "u1", 1: "f"}) + + +def test_raise_on_passed_int_dtype_with_nas(all_parsers): + # see gh-2631 + parser = all_parsers + data = """YEAR, DOY, a +2001,106380451,10 +2001,,11 +2001,106380451,67""" + + msg = ( + "Integer column has NA values" + if parser.engine == "c" + 
else "Unable to convert column DOY" + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), dtype={"DOY": np.int64}, skipinitialspace=True) + + +def test_dtype_with_converters(all_parsers): + parser = all_parsers + data = """a,b +1.1,2.2 +1.2,2.3""" + + # Dtype spec ignored if converted specified. + with tm.assert_produces_warning(ParserWarning): + result = parser.read_csv( + StringIO(data), dtype={"a": "i8"}, converters={"a": lambda x: str(x)} + ) + expected = DataFrame({"a": ["1.1", "1.2"], "b": [2.2, 2.3]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype,expected", + [ + (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), + ("category", DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[])), + ( + dict(a="category", b="category"), + DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + ), + ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), + ( + "timedelta64[ns]", + DataFrame( + { + "a": Series([], dtype="timedelta64[ns]"), + "b": Series([], dtype="timedelta64[ns]"), + }, + index=[], + ), + ), + ( + dict(a=np.int64, b=np.int32), + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ( + {0: np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ( + {"a": np.int64, 1: np.int32}, + DataFrame( + {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, + index=[], + ), + ), + ], +) +def test_empty_dtype(all_parsers, dtype, expected): + # see gh-14712 + parser = all_parsers + data = "a,b" + + result = parser.read_csv(StringIO(data), header=0, dtype=dtype) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "dtype", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) +) +def test_numeric_dtype(all_parsers, dtype): + data = "0\n1" + parser = all_parsers + expected = DataFrame([0, 1], 
dtype=dtype) + + result = parser.read_csv(StringIO(data), header=None, dtype=dtype) + tm.assert_frame_equal(expected, result) + + +def test_boolean_dtype(all_parsers): + parser = all_parsers + data = "\n".join( + [ + "a", + "True", + "TRUE", + "true", + "False", + "FALSE", + "false", + "NaN", + "nan", + "NA", + "null", + "NULL", + ] + ) + + result = parser.read_csv(StringIO(data), dtype="boolean") + expected = pd.DataFrame( + { + "a": pd.array( + [True, True, True, False, False, False, None, None, None, None, None], + dtype="boolean", + ) + } + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py new file mode 100644 index 00000000..e38b7fab --- /dev/null +++ b/pandas/tests/io/parser/test_encoding.py @@ -0,0 +1,199 @@ +""" +Tests encoding functionality during parsing +for all of the parsers defined in parsers.py +""" + +from io import BytesIO +import os +import tempfile + +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +def test_bytes_io_input(all_parsers): + encoding = "cp1255" + parser = all_parsers + + data = BytesIO("שלום:1234\n562:123".encode(encoding)) + result = parser.read_csv(data, sep=":", encoding=encoding) + + expected = DataFrame([[562, 123]], columns=["שלום", "1234"]) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_unicode(all_parsers): + parser = all_parsers + data = BytesIO("\u0141aski, Jan;1".encode("utf-8")) + + result = parser.read_csv(data, sep=";", encoding="utf-8", header=None) + expected = DataFrame([["\u0141aski, Jan", 1]]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("sep", [",", "\t"]) +@pytest.mark.parametrize("encoding", ["utf-16", "utf-16le", "utf-16be"]) +def test_utf16_bom_skiprows(all_parsers, sep, encoding): + # see gh-2298 + parser = all_parsers + data = """skip this +skip this too +A,B,C +1,2,3 +4,5,6""".replace( + ",", sep + ) + path = 
"__{}__.csv".format(tm.rands(10)) + kwargs = dict(sep=sep, skiprows=2) + utf8 = "utf-8" + + with tm.ensure_clean(path) as path: + from io import TextIOWrapper + + bytes_data = data.encode(encoding) + + with open(path, "wb") as f: + f.write(bytes_data) + + bytes_buffer = BytesIO(data.encode(utf8)) + bytes_buffer = TextIOWrapper(bytes_buffer, encoding=utf8) + + result = parser.read_csv(path, encoding=encoding, **kwargs) + expected = parser.read_csv(bytes_buffer, encoding=utf8, **kwargs) + + bytes_buffer.close() + tm.assert_frame_equal(result, expected) + + +def test_utf16_example(all_parsers, csv_dir_path): + path = os.path.join(csv_dir_path, "utf16_ex.txt") + parser = all_parsers + result = parser.read_csv(path, encoding="utf-16", sep="\t") + assert len(result) == 50 + + +def test_unicode_encoding(all_parsers, csv_dir_path): + path = os.path.join(csv_dir_path, "unicode_series.csv") + parser = all_parsers + + result = parser.read_csv(path, header=None, encoding="latin-1") + result = result.set_index(0) + got = result[1][1632] + + expected = "\xc1 k\xf6ldum klaka (Cold Fever) (1994)" + assert got == expected + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + # Basic test + ("a\n1", dict(), DataFrame({"a": [1]})), + # "Regular" quoting + ('"a"\n1', dict(quotechar='"'), DataFrame({"a": [1]})), + # Test in a data row instead of header + ("b\n1", dict(names=["a"]), DataFrame({"a": ["b", "1"]})), + # Test in empty data row with skipping + ("\n1", dict(names=["a"], skip_blank_lines=True), DataFrame({"a": [1]})), + # Test in empty data row without skipping + ( + "\n1", + dict(names=["a"], skip_blank_lines=False), + DataFrame({"a": [np.nan, 1]}), + ), + ], +) +def test_utf8_bom(all_parsers, data, kwargs, expected): + # see gh-4793 + parser = all_parsers + bom = "\ufeff" + utf8 = "utf-8" + + def _encode_data_with_bom(_data): + bom_data = (bom + _data).encode(utf8) + return BytesIO(bom_data) + + result = parser.read_csv(_encode_data_with_bom(data), encoding=utf8, 
**kwargs) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_utf_aliases(all_parsers, utf_value, encoding_fmt): + # see gh-13549 + expected = DataFrame({"mb_num": [4.8], "multibyte": ["test"]}) + parser = all_parsers + + encoding = encoding_fmt.format(utf_value) + data = "mb_num,multibyte\n4.8,test".encode(encoding) + + result = parser.read_csv(BytesIO(data), encoding=encoding) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "fname,encoding", + [ + ("test1.csv", "utf-8"), + ("unicode_series.csv", "latin-1"), + ("sauron.SHIFT_JIS.csv", "shiftjis"), + ], +) +def test_binary_mode_file_buffers(all_parsers, csv_dir_path, fname, encoding): + # gh-23779: Python csv engine shouldn't error on files opened in binary. + # gh-31575: Python csv engine shouldn't error on files opened in raw binary. + parser = all_parsers + + fpath = os.path.join(csv_dir_path, fname) + expected = parser.read_csv(fpath, encoding=encoding) + + with open(fpath, mode="r", encoding=encoding) as fa: + result = parser.read_csv(fa) + tm.assert_frame_equal(expected, result) + + with open(fpath, mode="rb") as fb: + result = parser.read_csv(fb, encoding=encoding) + tm.assert_frame_equal(expected, result) + + with open(fpath, mode="rb", buffering=0) as fb: + result = parser.read_csv(fb, encoding=encoding) + tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("pass_encoding", [True, False]) +def test_encoding_temp_file(all_parsers, utf_value, encoding_fmt, pass_encoding): + # see gh-24130 + parser = all_parsers + encoding = encoding_fmt.format(utf_value) + + expected = DataFrame({"foo": ["bar"]}) + + with tempfile.TemporaryFile(mode="w+", encoding=encoding) as f: + f.write("foo\nbar") + f.seek(0) + + result = parser.read_csv(f, encoding=encoding if pass_encoding else None) + tm.assert_frame_equal(result, expected) + + +def test_encoding_named_temp_file(all_parsers): + # see gh-31819 + parser = all_parsers + encoding = "shift-jis" + + if parser.engine 
== "python": + pytest.skip("NamedTemporaryFile does not work with Python engine") + + title = "てすと" + data = "こむ" + + expected = DataFrame({title: [data]}) + + with tempfile.NamedTemporaryFile() as f: + f.write(f"{title}\n{data}".encode(encoding)) + + f.seek(0) + + result = parser.read_csv(f, encoding=encoding) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py new file mode 100644 index 00000000..7dc106ef --- /dev/null +++ b/pandas/tests/io/parser/test_header.py @@ -0,0 +1,573 @@ +""" +Tests that the file header is properly handled or inferred +during parsing for all of the parsers defined in parsers.py +""" + +from collections import namedtuple +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import ParserError + +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +def test_read_with_bad_header(all_parsers): + parser = all_parsers + msg = r"but only \d+ lines in file" + + with pytest.raises(ValueError, match=msg): + s = StringIO(",,") + parser.read_csv(s, header=[10]) + + +def test_negative_header(all_parsers): + # see gh-27779 + parser = all_parsers + data = """1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + with pytest.raises( + ValueError, + match="Passing negative integer to header is invalid. 
" + "For no header, use header=None instead", + ): + parser.read_csv(StringIO(data), header=-1) + + +@pytest.mark.parametrize("header", [([-1, 2, 4]), ([-5, 0])]) +def test_negative_multi_index_header(all_parsers, header): + # see gh-27779 + parser = all_parsers + data = """1,2,3,4,5 + 6,7,8,9,10 + 11,12,13,14,15 + """ + with pytest.raises( + ValueError, match="cannot specify multi-index header with negative integers" + ): + parser.read_csv(StringIO(data), header=header) + + +@pytest.mark.parametrize("header", [True, False]) +def test_bool_header_arg(all_parsers, header): + # see gh-6114 + parser = all_parsers + data = """\ +MyColumn +a +b +a +b""" + msg = "Passing a bool to header is invalid" + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), header=header) + + +def test_no_header_prefix(all_parsers): + parser = all_parsers + data = """1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + result = parser.read_csv(StringIO(data), prefix="Field", header=None) + expected = DataFrame( + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], + columns=["Field0", "Field1", "Field2", "Field3", "Field4"], + ) + tm.assert_frame_equal(result, expected) + + +def test_header_with_index_col(all_parsers): + parser = all_parsers + data = """foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + names = ["A", "B", "C"] + result = parser.read_csv(StringIO(data), names=names) + + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(result, expected) + + +def test_header_not_first_line(all_parsers): + parser = all_parsers + data = """got,to,ignore,this,line +got,to,ignore,this,line +index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +""" + data2 = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +""" + + result = parser.read_csv(StringIO(data), header=2, index_col=0) + expected = parser.read_csv(StringIO(data2), header=0, index_col=0) + tm.assert_frame_equal(result, 
expected) + + +def test_header_multi_index(all_parsers): + parser = all_parsers + expected = tm.makeCustomDataframe(5, 3, r_idx_nlevels=2, c_idx_nlevels=4) + + data = """\ +C0,,C_l0_g0,C_l0_g1,C_l0_g2 + +C1,,C_l1_g0,C_l1_g1,C_l1_g2 +C2,,C_l2_g0,C_l2_g1,C_l2_g2 +C3,,C_l3_g0,C_l3_g1,C_l3_g2 +R0,R1,,, +R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2 +R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2 +R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2 +R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2 +R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 +""" + result = parser.read_csv(StringIO(data), header=[0, 1, 2, 3], index_col=[0, 1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + ( + dict(index_col=["foo", "bar"]), + ( + "index_col must only contain " + "row numbers when specifying " + "a multi-index header" + ), + ), + ( + dict(index_col=[0, 1], names=["foo", "bar"]), + ("cannot specify names when specifying a multi-index header"), + ), + ( + dict(index_col=[0, 1], usecols=["foo", "bar"]), + ("cannot specify usecols when specifying a multi-index header"), + ), + ], +) +def test_header_multi_index_invalid(all_parsers, kwargs, msg): + data = """\ +C0,,C_l0_g0,C_l0_g1,C_l0_g2 + +C1,,C_l1_g0,C_l1_g1,C_l1_g2 +C2,,C_l2_g0,C_l2_g1,C_l2_g2 +C3,,C_l3_g0,C_l3_g1,C_l3_g2 +R0,R1,,, +R_l0_g0,R_l1_g0,R0C0,R0C1,R0C2 +R_l0_g1,R_l1_g1,R1C0,R1C1,R1C2 +R_l0_g2,R_l1_g2,R2C0,R2C1,R2C2 +R_l0_g3,R_l1_g3,R3C0,R3C1,R3C2 +R_l0_g4,R_l1_g4,R4C0,R4C1,R4C2 +""" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), header=[0, 1, 2, 3], **kwargs) + + +_TestTuple = namedtuple("names", ["first", "second"]) + + +@pytest.mark.parametrize( + "kwargs", + [ + dict(header=[0, 1]), + dict( + skiprows=3, + names=[ + ("a", "q"), + ("a", "r"), + ("a", "s"), + ("b", "t"), + ("c", "u"), + ("c", "v"), + ], + ), + dict( + skiprows=3, + names=[ + _TestTuple("a", "q"), + _TestTuple("a", "r"), + _TestTuple("a", "s"), + _TestTuple("b", "t"), + _TestTuple("c", "u"), + _TestTuple("c", "v"), + ], + ), + ], +) +def 
test_header_multi_index_common_format1(all_parsers, kwargs): + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], + index=["one", "two"], + columns=MultiIndex.from_tuples( + [("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")] + ), + ) + data = """,a,a,a,b,c,c +,q,r,s,t,u,v +,,,,,, +one,1,2,3,4,5,6 +two,7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), index_col=0, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs", + [ + dict(header=[0, 1]), + dict( + skiprows=2, + names=[ + ("a", "q"), + ("a", "r"), + ("a", "s"), + ("b", "t"), + ("c", "u"), + ("c", "v"), + ], + ), + dict( + skiprows=2, + names=[ + _TestTuple("a", "q"), + _TestTuple("a", "r"), + _TestTuple("a", "s"), + _TestTuple("b", "t"), + _TestTuple("c", "u"), + _TestTuple("c", "v"), + ], + ), + ], +) +def test_header_multi_index_common_format2(all_parsers, kwargs): + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], + index=["one", "two"], + columns=MultiIndex.from_tuples( + [("a", "q"), ("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")] + ), + ) + data = """,a,a,a,b,c,c +,q,r,s,t,u,v +one,1,2,3,4,5,6 +two,7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), index_col=0, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs", + [ + dict(header=[0, 1]), + dict( + skiprows=2, + names=[ + ("a", "q"), + ("a", "r"), + ("a", "s"), + ("b", "t"), + ("c", "u"), + ("c", "v"), + ], + ), + dict( + skiprows=2, + names=[ + _TestTuple("a", "q"), + _TestTuple("a", "r"), + _TestTuple("a", "s"), + _TestTuple("b", "t"), + _TestTuple("c", "u"), + _TestTuple("c", "v"), + ], + ), + ], +) +def test_header_multi_index_common_format3(all_parsers, kwargs): + parser = all_parsers + expected = DataFrame( + [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], + index=["one", "two"], + columns=MultiIndex.from_tuples( + [("a", "q"), 
("a", "r"), ("a", "s"), ("b", "t"), ("c", "u"), ("c", "v")] + ), + ) + expected = expected.reset_index(drop=True) + data = """a,a,a,b,c,c +q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), index_col=None, **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_header_multi_index_common_format_malformed1(all_parsers): + parser = all_parsers + expected = DataFrame( + np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), + index=Index([1, 7]), + columns=MultiIndex( + levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]], + codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], + names=["a", "q"], + ), + ) + data = """a,a,a,b,c,c +q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0) + tm.assert_frame_equal(expected, result) + + +def test_header_multi_index_common_format_malformed2(all_parsers): + parser = all_parsers + expected = DataFrame( + np.array([[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), + index=Index([1, 7]), + columns=MultiIndex( + levels=[["a", "b", "c"], ["r", "s", "t", "u", "v"]], + codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], + names=[None, "q"], + ), + ) + + data = """,a,a,b,c,c +q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), header=[0, 1], index_col=0) + tm.assert_frame_equal(expected, result) + + +def test_header_multi_index_common_format_malformed3(all_parsers): + parser = all_parsers + expected = DataFrame( + np.array([[3, 4, 5, 6], [9, 10, 11, 12]], dtype="int64"), + index=MultiIndex(levels=[[1, 7], [2, 8]], codes=[[0, 1], [0, 1]]), + columns=MultiIndex( + levels=[["a", "b", "c"], ["s", "t", "u", "v"]], + codes=[[0, 1, 2, 2], [0, 1, 2, 3]], + names=[None, "q"], + ), + ) + data = """,a,a,b,c,c +q,r,s,t,u,v +1,2,3,4,5,6 +7,8,9,10,11,12""" + + result = parser.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1]) + tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize( + 
"data,header", [("1,2,3\n4,5,6", None), ("foo,bar,baz\n1,2,3\n4,5,6", 0)] +) +def test_header_names_backward_compat(all_parsers, data, header): + # see gh-2539 + parser = all_parsers + expected = parser.read_csv(StringIO("1,2,3\n4,5,6"), names=["a", "b", "c"]) + + result = parser.read_csv(StringIO(data), names=["a", "b", "c"], header=header) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [dict(), dict(index_col=False)]) +def test_read_only_header_no_rows(all_parsers, kwargs): + # See gh-7773 + parser = all_parsers + expected = DataFrame(columns=["a", "b", "c"]) + + result = parser.read_csv(StringIO("a,b,c"), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,names", + [ + (dict(), [0, 1, 2, 3, 4]), + (dict(prefix="X"), ["X0", "X1", "X2", "X3", "X4"]), + ( + dict(names=["foo", "bar", "baz", "quux", "panda"]), + ["foo", "bar", "baz", "quux", "panda"], + ), + ], +) +def test_no_header(all_parsers, kwargs, names): + parser = all_parsers + data = """1,2,3,4,5 +6,7,8,9,10 +11,12,13,14,15 +""" + expected = DataFrame( + [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]], columns=names + ) + result = parser.read_csv(StringIO(data), header=None, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("header", [["a", "b"], "string_header"]) +def test_non_int_header(all_parsers, header): + # see gh-16338 + msg = "header must be integer or list of integers" + data = """1,2\n3,4""" + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), header=header) + + +def test_singleton_header(all_parsers): + # see gh-7757 + data = """a,b,c\n0,1,2\n1,2,3""" + parser = all_parsers + + expected = DataFrame({"a": [0, 1], "b": [1, 2], "c": [2, 3]}) + result = parser.read_csv(StringIO(data), header=[0]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,expected", + [ + ( + 
"A,A,A,B\none,one,one,two\n0,40,34,0.1", + DataFrame( + [[0, 40, 34, 0.1]], + columns=MultiIndex.from_tuples( + [("A", "one"), ("A", "one.1"), ("A", "one.2"), ("B", "two")] + ), + ), + ), + ( + "A,A,A,B\none,one,one.1,two\n0,40,34,0.1", + DataFrame( + [[0, 40, 34, 0.1]], + columns=MultiIndex.from_tuples( + [("A", "one"), ("A", "one.1"), ("A", "one.1.1"), ("B", "two")] + ), + ), + ), + ( + "A,A,A,B,B\none,one,one.1,two,two\n0,40,34,0.1,0.1", + DataFrame( + [[0, 40, 34, 0.1, 0.1]], + columns=MultiIndex.from_tuples( + [ + ("A", "one"), + ("A", "one.1"), + ("A", "one.1.1"), + ("B", "two"), + ("B", "two.1"), + ] + ), + ), + ), + ], +) +def test_mangles_multi_index(all_parsers, data, expected): + # see gh-18062 + parser = all_parsers + + result = parser.read_csv(StringIO(data), header=[0, 1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_col", [None, [0]]) +@pytest.mark.parametrize( + "columns", [None, (["", "Unnamed"]), (["Unnamed", ""]), (["Unnamed", "NotUnnamed"])] +) +def test_multi_index_unnamed(all_parsers, index_col, columns): + # see gh-23687 + # + # When specifying a multi-index header, make sure that + # we don't error just because one of the rows in our header + # has ALL column names containing the string "Unnamed". The + # correct condition to check is whether the row contains + # ALL columns that did not have names (and instead were given + # placeholder ones). 
+ parser = all_parsers + header = [0, 1] + + if index_col is None: + data = ",".join(columns or ["", ""]) + "\n0,1\n2,3\n4,5\n" + else: + data = ",".join([""] + (columns or ["", ""])) + "\n,0,1\n0,2,3\n1,4,5\n" + + if columns is None: + msg = ( + r"Passed header=\[0,1\] are too " + r"many rows for this multi_index of columns" + ) + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), header=header, index_col=index_col) + else: + result = parser.read_csv(StringIO(data), header=header, index_col=index_col) + template = "Unnamed: {i}_level_0" + exp_columns = [] + + for i, col in enumerate(columns): + if not col: # Unnamed. + col = template.format(i=i if index_col is None else i + 1) + + exp_columns.append(col) + + columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"])) + expected = DataFrame([[2, 3], [4, 5]], columns=columns) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_multiindex_columns(all_parsers): + # GH#6051 + parser = all_parsers + + s1 = "Male, Male, Male, Female, Female\nR, R, L, R, R\n.86, .67, .88, .78, .81" + s2 = ( + "Male, Male, Male, Female, Female\n" + "R, R, L, R, R\n" + ".86, .67, .88, .78, .81\n" + ".86, .67, .88, .78, .82" + ) + + mi = MultiIndex.from_tuples( + [ + ("Male", "R"), + (" Male", " R"), + (" Male", " L"), + (" Female", " R"), + (" Female", " R.1"), + ] + ) + expected = DataFrame( + [[0.86, 0.67, 0.88, 0.78, 0.81], [0.86, 0.67, 0.88, 0.78, 0.82]], columns=mi + ) + + df1 = parser.read_csv(StringIO(s1), header=[0, 1]) + tm.assert_frame_equal(df1, expected.iloc[:1]) + df2 = parser.read_csv(StringIO(s2), header=[0, 1]) + tm.assert_frame_equal(df2, expected) diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py new file mode 100644 index 00000000..f67a658c --- /dev/null +++ b/pandas/tests/io/parser/test_index_col.py @@ -0,0 +1,186 @@ +""" +Tests that the specified index column (a.k.a "index_col") +is properly handled or inferred during parsing for 
all of +the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +@pytest.mark.parametrize("with_header", [True, False]) +def test_index_col_named(all_parsers, with_header): + parser = all_parsers + no_header = """\ +KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa + header = "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" + + if with_header: + data = header + no_header + + result = parser.read_csv(StringIO(data), index_col="ID") + expected = parser.read_csv(StringIO(data), header=0).set_index("ID") + tm.assert_frame_equal(result, expected) + else: + data = no_header + msg = "Index ID invalid" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), index_col="ID") + + +def test_index_col_named2(all_parsers): + parser = all_parsers + data = """\ +1,2,3,4,hello +5,6,7,8,world +9,10,11,12,foo +""" + + expected = DataFrame( + {"a": [1, 5, 9], "b": [2, 6, 10], "c": [3, 7, 11], "d": [4, 8, 12]}, + index=Index(["hello", "world", "foo"], name="message"), + ) + names = ["a", "b", "c", "d", "message"] + + result = parser.read_csv(StringIO(data), names=names, index_col=["message"]) + tm.assert_frame_equal(result, expected) + + +def test_index_col_is_true(all_parsers): + # see gh-9798 + data = "a,b\n1,2" + parser = all_parsers + + msg = "The value of index_col couldn't be 'True'" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), index_col=True) + + 
+def test_infer_index_col(all_parsers): + data = """A,B,C +foo,1,2,3 +bar,4,5,6 +baz,7,8,9 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["foo", "bar", "baz"], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "index_col,kwargs", + [ + (None, dict(columns=["x", "y", "z"])), + (False, dict(columns=["x", "y", "z"])), + (0, dict(columns=["y", "z"], index=Index([], name="x"))), + (1, dict(columns=["x", "z"], index=Index([], name="y"))), + ("x", dict(columns=["y", "z"], index=Index([], name="x"))), + ("y", dict(columns=["x", "z"], index=Index([], name="y"))), + ( + [0, 1], + dict( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"]) + ), + ), + ( + ["x", "y"], + dict( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["x", "y"]) + ), + ), + ( + [1, 0], + dict( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"]) + ), + ), + ( + ["y", "x"], + dict( + columns=["z"], index=MultiIndex.from_arrays([[]] * 2, names=["y", "x"]) + ), + ), + ], +) +def test_index_col_empty_data(all_parsers, index_col, kwargs): + data = "x,y,z" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=index_col) + + expected = DataFrame(**kwargs) + tm.assert_frame_equal(result, expected) + + +def test_empty_with_index_col_false(all_parsers): + # see gh-10413 + data = "x,y" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=False) + + expected = DataFrame(columns=["x", "y"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "index_names", + [ + ["", ""], + ["foo", ""], + ["", "bar"], + ["foo", "bar"], + ["NotReallyUnnamed", "Unnamed: 0"], + ], +) +def test_multi_index_naming(all_parsers, index_names): + parser = all_parsers + + # We don't want empty index names being replaced with "Unnamed: 0" + data = ",".join(index_names + 
["col\na,c,1\na,d,2\nb,c,3\nb,d,4"]) + result = parser.read_csv(StringIO(data), index_col=[0, 1]) + + expected = DataFrame( + {"col": [1, 2, 3, 4]}, index=MultiIndex.from_product([["a", "b"], ["c", "d"]]) + ) + expected.index.names = [name if name else None for name in index_names] + tm.assert_frame_equal(result, expected) + + +def test_multi_index_naming_not_all_at_beginning(all_parsers): + parser = all_parsers + data = ",Unnamed: 2,\na,c,1\na,d,2\nb,c,3\nb,d,4" + result = parser.read_csv(StringIO(data), index_col=[0, 2]) + + expected = DataFrame( + {"Unnamed: 2": ["c", "d", "c", "d"]}, + index=MultiIndex( + levels=[["a", "b"], [1, 2, 3, 4]], codes=[[0, 0, 1, 1], [0, 1, 2, 3]] + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_no_multi_index_level_names_empty(all_parsers): + # GH 10984 + parser = all_parsers + midx = MultiIndex.from_tuples([("A", 1, 2), ("A", 1, 2), ("B", 1, 2)]) + expected = DataFrame(np.random.randn(3, 3), index=midx, columns=["x", "y", "z"]) + with tm.ensure_clean() as path: + expected.to_csv(path) + result = parser.read_csv(path, index_col=[0, 1, 2]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py new file mode 100644 index 00000000..5c4e6421 --- /dev/null +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -0,0 +1,132 @@ +""" +Tests that duplicate columns are handled appropriately when parsed by the +CSV engine. In general, the expected result is that they are either thoroughly +de-duplicated (if mangling requested) or ignored otherwise. 
+""" +from io import StringIO + +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.mark.parametrize("kwargs", [dict(), dict(mangle_dupe_cols=True)]) +def test_basic(all_parsers, kwargs): + # TODO: add test for condition "mangle_dupe_cols=False" + # once it is actually supported (gh-12935) + parser = all_parsers + + data = "a,a,b,b,b\n1,2,3,4,5" + result = parser.read_csv(StringIO(data), sep=",", **kwargs) + + expected = DataFrame([[1, 2, 3, 4, 5]], columns=["a", "a.1", "b", "b.1", "b.2"]) + tm.assert_frame_equal(result, expected) + + +def test_basic_names(all_parsers): + # See gh-7160 + parser = all_parsers + + data = "a,b,a\n0,1,2\n3,4,5" + expected = DataFrame([[0, 1, 2], [3, 4, 5]], columns=["a", "b", "a.1"]) + + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +def test_basic_names_raise(all_parsers): + # See gh-7160 + parser = all_parsers + + data = "0,1,2\n3,4,5" + with pytest.raises(ValueError, match="Duplicate names"): + parser.read_csv(StringIO(data), names=["a", "b", "a"]) + + +@pytest.mark.parametrize( + "data,expected", + [ + ("a,a,a.1\n1,2,3", DataFrame([[1, 2, 3]], columns=["a", "a.1", "a.1.1"])), + ( + "a,a,a.1,a.1.1,a.1.1.1,a.1.1.1.1\n1,2,3,4,5,6", + DataFrame( + [[1, 2, 3, 4, 5, 6]], + columns=["a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1", "a.1.1.1.1.1"], + ), + ), + ( + "a,a,a.3,a.1,a.2,a,a\n1,2,3,4,5,6,7", + DataFrame( + [[1, 2, 3, 4, 5, 6, 7]], + columns=["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"], + ), + ), + ], +) +def test_thorough_mangle_columns(all_parsers, data, expected): + # see gh-17060 + parser = all_parsers + + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,names,expected", + [ + ( + "a,b,b\n1,2,3", + ["a.1", "a.1", "a.1.1"], + DataFrame( + [["a", "b", "b"], ["1", "2", "3"]], columns=["a.1", "a.1.1", "a.1.1.1"] + ), + ), + ( + "a,b,c,d,e,f\n1,2,3,4,5,6", + ["a", "a", "a.1", 
"a.1.1", "a.1.1.1", "a.1.1.1.1"], + DataFrame( + [["a", "b", "c", "d", "e", "f"], ["1", "2", "3", "4", "5", "6"]], + columns=["a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1", "a.1.1.1.1.1"], + ), + ), + ( + "a,b,c,d,e,f,g\n1,2,3,4,5,6,7", + ["a", "a", "a.3", "a.1", "a.2", "a", "a"], + DataFrame( + [ + ["a", "b", "c", "d", "e", "f", "g"], + ["1", "2", "3", "4", "5", "6", "7"], + ], + columns=["a", "a.1", "a.3", "a.1.1", "a.2", "a.2.1", "a.3.1"], + ), + ), + ], +) +def test_thorough_mangle_names(all_parsers, data, names, expected): + # see gh-17095 + parser = all_parsers + + with pytest.raises(ValueError, match="Duplicate names"): + parser.read_csv(StringIO(data), names=names) + + +def test_mangled_unnamed_placeholders(all_parsers): + # xref gh-13017 + orig_key = "0" + parser = all_parsers + + orig_value = [1, 2, 3] + df = DataFrame({orig_key: orig_value}) + + # This test recursively updates `df`. + for i in range(3): + expected = DataFrame() + + for j in range(i + 1): + expected["Unnamed: 0" + ".1" * j] = [0, 1, 2] + + expected[orig_key] = orig_value + df = parser.read_csv(StringIO(df.to_csv())) + + tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/io/parser/test_multi_thread.py b/pandas/tests/io/parser/test_multi_thread.py new file mode 100644 index 00000000..64ccaf60 --- /dev/null +++ b/pandas/tests/io/parser/test_multi_thread.py @@ -0,0 +1,146 @@ +""" +Tests multithreading behaviour for reading and +parsing files for each parser defined in parsers.py +""" +from io import BytesIO +from multiprocessing.pool import ThreadPool + +import numpy as np + +import pandas as pd +from pandas import DataFrame +import pandas._testing as tm + + +def _construct_dataframe(num_rows): + """ + Construct a DataFrame for testing. + + Parameters + ---------- + num_rows : int + The number of rows for our DataFrame. 
+ + Returns + ------- + df : DataFrame + """ + df = DataFrame(np.random.rand(num_rows, 5), columns=list("abcde")) + df["foo"] = "foo" + df["bar"] = "bar" + df["baz"] = "baz" + df["date"] = pd.date_range("20000101 09:00:00", periods=num_rows, freq="s") + df["int"] = np.arange(num_rows, dtype="int64") + return df + + +def test_multi_thread_string_io_read_csv(all_parsers): + # see gh-11786 + parser = all_parsers + max_row_range = 10000 + num_files = 100 + + bytes_to_df = [ + "\n".join( + ["{i:d},{i:d},{i:d}".format(i=i) for i in range(max_row_range)] + ).encode() + for _ in range(num_files) + ] + files = [BytesIO(b) for b in bytes_to_df] + + # Read all files in many threads. + pool = ThreadPool(8) + + results = pool.map(parser.read_csv, files) + first_result = results[0] + + for result in results: + tm.assert_frame_equal(first_result, result) + + +def _generate_multi_thread_dataframe(parser, path, num_rows, num_tasks): + """ + Generate a DataFrame via multi-thread. + + Parameters + ---------- + parser : BaseParser + The parser object to use for reading the data. + path : str + The location of the CSV file to read. + num_rows : int + The number of rows to read per task. + num_tasks : int + The number of tasks to use for reading this DataFrame. + + Returns + ------- + df : DataFrame + """ + + def reader(arg): + """ + Create a reader for part of the CSV. + + Parameters + ---------- + arg : tuple + A tuple of the following: + + * start : int + The starting row to start for parsing CSV + * nrows : int + The number of rows to read. 
+ + Returns + ------- + df : DataFrame + """ + start, nrows = arg + + if not start: + return parser.read_csv( + path, index_col=0, header=0, nrows=nrows, parse_dates=["date"] + ) + + return parser.read_csv( + path, + index_col=0, + header=None, + skiprows=int(start) + 1, + nrows=nrows, + parse_dates=[9], + ) + + tasks = [ + (num_rows * i // num_tasks, num_rows // num_tasks) for i in range(num_tasks) + ] + + pool = ThreadPool(processes=num_tasks) + results = pool.map(reader, tasks) + + header = results[0].columns + + for r in results[1:]: + r.columns = header + + final_dataframe = pd.concat(results) + return final_dataframe + + +def test_multi_thread_path_multipart_read_csv(all_parsers): + # see gh-11786 + num_tasks = 4 + num_rows = 100000 + + parser = all_parsers + file_name = "__thread_pool_reader__.csv" + df = _construct_dataframe(num_rows) + + with tm.ensure_clean(file_name) as path: + df.to_csv(path) + + final_dataframe = _generate_multi_thread_dataframe( + parser, path, num_rows, num_tasks + ) + tm.assert_frame_equal(df, final_dataframe) diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py new file mode 100644 index 00000000..f9a083d7 --- /dev/null +++ b/pandas/tests/io/parser/test_na_values.py @@ -0,0 +1,567 @@ +""" +Tests that NA values are properly handled during +parsing for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas._libs.parsers import STR_NA_VALUES + +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +def test_string_nas(all_parsers): + parser = all_parsers + data = """A,B,C +a,b,c +d,,f +,g,h +""" + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [["a", "b", "c"], ["d", np.nan, "f"], [np.nan, "g", "h"]], + columns=["A", "B", "C"], + ) + tm.assert_frame_equal(result, expected) + + +def test_detect_string_na(all_parsers): + parser = all_parsers + data = """A,B +foo,bar +NA,baz 
+NaN,nan +""" + expected = DataFrame( + [["foo", "bar"], [np.nan, "baz"], [np.nan, np.nan]], columns=["A", "B"] + ) + result = parser.read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "na_values", + [ + ["-999.0", "-999"], + [-999, -999.0], + [-999.0, -999], + ["-999.0"], + ["-999"], + [-999.0], + [-999], + ], +) +@pytest.mark.parametrize( + "data", + [ + """A,B +-999,1.2 +2,-999 +3,4.5 +""", + """A,B +-999,1.200 +2,-999.000 +3,4.500 +""", + ], +) +def test_non_string_na_values(all_parsers, data, na_values): + # see gh-3611: with an odd float format, we can't match + # the string "999.0" exactly but still need float matching + parser = all_parsers + expected = DataFrame([[np.nan, 1.2], [2.0, np.nan], [3.0, 4.5]], columns=["A", "B"]) + + result = parser.read_csv(StringIO(data), na_values=na_values) + tm.assert_frame_equal(result, expected) + + +def test_default_na_values(all_parsers): + _NA_VALUES = { + "-1.#IND", + "1.#QNAN", + "1.#IND", + "-1.#QNAN", + "#N/A", + "N/A", + "n/a", + "NA", + "", + "#NA", + "NULL", + "null", + "NaN", + "nan", + "-NaN", + "-nan", + "#N/A N/A", + "", + } + assert _NA_VALUES == STR_NA_VALUES + + parser = all_parsers + nv = len(_NA_VALUES) + + def f(i, v): + if i == 0: + buf = "" + elif i > 0: + buf = "".join([","] * i) + + buf = "{0}{1}".format(buf, v) + + if i < nv - 1: + buf = "{0}{1}".format(buf, "".join([","] * (nv - i - 1))) + + return buf + + data = StringIO("\n".join(f(i, v) for i, v in enumerate(_NA_VALUES))) + expected = DataFrame(np.nan, columns=range(nv), index=range(nv)) + + result = parser.read_csv(data, header=None) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("na_values", ["baz", ["baz"]]) +def test_custom_na_values(all_parsers, na_values): + parser = all_parsers + data = """A,B,C +ignore,this,row +1,NA,3 +-1.#IND,5,baz +7,8,NaN +""" + expected = DataFrame( + [[1.0, np.nan, 3], [np.nan, 5, np.nan], [7, 8, np.nan]], columns=["A", "B", "C"] + ) + 
result = parser.read_csv(StringIO(data), na_values=na_values, skiprows=[1]) + tm.assert_frame_equal(result, expected) + + +def test_bool_na_values(all_parsers): + data = """A,B,C +True,False,True +NA,True,False +False,NA,True""" + parser = all_parsers + result = parser.read_csv(StringIO(data)) + expected = DataFrame( + { + "A": np.array([True, np.nan, False], dtype=object), + "B": np.array([False, True, np.nan], dtype=object), + "C": [True, False, True], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_na_value_dict(all_parsers): + data = """A,B,C +foo,bar,NA +bar,foo,foo +foo,bar,NA +bar,foo,foo""" + parser = all_parsers + df = parser.read_csv(StringIO(data), na_values={"A": ["foo"], "B": ["bar"]}) + expected = DataFrame( + { + "A": [np.nan, "bar", np.nan, "bar"], + "B": [np.nan, "foo", np.nan, "foo"], + "C": [np.nan, "foo", np.nan, "foo"], + } + ) + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize( + "index_col,expected", + [ + ( + [0], + DataFrame({"b": [np.nan], "c": [1], "d": [5]}, index=Index([0], name="a")), + ), + ( + [0, 2], + DataFrame( + {"b": [np.nan], "d": [5]}, + index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]), + ), + ), + ( + ["a", "c"], + DataFrame( + {"b": [np.nan], "d": [5]}, + index=MultiIndex.from_tuples([(0, 1)], names=["a", "c"]), + ), + ), + ], +) +def test_na_value_dict_multi_index(all_parsers, index_col, expected): + data = """\ +a,b,c,d +0,NA,1,5 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), na_values=set(), index_col=index_col) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,expected", + [ + ( + dict(), + DataFrame( + { + "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], + } + ), + ), + ( + dict(na_values={"A": [], "C": []}, keep_default_na=False), + DataFrame( + { + "A": ["a", "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": 
["one", "two", "three", "nan", "five", "", "seven"], + } + ), + ), + ( + dict(na_values=["a"], keep_default_na=False), + DataFrame( + { + "A": [np.nan, "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", "nan", "five", "", "seven"], + } + ), + ), + ( + dict(na_values={"A": [], "C": []}), + DataFrame( + { + "A": ["a", "b", np.nan, "d", "e", np.nan, "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["one", "two", "three", np.nan, "five", np.nan, "seven"], + } + ), + ), + ], +) +def test_na_values_keep_default(all_parsers, kwargs, expected): + data = """\ +A,B,C +a,1,one +b,2,two +,3,three +d,4,nan +e,5,five +nan,6, +g,7,seven +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_no_na_values_no_keep_default(all_parsers): + # see gh-4318: passing na_values=None and + # keep_default_na=False yields 'None" as a na_value + data = """\ +A,B,C +a,1,None +b,2,two +,3,None +d,4,nan +e,5,five +nan,6, +g,7,seven +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), keep_default_na=False) + + expected = DataFrame( + { + "A": ["a", "b", "", "d", "e", "nan", "g"], + "B": [1, 2, 3, 4, 5, 6, 7], + "C": ["None", "two", "None", "nan", "five", "", "seven"], + } + ) + tm.assert_frame_equal(result, expected) + + +def test_no_keep_default_na_dict_na_values(all_parsers): + # see gh-19227 + data = "a,b\n,2" + parser = all_parsers + result = parser.read_csv( + StringIO(data), na_values={"b": ["2"]}, keep_default_na=False + ) + expected = DataFrame({"a": [""], "b": [np.nan]}) + tm.assert_frame_equal(result, expected) + + +def test_no_keep_default_na_dict_na_scalar_values(all_parsers): + # see gh-19227 + # + # Scalar values shouldn't cause the parsing to crash or fail. 
+ data = "a,b\n1,2" + parser = all_parsers + df = parser.read_csv(StringIO(data), na_values={"b": 2}, keep_default_na=False) + expected = DataFrame({"a": [1], "b": [np.nan]}) + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize("col_zero_na_values", [113125, "113125"]) +def test_no_keep_default_na_dict_na_values_diff_reprs(all_parsers, col_zero_na_values): + # see gh-19227 + data = """\ +113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008 +729639,"qwer","",asdfkj,466.681,,252.373 +""" + parser = all_parsers + expected = DataFrame( + { + 0: [np.nan, 729639.0], + 1: [np.nan, "qwer"], + 2: ["/blaha", np.nan], + 3: ["kjsdkj", "asdfkj"], + 4: [412.166, 466.681], + 5: ["225.874", ""], + 6: [np.nan, 252.373], + } + ) + + result = parser.read_csv( + StringIO(data), + header=None, + keep_default_na=False, + na_values={2: "", 6: "214.008", 1: "blah", 0: col_zero_na_values}, + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "na_filter,row_data", + [ + (True, [[1, "A"], [np.nan, np.nan], [3, "C"]]), + (False, [["1", "A"], ["nan", "B"], ["3", "C"]]), + ], +) +def test_na_values_na_filter_override(all_parsers, na_filter, row_data): + data = """\ +A,B +1,A +nan,B +3,C +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), na_values=["B"], na_filter=na_filter) + + expected = DataFrame(row_data, columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + +def test_na_trailing_columns(all_parsers): + parser = all_parsers + data = """Date,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax +2012-03-14,USD,AAPL,BUY,1000 +2012-05-12,USD,SBUX,SELL,500""" + + # Trailing columns should be all NaN. 
+ result = parser.read_csv(StringIO(data)) + expected = DataFrame( + [ + ["2012-03-14", "USD", "AAPL", "BUY", 1000, np.nan, np.nan, np.nan], + ["2012-05-12", "USD", "SBUX", "SELL", 500, np.nan, np.nan, np.nan], + ], + columns=[ + "Date", + "Currency", + "Symbol", + "Type", + "Units", + "UnitPrice", + "Cost", + "Tax", + ], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "na_values,row_data", + [ + (1, [[np.nan, 2.0], [2.0, np.nan]]), + ({"a": 2, "b": 1}, [[1.0, 2.0], [np.nan, np.nan]]), + ], +) +def test_na_values_scalar(all_parsers, na_values, row_data): + # see gh-12224 + parser = all_parsers + names = ["a", "b"] + data = "1,2\n2,1" + + result = parser.read_csv(StringIO(data), names=names, na_values=na_values) + expected = DataFrame(row_data, columns=names) + tm.assert_frame_equal(result, expected) + + +def test_na_values_dict_aliasing(all_parsers): + parser = all_parsers + na_values = {"a": 2, "b": 1} + na_values_copy = na_values.copy() + + names = ["a", "b"] + data = "1,2\n2,1" + + expected = DataFrame([[1.0, 2.0], [np.nan, np.nan]], columns=names) + result = parser.read_csv(StringIO(data), names=names, na_values=na_values) + + tm.assert_frame_equal(result, expected) + tm.assert_dict_equal(na_values, na_values_copy) + + +def test_na_values_dict_col_index(all_parsers): + # see gh-14203 + data = "a\nfoo\n1" + parser = all_parsers + na_values = {0: "foo"} + + result = parser.read_csv(StringIO(data), na_values=na_values) + expected = DataFrame({"a": [np.nan, 1]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + str(2 ** 63) + "\n" + str(2 ** 63 + 1), + dict(na_values=[2 ** 63]), + DataFrame([str(2 ** 63), str(2 ** 63 + 1)]), + ), + (str(2 ** 63) + ",1" + "\n,2", dict(), DataFrame([[str(2 ** 63), 1], ["", 2]])), + (str(2 ** 63) + "\n1", dict(na_values=[2 ** 63]), DataFrame([np.nan, 1])), + ], +) +def test_na_values_uint64(all_parsers, data, kwargs, expected): + # see gh-14983 + 
parser = all_parsers + result = parser.read_csv(StringIO(data), header=None, **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_empty_na_values_no_default_with_index(all_parsers): + # see gh-15835 + data = "a,1\nb,2" + parser = all_parsers + expected = DataFrame({"1": [2]}, index=Index(["b"], name="a")) + + result = parser.read_csv(StringIO(data), index_col=0, keep_default_na=False) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "na_filter,index_data", [(False, ["", "5"]), (True, [np.nan, 5.0])] +) +def test_no_na_filter_on_index(all_parsers, na_filter, index_data): + # see gh-5239 + # + # Don't parse NA-values in index unless na_filter=True + parser = all_parsers + data = "a,b,c\n1,,3\n4,5,6" + + expected = DataFrame({"a": [1, 4], "c": [3, 6]}, index=Index(index_data, name="b")) + result = parser.read_csv(StringIO(data), index_col=[1], na_filter=na_filter) + tm.assert_frame_equal(result, expected) + + +def test_inf_na_values_with_int_index(all_parsers): + # see gh-17128 + parser = all_parsers + data = "idx,col1,col2\n1,3,4\n2,inf,-inf" + + # Don't fail with OverflowError with inf's and integer index column. + out = parser.read_csv(StringIO(data), index_col=[0], na_values=["inf", "-inf"]) + expected = DataFrame( + {"col1": [3, np.nan], "col2": [4, np.nan]}, index=Index([1, 2], name="idx") + ) + tm.assert_frame_equal(out, expected) + + +@pytest.mark.parametrize("na_filter", [True, False]) +def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter): + # see gh-20377 + parser = all_parsers + data = "a,b,c\n1,,3\n4,5,6" + + # na_filter=True --> missing value becomes NaN. + # na_filter=False --> missing value remains empty string. 
+ empty = np.nan if na_filter else "" + expected = DataFrame({"a": ["1", "4"], "b": [empty, "5"], "c": ["3", "6"]}) + + result = parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data, na_values", + [ + ("false,1\n,1\ntrue", None), + ("false,1\nnull,1\ntrue", None), + ("false,1\nnan,1\ntrue", None), + ("false,1\nfoo,1\ntrue", "foo"), + ("false,1\nfoo,1\ntrue", ["foo"]), + ("false,1\nfoo,1\ntrue", {"a": "foo"}), + ], +) +def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values): + parser = all_parsers + msg = ( + "(Bool column has NA values in column [0a])|" + "(cannot safely convert passed user dtype of " + "bool for object dtyped data in column 0)" + ) + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), + header=None, + names=["a", "b"], + dtype={"a": "bool"}, + na_values=na_values, + ) + + +def test_str_nan_dropped(all_parsers): + # see gh-21131 + parser = all_parsers + + data = """File: small.csv,, +10010010233,0123,654 +foo,,bar +01001000155,4530,898""" + + result = parser.read_csv( + StringIO(data), + header=None, + names=["col1", "col2", "col3"], + dtype={"col1": str, "col2": str, "col3": str}, + ).dropna() + + expected = DataFrame( + { + "col1": ["10010010233", "01001000155"], + "col2": ["0123", "4530"], + "col3": ["654", "898"], + }, + index=[1, 3], + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py new file mode 100644 index 00000000..1b1576fd --- /dev/null +++ b/pandas/tests/io/parser/test_network.py @@ -0,0 +1,240 @@ +""" +Tests parsers ability to read and parse non-local files +and hence require a network connection to be read. 
+""" +from io import BytesIO, StringIO +import logging + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.parsers import read_csv + + +@pytest.mark.network +@pytest.mark.parametrize( + "compress_type, extension", + [("gzip", ".gz"), ("bz2", ".bz2"), ("zip", ".zip"), ("xz", ".xz")], +) +@pytest.mark.parametrize("mode", ["explicit", "infer"]) +@pytest.mark.parametrize("engine", ["python", "c"]) +def test_compressed_urls(salaries_table, compress_type, extension, mode, engine): + check_compressed_urls(salaries_table, compress_type, extension, mode, engine) + + +@tm.network +def check_compressed_urls(salaries_table, compression, extension, mode, engine): + # test reading compressed urls with various engines and + # extension inference + base_url = ( + "https://github.com/pandas-dev/pandas/raw/master/" + "pandas/tests/io/parser/data/salaries.csv" + ) + + url = base_url + extension + + if mode != "explicit": + compression = mode + + url_table = read_csv(url, sep="\t", compression=compression, engine=engine) + tm.assert_frame_equal(url_table, salaries_table) + + +@pytest.fixture +def tips_df(datapath): + """DataFrame with the tips dataset.""" + return read_csv(datapath("io", "parser", "data", "tips.csv")) + + +@pytest.mark.usefixtures("s3_resource") +@td.skip_if_not_us_locale() +class TestS3: + @td.skip_if_no("s3fs") + def test_parse_public_s3_bucket(self, tips_df): + + # more of an integration test due to the not-public contents portion + # can probably mock this though. 
+ for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv("s3://pandas-test/tips.csv" + ext, compression=comp) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + # Read public file from bucket with not-public contents + df = read_csv("s3://cant_get_it/tips.csv") + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + def test_parse_public_s3n_bucket(self, tips_df): + + # Read from AWS s3 as "s3n" URL + df = read_csv("s3n://pandas-test/tips.csv", nrows=10) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_parse_public_s3a_bucket(self, tips_df): + # Read from AWS s3 as "s3a" URL + df = read_csv("s3a://pandas-test/tips.csv", nrows=10) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_parse_public_s3_bucket_nrows(self, tips_df): + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv("s3://pandas-test/tips.csv" + ext, nrows=10, compression=comp) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_parse_public_s3_bucket_chunked(self, tips_df): + # Read with a chunksize + chunksize = 5 + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df_reader = read_csv( + "s3://pandas-test/tips.csv" + ext, chunksize=chunksize, compression=comp + ) + assert df_reader.chunksize == chunksize + for i_chunk in [0, 1, 2]: + # Read a couple of chunks and make sure we see them + # properly. 
+ df = df_reader.get_chunk() + assert isinstance(df, DataFrame) + assert not df.empty + true_df = tips_df.iloc[chunksize * i_chunk : chunksize * (i_chunk + 1)] + tm.assert_frame_equal(true_df, df) + + def test_parse_public_s3_bucket_chunked_python(self, tips_df): + # Read with a chunksize using the Python parser + chunksize = 5 + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df_reader = read_csv( + "s3://pandas-test/tips.csv" + ext, + chunksize=chunksize, + compression=comp, + engine="python", + ) + assert df_reader.chunksize == chunksize + for i_chunk in [0, 1, 2]: + # Read a couple of chunks and make sure we see them properly. + df = df_reader.get_chunk() + assert isinstance(df, DataFrame) + assert not df.empty + true_df = tips_df.iloc[chunksize * i_chunk : chunksize * (i_chunk + 1)] + tm.assert_frame_equal(true_df, df) + + def test_parse_public_s3_bucket_python(self, tips_df): + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, engine="python", compression=comp + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + def test_infer_s3_compression(self, tips_df): + for ext in ["", ".gz", ".bz2"]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, engine="python", compression="infer" + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(df, tips_df) + + def test_parse_public_s3_bucket_nrows_python(self, tips_df): + for ext, comp in [("", None), (".gz", "gzip"), (".bz2", "bz2")]: + df = read_csv( + "s3://pandas-test/tips.csv" + ext, + engine="python", + nrows=10, + compression=comp, + ) + assert isinstance(df, DataFrame) + assert not df.empty + tm.assert_frame_equal(tips_df.iloc[:10], df) + + def test_read_s3_fails(self): + with pytest.raises(IOError): + read_csv("s3://nyqpug/asdf.csv") + + # Receive a permission error when trying to read a private bucket. 
+ # It's irrelevant here that this isn't actually a table. + with pytest.raises(IOError): + read_csv("s3://cant_get_it/file.csv") + + def test_write_s3_csv_fails(self, tips_df): + # GH 32486 + # Attempting to write to an invalid S3 path should raise + import botocore + + # GH 34087 + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html + # Catch a ClientError since AWS Service Errors are defined dynamically + error = (FileNotFoundError, botocore.exceptions.ClientError) + + with pytest.raises(error, match="The specified bucket does not exist"): + tips_df.to_csv("s3://an_s3_bucket_data_doesnt_exit/not_real.csv") + + @td.skip_if_no("pyarrow") + def test_write_s3_parquet_fails(self, tips_df): + # GH 27679 + # Attempting to write to an invalid S3 path should raise + import botocore + + # GH 34087 + # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/error-handling.html + # Catch a ClientError since AWS Service Errors are defined dynamically + error = (FileNotFoundError, botocore.exceptions.ClientError) + + with pytest.raises(error, match="The specified bucket does not exist"): + tips_df.to_parquet("s3://an_s3_bucket_data_doesnt_exit/not_real.parquet") + + def test_read_csv_handles_boto_s3_object(self, s3_resource, tips_file): + # see gh-16135 + + s3_object = s3_resource.meta.client.get_object( + Bucket="pandas-test", Key="tips.csv" + ) + + result = read_csv(BytesIO(s3_object["Body"].read()), encoding="utf8") + assert isinstance(result, DataFrame) + assert not result.empty + + expected = read_csv(tips_file) + tm.assert_frame_equal(result, expected) + + def test_read_csv_chunked_download(self, s3_resource, caplog): + # 8 MB, S3FS usees 5MB chunks + import s3fs + + df = DataFrame(np.random.randn(100000, 4), columns=list("abcd")) + buf = BytesIO() + str_buf = StringIO() + + df.to_csv(str_buf) + + buf = BytesIO(str_buf.getvalue().encode("utf-8")) + + s3_resource.Bucket("pandas-test").put_object(Key="large-file.csv", Body=buf) + + # 
Possibly some state leaking in between tests. + # If we don't clear this cache, we saw `GetObject operation: Forbidden`. + # Presumably the s3fs instance is being cached, with the directory listing + # from *before* we add the large-file.csv in the pandas-test bucket. + s3fs.S3FileSystem.clear_instance_cache() + + with caplog.at_level(logging.DEBUG, logger="s3fs"): + read_csv("s3://pandas-test/large-file.csv", nrows=5) + # log of fetch_range (start, stop) + assert (0, 5505024) in (x.args[-2:] for x in caplog.records) + + def test_read_s3_with_hash_in_key(self, tips_df): + # GH 25945 + result = read_csv("s3://pandas-test/tips#1.csv") + tm.assert_frame_equal(tips_df, result) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py new file mode 100644 index 00000000..b01b22e8 --- /dev/null +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -0,0 +1,1518 @@ +""" +Tests date parsing functionality for all of the +parsers defined in parsers.py +""" + +from datetime import date, datetime +from io import StringIO + +from dateutil.parser import parse as du_parse +from hypothesis import given, settings, strategies as st +import numpy as np +import pytest +import pytz + +from pandas._libs.tslib import Timestamp +from pandas._libs.tslibs import parsing +from pandas._libs.tslibs.parsing import parse_datetime_string +from pandas.compat import is_platform_windows +from pandas.compat.numpy import np_array_datetime64_compat + +import pandas as pd +from pandas import DataFrame, DatetimeIndex, Index, MultiIndex, Series +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range + +import pandas.io.date_converters as conv + +# constant +_DEFAULT_DATETIME = datetime(1, 1, 1) + +# Strategy for hypothesis +if is_platform_windows(): + date_strategy = st.datetimes(min_value=datetime(1900, 1, 1)) +else: + date_strategy = st.datetimes() + + +def test_separator_date_conflict(all_parsers): + # Regression test for gh-4678 + # + 
# Make sure thousands separator and + # date parsing do not conflict. + parser = all_parsers + data = "06-02-2013;13:00;1-000.215" + expected = DataFrame( + [[datetime(2013, 6, 2, 13, 0, 0), 1000.215]], columns=["Date", 2] + ) + + df = parser.read_csv( + StringIO(data), + sep=";", + thousands="-", + parse_dates={"Date": [0, 1]}, + header=None, + ) + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize("keep_date_col", [True, False]) +def test_multiple_date_col_custom(all_parsers, keep_date_col): + data = """\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + parser = all_parsers + + def date_parser(*date_cols): + """ + Test date parser. + + Parameters + ---------- + date_cols : args + The list of data columns to parse. 
+ + Returns + ------- + parsed : Series + """ + return parsing.try_parse_dates(parsing._concat_date_cols(date_cols)) + + result = parser.read_csv( + StringIO(data), + header=None, + date_parser=date_parser, + prefix="X", + parse_dates={"actual": [1, 2], "nominal": [1, 3]}, + keep_date_col=keep_date_col, + ) + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 18, 56), + "KORD", + "19990127", + " 19:00:00", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 19, 56), + "KORD", + "19990127", + " 20:00:00", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + "19990127", + " 21:00:00", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + "19990127", + " 21:00:00", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 56), + "KORD", + "19990127", + " 22:00:00", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + "19990127", + " 23:00:00", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + "actual", + "nominal", + "X0", + "X1", + "X2", + "X3", + "X4", + "X5", + "X6", + "X7", + "X8", + ], + ) + + if not keep_date_col: + expected = expected.drop(["X1", "X2", "X3"], axis=1) + elif parser.engine == "python": + expected["X1"] = expected["X1"].astype(np.int64) + + # Python can sometimes be flaky about how + # the aggregated columns are entered, so + # this standardizes the order. 
+ result = result[expected.columns] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("container", [list, tuple, Index, Series]) +@pytest.mark.parametrize("dim", [1, 2]) +def test_concat_date_col_fail(container, dim): + msg = "not all elements from date_cols are numpy arrays" + value = "19990127" + + date_cols = tuple(container([value]) for _ in range(dim)) + + with pytest.raises(ValueError, match=msg): + parsing._concat_date_cols(date_cols) + + +@pytest.mark.parametrize("keep_date_col", [True, False]) +def test_multiple_date_col(all_parsers, keep_date_col): + data = """\ +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), + header=None, + prefix="X", + parse_dates=[[1, 2], [1, 3]], + keep_date_col=keep_date_col, + ) + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 18, 56), + "KORD", + "19990127", + " 19:00:00", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 19, 56), + "KORD", + "19990127", + " 20:00:00", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + "19990127", + " 21:00:00", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + "19990127", + " 21:00:00", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 
56), + "KORD", + "19990127", + " 22:00:00", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + "19990127", + " 23:00:00", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + "X1_X2", + "X1_X3", + "X0", + "X1", + "X2", + "X3", + "X4", + "X5", + "X6", + "X7", + "X8", + ], + ) + + if not keep_date_col: + expected = expected.drop(["X1", "X2", "X3"], axis=1) + elif parser.engine == "python": + expected["X1"] = expected["X1"].astype(np.int64) + + tm.assert_frame_equal(result, expected) + + +def test_date_col_as_index_col(all_parsers): + data = """\ +KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), header=None, prefix="X", parse_dates=[1], index_col=1 + ) + + index = Index( + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 22, 0), + ], + name="X1", + ) + expected = DataFrame( + [ + ["KORD", " 18:56:00", 0.81, 2.81, 7.2, 0.0, 280.0], + ["KORD", " 19:56:00", 0.01, 2.21, 7.2, 0.0, 260.0], + ["KORD", " 20:56:00", -0.59, 2.21, 5.7, 0.0, 280.0], + ["KORD", " 21:18:00", -0.99, 2.01, 3.6, 0.0, 270.0], + ["KORD", " 21:56:00", -0.59, 1.71, 5.1, 0.0, 290.0], + ], + columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], + index=index, + ) + tm.assert_frame_equal(result, expected) + + +def test_multiple_date_cols_int_cast(all_parsers): + data = ( + "KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 
20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900" + ) + parse_dates = {"actual": [1, 2], "nominal": [1, 3]} + parser = all_parsers + + result = parser.read_csv( + StringIO(data), + header=None, + date_parser=conv.parse_date_time, + parse_dates=parse_dates, + prefix="X", + ) + expected = DataFrame( + [ + [datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56), "KORD", 0.81], + [datetime(1999, 1, 27, 20, 0), datetime(1999, 1, 27, 19, 56), "KORD", 0.01], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + -0.99, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + -0.59, + ], + ], + columns=["actual", "nominal", "X0", "X4"], + ) + + # Python can sometimes be flaky about how + # the aggregated columns are entered, so + # this standardizes the order. 
+ result = result[expected.columns] + tm.assert_frame_equal(result, expected) + + +def test_multiple_date_col_timestamp_parse(all_parsers): + parser = all_parsers + data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25 +05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25""" + + result = parser.read_csv( + StringIO(data), parse_dates=[[0, 1]], header=None, date_parser=Timestamp + ) + expected = DataFrame( + [ + [ + Timestamp("05/31/2012, 15:30:00.029"), + 1306.25, + 1, + "E", + 0, + np.nan, + 1306.25, + ], + [ + Timestamp("05/31/2012, 15:30:00.029"), + 1306.25, + 8, + "E", + 0, + np.nan, + 1306.25, + ], + ], + columns=["0_1", 2, 3, 4, 5, 6, 7], + ) + tm.assert_frame_equal(result, expected) + + +def test_multiple_date_cols_with_header(all_parsers): + parser = all_parsers + data = """\ +ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" + + result = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]}) + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + "KORD", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + "KORD", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + "KORD", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + 
"KORD", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + "nominal", + "ID", + "ActualTime", + "TDew", + "TAir", + "Windspeed", + "Precip", + "WindDir", + ], + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,parse_dates,msg", + [ + ( + """\ +date_NominalTime,date,NominalTime +KORD1,19990127, 19:00:00 +KORD2,19990127, 20:00:00""", + [[1, 2]], + ("New date column already in dict date_NominalTime"), + ), + ( + """\ +ID,date,nominalTime +KORD,19990127, 19:00:00 +KORD,19990127, 20:00:00""", + dict(ID=[1, 2]), + "Date column ID already in dict", + ), + ], +) +def test_multiple_date_col_name_collision(all_parsers, data, parse_dates, msg): + parser = all_parsers + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), parse_dates=parse_dates) + + +def test_date_parser_int_bug(all_parsers): + # see gh-3071 + parser = all_parsers + data = ( + "posix_timestamp,elapsed,sys,user,queries,query_time,rows," + "accountid,userid,contactid,level,silo,method\n" + "1343103150,0.062353,0,4,6,0.01690,3," + "12345,1,-1,3,invoice_InvoiceResource,search\n" + ) + + result = parser.read_csv( + StringIO(data), + index_col=0, + parse_dates=[0], + date_parser=lambda x: datetime.utcfromtimestamp(int(x)), + ) + expected = DataFrame( + [ + [ + 0.062353, + 0, + 4, + 6, + 0.01690, + 3, + 12345, + 1, + -1, + 3, + "invoice_InvoiceResource", + "search", + ] + ], + columns=[ + "elapsed", + "sys", + "user", + "queries", + "query_time", + "rows", + "accountid", + "userid", + "contactid", + "level", + "silo", + "method", + ], + index=Index([Timestamp("2012-07-24 04:12:30")], name="posix_timestamp"), + ) + tm.assert_frame_equal(result, expected) + + +def test_nat_parse(all_parsers): + # see gh-3062 + parser = all_parsers + df = DataFrame( + dict({"A": np.arange(10, dtype="float64"), "B": pd.Timestamp("20010101")}) + ) + df.iloc[3:6, :] = np.nan + + with tm.ensure_clean("__nat_parse_.csv") as path: + df.to_csv(path) + + 
result = parser.read_csv(path, index_col=0, parse_dates=["B"]) + tm.assert_frame_equal(result, df) + + +def test_csv_custom_parser(all_parsers): + data = """A,B,C +20090101,a,1,2 +20090102,b,3,4 +20090103,c,4,5 +""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), date_parser=lambda x: datetime.strptime(x, "%Y%m%d") + ) + expected = parser.read_csv(StringIO(data), parse_dates=True) + tm.assert_frame_equal(result, expected) + + +def test_parse_dates_implicit_first_col(all_parsers): + data = """A,B,C +20090101,a,1,2 +20090102,b,3,4 +20090103,c,4,5 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), parse_dates=True) + + expected = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) + tm.assert_frame_equal(result, expected) + + +def test_parse_dates_string(all_parsers): + data = """date,A,B,C +20090101,a,1,2 +20090102,b,3,4 +20090103,c,4,5 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col="date", parse_dates=["date"]) + index = date_range("1/1/2009", periods=3) + index.name = "date" + + expected = DataFrame( + {"A": ["a", "b", "c"], "B": [1, 3, 4], "C": [2, 4, 5]}, index=index + ) + tm.assert_frame_equal(result, expected) + + +# Bug in https://github.com/dateutil/dateutil/issues/217 +# has been addressed, but we just don't pass in the `yearfirst` +@pytest.mark.xfail(reason="yearfirst is not surfaced in read_*") +@pytest.mark.parametrize("parse_dates", [[["date", "time"]], [[0, 1]]]) +def test_yy_format_with_year_first(all_parsers, parse_dates): + data = """date,time,B,C +090131,0010,1,2 +090228,1020,3,4 +090331,0830,5,6 +""" + parser = all_parsers + result = parser.read_csv(StringIO(data), index_col=0, parse_dates=parse_dates) + index = DatetimeIndex( + [ + datetime(2009, 1, 31, 0, 10, 0), + datetime(2009, 2, 28, 10, 20, 0), + datetime(2009, 3, 31, 8, 30, 0), + ], + dtype=object, + name="date_time", + ) + expected = DataFrame({"B": [1, 3, 5], "C": [2, 4, 6]}, index=index) + 
tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("parse_dates", [[0, 2], ["a", "c"]]) +def test_parse_dates_column_list(all_parsers, parse_dates): + data = "a,b,c\n01/01/2010,1,15/02/2010" + parser = all_parsers + + expected = DataFrame( + {"a": [datetime(2010, 1, 1)], "b": [1], "c": [datetime(2010, 2, 15)]} + ) + expected = expected.set_index(["a", "b"]) + + result = parser.read_csv( + StringIO(data), index_col=[0, 1], parse_dates=parse_dates, dayfirst=True + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_col", [[0, 1], [1, 0]]) +def test_multi_index_parse_dates(all_parsers, index_col): + data = """index1,index2,A,B,C +20090101,one,a,1,2 +20090101,two,b,3,4 +20090101,three,c,4,5 +20090102,one,a,1,2 +20090102,two,b,3,4 +20090102,three,c,4,5 +20090103,one,a,1,2 +20090103,two,b,3,4 +20090103,three,c,4,5 +""" + parser = all_parsers + index = MultiIndex.from_product( + [ + (datetime(2009, 1, 1), datetime(2009, 1, 2), datetime(2009, 1, 3)), + ("one", "two", "three"), + ], + names=["index1", "index2"], + ) + + # Out of order. 
+ if index_col == [1, 0]: + index = index.swaplevel(0, 1) + + expected = DataFrame( + [ + ["a", 1, 2], + ["b", 3, 4], + ["c", 4, 5], + ["a", 1, 2], + ["b", 3, 4], + ["c", 4, 5], + ["a", 1, 2], + ["b", 3, 4], + ["c", 4, 5], + ], + columns=["A", "B", "C"], + index=index, + ) + result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [dict(dayfirst=True), dict(day_first=True)]) +def test_parse_dates_custom_euro_format(all_parsers, kwargs): + parser = all_parsers + data = """foo,bar,baz +31/01/2010,1,2 +01/02/2010,1,NA +02/02/2010,1,2 +""" + if "dayfirst" in kwargs: + df = parser.read_csv( + StringIO(data), + names=["time", "Q", "NTU"], + date_parser=lambda d: du_parse(d, **kwargs), + header=0, + index_col=0, + parse_dates=True, + na_values=["NA"], + ) + exp_index = Index( + [datetime(2010, 1, 31), datetime(2010, 2, 1), datetime(2010, 2, 2)], + name="time", + ) + expected = DataFrame( + {"Q": [1, 1, 1], "NTU": [2, np.nan, 2]}, + index=exp_index, + columns=["Q", "NTU"], + ) + tm.assert_frame_equal(df, expected) + else: + msg = "got an unexpected keyword argument 'day_first'" + with pytest.raises(TypeError, match=msg): + parser.read_csv( + StringIO(data), + names=["time", "Q", "NTU"], + date_parser=lambda d: du_parse(d, **kwargs), + skiprows=[0], + index_col=0, + parse_dates=True, + na_values=["NA"], + ) + + +def test_parse_tz_aware(all_parsers): + # See gh-1693 + parser = all_parsers + data = "Date,x\n2012-06-13T01:39:00Z,0.5" + + result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) + expected = DataFrame( + {"x": [0.5]}, index=Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date") + ) + tm.assert_frame_equal(result, expected) + assert result.index.tz is pytz.utc + + +@pytest.mark.parametrize( + "parse_dates,index_col", + [({"nominal": [1, 2]}, "nominal"), ({"nominal": [1, 2]}, 0), ([[1, 2]], 0)], +) +def 
test_multiple_date_cols_index(all_parsers, parse_dates, index_col): + parser = all_parsers + data = """ +ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir +KORD1,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD2,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD3,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD4,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + "KORD1", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + "KORD2", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD3", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD4", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + "KORD5", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + "KORD6", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=[ + "nominal", + "ID", + "ActualTime", + "TDew", + "TAir", + "Windspeed", + "Precip", + "WindDir", + ], + ) + expected = expected.set_index("nominal") + + if not isinstance(parse_dates, dict): + expected.index.name = "date_NominalTime" + + result = parser.read_csv( + StringIO(data), parse_dates=parse_dates, index_col=index_col + ) + tm.assert_frame_equal(result, expected) + + +def test_multiple_date_cols_chunked(all_parsers): + parser = all_parsers + data = """\ +ID,date,nominalTime,actualTime,A,B,C,D,E +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 
+KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + + expected = DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + "KORD", + " 18:56:00", + 0.81, + 2.81, + 7.2, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 20, 0), + "KORD", + " 19:56:00", + 0.01, + 2.21, + 7.2, + 0.0, + 260.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 20:56:00", + -0.59, + 2.21, + 5.7, + 0.0, + 280.0, + ], + [ + datetime(1999, 1, 27, 21, 0), + "KORD", + " 21:18:00", + -0.99, + 2.01, + 3.6, + 0.0, + 270.0, + ], + [ + datetime(1999, 1, 27, 22, 0), + "KORD", + " 21:56:00", + -0.59, + 1.71, + 5.1, + 0.0, + 290.0, + ], + [ + datetime(1999, 1, 27, 23, 0), + "KORD", + " 22:56:00", + -0.59, + 1.71, + 4.6, + 0.0, + 280.0, + ], + ], + columns=["nominal", "ID", "actualTime", "A", "B", "C", "D", "E"], + ) + expected = expected.set_index("nominal") + + reader = parser.read_csv( + StringIO(data), + parse_dates={"nominal": [1, 2]}, + index_col="nominal", + chunksize=2, + ) + chunks = list(reader) + + tm.assert_frame_equal(chunks[0], expected[:2]) + tm.assert_frame_equal(chunks[1], expected[2:4]) + tm.assert_frame_equal(chunks[2], expected[4:]) + + +def test_multiple_date_col_named_index_compat(all_parsers): + parser = all_parsers + data = """\ +ID,date,nominalTime,actualTime,A,B,C,D,E +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 
0.0000, 280.0000 +""" + + with_indices = parser.read_csv( + StringIO(data), parse_dates={"nominal": [1, 2]}, index_col="nominal" + ) + with_names = parser.read_csv( + StringIO(data), + index_col="nominal", + parse_dates={"nominal": ["date", "nominalTime"]}, + ) + tm.assert_frame_equal(with_indices, with_names) + + +def test_multiple_date_col_multiple_index_compat(all_parsers): + parser = all_parsers + data = """\ +ID,date,nominalTime,actualTime,A,B,C,D,E +KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000 +KORD,19990127, 20:00:00, 19:56:00, 0.0100, 2.2100, 7.2000, 0.0000, 260.0000 +KORD,19990127, 21:00:00, 20:56:00, -0.5900, 2.2100, 5.7000, 0.0000, 280.0000 +KORD,19990127, 21:00:00, 21:18:00, -0.9900, 2.0100, 3.6000, 0.0000, 270.0000 +KORD,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 +KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000 +""" + result = parser.read_csv( + StringIO(data), index_col=["nominal", "ID"], parse_dates={"nominal": [1, 2]} + ) + expected = parser.read_csv(StringIO(data), parse_dates={"nominal": [1, 2]}) + + expected = expected.set_index(["nominal", "ID"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [dict(), dict(index_col="C")]) +def test_read_with_parse_dates_scalar_non_bool(all_parsers, kwargs): + # see gh-5636 + parser = all_parsers + msg = ( + "Only booleans, lists, and dictionaries " + "are accepted for the 'parse_dates' parameter" + ) + data = """A,B,C + 1,2,2003-11-1""" + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), parse_dates="C", **kwargs) + + +@pytest.mark.parametrize("parse_dates", [(1,), np.array([4, 5]), {1, 3, 3}]) +def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates): + parser = all_parsers + msg = ( + "Only booleans, lists, and dictionaries " + "are accepted for the 'parse_dates' parameter" + ) + data = """A,B,C + 1,2,2003-11-1""" + + with pytest.raises(TypeError, 
match=msg): + parser.read_csv(StringIO(data), parse_dates=(1,)) + + +@pytest.mark.parametrize("cache_dates", [True, False]) +@pytest.mark.parametrize("value", ["nan", "0", ""]) +def test_bad_date_parse(all_parsers, cache_dates, value): + # if we have an invalid date make sure that we handle this with + # and w/o the cache properly + parser = all_parsers + s = StringIO(("{value},\n".format(value=value)) * 50000) + + parser.read_csv( + s, + header=None, + names=["foo", "bar"], + parse_dates=["foo"], + infer_datetime_format=False, + cache_dates=cache_dates, + ) + + +def test_parse_dates_empty_string(all_parsers): + # see gh-2263 + parser = all_parsers + data = "Date,test\n2012-01-01,1\n,2" + result = parser.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False) + + expected = DataFrame( + [[datetime(2012, 1, 1), 1], [pd.NaT, 2]], columns=["Date", "test"] + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + "a\n04.15.2016", + dict(parse_dates=["a"]), + DataFrame([datetime(2016, 4, 15)], columns=["a"]), + ), + ( + "a\n04.15.2016", + dict(parse_dates=True, index_col=0), + DataFrame(index=DatetimeIndex(["2016-04-15"], name="a")), + ), + ( + "a,b\n04.15.2016,09.16.2013", + dict(parse_dates=["a", "b"]), + DataFrame( + [[datetime(2016, 4, 15), datetime(2013, 9, 16)]], columns=["a", "b"] + ), + ), + ( + "a,b\n04.15.2016,09.16.2013", + dict(parse_dates=True, index_col=[0, 1]), + DataFrame( + index=MultiIndex.from_tuples( + [(datetime(2016, 4, 15), datetime(2013, 9, 16))], names=["a", "b"] + ) + ), + ), + ], +) +def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected): + # see gh-14066 + parser = all_parsers + + result = parser.read_csv(StringIO(data), thousands=".", **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_parse_date_time_multi_level_column_name(all_parsers): + data = """\ +D,T,A,B +date, time,a,b +2001-01-05, 09:00:00, 0.0, 10. +2001-01-06, 00:00:00, 1.0, 11. 
+""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), + header=[0, 1], + parse_dates={"date_time": [0, 1]}, + date_parser=conv.parse_date_time, + ) + + expected_data = [ + [datetime(2001, 1, 5, 9, 0, 0), 0.0, 10.0], + [datetime(2001, 1, 6, 0, 0, 0), 1.0, 11.0], + ] + expected = DataFrame(expected_data, columns=["date_time", ("A", "a"), ("B", "b")]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + """\ +date,time,a,b +2001-01-05, 10:00:00, 0.0, 10. +2001-01-05, 00:00:00, 1., 11. +""", + dict(header=0, parse_dates={"date_time": [0, 1]}), + DataFrame( + [ + [datetime(2001, 1, 5, 10, 0, 0), 0.0, 10], + [datetime(2001, 1, 5, 0, 0, 0), 1.0, 11.0], + ], + columns=["date_time", "a", "b"], + ), + ), + ( + ( + "KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900" + ), + dict(header=None, parse_dates={"actual": [1, 2], "nominal": [1, 3]}), + DataFrame( + [ + [ + datetime(1999, 1, 27, 19, 0), + datetime(1999, 1, 27, 18, 56), + "KORD", + 0.81, + ], + [ + datetime(1999, 1, 27, 20, 0), + datetime(1999, 1, 27, 19, 56), + "KORD", + 0.01, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 20, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 21, 0), + datetime(1999, 1, 27, 21, 18), + "KORD", + -0.99, + ], + [ + datetime(1999, 1, 27, 22, 0), + datetime(1999, 1, 27, 21, 56), + "KORD", + -0.59, + ], + [ + datetime(1999, 1, 27, 23, 0), + datetime(1999, 1, 27, 22, 56), + "KORD", + -0.59, + ], + ], + columns=["actual", "nominal", 0, 4], + ), + ), + ], +) +def test_parse_date_time(all_parsers, data, kwargs, expected): + parser = all_parsers + result = parser.read_csv(StringIO(data), date_parser=conv.parse_date_time, **kwargs) + + # Python can sometimes be flaky 
about how + # the aggregated columns are entered, so + # this standardizes the order. + result = result[expected.columns] + tm.assert_frame_equal(result, expected) + + +def test_parse_date_fields(all_parsers): + parser = all_parsers + data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." + result = parser.read_csv( + StringIO(data), + header=0, + parse_dates={"ymd": [0, 1, 2]}, + date_parser=conv.parse_date_fields, + ) + + expected = DataFrame( + [[datetime(2001, 1, 10), 10.0], [datetime(2001, 2, 1), 11.0]], + columns=["ymd", "a"], + ) + tm.assert_frame_equal(result, expected) + + +def test_parse_date_all_fields(all_parsers): + parser = all_parsers + data = """\ +year,month,day,hour,minute,second,a,b +2001,01,05,10,00,0,0.0,10. +2001,01,5,10,0,00,1.,11. +""" + result = parser.read_csv( + StringIO(data), + header=0, + date_parser=conv.parse_all_fields, + parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + ) + expected = DataFrame( + [ + [datetime(2001, 1, 5, 10, 0, 0), 0.0, 10.0], + [datetime(2001, 1, 5, 10, 0, 0), 1.0, 11.0], + ], + columns=["ymdHMS", "a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +def test_datetime_fractional_seconds(all_parsers): + parser = all_parsers + data = """\ +year,month,day,hour,minute,second,a,b +2001,01,05,10,00,0.123456,0.0,10. +2001,01,5,10,0,0.500000,1.,11. +""" + result = parser.read_csv( + StringIO(data), + header=0, + date_parser=conv.parse_all_fields, + parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + ) + expected = DataFrame( + [ + [datetime(2001, 1, 5, 10, 0, 0, microsecond=123456), 0.0, 10.0], + [datetime(2001, 1, 5, 10, 0, 0, microsecond=500000), 1.0, 11.0], + ], + columns=["ymdHMS", "a", "b"], + ) + tm.assert_frame_equal(result, expected) + + +def test_generic(all_parsers): + parser = all_parsers + data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." 
+ + result = parser.read_csv( + StringIO(data), + header=0, + parse_dates={"ym": [0, 1]}, + date_parser=lambda y, m: date(year=int(y), month=int(m), day=1), + ) + expected = DataFrame( + [[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]], + columns=["ym", "day", "a"], + ) + tm.assert_frame_equal(result, expected) + + +def test_date_parser_resolution_if_not_ns(all_parsers): + # see gh-10245 + parser = all_parsers + data = """\ +date,time,prn,rxstatus +2013-11-03,19:00:00,126,00E80000 +2013-11-03,19:00:00,23,00E80000 +2013-11-03,19:00:00,13,00E80000 +""" + + def date_parser(dt, time): + return np_array_datetime64_compat(dt + "T" + time + "Z", dtype="datetime64[s]") + + result = parser.read_csv( + StringIO(data), + date_parser=date_parser, + parse_dates={"datetime": ["date", "time"]}, + index_col=["datetime", "prn"], + ) + + datetimes = np_array_datetime64_compat( + ["2013-11-03T19:00:00Z"] * 3, dtype="datetime64[s]" + ) + expected = DataFrame( + data={"rxstatus": ["00E80000"] * 3}, + index=MultiIndex.from_tuples( + [(datetimes[0], 126), (datetimes[1], 23), (datetimes[2], 13)], + names=["datetime", "prn"], + ), + ) + tm.assert_frame_equal(result, expected) + + +def test_parse_date_column_with_empty_string(all_parsers): + # see gh-6428 + parser = all_parsers + data = "case,opdate\n7,10/18/2006\n7,10/18/2008\n621, " + result = parser.read_csv(StringIO(data), parse_dates=["opdate"]) + + expected_data = [[7, "10/18/2006"], [7, "10/18/2008"], [621, " "]] + expected = DataFrame(expected_data, columns=["case", "opdate"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,expected", + [ + ( + "a\n135217135789158401\n1352171357E+5", + DataFrame({"a": [135217135789158401, 135217135700000]}, dtype="float64"), + ), + ( + "a\n99999999999\n123456789012345\n1234E+0", + DataFrame({"a": [99999999999, 123456789012345, 1234]}, dtype="float64"), + ), + ], +) +@pytest.mark.parametrize("parse_dates", [True, False]) +def 
test_parse_date_float(all_parsers, data, expected, parse_dates): + # see gh-2697 + # + # Date parsing should fail, so we leave the data untouched + # (i.e. float precision should remain unchanged). + parser = all_parsers + + result = parser.read_csv(StringIO(data), parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_parse_timezone(all_parsers): + # see gh-22256 + parser = all_parsers + data = """dt,val + 2018-01-04 09:01:00+09:00,23350 + 2018-01-04 09:02:00+09:00,23400 + 2018-01-04 09:03:00+09:00,23400 + 2018-01-04 09:04:00+09:00,23400 + 2018-01-04 09:05:00+09:00,23400""" + result = parser.read_csv(StringIO(data), parse_dates=["dt"]) + + dti = pd.date_range( + start="2018-01-04 09:01:00", + end="2018-01-04 09:05:00", + freq="1min", + tz=pytz.FixedOffset(540), + ) + expected_data = {"dt": dti, "val": [23350, 23400, 23400, 23400, 23400]} + + expected = DataFrame(expected_data) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "date_string", + ["32/32/2019", "02/30/2019", "13/13/2019", "13/2019", "a3/11/2018", "10/11/2o17"], +) +def test_invalid_parse_delimited_date(all_parsers, date_string): + parser = all_parsers + expected = DataFrame({0: [date_string]}, dtype="object") + result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "date_string,dayfirst,expected", + [ + # %d/%m/%Y; month > 12 thus replacement + ("13/02/2019", False, datetime(2019, 2, 13)), + ("13/02/2019", True, datetime(2019, 2, 13)), + # %m/%d/%Y; day > 12 thus there will be no replacement + ("02/13/2019", False, datetime(2019, 2, 13)), + ("02/13/2019", True, datetime(2019, 2, 13)), + # %d/%m/%Y; dayfirst==True thus replacement + ("04/02/2019", True, datetime(2019, 2, 4)), + ], +) +def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected): + parser = all_parsers + expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") + 
result = parser.read_csv( + StringIO(date_string), header=None, dayfirst=dayfirst, parse_dates=[0] + ) + tm.assert_frame_equal(result, expected) + + +def _helper_hypothesis_delimited_date(call, date_string, **kwargs): + msg, result = None, None + try: + result = call(date_string, **kwargs) + except ValueError as er: + msg = str(er) + pass + return msg, result + + +@given(date_strategy) +@settings(deadline=None) +@pytest.mark.parametrize("delimiter", list(" -./")) +@pytest.mark.parametrize("dayfirst", [True, False]) +@pytest.mark.parametrize( + "date_format", + ["%d %m %Y", "%m %d %Y", "%m %Y", "%Y %m %d", "%y %m %d", "%Y%m%d", "%y%m%d"], +) +def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_datetime): + if date_format == "%m %Y" and delimiter == ".": + pytest.skip( + "parse_datetime_string cannot reliably tell whether \ + e.g. %m.%Y is a float or a date, thus we skip it" + ) + result, expected = None, None + except_in_dateutil, except_out_dateutil = None, None + date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) + + except_out_dateutil, result = _helper_hypothesis_delimited_date( + parse_datetime_string, date_string, dayfirst=dayfirst + ) + except_in_dateutil, expected = _helper_hypothesis_delimited_date( + du_parse, + date_string, + default=_DEFAULT_DATETIME, + dayfirst=dayfirst, + yearfirst=False, + ) + + assert except_out_dateutil == except_in_dateutil + assert result == expected diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py new file mode 100644 index 00000000..7367b19b --- /dev/null +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -0,0 +1,298 @@ +""" +Tests that apply specifically to the Python parser. Unless specifically +stated as a Python-specific issue, the goal is to eventually move as many of +these tests out of this module as soon as the C parser can accept further +arguments when parsing. 
+""" + +import csv +from io import BytesIO, StringIO + +import pytest + +from pandas.errors import ParserError + +from pandas import DataFrame, Index, MultiIndex +import pandas._testing as tm + + +def test_default_separator(python_parser_only): + # see gh-17333 + # + # csv.Sniffer in Python treats "o" as separator. + data = "aob\n1o2\n3o4" + parser = python_parser_only + expected = DataFrame({"a": [1, 3], "b": [2, 4]}) + + result = parser.read_csv(StringIO(data), sep=None) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("skipfooter", ["foo", 1.5, True]) +def test_invalid_skipfooter_non_int(python_parser_only, skipfooter): + # see gh-15925 (comment) + data = "a\n1\n2" + parser = python_parser_only + msg = "skipfooter must be an integer" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), skipfooter=skipfooter) + + +def test_invalid_skipfooter_negative(python_parser_only): + # see gh-15925 (comment) + data = "a\n1\n2" + parser = python_parser_only + msg = "skipfooter cannot be negative" + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), skipfooter=-1) + + +@pytest.mark.parametrize("kwargs", [dict(sep=None), dict(delimiter="|")]) +def test_sniff_delimiter(python_parser_only, kwargs): + data = """index|A|B|C +foo|1|2|3 +bar|4|5|6 +baz|7|8|9 +""" + parser = python_parser_only + result = parser.read_csv(StringIO(data), index_col=0, **kwargs) + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=["A", "B", "C"], + index=Index(["foo", "bar", "baz"], name="index"), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +def test_sniff_delimiter_encoding(python_parser_only, encoding): + parser = python_parser_only + data = """ignore this +ignore this too +index|A|B|C +foo|1|2|3 +bar|4|5|6 +baz|7|8|9 +""" + + if encoding is not None: + from io import TextIOWrapper + + data = data.encode(encoding) + data = BytesIO(data) + data = 
TextIOWrapper(data, encoding=encoding) + else: + data = StringIO(data) + + result = parser.read_csv(data, index_col=0, sep=None, skiprows=2, encoding=encoding) + expected = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=["A", "B", "C"], + index=Index(["foo", "bar", "baz"], name="index"), + ) + tm.assert_frame_equal(result, expected) + + +def test_single_line(python_parser_only): + # see gh-6607: sniff separator + parser = python_parser_only + result = parser.read_csv(StringIO("1,2"), names=["a", "b"], header=None, sep=None) + + expected = DataFrame({"a": [1], "b": [2]}) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("kwargs", [dict(skipfooter=2), dict(nrows=3)]) +def test_skipfooter(python_parser_only, kwargs): + # see gh-6607 + data = """A,B,C +1,2,3 +4,5,6 +7,8,9 +want to skip this +also also skip this +""" + parser = python_parser_only + result = parser.read_csv(StringIO(data), **kwargs) + + expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["A", "B", "C"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "compression,klass", [("gzip", "GzipFile"), ("bz2", "BZ2File")] +) +def test_decompression_regex_sep(python_parser_only, csv1, compression, klass): + # see gh-6607 + parser = python_parser_only + + with open(csv1, "rb") as f: + data = f.read() + + data = data.replace(b",", b"::") + expected = parser.read_csv(csv1) + + module = pytest.importorskip(compression) + klass = getattr(module, klass) + + with tm.ensure_clean() as path: + tmp = klass(path, mode="wb") + tmp.write(data) + tmp.close() + + result = parser.read_csv(path, sep="::", compression=compression) + tm.assert_frame_equal(result, expected) + + +def test_read_csv_buglet_4x_multi_index(python_parser_only): + # see gh-6607 + data = """ A B C D E +one two three four +a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 +a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 +x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" + parser = 
python_parser_only + + expected = DataFrame( + [ + [-0.5109, -2.3358, -0.4645, 0.05076, 0.3640], + [0.4473, 1.4152, 0.2834, 1.00661, 0.1744], + [-0.6662, -0.5243, -0.3580, 0.89145, 2.5838], + ], + columns=["A", "B", "C", "D", "E"], + index=MultiIndex.from_tuples( + [("a", "b", 10.0032, 5), ("a", "q", 20, 4), ("x", "q", 30, 3)], + names=["one", "two", "three", "four"], + ), + ) + result = parser.read_csv(StringIO(data), sep=r"\s+") + tm.assert_frame_equal(result, expected) + + +def test_read_csv_buglet_4x_multi_index2(python_parser_only): + # see gh-6893 + data = " A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9" + parser = python_parser_only + + expected = DataFrame.from_records( + [(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)], + columns=list("abcABC"), + index=list("abc"), + ) + result = parser.read_csv(StringIO(data), sep=r"\s+") + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("add_footer", [True, False]) +def test_skipfooter_with_decimal(python_parser_only, add_footer): + # see gh-6971 + data = "1#2\n3#4" + parser = python_parser_only + expected = DataFrame({"a": [1.2, 3.4]}) + + if add_footer: + # The stray footer line should not mess with the + # casting of the first two lines if we skip it. 
+ kwargs = dict(skipfooter=1) + data += "\nFooter" + else: + kwargs = dict() + + result = parser.read_csv(StringIO(data), names=["a"], decimal="#", **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "sep", ["::", "#####", "!!!", "123", "#1!c5", "%!c!d", "@@#4:2", "_!pd#_"] +) +@pytest.mark.parametrize( + "encoding", ["utf-16", "utf-16-be", "utf-16-le", "utf-32", "cp037"] +) +def test_encoding_non_utf8_multichar_sep(python_parser_only, sep, encoding): + # see gh-3404 + expected = DataFrame({"a": [1], "b": [2]}) + parser = python_parser_only + + data = "1" + sep + "2" + encoded_data = data.encode(encoding) + + result = parser.read_csv( + BytesIO(encoded_data), sep=sep, names=["a", "b"], encoding=encoding + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE]) +def test_multi_char_sep_quotes(python_parser_only, quoting): + # see gh-13374 + kwargs = dict(sep=",,") + parser = python_parser_only + + data = 'a,,b\n1,,a\n2,,"2,,b"' + msg = "ignored when a multi-char delimiter is used" + + def fail_read(): + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), quoting=quoting, **kwargs) + + if quoting == csv.QUOTE_NONE: + # We expect no match, so there should be an assertion + # error out of the inner context manager. + with pytest.raises(AssertionError): + fail_read() + else: + fail_read() + + +def test_none_delimiter(python_parser_only, capsys): + # see gh-13374 and gh-17465 + parser = python_parser_only + data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9" + expected = DataFrame({"a": [0, 7], "b": [1, 8], "c": [2, 9]}) + + # We expect the third line in the data to be + # skipped because it is malformed, but we do + # not expect any errors to occur. 
+ result = parser.read_csv( + StringIO(data), header=0, sep=None, warn_bad_lines=True, error_bad_lines=False + ) + tm.assert_frame_equal(result, expected) + + captured = capsys.readouterr() + assert "Skipping line 3" in captured.err + + +@pytest.mark.parametrize("data", ['a\n1\n"b"a', 'a,b,c\ncat,foo,bar\ndog,foo,"baz']) +@pytest.mark.parametrize("skipfooter", [0, 1]) +def test_skipfooter_bad_row(python_parser_only, data, skipfooter): + # see gh-13879 and gh-15910 + msg = "parsing errors in the skipped footer rows" + parser = python_parser_only + + def fail_read(): + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), skipfooter=skipfooter) + + if skipfooter: + fail_read() + else: + # We expect no match, so there should be an assertion + # error out of the inner context manager. + with pytest.raises(AssertionError): + fail_read() + + +def test_malformed_skipfooter(python_parser_only): + parser = python_parser_only + data = """ignore +A,B,C +1,2,3 # comment +1,2,3,4,5 +2,3,4 +footer +""" + msg = "Expected 3 fields in line 4, saw 5" + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1) diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py new file mode 100644 index 00000000..14773dfb --- /dev/null +++ b/pandas/tests/io/parser/test_quoting.py @@ -0,0 +1,159 @@ +""" +Tests that quoting specifications are properly handled +during parsing for all of the parsers defined in parsers.py +""" + +import csv +from io import StringIO + +import pytest + +from pandas.errors import ParserError + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + (dict(quotechar="foo"), '"quotechar" must be a(n)? 
1-character string'), + ( + dict(quotechar=None, quoting=csv.QUOTE_MINIMAL), + "quotechar must be set if quoting enabled", + ), + (dict(quotechar=2), '"quotechar" must be string, not int'), + ], +) +def test_bad_quote_char(all_parsers, kwargs, msg): + data = "1,2,3" + parser = all_parsers + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + + +@pytest.mark.parametrize( + "quoting,msg", + [ + ("foo", '"quoting" must be an integer'), + (5, 'bad "quoting" value'), # quoting must be in the range [0, 3] + ], +) +def test_bad_quoting(all_parsers, quoting, msg): + data = "1,2,3" + parser = all_parsers + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), quoting=quoting) + + +def test_quote_char_basic(all_parsers): + parser = all_parsers + data = 'a,b,c\n1,2,"cat"' + expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"]) + + result = parser.read_csv(StringIO(data), quotechar='"') + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quote_char", ["~", "*", "%", "$", "@", "P"]) +def test_quote_char_various(all_parsers, quote_char): + parser = all_parsers + expected = DataFrame([[1, 2, "cat"]], columns=["a", "b", "c"]) + + data = 'a,b,c\n1,2,"cat"' + new_data = data.replace('"', quote_char) + + result = parser.read_csv(StringIO(new_data), quotechar=quote_char) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quoting", [csv.QUOTE_MINIMAL, csv.QUOTE_NONE]) +@pytest.mark.parametrize("quote_char", ["", None]) +def test_null_quote_char(all_parsers, quoting, quote_char): + kwargs = dict(quotechar=quote_char, quoting=quoting) + data = "a,b,c\n1,2,3" + parser = all_parsers + + if quoting != csv.QUOTE_NONE: + # Sanity checking. 
+ msg = "quotechar must be set if quoting enabled" + + with pytest.raises(TypeError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,exp_data", + [ + (dict(), [[1, 2, "foo"]]), # Test default. + # QUOTE_MINIMAL only applies to CSV writing, so no effect on reading. + (dict(quotechar='"', quoting=csv.QUOTE_MINIMAL), [[1, 2, "foo"]]), + # QUOTE_MINIMAL only applies to CSV writing, so no effect on reading. + (dict(quotechar='"', quoting=csv.QUOTE_ALL), [[1, 2, "foo"]]), + # QUOTE_NONE tells the reader to do no special handling + # of quote characters and leave them alone. + (dict(quotechar='"', quoting=csv.QUOTE_NONE), [[1, 2, '"foo"']]), + # QUOTE_NONNUMERIC tells the reader to cast + # all non-quoted fields to float + (dict(quotechar='"', quoting=csv.QUOTE_NONNUMERIC), [[1.0, 2.0, "foo"]]), + ], +) +def test_quoting_various(all_parsers, kwargs, exp_data): + data = '1,2,"foo"' + parser = all_parsers + columns = ["a", "b", "c"] + + result = parser.read_csv(StringIO(data), names=columns, **kwargs) + expected = DataFrame(exp_data, columns=columns) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "doublequote,exp_data", [(True, [[3, '4 " 5']]), (False, [[3, '4 " 5"']])] +) +def test_double_quote(all_parsers, doublequote, exp_data): + parser = all_parsers + data = 'a,b\n3,"4 "" 5"' + + result = parser.read_csv(StringIO(data), quotechar='"', doublequote=doublequote) + expected = DataFrame(exp_data, columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("quotechar", ['"', "\u0001"]) +def test_quotechar_unicode(all_parsers, quotechar): + # see gh-14477 + data = "a\n1" + parser = all_parsers + expected = DataFrame({"a": [1]}) + + result = parser.read_csv(StringIO(data), quotechar=quotechar) + 
tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("balanced", [True, False]) +def test_unbalanced_quoting(all_parsers, balanced): + # see gh-22789. + parser = all_parsers + data = 'a,b,c\n1,2,"3' + + if balanced: + # Re-balance the quoting and read in without errors. + expected = DataFrame([[1, 2, 3]], columns=["a", "b", "c"]) + result = parser.read_csv(StringIO(data + '"')) + tm.assert_frame_equal(result, expected) + else: + msg = ( + "EOF inside string starting at row 1" + if parser.engine == "c" + else "unexpected end of data" + ) + + with pytest.raises(ParserError, match=msg): + parser.read_csv(StringIO(data)) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py new file mode 100644 index 00000000..27aef237 --- /dev/null +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -0,0 +1,618 @@ +""" +Tests the 'read_fwf' function in parsers.py. This +test suite is independent of the others because the +engine is set to 'python-fwf' internally. 
+""" + +from datetime import datetime +from io import BytesIO, StringIO + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, DatetimeIndex +import pandas._testing as tm + +from pandas.io.parsers import EmptyDataError, read_csv, read_fwf + + +def test_basic(): + data = """\ +A B C D +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + result = read_fwf(StringIO(data)) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + [201160, 364.136849, 183.628767, 11806.2], + [201161, 413.836124, 184.375703, 11916.8], + [201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + +def test_colspecs(): + data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + result = read_fwf(StringIO(data), colspecs=colspecs) + + expected = DataFrame( + [ + [2011, 58, 360.242940, 149.910199, 11950.7], + [2011, 59, 444.953632, 166.985655, 11788.4], + [2011, 60, 364.136849, 183.628767, 11806.2], + [2011, 61, 413.836124, 184.375703, 11916.8], + [2011, 62, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D", "E"], + ) + tm.assert_frame_equal(result, expected) + + +def test_widths(): + data = """\ +A B C D E +2011 58 360.242940 149.910199 11950.7 +2011 59 444.953632 166.985655 11788.4 +2011 60 364.136849 183.628767 11806.2 +2011 61 413.836124 184.375703 11916.8 +2011 62 502.953953 173.237159 12468.3 +""" + result = read_fwf(StringIO(data), widths=[5, 5, 13, 13, 7]) + + expected = DataFrame( + [ + [2011, 58, 360.242940, 149.910199, 11950.7], + [2011, 
59, 444.953632, 166.985655, 11788.4], + [2011, 60, 364.136849, 183.628767, 11806.2], + [2011, 61, 413.836124, 184.375703, 11916.8], + [2011, 62, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D", "E"], + ) + tm.assert_frame_equal(result, expected) + + +def test_non_space_filler(): + # From Thomas Kluyver: + # + # Apparently, some non-space filler characters can be seen, this is + # supported by specifying the 'delimiter' character: + # + # http://publib.boulder.ibm.com/infocenter/dmndhelp/v6r1mx/index.jsp?topic=/com.ibm.wbit.612.help.config.doc/topics/rfixwidth.html + data = """\ +A~~~~B~~~~C~~~~~~~~~~~~D~~~~~~~~~~~~E +201158~~~~360.242940~~~149.910199~~~11950.7 +201159~~~~444.953632~~~166.985655~~~11788.4 +201160~~~~364.136849~~~183.628767~~~11806.2 +201161~~~~413.836124~~~184.375703~~~11916.8 +201162~~~~502.953953~~~173.237159~~~12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + result = read_fwf(StringIO(data), colspecs=colspecs, delimiter="~") + + expected = DataFrame( + [ + [2011, 58, 360.242940, 149.910199, 11950.7], + [2011, 59, 444.953632, 166.985655, 11788.4], + [2011, 60, 364.136849, 183.628767, 11806.2], + [2011, 61, 413.836124, 184.375703, 11916.8], + [2011, 62, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D", "E"], + ) + tm.assert_frame_equal(result, expected) + + +def test_over_specified(): + data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + + with pytest.raises(ValueError, match="must specify only one of"): + read_fwf(StringIO(data), colspecs=colspecs, widths=[6, 10, 10, 7]) + + +def test_under_specified(): + data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 
184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + with pytest.raises(ValueError, match="Must specify either"): + read_fwf(StringIO(data), colspecs=None, widths=None) + + +def test_read_csv_compat(): + csv_data = """\ +A,B,C,D,E +2011,58,360.242940,149.910199,11950.7 +2011,59,444.953632,166.985655,11788.4 +2011,60,364.136849,183.628767,11806.2 +2011,61,413.836124,184.375703,11916.8 +2011,62,502.953953,173.237159,12468.3 +""" + expected = read_csv(StringIO(csv_data), engine="python") + + fwf_data = """\ +A B C D E +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 +201160 364.136849 183.628767 11806.2 +201161 413.836124 184.375703 11916.8 +201162 502.953953 173.237159 12468.3 +""" + colspecs = [(0, 4), (4, 8), (8, 20), (21, 33), (34, 43)] + result = read_fwf(StringIO(fwf_data), colspecs=colspecs) + tm.assert_frame_equal(result, expected) + + +def test_bytes_io_input(): + result = read_fwf( + BytesIO("שלום\nשלום".encode("utf8")), widths=[2, 2], encoding="utf8" + ) + expected = DataFrame([["של", "ום"]], columns=["של", "ום"]) + tm.assert_frame_equal(result, expected) + + +def test_fwf_colspecs_is_list_or_tuple(): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + msg = "column specifications must be a list or tuple.+" + + with pytest.raises(TypeError, match=msg): + read_fwf(StringIO(data), colspecs={"a": 1}, delimiter=",") + + +def test_fwf_colspecs_is_list_or_tuple_of_two_element_tuples(): + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + msg = "Each column specification must be.+" + + with pytest.raises(TypeError, match=msg): + read_fwf(StringIO(data), [("a", 1)]) + + +@pytest.mark.parametrize( + "colspecs,exp_data", + [ + ([(0, 3), (3, None)], [[123, 456], [456, 789]]), + ([(None, 3), (3, 6)], [[123, 456], [456, 789]]), + ([(0, None), (3, None)], [[123456, 456], 
[456789, 789]]), + ([(None, None), (3, 6)], [[123456, 456], [456789, 789]]), + ], +) +def test_fwf_colspecs_none(colspecs, exp_data): + # see gh-7079 + data = """\ +123456 +456789 +""" + expected = DataFrame(exp_data) + + result = read_fwf(StringIO(data), colspecs=colspecs, header=None) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "infer_nrows,exp_data", + [ + # infer_nrows --> colspec == [(2, 3), (5, 6)] + (1, [[1, 2], [3, 8]]), + # infer_nrows > number of rows + (10, [[1, 2], [123, 98]]), + ], +) +def test_fwf_colspecs_infer_nrows(infer_nrows, exp_data): + # see gh-15138 + data = """\ + 1 2 +123 98 +""" + expected = DataFrame(exp_data) + + result = read_fwf(StringIO(data), infer_nrows=infer_nrows, header=None) + tm.assert_frame_equal(result, expected) + + +def test_fwf_regression(): + # see gh-3594 + # + # Turns out "T060" is parsable as a datetime slice! + tz_list = [1, 10, 20, 30, 60, 80, 100] + widths = [16] + [8] * len(tz_list) + names = ["SST"] + ["T{z:03d}".format(z=z) for z in tz_list[1:]] + + data = """ 2009164202000 9.5403 9.4105 8.6571 7.8372 6.0612 5.8843 5.5192 +2009164203000 9.5435 9.2010 8.6167 7.8176 6.0804 5.8728 5.4869 +2009164204000 9.5873 9.1326 8.4694 7.5889 6.0422 5.8526 5.4657 +2009164205000 9.5810 9.0896 8.4009 7.4652 6.0322 5.8189 5.4379 +2009164210000 9.6034 9.0897 8.3822 7.4905 6.0908 5.7904 5.4039 +""" + + result = read_fwf( + StringIO(data), + index_col=0, + header=None, + names=names, + widths=widths, + parse_dates=True, + date_parser=lambda s: datetime.strptime(s, "%Y%j%H%M%S"), + ) + expected = DataFrame( + [ + [9.5403, 9.4105, 8.6571, 7.8372, 6.0612, 5.8843, 5.5192], + [9.5435, 9.2010, 8.6167, 7.8176, 6.0804, 5.8728, 5.4869], + [9.5873, 9.1326, 8.4694, 7.5889, 6.0422, 5.8526, 5.4657], + [9.5810, 9.0896, 8.4009, 7.4652, 6.0322, 5.8189, 5.4379], + [9.6034, 9.0897, 8.3822, 7.4905, 6.0908, 5.7904, 5.4039], + ], + index=DatetimeIndex( + [ + "2009-06-13 20:20:00", + "2009-06-13 20:30:00", + "2009-06-13 
20:40:00", + "2009-06-13 20:50:00", + "2009-06-13 21:00:00", + ] + ), + columns=["SST", "T010", "T020", "T030", "T060", "T080", "T100"], + ) + tm.assert_frame_equal(result, expected) + + +def test_fwf_for_uint8(): + data = """1421302965.213420 PRI=3 PGN=0xef00 DST=0x17 SRC=0x28 04 154 00 00 00 00 00 127 +1421302964.226776 PRI=6 PGN=0xf002 SRC=0x47 243 00 00 255 247 00 00 71""" # noqa + df = read_fwf( + StringIO(data), + colspecs=[(0, 17), (25, 26), (33, 37), (49, 51), (58, 62), (63, 1000)], + names=["time", "pri", "pgn", "dst", "src", "data"], + converters={ + "pgn": lambda x: int(x, 16), + "src": lambda x: int(x, 16), + "dst": lambda x: int(x, 16), + "data": lambda x: len(x.split(" ")), + }, + ) + + expected = DataFrame( + [ + [1421302965.213420, 3, 61184, 23, 40, 8], + [1421302964.226776, 6, 61442, None, 71, 8], + ], + columns=["time", "pri", "pgn", "dst", "src", "data"], + ) + expected["dst"] = expected["dst"].astype(object) + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize("comment", ["#", "~", "!"]) +def test_fwf_comment(comment): + data = """\ + 1 2. 4 #hello world + 5 NaN 10.0 +""" + data = data.replace("#", comment) + + colspecs = [(0, 3), (4, 9), (9, 25)] + expected = DataFrame([[1, 2.0, 4], [5, np.nan, 10.0]]) + + result = read_fwf(StringIO(data), colspecs=colspecs, header=None, comment=comment) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize("thousands", [",", "#", "~"]) +def test_fwf_thousands(thousands): + data = """\ + 1 2,334.0 5 +10 13 10. 
+""" + data = data.replace(",", thousands) + + colspecs = [(0, 3), (3, 11), (12, 16)] + expected = DataFrame([[1, 2334.0, 5], [10, 13, 10.0]]) + + result = read_fwf( + StringIO(data), header=None, colspecs=colspecs, thousands=thousands + ) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize("header", [True, False]) +def test_bool_header_arg(header): + # see gh-6114 + data = """\ +MyColumn + a + b + a + b""" + + msg = "Passing a bool to header is invalid" + with pytest.raises(TypeError, match=msg): + read_fwf(StringIO(data), header=header) + + +def test_full_file(): + # File with all values. + test = """index A B C +2000-01-03T00:00:00 0.980268513777 3 foo +2000-01-04T00:00:00 1.04791624281 -4 bar +2000-01-05T00:00:00 0.498580885705 73 baz +2000-01-06T00:00:00 1.12020151869 1 foo +2000-01-07T00:00:00 0.487094399463 0 bar +2000-01-10T00:00:00 0.836648671666 2 baz +2000-01-11T00:00:00 0.157160753327 34 foo""" + colspecs = ((0, 19), (21, 35), (38, 40), (42, 45)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_full_file_with_missing(): + # File with missing values. + test = """index A B C +2000-01-03T00:00:00 0.980268513777 3 foo +2000-01-04T00:00:00 1.04791624281 -4 bar + 0.498580885705 73 baz +2000-01-06T00:00:00 1.12020151869 1 foo +2000-01-07T00:00:00 0 bar +2000-01-10T00:00:00 0.836648671666 2 baz + 34""" + colspecs = ((0, 19), (21, 35), (38, 40), (42, 45)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_full_file_with_spaces(): + # File with spaces in columns. 
+ test = """ +Account Name Balance CreditLimit AccountCreated +101 Keanu Reeves 9315.45 10000.00 1/17/1998 +312 Gerard Butler 90.00 1000.00 8/6/2003 +868 Jennifer Love Hewitt 0 17000.00 5/25/1985 +761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 +317 Bill Murray 789.65 5000.00 2/5/2007 +""".strip( + "\r\n" + ) + colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_full_file_with_spaces_and_missing(): + # File with spaces and missing values in columns. + test = """ +Account Name Balance CreditLimit AccountCreated +101 10000.00 1/17/1998 +312 Gerard Butler 90.00 1000.00 8/6/2003 +868 5/25/1985 +761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 +317 Bill Murray 789.65 +""".strip( + "\r\n" + ) + colspecs = ((0, 7), (8, 28), (30, 38), (42, 53), (56, 70)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_messed_up_data(): + # Completely messed up file. 
+ test = """ + Account Name Balance Credit Limit Account Created + 101 10000.00 1/17/1998 + 312 Gerard Butler 90.00 1000.00 + + 761 Jada Pinkett-Smith 49654.87 100000.00 12/5/2006 + 317 Bill Murray 789.65 +""".strip( + "\r\n" + ) + colspecs = ((2, 10), (15, 33), (37, 45), (49, 61), (64, 79)) + expected = read_fwf(StringIO(test), colspecs=colspecs) + + result = read_fwf(StringIO(test)) + tm.assert_frame_equal(result, expected) + + +def test_multiple_delimiters(): + test = r""" +col1~~~~~col2 col3++++++++++++++++++col4 +~~22.....11.0+++foo~~~~~~~~~~Keanu Reeves + 33+++122.33\\\bar.........Gerard Butler +++44~~~~12.01 baz~~Jennifer Love Hewitt +~~55 11+++foo++++Jada Pinkett-Smith +..66++++++.03~~~bar Bill Murray +""".strip( + "\r\n" + ) + delimiter = " +~.\\" + colspecs = ((0, 4), (7, 13), (15, 19), (21, 41)) + expected = read_fwf(StringIO(test), colspecs=colspecs, delimiter=delimiter) + + result = read_fwf(StringIO(test), delimiter=delimiter) + tm.assert_frame_equal(result, expected) + + +def test_variable_width_unicode(): + data = """ +שלום שלום +ום שלל +של ום +""".strip( + "\r\n" + ) + encoding = "utf8" + kwargs = dict(header=None, encoding=encoding) + + expected = read_fwf( + BytesIO(data.encode(encoding)), colspecs=[(0, 4), (5, 9)], **kwargs + ) + result = read_fwf(BytesIO(data.encode(encoding)), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [dict(), {"a": "float64", "b": str, "c": "int32"}]) +def test_dtype(dtype): + data = """ a b c +1 2 3.2 +3 4 5.2 +""" + colspecs = [(0, 5), (5, 10), (10, None)] + result = read_fwf(StringIO(data), colspecs=colspecs, dtype=dtype) + + expected = pd.DataFrame( + {"a": [1, 3], "b": [2, 4], "c": [3.2, 5.2]}, columns=["a", "b", "c"] + ) + + for col, dt in dtype.items(): + expected[col] = expected[col].astype(dt) + + tm.assert_frame_equal(result, expected) + + +def test_skiprows_inference(): + # see gh-11256 + data = """ +Text contained in the file header + +DataCol1 DataCol2 + 0.0 1.0 + 
101.6 956.1 +""".strip() + skiprows = 2 + expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True) + + result = read_fwf(StringIO(data), skiprows=skiprows) + tm.assert_frame_equal(result, expected) + + +def test_skiprows_by_index_inference(): + data = """ +To be skipped +Not To Be Skipped +Once more to be skipped +123 34 8 123 +456 78 9 456 +""".strip() + skiprows = [0, 2] + expected = read_csv(StringIO(data), skiprows=skiprows, delim_whitespace=True) + + result = read_fwf(StringIO(data), skiprows=skiprows) + tm.assert_frame_equal(result, expected) + + +def test_skiprows_inference_empty(): + data = """ +AA BBB C +12 345 6 +78 901 2 +""".strip() + + msg = "No rows from which to infer column width" + with pytest.raises(EmptyDataError, match=msg): + read_fwf(StringIO(data), skiprows=3) + + +def test_whitespace_preservation(): + # see gh-16772 + header = None + csv_data = """ + a ,bbb + cc,dd """ + + fwf_data = """ + a bbb + ccdd """ + result = read_fwf( + StringIO(fwf_data), widths=[3, 3], header=header, skiprows=[0], delimiter="\n\t" + ) + expected = read_csv(StringIO(csv_data), header=header) + tm.assert_frame_equal(result, expected) + + +def test_default_delimiter(): + header = None + csv_data = """ +a,bbb +cc,dd""" + + fwf_data = """ +a \tbbb +cc\tdd """ + result = read_fwf(StringIO(fwf_data), widths=[3, 3], header=header, skiprows=[0]) + expected = read_csv(StringIO(csv_data), header=header) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("infer", [True, False, None]) +def test_fwf_compression(compression_only, infer): + data = """1111111111 + 2222222222 + 3333333333""".strip() + + compression = compression_only + extension = "gz" if compression == "gzip" else compression + + kwargs = dict(widths=[5, 5], names=["one", "two"]) + expected = read_fwf(StringIO(data), **kwargs) + + data = bytes(data, encoding="utf-8") + + with tm.ensure_clean(filename="tmp." 
+ extension) as path: + tm.write_to_compressed(compression, path, data) + + if infer is not None: + kwargs["compression"] = "infer" if infer else compression + + result = read_fwf(path, **kwargs) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py new file mode 100644 index 00000000..fdccef11 --- /dev/null +++ b/pandas/tests/io/parser/test_skiprows.py @@ -0,0 +1,252 @@ +""" +Tests that skipped rows are properly handled during +parsing for all of the parsers defined in parsers.py +""" + +from datetime import datetime +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import EmptyDataError + +from pandas import DataFrame, Index +import pandas._testing as tm + + +@pytest.mark.parametrize("skiprows", [list(range(6)), 6]) +def test_skip_rows_bug(all_parsers, skiprows): + # see gh-505 + parser = all_parsers + text = """#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +#foo,a,b,c +1/1/2000,1.,2.,3. 
+1/2/2000,4,5,6 +1/3/2000,7,8,9 +""" + result = parser.read_csv( + StringIO(text), skiprows=skiprows, header=None, index_col=0, parse_dates=True + ) + index = Index( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 + ) + + expected = DataFrame( + np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index + ) + tm.assert_frame_equal(result, expected) + + +def test_deep_skip_rows(all_parsers): + # see gh-4382 + parser = all_parsers + data = "a,b,c\n" + "\n".join( + [",".join([str(i), str(i + 1), str(i + 2)]) for i in range(10)] + ) + condensed_data = "a,b,c\n" + "\n".join( + [",".join([str(i), str(i + 1), str(i + 2)]) for i in [0, 1, 2, 3, 4, 6, 8, 9]] + ) + + result = parser.read_csv(StringIO(data), skiprows=[6, 8]) + condensed_result = parser.read_csv(StringIO(condensed_data)) + tm.assert_frame_equal(result, condensed_result) + + +def test_skip_rows_blank(all_parsers): + # see gh-9832 + parser = all_parsers + text = """#foo,a,b,c +#foo,a,b,c + +#foo,a,b,c +#foo,a,b,c + +1/1/2000,1.,2.,3. 
+1/2/2000,4,5,6 +1/3/2000,7,8,9 +""" + data = parser.read_csv( + StringIO(text), skiprows=6, header=None, index_col=0, parse_dates=True + ) + index = Index( + [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], name=0 + ) + + expected = DataFrame( + np.arange(1.0, 10.0).reshape((3, 3)), columns=[1, 2, 3], index=index + ) + tm.assert_frame_equal(data, expected) + + +@pytest.mark.parametrize( + "data,kwargs,expected", + [ + ( + """id,text,num_lines +1,"line 11 +line 12",2 +2,"line 21 +line 22",2 +3,"line 31",1""", + dict(skiprows=[1]), + DataFrame( + [[2, "line 21\nline 22", 2], [3, "line 31", 1]], + columns=["id", "text", "num_lines"], + ), + ), + ( + "a,b,c\n~a\n b~,~e\n d~,~f\n f~\n1,2,~12\n 13\n 14~", + dict(quotechar="~", skiprows=[2]), + DataFrame([["a\n b", "e\n d", "f\n f"]], columns=["a", "b", "c"]), + ), + ( + ( + "Text,url\n~example\n " + "sentence\n one~,url1\n~" + "example\n sentence\n two~,url2\n~" + "example\n sentence\n three~,url3" + ), + dict(quotechar="~", skiprows=[1, 3]), + DataFrame([["example\n sentence\n two", "url2"]], columns=["Text", "url"]), + ), + ], +) +def test_skip_row_with_newline(all_parsers, data, kwargs, expected): + # see gh-12775 and gh-10911 + parser = all_parsers + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_skip_row_with_quote(all_parsers): + # see gh-12775 and gh-10911 + parser = all_parsers + data = """id,text,num_lines +1,"line '11' line 12",2 +2,"line '21' line 22",2 +3,"line '31' line 32",1""" + + exp_data = [[2, "line '21' line 22", 2], [3, "line '31' line 32", 1]] + expected = DataFrame(exp_data, columns=["id", "text", "num_lines"]) + + result = parser.read_csv(StringIO(data), skiprows=[1]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,exp_data", + [ + ( + """id,text,num_lines +1,"line \n'11' line 12",2 +2,"line \n'21' line 22",2 +3,"line \n'31' line 32",1""", + [[2, "line \n'21' line 22", 2], [3, "line 
\n'31' line 32", 1]], + ), + ( + """id,text,num_lines +1,"line '11\n' line 12",2 +2,"line '21\n' line 22",2 +3,"line '31\n' line 32",1""", + [[2, "line '21\n' line 22", 2], [3, "line '31\n' line 32", 1]], + ), + ( + """id,text,num_lines +1,"line '11\n' \r\tline 12",2 +2,"line '21\n' \r\tline 22",2 +3,"line '31\n' \r\tline 32",1""", + [[2, "line '21\n' \r\tline 22", 2], [3, "line '31\n' \r\tline 32", 1]], + ), + ], +) +def test_skip_row_with_newline_and_quote(all_parsers, data, exp_data): + # see gh-12775 and gh-10911 + parser = all_parsers + result = parser.read_csv(StringIO(data), skiprows=[1]) + + expected = DataFrame(exp_data, columns=["id", "text", "num_lines"]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "line_terminator", ["\n", "\r\n", "\r"] # "LF" # "CRLF" # "CR" +) +def test_skiprows_lineterminator(all_parsers, line_terminator): + # see gh-9079 + parser = all_parsers + data = "\n".join( + [ + "SMOSMANIA ThetaProbe-ML2X ", + "2007/01/01 01:00 0.2140 U M ", + "2007/01/01 02:00 0.2141 M O ", + "2007/01/01 04:00 0.2142 D M ", + ] + ) + expected = DataFrame( + [ + ["2007/01/01", "01:00", 0.2140, "U", "M"], + ["2007/01/01", "02:00", 0.2141, "M", "O"], + ["2007/01/01", "04:00", 0.2142, "D", "M"], + ], + columns=["date", "time", "var", "flag", "oflag"], + ) + + if parser.engine == "python" and line_terminator == "\r": + pytest.skip("'CR' not respect with the Python parser yet") + + data = data.replace("\n", line_terminator) + result = parser.read_csv( + StringIO(data), + skiprows=1, + delim_whitespace=True, + names=["date", "time", "var", "flag", "oflag"], + ) + tm.assert_frame_equal(result, expected) + + +def test_skiprows_infield_quote(all_parsers): + # see gh-14459 + parser = all_parsers + data = 'a"\nb"\na\n1' + expected = DataFrame({"a": [1]}) + + result = parser.read_csv(StringIO(data), skiprows=2) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,expected", + [ + (dict(), DataFrame({"1": [3, 
5]})), + (dict(header=0, names=["foo"]), DataFrame({"foo": [3, 5]})), + ], +) +def test_skip_rows_callable(all_parsers, kwargs, expected): + parser = all_parsers + data = "a\n1\n2\n3\n4\n5" + + result = parser.read_csv(StringIO(data), skiprows=lambda x: x % 2 == 0, **kwargs) + tm.assert_frame_equal(result, expected) + + +def test_skip_rows_skip_all(all_parsers): + parser = all_parsers + data = "a\n1\n2\n3\n4\n5" + msg = "No columns to parse from file" + + with pytest.raises(EmptyDataError, match=msg): + parser.read_csv(StringIO(data), skiprows=lambda x: True) + + +def test_skip_rows_bad_callable(all_parsers): + msg = "by zero" + parser = all_parsers + data = "a\n1\n2\n3\n4\n5" + + with pytest.raises(ZeroDivisionError, match=msg): + parser.read_csv(StringIO(data), skiprows=lambda x: 1 / 0) diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py new file mode 100644 index 00000000..8d5af85c --- /dev/null +++ b/pandas/tests/io/parser/test_textreader.py @@ -0,0 +1,347 @@ +""" +Tests the TextReader class in parsers.pyx, which +is integral to the C engine in parsers.py +""" +from io import BytesIO, StringIO +import os + +import numpy as np +import pytest + +import pandas._libs.parsers as parser +from pandas._libs.parsers import TextReader + +from pandas import DataFrame +import pandas._testing as tm + +from pandas.io.parsers import TextFileReader, read_csv + + +class TestTextReader: + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "parser", "data") + self.csv1 = os.path.join(self.dirpath, "test1.csv") + self.csv2 = os.path.join(self.dirpath, "test2.csv") + self.xls1 = os.path.join(self.dirpath, "test.xls") + + def test_file_handle(self): + with open(self.csv1, "rb") as f: + reader = TextReader(f) + reader.read() + + def test_string_filename(self): + reader = TextReader(self.csv1, header=None) + reader.read() + + def test_file_handle_mmap(self): + with open(self.csv1, "rb") as f: 
+ reader = TextReader(f, memory_map=True, header=None) + reader.read() + + def test_StringIO(self): + with open(self.csv1, "rb") as f: + text = f.read() + src = BytesIO(text) + reader = TextReader(src, header=None) + reader.read() + + def test_string_factorize(self): + # should this be optional? + data = "a\nb\na\nb\na" + reader = TextReader(StringIO(data), header=None) + result = reader.read() + assert len(set(map(id, result[0]))) == 2 + + def test_skipinitialspace(self): + data = "a, b\na, b\na, b\na, b" + + reader = TextReader(StringIO(data), skipinitialspace=True, header=None) + result = reader.read() + + tm.assert_numpy_array_equal( + result[0], np.array(["a", "a", "a", "a"], dtype=np.object_) + ) + tm.assert_numpy_array_equal( + result[1], np.array(["b", "b", "b", "b"], dtype=np.object_) + ) + + def test_parse_booleans(self): + data = "True\nFalse\nTrue\nTrue" + + reader = TextReader(StringIO(data), header=None) + result = reader.read() + + assert result[0].dtype == np.bool_ + + def test_delimit_whitespace(self): + data = 'a b\na\t\t "b"\n"a"\t \t b' + + reader = TextReader(StringIO(data), delim_whitespace=True, header=None) + result = reader.read() + + tm.assert_numpy_array_equal( + result[0], np.array(["a", "a", "a"], dtype=np.object_) + ) + tm.assert_numpy_array_equal( + result[1], np.array(["b", "b", "b"], dtype=np.object_) + ) + + def test_embedded_newline(self): + data = 'a\n"hello\nthere"\nthis' + + reader = TextReader(StringIO(data), header=None) + result = reader.read() + + expected = np.array(["a", "hello\nthere", "this"], dtype=np.object_) + tm.assert_numpy_array_equal(result[0], expected) + + def test_euro_decimal(self): + data = "12345,67\n345,678" + + reader = TextReader(StringIO(data), delimiter=":", decimal=",", header=None) + result = reader.read() + + expected = np.array([12345.67, 345.678]) + tm.assert_almost_equal(result[0], expected) + + def test_integer_thousands(self): + data = "123,456\n12,500" + + reader = TextReader(StringIO(data), 
delimiter=":", thousands=",", header=None) + result = reader.read() + + expected = np.array([123456, 12500], dtype=np.int64) + tm.assert_almost_equal(result[0], expected) + + def test_integer_thousands_alt(self): + data = "123.456\n12.500" + + reader = TextFileReader( + StringIO(data), delimiter=":", thousands=".", header=None + ) + result = reader.read() + + expected = DataFrame([123456, 12500]) + tm.assert_frame_equal(result, expected) + + def test_skip_bad_lines(self, capsys): + # too many lines, see #2430 for why + data = "a:b:c\nd:e:f\ng:h:i\nj:k:l:m\nl:m:n\no:p:q:r" + + reader = TextReader(StringIO(data), delimiter=":", header=None) + msg = r"Error tokenizing data\. C error: Expected 3 fields in line 4, saw 4" + with pytest.raises(parser.ParserError, match=msg): + reader.read() + + reader = TextReader( + StringIO(data), + delimiter=":", + header=None, + error_bad_lines=False, + warn_bad_lines=False, + ) + result = reader.read() + expected = { + 0: np.array(["a", "d", "g", "l"], dtype=object), + 1: np.array(["b", "e", "h", "m"], dtype=object), + 2: np.array(["c", "f", "i", "n"], dtype=object), + } + assert_array_dicts_equal(result, expected) + + reader = TextReader( + StringIO(data), + delimiter=":", + header=None, + error_bad_lines=False, + warn_bad_lines=True, + ) + reader.read() + captured = capsys.readouterr() + + assert "Skipping line 4" in captured.err + assert "Skipping line 6" in captured.err + + def test_header_not_enough_lines(self): + data = "skip this\nskip this\na,b,c\n1,2,3\n4,5,6" + + reader = TextReader(StringIO(data), delimiter=",", header=2) + header = reader.header + expected = [["a", "b", "c"]] + assert header == expected + + recs = reader.read() + expected = { + 0: np.array([1, 4], dtype=np.int64), + 1: np.array([2, 5], dtype=np.int64), + 2: np.array([3, 6], dtype=np.int64), + } + assert_array_dicts_equal(recs, expected) + + def test_escapechar(self): + data = '\\"hello world"\n\\"hello world"\n\\"hello world"' + + reader = 
TextReader(StringIO(data), delimiter=",", header=None, escapechar="\\") + result = reader.read() + expected = {0: np.array(['"hello world"'] * 3, dtype=object)} + assert_array_dicts_equal(result, expected) + + def test_eof_has_eol(self): + # handling of new line at EOF + pass + + def test_na_substitution(self): + pass + + def test_numpy_string_dtype(self): + data = """\ +a,1 +aa,2 +aaa,3 +aaaa,4 +aaaaa,5""" + + def _make_reader(**kwds): + return TextReader(StringIO(data), delimiter=",", header=None, **kwds) + + reader = _make_reader(dtype="S5,i4") + result = reader.read() + + assert result[0].dtype == "S5" + + ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaaa"], dtype="S5") + assert (result[0] == ex_values).all() + assert result[1].dtype == "i4" + + reader = _make_reader(dtype="S4") + result = reader.read() + assert result[0].dtype == "S4" + ex_values = np.array(["a", "aa", "aaa", "aaaa", "aaaa"], dtype="S4") + assert (result[0] == ex_values).all() + assert result[1].dtype == "S4" + + def test_pass_dtype(self): + data = """\ +one,two +1,a +2,b +3,c +4,d""" + + def _make_reader(**kwds): + return TextReader(StringIO(data), delimiter=",", **kwds) + + reader = _make_reader(dtype={"one": "u1", 1: "S1"}) + result = reader.read() + assert result[0].dtype == "u1" + assert result[1].dtype == "S1" + + reader = _make_reader(dtype={"one": np.uint8, 1: object}) + result = reader.read() + assert result[0].dtype == "u1" + assert result[1].dtype == "O" + + reader = _make_reader(dtype={"one": np.dtype("u1"), 1: np.dtype("O")}) + result = reader.read() + assert result[0].dtype == "u1" + assert result[1].dtype == "O" + + def test_usecols(self): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + + def _make_reader(**kwds): + return TextReader(StringIO(data), delimiter=",", **kwds) + + reader = _make_reader(usecols=(1, 2)) + result = reader.read() + + exp = _make_reader().read() + assert len(result) == 2 + assert (result[1] == exp[1]).all() + assert (result[2] == 
exp[2]).all() + + def test_cr_delimited(self): + def _test(text, **kwargs): + nice_text = text.replace("\r", "\r\n") + result = TextReader(StringIO(text), **kwargs).read() + expected = TextReader(StringIO(nice_text), **kwargs).read() + assert_array_dicts_equal(result, expected) + + data = "a,b,c\r1,2,3\r4,5,6\r7,8,9\r10,11,12" + _test(data, delimiter=",") + + data = "a b c\r1 2 3\r4 5 6\r7 8 9\r10 11 12" + _test(data, delim_whitespace=True) + + data = "a,b,c\r1,2,3\r4,5,6\r,88,9\r10,11,12" + _test(data, delimiter=",") + + sample = ( + "A,B,C,D,E,F,G,H,I,J,K,L,M,N,O\r" + "AAAAA,BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0\r" + ",BBBBB,0,0,0,0,0,0,0,0,0,0,0,0,0" + ) + _test(sample, delimiter=",") + + data = "A B C\r 2 3\r4 5 6" + _test(data, delim_whitespace=True) + + data = "A B C\r2 3\r4 5 6" + _test(data, delim_whitespace=True) + + def test_empty_field_eof(self): + data = "a,b,c\n1,2,3\n4,," + + result = TextReader(StringIO(data), delimiter=",").read() + + expected = { + 0: np.array([1, 4], dtype=np.int64), + 1: np.array(["2", ""], dtype=object), + 2: np.array(["3", ""], dtype=object), + } + assert_array_dicts_equal(result, expected) + + # GH5664 + a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"]) + b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1]) + c = DataFrame( + [ + [1, 2, 3, 4], + [6, np.nan, np.nan, np.nan], + [8, 9, 10, 11], + [13, 14, np.nan, np.nan], + ], + columns=list("abcd"), + index=[0, 5, 7, 12], + ) + + for _ in range(100): + df = read_csv(StringIO("a,b\nc\n"), skiprows=0, names=["a"], engine="c") + tm.assert_frame_equal(df, a) + + df = read_csv( + StringIO("1,1,1,1,0\n" * 2 + "\n" * 2), names=list("abcd"), engine="c" + ) + tm.assert_frame_equal(df, b) + + df = read_csv( + StringIO("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14"), + names=list("abcd"), + engine="c", + ) + tm.assert_frame_equal(df, c) + + def test_empty_csv_input(self): + # GH14867 + df = read_csv(StringIO(), chunksize=20, header=None, names=["a", "b", "c"]) 
+ assert isinstance(df, TextFileReader) + + +def assert_array_dicts_equal(left, right): + for k, v in left.items(): + tm.assert_numpy_array_equal(np.asarray(v), np.asarray(right[k])) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py new file mode 100644 index 00000000..267fae76 --- /dev/null +++ b/pandas/tests/io/parser/test_unsupported.py @@ -0,0 +1,123 @@ +""" +Tests that features that are currently unsupported in +either the Python or C parser are actually enforced +and are clearly communicated to the user. + +Ultimately, the goal is to remove test cases from this +test suite as new feature support is added to the parsers. +""" +from io import StringIO + +import pytest + +from pandas.errors import ParserError + +import pandas._testing as tm + +import pandas.io.parsers as parsers +from pandas.io.parsers import read_csv + + +@pytest.fixture(params=["python", "python-fwf"], ids=lambda val: val) +def python_engine(request): + return request.param + + +class TestUnsupportedFeatures: + def test_mangle_dupe_cols_false(self): + # see gh-12935 + data = "a b c\n1 2 3" + msg = "is not supported" + + for engine in ("c", "python"): + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine=engine, mangle_dupe_cols=False) + + def test_c_engine(self): + # see gh-6607 + data = "a b c\n1 2 3" + msg = "does not support" + + # specify C engine with unsupported options (raise) + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", sep=None, delim_whitespace=False) + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", sep=r"\s") + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", sep="\t", quotechar=chr(128)) + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine="c", skipfooter=1) + + # specify C-unsupported options without python-unsupported options + with 
tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), sep=None, delim_whitespace=False) + with tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), sep=r"\s") + with tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), sep="\t", quotechar=chr(128)) + with tm.assert_produces_warning(parsers.ParserWarning): + read_csv(StringIO(data), skipfooter=1) + + text = """ A B C D E +one two three four +a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 +a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744 +x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838""" + msg = "Error tokenizing data" + + with pytest.raises(ParserError, match=msg): + read_csv(StringIO(text), sep="\\s+") + with pytest.raises(ParserError, match=msg): + read_csv(StringIO(text), engine="c", sep="\\s+") + + msg = "Only length-1 thousands markers supported" + data = """A|B|C +1|2,334|5 +10|13|10. +""" + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), thousands=",,") + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), thousands="") + + msg = "Only length-1 line terminators supported" + data = "a,b,c~~1,2,3~~4,5,6" + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), lineterminator="~~") + + def test_python_engine(self, python_engine): + from pandas.io.parsers import _python_unsupported as py_unsupported + + data = """1,2,3,, +1,2,3,4, +1,2,3,4,5 +1,2,,, +1,2,3,4,""" + + for default in py_unsupported: + msg = ( + f"The {repr(default)} option is not " + f"supported with the {repr(python_engine)} engine" + ) + + kwargs = {default: object()} + with pytest.raises(ValueError, match=msg): + read_csv(StringIO(data), engine=python_engine, **kwargs) + + def test_python_engine_file_no_next(self, python_engine): + # see gh-16530 + class NoNextBuffer: + def __init__(self, csv_data): + self.data = csv_data + + def __iter__(self): + return self + + def read(self): + return self.data + + data = "a\n1" + 
msg = "The 'python' engine cannot iterate" + + with pytest.raises(ValueError, match=msg): + read_csv(NoNextBuffer(data), engine=python_engine) diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py new file mode 100644 index 00000000..979eb470 --- /dev/null +++ b/pandas/tests/io/parser/test_usecols.py @@ -0,0 +1,572 @@ +""" +Tests the usecols functionality during parsing +for all of the parsers defined in parsers.py +""" +from io import StringIO + +import numpy as np +import pytest + +from pandas._libs.tslib import Timestamp + +from pandas import DataFrame, Index +import pandas._testing as tm + +_msg_validate_usecols_arg = ( + "'usecols' must either be list-like " + "of all strings, all unicode, all " + "integers or a callable." +) +_msg_validate_usecols_names = ( + "Usecols do not match columns, columns expected but not found: {0}" +) + + +def test_raise_on_mixed_dtype_usecols(all_parsers): + # See gh-12678 + data = """a,b,c + 1000,2000,3000 + 4000,5000,6000 + """ + usecols = [0, "b", 2] + parser = all_parsers + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols=usecols) + + +@pytest.mark.parametrize("usecols", [(1, 2), ("b", "c")]) +def test_usecols(all_parsers, usecols): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv(StringIO(data), usecols=usecols) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_names(all_parsers): + data = """\ +a,b,c +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + names = ["foo", "bar"] + result = parser.read_csv(StringIO(data), names=names, usecols=[1, 2], header=0) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=names) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "names,usecols", [(["b", "c"], [1, 2]), (["a", "b", "c"], ["b", 
"c"])] +) +def test_usecols_relative_to_names(all_parsers, names, usecols): + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv(StringIO(data), names=names, header=None, usecols=usecols) + + expected = DataFrame([[2, 3], [5, 6], [8, 9], [11, 12]], columns=["b", "c"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_relative_to_names2(all_parsers): + # see gh-5766 + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + result = parser.read_csv( + StringIO(data), names=["a", "b"], header=None, usecols=[0, 1] + ) + + expected = DataFrame([[1, 2], [4, 5], [7, 8], [10, 11]], columns=["a", "b"]) + tm.assert_frame_equal(result, expected) + + +def test_usecols_name_length_conflict(all_parsers): + data = """\ +1,2,3 +4,5,6 +7,8,9 +10,11,12""" + parser = all_parsers + msg = ( + "Number of passed names did not match number of header fields in the file" + if parser.engine == "python" + else "Passed header names mismatches usecols" + ) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), names=["a", "b"], header=None, usecols=[1]) + + +def test_usecols_single_string(all_parsers): + # see gh-20558 + parser = all_parsers + data = """foo, bar, baz +1000, 2000, 3000 +4000, 5000, 6000""" + + with pytest.raises(ValueError, match=_msg_validate_usecols_arg): + parser.read_csv(StringIO(data), usecols="foo") + + +@pytest.mark.parametrize( + "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"] +) +def test_usecols_index_col_false(all_parsers, data): + # see gh-9082 + parser = all_parsers + usecols = ["a", "c", "d"] + expected = DataFrame({"a": [1, 5], "c": [3, 7], "d": [4, 8]}) + + result = parser.read_csv(StringIO(data), usecols=usecols, index_col=False) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("index_col", ["b", 0]) +@pytest.mark.parametrize("usecols", [["b", "c"], [1, 2]]) +def test_usecols_index_col_conflict(all_parsers, usecols, 
def test_usecols_index_col_conflict2(all_parsers):
    """Read columns b, c and d and use b and c together as the index."""
    # see gh-4201: test that index_col as integer reflects usecols
    csv_text = "a,b,c,d\nA,a,1,one\nB,b,2,two"

    result = all_parsers.read_csv(
        StringIO(csv_text), usecols=["b", "c", "d"], index_col=["b", "c"]
    )

    expected = DataFrame(
        {"b": ["a", "b"], "c": [1, 2], "d": ("one", "two")}
    ).set_index(["b", "c"])
    tm.assert_frame_equal(result, expected)
+ ([0, 1], DataFrame(data=[[1000, 2000], [4000, 5000]], columns=["2", "0"])), + # Column selection by name. + (["0", "1"], DataFrame(data=[[2000, 3000], [5000, 6000]], columns=["0", "1"]),), + ], +) +def test_usecols_with_integer_like_header(all_parsers, usecols, expected): + parser = all_parsers + data = """2,0,1 +1000,2000,3000 +4000,5000,6000""" + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) +def test_usecols_with_parse_dates(all_parsers, usecols): + # see gh-9755 + data = """a,b,c,d,e +0,1,20140101,0900,4 +0,1,20140102,1000,4""" + parser = all_parsers + parse_dates = [[1, 2]] + + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates2(all_parsers): + # see gh-13604 + parser = all_parsers + data = """2008-02-07 09:40,1032.43 +2008-02-07 09:50,1042.54 +2008-02-07 10:00,1051.65""" + + names = ["date", "values"] + usecols = names[:] + parse_dates = [0] + + index = Index( + [ + Timestamp("2008-02-07 09:40"), + Timestamp("2008-02-07 09:50"), + Timestamp("2008-02-07 10:00"), + ], + name="date", + ) + cols = {"values": [1032.43, 1042.54, 1051.65]} + expected = DataFrame(cols, index=index) + + result = parser.read_csv( + StringIO(data), + parse_dates=parse_dates, + index_col=0, + usecols=usecols, + header=None, + names=names, + ) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates3(all_parsers): + # see gh-14792 + parser = all_parsers + data = """a,b,c,d,e,f,g,h,i,j +2016/09/21,1,1,2,3,4,5,6,7,8""" + + usecols = list("abcdefghij") + parse_dates = [0] + + cols = { + "a": Timestamp("2016-09-21"), + "b": [1], + "c": [1], + "d": [2], + "e": [3], + "f": [4], + 
"g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=usecols) + + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_parse_dates4(all_parsers): + data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8" + usecols = list("abcdefghij") + parse_dates = [[0, 1]] + parser = all_parsers + + cols = { + "a_b": "2016/09/21 1", + "c": [1], + "d": [2], + "e": [3], + "f": [4], + "g": [5], + "h": [6], + "i": [7], + "j": [8], + } + expected = DataFrame(cols, columns=["a_b"] + list("cdefghij")) + + result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [[0, 2, 3], [3, 0, 2]]) +@pytest.mark.parametrize( + "names", + [ + list("abcde"), # Names span all columns in original data. + list("acd"), # Names span only the selected columns. + ], +) +def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names): + # see gh-9755 + s = """0,1,20140101,0900,4 +0,1,20140102,1000,4""" + parse_dates = [[1, 2]] + parser = all_parsers + + cols = { + "a": [0, 0], + "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")], + } + expected = DataFrame(cols, columns=["c_d", "a"]) + + result = parser.read_csv( + StringIO(s), names=names, parse_dates=parse_dates, usecols=usecols + ) + tm.assert_frame_equal(result, expected) + + +def test_usecols_with_unicode_strings(all_parsers): + # see gh-13219 + data = """AAA,BBB,CCC,DDD +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + exp_data = { + "AAA": {0: 0.056674972999999997, 1: 2.6132309819999997, 2: 3.5689350380000002}, + "BBB": {0: 8, 1: 2, 2: 7}, + } + expected = DataFrame(exp_data) + + result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"]) + tm.assert_frame_equal(result, expected) + + +def 
# The parameter list used to hold the same value twice, which only ran the
# identical case a second time; a single entry keeps the signature intact.
@pytest.mark.parametrize("usecols", [["あああ", "いい"]])
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
    """Select columns whose header names consist of multi-byte characters."""
    data = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    exp_data = {
        "あああ": {0: 0.056674972999999997, 1: 2.6132309819999997, 2: 3.5689350380000002},
        "いい": {0: 8, 1: 2, 2: 7},
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=usecols)
    tm.assert_frame_equal(result, expected)
"DDD"], + DataFrame( + { + "AaA": { + 0: 0.056674972999999997, + 1: 2.6132309819999997, + 2: 3.5689350380000002, + }, + "bBb": {0: 8, 1: 2, 2: 7}, + "ddd": {0: "a", 1: "b", 2: "a"}, + } + ), + ), + (lambda x: False, DataFrame()), + ], +) +def test_callable_usecols(all_parsers, usecols, expected): + # see gh-14154 + data = """AaA,bBb,CCC,ddd +0.056674973,8,True,a +2.613230982,2,False,b +3.568935038,7,False,a""" + parser = all_parsers + + result = parser.read_csv(StringIO(data), usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]]) +def test_incomplete_first_row(all_parsers, usecols): + # see gh-6710 + data = "1,2\n1,2,3" + parser = all_parsers + names = ["a", "b", "c"] + expected = DataFrame({"a": [1, 1], "c": [np.nan, 3]}) + + result = parser.read_csv(StringIO(data), names=names, usecols=usecols) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "data,usecols,kwargs,expected", + [ + # see gh-8985 + ( + "19,29,39\n" * 2 + "10,20,30,40", + [0, 1, 2], + dict(header=None), + DataFrame([[19, 29, 39], [19, 29, 39], [10, 20, 30]]), + ), + # see gh-9549 + ( + ("A,B,C\n1,2,3\n3,4,5\n1,2,4,5,1,6\n1,2,3,,,1,\n1,2,3\n5,6,7"), + ["A", "B", "C"], + dict(), + DataFrame( + { + "A": [1, 3, 1, 1, 1, 5], + "B": [2, 4, 2, 2, 2, 6], + "C": [3, 5, 4, 3, 3, 7], + } + ), + ), + ], +) +def test_uneven_length_cols(all_parsers, data, usecols, kwargs, expected): + # see gh-8985 + parser = all_parsers + result = parser.read_csv(StringIO(data), usecols=usecols, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "usecols,kwargs,expected,msg", + [ + ( + ["a", "b", "c", "d"], + dict(), + DataFrame({"a": [1, 5], "b": [2, 6], "c": [3, 7], "d": [4, 8]}), + None, + ), + ( + ["a", "b", "c", "f"], + dict(), + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + (["a", "b", "f"], dict(), None, _msg_validate_usecols_names.format(r"\['f'\]")), + ( + 
["a", "b", "f", "g"], + dict(), + None, + _msg_validate_usecols_names.format(r"\[('f', 'g'|'g', 'f')\]"), + ), + # see gh-14671 + ( + None, + dict(header=0, names=["A", "B", "C", "D"]), + DataFrame({"A": [1, 5], "B": [2, 6], "C": [3, 7], "D": [4, 8]}), + None, + ), + ( + ["A", "B", "C", "f"], + dict(header=0, names=["A", "B", "C", "D"]), + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ( + ["A", "B", "f"], + dict(names=["A", "B", "C", "D"]), + None, + _msg_validate_usecols_names.format(r"\['f'\]"), + ), + ], +) +def test_raises_on_usecols_names_mismatch(all_parsers, usecols, kwargs, expected, msg): + data = "a,b,c,d\n1,2,3,4\n5,6,7,8" + kwargs.update(usecols=usecols) + parser = all_parsers + + if expected is None: + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + else: + result = parser.read_csv(StringIO(data), **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.xfail( + reason="see gh-16469: works on the C engine but not the Python engine", strict=False +) +@pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]]) +def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols): + data = "a,b,c,d\n1,2,3,4\n5,6,7,8" + names = ["A", "B", "C", "D"] + parser = all_parsers + + result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols) + expected = DataFrame({"A": [1, 5], "C": [3, 7]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/pytables/__init__.py b/pandas/tests/io/pytables/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/io/pytables/common.py b/pandas/tests/io/pytables/common.py new file mode 100644 index 00000000..d06f4677 --- /dev/null +++ b/pandas/tests/io/pytables/common.py @@ -0,0 +1,82 @@ +from contextlib import contextmanager +import os +import tempfile + +import pytest + +from pandas.io.pytables import HDFStore + +tables = pytest.importorskip("tables") +# set these parameters so we don't have 
def safe_remove(path):
    """Delete the file at ``path``; ignore ``None`` and any ``OSError``."""
    if path is not None:
        try:
            os.remove(path)
        except OSError:
            pass


def safe_close(store):
    """Call ``store.close()`` unless ``store`` is ``None``; ignore ``OSError``."""
    try:
        if store is not None:
            store.close()
    except OSError:  # IOError is an alias of OSError on Python 3
        pass


def create_tempfile(path):
    """
    Return ``path`` joined onto the system temporary directory.

    Only the name is built; no file is created or opened.
    """
    return os.path.join(tempfile.gettempdir(), path)


# contextmanager to ensure the file cleanup
@contextmanager
def ensure_clean_store(path, mode="a", complevel=None, complib=None, fletcher32=False):
    """
    Open an ``HDFStore``, yield it, then close it and clean up.

    Parameters
    ----------
    path : str
        File name of the store. A name without a directory component is
        placed in the system temporary directory.
    mode, complevel, complib, fletcher32
        Passed through unchanged to ``HDFStore``.

    Yields
    ------
    HDFStore
        The opened store.

    Notes
    -----
    The file is removed on exit only when ``mode`` is ``"w"`` or ``"a"``.
    """
    # Bound before the ``try`` so the cleanup below can run even when
    # opening the store fails; otherwise the original error would be
    # replaced by an UnboundLocalError raised from ``finally``.
    store = None
    try:
        # put in the temporary path if we don't have one already
        if not os.path.dirname(path):
            path = create_tempfile(path)

        store = HDFStore(
            path,
            mode=mode,
            complevel=complevel,
            complib=complib,
            # forward the caller's choice instead of a hard-coded False
            fletcher32=fletcher32,
        )
        yield store
    finally:
        safe_close(store)
        if mode in ("w", "a"):
            safe_remove(path)


@contextmanager
def ensure_clean_path(path):
    """
    Yield temporary file name(s) and remove the file(s) on exit.

    Parameters
    ----------
    path : str or list of str
        A single file name, or a list of file names.

    Yields
    ------
    str or list of str
        The name (or list of names) inside the system temporary
        directory. Nothing is created or opened here.
    """
    # Build the names before entering ``try`` so that a failure here is
    # reported as-is instead of being masked by the cleanup loop.
    if isinstance(path, list):
        filenames = [create_tempfile(p) for p in path]
        target = filenames
    else:
        filenames = [create_tempfile(path)]
        target = filenames[0]

    try:
        yield target
    finally:
        for f in filenames:
            safe_remove(f)


def _maybe_remove(store, key):
    """For tests using tables, try removing the table to be sure there is
    no content from previous tests using the same table name."""
    try:
        store.remove(key)
    except (ValueError, KeyError):
        pass
"tmp.__{}__.h5".format(tm.rands(10)) + + +@pytest.fixture(scope="module", autouse=True) +def setup_mode(): + """ Reset testing mode fixture""" + tm.reset_testing_mode() + yield + tm.set_testing_mode() diff --git a/pandas/tests/io/pytables/test_compat.py b/pandas/tests/io/pytables/test_compat.py new file mode 100644 index 00000000..c7200385 --- /dev/null +++ b/pandas/tests/io/pytables/test_compat.py @@ -0,0 +1,77 @@ +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.tests.io.pytables.common import ensure_clean_path + +tables = pytest.importorskip("tables") + + +@pytest.fixture +def pytables_hdf5_file(): + """ + Use PyTables to create a simple HDF5 file. + """ + table_schema = { + "c0": tables.Time64Col(pos=0), + "c1": tables.StringCol(5, pos=1), + "c2": tables.Int64Col(pos=2), + } + + t0 = 1_561_105_000.0 + + testsamples = [ + {"c0": t0, "c1": "aaaaa", "c2": 1}, + {"c0": t0 + 1, "c1": "bbbbb", "c2": 2}, + {"c0": t0 + 2, "c1": "ccccc", "c2": 10 ** 5}, + {"c0": t0 + 3, "c1": "ddddd", "c2": 4_294_967_295}, + ] + + objname = "pandas_test_timeseries" + + with ensure_clean_path("written_with_pytables.h5") as path: + # The `ensure_clean_path` context mgr removes the temp file upon exit. + with tables.open_file(path, mode="w") as f: + t = f.create_table("/", name=objname, description=table_schema) + for sample in testsamples: + for key, value in sample.items(): + t.row[key] = value + t.row.append() + + yield path, objname, pd.DataFrame(testsamples) + + +class TestReadPyTablesHDF5: + """ + A group of tests which covers reading HDF5 files written by plain PyTables + (not written by pandas). + + Was introduced for regression-testing issue 11188. 
+ """ + + def test_read_complete(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + result = pd.read_hdf(path, key=objname) + expected = df + tm.assert_frame_equal(result, expected) + + def test_read_with_start(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, start=1) + expected = df[1:].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + def test_read_with_stop(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, stop=1) + expected = df[:1].reset_index(drop=True) + tm.assert_frame_equal(result, expected) + + def test_read_with_startstop(self, pytables_hdf5_file): + path, objname, df = pytables_hdf5_file + # This is a regression test for pandas-dev/pandas/issues/11188 + result = pd.read_hdf(path, key=objname, start=1, stop=2) + expected = df[1:2].reset_index(drop=True) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/pytables/test_complex.py b/pandas/tests/io/pytables/test_complex.py new file mode 100644 index 00000000..543940e6 --- /dev/null +++ b/pandas/tests/io/pytables/test_complex.py @@ -0,0 +1,185 @@ +from warnings import catch_warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store + +from pandas.io.pytables import read_hdf + +# GH10447 + + +def test_complex_fixed(setup_path): + df = DataFrame( + np.random.rand(4, 5).astype(np.complex64), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + df = DataFrame( + 
np.random.rand(4, 5).astype(np.complex128), + index=list("abcd"), + columns=list("ABCDE"), + ) + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + +def test_complex_table(setup_path): + df = DataFrame( + np.random.rand(4, 5).astype(np.complex64), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + df = DataFrame( + np.random.rand(4, 5).astype(np.complex128), + index=list("abcd"), + columns=list("ABCDE"), + ) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table", mode="w") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + +@td.xfail_non_writeable +def test_complex_mixed_fixed(setup_path): + complex64 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 + ) + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "d"], + "C": complex64, + "D": complex128, + "E": [1.0, 2.0, 3.0, 4.0], + }, + index=list("abcd"), + ) + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df") + reread = read_hdf(path, "df") + tm.assert_frame_equal(df, reread) + + +def test_complex_mixed_table(setup_path): + complex64 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64 + ) + complex128 = np.array( + [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128 + ) + df = DataFrame( + { + "A": [1, 2, 3, 4], + "B": ["a", "b", "c", "d"], + "C": complex64, + "D": complex128, + "E": [1.0, 2.0, 3.0, 4.0], + }, + index=list("abcd"), + ) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=["A", "B"]) + result = store.select("df", where="A>2") + 
def test_complex_across_dimensions(setup_path):
    """Write a two-column complex frame in table format and read it back."""
    values = np.array([1.0 + 1.0j] * 4)
    column = Series(values, index=list("abcd"))
    frame = DataFrame({"A": column, "B": column})

    with catch_warnings(record=True):
        with ensure_clean_path(setup_path) as path:
            frame.to_hdf(path, "obj", format="table")
            roundtripped = read_hdf(path, "obj")
            tm.assert_frame_equal(frame, roundtripped)
def test_complex_append(setup_path):
    """Append a frame with a complex128 column twice and select both copies."""
    df = DataFrame(
        {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)}
    )

    with ensure_clean_store(setup_path) as store:
        # "b" is requested as a data column on the first append only.
        store.append("df", df, data_columns=["b"])
        store.append("df", df)
        result = store.select("df")
        # ``axis`` is passed by keyword: positional arguments after ``objs``
        # are deprecated in later pandas releases.
        tm.assert_frame_equal(pd.concat([df, df], axis=0), result)
+ ensure_clean_store, + safe_close, + safe_remove, + tables, +) + +from pandas.io.pytables import ( + ClosedFileError, + HDFStore, + PossibleDataLossError, + Term, + read_hdf, +) + +from pandas.io import pytables as pytables # noqa: E402 isort:skip +from pandas.io.pytables import TableIterator # noqa: E402 isort:skip + + +_default_compressor = "blosc" +ignore_natural_naming_warning = pytest.mark.filterwarnings( + "ignore:object name:tables.exceptions.NaturalNameWarning" +) + + +@pytest.mark.single +class TestHDFStore: + def test_format_type(self, setup_path): + df = pd.DataFrame({"A": [1, 2]}) + with ensure_clean_path(setup_path) as path: + with HDFStore(path) as store: + store.put("a", df, format="fixed") + store.put("b", df, format="table") + + assert store.get_storer("a").format_type == "fixed" + assert store.get_storer("b").format_type == "table" + + def test_format_kwarg_in_constructor(self, setup_path): + # GH 13291 + + msg = "format is not a defined argument for HDFStore" + + with ensure_clean_path(setup_path) as path: + with pytest.raises(ValueError, match=msg): + HDFStore(path, format="table") + + def test_context(self, setup_path): + path = create_tempfile(setup_path) + try: + with HDFStore(path) as tbl: + raise ValueError("blah") + except ValueError: + pass + finally: + safe_remove(path) + + try: + with HDFStore(path) as tbl: + tbl["a"] = tm.makeDataFrame() + + with HDFStore(path) as tbl: + assert len(tbl) == 1 + assert type(tbl["a"]) == DataFrame + finally: + safe_remove(path) + + def test_conv_read_write(self, setup_path): + path = create_tempfile(setup_path) + try: + + def roundtrip(key, obj, **kwargs): + obj.to_hdf(path, key, **kwargs) + return read_hdf(path, key) + + o = tm.makeTimeSeries() + tm.assert_series_equal(o, roundtrip("series", o)) + + o = tm.makeStringSeries() + tm.assert_series_equal(o, roundtrip("string_series", o)) + + o = tm.makeDataFrame() + tm.assert_frame_equal(o, roundtrip("frame", o)) + + # table + df = DataFrame(dict(A=range(5), 
B=range(5))) + df.to_hdf(path, "table", append=True) + result = read_hdf(path, "table", where=["index>2"]) + tm.assert_frame_equal(df[df.index > 2], result) + + finally: + safe_remove(path) + + def test_long_strings(self, setup_path): + + # GH6166 + df = DataFrame( + {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10) + ) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=["a"]) + + result = store.select("df") + tm.assert_frame_equal(df, result) + + def test_api(self, setup_path): + + # GH4584 + # API issue when to_hdf doesn't accept append AND format args + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.iloc[:10].to_hdf(path, "df", append=True, format="table") + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + # append to False + df.iloc[:10].to_hdf(path, "df", append=False, format="table") + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.iloc[:10].to_hdf(path, "df", append=True) + df.iloc[10:].to_hdf(path, "df", append=True, format="table") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + # append to False + df.iloc[:10].to_hdf(path, "df", append=False, format="table") + df.iloc[10:].to_hdf(path, "df", append=True) + tm.assert_frame_equal(read_hdf(path, "df"), df) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.to_hdf(path, "df", append=False, format="fixed") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df", append=False, format="f") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df", append=False) + tm.assert_frame_equal(read_hdf(path, "df"), df) + + df.to_hdf(path, "df") + tm.assert_frame_equal(read_hdf(path, "df"), df) + + with ensure_clean_store(setup_path) as store: + + path = 
store._path + df = tm.makeDataFrame() + + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=True, format="table") + store.append("df", df.iloc[10:], append=True, format="table") + tm.assert_frame_equal(store.select("df"), df) + + # append to False + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=False, format="table") + store.append("df", df.iloc[10:], append=True, format="table") + tm.assert_frame_equal(store.select("df"), df) + + # formats + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=False, format="table") + store.append("df", df.iloc[10:], append=True, format="table") + tm.assert_frame_equal(store.select("df"), df) + + _maybe_remove(store, "df") + store.append("df", df.iloc[:10], append=False, format="table") + store.append("df", df.iloc[10:], append=True, format=None) + tm.assert_frame_equal(store.select("df"), df) + + with ensure_clean_path(setup_path) as path: + # Invalid. + df = tm.makeDataFrame() + + msg = "Can only append to Tables" + + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", append=True, format="f") + + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", append=True, format="fixed") + + msg = r"invalid HDFStore format specified \[foo\]" + + with pytest.raises(TypeError, match=msg): + df.to_hdf(path, "df", append=True, format="foo") + + with pytest.raises(TypeError, match=msg): + df.to_hdf(path, "df", append=False, format="foo") + + # File path doesn't exist + path = "" + msg = f"File {path} does not exist" + + with pytest.raises(FileNotFoundError, match=msg): + read_hdf(path, "df") + + def test_api_default_format(self, setup_path): + + # default_format option + with ensure_clean_store(setup_path) as store: + df = tm.makeDataFrame() + + pd.set_option("io.hdf.default_format", "fixed") + _maybe_remove(store, "df") + store.put("df", df) + assert not store.get_storer("df").is_table + + msg = "Can only append to Tables" + + with pytest.raises(ValueError, 
match=msg): + store.append("df2", df) + + pd.set_option("io.hdf.default_format", "table") + _maybe_remove(store, "df") + store.put("df", df) + assert store.get_storer("df").is_table + _maybe_remove(store, "df2") + store.append("df2", df) + assert store.get_storer("df").is_table + + pd.set_option("io.hdf.default_format", None) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + + pd.set_option("io.hdf.default_format", "fixed") + df.to_hdf(path, "df") + with HDFStore(path) as store: + assert not store.get_storer("df").is_table + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df2", append=True) + + pd.set_option("io.hdf.default_format", "table") + df.to_hdf(path, "df3") + with HDFStore(path) as store: + assert store.get_storer("df3").is_table + df.to_hdf(path, "df4", append=True) + with HDFStore(path) as store: + assert store.get_storer("df4").is_table + + pd.set_option("io.hdf.default_format", None) + + def test_keys(self, setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store["b"] = tm.makeStringSeries() + store["c"] = tm.makeDataFrame() + + assert len(store) == 3 + expected = {"/a", "/b", "/c"} + assert set(store.keys()) == expected + assert set(store) == expected + + def test_keys_ignore_hdf_softlink(self, setup_path): + + # GH 20523 + # Puts a softlink into HDF file and rereads + + with ensure_clean_store(setup_path) as store: + + df = DataFrame(dict(A=range(5), B=range(5))) + store.put("df", df) + + assert store.keys() == ["/df"] + + store._handle.create_soft_link(store._handle.root, "symlink", "df") + + # Should ignore the softlink + assert store.keys() == ["/df"] + + def test_iter_empty(self, setup_path): + + with ensure_clean_store(setup_path) as store: + # GH 12221 + assert list(store) == [] + + def test_repr(self, setup_path): + + with ensure_clean_store(setup_path) as store: + repr(store) + store.info() + store["a"] = tm.makeTimeSeries() + store["b"] = 
tm.makeStringSeries() + store["c"] = tm.makeDataFrame() + + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + df["bool3"] = True + df["int1"] = 1 + df["int2"] = 2 + df["timestamp1"] = Timestamp("20010102") + df["timestamp2"] = Timestamp("20010103") + df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) + df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) + df.loc[3:6, ["obj1"]] = np.nan + df = df._consolidate()._convert(datetime=True) + + with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) + store["df"] = df + + # make a random group in hdf space + store._handle.create_group(store._handle.root, "bah") + + assert store.filename in repr(store) + assert store.filename in str(store) + store.info() + + # storers + with ensure_clean_store(setup_path) as store: + + df = tm.makeDataFrame() + store.append("df", df) + + s = store.get_storer("df") + repr(s) + str(s) + + @ignore_natural_naming_warning + def test_contains(self, setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store["b"] = tm.makeDataFrame() + store["foo/bar"] = tm.makeDataFrame() + assert "a" in store + assert "b" in store + assert "c" not in store + assert "foo/bar" in store + assert "/foo/bar" in store + assert "/foo/b" not in store + assert "bar" not in store + + # gh-2694: tables.NaturalNameWarning + with catch_warnings(record=True): + store["node())"] = tm.makeDataFrame() + assert "node())" in store + + def test_versioning(self, setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store["b"] = tm.makeDataFrame() + df = tm.makeTimeDataFrame() + _maybe_remove(store, "df1") + store.append("df1", df[:10]) + store.append("df1", df[10:]) + assert store.root.a._v_attrs.pandas_version == "0.15.2" + assert store.root.b._v_attrs.pandas_version == "0.15.2" + assert store.root.df1._v_attrs.pandas_version == 
"0.15.2" + + # write a file and wipe its versioning + _maybe_remove(store, "df2") + store.append("df2", df) + + # this is an error because its table_type is appendable, but no + # version info + store.get_node("df2")._v_attrs.pandas_version = None + + msg = "'NoneType' object has no attribute 'startswith'" + + with pytest.raises(Exception, match=msg): + store.select("df2") + + def test_mode(self, setup_path): + + df = tm.makeTimeDataFrame() + + def check(mode): + + with ensure_clean_path(setup_path) as path: + + # constructor + if mode in ["r", "r+"]: + with pytest.raises(IOError): + HDFStore(path, mode=mode) + + else: + store = HDFStore(path, mode=mode) + assert store._handle.mode == mode + store.close() + + with ensure_clean_path(setup_path) as path: + + # context + if mode in ["r", "r+"]: + with pytest.raises(IOError): + with HDFStore(path, mode=mode) as store: # noqa + pass + else: + with HDFStore(path, mode=mode) as store: + assert store._handle.mode == mode + + with ensure_clean_path(setup_path) as path: + + # conv write + if mode in ["r", "r+"]: + with pytest.raises(IOError): + df.to_hdf(path, "df", mode=mode) + df.to_hdf(path, "df", mode="w") + else: + df.to_hdf(path, "df", mode=mode) + + # conv read + if mode in ["w"]: + msg = ( + "mode w is not allowed while performing a read. " + r"Allowed modes are r, r\+ and a." 
+ ) + with pytest.raises(ValueError, match=msg): + read_hdf(path, "df", mode=mode) + else: + result = read_hdf(path, "df", mode=mode) + tm.assert_frame_equal(result, df) + + def check_default_mode(): + + # read_hdf uses default mode + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="w") + result = read_hdf(path, "df") + tm.assert_frame_equal(result, df) + + check("r") + check("r+") + check("a") + check("w") + check_default_mode() + + def test_reopen_handle(self, setup_path): + + with ensure_clean_path(setup_path) as path: + + store = HDFStore(path, mode="a") + store["a"] = tm.makeTimeSeries() + + # invalid mode change + with pytest.raises(PossibleDataLossError): + store.open("w") + + store.close() + assert not store.is_open + + # truncation ok here + store.open("w") + assert store.is_open + assert len(store) == 0 + store.close() + assert not store.is_open + + store = HDFStore(path, mode="a") + store["a"] = tm.makeTimeSeries() + + # reopen as read + store.open("r") + assert store.is_open + assert len(store) == 1 + assert store._mode == "r" + store.close() + assert not store.is_open + + # reopen as append + store.open("a") + assert store.is_open + assert len(store) == 1 + assert store._mode == "a" + store.close() + assert not store.is_open + + # reopen as append (again) + store.open("a") + assert store.is_open + assert len(store) == 1 + assert store._mode == "a" + store.close() + assert not store.is_open + + def test_open_args(self, setup_path): + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + + # create an in memory store + store = HDFStore( + path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0 + ) + store["df"] = df + store.append("df2", df) + + tm.assert_frame_equal(store["df"], df) + tm.assert_frame_equal(store["df2"], df) + + store.close() + + # the file should not have actually been written + assert not os.path.exists(path) + + def test_flush(self, setup_path): + + with 
ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + store.flush() + store.flush(fsync=True) + + def test_get(self, setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeSeries() + left = store.get("a") + right = store["a"] + tm.assert_series_equal(left, right) + + left = store.get("/a") + right = store["/a"] + tm.assert_series_equal(left, right) + + with pytest.raises(KeyError, match="'No object named b in the file'"): + store.get("b") + + @pytest.mark.parametrize( + "where, expected", + [ + ( + "/", + { + "": ({"first_group", "second_group"}, set()), + "/first_group": (set(), {"df1", "df2"}), + "/second_group": ({"third_group"}, {"df3", "s1"}), + "/second_group/third_group": (set(), {"df4"}), + }, + ), + ( + "/second_group", + { + "/second_group": ({"third_group"}, {"df3", "s1"}), + "/second_group/third_group": (set(), {"df4"}), + }, + ), + ], + ) + def test_walk(self, where, expected, setup_path): + # GH10143 + objs = { + "df1": pd.DataFrame([1, 2, 3]), + "df2": pd.DataFrame([4, 5, 6]), + "df3": pd.DataFrame([6, 7, 8]), + "df4": pd.DataFrame([9, 10, 11]), + "s1": pd.Series([10, 9, 8]), + # Next 3 items aren't pandas objects and should be ignored + "a1": np.array([[1, 2, 3], [4, 5, 6]]), + "tb1": np.array([(1, 2, 3), (4, 5, 6)], dtype="i,i,i"), + "tb2": np.array([(7, 8, 9), (10, 11, 12)], dtype="i,i,i"), + } + + with ensure_clean_store("walk_groups.hdf", mode="w") as store: + store.put("/first_group/df1", objs["df1"]) + store.put("/first_group/df2", objs["df2"]) + store.put("/second_group/df3", objs["df3"]) + store.put("/second_group/s1", objs["s1"]) + store.put("/second_group/third_group/df4", objs["df4"]) + # Create non-pandas objects + store._handle.create_array("/first_group", "a1", objs["a1"]) + store._handle.create_table("/first_group", "tb1", obj=objs["tb1"]) + store._handle.create_table("/second_group", "tb2", obj=objs["tb2"]) + + assert len(list(store.walk(where=where))) == len(expected) + for 
path, groups, leaves in store.walk(where=where): + assert path in expected + expected_groups, expected_frames = expected[path] + assert expected_groups == set(groups) + assert expected_frames == set(leaves) + for leaf in leaves: + frame_path = "/".join([path, leaf]) + obj = store.get(frame_path) + if "df" in leaf: + tm.assert_frame_equal(obj, objs[leaf]) + else: + tm.assert_series_equal(obj, objs[leaf]) + + def test_getattr(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + s = tm.makeTimeSeries() + store["a"] = s + + # test attribute access + result = store.a + tm.assert_series_equal(result, s) + result = getattr(store, "a") + tm.assert_series_equal(result, s) + + df = tm.makeTimeDataFrame() + store["df"] = df + result = store.df + tm.assert_frame_equal(result, df) + + # errors + for x in ["d", "mode", "path", "handle", "complib"]: + with pytest.raises(AttributeError): + getattr(store, x) + + # not stores + for x in ["mode", "path", "handle", "complib"]: + getattr(store, "_{x}".format(x=x)) + + def test_put(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + ts = tm.makeTimeSeries() + df = tm.makeTimeDataFrame() + store["a"] = ts + store["b"] = df[:10] + store["foo/bar/bah"] = df[:10] + store["foo"] = df[:10] + store["/foo"] = df[:10] + store.put("c", df[:10], format="table") + + # not OK, not a table + with pytest.raises(ValueError): + store.put("b", df[10:], append=True) + + # node does not currently exist, test _is_table_type returns False + # in this case + _maybe_remove(store, "f") + with pytest.raises(ValueError): + store.put("f", df[10:], append=True) + + # can't put to a table (use append instead) + with pytest.raises(ValueError): + store.put("c", df[10:], append=True) + + # overwrite table + store.put("c", df[:10], format="table", append=False) + tm.assert_frame_equal(df[:10], store["c"]) + + def test_put_string_index(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + index = Index( + ["I am 
a very long string index: {i}".format(i=i) for i in range(20)] + ) + s = Series(np.arange(20), index=index) + df = DataFrame({"A": s, "B": s}) + + store["a"] = s + tm.assert_series_equal(store["a"], s) + + store["b"] = df + tm.assert_frame_equal(store["b"], df) + + # mixed length + index = Index( + ["abcdefghijklmnopqrstuvwxyz1234567890"] + + ["I am a very long string index: {i}".format(i=i) for i in range(20)] + ) + s = Series(np.arange(21), index=index) + df = DataFrame({"A": s, "B": s}) + store["a"] = s + tm.assert_series_equal(store["a"], s) + + store["b"] = df + tm.assert_frame_equal(store["b"], df) + + def test_put_compression(self, setup_path): + + with ensure_clean_store(setup_path) as store: + df = tm.makeTimeDataFrame() + + store.put("c", df, format="table", complib="zlib") + tm.assert_frame_equal(store["c"], df) + + # can't compress if format='fixed' + with pytest.raises(ValueError): + store.put("b", df, format="fixed", complib="zlib") + + @td.skip_if_windows_python_3 + def test_put_compression_blosc(self, setup_path): + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + + # can't compress if format='fixed' + with pytest.raises(ValueError): + store.put("b", df, format="fixed", complib="blosc") + + store.put("c", df, format="table", complib="blosc") + tm.assert_frame_equal(store["c"], df) + + def test_complibs_default_settings(self, setup_path): + # GH15943 + df = tm.makeDataFrame() + + # Set complevel and check if complib is automatically set to + # default value + with ensure_clean_path(setup_path) as tmpfile: + df.to_hdf(tmpfile, "df", complevel=9) + result = pd.read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 9 + assert node.filters.complib == "zlib" + + # Set complib and check to see if compression is disabled + with ensure_clean_path(setup_path) as tmpfile: 
+ df.to_hdf(tmpfile, "df", complib="zlib") + result = pd.read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None + + # Check if not setting complib or complevel results in no compression + with ensure_clean_path(setup_path) as tmpfile: + df.to_hdf(tmpfile, "df") + result = pd.read_hdf(tmpfile, "df") + tm.assert_frame_equal(result, df) + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None + + # Check if file-defaults can be overridden on a per table basis + with ensure_clean_path(setup_path) as tmpfile: + store = pd.HDFStore(tmpfile) + store.append("dfc", df, complevel=9, complib="blosc") + store.append("df", df) + store.close() + + with tables.open_file(tmpfile, mode="r") as h5file: + for node in h5file.walk_nodes(where="/df", classname="Leaf"): + assert node.filters.complevel == 0 + assert node.filters.complib is None + for node in h5file.walk_nodes(where="/dfc", classname="Leaf"): + assert node.filters.complevel == 9 + assert node.filters.complib == "blosc" + + def test_complibs(self, setup_path): + # GH14478 + df = tm.makeDataFrame() + + # Building list of all complibs and complevels tuples + all_complibs = tables.filters.all_complibs + # Remove lzo if its not available on this platform + if not tables.which_lib_version("lzo"): + all_complibs.remove("lzo") + # Remove bzip2 if its not available on this platform + if not tables.which_lib_version("bzip2"): + all_complibs.remove("bzip2") + + all_levels = range(0, 10) + all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] + + for (lib, lvl) in all_tests: + with ensure_clean_path(setup_path) as tmpfile: + gname = "foo" + + # Write and read file to see if data is 
consistent + df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl) + result = pd.read_hdf(tmpfile, gname) + tm.assert_frame_equal(result, df) + + # Open file and check metadata + # for correct amount of compression + h5table = tables.open_file(tmpfile, mode="r") + for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"): + assert node.filters.complevel == lvl + if lvl == 0: + assert node.filters.complib is None + else: + assert node.filters.complib == lib + h5table.close() + + def test_put_integer(self, setup_path): + # non-date, non-string index + df = DataFrame(np.random.randn(50, 100)) + self._check_roundtrip(df, tm.assert_frame_equal, setup_path) + + @td.xfail_non_writeable + def test_put_mixed_type(self, setup_path): + df = tm.makeTimeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + df["bool3"] = True + df["int1"] = 1 + df["int2"] = 2 + df["timestamp1"] = Timestamp("20010102") + df["timestamp2"] = Timestamp("20010103") + df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) + df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) + df.loc[3:6, ["obj1"]] = np.nan + df = df._consolidate()._convert(datetime=True) + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "df") + + # PerformanceWarning + with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) + store.put("df", df) + + expected = store.get("df") + tm.assert_frame_equal(expected, df) + + @pytest.mark.filterwarnings( + "ignore:object name:tables.exceptions.NaturalNameWarning" + ) + def test_append(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + # this is allowed by almost always don't want to do it + # tables.NaturalNameWarning): + with catch_warnings(record=True): + + df = tm.makeTimeDataFrame() + _maybe_remove(store, "df1") + store.append("df1", df[:10]) + store.append("df1", df[10:]) + tm.assert_frame_equal(store["df1"], df) + + _maybe_remove(store, 
"df2") + store.put("df2", df[:10], format="table") + store.append("df2", df[10:]) + tm.assert_frame_equal(store["df2"], df) + + _maybe_remove(store, "df3") + store.append("/df3", df[:10]) + store.append("/df3", df[10:]) + tm.assert_frame_equal(store["df3"], df) + + # this is allowed by almost always don't want to do it + # tables.NaturalNameWarning + _maybe_remove(store, "/df3 foo") + store.append("/df3 foo", df[:10]) + store.append("/df3 foo", df[10:]) + tm.assert_frame_equal(store["df3 foo"], df) + + # dtype issues - mizxed type in a single object column + df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) + df["mixed_column"] = "testing" + df.loc[2, "mixed_column"] = np.nan + _maybe_remove(store, "df") + store.append("df", df) + tm.assert_frame_equal(store["df"], df) + + # uints - test storage of uints + uint_data = DataFrame( + { + "u08": Series( + np.random.randint(0, high=255, size=5), dtype=np.uint8 + ), + "u16": Series( + np.random.randint(0, high=65535, size=5), dtype=np.uint16 + ), + "u32": Series( + np.random.randint(0, high=2 ** 30, size=5), dtype=np.uint32 + ), + "u64": Series( + [2 ** 58, 2 ** 59, 2 ** 60, 2 ** 61, 2 ** 62], + dtype=np.uint64, + ), + }, + index=np.arange(5), + ) + _maybe_remove(store, "uints") + store.append("uints", uint_data) + tm.assert_frame_equal(store["uints"], uint_data) + + # uints - test storage of uints in indexable columns + _maybe_remove(store, "uints") + # 64-bit indices not yet supported + store.append("uints", uint_data, data_columns=["u08", "u16", "u32"]) + tm.assert_frame_equal(store["uints"], uint_data) + + def test_append_series(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + # basic + ss = tm.makeStringSeries() + ts = tm.makeTimeSeries() + ns = Series(np.arange(100)) + + store.append("ss", ss) + result = store["ss"] + tm.assert_series_equal(result, ss) + assert result.name is None + + store.append("ts", ts) + result = store["ts"] + tm.assert_series_equal(result, ts) + assert result.name 
is None + + ns.name = "foo" + store.append("ns", ns) + result = store["ns"] + tm.assert_series_equal(result, ns) + assert result.name == ns.name + + # select on the values + expected = ns[ns > 60] + result = store.select("ns", "foo>60") + tm.assert_series_equal(result, expected) + + # select on the index and values + expected = ns[(ns > 70) & (ns.index < 90)] + result = store.select("ns", "foo>70 and index<90") + tm.assert_series_equal(result, expected) + + # multi-index + mi = DataFrame(np.random.randn(5, 1), columns=["A"]) + mi["B"] = np.arange(len(mi)) + mi["C"] = "foo" + mi.loc[3:5, "C"] = "bar" + mi.set_index(["C", "B"], inplace=True) + s = mi.stack() + s.index = s.index.droplevel(2) + store.append("mi", s) + tm.assert_series_equal(store["mi"], s) + + def test_store_index_types(self, setup_path): + # GH5386 + # test storing various index types + + with ensure_clean_store(setup_path) as store: + + def check(format, index): + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df.index = index(len(df)) + + _maybe_remove(store, "df") + store.put("df", df, format=format) + tm.assert_frame_equal(df, store["df"]) + + for index in [ + tm.makeFloatIndex, + tm.makeStringIndex, + tm.makeIntIndex, + tm.makeDateIndex, + ]: + + check("table", index) + check("fixed", index) + + # period index currently broken for table + # seee GH7796 FIXME + check("fixed", tm.makePeriodIndex) + # check('table',tm.makePeriodIndex) + + # unicode + index = tm.makeUnicodeIndex + check("table", index) + check("fixed", index) + + @pytest.mark.skipif( + not is_platform_little_endian(), reason="reason platform is not little endian" + ) + def test_encoding(self, setup_path): + + with ensure_clean_store(setup_path) as store: + df = DataFrame(dict(A="foo", B="bar"), index=range(5)) + df.loc[2, "A"] = np.nan + df.loc[3, "B"] = np.nan + _maybe_remove(store, "df") + store.append("df", df, encoding="ascii") + tm.assert_frame_equal(store["df"], df) + + expected = df.reindex(columns=["A"]) + 
result = store.select("df", Term("columns=A", encoding="ascii")) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "val", + [ + [b"E\xc9, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"a", b"b", b"c"], + [b"EE, 17", b"", b"a", b"b", b"c"], + [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"], + [b"", b"a", b"b", b"c"], + [b"\xf8\xfc", b"a", b"b", b"c"], + [b"A\xf8\xfc", b"", b"a", b"b", b"c"], + [np.nan, b"", b"b", b"c"], + [b"A\xf8\xfc", np.nan, b"", b"b", b"c"], + ], + ) + @pytest.mark.parametrize("dtype", ["category", object]) + def test_latin_encoding(self, setup_path, dtype, val): + enc = "latin-1" + nan_rep = "" + key = "data" + + val = [x.decode(enc) if isinstance(x, bytes) else x for x in val] + ser = pd.Series(val, dtype=dtype) + + with ensure_clean_path(setup_path) as store: + ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep) + retr = read_hdf(store, key) + + s_nan = ser.replace(nan_rep, np.nan) + + if is_categorical_dtype(s_nan): + assert is_categorical_dtype(retr) + tm.assert_series_equal( + s_nan, retr, check_dtype=False, check_categorical=False + ) + else: + tm.assert_series_equal(s_nan, retr) + + # FIXME: don't leave commented-out + # fails: + # for x in examples: + # roundtrip(s, nan_rep=b'\xf8\xfc') + + def test_append_some_nans(self, setup_path): + + with ensure_clean_store(setup_path) as store: + df = DataFrame( + { + "A": Series(np.random.randn(20)).astype("int32"), + "A1": np.random.randn(20), + "A2": np.random.randn(20), + "B": "foo", + "C": "bar", + "D": Timestamp("20010101"), + "E": datetime.datetime(2001, 1, 2, 0, 0), + }, + index=np.arange(20), + ) + # some nans + _maybe_remove(store, "df1") + df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan + store.append("df1", df[:10]) + store.append("df1", df[10:]) + tm.assert_frame_equal(store["df1"], df) + + # first column + df1 = df.copy() + df1.loc[:, "A1"] = np.nan + _maybe_remove(store, "df1") + store.append("df1", df1[:10]) + store.append("df1", df1[10:]) + 
tm.assert_frame_equal(store["df1"], df1) + + # 2nd column + df2 = df.copy() + df2.loc[:, "A2"] = np.nan + _maybe_remove(store, "df2") + store.append("df2", df2[:10]) + store.append("df2", df2[10:]) + tm.assert_frame_equal(store["df2"], df2) + + # datetimes + df3 = df.copy() + df3.loc[:, "E"] = np.nan + _maybe_remove(store, "df3") + store.append("df3", df3[:10]) + store.append("df3", df3[10:]) + tm.assert_frame_equal(store["df3"], df3) + + def test_append_all_nans(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + df = DataFrame( + {"A1": np.random.randn(20), "A2": np.random.randn(20)}, + index=np.arange(20), + ) + df.loc[0:15, :] = np.nan + + # nan some entire rows (dropna=True) + _maybe_remove(store, "df") + store.append("df", df[:10], dropna=True) + store.append("df", df[10:], dropna=True) + tm.assert_frame_equal(store["df"], df[-4:]) + + # nan some entire rows (dropna=False) + _maybe_remove(store, "df2") + store.append("df2", df[:10], dropna=False) + store.append("df2", df[10:], dropna=False) + tm.assert_frame_equal(store["df2"], df) + + # tests the option io.hdf.dropna_table + pd.set_option("io.hdf.dropna_table", False) + _maybe_remove(store, "df3") + store.append("df3", df[:10]) + store.append("df3", df[10:]) + tm.assert_frame_equal(store["df3"], df) + + pd.set_option("io.hdf.dropna_table", True) + _maybe_remove(store, "df4") + store.append("df4", df[:10]) + store.append("df4", df[10:]) + tm.assert_frame_equal(store["df4"], df[-4:]) + + # nan some entire rows (string are still written!) 
+ df = DataFrame( + { + "A1": np.random.randn(20), + "A2": np.random.randn(20), + "B": "foo", + "C": "bar", + }, + index=np.arange(20), + ) + + df.loc[0:15, :] = np.nan + + _maybe_remove(store, "df") + store.append("df", df[:10], dropna=True) + store.append("df", df[10:], dropna=True) + tm.assert_frame_equal(store["df"], df) + + _maybe_remove(store, "df2") + store.append("df2", df[:10], dropna=False) + store.append("df2", df[10:], dropna=False) + tm.assert_frame_equal(store["df2"], df) + + # nan some entire rows (but since we have dates they are still + # written!) + df = DataFrame( + { + "A1": np.random.randn(20), + "A2": np.random.randn(20), + "B": "foo", + "C": "bar", + "D": Timestamp("20010101"), + "E": datetime.datetime(2001, 1, 2, 0, 0), + }, + index=np.arange(20), + ) + + df.loc[0:15, :] = np.nan + + _maybe_remove(store, "df") + store.append("df", df[:10], dropna=True) + store.append("df", df[10:], dropna=True) + tm.assert_frame_equal(store["df"], df) + + _maybe_remove(store, "df2") + store.append("df2", df[:10], dropna=False) + store.append("df2", df[10:], dropna=False) + tm.assert_frame_equal(store["df2"], df) + + # Test to make sure defaults are to not drop. 
+ # Corresponding to Issue 9382 + df_with_missing = DataFrame( + {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} + ) + + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df_with_missing", format="table") + reloaded = read_hdf(path, "df_with_missing") + tm.assert_frame_equal(df_with_missing, reloaded) + + def test_read_missing_key_close_store(self, setup_path): + # GH 25766 + with ensure_clean_path(setup_path) as path: + df = pd.DataFrame({"a": range(2), "b": range(2)}) + df.to_hdf(path, "k1") + + with pytest.raises(KeyError, match="'No object named k2 in the file'"): + pd.read_hdf(path, "k2") + + # smoke test to test that file is properly closed after + # read with KeyError before another write + df.to_hdf(path, "k2") + + def test_read_missing_key_opened_store(self, setup_path): + # GH 28699 + with ensure_clean_path(setup_path) as path: + df = pd.DataFrame({"a": range(2), "b": range(2)}) + df.to_hdf(path, "k1") + + store = pd.HDFStore(path, "r") + + with pytest.raises(KeyError, match="'No object named k2 in the file'"): + pd.read_hdf(store, "k2") + + # Test that the file is still open after a KeyError and that we can + # still read from it. 
+ pd.read_hdf(store, "k1") + + def test_append_frame_column_oriented(self, setup_path): + with ensure_clean_store(setup_path) as store: + + # column oriented + df = tm.makeTimeDataFrame() + _maybe_remove(store, "df1") + store.append("df1", df.iloc[:, :2], axes=["columns"]) + store.append("df1", df.iloc[:, 2:]) + tm.assert_frame_equal(store["df1"], df) + + result = store.select("df1", "columns=A") + expected = df.reindex(columns=["A"]) + tm.assert_frame_equal(expected, result) + + # selection on the non-indexable + result = store.select("df1", ("columns=A", "index=df.index[0:4]")) + expected = df.reindex(columns=["A"], index=df.index[0:4]) + tm.assert_frame_equal(expected, result) + + # this isn't supported + with pytest.raises(TypeError): + store.select("df1", "columns=A and index>df.index[4]") + + def test_append_with_different_block_ordering(self, setup_path): + + # GH 4096; using same frames, but different block orderings + with ensure_clean_store(setup_path) as store: + + for i in range(10): + + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df["index"] = range(10) + df["index"] += i * 10 + df["int64"] = Series([1] * len(df), dtype="int64") + df["int16"] = Series([1] * len(df), dtype="int16") + + if i % 2 == 0: + del df["int64"] + df["int64"] = Series([1] * len(df), dtype="int64") + if i % 3 == 0: + a = df.pop("A") + df["A"] = a + + df.set_index("index", inplace=True) + + store.append("df", df) + + # test a different ordering but with more fields (like invalid + # combinate) + with ensure_clean_store(setup_path) as store: + + df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64") + df["int64"] = Series([1] * len(df), dtype="int64") + df["int16"] = Series([1] * len(df), dtype="int16") + store.append("df", df) + + # store additional fields in different blocks + df["int16_2"] = Series([1] * len(df), dtype="int16") + with pytest.raises(ValueError): + store.append("df", df) + + # store multiple additional fields in different blocks 
+ df["float_3"] = Series([1.0] * len(df), dtype="float64") + with pytest.raises(ValueError): + store.append("df", df) + + def test_append_with_strings(self, setup_path): + + with ensure_clean_store(setup_path) as store: + with catch_warnings(record=True): + + def check_col(key, name, size): + assert ( + getattr(store.get_storer(key).table.description, name).itemsize + == size + ) + + # avoid truncation on elements + df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]]) + store.append("df_big", df) + tm.assert_frame_equal(store.select("df_big"), df) + check_col("df_big", "values_block_1", 15) + + # appending smaller string ok + df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]]) + store.append("df_big", df2) + expected = concat([df, df2]) + tm.assert_frame_equal(store.select("df_big"), expected) + check_col("df_big", "values_block_1", 15) + + # avoid truncation on elements + df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]]) + store.append("df_big2", df, min_itemsize={"values": 50}) + tm.assert_frame_equal(store.select("df_big2"), df) + check_col("df_big2", "values_block_1", 50) + + # bigger string on next append + store.append("df_new", df) + df_new = DataFrame( + [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]] + ) + with pytest.raises(ValueError): + store.append("df_new", df_new) + + # min_itemsize on Series index (GH 11412) + df = tm.makeMixedDataFrame().set_index("C") + store.append("ss", df["B"], min_itemsize={"index": 4}) + tm.assert_series_equal(store.select("ss"), df["B"]) + + # same as above, with data_columns=True + store.append( + "ss2", df["B"], data_columns=True, min_itemsize={"index": 4} + ) + tm.assert_series_equal(store.select("ss2"), df["B"]) + + # min_itemsize in index without appending (GH 10381) + store.put("ss3", df, format="table", min_itemsize={"index": 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C="longer").set_index("C") + store.append("ss3", df2) + 
tm.assert_frame_equal(store.select("ss3"), pd.concat([df, df2])) + + # same as above, with a Series + store.put("ss4", df["B"], format="table", min_itemsize={"index": 6}) + store.append("ss4", df2["B"]) + tm.assert_series_equal( + store.select("ss4"), pd.concat([df["B"], df2["B"]]) + ) + + # with nans + _maybe_remove(store, "df") + df = tm.makeTimeDataFrame() + df["string"] = "foo" + df.loc[1:4, "string"] = np.nan + df["string2"] = "bar" + df.loc[4:8, "string2"] = np.nan + df["string3"] = "bah" + df.loc[1:, "string3"] = np.nan + store.append("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + with ensure_clean_store(setup_path) as store: + + def check_col(key, name, size): + assert getattr( + store.get_storer(key).table.description, name + ).itemsize, size + + df = DataFrame(dict(A="foo", B="bar"), index=range(10)) + + # a min_itemsize that creates a data_column + _maybe_remove(store, "df") + store.append("df", df, min_itemsize={"A": 200}) + check_col("df", "A", 200) + assert store.get_storer("df").data_columns == ["A"] + + # a min_itemsize that creates a data_column2 + _maybe_remove(store, "df") + store.append("df", df, data_columns=["B"], min_itemsize={"A": 200}) + check_col("df", "A", 200) + assert store.get_storer("df").data_columns == ["B", "A"] + + # a min_itemsize that creates a data_column2 + _maybe_remove(store, "df") + store.append("df", df, data_columns=["B"], min_itemsize={"values": 200}) + check_col("df", "B", 200) + check_col("df", "values_block_0", 200) + assert store.get_storer("df").data_columns == ["B"] + + # infer the .typ on subsequent appends + _maybe_remove(store, "df") + store.append("df", df[:5], min_itemsize=200) + store.append("df", df[5:], min_itemsize=200) + tm.assert_frame_equal(store["df"], df) + + # invalid min_itemsize keys + df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"]) + _maybe_remove(store, "df") + with pytest.raises(ValueError): + store.append("df", df, 
min_itemsize={"foo": 20, "foobar": 20}) + + def test_append_with_empty_string(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + # with all empty strings (GH 12242) + df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]}) + store.append("df", df[:-1], min_itemsize={"x": 1}) + store.append("df", df[-1:], min_itemsize={"x": 1}) + tm.assert_frame_equal(store.select("df"), df) + + def test_to_hdf_with_min_itemsize(self, setup_path): + + with ensure_clean_path(setup_path) as path: + + # min_itemsize in index with to_hdf (GH 10381) + df = tm.makeMixedDataFrame().set_index("C") + df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6}) + # just make sure there is a longer string: + df2 = df.copy().reset_index().assign(C="longer").set_index("C") + df2.to_hdf(path, "ss3", append=True, format="table") + tm.assert_frame_equal(pd.read_hdf(path, "ss3"), pd.concat([df, df2])) + + # same as above, with a Series + df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6}) + df2["B"].to_hdf(path, "ss4", append=True, format="table") + tm.assert_series_equal( + pd.read_hdf(path, "ss4"), pd.concat([df["B"], df2["B"]]) + ) + + @pytest.mark.parametrize( + "format", [pytest.param("fixed", marks=td.xfail_non_writeable), "table"] + ) + def test_to_hdf_errors(self, format, setup_path): + + data = ["\ud800foo"] + ser = pd.Series(data, index=pd.Index(data)) + with ensure_clean_path(setup_path) as path: + # GH 20835 + ser.to_hdf(path, "table", format=format, errors="surrogatepass") + + result = pd.read_hdf(path, "table", errors="surrogatepass") + tm.assert_series_equal(result, ser) + + def test_append_with_data_columns(self, setup_path): + + with ensure_clean_store(setup_path) as store: + df = tm.makeTimeDataFrame() + df.iloc[0, df.columns.get_loc("B")] = 1.0 + _maybe_remove(store, "df") + store.append("df", df[:2], data_columns=["B"]) + store.append("df", df[2:]) + tm.assert_frame_equal(store["df"], df) + + # check that we have indices created + 
assert store._handle.root.df.table.cols.index.is_indexed is True + assert store._handle.root.df.table.cols.B.is_indexed is True + + # data column searching + result = store.select("df", "B>0") + expected = df[df.B > 0] + tm.assert_frame_equal(result, expected) + + # data column searching (with an indexable and a data_columns) + result = store.select("df", "B>0 and index>df.index[3]") + df_new = df.reindex(index=df.index[4:]) + expected = df_new[df_new.B > 0] + tm.assert_frame_equal(result, expected) + + # data column selection with a string data_column + df_new = df.copy() + df_new["string"] = "foo" + df_new.loc[1:4, "string"] = np.nan + df_new.loc[5:6, "string"] = "bar" + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["string"]) + result = store.select("df", "string='foo'") + expected = df_new[df_new.string == "foo"] + tm.assert_frame_equal(result, expected) + + # using min_itemsize and a data column + def check_col(key, name, size): + assert ( + getattr(store.get_storer(key).table.description, name).itemsize + == size + ) + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "df") + store.append( + "df", df_new, data_columns=["string"], min_itemsize={"string": 30} + ) + check_col("df", "string", 30) + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["string"], min_itemsize=30) + check_col("df", "string", 30) + _maybe_remove(store, "df") + store.append( + "df", df_new, data_columns=["string"], min_itemsize={"values": 30} + ) + check_col("df", "string", 30) + + with ensure_clean_store(setup_path) as store: + df_new["string2"] = "foobarbah" + df_new["string_block1"] = "foobarbah1" + df_new["string_block2"] = "foobarbah2" + _maybe_remove(store, "df") + store.append( + "df", + df_new, + data_columns=["string", "string2"], + min_itemsize={"string": 30, "string2": 40, "values": 50}, + ) + check_col("df", "string", 30) + check_col("df", "string2", 40) + check_col("df", "values_block_1", 50) + + with 
ensure_clean_store(setup_path) as store: + # multiple data columns + df_new = df.copy() + df_new.iloc[0, df_new.columns.get_loc("A")] = 1.0 + df_new.iloc[0, df_new.columns.get_loc("B")] = -1.0 + df_new["string"] = "foo" + + sl = df_new.columns.get_loc("string") + df_new.iloc[1:4, sl] = np.nan + df_new.iloc[5:6, sl] = "bar" + + df_new["string2"] = "foo" + sl = df_new.columns.get_loc("string2") + df_new.iloc[2:5, sl] = np.nan + df_new.iloc[7:8, sl] = "bar" + _maybe_remove(store, "df") + store.append("df", df_new, data_columns=["A", "B", "string", "string2"]) + result = store.select( + "df", "string='foo' and string2='foo' and A>0 and B<0" + ) + expected = df_new[ + (df_new.string == "foo") + & (df_new.string2 == "foo") + & (df_new.A > 0) + & (df_new.B < 0) + ] + tm.assert_frame_equal(result, expected, check_index_type=False) + + # yield an empty frame + result = store.select("df", "string='foo' and string2='cool'") + expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")] + tm.assert_frame_equal(result, expected, check_index_type=False) + + with ensure_clean_store(setup_path) as store: + # doc example + df_dc = df.copy() + df_dc["string"] = "foo" + df_dc.loc[4:6, "string"] = np.nan + df_dc.loc[7:9, "string"] = "bar" + df_dc["string2"] = "cool" + df_dc["datetime"] = Timestamp("20010102") + df_dc = df_dc._convert(datetime=True) + df_dc.loc[3:5, ["A", "B", "datetime"]] = np.nan + + _maybe_remove(store, "df_dc") + store.append( + "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"] + ) + result = store.select("df_dc", "B>0") + + expected = df_dc[df_dc.B > 0] + tm.assert_frame_equal(result, expected, check_index_type=False) + + result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"]) + expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] + tm.assert_frame_equal(result, expected, check_index_type=False) + + with ensure_clean_store(setup_path) as store: + # doc example part 2 + np.random.seed(1234) + index 
= date_range("1/1/2000", periods=8) + df_dc = DataFrame( + np.random.randn(8, 3), index=index, columns=["A", "B", "C"] + ) + df_dc["string"] = "foo" + df_dc.loc[4:6, "string"] = np.nan + df_dc.loc[7:9, "string"] = "bar" + df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs() + df_dc["string2"] = "cool" + + # on-disk operations + store.append("df_dc", df_dc, data_columns=["B", "C", "string", "string2"]) + + result = store.select("df_dc", "B>0") + expected = df_dc[df_dc.B > 0] + tm.assert_frame_equal(result, expected) + + result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"']) + expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")] + tm.assert_frame_equal(result, expected) + + def test_create_table_index(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + with catch_warnings(record=True): + + def col(t, column): + return getattr(store.get_storer(t).table.cols, column) + + # data columns + df = tm.makeTimeDataFrame() + df["string"] = "foo" + df["string2"] = "bar" + store.append("f", df, data_columns=["string", "string2"]) + assert col("f", "index").is_indexed is True + assert col("f", "string").is_indexed is True + assert col("f", "string2").is_indexed is True + + # specify index=columns + store.append( + "f2", df, index=["string"], data_columns=["string", "string2"] + ) + assert col("f2", "index").is_indexed is False + assert col("f2", "string").is_indexed is True + assert col("f2", "string2").is_indexed is False + + # try to index a non-table + _maybe_remove(store, "f2") + store.put("f2", df) + with pytest.raises(TypeError): + store.create_table_index("f2") + + def test_append_hierarchical(self, setup_path): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["foo", "bar"], + ) + df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + + with 
ensure_clean_store(setup_path) as store: + store.append("mi", df) + result = store.select("mi") + tm.assert_frame_equal(result, df) + + # GH 3748 + result = store.select("mi", columns=["A", "B"]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + with ensure_clean_path("test.hdf") as path: + df.to_hdf(path, "df", format="table") + result = read_hdf(path, "df", columns=["A", "B"]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + def test_column_multiindex(self, setup_path): + # GH 4710 + # recreate multi-indexes properly + + index = MultiIndex.from_tuples( + [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"] + ) + df = DataFrame(np.arange(12).reshape(3, 4), columns=index) + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) + + with ensure_clean_store(setup_path) as store: + + store.put("df", df) + tm.assert_frame_equal( + store["df"], expected, check_index_type=True, check_column_type=True + ) + + store.put("df1", df, format="table") + tm.assert_frame_equal( + store["df1"], expected, check_index_type=True, check_column_type=True + ) + + with pytest.raises(ValueError): + store.put("df2", df, format="table", data_columns=["A"]) + with pytest.raises(ValueError): + store.put("df3", df, format="table", data_columns=True) + + # appending multi-column on existing table (see GH 6167) + with ensure_clean_store(setup_path) as store: + store.append("df2", df) + store.append("df2", df) + + tm.assert_frame_equal(store["df2"], concat((df, df))) + + # non_index_axes name + df = DataFrame( + np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo") + ) + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) + + with ensure_clean_store(setup_path) as store: + + store.put("df1", df, format="table") + tm.assert_frame_equal( + store["df1"], expected, 
check_index_type=True, check_column_type=True + ) + + def test_store_multiindex(self, setup_path): + + # validate multi-index names + # GH 5527 + with ensure_clean_store(setup_path) as store: + + def make_index(names=None): + return MultiIndex.from_tuples( + [ + (datetime.datetime(2013, 12, d), s, t) + for d in range(1, 3) + for s in range(2) + for t in range(3) + ], + names=names, + ) + + # no names + _maybe_remove(store, "df") + df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index()) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) + + # partial names + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", None, None]), + ) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) + + # series + _maybe_remove(store, "s") + s = Series(np.zeros(12), index=make_index(["date", None, None])) + store.append("s", s) + xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"])) + tm.assert_series_equal(store.select("s"), xp) + + # dup with column + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", "a", "t"]), + ) + with pytest.raises(ValueError): + store.append("df", df) + + # dup within level + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", "date", "date"]), + ) + with pytest.raises(ValueError): + store.append("df", df) + + # fully names + _maybe_remove(store, "df") + df = DataFrame( + np.zeros((12, 2)), + columns=["a", "b"], + index=make_index(["date", "s", "t"]), + ) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) + + def test_select_columns_in_where(self, setup_path): + + # GH 6169 + # recreate multi-indexes when columns is passed + # in the `where` argument + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 
3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["foo_name", "bar_name"], + ) + + # With a DataFrame + df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table") + expected = df[["A"]] + + tm.assert_frame_equal(store.select("df", columns=["A"]), expected) + + tm.assert_frame_equal(store.select("df", where="columns=['A']"), expected) + + # With a Series + s = Series(np.random.randn(10), index=index, name="A") + with ensure_clean_store(setup_path) as store: + store.put("s", s, format="table") + tm.assert_series_equal(store.select("s", where="columns=['A']"), s) + + def test_mi_data_columns(self, setup_path): + # GH 14435 + idx = pd.MultiIndex.from_arrays( + [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"] + ) + df = pd.DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx) + + with ensure_clean_store(setup_path) as store: + store.append("df", df, data_columns=True) + + actual = store.select("df", where="id == 1") + expected = df.iloc[[1], :] + tm.assert_frame_equal(actual, expected) + + def test_pass_spec_to_storer(self, setup_path): + + df = tm.makeDataFrame() + + with ensure_clean_store(setup_path) as store: + store.put("df", df) + with pytest.raises(TypeError): + store.select("df", columns=["A"]) + with pytest.raises(TypeError): + store.select("df", where=[("columns=A")]) + + @td.xfail_non_writeable + def test_append_misc(self, setup_path): + + with ensure_clean_store(setup_path) as store: + df = tm.makeDataFrame() + store.append("df", df, chunksize=1) + result = store.select("df") + tm.assert_frame_equal(result, df) + + store.append("df1", df, expectedrows=10) + result = store.select("df1") + tm.assert_frame_equal(result, df) + + # more chunksize in append tests + def check(obj, comparator): + for c in [10, 200, 1000]: + with ensure_clean_store(setup_path, mode="w") as store: + store.append("obj", obj, chunksize=c) + result = 
store.select("obj") + comparator(result, obj) + + df = tm.makeDataFrame() + df["string"] = "foo" + df["float322"] = 1.0 + df["float322"] = df["float322"].astype("float32") + df["bool"] = df["float322"] > 0 + df["time1"] = Timestamp("20130101") + df["time2"] = Timestamp("20130102") + check(df, tm.assert_frame_equal) + + # empty frame, GH4273 + with ensure_clean_store(setup_path) as store: + + # 0 len + df_empty = DataFrame(columns=list("ABC")) + store.append("df", df_empty) + with pytest.raises(KeyError, match="'No object named df in the file'"): + store.select("df") + + # repeated append of 0/non-zero frames + df = DataFrame(np.random.rand(10, 3), columns=list("ABC")) + store.append("df", df) + tm.assert_frame_equal(store.select("df"), df) + store.append("df", df_empty) + tm.assert_frame_equal(store.select("df"), df) + + # store + df = DataFrame(columns=list("ABC")) + store.put("df2", df) + tm.assert_frame_equal(store.select("df2"), df) + + def test_append_raise(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + # test append with invalid input to get good error messages + + # list in column + df = tm.makeDataFrame() + df["invalid"] = [["a"]] * len(df) + assert df.dtypes["invalid"] == np.object_ + with pytest.raises(TypeError): + store.append("df", df) + + # multiple invalid columns + df["invalid2"] = [["a"]] * len(df) + df["invalid3"] = [["a"]] * len(df) + with pytest.raises(TypeError): + store.append("df", df) + + # datetime with embedded nans as object + df = tm.makeDataFrame() + s = Series(datetime.datetime(2001, 1, 2), index=df.index) + s = s.astype(object) + s[0:5] = np.nan + df["invalid"] = s + assert df.dtypes["invalid"] == np.object_ + with pytest.raises(TypeError): + store.append("df", df) + + # directly ndarray + with pytest.raises(TypeError): + store.append("df", np.arange(10)) + + # series directly + with pytest.raises(TypeError): + store.append("df", Series(np.arange(10))) + + # appending an incompatible table + df = 
tm.makeDataFrame() + store.append("df", df) + + df["foo"] = "foo" + with pytest.raises(ValueError): + store.append("df", df) + + def test_table_index_incompatible_dtypes(self, setup_path): + df1 = DataFrame({"a": [1, 2, 3]}) + df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3)) + + with ensure_clean_store(setup_path) as store: + store.put("frame", df1, format="table") + with pytest.raises(TypeError): + store.put("frame", df2, format="table", append=True) + + def test_table_values_dtypes_roundtrip(self, setup_path): + + with ensure_clean_store(setup_path) as store: + df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8") + store.append("df_f8", df1) + tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes) + + df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8") + store.append("df_i8", df2) + tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes) + + # incompatible dtype + with pytest.raises(ValueError): + store.append("df_i8", df1) + + # check creation/storage/retrieval of float32 (a bit hacky to + # actually create them thought) + df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"]) + store.append("df_f4", df1) + tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes) + assert df1.dtypes[0] == "float32" + + # check with mixed dtypes + df1 = DataFrame( + { + c: Series(np.random.randint(5), dtype=c) + for c in ["float32", "float64", "int32", "int64", "int16", "int8"] + } + ) + df1["string"] = "foo" + df1["float322"] = 1.0 + df1["float322"] = df1["float322"].astype("float32") + df1["bool"] = df1["float32"] > 0 + df1["time1"] = Timestamp("20130101") + df1["time2"] = Timestamp("20130102") + + store.append("df_mixed_dtypes1", df1) + result = store.select("df_mixed_dtypes1").dtypes.value_counts() + result.index = [str(i) for i in result.index] + expected = Series( + { + "float32": 2, + "float64": 1, + "int32": 1, + "bool": 1, + "int16": 1, + "int8": 1, + "int64": 1, + "object": 1, + "datetime64[ns]": 2, + } + ) + result = 
result.sort_index() + expected = expected.sort_index() + tm.assert_series_equal(result, expected) + + def test_table_mixed_dtypes(self, setup_path): + + # frame + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + df["bool3"] = True + df["int1"] = 1 + df["int2"] = 2 + df["timestamp1"] = Timestamp("20010102") + df["timestamp2"] = Timestamp("20010103") + df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0) + df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0) + df.loc[3:6, ["obj1"]] = np.nan + df = df._consolidate()._convert(datetime=True) + + with ensure_clean_store(setup_path) as store: + store.append("df1_mixed", df) + tm.assert_frame_equal(store.select("df1_mixed"), df) + + def test_unimplemented_dtypes_table_columns(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + dtypes = [("date", datetime.date(2001, 1, 2))] + + # currently not supported dtypes #### + for n, f in dtypes: + df = tm.makeDataFrame() + df[n] = f + with pytest.raises(TypeError): + store.append("df1_{n}".format(n=n), df) + + # frame + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["datetime1"] = datetime.date(2001, 1, 2) + df = df._consolidate()._convert(datetime=True) + + with ensure_clean_store(setup_path) as store: + # this fails because we have a date in the object block...... 
+ with pytest.raises(TypeError): + store.append("df_unimplemented", df) + + @td.xfail_non_writeable + @pytest.mark.skipif( + LooseVersion(np.__version__) == LooseVersion("1.15.0"), + reason=( + "Skipping pytables test when numpy version is " + "exactly equal to 1.15.0: gh-22098" + ), + ) + def test_calendar_roundtrip_issue(self, setup_path): + + # 8591 + # doc example from tseries holiday section + weekmask_egypt = "Sun Mon Tue Wed Thu" + holidays = [ + "2012-05-01", + datetime.datetime(2013, 5, 1), + np.datetime64("2014-05-01"), + ] + bday_egypt = pd.offsets.CustomBusinessDay( + holidays=holidays, weekmask=weekmask_egypt + ) + dt = datetime.datetime(2013, 4, 30) + dts = date_range(dt, periods=5, freq=bday_egypt) + + s = Series(dts.weekday, dts).map(Series("Mon Tue Wed Thu Fri Sat Sun".split())) + + with ensure_clean_store(setup_path) as store: + + store.put("fixed", s) + result = store.select("fixed") + tm.assert_series_equal(result, s) + + store.append("table", s) + result = store.select("table") + tm.assert_series_equal(result, s) + + def test_roundtrip_tz_aware_index(self, setup_path): + # GH 17618 + time = pd.Timestamp("2000-01-01 01:00:00", tz="US/Eastern") + df = pd.DataFrame(data=[0], index=[time]) + + with ensure_clean_store(setup_path) as store: + store.put("frame", df, format="fixed") + recons = store["frame"] + tm.assert_frame_equal(recons, df) + assert recons.index[0].value == 946706400000000000 + + def test_append_with_timedelta(self, setup_path): + # GH 3577 + # append timedelta + + df = DataFrame( + dict( + A=Timestamp("20130101"), + B=[ + Timestamp("20130101") + timedelta(days=i, seconds=10) + for i in range(10) + ], + ) + ) + df["C"] = df["A"] - df["B"] + df.loc[3:5, "C"] = np.nan + + with ensure_clean_store(setup_path) as store: + + # table + _maybe_remove(store, "df") + store.append("df", df, data_columns=True) + result = store.select("df") + tm.assert_frame_equal(result, df) + + result = store.select("df", where="C<100000") + 
tm.assert_frame_equal(result, df) + + result = store.select("df", where="C") + + # from the docs + with ensure_clean_path(setup_path) as path: + dfq = DataFrame( + np.random.randn(10, 4), + columns=list("ABCD"), + index=date_range("20130101", periods=10), + ) + dfq.to_hdf(path, "dfq", format="table", data_columns=True) + + # check ok + read_hdf( + path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']" + ) + read_hdf(path, "dfq", where="A>0 or C>0") + + # catch the invalid reference + with ensure_clean_path(setup_path) as path: + dfq = DataFrame( + np.random.randn(10, 4), + columns=list("ABCD"), + index=date_range("20130101", periods=10), + ) + dfq.to_hdf(path, "dfq", format="table") + + with pytest.raises(ValueError): + read_hdf(path, "dfq", where="A>0 or C>0") + + def test_same_name_scoping(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + import pandas as pd + + df = DataFrame( + np.random.randn(20, 2), index=pd.date_range("20130101", periods=20) + ) + store.put("df", df, format="table") + expected = df[df.index > pd.Timestamp("20130105")] + + import datetime # noqa + + result = store.select("df", "index>datetime.datetime(2013,1,5)") + tm.assert_frame_equal(result, expected) + + from datetime import datetime # noqa + + # technically an error, but allow it + result = store.select("df", "index>datetime.datetime(2013,1,5)") + tm.assert_frame_equal(result, expected) + + result = store.select("df", "index>datetime(2013,1,5)") + tm.assert_frame_equal(result, expected) + + def test_series(self, setup_path): + + s = tm.makeStringSeries() + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + ts = tm.makeTimeSeries() + self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path) + + ts2 = Series(ts.index, Index(ts.index, dtype=object)) + self._check_roundtrip(ts2, tm.assert_series_equal, path=setup_path) + + ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object)) + self._check_roundtrip( 
+ ts3, tm.assert_series_equal, path=setup_path, check_index_type=False + ) + + def test_float_index(self, setup_path): + + # GH #454 + index = np.random.randn(10) + s = Series(np.random.randn(10), index=index) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + @td.xfail_non_writeable + def test_tuple_index(self, setup_path): + + # GH #492 + col = np.arange(10) + idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)] + data = np.random.randn(30).reshape((3, 10)) + DF = DataFrame(data, index=idx, columns=col) + + with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) + self._check_roundtrip(DF, tm.assert_frame_equal, path=setup_path) + + @td.xfail_non_writeable + @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") + def test_index_types(self, setup_path): + + with catch_warnings(record=True): + values = np.random.randn(2) + + func = lambda l, r: tm.assert_series_equal( + l, r, check_dtype=True, check_index_type=True, check_series_type=True + ) + + with catch_warnings(record=True): + ser = Series(values, [0, "y"]) + self._check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + ser = Series(values, [datetime.datetime.today(), 0]) + self._check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + ser = Series(values, ["y", 0]) + self._check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + ser = Series(values, [datetime.date.today(), "a"]) + self._check_roundtrip(ser, func, path=setup_path) + + with catch_warnings(record=True): + + ser = Series(values, [0, "y"]) + self._check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [datetime.datetime.today(), 0]) + self._check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, ["y", 0]) + self._check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [datetime.date.today(), "a"]) + self._check_roundtrip(ser, func, path=setup_path) + + ser = 
Series(values, [1.23, "b"]) + self._check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [1, 1.53]) + self._check_roundtrip(ser, func, path=setup_path) + + ser = Series(values, [1, 5]) + self._check_roundtrip(ser, func, path=setup_path) + + ser = Series( + values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)] + ) + self._check_roundtrip(ser, func, path=setup_path) + + def test_timeseries_preepoch(self, setup_path): + + dr = bdate_range("1/1/1940", "1/1/1960") + ts = Series(np.random.randn(len(dr)), index=dr) + try: + self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path) + except OverflowError: + pytest.skip("known failer on some windows platforms") + + @td.xfail_non_writeable + @pytest.mark.parametrize( + "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] + ) + def test_frame(self, compression, setup_path): + + df = tm.makeDataFrame() + + # put in some random NAs + df.values[0, 0] = np.nan + df.values[5, 3] = np.nan + + self._check_roundtrip_table( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) + self._check_roundtrip( + df, tm.assert_frame_equal, path=setup_path, compression=compression + ) + + tdf = tm.makeTimeDataFrame() + self._check_roundtrip( + tdf, tm.assert_frame_equal, path=setup_path, compression=compression + ) + + with ensure_clean_store(setup_path) as store: + # not consolidated + df["foo"] = np.random.randn(len(df)) + store["df"] = df + recons = store["df"] + assert recons._data.is_consolidated() + + # empty + self._check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path) + + @td.xfail_non_writeable + def test_empty_series_frame(self, setup_path): + s0 = Series(dtype=object) + s1 = Series(name="myseries", dtype=object) + df0 = DataFrame() + df1 = DataFrame(index=["a", "b", "c"]) + df2 = DataFrame(columns=["d", "e", "f"]) + + self._check_roundtrip(s0, tm.assert_series_equal, path=setup_path) + self._check_roundtrip(s1, tm.assert_series_equal, 
path=setup_path) + self._check_roundtrip(df0, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) + + @td.xfail_non_writeable + @pytest.mark.parametrize( + "dtype", [np.int64, np.float64, np.object, "m8[ns]", "M8[ns]"] + ) + def test_empty_series(self, dtype, setup_path): + s = Series(dtype=dtype) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + def test_can_serialize_dates(self, setup_path): + + rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")] + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) + + def test_store_hierarchical(self, setup_path): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["foo", "bar"], + ) + frame = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"]) + + self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path) + + # check that the names are stored + with ensure_clean_store(setup_path) as store: + store["frame"] = frame + recons = store["frame"] + tm.assert_frame_equal(recons, frame) + + def test_store_index_name(self, setup_path): + df = tm.makeDataFrame() + df.index.name = "foo" + + with ensure_clean_store(setup_path) as store: + store["frame"] = df + recons = store["frame"] + tm.assert_frame_equal(recons, df) + + def test_store_index_name_with_tz(self, setup_path): + # GH 13884 + df = pd.DataFrame({"A": [1, 2]}) + df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788]) + df.index = df.index.tz_localize("UTC") + df.index.name = "foo" + + with ensure_clean_store(setup_path) as store: + 
store.put("frame", df, format="table") + recons = store["frame"] + tm.assert_frame_equal(recons, df) + + @pytest.mark.parametrize("table_format", ["table", "fixed"]) + def test_store_index_name_numpy_str(self, table_format, setup_path): + # GH #13492 + idx = pd.Index( + pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]), + name="cols\u05d2", + ) + idx1 = pd.Index( + pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]), + name="rows\u05d0", + ) + df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1) + + # This used to fail, returning numpy strings instead of python strings. + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format=table_format) + df2 = read_hdf(path, "df") + + tm.assert_frame_equal(df, df2, check_names=True) + + assert type(df2.index.name) == str + assert type(df2.columns.name) == str + + def test_store_series_name(self, setup_path): + df = tm.makeDataFrame() + series = df["A"] + + with ensure_clean_store(setup_path) as store: + store["series"] = series + recons = store["series"] + tm.assert_series_equal(recons, series) + + @td.xfail_non_writeable + @pytest.mark.parametrize( + "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)] + ) + def test_store_mixed(self, compression, setup_path): + def _make_one(): + df = tm.makeDataFrame() + df["obj1"] = "foo" + df["obj2"] = "bar" + df["bool1"] = df["A"] > 0 + df["bool2"] = df["B"] > 0 + df["int1"] = 1 + df["int2"] = 2 + return df._consolidate() + + df1 = _make_one() + df2 = _make_one() + + self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path) + self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path) + + with ensure_clean_store(setup_path) as store: + store["obj"] = df1 + tm.assert_frame_equal(store["obj"], df1) + store["obj"] = df2 + tm.assert_frame_equal(store["obj"], df2) + + # check that can store Series of all of these types + self._check_roundtrip( + df1["obj1"], + 
tm.assert_series_equal, + path=setup_path, + compression=compression, + ) + self._check_roundtrip( + df1["bool1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, + ) + self._check_roundtrip( + df1["int1"], + tm.assert_series_equal, + path=setup_path, + compression=compression, + ) + + @pytest.mark.filterwarnings( + "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning" + ) + def test_select_with_dups(self, setup_path): + + # single dtypes + df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) + df.index = date_range("20130101 9:30", periods=10, freq="T") + + with ensure_clean_store(setup_path) as store: + store.append("df", df) + + result = store.select("df") + expected = df + tm.assert_frame_equal(result, expected, by_blocks=True) + + result = store.select("df", columns=df.columns) + expected = df + tm.assert_frame_equal(result, expected, by_blocks=True) + + result = store.select("df", columns=["A"]) + expected = df.loc[:, ["A"]] + tm.assert_frame_equal(result, expected) + + # dups across dtypes + df = concat( + [ + DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), + DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ), + ], + axis=1, + ) + df.index = date_range("20130101 9:30", periods=10, freq="T") + + with ensure_clean_store(setup_path) as store: + store.append("df", df) + + result = store.select("df") + expected = df + tm.assert_frame_equal(result, expected, by_blocks=True) + + result = store.select("df", columns=df.columns) + expected = df + tm.assert_frame_equal(result, expected, by_blocks=True) + + expected = df.loc[:, ["A"]] + result = store.select("df", columns=["A"]) + tm.assert_frame_equal(result, expected, by_blocks=True) + + expected = df.loc[:, ["B", "A"]] + result = store.select("df", columns=["B", "A"]) + tm.assert_frame_equal(result, expected, by_blocks=True) + + # duplicates on both index and columns + with ensure_clean_store(setup_path) as store: + 
store.append("df", df) + store.append("df", df) + + expected = df.loc[:, ["B", "A"]] + expected = concat([expected, expected]) + result = store.select("df", columns=["B", "A"]) + tm.assert_frame_equal(result, expected, by_blocks=True) + + def test_overwrite_node(self, setup_path): + + with ensure_clean_store(setup_path) as store: + store["a"] = tm.makeTimeDataFrame() + ts = tm.makeTimeSeries() + store["a"] = ts + + tm.assert_series_equal(store["a"], ts) + + def test_select(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + with catch_warnings(record=True): + + # select with columns= + df = tm.makeTimeDataFrame() + _maybe_remove(store, "df") + store.append("df", df) + result = store.select("df", columns=["A", "B"]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # equivalently + result = store.select("df", [("columns=['A', 'B']")]) + expected = df.reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # with a data column + _maybe_remove(store, "df") + store.append("df", df, data_columns=["A"]) + result = store.select("df", ["A > 0"], columns=["A", "B"]) + expected = df[df.A > 0].reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # all a data columns + _maybe_remove(store, "df") + store.append("df", df, data_columns=True) + result = store.select("df", ["A > 0"], columns=["A", "B"]) + expected = df[df.A > 0].reindex(columns=["A", "B"]) + tm.assert_frame_equal(expected, result) + + # with a data column, but different columns + _maybe_remove(store, "df") + store.append("df", df, data_columns=["A"]) + result = store.select("df", ["A > 0"], columns=["C", "D"]) + expected = df[df.A > 0].reindex(columns=["C", "D"]) + tm.assert_frame_equal(expected, result) + + def test_select_dtypes(self, setup_path): + + with ensure_clean_store(setup_path) as store: + # with a Timestamp data column (GH #2637) + df = DataFrame( + dict(ts=bdate_range("2012-01-01", periods=300), 
A=np.random.randn(300)) + ) + _maybe_remove(store, "df") + store.append("df", df, data_columns=["ts", "A"]) + + result = store.select("df", "ts>=Timestamp('2012-02-01')") + expected = df[df.ts >= Timestamp("2012-02-01")] + tm.assert_frame_equal(expected, result) + + # bool columns (GH #2849) + df = DataFrame(np.random.randn(5, 2), columns=["A", "B"]) + df["object"] = "foo" + df.loc[4:5, "object"] = "bar" + df["boolv"] = df["A"] > 0 + _maybe_remove(store, "df") + store.append("df", df, data_columns=True) + + expected = df[df.boolv == True].reindex(columns=["A", "boolv"]) # noqa + for v in [True, "true", 1]: + result = store.select( + "df", "boolv == {v!s}".format(v=v), columns=["A", "boolv"] + ) + tm.assert_frame_equal(expected, result) + + expected = df[df.boolv == False].reindex(columns=["A", "boolv"]) # noqa + for v in [False, "false", 0]: + result = store.select( + "df", "boolv == {v!s}".format(v=v), columns=["A", "boolv"] + ) + tm.assert_frame_equal(expected, result) + + # integer index + df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20))) + _maybe_remove(store, "df_int") + store.append("df_int", df) + result = store.select("df_int", "index<10 and columns=['A']") + expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) + tm.assert_frame_equal(expected, result) + + # float index + df = DataFrame( + dict( + A=np.random.rand(20), + B=np.random.rand(20), + index=np.arange(20, dtype="f8"), + ) + ) + _maybe_remove(store, "df_float") + store.append("df_float", df) + result = store.select("df_float", "index<10.0 and columns=['A']") + expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) + tm.assert_frame_equal(expected, result) + + with ensure_clean_store(setup_path) as store: + + # floats w/o NaN + df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64") + df["cols"] = (df["cols"] + 10).apply(str) + + store.append("df1", df, data_columns=True) + result = store.select("df1", where="values>2.0") + expected = 
df[df["values"] > 2.0] + tm.assert_frame_equal(expected, result) + + # floats with NaN + df.iloc[0] = np.nan + expected = df[df["values"] > 2.0] + + store.append("df2", df, data_columns=True, index=False) + result = store.select("df2", where="values>2.0") + tm.assert_frame_equal(expected, result) + + # https://github.com/PyTables/PyTables/issues/282 + # bug in selection when 0th row has a np.nan and an index + # store.append('df3',df,data_columns=True) + # result = store.select( + # 'df3', where='values>2.0') + # tm.assert_frame_equal(expected, result) + + # not in first position float with NaN ok too + df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64") + df["cols"] = (df["cols"] + 10).apply(str) + + df.iloc[1] = np.nan + expected = df[df["values"] > 2.0] + + store.append("df4", df, data_columns=True) + result = store.select("df4", where="values>2.0") + tm.assert_frame_equal(expected, result) + + # test selection with comparison against numpy scalar + # GH 11283 + with ensure_clean_store(setup_path) as store: + df = tm.makeDataFrame() + + expected = df[df["A"] > 0] + + store.append("df", df, data_columns=True) + np_zero = np.float64(0) # noqa + result = store.select("df", where=["A>np_zero"]) + tm.assert_frame_equal(expected, result) + + def test_select_with_many_inputs(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + df = DataFrame( + dict( + ts=bdate_range("2012-01-01", periods=300), + A=np.random.randn(300), + B=range(300), + users=["a"] * 50 + + ["b"] * 50 + + ["c"] * 100 + + ["a{i:03d}".format(i=i) for i in range(100)], + ) + ) + _maybe_remove(store, "df") + store.append("df", df, data_columns=["ts", "A", "B", "users"]) + + # regular select + result = store.select("df", "ts>=Timestamp('2012-02-01')") + expected = df[df.ts >= Timestamp("2012-02-01")] + tm.assert_frame_equal(expected, result) + + # small selector + result = store.select( + "df", "ts>=Timestamp('2012-02-01') & users=['a','b','c']" + ) + expected = df[ 
+ (df.ts >= Timestamp("2012-02-01")) & df.users.isin(["a", "b", "c"]) + ] + tm.assert_frame_equal(expected, result) + + # big selector along the columns + selector = ["a", "b", "c"] + ["a{i:03d}".format(i=i) for i in range(60)] + result = store.select( + "df", "ts>=Timestamp('2012-02-01') and users=selector" + ) + expected = df[(df.ts >= Timestamp("2012-02-01")) & df.users.isin(selector)] + tm.assert_frame_equal(expected, result) + + selector = range(100, 200) + result = store.select("df", "B=selector") + expected = df[df.B.isin(selector)] + tm.assert_frame_equal(expected, result) + assert len(result) == 100 + + # big selector along the index + selector = Index(df.ts[0:100].values) + result = store.select("df", "ts=selector") + expected = df[df.ts.isin(selector.values)] + tm.assert_frame_equal(expected, result) + assert len(result) == 100 + + def test_select_iterator(self, setup_path): + + # single table + with ensure_clean_store(setup_path) as store: + + df = tm.makeTimeDataFrame(500) + _maybe_remove(store, "df") + store.append("df", df) + + expected = store.select("df") + + results = list(store.select("df", iterator=True)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + results = list(store.select("df", chunksize=100)) + assert len(results) == 5 + result = concat(results) + tm.assert_frame_equal(expected, result) + + results = list(store.select("df", chunksize=150)) + result = concat(results) + tm.assert_frame_equal(result, expected) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeTimeDataFrame(500) + df.to_hdf(path, "df_non_table") + + with pytest.raises(TypeError): + read_hdf(path, "df_non_table", chunksize=100) + + with pytest.raises(TypeError): + read_hdf(path, "df_non_table", iterator=True) + + with ensure_clean_path(setup_path) as path: + + df = tm.makeTimeDataFrame(500) + df.to_hdf(path, "df", format="table") + + results = list(read_hdf(path, "df", chunksize=100)) + result = concat(results) + + assert len(results) 
== 5 + tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, read_hdf(path, "df")) + + # multiple + + with ensure_clean_store(setup_path) as store: + + df1 = tm.makeTimeDataFrame(500) + store.append("df1", df1, data_columns=True) + df2 = tm.makeTimeDataFrame(500).rename(columns="{}_2".format) + df2["foo"] = "bar" + store.append("df2", df2) + + df = concat([df1, df2], axis=1) + + # full selection + expected = store.select_as_multiple(["df1", "df2"], selector="df1") + results = list( + store.select_as_multiple(["df1", "df2"], selector="df1", chunksize=150) + ) + result = concat(results) + tm.assert_frame_equal(expected, result) + + def test_select_iterator_complete_8014(self, setup_path): + + # GH 8014 + # using iterator and where clause + chunksize = 1e4 + + # no iterator + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + beg_dt = expected.index[0] + end_dt = expected.index[-1] + + # select w/o iteration and no where clause works + result = store.select("df") + tm.assert_frame_equal(expected, result) + + # select w/o iterator and where clause, single term, begin + # of range, works + where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + result = store.select("df", where=where) + tm.assert_frame_equal(expected, result) + + # select w/o iterator and where clause, single term, end + # of range, works + where = "index <= '{end_dt}'".format(end_dt=end_dt) + result = store.select("df", where=where) + tm.assert_frame_equal(expected, result) + + # select w/o iterator and where clause, inclusive range, + # works + where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( + beg_dt=beg_dt, end_dt=end_dt + ) + result = store.select("df", where=where) + tm.assert_frame_equal(expected, result) + + # with iterator, full range + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + 
store.append("df", expected) + + beg_dt = expected.index[0] + end_dt = expected.index[-1] + + # select w/iterator and no where clause works + results = list(store.select("df", chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + # select w/iterator and where clause, single term, begin of range + where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + # select w/iterator and where clause, single term, end of range + where = "index <= '{end_dt}'".format(end_dt=end_dt) + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + # select w/iterator and where clause, inclusive range + where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( + beg_dt=beg_dt, end_dt=end_dt + ) + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + tm.assert_frame_equal(expected, result) + + def test_select_iterator_non_complete_8014(self, setup_path): + + # GH 8014 + # using iterator and where clause + chunksize = 1e4 + + # with iterator, non complete range + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + beg_dt = expected.index[1] + end_dt = expected.index[-2] + + # select w/iterator and where clause, single term, begin of range + where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + rexpected = expected[expected.index >= beg_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, single term, end of range + where = "index <= '{end_dt}'".format(end_dt=end_dt) + results = list(store.select("df", where=where, chunksize=chunksize)) + result = 
concat(results) + rexpected = expected[expected.index <= end_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, inclusive range + where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( + beg_dt=beg_dt, end_dt=end_dt + ) + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + rexpected = expected[ + (expected.index >= beg_dt) & (expected.index <= end_dt) + ] + tm.assert_frame_equal(rexpected, result) + + # with iterator, empty where + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100064, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + end_dt = expected.index[-1] + + # select w/iterator and where clause, single term, begin of range + where = "index > '{end_dt}'".format(end_dt=end_dt) + results = list(store.select("df", where=where, chunksize=chunksize)) + assert 0 == len(results) + + def test_select_iterator_many_empty_frames(self, setup_path): + + # GH 8014 + # using iterator and where clause can return many empty + # frames. 
+ chunksize = int(1e4) + + # with iterator, range limited to the first chunk + with ensure_clean_store(setup_path) as store: + + expected = tm.makeTimeDataFrame(100000, "S") + _maybe_remove(store, "df") + store.append("df", expected) + + beg_dt = expected.index[0] + end_dt = expected.index[chunksize - 1] + + # select w/iterator and where clause, single term, begin of range + where = "index >= '{beg_dt}'".format(beg_dt=beg_dt) + results = list(store.select("df", where=where, chunksize=chunksize)) + result = concat(results) + rexpected = expected[expected.index >= beg_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, single term, end of range + where = "index <= '{end_dt}'".format(end_dt=end_dt) + results = list(store.select("df", where=where, chunksize=chunksize)) + + assert len(results) == 1 + result = concat(results) + rexpected = expected[expected.index <= end_dt] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause, inclusive range + where = "index >= '{beg_dt}' & index <= '{end_dt}'".format( + beg_dt=beg_dt, end_dt=end_dt + ) + results = list(store.select("df", where=where, chunksize=chunksize)) + + # should be 1, is 10 + assert len(results) == 1 + result = concat(results) + rexpected = expected[ + (expected.index >= beg_dt) & (expected.index <= end_dt) + ] + tm.assert_frame_equal(rexpected, result) + + # select w/iterator and where clause which selects + # *nothing*. + # + # To be consistent with Python idiom I suggest this should + # return [] e.g. `for e in []: print True` never prints + # True. 
+ + where = "index <= '{beg_dt}' & index >= '{end_dt}'".format( + beg_dt=beg_dt, end_dt=end_dt + ) + results = list(store.select("df", where=where, chunksize=chunksize)) + + # should be [] + assert len(results) == 0 + + @pytest.mark.filterwarnings( + "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" + ) + def test_retain_index_attributes(self, setup_path): + + # GH 3499, losing frequency info on index recreation + df = DataFrame( + dict(A=Series(range(3), index=date_range("2000-1-1", periods=3, freq="H"))) + ) + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "data") + store.put("data", df, format="table") + + result = store.get("data") + tm.assert_frame_equal(df, result) + + for attr in ["freq", "tz", "name"]: + for idx in ["index", "columns"]: + assert getattr(getattr(df, idx), attr, None) == getattr( + getattr(result, idx), attr, None + ) + + # try to append a table with a different frequency + with catch_warnings(record=True): + df2 = DataFrame( + dict( + A=Series( + range(3), index=date_range("2002-1-1", periods=3, freq="D") + ) + ) + ) + store.append("data", df2) + + assert store.get_storer("data").info["index"]["freq"] is None + + # this is ok + _maybe_remove(store, "df2") + df2 = DataFrame( + dict( + A=Series( + range(3), + index=[ + Timestamp("20010101"), + Timestamp("20010102"), + Timestamp("20020101"), + ], + ) + ) + ) + store.append("df2", df2) + df3 = DataFrame( + dict( + A=Series( + range(3), index=date_range("2002-1-1", periods=3, freq="D") + ) + ) + ) + store.append("df2", df3) + + @pytest.mark.filterwarnings( + "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" + ) + def test_retain_index_attributes2(self, setup_path): + with ensure_clean_path(setup_path) as path: + + with catch_warnings(record=True): + + df = DataFrame( + dict( + A=Series( + range(3), index=date_range("2000-1-1", periods=3, freq="H") + ) + ) + ) + df.to_hdf(path, "data", mode="w", append=True) + df2 = DataFrame( + dict( + A=Series( + 
range(3), index=date_range("2002-1-1", periods=3, freq="D") + ) + ) + ) + df2.to_hdf(path, "data", append=True) + + idx = date_range("2000-1-1", periods=3, freq="H") + idx.name = "foo" + df = DataFrame(dict(A=Series(range(3), index=idx))) + df.to_hdf(path, "data", mode="w", append=True) + + assert read_hdf(path, "data").index.name == "foo" + + with catch_warnings(record=True): + + idx2 = date_range("2001-1-1", periods=3, freq="H") + idx2.name = "bar" + df2 = DataFrame(dict(A=Series(range(3), index=idx2))) + df2.to_hdf(path, "data", append=True) + + assert read_hdf(path, "data").index.name is None + + def test_frame_select(self, setup_path): + + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + store.put("frame", df, format="table") + date = df.index[len(df) // 2] + + crit1 = Term("index>=date") + assert crit1.env.scope["date"] == date + + crit2 = "columns=['A', 'D']" + crit3 = "columns=A" + + result = store.select("frame", [crit1, crit2]) + expected = df.loc[date:, ["A", "D"]] + tm.assert_frame_equal(result, expected) + + result = store.select("frame", [crit3]) + expected = df.loc[:, ["A"]] + tm.assert_frame_equal(result, expected) + + # invalid terms + df = tm.makeTimeDataFrame() + store.append("df_time", df) + with pytest.raises(ValueError): + store.select("df_time", "index>0") + + # can't select if not written as table + # store['frame'] = df + # with pytest.raises(ValueError): + # store.select('frame', [crit1, crit2]) + + def test_frame_select_complex(self, setup_path): + # select via complex criteria + + df = tm.makeTimeDataFrame() + df["string"] = "foo" + df.loc[df.index[0:4], "string"] = "bar" + + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table", data_columns=["string"]) + + # empty + result = store.select("df", 'index>df.index[3] & string="bar"') + expected = df.loc[(df.index > df.index[3]) & (df.string == "bar")] + tm.assert_frame_equal(result, expected) + + result = store.select("df", 
'index>df.index[3] & string="foo"') + expected = df.loc[(df.index > df.index[3]) & (df.string == "foo")] + tm.assert_frame_equal(result, expected) + + # or + result = store.select("df", 'index>df.index[3] | string="bar"') + expected = df.loc[(df.index > df.index[3]) | (df.string == "bar")] + tm.assert_frame_equal(result, expected) + + result = store.select( + "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"' + ) + expected = df.loc[ + ((df.index > df.index[3]) & (df.index <= df.index[6])) + | (df.string == "bar") + ] + tm.assert_frame_equal(result, expected) + + # invert + result = store.select("df", 'string!="bar"') + expected = df.loc[df.string != "bar"] + tm.assert_frame_equal(result, expected) + + # invert not implemented in numexpr :( + with pytest.raises(NotImplementedError): + store.select("df", '~(string="bar")') + + # invert ok for filters + result = store.select("df", "~(columns=['A','B'])") + expected = df.loc[:, df.columns.difference(["A", "B"])] + tm.assert_frame_equal(result, expected) + + # in + result = store.select("df", "index>df.index[3] & columns in ['A','B']") + expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"]) + tm.assert_frame_equal(result, expected) + + def test_frame_select_complex2(self, setup_path): + + with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths: + + pp, hh = paths + + # use non-trivial selection criteria + parms = DataFrame({"A": [1, 1, 2, 2, 3]}) + parms.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"]) + + selection = read_hdf(pp, "df", where="A=[2,3]") + hist = DataFrame( + np.random.randn(25, 1), + columns=["data"], + index=MultiIndex.from_tuples( + [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"] + ), + ) + + hist.to_hdf(hh, "df", mode="w", format="table") + + expected = read_hdf(hh, "df", where="l1=[2, 3, 4]") + + # scope with list like + l = selection.index.tolist() # noqa + store = HDFStore(hh) + result = store.select("df", where="l1=l") + 
tm.assert_frame_equal(result, expected) + store.close() + + result = read_hdf(hh, "df", where="l1=l") + tm.assert_frame_equal(result, expected) + + # index + index = selection.index # noqa + result = read_hdf(hh, "df", where="l1=index") + tm.assert_frame_equal(result, expected) + + result = read_hdf(hh, "df", where="l1=selection.index") + tm.assert_frame_equal(result, expected) + + result = read_hdf(hh, "df", where="l1=selection.index.tolist()") + tm.assert_frame_equal(result, expected) + + result = read_hdf(hh, "df", where="l1=list(selection.index)") + tm.assert_frame_equal(result, expected) + + # scope with index + store = HDFStore(hh) + + result = store.select("df", where="l1=index") + tm.assert_frame_equal(result, expected) + + result = store.select("df", where="l1=selection.index") + tm.assert_frame_equal(result, expected) + + result = store.select("df", where="l1=selection.index.tolist()") + tm.assert_frame_equal(result, expected) + + result = store.select("df", where="l1=list(selection.index)") + tm.assert_frame_equal(result, expected) + + store.close() + + def test_invalid_filtering(self, setup_path): + + # can't use more than one filter (atm) + + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table") + + # not implemented + with pytest.raises(NotImplementedError): + store.select("df", "columns=['A'] | columns=['B']") + + # in theory we could deal with this + with pytest.raises(NotImplementedError): + store.select("df", "columns=['A','B'] & columns=['C']") + + def test_string_select(self, setup_path): + # GH 2973 + with ensure_clean_store(setup_path) as store: + + df = tm.makeTimeDataFrame() + + # test string ==/!= + df["x"] = "none" + df.loc[2:7, "x"] = "" + + store.append("df", df, data_columns=["x"]) + + result = store.select("df", "x=none") + expected = df[df.x == "none"] + tm.assert_frame_equal(result, expected) + + result = store.select("df", "x!=none") + expected = df[df.x != "none"] + 
tm.assert_frame_equal(result, expected) + + df2 = df.copy() + df2.loc[df2.x == "", "x"] = np.nan + + store.append("df2", df2, data_columns=["x"]) + result = store.select("df2", "x!=none") + expected = df2[isna(df2.x)] + tm.assert_frame_equal(result, expected) + + # int ==/!= + df["int"] = 1 + df.loc[2:7, "int"] = 2 + + store.append("df3", df, data_columns=["int"]) + + result = store.select("df3", "int=2") + expected = df[df.int == 2] + tm.assert_frame_equal(result, expected) + + result = store.select("df3", "int!=2") + expected = df[df.int != 2] + tm.assert_frame_equal(result, expected) + + def test_read_column(self, setup_path): + + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + _maybe_remove(store, "df") + + # GH 17912 + # HDFStore.select_column should raise a KeyError + # exception if the key is not a valid store + with pytest.raises(KeyError, match="No object named df in the file"): + store.select_column("df", "index") + + store.append("df", df) + # error + with pytest.raises( + KeyError, match=re.escape("'column [foo] not found in the table'") + ): + store.select_column("df", "foo") + + with pytest.raises(Exception): + store.select_column("df", "index", where=["index>5"]) + + # valid + result = store.select_column("df", "index") + tm.assert_almost_equal(result.values, Series(df.index).values) + assert isinstance(result, Series) + + # not a data indexable column + with pytest.raises(ValueError): + store.select_column("df", "values_block_0") + + # a data column + df2 = df.copy() + df2["string"] = "foo" + store.append("df2", df2, data_columns=["string"]) + result = store.select_column("df2", "string") + tm.assert_almost_equal(result.values, df2["string"].values) + + # a data column with NaNs, result excludes the NaNs + df3 = df.copy() + df3["string"] = "foo" + df3.loc[4:6, "string"] = np.nan + store.append("df3", df3, data_columns=["string"]) + result = store.select_column("df3", "string") + tm.assert_almost_equal(result.values, 
df3["string"].values) + + # start/stop + result = store.select_column("df3", "string", start=2) + tm.assert_almost_equal(result.values, df3["string"].values[2:]) + + result = store.select_column("df3", "string", start=-2) + tm.assert_almost_equal(result.values, df3["string"].values[-2:]) + + result = store.select_column("df3", "string", stop=2) + tm.assert_almost_equal(result.values, df3["string"].values[:2]) + + result = store.select_column("df3", "string", stop=-2) + tm.assert_almost_equal(result.values, df3["string"].values[:-2]) + + result = store.select_column("df3", "string", start=2, stop=-2) + tm.assert_almost_equal(result.values, df3["string"].values[2:-2]) + + result = store.select_column("df3", "string", start=-2, stop=2) + tm.assert_almost_equal(result.values, df3["string"].values[-2:2]) + + # GH 10392 - make sure column name is preserved + df4 = DataFrame({"A": np.random.randn(10), "B": "foo"}) + store.append("df4", df4, data_columns=True) + expected = df4["B"] + result = store.select_column("df4", "B") + tm.assert_series_equal(result, expected) + + def test_coordinates(self, setup_path): + df = tm.makeTimeDataFrame() + + with ensure_clean_store(setup_path) as store: + + _maybe_remove(store, "df") + store.append("df", df) + + # all + c = store.select_as_coordinates("df") + assert (c.values == np.arange(len(df.index))).all() + + # get coordinates back & test vs frame + _maybe_remove(store, "df") + + df = DataFrame(dict(A=range(5), B=range(5))) + store.append("df", df) + c = store.select_as_coordinates("df", ["index<3"]) + assert (c.values == np.arange(3)).all() + result = store.select("df", where=c) + expected = df.loc[0:2, :] + tm.assert_frame_equal(result, expected) + + c = store.select_as_coordinates("df", ["index>=3", "index<=4"]) + assert (c.values == np.arange(2) + 3).all() + result = store.select("df", where=c) + expected = df.loc[3:4, :] + tm.assert_frame_equal(result, expected) + assert isinstance(c, Index) + + # multiple tables + 
_maybe_remove(store, "df1") + _maybe_remove(store, "df2") + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + store.append("df1", df1, data_columns=["A", "B"]) + store.append("df2", df2) + + c = store.select_as_coordinates("df1", ["A>0", "B>0"]) + df1_result = store.select("df1", c) + df2_result = store.select("df2", c) + result = concat([df1_result, df2_result], axis=1) + + expected = concat([df1, df2], axis=1) + expected = expected[(expected.A > 0) & (expected.B > 0)] + tm.assert_frame_equal(result, expected) + + # pass array/mask as the coordinates + with ensure_clean_store(setup_path) as store: + + df = DataFrame( + np.random.randn(1000, 2), index=date_range("20000101", periods=1000) + ) + store.append("df", df) + c = store.select_column("df", "index") + where = c[DatetimeIndex(c).month == 5].index + expected = df.iloc[where] + + # locations + result = store.select("df", where=where) + tm.assert_frame_equal(result, expected) + + # boolean + result = store.select("df", where=where) + tm.assert_frame_equal(result, expected) + + # invalid + with pytest.raises(ValueError): + store.select("df", where=np.arange(len(df), dtype="float64")) + + with pytest.raises(ValueError): + store.select("df", where=np.arange(len(df) + 1)) + + with pytest.raises(ValueError): + store.select("df", where=np.arange(len(df)), start=5) + + with pytest.raises(ValueError): + store.select("df", where=np.arange(len(df)), start=5, stop=10) + + # selection with filter + selection = date_range("20000101", periods=500) + result = store.select("df", where="index in selection") + expected = df[df.index.isin(selection)] + tm.assert_frame_equal(result, expected) + + # list + df = DataFrame(np.random.randn(10, 2)) + store.append("df2", df) + result = store.select("df2", where=[0, 3, 5]) + expected = df.iloc[[0, 3, 5]] + tm.assert_frame_equal(result, expected) + + # boolean + where = [True] * 10 + where[-2] = False + result = store.select("df2", where=where) + 
expected = df.loc[where] + tm.assert_frame_equal(result, expected) + + # start/stop + result = store.select("df2", start=5, stop=10) + expected = df[5:10] + tm.assert_frame_equal(result, expected) + + def test_append_to_multiple(self, setup_path): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + df2["foo"] = "bar" + df = concat([df1, df2], axis=1) + + with ensure_clean_store(setup_path) as store: + + # exceptions + with pytest.raises(ValueError): + store.append_to_multiple( + {"df1": ["A", "B"], "df2": None}, df, selector="df3" + ) + + with pytest.raises(ValueError): + store.append_to_multiple({"df1": None, "df2": None}, df, selector="df3") + + with pytest.raises(ValueError): + store.append_to_multiple("df1", df, "df1") + + # regular operation + store.append_to_multiple( + {"df1": ["A", "B"], "df2": None}, df, selector="df1" + ) + result = store.select_as_multiple( + ["df1", "df2"], where=["A>0", "B>0"], selector="df1" + ) + expected = df[(df.A > 0) & (df.B > 0)] + tm.assert_frame_equal(result, expected) + + def test_append_to_multiple_dropna(self, setup_path): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan + df = concat([df1, df2], axis=1) + + with ensure_clean_store(setup_path) as store: + + # dropna=True should guarantee rows are synchronized + store.append_to_multiple( + {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True + ) + result = store.select_as_multiple(["df1", "df2"]) + expected = df.dropna() + tm.assert_frame_equal(result, expected) + tm.assert_index_equal(store.select("df1").index, store.select("df2").index) + + @pytest.mark.xfail( + run=False, reason="append_to_multiple_dropna_false is not raising as failed" + ) + def test_append_to_multiple_dropna_false(self, setup_path): + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + df1.iloc[1, 
df1.columns.get_indexer(["A", "B"])] = np.nan + df = concat([df1, df2], axis=1) + + with ensure_clean_store(setup_path) as store: + + # dropna=False shouldn't synchronize row indexes + store.append_to_multiple( + {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False + ) + + with pytest.raises(ValueError): + store.select_as_multiple(["df1a", "df2a"]) + + assert not store.select("df1a").index.equals(store.select("df2a").index) + + def test_select_as_multiple(self, setup_path): + + df1 = tm.makeTimeDataFrame() + df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format) + df2["foo"] = "bar" + + with ensure_clean_store(setup_path) as store: + + # no tables stored + with pytest.raises(Exception): + store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") + + store.append("df1", df1, data_columns=["A", "B"]) + store.append("df2", df2) + + # exceptions + with pytest.raises(Exception): + store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1") + + with pytest.raises(Exception): + store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1") + + msg = "'No object named df3 in the file'" + with pytest.raises(KeyError, match=msg): + store.select_as_multiple( + ["df1", "df3"], where=["A>0", "B>0"], selector="df1" + ) + + with pytest.raises(KeyError, match=msg): + store.select_as_multiple(["df3"], where=["A>0", "B>0"], selector="df1") + + with pytest.raises(KeyError, match="'No object named df4 in the file'"): + store.select_as_multiple( + ["df1", "df2"], where=["A>0", "B>0"], selector="df4" + ) + + # default select + result = store.select("df1", ["A>0", "B>0"]) + expected = store.select_as_multiple( + ["df1"], where=["A>0", "B>0"], selector="df1" + ) + tm.assert_frame_equal(result, expected) + expected = store.select_as_multiple( + "df1", where=["A>0", "B>0"], selector="df1" + ) + tm.assert_frame_equal(result, expected) + + # multiple + result = store.select_as_multiple( + ["df1", "df2"], where=["A>0", "B>0"], selector="df1" + ) 
+ expected = concat([df1, df2], axis=1) + expected = expected[(expected.A > 0) & (expected.B > 0)] + tm.assert_frame_equal(result, expected) + + # multiple (diff selector) + result = store.select_as_multiple( + ["df1", "df2"], where="index>df2.index[4]", selector="df2" + ) + expected = concat([df1, df2], axis=1) + expected = expected[5:] + tm.assert_frame_equal(result, expected) + + # test exception for diff rows + store.append("df3", tm.makeTimeDataFrame(nper=50)) + with pytest.raises(ValueError): + store.select_as_multiple( + ["df1", "df3"], where=["A>0", "B>0"], selector="df1" + ) + + @pytest.mark.skipif( + LooseVersion(tables.__version__) < LooseVersion("3.1.0"), + reason=("tables version does not support fix for nan selection bug: GH 4858"), + ) + def test_nan_selection_bug_4858(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + df = DataFrame(dict(cols=range(6), values=range(6)), dtype="float64") + df["cols"] = (df["cols"] + 10).apply(str) + df.iloc[0] = np.nan + + expected = DataFrame( + dict(cols=["13.0", "14.0", "15.0"], values=[3.0, 4.0, 5.0]), + index=[3, 4, 5], + ) + + # write w/o the index on that particular column + store.append("df", df, data_columns=True, index=["cols"]) + result = store.select("df", where="values>2.0") + tm.assert_frame_equal(result, expected) + + def test_start_stop_table(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + # table + df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20))) + store.append("df", df) + + result = store.select("df", "columns=['A']", start=0, stop=5) + expected = df.loc[0:4, ["A"]] + tm.assert_frame_equal(result, expected) + + # out of range + result = store.select("df", "columns=['A']", start=30, stop=40) + assert len(result) == 0 + expected = df.loc[30:40, ["A"]] + tm.assert_frame_equal(result, expected) + + def test_start_stop_multiple(self, setup_path): + + # GH 16209 + with ensure_clean_store(setup_path) as store: + + df = DataFrame({"foo": [1, 2], 
"bar": [1, 2]}) + + store.append_to_multiple( + {"selector": ["foo"], "data": None}, df, selector="selector" + ) + result = store.select_as_multiple( + ["selector", "data"], selector="selector", start=0, stop=1 + ) + expected = df.loc[[0], ["foo", "bar"]] + tm.assert_frame_equal(result, expected) + + def test_start_stop_fixed(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + # fixed, GH 8287 + df = DataFrame( + dict(A=np.random.rand(20), B=np.random.rand(20)), + index=pd.date_range("20130101", periods=20), + ) + store.put("df", df) + + result = store.select("df", start=0, stop=5) + expected = df.iloc[0:5, :] + tm.assert_frame_equal(result, expected) + + result = store.select("df", start=5, stop=10) + expected = df.iloc[5:10, :] + tm.assert_frame_equal(result, expected) + + # out of range + result = store.select("df", start=30, stop=40) + expected = df.iloc[30:40, :] + tm.assert_frame_equal(result, expected) + + # series + s = df.A + store.put("s", s) + result = store.select("s", start=0, stop=5) + expected = s.iloc[0:5] + tm.assert_series_equal(result, expected) + + result = store.select("s", start=5, stop=10) + expected = s.iloc[5:10] + tm.assert_series_equal(result, expected) + + # sparse; not implemented + df = tm.makeDataFrame() + df.iloc[3:5, 1:3] = np.nan + df.iloc[8:10, -2] = np.nan + + def test_select_filter_corner(self, setup_path): + + df = DataFrame(np.random.randn(50, 100)) + df.index = ["{c:3d}".format(c=c) for c in df.index] + df.columns = ["{c:3d}".format(c=c) for c in df.columns] + + with ensure_clean_store(setup_path) as store: + store.put("frame", df, format="table") + + crit = "columns=df.columns[:75]" + result = store.select("frame", [crit]) + tm.assert_frame_equal(result, df.loc[:, df.columns[:75]]) + + crit = "columns=df.columns[:75:2]" + result = store.select("frame", [crit]) + tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]]) + + def test_path_pathlib(self, setup_path): + df = tm.makeDataFrame() + + result = 
tm.round_trip_pathlib( + lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df") + ) + tm.assert_frame_equal(df, result) + + @pytest.mark.parametrize("start, stop", [(0, 2), (1, 2), (None, None)]) + def test_contiguous_mixed_data_table(self, start, stop, setup_path): + # GH 17021 + # ValueError when reading a contiguous mixed-data table ft. VLArray + df = DataFrame( + { + "a": Series([20111010, 20111011, 20111012]), + "b": Series(["ab", "cd", "ab"]), + } + ) + + with ensure_clean_store(setup_path) as store: + store.append("test_dataset", df) + + result = store.select("test_dataset", start=start, stop=stop) + tm.assert_frame_equal(df[start:stop], result) + + def test_path_pathlib_hdfstore(self, setup_path): + df = tm.makeDataFrame() + + def writer(path): + with pd.HDFStore(path) as store: + df.to_hdf(store, "df") + + def reader(path): + with pd.HDFStore(path) as store: + return pd.read_hdf(store, "df") + + result = tm.round_trip_pathlib(writer, reader) + tm.assert_frame_equal(df, result) + + def test_pickle_path_localpath(self, setup_path): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib( + lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df") + ) + tm.assert_frame_equal(df, result) + + def test_path_localpath_hdfstore(self, setup_path): + df = tm.makeDataFrame() + + def writer(path): + with pd.HDFStore(path) as store: + df.to_hdf(store, "df") + + def reader(path): + with pd.HDFStore(path) as store: + return pd.read_hdf(store, "df") + + result = tm.round_trip_localpath(writer, reader) + tm.assert_frame_equal(df, result) + + def _check_roundtrip(self, obj, comparator, path, compression=False, **kwargs): + + options = {} + if compression: + options["complib"] = _default_compressor + + with ensure_clean_store(path, "w", **options) as store: + store["obj"] = obj + retrieved = store["obj"] + comparator(retrieved, obj, **kwargs) + + def _check_double_roundtrip( + self, obj, comparator, path, compression=False, **kwargs + ): + options = {} + if 
compression: + options["complib"] = compression or _default_compressor + + with ensure_clean_store(path, "w", **options) as store: + store["obj"] = obj + retrieved = store["obj"] + comparator(retrieved, obj, **kwargs) + store["obj"] = retrieved + again = store["obj"] + comparator(again, obj, **kwargs) + + def _check_roundtrip_table(self, obj, comparator, path, compression=False): + options = {} + if compression: + options["complib"] = _default_compressor + + with ensure_clean_store(path, "w", **options) as store: + store.put("obj", obj, format="table") + retrieved = store["obj"] + + comparator(retrieved, obj) + + def test_multiple_open_close(self, setup_path): + # gh-4409: open & close multiple times + + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.to_hdf(path, "df", mode="w", format="table") + + # single + store = HDFStore(path) + assert "CLOSED" not in store.info() + assert store.is_open + + store.close() + assert "CLOSED" in store.info() + assert not store.is_open + + with ensure_clean_path(setup_path) as path: + + if pytables._table_file_open_policy_is_strict: + + # multiples + store1 = HDFStore(path) + + with pytest.raises(ValueError): + HDFStore(path) + + store1.close() + else: + + # multiples + store1 = HDFStore(path) + store2 = HDFStore(path) + + assert "CLOSED" not in store1.info() + assert "CLOSED" not in store2.info() + assert store1.is_open + assert store2.is_open + + store1.close() + assert "CLOSED" in store1.info() + assert not store1.is_open + assert "CLOSED" not in store2.info() + assert store2.is_open + + store2.close() + assert "CLOSED" in store1.info() + assert "CLOSED" in store2.info() + assert not store1.is_open + assert not store2.is_open + + # nested close + store = HDFStore(path, mode="w") + store.append("df", df) + + store2 = HDFStore(path) + store2.append("df2", df) + store2.close() + assert "CLOSED" in store2.info() + assert not store2.is_open + + store.close() + assert "CLOSED" in store.info() + assert not 
store.is_open + + # double closing + store = HDFStore(path, mode="w") + store.append("df", df) + + store2 = HDFStore(path) + store.close() + assert "CLOSED" in store.info() + assert not store.is_open + + store2.close() + assert "CLOSED" in store2.info() + assert not store2.is_open + + # ops on a closed store + with ensure_clean_path(setup_path) as path: + + df = tm.makeDataFrame() + df.to_hdf(path, "df", mode="w", format="table") + + store = HDFStore(path) + store.close() + + with pytest.raises(ClosedFileError): + store.keys() + + with pytest.raises(ClosedFileError): + "df" in store + + with pytest.raises(ClosedFileError): + len(store) + + with pytest.raises(ClosedFileError): + store["df"] + + with pytest.raises(AttributeError): + store.df + + with pytest.raises(ClosedFileError): + store.select("df") + + with pytest.raises(ClosedFileError): + store.get("df") + + with pytest.raises(ClosedFileError): + store.append("df2", df) + + with pytest.raises(ClosedFileError): + store.put("df3", df) + + with pytest.raises(ClosedFileError): + store.get_storer("df2") + + with pytest.raises(ClosedFileError): + store.remove("df2") + + with pytest.raises(ClosedFileError, match="file is not open"): + store.select("df") + + def test_pytables_native_read(self, datapath, setup_path): + with ensure_clean_store( + datapath("io", "data", "legacy_hdf/pytables_native.h5"), mode="r" + ) as store: + d2 = store["detector/readout"] + assert isinstance(d2, DataFrame) + + @pytest.mark.skipif( + is_platform_windows(), reason="native2 read fails oddly on windows" + ) + def test_pytables_native2_read(self, datapath, setup_path): + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "pytables_native2.h5"), mode="r" + ) as store: + str(store) + d1 = store["detector"] + assert isinstance(d1, DataFrame) + + @td.xfail_non_writeable + def test_legacy_table_fixed_format_read_py2(self, datapath, setup_path): + # GH 24510 + # legacy table with fixed format written in Python 2 + with 
ensure_clean_store( + datapath("io", "data", "legacy_hdf", "legacy_table_fixed_py2.h5"), mode="r" + ) as store: + result = store.select("df") + expected = pd.DataFrame( + [[1, 2, 3, "D"]], + columns=["A", "B", "C", "D"], + index=pd.Index(["ABC"], name="INDEX_NAME"), + ) + tm.assert_frame_equal(expected, result) + + def test_legacy_table_read_py2(self, datapath, setup_path): + # issue: 24925 + # legacy table written in Python 2 + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r" + ) as store: + result = store.select("table") + + expected = pd.DataFrame({"a": ["a", "b"], "b": [2, 3]}) + tm.assert_frame_equal(expected, result) + + def test_copy(self, setup_path): + + with catch_warnings(record=True): + + def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): + try: + store = HDFStore(f, "r") + + if new_f is None: + import tempfile + + fd, new_f = tempfile.mkstemp() + + tstore = store.copy( + new_f, keys=keys, propindexes=propindexes, **kwargs + ) + + # check keys + if keys is None: + keys = store.keys() + assert set(keys) == set(tstore.keys()) + + # check indices & nrows + for k in tstore.keys(): + if tstore.get_storer(k).is_table: + new_t = tstore.get_storer(k) + orig_t = store.get_storer(k) + + assert orig_t.nrows == new_t.nrows + + # check propindixes + if propindexes: + for a in orig_t.axes: + if a.is_indexed: + assert new_t[a.name].is_indexed + + finally: + safe_close(store) + safe_close(tstore) + try: + os.close(fd) + except (OSError, ValueError): + pass + safe_remove(new_f) + + # new table + df = tm.makeDataFrame() + + try: + path = create_tempfile(setup_path) + st = HDFStore(path) + st.append("df", df, data_columns=["A"]) + st.close() + do_copy(f=path) + do_copy(f=path, propindexes=False) + finally: + safe_remove(path) + + def test_store_datetime_fractional_secs(self, setup_path): + + with ensure_clean_store(setup_path) as store: + dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456) + series = 
Series([0], [dt]) + store["a"] = series + assert store["a"].index[0] == dt + + def test_tseries_indices_series(self, setup_path): + + with ensure_clean_store(setup_path) as store: + idx = tm.makeDateIndex(10) + ser = Series(np.random.randn(len(idx)), idx) + store["a"] = ser + result = store["a"] + + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") + + idx = tm.makePeriodIndex(10) + ser = Series(np.random.randn(len(idx)), idx) + store["a"] = ser + result = store["a"] + + tm.assert_series_equal(result, ser) + assert result.index.freq == ser.index.freq + tm.assert_class_equal(result.index, ser.index, obj="series index") + + def test_tseries_indices_frame(self, setup_path): + + with ensure_clean_store(setup_path) as store: + idx = tm.makeDateIndex(10) + df = DataFrame(np.random.randn(len(idx), 3), index=idx) + store["a"] = df + result = store["a"] + + tm.assert_frame_equal(result, df) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, obj="dataframe index") + + idx = tm.makePeriodIndex(10) + df = DataFrame(np.random.randn(len(idx), 3), idx) + store["a"] = df + result = store["a"] + + tm.assert_frame_equal(result, df) + assert result.index.freq == df.index.freq + tm.assert_class_equal(result.index, df.index, obj="dataframe index") + + def test_unicode_index(self, setup_path): + + unicode_values = ["\u03c3", "\u03c3\u03c3"] + + # PerformanceWarning + with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) + s = Series(np.random.randn(len(unicode_values)), unicode_values) + self._check_roundtrip(s, tm.assert_series_equal, path=setup_path) + + def test_unicode_longer_encoded(self, setup_path): + # GH 11234 + char = "\u0394" + df = pd.DataFrame({"A": [char]}) + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table", encoding="utf-8") + result = store.get("df") + 
tm.assert_frame_equal(result, df) + + df = pd.DataFrame({"A": ["a", char], "B": ["b", "b"]}) + with ensure_clean_store(setup_path) as store: + store.put("df", df, format="table", encoding="utf-8") + result = store.get("df") + tm.assert_frame_equal(result, df) + + @td.xfail_non_writeable + def test_store_datetime_mixed(self, setup_path): + + df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]}) + ts = tm.makeTimeSeries() + df["d"] = ts.index[:3] + self._check_roundtrip(df, tm.assert_frame_equal, path=setup_path) + + # FIXME: don't leave commented-out code + # def test_cant_write_multiindex_table(self): + # # for now, #1848 + # df = DataFrame(np.random.randn(10, 4), + # index=[np.arange(5).repeat(2), + # np.tile(np.arange(2), 5)]) + # + # with pytest.raises(Exception): + # store.put('foo', df, format='table') + + def test_append_with_diff_col_name_types_raises_value_error(self, setup_path): + df = DataFrame(np.random.randn(10, 1)) + df2 = DataFrame({"a": np.random.randn(10)}) + df3 = DataFrame({(1, 2): np.random.randn(10)}) + df4 = DataFrame({("1", 2): np.random.randn(10)}) + df5 = DataFrame({("1", 2, object): np.random.randn(10)}) + + with ensure_clean_store(setup_path) as store: + name = "df_{}".format(tm.rands(10)) + store.append(name, df) + + for d in (df2, df3, df4, df5): + with pytest.raises(ValueError): + store.append(name, d) + + def test_query_with_nested_special_character(self, setup_path): + df = DataFrame( + { + "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"], + "b": [1, 2, 3, 4, 5, 6, 7, 8], + } + ) + expected = df[df.a == "test & test"] + with ensure_clean_store(setup_path) as store: + store.append("test", df, format="table", data_columns=True) + result = store.select("test", 'a = "test & test"') + tm.assert_frame_equal(expected, result) + + def test_categorical(self, setup_path): + + with ensure_clean_store(setup_path) as store: + + # Basic + _maybe_remove(store, "s") + s = Series( + Categorical( + ["a", "b", "b", "a", 
"a", "c"], + categories=["a", "b", "c", "d"], + ordered=False, + ) + ) + store.append("s", s, format="table") + result = store.select("s") + tm.assert_series_equal(s, result) + + _maybe_remove(store, "s_ordered") + s = Series( + Categorical( + ["a", "b", "b", "a", "a", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ) + ) + store.append("s_ordered", s, format="table") + result = store.select("s_ordered") + tm.assert_series_equal(s, result) + + _maybe_remove(store, "df") + df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]}) + store.append("df", df, format="table") + result = store.select("df") + tm.assert_frame_equal(result, df) + + # Dtypes + _maybe_remove(store, "si") + s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category") + store.append("si", s) + result = store.select("si") + tm.assert_series_equal(result, s) + + _maybe_remove(store, "si2") + s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category") + store.append("si2", s) + result = store.select("si2") + tm.assert_series_equal(result, s) + + # Multiple + _maybe_remove(store, "df2") + df2 = df.copy() + df2["s2"] = Series(list("abcdefg")).astype("category") + store.append("df2", df2) + result = store.select("df2") + tm.assert_frame_equal(result, df2) + + # Make sure the metadata is OK + info = store.info() + assert "/df2 " in info + # assert '/df2/meta/values_block_0/meta' in info + assert "/df2/meta/values_block_1/meta" in info + + # unordered + _maybe_remove(store, "s2") + s = Series( + Categorical( + ["a", "b", "b", "a", "a", "c"], + categories=["a", "b", "c", "d"], + ordered=False, + ) + ) + store.append("s2", s, format="table") + result = store.select("s2") + tm.assert_series_equal(result, s) + + # Query + _maybe_remove(store, "df3") + store.append("df3", df, data_columns=["s"]) + expected = df[df.s.isin(["b", "c"])] + result = store.select("df3", where=['s in ["b","c"]']) + tm.assert_frame_equal(result, expected) + + expected = df[df.s.isin(["b", "c"])] + result = store.select("df3", where=['s = 
["b","c"]']) + tm.assert_frame_equal(result, expected) + + expected = df[df.s.isin(["d"])] + result = store.select("df3", where=['s in ["d"]']) + tm.assert_frame_equal(result, expected) + + expected = df[df.s.isin(["f"])] + result = store.select("df3", where=['s in ["f"]']) + tm.assert_frame_equal(result, expected) + + # Appending with same categories is ok + store.append("df3", df) + + df = concat([df, df]) + expected = df[df.s.isin(["b", "c"])] + result = store.select("df3", where=['s in ["b","c"]']) + tm.assert_frame_equal(result, expected) + + # Appending must have the same categories + df3 = df.copy() + df3["s"].cat.remove_unused_categories(inplace=True) + + with pytest.raises(ValueError): + store.append("df3", df3) + + # Remove, and make sure meta data is removed (its a recursive + # removal so should be). + result = store.select("df3/meta/s/meta") + assert result is not None + store.remove("df3") + + with pytest.raises( + KeyError, match="'No object named df3/meta/s/meta in the file'" + ): + store.select("df3/meta/s/meta") + + def test_categorical_conversion(self, setup_path): + + # GH13322 + # Check that read_hdf with categorical columns doesn't return rows if + # where criteria isn't met. 
+ obsids = ["ESP_012345_6789", "ESP_987654_3210"] + imgids = ["APF00006np", "APF0001imm"] + data = [4.3, 9.8] + + # Test without categories + df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data)) + + # We are expecting an empty DataFrame matching types of df + expected = df.iloc[[], :] + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table", data_columns=True) + result = read_hdf(path, "df", where="obsids=B") + tm.assert_frame_equal(result, expected) + + # Test with categories + df.obsids = df.obsids.astype("category") + df.imgids = df.imgids.astype("category") + + # We are expecting an empty DataFrame matching types of df + expected = df.iloc[[], :] + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table", data_columns=True) + result = read_hdf(path, "df", where="obsids=B") + tm.assert_frame_equal(result, expected) + + def test_categorical_nan_only_columns(self, setup_path): + # GH18413 + # Check that read_hdf with categorical columns with NaN-only values can + # be read back. 
+ df = pd.DataFrame( + { + "a": ["a", "b", "c", np.nan], + "b": [np.nan, np.nan, np.nan, np.nan], + "c": [1, 2, 3, 4], + "d": pd.Series([None] * 4, dtype=object), + } + ) + df["a"] = df.a.astype("category") + df["b"] = df.b.astype("category") + df["d"] = df.b.astype("category") + expected = df + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table", data_columns=True) + result = read_hdf(path, "df") + tm.assert_frame_equal(result, expected) + + def test_duplicate_column_name(self, setup_path): + df = DataFrame(columns=["a", "a"], data=[[0, 0]]) + + with ensure_clean_path(setup_path) as path: + with pytest.raises(ValueError): + df.to_hdf(path, "df", format="fixed") + + df.to_hdf(path, "df", format="table") + other = read_hdf(path, "df") + + tm.assert_frame_equal(df, other) + assert df.equals(other) + assert other.equals(df) + + def test_round_trip_equals(self, setup_path): + # GH 9330 + df = DataFrame({"B": [1, 2], "A": ["x", "y"]}) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", format="table") + other = read_hdf(path, "df") + tm.assert_frame_equal(df, other) + assert df.equals(other) + assert other.equals(df) + + def test_preserve_timedeltaindex_type(self, setup_path): + # GH9635 + # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve + # the type of the index. + df = DataFrame(np.random.normal(size=(10, 5))) + df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example") + + with ensure_clean_store(setup_path) as store: + + store["df"] = df + tm.assert_frame_equal(store["df"], df) + + def test_columns_multiindex_modified(self, setup_path): + # BUG: 7212 + # read_hdf store.select modified the passed columns parameters + # when multi-indexed. 
+ + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df.index.name = "letters" + df = df.set_index(keys="E", append=True) + + data_columns = df.index.names + df.columns.tolist() + with ensure_clean_path(setup_path) as path: + df.to_hdf( + path, + "df", + mode="a", + append=True, + data_columns=data_columns, + index=False, + ) + cols2load = list("BCD") + cols2load_original = list(cols2load) + df_loaded = read_hdf(path, "df", columns=cols2load) # noqa + assert cols2load_original == cols2load + + @ignore_natural_naming_warning + def test_to_hdf_with_object_column_names(self, setup_path): + # GH9057 + # Writing HDF5 table format should only work for string-like + # column types + + types_should_fail = [ + tm.makeIntIndex, + tm.makeFloatIndex, + tm.makeDateIndex, + tm.makeTimedeltaIndex, + tm.makePeriodIndex, + ] + types_should_run = [ + tm.makeStringIndex, + tm.makeCategoricalIndex, + tm.makeUnicodeIndex, + ] + + for index in types_should_fail: + df = DataFrame(np.random.randn(10, 2), columns=index(2)) + with ensure_clean_path(setup_path) as path: + with catch_warnings(record=True): + msg = "cannot have non-object label DataIndexableCol" + with pytest.raises(ValueError, match=msg): + df.to_hdf(path, "df", format="table", data_columns=True) + + for index in types_should_run: + df = DataFrame(np.random.randn(10, 2), columns=index(2)) + with ensure_clean_path(setup_path) as path: + with catch_warnings(record=True): + df.to_hdf(path, "df", format="table", data_columns=True) + result = pd.read_hdf( + path, "df", where="index = [{0}]".format(df.index[0]) + ) + assert len(result) + + def test_read_hdf_open_store(self, setup_path): + # GH10330 + # No check for non-string path_or-buf, and no test of open store + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df.index.name = "letters" + df = df.set_index(keys="E", append=True) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="w") + 
direct = read_hdf(path, "df") + store = HDFStore(path, mode="r") + indirect = read_hdf(store, "df") + tm.assert_frame_equal(direct, indirect) + assert store.is_open + store.close() + + def test_read_hdf_iterator(self, setup_path): + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + df.index.name = "letters" + df = df.set_index(keys="E", append=True) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="w", format="t") + direct = read_hdf(path, "df") + iterator = read_hdf(path, "df", iterator=True) + assert isinstance(iterator, TableIterator) + indirect = next(iterator.__iter__()) + tm.assert_frame_equal(direct, indirect) + iterator.store.close() + + def test_read_hdf_errors(self, setup_path): + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + + with ensure_clean_path(setup_path) as path: + with pytest.raises(IOError): + read_hdf(path, "key") + + df.to_hdf(path, "df") + store = HDFStore(path, mode="r") + store.close() + + with pytest.raises(IOError): + read_hdf(store, "df") + + def test_read_hdf_generic_buffer_errors(self): + with pytest.raises(NotImplementedError): + read_hdf(BytesIO(b""), "df") + + def test_invalid_complib(self, setup_path): + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + with ensure_clean_path(setup_path) as path: + with pytest.raises(ValueError): + df.to_hdf(path, "df", complib="foolib") + + # GH10443 + + def test_read_nokey(self, setup_path): + df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")) + + # Categorical dtype not supported for "fixed" format. So no need + # to test with that dtype in the dataframe here. 
+ with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="a") + reread = read_hdf(path) + tm.assert_frame_equal(df, reread) + df.to_hdf(path, "df2", mode="a") + + with pytest.raises(ValueError): + read_hdf(path) + + def test_read_nokey_table(self, setup_path): + # GH13231 + df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")}) + + with ensure_clean_path(setup_path) as path: + df.to_hdf(path, "df", mode="a", format="table") + reread = read_hdf(path) + tm.assert_frame_equal(df, reread) + df.to_hdf(path, "df2", mode="a", format="table") + + with pytest.raises(ValueError): + read_hdf(path) + + def test_read_nokey_empty(self, setup_path): + with ensure_clean_path(setup_path) as path: + store = HDFStore(path) + store.close() + + with pytest.raises(ValueError): + read_hdf(path) + + def test_read_from_pathlib_path(self, setup_path): + + # GH11773 + expected = DataFrame( + np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) + with ensure_clean_path(setup_path) as filename: + path_obj = Path(filename) + + expected.to_hdf(path_obj, "df", mode="a") + actual = read_hdf(path_obj, "df") + + tm.assert_frame_equal(expected, actual) + + @td.skip_if_no("py.path") + def test_read_from_py_localpath(self, setup_path): + + # GH11773 + from py.path import local as LocalPath + + expected = DataFrame( + np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE") + ) + with ensure_clean_path(setup_path) as filename: + path_obj = LocalPath(filename) + + expected.to_hdf(path_obj, "df", mode="a") + actual = read_hdf(path_obj, "df") + + tm.assert_frame_equal(expected, actual) + + def test_query_long_float_literal(self, setup_path): + # GH 14241 + df = pd.DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) + + with ensure_clean_store(setup_path) as store: + store.append("test", df, format="table", data_columns=True) + + cutoff = 1000000000.0006 + result = store.select("test", "A < {cutoff:.4f}".format(cutoff=cutoff)) 
+ assert result.empty + + cutoff = 1000000000.0010 + result = store.select("test", "A > {cutoff:.4f}".format(cutoff=cutoff)) + expected = df.loc[[1, 2], :] + tm.assert_frame_equal(expected, result) + + exact = 1000000000.0011 + result = store.select("test", "A == {exact:.4f}".format(exact=exact)) + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + def test_query_compare_column_type(self, setup_path): + # GH 15492 + df = pd.DataFrame( + { + "date": ["2014-01-01", "2014-01-02"], + "real_date": date_range("2014-01-01", periods=2), + "float": [1.1, 1.2], + "int": [1, 2], + }, + columns=["date", "real_date", "float", "int"], + ) + + with ensure_clean_store(setup_path) as store: + store.append("test", df, format="table", data_columns=True) + + ts = pd.Timestamp("2014-01-01") # noqa + result = store.select("test", where="real_date > ts") + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + + for op in ["<", ">", "=="]: + # non strings to string column always fail + for v in [2.1, True, pd.Timestamp("2014-01-01"), pd.Timedelta(1, "s")]: + query = "date {op} v".format(op=op) + with pytest.raises(TypeError): + store.select("test", where=query) + + # strings to other columns must be convertible to type + v = "a" + for col in ["int", "float", "real_date"]: + query = "{col} {op} v".format(op=op, col=col) + with pytest.raises(ValueError): + store.select("test", where=query) + + for v, col in zip( + ["1", "1.1", "2014-01-01"], ["int", "float", "real_date"] + ): + query = "{col} {op} v".format(op=op, col=col) + result = store.select("test", where=query) + + if op == "==": + expected = df.loc[[0], :] + elif op == ">": + expected = df.loc[[1], :] + else: + expected = df.loc[[], :] + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize("format", ["fixed", "table"]) + def test_read_hdf_series_mode_r(self, format, setup_path): + # GH 16583 + # Tests that reading a Series saved to an HDF file + # still works if a mode='r' argument 
is supplied + series = tm.makeFloatSeries() + with ensure_clean_path(setup_path) as path: + series.to_hdf(path, key="data", format=format) + result = pd.read_hdf(path, key="data", mode="r") + tm.assert_series_equal(result, series) + + def test_fspath(self): + with tm.ensure_clean("foo.h5") as path: + with pd.HDFStore(path) as store: + assert os.fspath(store) == str(path) + + def test_read_py2_hdf_file_in_py3(self, datapath): + # GH 16781 + + # tests reading a PeriodIndex DataFrame written in Python2 in Python3 + + # the file was generated in Python 2.7 like so: + # + # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex( + # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')) + # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p') + + expected = pd.DataFrame( + [1.0, 2, 3], + index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"), + ) + + with ensure_clean_store( + datapath( + "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5" + ), + mode="r", + ) as store: + result = store["p"] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("where", ["", (), (None,), [], [None]]) + def test_select_empty_where(self, where): + # GH26610 + + # Using keyword `where` as '' or (), or [None], etc + # while reading from HDF store raises + # "SyntaxError: only a single expression is allowed" + + df = pd.DataFrame([1, 2, 3]) + with ensure_clean_path("empty_where.h5") as path: + with pd.HDFStore(path) as store: + store.put("df", df, "t") + result = pd.read_hdf(store, "df", where=where) + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize( + "idx", + [ + date_range("2019", freq="D", periods=3, tz="UTC"), + CategoricalIndex(list("abc")), + ], + ) + def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path): + # GH 7775 + mi = MultiIndex.from_arrays([idx, idx]) + df = pd.DataFrame(0, index=mi, columns=["a"]) + with ensure_clean_path(setup_path) as path: + with pytest.raises(NotImplementedError, 
match="Saving a MultiIndex"): + df.to_hdf(path, "df") diff --git a/pandas/tests/io/pytables/test_timezones.py b/pandas/tests/io/pytables/test_timezones.py new file mode 100644 index 00000000..2bf22d98 --- /dev/null +++ b/pandas/tests/io/pytables/test_timezones.py @@ -0,0 +1,386 @@ +import datetime + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range +import pandas._testing as tm +from pandas.tests.io.pytables.common import ( + _maybe_remove, + ensure_clean_path, + ensure_clean_store, +) + + +def _compare_with_tz(a, b): + tm.assert_frame_equal(a, b) + + # compare the zones on each element + for c in a.columns: + for i in a.index: + a_e = a.loc[i, c] + b_e = b.loc[i, c] + if not (a_e == b_e and a_e.tz == b_e.tz): + raise AssertionError( + "invalid tz comparison [{a_e}] [{b_e}]".format(a_e=a_e, b_e=b_e) + ) + + +def test_append_with_timezones_dateutil(setup_path): + + from datetime import timedelta + + # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows + # filename issues. + from pandas._libs.tslibs.timezones import maybe_get_tz + + gettz = lambda x: maybe_get_tz("dateutil/" + x) + + # as columns + with ensure_clean_store(setup_path) as store: + + _maybe_remove(store, "df_tz") + df = DataFrame( + dict( + A=[ + Timestamp("20130102 2:00:00", tz=gettz("US/Eastern")) + + timedelta(hours=1) * i + for i in range(5) + ] + ) + ) + + store.append("df_tz", df, data_columns=["A"]) + result = store["df_tz"] + _compare_with_tz(result, df) + tm.assert_frame_equal(result, df) + + # select with tz aware + expected = df[df.A >= df.A[3]] + result = store.select("df_tz", where="A>=df.A[3]") + _compare_with_tz(result, expected) + + # ensure we include dates in DST and STD time here. 
+ _maybe_remove(store, "df_tz") + df = DataFrame( + dict( + A=Timestamp("20130102", tz=gettz("US/Eastern")), + B=Timestamp("20130603", tz=gettz("US/Eastern")), + ), + index=range(5), + ) + store.append("df_tz", df) + result = store["df_tz"] + _compare_with_tz(result, df) + tm.assert_frame_equal(result, df) + + df = DataFrame( + dict( + A=Timestamp("20130102", tz=gettz("US/Eastern")), + B=Timestamp("20130102", tz=gettz("EET")), + ), + index=range(5), + ) + with pytest.raises(ValueError): + store.append("df_tz", df) + + # this is ok + _maybe_remove(store, "df_tz") + store.append("df_tz", df, data_columns=["A", "B"]) + result = store["df_tz"] + _compare_with_tz(result, df) + tm.assert_frame_equal(result, df) + + # can't append with diff timezone + df = DataFrame( + dict( + A=Timestamp("20130102", tz=gettz("US/Eastern")), + B=Timestamp("20130102", tz=gettz("CET")), + ), + index=range(5), + ) + with pytest.raises(ValueError): + store.append("df_tz", df) + + # as index + with ensure_clean_store(setup_path) as store: + + # GH 4098 example + df = DataFrame( + dict( + A=Series( + range(3), + index=date_range( + "2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern") + ), + ) + ) + ) + + _maybe_remove(store, "df") + store.put("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + _maybe_remove(store, "df") + store.append("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + +def test_append_with_timezones_pytz(setup_path): + + from datetime import timedelta + + # as columns + with ensure_clean_store(setup_path) as store: + + _maybe_remove(store, "df_tz") + df = DataFrame( + dict( + A=[ + Timestamp("20130102 2:00:00", tz="US/Eastern") + + timedelta(hours=1) * i + for i in range(5) + ] + ) + ) + store.append("df_tz", df, data_columns=["A"]) + result = store["df_tz"] + _compare_with_tz(result, df) + tm.assert_frame_equal(result, df) + + # select with tz aware + _compare_with_tz(store.select("df_tz", where="A>=df.A[3]"), df[df.A 
>= df.A[3]]) + + _maybe_remove(store, "df_tz") + # ensure we include dates in DST and STD time here. + df = DataFrame( + dict( + A=Timestamp("20130102", tz="US/Eastern"), + B=Timestamp("20130603", tz="US/Eastern"), + ), + index=range(5), + ) + store.append("df_tz", df) + result = store["df_tz"] + _compare_with_tz(result, df) + tm.assert_frame_equal(result, df) + + df = DataFrame( + dict( + A=Timestamp("20130102", tz="US/Eastern"), + B=Timestamp("20130102", tz="EET"), + ), + index=range(5), + ) + with pytest.raises(ValueError): + store.append("df_tz", df) + + # this is ok + _maybe_remove(store, "df_tz") + store.append("df_tz", df, data_columns=["A", "B"]) + result = store["df_tz"] + _compare_with_tz(result, df) + tm.assert_frame_equal(result, df) + + # can't append with diff timezone + df = DataFrame( + dict( + A=Timestamp("20130102", tz="US/Eastern"), + B=Timestamp("20130102", tz="CET"), + ), + index=range(5), + ) + with pytest.raises(ValueError): + store.append("df_tz", df) + + # as index + with ensure_clean_store(setup_path) as store: + + # GH 4098 example + df = DataFrame( + dict( + A=Series( + range(3), + index=date_range("2000-1-1", periods=3, freq="H", tz="US/Eastern"), + ) + ) + ) + + _maybe_remove(store, "df") + store.put("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + _maybe_remove(store, "df") + store.append("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + +def test_tseries_select_index_column(setup_path): + # GH7777 + # selecting a UTC datetimeindex column did + # not preserve UTC tzinfo set before storing + + # check that no tz still works + rng = date_range("1/1/2000", "1/30/2000") + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store.append("frame", frame) + result = store.select_column("frame", "index") + assert rng.tz == DatetimeIndex(result.values).tz + + # check utc + rng = date_range("1/1/2000", "1/30/2000", tz="UTC") + 
frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store.append("frame", frame) + result = store.select_column("frame", "index") + assert rng.tz == result.dt.tz + + # double check non-utc + rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store.append("frame", frame) + result = store.select_column("frame", "index") + assert rng.tz == result.dt.tz + + +def test_timezones_fixed(setup_path): + with ensure_clean_store(setup_path) as store: + + # index + rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern") + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + store["df"] = df + result = store["df"] + tm.assert_frame_equal(result, df) + + # as data + # GH11411 + _maybe_remove(store, "df") + df = DataFrame( + { + "A": rng, + "B": rng.tz_convert("UTC").tz_localize(None), + "C": rng.tz_convert("CET"), + "D": range(len(rng)), + }, + index=rng, + ) + store["df"] = df + result = store["df"] + tm.assert_frame_equal(result, df) + + +def test_fixed_offset_tz(setup_path): + rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00") + frame = DataFrame(np.random.randn(len(rng), 4), index=rng) + + with ensure_clean_store(setup_path) as store: + store["frame"] = frame + recons = store["frame"] + tm.assert_index_equal(recons.index, rng) + assert rng.tz == recons.index.tz + + +@td.skip_if_windows +def test_store_timezone(setup_path): + # GH2852 + # issue storing datetime.date with a timezone as it resets when read + # back in a new timezone + + # original method + with ensure_clean_store(setup_path) as store: + + today = datetime.date(2013, 9, 10) + df = DataFrame([1, 2, 3], index=[today, today, today]) + store["obj1"] = df + result = store["obj1"] + tm.assert_frame_equal(result, df) + + # with tz setting + with ensure_clean_store(setup_path) as store: + + with 
tm.set_timezone("EST5EDT"): + today = datetime.date(2013, 9, 10) + df = DataFrame([1, 2, 3], index=[today, today, today]) + store["obj1"] = df + + with tm.set_timezone("CST6CDT"): + result = store["obj1"] + + tm.assert_frame_equal(result, df) + + +def test_legacy_datetimetz_object(datapath, setup_path): + # legacy from < 0.17.0 + # 8260 + expected = DataFrame( + dict( + A=Timestamp("20130102", tz="US/Eastern"), B=Timestamp("20130603", tz="CET") + ), + index=range(5), + ) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r" + ) as store: + result = store["df"] + tm.assert_frame_equal(result, expected) + + +def test_dst_transitions(setup_path): + # make sure we are not failing on transitions + with ensure_clean_store(setup_path) as store: + times = pd.date_range( + "2013-10-26 23:00", + "2013-10-27 01:00", + tz="Europe/London", + freq="H", + ambiguous="infer", + ) + + for i in [times, times + pd.Timedelta("10min")]: + _maybe_remove(store, "df") + df = DataFrame({"A": range(len(i)), "B": i}, index=i) + store.append("df", df) + result = store.select("df") + tm.assert_frame_equal(result, df) + + +def test_read_with_where_tz_aware_index(setup_path): + # GH 11926 + periods = 10 + dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC") + mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"]) + expected = pd.DataFrame({"MYCOL": 0}, index=mi) + + key = "mykey" + with ensure_clean_path(setup_path) as path: + with pd.HDFStore(path) as store: + store.append(key, expected, format="table", append=True) + result = pd.read_hdf(path, key, where="DATE > 20151130") + tm.assert_frame_equal(result, expected) + + +def test_py2_created_with_datetimez(datapath, setup_path): + # The test HDF5 file was created in Python 2, but could not be read in + # Python 3. 
+ # + # GH26443 + index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")] + expected = DataFrame({"data": 123}, index=index) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r" + ) as store: + result = store["key"] + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/sas/__init__.py b/pandas/tests/io/sas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/io/sas/data/DEMO_G.csv b/pandas/tests/io/sas/data/DEMO_G.csv new file mode 100644 index 00000000..db2158a5 --- /dev/null +++ b/pandas/tests/io/sas/data/DEMO_G.csv @@ -0,0 +1,9757 @@ +"SEQN","SDDSRVYR","RIDSTATR","RIAGENDR","RIDAGEYR","RIDAGEMN","RIDRETH1","RIDRETH3","RIDEXMON","RIDEXAGY","RIDEXAGM","DMQMILIZ","DMQADFC","DMDBORN4","DMDCITZN","DMDYRSUS","DMDEDUC3","DMDEDUC2","DMDMARTL","RIDEXPRG","SIALANG","SIAPROXY","SIAINTRP","FIALANG","FIAPROXY","FIAINTRP","MIALANG","MIAPROXY","MIAINTRP","AIALANGA","WTINT2YR","WTMEC2YR","SDMVPSU","SDMVSTRA","INDHHIN2","INDFMIN2","INDFMPIR","DMDHHSIZ","DMDFMSIZ","DMDHHSZA","DMDHHSZB","DMDHHSZE","DMDHRGND","DMDHRAGE","DMDHRBR4","DMDHREDU","DMDHRMAR","DMDHSEDU" +62161,7,2,1,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,1,2,1,2,2,1,2,2,1,102641.406474,104236.582554,1,91,14,14,3.15,5,5,0,1,0,2,50,1,5,1,5 +62162,7,2,2,3,NA,1,1,1,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,16116.35401,3,92,4,4,0.6,6,6,2,2,0,2,24,1,3,6,NA +62163,7,2,1,14,NA,5,6,2,14,177,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7397.684828,7869.485117,3,90,15,15,4.07,5,5,0,2,1,1,42,1,5,1,4 +62164,7,2,2,44,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,127351.373299,127965.226204,1,94,8,8,1.67,5,5,1,2,0,1,52,1,4,1,4 +62165,7,2,2,14,NA,4,4,2,14,179,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12209.74498,13384.042162,2,90,4,4,0.57,5,5,1,2,0,2,33,2,2,77,NA 
+62166,7,2,1,9,NA,3,3,2,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,60593.636684,64068.123183,1,91,77,77,NA,6,6,0,4,0,1,44,1,5,1,5 +62167,7,2,1,0,11,5,6,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5024.464768,5303.683185,2,92,99,77,NA,7,4,3,3,1,1,61,2,1,1,3 +62168,7,2,1,6,NA,5,7,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5897.024603,6245.043868,2,103,14,14,3.48,5,5,0,2,1,1,43,1,4,1,5 +62169,7,2,1,21,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,1,NA,NA,NA,1,2,2,1,14391.77847,14783.600953,1,92,2,2,0.33,5,5,0,1,0,1,51,2,1,4,NA +62170,7,2,1,15,NA,5,7,1,15,181,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7794.52699,8291.636582,3,91,15,15,5,4,4,0,2,0,1,38,2,5,1,5 +62171,7,2,1,14,NA,1,1,1,14,175,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,22768.423624,22886.980387,3,92,9,9,2.46,4,4,0,2,0,1,43,2,3,1,4 +62172,7,2,2,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,26960.774346,27122.911908,2,96,5,5,2.02,1,1,0,0,0,2,43,1,3,5,NA +62173,7,2,2,2,NA,1,1,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11401.934012,12203.058423,1,95,13,13,NA,5,5,3,0,0,2,33,2,1,1,2 +62174,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,24912.668432,27335.895242,3,90,10,10,4.3,2,2,0,0,2,2,80,1,4,1,5 +62175,7,2,1,5,NA,3,3,1,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26980.605125,30440.534478,1,94,3,3,0.39,6,6,2,2,0,2,25,1,4,1,2 +62176,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,53830.599426,54203.155016,1,99,15,15,5,5,5,3,0,0,2,34,1,5,1,5 +62177,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,7879.750437,7851.284287,2,92,99,77,NA,7,4,3,3,1,1,61,2,1,1,3 +62178,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,12291.154515,13189.875012,1,95,1,1,0.05,1,1,0,0,1,1,80,1,3,2,NA +62179,7,2,1,55,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16590.074977,17115.36835,1,92,15,15,5,4,4,0,2,0,1,55,1,5,1,5 
+62180,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20457.614917,22616.494827,1,97,5,5,0.87,4,4,2,0,0,1,35,1,5,1,5 +62181,7,2,1,9,NA,1,1,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,NA,13822.148996,14860.201344,3,92,4,4,0.55,6,6,0,4,0,1,36,2,1,1,3 +62182,7,1,1,75,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,63069.107216,0,1,90,12,12,NA,2,2,0,0,2,1,75,1,5,1,4 +62183,7,2,1,6,NA,4,4,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10138.00454,10211.52145,1,100,14,14,3.6,4,4,1,1,0,1,41,1,4,1,5 +62184,7,2,1,26,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15600.678771,15236.26157,2,95,15,15,3.85,7,7,0,3,1,2,62,1,4,2,NA +62185,7,2,1,16,NA,1,1,1,16,201,NA,NA,1,1,NA,10,NA,NA,NA,2,2,2,2,2,2,NA,NA,NA,NA,18635.323223,19040.145288,2,103,77,77,NA,5,5,0,2,0,2,45,2,4,5,NA +62186,7,2,2,17,NA,4,4,1,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11224.041366,11682.365019,1,96,5,5,0.53,7,7,2,2,0,2,38,1,9,6,NA +62187,7,2,2,9,NA,1,1,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,10118.363218,11093.371216,2,103,77,77,NA,7,7,0,4,0,1,38,2,1,6,NA +62188,7,2,1,2,NA,2,2,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11793.948458,12167.27893,1,100,5,5,0.78,6,5,1,2,0,2,40,2,1,5,NA +62189,7,2,2,30,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,17983.231494,18657.922524,1,94,10,10,3.04,4,4,2,0,0,2,30,1,4,1,5 +62190,7,2,2,15,NA,1,1,1,15,189,NA,NA,2,2,4,9,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20419.465237,21192.774678,2,102,6,3,0.54,6,4,0,4,0,2,43,2,1,5,NA +62191,7,2,1,70,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,8661.769277,9105.621464,1,96,14,3,0.9,2,1,0,0,2,2,71,NA,NA,3,NA +62192,7,2,2,11,NA,1,1,2,11,141,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14087.469432,14372.406512,2,97,7,7,2.05,3,3,0,2,0,2,45,1,5,2,NA +62193,7,2,1,17,NA,4,4,2,17,209,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11386.695644,11908.648036,2,99,6,6,1.18,5,5,0,3,0,2,38,1,2,5,NA 
+62194,7,2,2,9,NA,3,3,2,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,36778.822436,37956.900598,1,99,15,15,5,5,5,0,3,0,2,43,1,5,1,5 +62195,7,2,1,35,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,15842.100091,16468.990083,2,90,8,6,2.39,4,1,1,1,0,2,21,1,5,6,NA +62196,7,2,1,1,17,3,3,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,35830.680775,40425.523018,2,95,9,9,2.22,5,5,1,0,0,1,55,1,4,1,5 +62197,7,2,2,16,NA,4,4,1,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20015.720245,20424.69013,2,102,15,15,5,4,4,0,2,0,1,44,1,3,1,1 +62198,7,2,1,7,NA,3,3,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,30754.608672,32212.772113,1,98,3,3,0.61,4,4,0,2,0,2,32,1,3,6,NA +62199,7,2,1,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,127000.852889,130891.431194,2,92,15,15,5,2,1,0,0,0,1,57,1,5,6,NA +62200,7,2,1,42,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19184.316833,20543.822351,1,97,15,15,4.07,5,5,0,3,0,1,42,2,5,1,5 +62201,7,1,2,58,NA,5,6,NA,NA,NA,2,NA,2,2,2,NA,2,2,NA,1,2,1,1,2,2,NA,NA,NA,NA,19442.276314,0,1,95,6,6,1.34,4,4,0,2,0,2,32,2,3,2,NA +62202,7,2,1,36,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,41155.167164,40844.556107,1,102,14,14,2.83,6,6,1,2,0,1,36,1,2,1,3 +62203,7,2,1,8,NA,4,4,2,8,99,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,11324.954668,11525.075349,1,93,12,12,NA,3,3,0,1,0,2,49,2,3,1,3 +62204,7,1,2,9,NA,5,6,NA,NA,NA,NA,NA,2,1,3,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5585.02957,0,2,103,15,15,5,4,4,0,2,0,1,48,2,5,1,5 +62205,7,2,1,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,103663.693967,105583.964573,1,90,15,15,5,4,4,0,1,0,2,53,1,5,1,5 +62206,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,67177.369911,67642.297015,3,92,14,14,2.74,6,6,2,2,0,1,35,1,5,1,4 +62207,7,2,1,0,0,4,4,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6988.613752,7730.080563,1,100,5,5,1.05,3,3,1,0,0,2,35,1,4,6,NA 
+62208,7,2,1,38,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,4,1,NA,2,2,2,1,2,2,1,2,2,2,41241.224595,41216.943466,2,102,7,7,1.53,5,5,1,2,0,2,37,2,4,1,4 +62209,7,2,2,62,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,4,NA,2,2,2,2,2,2,2,2,2,2,13473.304889,14578.166065,2,96,6,6,1.11,6,6,0,2,1,1,40,2,2,1,2 +62210,7,2,1,15,NA,3,3,2,15,188,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,100370.520459,102294.664852,1,90,15,15,5,4,4,0,2,1,2,52,1,5,1,NA +62211,7,1,2,63,NA,1,1,NA,NA,NA,2,NA,2,2,77,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,16352.915834,0,3,92,9,9,2.22,5,5,1,0,2,1,66,2,1,1,1 +62212,7,1,2,8,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9465.598219,0,2,91,12,12,NA,3,3,0,1,0,1,56,1,4,1,4 +62213,7,2,1,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5422.375171,5488.513785,2,99,4,4,0.94,3,3,1,0,0,1,48,2,3,6,NA +62214,7,2,2,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,4,2,1,2,2,1,2,2,1,2,2,1,18723.98095,21433.166124,2,95,7,7,1.41,5,5,2,0,0,2,53,1,3,3,NA +62215,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,129999.035519,129559.2554,1,98,13,13,NA,2,2,0,0,2,2,80,1,2,2,NA +62216,7,2,1,0,6,3,3,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19442.488468,19106.370101,2,92,15,15,5,3,3,1,0,0,2,31,2,5,1,5 +62217,7,2,2,77,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,NA,15730.58404,17568.357111,2,98,4,4,0.97,3,3,0,1,1,2,77,1,1,5,NA +62218,7,2,2,38,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,1,2,2,1,2,2,1,39534.635218,41046.564195,2,102,14,14,4.05,3,3,0,1,0,1,18,1,2,NA,NA +62219,7,2,2,2,NA,1,1,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11582.174418,11621.723611,2,102,4,4,0.44,7,7,1,3,0,1,48,1,9,1,9 +62220,7,2,2,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,33193.038733,33538.564898,1,96,9,9,4.92,1,1,0,0,0,2,31,1,5,5,NA +62221,7,2,2,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,25818.768393,25697.012112,1,100,14,14,3.6,4,4,1,1,0,1,41,1,4,1,5 
+62222,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,89973.129052,97483.088567,1,93,15,15,5,2,2,0,0,0,2,30,1,5,1,5 +62223,7,2,1,54,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,10133.862484,12107.78442,3,90,77,77,NA,3,3,0,1,0,1,54,2,3,1,3 +62224,7,2,2,29,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,1,1,2,2,2,2,1,2,2,2,2,2,2,43986.779369,46449.619953,1,93,9,9,2.46,4,4,0,2,0,1,35,2,1,1,1 +62225,7,2,2,13,NA,1,1,1,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24481.187693,25761.611656,1,92,15,15,4.99,4,4,0,2,0,2,43,1,4,1,4 +62226,7,2,1,80,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,4,1,NA,2,2,2,2,2,2,2,2,1,NA,13654.270555,13892.295449,2,93,9,9,3.64,2,2,0,0,2,2,79,2,2,1,4 +62227,7,2,1,19,NA,3,3,2,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,68701.580401,72973.564721,2,94,15,15,5,5,5,0,2,0,1,53,1,5,1,5 +62228,7,2,1,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,144353.133634,144729.952645,1,91,14,14,3.15,5,5,0,1,0,2,50,1,5,1,5 +62229,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,72426.980535,75980.145152,1,98,6,6,1.31,3,3,1,0,0,1,30,1,5,1,5 +62230,7,2,1,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,12946.818038,13703.92145,1,101,5,3,1.07,2,1,0,0,2,2,70,1,4,2,NA +62231,7,2,2,48,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,43535.993088,52686.708499,2,102,15,15,3.92,5,5,0,0,0,1,19,1,4,NA,NA +62232,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,115926.402585,118970.086068,1,101,14,14,3.3,4,4,0,2,0,2,42,1,4,1,3 +62233,7,2,2,63,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,10999.00871,11900.968385,2,98,6,6,0.78,7,7,1,3,1,2,63,1,2,4,NA +62234,7,2,1,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14860.312419,14555.816778,3,90,15,15,4.89,5,5,0,0,0,2,57,2,3,1,3 +62235,7,2,2,2,NA,3,3,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,49862.013993,55032.245455,2,91,15,15,5,4,4,2,0,0,1,35,1,5,1,5 
+62236,7,2,1,61,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9048.959172,9513.611982,1,93,15,15,5,5,5,1,0,1,1,61,2,4,1,4 +62237,7,2,2,58,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,140431.173819,139253.659476,1,91,7,7,2.64,2,2,0,0,1,2,58,1,5,1,4 +62238,7,2,2,0,4,3,3,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20786.668002,20803.881706,1,101,8,8,1.85,5,5,3,0,0,2,31,1,2,1,2 +62239,7,2,2,22,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,3,6,2,2,2,2,2,2,2,2,2,2,2,39426.061521,39254.343691,2,94,14,1,0.09,5,1,0,0,0,1,24,2,4,5,NA +62240,7,2,2,14,NA,2,2,2,15,180,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,26657.121865,28092.602686,1,97,14,14,3.25,4,4,0,2,0,1,45,1,3,6,NA +62241,7,2,1,2,NA,2,2,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9237.934626,9897.780242,2,90,14,14,3.45,4,4,1,1,0,2,34,2,5,6,NA +62242,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,129608.716834,132839.525748,1,100,6,6,1.98,2,2,0,0,0,1,50,1,5,4,NA +62243,7,2,2,1,16,3,3,2,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,49862.013993,51432.868823,2,91,15,15,5,4,4,2,0,0,1,34,1,5,1,5 +62244,7,2,1,1,17,1,1,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14505.670599,14525.271178,2,102,15,15,5,4,4,2,0,0,1,32,1,5,1,5 +62245,7,2,1,8,NA,5,6,2,8,97,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,10810.913614,11522.32071,1,97,15,15,5,4,4,1,1,0,1,44,2,5,1,5 +62246,7,2,2,0,1,1,1,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,7210.38482,7501.352858,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +62247,7,1,2,0,6,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5158.857524,0,1,97,7,7,1.74,4,4,2,0,0,1,34,1,5,1,5 +62248,7,2,1,65,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,9048.959172,9881.360452,1,93,8,8,1.2,7,7,1,1,1,1,24,2,2,5,NA +62249,7,2,2,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,59001.303336,59528.815212,2,101,4,4,1.38,1,1,0,0,0,2,26,1,5,5,NA 
+62250,7,2,1,34,NA,1,1,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,51543.062078,52965.08671,3,92,15,15,5,3,3,1,0,0,1,34,1,5,1,5 +62251,7,2,2,51,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16672.82247,17074.238333,1,92,12,12,NA,4,4,0,0,0,1,59,2,3,1,4 +62252,7,2,2,11,NA,1,1,1,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17053.854294,17379.519997,3,92,15,8,2.62,4,3,1,1,0,1,30,1,2,6,NA +62253,7,2,1,18,NA,3,3,2,19,228,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,68701.580401,72973.564721,2,94,14,14,5,3,3,0,0,0,1,42,1,5,1,5 +62254,7,2,1,14,NA,4,4,2,14,179,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15381.581315,15476.088016,1,93,12,12,NA,5,4,0,2,0,1,32,1,2,5,NA +62255,7,2,1,65,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,29731.886612,29889.869541,1,95,3,3,0.87,2,2,0,0,2,2,65,1,2,1,3 +62256,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,7911.357817,9487.389486,2,94,5,5,1.63,2,2,0,0,2,2,79,1,3,1,1 +62257,7,2,2,2,NA,1,1,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,NA,NA,NA,NA,NA,NA,NA,9469.751474,10135.116584,1,102,NA,NA,NA,5,5,1,2,0,1,39,NA,NA,1,NA +62258,7,2,1,47,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,43470.92932,42833.060444,1,92,6,6,0.93,5,5,0,2,0,1,47,2,1,1,1 +62259,7,2,1,61,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,7736.56115,7645.782314,1,99,4,2,0.87,2,1,0,0,1,2,59,1,3,3,NA +62260,7,2,1,10,NA,3,3,2,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,57305.501166,60591.443404,1,94,15,15,5,5,5,0,3,0,1,46,1,3,1,5 +62261,7,2,1,47,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,2,2,2,2,33029.272844,33366.323953,2,93,14,14,3.25,4,4,0,2,0,2,46,2,5,1,4 +62262,7,2,2,0,10,2,2,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,7367.430495,7616.752822,2,91,5,5,0.74,5,5,1,1,0,1,35,2,1,1,2 +62263,7,2,1,2,NA,3,3,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,28617.223132,33531.791823,1,101,3,3,0.61,4,4,1,2,0,1,38,1,2,4,NA 
+62264,7,2,1,77,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,52448.388619,55291.886181,1,99,7,7,2.72,2,2,0,0,2,1,77,1,2,1,3 +62265,7,2,1,52,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19478.845078,19591.5023,2,95,7,7,1.13,6,6,0,3,1,1,52,1,4,1,4 +62266,7,2,1,64,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,29999.543427,30837.648113,2,91,12,5,1.79,3,1,0,0,1,1,52,1,4,3,NA +62267,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,34430.806911,35615.742482,2,95,6,6,1.08,4,4,1,1,0,1,39,1,4,1,4 +62268,7,2,2,15,NA,1,1,1,16,192,NA,NA,1,1,NA,9,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,16734.618372,17498.916137,2,96,5,5,0.78,5,5,0,2,0,1,37,2,1,5,NA +62269,7,2,1,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,14718.123234,15404.517346,2,101,1,1,0.1,6,6,1,2,1,2,27,1,2,1,2 +62270,7,2,2,33,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,5,5,2,2,2,2,1,2,2,2,2,2,2,31765.061314,30907.973228,2,93,4,4,0.56,5,5,0,0,0,2,49,2,2,5,NA +62271,7,2,1,7,NA,4,4,2,7,87,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6971.452972,8307.996458,2,99,7,7,1.63,4,4,0,2,0,1,53,1,3,3,NA +62272,7,2,1,9,NA,2,2,1,9,111,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,11102.340512,11169.626274,2,93,6,6,0.64,7,7,2,1,3,2,60,2,3,2,NA +62273,7,2,2,15,NA,5,6,1,15,184,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7588.544207,7881.983727,3,91,14,14,4.03,4,4,0,2,0,1,51,2,4,1,5 +62274,7,2,1,2,NA,4,4,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5103.735747,5455.59,2,95,3,3,0.38,5,5,2,2,0,2,37,1,4,3,NA +62275,7,2,2,41,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,14442.406316,14518.77693,2,99,15,15,5,1,1,0,0,0,2,41,2,5,5,NA +62276,7,2,1,9,NA,3,3,2,9,114,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,63059.776443,67841.511058,1,99,15,15,5,4,4,1,1,0,2,42,1,5,1,5 +62277,7,2,2,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,139343.551779,140618.30879,2,98,9,9,5,1,1,0,0,0,2,55,1,4,3,NA 
+62278,7,2,2,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,62212.598767,64340.261278,1,94,7,7,2.51,2,2,0,0,2,2,72,1,4,1,1 +62279,7,2,1,80,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,9456.784526,9941.375417,2,99,13,13,NA,3,3,0,0,1,1,80,1,2,2,NA +62280,7,2,2,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,16966.723528,16502.38463,2,95,6,6,1.7,2,2,0,0,0,2,54,1,4,2,NA +62281,7,2,2,13,NA,4,4,2,13,162,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11372.489138,11738.962427,1,99,6,6,0.96,5,5,1,2,0,2,35,1,4,1,2 +62282,7,2,2,57,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,21219.116826,20638.40003,1,102,6,6,1.7,2,2,0,0,1,2,80,NA,NA,2,NA +62283,7,2,1,2,NA,1,1,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14457.854197,15490.547336,1,92,10,10,2.93,4,4,1,0,0,2,55,1,4,1,4 +62284,7,2,2,64,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,117778.281347,120111.495123,1,94,8,8,2.41,3,3,0,0,3,1,63,1,4,1,5 +62285,7,2,2,17,NA,3,3,2,17,206,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,103007.696238,104941.393061,1,101,6,6,1.31,3,3,0,1,0,1,51,1,4,1,4 +62286,7,2,1,30,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,12556.207754,12615.022145,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +62287,7,2,2,73,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,12183.823561,12607.778632,2,100,6,6,2.11,2,2,0,0,2,1,79,1,3,1,4 +62288,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,22634.531479,23281.741513,1,103,9,6,2.24,3,1,0,0,0,1,27,1,5,5,NA +62289,7,2,1,71,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,13555.672819,15969.746949,1,94,3,3,1.07,1,1,0,0,1,1,71,1,2,3,NA +62290,7,2,2,2,NA,4,4,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7618.827213,7957.141798,2,101,1,1,0,3,3,2,0,0,1,22,1,3,5,NA +62291,7,2,1,56,NA,5,7,1,NA,NA,1,1,2,1,8,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20111.196953,20038.543847,1,92,14,14,5,2,2,0,0,0,1,56,2,4,1,4 
+62292,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,36829.543424,39682.305625,1,93,15,15,5,4,3,0,0,3,1,80,1,5,2,NA +62293,7,2,1,67,NA,3,3,2,NA,NA,1,1,2,1,9,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,28478.57859,28745.66908,1,101,3,3,1.16,1,1,0,0,1,1,67,2,4,3,NA +62294,7,2,1,9,NA,2,2,2,9,117,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,9390.522479,10327.334743,2,90,3,3,0.54,4,4,1,2,0,2,33,2,1,4,NA +62295,7,2,2,69,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,9716.805546,12994.252166,2,90,2,2,0.79,1,1,0,0,1,2,69,2,2,2,NA +62296,7,2,1,19,NA,4,4,2,19,232,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13251.602554,13277.802057,1,96,15,15,5,2,2,0,0,0,2,51,1,5,5,NA +62297,7,2,1,43,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18402.969014,18977.095614,2,95,10,10,3.67,3,3,0,1,0,1,43,1,4,1,4 +62298,7,2,1,15,NA,3,3,2,16,192,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,93665.036597,95017.313859,1,91,14,14,3.8,4,4,0,2,0,1,50,NA,NA,1,5 +62299,7,2,1,8,NA,5,6,2,9,108,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6631.058951,7488.793181,2,92,12,12,NA,7,7,2,4,0,1,54,2,2,1,5 +62300,7,2,2,6,NA,4,4,2,6,83,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6929.51414,7399.559927,2,99,14,14,4.09,3,3,0,2,0,2,37,1,5,5,NA +62301,7,2,1,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,16995.648055,16907.09145,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +62302,7,2,2,51,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,5,NA,2,2,2,1,2,2,2,2,2,2,19676.781212,20033.616894,2,103,77,77,NA,4,4,0,0,0,2,46,2,1,4,NA +62303,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,61501.13451,65355.920739,2,101,6,6,1.62,3,3,0,0,2,1,80,1,3,1,3 +62304,7,2,2,6,NA,1,1,2,6,77,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9872.244853,10573.701398,2,90,6,6,0.96,5,5,1,1,0,1,39,2,2,1,NA +62305,7,2,1,10,NA,1,1,2,10,131,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13285.093011,14158.005149,2,94,9,9,2.37,5,5,0,1,0,1,48,2,4,1,2 
+62306,7,2,1,0,9,2,2,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4884.343512,4884.433539,3,90,12,12,NA,3,3,1,0,0,1,40,2,5,1,4 +62307,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,7828.117773,8460.915547,2,90,4,4,1.2,2,2,0,0,2,2,80,1,3,2,NA +62308,7,2,2,78,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,83112.549416,87237.362828,1,97,7,7,2.64,2,2,0,0,2,1,79,1,4,1,3 +62309,7,2,2,47,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,2,2,NA,2,2,2,2,2,2,2,2,2,2,40337.933888,40548.292782,3,92,4,4,0.46,7,7,1,2,0,2,31,2,2,1,1 +62310,7,2,2,8,NA,4,4,2,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7632.404654,7765.546414,2,101,1,1,0.1,6,6,1,2,1,2,27,1,2,1,2 +62311,7,2,1,18,NA,3,3,2,19,228,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,68148.957861,67253.324127,1,93,15,15,3.92,5,5,0,1,0,2,54,1,5,1,5 +62312,7,2,2,63,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,1,1,2,2,1,2,2,2,16352.915834,17178.789759,3,92,8,8,1.85,5,5,1,0,2,1,66,2,1,1,1 +62313,7,2,1,7,NA,1,1,1,7,93,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12577.115885,12876.06354,2,96,5,5,0.76,5,5,1,2,0,1,44,2,1,1,3 +62314,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,7611.107768,7639.752345,2,97,3,3,1.33,1,1,0,0,1,1,61,1,3,5,NA +62315,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,12842.559946,13803.13967,2,95,99,99,NA,2,2,0,1,1,2,80,1,2,2,NA +62316,7,2,1,39,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,31674.692905,34832.561132,2,100,4,4,0.81,4,4,0,2,0,2,37,1,2,1,2 +62317,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18872.772727,18824.29246,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +62318,7,2,2,0,10,3,3,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8114.787453,8336.925581,1,91,2,2,0.27,5,5,2,2,0,2,42,1,4,3,NA +62319,7,2,2,4,NA,4,4,1,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,11231.392369,2,100,1,1,0.08,5,5,1,2,0,2,19,1,3,NA,NA 
+62320,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,122483.259869,124909.680309,3,91,7,7,2.89,2,2,0,0,2,2,64,1,4,1,4 +62321,7,2,2,8,NA,5,6,1,8,100,NA,NA,2,1,3,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6412.057856,6847.117735,3,91,14,14,4.03,4,4,0,2,0,1,50,1,5,1,5 +62322,7,2,2,4,NA,4,4,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10295.166918,11223.20055,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +62323,7,2,2,2,NA,4,4,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7618.827213,7957.141798,2,101,1,1,0,3,3,2,0,0,1,22,1,3,5,NA +62324,7,2,2,60,NA,1,1,2,NA,NA,2,NA,2,2,77,NA,4,4,NA,2,2,2,1,2,2,1,2,2,2,11469.456138,12167.81965,1,90,3,3,0.23,7,7,3,1,1,2,35,2,2,5,NA +62325,7,2,2,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,30932.175051,32888.281888,2,101,1,1,0.1,2,2,0,0,0,1,56,1,3,6,NA +62326,7,2,1,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,153565.050575,157855.235487,1,97,10,10,4.3,2,2,0,0,1,2,56,1,5,1,5 +62327,7,2,2,19,NA,4,4,2,19,238,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12209.74498,12386.615954,2,90,6,6,1.12,4,4,0,1,1,1,63,2,1,1,1 +62328,7,2,2,9,NA,4,4,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7899.813226,8111.155436,2,97,15,15,5,4,4,0,2,0,1,47,NA,NA,6,NA +62329,7,2,1,2,NA,2,2,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11793.948458,12167.27893,1,100,3,3,0.39,5,5,1,2,0,1,32,2,1,6,NA +62330,7,2,2,33,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,21097.664283,22267.583876,2,96,14,14,5,2,2,0,0,0,2,33,2,5,1,5 +62331,7,2,1,9,NA,5,7,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8246.426933,8701.859504,1,99,14,14,3.94,4,4,1,1,0,1,43,1,4,1,5 +62332,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,31962.323978,33458.586112,1,100,4,4,1.06,3,2,0,0,0,1,22,1,4,6,NA +62333,7,2,2,45,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,13046.228603,13115.21631,2,92,12,77,NA,7,2,0,0,2,1,53,2,3,1,3 
+62334,7,2,1,9,NA,3,3,1,9,108,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19590.665143,20475.022602,3,91,5,5,0.87,4,4,0,2,0,2,38,1,2,3,NA +62335,7,2,1,14,NA,2,2,2,14,171,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13921.972975,14228.387356,3,90,14,14,3.69,4,4,0,2,0,2,49,1,4,1,4 +62336,7,2,1,55,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,4,6,NA,2,2,2,2,2,2,2,2,2,2,24211.824535,24947.165851,2,93,10,10,3.67,3,3,0,0,0,2,56,2,4,6,NA +62337,7,2,2,10,NA,1,1,2,10,123,NA,NA,2,2,3,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15166.167659,15537.272247,2,94,7,7,1.04,7,7,0,3,0,1,37,2,1,1,3 +62338,7,1,2,2,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12260.86913,0,3,92,5,5,0.68,6,6,3,0,0,2,19,1,4,NA,NA +62339,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,71402.235366,74200.659205,2,100,15,15,5,4,4,1,1,0,1,29,1,4,1,4 +62340,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,130826.463813,1,101,6,6,1.28,4,4,2,0,0,1,44,1,4,1,4 +62341,7,2,1,8,NA,1,1,2,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13285.093011,13365.60735,2,94,7,7,2.16,3,3,0,1,0,2,28,1,2,1,1 +62342,7,2,2,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,77778.949308,78565.115731,1,101,8,8,1.85,5,5,0,3,0,1,41,1,3,1,4 +62343,7,2,1,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17615.311737,2,97,5,5,0.76,5,5,0,0,0,2,50,1,4,5,NA +62344,7,1,2,32,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,5,3,1,2,2,NA,NA,NA,NA,NA,NA,NA,26426.249254,0,1,99,NA,NA,NA,1,1,0,0,0,2,32,1,5,5,NA +62345,7,2,2,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,64324.554311,66806.652381,2,103,15,15,5,4,4,2,0,0,1,36,2,4,1,5 +62346,7,2,1,19,NA,5,7,1,19,239,2,NA,2,2,4,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6558.308393,6849.983523,2,92,99,1,0.22,4,1,0,0,0,1,19,1,4,NA,NA +62347,7,2,2,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,25812.913537,27430.822937,2,95,3,3,1.21,1,1,0,0,1,2,71,1,3,2,NA 
+62348,7,2,2,19,NA,4,4,2,19,236,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11419.859653,11886.179371,2,99,7,7,1.19,6,6,1,3,0,2,38,1,3,5,NA +62349,7,2,2,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,122473.120523,125156.324631,3,91,8,5,1.5,3,2,0,0,0,1,23,1,4,1,4 +62350,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,96255.674553,99423.043377,2,94,7,7,2.72,2,2,0,1,0,2,43,1,3,3,NA +62351,7,2,1,7,NA,3,3,2,7,89,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25604.034863,26336.174292,1,95,5,5,0.92,5,5,1,2,0,2,30,1,4,1,4 +62352,7,2,2,3,NA,4,4,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,10601.396934,2,100,8,8,1.1,7,7,3,3,0,2,58,1,3,5,NA +62353,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,39587.338944,43437.954199,2,91,15,15,5,2,2,0,0,2,2,79,1,5,1,5 +62354,7,2,2,11,NA,3,3,2,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,80369.555824,79030.006233,1,97,8,8,2.72,3,3,0,2,0,2,43,1,1,3,NA +62355,7,2,2,16,NA,5,6,1,16,197,NA,NA,2,1,3,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7176.749123,7692.153155,3,91,7,7,1.79,4,4,0,1,0,2,45,2,2,1,3 +62356,7,2,2,14,NA,1,1,1,14,179,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,26325.414456,27702.295836,3,92,12,12,NA,6,6,1,3,0,2,33,1,5,1,4 +62357,7,2,1,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,94433.586146,101591.315487,1,91,15,15,5,3,3,1,0,0,1,36,1,4,1,5 +62358,7,2,2,0,3,1,1,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9767.083234,9561.595244,3,92,7,7,1.99,3,3,1,0,0,1,40,1,4,1,4 +62359,7,2,2,54,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,22224.73066,23825.197607,2,94,7,7,1.04,7,7,0,3,0,1,37,2,1,1,3 +62360,7,2,1,17,NA,4,4,1,17,210,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11429.628358,11541.555232,2,96,7,7,1.04,7,7,0,4,0,2,37,1,3,3,NA +62361,7,2,1,7,NA,4,4,1,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13423.881856,14179.490667,2,101,1,1,0.21,4,4,1,2,0,2,26,1,3,5,NA 
+62362,7,2,2,11,NA,4,4,2,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8579.490652,8919.477637,2,97,6,6,1,6,6,1,2,2,2,60,1,2,2,NA +62363,7,2,2,44,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,2,1,2,2,1,2,2,1,2,2,1,134897.594057,143557.343892,2,102,6,6,1.43,4,4,0,1,1,1,67,NA,NA,1,NA +62364,7,2,2,4,NA,1,1,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14899.363418,16449.297045,1,100,8,8,2.17,4,4,1,1,0,2,40,2,2,1,2 +62365,7,2,2,37,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,21097.664283,21336.137518,2,96,8,8,3.4,2,2,0,0,0,1,46,2,4,1,4 +62366,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,37318.801462,40047.525745,1,98,6,6,1.65,2,2,0,0,2,1,80,1,3,1,3 +62367,7,2,1,50,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,1,1,2,1,1,2,1,3,10133.862484,10097.253197,3,90,15,15,3.23,6,6,0,2,0,1,50,2,2,1,2 +62368,7,2,2,80,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,23176.790531,24820.797478,1,98,3,3,0.81,2,2,0,0,1,2,80,2,1,2,NA +62369,7,2,2,8,NA,2,2,1,8,100,NA,NA,2,2,3,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15352.601806,15663.127806,2,102,7,7,1.53,5,5,1,2,0,2,37,2,4,1,4 +62370,7,2,1,21,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,13843.66558,14262.333267,1,90,15,15,3.7,5,5,0,0,0,1,54,NA,NA,1,NA +62371,7,2,1,32,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22403.911395,23863.238542,2,96,14,14,5,2,2,0,0,0,2,33,2,5,1,5 +62372,7,2,2,55,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,1,1,2,2,1,2,2,NA,17991.883465,18643.986017,2,102,8,8,2.01,4,4,0,0,0,1,59,2,4,1,4 +62373,7,2,1,46,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,1,2,2,1,2,2,NA,37324.655911,36776.974122,1,102,6,6,1.34,4,4,0,1,0,2,48,2,3,1,1 +62374,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,31335.13799,33530.36206,1,101,4,4,0.78,4,4,1,2,0,2,31,1,4,3,NA +62375,7,2,1,24,NA,1,1,1,NA,NA,2,NA,2,2,99,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,52698.05363,59566.360508,3,92,4,4,0.65,4,4,2,0,0,2,20,1,3,5,NA 
+62376,7,2,1,6,NA,4,4,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7730.47951,9212.541007,1,99,5,5,0.84,5,5,2,1,0,1,35,1,3,1,2 +62377,7,2,2,78,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,35965.834545,38220.111719,1,95,3,3,1.24,1,1,0,0,1,2,78,1,2,2,NA +62378,7,2,2,68,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,3,NA,2,2,2,1,2,2,2,2,2,2,10979.149658,11879.480817,1,96,15,8,4.66,2,1,0,0,2,2,68,2,1,3,NA +62379,7,2,1,40,NA,5,7,1,NA,NA,2,NA,2,1,5,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,24837.95225,25237.159887,1,102,10,10,3.22,4,4,0,0,2,2,29,2,5,5,NA +62380,7,2,1,16,NA,1,1,1,17,204,NA,NA,1,1,NA,10,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,15506.325263,15592.022118,1,103,8,8,2,5,5,0,1,0,2,45,2,3,1,2 +62381,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,1,2,1,2,2,1,2,2,NA,15408.94893,15514.368855,1,99,7,2,0.74,3,1,0,0,2,1,70,1,2,1,4 +62382,7,2,2,1,21,4,4,2,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5743.559235,6261.298887,2,99,2,2,0.2,7,7,1,2,1,1,63,1,1,2,NA +62383,7,2,1,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,91704.59836,93129.802737,1,93,15,4,1.38,6,1,0,0,0,1,23,1,5,5,NA +62384,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,27444.308565,27682.940425,1,101,7,7,2.31,2,2,0,0,1,2,69,1,4,2,NA +62385,7,2,1,0,10,4,4,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6699.703488,6998.255943,2,96,6,6,1.32,5,5,1,3,0,2,30,1,4,3,NA +62386,7,2,1,35,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,21163.049914,22845.160537,1,97,8,8,2.51,3,3,0,1,0,1,35,1,3,1,4 +62387,7,2,1,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17597.975048,2,97,3,3,1.29,1,1,0,0,0,1,23,1,3,5,NA +62388,7,2,2,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,49644.076348,54110.530387,1,97,3,3,1.12,1,1,0,0,1,2,69,1,3,2,NA +62389,7,2,2,16,NA,5,7,2,16,194,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11975.458482,12291.118947,1,97,15,15,5,6,6,0,3,0,1,47,1,5,1,5 
+62390,7,2,2,25,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,14095.963218,14697.88566,2,92,15,8,4.59,2,1,0,0,0,2,25,2,5,5,NA +62391,7,2,2,40,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,34954.173075,36222.975959,2,98,15,15,5,3,3,0,1,0,1,38,1,4,1,3 +62392,7,2,2,1,12,1,1,1,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,12260.86913,12783.275371,3,92,6,6,0.96,5,5,2,1,0,2,26,2,1,1,1 +62393,7,2,1,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,39915.513053,48709.670361,2,98,7,7,1.53,5,5,0,0,0,2,48,1,3,5,NA +62394,7,2,1,1,14,1,1,1,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,13386.323284,2,98,1,1,0.13,4,4,2,0,0,2,52,1,2,4,NA +62395,7,2,2,1,16,1,1,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,12871.484115,13281.030392,2,102,8,8,2.24,4,4,1,1,0,1,35,2,3,1,1 +62396,7,2,1,10,NA,1,1,1,10,123,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,10658.399025,10827.062436,1,102,5,5,0.62,7,7,1,3,0,1,49,2,2,1,1 +62397,7,2,2,70,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,2,NA,2,2,2,1,2,2,2,2,2,NA,17077.396628,18362.705174,2,96,13,13,NA,6,6,0,2,1,1,43,2,4,1,3 +62398,7,2,1,9,NA,3,3,2,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19926.440922,21167.339982,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +62399,7,2,2,53,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,24870.513993,25871.320138,2,98,5,5,1.63,2,2,0,0,0,2,53,2,1,1,1 +62400,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,27463.558155,27680.636989,2,100,4,4,1.22,2,2,0,1,0,2,31,1,4,5,NA +62401,7,2,2,15,NA,3,3,1,15,187,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,93740.540203,99321.165816,1,100,8,8,1.95,4,4,0,2,1,2,49,1,5,6,NA +62402,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,1,4,NA,1,2,2,1,2,2,1,2,2,NA,14259.601244,14208.087437,1,93,6,6,1.15,5,5,1,0,2,2,70,NA,NA,1,NA +62403,7,2,2,3,NA,4,4,2,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8192.75936,8404.22838,2,99,77,77,NA,3,3,1,0,0,1,38,2,5,1,5 
+62404,7,2,1,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,22009.438408,22692.209281,2,99,6,6,2.95,1,1,0,0,0,1,44,1,3,5,NA +62405,7,2,1,5,NA,2,2,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14716.463544,14890.443704,2,96,2,2,0.27,6,6,1,3,0,1,34,NA,NA,1,NA +62406,7,2,2,10,NA,2,2,2,10,129,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19936.606751,20692.800636,1,94,7,7,1.74,4,4,0,2,0,1,44,1,5,1,5 +62407,7,2,1,49,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,28680.660607,29290.218786,1,94,3,3,1.1,1,1,0,0,0,1,49,1,4,3,NA +62408,7,1,1,58,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,174520.785302,0,1,95,7,7,2.86,2,2,0,0,1,1,58,1,4,1,3 +62409,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,22419.63376,27869.657009,1,94,3,3,1.16,1,1,0,0,1,2,80,1,3,2,NA +62410,7,2,1,5,NA,4,4,2,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8158.702829,8662.344233,2,99,5,5,0.65,6,6,2,1,0,2,53,1,4,3,NA +62411,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,15790.702799,17527.166432,2,90,7,7,3.21,1,1,0,0,0,2,51,1,4,5,NA +62412,7,2,1,16,NA,1,1,2,16,199,NA,NA,2,2,4,10,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,21718.29328,22050.264992,2,94,6,6,1.5,4,4,0,2,0,1,44,2,2,1,2 +62413,7,2,1,65,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,131445.986898,130104.237054,1,101,5,5,1.6,2,2,0,0,1,1,65,1,4,6,NA +62414,7,2,1,58,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,1,2,1,2,2,1,1,2,NA,17206.320427,17681.811932,1,96,2,2,0.4,3,3,0,0,0,2,56,1,3,3,NA +62415,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,129336.409693,136474.939567,1,95,15,15,5,3,3,1,0,0,1,26,1,3,1,4 +62416,7,2,2,19,NA,2,2,2,19,235,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13824.001771,14551.102227,2,90,10,10,2.91,4,4,0,1,0,2,51,2,4,1,1 +62417,7,2,2,26,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,45207.136555,46085.238422,1,94,14,8,4.66,2,1,0,0,0,2,26,1,4,5,NA 
+62418,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,46965.818538,52174.805072,1,101,6,6,1.98,2,2,0,0,1,1,80,1,1,2,NA +62419,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,42993.150248,46481.579201,1,95,5,5,0.65,6,6,1,0,2,1,80,1,3,1,4 +62420,7,1,1,24,NA,1,1,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,59682.963348,0,2,102,77,77,NA,4,4,0,1,0,1,47,1,2,1,3 +62421,7,2,1,23,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14313.345971,15171.949804,3,91,6,1,0,2,1,0,0,0,1,24,1,5,5,NA +62422,7,2,2,72,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,63504.762752,64182.95787,1,91,15,15,5,1,1,0,0,1,2,72,1,4,2,NA +62423,7,1,2,8,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13192.206605,0,2,102,14,14,3.8,4,4,0,2,0,1,47,1,4,1,4 +62424,7,2,1,20,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,2,5,NA,2,2,2,2,2,2,2,2,2,2,41713.173502,43905.698512,1,90,99,3,0.9,3,1,0,0,0,1,41,NA,NA,99,NA +62425,7,2,2,30,NA,3,3,2,NA,NA,2,NA,2,2,1,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,81658.419251,86275.41601,2,99,15,15,5,2,2,0,0,0,1,30,NA,NA,1,5 +62426,7,2,1,9,NA,1,1,1,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8828.580268,8822.70874,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +62427,7,2,1,15,NA,4,4,2,15,185,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11462.850569,11753.30805,2,100,4,4,0.69,5,5,0,3,0,1,38,1,3,6,NA +62428,7,2,2,12,NA,4,4,1,12,145,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16598.888685,16325.283055,2,102,15,15,4.2,5,5,1,2,0,2,29,NA,NA,1,NA +62429,7,2,1,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,25964.952645,26121.35248,2,96,14,14,5,2,2,0,1,0,1,47,1,5,5,NA +62430,7,2,1,27,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,49006.291777,50482.76018,1,92,9,9,2.93,3,3,0,1,0,2,30,1,5,1,5 +62431,7,2,1,54,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,25969.864445,29542.265764,1,97,2,2,0.81,1,1,0,0,0,1,54,1,2,5,NA 
+62432,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,75294.690128,81171.633513,1,90,15,15,5,4,4,0,2,0,1,37,1,5,1,5 +62433,7,2,1,10,NA,4,4,2,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,8579.422451,10224.240431,1,99,NA,NA,NA,4,4,1,1,0,1,42,NA,NA,1,NA +62434,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,9113.905743,10210.592308,3,90,14,14,5,2,2,0,0,2,1,60,NA,NA,1,3 +62435,7,2,2,0,7,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16547.193167,16560.896108,1,97,12,12,NA,5,5,3,0,0,2,33,1,5,1,5 +62436,7,2,1,4,NA,1,1,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11776.305841,11467.607256,1,90,4,4,0.47,7,7,1,1,0,2,50,2,1,1,1 +62437,7,2,1,39,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,6,NA,2,2,2,1,2,2,1,2,2,2,33948.23667,35734.088319,2,97,4,4,0.67,4,4,0,2,0,1,39,2,2,6,NA +62438,7,2,1,0,0,4,4,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6339.587912,6622.092882,1,91,7,7,1.49,5,5,3,0,0,2,38,2,4,1,4 +62439,7,2,1,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,60942.568495,64726.722108,1,94,15,15,5,2,2,0,0,2,1,75,1,4,1,3 +62440,7,2,1,22,NA,1,1,2,NA,NA,2,NA,2,1,4,NA,4,1,NA,1,2,2,2,2,2,1,2,2,2,35669.2076,36294.041819,2,94,7,7,1.33,6,6,0,1,0,1,55,2,2,1,1 +62441,7,2,1,68,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,1,2,2,1,2,2,2,8609.250304,11228.904188,2,90,2,2,0.31,4,4,0,0,2,1,68,2,1,1,NA +62442,7,2,1,0,8,4,4,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6981.458608,7088.61417,2,92,9,9,2.71,4,4,1,0,0,1,43,1,2,1,4 +62443,7,2,2,4,NA,4,4,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8464.796876,9108.215863,2,93,8,8,1.67,5,5,1,1,0,2,31,1,4,5,NA +62444,7,2,2,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,21503.272394,20634.528185,2,100,1,1,0,2,2,0,0,0,2,55,1,5,3,NA +62445,7,2,2,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,19866.025076,19772.340772,2,95,6,6,1.08,4,4,1,0,0,2,42,1,4,4,NA 
+62446,7,2,2,4,NA,2,2,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,17065.756351,2,98,1,1,0,3,1,2,0,0,2,27,1,3,6,NA +62447,7,2,2,19,NA,1,1,1,19,237,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17490.464019,17826.708822,1,102,6,6,1.43,3,3,0,1,0,2,39,1,4,3,NA +62448,7,2,1,13,NA,1,1,1,13,162,NA,NA,2,2,4,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18583.729819,18727.0393,2,96,6,6,1.11,5,5,0,3,0,2,32,2,3,1,2 +62449,7,2,2,38,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,32097.38481,34606.180038,2,100,14,14,3.36,4,4,1,1,0,1,45,2,5,1,2 +62450,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19282.792088,20545.343351,2,97,7,7,1.92,3,3,0,1,0,1,57,1,4,1,4 +62451,7,2,2,0,4,3,3,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,NA,2,1,2,2,NA,NA,NA,NA,9142.358181,9688.944551,2,98,1,1,0.23,2,2,1,0,0,2,20,1,3,6,NA +62452,7,2,1,39,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,3,5,NA,2,2,2,2,2,2,2,2,2,2,34564.442715,35621.093093,2,90,4,4,0.81,3,3,0,0,0,1,39,2,3,5,NA +62453,7,2,2,2,NA,4,4,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7329.363701,7518.547014,1,103,6,6,1.57,3,3,1,0,0,2,25,1,4,5,NA +62454,7,2,2,64,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,2,2,2,1,2,2,2,2,2,2,7278.790659,7582.574348,2,93,5,5,1.32,2,2,0,0,1,1,49,2,2,1,4 +62455,7,2,1,18,NA,4,4,2,19,228,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7946.436474,8100.311671,3,90,15,15,5,5,5,1,0,1,1,38,2,3,1,4 +62456,7,2,1,17,NA,4,4,1,17,214,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13731.625553,14242.275028,1,100,14,14,3.93,3,3,0,1,0,2,47,1,5,4,NA +62457,7,2,1,69,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,7736.56115,7645.782314,1,99,15,15,5,2,2,0,0,2,2,63,2,5,1,5 +62458,7,2,1,13,NA,5,6,2,13,162,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6666.045669,7317.485505,3,90,77,77,NA,3,3,0,1,0,1,54,2,3,1,3 +62459,7,2,1,4,NA,3,3,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,76935.850725,86801.923297,1,95,12,12,NA,6,6,2,0,0,2,42,1,2,1,5 
+62460,7,2,1,41,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105141.812429,109679.354704,1,98,15,15,4.34,4,4,1,1,0,1,41,1,5,1,5 +62461,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,19362.260448,24981.28051,1,96,6,6,1.98,2,2,0,1,0,2,29,1,3,5,NA +62462,7,2,2,11,NA,3,3,1,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,79732.314338,79120.693768,1,100,15,15,5,4,4,0,2,0,1,46,1,5,1,5 +62463,7,2,1,7,NA,1,1,1,7,92,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,19774.151841,21259.203461,2,94,5,5,0.65,6,6,0,3,0,1,44,2,1,1,1 +62464,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,22824.336433,1,95,10,7,3.21,5,1,1,2,0,1,32,1,3,6,NA +62465,7,2,2,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,20084.755052,20430.629009,1,99,14,14,4.21,4,4,0,2,0,2,44,1,5,1,5 +62466,7,2,1,4,NA,4,4,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7629.74403,8207.121237,1,99,7,7,1.06,7,7,3,1,0,1,38,1,4,6,NA +62467,7,2,1,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,117075.881463,118577.582598,1,94,5,5,2.15,1,1,0,0,1,1,65,1,5,1,NA +62468,7,2,2,0,8,2,2,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6787.112205,6644.319314,2,94,5,5,0.89,4,4,2,0,0,2,35,2,4,1,2 +62469,7,2,1,9,NA,4,4,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13423.881856,13521.226684,2,101,4,4,0.76,4,4,1,1,0,1,28,1,2,1,4 +62470,7,2,1,4,NA,1,1,1,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12870.245769,12532.871153,1,102,6,6,0.8,7,7,3,3,0,2,34,2,3,1,1 +62471,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,45482.078305,46877.423634,2,96,99,99,NA,4,4,0,1,1,2,51,1,2,1,4 +62472,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,27988.858556,28110.553782,1,97,6,6,2.69,2,1,0,0,0,1,31,1,5,6,NA +62473,7,2,2,17,NA,4,4,1,17,209,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,6,6,1.9,2,2,0,1,0,2,42,1,5,5,NA 
+62474,7,2,2,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,31347.021791,31681.790338,1,98,6,3,1.13,2,1,0,0,2,1,69,1,4,6,NA +62475,7,2,1,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,36750.682715,37321.834378,2,97,6,1,0.05,2,1,0,0,0,1,22,1,3,6,NA +62476,7,2,1,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,28680.660607,28606.985896,3,91,6,3,1.29,2,1,0,0,0,1,40,1,3,5,NA +62477,7,2,1,11,NA,3,3,1,11,139,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21839.599095,22875.076583,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA +62478,7,2,1,5,NA,4,4,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8275.349856,8376.287205,2,95,1,1,0.03,2,2,1,0,0,1,24,1,3,5,NA +62479,7,2,2,36,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,11608.998717,11632.703325,3,90,10,10,2.41,5,5,1,2,0,1,44,2,4,1,5 +62480,7,2,1,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,105141.812429,110055.244549,1,98,14,14,4.12,4,4,0,2,0,2,36,1,5,1,3 +62481,7,2,2,39,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,4,2,2,2,2,1,2,2,1,2,2,1,27127.983961,27837.333201,2,90,10,10,3.13,4,4,1,2,0,2,39,1,5,4,NA +62482,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,31335.13799,31552.004994,1,95,10,6,1.34,5,4,1,2,0,1,32,1,3,6,NA +62483,7,2,1,11,NA,3,3,2,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25604.034863,26336.174292,1,95,4,4,0.65,6,6,2,2,0,2,36,1,4,6,NA +62484,7,2,2,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,27585.470618,26830.521849,2,102,6,6,1.22,5,5,0,2,0,2,42,1,4,1,4 +62485,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,99,1,0.28,2,1,0,0,0,1,21,1,4,5,NA +62486,7,1,2,14,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,117872.104347,0,2,101,8,8,1.72,5,5,0,3,0,1,37,1,3,1,3 +62487,7,2,1,65,NA,2,2,2,NA,NA,1,1,2,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,10655.434864,11610.446696,1,90,14,14,5,2,2,0,0,2,1,65,2,2,1,3 
+62488,7,1,1,9,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,41165.805324,0,2,100,15,15,5,4,4,1,1,0,1,29,1,4,1,4 +62489,7,2,2,1,22,4,4,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7281.670423,7938.059494,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +62490,7,2,1,0,6,3,3,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15379.505002,15960.887694,1,102,14,14,3,6,6,1,2,0,1,44,1,4,1,5 +62491,7,2,2,61,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,1,1,2,2,1,2,1,3,18695.172864,19350.637044,2,102,15,15,5,5,5,1,0,2,1,30,1,4,1,5 +62492,7,2,1,6,NA,5,7,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8227.856305,8510.505183,3,91,10,10,2.48,5,5,2,1,0,2,27,1,2,1,4 +62493,7,2,2,0,7,5,7,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6570.947402,7236.391986,2,102,7,7,2.65,2,2,1,0,0,2,42,1,4,5,NA +62494,7,2,1,28,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,4,1,NA,1,2,2,1,2,2,2,2,2,2,37970.860743,39407.668593,2,92,15,7,3.67,2,1,0,0,0,1,28,2,4,1,NA +62495,7,2,1,44,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,86361.036512,88802.863313,2,93,10,10,3.61,3,3,0,0,2,1,75,1,4,1,4 +62496,7,2,1,4,NA,1,1,1,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,18754.85406,18780.196288,2,102,5,5,0.89,4,4,2,0,0,1,33,2,9,1,2 +62497,7,2,1,12,NA,3,3,2,12,145,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,61479.689958,61628.148021,2,100,15,15,4.5,6,6,0,4,0,1,45,1,5,1,5 +62498,7,2,2,13,NA,1,1,1,13,159,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20347.899985,21072.708732,3,92,15,15,3.15,7,7,0,4,0,2,35,2,3,3,NA +62499,7,2,2,1,14,5,7,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9679.976914,10817.322533,1,95,4,4,0.97,3,3,2,0,0,2,22,1,4,5,NA +62500,7,2,2,52,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18441.731082,18102.807884,1,96,12,12,NA,2,2,0,0,0,1,46,1,4,1,5 +62501,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,77778.949308,78018.093799,1,101,14,14,3.15,5,5,2,1,0,1,35,1,4,1,5 
+62502,7,2,1,35,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,14739.896059,14808.93904,2,99,3,3,0.42,6,6,1,2,0,2,43,1,4,6,NA +62503,7,2,1,5,NA,3,3,2,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22669.354731,25576.419472,2,95,6,6,0.9,6,6,1,1,0,1,49,1,1,1,1 +62504,7,2,2,30,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,1,6,2,2,2,2,2,2,2,2,2,1,2,38161.026403,37131.361742,1,100,3,3,0.39,5,5,1,2,0,1,32,2,1,6,NA +62505,7,2,2,3,NA,1,1,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13196.707564,13241.769839,2,96,77,77,NA,7,7,3,2,0,2,33,2,2,6,NA +62506,7,2,2,46,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,29105.053716,30161.539189,2,93,4,4,0.84,3,3,0,1,0,2,46,2,3,1,2 +62507,7,2,2,17,NA,1,1,1,17,207,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18581.167701,19284.858627,2,96,3,3,0.24,7,7,2,3,1,2,40,1,3,3,NA +62508,7,2,1,3,NA,3,3,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58332.578536,68350.303275,1,91,15,15,5,3,3,1,0,0,1,36,1,4,1,5 +62509,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,30131.691064,30623.993423,2,95,6,6,1.36,3,3,0,0,2,2,60,1,5,1,4 +62510,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,62763.524469,78020.806152,1,92,7,7,2.64,2,2,0,0,2,1,80,1,3,1,3 +62511,7,2,1,9,NA,3,3,2,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25019.74954,25881.463934,1,101,3,3,0.44,5,5,0,3,0,1,35,1,3,1,4 +62512,7,2,1,39,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17605.619977,18470.699991,3,91,15,15,5,4,4,1,1,0,1,39,2,5,1,5 +62513,7,2,2,46,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,29470.67209,35523.062266,1,90,5,5,1,4,4,0,2,0,1,40,2,2,1,1 +62514,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19633.637051,20188.770976,2,95,6,6,1.19,4,4,0,1,0,1,44,1,3,1,2 +62515,7,2,2,42,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,1,5,2,2,2,2,2,2,2,2,2,2,2,31235.666551,33485.036978,2,94,1,1,0.01,7,7,1,3,0,1,41,2,1,1,1 
+62516,7,2,1,10,NA,2,2,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18107.947773,18538.358733,1,97,14,14,3.25,4,4,0,2,0,1,45,1,3,6,NA +62517,7,2,1,80,NA,3,3,2,NA,NA,2,NA,2,1,8,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,11483.380372,15883.29652,1,90,77,77,NA,2,2,0,0,2,1,80,2,1,1,1 +62518,7,1,1,47,NA,5,6,NA,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16651.742047,0,1,100,15,15,5,3,3,0,0,0,1,47,2,5,1,5 +62519,7,2,1,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,17801.655316,19397.911862,2,95,7,2,0.72,3,1,0,0,0,2,56,1,3,2,NA +62520,7,2,1,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11017.136221,10826.673882,1,91,3,3,0.89,2,2,1,0,0,2,23,1,3,3,NA +62521,7,2,2,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,178239.535384,176744.998212,1,102,14,14,5,2,2,0,0,1,2,59,1,5,3,NA +62522,7,2,1,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,24930.322327,25494.618871,2,94,9,9,5,1,1,0,0,0,1,54,1,4,3,NA +62523,7,2,1,33,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23872.904125,23622.849234,1,91,6,6,0.99,5,5,3,0,0,2,33,2,3,1,4 +62524,7,2,1,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,91704.59836,93129.802737,1,93,15,7,3.67,6,1,0,0,0,1,23,1,5,5,NA +62525,7,2,1,15,NA,5,6,2,15,189,NA,NA,2,2,3,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10725.405063,11099.246363,2,94,3,3,0.68,3,2,0,1,0,2,45,2,4,3,NA +62526,7,2,2,62,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,3,1,NA,2,2,2,2,2,2,1,2,2,2,8811.978874,9348.531299,3,90,12,12,NA,3,3,0,0,1,2,62,2,3,1,5 +62527,7,2,1,79,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,NA,9541.516696,10104.465045,1,100,77,77,NA,1,1,0,0,1,1,79,1,2,5,NA +62528,7,2,1,40,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,6,NA,2,2,2,2,2,2,2,2,1,NA,25035.846455,24668.484001,2,99,99,99,NA,5,3,0,1,0,1,40,2,1,6,NA +62529,7,2,1,59,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15760.921402,16568.510632,3,90,12,12,NA,3,3,0,0,0,1,59,2,4,1,4 
+62530,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,141316.739364,156265.607046,2,94,5,5,0.89,4,4,0,2,0,2,51,1,2,3,NA +62531,7,2,2,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,20149.292081,21423.504707,2,99,2,2,0.4,2,2,0,0,0,2,52,1,3,1,3 +62532,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,77,NA,1,2,2,1,2,2,1,2,2,1,11523.037911,11989.050618,2,97,13,13,NA,1,1,0,0,1,2,64,1,3,77,NA +62533,7,2,2,1,17,1,1,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9955.153132,10089.038145,2,94,8,8,2.7,3,3,1,0,0,2,23,2,4,1,3 +62534,7,2,1,13,NA,5,7,1,13,159,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11068.745625,11332.552011,1,103,3,3,0.37,5,5,1,2,0,2,30,1,4,5,NA +62535,7,2,2,5,NA,3,3,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,69665.012606,74034.381659,3,92,14,14,2.74,6,6,2,2,0,1,35,1,5,1,4 +62536,7,2,1,46,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,14238.597316,14347.374047,2,92,15,9,5,2,1,0,0,0,1,40,NA,NA,77,NA +62537,7,2,2,10,NA,3,3,2,11,133,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15442.305642,15459.941839,2,94,7,7,1.62,5,5,0,3,0,1,30,1,2,1,9 +62538,7,2,2,1,16,4,4,2,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6802.424856,6978.006983,2,99,15,15,5,4,4,2,0,0,1,34,1,5,1,5 +62539,7,2,1,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,10717.375231,11218.730451,2,101,2,2,0.92,1,1,0,0,1,1,60,1,2,2,NA +62540,7,2,2,0,3,3,3,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22478.08837,21867.533571,3,91,10,10,3,4,4,2,0,0,1,32,1,4,1,5 +62541,7,2,2,13,NA,3,3,1,13,163,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,68399.970058,69290.143431,1,102,14,14,4.05,3,3,0,2,0,2,34,1,4,3,NA +62542,7,2,2,11,NA,2,2,1,11,137,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,14414.529053,14932.182215,2,96,2,2,0.27,6,6,1,3,0,1,34,NA,NA,1,NA +62543,7,2,2,1,12,2,2,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11981.824297,12142.965635,2,91,2,2,0.26,4,4,1,1,0,2,42,2,4,3,NA 
+62544,7,2,1,42,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18533.049642,19320.837782,1,99,14,14,4.86,3,3,0,1,0,1,42,1,5,1,5 +62545,7,2,1,8,NA,2,2,2,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15673.230419,15680.173615,1,97,15,15,4.52,6,6,0,4,0,2,41,1,5,1,5 +62546,7,2,1,64,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,13473.930124,13736.530204,1,92,6,6,2.69,1,1,0,0,1,1,64,1,4,2,NA +62547,7,2,1,9,NA,1,1,2,9,113,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,13484.595524,13562.00827,1,97,3,3,0.5,5,5,0,2,0,1,56,2,2,6,NA +62548,7,2,1,57,NA,2,2,1,NA,NA,1,2,2,1,8,NA,3,5,NA,1,2,2,1,2,2,2,2,2,1,25422.415762,25922.20279,2,100,7,7,2.64,2,2,0,0,1,1,57,2,3,5,NA +62549,7,2,1,53,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,10579.097347,10866.833801,2,92,8,8,2.01,4,4,0,0,0,1,53,2,3,1,3 +62550,7,2,2,9,NA,5,7,1,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8292.876947,8466.609309,1,102,15,15,5,4,4,0,2,0,2,40,2,5,1,4 +62551,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,86578.861495,89840.962333,2,101,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +62552,7,2,2,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,139800.409559,140918.856987,1,100,15,15,4.07,5,5,0,2,0,2,41,1,5,1,4 +62553,7,2,1,8,NA,5,6,2,8,100,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,4945.578914,5359.127104,3,91,14,14,2.5,6,6,1,1,1,2,37,2,2,1,5 +62554,7,2,2,15,NA,5,6,1,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12546.119668,12876.82216,2,102,14,14,4.86,3,3,0,1,0,1,55,NA,NA,1,5 +62555,7,2,1,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,23471.353577,23548.945779,1,100,7,7,1.83,3,3,0,1,0,2,40,1,4,6,NA +62556,7,2,2,31,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,17978.142628,18308.053591,1,91,10,10,3.78,3,3,1,0,0,1,35,2,5,1,5 +62557,7,2,1,10,NA,1,1,2,10,122,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,10591.186197,11647.778431,1,90,4,4,0.46,7,7,2,3,0,2,34,2,1,6,NA 
+62558,7,2,2,49,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,20084.755052,20500.648257,1,99,14,14,3.67,4,4,1,0,0,2,49,1,3,1,3 +62559,7,2,2,9,NA,1,1,2,9,109,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,2,2,2,1,18668.602894,22428.786321,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 +62560,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12435.659176,12095.32474,1,96,77,77,NA,7,7,1,3,0,1,56,1,3,1,4 +62561,7,2,2,10,NA,3,3,1,10,122,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,50248.753555,51858.292831,2,100,8,8,2.91,3,3,0,2,0,2,48,1,5,1,NA +62562,7,2,1,4,NA,1,1,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17865.135763,18076.339981,3,92,1,1,0,5,5,3,0,0,1,26,1,2,1,2 +62563,7,2,2,72,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,7,77,NA,1,2,2,1,2,2,1,2,2,NA,13638.730054,13839.452317,1,99,12,12,NA,1,1,0,0,1,2,72,1,7,77,NA +62564,7,2,2,13,NA,4,4,2,13,162,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11711.384457,11743.959438,2,95,1,1,0.17,2,2,0,1,0,2,49,1,3,99,NA +62565,7,2,1,23,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,3,14385.653726,15533.829525,2,101,7,5,1.84,2,1,0,0,0,1,27,2,5,5,NA +62566,7,2,2,2,NA,4,4,1,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7734.994032,8078.467013,2,95,1,1,0.18,4,4,2,1,0,2,38,1,2,5,NA +62567,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,22991.276372,25575.167764,1,99,99,99,NA,1,1,0,0,1,2,80,1,1,3,NA +62568,7,2,2,1,13,1,1,1,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10867.770426,11631.363355,1,100,4,4,0.56,5,5,2,0,0,2,25,2,2,1,2 +62569,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,45973.010529,51521.910389,1,94,8,8,3.47,2,2,0,0,1,2,80,1,1,2,NA +62570,7,2,1,6,NA,4,4,2,6,72,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8655.162127,8675.53524,2,101,2,2,0.22,4,4,1,1,0,2,41,1,2,4,NA +62571,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,85420.170155,86011.35365,1,91,14,14,3.06,5,5,2,0,0,2,30,1,5,1,5 
+62572,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,47566.45715,54102.989916,1,90,7,7,3.22,1,1,0,0,1,2,80,1,5,3,NA +62573,7,2,2,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,30442.30641,30472.199404,1,95,6,3,1.1,2,1,0,0,0,2,48,1,4,3,NA +62574,7,2,2,13,NA,3,3,2,13,166,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,132630.209478,136814.3405,1,97,9,9,2.88,3,3,0,1,0,2,33,1,3,5,NA +62575,7,2,2,24,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,129336.409693,130993.13082,1,95,12,12,NA,6,6,2,0,0,2,42,1,2,1,5 +62576,7,2,1,66,NA,1,1,1,NA,NA,2,NA,2,1,77,NA,1,1,NA,2,2,2,1,2,2,1,2,2,NA,14488.953694,14771.336069,3,92,9,9,2.22,5,5,1,0,2,1,66,2,1,1,1 +62577,7,2,2,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,72551.269339,73326.076054,1,92,6,6,2.04,2,2,0,0,2,2,71,1,4,1,1 +62578,7,2,2,6,NA,3,3,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,48532.852397,48387.04619,1,101,7,7,1.55,5,5,1,2,0,2,31,1,4,1,2 +62579,7,2,1,78,NA,1,1,1,NA,NA,1,2,2,1,9,NA,4,1,NA,2,2,2,1,2,2,1,2,2,NA,12782.642018,13005.47245,2,96,5,5,1.08,3,3,0,0,2,1,53,2,4,3,NA +62580,7,2,2,14,NA,3,3,2,14,172,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,38400.791741,38900.548719,1,92,4,4,0.5,6,6,0,3,0,2,41,1,4,1,NA +62581,7,2,2,10,NA,3,3,1,10,128,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,53370.126656,53491.559571,2,98,15,15,5,3,3,0,1,0,1,48,1,4,1,5 +62582,7,1,2,10,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23170.920553,0,1,94,4,4,1.26,2,2,0,1,0,1,40,1,4,3,NA +62583,7,2,2,17,NA,3,3,1,17,212,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,93740.540203,99321.165816,1,100,8,8,1.95,4,4,0,2,1,2,49,1,5,6,NA +62584,7,2,1,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,29738.952706,31253.462984,2,94,8,8,3.4,2,2,0,0,0,1,22,1,3,5,NA +62585,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,14859.685983,15314.825218,1,95,3,3,0.98,1,1,0,0,1,2,80,1,3,2,NA 
+62586,7,2,1,70,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,68266.732554,71967.824097,1,95,5,5,2.02,1,1,0,0,1,1,70,1,3,5,NA +62587,7,2,1,73,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,NA,15565.766387,16476.020536,1,101,3,3,1.07,2,1,0,0,1,1,73,1,4,6,NA +62588,7,2,1,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,137038.746155,146293.19093,2,101,3,1,0.18,2,1,0,0,0,1,21,1,4,5,NA +62589,7,2,2,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,61388.874846,64435.558603,1,90,7,7,1.89,3,3,0,0,1,2,75,1,4,3,NA +62590,7,2,1,11,NA,4,4,2,11,133,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8483.005475,8584.70346,1,96,7,7,1.69,4,4,0,1,0,2,19,2,4,NA,NA +62591,7,2,1,10,NA,3,3,1,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,54897.892683,57357.850008,2,98,14,14,3.58,4,4,0,2,0,1,36,1,3,1,4 +62592,7,2,1,0,5,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9665.908305,9498.805759,1,95,5,5,1.08,3,3,1,0,0,2,22,1,3,6,NA +62593,7,2,2,72,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,64463.340883,65151.773075,1,91,7,7,2.92,2,2,0,0,1,2,47,1,5,5,NA +62594,7,2,1,0,10,4,4,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5707.173345,5961.496632,1,90,4,4,0.67,5,5,3,0,0,2,32,2,3,3,NA +62595,7,2,2,28,NA,5,7,1,NA,NA,2,NA,2,1,5,NA,4,1,3,1,2,2,1,2,2,1,2,2,1,13851.686232,14954.517784,3,91,14,14,4.32,3,3,1,0,0,1,31,2,3,1,4 +62596,7,2,2,52,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11428.6913,11489.125563,1,103,5,5,0.65,6,6,0,0,1,2,26,2,4,5,NA +62597,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,11696.973403,12170.020379,2,97,6,6,1,6,6,1,2,2,2,60,1,2,2,NA +62598,7,2,1,18,NA,4,4,1,18,223,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11666.009872,12200.765691,2,100,14,4,0.43,7,7,1,3,1,2,62,1,3,5,NA +62599,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17521.481386,17644.217766,2,97,5,5,0.76,5,5,0,0,0,2,50,1,4,5,NA 
+62600,7,2,1,10,NA,2,2,1,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,10738.959181,10632.988891,2,93,14,14,3.25,4,4,0,2,0,2,46,2,5,1,4 +62601,7,2,1,65,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,9636.209947,9711.587411,2,97,14,14,5,2,2,0,0,1,2,52,1,5,1,3 +62602,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,60071.993203,65791.533402,1,95,8,8,4.59,1,1,0,0,1,2,80,1,4,2,NA +62603,7,2,2,38,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,39561.667842,42653.886229,2,98,6,6,1.25,4,4,1,0,1,1,46,1,2,6,NA +62604,7,2,2,73,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,69886.968852,80170.534309,1,102,4,4,1.42,1,1,0,0,1,2,73,1,3,5,NA +62605,7,1,1,1,22,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7025.964374,0,2,91,4,4,0.65,5,5,1,3,0,1,43,2,3,5,NA +62606,7,2,1,2,NA,4,4,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5930.749873,6003.089312,2,93,6,6,1.98,2,2,1,0,0,2,51,1,2,3,NA +62607,7,2,1,4,NA,4,4,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,11070.778245,1,100,1,1,0.04,4,4,1,1,0,2,51,1,3,3,NA +62608,7,1,2,67,NA,2,2,NA,NA,NA,2,NA,2,1,NA,NA,2,1,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,7278.790659,0,2,93,NA,NA,NA,2,2,0,0,2,1,68,NA,NA,1,2 +62609,7,2,1,9,NA,3,3,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,NA,2,1,2,2,1,2,2,1,25604.034863,26336.174292,1,95,5,5,0.92,5,5,1,2,0,2,30,1,4,1,4 +62610,7,2,2,12,NA,2,2,1,12,146,NA,NA,2,2,1,4,NA,NA,NA,2,1,2,2,2,2,2,2,1,2,18006.855255,21837.57166,1,100,3,3,0.39,5,5,1,2,0,1,32,2,1,6,NA +62611,7,2,1,45,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,11725.7448,11815.324423,2,95,3,3,1.16,1,1,0,0,0,1,45,2,2,4,NA +62612,7,2,2,38,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,21619.283038,22049.338941,2,102,9,9,2.68,4,4,1,1,0,2,38,2,5,1,2 +62613,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,28394.357741,26997.683446,1,100,3,3,0.66,2,2,0,1,0,2,29,1,4,1,NA 
+62614,7,2,1,12,NA,3,3,2,12,154,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,58479.782556,60361.370686,1,99,15,15,4.47,4,4,0,2,0,2,43,1,5,1,5 +62615,7,2,2,0,11,4,4,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3774.069255,3925.050204,2,99,5,5,0.78,5,5,2,2,0,2,30,1,3,5,NA +62616,7,2,2,37,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,1,1,2,1,2,1,1,2,2,1,2,1,NA,15542.93857,16609.63545,3,91,4,4,0.69,5,5,0,2,0,1,45,2,4,1,1 +62617,7,2,1,8,NA,5,7,1,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8439.403196,8726.876152,2,103,6,6,1.11,5,5,1,2,0,2,36,1,4,5,NA +62618,7,2,1,10,NA,5,7,2,10,129,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7923.925927,8546.646915,1,91,15,15,5,5,5,0,3,0,1,40,1,5,1,5 +62619,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,24023.719783,25884.561742,2,96,7,7,3.21,2,1,0,0,1,1,80,1,5,5,NA +62620,7,2,2,7,NA,2,2,1,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18660.103977,19985.967776,2,91,6,6,0.93,5,5,1,2,0,1,34,1,2,1,3 +62621,7,2,1,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,23242.990557,23753.836506,1,100,8,8,1.95,4,4,0,2,0,2,42,1,4,1,4 +62622,7,2,2,15,NA,5,6,2,16,192,NA,NA,2,1,4,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10963.340882,11440.408955,2,91,15,15,5,4,4,0,2,1,2,56,1,5,1,5 +62623,7,2,2,78,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,39653.38014,40869.906755,2,95,6,6,1.57,3,3,0,0,2,1,80,1,2,1,4 +62624,7,2,2,40,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,18255.735511,19354.37213,2,91,14,14,3.47,4,4,1,1,0,2,40,2,5,1,NA +62625,7,2,2,60,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15207.312407,15896.113669,1,92,15,15,5,3,3,0,0,1,1,57,1,3,1,4 +62626,7,2,1,13,NA,1,1,2,13,157,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,17314.330266,19031.527568,1,90,4,4,0.46,7,7,2,3,0,2,34,2,1,6,NA +62627,7,2,2,2,NA,1,1,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9955.153132,10379.318342,2,94,9,9,2.29,5,5,2,1,0,2,33,2,3,1,1 
+62628,7,2,2,1,19,3,3,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16717.071084,17765.560854,2,94,8,8,1.6,6,6,3,1,0,2,32,1,4,1,4 +62629,7,2,1,11,NA,5,6,2,11,138,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6262.834446,6755.01452,3,90,12,12,NA,5,5,1,2,0,1,37,2,5,1,5 +62630,7,2,2,68,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,149153.684635,157450.342804,1,101,3,3,1.25,1,1,0,0,1,2,68,1,2,2,NA +62631,7,2,2,37,NA,3,3,2,NA,NA,2,NA,2,1,4,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,77778.949308,79987.938327,1,101,6,6,1.28,4,4,2,0,0,1,44,1,4,1,4 +62632,7,2,1,29,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,14424.961621,14817.68753,2,102,5,5,1.08,3,3,0,1,0,2,46,2,1,5,NA +62633,7,2,2,61,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12176.672371,12646.648452,2,92,15,15,5,2,2,0,0,2,1,61,1,5,1,5 +62634,7,2,1,68,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,114174.703171,113009.251918,2,97,8,8,3.06,2,2,0,0,2,1,68,1,2,1,4 +62635,7,2,2,27,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,5,1,2,1,2,1,1,2,2,NA,NA,NA,NA,13851.686232,14678.247845,3,91,5,5,1.08,3,3,1,0,0,1,29,2,5,1,5 +62636,7,2,1,17,NA,4,4,1,17,215,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12147.046136,12703.852077,2,100,4,4,0.86,3,3,0,2,0,2,36,1,3,5,NA +62637,7,2,2,3,NA,4,4,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13130.790087,13857.266639,2,102,15,15,4.2,5,5,1,2,0,2,29,NA,NA,1,NA +62638,7,2,2,9,NA,4,4,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,13050.526646,2,101,13,3,0.64,5,4,0,3,1,2,62,1,1,2,NA +62639,7,2,2,26,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,6,2,2,2,2,1,2,2,2,2,2,2,43708.837271,43518.466069,1,96,9,9,3.97,2,2,0,0,0,1,28,2,2,6,NA +62640,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,26546.087356,29693.438127,1,102,6,6,1.12,4,4,2,0,0,2,31,1,3,6,NA +62641,7,2,1,2,NA,1,1,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10276.786905,10602.092524,1,102,14,14,2.83,6,6,1,2,0,1,36,1,2,1,3 
+62642,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,23607.684366,1,95,1,1,0.11,1,1,0,0,0,1,30,1,3,5,NA +62643,7,2,2,2,24,4,4,2,2,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5687.793894,6161.690011,2,90,99,99,NA,5,5,1,0,0,1,20,1,3,5,NA +62644,7,2,1,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,11761.359913,11893.852645,1,95,5,5,1.79,1,1,0,0,1,1,61,1,4,2,NA +62645,7,2,2,12,NA,5,6,1,12,153,NA,NA,2,2,2,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6391.016092,6494.382638,1,100,99,99,NA,6,6,0,1,0,1,53,2,2,1,3 +62646,7,2,2,57,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,17164.211773,16694.46809,2,99,8,8,3.57,2,2,0,0,1,1,67,1,2,1,2 +62647,7,2,1,0,0,2,2,1,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8143.831412,8143.981517,2,102,5,5,0.89,4,4,2,0,0,1,45,1,3,1,4 +62648,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,1,2,NA,1,2,1,1,2,2,1,2,1,NA,19516.411172,20124.53654,2,102,10,10,4.76,2,2,0,0,1,2,49,2,5,5,NA +62649,7,2,1,76,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,9334.084938,9516.918553,2,96,8,8,2.59,3,3,0,0,2,1,76,1,3,1,4 +62650,7,2,1,71,NA,4,4,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,7110.382011,7529.893703,2,93,6,6,1.85,2,2,0,0,2,1,71,2,1,1,2 +62651,7,2,1,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,152467.08796,155918.172532,1,94,6,6,2.69,1,1,0,0,0,1,55,1,3,3,NA +62652,7,2,1,46,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,34205.013302,40575.476948,2,102,14,14,2.44,7,7,0,2,1,2,71,1,3,3,NA +62653,7,1,1,49,NA,5,6,NA,NA,NA,2,NA,2,2,4,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,13323.689579,0,2,92,6,6,1.3,4,4,0,1,0,2,48,2,3,1,3 +62654,7,1,1,4,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,30553.632981,0,1,98,5,2,0.42,4,3,2,0,0,1,24,1,3,6,NA +62655,7,2,1,1,23,4,4,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5930.749873,6003.089312,2,93,9,9,2.6,4,4,1,1,0,1,37,1,4,1,4 
+62656,7,2,2,0,9,1,1,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,6787.112205,6969.467907,2,94,4,4,0.73,5,5,2,1,0,1,35,2,1,6,NA +62657,7,2,2,2,NA,1,1,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11401.934012,11887.743086,1,95,13,13,NA,5,5,1,2,0,2,34,2,1,1,1 +62658,7,2,1,68,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,28559.076421,28710.827523,1,92,6,6,2.3,1,1,0,0,1,1,68,1,3,3,NA +62659,7,2,1,18,NA,5,6,1,18,219,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,5526.901806,5811.556543,2,103,7,7,1.65,4,4,0,1,1,2,55,2,3,1,NA +62660,7,2,2,7,NA,5,6,1,7,88,NA,NA,2,1,2,1,NA,NA,NA,1,1,2,1,2,1,NA,NA,NA,NA,7932.110938,8947.143287,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +62661,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,84910.063417,90031.493834,2,96,14,7,3.67,3,1,0,0,0,1,30,2,5,5,NA +62662,7,2,2,11,NA,1,1,1,11,138,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15962.145468,16664.698857,2,98,7,7,1.97,4,4,0,1,0,1,40,1,3,1,3 +62663,7,1,2,3,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12984.581505,0,2,93,15,15,5,4,4,2,0,0,1,34,1,5,1,5 +62664,7,2,1,64,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,25436.904729,25763.177128,1,94,8,8,3.3,2,2,0,0,2,1,64,1,5,1,3 +62665,7,2,2,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,13504.560081,14155.716256,1,102,7,7,1.8,5,4,1,0,2,1,47,1,3,5,NA +62666,7,2,1,3,NA,5,6,1,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8838.066777,9670.229845,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +62667,7,2,1,6,NA,3,3,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,53915.042746,57006.573447,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +62668,7,2,1,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,25120.174741,24762.106831,2,102,6,6,1.22,5,5,0,2,0,2,42,1,4,1,4 +62669,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,20891.980831,22249.799905,2,97,6,6,1.35,3,3,0,0,0,2,54,1,3,6,NA 
+62670,7,2,1,13,NA,3,3,2,14,169,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,107087.58296,107224.801212,1,95,8,8,1.28,7,7,1,4,0,1,32,1,3,1,3 +62671,7,2,1,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,7973.883342,8036.257566,1,100,77,77,NA,1,1,0,0,1,1,69,1,4,3,NA +62672,7,2,2,13,NA,4,4,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11078.202838,10933.134393,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +62673,7,2,1,34,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,34959.343013,34695.493801,1,97,8,8,1.45,6,6,2,2,0,2,36,2,2,1,1 +62674,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,NA,28989.076324,29596.20575,1,93,6,6,1.16,4,4,2,0,0,2,33,1,5,1,4 +62675,7,2,1,0,9,3,3,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16921.794985,16629.253929,1,99,15,15,5,4,4,1,1,0,2,27,1,5,1,5 +62676,7,2,2,17,NA,3,3,2,17,214,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,111142.989658,114235.009147,1,95,8,8,1.28,7,7,1,4,0,1,32,1,3,1,3 +62677,7,2,2,30,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,56677.205755,57971.218225,2,102,10,10,4.76,2,2,0,0,0,1,27,1,4,1,4 +62678,7,2,1,68,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,1,2,1,2,2,2,2,9235.951997,9462.879133,2,103,5,5,1.79,2,1,0,0,2,1,68,2,1,1,NA +62679,7,2,2,7,NA,1,1,1,7,90,NA,NA,2,2,2,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15897.166957,16345.216522,2,102,6,3,0.54,6,4,0,4,0,2,43,2,1,5,NA +62680,7,2,1,68,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7581.139559,8116.967146,3,90,14,14,4.96,2,2,0,0,2,1,68,1,3,1,5 +62681,7,2,2,5,NA,1,1,1,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15457.736897,15510.51982,2,98,3,3,0.33,7,7,2,3,0,1,40,2,1,1,1 +62682,7,2,2,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,44636.780791,48652.73082,2,91,3,3,0.86,2,2,0,0,1,2,62,1,3,2,NA +62683,7,2,2,55,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16829.599526,17568.396647,1,90,99,99,NA,3,3,0,1,1,1,60,1,5,1,5 
+62684,7,2,2,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,82612.6713,83351.284903,1,92,10,10,2.1,6,6,1,1,0,2,29,1,4,1,2 +62685,7,2,1,36,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,5,NA,1,2,1,1,2,1,1,2,1,1,12869.386353,13443.315161,2,92,7,7,1.89,3,3,0,0,1,1,36,2,3,5,NA +62686,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,38007.846215,47247.232001,1,94,7,7,2.31,2,2,0,0,1,2,80,1,3,2,NA +62687,7,2,1,10,NA,4,4,2,10,131,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8483.005475,8584.70346,1,96,8,8,2,4,4,1,2,0,2,40,1,4,5,NA +62688,7,2,1,24,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,10141.381563,13212.383706,3,90,6,6,1.98,2,2,0,0,0,2,47,2,5,2,NA +62689,7,2,1,44,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,116464.874823,116611.129855,2,94,15,15,5,5,5,0,3,0,1,44,1,5,1,4 +62690,7,2,2,26,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,17568.277926,19062.73336,1,90,99,99,NA,2,2,0,1,0,2,26,1,4,5,NA +62691,7,2,1,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18544.003944,19387.603198,2,100,7,7,1.79,4,4,0,1,0,2,51,1,3,3,NA +62692,7,1,2,50,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,17521.481386,0,2,97,7,7,3.49,1,1,0,0,0,2,50,1,4,5,NA +62693,7,2,2,69,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,9570.416297,10279.373686,2,98,5,5,2.2,1,1,0,0,1,2,69,1,2,2,NA +62694,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,21219.562726,22239.20882,2,91,3,3,0.86,2,2,0,0,1,2,62,1,3,2,NA +62695,7,2,2,41,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,4,2,1,2,2,1,2,2,1,2,2,1,19130.246369,19734.446951,2,101,2,2,0.22,4,4,1,1,0,2,41,1,2,4,NA +62696,7,2,1,13,NA,4,4,2,13,159,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11351.725436,12116.15399,2,95,6,6,1.3,4,4,1,1,0,1,38,1,4,1,4 +62697,7,2,1,9,NA,2,2,2,9,119,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12477.812875,12553.43469,2,94,6,6,1.43,5,4,2,1,0,2,23,2,3,6,NA 
+62698,7,1,1,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,38666.703155,0,2,94,77,77,NA,2,2,0,0,2,2,80,1,5,1,5 +62699,7,2,1,1,23,4,4,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4912.962876,5115.96715,2,90,8,8,1.67,6,6,2,2,0,2,35,2,5,3,NA +62700,7,2,1,9,NA,4,4,1,9,119,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9022.8939,9530.778148,2,100,3,3,0.31,7,7,3,2,0,2,28,1,3,1,3 +62701,7,2,2,29,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,30253.427014,31766.413225,2,90,8,8,2.24,4,4,1,1,0,2,29,1,4,6,NA +62702,7,1,2,12,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,55,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,20322.312754,0,2,91,6,6,1.3,4,4,1,1,0,2,27,1,4,6,NA +62703,7,2,2,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7785.57328,8700.336584,1,96,8,8,2.17,4,4,1,1,0,2,41,1,3,1,3 +62704,7,2,1,80,NA,5,7,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,9071.406562,10878.533019,1,94,5,5,1.59,2,2,0,0,2,2,73,1,3,1,3 +62705,7,2,2,30,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,35353.005268,34399.106917,2,91,15,15,5,2,2,0,0,0,2,30,1,4,1,4 +62706,7,2,2,39,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,42150.923371,53077.558906,1,97,9,9,3.7,2,2,0,0,0,1,25,NA,NA,5,NA +62707,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,25511.973394,26011.568993,1,98,6,6,0.97,7,7,1,2,0,1,49,1,2,1,2 +62708,7,2,1,52,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,13567.923118,14187.110343,3,91,6,6,1.34,4,4,0,2,0,1,52,2,3,1,1 +62709,7,2,2,10,NA,3,3,2,10,122,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24070.467912,25563.654853,1,95,6,6,1.09,5,5,0,3,0,1,31,1,4,1,4 +62710,7,2,1,41,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,36225.654087,36329.951191,2,93,5,5,0.87,4,4,1,1,0,1,41,2,5,1,3 +62711,7,2,1,1,21,3,3,2,NA,23,NA,NA,2,1,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18848.609643,22085.568952,1,91,6,6,2.04,2,2,1,0,0,2,31,1,5,4,NA 
+62712,7,2,2,4,NA,2,2,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16087.525226,17761.059567,2,91,4,4,0.69,4,4,2,0,0,2,21,1,3,6,NA +62713,7,2,1,15,NA,3,3,2,15,183,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17395.533107,17264.495057,1,99,13,13,NA,4,4,0,2,0,1,55,NA,NA,1,4 +62714,7,2,1,75,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,1,1,NA,2,2,2,2,2,2,1,2,2,NA,14200.083364,15006.095575,3,92,3,3,0.68,2,2,0,0,2,1,75,1,1,1,1 +62715,7,2,1,10,NA,3,3,2,11,132,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,47596.305609,49235.587472,1,99,15,15,3.82,5,5,0,3,0,1,57,1,5,1,5 +62716,7,2,2,42,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,11762.034222,12708.069605,3,90,12,12,NA,5,5,1,2,0,1,37,2,5,1,5 +62717,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,32252.129527,31981.695662,1,99,6,4,1.38,2,1,0,0,0,2,50,1,3,6,NA +62718,7,2,1,63,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,6038.685119,6085.921613,1,96,6,6,2.51,1,1,0,0,1,1,63,1,4,5,NA +62719,7,2,1,74,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,4,NA,2,2,2,1,2,2,2,2,2,NA,14646.929358,15478.305035,1,96,15,8,4.66,2,1,0,0,2,2,68,2,1,3,NA +62720,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,151766.599459,151581.541662,3,91,15,15,5,2,2,0,0,0,2,57,1,5,1,5 +62721,7,2,2,2,NA,3,3,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,33127.913744,33563.225688,1,99,15,15,5,4,4,2,0,0,1,33,1,5,1,5 +62722,7,2,1,1,20,1,1,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12493.910388,13386.323284,2,98,6,6,1.21,4,4,1,0,0,2,49,2,2,6,NA +62723,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,95214.22557,102645.939709,1,97,15,15,5,3,3,1,0,0,2,31,1,5,1,5 +62724,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,187291.098551,189225.431861,2,91,15,15,5,2,2,0,0,1,2,57,1,5,1,5 +62725,7,2,2,3,NA,5,6,2,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,2,NA,NA,NA,NA,5060.292252,5371.954509,1,90,8,8,1.43,7,7,2,0,0,1,23,2,4,1,3 
+62726,7,1,2,1,18,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,39493.444219,0,2,103,15,15,5,4,4,2,0,0,1,36,2,4,1,5 +62727,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,43807.995099,48928.4376,1,95,6,6,1.65,2,2,0,0,2,2,80,1,4,1,4 +62728,7,2,2,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,32569.109434,37398.26507,1,96,77,77,NA,1,1,0,0,0,2,32,1,5,5,NA +62729,7,2,2,1,14,2,2,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,6962.449638,7451.646327,3,90,12,12,NA,5,5,1,1,0,1,35,2,5,3,NA +62730,7,2,1,0,3,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7493.544598,7825.810309,1,98,6,6,0.97,7,7,1,2,0,1,49,1,2,1,2 +62731,7,2,1,19,NA,1,1,2,19,238,2,NA,2,2,2,12,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,21718.29328,21806.698186,2,94,NA,13,NA,3,2,0,0,0,1,44,2,2,6,NA +62732,7,2,2,71,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,1,NA,2,2,2,2,2,2,2,2,2,NA,18067.031882,19590.328656,2,94,2,2,0.56,2,2,0,0,2,2,71,2,2,1,1 +62733,7,1,1,5,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8253.750998,0,1,90,15,15,5,4,4,2,0,0,1,36,1,5,1,5 +62734,7,2,1,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18783.626894,19376.125899,2,99,2,2,0.53,2,2,0,0,1,2,69,1,4,2,NA +62735,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,105261.096488,129835.348564,1,97,15,15,5,4,4,1,1,0,2,33,1,5,1,3 +62736,7,2,2,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,17036.134307,17206.306314,2,90,6,6,1.4,3,3,0,1,0,2,40,1,4,5,NA +62737,7,2,1,58,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,1,1,2,1,3,10579.097347,10676.360686,2,92,3,3,0.7,3,3,0,0,0,1,58,2,1,1,1 +62738,7,2,1,6,NA,3,3,1,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20333.209695,22709.426802,1,103,6,6,1.11,5,5,1,1,1,1,29,1,3,1,3 +62739,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,60135.660369,64822.591837,1,90,8,8,2.49,3,3,0,0,2,1,74,1,5,1,2 
+62740,7,2,2,19,NA,4,4,2,19,232,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12857.456314,12921.235691,2,97,8,8,2.7,3,3,0,0,1,2,72,1,2,3,NA +62741,7,2,2,0,3,4,4,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3861.876549,4122.902052,1,96,14,14,2.91,6,6,1,2,1,2,32,1,5,1,4 +62742,7,2,1,70,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,2,2,2,1,2,2,NA,12962.876803,13431.341497,2,90,6,6,1.7,2,2,0,0,2,1,70,2,1,1,4 +62743,7,2,1,0,5,3,3,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7467.727239,7549.772257,3,91,4,4,0.65,5,5,2,2,0,2,27,2,2,3,NA +62744,7,2,1,37,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,43108.74283,45119.517495,2,91,9,9,2.6,4,4,1,1,0,2,31,2,4,1,5 +62745,7,2,2,71,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,12183.823561,13095.132048,2,100,5,5,1.63,2,2,0,0,2,1,71,NA,NA,1,2 +62746,7,2,2,38,NA,2,2,2,NA,NA,2,NA,2,1,4,NA,4,1,2,2,2,2,2,2,2,NA,NA,NA,NA,26897.674477,26781.441446,2,99,77,77,NA,4,4,1,1,1,2,38,2,4,1,4 +62747,7,2,1,52,NA,4,4,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19064.767778,19872.734197,2,93,14,14,2.78,5,5,0,0,0,1,52,2,4,1,2 +62748,7,2,1,13,NA,2,2,1,13,164,NA,NA,2,2,1,7,NA,NA,NA,2,1,2,2,2,2,2,2,2,2,15674.964193,15512.049412,2,93,9,9,1.49,7,7,0,3,0,2,41,2,5,1,5 +62749,7,2,1,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,22039.963414,22735.178265,1,96,15,15,5,2,2,0,0,0,1,48,1,2,1,3 +62750,7,2,2,45,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,15890.289377,17041.022437,2,94,3,3,0.68,3,2,0,1,0,2,45,2,4,3,NA +62751,7,2,1,8,NA,3,3,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21839.599095,22875.076583,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA +62752,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,59510.728426,61449.871516,1,99,15,15,5,5,5,3,0,0,2,34,1,5,1,5 +62753,7,2,2,16,NA,4,4,1,16,203,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18421.460617,19740.37466,1,92,6,6,1.3,4,4,0,1,1,1,25,1,1,1,3 
+62754,7,2,2,0,7,3,3,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25216.508684,24531.572308,1,101,14,14,3.38,4,4,2,0,0,1,32,1,4,1,5 +62755,7,2,2,19,NA,4,4,2,19,235,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12857.456314,12921.235691,2,97,3,3,0.66,3,3,2,0,0,2,19,1,3,NA,NA +62756,7,2,1,7,NA,3,3,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25604.034863,28346.339599,1,95,7,7,1.66,5,5,0,3,0,1,34,1,2,1,4 +62757,7,2,1,54,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,105571.343095,105442.613847,1,92,10,10,2.1,6,6,1,1,0,2,29,1,4,1,2 +62758,7,1,2,9,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7722.982971,0,1,93,15,15,5,3,3,0,2,0,2,40,2,5,4,NA +62759,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,18395.989535,18290.733411,2,93,9,9,2.07,5,5,0,1,0,1,55,NA,NA,5,NA +62760,7,2,2,10,NA,2,2,2,10,122,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9872.244853,10573.701398,2,90,7,7,0.89,7,7,1,3,3,1,60,2,3,1,3 +62761,7,2,1,68,NA,1,1,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11992.012141,12562.386395,2,102,6,6,1.97,2,2,0,0,2,2,67,1,3,1,3 +62762,7,2,2,1,13,1,1,1,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12260.86913,13122.344171,3,92,10,10,2.26,6,6,1,3,0,1,34,1,4,1,1 +62763,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,110931.964733,116248.869259,1,94,15,15,5,2,2,0,0,0,1,29,1,4,1,5 +62764,7,2,2,51,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,22969.116046,23881.325913,2,93,1,1,0.36,1,1,0,0,0,2,51,1,3,5,NA +62765,7,2,2,29,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16844.740449,17564.040518,3,91,7,7,2.45,2,2,0,0,0,2,29,2,5,1,5 +62766,7,2,1,1,22,5,6,2,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5891.941477,6363.138629,1,90,6,6,0.92,6,6,2,0,2,2,30,2,5,1,5 +62767,7,1,2,40,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,NA,NA,NA,NA,NA,NA,NA,29650.79971,0,2,90,NA,NA,NA,1,1,0,0,0,2,40,1,4,5,NA 
+62768,7,2,2,38,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,3,4,2,2,2,2,1,2,2,2,2,2,2,42456.72357,41311.151976,1,95,4,4,0.68,5,5,0,1,0,2,38,2,3,4,NA +62769,7,2,2,13,NA,3,3,2,13,162,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74165.041171,76338.869017,2,94,5,5,0.89,4,4,0,2,0,2,51,1,2,3,NA +62770,7,2,1,1,15,3,3,2,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,51373.484735,55257.405076,3,91,14,14,4.32,3,3,1,0,0,1,28,1,5,1,5 +62771,7,2,1,30,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,21863.450986,22291.478781,2,99,2,2,0.75,1,1,0,0,0,1,30,1,2,5,NA +62772,7,2,2,46,NA,1,1,2,NA,NA,2,NA,2,2,1,NA,5,5,NA,2,2,2,1,2,2,2,2,2,2,29995.384937,31080.287607,1,93,15,15,5,3,2,0,1,0,2,46,2,5,5,NA +62773,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,27693.650341,27908.388498,3,91,1,1,0.19,3,3,0,2,0,2,50,1,4,3,NA +62774,7,1,1,80,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11524.806252,0,1,94,3,3,0.39,6,6,1,0,2,1,80,1,4,1,3 +62775,7,1,2,69,NA,2,2,NA,NA,NA,2,NA,2,1,6,NA,1,3,NA,2,2,2,2,2,2,NA,NA,NA,NA,13676.984152,0,2,91,6,6,1.36,3,3,0,0,1,2,48,2,1,5,NA +62776,7,2,1,9,NA,4,4,1,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10410.106675,10617.081899,2,96,6,6,1.62,3,3,0,2,0,2,31,1,3,5,NA +62777,7,2,1,28,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,18927.052732,18722.519921,2,92,15,5,1.93,4,1,0,0,0,1,28,1,5,5,NA +62778,7,2,2,4,NA,3,3,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,78365.406271,80834.23308,2,91,14,14,4.12,4,4,2,0,0,1,35,1,5,1,5 +62779,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,81291.35274,86533.223356,2,95,12,99,NA,2,1,0,0,0,2,40,1,4,6,NA +62780,7,2,2,5,NA,3,3,1,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,83950.915901,92655.852423,1,100,15,15,5,4,4,1,1,0,1,40,1,5,1,5 +62781,7,2,2,3,NA,1,1,1,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10493.785765,10940.90082,1,103,5,5,0.71,6,6,2,2,0,2,31,2,2,1,2 
+62782,7,2,2,0,3,5,6,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7997.590655,8653.844084,3,91,15,15,5,4,4,2,0,0,1,36,2,5,1,5 +62783,7,2,2,6,NA,2,2,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,15897.166957,16345.216522,2,102,4,4,0.57,6,6,2,3,0,2,26,2,3,1,NA +62784,7,2,1,31,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,39040.678458,46646.692487,3,92,7,7,1.49,5,5,0,2,1,2,62,1,4,2,NA +62785,7,2,1,4,NA,4,4,2,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8005.528865,8827.404644,1,99,14,14,3.8,4,4,1,1,0,1,48,2,5,1,5 +62786,7,2,1,7,NA,1,1,1,8,96,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,16605.106492,16594.063114,1,92,4,4,0.74,4,4,1,1,0,1,42,2,3,1,4 +62787,7,2,2,21,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,11739.283384,12800.154398,1,96,15,15,5,5,5,0,0,0,1,58,2,5,1,5 +62788,7,2,1,35,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,6,NA,2,2,2,1,2,2,2,2,2,2,34887.439952,36924.956604,2,94,4,4,0.73,5,5,2,1,0,1,35,2,1,6,NA +62789,7,2,2,26,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,16929.836231,18556.498474,2,101,6,5,2.2,2,1,0,0,0,2,26,2,4,6,NA +62790,7,2,2,11,NA,4,4,2,11,139,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7631.175557,7797.231767,1,93,1,1,0.02,5,5,0,4,0,2,36,NA,NA,5,NA +62791,7,2,1,14,NA,3,3,1,14,173,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24231.355333,25219.610889,2,100,5,5,1.3,3,3,0,1,0,2,46,1,3,2,NA +62792,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,23128.736624,26524.976891,1,98,3,3,1.09,1,1,0,0,0,2,52,1,4,3,NA +62793,7,2,1,72,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,87347.444626,90661.31559,1,95,15,15,5,2,2,0,0,2,1,72,1,3,1,4 +62794,7,2,2,45,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,19757.142379,19558.756904,1,96,8,8,2.17,4,4,0,2,0,2,45,2,5,4,NA +62795,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,35522.958395,36075.029683,1,102,13,13,NA,2,1,0,0,0,1,22,1,4,5,NA 
+62796,7,2,2,17,NA,5,6,2,17,211,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,6937.463063,7785.963668,3,90,NA,NA,NA,4,4,0,1,0,1,56,NA,NA,1,NA +62797,7,2,2,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16929.836231,20416.10433,2,101,7,5,1.84,2,1,0,0,0,2,21,NA,NA,5,NA +62798,7,2,1,50,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,2,2,2,2,27240.276328,27670.554468,1,90,6,6,1.57,3,3,0,0,0,1,50,2,2,1,2 +62799,7,2,2,0,2,4,4,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3800.155777,4057.009552,2,93,3,3,0.36,6,6,1,1,2,2,69,2,3,1,3 +62800,7,2,2,15,NA,3,3,2,15,183,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,103298.858809,106172.653059,1,95,9,9,2.66,4,4,0,2,0,1,45,1,3,1,3 +62801,7,2,2,19,NA,3,3,2,19,229,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,23572.289725,23898.02545,2,95,3,3,0.63,3,3,0,0,0,2,44,1,2,4,NA +62802,7,2,1,6,NA,3,3,1,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,64054.596883,66498.597232,1,92,6,6,1.57,3,3,0,1,0,1,29,1,4,6,NA +62803,7,2,2,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,71034.153987,71525.773464,1,98,14,14,3.15,5,5,0,3,0,1,34,1,4,1,4 +62804,7,2,1,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,97936.972803,98855.488943,2,95,14,14,5,2,2,0,0,2,2,66,1,5,1,5 +62805,7,2,2,41,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,17059.906216,21044.744151,3,90,9,9,4.1,2,2,0,1,0,2,41,2,5,5,NA +62806,7,2,1,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,23775.734331,24513.29876,2,96,4,2,0.76,2,1,0,0,0,1,53,1,2,6,NA +62807,7,2,1,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,9,9,4.15,2,2,0,0,2,2,64,1,4,1,5 +62808,7,2,2,44,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,2,2,2,2,1,2,2,1,2,2,2,31334.47528,32083.194015,2,96,5,5,0.78,5,5,0,2,0,1,37,2,1,5,NA +62809,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,27004.065356,27080.034561,1,96,1,1,0.23,1,1,0,0,0,2,51,1,3,2,NA 
+62810,7,2,2,15,NA,3,3,1,15,190,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,105891.533689,107879.366927,1,101,6,6,0.97,7,7,2,1,0,1,43,1,2,1,NA +62811,7,2,2,40,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,19258.69251,18731.627868,2,99,12,6,2.69,6,1,0,0,0,2,57,1,5,2,NA +62812,7,2,1,50,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,6,NA,2,2,2,2,2,2,NA,NA,NA,NA,37557.946192,37006.841228,2,102,NA,2,0.46,2,1,0,0,1,1,50,2,1,6,NA +62813,7,2,1,71,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,NA,13555.672819,14397.395601,1,94,3,3,1.16,1,1,0,0,1,1,71,1,4,5,NA +62814,7,2,1,72,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,8601.453077,8734.617022,2,101,5,5,1.7,2,2,0,0,1,1,72,1,2,2,NA +62815,7,2,1,4,NA,2,2,1,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13504.027725,13522.274849,2,93,8,8,2.17,4,4,2,0,0,2,30,1,4,1,4 +62816,7,2,1,23,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,NA,1,2,1,1,2,1,1,2,1,3,14385.653726,15564.966804,2,101,7,4,1.38,2,1,0,0,0,1,23,2,4,5,NA +62817,7,2,1,41,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,19692.655418,21458.476044,2,99,6,3,0.94,3,2,0,0,0,1,41,1,3,6,NA +62818,7,2,2,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,15096.339697,2,100,7,7,1.34,5,5,0,2,0,2,53,1,4,4,NA +62819,7,2,2,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,140431.173819,141856.211353,1,91,8,8,2.17,4,4,0,0,0,1,59,1,4,1,5 +62820,7,2,2,53,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,1,2,1,1,2,2,1,2,1,NA,21760.356138,22156.586691,1,92,4,4,0.76,4,4,0,0,0,2,53,2,1,1,1 +62821,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,16058.142925,17073.637087,2,95,7,6,1.88,3,2,0,0,0,2,56,1,3,2,NA +62822,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,114838.671743,118127.710351,1,91,8,4,1.38,2,1,0,0,0,1,33,1,5,6,NA +62823,7,2,1,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,20393.886544,21026.540202,2,97,1,1,0.18,1,1,0,0,0,1,46,1,2,4,NA 
+62824,7,2,2,40,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,29670.405171,29084.152013,2,101,14,14,4.03,4,4,0,1,0,2,40,1,5,1,5 +62825,7,2,2,3,NA,1,1,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9468.006743,10197.085534,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +62826,7,1,2,80,NA,5,7,NA,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,36240.721121,0,1,101,3,3,1.12,1,1,0,0,1,2,80,1,1,2,NA +62827,7,2,2,0,6,5,7,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25216.508684,26724.106581,1,101,6,6,1.28,4,4,2,0,0,1,44,1,4,1,4 +62828,7,2,2,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,3,1,2,2,1,2,2,NA,NA,NA,NA,60324.348827,61156.73477,1,95,7,2,0.78,2,1,0,0,0,1,26,1,3,6,NA +62829,7,2,1,0,3,1,1,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8429.177912,8429.333277,1,95,15,15,5,3,3,1,0,0,1,41,1,5,1,5 +62830,7,2,1,14,NA,3,3,2,14,173,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,88448.252445,87285.839384,1,101,14,14,5,3,3,0,1,0,2,36,1,5,1,5 +62831,7,2,2,4,NA,1,1,1,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10024.946819,10059.178608,2,93,77,77,NA,7,7,3,1,0,2,43,2,1,1,9 +62832,7,2,2,15,NA,1,1,1,15,189,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,16427.640886,17012.806816,1,102,3,3,0.54,3,3,0,2,0,2,42,2,2,3,NA +62833,7,2,2,18,NA,3,3,2,18,219,2,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,31627.471241,33789.841724,1,95,1,1,0.21,4,4,1,0,1,2,75,1,1,2,NA +62834,7,2,1,6,NA,1,1,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11159.151566,11226.781631,1,102,6,6,0.8,7,7,3,3,0,2,34,2,3,1,1 +62835,7,2,2,66,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,12139.361543,12607.897559,2,103,14,14,3.48,5,5,0,2,1,1,43,1,4,1,5 +62836,7,2,1,25,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,3,5,NA,2,2,2,2,2,2,NA,NA,NA,NA,39899.764102,44211.181291,2,93,4,4,0.82,4,4,0,0,0,1,51,2,3,1,3 +62837,7,2,2,18,NA,3,3,2,18,221,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,31716.869763,32646.512808,1,95,77,77,NA,3,3,0,0,0,2,41,1,2,5,NA 
+62838,7,2,2,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,19258.69251,19091.502506,2,99,12,3,1.16,6,1,0,0,0,2,57,1,5,2,NA +62839,7,2,2,3,NA,4,4,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8833.042831,9629.276723,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +62840,7,2,1,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,1,2,1,116001.539479,120724.7298,2,101,5,5,1.27,3,3,0,0,1,2,55,1,3,1,1 +62841,7,2,1,52,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,26135.885159,28867.753408,2,101,8,8,2.43,3,3,0,1,0,1,52,1,2,1,5 +62842,7,2,1,4,NA,4,4,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9885.965005,10900.89298,2,97,3,3,0.82,2,2,1,0,0,2,22,1,4,5,NA +62843,7,1,1,14,NA,2,2,NA,NA,NA,NA,NA,2,1,77,8,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,26616.794908,0,2,91,1,1,0.17,4,4,0,1,0,1,49,2,1,6,NA +62844,7,2,1,7,NA,5,6,2,7,91,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4852.395137,5120.382571,1,99,14,14,2.66,7,7,3,1,0,1,35,1,5,1,5 +62845,7,2,1,27,NA,4,4,1,NA,NA,2,NA,2,2,6,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18831.340773,22141.643614,2,93,14,14,2.78,5,5,0,0,0,1,52,2,4,1,2 +62846,7,1,2,80,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,12535.973802,0,2,100,5,2,0.41,3,2,0,0,2,2,80,1,1,2,NA +62847,7,2,1,44,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,33514.643153,35491.262929,1,90,15,15,5,4,4,0,2,0,2,43,1,5,1,5 +62848,7,2,1,38,NA,4,4,1,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,22185.88687,25293.081507,1,98,7,4,1.61,2,1,0,0,0,1,38,2,5,5,NA +62849,7,2,2,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,19075.861607,19193.852226,1,96,15,15,5,4,4,0,1,0,1,42,2,4,1,4 +62850,7,2,2,1,14,5,7,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6257.554806,6809.629993,2,92,15,15,5,3,3,1,0,0,2,37,1,5,1,5 +62851,7,2,1,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,94547.245282,99362.23578,2,91,5,5,1.39,2,2,0,0,0,1,58,1,4,3,NA 
+62852,7,2,2,4,NA,4,4,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8602.397328,9613.133098,2,99,13,13,NA,3,3,1,1,0,2,36,NA,NA,5,NA +62853,7,2,1,16,NA,5,6,1,16,196,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,5526.901806,5811.556543,2,103,7,7,1.65,4,4,0,1,1,2,55,2,3,1,NA +62854,7,2,1,8,NA,4,4,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13239.363106,13690.337986,3,92,6,6,0.93,5,5,2,1,0,2,37,1,5,1,3 +62855,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,147198.558516,148052.525251,2,91,14,14,5,1,1,0,0,0,2,47,1,4,3,NA +62856,7,2,2,2,NA,1,1,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10575.997988,11390.397051,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +62857,7,2,2,0,5,4,4,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3659.311381,3880.295282,1,99,8,8,1.99,5,5,1,0,0,1,55,1,5,1,2 +62858,7,2,1,17,NA,4,4,2,17,208,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11763.86086,12201.333419,1,99,14,14,4.86,3,3,0,1,1,2,56,1,5,1,5 +62859,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,16058.142925,18803.458224,2,95,7,7,1.41,5,5,2,0,0,2,53,1,3,3,NA +62860,7,2,1,55,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,16499.662173,16948.4296,3,91,6,6,1.81,3,3,0,1,0,2,47,2,3,1,3 +62861,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,60071.993203,66823.228048,1,95,5,5,1.88,1,1,0,0,1,2,80,1,3,1,NA +62862,7,2,1,0,2,3,3,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9661.66953,9494.640263,2,95,4,4,0.78,4,4,2,0,0,1,27,1,4,1,4 +62863,7,2,2,1,13,1,1,1,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14326.094268,14781.923489,3,92,5,5,0.87,4,4,2,0,0,2,28,1,3,1,3 +62864,7,2,2,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,32041.645327,32334.772002,1,94,1,1,0.32,1,1,0,0,0,2,57,1,4,5,NA +62865,7,1,1,17,NA,5,6,NA,NA,NA,2,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,10346.302718,0,2,91,4,4,0.65,5,5,1,3,0,1,43,2,3,5,NA 
+62866,7,2,2,5,NA,3,3,2,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,31376.988784,34630.493438,1,101,1,1,0.1,4,4,1,1,0,2,52,1,4,3,NA +62867,7,2,1,12,NA,3,3,2,12,152,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21603.199906,21630.881484,2,95,5,5,1.05,3,3,0,1,0,1,43,1,3,1,2 +62868,7,2,1,27,NA,3,3,2,NA,NA,2,NA,2,1,2,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23002.111021,24919.46038,2,97,6,6,1.16,4,4,1,1,0,1,27,2,4,1,3 +62869,7,2,1,2,NA,4,4,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6936.347746,7364.53247,1,98,15,15,5,6,6,3,0,0,1,37,2,5,1,4 +62870,7,2,1,13,NA,1,1,1,13,163,NA,NA,2,2,4,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,24228.858782,24791.594558,2,102,4,4,0.57,5,5,0,3,0,1,41,2,1,1,2 +62871,7,2,1,48,NA,5,7,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12490.95898,12886.461592,2,92,15,15,4.59,4,4,0,2,0,2,45,2,5,1,5 +62872,7,2,2,17,NA,3,3,1,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,78268.283998,80112.547303,2,98,15,15,5,4,4,0,2,0,2,46,1,4,1,NA +62873,7,1,1,15,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10879.348751,0,2,100,10,10,2.33,6,6,0,2,2,2,35,1,2,5,NA +62874,7,2,2,33,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,3,6,2,2,2,2,1,2,2,NA,NA,NA,NA,33355.357535,33078.896803,1,90,6,6,1.11,5,5,1,2,0,1,30,2,1,6,NA +62875,7,2,2,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,98217.832546,101788.674762,1,99,8,8,2.81,3,3,0,1,0,1,19,1,4,NA,NA +62876,7,2,2,74,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,63504.762752,64182.95787,1,91,7,7,3.67,1,1,0,0,1,2,74,1,4,2,NA +62877,7,2,1,8,NA,4,4,1,8,105,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13423.881856,14179.490667,2,101,1,1,0.15,3,3,0,2,0,2,58,1,3,5,NA +62878,7,2,1,15,NA,4,4,1,15,187,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16463.950838,16583.858395,2,96,5,5,0.67,6,6,1,2,1,1,34,1,4,1,4 +62879,7,2,2,14,NA,4,4,2,14,171,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18163.985724,18028.02401,2,101,8,8,2.43,3,3,0,1,0,1,52,1,2,1,5 
+62880,7,2,2,64,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,117778.281347,120111.495123,1,94,7,7,1.52,4,4,0,2,2,1,61,2,1,1,5 +62881,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,134213.669088,137679.44344,1,93,15,15,4.59,4,4,0,1,0,1,57,1,5,1,5 +62882,7,2,1,59,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11690.444016,12008.407525,3,90,77,77,NA,4,4,0,0,0,1,59,2,2,1,2 +62883,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,43807.995099,48928.4376,1,101,7,7,2.31,2,2,0,0,2,2,80,1,4,1,2 +62884,7,2,2,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,2,2,1,2,2,1,2,2,1,23725.035562,23797.982183,1,94,3,1,0.13,2,1,0,0,0,2,36,1,3,5,NA +62885,7,2,1,60,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,9392.289727,9639.798184,1,90,15,15,5,3,3,0,0,1,1,60,2,4,2,NA +62886,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15576.264148,16963.869897,1,103,8,8,1.95,4,4,0,1,0,2,48,1,5,1,5 +62887,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,11696.973403,11982.125462,2,101,1,1,0.2,2,2,0,0,1,2,55,1,4,4,NA +62888,7,2,1,6,NA,4,4,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10410.106675,10390.534388,2,96,4,4,0.57,5,5,0,3,0,2,26,1,2,5,NA +62889,7,2,1,2,NA,2,2,2,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8914.157023,9196.329398,1,99,8,8,1.91,5,5,2,0,1,2,38,2,4,1,4 +62890,7,2,2,16,NA,5,6,1,16,193,NA,NA,2,1,5,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8210.014908,8564.874659,2,92,15,15,5,4,4,0,2,1,2,59,1,5,1,5 +62891,7,2,2,74,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,13141.36586,14124.295248,1,96,12,12,NA,1,1,0,0,1,2,74,1,2,3,NA +62892,7,2,1,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,18225.504252,18491.00599,2,95,4,4,0.79,3,3,1,0,0,1,50,1,4,6,NA +62893,7,2,2,21,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,2,4,2,2,2,2,2,2,2,NA,NA,NA,NA,44119.608456,46238.164206,2,98,6,6,1.21,4,4,1,0,0,2,49,2,2,6,NA 
+62894,7,2,2,9,NA,4,4,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7620.214819,8204.982373,2,99,7,7,1.19,6,6,1,3,0,2,38,1,3,5,NA +62895,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,78240.016337,81914.532146,1,93,15,15,5,3,3,1,0,0,1,33,1,5,1,3 +62896,7,2,2,5,NA,3,3,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,53166.229434,56500.79967,1,98,10,10,2.2,6,6,1,3,0,2,31,1,4,6,NA +62897,7,2,1,5,NA,2,2,2,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17458.543556,17482.134163,2,91,10,10,3.04,4,4,1,1,0,2,31,2,5,1,5 +62898,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,22473.275629,22284.590536,2,97,7,7,2.38,2,2,0,0,0,1,25,1,2,1,2 +62899,7,2,2,45,NA,2,2,2,NA,NA,2,NA,2,2,77,NA,3,6,NA,2,2,2,2,2,2,NA,NA,NA,NA,23377.708086,24402.18571,3,90,77,77,NA,4,1,0,0,0,1,45,2,3,3,NA +62900,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,8949.073879,2,95,6,6,2.69,1,1,0,0,1,2,68,1,3,2,NA +62901,7,2,2,64,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11704.708696,12115.082932,1,101,15,15,5,3,3,0,0,1,1,58,2,4,1,5 +62902,7,2,2,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,35126.205635,36772.568368,1,95,5,5,1.08,3,3,0,1,0,1,53,1,4,1,4 +62903,7,2,1,7,NA,5,6,2,7,86,NA,NA,1,1,NA,0,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,9720.482616,10818.978947,2,91,12,12,NA,7,6,0,4,2,2,72,2,1,2,NA +62904,7,2,1,2,NA,4,4,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,6390.839385,2,100,8,8,1.1,7,7,3,3,0,2,58,1,3,5,NA +62905,7,2,2,1,20,5,6,2,NA,20,NA,NA,2,2,1,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4735.146545,4730.145135,3,90,77,77,NA,3,3,1,0,0,1,35,2,3,1,5 +62906,7,2,2,0,0,1,1,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6885.817041,7118.841014,2,96,2,2,0.27,5,5,1,2,0,2,26,1,2,1,2 +62907,7,2,2,16,NA,2,2,2,16,200,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,5,5,0.8,5,5,0,3,0,2,40,2,1,5,NA 
+62908,7,2,1,21,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,5,NA,2,2,2,1,2,2,2,2,2,2,30581.409928,30998.258142,3,90,7,7,1.48,5,5,0,1,0,1,43,2,1,6,NA +62909,7,2,2,31,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,24598.096547,23934.388227,2,99,13,13,NA,6,6,2,1,0,2,31,1,4,6,NA +62910,7,2,2,7,NA,4,4,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12120.418061,13050.526646,2,101,10,10,2.91,4,4,0,1,0,2,51,1,2,5,NA +62911,7,2,1,16,NA,1,1,1,16,198,NA,NA,1,1,NA,10,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,24902.864049,25283.513039,1,94,8,8,1.85,5,5,0,2,0,1,44,2,1,6,NA +62912,7,2,1,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,100742.761147,101756.213818,2,100,8,8,4.59,1,1,0,0,0,1,59,1,3,3,NA +62913,7,2,2,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,NA,NA,NA,NA,19463.737283,25688.523518,3,91,4,4,1.09,2,2,0,1,0,2,40,1,5,3,NA +62914,7,2,2,4,NA,3,3,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27528.66901,27890.404993,1,95,1,1,0.31,2,2,1,0,0,2,23,1,3,3,NA +62915,7,1,1,35,NA,1,1,NA,NA,NA,2,NA,2,2,3,NA,4,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,39073.76885,0,2,99,99,3,0.66,4,2,0,0,0,1,35,2,4,1,2 +62916,7,2,1,22,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,37911.437415,38922.114085,2,103,4,4,0.79,3,3,0,0,0,2,42,2,2,5,NA +62917,7,2,1,13,NA,3,3,2,13,159,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,69216.263169,70543.167563,1,91,14,14,3.15,5,5,0,1,0,2,50,1,5,1,5 +62918,7,2,1,25,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,18259.989639,18151.137608,2,95,6,6,1.08,4,4,1,0,0,2,42,1,4,4,NA +62919,7,2,2,34,NA,3,3,2,NA,NA,2,NA,2,2,99,NA,1,1,2,1,2,1,1,2,1,NA,NA,NA,NA,20717.382973,22040.621269,2,97,1,1,0.21,4,4,2,0,0,2,34,2,1,1,2 +62920,7,2,1,3,NA,2,2,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12920.2211,12937.679363,2,90,14,14,3.58,4,4,1,1,0,1,37,1,3,1,4 +62921,7,2,2,10,NA,4,4,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8147.287486,8590.325322,2,90,8,8,1.67,6,6,2,2,0,2,35,2,5,3,NA 
+62922,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,89807.047643,92234.66957,3,91,6,6,2.57,1,1,0,0,0,2,31,1,4,5,NA +62923,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,20333.447198,19976.945976,1,98,12,4,1.34,4,1,0,0,0,1,21,1,4,6,NA +62924,7,2,2,79,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,16361.152596,17584.911061,1,100,3,3,0.91,1,1,0,0,1,2,79,1,1,2,NA +62925,7,2,2,18,NA,3,3,2,18,226,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22699.241957,23364.572192,2,95,2,2,0.46,3,3,0,0,0,2,48,1,2,1,2 +62926,7,2,2,1,21,1,1,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11414.885224,11901.246118,1,94,6,6,1.11,5,5,1,2,0,2,41,2,1,1,1 +62927,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,138075.879417,141933.339512,2,91,15,15,5,3,3,0,0,2,2,62,1,4,1,4 +62928,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,15206.604563,14790.435937,2,90,6,6,0.84,6,6,1,3,1,2,43,1,2,5,NA +62929,7,2,1,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9221.19173,9293.322792,2,95,77,77,NA,2,2,0,0,2,1,68,1,4,1,4 +62930,7,2,1,23,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,4,5,NA,2,2,2,1,2,2,1,2,2,1,42077.383821,44160.926598,1,102,4,4,0.67,4,4,0,1,0,1,23,2,4,5,NA +62931,7,2,1,15,NA,5,6,1,15,187,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11171.449402,11936.033174,1,92,14,14,3.47,4,4,0,2,0,2,53,2,4,1,4 +62932,7,1,2,2,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,35285.563011,0,2,100,15,15,5,4,4,1,1,0,1,29,1,4,1,4 +62933,7,2,2,17,NA,5,6,2,17,207,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11975.458482,12291.118947,1,97,9,9,1.78,6,6,0,1,1,1,45,2,3,1,3 +62934,7,2,2,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,39030.893313,44151.142675,1,91,7,7,2.2,3,3,0,0,1,2,60,1,2,2,NA +62935,7,2,1,51,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,2,2,2,2,21139.303536,21566.542108,2,103,12,12,NA,4,4,0,1,0,2,50,2,3,1,4 
+62936,7,2,2,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,31335.13799,32985.054579,1,95,5,5,1.03,4,4,0,2,0,1,33,1,3,1,3 +62937,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,10346.035773,10598.2543,1,99,5,5,1.63,2,2,0,0,2,1,64,NA,NA,1,4 +62938,7,2,2,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,36053.766709,37679.125242,1,100,6,6,1.13,4,4,0,3,0,2,32,1,3,5,NA +62939,7,2,1,38,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,41155.167164,49173.131788,1,102,7,7,1.41,5,5,0,2,2,1,72,1,4,1,3 +62940,7,2,1,62,NA,4,4,2,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9814.165625,10072.791483,1,90,15,15,4.34,4,4,0,0,1,1,62,2,5,1,3 +62941,7,2,2,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,13728.308948,13352.59776,2,90,99,4,1.16,5,2,0,0,1,1,43,NA,NA,1,NA +62942,7,1,1,48,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,91761.294148,0,2,92,15,15,5,3,3,1,0,0,1,48,1,5,1,5 +62943,7,2,1,15,NA,2,2,1,15,189,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18042.255087,18115.696579,1,102,14,14,2.83,6,6,1,2,0,1,36,1,2,1,3 +62944,7,2,2,17,NA,4,4,2,17,211,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13775.846051,13737.029624,2,97,7,7,1.06,7,7,1,2,0,2,40,1,4,5,NA +62945,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19100.40225,19426.955733,1,96,15,15,4.9,4,4,0,1,0,1,47,1,3,1,5 +62946,7,2,1,9,NA,4,4,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9699.683862,9681.447258,2,101,2,2,0.27,6,6,0,3,0,2,45,1,2,5,NA +62947,7,1,2,68,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,1,2,NA,2,2,2,1,2,2,NA,NA,NA,NA,11469.456138,0,1,90,4,4,0.79,3,3,0,0,1,2,68,1,1,2,NA +62948,7,2,1,79,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,55509.98679,58756.097166,1,101,15,15,5,3,3,0,0,2,1,79,1,4,1,4 +62949,7,2,2,8,NA,4,4,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9139.784234,9384.299154,2,100,14,14,3.89,4,4,0,2,0,1,38,1,3,1,4 
+62950,7,2,2,16,NA,3,3,2,16,201,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,91539.042546,96988.607103,1,101,14,14,3.3,4,4,0,2,0,2,42,1,4,1,3 +62951,7,2,2,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,15190.604914,17388.543575,1,96,15,15,5,6,6,1,1,1,2,44,1,3,1,3 +62952,7,2,1,20,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,10547.036825,10866.006067,3,90,15,15,3.7,5,5,0,0,0,1,56,2,3,1,3 +62953,7,2,1,44,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17249.311662,18125.470447,2,91,14,14,3.47,4,4,1,1,0,2,36,2,3,1,5 +62954,7,2,1,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,18523.956321,18091.254104,2,99,2,2,0.72,1,1,0,0,0,1,28,1,5,5,NA +62955,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,1,2,2,1,76827.086279,81170.917712,2,99,15,15,5,2,1,0,0,0,2,31,1,5,1,NA +62956,7,2,2,0,7,5,6,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,4444.744532,4630.927855,2,90,3,3,0.65,5,3,1,2,0,1,44,2,5,1,5 +62957,7,2,2,3,NA,5,6,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5307.591514,5454.53335,2,102,3,3,0.38,5,5,3,0,0,2,30,2,2,1,4 +62958,7,2,2,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,65611.003966,67854.891478,1,94,15,15,4.95,4,4,0,0,2,1,72,1,3,1,3 +62959,7,2,2,13,NA,5,6,1,13,166,NA,NA,2,1,4,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8210.014908,8564.874659,2,92,15,15,5,4,4,0,2,1,2,59,1,5,1,5 +62960,7,2,2,3,NA,4,4,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9727.363166,10466.751274,2,97,6,6,1,6,6,1,2,2,2,60,1,2,2,NA +62961,7,2,1,0,6,4,4,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7882.953266,8719.306286,2,96,5,5,1.5,2,2,1,0,0,2,22,1,3,5,NA +62962,7,2,1,2,NA,4,4,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8009.966208,8254.622305,2,96,2,2,0.44,3,3,2,0,0,2,22,1,2,5,NA +62963,7,2,2,6,NA,5,7,2,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22371.648216,23759.450609,1,95,6,6,1.15,5,5,2,1,0,1,29,1,4,6,NA 
+62964,7,1,1,22,NA,5,6,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,13171.824182,0,1,100,NA,NA,NA,4,4,0,0,1,1,21,NA,NA,5,NA +62965,7,2,2,3,NA,5,7,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,29364.157365,32408.949946,1,102,2,2,0.49,3,3,1,0,0,1,20,1,2,6,NA +62966,7,2,2,21,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,24185.894115,24390.02655,1,93,3,3,0.7,3,3,1,0,0,1,23,2,4,1,2 +62967,7,2,1,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,27572.205373,28895.579143,1,100,15,15,5,3,3,0,0,0,2,33,1,5,1,4 +62968,7,2,1,5,NA,2,2,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13610.050526,14582.186889,2,100,3,3,0.76,3,3,1,0,0,2,31,2,1,6,NA +62969,7,2,1,14,NA,4,4,2,14,178,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16890.963304,17218.04077,1,91,10,10,2.56,5,5,0,3,0,1,51,2,5,1,4 +62970,7,1,1,6,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12577.115885,0,2,96,3,3,0.54,4,4,1,1,0,1,29,1,2,1,2 +62971,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,175544.769665,178639.415011,1,91,15,15,5,2,2,0,0,0,1,53,1,5,1,5 +62972,7,2,1,3,NA,2,2,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17458.543556,18705.569422,2,91,4,4,0.84,3,3,1,0,0,2,21,1,4,1,2 +62973,7,2,1,10,NA,1,1,1,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17882.621856,18280.794545,3,92,12,12,NA,6,6,1,3,0,2,33,1,5,1,4 +62974,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,86986.68246,91060.136414,2,94,9,9,5,1,1,0,0,0,1,30,1,5,5,NA +62975,7,2,2,10,NA,4,4,2,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7397.129324,7630.740597,1,96,12,12,NA,5,5,1,2,0,2,35,1,5,1,4 +62976,7,2,2,18,NA,3,3,2,18,221,2,NA,2,2,5,14,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,82089.722105,85880.903702,3,91,14,14,5,2,2,0,0,0,2,48,2,5,3,NA +62977,7,2,1,25,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,50326.347525,51411.397032,1,93,12,12,NA,3,1,0,0,0,1,24,NA,NA,5,NA 
+62978,7,2,2,18,NA,1,1,1,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,21062.314667,23105.758741,1,91,14,14,3.9,4,4,0,1,0,1,41,1,2,1,4 +62979,7,2,2,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,53370.063988,54427.336747,3,92,4,4,1.13,2,2,0,0,2,1,64,1,3,1,4 +62980,7,1,1,39,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,19384.896286,0,1,94,5,5,1.05,3,3,0,1,0,1,39,1,4,6,NA +62981,7,2,1,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,25120.174741,25387.314373,2,102,15,15,4.47,4,4,1,1,0,1,32,1,5,1,4 +62982,7,2,2,66,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,49644.076348,54110.530387,1,97,3,3,1.16,1,1,0,0,1,2,66,1,3,2,NA +62983,7,2,1,7,NA,4,4,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7730.47951,9212.541007,1,99,6,6,1.3,5,5,1,2,0,1,34,1,2,1,3 +62984,7,2,2,5,NA,2,2,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8825.559072,9505.166523,2,90,7,7,0.89,7,7,1,3,3,1,60,2,3,1,3 +62985,7,2,1,68,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11138.867102,11750.838884,1,102,7,7,2.72,2,2,0,0,2,2,67,NA,NA,1,5 +62986,7,2,1,14,NA,3,3,1,14,169,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,69211.537407,68936.770872,1,92,8,8,1.45,6,6,1,3,0,1,36,1,3,1,4 +62987,7,2,1,10,NA,2,2,1,10,125,NA,NA,2,1,3,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,13395.612951,13401.547169,2,102,7,7,1.53,5,5,1,2,0,2,37,2,4,1,4 +62988,7,2,1,10,NA,2,2,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11036.458246,11087.266596,1,102,14,14,2.83,6,6,1,2,0,1,36,1,2,1,3 +62989,7,2,2,1,19,5,6,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8173.816615,8894.95474,1,97,15,15,5,4,4,2,0,0,2,35,2,4,1,4 +62990,7,2,1,79,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,79754.311902,90534.066519,1,97,9,9,3.97,2,2,0,0,2,1,79,1,3,1,3 +62991,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,129608.716834,132839.525748,1,100,8,8,2.97,2,2,0,0,0,1,24,1,5,1,5 
+62992,7,2,2,9,NA,1,1,2,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18668.602894,18957.049554,1,101,4,4,0.84,3,3,0,1,0,1,42,1,4,1,4 +62993,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,74501.766669,80316.820348,1,90,14,14,3.93,3,3,1,0,0,1,35,1,2,1,5 +62994,7,2,2,49,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,3,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,18255.735511,18352.270791,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 +62995,7,2,1,18,NA,4,4,2,18,220,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11386.695644,11908.648036,2,99,6,6,1.18,5,5,0,3,0,2,38,1,2,5,NA +62996,7,2,2,9,NA,2,2,2,9,114,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13257.060167,14867.908577,2,90,8,8,1.72,5,5,1,2,0,1,20,2,1,1,2 +62997,7,2,1,60,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,29552.786683,29829.95179,2,96,3,3,0.95,2,2,0,0,2,2,62,1,4,1,4 +62998,7,2,1,19,NA,5,6,1,20,NA,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6558.308393,6849.983523,2,92,99,99,NA,4,1,0,0,0,1,19,1,4,NA,NA +62999,7,2,2,9,NA,5,7,1,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6137.256046,6434.751859,1,103,15,15,3.7,5,5,0,2,1,1,55,1,5,1,5 +63000,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,11862.436765,12619.924744,1,90,2,2,0.45,1,1,0,0,1,2,60,1,5,3,NA +63001,7,2,1,5,NA,4,4,1,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9624.976734,10219.132066,2,96,3,3,0.38,5,5,1,2,0,2,30,1,3,5,NA +63002,7,2,2,54,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,21901.662873,22693.823819,2,93,8,8,2.49,3,3,0,0,0,1,52,2,2,1,4 +63003,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,151649.038926,151259.483772,1,101,13,13,NA,2,2,0,0,0,1,40,1,2,1,3 +63004,7,2,2,38,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,3,1,2,1,1,2,2,1,2,2,NA,13047.751375,13287.186355,2,92,15,15,5,3,3,1,0,0,1,39,2,5,1,5 +63005,7,2,1,19,NA,1,1,1,19,232,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,32326.52031,34735.818434,2,94,5,5,0.65,6,6,0,3,0,1,44,2,1,1,1 
+63006,7,2,1,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,123771.419917,127443.198085,2,98,10,10,3.78,3,3,0,0,0,1,53,1,3,1,4 +63007,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,47098.572584,49762.054694,1,95,7,7,2.38,2,2,0,0,2,1,80,1,3,1,4 +63008,7,2,1,38,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,15051.024912,15121.525249,3,90,15,15,5,5,5,1,0,1,1,38,2,3,1,4 +63009,7,2,1,6,NA,4,4,2,7,84,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8483.005475,8584.70346,1,96,10,10,2.95,4,4,0,1,0,2,34,2,3,1,5 +63010,7,2,2,78,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,12535.973802,12972.182488,2,100,7,7,2.37,3,3,0,1,1,2,45,1,5,1,NA +63011,7,2,2,16,NA,3,3,2,16,194,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,122091.956024,127137.394167,2,91,15,15,5,3,3,0,1,0,1,52,1,5,1,5 +63012,7,2,1,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,10,10,4.76,2,2,0,0,2,2,64,1,4,1,5 +63013,7,2,1,65,NA,5,6,1,NA,NA,1,1,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11138.867102,11750.838884,1,102,14,14,5,2,2,0,0,2,1,65,2,5,1,5 +63014,7,2,2,11,NA,4,4,2,11,133,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7814.742747,7978.458393,1,98,8,8,2.97,2,2,0,1,0,2,40,1,4,3,NA +63015,7,2,1,65,NA,5,7,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,117343.481374,118848.614941,1,91,7,7,2.64,2,2,0,0,1,2,58,1,5,1,4 +63016,7,2,2,14,NA,2,2,2,14,170,NA,NA,2,2,3,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,12680.621719,14355.413798,2,90,3,3,0.46,5,5,0,2,2,1,75,2,1,1,2 +63017,7,2,1,71,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,1,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,12511.526803,13352.932153,2,100,4,4,0.44,7,7,1,2,2,1,71,2,1,1,1 +63018,7,2,1,10,NA,3,3,1,10,124,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,27154.222487,28089.451048,2,101,4,4,0.73,5,5,1,2,0,1,40,1,5,1,5 +63019,7,2,2,65,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,4,3,NA,2,2,2,2,2,2,1,2,2,2,10235.0654,10662.230581,2,93,3,3,0.87,2,2,0,0,1,2,65,2,4,3,NA 
+63020,7,2,1,6,NA,1,1,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13533.281742,15943.839793,1,100,6,6,1.11,5,5,0,2,1,1,38,2,2,1,1 +63021,7,2,2,46,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,3,5,NA,1,2,1,1,2,1,NA,NA,NA,NA,19524.115198,21455.7569,1,93,7,7,2.38,2,2,0,0,0,2,46,2,3,5,NA +63022,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,92467.370896,92229.84126,1,92,14,14,3.16,6,6,1,1,0,1,49,1,1,1,3 +63023,7,2,1,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,124820.027137,126759.886775,1,91,6,3,0.92,2,1,0,0,0,2,24,1,4,6,NA +63024,7,2,2,78,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,12692.401451,13134.05328,2,95,12,12,NA,3,3,0,0,2,1,65,1,4,1,4 +63025,7,2,2,79,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,14226.966896,14722.016314,2,100,5,5,0.95,4,4,0,0,1,2,53,1,3,5,NA +63026,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,53830.599426,56471.452044,1,99,15,15,5,3,3,1,0,0,2,31,1,5,1,5 +63027,7,2,1,15,NA,1,1,1,15,181,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18242.832494,18521.680571,1,102,6,6,1.03,6,6,0,4,0,1,34,2,2,1,1 +63028,7,2,2,6,NA,1,1,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14414.529053,14932.182215,2,96,8,8,1.33,7,7,2,1,1,1,62,2,1,1,1 +63029,7,2,2,2,NA,5,7,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20272.436775,20911.100412,2,94,77,77,NA,4,4,2,0,0,2,23,1,2,6,NA +63030,7,2,1,21,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,42165.369652,45330.590147,2,102,3,3,0.45,4,4,2,0,0,1,21,2,2,6,NA +63031,7,2,2,77,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,15183.616455,20169.750633,1,90,6,6,1.78,2,2,0,0,1,2,77,1,1,2,NA +63032,7,2,1,14,NA,4,4,2,14,172,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11085.075029,11001.247749,1,93,1,1,0.02,5,5,0,4,0,2,36,NA,NA,5,NA +63033,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,153755.392794,158780.137655,1,91,15,15,5,4,4,0,1,0,1,45,1,5,1,5 
+63034,7,2,2,5,NA,3,3,2,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27257.164734,28966.726071,1,92,4,4,0.61,5,5,1,2,0,1,34,1,3,6,NA +63035,7,2,1,7,NA,5,7,2,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8017.552697,8398.795399,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +63036,7,2,2,74,NA,5,6,2,NA,NA,2,NA,2,2,7,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,12895.665603,13811.504793,1,93,2,2,0.55,1,1,0,0,1,2,74,2,1,3,NA +63037,7,2,2,41,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,4,2,2,2,2,1,2,2,2,2,2,2,34501.569761,34681.492512,1,93,5,5,1.36,2,2,0,0,0,2,41,2,1,4,NA +63038,7,2,2,2,NA,5,7,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7871.443574,8074.618887,1,96,6,6,1.49,3,3,1,0,0,2,24,1,4,5,NA +63039,7,2,2,2,NA,1,1,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9955.153132,10582.75863,3,92,6,6,1,6,6,1,1,0,1,42,2,1,1,4 +63040,7,2,2,54,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,2,1,NA,1,2,1,NA,NA,NA,1,2,1,3,15268.129241,15348.866231,2,96,NA,NA,NA,4,4,0,0,1,1,67,2,3,1,2 +63041,7,2,1,35,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14376.416432,15539.834252,2,103,15,15,5,4,4,0,3,0,1,35,1,5,5,NA +63042,7,2,2,12,NA,4,4,2,12,146,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13841.239638,13875.328502,1,96,15,15,4.52,6,6,0,4,0,1,46,1,4,1,4 +63043,7,2,2,20,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,11609.8069,12151.984044,2,103,77,77,NA,5,5,0,2,0,2,39,2,5,1,5 +63044,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,79316.940936,94360.361124,1,93,10,10,5,1,1,0,0,0,1,35,1,5,5,NA +63045,7,2,2,53,NA,1,1,1,NA,NA,2,NA,2,2,8,NA,2,4,NA,1,2,2,2,2,2,1,2,2,1,18295.488967,18390.898385,2,103,8,8,1.29,7,7,3,1,0,2,53,2,2,4,NA +63046,7,2,1,1,NA,3,3,2,NA,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,39241.111643,42207.80453,1,99,6,6,1.73,3,3,1,1,0,2,42,1,5,3,NA +63047,7,2,2,6,NA,3,3,2,6,81,NA,NA,2,1,2,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14287.66096,14320.169639,2,97,6,6,1.16,4,4,1,1,0,1,27,2,4,1,3 
+63048,7,2,1,29,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15875.103787,17176.519619,1,93,10,10,4.76,2,2,0,0,0,1,29,2,5,1,5 +63049,7,2,1,19,NA,2,2,1,19,233,2,NA,2,1,5,15,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,30061.88611,30025.949584,1,92,6,6,0.93,5,5,0,2,0,1,47,2,1,1,1 +63050,7,2,1,24,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,12364.328404,12075.509308,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +63051,7,2,1,30,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22403.911395,23911.071969,2,96,14,14,5,2,2,0,0,0,1,30,2,5,1,5 +63052,7,2,2,37,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,3,2,1,2,2,1,2,2,NA,NA,NA,NA,19741.154437,19781.46423,1,100,7,7,2.51,2,2,0,1,0,2,37,2,5,3,NA +63053,7,2,2,8,NA,1,1,1,8,101,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12789.411811,13121.359297,1,102,3,3,0.54,3,3,0,2,0,2,42,2,2,3,NA +63054,7,2,2,11,NA,1,1,1,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14469.11917,14745.425968,2,92,15,15,3.37,7,7,0,4,0,1,42,2,3,1,1 +63055,7,2,2,2,NA,3,3,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25257.21318,27876.153487,1,98,2,2,0.31,3,3,1,0,0,1,45,NA,NA,1,NA +63056,7,2,1,21,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,15196.92397,16091.373745,2,101,7,3,1.1,3,1,0,0,0,1,21,2,4,5,NA +63057,7,2,1,29,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,43892.276067,44574.41704,1,98,6,6,2.6,2,1,0,0,0,1,29,1,4,5,NA +63058,7,2,2,13,NA,5,6,2,13,158,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6135.470395,6372.721366,1,91,15,15,5,6,6,0,2,2,1,50,2,5,1,5 +63059,7,2,1,7,NA,4,4,1,7,87,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10469.725162,11059.049224,1,100,6,6,1.13,4,4,0,3,0,2,32,1,3,5,NA +63060,7,2,2,7,NA,1,1,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13064.573334,13201.924867,1,103,5,5,0.71,6,6,2,2,0,2,31,2,2,1,2 +63061,7,2,2,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,1,1,2,2,1,2,2,NA,NA,NA,NA,22326.231285,22166.696692,1,99,7,7,1.06,7,7,3,1,0,1,38,1,4,6,NA 
+63062,7,2,1,4,NA,4,4,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10690.995725,11500.031681,2,98,5,5,0.59,7,7,3,0,0,2,50,1,5,4,NA +63063,7,2,2,7,NA,3,3,1,7,87,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,60557.637514,61692.695301,3,92,6,6,1.17,4,4,0,2,0,2,30,1,2,1,4 +63064,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,4,NA,1,2,2,1,2,2,1,2,2,1,6910.118936,7233.371983,2,95,3,3,1.07,1,1,0,0,1,1,61,1,1,4,NA +63065,7,2,1,11,NA,1,1,1,11,143,NA,NA,2,2,3,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,NA,13822.148996,14860.201344,3,92,4,4,0.55,6,6,0,4,0,1,36,2,1,1,3 +63066,7,2,1,3,NA,3,3,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,61250.535365,69105.159981,1,90,8,8,1.67,5,5,2,1,0,2,28,1,4,1,5 +63067,7,2,2,0,5,5,6,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6956.259272,7247.645977,1,95,5,5,0.73,6,6,1,0,1,1,62,2,3,1,NA +63068,7,2,1,9,NA,3,3,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,26240.82746,27691.552263,1,94,4,4,0.56,5,5,1,2,0,1,34,1,2,3,NA +63069,7,2,1,20,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,4,5,NA,1,2,2,1,2,2,1,2,2,3,14385.653726,15533.829525,2,101,6,2,0.46,2,1,0,0,0,1,20,2,4,5,NA +63070,7,2,2,70,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,1,5,NA,2,2,2,1,2,2,2,2,2,NA,21122.17432,23417.039872,2,93,6,6,0.64,7,7,2,1,3,2,60,2,3,2,NA +63071,7,2,2,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,20450.752436,20266.431761,1,98,6,6,1.72,2,2,0,0,0,2,53,1,5,2,NA +63072,7,2,2,12,NA,3,3,2,12,152,NA,NA,2,1,1,7,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,31756.649984,32677.072621,2,97,5,5,0.8,5,5,1,2,0,1,46,2,4,1,2 +63073,7,2,1,16,NA,3,3,1,16,194,NA,NA,1,1,NA,8,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,25367.590269,25034.201904,1,94,6,6,1.26,5,5,0,2,0,2,38,1,4,1,NA +63074,7,2,1,3,NA,4,4,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9698.338509,2,97,NA,99,NA,7,6,2,1,1,2,56,1,3,5,NA +63075,7,2,2,12,NA,2,2,1,12,152,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20502.928313,21114.699878,2,98,12,12,NA,3,3,0,1,0,2,34,1,4,1,3 
+63076,7,2,1,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,21168.33083,23066.473787,2,99,2,2,0.4,2,2,0,0,0,2,52,1,3,1,3 +63077,7,2,1,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,86986.68246,92645.868047,2,94,15,15,4.59,4,4,1,1,0,2,37,1,5,1,5 +63078,7,2,1,6,NA,1,1,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8828.580268,8822.70874,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +63079,7,2,1,17,NA,5,6,1,17,212,2,NA,2,1,5,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11171.449402,11936.033174,1,92,14,14,3.47,4,4,0,2,0,2,53,2,4,1,4 +63080,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,NA,NA,NA,NA,30505.56355,30746.687156,1,97,3,3,0.93,2,2,0,1,0,2,34,1,4,5,NA +63081,7,2,1,62,NA,5,7,2,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19328.482066,19868.466665,3,90,7,7,2.51,2,2,0,0,2,1,62,2,3,1,4 +63082,7,2,2,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,175997.804296,180603.118894,1,101,4,2,0.82,2,1,0,0,1,1,63,1,2,6,NA +63083,7,2,2,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,69601.920609,70345.22994,1,98,7,7,3.58,1,1,0,0,1,2,71,1,4,2,NA +63084,7,2,1,4,NA,3,3,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,45649.909879,51503.94697,1,99,15,15,5,5,5,3,0,0,2,34,1,5,1,5 +63085,7,2,2,4,NA,4,4,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8267.490849,9012.744386,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +63086,7,2,2,74,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,62212.598767,64340.261278,1,94,15,15,5,2,2,0,0,2,1,75,1,4,1,3 +63087,7,2,1,3,NA,3,3,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,81508.607355,87670.792766,1,94,14,6,1.65,3,2,1,0,0,1,26,1,4,6,NA +63088,7,2,1,44,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11113.602843,11073.454175,3,90,10,10,2.41,5,5,1,2,0,1,44,2,4,1,5 +63089,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,80232.348161,83992.832552,1,99,15,15,4.47,4,4,0,2,0,2,43,1,5,1,5 
+63090,7,2,2,15,NA,3,3,2,15,186,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,41202.729804,43105.611516,2,91,6,6,1.26,5,5,0,1,2,2,80,1,4,2,NA +63091,7,2,1,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,NA,55996.810038,59271.38884,2,93,15,8,4.48,2,1,0,0,2,2,63,2,4,6,NA +63092,7,2,2,17,NA,4,4,2,18,216,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12581.940435,12644.353117,2,97,6,6,0.92,7,7,1,4,0,2,29,1,3,5,NA +63093,7,2,2,10,NA,3,3,1,10,130,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,1,2,1,2,2,1,21800.021754,23152.363882,3,91,7,1,0,7,1,0,4,0,1,40,1,4,1,3 +63094,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,27167.433399,29371.776665,1,94,7,7,2.58,2,2,0,0,2,2,71,1,5,1,4 +63095,7,2,2,4,NA,4,4,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,11664.396755,2,100,9,9,2.46,4,4,1,1,1,2,59,1,3,1,3 +63096,7,2,1,8,NA,2,2,1,8,101,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13672.897697,14159.650726,2,100,14,14,3.58,4,4,0,2,0,2,40,NA,NA,1,4 +63097,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,71070.181743,74923.26272,1,95,7,7,2.45,2,2,0,0,2,2,70,1,3,1,3 +63098,7,2,2,10,NA,3,3,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,60197.256541,63931.531988,2,94,14,14,2.63,6,6,1,3,0,1,39,1,4,1,4 +63099,7,2,2,3,NA,2,2,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15550.931019,16748.422137,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +63100,7,2,2,13,NA,1,1,1,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22424.988432,22856.096824,1,94,5,5,0.87,4,4,0,2,0,2,41,2,4,1,1 +63101,7,2,1,6,NA,4,4,1,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11060.738342,11193.339318,2,96,6,6,1.48,4,4,1,1,0,2,25,1,4,5,NA +63102,7,2,2,19,NA,1,1,1,19,238,2,NA,2,2,1,12,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,16427.640886,17012.806816,1,102,5,5,0.98,4,4,1,1,0,2,42,2,2,6,NA +63103,7,2,1,46,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,12820.674452,13169.378628,3,90,15,15,5,3,3,0,0,0,1,46,2,3,1,3 
+63104,7,2,2,6,NA,4,4,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8146.767632,8699.382682,2,100,15,15,4.97,5,5,0,2,1,2,42,1,5,1,5 +63105,7,1,1,14,NA,5,6,NA,NA,NA,NA,NA,2,1,4,9,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8915.81491,0,1,95,3,3,0.43,4,4,0,1,2,1,65,2,5,1,3 +63106,7,2,1,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,32838.149884,1,95,3,3,0.95,1,1,0,0,0,1,50,1,2,3,NA +63107,7,2,2,66,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,9716.805546,12994.252166,2,90,3,3,1.1,1,1,0,0,1,2,66,2,1,1,NA +63108,7,2,1,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,26847.643051,28699.104328,1,98,4,4,0.66,4,4,2,0,0,2,22,1,4,6,NA +63109,7,2,2,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,105873.555835,111215.69511,1,101,14,14,3.9,4,4,0,2,0,2,41,1,2,1,2 +63110,7,2,1,36,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22439.245437,22882.446062,2,96,14,14,3.58,4,4,2,0,0,1,36,1,4,1,5 +63111,7,2,1,8,NA,1,1,1,8,103,NA,NA,2,7,77,1,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,11399.23838,11295.714433,1,103,8,8,1.85,5,5,2,1,0,2,25,2,2,1,2 +63112,7,2,2,66,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,17221.648537,17753.661813,1,95,6,6,1.7,2,2,0,0,2,2,66,2,1,1,3 +63113,7,2,2,3,NA,1,1,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17543.203297,19368.16722,1,95,8,8,2.24,4,4,2,0,0,2,29,1,3,1,4 +63114,7,2,1,15,NA,5,6,1,15,189,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11657.164593,12175.606972,1,92,9,9,2.88,3,3,0,2,0,1,50,2,5,3,NA +63115,7,2,1,4,NA,3,3,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,32621.433667,36804.729607,2,101,4,4,0.73,5,5,1,2,0,1,40,1,5,1,5 +63116,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,24291.664633,24483.672027,1,99,8,8,4.13,1,1,0,0,0,2,34,1,5,5,NA +63117,7,2,1,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,6,NA,1,2,2,1,2,2,1,2,2,3,14385.653726,15533.829525,2,101,8,5,2.2,2,1,0,0,0,1,24,2,4,6,NA 
+63118,7,2,1,16,NA,1,1,1,16,198,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30061.88611,30025.949584,1,92,7,7,1.48,5,5,0,1,0,1,42,1,5,1,4 +63119,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,84933.772167,92023.102027,2,99,15,15,5,2,2,0,0,0,2,37,1,5,1,5 +63120,7,2,2,4,NA,3,3,1,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,55336.504558,58807.19381,2,102,14,14,3.44,5,5,1,2,0,2,34,1,4,6,NA +63121,7,2,1,9,NA,1,1,1,9,114,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15233.096858,16234.004804,1,94,15,15,4.37,7,7,0,4,1,1,58,1,4,1,5 +63122,7,2,2,3,NA,2,2,1,3,44,NA,NA,2,2,2,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10301.516763,10950.95816,2,93,6,6,0.93,5,5,1,2,0,1,40,2,4,1,4 +63123,7,2,1,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18753.573091,18904.865313,2,99,2,2,0.77,1,1,0,0,0,1,55,1,3,5,NA +63124,7,2,2,54,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15460.72194,15832.955199,1,102,9,9,2.39,5,5,0,1,1,1,55,2,5,1,5 +63125,7,2,2,46,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22109.546782,21887.540337,1,93,7,7,1.79,4,4,0,2,0,1,53,2,4,1,4 +63126,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,111448.08155,119470.923607,1,90,15,15,5,4,4,0,2,0,1,44,1,5,1,5 +63127,7,2,1,28,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14391.77847,15024.211748,1,92,14,14,3.3,4,4,2,0,0,1,28,1,4,1,4 +63128,7,2,2,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,3,1,2,2,1,2,2,1,2,2,1,38589.695298,40356.151096,2,96,6,5,1.84,2,1,0,0,0,2,26,1,2,5,NA +63129,7,2,1,14,NA,1,1,1,14,172,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22768.423624,23297.239555,2,98,6,6,1.4,3,3,0,1,0,1,56,1,2,1,2 +63130,7,2,2,16,NA,5,6,2,16,196,NA,NA,2,2,1,8,NA,NA,NA,1,2,1,NA,NA,NA,1,2,1,NA,10767.566937,12084.516227,2,91,99,1,0,7,3,0,4,0,1,36,2,9,1,2 +63131,7,2,2,2,NA,4,4,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8480.466509,9125.076563,1,100,13,13,NA,5,5,2,0,0,2,54,1,4,5,NA 
+63132,7,2,2,80,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,4,2,NA,1,2,1,1,2,2,1,2,1,NA,18698.205673,19635.336647,1,97,15,15,4.81,5,5,0,1,1,1,51,2,5,1,5 +63133,7,2,1,24,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,35669.2076,40318.090187,2,94,8,8,2.33,4,4,2,0,0,1,24,1,2,6,NA +63134,7,2,2,66,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,3,2,NA,2,2,2,2,2,2,2,2,2,2,11851.128358,12345.740673,2,93,4,4,0.69,4,4,0,1,1,2,66,2,3,2,NA +63135,7,2,1,15,NA,4,4,2,15,190,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11351.725436,11256.943498,2,95,7,7,1.74,4,4,0,2,0,2,47,1,5,4,NA +63136,7,2,1,18,NA,3,3,2,18,224,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,66448.116716,70579.976727,2,94,7,7,1.17,6,6,0,3,0,1,40,1,3,1,5 +63137,7,2,1,49,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19898.840409,19922.416835,2,94,15,15,5,2,2,0,0,0,1,49,2,5,1,5 +63138,7,2,1,15,NA,3,3,2,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,76583.482739,78051.61978,3,91,4,4,0.92,3,3,0,1,0,2,53,1,4,1,4 +63139,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,24118.129224,23465.858863,2,96,3,3,0.59,3,3,1,0,0,2,25,1,4,1,NA +63140,7,2,2,9,NA,1,1,1,9,113,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,12307.832776,12437.228482,2,93,77,77,NA,7,7,3,1,0,2,43,2,1,1,9 +63141,7,2,2,34,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,27303.803575,30038.060995,1,96,10,10,2.95,4,4,0,1,0,2,34,2,3,1,5 +63142,7,2,1,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19064.767778,19872.734197,2,93,10,10,3.93,3,3,0,0,2,1,54,1,5,1,5 +63143,7,2,1,70,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,10498.47031,11456.643725,3,90,4,4,1.22,2,2,0,0,2,2,69,2,4,1,1 +63144,7,2,1,1,21,2,2,2,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10803.555682,11145.535856,2,94,7,7,1.34,5,5,2,1,0,1,32,2,1,1,NA +63145,7,2,1,50,NA,4,4,2,NA,NA,2,NA,2,2,6,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,15044.515884,15040.577827,3,90,5,5,0.87,4,4,0,0,0,2,43,2,3,5,NA 
+63146,7,2,2,6,NA,4,4,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8362.256577,9003.967662,2,100,3,3,0.38,5,5,2,1,0,2,28,1,2,5,NA +63147,7,2,1,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,31010.243793,32025.638239,3,92,6,6,1.41,4,3,0,1,0,1,41,1,4,1,4 +63148,7,2,1,40,NA,1,1,2,NA,NA,2,NA,2,2,77,NA,3,1,NA,2,2,2,1,2,2,2,2,2,2,31640.296506,31176.023929,2,94,6,4,1.38,2,1,0,0,0,1,40,2,3,1,NA +63149,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,145772.192378,153127.526275,1,95,9,9,2.66,4,4,0,2,0,1,45,1,3,1,3 +63150,7,2,1,19,NA,3,3,2,19,229,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,93665.036597,95017.313859,1,91,15,15,5,4,4,0,1,0,1,45,1,5,1,5 +63151,7,2,1,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,17212.153036,21760.360036,3,90,7,6,2.42,2,1,0,0,0,1,44,1,2,6,NA +63152,7,2,2,62,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15277.358397,15812.991914,2,96,5,1,0,2,1,0,0,1,1,46,2,3,3,NA +63153,7,2,1,2,NA,2,2,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13002.944731,13931.716845,2,91,2,2,0.42,3,3,1,1,0,2,27,1,3,5,NA +63154,7,2,1,36,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,51543.062078,51154.050295,3,92,6,6,0.86,7,7,1,4,0,2,36,2,1,1,1 +63155,7,2,1,18,NA,3,3,2,19,228,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,68148.957861,67253.324127,1,93,15,15,3.92,5,5,0,1,0,2,54,1,5,1,5 +63156,7,2,2,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,49260.413155,50771.674072,2,100,8,8,3.4,2,2,0,0,2,1,69,1,4,1,4 +63157,7,2,1,13,NA,3,3,2,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,32477.57544,32050.74555,1,95,4,4,0.65,6,6,2,2,0,2,36,1,4,6,NA +63158,7,2,2,0,5,3,3,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10263.789813,10611.184403,1,94,8,8,1.39,7,7,2,0,1,2,52,1,5,2,NA +63159,7,2,1,48,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,37724.155022,39553.56554,2,102,4,4,1.16,2,2,0,0,0,1,48,1,4,1,4 
+63160,7,2,2,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,130577.02995,132175.980892,1,98,5,5,1.81,1,1,0,0,1,2,61,1,3,2,NA +63161,7,2,2,37,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,4,3,2,2,2,2,1,2,2,2,2,2,2,39450.135734,38385.688196,2,93,9,9,1.94,6,6,0,3,0,2,37,NA,NA,3,NA +63162,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,19000.571418,18807.33617,1,96,8,8,4.48,1,1,0,0,0,2,58,1,4,3,NA +63163,7,2,1,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,146181.198007,148606.927767,2,91,15,2,0.85,7,1,0,0,1,1,49,NA,NA,5,NA +63164,7,2,1,54,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,27343.459193,27021.369249,1,90,15,15,5,4,4,0,0,0,1,54,1,5,1,5 +63165,7,1,2,13,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16537.460749,0,2,100,1,1,0.04,4,4,0,2,0,1,34,NA,NA,6,NA +63166,7,2,2,17,NA,4,4,2,17,205,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10848.628906,11198.221038,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +63167,7,2,1,37,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12327.761744,13424.968474,3,90,12,12,NA,5,5,1,2,0,1,37,2,5,1,5 +63168,7,2,1,10,NA,5,6,1,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8580.826574,9087.233306,1,92,14,14,2.42,6,6,1,3,0,1,30,1,4,6,NA +63169,7,2,2,33,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,53955.606082,55487.991675,2,100,6,6,1.7,3,3,0,1,0,2,33,1,4,6,NA +63170,7,2,2,53,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,22224.73066,22340.630739,2,94,7,7,1.33,6,6,0,1,0,1,55,2,2,1,1 +63171,7,2,1,20,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,9162.933592,9412.398479,1,103,5,5,0.65,6,6,0,0,1,2,26,2,4,5,NA +63172,7,2,2,68,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,2,NA,2,2,2,1,2,2,2,2,2,2,13057.178942,15139.165763,1,102,6,6,1.18,5,5,0,2,1,2,42,2,2,2,NA +63173,7,2,1,18,NA,5,6,2,18,217,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,5096.872559,5447.231524,3,91,7,7,1.33,6,6,0,0,2,2,51,2,5,1,5 
+63174,7,2,2,47,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,29650.79971,34013.933936,2,90,14,14,5,1,1,0,0,0,2,47,1,5,5,NA +63175,7,2,1,15,NA,3,3,1,15,184,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,31348.486193,31739.246143,1,98,5,5,0.74,5,5,0,3,0,1,35,1,2,6,NA +63176,7,2,1,19,NA,4,4,2,19,230,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13416.172328,13513.882801,1,96,4,4,0.65,4,4,0,0,0,1,19,1,4,NA,NA +63177,7,2,1,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,8355.583627,8420.94363,2,93,10,10,3.93,3,3,0,0,2,1,54,1,5,1,5 +63178,7,2,2,40,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,30543.064147,31583.211468,2,101,8,8,2.81,3,3,0,1,0,1,35,1,1,1,2 +63179,7,2,1,8,NA,2,2,1,8,100,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,2,2,2,2,13898.598114,14013.214919,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +63180,7,2,1,50,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10857.294346,10859.552755,1,91,15,15,5,6,6,0,2,2,1,50,2,5,1,5 +63181,7,2,2,67,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,9716.805546,10308.451947,2,90,8,8,2.01,4,4,0,0,1,2,67,2,4,2,NA +63182,7,2,2,14,NA,4,4,2,14,171,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10671.280357,10591.403308,1,99,15,8,2.7,4,3,0,2,0,1,49,1,4,6,NA +63183,7,2,2,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,20135.631696,26575.299535,1,96,4,4,1.12,2,2,0,1,0,2,44,1,2,5,NA +63184,7,2,1,3,NA,3,3,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,86817.367332,95073.481824,2,91,15,15,5,4,4,2,0,0,1,34,1,5,1,5 +63185,7,2,1,3,NA,4,4,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,11070.778245,1,100,3,3,0.73,3,3,2,0,0,2,39,1,3,5,NA +63186,7,2,1,11,NA,5,7,1,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11611.58308,11813.013945,1,98,14,14,3.16,6,6,2,2,0,1,39,1,5,1,5 +63187,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,26773.686592,29962.63786,2,98,6,6,1.7,2,2,0,0,2,2,80,1,2,1,2 
+63188,7,2,2,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,12782.405209,13353.125929,1,100,3,3,1.17,1,1,0,0,1,2,60,1,4,4,NA +63189,7,2,2,0,5,4,4,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4964.196196,5018.399368,1,93,1,1,0.16,3,3,1,1,0,2,39,1,3,5,NA +63190,7,2,1,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,104765.204996,105038.68379,2,92,15,15,5,5,5,0,3,0,2,46,1,5,1,5 +63191,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,70667.985128,72578.248908,2,92,15,7,3.67,2,1,0,0,0,2,30,1,5,1,NA +63192,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,41229.806244,46206.205733,2,103,6,6,1.82,2,2,0,0,2,1,70,1,2,5,NA +63193,7,2,2,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,26389.420306,26882.397721,2,98,3,3,0.96,1,1,0,0,0,2,41,1,4,3,NA +63194,7,2,2,56,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,2,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12649.084278,13204.362319,3,90,4,4,0.92,3,3,0,0,1,2,56,2,2,1,2 +63195,7,2,1,59,NA,2,2,1,NA,NA,2,NA,2,2,7,NA,3,1,NA,2,2,2,2,2,2,1,2,1,2,24211.824535,24594.266281,2,93,8,8,2.57,3,3,0,0,1,1,59,2,3,1,3 +63196,7,2,1,39,NA,4,4,1,NA,NA,2,NA,2,1,4,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18838.303827,18900.579975,2,93,12,12,NA,4,4,1,1,0,2,27,2,4,1,4 +63197,7,2,2,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,18070.666316,17918.945427,1,99,7,7,2.38,2,2,0,0,0,2,27,1,5,1,5 +63198,7,2,1,80,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,7199.330978,7340.349591,1,93,4,4,1.56,1,1,0,0,1,1,80,2,5,2,NA +63199,7,2,1,12,NA,3,3,1,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,27156.800586,27669.661454,1,92,5,5,1.05,3,3,1,1,0,2,35,1,4,5,NA +63200,7,2,2,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,164722.990078,163821.547042,1,90,15,15,5,3,3,0,0,0,1,59,1,5,1,5 +63201,7,2,1,7,NA,1,1,1,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12665.770043,13116.669484,1,100,9,9,2.02,6,6,0,3,1,2,39,1,4,1,5 
+63202,7,2,1,0,3,5,7,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6371.499593,6469.293142,2,95,12,14,3.93,4,3,1,0,0,1,35,1,5,1,4 +63203,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,35334.703093,40990.264786,1,101,3,3,0.9,1,1,0,0,1,2,80,1,2,2,NA +63204,7,2,2,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,26757.554986,26943.613048,2,97,7,7,1.89,3,3,0,0,0,1,50,1,2,1,2 +63205,7,2,1,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,28216.191929,28480.821588,1,101,7,7,1.83,3,3,0,0,2,1,67,1,1,1,2 +63206,7,2,1,38,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,14204.262514,14001.792163,1,99,7,7,1.06,7,7,3,1,0,1,38,1,4,6,NA +63207,7,2,2,8,NA,3,3,2,8,107,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,52414.628675,53163.961325,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +63208,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105261.096488,116369.237268,1,97,15,15,5,3,3,1,0,0,2,31,1,5,1,5 +63209,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,65033.706797,70202.805273,1,102,8,8,2.42,4,4,0,2,0,2,34,1,4,1,3 +63210,7,2,1,10,NA,4,4,2,10,121,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7250.311091,8640.316317,2,99,2,2,0.2,7,7,1,2,1,1,63,1,1,2,NA +63211,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,93110.905446,112207.13845,1,92,14,14,3.16,6,6,1,1,0,1,49,1,1,1,3 +63212,7,2,1,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,11764.405491,12074.424659,1,97,4,4,1.34,1,1,0,0,1,1,62,1,5,5,NA +63213,7,2,1,43,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,13535.287828,13486.390654,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +63214,7,2,1,37,NA,1,1,1,NA,NA,2,NA,2,1,5,NA,1,6,NA,2,2,2,1,2,2,1,2,2,2,37715.365512,38126.275234,2,102,5,5,0.59,7,7,1,3,0,1,37,2,1,6,NA +63215,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,11355.3308,11862.334174,1,96,15,15,5,3,3,0,0,1,2,62,1,4,3,NA 
+63216,7,2,1,54,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,29042.244317,28700.143679,1,90,12,12,NA,4,4,0,0,0,1,54,2,4,1,2 +63217,7,1,2,8,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16195.492817,0,3,91,5,5,0.65,7,7,0,4,0,2,39,1,3,4,NA +63218,7,2,1,35,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,34887.439952,35451.076414,2,94,3,3,0.82,2,2,0,0,0,1,35,2,1,1,2 +63219,7,2,1,39,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,20698.946375,20973.485449,3,91,15,15,5,1,1,0,0,0,1,39,2,5,5,NA +63220,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,22634.531479,23281.741513,1,103,9,2,0.81,3,1,0,0,0,1,27,1,5,5,NA +63221,7,2,2,67,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,16352.915834,18960.412859,3,92,8,8,2.97,2,2,0,0,1,1,49,1,2,3,NA +63222,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20360.306379,19986.123493,2,99,15,15,5,2,2,0,0,0,2,51,1,5,1,5 +63223,7,2,1,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,174520.785302,174307.982009,1,95,7,7,3.21,1,1,0,0,0,1,59,1,4,3,NA +63224,7,2,2,31,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,29102.738194,28317.485179,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +63225,7,2,2,63,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,11355.3308,11862.334174,1,96,12,12,NA,5,5,2,0,1,2,63,2,5,3,NA +63226,7,2,2,2,NA,2,2,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9267.834226,9392.475614,2,93,77,77,NA,4,4,1,1,0,2,33,2,4,1,4 +63227,7,2,2,40,NA,1,1,1,NA,NA,2,NA,99,NA,NA,NA,3,1,2,2,2,2,2,2,2,NA,NA,NA,NA,25713.328161,31117.9447,1,103,5,5,0.74,5,5,1,1,0,2,40,99,3,1,1 +63228,7,2,1,27,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,44074.735764,53571.145963,2,91,4,4,0.84,3,3,1,0,0,2,21,1,4,1,2 +63229,7,2,1,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,19440.793325,19514.660132,2,95,9,9,1.81,6,6,1,1,0,2,56,1,4,3,NA 
+63230,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14455.875684,16658.375886,1,93,9,9,5,1,1,0,0,0,1,48,2,5,5,NA +63231,7,2,2,42,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,4,5,2,2,2,2,2,2,2,2,2,1,2,29141.220673,30195.229087,2,99,99,1,0,4,1,0,0,0,2,42,2,4,5,NA +63232,7,2,2,14,NA,4,4,1,14,170,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10128.026158,10319.790822,2,103,7,7,1.48,5,5,0,1,1,2,80,1,4,3,NA +63233,7,2,1,32,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,23284.536512,27522.885754,1,97,5,5,1.04,4,4,1,1,0,1,32,1,3,6,NA +63234,7,2,2,35,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,20310.041341,20837.576644,1,92,3,3,0.9,1,1,0,0,0,2,35,1,5,5,NA +63235,7,2,2,63,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,16352.915834,17093.606152,3,92,3,3,0.95,2,2,0,0,2,2,63,1,4,1,1 +63236,7,2,1,80,NA,2,2,1,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,14200.083364,14896.175407,2,98,3,3,1.1,1,1,0,0,1,1,80,1,3,3,NA +63237,7,2,1,2,NA,3,3,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,49276.9767,53002.397569,2,98,15,15,5,3,3,1,0,0,1,26,1,4,1,4 +63238,7,2,2,80,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,2,2,NA,2,2,2,1,2,2,NA,NA,NA,NA,18006.276697,19962.608629,2,93,3,3,0.66,2,2,0,0,1,2,80,2,2,2,NA +63239,7,2,2,6,NA,4,4,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9399.281543,9563.245188,2,96,4,4,0.57,5,5,0,3,0,2,26,1,2,5,NA +63240,7,2,1,5,NA,2,2,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,17306.08847,2,98,6,6,1.07,5,5,3,0,0,2,24,1,3,1,3 +63241,7,1,1,9,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37831.648033,0,1,99,77,77,NA,4,4,0,2,0,2,45,1,3,1,NA +63242,7,2,1,59,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,25221.349696,24851.265269,3,91,6,6,0.89,7,7,1,1,0,1,59,2,1,1,1 +63243,7,2,1,2,NA,1,1,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,13058.677088,2,98,6,6,0.63,7,7,2,2,1,1,60,1,3,1,2 
+63244,7,2,1,4,NA,4,4,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9228.425814,10175.8485,1,96,8,8,2,4,4,1,2,0,2,40,1,4,5,NA +63245,7,2,2,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,99275.150567,100291.087043,2,95,12,5,1.79,2,1,0,0,0,2,40,1,4,6,NA +63246,7,2,1,13,NA,4,4,2,13,167,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13416.172328,13513.882801,1,96,15,15,5,4,4,0,1,0,1,42,2,4,1,4 +63247,7,2,1,19,NA,5,7,2,19,239,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13251.602554,13277.802057,1,96,15,15,5,3,3,0,0,0,2,40,1,5,1,4 +63248,7,2,1,75,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,67447.112074,71391.281096,1,101,15,15,5,3,3,0,0,2,1,75,1,2,1,2 +63249,7,2,1,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,102183.259724,105037.978798,2,90,15,8,4.59,2,1,0,0,2,1,67,1,5,6,NA +63250,7,2,1,17,NA,3,3,2,17,212,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,88198.948426,91036.751291,1,101,9,9,2.6,4,4,0,1,2,2,63,1,4,1,4 +63251,7,2,2,80,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,1,2,NA,1,2,1,1,2,2,NA,NA,NA,NA,15288.064726,15818.288224,3,91,3,3,0.54,3,3,0,0,1,1,57,2,4,1,3 +63252,7,2,1,7,NA,4,4,1,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11709.3276,12266.117914,1,100,8,8,3.3,2,2,0,1,0,2,44,1,4,3,NA +63253,7,2,2,13,NA,4,4,2,13,164,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11160.089502,11615.801738,2,99,99,99,NA,5,5,0,2,0,2,20,1,3,6,NA +63254,7,2,2,72,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,2,1,NA,2,2,2,1,2,2,1,2,2,NA,23271.708938,25108.705116,1,102,7,7,1.7,4,4,0,0,2,1,44,1,4,4,NA +63255,7,1,1,64,NA,4,4,NA,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,6239.974623,0,1,96,77,77,NA,2,2,0,0,2,1,64,1,5,1,5 +63256,7,2,1,9,NA,4,4,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,10122.702296,10196.10824,2,93,9,9,2.86,4,4,1,1,0,1,30,1,4,6,NA +63257,7,2,2,6,NA,5,6,2,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6834.715094,7332.504615,2,90,5,5,1.08,3,3,0,1,0,2,29,2,4,1,5 
+63258,7,2,1,1,12,3,3,2,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46703.291366,54723.864587,1,93,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +63259,7,2,2,7,NA,1,1,1,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10118.363218,10311.586628,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +63260,7,2,2,42,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,36276.046363,36418.32035,2,91,99,99,NA,3,3,0,0,0,1,40,NA,NA,1,4 +63261,7,2,2,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,20434.221508,20198.461156,1,102,15,15,5,3,3,0,1,0,1,41,1,5,1,5 +63262,7,2,2,76,NA,1,1,1,NA,NA,2,NA,2,1,8,NA,2,2,NA,2,2,2,1,2,2,2,2,2,NA,19352.965911,23161.019266,1,103,6,6,0.97,6,6,0,3,1,2,50,2,1,1,1 +63263,7,2,1,53,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,2,2,2,2,2,2,1,2,2,1,24211.824535,24594.266281,2,93,6,6,1.39,4,4,0,0,0,1,53,2,3,1,3 +63264,7,2,1,32,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,1,1,NA,1,2,1,1,2,2,1,2,1,NA,20963.414192,21636.182301,1,100,5,5,0.74,6,6,0,3,0,1,40,2,3,1,4 +63265,7,2,2,28,NA,4,4,2,NA,NA,2,NA,2,2,1,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,28823.434754,27922.831827,1,97,6,1,0,2,1,0,0,0,1,31,1,5,6,NA +63266,7,2,1,0,10,4,4,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6575.334977,7272.954395,2,95,2,2,0.42,3,3,2,0,0,1,25,1,3,5,NA +63267,7,2,2,72,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,35965.834545,38220.111719,1,101,3,3,1.12,1,1,0,0,1,2,72,1,3,2,NA +63268,7,2,1,12,NA,2,2,1,12,154,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15506.325263,16662.012915,1,103,5,5,0.74,5,5,0,1,0,1,47,2,1,1,1 +63269,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,25840.959268,29976.982116,1,101,77,77,NA,2,2,0,0,2,2,80,1,1,2,NA +63270,7,2,1,4,NA,1,1,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,17306.08847,2,98,14,14,3.25,5,5,2,1,0,1,37,1,5,1,5 +63271,7,2,1,19,NA,5,6,1,19,236,2,NA,2,2,5,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8062.957534,8614.793403,1,92,12,12,NA,7,7,1,2,1,2,45,2,3,1,3 
+63272,7,2,2,20,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,NA,NA,NA,1,2,2,1,114993.808573,116714.079488,1,98,7,NA,NA,4,1,0,0,0,2,20,1,4,5,NA +63273,7,2,1,66,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11492.781131,11942.286414,1,100,15,15,5,2,2,0,0,2,2,64,1,3,1,3 +63274,7,2,2,5,NA,5,7,1,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11956.543345,12952.739683,1,94,2,2,0.33,5,5,1,3,0,2,37,1,4,3,NA +63275,7,2,1,0,3,3,3,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22570.662508,23423.888442,1,92,9,9,2,6,6,1,3,0,1,33,1,4,1,4 +63276,7,2,1,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,18079.825103,18175.893804,2,97,3,3,0.92,1,1,0,0,1,1,67,1,1,2,NA +63277,7,2,1,18,NA,3,3,2,18,224,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,69216.263169,70543.167563,1,91,14,14,3.15,5,5,0,1,0,2,50,1,5,1,5 +63278,7,2,2,3,NA,5,6,1,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7029.864692,7462.832472,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +63279,7,2,1,1,18,4,4,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5770.570361,6126.790978,2,99,5,5,0.65,6,6,2,1,0,2,53,1,4,3,NA +63280,7,2,1,12,NA,5,6,1,12,150,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7558.274942,7903.176011,2,96,15,15,5,3,3,0,2,0,2,42,2,5,4,NA +63281,7,2,1,40,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,4,1,NA,2,2,2,2,2,2,2,2,2,2,35406.972937,35699.930106,2,98,6,6,1.21,4,4,1,0,0,2,49,2,2,6,NA +63282,7,2,1,5,NA,5,7,1,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11081.601387,11216.767571,2,95,6,6,1.3,4,4,1,1,0,1,47,1,5,1,4 +63283,7,2,1,36,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19133.795531,19387.575333,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 +63284,7,2,1,79,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,81415.860066,87761.355472,1,97,7,7,2.64,2,2,0,0,2,1,79,1,4,1,3 +63285,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,42992.537371,49508.460769,2,95,3,3,1.16,1,1,0,0,1,2,80,1,3,2,NA 
+63286,7,2,2,0,9,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10692.488346,10402.056617,1,101,4,4,0.79,3,3,1,0,0,1,41,1,3,1,3 +63287,7,2,2,78,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,18956.418923,20374.294465,2,101,99,99,NA,3,3,0,1,1,2,78,1,1,2,NA +63288,7,2,2,58,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16033.091438,16307.204086,1,96,77,77,NA,4,4,0,0,0,1,52,2,5,1,5 +63289,7,2,1,6,NA,3,3,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47990.623546,51629.685381,1,100,15,15,4.07,5,5,0,2,0,2,41,1,5,1,4 +63290,7,2,2,16,NA,4,4,2,16,201,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11436.28571,11295.489882,1,93,1,1,0.02,5,5,0,4,0,2,36,NA,NA,5,NA +63291,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,21799.527449,22114.217255,1,96,14,14,5,1,1,0,0,0,1,47,1,5,3,NA +63292,7,2,2,6,NA,5,7,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12292.225251,12666.385814,1,95,14,14,3.04,6,6,0,4,0,1,56,1,5,1,4 +63293,7,2,2,61,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,9570.416297,10355.226074,2,98,5,5,1.93,1,1,0,0,1,2,61,1,2,2,NA +63294,7,2,2,62,NA,5,7,1,NA,NA,2,NA,2,1,5,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9525.506997,9950.810696,2,93,9,9,5,1,1,0,0,1,2,62,2,4,3,NA +63295,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,44886.08631,48362.89646,2,102,6,6,1.15,5,5,0,0,2,1,80,1,5,1,1 +63296,7,2,2,55,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16672.82247,17074.238333,1,92,15,15,4.44,5,5,0,0,1,1,65,NA,NA,1,5 +63297,7,2,1,50,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,174520.785302,182298.775287,1,101,14,14,5,3,3,0,1,0,2,36,1,5,1,5 +63298,7,2,2,22,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16291.99448,17037.586582,1,90,15,15,3.7,5,5,0,0,0,1,54,NA,NA,1,NA +63299,7,2,1,17,NA,1,1,1,17,208,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22768.423624,23116.446945,2,98,9,9,2.6,4,4,0,2,0,1,30,1,2,1,2 
+63300,7,2,2,5,NA,1,1,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19235.084509,21236.049482,3,92,4,4,0.65,4,4,2,0,0,2,20,1,3,5,NA +63301,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,12433.776145,13392.195631,2,99,6,6,1.39,4,4,1,0,1,2,63,1,3,3,NA +63302,7,2,1,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,123110.069898,131423.888983,1,101,5,5,1.45,2,2,0,0,0,2,49,1,4,3,NA +63303,7,2,1,7,NA,1,1,2,7,85,NA,NA,2,2,3,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11546.167056,12698.029578,1,90,4,4,0.47,7,7,1,1,0,2,50,2,1,1,1 +63304,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,98760.497744,125641.994435,2,91,15,6,2.69,2,1,0,0,0,1,44,1,3,5,NA +63305,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,16905.961576,17390.157008,2,94,7,7,1.18,7,7,1,4,0,2,31,1,4,6,NA +63306,7,2,1,37,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,34438.924452,34418.648235,1,100,9,9,2.02,6,6,0,3,1,2,39,1,4,1,5 +63307,7,2,1,8,NA,3,3,1,8,104,NA,NA,1,1,NA,1,NA,NA,NA,1,NA,2,1,2,2,1,2,2,1,66868.503099,69864.859716,1,98,15,15,4.34,4,4,1,1,0,1,41,1,5,1,5 +63308,7,2,1,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,22497.998055,22497.243767,1,102,4,2,0.73,2,1,0,0,0,1,36,NA,NA,4,NA +63309,7,2,1,0,10,5,6,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9148.090461,9898.749752,1,92,14,14,3.3,4,4,2,0,0,1,28,1,4,1,4 +63310,7,2,2,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,25964.813959,25539.412946,2,101,2,2,0.74,1,1,0,0,0,2,53,1,2,5,NA +63311,7,2,1,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,7323.703412,7610.147862,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +63312,7,2,1,74,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,65208.881742,76821.663824,3,91,7,7,2.92,2,2,0,0,2,1,74,1,3,1,3 +63313,7,2,1,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20297.001922,20419.260869,1,96,15,15,5,2,2,0,0,0,1,56,1,5,1,5 
+63314,7,2,2,0,8,4,4,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4200.172174,4368.199293,2,93,10,10,2.26,6,6,2,0,0,1,34,1,4,1,4 +63315,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17852.668137,18502.046882,3,91,3,3,0.54,3,3,0,0,1,1,57,2,4,1,3 +63316,7,2,1,10,NA,4,4,2,10,121,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9023.469661,9333.449986,2,90,14,14,4.25,4,4,0,2,1,2,45,2,5,5,NA +63317,7,2,1,32,NA,5,6,2,NA,NA,1,1,2,2,5,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17585.737085,18369.998342,2,91,12,5,2.2,3,1,0,0,0,1,29,NA,NA,5,NA +63318,7,2,1,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17115.540769,2,97,12,6,2.75,3,1,0,0,0,1,21,NA,NA,77,NA +63319,7,1,2,51,NA,5,6,NA,NA,NA,2,NA,2,1,7,NA,5,77,NA,1,2,2,1,2,2,NA,NA,NA,NA,18322.475193,0,3,91,12,14,5,2,1,0,0,0,2,51,2,5,77,NA +63320,7,2,2,27,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16844.740449,17983.530016,3,91,7,7,2.72,2,2,0,0,0,2,27,2,5,1,5 +63321,7,2,1,6,NA,3,3,2,6,82,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,65374.972543,69123.624578,1,90,15,15,5,4,4,0,2,0,1,37,1,5,1,5 +63322,7,2,2,6,NA,4,4,2,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9460.53021,9835.43092,1,96,6,6,1.98,2,2,0,1,0,2,29,1,3,5,NA +63323,7,2,2,48,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16833.890659,17522.038775,3,91,15,15,5,4,4,0,2,0,1,38,2,5,1,5 +63324,7,2,1,35,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,1,NA,NA,NA,NA,11715.79771,11871.189399,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +63325,7,2,2,2,NA,1,1,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14326.094268,15816.39252,3,92,15,15,5,3,3,1,0,0,1,41,2,5,1,3 +63326,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,11574.200203,11257.441852,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +63327,7,2,1,14,NA,3,3,1,14,176,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25736.568804,25798.716176,3,92,5,5,1.03,4,4,0,3,0,1,55,1,4,4,NA 
+63328,7,2,2,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9793.924718,10231.212981,2,100,10,10,2.75,5,5,1,1,1,1,27,1,3,1,5 +63329,7,2,2,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,27842.199551,30775.350974,1,90,15,15,5,2,2,0,0,2,1,74,1,3,1,3 +63330,7,2,1,51,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,174520.785302,174976.353865,1,95,12,12,NA,6,6,2,0,0,2,42,1,2,1,5 +63331,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,64048.78027,64245.709042,2,95,6,6,1.98,2,2,0,0,0,1,35,1,4,1,4 +63332,7,2,2,15,NA,1,1,1,15,186,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18515.058419,19360.671834,2,96,6,6,0.87,6,6,1,3,0,1,46,2,1,1,1 +63333,7,2,1,66,NA,4,4,1,NA,NA,2,NA,2,2,8,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,8062.039403,9043.239367,2,103,4,4,0.99,2,2,0,0,2,1,66,2,3,1,NA +63334,7,2,1,9,NA,1,1,1,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10890.103352,10791.203193,1,102,4,4,0.61,5,5,2,2,0,2,27,2,2,5,NA +63335,7,2,1,10,NA,4,4,2,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10665.048307,10877.092316,1,96,6,6,1.52,3,3,0,1,0,2,44,1,3,1,3 +63336,7,2,2,9,NA,5,6,1,9,109,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7932.110938,8316.610099,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +63337,7,2,1,75,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,8544.243039,9048.352361,2,95,12,12,NA,3,3,0,0,3,2,73,1,2,1,1 +63338,7,2,1,31,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,28996.250643,28582.932384,2,101,14,14,5,1,1,0,0,0,1,31,1,4,3,NA +63339,7,2,2,41,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,15747.833197,16391.584661,3,91,14,14,4.03,4,4,0,2,0,1,51,2,4,1,5 +63340,7,2,1,26,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,34629.549705,1,94,5,5,0.74,5,5,1,1,0,2,24,1,3,1,4 +63341,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,18094.847125,18029.478224,3,91,14,9,2.68,6,4,0,0,2,1,48,2,1,1,1 
+63342,7,2,1,10,NA,5,6,2,11,132,NA,NA,2,2,2,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8151.552109,8792.161653,1,90,14,14,3.98,3,3,0,1,0,1,33,2,5,1,5 +63343,7,2,2,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,129042.31855,133624.984525,2,101,14,14,4.86,3,3,0,1,0,1,53,1,4,1,5 +63344,7,2,2,26,NA,1,1,1,NA,NA,2,NA,2,2,1,NA,5,1,2,2,1,2,2,2,2,NA,NA,NA,NA,46606.430863,48004.221702,2,102,15,15,5,3,3,1,0,0,1,41,2,2,1,5 +63345,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,10346.035773,10598.2543,1,99,6,6,1.84,2,2,0,0,2,1,69,1,3,1,4 +63346,7,2,1,36,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,51543.062078,54553.310105,3,92,4,4,0.55,6,6,0,4,0,1,36,2,1,1,3 +63347,7,2,2,15,NA,1,1,1,15,189,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,26325.414456,26852.811114,3,92,7,7,1.3,5,5,1,2,0,2,33,2,2,1,1 +63348,7,1,2,10,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14716.367649,0,1,91,6,6,1.13,6,6,1,3,0,1,40,1,4,6,NA +63349,7,2,2,48,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,20001.392001,24207.190257,1,100,99,99,NA,6,6,0,1,0,1,53,2,2,1,3 +63350,7,2,1,16,NA,3,3,1,16,199,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,103085.56884,109495.609645,1,94,15,15,5,4,4,0,1,0,1,41,1,5,1,5 +63351,7,2,2,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,26465.930618,26744.909102,2,100,14,14,4.59,3,3,1,0,0,1,30,NA,NA,1,4 +63352,7,2,1,64,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,125558.167126,127168.668278,1,94,15,15,5,4,3,0,0,1,1,33,1,2,5,NA +63353,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,36283.627734,36608.028102,1,102,5,1,0.21,5,4,1,1,0,2,24,1,4,5,NA +63354,7,2,2,1,17,4,4,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7881.296797,8480.363269,2,96,14,14,3.06,5,5,1,1,1,2,54,1,3,6,NA +63355,7,2,2,9,NA,1,1,2,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17596.36942,17952.278489,1,97,4,4,0.65,4,4,0,1,0,2,45,2,2,3,NA 
+63356,7,1,2,70,NA,1,1,NA,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,29145.675285,0,3,92,6,6,1.98,2,2,0,0,2,1,72,1,4,1,1 +63357,7,2,2,31,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,2,6,2,2,2,2,1,2,2,2,2,2,2,31460.18163,32805.207594,2,97,4,4,0.6,6,6,2,2,0,1,35,2,2,6,NA +63358,7,2,1,6,NA,3,3,2,6,82,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14307.565788,15198.555052,3,91,3,3,0.76,3,3,0,1,0,2,24,1,4,6,NA +63359,7,1,1,1,16,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50163.512784,0,1,90,15,15,5,4,4,1,1,0,2,39,1,5,1,4 +63360,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,35525.197101,39677.515157,2,93,8,8,2.17,4,4,0,0,2,2,62,1,4,3,NA +63361,7,2,1,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,8232.241159,8296.636338,2,98,6,6,2.75,1,1,0,0,1,1,67,1,5,2,NA +63362,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,38758.039282,0,1,101,6,6,1.78,2,2,0,0,2,1,80,1,2,1,1 +63363,7,2,2,65,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,35475.142583,36054.748166,2,95,2,2,0.75,1,1,0,0,1,2,65,1,5,5,NA +63364,7,2,2,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,21143.97379,23464.841482,1,97,12,12,NA,4,4,0,0,2,1,72,1,2,1,3 +63365,7,2,2,50,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,2,2,2,1,2,2,2,2,1,2,23200.373382,24548.135184,2,93,5,5,1.26,3,3,0,1,0,1,55,2,2,1,2 +63366,7,2,2,11,NA,4,4,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5221.819282,5505.774343,3,90,3,3,0.37,5,5,2,2,0,2,36,2,4,4,NA +63367,7,2,1,10,NA,4,4,1,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8714.559478,8865.734494,2,96,12,10,2.17,7,6,2,3,0,1,29,1,4,3,NA +63368,7,2,2,6,NA,1,1,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14300.71869,15541.734563,2,94,9,9,2.1,5,5,1,2,0,1,31,2,4,1,4 +63369,7,2,2,10,NA,4,4,2,10,122,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8147.287486,8590.325322,2,90,8,8,2.59,3,3,0,2,0,2,35,1,4,6,NA 
+63370,7,2,2,6,NA,2,2,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15148.721588,15796.67129,2,91,2,2,0.22,4,4,0,3,0,2,45,2,5,4,NA +63371,7,2,1,13,NA,4,4,2,13,161,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10162.625863,10628.468372,2,99,5,5,0.78,5,5,2,2,0,2,30,1,3,5,NA +63372,7,2,2,2,NA,1,1,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9955.153132,10271.907274,2,94,77,77,NA,4,4,2,0,0,2,27,2,3,1,3 +63373,7,2,2,29,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,16937.04417,23964.143641,2,98,9,9,3.83,2,2,1,0,0,2,29,2,4,5,NA +63374,7,2,1,57,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,16851.334496,17374.092058,2,95,4,4,1.34,1,1,0,0,0,1,57,1,1,2,NA +63375,7,2,2,17,NA,3,3,2,17,206,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22046.14863,22741.646094,2,97,9,9,1.45,7,7,1,2,2,2,45,1,3,5,NA +63376,7,2,1,10,NA,1,1,1,10,124,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14042.313177,2,98,3,3,0.54,3,3,0,2,0,2,35,1,3,5,NA +63377,7,2,2,39,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,28315.055559,38147.595136,1,96,15,15,5,2,2,0,1,0,2,39,1,4,5,NA +63378,7,2,2,18,NA,1,1,1,19,228,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,16360.434077,17687.989006,3,91,5,5,1.03,4,4,0,2,0,2,42,2,1,5,NA +63379,7,2,1,10,NA,4,4,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9983.293162,10181.782419,1,97,4,4,0.46,7,7,3,3,0,2,31,1,3,1,NA +63380,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,134450.153638,1,101,7,7,1.74,4,4,1,0,0,1,24,NA,NA,1,4 +63381,7,2,1,3,NA,4,4,2,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8947.841984,9500.1974,1,91,15,15,5,6,6,1,2,0,2,42,2,5,1,5 +63382,7,2,1,39,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,2,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,30626.581617,30979.39441,2,90,6,6,0.96,5,5,1,1,0,1,39,2,2,1,NA +63383,7,2,1,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16287.780872,16801.552467,2,97,6,6,1.7,2,2,0,0,1,2,62,2,5,1,2 
+63384,7,2,2,13,NA,5,7,2,13,161,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9508.389138,9876.066661,3,91,8,8,2.24,4,4,0,2,0,1,45,1,4,1,4 +63385,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,15369.196003,16055.414165,1,100,15,5,2.15,2,1,0,0,2,1,60,1,5,6,NA +63386,7,2,1,7,NA,2,2,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13927.458372,13742.678011,2,98,6,6,0.78,7,7,1,3,1,2,63,1,2,4,NA +63387,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,25989.236835,2,101,99,1,0.23,3,1,0,0,0,1,20,1,4,5,NA +63388,7,2,2,44,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,19100.40225,19426.955733,1,96,4,4,0.65,4,4,0,0,0,1,19,1,4,NA,NA +63389,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,31313.292063,33738.773556,1,95,6,6,1.65,2,2,0,0,2,2,80,1,4,1,4 +63390,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,151766.599459,154414.492891,3,91,7,7,1.97,4,4,0,0,1,2,77,1,5,2,NA +63391,7,2,1,70,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,4,1,NA,2,2,2,1,2,2,1,2,2,NA,11755.776731,12180.618102,3,90,7,7,1.15,7,7,2,1,1,2,30,1,9,1,4 +63392,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,63717.895797,67578.761066,2,101,5,5,1.36,2,2,0,0,0,2,22,1,4,5,NA +63393,7,2,2,19,NA,1,1,1,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20502.928313,21226.512457,2,98,8,8,2.24,4,4,0,0,0,1,58,2,1,1,3 +63394,7,2,1,16,NA,5,6,1,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,5873.088927,6275.048272,1,103,6,6,1.82,2,2,0,1,0,2,56,2,5,77,NA +63395,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,37456.985636,43133.944105,2,98,99,99,NA,2,2,0,0,2,2,80,1,3,1,1 +63396,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,26344.362464,28384.957988,2,95,13,13,NA,2,2,0,0,2,1,80,1,5,1,3 +63397,7,2,2,51,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,2,1,NA,1,2,1,1,2,1,1,2,1,NA,17018.449206,17108.441787,2,91,99,1,0,7,3,0,4,0,1,36,2,9,1,2 
+63398,7,2,2,78,NA,5,6,1,NA,NA,2,NA,2,2,8,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,11812.364238,12222.042847,1,94,4,4,1.11,2,2,0,0,1,2,37,2,4,5,NA +63399,7,2,2,5,NA,1,1,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18078.669459,18848.958225,1,101,2,2,0.26,5,5,3,0,0,2,26,1,2,1,3 +63400,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,14883.664782,14924.287598,2,90,8,6,1.46,4,3,1,1,0,2,21,1,5,6,NA +63401,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,163194.688032,166437.638141,1,97,8,8,3.57,2,2,0,0,0,2,49,1,3,3,NA +63402,7,2,2,27,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16844.740449,18124.20195,3,91,14,14,5,2,2,0,0,0,2,27,2,5,1,5 +63403,7,2,1,8,NA,4,4,1,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10199.928366,10684.945227,1,100,9,9,2.22,5,5,1,2,0,2,40,2,4,1,4 +63404,7,2,2,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,20048.680628,21335.7055,2,95,2,2,0.46,3,3,0,0,0,2,48,1,2,1,2 +63405,7,2,2,74,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,15914.916287,16468.700526,2,99,8,8,3.47,2,2,0,0,1,2,74,1,5,2,NA +63406,7,2,1,12,NA,4,4,2,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12462.601191,12584.643654,2,97,5,5,0.92,5,5,0,3,0,2,54,1,3,2,NA +63407,7,2,2,16,NA,4,4,1,16,203,NA,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12531.903464,13043.632492,2,100,4,4,0.86,3,3,0,2,0,2,36,1,3,5,NA +63408,7,2,1,8,NA,4,4,1,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13818.701911,14596.53451,2,101,2,2,0.38,3,3,0,2,0,2,56,1,3,2,NA +63409,7,2,2,69,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,11009.072628,11500.615946,2,99,2,2,0.53,2,2,0,0,1,2,69,1,4,2,NA +63410,7,2,1,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,16995.648055,16598.645683,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA +63411,7,2,1,19,NA,4,4,2,19,233,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13863.378072,13964.345562,1,96,15,15,5,4,4,0,1,0,1,56,1,4,1,5 
+63412,7,2,2,40,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,2,1,2,2,2,2,2,2,2,2,2,2,2,26889.724138,34281.795458,3,90,12,12,NA,2,2,0,1,0,2,40,2,2,1,NA +63413,7,2,2,61,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,128294.718377,130390.843426,1,93,15,15,5,4,3,0,0,3,1,80,1,5,2,NA +63414,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,113642.287024,118593.484239,1,101,9,6,2.24,2,1,0,0,0,2,27,1,3,6,NA +63415,7,2,1,18,NA,4,4,2,18,226,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8364.097643,8526.060452,2,90,6,6,0.96,5,5,0,1,0,1,55,1,4,6,NA +63416,7,2,2,7,NA,4,4,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7619.934086,8032.046596,1,103,8,8,1.95,4,4,0,1,0,2,48,1,5,1,5 +63417,7,2,2,6,NA,3,3,1,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,61745.495006,63723.291316,2,101,7,7,1.88,4,4,0,2,0,2,36,1,4,1,5 +63418,7,2,2,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,26595.398371,26963.678025,1,91,7,7,2.2,3,3,0,0,1,2,60,1,2,2,NA +63419,7,2,1,2,NA,3,3,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25107.382643,27005.542261,1,95,6,6,1.35,3,3,1,0,0,1,22,1,3,1,4 +63420,7,2,1,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,21963.854411,22169.845613,2,102,3,3,1.29,1,1,0,0,1,1,62,1,4,3,NA +63421,7,2,1,13,NA,5,6,1,13,162,NA,NA,1,1,NA,6,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,8915.81491,9312.338117,1,92,2,2,0.33,5,5,0,1,0,1,51,2,1,4,NA +63422,7,2,2,64,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,12043.867275,12495.995159,1,91,1,1,0.05,2,1,0,0,2,1,72,1,1,3,NA +63423,7,2,2,2,NA,5,6,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6097.947961,6201.918891,1,96,15,15,4.34,4,4,1,1,0,1,36,2,5,1,5 +63424,7,2,1,17,NA,1,1,1,17,206,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24902.864049,25040.491572,1,94,5,5,0.94,4,4,0,2,0,2,37,2,3,1,2 +63425,7,2,1,51,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,1,1,2,2,1,2,2,1,13567.923118,13997.525763,3,91,14,14,4.03,4,4,0,2,0,1,51,2,4,1,5 
+63426,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,33802.428047,1,95,3,3,1.19,1,1,0,0,0,1,58,1,2,3,NA +63427,7,2,2,9,NA,4,4,2,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9565.802332,9920.158544,1,96,14,14,2.58,6,6,2,2,0,1,40,2,4,1,4 +63428,7,2,2,13,NA,1,1,1,13,162,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18515.058419,19360.671834,2,96,6,6,1.12,4,4,0,3,0,1,26,1,2,77,NA +63429,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,16386.190684,20296.970403,2,90,7,4,1.38,2,1,0,0,0,1,21,1,3,5,NA +63430,7,2,2,4,NA,3,3,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26966.264969,27815.81123,1,98,6,6,1.31,3,3,2,0,0,2,22,1,3,5,NA +63431,7,2,1,34,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,2,5,NA,1,2,2,1,2,1,1,2,2,1,20071.705576,20599.326983,3,91,6,6,1.12,4,4,0,0,2,1,69,2,3,1,1 +63432,7,2,1,7,NA,3,3,1,7,89,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,48147.167375,49984.220445,3,92,14,14,2.74,6,6,2,2,0,1,35,1,5,1,4 +63433,7,2,1,4,NA,5,7,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10276.262805,11525.953064,1,97,15,15,4.77,4,4,1,1,0,2,40,1,5,1,5 +63434,7,2,2,23,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16844.740449,17849.904413,3,91,6,3,1.1,3,1,0,0,0,2,23,1,5,5,NA +63435,7,2,1,14,NA,3,3,2,14,177,NA,NA,2,1,1,9,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,26858.274239,26923.130249,2,97,5,5,0.8,5,5,1,2,0,1,46,2,4,1,2 +63436,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,1,2,1,2,2,1,2,2,NA,29302.509441,32727.496464,2,95,15,15,5,3,3,0,0,1,2,47,1,4,1,5 +63437,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,128575.977224,133862.807708,1,102,6,4,1.38,2,1,0,0,0,2,23,1,5,5,NA +63438,7,2,1,64,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,6358.062034,6283.458155,1,99,15,15,5,2,2,0,0,2,1,64,1,3,1,4 +63439,7,2,1,51,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,23431.677775,25582.72205,2,93,10,10,3.4,3,3,0,1,0,1,51,1,3,1,4 
+63440,7,2,1,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,NA,54654.899954,58048.628546,2,103,6,6,1.82,2,2,0,0,2,1,70,1,2,5,NA +63441,7,2,1,3,NA,2,2,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14505.510202,14676.996441,2,94,6,6,1.43,5,4,2,1,0,2,23,2,3,6,NA +63442,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,24719.680932,27438.0417,1,97,4,4,1.61,1,1,0,0,0,2,51,1,4,3,NA +63443,7,2,2,8,NA,3,3,2,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24070.467912,23750.822126,1,95,7,7,1.17,6,6,1,3,0,2,44,1,4,1,NA +63444,7,2,2,0,2,3,3,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8317.65714,8487.710712,1,94,7,7,1.21,6,6,2,2,0,1,31,1,2,6,NA +63445,7,2,2,21,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,41527.748953,47819.246487,2,97,1,1,0.01,1,1,0,0,0,2,21,1,4,5,NA +63446,7,2,1,16,NA,4,4,1,16,195,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,18260.901254,2,101,6,6,1.16,4,4,0,3,0,2,36,1,4,4,NA +63447,7,2,2,9,NA,4,4,2,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8192.839047,8748.579234,2,99,5,5,1.32,2,2,0,1,0,2,34,1,4,5,NA +63448,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,47098.572584,49762.054694,1,95,5,5,1.43,2,2,0,0,2,1,80,1,3,1,4 +63449,7,2,1,38,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,2,1,NA,2,2,2,1,2,2,1,2,2,2,41241.224595,43514.733172,2,102,6,6,1.12,4,4,1,1,0,1,38,2,2,1,3 +63450,7,2,1,6,NA,2,2,1,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16028.98796,15961.028504,1,100,14,14,4.71,3,3,0,1,0,1,38,1,5,1,5 +63451,7,2,1,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,16088.355002,17176.258631,2,100,13,13,NA,2,2,0,0,1,2,71,NA,NA,1,NA +63452,7,2,1,11,NA,4,4,1,11,136,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10469.725162,11059.049224,1,100,1,1,0,4,4,1,2,0,2,35,1,2,5,NA +63453,7,2,1,48,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,1,1,2,1,2,2,2,2,46745.699003,49120.516055,2,98,13,13,NA,5,5,0,2,0,1,48,2,1,1,2 
+63454,7,2,1,13,NA,5,6,2,13,159,NA,NA,2,2,1,6,NA,NA,NA,1,1,1,NA,NA,NA,1,2,1,NA,10346.302718,11892.421636,2,91,99,1,0,7,3,0,4,0,1,36,2,9,1,2 +63455,7,2,1,30,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,24134.97174,25042.42256,1,98,7,7,1.52,4,4,2,0,0,1,30,1,3,1,4 +63456,7,2,1,80,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,17609.976188,17569.292452,1,98,15,15,5,5,5,0,1,1,2,55,1,5,1,5 +63457,7,2,1,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25807.688156,26357.095043,1,100,15,15,5,4,4,0,0,0,1,54,1,5,1,5 +63458,7,2,2,0,1,2,2,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8264.844608,8090.961959,1,93,14,14,4.75,3,3,1,0,0,2,42,1,5,1,5 +63459,7,2,2,80,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,2,NA,2,2,2,2,2,2,2,2,2,NA,21933.218587,23583.994398,1,93,2,2,0.43,2,2,0,0,2,2,80,2,1,2,NA +63460,7,2,1,2,NA,3,3,2,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47507.757497,54303.981935,1,101,6,6,0.97,7,7,2,1,0,1,43,1,2,1,NA +63461,7,2,2,21,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,34906.069211,35828.732314,1,103,1,1,0.03,3,3,0,0,0,1,50,1,2,3,NA +63462,7,2,2,18,NA,4,4,2,18,218,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10484.6104,10912.740054,2,99,3,3,0.56,4,4,1,0,0,2,38,1,3,5,NA +63463,7,2,2,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,117419.769432,119338.215671,2,95,15,15,5,2,2,0,0,1,1,54,NA,NA,1,4 +63464,7,2,1,7,NA,1,1,1,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17882.621856,18280.794545,3,92,8,8,1.55,6,6,1,3,0,2,38,1,5,1,4 +63465,7,2,1,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16287.780872,16385.890285,2,100,10,10,4.42,2,2,0,0,0,2,55,1,2,1,4 +63466,7,2,1,49,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17602.101156,21123.738865,1,96,14,14,2.19,7,7,0,2,0,1,39,1,2,1,3 +63467,7,2,2,10,NA,4,4,1,11,132,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,9139.784234,11102.911341,2,100,6,6,0.85,6,6,0,2,0,1,59,1,3,1,3 
+63468,7,2,1,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,26135.885159,26052.523863,2,101,NA,99,NA,2,1,0,0,0,1,55,1,2,3,NA +63469,7,2,2,14,NA,2,2,2,14,175,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,14437.97544,16344.869867,2,90,3,3,0.38,5,5,0,4,0,2,33,2,2,5,NA +63470,7,2,1,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6580.937346,7256.559544,2,97,3,3,0.66,3,3,2,0,0,2,19,1,3,NA,NA +63471,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,37074.886861,38850.492434,1,101,3,3,1.07,2,1,0,0,1,2,65,1,3,3,NA +63472,7,2,2,24,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,137368.929197,138597.101074,1,91,6,5,1.93,2,1,0,0,0,2,24,1,4,6,NA +63473,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,31313.292063,33738.773556,1,101,7,7,2.31,2,2,0,0,2,2,80,1,4,1,2 +63474,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,74761.468834,80588.325624,2,91,7,7,1.89,3,3,0,0,2,2,69,NA,NA,1,4 +63475,7,2,1,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,111236.95715,110951.212312,2,92,15,6,2.75,2,1,0,0,0,1,41,1,4,5,NA +63476,7,2,2,15,NA,1,1,1,15,187,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21818.047789,22469.060223,2,102,5,5,0.89,4,4,1,2,0,2,36,2,5,3,NA +63477,7,2,2,4,NA,4,4,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10389.292229,10657.457488,2,95,6,6,1.65,2,2,1,0,0,2,27,2,4,5,NA +63478,7,2,1,62,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,22184.040999,24922.402846,2,94,3,3,1.01,1,1,0,0,1,1,62,1,3,3,NA +63479,7,2,2,19,NA,4,4,1,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,1,1,0.16,4,1,0,0,0,2,21,1,4,5,NA +63480,7,2,2,9,NA,1,1,1,9,117,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,10870.942302,10985.231575,2,103,5,5,0.89,5,5,1,3,0,2,34,2,1,99,NA +63481,7,2,2,48,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,3,1,NA,2,2,2,2,2,2,1,2,2,2,33737.181071,40665.783943,2,91,4,4,0.76,4,4,1,0,0,2,25,2,4,77,NA 
+63482,7,2,2,43,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,22513.236051,21981.065205,1,91,10,10,2.56,5,5,0,3,0,1,51,2,5,1,4 +63483,7,2,1,2,NA,1,1,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14810.484787,14830.497242,1,92,10,10,3.04,4,4,2,0,0,2,37,2,5,1,5 +63484,7,2,1,69,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,95193.424282,96086.209653,1,100,5,5,2.15,1,1,0,0,1,1,69,1,5,3,NA +63485,7,2,1,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18668.895165,18966.875509,1,96,13,13,NA,5,5,1,1,0,1,42,1,3,5,NA +63486,7,2,2,11,NA,2,2,2,11,134,NA,NA,2,1,4,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15583.587534,16935.930722,1,93,15,15,5,4,4,0,2,0,1,50,1,5,1,5 +63487,7,2,1,17,NA,4,4,1,17,208,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12972.249316,12937.057154,1,100,15,15,3.7,5,5,0,3,0,1,51,1,5,1,5 +63488,7,2,2,6,NA,4,4,1,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9309.947844,9654.826166,2,98,14,14,3.36,4,4,0,2,0,1,37,1,4,1,4 +63489,7,2,1,32,NA,3,3,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,85455.763827,87903.260844,2,92,10,10,4.89,2,2,0,0,0,2,34,2,5,1,5 +63490,7,2,2,11,NA,3,3,1,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,58636.069237,60514.266203,1,100,6,6,1.18,5,5,2,2,0,2,40,1,5,3,NA +63491,7,2,1,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,146951.437687,151959.13798,1,98,15,15,5,3,3,0,0,0,1,56,1,5,1,5 +63492,7,1,1,69,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,7101.739553,0,2,100,3,3,0.92,1,1,0,0,1,1,69,1,2,3,NA +63493,7,2,2,65,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,1,1,2,2,1,2,1,3,14388.729229,15167.035829,1,90,2,2,0.33,2,2,0,0,2,2,65,2,3,1,5 +63494,7,2,2,0,2,4,4,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5099.47985,5303.483608,1,100,9,9,1.78,6,6,1,1,0,2,45,2,3,1,3 +63495,7,2,2,14,NA,5,7,1,14,170,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11419.438939,11635.655291,2,103,7,7,1.55,5,5,2,2,0,2,31,1,4,3,NA 
+63496,7,2,1,58,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,124170.603852,125419.736085,2,98,6,6,2.57,1,1,0,0,0,1,58,1,3,3,NA +63497,7,2,2,1,21,2,2,2,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10047.902864,11093.154402,1,90,3,3,0.43,4,4,2,0,0,1,31,1,3,6,NA +63498,7,2,2,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,20131.904783,23186.488493,2,98,6,6,3.01,1,1,0,0,1,2,70,1,3,2,NA +63499,7,2,2,33,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,1,6,2,2,2,2,2,2,2,2,2,2,2,38218.668882,37878.487888,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +63500,7,2,1,53,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,15001.445077,15740.201709,2,99,7,7,1.63,4,4,0,2,0,1,53,1,3,3,NA +63501,7,1,2,4,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13366.393396,0,3,92,8,8,2.01,4,4,1,0,0,2,49,2,5,4,NA +63502,7,2,1,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,11245.778093,11672.431186,1,98,5,5,1.59,2,2,0,0,2,1,73,1,2,1,5 +63503,7,2,2,3,NA,3,3,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,52983.056893,55223.723565,3,91,15,15,5,6,6,1,3,0,2,40,1,5,1,5 +63504,7,2,1,54,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,2,1,2,2,1,2,2,2,33162.406014,34847.152396,3,92,10,10,4.3,5,2,2,1,1,2,68,1,3,1,1 +63505,7,2,2,0,1,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23207.538828,22577.170535,1,94,9,9,2.88,3,3,1,0,0,2,28,1,4,1,4 +63506,7,2,2,60,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,4,NA,2,2,2,2,2,2,1,2,2,2,9716.805546,10308.451947,2,90,5,5,1.19,3,3,1,0,1,2,60,2,1,4,NA +63507,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,9015.10987,2,95,3,3,1.31,1,1,0,0,1,2,61,1,2,2,NA +63508,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,21340.150623,22501.62641,2,95,4,4,1.19,2,2,0,0,0,2,34,1,4,4,NA +63509,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,104488.914565,106745.836574,1,98,3,2,0.54,4,1,0,0,0,1,20,1,4,5,NA 
+63510,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,196995.351093,199029.909449,1,91,15,15,5,2,2,0,0,0,2,56,1,4,1,4 +63511,7,2,1,1,17,3,3,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24594.444896,28818.16319,1,98,4,4,0.66,4,4,2,0,0,2,22,1,4,6,NA +63512,7,2,1,64,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,11019.434708,12461.093856,3,91,4,4,0.81,3,3,0,0,1,1,64,2,1,1,1 +63513,7,1,1,17,NA,4,4,NA,NA,NA,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,15360.522091,0,1,98,15,15,5,4,4,0,2,0,2,50,1,5,1,5 +63514,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,13399.379222,14870.161708,2,90,4,4,1.2,2,2,0,0,2,2,80,1,3,2,NA +63515,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,220233.315202,222507.874115,1,98,14,14,5,2,2,0,0,0,1,59,1,3,1,4 +63516,7,2,2,54,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,4,NA,2,2,2,1,2,2,2,2,2,2,22632.809716,22750.837894,1,103,3,3,0.79,2,2,0,0,0,2,54,2,1,4,NA +63517,7,2,1,10,NA,5,7,1,10,123,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7101.095162,7528.512272,2,96,15,15,5,3,3,0,2,0,2,42,2,5,4,NA +63518,7,2,2,3,NA,1,1,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13366.393396,13935.903375,2,94,6,6,1.15,5,5,1,2,0,1,33,1,2,1,2 +63519,7,2,1,54,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,16117.991297,16620.832307,1,96,9,9,2.78,4,4,0,2,0,1,54,2,5,4,NA +63520,7,2,2,0,0,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21671.775435,21083.121886,1,98,15,15,4.77,4,4,2,0,0,1,35,1,4,1,5 +63521,7,2,2,15,NA,5,6,2,15,183,NA,NA,2,1,4,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8424.942002,9162.575745,2,94,15,15,5,3,3,0,1,0,1,50,1,2,1,4 +63522,7,2,1,6,NA,4,4,2,6,82,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8579.422451,8987.382625,1,99,3,3,0.75,2,2,0,1,0,2,41,1,5,77,NA +63523,7,2,2,22,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,128171.594518,135937.909642,1,93,14,7,3.95,2,1,0,0,0,2,26,1,5,5,NA 
+63524,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,24072.616069,27614.797496,1,100,4,4,1.16,2,2,0,0,2,1,73,1,3,1,3 +63525,7,1,2,52,NA,5,6,NA,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,12649.084278,0,3,90,77,77,NA,3,3,0,0,0,1,56,NA,NA,1,5 +63526,7,2,2,9,NA,3,3,1,9,114,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,51531.402068,50847.086518,1,94,14,14,2.96,5,5,0,3,0,2,39,1,4,1,3 +63527,7,2,2,11,NA,1,1,1,11,140,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12215.503444,12532.555214,1,102,5,5,0.92,5,5,0,3,0,2,39,2,3,1,3 +63528,7,2,2,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,43717.124878,44415.60038,2,101,6,6,1.54,3,3,0,1,0,2,34,1,4,1,3 +63529,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,31962.323978,33458.586112,1,100,4,4,1.06,3,2,0,0,0,1,22,1,4,6,NA +63530,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,28483.117096,31921.002992,1,94,3,3,0.39,6,6,1,0,2,1,80,1,4,1,3 +63531,7,2,1,32,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,1,2,2,1,2,2,2,54969.430704,54554.559034,1,100,NA,13,NA,2,1,0,0,0,1,32,2,1,6,NA +63532,7,2,1,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,19752.546483,23704.421997,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +63533,7,2,2,32,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,6,3,1,2,2,1,2,2,1,2,2,1,41791.57979,40949.069487,2,102,15,12,NA,5,4,0,3,0,1,42,2,4,6,NA +63534,7,2,1,39,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,43108.74283,42783.38752,2,91,8,8,1.85,5,5,0,2,1,1,39,2,3,1,4 +63535,7,2,2,3,NA,2,2,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9793.360626,10410.766193,3,90,15,15,4.2,6,6,1,0,2,1,60,1,5,1,4 +63536,7,2,1,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12105.109893,11778.058179,1,95,6,6,0.83,7,6,2,1,0,1,43,1,4,1,4 +63537,7,2,1,1,19,5,6,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5492.796032,5818.035599,2,100,4,4,0.5,6,6,2,1,0,1,30,2,4,1,3 
+63538,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16519.058735,17573.900053,1,97,77,77,NA,3,3,0,0,3,2,62,1,5,1,NA +63539,7,2,1,14,NA,1,1,1,14,173,NA,NA,2,2,2,8,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,30061.88611,30025.949584,1,92,5,5,0.87,4,4,0,2,0,1,42,2,1,1,4 +63540,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,27944.169354,28022.783308,2,94,3,3,1.24,1,1,0,0,0,2,57,1,2,3,NA +63541,7,2,1,70,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,1,2,2,1,2,2,NA,11755.776731,15354.199056,3,90,12,12,NA,6,6,0,0,2,1,70,2,1,1,1 +63542,7,2,2,2,NA,3,3,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14718.567269,15747.930582,2,97,3,3,0.43,6,6,1,3,0,2,36,2,4,3,NA +63543,7,2,2,74,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,13446.397433,13865.38304,1,93,13,13,NA,2,2,0,0,2,1,76,2,5,1,5 +63544,7,2,1,63,NA,2,2,2,NA,NA,2,NA,2,2,7,NA,4,1,NA,2,2,2,2,2,2,1,2,2,2,9357.880765,9682.593718,2,90,10,10,2,7,7,0,3,1,1,63,2,4,1,NA +63545,7,2,2,56,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19319.908753,18964.846355,1,96,15,15,5,2,2,0,0,0,1,56,1,5,1,5 +63546,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,30212.098573,35047.752261,2,98,3,3,1.1,1,1,0,0,1,2,80,1,1,2,NA +63547,7,2,1,0,4,4,4,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5825.739712,6022.371944,1,99,2,2,0.43,3,3,2,0,0,2,26,1,4,5,NA +63548,7,2,2,0,10,3,3,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9189.424641,8939.81946,1,98,6,6,1.65,2,2,1,0,0,2,24,1,3,4,NA +63549,7,2,2,5,NA,4,4,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9404.475744,9924.78953,1,96,14,14,2.58,6,6,2,2,0,1,40,2,4,1,4 +63550,7,2,2,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,14834.928147,19579.353906,1,103,2,2,0.53,2,2,0,1,0,2,51,1,3,5,NA +63551,7,2,2,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,109027.902363,130159.069706,2,103,15,15,3.44,7,7,0,1,2,2,79,1,3,2,NA 
+63552,7,2,1,50,NA,2,2,1,NA,NA,2,NA,2,2,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23431.677775,23884.31716,2,93,8,8,2,4,4,1,1,0,1,50,2,4,1,4 +63553,7,2,1,76,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,8155.039267,8917.162886,3,90,99,99,NA,2,2,0,0,1,1,76,2,3,3,NA +63554,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,8895.639271,8965.223759,1,96,15,15,5,2,2,0,0,2,1,61,1,5,1,5 +63555,7,2,1,6,NA,2,2,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13898.598114,14013.214919,2,102,14,14,2.44,7,7,0,2,1,2,71,1,3,3,NA +63556,7,2,1,64,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9082.311855,9548.677283,1,96,7,7,2.64,2,2,0,0,2,1,64,2,5,1,5 +63557,7,2,1,3,NA,5,7,2,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8981.553859,10073.795326,3,91,15,15,5,3,3,1,0,0,2,39,2,5,1,5 +63558,7,2,2,18,NA,3,3,2,19,228,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,99340.784743,108314.70799,2,94,12,12,NA,5,5,1,1,0,1,37,1,4,1,3 +63559,7,2,1,41,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19184.316833,20543.822351,1,97,15,15,4.77,4,4,1,1,0,1,41,2,4,1,5 +63560,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,116464.874823,116165.700541,2,94,7,7,1.17,6,6,0,3,0,1,40,1,3,1,5 +63561,7,1,2,65,NA,2,2,NA,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,10938.831279,0,1,93,6,6,1.55,3,3,0,0,3,1,61,2,4,1,1 +63562,7,2,1,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,43892.276067,44574.41704,1,98,6,1,0,2,1,0,0,0,1,29,1,4,5,NA +63563,7,2,1,16,NA,4,4,2,16,196,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10817.360862,11313.215634,2,99,99,99,NA,5,5,0,2,0,2,20,1,3,6,NA +63564,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,65706.229298,0,1,101,10,10,4.3,2,2,0,0,2,1,80,1,2,1,4 +63565,7,2,2,4,NA,1,1,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12280.58152,13558.091553,2,92,14,14,4.03,4,4,1,1,1,2,30,1,5,4,NA 
+63566,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,14809.997435,16069.288743,2,101,13,3,0.64,5,4,0,3,1,2,62,1,1,2,NA +63567,7,2,2,9,NA,5,7,2,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10699.45895,11230.540406,1,97,8,8,2.51,3,3,0,2,0,2,39,2,4,2,NA +63568,7,2,1,19,NA,4,4,1,19,235,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12792.875152,12871.476461,2,93,99,99,NA,7,6,1,0,0,1,19,1,3,NA,NA +63569,7,2,2,49,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,2,2,2,1,2,2,NA,NA,NA,NA,25778.164795,26907.837256,2,90,77,77,NA,3,3,0,0,0,2,49,1,2,5,NA +63570,7,1,1,71,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,17780.584319,0,1,97,3,3,1.16,1,1,0,0,1,1,71,1,2,3,NA +63571,7,2,1,36,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,12031.037329,12793.158379,1,99,10,10,3.99,3,3,0,1,0,1,36,2,2,6,NA +63572,7,2,2,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,81857.569857,83650.947594,2,92,15,7,3.67,4,1,0,0,0,1,28,1,5,5,NA +63573,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,22154.886709,21259.816918,1,96,7,7,2.31,2,2,0,0,1,2,62,1,3,3,NA +63574,7,2,1,6,NA,5,6,2,6,78,NA,NA,1,1,NA,1,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5399.929397,5851.470272,3,91,15,15,5,3,3,0,1,0,2,40,2,5,1,5 +63575,7,2,1,19,NA,5,6,2,19,233,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6999.347953,7445.743611,3,90,9,9,2.6,4,4,0,0,1,1,62,2,4,1,5 +63576,7,2,1,46,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,39244.104141,38781.831565,1,90,15,15,5,4,4,1,1,0,2,40,1,5,6,NA +63577,7,2,2,5,NA,1,1,1,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12714.663639,13256.404976,1,102,4,4,0.5,6,6,2,2,0,1,25,1,2,1,3 +63578,7,2,2,41,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,17754.413992,17268.517878,2,99,6,6,1.11,5,5,1,2,0,2,41,1,2,5,NA +63579,7,2,1,63,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,120735.071461,122283.708051,1,94,8,8,2.41,3,3,0,0,3,1,63,1,4,1,5 
+63580,7,2,1,9,NA,3,3,1,9,114,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24713.905595,25821.327941,1,98,4,4,0.67,5,5,1,2,0,1,29,1,4,1,3 +63581,7,2,2,14,NA,5,6,1,14,173,NA,NA,1,1,NA,8,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,5760.953091,5914.685125,2,92,8,8,1.91,5,5,0,2,1,2,47,2,1,1,3 +63582,7,2,2,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,33821.324425,34164.529253,3,92,4,3,0.52,5,4,0,0,0,2,57,1,4,1,2 +63583,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,38321.717684,40488.857872,1,95,15,15,5,2,2,0,0,2,2,80,1,4,1,4 +63584,7,2,2,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,33506.462855,33906.169633,2,96,3,3,0.38,5,5,1,2,0,2,30,1,3,5,NA +63585,7,2,2,8,NA,4,4,2,8,99,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7282.523598,8846.731146,2,99,5,5,0.76,5,5,0,2,0,1,51,1,2,1,2 +63586,7,2,1,4,NA,4,4,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11342.022131,11810.675983,2,102,4,4,0.53,6,6,2,2,0,2,27,1,2,1,2 +63587,7,2,1,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,69063.138927,71041.142737,1,92,8,8,1.45,6,6,1,3,0,1,36,1,3,1,4 +63588,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,41229.806244,46206.205733,1,103,6,6,1.98,2,2,0,0,2,1,80,1,5,1,4 +63589,7,2,1,80,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,1,1,NA,1,2,2,1,2,1,1,2,2,NA,12882.868646,13921.030537,2,92,77,77,NA,5,5,0,1,2,2,80,NA,NA,1,1 +63590,7,2,1,21,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21495.752024,21074.130073,1,91,15,15,5,6,6,1,2,0,2,42,2,5,1,5 +63591,7,2,2,19,NA,5,6,1,19,236,2,NA,2,2,1,13,NA,NA,NA,1,2,1,1,2,1,1,2,2,NA,6145.01663,6308.997467,2,101,12,1,0,5,1,0,0,0,2,19,2,3,NA,NA +63592,7,2,2,2,NA,4,4,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6276.300496,6799.229858,2,99,1,1,0.1,6,6,2,3,0,2,31,1,2,5,NA +63593,7,2,1,77,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,73144.179917,78844.986335,2,91,77,77,NA,2,2,0,0,2,2,70,1,3,1,4 
+63594,7,2,1,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5256.453918,5473.651288,1,99,5,5,0.84,5,5,2,1,0,1,35,1,3,1,2 +63595,7,2,2,27,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,3,1,2,1,2,1,1,2,1,NA,NA,NA,NA,8044.549611,8388.06604,2,92,10,6,1.12,7,4,1,1,1,2,27,2,3,1,3 +63596,7,2,1,74,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,15470.980685,17519.370244,1,91,7,7,3.58,1,1,0,0,1,1,74,1,2,1,NA +63597,7,2,1,22,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,14385.653726,15564.966804,2,101,14,8,4.59,2,1,0,0,0,1,22,2,4,5,NA +63598,7,2,2,18,NA,2,2,2,18,223,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16189.692833,16766.382859,1,93,15,15,2.96,7,7,0,1,1,2,18,1,2,NA,NA +63599,7,1,2,76,NA,2,2,NA,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,18241.877822,0,2,93,7,7,2.31,2,2,0,0,2,1,80,2,1,1,1 +63600,7,2,2,10,NA,3,3,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,51093.739991,50940.240166,2,102,14,14,3.44,5,5,1,2,0,2,34,1,4,6,NA +63601,7,2,1,5,NA,3,3,1,6,73,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,50644.672852,56040.796491,2,100,NA,NA,NA,5,5,1,2,0,1,36,NA,NA,3,NA +63602,7,2,1,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,18838.303827,19546.605226,2,93,5,5,1.05,3,3,1,0,0,2,29,1,3,5,NA +63603,7,2,1,11,NA,1,1,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13533.281742,14131.961026,1,100,7,7,1.74,4,4,0,2,0,2,39,2,1,1,3 +63604,7,2,2,2,NA,1,1,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11414.885224,11778.085279,1,94,7,7,1.56,4,4,2,0,0,1,21,1,4,1,4 +63605,7,2,1,7,NA,4,4,2,7,86,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9502.15317,9484.287948,2,97,3,3,0.4,6,6,2,3,0,2,25,1,2,5,NA +63606,7,2,1,6,NA,3,3,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24713.905595,25821.327941,1,98,4,4,0.67,5,5,1,2,0,1,29,1,4,1,3 +63607,7,2,1,41,NA,1,1,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,36273.943099,35741.678901,1,90,15,15,4.77,4,4,1,1,0,2,41,1,5,1,2 
+63608,7,2,2,71,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,1,4,NA,2,2,2,1,2,2,2,2,1,NA,12972.932238,13949.32349,2,93,6,6,0.98,5,5,0,2,1,1,48,2,5,1,5 +63609,7,2,1,66,NA,5,7,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,23761.280382,24066.059994,2,103,5,5,1.2,3,3,0,0,2,1,66,2,2,1,2 +63610,7,2,1,16,NA,4,4,2,16,195,NA,NA,2,2,4,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11523.287163,11546.069645,1,96,8,8,2.17,4,4,0,2,0,2,45,2,5,4,NA +63611,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,1,2,1,2,2,1,2,2,NA,20944.990388,22567.358502,2,95,6,6,1.57,3,3,0,0,2,1,80,1,2,1,4 +63612,7,1,1,5,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8006.699948,0,1,90,15,15,5,3,3,1,0,0,1,42,1,5,1,5 +63613,7,2,1,30,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,40219.06806,40845.122132,1,93,9,9,3.98,3,2,0,0,0,2,27,1,3,1,4 +63614,7,2,2,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32144.824104,32969.273278,2,97,2,2,0.77,1,1,0,0,0,2,58,1,2,3,NA +63615,7,2,2,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,128171.594518,135937.909642,1,93,14,9,5,2,1,0,0,0,2,26,1,5,5,NA +63616,7,2,1,9,NA,5,6,2,9,115,NA,NA,2,1,3,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6026.102306,6805.584778,2,95,6,6,1.08,4,4,1,1,0,1,39,1,4,1,4 +63617,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,90303.174138,94631.352733,2,92,15,8,4.59,2,1,0,0,0,2,29,1,5,1,NA +63618,7,2,2,2,NA,5,6,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6024.192029,6555.678695,2,103,15,15,5,3,3,1,0,0,1,35,1,9,1,5 +63619,7,2,1,1,21,3,3,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,53040.637714,62149.552868,3,91,8,8,1.95,4,4,2,0,0,2,30,1,5,1,4 +63620,7,2,1,61,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14113.631895,14272.623174,1,92,9,9,3.97,2,2,0,0,1,2,59,1,5,1,4 +63621,7,2,2,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,129999.035519,129559.2554,1,98,14,14,4.96,2,2,0,0,2,1,71,1,5,1,5 
+63622,7,2,1,18,NA,3,3,2,18,222,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,88448.252445,87285.839384,1,95,9,9,3.24,3,3,0,0,0,2,42,1,4,3,NA +63623,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,20611.860643,21777.486671,1,99,5,5,2.02,1,1,0,0,1,1,80,1,5,2,NA +63624,7,2,2,7,NA,1,1,1,7,90,NA,NA,2,2,3,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13616.85154,14798.521446,1,102,6,6,1.73,3,3,0,1,0,1,30,2,5,1,4 +63625,7,2,2,23,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,50915.06085,54078.415833,3,92,2,2,0.4,3,3,1,0,0,1,21,1,2,6,NA +63626,7,2,2,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,58826.425292,59007.297053,1,102,8,8,1.6,7,7,0,4,0,2,39,1,4,1,4 +63627,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,56180.550638,62747.143757,2,98,8,8,3.4,2,2,0,0,2,1,80,1,4,1,4 +63628,7,1,1,61,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,125558.167126,0,3,91,12,5,2.02,2,1,0,0,1,1,61,1,3,3,NA +63629,7,2,2,36,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,15542.93857,15828.161907,3,91,7,7,2.16,3,3,1,0,1,2,36,2,5,1,NA +63630,7,2,2,11,NA,3,3,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,41020.258665,40851.726244,1,103,15,15,5,2,2,0,1,0,1,49,1,5,77,NA +63631,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,85610.546667,92292.669073,2,91,15,15,5,4,4,2,0,0,1,35,1,5,1,5 +63632,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,65891.955175,73297.270697,1,95,7,7,2.72,2,2,0,0,2,1,80,1,3,1,3 +63633,7,2,1,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13232.135,13189.930654,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 +63634,7,2,1,4,NA,4,4,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8890.779467,9258.147648,3,91,1,1,0.07,6,6,2,3,0,2,30,1,2,3,NA +63635,7,2,1,79,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,1,2,NA,55241.951079,58472.387275,2,93,7,7,2.31,2,2,0,0,2,2,79,1,3,1,5 
+63636,7,2,1,17,NA,4,4,2,17,204,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11093.269222,11505.803928,2,99,3,3,0.44,5,5,1,1,0,2,53,1,4,1,3 +63637,7,2,1,7,NA,3,3,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,74331.764009,78594.005469,2,91,15,15,5,3,3,0,2,0,1,44,2,5,3,NA +63638,7,2,2,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,125007.681473,127469.032295,1,92,10,10,3.4,3,3,0,0,0,1,56,1,4,1,5 +63639,7,2,1,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16970.447459,16981.184092,1,99,8,8,1.99,5,5,1,0,0,1,55,1,5,1,2 +63640,7,2,1,0,1,3,3,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6420.20398,6662.903304,2,97,3,3,0.33,6,6,2,0,0,2,22,2,4,1,3 +63641,7,2,1,12,NA,3,3,1,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,81241.067732,81008.266507,2,102,14,14,3.44,5,5,1,2,0,2,34,1,4,6,NA +63642,7,2,1,25,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,20759.115219,21340.685849,1,93,2,2,0.32,3,3,0,1,0,1,25,1,3,6,NA +63643,7,2,2,18,NA,3,3,1,18,223,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,71832.578284,75129.017547,1,92,10,10,2.1,6,6,1,1,0,2,29,1,4,1,2 +63644,7,2,2,2,NA,4,4,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8010.447273,8217.210509,2,93,7,7,2.58,2,2,1,0,0,2,32,1,5,5,NA +63645,7,2,1,15,NA,5,7,2,15,185,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13693.32264,13720.395459,1,96,5,5,1.45,2,2,0,1,0,2,41,1,4,3,NA +63646,7,1,1,9,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13822.148996,0,2,94,5,5,0.65,6,6,0,2,0,1,53,NA,NA,6,NA +63647,7,2,2,2,NA,4,4,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5158.14785,5587.91487,3,90,99,99,NA,5,5,1,1,1,2,63,1,3,2,NA +63648,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,71034.153987,71525.773464,1,101,7,7,1.55,5,5,1,2,0,2,31,1,4,1,2 +63649,7,2,1,1,21,3,3,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,38636.988369,42311.269327,1,91,15,15,4.59,4,4,2,0,0,1,35,1,5,1,5 
+63650,7,2,2,62,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,7863.861726,8261.011593,1,102,13,13,NA,7,7,3,1,2,2,62,2,1,1,2 +63651,7,2,2,76,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,1,1,2,2,1,2,1,NA,13446.397433,13865.38304,1,93,3,3,0.82,2,2,0,0,2,1,80,2,5,1,1 +63652,7,2,1,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,1,2,1,2,2,1,1,2,NA,16995.648055,19983.260181,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +63653,7,2,2,19,NA,2,2,1,19,229,2,NA,2,2,3,12,NA,NA,NA,2,2,2,1,2,2,2,2,1,2,12712.538972,12956.930724,2,93,4,4,0.56,5,5,0,0,0,2,49,2,2,5,NA +63654,7,2,1,11,NA,2,2,2,11,136,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9807.589376,10028.771,2,90,7,7,1.66,4,4,0,3,0,2,34,1,5,3,NA +63655,7,2,2,17,NA,3,3,2,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,97212.131473,101229.4133,1,91,15,15,5,3,3,0,1,0,1,52,NA,NA,1,5 +63656,7,2,2,16,NA,4,4,2,16,203,NA,NA,1,1,NA,8,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7825.55935,7975.961244,3,90,3,3,0.37,5,5,2,2,0,2,36,2,4,4,NA +63657,7,2,1,30,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21280.199633,23174.199432,1,97,14,14,4.5,3,3,1,0,0,1,30,1,5,1,5 +63658,7,2,1,10,NA,3,3,1,10,124,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71470.369236,87471.572436,2,101,8,8,2.81,3,3,0,2,0,1,48,1,3,3,NA +63659,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,36810.071228,39501.597518,1,95,14,14,5,1,1,0,0,1,1,80,1,3,2,NA +63660,7,2,1,11,NA,3,3,1,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20080.431771,20654.625485,1,94,4,4,1,3,3,0,1,0,2,41,1,4,5,NA +63661,7,2,2,46,NA,3,3,1,NA,NA,2,NA,2,1,4,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,30747.096519,32457.841726,2,91,2,2,0.44,3,3,0,1,0,1,46,2,3,1,4 +63662,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,33707.673642,36318.620408,2,95,10,10,4.3,2,2,0,0,2,1,80,1,5,1,4 +63663,7,2,2,42,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,4,2,2,2,2,1,2,2,NA,NA,NA,NA,32208.300114,32376.263659,1,102,4,4,0.67,4,4,0,1,0,1,23,2,4,5,NA 
+63664,7,2,1,58,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,18916.732604,18939.145414,2,91,9,9,5,1,1,0,0,0,1,58,1,5,3,NA +63665,7,2,1,56,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,3,5,NA,2,2,2,1,2,2,2,2,1,2,23431.677775,27795.676997,2,93,14,14,2.91,6,6,2,0,1,2,74,NA,NA,2,NA +63666,7,2,2,79,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,30880.887565,31700.101057,1,98,3,3,1.12,1,1,0,0,1,2,79,1,3,2,NA +63667,7,2,2,2,NA,3,3,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,36506.435703,36986.142564,1,98,9,9,2.6,4,4,1,1,0,1,31,1,4,1,5 +63668,7,2,1,4,NA,1,1,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14716.463544,14890.443704,2,96,5,5,0.76,5,5,1,2,0,1,44,2,1,1,3 +63669,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,87811.478076,90326.44396,1,90,8,8,1.67,5,5,2,1,0,2,28,1,4,1,5 +63670,7,2,2,8,NA,4,4,2,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7239.045424,7432.710238,1,99,7,7,1.06,7,7,3,1,0,1,38,1,4,6,NA +63671,7,1,2,14,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5268.765205,0,1,99,9,9,2.68,4,4,0,2,0,1,43,2,3,1,NA +63672,7,2,1,9,NA,2,2,2,9,112,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,11567.189008,12721.148711,2,90,2,2,0.32,3,3,0,1,0,1,53,NA,NA,1,1 +63673,7,2,1,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,126463.833097,1,101,6,6,0.97,7,7,2,1,0,1,43,1,2,1,NA +63674,7,2,1,5,NA,5,6,2,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8253.750998,8652.000238,1,90,7,7,1.52,4,4,2,0,0,2,30,2,4,6,NA +63675,7,2,1,12,NA,3,3,2,12,155,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,101055.977672,102514.960722,1,90,15,15,5,4,4,0,2,0,1,44,1,5,1,5 +63676,7,2,1,2,NA,4,4,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5447.377416,6006.624362,2,90,15,15,5,4,4,1,1,0,1,53,2,5,1,5 +63677,7,2,2,2,NA,1,1,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13065.99844,14425.21291,1,95,6,6,1.37,3,3,1,1,0,2,28,1,4,5,NA 
+63678,7,2,2,22,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,2,2,2,1,2,2,1,32537.532358,33640.063825,2,90,1,1,0.22,3,3,0,1,0,2,48,2,2,5,NA +63679,7,2,1,0,11,3,3,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20370.716701,21140.779334,1,91,15,15,4.2,5,5,3,0,0,2,32,1,4,1,5 +63680,7,2,1,54,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,22446.308035,23586.645568,2,94,7,7,1.04,7,7,0,3,0,1,37,2,1,1,3 +63681,7,2,2,5,NA,3,3,2,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,59190.129038,61693.294338,2,91,15,15,5,5,5,2,1,0,2,40,1,5,1,5 +63682,7,2,1,14,NA,3,3,2,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,77169.155154,77548.231618,1,101,14,14,3.9,4,4,0,2,0,2,41,1,2,1,2 +63683,7,2,1,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,29181.414565,29257.589647,1,94,3,3,0.39,6,6,1,0,2,1,80,1,4,1,3 +63684,7,2,2,54,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,17364.980275,22918.553497,2,93,7,7,2.78,2,2,0,1,0,2,54,1,4,3,NA +63685,7,2,2,60,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,4,1,NA,2,2,2,1,2,2,2,2,2,2,12437.053229,13000.378144,3,91,5,5,0.89,4,4,0,2,2,1,61,2,3,1,4 +63686,7,2,2,60,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,4,1,NA,1,2,2,NA,NA,NA,1,2,2,1,12237.578196,12748.319173,2,93,8,8,2.17,4,4,0,0,3,1,80,2,2,1,2 +63687,7,2,1,69,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11761.359913,11893.852645,1,92,14,14,5,2,2,0,0,2,1,69,1,5,1,5 +63688,7,2,1,2,NA,5,6,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8253.750998,8652.000238,1,90,7,7,1.52,4,4,2,0,0,2,30,2,4,6,NA +63689,7,2,2,66,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,93265.413087,97732.118213,2,98,15,15,5,2,2,0,0,2,2,66,1,3,1,1 +63690,7,2,1,29,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12971.121387,14034.473347,1,93,15,15,5,2,2,0,0,0,1,29,2,5,1,5 +63691,7,2,1,21,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,2,2,2,1,2,2,1,40899.412129,44211.773156,1,94,5,5,0.57,7,7,2,1,0,1,58,2,1,1,1 
+63692,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,30259.77569,2,101,4,2,0.6,2,1,0,0,0,2,22,1,4,5,NA +63693,7,2,2,11,NA,1,1,1,11,137,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,16986.005478,17421.337631,2,102,8,8,1.91,5,5,1,2,0,1,36,2,1,1,4 +63694,7,1,1,6,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46081.129115,0,2,95,15,15,4.63,5,5,1,2,0,1,32,1,4,1,4 +63695,7,1,1,56,NA,4,4,NA,NA,NA,1,2,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,15599.953109,0,1,99,12,12,NA,1,1,0,0,0,1,56,1,3,5,NA +63696,7,2,1,15,NA,1,1,1,15,191,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,18635.323223,18738.312717,1,103,5,5,0.74,5,5,1,1,0,2,40,99,3,1,1 +63697,7,2,1,41,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17602.101156,18151.242681,1,96,10,10,2.95,4,4,0,1,0,2,34,2,3,1,5 +63698,7,2,1,42,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,2,2,2,2,2,2,1,2,1,NA,34128.967046,33628.177065,2,93,3,3,0.87,2,2,0,0,1,2,65,2,4,3,NA +63699,7,2,2,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,18462.756377,18592.086302,1,99,6,6,1.46,3,3,0,0,1,2,80,NA,NA,2,NA +63700,7,2,1,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,17602.101156,21123.738865,1,96,14,14,2.19,7,7,0,2,0,1,39,1,2,1,3 +63701,7,2,1,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,145790.86143,146040.776498,1,90,15,15,5,4,4,0,1,0,2,53,1,5,1,5 +63702,7,2,2,11,NA,3,3,1,11,136,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,56803.692074,58623.195384,1,100,15,15,5,4,4,0,2,0,2,47,1,5,1,5 +63703,7,2,2,2,24,4,4,1,2,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,7791.142847,2,100,3,3,0.38,5,5,2,1,0,2,28,1,2,5,NA +63704,7,2,2,75,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,14534.533756,15040.28544,1,96,12,12,NA,3,3,0,0,2,1,77,NA,NA,6,NA +63705,7,2,1,8,NA,2,2,2,8,101,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,9807.589376,10786.008845,2,90,3,3,0.38,5,5,0,4,0,2,33,2,2,5,NA 
+63706,7,2,1,13,NA,3,3,1,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,28732.336483,28618.270464,3,92,7,7,0.81,7,7,2,4,0,1,40,NA,NA,1,4 +63707,7,2,2,63,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,3,3,NA,1,2,2,1,2,2,1,2,2,2,9048.366564,9718.651548,2,93,9,9,4.92,1,1,0,0,1,2,63,2,3,3,NA +63708,7,2,1,63,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13133.86792,13808.275718,2,102,8,8,1.72,5,5,0,2,1,1,63,2,5,1,5 +63709,7,2,2,16,NA,4,4,2,16,202,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10484.6104,10912.740054,2,99,1,1,0.07,4,4,1,1,0,2,24,1,2,5,NA +63710,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,94644.050918,104631.781207,2,91,15,15,5,5,5,1,2,0,1,37,1,4,6,NA +63711,7,1,2,7,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16442.636525,0,1,98,8,8,2.97,2,2,0,1,0,2,38,1,5,5,NA +63712,7,2,1,2,NA,3,3,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21071.650532,24690.382982,2,97,6,6,1.16,4,4,1,1,0,1,27,2,4,1,3 +63713,7,2,1,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,127000.852889,126845.993399,2,92,15,15,5,2,1,0,0,0,1,52,1,5,6,NA +63714,7,2,1,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,87954.465296,88343.63479,2,97,15,15,4.97,5,5,1,0,0,1,48,1,4,1,3 +63715,7,2,2,8,NA,4,4,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,13050.526646,2,101,3,3,0.65,3,3,0,1,0,2,54,1,3,5,NA +63716,7,2,1,2,NA,2,2,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,8331.647763,8430.145707,2,90,3,3,0.46,5,5,1,3,0,2,35,2,1,4,NA +63717,7,2,2,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,37070.062227,37637.166459,1,92,1,1,0.03,2,2,0,1,0,2,50,1,4,3,NA +63718,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,30543.825378,35038.368904,2,91,2,2,0.86,1,1,0,0,0,2,50,2,3,2,NA +63719,7,2,1,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,34629.549705,1,94,5,5,0.74,5,5,1,1,0,2,24,1,3,1,4 
+63720,7,2,1,5,NA,1,1,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,17973.290893,2,98,1,1,0.19,3,3,2,0,0,2,31,1,4,2,NA +63721,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,NA,NA,NA,1,2,2,1,118761.81384,121363.708734,1,94,8,8,2.43,3,3,0,0,0,2,46,1,3,1,NA +63722,7,1,1,35,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,17371.064048,0,1,96,NA,NA,NA,4,4,1,1,0,2,37,NA,NA,1,4 +63723,7,2,2,62,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,14204.126838,15256.34028,3,92,5,5,1.56,2,2,0,0,1,1,58,1,3,1,2 +63724,7,2,1,33,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,3,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,51543.062078,51154.050295,3,92,3,3,0.52,5,5,2,1,0,2,29,2,1,1,3 +63725,7,2,1,5,NA,3,3,2,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,30883.231636,34843.624635,1,95,7,7,1.17,6,6,1,3,0,2,44,1,4,1,NA +63726,7,2,2,12,NA,2,2,2,12,147,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21449.694498,22089.715913,1,98,3,3,0.4,7,7,2,3,0,2,31,2,5,1,2 +63727,7,2,2,4,NA,5,6,1,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5879.523197,6241.641704,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 +63728,7,2,2,11,NA,1,1,1,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16986.005478,17733.622754,2,102,7,7,1.53,5,5,1,2,0,1,36,1,2,1,3 +63729,7,2,1,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,160904.289966,160708.090054,1,97,15,15,3.89,5,5,0,2,0,1,50,1,4,6,NA +63730,7,2,1,14,NA,3,3,1,14,170,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71934.689876,71392.8162,1,100,15,15,5,5,5,0,3,0,1,47,1,5,1,5 +63731,7,2,1,51,NA,2,2,2,NA,NA,2,NA,2,2,77,NA,1,3,NA,2,2,2,2,2,2,NA,NA,NA,NA,24172.845721,24635.602694,2,99,1,1,0.03,2,2,0,0,0,1,51,2,1,3,NA +63732,7,2,1,60,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,3,1,NA,1,2,2,1,2,2,1,2,2,2,5185.036848,5649.75477,2,90,7,7,0.89,7,7,1,3,3,1,60,2,3,1,3 +63733,7,2,2,58,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,23297.357076,24139.998866,2,93,9,9,2.6,4,4,0,0,0,2,58,2,4,4,NA 
+63734,7,2,1,43,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,32980.717958,32895.997285,1,95,2,2,0.6,1,1,0,0,0,1,43,1,4,3,NA +63735,7,2,1,29,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,4,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,14385.653726,15533.829525,2,101,5,3,1.1,2,1,0,0,0,1,29,2,4,1,NA +63736,7,2,1,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,30631.476666,31783.189579,2,101,6,6,1.54,3,3,0,1,0,2,34,1,4,1,3 +63737,7,2,1,8,NA,4,4,1,8,104,NA,NA,2,2,2,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11443.206518,12087.326286,1,100,9,9,1.78,6,6,1,1,0,2,45,2,3,1,3 +63738,7,2,1,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19260.892847,19199.459573,2,97,5,5,0.76,5,5,0,0,0,2,50,1,4,5,NA +63739,7,2,2,71,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,12863.404053,17087.605763,2,90,2,2,0.31,5,5,0,2,1,2,71,1,2,2,NA +63740,7,2,1,8,NA,3,3,2,8,100,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,26349.460983,27598.763847,2,97,4,4,1.09,2,2,0,1,0,2,35,1,2,4,NA +63741,7,2,1,72,NA,1,1,1,NA,NA,1,2,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,NA,21815.897449,24704.366996,3,92,5,4,1.39,2,1,0,0,2,2,63,NA,NA,3,NA +63742,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,1,2,1,2,2,1,1,2,NA,87944.345504,91474.433025,2,98,10,10,3.78,3,3,0,0,0,1,53,1,3,1,4 +63743,7,2,2,9,NA,1,1,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16371.237244,2,98,14,14,4.05,3,3,0,1,0,1,38,1,3,1,4 +63744,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,196995.351093,202246.928022,1,91,15,15,5,3,3,0,1,0,1,52,NA,NA,1,5 +63745,7,2,1,0,4,1,1,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,1,2,NA,NA,NA,NA,7284.164858,7692.652584,2,98,5,5,0.76,5,5,3,1,0,2,27,1,3,4,NA +63746,7,2,1,49,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,28542.421068,29427.85637,2,101,2,2,0.73,1,1,0,0,0,1,49,1,3,5,NA +63747,7,2,1,4,NA,5,6,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5953.107662,6129.253313,3,90,5,5,1.15,3,3,1,0,0,1,32,2,3,1,1 
+63748,7,2,2,11,NA,2,2,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12307.832776,13375.906079,2,93,10,10,2.26,6,6,0,4,0,1,34,1,4,1,3 +63749,7,2,2,43,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,2,1,2,1,1,2,1,1,2,1,3,9145.761989,9194.124252,2,92,10,8,2.01,7,4,1,1,1,2,27,2,3,1,3 +63750,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23845.8146,22808.13483,1,98,12,1,0.27,4,1,0,0,0,1,21,1,4,6,NA +63751,7,2,2,5,NA,1,1,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10493.785765,11155.348431,2,103,77,77,NA,5,5,1,2,0,2,30,1,2,1,2 +63752,7,2,1,60,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,25436.904729,28576.794771,1,94,3,3,1.25,1,1,0,0,1,1,60,1,3,3,NA +63753,7,2,1,33,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,3,6,NA,2,2,2,2,2,2,2,2,2,2,34438.924452,34835.654759,1,100,8,3,0.68,6,3,1,0,0,1,33,2,3,6,NA +63754,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,99120.116925,102640.158973,2,98,15,15,5,4,4,0,2,0,2,46,1,4,1,NA +63755,7,2,1,2,NA,3,3,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46257.816906,49754.984306,2,94,15,15,5,3,3,1,0,0,1,34,1,5,1,5 +63756,7,2,2,2,NA,4,4,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8754.193667,9543.319886,2,98,5,5,0.59,7,7,3,0,0,2,50,1,5,4,NA +63757,7,2,2,5,NA,2,2,2,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8825.559072,9505.166523,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +63758,7,2,1,28,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,4,5,NA,1,2,2,1,2,2,1,2,2,3,11182.713216,11928.362428,2,90,99,4,1.38,3,1,0,0,2,1,60,NA,NA,1,NA +63759,7,2,1,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,90969.330762,95023.624595,1,99,14,14,4.86,3,3,0,1,0,1,56,1,5,1,5 +63760,7,2,1,16,NA,4,4,2,16,195,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13416.172328,13756.125082,1,96,7,7,1.39,5,5,0,2,2,1,69,2,2,1,2 +63761,7,2,2,21,NA,2,2,1,NA,NA,2,NA,2,1,2,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,40149.982555,39975.11173,2,103,7,7,2.64,2,2,0,0,0,2,21,2,3,1,3 
+63762,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,34128.92969,36973.759081,1,92,5,5,1.15,3,3,1,0,0,1,23,1,4,1,4 +63763,7,2,2,17,NA,5,6,2,17,205,2,NA,2,1,4,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7649.811754,7945.620413,2,90,6,6,1.24,4,4,0,1,0,1,57,2,5,1,3 +63764,7,2,2,14,NA,1,1,2,14,172,NA,NA,2,2,4,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18368.872199,18995.639955,2,94,12,12,NA,4,4,0,2,0,1,47,2,2,1,2 +63765,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,1,2,1,2,2,1,1,2,NA,24952.423186,27719.90048,2,98,14,14,4.64,3,3,0,0,1,1,49,1,3,1,9 +63766,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,163102.567998,164787.083347,1,99,15,15,5,2,2,0,0,0,1,54,1,3,1,4 +63767,7,2,1,66,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,6,NA,2,2,2,1,2,2,1,2,2,2,11019.434708,11290.181976,3,91,4,4,1.38,2,1,0,0,1,1,66,2,1,6,NA +63768,7,2,1,64,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,2,1,NA,2,2,2,2,2,2,1,2,2,2,8609.250304,9380.869295,2,90,12,12,NA,3,3,0,0,2,2,61,2,3,4,NA +63769,7,2,1,44,NA,2,2,2,NA,NA,2,NA,2,1,3,NA,3,1,NA,1,2,2,1,2,2,2,2,2,2,37883.328863,38044.589557,1,93,15,15,2.96,7,7,0,1,1,2,18,1,2,NA,NA +63770,7,2,2,11,NA,4,4,1,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10422.423011,10751.577119,1,98,3,3,0.86,2,2,0,1,0,2,34,1,4,5,NA +63771,7,2,1,43,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,32719.762791,34306.488277,1,101,7,7,2.71,2,2,0,0,0,1,43,1,4,1,4 +63772,7,2,1,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,136843.962333,135447.112041,1,95,14,14,5,2,2,0,0,2,2,65,1,4,1,4 +63773,7,2,1,56,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,31302.008691,30991.200711,2,91,9,9,5,1,1,0,0,0,1,56,1,5,5,NA +63774,7,2,1,8,NA,3,3,1,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,62291.152254,66151.751642,3,92,6,6,1.17,4,4,0,2,0,2,30,1,2,1,4 +63775,7,2,2,1,22,5,6,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4686.494502,4910.642079,1,90,14,14,3.33,5,5,1,2,0,1,41,1,5,1,5 
+63776,7,2,1,7,NA,1,1,2,7,87,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,16288.924956,19190.320193,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 +63777,7,2,2,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,30442.30641,30653.986309,1,95,77,77,NA,3,3,0,0,0,2,41,1,2,5,NA +63778,7,2,2,14,NA,4,4,1,14,171,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10972.028338,11002.546815,1,103,2,2,0.53,2,2,0,1,0,2,51,1,3,5,NA +63779,7,2,1,4,NA,4,4,2,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9488.600894,10206.6462,2,97,6,6,0.92,7,7,1,4,0,2,29,1,3,5,NA +63780,7,2,2,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,35126.205635,35295.519265,1,95,7,7,1.17,6,6,1,3,0,2,44,1,4,1,NA +63781,7,2,2,63,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,4,6,NA,1,2,2,1,2,2,2,2,2,2,10680.068244,11125.805824,2,93,15,8,4.48,2,1,0,0,2,2,63,2,4,6,NA +63782,7,2,2,1,19,5,7,1,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5899.406975,6347.827712,1,103,3,3,0.37,5,5,1,2,0,2,30,1,4,5,NA +63783,7,2,2,10,NA,4,4,1,10,130,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8930.342072,9169.253826,1,100,15,15,3.7,5,5,0,3,0,1,51,1,5,1,5 +63784,7,2,2,40,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,4,1,2,2,2,2,2,2,2,NA,NA,NA,NA,34954.173075,36023.601038,3,92,9,9,2.46,4,4,0,2,0,1,43,2,3,1,4 +63785,7,1,2,32,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,3,1,2,2,1,2,2,NA,NA,NA,NA,86578.861495,0,2,101,8,8,1.72,5,5,0,3,0,1,37,1,3,1,3 +63786,7,2,1,8,NA,5,6,2,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6276.493588,6689.515302,1,98,15,15,5,4,4,1,1,0,1,40,NA,NA,1,5 +63787,7,2,1,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18404.681357,22792.166915,1,95,6,6,0.81,6,6,2,2,0,1,30,1,3,1,4 +63788,7,2,2,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,123493.356057,126566.961964,1,102,3,3,0.92,1,1,0,0,0,2,40,1,3,3,NA +63789,7,1,1,29,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,84850.826285,0,3,90,15,15,5,2,1,0,0,0,1,29,1,5,1,NA 
+63790,7,2,1,11,NA,4,4,2,11,135,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11277.594097,11665.009628,1,90,5,5,0.74,5,5,0,2,0,2,18,1,4,NA,NA +63791,7,2,2,0,8,1,1,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6700.950086,6971.360383,2,96,3,3,0.24,7,7,2,3,1,2,40,1,3,3,NA +63792,7,2,2,2,NA,5,6,2,2,35,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6075.554662,6069.137471,1,93,14,14,4.86,3,3,1,0,0,1,31,2,5,1,5 +63793,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,33202.24577,45456.634755,1,99,4,4,1.02,2,2,0,1,0,2,27,1,4,3,NA +63794,7,2,1,4,NA,3,3,2,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,78946.660075,92504.536814,3,91,15,15,5,4,4,1,1,0,2,41,1,5,1,5 +63795,7,2,1,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,152467.08796,152281.176016,1,94,10,7,3.76,2,1,0,0,0,1,59,1,4,6,NA +63796,7,2,2,36,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,18901.436377,19274.920746,3,91,15,10,5,2,1,0,0,0,2,36,1,5,6,NA +63797,7,2,1,14,NA,3,3,2,14,170,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,127790.772987,128099.355719,1,97,15,15,3.89,5,5,0,2,0,1,50,1,4,6,NA +63798,7,2,2,2,NA,3,3,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50865.409844,56139.684211,1,101,7,7,1.74,4,4,1,0,0,1,24,NA,NA,1,4 +63799,7,2,1,69,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,10288.337394,12038.909334,3,91,6,6,1.12,4,4,0,0,2,1,69,2,3,1,1 +63800,7,2,2,3,NA,5,6,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,5022.237557,1,91,2,2,0.32,3,3,1,1,0,2,28,1,4,77,NA +63801,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10440.656902,1,99,6,6,2.3,1,1,0,0,1,2,64,1,5,3,NA +63802,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,18462.756377,18043.935864,1,99,7,7,3.4,1,1,0,0,0,2,48,1,5,3,NA +63803,7,2,2,36,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,2,6,2,2,2,2,2,2,2,2,2,2,2,41791.57979,45058.092516,2,102,7,7,1.79,4,4,0,2,0,1,40,2,2,6,NA 
+63804,7,2,1,61,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,5729.786464,5774.606655,2,95,14,14,4.58,3,3,0,1,1,1,61,1,4,1,5 +63805,7,2,2,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,27769.056387,28050.845299,1,94,7,7,1.65,5,4,0,0,0,1,46,1,4,1,4 +63806,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,38226.070503,39676.327756,1,99,12,12,NA,2,2,0,0,2,1,70,1,5,1,5 +63807,7,2,2,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,10798.193871,11318.855823,2,95,2,2,0.67,2,2,0,0,1,2,61,1,3,3,NA +63808,7,1,2,13,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23477.73925,0,1,91,6,6,1.13,6,6,1,3,0,1,40,1,4,6,NA +63809,7,2,2,10,NA,3,3,2,10,126,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,55626.447796,54931.333538,1,95,9,9,2.13,6,6,0,4,0,2,44,1,1,1,1 +63810,7,2,1,16,NA,5,6,1,16,198,NA,NA,2,2,1,8,NA,NA,NA,1,2,1,1,2,2,1,2,1,NA,7558.274942,7964.822305,2,96,4,4,0.92,3,3,0,1,1,2,41,2,2,1,2 +63811,7,1,2,4,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12129.691938,0,1,97,7,7,1.74,4,4,2,0,0,1,34,1,5,1,5 +63812,7,2,2,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,30932.175051,30085.634936,2,101,3,3,0.68,2,2,0,0,0,1,58,1,1,6,NA +63813,7,2,1,67,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,5,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,6016.509449,6063.572478,1,93,4,4,1.38,1,1,0,0,1,1,67,2,5,2,NA +63814,7,2,1,17,NA,3,3,2,17,208,2,NA,2,1,4,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,57357.902168,67351.058423,1,99,77,77,NA,4,4,1,1,0,1,31,2,3,1,3 +63815,7,2,1,8,NA,4,4,2,8,98,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8579.422451,9062.344401,1,99,5,5,1.13,3,3,1,1,0,2,30,1,1,4,NA +63816,7,2,1,5,NA,4,4,2,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7431.820906,7738.904727,1,99,6,6,1.3,5,5,1,2,0,1,34,1,2,1,3 +63817,7,2,2,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,19075.861607,18553.800612,1,96,15,15,5,2,2,0,1,0,2,47,1,5,3,NA 
+63818,7,2,2,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,NA,NA,NA,NA,35126.205635,36772.568368,1,95,1,1,0.12,3,3,0,2,0,2,40,1,5,3,NA +63819,7,2,2,0,11,1,1,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7579.106293,8082.789623,1,90,15,15,4.77,4,4,1,1,0,2,41,1,5,1,2 +63820,7,2,2,58,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,15561.208623,2,100,5,5,1.08,3,3,0,0,0,1,38,1,2,5,NA +63821,7,2,1,4,NA,4,4,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10510.490567,10944.785425,2,98,6,6,1,5,5,2,1,0,2,31,1,4,6,NA +63822,7,2,1,13,NA,4,4,2,13,160,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13416.172328,13756.125082,1,96,14,14,2.19,7,7,0,2,0,1,39,1,2,1,3 +63823,7,2,1,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,104488.914565,109862.797404,1,98,3,1,0.27,4,1,0,0,0,1,20,1,4,5,NA +63824,7,1,1,34,NA,5,6,NA,NA,NA,2,NA,2,1,5,NA,3,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,16974.834956,0,1,93,9,9,1.77,7,7,0,2,0,2,56,NA,NA,5,NA +63825,7,2,1,31,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,20729.794283,21506.407763,3,91,14,14,5,1,1,0,0,0,1,31,1,5,5,NA +63826,7,1,1,9,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,55902.987481,0,1,103,15,15,5,2,2,0,1,0,2,52,2,5,3,NA +63827,7,2,2,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,29756.667529,30535.306876,2,98,4,4,1.19,2,2,0,0,0,2,52,1,3,1,NA +63828,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,118611.064701,118209.809508,1,91,10,10,4.76,2,2,0,0,2,2,64,1,4,1,5 +63829,7,2,2,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,80418.665565,83274.564983,2,100,10,10,3.13,4,4,0,2,0,1,45,1,4,1,4 +63830,7,2,2,64,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,1,2,2,2,2,1,NA,11225.5556,12146.092964,2,93,8,8,2.62,3,3,0,0,2,2,64,2,1,1,1 +63831,7,2,2,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,20000.263815,19016.481945,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA 
+63832,7,2,1,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,133542.212862,137914.897649,3,91,8,8,1.95,4,4,2,0,0,2,30,1,5,1,4 +63833,7,2,1,9,NA,2,2,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,13742.678011,2,98,6,6,0.78,7,7,1,3,1,2,63,1,2,4,NA +63834,7,2,2,52,NA,4,4,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,15776.849485,18474.07459,2,93,14,14,2.78,5,5,0,0,0,1,52,2,4,1,2 +63835,7,2,2,76,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,1,2,1,2,2,1,1,2,NA,12535.973802,12972.182488,2,100,15,15,4.97,5,5,0,2,1,2,42,1,5,1,5 +63836,7,2,1,16,NA,4,4,2,16,197,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12462.601191,12584.643654,2,97,5,5,0.92,5,5,0,3,0,2,54,1,3,2,NA +63837,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,63504.762752,64182.95787,1,91,10,10,5,1,1,0,0,1,2,70,1,4,2,NA +63838,7,2,2,78,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,25812.913537,27430.822937,2,95,4,4,1.34,1,1,0,0,1,2,78,1,3,2,NA +63839,7,2,1,75,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,9257.537917,9400.859081,1,99,10,10,2.71,5,5,1,1,2,1,75,1,1,1,3 +63840,7,2,1,10,NA,3,3,2,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,57057.523607,61384.115788,1,98,10,10,3.04,4,4,0,2,0,2,47,1,4,1,3 +63841,7,2,1,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,59682.963348,63721.06052,2,102,10,10,4.76,2,2,0,0,0,1,27,1,4,1,4 +63842,7,2,1,0,7,5,6,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8577.614982,8519.98112,2,91,14,14,4.19,3,3,1,0,0,2,31,1,5,1,5 +63843,7,2,2,13,NA,2,2,2,13,159,NA,NA,2,2,3,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,17848.732433,20206.102312,2,91,99,99,NA,6,6,1,3,0,2,20,2,2,5,NA +63844,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,28974.537912,30613.083607,1,99,6,6,1.7,2,2,0,0,2,2,77,1,3,1,5 +63845,7,2,1,43,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,28813.038041,29247.173902,1,94,3,3,1.21,1,1,0,0,0,1,43,1,4,3,NA 
+63846,7,2,1,3,NA,3,3,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,38749.197676,45403.777431,1,92,6,6,1.24,4,4,1,1,0,1,30,1,3,3,NA +63847,7,2,1,58,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,152467.08796,155918.172532,3,91,6,6,2.24,1,1,0,0,0,1,58,1,3,5,NA +63848,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,16214.132654,16029.957596,3,90,15,15,4.89,5,5,0,0,0,2,57,2,3,1,3 +63849,7,2,1,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,71279.707602,73321.195079,1,103,14,14,2.96,5,5,1,2,0,1,34,1,4,1,5 +63850,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,140084.621651,140222.178533,1,102,10,10,4.76,2,2,0,0,0,1,23,1,5,5,NA +63851,7,2,1,9,NA,4,4,1,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13423.881856,14179.490667,2,101,1,1,0.27,3,3,0,2,0,2,36,1,3,5,NA +63852,7,2,1,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,158509.005274,161135.953834,1,100,15,15,5,4,4,0,1,0,1,50,1,4,1,4 +63853,7,2,2,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,77299.255327,77536.924917,2,92,15,15,5,2,2,0,0,0,2,36,1,5,1,5 +63854,7,2,2,5,NA,1,1,2,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14812.229505,15746.041025,1,97,8,8,1.45,6,6,2,2,0,2,36,2,2,1,1 +63855,7,2,1,8,NA,2,2,2,8,97,NA,NA,2,1,3,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14700.46789,14706.980156,1,97,15,15,5,4,4,1,1,0,2,43,1,5,1,5 +63856,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23845.8146,22808.13483,1,98,2,1,0.13,4,1,0,0,0,2,19,1,4,NA,NA +63857,7,1,1,4,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8199.080864,0,1,90,4,4,0.67,5,5,3,0,0,2,32,2,3,3,NA +63858,7,2,2,20,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,2,6,1,2,2,2,2,2,2,2,2,2,2,42621.881199,51089.379491,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +63859,7,2,1,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,17648.758915,17930.45653,2,99,3,3,0.44,5,5,1,1,0,2,53,1,4,1,3 
+63860,7,2,2,50,NA,5,7,2,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,120670.257812,123358.620194,3,90,9,9,3.14,3,3,0,0,0,1,56,2,3,1,3 +63861,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,138075.879417,141933.339512,2,91,15,15,5,2,2,0,0,1,2,57,1,5,1,5 +63862,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,NA,43004.283229,49692.210403,1,97,NA,77,NA,2,1,0,0,2,1,80,1,3,6,NA +63863,7,2,1,6,NA,4,4,2,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9502.15317,9484.287948,2,97,13,13,NA,6,6,2,2,0,2,24,1,2,6,NA +63864,7,2,1,3,NA,1,1,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14993.478359,14600.446315,1,97,6,3,0.45,7,6,2,1,0,1,29,2,2,1,1 +63865,7,2,1,2,NA,2,2,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11853.772636,11869.789876,2,96,6,6,1.12,4,4,2,0,0,1,27,2,2,6,NA +63866,7,2,2,0,3,2,2,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,7367.430495,7419.943227,2,91,2,2,0.19,5,5,3,0,0,1,24,2,1,1,3 +63867,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,28930.832375,30957.619671,1,92,5,5,1.05,3,3,1,1,0,2,35,1,4,5,NA +63868,7,2,1,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,131445.986898,130104.237054,1,95,10,10,5,1,1,0,0,1,1,69,1,5,3,NA +63869,7,1,1,36,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,86986.68246,0,2,94,15,15,5,2,2,0,0,0,1,36,1,2,1,4 +63870,7,2,2,11,NA,4,4,2,11,140,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8184.286585,8442.757341,2,97,2,2,0.21,7,7,2,3,0,2,32,1,4,5,NA +63871,7,2,2,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18490.479848,18604.849683,2,100,7,7,2.37,3,3,0,1,1,2,45,1,5,1,NA +63872,7,2,2,18,NA,5,6,1,18,218,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,6370.316505,6989.133332,2,92,7,7,1.17,6,6,0,1,1,1,78,2,1,1,3 +63873,7,2,1,80,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,8763.51401,9280.560292,2,100,5,3,1.29,3,1,0,0,2,2,80,1,1,2,NA 
+63874,7,2,1,3,NA,1,1,2,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14505.510202,14964.673559,2,94,7,7,1.23,6,6,2,1,0,1,33,2,1,6,NA +63875,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,10266.896689,11139.888992,1,96,3,3,1.25,1,1,0,0,1,2,64,1,2,2,NA +63876,7,2,2,9,NA,4,4,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7321.387082,7717.353133,1,103,3,3,0.52,3,3,0,2,0,2,45,1,4,5,NA +63877,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,29672.504425,2,101,12,1,0.32,4,1,0,0,0,2,20,1,4,5,NA +63878,7,2,1,8,NA,3,3,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25457.327997,26428.651236,3,92,6,6,1.41,4,3,0,1,0,1,41,1,4,1,4 +63879,7,2,1,76,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,38226.070503,39676.327756,1,99,7,7,3.31,1,1,0,0,1,1,76,1,5,2,NA +63880,7,2,2,77,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,53541.401974,54961.757487,1,99,7,7,2.72,2,2,0,0,2,1,77,1,2,1,3 +63881,7,2,2,25,NA,3,3,2,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,50197.83865,52206.228199,1,93,12,6,2.75,4,1,0,0,0,2,25,2,5,5,NA +63882,7,2,1,43,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15271.238172,15754.77307,3,91,15,15,5,3,3,0,1,0,2,44,2,5,1,5 +63883,7,1,2,80,NA,2,2,NA,NA,NA,2,NA,2,1,8,NA,1,2,NA,2,2,2,2,2,2,NA,NA,NA,NA,17318.187297,0,2,90,4,4,0.57,5,5,1,0,2,2,80,2,1,2,NA +63884,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,7867.418849,7869.055339,1,103,77,77,NA,6,6,0,2,2,1,70,NA,NA,1,1 +63885,7,2,1,19,NA,4,4,1,19,230,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,1,1,0.32,2,1,0,0,0,1,19,1,4,NA,NA +63886,7,2,2,7,NA,5,6,2,7,95,NA,NA,2,1,2,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11068.40581,11617.80042,1,97,15,15,5,3,3,0,1,0,2,40,1,5,1,5 +63887,7,2,2,64,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,94637.019439,107051.932277,1,94,7,7,3.13,1,1,0,0,1,2,64,1,3,2,NA 
+63888,7,2,1,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,20948.005175,20851.734447,2,98,5,5,0.59,7,7,3,0,0,2,50,1,5,4,NA +63889,7,2,1,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,28813.038041,29425.409675,1,94,1,1,0.36,1,1,0,0,0,1,48,1,3,5,NA +63890,7,2,2,60,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,1,4,NA,2,2,2,1,2,2,2,2,2,2,9716.805546,10308.451947,2,90,3,3,0.68,2,2,0,0,1,1,21,2,4,5,NA +63891,7,2,2,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,18441.731082,18102.807884,1,96,9,9,4.13,2,2,0,0,1,2,55,1,4,5,NA +63892,7,2,2,10,NA,3,3,1,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,49543.011606,52616.361829,1,94,15,15,5,6,6,0,4,0,1,38,1,5,1,4 +63893,7,1,1,6,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12722.361971,0,2,93,8,8,2.62,3,3,0,1,0,1,43,2,4,6,NA +63894,7,2,2,19,NA,4,4,2,19,231,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13775.846051,13737.029624,2,97,7,7,1.06,7,7,1,2,0,2,40,1,4,5,NA +63895,7,2,1,37,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18094.858384,18452.252422,1,96,15,15,5,3,3,0,1,0,2,37,1,4,1,4 +63896,7,2,2,9,NA,5,6,2,9,118,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9620.269705,10320.938187,2,91,14,14,3.47,4,4,1,1,0,2,40,2,5,1,NA +63897,7,2,2,7,NA,2,2,2,7,95,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13338.830817,13909.366846,1,90,6,6,1.68,3,3,1,1,0,2,36,2,4,4,NA +63898,7,2,2,51,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,NA,NA,NA,1,2,2,1,24870.513993,25000.211608,2,98,6,6,0.63,7,7,2,2,1,1,60,1,3,1,2 +63899,7,2,2,75,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,1,3,NA,2,2,2,2,2,2,NA,NA,NA,NA,18006.276697,20109.914446,2,93,3,3,1.14,1,1,0,0,1,2,75,2,1,3,NA +63900,7,2,2,36,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,1,1,1,1,2,1,1,2,1,NA,NA,NA,NA,14138.631841,14167.501749,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +63901,7,2,1,12,NA,2,2,1,12,155,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20560.901695,20875.182272,2,96,2,2,0.27,6,6,1,3,0,1,34,NA,NA,1,NA 
+63902,7,2,1,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,143397.643998,144742.519652,2,98,10,10,4.76,2,2,0,0,2,1,68,1,4,1,NA +63903,7,2,1,19,NA,4,4,1,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13731.625553,14242.275028,1,100,14,14,3.93,3,3,0,1,0,2,47,1,5,4,NA +63904,7,2,2,42,NA,3,3,2,NA,NA,2,NA,2,2,6,NA,4,4,3,1,2,2,1,2,2,NA,NA,NA,NA,113005.700223,113116.666806,1,99,10,10,5,1,1,0,0,0,2,42,2,4,4,NA +63905,7,2,1,23,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,1,1,2,1,NA,NA,NA,NA,14979.624397,15387.451243,1,102,3,2,0.73,2,1,0,0,0,1,24,2,4,5,NA +63906,7,2,1,8,NA,4,4,2,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11324.954668,11525.075349,1,93,4,4,1.03,3,3,1,1,0,2,35,2,3,4,NA +63907,7,2,1,60,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,11492.781131,11582.681279,1,100,15,14,5,2,1,0,0,2,1,60,1,5,6,NA +63908,7,2,1,55,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,22446.308035,22116.943066,2,94,7,7,1.33,6,6,0,1,0,1,55,2,2,1,1 +63909,7,2,2,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,43972.04458,44690.47559,2,96,3,3,0.95,2,2,0,0,2,2,62,1,4,1,4 +63910,7,1,2,41,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,134076.17118,0,1,98,15,15,5,3,3,0,0,0,2,41,1,5,1,NA +63911,7,2,2,67,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,10083.559248,10533.779383,1,96,9,9,4.92,1,1,0,0,1,2,67,1,4,3,NA +63912,7,2,1,28,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,11477.00145,11789.46785,2,100,12,5,1.88,5,1,0,0,0,1,22,NA,NA,5,NA +63913,7,2,1,18,NA,2,2,2,18,223,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14081.782012,14391.713696,2,90,2,2,0.25,5,5,0,1,0,2,41,2,4,1,NA +63914,7,2,2,7,NA,1,1,1,7,88,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16535.37648,2,98,9,9,2.6,4,4,0,2,0,1,30,1,2,1,2 +63915,7,2,2,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,20048.680628,20209.076405,2,95,7,7,1.13,6,6,0,3,1,1,52,1,4,1,4 
+63916,7,2,1,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,19788.748292,21708.160855,1,94,4,4,0.59,5,5,0,3,0,1,34,1,2,1,4 +63917,7,2,1,71,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,14055.866747,14300.892374,2,93,5,5,1.32,2,2,0,0,2,1,71,2,4,1,1 +63918,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,152858.509804,157853.944421,1,95,12,12,NA,3,3,0,0,0,1,49,1,5,1,NA +63919,7,2,1,2,NA,1,1,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11160.282155,11957.436882,2,97,2,2,0.27,3,3,2,0,0,2,19,1,3,NA,NA +63920,7,2,2,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,1,1,2,2,1,2,2,1,2,2,1,104934.725755,109561.954446,2,98,10,10,4.42,2,2,0,0,0,1,31,1,4,1,3 +63921,7,2,1,3,NA,4,4,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8402.233801,8920.908511,2,101,1,1,0.1,6,6,1,2,1,2,27,1,2,1,2 +63922,7,2,2,38,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,5,2,1,2,2,1,2,2,1,2,2,3,14767.290807,18767.43866,2,90,5,5,1.43,2,2,0,1,0,2,38,2,4,5,NA +63923,7,2,2,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13728.308948,13391.816234,2,90,15,15,5,4,4,0,0,0,1,57,2,5,1,5 +63924,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,2,1,2,2,1,2,2,1,2,2,1,18730.678506,18151.501185,2,96,4,4,0.57,6,6,0,3,0,2,29,1,3,4,NA +63925,7,1,2,32,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,6,3,1,2,2,1,2,2,NA,NA,NA,NA,18933.643351,0,1,91,6,6,1.13,6,6,1,3,0,1,40,1,4,6,NA +63926,7,2,1,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21317.283165,21013.422419,2,96,5,5,0.67,6,6,1,2,1,1,34,1,4,1,4 +63927,7,2,1,37,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,2,2,1,1,2,1,2,2,2,2,34997.800447,34733.660984,2,96,3,3,0.46,5,5,1,2,0,1,37,1,1,1,2 +63928,7,2,2,42,NA,4,4,2,NA,NA,2,NA,2,1,8,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,22754.478969,25596.207975,1,96,7,7,3.58,1,1,0,0,0,2,42,2,5,5,NA +63929,7,2,2,34,NA,2,2,2,NA,NA,2,NA,2,1,99,NA,4,1,1,1,2,2,2,2,2,1,2,2,1,28899.648059,29089.927914,1,96,15,15,5,4,4,0,2,0,1,36,2,3,1,4 
+63930,7,2,1,4,NA,5,7,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7666.445036,8598.756895,1,96,6,6,1.34,3,3,1,0,0,2,42,2,4,6,NA +63931,7,1,2,32,NA,5,6,NA,NA,NA,2,NA,2,1,4,NA,3,2,3,1,2,2,1,2,2,NA,NA,NA,NA,20584.427267,0,1,95,6,6,1.34,4,4,0,2,0,2,32,2,3,2,NA +63932,7,2,1,1,13,1,1,1,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15546.999135,15290.852049,2,98,10,10,3.82,3,3,1,0,0,2,33,1,4,1,2 +63933,7,2,2,6,NA,3,3,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19880.837381,19632.404233,1,95,2,2,0.22,4,4,2,1,0,2,22,1,2,5,NA +63934,7,2,1,18,NA,2,2,2,18,227,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26628.917773,31355.430562,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 +63935,7,2,1,17,NA,3,3,2,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20991.942732,21394.366957,1,91,3,3,0.62,3,3,0,1,0,2,55,1,4,4,NA +63936,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,110819.46221,119829.247006,2,96,10,7,3.67,2,1,0,0,0,1,35,1,5,5,NA +63937,7,2,2,15,NA,4,4,1,16,192,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12744.907234,12578.013387,2,93,7,7,2.78,2,2,0,1,0,2,54,1,4,3,NA +63938,7,2,2,12,NA,4,4,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12208.965913,12602.394285,2,100,15,15,4.97,5,5,0,2,1,2,42,1,5,1,5 +63939,7,2,1,66,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,9221.19173,9293.322792,2,95,5,5,1.32,2,2,0,0,2,1,66,1,2,1,4 +63940,7,2,1,15,NA,2,2,2,15,183,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,16033.31661,16386.200415,2,90,10,10,3.13,4,4,1,2,0,2,39,1,5,4,NA +63941,7,2,2,53,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,14680.520497,15280.64157,3,91,9,9,3.24,3,3,0,1,1,1,64,2,2,1,5 +63942,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,NA,NA,NA,NA,145772.192378,148626.818504,1,95,12,12,NA,6,6,2,0,0,2,42,1,2,1,5 +63943,7,2,2,29,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,137368.929197,138597.101074,1,91,8,7,3.21,2,1,0,0,0,1,33,1,5,6,NA 
+63944,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,39031.957066,39875.03294,1,95,77,77,NA,3,3,0,0,0,2,41,1,2,5,NA +63945,7,2,2,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,87611.353116,90505.825807,2,92,10,10,5,1,1,0,0,0,2,29,1,5,5,NA +63946,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,15867.258653,19649.848899,3,91,6,6,1.22,5,5,1,2,0,2,37,1,4,1,2 +63947,7,2,1,1,16,4,4,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,6454.74783,2,100,6,6,1.51,3,3,1,0,0,1,29,1,3,1,4 +63948,7,2,2,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,24351.694017,24510.535836,1,100,12,14,5,2,1,0,0,0,2,50,1,3,6,NA +63949,7,2,2,50,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18295.488967,19299.909712,2,103,12,12,NA,4,4,0,1,0,2,50,2,3,1,4 +63950,7,2,2,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,22531.325817,21485.967466,2,91,15,15,5,4,4,0,0,1,1,61,NA,NA,1,2 +63951,7,2,1,14,NA,4,4,2,14,178,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11806.79775,12091.578602,1,91,15,15,5,6,6,1,2,0,2,42,2,5,1,5 +63952,7,2,2,10,NA,3,3,1,10,123,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,63812.58912,64724.869865,3,91,15,15,5,2,2,0,1,0,1,42,1,5,2,NA +63953,7,2,2,31,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,39561.667842,38764.112139,3,92,7,7,1.49,5,5,0,2,1,2,62,1,4,2,NA +63954,7,2,1,41,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,17249.311662,20609.214634,2,91,6,6,1.57,3,3,0,1,0,1,41,2,3,1,3 +63955,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,128294.718377,130390.843426,1,93,15,12,NA,4,1,0,0,3,1,80,1,5,2,NA +63956,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,87648.893016,94490.113574,2,91,15,15,5,4,4,2,0,0,2,33,1,5,1,5 +63957,7,2,2,59,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19676.781212,23812.59189,1,103,7,7,1.33,5,5,0,1,1,1,28,1,2,1,3 
+63958,7,2,2,60,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,3,2,NA,2,2,2,1,2,2,2,2,1,2,11851.128358,12729.036357,2,93,6,6,0.64,7,7,2,1,3,2,60,2,3,2,NA +63959,7,2,1,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,27356.080541,28129.563738,1,101,6,6,1.17,4,4,0,1,0,1,41,1,3,6,NA +63960,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25651.892165,25126.271361,1,97,6,6,1.41,3,3,0,1,0,2,51,1,4,5,NA +63961,7,2,2,0,9,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6699.512423,6879.514499,3,91,15,14,4.03,5,4,2,0,0,1,42,2,4,1,5 +63962,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,54095.581484,0,2,91,6,6,2.24,1,1,0,0,1,2,80,1,3,2,NA +63963,7,2,1,72,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,83244.241244,88112.194036,2,102,15,15,5,2,2,0,0,2,1,72,1,5,1,NA +63964,7,2,1,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,85786.890667,86591.455494,2,96,15,15,5,3,2,0,0,1,1,62,1,5,1,2 +63965,7,2,1,72,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,19115.0887,22519.216384,2,98,6,6,1.26,4,4,0,1,2,2,63,1,3,1,3 +63966,7,2,2,34,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,26426.249254,26363.744856,1,99,7,1,0.24,2,1,0,0,0,2,38,1,5,6,NA +63967,7,2,2,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,16929.836231,18556.498474,2,101,8,5,1.84,2,1,0,0,0,2,27,2,5,5,NA +63968,7,2,2,61,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,3,NA,2,2,2,1,2,2,2,2,2,2,10310.145764,11073.900839,1,100,13,13,NA,4,4,0,1,1,1,26,2,3,5,NA +63969,7,2,1,8,NA,4,4,1,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9173.220503,9355.60378,2,96,4,4,0.57,6,6,0,3,0,2,29,1,3,4,NA +63970,7,2,2,9,NA,5,6,1,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10235.530383,11121.127441,1,100,7,7,2.51,2,2,0,1,0,2,37,2,5,3,NA +63971,7,2,2,36,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,3,1,2,1,2,1,1,2,2,1,2,1,NA,21470.176619,21617.60214,1,98,7,7,2.71,2,2,0,0,0,1,43,1,3,1,3 
+63972,7,2,1,15,NA,3,3,2,15,189,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74123.161671,78732.269389,1,93,15,15,4.59,4,4,0,1,0,1,57,1,5,1,5 +63973,7,2,2,15,NA,5,6,2,15,190,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12224.9472,12547.183925,1,97,14,14,2.72,7,7,0,2,0,1,40,1,5,1,5 +63974,7,2,1,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,139662.869107,140583.8015,1,100,15,15,5,3,3,0,0,0,1,53,1,5,1,4 +63975,7,2,2,6,NA,4,4,1,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9309.947844,9472.353128,2,98,2,2,0.31,4,4,2,1,0,2,27,1,2,4,NA +63976,7,2,1,1,13,4,4,1,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8022.100831,8575.148723,2,102,7,7,1.53,5,5,1,3,0,2,36,1,5,5,NA +63977,7,2,2,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,18070.666316,17713.108755,1,99,6,6,1.46,3,3,0,0,1,2,80,NA,NA,2,NA +63978,7,2,1,12,NA,2,2,1,12,146,NA,NA,2,2,1,6,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15674.964193,15512.049412,2,93,9,9,1.49,7,7,0,3,0,2,41,2,5,1,5 +63979,7,2,1,8,NA,3,3,2,8,105,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,65374.972543,69123.624578,1,90,15,15,5,4,4,0,2,0,1,37,1,5,1,5 +63980,7,2,2,6,NA,4,4,2,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7814.742747,8095.069503,2,95,7,7,2.64,2,2,0,1,0,1,48,1,2,77,NA +63981,7,2,2,6,NA,3,3,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,43528.185872,43349.34959,1,92,8,8,1.45,6,6,1,3,0,1,36,1,3,1,4 +63982,7,2,2,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,41983.304745,42669.243011,1,101,7,7,2.31,3,2,0,0,1,2,69,1,4,2,NA +63983,7,2,1,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,94698.084211,97410.286035,1,101,5,5,0.89,5,5,1,2,0,1,31,1,2,1,1 +63984,7,2,1,24,NA,5,7,2,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,33942.93341,34470.44913,3,91,6,6,2.04,2,2,0,0,0,1,24,2,4,5,NA +63985,7,2,1,5,NA,2,2,2,5,70,NA,NA,2,2,2,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12403.412256,12420.172189,2,90,2,2,0.32,4,4,1,2,0,2,34,2,1,77,NA 
+63986,7,2,1,75,NA,1,1,2,NA,NA,2,NA,2,1,9,NA,1,1,NA,2,2,2,1,2,2,2,1,2,NA,16607.774808,17550.450209,1,101,3,3,0.93,2,2,0,0,2,1,75,2,1,1,1 +63987,7,2,1,0,1,4,4,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6238.040326,6448.58866,1,93,10,10,2.91,4,4,2,0,0,2,27,1,5,1,4 +63988,7,2,1,4,NA,1,1,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16775.083123,16797.750213,2,98,2,2,0.27,4,4,2,0,0,2,20,2,2,6,NA +63989,7,2,2,53,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,31785.728924,32871.467684,1,101,3,3,0.92,2,2,0,1,0,2,53,1,5,3,NA +63990,7,2,1,17,NA,5,6,2,17,204,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6666.045669,7317.485505,3,90,13,13,NA,3,3,0,2,0,2,41,2,3,4,NA +63991,7,2,2,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,22707.329726,23247.668083,2,97,2,2,0.27,4,4,0,2,0,1,51,1,2,4,NA +63992,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19696.958879,19993.739669,2,95,1,1,0.4,1,1,0,0,0,1,44,1,4,5,NA +63993,7,2,1,8,NA,1,1,1,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14007.413517,3,92,7,7,1.41,5,5,1,2,0,1,40,1,3,1,4 +63994,7,2,2,9,NA,4,4,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,13050.526646,2,101,1,1,0.21,4,4,1,2,0,2,26,1,3,5,NA +63995,7,2,1,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,16851.334496,16987.280633,2,95,1,1,0,1,1,0,0,0,1,56,1,2,2,NA +63996,7,2,1,35,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,19478.738759,23024.340691,1,90,7,7,2.1,3,3,0,1,0,1,35,1,3,6,NA +63997,7,2,2,79,NA,3,3,2,NA,NA,2,NA,2,2,6,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,27102.582153,29013.853259,1,93,13,13,NA,1,1,0,0,1,2,79,2,1,2,NA +63998,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,29448.834066,32687.247837,1,97,9,9,5,1,1,0,0,0,2,48,1,5,5,NA +63999,7,2,2,44,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,14254.989855,14385.318229,1,103,15,15,3.7,5,5,0,2,1,1,55,1,5,1,5 
+64000,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,NA,NA,NA,1,2,2,1,17879.023129,20112.694262,2,90,NA,NA,NA,1,1,0,0,0,1,31,1,3,5,NA +64001,7,2,1,7,NA,4,4,1,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13423.881856,14062.200951,2,101,5,5,0.89,4,4,1,1,1,2,38,1,4,77,NA +64002,7,2,1,80,NA,3,3,2,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,31039.556381,33443.834712,1,96,15,15,5,2,2,0,0,2,2,80,1,5,1,5 +64003,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,138322.767578,143283.052066,1,95,14,14,5,2,2,0,0,0,1,54,1,4,1,3 +64004,7,2,2,0,10,1,1,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,6331.376224,6501.48724,2,97,4,4,0.6,6,6,2,2,0,1,35,2,2,6,NA +64005,7,2,2,12,NA,1,1,1,12,152,NA,NA,2,2,3,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18515.058419,19512.090709,2,96,5,5,0.76,5,5,1,2,0,1,44,2,1,1,3 +64006,7,2,2,9,NA,5,7,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19608.444487,19549.535258,2,98,3,3,0.38,5,5,0,4,0,2,39,1,4,5,NA +64007,7,1,1,0,7,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,7530.249163,0,2,102,12,12,NA,6,6,1,0,0,2,53,1,4,1,1 +64008,7,2,1,4,NA,4,4,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8123.332359,8458.989525,1,96,14,14,2.58,6,6,2,2,0,1,40,2,4,1,4 +64009,7,2,1,32,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18666.270458,19921.991676,2,93,77,77,NA,2,2,0,0,0,2,28,2,5,1,5 +64010,7,1,1,3,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13345.162299,0,2,102,14,14,4.86,3,3,1,0,0,1,30,1,5,1,5 +64011,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,27118.033816,33710.198359,1,94,5,5,1.39,2,2,0,0,2,1,60,1,1,5,NA +64012,7,2,2,16,NA,3,3,2,17,205,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,27482.814969,29965.467716,2,95,7,7,1.13,6,6,0,3,1,1,52,1,4,1,4 +64013,7,2,1,80,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,8606.120537,9222.025464,1,93,2,2,0.69,1,1,0,0,1,1,80,2,5,2,NA 
+64014,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,26141.606772,25260.281919,1,92,9,6,2.24,3,1,0,0,0,2,21,NA,NA,5,NA +64015,7,2,1,32,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,45691.377881,46792.745533,3,91,14,14,5,2,2,0,0,0,1,32,1,4,6,NA +64016,7,2,2,0,8,2,2,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5234.186769,5271.49441,2,90,3,3,0.46,5,5,3,0,0,2,22,1,2,5,NA +64017,7,2,2,24,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,128585.755579,132183.659765,2,97,9,9,4.08,2,2,0,0,0,2,24,1,4,1,3 +64018,7,2,1,35,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18289.793332,18322.51979,1,99,10,10,3.13,4,4,0,2,0,1,35,1,4,1,5 +64019,7,2,2,10,NA,2,2,2,10,123,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,13154.122703,13626.512275,1,93,9,9,2.46,4,4,0,2,0,1,35,2,1,1,1 +64020,7,2,2,17,NA,5,6,2,17,209,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11225.761275,11659.847432,2,91,8,8,2.34,4,4,0,2,0,1,56,2,5,1,5 +64021,7,2,2,62,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13057.178942,13648.591881,1,102,10,10,4.42,2,2,0,0,2,2,62,1,5,1,4 +64022,7,2,2,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,14516.765165,13843.248577,3,90,15,15,4.34,4,4,0,0,1,1,65,1,3,1,4 +64023,7,2,1,80,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,14200.083364,15006.095575,2,98,99,99,NA,1,1,0,0,1,1,80,1,1,2,NA +64024,7,2,2,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,94637.019439,96511.799704,1,94,4,4,1.71,1,1,0,0,1,2,62,1,4,3,NA +64025,7,2,1,31,NA,3,3,1,NA,NA,2,NA,2,1,7,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,88274.617148,90802.847573,2,92,15,14,5,2,1,0,0,0,2,29,1,5,6,NA +64026,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18097.801029,17207.598344,2,97,NA,99,NA,7,6,2,1,1,2,56,1,3,5,NA +64027,7,2,2,5,NA,3,3,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25864.922734,27487.163063,1,95,7,2,0.35,5,4,1,2,0,1,26,1,4,6,NA 
+64028,7,2,1,11,NA,2,2,2,11,142,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,9390.522479,9862.720437,2,90,1,1,0.14,2,2,0,1,0,2,36,1,3,3,NA +64029,7,2,1,22,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,NA,NA,NA,1,2,2,1,39915.513053,41236.907607,2,98,7,7,1.53,5,5,0,0,0,2,48,1,3,5,NA +64030,7,2,1,6,NA,2,2,1,6,78,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14821.597351,14686.992722,3,92,6,6,0.86,7,7,1,4,0,2,36,2,1,1,1 +64031,7,2,2,19,NA,1,1,1,19,233,2,NA,1,1,NA,13,NA,NA,NA,2,2,2,2,2,2,1,2,2,NA,16781.078148,17658.768186,2,103,77,77,NA,5,5,0,2,0,2,45,2,4,5,NA +64032,7,2,1,46,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,36319.60275,35786.668567,1,103,8,8,4.48,1,1,0,0,0,1,46,1,4,3,NA +64033,7,2,2,64,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,5,1,NA,1,2,1,1,2,2,1,2,1,3,14102.354333,14646.654863,3,91,7,7,2.16,3,3,1,0,1,2,36,2,5,1,NA +64034,7,2,2,62,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,13473.304889,14578.166065,2,96,5,5,0.68,6,6,0,3,2,1,60,2,1,1,1 +64035,7,2,1,8,NA,2,2,1,8,98,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,NA,15039.041447,15306.786696,3,91,5,5,0.89,4,4,0,2,2,1,61,2,3,1,4 +64036,7,2,1,69,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,28953.774534,28658.225591,1,98,6,5,1.93,2,1,0,0,2,1,69,1,4,6,NA +64037,7,2,1,28,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,12538.868652,13740.340257,2,95,15,15,5,4,1,0,0,0,1,29,NA,NA,99,NA +64038,7,2,1,4,NA,4,4,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8325.511113,9180.237397,2,99,6,6,1.13,4,4,1,1,0,1,33,1,3,6,NA +64039,7,2,2,0,4,4,4,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3689.415307,3729.699375,2,99,13,13,NA,4,4,1,0,0,2,50,1,2,1,9 +64040,7,2,1,31,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19185.622388,19060.093906,2,95,5,1,0,3,1,0,0,0,1,31,1,4,5,NA +64041,7,2,1,9,NA,4,4,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,10122.702296,10692.492997,2,100,77,77,NA,4,4,0,1,1,2,28,1,3,5,NA 
+64042,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16905.961576,17944.772858,2,94,4,4,1.16,2,2,0,0,0,1,39,1,2,1,5 +64043,7,2,1,14,NA,4,4,2,14,177,NA,NA,2,2,4,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11523.287163,11546.069645,1,96,8,8,2.17,4,4,0,2,0,2,45,2,5,4,NA +64044,7,2,2,8,NA,1,1,1,8,107,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,10870.942302,11918.469007,1,103,10,10,2.82,4,4,0,2,0,1,41,2,1,1,1 +64045,7,2,2,4,NA,3,3,1,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,52159.241328,54365.068635,1,100,6,6,1.18,5,5,2,2,0,2,40,1,5,3,NA +64046,7,2,2,39,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,38500.994547,38409.930489,2,91,2,2,0.81,1,1,0,0,0,2,39,1,2,6,NA +64047,7,2,2,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,58826.425292,61096.366503,1,102,8,8,2.42,4,4,0,2,0,2,34,1,4,1,3 +64048,7,2,2,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,150482.742079,150600.121666,2,98,8,8,3.67,2,2,0,0,0,2,54,1,4,3,NA +64049,7,2,2,16,NA,1,1,1,16,195,NA,NA,2,2,4,10,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,15690.47168,16407.081535,1,102,5,5,0.62,7,7,1,3,0,1,49,2,2,1,1 +64050,7,2,2,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,35469.911999,45030.280097,2,91,3,3,0.86,2,2,0,0,0,2,41,1,4,1,3 +64051,7,2,1,21,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,3,5,NA,1,2,2,NA,NA,NA,1,2,2,1,52571.533452,58252.213979,1,93,4,4,0.92,3,3,0,0,1,1,60,NA,NA,1,4 +64052,7,2,1,60,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,4,NA,2,2,2,1,2,2,2,2,2,2,10004.038848,10557.19122,2,102,8,8,4.13,1,1,0,0,1,1,60,2,1,4,NA +64053,7,2,1,8,NA,1,1,2,8,102,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13285.093011,13346.253362,2,94,6,6,1.15,5,5,1,2,0,1,33,1,2,1,2 +64054,7,2,2,19,NA,4,4,2,19,233,2,NA,1,1,NA,14,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10484.6104,10912.740054,2,99,3,3,0.56,4,4,1,0,0,2,38,1,3,5,NA +64055,7,2,2,59,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,16033.091438,16414.611167,1,96,4,4,1.02,3,3,0,0,0,2,59,1,3,2,NA 
+64056,7,2,1,18,NA,3,3,2,18,221,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,34847.322278,35515.35986,2,91,6,6,1.26,5,5,0,1,2,2,80,1,4,2,NA +64057,7,2,2,11,NA,3,3,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,38662.840486,39387.514697,2,92,15,15,5,5,5,0,3,0,2,46,1,5,1,5 +64058,7,2,2,50,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15499.383981,15868.203732,2,99,5,5,1.26,3,3,1,0,0,2,50,2,3,5,NA +64059,7,2,2,68,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,12886.419545,13879.729659,2,97,1,1,0.22,1,1,0,0,1,2,68,1,3,2,NA +64060,7,2,2,2,NA,3,3,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27529.278041,30383.810541,1,92,5,5,1.05,3,3,1,1,0,2,35,1,4,5,NA +64061,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,35126.205635,35329.988956,1,95,1,1,0.17,2,2,0,1,0,2,47,1,2,3,NA +64062,7,2,2,10,NA,4,4,1,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9309.947844,9654.826166,2,98,14,14,3.36,4,4,0,2,0,1,37,1,4,1,4 +64063,7,2,2,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,32161.692893,32052.891523,1,99,2,2,0.78,1,1,0,0,1,2,62,1,4,3,NA +64064,7,2,2,46,NA,5,6,1,NA,NA,2,NA,2,2,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17668.324663,18093.708295,1,102,14,14,4.86,3,3,0,1,0,1,42,1,4,1,5 +64065,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,106195.998472,105923.202819,1,93,15,15,5,2,2,0,0,0,1,46,1,4,1,5 +64066,7,2,2,11,NA,2,2,1,11,142,NA,NA,2,2,3,6,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,15929.068964,16501.112136,2,102,5,5,0.59,7,7,1,3,0,1,37,2,1,6,NA +64067,7,2,1,2,NA,2,2,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,12915.99793,13324.846215,2,102,4,4,0.57,6,6,2,3,0,2,26,2,3,1,NA +64068,7,2,1,10,NA,1,1,1,10,131,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,12231.897958,12180.037346,1,100,7,7,1.3,5,5,0,3,0,1,43,2,2,1,4 +64069,7,2,2,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,74050.783686,75666.677003,1,94,6,6,1.31,3,3,0,0,0,2,46,1,5,6,NA 
+64070,7,2,2,7,NA,3,3,1,7,89,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,48532.852397,48387.04619,1,101,7,7,1.55,5,5,1,2,0,2,31,1,4,1,2 +64071,7,2,1,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,11074.62349,11161.252677,2,101,5,5,0.89,4,4,1,1,1,2,38,1,4,77,NA +64072,7,2,2,50,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,1,2,2,1,2,2,2,21828.388259,21942.221447,3,91,6,6,0.89,7,7,1,1,0,1,59,2,1,1,1 +64073,7,2,1,13,NA,4,4,1,13,159,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12972.249316,12937.057154,1,100,15,15,3.7,5,5,0,3,0,1,51,1,5,1,5 +64074,7,1,1,50,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16287.780872,0,2,100,6,6,2.04,2,2,0,0,0,2,50,1,2,1,3 +64075,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,35074.640491,1,94,2,2,0.42,3,3,0,0,0,2,52,1,4,1,1 +64076,7,2,2,1,21,3,3,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26071.535804,27174.107547,1,93,10,10,2.48,5,5,2,1,0,1,40,2,5,1,5 +64077,7,2,1,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,30489.729187,30783.113246,3,92,12,12,NA,3,2,0,0,0,1,45,1,3,1,3 +64078,7,2,1,17,NA,4,4,1,17,206,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,18260.901254,2,101,14,14,4.03,4,4,0,1,0,2,40,1,5,1,5 +64079,7,2,2,25,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,41328.871311,42415.982935,2,93,9,9,2.6,4,4,0,0,0,2,58,2,4,4,NA +64080,7,2,1,0,3,2,2,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7004.278208,7004.40731,1,93,5,5,1.3,3,3,1,1,0,2,28,2,4,5,NA +64081,7,2,1,11,NA,4,4,2,11,143,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12176.538896,12387.770281,2,97,2,2,0.3,4,4,0,2,0,1,42,1,2,6,NA +64082,7,2,1,64,NA,3,3,2,NA,NA,2,NA,2,2,5,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,167711.394252,169284.299948,1,101,10,10,4.63,2,2,0,0,1,1,64,2,3,1,4 +64083,7,2,2,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,30634.82105,30961.983694,1,95,7,7,2.16,3,3,0,0,1,1,45,1,3,1,4 
+64084,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,33810.591598,40546.042621,3,91,13,13,NA,3,3,0,1,1,1,80,1,3,2,NA +64085,7,2,2,13,NA,5,6,2,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10164.140113,10557.174707,3,91,15,15,5,4,4,0,2,0,1,44,2,5,1,5 +64086,7,2,2,45,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18040.833018,19102.925896,1,96,9,9,4.1,2,2,0,0,0,2,45,2,5,1,5 +64087,7,2,1,18,NA,1,1,2,18,225,2,NA,2,2,5,14,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,20398.562455,20186.553847,3,92,8,8,2.01,4,4,1,0,0,2,49,2,5,4,NA +64088,7,2,2,9,NA,3,3,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24832.585001,24642.096114,2,101,3,3,0.6,3,3,0,2,0,1,39,1,4,4,NA +64089,7,2,2,54,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16567.527819,16240.172481,1,96,15,15,5,3,3,0,0,0,1,55,1,4,1,5 +64090,7,2,1,18,NA,4,4,2,18,218,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11351.725436,11256.943498,2,95,14,14,3.47,4,4,0,0,0,2,45,1,4,1,4 +64091,7,2,1,53,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12274.966216,12700.371312,3,90,5,5,1.05,3,3,0,0,0,1,53,2,1,1,2 +64092,7,2,1,18,NA,3,3,2,18,220,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,71458.892941,71654.216305,2,94,3,3,0.54,4,4,0,1,0,2,48,1,3,1,3 +64093,7,2,2,8,NA,5,6,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,1,1,2,1,1,2,2,NA,10387.513218,10891.034934,1,92,15,15,5,4,4,1,1,0,1,38,2,5,1,5 +64094,7,2,1,0,8,4,4,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7882.953266,8719.306286,2,96,7,7,1.79,4,4,2,0,0,2,49,1,3,1,3 +64095,7,2,1,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,113559.363135,118892.640077,2,102,14,14,4.93,3,3,0,1,0,1,37,1,5,1,5 +64096,7,2,1,10,NA,5,6,2,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,8246.426933,8701.859504,1,99,7,5,1.84,2,1,0,1,0,1,47,2,4,5,NA +64097,7,2,2,6,NA,4,4,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9139.784234,9841.162002,2,100,2,2,0.33,5,5,0,4,0,2,27,1,3,5,NA 
+64098,7,2,1,54,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19374.410926,19463.695548,1,96,4,4,0.65,4,4,0,0,0,1,19,1,4,NA,NA +64099,7,2,2,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,90234.245265,93002.542962,2,101,7,7,3.9,1,1,0,0,1,2,71,1,5,3,NA +64100,7,1,1,5,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58116.402634,0,1,97,15,15,5,5,5,2,0,1,1,43,1,5,1,5 +64101,7,2,2,0,5,4,4,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3861.876549,4016.370213,1,96,8,8,1.61,6,6,3,0,0,1,33,2,5,1,4 +64102,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,9240.841805,10026.588852,2,95,13,13,NA,2,2,0,0,1,1,56,1,9,1,1 +64103,7,2,2,12,NA,5,6,1,12,148,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9505.147857,9693.556382,1,92,14,14,2.42,6,6,1,3,0,1,30,1,4,6,NA +64104,7,2,2,4,NA,4,4,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8340.858072,8802.325948,2,103,6,6,1.11,5,5,1,2,0,2,36,1,4,5,NA +64105,7,2,2,8,NA,1,1,1,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14042.378151,15260.975241,1,102,14,14,4.03,4,4,0,2,0,1,30,1,5,6,NA +64106,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,68184.631488,70137.474393,2,95,4,4,1.34,1,1,0,0,0,1,34,1,4,5,NA +64107,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,65891.955175,72165.622258,1,95,7,7,2.38,2,2,0,0,2,1,80,1,3,1,4 +64108,7,2,2,2,NA,3,3,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18127.266286,20006.89679,2,95,6,6,1.08,4,4,1,1,0,1,39,1,4,1,4 +64109,7,1,2,45,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,80232.348161,0,1,99,77,77,NA,4,4,0,2,0,2,45,1,3,1,NA +64110,7,2,2,6,NA,4,4,2,6,81,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7282.523598,7776.514874,2,99,3,3,0.93,2,2,0,1,0,2,27,1,4,3,NA +64111,7,2,2,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,28047.519499,31779.09658,2,90,1,1,0.14,1,1,0,0,0,2,33,1,2,5,NA 
+64112,7,2,1,14,NA,2,2,1,14,179,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18635.323223,19040.145288,2,103,12,12,NA,4,4,0,1,0,2,50,2,3,1,4 +64113,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,35434.580514,38808.357492,1,95,3,3,0.96,1,1,0,0,1,2,80,1,4,2,NA +64114,7,2,1,9,NA,5,6,1,9,111,NA,NA,2,1,3,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5769.526317,6088.16495,1,100,5,5,0.74,6,6,0,3,0,1,40,2,3,1,4 +64115,7,2,1,8,NA,4,4,2,8,101,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7370.805738,7721.29497,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 +64116,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,35276.751365,1,94,7,7,1.65,5,4,0,0,0,1,46,1,4,1,4 +64117,7,2,2,16,NA,3,3,2,16,194,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,60694.411883,64307.712903,1,99,15,15,4.47,4,4,0,2,0,2,43,1,5,1,5 +64118,7,2,2,0,3,1,1,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7782.311913,7756.671982,1,94,7,7,1.56,4,4,2,0,0,1,21,1,4,1,4 +64119,7,2,2,6,NA,3,3,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,48532.852397,48387.04619,1,98,15,15,5,5,5,0,3,0,2,41,1,5,6,NA +64120,7,2,1,77,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,1,4,NA,2,2,2,2,2,2,1,2,2,NA,15301.031416,15853.994551,1,90,99,99,NA,5,5,0,2,1,2,50,2,4,4,NA +64121,7,2,2,34,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,27127.983961,27837.333201,2,90,7,7,1.66,4,4,0,3,0,2,34,1,5,3,NA +64122,7,2,2,67,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,2,2,2,1,2,2,2,15876.871857,17178.834476,2,102,NA,13,NA,2,1,0,0,1,1,50,2,1,6,NA +64123,7,2,2,1,21,1,1,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12871.484115,14210.463876,2,102,3,3,0.45,4,4,2,0,0,1,21,2,2,6,NA +64124,7,2,1,6,NA,4,4,1,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9261.557132,9782.87535,2,100,8,8,1.8,5,5,0,3,0,2,43,1,3,1,3 +64125,7,1,1,4,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,55061.722794,0,2,95,15,15,4.63,5,5,1,2,0,1,32,1,4,1,4 
+64126,7,2,2,7,NA,1,1,1,7,92,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,19059.339877,19259.716076,1,95,13,13,NA,5,5,1,2,0,2,34,2,1,1,1 +64127,7,2,2,69,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18695.172864,19350.637044,2,102,7,7,1.68,5,5,0,0,3,1,70,2,4,1,4 +64128,7,2,1,59,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,124170.603852,126212.047685,1,92,6,6,2.39,1,1,0,0,0,1,59,1,4,3,NA +64129,7,2,2,7,NA,4,4,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7588.605543,8170.947433,1,99,4,4,0.53,7,7,3,1,0,2,26,1,1,5,NA +64130,7,2,1,69,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,2,1,NA,1,2,1,1,2,1,1,2,1,1,13977.762704,14745.704028,1,92,77,77,NA,4,4,0,0,2,1,59,2,5,1,5 +64131,7,2,2,79,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,36067.495928,37024.300659,1,95,5,5,1.36,2,2,0,0,1,2,79,1,3,2,NA +64132,7,2,2,41,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,20303.639991,21936.687597,1,97,15,15,4.07,5,5,0,3,0,1,42,2,5,1,5 +64133,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,57151.56429,59304.909373,2,95,8,8,2.17,4,4,1,1,0,1,43,1,4,1,5 +64134,7,2,2,49,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,29670.405171,29084.152013,2,101,10,10,3.89,3,3,0,1,0,2,49,1,4,1,3 +64135,7,2,2,23,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,11206.329484,11903.241195,2,103,15,15,5,3,3,0,0,0,2,52,2,4,1,5 +64136,7,2,1,9,NA,1,1,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12624.747427,12616.351223,2,92,15,15,3.37,7,7,0,4,0,1,42,2,3,1,1 +64137,7,2,2,72,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,1,2,NA,1,2,1,1,2,1,1,2,1,NA,10125.392704,11197.313648,2,91,12,12,NA,7,6,0,4,2,2,72,2,1,2,NA +64138,7,2,1,36,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,20071.705576,20337.925283,3,91,14,14,5,2,1,0,0,0,1,36,1,5,6,NA +64139,7,2,2,32,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,2,6,2,2,2,2,1,2,2,NA,NA,NA,NA,34401.268523,35501.848089,2,97,4,4,0.67,4,4,0,2,0,1,39,2,2,6,NA 
+64140,7,2,2,16,NA,4,4,1,16,192,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12531.903464,12935.738352,2,100,5,5,1.07,4,4,0,1,0,2,36,1,3,5,NA +64141,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,26344.362464,29482.177939,2,95,4,4,1.46,1,1,0,0,1,1,80,1,1,2,NA +64142,7,2,1,73,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,14537.797299,15440.503889,3,91,77,77,NA,2,2,0,0,2,1,73,1,5,1,5 +64143,7,2,1,58,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,25737.628192,27045.174332,1,94,5,5,0.57,7,7,2,1,0,1,58,2,1,1,1 +64144,7,2,2,2,NA,5,7,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9254.729183,9958.191837,2,98,15,15,4.17,6,6,1,1,0,2,40,1,4,1,4 +64145,7,2,1,2,NA,1,1,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,13386.323284,2,98,1,1,0.19,3,3,2,0,0,2,31,1,4,2,NA +64146,7,2,2,47,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20303.639991,21936.687597,1,97,15,15,5,6,6,0,3,0,1,47,1,5,1,5 +64147,7,2,2,34,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,6,2,2,2,2,2,2,2,1,2,2,2,45655.090694,44423.220436,3,92,3,3,0.51,5,5,1,2,0,2,34,2,1,6,NA +64148,7,2,1,2,NA,4,4,2,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6580.937346,6661.20735,2,101,7,7,2.16,3,3,1,0,1,2,64,1,3,2,NA +64149,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,108504.032354,117548.425574,1,92,6,6,1.57,3,3,0,1,0,1,29,1,4,6,NA +64150,7,2,1,8,NA,4,4,1,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12839.999482,13062.740187,2,96,8,8,1.72,5,5,0,3,0,1,39,1,5,1,4 +64151,7,2,1,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,124820.027137,131239.54259,1,91,10,10,3.51,3,3,0,0,1,1,21,1,4,5,NA +64152,7,2,1,14,NA,4,4,1,14,173,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16261.995423,16421.244198,2,96,5,5,1.13,3,3,1,1,0,2,31,1,3,5,NA +64153,7,2,2,11,NA,4,4,2,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6979.847147,8479.043057,1,99,7,7,1.53,5,5,0,3,0,1,39,1,3,1,3 
+64154,7,2,2,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,19675.36066,19726.184442,2,96,14,14,4.26,3,3,0,0,0,1,20,1,4,5,NA +64155,7,2,1,11,NA,4,4,1,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11543.27965,11681.665542,1,98,1,1,0.03,3,3,0,2,0,2,38,1,4,5,NA +64156,7,2,2,8,NA,5,6,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5142.091355,5516.602825,3,91,15,15,5,3,3,0,1,0,2,44,2,5,1,5 +64157,7,2,2,68,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,136832.42119,141624.96623,2,91,12,5,2.03,2,1,0,0,2,2,68,1,4,6,NA +64158,7,2,1,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17095.959542,2,97,7,7,3.67,1,1,0,0,0,1,27,1,4,1,NA +64159,7,2,1,75,NA,1,1,2,NA,NA,2,NA,2,2,9,NA,1,1,NA,2,2,2,1,2,2,2,2,2,NA,17161.371047,18002.626237,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 +64160,7,2,1,13,NA,5,6,1,13,167,NA,NA,2,1,2,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11171.449402,11936.033174,1,92,7,7,1.56,4,4,0,2,0,2,38,2,4,6,NA +64161,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,18441.731082,18102.807884,1,96,15,15,5,2,2,0,0,0,2,51,1,5,5,NA +64162,7,2,1,18,NA,3,3,2,18,222,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,35639.306876,36153.844915,2,91,99,99,NA,3,3,0,0,0,1,40,NA,NA,1,4 +64163,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12262.33683,12778.149281,2,100,15,15,5,3,3,0,1,0,1,48,2,5,1,5 +64164,7,2,1,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19440.793325,20264.695737,2,95,15,10,3.67,5,3,0,0,0,1,47,1,5,1,3 +64165,7,2,2,6,NA,1,1,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,20495.125801,20710.596821,2,98,3,3,0.4,6,6,1,2,0,2,29,2,1,4,NA +64166,7,2,1,7,NA,2,2,2,7,92,NA,NA,2,2,3,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,10490.055059,10676.813132,3,90,4,4,0.63,5,5,0,3,0,1,45,2,4,1,4 +64167,7,2,2,21,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,16929.836231,17652.770039,2,101,1,1,0.14,2,1,0,0,0,2,21,2,4,5,NA 
+64168,7,2,1,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,96844.935107,100539.350089,2,98,10,10,4.42,2,2,0,0,0,1,31,1,4,1,3 +64169,7,2,1,17,NA,3,3,1,17,207,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,29202.587704,29754.083616,3,92,6,6,0.74,7,7,2,1,0,2,46,1,2,1,4 +64170,7,2,2,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,19337.563688,19353.557903,1,96,6,6,1.21,4,4,2,0,0,1,24,1,4,1,3 +64171,7,2,1,11,NA,3,3,2,11,135,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25604.034863,28346.339599,1,95,10,6,1.34,5,4,1,2,0,1,32,1,3,6,NA +64172,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,10455.739374,11701.098067,1,101,3,3,0.86,2,2,0,0,2,2,80,1,2,1,1 +64173,7,2,2,38,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,17430.913214,17517.807291,1,90,5,5,1.06,4,4,0,2,0,1,53,1,3,1,4 +64174,7,2,2,18,NA,2,2,2,18,222,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16189.692833,16766.382859,1,93,15,15,2.96,7,7,0,1,1,2,18,1,2,NA,NA +64175,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,35434.580514,39416.921733,1,95,2,2,0.83,1,1,0,0,1,2,80,1,2,2,NA +64176,7,2,1,72,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,10160.645851,10359.669924,1,96,9,9,3.97,2,2,0,0,2,1,72,1,4,1,5 +64177,7,2,1,2,NA,3,3,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,22388.62869,26233.531918,2,97,1,1,0.21,4,4,2,0,0,2,34,2,1,1,2 +64178,7,2,1,7,NA,5,6,1,7,88,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6765.693703,7164.978412,2,92,15,15,5,3,3,0,1,0,1,45,2,4,1,4 +64179,7,2,1,52,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,25118.469449,25612.281104,2,98,5,5,1.63,2,2,0,0,0,2,53,2,1,1,1 +64180,7,2,2,31,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,4,1,2,2,2,2,1,2,2,NA,NA,NA,NA,38184.257672,39182.709093,2,91,9,9,2.6,4,4,1,1,0,2,31,2,4,1,5 +64181,7,2,2,56,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,23409.465713,23644.84324,2,97,14,9,5,2,1,0,0,0,2,56,1,3,6,NA 
+64182,7,2,2,67,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,90638.027317,92118.903864,2,100,6,6,2.04,2,2,0,0,2,1,74,1,4,1,3 +64183,7,2,2,18,NA,1,1,1,18,223,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,27070.679378,27847.54398,1,92,7,7,1.48,5,5,0,1,0,1,42,1,5,1,4 +64184,7,2,1,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,36438.408188,36780.151097,1,101,3,3,0.66,2,2,0,0,1,1,67,1,2,3,NA +64185,7,2,2,14,NA,5,6,2,14,170,NA,NA,2,1,4,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11975.458482,12291.118947,1,97,7,7,1.48,5,5,1,2,0,1,40,2,5,1,4 +64186,7,2,1,63,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,1,2,2,1,2,2,1,9068.437099,9499.757798,2,93,9,9,3.97,2,2,0,0,1,2,57,2,3,1,1 +64187,7,2,1,6,NA,3,3,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,66003.625505,68276.88064,1,101,14,14,3.15,5,5,2,1,0,1,35,1,4,1,5 +64188,7,1,1,65,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,20187.172902,0,2,100,4,4,1.29,2,2,0,0,2,1,65,1,3,1,3 +64189,7,2,1,2,NA,5,6,2,2,33,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6208.192797,6963.167461,1,91,10,10,3.22,4,4,1,1,0,1,38,2,5,1,5 +64190,7,2,1,0,0,1,1,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7284.164858,7284.299119,2,98,4,4,0.67,4,4,1,0,0,2,40,1,3,3,NA +64191,7,2,1,72,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,3,1,NA,2,2,2,1,2,2,2,2,1,NA,14159.984279,14406.82491,2,93,4,4,0.99,2,2,0,0,2,1,72,2,3,1,4 +64192,7,2,1,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,116464.874823,118336.754883,2,92,15,15,5,6,6,2,0,0,1,18,1,4,NA,NA +64193,7,2,1,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16058.989596,16964.610342,2,97,6,6,1.02,6,6,1,2,0,1,37,1,3,1,3 +64194,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10346.035773,10598.2543,1,99,15,15,5,2,2,0,0,2,1,67,1,5,1,5 +64195,7,2,1,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19008.083201,19399.303007,2,97,12,12,NA,3,3,0,0,0,1,33,1,4,5,NA 
+64196,7,2,2,9,NA,5,6,1,9,115,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9211.293091,9657.799007,1,92,14,14,3.47,4,4,0,2,0,1,37,1,5,1,5 +64197,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,1,2,1,2,2,1,1,2,NA,7360.303891,7756.453804,3,90,15,15,4.2,6,6,1,0,2,1,60,1,5,1,4 +64198,7,2,2,76,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,12863.404053,14275.35524,2,90,3,3,0.92,1,1,0,0,1,2,76,1,3,2,NA +64199,7,2,2,2,NA,2,2,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8512.480199,8626.96297,2,90,3,3,0.68,2,2,1,0,0,2,23,1,1,5,NA +64200,7,2,2,3,NA,2,2,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15233.991221,16407.076559,1,98,3,3,0.4,7,7,2,3,0,2,31,2,5,1,2 +64201,7,2,1,20,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,9177.295801,9548.31812,2,92,77,77,NA,4,4,0,0,1,1,20,1,2,5,NA +64202,7,2,1,10,NA,4,4,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9298.536372,9483.411197,2,97,6,6,0.92,7,7,1,4,0,2,29,1,3,5,NA +64203,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,15262.313583,16475.41377,1,101,2,2,0.47,3,3,1,0,0,1,35,1,2,6,NA +64204,7,2,2,0,7,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16547.193167,16560.896108,1,97,12,12,NA,5,5,3,0,0,2,33,1,5,1,5 +64205,7,2,2,1,12,4,4,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7556.585831,7751.634132,1,96,77,77,NA,3,3,1,0,0,2,39,1,7,5,NA +64206,7,2,2,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,12886.419545,13879.729659,2,101,3,3,1.19,1,1,0,0,1,2,69,1,2,2,NA +64207,7,2,2,28,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,2,1,2,1,2,2,1,2,2,1,2,1,NA,45798.520132,47350.399021,2,91,14,14,4.71,3,3,0,0,0,1,28,2,1,1,2 +64208,7,2,1,15,NA,1,1,1,15,182,NA,NA,2,2,4,8,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,14432.845547,14415.592261,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +64209,7,2,1,11,NA,2,2,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,14820.807433,14943.02937,2,102,5,5,0.89,4,4,0,3,0,2,44,2,2,4,NA 
+64210,7,2,1,3,NA,4,4,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8414.846149,8934.299427,1,90,7,7,1.3,5,5,1,2,1,2,62,1,2,2,NA +64211,7,2,2,1,20,1,1,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11582.174418,11621.723611,2,102,8,8,1.28,7,7,1,3,0,1,39,2,1,1,3 +64212,7,2,2,8,NA,5,7,1,8,107,NA,NA,2,1,3,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8128.755281,8532.236543,1,98,9,9,2.39,4,4,0,2,0,2,48,1,5,1,5 +64213,7,2,1,37,NA,1,1,2,NA,NA,2,NA,2,2,77,NA,2,6,NA,2,2,2,2,2,2,2,2,2,2,34887.439952,38610.51698,2,94,77,77,NA,3,3,0,1,0,2,42,2,2,6,NA +64214,7,2,1,18,NA,1,1,1,18,227,2,NA,2,2,3,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,2,24228.858782,24599.205366,2,102,5,5,1.3,3,3,0,0,0,1,42,NA,NA,1,NA +64215,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,16494.288293,17728.004854,2,101,2,2,0.77,1,1,0,0,1,2,80,1,1,2,NA +64216,7,2,2,2,24,1,1,1,2,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11512.764389,12710.400836,2,98,7,7,2.16,3,3,1,0,0,2,26,1,3,1,3 +64217,7,2,1,53,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,1,2,2,2,2,2,2,2,20592.227875,20667.110628,2,96,6,6,0.77,7,7,2,1,0,1,53,2,1,1,1 +64218,7,2,1,53,NA,4,4,2,NA,NA,1,2,2,1,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,14699.320127,15747.67742,2,90,15,15,5,4,4,1,1,0,1,53,2,5,1,5 +64219,7,2,2,80,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,1,5,NA,1,2,2,1,2,2,1,2,2,NA,12863.404053,13555.744544,2,90,14,14,4.25,4,4,0,2,1,2,45,2,5,5,NA +64220,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,99381.891022,101559.2006,2,92,15,6,2.3,4,1,0,0,0,1,27,NA,NA,5,NA +64221,7,2,1,19,NA,1,1,1,19,239,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,20634.3158,20949.718537,2,96,6,6,0.77,7,7,2,1,0,1,53,2,1,1,1 +64222,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105902.251482,110971.270582,3,91,10,10,3.51,3,3,0,1,0,1,39,1,5,1,3 +64223,7,2,2,0,1,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20239.765739,19690.008753,2,94,9,9,2.51,4,4,2,0,0,1,30,2,4,1,4 
+64224,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,33292.204257,2,101,2,2,0.64,1,1,0,0,0,2,22,1,4,5,NA +64225,7,2,1,8,NA,3,3,2,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,50571.965712,52018.055374,2,95,15,15,4.63,5,5,1,2,0,2,36,1,5,1,3 +64226,7,2,1,73,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,73758.836891,83728.230941,2,91,3,3,1.25,1,1,0,0,1,1,73,1,3,5,NA +64227,7,2,2,59,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,4,5,NA,2,2,2,2,2,2,1,2,1,2,24553.193015,24681.235828,2,93,3,3,0.9,1,1,0,0,0,2,59,2,4,5,NA +64228,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,146181.198007,184923.777793,2,91,15,15,5,2,1,0,0,0,1,44,1,3,5,NA +64229,7,2,2,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,32609.153024,32528.629595,1,98,3,3,0.73,3,3,0,0,0,1,52,1,4,1,3 +64230,7,2,2,2,NA,4,4,1,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8296.101892,8926.698178,2,96,7,7,1.49,5,5,2,1,0,1,51,1,5,1,3 +64231,7,2,1,68,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,7820.017944,8252.409456,2,98,7,7,2.1,3,3,0,0,1,1,68,1,2,1,9 +64232,7,2,1,0,6,3,3,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23912.171644,25599.009772,1,94,9,9,3.14,3,3,1,0,0,1,28,1,5,1,5 +64233,7,2,1,14,NA,4,4,1,15,180,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13731.625553,14361.066702,1,100,6,6,1.13,4,4,0,3,0,2,32,1,3,5,NA +64234,7,2,1,29,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,59682.963348,63262.110969,2,102,15,15,3.92,5,5,0,0,0,1,19,1,4,NA,NA +64235,7,2,2,56,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,1,6,NA,1,2,2,1,2,2,2,2,2,2,22224.73066,22874.201587,2,94,12,12,NA,2,2,0,0,0,1,46,1,3,1,1 +64236,7,2,1,5,NA,1,1,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11388.091908,11522.723578,1,103,8,8,1.85,5,5,2,1,0,2,25,2,2,1,2 +64237,7,2,2,16,NA,3,3,2,16,200,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,38400.791741,38900.548719,1,92,4,4,0.5,6,6,0,3,0,2,41,1,4,1,NA 
+64238,7,2,1,0,2,5,6,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5024.464768,5303.683185,2,92,99,77,NA,7,3,3,3,1,1,61,2,1,1,3 +64239,7,2,1,2,NA,4,4,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5396.442999,5408.183432,1,99,4,4,0.53,7,7,3,1,0,2,26,1,1,5,NA +64240,7,2,1,19,NA,1,1,1,19,233,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,23389.620035,23146.524434,1,94,10,10,2.94,4,4,0,2,0,2,52,1,5,2,NA +64241,7,2,2,0,9,3,3,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6423.658168,6554.989142,2,96,3,3,0.53,5,5,3,0,0,2,26,1,4,1,4 +64242,7,2,2,16,NA,3,3,1,16,199,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,82145.180523,84080.796514,1,98,15,15,5,5,5,0,3,0,2,44,1,5,1,5 +64243,7,2,2,7,NA,3,3,2,7,85,NA,NA,2,1,1,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,52455.416493,51758.830416,1,93,15,15,5,4,4,0,2,0,2,42,1,5,1,NA +64244,7,2,2,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,30888.99461,30919.326232,1,99,2,2,0.61,2,2,0,0,0,1,46,1,5,5,NA +64245,7,2,1,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,120041.937453,119895.563372,1,98,9,9,2.88,6,3,1,3,0,1,51,1,2,1,3 +64246,7,2,1,14,NA,2,2,2,14,172,NA,NA,2,1,2,7,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,26616.794908,27940.371621,2,91,7,7,2.64,2,2,0,1,0,1,33,2,3,1,NA +64247,7,2,1,42,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,43470.92932,47764.563168,1,92,5,5,0.87,4,4,0,2,0,1,42,2,1,1,4 +64248,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,67727.881967,67936.122757,2,99,15,15,5,1,1,0,0,0,2,38,1,5,5,NA +64249,7,1,1,11,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,57057.523607,0,1,101,15,15,5,4,4,0,2,0,2,40,1,4,1,3 +64250,7,2,2,3,NA,1,1,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13366.393396,13791.68675,2,94,7,7,1.79,4,4,1,1,0,2,32,1,4,1,4 +64251,7,1,1,4,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7232.559351,0,2,103,14,14,3.86,4,4,2,0,0,2,37,2,5,1,NA 
+64252,7,2,1,61,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,3,1,NA,2,2,2,1,2,2,1,2,2,2,11019.434708,11234.197915,3,91,5,5,0.89,4,4,0,2,2,1,61,2,3,1,4 +64253,7,2,1,13,NA,5,7,2,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5388.361335,5758.757236,3,91,15,15,4.47,4,4,0,3,0,2,44,2,5,1,NA +64254,7,2,1,0,8,1,1,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6910.953528,6911.08091,2,96,4,4,0.81,3,3,1,0,0,2,37,2,5,1,3 +64255,7,2,1,23,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,NA,1,2,2,1,2,2,1,2,2,3,13859.220514,14321.474311,1,98,12,14,5,3,1,0,0,0,1,24,1,4,5,NA +64256,7,2,2,67,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,11523.037911,11989.050618,2,97,6,6,2.31,2,2,0,0,1,2,67,1,3,2,NA +64257,7,2,2,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,53047.531099,53521.812192,1,99,2,1,0.18,2,1,0,0,0,2,24,1,5,5,NA +64258,7,2,2,13,NA,5,6,1,13,163,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12918.122917,13476.480232,1,92,10,10,2.82,4,4,0,2,0,2,48,2,5,1,5 +64259,7,2,2,35,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,2,1,2,2,2,2,1,2,2,2,2,2,2,35353.005268,36099.35979,2,94,6,6,1.34,4,4,0,2,0,1,37,2,4,1,2 +64260,7,2,2,17,NA,4,4,2,17,214,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11438.127668,11466.298034,2,97,6,6,1.02,6,6,1,2,0,1,37,1,3,1,3 +64261,7,2,1,67,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,1,1,2,2,1,2,1,NA,13639.016686,14339.363246,2,102,15,15,5,5,5,1,0,2,1,30,1,4,1,5 +64262,7,2,2,6,NA,4,4,2,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10225.297464,10592.094499,1,93,2,2,0.32,3,3,0,1,0,1,25,1,3,6,NA +64263,7,2,2,62,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,2,2,2,2,9203.46153,9587.572248,2,97,6,6,1.7,2,2,0,0,1,2,62,2,5,1,2 +64264,7,2,1,14,NA,4,4,2,15,180,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8364.097643,8526.060452,2,90,6,6,0.96,5,5,0,1,0,1,55,1,4,6,NA +64265,7,2,1,48,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,27087.386131,27212.215064,1,101,6,6,1.43,4,4,0,1,2,2,72,1,2,1,NA 
+64266,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,90274.900586,93718.683628,2,99,15,15,5,2,1,0,0,0,1,31,1,5,6,NA +64267,7,2,2,10,NA,4,4,2,10,129,NA,NA,2,2,3,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7136.421849,7524.490259,1,90,4,4,0.58,6,6,0,3,0,2,21,2,5,5,NA +64268,7,2,2,11,NA,4,4,1,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12476.900945,13434.365665,2,101,2,2,0.38,3,3,0,2,0,2,56,1,3,2,NA +64269,7,1,1,61,NA,1,1,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,9611.684527,0,1,98,7,7,1.52,4,4,0,1,2,1,61,1,4,1,2 +64270,7,2,2,47,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,27934.372045,28667.797605,1,101,5,5,1.15,3,3,0,1,0,1,49,1,3,1,4 +64271,7,2,2,24,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,45549.853584,49025.946555,3,91,3,3,0.76,3,3,0,1,0,2,24,1,4,6,NA +64272,7,2,2,19,NA,4,4,2,19,239,2,NA,2,2,2,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12209.74498,12386.615954,2,90,6,6,1.34,4,4,1,0,0,1,38,2,4,6,NA +64273,7,2,1,5,NA,4,4,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9242.457181,9624.356732,2,97,5,5,0.76,5,5,1,2,0,1,32,1,4,6,NA +64274,7,2,1,0,3,1,1,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,7757.493251,7980.837709,2,98,3,3,0.4,6,6,1,2,0,2,29,2,1,4,NA +64275,7,1,1,62,NA,5,6,NA,NA,NA,2,NA,2,1,6,NA,4,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,10288.337394,0,3,91,8,8,4.13,1,1,0,0,1,1,62,2,4,4,NA +64276,7,2,1,50,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,23857.322871,23507.253435,1,103,1,1,0.03,3,3,0,0,0,1,50,1,2,3,NA +64277,7,2,2,48,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,2,6,NA,2,2,2,2,2,2,2,2,2,2,36924.381422,39366.592716,2,102,7,7,1.89,3,3,0,1,0,1,41,2,2,6,NA +64278,7,2,2,18,NA,2,2,2,18,226,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13824.001771,14551.102227,2,90,5,5,1.19,3,3,0,0,0,2,50,2,4,4,NA +64279,7,2,1,11,NA,3,3,2,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,52386.010737,55389.865547,3,91,14,14,3.4,4,4,0,2,0,1,40,1,4,1,4 
+64280,7,2,1,3,NA,4,4,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7014.280192,7545.08247,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 +64281,7,2,1,44,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,34000.722046,33736.711001,2,93,14,14,3.52,5,5,1,2,0,1,44,1,5,1,5 +64282,7,2,1,2,NA,4,4,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6074.284591,6449.253661,2,99,3,3,0.32,6,6,2,1,1,2,59,1,4,1,NA +64283,7,2,1,5,NA,4,4,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9035.668246,2,100,3,3,0.31,7,7,3,2,0,2,28,1,3,1,3 +64284,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,35434.580514,38808.357492,1,101,1,1,0.18,1,1,0,0,1,2,80,1,4,2,NA +64285,7,2,1,7,NA,1,1,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16334.967392,17408.274993,1,95,5,5,1.5,2,2,0,1,0,1,47,1,4,2,NA +64286,7,2,2,56,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16109.317112,16194.502254,1,103,15,15,5,2,2,0,0,1,2,56,2,5,1,5 +64287,7,2,1,7,NA,3,3,2,7,89,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24713.905595,26080.214484,1,98,6,6,0.97,7,7,1,2,0,1,49,1,2,1,2 +64288,7,2,2,41,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,31809.199771,32336.174545,2,103,9,9,3.97,2,2,0,0,1,1,61,1,5,1,3 +64289,7,2,2,0,3,3,3,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23207.538828,22577.170535,1,94,6,6,1.21,4,4,2,0,0,1,27,1,4,1,3 +64290,7,2,2,65,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,10346.035773,10598.2543,1,99,10,10,2.07,7,7,2,3,1,2,35,1,5,4,NA +64291,7,2,1,50,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,22238.49412,25006.164622,2,90,4,4,1.38,1,1,0,0,0,1,50,1,3,3,NA +64292,7,2,1,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,21924.03349,22615.591521,3,92,2,2,0.65,2,2,0,0,1,2,80,NA,NA,2,NA +64293,7,2,2,4,NA,3,3,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,53166.229434,56500.79967,1,101,7,7,1.55,5,5,1,2,0,2,31,1,4,1,2 
+64294,7,2,1,19,NA,2,2,2,19,234,2,NA,2,1,4,15,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,17149.002427,17445.048827,3,90,77,77,NA,4,3,0,0,0,1,45,2,3,3,NA +64295,7,2,1,10,NA,3,3,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,NA,NA,NA,1,2,2,1,69959.431608,85622.357277,2,100,NA,NA,NA,5,5,1,2,0,1,36,NA,NA,3,NA +64296,7,2,2,40,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,1,2,1,2,2,1,2,2,NA,NA,NA,NA,33716.655399,36032.83001,1,100,8,8,2.17,4,4,1,1,0,2,40,2,2,1,2 +64297,7,1,2,9,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,15002.25457,0,1,91,7,7,2.72,2,2,0,1,0,2,31,1,2,5,NA +64298,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,10991.458112,11482.21496,2,99,10,10,4.76,2,2,0,0,2,1,80,1,2,2,NA +64299,7,2,1,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,95606.814645,96503.477057,2,98,15,15,5,2,2,0,0,1,2,52,1,4,1,4 +64300,7,1,2,6,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11035.074625,0,2,100,1,1,0.04,4,4,0,2,0,1,34,NA,NA,6,NA +64301,7,2,2,73,NA,5,7,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,12813.709202,13258.116643,2,103,12,12,NA,2,2,0,0,2,1,73,2,5,1,4 +64302,7,2,1,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,136880.768184,139080.782402,1,94,15,15,4.95,4,4,0,0,2,1,72,1,3,1,3 +64303,7,1,2,77,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,81395.297807,0,1,92,4,4,1.61,1,1,0,0,1,2,77,1,3,2,NA +64304,7,2,2,0,5,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8359.077295,8583.668456,3,92,5,5,0.81,5,5,3,0,0,2,23,1,4,5,NA +64305,7,2,2,41,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,14831.744106,14468.205243,3,90,10,10,3.67,3,3,0,1,0,2,52,2,3,5,NA +64306,7,2,2,2,NA,4,4,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7826.463896,8746.031629,1,96,6,6,1.21,4,4,2,0,0,1,24,1,4,1,3 +64307,7,2,2,24,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,102855.726146,106395.504024,1,101,7,7,1.74,4,4,1,0,0,1,24,NA,NA,1,4 
+64308,7,2,2,31,NA,2,2,1,NA,NA,2,NA,2,2,77,NA,1,6,2,2,2,2,1,2,2,2,2,2,2,32097.38481,33124.257579,2,100,3,3,0.76,3,3,1,0,0,2,31,2,1,6,NA +64309,7,2,2,78,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,53541.401974,54961.757487,1,99,13,13,NA,2,2,0,0,2,1,80,1,2,1,3 +64310,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,163194.688032,170363.320524,1,97,15,15,5,2,2,0,0,1,2,49,1,5,1,3 +64311,7,2,1,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,17636.923031,18334.929229,2,97,15,14,5,2,1,0,0,0,1,54,1,4,5,NA +64312,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,42993.150248,46481.579201,1,92,7,7,2.64,2,2,0,0,2,2,68,1,4,1,4 +64313,7,2,1,65,NA,1,1,1,NA,NA,2,NA,2,1,8,NA,3,6,NA,2,2,2,2,2,2,2,2,2,2,14067.170863,14736.245577,2,102,14,8,4.59,2,1,0,0,2,1,65,2,3,6,NA +64314,7,2,1,6,NA,1,1,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13285.093011,13495.322435,2,94,6,6,1.5,4,4,0,2,0,1,44,2,2,1,2 +64315,7,2,2,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,147501.330925,150787.451601,1,101,6,6,1.31,3,3,0,0,1,2,80,1,1,2,NA +64316,7,2,2,12,NA,4,4,1,12,155,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,6,6,1.16,4,4,0,3,0,2,36,1,4,4,NA +64317,7,2,2,18,NA,3,3,1,18,222,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,32725.110579,34226.88515,2,92,3,2,0.64,2,1,0,0,0,2,18,1,4,NA,NA +64318,7,2,2,14,NA,4,4,2,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13790.410898,14648.625582,1,93,7,7,1.79,4,4,0,2,0,1,53,2,4,1,4 +64319,7,2,2,14,NA,5,6,2,14,174,NA,NA,2,1,4,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8675.8731,8857.654394,1,96,15,15,5,4,4,0,2,0,2,41,2,5,1,5 +64320,7,2,1,14,NA,4,4,2,14,171,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17606.165994,18413.211403,2,101,13,3,0.64,5,4,0,3,1,2,62,1,1,2,NA +64321,7,2,1,12,NA,3,3,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,90320.117415,93226.168932,1,100,15,15,5,4,4,0,2,0,2,47,1,5,1,5 
+64322,7,2,1,8,NA,3,3,1,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,41342.668304,42524.849071,1,102,8,8,2.42,4,4,0,2,0,2,34,1,4,1,3 +64323,7,2,1,3,NA,1,1,1,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17865.135763,18076.339981,3,92,1,1,0,5,5,3,0,0,1,26,1,2,1,2 +64324,7,2,2,28,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,1,2,2,2,2,2,2,2,2,2,2,2,38364.674202,40512.730459,2,97,13,13,NA,3,3,0,1,0,1,28,2,2,1,1 +64325,7,2,1,44,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,1,2,1,2,2,1,2,2,NA,108408.375382,111473.582646,2,98,14,14,4.16,3,3,0,0,0,1,49,1,5,1,4 +64326,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,90303.174138,94631.352733,2,92,14,9,5,2,1,0,0,0,1,29,1,5,6,NA +64327,7,2,1,9,NA,3,3,2,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,30554.050397,31755.009616,1,97,3,3,0.93,2,2,0,1,0,2,34,1,4,5,NA +64328,7,2,1,78,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,13523.396212,13874.942566,2,94,2,2,0.56,2,2,0,0,2,2,71,2,2,1,1 +64329,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,34049.658577,35975.208577,1,91,9,9,4.27,2,2,0,0,2,1,80,1,4,1,3 +64330,7,2,1,43,NA,5,7,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,129656.862785,1,101,15,15,5,4,4,0,2,0,1,43,1,4,1,5 +64331,7,2,1,16,NA,3,3,1,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,59545.101745,58762.542429,1,102,8,8,1.6,7,7,0,4,0,2,39,1,4,1,4 +64332,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,17579.006909,17715.920575,2,103,2,1,0.43,2,1,0,0,0,1,20,1,3,6,NA +64333,7,2,1,46,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,3,3,NA,1,2,2,1,2,2,1,2,2,3,20197.354438,20825.605262,2,96,5,5,1.84,2,1,0,0,1,1,46,2,3,3,NA +64334,7,2,1,6,NA,2,2,2,6,80,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11477.372934,11658.996164,1,93,9,9,2.46,4,4,0,2,0,1,35,2,1,1,1 +64335,7,2,2,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,NA,NA,NA,1,2,2,1,43813.24867,46884.227528,1,98,NA,NA,NA,4,4,0,2,0,1,31,NA,NA,1,2 
+64336,7,2,1,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,19869.518693,21796.765593,1,94,7,7,1.21,6,6,2,2,0,1,31,1,2,6,NA +64337,7,2,1,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,42894.724338,43561.362107,1,98,3,3,0.93,2,2,0,0,0,1,21,1,4,5,NA +64338,7,1,2,46,NA,1,1,NA,NA,NA,2,NA,2,2,6,NA,1,6,NA,2,2,2,1,2,2,NA,NA,NA,NA,40337.933888,0,2,94,5,5,0.65,6,6,0,2,0,1,53,NA,NA,6,NA +64339,7,2,1,6,NA,2,2,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12604.990052,13551.632975,1,103,7,7,1.03,7,7,0,3,0,1,50,2,1,1,1 +64340,7,1,1,80,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,9443.12441,0,1,99,77,77,NA,2,2,0,0,2,1,80,1,5,1,4 +64341,7,2,2,51,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,2,1,NA,2,2,2,1,2,2,2,2,1,2,23200.373382,24548.135184,2,93,7,7,2.1,3,3,0,1,0,2,51,2,2,1,NA +64342,7,2,1,12,NA,3,3,2,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,103364.662244,103614.262032,1,98,14,14,3.15,5,5,0,3,0,1,34,1,4,1,4 +64343,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,84821.217572,87190.888968,2,91,15,15,4.2,6,6,2,0,2,1,63,1,1,1,3 +64344,7,2,1,37,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,28213.419452,30402.035877,1,100,14,5,1.79,2,1,0,0,0,1,37,1,2,6,NA +64345,7,2,1,21,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21763.209029,22385.504537,2,92,4,1,0.18,4,1,0,0,0,1,21,1,4,5,NA +64346,7,2,1,3,NA,5,7,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8402.098771,9423.873047,3,91,14,14,4.32,3,3,1,0,0,1,31,2,3,1,4 +64347,7,2,1,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,32838.149884,1,95,1,1,0,1,1,0,0,0,1,50,1,3,5,NA +64348,7,2,2,13,NA,3,3,1,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,105891.533689,107879.366927,1,101,8,8,1.85,5,5,0,3,0,1,41,1,3,1,4 +64349,7,2,2,24,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,NA,NA,NA,1,2,2,1,11475.373333,11965.391974,2,92,12,NA,NA,7,1,0,0,2,1,53,2,3,1,3 
+64350,7,2,1,12,NA,4,4,2,12,145,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11351.725436,12116.15399,2,95,7,7,1.55,5,5,0,3,0,1,30,1,4,1,4 +64351,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,18353.275855,19744.388768,1,91,4,4,0.81,4,4,1,1,0,1,32,1,4,6,NA +64352,7,2,2,2,NA,4,4,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6085.457443,6422.142601,1,99,7,7,1.53,5,5,2,0,0,2,37,1,4,1,3 +64353,7,2,1,0,11,2,2,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7973.737542,8118.415368,1,91,7,7,1.66,4,4,2,0,0,1,32,2,5,1,4 +64354,7,2,2,80,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,1,2,NA,2,2,2,1,2,2,1,2,2,NA,21122.17432,23417.039872,2,93,6,6,0.64,7,7,2,1,3,2,60,2,3,2,NA +64355,7,1,2,27,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,4,6,3,1,2,2,2,2,2,NA,NA,NA,NA,42583.505439,0,2,91,6,6,1.3,4,4,1,1,0,2,27,1,4,6,NA +64356,7,2,1,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,39915.513053,41236.907607,2,98,7,7,1.33,6,6,0,3,0,1,31,1,3,6,NA +64357,7,1,2,1,22,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9670.78354,0,2,93,15,15,5,4,4,2,0,0,1,34,1,5,1,5 +64358,7,2,1,5,NA,4,4,2,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,8005.528865,8250.049428,1,99,NA,NA,NA,4,4,1,1,0,1,42,NA,NA,1,NA +64359,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,25815.880139,26556.735732,2,101,99,2,0.73,2,1,0,0,0,1,24,1,4,5,NA +64360,7,2,2,54,NA,1,1,1,NA,NA,2,NA,2,2,99,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,21939.007884,22053.417944,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +64361,7,2,2,16,NA,5,6,2,16,199,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8301.172828,8662.397073,1,91,15,15,5,3,3,0,1,0,2,47,2,5,1,5 +64362,7,2,1,68,NA,4,4,2,NA,NA,2,NA,2,2,8,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,8491.292032,8823.40317,1,96,6,6,2.06,2,2,0,0,2,1,68,2,3,1,2 +64363,7,2,1,22,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,14313.345971,17701.114982,3,91,14,14,3.53,5,5,0,1,1,1,69,1,4,3,NA 
+64364,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,38976.947883,43357.400647,1,99,12,12,NA,1,1,0,0,1,2,80,1,3,3,NA +64365,7,2,2,5,NA,2,2,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14852.990935,14903.708855,3,92,7,7,0.93,7,7,1,3,0,2,20,1,3,1,1 +64366,7,2,1,4,NA,4,4,2,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10234.881417,10547.495238,2,97,2,2,0.33,4,4,2,1,0,2,34,1,2,5,NA +64367,7,2,2,39,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,20039.469886,21581.359058,1,97,15,15,5,4,4,1,0,0,2,39,2,5,1,5 +64368,7,2,1,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,8497.912951,8999.288803,2,100,2,2,0.77,1,1,0,0,1,1,70,1,1,3,NA +64369,7,2,1,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,17824.805721,17767.952896,2,95,4,2,0.55,2,1,0,0,0,2,47,1,3,4,NA +64370,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25890.025676,30408.154535,2,93,3,3,1.25,1,1,0,0,0,1,25,1,4,5,NA +64371,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,18441.731082,18102.807884,1,96,15,9,4.92,2,1,0,0,0,2,55,1,4,5,NA +64372,7,2,2,19,NA,4,4,2,19,232,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13176.946531,13078.313981,2,99,9,9,1.78,6,6,1,1,0,1,46,1,3,6,NA +64373,7,2,2,6,NA,3,3,2,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,41790.228676,44382.642913,2,94,7,7,1.17,6,6,0,3,0,1,40,1,3,1,5 +64374,7,2,2,52,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10700.939986,11250.682738,1,99,15,15,4.47,4,4,0,2,0,2,52,2,5,1,5 +64375,7,2,2,14,NA,2,2,2,14,175,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,5,5,0.8,5,5,0,3,0,2,40,2,1,5,NA +64376,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,34296.120763,1,95,3,3,0.65,3,3,1,0,0,1,58,1,3,3,NA +64377,7,2,2,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,19060.786733,19867.822877,2,97,5,5,0.92,5,5,0,3,0,2,54,1,3,2,NA 
+64378,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,115391.113177,120921.742093,1,102,14,7,3.31,2,1,0,0,0,1,27,1,4,5,NA +64379,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,30320.430859,2,101,4,3,1.15,2,1,0,0,0,2,22,1,4,5,NA +64380,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,114993.808573,116714.079488,1,98,7,3,0.9,4,1,0,0,0,2,20,NA,NA,5,NA +64381,7,2,2,61,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,148725.079159,151155.002758,2,94,15,15,5,2,2,0,0,2,1,63,1,5,1,5 +64382,7,2,1,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,6,NA,1,2,2,1,2,2,1,2,2,3,14385.653726,15533.829525,2,101,6,1,0.22,2,1,0,0,0,2,26,2,4,6,NA +64383,7,2,2,26,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,19658.218913,18995.471706,1,99,13,13,NA,4,4,1,0,0,2,26,1,4,4,NA +64384,7,2,1,0,5,3,3,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24315.386145,24328.135897,1,102,6,6,1.23,4,4,2,0,0,2,25,1,5,1,5 +64385,7,2,2,13,NA,5,6,1,13,160,NA,NA,2,1,3,6,NA,NA,NA,1,1,2,1,2,1,1,2,2,1,8878.081187,9740.503961,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +64386,7,2,2,9,NA,1,1,1,10,121,NA,NA,2,2,3,2,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,16397.644545,16823.243233,1,91,7,7,2.1,3,3,0,1,0,2,29,2,3,6,NA +64387,7,2,1,71,NA,5,6,2,NA,NA,2,NA,2,2,7,NA,5,1,NA,1,2,1,1,2,1,1,2,1,NA,9748.579573,10638.31203,3,90,9,9,2.93,3,3,0,0,2,1,71,2,5,1,4 +64388,7,2,1,69,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,8232.241159,8296.636338,2,98,5,5,2.2,1,1,0,0,1,1,69,1,4,3,NA +64389,7,2,1,10,NA,1,1,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,9485.228453,10197.575258,1,103,10,10,2.82,4,4,0,2,0,1,41,2,1,1,1 +64390,7,2,2,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18494.181242,18934.264494,2,99,4,4,0.78,4,4,0,2,0,2,45,1,3,5,NA +64391,7,2,2,26,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,36731.516901,42296.380247,1,99,10,6,2.75,2,1,0,0,0,1,31,1,4,6,NA 
+64392,7,2,1,10,NA,3,3,2,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,46228.073505,48314.890943,2,94,5,5,0.89,4,4,0,2,0,2,51,1,2,3,NA +64393,7,2,2,16,NA,4,4,1,17,204,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14166.687432,14623.20249,1,100,15,15,3.87,6,6,1,3,0,2,39,1,4,1,4 +64394,7,2,2,2,NA,1,1,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,8082.096138,8109.693752,1,90,4,4,0.46,7,7,2,3,0,2,34,2,1,6,NA +64395,7,2,1,6,NA,3,3,2,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58159.917125,59822.981903,3,92,15,15,5,4,4,0,2,0,2,38,1,5,1,5 +64396,7,2,2,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,162038.12624,162493.980108,1,94,77,77,NA,1,1,0,0,0,2,51,1,3,3,NA +64397,7,2,2,54,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15714.689474,15797.787849,2,99,15,15,5,1,1,0,0,0,2,54,1,5,5,NA +64398,7,2,1,4,NA,4,4,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14467.4421,15952.720644,2,101,5,5,1.19,3,3,1,0,0,2,32,1,4,1,3 +64399,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,26763.110196,36351.891896,2,94,3,3,0.95,2,2,0,1,0,2,45,1,5,3,NA +64400,7,2,2,61,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,11623.354795,12210.370942,1,94,6,6,1.98,2,2,0,0,2,1,65,2,1,1,1 +64401,7,2,1,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,29537.209125,29019.340657,1,95,12,3,1.19,4,1,0,0,0,1,29,1,3,6,NA +64402,7,2,1,48,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,5,1,NA,2,2,2,2,2,2,1,2,2,2,27776.016947,27368.446715,2,90,6,6,1.15,5,5,0,2,0,2,47,2,1,1,5 +64403,7,2,1,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,22685.373982,24056.024455,1,98,4,4,1,3,3,0,1,1,1,65,1,2,1,NA +64404,7,2,1,33,NA,3,3,2,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,99283.360764,105383.970589,3,91,8,8,3.4,2,2,0,0,0,1,33,2,5,1,4 +64405,7,2,1,78,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,11550.158096,12480.92393,2,92,3,3,0.4,6,6,0,1,2,1,78,2,1,1,1 
+64406,7,2,1,18,NA,5,6,2,18,219,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6999.347953,8045.308483,3,90,6,6,1.3,4,4,0,0,0,1,55,2,1,1,1 +64407,7,2,1,54,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,17801.655316,18085.793361,2,95,6,6,0.97,6,6,2,1,0,1,54,1,3,6,NA +64408,7,2,2,15,NA,4,4,2,15,184,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13124.737024,13087.755238,2,101,5,5,1.08,3,3,0,1,1,2,62,1,4,2,NA +64409,7,2,1,7,NA,4,4,2,7,88,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7886.48532,7943.675078,1,99,8,8,1.76,5,5,0,2,1,1,37,1,4,1,3 +64410,7,2,2,13,NA,2,2,2,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16360.434077,17220.943159,1,94,14,14,3.4,5,5,0,3,0,2,41,1,4,1,4 +64411,7,2,1,37,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,18745.208345,19348.815401,1,92,7,7,2.1,3,3,0,0,2,1,37,2,5,5,NA +64412,7,2,2,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,27553.988486,28260.692091,2,98,3,3,0.93,1,1,0,0,0,2,54,1,3,3,NA +64413,7,2,2,48,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,2,5,NA,2,2,2,2,2,2,1,2,2,2,25778.164795,26907.837256,2,90,1,1,0.22,3,3,0,1,0,2,48,2,2,5,NA +64414,7,2,2,1,23,3,3,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17516.090261,18614.694278,1,98,2,2,0.36,5,5,3,0,0,1,25,1,3,1,3 +64415,7,2,2,55,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18525.3583,18971.375925,1,92,14,14,3.9,4,4,0,0,0,2,55,2,5,1,5 +64416,7,2,1,6,NA,5,6,2,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10810.913614,11522.32071,1,97,7,7,1.48,5,5,1,2,0,1,40,2,5,1,4 +64417,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,135384.418391,141422.067719,1,98,15,15,5,3,3,0,0,0,1,48,1,4,1,4 +64418,7,2,2,17,NA,3,3,1,17,211,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,80091.55101,82618.226829,1,98,6,6,1.11,5,5,0,2,1,2,37,1,1,1,1 +64419,7,2,2,63,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,16352.915834,17178.789759,3,92,5,5,1.26,3,3,0,0,2,1,76,2,1,1,1 
+64420,7,2,2,54,NA,4,4,2,NA,NA,2,NA,2,2,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,17928.412795,17976.980061,1,90,15,15,4.34,4,4,0,0,1,1,62,2,5,1,3 +64421,7,2,2,2,NA,4,4,1,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8754.193667,9419.610037,2,98,8,8,1.8,5,5,2,1,0,1,32,1,4,1,5 +64422,7,2,1,16,NA,2,2,2,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,15351.501195,16874.029415,2,90,1,1,0.22,3,3,0,1,0,2,48,2,2,5,NA +64423,7,2,2,12,NA,4,4,1,12,151,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12826.497818,12890.123626,2,96,3,3,0.38,5,5,1,2,0,2,30,1,3,5,NA +64424,7,2,1,0,6,3,3,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25469.602005,25029.287945,2,91,15,15,5,3,3,1,0,0,1,35,1,5,1,5 +64425,7,2,2,7,NA,5,6,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7722.982971,8635.133042,1,93,8,8,2.24,4,4,0,2,0,1,44,2,5,1,4 +64426,7,2,2,0,1,4,4,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4412.61884,4541.653777,1,93,7,7,1.97,4,4,1,2,0,2,33,1,4,3,NA +64427,7,2,1,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105412.227726,111169.022023,2,101,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +64428,7,2,1,7,NA,5,6,1,7,86,NA,NA,2,1,2,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9101.455527,9700.372536,2,102,8,8,1.72,5,5,0,2,1,1,63,2,5,1,5 +64429,7,2,1,4,NA,5,6,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6185.185728,6767.562311,1,97,14,14,2.29,7,7,1,2,2,1,40,2,1,1,1 +64430,7,2,1,65,NA,5,7,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,7415.090784,7473.093956,2,95,12,12,NA,3,3,0,0,2,1,65,1,4,1,4 +64431,7,2,2,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,42468.064168,43146.583227,2,101,5,5,1.19,3,3,1,0,0,2,32,1,4,1,3 +64432,7,2,2,12,NA,4,4,1,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16598.888685,16552.117725,2,102,5,5,0.76,5,5,1,3,0,2,30,1,4,4,NA +64433,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,1,2,NA,6882.452425,7271.66356,1,99,6,6,1.12,4,4,0,0,2,1,51,1,4,3,NA 
+64434,7,2,2,7,NA,3,3,2,7,86,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,56541.371473,55790.525616,2,94,3,3,0.82,2,2,0,1,0,2,38,1,5,3,NA +64435,7,2,1,16,NA,5,7,2,16,196,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9136.388281,9719.077424,3,91,8,8,2.24,4,4,0,2,0,1,45,1,4,1,4 +64436,7,2,2,34,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,27127.983961,27837.333201,2,90,14,14,3.45,4,4,1,1,0,2,34,2,5,6,NA +64437,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,5,2,1,2,2,1,2,2,1,2,2,1,19520.240895,25185.107635,2,95,2,2,0.33,2,2,1,0,0,2,21,1,1,5,NA +64438,7,2,2,44,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,25774.834017,34018.000767,1,100,8,8,3.3,2,2,0,1,0,2,44,1,4,3,NA +64439,7,2,1,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,148039.171547,149225.244125,2,98,15,15,5,2,2,0,0,0,2,52,1,3,1,4 +64440,7,2,1,40,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,24506.160629,24417.630409,1,92,77,77,NA,2,2,0,0,1,1,40,2,5,5,NA +64441,7,2,1,0,1,4,4,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7867.225786,8217.805417,3,92,6,6,0.93,5,5,2,1,0,2,37,1,5,1,3 +64442,7,1,1,61,NA,1,1,NA,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,8390.796678,0,3,92,7,7,2.45,2,2,0,0,1,2,55,1,1,1,1 +64443,7,2,2,2,NA,4,4,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7567.81858,8250.001805,2,97,7,7,1.06,7,7,1,2,0,2,40,1,4,5,NA +64444,7,2,1,18,NA,3,3,2,18,223,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,74123.161671,78732.269389,1,93,15,15,4.59,4,4,0,1,0,1,57,1,5,1,5 +64445,7,1,2,3,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11353.600279,0,1,98,15,15,5,6,6,3,0,0,1,37,2,5,1,4 +64446,7,2,1,47,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,105141.812429,104871.724763,1,98,7,7,1.66,5,5,2,1,0,2,37,1,5,1,3 +64447,7,2,2,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,27934.372045,33663.467631,1,101,4,4,0.58,6,6,0,4,0,2,41,1,3,5,NA 
+64448,7,2,1,0,0,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22215.410216,21831.35406,2,98,14,14,4.32,3,3,1,0,0,1,26,1,4,6,NA +64449,7,2,2,18,NA,5,6,1,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10312.363668,10584.186706,2,102,10,10,3.62,3,3,0,0,0,1,51,2,5,1,5 +64450,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,9845.894687,11807.30791,2,94,14,14,3.58,4,4,1,0,1,1,80,1,3,2,NA +64451,7,2,1,32,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,18464.039593,21965.968723,1,101,6,6,2.51,2,1,0,0,0,1,32,1,4,6,NA +64452,7,2,1,0,10,2,2,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7284.164858,7284.299119,1,92,6,6,1.67,3,3,1,0,0,1,27,1,3,6,NA +64453,7,2,2,5,NA,4,4,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15449.848607,17265.123353,2,102,1,1,0.16,3,3,1,0,1,2,63,1,2,4,NA +64454,7,2,2,4,NA,5,7,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13130.790087,14128.876607,2,102,14,14,3.25,5,5,1,1,0,2,32,1,4,1,3 +64455,7,2,1,6,NA,4,4,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12399.014378,12645.533348,2,96,6,6,1.35,3,3,1,1,0,2,25,1,3,5,NA +64456,7,2,2,31,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,32097.38481,32019.876048,2,100,14,14,3.58,4,4,1,1,0,1,33,1,4,1,5 +64457,7,1,2,14,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20322.312754,0,2,91,6,6,0.93,5,5,1,2,0,2,50,2,1,5,NA +64458,7,2,1,4,NA,2,2,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,17533.373205,2,98,6,6,0.78,7,7,1,3,1,2,63,1,2,4,NA +64459,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,129553.609875,132943.624501,2,95,10,10,4.49,2,2,0,0,1,1,62,NA,NA,1,3 +64460,7,2,1,10,NA,3,3,2,10,122,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,46081.129115,48091.57352,2,95,5,5,0.87,4,4,1,1,0,1,34,1,3,6,NA +64461,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,24919.497762,25578.256222,1,101,1,1,0.1,4,4,1,1,0,2,52,1,4,3,NA 
+64462,7,2,1,12,NA,4,4,1,13,157,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12147.046136,12703.852077,2,100,4,4,0.85,4,4,0,2,0,2,39,1,3,6,NA +64463,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,52209.836905,57180.809983,1,98,13,13,NA,2,2,0,0,2,2,80,1,2,2,NA +64464,7,2,1,0,7,5,6,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7997.590655,8653.844084,3,91,15,15,5,4,4,2,0,0,2,33,2,5,1,5 +64465,7,2,2,32,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,2,2,2,1,2,2,1,2,2,1,38184.257672,37153.966183,2,91,7,7,1.29,6,6,2,2,0,1,33,2,3,6,NA +64466,7,2,2,23,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,18723.98095,18882.01405,2,95,1,1,0.03,3,3,1,0,0,1,23,1,3,6,NA +64467,7,2,1,4,NA,4,4,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10510.490567,11159.309176,1,92,77,77,NA,5,5,1,2,0,2,41,1,3,5,NA +64468,7,2,2,12,NA,5,7,2,12,147,NA,NA,2,1,4,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8301.172828,8622.168806,1,94,15,15,4.2,5,5,1,2,0,1,47,1,5,1,5 +64469,7,2,1,47,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,17787.524589,17730.790674,2,100,8,8,4.96,1,1,0,0,0,1,47,1,5,5,NA +64470,7,2,2,20,NA,5,7,2,NA,NA,2,NA,2,2,3,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,55392.206282,55887.450369,3,91,6,6,2.04,2,2,0,0,0,1,24,2,4,5,NA +64471,7,2,2,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,61994.231866,64386.409549,2,92,15,15,5,3,3,1,0,0,1,48,1,5,1,5 +64472,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,31460.11615,32394.500316,1,97,3,3,0.73,3,3,0,0,0,2,50,1,4,1,3 +64473,7,2,1,79,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,11869.786782,12478.026352,2,101,6,6,2.04,2,2,0,0,2,2,74,1,3,1,3 +64474,7,2,1,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,23594.223469,25276.966347,1,97,15,15,5,4,4,0,1,0,1,40,1,4,1,4 +64475,7,2,1,62,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,3,1,NA,2,2,2,2,2,2,1,2,2,1,8197.936864,8932.691175,3,90,8,8,3.21,2,2,0,0,2,2,80,2,3,2,NA 
+64476,7,2,2,58,NA,1,1,1,NA,NA,2,NA,2,2,77,NA,1,4,NA,2,2,2,2,2,2,NA,NA,NA,NA,26272.35217,26409.360251,2,102,4,4,0.65,5,5,1,0,0,2,58,2,1,4,NA +64477,7,2,1,49,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,19895.710643,19832.252596,1,90,4,4,0.58,6,6,0,3,0,2,21,2,5,5,NA +64478,7,2,1,10,NA,4,4,2,10,130,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8655.162127,8668.652185,2,95,6,6,1.36,3,3,0,1,1,2,62,1,4,5,NA +64479,7,2,2,17,NA,1,1,1,17,205,2,NA,2,2,5,10,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,20117.170449,20520.192725,1,94,2,2,0.27,5,5,0,4,0,2,47,2,1,4,NA +64480,7,2,1,4,NA,5,6,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10276.262805,11525.953064,1,97,15,15,5,4,4,2,0,0,1,40,2,5,1,5 +64481,7,2,2,19,NA,3,3,2,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,35205.804094,37301.699975,2,101,2,1,0.32,3,1,0,0,0,2,19,NA,NA,NA,NA +64482,7,2,1,2,NA,5,6,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10498.222836,11004.769407,1,100,15,15,5,4,4,2,0,0,1,39,2,5,1,5 +64483,7,2,2,5,NA,1,1,1,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16462.187772,17500.018071,3,92,5,5,0.81,5,5,3,0,0,2,23,1,4,5,NA +64484,7,2,2,26,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,15265.136017,16765.388266,1,93,15,15,5,2,2,0,0,0,1,29,2,5,1,5 +64485,7,2,1,32,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,2,2,2,2,2,2,2,37658.482129,37374.26166,1,100,3,3,0.39,5,5,1,2,0,1,32,2,1,6,NA +64486,7,2,1,9,NA,4,4,1,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11443.206518,11526.18825,1,100,12,12,NA,5,5,0,3,0,1,39,1,5,1,3 +64487,7,2,1,7,NA,3,3,1,7,86,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18812.486733,19704.440218,1,98,6,6,0.81,6,6,0,4,0,2,34,NA,NA,1,2 +64488,7,2,2,39,NA,4,4,2,NA,NA,2,NA,2,2,5,NA,3,4,2,1,2,2,1,2,2,NA,NA,NA,NA,35601.750356,47964.629837,1,93,4,4,1.09,2,2,1,0,0,2,39,2,3,4,NA +64489,7,2,2,65,NA,5,6,2,NA,NA,2,NA,2,2,7,NA,4,1,NA,1,2,1,1,2,1,1,2,1,3,9991.888445,10532.363744,3,90,9,9,2.93,3,3,0,0,2,1,71,2,5,1,4 
+64490,7,2,1,8,NA,1,1,1,8,104,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17882.621856,18280.794545,3,92,12,12,NA,6,6,1,3,0,2,33,1,5,1,4 +64491,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,87028.709207,89819.695129,1,98,8,8,2.46,3,3,1,0,0,1,29,1,4,6,NA +64492,7,2,1,0,5,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20854.290322,20493.76497,2,94,14,14,4.05,3,3,1,0,0,2,37,1,5,1,5 +64493,7,2,1,17,NA,4,4,2,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13416.172328,13513.882801,1,96,15,15,4.9,4,4,0,1,0,1,47,1,3,1,5 +64494,7,2,2,39,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,28546.083602,28843.236835,2,95,9,9,4.1,2,2,0,0,0,2,19,1,4,NA,NA +64495,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,160743.928829,166234.629208,1,95,10,10,4.76,2,2,0,0,0,1,53,1,2,1,3 +64496,7,1,1,30,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,25120.174741,0,2,102,14,14,4.86,3,3,1,0,0,1,30,1,5,1,5 +64497,7,2,1,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,25495.045769,28642.112646,1,94,5,5,1.39,2,2,0,0,2,1,60,1,1,5,NA +64498,7,2,2,47,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15747.833197,16088.766331,3,91,15,15,5,3,3,0,1,0,1,47,2,5,1,5 +64499,7,2,1,63,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,10004.038848,10164.290775,2,102,9,9,3.24,3,3,0,0,1,1,63,1,4,1,4 +64500,7,2,2,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,105593.259211,109935.078529,2,92,15,9,5,2,1,0,0,0,2,29,1,5,6,NA +64501,7,2,1,66,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,4,1,NA,2,2,2,2,2,2,2,2,1,2,9068.437099,9213.701881,2,93,15,15,4.84,6,6,1,1,2,1,66,2,4,1,3 +64502,7,2,1,14,NA,4,4,2,14,177,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10366.393886,10567.129261,1,90,9,9,1.65,7,7,0,4,0,1,36,1,4,1,4 +64503,7,2,2,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,12863.404053,13555.744544,2,90,9,9,3.02,3,3,0,0,2,2,70,1,4,1,2 
+64504,7,2,2,57,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,16165.962054,15723.538094,2,99,12,4,1.34,6,1,0,0,0,2,57,1,5,2,NA +64505,7,2,1,18,NA,4,4,1,18,218,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13731.625553,14361.066702,1,100,1,1,0.04,4,4,1,1,0,2,51,1,3,3,NA +64506,7,2,1,9,NA,3,3,2,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21087.869274,22686.932895,1,101,4,4,1.16,2,2,0,1,0,2,51,1,4,3,NA +64507,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,46023.826844,47769.922841,1,91,9,9,3.24,3,3,0,0,1,1,70,1,1,2,NA +64508,7,2,1,14,NA,4,4,2,14,171,NA,NA,2,2,2,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9987.254512,10059.992052,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +64509,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,36283.627734,36608.028102,1,102,5,4,1.74,5,1,1,1,0,2,24,1,4,5,NA +64510,7,2,2,10,NA,1,1,1,10,123,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13028.403003,13395.598637,2,96,6,6,1.11,5,5,0,3,0,2,32,2,3,1,2 +64511,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,118611.064701,118209.809508,1,91,9,9,4.15,2,2,0,0,2,2,64,1,4,1,5 +64512,7,2,1,7,NA,4,4,2,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7886.48532,7943.675078,1,99,14,14,3.8,4,4,1,1,0,1,48,2,5,1,5 +64513,7,1,2,80,NA,2,2,NA,NA,NA,2,NA,2,1,9,NA,1,2,NA,2,2,2,2,2,2,NA,NA,NA,NA,17318.187297,0,2,90,77,77,NA,1,1,0,0,1,2,80,2,1,2,NA +64514,7,2,2,31,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,2,6,2,2,2,2,2,2,2,1,2,2,1,29176.121289,29479.238307,2,90,4,4,0.81,3,3,0,0,0,1,39,2,3,5,NA +64515,7,2,2,41,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,NA,NA,NA,NA,36924.381422,38397.364281,2,102,7,7,2.16,3,3,0,2,0,2,41,1,5,3,NA +64516,7,2,2,2,NA,3,3,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37236.564417,41097.653079,2,95,7,7,2.91,2,2,1,0,0,1,32,1,5,2,NA +64517,7,2,2,4,NA,3,3,2,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,51483.624552,52160.136719,1,91,8,8,3.57,2,2,1,0,0,2,33,1,5,3,NA 
+64518,7,2,1,14,NA,1,1,1,14,178,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,24805.109913,25184.264697,1,94,5,5,0.57,7,7,2,1,0,1,58,2,1,1,1 +64519,7,1,2,1,18,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37915.354974,0,2,94,12,12,NA,5,5,1,0,2,2,39,1,5,3,NA +64520,7,2,2,64,NA,2,2,2,NA,NA,2,NA,2,1,NA,NA,3,1,NA,1,1,2,1,2,2,1,2,2,NA,12026.225854,13435.778737,1,90,14,14,5,2,2,0,0,2,1,65,2,2,1,3 +64521,7,2,1,15,NA,4,4,2,15,182,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11351.725436,11256.943498,2,95,5,5,1.18,3,3,0,1,0,2,55,1,4,5,NA +64522,7,2,1,13,NA,5,7,1,13,163,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5535.564809,5914.423708,2,92,15,15,4.59,4,4,0,2,0,2,45,2,5,1,5 +64523,7,2,2,8,NA,4,4,2,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10888.493631,11627.08662,1,101,1,1,0.21,3,3,0,2,0,2,32,1,4,5,NA +64524,7,2,1,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25372.225163,26225.545107,2,93,6,2,0.72,4,1,0,0,1,1,69,NA,NA,1,NA +64525,7,2,2,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,45673.879644,47551.913663,1,94,5,5,1.47,2,2,0,0,0,1,24,1,4,1,4 +64526,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,139800.409559,143196.722449,1,100,8,8,1.95,4,4,0,2,1,2,49,1,5,6,NA +64527,7,1,2,42,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,NA,NA,NA,NA,NA,NA,NA,29650.79971,0,2,90,NA,NA,NA,2,2,0,0,0,1,40,NA,NA,1,5 +64528,7,2,2,7,NA,4,4,2,7,89,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8579.490652,8729.153641,2,97,13,13,NA,6,6,2,2,0,2,24,1,2,6,NA +64529,7,2,1,18,NA,3,3,1,18,219,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,112494.70924,119489.826838,1,94,5,5,1.04,4,4,1,1,0,1,18,1,2,NA,NA +64530,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,174520.785302,181786.280703,1,95,10,10,4.76,2,2,0,0,0,1,53,1,2,1,3 +64531,7,2,2,2,NA,5,6,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5248.47937,5499.505789,3,91,14,14,2.5,6,6,1,1,1,2,37,2,2,1,5 
+64532,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,14340.013944,13947.561844,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 +64533,7,2,1,13,NA,4,4,2,13,157,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12418.38217,12987.625827,2,99,6,6,1.57,3,3,0,2,0,2,31,1,3,77,NA +64534,7,2,2,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,3,1,2,2,1,2,2,1,2,2,1,19658.218913,19871.634013,1,99,2,2,0.31,4,4,1,0,1,2,67,1,3,3,NA +64535,7,2,2,7,NA,4,4,2,7,86,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6491.294105,6989.429429,2,99,6,6,1.11,5,5,1,2,0,2,41,1,2,5,NA +64536,7,2,2,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,68852.695662,70965.028494,1,101,15,15,5,3,3,0,0,2,1,75,1,2,1,2 +64537,7,1,1,7,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8749.381325,0,2,100,15,15,5,4,3,0,2,0,1,42,1,3,5,NA +64538,7,2,2,11,NA,1,1,1,11,137,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,16986.005478,17733.622754,2,102,6,6,1,6,6,1,3,0,1,35,2,3,1,3 +64539,7,2,2,7,NA,3,3,2,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,80369.555824,80552.420473,1,97,15,15,4.07,5,5,0,3,0,1,36,1,5,1,5 +64540,7,2,1,69,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,1,2,1,2,2,1,2,2,NA,11992.012141,12184.108862,2,102,99,99,NA,2,2,0,0,2,2,67,2,5,1,5 +64541,7,2,2,0,10,3,3,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27492.029044,26745.284482,1,97,10,10,2.95,4,4,2,0,0,1,28,1,5,1,4 +64542,7,2,2,5,NA,1,1,2,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13366.393396,13791.68675,2,94,77,77,NA,4,4,2,0,0,1,26,2,2,1,4 +64543,7,2,2,0,7,3,3,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12997.670925,13008.434462,1,99,10,10,2.48,5,5,2,1,0,1,33,1,5,1,5 +64544,7,2,1,69,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,7688.593855,7926.541218,2,100,14,14,4.59,3,3,0,0,2,1,69,2,5,1,5 +64545,7,2,1,15,NA,2,2,1,15,184,NA,NA,2,2,4,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17555.907575,18699.509115,2,93,8,8,2,4,4,1,1,0,1,50,2,4,1,4 
+64546,7,1,2,6,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14300.71869,0,2,94,4,4,0.72,4,4,1,1,0,1,30,2,1,1,3 +64547,7,2,1,2,NA,1,1,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10803.555682,10931.27688,2,94,9,9,2.1,5,5,1,2,0,1,31,2,4,1,4 +64548,7,2,2,59,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,17950.494975,18045.41616,1,92,2,2,0.24,5,5,0,2,0,1,35,2,4,1,3 +64549,7,2,1,44,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,42123.732489,42281.342391,1,102,7,7,1.7,4,4,0,0,2,1,44,1,4,4,NA +64550,7,2,2,62,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,3,12449.239856,12885.71781,1,96,7,7,2.64,2,2,0,0,2,1,64,2,5,1,5 +64551,7,2,2,10,NA,2,2,1,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14414.529053,14504.545241,2,96,14,14,3.36,4,4,1,1,0,2,28,1,2,6,NA +64552,7,2,1,76,NA,2,2,2,NA,NA,1,2,2,1,9,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,13725.398968,14221.420409,2,90,7,7,2.52,2,2,0,0,2,2,71,1,4,1,4 +64553,7,2,2,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,13422.679076,14021.98734,2,92,6,6,2.31,2,2,0,0,2,2,65,1,4,5,NA +64554,7,2,2,1,17,3,3,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,43715.161102,44289.593043,1,98,15,15,5,3,3,1,0,0,1,33,1,5,1,5 +64555,7,2,2,1,20,3,3,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20272.436775,20538.823422,2,94,77,77,NA,2,2,1,0,0,1,24,1,4,77,NA +64556,7,2,2,0,4,1,1,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9767.083234,10416.172562,3,92,2,2,0.4,3,3,1,0,0,1,21,1,2,6,NA +64557,7,2,1,3,NA,4,4,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11324.865632,11670.771889,2,96,5,5,1.13,3,3,1,1,0,2,31,1,3,5,NA +64558,7,2,1,62,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11764.405491,12074.424659,1,97,14,14,3.93,3,3,0,1,2,2,63,1,4,1,4 +64559,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,72551.269339,74475.92188,1,95,7,7,2.45,2,2,0,0,2,2,70,1,3,1,3 
+64560,7,1,2,13,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,21282.11341,0,1,90,6,6,1.44,3,3,0,1,0,1,44,2,1,1,3 +64561,7,2,1,6,NA,2,2,2,6,83,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8927.426533,9245.241527,2,99,13,13,NA,6,6,2,1,0,2,31,1,4,6,NA +64562,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,23235.97926,24326.785264,2,95,6,6,1.19,4,4,0,1,0,1,44,1,3,1,2 +64563,7,2,2,0,7,2,2,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7849.042868,8059.930457,2,98,6,6,1.07,5,5,3,0,0,2,24,1,3,1,3 +64564,7,2,2,54,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,19302.748944,20091.820859,3,92,15,15,5,3,3,0,1,0,1,55,2,5,1,4 +64565,7,1,1,54,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,20286.317046,0,2,99,99,99,NA,1,1,0,0,0,1,54,1,3,5,NA +64566,7,2,1,7,NA,4,4,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8294.996898,8761.908989,1,96,5,5,0.53,7,7,2,2,0,2,38,1,9,6,NA +64567,7,2,1,37,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,38835.309696,39871.800261,1,100,15,15,4.34,4,4,2,0,0,2,35,1,5,1,5 +64568,7,2,2,5,NA,1,1,1,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10493.785765,10940.90082,1,103,5,5,0.71,6,6,2,2,0,2,31,2,2,1,2 +64569,7,2,1,14,NA,4,4,1,14,177,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9188.45337,9407.445906,2,95,15,15,3.85,7,7,0,3,1,2,62,1,4,2,NA +64570,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,49428.481088,50236.061312,1,101,2,2,0.72,1,1,0,0,1,2,65,1,4,2,NA +64571,7,2,1,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,124091.929364,132249.317489,1,95,15,15,5,3,3,1,0,0,1,26,1,3,1,4 +64572,7,2,1,50,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15693.68983,16353.841376,1,91,77,77,NA,4,4,0,2,0,1,50,2,5,1,5 +64573,7,2,1,7,NA,4,4,1,7,86,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9022.8939,9451.941588,2,100,5,5,0.88,5,5,2,1,0,2,30,1,4,6,NA 
+64574,7,2,1,17,NA,4,4,2,17,207,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11834.781205,12063.950506,2,90,6,6,1.12,4,4,0,1,1,1,63,2,1,1,1 +64575,7,2,2,6,NA,2,2,2,6,72,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13895.77426,15584.232002,2,91,99,99,NA,6,6,1,3,0,2,20,2,2,5,NA +64576,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,38666.703155,42427.769217,2,94,15,15,5,1,1,0,0,1,1,80,1,5,2,NA +64577,7,2,1,0,4,3,3,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12008.205501,11800.609728,1,92,4,4,1.22,2,2,1,0,0,2,30,1,4,5,NA +64578,7,2,1,1,12,1,1,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12567.081957,12360.030976,1,97,7,7,2.31,2,2,1,0,0,1,22,1,4,5,NA +64579,7,2,2,3,NA,4,4,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10683.855206,11939.151167,1,96,3,3,0.43,4,4,1,1,0,2,39,2,4,1,3 +64580,7,2,1,7,NA,1,1,1,7,88,NA,NA,2,7,77,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11367.678664,11216.859778,2,96,77,77,NA,7,7,3,2,0,2,33,2,2,6,NA +64581,7,2,2,31,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,14179.938483,16215.179647,3,90,10,10,4.63,2,2,0,0,0,2,31,1,5,1,4 +64582,7,2,1,59,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,1,1,2,2,1,2,2,1,16628.326744,17078.450103,2,102,8,8,2.01,4,4,0,0,0,1,59,2,4,1,4 +64583,7,2,1,16,NA,5,6,2,16,199,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8305.829479,8595.334823,1,93,15,15,5,3,3,0,2,0,2,48,2,5,3,NA +64584,7,2,1,41,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12471.410981,12474.00514,2,103,77,77,NA,5,5,0,2,0,2,39,2,5,1,5 +64585,7,2,2,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,3,1,2,2,1,2,2,NA,NA,NA,NA,25189.042335,25118.259708,1,100,8,8,1.95,4,4,0,2,0,2,42,1,4,1,4 +64586,7,2,2,25,NA,1,1,1,NA,NA,2,NA,2,2,77,NA,1,1,2,2,2,2,2,2,2,NA,NA,NA,NA,50177.882654,49959.335916,1,100,4,4,0.78,4,4,0,0,1,1,33,2,1,1,1 +64587,7,2,1,19,NA,4,4,1,19,239,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,2,1,0.18,2,1,0,0,0,1,19,NA,NA,NA,NA 
+64588,7,2,2,3,NA,5,6,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8171.700571,8311.029296,1,92,7,7,1.65,4,4,2,0,0,1,24,1,4,1,3 +64589,7,2,1,13,NA,2,2,2,13,158,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14081.782012,14782.02856,2,90,7,7,0.89,7,7,1,3,3,1,60,2,3,1,3 +64590,7,2,1,6,NA,4,4,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13423.881856,14062.200951,2,97,7,7,1.38,5,5,0,1,1,2,79,1,5,5,NA +64591,7,2,1,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,20075.522681,20011.491118,3,92,4,4,1.22,2,2,0,0,0,1,51,1,2,1,3 +64592,7,2,2,29,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,3,6,2,2,2,1,1,2,1,1,2,2,NA,45207.136555,47234.971464,1,91,7,7,2.1,3,3,0,1,0,2,29,2,3,6,NA +64593,7,2,1,47,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19686.178677,19752.591494,2,94,15,15,5,5,5,0,2,1,1,47,2,5,1,5 +64594,7,2,1,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21399.459455,20960.973241,1,90,15,15,4.34,4,4,0,0,1,1,62,2,5,1,3 +64595,7,2,1,4,NA,3,3,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,27813.351627,32589.867729,2,97,1,1,0.21,4,4,2,0,0,2,34,2,1,1,2 +64596,7,2,2,12,NA,1,1,1,12,150,NA,NA,2,2,4,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20419.465237,21192.774678,2,102,6,3,0.54,6,4,0,4,0,2,43,2,1,5,NA +64597,7,2,2,16,NA,3,3,2,16,203,NA,NA,1,1,NA,11,NA,NA,NA,1,2,1,1,2,1,1,2,2,1,23708.623398,23934.71279,2,97,2,2,0.38,4,4,0,2,2,2,64,2,1,1,NA +64598,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,98514.948291,98181.677236,1,99,15,15,5,2,2,0,0,2,1,62,1,5,1,4 +64599,7,2,1,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,29756.291619,30015.261296,2,101,8,8,2.43,3,3,0,1,0,1,35,1,4,6,NA +64600,7,2,2,14,NA,4,4,2,14,169,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11478.437608,11446.094627,1,96,10,10,3.04,4,4,0,1,0,2,43,1,5,1,4 +64601,7,2,1,6,NA,5,6,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8556.838894,9269.764719,2,95,15,15,5,3,3,0,1,0,2,34,2,5,1,NA 
+64602,7,2,2,59,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,22224.73066,22340.630739,2,94,14,14,5,1,1,0,0,0,2,59,1,4,2,NA +64603,7,2,2,40,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,20303.639991,21936.687597,1,97,15,15,4.77,4,4,1,1,0,2,40,1,5,1,5 +64604,7,2,2,50,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,2,3,NA,2,2,2,1,2,2,2,2,2,2,23200.373382,24548.135184,2,93,5,5,0.89,4,4,0,2,0,1,42,NA,NA,6,NA +64605,7,2,2,0,6,3,3,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21671.775435,21083.121886,1,98,6,6,1.34,4,4,2,0,0,2,25,1,3,1,4 +64606,7,2,2,59,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,23644.678283,24188.238174,2,93,5,5,1.32,2,2,0,0,0,2,59,2,4,2,NA +64607,7,2,2,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,14447.262849,14051.875602,3,90,15,15,5,5,5,1,0,1,1,38,2,3,1,4 +64608,7,2,1,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,104371.641146,109271.336207,1,90,15,15,5,3,3,0,0,0,1,59,1,5,1,5 +64609,7,2,1,64,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,8385.499814,8451.09383,2,92,12,12,NA,2,1,0,0,1,2,58,1,3,5,NA +64610,7,2,1,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,141773.363283,145979.183499,1,97,15,15,5,4,4,0,0,1,1,67,NA,NA,2,NA +64611,7,2,1,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17206.320427,17271.69718,2,100,14,14,3.06,5,5,1,0,0,1,50,1,5,1,5 +64612,7,2,2,11,NA,4,4,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9013.911777,9371.113947,1,97,4,4,0.46,7,7,3,3,0,2,31,1,3,1,NA +64613,7,2,1,25,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,49741.714519,50662.492276,2,91,7,7,3.76,1,1,0,0,0,1,25,1,5,5,NA +64614,7,2,1,7,NA,3,3,1,8,96,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,71470.369236,87471.572436,2,101,8,8,2.81,3,3,0,2,0,1,48,1,3,3,NA +64615,7,2,2,14,NA,3,3,2,14,179,NA,NA,1,1,NA,9,NA,NA,NA,1,1,1,NA,NA,NA,1,2,2,1,23708.623398,23934.71279,2,97,2,2,0.38,4,4,0,2,2,2,64,2,1,1,NA 
+64616,7,2,1,21,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,32384.468407,33958.290597,2,100,14,14,3.58,4,4,0,1,0,1,46,2,5,1,5 +64617,7,2,1,60,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,2,2,2,2,2,2,2,2,2,2,9053.837749,9198.868668,2,99,77,77,NA,4,4,1,1,1,2,38,2,4,1,4 +64618,7,2,1,1,21,1,1,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8481.734412,8750.218705,1,103,5,5,0.74,5,5,1,1,0,2,40,99,3,1,1 +64619,7,2,1,16,NA,4,4,2,16,198,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13969.457688,14694.403205,1,90,14,14,3.25,4,4,0,2,0,2,33,2,3,1,3 +64620,7,2,1,36,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,14221.330587,14963.406508,3,90,12,12,NA,4,4,0,0,1,1,62,2,4,3,NA +64621,7,2,1,38,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,39053.240727,39737.089727,1,100,14,14,4.71,3,3,0,1,0,1,38,1,5,1,5 +64622,7,2,1,13,NA,1,1,2,13,165,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20398.562455,20710.36162,2,94,13,13,NA,5,5,0,3,0,1,32,2,2,1,1 +64623,7,2,1,2,NA,2,2,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8488.185756,9094.478438,2,99,77,77,NA,4,4,1,1,1,2,38,2,4,1,4 +64624,7,2,1,1,15,1,1,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10078.126531,10467.748183,2,97,NA,2,0.36,6,4,3,1,0,2,25,1,4,6,NA +64625,7,2,1,69,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,2,1,2,2,2,2,2,1,8491.292032,8823.40317,1,96,7,7,1.39,5,5,0,2,2,1,69,2,2,1,2 +64626,7,2,1,0,4,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9081.731172,9425.042696,1,101,5,5,0.74,6,6,1,3,0,1,38,1,4,1,4 +64627,7,2,2,17,NA,5,6,1,18,216,2,NA,2,2,5,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9269.277563,9333.801929,2,102,5,5,1.08,3,3,0,1,0,2,46,2,1,5,NA +64628,7,2,2,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27524.153939,27114.967247,3,92,12,12,NA,3,2,0,0,0,1,45,1,3,1,3 +64629,7,2,1,14,NA,3,3,2,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,119111.433099,126517.990149,2,94,7,7,2.38,2,2,0,1,0,1,39,1,4,3,NA 
+64630,7,2,1,0,3,3,3,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20370.716701,21140.779334,1,91,15,15,5,5,5,3,0,0,1,45,1,5,1,5 +64631,7,2,2,1,17,5,6,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7029.864692,7462.832472,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +64632,7,2,1,21,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,51858.383222,54820.448996,2,91,10,10,4.63,2,2,0,0,0,1,55,2,3,3,NA +64633,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,108410.783716,110095.623149,1,94,10,10,4.3,2,2,0,0,0,1,27,1,5,1,5 +64634,7,2,2,74,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,16361.152596,17584.911061,1,100,1,1,0.36,1,1,0,0,1,2,74,1,2,2,NA +64635,7,2,1,29,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19510.698389,19365.075974,1,102,7,7,1.9,4,4,1,1,0,1,29,1,4,1,3 +64636,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,2,1,0.02,2,1,0,0,0,1,20,1,4,5,NA +64637,7,2,1,77,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,2,1,NA,2,2,2,1,2,2,2,2,2,NA,16591.871479,16938.772121,3,91,77,77,NA,4,4,0,0,2,1,54,1,4,1,2 +64638,7,2,1,8,NA,4,4,1,8,99,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10410.106675,10617.081899,2,96,5,5,1.08,3,3,0,1,0,2,41,1,3,1,NA +64639,7,2,1,58,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,25963.141347,26172.595903,1,92,9,9,3.7,2,2,0,0,0,1,58,1,2,1,4 +64640,7,2,1,80,NA,1,1,1,NA,NA,1,2,1,1,NA,NA,1,1,NA,2,2,2,2,2,2,1,2,2,NA,18949.267372,20024.848444,2,98,4,4,1.15,2,2,0,0,2,1,80,1,1,1,1 +64641,7,2,1,4,NA,2,2,1,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,17533.373205,2,98,8,8,1.48,7,7,3,0,0,1,26,1,3,1,3 +64642,7,2,1,15,NA,2,2,1,15,183,NA,NA,2,2,4,8,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,24585.624844,25010.051033,3,91,5,5,0.89,4,4,0,2,2,1,61,2,3,1,4 +64643,7,2,2,52,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22560.963402,22680.264479,1,98,6,6,1.65,2,2,0,1,0,2,52,2,5,1,NA 
+64644,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,4,2,1,2,2,1,2,2,1,2,2,1,93796.829073,95761.252302,1,100,6,6,1.78,3,3,1,1,0,2,35,1,5,4,NA +64645,7,2,1,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5942.817425,6352.518414,2,97,5,5,0.84,5,5,2,1,0,2,27,1,3,1,3 +64646,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,32860.812334,35527.111698,1,91,7,7,2.68,2,2,0,0,2,1,80,1,4,1,4 +64647,7,2,1,4,NA,2,2,1,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13504.027725,14468.59111,2,93,8,8,2,4,4,1,1,0,1,50,2,4,1,4 +64648,7,2,1,36,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,37080.526463,37974.333878,1,103,15,15,5,2,2,0,0,0,1,36,2,5,1,5 +64649,7,2,2,4,NA,1,1,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,2,NA,NA,NA,NA,15326.318384,16292.539752,3,91,7,7,1.42,6,6,1,3,0,1,37,2,1,1,1 +64650,7,2,2,10,NA,3,3,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22371.648216,23759.450609,1,95,5,5,1.08,3,3,0,1,0,1,53,1,4,1,4 +64651,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,28454.541725,29098.608791,3,91,3,3,1.24,1,1,0,0,0,1,58,1,4,5,NA +64652,7,2,2,10,NA,5,6,1,10,125,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7905.246022,8589.222558,1,100,3,3,0.73,2,2,0,1,0,2,38,2,5,3,NA +64653,7,2,2,1,18,4,4,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9287.278834,9993.215624,2,102,5,3,0.63,5,4,2,1,0,1,24,1,4,6,NA +64654,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,151649.038926,165350.159672,1,101,7,7,2.31,2,2,0,0,0,1,44,1,3,1,2 +64655,7,2,2,9,NA,1,1,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15510.382876,18634.445505,1,100,13,13,NA,4,4,1,1,0,1,28,2,1,1,1 +64656,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,14314.968597,14503.241546,1,96,15,15,5,4,3,0,0,1,2,54,2,4,1,5 +64657,7,2,2,4,NA,3,3,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,71121.657525,76575.027332,1,92,9,4,1,7,3,2,1,0,1,45,1,4,2,NA 
+64658,7,2,2,51,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,1,2,25483.560748,27318.705448,1,94,5,5,0.57,7,7,2,1,0,1,58,2,1,1,1 +64659,7,2,2,11,NA,4,4,2,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7646.649777,8233.445923,2,99,4,4,1,3,3,0,1,0,2,38,1,3,5,NA +64660,7,2,2,4,NA,2,2,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13366.393396,14209.054666,2,94,7,7,1.34,5,5,2,1,0,1,32,2,1,1,NA +64661,7,2,1,29,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,31312.870743,38059.589922,2,90,8,8,2.24,4,4,1,1,0,2,29,1,4,6,NA +64662,7,2,1,10,NA,3,3,1,10,130,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,66003.625505,68276.88064,1,101,8,8,1.85,5,5,0,3,0,1,41,1,3,1,4 +64663,7,2,2,33,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,21522.871343,21589.047039,3,92,7,7,0.81,7,7,2,4,0,1,40,NA,NA,1,4 +64664,7,2,2,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,40760.712736,41570.695949,1,98,4,4,1.26,2,2,0,0,1,2,80,1,4,2,NA +64665,7,2,2,6,NA,3,3,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24070.467912,23750.822126,1,92,4,4,0.5,6,6,0,3,0,2,41,1,4,1,NA +64666,7,2,2,11,NA,4,4,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10332.067017,10608.478853,1,100,12,12,NA,5,5,0,3,0,1,39,1,5,1,3 +64667,7,2,1,58,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,6,NA,1,2,2,1,2,2,1,2,2,1,26135.885159,26052.523863,2,101,3,3,0.68,2,2,0,0,0,1,58,1,1,6,NA +64668,7,1,1,21,NA,1,1,NA,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,52698.05363,0,3,92,2,2,0.4,3,3,1,0,0,1,21,1,2,6,NA +64669,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,35475.142583,37174.132552,2,95,6,6,2.95,1,1,0,0,1,2,60,1,3,3,NA +64670,7,2,2,2,NA,3,3,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,55441.113009,61189.845638,1,92,6,6,1.62,3,3,1,0,0,2,26,1,5,1,5 +64671,7,2,2,13,NA,3,3,1,13,164,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,117872.104347,120084.841085,2,101,10,10,2.33,6,6,1,3,0,1,39,1,2,1,4 
+64672,7,2,1,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16995.648055,16715.026676,2,100,7,7,1.38,5,5,1,0,0,2,45,1,2,3,NA +64673,7,2,1,12,NA,5,6,1,12,148,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5388.361335,5758.757236,3,91,6,6,1.22,5,5,1,2,0,2,37,1,4,1,2 +64674,7,2,2,39,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,1,2,2,2,2,2,2,2,NA,NA,NA,NA,38161.026403,39476.245254,1,100,7,7,1.74,4,4,0,2,0,2,39,2,1,1,3 +64675,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,152858.509804,159455.333031,1,95,15,15,5,4,4,0,2,0,2,42,1,5,1,5 +64676,7,2,2,56,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,4,6,NA,2,2,2,2,2,2,1,2,1,2,19969.163208,20990.960204,2,93,10,10,3.67,3,3,0,0,0,2,56,2,4,6,NA +64677,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,19210.136544,18684.400763,1,96,12,12,NA,1,1,0,0,0,2,56,1,4,3,NA +64678,7,2,2,0,4,4,4,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7333.056164,7413.124496,2,101,5,5,1.08,3,3,1,0,0,1,31,1,4,6,NA +64679,7,2,2,38,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,2,3,2,1,2,2,1,2,2,1,2,2,1,17978.142628,18905.276914,1,91,7,7,1.57,4,4,0,3,0,2,38,2,2,3,NA +64680,7,2,2,35,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,21116.677917,21908.928919,1,92,14,14,3.47,4,4,0,2,0,1,37,1,5,1,5 +64681,7,2,1,15,NA,3,3,2,15,185,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,68148.957861,67253.324127,1,93,7,7,2.16,3,3,0,1,0,2,50,1,5,3,NA +64682,7,2,1,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,133542.212862,160359.69013,1,94,14,14,2.96,5,5,0,3,0,2,39,1,4,1,3 +64683,7,1,2,26,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,6,3,1,2,2,1,2,2,NA,NA,NA,NA,109522.19868,0,1,90,9,1,0,2,1,0,0,0,2,26,1,5,6,NA +64684,7,2,1,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,1,2,1,2,2,1,2,2,1,16995.648055,19983.260181,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +64685,7,2,1,12,NA,2,2,2,12,146,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,13752.835112,15116.811136,3,90,7,7,1.48,5,5,0,1,0,1,43,2,1,6,NA 
+64686,7,2,2,2,NA,4,4,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6226.488588,6570.976462,2,99,6,6,1.03,6,6,3,0,0,1,33,1,3,6,NA +64687,7,2,2,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,20247.768461,19251.812222,2,93,4,4,0.56,5,5,2,1,0,1,27,1,2,6,NA +64688,7,2,1,1,18,1,1,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11853.772636,11869.789876,2,96,4,4,0.81,4,4,1,1,0,1,36,2,1,6,NA +64689,7,2,2,42,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,2,5,2,2,2,2,1,2,2,2,2,2,2,27654.660303,28156.17371,2,103,4,4,0.79,3,3,0,0,0,2,42,2,2,5,NA +64690,7,2,2,11,NA,4,4,2,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8147.287486,8590.325322,2,90,2,2,0.38,4,4,1,2,0,2,32,1,4,5,NA +64691,7,2,1,0,10,2,2,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,2,2,1,NA,NA,NA,NA,5357.080288,5657.49929,1,103,12,12,NA,6,6,2,1,0,2,27,2,2,1,3 +64692,7,1,2,59,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,13728.308948,0,2,90,10,10,3.51,3,3,0,0,1,2,59,1,4,1,NA +64693,7,1,2,46,NA,2,2,NA,NA,NA,2,NA,2,2,4,NA,2,4,NA,2,2,2,2,2,2,NA,NA,NA,NA,23968.560941,0,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +64694,7,2,1,23,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,44887.234615,46900.157613,2,93,9,9,2.6,4,4,0,0,0,2,58,2,4,4,NA +64695,7,2,2,38,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,11608.998717,12267.891619,3,90,14,14,3.47,4,4,1,1,0,2,38,2,5,1,5 +64696,7,2,1,11,NA,4,4,2,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8579.422451,8641.637123,1,99,14,14,4.86,3,3,0,1,0,1,42,1,5,1,5 +64697,7,2,1,8,NA,4,4,1,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11443.206518,11987.342507,1,100,6,6,1.65,2,2,0,1,0,2,42,1,4,5,NA +64698,7,2,1,13,NA,3,3,1,13,166,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23685.067681,25157.846578,1,94,7,7,1.21,6,6,2,2,0,1,31,1,2,6,NA +64699,7,2,1,16,NA,4,4,1,16,197,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18123.994406,21384.995218,2,101,8,8,2.81,3,3,0,1,0,1,35,1,1,1,2 
+64700,7,1,1,24,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,108410.783716,0,1,91,8,8,2.51,3,3,1,0,0,2,24,1,4,1,3 +64701,7,2,1,57,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,3,3,NA,2,2,2,1,2,2,2,2,1,2,27131.416371,32184.468102,2,93,6,6,0.64,7,7,2,1,3,2,60,2,3,2,NA +64702,7,2,2,4,NA,3,3,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,31376.988784,34630.493438,1,101,7,7,1.82,4,4,2,0,0,2,27,1,2,1,3 +64703,7,2,2,22,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,32537.532358,33640.063825,2,90,5,5,1.19,3,3,0,0,0,2,50,2,4,4,NA +64704,7,2,1,62,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,11937.570805,12597.633777,2,96,8,8,1.33,7,7,2,1,1,1,62,2,1,1,1 +64705,7,2,2,72,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,13242.661868,13437.555164,1,99,5,5,1.69,2,2,0,0,2,1,79,1,1,1,4 +64706,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,20332.409486,19927.598485,2,93,6,6,1.47,3,3,0,0,0,2,47,1,4,5,NA +64707,7,2,1,10,NA,1,1,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,17882.621856,17720.218058,3,92,4,4,0.67,4,4,0,3,0,2,36,2,1,5,NA +64708,7,2,2,11,NA,1,1,2,11,136,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15002.25457,15643.939501,1,91,6,6,1.35,3,3,0,2,0,2,38,1,4,3,NA +64709,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,9221.19173,9652.555988,2,95,3,3,0.75,2,2,0,0,2,1,60,1,2,1,2 +64710,7,2,1,3,NA,5,7,2,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8981.553859,10073.795326,3,91,15,15,4.47,4,4,2,0,0,1,33,1,5,1,5 +64711,7,2,2,35,NA,5,7,1,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,18346.384255,19018.034723,2,96,15,15,5,4,4,1,1,0,2,35,2,5,1,5 +64712,7,2,1,6,NA,1,1,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14820.807433,14827.373008,2,102,5,5,0.89,4,4,1,2,0,2,36,2,5,3,NA +64713,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22224.73066,22340.630739,2,94,15,15,5,2,2,0,0,1,1,77,1,5,1,4 
+64714,7,2,2,17,NA,4,4,2,17,204,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13661.047334,13622.554377,2,97,5,5,0.76,5,5,1,1,0,2,47,1,4,5,NA +64715,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,4,2,0.83,2,1,0,0,0,1,24,1,4,5,NA +64716,7,2,1,1,19,3,3,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46257.816906,54201.886729,2,94,14,14,3.36,4,4,2,0,0,1,31,1,3,1,5 +64717,7,2,1,1,13,4,4,1,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10232.679671,11283.202594,2,101,5,5,0.89,4,4,1,1,1,2,38,1,4,77,NA +64718,7,2,1,18,NA,4,4,1,19,228,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14848.504688,14956.646891,1,98,2,2,0.45,2,1,0,0,0,1,19,1,4,NA,NA +64719,7,2,2,39,NA,5,6,2,NA,NA,1,2,2,1,7,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,18480.909695,18518.646173,1,93,9,9,5,1,1,0,0,0,2,39,2,5,3,NA +64720,7,2,1,29,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,21548.249649,21044.90278,1,100,8,8,1.61,6,6,1,3,0,1,29,1,5,6,NA +64721,7,2,1,56,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,17206.320427,17151.440282,2,100,5,5,1.08,3,3,0,0,0,1,38,1,2,5,NA +64722,7,2,2,33,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,1,1,2,2,2,2,2,2,2,1,2,2,NA,38218.668882,37187.448906,2,102,7,7,1.33,6,6,1,3,0,1,34,2,2,1,1 +64723,7,2,2,6,NA,4,4,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8292.876947,8466.609309,1,102,1,1,0,5,5,0,3,0,2,41,1,4,1,4 +64724,7,2,2,3,NA,3,3,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50908.326714,52512.144621,1,99,15,15,5,4,4,1,1,0,2,42,1,5,1,5 +64725,7,2,2,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,101005.054511,105662.333954,2,91,15,8,4.48,2,1,0,0,0,2,55,1,5,6,NA +64726,7,2,1,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,18486.334367,18427.371573,2,95,8,8,1.85,5,5,1,2,0,1,55,1,2,1,3 +64727,7,2,1,50,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,1,2,22446.308035,22401.210337,2,94,77,77,NA,4,4,0,0,0,1,28,2,1,3,NA 
+64728,7,2,1,23,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,3,5,NA,2,2,2,1,2,2,2,2,2,2,38474.772527,42632.210531,2,93,7,5,1.79,3,1,0,0,0,1,25,2,4,5,NA +64729,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,1,2,NA,1,2,1,NA,NA,NA,NA,NA,NA,NA,14314.616082,14811.078253,2,92,NA,NA,NA,2,1,0,0,1,2,58,2,4,5,NA +64730,7,2,1,9,NA,2,2,2,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9390.522479,10327.334743,2,90,1,1,0.19,4,4,0,1,0,2,44,1,2,5,NA +64731,7,2,2,34,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,36904.965687,36815.84758,2,93,9,9,3.14,3,3,0,2,0,2,34,2,5,3,NA +64732,7,2,1,17,NA,4,4,1,17,215,2,NA,2,1,4,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16261.995423,16380.43213,2,96,6,6,1.7,2,2,0,1,0,1,25,2,4,5,NA +64733,7,2,2,30,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,51753.495707,50822.503524,1,100,8,8,3.17,2,2,0,0,0,1,31,2,4,1,5 +64734,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12331.419303,12882.003985,2,95,5,5,1.32,2,2,0,0,2,1,66,1,2,1,4 +64735,7,2,2,6,NA,3,3,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,49543.011606,52616.361829,1,94,15,15,5,6,6,0,4,0,1,38,1,5,1,4 +64736,7,2,1,6,NA,5,6,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8376.521179,8870.870608,1,92,12,12,NA,4,4,1,1,0,1,33,2,4,1,4 +64737,7,2,1,19,NA,4,4,1,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,2,2,0.73,2,1,0,0,0,1,20,1,4,5,NA +64738,7,2,1,15,NA,3,3,2,15,183,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71458.892941,70519.759062,2,91,15,15,5,4,4,0,2,0,2,48,1,5,1,5 +64739,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,25026.664203,25574.319716,1,97,15,15,5,4,4,0,1,0,1,40,1,4,1,4 +64740,7,2,1,45,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,2,1,2,1,3,16903.372311,16842.307696,1,97,9,9,1.78,6,6,0,1,1,1,45,2,3,1,3 +64741,7,2,2,44,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,25189.042335,25070.255719,1,100,14,14,5,3,3,0,1,1,2,44,1,4,5,NA 
+64742,7,2,1,56,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,16287.780872,16419.180608,2,100,4,4,1.74,1,1,0,0,0,1,56,1,3,3,NA +64743,7,2,1,10,NA,4,4,2,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8017.552697,8398.795399,1,99,10,10,2.07,7,7,2,3,1,2,35,1,5,4,NA +64744,7,2,2,5,NA,3,3,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,29420.847299,31676.739048,3,92,7,7,0.81,7,7,2,4,0,1,40,NA,NA,1,4 +64745,7,2,1,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20820.576198,20899.685598,2,96,7,7,1.49,5,5,2,1,0,1,51,1,5,1,3 +64746,7,2,1,29,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,18523.956321,18232.779525,2,99,15,15,4.34,4,4,0,0,0,1,59,2,4,1,5 +64747,7,1,1,5,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9787.724348,0,1,96,10,10,3.51,3,3,1,0,0,1,25,1,4,1,5 +64748,7,2,2,0,4,4,4,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5576.185193,5739.245437,2,96,6,6,1.35,3,3,1,1,0,2,25,1,3,5,NA +64749,7,2,1,2,NA,5,6,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9239.758777,10363.400393,2,91,14,14,3.47,4,4,1,1,0,2,36,2,3,1,5 +64750,7,2,1,18,NA,1,1,1,18,226,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,32326.52031,34735.818434,2,98,13,13,NA,5,5,0,2,0,1,48,2,1,1,2 +64751,7,2,1,9,NA,3,3,2,9,118,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,61920.455064,68552.408117,2,94,12,12,NA,5,5,1,1,0,1,37,1,4,1,3 +64752,7,2,2,62,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16352.915834,17093.606152,3,92,8,8,2.97,2,2,0,0,2,2,62,1,4,1,4 +64753,7,2,2,68,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,4,NA,2,2,2,2,2,2,2,2,2,2,9716.805546,12994.252166,2,90,77,77,NA,1,1,0,0,1,2,68,2,1,4,NA +64754,7,2,1,36,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,14204.262514,14001.792163,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +64755,7,2,1,3,NA,1,1,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14415.164987,15444.808928,1,103,77,77,NA,4,4,1,0,1,1,20,1,3,6,NA 
+64756,7,2,1,72,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,2,2,2,1,2,2,2,2,2,NA,13654.270555,14323.606702,2,93,10,10,3.04,4,4,0,0,2,1,72,2,3,1,2 +64757,7,2,2,9,NA,5,7,2,10,121,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,67349.005531,67425.922822,1,95,8,8,1.28,7,7,1,4,0,1,32,1,3,1,3 +64758,7,2,1,33,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20071.705576,20986.552878,3,91,14,14,5,2,2,0,0,0,2,27,2,5,1,5 +64759,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,1,2,1,2,2,1,1,2,NA,52279.532372,58506.425352,2,101,5,5,1.63,2,2,0,0,2,1,80,1,1,1,1 +64760,7,2,1,4,NA,3,3,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,77702.196479,85091.48102,1,95,9,9,2.68,4,4,2,0,0,2,27,1,4,1,4 +64761,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,35276.751365,1,94,7,7,1.65,5,4,0,0,0,1,46,1,4,1,4 +64762,7,2,1,17,NA,4,4,1,17,214,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,18260.901254,2,101,10,10,3.89,3,3,0,1,0,2,49,1,4,1,3 +64763,7,2,1,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17583.693727,17701.770315,2,95,5,2,0.63,3,1,0,0,0,1,31,1,4,5,NA +64764,7,2,1,36,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,24555.036413,24478.369844,2,94,8,8,3.4,2,2,0,0,0,1,36,1,2,1,5 +64765,7,2,1,32,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,78529.577822,80778.70531,1,101,7,7,1.55,5,5,1,2,0,2,31,1,4,1,2 +64766,7,2,2,3,NA,1,1,1,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11859.546176,12772.78418,1,102,5,5,0.62,7,7,1,3,0,1,49,2,2,1,1 +64767,7,2,2,7,NA,2,2,1,7,85,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15510.382876,16516.802792,1,100,5,5,0.78,6,5,1,2,0,2,40,2,1,5,NA +64768,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,38616.556866,1,94,6,6,1.21,4,4,2,0,0,1,27,1,2,1,2 +64769,7,2,1,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19093.004278,19397.753966,2,99,2,2,0.19,6,6,0,1,0,1,59,1,2,5,NA 
+64770,7,2,1,11,NA,5,6,2,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5855.595238,6345.238788,1,91,15,15,3.25,7,7,1,2,0,2,31,1,5,1,5 +64771,7,2,1,64,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,11937.570805,12505.355675,2,96,3,3,0.24,7,7,2,3,1,2,40,1,3,3,NA +64772,7,2,1,50,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,2,NA,2,2,2,2,2,2,NA,NA,NA,NA,31872.125984,32482.275435,2,96,4,4,1.56,1,1,0,0,0,1,50,2,1,2,NA +64773,7,2,2,16,NA,1,1,1,16,203,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19836.724169,20428.617367,1,98,15,15,5,5,5,0,1,1,2,55,1,5,1,5 +64774,7,2,2,5,NA,1,1,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,16648.051651,2,98,5,5,0.59,7,7,2,1,2,2,71,1,2,1,1 +64775,7,2,2,6,NA,5,6,1,7,84,NA,NA,1,1,NA,1,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,6402.995556,7222.354698,2,92,7,7,1.61,4,4,0,2,0,1,51,2,3,1,3 +64776,7,2,2,29,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,1,2,2,2,2,2,2,2,NA,NA,NA,NA,50915.06085,54061.945895,3,92,3,3,0.52,5,5,2,1,0,2,29,2,1,1,3 +64777,7,2,2,52,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15790.702799,16017.473382,2,90,14,14,5,2,2,0,0,1,2,52,2,5,1,NA +64778,7,1,1,22,NA,5,6,NA,NA,NA,2,NA,2,1,99,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,13367.406737,0,1,92,15,15,4.44,5,5,0,0,1,1,65,NA,NA,1,5 +64779,7,2,2,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,26465.930618,27659.054945,2,100,7,7,1.79,4,4,0,1,0,2,51,1,3,3,NA +64780,7,2,2,18,NA,4,4,1,18,221,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13697.127402,14256.437303,2,100,4,4,1.16,2,2,0,0,1,2,18,1,2,NA,NA +64781,7,2,1,15,NA,2,2,2,15,185,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,19820.949231,20163.121943,2,91,7,7,1.29,6,6,2,2,0,1,33,2,3,6,NA +64782,7,2,1,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,29250.663295,29731.562834,1,94,2,2,0.81,1,1,0,0,0,1,50,1,4,3,NA +64783,7,2,1,0,1,1,1,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4461.618312,4741.503802,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 
+64784,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,6,NA,1,2,2,1,2,2,1,2,2,1,20135.920214,21074.978603,2,97,5,5,0.76,5,5,1,1,0,2,47,1,4,5,NA +64785,7,2,1,76,NA,1,1,1,NA,NA,2,NA,2,1,99,NA,1,1,NA,2,2,2,2,2,2,2,2,2,NA,21815.897449,22383.010849,3,92,5,5,1.26,3,3,0,0,2,1,76,2,1,1,1 +64786,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,113128.964375,115356.428049,1,94,15,15,4.95,4,4,0,0,2,1,72,1,3,1,3 +64787,7,2,1,10,NA,2,2,2,10,129,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,9390.522479,10327.334743,2,90,1,1,0.06,3,3,0,2,0,2,32,2,2,77,NA +64788,7,2,1,5,NA,4,4,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12996.965152,13155.4937,1,101,6,6,1.72,2,2,1,0,0,2,29,1,4,5,NA +64789,7,2,1,67,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,7736.56115,7645.782314,1,99,15,15,5,2,2,0,0,2,1,67,1,5,1,5 +64790,7,2,1,2,NA,3,3,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46703.291366,50234.137371,1,93,15,15,5,3,3,1,0,0,1,39,NA,NA,1,NA +64791,7,2,2,26,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,6,2,2,2,2,NA,NA,NA,NA,NA,NA,NA,31196.446669,31060.57243,2,99,12,3,0.52,5,3,0,1,0,1,30,2,2,4,NA +64792,7,2,2,40,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,35815.777398,37277.17285,1,91,14,14,3.9,4,4,0,1,0,1,41,1,2,1,4 +64793,7,2,2,14,NA,2,2,2,14,173,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23968.380373,25111.880337,2,91,14,14,4.19,3,3,0,1,0,1,55,2,3,1,5 +64794,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,20443.961017,19891.057859,2,99,6,6,1.13,4,4,1,1,0,1,33,1,3,6,NA +64795,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,9772.038079,10324.658831,1,95,3,3,0.78,3,3,0,0,2,1,80,1,4,1,3 +64796,7,2,1,1,15,5,6,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5492.796032,5818.035599,2,100,4,4,0.5,6,6,2,1,0,1,30,2,4,1,3 +64797,7,2,1,0,9,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21055.122405,22540.415493,1,93,15,15,5,3,3,1,0,0,1,33,1,5,1,3 
+64798,7,2,2,6,NA,3,3,2,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,62595.719575,66478.781231,1,95,6,6,1.35,3,3,0,1,0,1,42,1,4,1,4 +64799,7,2,1,28,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,38502.344077,41392.592866,1,100,13,13,NA,4,4,1,1,0,1,28,2,1,1,1 +64800,7,2,2,52,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,12449.932013,12483.65832,3,90,10,10,3.67,3,3,0,1,0,2,52,2,3,5,NA +64801,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,29167.119125,32047.917266,1,101,6,6,1.52,3,3,0,1,1,1,62,1,2,1,3 +64802,7,2,1,42,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,24874.121286,25386.96888,1,102,8,8,4.78,1,1,0,0,0,1,42,1,3,5,NA +64803,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19440.629552,20463.14283,1,93,6,6,1.16,4,4,2,0,0,2,33,1,5,1,4 +64804,7,2,2,9,NA,3,3,2,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,73810.484644,74865.697805,1,94,10,10,2.67,5,5,0,3,0,1,40,1,5,1,2 +64805,7,2,1,38,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,19008.083201,19380.210545,2,101,2,2,0.73,1,1,0,0,0,1,38,1,2,5,NA +64806,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,40157.007559,43267.510233,2,98,8,8,3.4,2,2,0,0,2,1,80,1,4,1,4 +64807,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,66503.043118,67175.236045,2,98,14,14,3.25,5,5,2,1,0,1,37,1,5,1,5 +64808,7,2,1,44,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16651.742047,16721.380189,1,100,15,15,5,4,4,0,1,0,1,44,2,5,1,5 +64809,7,2,2,26,NA,4,4,2,NA,NA,2,NA,2,1,2,NA,4,1,3,1,2,2,1,2,2,NA,NA,NA,NA,20146.149642,19228.641128,1,98,15,15,5,6,6,3,0,0,1,37,2,5,1,4 +64810,7,1,1,28,NA,5,6,NA,NA,NA,2,NA,2,2,1,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,11321.172632,0,1,96,3,3,0.54,4,4,0,1,0,1,28,2,5,5,NA +64811,7,2,2,16,NA,5,6,1,16,199,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8663.507259,9422.028242,1,102,9,9,2.39,5,5,0,1,1,1,55,2,5,1,5 
+64812,7,2,2,17,NA,4,4,2,17,215,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16317.790515,16843.623891,1,101,6,6,1.43,4,4,0,1,2,2,72,1,2,1,NA +64813,7,2,2,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,19944.119297,19741.288195,2,98,2,2,0.52,1,1,0,0,0,2,55,1,4,3,NA +64814,7,2,2,1,22,4,4,2,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7348.24433,7960.486003,2,95,9,9,1.78,6,6,2,0,0,2,48,1,3,1,2 +64815,7,2,2,11,NA,1,1,1,11,137,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15352.601806,16028.326912,2,102,4,4,0.61,5,5,0,3,0,1,34,2,3,1,3 +64816,7,2,1,21,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,21763.209029,22385.504537,2,92,4,1,0,4,1,0,0,0,1,21,1,4,5,NA +64817,7,2,1,1,20,4,4,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,6454.74783,2,100,14,1,0,3,1,1,0,1,1,62,1,5,1,5 +64818,7,2,1,8,NA,5,6,1,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,7030.880662,7583.419516,3,91,14,14,3.58,4,4,1,1,0,1,39,2,5,1,5 +64819,7,2,2,2,NA,4,4,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,8250.113235,2,100,9,9,3.24,3,3,1,0,0,1,32,1,3,1,4 +64820,7,2,1,7,NA,1,1,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19774.151841,21259.203461,3,92,7,7,1.65,4,4,1,1,0,1,27,1,3,1,3 +64821,7,2,1,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,11281.447612,11722.687225,2,92,8,8,4.59,1,1,0,0,1,1,62,1,3,4,NA +64822,7,2,1,53,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,29181.414565,30161.390695,1,94,2,2,0.63,2,1,0,0,0,1,53,1,3,6,NA +64823,7,2,2,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,23799.136983,23147.811167,1,92,15,15,4.44,5,5,0,3,0,2,43,1,5,6,NA +64824,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,33364.852884,34296.975056,1,97,2,2,0.76,1,1,0,0,1,1,62,1,5,3,NA +64825,7,2,2,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,150482.742079,150600.121666,2,98,15,15,5,2,2,0,0,1,2,52,1,4,1,4 
+64826,7,2,1,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,7800.5318,8105.625946,2,96,3,3,1.01,1,1,0,0,1,1,61,1,3,3,NA +64827,7,2,2,8,NA,4,4,1,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11334.095519,11753.9565,2,96,4,4,0.65,5,5,0,3,0,1,30,1,4,1,2 +64828,7,2,2,58,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,35505.352567,36934.11174,3,92,5,5,1.39,2,2,0,0,0,1,56,2,1,1,1 +64829,7,2,2,11,NA,2,2,2,11,134,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12985.951695,13541.394178,1,90,15,15,5,4,4,0,2,0,2,43,1,5,1,5 +64830,7,2,1,15,NA,1,1,1,15,181,NA,NA,1,1,NA,9,NA,NA,NA,2,1,1,2,2,1,1,2,2,1,15506.325263,15357.347514,2,103,2,2,0.42,3,3,0,2,0,2,51,2,2,5,NA +64831,7,2,1,24,NA,5,7,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,10559.047286,11105.574109,2,94,5,5,2.2,2,1,0,0,0,1,23,NA,NA,5,NA +64832,7,2,1,32,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,3,6,NA,1,2,2,1,2,1,2,2,2,2,40003.013263,41663.413523,1,91,7,7,2.1,3,3,0,1,0,2,29,2,3,6,NA +64833,7,2,2,37,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,4,1,2,2,2,2,1,2,2,2,2,2,2,42456.72357,44069.75885,1,92,5,5,0.87,4,4,0,2,0,1,42,2,1,1,4 +64834,7,2,1,0,3,2,2,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,5271.338128,5219.242818,2,99,12,6,0.9,7,6,1,3,0,2,20,2,2,5,NA +64835,7,2,1,39,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,1,2,1,2,2,1,2,2,NA,25120.174741,26325.85923,2,102,3,3,0.76,3,3,0,1,1,2,66,1,2,3,NA +64836,7,2,2,0,2,2,2,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5803.542897,5681.443153,2,90,5,5,0.94,4,4,2,1,0,2,33,1,3,3,NA +64837,7,2,2,11,NA,5,6,1,11,143,NA,NA,2,1,3,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5710.045626,5965.417422,1,100,5,5,0.74,6,6,0,3,0,1,40,2,3,1,4 +64838,7,2,2,18,NA,4,4,1,18,218,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,10128.026158,10853.158449,2,103,2,1,0.43,2,1,0,0,0,1,20,1,3,6,NA +64839,7,2,1,37,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,39040.678458,39017.692919,2,98,14,14,3.25,5,5,2,1,0,1,37,1,5,1,5 
+64840,7,2,1,10,NA,4,4,2,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8655.162127,8668.652185,2,95,6,6,0.86,6,6,0,4,0,2,32,1,4,6,NA +64841,7,2,1,8,NA,1,1,1,8,101,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,12622.023337,12821.760042,2,96,6,6,0.77,7,7,2,1,0,1,53,2,1,1,1 +64842,7,2,1,2,NA,1,1,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13968.423539,13738.28453,2,102,7,7,1.57,4,4,2,0,0,2,34,2,2,1,5 +64843,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,12764.594076,12653.781126,2,90,6,6,0.96,5,5,0,1,0,1,55,1,4,6,NA +64844,7,2,1,4,NA,4,4,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7431.820906,7890.591472,1,99,6,6,0.96,5,5,1,2,0,2,35,1,4,1,2 +64845,7,2,1,0,3,1,1,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,6298.658963,6480.002411,2,94,9,9,2.29,5,5,2,1,0,2,33,2,3,1,1 +64846,7,2,2,43,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,2,1,2,1,2,2,1,2,2,NA,NA,NA,NA,12977.791943,13288.853946,2,100,4,4,0.44,7,7,1,2,2,1,71,2,1,1,1 +64847,7,2,2,11,NA,4,4,1,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11784.279565,12220.817194,2,96,8,8,2.46,4,4,0,1,0,1,46,1,4,1,4 +64848,7,2,1,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,9535.353518,9981.414158,2,97,4,4,1.02,2,2,0,0,2,2,80,1,1,2,NA +64849,7,2,1,0,5,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20370.716701,20998.185689,1,91,14,14,3.06,5,5,2,0,0,2,30,1,5,1,5 +64850,7,2,1,19,NA,3,3,1,19,232,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,99014.954203,98621.869511,1,92,10,10,3.4,3,3,0,0,0,1,56,1,4,1,5 +64851,7,2,2,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,NA,20212.648666,20286.466668,2,95,4,3,1.1,2,1,0,0,0,2,47,1,3,4,NA +64852,7,2,1,2,NA,4,4,1,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8009.966208,8832.297541,2,96,7,7,1.79,4,4,2,0,0,2,49,1,3,1,3 +64853,7,2,1,0,11,1,1,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,6999.189812,7438.261811,2,98,6,6,0.59,7,7,2,2,1,2,52,2,1,1,1 
+64854,7,2,2,69,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,10561.745159,14124.187136,2,90,6,6,1.73,2,2,0,0,2,2,69,1,2,1,2 +64855,7,1,1,8,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9720.482616,0,2,91,4,4,0.65,5,5,1,3,0,1,43,2,3,5,NA +64856,7,2,1,34,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,1,5,NA,2,2,2,2,2,2,NA,NA,NA,NA,30626.581617,30395.433124,2,90,6,6,0.96,5,5,1,1,0,1,39,2,2,1,NA +64857,7,2,2,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,189736.955264,189051.893543,1,98,4,4,1.34,1,1,0,0,0,2,54,1,5,3,NA +64858,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,36148.01469,38158.744241,1,101,3,3,0.66,2,2,0,0,1,2,65,1,2,3,NA +64859,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,133853.800452,138128.446231,1,94,8,8,2.43,3,3,0,0,0,2,46,1,3,1,NA +64860,7,2,1,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,23605.367258,23717.233847,2,97,7,7,1.89,3,3,0,0,0,1,50,1,2,1,2 +64861,7,2,2,26,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,NA,NA,NA,NA,19266.853809,20089.582382,2,102,8,8,1.72,5,5,0,2,1,1,63,2,5,1,5 +64862,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,22911.854766,1,95,5,5,0.92,5,5,1,2,0,2,30,1,4,1,4 +64863,7,1,2,34,NA,3,3,NA,NA,NA,2,NA,2,2,2,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,81658.419251,0,2,99,15,15,5,2,1,0,0,0,2,34,2,5,5,NA +64864,7,2,1,12,NA,4,4,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17606.165994,18413.211403,2,101,3,3,0.54,3,3,0,2,0,2,36,1,3,5,NA +64865,7,2,2,2,NA,4,4,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6677.461889,6849.81851,2,90,5,1,0,3,1,1,1,0,2,48,1,5,5,NA +64866,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,17794.144581,16918.878285,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +64867,7,2,1,78,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,12856.852517,14031.201295,2,98,5,5,1.24,3,3,0,0,1,2,58,1,2,5,NA 
+64868,7,2,1,5,NA,1,1,1,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,17973.290893,3,92,3,3,0.54,4,4,3,0,0,2,22,1,3,5,NA +64869,7,2,2,11,NA,5,6,2,11,133,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7751.448366,8179.286646,1,96,15,15,4.34,4,4,1,1,0,1,36,2,5,1,5 +64870,7,2,2,80,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,2,NA,1,1,1,1,2,2,1,2,1,NA,18698.205673,19635.336647,1,97,9,9,1.78,6,6,0,1,1,1,45,2,3,1,3 +64871,7,2,2,49,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,141912.982157,152670.471787,1,97,14,14,3.36,4,4,0,2,0,2,49,1,5,1,5 +64872,7,2,2,2,NA,1,1,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12871.484115,13281.030392,2,102,5,5,0.89,4,4,1,2,0,2,36,2,5,3,NA +64873,7,2,2,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,111065.717962,115110.76456,2,91,15,15,5,4,4,0,2,0,1,53,1,5,1,4 +64874,7,1,1,80,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,8168.05279,0,2,98,3,3,0.56,4,4,0,0,2,2,79,1,1,1,1 +64875,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,99606.074294,100835.770786,1,93,15,15,5,2,2,0,0,0,2,34,1,5,1,5 +64876,7,2,1,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,28680.660607,28606.985896,3,91,6,3,1.33,2,1,0,0,0,1,40,1,3,5,NA +64877,7,2,1,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19260.892847,19199.459573,2,97,1,1,0.09,2,1,0,0,0,1,46,1,3,5,NA +64878,7,2,1,12,NA,3,3,2,12,150,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19197.94644,18945.641357,1,98,6,6,1.73,3,3,0,1,0,2,39,1,4,1,1 +64879,7,2,2,8,NA,4,4,1,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9153.600624,9398.485171,1,100,8,8,1.61,6,6,1,3,0,1,29,1,5,6,NA +64880,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,153565.050575,157855.235487,1,97,15,15,5,2,2,0,0,1,2,49,1,5,1,3 +64881,7,2,1,33,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15014.015332,15213.152707,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 
+64882,7,2,1,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,160284.827389,161788.081948,2,102,14,14,5,2,2,0,0,2,2,65,1,5,1,5 +64883,7,2,1,18,NA,2,2,2,18,218,2,NA,2,2,1,10,NA,NA,NA,2,2,2,1,2,2,2,2,2,2,13752.835112,15116.811136,3,90,7,7,1.48,5,5,0,1,0,1,43,2,1,6,NA +64884,7,2,1,8,NA,4,4,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7908.091112,7920.416775,2,93,8,8,1.67,5,5,1,1,0,2,31,1,4,5,NA +64885,7,2,1,0,7,4,4,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6275.847063,6684.003207,2,100,10,10,2.75,5,5,1,1,1,1,27,1,3,1,5 +64886,7,2,2,59,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,23476.411085,23902.152524,3,91,4,4,0.81,3,3,0,0,1,1,64,2,1,1,1 +64887,7,2,1,13,NA,3,3,2,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,32477.57544,34497.087834,1,95,7,7,1.66,5,5,0,3,0,1,34,1,2,1,4 +64888,7,2,2,2,NA,4,4,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8072.012312,8280.364646,3,91,8,8,2.24,4,4,2,0,0,1,39,2,3,1,3 +64889,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,20933.43123,22068.787492,2,95,6,6,1.19,4,4,0,1,0,1,44,1,3,1,2 +64890,7,2,1,29,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,15173.157442,14818.726738,2,96,12,10,2.17,7,6,2,3,0,1,29,1,4,3,NA +64891,7,2,2,11,NA,2,2,1,11,138,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,NA,14446.475406,15838.541371,1,103,7,7,1.03,7,7,0,3,0,1,50,2,1,1,1 +64892,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16995.648055,17171.250195,2,100,5,5,1.08,3,3,0,1,0,2,50,1,4,3,NA +64893,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,76027.409363,76707.146234,2,103,15,15,3.44,7,7,0,1,2,2,79,1,3,2,NA +64894,7,2,2,6,NA,2,2,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15583.587534,16935.930722,1,93,14,14,3.06,5,5,0,2,0,1,46,2,1,1,4 +64895,7,2,1,67,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,7323.703412,7380.991723,2,100,14,14,3.58,4,4,0,1,1,2,55,1,5,1,4 
+64896,7,2,2,0,0,4,4,2,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4706.214557,5182.816301,1,96,6,6,1.21,4,4,2,0,0,1,24,1,4,1,3 +64897,7,2,1,42,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,12254.763576,14641.804715,2,90,4,4,0.81,3,3,0,1,0,2,41,2,2,6,NA +64898,7,2,1,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,17036.36313,16982.025069,1,99,2,2,0.48,2,2,0,0,0,2,44,1,3,1,3 +64899,7,2,2,13,NA,5,6,1,13,157,NA,NA,2,2,2,7,NA,NA,NA,1,1,1,1,2,1,1,2,1,NA,8868.843265,9252.179416,2,92,8,8,2.43,3,3,0,1,1,2,58,NA,5,1,5 +64900,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,42894.724338,43561.362107,1,98,3,3,0.93,2,2,0,0,0,1,21,1,4,5,NA +64901,7,2,2,18,NA,4,4,1,18,216,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10143.901078,10335.966318,2,95,2,2,0.67,2,2,0,0,1,2,61,1,3,3,NA +64902,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,20891.980831,21490.337905,2,97,3,2,0.83,2,1,0,0,0,1,30,1,4,5,NA +64903,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,23530.130278,1,95,5,5,1.84,3,1,0,0,0,1,35,1,3,6,NA +64904,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,34802.051557,36349.694696,1,95,6,4,1.7,2,1,0,0,0,1,53,1,2,6,NA +64905,7,2,2,1,12,3,3,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,41219.757096,41761.398587,1,93,15,15,5,3,3,1,0,0,1,35,1,5,1,5 +64906,7,2,1,80,NA,5,7,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,1,NA,13312.080419,14392.211707,1,102,10,10,3.22,4,4,0,0,2,2,29,2,5,5,NA +64907,7,1,1,41,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,126789.52929,0,1,101,15,15,5,4,4,0,2,0,2,40,1,4,1,3 +64908,7,2,2,60,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,13057.178942,13648.591881,1,102,6,6,2.94,1,1,0,0,1,2,60,1,4,3,NA +64909,7,2,1,74,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,12468.859946,13204.521198,2,96,7,7,1.33,6,6,0,3,1,1,74,1,1,1,NA 
+64910,7,2,2,15,NA,3,3,2,15,182,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71837.483011,75155.181458,1,91,14,14,3.06,5,5,0,3,0,2,46,1,5,1,5 +64911,7,2,2,15,NA,5,6,2,15,181,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5956.736263,6113.749553,2,100,15,15,5,3,3,0,1,0,1,48,2,5,1,5 +64912,7,2,2,9,NA,4,4,2,9,114,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8226.044997,8398.377256,2,95,1,1,0,4,4,2,1,0,2,27,1,4,5,NA +64913,7,2,1,12,NA,3,3,2,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,26824.630008,26493.112081,1,95,1,1,0.17,2,2,0,1,0,2,47,1,2,3,NA +64914,7,2,1,31,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,3,17165.91562,17929.203991,2,96,15,7,3.67,3,1,0,0,0,1,31,2,5,1,NA +64915,7,2,1,4,NA,4,4,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8626.000813,9278.769273,2,97,2,2,0.21,7,7,2,3,0,2,32,1,4,5,NA +64916,7,2,1,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15787.238477,16796.740962,2,98,4,3,1.01,2,1,0,0,0,1,46,NA,NA,77,NA +64917,7,2,1,5,NA,5,6,2,5,67,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8197.191196,8439.736714,1,93,7,7,2.32,3,3,1,0,0,1,36,2,5,1,5 +64918,7,2,1,1,16,3,3,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24594.444896,28818.16319,1,98,5,5,1.05,3,3,1,0,0,1,24,1,3,1,3 +64919,7,2,1,10,NA,5,6,2,10,127,NA,NA,1,1,NA,3,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,9720.482616,10818.978947,2,91,12,12,NA,7,6,0,4,2,2,72,2,1,2,NA +64920,7,2,2,8,NA,4,4,2,8,103,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8395.64512,8728.346696,2,97,6,6,0.92,7,7,1,4,0,2,29,1,3,5,NA +64921,7,2,2,16,NA,4,4,2,16,202,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12857.456314,12921.235691,2,97,5,5,0.92,5,5,0,3,0,2,54,1,3,2,NA +64922,7,2,1,9,NA,5,6,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8376.521179,8870.870608,1,92,15,15,5,4,4,0,2,0,1,55,1,5,1,5 +64923,7,2,2,64,NA,4,4,2,NA,NA,2,NA,2,2,5,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,9113.905743,12222.082956,3,90,8,8,3.3,2,2,0,0,1,2,64,2,2,1,NA 
+64924,7,2,2,10,NA,5,7,1,10,129,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,12942.575479,2,101,7,7,1.88,4,4,0,2,0,2,36,1,4,1,5 +64925,7,2,2,0,4,2,2,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7534.765808,7789.750955,1,98,3,3,0.37,5,5,2,1,0,1,44,2,4,1,3 +64926,7,2,1,14,NA,4,4,2,14,169,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13352.786991,13379.186543,2,97,7,7,1.06,7,7,1,2,0,2,40,1,4,5,NA +64927,7,2,2,2,NA,4,4,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,7498.263904,2,100,3,3,0.39,7,7,3,3,0,2,30,1,2,5,NA +64928,7,2,1,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,1,2,NA,14073.595908,15970.731967,1,92,6,6,1.3,4,4,0,1,1,1,25,1,1,1,3 +64929,7,2,1,19,NA,4,4,2,19,228,2,NA,1,1,NA,14,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10791.290138,11351.304534,3,91,2,2,0.25,4,4,0,2,0,2,35,1,3,5,NA +64930,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,13219.758853,13936.470139,2,97,4,4,1.29,2,2,0,0,2,1,74,1,3,3,NA +64931,7,2,2,59,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,26668.458882,27266.525973,2,102,6,6,2.04,2,2,0,0,1,1,64,1,3,1,4 +64932,7,2,1,6,NA,3,3,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,86609.650237,90490.601456,2,102,14,14,4.93,3,3,0,1,0,1,37,1,5,1,5 +64933,7,2,2,41,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,31301.615973,33816.788039,1,90,15,15,4.77,4,4,1,1,0,2,41,1,5,1,2 +64934,7,2,2,16,NA,5,6,2,16,192,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,5607.756021,6010.481547,3,91,6,6,1.81,3,3,0,1,0,2,47,2,3,1,3 +64935,7,2,2,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,73820.224093,75002.443492,2,95,9,9,2.22,5,5,1,0,0,1,55,1,4,1,5 +64936,7,2,2,6,NA,2,2,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17458.526997,17628.076413,1,94,2,2,0.26,4,4,2,1,0,2,25,1,4,5,NA +64937,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,37456.985636,43452.233213,2,98,6,6,1.7,2,2,0,0,2,2,80,1,2,1,2 
+64938,7,2,1,40,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,27938.918808,28342.234566,1,100,7,7,3.4,1,1,0,0,0,1,40,1,5,5,NA +64939,7,2,1,19,NA,4,4,2,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,1,1,0.04,2,1,0,0,0,1,19,1,4,NA,NA +64940,7,2,1,13,NA,3,3,2,14,168,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,91343.020442,91081.271456,1,98,7,7,1,7,7,2,2,0,2,34,1,4,3,NA +64941,7,2,2,2,NA,1,1,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11512.764389,12710.400836,2,98,2,2,0.27,4,4,2,1,0,2,32,2,2,5,NA +64942,7,2,1,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,7101.739553,7433.956549,2,100,3,3,1.19,1,1,0,0,1,1,69,1,2,2,NA +64943,7,2,1,1,20,5,6,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7638.291796,7864.300119,1,93,14,14,4.86,3,3,1,0,0,1,30,2,5,1,5 +64944,7,2,2,18,NA,4,4,2,18,225,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15259.822784,15553.106132,1,90,5,5,0.74,5,5,0,2,0,2,18,1,4,NA,NA +64945,7,1,1,37,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,105412.227726,0,2,101,8,8,1.72,5,5,0,3,0,1,37,1,3,1,3 +64946,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,5866.209951,5912.09729,2,95,5,5,1.05,3,3,0,0,2,1,60,1,1,1,4 +64947,7,2,1,6,NA,5,6,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7491.978458,7984.984584,2,100,15,15,5,4,4,1,1,0,1,36,2,5,1,5 +64948,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,8679.600111,2,95,5,5,1.92,1,1,0,0,1,2,61,1,4,2,NA +64949,7,2,2,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,110941.813533,112262.474161,1,97,15,15,4.77,4,4,0,0,0,1,56,1,4,1,4 +64950,7,2,2,6,NA,2,2,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11210.83392,12007.401767,2,90,6,6,1.34,4,4,1,2,0,2,36,2,3,77,NA +64951,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,107912.705683,113084.898879,3,91,4,4,1.02,2,2,0,0,0,1,22,1,5,1,5 
+64952,7,2,2,8,NA,4,4,2,8,102,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7120.704736,7311.203604,1,99,8,8,1.76,5,5,0,2,1,1,37,1,4,1,3 +64953,7,2,2,44,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,30863.871606,31440.435679,1,101,3,3,1.12,1,1,0,0,0,2,44,1,4,1,NA +64954,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,1,3,NA,1,2,1,1,2,2,NA,NA,NA,NA,10964.859884,11022.841433,3,90,7,7,1.82,4,4,1,0,0,2,54,2,1,3,NA +64955,7,2,1,3,NA,4,4,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11342.022131,11810.675983,2,102,15,15,3.82,5,5,1,2,0,1,34,1,3,1,4 +64956,7,2,1,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,27572.205373,28421.693812,1,100,15,15,5,3,3,0,0,0,2,33,1,5,1,4 +64957,7,1,2,3,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11623.462338,0,1,100,3,3,0.73,3,3,1,1,0,2,32,1,3,5,NA +64958,7,2,2,8,NA,1,1,1,8,107,NA,NA,1,1,NA,3,NA,NA,NA,2,1,1,1,2,2,1,2,2,1,16217.354723,16799.750785,3,91,7,7,1.42,6,6,1,3,0,1,37,2,1,1,1 +64959,7,2,2,6,NA,2,2,2,6,82,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17962.96045,18326.284291,1,97,15,15,4.52,6,6,0,4,0,2,41,1,5,1,5 +64960,7,2,2,6,NA,3,3,2,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13450.606713,13706.553337,3,91,5,5,1.07,4,4,0,2,0,2,36,1,5,1,4 +64961,7,2,1,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,141773.363283,142016.391545,1,97,15,15,4.77,4,4,0,0,0,1,56,1,4,1,4 +64962,7,2,2,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,39483.840194,39476.565757,2,102,14,14,3.25,5,5,1,1,0,2,32,1,4,1,3 +64963,7,2,1,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,20666.038321,20600.123288,1,96,14,14,5,1,1,0,0,0,1,55,1,5,5,NA +64964,7,2,1,80,NA,3,3,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,1,1,2,2,1,2,1,NA,35161.248998,42165.766203,3,91,15,15,3.33,6,6,0,2,2,1,80,2,3,1,3 +64965,7,2,1,26,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,71402.235366,73020.331081,2,97,15,15,4.97,5,5,1,0,0,1,48,1,4,1,3 
+64966,7,2,1,6,NA,3,3,2,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,57305.501166,60591.443404,1,94,15,15,5,3,3,0,1,0,1,46,1,5,1,5 +64967,7,1,2,58,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,24905.670199,0,2,101,1,1,0.15,3,3,0,2,0,2,58,1,3,5,NA +64968,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16567.527819,16240.172481,1,96,15,15,5,4,4,0,1,0,1,56,1,4,1,5 +64969,7,2,1,18,NA,3,3,1,18,224,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24849.884345,24917.808193,3,91,5,5,0.87,4,4,0,2,0,2,38,1,2,3,NA +64970,7,2,1,12,NA,2,2,1,12,149,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15626.107676,15463.700675,2,93,9,9,1.94,6,6,0,3,0,2,37,NA,NA,3,NA +64971,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,6358.062034,6381.990744,1,99,8,8,3.4,2,2,0,0,1,1,60,1,3,3,NA +64972,7,2,1,61,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7289.557268,8219.094942,3,90,1,1,0,2,2,0,0,1,1,61,2,3,1,3 +64973,7,2,1,64,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,35869.019314,36329.101667,3,92,4,4,1.13,2,2,0,0,2,1,64,1,3,1,4 +64974,7,2,2,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,27585.470618,27304.927199,2,102,7,7,2.65,2,2,1,0,0,2,42,1,4,5,NA +64975,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,23016.024704,24471.525432,1,96,7,7,2.78,2,2,0,0,0,1,48,1,2,6,NA +64976,7,2,1,69,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,7323.703412,7380.991723,2,100,10,10,2.33,6,6,0,2,2,2,35,1,2,5,NA +64977,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15732.436891,17625.53878,1,97,2,2,0.72,1,1,0,0,1,2,63,1,3,5,NA +64978,7,2,2,56,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,24548.478709,25991.798892,1,93,5,5,1.43,2,2,0,0,1,1,60,2,5,1,4 +64979,7,2,2,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18822.975485,18307.835382,2,99,2,2,0.19,6,6,0,1,0,1,59,1,2,5,NA 
+64980,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,33147.414266,37148.276517,2,94,99,99,NA,1,1,0,0,1,2,80,1,4,2,NA +64981,7,2,1,46,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,1,1,2,2,1,19436.026093,20775.484483,2,97,5,5,0.8,5,5,1,2,0,1,46,2,4,1,2 +64982,7,2,1,16,NA,3,3,2,16,200,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,66448.116716,66629.743791,2,94,14,14,2.83,6,6,0,4,0,2,38,1,2,1,2 +64983,7,2,2,76,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,17836.372654,19170.472552,2,96,6,6,1.77,2,2,0,0,2,1,77,1,2,1,2 +64984,7,2,2,17,NA,3,3,2,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,76360.13568,92040.479005,1,99,NA,NA,NA,4,4,0,1,0,1,50,NA,NA,1,NA +64985,7,2,1,65,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,7410.50521,7700.344649,2,96,6,6,2.75,1,1,0,0,1,1,65,1,2,2,NA +64986,7,1,2,28,NA,5,6,NA,NA,NA,2,NA,2,1,4,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,16937.04417,0,1,95,3,3,0.43,4,4,0,1,2,1,65,2,5,1,3 +64987,7,2,1,17,NA,4,4,1,17,210,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,18413.211403,2,101,1,1,0.27,3,3,0,2,0,2,36,1,3,5,NA +64988,7,2,2,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8897.78821,9699.858422,1,96,7,7,1,7,7,2,1,1,2,53,1,4,1,3 +64989,7,2,1,11,NA,2,2,2,11,138,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13217.721247,13515.808466,2,91,8,8,2.97,3,2,0,2,0,2,33,2,4,5,NA +64990,7,1,1,2,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7390.005875,0,1,91,15,15,5,5,5,2,1,0,1,40,1,5,1,5 +64991,7,2,2,10,NA,5,6,2,10,128,NA,NA,1,1,NA,5,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,6198.268014,6618.822195,3,90,15,15,3.23,6,6,0,2,0,1,50,2,2,1,2 +64992,7,2,1,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,15176.622228,16064.120023,1,101,3,3,1.12,1,1,0,0,1,1,76,1,5,2,NA +64993,7,1,1,57,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,16287.780872,0,2,100,15,14,5,2,1,0,0,0,1,57,1,5,6,NA 
+64994,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,106239.028397,111331.003215,2,92,10,5,1.96,2,1,0,0,0,2,27,2,5,1,NA +64995,7,2,2,68,NA,3,3,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,35700.429895,37686.262589,1,93,2,2,0.41,2,2,0,0,2,2,68,2,1,1,1 +64996,7,2,1,0,6,3,3,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19091.539058,20438.314938,2,95,8,8,2.17,4,4,1,1,0,1,43,1,4,1,5 +64997,7,2,2,15,NA,2,2,2,15,184,NA,NA,1,1,NA,66,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13824.001771,15649.805677,2,90,2,2,0.3,3,3,0,1,0,2,51,2,2,5,NA +64998,7,2,1,54,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,144858.963765,145378.942103,2,97,9,9,5,1,1,0,0,0,1,54,1,5,5,NA +64999,7,2,1,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18544.003944,19434.054198,2,100,5,5,1.63,3,2,0,1,0,2,50,1,2,5,NA +65000,7,2,1,4,NA,2,2,2,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,12403.412256,12420.172189,2,90,3,3,0.54,4,4,1,2,0,2,33,2,1,4,NA +65001,7,2,2,13,NA,5,7,2,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8777.486283,9116.900706,1,91,15,15,5,5,5,0,3,0,1,40,1,5,1,5 +65002,7,2,1,68,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,6,NA,1,2,2,NA,NA,NA,1,2,2,1,140267.560043,144186.249663,2,91,12,NA,NA,2,1,0,0,2,2,68,1,4,6,NA +65003,7,2,1,9,NA,3,3,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17738.769196,18719.457495,1,98,4,4,1,3,3,0,1,1,1,65,1,2,1,NA +65004,7,2,2,25,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,1,2,2,2,2,2,2,2,NA,NA,NA,NA,42557.597671,44940.417695,1,100,13,13,NA,4,4,1,1,0,1,28,2,1,1,1 +65005,7,2,2,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,73204.401173,96856.987278,2,91,12,12,NA,2,2,0,0,2,2,71,1,2,1,NA +65006,7,2,2,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,25221.447441,25652.65048,1,100,14,14,3.93,3,3,0,1,0,2,47,1,5,4,NA +65007,7,2,2,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,18801.993237,20649.846581,2,96,14,14,5,2,2,0,0,0,1,30,2,5,1,5 
+65008,7,1,1,80,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11740.600601,0,3,91,13,13,NA,3,3,0,0,2,2,80,1,3,1,1 +65009,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21491.090123,20964.324934,1,97,6,6,1.41,3,3,0,1,0,2,51,1,4,5,NA +65010,7,2,1,6,NA,4,4,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10469.725162,10967.571132,1,100,15,15,3.87,6,6,1,3,0,2,39,1,4,1,4 +65011,7,2,1,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,18289.793332,19321.216647,1,99,6,6,0.96,5,5,1,2,0,2,35,1,4,1,2 +65012,7,2,2,68,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,2,2,NA,1,2,1,1,2,1,1,2,1,3,12312.400687,12851.340698,1,103,2,2,0.45,1,1,0,0,1,2,68,2,2,2,NA +65013,7,2,2,7,NA,2,2,2,7,87,NA,NA,2,1,1,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9872.244853,10294.506103,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +65014,7,2,2,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,160743.928829,166234.629208,1,95,9,9,4.01,2,2,0,0,1,1,60,1,3,1,3 +65015,7,2,1,19,NA,4,4,1,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,99,1,0.09,4,1,0,0,0,1,18,2,4,NA,NA +65016,7,2,1,11,NA,3,3,1,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22651.436723,23515.701302,3,92,7,7,0.81,7,7,2,4,0,1,40,NA,NA,1,4 +65017,7,2,1,4,NA,4,4,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10859.969359,11974.89205,1,101,2,2,0.47,3,3,1,0,0,1,35,1,2,6,NA +65018,7,2,2,14,NA,2,2,2,14,176,NA,NA,2,1,1,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18254.385443,19735.623608,2,91,12,12,NA,5,5,0,1,1,2,43,2,3,6,NA +65019,7,2,2,9,NA,4,4,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11076.064101,11269.278002,2,102,15,15,4.2,5,5,1,2,0,2,29,NA,NA,1,NA +65020,7,2,1,62,NA,2,2,2,NA,NA,2,NA,2,2,7,NA,1,4,NA,2,2,2,2,2,2,2,2,2,2,8609.250304,11228.904188,2,90,2,2,0.73,1,1,0,0,1,1,62,2,1,4,NA +65021,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,96524.78948,102108.384344,1,97,7,7,1.87,4,4,1,1,0,1,35,1,2,1,4 
+65022,7,2,2,29,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,3,1,2,2,1,2,2,1,2,2,1,13358.458751,14155.588359,2,92,15,15,5,3,1,0,0,0,2,33,NA,NA,5,NA +65023,7,2,2,13,NA,4,4,2,13,164,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11711.384457,11558.024533,2,95,6,6,0.86,6,6,0,4,0,2,32,1,4,6,NA +65024,7,2,1,9,NA,2,2,1,9,109,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16281.509392,17100.217351,2,91,6,6,0.93,5,5,1,2,0,1,34,1,2,1,3 +65025,7,2,1,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,20891.980831,22249.799905,2,97,5,5,1.08,3,3,0,0,0,1,38,1,4,5,NA +65026,7,2,2,0,11,4,4,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5058.520291,5113.753371,1,100,7,7,1.65,4,4,2,0,0,2,24,1,4,1,3 +65027,7,2,2,23,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,4,5,2,2,2,2,1,2,2,2,2,2,2,35424.746838,35270.456492,2,93,4,4,0.56,5,5,0,0,0,2,49,2,2,5,NA +65028,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,15682.233511,17550.107712,1,101,3,3,0.95,2,2,0,0,2,1,80,1,2,1,NA +65029,7,2,2,6,NA,2,2,2,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19936.606751,20692.800636,1,94,7,7,1.74,4,4,0,2,0,1,44,1,5,1,5 +65030,7,2,2,5,NA,4,4,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8041.669581,8652.926173,2,90,6,6,0.84,6,6,1,3,1,2,43,1,2,5,NA +65031,7,2,1,16,NA,2,2,1,16,199,NA,NA,2,2,1,8,NA,NA,NA,2,2,2,2,2,2,1,2,2,2,22124.028915,23090.48094,1,100,5,1,0,6,1,1,2,0,2,40,2,1,5,NA +65032,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,72094.421241,74006.954437,2,95,6,6,1.95,2,2,0,0,2,1,80,1,1,1,3 +65033,7,2,2,7,NA,4,4,1,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9453.111053,10094.338553,1,100,8,8,1.95,4,4,0,2,0,2,42,1,4,1,4 +65034,7,2,2,16,NA,4,4,2,17,204,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12161.822321,12177.990347,2,95,5,5,1.05,3,3,0,1,1,1,63,1,2,1,3 +65035,7,1,1,19,NA,1,1,NA,NA,NA,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,18042.255087,0,1,102,14,14,2.83,6,6,1,2,0,1,36,1,2,1,3 
+65036,7,2,2,49,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,14814.740715,14686.129879,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +65037,7,1,1,18,NA,3,3,NA,NA,NA,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,26749.020961,0,1,101,4,4,0.79,3,3,0,0,0,2,47,1,5,3,NA +65038,7,2,1,13,NA,2,2,1,13,162,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21898.969807,22012.999264,2,102,15,12,NA,5,4,0,3,0,1,42,2,4,6,NA +65039,7,2,2,6,NA,5,6,2,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8067.514021,8614.897055,1,90,14,14,3.33,5,5,1,2,0,1,41,1,5,1,5 +65040,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18710.568399,18832.614147,2,92,12,12,NA,2,1,0,0,1,2,58,1,3,5,NA +65041,7,2,2,11,NA,5,6,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8290.163782,8692.019106,1,92,14,14,3.69,4,4,0,2,0,1,47,2,4,4,NA +65042,7,2,1,14,NA,5,7,2,15,180,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15861.375368,15958.829994,1,95,7,7,1.57,4,4,0,2,0,1,39,1,3,1,3 +65043,7,2,2,7,NA,1,1,1,7,92,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15841.451259,16252.614023,3,92,15,15,3.15,7,7,0,4,0,2,35,2,3,3,NA +65044,7,2,2,15,NA,5,6,1,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10142.281747,10534.471105,3,91,15,15,5,4,4,0,2,0,1,44,2,5,1,5 +65045,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,NA,10479.637868,11291.375185,1,92,2,2,0.87,1,1,0,0,1,1,80,1,4,5,NA +65046,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,13704.691347,15557.045236,1,90,15,15,5,2,2,0,0,2,1,74,1,3,1,3 +65047,7,2,1,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,140129.484883,1,101,14,14,3.3,4,4,0,2,0,2,42,1,4,1,3 +65048,7,2,1,61,NA,1,1,1,NA,NA,2,NA,2,2,9,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,10596.142548,10802.656005,1,94,7,7,1.52,4,4,0,2,2,1,61,2,1,1,5 +65049,7,2,1,24,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,37911.437415,38428.199561,2,103,7,7,2.64,2,2,0,0,0,2,21,2,3,1,3 
+65050,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,39565.288792,43332.356399,1,99,10,10,4.89,2,2,0,0,2,2,63,1,5,5,NA +65051,7,2,2,80,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,1,2,NA,2,2,2,1,2,2,1,2,2,NA,17318.187297,23904.945555,2,90,3,3,0.92,1,1,0,0,1,2,80,2,1,2,NA +65052,7,2,1,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,109309.268477,109028.475475,3,91,15,15,5,2,2,0,1,0,1,42,1,5,2,NA +65053,7,2,2,0,4,1,1,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7565.515117,7406.345541,1,100,6,6,1.57,3,3,1,0,0,1,39,1,3,1,5 +65054,7,2,1,10,NA,3,3,1,10,127,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,32654.748828,36152.216038,1,94,5,5,0.87,4,4,0,1,0,1,40,1,5,1,5 +65055,7,1,1,33,NA,5,6,NA,NA,NA,2,NA,2,1,5,NA,2,1,NA,1,2,2,1,2,1,NA,NA,NA,NA,15014.015332,0,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +65056,7,2,1,76,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,8992.410435,9337.125921,2,90,9,9,3.02,3,3,0,0,2,2,70,1,4,1,2 +65057,7,2,2,3,NA,5,6,2,3,44,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,1,NA,NA,NA,NA,3864.413878,4049.242646,1,99,6,6,1.07,6,6,2,1,2,1,44,2,5,4,NA +65058,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,23338.32018,25560.39495,1,99,77,77,NA,2,2,0,0,2,1,80,1,5,1,4 +65059,7,2,2,0,1,4,4,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4128.726485,4173.807313,2,93,6,6,1.08,4,4,2,0,0,1,25,1,3,6,NA +65060,7,2,1,0,6,4,4,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6275.847063,6487.671462,2,93,9,9,2.86,4,4,1,1,0,1,30,1,4,6,NA +65061,7,2,1,38,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19091.246741,19659.303383,1,91,10,10,3.22,4,4,1,1,0,1,38,2,5,1,5 +65062,7,2,1,10,NA,4,4,2,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8836.464036,8942.399437,1,96,15,15,5,5,5,0,3,0,2,47,1,5,1,5 +65063,7,2,1,28,NA,4,4,2,NA,NA,2,NA,2,2,6,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19137.192683,18862.909907,1,96,14,14,5,2,2,0,0,0,2,47,2,4,5,NA 
+65064,7,2,1,7,NA,1,1,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13665.416457,13541.311863,1,94,2,2,0.27,5,5,0,4,0,2,47,2,1,4,NA +65065,7,2,2,34,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,77299.255327,80009.772488,2,92,10,10,4.89,2,2,0,0,0,2,34,2,5,1,5 +65066,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,32980.717958,33913.235726,1,95,7,7,2.16,3,3,0,0,1,1,45,1,3,1,4 +65067,7,2,2,42,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,118432.300444,124617.656629,2,93,9,9,3.77,2,2,0,0,0,2,42,2,4,1,5 +65068,7,2,1,0,8,4,4,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5572.446681,5657.975893,1,99,10,10,3.51,3,3,1,0,0,1,27,1,4,1,4 +65069,7,2,1,62,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,11397.665899,11580.241952,1,101,3,3,0.41,5,5,0,2,1,2,36,2,4,4,NA +65070,7,2,1,3,NA,2,2,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11186.576454,11318.825779,2,90,3,3,0.46,5,5,3,0,0,2,22,1,2,5,NA +65071,7,2,2,79,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,55250.133125,58713.117248,1,91,14,14,3.25,4,4,0,0,1,1,50,1,2,1,3 +65072,7,2,2,27,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,48991.867549,51378.468052,1,93,14,14,5,2,2,0,0,0,2,59,2,5,5,NA +65073,7,2,2,4,NA,3,3,2,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,52402.97499,54053.880354,1,91,15,15,4.59,4,4,2,0,0,1,35,1,5,1,5 +65074,7,2,1,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,21924.03349,22615.591521,2,98,10,10,4.76,2,2,0,0,0,1,42,1,2,6,NA +65075,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,108335.731552,113413.419502,1,98,15,15,4.34,4,4,0,2,0,1,51,1,5,1,5 +65076,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16859.368198,16397.96736,1,99,8,8,1.99,5,5,1,0,0,1,55,1,5,1,2 +65077,7,2,1,60,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,5,1,NA,2,2,2,2,2,2,2,2,2,2,9068.437099,9213.701881,2,93,77,77,NA,2,2,0,0,1,2,50,NA,NA,1,5 
+65078,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,51218.356583,59416.206008,2,98,2,2,0.77,1,1,0,0,1,2,80,1,1,2,NA +65079,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,29733.812317,30162.048345,1,101,4,4,0.99,2,2,0,1,0,2,35,1,3,5,NA +65080,7,2,1,74,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,7005.52791,7142.750358,2,99,6,6,2.24,1,1,0,0,1,1,74,1,4,2,NA +65081,7,2,1,8,NA,2,2,1,8,101,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,2,2,2,2,12577.115885,12876.06354,2,96,6,6,1.25,4,4,1,1,0,1,31,2,3,1,3 +65082,7,2,2,14,NA,1,1,1,14,173,NA,NA,1,1,NA,8,NA,NA,NA,2,1,1,1,2,2,1,2,2,1,20830.737445,21782.111761,3,91,7,7,1.42,6,6,1,3,0,1,37,2,1,1,1 +65083,7,2,2,0,2,1,1,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7798.643057,7634.568742,1,102,6,6,1.73,3,3,1,0,0,2,24,2,5,1,5 +65084,7,2,2,26,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18299.168537,17535.171386,2,100,5,5,0.95,4,4,0,0,1,2,53,1,3,5,NA +65085,7,2,1,51,NA,4,4,1,NA,NA,2,NA,2,2,5,NA,3,1,NA,1,2,1,1,2,1,1,2,1,NA,18061.358948,18950.803173,2,93,4,4,0.69,4,4,0,0,0,1,23,2,3,5,NA +65086,7,2,1,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,98850.665857,102087.416196,1,103,15,15,5,3,3,0,1,0,1,46,1,5,1,5 +65087,7,2,2,74,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,1,2,NA,2,2,2,1,2,2,1,2,2,NA,17318.187297,19970.708273,2,90,7,7,1.57,4,4,0,0,1,2,20,1,2,5,NA +65088,7,2,2,37,NA,3,3,2,NA,NA,2,NA,2,2,1,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,95214.22557,102645.939709,1,97,15,1,0.4,5,1,1,1,0,2,38,NA,NA,1,5 +65089,7,2,2,3,NA,4,4,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9630.497627,10362.522874,1,96,13,13,NA,5,5,1,1,0,1,42,1,3,5,NA +65090,7,2,2,18,NA,5,7,1,18,218,2,NA,2,2,2,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6391.016092,6494.382638,1,100,99,99,NA,6,6,0,1,0,1,53,2,2,1,3 +65091,7,2,1,33,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,23952.480361,1,95,5,5,1.03,4,4,0,2,0,1,33,1,3,1,3 
+65092,7,2,1,8,NA,2,2,2,8,101,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8613.834494,9046.977066,2,90,7,7,0.89,7,7,1,3,3,1,60,2,3,1,3 +65093,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,29040.300396,28462.118402,2,101,6,6,1.67,3,3,0,0,0,2,22,1,4,5,NA +65094,7,2,1,10,NA,4,4,1,10,129,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13423.881856,14062.200951,2,101,6,6,0.96,5,5,0,4,0,2,36,1,4,4,NA +65095,7,2,2,41,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,4,1,1,2,2,2,2,2,2,2,2,1,2,33716.655399,34130.121207,1,100,7,7,1.3,5,5,0,3,0,1,43,2,2,1,4 +65096,7,2,2,7,NA,1,1,1,8,96,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15962.145468,16535.37648,2,98,3,3,0.33,7,7,2,3,0,1,40,2,1,1,1 +65097,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,28280.669788,29879.976389,1,99,14,14,5,2,2,0,0,2,2,79,1,3,1,5 +65098,7,2,2,68,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,11355.3308,12230.621635,1,96,7,7,1.39,5,5,0,2,2,1,69,2,2,1,2 +65099,7,2,2,1,18,4,4,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5987.067673,6442.151602,2,93,4,4,0.56,5,5,2,1,0,1,27,1,2,6,NA +65100,7,2,2,2,NA,5,6,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5520.445386,5614.569811,2,100,15,15,5,4,4,1,1,0,1,41,2,5,1,5 +65101,7,2,1,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,10310.165525,10913.082896,1,93,3,3,0.92,1,1,0,0,1,1,75,1,5,3,NA +65102,7,2,2,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,17884.885732,18259.455804,2,95,3,3,0.52,3,3,1,0,0,1,37,1,4,1,4 +65103,7,2,1,12,NA,4,4,2,12,149,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10453.50644,10655.928666,1,90,14,14,2.96,5,5,1,2,0,1,31,1,5,1,4 +65104,7,2,2,8,NA,3,3,2,9,108,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,80369.555824,80552.420473,1,97,15,15,3.89,5,5,0,2,0,1,50,1,4,6,NA +65105,7,2,2,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,1,2,2,1,24654.107413,23441.410215,1,100,10,10,2.59,5,5,0,1,0,2,40,1,5,1,NA 
+65106,7,2,1,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,20880.649884,21300.208451,1,96,14,6,2.69,2,1,0,0,0,2,29,1,5,6,NA +65107,7,2,1,14,NA,1,1,2,14,174,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,20398.562455,21727.336109,2,94,5,5,0.67,6,6,1,3,0,1,37,2,3,1,4 +65108,7,2,2,8,NA,3,3,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,55469.656717,57246.428971,1,98,9,9,2.15,5,5,0,3,0,2,32,1,3,1,4 +65109,7,2,1,3,NA,5,6,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10273.602479,11522.969217,2,98,9,9,3.83,2,2,1,0,0,2,29,2,4,5,NA +65110,7,2,2,23,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,16929.836231,18593.694637,2,101,4,2,0.55,2,1,0,0,0,2,23,2,4,5,NA +65111,7,2,1,14,NA,3,3,2,14,177,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18645.590959,18737.183314,2,97,9,9,1.45,7,7,1,2,2,2,45,1,3,5,NA +65112,7,2,2,60,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,12592.413049,13033.910742,1,93,8,8,4.13,1,1,0,0,1,2,60,2,4,3,NA +65113,7,2,1,43,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,11558.146957,12085.615817,3,90,6,6,1.3,4,4,0,2,0,2,37,2,4,1,3 +65114,7,2,2,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,2,1,2,2,1,2,2,1,2,2,1,27375.405353,30621.081524,3,91,5,5,0.87,4,4,0,2,0,2,38,1,2,3,NA +65115,7,2,2,19,NA,4,4,1,19,232,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,2,1,0.23,4,1,0,0,0,2,19,1,4,NA,NA +65116,7,2,1,0,10,2,2,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4884.343512,4884.433539,3,90,7,7,2.1,3,3,1,0,0,1,34,2,3,1,2 +65117,7,2,1,16,NA,4,4,2,16,203,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11351.725436,12116.15399,2,95,14,14,4.58,3,3,0,1,1,1,61,1,4,1,5 +65118,7,2,1,8,NA,2,2,2,8,99,NA,NA,2,1,3,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10872.115681,10826.020253,1,99,77,77,NA,3,3,0,1,0,1,52,1,5,1,5 +65119,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,66503.043118,66707.517935,2,98,8,8,1.8,5,5,2,1,0,1,32,1,4,1,5 
+65120,7,2,1,8,NA,2,2,1,8,100,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14570.588291,14577.04302,1,98,3,3,0.4,7,7,2,3,0,2,31,2,5,1,2 +65121,7,2,1,3,NA,2,2,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,15745.774489,15931.923312,2,91,2,2,0.19,5,5,3,0,0,1,24,2,1,1,3 +65122,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,114993.808573,120122.111367,1,98,8,8,2.62,3,3,0,0,0,1,50,NA,NA,3,NA +65123,7,2,2,13,NA,4,4,1,14,168,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19222.393687,19147.000359,1,92,9,7,1.74,7,4,2,1,0,1,45,1,4,2,NA +65124,7,2,2,28,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,30253.427014,31766.413225,2,90,6,6,1.62,3,3,1,0,0,2,28,1,5,1,4 +65125,7,2,2,40,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,2,3,2,2,2,2,2,2,2,2,2,2,2,25713.328161,27565.019075,2,103,5,5,0.65,6,6,1,0,1,2,61,2,1,2,NA +65126,7,2,2,63,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10346.035773,10598.2543,1,99,15,15,5,2,2,0,0,2,2,63,2,5,1,5 +65127,7,1,2,17,NA,2,2,NA,NA,NA,2,NA,2,1,3,12,NA,NA,NA,2,2,2,1,2,2,NA,NA,NA,NA,26657.121865,0,1,97,15,15,5,4,4,0,2,0,1,51,1,5,1,NA +65128,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26503.609729,2,101,1,1,0.09,2,1,0,0,0,1,23,1,5,5,NA +65129,7,2,2,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,23759.65375,25262.180522,3,92,4,4,1.22,2,2,0,0,0,1,51,1,2,1,3 +65130,7,1,2,38,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,95214.22557,0,1,97,15,15,5,5,5,2,0,1,1,43,1,5,1,5 +65131,7,2,1,24,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,39031.957066,40864.300461,1,101,5,5,1.24,3,3,0,0,1,2,61,1,4,1,3 +65132,7,2,2,33,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,6,2,2,2,2,2,2,2,2,2,2,2,38737.690941,37692.46666,1,97,4,4,0.72,5,5,2,1,0,2,33,2,1,6,NA +65133,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,99275.150567,104459.98751,2,95,3,3,0.93,2,2,0,0,0,1,45,1,4,1,5 
+65134,7,2,2,2,NA,4,4,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6689.921427,7247.312894,1,96,15,15,4.81,5,5,1,1,0,2,35,1,5,1,5 +65135,7,2,1,55,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16499.662173,16440.055989,3,91,15,15,5,2,2,0,0,0,2,46,1,5,1,5 +65136,7,2,1,7,NA,4,4,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10311.165779,10291.779514,2,98,3,3,0.54,3,3,1,1,0,2,29,1,2,1,NA +65137,7,2,2,8,NA,3,3,2,8,103,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,48532.852397,48387.04619,1,98,14,14,3.9,4,4,0,3,0,2,31,1,4,1,NA +65138,7,2,2,0,11,5,7,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4867.693222,4920.842696,2,97,2,2,0.43,4,4,3,0,0,2,25,1,4,5,NA +65139,7,2,1,61,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,14488.953694,16384.525762,3,92,4,1,0.11,7,1,0,3,1,1,61,1,3,3,NA +65140,7,2,2,11,NA,5,6,2,11,136,NA,NA,2,2,4,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6198.268014,6649.703491,3,90,14,14,3.47,4,4,1,1,0,2,38,2,5,1,5 +65141,7,2,1,21,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,39084.166385,39616.913732,1,97,4,4,0.65,4,4,0,1,0,2,45,2,2,3,NA +65142,7,2,1,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,17583.693727,18076.30431,2,95,1,1,0.03,3,3,1,0,0,1,23,1,3,6,NA +65143,7,2,1,62,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,10555.964373,11135.911352,2,92,6,5,2.2,3,1,0,0,2,1,80,NA,NA,2,NA +65144,7,2,1,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,87072.438071,86848.766907,1,102,8,8,1.91,5,5,1,2,0,2,38,1,5,1,4 +65145,7,2,2,57,NA,1,1,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,28713.036659,29755.292291,1,97,10,10,3.59,3,3,0,0,0,2,57,1,3,1,NA +65146,7,2,1,14,NA,2,2,2,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15991.147079,16786.340864,2,90,6,6,1.34,4,4,1,2,0,2,36,2,3,77,NA +65147,7,1,2,36,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,97803.500399,0,1,95,14,14,3.8,4,4,1,1,0,1,36,1,4,1,5 
+65148,7,2,2,12,NA,1,1,1,12,145,NA,NA,2,2,3,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18515.058419,19067.51542,2,96,5,5,0.89,4,4,1,1,0,2,36,2,4,6,NA +65149,7,2,1,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,19508.889464,19600.270833,2,102,5,5,0.67,6,6,0,4,0,2,33,1,2,6,NA +65150,7,2,2,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,25964.813959,25700.75257,2,101,7,7,2.52,2,2,0,0,0,2,53,1,4,1,2 +65151,7,2,2,26,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,44119.608456,48674.237788,2,98,7,7,2.16,3,3,1,0,0,2,26,1,3,1,3 +65152,7,2,1,13,NA,4,4,2,13,162,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16890.963304,17218.04077,1,91,10,10,2.56,5,5,0,3,0,1,51,2,5,1,4 +65153,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,40859.270352,44024.169267,2,101,14,14,5,1,1,0,0,1,1,80,1,5,5,NA +65154,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,104488.914565,106745.836574,1,98,4,1,0.4,4,1,0,0,0,1,22,1,5,5,NA +65155,7,2,2,8,NA,5,6,1,8,100,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6273.782555,6554.366499,2,94,15,15,5,5,5,0,2,1,1,47,2,5,1,5 +65156,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,31053.116279,31557.850777,2,97,5,5,1.84,2,1,0,0,0,2,45,1,2,4,NA +65157,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,51644.110977,57877.507418,1,102,6,6,2.48,1,1,0,0,1,2,80,1,4,3,NA +65158,7,2,2,4,NA,5,6,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,6683.092466,6676.033575,3,91,13,13,NA,3,3,1,0,0,2,41,2,1,1,3 +65159,7,2,2,0,4,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10692.488346,11331.751026,1,95,1,1,0.21,4,4,1,0,1,2,75,1,1,2,NA +65160,7,2,2,17,NA,4,4,2,17,211,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12209.74498,12781.910285,2,90,5,5,1.08,3,3,1,1,0,2,23,1,2,5,NA +65161,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,18344.917534,19717.054857,2,101,3,3,0.9,2,2,0,0,1,1,57,1,2,5,NA 
+65162,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,83819.702285,89179.486894,1,99,9,9,5,1,1,0,0,0,1,32,1,3,5,NA +65163,7,2,1,70,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,69210.206708,72962.448849,1,98,12,12,NA,2,2,0,0,2,1,70,1,2,1,3 +65164,7,2,1,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,20953.00978,22152.088593,2,97,2,2,0.3,4,4,0,2,0,1,42,1,2,6,NA +65165,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,10561.526228,11375.629073,2,95,7,7,2.31,2,2,0,0,2,2,63,1,3,1,3 +65166,7,2,1,17,NA,4,4,2,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12462.601191,12584.643654,2,97,4,4,0.81,4,4,1,1,0,2,51,1,3,4,NA +65167,7,2,2,17,NA,4,4,2,17,208,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9608.885901,9632.551122,1,96,77,77,NA,7,7,1,3,0,1,56,1,3,1,4 +65168,7,2,1,16,NA,2,2,1,17,204,NA,NA,2,2,4,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15626.107676,15463.700675,2,93,9,9,1.94,6,6,0,3,0,2,37,NA,NA,3,NA +65169,7,2,2,59,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,12649.084278,13261.522577,3,90,1,1,0,2,2,0,0,1,1,61,2,3,1,3 +65170,7,2,1,4,NA,5,6,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7580.437211,8294.186048,2,92,12,12,NA,7,7,2,4,0,1,54,2,2,1,5 +65171,7,2,2,13,NA,3,3,2,13,157,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,99340.784743,108314.70799,2,91,15,15,5,4,4,0,2,0,1,53,1,5,1,4 +65172,7,2,2,34,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,89807.047643,91549.325071,3,91,7,7,1.74,4,4,0,2,0,1,45,2,2,1,4 +65173,7,2,2,41,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,NA,NA,NA,1,2,2,1,12969.776823,13038.360257,2,90,3,3,0.65,5,3,1,2,0,1,44,2,5,1,5 +65174,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,85610.546667,92292.669073,2,91,14,14,4.19,3,3,0,1,0,2,31,1,4,1,3 +65175,7,2,2,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,16049.689005,2,100,5,5,1.63,3,2,0,1,0,2,50,1,2,5,NA 
+65176,7,2,2,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19130.246369,18737.157388,2,95,10,10,2.32,6,6,1,2,0,1,44,1,4,1,4 +65177,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,120604.496044,126250.391527,2,92,10,10,3.51,3,3,0,0,0,1,24,1,4,5,NA +65178,7,2,2,0,10,3,3,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7569.605114,7776.819156,2,102,6,6,1.01,5,5,2,0,0,2,18,1,3,NA,NA +65179,7,2,1,48,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22433.902709,22559.455212,1,100,15,15,5,2,2,0,0,0,1,48,2,5,1,5 +65180,7,2,2,14,NA,4,4,2,14,172,NA,NA,2,2,3,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10694.834447,10900.381844,1,90,4,4,0.58,6,6,0,3,0,2,21,2,5,5,NA +65181,7,1,1,3,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58618.419318,0,2,94,14,14,2.87,5,5,2,1,0,1,37,1,3,1,4 +65182,7,2,1,45,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,2,6,NA,2,2,2,1,2,2,2,2,1,2,34128.967046,33628.177065,2,93,3,3,0.43,4,4,0,0,0,1,45,2,2,6,NA +65183,7,2,1,62,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,8623.181934,10090.425838,2,103,4,4,1.34,3,1,0,0,1,1,62,2,3,1,NA +65184,7,2,2,50,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11446.604914,11655.034159,2,92,77,77,NA,4,4,0,0,0,1,27,2,2,5,NA +65185,7,2,1,6,NA,5,7,2,6,80,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9502.15317,9616.069144,2,100,9,9,2.46,4,4,0,2,0,2,36,2,4,1,3 +65186,7,2,2,0,2,4,4,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4418.651245,4685.491283,2,95,7,7,1.41,5,5,2,0,0,2,53,1,3,3,NA +65187,7,2,1,11,NA,4,4,1,11,141,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9418.975084,9571.535533,2,93,5,5,0.92,4,4,1,1,0,1,27,2,2,5,NA +65188,7,2,2,59,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14068.752668,14242.311751,1,93,15,15,5,5,5,1,0,1,1,61,2,4,1,4 +65189,7,2,1,63,NA,2,2,1,NA,NA,2,NA,77,NA,NA,NA,3,77,NA,1,2,2,1,2,2,1,2,2,1,9250.428657,10460.65091,2,92,99,99,NA,1,1,0,0,1,1,63,77,3,77,NA 
+65190,7,2,2,62,NA,3,3,2,NA,NA,2,NA,2,1,7,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,44636.780791,48652.73082,2,91,6,6,1.78,3,3,0,0,1,2,62,2,3,3,NA +65191,7,2,2,30,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,13124.243834,13151.042447,1,93,15,6,2.3,6,1,0,0,0,1,34,2,5,5,NA +65192,7,2,2,52,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16677.13986,16814.569762,1,97,15,15,5,4,4,0,0,0,1,51,2,5,1,5 +65193,7,2,2,3,NA,4,4,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16749.124902,17181.448234,2,101,4,4,1.16,2,2,1,0,0,2,28,1,4,5,NA +65194,7,2,1,31,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,13936.822202,14121.672299,1,93,15,5,1.84,6,1,0,0,0,1,34,2,5,5,NA +65195,7,2,1,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,13232.135,13189.930654,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 +65196,7,2,2,1,22,3,3,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,41956.615141,45173.707461,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +65197,7,2,1,6,NA,5,7,1,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15279.821652,15716.743409,1,102,6,6,1.34,4,4,2,1,0,2,27,1,4,3,NA +65198,7,2,1,32,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,34887.439952,34624.133414,2,94,7,7,1.34,5,5,2,1,0,1,32,2,1,1,NA +65199,7,2,2,51,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17852.668137,18282.490034,3,91,7,7,1.33,6,6,0,0,2,2,51,2,5,1,5 +65200,7,2,1,10,NA,5,6,2,10,123,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7832.194045,8303.616222,1,96,15,15,5,4,4,0,2,0,2,41,2,5,1,5 +65201,7,2,2,77,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,56666.803206,58405.284869,1,101,15,15,5,3,3,0,0,2,1,79,1,4,1,4 +65202,7,2,1,45,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,1,4,NA,2,2,2,2,2,2,2,2,2,2,52941.648658,52164.811575,2,102,5,5,1.08,3,3,0,0,0,1,55,2,1,5,NA +65203,7,2,2,7,NA,3,3,2,7,88,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,70999.269152,75403.63644,1,93,15,15,5,4,4,0,2,0,1,51,1,3,1,5 
+65204,7,2,1,15,NA,4,4,2,15,188,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12195.546462,12314.973739,2,97,6,6,0.92,7,7,1,4,0,2,29,1,3,5,NA +65205,7,2,1,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,166897.201244,171848.340208,2,91,7,7,1.61,4,4,0,0,3,1,65,1,3,6,NA +65206,7,2,2,6,NA,5,6,2,6,75,NA,NA,2,1,2,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10029.642884,10710.156898,2,91,14,14,3.69,4,4,1,1,0,1,53,1,4,1,5 +65207,7,2,2,11,NA,2,2,2,11,134,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,NA,15583.587534,16935.930722,1,93,14,14,3.06,5,5,0,2,0,1,46,2,1,1,4 +65208,7,2,2,5,NA,3,3,1,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,85197.465687,87881.529962,1,92,15,15,5,4,4,2,0,0,2,46,1,5,1,5 +65209,7,2,2,71,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,28297.225791,31371.640749,2,102,14,14,2.44,7,7,0,2,1,2,71,1,3,3,NA +65210,7,2,2,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,19333.302971,19445.46438,1,99,3,3,0.88,2,2,0,1,0,2,48,1,1,3,NA +65211,7,2,2,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6293.31819,6860.614572,1,96,7,7,1,7,7,2,1,1,2,53,1,4,1,3 +65212,7,2,1,14,NA,5,6,1,14,170,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7291.654281,7756.692286,3,91,9,9,3.24,3,3,0,1,1,1,64,2,2,1,5 +65213,7,2,2,64,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,15369.196003,16553.883322,1,100,15,15,5,2,2,0,0,2,2,64,1,3,1,3 +65214,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,74517.751389,77393.175383,2,94,10,10,2.91,4,4,0,2,0,2,38,1,4,1,4 +65215,7,2,1,1,14,3,3,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37534.976354,40372.682594,1,99,14,14,4.03,4,4,1,1,0,1,40,2,4,1,5 +65216,7,1,2,1,22,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9955.153132,0,2,94,4,4,0.72,4,4,1,1,0,1,30,2,1,1,3 +65217,7,2,2,22,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,60461.427433,62051.800421,2,92,2,2,0.4,3,3,0,0,0,1,50,2,4,1,4 
+65218,7,2,1,74,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,9748.579573,13410.055152,3,90,13,3,0.79,3,2,0,0,2,1,74,2,1,1,NA +65219,7,2,1,2,NA,4,4,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6936.347746,7414.543244,1,98,10,10,2.59,5,5,2,1,0,1,45,1,5,1,5 +65220,7,2,1,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,1,2,NA,NA,NA,NA,9850.66662,10431.854777,2,98,3,3,1.1,1,1,0,0,1,1,80,1,1,2,NA +65221,7,2,2,2,NA,3,3,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,45222.58045,46647.274385,1,98,15,15,4.56,4,4,2,0,0,2,33,1,4,1,4 +65222,7,2,2,7,NA,1,1,2,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12958.860039,13685.571339,1,99,15,15,5,3,3,0,1,0,2,34,1,4,1,4 +65223,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,99,99,NA,2,1,0,0,0,1,21,1,4,5,NA +65224,7,1,1,58,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,16851.334496,0,2,95,3,3,0.95,2,2,0,0,1,2,65,1,2,3,NA +65225,7,2,2,34,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16411.593279,16969.580129,2,103,15,15,5,2,2,0,0,0,2,39,2,5,5,NA +65226,7,2,2,3,NA,3,3,1,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,71938.505792,79397.866058,3,91,8,8,1.95,4,4,2,0,0,2,30,1,5,1,4 +65227,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,19831.16853,19892.1428,1,103,3,3,0.37,5,5,1,2,0,2,30,1,4,5,NA +65228,7,2,2,44,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,16237.004863,17542.96783,1,90,15,15,5,5,5,0,2,0,1,47,2,5,1,5 +65229,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,109181.566304,112638.448741,2,91,14,14,3.93,3,3,0,0,2,1,70,NA,NA,1,NA +65230,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,25128.435397,27949.568291,2,94,5,5,1.3,3,3,0,1,0,1,43,1,3,6,NA +65231,7,2,1,31,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,3,1,NA,2,2,2,2,2,2,2,2,2,2,34997.800447,36560.756135,2,96,6,6,1.25,4,4,1,1,0,1,31,2,3,1,3 
+65232,7,2,2,0,4,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8775.375504,8590.751882,2,102,8,8,2.24,4,4,1,1,0,1,40,1,3,1,3 +65233,7,2,1,30,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,47932.213152,49254.617856,1,92,14,14,3.9,4,4,2,0,0,2,29,1,4,1,4 +65234,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,1,2,1,2,2,NA,NA,NA,NA,26773.686592,29743.160504,2,98,99,99,NA,2,2,0,0,2,2,80,1,3,1,1 +65235,7,2,2,5,NA,2,2,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12541.254112,13845.88109,2,100,14,14,3.36,4,4,1,1,0,1,45,2,5,1,2 +65236,7,2,2,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,12980.451176,13849.150613,1,96,7,7,3.58,1,1,0,0,1,2,73,1,2,3,NA +65237,7,2,2,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,53574.557692,55073.604958,1,90,4,1,0.27,2,1,0,0,0,2,25,1,5,6,NA +65238,7,2,1,19,NA,1,1,1,19,228,2,NA,2,2,4,15,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,29234.272259,29869.33924,3,92,4,4,0.46,7,7,1,2,0,2,31,2,2,1,1 +65239,7,2,2,60,NA,4,4,2,NA,NA,2,NA,2,2,8,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,15732.436891,16737.04767,1,97,3,3,0.9,1,1,0,0,1,2,60,2,4,2,NA +65240,7,2,1,73,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,1,1,2,1,1,2,1,NA,11066.512629,11628.392246,1,99,9,9,4.08,2,2,0,0,2,1,73,2,5,1,5 +65241,7,2,2,42,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,15147.212851,15611.778104,1,96,6,6,1.34,3,3,1,0,0,2,42,2,4,6,NA +65242,7,2,1,5,NA,1,1,1,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,17865.135763,18076.339981,3,92,6,6,0.96,5,5,2,1,0,2,26,2,1,1,1 +65243,7,2,2,35,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,49102.007191,48391.207475,3,92,10,10,3.77,3,3,0,1,0,2,52,1,4,6,NA +65244,7,2,2,49,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,18490.479848,19120.175119,2,100,4,4,0.91,3,3,0,0,0,2,49,1,2,1,2 +65245,7,1,2,2,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9008.172545,0,1,97,15,15,5,3,3,1,0,0,1,28,1,3,6,NA 
+65246,7,2,1,3,NA,3,3,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,95119.048777,111454.284969,1,100,6,6,1.78,3,3,1,1,0,2,35,1,5,4,NA +65247,7,2,2,8,NA,4,4,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10221.991799,10600.655937,1,98,6,6,1.36,3,3,0,2,0,1,35,1,5,3,NA +65248,7,2,2,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,118611.064701,118209.809508,1,91,14,14,5,2,2,0,0,2,1,68,1,4,1,4 +65249,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,53401.089129,55537.639072,2,101,99,2,0.73,3,1,0,0,0,2,22,1,4,5,NA +65250,7,2,1,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,29413.309667,30038.439043,1,94,3,3,0.9,1,1,0,0,0,1,41,1,3,5,NA +65251,7,2,2,10,NA,4,4,2,10,129,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9565.802332,9920.158544,1,96,14,14,2.58,6,6,2,2,0,1,40,2,4,1,4 +65252,7,2,2,6,NA,4,4,2,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9089.153301,9279.567325,1,93,1,1,0,2,2,0,1,0,2,38,1,4,3,NA +65253,7,2,2,3,NA,5,6,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4919.743181,5055.947351,1,97,7,7,1.48,5,5,1,2,0,1,40,2,5,1,4 +65254,7,2,2,2,NA,4,4,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5899.406975,6225.79867,2,103,7,7,1.55,5,5,2,2,0,2,31,1,4,3,NA +65255,7,2,1,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,24022.668025,24108.898567,1,99,10,10,5,1,1,0,0,0,1,52,1,5,3,NA +65256,7,2,2,36,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,85610.546667,92292.669073,2,91,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +65257,7,2,1,13,NA,3,3,2,13,163,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,30100.326038,35344.542616,1,101,6,6,1.52,3,3,0,1,1,1,62,1,2,1,3 +65258,7,2,2,16,NA,1,1,1,17,204,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,21818.047789,22469.060223,2,102,7,7,2.16,3,3,0,2,0,2,41,1,5,3,NA +65259,7,2,2,5,NA,3,3,2,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,23388.579221,24855.504015,2,91,7,7,1.29,6,6,2,2,0,1,33,2,3,6,NA 
+65260,7,2,2,58,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,13354.133365,13424.749254,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +65261,7,2,2,41,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,30932.175051,34795.188535,2,101,7,7,3.21,1,1,0,0,0,2,41,1,5,5,NA +65262,7,2,1,35,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,4,5,NA,1,2,2,1,2,1,1,2,2,1,20463.181443,21839.784829,1,93,7,7,2.38,2,2,0,0,0,2,46,2,3,5,NA +65263,7,2,2,13,NA,2,2,1,13,165,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20460.442471,21235.303768,2,102,14,14,2.44,7,7,0,2,1,2,71,1,3,3,NA +65264,7,2,1,40,NA,5,7,2,NA,NA,2,NA,2,2,3,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,26449.318243,30106.409363,1,90,77,77,NA,2,2,0,0,0,1,40,2,2,1,5 +65265,7,2,1,56,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,6,NA,2,2,2,1,2,2,NA,NA,NA,NA,24595.31055,24234.412281,1,97,3,3,0.5,5,5,0,2,0,1,56,2,2,6,NA +65266,7,2,2,55,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,24870.513993,25871.320138,3,92,7,7,2.45,2,2,0,0,1,2,55,1,1,1,1 +65267,7,2,2,37,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,14553.261149,15099.267285,2,92,15,15,5,3,3,1,0,0,2,37,1,5,1,5 +65268,7,2,2,20,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,2,1,2,2,1,2,2,1,2,2,3,16929.836231,17652.770039,2,101,12,8,4.82,2,1,0,0,0,2,21,NA,NA,5,NA +65269,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22233.683089,22579.222386,1,102,5,1,0.21,5,4,1,1,0,2,24,1,4,5,NA +65270,7,2,1,24,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,34629.549705,1,94,5,5,1.47,2,2,0,0,0,1,24,1,4,1,4 +65271,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,36449.806171,39411.375474,1,91,6,6,1.07,6,6,3,1,0,2,27,1,4,6,NA +65272,7,2,2,1,19,4,4,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7618.827213,8513.998744,2,97,2,2,0.34,2,2,1,0,0,2,20,1,3,5,NA +65273,7,2,1,5,NA,1,1,1,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14098.200143,14544.470341,1,102,4,4,0.61,5,5,2,2,0,2,27,2,2,5,NA 
+65274,7,1,1,25,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,15435.176007,0,2,99,4,4,0.78,4,4,0,2,0,2,45,1,3,5,NA +65275,7,1,1,80,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,20611.860643,0,1,99,7,7,3.9,1,1,0,0,1,1,80,1,4,2,NA +65276,7,2,2,11,NA,4,4,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11593.230877,12022.691286,2,96,8,8,1.72,5,5,0,3,0,1,39,1,5,1,4 +65277,7,2,2,2,NA,3,3,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,44626.8774,45213.2896,1,92,14,14,4.59,3,3,1,0,0,1,31,1,4,1,5 +65278,7,2,2,13,NA,1,1,2,13,157,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,18368.872199,20151.001259,2,94,6,6,1.34,4,4,0,2,0,1,37,2,4,1,2 +65279,7,2,2,6,NA,5,7,2,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19880.837381,19932.405693,1,95,3,3,0.45,4,4,2,1,0,2,26,1,3,4,NA +65280,7,2,1,62,NA,1,1,1,NA,NA,2,NA,2,1,8,NA,3,3,NA,2,2,2,1,2,2,NA,NA,NA,NA,14488.953694,16384.525762,3,92,1,1,0,6,6,0,3,1,1,62,2,3,3,NA +65281,7,2,2,8,NA,3,3,2,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,28912.545431,29050.529973,1,98,3,3,0.61,4,4,0,2,0,2,32,1,3,6,NA +65282,7,2,1,2,NA,1,1,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12387.68972,12183.594345,1,94,6,6,1.78,3,3,1,0,0,2,31,2,5,1,1 +65283,7,2,2,31,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,29102.738194,28317.485179,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +65284,7,2,2,6,NA,5,6,2,6,77,NA,NA,2,2,1,0,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5795.227223,6217.308223,1,91,14,14,4.32,3,3,0,2,0,2,37,2,5,1,NA +65285,7,2,1,44,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,1,NA,1,2,2,1,2,2,1,2,2,2,34153.424332,35777.45113,1,100,8,8,2.17,4,4,1,1,0,2,40,2,2,1,2 +65286,7,2,1,30,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,2,1,2,1,1,2,2,1,20228.224986,20656.862173,2,102,15,15,3.82,5,5,0,1,2,1,60,2,2,1,1 +65287,7,2,1,1,15,5,7,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26781.430467,29328.277521,1,102,6,6,1.34,4,4,2,1,0,2,27,1,4,3,NA 
+65288,7,2,2,41,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,37512.060155,37707.682357,1,92,14,14,5,2,2,0,1,0,2,41,1,5,3,NA +65289,7,2,2,73,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,17474.843163,17732.021804,1,98,6,6,1.57,3,3,0,0,2,1,66,2,2,1,4 +65290,7,2,2,0,8,4,4,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4439.36229,4707.453058,2,100,5,5,0.88,5,5,2,1,0,2,30,1,4,6,NA +65291,7,2,1,29,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,3,1,NA,2,2,2,2,2,2,2,2,2,2,35669.2076,37942.468331,2,94,9,9,2.51,4,4,2,0,0,2,34,2,3,1,3 +65292,7,2,1,3,NA,5,6,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7769.861689,8391.242047,1,101,8,8,1.81,5,5,2,0,1,2,37,2,4,1,2 +65293,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,10038.743429,10812.549167,1,96,6,6,1.83,2,2,0,0,1,2,64,1,3,2,NA +65294,7,2,2,15,NA,4,4,2,16,192,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11133.192774,11654.909373,3,91,2,2,0.25,4,4,0,2,0,2,35,1,3,5,NA +65295,7,2,2,17,NA,1,1,1,17,212,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18674.868029,19048.995585,2,92,8,8,1.42,7,7,0,4,0,2,37,1,1,6,NA +65296,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,38666.703155,42427.769217,2,94,9,9,3.97,2,2,0,0,2,1,80,1,5,1,5 +65297,7,2,1,72,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,1,1,2,1,1,2,2,NA,12375.169389,14759.280437,2,92,4,4,1.14,2,2,0,0,2,1,72,2,3,1,3 +65298,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,140932.152825,151077.472396,1,97,14,14,3.36,4,4,0,2,0,2,49,1,5,1,5 +65299,7,2,1,29,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,94547.245282,99078.839738,2,94,15,10,5,2,1,0,0,0,1,29,1,4,6,NA +65300,7,2,1,1,16,3,3,2,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50563.666669,55372.144903,1,95,9,9,2.68,4,4,2,0,0,2,27,1,4,1,4 +65301,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10604.379638,1,99,8,8,4.13,1,1,0,0,1,2,62,1,2,3,NA 
+65302,7,2,1,11,NA,3,3,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21767.839187,23123.409471,1,101,5,5,1.28,3,3,0,2,0,2,44,1,5,3,NA +65303,7,2,2,19,NA,5,6,2,19,239,2,NA,2,2,2,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7588.544207,7881.983727,3,91,4,4,0.69,5,5,0,2,0,1,45,2,4,1,1 +65304,7,2,1,43,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,30596.964094,35273.693641,2,102,14,14,5,1,1,0,0,0,1,43,1,4,3,NA +65305,7,2,2,16,NA,5,7,2,16,197,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6386.337576,6452.948976,1,93,15,15,4.59,4,4,0,2,0,2,45,1,5,1,5 +65306,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,30802.731848,31491.045952,1,100,6,6,1.18,5,5,1,2,0,2,30,1,3,1,4 +65307,7,2,1,3,NA,3,3,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27608.061058,31148.453885,1,94,6,6,1.18,5,5,1,2,0,1,30,1,3,1,3 +65308,7,2,2,54,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,2,2,2,2,2,1,2,25483.560748,25945.701567,1,94,12,3,1.07,4,1,0,0,1,2,37,NA,NA,6,NA +65309,7,1,1,77,NA,1,1,NA,NA,NA,1,1,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,14200.083364,0,2,98,3,3,1.1,1,1,0,0,1,1,77,1,2,2,NA +65310,7,2,1,40,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,27356.080541,28721.124083,1,101,6,6,1.21,4,4,0,2,0,2,33,1,2,6,NA +65311,7,2,2,7,NA,1,1,1,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15510.382876,16516.802792,1,100,8,8,2.17,4,4,1,1,0,2,40,2,2,1,2 +65312,7,2,1,2,NA,5,7,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26781.430467,29328.277521,1,102,6,6,1.34,4,4,2,1,0,2,27,1,4,3,NA +65313,7,2,1,3,NA,4,4,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6946.177172,7233.19413,2,90,4,4,0.57,5,5,1,2,0,2,33,2,2,77,NA +65314,7,2,2,16,NA,4,4,1,17,204,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12208.965913,12707.508076,2,100,6,6,0.99,5,5,0,3,0,2,40,1,3,1,3 +65315,7,2,1,7,NA,2,2,2,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12049.155217,12263.670489,2,90,14,14,3.58,4,4,1,1,0,1,37,1,3,1,4 
+65316,7,2,1,54,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,26127.59163,27234.881102,1,100,15,15,5,4,4,0,0,0,1,54,1,5,1,5 +65317,7,2,2,26,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,15267.012422,14516.051827,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +65318,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,51433.469947,56330.524019,1,98,5,5,1.97,1,1,0,0,1,2,80,1,4,2,NA +65319,7,2,1,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,11849.199361,13959.374637,1,100,4,4,1.16,2,2,0,0,2,1,73,1,3,1,3 +65320,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,109181.566304,114466.270601,2,91,15,3,0.98,7,1,0,0,1,1,49,NA,NA,5,NA +65321,7,2,1,7,NA,2,2,1,7,90,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14820.807433,15173.085782,2,102,6,6,1.12,4,4,1,1,0,1,38,2,2,1,3 +65322,7,2,2,23,NA,2,2,2,NA,NA,2,NA,2,1,4,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,30253.427014,31766.413225,2,90,6,6,1.35,3,3,1,0,0,1,31,1,3,1,4 +65323,7,2,1,0,11,2,2,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6266.985765,6267.101277,1,93,15,15,5,3,3,1,0,0,2,32,2,5,1,5 +65324,7,2,2,0,3,1,1,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7011.218293,6988.118839,2,97,8,8,2.01,4,4,2,0,0,2,24,1,4,1,1 +65325,7,2,2,73,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,72443.10981,74665.593008,2,102,7,7,2.72,2,2,0,0,1,2,73,1,4,2,NA +65326,7,2,2,44,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,1,1,2,1,2,2,1,2,2,NA,NA,NA,NA,12544.244874,13448.569073,1,102,5,5,0.92,5,5,1,2,0,2,44,2,1,1,2 +65327,7,2,1,38,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,18544.003944,22007.065507,2,97,NA,99,NA,7,6,2,1,1,2,56,1,3,5,NA +65328,7,2,1,63,NA,2,2,2,NA,NA,2,NA,2,2,7,NA,4,5,NA,2,2,2,2,2,2,2,2,2,2,10903.483462,11078.143343,1,93,2,2,0.43,2,2,0,0,2,2,80,2,1,2,NA +65329,7,2,1,21,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,43287.255521,44006.168052,1,90,6,6,1.57,3,3,0,0,0,1,50,2,2,1,2 
+65330,7,2,2,0,9,3,3,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8814.114917,8994.318513,1,95,6,6,0.81,6,6,2,2,0,1,30,1,3,1,4 +65331,7,2,2,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16741.034883,16282.872546,2,95,2,2,0.81,1,1,0,0,0,2,54,1,4,5,NA +65332,7,2,1,7,NA,1,1,1,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14880.007592,15211.324369,3,92,2,2,0.47,3,3,1,1,0,2,33,1,4,5,NA +65333,7,2,2,2,NA,5,6,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,1,NA,NA,NA,NA,7029.864692,7462.832472,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +65334,7,2,2,60,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,2,3,NA,2,2,2,2,2,2,2,2,2,2,13676.984152,18290.186018,2,91,2,2,0.75,1,1,0,0,1,2,60,2,2,3,NA +65335,7,2,1,55,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,14288.99518,14503.461604,1,102,9,9,2.39,5,5,0,1,1,1,55,2,5,1,5 +65336,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,124037.944076,128061.498173,1,100,15,15,5,4,4,0,1,0,1,50,1,4,1,4 +65337,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,28478.57859,28745.66908,1,98,3,3,1.25,1,1,0,0,1,1,63,1,4,5,NA +65338,7,2,2,22,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,16239.242782,17454.103497,2,101,99,1,0.22,3,1,0,0,0,2,22,1,4,5,NA +65339,7,2,1,60,NA,2,2,1,NA,NA,2,NA,2,2,7,NA,4,6,NA,2,2,2,1,2,2,2,2,2,2,9911.173561,10104.337319,2,92,15,10,5,2,1,0,0,1,2,54,2,5,3,NA +65340,7,2,2,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,71351.478679,72494.161504,2,97,15,15,4.97,5,5,1,0,0,1,48,1,4,1,3 +65341,7,2,1,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,116464.874823,118336.754883,2,94,14,14,5,3,3,0,0,0,1,42,1,5,1,5 +65342,7,2,2,2,NA,4,4,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6306.491784,7047.470907,2,90,5,5,1.08,3,3,1,1,0,2,23,1,2,5,NA +65343,7,2,2,4,NA,3,3,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,40290.055403,42817.035801,1,99,15,15,5,5,5,3,0,0,2,34,1,5,1,5 
+65344,7,2,1,60,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,9745.303498,10208.81789,2,98,6,6,0.63,7,7,2,2,1,1,60,1,3,1,2 +65345,7,2,2,1,13,4,4,2,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7734.994032,8078.467013,2,95,1,1,0,4,4,2,1,0,2,27,1,4,5,NA +65346,7,2,2,49,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18490.479848,18110.536811,2,100,14,14,3.06,5,5,1,0,0,1,50,1,5,1,5 +65347,7,2,2,30,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,17983.231494,18657.922524,3,91,10,10,3.67,3,3,1,0,0,2,30,2,4,1,4 +65348,7,2,2,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,99120.116925,101722.54619,2,98,15,15,5,3,3,0,1,0,1,56,1,5,1,5 +65349,7,2,1,19,NA,2,2,1,19,231,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,17555.907575,18699.509115,2,93,3,3,0.52,5,5,0,2,0,1,41,2,4,1,4 +65350,7,2,1,6,NA,4,4,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11321.293503,11517.688596,1,98,6,6,1.36,3,3,0,2,0,1,35,1,5,3,NA +65351,7,2,1,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,21171.267805,24852.610883,1,97,5,5,1.79,1,1,0,0,0,1,36,1,3,3,NA +65352,7,2,1,53,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11240.290931,11199.684593,2,92,12,77,NA,7,2,0,0,2,1,53,2,3,1,3 +65353,7,2,2,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,18490.479848,18110.536811,2,100,15,15,4.97,5,5,0,2,1,2,42,1,5,1,5 +65354,7,2,1,13,NA,3,3,2,14,169,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,127790.772987,128099.355719,1,97,15,15,5,4,4,0,2,0,1,49,1,5,1,5 +65355,7,2,1,37,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19799.565045,19517.338151,1,90,5,5,0.74,5,5,0,2,0,2,18,1,4,NA,NA +65356,7,2,1,8,NA,3,3,1,8,103,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24713.905595,26080.214484,1,98,5,5,0.74,5,5,0,3,0,1,35,1,2,6,NA +65357,7,2,2,7,NA,4,4,1,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8504.389189,8822.229593,2,93,5,5,0.74,5,5,1,2,0,2,28,1,2,6,NA 
+65358,7,2,2,45,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,115926.402585,116948.004168,1,91,10,10,3.8,3,3,0,0,0,1,45,NA,NA,1,4 +65359,7,2,1,14,NA,2,2,2,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25622.388537,25620.13505,1,97,15,15,4.52,6,6,0,4,0,2,41,1,5,1,5 +65360,7,2,2,17,NA,3,3,2,17,205,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,111142.989658,114400.666347,1,95,NA,NA,NA,5,5,0,2,0,2,37,1,3,1,NA +65361,7,2,1,11,NA,4,4,1,11,138,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8447.917731,9106.89532,1,102,15,15,5,3,3,0,1,0,1,41,1,5,1,5 +65362,7,2,2,11,NA,2,2,2,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,15148.721588,15796.67129,2,91,2,2,0.22,4,4,0,3,0,2,45,2,5,4,NA +65363,7,2,1,4,NA,1,1,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,16797.750213,2,98,14,14,3.91,4,4,1,1,0,1,36,2,3,1,5 +65364,7,2,2,8,NA,1,1,2,8,98,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,9872.244853,10294.506103,2,90,6,6,1.15,5,5,0,2,0,2,47,2,1,1,5 +65365,7,2,2,17,NA,4,4,1,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11078.202838,10933.134393,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +65366,7,2,1,60,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,3,1,NA,2,2,2,1,2,2,2,2,2,2,6449.12882,6755.867756,2,93,8,8,2.97,2,2,0,0,1,1,60,2,3,1,3 +65367,7,2,1,10,NA,2,2,2,10,127,NA,NA,2,2,2,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,16281.509392,17100.217351,2,91,4,4,0.43,7,7,0,1,1,1,41,2,1,4,NA +65368,7,2,1,23,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27681.749998,29590.732776,2,98,3,3,0.54,3,3,1,0,0,1,23,1,3,1,2 +65369,7,2,1,3,NA,3,3,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,71085.311148,83293.12191,2,94,14,14,3.36,4,4,2,0,0,1,31,1,3,1,5 +65370,7,2,2,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,112992.533921,121615.449738,1,94,6,6,1.57,3,3,0,1,0,2,28,1,4,1,4 +65371,7,2,1,10,NA,4,4,1,10,121,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12552.995979,12770.757918,2,96,4,4,0.65,5,5,0,3,0,1,30,1,4,1,2 
+65372,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,4,3,1,2,2,1,2,2,1,2,2,1,27367.658704,37250.756368,1,91,6,6,2.04,2,2,1,0,0,2,31,1,5,4,NA +65373,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,154825.466557,160454.355913,1,91,15,15,5,4,4,0,1,0,1,45,1,5,1,5 +65374,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,32415.779658,36870.322177,2,91,4,4,1.02,2,2,0,0,2,1,80,1,4,1,2 +65375,7,2,2,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,150482.742079,154420.406935,2,98,15,15,5,2,2,0,0,0,2,52,1,3,1,4 +65376,7,1,1,2,24,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,0,2,98,5,5,1.63,2,2,1,0,0,2,31,1,1,3,NA +65377,7,2,1,19,NA,4,4,1,19,232,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13276.485807,13770.209619,1,96,4,4,1.29,2,2,0,0,0,2,48,1,4,3,NA +65378,7,2,2,3,NA,5,6,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,5022.237557,1,91,15,15,5,4,4,1,1,0,1,43,2,5,1,5 +65379,7,2,2,12,NA,3,3,1,12,155,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,39616.634313,40360.331753,2,101,3,3,0.6,3,3,0,2,0,1,39,1,4,4,NA +65380,7,2,2,17,NA,5,6,1,17,205,2,NA,2,2,1,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9884.956101,9995.989819,1,94,99,1,0.32,6,1,0,3,0,2,45,NA,NA,6,NA +65381,7,2,2,9,NA,4,4,1,9,114,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9453.111053,10094.338553,1,100,6,6,1.39,4,4,0,3,0,2,29,1,4,5,NA +65382,7,1,1,4,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6946.177172,0,2,90,6,6,1.03,6,6,3,1,0,1,45,2,2,1,2 +65383,7,2,1,15,NA,1,1,2,16,193,NA,NA,2,2,2,9,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,20398.562455,20202.582308,2,94,77,77,NA,3,3,0,1,0,2,42,2,2,6,NA +65384,7,2,2,72,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,20267.042371,20898.557176,2,102,7,7,1.68,5,5,0,0,3,1,70,2,4,1,4 +65385,7,2,2,12,NA,5,6,2,12,149,NA,NA,2,1,99,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11975.458482,12414.350614,1,97,14,14,2.29,7,7,1,2,2,1,40,2,1,1,1 
+65386,7,2,1,6,NA,1,1,1,6,83,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,19735.224235,20204.314213,2,102,8,8,2.24,4,4,1,1,0,1,35,2,3,1,1 +65387,7,1,2,34,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,3,3,1,2,2,1,2,2,NA,NA,NA,NA,71034.153987,0,1,98,7,7,1,7,7,2,2,0,2,34,1,4,3,NA +65388,7,2,1,33,NA,4,4,2,NA,NA,2,NA,2,1,3,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,23454.081246,24179.640416,1,91,99,99,NA,3,3,1,0,0,1,33,2,5,5,NA +65389,7,2,1,32,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,13593.59406,16516.502952,2,90,77,77,NA,4,3,1,0,0,2,30,2,2,5,NA +65390,7,2,2,2,NA,5,6,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6796.893869,7396.552454,2,93,15,15,5,3,3,1,0,0,1,41,2,5,1,5 +65391,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,11389.795225,13662.741278,2,92,8,8,1.91,5,5,0,2,1,2,47,2,1,1,3 +65392,7,2,2,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,17635.020067,18381.689068,2,97,6,6,1.02,6,6,1,2,0,1,37,1,3,1,3 +65393,7,2,1,78,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,15844.527135,16445.651972,1,101,4,4,1.22,2,2,0,0,2,2,77,1,4,1,3 +65394,7,2,2,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,28175.708676,28476.609384,1,91,4,4,1.7,1,1,0,0,1,2,71,1,4,2,NA +65395,7,2,1,77,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,14537.797299,15440.503889,3,91,2,2,0.83,1,1,0,0,1,1,77,1,5,5,NA +65396,7,2,1,54,NA,5,6,1,NA,NA,2,NA,2,7,4,NA,5,3,NA,1,2,1,1,2,1,1,2,1,NA,12983.367248,13800.313434,2,101,77,77,NA,4,1,0,0,0,1,39,NA,NA,5,NA +65397,7,1,1,65,NA,5,6,NA,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,12579.986433,0,1,95,3,3,0.43,4,4,0,1,2,1,65,2,5,1,3 +65398,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,1,9,NA,4,2,NA,1,2,2,1,2,2,1,1,2,NA,18693.365067,19341.691824,1,92,12,12,NA,1,1,0,0,1,2,80,2,4,2,NA +65399,7,2,1,59,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18753.573091,18839.996658,2,99,15,15,4.34,4,4,0,0,0,1,59,2,4,1,5 
+65400,7,2,1,16,NA,1,1,1,16,197,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22768.423624,22886.980387,3,92,7,7,1.41,5,5,1,2,0,1,40,1,3,1,4 +65401,7,2,1,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,31291.360507,35666.138476,2,91,4,4,1.4,1,1,0,0,0,1,55,1,4,5,NA +65402,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,99283.360764,105284.943086,1,94,15,15,5,4,3,0,0,1,1,33,1,2,5,NA +65403,7,2,1,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,4,NA,1,2,2,1,2,2,1,2,2,1,31291.360507,35666.138476,2,91,3,3,1.16,1,1,0,0,0,1,52,1,1,4,NA +65404,7,2,2,27,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,5,5,2,1,2,2,1,2,2,1,2,2,3,16929.836231,18593.694637,2,101,8,6,2.85,2,1,0,0,0,2,27,2,5,5,NA +65405,7,2,1,17,NA,1,1,1,17,211,2,NA,1,1,NA,12,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,18635.323223,19040.145288,1,103,6,6,1.57,3,3,0,1,0,2,50,2,3,4,NA +65406,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,104488.914565,106745.836574,1,98,3,1,0.22,4,1,0,0,0,1,20,1,4,5,NA +65407,7,2,1,46,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18790.641284,22550.069226,2,100,6,6,0.99,5,5,0,3,0,2,40,1,3,1,3 +65408,7,2,2,19,NA,2,2,2,19,233,2,NA,2,2,4,10,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,12680.621719,14355.413798,2,90,3,3,0.46,5,5,0,2,2,1,75,2,1,1,2 +65409,7,2,1,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7701.757497,8492.447039,2,90,2,2,0.38,4,4,1,2,0,2,32,1,4,5,NA +65410,7,2,2,71,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,2,NA,1,2,1,1,2,2,1,2,1,NA,13838.805939,14318.765975,1,103,15,15,3.7,5,5,0,2,1,1,55,1,5,1,5 +65411,7,2,2,16,NA,5,6,1,17,204,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8391.252153,8615.174176,1,92,8,8,2.3,4,4,0,1,0,2,41,NA,NA,1,3 +65412,7,2,1,41,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18898.73153,19305.900504,1,100,8,8,2.62,3,3,0,1,0,1,41,2,5,1,5 +65413,7,2,2,1,17,2,2,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8214.128831,8731.974448,1,99,8,8,1.91,5,5,2,0,1,2,38,2,4,1,4 
+65414,7,2,2,6,NA,5,6,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8290.163782,8692.019106,1,92,15,15,5,4,4,0,2,0,1,55,1,5,1,5 +65415,7,2,2,1,22,3,3,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14349.194039,14956.025029,1,103,6,6,1.11,5,5,1,1,1,1,29,1,3,1,3 +65416,7,2,1,23,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,18088.213601,18580.673153,1,98,12,5,2.15,2,1,0,0,0,1,23,2,5,5,NA +65417,7,2,2,75,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,4,NA,1,2,2,1,2,2,1,2,2,NA,29145.675285,31446.34406,3,92,10,10,2.82,4,4,0,1,1,1,36,1,3,1,5 +65418,7,2,2,7,NA,4,4,2,7,86,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7282.523598,7776.514874,2,99,6,6,1.11,5,5,0,4,0,2,34,1,4,5,NA +65419,7,2,1,19,NA,3,3,1,19,232,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,33505.877646,33253.482687,2,101,5,3,1.1,2,1,0,0,0,1,19,1,4,NA,NA +65420,7,2,2,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,22306.465066,21209.244743,2,98,5,5,0.59,7,7,3,0,0,2,50,1,5,4,NA +65421,7,2,2,70,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,27103.875166,29389.100868,1,95,4,4,0.65,4,4,0,0,1,2,70,1,1,2,NA +65422,7,2,1,7,NA,4,4,1,7,94,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9535.948,10279.797103,2,93,4,4,0.56,5,5,2,1,0,1,27,1,2,6,NA +65423,7,2,1,50,NA,1,1,2,NA,NA,2,NA,2,2,7,NA,1,4,NA,1,2,2,1,2,2,1,2,2,1,21506.056514,21429.79108,2,95,6,6,2.24,1,1,0,0,0,1,50,2,1,4,NA +65424,7,2,1,2,NA,5,6,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8821.037035,9893.758146,1,102,15,15,4.59,4,4,1,1,0,1,35,1,5,1,5 +65425,7,2,2,2,NA,5,7,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21940.920626,22868.807624,1,95,6,6,1.15,5,5,2,1,0,1,29,1,4,6,NA +65426,7,2,2,4,NA,3,3,2,4,58,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,40064.649797,41758.993816,1,93,10,10,2.48,5,5,2,1,0,1,40,2,5,1,5 +65427,7,2,2,64,NA,3,3,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,36910.075958,37362.049791,2,97,2,2,0.38,4,4,0,2,2,2,64,2,1,1,NA 
+65428,7,2,2,8,NA,5,6,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9007.62445,9504.796896,2,102,15,15,3.92,5,5,1,2,0,1,34,2,5,1,5 +65429,7,2,1,17,NA,2,2,2,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16033.31661,16386.200415,2,90,5,5,0.8,5,5,0,3,0,2,40,2,1,5,NA +65430,7,2,2,34,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,3,1,2,2,2,2,2,2,2,2,2,2,2,35353.005268,36099.35979,2,94,9,9,2.51,4,4,2,0,0,2,34,2,3,1,3 +65431,7,2,1,73,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,NA,8497.912951,8664.367847,2,100,4,4,1.47,1,1,0,0,1,1,73,1,4,4,NA +65432,7,1,2,70,NA,4,4,NA,NA,NA,2,NA,2,2,6,NA,4,2,NA,1,2,1,1,2,1,NA,NA,NA,NA,12344.929687,0,1,93,2,2,0.78,1,1,0,0,1,2,70,2,4,2,NA +65433,7,2,2,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,34073.955911,34748.968506,2,92,14,14,4.03,4,4,1,1,1,2,30,1,5,4,NA +65434,7,2,2,34,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,3,15494.204758,16562.601332,2,95,15,15,5,3,3,0,1,0,2,34,2,5,1,NA +65435,7,2,2,4,NA,4,4,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8340.858072,8802.325948,2,103,7,7,1.55,5,5,2,2,0,2,31,1,4,3,NA +65436,7,2,1,28,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,15196.92397,16091.373745,2,101,7,3,1.1,3,1,0,0,0,1,21,2,4,5,NA +65437,7,1,1,73,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,14077.240235,0,1,97,5,5,1.84,1,1,0,0,1,1,73,1,4,2,NA +65438,7,2,2,3,NA,4,4,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8833.042831,9629.276723,1,99,10,10,2.07,7,7,2,3,1,2,35,1,5,4,NA +65439,7,2,2,15,NA,2,2,2,15,180,NA,NA,2,1,3,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16189.692833,16766.382859,1,93,15,15,2.96,7,7,0,1,1,2,18,1,2,NA,NA +65440,7,2,1,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7323.703412,7380.991723,2,100,8,8,2.67,3,3,0,0,1,1,61,1,3,1,4 +65441,7,2,2,10,NA,3,3,1,10,121,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22308.590534,22137.463018,1,95,7,2,0.35,5,4,1,2,0,1,26,1,4,6,NA 
+65442,7,2,1,2,NA,4,4,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5942.817425,6188.375425,2,97,13,13,NA,6,6,2,2,0,2,24,1,2,6,NA +65443,7,2,2,37,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,5,1,2,1,2,1,1,2,1,1,2,1,3,17978.142628,18308.053591,1,91,14,14,4.32,3,3,0,2,0,2,37,2,5,1,NA +65444,7,2,1,4,NA,1,1,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12870.245769,13452.024092,1,102,13,13,NA,7,7,3,1,2,2,62,2,1,1,2 +65445,7,2,1,1,22,2,2,1,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8742.6471,9019.39043,2,100,99,99,NA,6,6,1,1,2,1,37,2,3,1,3 +65446,7,2,1,65,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,8168.47072,10568.054811,2,90,6,6,1.73,2,2,0,0,2,2,69,1,2,1,2 +65447,7,2,1,44,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18841.134943,20530.600583,2,95,14,14,5,2,2,0,0,0,1,44,1,3,1,4 +65448,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,10346.035773,10764.448363,1,99,13,13,NA,3,3,0,0,2,1,67,1,2,1,2 +65449,7,2,2,2,NA,4,4,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7630.869112,8210.900286,2,95,6,6,0.97,6,6,2,2,0,1,37,1,3,1,4 +65450,7,2,2,49,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,2,6,NA,2,2,2,2,2,2,2,2,2,2,34954.173075,37266.073044,2,98,6,6,1.21,4,4,1,0,0,2,49,2,2,6,NA +65451,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,87322.7576,94138.522454,2,91,15,15,4.2,6,6,2,0,2,1,63,1,1,1,3 +65452,7,2,2,16,NA,4,4,2,16,203,NA,NA,2,2,3,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10694.834447,10900.381844,1,90,4,4,0.58,6,6,0,3,0,2,21,2,5,5,NA +65453,7,2,2,67,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,12823.794396,13219.948623,2,100,4,4,0.44,7,7,1,2,2,1,71,2,1,1,1 +65454,7,2,2,36,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,11329.574648,11407.369464,1,99,10,10,4.76,2,2,1,0,0,2,36,2,5,3,NA +65455,7,2,2,14,NA,3,3,2,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,101168.631125,108085.531312,1,99,6,6,1.52,4,4,0,2,0,2,43,1,3,5,NA 
+65456,7,2,2,12,NA,1,1,2,13,156,NA,NA,2,2,3,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,16594.391299,17940.930507,3,91,6,6,0.89,7,7,1,1,0,1,59,2,1,1,1 +65457,7,2,1,25,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,13367.406737,13987.969236,1,92,15,15,4.44,5,5,0,0,1,1,65,NA,NA,1,5 +65458,7,2,1,63,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,152458.779794,153888.636628,2,94,15,15,5,2,2,0,0,2,1,63,1,5,1,5 +65459,7,2,2,2,NA,4,4,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7348.24433,7537.914984,2,95,1,1,0.26,2,2,1,0,0,2,20,1,2,77,NA +65460,7,2,2,64,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,2,2,NA,2,2,2,1,2,2,1,2,2,1,7608.893707,8232.851326,2,100,5,5,2.11,1,1,0,0,1,2,64,2,2,2,NA +65461,7,2,1,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,23775.734331,24432.76962,2,96,5,5,1.08,3,3,0,0,0,1,50,1,3,1,4 +65462,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,149120.841984,149985.960772,1,98,9,9,5,1,1,0,0,0,2,49,1,5,3,NA +65463,7,2,2,6,NA,4,4,2,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7814.742747,8587.431439,2,95,7,7,1.55,5,5,0,3,0,1,30,1,4,1,4 +65464,7,2,2,29,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,18601.70604,17686.717051,1,90,14,14,2.96,5,5,1,2,0,1,31,1,5,1,4 +65465,7,2,2,7,NA,5,6,1,7,89,NA,NA,2,1,3,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11141.059365,11755.98595,1,98,10,10,1.89,7,7,3,2,0,1,50,1,5,1,5 +65466,7,2,1,7,NA,4,4,1,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12221.817539,12310.445459,2,100,10,10,2.75,5,5,1,1,1,1,27,1,3,1,5 +65467,7,2,1,0,6,5,6,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9148.090461,9086.623516,1,92,6,6,1.47,3,3,1,0,0,2,32,2,3,1,3 +65468,7,2,1,7,NA,4,4,2,7,85,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8655.162127,8808.105516,2,95,4,4,0.65,4,4,1,2,0,2,27,1,3,5,NA +65469,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,174520.785302,180467.973037,1,95,77,77,NA,3,3,0,0,2,1,80,NA,NA,1,NA 
+65470,7,2,1,4,NA,1,1,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14321.363328,13945.949794,2,96,77,77,NA,7,7,3,2,0,2,33,2,2,6,NA +65471,7,1,2,80,NA,5,6,NA,NA,NA,2,NA,2,2,7,NA,5,2,NA,1,1,1,1,2,1,NA,NA,NA,NA,10831.995402,0,3,90,4,4,0.92,3,3,0,0,1,2,56,2,2,1,2 +65472,7,2,1,3,NA,2,2,1,3,38,NA,NA,2,2,2,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13504.027725,14468.59111,2,93,7,7,1.52,4,4,1,1,0,1,44,2,4,1,NA +65473,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,9015.10987,2,95,2,2,0.81,1,1,0,0,1,2,60,1,2,2,NA +65474,7,2,2,63,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,10644.199654,11088.440242,2,98,7,7,2.72,2,2,0,0,2,1,63,1,4,1,4 +65475,7,2,2,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,15520.204559,18173.553405,1,96,15,15,5,6,6,1,1,1,2,44,1,3,1,3 +65476,7,2,2,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,38151.592121,38761.146216,1,101,1,1,0.21,3,3,0,2,0,2,32,1,4,5,NA +65477,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,3,3,NA,1,2,2,1,2,2,1,2,2,2,26609.95229,27262.621072,1,93,15,15,2.96,7,7,0,1,1,2,18,1,2,NA,NA +65478,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,9680.216878,10112.428208,1,96,10,10,5,1,1,0,0,1,2,61,1,4,2,NA +65479,7,2,1,51,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21283.272729,21364.140181,1,100,15,15,3.7,5,5,0,3,0,1,51,1,5,1,5 +65480,7,2,1,10,NA,4,4,2,10,121,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8988.052978,9133.633722,2,95,8,8,1.85,5,5,1,2,0,1,55,1,2,1,3 +65481,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,12782.405209,13767.697696,1,100,2,2,0.54,1,1,0,0,1,2,62,1,3,2,NA +65482,7,1,1,78,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,81678.388584,0,1,92,9,9,3.97,2,2,0,0,2,2,67,1,3,1,5 +65483,7,1,2,52,NA,3,3,NA,NA,NA,2,NA,2,1,8,NA,5,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,119276.206757,0,1,103,15,15,5,2,2,0,1,0,2,52,2,5,3,NA 
+65484,7,2,1,1,14,5,7,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6328.720733,6765.026101,1,91,15,15,4.01,5,5,1,2,0,2,34,1,5,1,5 +65485,7,2,1,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,30407.489838,32596.441724,1,97,6,6,1.03,6,6,2,2,0,2,38,1,5,1,4 +65486,7,2,2,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,10346.035773,10598.2543,1,99,7,6,1.84,3,2,0,0,2,1,70,1,2,1,4 +65487,7,2,2,1,18,5,6,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8173.816615,8165.183174,1,97,14,14,4.45,3,3,1,0,0,2,35,2,5,1,5 +65488,7,2,1,38,NA,3,3,2,NA,NA,2,NA,2,1,3,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15425.274863,16733.462521,1,90,7,7,2.1,3,3,1,0,0,2,40,2,5,1,4 +65489,7,2,1,18,NA,4,4,2,18,216,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10558.572325,11042.564542,2,99,4,4,0.41,7,7,0,2,0,2,36,1,3,5,NA +65490,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,1,1,2,2,1,2,2,NA,NA,NA,NA,22225.098465,28674.927734,2,97,1,1,0.27,2,2,1,0,0,2,20,1,2,5,NA +65491,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,24654.107413,23891.769846,1,100,5,5,0.85,5,5,0,2,0,2,54,1,2,2,NA +65492,7,2,2,65,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,1,1,2,1,3,11342.714991,11512.583626,1,99,6,6,1.07,6,6,2,1,2,1,44,2,5,4,NA +65493,7,2,1,9,NA,5,7,2,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9720.482616,10484.39038,2,91,15,15,3.7,5,5,1,2,0,1,50,NA,NA,1,5 +65494,7,2,1,3,NA,3,3,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,88790.489317,95503.197037,2,91,15,15,5,3,3,1,0,0,1,40,1,4,6,NA +65495,7,2,2,5,NA,1,1,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11859.546176,12772.78418,1,102,13,13,NA,7,7,3,1,2,2,62,2,1,1,2 +65496,7,2,1,5,NA,1,1,1,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16613.593777,17139.487274,1,92,14,14,3.15,5,5,1,2,0,1,34,1,4,1,4 +65497,7,2,1,42,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,116464.874823,129540.216925,2,94,3,3,0.54,4,4,0,1,0,2,48,1,3,1,3 
+65498,7,2,2,39,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,4,2,1,2,2,1,2,2,1,2,2,1,16369.013285,16419.342576,3,91,5,5,0.65,7,7,0,4,0,2,39,1,3,4,NA +65499,7,2,1,19,NA,3,3,2,19,231,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19277.921327,19518.221274,2,97,7,7,1.89,3,3,0,0,0,1,50,1,2,1,2 +65500,7,2,1,46,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,17787.524589,17931.023329,2,100,8,8,4.82,1,1,0,0,0,1,46,1,3,3,NA +65501,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,53634.754806,54437.113731,1,98,5,1,0,3,1,0,0,0,1,32,1,5,5,NA +65502,7,2,1,1,16,4,4,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9438.902193,9727.203655,2,102,4,4,0.72,4,4,2,0,0,1,48,1,3,1,3 +65503,7,2,2,44,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,1,2,2,2,2,2,2,2,2,2,2,2,33767.584626,34595.8104,2,102,6,4,1.02,6,2,0,4,0,2,43,2,1,5,NA +65504,7,2,1,7,NA,1,1,1,7,86,NA,NA,2,2,2,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13870.762641,14200.45921,2,102,7,7,1.33,6,6,1,3,0,1,34,2,2,1,1 +65505,7,2,1,55,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17839.845235,18404.710221,3,92,15,15,5,3,3,0,1,0,1,55,2,5,1,4 +65506,7,2,1,50,NA,1,1,1,NA,NA,2,NA,99,NA,NA,NA,2,5,NA,2,2,2,1,2,2,1,2,2,2,30839.213846,30386.695922,1,95,4,4,0.68,5,5,0,1,0,2,38,2,3,4,NA +65507,7,2,2,13,NA,2,2,2,13,158,NA,NA,2,1,3,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15442.648697,16179.397194,3,90,4,4,0.63,5,5,0,3,0,1,45,2,4,1,4 +65508,7,2,1,31,NA,2,2,2,NA,NA,1,1,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,32770.630289,33645.258285,2,95,14,14,4.45,3,3,1,0,0,2,29,1,5,1,5 +65509,7,2,2,10,NA,4,4,2,10,130,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9235.947079,9749.775473,1,96,7,7,1.39,5,5,0,2,2,1,69,2,2,1,2 +65510,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,44238.530111,44780.242841,1,95,3,3,0.87,2,2,0,0,2,2,65,1,2,1,3 +65511,7,2,1,7,NA,1,1,1,7,86,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14820.807433,14905.891142,2,102,7,7,1.53,5,5,0,3,0,1,43,2,2,1,4 
+65512,7,2,2,39,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,3,1,1,2,2,2,2,2,2,2,2,2,2,35353.005268,38359.582487,2,94,4,4,0.81,3,3,0,1,0,1,49,2,3,1,3 +65513,7,2,1,32,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,47932.213152,49921.729758,1,92,10,10,3.04,4,4,1,1,0,1,32,1,3,1,2 +65514,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,43425.032766,48500.713195,1,96,15,15,5,2,2,0,0,2,2,80,1,5,1,5 +65515,7,2,2,14,NA,4,4,2,14,173,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17185.945299,17434.901758,2,91,2,2,0.26,4,4,0,1,0,1,20,1,3,5,NA +65516,7,2,1,0,9,1,1,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,7527.69611,7949.840792,1,92,99,99,NA,5,5,1,0,0,1,46,2,3,1,3 +65517,7,2,2,25,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,4,1,2,1,2,2,1,2,2,2,2,2,2,39426.061521,41875.604468,2,94,8,8,2.7,3,3,1,0,0,1,27,1,3,1,4 +65518,7,2,1,45,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16052.801806,16001.600848,2,90,6,6,1.03,6,6,3,1,0,1,45,2,2,1,2 +65519,7,2,1,13,NA,5,6,2,13,165,NA,NA,2,2,1,6,NA,NA,NA,1,1,1,1,2,1,1,2,1,1,10346.302718,11892.421636,2,91,99,99,NA,7,4,0,4,0,1,36,2,9,1,2 +65520,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,44274.069981,46208.106563,2,101,7,7,2.58,2,2,0,0,0,2,36,1,5,1,3 +65521,7,2,1,17,NA,3,3,1,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26457.098276,26381.28385,2,98,3,3,0.38,5,5,0,4,0,2,39,1,4,5,NA +65522,7,2,2,71,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,1,2,NA,11550.700389,12323.715655,2,93,2,2,0.54,1,1,0,0,1,2,71,1,2,2,NA +65523,7,2,2,11,NA,4,4,1,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8362.256577,9003.967662,2,100,8,8,1.8,5,5,0,3,0,2,43,1,3,1,3 +65524,7,2,1,63,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,1,1,2,1,2,2,1,2,10596.142548,10856.489537,1,94,7,7,1.48,5,5,0,0,1,2,52,2,1,1,1 +65525,7,2,2,69,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,11224.764345,11466.33634,1,101,5,5,1.84,1,1,0,0,1,2,69,1,5,2,NA 
+65526,7,2,1,4,NA,4,4,1,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12032.669734,13267.985975,2,96,6,6,1.48,4,4,1,1,0,2,25,1,4,5,NA +65527,7,2,1,11,NA,4,4,2,11,142,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7730.47951,9212.541007,1,99,7,7,1.53,5,5,0,3,0,1,39,1,3,1,3 +65528,7,2,1,0,0,4,4,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,5360.999096,5929.781839,2,90,8,6,1.46,4,3,1,1,0,2,21,1,5,6,NA +65529,7,2,2,1,18,5,7,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22159.470641,22857.583467,1,95,3,3,0.45,4,4,2,1,0,2,26,1,3,4,NA +65530,7,2,1,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,136880.768184,145646.064425,1,94,5,5,1.04,4,4,1,1,0,1,18,1,2,NA,NA +65531,7,2,1,11,NA,1,1,1,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11881.117946,11763.87702,1,102,15,15,4.47,4,4,0,2,0,2,30,1,4,1,4 +65532,7,2,1,18,NA,4,4,2,18,224,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11107.552941,11616.709768,2,99,4,4,1,3,3,0,1,0,2,38,1,3,5,NA +65533,7,2,1,4,NA,4,4,1,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11302.271106,11769.28244,3,92,6,6,0.93,5,5,2,1,0,2,37,1,5,1,3 +65534,7,2,2,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,27381.645976,28428.806364,2,95,15,1,0.18,5,1,0,0,0,1,47,1,5,1,3 +65535,7,2,2,0,10,3,3,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8383.207272,8612.693254,1,91,6,6,1.03,5,5,3,0,0,2,37,1,5,6,NA +65536,7,2,2,61,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,2,1,NA,2,2,2,2,2,2,1,2,2,2,15876.871857,17178.834476,2,102,4,4,0.67,4,4,0,0,2,2,20,1,1,NA,NA +65537,7,2,1,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,31707.924183,33437.457992,1,97,3,3,0.73,3,3,0,0,0,2,50,1,4,1,3 +65538,7,2,2,9,NA,3,3,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,47428.012402,47285.525443,2,98,10,10,4.42,2,2,0,1,0,2,41,1,4,3,NA +65539,7,2,1,27,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,13367.406737,13987.969236,1,92,15,15,4.44,5,5,0,0,1,1,65,NA,NA,1,5 
+65540,7,2,1,9,NA,3,3,1,9,115,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74331.764009,78594.005469,2,91,9,9,2.6,4,4,0,2,0,1,53,1,2,1,5 +65541,7,2,2,9,NA,4,4,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,12444.673315,2,97,7,7,1.72,5,5,1,2,0,1,32,1,4,1,4 +65542,7,2,1,2,NA,3,3,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37959.146468,40828.920669,1,94,15,15,5,3,3,1,0,0,1,35,1,5,1,5 +65543,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,108373.053289,112114.199141,3,92,15,15,5,3,3,0,0,0,1,56,NA,NA,1,4 +65544,7,2,2,19,NA,4,4,2,19,234,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19113.842115,19208.656277,1,97,1,1,0.09,4,4,0,1,0,2,44,2,2,1,3 +65545,7,2,1,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,116462.885745,124577.022727,1,93,15,15,5,2,2,0,0,0,1,28,1,5,6,NA +65546,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,25052.373156,24265.266353,1,92,6,6,1.31,3,3,0,0,1,2,80,1,3,4,NA +65547,7,2,2,63,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,24832.705563,25238.431199,2,100,2,2,0.73,1,1,0,0,1,2,63,1,4,2,NA +65548,7,2,2,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,99120.116925,103753.216485,2,98,14,14,4.16,3,3,0,0,0,1,49,1,5,1,4 +65549,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,12291.154515,13189.875012,1,92,5,5,1.84,1,1,0,0,1,1,80,1,3,2,NA +65550,7,2,1,31,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,34997.800447,36838.864542,2,96,14,14,3.36,4,4,1,1,0,2,28,1,2,6,NA +65551,7,2,2,8,NA,3,3,2,9,108,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,60197.256541,63931.531988,2,94,14,14,2.63,6,6,1,3,0,1,39,1,4,1,4 +65552,7,2,2,13,NA,3,3,1,13,165,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,37065.780724,38522.476457,1,98,5,5,0.74,5,5,0,3,0,1,35,1,2,6,NA +65553,7,2,2,42,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,2,4,2,2,2,2,2,2,2,NA,NA,NA,NA,32229.130119,41089.02125,2,90,13,2,0.46,2,1,0,0,1,2,80,NA,NA,77,NA 
+65554,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32980.717958,34973.413194,1,95,5,5,1.19,3,3,1,1,0,1,47,1,2,3,NA +65555,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,53634.754806,56026.668478,1,98,5,2,0.45,3,1,0,0,0,1,32,1,5,5,NA +65556,7,2,1,5,NA,4,4,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10234.881417,10547.495238,2,97,2,2,0.33,4,4,2,1,0,2,34,1,2,5,NA +65557,7,2,2,30,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,5,5,2,2,2,2,2,2,2,NA,NA,NA,NA,27973.581456,27218.795462,2,99,99,99,NA,5,3,0,1,0,1,40,2,1,6,NA +65558,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,42489.109693,44345.173355,1,100,14,9,5,2,1,0,0,0,1,37,1,2,6,NA +65559,7,2,1,8,NA,2,2,2,8,101,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,9807.589376,10786.008845,2,90,3,3,0.38,5,5,0,4,0,2,33,2,2,5,NA +65560,7,2,1,4,NA,2,2,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11852.624029,11657.344215,2,99,99,99,NA,3,3,1,0,0,1,35,2,2,1,2 +65561,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,1,2,NA,60148.377616,67408.231176,1,92,8,8,2.17,4,4,0,1,2,2,80,1,3,2,NA +65562,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,9548.871949,9975.218868,2,95,6,6,1.36,3,3,0,1,1,2,62,1,4,5,NA +65563,7,2,1,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,39260.973419,41280.172031,1,91,4,2,0.55,3,1,0,0,0,2,22,1,5,6,NA +65564,7,2,2,36,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,19738.81952,19597.773568,3,90,3,3,0.37,5,5,2,2,0,2,36,2,4,4,NA +65565,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,108410.783716,113500.095101,1,91,15,6,2.69,3,1,0,0,0,1,27,1,3,6,NA +65566,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,85444.349063,88741.398785,3,91,8,8,1.95,4,4,2,0,0,2,30,1,5,1,4 +65567,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,32375.321924,34954.784356,1,95,1,1,0.21,4,4,1,0,1,2,75,1,1,2,NA 
+65568,7,2,2,36,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,1,2,2,2,2,1,2,2,NA,NA,NA,NA,45655.090694,44423.220436,3,92,6,6,0.86,7,7,1,4,0,2,36,2,1,1,1 +65569,7,2,2,16,NA,2,2,2,16,197,NA,NA,1,1,NA,9,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,20678.81116,21093.085203,1,93,5,5,0.84,5,5,1,2,0,2,52,2,1,3,NA +65570,7,2,1,16,NA,3,3,2,16,200,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,35593.481513,36275.823276,1,91,5,5,1.36,2,2,0,1,0,2,49,1,5,3,NA +65571,7,2,2,74,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,2,NA,1,2,1,1,2,1,1,2,1,NA,13446.397433,14295.858749,1,93,3,3,0.65,3,3,0,0,3,2,74,2,1,2,NA +65572,7,2,2,79,NA,5,6,1,NA,NA,2,NA,2,1,9,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,15984.880532,16163.124545,1,98,9,9,4.92,1,1,0,0,1,2,79,2,5,5,NA +65573,7,2,1,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,22165.906062,22485.884772,1,96,7,7,3.58,1,1,0,0,0,1,44,1,4,3,NA +65574,7,2,1,0,3,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8637.740206,8903.804193,1,91,6,6,1.07,6,6,3,1,0,2,27,1,4,6,NA +65575,7,2,1,28,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,2,6,NA,2,2,2,2,2,2,1,2,2,2,35669.2076,38557.984895,2,94,14,4,1.74,5,1,0,0,0,1,24,2,4,5,NA +65576,7,2,2,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,63207.045171,66153.523399,2,102,10,6,2.3,2,1,0,0,0,2,27,1,4,6,NA +65577,7,2,1,54,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15044.515884,15040.577827,3,90,15,15,5,3,3,0,0,0,2,55,2,4,1,4 +65578,7,2,2,1,18,1,1,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11901.705423,12408.80856,2,102,8,8,1.91,5,5,1,2,0,1,36,2,1,1,4 +65579,7,2,2,3,NA,5,6,2,3,47,NA,NA,2,1,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5380.83825,5375.15484,2,95,15,15,5,3,3,1,0,0,1,53,1,5,1,2 +65580,7,2,2,19,NA,2,2,1,19,233,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17338.975742,19021.185309,2,93,8,8,2.49,3,3,0,0,0,1,52,2,2,1,4 +65581,7,2,1,46,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,28542.421068,31101.791288,2,101,7,7,2.58,2,2,0,0,0,2,36,1,5,1,3 
+65582,7,2,2,7,NA,5,6,2,7,95,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5481.728631,5853.665419,1,91,15,15,5,6,6,0,2,2,1,50,2,5,1,5 +65583,7,2,2,16,NA,4,4,2,16,202,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10694.834447,10849.760352,1,90,9,9,1.65,7,7,0,4,0,1,36,1,4,1,4 +65584,7,2,1,11,NA,4,4,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8579.422451,8987.382625,1,99,6,6,2.18,2,2,0,1,0,2,31,1,4,5,NA +65585,7,2,2,49,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,3,21956.01693,22136.94803,2,91,8,8,2.34,4,4,0,2,0,1,56,2,5,1,5 +65586,7,2,2,35,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,32501.623429,34582.098932,2,97,3,3,0.46,5,5,0,3,0,1,40,1,2,1,3 +65587,7,2,1,78,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,62466.132707,66344.89016,1,94,6,6,1.91,2,2,0,0,2,1,78,1,4,1,3 +65588,7,2,2,15,NA,3,3,2,15,184,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74165.041171,75130.242541,2,94,10,10,3.51,3,3,0,2,0,2,39,2,4,3,NA +65589,7,2,1,69,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,2,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,10288.337394,10907.668198,3,91,14,4,1.02,6,2,0,0,2,1,48,2,1,1,1 +65590,7,2,2,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,18795.138328,22008.372948,2,93,9,9,2.07,5,5,0,1,0,1,55,NA,NA,5,NA +65591,7,2,2,5,NA,2,2,2,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11429.37307,11583.084593,2,90,15,15,5,4,4,1,1,0,1,49,1,4,1,4 +65592,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,26010.201422,30173.312639,2,95,3,3,1.21,1,1,0,0,1,2,80,1,2,2,NA +65593,7,2,2,51,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,5,1,NA,1,2,2,1,2,2,2,2,2,2,20178.078974,20795.430208,2,100,14,14,3.58,4,4,0,1,0,1,46,2,5,1,5 +65594,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10604.379638,1,99,4,4,1.74,1,1,0,0,1,2,60,1,3,4,NA +65595,7,1,2,66,NA,2,2,NA,NA,NA,2,NA,2,2,4,NA,4,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,10235.0654,0,2,93,6,6,1.3,4,4,0,0,2,2,36,2,4,1,4 
+65596,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11896.060838,12427.20723,1,96,15,15,5,2,2,0,0,2,1,61,1,5,1,5 +65597,7,2,2,19,NA,4,4,1,19,230,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,2,2,0.46,1,1,0,0,0,2,19,1,4,NA,NA +65598,7,1,1,53,NA,3,3,NA,NA,NA,2,NA,2,1,5,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,20980.882737,0,2,93,6,6,1.48,4,4,0,1,0,1,53,2,2,1,3 +65599,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12127.194143,12676.977658,3,90,15,15,4.2,6,6,1,0,2,1,60,1,5,1,4 +65600,7,2,2,19,NA,1,1,2,19,232,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20937.26435,21936.153747,1,90,15,15,5,4,4,0,0,0,1,54,1,5,1,5 +65601,7,2,1,15,NA,4,4,2,15,190,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13366.904548,14267.035861,1,93,7,7,1.79,4,4,0,2,0,1,53,2,4,1,4 +65602,7,2,1,19,NA,4,4,2,20,NA,2,NA,2,2,3,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12548.434193,12514.39179,2,99,3,1,0.31,4,1,0,0,0,1,19,2,4,NA,NA +65603,7,2,2,63,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,1,1,2,1,1,2,1,NA,13656.521422,13861.04165,1,91,4,4,1.33,2,2,0,0,2,1,65,2,4,1,3 +65604,7,2,1,29,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9668.679611,9969.996889,1,102,15,15,3.82,5,5,1,1,0,1,29,1,4,1,4 +65605,7,2,1,68,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,10274.998921,11619.264446,1,91,8,8,3.57,2,2,0,0,2,1,68,1,3,1,2 +65606,7,2,2,48,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,18215.139307,19150.911381,1,91,77,77,NA,4,4,0,2,0,1,50,2,5,1,5 +65607,7,2,2,5,NA,4,4,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8873.727797,9364.677323,2,99,5,5,0.78,5,5,2,2,0,2,30,1,3,5,NA +65608,7,2,1,5,NA,2,2,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15745.774489,16244.197679,2,91,4,4,0.67,5,4,2,0,2,2,66,2,1,1,NA +65609,7,2,2,1,19,3,3,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25257.21318,27876.153487,1,101,7,7,1.82,4,4,2,0,0,2,27,1,2,1,3 
+65610,7,2,1,9,NA,4,4,1,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8714.559478,8865.734494,2,96,3,3,0.47,6,6,0,4,0,1,36,1,4,1,4 +65611,7,2,2,19,NA,2,2,2,19,232,2,NA,2,2,3,12,NA,NA,NA,2,2,2,2,2,2,1,2,2,2,12680.621719,13709.581084,2,90,99,99,NA,5,5,1,1,0,2,40,2,3,1,1 +65612,7,2,1,5,NA,4,4,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,7991.632447,8596.395387,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +65613,7,2,2,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,122472.877524,125201.399718,2,94,15,15,5,3,3,0,1,1,1,63,1,5,1,3 +65614,7,2,2,10,NA,1,1,1,10,121,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15859.146626,16025.878294,1,92,1,1,0,3,1,0,2,0,2,43,1,2,4,NA +65615,7,1,1,80,NA,3,3,NA,NA,NA,2,NA,2,1,9,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,38666.703155,0,2,94,2,2,0.81,1,1,0,0,1,1,80,2,1,2,NA +65616,7,2,1,0,3,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9570.577309,9932.368407,1,95,6,6,1,6,6,3,0,0,2,23,1,4,6,NA +65617,7,2,1,41,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21571.318341,21852.480517,1,102,15,15,5,3,3,0,1,0,1,41,1,5,1,5 +65618,7,2,2,2,NA,2,2,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9955.153132,10990.75621,2,94,9,9,2.51,4,4,2,0,0,2,34,2,3,1,3 +65619,7,2,1,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,50647.682308,53126.874664,2,96,8,8,3.67,2,2,0,0,0,1,58,1,3,3,NA +65620,7,2,2,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,110849.032294,115406.960266,2,92,14,6,2.75,2,1,0,0,0,2,25,1,5,5,NA +65621,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,8748.603912,9090.779024,1,96,14,14,5,2,2,0,0,2,2,61,1,3,1,3 +65622,7,2,1,7,NA,4,4,1,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15130.588085,15393.064565,2,102,15,15,3.82,5,5,1,2,0,1,34,1,3,1,4 +65623,7,2,2,25,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,18626.118419,17709.928623,1,93,15,15,5,5,5,0,1,0,2,25,1,5,5,NA 
+65624,7,2,2,11,NA,4,4,1,11,135,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,12444.673315,2,97,7,7,1.72,5,5,1,2,0,1,32,1,4,1,4 +65625,7,2,1,23,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14690.01297,15450.354874,3,91,2,2,0.73,1,1,0,0,0,1,23,1,5,5,NA +65626,7,2,1,11,NA,4,4,2,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8017.552697,8398.795399,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +65627,7,2,1,16,NA,4,4,2,16,193,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15970.206483,16086.518081,2,97,2,2,0.3,4,4,0,2,0,1,42,1,2,6,NA +65628,7,2,1,56,NA,1,1,2,NA,NA,1,1,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,25381.488007,25009.053797,2,99,6,6,2.33,1,1,0,0,0,1,56,1,4,2,NA +65629,7,2,2,51,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,2,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,13281.538492,13864.580434,3,90,5,5,1.05,3,3,0,0,0,1,53,2,1,1,2 +65630,7,2,1,4,NA,1,1,2,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16214.341036,15789.305867,1,98,9,9,2,7,6,3,2,0,2,32,1,4,1,4 +65631,7,2,1,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17014.040717,2,97,2,2,0.49,2,2,0,0,0,1,24,1,4,6,NA +65632,7,2,2,42,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,34639.996543,35471.414202,1,102,7,7,1.7,4,4,0,0,2,1,44,1,4,4,NA +65633,7,2,1,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,31699.998102,36617.55692,1,98,1,1,0.04,1,1,0,0,0,1,57,1,4,5,NA +65634,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,9113.905743,9695.883475,3,90,15,15,5,5,5,0,1,1,2,61,1,5,2,NA +65635,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,22225.098465,22243.480978,2,97,4,4,0.81,4,4,1,1,0,2,51,1,3,4,NA +65636,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,19323.414782,18794.578845,2,91,15,15,5,4,4,0,0,1,1,61,NA,NA,1,2 +65637,7,2,1,66,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,6358.062034,6283.458155,1,99,7,7,3.49,1,1,0,0,1,1,66,1,4,3,NA 
+65638,7,2,1,12,NA,5,7,2,12,153,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21852.102821,22875.024578,3,91,5,5,0.65,7,7,0,4,0,2,39,1,3,4,NA +65639,7,1,2,14,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,91539.042546,0,1,101,15,15,5,4,4,0,2,0,2,40,1,4,1,3 +65640,7,2,2,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,12751.545122,13835.806616,2,101,1,1,0.08,2,2,0,0,2,2,80,1,1,2,NA +65641,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,74517.751389,83352.706958,2,94,5,5,0.89,4,4,0,2,0,2,51,1,2,3,NA +65642,7,2,2,41,NA,3,3,2,NA,NA,2,NA,2,1,7,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,96255.674553,98150.909956,2,94,14,14,5,3,3,0,0,0,1,42,1,5,1,5 +65643,7,2,2,38,NA,3,3,2,NA,NA,2,NA,2,2,4,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,95214.22557,102645.939709,1,97,15,15,5,5,4,1,1,0,2,38,NA,NA,1,5 +65644,7,2,2,19,NA,4,4,2,19,233,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18905.695776,2,101,1,1,0.23,2,1,0,0,0,2,19,1,3,NA,NA +65645,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,38666.703155,42427.769217,2,91,7,7,1.61,4,4,0,0,3,1,65,1,3,6,NA +65646,7,2,1,8,NA,3,3,1,8,99,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18596.465852,19407.799286,1,94,7,7,1.29,6,6,1,3,0,1,38,1,3,1,2 +65647,7,2,2,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,52701.331723,59294.433957,1,94,5,5,1.04,4,4,0,2,0,2,29,1,3,1,3 +65648,7,2,1,61,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,7101.739553,7379.502561,2,100,4,4,1.74,1,1,0,0,1,1,61,1,3,3,NA +65649,7,2,2,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,26428.874559,27145.22781,2,99,14,14,4.09,3,3,0,2,0,2,37,1,5,5,NA +65650,7,2,2,79,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,20101.581228,20798.747999,3,92,77,77,NA,1,1,0,0,1,2,79,1,4,2,NA +65651,7,2,1,6,NA,5,7,2,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7923.925927,8546.646915,1,91,15,15,5,5,5,0,3,0,1,40,1,5,1,5 
+65652,7,1,2,1,20,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24749.659974,0,1,94,1,1,0.08,7,7,2,4,0,1,31,1,2,1,4 +65653,7,2,2,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,33506.462855,33906.169633,2,96,4,4,0.65,5,5,0,3,0,1,30,1,4,1,2 +65654,7,2,1,10,NA,5,6,2,10,131,NA,NA,1,1,NA,5,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,6850.601671,7624.777305,3,91,6,6,1.34,4,4,0,2,0,1,52,2,3,1,1 +65655,7,2,2,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,16741.034883,16786.385595,2,95,12,12,NA,3,3,0,0,0,2,29,2,5,5,NA +65656,7,2,2,2,NA,3,3,1,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,45341.612978,50043.120503,3,91,14,14,5,3,3,1,0,0,2,30,1,4,1,5 +65657,7,2,1,15,NA,1,1,1,16,192,NA,NA,1,1,NA,9,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,29234.272259,28953.402615,2,98,7,7,2.25,3,3,0,1,0,2,51,2,1,1,1 +65658,7,2,2,49,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,29448.834066,28780.798614,1,97,9,9,4.92,1,1,0,0,0,2,49,1,5,5,NA +65659,7,2,2,71,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,12344.929687,13268.288363,1,93,2,2,0.74,1,1,0,0,1,2,71,2,2,2,NA +65660,7,2,2,73,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,36122.708948,47794.076653,1,97,2,2,0.75,1,1,0,0,1,2,73,2,2,2,NA +65661,7,2,2,38,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,3,3,2,2,2,2,1,2,2,2,2,2,2,35678.162793,37949.435224,2,99,3,3,0.52,3,3,0,0,1,2,38,2,3,3,NA +65662,7,2,2,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,33506.462855,35016.985063,2,96,6,6,1.62,3,3,0,2,0,2,31,1,3,5,NA +65663,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,3,1,2,2,1,2,2,1,2,2,1,52701.331723,53172.517558,1,94,5,5,0.74,5,5,1,1,0,2,24,1,3,1,4 +65664,7,2,2,13,NA,3,3,2,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71837.483011,75155.181458,1,94,15,15,5,3,3,0,1,0,2,43,1,5,1,5 +65665,7,2,1,19,NA,1,1,1,19,233,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15626.107676,15463.700675,2,93,9,9,1.94,6,6,0,3,0,2,37,NA,NA,3,NA 
+65666,7,2,2,3,NA,2,2,1,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,16648.051651,2,98,8,8,1.48,7,7,3,0,0,1,26,1,3,1,3 +65667,7,2,1,8,NA,4,4,1,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10410.106675,10534.907593,2,96,6,6,1.32,5,5,1,3,0,2,30,1,4,3,NA +65668,7,2,2,8,NA,5,7,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11541.681354,12101.149927,1,92,15,15,5,4,4,0,2,0,1,41,2,5,1,5 +65669,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,152858.509804,159007.057971,1,95,14,8,4.41,2,1,0,0,0,1,47,1,4,3,NA +65670,7,2,1,13,NA,1,1,1,14,169,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,17424.208904,17520.505085,1,102,5,5,0.92,5,5,0,3,0,2,39,2,3,1,3 +65671,7,2,2,2,NA,4,4,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7444.013422,7636.156038,1,90,5,5,1.32,2,2,1,0,0,2,27,2,3,5,NA +65672,7,2,2,22,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,33292.204257,2,101,3,3,0.92,1,1,0,0,0,2,22,1,4,5,NA +65673,7,2,1,2,NA,3,3,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24594.444896,28818.16319,1,98,1,1,0.16,3,3,1,0,0,1,28,1,2,6,NA +65674,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,17285.492411,17948.070756,1,102,7,7,1.8,5,4,1,0,2,1,47,1,3,5,NA +65675,7,2,1,66,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11568.876339,11794.347884,1,102,10,10,4.42,2,2,0,0,2,2,62,1,5,1,4 +65676,7,2,2,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,71034.153987,75000.659691,1,98,9,9,2.6,4,4,1,1,0,2,35,1,2,1,NA +65677,7,2,2,4,NA,1,1,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,2,NA,2,2,2,2,NA,NA,NA,NA,13193.876261,13756.035699,3,91,7,7,1.23,6,6,2,2,0,1,36,2,1,1,1 +65678,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,12697.263698,13996.985089,1,91,6,6,1.07,6,6,3,1,0,2,27,1,4,6,NA +65679,7,2,1,69,NA,5,7,2,NA,NA,2,NA,2,1,7,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,10298.198778,10826.998489,1,95,2,2,0.72,1,1,0,0,1,1,69,2,4,3,NA 
+65680,7,2,2,44,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16162.24986,16822.942152,3,91,15,15,5,3,3,0,1,0,2,44,2,5,1,5 +65681,7,2,2,4,NA,1,1,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16462.187772,17163.602129,3,92,8,8,1.55,6,6,1,3,0,2,38,1,5,1,4 +65682,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,143785.115498,143265.966876,1,99,10,10,5,1,1,0,0,0,2,57,1,5,3,NA +65683,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,1,1,2,2,1,7879.750437,7851.284287,2,92,10,8,2.01,7,4,1,1,1,2,27,2,3,1,3 +65684,7,2,1,14,NA,1,1,1,14,179,NA,NA,2,2,4,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18242.832494,18343.652859,1,102,2,2,0.52,3,3,0,2,0,2,36,2,3,4,NA +65685,7,2,2,29,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,51746.15111,54067.303468,1,95,8,8,2.24,4,4,2,0,0,2,29,1,3,1,4 +65686,7,2,2,9,NA,4,4,1,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10085.683644,10404.203071,2,102,6,6,1.22,5,5,0,2,0,2,42,1,4,1,4 +65687,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,23128.736624,23721.941544,2,101,2,2,0.79,1,1,0,0,0,2,55,1,2,3,NA +65688,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,187291.098551,186266.152024,2,91,15,15,5,3,3,0,0,0,2,54,1,4,1,4 +65689,7,2,1,19,NA,4,4,2,19,234,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13969.457688,14694.403205,1,90,6,6,1.57,3,3,0,1,0,2,36,1,3,5,NA +65690,7,2,1,16,NA,3,3,2,16,198,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,107087.58296,107224.801212,1,95,8,8,1.28,7,7,1,4,0,1,32,1,3,1,3 +65691,7,2,2,16,NA,5,6,1,16,199,NA,NA,2,2,2,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8391.252153,8753.945484,1,92,12,12,NA,7,7,1,2,1,2,45,2,3,1,3 +65692,7,2,1,14,NA,3,3,2,14,169,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,26824.630008,26472.092796,1,95,6,6,1.3,4,4,0,3,0,2,46,1,4,3,NA +65693,7,2,1,7,NA,3,3,2,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,66868.503099,69864.859716,1,98,15,15,5,4,4,1,1,0,1,40,1,4,1,5 
+65694,7,2,2,5,NA,2,2,2,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13366.393396,14756.857003,2,94,9,9,2.51,4,4,2,0,0,2,34,2,3,1,3 +65695,7,1,1,9,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8943.919305,0,1,92,5,5,0.63,7,7,0,4,1,1,60,NA,NA,1,NA +65696,7,2,2,8,NA,4,4,2,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6996.144083,7533.021082,2,99,5,5,0.78,5,5,2,2,0,2,30,1,3,5,NA +65697,7,2,1,16,NA,3,3,2,16,201,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26749.020961,27839.957403,1,101,6,6,1.21,4,4,0,2,0,2,33,1,2,6,NA +65698,7,2,1,8,NA,4,4,2,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11094.855304,11787.231447,1,90,7,7,2.1,3,3,0,1,0,1,35,1,3,6,NA +65699,7,2,1,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,152467.08796,154000.877343,1,94,6,6,2.24,1,1,0,0,0,1,50,1,3,3,NA +65700,7,2,2,18,NA,4,4,1,18,220,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14166.687432,14623.20249,1,100,13,13,NA,5,5,2,0,0,2,54,1,4,5,NA +65701,7,2,2,17,NA,4,4,2,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11711.384457,11558.024533,2,95,9,9,1.81,6,6,1,1,0,2,56,1,4,3,NA +65702,7,2,2,62,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,10999.00871,11458.057418,3,92,7,7,1.49,5,5,0,2,1,2,62,1,4,2,NA +65703,7,2,2,29,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,39426.061521,39893.228552,3,92,8,8,2.01,4,4,1,0,0,2,49,2,5,4,NA +65704,7,2,2,7,NA,3,3,2,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,82043.921571,82230.595899,1,97,14,14,3.9,4,4,0,2,0,1,47,1,3,1,5 +65705,7,2,2,63,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,4,NA,1,2,2,1,2,2,1,2,2,1,15207.312407,15975.329738,1,92,4,4,0.99,2,2,0,0,1,1,26,1,1,5,NA +65706,7,2,2,11,NA,3,3,2,11,143,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24070.467912,24097.958076,1,95,6,3,0.45,6,4,1,2,0,1,28,1,2,1,2 +65707,7,2,1,75,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,62703.997496,73870.695024,1,94,10,10,3.04,4,4,0,0,2,1,75,1,3,1,3 
+65708,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,35334.703093,40990.264786,1,101,3,3,1.25,1,1,0,0,1,2,80,1,2,2,NA +65709,7,2,2,17,NA,3,3,2,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,97212.131473,101229.4133,1,91,14,14,3.8,4,4,0,2,0,1,50,NA,NA,1,5 +65710,7,2,2,17,NA,4,4,2,17,210,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11608.900361,11982.991894,1,99,8,8,2.59,3,3,0,2,0,2,46,1,4,2,NA +65711,7,2,1,46,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11113.602843,11073.454175,3,90,77,77,NA,5,5,0,2,0,1,46,2,3,1,3 +65712,7,2,1,75,NA,2,2,2,NA,NA,1,1,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,14090.083482,14599.284236,2,90,6,6,2.24,2,2,0,0,2,1,75,2,4,1,2 +65713,7,2,1,72,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,14078.198261,14841.449973,1,91,1,1,0.05,2,1,0,0,2,1,72,1,1,3,NA +65714,7,2,1,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,64271.597433,68262.462992,1,94,15,15,4.95,4,4,0,0,2,1,72,1,3,1,3 +65715,7,2,1,12,NA,5,7,2,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,60818.973858,62775.825513,1,99,15,15,5,4,4,0,2,0,2,44,1,5,1,5 +65716,7,2,2,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,42559.487719,42454.393427,1,98,6,6,1.57,3,3,0,0,0,1,58,1,3,1,3 +65717,7,2,1,10,NA,4,4,1,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11443.206518,12087.326286,1,100,13,13,NA,3,3,0,1,0,2,52,2,3,1,NA +65718,7,2,2,11,NA,5,7,2,11,137,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9227.090107,9736.375878,2,100,15,15,4.83,4,4,1,1,0,1,43,2,5,1,5 +65719,7,2,1,68,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,2,1,NA,1,2,2,1,2,2,1,2,2,3,11145.558675,12081.671552,2,96,4,4,0.92,3,3,0,1,1,2,41,2,2,1,2 +65720,7,2,1,5,NA,2,2,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12403.412256,13289.361067,2,90,6,6,1.62,3,3,1,0,0,2,28,1,5,1,4 +65721,7,2,2,6,NA,1,1,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14538.945634,14691.797657,2,92,8,8,1.42,7,7,0,4,0,2,37,1,1,6,NA 
+65722,7,2,2,39,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,37237.570046,37476.359157,2,97,4,1,0,2,1,0,0,0,1,41,1,2,6,NA +65723,7,1,1,73,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15200.416613,0,1,98,4,4,1.19,2,2,0,0,2,2,72,1,3,1,4 +65724,7,1,2,4,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,13366.393396,0,2,94,14,7,2.72,3,2,1,0,0,1,25,2,3,6,NA +65725,7,2,2,8,NA,3,3,2,9,108,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,70999.269152,75403.63644,1,93,15,15,5,4,4,0,2,0,1,51,1,3,1,5 +65726,7,2,1,23,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,15196.92397,16091.373745,2,101,7,3,1.1,3,1,0,0,0,1,21,2,4,5,NA +65727,7,2,2,3,NA,1,1,1,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,16432.24332,3,92,7,7,1.41,5,5,1,2,0,1,40,1,3,1,4 +65728,7,2,2,0,1,4,4,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4581.358327,5045.31579,2,101,4,4,0.85,4,4,1,0,1,2,61,1,4,3,NA +65729,7,2,1,9,NA,3,3,1,9,113,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23188.935049,25672.571973,3,91,7,7,1.1,7,7,0,4,0,1,40,1,4,1,3 +65730,7,2,2,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16181.169973,16457.814273,2,100,14,14,4.86,3,3,0,0,0,2,52,1,5,1,3 +65731,7,2,1,17,NA,2,2,1,17,208,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18206.126374,18306.74388,2,93,4,4,0.84,3,3,0,1,0,2,46,2,3,1,2 +65732,7,2,1,2,NA,1,1,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10803.555682,10625.559962,2,94,6,6,1.32,4,4,2,0,0,1,29,1,3,1,3 +65733,7,1,2,29,NA,5,6,NA,NA,NA,2,NA,2,2,2,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,12636.996393,0,3,90,12,12,NA,3,3,0,0,0,2,29,2,5,1,5 +65734,7,2,2,14,NA,4,4,2,14,170,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13841.239638,13875.328502,1,96,15,15,4.52,6,6,0,4,0,1,46,1,4,1,4 +65735,7,2,2,69,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,2,NA,2,2,2,1,2,2,1,2,2,1,9104.567599,9484.550932,2,93,6,6,2.69,1,1,0,0,1,2,69,2,4,2,NA 
+65736,7,2,1,71,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,1,1,2,2,1,2,1,NA,16439.475505,17615.98168,1,100,5,5,1.18,3,3,0,0,2,2,34,2,5,5,NA +65737,7,2,2,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,33274.645943,34015.957912,2,95,14,14,3.34,4,4,0,0,0,1,43,1,3,1,3 +65738,7,2,1,35,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18025.279948,19114.89054,1,97,4,3,0.93,3,2,0,0,0,1,35,1,3,1,4 +65739,7,2,1,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,58205.324383,61609.052582,2,98,8,8,3.06,2,2,0,0,2,1,71,1,4,1,3 +65740,7,2,1,68,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,10098.443227,10212.20306,1,102,9,9,5,1,1,0,0,1,1,68,1,4,3,NA +65741,7,2,1,26,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,2,2,2,1,2,2,2,38560.502118,39812.43256,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +65742,7,2,1,30,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,3,5,NA,2,2,2,2,2,2,2,2,2,2,34887.439952,35069.154585,2,94,15,15,5,3,3,0,0,0,1,41,2,3,1,NA +65743,7,2,2,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,25569.682712,26728.878308,1,97,15,15,5,4,4,0,1,0,1,40,1,4,1,4 +65744,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,54095.581484,0,2,94,77,77,NA,2,2,0,0,2,2,80,1,5,1,5 +65745,7,2,1,27,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,69082.166945,70155.790389,2,103,15,15,3.44,7,7,0,1,2,2,79,1,3,2,NA +65746,7,2,1,19,NA,3,3,1,19,234,2,NA,2,1,4,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,103364.662244,103614.262032,1,98,3,1,0.09,4,1,0,0,0,1,20,1,4,5,NA +65747,7,2,2,6,NA,4,4,2,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7646.649777,8233.445923,2,99,3,3,0.52,3,3,0,1,0,2,23,1,3,1,3 +65748,7,2,1,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,7907.095293,7814.315452,2,97,5,5,1.3,3,3,0,1,1,1,64,1,4,3,NA +65749,7,2,2,2,NA,2,2,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9534.013652,9566.569071,2,97,12,6,0.89,7,7,3,0,0,2,26,2,1,6,NA 
+65750,7,2,1,14,NA,3,3,1,14,173,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,79147.85144,78921.048453,1,98,15,15,5,5,5,0,3,0,2,44,1,5,1,5 +65751,7,2,1,54,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,3,1,NA,2,2,2,1,2,2,2,2,2,2,22446.308035,22366.708253,2,94,1,1,0.03,2,2,0,0,0,2,48,2,1,1,3 +65752,7,2,1,21,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,NA,1,2,2,1,2,2,1,2,2,3,14385.653726,15564.966804,2,101,6,99,NA,2,1,0,0,0,1,20,2,4,5,NA +65753,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,27738.890335,33497.780693,2,101,3,3,0.3,7,7,1,2,0,2,50,1,2,4,NA +65754,7,2,2,50,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,5,NA,2,2,2,1,2,2,1,2,2,2,28701.155283,32172.514581,3,92,13,13,NA,4,4,0,2,0,2,50,2,1,5,NA +65755,7,1,2,64,NA,1,1,NA,NA,NA,2,NA,2,2,4,NA,1,2,NA,2,2,2,NA,NA,NA,NA,NA,NA,NA,16352.915834,0,3,92,NA,NA,NA,5,5,2,1,1,2,64,2,1,2,NA +65756,7,1,1,12,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7964.334824,0,1,95,6,6,1.34,4,4,0,2,0,2,32,2,3,2,NA +65757,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,21857.756498,21879.219845,2,97,9,9,1.45,7,7,1,2,2,2,45,1,3,5,NA +65758,7,2,2,67,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,2,1,NA,1,2,1,1,2,1,1,2,1,3,12403.522912,13236.989116,1,93,3,3,0.65,3,3,0,0,3,2,74,2,1,2,NA +65759,7,2,1,11,NA,3,3,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,53915.042746,57006.573447,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +65760,7,2,1,41,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,37402.70356,37784.383858,2,102,14,14,3.8,4,4,2,0,0,2,41,2,4,1,5 +65761,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,67671.21344,73410.277865,3,90,15,15,5,3,3,1,0,0,1,31,1,5,1,5 +65762,7,2,2,11,NA,5,6,2,11,140,NA,NA,2,2,2,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6834.715094,7298.452383,2,90,3,1,0,5,1,1,2,0,1,44,2,5,1,5 +65763,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,8949.073879,2,95,6,6,2.69,1,1,0,0,1,2,68,1,3,2,NA 
+65764,7,2,2,73,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,63767.913736,65948.76781,1,94,6,6,1.91,2,2,0,0,2,1,78,1,4,1,3 +65765,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,128575.977224,134245.696628,1,102,6,4,1.42,2,1,0,0,0,2,23,1,5,5,NA +65766,7,1,2,67,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,36706.470429,0,1,103,3,3,0.98,1,1,0,0,1,2,67,1,5,4,NA +65767,7,2,1,1,12,3,3,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,40111.361732,44385.174851,1,98,7,7,1.48,5,5,1,1,0,1,46,1,3,1,3 +65768,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,38954.135779,39992.589933,1,91,5,5,1.3,3,3,0,1,0,2,50,1,4,2,NA +65769,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,15051.024912,14836.484643,3,90,15,15,4.2,6,6,1,0,2,1,60,1,5,1,4 +65770,7,2,1,40,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,2,6,NA,2,2,2,2,2,2,2,2,2,2,37402.70356,40836.297693,2,102,7,7,1.79,4,4,0,2,0,1,40,2,2,6,NA +65771,7,2,1,55,NA,5,7,1,NA,NA,1,2,2,1,9,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16575.817424,16910.393297,1,94,8,8,2.97,2,2,0,0,1,2,74,2,3,1,4 +65772,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,NA,20131.904783,20903.590301,1,92,6,6,1.31,3,3,0,0,1,2,80,1,3,4,NA +65773,7,2,1,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22937.913723,23181.846174,1,100,14,14,3.47,4,4,2,0,0,1,34,1,5,1,5 +65774,7,2,1,24,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,37911.437415,38428.199561,2,103,6,6,1.98,2,2,0,0,0,1,24,1,2,6,NA +65775,7,2,2,49,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,18494.181242,18934.264494,2,99,9,9,2.43,4,4,0,2,0,2,49,1,3,3,NA +65776,7,2,1,2,NA,4,4,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5853.073657,6453.970807,2,95,2,2,0.41,3,3,2,0,0,2,19,1,2,NA,NA +65777,7,2,2,11,NA,4,4,2,12,144,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6655.097829,7106.52929,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 
+65778,7,2,1,11,NA,4,4,2,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13423.881856,14179.490667,2,101,13,3,0.64,5,4,0,3,1,2,62,1,1,2,NA +65779,7,2,2,2,NA,5,7,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8579.211947,8800.655964,1,97,9,9,2.6,4,4,1,1,0,2,45,1,4,1,4 +65780,7,2,2,7,NA,3,3,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24070.467912,25563.654853,1,95,10,6,1.34,5,4,1,2,0,1,32,1,3,6,NA +65781,7,2,2,41,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,15091.588227,15171.391678,1,93,6,6,1.15,5,5,1,0,2,2,70,NA,NA,1,NA +65782,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22565.644629,22012.541181,1,97,15,15,5,6,6,0,1,1,2,53,1,4,1,NA +65783,7,2,1,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,124820.027137,131239.54259,1,91,10,10,3.78,3,3,0,0,2,1,62,1,5,1,5 +65784,7,2,2,57,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,17622.141982,17298.28124,2,95,7,7,2.58,2,2,0,0,0,2,57,1,4,3,NA +65785,7,2,1,0,8,1,1,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,2,NA,2,2,2,2,NA,NA,NA,NA,6217.36354,6396.366428,3,91,7,7,1.23,6,6,2,2,0,1,36,2,1,1,1 +65786,7,2,1,8,NA,3,3,1,8,104,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,49922.147265,51884.389986,1,98,7,7,1.66,5,5,2,1,0,2,37,1,5,1,3 +65787,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,86986.68246,91060.136414,2,94,7,7,2.38,2,2,0,1,0,1,39,1,4,3,NA +65788,7,2,2,5,NA,5,6,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7143.995395,7265.801592,3,91,15,15,5,4,4,1,1,0,1,40,2,5,1,5 +65789,7,2,1,5,NA,4,4,2,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7430.518669,8193.361877,2,99,1,1,0.07,4,4,1,1,0,2,24,1,2,5,NA +65790,7,2,2,5,NA,2,2,2,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11429.37307,11793.034101,2,90,2,2,0.49,3,3,2,0,0,2,26,1,4,1,NA +65791,7,2,1,24,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19775.099106,19464.255682,1,96,15,15,5,3,3,0,0,0,1,55,1,4,1,5 
+65792,7,2,1,0,6,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6999.189812,6930.018579,3,92,4,4,0.43,7,7,2,2,0,2,36,1,2,6,NA +65793,7,2,2,16,NA,3,3,2,16,196,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25543.162298,26253.778018,2,95,6,6,1.19,4,4,0,1,0,1,44,1,3,1,2 +65794,7,2,1,9,NA,5,6,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5062.43381,5485.75338,3,91,6,6,1.22,5,5,1,2,0,2,37,1,4,1,2 +65795,7,2,1,9,NA,4,4,1,9,111,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14792.384662,15048.994189,2,102,5,3,0.63,5,4,2,1,0,1,24,1,4,6,NA +65796,7,2,1,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,26135.885159,28479.463721,2,101,1,1,0.1,2,2,0,0,0,1,56,1,3,6,NA +65797,7,2,2,52,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,19735.328357,19287.640039,1,90,8,8,4.48,1,1,0,0,0,2,52,2,4,3,NA +65798,7,2,2,26,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,3,1,2,1,2,2,2,2,2,1,2,2,1,42621.881199,43251.535191,2,102,4,4,0.57,6,6,2,3,0,2,26,2,3,1,NA +65799,7,2,1,21,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,15572.117537,16108.231355,2,94,15,15,4.44,5,5,0,1,1,2,74,1,5,2,NA +65800,7,2,2,61,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,153418.292529,156457.542801,1,92,15,15,5,3,3,0,0,2,2,61,1,5,1,5 +65801,7,2,2,9,NA,4,4,2,9,109,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8579.490652,8919.477637,2,97,6,6,1,6,6,1,2,2,2,60,1,2,2,NA +65802,7,1,2,9,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7710.907339,0,2,99,6,6,0.94,7,7,0,4,0,2,32,1,3,1,3 +65803,7,2,2,15,NA,3,3,2,15,189,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,38400.791741,41869.717003,1,95,7,7,1.66,5,5,0,3,0,1,34,1,2,1,4 +65804,7,2,1,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,185033.990584,192737.162475,1,97,14,14,4.96,2,2,0,0,0,1,59,1,4,1,5 +65805,7,2,2,11,NA,3,3,2,11,139,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19824.800404,20630.46787,1,101,6,6,1.21,4,4,0,2,0,2,33,1,2,6,NA 
+65806,7,2,1,5,NA,1,1,1,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16632.464801,17158.955649,3,91,3,3,0.39,6,6,1,1,0,1,39,2,1,6,NA +65807,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,108408.375382,108975.782069,2,98,14,14,4.16,3,3,0,0,0,1,49,1,5,1,4 +65808,7,2,1,65,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7323.703412,7610.147862,2,100,9,9,2.46,4,4,1,1,1,2,59,1,3,1,3 +65809,7,2,1,0,8,3,3,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22795.485282,22401.400888,1,92,9,9,3.24,3,3,1,0,0,1,30,1,5,6,NA +65810,7,2,2,2,NA,3,3,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,44572.321124,49194.06017,1,98,8,8,2.46,3,3,1,0,0,1,29,1,4,6,NA +65811,7,2,2,11,NA,1,1,2,11,136,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,13085.954443,13661.915968,2,97,4,4,0.6,6,6,2,2,0,1,35,2,2,6,NA +65812,7,2,1,30,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,14894.763298,16100.128566,2,92,6,6,1.35,3,3,1,0,0,2,32,1,5,1,5 +65813,7,2,1,11,NA,1,1,2,11,138,NA,NA,2,1,3,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,13285.093011,14158.005149,2,94,7,7,1.57,4,4,0,2,0,1,30,2,3,1,4 +65814,7,2,1,11,NA,4,4,1,11,134,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13213.679978,13442.903072,2,96,7,7,1.49,5,5,2,1,0,1,51,1,5,1,3 +65815,7,1,2,65,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,10033.449917,0,2,95,3,3,0.95,2,2,0,0,1,2,65,1,2,3,NA +65816,7,2,1,6,NA,3,3,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23796.980721,24835.204126,1,92,4,4,0.61,5,5,1,2,0,1,34,1,3,6,NA +65817,7,2,2,19,NA,3,3,2,19,235,2,NA,2,1,5,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,77001.138762,79232.910414,1,98,15,15,5,3,3,0,0,0,1,56,1,5,1,5 +65818,7,2,1,17,NA,5,6,2,17,211,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6852.192992,7202.81544,2,100,15,15,5,3,3,0,1,0,1,58,2,5,1,5 +65819,7,2,1,9,NA,1,1,1,9,108,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11159.151566,11335.739115,1,102,6,6,1.03,6,6,0,4,0,1,34,2,2,1,1 
+65820,7,2,1,77,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,2,2,NA,1,2,1,1,2,1,1,2,1,NA,9681.885604,10696.882276,1,93,2,2,0.74,1,1,0,0,1,1,77,2,2,2,NA +65821,7,2,2,5,NA,5,6,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8171.700571,8311.029296,1,92,15,15,5,4,4,1,1,0,1,38,2,5,1,5 +65822,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,88617.795432,89088.673687,1,91,5,5,2.2,1,1,0,0,1,1,63,1,2,2,NA +65823,7,2,1,37,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,2,2,2,1,2,2,1,46446.757775,47702.387865,1,102,1,1,0.33,2,2,0,0,0,1,37,1,2,4,NA +65824,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,104228.438447,104101.346681,2,95,10,10,5,1,1,0,0,0,1,57,1,5,5,NA +65825,7,2,2,8,NA,4,4,1,9,108,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7619.934086,8032.046596,2,103,7,7,1.55,5,5,2,2,0,2,31,1,4,3,NA +65826,7,2,1,15,NA,1,1,1,16,192,NA,NA,2,2,4,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,29234.272259,28953.402615,3,92,13,13,NA,4,4,0,2,0,2,50,2,1,5,NA +65827,7,2,1,42,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,4,6,NA,1,2,2,1,2,2,1,2,2,2,37402.70356,37112.276585,2,102,15,14,5,5,1,0,3,0,1,42,2,4,6,NA +65828,7,2,2,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,118761.81384,123645.102237,3,91,7,4,1.74,2,1,0,0,0,1,23,NA,NA,6,NA +65829,7,2,2,3,NA,3,3,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,69494.442609,71683.798234,1,98,15,15,4.56,4,4,2,0,0,2,33,1,4,1,4 +65830,7,2,1,2,NA,1,1,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15546.999135,15568.006794,3,92,5,5,0.87,4,4,2,0,0,2,28,1,3,1,3 +65831,7,2,2,19,NA,4,4,1,19,232,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12531.903464,13043.632492,2,100,1,1,0.08,5,5,1,2,0,2,19,1,3,NA,NA +65832,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,1,2,1,2,2,1,1,2,NA,15852.523312,16085.826144,2,97,6,6,2.04,2,2,0,0,2,2,80,1,3,2,NA +65833,7,2,2,1,13,1,1,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11901.705423,12652.027961,2,102,4,4,0.65,5,5,1,0,0,2,58,2,1,4,NA 
+65834,7,2,1,6,NA,4,4,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10229.206765,10406.656985,1,96,15,15,4.52,6,6,0,4,0,1,46,1,4,1,4 +65835,7,2,1,18,NA,5,6,2,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6666.045669,7317.485505,3,90,77,77,NA,5,5,0,2,0,1,46,2,3,1,3 +65836,7,2,2,18,NA,5,7,2,18,216,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,58352.457563,59447.870488,1,99,77,77,NA,3,3,0,0,0,1,42,1,4,6,NA +65837,7,2,1,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,11212.469396,11300.176858,1,100,15,99,NA,5,2,0,0,3,2,50,1,4,6,NA +65838,7,2,1,63,NA,2,2,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11061.174348,11020.288023,1,95,7,7,2.65,2,2,0,0,1,1,63,1,4,1,NA +65839,7,2,1,4,NA,1,1,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14318.290734,14771.527769,3,91,15,14,4.03,5,4,2,0,0,1,42,2,4,1,5 +65840,7,2,1,21,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,12395.607222,12971.055335,1,102,9,9,2.39,5,5,0,1,1,1,55,2,5,1,5 +65841,7,2,1,63,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,85242.614785,86042.075041,1,93,15,15,5,1,1,0,0,1,1,63,1,5,5,NA +65842,7,2,1,0,3,1,1,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9064.168162,9064.335231,3,92,14,14,4.71,3,3,1,0,0,1,29,1,5,1,5 +65843,7,2,1,9,NA,1,1,2,9,109,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12477.812875,13297.681754,2,94,5,5,0.67,6,6,1,3,0,1,37,2,3,1,4 +65844,7,2,1,69,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,2,1,2,1,2,2,2,1,2,NA,12118.033999,12538.522644,2,91,3,3,0.66,2,2,0,0,1,1,69,2,5,1,1 +65845,7,2,1,13,NA,5,6,2,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7350.524832,7855.79983,2,90,8,8,2.59,3,3,0,1,0,2,41,2,4,1,4 +65846,7,2,2,61,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,2,2,2,2,2,2,2,2,2,2,7278.790659,7582.574348,2,93,15,15,5,4,4,0,0,1,1,57,2,5,1,4 +65847,7,2,1,3,NA,2,2,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14004.176901,13773.448831,2,93,3,3,0.66,2,2,1,0,0,2,31,2,3,5,NA 
+65848,7,1,1,11,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7803.43213,0,1,93,15,15,5,3,3,0,2,0,2,40,2,5,4,NA +65849,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,17753.406962,18715.910687,1,101,4,4,1.29,2,2,0,0,2,2,74,1,3,1,3 +65850,7,2,1,58,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,25118.469449,25612.281104,3,92,5,5,1.56,2,2,0,0,1,1,58,1,3,1,2 +65851,7,2,2,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,50915.06085,56529.78018,3,92,4,4,0.65,4,4,2,0,0,2,20,1,3,5,NA +65852,7,2,2,29,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,40212.914634,41314.674899,2,92,15,10,5,2,1,0,0,0,2,29,1,5,6,NA +65853,7,2,2,14,NA,3,3,2,14,172,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,97212.131473,101229.4133,1,91,14,14,4.03,4,4,0,2,0,1,52,1,4,1,5 +65854,7,2,1,56,NA,5,7,2,NA,NA,2,NA,2,1,5,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11690.444016,12008.407525,3,90,9,9,3.14,3,3,0,0,0,1,56,2,3,1,3 +65855,7,2,1,8,NA,3,3,2,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23796.980721,24835.204126,1,95,5,5,1.03,4,4,0,2,0,1,33,1,3,1,3 +65856,7,2,1,63,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,8060.574062,8123.626407,1,93,7,7,1.61,4,4,0,0,1,2,27,2,3,5,NA +65857,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,97050.018399,96721.703127,1,99,14,14,5,1,1,0,0,1,2,64,1,5,3,NA +65858,7,2,2,5,NA,1,1,2,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13366.393396,14209.054666,2,94,7,7,1.23,6,6,2,1,0,1,33,2,1,6,NA +65859,7,2,2,0,10,3,3,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25770.951107,25070.954843,2,91,15,15,4.34,4,4,2,0,0,1,39,1,5,1,5 +65860,7,2,2,5,NA,4,4,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11142.946789,11989.935044,2,96,5,5,0.67,6,6,1,2,1,1,34,1,4,1,4 +65861,7,2,1,8,NA,2,2,2,9,109,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,10248.861635,10307.698588,1,96,15,15,5,4,4,0,2,0,1,36,2,3,1,4 
+65862,7,2,1,54,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,27227.937106,27194.736507,1,101,3,3,1.1,1,1,0,0,0,1,54,1,4,3,NA +65863,7,2,2,33,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,15817.041096,15849.338176,2,92,15,8,4.59,2,1,0,0,0,2,25,2,5,5,NA +65864,7,2,1,0,1,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13392.309303,13898.571164,1,99,14,14,3.44,5,5,3,0,0,1,30,1,4,1,5 +65865,7,2,2,34,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,NA,NA,NA,NA,39561.667842,39822.148122,2,98,12,12,NA,3,3,0,1,0,2,34,1,4,1,3 +65866,7,2,2,6,NA,4,4,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9209.511624,9916.240201,1,100,5,5,0.85,5,5,0,2,0,2,54,1,2,2,NA +65867,7,2,2,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,37934.469637,38685.959862,1,94,6,6,1.98,2,2,0,0,1,2,60,1,4,3,NA +65868,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,NA,53069.077479,57205.244401,2,90,15,15,5,2,1,0,0,2,1,67,1,5,6,NA +65869,7,2,2,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,9518.80186,10252.529496,2,100,4,4,1.16,2,2,0,0,1,2,18,1,2,NA,NA +65870,7,2,2,1,12,5,6,1,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,4488.195848,4764.622817,2,92,99,2,0.31,7,4,3,3,1,1,61,2,1,1,3 +65871,7,2,2,6,NA,1,1,2,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18721.371751,18903.185351,1,95,6,6,1.37,3,3,1,1,0,2,28,1,4,5,NA +65872,7,2,1,1,15,1,1,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10666.404974,11148.562293,2,96,8,8,1.33,7,7,2,1,1,1,62,2,1,1,1 +65873,7,2,2,36,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,18018.210636,21650.053885,2,91,14,14,3.47,4,4,1,1,0,2,36,2,3,1,5 +65874,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,127315.335607,127929.014806,1,91,10,10,2.77,5,5,0,3,0,1,43,1,5,1,5 +65875,7,2,2,4,NA,1,1,1,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,16648.051651,2,98,6,6,0.63,7,7,2,2,1,1,60,1,3,1,2 
+65876,7,2,1,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21288.18311,20984.737161,1,102,1,1,0,5,5,0,3,0,2,41,1,4,1,4 +65877,7,2,1,76,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,1,1,2,2,1,2,2,NA,10123.333237,10321.62644,1,93,2,2,0.54,2,2,0,0,2,1,76,2,4,1,1 +65878,7,2,1,29,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,5,NA,1,2,2,1,2,1,NA,NA,NA,NA,9177.295801,9603.338468,2,92,7,7,1.89,3,3,0,0,1,1,36,2,3,5,NA +65879,7,2,2,32,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,45655.090694,45995.96139,3,92,15,15,5,3,3,1,0,0,1,34,1,5,1,5 +65880,7,2,2,26,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,53638.260635,61764.513393,2,96,4,4,1.47,1,1,0,0,0,2,26,1,4,5,NA +65881,7,2,1,1,13,4,4,2,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6992.24593,7710.094516,2,97,2,2,0.27,3,3,1,0,0,2,21,1,3,6,NA +65882,7,2,2,11,NA,2,2,2,11,140,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10762.400563,11222.736057,2,90,12,12,NA,4,4,0,2,0,2,38,2,4,1,4 +65883,7,2,1,79,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,3,4,NA,1,2,2,1,2,2,1,2,2,NA,7199.330978,7568.243922,1,93,2,2,0.64,1,1,0,0,1,1,79,2,3,4,NA +65884,7,2,1,2,NA,5,6,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6672.795321,7206.439819,2,102,15,15,3.92,5,5,1,2,0,1,34,2,5,1,5 +65885,7,2,1,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,21941.544332,21717.573526,1,99,4,4,1.16,2,2,0,0,2,2,63,1,5,6,NA +65886,7,2,2,2,NA,5,7,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8692.478172,9353.203477,1,100,6,6,1.18,5,5,1,2,0,2,30,1,3,1,4 +65887,7,2,1,0,1,1,1,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,5875.721278,6205.225081,2,97,6,6,1.1,5,5,2,1,0,2,29,2,2,6,NA +65888,7,2,2,11,NA,4,4,1,11,134,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9139.784234,9759.758014,2,100,14,14,3.58,4,4,0,1,1,2,55,1,5,1,4 +65889,7,2,1,30,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,2,2,2,1,2,2,1,2,2,1,37080.526463,37488.446325,1,103,6,6,1.57,3,3,0,1,0,2,50,2,3,4,NA 
+65890,7,2,1,3,NA,2,2,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15915.595287,17052.411707,2,96,14,14,3.36,4,4,1,1,0,2,28,1,2,6,NA +65891,7,2,1,3,NA,4,4,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8961.738846,9235.465849,1,93,10,10,2.91,4,4,2,0,0,2,27,1,5,1,4 +65892,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,25840.959268,28861.347345,1,101,3,3,0.86,2,2,0,0,1,2,80,1,1,2,NA +65893,7,2,2,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16058.142925,16013.018639,2,95,5,5,1.18,3,3,0,1,0,2,55,1,4,5,NA +65894,7,2,1,12,NA,4,4,1,12,150,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13653.432599,13787.136512,2,96,6,6,1.62,3,3,0,2,0,2,31,1,3,5,NA +65895,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,26922.241442,27209.395492,1,90,7,7,1.55,5,5,0,3,0,1,51,2,3,1,2 +65896,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,89807.047643,92868.804281,1,94,15,1,0.44,4,1,0,0,1,1,33,1,2,5,NA +65897,7,2,1,13,NA,1,1,1,14,168,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23389.620035,23146.524434,1,94,10,10,2.94,4,4,0,2,0,2,52,1,5,2,NA +65898,7,2,1,66,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,5950.975297,5997.525697,2,99,6,6,1.11,5,5,0,0,1,1,66,2,4,1,4 +65899,7,2,2,12,NA,4,4,2,12,147,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17426.123122,17678.558809,1,91,10,10,2.56,5,5,0,3,0,1,51,2,5,1,4 +65900,7,2,2,18,NA,3,3,1,18,218,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,76992.7514,81576.336255,1,100,15,15,4.07,5,5,0,2,0,2,41,1,5,1,4 +65901,7,2,1,22,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,11164.358128,11468.312676,2,93,1,1,0.09,1,1,0,0,0,1,22,1,4,5,NA +65902,7,2,2,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,1,1,2,2,1,2,2,1,2,2,1,18723.98095,21433.166124,2,95,7,7,1.41,5,5,2,0,0,2,53,1,3,3,NA +65903,7,2,1,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4912.962876,5216.242891,2,90,6,6,1.03,6,6,3,1,0,1,45,2,2,1,2 
+65904,7,2,2,4,NA,2,2,1,4,48,NA,NA,2,1,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13201.625134,13621.675873,2,92,15,15,5,4,4,1,1,0,2,47,1,5,1,5 +65905,7,2,1,54,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,1,1,NA,1,2,1,1,2,2,1,2,1,NA,15728.666463,15671.845555,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 +65906,7,2,2,7,NA,2,2,1,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15009.847173,16312.401192,2,93,15,15,4.34,4,4,0,2,0,1,33,2,5,1,5 +65907,7,1,2,47,NA,2,2,NA,NA,NA,2,NA,2,7,77,NA,3,1,NA,2,1,2,1,2,2,NA,NA,NA,NA,23968.560941,0,2,90,99,99,NA,4,4,0,2,0,1,39,NA,NA,4,NA +65908,7,2,1,17,NA,4,4,2,17,214,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18526.8521,18563.481203,1,97,6,6,1.41,3,3,0,1,0,2,51,1,4,5,NA +65909,7,2,1,35,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19091.246741,19659.303383,1,91,10,10,3.78,3,3,1,0,0,1,35,2,5,1,5 +65910,7,2,1,36,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13601.994691,14414.229968,2,100,15,15,5,4,4,1,1,0,1,36,2,5,1,5 +65911,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,38833.86357,44548.098777,3,92,5,5,1.59,2,2,0,0,2,1,64,1,3,1,3 +65912,7,2,1,18,NA,5,6,1,18,220,2,NA,2,2,1,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9948.022697,10353.284725,2,101,99,99,NA,4,1,0,0,0,1,18,2,4,NA,NA +65913,7,2,2,19,NA,4,4,2,19,239,2,NA,2,1,4,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11478.437608,11446.094627,1,96,7,7,1.69,4,4,0,1,0,2,19,2,4,NA,NA +65914,7,2,2,79,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,12863.404053,17087.605763,2,90,2,2,0.87,1,1,0,0,1,2,79,1,2,2,NA +65915,7,2,1,37,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,3,1,NA,2,2,2,2,2,2,1,2,2,2,30626.581617,36447.669921,2,90,3,3,0.58,4,4,0,2,0,2,36,2,3,1,3 +65916,7,2,1,9,NA,4,4,2,9,117,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,9738.268689,9808.886868,2,99,9,9,1.78,6,6,1,1,0,1,46,1,3,6,NA +65917,7,2,2,6,NA,5,7,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18348.624469,20109.577944,2,90,77,77,NA,6,6,0,4,0,2,41,NA,NA,4,NA 
+65918,7,2,2,34,NA,5,6,1,NA,NA,2,NA,2,2,77,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,15792.287872,16329.218615,1,103,12,12,NA,2,2,0,0,0,1,34,2,5,1,5 +65919,7,2,2,73,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,1,2,NA,1,2,1,1,2,2,1,2,1,NA,15825.056964,16318.16087,2,94,15,15,5,3,3,0,0,1,2,40,2,5,1,5 +65920,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,31335.13799,31552.004994,1,95,4,4,0.65,6,6,2,2,0,2,36,1,4,6,NA +65921,7,2,2,41,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,19711.72366,19172.260632,1,96,5,5,1.45,2,2,0,1,0,2,41,1,4,3,NA +65922,7,2,1,10,NA,4,4,2,10,121,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8861.237416,9017.822314,2,95,2,2,0.26,3,3,0,2,0,2,31,1,3,5,NA +65923,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,138075.879417,141933.339512,2,91,14,14,5,1,1,0,0,1,1,62,1,5,3,NA +65924,7,2,2,19,NA,4,4,1,19,238,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15006.987722,14964.702287,1,98,2,1,0.21,4,1,0,0,0,2,19,1,4,NA,NA +65925,7,2,1,23,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,29738.952706,30589.306008,2,94,6,3,0.68,3,2,0,0,0,1,26,1,4,5,NA +65926,7,2,2,14,NA,4,4,1,14,170,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11791.755593,11820.79689,2,96,3,3,0.47,6,6,0,4,0,1,36,1,4,1,4 +65927,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,111378.575836,111001.787784,2,97,8,8,3.06,2,2,0,0,2,1,68,1,2,1,4 +65928,7,2,2,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,17884.885732,18357.680967,1,98,6,6,1.73,3,3,0,1,0,2,39,1,4,1,1 +65929,7,2,1,64,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,35869.019314,40296.63257,3,92,5,5,1.59,2,2,0,0,2,1,64,1,3,1,3 +65930,7,2,2,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,59902.67172,68981.485025,1,101,3,3,1.3,1,1,0,0,1,2,80,1,3,2,NA +65931,7,2,1,22,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,2,5,NA,1,2,2,1,2,2,1,2,2,NA,10141.381563,10817.596093,3,90,12,12,NA,4,4,0,0,1,1,62,2,4,3,NA 
+65932,7,2,1,17,NA,2,2,1,17,215,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18242.832494,18053.229058,1,102,77,77,NA,6,6,0,2,1,2,37,1,4,1,4 +65933,7,2,1,11,NA,3,3,1,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,54897.892683,57357.850008,2,98,15,15,5,3,3,0,1,0,1,56,1,5,1,5 +65934,7,2,1,3,NA,3,3,2,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,75551.990912,88526.885345,1,90,14,14,3.93,3,3,1,0,0,1,35,1,2,1,5 +65935,7,2,1,18,NA,4,4,2,18,223,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12530.944806,12773.594654,2,90,8,8,1.67,6,6,1,1,0,1,52,1,3,1,5 +65936,7,2,1,7,NA,3,3,2,7,88,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,64387.576177,71283.768751,1,95,8,8,2.7,3,3,0,1,2,1,69,1,5,1,3 +65937,7,2,2,61,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,3,4,NA,2,2,2,2,2,2,2,2,2,2,9716.805546,10855.679158,2,90,12,12,NA,3,3,0,0,2,2,61,2,3,4,NA +65938,7,2,1,0,2,1,1,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9064.168162,9064.335231,3,92,4,4,0.89,3,3,1,0,0,2,24,2,4,1,3 +65939,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18097.801029,17207.598344,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA +65940,7,2,2,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,13851.686232,14498.558979,3,91,15,3,0.92,5,1,2,0,0,1,42,2,4,1,5 +65941,7,2,1,6,NA,2,2,1,6,74,NA,NA,2,1,3,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10212.603023,10439.995832,2,92,15,15,5,4,4,1,1,0,2,47,1,5,1,5 +65942,7,2,2,3,NA,1,1,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19235.084509,19847.108514,3,92,15,8,2.62,4,3,1,1,0,1,30,1,2,6,NA +65943,7,2,1,2,NA,4,4,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8009.966208,8254.622305,2,96,3,3,0.54,4,4,2,1,0,2,25,1,4,2,NA +65944,7,2,2,64,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9716.805546,10308.451947,2,90,9,9,5,1,1,0,0,1,2,64,2,4,3,NA +65945,7,2,2,2,NA,1,1,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13464.808163,14038.51136,1,101,2,2,0.26,5,5,3,0,0,2,26,1,2,1,3 
+65946,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,15415.338508,15856.841729,1,93,4,4,1.65,1,1,0,0,0,1,34,1,5,4,NA +65947,7,2,2,3,NA,5,6,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5485.572703,5823.427887,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +65948,7,2,1,5,NA,4,4,1,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9388.597537,2,100,3,3,0.38,5,5,2,1,0,2,28,1,2,5,NA +65949,7,1,1,48,NA,2,2,NA,NA,NA,2,NA,2,2,6,NA,2,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,35393.002863,0,2,93,4,4,0.84,3,3,0,1,0,2,46,2,3,1,2 +65950,7,2,1,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,23471.353577,23675.625296,1,100,14,6,1.85,3,2,1,0,0,1,33,1,5,5,NA +65951,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,102085.27025,105869.935801,1,99,14,14,4.86,3,3,0,1,0,1,56,1,5,1,5 +65952,7,2,1,17,NA,2,2,1,17,212,2,NA,2,2,1,13,NA,NA,NA,2,2,2,1,2,2,2,2,2,2,24228.858782,24791.594558,2,102,6,6,1,6,6,1,3,0,1,35,2,3,1,3 +65953,7,2,2,24,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,5,1,2,2,2,2,2,2,1,2,2,1,36169.442288,36681.102865,2,96,5,5,0.68,6,6,0,3,2,1,60,2,1,1,1 +65954,7,2,2,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10440.656902,1,99,12,12,NA,1,1,0,0,1,2,66,1,5,1,NA +65955,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,29016.444561,32518.702589,3,91,4,4,1.16,2,2,0,0,2,1,80,1,5,1,5 +65956,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,86986.68246,89821.123051,2,94,12,12,NA,5,5,1,1,0,1,37,1,4,1,3 +65957,7,2,2,47,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,30039.01139,30195.662279,2,95,8,8,3.44,2,2,0,0,0,2,24,1,4,5,NA +65958,7,2,2,13,NA,3,3,1,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,34778.638222,37920.357213,3,91,7,7,1.1,7,7,0,4,0,1,40,1,4,1,3 +65959,7,2,1,13,NA,1,1,1,13,159,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,1,2,1,1,2,2,1,20560.901695,20875.182272,2,96,3,3,0.46,5,5,1,2,0,1,37,1,1,1,2 
+65960,7,2,2,0,4,4,4,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4137.127382,4386.966342,2,97,3,3,0.33,6,6,2,0,0,2,32,1,2,1,3 +65961,7,2,2,0,0,3,3,1,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17773.146728,17119.196492,2,98,99,99,NA,7,7,1,1,1,1,19,1,3,NA,NA +65962,7,2,2,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,16966.723528,16502.38463,2,92,2,2,0.57,2,2,0,0,0,2,56,1,3,2,NA +65963,7,2,2,60,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,3,2,NA,1,2,1,NA,NA,NA,1,2,1,3,7575.470578,7867.856697,2,92,12,NA,NA,7,1,0,0,2,1,53,2,3,1,3 +65964,7,2,1,13,NA,4,4,2,13,164,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18526.8521,18563.481203,1,97,7,7,1.74,4,4,0,3,0,2,32,1,4,5,NA +65965,7,2,1,6,NA,2,2,1,6,77,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,8635.515602,8675.270774,2,93,4,4,0.56,5,5,0,2,0,1,37,NA,NA,1,1 +65966,7,2,1,19,NA,4,4,2,19,230,2,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16658.161391,16980.73087,2,91,2,2,0.26,4,4,0,1,0,1,20,1,3,5,NA +65967,7,2,2,36,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,35353.005268,35616.958386,2,94,8,8,2.01,4,4,1,1,0,1,44,2,4,1,4 +65968,7,2,2,8,NA,2,2,1,8,99,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12307.832776,12427.36095,2,93,14,14,3.25,4,4,0,2,0,2,46,2,5,1,4 +65969,7,2,1,14,NA,1,1,2,14,169,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20398.562455,20202.582308,2,94,14,14,4.03,4,4,0,2,0,2,33,2,2,1,NA +65970,7,2,1,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,20901.316439,20413.081322,2,91,14,14,3.42,5,5,1,0,0,2,28,NA,NA,1,NA +65971,7,2,1,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,80069.373466,86138.346669,1,98,6,6,1.31,3,3,1,0,0,1,30,1,5,1,5 +65972,7,2,2,1,17,1,1,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11512.764389,11667.597427,3,92,6,6,1.7,2,2,1,0,0,2,20,1,3,4,NA +65973,7,2,2,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9287.93089,10379.213194,2,90,7,7,1.61,4,4,1,1,1,2,65,1,3,2,NA 
+65974,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,118611.064701,118209.809508,1,91,10,10,4.49,2,2,0,0,2,1,60,1,5,1,4 +65975,7,2,2,30,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,5,4,2,2,2,2,2,2,2,1,2,2,1,19486.670926,20613.901312,2,90,5,5,1.19,3,3,1,0,1,2,60,2,1,4,NA +65976,7,2,1,13,NA,4,4,1,13,165,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13523.666124,13550.403519,2,98,3,3,0.88,2,2,0,1,0,2,38,1,4,5,NA +65977,7,2,1,3,NA,2,2,2,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11992.176902,12455.795985,1,96,10,10,2.59,5,5,1,0,0,1,32,2,4,1,2 +65978,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,145224.318312,145047.237897,1,98,6,6,2.86,1,1,0,0,0,1,57,1,4,5,NA +65979,7,2,2,2,NA,5,7,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6443.362832,6553.223138,1,98,14,14,3.9,4,4,2,0,0,1,39,1,5,1,4 +65980,7,2,1,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,13076.210481,14300.146605,2,97,5,5,1.19,3,3,0,1,0,2,41,1,3,1,2 +65981,7,2,1,8,NA,2,2,1,8,102,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,13742.678011,2,98,7,7,2.16,3,3,0,1,0,2,51,1,1,1,2 +65982,7,2,2,4,NA,3,3,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,78165.891242,86270.974005,1,101,6,6,1.28,4,4,2,0,0,1,44,1,4,1,4 +65983,7,2,1,9,NA,4,4,2,9,112,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,8017.552697,8398.795399,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +65984,7,2,2,16,NA,5,6,2,16,196,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11225.761275,11659.847432,2,91,8,8,2.34,4,4,0,2,0,1,56,2,5,1,5 +65985,7,2,2,7,NA,5,6,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5139.061504,5511.809249,2,103,77,77,NA,5,5,0,2,0,2,39,2,5,1,5 +65986,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,82864.936499,86441.088971,1,99,15,15,5,4,4,0,2,0,2,44,1,5,1,5 +65987,7,2,2,29,NA,5,7,1,NA,NA,2,NA,2,1,6,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,47970.616628,49304.059008,1,102,10,10,3.22,4,4,0,0,2,2,29,2,5,5,NA 
+65988,7,2,1,20,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21763.209029,22385.504537,2,92,4,1,0.23,4,1,0,0,0,1,21,1,4,5,NA +65989,7,2,1,1,18,2,2,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8326.330571,8424.765655,2,93,3,3,0.37,5,5,3,0,0,1,28,2,1,6,NA +65990,7,2,1,53,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,1,2,1,1,2,2,1,2,1,NA,17232.67865,17489.495288,1,100,99,99,NA,6,6,0,1,0,1,53,2,2,1,3 +65991,7,2,2,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,118761.81384,121363.708734,3,91,15,6,2.75,3,1,0,0,0,2,26,1,4,5,NA +65992,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11699.431733,12601.246533,1,96,14,14,5,2,2,0,0,2,2,61,1,3,1,3 +65993,7,2,2,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,149765.47604,152732.36321,1,92,8,8,2.17,4,4,0,1,2,2,80,1,3,2,NA +65994,7,2,1,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7903.072331,7964.892648,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +65995,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,21756.194073,0,1,93,2,2,0.59,1,1,0,0,1,2,80,1,3,2,NA +65996,7,2,2,9,NA,3,3,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20242.832328,19974.015949,2,93,6,6,2.05,2,2,0,1,0,2,30,1,4,3,NA +65997,7,2,2,17,NA,4,4,1,17,209,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13620.028009,14467.639304,2,93,9,9,2.07,5,5,0,1,0,1,55,NA,NA,5,NA +65998,7,2,2,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,7869.59899,8220.968168,2,100,3,3,1.1,1,1,0,0,1,2,67,1,4,5,NA +65999,7,2,1,69,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,4,3,NA,2,2,2,1,2,2,1,2,2,2,9404.30514,9554.950099,2,93,6,6,1.72,2,2,0,0,2,1,69,2,4,3,NA +66000,7,2,2,54,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,24287.448154,24414.10513,2,92,15,10,5,2,1,0,0,1,2,54,2,5,3,NA +66001,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,100988.137931,99957.29004,1,99,15,15,5,2,2,0,0,2,1,62,1,5,1,4 
+66002,7,2,1,11,NA,1,1,1,11,143,NA,NA,2,7,77,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11367.678664,11216.859778,2,96,77,77,NA,7,7,3,2,0,2,33,2,2,6,NA +66003,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,4,2,0.64,2,1,0,0,0,1,24,1,4,5,NA +66004,7,2,1,2,NA,1,1,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13305.770449,13463.073191,3,92,6,6,1.06,5,5,2,0,0,2,54,2,1,77,NA +66005,7,2,2,18,NA,2,2,2,18,219,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,6,6,1.21,4,4,0,0,0,2,59,2,1,6,NA +66006,7,2,2,18,NA,3,3,2,18,218,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,36586.371708,44099.282264,1,101,1,1,0.08,6,6,0,1,0,1,51,1,2,5,NA +66007,7,2,2,9,NA,3,3,2,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24070.467912,23750.822126,1,95,7,7,1.17,6,6,1,3,0,2,44,1,4,1,NA +66008,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,154825.466557,160454.355913,1,91,15,14,5,3,2,0,0,2,2,73,1,4,3,NA +66009,7,2,2,0,6,4,4,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4581.358327,5045.31579,2,97,3,3,0.66,3,3,2,0,0,2,19,1,3,NA,NA +66010,7,2,2,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,27934.372045,28826.461363,1,101,6,6,1.17,4,4,0,1,0,1,41,1,3,6,NA +66011,7,2,2,12,NA,4,4,2,13,156,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10848.628906,11198.221038,1,99,10,10,2.07,7,7,2,3,1,2,35,1,5,4,NA +66012,7,2,1,2,NA,1,1,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11160.282155,11175.362327,2,97,8,8,2.01,4,4,2,0,0,2,24,1,4,1,1 +66013,7,2,1,23,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,35338.972549,35820.670014,2,90,2,2,0.25,5,5,0,1,0,2,41,2,4,1,NA +66014,7,2,2,5,NA,4,4,2,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10389.292229,10964.092156,2,95,1,1,0.09,5,5,3,1,0,2,31,1,2,1,NA +66015,7,2,1,64,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,7926.298973,8333.304577,2,96,8,8,3.06,2,2,0,0,1,1,64,2,5,1,5 
+66016,7,2,2,4,NA,5,6,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7349.373527,7474.681451,2,91,7,7,1.56,4,4,1,1,0,2,37,2,5,1,5 +66017,7,2,2,11,NA,3,3,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,48532.852397,48387.04619,1,98,15,15,5,5,5,0,3,0,2,41,1,5,6,NA +66018,7,2,2,4,NA,4,4,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10788.880391,11608.95565,2,95,6,6,0.97,6,6,2,2,0,1,37,1,3,1,4 +66019,7,2,1,12,NA,4,4,1,12,155,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16089.133406,15900.017206,2,102,3,3,0.76,3,3,0,1,1,2,66,1,2,3,NA +66020,7,2,2,9,NA,1,1,2,9,110,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,NA,15225.935813,15373.8033,2,94,7,7,1.79,4,4,1,1,0,2,32,1,4,1,4 +66021,7,2,1,33,NA,5,7,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,25461.575888,25330.616054,1,93,15,15,5,2,2,0,0,0,2,34,1,5,1,5 +66022,7,2,2,35,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,28958.579549,28751.652767,1,96,12,12,NA,5,5,1,2,0,2,35,1,5,1,4 +66023,7,2,1,2,NA,5,6,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10123.286306,10611.741928,1,100,10,10,3.04,4,4,2,0,0,1,30,2,5,1,5 +66024,7,2,2,11,NA,1,1,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15841.451259,16252.614023,3,92,15,15,3.15,7,7,0,4,0,2,35,2,3,3,NA +66025,7,2,1,8,NA,4,4,2,8,105,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9468.438225,10001.401449,2,99,6,6,1.57,3,3,0,2,0,2,31,1,3,77,NA +66026,7,2,1,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,65748.417123,66365.048213,2,99,15,15,5,1,1,0,0,1,1,65,1,5,3,NA +66027,7,2,2,40,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,5,1,3,1,2,2,1,2,2,1,2,2,1,31235.666551,32148.463094,2,94,6,6,2.94,1,1,0,0,0,2,40,2,5,1,NA +66028,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,96499.801072,98371.919459,1,92,14,14,3.93,3,3,1,0,0,1,20,1,4,1,4 +66029,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,108410.783716,112153.23206,1,91,8,8,2.17,4,4,0,0,0,1,59,1,4,1,5 
+66030,7,2,1,13,NA,1,1,1,13,165,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,29234.272259,29395.83763,3,92,10,1,0,5,1,2,1,1,2,68,1,3,1,1 +66031,7,2,2,56,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,19183.115011,19284.554221,1,92,4,4,1.75,1,1,0,0,0,2,56,1,5,5,NA +66032,7,2,1,36,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,30626.581617,32055.135325,2,90,14,14,3.45,4,4,1,1,0,2,34,2,5,6,NA +66033,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,64581.191728,67036.946354,2,94,99,99,NA,2,2,0,0,0,2,37,1,2,1,4 +66034,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,26847.643051,29218.940922,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA +66035,7,2,1,0,5,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4461.618312,4741.503802,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +66036,7,2,2,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,62212.598767,64340.261278,1,94,9,9,3.97,2,2,0,0,2,1,74,1,4,1,4 +66037,7,2,2,14,NA,4,4,2,14,169,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11711.384457,12440.215685,1,98,10,10,3.77,3,3,0,1,0,1,48,NA,NA,1,4 +66038,7,2,2,2,NA,1,1,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9955.153132,10271.907274,2,94,77,77,NA,4,4,2,0,0,1,26,2,2,1,4 +66039,7,2,1,8,NA,3,3,2,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15653.970322,16628.805574,1,91,6,6,1.07,6,6,3,1,0,2,27,1,4,6,NA +66040,7,2,2,24,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,NA,NA,NA,1,2,2,1,40149.982555,41250.018596,2,103,6,6,1.98,2,2,0,0,0,1,24,1,2,6,NA +66041,7,2,1,58,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,31872.125984,32498.709833,2,96,8,8,3.67,2,2,0,0,0,1,58,1,3,3,NA +66042,7,2,1,15,NA,2,2,2,15,186,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18909.886675,20785.327762,2,90,8,8,1.72,5,5,1,2,0,1,20,2,1,1,2 +66043,7,2,2,64,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,166028.936087,173980.461474,2,101,12,12,NA,1,1,0,0,1,2,64,1,3,1,NA 
+66044,7,2,1,14,NA,5,6,1,15,180,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12013.02156,13498.577364,3,92,12,12,NA,5,5,0,2,0,1,47,1,3,1,3 +66045,7,2,1,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,22506.522055,23661.545768,1,98,4,4,0.75,4,4,0,1,0,2,48,1,2,1,3 +66046,7,2,2,0,0,1,1,1,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9767.083234,10416.172562,3,92,8,8,2,4,4,2,0,0,1,30,1,4,1,4 +66047,7,2,2,0,1,4,4,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3594.351671,3811.412689,2,99,6,6,1.11,5,5,1,2,0,2,41,1,2,5,NA +66048,7,2,2,6,NA,5,6,1,6,72,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10387.513218,11140.943027,1,92,77,77,NA,4,4,1,1,0,2,40,2,4,1,4 +66049,7,2,1,73,NA,2,2,2,NA,NA,2,NA,2,2,8,NA,5,1,NA,2,2,2,2,2,2,2,2,2,NA,12343.565567,12789.649007,3,90,10,10,3.04,4,4,0,0,2,2,80,2,1,3,NA +66050,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,10072.885959,10423.387676,2,100,2,2,0.78,1,1,0,0,1,2,80,1,4,2,NA +66051,7,2,1,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,30451.148148,30372.925419,2,101,4,4,0.73,5,5,1,2,0,1,40,1,5,1,5 +66052,7,2,2,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,138322.767578,145546.941919,1,101,7,7,2.31,2,2,0,1,0,2,43,1,4,3,NA +66053,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,133492.667054,133041.068157,1,95,14,14,5,2,2,0,0,2,2,65,1,4,1,4 +66054,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,95214.22557,100722.009256,1,97,15,15,5,4,4,1,1,0,2,33,1,5,1,3 +66055,7,2,1,4,NA,4,4,1,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7311.663111,7763.016267,2,93,4,4,0.56,5,5,2,1,0,1,27,1,2,6,NA +66056,7,2,2,57,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,23325.73926,22943.576186,1,101,10,10,5,1,1,0,0,0,2,57,1,3,3,NA +66057,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,24004.078396,24148.434886,2,99,2,2,0.81,1,1,0,0,0,2,48,1,2,5,NA 
+66058,7,2,2,74,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,64463.340883,65151.773075,1,91,8,8,3.4,2,2,0,0,2,2,74,1,4,1,2 +66059,7,2,1,60,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,11019.434708,11234.197915,3,91,15,15,4.47,4,4,0,1,2,2,79,1,4,3,NA +66060,7,2,2,8,NA,2,2,2,8,102,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15148.721588,15796.67129,2,91,8,1,0,3,1,0,2,0,2,33,2,4,5,NA +66061,7,2,1,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,6,NA,1,2,2,1,2,2,1,2,2,1,22295.761589,23482.073935,3,92,4,4,1.34,1,1,0,0,0,1,34,1,1,6,NA +66062,7,2,1,7,NA,3,3,2,8,96,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23729.905536,29042.69522,1,101,3,3,0.61,4,4,1,2,0,1,38,1,2,4,NA +66063,7,2,1,0,0,1,1,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4944.997189,5222.307037,2,103,13,13,NA,5,5,2,1,0,1,32,2,2,1,2 +66064,7,2,2,3,NA,1,1,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,21465.084926,22148.063325,1,92,4,4,0.74,4,4,1,1,0,1,42,2,3,1,4 +66065,7,2,2,2,NA,5,6,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8173.816615,8894.95474,1,97,14,14,4.5,3,3,1,0,0,1,30,1,5,1,5 +66066,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,34718.816885,38024.444986,1,91,15,15,5,3,3,0,0,2,1,60,1,5,1,3 +66067,7,2,2,7,NA,4,4,2,7,95,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10994.192555,11605.838362,2,97,3,3,0.46,5,5,0,3,0,1,40,1,2,1,3 +66068,7,2,1,76,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,6945.351825,7467.825932,1,103,5,5,0.65,6,6,0,0,1,2,26,2,4,5,NA +66069,7,1,2,55,NA,2,2,NA,NA,NA,2,NA,2,1,7,NA,2,4,NA,2,2,2,2,2,2,NA,NA,NA,NA,24902.414229,0,1,90,77,77,NA,2,2,0,0,0,2,55,2,2,4,NA +66070,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,12174.606153,16326.594828,1,90,7,7,1.3,5,5,1,2,1,2,62,1,2,2,NA +66071,7,2,2,65,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11862.436765,13289.857171,1,90,7,7,3.85,1,1,0,0,1,2,65,1,3,1,NA 
+66072,7,1,1,77,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,38226.070503,0,1,99,NA,NA,NA,2,2,0,0,2,1,77,1,3,1,NA +66073,7,2,1,80,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,1,2,NA,2,2,2,1,2,2,2,2,2,NA,8388.502332,8563.888023,2,92,12,12,NA,7,7,0,1,2,2,64,2,1,2,NA +66074,7,1,1,42,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,18790.641284,0,2,100,15,15,5,4,3,0,2,0,1,42,1,3,5,NA +66075,7,2,1,69,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,3,12579.986433,13271.133625,1,92,7,7,2.1,3,3,0,0,2,1,37,2,5,5,NA +66076,7,2,1,17,NA,2,2,1,17,213,2,NA,2,2,3,11,NA,NA,NA,2,2,2,1,2,2,2,2,2,2,22721.243258,23068.545411,2,102,5,5,0.59,7,7,1,3,0,1,37,2,1,6,NA +66077,7,2,1,63,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22234.74698,22519.946154,2,94,15,15,5,2,2,0,0,1,1,63,1,5,1,4 +66078,7,2,1,3,NA,3,3,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,32718.806351,38337.758985,1,94,6,6,1.21,4,4,2,0,0,1,27,1,2,1,2 +66079,7,2,1,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,26135.885159,26946.665552,2,101,4,4,1.65,1,1,0,0,0,1,54,1,3,5,NA +66080,7,2,1,8,NA,4,4,2,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10651.061092,11315.742189,1,90,6,6,1.57,3,3,0,1,0,2,36,1,3,5,NA +66081,7,1,1,8,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14610.93355,0,2,102,14,14,3.8,4,4,0,2,0,1,47,1,4,1,4 +66082,7,2,2,9,NA,4,4,1,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,10332.067017,12551.283611,1,100,9,9,2.6,4,4,0,1,0,1,45,1,3,1,3 +66083,7,2,1,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,36038.266622,37278.43838,2,101,3,3,0.3,7,7,1,2,0,2,50,1,2,4,NA +66084,7,2,2,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,73204.401173,80916.420945,2,91,7,7,3.13,1,1,0,0,1,2,76,1,3,3,NA +66085,7,2,2,5,NA,5,7,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27969.562936,30869.748923,3,91,7,7,1.57,4,4,2,0,0,2,29,2,3,1,3 
+66086,7,1,2,80,NA,2,2,NA,NA,NA,2,NA,2,1,4,NA,1,2,NA,2,1,2,1,2,2,NA,NA,NA,NA,10430.111347,0,2,90,7,7,0.89,7,7,1,3,3,1,60,2,3,1,3 +66087,7,2,2,18,NA,2,2,2,18,223,2,NA,2,1,5,12,NA,NA,NA,1,2,2,2,2,1,1,2,2,1,19458.108146,21037.021471,2,91,4,4,0.94,3,3,0,1,0,2,50,2,1,4,NA +66088,7,2,1,2,NA,4,4,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5662.231921,5835.178913,1,99,5,5,1.13,3,3,1,1,0,2,30,1,1,4,NA +66089,7,2,1,34,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17802.31438,18635.96967,1,102,3,3,0.92,1,1,0,0,0,1,34,1,4,5,NA +66090,7,2,2,25,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,NA,NA,NA,NA,50915.06085,51518.363256,3,92,7,7,2.1,3,3,1,1,0,2,25,1,4,5,NA +66091,7,2,1,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,32980.717958,33092.243754,1,95,3,3,0.76,3,3,0,0,1,1,41,1,2,1,4 +66092,7,2,2,68,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,17243.546687,17909.086027,1,92,9,9,3.64,2,2,0,0,2,1,77,1,5,1,4 +66093,7,2,2,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,29010.447112,32633.46257,1,100,5,5,2.15,1,1,0,0,0,2,41,1,4,5,NA +66094,7,2,1,25,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,5,5,NA,1,2,2,1,2,2,1,2,2,3,14385.653726,15564.966804,2,101,8,6,2.8,2,1,0,0,0,1,24,2,5,5,NA +66095,7,2,2,72,NA,3,3,2,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,64368.917314,67563.498341,1,90,8,8,3.3,2,2,0,0,2,1,77,2,2,1,5 +66096,7,2,2,26,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,34906.069211,35828.732314,1,103,1,1,0.03,3,3,0,0,0,1,50,1,2,3,NA +66097,7,2,2,11,NA,5,7,2,11,134,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16195.492817,16951.263709,3,91,5,5,0.65,7,7,0,4,0,2,39,1,3,4,NA +66098,7,2,1,29,NA,4,4,2,NA,NA,1,1,2,1,3,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15920.644312,16085.139317,1,96,7,7,2.23,3,3,1,0,0,1,29,2,5,1,5 +66099,7,2,1,15,NA,5,6,1,16,192,NA,NA,2,2,2,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10346.302718,11006.15627,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 
+66100,7,2,2,16,NA,4,4,2,16,203,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11711.384457,11743.959438,2,95,4,4,0.84,3,3,0,1,0,2,40,1,3,3,NA +66101,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,48943.054903,67007.111083,2,98,1,1,0.23,2,2,1,0,0,2,20,1,3,6,NA +66102,7,2,2,2,NA,4,4,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11846.491502,12833.518532,2,101,3,3,0.46,5,5,1,2,0,1,34,1,2,6,NA +66103,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,101419.325386,103801.228657,1,100,6,6,1.31,3,3,0,0,2,1,65,1,5,1,5 +66104,7,2,2,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,32467.087681,32793.115,2,101,3,3,0.3,7,7,1,2,0,2,50,1,2,4,NA +66105,7,2,1,6,NA,3,3,2,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,68423.584566,71489.623895,1,98,15,15,3.7,5,5,2,1,0,1,34,1,5,1,5 +66106,7,2,2,2,NA,2,2,2,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7660.112391,8143.030995,2,99,13,13,NA,6,6,2,1,0,2,31,1,4,6,NA +66107,7,2,1,16,NA,4,4,2,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13416.172328,13756.125082,1,96,14,14,2.19,7,7,0,2,0,1,39,1,2,1,3 +66108,7,2,1,53,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,1,1,2,2,1,2,1,3,20111.196953,20296.097622,1,92,4,4,0.76,4,4,0,0,0,2,53,2,1,1,1 +66109,7,2,1,32,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,15561.574634,17154.493542,1,98,6,6,0.81,6,6,0,4,0,2,34,NA,NA,1,2 +66110,7,2,1,63,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105578.507837,104500.803223,1,99,15,15,5,2,2,0,0,2,1,63,1,5,1,NA +66111,7,2,2,26,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,26311.803433,25091.046888,1,97,15,15,5,6,6,0,1,1,2,53,1,4,1,NA +66112,7,1,2,33,NA,4,4,NA,NA,NA,2,NA,2,2,2,NA,3,5,3,1,2,1,1,2,1,NA,NA,NA,NA,29610.008111,0,2,93,1,1,0.2,2,2,1,0,0,2,33,2,3,5,NA +66113,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,105141.812429,109679.354704,1,98,15,15,5,4,4,1,1,0,1,40,1,4,1,5 
+66114,7,2,2,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,21306.824647,21522.187676,2,98,3,3,0.38,5,5,0,4,0,2,39,1,4,5,NA +66115,7,2,2,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,196995.351093,202246.928022,1,91,10,10,3.51,3,3,0,0,1,1,21,1,4,5,NA +66116,7,2,1,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,28542.421068,28714.346279,2,101,5,5,1.36,2,2,0,0,1,1,79,1,4,2,NA +66117,7,2,2,16,NA,3,3,2,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,65698.166619,66553.178083,2,95,15,15,5,2,2,0,1,0,2,52,1,5,3,NA +66118,7,2,1,10,NA,1,1,1,10,126,NA,NA,2,2,2,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13581.60325,14182.420159,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +66119,7,2,1,14,NA,3,3,1,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,59545.101745,58762.542429,1,102,8,8,1.6,7,7,0,4,0,2,39,1,4,1,4 +66120,7,2,2,3,NA,4,4,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8451.169331,8918.740308,2,99,13,13,NA,5,5,2,0,0,2,21,1,3,5,NA +66121,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,146181.198007,146233.940601,2,91,15,15,5,3,2,0,0,1,1,47,1,5,5,NA +66122,7,2,1,2,NA,3,3,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47507.757497,52569.646909,1,101,8,8,1.85,5,5,3,0,0,2,31,1,2,1,2 +66123,7,2,2,29,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,35250.588028,36394.049804,2,95,4,4,0.79,3,3,1,0,0,1,50,1,4,6,NA +66124,7,2,2,37,NA,1,1,1,NA,NA,2,NA,2,1,5,NA,1,1,2,2,2,2,1,2,2,1,2,2,2,38218.668882,37878.487888,2,102,7,7,1.04,7,7,1,2,0,2,37,2,1,1,2 +66125,7,2,2,46,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,37411.196266,38309.127338,1,102,14,14,4.59,3,3,0,0,1,2,46,1,4,1,1 +66126,7,2,2,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12751.545122,13320.887973,2,101,9,9,4.08,2,2,0,0,2,2,67,1,5,1,3 +66127,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,11696.973403,11982.125462,2,97,7,7,2.65,2,2,0,1,1,2,61,1,4,3,NA 
+66128,7,2,2,1,14,2,2,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8230.248767,8808.52375,2,100,15,15,4.26,5,5,1,1,0,1,54,1,4,1,5 +66129,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,97050.018399,96721.703127,1,99,12,12,NA,1,1,0,0,1,2,60,1,5,3,NA +66130,7,2,2,0,1,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24719.075551,24047.650564,2,91,10,10,3.4,3,3,1,0,0,2,32,1,5,1,5 +66131,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,77778.949308,78018.093799,1,101,6,6,0.97,7,7,2,1,0,1,43,1,2,1,NA +66132,7,2,1,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,29738.952706,30589.306008,2,94,6,3,0.68,3,2,0,0,0,1,26,1,4,5,NA +66133,7,2,1,40,NA,2,2,1,NA,NA,1,1,2,1,7,NA,3,6,NA,1,2,2,2,2,2,1,2,2,1,39096.402803,46193.238956,2,91,14,7,3.94,3,1,0,1,0,1,40,2,3,6,NA +66134,7,2,1,4,NA,1,1,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14505.510202,15541.607306,2,94,8,8,2.7,3,3,1,0,0,1,27,1,3,1,4 +66135,7,2,1,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,NA,NA,NA,1,2,2,1,12861.670836,12561.234288,2,99,5,5,0.65,6,6,2,1,0,2,53,1,4,3,NA +66136,7,2,1,27,NA,3,3,2,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,76458.986322,98479.173389,3,90,9,9,3.64,2,2,0,0,0,1,27,2,4,1,5 +66137,7,2,1,20,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,12135.693667,13397.909622,1,90,15,15,5,5,5,0,2,0,1,47,2,5,1,5 +66138,7,2,2,36,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16579.134475,17201.146893,3,91,15,15,5,4,4,1,1,0,1,39,2,5,1,5 +66139,7,2,1,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,21571.318341,25887.074025,1,102,7,7,1.8,5,4,1,0,2,1,47,1,3,5,NA +66140,7,2,2,63,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,11879.290971,12794.969668,2,96,77,77,NA,1,1,0,0,1,2,63,1,3,5,NA +66141,7,2,1,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,1,2,1,2,2,1,2,2,NA,69210.206708,71835.969778,1,98,14,14,4.96,2,2,0,0,2,1,71,1,5,1,5 
+66142,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,27271.751091,27454.884133,2,101,6,2,0.46,3,1,0,0,0,1,21,1,4,5,NA +66143,7,2,2,2,24,2,2,1,2,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9611.087345,9740.345081,2,93,9,9,2.6,4,4,2,0,0,2,20,1,5,1,3 +66144,7,2,2,4,NA,1,1,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19235.084509,21236.049482,3,92,7,7,1.65,4,4,1,1,0,1,27,1,3,1,3 +66145,7,2,2,5,NA,4,4,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13130.790087,14128.876607,2,102,5,3,0.63,5,4,2,1,0,1,24,1,4,6,NA +66146,7,2,2,6,NA,3,3,2,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21910.300386,22800.721264,1,99,3,3,0.88,2,2,0,1,0,2,48,1,1,3,NA +66147,7,2,2,57,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,5,2,NA,1,2,1,1,2,1,1,2,2,NA,13697.309651,14098.608838,1,103,2,2,0.53,2,2,0,0,0,1,29,2,4,5,NA +66148,7,2,1,0,6,4,4,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7434.326058,7971.593122,1,100,8,8,1.7,5,5,2,0,0,2,26,1,3,5,NA +66149,7,2,1,18,NA,4,4,2,18,227,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,18260.901254,2,101,14,14,4.03,4,4,0,1,0,2,40,1,5,1,5 +66150,7,2,2,72,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,1,2,1,2,2,1,2,2,NA,17029.680467,17622.254657,1,101,6,6,1.43,4,4,0,1,2,2,72,1,2,1,NA +66151,7,2,2,74,NA,4,4,2,NA,NA,2,NA,2,1,9,NA,2,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,16291.303936,17381.577792,1,93,2,2,0.61,2,2,0,0,1,2,45,2,3,5,NA +66152,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,13093.362366,12735.028157,1,99,7,7,1.06,7,7,3,1,0,1,38,1,4,6,NA +66153,7,2,1,78,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,15176.622228,16064.120023,1,101,4,4,1.11,2,2,0,0,1,1,78,1,4,2,NA +66154,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,68074.313029,74840.450144,1,101,3,3,1.16,1,1,0,0,1,1,74,1,2,2,NA +66155,7,2,2,19,NA,4,4,1,19,230,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11078.202838,10933.134393,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA 
+66156,7,2,2,15,NA,4,4,2,15,189,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11711.384457,11558.024533,2,95,6,6,0.86,6,6,0,4,0,2,32,1,4,6,NA +66157,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,47098.572584,50542.386793,1,95,7,7,2.72,2,2,0,0,2,1,80,1,3,1,3 +66158,7,2,1,11,NA,5,6,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9175.735601,9896.833095,3,91,15,15,5,4,4,1,1,0,1,40,2,5,1,5 +66159,7,2,2,6,NA,5,6,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9007.62445,9504.796896,2,102,8,8,1.72,5,5,0,2,1,1,63,2,5,1,5 +66160,7,2,2,36,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,2,2,1,2,2,NA,NA,NA,NA,27627.442402,28606.852107,1,99,14,14,5,2,2,0,0,1,2,64,1,3,3,NA +66161,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,14979.892428,16276.055879,1,92,3,3,0.98,2,1,0,0,1,1,53,NA,NA,5,NA +66162,7,2,2,60,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,2,2,2,2,2,2,2,2,2,2,10614.141896,11057.12801,2,93,15,15,5,2,2,0,0,2,1,60,2,5,1,5 +66163,7,2,1,12,NA,1,1,1,12,147,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18242.832494,18521.680571,1,102,6,6,1.03,6,6,0,4,0,1,34,2,2,1,1 +66164,7,2,2,6,NA,4,4,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8292.876947,8466.609309,1,102,1,1,0,5,5,0,3,0,2,41,1,4,1,4 +66165,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,26847.643051,28699.104328,1,98,5,5,1.05,3,3,1,0,0,1,24,1,3,1,3 +66166,7,2,2,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7348.24433,8211.623817,2,101,2,2,0.22,4,4,1,1,0,2,41,1,2,4,NA +66167,7,2,2,23,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,11717.478547,12264.683949,1,95,5,5,0.73,6,6,1,0,1,1,62,2,3,1,NA +66168,7,2,2,8,NA,3,3,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,53931.048037,53769.024148,2,96,7,7,2.38,2,2,0,1,0,2,30,1,4,3,NA +66169,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105295.506815,106646.104223,2,94,15,15,5,3,3,0,1,1,1,63,1,5,1,3 
+66170,7,1,1,2,24,5,7,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25107.382643,0,1,101,13,13,NA,3,3,1,0,0,2,20,1,2,6,NA +66171,7,2,2,6,NA,4,4,2,6,80,NA,NA,2,1,3,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9235.947079,9578.084112,1,96,9,9,2.78,4,4,0,2,0,1,54,2,5,4,NA +66172,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,53634.754806,54437.113731,1,98,3,3,0.73,3,3,0,0,0,1,52,1,4,1,3 +66173,7,2,2,32,NA,1,1,1,NA,NA,2,NA,2,1,99,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,39561.667842,40925.159766,2,98,2,2,0.27,4,4,2,1,0,2,32,2,2,5,NA +66174,7,2,2,4,NA,4,4,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,11231.392369,2,100,5,5,0.88,5,5,2,1,0,2,30,1,4,6,NA +66175,7,2,1,13,NA,3,3,2,13,162,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,30943.024697,32205.010072,1,98,3,3,0.5,5,5,0,3,0,2,56,1,3,3,NA +66176,7,2,1,0,5,3,3,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,30452.785006,32601.018115,3,92,15,15,5,4,4,2,0,0,1,38,1,5,1,5 +66177,7,2,1,44,NA,4,4,1,NA,NA,2,NA,2,2,1,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,22480.961662,23102.216297,1,98,6,6,1.57,3,3,0,0,2,1,66,2,2,1,4 +66178,7,2,1,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,10420.275705,10827.833191,2,96,14,14,3.06,5,5,1,1,1,2,54,1,3,6,NA +66179,7,2,2,23,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,1,NA,NA,NA,1,2,2,1,16937.04417,17660.28577,1,92,2,2,0.33,5,5,0,1,0,1,51,2,1,4,NA +66180,7,2,2,15,NA,5,6,2,15,183,NA,NA,1,1,NA,9,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,7284.336217,7807.466703,3,90,2,2,0.45,3,3,0,1,0,1,55,2,1,1,3 +66181,7,2,2,11,NA,3,3,2,11,138,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,58907.362493,59749.51689,1,91,14,14,4.03,4,4,0,2,0,1,52,1,4,1,5 +66182,7,2,1,7,NA,3,3,2,7,87,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25019.74954,25881.463934,1,101,3,3,0.44,5,5,0,3,0,1,35,1,3,1,4 +66183,7,2,2,14,NA,4,4,2,14,173,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14143.201945,13910.07431,2,97,2,2,0.33,4,4,2,1,0,2,34,1,2,5,NA 
+66184,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25807.688156,25402.018879,1,100,15,15,5,4,4,0,0,0,1,54,1,5,1,5 +66185,7,2,2,2,NA,3,3,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23729.271287,24041.081904,1,94,3,3,0.65,3,3,1,0,0,2,20,1,4,6,NA +66186,7,2,2,45,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,1,NA,2,2,1,1,2,1,2,2,2,2,40337.933888,43242.784835,2,98,13,13,NA,5,5,0,2,0,1,48,2,1,1,2 +66187,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,26847.643051,28699.104328,1,98,5,3,1.07,4,1,2,0,0,1,24,1,3,6,NA +66188,7,2,1,7,NA,2,2,1,7,88,NA,NA,2,1,2,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11778.213296,12552.114189,2,93,5,5,0.87,4,4,1,1,0,1,41,2,5,1,3 +66189,7,2,1,47,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,31640.296506,32716.968709,2,94,12,12,NA,4,4,0,2,0,1,47,2,2,1,2 +66190,7,2,2,1,18,5,7,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7280.108168,7886.672866,3,91,14,14,3.17,6,6,1,3,0,1,39,1,4,1,5 +66191,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,24382.996387,23715.691726,1,92,3,3,0.98,2,2,0,0,1,1,70,1,2,1,2 +66192,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,32785.873783,35446.092694,1,94,9,9,3.8,2,2,0,0,2,1,80,1,4,1,4 +66193,7,2,2,25,NA,4,4,2,NA,NA,2,NA,2,2,1,NA,4,1,2,1,2,1,1,2,2,NA,NA,NA,NA,21640.010524,20942.104189,1,93,6,6,1.35,3,3,0,1,0,1,32,2,4,1,4 +66194,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,149804.257477,155051.140087,1,97,15,15,5,3,3,0,1,2,2,63,1,5,1,NA +66195,7,2,2,9,NA,4,4,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7619.934086,8032.046596,2,103,6,6,1.3,4,4,1,1,0,2,26,1,4,1,3 +66196,7,2,2,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,NA,NA,NA,NA,36053.766709,35796.14047,1,100,5,5,0.85,5,5,0,2,0,2,54,1,2,2,NA +66197,7,2,2,60,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,10536.096735,10975.825598,2,93,5,5,1.32,2,2,0,0,2,1,71,2,4,1,1 
+66198,7,2,2,46,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,11762.034222,11824.231183,3,90,77,77,NA,7,7,1,2,0,1,41,2,3,6,NA +66199,7,2,2,58,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,2,1,NA,2,2,2,2,2,2,1,2,2,1,19676.781212,20033.616894,2,103,12,12,NA,3,3,0,0,1,1,60,2,2,1,2 +66200,7,2,1,10,NA,1,1,1,10,129,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12577.115885,12876.06354,2,96,7,7,1.79,4,4,0,2,0,1,43,2,3,1,2 +66201,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,152858.509804,159455.333031,1,101,8,8,2.24,4,4,0,1,0,1,45,1,4,1,NA +66202,7,2,1,61,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,1,1,2,1,1,2,1,3,5201.567667,6086.620135,2,92,99,77,NA,7,3,3,3,1,1,61,2,1,1,3 +66203,7,2,1,17,NA,5,7,2,17,214,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13748.325141,13775.506705,1,97,15,15,5,6,6,0,1,1,2,53,1,4,1,NA +66204,7,2,2,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,36053.766709,37031.002154,1,100,14,14,3.47,4,4,2,0,0,1,34,1,5,1,5 +66205,7,2,2,28,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,112991.277498,114001.49655,1,93,14,14,5,1,1,0,0,0,2,28,1,5,3,NA +66206,7,2,1,62,NA,4,4,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,7973.883342,8036.257566,1,100,9,9,2.88,3,3,0,0,1,1,62,2,5,1,4 +66207,7,2,1,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,7663.797586,8115.960731,1,96,7,7,3.58,1,1,0,0,1,1,80,1,1,2,NA +66208,7,2,1,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,35958.875702,38183.099815,1,98,8,8,2.62,4,3,0,0,0,1,53,1,2,1,3 +66209,7,2,1,15,NA,4,4,1,16,192,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13731.625553,14242.275028,1,100,14,14,5,3,3,0,1,1,2,44,1,4,5,NA +66210,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,9113.905743,10210.592308,3,90,3,3,0.78,3,3,0,1,2,1,80,2,3,1,2 +66211,7,2,2,8,NA,5,7,1,9,108,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7877.267255,8558.823016,2,94,15,15,4.44,5,5,0,1,1,2,74,1,5,2,NA 
+66212,7,2,1,8,NA,1,1,1,8,106,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,2,16747.549238,17012.570163,2,96,4,4,0.81,4,4,1,1,0,1,36,2,1,6,NA +66213,7,2,1,18,NA,5,7,1,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,93641.872053,93270.118243,2,98,10,10,3.04,4,4,0,0,0,1,55,1,4,1,4 +66214,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,1,2,NA,30548.472436,31485.669458,1,101,7,7,1.83,3,3,0,0,2,1,67,1,1,1,2 +66215,7,2,1,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,28813.038041,28739.023279,1,94,3,3,1.16,2,1,0,0,0,2,36,1,3,5,NA +66216,7,2,2,8,NA,5,6,1,8,105,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,9620.269705,11100.236566,2,91,6,6,1.26,5,5,0,2,0,2,47,2,1,1,1 +66217,7,2,2,51,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,1,1,2,1,2,2,2,2,28701.155283,32172.514581,2,98,7,7,2.25,3,3,0,1,0,2,51,2,1,1,1 +66218,7,2,2,43,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,3,3,1,2,2,1,2,2,1,2,2,1,34666.955582,39600.756419,1,97,6,6,2.24,2,1,0,0,0,2,43,1,4,3,NA +66219,7,2,2,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,138322.767578,143283.052066,1,101,7,7,2.64,2,2,0,0,0,1,57,1,2,1,2 +66220,7,2,1,19,NA,3,3,2,20,NA,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,56223.281913,55799.760014,1,99,8,8,2.81,3,3,0,1,0,1,19,1,4,NA,NA +66221,7,2,2,59,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11446.604914,11655.034159,2,92,3,3,0.7,3,3,0,0,0,1,58,2,1,1,1 +66222,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16382.74577,17320.281883,2,99,7,7,1.63,4,4,0,2,0,1,53,1,3,3,NA +66223,7,2,2,9,NA,3,3,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,43528.185872,44344.05334,1,92,10,10,2.1,6,6,1,1,0,2,29,1,4,1,2 +66224,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,42992.537371,49508.460769,2,95,2,2,0.54,1,1,0,0,1,2,80,1,3,2,NA +66225,7,2,2,0,4,4,4,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4439.36229,4707.453058,2,100,14,14,3.06,5,5,1,0,0,1,50,1,5,1,5 
+66226,7,1,1,2,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11853.772636,0,2,96,3,3,0.54,4,4,1,1,0,1,29,1,2,1,2 +66227,7,2,1,66,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,6357.471593,6706.752466,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +66228,7,2,1,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,115391.113177,120921.742093,1,102,14,8,4.41,2,1,0,0,0,1,27,1,4,5,NA +66229,7,2,1,71,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,51668.474366,53628.722404,1,99,15,15,5,2,2,0,0,2,2,68,1,3,1,5 +66230,7,2,2,64,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,3,4,NA,2,2,2,2,2,2,1,2,1,2,13676.984152,14509.761798,2,91,8,8,1.85,5,5,0,2,1,1,39,2,3,1,4 +66231,7,2,1,0,4,1,1,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5323.166653,5657.098649,3,91,6,6,0.89,7,7,1,1,0,1,59,2,1,1,1 +66232,7,2,2,17,NA,5,7,2,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11975.458482,12291.118947,1,97,15,15,5,6,6,0,3,0,1,47,1,5,1,5 +66233,7,2,2,25,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,16937.04417,18839.574737,1,92,14,14,3.3,4,4,2,0,0,1,28,1,4,1,4 +66234,7,2,1,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,108504.032354,111983.725775,1,92,6,6,1.62,3,3,1,0,0,2,26,1,5,1,5 +66235,7,2,2,33,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,17978.142628,18308.053591,1,91,5,5,0.89,4,4,2,0,0,1,39,1,4,1,5 +66236,7,2,1,3,NA,1,1,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20874.345556,20902.551716,3,92,12,12,NA,4,4,2,0,0,1,30,1,3,1,4 +66237,7,2,1,52,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,2,1,2,2,2,2,1,2,26554.904329,26165.252043,2,93,8,8,2.49,3,3,0,0,0,1,52,2,2,1,4 +66238,7,2,2,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,7869.59899,8538.749518,2,100,6,6,2.39,1,1,0,0,1,2,61,1,2,2,NA +66239,7,2,1,80,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,14765.705646,15876.477147,3,91,14,14,5,1,1,0,0,1,1,80,1,5,2,NA 
+66240,7,2,1,47,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,18194.001026,18423.321012,1,92,14,14,3.69,4,4,0,2,0,1,47,2,4,4,NA +66241,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,4,4,NA,2,2,2,1,2,2,1,2,2,2,18341.621382,19145.40337,2,90,5,5,1.19,3,3,0,0,0,2,50,2,4,4,NA +66242,7,2,2,14,NA,1,1,1,15,180,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16781.078148,17378.8338,2,103,2,2,0.22,7,7,0,3,0,2,39,2,1,5,NA +66243,7,2,2,39,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,26465.930618,27183.288271,2,97,15,15,5,4,4,0,2,0,1,47,NA,NA,6,NA +66244,7,2,2,3,NA,1,1,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13366.393396,14756.857003,2,94,4,4,0.81,4,4,2,0,0,1,26,2,2,1,2 +66245,7,2,1,47,NA,5,6,2,NA,NA,2,NA,2,2,7,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18094.847125,18029.478224,3,91,10,10,4.42,2,2,0,0,0,1,47,2,2,1,NA +66246,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,NA,NA,NA,1,2,2,1,152858.509804,163392.947883,1,95,NA,NA,NA,5,5,0,2,0,2,37,1,3,1,NA +66247,7,2,1,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,9850.66662,10431.854777,2,98,2,2,0.75,1,1,0,0,1,1,70,1,2,2,NA +66248,7,1,1,4,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,81508.607355,0,1,91,8,8,2.51,3,3,1,0,0,2,24,1,4,1,3 +66249,7,2,2,27,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,18801.993237,20649.846581,2,96,8,6,2.39,2,1,0,0,0,1,26,2,5,5,NA +66250,7,2,2,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,1,2,2,1,2,2,1,55628.505329,57174.814545,3,92,4,3,0.52,5,4,0,0,0,2,57,1,4,1,2 +66251,7,2,1,33,NA,1,1,1,NA,NA,2,NA,2,2,77,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,53303.690379,52901.390578,1,100,4,4,0.78,4,4,0,0,1,1,33,2,1,1,1 +66252,7,2,1,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11017.136221,11794.318892,1,101,1,1,0,2,2,1,0,0,2,32,1,3,3,NA +66253,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,77778.949308,80709.488833,1,101,9,9,2.6,4,4,0,2,0,2,38,1,4,1,4 
+66254,7,2,2,64,NA,2,2,1,NA,NA,2,NA,2,1,2,NA,2,5,NA,2,2,2,2,2,2,2,2,2,2,10235.0654,10662.230581,2,93,7,7,2.31,2,2,0,0,1,2,40,2,4,5,NA +66255,7,2,1,12,NA,1,1,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,32326.52031,34735.818434,2,98,13,13,NA,5,5,0,2,0,1,48,2,1,1,2 +66256,7,2,2,49,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,5,4,NA,2,2,2,1,2,2,2,2,2,2,31235.666551,31909.585756,3,92,8,8,2.01,4,4,1,0,0,2,49,2,5,4,NA +66257,7,2,1,15,NA,4,4,2,15,181,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9509.185342,11220.147089,2,99,2,2,0.2,7,7,1,2,1,1,63,1,1,2,NA +66258,7,2,1,14,NA,5,6,1,14,169,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7721.59541,8116.704638,2,96,8,8,2.7,3,3,0,1,0,1,49,2,5,1,5 +66259,7,2,1,45,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,3,16898.996353,17358.625105,1,92,8,8,2.3,4,4,0,1,0,2,41,NA,NA,1,3 +66260,7,2,2,4,NA,5,6,2,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,5373.703942,1,91,10,10,3.78,3,3,1,0,0,1,35,2,5,1,5 +66261,7,2,1,52,NA,3,3,2,NA,NA,2,NA,2,2,7,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,166897.201244,169896.236244,2,91,15,3,0.9,7,1,0,0,1,1,49,NA,NA,5,NA +66262,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,52209.836905,0,1,98,6,6,1.95,2,2,0,0,2,1,80,1,3,1,3 +66263,7,2,1,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,105401.67423,106753.633419,2,92,14,14,5,1,1,0,0,1,1,65,1,5,5,NA +66264,7,2,2,64,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,16352.915834,17178.789759,3,92,5,5,1.26,3,3,0,0,1,1,59,2,1,1,1 +66265,7,2,1,16,NA,4,4,2,16,194,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11462.850569,11753.30805,2,100,4,4,0.69,5,5,0,3,0,1,38,1,3,6,NA +66266,7,2,2,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,25052.373156,28677.21758,1,95,9,9,2.3,5,5,2,1,0,1,28,1,3,1,3 +66267,7,2,1,0,9,3,3,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17196.879565,17385.815001,2,94,14,14,2.63,6,6,1,3,0,1,39,1,4,1,4 
+66268,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,166897.201244,167183.296853,2,91,15,15,5,3,3,0,0,0,2,54,1,4,1,4 +66269,7,2,1,19,NA,3,3,2,19,229,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,88582.269653,92728.90632,1,91,15,15,5,3,3,0,0,0,2,57,1,3,1,3 +66270,7,2,1,76,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12101.489198,12967.543396,1,93,13,13,NA,2,2,0,0,2,1,76,2,5,1,5 +66271,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,14337.499427,15385.847212,1,95,3,3,0.82,2,2,0,0,2,2,80,1,3,1,2 +66272,7,2,2,29,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,4,1,3,1,2,2,1,2,2,1,2,2,1,11408.12687,12544.799584,2,90,5,5,1.08,3,3,0,1,0,2,29,2,4,1,5 +66273,7,2,2,13,NA,2,2,2,13,161,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13824.001771,14551.102227,2,90,8,8,2.7,3,3,0,1,0,2,31,1,5,4,NA +66274,7,2,1,5,NA,1,1,2,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11776.305841,11467.607256,1,90,4,4,0.46,7,7,2,3,0,2,34,2,1,6,NA +66275,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18626.118419,18025.412332,1,93,7,7,1.79,4,4,0,0,0,2,37,2,4,6,NA +66276,7,2,2,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,26595.398371,26621.513872,1,94,3,3,0.79,2,2,0,0,0,1,51,NA,NA,1,4 +66277,7,2,2,19,NA,5,7,1,19,232,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9200.381964,9723.240452,2,101,2,1,0,4,1,0,0,0,2,19,1,4,NA,NA +66278,7,2,2,36,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,1,6,2,2,2,2,1,2,2,2,2,2,2,26494.281052,25779.409691,3,90,7,7,1.48,5,5,0,1,0,1,43,2,1,6,NA +66279,7,2,1,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,8074.676044,8165.637965,2,92,12,12,NA,1,1,0,0,1,1,62,1,4,5,NA +66280,7,2,2,69,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,19159.496319,19898.984475,1,92,77,77,NA,4,4,0,0,2,1,59,2,5,1,5 +66281,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,175997.804296,174522.064022,1,101,7,7,2.31,2,2,0,0,0,1,44,1,3,1,2 
+66282,7,2,2,1,16,5,7,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7143.995395,7265.801592,3,91,15,15,5,4,4,2,0,0,1,35,1,5,1,5 +66283,7,2,2,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,32620.612627,33141.797376,2,98,14,14,3.36,4,4,0,2,0,1,37,1,4,1,4 +66284,7,2,1,12,NA,4,4,1,12,148,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13523.666124,13550.403519,2,98,8,8,2.43,3,3,0,2,0,2,31,1,4,1,NA +66285,7,2,1,51,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17493.087649,18732.743426,1,97,15,15,4.81,5,5,0,1,1,1,51,2,5,1,5 +66286,7,2,1,11,NA,4,4,1,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8714.559478,8865.734494,2,96,3,3,0.47,6,6,0,4,0,1,36,1,4,1,4 +66287,7,2,1,47,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15341.871992,16429.080873,1,90,15,15,5,5,5,0,2,0,1,47,2,5,1,5 +66288,7,2,2,4,NA,5,7,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7143.995395,7265.801592,3,91,15,15,5,4,4,2,0,0,1,35,1,5,1,5 +66289,7,2,1,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,116464.874823,118219.690774,2,94,7,7,3.21,1,1,0,0,0,1,41,1,5,5,NA +66290,7,2,2,63,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,162373.29329,165026.206299,2,102,9,9,4.08,2,2,0,0,2,1,70,1,5,1,5 +66291,7,2,2,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,26426.249254,26363.744856,1,99,8,8,4.59,1,1,0,0,0,2,33,1,5,5,NA +66292,7,2,2,9,NA,2,2,1,9,110,NA,NA,2,2,2,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,9897.093392,10755.962815,2,93,6,6,0.93,5,5,1,2,0,1,40,2,4,1,4 +66293,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,106709.949255,125265.093735,1,97,7,7,3.76,1,1,0,0,0,1,33,1,3,5,NA +66294,7,2,1,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,53149.251154,56449.488341,2,91,12,12,NA,2,2,0,0,2,1,76,1,5,1,2 +66295,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,46927.876098,52201.899738,1,91,7,7,4.02,1,1,0,0,1,2,80,1,3,2,NA 
+66296,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,24213.568265,24151.368594,1,99,3,3,1.1,1,1,0,0,0,1,43,1,5,3,NA +66297,7,2,1,68,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,6358.062034,6381.990744,1,99,7,7,3.03,2,2,0,0,2,1,68,1,3,1,3 +66298,7,2,2,61,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,5,3,NA,2,2,2,2,2,2,1,2,2,1,8725.210615,9089.36131,2,93,8,8,3.3,2,2,0,0,1,2,61,2,5,3,NA +66299,7,2,2,22,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,3,5,2,1,2,1,1,2,1,1,2,2,3,11232.759507,11862.956868,1,103,4,4,0.82,3,3,0,0,0,1,48,2,4,1,1 +66300,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,27738.890335,29287.813784,2,101,3,3,0.59,4,3,0,2,0,1,39,1,1,6,NA +66301,7,2,1,12,NA,4,4,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17606.165994,18260.901254,2,101,7,7,2.16,3,3,0,1,0,2,44,1,4,6,NA +66302,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,34922.33705,36456.366148,2,97,5,1,0,2,1,0,0,0,2,45,1,2,4,NA +66303,7,2,2,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,29040.300396,28103.729242,2,101,8,8,2.43,3,3,0,1,0,1,35,1,4,6,NA +66304,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,114838.671743,120356.919439,1,91,15,15,5,2,1,0,0,0,1,30,1,5,6,NA +66305,7,2,2,13,NA,4,4,2,13,158,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11711.384457,12440.215685,2,95,10,10,2.32,6,6,1,2,0,1,44,1,4,1,4 +66306,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,16058.142925,15728.179634,2,95,9,9,1.81,6,6,1,1,0,2,56,1,4,3,NA +66307,7,2,1,44,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,25454.275893,28114.898546,2,102,15,15,5,4,4,0,2,0,1,44,1,3,1,1 +66308,7,2,1,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,32461.799549,34082.06298,1,101,3,3,0.86,2,2,0,0,1,2,80,1,1,2,NA +66309,7,2,2,30,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,26546.087356,27263.66872,1,102,4,4,1.02,2,2,0,1,0,2,30,1,2,5,NA 
+66310,7,2,2,53,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12649.084278,13204.362319,3,90,3,3,0.75,3,3,0,0,0,1,55,2,4,1,3 +66311,7,2,1,39,NA,5,7,1,NA,NA,1,1,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13833.910151,14953.425412,2,103,15,15,5,3,3,0,1,0,2,37,2,5,1,5 +66312,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,28280.669788,29879.976389,1,99,14,14,5,2,2,0,0,2,1,80,1,4,1,4 +66313,7,2,2,1,17,1,1,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14326.094268,15816.39252,3,92,4,4,0.65,4,4,2,0,0,2,20,1,3,5,NA +66314,7,1,1,2,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13968.423539,0,2,102,3,3,0.7,3,3,1,0,0,1,28,2,2,1,2 +66315,7,2,1,2,NA,1,1,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13281.490378,14230.158403,1,98,8,8,1.95,4,4,1,1,1,2,59,1,3,1,1 +66316,7,2,2,0,8,3,3,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11902.236733,11578.945551,2,97,5,5,1.24,3,3,1,0,0,2,27,1,3,1,3 +66317,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,126435.397679,126594.17356,1,91,10,10,2.77,5,5,0,3,0,1,43,1,5,1,5 +66318,7,2,2,6,NA,4,4,2,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7746.357421,7953.594257,1,99,6,6,1.34,4,4,0,2,0,1,40,1,4,1,4 +66319,7,1,2,76,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,76025.86855,0,2,98,14,14,5,2,2,0,0,2,1,78,1,5,1,3 +66320,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,14519.343202,14452.61667,2,90,6,6,0.96,5,5,0,1,0,1,55,1,4,6,NA +66321,7,2,2,71,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,56693.798097,58632.718368,1,103,12,6,2.51,3,1,0,0,1,2,71,1,5,3,NA +66322,7,1,1,56,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,17796.910402,0,2,103,8,8,4.83,1,1,0,0,0,1,56,1,5,5,NA +66323,7,2,2,49,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,34954.173075,37355.359293,2,98,5,5,1.07,4,4,0,1,0,1,53,2,1,1,1 
+66324,7,2,1,1,13,4,4,1,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5853.073657,6453.970807,2,95,2,2,0.41,3,3,2,0,0,2,19,1,2,NA,NA +66325,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,18670.751147,19670.682386,1,96,10,10,2.95,4,4,0,1,0,2,34,2,3,1,5 +66326,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,16429.782444,20771.252762,3,90,14,14,5,2,2,0,0,0,1,47,1,2,5,NA +66327,7,2,1,60,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,2,2,2,2,2,2,2,2,1,2,9404.30514,9554.950099,2,93,15,15,5,2,2,0,0,2,1,60,2,5,1,5 +66328,7,2,2,12,NA,5,6,2,12,148,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11975.458482,12291.118947,1,97,15,15,4.81,5,5,0,1,1,1,51,2,5,1,5 +66329,7,2,1,6,NA,5,7,2,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7482.593572,7851.213843,1,101,4,4,0.78,4,4,1,2,0,2,31,1,4,3,NA +66330,7,2,2,2,NA,3,3,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,55441.113009,56169.627907,1,92,9,9,3.04,3,3,1,0,0,1,48,1,3,1,4 +66331,7,2,2,77,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,36067.495928,36452.676481,1,95,2,2,0.83,1,1,0,0,1,2,77,1,4,2,NA +66332,7,2,1,19,NA,4,4,1,19,238,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14848.504688,14956.646891,1,98,2,1,0.36,2,1,0,0,0,1,19,1,4,NA,NA +66333,7,2,1,6,NA,2,2,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10738.959181,10632.988891,2,93,9,9,3.14,3,3,0,2,0,2,34,2,5,3,NA +66334,7,2,1,37,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,1,1,2,2,1,2,2,2,40003.013263,42339.292617,3,91,7,7,1.42,6,6,1,3,0,1,37,2,1,1,1 +66335,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,15262.313583,15991.170182,1,98,5,5,0.74,5,5,0,3,0,1,35,1,2,6,NA +66336,7,2,1,71,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,43078.255639,45597.38383,1,93,12,12,NA,1,1,0,0,1,1,71,1,5,5,NA +66337,7,2,2,9,NA,5,6,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3972.173341,4164.719478,1,103,77,77,NA,6,6,0,2,2,1,70,NA,NA,1,1 
+66338,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,71034.153987,76359.172033,1,98,10,10,2.2,6,6,1,3,0,2,31,1,4,6,NA +66339,7,2,1,18,NA,1,1,2,18,220,2,NA,2,2,4,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26704.187335,28443.712885,1,95,9,9,2.46,4,4,0,0,0,1,42,2,2,1,3 +66340,7,2,1,0,10,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8143.831412,8871.858164,2,102,10,10,3.04,4,4,2,0,0,2,31,2,2,1,NA +66341,7,2,1,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,142634.419514,148973.13954,1,100,15,15,4.63,5,5,0,0,0,1,51,1,5,1,3 +66342,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,29733.812317,40471.383048,1,101,1,1,0,2,2,1,0,0,2,32,1,3,3,NA +66343,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,114993.808573,116714.079488,1,98,7,1,0.27,4,1,0,0,0,2,20,1,4,5,NA +66344,7,2,2,7,NA,4,4,1,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8610.003992,8918.857984,2,93,6,6,1.15,5,5,3,1,0,1,29,1,3,5,NA +66345,7,1,2,25,NA,5,6,NA,NA,NA,2,NA,2,2,2,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,16021.911789,0,1,91,9,9,2.97,3,3,1,0,0,1,31,2,5,1,5 +66346,7,2,2,17,NA,5,7,1,17,207,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8111.892084,8425.568811,3,91,15,15,5,4,4,0,2,0,1,38,2,5,1,5 +66347,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,29265.714405,31405.602677,1,95,12,12,NA,3,3,0,0,2,2,56,1,3,5,NA +66348,7,2,2,13,NA,5,7,2,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8164.811332,8249.972742,1,93,15,15,5,5,5,1,2,0,2,40,1,5,1,5 +66349,7,2,2,24,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,10550.607376,12189.917123,2,100,6,6,1.62,3,3,1,0,0,1,32,1,3,1,3 +66350,7,2,1,14,NA,1,1,2,14,171,NA,NA,2,2,4,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,19483.203262,19562.510174,2,94,7,7,1.23,6,6,2,1,0,1,33,2,1,6,NA +66351,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,53612.939104,58717.503545,1,95,15,15,5,2,2,0,0,2,2,80,1,4,1,4 
+66352,7,2,2,18,NA,2,2,1,18,224,2,NA,2,2,3,12,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,12970.724558,13432.752316,2,103,5,5,0.65,6,6,1,0,1,2,61,2,1,2,NA +66353,7,2,1,5,NA,1,1,1,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,14716.463544,14890.443704,2,96,4,4,0.69,5,5,2,0,0,2,57,2,1,4,NA +66354,7,2,2,6,NA,1,1,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,20753.369981,21498.66298,1,97,6,3,0.45,7,6,2,1,0,1,29,2,2,1,1 +66355,7,2,1,62,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,1,1,2,2,1,2,1,NA,7576.466116,7992.718813,1,95,5,5,0.73,6,6,1,0,1,1,62,2,3,1,NA +66356,7,1,1,0,1,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8390.245917,0,1,92,7,7,1.83,3,3,1,1,0,2,28,1,3,5,NA +66357,7,1,1,52,NA,1,1,NA,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,1,1,2,1,NA,NA,NA,NA,26478.915067,0,1,102,99,99,NA,5,5,0,2,1,1,52,2,1,1,1 +66358,7,2,1,64,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,131818.641085,133509.443669,1,102,15,15,5,2,2,0,0,2,2,63,1,4,1,4 +66359,7,2,2,10,NA,3,3,2,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,72263.168327,73296.260667,2,91,15,15,5,3,3,0,2,0,1,44,2,5,3,NA +66360,7,2,2,6,NA,1,1,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10118.363218,10311.586628,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +66361,7,2,1,4,NA,2,2,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,17306.08847,2,98,6,6,1.07,5,5,3,0,0,2,24,1,3,1,3 +66362,7,2,2,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,36187.441698,36468.041201,1,92,5,5,1.05,3,3,0,0,0,2,55,1,4,4,NA +66363,7,2,2,4,NA,2,2,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16921.080098,17459.476859,1,91,7,7,1.66,4,4,2,0,0,1,32,2,5,1,4 +66364,7,1,2,2,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10806.348349,0,2,91,6,6,0.93,5,5,1,2,0,2,50,2,1,5,NA +66365,7,2,1,4,NA,3,3,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23328.816247,26320.448782,1,97,6,6,1.03,6,6,2,2,0,2,38,1,5,1,4 
+66366,7,2,2,26,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,1,1,2,2,1,2,2,1,2,2,1,26388.213487,25090.218308,2,96,6,2,0.64,2,1,0,0,0,2,26,1,2,5,NA +66367,7,2,2,6,NA,4,4,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9399.281543,9696.12347,2,96,6,6,1.32,5,5,1,3,0,2,30,1,4,3,NA +66368,7,2,1,7,NA,1,1,2,7,91,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,10591.186197,11647.778431,1,90,4,4,0.46,7,7,2,3,0,2,34,2,1,6,NA +66369,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,19943.783243,19397.969296,2,95,9,9,5,1,1,0,0,0,2,48,1,4,3,NA +66370,7,2,1,2,NA,1,1,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12413.685227,13300.367814,1,102,5,5,0.98,4,4,1,1,0,2,42,2,2,6,NA +66371,7,2,1,77,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,10244.849903,10769.823368,2,99,6,6,1.98,2,2,0,0,2,1,77,1,3,1,3 +66372,7,2,1,5,NA,4,4,2,5,67,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7246.488236,7794.862746,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +66373,7,2,2,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,38724.771087,39491.917359,1,94,3,3,0.86,2,2,0,0,2,1,68,1,4,1,2 +66374,7,2,2,3,NA,1,1,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9468.006743,10197.085534,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +66375,7,2,1,31,NA,3,3,2,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,59510.728426,64240.841943,1,99,77,77,NA,4,4,1,1,0,1,31,2,3,1,3 +66376,7,2,1,10,NA,3,3,2,10,121,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23796.980721,24835.204126,1,95,5,5,1.03,4,4,0,2,0,1,33,1,3,1,3 +66377,7,2,1,43,NA,3,3,2,NA,NA,2,NA,2,1,5,NA,2,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,20650.777724,21846.156397,2,97,1,1,0.21,4,4,2,0,0,2,34,2,1,1,2 +66378,7,2,2,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,162969.911481,163428.386694,1,101,6,6,2.42,1,1,0,0,0,2,59,1,3,3,NA +66379,7,2,2,1,22,2,2,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11346.120897,12219.822862,1,98,3,3,0.4,7,7,2,3,0,2,31,2,5,1,2 
+66380,7,2,2,0,0,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,29555.41642,28752.625674,3,92,14,14,5,3,3,1,0,0,2,36,1,5,6,NA +66381,7,2,1,66,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,5854.547456,5900.343568,2,99,3,3,1.24,1,1,0,0,1,1,66,1,4,2,NA +66382,7,2,2,40,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,40337.933888,43255.958723,3,92,10,10,2.82,4,4,0,1,1,1,36,1,3,1,5 +66383,7,2,2,3,NA,5,6,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6184.163292,6565.044513,1,91,12,2,0.48,7,2,2,2,0,2,54,NA,NA,1,NA +66384,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,17246.716674,18154.82322,1,99,6,6,1.12,4,4,0,2,0,1,39,1,3,1,3 +66385,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,37453.906829,43130.398676,2,100,3,3,0.65,3,3,0,0,2,2,62,1,3,3,NA +66386,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,4,4,NA,2,2,2,2,2,2,NA,NA,NA,NA,17465.336559,18230.717255,3,90,10,10,3.04,4,4,0,0,2,2,80,2,1,3,NA +66387,7,2,2,2,NA,1,1,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,11414.885224,12293.882349,1,94,5,5,0.57,7,7,2,1,0,1,58,2,1,1,1 +66388,7,2,2,7,NA,4,4,2,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7814.742747,8106.808519,2,95,1,1,0.25,3,3,1,1,0,2,26,1,2,5,NA +66389,7,2,1,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,145342.530024,151393.302038,1,99,12,12,NA,2,2,0,0,0,1,55,1,5,1,4 +66390,7,2,2,9,NA,1,1,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,17053.854294,17778.314301,3,92,5,5,0.95,4,4,0,2,0,2,51,2,5,1,1 +66391,7,2,2,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,122655.643802,126707.897291,3,91,15,15,5,2,2,0,0,0,2,28,1,5,1,5 +66392,7,2,1,14,NA,5,6,2,14,170,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8305.829479,8595.334823,1,93,15,15,5,3,3,0,2,0,2,48,2,5,3,NA +66393,7,2,2,43,NA,5,7,1,NA,NA,2,NA,2,1,7,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,20577.778251,21052.363481,1,92,14,5,2.06,2,1,0,0,0,2,43,2,4,5,NA 
+66394,7,2,2,2,NA,1,1,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,7051.674614,7594.685042,2,103,8,8,1.29,7,7,3,1,0,2,53,2,2,4,NA +66395,7,2,2,41,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,12969.776823,15634.514863,2,90,4,4,0.81,3,3,0,1,0,2,41,2,2,6,NA +66396,7,2,2,46,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,19299.941018,19401.997997,2,102,5,5,1.08,3,3,0,1,0,2,46,2,1,5,NA +66397,7,2,1,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,14699.320127,14652.436145,2,90,6,6,0.96,5,5,0,1,0,1,55,1,4,6,NA +66398,7,2,1,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,1,2,1,1,2,1,2,2,NA,17113.447343,18123.138698,2,101,77,77,NA,2,2,0,0,2,1,70,1,2,5,NA +66399,7,2,1,61,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,6815.198656,6994.794831,3,90,15,15,5,2,2,0,0,2,1,61,1,5,1,5 +66400,7,2,2,11,NA,5,7,2,11,133,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10699.45895,11230.540406,1,97,8,8,2.51,3,3,0,2,0,2,39,2,4,2,NA +66401,7,2,2,6,NA,1,1,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14414.529053,14783.957167,2,96,1,1,0.06,5,5,2,1,0,1,27,2,3,1,4 +66402,7,2,1,60,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,15,6,2.3,3,1,0,0,2,2,73,1,4,3,NA +66403,7,2,2,5,NA,1,1,1,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,17887.570772,19748.358154,1,92,4,4,0.74,4,4,1,1,0,1,51,2,1,1,1 +66404,7,2,2,35,NA,3,3,2,NA,NA,2,NA,2,1,7,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,59095.849383,59277.549533,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +66405,7,2,1,58,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,15601.50288,15551.741356,2,99,99,3,0.9,7,1,1,0,1,1,60,NA,NA,1,NA +66406,7,2,1,10,NA,5,6,2,10,120,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7923.925927,8546.646915,1,91,77,77,NA,4,4,0,2,0,1,50,2,5,1,5 +66407,7,2,2,9,NA,2,2,2,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,15148.721588,15796.67129,2,91,2,2,0.22,4,4,0,3,0,2,45,2,5,4,NA 
+66408,7,2,2,0,6,4,4,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3756.765605,4035.952682,1,99,7,7,1.06,7,7,3,1,0,1,38,1,4,6,NA +66409,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,31335.13799,31552.004994,1,95,6,6,1.09,5,5,0,3,0,1,31,1,4,1,4 +66410,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,27356.080541,28721.124083,1,101,6,6,1.31,3,3,0,2,0,1,43,1,3,4,NA +66411,7,2,2,11,NA,4,4,2,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7814.742747,8587.431439,2,95,10,10,2.32,6,6,1,2,0,1,44,1,4,1,4 +66412,7,2,2,41,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,4,1,2,2,2,2,2,2,2,NA,NA,NA,NA,32606.880052,32776.922157,2,93,6,6,0.93,5,5,1,2,0,1,40,2,4,1,4 +66413,7,2,2,4,NA,5,7,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11412.589323,11707.167617,3,91,6,6,1.75,3,3,1,1,1,1,63,1,5,4,NA +66414,7,1,2,4,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8937.555666,0,2,99,15,15,5,3,3,1,0,0,2,34,1,5,1,5 +66415,7,2,1,1,20,2,2,1,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,15546.999135,16657.487532,3,92,NA,NA,NA,4,4,1,0,1,1,67,NA,NA,77,NA +66416,7,2,2,63,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,129999.035519,131590.908764,1,98,8,8,3.3,2,2,0,0,1,1,50,NA,NA,1,3 +66417,7,2,2,5,NA,4,4,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12819.235788,13388.4749,1,98,5,5,1.26,3,3,1,1,0,2,27,1,5,5,NA +66418,7,2,2,26,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,1,3,1,2,2,1,2,2,1,2,2,1,16844.740449,17564.040518,1,94,10,10,4.76,2,2,0,0,0,1,58,1,5,1,4 +66419,7,2,2,7,NA,1,1,1,7,87,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16664.698857,1,92,6,6,1.12,4,4,0,2,0,1,20,1,2,1,2 +66420,7,2,1,12,NA,5,6,1,12,152,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6747.259224,7022.128717,2,94,15,15,5,5,5,0,2,1,1,47,2,5,1,5 +66421,7,2,1,73,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,12146.092906,13015.339209,1,96,9,9,4.23,2,2,0,0,2,2,71,2,5,1,5 
+66422,7,2,2,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,29248.061332,30978.18811,1,102,7,7,1.57,4,4,0,2,0,2,33,1,4,1,4 +66423,7,2,1,7,NA,5,6,2,7,91,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6620.63733,7182.792631,2,100,4,4,0.44,7,7,1,2,2,1,71,2,1,1,1 +66424,7,2,2,14,NA,1,1,1,14,172,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18674.868029,19048.995585,2,92,8,8,1.42,7,7,0,4,0,2,37,1,1,6,NA +66425,7,2,1,79,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,9257.537917,9255.717695,1,99,5,5,1.69,2,2,0,0,2,1,79,1,1,1,4 +66426,7,2,2,2,NA,1,1,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9955.153132,10990.75621,2,94,14,14,3.58,4,4,1,0,1,1,80,1,3,2,NA +66427,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,1,8746.76562,8644.133303,2,97,15,15,5,3,3,0,0,3,2,80,1,3,2,NA +66428,7,2,2,1,15,5,7,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7483.230909,7475.326886,2,96,15,15,5,3,3,1,0,0,1,39,1,4,1,4 +66429,7,2,2,69,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,4,3,NA,2,2,2,2,2,2,1,2,2,2,9716.805546,10308.451947,2,90,2,2,0.73,1,1,0,0,1,2,69,2,4,3,NA +66430,7,2,1,11,NA,1,1,1,11,142,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11036.458246,10927.552283,1,102,13,13,NA,6,6,1,2,0,2,36,2,4,6,NA +66431,7,2,2,23,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,85402.868381,88914.492385,2,94,15,6,2.94,2,1,0,0,0,1,29,1,4,6,NA +66432,7,2,2,9,NA,3,3,2,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17668.551726,18764.602322,2,95,6,6,0.9,6,6,1,1,0,1,49,1,1,1,1 +66433,7,2,2,11,NA,3,3,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16950.724686,16668.200588,2,98,5,5,0.8,5,5,1,3,0,2,37,NA,NA,4,NA +66434,7,2,2,17,NA,2,2,2,17,210,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17848.732433,18700.272027,2,91,7,7,1.29,6,6,2,2,0,1,33,2,3,6,NA +66435,7,2,1,3,NA,2,2,2,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15372.687224,15393.459347,1,90,6,6,1.68,3,3,1,1,0,2,36,2,4,4,NA 
+66436,7,2,2,74,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,2,1,NA,1,2,1,1,2,2,1,2,1,NA,18266.489157,18835.667339,1,100,5,5,1.18,3,3,0,0,2,2,34,2,5,5,NA +66437,7,2,1,9,NA,5,7,1,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8439.403196,8726.876152,1,103,3,3,0.37,5,5,1,2,0,2,30,1,4,5,NA +66438,7,2,1,44,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,4,3,NA,2,2,2,2,2,2,2,2,1,2,35393.002863,35146.477046,2,93,3,3,0.58,4,4,0,1,1,1,65,2,1,3,NA +66439,7,2,2,48,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,34954.173075,42301.098402,2,98,7,7,1.53,5,5,0,0,0,2,48,1,3,5,NA +66440,7,2,1,8,NA,2,2,2,8,96,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12477.812875,13297.681754,2,94,8,8,2.01,4,4,1,1,0,1,44,2,4,1,4 +66441,7,2,1,74,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,11550.158096,12419.035396,2,94,8,8,1.8,6,6,0,1,2,1,74,2,5,1,5 +66442,7,2,1,48,NA,3,3,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,79677.823556,83116.431703,1,99,15,15,4.47,4,4,0,2,0,2,52,2,5,1,5 +66443,7,2,2,38,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,5,2,2,2,1,2,2,1,2,2,2,2,45655.090694,60280.136423,3,92,4,4,1.12,2,2,0,1,0,2,38,2,1,5,NA +66444,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,21969.841763,21746.409123,1,96,15,15,5,1,1,0,0,0,2,43,1,5,5,NA +66445,7,2,2,11,NA,1,1,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17053.854294,17496.484818,2,98,8,8,2.26,4,4,0,1,0,2,43,1,3,1,2 +66446,7,2,1,4,NA,4,4,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7466.078788,7482.321895,1,96,3,2,0.16,7,6,1,4,0,2,32,1,2,5,NA +66447,7,2,1,27,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,16995.648055,16598.645683,2,100,5,5,0.88,5,5,2,1,0,2,30,1,4,6,NA +66448,7,2,1,44,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,11113.602843,11073.454175,3,90,77,77,NA,7,7,1,2,0,1,41,2,3,6,NA +66449,7,2,1,62,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,7289.557268,7804.776102,3,90,77,77,NA,2,1,0,0,1,1,62,2,5,3,NA 
+66450,7,2,2,7,NA,3,3,2,7,86,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,74327.830279,75742.187091,2,91,6,6,1.34,4,4,1,2,0,2,33,1,4,3,NA +66451,7,2,2,52,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,24902.414229,25152.803056,1,90,9,9,5,1,1,0,0,0,2,52,2,3,5,NA +66452,7,2,1,2,NA,4,4,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6992.24593,7205.816818,2,97,3,1,0.44,3,1,2,0,0,2,20,1,2,5,NA +66453,7,2,2,22,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,16021.911789,17903.386129,1,91,12,2,0.48,7,2,2,2,0,2,54,NA,NA,1,NA +66454,7,2,1,13,NA,4,4,1,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19401.033367,19892.636678,2,102,15,15,5,4,4,0,2,0,1,44,1,3,1,1 +66455,7,2,2,2,NA,1,1,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9132.13761,9422.704955,2,103,9,9,2.6,4,4,2,0,0,2,35,1,4,1,3 +66456,7,2,2,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,NA,12842.559946,13702.031196,2,101,7,7,2.64,2,2,0,0,1,2,70,1,3,4,NA +66457,7,2,1,70,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,16066.082986,17530.388953,1,90,3,3,0.79,2,2,0,0,1,1,70,2,2,1,NA +66458,7,2,2,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,20137.117895,21220.947807,1,97,3,3,1.16,1,1,0,0,1,2,70,1,4,3,NA +66459,7,2,2,15,NA,1,1,1,15,191,NA,NA,2,2,4,9,NA,NA,NA,2,1,1,2,2,1,1,2,2,1,13963.420591,15453.171136,2,103,13,13,NA,3,3,0,1,0,1,47,2,1,1,1 +66460,7,2,1,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,83081.99261,85461.503598,2,92,15,15,5,2,2,0,0,0,1,37,1,5,1,5 +66461,7,2,1,0,11,5,7,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6751.596541,7305.608189,2,92,6,6,1.35,3,3,1,0,0,2,32,1,5,1,5 +66462,7,2,1,57,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,28017.200708,28243.226144,2,102,8,8,4.87,1,1,0,0,0,1,57,1,3,3,NA +66463,7,1,2,3,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,10683.855206,0,1,96,NA,NA,NA,4,4,1,1,0,2,37,NA,NA,1,4 
+66464,7,2,1,21,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,37911.437415,38922.114085,2,103,4,4,0.79,3,3,0,0,0,2,42,2,2,5,NA +66465,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,NA,NA,NA,NA,74517.751389,74746.868777,2,94,14,14,2.83,6,6,0,4,0,2,38,1,2,1,2 +66466,7,2,1,6,NA,1,1,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,2,1,1,1,2,2,NA,NA,NA,NA,14150.136224,14374.05449,3,91,7,7,1.42,6,6,1,3,0,1,37,2,1,1,1 +66467,7,2,2,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,25511.973394,27084.821435,1,98,4,4,0.75,4,4,0,1,0,2,48,1,2,1,3 +66468,7,2,1,9,NA,5,7,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24259.597356,25770.339373,3,91,1,1,0.19,3,3,0,2,0,2,50,1,4,3,NA +66469,7,2,1,18,NA,3,3,2,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26749.020961,27839.957403,1,101,1,1,0.08,3,3,1,0,0,2,19,1,2,NA,NA +66470,7,2,2,31,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,3,3,3,1,2,2,1,2,2,1,2,2,1,35353.005268,35220.675513,2,94,7,7,3.21,1,1,0,0,0,2,31,2,3,3,NA +66471,7,2,1,10,NA,4,4,2,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7886.48532,7943.675078,1,99,10,10,3.13,4,4,0,2,0,1,35,1,4,1,5 +66472,7,2,1,61,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,113655.661464,115113.492348,1,103,15,15,5,2,2,0,0,1,1,61,1,4,1,5 +66473,7,2,1,56,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,144757.450849,145711.976748,1,101,15,15,5,3,3,0,0,2,1,75,1,2,1,2 +66474,7,2,2,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,33331.144292,34526.406365,1,91,1,1,0.2,2,2,0,0,0,1,44,1,2,1,2 +66475,7,2,2,61,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,2,1,NA,1,2,1,1,2,1,1,2,1,3,12403.522912,12838.398002,1,93,12,12,NA,4,4,0,0,2,1,66,2,4,1,2 +66476,7,2,1,60,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,131445.986898,130104.237054,1,101,9,9,5,1,1,0,0,1,1,60,1,4,3,NA +66477,7,1,2,32,NA,2,2,NA,NA,NA,2,NA,2,7,77,NA,2,77,3,2,2,2,2,2,2,NA,NA,NA,NA,27127.983961,0,2,90,77,77,NA,2,2,0,1,0,2,32,2,2,77,NA 
+66478,7,2,1,14,NA,1,1,1,14,172,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22768.423624,22944.003607,2,98,14,14,2.87,5,5,0,3,0,2,34,1,2,1,2 +66479,7,2,2,2,NA,3,3,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46813.021927,48287.821192,1,91,8,8,2.17,4,4,2,0,0,2,28,1,4,1,5 +66480,7,2,2,13,NA,1,1,1,13,162,NA,NA,2,2,3,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,21818.047789,22992.945413,2,102,7,7,1.79,4,4,0,2,0,1,40,2,2,6,NA +66481,7,2,1,12,NA,2,2,2,12,152,NA,NA,2,2,2,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,14081.782012,15478.382268,2,90,3,3,0.46,5,5,0,2,2,1,75,2,1,1,2 +66482,7,2,2,9,NA,1,1,1,9,109,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,16986.005478,17733.622754,2,102,6,6,1,6,6,1,3,0,1,35,2,3,1,3 +66483,7,1,2,13,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,55,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12531.903464,0,2,100,2,2,0.25,4,4,2,1,0,2,39,1,2,5,NA +66484,7,2,1,6,NA,3,3,2,6,80,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23796.980721,25112.597396,1,95,5,5,1.19,3,3,1,1,0,1,47,1,2,3,NA +66485,7,2,2,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,27338.92927,27355.916385,1,96,15,15,5,3,3,0,0,1,2,62,1,4,3,NA +66486,7,2,1,1,23,2,2,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13002.944731,13931.716845,2,91,4,4,0.76,4,4,1,0,0,2,25,2,4,77,NA +66487,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,12556.207754,12615.022145,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +66488,7,2,2,10,NA,5,7,2,10,124,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9620.269705,10273.007637,2,91,12,12,NA,4,4,0,2,0,1,40,1,5,1,5 +66489,7,2,1,4,NA,3,3,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,81508.607355,89259.872051,1,91,8,8,2.17,4,4,2,0,0,2,28,1,4,1,5 +66490,7,1,2,57,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,42475.69088,0,1,97,9,5,1.97,2,1,0,0,1,1,61,1,5,3,NA +66491,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,22138.245498,30132.880506,2,93,6,6,2.05,2,2,0,1,0,2,30,1,4,3,NA 
+66492,7,2,1,13,NA,2,2,2,13,157,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,1,2,NA,19186.411189,19608.692738,1,90,6,6,0.81,6,6,0,3,0,2,45,1,4,1,2 +66493,7,2,2,19,NA,4,4,2,19,238,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16363.914542,16162.453075,1,101,13,13,NA,3,3,1,0,0,2,19,1,2,NA,NA +66494,7,2,1,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,32719.762791,34375.255008,1,101,6,6,1.65,2,2,0,0,0,1,47,1,4,1,2 +66495,7,2,2,80,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,2,NA,2,2,2,2,2,2,2,2,2,NA,18241.877822,20223.807162,2,93,8,8,2.57,3,3,0,0,1,1,59,2,3,1,3 +66496,7,2,2,2,NA,5,7,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7734.994032,7934.647352,2,95,2,2,0.56,2,2,1,0,0,2,22,1,3,5,NA +66497,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,17794.144581,16918.878285,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +66498,7,2,2,14,NA,3,3,1,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,113907.203714,124196.980527,1,94,8,8,1.67,5,5,1,2,0,1,52,1,4,1,4 +66499,7,2,1,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,114943.792804,135002.90259,2,101,1,1,0.37,1,1,0,0,0,1,21,1,4,5,NA +66500,7,2,1,15,NA,5,6,2,15,189,NA,NA,2,2,2,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7291.654281,7756.692286,3,91,4,4,0.69,5,5,0,2,0,1,45,2,4,1,1 +66501,7,2,1,0,6,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,7237.392208,7237.525607,1,102,6,6,1.43,4,4,2,0,0,1,39,2,3,1,3 +66502,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,25212.845431,2,101,2,2,0.46,1,1,0,0,0,1,20,1,4,5,NA +66503,7,2,1,50,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16117.991297,17802.733496,1,96,15,15,5,4,4,1,1,0,1,50,1,3,1,4 +66504,7,2,1,70,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,53028.045147,56320.756196,2,94,8,8,3.23,2,2,0,0,1,1,70,1,4,3,NA +66505,7,2,1,34,NA,1,1,1,NA,NA,2,NA,2,1,5,NA,3,1,NA,2,2,2,1,2,2,1,2,2,2,41155.167164,40844.556107,1,102,5,5,0.92,5,5,0,3,0,2,39,2,3,1,3 
+66506,7,2,2,25,NA,3,3,2,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,39833.117645,41470.989238,2,94,4,4,1.16,2,2,0,0,0,1,39,1,2,1,5 +66507,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,53028.045147,62471.592039,2,94,8,8,4.59,1,1,0,0,1,1,74,1,3,3,NA +66508,7,2,2,13,NA,4,4,2,13,159,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16363.914542,16149.629998,1,95,12,12,NA,2,2,0,1,0,2,41,1,4,5,NA +66509,7,2,2,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,51433.805604,53192.834838,1,94,9,9,5,1,1,0,0,1,2,70,1,4,2,NA +66510,7,2,1,11,NA,4,4,1,11,133,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10199.928366,10684.945227,1,100,9,9,2.22,5,5,1,2,0,2,40,2,4,1,4 +66511,7,2,2,52,NA,2,2,2,NA,NA,2,NA,2,1,4,NA,4,5,NA,2,2,2,1,2,2,2,2,2,2,27285.659216,28272.553853,1,93,4,4,0.92,3,3,0,0,1,1,60,NA,NA,1,4 +66512,7,2,2,8,NA,1,1,1,8,103,NA,NA,2,2,2,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,21075.336925,21477.798111,1,92,5,5,0.87,4,4,0,2,0,1,42,2,1,1,4 +66513,7,2,1,69,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,2,NA,1,2,1,1,2,1,1,2,1,3,7289.557268,7804.776102,3,90,77,77,NA,4,4,0,0,2,1,69,2,5,2,NA +66514,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,38589.695298,44311.547931,2,96,8,8,4.59,1,1,0,0,0,2,36,1,5,5,NA +66515,7,2,2,70,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,14786.399759,15527.475853,1,90,15,15,5,1,1,0,0,1,2,70,2,5,3,NA +66516,7,2,1,6,NA,1,1,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13927.458372,14007.413517,2,98,14,14,3.91,4,4,1,1,0,1,36,2,3,1,5 +66517,7,2,1,2,NA,4,4,1,2,27,NA,NA,2,1,1,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7366.629832,7382.656579,1,98,10,10,1.89,7,7,3,2,0,1,50,1,5,1,5 +66518,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,10634.832821,11430.282078,2,95,6,6,1.65,2,2,0,0,1,2,80,1,1,2,NA +66519,7,2,2,9,NA,4,4,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11116.391625,11717.604707,1,92,5,5,0.95,4,4,0,2,0,2,33,1,4,5,NA 
+66520,7,2,2,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,21143.964074,22787.585902,1,100,8,8,2.36,3,3,1,0,0,2,37,1,3,4,NA +66521,7,2,2,8,NA,3,3,2,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15442.305642,15459.941839,2,94,7,7,1.62,5,5,0,3,0,1,30,1,2,1,9 +66522,7,2,2,46,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,154664.071516,157709.344695,3,92,15,15,5,3,3,0,1,0,1,45,1,5,1,5 +66523,7,2,2,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10440.656902,1,99,9,9,5,1,1,0,0,1,2,66,1,5,2,NA +66524,7,2,1,51,NA,3,3,2,NA,NA,2,NA,2,2,5,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,22746.832388,23155.578286,1,90,7,7,1.55,5,5,0,3,0,1,51,2,3,1,2 +66525,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,151649.038926,156529.976221,1,101,6,6,1.98,2,2,0,0,1,1,80,1,1,2,NA +66526,7,2,1,0,7,3,3,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11414.210614,12219.404134,1,93,6,6,1.16,4,4,2,0,0,2,33,1,5,1,4 +66527,7,2,2,74,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,14534.533756,15040.28544,1,96,14,14,5,2,2,0,0,1,2,74,1,5,5,NA +66528,7,2,2,13,NA,4,4,2,13,163,NA,NA,2,2,2,6,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,14105.705847,14983.542191,1,93,5,5,0.64,7,7,0,2,1,1,21,2,4,5,NA +66529,7,2,1,0,0,4,4,2,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5360.999096,5443.282885,2,90,6,6,1.4,3,3,1,1,0,2,33,1,4,5,NA +66530,7,2,2,3,NA,5,6,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7143.995395,7774.277146,3,91,15,15,5,4,4,2,0,0,2,33,2,5,1,5 +66531,7,2,2,57,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,24217.957803,24014.890408,2,94,15,15,5,2,2,0,0,1,1,63,1,5,1,4 +66532,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17501.079959,17489.07354,1,90,9,9,2.6,4,4,0,1,0,2,49,2,2,1,5 +66533,7,2,1,79,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,17113.447343,17448.661073,2,101,5,5,1.36,2,2,0,0,1,1,79,1,4,2,NA 
+66534,7,2,1,3,NA,4,4,2,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9304.437652,9588.632035,2,100,1,1,0.06,3,3,1,1,0,2,30,1,4,5,NA +66535,7,2,1,23,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11743.336654,12063.054146,1,90,8,8,1.43,7,7,2,0,0,1,23,2,4,1,3 +66536,7,2,2,52,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,2,2,2,1,2,2,1,2,2,1,19132.515566,20057.4089,3,90,77,77,NA,2,2,0,0,1,1,60,2,5,1,5 +66537,7,2,1,15,NA,3,3,1,15,182,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,39918.077466,39803.689785,1,98,9,9,4.03,2,2,0,1,0,2,49,1,5,3,NA +66538,7,2,1,69,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,92913.469106,93784.871586,2,100,8,8,3.4,2,2,0,0,2,1,69,1,4,1,4 +66539,7,2,2,9,NA,3,3,2,9,114,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,43531.157975,45562.561553,1,91,7,7,2.05,3,3,0,1,0,2,32,1,3,1,NA +66540,7,2,2,0,4,4,4,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5264.07765,5655.281842,2,98,5,5,0.59,7,7,3,0,0,2,50,1,5,4,NA +66541,7,2,2,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,15783.902141,16840.218806,2,95,5,5,1.45,2,2,0,0,2,1,72,1,3,1,3 +66542,7,2,1,63,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,8883.099096,8952.58549,2,96,8,8,2.7,3,3,0,0,2,2,52,1,3,1,4 +66543,7,2,2,57,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18724.594661,18534.166088,2,99,15,15,5,2,2,0,0,0,1,57,1,4,1,5 +66544,7,2,2,37,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,2,1,2,2,1,2,2,1,2,2,1,36053.766709,39664.262913,1,100,8,8,2.36,3,3,1,0,0,2,37,1,3,4,NA +66545,7,2,2,6,NA,4,4,2,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7814.742747,8106.808519,2,95,1,1,0.09,5,5,3,1,0,2,31,1,2,1,NA +66546,7,2,2,2,NA,2,2,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8867.166874,8986.419761,2,90,6,6,1.12,4,4,1,1,0,2,35,2,4,1,3 +66547,7,2,1,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,140586.349952,144513.945685,2,91,15,15,5,2,2,0,0,2,1,65,1,5,1,5 
+66548,7,1,2,4,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,68579.013834,0,1,92,4,4,1.29,2,2,1,0,0,2,24,1,4,3,NA +66549,7,2,2,9,NA,1,1,1,9,117,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14414.529053,14932.182215,2,96,6,6,1.12,4,4,0,3,0,1,26,1,2,77,NA +66550,7,2,2,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,30932.175051,30363.701754,2,101,6,6,1.9,2,2,0,1,0,2,42,1,5,5,NA +66551,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,33109.285968,36756.097052,2,98,3,3,0.75,2,2,0,0,0,1,22,1,2,5,NA +66552,7,2,1,45,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,34825.476578,34314.466393,2,93,6,6,2.69,1,1,0,0,0,1,45,2,4,1,NA +66553,7,2,1,61,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,2,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,6967.511455,7138.703058,1,102,13,13,NA,7,7,3,1,2,2,62,2,1,1,2 +66554,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,167711.394252,169284.299948,1,101,3,3,0.73,2,2,0,0,1,1,60,1,3,1,5 +66555,7,2,2,1,16,4,4,2,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6398.740074,6885.115697,1,93,6,6,0.83,6,6,3,1,0,1,37,NA,NA,1,3 +66556,7,2,1,60,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,10,10,3.51,3,3,0,0,1,1,21,1,4,5,NA +66557,7,2,1,13,NA,1,1,2,13,159,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21399.234084,21870.218555,1,91,6,6,1.35,3,3,0,2,0,2,38,1,4,3,NA +66558,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,159097.269198,162641.73095,1,97,15,15,5,3,3,0,0,1,1,64,1,3,1,3 +66559,7,2,1,47,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,38543.316567,38089.298859,1,90,15,15,5,6,6,0,4,0,2,48,1,5,1,5 +66560,7,2,2,5,NA,2,2,2,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13366.393396,14756.857003,2,94,8,8,2.01,4,4,1,1,0,1,44,2,4,1,4 +66561,7,2,2,42,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,3,2,2,2,2,1,2,2,2,2,2,2,32208.300114,36103.843028,1,102,3,3,0.54,3,3,0,2,0,2,42,2,2,3,NA 
+66562,7,2,2,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,97925.559493,98801.080755,1,94,10,10,4.3,2,2,0,0,0,1,27,1,5,1,5 +66563,7,2,2,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,15226.654411,15756.489509,1,96,7,7,2.64,2,2,0,0,2,1,68,NA,NA,1,4 +66564,7,2,2,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,105873.555835,105977.518969,1,98,9,9,2.88,6,3,1,3,0,1,51,1,2,1,3 +66565,7,2,2,66,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,37934.469637,38685.959862,1,94,3,3,0.8,2,2,0,0,1,2,66,1,4,3,NA +66566,7,2,1,17,NA,5,6,2,17,204,2,NA,2,1,4,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9099.599144,9679.941995,1,90,15,15,5,3,3,0,1,0,1,39,2,5,1,NA +66567,7,2,2,2,NA,1,1,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,7677.363986,8161.370187,2,90,6,6,0.96,5,5,1,1,0,1,39,2,2,1,NA +66568,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,126314.769628,126413.297707,2,95,15,15,5,2,2,0,1,0,2,52,1,5,3,NA +66569,7,2,2,62,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,9716.805546,12994.252166,2,90,2,2,0.64,1,1,0,0,1,2,62,2,2,3,NA +66570,7,2,2,28,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,15665.259727,17204.835995,2,93,77,77,NA,2,2,0,0,0,2,28,2,5,1,5 +66571,7,2,1,13,NA,3,3,1,14,168,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,61475.445782,62636.42019,2,92,15,15,5,5,5,0,3,0,2,46,1,5,1,5 +66572,7,1,2,2,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8474.492088,0,1,102,9,9,3.24,3,3,1,0,0,1,40,1,2,1,4 +66573,7,2,1,63,NA,1,1,2,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,9145.939054,9651.644596,1,101,5,5,0.87,4,4,0,0,2,1,63,2,1,1,NA +66574,7,2,1,6,NA,2,2,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14820.807433,14905.891142,2,102,15,15,4.47,4,4,1,1,0,1,32,1,5,1,4 +66575,7,2,1,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,107347.639721,114331.46987,3,92,15,15,5,4,4,2,0,0,1,38,1,5,1,5 
+66576,7,2,1,15,NA,4,4,2,15,180,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11252.388648,11670.840662,1,99,6,6,1.35,3,3,1,1,0,2,42,1,4,4,NA +66577,7,2,2,11,NA,4,4,1,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7868.372593,8159.849106,2,96,12,10,2.17,7,6,2,3,0,1,29,1,4,3,NA +66578,7,2,2,18,NA,2,2,1,18,223,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16781.078148,17658.768186,2,103,12,12,NA,4,4,0,1,0,2,50,2,3,1,4 +66579,7,2,1,13,NA,5,6,2,13,162,NA,NA,2,1,3,6,NA,NA,NA,1,1,1,1,2,2,1,2,2,1,6666.045669,7317.485505,3,90,99,99,NA,4,4,0,1,0,1,40,2,3,6,NA +66580,7,2,1,1,19,4,4,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7084.261593,7811.556831,1,93,3,3,0.7,3,3,1,0,0,1,23,2,4,1,2 +66581,7,2,2,17,NA,5,6,1,17,209,2,NA,2,1,5,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7534.59005,7613.178065,1,102,15,15,5,3,3,0,1,0,2,49,1,5,1,5 +66582,7,2,1,55,NA,5,6,2,NA,NA,2,NA,2,1,9,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,15215.995952,15234.024078,1,90,10,10,5,1,1,0,0,0,1,55,2,5,2,NA +66583,7,2,1,64,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9611.684527,9576.156071,1,98,12,6,1.98,3,2,0,0,1,1,64,1,4,1,3 +66584,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,NA,NA,NA,1,2,2,1,122586.881737,126968.15893,1,90,7,7,1.89,3,3,0,0,1,2,75,1,4,3,NA +66585,7,2,1,40,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15271.238172,15463.719206,3,91,15,15,5,3,3,1,0,0,1,40,2,5,1,5 +66586,7,2,1,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,17265.374984,16862.072044,2,93,8,8,1.67,5,5,1,1,0,2,31,1,4,5,NA +66587,7,2,1,47,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,4,1,NA,2,2,2,1,2,2,2,2,2,2,36610.126289,37722.018472,1,96,8,8,2.17,4,4,0,0,2,1,80,NA,NA,1,NA +66588,7,2,2,4,NA,1,1,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,9468.006743,10197.085534,2,103,8,8,1.29,7,7,3,1,0,2,53,2,2,4,NA +66589,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,136372.590634,138491.991585,1,98,8,8,2.97,2,2,0,0,0,1,23,1,3,1,5 
+66590,7,2,2,7,NA,5,6,2,7,88,NA,NA,1,1,NA,0,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,6347.955472,6765.486426,1,93,8,8,1.2,7,7,1,1,1,1,24,2,2,5,NA +66591,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,80355.847074,82974.223453,1,98,15,15,3.7,5,5,2,1,0,1,34,1,5,1,5 +66592,7,2,2,9,NA,4,4,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7588.605543,8103.359102,1,99,14,14,4.21,4,4,0,2,0,2,44,1,5,1,5 +66593,7,2,1,13,NA,1,1,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20638.769105,20614.097145,2,92,15,15,3.37,7,7,0,4,0,1,42,2,3,1,1 +66594,7,2,1,61,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,2,2,2,2,2,2,1,2,2,1,9430.93681,9879.499027,2,98,3,3,1.19,1,1,0,0,1,1,61,1,3,3,NA +66595,7,2,1,16,NA,5,7,2,16,198,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11834.781205,12120.23702,2,90,5,5,1.43,2,2,0,1,0,2,38,2,4,5,NA +66596,7,2,2,7,NA,5,6,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7231.974056,7722.665474,3,91,15,15,5,4,4,1,1,0,1,39,2,5,1,5 +66597,7,2,2,54,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,30236.240945,31437.061964,2,96,7,7,4.04,1,1,0,0,0,2,54,1,2,4,NA +66598,7,2,1,37,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,69324.74001,85851.040558,2,95,15,15,4.63,5,5,1,2,0,2,36,1,5,1,3 +66599,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,66503.043118,73915.916489,2,98,7,7,1.61,4,4,1,1,0,1,43,NA,NA,6,NA +66600,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,10325.56467,10786.590089,1,96,14,14,5,3,2,0,0,1,2,64,1,4,3,NA +66601,7,2,2,1,20,5,6,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3600.631978,3772.844478,3,90,10,10,2.41,5,5,1,2,0,1,44,2,4,1,5 +66602,7,2,2,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,105873.555835,110835.841786,1,98,15,15,4.34,4,4,1,1,0,1,41,1,5,1,5 +66603,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,2,1,2,2,1,2,2,1,2,2,1,24919.497762,30093.052062,1,98,3,3,0.5,5,5,0,3,0,2,56,1,3,3,NA 
+66604,7,2,2,1,19,5,7,1,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8892.555701,9937.38353,1,98,7,7,1.52,4,4,2,0,0,1,30,1,3,1,4 +66605,7,2,2,11,NA,4,4,2,11,142,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9122.654131,9410.759793,1,96,15,15,5,2,2,0,1,0,2,47,1,5,3,NA +66606,7,2,2,0,5,3,3,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20261.925369,19711.566478,2,103,15,15,5,3,3,1,0,0,1,31,1,5,1,5 +66607,7,1,1,67,NA,2,2,NA,NA,NA,2,NA,2,2,8,NA,3,1,NA,1,2,2,2,2,2,NA,NA,NA,NA,7379.175826,0,1,96,14,14,5,2,2,0,0,1,1,67,2,3,1,NA +66608,7,2,2,18,NA,5,6,2,18,219,2,NA,2,2,3,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8501.305782,9245.625446,1,93,7,7,1.64,5,5,0,2,0,1,47,2,5,1,1 +66609,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,26792.151435,27749.707744,1,97,3,3,0.73,3,3,0,0,0,2,50,1,4,1,3 +66610,7,2,2,3,NA,5,6,1,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7483.230909,7475.326886,2,96,14,14,4.32,3,3,1,0,0,2,33,2,5,1,5 +66611,7,2,2,15,NA,2,2,1,15,189,NA,NA,2,2,3,10,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,18556.092615,19088.608674,1,103,6,6,0.93,5,5,0,1,0,1,39,2,3,1,3 +66612,7,2,1,64,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10739.724778,10911.761454,2,99,15,15,5,2,2,0,0,2,1,64,2,5,1,4 +66613,7,2,1,9,NA,5,6,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9928.619925,10708.884665,1,91,12,12,NA,4,4,0,2,0,1,43,2,5,1,5 +66614,7,2,2,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,126334.218747,128844.689973,1,95,9,9,4.08,2,2,0,0,0,1,51,NA,NA,1,3 +66615,7,2,2,16,NA,4,4,2,16,193,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14085.742306,14155.614592,1,96,8,8,2.17,4,4,1,1,0,2,41,1,3,1,3 +66616,7,2,1,5,NA,1,1,1,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,10274.893484,10739.35317,2,103,8,8,1.29,7,7,3,1,0,2,53,2,2,4,NA +66617,7,2,2,11,NA,4,4,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12754.235811,13157.031697,1,97,7,7,1.74,4,4,0,3,0,2,32,1,4,5,NA 
+66618,7,2,2,11,NA,5,6,2,12,145,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10699.45895,11230.540406,1,97,14,14,2.87,5,5,0,3,0,2,40,2,5,1,5 +66619,7,2,2,72,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,72551.269339,74475.92188,1,95,6,6,2.04,2,2,0,0,2,2,72,1,3,1,NA +66620,7,2,1,64,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,157269.814945,159287.072954,1,92,15,15,5,3,3,0,0,2,2,61,1,5,1,5 +66621,7,2,1,0,2,3,3,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9570.577309,9932.368407,1,95,1,1,0.1,5,5,2,1,0,1,35,1,9,1,3 +66622,7,2,1,9,NA,5,7,1,9,114,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,41342.668304,42524.849071,1,102,8,8,1.91,5,5,1,2,0,2,38,1,5,1,4 +66623,7,2,1,62,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,7289.557268,7804.776102,3,90,12,12,NA,4,4,0,0,1,1,62,2,4,3,NA +66624,7,2,2,6,NA,4,4,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8504.389189,8682.552645,2,93,6,6,1.72,2,2,0,1,0,2,29,1,4,3,NA +66625,7,2,1,4,NA,5,6,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7812.10492,8436.863602,1,95,4,4,0.62,5,5,2,0,2,2,29,2,3,5,NA +66626,7,2,1,6,NA,5,6,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8095.170809,8542.249827,1,100,15,15,5,3,3,0,1,0,1,38,1,5,1,5 +66627,7,2,1,17,NA,4,4,2,17,206,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11023.237662,11433.168046,1,99,6,6,0.96,5,5,1,2,0,2,35,1,4,1,2 +66628,7,2,2,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,133853.800452,133985.238945,3,91,8,8,3.4,2,2,0,0,0,1,33,2,5,1,4 +66629,7,1,2,0,11,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,5263.118333,0,3,90,4,4,0.74,4,4,1,1,0,1,32,2,4,1,2 +66630,7,2,1,58,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,14897.510053,15360.906039,1,101,14,14,5,2,2,0,0,0,1,58,2,3,1,1 +66631,7,2,2,69,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10604.379638,1,99,3,3,1.01,1,1,0,0,1,2,69,1,2,3,NA 
+66632,7,2,1,58,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,174520.785302,181786.280703,1,95,8,8,3.47,2,2,0,0,0,1,58,1,2,1,4 +66633,7,2,1,41,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19564.598892,20184.074944,3,92,15,15,5,3,3,1,0,0,1,41,2,5,1,3 +66634,7,2,1,9,NA,5,6,2,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10810.913614,11522.32071,1,97,15,15,4.07,5,5,0,3,0,1,42,2,5,1,5 +66635,7,2,1,78,NA,5,6,2,NA,NA,2,NA,2,2,8,NA,5,1,NA,1,2,1,1,2,1,1,2,2,NA,8637.841003,9256.016034,1,96,9,9,3.97,2,2,0,0,1,1,78,2,5,1,5 +66636,7,2,2,19,NA,4,4,1,19,232,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16537.460749,16413.673958,2,100,8,8,2.67,3,3,0,0,1,1,61,1,3,1,4 +66637,7,2,2,1,16,4,4,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,7498.263904,2,100,3,3,0.39,7,7,3,3,0,2,30,1,2,5,NA +66638,7,2,2,9,NA,1,1,2,9,117,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15225.935813,15598.502882,2,94,77,77,NA,5,5,1,1,0,1,41,2,2,1,2 +66639,7,2,1,10,NA,1,1,1,10,130,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,8828.580268,9491.612368,2,103,77,77,NA,7,7,0,4,0,1,38,2,1,6,NA +66640,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,85420.170155,90189.982607,1,91,7,7,2.05,3,3,0,1,0,2,32,1,3,1,NA +66641,7,2,2,46,NA,3,3,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21224.321717,21245.16306,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +66642,7,2,1,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,126717.185106,132638.866244,2,91,14,14,4.19,3,3,0,1,0,2,31,1,4,1,3 +66643,7,2,1,39,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,15875.759889,17102.194972,1,96,6,6,1.34,3,3,1,0,0,2,42,2,4,6,NA +66644,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,32647.160346,32536.716665,1,99,4,4,1.16,2,2,0,0,2,2,63,1,5,6,NA +66645,7,2,1,13,NA,5,6,2,13,157,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6666.045669,7317.485505,3,90,13,13,NA,3,3,0,2,0,2,41,2,3,4,NA 
+66646,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,81416.374938,85457.008484,1,97,15,15,5,1,1,0,0,1,2,70,1,4,2,NA +66647,7,2,1,19,NA,3,3,1,19,229,2,NA,2,1,99,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,110478.18082,109645.964567,2,101,3,3,1.15,2,1,0,0,0,1,20,2,4,5,NA +66648,7,2,2,7,NA,4,4,1,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10654.944111,10939.993817,1,100,4,4,0.99,2,2,0,1,0,1,36,2,4,4,NA +66649,7,2,2,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,15096.339697,2,100,7,7,1.34,5,5,0,2,0,2,53,1,4,4,NA +66650,7,2,1,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,16088.355002,16260.034828,1,96,1,1,0.2,2,2,0,0,0,1,25,1,2,5,NA +66651,7,2,1,10,NA,2,2,1,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19735.224235,20204.314213,2,102,8,8,1.91,5,5,0,3,0,1,39,1,3,1,3 +66652,7,2,1,17,NA,4,4,2,17,215,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9313.795042,9381.627752,1,96,77,77,NA,7,7,1,3,0,1,56,1,3,1,4 +66653,7,2,1,42,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,19436.026093,20405.866121,2,97,5,5,1.08,3,3,0,1,0,2,45,1,4,6,NA +66654,7,2,1,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,28542.421068,28714.346279,2,101,5,5,1.5,2,2,0,0,0,1,47,1,4,3,NA +66655,7,2,2,39,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,3,6,2,1,2,2,1,2,2,NA,NA,NA,NA,11608.998717,11632.703325,3,90,77,77,NA,7,7,1,2,0,1,41,2,3,6,NA +66656,7,1,2,41,NA,2,2,NA,NA,NA,2,NA,2,1,4,NA,2,3,3,2,2,2,1,2,2,NA,NA,NA,NA,34999.007145,0,1,90,1,1,0.32,2,2,0,1,0,2,41,2,2,3,NA +66657,7,2,2,16,NA,3,3,2,16,199,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,58352.457563,59447.870488,1,99,8,8,2.81,3,3,0,1,0,1,19,1,4,NA,NA +66658,7,2,2,14,NA,5,6,2,14,171,NA,NA,2,1,4,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10963.340882,11440.408955,2,91,15,15,5,4,4,0,2,1,2,56,1,5,1,5 +66659,7,2,1,7,NA,1,1,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18107.947773,18211.90239,1,97,10,10,2.32,6,6,0,4,0,1,42,1,4,1,4 
+66660,7,2,1,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,18289.793332,19167.642325,1,99,2,2,0.31,4,4,1,0,1,2,67,1,3,3,NA +66661,7,2,1,68,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20250.944593,20440.871021,2,95,6,6,1.36,3,3,0,0,2,2,60,1,5,1,4 +66662,7,2,1,36,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,18955.147124,22251.142446,1,90,2,2,0.63,2,2,0,0,1,1,36,1,4,5,NA +66663,7,2,1,62,NA,5,6,2,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9355.567184,9645.104203,2,97,15,15,5,2,2,0,0,1,2,58,2,5,1,5 +66664,7,2,2,0,8,4,4,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4439.36229,4769.277094,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA +66665,7,2,2,27,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,116919.871791,121727.422593,2,92,10,8,4.59,2,1,0,0,0,2,27,2,5,1,NA +66666,7,2,1,1,17,3,3,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,28617.223132,30780.732491,1,101,3,3,0.63,3,3,1,0,0,2,47,1,1,3,NA +66667,7,2,2,11,NA,3,3,2,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,80369.555824,80128.103294,1,97,15,15,5,4,4,1,1,0,2,33,1,5,1,3 +66668,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,99831.393624,101809.075589,1,100,15,15,5,2,2,0,0,2,1,79,1,5,1,4 +66669,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,137143.403965,141946.841153,2,91,15,15,5,2,2,0,0,2,1,65,1,5,1,5 +66670,7,2,1,53,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,1,1,2,2,1,2,1,3,13537.092442,13488.188749,1,90,8,8,1.43,7,7,2,0,0,1,23,2,4,1,3 +66671,7,2,1,46,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,28726.575428,29019.719154,2,100,14,14,3.58,4,4,0,1,0,1,46,2,5,1,5 +66672,7,2,1,60,NA,1,1,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,2,2,2,2,8961.035147,9135.681188,2,94,15,15,5,2,2,0,0,1,1,60,1,5,1,5 +66673,7,2,2,25,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,50915.06085,50887.683329,3,92,9,9,2.22,5,5,1,0,2,1,66,2,1,1,1 
+66674,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,35334.703093,40990.264786,1,101,3,3,0.9,1,1,0,0,1,2,80,1,1,3,NA +66675,7,2,1,4,NA,2,2,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,18754.85406,20094.472571,2,102,6,6,1.12,4,4,1,1,0,1,38,2,2,1,3 +66676,7,2,2,10,NA,4,4,1,10,129,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,1,2,1,2,2,1,8362.256577,9003.967662,2,100,5,1,0,3,1,0,1,0,2,50,1,2,5,NA +66677,7,2,2,18,NA,2,2,2,18,219,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,18368.872199,19023.186366,2,91,10,10,2.95,4,4,0,1,0,2,18,1,3,NA,NA +66678,7,2,1,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,137038.746155,146586.432966,2,101,4,3,0.92,2,1,0,0,0,1,21,1,4,5,NA +66679,7,2,1,30,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21280.199633,23174.199432,1,97,15,15,4.84,6,6,2,0,0,1,53,NA,NA,1,NA +66680,7,2,2,56,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15892.013862,19604.056525,3,90,12,12,NA,2,2,0,0,0,2,56,2,4,1,5 +66681,7,2,2,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,1,1,2,2,1,2,2,1,2,2,1,26999.643202,25671.572746,2,102,5,3,0.63,5,4,2,1,0,1,24,1,4,6,NA +66682,7,2,2,6,NA,2,2,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14580.994397,14937.78024,2,93,4,4,0.97,3,3,0,1,0,1,38,2,3,1,3 +66683,7,2,1,58,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,NA,27609.8026,29259.766266,2,101,10,10,2.91,4,4,0,1,0,2,51,1,2,5,NA +66684,7,2,1,16,NA,5,6,1,16,193,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7669.677276,8194.5967,2,92,15,15,4.59,4,4,0,2,0,2,48,1,5,1,5 +66685,7,2,1,29,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,6,NA,2,2,2,2,2,2,2,2,2,2,33592.259589,37970.446859,1,103,13,13,NA,4,4,2,0,0,2,27,2,2,6,NA +66686,7,2,1,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,130229.183917,130070.387937,2,103,77,77,NA,2,1,0,0,0,1,50,1,5,5,NA +66687,7,2,2,53,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,22969.116046,23497.145655,2,93,10,10,5,1,1,0,0,0,2,53,2,5,5,NA 
+66688,7,2,2,19,NA,5,7,1,19,230,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,15318.952876,15275.788411,1,98,2,1,0.18,4,1,0,0,0,2,20,NA,NA,5,NA +66689,7,1,2,71,NA,1,1,NA,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,22927.30561,0,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 +66690,7,2,1,65,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,14067.170863,14844.985608,2,102,4,4,0.67,4,4,0,0,2,2,20,1,1,NA,NA +66691,7,2,1,1,20,4,4,1,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10232.679671,10655.495346,2,97,7,7,1.72,5,5,1,2,0,1,32,1,4,1,4 +66692,7,2,2,1,17,1,1,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11582.174418,12474.053558,2,102,5,5,0.59,7,7,1,3,0,1,37,2,1,6,NA +66693,7,2,1,18,NA,5,6,2,19,229,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11506.937395,12095.740214,1,97,9,9,1.78,6,6,0,1,1,1,45,2,3,1,3 +66694,7,2,1,1,13,2,2,2,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10487.35151,11236.440262,2,95,14,14,4.45,3,3,1,0,0,2,29,1,5,1,5 +66695,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,21102.441005,21214.570635,1,98,2,2,0.67,1,1,0,0,1,1,60,1,3,3,NA +66696,7,2,1,11,NA,3,3,2,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,19926.440922,21167.339982,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +66697,7,2,2,13,NA,3,3,1,13,161,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,61800.07471,62604.355485,1,102,8,8,1.6,7,7,0,4,0,2,39,1,4,1,4 +66698,7,2,1,21,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,100807.076657,102674.431164,1,97,15,15,5,4,4,0,0,1,1,60,1,5,1,5 +66699,7,2,1,33,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105412.227726,111169.022023,2,101,7,7,1.88,4,4,0,2,0,2,36,1,4,1,5 +66700,7,2,1,10,NA,4,4,1,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7005.767895,7244.406666,2,95,15,15,3.85,7,7,0,3,1,2,62,1,4,2,NA +66701,7,2,1,75,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,1,2,2,1,2,2,NA,13928.293734,14290.365569,2,92,5,5,1.36,2,2,0,0,2,1,75,2,1,1,1 
+66702,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,NA,NA,NA,1,2,2,1,18533.049642,19445.722605,1,99,13,13,NA,4,4,1,0,0,2,26,1,4,4,NA +66703,7,2,1,2,NA,5,7,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8375.75457,9394.325143,2,95,15,15,5,3,3,1,0,0,1,50,1,5,1,NA +66704,7,1,2,28,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,1,2,2,NA,NA,NA,NA,15967.106149,0,2,103,1,1,0.04,2,2,0,1,0,2,28,1,4,5,NA +66705,7,2,2,3,NA,4,4,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9727.363166,10265.541082,2,97,3,3,0.4,6,6,2,3,0,2,25,1,2,5,NA +66706,7,2,1,8,NA,5,6,2,8,102,NA,NA,2,1,3,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7923.925927,8546.646915,1,94,7,7,1.79,4,4,0,1,0,1,59,2,4,1,4 +66707,7,2,1,13,NA,1,1,1,13,161,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24902.864049,25040.491572,1,94,5,5,0.94,4,4,0,2,0,2,37,2,3,1,2 +66708,7,2,2,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,20842.960361,20272.538073,3,91,12,12,NA,2,2,0,0,0,1,52,1,5,1,5 +66709,7,1,1,57,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19478.845078,0,2,95,2,2,0.46,3,3,0,0,0,2,48,1,2,1,2 +66710,7,2,1,66,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9082.311855,9548.677283,1,96,7,7,1.83,3,3,0,0,1,1,66,2,5,1,3 +66711,7,1,2,11,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5885.567617,0,1,93,9,9,1.77,7,7,0,2,0,2,56,NA,NA,5,NA +66712,7,2,1,31,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14221.330587,14963.406508,3,90,12,12,NA,4,4,0,0,1,1,62,2,4,3,NA +66713,7,2,2,60,NA,3,3,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,35354.773526,36055.159401,2,103,5,5,1.2,3,3,0,0,2,1,66,2,2,1,2 +66714,7,2,2,21,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,54951.692454,55140.36098,2,102,15,15,3.92,5,5,0,0,0,1,19,1,4,NA,NA +66715,7,2,1,64,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,9535.353518,9609.942052,2,101,9,9,4.08,2,2,0,0,2,2,67,1,5,1,3 
+66716,7,2,1,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21317.283165,21013.422419,2,96,4,4,0.65,5,5,0,3,0,1,30,1,4,1,2 +66717,7,2,2,23,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,2,6,3,1,2,2,1,2,2,1,2,2,1,35710.33222,42634.350272,1,90,3,3,0.43,4,4,2,0,0,1,31,1,3,6,NA +66718,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,36950.912612,36817.49814,1,97,3,3,1.16,1,1,0,0,0,2,56,1,3,3,NA +66719,7,2,2,9,NA,3,3,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,55469.656717,57246.428971,1,101,9,9,2.6,4,4,0,2,0,2,38,1,4,1,4 +66720,7,2,1,58,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,15044.515884,14996.530888,3,90,15,15,4.89,5,5,0,0,0,2,57,2,3,1,3 +66721,7,2,2,47,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12977.791943,13644.504126,2,100,15,15,5,3,3,0,1,0,1,48,2,5,1,5 +66722,7,2,2,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,110070.015649,114078.798472,3,91,15,15,5,4,4,1,1,0,2,41,1,5,1,5 +66723,7,2,1,14,NA,4,4,1,14,178,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17606.165994,18260.901254,2,101,6,6,0.96,5,5,0,4,0,2,36,1,4,4,NA +66724,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,23630.941535,1,95,6,6,1.35,3,3,1,0,0,1,31,1,5,1,5 +66725,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,124263.643735,125259.229556,2,95,8,8,3.53,2,2,0,0,0,1,57,1,4,1,4 +66726,7,2,1,18,NA,5,7,2,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,21852.102821,22875.024578,3,91,5,5,0.65,7,7,0,4,0,2,39,1,3,4,NA +66727,7,2,1,11,NA,5,7,2,11,135,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8246.426933,9050.014459,1,99,10,10,3.51,3,3,0,1,0,2,44,1,3,1,5 +66728,7,2,2,71,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,1,1,2,1,1,2,1,NA,13750.49328,15781.175386,2,92,4,4,1.14,2,2,0,0,2,1,72,2,3,1,3 +66729,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,137143.403965,141946.841153,2,91,15,15,5,2,2,0,0,2,1,68,1,5,1,5 
+66730,7,2,1,9,NA,5,7,1,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11661.909323,12350.149476,1,92,15,15,5,4,4,0,2,0,1,41,2,5,1,5 +66731,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,117009.557742,134978.408148,2,99,14,14,5,1,1,0,0,0,1,46,1,5,5,NA +66732,7,2,1,16,NA,5,6,1,16,200,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9099.087557,9564.682199,2,102,12,12,NA,3,3,0,1,0,1,57,2,5,1,5 +66733,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,20131.904783,20903.590301,1,92,15,15,5,2,2,0,0,2,2,80,1,5,1,NA +66734,7,2,2,16,NA,5,6,1,16,201,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,5760.953091,5914.685125,2,92,8,8,1.91,5,5,0,2,1,2,47,2,1,1,3 +66735,7,2,1,24,NA,5,7,1,NA,NA,2,NA,2,1,4,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,27271.751091,27454.884133,2,101,6,4,1.74,3,1,0,0,0,1,21,1,4,5,NA +66736,7,2,2,55,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16178.54595,16391.328992,1,93,15,15,5,3,3,0,0,1,1,63,1,5,1,5 +66737,7,2,2,14,NA,2,2,2,14,173,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,15087.58237,15620.049499,1,96,15,15,5,4,4,0,2,0,1,36,2,3,1,4 +66738,7,2,1,59,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,27240.276328,26969.79863,1,90,15,15,5,2,2,0,0,1,2,63,1,5,1,5 +66739,7,2,1,9,NA,5,6,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5438.768263,5891.907372,2,92,15,15,5,4,4,0,2,0,2,41,1,5,1,5 +66740,7,2,2,0,7,3,3,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20899.681083,21326.972731,1,92,10,10,2.1,6,6,1,1,0,2,29,1,4,1,2 +66741,7,2,2,73,NA,3,3,2,NA,NA,2,NA,2,1,8,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,19901.857177,20582.498016,2,94,1,1,0.08,1,1,0,0,1,2,73,2,4,3,NA +66742,7,2,1,4,NA,4,4,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7431.820906,7738.904727,1,99,7,7,1.53,5,5,2,0,0,2,37,1,4,1,3 +66743,7,2,2,68,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,1,4,NA,2,2,2,2,2,2,2,2,1,2,8725.210615,9440.710379,2,93,4,4,0.94,3,3,0,1,1,2,68,2,1,4,NA 
+66744,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,10479.637868,11727.843054,2,98,77,77,NA,1,1,0,0,1,1,80,1,2,2,NA +66745,7,2,2,16,NA,1,1,1,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26325.414456,27702.295836,3,92,10,10,3.77,3,3,0,1,0,2,52,1,4,6,NA +66746,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,97925.559493,100647.602701,1,94,6,6,1.31,3,3,0,0,0,2,46,1,5,6,NA +66747,7,2,1,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,152467.08796,157587.268357,1,94,9,9,3.97,2,2,0,0,0,1,59,1,3,5,NA +66748,7,2,1,73,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,9662.124837,9885.036833,2,92,3,3,1.01,1,1,0,0,1,1,73,1,4,3,NA +66749,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,16741.034883,16841.712651,2,95,2,2,0.7,1,1,0,0,0,2,56,1,3,4,NA +66750,7,2,2,73,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,2,3,NA,2,2,2,1,2,2,2,2,2,NA,20922.745102,23195.943223,1,93,15,15,2.96,7,7,0,1,1,2,18,1,2,NA,NA +66751,7,2,2,9,NA,5,6,2,9,114,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,7722.982971,8635.133042,1,93,8,8,2.24,4,4,0,2,0,1,44,2,5,1,4 +66752,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21491.090123,20964.324934,1,97,14,14,5,3,3,0,0,0,2,51,1,5,1,4 +66753,7,2,2,2,NA,3,3,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20820.221848,22126.060024,1,95,6,6,0.81,6,6,2,2,0,1,30,1,3,1,4 +66754,7,2,1,47,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,19371.546331,19651.186711,2,95,12,12,NA,2,2,0,1,0,1,47,1,4,2,NA +66755,7,2,2,33,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,6,3,2,2,2,2,2,2,1,2,2,2,27127.983961,26396.013964,2,90,6,6,0.96,5,5,1,1,0,1,39,2,2,1,NA +66756,7,1,1,11,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11567.189008,0,2,90,14,14,3.06,5,5,1,2,0,1,42,1,4,1,5 +66757,7,1,2,59,NA,5,6,NA,NA,NA,2,NA,2,1,99,NA,5,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12649.084278,0,3,90,77,77,NA,2,2,0,0,1,1,65,2,3,1,5 
+66758,7,2,2,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,60071.993203,65791.533402,1,95,99,99,NA,1,1,0,0,1,2,80,1,4,2,NA +66759,7,2,2,57,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,12449.932013,12144.773422,3,90,7,7,2.23,3,3,0,0,0,2,51,1,4,3,NA +66760,7,1,2,32,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,2,1,3,1,2,2,1,2,2,NA,NA,NA,NA,25241.487585,0,2,97,3,3,0.33,6,6,2,0,0,2,32,1,2,1,3 +66761,7,2,1,2,NA,2,2,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10057.661774,10071.252045,2,93,12,77,NA,3,1,1,1,0,2,43,1,5,3,NA +66762,7,2,1,16,NA,4,4,2,16,195,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11252.388648,11221.862262,1,99,7,7,1.89,3,3,0,1,0,1,50,1,5,1,2 +66763,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,54095.581484,61529.339683,2,91,7,7,1.61,4,4,0,0,3,1,65,1,3,6,NA +66764,7,2,1,2,NA,1,1,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12005.116852,11690.420322,3,92,14,14,2.29,7,7,2,0,0,2,50,2,1,1,9 +66765,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,1,2,1,1,2,NA,NA,NA,NA,40859.270352,45390.978638,2,101,8,8,4.22,1,1,0,0,1,1,80,1,3,2,NA +66766,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,94698.084211,103561.845332,1,101,14,14,4.5,3,3,0,1,0,1,39,1,2,1,5 +66767,7,2,2,1,21,1,1,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12260.86913,13033.834526,3,92,5,5,0.81,5,5,3,0,0,2,23,1,4,5,NA +66768,7,2,2,7,NA,4,4,1,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8282.497467,8610.715242,2,96,4,4,0.57,6,6,0,3,0,2,29,1,3,4,NA +66769,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18101.423817,17611.874146,2,99,9,9,2.43,4,4,0,2,0,2,49,1,3,3,NA +66770,7,2,2,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,59974.233027,60510.443557,2,97,6,6,2.85,2,1,0,0,0,1,22,1,3,6,NA +66771,7,2,2,31,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,37439.743351,39040.415193,1,98,3,3,0.4,7,7,2,3,0,2,31,2,5,1,2 
+66772,7,2,1,0,9,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7907.070371,7770.374311,2,95,2,2,0.26,3,3,1,0,0,2,54,1,3,2,NA +66773,7,1,1,24,NA,4,4,NA,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,16052.583048,0,2,99,77,77,NA,3,3,0,1,0,2,46,NA,NA,77,NA +66774,7,2,2,17,NA,1,1,1,18,216,2,NA,2,2,4,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26325.414456,26852.811114,3,92,13,13,NA,4,4,0,2,0,2,50,2,1,5,NA +66775,7,2,1,7,NA,1,1,2,7,95,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15039.041447,16539.35823,3,91,7,7,1.23,6,6,2,2,0,1,36,2,1,1,1 +66776,7,2,2,2,NA,4,4,1,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,8250.113235,2,100,14,14,4.59,3,3,1,0,0,1,30,NA,NA,1,4 +66777,7,2,1,39,NA,2,2,1,NA,NA,2,NA,2,1,3,NA,1,1,NA,2,2,2,1,2,2,2,2,1,2,37631.514869,37347.497931,2,93,3,3,0.43,4,4,0,0,0,1,45,2,2,6,NA +66778,7,2,1,11,NA,4,4,2,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14125.862146,14370.909235,1,97,15,15,5,4,4,0,1,0,1,40,1,4,1,4 +66779,7,2,1,14,NA,5,6,1,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,1,1,2,2,1,6121.087833,6878.034577,2,92,3,3,0.4,6,6,0,1,2,1,78,2,1,1,1 +66780,7,2,1,0,2,1,1,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,5588.585837,5533.355248,1,102,6,6,0.8,7,7,3,3,0,2,34,2,3,1,1 +66781,7,2,1,68,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11243.205109,11633.337706,1,90,4,4,1.12,2,2,0,0,2,1,68,2,4,1,1 +66782,7,2,2,19,NA,4,4,2,19,236,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10848.628906,11198.221038,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +66783,7,2,2,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10692.488346,11331.751026,1,101,4,4,0.78,4,4,1,2,0,2,32,1,3,3,NA +66784,7,2,1,9,NA,4,4,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9261.557132,9701.953496,2,100,7,7,1.34,5,5,0,2,0,2,53,1,4,4,NA +66785,7,2,2,60,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,1,1,NA,1,2,1,1,2,1,1,2,2,NA,18002.759054,18633.946783,2,102,15,15,3.82,5,5,0,1,2,1,60,2,2,1,1 
+66786,7,2,1,13,NA,5,6,2,13,163,NA,NA,2,1,3,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7046.884472,7540.251296,2,100,4,4,0.44,7,7,1,2,2,1,71,2,1,1,1 +66787,7,2,2,8,NA,2,2,1,8,99,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12307.832776,12608.996083,2,93,5,5,0.89,4,4,0,2,0,1,42,NA,NA,6,NA +66788,7,2,1,31,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,39040.678458,39049.131011,2,98,99,99,NA,4,4,1,0,1,2,68,1,9,2,NA +66789,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,30505.56355,32886.600907,1,97,6,6,1.03,6,6,2,2,0,2,38,1,5,1,4 +66790,7,2,1,4,NA,4,4,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,11070.778245,1,100,3,3,0.73,3,3,2,0,0,2,39,1,3,5,NA +66791,7,2,2,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,23845.8146,22808.13483,1,98,12,77,NA,4,1,0,0,0,1,21,1,4,6,NA +66792,7,2,2,4,NA,2,2,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11429.37307,12618.334582,2,90,6,6,1.35,3,3,1,0,0,1,31,1,3,1,4 +66793,7,2,1,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,16851.334496,17382.882475,2,101,7,7,2.64,2,2,0,0,1,2,70,1,3,4,NA +66794,7,2,1,10,NA,5,6,2,10,131,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6850.601671,7388.972861,3,91,14,14,4.03,4,4,0,2,0,1,51,2,4,1,5 +66795,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,34810.007519,37700.123082,1,94,3,3,0.37,5,5,0,3,0,2,29,1,4,4,NA +66796,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,79632.40863,82653.579173,1,95,9,9,4.08,2,2,0,0,2,2,65,1,5,1,3 +66797,7,2,2,30,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,27303.803575,27793.15685,1,96,7,7,1.52,4,4,0,2,0,2,30,2,4,1,5 +66798,7,2,2,56,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,4,1,NA,1,2,2,1,2,2,2,2,2,2,34082.505027,35315.234687,2,92,2,2,0.4,3,3,0,0,0,1,50,2,4,1,4 +66799,7,2,1,6,NA,4,4,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10665.048307,10792.905577,1,96,8,8,2.78,3,3,0,2,0,2,34,1,4,3,NA 
+66800,7,2,1,11,NA,1,1,1,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,13742.678011,2,98,4,4,0.75,4,4,0,2,0,2,33,1,2,5,NA +66801,7,2,1,7,NA,3,3,2,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,57057.523607,61384.115788,1,101,14,14,4.21,4,4,1,1,0,2,37,1,5,1,5 +66802,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,26763.110196,28578.287397,2,94,5,5,1.3,3,3,0,1,0,1,43,1,3,6,NA +66803,7,2,2,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,1,1,2,2,1,2,2,NA,NA,NA,NA,43813.24867,44924.427845,1,98,4,4,0.66,4,4,2,0,0,2,22,1,4,6,NA +66804,7,2,1,74,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,12824.368043,13581.00423,2,101,3,3,1.13,1,1,0,0,1,1,74,1,1,2,NA +66805,7,2,2,2,NA,4,4,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,8048.181894,2,100,14,4,0.43,7,7,1,3,1,2,62,1,3,5,NA +66806,7,1,2,29,NA,5,6,NA,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,14698.806915,0,2,99,15,15,5,2,2,0,0,0,1,30,NA,NA,1,5 +66807,7,2,1,29,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,1,1,2,2,1,9177.295801,9603.338468,2,92,3,3,0.45,4,4,0,0,1,1,64,2,1,1,1 +66808,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,5950.162866,6182.885443,2,95,5,5,1.05,3,3,0,1,1,1,63,1,2,1,3 +66809,7,1,1,68,NA,2,2,NA,NA,NA,2,NA,2,1,6,NA,3,2,NA,2,2,2,1,2,2,NA,NA,NA,NA,9693.846555,0,2,93,5,5,1.04,4,4,0,1,1,1,68,2,3,2,NA +66810,7,2,1,14,NA,4,4,1,15,180,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16147.713323,16532.569027,1,92,NA,1,0.18,4,3,0,2,0,2,56,1,4,4,NA +66811,7,2,1,29,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,9177.295801,9548.31812,2,92,77,77,NA,4,4,0,0,1,1,20,1,2,5,NA +66812,7,2,1,15,NA,4,4,1,15,186,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10569.808278,10821.723266,2,92,10,10,4.76,2,2,0,1,0,2,40,1,5,4,NA +66813,7,2,1,9,NA,4,4,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8655.162127,9330.305761,2,95,10,10,3.67,3,3,0,1,0,1,43,1,4,1,4 
+66814,7,1,1,34,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,23284.536512,0,1,97,7,7,1.74,4,4,2,0,0,1,34,1,5,1,5 +66815,7,1,1,4,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58116.402634,0,1,97,15,15,5,5,5,2,0,1,1,43,1,5,1,5 +66816,7,2,1,0,8,1,1,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,4461.618312,4741.503802,2,103,8,8,1.29,7,7,3,1,0,2,53,2,2,4,NA +66817,7,2,2,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,22820.082463,26177.958982,1,94,2,2,0.83,1,1,0,0,1,2,72,1,3,2,NA +66818,7,2,2,8,NA,3,3,2,8,105,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,39976.778207,40067.73727,2,100,15,15,5,3,3,0,1,0,1,38,1,4,1,4 +66819,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,102928.893739,104528.537732,1,101,6,6,0.97,7,7,2,1,0,1,43,1,2,1,NA +66820,7,1,2,11,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7710.907339,0,2,99,6,6,0.94,7,7,0,4,0,2,32,1,3,1,3 +66821,7,2,2,30,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,3,1,2,2,2,2,1,2,2,2,2,2,2,41791.57979,45058.092516,2,102,6,6,1.12,4,4,1,1,0,1,38,2,2,1,3 +66822,7,2,1,19,NA,5,6,1,19,230,2,NA,2,1,3,14,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8431.995003,8807.000735,1,95,3,3,0.71,3,3,0,0,0,1,57,2,2,1,4 +66823,7,2,2,5,NA,5,6,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8173.816615,8894.95474,1,97,15,15,5,4,4,1,1,0,1,44,2,5,1,5 +66824,7,2,2,14,NA,2,2,1,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18728.878486,20545.934985,2,93,10,10,3.4,3,3,0,1,0,1,51,1,3,1,4 +66825,7,2,1,8,NA,2,2,2,8,107,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11546.167056,12698.029578,1,90,7,7,1.56,4,4,1,1,0,2,37,1,2,77,NA +66826,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11355.3308,11862.334174,1,96,9,9,3.97,2,2,0,0,2,1,72,1,4,1,5 +66827,7,2,1,11,NA,5,6,2,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,9175.735601,9896.833095,3,91,15,15,5,4,4,0,2,0,1,44,2,5,1,5 
+66828,7,2,1,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,116839.212439,120894.200639,2,97,9,9,4.08,2,2,0,0,0,2,24,1,4,1,3 +66829,7,2,1,8,NA,5,6,2,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6262.834446,6786.531245,3,90,15,15,5,4,4,0,2,0,2,41,2,5,1,5 +66830,7,2,2,12,NA,5,6,1,12,154,NA,NA,2,1,4,6,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,5668.184078,6218.795289,2,92,99,2,0.31,7,4,3,3,1,1,61,2,1,1,3 +66831,7,2,1,0,0,1,1,1,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,7222.23638,7675.3005,1,94,5,5,0.57,7,7,2,1,0,1,58,2,1,1,1 +66832,7,2,1,75,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,7608.031426,7606.53553,1,99,5,5,1.84,1,1,0,0,1,1,75,1,5,2,NA +66833,7,2,2,39,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,3,1,2,2,2,2,1,2,2,1,2,2,2,41067.133288,41493.788594,2,91,3,3,0.73,3,3,0,0,0,2,22,2,2,5,NA +66834,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14366.887416,15220.688498,3,90,15,15,5,5,5,0,1,1,2,61,1,5,2,NA +66835,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,15800.216306,15663.050285,2,99,2,2,0.19,6,6,0,1,0,1,59,1,2,5,NA +66836,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,35522.958395,37225.553122,1,102,13,1,0,2,1,0,0,0,1,22,1,4,5,NA +66837,7,2,1,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16489.238752,16479.116235,1,96,6,6,1.21,4,4,2,0,0,1,24,1,4,1,3 +66838,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,152978.138087,155477.549686,1,101,7,7,3.13,1,1,0,0,1,2,65,1,4,2,NA +66839,7,2,2,25,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,11696.173591,12195.620792,1,93,15,1,0.41,6,1,0,0,0,1,34,2,5,5,NA +66840,7,2,2,10,NA,3,3,1,10,123,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,61059.578287,60876.138307,1,98,15,15,4.34,4,4,0,2,0,2,52,1,3,1,5 +66841,7,2,1,3,NA,5,7,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26980.605125,29855.353305,1,94,2,2,0.3,5,5,1,2,0,1,23,1,1,6,NA 
+66842,7,1,2,0,9,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3861.876549,0,1,96,15,15,4.81,5,5,1,2,0,1,33,1,5,1,3 +66843,7,2,1,5,NA,5,7,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10859.969359,11974.89205,1,98,7,7,1.52,4,4,2,0,0,1,30,1,3,1,4 +66844,7,2,1,18,NA,4,4,1,18,221,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16147.713323,16270.487634,1,92,77,77,NA,5,5,1,2,0,2,41,1,3,5,NA +66845,7,2,1,1,15,3,3,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47507.757497,53600.040598,1,101,14,14,3.15,5,5,2,1,0,1,35,1,4,1,5 +66846,7,2,1,2,NA,4,4,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,7031.647494,2,100,8,8,2.7,3,3,1,0,0,2,41,1,4,1,3 +66847,7,2,1,31,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16505.268729,17550.815541,3,91,14,14,4.32,3,3,1,0,0,1,31,2,3,1,4 +66848,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,141821.601523,151144.430607,1,90,15,15,5,4,4,0,2,1,2,52,1,5,1,NA +66849,7,2,1,0,11,5,7,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7317.981812,7846.84086,1,97,15,15,3.7,5,5,1,1,0,2,21,1,4,5,NA +66850,7,2,2,71,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19603.448069,21893.680196,2,98,5,5,0.59,7,7,2,1,2,2,71,1,2,1,1 +66851,7,2,2,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,143866.32507,144700.959982,1,97,15,15,5,1,1,0,0,0,2,46,1,4,2,NA +66852,7,2,1,4,NA,4,4,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8569.438441,9098.434789,2,99,7,7,1.19,6,6,1,3,0,2,38,1,3,5,NA +66853,7,2,1,2,NA,2,2,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11030.983881,11818.903113,2,93,7,7,1.56,4,4,1,1,0,1,35,2,4,1,4 +66854,7,2,2,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,26021.868354,25835.926172,2,96,3,3,0.47,6,6,0,4,0,1,36,1,4,1,4 +66855,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17606.560681,17743.688949,2,95,6,6,1.7,2,2,0,0,0,2,54,1,4,2,NA 
+66856,7,2,2,2,NA,1,1,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,12871.484115,13281.030392,2,102,5,5,0.89,4,4,1,1,0,2,28,2,2,1,2 +66857,7,2,2,15,NA,5,6,2,15,181,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6486.356303,6952.179218,1,91,7,7,1.57,4,4,0,3,0,2,38,2,2,3,NA +66858,7,2,2,6,NA,4,4,1,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8362.256577,8929.488766,2,100,7,7,2.37,3,3,0,1,1,2,45,1,5,1,NA +66859,7,2,1,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6441.733603,6839.385512,1,91,6,6,0.99,5,5,3,0,0,2,33,2,3,1,4 +66860,7,2,1,21,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,51965.135941,60918.873896,2,101,2,1,0.37,2,1,0,0,0,1,21,1,4,5,NA +66861,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17583.693727,17478.873252,2,95,6,6,1.36,3,3,0,1,1,2,62,1,4,5,NA +66862,7,2,1,10,NA,4,4,2,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9699.683862,9681.447258,2,101,2,2,0.27,6,6,0,3,0,2,45,1,2,5,NA +66863,7,2,1,77,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,70642.066815,74773.070249,2,96,12,12,NA,2,2,0,0,2,1,77,1,5,1,4 +66864,7,2,1,16,NA,4,4,1,16,196,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11666.009872,12200.765691,2,100,14,4,0.43,7,7,1,3,1,2,62,1,3,5,NA +66865,7,2,2,15,NA,1,1,1,15,184,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22424.988432,22856.096824,1,94,5,5,0.87,4,4,0,2,0,2,41,2,4,1,1 +66866,7,2,1,5,NA,4,4,1,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8385.172131,8641.288503,2,93,5,5,1.04,4,4,1,1,0,1,29,1,3,6,NA +66867,7,1,1,18,NA,1,1,NA,NA,NA,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,22721.243258,0,2,102,7,7,1.04,7,7,1,2,0,2,37,2,1,1,2 +66868,7,2,1,8,NA,5,6,2,8,100,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9928.619925,10708.884665,1,91,15,15,5,4,4,1,1,0,1,43,2,5,1,5 +66869,7,2,2,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,24719.680932,24496.884814,1,97,15,15,5,2,2,0,0,0,1,59,1,5,1,5 
+66870,7,2,2,12,NA,1,1,1,12,153,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22424.988432,24600.637711,1,94,15,15,4.37,7,7,0,4,1,1,58,1,4,1,5 +66871,7,2,1,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,32375.321924,32878.474997,1,95,7,6,2.6,5,1,1,2,0,1,26,1,4,6,NA +66872,7,2,1,37,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22424.102245,23525.946014,1,92,14,14,3.47,4,4,0,2,0,1,37,1,5,1,5 +66873,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,163102.567998,168673.834909,1,99,12,12,NA,2,2,0,0,0,1,55,1,5,1,4 +66874,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,53955.606082,55085.619185,2,100,15,15,5,4,4,1,1,0,1,29,1,4,1,4 +66875,7,2,2,69,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,4,1,NA,1,2,1,1,2,1,1,2,1,3,10760.495249,11342.54557,3,90,4,4,1.22,2,2,0,0,2,2,69,2,4,1,1 +66876,7,2,2,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,134694.414609,139412.076132,2,91,15,15,5,3,3,0,0,2,2,62,1,4,1,4 +66877,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,56180.550638,64695.241485,2,98,5,5,1.63,2,2,0,0,2,1,80,1,3,1,3 +66878,7,2,2,15,NA,1,1,1,15,180,NA,NA,2,2,4,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21062.314667,21780.985882,3,91,3,3,0.39,6,6,1,1,0,1,39,2,1,6,NA +66879,7,2,2,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,139800.409559,144765.126463,1,100,15,15,4.56,4,4,0,2,0,2,42,1,4,1,3 +66880,7,2,1,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18452.546861,18969.498532,1,93,12,12,NA,3,3,0,1,0,2,48,1,3,5,NA +66881,7,2,2,40,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,20303.639991,20411.004471,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 +66882,7,2,2,22,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,50922.951701,51648.728485,1,92,15,15,5,3,3,0,0,1,1,57,1,3,1,4 +66883,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,109181.566304,117126.933999,2,91,7,7,2.31,2,2,0,0,0,2,58,1,5,3,NA 
+66884,7,2,2,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,20691.085146,20754.703375,2,91,4,4,1.22,2,2,0,0,0,1,53,1,4,1,4 +66885,7,2,1,76,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,6856.239408,7260.756717,2,95,5,5,0.87,4,4,0,0,2,2,77,1,2,1,1 +66886,7,2,1,5,NA,2,2,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14318.290734,14771.527769,3,91,6,6,0.83,6,6,1,3,0,1,37,1,4,1,4 +66887,7,1,2,6,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9122.654131,0,1,96,9,9,2.18,5,5,1,1,0,1,26,1,4,1,4 +66888,7,2,2,1,18,4,4,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7281.670423,7938.059494,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +66889,7,2,1,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,19541.667675,22573.12843,2,95,1,1,0.36,1,1,0,0,0,1,59,1,5,5,NA +66890,7,2,1,14,NA,1,1,2,14,178,NA,NA,2,2,4,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,22044.437334,22214.434256,1,97,8,8,1.45,6,6,2,2,0,2,36,2,2,1,1 +66891,7,2,2,15,NA,3,3,2,15,191,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,132630.209478,136474.31291,1,97,15,15,5,4,4,0,2,0,1,49,1,5,1,5 +66892,7,2,2,1,16,3,3,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46281.468826,49830.1764,1,92,9,4,1,7,3,2,1,0,1,45,1,4,2,NA +66893,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,168185.448935,169079.116739,1,95,9,9,4.01,2,2,0,0,1,1,60,1,3,1,3 +66894,7,1,2,1,12,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,0,1,91,9,9,2.97,3,3,1,0,0,1,31,2,5,1,5 +66895,7,2,1,10,NA,2,2,2,10,121,NA,NA,2,2,3,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10490.055059,10676.813132,3,90,4,4,0.63,5,5,0,3,0,1,45,2,4,1,4 +66896,7,2,1,64,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,8886.717016,9234.294003,2,101,3,3,0.78,3,3,0,0,1,1,64,1,2,1,3 +66897,7,2,2,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,18097.801029,17642.065425,2,100,9,9,3.24,3,3,1,0,0,1,32,1,3,1,4 
+66898,7,2,2,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,24056.374863,23398.009058,2,99,6,6,2.6,1,1,0,0,0,2,45,1,5,5,NA +66899,7,2,2,11,NA,1,1,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,22662.992756,24846.804377,2,98,13,13,NA,5,5,0,2,0,1,48,2,1,1,2 +66900,7,2,2,10,NA,4,4,1,10,121,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,13050.526646,2,101,1,1,0.15,3,3,0,2,0,2,58,1,3,5,NA +66901,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11449.435533,12331.980143,2,99,6,6,1.98,2,2,0,0,2,1,77,1,3,1,3 +66902,7,2,2,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,1,2,2,1,25691.564623,26900.812132,1,90,3,3,0.63,3,3,1,1,0,2,32,1,4,5,NA +66903,7,2,2,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,74517.751389,74746.868777,2,94,7,7,1.17,6,6,0,3,0,1,40,1,3,1,5 +66904,7,2,1,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,11507.810748,11238.9991,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +66905,7,2,2,25,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,18801.993237,20649.846581,2,96,8,5,2.2,2,1,0,0,0,2,25,2,5,5,NA +66906,7,2,2,11,NA,4,4,2,11,135,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10888.493631,11627.08662,1,101,1,1,0.21,3,3,0,2,0,2,32,1,4,5,NA +66907,7,2,2,37,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,2,1,2,1,2,2,1,2,2,NA,NA,NA,NA,11105.558187,11128.234812,3,91,14,14,2.5,6,6,1,1,1,2,37,2,2,1,5 +66908,7,2,1,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,17824.805721,17767.952896,2,92,2,2,0.57,2,2,0,0,0,2,56,1,3,2,NA +66909,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,52280.406546,52747.829022,1,95,3,1,0.28,2,1,0,0,0,1,27,1,3,5,NA +66910,7,1,2,8,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14465.997114,0,2,96,3,3,0.24,7,7,2,3,1,2,40,1,3,3,NA +66911,7,2,1,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,1,22697.846242,22639.540149,2,100,4,4,1.47,2,1,0,0,0,2,38,1,3,2,NA 
+66912,7,2,2,14,NA,5,7,2,14,172,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6386.337576,6452.948976,1,93,15,15,4.59,4,4,0,2,0,2,45,1,5,1,5 +66913,7,2,1,9,NA,2,2,2,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8516.07921,8708.134582,3,90,14,14,3.69,4,4,0,2,0,2,49,1,4,1,4 +66914,7,2,2,51,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12162.017722,12226.329772,2,92,12,9,5,7,1,0,0,2,1,53,2,3,1,3 +66915,7,2,1,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,32461.799549,32422.216998,1,101,4,4,0.99,2,2,0,0,0,2,51,1,5,1,2 +66916,7,2,1,1,23,4,4,1,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,6859.539552,1,96,5,5,0.53,7,7,2,2,0,2,38,1,9,6,NA +66917,7,2,1,7,NA,1,1,1,7,89,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16747.549238,17012.570163,2,96,7,7,1.34,5,5,0,2,0,1,24,2,2,5,NA +66918,7,2,1,72,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,32916.648979,34960.567692,2,103,15,15,3.44,7,7,0,1,2,2,79,1,3,2,NA +66919,7,2,2,13,NA,1,1,1,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,27070.679378,27847.54398,1,92,14,14,3.15,5,5,1,2,0,1,34,1,4,1,4 +66920,7,2,1,77,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,17113.447343,18123.138698,2,101,6,6,2.04,2,2,0,0,2,1,77,1,1,1,2 +66921,7,2,2,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,38679.12951,52110.643741,1,98,3,3,0.86,2,2,0,1,0,2,34,1,4,5,NA +66922,7,2,2,12,NA,4,4,2,12,149,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12180.874919,12414.982997,1,91,15,15,5,6,6,1,2,0,2,42,2,5,1,5 +66923,7,2,2,47,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17623.300255,17855.084956,1,100,15,15,5,3,3,0,0,0,1,47,2,5,1,5 +66924,7,2,1,36,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,2,2,2,NA,NA,NA,NA,28519.067294,31362.329476,1,96,15,15,5,4,4,0,2,0,1,36,2,3,1,4 +66925,7,2,2,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,46764.716703,54568.74353,1,99,2,2,0.46,1,1,0,0,0,2,23,1,5,5,NA 
+66926,7,2,1,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,175544.769665,178639.415011,1,91,15,15,5,2,1,0,0,0,1,51,1,3,1,NA +66927,7,2,1,20,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,51965.135941,52673.460841,2,101,1,1,0.11,2,1,0,0,0,1,19,NA,NA,NA,NA +66928,7,2,2,64,NA,5,6,1,NA,NA,2,NA,2,1,1,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,7118.69664,7393.452915,1,103,77,77,NA,6,6,0,2,2,1,70,NA,NA,1,1 +66929,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12331.419303,12882.003985,2,95,14,14,5,2,2,0,0,2,1,73,1,5,1,4 +66930,7,2,1,41,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,3,1,NA,2,2,2,2,2,2,2,2,2,2,31640.296506,31576.726829,2,94,15,15,5,3,3,0,0,0,1,41,2,3,1,NA +66931,7,2,1,16,NA,3,3,2,16,194,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,68148.957861,67253.324127,1,93,15,15,3.92,5,5,0,1,0,2,54,1,5,1,5 +66932,7,2,1,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,10420.275705,10501.786379,2,96,7,7,2.58,2,2,0,0,1,2,55,1,4,3,NA +66933,7,2,1,34,NA,5,6,1,NA,NA,2,NA,2,7,77,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16770.056318,16992.484825,1,103,12,12,NA,2,2,0,0,0,1,34,2,5,1,5 +66934,7,2,2,12,NA,5,6,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7952.410952,8162.028124,2,98,9,9,2.29,5,5,0,2,0,1,36,1,4,1,4 +66935,7,2,1,1,19,5,7,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6370.584728,7024.611386,2,94,10,10,3.51,3,3,1,0,0,2,30,2,5,1,NA +66936,7,2,1,28,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,2,1,2,1,1,2,2,3,14313.345971,14892.009983,3,91,6,6,1.12,4,4,0,0,2,1,69,2,3,1,1 +66937,7,2,1,37,NA,3,3,1,NA,NA,2,NA,2,2,6,NA,2,5,NA,1,2,2,2,2,2,1,2,2,1,63557.943986,66218.577849,2,103,12,12,NA,3,3,0,0,1,1,60,2,2,1,2 +66938,7,2,2,62,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,NA,NA,NA,1,2,1,NA,17248.011865,22917.917651,1,97,14,14,2.29,7,7,1,2,2,1,40,2,1,1,1 +66939,7,1,2,15,NA,2,2,NA,NA,NA,NA,NA,2,2,3,10,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,18053.382334,0,1,90,10,10,3.67,3,3,0,1,0,2,40,2,2,77,NA 
+66940,7,2,2,14,NA,1,1,1,14,168,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20117.170449,20520.192725,1,94,2,2,0.27,5,5,0,4,0,2,47,2,1,4,NA +66941,7,2,1,25,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,42077.383821,43345.097117,1,102,14,14,4.32,3,3,1,0,0,1,25,1,4,1,4 +66942,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,2,1,2,2,1,2,2,1,2,2,1,20562.749362,20582.941069,2,95,6,6,0.9,6,6,1,1,0,1,49,1,1,1,1 +66943,7,2,2,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,27934.372045,29727.618806,1,101,7,2,0.67,3,1,0,0,1,2,69,1,4,2,NA +66944,7,2,1,72,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,79754.311902,85970.307401,1,97,6,6,2.15,2,2,0,0,2,1,72,1,5,1,NA +66945,7,2,2,57,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,24646.971819,24775.50368,1,102,6,6,1.48,3,3,0,0,1,2,57,2,1,1,4 +66946,7,2,1,32,NA,5,7,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19384.896286,21271.615575,1,94,6,6,1.33,4,4,2,0,0,2,29,1,2,1,4 +66947,7,2,1,51,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,29883.483388,30470.971906,1,102,9,9,3.74,2,2,0,0,0,2,45,1,4,1,2 +66948,7,2,1,14,NA,5,6,1,15,180,NA,NA,2,1,3,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6140.97758,6391.148404,1,100,5,5,0.74,6,6,0,3,0,1,40,2,3,1,4 +66949,7,2,2,54,NA,5,7,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21760.356138,21875.42365,1,92,14,14,5,2,2,0,0,0,1,56,2,4,1,4 +66950,7,2,2,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,111065.717962,123974.675785,2,94,3,3,0.54,4,4,0,1,0,2,48,1,3,1,3 +66951,7,2,2,8,NA,1,1,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,2,2,2,2,17053.854294,17496.484818,3,92,7,7,0.93,7,7,1,3,0,2,20,1,3,1,1 +66952,7,2,2,4,NA,3,3,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27257.164734,28409.87702,1,101,6,6,0.87,6,6,2,2,0,2,23,1,4,6,NA +66953,7,2,1,10,NA,1,1,1,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13837.588743,14876.800627,1,92,8,8,2.62,3,3,0,1,0,1,41,2,3,1,9 
+66954,7,2,2,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,81857.569857,83650.947594,2,92,15,10,5,4,1,0,0,0,1,28,1,5,5,NA +66955,7,2,1,38,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,39040.678458,39049.131011,2,98,15,15,5,3,3,0,1,0,1,38,1,4,1,3 +66956,7,2,2,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,41018.498876,43893.586815,2,98,3,3,0.54,3,3,1,0,0,1,23,1,3,1,2 +66957,7,2,1,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,136361.754972,134969.826866,1,98,6,6,1.75,2,2,0,0,2,1,62,1,4,1,3 +66958,7,2,1,52,NA,3,3,1,NA,NA,2,NA,2,2,5,NA,3,1,NA,2,2,2,1,2,2,2,2,2,2,24127.240234,24937.486066,2,93,6,6,1.65,2,2,0,0,0,1,52,2,3,1,5 +66959,7,2,2,43,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,5,2,2,2,2,2,2,2,1,2,2,2,33767.584626,33943.679708,2,102,6,3,0.54,6,4,0,4,0,2,43,2,1,5,NA +66960,7,2,1,1,20,5,6,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8674.760516,9729.693026,1,94,10,10,3.04,4,4,2,0,0,2,30,1,4,1,5 +66961,7,2,1,36,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19144.719218,19850.422567,1,101,10,10,3.67,3,3,1,0,0,1,36,2,5,1,5 +66962,7,2,1,1,22,4,4,1,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5930.749873,6003.089312,2,93,7,7,1.83,3,3,1,0,0,2,34,2,3,1,3 +66963,7,1,2,70,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,64383.454106,0,1,90,12,12,NA,2,2,0,0,2,1,75,1,5,1,4 +66964,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,100264.332334,101647.830414,1,99,7,7,4.09,1,1,0,0,0,2,27,1,5,5,NA +66965,7,2,1,2,NA,3,3,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21718.301271,24503.405153,1,94,5,5,0.74,5,5,1,1,0,2,24,1,3,1,4 +66966,7,2,2,14,NA,1,1,2,14,170,NA,NA,2,2,4,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19850.979841,20551.555585,1,97,3,3,0.5,5,5,0,2,0,1,56,2,2,6,NA +66967,7,2,1,33,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,2,6,NA,2,2,2,2,2,2,NA,NA,NA,NA,39073.76885,38778.866765,2,99,99,3,0.79,4,2,0,0,0,2,42,2,4,5,NA 
+66968,7,2,2,11,NA,1,1,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21075.336925,21477.798111,1,92,14,14,3.15,5,5,1,2,0,1,34,1,4,1,4 +66969,7,2,1,10,NA,1,1,1,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8828.580268,8822.70874,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +66970,7,2,1,18,NA,1,1,2,18,222,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26704.187335,27112.370182,1,95,8,5,1.36,3,2,0,0,0,1,50,1,9,6,NA +66971,7,2,2,8,NA,5,6,1,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9620.269705,10273.007637,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 +66972,7,1,1,1,13,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,0,2,100,2,2,0.25,4,4,2,1,0,2,39,1,2,5,NA +66973,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,111065.717962,111174.779569,2,94,10,10,4.42,2,2,0,0,0,2,49,1,4,1,1 +66974,7,2,1,72,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,11034.04089,11599.454609,2,95,5,5,1.45,2,2,0,0,2,1,72,1,3,1,3 +66975,7,2,1,17,NA,2,2,1,17,205,2,NA,2,1,4,9,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,18206.126374,19392.083527,2,93,3,3,0.58,4,4,0,1,1,1,65,2,1,3,NA +66976,7,2,1,24,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,35338.972549,37196.45724,2,90,6,6,1.21,4,4,0,0,0,2,59,2,1,6,NA +66977,7,2,2,10,NA,4,4,2,10,124,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8579.490652,8919.477637,2,97,4,4,0.57,5,5,1,3,0,2,33,1,3,5,NA +66978,7,2,1,7,NA,5,6,2,7,86,NA,NA,2,2,3,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9928.619925,10758.848877,1,91,14,14,3.69,4,4,1,1,0,2,29,2,5,1,5 +66979,7,2,2,16,NA,5,6,2,16,200,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,7588.544207,8133.521636,3,91,6,6,1.34,4,4,0,2,0,1,52,2,3,1,1 +66980,7,2,1,7,NA,5,6,2,7,88,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9928.619925,10758.848877,1,91,2,2,0.32,3,3,1,1,0,2,28,1,4,77,NA +66981,7,2,1,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,27356.080541,27921.272721,1,101,4,4,0.84,3,3,0,1,0,1,42,1,4,1,4 
+66982,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,21924.837493,22597.469895,2,95,7,7,1.13,6,6,0,3,1,1,52,1,4,1,4 +66983,7,2,2,9,NA,4,4,1,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,8362.256577,9003.967662,2,100,1,1,0.08,5,5,1,2,0,2,19,1,3,NA,NA +66984,7,2,1,75,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,53663.609619,56801.747638,2,93,10,10,3.61,3,3,0,0,2,1,75,1,4,1,4 +66985,7,2,2,77,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,3,2,NA,1,2,2,1,2,2,2,2,1,NA,18241.877822,19614.829564,2,93,3,3,0.66,2,2,0,0,2,2,69,2,4,3,NA +66986,7,2,1,9,NA,5,6,1,9,116,NA,NA,1,1,NA,4,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,6254.093492,6623.185563,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +66987,7,2,1,0,8,5,6,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5507.575232,5738.278852,1,97,15,15,4.84,6,6,2,0,0,1,53,NA,NA,1,NA +66988,7,2,2,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,19870.689174,19326.875638,1,96,3,3,0.93,2,2,0,1,0,2,40,1,5,5,NA +66989,7,2,2,1,14,1,1,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11512.764389,11879.078775,2,98,2,2,0.35,3,3,2,0,0,2,20,1,4,5,NA +66990,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,18097.801029,17527.360027,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +66991,7,2,1,24,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,14313.345971,14703.033095,3,91,10,10,4.42,2,2,0,0,0,1,47,2,2,1,NA +66992,7,2,1,7,NA,3,3,2,7,86,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,42986.51011,44926.997612,2,94,14,14,2.83,6,6,0,4,0,2,38,1,2,1,2 +66993,7,2,2,38,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,2,6,2,2,2,2,2,2,2,NA,NA,NA,NA,32982.479382,32092.542799,2,99,99,3,0.79,4,2,0,0,0,2,42,2,4,5,NA +66994,7,2,1,52,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,25264.512558,25333.955786,2,93,15,15,5,2,2,0,0,0,1,52,2,5,1,5 +66995,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,76827.086279,81170.917712,2,99,15,15,5,2,2,0,0,0,2,37,1,5,1,5 
+66996,7,2,1,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,25651.892165,25827.388922,1,97,14,14,5,3,3,0,0,0,2,51,1,5,1,4 +66997,7,2,2,15,NA,4,4,1,15,185,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19770.196972,20174.150218,2,102,10,10,3.78,3,3,0,1,0,1,33,1,3,5,NA +66998,7,2,2,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10771.85499,12037.490455,2,97,4,4,0.81,4,4,1,1,0,2,51,1,3,4,NA +66999,7,2,2,15,NA,4,4,1,15,191,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13801.622751,14246.373765,1,100,10,10,2.59,5,5,0,1,0,2,40,1,5,1,NA +67000,7,2,1,49,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,18533.049642,19320.837782,1,99,15,8,4.59,4,1,0,2,0,1,49,1,4,6,NA +67001,7,2,1,76,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,63147.363155,65543.108264,1,91,8,8,3.4,2,2,0,0,2,2,74,1,4,1,2 +67002,7,2,1,39,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,21317.283165,22287.043754,2,96,5,5,1.08,3,3,0,1,0,2,41,1,3,1,NA +67003,7,2,1,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,105401.67423,106753.633419,2,92,15,15,5,1,1,0,0,1,1,68,1,5,3,NA +67004,7,2,2,4,NA,1,1,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18078.669459,18848.958225,1,101,2,2,0.26,5,5,3,0,0,2,26,1,2,1,3 +67005,7,2,1,19,NA,4,4,2,19,239,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11351.725436,11256.943498,2,95,8,8,1.61,6,6,1,3,0,2,48,1,3,5,NA +67006,7,2,2,2,NA,4,4,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7348.24433,7906.792868,2,95,8,8,1.61,6,6,1,3,0,2,48,1,3,5,NA +67007,7,2,2,40,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,19247.348846,19527.962249,2,101,9,9,3.74,2,2,0,0,0,2,40,2,4,1,2 +67008,7,2,2,17,NA,4,4,1,17,210,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16659.324602,16705.662179,1,92,77,77,NA,5,5,1,2,0,2,41,1,3,5,NA +67009,7,2,2,33,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,90299.161173,96625.186997,1,92,6,6,1.57,3,3,0,1,0,1,29,1,4,6,NA 
+67010,7,2,2,3,NA,2,2,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11009.851855,11478.955249,1,93,13,3,0.54,6,3,2,1,0,1,23,NA,NA,5,NA +67011,7,2,2,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,4,NA,1,2,2,1,2,2,1,2,2,1,7869.59899,8538.749518,2,100,2,2,0.72,1,1,0,0,1,2,67,1,1,4,NA +67012,7,2,2,34,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,14138.631841,14167.501749,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +67013,7,2,1,11,NA,4,4,2,11,132,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9699.683862,9867.948233,2,97,14,14,3.91,4,4,1,1,0,1,38,1,4,1,5 +67014,7,2,2,48,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18696.216547,18746.863759,2,100,5,5,0.95,4,4,0,0,1,2,53,1,3,5,NA +67015,7,2,2,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,14516.765165,13843.248577,3,90,7,7,2.23,3,3,0,0,0,2,51,1,4,3,NA +67016,7,2,2,11,NA,3,3,2,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,48532.852397,48387.04619,1,98,14,14,3.9,4,4,0,3,0,2,31,1,4,1,NA +67017,7,2,1,11,NA,3,3,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,82670.203859,85919.643529,1,97,15,15,5,5,5,0,3,0,2,47,2,5,1,5 +67018,7,1,1,5,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,17458.543556,0,2,91,6,6,1.3,4,4,1,1,0,2,27,1,4,6,NA +67019,7,2,1,6,NA,5,7,1,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7101.095162,7528.512272,2,96,15,15,5,4,4,1,1,0,2,35,2,5,1,5 +67020,7,2,1,78,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,74473.849242,78828.927467,2,98,14,14,5,2,2,0,0,2,1,78,1,5,1,3 +67021,7,2,1,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15599.953109,16099.73574,1,99,7,7,1.89,3,3,0,1,0,1,50,1,5,1,2 +67022,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,35813.944922,38588.040359,1,100,8,8,2.97,2,2,0,0,1,2,54,1,4,3,NA +67023,7,2,1,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,8746.76562,8644.133303,2,97,15,15,5,3,3,0,0,3,2,80,1,3,2,NA 
+67024,7,2,2,12,NA,3,3,2,12,146,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71303.309206,77744.474592,2,94,15,15,5,5,5,0,2,0,1,53,1,5,1,5 +67025,7,2,2,78,NA,2,2,2,NA,NA,2,NA,2,1,NA,NA,1,1,NA,2,2,2,1,2,2,1,2,2,NA,17318.187297,23904.945555,2,90,2,2,0.89,1,1,0,0,1,2,78,2,1,1,NA +67026,7,2,1,40,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,35406.972937,38657.357615,2,98,12,12,NA,3,3,0,1,0,2,34,1,4,1,3 +67027,7,2,2,15,NA,1,1,2,16,192,NA,NA,2,1,4,10,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18166.909002,19929.443801,2,94,7,7,1.33,6,6,0,1,0,1,55,2,2,1,1 +67028,7,2,2,6,NA,1,1,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10118.363218,10311.586628,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +67029,7,2,2,2,NA,5,6,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,5373.703942,1,91,5,5,0.89,4,4,2,0,0,1,39,1,4,1,5 +67030,7,2,1,6,NA,4,4,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8714.559478,8865.734494,2,96,12,10,2.17,7,6,2,3,0,1,29,1,4,3,NA +67031,7,2,2,18,NA,4,4,2,18,219,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12857.456314,12921.235691,2,97,5,5,0.92,5,5,0,3,0,2,54,1,3,2,NA +67032,7,2,2,32,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,21351.921325,21766.153521,2,92,6,6,1.35,3,3,1,0,0,2,32,1,5,1,5 +67033,7,2,1,3,NA,4,4,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10615.626123,11270.93481,1,100,8,8,1.61,6,6,1,3,0,1,29,1,5,6,NA +67034,7,2,2,37,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,1,2,1,2,2,1,2,2,2,2,2,2,35353.005268,34399.106917,2,94,12,12,NA,4,4,0,2,0,1,47,2,2,1,2 +67035,7,2,2,3,NA,1,1,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13196.707564,14212.912965,2,96,8,8,1.33,7,7,2,1,1,1,62,2,1,1,1 +67036,7,2,2,16,NA,2,2,2,16,202,NA,NA,1,1,NA,11,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,12536.713942,13553.99602,3,90,12,12,NA,2,2,0,1,0,2,40,2,2,1,NA +67037,7,2,2,66,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,13676.984152,18290.186018,2,91,4,4,0.67,5,4,2,0,2,2,66,2,1,1,NA 
+67038,7,2,1,11,NA,3,3,2,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13713.949705,16784.308768,1,99,6,6,1.12,4,4,0,2,0,1,39,1,3,1,3 +67039,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,63630.652053,80950.199865,3,90,9,9,3.64,2,2,0,0,0,1,27,2,4,1,5 +67040,7,2,2,19,NA,1,1,2,19,232,2,NA,1,1,NA,15,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,18368.872199,18722.003928,2,94,9,1,0.14,3,1,0,0,0,2,48,2,2,6,NA +67041,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,42468.064168,44382.588967,2,101,3,3,0.54,3,3,0,2,0,2,36,1,3,5,NA +67042,7,2,2,9,NA,5,6,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8022.905544,9049.556422,2,92,6,6,1.34,4,4,1,1,0,1,40,2,3,1,3 +67043,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,17583.693727,17703.992112,2,95,15,15,5,3,3,0,0,0,1,59,NA,NA,6,NA +67044,7,2,2,19,NA,5,7,2,19,231,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11975.458482,12291.118947,1,97,15,15,5,6,6,0,3,0,1,47,1,5,1,5 +67045,7,2,2,1,13,3,3,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19168.30863,20370.539296,1,94,3,3,0.39,6,6,2,2,0,2,25,1,4,1,2 +67046,7,2,1,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,7514.993062,7713.03043,2,90,7,7,2.38,2,2,0,0,1,1,66,1,4,5,NA +67047,7,2,2,54,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,35630.227837,35816.036445,2,102,8,8,4.59,1,1,0,0,0,2,54,2,5,5,NA +67048,7,2,2,12,NA,5,7,1,12,148,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12427.929278,12265.18624,1,102,15,15,5,4,4,0,2,0,2,40,2,5,1,4 +67049,7,2,2,78,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,10072.885959,10826.303506,2,100,13,13,NA,1,1,0,0,1,2,78,1,2,2,NA +67050,7,2,2,8,NA,1,1,2,8,105,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,2,13231.432201,13813.796743,2,97,13,13,NA,3,3,0,1,0,1,28,2,2,1,1 +67051,7,2,2,37,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,13379.422066,13458.148951,2,103,14,14,3.48,5,5,0,2,1,1,43,1,4,1,5 
+67052,7,2,1,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,28585.875492,29232.915291,1,94,2,2,0.75,1,1,0,0,0,1,56,1,2,3,NA +67053,7,2,1,67,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,1,2,NA,2,2,2,1,2,2,1,2,1,2,8017.002318,8145.424458,2,93,7,7,1.74,4,4,0,0,1,2,44,2,3,1,4 +67054,7,2,2,8,NA,1,1,1,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17458.526997,18973.577367,1,94,6,6,0.97,6,6,1,3,0,1,40,1,3,1,4 +67055,7,2,2,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,28244.319428,29210.271354,1,94,4,4,1.24,2,2,0,0,2,2,71,1,2,1,4 +67056,7,2,1,54,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,11551.71585,11723.869749,2,92,12,12,NA,7,7,2,4,0,1,54,2,2,1,5 +67057,7,2,1,0,2,4,4,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6423.674397,6522.268756,1,96,14,14,4.19,3,3,1,0,0,1,44,1,4,6,NA +67058,7,2,1,42,NA,4,4,2,NA,NA,2,NA,2,2,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,17602.101156,18151.242681,1,96,15,15,5,4,4,0,1,0,1,42,2,4,1,4 +67059,7,2,2,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,NA,NA,NA,NA,53799.276134,54280.27834,1,94,3,3,0.37,5,5,0,3,0,2,29,1,4,4,NA +67060,7,2,2,16,NA,4,4,2,16,198,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11478.437608,11446.094627,1,96,6,6,1.21,4,4,0,2,0,2,41,1,4,4,NA +67061,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,36955.110877,37344.656251,1,100,14,14,4.45,3,3,1,0,0,1,33,1,4,1,4 +67062,7,2,2,1,14,3,3,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46813.021927,47428.160802,1,94,8,8,2.62,3,3,1,0,0,2,26,1,4,5,NA +67063,7,2,1,15,NA,3,3,1,15,188,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,84860.612577,85065.529717,2,98,15,15,5,4,4,0,2,0,1,48,1,3,1,4 +67064,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,94433.586146,97510.682342,1,91,14,14,3.06,5,5,2,0,0,2,30,1,5,1,5 +67065,7,2,1,36,NA,4,4,1,NA,NA,2,NA,2,2,4,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,23654.723526,31640.766843,1,100,4,4,0.99,2,2,0,1,0,1,36,2,4,4,NA 
+67066,7,2,1,65,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,26087.95381,26422.577042,1,94,4,4,1.24,2,2,0,0,2,2,71,1,2,1,4 +67067,7,2,1,23,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,3,6,NA,2,2,2,NA,NA,NA,2,2,1,2,38474.772527,42632.210531,2,93,3,3,0.43,4,4,0,0,0,1,45,2,2,6,NA +67068,7,2,1,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,17879.023129,20112.694262,2,90,2,2,0.87,1,1,0,0,0,1,32,1,5,5,NA +67069,7,2,1,19,NA,3,3,2,19,235,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,69216.263169,70543.167563,1,91,15,15,5,3,3,0,0,0,2,50,1,4,1,4 +67070,7,2,2,64,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,14809.997435,15471.247979,2,95,4,4,0.76,4,4,0,1,2,1,80,1,1,2,NA +67071,7,2,2,16,NA,3,3,2,16,194,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,135393.338842,139317.527763,1,97,14,14,3.9,4,4,0,2,0,1,47,1,3,1,5 +67072,7,2,1,11,NA,2,2,1,11,133,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11971.590477,11920.833512,2,100,14,14,3.58,4,4,0,1,0,1,46,2,5,1,5 +67073,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,42894.724338,43561.362107,1,98,6,3,0.9,2,1,0,0,0,1,24,1,5,5,NA +67074,7,2,2,37,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,4,1,2,2,2,2,1,2,2,NA,NA,NA,NA,41791.57979,40949.069487,2,102,7,7,1.53,5,5,1,2,0,2,37,2,4,1,4 +67075,7,2,2,14,NA,3,3,1,15,181,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,114661.989457,116814.464785,2,101,14,14,4.86,3,3,0,1,0,1,53,1,4,1,5 +67076,7,2,2,10,NA,3,3,1,10,130,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23170.920553,22863.220381,3,91,2,2,0.33,4,4,0,3,0,2,31,1,4,4,NA +67077,7,1,2,4,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5878.066175,0,1,91,15,15,5,5,5,2,1,0,1,40,1,5,1,5 +67078,7,2,2,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,28175.708676,28476.609384,1,94,3,3,0.92,1,1,0,0,1,2,75,1,4,1,NA +67079,7,2,1,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,20397.472757,21204.731618,1,90,6,6,2.15,2,2,0,0,1,2,79,NA,NA,77,NA 
+67080,7,2,2,11,NA,3,3,2,11,140,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14287.66096,14320.169639,2,97,7,7,1.92,3,3,0,1,0,1,57,1,4,1,4 +67081,7,2,2,50,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,22916.776688,24567.079865,1,102,5,5,0.62,7,7,1,3,0,1,49,2,2,1,1 +67082,7,2,1,18,NA,1,1,1,18,224,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22768.423624,22766.421142,2,98,5,5,1.61,2,2,0,0,0,2,44,2,4,3,NA +67083,7,2,1,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,111284.977736,116509.217288,3,91,15,15,5,4,4,0,0,1,1,60,NA,NA,4,NA +67084,7,2,2,5,NA,3,3,2,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27528.66901,30383.138359,1,95,5,5,1.19,3,3,1,1,0,1,47,1,2,3,NA +67085,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,46498.414015,0,1,90,6,6,1.65,2,2,0,0,1,1,55,1,4,77,NA +67086,7,2,1,8,NA,1,1,1,9,108,NA,NA,2,2,3,2,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,17882.621856,17720.218058,2,98,3,3,0.4,6,6,1,2,0,2,29,2,1,4,NA +67087,7,2,2,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,15267.012422,14516.051827,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +67088,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,51218.356583,59416.206008,2,98,12,12,NA,2,2,0,0,1,2,80,1,2,2,NA +67089,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,24654.107413,23987.341621,1,100,6,6,1.39,4,4,0,3,0,2,29,1,4,5,NA +67090,7,2,2,0,4,2,2,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,6752.982789,6610.907942,1,93,12,7,2,4,3,1,0,0,1,43,2,1,1,5 +67091,7,2,2,12,NA,2,2,1,12,149,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15809.066118,17342.845429,2,93,3,3,0.52,5,5,0,2,0,1,41,2,4,1,4 +67092,7,2,1,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,NA,63054.867183,67969.319596,1,90,14,8,4.03,2,1,0,0,2,1,76,1,5,6,NA +67093,7,2,1,10,NA,1,1,1,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,16288.924956,16868.808147,1,101,3,3,0.41,5,5,0,2,1,2,36,2,4,4,NA 
+67094,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,6038.685119,6085.921613,1,96,15,15,5,2,2,0,0,2,1,60,1,5,1,3 +67095,7,2,1,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,28585.875492,28917.983006,1,91,5,5,1.2,3,3,0,0,1,1,58,1,2,1,2 +67096,7,2,1,11,NA,1,1,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11399.23838,11295.714433,1,103,5,5,0.71,6,6,2,2,0,2,31,2,2,1,2 +67097,7,2,2,7,NA,1,1,2,7,85,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15225.935813,15373.8033,2,94,7,7,1.88,4,4,0,2,0,2,28,1,4,4,NA +67098,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,9793.924718,10548.859356,2,100,14,4,0.43,7,7,1,3,1,2,62,1,3,5,NA +67099,7,2,1,7,NA,2,2,1,7,94,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12231.897958,14410.652577,1,100,3,3,0.39,5,5,1,2,0,1,32,2,1,6,NA +67100,7,2,2,29,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,20247.768461,26123.76716,2,93,6,6,1.72,2,2,0,1,0,2,29,1,4,3,NA +67101,7,2,2,8,NA,1,1,1,8,101,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20495.125801,21365.773499,3,92,4,4,0.46,7,7,1,2,0,2,31,2,2,1,1 +67102,7,2,2,13,NA,3,3,2,13,162,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,133500.800632,137370.136946,1,98,5,5,1.39,2,2,0,1,0,1,46,1,4,3,NA +67103,7,2,2,3,NA,2,2,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11429.37307,12618.334582,2,90,5,5,1.19,3,3,1,0,1,2,60,2,1,4,NA +67104,7,2,1,8,NA,4,4,2,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9699.683862,9892.534359,2,97,5,5,1.08,3,3,1,1,0,2,27,1,3,5,NA +67105,7,2,2,43,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,34954.173075,36984.739503,2,98,2,2,0.35,3,3,0,1,0,2,43,1,3,1,3 +67106,7,2,1,21,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,35338.972549,36508.861705,2,90,2,2,0.25,5,5,0,1,0,2,41,2,4,1,NA +67107,7,2,1,77,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,9662.124837,10964.58979,2,95,2,2,0.6,1,1,0,0,1,1,77,1,3,3,NA 
+67108,7,2,1,6,NA,1,1,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11881.117946,11935.814841,1,102,6,6,0.96,5,5,0,2,0,1,32,2,2,1,3 +67109,7,2,2,29,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,47348.206546,48810.979214,1,92,14,14,3.9,4,4,2,0,0,2,29,1,4,1,4 +67110,7,2,2,7,NA,4,4,1,7,90,NA,NA,2,1,3,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7140.212598,7289.797115,2,93,12,12,NA,4,4,1,1,0,2,27,2,4,1,4 +67111,7,2,1,11,NA,1,1,1,11,141,NA,NA,2,2,2,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,2,13870.762641,14200.45921,2,102,7,7,1.33,6,6,1,3,0,1,34,2,2,1,1 +67112,7,2,2,64,NA,1,1,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9581.703872,9981.600708,2,96,15,15,5,2,2,0,0,2,1,66,1,5,1,5 +67113,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,183805.817006,191979.640064,1,93,15,15,5,2,2,0,0,0,1,58,2,5,1,5 +67114,7,2,1,6,NA,5,6,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7241.695104,8178.415743,1,102,15,15,4.59,4,4,1,1,0,1,35,1,5,1,5 +67115,7,2,1,59,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,28247.128624,28689.709737,2,93,9,9,2.6,4,4,0,0,0,2,58,2,4,4,NA +67116,7,2,2,31,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,43270.789623,48694.023669,2,99,10,10,5,2,1,0,0,0,2,31,1,5,5,NA +67117,7,2,2,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,47973.37979,50072.031476,1,101,2,2,0.22,4,4,1,0,0,2,25,1,4,6,NA +67118,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,88056.562286,93150.302158,2,91,6,6,1.34,4,4,1,2,0,2,33,1,4,3,NA +67119,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,110369.721342,112542.856633,1,94,6,6,1.31,3,3,0,0,0,2,46,1,5,6,NA +67120,7,1,2,30,NA,5,6,NA,NA,NA,2,NA,1,1,NA,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,19305.389921,0,1,93,10,10,5,1,1,0,0,0,2,30,1,5,5,NA +67121,7,2,1,39,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19091.246741,19659.303383,1,91,5,5,0.89,4,4,2,0,0,1,39,1,4,1,5 
+67122,7,2,1,17,NA,4,4,1,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12296.68269,12194.010773,1,102,7,7,2.72,2,2,0,1,0,2,49,1,4,5,NA +67123,7,2,2,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,32501.623429,43787.968887,2,97,5,5,1.63,2,2,1,0,0,2,33,1,3,5,NA +67124,7,2,2,8,NA,3,3,1,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23170.920553,22863.220381,1,94,4,4,1.06,2,2,0,1,0,2,26,1,4,5,NA +67125,7,2,1,2,NA,2,2,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8331.647763,8595.381151,2,90,15,15,4.2,5,5,1,0,0,2,50,NA,NA,6,NA +67126,7,2,2,22,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,16239.242782,17454.103497,2,101,99,99,NA,3,1,0,0,0,2,22,1,4,5,NA +67127,7,2,1,18,NA,4,4,1,19,228,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,14650.502937,14679.468181,2,102,14,14,3.25,5,5,1,1,0,2,32,1,4,1,3 +67128,7,2,2,47,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,18811.641937,20880.310604,2,90,3,3,1.04,1,1,0,0,0,2,47,2,4,5,NA +67129,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18723.98095,17927.802584,2,95,10,10,2.32,6,6,1,2,0,1,44,1,4,1,4 +67130,7,2,2,10,NA,2,2,1,10,121,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15352.601806,15746.071667,2,102,15,12,NA,5,4,0,3,0,1,42,2,4,6,NA +67131,7,2,1,8,NA,3,3,1,8,103,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18834.739821,20852.023287,1,94,5,5,1.2,3,3,0,1,0,1,49,1,4,5,NA +67132,7,2,2,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,1,2,2,1,36598.587714,38321.205688,1,97,7,7,1.74,4,4,0,3,0,2,32,1,4,5,NA +67133,7,2,1,33,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,32856.012738,32608.038024,2,103,77,77,NA,5,5,1,2,0,2,30,1,2,1,2 +67134,7,2,1,29,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,40333.47326,40883.249623,2,99,9,9,4.92,1,1,0,0,0,1,29,1,5,5,NA +67135,7,2,2,58,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,1,2,NA,1,2,1,1,2,2,NA,NA,NA,NA,17277.829496,17117.256291,1,98,15,15,5,6,6,3,0,0,1,37,2,5,1,4 
+67136,7,2,2,11,NA,5,7,2,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18348.624469,20109.577944,2,90,77,77,NA,6,6,0,4,0,2,41,NA,NA,4,NA +67137,7,2,1,67,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,8479.54701,8948.405799,2,98,6,6,2.04,2,2,0,0,2,2,66,1,1,1,1 +67138,7,2,2,8,NA,2,2,2,8,102,NA,NA,2,1,3,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15583.587534,16935.930722,1,93,15,15,5,4,4,0,2,0,1,50,1,5,1,5 +67139,7,2,2,0,1,1,1,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8907.98929,8720.575629,1,95,1,1,0,3,3,1,0,0,2,21,1,2,6,NA +67140,7,1,2,12,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5268.765205,0,1,99,9,9,2.68,4,4,0,2,0,1,43,2,3,1,NA +67141,7,2,2,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,72114.232586,74326.626155,2,96,12,12,NA,2,2,0,0,2,1,77,1,5,1,4 +67142,7,2,1,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6992.24593,7710.094516,2,97,1,1,0.33,2,2,1,0,0,2,24,1,2,5,NA +67143,7,2,1,33,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,3,5,NA,2,2,2,2,2,2,2,2,2,2,34887.439952,34624.133414,2,94,14,8,3.06,5,2,0,0,0,1,24,2,4,5,NA +67144,7,2,1,54,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,13629.653758,14595.525499,1,90,15,15,5,5,5,0,3,0,2,46,2,4,1,5 +67145,7,2,2,0,9,4,4,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3320.8855,3567.679794,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +67146,7,2,2,7,NA,3,3,2,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24070.467912,25563.654853,1,95,6,6,1.09,5,5,0,3,0,1,31,1,4,1,4 +67147,7,2,1,1,22,4,4,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6347.215153,6541.084247,2,95,6,6,1.3,4,4,1,1,0,1,38,1,4,1,4 +67148,7,2,2,68,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,3,2,NA,2,2,2,2,2,2,1,2,2,2,9716.805546,10855.679158,2,90,2,2,0.64,1,1,0,0,1,2,68,2,3,2,NA +67149,7,2,2,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9518.80186,9943.806181,2,100,6,6,2.31,2,2,0,0,2,2,65,1,4,1,2 
+67150,7,2,1,0,7,4,4,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6246.568228,6909.306675,2,95,1,1,0.13,2,2,1,0,0,2,22,1,3,5,NA +67151,7,2,1,5,NA,4,4,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,10162.494907,1,100,1,1,0.26,2,2,1,0,0,2,28,1,3,5,NA +67152,7,2,2,10,NA,1,1,1,10,124,NA,NA,1,1,NA,3,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,14414.529053,14932.182215,2,96,3,3,0.46,5,5,1,2,0,1,37,1,1,1,2 +67153,7,2,2,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,18097.801029,17538.193115,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA +67154,7,2,1,4,NA,5,7,2,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10276.262805,11525.953064,1,97,15,15,4.77,4,4,1,1,0,1,41,2,4,1,5 +67155,7,2,2,2,NA,5,6,1,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7128.631964,7757.55827,3,91,15,15,5,4,4,1,1,0,1,39,2,5,1,5 +67156,7,2,2,10,NA,2,2,1,10,123,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12307.832776,13375.906079,2,93,10,10,2.26,6,6,0,4,0,1,34,1,4,1,3 +67157,7,2,2,18,NA,5,6,2,18,222,2,NA,2,1,3,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6937.463063,7435.682574,3,90,99,99,NA,4,4,0,1,0,1,40,2,3,6,NA +67158,7,2,1,14,NA,2,2,1,14,178,NA,NA,2,2,3,9,NA,NA,NA,2,1,2,1,2,2,1,2,1,2,14117.225999,14174.690553,2,93,4,4,0.56,5,5,0,2,0,1,37,NA,NA,1,1 +67159,7,2,1,80,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,1,1,2,1,1,2,2,NA,11550.158096,12419.035396,2,92,5,5,1.08,3,3,0,0,2,1,46,NA,NA,5,NA +67160,7,2,2,15,NA,4,4,2,15,185,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12209.74498,12444.407921,2,90,14,14,4.25,4,4,0,2,1,2,45,2,5,5,NA +67161,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,48113.658256,0,1,90,5,5,2.15,1,1,0,0,1,2,80,1,3,2,NA +67162,7,2,1,4,NA,5,6,2,4,54,NA,NA,2,2,1,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8402.098771,9423.873047,3,91,5,5,1.08,3,3,1,0,0,1,29,2,5,1,5 +67163,7,2,1,4,NA,1,1,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14505.510202,14964.673559,2,94,4,4,0.73,5,5,2,1,0,1,35,2,1,6,NA 
+67164,7,2,2,13,NA,5,6,1,13,165,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10142.281747,10534.471105,3,91,15,15,5,4,4,0,2,0,1,44,2,5,1,5 +67165,7,2,2,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,35126.205635,35160.69801,1,101,2,2,0.66,1,1,0,0,0,2,46,1,4,3,NA +67166,7,2,2,9,NA,2,2,2,9,116,NA,NA,2,1,3,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,16099.075547,16268.329655,1,93,5,5,0.84,5,5,1,2,0,2,52,2,1,3,NA +67167,7,2,1,8,NA,2,2,2,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9390.522479,10327.334743,2,90,3,3,0.7,3,3,1,1,0,2,25,1,1,1,NA +67168,7,2,2,11,NA,3,3,1,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18800.96526,18551.296274,1,94,3,3,0.37,5,5,0,3,0,2,29,1,4,4,NA +67169,7,2,1,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,27572.205373,27179.185736,1,100,7,7,3.58,1,1,0,0,0,1,30,1,5,5,NA +67170,7,2,1,13,NA,2,2,2,13,163,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16033.31661,17623.465787,2,90,5,5,0.76,5,5,0,4,0,2,32,1,2,3,NA +67171,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,19060.786733,19662.793534,2,97,4,4,0.81,4,4,1,0,0,2,51,1,2,4,NA +67172,7,2,1,5,NA,1,1,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19885.456648,19912.326587,1,92,10,10,3.04,4,4,2,0,0,2,37,2,5,1,5 +67173,7,2,2,21,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,1,2,2,1,2,2,1,19463.594852,21649.931723,3,92,6,6,1.6,3,3,0,0,1,2,77,1,3,99,NA +67174,7,2,2,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,1,1,2,2,1,2,2,1,2,2,1,100264.332334,116996.29597,1,99,7,7,3.63,1,1,0,0,0,2,25,1,5,5,NA +67175,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,31540.022655,31610.470938,2,94,NA,77,NA,2,1,0,0,0,1,49,1,3,3,NA +67176,7,2,1,17,NA,4,4,2,17,206,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11834.781205,12120.23702,2,90,8,8,2.59,3,3,0,2,0,2,35,1,4,6,NA +67177,7,2,2,74,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,66567.821082,69187.508251,1,94,3,3,0.88,2,2,0,0,1,2,74,1,2,2,NA 
+67178,7,2,1,4,NA,1,1,1,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,17865.135763,18430.646082,3,92,3,3,0.51,5,5,1,2,0,2,34,2,1,6,NA +67179,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,1,2,2,NA,NA,NA,NA,30275.274308,28786.080652,2,101,2,2,0.51,1,1,0,0,0,2,20,1,4,5,NA +67180,7,2,2,78,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,14534.533756,15507.238104,1,96,6,6,2.06,2,2,0,0,2,1,68,2,3,1,2 +67181,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27356.080541,28721.124083,1,101,7,7,1.82,4,4,2,0,0,2,27,1,2,1,3 +67182,7,2,2,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,19130.246369,19076.4893,1,98,10,10,3.77,3,3,0,1,0,1,48,NA,NA,1,4 +67183,7,2,2,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21143.964074,20709.49715,1,100,15,15,3.7,5,5,0,3,0,1,51,1,5,1,5 +67184,7,2,1,42,NA,1,1,2,NA,NA,1,2,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,31640.296506,31176.023929,2,94,15,9,5,2,1,0,0,0,1,42,1,4,6,NA +67185,7,2,2,51,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,1,1,2,2,NA,24178.173487,24760.288626,1,92,7,7,2.1,3,3,0,0,0,1,24,2,4,5,NA +67186,7,2,1,6,NA,2,2,1,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11971.590477,11920.833512,2,100,14,14,3.58,4,4,1,1,0,1,33,1,4,1,5 +67187,7,2,2,15,NA,1,1,1,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12996.753859,13369.730018,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +67188,7,2,2,13,NA,4,4,1,13,161,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14166.687432,14623.20249,1,100,6,6,1.39,4,4,0,3,0,2,29,1,4,5,NA +67189,7,2,2,70,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,3,2,NA,1,2,1,1,2,1,1,2,2,NA,12813.709202,14706.046404,2,103,4,4,1.43,1,1,0,0,1,2,70,2,3,2,NA +67190,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,77778.949308,79987.938327,1,101,14,14,4.5,3,3,0,1,0,1,39,1,2,1,5 +67191,7,2,2,33,NA,3,3,1,NA,NA,2,NA,2,7,2,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,29733.812317,32603.623062,1,101,6,6,1.65,2,2,0,0,0,1,47,1,4,1,2 
+67192,7,2,2,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,126117.5094,130777.186989,2,98,8,8,2.7,3,3,0,0,1,2,71,NA,NA,2,NA +67193,7,2,1,37,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18289.793332,19321.216647,1,99,8,8,1.76,5,5,0,2,1,1,37,1,4,1,3 +67194,7,2,1,51,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21762.20618,21775.974398,1,91,10,10,2.56,5,5,0,3,0,1,51,2,5,1,4 +67195,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,133542.212862,137840.368471,1,94,9,9,3.97,2,2,0,0,0,1,49,1,3,1,3 +67196,7,2,2,5,NA,1,1,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11859.546176,12772.78418,1,102,8,8,1.33,7,7,1,4,0,2,32,1,3,1,2 +67197,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,66253.989241,66961.54455,1,101,15,15,5,2,2,0,0,2,2,73,1,4,1,4 +67198,7,2,1,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,32461.799549,32721.879701,1,101,3,3,0.66,2,2,0,0,1,1,67,1,2,3,NA +67199,7,2,1,41,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15574.704266,15518.439557,2,92,15,15,5,2,2,0,0,0,2,36,1,5,1,5 +67200,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,97074.465027,101633.534325,1,93,15,8,4.41,3,1,0,0,0,1,30,1,5,5,NA +67201,7,2,1,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,20953.00978,22383.074498,2,97,3,3,0.46,5,5,0,3,0,1,40,1,2,1,3 +67202,7,2,1,11,NA,5,6,2,11,133,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8423.270513,9127.622487,1,90,15,15,5,5,5,0,3,0,2,46,2,4,1,5 +67203,7,2,1,33,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17879.023129,20112.694262,2,90,5,5,2.02,1,1,0,0,0,1,33,2,3,5,NA +67204,7,2,1,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,20333.447198,20233.229833,1,98,6,6,1.72,2,2,0,0,0,2,53,1,5,2,NA +67205,7,2,1,5,NA,5,7,2,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8100.706553,8491.571292,1,98,14,14,3.9,4,4,2,0,0,1,39,1,5,1,4 
+67206,7,2,1,8,NA,5,7,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21087.869274,22876.159598,1,101,5,5,1.26,3,3,0,1,1,2,42,1,3,5,NA +67207,7,2,2,10,NA,4,4,2,10,122,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7136.421849,7489.546444,1,90,9,9,1.65,7,7,0,4,0,1,36,1,4,1,4 +67208,7,2,1,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,7117.971973,7173.650989,2,100,6,6,2.31,2,2,0,0,2,2,65,1,4,1,2 +67209,7,2,2,80,NA,3,3,1,NA,NA,2,NA,2,1,9,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,51644.110977,57877.507418,1,102,5,5,1.84,1,1,0,0,1,2,80,2,5,2,NA +67210,7,2,2,63,NA,4,4,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,1,1,2,2,1,1,1,NA,10585.751811,11058.394307,2,93,6,6,1.13,4,4,0,0,2,1,60,2,3,1,3 +67211,7,2,1,3,NA,4,4,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8973.990262,9895.291699,2,95,3,3,0.43,4,4,2,0,0,2,23,1,2,5,NA +67212,7,1,2,67,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,125706.50355,0,3,91,14,14,5,2,1,0,0,1,2,67,1,5,5,NA +67213,7,2,1,4,NA,4,4,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9304.437652,10259.663981,2,97,1,1,0.27,2,2,1,0,0,2,20,1,2,5,NA +67214,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,99283.360764,103932.649449,3,91,14,14,5,1,1,0,0,0,1,39,1,5,5,NA +67215,7,2,1,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,2,2,2,1,2,2,1,40899.412129,44211.773156,1,94,5,5,0.57,7,7,2,1,0,1,58,2,1,1,1 +67216,7,2,2,0,7,1,1,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6744.839779,7193.080406,1,103,5,5,1.02,4,4,2,0,0,1,25,1,2,1,4 +67217,7,2,1,41,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,2,6,NA,2,2,2,2,2,2,2,2,2,2,37402.70356,39087.586144,2,102,7,7,1.89,3,3,0,1,0,1,41,2,2,6,NA +67218,7,2,1,0,8,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13392.309303,13804.825901,1,99,15,15,5,5,5,3,0,0,2,34,1,5,1,5 +67219,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,61212.189461,64753.083655,3,90,15,15,5,3,3,1,0,0,1,31,1,5,1,5 
+67220,7,2,1,33,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,106105.629898,111686.437619,1,100,14,9,4.92,3,1,1,0,0,1,33,1,5,5,NA +67221,7,2,1,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17583.693727,17293.36291,2,95,9,9,1.81,6,6,1,1,0,2,56,1,4,3,NA +67222,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,62796.637452,70136.543572,2,102,6,6,1.15,5,5,0,0,2,1,80,1,5,1,1 +67223,7,2,1,40,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18898.73153,22670.159935,1,100,5,5,0.74,6,6,0,3,0,1,40,2,3,1,4 +67224,7,2,1,2,NA,4,4,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6760.288309,6966.774293,1,96,99,99,NA,4,4,1,1,0,2,35,2,3,1,3 +67225,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,8318.523894,9127.657215,3,90,3,3,1.07,1,1,0,0,1,1,80,1,5,2,NA +67226,7,2,2,43,NA,4,4,2,NA,NA,2,NA,2,2,6,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,14831.744106,14871.922635,3,90,5,5,0.87,4,4,0,0,0,2,43,2,3,5,NA +67227,7,2,2,17,NA,4,4,2,17,208,2,NA,2,2,4,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11072.776368,11285.58755,3,90,9,9,4.1,2,2,0,1,0,2,41,2,5,5,NA +67228,7,2,1,41,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,14823.307875,15546.691363,1,96,8,8,2.62,3,3,0,1,0,1,41,2,3,1,5 +67229,7,2,1,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,138075.879417,141933.339512,2,91,7,7,1.61,4,4,0,0,3,1,65,1,3,6,NA +67230,7,2,1,59,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,25969.864445,26997.658596,1,97,15,15,5,2,2,0,0,0,1,59,1,5,1,5 +67231,7,2,1,18,NA,4,4,1,18,219,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16147.713323,16532.569027,1,92,5,5,0.95,4,4,0,2,0,2,33,1,4,5,NA +67232,7,2,2,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,35933.117795,36141.582386,1,98,1,1,0.18,1,1,0,0,0,2,48,1,5,1,NA +67233,7,2,2,7,NA,4,4,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8757.841043,9034.425418,2,97,2,2,0.49,3,3,0,1,0,2,27,1,4,5,NA 
+67234,7,2,1,12,NA,1,1,1,12,149,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20705.832333,21431.576705,1,100,9,9,2.02,6,6,0,3,1,2,39,1,4,1,5 +67235,7,2,1,6,NA,4,4,2,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14125.862146,14295.209168,1,97,5,5,0.91,4,4,0,3,0,2,44,1,4,5,NA +67236,7,2,2,48,NA,1,1,1,NA,NA,2,NA,2,2,77,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,43535.993088,43763.029589,2,102,77,77,NA,3,3,0,0,0,2,48,2,2,1,3 +67237,7,2,2,51,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,4,NA,2,2,2,1,2,2,2,2,2,2,23953.14388,24078.057488,1,90,12,12,NA,4,4,0,0,0,1,54,2,4,1,2 +67238,7,2,2,15,NA,5,6,1,15,189,NA,NA,2,1,4,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7382.152016,8028.485773,1,94,14,14,2.78,6,5,0,2,1,1,61,1,4,1,5 +67239,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,1,4,NA,2,2,2,2,2,1,NA,NA,NA,NA,24004.6026,28934.42641,2,91,4,4,0.94,3,3,0,1,0,2,50,2,1,4,NA +67240,7,2,2,5,NA,1,1,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19235.084509,21236.049482,3,92,8,8,2,4,4,2,0,0,1,30,1,4,1,4 +67241,7,1,1,80,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,3,99,NA,1,1,2,1,2,2,NA,NA,NA,NA,14359.447628,0,1,97,4,4,0.81,3,3,0,0,2,1,80,1,3,99,NA +67242,7,2,1,2,NA,4,4,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8009.966208,8832.297541,2,96,6,6,1.35,3,3,1,1,0,2,23,1,2,5,NA +67243,7,2,2,68,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,7869.59899,8538.749518,1,96,3,3,0.68,2,2,0,0,1,2,68,1,2,3,NA +67244,7,2,2,13,NA,3,3,1,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,28650.637159,30356.286382,2,100,2,2,0.38,3,3,0,2,0,2,35,1,4,5,NA +67245,7,2,1,27,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,2,5,NA,1,2,2,1,2,1,1,2,2,1,9177.295801,9548.31812,2,92,77,77,NA,4,4,0,0,0,1,27,2,2,5,NA +67246,7,2,1,15,NA,3,3,2,15,185,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,68701.580401,72973.564721,2,94,15,15,5,5,5,0,2,0,1,53,1,5,1,5 +67247,7,2,1,8,NA,5,6,2,8,102,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8151.552109,8792.161653,1,90,14,14,3.33,5,5,1,2,0,1,41,1,5,1,5 
+67248,7,2,2,14,NA,1,1,1,14,174,NA,NA,2,2,4,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15690.47168,16407.081535,1,102,5,5,0.62,7,7,1,3,0,1,49,2,2,1,1 +67249,7,2,2,58,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16663.192678,16751.306683,2,92,NA,4,1.65,2,1,0,0,1,2,58,2,4,5,NA +67250,7,2,2,43,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,42424.587753,44729.774642,2,102,4,4,1.38,2,1,0,0,0,2,43,1,5,6,NA +67251,7,2,2,7,NA,4,4,2,8,96,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,1,2,NA,NA,NA,NA,7814.742747,7984.793423,2,95,2,2,0.26,4,4,0,2,0,2,44,NA,NA,5,NA +67252,7,2,2,24,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16844.740449,17983.530016,3,91,3,3,1.1,1,1,0,0,0,2,24,1,5,5,NA +67253,7,2,2,10,NA,3,3,2,10,121,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,44941.581862,44344.776378,2,94,10,10,2.91,4,4,0,2,0,2,38,1,4,1,4 +67254,7,2,1,8,NA,1,1,1,9,108,NA,NA,2,2,2,2,NA,NA,NA,2,1,2,2,2,2,2,2,2,2,13870.762641,13985.149896,2,102,6,4,1.02,6,2,0,4,0,2,43,2,1,5,NA +67255,7,2,2,38,NA,3,3,2,NA,NA,2,NA,2,1,4,NA,3,1,2,1,2,2,1,2,2,1,1,2,NA,53830.599426,56664.989337,1,99,77,77,NA,4,4,1,1,0,1,31,2,3,1,3 +67256,7,2,1,10,NA,1,1,1,11,132,NA,NA,2,2,3,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15456.524105,16140.282926,1,100,99,99,NA,6,6,0,1,0,2,22,2,3,1,3 +67257,7,2,2,9,NA,5,6,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8556.454265,9137.012028,1,90,15,15,5,5,5,0,2,0,1,47,2,5,1,5 +67258,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,54256.870337,62240.534341,2,94,6,6,2.04,2,2,0,0,2,1,75,1,3,1,3 +67259,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,90081.78624,90194.909743,2,100,15,15,4.5,6,6,0,4,0,1,45,1,5,1,5 +67260,7,2,1,3,NA,3,3,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27542.701065,31074.71228,1,94,7,7,1.29,6,6,1,3,0,1,38,1,3,1,2 +67261,7,2,1,57,NA,3,3,2,NA,NA,2,NA,2,1,99,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,20160.790176,25538.533486,3,90,77,99,NA,2,1,0,0,1,1,62,2,5,3,NA 
+67262,7,1,2,68,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,1,1,2,1,NA,NA,NA,NA,9793.924718,0,2,100,2,2,0.39,3,3,0,0,1,2,45,1,2,5,NA +67263,7,2,2,3,NA,1,1,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,15979.952759,16987.381382,2,102,6,6,1.03,5,5,1,1,0,1,37,1,2,1,2 +67264,7,2,2,77,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,81062.798322,86779.337922,1,101,4,4,1.61,1,1,0,0,1,2,77,1,2,2,NA +67265,7,2,2,0,5,4,4,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4681.626184,4732.744026,1,96,6,6,1.31,4,3,1,0,0,2,27,2,4,1,3 +67266,7,2,1,36,NA,1,1,1,NA,NA,2,NA,2,1,5,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,51543.062078,51154.050295,3,92,7,7,1.3,5,5,1,2,0,2,33,2,2,1,1 +67267,7,1,1,10,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10524.718724,0,1,96,77,77,NA,2,2,0,1,0,2,35,1,3,3,NA +67268,7,2,2,18,NA,3,3,2,18,217,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,100004.689924,107460.712122,1,90,12,12,NA,2,2,0,0,0,2,45,2,3,4,NA +67269,7,2,1,59,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21762.20618,22687.256739,1,91,15,15,5,4,4,0,1,0,2,51,2,4,1,5 +67270,7,2,2,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,34874.122648,36144.248604,3,92,6,6,1.41,4,3,0,1,0,1,41,1,4,1,4 +67271,7,2,2,4,NA,4,4,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8833.042831,9629.276723,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +67272,7,2,2,11,NA,3,3,1,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18417.272091,18465.044105,1,94,5,5,1.04,4,4,0,2,0,2,29,1,3,1,3 +67273,7,2,1,44,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,31740.385214,31855.807763,2,96,5,5,0.78,5,5,0,2,0,1,37,2,1,5,NA +67274,7,2,2,46,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,19150.604366,19251.871661,3,91,14,9,2.68,6,4,0,0,2,1,48,2,1,1,1 +67275,7,2,1,77,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,16603.347511,18119.902269,2,98,3,3,0.88,2,2,0,0,2,1,77,1,1,1,3 
+67276,7,2,2,60,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,2,3,NA,2,2,2,2,2,2,1,2,2,2,9716.805546,10855.679158,2,90,7,7,1.34,5,5,0,1,2,1,61,2,1,4,NA +67277,7,2,2,4,NA,5,6,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5307.591514,5454.53335,2,102,3,3,0.38,5,5,3,0,0,2,30,2,2,1,4 +67278,7,2,1,2,NA,2,2,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,13386.323284,2,98,1,1,0.14,3,2,2,0,0,2,27,1,3,6,NA +67279,7,2,2,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,21765.629914,22790.091994,2,90,2,2,0.38,4,4,1,2,0,2,32,1,4,5,NA +67280,7,2,1,27,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,37911.437415,38428.199561,2,103,12,6,2.24,2,1,0,0,0,1,27,1,4,5,NA +67281,7,2,2,42,NA,1,1,2,NA,NA,2,NA,2,2,77,NA,2,6,2,2,2,2,2,2,2,2,2,2,2,31235.666551,35013.570975,2,94,77,77,NA,3,3,0,1,0,2,42,2,2,6,NA +67282,7,2,2,8,NA,1,1,1,8,102,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,1,2,2,2,13028.403003,13395.598637,2,96,6,6,1.11,5,5,0,3,0,2,32,2,3,1,2 +67283,7,2,2,56,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12441.719186,12605.354834,1,96,15,15,5,5,5,0,0,0,1,58,2,5,1,5 +67284,7,2,2,12,NA,5,6,1,12,147,NA,NA,2,2,1,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6370.316505,6496.587233,2,94,8,8,1.8,6,6,0,1,2,1,74,2,5,1,5 +67285,7,2,2,70,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,22166.463682,23916.215243,3,91,6,6,2.89,1,1,0,0,1,2,70,2,5,2,NA +67286,7,2,2,6,NA,4,4,2,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6655.097829,7106.52929,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 +67287,7,2,2,10,NA,5,6,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7932.110938,8316.610099,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +67288,7,2,1,24,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,17446.328105,17921.312041,1,92,15,15,5,2,2,0,0,0,2,49,1,5,3,NA +67289,7,2,1,5,NA,5,6,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7042.228842,7898.630137,2,103,14,14,3.47,4,4,1,0,1,1,47,2,5,1,5 
+67290,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,34099.599202,35276.751365,1,94,7,1,0.09,5,1,0,0,0,1,46,1,4,1,4 +67291,7,2,2,74,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,35160.494695,36239.184013,1,100,4,4,1.16,2,2,0,0,2,1,74,1,3,1,5 +67292,7,2,1,8,NA,4,4,1,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9022.8939,9530.778148,2,100,6,6,0.99,5,5,0,3,0,2,40,1,3,1,3 +67293,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,132969.642582,137509.365878,2,91,15,15,5,4,4,0,2,0,1,53,1,5,1,4 +67294,7,1,2,44,NA,2,2,NA,NA,NA,2,NA,2,1,5,NA,3,1,3,1,2,2,1,2,2,NA,NA,NA,NA,32606.880052,0,2,93,6,6,1.48,4,4,0,1,0,1,53,2,2,1,3 +67295,7,2,2,4,NA,5,6,2,4,52,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,4932.827461,1,91,14,14,4.32,3,3,1,0,0,1,34,2,5,1,3 +67296,7,2,2,15,NA,4,4,2,15,182,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12265.19283,12230.632995,2,97,2,2,0.21,7,7,2,3,0,2,32,1,4,5,NA +67297,7,2,1,29,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,123110.069898,136669.986833,1,101,8,4,1.34,2,1,0,0,0,2,20,1,4,6,NA +67298,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,17377.366864,16840.035727,2,99,6,6,1.11,5,5,1,2,0,2,41,1,2,5,NA +67299,7,2,2,7,NA,2,2,2,7,84,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15148.721588,16225.089736,2,91,2,2,0.42,3,3,1,1,0,2,27,1,3,5,NA +67300,7,2,1,72,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,71709.98031,77299.00621,2,91,9,9,2.6,4,4,0,0,2,1,72,1,4,1,5 +67301,7,1,2,18,NA,3,3,NA,NA,NA,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,45824.798023,0,1,97,1,1,0.09,2,1,0,0,0,1,24,1,3,6,NA +67302,7,2,2,3,NA,4,4,1,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16749.124902,18717.057678,2,97,1,1,0.13,4,4,2,0,1,2,62,1,2,4,NA +67303,7,2,1,8,NA,3,3,2,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,82670.203859,85919.643529,1,97,15,15,5,5,5,0,3,0,2,47,2,5,1,5 
+67304,7,2,2,12,NA,2,2,1,12,151,NA,NA,2,2,2,5,NA,NA,NA,2,1,2,2,2,2,1,2,1,2,12712.538972,13945.896408,2,93,6,6,0.93,5,5,1,2,0,1,40,2,4,1,4 +67305,7,2,2,20,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,29040.300396,28462.118402,2,101,3,1,0.22,3,1,0,0,0,2,20,2,4,5,NA +67306,7,2,1,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,19186.370211,20284.349464,2,97,2,2,0.27,4,4,0,2,0,1,51,1,2,4,NA +67307,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,21022.682584,21611.637105,1,100,3,3,0.43,4,4,2,0,0,1,20,1,3,6,NA +67308,7,2,1,21,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,10131.732483,10407.573345,2,103,12,3,1.07,5,1,0,1,0,2,47,NA,NA,3,NA +67309,7,2,1,72,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,2,2,1,2,2,2,2,2,2,NA,15435.262366,15704.333986,2,93,9,9,3.14,3,3,0,0,2,1,43,NA,NA,5,NA +67310,7,2,1,46,NA,2,2,1,NA,NA,2,NA,2,1,NA,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,49060.272708,48961.70392,1,92,14,5,1.88,3,1,0,1,0,2,39,2,4,6,NA +67311,7,2,2,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,50915.06085,52488.027702,3,92,8,8,2,4,4,2,0,0,1,30,1,4,1,4 +67312,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,152858.509804,160001.813639,1,95,9,9,2.66,4,4,0,2,0,1,45,1,3,1,3 +67313,7,2,1,11,NA,1,1,1,11,136,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17882.621856,18280.794545,3,92,8,8,1.55,6,6,1,3,0,2,38,1,5,1,4 +67314,7,2,1,62,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,40561.050563,40941.458273,2,101,3,3,0.98,2,2,0,0,2,1,62,1,4,1,3 +67315,7,2,2,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,15890.452621,2,100,7,7,1.79,4,4,0,1,0,2,51,1,3,3,NA +67316,7,2,2,48,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,32208.300114,33976.532975,1,102,6,6,1.34,4,4,0,1,0,2,48,2,3,1,1 +67317,7,2,1,0,10,4,4,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6275.847063,6372.172481,2,100,14,14,3.58,4,4,1,1,0,2,31,1,5,1,4 
+67318,7,2,1,26,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,12364.328404,12075.509308,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +67319,7,2,1,6,NA,4,4,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8065.706636,8519.712345,2,99,1,1,0.03,3,3,0,1,0,2,56,1,3,5,NA +67320,7,2,1,19,NA,2,2,2,19,230,2,NA,1,1,NA,12,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,14081.782012,14782.02856,2,90,7,7,1.34,5,5,0,1,2,1,61,2,1,4,NA +67321,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,24930.322327,24899.923342,2,91,4,4,1.22,2,2,0,0,0,1,53,1,4,1,4 +67322,7,2,1,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,NA,NA,NA,1,2,2,1,20308.910079,25155.898326,1,90,NA,NA,NA,2,2,0,0,0,2,40,NA,NA,5,NA +67323,7,2,2,10,NA,1,1,1,10,122,NA,NA,2,2,3,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20495.125801,21027.07407,3,92,3,3,0.52,5,5,2,1,0,2,29,2,1,1,3 +67324,7,2,1,7,NA,5,6,2,7,95,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7193.657603,7758.988679,2,90,14,14,4.32,3,3,0,1,0,1,48,2,4,1,5 +67325,7,2,2,15,NA,1,1,1,15,182,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16781.078148,17658.768186,2,103,77,77,NA,5,5,0,2,0,2,45,2,4,5,NA +67326,7,2,2,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,24126.537834,25148.058527,2,97,5,5,0.76,5,5,1,1,0,2,47,1,4,5,NA +67327,7,2,1,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,53149.251154,62614.383127,2,94,6,6,2.04,2,2,0,0,2,1,75,1,3,1,3 +67328,7,2,2,6,NA,5,6,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9007.62445,9504.796896,2,102,15,15,3.92,5,5,1,2,0,1,34,2,5,1,5 +67329,7,2,2,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,13934.943848,14557.124186,2,96,3,3,0.47,4,4,1,0,1,2,61,1,4,3,NA +67330,7,2,1,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,108408.375382,114958.410267,2,98,6,6,1.25,4,4,1,0,1,1,46,1,2,6,NA +67331,7,2,2,15,NA,4,4,2,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14417.957956,14453.467189,1,96,15,15,5,4,4,0,2,0,1,46,1,5,1,5 
+67332,7,2,2,45,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,5,NA,1,2,1,1,2,1,1,2,1,3,13749.025709,14311.068456,1,103,7,7,1.89,3,3,0,1,0,2,45,2,5,5,NA +67333,7,2,1,42,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17602.101156,17988.968846,1,96,7,7,1.52,4,4,0,2,0,2,30,2,4,1,5 +67334,7,1,2,32,NA,1,1,NA,NA,NA,2,NA,2,2,3,NA,3,1,3,2,2,2,2,2,2,NA,NA,NA,NA,35353.005268,0,2,94,4,4,0.72,4,4,1,1,0,1,30,2,1,1,3 +67335,7,2,2,30,NA,3,3,2,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,26074.980194,26960.560427,1,93,12,2,0.69,4,1,0,0,0,2,25,2,5,5,NA +67336,7,2,2,7,NA,5,6,1,7,88,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9054.387575,9459.329211,2,100,14,14,4.03,4,4,0,2,0,1,48,2,5,1,5 +67337,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,64463.340883,65151.773075,1,91,15,14,5,3,2,0,0,2,2,73,1,4,3,NA +67338,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15971.744676,15898.343344,2,96,4,4,0.57,6,6,0,3,0,2,29,1,3,4,NA +67339,7,2,1,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,86445.94283,91501.124576,2,102,9,9,4.08,2,2,0,0,2,1,70,1,5,1,5 +67340,7,2,2,58,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,15521.115746,15096.339697,2,100,5,5,1.07,4,4,0,1,0,2,36,1,3,5,NA +67341,7,2,1,74,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,2,2,2,1,2,2,1,2,2,NA,14472.233702,14665.217659,1,98,8,8,3.3,2,2,0,0,2,1,74,1,1,1,3 +67342,7,2,2,48,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,1,1,2,1,3,12770.403552,13002.937616,1,103,4,4,0.82,3,3,0,0,0,1,48,2,4,1,1 +67343,7,2,1,58,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,29903.342494,29807.964765,1,95,6,4,1.74,2,1,0,0,0,2,48,1,4,3,NA +67344,7,2,1,60,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,2,2,2,1,2,2,1,2,2,1,9691.985299,9847.238527,1,93,5,5,1.43,2,2,0,0,1,1,60,2,5,1,4 +67345,7,2,2,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6555.231326,6846.316857,1,99,6,6,1.35,3,3,1,1,0,2,42,1,4,4,NA 
+67346,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,40859.270352,45390.978638,2,101,12,12,NA,1,1,0,0,1,1,80,1,3,3,NA +67347,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,38321.717684,40488.857872,1,92,6,6,2.04,2,2,0,0,2,2,71,1,4,1,1 +67348,7,2,1,16,NA,5,6,2,16,197,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6666.045669,7317.485505,3,90,77,77,NA,5,5,0,2,0,1,46,2,3,1,3 +67349,7,2,1,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23160.426103,23797.470648,2,98,10,10,3.78,3,3,0,0,0,2,46,1,4,1,4 +67350,7,2,1,2,NA,4,4,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8431.543376,9297.155306,2,96,14,14,3.58,4,4,2,0,0,1,36,1,4,1,5 +67351,7,2,1,10,NA,5,7,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20857.728985,21677.56405,2,98,3,3,0.38,5,5,0,4,0,2,39,1,4,5,NA +67352,7,2,2,40,NA,3,3,2,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,25784.374025,27218.997103,1,90,7,7,2.1,3,3,1,0,0,2,40,2,5,1,4 +67353,7,2,1,61,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,8883.460927,9371.520203,2,92,15,15,5,2,2,0,0,2,1,61,1,5,1,5 +67354,7,1,2,18,NA,5,6,NA,NA,NA,2,NA,2,1,4,11,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,9758.609905,0,3,91,14,2,0.83,2,1,0,0,1,2,67,1,5,5,NA +67355,7,2,2,22,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,32455.694722,32314.335903,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +67356,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,80418.665565,96911.831087,2,97,15,15,4.97,5,5,1,0,0,1,48,1,4,1,3 +67357,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,36154.496177,37370.445999,2,101,1,1,0.14,3,1,0,0,0,1,23,1,4,5,NA +67358,7,2,1,7,NA,4,4,2,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9554.261994,9836.582126,2,90,8,8,1.67,6,6,1,1,0,1,52,1,3,1,5 +67359,7,2,1,74,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,59027.291951,62692.518829,1,103,15,15,5,2,2,0,0,2,1,74,1,5,1,5 
+67360,7,2,1,10,NA,1,1,1,10,131,NA,NA,2,2,4,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11159.151566,11226.781631,1,102,5,5,0.98,4,4,1,1,0,2,42,2,2,6,NA +67361,7,2,1,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,32185.399379,40770.619238,2,91,4,4,1.19,2,2,0,0,0,1,59,1,1,1,3 +67362,7,2,2,47,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,13219.753222,13289.658517,2,92,8,8,1.91,5,5,0,2,1,2,47,2,1,1,3 +67363,7,2,1,61,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,1,4,NA,2,2,2,2,2,2,1,2,2,2,8609.250304,9380.869295,2,90,7,7,1.34,5,5,0,1,2,1,61,2,1,4,NA +67364,7,2,2,19,NA,3,3,1,19,231,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22916.593117,23588.294062,1,102,4,4,0.97,3,3,0,1,0,2,19,1,2,NA,NA +67365,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16033.091438,16307.204086,1,96,15,15,5,3,3,0,0,1,2,62,1,4,3,NA +67366,7,2,2,5,NA,3,3,1,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,64434.313673,68475.614831,1,101,5,5,0.89,5,5,1,2,0,1,31,1,2,1,1 +67367,7,2,2,32,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,3,1,2,2,2,2,2,2,2,NA,NA,NA,NA,35464.8385,41544.401264,2,96,6,6,1.11,5,5,0,3,0,2,32,2,3,1,2 +67368,7,2,2,23,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,3,5,2,2,2,2,2,2,2,1,2,2,1,41646.508203,41998.067158,3,91,4,4,0.81,3,3,0,0,1,1,64,2,1,1,1 +67369,7,2,2,3,NA,4,4,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9445.424546,10163.382174,2,99,6,6,1.15,5,5,1,2,0,2,34,1,4,77,NA +67370,7,2,1,41,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,1,1,2,1,1,2,2,3,19184.316833,20543.822351,1,97,15,15,5,4,4,2,0,0,2,35,2,4,1,4 +67371,7,2,1,13,NA,4,4,1,13,163,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9188.45337,9407.445906,2,95,15,15,3.85,7,7,0,3,1,2,62,1,4,2,NA +67372,7,2,2,0,8,1,1,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6588.463421,6449.849868,2,95,12,6,1.98,4,2,1,0,0,2,25,1,4,77,NA +67373,7,2,2,64,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,10746.81867,11542.921605,2,100,99,99,NA,6,6,1,1,2,1,37,2,3,1,3 
+67374,7,1,1,10,NA,5,6,NA,NA,NA,NA,NA,2,1,3,4,NA,NA,NA,1,1,2,1,2,1,NA,NA,NA,NA,8014.738552,0,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +67375,7,2,2,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,18490.479848,18318.636758,2,100,7,7,1.38,5,5,1,0,0,2,45,1,2,3,NA +67376,7,2,2,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,38254.514187,37933.750372,1,98,2,2,0.72,1,1,0,0,0,2,57,1,4,3,NA +67377,7,2,1,0,2,3,3,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9570.577309,9932.368407,1,95,2,2,0.18,6,6,1,2,2,2,69,1,2,1,2 +67378,7,2,1,9,NA,2,2,2,9,108,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,1,NA,NA,NA,NA,13217.721247,13882.368076,2,91,4,4,0.94,3,3,0,1,0,2,50,2,1,4,NA +67379,7,2,2,12,NA,2,2,1,12,148,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,23968.380373,25913.276293,2,91,14,7,2.91,3,2,0,1,0,1,40,2,3,6,NA +67380,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16165.962054,15723.538094,2,99,12,5,1.88,6,1,0,0,0,2,57,1,5,2,NA +67381,7,2,1,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,32680.7991,1,95,2,2,0.74,1,1,0,0,0,1,50,1,4,3,NA +67382,7,2,2,18,NA,4,4,1,18,221,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18905.695776,2,101,4,4,0.86,3,3,0,1,0,2,18,1,2,NA,NA +67383,7,2,2,13,NA,4,4,2,13,165,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10401.399206,10736.579564,2,99,9,9,2.43,4,4,0,2,0,2,49,1,3,3,NA +67384,7,2,1,19,NA,3,3,2,20,NA,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,99249.131685,98501.502244,1,101,14,14,3.25,4,4,0,1,0,1,48,1,4,1,2 +67385,7,2,1,47,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18116.816149,18405.984579,2,90,99,4,1.16,5,2,0,0,1,1,43,NA,NA,1,NA +67386,7,2,2,46,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,4,NA,2,2,2,1,2,2,NA,NA,NA,NA,27654.660303,28156.17371,2,103,77,77,NA,4,4,0,0,0,2,46,2,1,4,NA +67387,7,1,1,74,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,8601.453077,0,2,97,4,4,0.53,7,7,0,2,2,1,74,1,2,1,2 
+67388,7,2,1,35,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,2,2,2,2,54095.173154,55399.110059,1,92,10,10,4.63,2,2,0,0,0,1,35,2,5,1,4 +67389,7,2,1,62,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,2,NA,2,2,2,2,2,2,1,2,2,2,10585.549682,13806.559103,1,90,13,13,NA,2,2,0,0,1,1,62,2,1,2,NA +67390,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,121463.203322,122574.38549,2,95,9,9,4.92,1,1,0,0,0,2,56,1,5,3,NA +67391,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,73849.700988,79023.338367,3,91,14,14,5,3,3,1,0,0,2,30,1,4,1,5 +67392,7,2,1,41,NA,2,2,1,NA,NA,2,NA,2,2,1,NA,5,1,NA,2,2,2,2,2,2,2,2,2,2,33029.272844,32544.619181,2,93,9,9,1.49,7,7,0,3,0,2,41,2,5,1,5 +67393,7,2,2,29,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,5,5,2,1,2,2,2,2,2,1,2,2,1,35424.746838,35546.372478,2,93,10,10,3.67,3,3,0,0,0,2,56,2,4,6,NA +67394,7,2,2,8,NA,5,7,2,8,101,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8746.306586,9472.024073,1,91,3,3,0.66,4,4,1,2,0,2,33,1,3,5,NA +67395,7,2,2,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16741.034883,16850.233711,2,95,3,3,0.66,2,2,0,0,0,2,55,1,2,1,NA +67396,7,2,1,4,NA,1,1,1,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16118.799226,16847.423074,3,92,4,4,0.46,7,7,1,2,0,2,31,2,2,1,1 +67397,7,2,1,20,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,12364.328404,12075.509308,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +67398,7,2,2,2,NA,4,4,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5847.515059,6374.625564,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +67399,7,2,2,55,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11428.6913,11636.794359,2,103,7,7,1.65,4,4,0,1,1,2,55,2,3,1,NA +67400,7,2,1,61,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,9355.567184,9645.104203,2,100,14,14,3.06,5,5,1,0,1,2,31,2,5,1,5 +67401,7,2,1,5,NA,1,1,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17865.135763,18076.339981,3,92,1,1,0,5,5,3,0,0,1,26,1,2,1,2 
+67402,7,2,2,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,16181.169973,15883.791498,2,100,7,7,2.72,2,2,0,0,0,2,59,1,4,3,NA +67403,7,2,1,15,NA,1,1,1,15,191,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,24325.638415,24460.075857,3,92,5,5,1.05,3,3,1,1,0,2,38,2,3,5,NA +67404,7,1,2,63,NA,5,6,NA,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,18002.759054,0,2,102,5,5,1.36,2,2,0,0,2,1,70,2,5,1,4 +67405,7,2,1,5,NA,1,1,1,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11388.091908,11522.723578,2,103,5,5,0.89,5,5,1,3,0,2,34,2,1,99,NA +67406,7,2,1,48,NA,1,1,2,NA,NA,2,NA,2,2,2,NA,5,5,NA,2,2,2,1,2,2,1,2,2,2,36475.684097,37047.191703,1,93,15,15,5,3,2,0,1,0,2,46,2,5,5,NA +67407,7,2,1,64,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,153565.050575,166235.02721,1,97,15,15,5,3,3,0,0,1,1,64,1,3,1,3 +67408,7,2,2,52,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,3,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,14647.161643,15007.506342,1,90,8,8,1.43,7,7,2,0,0,1,23,2,4,1,3 +67409,7,2,2,45,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,3,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,123440.03559,156711.394658,1,90,12,12,NA,2,2,0,0,0,2,45,2,3,4,NA +67410,7,2,2,18,NA,1,1,2,18,220,2,NA,2,2,2,11,NA,NA,NA,2,2,2,1,2,2,2,2,2,2,19557.287652,19949.093379,2,94,99,99,NA,3,3,1,0,0,2,18,2,2,NA,NA +67411,7,2,1,43,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,1,6,NA,2,2,2,1,2,2,2,2,2,2,27127.17615,26729.126657,3,90,7,7,1.48,5,5,0,1,0,1,43,2,1,6,NA +67412,7,2,1,77,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,1,1,1,1,2,1,2,1,NA,17775.981268,18879.758897,1,92,4,4,1.1,2,2,0,0,2,2,69,2,4,1,4 +67413,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,32485.015568,36087.933902,2,101,6,6,1.62,3,3,0,0,2,1,80,1,3,1,3 +67414,7,2,2,77,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,13045.741791,13445.321496,1,99,3,3,1.07,1,1,0,0,1,2,77,1,2,2,NA +67415,7,1,1,5,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,9016.053035,0,2,100,NA,NA,NA,3,3,1,0,0,1,29,NA,NA,1,NA 
+67416,7,2,1,4,NA,4,4,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9304.437652,9417.927146,2,97,2,2,0.45,3,3,1,0,0,1,24,1,2,1,3 +67417,7,2,2,58,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19329.435508,19798.346555,2,97,15,15,5,2,2,0,0,1,2,58,2,5,1,5 +67418,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,35434.580514,39416.921733,1,95,2,2,0.87,1,1,0,0,1,2,80,1,2,2,NA +67419,7,2,1,4,NA,4,4,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10291.033925,11347.547701,1,100,14,14,4.45,3,3,1,0,0,1,33,1,4,1,4 +67420,7,2,1,22,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,49741.714519,52356.235161,2,91,14,14,4.71,3,3,0,0,0,1,28,2,1,1,2 +67421,7,2,2,6,NA,1,1,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16664.698857,2,98,9,9,3.08,3,3,0,1,0,1,35,2,1,1,3 +67422,7,2,1,19,NA,3,3,2,19,238,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,83180.831932,83381.692822,2,100,14,14,3.06,5,5,0,0,0,1,55,1,5,1,5 +67423,7,2,2,3,NA,3,3,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25881.48305,27865.988236,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +67424,7,2,2,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,42468.064168,46720.900929,2,101,3,3,0.65,3,3,0,1,0,2,54,1,3,5,NA +67425,7,2,1,37,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,2,2,2,1,2,2,1,2,2,1,41241.224595,41716.316195,2,102,6,6,1.03,5,5,1,1,0,1,37,1,2,1,2 +67426,7,1,2,24,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,1,3,1,2,2,1,2,2,NA,NA,NA,NA,112992.533921,0,1,91,8,8,2.51,3,3,1,0,0,2,24,1,4,1,3 +67427,7,2,1,24,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14313.345971,14977.822328,3,91,15,15,5,3,3,0,0,2,2,61,1,5,1,5 +67428,7,1,1,72,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,68460.271241,0,1,102,9,9,4.08,2,2,0,0,1,1,44,1,3,3,NA +67429,7,2,2,65,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,2,6,NA,2,2,2,1,2,2,1,2,2,2,9716.805546,12994.252166,2,90,4,4,1.47,1,1,0,0,1,2,65,2,2,6,NA 
+67430,7,2,1,8,NA,5,6,2,8,99,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10105.916709,10900.114748,1,91,6,6,1.25,4,4,1,1,0,1,26,2,4,6,NA +67431,7,2,1,61,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,1,NA,NA,NA,1,2,1,3,5526.665646,5830.301859,2,92,12,NA,NA,7,1,0,0,2,1,53,2,3,1,3 +67432,7,2,2,10,NA,4,4,1,10,124,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11195.065587,11638.702248,2,96,2,2,0.43,3,3,0,2,0,2,50,1,2,5,NA +67433,7,2,1,65,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,5,3,NA,2,2,2,NA,NA,NA,2,2,2,2,13473.930124,13736.530204,2,98,4,4,0.48,6,6,2,0,2,2,65,2,1,2,NA +67434,7,2,1,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,184986.088848,184760.524604,1,92,15,15,5,2,2,0,0,0,2,56,1,4,1,5 +67435,7,2,1,80,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,1,NA,12101.489198,12967.543396,1,93,3,3,0.82,2,2,0,0,2,1,80,2,5,1,1 +67436,7,2,2,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,19783.396474,19410.189178,2,92,14,4,1.38,2,1,0,0,0,1,29,1,5,6,NA +67437,7,2,2,0,2,3,3,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16423.151355,15977.062092,1,99,9,9,3.24,3,3,1,0,0,1,29,1,5,1,5 +67438,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,6943.972932,7268.809657,2,92,5,5,1.32,2,2,0,0,1,2,51,1,2,1,2 +67439,7,2,2,36,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,21619.283038,21663.427812,2,102,8,8,1.72,5,5,0,2,1,1,63,2,5,1,5 +67440,7,1,2,61,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,16913.219568,0,2,98,77,77,NA,1,1,0,0,1,2,61,1,4,2,NA +67441,7,2,1,80,NA,3,3,2,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,36829.543424,39682.305625,1,93,15,15,5,2,2,0,0,2,2,75,1,5,1,5 +67442,7,2,2,39,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,28187.999204,29187.28099,1,99,7,7,2.72,2,2,0,0,0,2,39,1,5,5,NA +67443,7,2,1,4,NA,1,1,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16169.123686,17324.04909,1,100,13,13,NA,4,4,1,1,0,1,28,2,1,1,1 
+67444,7,2,1,3,NA,5,6,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9459.753034,9916.192713,2,91,10,10,3.04,4,4,1,1,0,1,37,2,5,1,5 +67445,7,2,2,38,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,2,1,2,1,2,2,1,2,2,2,2,2,2,32097.38481,33203.6204,2,100,4,4,0.81,4,4,0,2,0,1,56,1,4,1,2 +67446,7,2,1,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19835.707828,24564.335286,1,94,6,6,1.18,5,5,1,2,0,1,30,1,3,1,3 +67447,7,2,2,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,140431.173819,139253.659476,1,94,8,8,3.47,2,2,0,0,1,2,80,1,1,2,NA +67448,7,2,1,4,NA,4,4,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7629.74403,7646.343205,1,99,4,4,0.53,7,7,3,1,0,2,26,1,1,5,NA +67449,7,2,2,48,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,111065.717962,115110.76456,2,91,15,15,5,4,4,0,2,0,2,48,1,5,1,5 +67450,7,2,1,55,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,166897.201244,166693.693828,2,91,15,6,2.24,7,1,0,0,1,1,49,NA,NA,5,NA +67451,7,2,2,18,NA,1,1,2,19,228,2,NA,2,2,3,12,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,16594.391299,17940.930507,3,91,6,6,0.89,7,7,1,1,0,1,59,2,1,1,1 +67452,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,134694.414609,139412.076132,2,91,15,10,5,3,1,0,0,1,1,47,1,5,5,NA +67453,7,2,1,0,1,3,3,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23912.171644,23498.78217,1,94,9,9,2.39,4,4,2,0,0,2,30,2,4,1,3 +67454,7,2,2,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,53103.835249,56432.29308,2,92,6,6,2.02,2,2,0,0,2,2,76,1,3,1,NA +67455,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,87101.243392,95057.097172,1,101,6,6,1.65,2,2,0,0,2,1,70,1,3,1,1 +67456,7,2,1,7,NA,4,4,2,7,93,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9699.683862,9681.447258,2,100,13,13,NA,4,4,0,2,0,2,28,1,2,6,NA +67457,7,2,1,49,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,31010.243793,30930.584862,3,92,6,6,0.74,7,7,2,1,0,2,46,1,2,1,4 
+67458,7,2,1,9,NA,5,6,1,10,121,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7921.96624,8581.996701,1,92,7,7,2.31,2,2,0,1,0,2,26,1,4,5,NA +67459,7,2,2,24,NA,4,4,1,NA,NA,2,NA,2,2,4,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,17427.779559,17080.798702,2,93,6,6,1.13,4,4,0,0,2,1,60,2,3,1,3 +67460,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,44341.489734,49693.466543,2,103,5,5,1.79,1,1,0,0,1,2,80,1,4,2,NA +67461,7,2,2,0,11,3,3,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10692.488346,11331.751026,1,101,1,1,0.08,3,3,1,0,0,2,19,1,2,NA,NA +67462,7,2,2,38,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,3,3,2,1,2,1,1,2,1,2,2,2,NA,35464.8385,46825.562506,2,96,3,3,0.95,2,2,0,1,0,2,38,2,3,3,NA +67463,7,2,1,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,124091.929364,131797.624562,1,95,15,15,5,3,3,1,0,0,1,28,1,5,1,5 +67464,7,2,1,10,NA,3,3,1,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,41165.805324,42583.611962,2,100,10,10,3.13,4,4,0,2,0,1,45,1,4,1,4 +67465,7,1,2,58,NA,4,4,NA,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15519.323571,0,2,99,10,10,3.67,3,3,0,0,1,1,64,2,4,1,5 +67466,7,2,1,55,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,23431.677775,24302.344664,2,93,5,5,1.26,3,3,0,1,0,1,55,2,2,1,2 +67467,7,1,1,80,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,39280.460347,0,2,91,4,4,1.39,1,1,0,0,1,1,80,1,3,2,NA +67468,7,2,2,17,NA,5,6,1,17,212,2,NA,2,1,4,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9416.499309,9568.799193,1,100,15,15,5,4,4,0,1,0,1,44,2,5,1,5 +67469,7,2,1,23,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14313.345971,14703.033095,3,91,7,7,2.58,2,2,0,0,0,2,55,2,5,3,NA +67470,7,2,2,37,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,22326.231285,22166.696692,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +67471,7,2,2,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,155208.037885,157114.920604,2,102,8,8,4.32,1,1,0,0,0,2,45,1,3,4,NA 
+67472,7,2,1,2,NA,3,3,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,40111.361732,45255.148434,1,98,15,15,3.7,5,5,2,1,0,1,34,1,5,1,5 +67473,7,2,2,79,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,29145.675285,31446.34406,3,92,7,7,2.31,2,2,0,0,2,1,77,1,4,1,4 +67474,7,2,1,38,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18423.855809,18161.238545,1,96,12,12,NA,5,5,1,2,0,2,35,1,5,1,4 +67475,7,2,1,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,16287.780872,16793.056025,2,100,10,10,5,1,1,0,0,0,1,52,1,3,3,NA +67476,7,2,1,6,NA,3,3,2,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,57218.802967,58901.688698,1,95,9,9,2.13,6,6,0,4,0,2,44,1,1,1,1 +67477,7,2,2,57,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,12449.932013,12109.206963,3,90,15,15,4.89,5,5,0,0,0,2,57,2,3,1,3 +67478,7,2,1,2,NA,3,3,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37959.146468,40828.920669,1,94,99,99,NA,3,3,1,0,0,1,31,1,4,6,NA +67479,7,2,2,5,NA,4,4,2,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10771.85499,11250.180007,2,97,2,2,0.38,3,3,1,1,0,2,27,1,2,5,NA +67480,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,6503.357568,6816.933235,2,95,15,15,3.85,7,7,0,3,1,2,62,1,4,2,NA +67481,7,2,2,69,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,10235.0654,10662.230581,2,93,3,3,0.66,2,2,0,0,2,2,69,2,4,3,NA +67482,7,2,2,14,NA,2,2,1,14,173,NA,NA,1,1,NA,11,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18515.058419,19360.671834,2,96,2,2,0.27,6,6,1,3,0,1,34,NA,NA,1,NA +67483,7,2,2,64,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,2,2,2,2,2,2,2,2,2,2,9570.416297,10355.226074,1,92,2,2,0.66,1,1,0,0,1,2,64,1,1,3,NA +67484,7,2,2,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,NA,NA,NA,NA,14289.513581,13586.634632,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +67485,7,2,1,1,22,3,3,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22261.258803,25115.990281,1,94,7,7,1.21,6,6,2,2,0,1,31,1,2,6,NA 
+67486,7,2,2,41,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,36924.381422,38054.088192,2,102,14,14,3.8,4,4,2,0,0,2,41,2,4,1,5 +67487,7,2,2,75,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,16494.288293,17728.004854,2,101,3,3,0.92,1,1,0,0,1,2,75,1,2,2,NA +67488,7,2,1,29,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,3,12241.196357,12961.680012,2,96,15,7,3.21,3,1,0,0,0,1,31,2,5,1,NA +67489,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,24158.28574,25084.308361,1,92,14,9,3.97,3,2,0,0,2,1,51,1,4,5,NA +67490,7,2,1,34,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,3,1,NA,2,2,2,1,2,2,2,2,2,2,41241.224595,41716.316195,2,102,4,4,0.61,5,5,0,3,0,1,34,2,3,1,3 +67491,7,2,1,2,NA,1,1,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13968.423539,13738.28453,2,102,3,3,0.54,4,4,1,1,0,1,28,2,2,6,NA +67492,7,2,2,32,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,85444.349063,87754.040572,1,94,14,14,5,2,2,0,0,0,1,50,1,3,1,5 +67493,7,2,1,76,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,8517.336599,8684.171961,2,100,9,9,4.35,2,2,0,0,2,2,79,1,5,1,3 +67494,7,2,1,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,19384.896286,21265.137164,1,94,4,4,0.56,5,5,1,2,0,1,34,1,2,3,NA +67495,7,2,1,11,NA,2,2,1,11,142,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,2,13898.598114,14013.214919,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +67496,7,2,1,2,NA,1,1,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13305.770449,13463.073191,3,92,4,4,0.59,5,5,2,1,0,1,20,2,1,1,3 +67497,7,2,2,6,NA,5,6,1,6,74,NA,NA,2,2,2,0,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5064.232234,5712.276567,2,92,99,77,NA,7,4,3,3,1,1,61,2,1,1,3 +67498,7,2,2,16,NA,4,4,1,16,200,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14166.687432,14745.171396,1,100,1,1,0.04,4,4,1,1,0,2,51,1,3,3,NA +67499,7,2,1,73,NA,2,2,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,13345.06742,13314.236759,1,99,15,15,5,2,2,0,0,2,1,73,1,4,1,5 
+67500,7,2,2,9,NA,5,7,2,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,18348.624469,20109.577944,2,90,77,77,NA,6,6,0,4,0,2,41,NA,NA,4,NA +67501,7,2,2,62,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,164066.603708,163511.57474,1,95,10,10,4.76,2,2,0,0,2,1,65,1,4,1,4 +67502,7,2,1,56,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10976.758665,11141.51106,2,103,15,15,5,3,3,0,0,0,2,52,2,4,1,5 +67503,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,154825.466557,154977.498295,1,94,12,7,3.67,2,1,0,0,0,2,45,1,5,5,NA +67504,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,1,1,2,2,1,2,2,1,2,2,1,27532.825087,28471.490951,1,101,6,6,1.3,4,4,0,2,0,1,43,2,1,1,3 +67505,7,2,2,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,163604.158578,166277.181905,1,101,10,10,4.63,2,2,0,0,2,1,62,1,5,1,5 +67506,7,2,2,6,NA,3,3,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21658.223225,22668.915202,1,90,7,7,1.55,5,5,0,3,0,1,51,2,3,1,2 +67507,7,2,1,28,NA,4,4,1,NA,NA,2,NA,2,2,4,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17840.217575,17960.016751,2,93,6,6,1.13,4,4,0,0,2,1,60,2,3,1,3 +67508,7,2,1,18,NA,5,7,2,19,228,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13547.769361,13874.542578,1,91,9,9,4.35,2,2,0,0,0,2,40,1,4,3,NA +67509,7,2,1,79,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,68266.732554,70856.701199,1,95,7,7,3.67,1,1,0,0,1,1,79,1,5,5,NA +67510,7,2,1,4,NA,4,4,1,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7311.663111,7613.781996,2,93,6,6,1.15,5,5,3,1,0,1,29,1,3,5,NA +67511,7,1,2,21,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,3,6,3,1,2,2,1,2,2,NA,NA,NA,NA,44119.608456,0,1,92,6,6,1.51,3,3,0,0,0,1,46,1,3,3,NA +67512,7,2,1,3,NA,4,4,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9304.437652,10259.663981,2,97,13,13,NA,4,4,1,0,1,2,45,1,2,5,NA +67513,7,2,1,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,20286.317046,20915.633712,2,99,2,2,0.5,2,2,0,0,0,1,53,1,3,1,NA 
+67514,7,2,1,6,NA,1,1,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11036.458246,10936.228943,1,102,13,13,NA,7,7,3,1,2,2,62,2,1,1,2 +67515,7,2,2,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,18174.62037,2,97,NA,99,NA,7,6,2,1,1,2,56,1,3,5,NA +67516,7,2,2,47,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,20773.573942,20827.234538,2,93,6,6,1.47,3,3,0,0,0,2,47,1,4,5,NA +67517,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,23660.610934,25318.185975,3,91,5,5,1.07,4,4,0,2,0,2,36,1,5,1,4 +67518,7,2,2,5,NA,1,1,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11009.851855,11478.955249,1,93,13,3,0.54,6,3,2,1,0,1,23,NA,NA,5,NA +67519,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,6612.194774,6921.511029,2,99,6,6,1.98,2,2,0,0,1,1,63,1,2,2,NA +67520,7,2,1,79,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,11245.778093,11855.469704,1,98,4,4,1.19,2,2,0,0,2,1,79,1,3,1,3 +67521,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,81062.798322,86779.337922,1,101,4,4,1.48,1,1,0,0,1,2,76,1,2,3,NA +67522,7,2,2,26,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,63207.045171,63513.398112,2,102,15,15,5,2,2,0,0,0,1,41,2,4,6,NA +67523,7,1,1,54,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,31699.998102,0,1,98,5,5,2.15,1,1,0,0,0,1,54,1,3,3,NA +67524,7,2,1,6,NA,2,2,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9807.589376,10786.008845,2,90,5,5,0.76,5,5,0,4,0,2,32,1,2,3,NA +67525,7,2,1,67,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,1,1,2,2,1,2,1,3,10497.261485,11239.197739,1,90,2,2,0.33,2,2,0,0,2,2,65,2,3,1,5 +67526,7,2,1,33,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,2,2,2,NA,NA,NA,NA,34887.439952,36924.956604,2,94,7,7,1.23,6,6,2,1,0,1,33,2,1,6,NA +67527,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,17399.722435,17053.300185,2,99,6,3,1.27,3,1,0,0,0,1,41,1,3,6,NA 
+67528,7,2,1,2,NA,3,3,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,38457.282261,42554.855377,1,100,6,6,1.18,5,5,2,2,0,2,40,1,5,3,NA +67529,7,2,2,11,NA,3,3,2,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,80369.555824,80552.420473,1,97,15,15,4.07,5,5,0,3,0,1,36,1,5,1,5 +67530,7,2,1,52,NA,1,1,2,NA,NA,2,NA,2,2,7,NA,5,4,NA,2,2,2,2,2,2,1,2,2,2,22446.308035,22508.005013,2,94,4,4,1.38,1,1,0,0,0,1,52,2,5,4,NA +67531,7,2,2,1,15,4,4,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6880.068407,7403.030352,2,97,3,3,0.33,6,6,2,0,0,2,32,1,2,1,3 +67532,7,2,2,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,88916.414931,94489.544183,1,101,6,6,1.65,2,2,0,0,2,1,70,1,3,1,1 +67533,7,2,1,0,4,1,1,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6298.658963,6298.775059,2,94,7,7,1.57,4,4,2,0,0,2,27,1,4,1,2 +67534,7,2,2,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,31070.966983,31355.213599,1,102,2,2,0.66,1,1,0,0,0,2,56,1,4,2,NA +67535,7,2,2,63,NA,4,4,2,NA,NA,2,NA,2,1,8,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9113.905743,9695.883475,3,90,9,9,3.14,3,3,0,1,1,2,63,2,4,3,NA +67536,7,2,2,14,NA,3,3,2,14,172,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,91797.787708,92992.465808,1,95,15,15,5,4,4,0,2,0,2,42,1,5,1,5 +67537,7,2,1,33,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19638.748956,20390.674215,2,99,15,15,5,2,1,0,0,0,2,31,1,5,1,NA +67538,7,2,1,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,25645.251384,25368.119207,1,92,6,6,1.3,4,4,0,1,1,1,25,1,1,1,3 +67539,7,2,2,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,2,1,2,2,1,2,2,1,2,2,1,102855.726146,103775.32646,1,101,5,5,0.89,5,5,1,2,0,1,31,1,2,1,1 +67540,7,2,2,12,NA,1,1,2,12,155,NA,NA,2,2,4,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13824.001771,14945.739837,2,90,3,3,0.58,4,4,0,2,0,2,36,2,3,1,3 +67541,7,2,1,11,NA,5,6,2,11,143,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10810.913614,11522.32071,1,97,15,15,4.07,5,5,0,3,0,1,42,2,5,1,5 
+67542,7,2,2,30,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,3,1,2,1,2,1,1,2,1,NA,NA,NA,NA,11032.714892,11055.242776,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +67543,7,2,2,16,NA,2,2,2,16,194,NA,NA,1,1,NA,9,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,12680.621719,13709.581084,2,90,7,7,1.34,5,5,0,1,2,1,61,2,1,4,NA +67544,7,2,2,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,17521.481386,17363.561951,2,101,1,1,0.2,2,2,0,0,1,2,55,1,4,4,NA +67545,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,124604.441853,125718.490512,1,92,9,4,1,7,3,2,1,0,1,45,1,4,2,NA +67546,7,2,2,30,NA,2,2,1,NA,NA,2,NA,2,1,3,NA,4,1,2,2,2,2,1,2,2,NA,NA,NA,NA,40476.413979,40301.503085,2,93,7,7,1.83,3,3,1,0,0,1,40,2,5,1,4 +67547,7,2,1,8,NA,3,3,2,8,101,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,16549.21669,17579.802509,1,91,12,8,2.15,6,4,1,1,0,2,29,1,4,6,NA +67548,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,12532.471503,12837.991566,2,97,4,4,0.84,3,3,1,0,1,2,68,1,4,2,NA +67549,7,2,1,25,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,36391.614555,41134.650764,1,103,5,5,1.02,4,4,2,0,0,1,25,1,2,1,4 +67550,7,2,2,13,NA,4,4,1,13,159,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15483.914568,16116.18632,1,100,4,4,1.23,2,2,0,1,0,2,47,1,2,4,NA +67551,7,2,2,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,17427.779559,17328.063231,2,93,12,12,NA,4,4,0,0,2,1,72,1,2,1,4 +67552,7,2,2,16,NA,3,3,1,16,194,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,76992.7514,81576.336255,1,100,15,15,4.07,5,5,0,2,0,2,41,1,5,1,4 +67553,7,2,2,9,NA,4,4,2,9,110,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7388.61397,8001.678046,3,90,2,2,0.39,2,2,0,1,0,2,32,1,3,5,NA +67554,7,1,2,61,NA,4,4,NA,NA,NA,2,NA,2,1,99,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,9024.164523,0,2,93,15,15,5,1,1,0,0,1,2,61,2,5,1,NA +67555,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,31785.728924,32871.467684,1,95,4,4,1.22,2,2,0,0,1,1,79,1,1,1,3 
+67556,7,2,1,14,NA,4,4,2,14,178,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13251.602554,13095.83949,1,96,9,9,3.14,3,3,0,2,0,2,39,NA,NA,3,NA +67557,7,2,2,15,NA,4,4,2,15,187,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9728.028593,10125.263908,2,99,6,6,1.11,5,5,1,2,0,2,41,1,2,5,NA +67558,7,2,2,0,7,4,4,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4476.23918,4525.114437,1,90,3,3,0.43,4,4,2,0,0,1,46,1,3,1,4 +67559,7,2,1,9,NA,3,3,2,9,118,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,30554.050397,31755.009616,1,97,6,6,1.03,6,6,2,2,0,2,38,1,5,1,4 +67560,7,2,1,68,NA,5,6,1,NA,NA,2,NA,2,2,8,NA,1,5,NA,1,2,2,2,2,2,1,2,2,1,13527.667075,14341.996991,2,98,6,6,0.59,7,7,2,2,1,2,52,2,1,1,1 +67561,7,2,2,12,NA,4,4,1,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14086.017075,14046.326674,2,96,5,5,1.07,4,4,0,3,0,2,46,1,4,3,NA +67562,7,2,1,2,NA,4,4,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6347.215153,6609.482921,2,95,1,1,0.09,5,5,3,1,0,2,31,1,2,1,NA +67563,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,35434.580514,39416.921733,1,92,2,2,0.64,1,1,0,0,1,2,80,1,3,2,NA +67564,7,2,2,43,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,37512.060155,39042.669235,1,92,15,15,4.99,4,4,0,2,0,2,43,1,4,1,4 +67565,7,2,1,10,NA,3,3,1,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,58429.74688,62860.392832,1,100,15,15,4.56,4,4,0,2,0,2,42,1,4,1,3 +67566,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,30796.941649,33729.162412,1,98,4,4,1.26,2,2,0,0,1,2,80,1,4,2,NA +67567,7,2,1,69,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9618.837002,9727.194029,3,91,14,14,3.53,5,5,0,1,1,1,69,1,4,3,NA +67568,7,2,2,7,NA,3,3,2,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22933.149195,22757.230737,1,95,6,6,0.81,6,6,2,2,0,1,30,1,3,1,4 +67569,7,2,1,7,NA,3,3,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23188.935049,25672.571973,3,91,7,7,1.1,7,7,0,4,0,1,40,1,4,1,3 
+67570,7,2,2,6,NA,2,2,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13720.541363,13932.536034,2,100,14,14,3.36,4,4,1,1,0,1,45,2,5,1,2 +67571,7,2,2,35,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,28747.860416,30588.052168,1,99,6,6,0.96,5,5,1,2,0,2,35,1,4,1,2 +67572,7,2,2,1,23,3,3,1,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,59617.625426,65799.423913,3,92,15,15,5,4,4,2,0,0,1,38,1,5,1,5 +67573,7,2,2,80,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,2,2,2,2,2,2,2,2,2,NA,25315.905293,28273.512517,2,98,4,4,1.15,2,2,0,0,2,1,80,1,1,1,1 +67574,7,2,2,12,NA,4,4,2,12,147,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11608.900361,11982.991894,1,99,8,8,2.59,3,3,0,2,0,2,46,1,4,2,NA +67575,7,2,2,6,NA,1,1,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,17458.526997,17642.073401,1,94,6,6,1.11,5,5,1,2,0,2,41,2,1,1,1 +67576,7,2,2,8,NA,1,1,2,8,97,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,15225.935813,15621.122955,2,94,77,77,NA,6,6,0,3,0,2,58,1,3,1,9 +67577,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,118933.861968,118531.514768,1,91,8,8,4.82,1,1,0,0,1,2,60,1,4,3,NA +67578,7,2,2,10,NA,3,3,2,10,131,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,73810.484644,75214.997077,1,94,15,15,5,4,4,0,2,0,2,47,1,5,1,5 +67579,7,2,1,10,NA,1,1,1,10,121,NA,NA,1,1,NA,2,NA,NA,NA,2,1,1,2,2,1,1,2,2,1,14880.007592,14744.872501,3,92,4,4,1.12,2,2,0,1,0,2,38,2,1,5,NA +67580,7,2,2,67,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,37934.469637,42910.885175,1,94,3,3,0.95,2,2,0,0,1,2,67,1,2,2,NA +67581,7,2,2,19,NA,2,2,2,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,5,5,0.8,5,5,0,3,0,2,40,2,1,5,NA +67582,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,NA,22306.465066,22494.734848,2,98,3,3,0.54,3,3,1,1,0,2,29,1,2,1,NA +67583,7,1,1,6,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14456.751936,0,1,90,15,15,5,5,5,1,1,0,1,32,2,1,1,4 
+67584,7,2,1,38,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19008.083201,19380.210545,2,97,7,7,3.67,1,1,0,0,0,1,38,1,3,5,NA +67585,7,2,2,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16181.169973,16286.716901,2,100,10,10,4.42,2,2,0,0,0,2,55,1,2,1,4 +67586,7,2,2,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,17622.141982,17728.11858,2,95,2,2,0.75,1,1,0,0,0,2,54,1,2,5,NA +67587,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,33147.414266,37148.276517,2,94,7,7,3.49,1,1,0,0,1,2,80,1,5,2,NA +67588,7,2,1,4,NA,4,4,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7814.415054,8296.803376,1,93,6,6,0.83,6,6,3,1,0,1,37,NA,NA,1,3 +67589,7,2,1,79,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,8517.336599,8684.171961,2,100,6,6,2.11,2,2,0,0,2,1,79,1,3,1,4 +67590,7,2,1,31,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,36150.793885,43021.850084,1,90,3,3,0.43,4,4,2,0,0,1,31,1,3,6,NA +67591,7,2,1,40,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11113.602843,11678.105413,3,90,14,14,3.47,4,4,1,1,0,2,38,2,5,1,5 +67592,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17810.743272,17839.416552,1,99,15,15,5,2,2,0,0,0,2,46,1,5,1,5 +67593,7,2,1,4,NA,4,4,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9035.668246,2,100,8,8,1.1,7,7,3,3,0,2,58,1,3,5,NA +67594,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,100867.796959,104079.392539,2,99,15,15,5,3,3,1,0,0,1,43,1,5,1,5 +67595,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25128.435397,25532.311821,2,94,6,5,2.11,3,1,0,0,0,1,26,1,4,5,NA +67596,7,2,1,63,NA,5,7,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9314.746352,9205.449493,1,91,15,15,5,3,3,0,0,1,1,63,2,5,1,5 +67597,7,2,2,41,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,21077.085361,20500.255628,2,99,12,12,NA,1,1,0,0,0,2,41,1,5,5,NA 
+67598,7,2,2,80,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,3,NA,2,2,2,2,2,2,2,2,2,NA,16490.79781,18057.97943,3,90,10,10,3.04,4,4,0,0,2,2,80,2,1,3,NA +67599,7,2,2,4,NA,4,4,1,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13130.790087,13857.266639,2,102,5,5,0.76,5,5,1,3,0,2,30,1,4,4,NA +67600,7,2,2,15,NA,3,3,2,15,181,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,34552.42011,37128.535405,1,90,7,7,1.55,5,5,0,3,0,1,51,2,3,1,2 +67601,7,1,2,8,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7842.234542,0,1,91,15,15,5,5,5,2,1,0,1,40,1,5,1,5 +67602,7,2,1,43,NA,1,1,1,NA,NA,1,2,2,2,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,31740.385214,32919.784432,2,96,7,7,1.79,4,4,0,2,0,1,43,2,3,1,2 +67603,7,2,2,62,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,17243.546687,17909.086027,1,92,14,14,5,2,2,0,0,2,2,62,1,4,1,4 +67604,7,2,2,24,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,13820.210756,14410.358083,1,90,8,8,1.43,7,7,2,0,0,1,23,2,4,1,3 +67605,7,2,2,19,NA,1,1,1,19,230,2,NA,2,7,77,10,NA,NA,NA,2,2,2,2,2,2,2,2,2,1,18581.167701,19429.800436,2,96,6,6,0.77,7,7,2,1,0,1,53,2,1,1,1 +67606,7,2,1,48,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,28542.421068,29427.85637,2,101,2,2,0.74,1,1,0,0,0,1,48,1,2,5,NA +67607,7,2,2,63,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,3,1,NA,1,2,1,1,2,1,1,2,1,1,17248.011865,19146.123826,1,97,14,14,4.96,2,2,0,0,1,1,50,1,3,1,3 +67608,7,2,1,4,NA,5,6,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10273.602479,11522.969217,1,92,15,15,5,3,3,1,0,0,1,50,1,4,1,4 +67609,7,2,2,6,NA,4,4,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9399.281543,9563.245188,2,96,6,6,1.35,3,3,1,1,0,2,23,1,2,5,NA +67610,7,2,1,2,NA,4,4,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,5652.403121,6080.146007,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +67611,7,2,2,14,NA,3,3,2,14,175,NA,NA,2,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,76360.13568,77793.595093,1,99,15,15,5,3,3,0,1,0,1,50,1,4,1,4 
+67612,7,2,1,6,NA,1,1,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17882.621856,18280.794545,3,92,7,7,2.1,3,3,1,1,0,2,25,1,4,5,NA +67613,7,2,2,21,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,11739.283384,12617.501315,1,96,7,7,1.83,3,3,0,0,1,1,66,2,5,1,3 +67614,7,2,1,53,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,25118.469449,26312.875816,2,98,5,5,1.07,4,4,0,1,0,1,53,2,1,1,1 +67615,7,2,2,17,NA,5,7,2,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12857.456314,12821.227649,2,100,9,9,2.46,4,4,0,2,0,2,36,2,4,1,3 +67616,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,167711.394252,175827.980982,1,101,4,2,0.81,2,1,0,0,1,1,63,1,2,6,NA +67617,7,2,2,13,NA,2,2,1,13,162,NA,NA,2,2,4,6,NA,NA,NA,2,1,2,2,2,2,2,2,2,2,15809.066118,17342.845429,2,93,7,7,1.52,4,4,1,1,0,1,44,2,4,1,NA +67618,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,39565.288792,43332.356399,1,99,14,14,5,2,2,0,0,2,1,80,1,4,1,4 +67619,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,120041.937453,120355.294586,1,98,15,15,5,5,5,0,1,0,1,53,1,5,1,5 +67620,7,2,1,54,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,28017.200708,32299.614806,2,102,3,3,0.92,1,1,0,0,0,1,54,1,4,3,NA +67621,7,2,2,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,43813.24867,44204.968982,1,98,4,4,0.67,5,5,1,2,0,1,29,1,4,1,3 +67622,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,18246.235208,18261.326779,2,99,1,1,0.07,4,4,1,1,0,2,24,1,2,5,NA +67623,7,2,1,8,NA,3,3,2,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,74331.764009,78960.699825,2,91,14,14,4.19,3,3,0,1,0,2,31,1,4,1,3 +67624,7,2,1,30,NA,3,3,2,NA,NA,2,NA,2,2,3,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,99283.360764,104566.027434,3,91,1,1,0.09,1,1,0,0,0,1,30,2,5,5,NA +67625,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,25359.946386,29418.979823,2,95,2,2,0.88,1,1,0,0,1,2,80,1,1,2,NA 
+67626,7,2,1,1,15,2,2,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13305.770449,13726.956753,3,92,9,9,2.22,5,5,1,0,2,1,66,2,1,1,1 +67627,7,2,1,25,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,23064.54207,22591.937434,2,91,15,15,5,4,4,0,0,1,1,61,NA,NA,1,2 +67628,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,53634.754806,54437.113731,1,98,99,99,NA,3,2,0,0,0,2,22,1,4,5,NA +67629,7,2,2,70,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,18698.205673,19635.336647,1,97,12,12,NA,5,5,0,1,1,2,40,2,9,1,NA +67630,7,2,1,12,NA,3,3,1,12,147,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,26749.020961,27839.957403,1,98,6,6,1.11,5,5,1,2,0,2,32,1,2,1,2 +67631,7,2,1,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18544.003944,22007.065507,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +67632,7,2,2,51,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,24646.971819,25093.941214,1,102,7,7,1.89,3,3,0,0,0,1,53,2,1,1,1 +67633,7,2,2,9,NA,4,4,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7944.571197,8483.470772,2,99,15,15,5,3,3,0,1,0,2,48,1,5,4,NA +67634,7,2,2,8,NA,4,4,2,8,104,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8184.286585,8442.757341,2,97,2,2,0.21,7,7,2,3,0,2,32,1,4,5,NA +67635,7,2,1,65,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,10298.484448,10551.517983,1,94,6,6,1.98,2,2,0,0,2,1,65,2,1,1,1 +67636,7,1,1,7,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17227.332856,0,3,91,5,5,0.65,7,7,0,4,0,2,39,1,3,4,NA +67637,7,1,2,9,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7489.549692,0,2,100,10,10,2.33,6,6,0,2,2,2,35,1,2,5,NA +67638,7,2,2,19,NA,4,4,1,19,232,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11437.338051,11296.529267,2,95,2,2,0.41,3,3,2,0,0,2,19,1,2,NA,NA +67639,7,2,1,0,2,4,4,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6476.58432,6575.990772,2,97,3,3,0.75,2,2,1,0,0,2,22,1,4,4,NA 
+67640,7,2,1,58,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27595.50738,27352.795749,1,98,6,6,1.57,3,3,0,0,0,1,58,1,3,1,3 +67641,7,2,1,5,NA,1,1,2,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16074.564008,16583.394837,1,97,4,4,0.72,5,5,2,1,0,2,33,2,1,6,NA +67642,7,2,2,24,NA,1,1,2,NA,NA,2,NA,2,1,4,NA,2,6,2,1,2,2,1,2,2,NA,NA,NA,NA,39426.061521,43773.817687,2,94,8,8,2.33,4,4,2,0,0,1,24,1,2,6,NA +67643,7,2,1,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,51472.063256,53424.859661,2,95,15,15,5,2,2,0,0,2,1,76,1,4,1,5 +67644,7,2,1,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,23005.210125,23019.76475,1,95,14,14,3.04,6,6,0,4,0,1,56,1,5,1,4 +67645,7,2,1,15,NA,4,4,2,15,187,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11386.695644,11908.648036,2,99,6,6,1.18,5,5,0,3,0,2,38,1,2,5,NA +67646,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11113.602843,11678.105413,3,90,5,5,0.93,4,4,1,0,0,1,48,2,4,1,NA +67647,7,2,2,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,NA,16494.288293,17598.146607,2,95,3,3,1.1,1,1,0,0,1,2,70,1,3,4,NA +67648,7,1,2,2,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6662.138091,0,2,95,14,14,3.58,4,4,1,0,0,1,39,1,3,1,3 +67649,7,2,2,41,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,2,1,2,1,2,1,1,2,2,1,2,1,3,16378.162652,17998.555773,2,96,4,4,0.92,3,3,0,1,1,2,41,2,2,1,2 +67650,7,2,2,11,NA,3,3,1,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16950.724686,16899.799988,2,98,2,2,0.34,2,2,0,1,0,2,30,1,4,4,NA +67651,7,2,2,26,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,20430.250572,20111.803159,2,97,5,5,0.76,5,5,0,0,0,2,50,1,4,5,NA +67652,7,2,1,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,108408.375382,118202.807654,1,92,6,6,1.51,3,3,0,0,0,1,46,1,3,3,NA +67653,7,2,2,39,NA,3,3,2,NA,NA,2,NA,2,1,7,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,74517.751389,77393.175383,2,94,10,10,3.51,3,3,0,2,0,2,39,2,4,3,NA 
+67654,7,2,2,16,NA,1,1,1,16,193,NA,NA,1,1,NA,8,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,20460.442471,21235.303768,2,102,7,7,1.04,7,7,1,2,0,2,37,2,1,1,2 +67655,7,2,2,8,NA,4,4,2,8,99,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9326.540969,10100.402919,1,91,8,8,1.76,5,5,0,3,0,2,42,1,3,6,NA +67656,7,2,1,53,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15599.953109,16217.343339,1,99,14,1,0.23,2,1,0,0,0,2,51,1,5,1,NA +67657,7,2,1,1,20,3,3,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,32956.344514,37182.588634,2,97,15,15,4.97,5,5,1,0,0,1,48,1,4,1,3 +67658,7,2,1,77,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,52501.717941,55571.910913,2,102,8,8,4.13,1,1,0,0,1,1,77,1,5,2,NA +67659,7,2,2,15,NA,5,6,1,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8272.446168,8996.728356,1,102,15,15,3.82,5,5,1,1,0,1,29,1,4,1,4 +67660,7,2,1,16,NA,5,7,2,16,194,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9136.388281,9764.423514,3,91,15,15,4.47,4,4,0,1,0,2,45,2,5,1,5 +67661,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,13408.721263,15005.802731,2,98,4,4,1.22,2,2,0,0,2,1,80,1,1,1,1 +67662,7,2,2,11,NA,5,6,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,5064.232234,5431.552443,2,92,10,8,2.01,7,4,1,1,1,2,27,2,3,1,3 +67663,7,2,1,3,NA,4,4,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9319.143734,9894.41982,2,95,8,8,1.85,5,5,1,2,0,1,55,1,2,1,3 +67664,7,2,1,11,NA,1,1,1,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11399.23838,11468.323492,2,103,77,77,NA,5,5,1,2,0,2,30,1,2,1,2 +67665,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,19498.713386,19633.661812,2,97,5,5,0.84,5,5,0,2,0,2,33,1,4,1,3 +67666,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,10816.310126,11253.74147,1,99,14,14,5,2,2,0,0,1,2,64,1,3,3,NA +67667,7,1,1,35,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,79582.971419,0,2,99,15,15,5,1,1,0,0,0,1,35,1,5,5,NA 
+67668,7,2,2,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,26302.518039,27109.453365,1,101,5,3,0.98,2,1,0,0,2,2,70,1,4,2,NA +67669,7,2,2,2,NA,5,6,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8818.200077,8968.551716,2,102,6,6,1.52,4,4,2,0,0,1,30,2,4,1,4 +67670,7,2,2,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15497.844354,15146.281728,1,99,7,7,3.21,1,1,0,0,0,2,52,1,4,1,NA +67671,7,2,2,5,NA,3,3,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,59321.981286,63042.638464,1,94,8,8,1.67,5,5,1,2,0,1,52,1,4,1,4 +67672,7,2,1,12,NA,3,3,2,12,145,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,27702.556686,27838.63939,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA +67673,7,2,2,13,NA,4,4,1,14,169,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14166.687432,14623.20249,1,100,15,15,3.87,6,6,1,3,0,2,39,1,4,1,4 +67674,7,2,2,0,4,4,4,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4964.196196,5466.923926,1,93,4,4,1.09,2,2,1,0,0,2,39,2,3,4,NA +67675,7,2,1,66,NA,1,1,1,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,11924.931067,12067.158788,1,98,8,8,1.95,4,4,1,1,1,2,59,1,3,1,1 +67676,7,1,1,6,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10651.061092,0,1,90,99,99,NA,2,2,0,1,0,2,26,1,4,5,NA +67677,7,2,1,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17342.255821,20631.425049,1,93,3,3,1.29,1,1,0,0,0,1,36,1,4,5,NA +67678,7,2,2,6,NA,3,3,1,7,85,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22308.590534,22137.463018,1,95,7,2,0.35,5,4,1,2,0,1,26,1,4,6,NA +67679,7,2,1,9,NA,1,1,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14820.807433,14905.891142,2,102,7,7,1.53,5,5,0,3,0,1,43,2,2,1,4 +67680,7,2,2,10,NA,3,3,2,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,63555.637461,64464.244761,1,90,15,15,5,4,4,0,2,0,1,44,1,5,1,5 +67681,7,2,2,33,NA,4,4,2,NA,NA,2,NA,2,1,3,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,28747.860416,29850.909164,1,99,14,14,3.8,4,4,1,1,0,1,48,2,5,1,5 
+67682,7,2,2,13,NA,1,1,2,13,157,NA,NA,2,2,3,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,19850.979841,20602.760063,1,97,4,4,0.72,5,5,2,1,0,2,33,2,1,6,NA +67683,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,31785.728924,32633.084965,1,95,7,7,2.16,3,3,0,0,1,1,45,1,3,1,4 +67684,7,2,1,8,NA,2,2,1,8,98,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14820.807433,14943.02937,2,102,5,5,0.89,4,4,0,3,0,2,44,2,2,4,NA +67685,7,2,1,9,NA,5,6,2,9,113,NA,NA,2,2,2,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9720.482616,10533.307174,2,91,14,14,3.47,4,4,1,1,0,2,36,2,3,1,5 +67686,7,2,1,12,NA,1,1,1,12,147,NA,NA,2,2,3,5,NA,NA,NA,2,1,1,1,2,2,1,2,2,1,32326.52031,34735.818434,3,92,7,7,1.41,5,5,1,2,0,1,20,2,1,1,1 +67687,7,2,2,8,NA,5,7,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,10422.423011,10751.577119,1,98,5,5,1.26,3,3,1,1,0,2,27,1,5,5,NA +67688,7,2,2,0,10,5,7,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8303.503441,8530.807571,1,94,7,7,1.23,6,6,3,1,0,1,32,1,4,1,4 +67689,7,2,2,18,NA,1,1,2,18,219,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,19557.287652,21454.715555,2,94,9,9,2.37,5,5,0,1,0,1,48,2,4,1,2 +67690,7,2,2,40,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,20480.890987,19920.377678,2,93,3,3,1.07,1,1,0,0,0,2,40,1,4,5,NA +67691,7,2,1,18,NA,3,3,1,19,228,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,34443.919252,35848.685683,2,101,3,3,0.3,7,7,1,2,0,2,50,1,2,4,NA +67692,7,2,2,12,NA,2,2,2,12,153,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23072.919059,23761.374659,1,97,15,15,4.52,6,6,0,4,0,2,41,1,5,1,5 +67693,7,1,2,2,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12871.484115,0,2,102,15,15,4.47,4,4,1,1,0,1,32,1,5,1,4 +67694,7,2,1,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,8890.956373,9238.69917,2,99,12,12,NA,2,1,0,0,1,2,46,1,3,2,NA +67695,7,1,2,55,NA,2,2,NA,NA,NA,2,NA,2,1,5,NA,4,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,23200.373382,0,2,93,3,3,0.63,3,3,0,1,0,1,53,2,2,1,4 
+67696,7,2,2,67,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18695.172864,19350.637044,2,102,7,7,1.68,5,5,0,0,3,1,70,2,4,1,4 +67697,7,2,2,3,NA,5,7,1,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8818.200077,9596.189182,2,102,15,15,5,3,3,1,0,0,2,34,1,5,1,5 +67698,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,129999.035519,129559.2554,1,98,9,9,3.97,2,2,0,0,2,1,63,1,5,1,4 +67699,7,2,1,32,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18544.003944,20254.971305,2,100,9,9,3.24,3,3,1,0,0,1,32,1,3,1,4 +67700,7,2,1,69,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9883.238978,10581.776717,1,90,4,4,0.78,4,4,0,0,1,1,69,2,4,1,3 +67701,7,2,1,54,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17869.988703,18923.921245,3,90,14,14,3.69,4,4,0,2,0,2,49,1,4,1,4 +67702,7,1,1,8,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8613.834494,0,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +67703,7,2,1,72,NA,1,1,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,21815.897449,22272.021332,3,92,6,6,1.98,2,2,0,0,2,1,72,1,4,1,1 +67704,7,2,2,10,NA,4,4,2,10,128,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10221.991799,10790.677347,1,98,15,15,3.7,5,5,0,3,0,1,37,1,2,1,2 +67705,7,1,1,78,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,12173.814732,0,2,92,3,3,0.98,1,1,0,0,1,1,78,1,1,2,NA +67706,7,2,1,7,NA,5,6,1,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9307.245755,9856.522898,1,92,14,14,3.47,4,4,0,2,0,1,37,1,5,1,5 +67707,7,2,1,46,NA,3,3,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27340.471576,32700.198254,2,91,2,2,0.44,3,3,0,1,0,1,46,2,3,1,4 +67708,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,22685.373982,23047.569967,1,98,6,6,0.97,7,7,1,2,0,1,49,1,2,1,2 +67709,7,2,1,60,NA,2,2,1,NA,NA,2,NA,2,2,7,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,9235.951997,10444.280286,2,103,12,12,NA,3,3,0,0,1,1,60,2,2,1,2 
+67710,7,2,1,50,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,22238.49412,21912.1785,2,90,7,7,1.34,5,5,0,1,2,1,61,2,1,4,NA +67711,7,2,1,17,NA,4,4,2,17,214,2,NA,2,1,5,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11125.932433,11147.929312,1,96,77,77,NA,7,7,0,3,1,2,43,77,5,5,NA +67712,7,2,1,15,NA,4,4,1,15,181,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13653.432599,13680.426553,2,96,5,5,1.07,4,4,0,3,0,2,46,1,4,3,NA +67713,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,167711.394252,169284.299948,1,101,10,10,4.63,2,2,0,0,2,1,62,1,5,1,5 +67714,7,2,1,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,43188.300631,46554.36669,2,91,15,2,0.86,7,1,0,0,1,1,49,NA,NA,5,NA +67715,7,2,2,3,NA,5,7,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12382.393854,12702.004448,1,97,10,10,2.82,4,4,1,1,0,2,24,1,3,5,NA +67716,7,2,1,60,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,12585.09113,12786.688219,1,92,5,5,1.41,2,2,0,0,2,1,60,1,2,1,4 +67717,7,2,1,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,10717.375231,11218.730451,2,101,3,3,1.1,1,1,0,0,1,1,66,1,1,2,NA +67718,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,138322.767578,145546.941919,1,101,5,5,1.45,2,2,0,0,0,2,49,1,4,3,NA +67719,7,2,1,35,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16214.132654,20271.082789,3,90,14,14,5,2,2,0,0,0,1,47,1,2,5,NA +67720,7,2,1,64,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12279.19827,12375.250021,2,102,14,14,5,2,2,0,0,2,1,64,1,4,1,3 +67721,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,49191.372812,55403.452354,3,91,3,3,1.1,1,1,0,0,1,2,80,1,1,2,NA +67722,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,104488.914565,106745.836574,1,98,4,1,0.4,4,1,0,0,0,1,22,1,5,5,NA +67723,7,1,2,19,NA,2,2,NA,NA,NA,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,21850.358234,0,2,93,99,99,NA,3,3,0,0,0,2,54,2,3,4,NA 
+67724,7,2,1,1,14,4,4,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9438.902193,9727.203655,2,102,4,4,0.81,3,3,2,0,0,2,23,1,4,5,NA +67725,7,1,2,68,NA,2,2,NA,NA,NA,2,NA,2,1,5,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,10561.745159,0,2,90,77,77,NA,2,2,0,0,2,2,68,2,1,1,NA +67726,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,2,2,2,2,22466.936477,23282.464675,1,90,6,6,1.57,3,3,0,0,0,1,50,2,2,1,2 +67727,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,29733.812317,31414.972893,1,101,5,5,1.45,2,2,0,0,0,1,41,NA,NA,1,4 +67728,7,2,1,66,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,74792.980299,75494.43714,2,101,99,2,0.67,7,1,0,0,1,1,55,NA,NA,77,NA +67729,7,2,2,2,NA,2,2,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11412.410776,11898.666241,1,97,3,3,0.44,5,5,2,2,0,2,26,1,4,4,NA +67730,7,2,1,14,NA,3,3,1,14,178,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,102027.743122,101259.182802,1,101,8,8,1.85,5,5,0,3,0,1,41,1,3,1,4 +67731,7,2,1,0,11,3,3,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13392.309303,13030.480476,1,99,15,15,3.45,7,7,1,4,0,1,42,1,5,1,5 +67732,7,2,1,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,6910.118936,7233.371983,2,95,1,1,0,1,1,0,0,1,1,62,1,2,2,NA +67733,7,2,2,18,NA,2,2,1,18,217,2,NA,1,1,NA,11,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,15809.066118,16348.490248,2,93,6,6,1.39,4,4,0,0,0,1,53,2,3,1,3 +67734,7,2,2,19,NA,4,4,2,19,237,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18905.695776,2,101,2,2,0.48,2,2,1,0,0,2,19,1,3,NA,NA +67735,7,2,1,16,NA,5,6,2,16,195,NA,NA,2,2,3,9,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,7350.524832,8448.96413,2,90,3,3,0.54,4,4,0,1,0,2,52,NA,1,1,2 +67736,7,2,2,5,NA,1,1,2,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13366.393396,13935.903375,2,94,5,5,0.67,6,6,1,3,0,1,37,2,3,1,4 +67737,7,2,2,25,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,10800.351372,11406.289115,2,92,15,15,5,3,1,0,0,0,2,25,1,5,5,NA 
+67738,7,2,1,6,NA,2,2,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10738.959181,10632.988891,2,93,9,9,3.14,3,3,0,2,0,2,34,2,5,3,NA +67739,7,2,2,17,NA,5,6,2,17,214,2,NA,2,1,5,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11975.458482,12226.374363,1,97,15,15,5,3,3,0,1,2,2,63,1,5,1,NA +67740,7,2,1,52,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,3,4,NA,2,2,2,1,2,2,NA,NA,NA,NA,21084.369131,21009.599098,2,95,4,4,1.12,2,2,0,1,0,1,52,2,3,4,NA +67741,7,2,2,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,NA,NA,NA,NA,105039.256649,105978.379214,1,90,8,8,1.67,5,5,2,1,0,2,28,1,4,1,5 +67742,7,2,1,5,NA,1,1,1,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,17865.135763,18430.646082,3,92,3,3,0.52,5,5,2,1,0,2,29,2,1,1,3 +67743,7,2,1,51,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16117.991297,16911.733026,1,96,3,3,0.43,4,4,1,1,0,2,39,2,4,1,3 +67744,7,2,1,11,NA,1,1,1,11,143,NA,NA,2,2,3,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,NA,13822.148996,14860.201344,3,92,4,4,0.55,6,6,0,4,0,1,36,2,1,1,3 +67745,7,2,2,72,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,29534.722322,30544.805853,3,91,2,2,0.83,1,1,0,0,1,2,72,1,4,3,NA +67746,7,2,1,10,NA,3,3,2,10,128,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,39772.343364,41554.529575,2,100,15,15,4.5,6,6,0,4,0,1,45,1,5,1,5 +67747,7,2,2,2,NA,1,1,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8141.317022,8654.572344,1,103,14,14,2.96,5,5,1,2,0,1,34,1,4,1,5 +67748,7,2,2,4,NA,3,3,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26966.264969,27815.81123,1,98,6,6,1.31,3,3,2,0,0,2,22,1,3,5,NA +67749,7,2,1,1,19,5,6,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6389.003009,6767.308805,3,91,14,14,3.06,5,5,3,0,0,1,34,2,5,1,5 +67750,7,2,2,2,NA,4,4,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7016.981922,7550.350821,2,99,3,3,0.42,6,6,1,2,0,2,43,1,4,6,NA +67751,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,43813.24867,47373.102231,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA 
+67752,7,2,1,14,NA,5,7,2,14,172,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11506.937395,12095.740214,1,97,15,15,5,6,6,0,3,0,1,47,1,5,1,5 +67753,7,1,2,6,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,12789.411811,0,1,102,99,99,NA,5,5,0,2,1,1,52,2,1,1,1 +67754,7,2,2,36,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,14010.52688,14536.170828,2,103,15,15,5,3,3,1,0,0,1,35,1,9,1,5 +67755,7,2,1,11,NA,3,3,1,11,138,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74331.764009,78594.005469,2,91,9,9,2.6,4,4,0,2,0,1,53,1,2,1,5 +67756,7,2,1,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,17583.693727,17958.024135,2,95,15,7,3.13,5,1,0,0,0,1,47,1,5,1,3 +67757,7,2,1,0,4,1,1,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6910.953528,6911.08091,2,96,4,4,0.97,3,3,1,0,0,2,37,2,2,1,1 +67758,7,2,2,7,NA,4,4,2,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5758.0032,6529.524105,2,90,6,6,0.84,6,6,1,3,1,2,43,1,2,5,NA +67759,7,2,1,4,NA,1,1,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17341.8035,17546.820808,2,102,7,7,1.53,5,5,1,2,0,1,36,1,2,1,3 +67760,7,2,1,17,NA,5,6,2,18,216,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7057.977053,7861.480262,2,92,12,12,NA,7,7,2,4,0,1,54,2,2,1,5 +67761,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,27271.751091,29115.882263,2,101,6,6,1.67,3,3,0,0,0,2,22,1,4,5,NA +67762,7,2,1,53,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,21016.438271,22051.40744,1,93,12,12,NA,3,3,0,1,0,2,49,2,3,1,3 +67763,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,97803.500399,99700.910854,1,95,14,14,3.8,4,4,0,2,0,2,37,1,5,1,5 +67764,7,2,1,76,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,2,NA,2,2,2,2,2,2,1,2,2,NA,12962.876803,16930.790311,2,90,6,5,1.84,2,1,0,0,2,1,76,2,1,2,NA +67765,7,2,1,2,NA,2,2,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10803.555682,10931.27688,2,94,6,6,1.43,5,4,2,1,0,2,23,2,3,6,NA 
+67766,7,2,2,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,37934.469637,38685.959862,1,94,4,4,1.26,2,2,0,0,1,2,41,1,4,5,NA +67767,7,2,2,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,29167.119125,34820.124106,1,101,1,1,0.08,6,6,0,1,0,1,51,1,2,5,NA +67768,7,2,2,14,NA,3,3,2,14,174,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,103298.858809,112630.333619,1,101,8,8,2.24,4,4,0,1,0,1,45,1,4,1,NA +67769,7,2,2,54,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,1,2,1,1,2,2,2,2,2,2,18295.488967,18390.898385,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +67770,7,2,2,0,10,4,4,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4418.651245,4685.491283,2,95,6,6,0.97,6,6,2,1,0,1,54,1,3,6,NA +67771,7,2,1,13,NA,4,4,2,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,6,6,1.51,3,3,0,1,1,1,65,1,2,1,4 +67772,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,53541.401974,54113.194007,1,99,9,9,3.74,2,2,0,0,2,2,73,1,3,1,4 +67773,7,2,2,60,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17243.546687,17909.086027,1,92,8,8,3.44,2,2,0,0,2,1,71,1,3,1,5 +67774,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16905.961576,17390.157008,2,94,8,8,1.6,6,6,3,1,0,2,32,1,4,1,4 +67775,7,2,2,29,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,93589.96496,99393.853006,1,93,15,15,5,3,3,1,0,0,1,33,1,5,1,3 +67776,7,2,1,54,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,1,2,1,1,2,2,2,2,2,2,21139.303536,20829.116843,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +67777,7,1,1,2,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5547.430651,0,1,96,9,9,2.18,5,5,1,1,0,1,26,1,4,1,4 +67778,7,2,1,31,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,17802.31438,18749.539496,1,102,3,3,0.82,2,2,0,0,0,1,31,1,4,1,4 +67779,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,207644.101324,209788.639452,1,97,10,10,4.3,2,2,0,0,1,2,56,1,5,1,5 
+67780,7,2,2,7,NA,1,1,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16061.826248,2,98,4,4,0.75,4,4,0,2,0,2,33,1,2,5,NA +67781,7,2,2,41,NA,2,2,1,NA,NA,2,NA,2,2,1,NA,5,1,2,2,2,2,2,2,2,2,2,1,2,32606.880052,32776.922157,2,93,9,9,1.49,7,7,0,3,0,2,41,2,5,1,5 +67782,7,2,1,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19440.793325,19514.660132,2,95,10,10,2.32,6,6,1,2,0,1,44,1,4,1,4 +67783,7,2,2,13,NA,5,6,2,13,162,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11975.458482,12226.374363,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 +67784,7,2,2,18,NA,1,1,1,18,216,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15690.47168,16249.379042,1,102,5,5,0.86,5,5,2,0,0,2,21,2,2,5,NA +67785,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,27156.758177,27240.256208,1,95,4,4,1.21,2,2,0,0,0,1,46,1,2,1,2 +67786,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,152858.509804,153199.936938,1,95,3,3,1.33,1,1,0,0,0,1,45,1,2,5,NA +67787,7,2,1,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,24881.621161,24802.260383,2,94,15,15,5,1,1,0,0,0,1,44,1,5,3,NA +67788,7,2,1,62,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,2,2,2,2,2,2,1,2,2,2,9871.376767,9834.888389,2,97,4,4,1.29,2,2,0,0,2,2,70,1,3,1,4 +67789,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,1,1,2,2,1,2,2,1,2,2,1,18723.98095,18882.01405,2,101,2,2,0.22,4,4,1,1,0,2,41,1,2,4,NA +67790,7,2,1,14,NA,4,4,2,14,172,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10065.83895,10440.165744,2,99,14,14,4.09,3,3,0,2,0,2,37,1,5,5,NA +67791,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,32489.451884,36954.118364,2,91,3,3,1.16,1,1,0,0,1,2,80,1,4,2,NA +67792,7,1,1,10,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8943.919305,0,1,92,5,5,0.63,7,7,0,4,1,1,60,NA,NA,1,NA +67793,7,2,1,39,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,3,1,NA,2,2,2,2,2,2,1,2,2,2,37658.482129,41412.915504,1,100,7,7,1.74,4,4,0,2,0,2,39,2,1,1,3 
+67794,7,2,1,51,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,25139.75955,26335.178282,2,99,77,77,NA,3,3,0,0,0,2,57,2,2,1,1 +67795,7,2,1,41,NA,5,6,2,NA,NA,2,NA,2,2,99,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,11113.602843,11073.454175,3,90,77,77,NA,7,7,1,2,0,1,41,2,3,6,NA +67796,7,2,2,1,20,4,4,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7108.64371,7648.979929,1,90,14,14,2.96,5,5,1,2,0,1,31,1,5,1,4 +67797,7,2,1,45,NA,2,2,2,NA,NA,2,NA,2,1,77,NA,3,3,NA,2,2,2,2,2,2,2,2,2,2,28428.303115,29087.004066,3,90,77,77,NA,4,3,0,0,0,1,45,2,3,3,NA +67798,7,2,1,51,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,24923.937021,24558.216666,2,93,7,7,2.58,2,2,0,0,0,1,51,1,4,1,4 +67799,7,2,2,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,27945.726298,28031.650144,1,94,6,6,1.26,5,5,0,2,0,2,38,1,4,1,NA +67800,7,2,1,0,8,2,2,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5863.789973,5970.184293,2,93,8,8,2.17,4,4,2,0,0,2,30,1,4,1,4 +67801,7,2,2,11,NA,3,3,1,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17482.620882,17502.587262,1,94,7,7,1.29,6,6,1,3,0,1,38,1,3,1,2 +67802,7,2,1,4,NA,5,7,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8890.779467,9258.147648,3,91,10,10,2.48,5,5,2,1,0,2,27,1,2,1,4 +67803,7,2,1,6,NA,5,7,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18834.739821,19876.018883,1,94,2,2,0.3,5,5,1,2,0,1,23,1,1,6,NA +67804,7,2,2,4,NA,5,6,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4437.828549,4650.082829,1,102,5,5,0.92,5,5,1,2,0,2,44,2,1,1,2 +67805,7,2,1,6,NA,4,4,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7687.032802,8119.723568,2,99,4,4,0.78,4,4,0,2,0,2,45,1,3,5,NA +67806,7,2,2,16,NA,2,2,2,16,198,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,20937.26435,23702.551829,1,90,5,5,1,4,4,0,2,0,1,40,2,2,1,1 +67807,7,2,2,3,NA,3,3,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,76623.952798,84569.150751,2,91,15,15,5,3,3,1,0,0,1,37,1,5,1,5 
+67808,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,1,2,NA,40827.983435,45600.111014,1,98,8,8,2.62,3,3,0,0,3,1,68,1,4,1,4 +67809,7,2,1,13,NA,1,1,1,13,164,NA,NA,2,2,3,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,32326.52031,34735.818434,2,94,5,5,0.65,6,6,0,3,0,1,44,2,1,1,1 +67810,7,2,1,0,6,2,2,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5420.429995,5420.529903,3,90,6,6,1.3,4,4,2,0,0,1,20,2,5,6,NA +67811,7,2,1,2,NA,4,4,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,6454.74783,2,100,15,15,5,3,3,1,0,0,2,35,1,5,1,2 +67812,7,2,1,66,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,1,5,NA,2,2,2,1,2,2,2,2,2,2,12118.033999,12538.522644,2,91,99,99,NA,3,3,0,0,1,2,31,2,5,5,NA +67813,7,2,1,67,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,8561.661692,8628.633702,2,99,15,14,5,2,1,0,0,2,2,65,NA,NA,6,NA +67814,7,2,2,19,NA,4,4,1,19,232,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,1,1,0.06,1,1,0,0,0,2,19,1,4,NA,NA +67815,7,2,2,0,4,3,3,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20786.668002,20803.881706,1,101,8,8,1.85,5,5,3,0,0,2,31,1,2,1,2 +67816,7,2,1,14,NA,4,4,2,14,170,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13969.457688,14306.402061,1,90,3,3,0.63,3,3,1,1,0,2,32,1,4,5,NA +67817,7,2,1,55,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,35958.875702,36129.285962,1,98,8,1,0,4,1,0,0,0,1,53,1,2,1,3 +67818,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16007.367503,15264.693262,2,90,15,15,5,4,4,0,0,0,1,57,2,5,1,5 +67819,7,2,2,79,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,44856.466004,46390.551109,2,94,3,3,1.16,1,1,0,0,1,2,79,1,4,2,NA +67820,7,2,2,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,19130.246369,19040.031855,2,95,7,7,1.74,4,4,0,2,0,2,47,1,5,4,NA +67821,7,2,1,7,NA,4,4,1,8,96,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12399.014378,12375.702699,2,96,2,2,0.31,4,4,0,2,0,2,30,NA,NA,6,NA 
+67822,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,38369.898602,44511.264162,2,95,3,3,0.95,2,2,0,0,2,2,80,1,1,1,2 +67823,7,2,2,15,NA,3,3,1,15,189,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,92934.523767,97199.343719,1,92,8,8,2.17,4,4,0,1,2,2,80,1,3,2,NA +67824,7,2,1,40,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15851.563418,15794.298537,2,95,15,15,5,1,1,0,0,0,1,40,2,5,5,NA +67825,7,2,2,11,NA,4,4,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11116.391625,11531.852198,1,92,77,77,NA,5,5,1,2,0,2,41,1,3,5,NA +67826,7,2,1,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,122833.610417,128308.021903,1,98,15,15,4.34,4,4,0,2,0,1,51,1,5,1,5 +67827,7,2,1,53,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,27609.8026,27737.039026,2,101,8,8,2.7,3,3,0,1,0,1,53,1,4,1,2 +67828,7,2,1,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,10717.375231,11136.552864,2,101,4,4,1.22,2,2,0,0,2,1,67,1,1,1,2 +67829,7,2,1,19,NA,3,3,1,19,229,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,73465.215193,72911.812289,1,100,15,15,4.63,5,5,0,0,0,1,51,1,5,1,3 +67830,7,2,1,0,5,2,2,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,5733.655482,6014.350383,1,90,10,10,2.44,5,5,1,0,0,2,56,2,1,1,1 +67831,7,2,2,0,1,3,3,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16608.743852,16157.613485,1,94,15,15,5,4,4,2,0,0,1,37,1,5,1,4 +67832,7,2,2,71,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,17836.372654,18457.016952,2,96,5,5,0.67,6,6,1,2,1,1,34,1,4,1,4 +67833,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,12526.25131,13442.161918,2,95,5,5,1.32,2,2,0,0,2,2,78,1,2,1,2 +67834,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,92613.517882,94092.287712,3,91,8,6,2.75,3,1,0,0,0,1,23,1,4,1,4 +67835,7,2,2,9,NA,4,4,2,9,114,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7405.320425,7603.433563,2,99,6,6,1.13,4,4,1,1,0,1,33,1,3,6,NA 
+67836,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,108410.783716,112153.23206,1,91,8,8,2.17,4,4,0,0,0,1,59,1,4,1,5 +67837,7,2,1,8,NA,3,3,2,8,99,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,49922.147265,52289.098209,1,98,9,9,2.6,4,4,1,1,0,2,35,1,2,1,NA +67838,7,2,1,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,11650.457723,11613.298191,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +67839,7,2,1,61,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,9313.036488,9824.696739,1,103,15,15,5,2,2,0,0,1,2,56,2,5,1,5 +67840,7,2,1,28,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,115954.75973,120569.27907,1,92,5,5,1.97,1,1,0,0,0,1,28,1,4,3,NA +67841,7,2,1,58,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11498.794569,11546.882913,1,96,15,15,5,5,5,0,0,0,1,58,2,5,1,5 +67842,7,2,2,63,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,16420.864787,17686.616768,2,102,1,1,0.16,3,3,1,0,1,2,63,1,2,4,NA +67843,7,2,1,3,NA,3,3,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24655.003412,27816.70307,1,98,2,2,0.36,5,5,3,0,0,1,25,1,3,1,3 +67844,7,2,1,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,111284.977736,114467.051739,3,91,8,5,1.5,3,2,0,0,0,1,23,1,4,1,4 +67845,7,2,1,49,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18116.816149,22904.074887,2,90,8,7,3.21,2,1,0,0,0,1,51,1,2,5,NA +67846,7,2,1,15,NA,5,7,2,15,183,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,60818.973858,62775.825513,1,99,15,15,5,4,4,0,2,0,2,44,1,5,1,5 +67847,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,16186.470589,15743.485359,2,99,3,3,0.42,6,6,1,2,0,2,43,1,4,6,NA +67848,7,2,1,66,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,12585.09113,13280.957381,2,98,4,4,1.34,2,2,0,0,2,1,66,1,1,1,1 +67849,7,2,2,0,11,5,7,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5882.73073,5843.204067,3,90,15,15,5,3,3,1,0,0,2,32,2,5,1,5 
+67850,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10440.656902,1,99,10,10,5,1,1,0,0,1,2,64,1,5,3,NA +67851,7,2,2,5,NA,3,3,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,51483.624552,53105.566664,1,94,15,15,5,4,4,2,0,0,1,51,2,5,1,5 +67852,7,2,1,15,NA,5,6,1,15,183,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12412.721558,13262.259083,1,92,10,10,2.82,4,4,0,2,0,2,48,2,5,1,5 +67853,7,2,1,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19923.530941,23644.216851,2,95,6,6,0.97,6,6,2,2,0,1,37,1,3,1,4 +67854,7,2,1,80,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12101.489198,13370.144062,1,93,3,3,0.65,3,3,0,0,3,2,74,2,1,2,NA +67855,7,2,1,58,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11498.794569,11546.882913,2,100,15,15,5,3,3,0,1,0,1,58,2,5,1,5 +67856,7,2,2,0,8,4,4,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4523.857227,4573.252417,2,95,6,6,1.57,3,3,1,0,0,1,29,1,3,1,4 +67857,7,2,2,25,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16021.911789,17471.931528,1,94,7,7,1.79,4,4,0,1,0,1,59,2,4,1,4 +67858,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,NA,10049.7347,10691.470743,2,90,5,5,1.98,1,1,0,0,1,2,68,1,4,5,NA +67859,7,2,1,18,NA,4,4,1,18,219,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12792.875152,12871.476461,2,93,99,99,NA,7,6,1,0,0,1,19,1,3,NA,NA +67860,7,2,2,14,NA,3,3,2,14,174,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23708.623398,24267.27809,2,97,5,5,1.08,3,3,0,1,0,2,45,1,4,6,NA +67861,7,2,1,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,85786.890667,86591.455494,2,96,9,9,5,1,1,0,0,1,1,60,1,5,5,NA +67862,7,2,1,1,20,3,3,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23524.870375,27564.905667,1,90,7,7,2.1,3,3,1,0,0,2,40,2,5,1,4 +67863,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,196995.351093,202246.928022,1,91,15,15,5,3,3,0,0,0,2,50,1,4,1,4 
+67864,7,2,2,22,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,15735.580263,16455.708445,1,97,15,15,5,4,4,0,0,0,1,51,2,5,1,5 +67865,7,2,1,59,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17121.370948,17378.349074,1,92,14,14,3.9,4,4,0,0,0,2,55,2,5,1,5 +67866,7,2,2,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,13101.965339,13686.954077,1,100,6,6,2.24,1,1,0,0,1,2,60,1,5,3,NA +67867,7,2,2,11,NA,2,2,1,11,138,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,13720.541363,14610.824098,2,100,4,4,0.81,4,4,0,2,0,2,37,1,2,1,2 +67868,7,2,2,13,NA,1,1,1,14,168,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23979.296993,25562.604106,1,101,3,3,0.41,5,5,0,2,1,2,36,2,4,4,NA +67869,7,2,2,4,NA,2,2,2,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,2,2,1,NA,NA,NA,NA,12675.140707,13078.439708,1,96,5,5,0.94,4,4,2,0,0,1,32,2,3,1,4 +67870,7,2,2,20,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,1,1,2,2,1,10345.80475,11593.940235,3,90,15,15,3.23,6,6,0,2,0,1,50,2,2,1,2 +67871,7,2,2,6,NA,2,2,2,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15148.721588,15796.67129,2,91,10,10,3.04,4,4,1,1,0,2,31,2,5,1,5 +67872,7,2,2,78,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,1,1,NA,2,2,2,1,2,2,2,2,2,NA,20441.922878,23572.887355,1,90,4,4,0.94,3,3,0,0,2,2,78,2,1,1,2 +67873,7,2,2,50,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,20388.95294,20876.134898,2,96,6,6,0.77,7,7,2,1,0,1,53,2,1,1,1 +67874,7,2,1,69,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,138075.879417,141933.339512,2,94,10,10,4.3,2,2,0,0,2,1,69,1,4,1,5 +67875,7,2,2,9,NA,4,4,1,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11195.065587,11548.620784,2,96,15,9,3.74,3,2,0,1,0,2,38,1,5,6,NA +67876,7,2,2,5,NA,4,4,2,5,63,NA,NA,2,1,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11353.600279,12216.600549,1,98,15,15,5,6,6,3,0,0,1,37,2,5,1,4 +67877,7,2,2,79,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,33602.625,34751.830256,2,103,15,15,3.44,7,7,0,1,2,2,79,1,3,2,NA 
+67878,7,2,1,9,NA,4,4,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8714.559478,8887.823591,2,96,7,7,1.04,7,7,0,4,0,2,37,1,3,3,NA +67879,7,2,2,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,139800.409559,142430.529735,1,100,15,15,5,4,4,0,1,0,1,50,1,4,1,4 +67880,7,2,1,13,NA,5,6,1,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9737.713978,10134.40846,2,100,14,14,4.03,4,4,0,2,0,1,48,2,5,1,5 +67881,7,2,2,17,NA,3,3,1,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,31282.308091,32019.424199,2,98,3,3,0.38,5,5,0,4,0,2,39,1,4,5,NA +67882,7,2,1,4,NA,3,3,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,86817.367332,101726.917198,2,91,15,15,5,4,4,2,0,0,1,35,1,5,1,5 +67883,7,2,1,5,NA,5,7,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12996.965152,13393.943951,1,101,2,2,0.33,4,4,2,1,0,2,26,1,4,5,NA +67884,7,2,2,10,NA,2,2,1,10,120,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16986.005478,17464.743129,2,102,5,5,0.89,4,4,0,3,0,2,44,2,2,4,NA +67885,7,2,2,80,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,3,2,NA,1,2,1,1,2,1,1,2,1,NA,12344.929687,13171.097693,1,93,1,1,0.28,1,1,0,0,1,2,80,2,3,2,NA +67886,7,2,2,3,NA,1,1,1,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12714.663639,13516.237728,1,102,5,5,0.86,5,5,2,0,0,2,21,2,2,5,NA +67887,7,2,2,67,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,17243.546687,17909.086027,1,92,14,14,5,2,2,0,0,2,1,62,1,4,1,4 +67888,7,2,1,8,NA,3,3,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,27154.222487,28089.451048,2,101,4,4,0.73,5,5,1,2,0,1,40,1,5,1,5 +67889,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,30863.871606,31440.435679,1,101,4,4,1.43,1,1,0,0,0,2,42,1,5,5,NA +67890,7,2,2,7,NA,1,1,2,7,88,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14300.71869,14814.284705,2,94,13,13,NA,5,5,0,3,0,1,32,2,2,1,1 +67891,7,2,2,29,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,17204.739153,20747.6165,1,93,9,9,5,1,1,0,0,0,2,29,1,5,5,NA 
+67892,7,2,2,10,NA,3,3,1,10,123,NA,NA,2,2,3,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,13450.606713,14078.286101,3,91,4,4,0.65,5,5,2,2,0,2,27,2,2,3,NA +67893,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,83819.702285,99717.125794,1,99,14,14,5,1,1,0,0,0,1,34,1,5,5,NA +67894,7,2,1,80,NA,5,7,1,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,38080.409655,40864.822189,2,95,6,6,1.95,2,2,0,0,2,1,80,1,1,1,3 +67895,7,2,1,8,NA,4,4,1,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9261.557132,9782.87535,2,100,7,7,1.79,4,4,0,1,0,2,51,1,3,3,NA +67896,7,2,1,17,NA,4,4,2,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15381.581315,15476.088016,1,93,12,12,NA,5,4,0,2,0,1,32,1,2,5,NA +67897,7,2,2,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,60257.408263,62318.203523,1,103,15,15,5,2,2,0,0,2,1,74,1,5,1,5 +67898,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,11696.973403,12170.020379,2,97,6,6,1,6,6,1,2,2,2,60,1,2,2,NA +67899,7,2,1,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,NA,7663.797586,8115.960731,1,96,3,3,1.25,1,1,0,0,1,1,80,1,2,4,NA +67900,7,1,1,24,NA,5,6,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,13859.220514,0,1,98,12,2,0.54,3,1,0,0,0,1,24,1,4,5,NA +67901,7,2,2,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,12222.312509,13136.499838,2,95,12,12,NA,3,3,0,0,3,2,73,1,2,1,1 +67902,7,2,1,21,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,3,4,NA,2,2,2,2,2,2,2,2,2,2,39915.513053,44831.646235,2,98,5,5,1.07,4,4,0,1,0,1,53,2,1,1,1 +67903,7,2,2,3,NA,4,4,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10771.85499,11049.894844,2,97,1,1,0.2,2,2,1,0,0,2,26,1,4,5,NA +67904,7,2,1,1,21,2,2,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12373.634774,12852.001067,1,92,10,10,2.63,5,5,2,1,0,2,26,1,4,1,4 +67905,7,2,2,10,NA,5,7,1,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5836.229473,6119.1334,2,103,14,14,3.48,5,5,0,2,1,1,43,1,4,1,5 
+67906,7,2,2,8,NA,3,3,2,8,105,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24070.467912,25563.654853,1,95,15,15,3.62,7,7,2,4,0,1,59,1,5,1,2 +67907,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,74517.751389,89988.433696,2,94,12,12,NA,5,5,1,1,0,1,37,1,4,1,3 +67908,7,2,1,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,23235.97926,24675.846787,2,95,6,6,1.36,3,3,0,0,2,2,60,1,5,1,4 +67909,7,2,1,0,6,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,6218.697738,6608.80804,2,96,6,6,0.77,7,7,2,1,0,1,53,2,1,1,1 +67910,7,2,1,80,NA,2,2,1,NA,NA,1,1,99,NA,NA,NA,9,1,NA,1,1,2,1,1,2,1,1,2,NA,13906.496347,14197.251523,2,103,99,99,NA,6,1,0,0,3,1,80,NA,NA,2,NA +67911,7,2,1,15,NA,3,3,1,15,186,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,81936.967747,80860.127913,1,94,14,14,2.96,5,5,0,3,0,2,39,1,4,1,3 +67912,7,2,1,65,NA,2,2,1,NA,NA,1,2,2,1,7,NA,5,1,NA,2,2,2,1,2,2,1,2,2,1,14067.170863,14292.508976,2,102,15,15,5,3,3,0,0,2,1,36,2,5,5,NA +67913,7,2,1,11,NA,1,1,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13665.416457,13541.311863,1,94,2,2,0.27,5,5,0,4,0,2,47,2,1,4,NA +67914,7,2,1,8,NA,3,3,1,9,108,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23188.935049,25672.571973,3,91,7,7,1.1,7,7,0,4,0,1,40,1,4,1,3 +67915,7,2,1,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15976.466658,17286.192057,2,96,8,5,2.2,2,1,0,0,0,2,25,2,5,5,NA +67916,7,2,2,39,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,4,1,2,2,2,2,2,2,2,NA,NA,NA,NA,38184.257672,39931.153329,2,91,8,8,1.85,5,5,0,2,1,1,39,2,3,1,4 +67917,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,22634.531479,23281.741513,1,103,9,6,2.6,3,1,0,0,0,1,27,1,5,5,NA +67918,7,2,2,19,NA,4,4,2,19,232,2,NA,2,2,3,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10694.834447,10900.381844,1,90,4,4,0.58,6,6,0,3,0,2,21,2,5,5,NA +67919,7,2,2,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,186864.831109,211461.930376,1,97,3,3,1.3,1,1,0,0,0,2,59,1,4,3,NA 
+67920,7,2,2,1,19,2,2,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13464.808163,13893.232889,1,101,15,15,4.99,4,4,2,0,0,1,31,1,4,1,4 +67921,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,51644.110977,57877.507418,1,102,14,14,5,2,2,0,0,1,2,59,1,5,3,NA +67922,7,2,2,3,NA,5,6,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7483.230909,7475.326886,2,96,15,15,5,3,3,1,0,0,1,34,2,5,1,5 +67923,7,2,2,11,NA,4,4,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8115.309776,8917.717263,2,95,6,6,0.97,6,6,2,2,0,1,37,1,3,1,4 +67924,7,2,2,45,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19150.604366,19611.675453,3,91,15,15,4.47,4,4,0,1,0,2,45,2,5,1,5 +67925,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,12449.932013,12144.773422,3,90,7,7,2.23,3,3,0,0,0,2,51,1,4,3,NA +67926,7,2,2,40,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,15819.48188,17091.862943,1,90,15,15,5,3,3,1,0,0,1,42,1,5,1,5 +67927,7,2,1,32,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15137.053737,17738.779486,2,99,15,15,5,1,1,0,0,0,1,32,1,5,5,NA +67928,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,64581.191728,64779.757488,2,94,15,15,5,2,2,0,0,0,1,36,1,2,1,4 +67929,7,1,2,20,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,1,2,2,NA,NA,NA,NA,18723.98095,0,2,95,9,9,1.81,6,6,1,1,0,2,56,1,4,3,NA +67930,7,2,1,59,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,27595.50738,28151.492876,1,98,10,10,3.51,3,3,0,0,0,1,59,2,5,1,4 +67931,7,2,1,0,10,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5991.543696,6284.863687,1,102,5,5,0.86,5,5,2,0,0,2,21,2,2,5,NA +67932,7,2,1,9,NA,3,3,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,48147.167375,49984.220445,3,92,14,14,2.74,6,6,2,2,0,1,35,1,5,1,4 +67933,7,2,1,12,NA,3,3,1,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,124064.997098,122531.713298,1,95,7,7,2.54,2,2,0,1,0,2,37,1,1,5,NA 
+67934,7,2,2,16,NA,3,3,1,16,194,NA,NA,1,1,NA,8,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,113907.203714,124196.980527,1,94,7,7,1.52,4,4,0,2,2,1,61,2,1,1,5 +67935,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,74265.831326,76685.765974,3,92,14,14,2.74,6,6,2,2,0,1,35,1,5,1,4 +67936,7,2,1,13,NA,5,6,2,14,168,NA,NA,1,1,NA,7,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,10346.302718,11357.395963,2,91,12,12,NA,7,6,0,4,2,2,72,2,1,2,NA +67937,7,1,2,74,NA,1,1,NA,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15730.58404,0,2,98,77,77,NA,4,4,0,0,2,1,71,NA,NA,1,1 +67938,7,2,2,27,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16026.268721,19326.470131,1,93,14,14,5,1,1,0,0,0,2,27,1,5,5,NA +67939,7,2,1,72,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,49653.587757,54188.961403,2,98,15,15,5,2,2,0,0,2,2,66,1,3,1,1 +67940,7,2,1,66,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22184.040999,22468.589781,2,94,8,8,3.06,2,2,0,0,2,1,66,1,4,1,4 +67941,7,2,2,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,2,1,2,2,1,2,2,1,2,2,1,35313.648114,35629.376203,2,95,6,6,0.9,6,6,1,1,0,1,49,1,1,1,1 +67942,7,2,1,1,12,2,2,2,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8331.647763,8595.381151,2,90,99,99,NA,5,5,1,1,0,2,40,2,3,1,1 +67943,7,2,1,5,NA,5,7,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6176.631821,6670.596558,1,93,15,15,5,5,5,1,2,0,2,40,1,5,1,5 +67944,7,2,2,10,NA,4,4,2,10,131,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11467.793741,13004.375485,2,91,6,6,0.78,7,7,1,4,0,2,38,2,2,77,NA +67945,7,2,1,6,NA,3,3,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,38712.032122,41122.784973,3,91,15,15,5,6,6,1,3,0,2,40,1,5,1,5 +67946,7,2,1,3,NA,1,1,1,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,18754.85406,18780.196288,2,102,5,5,0.89,4,4,2,0,0,1,33,2,9,1,2 +67947,7,2,1,79,NA,4,4,1,NA,NA,2,NA,2,2,4,NA,3,1,NA,1,2,2,1,2,1,1,2,2,NA,8074.739647,8488.510868,2,93,13,13,NA,3,3,0,0,2,2,63,2,2,1,3 
+67948,7,2,1,27,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,4,5,NA,2,2,2,NA,NA,NA,1,2,2,1,44549.73661,46547.524849,2,93,4,4,0.69,4,4,0,1,1,2,66,2,3,2,NA +67949,7,2,2,10,NA,2,2,2,10,129,NA,NA,2,2,1,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,10762.400563,12070.126078,2,90,3,3,0.46,5,5,1,3,0,2,35,2,1,4,NA +67950,7,2,2,4,NA,4,4,1,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11914.048896,13313.886049,1,100,14,14,3.6,4,4,1,1,0,1,41,1,4,1,5 +67951,7,2,2,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,18174.62037,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +67952,7,2,2,65,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,9716.805546,12994.252166,2,90,2,2,0.55,1,1,0,0,1,2,65,2,1,5,NA +67953,7,2,1,0,9,4,4,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5081.270665,5448.486113,2,99,77,77,NA,6,6,1,1,0,2,42,1,3,1,3 +67954,7,2,1,15,NA,2,2,2,15,181,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21399.234084,21768.65302,1,91,15,15,5,4,4,0,2,0,1,49,1,5,1,5 +67955,7,2,2,12,NA,3,3,2,12,147,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71303.309206,77744.474592,2,94,15,15,5,5,5,0,3,0,1,44,1,5,1,4 +67956,7,2,2,3,NA,5,6,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4369.011217,4638.097632,2,100,4,4,0.44,7,7,1,2,2,1,71,2,1,1,1 +67957,7,2,1,50,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,2,3,NA,1,2,2,NA,NA,NA,1,2,2,1,27609.8026,27521.740194,2,101,99,99,NA,3,3,0,1,1,2,78,1,1,2,NA +67958,7,2,2,43,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,3,1,2,2,1,2,2,NA,NA,NA,NA,9131.449129,11051.567129,1,103,77,77,NA,6,6,0,2,2,1,70,NA,NA,1,1 +67959,7,2,1,13,NA,4,4,1,13,162,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16147.713323,16532.569027,1,92,5,5,0.95,4,4,0,2,0,2,33,1,4,5,NA +67960,7,2,2,40,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,5,1,2,2,2,2,2,2,2,2,2,2,2,31334.47528,32058.983464,2,96,7,7,1.57,4,4,0,2,0,1,40,2,2,1,5 +67961,7,2,1,6,NA,2,2,2,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15882.795076,15889.831109,1,97,3,3,0.44,5,5,2,2,0,2,26,1,4,4,NA 
+67962,7,2,2,40,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,2,1,2,2,1,2,2,1,2,2,1,16339.124275,15891.961277,2,95,15,15,3.85,7,7,0,3,1,2,62,1,4,2,NA +67963,7,2,2,11,NA,4,4,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8146.767632,8771.942314,2,100,6,6,0.99,5,5,0,3,0,2,40,1,3,1,3 +67964,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,2,1,2,2,1,2,2,NA,NA,NA,NA,31335.13799,33684.15133,1,95,6,3,0.45,6,4,1,2,0,1,28,1,2,1,2 +67965,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,52448.388619,54438.22579,1,99,8,8,3.4,2,2,0,0,2,1,74,1,5,1,4 +67966,7,2,1,20,NA,3,3,1,NA,NA,2,NA,2,1,6,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,101419.325386,103801.228657,1,100,15,15,4.63,5,5,0,0,0,1,51,1,5,1,3 +67967,7,2,1,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,158509.005274,159425.752864,1,100,15,15,4.07,5,5,0,2,0,2,41,1,5,1,4 +67968,7,2,2,7,NA,5,6,2,7,88,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6878.565723,7345.278289,3,90,14,14,4.71,3,3,0,1,0,1,43,1,5,1,5 +67969,7,2,2,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,118269.59683,119479.91378,2,98,10,10,4.55,2,2,0,0,1,2,66,1,5,2,NA +67970,7,2,1,72,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,48254.793439,51076.634961,2,100,99,99,NA,2,2,0,0,2,1,72,1,4,1,4 +67971,7,2,2,11,NA,1,1,1,11,140,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12215.503444,12532.555214,1,102,5,5,0.92,5,5,0,3,0,2,39,2,3,1,3 +67972,7,2,1,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,84910.063417,90031.493834,2,96,14,4,1.38,3,1,0,0,0,1,30,2,5,5,NA +67973,7,2,1,6,NA,5,6,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10810.913614,11522.32071,1,97,15,15,4.07,5,5,0,3,0,1,42,2,5,1,5 +67974,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,37814.382501,44413.458724,1,98,3,3,1.03,1,1,0,0,0,1,27,1,4,5,NA +67975,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,33731.056243,36109.766611,2,98,4,4,1.22,2,2,0,0,2,1,80,1,1,1,1 
+67976,7,1,1,34,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,99134.771466,0,2,99,15,15,5,2,2,0,0,0,2,34,NA,NA,1,5 +67977,7,2,2,56,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,17152.714252,16747.256128,1,102,7,1,0.33,5,1,1,0,2,1,47,1,3,5,NA +67978,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,49428.481088,50236.061312,1,101,1,1,0.13,1,1,0,0,1,2,60,1,4,3,NA +67979,7,2,1,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,8232.241159,8296.636338,2,98,7,7,2.72,2,2,0,0,1,1,65,1,4,3,NA +67980,7,2,2,8,NA,4,4,1,8,105,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8362.256577,8929.488766,2,100,7,7,1.34,5,5,0,2,0,2,53,1,4,4,NA +67981,7,2,1,19,NA,1,1,2,19,238,2,NA,2,1,4,15,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,20174.283097,21488.447069,2,94,7,7,1.33,6,6,0,1,0,1,55,2,2,1,1 +67982,7,1,1,42,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,148374.146816,0,2,96,10,6,2.66,2,1,0,0,0,1,35,1,5,5,NA +67983,7,2,2,47,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,13568.706187,17300.043727,3,90,6,6,1.98,2,2,0,0,0,2,47,2,5,2,NA +67984,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,85040.351078,89341.219459,1,93,7,7,2.16,3,3,0,1,0,2,50,1,5,3,NA +67985,7,2,1,13,NA,3,3,2,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,58479.782556,60361.370686,1,99,15,15,5,5,5,0,3,0,2,43,1,5,1,5 +67986,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,8411.959333,8787.544319,1,96,9,9,3.97,2,2,0,0,1,1,28,1,5,5,NA +67987,7,2,2,6,NA,4,4,2,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7446.876055,7952.015759,2,99,6,6,1.15,5,5,1,2,0,2,34,1,4,77,NA +67988,7,2,1,14,NA,3,3,2,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,58479.782556,60361.370686,1,99,15,15,5,5,5,0,3,0,2,43,1,5,1,5 +67989,7,2,2,22,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,2,2,1,2,2,NA,NA,NA,NA,55392.206282,57222.234271,3,91,3,3,1.29,1,1,0,0,0,2,22,1,3,5,NA 
+67990,7,2,2,12,NA,5,6,2,12,148,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6937.463063,7205.726089,3,90,10,10,2.41,5,5,1,2,0,1,44,2,4,1,5 +67991,7,2,1,57,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,2,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11690.444016,12095.591726,3,90,4,4,0.92,3,3,0,0,1,2,56,2,2,1,2 +67992,7,2,1,41,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,52941.648658,52646.945429,2,102,15,15,5,2,2,0,0,0,1,41,2,4,6,NA +67993,7,2,1,60,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,5,NA,2,2,2,2,2,2,1,2,2,NA,8609.250304,11228.904188,2,90,4,4,0.57,5,5,1,0,2,2,80,2,1,2,NA +67994,7,2,1,12,NA,3,3,2,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71458.892941,71654.216305,2,94,3,3,0.54,4,4,0,1,0,2,48,1,3,1,3 +67995,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,58072.588922,61468.555023,1,101,14,14,5,2,2,0,0,2,2,70,1,5,1,2 +67996,7,2,1,9,NA,1,1,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,2,2,2,2,11367.678664,11547.566042,2,96,5,5,0.78,5,5,0,2,0,1,37,2,1,5,NA +67997,7,1,2,25,NA,5,6,NA,NA,NA,2,NA,1,1,NA,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,11934.941038,0,3,90,8,8,1.85,5,5,0,0,1,2,25,1,5,5,NA +67998,7,2,2,52,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,17991.883465,18228.515837,2,102,10,10,3.62,3,3,0,0,0,1,51,2,5,1,5 +67999,7,2,2,52,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,24004.6026,25447.359355,2,91,14,14,4.19,3,3,0,1,0,1,55,2,3,1,5 +68000,7,2,1,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21941.544332,21717.573526,1,99,10,10,2.58,5,5,0,1,2,1,65,1,5,1,3 +68001,7,2,1,79,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,NA,8992.410435,11769.853451,2,90,4,4,1.43,1,1,0,0,1,1,79,1,2,4,NA +68002,7,2,2,11,NA,3,3,1,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,55469.656717,57246.428971,1,98,9,9,2.15,5,5,0,3,0,2,32,1,3,1,4 +68003,7,2,1,72,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,15262.668516,16210.38509,1,102,7,7,1.41,5,5,0,2,2,1,72,1,4,1,3 
+68004,7,2,2,33,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,36002.138941,36147.943555,1,103,15,15,5,2,2,0,0,0,1,36,2,5,1,5 +68005,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,99,2,0.55,2,1,0,0,0,1,21,NA,NA,5,NA +68006,7,2,1,13,NA,4,4,2,13,167,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15970.206483,16374.875978,2,97,3,3,0.46,5,5,0,3,0,1,40,1,2,1,3 +68007,7,2,2,33,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23038.68441,23821.989403,1,92,6,6,1.92,2,2,0,0,0,2,33,2,4,5,NA +68008,7,2,1,80,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,3,2,NA,2,2,2,2,2,2,NA,NA,NA,NA,9710.399795,10186.406299,2,93,99,99,NA,1,1,0,0,1,1,80,2,3,2,NA +68009,7,2,2,14,NA,4,4,1,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13697.127402,13594.601175,2,100,10,7,2.05,4,3,0,2,0,1,20,1,4,6,NA +68010,7,2,1,63,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,10717.375231,11218.730451,2,97,3,3,1.29,1,1,0,0,1,1,63,1,2,2,NA +68011,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,47973.37979,48741.666001,1,95,6,6,0.81,6,6,2,2,0,1,30,1,3,1,4 +68012,7,2,2,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,12537.004291,13376.026635,2,98,77,77,NA,2,2,0,0,2,2,70,1,3,1,2 +68013,7,2,2,60,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14067.755128,14924.32642,2,91,8,8,4.3,1,1,0,0,1,2,60,2,5,5,NA +68014,7,2,1,16,NA,3,3,2,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30100.326038,29873.584609,1,101,6,6,1.17,4,4,0,1,0,1,41,1,3,6,NA +68015,7,2,2,9,NA,3,3,2,9,109,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19824.800404,20630.46787,1,101,4,4,0.78,4,4,1,2,0,2,32,1,3,3,NA +68016,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,159097.269198,158226.612798,1,97,15,15,4.77,4,4,0,0,0,1,56,1,4,1,4 +68017,7,2,2,68,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16352.915834,18960.412859,3,92,10,10,4.3,5,2,2,1,1,2,68,1,3,1,1 
+68018,7,2,1,1,22,5,6,2,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5891.941477,6363.138629,1,90,6,6,0.92,6,6,2,0,2,2,30,2,5,1,5 +68019,7,2,1,12,NA,3,3,1,12,149,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71934.689876,71392.8162,1,100,15,15,5,5,5,0,3,0,1,47,1,5,1,5 +68020,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,35334.703093,40990.264786,1,101,3,3,1.25,1,1,0,0,1,2,80,1,2,2,NA +68021,7,2,1,34,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,20803.970543,21211.256549,1,93,5,5,1.84,1,1,0,0,0,1,34,2,2,5,NA +68022,7,2,1,58,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,37426.314738,36877.141263,3,92,15,15,5,2,2,0,0,0,1,58,1,4,1,4 +68023,7,2,1,25,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,5,NA,1,2,2,NA,NA,NA,1,2,2,1,9177.295801,9548.31812,2,92,77,77,NA,4,4,0,0,0,1,27,2,2,5,NA +68024,7,2,2,26,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,2,2,2,1,2,2,NA,NA,NA,NA,30253.427014,34374.819903,2,90,6,6,2.01,2,2,0,1,0,2,26,1,3,5,NA +68025,7,2,2,19,NA,2,2,1,19,238,2,NA,2,2,3,15,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,18556.092615,19088.608674,1,103,6,6,0.93,5,5,0,1,0,1,39,2,3,1,3 +68026,7,2,2,1,19,5,7,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6683.092466,7272.710878,3,91,8,8,2.7,3,3,1,0,0,1,31,1,5,1,5 +68027,7,2,2,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,18490.479848,18604.849683,2,100,8,8,2.7,3,3,1,0,0,2,41,1,4,1,3 +68028,7,2,2,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,14994.337564,16150.13007,1,100,9,9,3.97,2,2,0,0,2,1,70,NA,NA,1,3 +68029,7,2,1,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,7973.883342,8285.757609,1,100,3,3,0.9,1,1,0,0,1,1,60,1,3,5,NA +68030,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,89234.06428,94763.645369,1,93,15,15,5,4,3,0,0,3,1,80,1,5,2,NA +68031,7,2,1,2,NA,1,1,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10276.786905,10398.280565,1,102,4,4,0.5,6,6,2,2,0,1,25,1,2,1,3 
+68032,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,36401.782557,40656.559204,2,100,6,6,2.01,2,2,0,0,2,1,80,1,5,1,5 +68033,7,2,1,2,NA,3,3,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,49276.9767,53002.397569,2,98,8,8,2.42,4,4,2,0,0,2,31,1,4,1,2 +68034,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,11190.39853,11690.037855,2,99,99,99,NA,2,2,0,0,2,2,62,1,5,3,NA +68035,7,2,2,78,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,55793.89654,57702.040299,2,103,12,12,NA,1,1,0,0,1,2,78,1,4,2,NA +68036,7,2,1,3,NA,1,1,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14505.510202,14266.522334,2,94,5,5,1.3,3,3,1,0,0,2,38,2,1,1,4 +68037,7,2,1,13,NA,5,6,2,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6666.045669,7124.269577,3,90,15,15,5,4,4,0,2,0,2,41,2,5,1,5 +68038,7,2,1,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,7238.674935,7295.298127,1,96,15,15,5,3,3,0,0,2,2,44,1,5,5,NA +68039,7,2,2,0,1,1,1,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7849.042868,8059.930457,2,98,14,14,3.25,5,5,2,1,0,1,37,1,5,1,5 +68040,7,2,1,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,29738.952706,30201.133298,2,94,8,8,3.4,2,2,0,0,0,1,22,1,3,5,NA +68041,7,2,1,63,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,6441.264127,6693.19464,1,96,15,15,5,2,2,0,0,2,1,63,1,3,1,3 +68042,7,2,1,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,32375.321924,34954.784356,1,98,2,2,0.31,3,3,1,0,0,1,45,NA,NA,1,NA +68043,7,2,2,32,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,79351.077678,83837.616505,2,98,9,9,4.01,2,2,0,0,0,1,27,1,5,1,4 +68044,7,2,2,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,187291.098551,189225.431861,2,91,7,7,2.31,2,2,0,0,0,2,58,1,5,3,NA +68045,7,2,2,13,NA,4,4,2,13,162,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10671.280357,10591.403308,1,99,10,10,3.13,4,4,0,2,0,1,35,1,4,1,5 
+68046,7,2,2,65,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,149575.283913,151406.873597,1,95,5,5,1.84,1,1,0,0,1,2,65,1,3,3,NA +68047,7,2,1,7,NA,5,6,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10810.913614,11461.625841,1,97,15,15,5,3,3,0,1,0,2,36,2,5,1,5 +68048,7,2,1,10,NA,4,4,2,10,131,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9699.683862,9815.96792,2,100,1,1,0.06,3,3,1,1,0,2,30,1,4,5,NA +68049,7,2,2,6,NA,5,6,2,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9227.090107,9736.375878,2,100,15,15,5,4,4,1,1,0,1,41,2,5,1,5 +68050,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,21143.97379,22281.995197,1,97,77,77,NA,3,3,0,0,3,2,62,1,5,1,NA +68051,7,1,1,51,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,24595.31055,0,1,97,15,15,5,4,4,0,2,0,1,51,1,5,1,NA +68052,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18404.681357,22792.166915,1,101,4,4,0.58,6,6,0,4,0,2,41,1,3,5,NA +68053,7,2,1,4,NA,1,1,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,18754.85406,20094.472571,2,102,10,10,3.04,4,4,2,0,0,2,31,2,2,1,NA +68054,7,2,2,8,NA,3,3,2,8,98,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,43531.157975,44359.496292,1,91,7,7,1.88,4,4,1,2,0,2,43,1,5,4,NA +68055,7,2,1,0,1,5,6,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5300.922196,5735.896742,3,90,7,7,1.82,4,4,1,0,0,2,54,2,1,3,NA +68056,7,2,2,6,NA,4,4,2,6,83,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7136.421849,7489.546444,1,90,9,9,1.65,7,7,0,4,0,1,36,1,4,1,4 +68057,7,1,1,6,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,66000.998272,0,1,90,15,15,5,4,4,1,1,0,2,39,1,5,1,4 +68058,7,2,1,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,123771.419917,129271.862102,2,98,8,8,2.7,3,3,0,0,1,2,71,NA,NA,2,NA +68059,7,1,1,29,NA,1,1,NA,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,35782.041084,0,2,96,3,3,0.54,4,4,1,1,0,1,29,1,2,1,2 
+68060,7,2,1,18,NA,3,3,1,18,217,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,69211.537407,74163.290023,1,92,14,14,3.16,6,6,1,1,0,1,49,1,1,1,3 +68061,7,2,2,35,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,21619.283038,21815.3204,2,102,15,15,3.92,5,5,1,2,0,1,34,2,5,1,5 +68062,7,2,1,28,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,59682.963348,63721.06052,2,102,10,7,3.67,2,1,0,0,0,2,27,1,4,6,NA +68063,7,2,1,51,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,27227.937106,28128.07977,1,101,1,1,0.27,1,1,0,0,0,1,51,1,3,3,NA +68064,7,2,2,80,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,17057.278872,18910.504828,2,98,2,2,0.82,1,1,0,0,1,2,80,1,3,2,NA +68065,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,114993.808573,116714.079488,1,98,7,3,0.9,4,1,0,0,0,2,20,1,4,5,NA +68066,7,2,2,63,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9518.80186,9943.806181,2,100,14,14,5,2,2,0,0,2,2,63,1,5,1,4 +68067,7,2,2,2,NA,5,6,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8818.200077,9596.189182,2,102,9,9,2.68,4,4,1,1,0,2,38,2,5,1,2 +68068,7,2,2,79,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,74947.363106,78666.944453,2,91,15,15,5,2,2,0,0,2,2,79,1,5,1,5 +68069,7,2,2,66,NA,2,2,2,NA,NA,2,NA,2,2,7,NA,3,5,NA,2,2,2,2,2,2,NA,NA,NA,NA,10136.963678,11753.317783,2,94,2,2,0.41,2,2,0,1,1,2,66,2,3,5,NA +68070,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19692.655418,21458.476044,2,99,13,13,NA,3,3,0,0,1,1,80,1,2,2,NA +68071,7,1,1,27,NA,5,6,NA,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,11977.649578,0,2,92,15,10,5,3,1,0,0,0,1,29,1,5,5,NA +68072,7,2,1,14,NA,3,3,1,14,173,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,75412.415858,75196.316989,2,98,15,15,5,4,4,0,2,0,2,46,1,4,1,NA +68073,7,2,1,58,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,26124.541799,26375.922323,1,94,4,4,1.34,4,1,0,0,0,1,58,1,4,6,NA 
+68074,7,2,1,1,16,5,7,2,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8361.302835,9219.703624,1,101,13,13,NA,3,3,1,0,0,2,19,1,2,NA,NA +68075,7,2,1,59,NA,5,6,2,NA,NA,1,1,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15693.68983,16353.841376,1,94,7,7,1.79,4,4,0,1,0,1,59,2,4,1,4 +68076,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,101896.080785,107317.764998,3,91,6,6,2.3,1,1,0,0,0,1,30,1,4,3,NA +68077,7,2,2,72,NA,3,3,2,NA,NA,2,NA,2,1,9,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,54256.870337,56112.448006,2,91,12,12,NA,2,2,0,0,2,1,76,1,5,1,2 +68078,7,2,2,38,NA,5,6,2,NA,NA,2,NA,2,2,99,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,13392.164122,13834.479479,3,90,15,15,5,3,3,0,0,0,1,46,2,3,1,3 +68079,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,37307.740846,37344.375392,1,91,5,5,1.36,2,2,0,1,0,2,49,1,5,3,NA +68080,7,2,2,39,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,19741.154437,19920.161476,1,100,5,5,0.74,6,6,0,3,0,1,40,2,3,1,4 +68081,7,2,2,1,21,4,4,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,7573.246603,2,100,8,8,2.33,4,4,1,0,0,2,50,1,4,3,NA +68082,7,2,1,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22570.662508,23423.888442,1,95,12,12,NA,5,5,1,1,0,2,46,1,4,1,4 +68083,7,2,1,10,NA,2,2,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,12577.115885,12649.318947,2,96,7,7,1.57,4,4,0,2,0,1,40,2,2,1,5 +68084,7,2,1,20,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,1,1,0.14,2,1,0,0,0,1,20,1,4,5,NA +68085,7,2,2,16,NA,1,1,2,16,194,NA,NA,2,2,4,9,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,18368.872199,19023.186366,2,94,4,4,0.63,6,6,1,2,0,2,36,2,3,1,1 +68086,7,2,2,27,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,3,1,2,2,2,2,2,2,2,1,2,2,2,50915.06085,50693.303376,3,92,4,4,0.55,6,6,0,4,0,1,36,2,1,1,3 +68087,7,2,1,12,NA,4,4,2,12,154,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13742.402649,13769.572504,1,96,15,15,5,2,2,0,1,0,2,39,1,4,5,NA 
+68088,7,2,2,10,NA,5,7,1,10,123,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5139.061504,5511.809249,2,103,15,15,5,3,3,0,1,0,2,37,2,5,1,5 +68089,7,2,1,46,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18335.522037,18907.54446,1,96,15,15,5,4,4,0,2,0,1,46,1,5,1,5 +68090,7,2,1,13,NA,3,3,1,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19381.771783,19434.749274,1,102,4,4,0.97,3,3,0,1,0,2,19,1,2,NA,NA +68091,7,2,2,74,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,2,2,2,NA,17057.278872,19050.04708,2,98,13,13,NA,1,1,0,0,1,2,74,1,1,2,NA +68092,7,2,2,11,NA,5,7,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6137.256046,6434.751859,1,103,15,15,3.7,5,5,0,2,1,1,55,1,5,1,5 +68093,7,2,1,47,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,18186.25816,18751.952628,2,101,9,9,3.74,2,2,0,0,0,2,40,2,4,1,2 +68094,7,2,1,3,NA,2,2,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14716.463544,15182.304505,2,96,1,1,0.06,5,5,2,1,0,1,27,2,3,1,4 +68095,7,2,2,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,35126.205635,35295.519265,1,92,4,4,0.5,6,6,0,3,0,2,41,1,4,1,NA +68096,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,28280.669788,29879.976389,1,99,77,77,NA,2,2,0,0,2,2,80,1,4,1,4 +68097,7,2,2,8,NA,4,4,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9399.281543,9696.12347,2,96,3,3,0.54,4,4,2,1,0,2,25,1,4,2,NA +68098,7,2,2,9,NA,3,3,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12892.545573,15136.637829,1,99,6,6,1.12,4,4,0,2,0,1,39,1,3,1,3 +68099,7,2,2,35,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,1,1,2,2,2,2,2,2,2,1,2,2,2,34898.504426,33956.869463,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +68100,7,2,2,65,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,1,2,1,1,2,2,1,2,1,NA,11313.630983,11818.772505,1,93,2,2,0.54,2,2,0,0,2,1,76,2,4,1,1 +68101,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,11740.600601,12693.223305,3,91,4,4,1.16,2,2,0,0,2,1,80,1,5,1,5 
+68102,7,2,1,75,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,1,2,2,NA,12962.876803,16930.790311,2,90,3,3,0.46,5,5,0,2,2,1,75,2,1,1,2 +68103,7,2,2,0,3,4,4,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4099.350341,4514.494508,2,99,6,6,1.39,4,4,1,0,1,2,63,1,3,3,NA +68104,7,2,1,18,NA,5,7,1,19,228,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14831.338089,14976.576871,1,98,3,3,0.43,4,4,0,1,0,2,39,1,2,5,NA +68105,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16675.763807,16960.86402,2,95,12,12,NA,3,3,0,0,2,1,65,1,4,1,4 +68106,7,2,2,72,NA,3,3,1,NA,NA,2,NA,2,1,9,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,69886.968852,72277.093789,1,102,7,7,2.86,2,2,0,0,2,1,73,1,5,1,3 +68107,7,2,2,8,NA,3,3,1,8,105,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,64166.795496,75335.746424,1,101,5,5,0.89,5,5,1,2,0,1,31,1,2,1,1 +68108,7,2,1,2,NA,4,4,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6314.243514,6704.025386,2,97,5,5,0.76,5,5,1,1,0,2,47,1,4,5,NA +68109,7,2,1,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18790.641284,20754.743742,2,100,8,8,2.7,3,3,1,0,0,2,41,1,4,1,3 +68110,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,10,10,4.49,2,2,0,0,2,1,60,1,5,1,4 +68111,7,2,2,12,NA,1,1,1,12,150,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17490.464019,17826.708822,1,102,15,15,4.47,4,4,0,2,0,2,30,1,4,1,4 +68112,7,2,1,52,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17950.706927,18011.264999,2,94,15,15,4.44,5,5,0,1,1,2,74,1,5,2,NA +68113,7,2,2,0,6,1,1,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,7565.515117,7406.345541,1,100,1,1,0.09,4,4,2,0,0,2,28,2,2,1,2 +68114,7,2,2,16,NA,3,3,2,17,204,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,71303.309206,77744.474592,2,94,15,15,5,3,3,0,1,1,1,63,1,5,1,3 +68115,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,44066.377955,48262.000685,2,97,5,5,2.11,1,1,0,0,1,2,80,1,4,2,NA 
+68116,7,2,1,8,NA,1,1,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14042.313177,2,98,3,3,0.54,3,3,0,2,0,2,35,1,3,5,NA +68117,7,2,1,28,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,23022.732862,27069.827485,2,102,14,14,3.25,5,5,1,1,0,2,32,1,4,1,3 +68118,7,2,2,18,NA,4,4,2,18,221,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,10154.02528,10568.655787,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +68119,7,2,1,30,NA,4,4,1,NA,NA,2,NA,2,2,3,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,30195.362375,34424.306144,2,102,8,8,4.59,1,1,0,0,0,1,30,2,4,5,NA +68120,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,26388.213487,26374.704746,2,96,6,3,1.01,2,1,0,0,0,1,25,NA,NA,6,NA +68121,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,62513.129282,66168.769092,2,95,15,15,5,2,2,0,0,2,1,70,1,5,1,5 +68122,7,2,1,70,NA,3,3,2,NA,NA,1,9,1,1,NA,NA,4,1,NA,1,1,2,1,2,2,NA,NA,NA,NA,68074.313029,72055.159478,1,101,9,9,4.13,2,2,0,0,2,1,70,1,4,1,3 +68123,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,1,2,1,1,2,1,2,2,NA,15437.181938,16772.913227,2,98,3,3,0.97,1,1,0,0,1,1,80,1,1,5,NA +68124,7,2,1,22,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,14385.653726,15564.966804,2,101,14,6,2.3,2,1,0,0,0,1,22,2,4,5,NA +68125,7,2,1,15,NA,4,4,2,15,190,NA,NA,2,1,2,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13969.457688,14694.403205,1,90,14,14,3.25,4,4,0,2,0,2,33,2,3,1,3 +68126,7,2,1,6,NA,3,3,2,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22394.780012,23678.914206,1,91,4,4,0.81,4,4,1,1,0,1,32,1,4,6,NA +68127,7,2,2,50,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,11446.604914,11862.967437,2,92,8,8,2.01,4,4,0,0,0,1,53,2,3,1,3 +68128,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25786.235831,27127.570278,1,93,3,3,0.75,2,2,0,0,1,2,80,1,1,2,NA +68129,7,2,2,78,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,16494.288293,17728.004854,2,97,2,2,0.84,1,1,0,0,1,2,78,1,2,2,NA 
+68130,7,2,1,7,NA,3,3,1,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,57057.523607,61384.115788,1,98,9,9,2.15,5,5,0,3,0,2,32,1,3,1,4 +68131,7,2,2,39,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,34898.504426,34194.957212,1,100,9,9,2.02,6,6,0,3,1,2,39,1,4,1,5 +68132,7,2,1,5,NA,1,1,2,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14505.510202,14964.673559,2,94,4,4,0.63,6,6,1,2,0,2,36,2,3,1,1 +68133,7,2,1,44,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,4,1,NA,2,2,2,2,2,2,2,2,2,2,33029.272844,33302.556931,2,93,7,7,1.52,4,4,1,1,0,1,44,2,4,1,NA +68134,7,2,1,32,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,4,6,NA,2,2,2,2,2,2,1,2,2,2,41155.167164,40844.556107,1,102,13,13,NA,6,6,1,2,0,2,36,2,4,6,NA +68135,7,2,1,50,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16590.074977,17115.36835,1,92,15,15,5,3,3,1,0,0,1,50,1,4,1,4 +68136,7,2,1,15,NA,2,2,1,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19570.996814,20425.923806,2,100,4,4,0.81,4,4,0,2,0,2,37,1,2,1,2 +68137,7,2,2,68,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,2,4,NA,1,2,1,1,2,1,1,2,1,NA,9991.888445,10532.363744,3,90,8,4,1.72,3,1,0,0,1,2,68,2,2,4,NA +68138,7,2,2,5,NA,3,3,2,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,78165.891242,86270.974005,1,101,14,14,4.21,4,4,1,1,0,2,37,1,5,1,5 +68139,7,2,2,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19210.136544,19335.440888,1,96,15,15,5,2,2,0,0,0,1,48,1,2,1,3 +68140,7,2,1,30,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19008.083201,18737.138245,2,97,1,1,0,2,2,0,0,1,2,63,1,4,5,NA +68141,7,2,2,4,NA,4,4,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10544.002566,11345.464378,1,91,6,6,0.99,5,5,3,0,0,2,33,2,3,1,4 +68142,7,2,1,36,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15732.465299,16321.860662,2,92,14,6,2.75,2,1,0,0,0,1,48,NA,NA,5,NA +68143,7,2,2,80,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,4,NA,2,2,2,1,2,2,2,2,1,NA,16623.349489,17874.484754,2,93,6,6,1.41,3,3,0,1,1,2,80,2,4,4,NA 
+68144,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,1,2,2,1,15083.375446,14341.447668,2,99,5,5,0.65,6,6,2,1,0,2,53,1,4,3,NA +68145,7,2,1,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,9221.19173,9581.850682,2,95,6,6,1.65,2,2,0,0,2,1,62,1,1,1,3 +68146,7,2,2,6,NA,5,7,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22371.648216,22397.198208,1,92,3,3,0.46,5,5,2,1,0,1,30,1,3,1,2 +68147,7,2,1,75,NA,4,4,1,NA,NA,2,NA,2,1,8,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,9662.124837,9885.036833,2,92,3,3,1.1,1,1,0,0,1,1,75,2,4,3,NA +68148,7,2,1,9,NA,1,1,1,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14258.502552,2,98,6,6,0.63,7,7,2,2,1,1,60,1,3,1,2 +68149,7,2,1,8,NA,1,1,1,8,99,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11399.23838,11468.323492,2,103,77,77,NA,5,5,1,2,0,2,30,1,2,1,2 +68150,7,2,1,13,NA,2,2,1,13,167,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,19820.949231,20163.121943,2,91,8,8,1.85,5,5,0,2,1,1,39,2,3,1,4 +68151,7,2,1,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,16088.355002,15889.986896,2,100,3,3,0.92,1,1,0,0,0,1,29,1,2,5,NA +68152,7,2,1,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16287.780872,16737.888813,2,100,14,14,4.86,3,3,0,0,0,2,52,1,5,1,3 +68153,7,2,1,14,NA,5,6,1,14,170,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6015.001712,6229.601178,1,102,5,5,0.92,5,5,1,2,0,2,44,2,1,1,2 +68154,7,2,1,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15599.953109,15843.647059,1,99,15,15,5,2,2,0,0,0,1,56,1,4,1,4 +68155,7,2,2,40,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,25189.042335,24499.678113,1,100,10,10,2.59,5,5,0,1,0,2,40,1,5,1,NA +68156,7,2,2,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,13934.943848,14557.124186,2,96,12,10,5,2,1,0,0,1,1,53,1,4,3,NA +68157,7,2,2,24,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,14756.436992,15386.562828,2,95,14,8,4.59,2,1,0,0,0,2,24,1,4,6,NA 
+68158,7,2,1,7,NA,1,1,1,7,88,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,8828.580268,9491.612368,2,103,77,77,NA,7,7,0,4,0,1,38,2,1,6,NA +68159,7,2,1,66,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12579.986433,13271.133625,1,92,6,6,2.15,2,2,0,0,2,2,61,2,4,1,5 +68160,7,2,1,11,NA,3,3,2,11,143,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24713.905595,26080.214484,1,98,6,6,0.97,7,7,1,2,0,1,49,1,2,1,2 +68161,7,2,1,10,NA,4,4,1,10,131,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8894.789377,9317.745565,2,97,NA,99,NA,7,6,2,1,1,2,56,1,3,5,NA +68162,7,2,1,0,9,3,3,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17196.879565,17846.963436,2,94,99,99,NA,6,6,2,0,0,2,26,1,4,1,NA +68163,7,2,2,0,9,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20729.303307,20166.249394,1,90,14,14,4.98,3,3,1,0,0,2,33,2,5,1,5 +68164,7,2,2,14,NA,1,1,1,14,169,NA,NA,2,2,3,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,26325.414456,26852.811114,3,92,4,4,0.67,4,4,0,3,0,2,36,2,1,5,NA +68165,7,2,2,0,5,2,2,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6850.346072,7305.598313,1,90,3,3,0.43,4,4,2,0,0,1,31,1,3,6,NA +68166,7,2,1,6,NA,5,6,2,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6631.058951,7488.793181,2,92,12,12,NA,7,7,2,4,0,1,54,2,2,1,5 +68167,7,1,1,5,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27873.065855,0,1,91,6,6,1.13,6,6,1,3,0,1,40,1,4,6,NA +68168,7,2,2,2,NA,5,7,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6247.52442,6810.692829,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +68169,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,55695.737629,59946.103917,1,92,5,5,1.15,3,3,1,0,0,1,23,1,4,1,4 +68170,7,2,1,37,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22856.432972,22931.992355,2,98,14,14,3.36,4,4,0,2,0,1,37,1,4,1,4 +68171,7,2,2,58,NA,1,1,1,NA,NA,2,NA,2,2,8,NA,1,4,NA,2,2,2,2,2,2,NA,NA,NA,NA,28349.668436,29490.478086,1,102,1,1,0.33,2,2,0,0,0,1,37,1,2,4,NA 
+68172,7,2,2,9,NA,3,3,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,50928.148447,51656.230004,3,91,14,14,3.4,4,4,0,2,0,1,40,1,4,1,4 +68173,7,2,1,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,22824.336433,1,95,4,4,0.65,6,6,2,2,0,2,36,1,4,6,NA +68174,7,2,1,5,NA,3,3,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,30883.231636,34843.624635,1,95,6,3,0.45,6,4,1,2,0,1,28,1,2,1,2 +68175,7,2,1,4,NA,2,2,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16169.123686,17324.04909,1,100,10,10,2.91,4,4,1,1,0,1,32,1,5,1,5 +68176,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,17164.211773,17267.434455,2,99,6,6,2.24,1,1,0,0,0,2,58,1,2,5,NA +68177,7,2,1,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,32476.100686,32780.682518,1,100,4,4,1.19,2,2,0,0,1,1,62,1,5,1,5 +68178,7,2,1,18,NA,5,6,2,18,223,2,NA,2,1,3,15,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,9923.450213,10376.279545,2,100,5,5,0.89,4,4,0,1,0,2,40,2,3,1,3 +68179,7,2,1,17,NA,4,4,2,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,NA,9178.515376,10829.980579,2,99,2,2,0.19,6,6,0,1,0,1,59,1,2,5,NA +68180,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19799.565045,19517.338151,1,90,14,14,2.96,5,5,1,2,0,1,31,1,5,1,4 +68181,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,15498.463997,16338.715638,1,98,5,5,1.39,2,2,0,0,2,2,71,1,3,1,3 +68182,7,2,1,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,138834.18124,153441.395439,1,100,15,15,4.56,4,4,0,2,0,2,42,1,4,1,3 +68183,7,2,2,28,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,23293.719218,24697.897932,1,92,7,7,1.83,3,3,1,1,0,2,28,1,3,5,NA +68184,7,2,1,7,NA,4,4,1,7,87,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12267.215138,12414.279886,2,102,5,5,0.76,5,5,1,3,0,2,30,1,4,4,NA +68185,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,26847.643051,27369.434392,1,98,4,4,0.67,5,5,1,2,0,1,29,1,4,1,3 
+68186,7,2,2,17,NA,1,1,1,17,206,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20370.629189,20955.218165,1,95,15,15,5,3,3,0,1,0,1,50,1,3,1,4 +68187,7,2,1,3,NA,4,4,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7431.820906,7890.591472,1,99,10,10,2.71,5,5,1,1,2,1,75,1,1,1,3 +68188,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,64463.340883,66173.435468,1,91,14,14,5,2,2,0,0,2,2,70,1,2,1,NA +68189,7,2,2,10,NA,4,4,2,10,129,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9326.540969,10100.402919,1,91,8,8,1.76,5,5,0,3,0,2,42,1,3,6,NA +68190,7,2,2,1,12,1,1,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11438.839305,12628.78556,1,102,14,14,4.32,3,3,1,0,0,1,25,1,4,1,4 +68191,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,29167.119125,34820.124106,1,98,3,3,0.5,5,5,0,3,0,2,56,1,3,3,NA +68192,7,2,2,5,NA,1,1,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14852.990935,14903.708855,2,94,6,6,0.8,7,7,1,3,0,2,36,2,3,1,1 +68193,7,2,1,17,NA,3,3,2,17,208,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,71458.892941,70519.759062,2,94,10,10,3.51,3,3,0,2,0,2,39,2,4,3,NA +68194,7,2,1,15,NA,4,4,2,15,180,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10745.098568,10766.342508,1,96,12,12,NA,5,5,1,2,0,2,35,1,5,1,4 +68195,7,2,1,74,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,17306.956173,18319.031536,1,100,4,4,1.16,2,2,0,0,2,1,74,1,3,1,5 +68196,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,125040.051934,125530.45672,1,97,15,15,5,4,4,0,0,1,1,67,NA,NA,2,NA +68197,7,2,1,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,196252.38045,199712.076327,1,98,14,14,5,2,2,0,0,0,1,59,1,3,1,4 +68198,7,2,1,32,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,17371.064048,17243.514155,1,96,12,12,NA,5,5,2,0,1,2,63,2,5,3,NA +68199,7,2,2,30,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,4,2,1,2,2,1,2,2,1,2,2,1,29148.354549,29365.982413,2,92,14,14,4.03,4,4,1,1,1,2,30,1,5,4,NA 
+68200,7,2,1,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,31155.769617,31636.819209,1,92,14,9,3.97,3,2,0,0,2,1,51,1,4,5,NA +68201,7,2,2,12,NA,1,1,1,12,153,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,19719.98657,20781.904006,2,102,6,6,1.03,5,5,1,1,0,1,37,1,2,1,2 +68202,7,2,1,9,NA,3,3,2,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,75923.373594,80651.425295,1,94,15,15,5,4,4,0,2,0,2,47,1,5,1,5 +68203,7,2,2,78,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,72551.269339,73326.076054,1,95,4,4,1.12,2,2,0,0,2,2,78,1,4,1,2 +68204,7,2,2,18,NA,3,3,2,18,226,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24820.424867,25966.716212,1,91,3,3,0.62,3,3,0,1,0,2,55,1,4,4,NA +68205,7,2,1,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15601.50288,15850.523572,2,99,2,2,0.2,7,7,1,2,1,1,63,1,1,2,NA +68206,7,2,2,35,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,17978.142628,18308.053591,1,91,10,10,3.22,4,4,1,1,0,1,38,2,5,1,5 +68207,7,2,1,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,22013.270774,22643.742168,1,101,3,3,0.66,2,2,0,0,1,2,65,1,2,3,NA +68208,7,2,1,16,NA,5,7,2,16,195,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,5388.361335,5758.757236,3,91,15,15,4.47,4,4,0,3,0,2,44,2,5,1,NA +68209,7,2,1,15,NA,2,2,1,15,189,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20798.348063,20598.526957,2,93,12,12,NA,3,1,1,1,0,2,43,1,5,3,NA +68210,7,2,1,27,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,9177.295801,9548.31812,2,92,15,8,4.59,3,1,0,0,0,2,25,1,5,5,NA +68211,7,2,1,2,NA,3,3,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37959.146468,40828.920669,1,91,6,6,1.62,3,3,1,0,0,1,30,1,4,1,4 +68212,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,1,2,1,2,2,1,1,2,NA,16321.652472,17542.456462,2,97,4,4,1.02,2,2,0,0,2,2,80,1,1,2,NA +68213,7,2,2,41,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,4,2,2,2,2,1,2,2,NA,NA,NA,NA,40880.818805,41857.641766,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 
+68214,7,2,2,3,NA,5,6,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6858.963321,7464.098007,3,91,15,15,5,3,3,1,0,0,1,40,2,5,1,5 +68215,7,2,2,14,NA,4,4,1,14,178,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13895.342981,13964.270792,2,102,5,5,0.67,6,6,0,4,0,2,33,1,2,6,NA +68216,7,1,1,60,NA,5,6,NA,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11992.891463,0,1,100,15,15,5,2,2,0,0,1,1,60,2,5,1,NA +68217,7,2,2,19,NA,4,4,2,19,234,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13841.239638,13875.328502,1,96,15,15,4.9,4,4,0,1,0,1,47,1,3,1,5 +68218,7,2,1,1,14,1,1,1,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13968.423539,13738.28453,2,102,6,6,1.62,3,3,1,0,0,1,20,2,4,1,4 +68219,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,25270.027986,25977.972711,1,100,9,6,2.24,3,1,0,0,0,1,25,NA,NA,5,NA +68220,7,2,1,14,NA,4,4,2,14,169,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11834.781205,13035.394237,2,90,2,2,0.31,5,5,0,2,1,2,71,1,2,2,NA +68221,7,2,1,3,NA,4,4,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8385.172131,8641.288503,2,93,6,2,0.46,3,2,1,1,0,2,31,2,3,3,NA +68222,7,2,1,36,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,39040.678458,38746.026117,2,98,14,14,2.87,5,5,0,3,0,2,34,1,2,1,2 +68223,7,2,2,9,NA,3,3,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25527.806244,26565.240294,2,101,3,3,0.3,7,7,1,2,0,2,50,1,2,4,NA +68224,7,2,2,11,NA,2,2,2,11,134,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15166.167659,15710.813593,2,94,1,1,0.01,7,7,1,3,0,1,41,2,1,1,1 +68225,7,2,1,2,NA,5,6,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6389.003009,6946.807658,3,91,6,6,1.15,5,5,1,0,0,1,55,2,5,1,5 +68226,7,2,2,38,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,20039.469886,21581.359058,1,97,15,15,5,4,4,1,1,0,1,44,2,5,1,5 +68227,7,2,2,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,25638.18991,26248.270338,2,101,2,2,0.38,3,3,0,2,0,2,56,1,3,2,NA 
+68228,7,2,1,11,NA,4,4,2,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6377.235034,7094.374704,2,90,6,6,0.84,6,6,1,3,1,2,43,1,2,5,NA +68229,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,97803.500399,102601.600974,1,101,14,14,5,3,3,0,1,0,2,36,1,5,1,5 +68230,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,59001.303336,68847.35366,2,101,2,2,0.46,1,1,0,0,0,2,22,1,4,5,NA +68231,7,2,1,13,NA,5,6,2,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10592.643259,11320.781443,2,91,77,77,NA,3,3,0,1,0,2,43,2,5,1,5 +68232,7,2,2,9,NA,1,1,1,10,121,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13064.573334,13403.6626,2,103,2,2,0.22,7,7,0,3,0,2,39,2,1,5,NA +68233,7,2,2,5,NA,3,3,2,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,51483.624552,53105.566664,1,91,15,15,5,4,4,1,1,0,1,38,1,5,1,5 +68234,7,2,1,68,NA,1,1,1,NA,NA,2,NA,2,1,8,NA,4,5,NA,1,2,2,1,2,2,2,2,2,2,12845.115724,13050.878075,1,100,9,9,2.02,6,6,0,3,1,2,39,1,4,1,5 +68235,7,2,1,79,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19473.412374,20529.16646,1,95,4,4,1.22,2,2,0,0,1,1,79,1,1,1,3 +68236,7,2,2,41,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,26595.398371,26621.513872,1,94,4,4,1.26,2,2,0,0,1,2,41,1,4,5,NA +68237,7,2,1,63,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,120735.071461,122283.708051,1,94,8,8,2.41,3,3,0,0,3,1,63,1,4,1,5 +68238,7,2,2,4,NA,4,4,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10771.85499,11250.180007,2,97,5,5,1.08,3,3,1,1,0,2,27,1,3,5,NA +68239,7,2,2,7,NA,1,1,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20753.369981,21285.255435,1,97,10,10,2.32,6,6,0,4,0,1,42,1,4,1,4 +68240,7,2,1,19,NA,1,1,2,19,239,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,25099.482648,25097.275151,1,97,4,4,0.65,4,4,0,1,0,2,45,2,2,3,NA +68241,7,2,1,9,NA,1,1,1,9,114,NA,NA,2,2,3,3,NA,NA,NA,2,1,1,1,2,2,1,2,2,1,19774.151841,21259.203461,3,92,7,7,1.41,5,5,1,2,0,1,20,2,1,1,1 
+68242,7,2,1,7,NA,1,1,2,7,85,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9390.522479,9862.720437,2,90,3,3,0.58,4,4,0,2,0,2,36,2,3,1,3 +68243,7,2,1,13,NA,1,1,1,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22621.505951,23112.921348,1,92,14,14,5,2,2,0,1,0,2,41,1,5,3,NA +68244,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,28813.038041,29108.403608,1,94,2,2,0.42,3,3,0,0,0,2,52,1,4,1,1 +68245,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,18246.235208,18400.236064,2,99,1,1,0.07,4,4,1,1,0,2,24,1,2,5,NA +68246,7,2,1,9,NA,5,6,2,9,116,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5377.477719,5731.340117,2,100,4,4,0.5,6,6,2,1,0,1,30,2,4,1,3 +68247,7,2,2,80,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,12833.793728,13278.897744,2,92,77,77,NA,2,2,0,0,2,2,80,1,3,1,4 +68248,7,2,1,15,NA,4,4,1,15,190,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12147.046136,12703.852077,2,100,8,8,1.8,5,5,0,3,0,2,43,1,3,1,3 +68249,7,2,1,60,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,117075.881463,118577.582598,1,94,9,9,3.97,2,2,0,0,1,1,60,1,4,1,4 +68250,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,207644.101324,214736.820497,1,97,15,15,5,2,2,0,0,0,1,50,1,3,6,NA +68251,7,2,2,34,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,39561.667842,39233.767064,2,98,14,14,2.87,5,5,0,3,0,2,34,1,2,1,2 +68252,7,2,2,45,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,15206.604563,15895.992431,2,90,14,14,4.25,4,4,0,2,1,2,45,2,5,5,NA +68253,7,2,2,8,NA,3,3,2,8,102,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,63165.080986,62975.31549,1,98,15,15,5,3,3,0,1,0,2,43,1,5,1,5 +68254,7,2,2,55,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15499.383981,15426.291906,2,99,5,5,1.26,3,3,1,0,0,2,50,2,3,5,NA +68255,7,2,1,60,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,6800.602543,7011.068263,1,99,6,6,1.62,3,3,0,0,1,1,60,2,5,1,3 
+68256,7,2,2,46,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22790.461787,22849.332243,2,98,10,10,3.78,3,3,0,0,0,2,46,1,4,1,4 +68257,7,2,1,12,NA,5,6,1,12,155,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10346.302718,11892.421636,2,91,6,6,1.26,5,5,0,2,0,2,47,2,1,1,1 +68258,7,2,1,3,NA,2,2,2,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,12403.412256,13289.361067,2,90,10,10,3.13,4,4,1,2,0,2,39,1,5,4,NA +68259,7,2,2,4,NA,5,7,2,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27257.164734,28966.726071,1,95,10,6,1.34,5,4,1,2,0,1,32,1,3,6,NA +68260,7,2,1,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,10717.375231,11218.730451,2,95,3,3,1.29,1,1,0,0,1,1,61,1,2,3,NA +68261,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,14971.827573,15192.169136,2,97,15,15,5,3,3,0,0,3,2,80,1,3,2,NA +68262,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,10576.529742,11391.789088,2,99,2,2,0.72,1,1,0,0,1,2,64,1,3,5,NA +68263,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,118611.064701,118209.809508,1,91,10,10,3.78,3,3,0,0,2,1,62,1,5,1,5 +68264,7,2,1,9,NA,1,1,2,9,111,NA,NA,2,2,3,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11917.887108,12700.965428,2,94,7,7,1.34,5,5,2,1,0,1,32,2,1,1,NA +68265,7,2,1,11,NA,1,1,1,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11881.117946,11953.12349,1,102,6,6,1.18,5,5,0,2,1,2,42,2,2,2,NA +68266,7,2,2,11,NA,4,4,2,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8757.841043,9104.895674,2,97,7,7,2.16,3,3,0,1,0,2,31,1,3,6,NA +68267,7,2,1,3,NA,5,6,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6389.003009,6899.95174,3,91,6,6,1.22,5,5,1,2,0,2,37,1,4,1,2 +68268,7,1,2,6,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10332.067017,0,1,100,3,3,0.73,3,3,1,1,0,2,32,1,3,5,NA +68269,7,2,2,2,NA,1,1,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13065.99844,14425.21291,1,95,8,8,2.24,4,4,2,0,0,2,29,1,3,1,4 
+68270,7,2,2,48,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,2,6,NA,2,2,2,2,2,2,2,2,2,2,31235.666551,31802.121006,2,94,9,9,4.21,3,2,0,0,0,2,48,2,2,6,NA +68271,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,127198.447143,126871.700524,1,101,8,8,4.25,1,1,0,0,0,1,43,1,4,5,NA +68272,7,2,1,1,18,5,7,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26337.322319,30860.353051,1,94,6,6,1.33,4,4,2,0,0,2,29,1,2,1,4 +68273,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,59510.728426,64021.429591,1,99,5,5,0.89,4,4,2,0,0,2,31,1,4,1,5 +68274,7,2,1,12,NA,5,6,1,12,147,NA,NA,2,2,3,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8530.740143,8910.137481,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +68275,7,2,1,71,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,7608.031426,7606.53553,1,99,14,14,5,2,2,0,0,2,1,71,1,4,1,3 +68276,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,31909.283101,36294.223393,2,91,6,6,1.26,5,5,0,1,2,2,80,1,4,2,NA +68277,7,2,1,61,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,4,1,NA,2,2,2,1,2,2,1,2,2,2,11568.876339,11794.347884,1,102,6,6,1.48,3,3,0,0,1,2,57,2,1,1,4 +68278,7,2,2,0,0,1,1,1,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7896.364456,7653.697165,2,102,15,15,2.43,7,7,3,2,0,1,28,2,5,1,4 +68279,7,2,1,20,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14424.961621,15273.975784,2,102,8,8,2.01,4,4,0,0,0,1,59,2,4,1,4 +68280,7,2,2,15,NA,3,3,2,15,184,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,91539.042546,96988.607103,1,101,9,9,2.6,4,4,0,2,0,2,38,1,4,1,4 +68281,7,2,1,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17115.540769,2,97,12,6,2.75,3,1,0,0,0,1,21,NA,NA,77,NA +68282,7,2,2,54,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,13728.308948,13865.439464,2,90,6,6,1.12,4,4,0,1,1,1,63,2,1,1,1 +68283,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,3,1,NA,1,2,2,1,2,2,1,2,2,3,13155.047649,13224.610785,3,90,15,15,3.7,5,5,0,0,0,1,56,2,3,1,3 
+68284,7,2,1,8,NA,1,1,1,8,106,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14007.413517,2,98,15,15,4.97,5,5,0,3,0,1,39,1,5,1,5 +68285,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,110369.721342,114327.60352,1,94,9,9,3.97,2,2,0,0,0,1,49,1,3,1,3 +68286,7,2,2,16,NA,3,3,2,17,205,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,111142.989658,114400.666347,1,95,NA,NA,NA,5,5,0,2,0,2,37,1,3,1,NA +68287,7,2,1,12,NA,1,1,1,12,145,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22768.423624,22944.003607,1,92,5,5,1.24,3,3,0,1,0,2,29,2,3,6,NA +68288,7,2,1,28,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,34708.958385,37032.750996,2,97,13,13,NA,3,3,0,1,0,1,28,2,2,1,1 +68289,7,2,1,9,NA,4,4,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10199.928366,10774.065998,1,100,5,5,0.85,5,5,0,2,0,2,54,1,2,2,NA +68290,7,2,2,74,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,81416.374938,89993.518978,1,97,9,9,3.97,2,2,0,0,2,1,79,1,3,1,3 +68291,7,2,1,69,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,2,2,2,2,2,2,2,2,1,2,6687.985443,7006.085081,2,93,4,4,0.99,2,2,0,0,2,1,69,2,3,1,1 +68292,7,2,1,2,NA,1,1,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12005.116852,11690.420322,3,92,14,14,2.29,7,7,2,0,0,2,50,2,1,1,9 +68293,7,2,1,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,191792.099076,199776.618579,1,98,6,6,1.98,2,2,0,0,0,1,54,1,4,1,3 +68294,7,2,2,11,NA,3,3,2,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,55469.656717,57246.428971,1,101,14,14,3.3,4,4,0,2,0,2,42,1,4,1,3 +68295,7,1,2,20,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,6,3,1,2,2,1,2,2,NA,NA,NA,NA,60324.348827,0,1,101,13,13,NA,3,3,1,0,0,2,20,1,2,6,NA +68296,7,2,2,69,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,46357.185832,48577.343045,2,98,3,3,0.68,2,2,0,0,2,1,80,1,1,1,3 +68297,7,2,1,5,NA,5,6,1,5,63,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10273.602479,11522.969217,1,92,14,14,3.3,4,4,2,0,0,1,28,1,4,1,4 
+68298,7,2,1,1,23,2,2,2,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12567.081957,12360.030976,1,97,15,15,5,4,4,1,1,0,1,42,1,5,1,5 +68299,7,2,2,12,NA,4,4,2,12,148,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19113.842115,19208.656277,1,97,1,1,0.09,4,4,0,1,0,2,44,2,2,1,3 +68300,7,2,1,16,NA,4,4,2,16,193,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11023.237662,11433.168046,1,99,14,14,4.05,3,3,0,1,0,2,52,1,4,4,NA +68301,7,2,1,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,166897.201244,169896.236244,2,91,15,2,0.63,7,1,0,0,1,1,49,NA,NA,5,NA +68302,7,2,1,17,NA,4,4,2,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11023.237662,11433.168046,1,99,15,15,5,4,4,0,2,0,2,46,1,5,1,5 +68303,7,2,2,16,NA,1,1,2,16,198,NA,NA,2,2,1,10,NA,NA,NA,1,2,2,1,2,2,2,2,2,2,16896.101801,17220.920308,1,93,15,1,0,3,1,0,1,0,2,46,2,5,5,NA +68304,7,2,1,19,NA,1,1,1,19,232,2,NA,2,2,3,11,NA,NA,NA,2,2,2,2,2,2,1,2,2,2,25268.119938,26371.916437,1,100,99,99,NA,6,6,0,1,0,2,22,2,3,1,3 +68305,7,2,2,17,NA,5,6,1,17,205,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,10081.858636,10347.605805,2,102,15,15,3.82,5,5,0,1,2,1,60,2,2,1,1 +68306,7,2,1,17,NA,3,3,2,17,212,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,88448.252445,87285.839384,1,95,8,1,0.09,4,1,0,1,0,2,57,1,5,5,NA +68307,7,2,1,2,NA,4,4,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6347.215153,6424.634366,2,95,7,7,1.83,3,3,1,0,0,1,33,1,3,6,NA +68308,7,2,1,0,2,3,3,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8361.170561,8618.715512,1,94,4,4,0.56,5,5,1,2,0,1,34,1,2,3,NA +68309,7,2,1,57,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16499.662173,16747.308945,3,91,7,7,1.33,6,6,0,0,2,2,51,2,5,1,5 +68310,7,2,2,47,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20303.639991,21936.687597,1,97,10,10,3.67,3,3,0,1,0,1,47,1,5,1,5 +68311,7,2,2,33,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,3,1,1,1,2,2,1,2,2,1,2,2,1,18018.210636,18055.00232,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 
+68312,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,62471.259637,62663.338056,1,92,8,8,1.45,6,6,1,3,0,1,36,1,3,1,4 +68313,7,2,1,5,NA,3,3,2,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,67064.292398,74209.90502,2,91,15,15,5,5,5,2,1,0,2,40,1,5,1,5 +68314,7,2,1,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16995.30979,18681.886374,2,103,6,6,1.3,4,4,1,1,0,2,26,1,4,1,3 +68315,7,2,2,10,NA,1,1,1,10,121,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15510.382876,18634.445505,1,100,6,6,1.11,5,5,0,2,1,1,38,2,2,1,1 +68316,7,2,2,70,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,13204.205822,14198.003586,1,98,9,9,2.49,4,4,0,1,2,2,70,1,4,2,NA +68317,7,2,2,19,NA,3,3,1,19,237,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,36241.268921,38398.808798,2,101,2,1,0.28,2,1,0,0,0,2,21,1,4,5,NA +68318,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,18693.365067,19341.691824,1,92,12,12,NA,7,7,1,2,1,2,45,2,3,1,3 +68319,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,90708.718111,91145.947869,2,100,15,15,4.5,6,6,0,4,0,1,45,1,5,1,5 +68320,7,2,1,0,1,5,7,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6420.539566,6377.399312,2,99,15,15,5,3,3,1,0,0,1,35,2,5,1,5 +68321,7,2,1,80,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,28658.58153,30983.915333,2,94,8,8,3.4,2,2,0,0,2,1,80,1,3,1,5 +68322,7,2,1,32,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,30626.581617,32055.135325,2,90,6,6,1.62,3,3,1,0,0,2,28,1,5,1,4 +68323,7,2,2,16,NA,5,7,1,16,202,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,23414.779924,23738.339087,1,102,5,5,1.27,3,3,0,2,0,2,38,1,2,3,NA +68324,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,90179.087212,91826.282843,2,95,14,14,5,2,2,0,0,0,1,39,1,4,1,5 +68325,7,2,1,2,NA,4,4,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5662.231921,6243.536586,1,99,2,2,0.31,4,4,1,0,1,2,67,1,3,3,NA 
+68326,7,2,1,12,NA,4,4,2,12,152,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,NA,NA,NA,1,2,2,1,10366.393886,10567.129261,1,90,9,9,1.65,7,7,0,4,0,1,36,1,4,1,4 +68327,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,26019.443397,28034.871168,2,100,6,6,2.01,2,2,0,0,2,1,80,1,5,1,5 +68328,7,2,1,5,NA,3,3,2,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,56162.95725,63365.162813,2,95,15,15,4.63,5,5,1,2,0,2,36,1,5,1,3 +68329,7,2,2,15,NA,1,1,1,15,188,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18515.058419,19512.090709,2,96,7,7,1.79,4,4,0,2,0,1,43,2,3,1,2 +68330,7,2,2,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,133492.667054,133041.068157,1,95,6,6,1.94,2,2,0,0,2,2,69,1,2,1,4 +68331,7,2,1,66,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14488.953694,14771.336069,3,92,14,14,5,2,2,0,0,2,1,66,1,4,1,4 +68332,7,2,2,17,NA,3,3,2,17,205,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,123622.182994,127245.633335,2,94,7,7,2.72,2,2,0,1,0,2,43,1,3,3,NA +68333,7,2,1,2,NA,3,3,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,43818.485,49773.383799,2,91,10,10,2.5,5,5,1,0,0,1,57,1,9,1,3 +68334,7,2,2,38,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,26682.998353,27885.908424,2,99,3,3,0.56,4,4,1,0,0,2,38,1,3,5,NA +68335,7,1,2,14,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7969.467397,0,2,103,15,15,4.34,4,4,0,2,0,1,48,2,5,1,5 +68336,7,2,2,1,22,1,1,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8832.868731,8863.029978,1,102,6,6,0.8,7,7,3,3,0,2,34,2,3,1,1 +68337,7,2,1,36,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,4,1,NA,2,2,2,2,2,2,1,2,2,2,41241.224595,42341.927552,2,102,10,10,3.04,4,4,2,0,0,2,31,2,2,1,NA +68338,7,2,1,51,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,3,1,NA,2,2,2,2,2,2,2,2,1,2,25108.558777,27044.812277,2,93,4,4,0.82,4,4,0,0,0,1,51,2,3,1,3 +68339,7,2,1,46,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,18790.641284,20754.743742,2,100,4,4,0.85,4,4,0,2,0,2,39,1,3,6,NA 
+68340,7,2,1,10,NA,3,3,2,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21087.869274,22876.159598,1,101,4,4,0.78,4,4,1,2,0,2,32,1,3,3,NA +68341,7,2,2,64,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,2,2,2,1,10235.0654,10993.258671,2,93,10,10,3.04,4,4,0,0,2,1,72,2,3,1,2 +68342,7,2,1,59,NA,5,6,2,NA,NA,2,NA,2,2,7,NA,1,6,NA,1,2,1,1,2,1,NA,NA,NA,NA,16499.662173,16440.055989,3,91,6,4,1.38,3,1,0,0,0,1,59,2,1,6,NA +68343,7,2,1,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,17206.320427,17285.613667,1,96,2,2,0.4,3,3,0,0,0,2,56,1,3,3,NA +68344,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,152858.509804,152465.847768,1,95,14,7,3.67,2,1,0,0,0,1,47,1,4,3,NA +68345,7,2,2,18,NA,4,4,1,18,223,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,2,1,0.09,4,1,0,0,0,2,19,1,4,NA,NA +68346,7,2,1,0,4,4,4,2,NA,4,NA,NA,2,2,99,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6327.979262,6425.104835,1,90,13,13,NA,3,3,2,0,0,2,21,2,4,5,NA +68347,7,2,2,0,2,3,3,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20899.681083,21326.972731,1,92,14,14,3.16,6,6,1,1,0,1,49,1,1,1,3 +68348,7,2,1,32,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,51543.062078,54569.929737,3,92,14,14,3.25,4,4,2,0,0,2,33,1,5,1,5 +68349,7,1,2,12,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24869.937631,0,2,93,6,6,1.48,4,4,0,1,0,1,53,2,2,1,3 +68350,7,2,2,12,NA,3,3,1,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,93740.540203,99321.165816,1,100,15,15,4.56,4,4,0,2,0,2,42,1,4,1,3 +68351,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,90303.174138,92885.29608,2,92,15,15,5,4,1,0,0,0,1,27,NA,NA,5,NA +68352,7,2,2,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,30548.472436,32702.747345,1,95,1,1,0.21,4,4,1,0,1,2,75,1,1,2,NA +68353,7,2,1,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,94698.084211,100770.654731,1,101,14,14,4.21,4,4,1,1,0,2,37,1,5,1,5 
+68354,7,2,1,45,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,1,1,2,2,1,2,1,NA,14879.667962,14825.914121,3,91,4,4,0.69,5,5,0,2,0,1,45,2,4,1,1 +68355,7,2,2,19,NA,4,4,2,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,1,1,0.05,1,1,0,0,0,2,19,1,4,NA,NA +68356,7,2,2,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,15616.794261,15846.627851,2,97,7,7,3.49,1,1,0,0,1,2,73,1,4,3,NA +68357,7,2,1,1,12,3,3,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26337.322319,28328.46741,1,94,3,3,0.93,2,2,1,0,0,2,25,1,5,3,NA +68358,7,2,2,0,5,1,1,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,6787.112205,6644.319314,2,94,3,3,0.54,3,3,1,0,0,2,21,1,4,1,3 +68359,7,1,2,11,NA,2,2,NA,NA,NA,NA,NA,2,1,4,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15821.530831,0,2,91,6,6,0.93,5,5,1,2,0,2,50,2,1,5,NA +68360,7,2,1,13,NA,5,6,1,13,166,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12013.02156,13498.577364,3,92,12,12,NA,5,5,0,2,0,1,47,1,3,1,3 +68361,7,2,1,8,NA,1,1,1,8,101,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,13898.598114,14118.535925,2,102,5,5,0.59,7,7,1,3,0,1,37,2,1,6,NA +68362,7,2,1,2,NA,4,4,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8009.966208,8254.622305,2,96,13,13,NA,4,4,1,1,0,2,40,1,3,77,NA +68363,7,2,1,1,21,5,7,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6929.441295,7483.610581,2,102,15,15,5,5,5,1,0,2,1,30,1,4,1,5 +68364,7,2,1,63,NA,4,4,2,NA,NA,2,NA,2,2,7,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,7514.993062,7713.03043,2,90,6,6,1.12,4,4,0,1,1,1,63,2,1,1,1 +68365,7,2,1,64,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,22184.040999,22468.589781,2,94,6,6,2.66,1,1,0,0,1,1,64,1,4,3,NA +68366,7,2,1,3,NA,4,4,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11920.911192,13144.753907,2,96,3,3,0.59,3,3,1,0,0,2,25,1,4,1,NA +68367,7,2,1,78,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,10160.645851,10681.304475,1,96,8,8,3.14,2,2,0,0,2,2,64,1,3,1,2 
+68368,7,2,1,17,NA,5,6,2,17,207,2,NA,2,1,3,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9136.388281,9764.423514,3,91,9,9,4.08,2,2,0,1,0,2,54,2,5,1,NA +68369,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,39565.288792,43332.356399,1,99,77,77,NA,2,2,0,0,2,2,80,1,4,1,4 +68370,7,2,2,10,NA,3,3,2,10,129,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24070.467912,23750.822126,1,95,7,7,1.17,6,6,1,3,0,2,44,1,4,1,NA +68371,7,2,1,10,NA,3,3,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18216.94614,18737.854061,1,94,3,3,0.39,6,6,2,2,0,2,25,1,4,1,2 +68372,7,2,2,2,NA,5,6,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4962.240532,5199.576603,2,94,77,77,NA,6,6,2,0,0,2,18,1,3,NA,NA +68373,7,2,2,8,NA,4,4,2,8,97,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11467.793741,13004.375485,2,91,6,6,0.78,7,7,1,4,0,2,38,2,2,77,NA +68374,7,2,1,45,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20696.713928,21333.342141,2,102,15,15,5,3,3,1,0,0,2,34,1,5,1,5 +68375,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,29167.119125,29596.13582,1,101,1,1,0.1,4,4,1,1,0,2,52,1,4,3,NA +68376,7,2,2,61,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,13057.178942,13648.591881,1,102,77,77,NA,2,2,0,0,2,1,68,2,4,1,1 +68377,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,NA,8386.962371,8877.414759,1,93,3,3,0.93,1,1,0,0,1,1,74,1,4,4,NA +68378,7,2,1,0,3,3,3,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9419.940635,10084.452218,2,98,3,3,0.54,3,3,1,0,0,1,23,1,3,1,2 +68379,7,2,1,1,16,4,4,2,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6580.937346,6661.20735,2,97,4,4,0.97,3,3,1,0,0,1,38,1,3,6,NA +68380,7,2,2,43,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,17623.300255,17855.084956,1,100,15,15,5,4,4,0,1,0,1,44,2,5,1,5 +68381,7,2,1,9,NA,3,3,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21087.869274,22876.159598,1,101,4,4,0.99,2,2,0,1,0,2,35,1,3,5,NA 
+68382,7,2,1,0,11,3,3,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9081.731172,9425.042696,1,95,8,8,1.45,6,6,1,1,2,1,69,1,1,1,3 +68383,7,2,2,0,9,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21905.560663,22505.214105,1,95,5,5,0.76,5,5,2,1,0,2,27,1,4,6,NA +68384,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,61710.107686,62261.838119,1,98,2,2,0.63,1,1,0,0,0,2,23,1,4,5,NA +68385,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,22912.222762,29561.458771,2,96,5,5,1.5,2,2,1,0,0,2,22,1,3,5,NA +68386,7,1,1,30,NA,1,1,NA,NA,NA,2,NA,2,2,77,NA,3,5,NA,2,2,2,2,2,2,NA,NA,NA,NA,53303.690379,0,1,100,4,4,0.78,4,4,0,0,1,1,33,2,1,1,1 +68387,7,2,2,70,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,1,1,2,1,1,2,2,NA,12296.397953,12433.512478,1,99,9,9,4.08,2,2,0,0,2,1,73,2,5,1,5 +68388,7,2,2,2,NA,3,3,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47602.397711,49102.065502,1,102,6,6,1.23,4,4,2,0,0,2,25,1,5,1,5 +68389,7,2,2,15,NA,4,4,1,15,191,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15483.914568,16116.18632,1,100,6,6,2.12,2,2,0,1,0,2,44,1,3,3,NA +68390,7,1,2,25,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,2,1,3,1,2,2,1,2,2,NA,NA,NA,NA,39550.779175,0,2,96,3,3,0.54,4,4,1,1,0,1,29,1,2,1,2 +68391,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,1,2,1,2,2,NA,NA,NA,NA,60163.952904,0,1,97,12,99,NA,2,1,0,0,2,2,65,1,2,2,NA +68392,7,1,2,61,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,14994.337564,0,1,100,10,10,4.63,2,2,0,0,1,1,58,1,5,1,4 +68393,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23845.8146,22808.13483,1,98,2,1,0.11,4,1,0,0,0,2,19,1,4,NA,NA +68394,7,2,2,58,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,139343.551779,138175.157138,2,98,14,14,5,1,1,0,0,0,2,58,1,5,5,NA +68395,7,2,2,0,10,1,1,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9179.885292,9149.640857,1,101,5,5,1.23,3,3,2,0,0,2,24,1,2,5,NA 
+68396,7,2,2,0,6,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6700.950086,6495.019697,2,96,3,3,0.34,7,7,3,1,0,2,49,2,1,4,NA +68397,7,1,2,55,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,141316.739364,0,2,94,8,8,1.33,7,7,1,2,1,1,34,NA,NA,6,NA +68398,7,2,2,6,NA,4,4,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11116.391625,11531.852198,1,92,7,7,1.83,3,3,1,1,0,2,28,1,3,5,NA +68399,7,2,2,5,NA,5,6,1,5,64,NA,NA,2,2,1,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,6858.963321,7464.098007,3,91,14,14,3.58,4,4,1,1,0,1,39,2,5,1,5 +68400,7,2,1,8,NA,4,4,2,9,108,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7687.032802,8052.55895,2,99,9,9,2.43,4,4,0,2,0,2,49,1,3,3,NA +68401,7,2,1,43,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,5,NA,2,2,2,2,2,2,1,2,2,2,31347.36219,35248.66815,2,90,2,2,0.46,1,1,0,0,0,1,43,2,1,5,NA +68402,7,2,2,1,19,5,7,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3842.794137,4026.588867,1,93,15,15,5,5,5,1,0,1,1,61,2,4,1,4 +68403,7,2,2,36,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,1,2,2,2,2,2,2,2,NA,NA,NA,NA,35425.867861,35132.246023,1,97,8,8,1.45,6,6,2,2,0,2,36,2,2,1,1 +68404,7,2,2,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,11879.290971,12409.688606,2,96,3,3,1.1,1,1,0,0,1,2,65,1,4,5,NA +68405,7,2,1,1,20,3,3,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22170.765881,25013.89276,1,94,3,3,0.39,6,6,1,0,2,1,80,1,4,1,3 +68406,7,2,2,17,NA,5,6,2,17,215,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12054.065975,12306.62888,1,98,6,6,1.65,2,2,0,1,0,2,52,2,5,1,NA +68407,7,2,2,80,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,2,1,2,2,1,2,NA,NA,NA,NA,16878.750567,18850.661665,2,98,3,3,0.98,2,2,0,0,1,2,80,1,1,2,NA +68408,7,2,2,16,NA,3,3,1,16,194,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,91539.042546,96988.607103,1,101,7,7,2.31,2,2,0,1,0,2,43,1,4,3,NA +68409,7,2,1,3,NA,1,1,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,20874.345556,20902.551716,3,92,5,5,1.05,3,3,1,1,0,2,38,2,3,5,NA 
+68410,7,2,1,16,NA,4,4,2,16,193,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13863.378072,13964.345562,1,96,15,15,5,4,4,0,1,0,1,56,1,4,1,5 +68411,7,2,2,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,89807.047643,92234.66957,3,91,9,7,3.67,2,1,0,0,0,1,41,NA,NA,3,NA +68412,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,22013.270774,24073.71767,1,101,5,5,1.05,3,3,0,0,1,2,55,1,4,1,NA +68413,7,2,2,3,NA,1,1,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15326.318384,16920.666785,1,94,6,6,1.3,4,4,2,0,0,1,24,2,1,1,4 +68414,7,1,1,77,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19418.523783,0,1,101,4,4,0.99,2,2,0,0,2,1,77,1,3,1,3 +68415,7,2,2,43,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,4,2,1,2,2,1,2,2,1,2,2,1,49900.868115,50161.096898,3,92,7,7,2.78,2,2,0,0,0,1,24,1,3,5,NA +68416,7,2,1,63,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,3,8623.181934,10090.425838,2,103,3,3,0.63,3,3,0,0,1,1,63,2,3,1,NA +68417,7,2,1,19,NA,1,1,2,19,230,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,14081.782012,14391.713696,2,90,6,6,1.15,5,5,0,2,0,2,47,2,1,1,5 +68418,7,2,2,70,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,2,2,2,2,2,2,2,2,2,NA,20621.25319,22173.285594,2,93,9,9,3.14,3,3,0,0,2,1,43,NA,NA,5,NA +68419,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,81416.374938,85457.008484,1,97,5,5,1.79,1,1,0,0,1,2,70,1,4,2,NA +68420,7,2,1,22,NA,4,4,1,NA,NA,2,NA,2,1,4,NA,4,5,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,18831.340773,18391.458344,2,93,9,9,2.07,5,5,0,1,0,1,55,NA,NA,5,NA +68421,7,1,2,15,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12327.773112,0,2,95,12,12,NA,2,2,0,1,0,1,47,1,4,2,NA +68422,7,2,2,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,53370.063988,54427.336747,3,92,13,13,NA,2,2,0,0,2,2,60,1,4,1,NA +68423,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,154825.466557,157902.106304,1,91,15,15,5,2,2,0,0,0,1,44,NA,NA,1,5 
+68424,7,2,1,72,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,1,1,1,2,2,1,2,1,NA,16828.011748,23148.456034,1,97,14,14,2.29,7,7,1,2,2,1,40,2,1,1,1 +68425,7,2,1,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16287.780872,16385.890285,2,100,12,12,NA,2,2,0,0,1,1,56,1,5,1,4 +68426,7,2,2,19,NA,3,3,1,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,35205.804094,37301.699975,2,101,99,2,0.55,3,1,0,0,0,2,19,1,4,NA,NA +68427,7,2,2,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,30275.274308,30259.77569,2,101,3,1,0.18,2,1,0,0,0,2,25,1,4,5,NA +68428,7,2,2,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,2,1,2,2,1,2,2,1,2,2,1,24842.055317,24866.449113,2,95,3,3,0.63,3,3,0,0,0,2,44,1,2,4,NA +68429,7,2,1,27,NA,3,3,2,NA,NA,2,NA,2,1,4,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,118671.226879,152848.538678,2,91,15,9,5,2,1,0,0,0,2,26,1,5,5,NA +68430,7,2,2,2,NA,5,6,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6365.363193,6669.808387,1,92,14,14,2.42,6,6,1,3,0,1,30,1,4,6,NA +68431,7,2,1,3,NA,4,4,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7076.684446,7163.001252,2,99,2,2,0.22,4,4,1,1,0,1,18,1,2,NA,NA +68432,7,2,1,80,NA,2,2,2,NA,NA,1,2,2,1,9,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,11755.776731,12827.229805,3,90,3,3,0.78,3,3,0,1,2,1,80,2,3,1,2 +68433,7,2,2,6,NA,1,1,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13085.05107,13640.91346,2,92,14,14,4.03,4,4,1,1,1,2,30,1,5,4,NA +68434,7,2,2,67,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,11576.638399,12468.988052,1,101,3,3,1.12,1,1,0,0,1,2,67,1,2,5,NA +68435,7,2,2,8,NA,3,3,1,8,101,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,25826.784831,26196.011011,2,91,2,2,0.44,3,3,0,1,0,1,46,2,3,1,4 +68436,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,196995.351093,203724.329642,1,91,15,12,NA,2,1,0,0,0,1,51,1,3,1,NA +68437,7,2,1,4,NA,4,4,2,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7354.77583,7808.790358,2,90,8,8,1.67,6,6,1,1,0,1,52,1,3,1,5 
+68438,7,2,1,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,25964.952645,26783.974907,2,96,5,5,1.36,2,2,0,0,0,2,51,1,4,1,3 +68439,7,2,1,14,NA,5,7,2,14,174,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8676.361974,9272.775003,1,90,77,77,NA,4,4,0,2,0,2,51,1,5,1,5 +68440,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,116464.874823,116165.700541,2,94,99,99,NA,2,2,0,0,0,2,37,1,2,1,4 +68441,7,2,2,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22631.175755,22215.258702,2,96,5,5,1.36,2,2,0,0,0,2,51,1,4,1,3 +68442,7,2,2,42,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,5,2,2,2,2,2,2,2,1,2,2,2,30678.628571,30838.615009,3,91,5,5,1.03,4,4,0,2,0,2,42,2,1,5,NA +68443,7,2,1,73,NA,4,4,2,NA,NA,2,NA,2,2,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,6725.306794,6857.040344,2,99,77,77,NA,1,1,0,0,1,1,73,2,5,1,NA +68444,7,2,1,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19537.368697,21142.881814,2,96,3,3,0.47,4,4,1,0,1,2,61,1,4,3,NA +68445,7,2,1,70,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,4,1,NA,2,2,2,2,2,2,1,2,1,NA,9710.399795,9879.67408,2,93,12,12,NA,5,5,1,0,2,1,70,2,4,1,5 +68446,7,2,2,68,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11838.431472,12295.352662,2,92,7,7,2.72,2,2,0,0,2,2,68,1,5,1,NA +68447,7,2,2,68,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,49568.196121,49400.509437,1,95,2,2,0.78,1,1,0,0,1,2,68,1,4,3,NA +68448,7,2,2,28,NA,5,7,1,NA,NA,2,NA,2,1,3,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,18100.072824,19323.729203,1,92,7,7,3.22,1,1,0,0,0,2,28,2,5,5,NA +68449,7,2,1,12,NA,2,2,2,12,145,NA,NA,2,1,4,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15594.049321,16140.624477,2,99,77,77,NA,4,4,1,1,1,2,38,2,4,1,4 +68450,7,2,2,47,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,21481.050033,21698.613985,2,91,8,7,3.31,2,1,0,0,0,1,55,1,2,6,NA +68451,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16995.648055,16715.026676,2,100,7,7,1.38,5,5,1,0,0,2,45,1,2,3,NA 
+68452,7,2,2,69,NA,2,2,2,NA,NA,2,NA,2,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,9716.805546,12994.252166,2,90,4,4,1.47,1,1,0,0,1,2,69,2,2,3,NA +68453,7,2,2,79,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,56393.181686,58123.268911,2,93,7,7,2.31,2,2,0,0,2,2,79,1,3,1,5 +68454,7,1,1,28,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,103001.073873,0,2,97,7,7,3.21,1,1,0,0,0,1,28,1,4,5,NA +68455,7,2,1,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,39031.957066,39638.562591,1,95,3,2,0.74,2,1,0,0,0,1,27,1,3,5,NA +68456,7,2,1,3,NA,2,2,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11179.43727,11311.602194,2,93,3,3,0.37,5,5,3,0,0,1,28,2,1,6,NA +68457,7,2,1,0,8,3,3,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,28319.415322,27829.834184,1,92,9,9,2.6,4,4,2,0,0,2,32,1,3,1,5 +68458,7,2,1,44,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,29797.944087,29360.705192,1,103,5,5,0.74,5,5,1,1,0,2,40,99,3,1,1 +68459,7,2,2,16,NA,4,4,2,16,201,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10460.187371,12283.198438,1,99,7,7,1.53,5,5,0,3,0,1,39,1,3,1,3 +68460,7,2,1,4,NA,2,2,1,4,55,NA,NA,2,1,2,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11179.43727,11533.315753,2,93,14,14,2.91,6,6,2,0,1,2,74,NA,NA,2,NA +68461,7,2,1,6,NA,3,3,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,63513.013195,68899.034203,2,101,7,7,1.57,4,4,0,2,0,2,28,1,3,6,NA +68462,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,29483.108693,29407.372658,1,94,4,4,0.79,3,3,0,1,0,1,49,1,2,3,NA +68463,7,2,2,0,7,3,3,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19676.563141,20852.949038,2,99,15,15,5,3,3,1,0,0,1,43,1,5,1,5 +68464,7,2,2,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,42101.975168,41959.54633,1,101,5,5,1.24,3,3,0,0,1,2,61,1,4,1,3 +68465,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,1,2,NA,1,2,1,1,2,1,1,2,1,NA,13689.379977,14234.742701,2,92,2,2,0.89,1,1,0,0,1,2,80,2,1,2,NA 
+68466,7,2,1,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,25624.148584,27611.905955,2,96,12,12,NA,2,1,0,0,0,1,33,1,3,3,NA +68467,7,2,2,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,139800.409559,144765.126463,1,100,15,15,5,4,4,0,2,0,2,47,1,5,1,5 +68468,7,2,1,61,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,9115.676792,11889.427964,2,90,6,6,1.7,2,2,0,0,2,1,61,2,1,1,2 +68469,7,2,1,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,29738.952706,30201.133298,2,94,3,3,1.01,1,1,0,0,0,1,25,1,4,5,NA +68470,7,2,1,56,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,3,1,NA,1,2,2,1,2,2,1,2,2,3,12158.061776,12114.139928,3,90,15,15,3.7,5,5,0,0,0,1,56,2,3,1,3 +68471,7,2,2,15,NA,4,4,2,15,185,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11600.051433,12143.645666,3,90,14,14,2.97,5,5,0,2,1,1,73,2,3,2,NA +68472,7,2,1,9,NA,3,3,2,9,110,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18324.386573,20287.008963,2,95,7,7,1.13,6,6,0,3,1,1,52,1,4,1,4 +68473,7,2,1,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17811.496463,19015.919888,2,99,NA,77,NA,3,2,0,0,1,1,63,1,3,5,NA +68474,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,108014.727405,114941.235527,2,94,14,14,4.71,3,3,1,0,0,1,35,1,5,1,5 +68475,7,2,2,12,NA,3,3,2,12,146,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,36586.371708,39087.782259,1,98,3,3,0.5,5,5,0,3,0,2,56,1,3,3,NA +68476,7,2,2,9,NA,4,4,1,9,112,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9139.784234,11102.911341,2,100,3,3,0.63,4,4,0,1,0,1,51,1,2,77,NA +68477,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16386.190684,20296.970403,2,90,7,5,1.84,2,1,0,0,0,1,21,1,3,5,NA +68478,7,2,1,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,116464.874823,120278.381994,2,94,10,10,2.91,4,4,0,2,0,2,38,1,4,1,4 +68479,7,2,2,3,NA,3,3,2,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27257.164734,28966.726071,1,95,4,4,0.65,6,6,2,2,0,2,36,1,4,6,NA 
+68480,7,2,1,6,NA,1,1,2,6,77,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12477.812875,12553.43469,2,94,7,7,1.17,6,6,1,2,0,2,30,2,3,6,NA +68481,7,2,2,19,NA,3,3,2,19,239,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,100004.689924,104137.373956,1,90,15,15,5,3,3,0,0,1,1,66,NA,NA,1,4 +68482,7,2,2,77,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,15211.382198,16229.383727,2,101,3,3,0.65,3,3,0,0,1,2,77,1,3,2,NA +68483,7,2,2,14,NA,1,1,1,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16427.640886,18021.433684,1,102,7,7,1.41,5,5,0,2,2,1,72,1,4,1,3 +68484,7,2,1,47,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,10846.102759,10858.953375,1,99,7,5,1.84,2,1,0,1,0,1,47,2,4,5,NA +68485,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,122483.259869,124909.680309,3,91,6,6,2.3,1,1,0,0,1,2,64,1,5,3,NA +68486,7,2,1,3,NA,4,4,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10690.995725,11500.031681,2,98,5,5,0.59,7,7,3,0,0,2,50,1,5,4,NA +68487,7,2,1,19,NA,4,4,2,19,229,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,10817.360862,11219.635132,2,99,6,6,1.15,5,5,1,2,0,2,34,1,4,77,NA +68488,7,2,2,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,16741.034883,16282.872546,2,95,4,4,1.61,1,1,0,0,0,2,58,1,5,5,NA +68489,7,2,2,10,NA,4,4,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7746.357421,8271.811673,1,99,7,7,2.52,2,2,0,1,0,2,40,1,4,3,NA +68490,7,2,2,4,NA,5,6,1,4,59,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5601.441711,5595.525293,1,103,5,5,1.26,3,3,1,0,0,1,40,2,5,1,5 +68491,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,35161.248998,42165.766203,3,91,7,7,2.45,2,2,0,0,2,1,80,1,2,1,2 +68492,7,2,1,0,1,1,1,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8519.229538,8673.80493,1,101,15,15,4.99,4,4,2,0,0,1,31,1,4,1,4 +68493,7,2,1,29,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11770.082057,12287.307332,3,91,5,5,1.08,3,3,1,0,0,1,29,2,5,1,5 
+68494,7,2,2,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,2,1,2,2,1,2,2,1,2,2,1,20626.479002,22617.279591,2,100,4,1,0,2,1,0,0,0,2,38,1,3,2,NA +68495,7,2,2,78,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,27275.632708,28985.222851,2,96,2,2,0.87,1,1,0,0,1,2,78,1,3,2,NA +68496,7,2,1,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,17188.361186,17721.573899,2,95,2,2,0.75,1,1,0,0,0,1,56,1,2,3,NA +68497,7,2,2,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,59613.972918,59797.266126,2,100,5,5,2.2,1,1,0,0,0,2,34,1,5,3,NA +68498,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9547.901254,10157.592212,3,90,15,15,5,5,5,1,0,1,1,38,2,3,1,4 +68499,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,175544.769665,182852.89468,1,91,15,15,5,2,2,0,0,0,2,56,1,4,1,4 +68500,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,12863.404053,13555.744544,2,90,6,6,2.75,1,1,0,0,1,2,80,1,5,2,NA +68501,7,2,1,52,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,152467.08796,152865.087613,1,94,8,8,1.67,5,5,1,2,0,1,52,1,4,1,4 +68502,7,2,1,27,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,21873.767586,21362.816924,2,99,77,4,1.43,2,1,0,0,0,1,27,2,5,5,NA +68503,7,2,2,3,NA,5,7,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27257.164734,28966.726071,1,92,3,3,0.46,5,5,2,1,0,1,30,1,3,1,2 +68504,7,2,2,6,NA,3,3,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,48532.852397,48387.04619,1,98,14,14,4.12,4,4,0,2,0,2,36,1,5,1,3 +68505,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,151766.599459,156268.87702,3,91,7,7,1.97,4,4,0,0,1,2,77,1,5,2,NA +68506,7,2,2,77,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,32189.450686,32533.215899,1,101,4,4,1.22,2,2,0,0,2,2,77,1,4,1,3 +68507,7,2,2,49,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,25189.111847,25556.352159,2,102,10,10,4.76,2,2,0,0,1,2,49,2,5,5,NA 
+68508,7,2,2,31,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,1,2,2,1,15542.93857,15828.161907,3,91,8,8,2.7,3,3,1,0,0,1,31,1,5,1,5 +68509,7,2,2,26,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,14140.449064,14449.882828,3,90,15,15,4.2,6,6,1,0,2,1,60,1,5,1,4 +68510,7,2,1,65,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,8886.717016,8956.231711,2,101,6,6,1.51,3,3,0,1,1,1,65,1,2,1,4 +68511,7,2,2,9,NA,5,7,1,9,114,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8128.755281,8415.30848,1,98,4,4,0.94,3,3,1,1,0,2,28,1,2,77,NA +68512,7,2,2,57,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,20734.495277,22158.856945,2,99,77,77,NA,3,3,0,0,0,2,57,2,2,1,1 +68513,7,2,2,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,54370.517487,54186.584794,1,95,6,6,2.04,2,2,0,0,2,1,71,1,3,1,4 +68514,7,2,1,3,NA,5,7,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7629.74403,8207.121237,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +68515,7,2,1,13,NA,5,7,1,13,163,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,63907.082519,63067.196763,1,102,8,8,1.91,5,5,1,2,0,2,38,1,5,1,4 +68516,7,2,1,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,28213.419452,32164.786649,1,100,7,7,3.13,1,1,0,0,0,1,30,1,5,5,NA +68517,7,2,1,28,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,1,1,NA,1,2,1,1,2,2,NA,NA,NA,NA,49741.714519,52356.235161,2,91,14,14,4.71,3,3,0,0,0,1,28,2,1,1,2 +68518,7,2,2,25,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,19495.209875,21153.580858,2,90,7,7,3.31,1,1,0,0,0,2,25,1,5,5,NA +68519,7,2,1,5,NA,3,3,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58332.578536,63879.860857,1,91,7,7,1.88,4,4,1,2,0,2,43,1,5,4,NA +68520,7,2,2,17,NA,3,3,2,17,207,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,29885.567338,31265.78413,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +68521,7,2,2,28,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,47348.206546,48810.979214,1,92,10,10,2.93,4,4,1,0,0,2,55,1,4,1,4 
+68522,7,2,1,12,NA,4,4,2,12,154,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10817.360862,11219.635132,2,99,6,6,1.15,5,5,1,2,0,2,34,1,4,77,NA +68523,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,11879.290971,12889.384857,2,96,2,2,0.83,1,1,0,0,1,2,62,1,2,5,NA +68524,7,2,1,11,NA,4,4,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9184.716222,9199.031651,1,102,7,7,1.57,4,4,0,2,0,2,33,1,4,1,4 +68525,7,2,2,39,NA,1,1,1,NA,NA,2,NA,2,1,5,NA,3,1,2,2,2,2,1,2,2,2,2,2,2,36453.846815,35470.245447,1,102,5,5,0.92,5,5,0,3,0,2,39,2,3,1,3 +68526,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,15799.067708,16817.831279,2,95,5,5,1.52,2,2,0,0,0,2,58,1,2,2,NA +68527,7,2,2,8,NA,3,3,1,8,107,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,48532.852397,48387.04619,1,98,15,15,5,5,5,0,3,0,2,41,1,5,6,NA +68528,7,2,2,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,90234.245265,93002.542962,2,101,6,6,2.75,1,1,0,0,1,2,70,1,4,3,NA +68529,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,42992.537371,48017.666119,2,95,5,5,1.79,1,1,0,0,1,2,80,1,4,2,NA +68530,7,2,1,13,NA,2,2,2,13,166,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13921.972975,14228.387356,3,90,14,14,3.93,3,3,0,1,0,2,36,2,3,1,4 +68531,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,6038.685119,6085.921613,1,96,15,15,5,3,3,0,0,1,1,60,1,4,1,4 +68532,7,2,1,3,NA,1,1,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,17341.8035,17890.748046,2,102,6,6,1,6,6,1,3,0,1,35,2,3,1,3 +68533,7,2,2,9,NA,5,6,2,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6198.268014,6830.048804,3,90,77,77,NA,5,5,0,2,0,1,46,2,3,1,3 +68534,7,2,2,8,NA,2,2,1,8,99,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,15897.166957,16345.216522,2,102,4,4,0.57,6,6,2,3,0,2,26,2,3,1,NA +68535,7,2,2,0,7,1,1,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6787.112205,7016.795893,2,94,13,2,0.36,5,4,1,1,0,1,25,2,4,1,4 
+68536,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,8949.073879,2,95,7,7,3.22,1,1,0,0,1,2,60,1,3,3,NA +68537,7,2,2,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,95087.883412,95380.247139,1,100,15,15,5,2,2,0,0,0,1,46,1,4,1,5 +68538,7,2,1,72,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,9472.03363,9657.569347,2,93,12,12,NA,4,4,0,0,2,1,72,1,2,1,4 +68539,7,1,1,72,NA,2,2,NA,NA,NA,2,NA,2,1,9,NA,2,5,NA,2,2,2,1,2,2,NA,NA,NA,NA,12962.876803,0,2,90,3,3,0.92,1,1,0,0,1,1,72,2,2,5,NA +68540,7,2,1,9,NA,3,3,2,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14307.565788,15198.555052,3,91,5,5,1.07,4,4,0,2,0,2,36,1,5,1,4 +68541,7,2,1,12,NA,4,4,2,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11125.932433,11147.929312,1,96,77,77,NA,7,7,0,3,1,2,43,77,5,5,NA +68542,7,2,2,4,NA,3,3,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21341.740728,22680.288414,1,98,4,4,0.67,5,5,1,2,0,1,29,1,4,1,3 +68543,7,2,2,13,NA,1,1,1,13,160,NA,NA,2,2,3,6,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,21819.210646,22256.331163,3,92,6,6,0.86,7,7,1,4,0,2,36,2,1,1,1 +68544,7,2,1,66,NA,5,6,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,8623.181934,9096.941425,2,103,6,6,2.28,1,1,0,0,1,1,66,1,4,5,NA +68545,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,8219.195224,8540.664122,2,99,2,2,0.2,7,7,1,2,1,1,63,1,1,2,NA +68546,7,2,2,6,NA,4,4,2,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10888.493631,13227.224662,1,101,13,13,NA,5,5,0,1,0,1,53,1,3,1,3 +68547,7,2,2,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,60914.616471,70137.240791,2,91,2,2,0.58,1,1,0,0,0,2,21,1,5,5,NA +68548,7,2,2,66,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,124544.616504,126579.471046,1,100,6,6,2.82,1,1,0,0,1,2,66,1,5,2,NA +68549,7,2,1,67,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,2,2,2,1,2,2,2,2,2,1,9430.93681,9952.400706,1,92,3,3,0.88,2,2,0,0,2,1,67,1,1,1,1 
+68550,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,1,2,1,2,2,NA,NA,NA,NA,37019.065541,0,1,96,15,15,5,3,2,0,0,2,2,57,1,5,6,NA +68551,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,11532.104822,12394.666968,2,99,4,4,1.61,1,1,0,0,1,2,80,1,1,2,NA +68552,7,2,2,0,2,4,4,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4439.36229,4769.277094,1,96,5,5,0.53,7,7,2,2,0,2,38,1,9,6,NA +68553,7,2,2,61,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,129999.035519,131590.908764,1,98,6,6,1.11,5,5,0,2,1,2,37,1,1,1,1 +68554,7,2,2,11,NA,1,1,1,11,132,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17053.854294,17496.484818,2,94,6,6,0.8,7,7,1,3,0,2,36,2,3,1,1 +68555,7,2,2,6,NA,3,3,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22308.590534,22137.463018,1,101,5,5,0.71,6,6,1,1,1,1,63,1,2,1,5 +68556,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,24902.414229,28566.820481,1,90,15,15,5,2,2,0,0,0,2,50,2,4,3,NA +68557,7,2,1,71,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,15176.622228,16562.859469,1,101,3,3,0.98,1,1,0,0,1,1,71,1,3,3,NA +68558,7,2,2,62,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,1,1,NA,1,2,1,1,2,1,1,2,1,3,17243.546687,17998.333464,1,92,1,1,0.26,2,2,0,0,2,1,63,2,1,1,1 +68559,7,2,1,80,NA,3,3,2,NA,NA,2,NA,2,1,9,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,34540.440658,39912.090511,1,90,6,6,2.69,1,1,0,0,1,1,80,2,3,2,NA +68560,7,2,2,28,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,51746.15111,53882.638481,1,95,6,6,1.37,3,3,1,1,0,2,28,1,4,5,NA +68561,7,2,2,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,145813.864843,148196.223981,1,100,6,6,1.31,3,3,0,0,2,1,65,1,5,1,5 +68562,7,2,1,3,NA,4,4,1,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9126.025,2,100,6,6,1.52,4,4,1,2,0,2,39,1,4,3,NA +68563,7,2,2,17,NA,4,4,1,17,208,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13452.957635,13490.376767,2,93,4,4,1.29,2,2,0,1,0,2,56,2,3,4,NA 
+68564,7,2,2,35,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,4,1,2,1,2,1,1,2,1,NA,NA,NA,NA,20039.469886,21581.359058,1,97,15,15,5,4,4,2,0,0,2,35,2,4,1,4 +68565,7,2,2,65,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,14102.354333,14646.654863,3,91,15,15,5,2,2,0,0,2,2,65,2,5,1,5 +68566,7,2,1,13,NA,4,4,1,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13468.614146,13600.508187,2,102,5,5,0.67,6,6,0,4,0,2,33,1,2,6,NA +68567,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,24217.957803,24014.890408,2,94,4,1,0.09,2,1,0,0,0,2,51,1,2,6,NA +68568,7,2,1,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,114410.004231,116188.079068,1,97,15,15,3.89,5,5,0,2,0,1,50,1,4,6,NA +68569,7,2,2,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,19977.261965,20728.031273,1,92,NA,1,0.18,4,3,0,2,0,2,56,1,4,4,NA +68570,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,23465.604153,23669.640656,2,98,3,3,1.19,1,1,0,0,0,1,49,1,3,3,NA +68571,7,2,1,53,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,10562.541357,10524.383438,1,103,5,5,0.65,6,6,0,0,1,2,26,2,4,5,NA +68572,7,2,1,42,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,117613.757473,123317.366869,2,93,9,9,3.77,2,2,0,0,0,2,42,2,4,1,5 +68573,7,2,1,64,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,10288.337394,10853.580889,3,91,9,9,3.24,3,3,0,1,1,1,64,2,2,1,5 +68574,7,2,1,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,39915.513053,40776.102186,2,98,7,7,1.53,5,5,0,0,0,2,48,1,3,5,NA +68575,7,2,2,42,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,33565.905936,33977.52311,2,93,14,14,3.52,5,5,1,2,0,1,44,1,5,1,5 +68576,7,2,2,1,20,1,1,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9439.743884,10034.856293,1,90,6,6,1.11,5,5,1,2,0,1,30,2,1,6,NA +68577,7,2,1,19,NA,2,2,2,19,239,2,NA,1,1,NA,14,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22228.448129,22318.929635,1,93,6,6,1.78,3,3,0,0,0,1,19,1,3,NA,NA 
+68578,7,2,1,8,NA,2,2,1,8,100,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13665.416457,13530.568366,1,94,5,5,0.74,5,5,1,1,0,2,24,1,3,1,4 +68579,7,2,1,6,NA,1,1,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11159.151566,11335.739115,1,102,6,6,1.03,6,6,0,4,0,1,34,2,2,1,1 +68580,7,2,2,47,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,46405.063078,46647.061535,2,98,10,10,4.43,2,2,0,0,1,2,47,1,4,3,NA +68581,7,2,1,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18790.641284,18862.037788,2,100,15,15,4.97,5,5,0,2,1,2,42,1,5,1,5 +68582,7,2,2,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,19807.824556,20146.472612,1,96,9,9,3.35,3,3,0,0,0,2,42,1,4,5,NA +68583,7,2,1,11,NA,3,3,2,11,135,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,42986.51011,44926.997612,2,94,14,14,2.83,6,6,0,4,0,2,38,1,2,1,2 +68584,7,2,1,18,NA,3,3,2,18,221,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,88198.948426,91036.751291,1,101,9,9,2.6,4,4,0,1,2,2,63,1,4,1,4 +68585,7,2,2,1,17,1,1,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11901.705423,12652.027961,2,102,7,7,1.33,6,6,1,3,0,1,34,2,2,1,1 +68586,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,29040.300396,28462.118402,2,101,1,1,0.05,4,1,0,0,0,2,21,1,4,5,NA +68587,7,2,2,2,NA,4,4,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6854.479526,6961.787344,2,91,6,6,0.78,7,7,1,4,0,2,38,2,2,77,NA +68588,7,2,2,14,NA,1,1,1,14,174,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,16360.434077,17687.989006,3,91,5,5,1.03,4,4,0,2,0,2,42,2,1,5,NA +68589,7,2,1,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16117.991297,16472.240486,1,96,10,10,3.04,4,4,0,1,0,2,43,1,5,1,4 +68590,7,2,1,1,16,4,4,2,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5942.817425,6188.375425,2,97,13,13,NA,6,6,2,2,0,2,24,1,2,6,NA +68591,7,2,1,67,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,8088.223021,7993.317874,1,99,14,14,4.86,3,3,0,1,1,2,56,1,5,1,5 
+68592,7,2,1,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,24793.720953,26072.340199,1,92,14,14,3.25,4,4,0,2,0,1,34,1,4,6,NA +68593,7,2,2,63,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14745.12656,14965.949752,1,98,10,10,5,1,1,0,0,1,2,63,2,5,5,NA +68594,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,12291.154515,13189.875012,1,95,3,3,0.92,1,1,0,0,1,1,80,1,3,2,NA +68595,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,36676.329165,37617.002202,1,95,4,4,1.56,1,1,0,0,0,2,50,1,2,4,NA +68596,7,2,2,6,NA,3,3,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17958.854782,18534.102526,2,100,2,2,0.38,3,3,0,2,0,2,35,1,4,5,NA +68597,7,2,2,73,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,NA,49260.413155,50771.674072,2,100,14,5,2.2,2,1,0,0,2,1,71,1,2,6,NA +68598,7,2,1,14,NA,4,4,1,15,180,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11429.628358,11512.87076,2,96,3,3,0.47,6,6,0,4,0,1,36,1,4,1,4 +68599,7,2,2,2,NA,1,1,1,2,32,NA,NA,2,1,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13322.479814,13501.65138,1,92,5,5,1.15,3,3,1,0,0,2,27,1,3,1,3 +68600,7,2,2,7,NA,3,3,2,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24070.467912,24097.958076,1,95,6,3,0.45,6,4,1,2,0,1,28,1,2,1,2 +68601,7,2,1,26,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9956.598907,10266.888978,1,95,5,5,0.73,6,6,1,0,1,1,62,2,3,1,NA +68602,7,2,1,36,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,34997.800447,35379.102239,2,96,6,6,1.11,5,5,0,3,0,2,32,2,3,1,2 +68603,7,2,1,19,NA,4,4,2,19,234,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12721.673656,12814.326071,2,97,2,2,0.49,2,2,0,0,0,1,24,1,4,6,NA +68604,7,2,2,2,NA,4,4,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5403.773938,5854.006054,3,90,10,10,2.59,5,5,1,2,0,2,32,1,4,1,4 +68605,7,2,2,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,23003.908463,24458.642976,2,99,12,1,0.12,2,1,0,0,1,2,46,1,3,2,NA 
+68606,7,2,2,62,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,2,NA,2,2,2,2,2,2,1,2,2,2,9330.330573,12477.420452,3,90,6,6,1.04,5,5,0,1,1,2,50,2,1,1,2 +68607,7,2,2,0,4,5,6,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5507.575232,5813.640954,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 +68608,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,2,1,9,NA,5,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,41252.836432,0,1,100,15,15,5,3,1,0,0,3,2,69,NA,NA,5,NA +68609,7,2,1,4,NA,1,1,2,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14505.510202,14676.996441,2,94,7,7,1.17,6,6,1,2,0,2,30,2,3,6,NA +68610,7,2,1,6,NA,4,4,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8439.403196,8726.876152,2,103,6,6,1.11,5,5,1,2,0,2,36,1,4,5,NA +68611,7,2,1,8,NA,5,6,2,8,96,NA,NA,2,2,3,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9928.619925,10708.884665,1,91,12,12,NA,4,4,0,2,0,1,43,2,5,1,5 +68612,7,2,2,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,24713.281483,24036.937705,1,92,10,10,4.3,2,2,0,0,0,2,55,1,4,1,5 +68613,7,2,2,61,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,166028.936087,168741.576297,2,101,8,8,4.41,1,1,0,0,1,2,61,1,4,2,NA +68614,7,2,1,73,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,2,1,NA,2,2,2,1,2,2,2,2,2,NA,15301.031416,16695.608527,1,90,4,4,0.94,3,3,0,0,2,2,78,2,1,1,2 +68615,7,2,1,0,0,2,2,1,NA,0,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7530.249163,7898.897498,2,102,14,14,3.25,5,5,2,0,0,1,27,1,5,1,5 +68616,7,2,2,17,NA,4,4,2,17,206,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11956.705842,11923.015236,1,96,15,15,5,5,5,0,3,0,2,47,1,5,1,5 +68617,7,2,2,2,NA,5,6,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,5022.237557,1,91,14,14,3.69,4,4,1,1,0,2,29,2,5,1,5 +68618,7,2,2,13,NA,5,6,2,13,158,NA,NA,2,2,2,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7649.811754,7945.620413,2,90,3,1,0,5,1,1,2,0,1,44,2,5,1,5 +68619,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16942.073558,18043.178849,1,99,6,6,1.12,4,4,2,0,0,2,29,1,2,1,4 
+68620,7,2,1,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16555.478822,16319.493771,2,96,3,3,0.47,6,6,0,4,0,1,36,1,4,1,4 +68621,7,2,2,14,NA,4,4,1,14,178,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,6,6,1.16,4,4,0,3,0,2,36,1,4,4,NA +68622,7,2,2,70,NA,3,3,2,NA,NA,2,NA,2,1,9,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,64463.340883,65151.773075,1,91,7,7,2.72,2,2,0,0,2,1,70,NA,NA,1,4 +68623,7,2,1,28,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11279.598253,11667.930056,1,93,15,15,5,5,5,1,0,1,1,61,2,4,1,4 +68624,7,2,1,45,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,34205.013302,34329.398365,2,102,14,14,2.44,7,7,0,2,1,2,71,1,3,3,NA +68625,7,2,1,24,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,2,2,2,1,2,2,1,2,2,1,47487.549895,49034.44781,1,102,77,77,NA,3,3,0,0,1,1,61,2,4,1,1 +68626,7,2,2,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,54781.947924,56462.604085,2,93,10,10,3.61,3,3,0,0,2,1,75,1,4,1,4 +68627,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,74668.489197,78374.230241,2,91,77,77,NA,2,2,0,0,2,2,70,1,3,1,4 +68628,7,2,2,17,NA,4,4,2,17,206,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11711.384457,11558.024533,2,95,8,8,1.61,6,6,1,3,0,2,48,1,3,5,NA +68629,7,2,2,4,NA,4,4,1,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,11378.89676,1,96,12,12,NA,7,7,1,0,1,2,59,1,3,1,1 +68630,7,2,1,32,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,69063.138927,75761.928286,1,92,10,10,2.1,6,6,1,1,0,2,29,1,4,1,2 +68631,7,2,1,12,NA,5,6,2,12,149,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10618.845283,10988.972369,1,93,14,14,5,2,2,0,1,0,2,46,2,5,3,NA +68632,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,134694.414609,139412.076132,2,94,15,15,5,2,2,0,0,2,1,64,1,5,1,5 +68633,7,2,2,38,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,3,1,2,1,2,2,1,2,2,1,2,2,2,46400.322077,46544.459997,1,95,9,9,2.46,4,4,0,0,0,1,42,2,2,1,3 
+68634,7,2,2,15,NA,4,4,1,15,181,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17373.928778,17416.718116,2,96,15,15,5,3,3,0,1,0,1,55,1,5,1,4 +68635,7,2,2,58,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19183.115011,19284.554221,1,92,14,14,5,2,2,0,0,0,1,40,1,5,1,4 +68636,7,2,1,1,17,5,6,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6319.053383,6506.026949,1,91,15,15,5,3,3,1,0,0,1,39,2,5,1,5 +68637,7,2,2,33,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,28059.790487,28146.065043,1,94,7,7,1.21,6,6,2,2,0,1,31,1,2,6,NA +68638,7,2,1,18,NA,4,4,2,18,222,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11538.004005,11560.815583,1,96,9,9,3.35,3,3,0,0,0,2,42,1,4,5,NA +68639,7,2,2,19,NA,3,3,2,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,123622.182994,134789.559864,2,94,15,15,3.82,5,5,0,0,0,1,50,1,5,1,5 +68640,7,2,2,28,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,12412.338679,12980.380959,3,90,15,15,3.7,5,5,0,0,0,1,56,2,3,1,3 +68641,7,2,2,14,NA,1,1,1,14,176,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,20460.442471,21235.303768,2,102,7,7,1.04,7,7,1,2,0,2,37,2,1,1,2 +68642,7,2,1,16,NA,4,4,1,16,197,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16147.713323,16532.569027,1,92,NA,1,0.18,4,3,0,2,0,2,56,1,4,4,NA +68643,7,2,1,17,NA,1,1,1,17,207,2,NA,1,1,NA,10,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,32326.52031,34735.818434,2,94,5,5,0.65,6,6,0,3,0,1,44,2,1,1,1 +68644,7,2,2,1,13,2,2,2,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9889.733317,10584.607254,1,91,5,5,0.8,5,5,2,1,0,2,31,2,3,1,1 +68645,7,2,2,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,48731.095364,50226.117333,2,93,6,6,2.51,1,1,0,0,1,2,75,1,5,3,NA +68646,7,2,2,45,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,34226.159502,34404.646012,1,97,4,4,0.65,4,4,0,1,0,2,45,2,2,3,NA +68647,7,2,1,54,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,16851.334496,17374.092058,2,95,3,3,1.07,1,1,0,0,0,1,54,1,2,5,NA 
+68648,7,2,2,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,85420.170155,86011.35365,1,91,14,14,3.06,5,5,2,0,0,2,30,1,5,1,5 +68649,7,2,2,0,8,1,1,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6787.112205,7238.162123,2,94,8,8,2.33,4,4,2,0,0,1,24,1,2,6,NA +68650,7,2,1,3,NA,5,7,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6538.646646,7333.807608,2,100,6,6,1.62,3,3,1,0,0,1,32,1,3,1,3 +68651,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,87808.984693,88078.968218,1,93,15,15,5,1,1,0,0,0,2,32,1,5,5,NA +68652,7,2,1,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,78529.577822,86568.047729,1,98,10,10,2.2,6,6,1,3,0,2,31,1,4,6,NA +68653,7,2,1,9,NA,1,1,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11159.151566,11210.524757,1,102,4,4,0.5,6,6,2,2,0,1,25,1,2,1,3 +68654,7,2,2,2,NA,4,4,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5687.793894,6161.690011,2,90,7,7,1.53,5,5,2,1,0,1,35,2,4,1,4 +68655,7,2,1,19,NA,5,6,1,19,231,2,NA,2,2,1,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9948.022697,10353.284725,2,101,5,5,1.84,2,1,0,0,0,1,19,2,4,NA,NA +68656,7,2,1,10,NA,4,4,2,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8183.206265,8693.880504,3,90,3,3,0.78,3,3,0,1,2,1,80,2,3,1,2 +68657,7,2,1,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,22705.790734,22474.018796,1,95,3,3,0.76,3,3,0,0,1,1,41,1,2,1,4 +68658,7,2,2,17,NA,5,6,2,17,212,2,NA,2,1,99,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10998.124667,11476.706351,1,91,15,15,5,4,4,0,2,0,2,55,1,5,1,5 +68659,7,2,2,15,NA,3,3,1,15,186,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,85039.920663,86146.650284,1,94,14,14,2.96,5,5,0,3,0,2,39,1,4,1,3 +68660,7,2,1,24,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,1,5,NA,2,2,2,1,2,2,2,2,1,2,38474.772527,38999.213364,2,93,4,4,0.56,5,5,0,0,0,2,49,2,2,5,NA +68661,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10991.458112,11482.21496,2,99,15,15,5,2,2,0,0,2,1,60,1,5,1,5 
+68662,7,2,2,37,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,16369.916397,16518.354031,1,101,8,8,1.81,5,5,2,0,1,2,37,2,4,1,2 +68663,7,2,2,48,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,14101.070104,15807.720539,2,92,6,6,1.3,4,4,0,1,0,2,48,2,3,1,3 +68664,7,2,1,6,NA,2,2,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13953.558291,13815.866863,2,93,15,15,5,3,3,0,1,0,2,36,2,3,1,5 +68665,7,2,2,51,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,3,3,NA,2,2,2,2,2,2,2,2,1,2,23122.188977,23961.502212,2,93,4,4,0.69,4,4,0,1,1,2,66,2,3,2,NA +68666,7,2,1,15,NA,5,6,1,15,183,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9587.464696,10013.85895,3,92,15,15,5,3,3,0,1,0,1,55,2,5,1,4 +68667,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,1,1,2,1,1,2,2,3,22345.774392,22681.166648,1,92,7,7,2.1,3,3,0,0,0,1,24,2,4,5,NA +68668,7,2,2,8,NA,1,1,1,8,104,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15841.451259,16252.614023,3,92,15,15,3.15,7,7,0,4,0,2,35,2,3,3,NA +68669,7,2,1,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,27140.404673,27611.034752,1,101,1,1,0.08,6,6,0,1,0,1,51,1,2,5,NA +68670,7,2,2,5,NA,5,7,2,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8173.816615,8894.95474,1,97,15,15,5,4,4,1,0,0,2,39,2,5,1,5 +68671,7,2,1,6,NA,4,4,2,6,81,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7504.213986,7926.614174,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +68672,7,2,1,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,133495.735893,134864.21511,3,92,12,3,0.98,3,1,0,0,0,1,45,1,3,1,3 +68673,7,2,1,62,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,10004.038848,10164.290775,2,102,5,5,1.3,3,3,0,0,1,1,56,1,2,5,NA +68674,7,2,2,0,5,5,7,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8377.359485,8321.07116,2,96,12,12,NA,4,4,1,0,0,2,20,1,5,1,5 +68675,7,2,2,73,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,18481.000773,19056.863071,1,100,10,10,4.63,2,2,0,0,2,1,78,2,5,1,5 
+68676,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,19060.786733,19514.352798,2,97,4,4,0.81,4,4,1,1,0,2,51,1,3,4,NA +68677,7,2,2,2,NA,5,7,1,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8474.492088,9470.199674,1,102,7,7,1.9,4,4,1,1,0,1,29,1,4,1,3 +68678,7,2,1,43,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,27776.016947,27368.446715,2,90,99,99,NA,5,5,1,1,0,2,40,2,3,1,1 +68679,7,2,1,43,NA,5,7,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,168806.614822,176992.791466,2,101,5,5,1.36,2,2,0,0,0,2,47,1,2,1,4 +68680,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,53634.754806,54437.113731,1,98,12,2,0.45,3,1,0,0,0,2,22,NA,NA,5,NA +68681,7,2,1,66,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,1,2,NA,7973.883342,8285.757609,1,100,5,5,2.06,1,1,0,0,1,1,66,1,3,3,NA +68682,7,2,2,79,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,98976.420245,105956.226489,2,101,5,5,1.63,2,2,0,0,2,1,80,1,1,1,1 +68683,7,2,2,20,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,2,2,2,1,2,2,1,2,2,2,32537.532358,33640.063825,2,90,8,8,2.01,4,4,0,0,1,2,67,2,4,2,NA +68684,7,2,2,6,NA,3,3,2,6,72,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23521.178662,23340.749474,1,101,3,3,0.44,5,5,0,3,0,1,35,1,3,1,4 +68685,7,2,2,12,NA,4,4,2,12,154,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11905.948878,13980.928588,2,99,5,5,1.32,2,2,0,1,0,1,46,2,2,5,NA +68686,7,2,2,25,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,18801.993237,20608.537144,2,96,7,7,2.45,2,2,0,0,0,1,24,2,5,5,NA +68687,7,2,2,45,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,39843.937983,40051.72073,1,102,9,9,3.74,2,2,0,0,0,2,45,1,4,1,2 +68688,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,94644.050918,104631.781207,2,91,15,15,5,4,4,2,0,0,1,35,1,5,1,5 +68689,7,2,2,70,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,1,2,2,1,2,2,NA,18607.968221,20176.873294,2,92,5,5,1.36,2,2,0,0,2,1,75,2,1,1,1 
+68690,7,2,2,29,NA,5,7,2,NA,NA,2,NA,2,1,6,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,45549.853584,51248.283427,3,91,7,7,1.57,4,4,2,0,0,2,29,2,3,1,3 +68691,7,2,1,7,NA,3,3,2,7,90,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,66868.503099,69864.859716,1,98,14,14,3.15,5,5,0,3,0,1,34,1,4,1,4 +68692,7,2,2,4,NA,5,6,1,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5081.860978,5222.553414,3,91,14,14,3.06,5,5,3,0,0,1,34,2,5,1,5 +68693,7,2,1,28,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,124091.929364,126020.47345,1,95,15,15,4.14,7,7,2,2,0,1,28,1,4,5,NA +68694,7,2,1,57,NA,4,4,2,NA,NA,1,1,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16589.308426,16584.966003,2,90,15,15,5,4,4,0,0,0,1,57,2,5,1,5 +68695,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,25815.880139,26556.735732,2,101,1,1,0.23,2,1,0,0,0,1,20,1,4,5,NA +68696,7,2,1,44,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,5,3,NA,2,2,2,2,2,2,2,2,2,2,35393.002863,36467.929589,2,93,3,3,0.58,4,4,0,1,1,1,65,2,1,3,NA +68697,7,2,1,10,NA,1,1,2,10,130,NA,NA,2,2,3,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12477.812875,12675.267527,2,94,4,4,0.73,5,5,2,1,0,1,35,2,1,6,NA +68698,7,2,1,8,NA,1,1,1,8,107,NA,NA,2,2,2,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13870.762641,14200.45921,2,102,7,7,1.33,6,6,1,3,0,1,34,2,2,1,1 +68699,7,2,2,18,NA,4,4,1,19,228,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13801.622751,14246.373765,1,100,10,10,2.59,5,5,0,1,0,2,40,1,5,1,NA +68700,7,2,2,4,NA,1,1,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12261.362755,13536.873522,1,103,13,13,NA,4,4,2,0,0,2,27,2,2,6,NA +68701,7,2,2,27,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,2,6,2,2,2,2,2,2,2,2,2,1,2,32455.694722,36034.785338,1,103,13,13,NA,4,4,2,0,0,2,27,2,2,6,NA +68702,7,2,1,56,NA,4,4,2,NA,NA,2,NA,2,2,7,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,20066.354173,20688.84721,1,96,6,6,2.24,1,1,0,0,0,1,56,2,3,3,NA +68703,7,2,1,63,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,9235.951997,9415.956027,1,103,15,15,5,2,2,0,0,1,1,63,1,5,3,NA 
+68704,7,2,1,7,NA,3,3,2,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18324.386573,20287.008963,2,95,7,7,1.13,6,6,0,3,1,1,52,1,4,1,4 +68705,7,2,1,6,NA,4,4,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12267.215138,12414.279886,2,102,5,5,0.76,5,5,1,3,0,2,30,1,4,4,NA +68706,7,2,1,26,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,21022.682584,20897.3615,1,100,5,5,1.05,3,3,1,0,0,2,35,1,4,6,NA +68707,7,2,1,54,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,166897.201244,167183.296853,2,91,15,15,5,4,4,0,0,0,1,54,1,5,1,NA +68708,7,2,2,61,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,1,2,NA,2,2,2,2,2,2,1,2,2,2,6278.072933,7279.121105,2,103,5,5,0.65,6,6,1,0,1,2,61,2,1,2,NA +68709,7,2,2,30,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,2,5,3,1,2,2,1,2,2,NA,NA,NA,NA,12801.027411,15381.268364,2,90,77,77,NA,4,3,1,0,0,2,30,2,2,5,NA +68710,7,2,1,40,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,4,1,NA,2,2,2,2,2,2,2,2,1,2,33029.272844,32544.619181,2,93,6,6,0.93,5,5,1,2,0,1,40,2,4,1,4 +68711,7,2,1,26,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,12084.002684,12472.760771,1,99,9,9,5,1,1,0,0,0,1,26,1,5,5,NA +68712,7,2,1,0,2,5,7,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7882.953266,8003.945501,2,96,7,7,1.79,4,4,1,0,0,2,30,1,4,6,NA +68713,7,2,2,22,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,45798.520132,47350.399021,2,91,3,3,0.73,3,3,0,0,0,2,22,2,2,5,NA +68714,7,2,2,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,18441.731082,18102.807884,1,96,15,9,5,2,1,0,0,0,2,55,1,4,5,NA +68715,7,2,1,54,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,145342.530024,147904.745834,1,99,15,15,5,2,2,0,0,0,1,54,1,3,1,4 +68716,7,2,2,12,NA,1,1,1,12,151,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,24481.187693,24971.637581,1,95,13,13,NA,5,5,1,2,0,2,34,2,1,1,1 +68717,7,2,1,80,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,9835.055907,10027.702409,2,99,10,10,4.76,2,2,0,0,2,1,80,1,2,2,NA 
+68718,7,2,2,6,NA,1,1,2,6,79,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13231.432201,13706.598104,2,97,4,4,0.67,4,4,0,2,0,1,39,2,2,6,NA +68719,7,2,1,2,NA,5,6,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6185.185728,6679.834548,1,97,15,15,4.84,6,6,2,0,0,1,53,NA,NA,1,NA +68720,7,2,2,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,215228.012584,222579.783434,1,98,6,6,1.98,2,2,0,0,0,1,54,1,4,1,3 +68721,7,2,2,39,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,36053.766709,37679.125242,1,100,3,3,0.73,3,3,2,0,0,2,39,1,3,5,NA +68722,7,2,1,40,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,31740.385214,33170.19689,2,96,7,7,1.57,4,4,0,2,0,1,40,2,2,1,5 +68723,7,2,2,9,NA,1,1,1,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10118.363218,10311.586628,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +68724,7,2,2,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,35160.335949,35576.957572,1,103,5,5,1.02,4,4,2,0,0,1,25,1,2,1,4 +68725,7,2,2,0,9,3,3,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27484.911887,26738.360643,1,92,14,14,3.9,4,4,2,0,0,2,28,1,3,1,3 +68726,7,2,2,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,27769.056387,27890.133764,1,94,2,2,0.42,3,3,0,0,0,2,52,1,4,1,1 +68727,7,2,2,9,NA,5,6,2,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6198.268014,6618.822195,3,90,12,12,NA,5,5,1,2,0,1,37,2,5,1,5 +68728,7,2,2,9,NA,2,2,1,9,117,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12307.832776,12608.996083,2,93,5,5,0.89,4,4,0,2,0,1,42,NA,NA,6,NA +68729,7,2,1,8,NA,1,1,1,8,100,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,12577.115885,12776.141954,2,96,6,6,1.11,6,6,0,2,1,1,40,2,2,1,2 +68730,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,9260.072397,9783.740861,1,98,3,3,1.07,1,1,0,0,1,1,80,1,4,3,NA +68731,7,2,2,1,16,1,1,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14326.094268,15816.39252,3,92,14,14,3.25,4,4,2,0,0,2,33,1,5,1,5 
+68732,7,2,2,33,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,42456.72357,41469.55672,1,92,14,14,3.15,5,5,1,2,0,1,34,1,4,1,4 +68733,7,2,1,54,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,37557.946192,44552.872015,2,102,15,15,3.92,5,5,0,0,0,1,19,1,4,NA,NA +68734,7,2,2,1,17,3,3,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27529.278041,30383.810541,1,92,5,5,1.15,3,3,1,0,0,1,23,1,4,1,4 +68735,7,2,1,11,NA,4,4,2,11,136,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8453.214884,8599.856249,2,97,6,6,1.02,6,6,1,2,0,1,37,1,3,1,3 +68736,7,2,2,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,28499.00321,31184.438144,2,98,3,3,0.75,2,2,0,0,0,1,22,1,2,5,NA +68737,7,2,1,42,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,1,NA,1,2,2,1,2,2,1,2,2,2,41527.444056,43502.112286,1,95,9,9,2.46,4,4,0,0,0,1,42,2,2,1,3 +68738,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,120220.968534,126279.549,1,98,6,6,1.78,2,2,0,0,0,2,48,1,4,5,NA +68739,7,2,1,31,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,14204.262514,14001.792163,1,99,6,6,0.6,7,7,2,1,1,2,69,1,3,2,NA +68740,7,2,1,64,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,101090.822284,102038.91726,2,102,14,14,5,1,1,0,0,1,1,64,1,4,3,NA +68741,7,2,2,31,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,3,2,1,2,2,1,2,2,NA,NA,NA,NA,39561.667842,38928.923531,2,98,5,5,1.63,2,2,1,0,0,2,31,1,1,3,NA +68742,7,2,2,3,NA,2,2,1,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15358.480588,15565.033952,1,102,5,5,1.36,2,2,1,0,0,2,21,1,3,5,NA +68743,7,2,1,9,NA,5,7,2,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5171.245544,5675.166629,1,99,15,15,4.47,4,4,0,2,0,2,52,2,5,1,5 +68744,7,2,1,33,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19384.896286,22235.85388,1,94,5,5,1.04,4,4,0,2,0,2,29,1,3,1,3 +68745,7,2,2,61,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,31362.042024,31983.331006,1,94,4,4,1.4,1,1,0,0,1,2,61,1,4,3,NA 
+68746,7,2,2,60,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,2,1,2,1,NA,17243.546687,17909.086027,1,92,7,7,2.1,3,3,0,0,2,1,37,2,5,5,NA +68747,7,2,2,10,NA,2,2,1,11,132,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15962.145468,16061.826248,2,98,6,6,0.78,7,7,1,3,1,2,63,1,2,4,NA +68748,7,2,1,70,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,18239.933451,19545.291055,2,102,7,7,1.68,5,5,0,0,3,1,70,2,4,1,4 +68749,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,15842.721579,15352.843701,2,99,3,3,0.42,6,6,1,2,0,2,43,1,4,6,NA +68750,7,2,2,16,NA,4,4,2,16,200,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12209.74498,12781.910285,2,90,3,3,0.95,2,2,0,1,0,2,49,1,3,5,NA +68751,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,83226.861139,86401.777435,2,95,14,14,5,2,2,0,0,0,1,39,1,4,1,5 +68752,7,2,1,45,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,46745.699003,51362.782618,2,98,8,8,2.26,4,4,0,1,0,2,43,1,3,1,2 +68753,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,127315.335607,133282.596235,1,94,15,15,5,3,3,0,1,0,2,43,1,5,1,5 +68754,7,2,1,57,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,143642.364525,143888.596634,1,90,14,14,3.25,4,4,0,0,1,2,77,1,3,3,NA +68755,7,2,2,2,NA,1,1,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11512.764389,11879.078775,2,98,2,2,0.35,3,3,2,0,0,2,20,1,4,5,NA +68756,7,2,2,4,NA,1,1,1,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10024.946819,10059.178608,2,93,77,77,NA,7,7,3,1,0,2,43,2,1,1,9 +68757,7,2,2,0,2,5,6,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9150.459338,9901.313011,1,97,15,15,5,4,4,2,0,0,1,40,2,5,1,5 +68758,7,2,1,78,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,1,2,NA,17703.366452,19320.397271,1,101,4,4,1.22,2,2,0,0,2,2,77,1,3,1,2 +68759,7,1,2,15,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,15809.066118,0,2,93,3,3,0.63,3,3,0,1,0,1,53,2,2,1,4 
+68760,7,2,1,24,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,37326.925742,39362.952436,2,91,6,1,0.31,4,1,0,0,0,1,25,NA,NA,5,NA +68761,7,2,2,4,NA,4,4,2,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9268.093277,10357.044767,1,99,3,3,0.54,3,3,1,0,0,2,29,1,4,1,4 +68762,7,2,2,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,60664.751082,63005.627956,1,102,14,14,4.05,3,3,0,2,0,2,34,1,4,3,NA +68763,7,2,1,26,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,2,5,NA,1,2,2,2,2,2,1,2,2,1,35338.972549,37196.45724,2,90,4,4,0.81,3,3,0,0,0,1,39,2,3,5,NA +68764,7,2,1,3,NA,4,4,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9572.619158,2,100,7,7,1.38,5,5,1,0,0,2,45,1,2,3,NA +68765,7,2,1,25,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,4,6,NA,1,2,2,1,2,2,1,2,2,2,41258.226616,42207.135518,2,99,15,1,0.27,2,1,0,0,0,1,31,1,5,6,NA +68766,7,2,1,6,NA,4,4,2,6,77,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10464.577474,10672.635694,1,96,99,99,NA,4,4,1,1,0,2,35,2,3,1,3 +68767,7,2,2,68,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,9680.216878,10112.428208,1,96,99,99,NA,2,2,0,0,1,2,68,2,4,5,NA +68768,7,2,2,77,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,4,NA,2,2,2,2,2,2,1,2,2,NA,17318.187297,23904.945555,2,90,2,2,0.64,1,1,0,0,1,2,77,2,1,4,NA +68769,7,2,2,56,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,14680.520497,15280.64157,3,91,15,15,5,3,3,0,1,1,2,56,2,5,1,NA +68770,7,2,2,39,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,1,6,2,2,2,2,1,2,2,1,2,1,2,29102.738194,30199.222574,2,103,77,77,NA,7,7,0,4,0,1,38,2,1,6,NA +68771,7,2,2,24,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,23845.8146,23474.128607,1,98,15,15,5,4,4,1,0,0,2,24,1,4,1,NA +68772,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,1,2,1,2,2,NA,NA,NA,NA,11289.606124,12205.635507,1,94,8,8,1.39,7,7,2,0,1,2,52,1,5,2,NA +68773,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,163102.567998,164787.083347,1,99,9,9,4.35,2,2,0,0,1,1,66,1,2,1,3 
+68774,7,2,1,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,43104.257334,46015.156479,2,101,5,5,1.36,2,2,0,0,0,2,22,1,4,5,NA +68775,7,2,2,18,NA,5,6,2,18,217,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6226.949288,6291.898221,2,94,77,77,NA,6,6,2,0,0,2,18,1,3,NA,NA +68776,7,2,1,8,NA,3,3,1,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,42634.626688,43853.750605,1,102,14,14,4.05,3,3,0,2,0,2,34,1,4,3,NA +68777,7,2,1,17,NA,3,3,2,17,210,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30100.326038,35344.542616,1,101,6,6,1.31,3,3,0,2,0,1,43,1,3,4,NA +68778,7,2,1,0,8,1,1,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7284.164858,7692.652584,2,98,4,4,0.65,5,5,3,0,0,2,23,1,4,5,NA +68779,7,2,1,16,NA,1,1,1,16,193,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,27378.670648,27376.262696,2,96,9,9,4.21,2,2,0,1,0,2,50,1,4,4,NA +68780,7,2,1,78,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,16632.531676,17822.854094,1,100,10,10,4.63,2,2,0,0,2,1,78,2,5,1,5 +68781,7,2,1,34,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,13936.822202,14121.672299,1,93,15,6,2.3,6,1,0,0,0,1,34,2,5,5,NA +68782,7,2,2,0,8,4,4,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4543.931297,4593.545672,1,96,14,14,3.34,4,4,1,1,0,2,43,1,5,77,NA +68783,7,2,1,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,32375.321924,36431.315201,1,101,2,2,0.22,4,4,1,0,0,2,25,1,4,6,NA +68784,7,2,2,18,NA,1,1,2,19,228,2,NA,2,2,3,12,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,16594.391299,17940.930507,3,91,6,6,0.89,7,7,1,1,0,1,59,2,1,1,1 +68785,7,2,1,0,10,4,4,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6275.847063,6372.172481,2,100,1,1,0,4,4,2,0,0,2,23,1,4,5,NA +68786,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,33707.673642,36318.620408,2,95,15,15,5,2,2,0,0,2,1,80,1,5,1,5 +68787,7,2,1,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,149474.480274,150876.348421,1,100,6,6,1.31,3,3,0,0,2,1,65,1,5,1,5 
+68788,7,2,1,9,NA,1,1,1,9,113,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14258.502552,1,92,6,6,1.12,4,4,0,2,0,1,20,1,2,1,2 +68789,7,2,1,16,NA,3,3,1,16,196,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,126777.533816,125822.536881,1,100,15,15,5,4,4,0,2,0,1,46,1,5,1,5 +68790,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,38954.135779,38857.944364,1,91,3,3,0.62,3,3,0,1,0,2,55,1,4,4,NA +68791,7,2,2,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19130.246369,18752.254654,2,95,14,14,3.47,4,4,0,0,0,2,45,1,4,1,4 +68792,7,2,2,5,NA,1,1,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,16648.051651,2,98,5,5,0.59,7,7,2,1,2,2,71,1,2,1,1 +68793,7,2,1,73,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,68460.271241,72711.227332,1,102,7,7,2.86,2,2,0,0,2,1,73,1,5,1,3 +68794,7,2,1,74,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,87101.243392,92194.745773,1,101,14,14,4.96,2,2,0,0,2,1,74,1,4,1,3 +68795,7,1,2,4,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13366.393396,0,2,94,7,7,1.18,7,7,1,4,0,2,31,1,4,6,NA +68796,7,2,1,4,NA,2,2,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14505.510202,15161.207968,2,94,1,1,0.01,7,7,1,3,0,1,41,2,1,1,1 +68797,7,2,1,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,17138.242525,20070.705691,1,90,6,6,1.57,3,3,1,0,0,2,25,1,3,6,NA +68798,7,2,1,0,6,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7757.493251,8192.524698,3,92,13,13,NA,6,6,1,2,0,1,53,1,9,1,3 +68799,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,46023.826844,48519.015788,1,91,99,99,NA,1,1,0,0,1,1,70,1,2,3,NA +68800,7,2,2,1,22,1,1,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11512.764389,11552.07657,2,98,7,7,1.26,7,7,1,2,0,1,43,1,2,1,1 +68801,7,2,2,3,NA,4,4,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10295.166918,11223.20055,2,96,12,10,2.17,7,6,2,3,0,1,29,1,4,3,NA 
+68802,7,2,2,40,NA,4,4,1,NA,NA,2,NA,2,1,4,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,25189.042335,24671.456999,1,100,9,9,2.22,5,5,1,2,0,2,40,2,4,1,4 +68803,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18070.666316,17788.997869,1,99,14,14,4.05,3,3,0,1,0,2,52,1,4,4,NA +68804,7,1,1,31,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19788.748292,0,1,94,1,1,0.08,7,7,2,4,0,1,31,1,2,1,4 +68805,7,2,1,0,4,3,3,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17113.022485,16817.175522,1,91,15,15,5,3,3,1,0,0,1,36,1,5,1,5 +68806,7,2,2,3,NA,5,6,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8171.700571,8311.029296,1,92,7,7,1.65,4,4,2,0,0,1,24,1,4,1,3 +68807,7,2,2,35,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,15388.376971,15419.798755,2,99,15,15,5,2,2,0,0,0,2,35,1,5,1,5 +68808,7,2,1,58,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,18032.281818,17974.767241,2,99,1,1,0.4,1,1,0,0,0,1,58,1,4,3,NA +68809,7,2,2,2,NA,4,4,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7348.24433,7906.792868,2,95,6,6,0.97,6,6,2,1,0,1,54,1,3,6,NA +68810,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,12291.154515,13189.875012,1,95,2,2,0.66,1,1,0,0,1,1,80,1,3,5,NA +68811,7,2,1,0,7,1,1,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6999.189812,6930.018579,3,92,12,12,NA,7,7,2,1,0,2,30,1,2,1,9 +68812,7,2,1,42,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19184.316833,19115.012147,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 +68813,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,52701.331723,59294.433957,3,91,3,3,0.73,3,3,1,0,0,2,24,1,2,1,3 +68814,7,2,1,32,NA,4,4,2,NA,NA,2,NA,2,1,3,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20133.630718,20200.188991,1,93,6,6,1.35,3,3,0,1,0,1,32,2,4,1,4 +68815,7,2,2,10,NA,5,6,2,10,123,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6412.057856,6879.064185,3,91,8,8,2.81,3,3,0,2,0,2,31,1,4,3,NA 
+68816,7,2,2,58,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,21097.069797,24201.517172,2,90,2,2,0.83,1,1,0,0,0,2,58,1,3,3,NA +68817,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,18353.275855,20231.959782,1,91,12,8,2.15,6,4,1,1,0,2,29,1,4,6,NA +68818,7,2,1,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,7410.50521,7700.344649,2,96,6,6,2.75,1,1,0,0,1,1,62,1,3,3,NA +68819,7,2,1,47,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,52941.648658,53777.896935,2,102,77,77,NA,4,4,0,1,0,1,47,1,2,1,3 +68820,7,2,2,2,NA,2,2,1,2,31,NA,NA,2,1,2,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10164.721409,11222.125211,2,93,5,5,0.87,4,4,1,1,0,1,41,2,5,1,3 +68821,7,2,1,2,NA,4,4,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9438.902193,9727.203655,2,102,4,4,0.72,4,4,2,0,0,1,48,1,3,1,3 +68822,7,2,1,3,NA,2,2,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15745.774489,16244.197679,2,91,4,4,0.67,5,4,2,0,2,2,66,2,1,1,NA +68823,7,2,1,64,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7897.700656,8206.595267,2,95,7,7,2.31,2,2,0,0,2,2,63,1,3,1,3 +68824,7,2,2,80,NA,2,2,2,NA,NA,1,2,2,1,9,NA,2,1,NA,2,1,2,1,2,2,1,1,2,NA,18824.116627,20613.042179,2,90,6,6,2.24,2,2,0,0,2,1,75,2,4,1,2 +68825,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,1,4,NA,1,2,1,1,2,1,1,2,2,3,16590.074977,17628.593761,1,92,2,2,0.33,5,5,0,1,0,1,51,2,1,4,NA +68826,7,2,1,7,NA,4,4,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10409.90361,10390.331705,1,96,3,2,0.16,7,6,1,4,0,2,32,1,2,5,NA +68827,7,2,1,43,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17046.256569,17889.989739,1,96,9,9,4.1,2,2,0,0,0,2,45,2,5,1,5 +68828,7,2,1,24,NA,5,6,2,NA,NA,2,NA,2,1,99,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,10141.381563,10448.082757,3,90,15,15,5,3,3,0,0,0,1,46,2,3,1,3 +68829,7,2,2,0,6,2,2,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7446.889459,7422.354625,2,96,6,6,1.12,4,4,2,0,0,1,27,2,2,6,NA 
+68830,7,2,2,39,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,13027.332011,13103.987136,2,103,77,77,NA,5,5,0,2,0,2,39,2,5,1,5 +68831,7,2,2,79,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,16612.938931,17191.011953,2,99,14,14,4.96,2,2,0,0,2,2,79,1,5,1,NA +68832,7,2,1,15,NA,5,6,2,15,189,NA,NA,2,2,2,8,NA,NA,NA,1,1,1,1,2,1,1,2,1,NA,6666.045669,7091.184391,3,90,6,6,1.31,3,3,0,1,0,1,49,2,3,1,4 +68833,7,2,1,41,NA,2,2,2,NA,NA,2,NA,77,NA,NA,NA,3,5,NA,2,2,2,2,2,2,2,2,1,2,25035.846455,25142.418345,2,99,99,4,1.61,5,1,0,1,0,1,40,2,1,6,NA +68834,7,2,1,7,NA,4,4,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14832.155253,15089.454697,1,97,15,15,5,4,4,1,1,0,1,35,1,5,1,5 +68835,7,1,1,49,NA,2,2,NA,NA,NA,2,NA,2,1,77,NA,1,6,NA,2,2,2,1,2,2,NA,NA,NA,NA,39096.402803,0,2,91,1,1,0.17,4,4,0,1,0,1,49,2,1,6,NA +68836,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,4299.99963,4413.31452,3,90,15,15,4.2,6,6,1,0,2,1,60,1,5,1,4 +68837,7,2,2,35,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,4,2,1,2,2,1,2,2,1,2,2,1,22326.231285,22166.696692,1,99,10,10,2.07,7,7,2,3,1,2,35,1,5,4,NA +68838,7,2,2,7,NA,1,1,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12789.411811,13121.359297,1,102,2,2,0.52,3,3,0,2,0,2,36,2,3,4,NA +68839,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,44041.763669,47615.276255,1,92,10,10,3.67,3,3,0,0,2,1,52,1,4,77,NA +68840,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19117.284298,19018.955883,2,95,7,7,2.58,2,2,0,0,0,2,57,1,4,3,NA +68841,7,2,1,49,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21158.364877,21090.879552,1,96,7,7,2.75,2,2,0,0,0,1,49,1,4,1,5 +68842,7,2,2,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,29670.405171,29530.485322,2,95,4,4,0.76,4,4,0,1,2,1,80,1,1,2,NA +68843,7,2,1,52,NA,1,1,2,NA,NA,2,NA,2,2,2,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,22446.308035,24663.335125,2,94,4,2,0.54,4,2,0,0,0,1,46,NA,NA,1,NA 
+68844,7,2,1,8,NA,3,3,1,8,100,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,85148.03778,90558.20429,1,98,7,7,1.48,5,5,1,1,0,1,46,1,3,1,3 +68845,7,2,2,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,36067.495928,37024.300659,1,95,3,3,1.1,1,1,0,0,1,2,75,1,3,2,NA +68846,7,2,2,49,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,23742.825358,24088.979786,1,100,15,15,5,2,2,0,0,0,1,48,2,5,1,5 +68847,7,2,1,21,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,2,2,2,1,2,2,1,59682.963348,63262.110969,2,102,9,9,3.24,3,3,0,0,0,1,54,2,4,1,4 +68848,7,2,2,4,NA,3,3,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20589.729566,21881.111334,1,97,6,6,1.03,6,6,2,2,0,2,38,1,5,1,4 +68849,7,2,1,5,NA,4,4,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,10346.695479,1,100,3,3,0.52,3,3,1,1,0,2,25,1,3,5,NA +68850,7,2,1,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,52698.05363,59566.360508,3,92,7,7,1.65,4,4,1,1,0,1,27,1,3,1,3 +68851,7,2,2,13,NA,1,1,1,13,157,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18515.058419,19360.671834,2,96,6,6,0.87,6,6,1,3,0,1,46,2,1,1,1 +68852,7,2,1,7,NA,5,6,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9720.482616,10533.307174,2,91,7,7,1.56,4,4,1,1,0,2,37,2,5,1,5 +68853,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,27428.65319,29015.297283,2,91,2,2,0.26,4,4,0,1,0,1,20,1,3,5,NA +68854,7,2,1,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,NA,10037.214229,10115.728475,2,92,6,6,2.31,2,2,0,0,2,2,65,1,4,5,NA +68855,7,2,1,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,71070.181743,73766.510327,1,95,6,6,1.94,2,2,0,0,2,2,69,1,2,1,4 +68856,7,2,2,14,NA,5,7,1,14,175,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15711.746539,15750.442176,1,98,14,14,3.16,6,6,2,2,0,1,39,1,5,1,5 +68857,7,2,2,12,NA,3,3,2,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,91797.787708,92992.465808,1,95,14,14,3.8,4,4,0,2,0,2,37,1,5,1,5 
+68858,7,2,2,12,NA,2,2,2,12,148,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,8,8,2.24,4,4,1,1,0,2,29,1,4,6,NA +68859,7,2,2,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,85444.349063,86035.699897,1,94,14,14,2.96,5,5,0,3,0,2,39,1,4,1,3 +68860,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,23248.471636,1,95,7,7,1.66,5,5,0,3,0,1,34,1,2,1,4 +68861,7,2,1,8,NA,1,1,1,8,100,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11159.151566,11057.808004,1,102,2,2,0.31,4,4,1,2,0,2,25,1,2,4,NA +68862,7,2,1,3,NA,5,7,2,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8269.653573,9275.321266,1,101,4,4,0.78,4,4,1,2,0,2,31,1,4,3,NA +68863,7,2,1,4,NA,5,6,2,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6319.053383,6623.952114,1,91,6,6,1.25,4,4,1,1,0,1,26,2,4,6,NA +68864,7,2,2,57,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,1,2,2,2,2,1,2,29695.385784,37858.742479,2,91,3,3,0.66,2,2,0,0,1,1,69,2,5,1,1 +68865,7,2,1,62,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,2,1,2,1,1,2,2,1,8460.109732,8526.287371,1,93,5,5,0.64,7,7,0,2,1,1,21,2,4,5,NA +68866,7,2,1,22,NA,2,2,2,NA,NA,2,NA,2,1,2,NA,4,6,NA,2,2,2,2,2,2,NA,NA,NA,NA,31312.870743,31739.689334,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +68867,7,2,1,12,NA,3,3,1,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,79897.505873,82468.209663,2,100,6,6,1.7,3,3,0,1,0,2,33,1,4,6,NA +68868,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,47157.788417,52669.767304,2,95,10,10,4.3,2,2,0,0,2,1,80,1,5,1,4 +68869,7,2,2,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,156728.884529,156630.502182,1,100,15,15,5,3,3,0,0,0,1,53,1,5,1,4 +68870,7,2,2,0,10,4,4,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4581.358327,4631.381361,2,97,5,5,1.3,3,3,1,0,0,2,46,1,3,1,3 +68871,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,83239.679734,92023.913528,1,90,15,15,5,4,4,0,2,0,1,37,1,5,1,5 
+68872,7,2,1,34,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,39040.678458,40784.183756,2,98,2,2,0.35,3,3,0,1,0,2,43,1,3,1,3 +68873,7,2,2,24,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16131.725974,16820.579069,1,99,2,2,0.66,2,1,0,0,0,2,24,1,5,5,NA +68874,7,2,1,55,NA,2,2,2,NA,NA,2,NA,2,2,8,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,32634.009061,33534.701149,2,91,10,10,4.63,2,2,0,0,0,1,55,2,3,3,NA +68875,7,2,1,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,174520.785302,181786.280703,1,95,7,7,2.86,2,2,0,0,0,2,50,1,3,1,3 +68876,7,2,2,78,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,81416.374938,89993.518978,1,97,15,15,5,2,2,0,0,2,1,78,1,3,1,3 +68877,7,2,1,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18533.049642,19387.053587,1,99,14,14,3.67,4,4,1,0,0,2,49,1,3,1,3 +68878,7,2,1,2,NA,2,2,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10803.555682,10625.559962,2,94,1,1,0.16,2,2,1,0,0,2,25,2,3,1,NA +68879,7,2,1,2,NA,3,3,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21718.301271,24669.801903,3,91,6,6,1,6,6,1,1,0,2,39,1,1,3,NA +68880,7,2,1,15,NA,5,7,1,15,184,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,59545.101745,58762.542429,1,102,8,8,1.6,7,7,0,4,0,2,39,1,4,1,4 +68881,7,2,2,35,NA,3,3,2,NA,NA,2,NA,2,1,1,NA,2,1,2,1,2,1,1,2,1,NA,NA,NA,NA,19498.713386,20960.418705,2,97,5,5,0.8,5,5,1,2,0,1,46,2,4,1,2 +68882,7,2,2,48,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16313.554006,16926.636803,1,103,8,8,1.95,4,4,0,1,0,2,48,1,5,1,5 +68883,7,1,1,53,NA,2,2,NA,NA,NA,2,NA,2,2,7,NA,2,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,23431.677775,0,2,93,3,3,0.63,3,3,0,1,0,1,53,2,2,1,4 +68884,7,2,2,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,24905.670199,26841.707501,2,101,3,3,0.65,3,3,0,1,0,2,54,1,3,5,NA +68885,7,2,2,11,NA,1,1,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12481.057866,12612.274737,1,102,4,4,0.61,5,5,2,2,0,2,27,2,2,5,NA 
+68886,7,2,2,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,83441.642088,87352.545854,1,99,15,15,5,4,4,0,2,0,2,44,1,5,1,5 +68887,7,2,1,1,19,1,1,1,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,13386.323284,3,92,3,3,0.54,4,4,3,0,0,2,22,1,3,5,NA +68888,7,2,1,74,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,NA,9257.537917,9255.717695,1,99,9,9,5,2,1,0,0,2,2,61,1,4,6,NA +68889,7,2,2,3,NA,4,4,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8674.686381,9059.886471,2,99,77,77,NA,4,4,1,1,0,2,47,1,2,77,NA +68890,7,2,2,8,NA,1,1,1,8,101,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,22618.378147,23255.85994,2,102,5,5,0.89,4,4,1,1,0,2,28,2,2,1,2 +68891,7,2,2,3,NA,1,1,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15308.9293,16274.054403,2,98,4,4,0.48,6,6,2,0,2,2,65,2,1,2,NA +68892,7,2,1,75,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,8431.365248,9201.488687,1,96,10,10,3.04,4,4,0,0,2,1,56,1,3,3,NA +68893,7,2,1,15,NA,4,4,2,15,188,NA,NA,1,1,NA,11,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11023.237662,11433.168046,1,99,15,15,5,4,4,0,2,0,2,46,1,5,1,5 +68894,7,2,1,36,NA,5,7,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,65033.706797,66896.305599,1,102,8,8,1.6,7,7,0,4,0,2,39,1,4,1,4 +68895,7,2,2,74,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,12813.709202,13258.116643,2,103,14,14,3.47,4,4,1,0,1,1,47,2,5,1,5 +68896,7,2,1,4,NA,4,4,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8402.233801,8749.415211,2,97,3,3,0.4,6,6,2,3,0,2,25,1,2,5,NA +68897,7,2,2,1,15,5,6,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6184.163292,6116.466669,1,91,15,15,3.25,7,7,1,2,0,2,31,1,5,1,5 +68898,7,2,1,3,NA,2,2,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13891.873932,13527.718832,2,97,12,6,0.89,7,7,3,0,0,2,26,2,1,6,NA +68899,7,2,1,48,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18790.641284,22550.069226,2,100,14,14,5,7,1,1,3,1,2,62,1,3,5,NA 
+68900,7,2,1,7,NA,3,3,2,7,93,NA,NA,2,2,2,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,49047.972936,54301.226542,1,93,10,10,2.48,5,5,2,1,0,1,40,2,5,1,5 +68901,7,2,2,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,112011.120236,112121.110185,2,92,15,15,5,2,1,0,0,0,1,52,1,5,6,NA +68902,7,2,2,78,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,12493.113557,12927.830854,2,99,77,77,NA,1,1,0,0,1,2,78,1,4,2,NA +68903,7,1,2,75,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,89167.746947,0,1,95,5,5,1.43,2,2,0,0,2,1,80,1,3,1,4 +68904,7,2,1,6,NA,4,4,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7758.863533,8127.805304,2,99,3,3,0.42,6,6,1,2,0,2,43,1,4,6,NA +68905,7,2,2,1,21,4,4,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7618.827213,7815.48207,2,97,6,6,1.7,2,2,1,0,0,2,20,1,4,5,NA +68906,7,2,2,16,NA,1,1,2,16,195,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,12680.621719,13347.583861,2,90,6,6,1.15,5,5,0,2,0,2,47,2,1,1,5 +68907,7,2,2,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,1,2,2,1,119412.590109,122441.098321,1,92,6,6,1.62,3,3,1,0,0,2,26,1,5,1,5 +68908,7,2,2,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,16727.646581,2,100,9,9,2.46,4,4,1,1,1,2,59,1,3,1,3 +68909,7,2,1,16,NA,4,4,2,16,202,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13416.172328,13513.882801,1,96,15,15,5,4,4,1,1,0,1,50,1,3,1,4 +68910,7,2,1,74,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,2,1,NA,2,2,2,1,2,2,1,2,2,NA,17419.161186,17783.358691,1,102,7,7,1.7,4,4,0,0,2,1,44,1,4,4,NA +68911,7,2,1,13,NA,4,4,1,13,164,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,95,4,4,0.76,4,4,0,1,2,1,80,1,1,2,NA +68912,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,1,2,NA,1,1,2,1,2,2,1,2,1,NA,9509.326077,9839.130289,3,91,14,14,2.5,6,6,1,1,1,2,37,2,2,1,5 +68913,7,2,2,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,NA,NA,NA,NA,17260.508485,17587.688781,2,101,1,1,0.1,6,6,1,2,1,2,27,1,2,1,2 
+68914,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,38093.351998,47908.278929,2,91,4,4,1.19,2,2,0,0,0,1,59,1,1,1,3 +68915,7,2,2,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,80418.665565,83274.564983,2,100,8,8,2.91,3,3,0,2,0,2,48,1,5,1,NA +68916,7,2,1,56,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,37426.314738,38162.090079,3,92,5,5,1.39,2,2,0,0,0,1,56,2,1,1,1 +68917,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,65331.560138,67202.689613,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +68918,7,2,1,55,NA,1,1,2,NA,NA,2,NA,2,2,2,NA,1,5,NA,2,2,2,2,2,2,2,2,2,2,22446.308035,22401.210337,2,94,77,77,NA,4,4,0,0,0,1,28,2,1,3,NA +68919,7,2,2,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,71034.153987,71525.773464,1,98,7,7,1.66,5,5,2,1,0,2,37,1,5,1,3 +68920,7,2,2,48,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17805.920521,18110.342663,2,93,12,12,NA,4,4,0,0,2,1,72,1,2,1,4 +68921,7,2,2,8,NA,3,3,1,8,96,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,45129.675368,44994.093254,1,98,10,10,2.2,6,6,1,3,0,2,31,1,4,6,NA +68922,7,2,2,74,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,88916.414931,91644.282892,1,101,14,14,4.96,2,2,0,0,2,1,74,1,4,1,3 +68923,7,2,1,13,NA,5,6,2,13,157,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11506.937395,12032.024805,1,97,7,7,1.48,5,5,0,1,0,2,46,2,4,1,NA +68924,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,104488.914565,106745.836574,1,98,4,1,0.18,4,1,0,0,0,1,22,1,5,5,NA +68925,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,8286.407536,8892.303775,1,91,3,3,0.98,1,1,0,0,1,1,80,1,3,2,NA +68926,7,2,2,3,NA,5,6,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6537.38756,6940.023785,1,92,12,12,NA,7,7,1,2,1,2,45,2,3,1,3 +68927,7,2,1,11,NA,4,4,2,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10669.652248,10854.743056,2,97,5,5,0.76,5,5,1,2,0,1,32,1,4,6,NA 
+68928,7,2,1,41,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,19260.892847,19291.900704,2,97,13,13,NA,1,1,0,0,0,1,41,1,2,5,NA +68929,7,2,2,19,NA,2,2,2,19,235,2,NA,2,7,77,13,NA,NA,NA,2,2,2,2,2,2,2,2,1,2,23968.380373,25913.276293,2,91,4,4,0.43,7,7,0,1,1,1,41,2,1,4,NA +68930,7,2,2,6,NA,4,4,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8031.102104,8647.40073,2,100,14,4,0.43,7,7,1,3,1,2,62,1,3,5,NA +68931,7,2,1,22,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14424.961621,15273.975784,2,102,8,8,2.01,4,4,0,0,0,1,59,2,4,1,4 +68932,7,2,1,4,NA,4,4,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,11070.778245,1,100,8,8,2.36,3,3,1,0,0,2,37,1,3,4,NA +68933,7,2,1,72,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,8517.336599,8684.171961,2,100,14,14,5,2,2,0,0,2,2,63,1,5,1,4 +68934,7,2,2,47,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,4,NA,2,2,2,2,2,2,1,2,1,2,35815.777398,36002.553631,1,94,2,2,0.27,5,5,0,4,0,2,47,2,1,4,NA +68935,7,2,1,46,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,3,1,NA,2,2,2,2,2,2,2,2,2,2,31233.526521,31366.480522,1,100,8,4,1.43,6,1,1,0,0,1,33,2,3,6,NA +68936,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,26465.930618,28724.649216,2,100,4,4,0.86,3,3,0,2,0,2,36,1,3,5,NA +68937,7,2,1,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19185.622388,22768.505116,2,95,15,10,3.67,5,3,0,0,0,1,47,1,5,1,3 +68938,7,2,1,9,NA,1,1,2,9,109,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,NA,11316.846999,12445.832216,3,91,4,4,1.02,2,2,0,1,0,2,24,2,1,5,NA +68939,7,2,1,7,NA,1,1,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14820.807433,14905.891142,2,102,8,8,1.91,5,5,1,2,0,1,36,2,1,1,4 +68940,7,2,2,2,NA,2,2,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11981.824297,12363.063282,2,91,2,2,0.22,4,4,1,1,0,2,48,2,9,5,NA +68941,7,2,1,0,10,4,4,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6780.545196,6884.61702,1,96,14,14,3.47,4,4,1,1,0,2,27,1,3,6,NA 
+68942,7,2,2,14,NA,4,4,1,15,180,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12531.903464,12935.738352,2,100,5,5,1.08,3,3,0,1,0,2,50,1,4,3,NA +68943,7,2,2,0,2,4,4,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4099.350341,4144.110416,2,99,7,7,1.74,4,4,2,0,0,2,56,1,2,5,NA +68944,7,2,2,79,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,20341.088581,21872.034752,1,93,2,2,0.64,1,1,0,0,1,2,79,1,4,3,NA +68945,7,2,2,27,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,17568.277926,19062.73336,1,90,2,2,0.56,2,2,1,0,0,2,27,1,3,5,NA +68946,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,82743.954233,85642.054874,2,91,15,15,4.2,6,6,2,0,2,1,63,1,1,1,3 +68947,7,2,2,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,10033.449917,10806.84762,2,95,13,13,NA,2,2,0,0,2,2,80,1,1,1,NA +68948,7,2,2,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,86578.861495,89037.775585,2,101,7,7,1.88,4,4,0,2,0,2,36,1,4,1,5 +68949,7,2,1,63,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,9145.939054,9651.644596,1,101,5,5,0.87,4,4,0,0,2,1,63,2,1,1,NA +68950,7,2,2,38,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,39561.667842,38494.210933,2,98,7,7,1.33,6,6,0,3,0,1,31,1,3,6,NA +68951,7,2,1,4,NA,1,1,1,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16775.083123,16973.400581,3,92,4,4,0.6,6,6,2,2,0,2,24,1,3,6,NA +68952,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,56397.521122,64147.609457,2,91,7,7,2.51,2,2,0,0,1,2,80,1,5,2,NA +68953,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,24919.497762,27697.191423,1,101,4,4,0.78,4,4,1,2,0,2,32,1,3,3,NA +68954,7,2,2,1,22,4,4,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10927.526143,11412.763748,2,102,2,2,0.36,4,4,1,2,0,2,36,1,3,5,NA +68955,7,2,1,59,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,32680.7991,1,95,15,15,3.62,7,7,2,4,0,1,59,1,5,1,2 
+68956,7,2,2,14,NA,3,3,1,14,177,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,28004.718304,30534.517049,1,94,7,7,1.21,6,6,2,2,0,1,31,1,2,6,NA +68957,7,2,1,12,NA,4,4,2,13,156,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11351.725436,11256.943498,2,95,6,6,1.85,2,2,0,1,0,2,48,1,4,5,NA +68958,7,2,1,12,NA,1,1,1,12,155,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,30061.88611,32302.400867,1,92,10,10,3.04,4,4,1,1,0,1,32,1,3,1,2 +68959,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,17777.254625,18901.462969,2,99,NA,77,NA,3,1,0,0,1,1,63,1,3,5,NA +68960,7,2,2,10,NA,2,2,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,15897.166957,16345.216522,2,102,4,4,0.57,6,6,2,3,0,2,26,2,3,1,NA +68961,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,117419.769432,119338.215671,2,95,15,15,5,2,2,0,0,2,1,70,1,5,1,5 +68962,7,2,1,14,NA,5,6,2,14,177,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11506.937395,12032.024805,1,97,15,15,5,3,3,0,1,0,2,45,2,5,1,5 +68963,7,2,2,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6306.491784,7047.470907,2,90,6,6,1.34,4,4,1,0,0,1,38,2,4,6,NA +68964,7,2,1,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19130.866571,19325.235628,2,99,6,6,2.95,1,1,0,0,0,1,20,1,3,5,NA +68965,7,2,2,2,NA,3,3,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20820.221848,22126.060024,1,101,5,5,0.89,5,5,1,0,0,1,25,1,2,77,NA +68966,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,11696.973403,11982.125462,2,97,1,1,0,2,2,0,0,1,2,63,1,4,5,NA +68967,7,2,1,20,NA,4,4,1,NA,NA,2,NA,2,1,5,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26503.609729,2,101,3,1,0,2,1,0,0,0,1,20,2,4,5,NA +68968,7,2,1,12,NA,1,1,1,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22768.423624,22944.003607,3,92,4,4,0.81,3,3,0,2,0,2,31,1,3,1,NA +68969,7,2,1,0,1,1,1,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7040.676479,6971.095244,1,98,9,1,0.04,7,1,3,2,0,2,32,1,4,1,4 
+68970,7,2,1,11,NA,1,1,2,11,136,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13285.093011,13153.997889,2,94,7,7,1.88,4,4,0,2,0,2,28,1,4,4,NA +68971,7,2,2,21,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,42583.505439,50840.190326,2,91,4,4,0.69,4,4,2,0,0,2,21,1,3,6,NA +68972,7,2,2,60,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,11771.283932,12488.02543,1,90,9,9,2.07,5,5,0,0,1,1,46,2,4,1,3 +68973,7,2,1,2,NA,4,4,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6837.992772,6921.398172,1,96,15,15,4.34,4,4,1,1,0,1,39,2,5,1,5 +68974,7,2,1,10,NA,4,4,1,10,123,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10311.165779,10434.780551,2,98,8,8,2.43,3,3,0,2,0,2,31,1,4,1,NA +68975,7,2,2,3,NA,3,3,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,85197.465687,87881.529962,1,92,15,15,5,4,4,2,0,0,2,46,1,5,1,5 +68976,7,2,1,44,NA,5,7,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12254.763576,12210.492386,2,90,3,3,0.65,5,3,1,2,0,1,44,2,5,1,5 +68977,7,2,1,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,1,2,1,2,2,1,2,2,NA,13251.84987,13511.423639,2,95,4,4,0.76,4,4,0,1,2,1,80,1,1,2,NA +68978,7,2,1,39,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,113559.363135,120841.424294,2,102,15,15,5,4,4,0,2,0,1,39,1,4,1,5 +68979,7,2,2,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,71034.153987,74518.988514,1,98,14,14,4.12,4,4,0,2,0,2,36,1,5,1,3 +68980,7,2,1,20,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,1,NA,2,2,2,2,2,2,1,2,2,1,35669.2076,36620.108921,2,94,77,77,NA,4,4,0,0,0,1,28,2,1,3,NA +68981,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,40870.323556,45647.400007,2,99,15,15,5,1,1,0,0,1,2,80,1,5,3,NA +68982,7,2,2,27,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,3,1,1,1,2,2,1,2,2,1,2,2,1,17858.942687,18621.550977,1,97,15,15,4.84,6,6,2,0,0,1,53,NA,NA,1,NA +68983,7,2,1,67,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,11568.876339,11794.347884,1,102,6,6,2.75,1,1,0,0,1,1,67,1,5,3,NA 
+68984,7,2,2,0,9,3,3,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21671.775435,21083.121886,1,101,9,9,2.39,4,4,1,0,0,2,57,1,2,77,NA +68985,7,2,2,16,NA,4,4,1,16,201,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13895.342981,13964.270792,2,102,5,5,0.67,6,6,0,4,0,2,33,1,2,6,NA +68986,7,2,2,18,NA,4,4,2,19,228,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,21329.621847,2,101,12,12,NA,4,4,0,0,0,1,57,1,3,1,3 +68987,7,2,1,9,NA,2,2,2,9,109,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15882.795076,15889.831109,1,97,3,3,0.44,5,5,2,2,0,2,26,1,4,4,NA +68988,7,2,1,72,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,4,NA,1,2,2,1,2,2,1,2,2,NA,8497.912951,8999.288803,2,100,2,2,0.73,1,1,0,0,1,1,72,1,1,4,NA +68989,7,2,1,34,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,13601.994691,14463.629992,2,100,9,9,2.46,4,4,0,2,0,2,36,2,4,1,3 +68990,7,2,1,53,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,22446.308035,22116.943066,2,91,10,10,2.95,4,4,0,1,0,2,18,1,3,NA,NA +68991,7,2,1,40,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,2,1,NA,2,2,2,2,2,2,1,2,2,2,34152.149953,40351.498105,1,90,5,5,1,4,4,0,2,0,1,40,2,2,1,1 +68992,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,17533.239798,17053.396696,2,99,3,3,0.44,5,5,1,1,0,2,53,1,4,1,3 +68993,7,2,2,34,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,19005.010125,19043.816772,1,92,2,2,0.24,5,5,0,2,0,1,35,2,4,1,3 +68994,7,2,1,19,NA,3,3,1,19,233,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,74191.50646,74370.660378,1,98,8,8,2.62,3,3,0,0,0,1,50,NA,NA,3,NA +68995,7,2,1,75,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,60942.568495,64726.722108,1,94,14,14,5,2,2,0,0,2,2,72,1,5,1,5 +68996,7,2,1,24,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,NA,1,2,2,1,2,2,1,2,2,NA,14385.653726,15564.966804,2,101,7,5,1.84,2,1,0,0,0,1,23,2,4,5,NA +68997,7,2,1,7,NA,2,2,1,7,88,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13599.766245,14493.354345,2,93,3,3,0.48,4,4,1,1,0,1,49,2,3,1,4 
+68998,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,15206.604563,15895.992431,2,90,15,15,5,4,4,1,1,0,1,53,2,5,1,5 +68999,7,2,1,2,NA,4,4,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8022.100831,8353.575096,2,102,4,4,0.53,6,6,2,2,0,2,27,1,2,1,2 +69000,7,2,1,13,NA,3,3,2,13,163,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,26824.630008,26472.092796,1,95,1,1,0.12,3,3,0,2,0,2,40,1,5,3,NA +69001,7,2,1,68,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,6038.685119,6274.869975,1,96,6,6,1.57,3,3,0,0,1,1,42,1,3,NA,NA +69002,7,2,1,8,NA,1,1,1,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14007.413517,2,98,15,15,4.97,5,5,0,3,0,1,39,1,5,1,5 +69003,7,2,1,7,NA,2,2,2,7,88,NA,NA,2,2,3,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9390.522479,10327.334743,2,90,2,2,0.32,4,4,1,2,0,2,34,2,1,77,NA +69004,7,2,1,16,NA,1,1,2,16,200,NA,NA,1,1,NA,9,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,18120.499457,19917.650851,1,90,1,1,0.02,5,5,0,1,0,2,39,2,1,1,2 +69005,7,2,2,77,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,60723.387753,62586.321432,1,96,14,14,5,2,2,0,0,2,1,74,1,5,1,4 +69006,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,95214.22557,95506.977757,1,97,15,15,3.89,5,5,0,2,0,1,50,1,4,6,NA +69007,7,2,1,55,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,2,4,NA,2,2,2,NA,NA,NA,2,2,1,2,24211.824535,26078.926124,2,93,3,3,0.43,4,4,0,0,0,1,45,2,2,6,NA +69008,7,1,2,65,NA,2,2,NA,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,9325.158469,0,1,93,15,15,5,2,2,0,0,2,2,65,2,4,1,5 +69009,7,2,1,56,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18416.819037,18519.889723,2,96,8,8,3.67,2,2,0,0,0,1,56,2,5,1,5 +69010,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,13799.782578,14328.748539,2,103,7,7,1.48,5,5,0,1,1,2,80,1,4,3,NA +69011,7,2,2,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,170311.62251,172039.874492,1,94,15,15,5,4,3,0,0,1,1,33,1,2,5,NA 
+69012,7,2,1,1,14,2,2,2,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8331.647763,8430.145707,2,90,8,8,1.72,5,5,1,2,0,1,20,2,1,1,2 +69013,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,18876.596264,19790.278241,1,101,3,3,0.44,5,5,0,3,0,1,35,1,3,1,4 +69014,7,2,1,78,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,60942.568495,64726.722108,1,91,10,10,4.3,2,2,0,0,2,1,78,1,4,1,4 +69015,7,2,1,0,1,2,2,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,7328.080818,7255.659238,2,102,15,15,2.43,7,7,3,2,0,1,28,2,5,1,4 +69016,7,2,2,13,NA,4,4,1,13,159,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14166.687432,14745.171396,1,100,1,1,0,4,4,1,2,0,2,35,1,2,5,NA +69017,7,2,1,58,NA,4,4,2,NA,NA,2,NA,2,2,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16695.743237,21107.492082,3,90,12,12,NA,2,2,0,0,0,2,56,2,4,1,5 +69018,7,2,1,39,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,16058.989596,16964.610342,2,97,2,2,0.21,7,7,2,3,0,2,32,1,4,5,NA +69019,7,2,2,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,138322.767578,151356.795097,1,101,13,13,NA,2,2,0,0,0,1,40,1,2,1,3 +69020,7,2,1,42,NA,1,1,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,35551.992846,35164.643493,3,91,15,14,4.03,5,4,2,0,0,1,42,2,4,1,5 +69021,7,2,1,18,NA,1,1,1,18,220,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,27186.265479,27336.512389,1,95,4,4,0.68,5,5,0,1,0,2,38,2,3,4,NA +69022,7,2,2,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,42424.587753,44729.774642,2,102,4,4,1.16,2,2,0,0,0,1,48,1,4,1,4 +69023,7,2,2,7,NA,5,7,1,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14676.903347,14493.499136,1,102,5,5,1.27,3,3,0,2,0,2,38,1,2,3,NA +69024,7,2,1,65,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,11764.405491,12074.424659,1,97,4,4,1.34,1,1,0,0,1,1,65,1,5,3,NA +69025,7,2,1,60,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,11937.570805,12597.633777,2,96,5,5,0.68,6,6,0,3,2,1,60,2,1,1,1 
+69026,7,2,2,1,19,1,1,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9469.751474,10066.755659,1,102,13,13,NA,6,6,1,2,0,2,36,2,4,6,NA +69027,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,96536.931937,99301.799305,2,91,15,15,4.2,6,6,2,0,2,1,63,1,1,1,3 +69028,7,2,1,0,5,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,7757.493251,8192.524698,3,92,8,8,1.45,6,6,2,0,0,2,58,2,5,1,9 +69029,7,2,2,6,NA,4,4,2,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7631.175557,7797.231767,1,93,1,1,0.02,5,5,0,4,0,2,36,NA,NA,5,NA +69030,7,1,1,36,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,22188.836739,0,1,101,13,13,NA,3,3,1,0,0,2,20,1,2,6,NA +69031,7,2,1,22,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,3,5,NA,1,2,2,1,2,2,1,2,2,3,15196.92397,17064.888752,2,101,9,6,2.3,4,1,0,0,0,1,22,2,3,5,NA +69032,7,2,2,64,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,1,2,NA,2,2,2,1,2,2,NA,NA,NA,NA,6287.91334,6572.718606,2,92,12,12,NA,7,7,0,1,2,2,64,2,1,2,NA +69033,7,2,1,2,NA,3,3,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50064.977404,56868.769772,1,95,14,14,2.98,5,5,1,2,0,1,33,1,4,1,5 +69034,7,2,1,33,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,2,6,NA,2,2,2,1,2,2,2,2,1,2,36418.962534,36838.502529,2,93,4,4,0.56,5,5,0,2,0,1,37,NA,NA,1,1 +69035,7,2,1,68,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,14,14,5,2,2,0,0,2,1,68,1,4,1,4 +69036,7,2,1,76,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,8764.234393,9213.337166,2,98,77,77,NA,2,2,0,0,2,2,70,1,3,1,2 +69037,7,2,1,41,NA,1,1,1,NA,NA,2,NA,2,1,4,NA,2,1,NA,2,2,2,2,2,2,1,2,2,2,37402.70356,39181.236579,2,102,15,15,5,3,3,1,0,0,1,41,2,2,1,5 +69038,7,2,2,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,99381.891022,103468.309204,2,92,15,15,5,2,1,0,0,0,2,29,1,5,1,NA +69039,7,1,1,39,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,83081.99261,0,2,92,15,15,5,2,1,0,0,0,1,41,1,4,5,NA 
+69040,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,92399.914697,92845.296279,2,92,15,15,5,5,5,0,3,0,2,46,1,5,1,5 +69041,7,2,1,8,NA,4,4,1,8,101,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10311.165779,10516.173846,2,98,4,4,1.29,2,2,0,1,0,2,27,1,2,5,NA +69042,7,2,1,33,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,3,6,NA,2,2,2,1,2,2,1,2,2,1,43108.74283,43605.347908,2,91,7,7,1.29,6,6,2,2,0,1,33,2,3,6,NA +69043,7,2,1,5,NA,5,6,2,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4526.77644,4888.797036,3,90,12,12,NA,5,5,1,2,0,1,37,2,5,1,5 +69044,7,2,2,0,8,2,2,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,7135.777159,7112.267328,1,90,7,7,1.56,4,4,1,1,0,2,37,1,2,77,NA +69045,7,2,2,65,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,4,1,NA,2,2,2,1,2,2,1,2,2,2,13676.984152,14509.761798,2,91,8,8,3.3,2,2,0,0,2,1,65,NA,NA,1,4 +69046,7,2,1,3,NA,5,6,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8402.098771,9423.873047,3,91,7,7,2.16,3,3,1,0,1,2,36,2,5,1,NA +69047,7,2,2,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,98514.948291,98181.677236,1,99,15,15,5,2,2,0,0,2,1,73,1,4,1,5 +69048,7,2,2,11,NA,3,3,1,11,143,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18800.96526,18551.296274,1,94,6,6,1.26,5,5,0,2,0,2,38,1,4,1,NA +69049,7,2,1,71,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,1,2,1,2,2,1,1,2,NA,10776.442569,10987.528707,1,96,7,7,1,7,7,2,1,1,2,53,1,4,1,3 +69050,7,2,2,1,22,3,3,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,39879.891371,42381.146401,1,91,14,14,3.06,5,5,2,0,0,2,30,1,5,1,5 +69051,7,2,2,5,NA,3,3,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,51483.624552,53105.566664,1,94,15,15,5,4,4,2,0,0,1,51,2,5,1,5 +69052,7,1,2,40,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,1,3,1,2,2,1,2,2,NA,NA,NA,NA,115926.402585,0,1,101,15,15,5,4,4,0,2,0,2,40,1,4,1,3 +69053,7,2,2,43,NA,2,2,2,NA,NA,2,NA,2,1,1,NA,3,6,2,2,2,2,2,2,2,2,2,2,2,34503.935186,34683.870273,2,91,12,12,NA,5,5,0,1,1,2,43,2,3,6,NA 
+69054,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19633.637051,21699.358471,2,95,5,5,1.05,3,3,0,1,0,1,43,1,3,1,2 +69055,7,2,2,28,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,53638.260635,56251.206542,2,96,7,7,2.38,2,2,0,0,0,1,29,1,3,1,4 +69056,7,2,1,39,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13593.59406,14525.996816,2,90,5,5,1.08,3,3,0,1,0,2,29,2,4,1,5 +69057,7,2,2,39,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,4,1,2,2,2,2,1,2,2,NA,NA,NA,NA,29148.354549,28361.870708,2,92,12,12,NA,7,7,0,1,2,2,64,2,1,2,NA +69058,7,2,1,43,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,6,NA,2,2,1,1,2,2,NA,NA,NA,NA,37324.655911,39220.856657,1,102,5,5,0.86,5,5,2,0,0,2,21,2,2,5,NA +69059,7,2,1,2,NA,4,4,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5662.231921,6243.536586,1,99,13,13,NA,4,4,1,0,0,2,26,1,4,4,NA +69060,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,137038.746155,146586.432966,2,101,3,3,0.92,2,1,0,0,0,1,21,1,4,5,NA +69061,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,26105.275101,26256.723843,1,98,9,9,4.03,2,2,0,1,0,2,49,1,5,3,NA +69062,7,2,2,70,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,2,NA,1,2,1,1,2,1,1,2,1,NA,10831.995402,11374.881633,3,90,77,77,NA,4,4,0,0,2,1,69,2,5,2,NA +69063,7,2,1,2,NA,1,1,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,10803.555682,11221.222518,2,91,6,6,1,6,6,1,3,0,2,35,2,2,1,1 +69064,7,2,2,6,NA,4,4,1,6,81,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11195.065587,11390.355382,2,96,2,2,0.31,4,4,0,2,0,2,30,NA,NA,6,NA +69065,7,2,1,28,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,14313.345971,15171.949804,3,91,15,6,2.75,2,1,0,0,0,2,30,NA,NA,6,NA +69066,7,2,1,74,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,60942.568495,64726.722108,1,94,9,9,3.97,2,2,0,0,2,1,74,1,4,1,4 +69067,7,2,1,53,NA,5,6,1,NA,NA,2,NA,2,2,77,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16590.074977,17115.36835,1,92,12,12,NA,4,4,0,1,0,1,53,2,5,1,4 
+69068,7,2,2,39,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,16614.865368,17238.21833,3,91,15,15,5,3,3,1,0,0,2,39,2,5,1,5 +69069,7,2,1,11,NA,4,4,1,11,136,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10269.191209,10473.364732,2,102,5,5,0.67,6,6,0,4,0,2,33,1,2,6,NA +69070,7,2,2,4,NA,5,6,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6683.092466,6676.033575,3,91,6,6,1.34,4,4,1,1,0,1,36,2,4,1,NA +69071,7,2,1,4,NA,5,6,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7580.437211,8294.186048,2,92,12,12,NA,7,7,2,4,0,1,54,2,2,1,5 +69072,7,1,2,0,3,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6440.205112,0,2,103,14,14,3.86,4,4,2,0,0,2,37,2,5,1,NA +69073,7,2,2,3,NA,5,6,1,3,44,NA,NA,2,2,1,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8052.137959,8043.633038,1,100,7,7,2.2,3,3,1,0,0,2,28,2,2,1,3 +69074,7,2,1,19,NA,1,1,2,19,237,2,NA,2,2,4,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26704.187335,28443.712885,1,95,9,9,2.46,4,4,0,0,0,1,42,2,2,1,3 +69075,7,2,1,2,NA,4,4,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5163.141902,5376.483607,2,99,13,13,NA,5,5,2,0,0,2,21,1,3,5,NA +69076,7,2,1,18,NA,3,3,1,18,220,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24446.632088,24751.360191,1,94,4,4,0.79,3,3,0,1,0,1,49,1,2,3,NA +69077,7,2,2,58,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,35630.227837,37045.269032,2,102,6,6,2.75,1,1,0,0,0,2,58,1,2,3,NA +69078,7,2,2,59,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,27617.038547,29182.278408,1,93,14,14,5,2,2,0,0,0,2,59,2,5,5,NA +69079,7,2,1,32,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,18683.628164,19333.389409,1,98,5,4,1.7,3,1,0,0,0,1,32,1,5,5,NA +69080,7,2,1,16,NA,4,4,1,16,202,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,18413.211403,2,101,4,4,0.86,3,3,0,1,0,2,18,1,2,NA,NA +69081,7,2,2,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,15992.133387,15205.505219,2,95,15,15,3.85,7,7,0,3,1,2,62,1,4,2,NA 
+69082,7,2,1,8,NA,1,1,2,8,97,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14134.674028,14215.818763,1,96,8,8,2.62,3,3,0,1,0,1,41,2,3,1,5 +69083,7,2,1,14,NA,3,3,2,14,171,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20051.633575,19994.174398,2,97,5,5,0.84,5,5,0,2,0,2,33,1,4,1,3 +69084,7,2,1,2,NA,4,4,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10232.679671,10545.226242,2,101,1,1,0.21,4,4,1,2,0,2,26,1,3,5,NA +69085,7,2,2,49,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,2,5,NA,2,2,2,1,2,2,1,2,2,2,28065.587512,28753.959014,2,93,4,4,0.56,5,5,0,0,0,2,49,2,2,5,NA +69086,7,2,2,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,104934.725755,111516.131691,2,98,10,10,4.42,2,2,0,0,0,1,25,1,5,1,5 +69087,7,2,2,15,NA,3,3,2,15,189,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,36586.371708,39087.782259,1,98,3,3,0.5,5,5,0,3,0,2,56,1,3,3,NA +69088,7,2,2,67,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,2,2,2,1,2,2,2,2,2,2,10614.141896,11057.12801,2,93,4,4,0.99,2,2,0,0,2,1,72,2,3,1,4 +69089,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,12170.074646,12490.78413,1,97,9,7,3.36,2,1,0,0,1,1,61,1,5,3,NA +69090,7,2,1,2,NA,3,3,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20538.767297,24065.985233,2,95,3,3,0.52,3,3,1,0,0,1,37,1,4,1,4 +69091,7,2,1,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,132969.642582,135979.403496,2,94,8,8,4.59,1,1,0,0,0,1,52,1,3,3,NA +69092,7,2,2,0,4,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16423.151355,17405.028297,1,99,15,15,5,3,3,1,0,0,2,31,1,5,1,5 +69093,7,2,1,16,NA,5,6,2,16,201,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9099.599144,9725.105491,1,90,9,9,2.6,4,4,0,1,0,2,49,2,2,1,5 +69094,7,2,1,3,NA,3,3,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21357.821814,24096.698658,2,96,3,3,0.53,5,5,3,0,0,2,26,1,4,1,4 +69095,7,2,1,1,14,1,1,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12387.68972,13272.515505,1,94,6,6,1.3,4,4,2,0,0,1,24,2,1,1,4 
+69096,7,1,1,12,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13731.625553,0,1,100,8,8,1.95,4,4,0,2,0,2,42,1,4,1,4 +69097,7,2,1,31,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,2,1,NA,2,2,2,1,2,2,2,2,2,2,36946.697686,39295.803447,1,98,3,3,0.4,7,7,2,3,0,2,31,2,5,1,2 +69098,7,2,1,14,NA,1,1,1,14,178,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18242.832494,18343.652859,1,102,6,6,1.34,4,4,0,1,0,2,48,2,3,1,1 +69099,7,2,1,6,NA,4,4,2,6,82,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9399.447563,9986.021526,2,90,7,7,1.61,4,4,1,1,1,2,65,1,3,2,NA +69100,7,1,2,77,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,14125.505993,0,1,99,14,14,5,2,2,0,0,2,1,79,NA,NA,1,5 +69101,7,2,1,58,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,1,4,NA,2,2,2,2,2,2,2,2,2,2,25042.846308,24675.381142,1,93,14,6,2.94,5,1,1,0,0,2,22,2,1,6,NA +69102,7,2,2,61,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,3,1,NA,1,2,2,1,2,2,2,2,2,2,5852.076897,6537.978856,2,90,7,7,0.89,7,7,1,3,3,1,60,2,3,1,3 +69103,7,2,1,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,16995.648055,16598.645683,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA +69104,7,2,1,5,NA,1,1,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14993.478359,14600.446315,1,97,6,3,0.63,7,3,2,1,0,1,29,2,2,1,1 +69105,7,2,1,55,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,1,5,NA,2,2,2,2,2,2,1,2,2,2,37557.946192,37006.841228,2,102,5,5,1.08,3,3,0,0,0,1,55,2,1,5,NA +69106,7,2,2,0,8,5,7,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7434.184594,7384.233557,1,93,15,15,5,3,3,1,0,0,2,34,1,5,1,5 +69107,7,2,1,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,7323.703412,7380.991723,2,100,4,4,0.97,3,3,0,0,3,2,80,1,5,2,NA +69108,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,20891.980831,22154.880695,2,97,5,5,1.08,3,3,0,0,0,1,38,1,4,5,NA +69109,7,2,1,1,19,1,1,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11793.948458,12249.904005,1,100,5,5,0.65,6,6,1,2,0,1,32,2,2,1,2 
+69110,7,2,2,56,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,24870.513993,25282.537357,2,98,8,8,3.06,2,2,0,1,0,2,56,1,3,3,NA +69111,7,2,1,9,NA,3,3,2,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,40950.556053,42121.524492,2,95,15,15,4.77,4,4,0,2,0,2,36,1,4,1,5 +69112,7,2,2,23,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,16929.836231,18556.498474,2,101,4,3,1.1,2,1,0,0,0,2,23,2,4,5,NA +69113,7,2,2,8,NA,3,3,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,51531.402068,50847.086518,1,94,6,6,1.57,3,3,0,1,0,2,28,1,4,1,4 +69114,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,NA,NA,NA,NA,NA,NA,NA,84761.905629,91116.013503,1,95,NA,NA,NA,5,5,0,2,0,2,37,1,3,1,NA +69115,7,2,1,79,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,8517.336599,8684.171961,2,100,4,4,1.02,2,2,0,0,1,1,79,1,1,2,NA +69116,7,2,1,14,NA,3,3,2,14,170,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,88198.948426,91036.751291,1,98,10,10,3.04,4,4,0,2,0,2,47,1,4,1,3 +69117,7,2,1,28,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21399.459455,20960.973241,1,90,15,15,4.34,4,4,0,0,1,1,62,2,5,1,3 +69118,7,2,1,11,NA,5,6,2,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,1,1,2,2,NA,4852.395137,5120.382571,1,99,6,6,1.07,6,6,2,1,2,1,44,2,5,4,NA +69119,7,2,1,16,NA,4,4,1,16,203,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15008.400374,15566.530345,1,100,7,7,2.78,2,2,0,1,0,2,37,2,4,5,NA +69120,7,2,2,39,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,27303.803575,27591.614113,1,96,3,3,0.43,4,4,1,1,0,2,39,2,4,1,3 +69121,7,2,2,5,NA,5,7,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27257.164734,28966.726071,1,92,3,3,0.46,5,5,2,1,0,1,30,1,3,1,2 +69122,7,2,1,16,NA,4,4,2,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10732.729133,10991.603368,3,90,10,10,3.67,3,3,0,1,0,2,52,2,3,5,NA +69123,7,2,2,63,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,128590.415432,131137.819973,1,102,15,15,5,2,2,0,0,2,2,63,1,4,1,4 
+69124,7,2,2,19,NA,5,6,2,20,NA,2,NA,2,2,2,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10353.070387,10520.518116,2,101,5,3,1.1,2,1,0,0,0,1,29,2,4,1,NA +69125,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,133853.800452,137185.265763,3,91,5,5,1.93,1,1,0,0,0,2,49,1,3,3,NA +69126,7,2,1,3,NA,3,3,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,77607.205684,90935.051698,1,98,6,6,1.31,3,3,1,0,0,1,30,1,5,1,5 +69127,7,2,1,8,NA,3,3,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,54897.892683,58386.014643,2,98,7,7,1.61,4,4,1,1,0,1,43,NA,NA,6,NA +69128,7,2,1,8,NA,4,4,1,8,97,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9184.716222,9199.031651,1,102,1,1,0,5,5,0,3,0,2,41,1,4,1,4 +69129,7,2,1,18,NA,2,2,1,18,225,2,NA,2,2,2,66,NA,NA,NA,2,2,2,2,2,2,1,2,2,2,22721.243258,22896.459408,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +69130,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,31540.022655,39899.113022,2,91,3,3,0.86,2,2,0,0,0,2,41,1,4,1,3 +69131,7,2,2,25,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,22941.698794,22487.759922,2,96,5,5,1.08,3,3,0,0,0,1,50,1,3,1,4 +69132,7,2,1,6,NA,4,4,2,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11277.594097,11665.009628,1,90,5,5,0.74,5,5,0,2,0,2,18,1,4,NA,NA +69133,7,2,2,1,20,4,4,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7623.406054,7742.751523,1,97,4,4,0.46,7,7,3,3,0,2,31,1,3,1,NA +69134,7,2,2,2,NA,4,4,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5687.793894,6002.477845,2,90,8,5,1.36,5,2,2,0,0,2,25,2,3,5,NA +69135,7,1,1,40,NA,5,6,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17210.953493,0,1,91,15,15,5,5,5,2,1,0,1,40,1,5,1,5 +69136,7,2,2,36,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,4,6,2,2,2,2,2,2,2,1,2,2,2,36453.846815,35470.245447,1,102,13,13,NA,6,6,1,2,0,2,36,2,4,6,NA +69137,7,2,1,56,NA,4,4,2,NA,NA,2,NA,2,2,8,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15760.921402,15756.795819,3,90,15,15,5,4,4,0,0,0,1,56,2,4,1,5 
+69138,7,2,2,16,NA,3,3,1,16,195,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71832.578284,73443.760792,1,92,8,8,1.45,6,6,1,3,0,1,36,1,3,1,4 +69139,7,2,2,17,NA,2,2,1,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,2,2,2,NA,NA,NA,NA,27070.679378,27847.54398,1,92,6,6,0.93,5,5,0,2,0,1,47,2,1,1,1 +69140,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,44862.41765,53799.51702,1,92,7,7,2.64,2,2,0,0,2,1,80,1,3,1,3 +69141,7,2,1,80,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,8497.912951,8999.288803,1,96,6,6,1.82,2,2,0,0,2,1,80,1,1,1,9 +69142,7,2,1,72,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,38226.070503,39676.327756,1,99,9,9,5,1,1,0,0,1,1,72,1,5,2,NA +69143,7,2,2,10,NA,4,4,1,10,130,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7489.549692,8064.290136,2,100,8,8,1.1,7,7,3,3,0,2,58,1,3,5,NA +69144,7,2,2,71,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,14936.784897,17875.873101,2,94,5,5,2.2,1,1,0,0,1,2,71,1,3,2,NA +69145,7,2,1,18,NA,1,1,1,18,217,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15469.666055,15805.71937,2,92,12,12,NA,7,7,0,1,2,2,64,2,1,2,NA +69146,7,2,1,41,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,28361.113525,28406.771707,1,97,8,8,4.7,1,1,0,0,0,1,41,1,3,3,NA +69147,7,2,1,39,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,1,1,2,2,1,17605.619977,18470.699991,3,91,15,15,5,3,3,0,1,0,2,40,2,5,1,5 +69148,7,2,1,5,NA,3,3,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,73006.119819,83450.013654,1,101,6,6,0.97,7,7,2,1,0,1,43,1,2,1,NA +69149,7,2,1,62,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,7791.74373,7821.068134,1,91,7,7,3.54,1,1,0,0,1,1,62,1,3,3,NA +69150,7,2,2,6,NA,4,4,2,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8757.841043,9104.895674,2,101,5,5,1.03,4,4,1,1,0,2,31,1,3,5,NA +69151,7,2,1,57,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,119599.307776,123574.267321,1,93,15,15,4.59,4,4,0,1,0,1,57,1,5,1,5 
+69152,7,2,1,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17743.990528,2,97,3,3,0.82,2,2,0,0,0,1,24,1,3,5,NA +69153,7,2,1,10,NA,4,4,2,10,124,NA,NA,2,1,3,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10229.206765,10406.656985,1,96,9,9,2.78,4,4,0,2,0,1,54,2,5,4,NA +69154,7,2,1,66,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,29999.543427,32474.673757,2,91,2,2,0.56,1,1,0,0,1,1,66,1,3,3,NA +69155,7,2,2,9,NA,3,3,2,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74327.830279,75742.187091,2,91,6,6,1.34,4,4,1,2,0,2,33,1,4,3,NA +69156,7,2,1,12,NA,3,3,1,12,155,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,84860.612577,85065.529717,2,98,15,15,5,4,4,0,2,0,1,48,1,3,1,4 +69157,7,2,2,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,2,2,1,2,2,1,2,2,1,112960.559471,113970.503883,1,94,99,99,NA,5,5,1,1,0,2,21,1,3,5,NA +69158,7,2,1,44,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,32719.762791,33772.872733,1,91,1,1,0.2,2,2,0,0,0,1,44,1,2,1,2 +69159,7,2,2,22,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,5,5,2,2,2,2,2,2,2,1,2,2,1,35424.746838,36356.556801,2,93,NA,3,0.98,4,1,0,0,0,1,28,NA,NA,4,NA +69160,7,2,1,17,NA,1,1,1,17,208,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,27186.265479,27776.842846,1,92,15,15,4.99,4,4,0,2,0,2,43,1,4,1,4 +69161,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,22233.683089,25178.838093,1,102,2,2,0.49,3,3,1,0,0,1,20,1,2,6,NA +69162,7,2,1,72,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,4,5,NA,2,2,2,2,2,2,2,2,2,NA,12250.041239,12463.587232,2,100,1,1,0,1,1,0,0,1,1,72,2,4,5,NA +69163,7,2,2,10,NA,5,7,1,10,123,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9007.62445,9504.796896,2,102,15,15,5,4,4,0,2,0,1,39,1,4,1,5 +69164,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,51433.469947,56330.524019,1,98,7,7,3.8,1,1,0,0,1,2,80,1,5,2,NA +69165,7,2,1,72,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,18239.933451,19545.291055,2,102,8,8,3.67,2,2,0,0,2,1,72,2,5,1,NA 
+69166,7,2,1,60,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,6038.685119,6085.921613,1,96,15,15,5,3,3,0,0,1,1,60,2,5,1,5 +69167,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,27681.279,30792.259764,1,91,3,3,1.29,1,1,0,0,1,2,80,1,2,2,NA +69168,7,2,1,75,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,4,NA,1,2,2,1,2,2,1,2,2,NA,8902.117734,9243.371936,1,97,15,15,5,6,6,0,1,1,2,53,1,4,1,NA +69169,7,2,1,10,NA,4,4,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10122.702296,10604.04698,2,100,3,3,0.42,5,5,0,1,0,2,51,1,4,5,NA +69170,7,2,2,51,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,22224.73066,22340.630739,2,91,10,10,2.95,4,4,0,1,0,2,18,1,3,NA,NA +69171,7,2,2,40,NA,2,2,1,NA,NA,2,NA,2,1,2,NA,4,5,2,2,2,2,2,2,2,2,2,1,2,32281.860274,32750.128834,2,93,7,7,2.31,2,2,0,0,1,2,40,2,4,5,NA +69172,7,2,2,65,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9570.416297,9969.842041,2,98,6,6,2.75,1,1,0,0,1,2,65,1,4,3,NA +69173,7,2,1,14,NA,4,4,1,14,171,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14848.504688,14877.861397,1,98,8,8,1.95,4,4,0,2,0,2,31,1,2,1,5 +69174,7,2,2,12,NA,4,4,1,12,152,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11838.873374,11750.256617,2,100,15,15,4.47,4,4,0,2,0,1,39,NA,NA,1,5 +69175,7,2,2,32,NA,2,2,2,NA,NA,1,1,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,27127.983961,26396.013964,2,90,15,15,4.2,5,5,1,0,0,2,50,NA,NA,6,NA +69176,7,2,2,57,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,26668.458882,26828.838508,2,102,5,5,1.84,1,1,0,0,0,2,57,1,3,6,NA +69177,7,2,1,66,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,8,8,3.4,2,2,0,0,2,1,66,1,5,1,4 +69178,7,2,2,28,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,1,1,2,2,1,10800.351372,11472.015663,2,92,3,3,0.45,4,4,0,0,1,1,64,2,1,1,1 +69179,7,1,2,14,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15847.19263,0,1,98,15,15,5,4,4,0,2,0,2,50,1,5,1,5 
+69180,7,2,2,1,16,4,4,2,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7348.24433,7906.792868,2,95,10,10,2.32,6,6,1,2,0,1,44,1,4,1,4 +69181,7,2,2,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,5,2,1,2,2,1,2,2,1,2,2,1,90299.161173,91599.679037,1,95,7,7,2.54,2,2,0,1,0,2,37,1,1,5,NA +69182,7,2,2,8,NA,2,2,2,8,103,NA,NA,2,2,1,3,NA,NA,NA,2,1,2,1,2,2,2,2,2,2,10762.400563,12070.126078,2,90,3,3,0.46,5,5,1,3,0,2,35,2,1,4,NA +69183,7,2,1,6,NA,1,1,1,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10658.399025,10722.994279,1,102,8,8,1.33,7,7,1,4,0,2,32,1,3,1,2 +69184,7,2,1,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,34082.896551,1,92,4,4,1.16,2,2,0,0,1,1,56,1,2,1,3 +69185,7,2,1,60,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,2,1,NA,2,2,2,1,2,2,2,2,1,2,6449.12882,6552.435629,2,93,7,7,1.83,3,3,0,1,1,1,60,2,2,1,4 +69186,7,1,1,5,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,0,2,100,2,2,0.25,4,4,2,1,0,2,39,1,2,5,NA +69187,7,2,2,22,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,1,5,2,2,2,2,2,2,2,NA,NA,NA,NA,31196.446669,31060.57243,2,99,99,1,0,5,1,0,1,0,1,40,2,1,6,NA +69188,7,2,2,8,NA,5,7,2,8,100,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9620.269705,10273.007637,2,91,12,12,NA,4,4,0,2,0,1,40,1,5,1,5 +69189,7,2,1,3,NA,4,4,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,10162.494907,1,100,15,15,5,3,3,1,0,0,2,34,1,5,1,5 +69190,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,9,2,NA,1,1,2,1,2,2,1,1,2,NA,14971.827573,15430.401607,2,97,13,13,NA,4,4,1,0,1,2,45,1,2,5,NA +69191,7,2,1,0,11,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,7757.493251,8192.524698,3,92,1,1,0.18,5,5,2,2,0,2,31,2,1,5,NA +69192,7,2,2,0,2,5,7,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9288.555962,9478.459453,1,95,6,6,0.96,5,5,1,0,1,2,69,1,1,2,NA +69193,7,2,2,12,NA,2,2,2,12,147,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,19480.517135,20370.224649,2,94,1,1,0.01,7,7,1,3,0,1,41,2,1,1,1 
+69194,7,2,2,6,NA,4,4,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10332.067017,11124.939356,1,100,3,3,0.52,3,3,1,1,0,2,25,1,3,5,NA +69195,7,2,2,10,NA,3,3,2,10,130,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14935.786363,14737.445343,2,94,7,7,1.18,7,7,1,4,0,2,31,1,4,6,NA +69196,7,2,1,27,NA,5,7,1,NA,NA,2,NA,2,2,3,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,32866.0119,33575.905633,1,98,99,99,NA,3,1,0,0,0,2,22,1,4,5,NA +69197,7,2,2,2,NA,1,1,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8832.868731,8863.029978,1,102,2,2,0.19,7,7,2,2,0,1,48,2,9,1,9 +69198,7,2,2,13,NA,1,1,2,13,158,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,18368.872199,20151.001259,2,94,5,5,0.67,6,6,1,3,0,1,37,2,3,1,4 +69199,7,2,1,60,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,2,4,NA,1,2,1,1,2,1,1,2,1,3,6814.038157,7682.939373,2,91,12,13,NA,7,1,0,4,2,2,72,2,1,2,NA +69200,7,2,1,34,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,22957.82581,23425.428596,2,102,15,15,3.92,5,5,1,2,0,1,34,2,5,1,5 +69201,7,2,2,44,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,20834.783335,21091.811515,2,94,15,15,5,5,5,0,2,1,1,47,2,5,1,5 +69202,7,2,1,9,NA,1,1,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,17882.621856,17720.218058,3,92,7,7,1.3,5,5,1,2,0,2,33,2,2,1,1 +69203,7,2,2,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,8679.600111,2,95,9,9,4.92,1,1,0,0,1,2,66,1,5,3,NA +69204,7,2,2,16,NA,3,3,1,16,201,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30430.369428,31312.351655,3,92,5,5,1.03,4,4,0,3,0,1,55,1,4,4,NA +69205,7,2,2,47,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,14872.222375,15689.845848,2,92,7,7,1.61,4,4,0,2,0,1,51,2,3,1,3 +69206,7,1,1,80,NA,2,2,NA,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,13654.270555,0,2,93,7,7,2.31,2,2,0,0,2,1,80,2,1,1,1 +69207,7,2,2,30,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,27381.645976,29718.515739,2,95,5,4,1.52,3,1,0,0,0,1,31,1,4,5,NA 
+69208,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,1,2,NA,25840.959268,29976.982116,1,101,3,3,0.86,2,2,0,0,2,2,80,1,2,1,1 +69209,7,1,2,47,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,35469.911999,0,2,91,6,6,2.78,1,1,0,0,0,2,47,1,4,3,NA +69210,7,2,1,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26556.735732,2,101,1,1,0.09,2,1,0,0,0,1,23,1,5,5,NA +69211,7,2,1,0,2,5,6,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8377.359485,8321.07116,2,96,4,4,0.97,3,3,1,0,0,1,31,2,5,1,5 +69212,7,2,1,38,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,2,4,NA,2,2,2,1,2,2,1,2,2,2,41241.224595,41716.316195,2,102,6,6,1.03,5,5,1,1,0,1,37,1,2,1,2 +69213,7,2,2,47,NA,3,3,1,NA,NA,2,NA,2,2,7,NA,5,1,NA,2,2,2,1,2,2,2,2,2,2,27349.117265,27628.99562,2,93,6,6,1.65,2,2,0,0,0,1,52,2,3,1,5 +69214,7,2,2,0,1,4,4,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4137.127382,4416.757179,2,97,1,1,0.12,5,5,1,2,0,2,24,1,3,5,NA +69215,7,2,2,9,NA,1,1,1,9,112,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19194.228411,19883.529671,2,96,7,7,1.34,5,5,0,2,0,1,24,2,2,5,NA +69216,7,2,1,35,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22937.913723,23137.542903,1,100,3,3,0.43,4,4,0,2,0,1,35,1,4,1,3 +69217,7,2,2,16,NA,5,7,2,16,193,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,31716.869763,32129.640601,1,101,3,3,0.92,2,2,0,1,0,2,53,1,5,3,NA +69218,7,2,2,2,NA,1,1,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9132.13761,10082.124987,1,103,13,13,NA,4,4,2,0,0,2,27,2,2,6,NA +69219,7,2,1,4,NA,2,2,1,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15915.595287,17052.411707,2,96,6,6,1.25,4,4,1,1,0,1,31,2,3,1,3 +69220,7,2,2,21,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,2,6,1,1,2,2,1,2,2,1,2,2,1,52280.406546,52747.829022,1,92,1,1,0,2,1,0,0,0,1,30,1,3,6,NA +69221,7,2,2,51,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17950.494975,18684.288461,1,92,15,15,5,4,4,0,2,0,1,55,1,5,1,5 
+69222,7,2,1,6,NA,3,3,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47178.298856,52231.302145,2,95,8,8,2.17,4,4,1,1,0,1,43,1,4,1,5 +69223,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,100722.771154,105550.354971,2,92,14,8,4.13,2,1,0,0,0,2,25,1,5,5,NA +69224,7,2,2,59,NA,2,2,2,NA,NA,2,NA,2,1,4,NA,1,6,NA,2,2,2,1,2,2,2,2,2,2,18341.621382,19145.40337,2,90,6,6,1.21,4,4,0,0,0,2,59,2,1,6,NA +69225,7,2,2,17,NA,1,1,2,17,209,2,NA,2,2,4,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17544.592739,19246.751055,3,92,6,6,1,6,6,1,1,0,1,42,2,1,1,4 +69226,7,2,2,10,NA,4,4,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7588.605543,8103.359102,1,99,14,14,4.21,4,4,0,2,0,2,44,1,5,1,5 +69227,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14994.337564,15663.818698,1,100,15,15,5,2,2,0,0,1,2,48,1,5,5,NA +69228,7,2,1,17,NA,3,3,2,17,215,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,61479.689958,61628.148021,2,100,15,15,4.5,6,6,0,4,0,1,45,1,5,1,5 +69229,7,2,1,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,93164.281583,96130.599912,2,95,8,8,2.17,4,4,1,1,0,1,43,1,4,1,5 +69230,7,2,2,8,NA,1,1,1,8,102,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15962.145468,16412.026403,3,92,4,4,0.81,3,3,0,2,0,2,31,1,3,1,NA +69231,7,2,1,10,NA,4,4,2,10,131,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12701.072484,14129.347101,2,91,6,6,0.78,7,7,1,4,0,2,38,2,2,77,NA +69232,7,2,1,0,6,2,2,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,6837.213048,7220.636221,2,91,1,1,0.02,5,5,1,2,0,2,27,2,3,1,2 +69233,7,2,2,2,NA,3,3,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26167.521666,28880.852575,1,93,6,6,1.16,4,4,2,0,0,2,33,1,5,1,4 +69234,7,2,1,8,NA,4,4,2,8,107,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12176.538896,12609.8265,2,97,2,2,0.27,4,4,0,2,0,1,51,1,2,4,NA +69235,7,2,2,3,NA,5,7,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27969.562936,30869.748923,3,91,7,7,1.57,4,4,2,0,0,2,29,2,3,1,3 
+69236,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18417.587863,18945.077049,2,97,4,4,1.35,2,2,0,0,0,2,27,1,4,1,4 +69237,7,2,2,49,NA,2,2,2,NA,NA,1,2,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,46417.079566,46883.794079,1,97,15,15,5,2,2,0,0,0,2,49,2,5,1,4 +69238,7,2,2,5,NA,3,3,2,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,54058.97777,57449.540919,1,90,8,8,1.67,5,5,2,1,0,2,28,1,4,1,5 +69239,7,2,2,19,NA,5,6,1,19,239,2,NA,2,2,1,15,NA,NA,NA,1,2,1,1,2,1,1,2,1,NA,9200.381964,9723.240452,2,101,2,1,0.32,2,1,0,0,0,2,20,NA,NA,5,NA +69240,7,2,2,7,NA,5,7,1,7,89,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8504.389189,9345.267202,2,93,6,6,1.95,2,2,0,1,0,1,30,1,4,5,NA +69241,7,2,1,36,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,19332.808712,19683.545891,1,90,15,15,5,1,1,0,0,0,1,36,1,5,5,NA +69242,7,1,1,25,NA,2,2,NA,NA,NA,2,NA,2,7,77,NA,2,5,NA,2,2,2,1,2,2,NA,NA,NA,NA,44074.735764,0,2,91,1,1,0.17,4,4,0,1,0,1,49,2,1,6,NA +69243,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105483.169118,109229.181554,1,93,15,15,4.59,4,4,0,2,0,2,45,1,5,1,5 +69244,7,1,2,10,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7899.813226,0,2,100,15,15,5,4,3,0,2,0,1,42,1,3,5,NA +69245,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,21398.47235,20994.797177,1,103,14,8,4.21,2,1,0,0,0,2,29,1,5,5,NA +69246,7,2,1,19,NA,5,7,2,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7672.605662,7940.039559,1,93,14,14,5,2,2,0,0,0,2,53,2,5,3,NA +69247,7,2,2,80,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,12344.929687,12774.490696,1,93,2,2,0.83,1,1,0,0,1,2,80,1,4,3,NA +69248,7,2,2,43,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,3,1,2,1,2,1,NA,NA,NA,1,2,1,NA,18255.735511,22006.513457,2,91,6,6,1.57,3,3,0,1,0,1,41,2,3,1,3 +69249,7,2,1,34,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,1,2,1,2,2,1,1,2,NA,19923.530941,20672.635795,2,95,6,6,1.08,4,4,1,0,0,2,42,1,4,4,NA 
+69250,7,2,2,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,17622.141982,18017.336291,2,95,8,8,2.97,2,2,0,0,0,2,56,1,5,2,NA +69251,7,2,2,5,NA,1,1,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14665.744588,16191.375588,2,96,5,5,0.89,4,4,1,1,0,2,36,2,4,6,NA +69252,7,2,1,11,NA,1,1,1,11,140,NA,NA,2,2,3,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,14820.807433,15173.085782,2,102,7,7,1.79,4,4,0,2,0,1,40,2,2,6,NA +69253,7,2,2,45,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,2,3,NA,2,2,2,2,2,2,1,2,2,2,36457.299109,41822.014095,1,90,5,5,1.79,1,1,0,0,0,2,45,2,2,3,NA +69254,7,2,1,4,NA,1,1,1,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15835.271712,16336.527884,1,100,8,3,0.68,6,3,1,0,0,1,33,2,3,6,NA +69255,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,42993.150248,46481.579201,1,92,6,6,1.98,2,2,0,0,2,1,80,1,5,1,4 +69256,7,2,1,56,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,34082.896551,1,101,3,3,0.88,2,2,0,0,0,1,56,1,2,1,4 +69257,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,24344.236685,23945.277508,2,96,6,6,1.48,4,4,1,1,0,2,25,1,4,5,NA +69258,7,2,2,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21621.615608,21991.273471,1,100,15,15,5,4,4,0,0,0,1,54,1,5,1,5 +69259,7,2,1,30,NA,5,7,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,84910.063417,90031.493834,2,96,14,6,2.75,3,1,0,0,0,1,30,2,5,5,NA +69260,7,2,2,3,NA,5,6,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4882.863582,4877.706148,1,99,7,7,2.89,2,2,1,0,0,2,35,2,5,1,NA +69261,7,2,2,51,NA,3,3,1,NA,NA,2,NA,2,1,8,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,27769.056387,28050.845299,1,94,99,99,NA,3,3,0,0,2,1,74,NA,NA,1,NA +69262,7,2,2,2,NA,1,1,1,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12871.484115,13281.030392,2,102,14,14,3.58,4,4,2,0,0,1,25,2,3,1,1 +69263,7,2,1,73,NA,1,1,1,NA,NA,2,NA,2,1,9,NA,1,1,NA,1,2,2,1,2,2,2,2,2,NA,14673.422679,15506.302145,2,98,5,5,0.59,7,7,2,1,2,2,71,1,2,1,1 
+69264,7,2,1,69,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11212.469396,11300.176858,1,100,15,15,5,3,3,0,0,1,1,69,1,5,1,4 +69265,7,2,2,11,NA,1,1,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17053.854294,17233.146259,3,92,1,1,0.26,2,2,0,1,0,2,45,1,2,3,NA +69266,7,2,1,27,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,17265.374984,17186.028516,2,93,4,4,0.56,5,5,2,1,0,1,27,1,2,6,NA +69267,7,2,1,29,NA,3,3,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,94165.180484,100725.804227,2,99,14,14,5,2,2,0,0,0,2,28,1,5,1,5 +69268,7,2,2,11,NA,4,4,1,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11076.064101,11269.278002,2,102,4,4,0.53,6,6,2,2,0,2,27,1,2,1,2 +69269,7,2,1,16,NA,3,3,2,16,195,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,91343.020442,91081.271456,1,98,7,7,1,7,7,2,2,0,2,34,1,4,3,NA +69270,7,2,1,66,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,1,1,2,1,1,2,1,3,9048.959172,9513.611982,1,93,12,12,NA,4,4,0,0,2,1,66,2,4,1,2 +69271,7,2,1,1,21,1,1,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12569.23571,12239.751631,2,102,15,15,2.43,7,7,3,2,0,1,28,2,5,1,4 +69272,7,2,2,43,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,1,6,2,2,2,2,1,2,2,NA,NA,NA,NA,32606.880052,32776.922157,2,93,4,4,0.56,5,5,0,2,0,1,37,NA,NA,1,1 +69273,7,2,1,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8890.779467,9258.147648,3,91,1,1,0.07,6,6,2,3,0,2,30,1,2,3,NA +69274,7,2,2,21,NA,4,4,2,NA,NA,2,NA,2,1,3,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,18446.691823,17539.327743,1,90,4,4,0.58,6,6,0,3,0,2,21,2,5,5,NA +69275,7,2,1,13,NA,4,4,2,13,158,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11351.725436,11265.8817,2,95,99,99,NA,2,2,0,1,1,2,80,1,2,2,NA +69276,7,2,1,16,NA,1,1,1,16,199,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22768.423624,22886.980387,2,98,15,15,5,3,3,0,1,0,1,38,1,4,1,3 +69277,7,2,1,24,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14313.345971,15171.949804,3,91,6,6,2.94,2,1,0,0,0,1,24,1,5,5,NA 
+69278,7,2,2,78,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,13549.492282,14020.967918,2,93,6,6,2.28,2,2,0,0,1,2,78,1,5,3,NA +69279,7,2,2,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,126314.769628,126413.297707,2,95,15,15,5,2,2,0,0,1,1,61,1,5,1,5 +69280,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,22419.63376,27869.657009,1,94,3,3,1.29,1,1,0,0,1,2,80,1,3,2,NA +69281,7,2,1,3,NA,3,3,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,76114.759421,89186.300704,1,98,15,15,5,4,4,1,1,0,1,40,1,4,1,5 +69282,7,2,2,19,NA,3,3,2,19,236,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,31627.471241,33789.841724,1,101,1,1,0.08,3,3,1,0,0,2,19,1,2,NA,NA +69283,7,2,2,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,10298.451562,11068.740647,1,93,99,1,0.32,2,1,0,0,1,1,50,NA,NA,5,NA +69284,7,2,1,53,NA,2,2,1,NA,NA,2,NA,2,2,7,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,26651.800212,26887.922292,2,92,3,3,0.92,1,1,0,0,0,1,53,2,4,3,NA +69285,7,2,2,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,37537.641876,38018.476349,2,97,5,5,0.84,5,5,0,2,0,2,33,1,4,1,3 +69286,7,2,1,39,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,19096.438335,22378.694988,1,93,15,15,5,1,1,0,0,0,1,39,2,5,5,NA +69287,7,2,1,54,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,173139.914798,174527.091604,1,95,14,14,5,2,2,0,0,0,1,54,1,4,1,3 +69288,7,2,2,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,9518.80186,10252.529496,2,100,3,3,0.75,2,2,0,0,2,1,67,1,3,1,2 +69289,7,2,2,5,NA,2,2,1,5,69,NA,NA,2,1,3,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,15979.952759,16987.381382,2,102,7,7,1.53,5,5,1,2,0,2,37,2,4,1,4 +69290,7,2,2,1,21,1,1,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9955.153132,10990.75621,2,94,4,4,0.81,4,4,2,0,0,1,26,2,2,1,2 +69291,7,2,1,0,9,5,6,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,1,NA,NA,NA,NA,4326.150649,4507.366261,1,99,6,6,1.07,6,6,2,1,2,1,44,2,5,4,NA 
+69292,7,2,1,12,NA,3,3,1,12,147,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,109750.935844,116575.440818,1,94,7,7,1.52,4,4,0,2,2,1,61,2,1,1,5 +69293,7,2,1,9,NA,3,3,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23188.935049,25672.571973,3,91,7,7,1.1,7,7,0,4,0,1,40,1,4,1,3 +69294,7,2,1,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6946.177172,7374.968672,2,90,6,6,1.03,6,6,3,1,0,1,45,2,2,1,2 +69295,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,28813.038041,30658.109612,1,94,5,5,1.2,3,3,0,1,0,1,49,1,4,5,NA +69296,7,2,2,6,NA,5,6,2,6,78,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9849.323746,10517.603056,2,91,10,10,3.04,4,4,1,1,0,1,37,2,5,1,5 +69297,7,2,2,7,NA,2,2,1,7,86,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13498.913367,14670.348602,2,93,7,7,1.56,4,4,1,1,0,1,35,2,4,1,4 +69298,7,2,1,1,20,4,4,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6429.93791,7090.057975,1,90,3,3,0.63,3,3,1,1,0,2,32,1,4,5,NA +69299,7,2,1,72,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,14781.102246,16162.46005,1,97,12,12,NA,4,4,0,0,2,1,72,1,2,1,3 +69300,7,2,1,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,29159.620934,2,101,1,1,0.33,1,1,0,0,0,1,23,1,4,5,NA +69301,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,99831.393624,101809.075589,2,94,8,8,3.4,2,2,0,0,2,2,64,1,4,1,2 +69302,7,2,2,14,NA,4,4,1,14,176,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17373.928778,17416.718116,2,96,8,8,1.72,5,5,0,3,0,1,39,1,5,1,4 +69303,7,2,2,12,NA,1,1,1,12,154,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,29109.978779,29945.36646,3,92,10,10,2.82,4,4,0,1,1,1,36,1,3,1,5 +69304,7,2,1,11,NA,3,3,2,11,138,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,44777.275016,47565.734765,1,91,14,14,3.06,5,5,0,3,0,2,46,1,5,1,5 +69305,7,2,1,14,NA,4,4,2,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12462.601191,12584.643654,2,97,4,4,0.57,5,5,1,3,0,2,33,1,3,5,NA 
+69306,7,2,2,0,5,4,4,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7123.540273,7201.320939,2,101,1,1,0.16,3,3,2,0,0,2,21,1,2,5,NA +69307,7,2,1,58,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,21168.33083,21825.00909,2,99,5,5,1.79,1,1,0,0,0,1,58,1,2,3,NA +69308,7,2,1,12,NA,4,4,2,13,156,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11351.725436,11256.943498,2,95,8,8,1.61,6,6,1,3,0,2,48,1,3,5,NA +69309,7,2,2,45,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,27149.46917,27176.128744,1,94,2,1,0,2,1,0,0,0,1,53,1,3,6,NA +69310,7,2,2,18,NA,2,2,2,18,224,2,NA,2,2,2,66,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,16896.101801,17234.594007,1,93,14,8,2.01,5,4,1,0,0,2,22,2,1,6,NA +69311,7,2,2,16,NA,1,1,1,16,198,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,20502.928313,21226.512457,3,92,9,9,2.46,4,4,0,2,0,1,43,2,3,1,4 +69312,7,1,2,6,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13257.060167,0,2,90,14,14,3.06,5,5,1,2,0,1,42,1,4,1,5 +69313,7,2,1,1,14,4,4,2,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6580.937346,6661.20735,2,97,5,5,1.08,3,3,1,0,0,1,28,1,3,5,NA +69314,7,2,1,50,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,16117.991297,17081.203544,1,96,7,7,1.69,4,4,0,1,0,2,19,2,4,NA,NA +69315,7,1,1,11,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15653.970322,0,1,91,6,6,1.13,6,6,1,3,0,1,40,1,4,6,NA +69316,7,2,2,17,NA,5,6,2,17,214,2,NA,2,2,2,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7588.544207,7881.983727,3,91,4,4,0.69,5,5,0,2,0,1,45,2,4,1,1 +69317,7,2,2,3,NA,4,4,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10447.689164,11241.830083,1,102,7,7,1.8,5,4,1,0,2,1,47,1,3,5,NA +69318,7,2,1,4,NA,4,4,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11324.865632,11670.771889,2,96,2,2,0.44,3,3,2,0,0,2,22,1,2,5,NA +69319,7,2,1,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18790.641284,19715.999527,2,100,15,1,0.05,4,1,0,2,0,1,42,1,3,5,NA 
+69320,7,2,2,58,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,4,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,29087.427528,29444.125326,1,101,3,3,0.41,5,5,0,2,1,2,36,2,4,4,NA +69321,7,2,1,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21390.017054,21518.859775,2,99,15,15,5,2,2,0,0,0,2,51,1,5,1,5 +69322,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,24460.137584,1,92,4,4,0.61,5,5,1,2,0,1,34,1,3,6,NA +69323,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,7903.072331,8212.17704,2,99,NA,77,NA,3,2,0,0,1,1,63,1,3,5,NA +69324,7,2,1,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,153755.392794,158780.137655,1,91,15,15,5,3,3,0,0,0,2,50,1,4,1,4 +69325,7,2,2,36,NA,5,6,2,NA,NA,2,NA,2,2,7,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,20039.469886,21176.851739,1,97,15,15,5,3,3,1,0,0,1,40,1,3,1,5 +69326,7,2,2,2,NA,5,6,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6213.806926,6511.003426,1,95,4,4,0.62,5,5,2,0,2,2,29,2,3,5,NA +69327,7,2,2,10,NA,3,3,2,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,55626.447796,54887.751745,1,95,15,15,5,4,4,0,2,0,2,42,1,5,1,5 +69328,7,2,2,71,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,13495.958207,13916.488113,1,96,9,9,4.23,2,2,0,0,2,2,71,2,5,1,5 +69329,7,2,2,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,42559.487719,42202.626733,1,98,3,2,0.62,2,1,0,0,0,2,50,1,2,3,NA +69330,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,NA,NA,NA,NA,23614.167119,30467.110717,2,97,1,1,0.33,2,2,1,0,0,2,24,1,2,5,NA +69331,7,2,2,13,NA,4,4,2,13,166,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10848.628906,11198.221038,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +69332,7,2,1,15,NA,4,4,1,15,186,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17606.165994,20773.995354,2,101,9,9,2.46,4,4,0,2,0,1,42,1,3,1,3 +69333,7,2,2,76,NA,5,7,1,NA,NA,2,NA,2,1,5,NA,1,1,NA,1,2,2,1,2,2,1,2,1,NA,33487.945981,34633.22923,1,102,10,10,3.22,4,4,0,0,2,2,29,2,5,5,NA 
+69334,7,2,2,58,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,24938.6524,26291.237881,2,98,5,5,1.24,3,3,0,0,1,2,58,1,2,5,NA +69335,7,2,1,54,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,32838.149884,1,95,5,5,1.84,1,1,0,0,0,1,54,1,4,3,NA +69336,7,2,1,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,19401.570044,22254.979865,1,92,6,6,1.24,4,4,1,1,0,1,30,1,3,3,NA +69337,7,2,2,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,NA,NA,NA,NA,22912.222762,23105.605227,2,96,6,6,1.35,3,3,1,1,0,2,23,1,2,5,NA +69338,7,2,2,13,NA,1,1,1,13,164,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15690.47168,16249.379042,1,102,8,8,1.33,7,7,1,4,0,2,32,1,3,1,2 +69339,7,2,1,22,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,1,6,NA,2,2,2,2,2,2,1,2,2,2,38560.502118,39812.43256,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +69340,7,2,1,14,NA,5,6,2,14,179,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6666.045669,7091.184391,3,90,10,10,2.41,5,5,1,2,0,1,44,2,4,1,5 +69341,7,2,1,6,NA,1,1,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11159.151566,11057.808004,1,102,2,2,0.31,4,4,1,2,0,2,25,1,2,4,NA +69342,7,2,1,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,16645.008535,17770.553389,2,95,2,2,0.4,2,2,0,0,0,2,43,1,1,1,NA +69343,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,118671.226879,120869.497653,2,91,15,15,5,3,3,0,0,0,2,54,1,4,1,4 +69344,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26091.362984,2,101,5,5,1.5,2,2,0,0,0,1,47,1,4,3,NA +69345,7,2,1,11,NA,3,3,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18216.94614,18737.854061,1,94,3,3,0.39,6,6,2,2,0,2,25,1,4,1,2 +69346,7,2,1,18,NA,2,2,1,18,226,2,NA,2,2,4,13,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,15506.325263,16662.012915,1,103,5,5,0.74,5,5,0,1,0,1,47,2,1,1,1 +69347,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,20137.063643,20085.335673,2,95,6,6,0.9,6,6,1,1,0,1,49,1,1,1,1 
+69348,7,2,2,13,NA,3,3,1,13,166,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,31627.471241,33789.841724,1,98,6,6,1.11,5,5,1,2,0,2,32,1,2,1,2 +69349,7,2,1,74,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,59483.758768,62962.247193,1,96,14,14,5,2,2,0,0,2,1,74,1,5,1,4 +69350,7,1,2,77,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,12888.823036,0,2,99,7,7,3.13,1,1,0,0,1,2,77,1,5,2,NA +69351,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,66503.043118,69008.73133,2,98,7,7,1.94,3,3,1,0,0,2,31,1,4,1,NA +69352,7,2,1,0,10,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9064.168162,9228.63108,3,92,12,12,NA,4,4,2,0,0,1,30,1,3,1,4 +69353,7,2,2,2,NA,4,4,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7871.443574,8074.618887,1,96,9,9,2.88,3,3,1,0,0,1,27,1,3,1,5 +69354,7,2,1,42,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,1,4,NA,2,2,2,1,2,2,2,2,2,2,34205.013302,34329.398365,2,102,7,7,1.04,7,7,1,2,0,2,37,2,1,1,2 +69355,7,2,2,27,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,NA,NA,NA,NA,44119.608456,46238.164206,2,98,1,1,0.14,3,2,2,0,0,2,27,1,3,6,NA +69356,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,11696.973403,11982.125462,2,101,5,5,1.08,3,3,0,1,1,2,62,1,4,2,NA +69357,7,2,1,8,NA,4,4,1,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,8894.789377,9317.745565,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA +69358,7,2,1,11,NA,3,3,1,11,134,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,41342.668304,42524.849071,1,102,8,8,2.42,4,4,0,2,0,2,34,1,4,1,3 +69359,7,2,1,8,NA,4,4,1,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9022.8939,9530.778148,2,100,3,3,0.31,7,7,3,2,0,2,28,1,3,1,3 +69360,7,2,2,0,0,5,7,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6280.554922,6238.355238,2,92,10,10,3.78,3,3,1,0,0,1,35,1,4,6,NA +69361,7,2,1,66,NA,4,4,1,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,10209.064769,10089.274206,1,98,6,6,1.57,3,3,0,0,2,1,66,2,2,1,4 
+69362,7,2,1,5,NA,4,4,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8892.687359,9565.637195,2,96,12,10,2.17,7,6,2,3,0,1,29,1,4,3,NA +69363,7,2,2,22,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,44119.608456,46238.164206,3,92,3,3,0.54,4,4,3,0,0,2,22,1,3,5,NA +69364,7,2,2,11,NA,1,1,1,11,143,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,10118.363218,11093.371216,2,103,77,77,NA,7,7,0,4,0,1,38,2,1,6,NA +69365,7,2,2,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,185476.232403,190233.31946,1,95,4,4,1.47,1,1,0,0,0,2,59,1,1,5,NA +69366,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,89167.746947,90120.008289,1,95,15,15,5,2,2,0,0,2,1,72,1,3,1,4 +69367,7,2,1,2,NA,3,3,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46257.816906,54201.886729,2,94,15,15,4.59,4,4,1,1,0,2,37,1,5,1,5 +69368,7,2,1,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16851.334496,16952.838472,2,95,14,14,5,2,2,0,0,0,1,52,1,4,1,NA +69369,7,2,1,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,1,6910.118936,7233.371983,2,95,2,2,0.63,1,1,0,0,1,1,66,1,1,3,NA +69370,7,2,1,17,NA,1,1,1,18,216,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20560.901695,21038.44606,2,96,10,10,3.78,3,3,0,1,0,1,42,1,3,1,3 +69371,7,2,1,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,22650.334558,22327.472235,1,93,12,12,NA,5,1,0,2,0,1,32,1,2,5,NA +69372,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,23022.732862,22484.94345,2,102,5,3,0.92,5,1,2,1,0,1,24,1,4,6,NA +69373,7,2,2,20,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,114492.825466,119073.624167,2,101,3,2,0.73,3,1,0,0,0,2,20,2,4,5,NA +69374,7,1,2,52,NA,2,2,NA,NA,NA,2,NA,2,2,5,NA,1,6,NA,2,2,2,1,2,2,NA,NA,NA,NA,24004.6026,0,2,91,1,1,0.17,4,4,0,1,0,1,49,2,1,6,NA +69375,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,148501.534483,154474.828602,2,91,15,15,5,2,2,0,0,1,2,66,1,5,3,NA 
+69376,7,2,1,19,NA,3,3,1,19,236,2,NA,2,1,5,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,41421.091412,40909.179995,1,94,3,3,0.93,2,2,0,0,0,2,41,2,2,3,NA +69377,7,1,1,70,NA,5,6,NA,NA,NA,2,NA,2,1,3,NA,5,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,17564.38036,0,2,102,5,5,1.36,2,2,0,0,2,1,70,2,5,1,4 +69378,7,2,2,20,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,3,5,2,2,2,2,1,2,2,2,2,2,2,39788.68078,43305.780488,2,99,3,3,0.52,3,3,0,0,1,2,38,2,3,3,NA +69379,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,22824.336433,1,92,1,1,0,2,1,0,0,0,1,30,1,3,6,NA +69380,7,2,1,20,NA,2,2,1,NA,NA,1,2,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,39915.513053,42587.888675,2,98,1,1,0.13,4,4,2,0,0,2,52,1,2,4,NA +69381,7,2,1,6,NA,2,2,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13837.588743,14145.695126,1,92,9,9,2.93,3,3,0,1,0,2,30,1,5,1,5 +69382,7,2,1,6,NA,5,6,2,6,79,NA,NA,2,2,1,0,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,9720.482616,11328.640802,2,91,99,99,NA,7,4,0,4,0,1,36,2,9,1,2 +69383,7,2,2,10,NA,5,7,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5010.242859,5375.15147,3,91,15,15,4.47,4,4,0,3,0,2,44,2,5,1,NA +69384,7,2,1,7,NA,4,4,2,7,90,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10665.048307,10877.092316,1,96,4,4,1.12,2,2,0,1,0,2,44,1,2,5,NA +69385,7,2,1,12,NA,5,6,2,12,154,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6666.045669,7091.184391,3,90,77,77,NA,7,7,1,2,0,1,41,2,3,6,NA +69386,7,2,1,52,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19541.667675,19738.253068,2,95,3,3,1.21,1,1,0,0,0,1,52,1,3,5,NA +69387,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,71632.519751,73567.29218,2,100,8,8,4.13,1,1,0,0,0,1,25,1,3,5,NA +69388,7,2,1,61,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,11568.876339,13082.418262,1,102,4,4,0.78,4,4,0,1,1,1,61,1,3,1,1 +69389,7,2,2,22,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,53638.260635,55237.528848,2,96,5,1,0.37,2,1,0,0,0,2,22,1,3,6,NA 
+69390,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,96485.877517,101155.423058,2,95,9,9,3.97,2,2,0,0,1,1,62,1,2,1,2 +69391,7,2,2,52,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,30868.065568,31427.853631,3,92,10,10,3.77,3,3,0,1,0,2,52,1,4,6,NA +69392,7,2,1,35,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,13147.594977,13793.622874,1,102,15,15,4.59,4,4,1,1,0,1,35,1,5,1,5 +69393,7,2,1,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23511.361566,23589.086028,1,100,14,14,4.45,3,3,1,0,0,1,33,1,4,1,4 +69394,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,39061.30289,2,101,4,4,1.22,2,2,1,0,0,2,25,1,4,5,NA +69395,7,2,1,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,20813.587171,20953.352876,2,93,6,6,1.47,3,3,0,0,0,2,47,1,4,5,NA +69396,7,2,1,10,NA,4,4,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8439.71412,8914.772112,2,99,7,7,1.19,6,6,1,3,0,2,38,1,3,5,NA +69397,7,2,1,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,30152.053647,30291.005728,2,101,14,14,4.03,4,4,0,1,0,2,40,1,5,1,5 +69398,7,2,1,3,NA,4,4,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9388.597537,2,100,99,99,NA,6,6,2,1,0,2,44,1,3,1,4 +69399,7,1,1,58,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,25583.266805,0,1,100,10,10,4.63,2,2,0,0,1,1,58,1,5,1,4 +69400,7,2,1,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,NA,NA,NA,1,2,2,1,21600.805431,21945.582953,2,96,3,3,0.38,5,5,1,2,0,2,30,1,3,5,NA +69401,7,2,1,0,0,1,1,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,4724.065742,4677.378988,2,93,77,77,NA,7,7,3,1,0,2,43,2,1,1,9 +69402,7,2,2,59,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14262.400197,16582.236492,1,93,14,14,5,2,2,0,0,0,1,36,2,5,1,NA +69403,7,2,1,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,115789.798768,119024.647099,1,90,6,6,2.86,1,1,0,0,1,1,61,1,5,3,NA 
+69404,7,2,1,57,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20536.772571,20857.586819,1,90,15,15,5,2,2,0,0,0,1,57,2,4,1,5 +69405,7,2,1,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,152467.08796,155127.202914,1,91,8,8,2.17,4,4,0,0,0,1,59,1,4,1,5 +69406,7,2,1,16,NA,5,6,2,16,197,NA,NA,2,2,2,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8168.705487,9098.657657,1,93,7,7,1.64,5,5,0,2,0,1,47,2,5,1,1 +69407,7,2,1,14,NA,5,6,1,14,173,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,1,1,2,2,1,6121.087833,6878.034577,2,92,7,7,1.17,6,6,0,1,1,1,78,2,1,1,3 +69408,7,2,2,16,NA,1,1,1,16,202,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,2,2,1,1,2,2,1,13963.420591,14243.160208,2,103,2,2,0.42,3,3,0,2,0,2,51,2,2,5,NA +69409,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,21605.242088,21013.957961,3,91,14,14,5,1,1,0,0,0,2,50,1,5,5,NA +69410,7,2,2,46,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23409.362971,23343.581342,2,96,15,15,5,3,3,0,1,0,1,55,1,5,1,4 +69411,7,2,1,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,68266.732554,70856.701199,1,95,7,7,2.72,2,2,0,0,2,1,75,1,5,1,5 +69412,7,2,2,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,38833.86357,40362.1181,3,92,3,3,0.93,2,2,0,0,1,2,75,1,1,3,NA +69413,7,2,2,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,15477.500537,2,100,5,5,1.08,3,3,0,1,0,2,50,1,4,3,NA +69414,7,2,2,4,NA,3,3,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,51483.624552,56822.002088,1,91,4,4,1.29,2,2,1,0,0,2,26,1,4,5,NA +69415,7,2,2,15,NA,1,1,2,15,182,NA,NA,2,2,4,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18368.872199,18995.639955,2,94,12,12,NA,4,4,0,2,0,1,47,2,2,1,2 +69416,7,2,1,2,NA,5,6,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8100.706553,9085.828699,1,98,15,15,5,4,4,1,1,0,1,40,NA,NA,1,5 +69417,7,2,1,27,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,124091.929364,126503.690206,1,95,12,12,NA,6,6,2,0,0,2,42,1,2,1,5 
+69418,7,2,1,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,60803.589953,64579.113865,1,91,15,15,5,2,2,0,0,2,1,71,1,5,1,4 +69419,7,2,1,57,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,25963.141347,25880.331023,1,92,10,10,4.3,2,2,0,0,0,2,55,1,4,1,5 +69420,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,NA,NA,NA,1,2,2,NA,60163.952904,72064.333099,1,97,NA,NA,NA,2,1,0,0,2,1,80,1,3,6,NA +69421,7,2,2,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,149765.47604,152732.36321,1,92,7,7,2.64,2,2,0,0,2,2,68,1,4,1,4 +69422,7,2,2,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,38477.954475,38315.213578,1,97,15,15,5,6,6,0,1,1,2,53,1,4,1,NA +69423,7,2,1,6,NA,3,3,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58429.74688,62860.392832,1,100,6,6,1.78,3,3,1,1,0,2,35,1,5,4,NA +69424,7,2,1,4,NA,4,4,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7359.751824,8115.329859,2,99,5,5,1.26,3,3,1,0,0,2,50,2,3,5,NA +69425,7,2,2,66,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15268.186173,15857.48365,1,102,14,14,5,2,2,0,0,2,1,65,2,5,1,5 +69426,7,2,2,14,NA,5,6,2,14,175,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11975.458482,12226.374363,1,97,14,14,2.87,5,5,0,3,0,2,40,2,5,1,5 +69427,7,2,1,44,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,18533.049642,18774.610796,1,99,3,3,0.54,3,3,1,0,0,2,29,1,4,1,4 +69428,7,2,2,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,33331.144292,35071.927834,1,101,7,7,2.71,2,2,0,0,0,1,43,1,4,1,4 +69429,7,2,2,18,NA,1,1,1,18,221,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24481.187693,25353.22751,1,95,4,4,0.68,5,5,0,1,0,2,38,2,3,4,NA +69430,7,2,2,67,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,1,1,NA,1,2,1,1,2,1,1,2,2,NA,14102.354333,14719.644428,3,91,14,4,1.02,6,2,0,0,2,1,48,2,1,1,1 +69431,7,2,2,79,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,12183.823561,12607.778632,2,100,9,9,4.35,2,2,0,0,2,2,79,1,5,1,3 
+69432,7,2,1,0,5,4,4,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5782.580039,5871.334502,2,99,7,7,1.65,4,4,2,1,0,2,29,1,3,4,NA +69433,7,2,1,53,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,29883.483388,29823.443389,1,102,7,7,1.89,3,3,0,0,0,1,53,2,1,1,1 +69434,7,2,2,43,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,40880.818805,42051.054889,1,101,4,4,0.84,3,3,0,1,0,1,42,1,4,1,4 +69435,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,47157.788417,52669.767304,2,95,15,15,5,2,2,0,0,2,1,80,1,5,1,5 +69436,7,2,1,46,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,28353.771379,28938.361413,1,92,8,8,4.66,1,1,0,0,0,1,46,1,4,5,NA +69437,7,2,2,6,NA,3,3,2,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19880.837381,19616.828143,1,95,6,6,1.3,4,4,0,3,0,2,46,1,4,3,NA +69438,7,2,1,16,NA,3,3,2,16,201,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,29222.804528,30590.757203,1,90,7,7,1.55,5,5,0,3,0,1,51,2,3,1,2 +69439,7,2,1,67,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,7903.072331,8272.775405,2,99,8,8,3.57,2,2,0,0,1,1,67,1,2,1,2 +69440,7,2,1,3,NA,5,6,2,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6940.398869,7784.416682,2,100,15,15,5,4,4,1,1,0,1,36,2,5,1,5 +69441,7,2,2,9,NA,3,3,2,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,62419.284609,61940.470984,1,101,14,14,4.5,3,3,0,1,0,1,39,1,2,1,5 +69442,7,2,1,19,NA,4,4,1,19,229,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,2,1,0.18,4,1,0,0,0,1,19,1,4,NA,NA +69443,7,2,2,29,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,11378.639129,11910.020755,1,102,15,15,3.82,5,5,1,1,0,1,29,1,4,1,4 +69444,7,2,2,2,NA,4,4,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7618.827213,8513.998744,2,97,5,5,1.63,2,2,1,0,0,2,33,1,3,5,NA +69445,7,2,2,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,18490.479848,17984.43936,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA 
+69446,7,2,1,25,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,5,NA,2,2,2,1,2,2,1,2,2,2,38560.502118,39812.43256,2,102,5,5,0.59,7,7,1,3,0,1,37,2,1,6,NA +69447,7,2,2,13,NA,4,4,1,14,168,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16659.324602,16974.753267,1,92,15,15,4.44,5,5,0,3,0,2,43,1,5,6,NA +69448,7,2,1,3,NA,4,4,1,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8892.687359,9565.637195,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +69449,7,2,1,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19260.892847,19877.962631,2,97,12,12,NA,3,3,0,0,0,1,33,1,4,5,NA +69450,7,2,2,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,93796.829073,96460.739647,1,100,15,15,5,4,4,1,1,0,1,40,1,5,1,5 +69451,7,2,1,67,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,7736.56115,7765.677873,1,99,13,13,NA,3,3,0,0,2,1,67,1,2,1,2 +69452,7,2,2,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,175633.860713,177416.12057,3,91,15,15,5,4,4,0,0,1,1,60,NA,NA,4,NA +69453,7,2,2,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,171038.159272,172773.783849,3,92,15,15,5,3,3,0,0,0,1,56,NA,NA,1,4 +69454,7,2,2,13,NA,4,4,2,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,6,6,0.96,5,5,0,4,0,2,36,1,4,4,NA +69455,7,2,2,80,NA,3,3,1,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,1,1,2,2,1,2,1,NA,49191.372812,61149.379277,3,91,15,15,3.33,6,6,0,2,2,1,80,2,3,1,3 +69456,7,2,1,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,21897.080981,21827.239495,3,91,12,12,NA,2,2,0,0,0,1,52,1,5,1,5 +69457,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,13408.721263,14895.884708,2,98,3,3,0.68,2,2,0,0,2,1,80,1,1,1,3 +69458,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,167711.394252,174540.034956,1,101,7,7,2.31,2,2,0,0,1,1,60,1,3,1,3 +69459,7,2,2,62,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,15207.312407,15896.113669,2,98,10,10,4.43,2,2,0,0,1,2,47,1,4,3,NA 
+69460,7,2,1,50,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,23259.140149,22917.848121,1,90,10,10,2.44,5,5,1,0,0,2,56,2,1,1,1 +69461,7,2,2,13,NA,5,6,2,13,160,NA,NA,1,1,NA,7,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,6937.463063,7205.726089,3,90,15,15,3.23,6,6,0,2,0,1,50,2,2,1,2 +69462,7,2,2,36,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,42468.064168,42460.239922,2,101,6,6,0.96,5,5,0,4,0,2,36,1,4,4,NA +69463,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,27771.028362,29170.559675,3,92,4,4,1.16,2,2,0,0,0,2,20,1,2,1,2 +69464,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105261.096488,116369.237268,1,97,15,15,5,4,4,0,2,0,1,39,1,5,1,NA +69465,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,134213.669088,137679.44344,1,93,7,7,2.16,3,3,0,1,0,2,50,1,5,3,NA +69466,7,2,2,8,NA,5,6,1,8,99,NA,NA,1,1,NA,3,NA,NA,NA,1,1,1,1,2,1,1,2,1,1,6189.617175,6489.651129,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +69467,7,1,1,3,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,77702.196479,0,1,95,14,14,3.8,4,4,1,1,0,1,36,1,4,1,5 +69468,7,2,1,69,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,7736.56115,7645.782314,1,99,6,6,1.84,2,2,0,0,2,1,69,1,3,1,4 +69469,7,2,1,0,2,1,1,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8519.229538,8996.978293,1,101,4,4,0.57,5,5,1,2,0,1,28,2,1,1,1 +69470,7,2,2,4,NA,1,1,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,16462.187772,17500.018071,3,92,7,7,1.3,5,5,1,2,0,2,33,2,2,1,1 +69471,7,2,2,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,62212.598767,64340.261278,1,94,14,14,5,2,2,0,0,2,2,72,1,5,1,5 +69472,7,2,1,38,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18544.003944,19241.239961,2,100,14,14,3.76,4,4,1,1,0,1,38,1,3,1,NA +69473,7,2,1,79,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,8456.543684,8955.478754,2,92,7,7,3.13,1,1,0,0,1,1,79,1,1,2,NA 
+69474,7,2,2,66,NA,2,2,1,NA,NA,2,NA,2,2,5,NA,3,5,NA,2,2,2,1,2,2,1,2,2,2,11570.073931,12052.956309,2,93,7,7,1.74,4,4,1,0,1,2,24,NA,NA,4,NA +69475,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,20430.250572,20111.803159,2,97,5,5,0.76,5,5,0,0,0,2,50,1,4,5,NA +69476,7,2,2,0,5,4,4,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4584.111679,4588.281921,1,97,4,4,0.46,7,7,3,3,0,2,31,1,3,1,NA +69477,7,2,1,0,5,1,1,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6390.260233,6748.618798,2,96,6,6,1,6,6,1,3,0,2,32,2,1,6,NA +69478,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,34874.122648,37228.07844,3,92,6,6,0.74,7,7,2,1,0,2,46,1,2,1,4 +69479,7,2,2,47,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,25713.328161,27565.019075,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +69480,7,2,2,1,13,4,4,1,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8480.466509,9125.076563,1,100,9,9,2.22,5,5,1,2,0,2,40,2,4,1,4 +69481,7,2,2,0,2,1,1,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6787.112205,7016.795893,2,94,7,7,1.33,6,6,2,0,0,2,32,2,2,1,2 +69482,7,2,2,3,NA,1,1,1,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17282.036547,19079.832121,2,102,3,3,0.45,4,4,2,0,0,1,21,2,2,6,NA +69483,7,2,1,4,NA,5,6,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10273.602479,10769.310961,1,92,77,77,NA,4,4,1,1,0,2,40,2,4,1,4 +69484,7,2,1,17,NA,1,1,1,17,210,2,NA,2,2,5,10,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,19996.544021,19901.195571,1,100,7,7,1.3,5,5,0,3,0,1,43,2,2,1,4 +69485,7,2,1,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22329.779038,21943.745693,1,98,14,14,3.93,3,3,1,0,0,1,36,1,5,1,5 +69486,7,2,2,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,11679.736252,12242.903977,1,103,8,8,4.66,1,1,0,0,1,2,67,1,5,3,NA +69487,7,2,1,21,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14852.674152,15542.18804,1,92,14,14,3.9,4,4,0,0,0,2,55,2,5,1,5 
+69488,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,33784.29429,34197.992084,1,95,5,5,1.3,3,3,0,0,1,2,19,1,3,NA,NA +69489,7,2,2,62,NA,5,6,2,NA,NA,2,NA,2,2,7,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,12941.046565,13394.767563,1,93,9,9,5,1,1,0,0,1,2,62,2,4,5,NA +69490,7,2,1,0,3,3,3,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22795.485282,22401.400888,1,95,9,9,2.46,4,4,2,0,0,2,25,1,5,1,5 +69491,7,2,2,67,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,2,NA,2,2,2,2,2,2,2,2,2,2,10490.803278,11351.088215,1,93,2,2,0.77,1,1,0,0,1,2,67,2,1,2,NA +69492,7,2,2,15,NA,4,4,2,15,185,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18163.985724,18905.695776,2,101,3,3,0.54,3,3,0,2,0,2,36,1,3,5,NA +69493,7,2,2,17,NA,4,4,2,17,209,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11711.384457,11558.024533,2,95,7,7,1.74,4,4,0,2,0,2,47,1,5,4,NA +69494,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,97803.500399,98104.213748,1,95,8,8,1.28,7,7,1,4,0,1,32,1,3,1,3 +69495,7,2,2,22,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,3,5,2,1,2,2,1,2,2,1,2,2,3,16239.242782,18510.062544,2,101,9,3,1.1,4,1,0,0,0,1,22,2,3,5,NA +69496,7,2,2,9,NA,4,4,2,9,116,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7631.175557,7916.380746,1,93,12,12,NA,3,3,0,1,0,2,48,1,3,5,NA +69497,7,2,1,26,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,13749.764747,14666.581717,1,90,4,4,0.78,4,4,0,0,1,1,69,2,4,1,3 +69498,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,59510.728426,64021.429591,1,99,15,15,5,3,3,1,0,0,2,31,1,5,1,5 +69499,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,129098.803996,133683.475932,1,93,15,15,5,4,4,0,2,0,2,42,1,5,1,NA +69500,7,2,1,19,NA,3,3,2,19,234,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,119111.433099,126517.990149,2,94,15,15,3.82,5,5,0,0,0,1,50,1,5,1,5 +69501,7,2,2,0,9,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10692.488346,11331.751026,1,101,2,2,0.22,4,4,1,0,0,2,25,1,4,6,NA 
+69502,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,143785.115498,143265.966876,1,99,8,8,4.87,1,1,0,0,0,2,50,1,5,5,NA +69503,7,2,2,44,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,35933.117795,41598.834451,1,98,2,2,0.72,1,1,0,0,0,2,44,1,4,3,NA +69504,7,2,1,54,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13002.519536,13402.475336,1,93,15,15,4.59,4,4,0,2,0,2,45,1,5,1,5 +69505,7,2,1,60,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,6,NA,1,2,2,1,2,2,1,2,2,1,27043.072221,28144.17465,2,101,5,4,1.47,2,1,0,0,1,2,49,1,3,6,NA +69506,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,12256.510142,13615.881666,1,95,4,4,1.26,2,2,0,0,2,1,80,1,1,1,3 +69507,7,2,2,8,NA,5,6,2,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10699.45895,11290.011566,1,97,10,10,3.67,3,3,0,1,0,1,47,1,5,1,5 +69508,7,2,1,37,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,34887.439952,36924.956604,2,94,7,7,1.04,7,7,0,3,0,1,37,2,1,1,3 +69509,7,2,1,62,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,2,2,2,1,2,2,NA,8609.250304,11228.904188,2,90,4,4,1.02,2,2,0,0,1,1,62,2,1,1,1 +69510,7,2,1,19,NA,5,6,1,19,234,2,NA,2,1,4,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6558.308393,6849.983523,2,92,99,1,0.28,4,1,0,0,0,1,19,1,4,NA,NA +69511,7,2,2,65,NA,4,4,1,NA,NA,2,NA,2,2,3,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,13998.494211,15077.525188,2,102,4,4,1.38,1,1,0,0,1,2,65,2,2,4,NA +69512,7,2,1,17,NA,3,3,2,17,213,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17395.533107,17264.495057,1,99,13,13,NA,4,4,0,2,0,1,55,NA,NA,1,4 +69513,7,2,1,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18533.049642,19320.837782,1,99,15,15,5,4,4,0,2,0,2,46,1,5,1,5 +69514,7,2,1,57,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20181.678047,20475.67653,1,96,14,14,3.99,4,4,0,0,0,2,53,1,3,1,4 +69515,7,2,1,63,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,6612.194774,6663.917441,2,99,7,7,3.55,2,1,0,0,1,1,63,1,4,3,NA 
+69516,7,2,1,74,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,52448.388619,54438.22579,1,99,9,9,3.74,2,2,0,0,2,2,73,1,3,1,4 +69517,7,2,1,0,7,4,4,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6246.568228,6652.820193,2,95,7,7,1.41,5,5,2,0,0,2,53,1,3,3,NA +69518,7,2,2,2,NA,4,4,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6226.488588,6570.976462,2,99,6,6,1.03,6,6,3,0,0,1,33,1,3,6,NA +69519,7,2,2,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,19520.240895,25185.107635,2,95,1,1,0.13,2,2,1,0,0,2,22,1,3,5,NA +69520,7,2,1,22,NA,3,3,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,101419.325386,103801.228657,1,100,15,15,4.63,5,5,0,0,0,1,51,1,5,1,3 +69521,7,2,1,74,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,48254.793439,51076.634961,2,100,6,6,2.04,2,2,0,0,2,1,74,1,4,1,3 +69522,7,2,2,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,52701.331723,53172.517558,1,94,3,3,0.39,6,6,2,2,0,2,25,1,4,1,2 +69523,7,2,1,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,128128.531162,128588.454794,1,99,8,8,4.78,1,1,0,0,0,1,56,1,3,3,NA +69524,7,2,2,61,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9518.80186,9943.806181,2,100,12,12,NA,2,2,0,0,1,1,56,1,5,1,4 +69525,7,2,2,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,18462.756377,17957.474615,1,99,2,2,0.48,2,2,0,0,0,2,44,1,3,1,3 +69526,7,2,1,19,NA,3,3,2,19,239,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22313.526699,26201.091455,1,99,5,2,0.73,4,1,0,0,0,1,19,1,3,NA,NA +69527,7,2,1,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,39031.957066,39638.562591,1,95,6,4,1.47,2,1,0,0,0,2,28,1,2,6,NA +69528,7,2,1,80,NA,1,1,1,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,14673.422679,14929.213705,2,98,15,15,4.56,4,4,0,0,3,1,80,1,1,1,NA +69529,7,2,1,7,NA,3,3,2,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,82670.203859,85919.643529,1,97,14,14,3.36,4,4,0,2,0,2,49,1,5,1,5 
+69530,7,2,2,9,NA,3,3,2,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,46660.163959,46766.329713,1,98,15,15,5,5,5,0,1,0,1,53,1,5,1,5 +69531,7,2,1,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,26847.643051,28917.411142,1,98,1,1,0.16,3,3,1,0,0,1,28,1,2,6,NA +69532,7,2,2,47,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,3,1,NA,1,2,2,1,2,1,NA,NA,NA,NA,19150.604366,19847.194665,3,91,6,6,1.81,3,3,0,1,0,2,47,2,3,1,3 +69533,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,114993.808573,116714.079488,1,98,7,4,1.34,4,1,0,0,0,2,20,NA,NA,5,NA +69534,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,21503.272394,20634.528185,1,96,14,10,5,2,1,0,0,0,2,29,1,5,6,NA +69535,7,2,1,16,NA,5,7,1,16,199,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,59545.101745,58762.542429,1,102,8,8,1.6,7,7,0,4,0,2,39,1,4,1,4 +69536,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,126314.769628,126413.297707,2,95,8,8,3.53,2,2,0,0,0,1,57,1,4,1,4 +69537,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,140932.152825,151077.472396,1,97,15,15,5,4,4,0,2,0,1,49,1,5,1,5 +69538,7,2,2,19,NA,2,2,2,20,NA,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,26657.121865,27597.898247,1,97,15,15,5,3,3,0,0,0,1,45,1,4,1,3 +69539,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,27199.141352,29187.923269,1,98,7,7,3.58,1,1,0,0,1,1,80,1,2,2,NA +69540,7,2,2,45,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,24625.307946,26182.57746,1,93,2,2,0.61,2,2,0,0,1,2,45,2,3,5,NA +69541,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,34952.089182,39581.970787,1,94,4,4,1,3,3,0,1,0,2,41,1,4,5,NA +69542,7,2,2,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,3,1,2,2,1,2,2,1,2,2,1,35126.205635,36772.568368,1,95,3,3,0.65,3,3,1,0,0,1,58,1,3,3,NA +69543,7,2,2,9,NA,4,4,1,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,9139.784234,9759.758014,2,100,14,14,4.86,3,3,0,1,0,2,41,1,4,3,NA 
+69544,7,1,2,16,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,10,NA,NA,NA,2,2,2,1,2,2,NA,NA,NA,NA,20347.899985,0,2,94,5,5,0.65,6,6,0,2,0,1,53,NA,NA,6,NA +69545,7,2,2,17,NA,3,3,1,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30430.369428,31312.351655,3,92,5,5,1.03,4,4,0,3,0,1,55,1,4,4,NA +69546,7,2,1,26,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15976.466658,17286.192057,2,96,8,6,2.39,2,1,0,0,0,1,26,2,5,5,NA +69547,7,2,1,73,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,8543.37447,9341.78969,3,90,14,14,2.97,5,5,0,2,1,1,73,2,3,2,NA +69548,7,2,2,7,NA,4,4,2,8,96,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7710.907339,8302.634544,2,99,77,77,NA,4,4,1,1,0,2,47,1,2,77,NA +69549,7,2,2,66,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,10429.953328,10902.368494,2,94,15,15,3.42,6,6,0,1,2,1,40,1,3,1,4 +69550,7,2,1,58,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,16287.780872,16385.890285,1,96,9,6,2.3,2,1,0,0,0,1,58,1,4,6,NA +69551,7,2,2,12,NA,4,4,1,12,148,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11838.873374,11750.256617,2,97,15,15,5,4,4,0,2,0,1,47,NA,NA,6,NA +69552,7,2,2,64,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,10999.00871,11813.793381,2,98,6,6,1.25,4,4,1,0,1,1,46,1,2,6,NA +69553,7,2,2,70,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,1,2,2,2,2,2,NA,15705.521724,21678.922586,3,90,12,12,NA,6,6,0,0,2,1,70,2,1,1,1 +69554,7,2,1,6,NA,4,4,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9344.689579,9327.120408,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +69555,7,2,2,11,NA,4,4,2,11,136,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7282.523598,7776.514874,2,99,6,6,1.11,5,5,0,4,0,2,34,1,4,5,NA +69556,7,2,1,59,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17738.246838,17681.670095,2,95,2,2,0.54,1,1,0,0,0,1,59,1,4,1,NA +69557,7,2,2,0,1,1,1,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8775.375504,8746.463784,2,102,14,14,3.58,4,4,2,0,0,1,25,2,3,1,1 
+69558,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,47973.37979,51335.952518,1,101,7,7,1.82,4,4,2,0,0,2,27,1,2,1,3 +69559,7,2,2,8,NA,1,1,2,8,100,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,14300.71869,15541.734563,2,94,9,9,2.1,5,5,1,2,0,1,31,2,4,1,4 +69560,7,2,1,26,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,2,5,NA,2,2,2,2,2,2,1,2,2,1,59682.963348,66132.07802,2,102,5,5,1.08,3,3,0,0,0,1,55,2,1,5,NA +69561,7,2,2,19,NA,4,4,1,20,NA,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11224.041366,11682.365019,1,96,5,5,0.53,7,7,2,2,0,2,38,1,9,6,NA +69562,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,1,2,NA,27199.141352,29187.923269,1,98,77,77,NA,2,2,0,0,2,1,80,1,1,1,3 +69563,7,2,2,5,NA,5,6,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8818.200077,8968.551716,2,102,6,6,1.52,4,4,2,0,0,1,30,2,4,1,4 +69564,7,2,1,37,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18743.758781,18547.428727,1,98,15,15,5,6,6,3,0,0,1,37,2,5,1,4 +69565,7,2,2,6,NA,2,2,2,6,72,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10276.40638,10435.186077,2,99,99,99,NA,5,3,0,1,0,1,40,2,1,6,NA +69566,7,2,1,80,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,2,2,2,2,2,1,NA,13654.270555,14429.301836,2,93,12,12,NA,2,2,0,0,2,2,79,NA,NA,1,1 +69567,7,2,2,10,NA,3,3,1,10,122,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18800.96526,18551.296274,1,94,3,3,0.37,5,5,0,3,0,2,29,1,4,4,NA +69568,7,2,2,11,NA,4,4,1,11,137,NA,NA,2,2,2,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8504.389189,8822.229593,2,93,6,6,2.24,3,1,1,1,0,2,31,2,3,3,NA +69569,7,2,2,61,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,1,1,2,1,1,2,1,3,17243.546687,17909.086027,1,92,6,6,2.15,2,2,0,0,2,2,61,2,4,1,5 +69570,7,1,1,8,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,73471.277275,0,2,101,8,8,1.72,5,5,0,3,0,1,37,1,3,1,3 +69571,7,2,1,19,NA,5,6,2,20,NA,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11506.937395,12032.024805,1,97,7,7,1.48,5,5,0,1,0,2,46,2,4,1,NA 
+69572,7,2,2,42,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,4,4,2,1,2,1,1,2,1,NA,NA,NA,NA,13568.706187,14164.354411,3,90,8,4,1.61,3,1,0,0,1,2,68,2,2,4,NA +69573,7,2,2,53,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,15790.702799,16017.473382,2,90,2,2,0.48,2,2,0,0,1,2,53,2,3,1,3 +69574,7,2,2,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,163194.688032,166437.638141,1,97,14,14,4.96,2,2,0,0,0,1,59,1,4,1,5 +69575,7,2,2,12,NA,4,4,2,12,146,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11990.226944,12023.57752,2,95,2,2,0.26,3,3,0,2,0,2,31,1,3,5,NA +69576,7,2,1,2,NA,4,4,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7325.198118,7777.386797,1,100,13,13,NA,5,5,2,0,0,2,54,1,4,5,NA +69577,7,2,1,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,18061.358948,18207.066814,2,93,3,3,1.07,1,1,0,0,0,1,52,1,3,4,NA +69578,7,2,2,6,NA,1,1,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16412.026403,2,98,2,2,0.35,3,3,0,1,0,2,43,1,3,1,3 +69579,7,2,2,4,NA,5,6,1,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6443.362832,6436.557151,1,98,15,15,5,3,3,1,0,0,2,37,2,5,1,5 +69580,7,2,1,23,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20623.434727,20610.774284,1,93,3,3,0.7,3,3,1,0,0,1,23,2,4,1,2 +69581,7,2,1,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,16386.190684,16050.428992,2,90,15,15,5,4,4,0,0,0,1,57,2,5,1,5 +69582,7,2,2,30,NA,2,2,2,NA,NA,2,NA,2,7,77,NA,1,2,2,2,2,2,2,2,2,NA,NA,NA,NA,35353.005268,36684.976775,2,94,1,1,0.01,7,7,1,3,0,1,41,2,1,1,1 +69583,7,2,2,4,NA,5,6,2,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4962.240532,5199.576603,2,94,77,77,NA,6,6,2,0,0,2,18,1,3,NA,NA +69584,7,2,2,0,6,5,7,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14926.308861,15231.47558,1,102,8,8,1.91,5,5,1,2,0,2,38,1,5,1,4 +69585,7,1,2,4,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,53166.229434,0,1,98,7,7,1,7,7,2,2,0,2,34,1,4,3,NA 
+69586,7,2,1,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5255.532858,5319.636438,2,99,1,1,0.1,3,3,1,1,0,2,28,1,4,5,NA +69587,7,2,1,77,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,16823.6553,18089.239033,1,92,9,9,3.64,2,2,0,0,2,1,77,1,5,1,4 +69588,7,2,2,10,NA,3,3,2,10,127,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,66519.408962,66670.760406,1,98,15,15,4.34,4,4,0,2,0,1,51,1,5,1,5 +69589,7,2,1,0,2,3,3,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17418.612172,17117.482228,1,91,14,14,5,3,3,1,0,0,2,30,1,5,1,5 +69590,7,2,2,19,NA,4,4,2,19,235,2,NA,1,1,NA,14,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12857.456314,12921.235691,2,97,4,4,0.81,4,4,1,0,0,2,51,1,2,4,NA +69591,7,2,2,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,122655.643802,123752.268878,3,91,15,15,5,2,2,0,0,0,1,29,1,4,1,4 +69592,7,1,1,16,NA,1,1,NA,NA,NA,NA,NA,2,2,3,10,NA,NA,NA,1,2,2,1,2,1,NA,NA,NA,NA,18242.832494,0,1,102,99,99,NA,5,5,0,2,1,1,52,2,1,1,1 +69593,7,2,2,1,18,4,4,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6306.491784,6469.273037,2,90,9,9,3.18,3,3,1,0,0,2,38,2,4,5,NA +69594,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,22911.854766,1,95,6,6,1.09,5,5,0,3,0,1,31,1,4,1,4 +69595,7,2,2,3,NA,3,3,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,68579.013834,75690.025736,1,95,15,15,5,3,3,1,0,0,1,26,1,3,1,4 +69596,7,2,1,64,NA,4,4,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,8219.195224,8283.488354,2,99,10,10,3.67,3,3,0,0,1,1,64,2,4,1,5 +69597,7,2,2,2,NA,5,6,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6075.554662,6069.137471,1,93,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +69598,7,2,1,34,NA,3,3,2,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,87891.395784,90408.650551,2,99,15,15,5,1,1,0,0,0,1,34,2,5,1,NA +69599,7,2,1,21,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,5,NA,2,2,2,1,2,2,1,2,2,1,42077.383821,44759.048785,1,102,4,4,0.67,4,4,0,1,0,1,23,2,4,5,NA 
+69600,7,2,1,65,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,6815.198656,6994.794831,3,90,15,15,4.34,4,4,0,0,1,1,65,1,3,1,4 +69601,7,2,1,17,NA,3,3,2,17,205,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,93665.036597,95017.313859,1,91,15,15,5,4,4,0,1,0,1,45,1,5,1,5 +69602,7,2,1,49,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,1,2,2,1,2,1,1,2,NA,35406.972937,36103.049421,2,98,3,3,0.98,2,2,0,0,1,2,80,1,1,2,NA +69603,7,2,1,26,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,35669.2076,40318.090187,2,94,4,4,0.81,4,4,2,0,0,1,26,2,2,1,2 +69604,7,2,2,1,15,1,1,1,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,14326.094268,14518.763259,3,92,4,4,0.65,4,4,2,0,0,2,24,2,2,1,2 +69605,7,2,1,12,NA,2,2,1,12,147,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14532.438529,15479.089621,2,93,14,14,3.52,5,5,1,2,0,1,44,1,5,1,5 +69606,7,2,2,11,NA,4,4,1,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10332.067017,11124.939356,1,100,12,12,NA,3,3,0,1,0,2,52,1,3,1,3 +69607,7,2,1,40,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,1,32980.717958,34025.01199,1,95,2,2,0.46,1,1,0,0,0,1,40,1,1,3,NA +69608,7,2,1,75,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,9662.124837,10964.58979,2,92,4,4,1.43,1,1,0,0,1,1,75,1,3,3,NA +69609,7,2,1,4,NA,4,4,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9885.965005,10187.921537,2,97,3,1,0.44,3,1,2,0,0,2,20,1,2,5,NA +69610,7,2,1,20,NA,5,7,1,NA,NA,2,NA,2,1,5,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14385.653726,15040.55165,2,101,99,2,0.46,4,1,0,0,0,1,18,NA,NA,NA,NA +69611,7,2,2,8,NA,5,7,1,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9007.62445,9504.796896,2,102,15,15,5,4,4,0,2,0,1,39,1,4,1,5 +69612,7,2,2,15,NA,3,3,1,16,192,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74658.856536,76060.378937,1,100,15,15,5,5,5,0,3,0,1,47,1,5,1,5 +69613,7,2,2,27,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,11739.283384,12336.327776,1,96,15,15,5,5,5,0,0,0,1,58,2,5,1,5 
+69614,7,1,1,3,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,81535.075159,0,3,91,8,8,3.4,2,2,1,0,0,2,31,1,3,5,NA +69615,7,2,2,12,NA,1,1,1,12,150,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18515.058419,19360.671834,2,96,6,6,1.12,4,4,0,3,0,1,26,1,2,77,NA +69616,7,2,2,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,44672.331977,48301.986589,1,98,2,2,0.36,5,5,3,0,0,1,25,1,3,1,3 +69617,7,2,2,25,NA,3,3,1,NA,NA,2,NA,2,1,6,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,97925.559493,100647.602701,1,91,15,6,2.69,3,1,0,0,0,1,27,1,3,6,NA +69618,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,127865.461733,133989.246473,1,101,4,4,1.26,2,2,0,0,2,1,62,1,3,1,3 +69619,7,2,2,41,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,19075.861607,18884.31701,1,96,6,6,1.21,4,4,0,2,0,2,41,1,4,4,NA +69620,7,2,1,49,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,31347.36219,32073.699502,2,90,6,6,1.21,4,4,0,0,0,2,59,2,1,6,NA +69621,7,2,1,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,30631.476666,32101.68524,2,101,8,8,2.81,3,3,0,1,0,1,35,1,1,1,2 +69622,7,2,1,58,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,27140.404673,28346.53309,1,95,5,5,1.1,3,3,0,0,1,2,63,1,4,1,5 +69623,7,2,1,4,NA,1,1,1,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19411.993395,20798.549942,1,92,14,14,3.9,4,4,2,0,0,2,29,1,4,1,4 +69624,7,2,1,67,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,3,1,NA,1,2,1,NA,NA,NA,1,2,1,3,11145.558675,11717.869264,2,96,NA,NA,NA,4,4,0,0,1,1,67,2,3,1,2 +69625,7,2,1,29,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,111450.827181,113182.912698,3,91,15,15,5,2,2,0,0,0,1,29,1,4,1,4 +69626,7,2,1,53,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,161135.312305,165002.304429,2,101,14,14,4.86,3,3,0,1,0,1,53,1,4,1,5 +69627,7,2,2,13,NA,4,4,2,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13621.250519,13659.137754,1,93,7,7,2.72,2,2,0,1,0,2,45,1,3,3,NA 
+69628,7,2,1,9,NA,1,1,1,9,116,NA,NA,2,2,3,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,14821.597351,14686.992722,3,92,6,6,0.86,7,7,1,4,0,2,36,2,1,1,1 +69629,7,2,2,16,NA,3,3,1,16,197,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,39616.634313,47751.80094,2,101,3,3,0.59,4,3,0,2,0,1,39,1,1,6,NA +69630,7,2,1,23,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,33592.259589,34050.148008,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +69631,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,1,2,1,2,2,1,2,2,NA,29183.224814,31443.714435,1,95,7,7,2.3,3,3,0,0,2,1,80,1,4,1,2 +69632,7,2,1,44,NA,2,2,2,NA,NA,1,2,2,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,31640.296506,32279.766727,2,94,8,8,2.01,4,4,1,1,0,1,44,2,4,1,4 +69633,7,2,1,38,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,42890.643983,44071.237251,1,98,15,15,5,5,5,2,1,0,1,38,1,4,1,5 +69634,7,2,2,14,NA,2,2,2,14,174,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22139.315046,23195.552704,3,91,6,6,0.83,6,6,1,3,0,1,37,1,4,1,4 +69635,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,175997.804296,180603.118894,1,101,7,7,2.31,2,2,0,0,1,1,60,1,3,1,3 +69636,7,2,1,2,NA,2,2,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10136.626471,10860.663695,2,100,14,14,3.58,4,4,1,1,0,1,33,1,4,1,5 +69637,7,2,2,58,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,189736.955264,194603.321172,1,98,6,6,2.6,1,1,0,0,0,2,58,1,2,3,NA +69638,7,2,2,5,NA,3,3,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,53166.229434,56500.79967,1,98,7,7,1.66,5,5,2,1,0,2,37,1,5,1,3 +69639,7,1,1,48,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,20480.361635,0,2,99,6,6,2.69,1,1,0,0,0,1,48,1,3,5,NA +69640,7,2,2,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,14883.664782,16969.371761,2,90,5,5,1.08,3,3,1,1,0,2,23,1,2,5,NA +69641,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,1,2,1,2,2,NA,NA,NA,NA,109290.289961,113677.204054,1,100,15,15,5,3,3,0,0,0,1,53,1,5,1,4 
+69642,7,2,1,19,NA,4,4,1,19,232,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,1,1,0.05,1,1,0,0,0,1,19,1,4,NA,NA +69643,7,2,2,0,10,1,1,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7565.515117,8068.295221,1,100,3,3,0.43,4,4,2,0,0,1,20,1,3,6,NA +69644,7,2,2,64,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11838.431472,12295.352662,2,94,8,8,1.8,6,6,0,1,2,1,74,2,5,1,5 +69645,7,2,2,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,49260.413155,50771.674072,2,100,99,99,NA,2,2,0,0,2,1,72,1,4,1,4 +69646,7,2,1,9,NA,3,3,2,10,120,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,82670.203859,86374.629723,1,97,15,15,4.07,5,5,0,3,0,1,36,1,5,1,5 +69647,7,2,1,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,113559.363135,116811.761686,2,102,14,14,3.44,5,5,1,2,0,2,34,1,4,6,NA +69648,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,32471.790416,34987.007487,2,96,15,15,5,2,2,0,0,2,2,80,2,5,1,5 +69649,7,2,1,27,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,5,NA,1,2,2,1,2,2,1,2,2,3,14385.653726,15533.829525,2,101,7,5,1.84,2,1,0,0,0,1,27,2,5,5,NA +69650,7,2,1,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,36244.629058,36151.524069,1,98,3,2,0.72,2,1,0,0,0,2,50,1,2,3,NA +69651,7,2,1,15,NA,4,4,1,15,185,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18120.648572,18071.489418,1,100,3,3,0.43,4,4,0,2,0,1,35,1,4,1,3 +69652,7,2,2,74,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,12833.793728,13278.897744,2,92,5,5,1.08,3,3,0,0,2,1,46,NA,NA,5,NA +69653,7,2,1,28,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,108410.783716,111887.48668,1,94,9,9,3.14,3,3,1,0,0,1,28,1,5,1,5 +69654,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,23484.626749,23642.328802,2,96,14,14,4.26,3,3,0,0,0,1,20,1,4,5,NA +69655,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,22419.63376,27869.657009,1,94,2,2,0.72,1,1,0,0,1,2,80,1,3,2,NA 
+69656,7,2,1,62,NA,4,4,2,NA,NA,2,NA,2,2,7,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,7514.993062,8122.478925,2,90,14,14,4.59,3,3,0,0,1,2,56,2,3,1,1 +69657,7,2,2,4,NA,5,6,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6899.969666,7508.722153,1,94,10,10,3.04,4,4,2,0,0,2,30,1,4,1,5 +69658,7,2,1,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,11034.04089,11250.172797,2,95,14,14,5,2,2,0,0,2,1,73,1,5,1,4 +69659,7,2,2,12,NA,3,3,2,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,42712.452836,43268.322789,2,94,3,3,0.95,2,2,0,1,0,2,45,1,5,3,NA +69660,7,2,1,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,11764.405491,12715.399053,1,97,NA,NA,NA,7,7,0,3,1,2,47,NA,NA,1,NA +69661,7,2,2,57,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11446.604914,11507.133903,2,92,3,3,0.45,4,4,0,0,1,1,64,2,1,1,1 +69662,7,2,2,7,NA,1,1,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14300.71869,14671.891945,2,94,7,7,1.17,6,6,1,2,0,2,30,2,3,6,NA +69663,7,2,1,3,NA,3,3,1,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,78932.667512,89054.807173,3,92,14,14,2.74,6,6,2,2,0,1,35,1,5,1,4 +69664,7,2,1,3,NA,5,6,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5492.796032,5932.072263,2,100,14,14,3.06,5,5,1,0,1,2,31,2,5,1,5 +69665,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,16058.142925,15728.179634,2,101,7,7,1.3,5,5,2,0,1,2,50,1,4,1,3 +69666,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,69047.315371,69784.701831,1,95,8,8,2.7,3,3,0,1,2,1,69,1,5,1,3 +69667,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,118611.064701,118209.809508,1,91,8,8,3.4,2,2,0,0,2,1,66,1,5,1,4 +69668,7,2,1,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,120367.559207,121496.443862,2,95,15,15,5,2,2,0,0,1,1,61,1,5,1,5 +69669,7,2,2,66,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,9680.216878,10112.428208,1,96,15,15,5,1,1,0,0,1,2,66,1,5,2,NA 
+69670,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,60148.377616,67408.231176,1,92,6,6,1.98,2,2,0,0,2,1,80,1,5,1,4 +69671,7,2,2,58,NA,2,2,2,NA,NA,2,NA,2,2,7,NA,1,4,NA,2,2,2,2,2,2,2,2,1,2,20130.149569,20235.126587,1,90,99,99,NA,6,6,0,3,0,2,58,2,1,4,NA +69672,7,2,1,69,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,1,2,1,1,2,1,1,2,1,3,5201.567667,5487.342926,2,92,10,6,1.12,7,4,1,1,1,2,27,2,3,1,3 +69673,7,2,2,2,NA,4,4,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6247.52442,6810.692829,1,99,10,10,2.07,7,7,2,3,1,2,35,1,5,4,NA +69674,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,7736.56115,7645.782314,1,99,8,8,1.76,5,5,0,2,1,1,37,1,4,1,3 +69675,7,2,2,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,24977.658912,25631.246482,2,95,5,5,1.52,2,2,0,0,0,2,58,1,2,2,NA +69676,7,2,1,5,NA,2,2,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16632.464801,16654.939181,1,94,2,2,0.26,4,4,2,1,0,2,25,1,4,5,NA +69677,7,2,1,13,NA,4,4,1,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13653.432599,13680.426553,2,96,5,5,1.07,4,4,0,3,0,2,46,1,4,3,NA +69678,7,2,2,1,18,1,1,1,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13464.808163,14865.509546,1,101,9,9,2.88,3,3,1,0,0,1,36,2,2,1,4 +69679,7,2,1,26,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17095.959542,2,100,8,8,3.06,2,2,0,0,0,1,26,1,5,1,5 +69680,7,2,1,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,16058.989596,16124.245459,2,97,3,3,0.33,6,6,2,0,0,2,32,1,2,1,3 +69681,7,2,1,4,NA,4,4,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7299.892005,7750.518524,2,99,9,9,1.78,6,6,1,1,0,1,46,1,3,6,NA +69682,7,2,1,67,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,2,NA,2,2,2,2,2,2,1,2,2,NA,8609.250304,11228.904188,2,90,4,4,1.02,2,2,0,0,1,1,67,2,1,2,NA +69683,7,2,1,8,NA,4,4,2,9,108,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8017.552697,8398.795399,1,99,10,10,2.07,7,7,2,3,1,2,35,1,5,4,NA 
+69684,7,2,1,65,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,1,3,NA,2,2,2,2,2,2,2,2,2,2,9404.30514,9554.950099,2,93,3,3,0.58,4,4,0,1,1,1,65,2,1,3,NA +69685,7,2,2,17,NA,1,1,1,17,205,2,NA,2,2,3,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24654.448233,25524.546766,2,96,6,6,1.12,4,4,0,1,0,1,57,2,1,1,4 +69686,7,2,1,5,NA,1,1,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,17048.276421,17071.31266,1,94,5,5,0.87,4,4,1,1,0,1,35,2,1,1,1 +69687,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,141912.982157,152670.471787,1,97,15,15,5,4,4,0,2,0,1,49,1,5,1,5 +69688,7,2,1,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,17036.36313,17063.789746,1,99,14,14,5,1,1,0,0,0,1,42,1,4,4,NA +69689,7,2,1,51,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15413.22404,15402.649971,1,97,15,15,5,4,4,0,0,0,1,51,2,5,1,5 +69690,7,2,2,2,NA,3,3,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,37915.354974,40293.39486,2,91,15,15,5,5,5,1,2,0,1,37,1,4,6,NA +69691,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,24919.497762,30093.052062,1,101,1,1,0.08,6,6,0,1,0,1,51,1,2,5,NA +69692,7,2,1,50,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,29903.342494,29985.789069,1,95,12,5,1.84,4,1,0,0,0,1,29,1,3,6,NA +69693,7,2,2,37,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,43463.010321,44104.658285,1,98,6,6,1.98,2,2,0,1,0,2,37,1,4,3,NA +69694,7,2,1,15,NA,4,4,2,15,191,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9757.309092,9852.859452,1,96,15,15,5,6,6,1,1,1,2,44,1,3,1,3 +69695,7,2,1,80,NA,4,4,2,NA,NA,2,NA,2,1,9,NA,9,3,NA,1,2,2,1,2,2,1,2,2,NA,8992.410435,9337.125921,2,90,4,4,1.54,1,1,0,0,1,1,80,2,9,3,NA +69696,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32144.824104,32969.273278,2,97,3,3,1.1,1,1,0,0,0,2,56,1,2,3,NA +69697,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23845.8146,22808.13483,1,98,12,2,0.54,4,1,0,0,0,1,21,1,4,6,NA 
+69698,7,2,1,49,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,20851.046913,21183.857295,2,99,2,2,0.19,6,6,0,1,0,1,59,1,2,5,NA +69699,7,1,1,11,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10229.206765,0,1,96,14,14,2.96,5,5,0,3,0,1,46,NA,NA,1,5 +69700,7,2,2,27,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,2,2,1,2,2,NA,NA,NA,NA,42583.505439,50840.190326,2,91,2,2,0.42,3,3,1,1,0,2,27,1,3,5,NA +69701,7,2,1,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17636.923031,17665.316481,2,101,6,6,2.57,1,1,0,0,0,1,52,1,3,5,NA +69702,7,2,1,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,97539.00155,100726.587936,1,99,77,77,NA,3,3,0,0,0,1,42,1,4,6,NA +69703,7,2,1,0,3,4,4,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7441.732415,8231.273488,2,98,3,3,0.54,3,3,1,1,0,2,29,1,2,1,NA +69704,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,109181.566304,114466.270601,2,91,15,3,1.07,7,1,0,0,1,1,49,NA,NA,5,NA +69705,7,2,1,42,NA,4,4,1,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17221.349323,18312.731752,1,103,8,8,1.95,4,4,0,1,0,2,48,1,5,1,5 +69706,7,2,2,9,NA,5,6,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,8290.163782,8692.019106,1,92,2,2,0.24,5,5,0,2,0,1,35,2,4,1,3 +69707,7,2,2,4,NA,4,4,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12736.176535,13301.727395,1,97,15,15,5,4,4,1,1,0,1,35,1,5,1,5 +69708,7,2,1,14,NA,1,1,1,14,179,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,15469.666055,15805.71937,2,92,12,12,NA,7,7,0,1,2,2,64,2,1,2,NA +69709,7,2,2,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,128065.93322,127632.692688,1,98,4,4,1.52,1,1,0,0,1,2,62,1,4,5,NA +69710,7,2,2,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,51744.846443,52360.821424,1,97,4,3,0.93,3,2,0,0,0,1,35,1,3,1,4 +69711,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16675.763807,16530.996917,2,95,8,8,1.85,5,5,1,2,0,1,55,1,2,1,3 
+69712,7,2,1,31,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,23631.037942,24059.753898,1,91,9,9,5,1,1,0,0,0,1,31,2,5,5,NA +69713,7,2,1,17,NA,2,2,2,17,207,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24585.624844,25010.051033,3,91,6,6,0.83,6,6,1,3,0,1,37,1,4,1,4 +69714,7,2,1,18,NA,4,4,1,18,226,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,99,99,NA,4,1,0,0,0,1,18,1,4,NA,NA +69715,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,15598.269927,16258.702668,1,99,4,4,0.41,7,7,2,4,0,2,43,1,4,4,NA +69716,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,126687.376548,128757.240271,1,101,9,9,2.6,4,4,0,1,2,2,63,1,4,1,4 +69717,7,2,1,3,NA,1,1,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19476.194601,19502.51153,2,102,15,15,5,4,4,2,0,0,1,32,1,5,1,5 +69718,7,2,2,15,NA,3,3,1,15,182,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,82923.224369,87859.866176,2,100,8,8,2.91,3,3,0,2,0,2,48,1,5,1,NA +69719,7,2,1,3,NA,3,3,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,31190.854587,34157.026846,1,95,2,2,0.22,4,4,2,1,0,2,22,1,2,5,NA +69720,7,2,2,3,NA,4,4,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,10901.48845,2,100,5,5,0.94,4,4,2,0,0,2,33,1,4,6,NA +69721,7,2,2,22,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,14500.122872,13786.884381,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +69722,7,2,1,7,NA,4,4,2,7,86,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7758.863533,8127.805304,2,99,3,3,0.42,6,6,1,2,0,2,43,1,4,6,NA +69723,7,2,2,9,NA,3,3,2,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22371.648216,22397.198208,1,92,4,4,0.61,5,5,1,2,0,1,34,1,3,6,NA +69724,7,2,1,58,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,28585.875492,28917.983006,1,91,5,5,1.2,3,3,0,0,1,1,58,1,2,1,2 +69725,7,2,1,5,NA,3,3,1,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,73006.119819,82368.252941,1,101,14,14,3.15,5,5,2,1,0,1,35,1,4,1,5 
+69726,7,1,1,53,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,30582.99679,0,1,97,4,3,1.07,2,1,0,0,1,2,66,NA,NA,5,NA +69727,7,2,1,4,NA,5,6,1,4,55,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7042.228842,7250.600343,2,103,9,9,3.14,3,3,1,0,0,1,32,2,5,1,5 +69728,7,2,1,35,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20071.705576,20986.552878,3,91,14,14,5,2,2,0,0,0,1,35,2,5,1,5 +69729,7,2,2,3,NA,4,4,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10389.292229,11178.99433,2,101,7,7,1.3,5,5,2,0,1,2,50,1,4,1,3 +69730,7,2,2,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,11194.560864,11943.741966,2,95,5,5,1.88,1,1,0,0,1,2,70,1,3,3,NA +69731,7,2,2,19,NA,4,4,1,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,99,1,0.02,2,1,0,0,0,2,19,1,4,NA,NA +69732,7,2,1,33,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,5,1,NA,1,2,1,1,2,1,1,2,1,3,12327.761744,13173.339372,3,90,5,5,0.93,4,4,1,0,0,1,48,2,4,1,NA +69733,7,1,1,67,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,7117.971973,0,2,100,3,3,0.75,2,2,0,0,2,1,67,1,3,1,2 +69734,7,2,1,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,74110.989124,87044.270957,2,95,8,8,4.48,1,1,0,0,0,1,26,1,5,5,NA +69735,7,2,1,20,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14949.232836,15463.9021,1,100,99,99,NA,6,6,0,1,0,1,53,2,2,1,3 +69736,7,2,2,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,29040.300396,28462.118402,2,101,1,1,0.14,4,1,0,0,0,2,21,1,4,5,NA +69737,7,2,1,31,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,58170.292683,59516.479078,3,92,8,8,2.36,3,3,0,0,0,1,34,NA,NA,5,NA +69738,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,34802.051557,35120.431265,1,101,4,4,1.16,2,2,0,1,0,2,51,1,4,3,NA +69739,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,17260.508485,17587.688781,2,97,6,6,1.02,6,6,1,2,0,1,37,1,3,1,3 
+69740,7,2,2,66,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,95538.505899,97099.44822,2,95,14,14,5,2,2,0,0,2,2,66,1,5,1,5 +69741,7,2,1,10,NA,1,1,1,10,129,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,17882.621856,17720.218058,3,92,3,3,0.51,5,5,1,2,0,2,34,2,1,6,NA +69742,7,2,1,4,NA,5,6,2,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6138.820061,6885.358348,1,99,10,10,4.76,2,2,1,0,0,2,36,2,5,3,NA +69743,7,2,1,46,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,28813.038041,29276.135189,1,94,7,7,1.65,5,4,0,0,0,1,46,1,4,1,4 +69744,7,1,1,12,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13416.172328,0,1,96,14,14,2.96,5,5,0,3,0,1,46,NA,NA,1,5 +69745,7,2,2,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,27005.036261,27122.782392,1,102,1,1,0,3,1,0,0,1,2,50,1,3,3,NA +69746,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,115926.402585,118970.086068,1,101,15,15,5,4,4,0,2,0,1,43,1,4,1,5 +69747,7,2,2,34,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,NA,NA,NA,1,2,2,1,21229.081867,21886.919672,3,90,5,5,0.87,4,4,0,0,0,2,43,2,3,5,NA +69748,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,160743.928829,162404.084268,1,95,7,7,2.86,2,2,0,0,0,2,50,1,3,1,3 +69749,7,2,1,12,NA,2,2,2,12,151,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16033.31661,17623.465787,2,90,5,5,0.76,5,5,0,4,0,2,32,1,2,3,NA +69750,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,79677.823556,83116.431703,1,99,15,15,4.47,4,4,0,2,0,2,43,1,5,1,5 +69751,7,2,2,0,5,1,1,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6787.112205,6644.319314,2,94,8,8,2.43,3,3,1,0,0,1,24,2,4,1,4 +69752,7,2,2,79,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,81062.798322,83549.725067,1,101,4,4,1.75,1,1,0,0,1,2,79,1,5,2,NA +69753,7,2,2,63,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14204.126838,14796.942631,1,92,5,5,1.41,2,2,0,0,2,1,60,1,2,1,4 
+69754,7,2,1,80,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,10160.645851,10359.669924,1,96,77,77,NA,7,7,0,3,1,2,43,77,5,5,NA +69755,7,2,2,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,156359.467613,158914.124584,2,102,14,14,5,2,2,0,0,2,2,65,1,5,1,5 +69756,7,2,1,2,NA,4,4,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4778.010882,5139.584556,2,93,99,99,NA,7,6,1,0,0,1,19,1,3,NA,NA +69757,7,2,1,19,NA,5,7,2,19,229,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11125.932433,11147.929312,1,96,10,10,3.04,4,4,0,1,0,2,43,1,5,1,4 +69758,7,2,2,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,21127.557839,23583.17383,1,102,4,4,0.97,3,3,0,1,0,2,19,1,2,NA,NA +69759,7,2,1,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,32838.149884,1,95,1,1,0.18,1,1,0,0,0,1,51,1,4,4,NA +69760,7,2,1,5,NA,4,4,2,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9446.305539,9734.833129,2,95,1,1,0,4,4,2,1,0,2,27,1,4,5,NA +69761,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,14859.685983,15314.825218,1,95,3,3,0.96,1,1,0,0,1,2,80,1,2,2,NA +69762,7,2,1,15,NA,1,1,1,15,190,NA,NA,2,2,3,9,NA,NA,NA,2,1,2,1,2,2,2,2,2,2,24230.15013,23997.357824,3,92,6,6,0.86,7,7,1,4,0,2,36,2,1,1,1 +69763,7,2,2,6,NA,4,4,2,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7282.523598,8846.731146,2,99,5,5,0.76,5,5,0,2,0,1,51,1,2,1,2 +69764,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,18097.801029,17328.248994,2,100,14,14,3.06,5,5,1,0,0,1,50,1,5,1,5 +69765,7,2,2,65,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,3,3,NA,2,2,2,1,2,2,1,2,1,2,11495.911371,12347.505602,2,99,3,3,0.52,3,3,0,0,1,2,38,2,3,3,NA +69766,7,2,2,4,NA,2,2,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15979.952759,16987.381382,2,102,14,14,3.25,5,5,2,0,0,1,27,1,5,1,5 +69767,7,2,1,56,NA,5,7,2,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,151766.599459,154414.492891,3,91,7,7,1.97,4,4,0,0,1,2,77,1,5,2,NA 
+69768,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23845.8146,23041.888919,1,98,3,2,0.81,4,1,0,0,0,2,21,NA,NA,5,NA +69769,7,2,2,0,4,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21671.775435,21083.121886,1,98,7,7,1.83,3,3,1,0,0,2,26,1,5,1,4 +69770,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,13495.651715,13078.348337,2,99,NA,77,NA,7,7,1,0,1,2,51,1,2,1,3 +69771,7,2,1,3,NA,5,6,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,10276.262805,11525.953064,1,97,15,15,5,4,4,2,0,0,2,35,2,4,1,4 +69772,7,2,2,3,NA,4,4,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11671.9972,12190.293097,1,93,4,4,1.03,3,3,1,1,0,2,35,2,3,4,NA +69773,7,2,1,40,NA,4,4,1,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23242.990557,23331.304109,1,100,9,9,2.22,5,5,1,2,0,2,40,2,4,1,4 +69774,7,2,1,54,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,20167.650721,25203.089486,3,90,12,7,3.58,2,1,0,0,1,1,71,2,2,1,NA +69775,7,2,1,8,NA,2,2,2,8,101,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15039.041447,15306.786696,3,91,6,6,0.83,6,6,1,3,0,1,37,1,4,1,4 +69776,7,2,1,57,NA,1,1,2,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,22446.308035,22116.943066,2,94,7,7,1.33,6,6,0,1,0,1,55,2,2,1,1 +69777,7,2,2,60,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,10003.290711,10218.574955,1,99,10,10,2.58,5,5,0,1,2,1,65,1,5,1,3 +69778,7,2,1,42,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,3,6,NA,2,2,2,2,2,2,2,2,2,2,31640.296506,31576.726829,2,94,9,9,4.21,3,2,0,0,0,2,48,2,2,6,NA +69779,7,2,1,13,NA,3,3,2,13,166,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,77169.155154,76948.022242,1,98,14,14,3.9,4,4,0,3,0,2,31,1,4,1,NA +69780,7,2,1,9,NA,3,3,2,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,66868.503099,69864.859716,1,98,14,14,3.15,5,5,0,3,0,1,34,1,4,1,4 +69781,7,2,1,0,10,3,3,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26076.0211,27915.503824,2,91,15,15,5,4,4,2,0,0,2,33,1,5,1,5 
+69782,7,2,1,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,20075.522681,20698.300141,2,98,77,77,NA,1,1,0,0,0,1,52,1,3,5,NA +69783,7,2,2,66,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,13023.675419,13480.293434,2,96,2,2,0.55,1,1,0,0,1,2,66,2,5,5,NA +69784,7,2,1,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,10717.375231,11218.730451,2,101,2,2,0.64,1,1,0,0,1,1,69,1,2,5,NA +69785,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,22419.63376,25250.873067,1,94,3,3,1.08,1,1,0,0,1,2,80,1,2,2,NA +69786,7,2,2,1,22,3,3,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46813.021927,47428.160802,1,94,15,15,4.77,4,4,2,0,0,1,48,1,4,1,5 +69787,7,2,2,57,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,22969.116046,23893.408664,2,93,9,9,3.97,2,2,0,0,1,2,57,2,3,1,1 +69788,7,2,1,14,NA,2,2,1,14,169,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,30061.88611,30025.949584,1,92,6,6,0.93,5,5,0,2,0,1,47,2,1,1,1 +69789,7,2,2,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,16181.169973,15916.061734,2,100,99,99,NA,1,1,0,0,0,2,53,1,2,3,NA +69790,7,2,1,12,NA,5,6,1,12,146,NA,NA,1,1,NA,6,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,6886.223812,7737.788899,2,92,7,7,1.61,4,4,0,2,0,1,51,2,3,1,3 +69791,7,1,2,63,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,30036.804187,0,2,100,4,4,1.29,2,2,0,0,2,1,65,1,3,1,3 +69792,7,2,1,5,NA,4,4,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11324.865632,11670.771889,2,96,3,3,0.54,4,4,2,1,0,2,25,1,4,2,NA +69793,7,2,1,35,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20181.692021,20449.370526,1,92,2,2,0.24,5,5,0,2,0,1,35,2,4,1,3 +69794,7,2,1,9,NA,2,2,1,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8889.501355,9473.596146,2,93,14,14,3.52,5,5,1,2,0,1,44,1,5,1,5 +69795,7,1,2,80,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,1,2,1,2,2,NA,NA,NA,NA,16321.652472,0,2,101,1,1,0.08,2,2,0,0,2,2,80,1,1,2,NA 
+69796,7,2,2,3,NA,4,4,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11990.08101,12901.460915,1,100,15,15,3.87,6,6,1,3,0,2,39,1,4,1,4 +69797,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,74517.751389,77393.175383,2,94,14,14,3.36,4,4,2,0,0,1,31,1,3,1,5 +69798,7,2,2,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,65591.951555,69705.816211,2,101,2,2,0.46,2,1,0,0,0,2,21,1,4,5,NA +69799,7,2,2,8,NA,4,4,2,8,96,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7814.742747,8587.431439,2,95,7,7,1.55,5,5,0,3,0,1,30,1,4,1,4 +69800,7,2,1,10,NA,5,6,2,10,131,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6262.834446,6755.01452,3,90,77,77,NA,7,7,1,2,0,1,41,2,3,6,NA +69801,7,2,2,3,NA,5,6,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3864.413878,3822.111022,1,99,14,14,2.66,7,7,3,1,0,1,35,1,5,1,5 +69802,7,2,2,17,NA,3,3,1,17,207,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,36586.371708,39087.782259,1,101,4,4,0.58,6,6,0,4,0,2,41,1,3,5,NA +69803,7,2,1,44,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19184.316833,20543.822351,1,97,15,15,5,4,4,1,1,0,1,44,2,5,1,5 +69804,7,2,1,6,NA,3,3,1,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47373.769078,49181.31317,1,103,15,15,5,3,3,0,1,0,1,46,1,5,1,5 +69805,7,2,1,1,19,5,6,1,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6389.003009,6767.308805,3,91,14,14,3.06,5,5,3,0,0,1,34,2,5,1,5 +69806,7,2,1,75,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,8763.51401,8935.17143,2,100,6,6,1.62,3,3,0,0,2,1,75,1,5,1,NA +69807,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,10083.559248,10533.779383,1,96,14,14,4.06,3,3,0,0,1,2,61,1,4,5,NA +69808,7,1,1,80,NA,4,4,NA,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,1,2,1,2,2,NA,NA,NA,NA,7041.644998,0,2,100,13,13,NA,2,2,0,0,2,2,77,1,3,1,4 +69809,7,2,1,4,NA,4,4,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8973.990262,9344.796725,2,95,1,1,0.09,5,5,3,1,0,2,31,1,2,1,NA 
+69810,7,2,2,14,NA,2,2,1,14,178,NA,NA,2,2,2,9,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15809.066118,16348.490248,2,93,5,5,1.26,3,3,0,1,0,1,55,2,2,1,2 +69811,7,2,1,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,124170.603852,124019.195449,2,98,6,6,2.75,1,1,0,0,0,1,59,1,4,4,NA +69812,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,47636.23298,52171.746701,1,91,9,9,4.27,2,2,0,0,2,1,80,1,4,1,3 +69813,7,2,2,30,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,28351.482206,28369.098474,1,96,9,9,3.35,3,3,0,0,0,2,42,1,4,5,NA +69814,7,2,2,15,NA,2,2,2,15,187,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18368.872199,19023.186366,2,91,10,10,2.95,4,4,0,1,0,2,18,1,3,NA,NA +69815,7,2,2,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,15205.189409,15734.277598,2,96,7,7,2.72,2,2,0,0,1,2,70,1,4,2,NA +69816,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,20000.263815,22894.1152,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +69817,7,2,1,33,NA,5,7,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16045.513116,17473.610556,1,90,6,6,0.92,6,6,2,0,2,2,30,2,5,1,5 +69818,7,2,2,19,NA,5,6,1,20,NA,2,NA,1,1,NA,15,NA,NA,NA,1,2,1,NA,NA,NA,1,2,2,1,9278.834813,9462.75742,1,92,2,2,0.33,5,5,0,1,0,1,51,2,1,4,NA +69819,7,2,2,11,NA,3,3,1,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71426.628275,70878.719996,2,101,10,10,2.33,6,6,1,3,0,1,39,1,2,1,4 +69820,7,2,2,6,NA,1,1,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16664.698857,2,98,6,6,0.63,7,7,2,2,1,1,60,1,3,1,2 +69821,7,2,2,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,7869.59899,8476.202883,2,100,1,1,0.14,1,1,0,0,1,2,66,1,3,2,NA +69822,7,2,1,64,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,1,1,2,1,1,2,1,3,8636.698123,9111.200197,2,92,3,3,0.45,4,4,0,0,1,1,64,2,1,1,1 +69823,7,2,2,55,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,18441.731082,18102.807884,2,100,1,1,0,2,2,0,0,0,2,55,1,5,3,NA 
+69824,7,2,1,64,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,2,1,NA,1,2,1,1,2,1,1,2,1,3,8275.049402,8531.146449,1,99,6,6,1.07,6,6,2,1,2,1,44,2,5,4,NA +69825,7,2,2,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,139800.409559,140918.856987,1,100,15,15,5,5,5,0,3,0,1,47,1,5,1,5 +69826,7,2,1,3,NA,3,3,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,20717.313101,23374.052614,2,97,5,5,0.8,5,5,1,2,0,1,46,2,4,1,2 +69827,7,2,1,30,NA,2,2,2,NA,NA,2,NA,2,2,4,NA,2,4,NA,2,2,2,2,2,2,1,2,1,2,27605.196104,27396.850962,2,99,12,77,NA,5,1,0,1,0,1,30,2,2,4,NA +69828,7,2,2,18,NA,4,4,2,19,228,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,13176.946531,13078.313981,2,99,6,3,0.94,3,2,0,0,0,1,41,1,3,6,NA +69829,7,2,1,18,NA,4,4,1,18,221,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19844.605924,19989.134915,2,102,14,14,4.05,3,3,0,1,0,1,18,1,2,NA,NA +69830,7,2,2,4,NA,2,2,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13301.733639,13724.969642,2,93,3,3,0.48,4,4,1,1,0,1,49,2,3,1,4 +69831,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,1,2,NA,1,2,1,1,2,1,1,2,1,NA,13689.379977,14234.742701,2,92,2,2,0.88,1,1,0,0,1,2,80,2,1,2,NA +69832,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,7074.645577,7351.348885,2,95,7,7,3.31,1,1,0,0,1,1,61,1,3,2,NA +69833,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,29534.722322,30544.805853,3,91,77,77,NA,2,2,0,0,2,1,73,1,5,1,5 +69834,7,1,2,37,NA,5,6,NA,NA,NA,2,NA,2,1,5,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,13379.422066,0,2,103,14,14,3.86,4,4,2,0,0,2,37,2,5,1,NA +69835,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,35334.703093,40990.264786,1,101,2,2,0.63,1,1,0,0,1,2,80,1,1,2,NA +69836,7,2,1,1,21,3,3,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,35290.29852,40086.22326,1,102,14,14,2.87,5,5,3,0,0,1,35,1,5,1,5 +69837,7,2,2,9,NA,1,1,1,9,114,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15962.145468,16535.37648,2,98,3,3,0.33,7,7,2,3,0,1,40,2,1,1,1 
+69838,7,2,1,31,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,30626.581617,36447.669921,2,90,6,6,1.35,3,3,1,0,0,1,31,1,3,1,4 +69839,7,2,2,8,NA,1,1,1,8,97,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,1,2,2,NA,15841.451259,17367.937444,3,92,4,4,0.55,6,6,0,4,0,1,36,2,1,1,3 +69840,7,2,1,67,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,6910.118936,6964.172059,2,95,6,4,1.5,2,1,0,0,1,1,67,1,4,3,NA +69841,7,2,1,16,NA,1,1,2,16,199,NA,NA,2,2,3,10,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,21633.039913,21721.097793,2,94,7,7,1.04,7,7,0,3,0,1,37,2,1,1,3 +69842,7,2,1,37,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,4,1,NA,2,2,2,1,2,2,2,2,2,2,34887.439952,35849.951626,2,94,6,6,1.34,4,4,0,2,0,1,37,2,4,1,2 +69843,7,2,1,40,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19184.316833,20543.822351,1,97,15,15,5,4,4,2,0,0,1,40,2,5,1,5 +69844,7,2,1,6,NA,1,1,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11770.89642,11957.164501,2,96,5,5,0.68,6,6,0,3,2,1,60,2,1,1,1 +69845,7,2,2,15,NA,3,3,2,15,186,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,29885.567338,31265.78413,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +69846,7,2,1,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,100807.076657,102674.431164,1,97,15,15,4.77,4,4,0,0,0,1,56,1,4,1,4 +69847,7,2,1,80,NA,5,6,2,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,14603.225127,15344.674112,1,98,15,15,5,2,2,0,0,2,1,80,2,5,1,NA +69848,7,2,1,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,15417.485683,15656.172471,1,92,3,3,0.98,2,2,0,0,1,1,70,1,2,1,2 +69849,7,2,1,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,9257.537917,9255.717695,1,99,7,6,1.84,3,2,0,0,2,1,70,1,2,1,4 +69850,7,2,2,10,NA,1,1,2,10,123,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,15225.935813,16547.241996,2,94,7,7,1.57,4,4,0,2,0,1,30,2,3,1,4 +69851,7,2,2,31,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,18901.436377,19544.076782,3,91,14,14,5,2,2,0,0,0,1,35,2,5,1,5 
+69852,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,35434.580514,39416.921733,1,95,4,4,1.47,1,1,0,0,1,2,80,1,3,1,NA +69853,7,2,1,10,NA,4,4,2,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8227.856305,9153.104022,3,91,1,1,0.07,6,6,2,3,0,2,30,1,2,3,NA +69854,7,2,1,65,NA,5,7,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,168185.448935,166468.676858,1,95,10,10,4.76,2,2,0,0,2,1,65,1,4,1,4 +69855,7,2,2,41,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,11762.034222,12708.069605,3,90,15,15,5,4,4,0,2,0,2,41,2,5,1,5 +69856,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,78240.016337,83257.203545,1,93,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +69857,7,2,1,16,NA,5,7,2,16,203,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,21852.102821,22875.024578,3,91,5,5,0.65,7,7,0,4,0,2,39,1,3,4,NA +69858,7,2,1,65,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9655.271429,9541.978936,1,95,9,9,4.21,2,2,0,0,1,1,65,1,5,1,4 +69859,7,2,2,2,NA,4,4,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9503.429019,9925.429486,1,91,3,3,0.66,4,4,1,2,0,2,33,1,3,5,NA +69860,7,2,2,18,NA,4,4,1,18,222,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11224.041366,11682.365019,1,96,5,5,0.53,7,7,2,2,0,2,38,1,9,6,NA +69861,7,2,2,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,15852.523312,16085.826144,2,97,6,6,2.04,2,2,0,0,2,2,80,1,3,2,NA +69862,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,75772.894935,76005.871473,1,94,15,15,5,2,2,0,0,0,1,29,1,4,1,5 +69863,7,2,1,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,28337.446334,29138.677291,2,95,14,14,3.34,4,4,0,0,0,1,43,1,3,1,3 +69864,7,2,1,52,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,25583.266805,26390.249407,1,100,12,99,NA,2,1,0,0,0,2,50,1,3,6,NA +69865,7,2,2,4,NA,3,3,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,28877.220658,31871.522389,1,94,6,6,1.21,4,4,2,0,0,1,27,1,2,1,2 
+69866,7,2,1,63,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,1,1,NA,1,2,1,1,2,1,1,2,1,3,12579.986433,13337.268472,1,92,1,1,0.26,2,2,0,0,2,1,63,2,1,1,1 +69867,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17521.481386,17363.561951,2,97,15,12,NA,2,1,0,0,0,1,54,1,4,5,NA +69868,7,2,1,40,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,6,NA,2,2,2,2,2,2,1,2,2,2,31740.385214,32919.784432,2,96,5,5,0.89,4,4,1,1,0,2,36,2,4,6,NA +69869,7,2,2,36,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,1,6,2,2,2,2,1,2,2,NA,NA,NA,NA,38218.668882,37878.487888,2,102,5,5,0.59,7,7,1,3,0,1,37,2,1,6,NA +69870,7,2,1,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,131818.641085,133509.443669,1,102,15,15,5,2,2,0,0,2,2,68,1,4,1,5 +69871,7,2,2,1,22,1,1,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12602.647442,12772.138116,1,91,99,99,NA,4,4,2,1,0,2,36,2,2,4,NA +69872,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,105412.227726,110446.221907,2,101,10,10,2.33,6,6,1,3,0,1,39,1,2,1,4 +69873,7,2,1,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,20135.920214,20700.031916,2,97,2,2,0.27,3,3,1,0,0,2,21,1,3,6,NA +69874,7,2,2,3,NA,3,3,2,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,78448.332626,86582.701959,2,91,15,15,5,4,4,2,0,0,2,33,1,5,1,5 +69875,7,2,2,45,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,1,1,2,1,1,2,1,3,11762.034222,11824.231183,3,90,15,15,3.23,6,6,0,2,0,1,50,2,2,1,2 +69876,7,2,2,36,NA,1,1,2,NA,NA,2,NA,2,1,3,NA,3,1,2,2,2,2,2,2,2,2,2,2,2,35353.005268,34399.106917,2,94,7,7,1.04,7,7,0,3,0,1,37,2,1,1,3 +69877,7,2,2,38,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,2,3,2,1,2,2,1,2,2,NA,NA,NA,NA,19257.047323,21540.196698,1,102,5,5,1.27,3,3,0,2,0,2,38,1,2,3,NA +69878,7,2,2,15,NA,5,7,1,15,189,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15844.005605,16354.571478,1,100,7,7,1.83,3,3,0,1,0,2,40,1,4,6,NA +69879,7,2,1,22,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,104488.914565,106745.836574,1,98,4,2,0.56,4,1,0,0,0,1,22,1,5,5,NA 
+69880,7,2,2,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,102720.446375,103036.277703,2,102,14,14,3.44,5,5,1,2,0,2,34,1,4,6,NA +69881,7,2,1,12,NA,2,2,1,12,155,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18242.832494,18053.229058,1,102,77,77,NA,6,6,0,2,1,2,37,1,4,1,4 +69882,7,2,2,14,NA,4,4,1,14,179,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11791.755593,11850.248564,2,96,7,7,1.04,7,7,0,4,0,2,37,1,3,3,NA +69883,7,2,2,14,NA,1,1,2,14,172,NA,NA,1,1,NA,8,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,23979.296993,29080.570096,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 +69884,7,2,1,56,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18916.732604,18903.754992,2,91,8,8,2.34,4,4,0,2,0,1,56,2,5,1,5 +69885,7,2,2,42,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,19075.861607,19022.257361,1,96,9,9,2.78,4,4,0,2,0,1,54,2,5,4,NA +69886,7,2,2,5,NA,2,2,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,15979.952759,16987.381382,2,102,4,4,0.57,6,6,2,3,0,2,26,2,3,1,NA +69887,7,2,2,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,17260.508485,16474.419641,2,97,3,3,0.33,6,6,2,0,0,2,32,1,2,1,3 +69888,7,2,1,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,19384.896286,19940.089683,1,94,3,3,0.8,2,2,0,0,1,2,66,1,4,3,NA +69889,7,2,1,13,NA,4,4,2,13,163,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13975.179508,14076.961252,1,96,15,15,5,4,4,0,2,0,1,46,1,5,1,5 +69890,7,2,1,5,NA,3,3,1,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,76114.759421,89186.300704,1,98,15,15,4.34,4,4,1,1,0,1,41,1,5,1,5 +69891,7,2,1,79,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,8992.410435,11769.853451,2,90,10,10,4.76,2,2,0,0,1,1,79,1,2,2,NA +69892,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,8521.670488,9178.537312,2,95,4,4,1.34,1,1,0,0,1,2,63,1,3,2,NA +69893,7,2,1,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,32866.0119,33575.905633,1,98,12,4,1.52,3,1,0,0,0,2,22,NA,NA,5,NA 
+69894,7,2,2,3,NA,3,3,2,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27257.164734,28966.726071,1,95,4,4,0.65,6,6,2,2,0,2,36,1,4,6,NA +69895,7,1,1,17,NA,3,3,NA,NA,NA,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,68701.580401,0,2,94,14,14,2.87,5,5,2,1,0,1,37,1,3,1,4 +69896,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,146181.198007,148606.927767,2,91,15,6,2.86,7,1,0,0,1,1,49,NA,NA,5,NA +69897,7,2,2,1,16,1,1,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14326.094268,15816.39252,3,92,7,7,2.1,3,3,1,1,0,2,25,1,4,5,NA +69898,7,2,1,71,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,19473.412374,20212.213332,1,95,6,6,2.04,2,2,0,0,2,1,71,1,3,1,4 +69899,7,2,2,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,30664.033268,30444.919975,2,102,5,5,0.67,6,6,0,4,0,2,33,1,2,6,NA +69900,7,2,2,40,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,1,2,1,2,2,1,2,2,1,2,2,NA,31334.47528,33406.965231,2,96,7,7,1.79,4,4,0,2,0,1,43,2,3,1,2 +69901,7,2,1,17,NA,1,1,1,17,214,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20638.769105,20614.097145,2,92,15,15,3.37,7,7,0,4,0,1,42,2,3,1,1 +69902,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,19130.246369,18964.171442,2,95,8,8,1.61,6,6,1,3,0,2,48,1,3,5,NA +69903,7,2,1,3,NA,1,1,1,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,2,NA,NA,NA,NA,17865.135763,18076.339981,3,92,7,7,1.41,5,5,1,2,0,1,20,2,1,1,1 +69904,7,2,2,16,NA,2,2,2,16,198,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,7,7,1.66,4,4,0,3,0,2,34,1,5,3,NA +69905,7,2,1,65,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,1,1,2,1,1,2,1,3,9596.331248,10123.555774,2,92,8,8,2.43,3,3,0,1,1,2,58,NA,5,1,5 +69906,7,2,1,42,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,41410.39303,40802.759347,1,101,12,3,1.07,3,1,0,0,0,1,41,2,1,4,NA +69907,7,2,1,8,NA,1,1,2,8,100,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,11417.89405,11689.28796,2,97,4,4,0.6,6,6,2,2,0,1,35,2,2,6,NA 
+69908,7,1,2,0,9,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4090.43871,0,2,99,6,6,1.73,3,3,1,1,1,2,60,1,4,3,NA +69909,7,2,2,15,NA,5,6,1,15,185,NA,NA,2,1,2,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11626.310625,12128.832209,1,92,7,7,1.56,4,4,0,2,0,2,38,2,4,6,NA +69910,7,2,1,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,102928.893739,104528.537732,1,101,6,6,0.97,7,7,2,1,0,1,43,1,2,1,NA +69911,7,2,2,80,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,3,2,NA,1,1,2,2,2,2,NA,NA,NA,NA,16490.79781,19016.592593,3,90,8,8,3.21,2,2,0,0,2,2,80,2,3,2,NA +69912,7,2,2,8,NA,4,4,2,8,103,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6388.247052,6617.403229,1,93,6,6,0.83,6,6,3,1,0,1,37,NA,NA,1,3 +69913,7,2,2,64,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,14102.354333,16246.228018,3,91,6,6,1.12,4,4,0,0,2,1,69,2,3,1,1 +69914,7,2,1,11,NA,3,3,2,11,138,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,46081.129115,51016.6207,2,95,9,9,2.6,4,4,0,2,0,1,42,1,4,1,4 +69915,7,2,1,6,NA,4,4,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,7970.311962,8205.827749,1,90,14,14,2.96,5,5,1,2,0,1,31,1,5,1,4 +69916,7,2,2,22,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,2,2,2,1,2,2,1,35710.33222,35554.798024,1,90,10,10,2.44,5,5,1,0,0,2,56,2,1,1,1 +69917,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,72686.111056,73189.163544,1,98,15,15,3.7,5,5,2,1,0,1,34,1,5,1,5 +69918,7,2,2,11,NA,4,4,2,11,140,NA,NA,2,2,3,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7659.302568,7901.193624,1,96,3,3,0.43,4,4,1,1,0,2,39,2,4,1,3 +69919,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,83819.702285,99717.125794,1,99,12,12,NA,1,1,0,0,0,1,31,1,4,5,NA +69920,7,2,2,9,NA,5,7,1,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22308.590534,22137.463018,1,101,5,5,1.15,3,3,0,1,0,1,49,1,3,1,4 +69921,7,2,1,75,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,10980.245706,10978.086763,1,91,12,6,2.39,2,1,0,0,2,2,73,NA,NA,77,NA 
+69922,7,2,1,42,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,31640.296506,33247.715312,3,92,6,6,1,6,6,1,1,0,1,42,2,1,1,4 +69923,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,169546.363168,172535.263747,2,91,15,15,5,2,2,0,0,1,2,60,1,5,1,5 +69924,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,27426.222967,30692.896319,2,95,3,3,0.95,2,2,0,0,2,2,80,1,1,1,2 +69925,7,2,2,80,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,NA,17563.671235,18809.520935,1,99,3,3,1.3,1,1,0,0,1,2,80,1,1,5,NA +69926,7,2,2,11,NA,2,2,1,11,132,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16699.231378,17036.994683,1,98,3,3,0.4,7,7,2,3,0,2,31,2,5,1,2 +69927,7,2,2,2,NA,2,2,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,8645.395449,9311.129497,2,93,6,6,0.64,7,7,2,1,3,2,60,2,3,2,NA +69928,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,9015.10987,2,95,1,1,0.36,1,1,0,0,1,2,63,1,1,2,NA +69929,7,2,1,6,NA,5,6,2,6,83,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10810.913614,11461.625841,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 +69930,7,2,2,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,112992.533921,115858.21849,1,94,9,9,3.14,3,3,1,0,0,1,28,1,5,1,5 +69931,7,2,2,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,129336.409693,136474.939567,1,95,15,15,5,3,3,1,0,0,1,28,1,5,1,5 +69932,7,2,2,21,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,38177.662675,41078.639454,1,103,3,3,0.37,5,5,1,2,0,2,30,1,4,5,NA +69933,7,2,2,6,NA,3,3,1,6,79,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20531.504977,20629.491172,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA +69934,7,2,2,56,NA,3,3,1,NA,NA,2,NA,2,2,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,153680.330235,152391.721814,1,103,15,15,5,2,2,0,0,1,1,61,1,4,1,5 +69935,7,2,1,80,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,8543.37447,9341.78969,3,90,9,9,3.65,2,2,0,0,2,1,80,1,2,1,3 
+69936,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15072.065618,15224.718219,1,99,15,15,5,2,2,0,0,1,2,54,2,5,1,5 +69937,7,2,1,19,NA,3,3,1,19,233,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,23403.89977,23518.866268,1,98,4,4,0.75,4,4,0,1,0,2,48,1,2,1,3 +69938,7,2,2,16,NA,4,4,1,16,203,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11791.755593,11850.248564,2,96,7,7,1.04,7,7,0,4,0,2,37,1,3,3,NA +69939,7,2,2,0,5,2,2,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7192.863376,7041.533956,1,90,3,3,0.52,3,3,2,0,0,2,20,1,4,5,NA +69940,7,2,1,61,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,3,1,NA,1,2,2,1,2,2,1,2,2,2,6638.80908,6954.569749,2,93,15,15,5,2,2,0,0,2,1,61,2,3,1,3 +69941,7,2,2,52,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,2,1,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,12098.219371,13033.848361,2,100,14,14,3.06,5,5,1,0,1,2,31,2,5,1,5 +69942,7,1,1,61,NA,2,2,NA,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,9691.985299,0,1,93,6,6,1.55,3,3,0,0,3,1,61,2,4,1,1 +69943,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105141.812429,109679.354704,1,98,15,15,5,4,4,2,0,0,1,40,1,5,1,NA +69944,7,2,2,18,NA,4,4,2,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,NA,11072.776368,11285.58755,3,90,5,5,0.87,4,4,0,0,0,2,43,2,3,5,NA +69945,7,2,1,2,NA,5,7,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8100.706553,8340.3972,1,98,15,15,5,4,4,2,0,0,2,35,1,5,1,4 +69946,7,2,1,6,NA,3,3,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,42986.51011,44926.997612,2,94,14,14,2.83,6,6,0,4,0,2,38,1,2,1,2 +69947,7,2,1,7,NA,2,2,2,7,88,NA,NA,2,2,1,0,NA,NA,NA,2,1,2,NA,NA,NA,NA,NA,NA,NA,8966.477743,10563.593323,2,99,12,3,0.52,5,3,0,1,0,1,30,2,2,4,NA +69948,7,2,1,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,27739.528889,28855.317795,1,90,4,3,1.07,2,1,0,0,0,2,25,1,5,6,NA +69949,7,2,2,12,NA,3,3,2,12,148,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,38400.791741,41869.717003,1,95,6,6,1.09,5,5,0,3,0,1,31,1,4,1,4 
+69950,7,2,2,10,NA,4,4,2,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8115.309776,8285.322178,2,95,7,7,2.78,2,2,0,1,0,2,32,1,4,5,NA +69951,7,2,1,4,NA,5,6,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6185.185728,6767.562311,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 +69952,7,2,1,4,NA,3,3,2,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,77702.196479,91046.355703,1,95,15,15,5,3,3,1,0,0,1,28,1,5,1,5 +69953,7,2,1,8,NA,2,2,2,8,106,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,13804.767816,14116.094161,2,91,9,9,2.6,4,4,1,1,0,2,31,2,4,1,5 +69954,7,2,2,43,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,1,1,2,2,1,2,2,NA,NA,NA,NA,15309.176013,16540.512525,1,90,14,14,3.33,5,5,1,2,0,1,41,1,5,1,5 +69955,7,2,1,10,NA,5,7,2,10,121,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7482.593572,7851.213843,1,101,4,4,0.78,4,4,1,2,0,2,31,1,4,3,NA +69956,7,2,1,3,NA,3,3,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,33334.752566,38103.457131,3,92,7,7,0.81,7,7,2,4,0,1,40,NA,NA,1,4 +69957,7,2,1,32,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,37658.482129,38663.56389,1,100,10,10,2.91,4,4,1,1,0,1,32,1,5,1,5 +69958,7,2,1,12,NA,3,3,2,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71231.747774,75661.062324,2,95,9,9,2.6,4,4,0,2,0,1,42,1,4,1,4 +69959,7,2,2,51,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,2,4,NA,2,2,2,2,2,2,2,2,2,2,29695.385784,34065.080879,2,91,1,1,0,1,1,0,0,0,2,51,2,2,4,NA +69960,7,1,2,14,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,116754.883807,0,1,94,5,5,1.04,4,4,1,1,0,1,18,1,2,NA,NA +69961,7,2,2,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,204553.029374,204712.584881,1,100,15,15,5,2,2,0,0,0,2,56,1,5,1,NA +69962,7,2,1,69,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11992.012141,12562.386395,2,102,4,4,1.09,2,2,0,0,2,2,68,1,3,1,3 +69963,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,29672.504425,2,101,12,4,1.79,4,1,0,0,0,2,20,1,4,5,NA 
+69964,7,2,2,75,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,70652.532816,72526.815349,1,98,6,6,1.65,2,2,0,0,2,1,80,1,3,1,3 +69965,7,2,2,27,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,16181.486766,17520.496205,1,90,4,4,0.78,4,4,0,0,1,1,69,2,4,1,3 +69966,7,2,1,3,NA,4,4,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13345.162299,13752.776418,2,102,4,4,0.81,3,3,2,0,0,2,23,1,4,5,NA +69967,7,2,1,8,NA,3,3,1,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,28126.350642,29877.890829,2,91,5,5,1.2,3,3,0,1,0,2,40,1,5,1,5 +69968,7,2,1,16,NA,3,3,1,16,193,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,113571.164423,112715.649169,2,101,10,10,2.33,6,6,1,3,0,1,39,1,2,1,4 +69969,7,2,1,11,NA,1,1,1,11,138,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14007.413517,2,98,1,1,0.18,4,4,0,2,0,1,29,1,4,6,NA +69970,7,2,2,13,NA,5,6,2,13,162,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9029.632215,9422.555256,1,90,77,77,NA,4,4,0,2,0,2,51,1,5,1,5 +69971,7,2,2,2,NA,3,3,2,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22159.470641,22450.653549,1,95,3,3,0.7,3,3,1,0,0,1,25,1,4,1,4 +69972,7,2,1,79,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,53149.251154,56449.488341,1,100,15,15,5,2,2,0,0,2,1,79,1,5,1,4 +69973,7,2,2,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,26637.81974,28307.564263,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA +69974,7,2,1,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,15408.94893,15514.368855,1,99,13,13,NA,3,3,0,0,2,1,67,1,2,1,2 +69975,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,60071.993203,65791.533402,1,95,14,14,5,1,1,0,0,1,2,80,1,4,1,NA +69976,7,2,1,61,NA,2,2,2,NA,NA,2,NA,2,1,99,NA,4,1,NA,2,2,2,2,2,2,2,2,2,2,10585.549682,10952.862023,1,90,12,12,NA,3,3,0,0,1,1,35,2,4,5,NA +69977,7,2,2,12,NA,2,2,2,12,154,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,13824.001771,15649.805677,2,90,3,3,0.54,4,4,1,2,0,2,33,2,1,4,NA 
+69978,7,2,2,13,NA,4,4,1,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18698.220599,19300.762251,2,101,6,6,1.54,3,3,0,1,0,2,34,1,4,1,3 +69979,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,2,2,1,2,2,NA,NA,NA,NA,26465.930618,28724.649216,2,100,5,5,1.07,4,4,0,1,0,2,36,1,3,5,NA +69980,7,2,2,0,2,2,2,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,4481.392842,4662.235074,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +69981,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15991.70237,16076.675652,1,99,3,3,1.29,1,1,0,0,1,1,62,1,3,5,NA +69982,7,2,2,8,NA,3,3,2,8,98,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,41790.228676,44382.642913,2,94,7,7,1.17,6,6,0,3,0,1,40,1,3,1,5 +69983,7,2,1,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,19384.896286,19940.089683,1,94,3,3,0.95,2,2,0,0,1,2,67,1,2,2,NA +69984,7,2,2,16,NA,1,1,1,16,200,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,27070.679378,29958.836982,1,92,4,4,0.74,4,4,1,1,0,1,51,2,1,1,1 +69985,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,19901.857177,20582.498016,3,92,3,3,1.29,1,1,0,0,1,2,73,1,4,2,NA +69986,7,2,1,14,NA,1,1,1,14,170,NA,NA,2,2,4,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,25525.43565,25915.600865,1,94,5,5,0.87,4,4,1,1,0,1,35,2,1,1,1 +69987,7,2,2,3,NA,1,1,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,17065.756351,3,92,3,3,0.54,4,4,3,0,0,2,22,1,3,5,NA +69988,7,2,1,14,NA,1,1,1,14,175,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18242.832494,19431.180704,1,102,7,7,1.41,5,5,0,2,2,1,72,1,4,1,3 +69989,7,2,2,75,NA,2,2,2,NA,NA,2,NA,2,1,99,NA,1,6,NA,2,2,2,2,2,2,1,2,2,NA,17318.187297,19970.708273,2,90,4,4,1.02,2,2,0,0,2,2,75,2,1,6,NA +69990,7,2,2,23,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,18412.14266,22203.276692,2,90,2,2,0.67,2,2,0,0,1,2,64,1,5,5,NA +69991,7,2,2,56,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,27407.957254,27308.998449,2,90,3,3,1.29,1,1,0,0,0,2,56,1,3,3,NA 
+69992,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,150082.940829,151424.784638,1,98,8,8,2.97,2,2,0,0,0,1,23,1,3,1,5 +69993,7,2,2,40,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,29650.79971,37801.89955,2,90,4,4,1.29,2,2,0,0,0,2,40,1,2,5,NA +69994,7,2,2,21,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,71351.478679,74215.528244,2,100,10,10,3.13,4,4,0,0,1,2,53,1,2,1,2 +69995,7,2,1,18,NA,1,1,2,18,226,2,NA,1,1,NA,11,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,18120.499457,19917.650851,1,90,1,1,0.02,5,5,0,1,0,2,39,2,1,1,2 +69996,7,2,2,40,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,18490.479848,21651.629766,2,100,6,6,0.99,5,5,0,3,0,2,40,1,3,1,3 +69997,7,2,1,46,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,25123.480232,25043.348036,1,92,15,15,4.44,5,5,0,3,0,2,43,1,5,6,NA +69998,7,1,1,17,NA,4,4,NA,NA,NA,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,11125.932433,0,1,96,NA,NA,NA,4,4,1,1,0,2,37,NA,NA,1,4 +69999,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,20247.768461,20264.515513,2,93,5,5,1.05,3,3,1,0,0,2,29,1,3,5,NA +70000,7,2,2,59,NA,4,4,2,NA,NA,2,NA,2,2,6,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,19100.364335,18787.428747,1,96,4,4,1.34,1,1,0,0,0,2,59,2,3,3,NA +70001,7,1,2,61,NA,5,6,NA,NA,NA,2,NA,2,1,4,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17243.546687,0,1,95,3,3,0.43,4,4,0,1,2,1,65,2,5,1,3 +70002,7,2,2,9,NA,2,2,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,19084.197249,19808.05931,2,91,15,15,5,4,4,0,2,0,1,45,2,5,1,4 +70003,7,2,1,3,NA,5,6,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5953.107662,6677.061573,3,90,14,14,3.47,4,4,1,1,0,2,38,2,5,1,5 +70004,7,2,1,12,NA,3,3,2,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,99249.131685,98501.502244,1,101,15,15,5,4,4,0,2,0,1,43,1,4,1,5 +70005,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,1,2,1,2,2,NA,NA,NA,NA,33146.291352,36370.393263,1,90,5,5,1.05,3,3,0,0,3,2,60,1,5,77,NA 
+70006,7,2,1,29,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,5,5,NA,1,2,2,NA,NA,NA,1,2,2,1,38474.772527,40200.135096,2,93,NA,NA,NA,4,2,0,0,0,1,28,NA,NA,4,NA +70007,7,2,2,4,NA,4,4,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10389.292229,10850.629517,2,95,1,1,0.25,3,3,1,1,0,2,26,1,2,5,NA +70008,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,160743.928829,165029.101567,1,95,8,6,2.04,4,2,0,1,0,2,57,1,5,5,NA +70009,7,2,1,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,74929.366953,78437.058525,1,93,15,9,5,3,1,0,0,0,1,26,1,5,5,NA +70010,7,2,2,58,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,12441.719186,13672.655562,1,96,7,7,1.83,3,3,0,0,1,1,66,2,5,1,3 +70011,7,2,1,14,NA,5,6,2,14,170,NA,NA,2,2,3,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8168.705487,9098.657657,1,93,7,7,1.64,5,5,0,2,0,1,47,2,5,1,1 +70012,7,2,1,0,5,4,4,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5719.440362,6091.410026,1,96,15,15,5,6,6,1,1,1,2,44,1,3,1,3 +70013,7,2,1,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,7526.944058,7674.379869,1,96,15,15,5,2,2,0,0,2,1,70,1,5,1,4 +70014,7,2,1,32,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,85120.619542,87558.517853,1,93,15,15,5,1,1,0,0,0,1,32,1,4,5,NA +70015,7,2,1,57,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,16851.334496,19427.051933,2,95,4,4,1.66,1,1,0,0,0,1,57,1,4,3,NA +70016,7,2,2,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,62212.598767,64340.261278,1,94,15,15,5,2,2,0,0,2,1,73,NA,NA,1,5 +70017,7,2,2,14,NA,4,4,2,14,169,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12209.74498,13384.042162,2,90,4,4,0.57,5,5,1,2,0,2,33,2,2,77,NA +70018,7,2,1,28,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27911.790319,31609.08818,3,91,7,7,1.57,4,4,2,0,0,2,29,2,3,1,3 +70019,7,2,1,38,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18544.003944,18279.674065,2,100,5,5,1.08,3,3,0,0,0,1,38,1,2,5,NA 
+70020,7,2,2,21,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,4,6,3,1,2,2,1,2,2,1,2,2,1,16929.836231,18556.498474,2,101,8,5,2.2,2,1,0,0,0,1,24,2,4,6,NA +70021,7,2,2,10,NA,3,3,1,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,45129.675368,44994.093254,1,98,10,10,2.2,6,6,1,3,0,2,31,1,4,6,NA +70022,7,2,2,2,NA,5,6,2,3,36,NA,NA,2,2,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8173.816615,8313.181418,1,97,15,15,4.34,4,4,2,0,0,1,35,2,5,1,5 +70023,7,2,2,9,NA,4,4,2,10,120,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7659.302568,7901.193624,1,96,77,77,NA,7,7,0,3,1,2,43,77,5,5,NA +70024,7,2,2,9,NA,4,4,2,9,109,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7268.721126,7826.51541,2,99,4,4,0.41,7,7,0,2,0,2,36,1,3,5,NA +70025,7,2,2,16,NA,2,2,2,16,196,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,7,7,1.66,4,4,0,3,0,2,34,1,5,3,NA +70026,7,2,1,31,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17554.053699,17463.765652,2,100,7,7,2.72,2,2,0,0,0,2,59,1,4,3,NA +70027,7,2,1,7,NA,1,1,1,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10658.399025,10722.994279,1,102,8,8,1.33,7,7,1,4,0,2,32,1,3,1,2 +70028,7,2,2,2,NA,3,3,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22159.470641,22857.583467,1,95,2,2,0.22,4,4,2,1,0,2,22,1,2,5,NA +70029,7,2,1,16,NA,4,4,1,16,198,NA,NA,1,1,NA,8,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,18413.211403,2,101,99,99,NA,3,3,0,1,1,2,78,1,1,2,NA +70030,7,1,2,61,NA,5,7,NA,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,10346.035773,0,1,99,9,1,0,2,1,0,0,2,2,61,1,4,6,NA +70031,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,10192.188896,10440.656902,1,99,7,7,3.31,1,1,0,0,1,2,63,1,4,3,NA +70032,7,2,1,80,NA,5,6,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,11550.158096,12419.035396,2,92,77,77,NA,2,2,0,0,2,2,80,1,3,1,4 +70033,7,2,2,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,160743.928829,166234.629208,1,95,12,12,NA,2,2,0,0,0,2,53,1,4,1,NA 
+70034,7,2,2,4,NA,4,4,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9689.370244,10119.627445,1,99,2,2,0.43,3,3,2,0,0,2,26,1,4,5,NA +70035,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,91836.529686,93336.987794,1,93,10,10,5,1,1,0,0,1,2,65,1,5,3,NA +70036,7,2,2,7,NA,2,2,1,7,86,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16765.041162,17104.135554,1,98,4,4,0.94,3,3,0,1,0,2,35,2,5,1,5 +70037,7,2,2,1,23,2,2,2,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8563.768225,8593.010585,1,93,6,6,0.74,7,7,1,2,0,1,53,2,2,1,2 +70038,7,1,1,20,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,44074.735764,0,2,91,4,4,0.69,4,4,2,0,0,2,21,1,3,6,NA +70039,7,2,2,9,NA,3,3,2,9,109,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,NA,NA,NA,1,2,2,1,22933.149195,26924.921202,1,101,1,1,0.08,6,6,0,1,0,1,51,1,2,5,NA +70040,7,2,1,3,NA,5,7,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12996.965152,13393.943951,1,101,5,5,1.23,3,3,2,0,0,2,24,1,2,5,NA +70041,7,1,2,80,NA,5,6,NA,NA,NA,2,NA,2,1,7,NA,1,2,NA,1,2,1,1,2,1,NA,NA,NA,NA,13689.379977,0,2,92,2,2,0.64,1,1,0,0,1,2,80,2,1,2,NA +70042,7,2,2,2,NA,3,3,2,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16635.553691,18360.507343,1,91,4,4,0.81,4,4,1,1,0,1,32,1,4,6,NA +70043,7,2,2,12,NA,3,3,2,12,150,NA,NA,2,1,4,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,74165.041171,75130.242541,2,91,15,15,5,4,4,0,2,0,2,48,1,5,1,5 +70044,7,2,1,9,NA,4,4,2,9,117,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9023.469661,9333.449986,2,90,8,6,1.46,4,3,1,1,0,2,21,1,5,6,NA +70045,7,2,2,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,70772.230318,73438.772102,1,93,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +70046,7,2,1,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,19541.667675,19517.839388,2,95,3,3,1.16,1,1,0,0,0,1,55,1,4,4,NA +70047,7,2,2,19,NA,4,4,1,19,233,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11838.873374,11750.256617,2,100,14,14,3.06,5,5,1,0,0,1,50,1,5,1,5 
+70048,7,2,2,11,NA,4,4,2,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7891.243393,8330.261165,2,100,4,4,0.69,5,5,0,3,0,1,38,1,3,6,NA +70049,7,2,1,41,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,1,2,1,1,2,2,1,2,2,NA,20696.713928,21871.839567,2,102,9,9,2.68,4,4,1,1,0,2,38,2,5,1,2 +70050,7,2,2,49,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,29010.447112,29720.490657,1,100,9,9,3.64,2,2,0,0,0,2,49,1,4,5,NA +70051,7,2,1,10,NA,4,4,1,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11185.87189,11566.898318,1,92,7,7,2.25,3,3,0,2,0,2,35,1,4,77,NA +70052,7,2,2,28,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,11696.173591,12195.620792,1,93,15,5,1.84,6,1,0,0,0,1,34,2,5,5,NA +70053,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,32866.0119,33575.905633,1,98,99,99,NA,3,2,0,0,0,2,22,1,4,5,NA +70054,7,2,1,38,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,1,6,NA,2,2,2,1,2,2,NA,NA,NA,NA,32856.012738,34774.888791,2,103,77,77,NA,7,7,0,4,0,1,38,2,1,6,NA +70055,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,115926.402585,120043.284447,1,98,10,10,3.04,4,4,0,2,0,2,47,1,4,1,3 +70056,7,2,1,3,NA,5,7,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11821.601823,13035.249088,1,95,4,4,0.97,3,3,2,0,0,2,22,1,4,5,NA +70057,7,2,2,7,NA,1,1,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,15352.601806,16028.326912,2,102,4,4,0.61,5,5,0,3,0,1,34,2,3,1,3 +70058,7,2,1,38,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20071.705576,20986.552878,3,91,7,7,2.45,2,2,0,0,0,2,29,2,5,1,5 +70059,7,2,2,74,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,25812.913537,27633.237354,2,95,3,3,1.27,1,1,0,0,1,2,74,1,2,2,NA +70060,7,2,1,2,NA,4,4,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5447.377416,5513.820992,2,90,4,4,0.97,3,3,1,0,0,2,23,2,3,5,NA +70061,7,2,1,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,7101.739553,7433.956549,2,100,2,2,0.83,1,1,0,0,1,1,65,1,2,5,NA 
+70062,7,2,2,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,69726.261922,71865.394906,1,93,15,15,5,2,2,0,0,2,2,75,1,5,1,5 +70063,7,2,2,43,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,1,2,1,2,2,1,2,2,1,2,2,1,27585.470618,28524.896774,2,102,15,15,5,4,4,0,2,0,1,44,1,3,1,1 +70064,7,2,2,38,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,2,1,2,2,1,2,2,1,2,2,1,23725.035562,24103.855713,1,91,7,7,2.2,3,3,0,0,1,2,60,1,2,2,NA +70065,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,71034.153987,74518.988514,1,98,14,14,3.9,4,4,0,3,0,2,31,1,4,1,NA +70066,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,19260.892847,19199.459573,2,97,8,5,2.02,2,1,0,0,0,1,47,1,4,3,NA +70067,7,2,1,5,NA,3,3,1,5,64,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21357.821814,24096.698658,2,96,3,3,0.53,5,5,3,0,0,2,26,1,4,1,4 +70068,7,2,2,8,NA,4,4,2,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8147.287486,8590.325322,2,90,2,2,0.38,4,4,1,2,0,2,32,1,4,5,NA +70069,7,2,2,50,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,4,6,NA,2,2,2,1,2,2,2,2,2,2,24352.519425,24479.515743,1,97,3,3,0.5,5,5,0,2,0,1,56,2,2,6,NA +70070,7,2,2,2,NA,4,4,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9273.23044,9512.588252,2,96,NA,3,0.65,4,3,1,1,0,1,21,1,2,6,NA +70071,7,2,1,0,0,1,1,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,7757.493251,7980.837709,3,92,4,4,0.59,5,5,2,1,0,1,20,2,1,1,3 +70072,7,2,1,53,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,26135.885159,26659.265986,2,101,7,7,2.16,3,3,0,1,0,2,44,1,4,6,NA +70073,7,2,1,19,NA,3,3,2,19,239,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30943.024697,36334.060093,1,101,1,1,0.08,6,6,0,1,0,1,51,1,2,5,NA +70074,7,2,1,16,NA,1,1,1,16,195,NA,NA,1,1,NA,9,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,19996.544021,19901.195571,1,100,7,7,1.3,5,5,0,3,0,1,43,2,2,1,4 +70075,7,2,2,31,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,3,1,2,1,2,1,1,2,1,NA,NA,NA,NA,11608.998717,13948.968232,3,90,5,5,0.93,4,4,1,0,0,1,48,2,4,1,NA 
+70076,7,2,1,14,NA,5,6,2,14,172,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6232.587755,6661.015771,1,91,15,15,3.25,7,7,1,2,0,2,31,1,5,1,5 +70077,7,2,2,58,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,16119.136275,16768.999275,1,101,14,14,5,2,2,0,0,0,1,58,2,3,1,1 +70078,7,2,2,33,NA,1,1,2,NA,NA,2,NA,2,2,2,NA,2,1,2,2,2,2,2,2,2,2,2,2,2,32982.479382,35082.144563,2,99,99,3,0.66,4,2,0,0,0,1,35,2,4,1,2 +70079,7,2,2,1,21,2,2,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9267.834226,9392.475614,2,93,15,15,4.51,4,4,1,1,0,1,40,1,4,1,5 +70080,7,2,2,32,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16614.865368,17238.21833,3,91,15,15,4.47,4,4,2,0,0,1,33,1,5,1,5 +70081,7,2,2,19,NA,4,4,2,19,236,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,10154.02528,10568.655787,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +70082,7,2,2,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,21398.47235,20994.797177,1,103,14,8,4.39,2,1,0,0,0,2,29,1,5,5,NA +70083,7,1,2,24,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,3,3,1,2,2,1,2,2,NA,NA,NA,NA,129336.409693,0,1,92,4,4,1.29,2,2,1,0,0,2,24,1,4,3,NA +70084,7,2,1,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,7117.971973,7450.948316,2,100,3,3,0.68,2,2,0,0,2,1,66,1,2,1,2 +70085,7,2,2,6,NA,3,3,2,7,84,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18732.936406,19089.39853,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +70086,7,2,1,18,NA,1,1,1,18,223,2,NA,2,2,4,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,23389.620035,23484.828122,3,91,3,3,0.39,6,6,1,1,0,1,39,2,1,6,NA +70087,7,2,2,7,NA,4,4,1,7,95,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7899.813226,8111.155436,2,100,15,15,4.47,4,4,0,2,0,1,39,NA,NA,1,5 +70088,7,2,1,47,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17452.049284,17389.002535,1,93,7,7,1.64,5,5,0,2,0,1,47,2,5,1,1 +70089,7,2,1,0,11,1,1,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7222.23638,7150.860834,1,94,4,4,0.32,7,7,3,2,0,2,28,2,2,1,9 
+70090,7,2,2,7,NA,3,3,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,39810.933651,39282.261038,2,95,15,15,4.77,4,4,0,2,0,2,36,1,4,1,5 +70091,7,2,2,4,NA,3,3,2,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,78813.208592,86985.412201,2,91,6,6,1.34,4,4,1,2,0,2,33,1,4,3,NA +70092,7,2,1,24,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,120604.496044,126250.391527,2,92,10,10,3.51,3,3,0,0,0,1,24,1,4,5,NA +70093,7,2,1,14,NA,4,4,2,14,177,NA,NA,2,2,3,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14853.30651,14966.239185,1,93,14,14,5,2,2,0,1,0,2,52,2,3,1,NA +70094,7,2,2,36,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,60859.550805,63845.233776,2,100,15,15,5,3,3,0,1,0,1,38,1,4,1,4 +70095,7,2,2,59,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,10420.55184,10475.655119,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +70096,7,2,2,77,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,66567.821082,68844.431611,3,91,7,7,1.97,4,4,0,0,1,2,77,1,5,2,NA +70097,7,2,2,7,NA,3,3,2,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,67046.323141,68322.122844,1,90,8,8,1.67,5,5,2,1,0,2,28,1,4,1,5 +70098,7,2,2,0,9,2,2,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5234.186769,5411.317673,2,90,14,14,3.08,6,6,1,1,1,2,60,2,5,2,NA +70099,7,2,1,17,NA,3,3,2,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,100370.520459,102294.664852,1,90,15,15,5,4,4,0,1,0,2,53,1,5,1,5 +70100,7,2,2,11,NA,4,4,1,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9399.281543,9696.12347,2,96,6,6,1.32,5,5,1,3,0,2,30,1,4,3,NA +70101,7,2,2,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,22109.546782,29180.501371,1,93,7,7,2.72,2,2,0,1,0,2,45,1,3,3,NA +70102,7,2,1,68,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,92308.775062,93174.50633,1,100,6,6,2.24,1,1,0,0,1,1,68,1,4,2,NA +70103,7,2,2,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,125443.355152,127362.409324,2,103,10,10,5,1,1,0,0,0,2,56,1,4,3,NA 
+70104,7,2,2,34,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16614.865368,17238.21833,3,91,15,15,5,4,4,2,0,0,1,36,2,5,1,5 +70105,7,2,1,25,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,NA,52698.05363,53416.368765,3,92,5,5,0.81,5,5,3,0,0,2,23,1,4,5,NA +70106,7,2,1,18,NA,3,3,2,19,228,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30100.326038,29873.584609,1,101,6,6,1.17,4,4,0,1,0,1,41,1,3,6,NA +70107,7,2,2,0,10,4,4,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,4358.100179,4799.448128,2,93,NA,NA,NA,4,4,1,0,1,1,63,NA,NA,6,NA +70108,7,2,2,11,NA,3,3,2,11,138,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,49164.586897,48511.701666,2,95,15,15,4.63,5,5,1,2,0,2,36,1,5,1,3 +70109,7,2,1,57,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,27595.50738,28151.492876,1,98,15,15,5,5,5,0,1,1,2,55,1,5,1,5 +70110,7,2,2,0,11,4,4,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5576.185193,5739.245437,2,96,5,5,1.24,3,3,2,0,0,1,29,1,3,5,NA +70111,7,2,1,56,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,12517.592486,15021.976783,1,96,77,77,NA,7,7,1,3,0,1,56,1,3,1,4 +70112,7,2,2,9,NA,3,3,2,9,110,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,55626.447796,54887.751745,1,95,14,14,3.8,4,4,0,2,0,2,37,1,5,1,5 +70113,7,1,1,28,NA,1,1,NA,NA,NA,2,NA,2,1,77,NA,1,3,NA,2,2,2,2,2,2,NA,NA,NA,NA,35669.2076,0,2,94,77,77,NA,4,4,0,0,0,1,28,2,1,3,NA +70114,7,2,1,39,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,14221.330587,18275.989104,3,90,12,12,NA,2,2,0,0,0,1,39,2,3,1,NA +70115,7,2,2,36,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,3,4,2,2,2,2,2,2,2,NA,NA,NA,NA,36453.846815,39554.044508,1,102,2,2,0.52,3,3,0,2,0,2,36,2,3,4,NA +70116,7,2,1,0,4,3,3,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23841.007913,24575.370543,2,101,10,10,2.33,6,6,1,3,0,1,39,1,2,1,4 +70117,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,79677.823556,79777.881901,1,99,15,15,5,5,5,0,3,0,2,43,1,5,1,5 
+70118,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,54095.581484,61529.339683,2,91,5,5,1.84,1,1,0,0,1,2,80,1,4,2,NA +70119,7,2,1,19,NA,3,3,1,19,229,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,110478.18082,109645.964567,2,101,1,1,0.11,2,1,0,0,0,1,19,1,4,NA,NA +70120,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,53541.401974,54113.194007,1,99,8,8,3.4,2,2,0,0,2,1,74,1,5,1,4 +70121,7,2,2,33,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,22758.541444,23956.866949,1,101,2,2,0.47,3,3,1,0,0,1,35,1,2,6,NA +70122,7,2,2,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,86578.861495,87453.974807,2,101,10,10,2.33,6,6,1,3,0,1,39,1,2,1,4 +70123,7,2,2,45,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11762.034222,11824.231183,3,90,77,77,NA,5,5,0,2,0,1,46,2,3,1,3 +70124,7,2,2,16,NA,4,4,1,17,205,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,12531.903464,13043.632492,2,100,4,4,0.85,4,4,0,2,0,2,39,1,3,6,NA +70125,7,2,1,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,28996.250643,29563.919546,2,101,4,4,1.52,1,1,0,0,0,1,30,1,3,5,NA +70126,7,2,1,2,NA,4,4,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6992.24593,7077.532809,2,97,1,1,0.33,2,2,1,0,0,2,29,1,3,5,NA +70127,7,2,2,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,112960.559471,154652.396986,1,91,4,4,1.29,2,2,1,0,0,2,26,1,4,5,NA +70128,7,1,1,24,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,41383.258526,0,1,97,1,1,0.22,2,1,0,0,0,1,24,1,3,6,NA +70129,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,19384.896286,19940.089683,1,94,3,3,0.39,6,6,2,2,0,2,25,1,4,1,2 +70130,7,2,1,42,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,21600.805431,23858.641894,2,96,7,7,1.79,4,4,2,0,0,2,49,1,3,1,3 +70131,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12449.932013,12144.773422,3,90,15,15,4.34,4,4,0,0,1,1,65,1,3,1,4 
+70132,7,2,1,62,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11761.359913,11893.852645,1,92,9,9,4.23,2,2,0,0,1,1,62,1,3,1,4 +70133,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,20891.980831,21490.337905,2,97,3,1,0.09,2,1,0,0,0,1,30,1,4,5,NA +70134,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,138322.767578,139738.299504,1,101,10,10,4.63,2,2,0,0,1,1,64,2,3,1,4 +70135,7,2,1,2,NA,1,1,1,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,8481.734412,8582.006703,1,103,8,8,1.85,5,5,2,1,0,2,25,2,2,1,2 +70136,7,2,1,39,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,17643.563124,18168.544198,3,91,15,15,5,4,4,2,0,0,2,33,2,5,1,5 +70137,7,2,1,17,NA,2,2,2,17,210,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14081.782012,14391.713696,2,90,2,2,0.25,5,5,0,1,0,2,41,2,4,1,NA +70138,7,2,1,6,NA,5,7,2,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21147.476454,21752.181979,1,95,5,5,0.89,4,4,0,1,0,2,42,1,4,6,NA +70139,7,2,2,10,NA,4,4,1,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9453.111053,10178.533204,1,100,6,6,1.13,4,4,0,3,0,2,32,1,3,5,NA +70140,7,2,1,3,NA,3,3,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,60239.023202,67963.933877,1,98,7,7,1.66,5,5,2,1,0,2,37,1,5,1,3 +70141,7,2,1,0,11,2,2,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,4854.394074,5092.044151,2,93,15,15,4.84,6,6,1,1,2,1,66,2,4,1,3 +70142,7,2,2,69,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,105434.067206,107522.739372,2,103,9,9,4.92,1,1,0,0,1,2,69,1,4,2,NA +70143,7,2,2,40,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,13510.18419,14596.8255,2,90,14,14,4.32,3,3,0,1,0,1,48,2,4,1,5 +70144,7,2,1,12,NA,1,1,1,12,149,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18987.489596,19405.392998,3,91,6,6,2.04,2,2,0,1,0,2,51,1,4,4,NA +70145,7,2,2,49,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,29010.447112,28216.500124,1,100,6,6,2.51,1,1,0,0,0,2,49,1,4,3,NA 
+70146,7,2,2,12,NA,5,7,2,12,152,NA,NA,2,1,4,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10767.566937,11183.935292,2,91,15,15,3.7,5,5,1,2,0,1,50,NA,NA,1,5 +70147,7,2,1,14,NA,5,6,1,15,180,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8915.81491,9312.338117,1,92,14,14,3.69,4,4,0,2,0,1,47,2,4,4,NA +70148,7,2,1,30,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,24460.137584,1,92,3,3,0.46,5,5,2,1,0,1,30,1,3,1,2 +70149,7,2,1,13,NA,5,6,2,13,160,NA,NA,2,1,4,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8965.57404,9581.867503,1,90,15,15,5,5,5,0,3,0,2,46,2,4,1,5 +70150,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,76374.321112,79950.814336,2,99,14,14,5,1,1,0,0,0,1,39,1,5,5,NA +70151,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,64679.499599,80402.538623,3,92,6,6,2.69,1,1,0,0,1,2,80,1,3,2,NA +70152,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,161992.272945,167997.905035,1,91,14,14,3.8,4,4,0,2,0,1,50,NA,NA,1,5 +70153,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,24930.322327,25181.116543,2,94,3,3,0.92,1,1,0,0,0,1,57,1,2,3,NA +70154,7,2,1,0,1,5,6,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5941.765349,6071.648885,2,102,3,3,0.38,5,5,3,0,0,2,30,2,2,1,4 +70155,7,2,1,18,NA,5,7,1,18,219,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13276.485807,15665.287637,2,100,99,99,NA,3,3,0,0,0,1,46,1,9,3,NA +70156,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,34853.379657,39642.894617,3,90,10,10,4.3,2,2,0,0,2,2,80,1,4,1,5 +70157,7,2,2,1,22,4,4,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9273.23044,9685.008923,2,96,5,5,1.24,3,3,2,0,0,1,29,1,3,5,NA +70158,7,2,2,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,24905.670199,24973.138542,2,101,8,8,2.7,3,3,0,1,0,1,53,1,4,1,2 +70159,7,2,1,13,NA,4,4,2,13,162,NA,NA,2,1,4,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11125.932433,11147.929312,1,96,7,7,1.52,4,4,0,2,0,2,30,2,4,1,5 
+70160,7,2,2,61,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,2,1,NA,2,2,2,2,2,2,1,2,2,2,10288.382343,13758.61994,2,90,6,6,1.7,2,2,0,0,2,1,61,2,1,1,2 +70161,7,2,1,34,NA,1,1,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,2,2,2,2,2,2,1,2,2,2,37715.365512,38149.83984,2,102,7,7,1.33,6,6,1,3,0,1,34,2,2,1,1 +70162,7,2,1,34,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21288.18311,21326.274673,1,102,7,7,1.57,4,4,0,2,0,2,33,1,4,1,4 +70163,7,2,2,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,163605.682975,162710.354378,1,90,15,15,5,4,4,0,1,0,2,53,1,5,1,5 +70164,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,23884.62129,24647.381072,2,98,5,5,1.24,3,3,0,0,1,2,58,1,2,5,NA +70165,7,2,2,18,NA,2,2,2,19,228,2,NA,1,1,NA,13,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,15442.648697,16179.397194,3,90,77,77,NA,4,3,0,0,0,1,45,2,3,3,NA +70166,7,2,1,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,137038.746155,146586.432966,2,101,4,2,0.66,2,1,0,0,0,1,21,1,4,5,NA +70167,7,2,2,47,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,153972.608815,154123.803084,2,101,5,5,1.36,2,2,0,0,0,2,47,1,2,1,4 +70168,7,2,1,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,64901.456576,67363.75016,1,101,15,15,5,2,2,0,0,2,2,73,1,4,1,4 +70169,7,2,2,23,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,60324.348827,60863.689632,1,95,6,6,0.96,5,5,1,0,1,2,69,1,1,2,NA +70170,7,2,2,59,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,15497.844354,15073.705188,1,99,4,2,0.74,2,1,0,0,1,2,59,1,3,3,NA +70171,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,27532.825087,28260.668065,1,101,6,6,1.3,4,4,2,0,0,2,30,1,4,6,NA +70172,7,2,2,11,NA,5,7,1,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5147.116597,5520.448595,2,92,15,15,4.59,4,4,0,2,0,2,45,2,5,1,5 +70173,7,2,2,2,NA,2,2,2,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9653.164181,10331.416262,1,90,15,15,4.44,5,5,2,1,0,1,36,1,3,1,4 
+70174,7,1,1,54,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,24930.322327,0,1,100,1,1,0.01,1,1,0,0,0,1,54,1,2,3,NA +70175,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,11699.431733,12221.798846,1,96,7,7,2.31,2,2,0,0,1,2,62,1,3,3,NA +70176,7,2,2,23,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,41953.42893,43056.968872,2,99,4,1,0.22,4,1,0,0,0,2,21,NA,NA,5,NA +70177,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,45403.540522,46903.566528,2,92,9,6,2.75,2,1,0,0,0,1,26,99,9,6,NA +70178,7,1,2,6,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,62595.719575,0,1,95,14,14,3.8,4,4,1,1,0,1,36,1,4,1,5 +70179,7,2,1,4,NA,4,4,1,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10690.995725,11017.541084,2,98,2,2,0.31,4,4,2,1,0,2,27,1,2,4,NA +70180,7,2,1,4,NA,4,4,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10510.490567,10944.785425,2,98,6,6,1,5,5,2,1,0,2,31,1,4,6,NA +70181,7,2,1,6,NA,1,1,2,6,72,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,13285.093011,13365.60735,2,94,77,77,NA,6,6,0,3,0,2,58,1,3,1,9 +70182,7,1,1,14,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10346.302718,0,2,91,4,4,0.65,5,5,1,3,0,1,43,2,3,5,NA +70183,7,2,2,37,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,2,1,2,2,2,2,1,2,2,1,2,2,2,41791.57979,40663.955172,2,102,6,6,1.03,5,5,1,1,0,1,37,1,2,1,2 +70184,7,2,1,16,NA,4,4,1,16,194,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16147.713323,16532.569027,1,92,15,15,4.44,5,5,0,3,0,2,43,1,5,6,NA +70185,7,2,2,4,NA,2,2,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13647.772496,15067.50708,2,93,7,7,1.83,3,3,1,0,0,1,40,2,5,1,4 +70186,7,2,1,48,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18533.049642,19320.837782,1,99,14,14,3.8,4,4,1,1,0,1,48,2,5,1,5 +70187,7,2,1,15,NA,5,6,2,15,180,NA,NA,2,1,99,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10567.840237,11294.273462,1,91,15,15,5,4,4,0,2,0,2,55,1,5,1,5 
+70188,7,1,1,34,NA,5,6,NA,NA,NA,2,NA,2,2,4,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15057.879445,0,3,90,12,12,NA,3,3,0,0,0,2,29,2,5,1,5 +70189,7,2,2,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,5,3,2,1,2,2,2,2,NA,NA,NA,NA,32455.694722,32314.335903,1,103,5,5,0.74,5,5,1,1,0,2,40,99,3,1,1 +70190,7,2,1,9,NA,1,1,1,9,116,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14042.313177,3,92,4,4,0.6,6,6,2,2,0,2,24,1,3,6,NA +70191,7,2,2,52,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,3,NA,2,2,2,1,2,2,NA,NA,NA,NA,30346.899457,30505.155956,1,93,5,5,0.84,5,5,1,2,0,2,52,2,1,3,NA +70192,7,2,2,22,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,17397.027021,18573.153951,3,91,7,7,3.21,1,1,0,0,0,2,22,1,5,5,NA +70193,7,2,2,17,NA,2,2,2,17,208,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17277.331962,18186.067085,1,90,6,6,0.81,6,6,0,3,0,2,45,1,4,1,2 +70194,7,2,2,29,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,13440.945024,14349.620822,1,103,15,8,4.03,2,1,0,0,0,2,29,2,5,6,NA +70195,7,2,1,79,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,16888.31509,17277.940364,1,92,14,5,2.15,3,1,0,0,2,1,51,1,4,5,NA +70196,7,2,1,37,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,5,NA,2,2,2,1,2,2,2,2,2,2,34997.800447,35379.102239,2,96,5,5,0.78,5,5,0,2,0,1,37,2,1,5,NA +70197,7,2,1,0,6,2,2,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5385.874932,5867.351136,2,90,8,8,2.24,4,4,1,1,0,2,29,1,4,6,NA +70198,7,2,2,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11355.3308,11862.334174,1,96,15,15,5,2,2,0,0,2,2,62,1,4,1,2 +70199,7,2,1,7,NA,3,3,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,60593.636684,64068.123183,1,91,10,10,2.77,5,5,0,3,0,1,43,1,5,1,5 +70200,7,2,1,52,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,27551.752692,27682.321328,1,98,3,3,0.73,3,3,0,0,0,1,52,1,4,1,3 +70201,7,1,2,26,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,130601.953362,0,2,91,15,9,5,2,1,0,0,0,2,26,1,5,5,NA 
+70202,7,2,1,22,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,9956.598907,10266.888978,1,95,5,5,0.73,6,6,1,0,1,1,62,2,3,1,NA +70203,7,2,1,46,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,12262.33683,12218.03828,2,100,4,4,0.44,7,7,1,2,2,1,71,2,1,1,1 +70204,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,18097.801029,17328.248994,2,100,7,7,1.38,5,5,1,0,0,2,45,1,2,3,NA +70205,7,2,2,7,NA,3,3,1,7,90,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,25527.806244,26565.240294,2,101,3,3,0.3,7,7,1,2,0,2,50,1,2,4,NA +70206,7,2,2,34,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,12808.938247,13423.491647,2,100,15,15,5,4,4,1,1,0,1,36,2,5,1,5 +70207,7,2,2,8,NA,3,3,1,8,101,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,63195.899182,62711.128206,1,100,15,15,5,4,4,1,1,0,1,40,1,5,1,5 +70208,7,2,1,2,NA,3,3,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,29706.069213,32871.212026,1,99,10,10,2.48,5,5,2,1,0,1,33,1,5,1,5 +70209,7,2,2,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,27945.726298,28031.650144,1,94,7,7,1.29,6,6,1,3,0,1,38,1,3,1,2 +70210,7,2,1,47,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,41527.444056,55523.849773,1,95,5,5,1.5,2,2,0,1,0,1,47,1,4,2,NA +70211,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,38758.039282,44961.529423,1,98,6,6,1.9,2,2,0,0,2,1,80,1,1,1,2 +70212,7,2,2,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,20048.680628,21335.7055,2,95,5,5,1.05,3,3,0,1,0,1,43,1,3,1,2 +70213,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,94698.084211,97410.286035,1,101,14,14,3.15,5,5,2,1,0,1,35,1,4,1,5 +70214,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,25212.845431,2,101,1,1,0.42,1,1,0,0,0,1,21,1,4,5,NA +70215,7,2,2,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,1,2,NA,44484.790948,49854.064081,1,103,15,15,5,3,2,0,0,3,2,63,1,5,77,NA 
+70216,7,2,2,0,8,4,4,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3941.796129,4340.984667,1,99,14,14,3.67,4,4,1,0,0,2,49,1,3,1,3 +70217,7,2,1,40,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19158.860679,19089.647955,1,100,15,15,5,2,2,0,0,0,1,40,2,5,1,5 +70218,7,2,1,0,11,1,1,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6298.658963,6412.943673,2,94,5,5,1.07,4,4,2,0,0,1,37,2,1,1,1 +70219,7,2,2,69,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,42101.975168,42617.525207,1,95,6,6,0.96,5,5,1,0,1,2,69,1,1,2,NA +70220,7,2,2,31,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,1,2,2,2,2,2,2,2,2,2,2,2,41791.57979,43231.925575,2,102,10,10,3.04,4,4,2,0,0,2,31,2,2,1,NA +70221,7,2,2,11,NA,4,4,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9122.654131,9281.792212,1,96,9,9,3.14,3,3,0,2,0,2,39,NA,NA,3,NA +70222,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,88617.795432,87713.219218,1,91,15,15,5,1,1,0,0,1,1,60,1,5,1,NA +70223,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,146181.198007,145805.68861,2,91,15,2,0.79,7,1,0,0,1,1,49,NA,NA,5,NA +70224,7,2,2,19,NA,3,3,1,19,234,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,39616.634313,40360.331753,2,101,5,3,1.1,2,1,0,0,0,1,19,1,4,NA,NA +70225,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105262.341027,112370.874556,2,98,10,10,4.42,2,2,0,0,0,1,25,1,5,1,5 +70226,7,2,2,70,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,NA,12849.335508,13810.42202,1,96,4,4,1.18,2,2,0,0,1,2,70,1,1,5,NA +70227,7,2,2,46,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15819.48188,17091.862943,1,90,15,15,5,5,5,0,3,0,2,46,2,4,1,5 +70228,7,2,1,67,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,7410.50521,7700.344649,2,96,3,3,0.82,2,2,0,1,1,1,67,1,3,3,NA +70229,7,2,2,30,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,3,1,2,2,2,2,1,2,2,1,2,2,1,41791.57979,41445.196731,2,102,4,4,0.61,5,5,0,3,0,1,34,2,3,1,3 
+70230,7,2,1,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,15976.466658,17286.192057,2,96,7,7,2.45,2,2,0,0,0,1,24,2,5,5,NA +70231,7,2,2,37,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,21619.283038,22210.398171,2,102,15,15,5,4,4,0,2,0,1,39,1,4,1,5 +70232,7,2,2,18,NA,5,6,2,18,217,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12224.9472,12547.183925,1,97,14,14,2.72,7,7,0,2,0,1,40,1,5,1,5 +70233,7,2,2,12,NA,1,1,2,12,148,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,15591.526146,16856.688627,1,90,6,6,1.11,5,5,1,2,0,1,30,2,1,6,NA +70234,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,1,2,1,1,2,1,1,2,NA,28559.076421,28710.827523,1,95,2,2,0.87,1,1,0,0,1,1,63,1,1,5,NA +70235,7,2,1,61,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,2,NA,2,2,2,1,2,2,1,2,2,2,8609.250304,11228.904188,2,90,3,3,1.01,1,1,0,0,1,1,61,2,1,2,NA +70236,7,2,2,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,11355.3308,11862.334174,1,96,15,15,5,2,2,0,0,2,1,68,1,3,1,4 +70237,7,2,1,13,NA,3,3,2,14,168,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,68701.580401,72973.564721,2,94,15,15,5,5,5,0,3,0,1,44,1,5,1,4 +70238,7,2,2,25,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,3,2,1,2,2,1,2,2,1,2,2,1,32455.694722,32314.335903,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +70239,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,19075.861607,19022.257361,1,96,10,10,3.04,4,4,0,1,0,2,43,1,5,1,4 +70240,7,2,2,4,NA,4,4,1,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,11015.482909,2,100,99,99,NA,6,6,2,1,0,2,44,1,3,1,4 +70241,7,2,1,13,NA,4,4,2,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11800.231369,11823.561392,1,96,7,7,1,7,7,2,1,1,2,53,1,4,1,3 +70242,7,2,1,1,22,4,4,2,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5799.129348,6038.750138,1,90,4,4,0.67,5,5,3,0,0,2,32,2,3,3,NA +70243,7,1,2,51,NA,5,6,NA,NA,NA,2,NA,2,1,6,NA,1,3,NA,1,2,1,1,2,1,NA,NA,NA,NA,21018.496735,0,1,91,9,9,2.68,4,4,0,0,1,1,20,NA,NA,5,NA 
+70244,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,9772.038079,10486.56257,1,95,77,77,NA,3,3,0,0,2,1,80,1,1,1,3 +70245,7,2,2,1,16,4,4,2,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6398.740074,6885.115697,1,93,6,6,0.83,6,6,3,1,0,1,37,NA,NA,1,3 +70246,7,2,2,72,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,14971.827573,15430.401607,2,97,8,8,2.7,3,3,0,0,1,2,72,1,2,3,NA +70247,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17852.668137,17947.072019,3,91,9,9,4.08,2,2,0,1,0,2,54,2,5,1,NA +70248,7,2,2,67,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,7278.790659,7875.678599,2,93,3,3,0.66,2,2,0,0,1,1,54,2,1,1,1 +70249,7,2,1,7,NA,3,3,2,8,96,NA,NA,2,1,3,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,38712.032122,41122.784973,3,91,15,15,5,4,4,1,1,0,2,41,1,5,1,5 +70250,7,2,2,0,5,2,2,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10218.497734,10897.586635,2,101,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +70251,7,2,2,43,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,43535.993088,45116.30779,2,102,77,77,NA,4,4,0,1,0,1,47,1,2,1,3 +70252,7,2,1,40,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,14227.821648,16178.124803,3,90,2,2,0.45,1,1,0,0,0,1,40,2,2,4,NA +70253,7,2,1,0,8,5,7,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6246.568228,6909.306675,2,95,1,1,0.03,3,3,1,0,0,1,23,1,3,6,NA +70254,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,115926.402585,128579.517653,1,101,14,14,3.25,4,4,0,1,0,1,48,1,4,1,2 +70255,7,2,2,38,NA,1,1,1,NA,NA,2,NA,2,2,99,NA,1,6,2,2,2,2,1,2,2,2,2,2,2,53370.792448,51930.736348,1,100,NA,13,NA,2,1,0,0,0,1,32,2,1,6,NA +70256,7,2,2,16,NA,5,6,1,16,198,NA,NA,2,1,4,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9098.177005,9288.806625,1,98,14,14,3.9,4,4,0,1,0,2,52,1,5,1,5 +70257,7,2,2,1,20,4,4,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5687.793894,6002.477845,2,90,8,6,1.67,5,3,2,0,0,2,25,2,3,5,NA 
+70258,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,61710.107686,72008.199278,1,98,3,3,0.9,1,1,0,0,0,2,24,1,5,5,NA +70259,7,2,1,65,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,1,1,7514.993062,8122.478925,2,90,2,2,0.48,2,2,0,0,1,2,53,2,3,1,3 +70260,7,2,2,0,11,2,2,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,6235.764746,6104.57156,2,92,NA,NA,NA,4,4,2,0,0,1,40,NA,NA,1,NA +70261,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,37814.382501,38402.065392,1,98,6,4,1.34,2,1,0,0,0,2,22,1,4,6,NA +70262,7,2,1,7,NA,2,2,1,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10738.959181,11444.574702,2,93,10,10,2.26,6,6,0,4,0,1,34,1,4,1,3 +70263,7,2,2,49,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,18522.193415,19195.925691,1,90,9,9,2.6,4,4,0,1,0,2,49,2,2,1,5 +70264,7,2,2,23,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,NA,NA,NA,1,2,2,1,11475.373333,11965.391974,2,92,12,NA,NA,7,1,0,0,2,1,53,2,3,1,3 +70265,7,2,2,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,19378.199212,21493.287569,2,100,5,5,1.3,3,3,0,1,0,2,46,1,3,2,NA +70266,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16420.864787,17154.038836,2,102,14,14,5,2,2,0,0,2,1,64,1,4,1,3 +70267,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,20891.980831,21490.337905,2,97,12,14,5,2,1,0,0,0,1,53,NA,NA,5,NA +70268,7,2,1,46,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,30549.358294,31009.655884,2,101,5,5,1.93,1,1,0,0,0,1,46,1,4,5,NA +70269,7,2,1,0,9,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8212.233655,8522.67606,2,91,2,2,0.33,5,5,1,1,0,2,48,1,4,3,NA +70270,7,2,2,2,NA,4,4,2,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6084.391121,6799.274602,2,99,3,3,0.56,4,4,1,0,0,2,38,1,3,5,NA +70271,7,1,1,34,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,17879.023129,0,2,90,7,7,3.49,1,1,0,0,0,1,34,1,3,5,NA 
+70272,7,2,2,1,16,4,4,2,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6811.556363,7329.310628,1,96,12,12,NA,5,5,1,2,0,2,35,1,5,1,4 +70273,7,2,2,8,NA,1,1,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15962.145468,16371.237244,2,98,1,1,0.18,4,4,0,2,0,1,29,1,4,6,NA +70274,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,30212.098573,33743.401779,2,98,3,3,1.24,1,1,0,0,1,2,80,1,4,2,NA +70275,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,26999.643202,25671.572746,2,102,6,6,1.22,5,5,0,2,0,2,42,1,4,1,4 +70276,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,164908.075108,169304.25724,1,98,15,15,5,3,3,0,0,0,1,56,1,5,1,5 +70277,7,2,2,0,9,1,1,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6021.973783,5836.909368,1,102,2,2,0.19,7,7,2,2,0,1,48,2,9,1,9 +70278,7,2,2,36,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,2,1,3,1,2,2,1,2,2,NA,NA,NA,NA,21765.629914,21610.101161,2,90,6,6,1.03,6,6,3,1,0,1,45,2,2,1,2 +70279,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,17521.481386,17626.852628,2,97,99,99,NA,1,1,0,0,0,2,53,1,3,3,NA +70280,7,2,1,18,NA,2,2,1,18,221,2,NA,2,2,3,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20606.470013,20581.836663,1,103,6,6,0.93,5,5,0,1,0,1,39,2,3,1,3 +70281,7,2,2,30,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,20012.879008,20053.743639,1,100,7,7,3.13,1,1,0,0,0,2,30,1,4,5,NA +70282,7,2,1,24,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,2,5,NA,2,2,2,2,2,2,1,2,2,1,35669.2076,36155.406423,2,94,7,7,1.23,6,6,2,1,0,1,33,2,1,6,NA +70283,7,2,1,11,NA,1,1,2,11,141,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13484.595524,13595.798197,1,97,8,8,1.45,6,6,2,2,0,2,36,2,2,1,1 +70284,7,2,2,4,NA,5,6,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3864.413878,3822.111022,1,99,14,14,2.66,7,7,3,1,0,1,35,1,5,1,5 +70285,7,2,2,37,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,28958.579549,29304.033516,1,96,13,13,NA,5,5,1,1,0,1,42,1,3,5,NA 
+70286,7,2,1,18,NA,3,3,2,18,221,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,71231.747774,75661.062324,2,95,9,9,2.22,5,5,1,0,0,1,55,1,4,1,5 +70287,7,2,2,46,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18844.090639,18943.737107,2,96,15,15,5,2,2,0,0,0,1,45,1,5,1,4 +70288,7,2,2,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,2,1,2,2,1,2,2,1,2,2,1,71034.153987,76359.172033,1,98,6,6,1.11,5,5,0,2,1,2,37,1,1,1,1 +70289,7,2,1,8,NA,3,3,2,8,99,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,46228.073505,47549.950918,2,94,10,10,2.91,4,4,0,2,0,2,38,1,4,1,4 +70290,7,2,1,60,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,10880.024478,11166.738171,2,91,9,9,3.14,3,3,0,1,2,1,60,1,4,1,3 +70291,7,2,2,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,35313.648114,35629.376203,2,95,6,6,0.9,6,6,1,1,0,1,49,1,1,1,1 +70292,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19130.246369,22400.771377,2,95,15,10,3.67,5,3,0,0,0,1,47,1,5,1,3 +70293,7,2,2,51,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,17149.727778,17902.578105,1,90,4,4,0.78,4,4,0,0,1,1,69,2,4,1,3 +70294,7,2,1,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,8219.195224,8283.488354,2,99,15,15,5,2,2,0,0,2,1,60,1,5,1,5 +70295,7,2,1,47,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,21158.364877,21743.069946,1,96,15,15,4.9,4,4,0,1,0,1,47,1,3,1,5 +70296,7,2,2,0,4,3,3,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17674.326293,18731.006164,3,90,15,15,5,3,3,1,0,0,1,31,1,5,1,5 +70297,7,2,1,73,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,60942.568495,64726.722108,1,94,7,7,2.51,2,2,0,0,2,2,72,1,4,1,1 +70298,7,2,2,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,22707.329726,23560.698233,2,97,2,2,0.3,4,4,0,2,0,1,42,1,2,6,NA +70299,7,2,1,62,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,7192.368251,7796.453586,1,93,4,4,1.74,1,1,0,0,1,1,62,2,3,2,NA 
+70300,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,27466.648066,27396.091902,1,99,2,2,0.61,2,2,0,0,0,1,46,1,5,5,NA +70301,7,2,2,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,21143.964074,21647.100945,1,100,1,1,0.04,4,4,1,1,0,2,51,1,3,3,NA +70302,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,42894.724338,43561.362107,1,98,6,4,1.34,2,1,0,0,0,1,24,1,5,5,NA +70303,7,2,1,56,NA,2,2,2,NA,NA,1,1,2,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,34813.426994,34467.752983,1,97,15,15,5,1,1,0,0,0,1,56,2,4,3,NA +70304,7,2,2,65,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,122483.259869,138551.168646,3,91,7,7,3.49,1,1,0,0,1,2,65,1,3,2,NA +70305,7,2,1,0,2,3,3,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23912.171644,23498.78217,1,91,15,15,5,3,3,1,0,0,1,33,1,5,1,5 +70306,7,2,1,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25645.251384,25514.298117,1,92,6,6,1.31,3,3,0,0,1,2,80,1,3,4,NA +70307,7,2,1,22,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,47487.549895,49034.44781,1,102,6,6,1.48,3,3,0,0,1,2,57,2,1,1,4 +70308,7,2,1,5,NA,1,1,1,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20874.345556,20902.551716,3,92,2,2,0.47,3,3,1,1,0,2,33,1,4,5,NA +70309,7,2,1,2,NA,1,1,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12413.685227,12209.161559,1,102,6,6,1.46,3,3,1,1,0,2,28,2,4,3,NA +70310,7,1,2,11,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,1,2,NA,NA,NA,NA,9139.784234,0,2,100,6,6,1.18,5,5,0,2,1,2,70,1,2,77,NA +70311,7,2,1,7,NA,1,1,1,8,96,NA,NA,2,2,2,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13581.60325,14182.420159,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +70312,7,2,2,73,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,1,4,NA,2,2,2,1,2,2,1,2,2,NA,17318.187297,23904.945555,2,90,2,2,0.73,1,1,0,0,1,2,73,2,1,4,NA +70313,7,2,2,37,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,26465.930618,27183.288271,2,100,15,15,4.47,4,4,0,2,0,1,39,NA,NA,1,5 
+70314,7,2,1,5,NA,3,3,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58618.419318,66135.507563,2,94,12,12,NA,5,5,1,1,0,1,37,1,4,1,3 +70315,7,2,2,5,NA,5,6,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4938.043177,5373.703942,1,91,5,5,0.89,4,4,2,0,0,1,39,1,4,1,5 +70316,7,2,1,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4667.634102,4955.769831,3,90,15,15,5,5,5,1,0,1,1,38,2,3,1,4 +70317,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,12813.519628,13398.278814,2,92,7,7,1.61,4,4,0,2,0,1,51,2,3,1,3 +70318,7,2,2,23,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,85402.868381,86166.428247,2,94,77,77,NA,2,2,0,0,0,2,23,1,3,6,NA +70319,7,2,1,55,NA,5,6,1,NA,NA,2,NA,2,2,7,NA,1,4,NA,1,2,2,1,2,2,1,2,2,1,16499.662173,17012.89406,3,91,6,6,1.77,2,2,0,0,0,2,57,2,1,1,1 +70320,7,2,1,4,NA,2,2,1,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16288.754504,16020.386509,2,92,15,15,5,3,3,1,0,0,2,48,2,5,1,5 +70321,7,2,1,63,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,4,3,NA,2,2,2,1,2,2,1,2,2,2,9115.676792,9431.98541,2,90,5,5,1.84,1,1,0,0,1,1,63,2,4,3,NA +70322,7,2,1,16,NA,1,1,2,16,200,NA,NA,2,2,3,10,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,21633.039913,21721.097793,2,94,7,7,1.04,7,7,0,3,0,1,37,2,1,1,3 +70323,7,2,2,7,NA,4,4,2,7,89,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8757.841043,8910.615224,2,97,2,2,0.38,3,3,1,1,0,2,27,1,2,5,NA +70324,7,2,2,14,NA,3,3,2,15,180,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,109773.944307,112955.590462,1,98,15,15,4.34,4,4,0,2,0,1,51,1,5,1,5 +70325,7,2,2,6,NA,1,1,2,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14679.337943,15307.210901,1,90,15,15,4.77,4,4,1,1,0,2,41,1,5,1,2 +70326,7,2,1,35,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,22188.836739,23952.480361,1,95,5,5,0.89,4,4,0,1,0,2,42,1,4,6,NA +70327,7,2,2,62,NA,2,2,2,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,2,12121.359422,12627.249955,2,99,15,15,5,2,2,0,0,2,1,64,2,5,1,4 
+70328,7,2,1,27,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,3,1,NA,2,2,2,1,2,2,1,2,2,2,35782.041084,36966.59793,2,96,1,1,0.06,5,5,2,1,0,1,27,2,3,1,4 +70329,7,2,2,60,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,9716.805546,12994.252166,2,90,3,3,0.46,5,5,0,2,2,1,75,2,1,1,2 +70330,7,2,1,77,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,8286.514589,8909.879599,3,91,7,7,1.33,6,6,0,0,2,2,51,2,5,1,5 +70331,7,2,1,52,NA,1,1,1,NA,NA,2,NA,2,2,7,NA,1,1,NA,2,2,1,2,2,1,NA,NA,NA,NA,33162.406014,36437.864606,3,92,4,4,0.66,4,4,0,1,0,1,52,2,1,1,1 +70332,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,10676.164039,11456.797578,2,97,4,4,1.29,2,2,0,0,2,1,74,1,3,3,NA +70333,7,2,2,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,48470.428632,49964.788237,1,98,15,15,5,5,5,0,1,1,2,55,1,5,1,5 +70334,7,2,1,11,NA,4,4,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10229.206765,10406.656985,1,96,15,15,4.52,6,6,0,4,0,1,46,1,4,1,4 +70335,7,2,2,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,38954.135779,44674.189003,1,91,3,3,1.15,1,1,0,0,0,2,53,1,4,3,NA +70336,7,2,2,74,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,2,2,2,NA,15730.58404,17568.357111,2,98,6,6,1.62,3,3,0,0,1,2,74,1,1,2,NA +70337,7,2,1,38,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19788.748292,24506.181083,1,94,7,7,1.29,6,6,1,3,0,1,38,1,3,1,2 +70338,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,42468.064168,42915.721814,2,101,6,6,1.16,4,4,0,3,0,2,36,1,4,4,NA +70339,7,2,2,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,33248.548181,34134.900613,1,98,4,4,0.89,3,3,0,0,1,2,55,1,5,1,NA +70340,7,2,2,16,NA,5,6,2,16,197,NA,NA,1,1,NA,8,NA,NA,NA,1,2,1,1,2,1,1,2,2,1,10767.566937,11540.84845,2,91,12,12,NA,7,6,0,4,2,2,72,2,1,2,NA +70341,7,2,1,62,NA,5,6,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12579.986433,13271.133625,1,92,14,14,5,2,2,0,0,2,1,62,1,4,1,4 
+70342,7,2,2,61,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,2,2,NA,1,2,1,1,2,1,1,2,1,3,11838.431472,12295.352662,2,92,7,7,1.89,3,3,0,0,1,1,36,2,3,5,NA +70343,7,2,1,11,NA,5,6,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7179.084455,7651.500738,2,98,9,9,2.29,5,5,0,2,0,1,36,1,4,1,4 +70344,7,2,1,1,18,4,4,1,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6376.965739,6390.839385,2,100,3,3,0.31,7,7,3,2,0,2,28,1,3,1,3 +70345,7,2,1,5,NA,1,1,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14321.363328,13945.949794,2,96,77,77,NA,7,7,3,2,0,2,33,2,2,6,NA +70346,7,2,2,11,NA,1,1,1,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15962.145468,16412.026403,2,98,14,14,2.87,5,5,0,3,0,2,34,1,2,1,2 +70347,7,2,1,70,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,10025.317761,10256.608873,2,103,15,15,5,3,3,0,0,1,2,55,1,4,1,5 +70348,7,2,2,9,NA,3,3,1,9,118,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22933.149195,23865.138008,1,101,4,4,0.58,6,6,0,4,0,2,41,1,3,5,NA +70349,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,12535.973802,12972.182488,2,100,4,4,0.97,3,3,0,0,3,2,80,1,5,2,NA +70350,7,2,2,18,NA,3,3,1,18,221,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,71832.578284,79011.982474,1,92,14,14,3.16,6,6,1,1,0,1,49,1,1,1,3 +70351,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,38799.676345,43482.761402,1,94,6,6,2.24,1,1,0,0,1,2,80,1,4,2,NA +70352,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,29167.119125,29652.805867,1,101,2,2,0.22,4,4,1,0,0,2,25,1,4,6,NA +70353,7,2,1,6,NA,4,4,2,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8227.856305,9153.104022,3,91,1,1,0.07,6,6,2,3,0,2,30,1,2,3,NA +70354,7,2,1,17,NA,4,4,2,17,207,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11125.932433,11147.929312,1,96,8,8,2,4,4,1,2,0,2,40,1,4,5,NA +70355,7,2,1,65,NA,5,6,2,NA,NA,2,NA,2,1,4,NA,4,1,NA,1,2,1,1,2,1,1,2,1,3,9963.081107,10271.419526,1,91,4,4,1.33,2,2,0,0,2,1,65,2,4,1,3 
+70356,7,2,2,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,5,2,1,2,2,1,2,2,1,2,2,1,67534.233567,86418.733625,1,97,3,3,0.83,2,2,0,0,0,2,25,1,1,5,NA +70357,7,2,2,43,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,12601.697316,12716.910215,1,91,15,15,5,6,6,0,2,2,1,50,2,5,1,5 +70358,7,2,2,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,154825.466557,155723.680504,1,91,8,8,4.41,1,1,0,0,0,2,44,1,4,3,NA +70359,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,46965.818538,50603.721684,1,95,14,14,5,2,2,0,0,2,1,80,1,4,1,NA +70360,7,2,2,8,NA,4,4,2,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8147.287486,8823.301862,2,90,5,5,1.63,2,2,0,1,0,2,38,2,3,5,NA +70361,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,1,2,1,2,2,1,1,2,NA,50879.818001,58591.132961,1,101,6,6,1.31,3,3,0,0,1,2,80,1,1,2,NA +70362,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,8308.628726,8679.600111,2,95,5,5,1.96,1,1,0,0,1,2,61,1,4,2,NA +70363,7,2,2,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,122483.259869,124909.680309,3,91,15,15,5,2,2,0,0,2,2,62,1,5,1,5 +70364,7,2,1,73,NA,5,7,1,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11532.082424,12399.599956,2,103,12,12,NA,2,2,0,0,2,1,73,2,5,1,4 +70365,7,2,2,7,NA,5,6,1,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8290.163782,8692.019106,1,92,2,2,0.24,5,5,0,2,0,1,35,2,4,1,3 +70366,7,2,2,49,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,19693.606802,19976.427063,1,99,12,12,NA,2,2,0,0,0,1,28,1,5,5,NA +70367,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,56938.907307,57514.428801,2,95,15,15,4.63,5,5,1,2,0,2,36,1,5,1,3 +70368,7,2,1,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19779.547388,20214.272069,2,96,15,15,5,3,3,0,1,0,1,55,1,5,1,4 +70369,7,2,2,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,60324.348827,64927.943097,1,95,4,4,0.97,3,3,2,0,0,2,22,1,4,5,NA 
+70370,7,2,1,6,NA,4,4,2,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8439.71412,8914.772112,2,99,7,7,1.19,6,6,1,3,0,2,38,1,3,5,NA +70371,7,2,2,2,NA,1,1,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11582.174418,12474.053558,2,102,7,7,1.04,7,7,1,2,0,2,37,2,1,1,2 +70372,7,2,1,61,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,4,1,NA,2,2,2,1,2,2,1,2,2,1,11568.876339,11794.347884,1,102,77,77,NA,3,3,0,0,1,1,61,2,4,1,1 +70373,7,2,1,5,NA,1,1,2,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14505.510202,14676.996441,2,94,77,77,NA,5,5,1,1,0,1,41,2,2,1,2 +70374,7,2,1,4,NA,3,3,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,33334.752566,38103.457131,3,92,6,6,0.74,7,7,2,1,0,2,46,1,2,1,4 +70375,7,2,2,19,NA,1,1,1,19,231,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,21062.314667,23105.758741,1,94,6,6,1.3,4,4,2,0,0,1,24,2,1,1,4 +70376,7,2,1,17,NA,1,1,1,17,215,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14432.845547,14415.592261,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +70377,7,2,1,3,NA,2,2,2,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12403.412256,12420.172189,2,90,2,2,0.49,3,3,2,0,0,2,26,1,4,1,NA +70378,7,2,1,53,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,18681.278463,18953.419735,1,93,7,7,1.79,4,4,0,2,0,1,53,2,4,1,4 +70379,7,2,2,2,NA,2,2,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11443.671453,12634.120381,1,100,15,15,4.34,4,4,2,0,0,2,35,1,5,1,5 +70380,7,2,1,9,NA,4,4,2,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7730.47951,9212.541007,1,99,6,6,1.3,5,5,1,2,0,1,34,1,2,1,3 +70381,7,2,1,24,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,39031.957066,39638.562591,1,95,6,6,0.96,5,5,1,0,1,2,69,1,1,2,NA +70382,7,2,1,1,22,1,1,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,10960.671575,11090.250219,2,96,4,4,0.69,5,5,2,0,0,2,57,2,1,4,NA +70383,7,2,2,4,NA,3,3,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,67177.961189,74143.696839,1,98,9,9,2.6,4,4,1,1,0,2,35,1,2,1,NA 
+70384,7,2,1,25,NA,4,4,1,NA,NA,2,NA,2,1,4,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,23484.626749,24110.252242,2,96,6,6,1.7,2,2,0,1,0,1,25,2,4,5,NA +70385,7,2,2,57,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,15002.194143,18506.393496,3,90,7,2,0.77,2,1,0,0,0,1,44,1,2,6,NA +70386,7,2,1,38,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17554.053699,17499.245836,1,96,2,2,0.73,1,1,0,0,0,1,38,1,3,5,NA +70387,7,2,2,60,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,3,6,NA,2,2,2,2,2,2,2,2,2,2,15876.871857,17052.998921,2,102,14,7,3.67,2,1,0,0,2,1,65,2,3,6,NA +70388,7,2,1,12,NA,3,3,2,12,155,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,56223.281913,55799.760014,1,99,14,14,4.86,3,3,0,1,0,1,56,1,5,1,5 +70389,7,2,2,56,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,3,4,NA,2,2,2,2,2,2,2,2,1,2,20734.495277,22784.232134,2,99,99,99,NA,4,1,0,0,0,2,42,2,4,5,NA +70390,7,2,2,8,NA,1,1,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10118.363218,10311.586628,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +70391,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,27681.279,30792.259764,1,91,3,3,1.29,1,1,0,0,1,2,80,1,3,5,NA +70392,7,2,1,0,10,2,2,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5610.286388,5712.081065,2,90,6,6,1.34,4,4,1,2,0,2,36,2,3,77,NA +70393,7,2,1,9,NA,4,4,1,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9418.975084,9571.535533,2,93,5,5,1.04,4,4,1,1,0,1,29,1,3,6,NA +70394,7,2,1,47,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,13883.119445,13832.965705,1,103,15,15,5,1,1,0,0,0,1,47,1,5,5,NA +70395,7,1,1,36,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,114168.79702,0,1,95,14,14,3.8,4,4,1,1,0,1,36,1,4,1,5 +70396,7,2,1,6,NA,5,7,1,6,77,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8930.369586,9194.254241,3,91,14,14,3.8,4,4,0,2,0,1,47,1,5,1,5 +70397,7,2,1,19,NA,1,1,1,19,234,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,21898.969807,22012.999264,2,102,15,15,3.92,5,5,0,0,0,1,19,1,4,NA,NA 
+70398,7,2,2,57,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,2,4,NA,2,2,2,1,2,2,1,2,2,NA,21097.069797,24201.517172,2,90,6,6,1.7,2,2,0,0,0,2,57,2,2,4,NA +70399,7,2,1,24,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14379.41014,15757.242021,1,94,7,3,0.9,4,1,0,0,0,1,24,NA,NA,5,NA +70400,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,1,2,2,1,18723.98095,17927.802584,2,101,7,7,1.3,5,5,2,0,1,2,50,1,4,1,3 +70401,7,2,1,47,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,14879.667962,15815.934168,3,91,15,15,5,3,3,0,1,0,1,47,2,5,1,5 +70402,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,8796.577101,9772.20689,2,95,5,5,1.32,2,2,0,0,2,1,80,1,3,1,3 +70403,7,2,1,13,NA,1,1,1,13,159,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,19242.904841,19537.039374,2,96,5,5,0.68,6,6,0,3,2,1,60,2,1,1,1 +70404,7,2,2,7,NA,2,2,2,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20753.369981,21666.802981,1,97,5,5,1.04,4,4,1,1,0,1,32,1,3,6,NA +70405,7,2,2,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,118611.064701,118209.809508,1,91,15,15,5,2,2,0,0,2,1,63,1,4,1,5 +70406,7,2,2,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,31454.59168,33587.963223,1,94,4,4,1,3,3,0,1,0,2,41,1,4,5,NA +70407,7,1,2,3,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9492.170663,0,1,90,4,4,0.67,5,5,3,0,0,2,32,2,3,3,NA +70408,7,2,1,33,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,20803.970543,21557.298406,1,93,14,14,5,2,2,0,0,1,1,33,2,5,5,NA +70409,7,2,1,44,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,130374.456976,1,101,6,6,1.31,3,3,0,0,1,2,80,1,1,2,NA +70410,7,2,1,6,NA,2,2,1,6,80,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9741.585979,9800.624884,2,103,12,2,0.59,5,2,0,1,0,2,47,NA,NA,3,NA +70411,7,2,2,23,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,50915.06085,50693.303376,3,92,5,5,0.81,5,5,3,0,0,2,23,1,4,5,NA 
+70412,7,2,1,14,NA,1,1,2,15,180,NA,NA,1,1,NA,10,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,20398.562455,21727.336109,2,94,6,6,1.34,4,4,0,2,0,1,37,2,4,1,2 +70413,7,2,1,51,NA,5,6,2,NA,NA,2,NA,2,1,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9889.944368,10305.962662,1,99,10,10,3.51,3,3,0,1,0,2,44,1,3,1,5 +70414,7,2,2,25,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,128171.594518,136210.394705,1,93,15,15,5,2,2,0,0,0,1,28,1,5,6,NA +70415,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,52752.276869,57774.89643,1,98,12,12,NA,1,1,0,0,1,2,80,1,5,2,NA +70416,7,2,1,25,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,4,5,NA,1,2,2,1,2,2,1,2,1,2,38474.772527,40200.135096,2,93,7,2,0.81,3,1,0,0,0,1,25,2,4,5,NA +70417,7,2,1,80,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,2,2,2,1,2,2,1,2,2,NA,14200.083364,15006.095575,3,92,4,4,1.65,1,1,0,0,1,1,80,1,1,3,NA +70418,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,47566.45715,0,1,90,6,6,2.86,1,1,0,0,1,2,80,1,4,5,NA +70419,7,2,1,2,NA,2,2,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,2,2,1,NA,NA,NA,NA,10244.841997,10258.685193,1,96,5,5,0.94,4,4,2,0,0,1,32,2,3,1,4 +70420,7,2,2,55,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,2,NA,2,2,2,2,2,2,NA,NA,NA,NA,24004.6026,24129.784561,2,91,4,4,0.43,7,7,0,1,1,1,41,2,1,4,NA +70421,7,2,2,51,NA,2,2,2,NA,NA,2,NA,2,2,7,NA,2,3,NA,1,2,2,1,2,2,1,2,2,2,17054.056149,20556.446647,2,90,8,8,2.7,3,3,0,1,0,2,31,1,5,4,NA +70422,7,2,2,9,NA,2,2,2,9,114,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15148.721588,16989.423356,2,91,2,2,0.22,4,4,1,1,0,2,48,2,9,5,NA +70423,7,2,1,52,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32461.799549,39033.401297,1,101,5,5,0.89,5,5,1,0,0,1,25,1,2,77,NA +70424,7,2,1,28,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,2,6,NA,2,2,2,1,2,2,1,2,2,1,41271.869706,41834.437137,1,96,9,9,3.97,2,2,0,0,0,1,28,2,2,6,NA +70425,7,2,1,77,NA,3,3,2,NA,NA,1,2,2,1,9,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,63054.867183,67969.319596,1,90,8,8,3.3,2,2,0,0,2,1,77,2,2,1,5 
+70426,7,2,1,66,NA,1,1,1,NA,NA,2,NA,2,1,8,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,14488.953694,14844.946966,3,92,8,8,1.85,5,5,1,0,2,1,66,2,1,1,1 +70427,7,2,2,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,75152.05379,75383.121447,2,92,15,15,5,2,2,0,0,0,1,37,1,5,1,5 +70428,7,2,2,0,4,3,3,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10025.884543,10365.226844,3,92,6,6,0.74,7,7,2,1,0,2,46,1,2,1,4 +70429,7,2,2,44,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,134587.275919,139488.984678,1,92,8,8,2.17,4,4,0,1,2,2,80,1,3,2,NA +70430,7,2,2,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,97705.030285,101386.34066,2,94,14,14,4.71,3,3,1,0,0,1,35,1,5,1,5 +70431,7,2,2,20,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,2,5,2,1,2,2,2,2,2,1,2,2,1,32455.694722,32314.335903,2,103,5,5,0.65,6,6,1,0,1,2,61,2,1,2,NA +70432,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,145772.192378,152604.524555,1,95,15,15,5,4,4,0,2,0,2,42,1,5,1,5 +70433,7,2,1,16,NA,1,1,1,16,199,NA,NA,1,1,NA,8,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,22768.423624,22944.003607,2,98,14,14,2.87,5,5,0,3,0,2,34,1,2,1,2 +70434,7,2,2,0,2,3,3,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18869.566209,19997.704869,2,92,15,15,5,3,3,1,0,0,1,48,1,5,1,5 +70435,7,2,2,17,NA,2,2,1,17,214,2,NA,2,1,4,14,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,14984.257718,15414.270546,2,92,10,7,2.38,3,2,0,1,0,1,29,2,4,6,NA +70436,7,2,2,8,NA,4,4,1,8,103,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11116.391625,11717.604707,2,98,6,6,1,5,5,2,1,0,2,31,1,4,6,NA +70437,7,2,1,43,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,2,2,2,2,2,2,2,2,34153.424332,40514.279864,1,100,7,7,1.3,5,5,0,3,0,1,43,2,2,1,4 +70438,7,2,1,42,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,138834.18124,143254.613293,1,100,15,15,5,4,4,0,2,0,2,47,1,5,1,5 +70439,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,76027.409363,76707.146234,2,103,15,15,3.44,7,7,0,1,2,2,79,1,3,2,NA 
+70440,7,2,1,14,NA,4,4,2,14,178,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10081.970102,10544.115448,2,99,4,4,0.78,4,4,0,2,0,2,45,1,3,5,NA +70441,7,2,2,2,NA,2,2,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8134.473424,8243.872451,2,99,5,5,1.26,3,3,1,0,0,1,24,2,2,1,1 +70442,7,2,2,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,41309.21018,41341.432201,1,100,4,4,1.19,2,2,0,0,1,1,62,1,5,1,5 +70443,7,2,2,52,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,33018.025291,33777.066037,1,92,8,8,3.3,2,2,0,0,0,1,40,2,5,1,5 +70444,7,2,1,13,NA,1,1,1,13,160,NA,NA,2,2,3,6,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,27378.670648,27589.802811,2,96,3,3,0.95,2,2,0,1,0,2,38,2,3,3,NA +70445,7,2,2,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,52814.190351,53378.216173,1,98,12,12,NA,2,2,0,0,2,1,65,1,4,1,3 +70446,7,2,2,45,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,4,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,31235.666551,31398.557894,3,92,6,6,1,6,6,1,1,0,1,42,2,1,1,4 +70447,7,2,1,29,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,23484.626749,23642.328802,2,96,14,14,4.26,3,3,0,0,0,1,20,1,4,5,NA +70448,7,2,1,29,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,3,4,NA,2,2,2,1,2,2,2,2,2,2,40078.999044,48909.175469,3,91,6,6,0.89,7,7,1,1,0,1,59,2,1,1,1 +70449,7,2,2,12,NA,2,2,1,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20128.183753,21457.209224,2,100,14,14,3.58,4,4,0,2,0,2,40,NA,NA,1,4 +70450,7,2,1,15,NA,3,3,1,15,190,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,63633.689496,63154.346062,2,100,10,10,3.13,4,4,0,2,0,1,45,1,4,1,4 +70451,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,17286.767396,17743.751095,2,100,2,2,0.38,3,3,0,2,0,2,35,1,4,5,NA +70452,7,2,1,17,NA,5,6,2,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,6232.587755,6841.667882,1,91,7,7,1.57,4,4,0,3,0,2,38,2,2,3,NA +70453,7,2,2,1,22,4,4,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8221.160724,9187.103228,1,100,8,8,2.36,3,3,1,0,1,2,60,1,3,3,NA 
+70454,7,2,2,2,NA,2,2,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8200.025088,8716.981559,1,93,14,8,2.01,5,4,1,0,0,2,22,2,1,6,NA +70455,7,2,1,69,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,4526.005699,5855.572964,2,90,6,6,0.84,6,6,1,3,1,2,43,1,2,5,NA +70456,7,2,1,0,4,1,1,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7594.427215,7594.567194,1,98,15,15,5,3,3,1,0,0,1,38,1,4,1,5 +70457,7,2,2,40,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,2,1,2,1,1,2,1,1,2,2,3,16797.688743,17484.356971,3,91,15,15,5,3,3,0,1,0,2,40,2,5,1,5 +70458,7,2,2,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,26465.930618,26765.106529,2,100,10,10,2.33,6,6,0,2,2,2,35,1,2,5,NA +70459,7,2,2,4,NA,4,4,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9727.363166,10466.751274,2,97,6,6,1.02,6,6,1,2,0,1,37,1,3,1,3 +70460,7,2,2,5,NA,4,4,2,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9156.613403,9392.96113,2,99,7,5,1.59,3,2,1,0,0,2,23,1,4,5,NA +70461,7,1,2,5,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13196.707564,0,2,96,3,3,0.24,7,7,2,3,1,2,40,1,3,3,NA +70462,7,2,1,8,NA,2,2,2,8,107,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13232.943476,13442.347662,2,94,1,1,0.01,7,7,1,3,0,1,41,2,1,1,1 +70463,7,2,2,7,NA,3,3,2,8,96,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,35359.674949,35088.43355,1,99,10,10,2.48,5,5,2,1,0,1,33,1,5,1,5 +70464,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,11523.037911,11803.949722,2,97,4,4,1.65,1,1,0,0,1,2,60,1,4,5,NA +70465,7,2,1,4,NA,5,6,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9239.758777,10363.400393,2,91,14,14,3.47,4,4,1,1,0,2,40,2,5,1,NA +70466,7,2,2,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,1,1,2,2,1,2,2,NA,NA,NA,NA,43813.24867,46530.283453,1,98,1,1,0.16,3,3,1,0,0,1,28,1,2,6,NA +70467,7,2,1,12,NA,2,2,2,12,146,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21399.234084,21768.65302,1,91,15,15,5,4,4,0,2,0,1,49,1,5,1,5 
+70468,7,2,2,0,8,5,7,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5820.770257,5884.32621,1,95,2,2,0.47,3,3,2,0,0,2,24,1,4,5,NA +70469,7,2,2,0,4,2,2,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4807.611315,4659.866106,1,103,4,4,0.54,7,7,2,2,0,2,35,2,1,6,NA +70470,7,2,2,43,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,15552.65085,16188.423507,2,92,15,15,5,3,3,0,1,0,1,45,2,4,1,4 +70471,7,2,2,16,NA,5,7,2,16,198,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11711.384457,12440.215685,2,95,6,6,1.31,3,3,0,1,0,1,39,1,3,1,4 +70472,7,2,1,12,NA,3,3,2,13,157,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20152.417898,19887.568869,2,94,7,7,1.18,7,7,1,4,0,2,31,1,4,6,NA +70473,7,2,2,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,126117.5094,130777.186989,2,98,10,10,3.78,3,3,0,0,0,1,53,1,3,1,4 +70474,7,2,1,44,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,18116.816149,22904.074887,2,90,8,8,3.4,2,2,0,0,0,1,44,1,3,6,NA +70475,7,2,1,16,NA,5,6,2,16,200,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,6269.080236,6655.935643,1,93,NA,NA,NA,3,3,0,1,0,1,53,NA,NA,1,NA +70476,7,2,1,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16905.961576,20936.167849,2,94,7,7,1.62,5,5,0,3,0,1,30,1,2,1,9 +70477,7,2,1,9,NA,2,2,2,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15353.368573,15360.170072,1,97,7,7,1.74,4,4,0,3,0,2,32,1,4,5,NA +70478,7,2,2,34,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,17430.913214,17550.603029,1,90,15,15,5,2,2,0,0,0,2,34,2,5,1,5 +70479,7,2,2,25,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,17397.027021,18573.153951,3,91,2,2,0.73,1,1,0,0,0,2,25,2,4,5,NA +70480,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,136832.42119,141624.96623,2,91,15,15,5,2,2,0,0,1,2,60,1,5,1,5 +70481,7,2,1,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,32946.178621,33458.203523,2,96,3,3,0.53,5,5,3,0,0,2,26,1,4,1,4 
+70482,7,2,1,17,NA,5,7,2,17,207,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11712.675291,11995.185902,3,91,15,15,4.47,4,4,0,1,2,2,79,1,4,3,NA +70483,7,2,1,20,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15740.336751,16168.874324,2,91,10,10,2.59,5,5,0,1,0,1,48,NA,NA,6,NA +70484,7,2,2,19,NA,1,1,2,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19557.287652,19933.266032,2,94,5,5,1.08,3,3,0,1,0,2,37,2,2,4,NA +70485,7,2,1,34,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14790.183811,15030.369455,3,90,14,14,4.71,3,3,0,0,2,1,64,NA,NA,1,NA +70486,7,2,1,10,NA,2,2,2,10,130,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13285.093011,13365.60735,2,94,2,2,0.41,2,2,0,1,1,2,66,2,3,5,NA +70487,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,12935.870815,12581.846775,2,99,5,5,0.65,6,6,2,1,0,2,53,1,4,3,NA +70488,7,2,2,6,NA,4,4,2,6,83,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,1,2,NA,NA,NA,NA,7814.742747,7984.793423,2,95,2,2,0.26,4,4,0,2,0,2,44,NA,NA,5,NA +70489,7,2,1,40,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,5,1,NA,2,2,2,1,2,2,2,2,1,2,36225.654087,36329.951191,2,93,7,7,1.83,3,3,1,0,0,1,40,2,5,1,4 +70490,7,1,1,43,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,140932.152825,0,1,97,15,15,5,5,5,2,0,1,1,43,1,5,1,5 +70491,7,2,2,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8833.042831,8971.325329,1,99,4,4,0.53,7,7,3,1,0,2,26,1,1,5,NA +70492,7,2,2,0,0,4,4,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3516.231705,3777.543312,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +70493,7,2,1,52,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,19541.667675,20187.705923,2,95,2,2,0.67,1,1,0,0,0,1,52,1,2,5,NA +70494,7,2,1,25,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,39915.513053,44831.646235,2,98,7,7,1.97,4,4,0,1,0,1,40,1,3,1,3 +70495,7,2,1,68,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,26026.192556,26360.023593,1,94,3,3,0.86,2,2,0,0,2,1,68,1,4,1,2 
+70496,7,1,2,30,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,39483.840194,0,2,102,14,14,4.86,3,3,1,0,0,1,30,1,5,1,5 +70497,7,2,2,3,NA,4,4,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,10437.988787,11664.396755,2,100,NA,NA,NA,4,4,1,0,0,2,38,NA,NA,77,NA +70498,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,98514.948291,98181.677236,1,99,10,10,4.89,2,2,0,0,2,2,63,1,5,5,NA +70499,7,2,2,47,NA,1,1,2,NA,NA,2,NA,2,2,6,NA,1,1,NA,2,2,2,2,2,2,1,2,2,2,23968.560941,24093.554947,2,90,6,6,1.15,5,5,0,2,0,2,47,2,1,1,5 +70500,7,2,1,15,NA,1,1,1,15,183,NA,NA,1,1,NA,9,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,24902.864049,25283.513039,1,94,8,8,1.85,5,5,0,2,0,1,44,2,1,6,NA +70501,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105873.555835,106383.882408,1,98,15,15,5,5,5,0,1,0,1,53,1,5,1,5 +70502,7,2,1,71,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,NA,48254.793439,51076.634961,2,100,14,10,5,2,1,0,0,2,1,71,1,2,6,NA +70503,7,2,2,18,NA,4,4,1,18,224,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,99,1,0.09,4,1,0,0,0,2,18,1,4,NA,NA +70504,7,2,1,59,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,19671.580166,20292.088214,2,99,7,7,2.31,2,2,0,0,1,1,62,1,3,5,NA +70505,7,2,2,40,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,3,1,2,2,2,2,2,2,2,1,2,2,2,23968.560941,24556.443674,2,90,99,99,NA,5,5,1,1,0,2,40,2,3,1,1 +70506,7,2,2,9,NA,4,4,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9153.600624,9398.485171,1,100,8,8,1.61,6,6,1,3,0,1,29,1,5,6,NA +70507,7,2,2,80,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,19066.820866,21138.377975,2,99,6,6,2.69,1,1,0,0,1,2,80,2,3,5,NA +70508,7,1,1,0,3,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6246.568228,0,2,95,9,9,1.81,6,6,1,1,0,2,56,1,4,3,NA +70509,7,2,1,8,NA,2,2,1,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13395.612951,13472.514863,2,102,15,12,NA,5,4,0,3,0,1,42,2,4,6,NA 
+70510,7,2,1,6,NA,5,6,2,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6905.911299,7686.337387,2,90,4,4,0.81,3,3,0,1,0,2,41,2,2,6,NA +70511,7,2,1,36,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17643.563124,18510.507534,3,91,15,15,5,4,4,2,0,0,1,36,2,5,1,5 +70512,7,2,2,25,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,2,1,2,2,1,2,2,1,2,2,3,16929.836231,20416.10433,2,101,5,3,0.92,2,1,0,0,0,2,20,NA,NA,5,NA +70513,7,1,1,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15682.233511,0,1,101,6,6,1.78,2,2,0,0,2,1,80,1,2,1,1 +70514,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,52160.006964,54246.901852,1,100,4,1,0,3,1,0,0,0,1,22,1,4,6,NA +70515,7,1,1,5,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7843.21745,0,1,96,12,12,NA,5,5,2,0,1,2,63,2,5,3,NA +70516,7,2,1,11,NA,4,4,2,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12176.538896,12609.8265,2,97,3,3,0.46,5,5,0,3,0,1,40,1,2,1,3 +70517,7,2,1,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19440.793325,20264.695737,2,95,14,14,3.47,4,4,0,0,0,2,45,1,4,1,4 +70518,7,2,2,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,7032.269464,7566.800387,2,99,3,3,0.44,5,5,1,1,0,2,53,1,4,1,3 +70519,7,2,1,5,NA,5,6,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8981.553859,10073.795326,3,91,15,15,5,4,4,2,0,0,1,36,2,5,1,5 +70520,7,2,1,67,NA,1,1,1,NA,NA,1,1,2,1,8,NA,3,1,NA,1,2,2,1,2,2,2,2,2,2,11992.012141,12562.386395,2,102,5,5,1.36,2,2,0,0,2,2,66,2,2,1,3 +70521,7,2,1,53,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19084.761772,19687.829911,1,91,15,15,5,4,4,0,2,0,2,52,1,5,1,5 +70522,7,2,2,21,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,41953.42893,43056.968872,2,99,4,1,0.08,4,1,0,0,0,2,21,NA,NA,5,NA +70523,7,2,1,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,86986.68246,93900.677523,2,94,14,14,3.36,4,4,2,0,0,1,31,1,3,1,5 
+70524,7,2,1,0,6,3,3,2,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17113.022485,16817.175522,1,91,15,15,5,3,3,1,0,0,1,41,1,5,1,5 +70525,7,2,1,63,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,7410.50521,7700.344649,2,96,2,2,0.71,1,1,0,0,1,1,63,1,3,3,NA +70526,7,2,2,10,NA,5,7,1,10,131,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8063.227462,8462.212273,3,91,14,14,3.8,4,4,0,2,0,1,47,1,5,1,5 +70527,7,2,1,63,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,1,2,2,1,1,2,2,2,9235.951997,9462.879133,2,103,7,7,1.89,3,3,0,0,1,1,63,2,2,1,9 +70528,7,2,1,15,NA,4,4,1,16,192,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19163.050164,19302.61536,2,102,8,8,3.1,2,2,0,1,0,1,40,1,4,3,NA +70529,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,36244.629058,37702.525304,1,98,3,3,0.73,2,2,0,0,0,1,49,1,2,1,2 +70530,7,2,2,1,17,4,4,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6765.566493,7560.484444,2,103,6,6,1.3,4,4,1,1,0,2,26,1,4,1,3 +70531,7,2,1,49,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,30152.053647,30985.296608,2,101,10,10,3.89,3,3,0,1,0,2,49,1,4,1,3 +70532,7,2,2,9,NA,1,1,1,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20495.125801,21365.773499,3,92,8,8,1.55,6,6,1,3,0,2,38,1,5,1,4 +70533,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,196995.351093,196284.082305,1,91,7,7,3.31,1,1,0,0,0,2,51,1,5,3,NA +70534,7,2,2,48,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,2,6,NA,2,2,2,NA,NA,NA,2,2,2,2,46417.079566,59177.283457,1,97,NA,NA,NA,2,1,0,0,0,1,30,NA,NA,6,NA +70535,7,2,1,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,28813.038041,29206.765831,1,94,5,5,0.87,4,4,0,1,0,1,40,1,5,1,5 +70536,7,2,1,30,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,18838.303827,25198.281368,2,93,6,6,1.95,2,2,0,1,0,1,30,1,4,5,NA +70537,7,2,1,26,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,37326.925742,39362.952436,2,91,6,77,NA,4,1,0,0,0,1,25,NA,NA,5,NA 
+70538,7,2,2,11,NA,1,1,1,11,134,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,19059.339877,19554.022514,1,95,4,4,0.68,5,5,0,1,0,2,38,2,3,4,NA +70539,7,1,1,22,NA,5,6,NA,NA,NA,2,NA,2,1,6,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,13205.475858,0,1,102,1,1,0,2,1,0,0,0,1,18,NA,NA,NA,NA +70540,7,2,1,18,NA,3,3,1,18,227,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,105662.708061,105798.100528,1,94,12,12,NA,2,2,0,0,0,1,18,1,3,NA,NA +70541,7,2,2,13,NA,2,2,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19836.724169,20587.964512,1,98,4,4,0.75,4,4,0,1,0,2,48,1,2,1,3 +70542,7,2,2,14,NA,4,4,2,14,177,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11956.705842,11923.015236,1,96,15,15,5,5,5,0,3,0,2,47,1,5,1,5 +70543,7,2,2,78,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,1,2,NA,11684.182359,12558.119387,2,98,6,6,1.35,3,3,0,0,2,1,79,NA,NA,1,2 +70544,7,2,1,77,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,7608.031426,7606.53553,1,99,6,6,1.34,4,4,0,1,1,1,77,1,3,1,5 +70545,7,2,1,31,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32856.012738,39257.112915,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +70546,7,2,2,0,8,4,4,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4418.651245,4866.131244,2,95,3,3,0.43,4,4,2,0,0,2,23,1,2,5,NA +70547,7,2,2,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,23204.787354,24686.898455,1,98,3,3,0.43,4,4,0,1,0,2,39,1,2,5,NA +70548,7,2,1,34,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,3,4,NA,2,2,2,1,2,2,2,2,2,2,54095.173154,53686.899782,1,92,7,7,1.48,5,5,0,1,0,1,42,1,5,1,4 +70549,7,2,2,4,NA,1,1,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,12468.876716,13254.955592,2,97,4,4,0.6,6,6,2,2,0,1,35,2,2,6,NA +70550,7,2,1,76,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,14073.595908,14398.283636,1,92,14,14,5,3,3,0,0,2,1,76,1,4,1,4 +70551,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,13209.159597,13903.918018,2,95,3,3,0.52,3,3,1,0,0,1,37,1,4,1,4 
+70552,7,2,1,3,NA,4,4,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9969.040341,10992.497123,2,97,4,4,0.84,3,3,1,0,1,2,68,1,4,2,NA +70553,7,1,1,2,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6347.215153,0,2,95,1,1,0.09,7,7,2,4,1,2,60,1,3,5,NA +70554,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,71034.153987,74518.988514,1,98,15,15,5,4,4,1,1,0,1,40,1,4,1,5 +70555,7,2,2,22,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,16239.242782,17706.77205,2,101,12,12,NA,4,4,0,0,0,1,57,NA,NA,1,NA +70556,7,2,1,2,NA,4,4,2,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5125.369941,5513.229844,1,96,77,77,NA,7,7,1,3,0,1,56,1,3,1,4 +70557,7,2,1,1,22,4,4,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5547.430651,5776.651218,1,96,8,8,1.61,6,6,3,0,0,1,33,2,5,1,4 +70558,7,2,1,14,NA,4,4,2,14,172,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11125.932433,11147.929312,1,96,6,6,1.21,4,4,0,2,0,2,41,1,4,4,NA +70559,7,2,2,1,22,4,4,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8221.160724,8586.221973,1,100,1,1,0,4,4,1,2,0,2,35,1,2,5,NA +70560,7,1,1,3,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7843.21745,0,1,96,12,12,NA,5,5,2,0,1,2,63,2,5,3,NA +70561,7,2,1,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,18533.049642,19320.837782,1,99,14,14,4.21,4,4,0,2,0,2,44,1,5,1,5 +70562,7,2,1,52,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,1,2,2,1,20958.247774,21169.083688,2,103,3,1,0.36,2,1,0,0,1,1,52,1,2,6,NA +70563,7,2,1,0,6,5,7,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6395.296043,7073.813977,2,95,4,4,0.79,3,3,1,0,0,1,50,1,4,6,NA +70564,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,8796.577101,9772.20689,2,95,3,3,1.03,1,1,0,0,1,1,80,1,3,2,NA +70565,7,2,2,76,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,18693.365067,19341.691824,1,92,7,7,3.58,1,1,0,0,1,2,76,1,4,3,NA 
+70566,7,2,2,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,28033.589927,29297.386717,2,95,2,2,0.26,3,3,0,2,0,2,31,1,3,5,NA +70567,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,28280.669788,30348.532296,1,99,13,13,NA,2,2,0,0,2,1,80,1,2,1,3 +70568,7,2,1,67,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,8561.661692,8628.633702,2,99,99,99,NA,3,3,0,1,1,1,67,1,4,2,NA +70569,7,2,1,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22570.662508,23265.895325,1,95,12,12,NA,6,6,2,0,0,2,42,1,2,1,5 +70570,7,2,1,62,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,8621.533452,8958.738589,2,99,7,7,2.31,2,2,0,0,1,1,62,1,3,5,NA +70571,7,2,1,0,11,4,4,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5707.173345,6119.621796,1,90,2,2,0.22,5,5,1,1,2,1,44,2,4,5,NA +70572,7,2,2,2,NA,2,2,2,2,25,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10806.348349,11487.615985,2,91,99,99,NA,6,6,1,3,0,2,20,2,2,5,NA +70573,7,2,1,24,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,59473.789098,60284.462735,3,92,7,7,2.78,2,2,0,0,0,1,24,1,3,5,NA +70574,7,2,2,36,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,42468.064168,44382.588967,2,101,1,1,0.27,3,3,0,2,0,2,36,1,3,5,NA +70575,7,2,2,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,149804.257477,155051.140087,1,97,9,9,4.92,1,1,0,0,1,2,69,1,4,3,NA +70576,7,1,1,31,NA,5,6,NA,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19091.246741,0,1,91,9,9,2.97,3,3,1,0,0,1,31,2,5,1,5 +70577,7,2,2,80,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,4,77,NA,1,2,1,1,2,1,1,2,1,NA,14314.616082,14811.078253,2,92,77,77,NA,1,1,0,0,1,2,80,2,4,77,NA +70578,7,2,1,16,NA,4,4,2,16,194,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12721.673656,12746.825446,2,97,7,7,2.65,2,2,0,1,1,2,61,1,4,3,NA +70579,7,2,2,4,NA,3,3,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,85219.527381,94056.007225,1,97,15,15,5,4,4,1,1,0,2,33,1,5,1,3 
+70580,7,2,1,58,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,19581.573846,19613.097963,1,90,6,6,2.12,2,2,0,0,0,2,55,1,3,1,3 +70581,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,128226.88751,129797.060324,1,95,7,7,2.86,2,2,0,0,1,1,59,1,2,1,2 +70582,7,2,2,36,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,4,6,2,2,2,2,2,2,2,2,2,2,2,35464.8385,35311.584148,2,96,5,5,0.89,4,4,1,1,0,2,36,2,4,6,NA +70583,7,2,1,14,NA,2,2,1,14,170,NA,NA,2,2,4,8,NA,NA,NA,2,1,2,2,2,2,2,2,2,2,22721.243258,22896.459408,2,102,8,8,1.09,7,7,1,3,0,2,33,2,1,6,NA +70584,7,2,2,36,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,20039.469886,21581.359058,1,97,15,15,5,4,4,2,0,0,1,40,2,5,1,5 +70585,7,2,1,5,NA,5,7,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6138.820061,6435.022409,1,99,14,14,3.94,4,4,1,1,0,1,43,1,4,1,5 +70586,7,2,2,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,24905.670199,25753.835465,2,101,10,10,2.91,4,4,0,1,0,2,51,1,2,5,NA +70587,7,2,2,50,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,2,5,NA,2,2,2,2,2,2,NA,NA,NA,NA,17054.056149,17142.991601,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +70588,7,2,1,0,8,4,4,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5836.706781,6455.960401,2,93,12,12,NA,4,4,1,1,0,2,27,2,4,1,4 +70589,7,2,2,10,NA,4,4,2,10,122,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7889.400564,8100.464206,2,99,99,99,NA,3,3,0,1,1,1,67,1,4,2,NA +70590,7,2,2,51,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,27551.607232,31605.843936,1,90,12,9,5,2,1,0,0,0,2,51,1,3,3,NA +70591,7,1,2,65,NA,2,2,NA,NA,NA,2,NA,2,1,8,NA,1,3,NA,2,2,2,2,2,2,NA,NA,NA,NA,9716.805546,0,2,90,4,4,1.65,1,1,0,0,1,2,65,2,1,3,NA +70592,7,2,1,2,NA,4,4,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7561.636249,7792.598574,2,98,2,2,0.31,4,4,2,1,0,2,27,1,2,4,NA +70593,7,2,1,80,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,7608.031426,7725.81565,1,99,5,5,1.84,1,1,0,0,1,1,80,1,3,2,NA 
+70594,7,2,2,3,NA,4,4,1,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11729.417673,12620.984256,2,96,7,7,1.49,5,5,2,1,0,1,51,1,5,1,3 +70595,7,2,2,60,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,13057.178942,13648.591881,1,102,77,77,NA,6,6,0,2,1,2,37,1,4,1,4 +70596,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,85420.170155,89610.762168,1,91,15,15,5,3,3,1,0,0,1,36,1,4,1,5 +70597,7,2,2,49,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,35126.205635,35814.074718,1,95,6,6,0.96,5,5,1,0,1,2,69,1,1,2,NA +70598,7,2,2,28,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,12426.473257,13266.565637,2,92,15,14,5,2,1,0,0,0,1,27,1,5,6,NA +70599,7,2,1,54,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,36134.528328,36424.033955,2,101,6,6,2.04,2,2,0,0,0,2,51,1,5,1,4 +70600,7,2,2,80,NA,2,2,1,NA,NA,2,NA,2,2,7,NA,1,2,NA,2,2,2,1,2,2,2,2,2,NA,18142.687884,20262.262283,2,93,5,5,1.07,3,3,0,0,1,2,44,2,1,1,1 +70601,7,2,1,0,2,1,1,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,9064.168162,9064.335231,3,92,3,3,0.48,4,4,2,0,0,1,21,2,1,1,2 +70602,7,2,2,10,NA,3,3,2,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,60197.256541,63931.531988,2,91,15,15,5,4,4,0,2,0,1,53,1,5,1,4 +70603,7,2,1,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,127000.852889,130891.431194,2,92,15,15,5,2,1,0,0,0,1,57,1,5,6,NA +70604,7,2,1,39,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,13855.593765,14267.864487,2,92,15,15,5,3,3,1,0,0,1,39,2,5,1,5 +70605,7,2,1,56,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20020.224623,20868.683381,1,96,15,15,5,4,4,0,1,0,1,56,1,4,1,5 +70606,7,2,1,80,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,16325.758272,16610.353254,2,93,8,8,2.17,4,4,0,0,3,1,80,2,2,1,2 +70607,7,2,2,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19870.689174,19814.851418,1,96,15,15,5,4,4,0,2,0,1,46,1,5,1,5 
+70608,7,1,2,68,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,54217.266426,0,1,101,4,4,0.99,2,2,0,0,2,1,77,1,3,1,3 +70609,7,2,2,1,23,1,1,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8832.868731,9513.039058,1,102,13,13,NA,7,7,3,1,2,2,62,2,1,1,2 +70610,7,2,2,1,21,1,1,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11414.885224,11453.86318,1,94,4,4,0.5,7,7,2,3,0,2,32,1,4,6,NA +70611,7,2,2,12,NA,3,3,2,12,150,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,101168.631125,108085.531312,1,99,6,6,1.52,4,4,0,2,0,2,43,1,3,5,NA +70612,7,2,2,5,NA,2,2,1,5,60,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13647.772496,14082.018801,2,93,7,7,1.99,3,3,1,1,0,2,38,1,3,4,NA +70613,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,1,1,NA,1,2,1,1,2,2,1,2,1,3,12649.084278,13204.362319,3,90,8,3,1.25,3,1,0,0,1,2,68,2,2,4,NA +70614,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,1,1,2,2,1,2,2,1,2,2,1,22225.098465,28674.927734,2,97,2,2,0.34,2,2,1,0,0,2,20,1,3,5,NA +70615,7,2,1,8,NA,1,1,2,8,106,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,10591.186197,11123.758959,1,90,6,6,1.11,5,5,1,2,0,1,30,2,1,6,NA +70616,7,2,1,17,NA,5,6,2,17,214,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8869.170018,9434.816848,1,90,99,99,NA,3,3,0,1,1,1,60,1,5,1,5 +70617,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,21857.756498,23340.233721,2,97,5,5,1.08,3,3,0,1,0,2,45,1,4,6,NA +70618,7,2,1,13,NA,1,1,1,13,161,NA,NA,2,2,3,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,22768.423624,23297.239555,2,98,5,5,1.07,4,4,0,1,0,1,53,2,1,1,1 +70619,7,2,2,4,NA,1,1,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,16462.187772,17163.602129,3,92,12,12,NA,6,6,1,3,0,2,33,1,5,1,4 +70620,7,2,1,9,NA,1,1,2,9,113,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12477.812875,12364.493687,2,94,14,14,4.03,4,4,0,2,0,2,33,2,2,1,NA +70621,7,2,1,10,NA,5,7,1,10,122,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9375.648038,9390.261055,1,102,6,6,2.11,2,2,0,1,0,2,32,1,4,99,NA 
+70622,7,2,1,50,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,32274.162059,31800.588482,1,102,14,14,4.59,3,3,0,0,1,2,46,1,4,1,1 +70623,7,2,1,54,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,33691.382998,1,101,5,5,1.24,3,3,0,0,1,2,61,1,4,1,3 +70624,7,2,1,42,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,44926.921381,45636.571752,2,96,10,10,3.78,3,3,0,1,0,1,42,1,3,1,3 +70625,7,2,2,0,7,1,1,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5289.715679,5503.176989,1,90,3,3,0.23,7,7,3,1,1,2,35,2,2,5,NA +70626,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,13076.210481,13450.719873,2,97,5,5,0.84,5,5,0,2,0,2,33,1,4,1,3 +70627,7,2,2,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,15497.844354,15606.405341,1,99,14,14,4.05,3,3,0,1,0,2,52,1,4,4,NA +70628,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,47566.45715,54102.989916,1,90,7,7,3.22,1,1,0,0,1,2,80,1,4,2,NA +70629,7,2,2,28,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,16131.725974,16820.579069,1,99,14,7,3.4,2,1,0,0,0,2,28,2,5,1,NA +70630,7,2,1,11,NA,5,7,2,11,142,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,28060.491868,29807.930767,1,91,4,4,0.76,4,4,0,2,0,2,44,1,4,6,NA +70631,7,1,1,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,46965.818538,0,1,101,10,10,4.3,2,2,0,0,2,1,80,1,2,1,4 +70632,7,2,2,0,0,3,3,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22123.758463,21522.828051,1,95,7,7,1.57,4,4,1,1,0,1,37,1,3,1,5 +70633,7,2,1,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,123771.419917,127885.04228,2,98,15,15,5,3,3,0,1,0,1,56,1,5,1,5 +70634,7,2,2,9,NA,1,1,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15962.145468,16664.698857,2,98,7,7,1.33,6,6,0,3,0,1,31,1,3,6,NA +70635,7,2,2,68,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,129999.035519,131590.908764,1,98,12,12,NA,2,2,0,0,2,1,70,1,2,1,3 
+70636,7,2,1,12,NA,3,3,2,12,147,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,93665.036597,95017.313859,1,91,10,10,2.77,5,5,0,3,0,1,43,1,5,1,5 +70637,7,2,1,8,NA,4,4,2,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8483.005475,8584.70346,1,96,7,7,1.52,4,4,0,2,0,2,30,2,4,1,5 +70638,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,32720.69734,34178.98356,1,95,5,5,1.08,3,3,0,1,0,1,53,1,4,1,4 +70639,7,2,2,34,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,6,2,2,2,2,1,2,2,1,2,2,2,40536.844796,42064.124353,3,91,7,7,1.42,6,6,1,3,0,1,37,2,1,1,1 +70640,7,2,1,6,NA,5,7,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10810.913614,11522.32071,1,97,15,15,4.77,4,4,1,1,0,1,41,2,4,1,5 +70641,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,6910.118936,7233.371983,2,95,2,2,0.75,1,1,0,0,1,1,63,1,1,5,NA +70642,7,2,1,12,NA,3,3,2,12,146,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,30943.024697,30709.935341,1,95,6,6,0.81,6,6,2,2,0,1,30,1,3,1,4 +70643,7,2,1,7,NA,3,3,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,39769.597728,42234.385729,2,92,15,15,5,5,5,0,3,0,2,46,1,5,1,5 +70644,7,2,1,41,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,130826.463813,1,101,9,9,2.6,4,4,0,2,0,2,38,1,4,1,4 +70645,7,2,1,6,NA,4,4,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11543.27965,11681.665542,1,98,1,1,0.03,3,3,0,2,0,2,38,1,4,5,NA +70646,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,54095.581484,61529.339683,2,94,14,14,5,1,1,0,0,1,2,80,1,4,2,NA +70647,7,2,2,38,NA,5,7,1,NA,NA,1,2,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,58826.425292,59007.297053,1,102,8,8,1.91,5,5,1,2,0,2,38,1,5,1,4 +70648,7,2,1,9,NA,1,1,1,9,113,NA,NA,1,1,NA,2,NA,NA,NA,2,NA,2,1,2,2,1,2,2,1,13395.612951,13714.015605,2,102,4,4,0.61,5,5,0,3,0,1,34,2,3,1,3 +70649,7,2,1,63,NA,2,2,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,9745.303498,9901.410815,2,98,15,15,4.56,4,4,0,0,3,1,80,1,1,1,NA 
+70650,7,2,1,2,NA,3,3,2,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,43818.485,49437.664471,2,91,15,15,4.2,6,6,2,0,2,1,63,1,1,1,3 +70651,7,2,2,3,NA,5,7,1,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10292.958129,10862.428258,3,91,10,10,2.48,5,5,2,1,0,2,27,1,2,1,4 +70652,7,2,2,16,NA,3,3,2,16,199,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,31716.869763,32155.152073,1,95,NA,NA,NA,2,2,0,1,0,2,51,NA,NA,3,NA +70653,7,2,1,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,137286.989143,140204.788291,3,91,14,14,5,1,1,0,0,0,1,43,1,5,5,NA +70654,7,2,2,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,52280.406546,52747.829022,1,95,6,4,1.47,2,1,0,0,0,2,28,1,2,6,NA +70655,7,2,2,17,NA,3,3,2,17,206,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,103298.858809,106172.653059,1,95,9,9,2.66,4,4,0,2,0,1,45,1,3,1,3 +70656,7,2,2,2,NA,1,1,1,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12260.86913,13033.834526,3,92,8,8,1.85,5,5,1,0,2,1,66,2,1,1,1 +70657,7,2,1,0,8,3,3,2,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,22595.741117,22205.109866,1,93,15,15,5,3,2,1,0,0,2,35,1,5,6,NA +70658,7,2,1,16,NA,1,1,1,16,200,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,27186.265479,27776.842846,1,92,14,9,3.77,3,2,0,1,0,2,39,2,4,6,NA +70659,7,2,2,0,5,1,1,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,6787.112205,7238.162123,2,94,99,99,NA,3,3,1,0,0,2,18,2,2,NA,NA +70660,7,2,2,19,NA,1,1,1,19,230,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12996.753859,13369.730018,2,103,10,10,1.63,7,7,1,4,0,1,31,NA,NA,1,4 +70661,7,2,2,65,NA,2,2,1,NA,NA,2,NA,2,1,2,NA,4,1,NA,2,2,2,1,2,2,2,2,2,2,15876.871857,16539.500436,2,102,15,15,5,3,3,0,0,2,1,36,2,5,5,NA +70662,7,2,1,11,NA,3,3,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,42986.51011,47590.55439,2,94,7,7,1.17,6,6,0,3,0,1,40,1,3,1,5 +70663,7,2,2,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,3,3,1,2,2,1,2,2,1,2,2,1,23740.353448,26106.078779,2,99,6,6,2.6,1,1,0,0,0,2,29,1,5,3,NA 
+70664,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,39561.827371,40611.330327,1,92,4,4,1.16,2,2,0,0,1,1,56,1,2,1,3 +70665,7,2,2,13,NA,4,4,2,13,164,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14241.099758,14200.972378,1,96,15,15,5,3,3,0,1,0,2,37,1,4,1,4 +70666,7,2,2,3,NA,1,1,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16462.187772,17500.018071,3,92,3,3,0.52,5,5,2,1,0,2,29,2,1,1,3 +70667,7,2,1,5,NA,3,3,1,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,73403.03993,82816.073154,1,92,8,8,1.45,6,6,1,3,0,1,36,1,3,1,4 +70668,7,2,2,41,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,1,1,2,2,2,2,2,2,2,NA,NA,NA,NA,37512.060155,37707.682357,1,92,6,6,0.93,5,5,0,2,0,1,47,2,1,1,1 +70669,7,2,2,3,NA,1,1,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13366.393396,14756.857003,2,94,8,8,2.33,4,4,2,0,0,1,24,1,2,6,NA +70670,7,2,2,56,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,20130.149569,20235.126587,1,90,10,10,2.44,5,5,1,0,0,2,56,2,1,1,1 +70671,7,2,2,5,NA,4,4,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10683.855206,11939.151167,1,96,15,15,5,4,4,1,1,0,1,50,1,3,1,4 +70672,7,2,2,1,19,2,2,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9534.013652,9566.569071,2,97,12,6,0.89,7,7,3,0,0,2,26,2,1,6,NA +70673,7,2,2,14,NA,5,7,2,14,173,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13124.737024,13189.842246,2,97,5,5,1.19,3,3,0,1,0,2,41,1,3,1,2 +70674,7,2,1,67,NA,3,3,2,NA,NA,2,NA,2,2,6,NA,5,2,NA,1,2,2,1,2,1,1,2,2,NA,21313.15842,21908.589414,2,90,2,2,0.87,1,1,0,0,1,1,67,2,5,2,NA +70675,7,2,1,65,NA,5,6,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12579.986433,13271.133625,1,92,14,14,5,2,2,0,0,2,2,62,1,4,1,4 +70676,7,2,2,75,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,44856.466004,46390.551109,2,94,10,10,4.69,2,2,0,0,1,2,59,1,3,3,NA +70677,7,2,1,29,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,11977.649578,12533.694613,2,92,15,14,5,3,1,0,0,0,1,29,1,5,5,NA 
+70678,7,2,1,21,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,37235.048974,39044.599947,2,93,6,6,1.48,4,4,0,1,0,1,53,2,2,1,3 +70679,7,2,1,4,NA,3,3,2,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,97310.423154,111231.170223,1,95,8,8,1.28,7,7,1,4,0,1,32,1,3,1,3 +70680,7,2,2,2,NA,4,4,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,6543.852411,7133.732612,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +70681,7,2,1,61,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,125558.167126,127168.668278,3,91,15,15,5,2,2,0,0,2,2,62,1,5,1,5 +70682,7,2,2,22,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,24654.107413,23441.410215,1,100,10,10,2.59,5,5,0,1,0,2,40,1,5,1,NA +70683,7,2,2,0,1,1,1,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9767.083234,9561.595244,3,92,8,8,2.51,3,3,1,0,0,2,25,1,4,5,NA +70684,7,2,1,58,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,13189.987508,14124.702096,1,90,77,77,NA,4,4,0,2,0,2,51,1,5,1,5 +70685,7,2,2,5,NA,2,2,2,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13474.744062,14324.236113,1,93,5,5,0.84,5,5,1,2,0,2,52,2,1,3,NA +70686,7,2,2,48,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,29010.447112,28477.291441,1,100,15,15,5,2,2,0,0,1,2,48,1,5,5,NA +70687,7,2,1,59,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,169076.731628,169683.640829,1,98,14,14,5,1,1,0,0,0,1,59,1,5,2,NA +70688,7,2,2,13,NA,4,4,1,13,163,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16598.888685,16325.283055,2,102,4,4,0.53,6,6,2,2,0,2,27,1,2,1,2 +70689,7,2,1,56,NA,5,6,2,NA,NA,2,NA,2,2,99,NA,5,5,NA,1,2,1,1,2,1,1,2,1,3,11690.444016,11682.423918,3,90,77,77,NA,4,4,0,0,2,1,69,2,5,2,NA +70690,7,2,2,13,NA,5,6,1,13,160,NA,NA,2,1,3,6,NA,NA,NA,1,1,2,1,2,1,1,2,2,NA,8878.081187,9740.503961,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +70691,7,2,2,14,NA,4,4,1,14,172,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15114.677795,15072.08892,2,102,6,6,1.22,5,5,0,2,0,2,42,1,4,1,4 
+70692,7,2,1,3,NA,4,4,2,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9304.437652,10259.663981,2,97,4,4,0.81,4,4,1,0,0,2,51,1,2,4,NA +70693,7,2,1,5,NA,3,3,2,5,71,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,31190.854587,36547.404966,1,95,3,3,0.65,3,3,1,0,0,1,58,1,3,3,NA +70694,7,2,1,8,NA,5,6,2,8,106,NA,NA,2,2,2,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,7923.925927,8546.646915,1,91,10,10,3.22,4,4,1,1,0,1,38,2,5,1,5 +70695,7,2,2,39,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,4,2,2,1,2,2,1,2,2,NA,NA,NA,NA,20039.469886,21176.851739,1,97,8,8,2.51,3,3,0,2,0,2,39,2,4,2,NA +70696,7,2,2,47,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,3,1,NA,2,2,2,2,2,2,2,2,1,2,29105.053716,31982.273564,2,93,4,4,0.82,4,4,0,0,0,1,51,2,3,1,3 +70697,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,82917.179795,86800.065133,2,95,7,7,2.91,2,2,1,0,0,1,32,1,5,2,NA +70698,7,2,1,1,16,5,6,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8674.760516,9729.693026,3,91,10,10,3.67,3,3,1,0,0,2,30,2,4,1,4 +70699,7,2,1,0,7,2,2,1,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,4724.065742,4677.378988,2,93,77,77,NA,7,7,3,0,1,2,40,2,4,1,NA +70700,7,2,1,0,2,2,2,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,4854.394074,4994.155976,2,93,3,3,0.37,5,5,3,0,0,1,28,2,1,6,NA +70701,7,2,1,24,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14782.636003,15547.771991,3,91,10,9,5,2,1,0,0,0,1,24,1,4,5,NA +70702,7,2,1,26,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27681.749998,28652.049948,2,98,8,8,1.48,7,7,3,0,0,1,26,1,3,1,3 +70703,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,160743.928829,166234.629208,1,95,8,8,3.47,2,2,0,0,0,1,58,1,2,1,4 +70704,7,2,2,35,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,36453.846815,42702.950408,1,102,14,14,2.83,6,6,1,2,0,1,36,1,2,1,3 +70705,7,2,2,0,3,5,7,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5820.770257,5884.32621,1,95,1,1,0.13,3,3,1,0,0,1,31,1,4,1,4 
+70706,7,2,2,48,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,1,1,NA,2,2,2,1,2,2,1,2,2,2,34639.996543,34820.641178,1,102,77,77,NA,3,3,0,0,1,1,61,2,4,1,1 +70707,7,2,2,10,NA,5,7,2,11,132,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8746.306586,9472.024073,1,91,3,3,0.66,4,4,1,2,0,2,33,1,3,5,NA +70708,7,2,1,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,173139.914798,178954.334223,1,101,7,7,2.64,2,2,0,0,0,1,57,1,2,1,2 +70709,7,2,1,9,NA,4,4,2,9,111,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8988.052978,9133.633722,2,95,8,8,1.85,5,5,1,2,0,1,55,1,2,1,3 +70710,7,2,2,57,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,29087.427528,30575.794656,1,101,3,3,1.07,3,1,0,1,0,2,57,1,4,3,NA +70711,7,2,2,12,NA,5,7,2,12,151,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,47404.963472,48778.930863,1,97,8,8,2.51,3,3,0,1,0,1,35,1,3,1,4 +70712,7,2,1,2,NA,5,6,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10123.286306,10611.741928,1,100,10,10,3.04,4,4,2,0,0,1,30,2,5,1,5 +70713,7,2,2,30,NA,5,6,2,NA,NA,2,NA,2,2,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,15109.988743,16272.590756,1,90,6,6,0.92,6,6,2,0,2,2,30,2,5,1,5 +70714,7,2,2,1,21,5,6,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4576.510691,4795.397917,1,102,15,15,3.82,5,5,1,1,0,1,29,1,4,1,4 +70715,7,2,2,79,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,53541.401974,54113.194007,1,99,14,14,5,2,2,0,0,2,2,79,1,3,1,5 +70716,7,2,2,43,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,99538.625493,103163.851943,1,103,15,15,5,3,3,0,1,0,1,46,1,5,1,5 +70717,7,2,2,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,24480.331443,26311.37682,2,101,6,6,2.04,2,2,0,0,2,1,77,1,1,1,2 +70718,7,2,1,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,13555.672819,14397.395601,1,94,4,4,1.43,1,1,0,0,1,1,75,1,4,3,NA +70719,7,1,1,12,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,58479.782556,0,1,99,77,77,NA,4,4,0,2,0,2,45,1,3,1,NA 
+70720,7,2,2,7,NA,5,6,2,7,86,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8371.948513,8981.698617,1,90,15,15,5,3,3,0,1,0,2,34,2,5,1,5 +70721,7,2,2,79,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,2,2,2,2,2,2,1,NA,18241.877822,19614.829564,2,93,9,9,3.64,2,2,0,0,2,2,79,2,2,1,4 +70722,7,2,2,5,NA,4,4,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10936.097083,11421.715282,2,95,1,1,0.18,4,4,2,1,0,2,38,1,2,5,NA +70723,7,2,1,32,NA,3,3,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,86204.990126,88673.945369,2,103,14,14,5,2,2,0,0,0,1,32,2,5,1,NA +70724,7,2,1,5,NA,2,2,2,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,15527.966893,15272.133376,1,90,9,9,2.6,4,4,1,1,0,1,41,1,5,1,4 +70725,7,2,2,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11173.849134,11672.749546,2,93,10,10,3.93,3,3,0,0,2,1,54,1,5,1,5 +70726,7,2,1,5,NA,4,4,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9310.029529,9330.284312,1,97,4,4,0.46,7,7,3,3,0,2,31,1,3,1,NA +70727,7,2,2,44,NA,4,4,2,NA,NA,2,NA,2,1,5,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,18488.34481,18123.036332,2,99,15,15,4.34,4,4,0,0,0,1,59,2,4,1,5 +70728,7,2,2,42,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,2,1,2,2,1,2,2,NA,NA,NA,NA,24056.374863,24201.045855,2,99,1,1,0,1,1,0,0,0,2,42,1,3,5,NA +70729,7,2,1,80,NA,5,7,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,26344.362464,28384.957988,2,95,7,7,2.55,2,2,0,0,2,1,80,1,4,1,2 +70730,7,2,1,55,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,1,1,2,1,1,2,2,2,33162.406014,36437.864606,2,98,7,7,2.25,3,3,0,1,0,2,51,2,1,1,1 +70731,7,2,1,6,NA,5,6,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8580.826574,9087.233306,1,92,14,14,2.42,6,6,1,3,0,1,30,1,4,6,NA +70732,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,157061.455656,156201.940202,1,90,7,7,1.89,3,3,0,0,1,2,75,1,4,3,NA +70733,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,10634.832821,11430.282078,2,95,3,3,0.92,1,1,0,0,1,2,80,1,1,2,NA 
+70734,7,2,1,7,NA,3,3,1,7,92,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50961.223155,56419.394274,1,94,15,15,5,6,6,0,4,0,1,38,1,5,1,4 +70735,7,2,2,80,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,10072.885959,10826.303506,2,100,3,3,1.1,1,1,0,0,1,2,80,1,2,2,NA +70736,7,2,2,32,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,4,2,1,2,2,1,2,2,1,2,2,1,31816.953817,32705.969127,1,91,15,15,5,3,3,0,0,1,1,63,2,5,1,5 +70737,7,2,1,28,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,4,5,NA,2,2,2,2,2,2,2,2,1,2,38474.772527,40200.135096,2,93,NA,1,0.22,4,1,0,0,0,1,28,NA,NA,4,NA +70738,7,2,2,39,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,1,1,2,1,2,1,1,2,2,1,2,1,NA,18229.985416,18267.209527,1,93,7,7,1.64,5,5,0,2,0,1,47,2,5,1,1 +70739,7,2,1,63,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,22641.791224,22854.140555,1,101,5,5,0.71,6,6,1,1,1,1,63,1,2,1,5 +70740,7,2,2,7,NA,4,4,2,7,91,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9795.701448,10340.670956,1,96,13,13,NA,5,5,1,1,0,1,42,1,3,5,NA +70741,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,18723.98095,18120.118724,2,95,7,7,1.74,4,4,0,2,0,2,47,1,5,4,NA +70742,7,2,1,10,NA,2,2,2,10,124,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11316.846999,12445.832216,3,91,13,4,0.81,4,3,0,2,0,1,57,NA,NA,1,NA +70743,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,105141.812429,105376.658808,1,98,5,5,1.39,2,2,0,1,0,1,46,1,4,3,NA +70744,7,1,2,65,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,10135.841003,0,2,99,2,2,0.67,1,1,0,0,1,2,65,1,2,5,NA +70745,7,2,1,51,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,17206.320427,18234.571296,2,100,4,4,0.91,3,3,0,0,0,2,49,1,2,1,2 +70746,7,2,1,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,16589.308426,20972.932517,2,90,8,NA,NA,2,1,0,0,0,1,51,1,2,5,NA +70747,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,14359.447628,16592.595881,1,97,3,3,1.16,1,1,0,0,1,1,80,1,3,2,NA 
+70748,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,31335.13799,31552.004994,1,95,7,7,1.66,5,5,0,3,0,1,34,1,2,1,4 +70749,7,2,1,2,NA,2,2,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8571.222647,8842.539637,2,93,14,14,3.52,5,5,1,2,0,1,44,1,5,1,5 +70750,7,2,2,26,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16844.740449,17564.040518,3,91,1,1,0.11,1,1,0,0,0,2,26,1,5,5,NA +70751,7,2,1,27,NA,1,1,1,NA,NA,2,NA,2,7,3,NA,3,4,NA,2,2,2,2,2,2,1,2,2,2,35210.641637,36376.282426,1,100,8,3,0.9,6,1,1,0,0,1,33,2,3,6,NA +70752,7,1,1,40,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,18872.772727,0,1,91,6,6,1.13,6,6,1,3,0,1,40,1,4,6,NA +70753,7,2,1,30,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,1,2,2,2,2,2,2,37657.076964,37372.8671,1,90,6,6,1.11,5,5,1,2,0,1,30,2,1,6,NA +70754,7,2,2,15,NA,3,3,1,15,184,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,77001.138762,80653.195108,1,98,9,9,2.88,6,3,1,3,0,1,51,1,2,1,3 +70755,7,2,1,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,141736.66089,144209.560558,1,92,10,10,3.4,3,3,0,0,0,1,56,1,4,1,5 +70756,7,2,1,78,NA,3,3,2,NA,NA,2,NA,2,2,3,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,12786.51051,14057.405205,1,93,2,2,0.41,2,2,0,0,2,2,68,2,1,1,1 +70757,7,2,1,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,74874.432638,77018.874888,2,99,15,15,5,2,2,0,0,0,2,35,2,5,1,5 +70758,7,2,1,21,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,41120.308343,45563.612914,1,93,5,5,1.36,2,2,0,0,0,2,41,2,1,4,NA +70759,7,2,1,41,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,1,4,NA,2,2,2,1,2,2,2,2,2,2,41410.39303,40802.759347,1,101,12,7,3.58,3,1,0,0,0,1,41,2,1,4,NA +70760,7,2,1,7,NA,5,6,2,7,87,NA,NA,2,1,2,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6681.315407,7240.00549,3,91,15,15,4.33,7,6,1,3,0,2,40,1,5,1,5 +70761,7,2,1,46,NA,1,1,1,NA,NA,2,NA,2,2,77,NA,4,1,NA,2,2,2,1,2,2,2,2,1,2,33629.261806,33135.804229,2,103,12,8,4.48,2,1,0,0,0,1,46,2,4,1,NA 
+70762,7,2,2,34,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,23408.914544,24510.72255,2,90,7,7,1.61,4,4,1,1,0,2,34,1,5,1,5 +70763,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,54095.581484,61529.339683,2,91,6,6,1.88,2,2,0,0,1,2,80,1,5,2,NA +70764,7,1,2,2,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,34597.213785,0,1,98,7,7,1,7,7,2,2,0,2,34,1,4,3,NA +70765,7,2,2,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,44238.530111,44780.242841,1,95,6,6,2.04,2,2,0,0,2,2,67,1,1,2,NA +70766,7,2,2,45,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16354.688935,16518.054061,2,90,9,9,3.02,3,3,0,0,2,2,70,1,4,1,2 +70767,7,2,2,18,NA,1,1,2,19,228,2,NA,2,2,4,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19850.979841,20551.555585,1,97,3,3,0.5,5,5,0,2,0,1,56,2,2,6,NA +70768,7,2,2,9,NA,3,3,2,9,113,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,73410.202745,74807.098361,2,91,15,15,5,5,5,2,1,0,2,40,1,5,1,5 +70769,7,2,2,21,NA,1,1,1,NA,NA,2,NA,2,2,6,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,40653.73216,40476.667284,1,102,5,5,0.86,5,5,2,0,0,2,21,2,2,5,NA +70770,7,2,1,7,NA,1,1,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18107.947773,18211.90239,1,97,10,10,2.32,6,6,0,4,0,1,42,1,4,1,4 +70771,7,2,1,17,NA,5,6,2,17,213,2,NA,2,1,5,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8434.080857,8971.979116,1,91,77,77,NA,4,4,0,2,0,1,50,2,5,1,5 +70772,7,2,1,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,50383.817476,53512.338469,1,94,15,15,5,2,2,0,0,1,2,56,1,3,1,4 +70773,7,2,2,33,NA,2,2,1,NA,NA,2,NA,2,1,5,NA,5,1,1,1,2,2,1,2,2,1,2,2,1,38161.026403,38068.87516,1,100,10,10,2.91,4,4,1,1,0,1,32,1,5,1,5 +70774,7,2,2,49,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18811.641937,19081.796284,2,90,3,3,0.95,2,2,0,1,0,2,49,1,3,5,NA +70775,7,2,1,60,NA,2,2,2,NA,NA,2,NA,2,1,4,NA,5,1,NA,1,2,2,1,2,2,1,2,2,2,7807.558918,8078.476616,3,90,77,77,NA,2,2,0,0,1,1,60,2,5,1,5 
+70776,7,2,1,60,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,1,1,2,1,1,2,1,NA,13133.86792,13808.275718,2,102,15,15,3.82,5,5,0,1,2,1,60,2,2,1,1 +70777,7,2,2,0,10,5,7,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8582.213422,8500.433306,2,94,77,77,NA,4,4,2,0,0,2,23,1,2,6,NA +70778,7,2,2,3,NA,3,3,2,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,54402.653374,57814.771756,1,98,15,15,3.7,5,5,2,1,0,1,34,1,5,1,5 +70779,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,32785.873783,35446.092694,1,94,6,6,1.82,2,2,0,0,2,1,80,1,4,1,3 +70780,7,2,2,11,NA,3,3,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,49777.284509,49627.739683,1,98,15,15,5,5,5,0,3,0,2,44,1,5,1,5 +70781,7,2,1,19,NA,1,1,1,19,229,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20638.769105,20614.097145,2,92,15,15,3.37,7,7,0,4,0,1,42,2,3,1,1 +70782,7,2,1,64,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,138075.879417,141933.339512,2,94,15,15,5,2,2,0,0,2,1,64,1,5,1,5 +70783,7,2,2,37,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,13027.332011,13925.626381,2,103,15,15,5,3,3,0,1,0,2,37,2,5,1,5 +70784,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,196995.351093,203724.329642,1,91,15,15,5,2,2,0,0,0,1,53,1,5,1,5 +70785,7,2,2,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,20434.221508,19874.985431,1,102,1,1,0,5,5,0,3,0,2,41,1,4,1,4 +70786,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,153565.050575,157855.235487,1,97,15,15,5,4,4,0,0,1,1,60,1,5,1,5 +70787,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,164066.603708,163511.57474,1,95,7,7,2.86,2,2,0,0,1,1,58,1,4,1,3 +70788,7,2,1,34,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11793.151047,11949.568707,3,91,14,14,2.5,6,6,1,1,1,2,37,2,2,1,5 +70789,7,2,1,5,NA,3,3,2,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,27873.065855,31447.442273,1,91,6,6,1.07,6,6,3,1,0,2,27,1,4,6,NA 
+70790,7,2,1,49,NA,3,3,2,NA,NA,2,NA,2,2,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,116464.874823,120278.381994,2,91,15,15,5,4,4,0,2,0,2,48,1,5,1,5 +70791,7,2,2,45,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,20303.639991,21936.687597,1,97,15,15,5,3,3,0,1,0,2,45,2,5,1,5 +70792,7,2,1,30,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,6,NA,2,2,2,2,2,2,NA,NA,NA,NA,27605.196104,27396.850962,2,99,12,3,0.52,5,3,0,1,0,1,30,2,2,4,NA +70793,7,2,1,66,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,100988.137931,101524.74706,1,99,9,9,4.35,2,2,0,0,1,1,66,1,2,1,3 +70794,7,2,2,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,9518.80186,10328.183799,2,100,3,3,0.68,2,2,0,0,2,1,66,1,2,1,2 +70795,7,2,1,12,NA,4,4,1,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11429.628358,11541.555232,2,96,7,7,1.04,7,7,0,4,0,2,37,1,3,3,NA +70796,7,2,2,10,NA,5,6,1,10,125,NA,NA,2,2,2,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8281.624869,8692.694009,2,102,9,9,2.68,4,4,1,1,0,2,38,2,5,1,2 +70797,7,2,2,33,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,16614.865368,17760.536638,3,91,15,15,5,4,4,2,0,0,2,33,2,5,1,5 +70798,7,2,2,53,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,15693.813435,15736.327271,2,100,5,5,0.95,4,4,0,0,1,2,53,1,3,5,NA +70799,7,2,2,4,NA,5,7,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7143.995395,7774.277146,3,91,15,15,4.47,4,4,2,0,0,1,33,1,5,1,5 +70800,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,NA,NA,NA,1,2,2,1,23484.626749,23642.328802,2,96,NA,NA,NA,3,3,0,0,0,2,50,NA,NA,1,NA +70801,7,2,2,28,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,103632.167909,110131.87866,2,99,14,14,5,2,2,0,0,0,2,28,1,5,1,5 +70802,7,2,2,5,NA,3,3,1,5,67,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21760.206232,23124.999952,1,98,2,2,0.36,5,5,3,0,0,1,25,1,3,1,3 +70803,7,2,1,7,NA,3,3,1,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15279.821652,15716.743409,1,102,5,1,0.21,5,4,1,1,0,2,24,1,4,5,NA 
+70804,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12037.168211,11707.739526,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 +70805,7,2,2,55,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,12649.084278,13109.186175,3,90,77,77,NA,4,4,0,0,0,1,59,2,2,1,2 +70806,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,126334.218747,127212.680922,1,95,9,9,3.24,3,3,0,0,0,2,42,1,4,3,NA +70807,7,2,2,22,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,46606.430863,46728.795934,2,102,14,14,3.25,5,5,2,0,0,1,27,1,5,1,5 +70808,7,2,1,77,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,7225.866295,7367.404674,1,96,15,15,5,4,4,0,0,2,1,77,1,5,1,3 +70809,7,2,1,0,1,5,7,1,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9289.230808,9431.807424,2,102,6,6,1.65,3,3,1,0,0,2,21,1,4,1,5 +70810,7,2,2,3,NA,5,6,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,4735.146545,5152.906676,3,90,5,5,0.93,4,4,1,0,0,1,48,2,4,1,NA +70811,7,2,2,30,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,2,1,2,2,1,2,2,NA,NA,NA,NA,14138.631841,14167.501749,3,92,8,8,0.91,7,7,3,3,1,1,61,NA,NA,1,4 +70812,7,2,1,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25645.251384,25046.202404,1,92,14,14,4.96,2,2,0,0,0,1,25,1,4,5,NA +70813,7,2,2,7,NA,2,2,1,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15992.05837,16407.130123,2,93,7,7,1.99,3,3,1,1,0,2,38,1,3,4,NA +70814,7,2,2,7,NA,3,3,1,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,48532.852397,48387.04619,1,98,14,14,4.12,4,4,0,2,0,2,36,1,5,1,3 +70815,7,1,2,9,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6758.20346,0,2,103,1,1,0.04,2,2,0,1,0,2,28,1,4,5,NA +70816,7,2,2,30,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,26465.930618,26276.815338,2,100,5,5,0.88,5,5,2,1,0,2,30,1,4,6,NA +70817,7,2,2,21,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,32537.532358,33640.063825,2,90,8,8,2.01,4,4,0,0,1,2,67,2,4,2,NA 
+70818,7,2,1,0,1,2,2,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4997.17037,5277.406032,2,93,15,15,4.84,6,6,1,0,0,1,50,1,4,1,2 +70819,7,2,2,0,2,5,6,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9150.459338,9253.718962,1,97,15,15,4.34,4,4,2,0,0,1,35,2,5,1,5 +70820,7,2,2,13,NA,2,2,2,13,160,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20099.773776,21058.707597,1,90,15,15,5,3,3,0,1,0,1,55,2,3,1,5 +70821,7,2,1,63,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,112072.982256,113124.075015,2,98,14,14,5,2,2,0,0,1,1,63,1,4,1,4 +70822,7,2,2,0,3,4,4,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3484.44779,3522.493854,2,99,1,1,0.03,3,3,1,0,0,2,19,1,3,NA,NA +70823,7,2,1,9,NA,4,4,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10103.729975,10224.857915,1,96,15,15,5,4,4,0,2,0,2,39,1,4,6,NA +70824,7,2,1,10,NA,1,1,1,10,123,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8828.580268,8822.70874,1,103,7,7,0.51,7,7,3,4,0,1,54,2,1,1,1 +70825,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,129971.22201,129637.352703,1,93,15,15,5,1,1,0,0,0,1,47,1,5,3,NA +70826,7,2,1,60,NA,1,1,1,NA,NA,2,NA,2,1,9,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14488.953694,14771.336069,3,92,8,8,2.97,2,2,0,0,2,2,62,1,4,1,4 +70827,7,2,1,7,NA,3,3,2,8,96,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,23729.905536,29042.69522,1,101,3,3,0.61,4,4,1,2,0,1,38,1,2,4,NA +70828,7,2,1,26,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,94547.245282,96016.627954,2,94,77,77,NA,2,2,0,0,0,2,23,1,3,6,NA +70829,7,2,2,13,NA,4,4,1,13,159,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18163.985724,21329.621847,2,101,9,9,2.46,4,4,0,2,0,1,42,1,3,1,3 +70830,7,2,2,0,10,4,4,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4484.450821,4755.264461,1,91,6,6,0.99,5,5,3,0,0,2,33,2,3,1,4 +70831,7,2,2,30,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,2,1,2,1,1,2,1,1,2,2,1,19005.010125,19718.036027,1,92,12,12,NA,4,4,1,1,0,1,33,2,4,1,4 
+70832,7,2,2,31,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,3,1,2,2,1,2,2,1,2,2,1,53830.599426,56471.452044,1,99,5,5,0.89,4,4,2,0,0,2,31,1,4,1,5 +70833,7,2,2,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,36053.766709,36629.803511,1,100,5,5,1.05,3,3,1,0,0,2,35,1,4,6,NA +70834,7,2,1,9,NA,3,3,2,9,115,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,53956.99815,55499.881764,1,93,15,15,5,4,4,0,2,0,2,42,1,5,1,NA +70835,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,20439.261207,20630.95379,2,95,4,2,0.81,2,1,0,0,1,1,62,1,5,3,NA +70836,7,2,1,6,NA,1,1,1,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19735.224235,20204.314213,2,102,6,6,1.35,3,3,0,1,0,1,37,2,1,1,3 +70837,7,2,1,60,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,1,NA,2,2,2,1,2,2,2,2,2,2,8489.547987,8893.33507,2,96,5,5,0.89,5,4,0,0,1,1,22,2,3,6,NA +70838,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,29040.300396,28462.118402,2,101,6,6,1.67,3,3,0,0,0,2,22,1,4,5,NA +70839,7,2,2,13,NA,3,3,2,13,167,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,132630.209478,133894.993306,1,97,8,8,2.72,3,3,0,2,0,2,43,1,1,3,NA +70840,7,2,2,10,NA,4,4,1,10,122,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12474.59761,12808.328162,1,100,3,3,0.43,4,4,0,2,0,1,35,1,4,1,3 +70841,7,2,1,11,NA,3,3,2,11,142,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21147.476454,21752.181979,1,95,1,1,0.12,3,3,0,2,0,2,40,1,5,3,NA +70842,7,2,1,32,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,16945.481829,17170.237079,1,99,77,77,NA,1,1,0,0,0,1,32,2,5,5,NA +70843,7,2,2,0,11,3,3,2,NA,13,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8383.207272,8554.601009,1,91,12,1,0.07,6,1,1,1,0,2,29,1,4,6,NA +70844,7,2,1,19,NA,4,4,2,19,235,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10817.360862,11313.215634,2,99,99,99,NA,5,5,0,2,0,2,20,1,3,6,NA +70845,7,2,2,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,1,1,2,2,1,2,2,1,2,2,1,20000.263815,19016.481945,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA 
+70846,7,2,1,9,NA,2,2,2,9,112,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,11807.210833,12400.930816,1,90,7,7,2.1,3,3,0,2,0,2,37,1,3,5,NA +70847,7,2,2,19,NA,4,4,2,19,232,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11711.384457,11743.959438,2,95,3,3,0.43,4,4,2,0,0,2,23,1,2,5,NA +70848,7,2,1,80,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,6994.319065,7352.726651,2,99,3,3,0.9,1,1,0,0,1,1,80,1,3,2,NA +70849,7,2,2,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,37102.230755,38432.724809,2,101,5,2,0.73,2,1,0,0,1,2,49,1,3,6,NA +70850,7,2,2,28,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,17858.942687,20013.475913,1,97,14,14,4.5,3,3,1,0,0,1,30,1,5,1,5 +70851,7,2,1,36,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,73127.351507,77018.309866,1,103,9,9,3.64,2,2,0,0,0,1,36,1,5,1,5 +70852,7,2,1,14,NA,4,4,1,14,179,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10879.348751,11378.044973,2,100,8,8,1.1,7,7,3,3,0,2,58,1,3,5,NA +70853,7,2,2,19,NA,4,4,1,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12531.903464,12935.738352,2,100,5,5,1.07,4,4,0,1,0,2,36,1,3,5,NA +70854,7,1,2,53,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,16033.091438,0,1,96,8,8,2.62,3,3,0,0,0,2,53,1,4,4,NA +70855,7,2,1,9,NA,1,1,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14820.807433,15173.085782,2,102,7,7,1.53,5,5,1,2,0,1,36,1,2,1,3 +70856,7,2,1,1,16,4,4,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10232.679671,11283.202594,2,97,1,1,0.13,4,4,2,0,1,2,62,1,2,4,NA +70857,7,2,1,9,NA,3,3,1,9,119,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,26240.82746,27691.552263,1,94,4,4,0.56,5,5,1,2,0,1,34,1,2,3,NA +70858,7,2,2,9,NA,3,3,2,10,120,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,58907.362493,59749.51689,1,91,10,10,2.77,5,5,0,3,0,1,43,1,5,1,5 +70859,7,2,2,0,6,4,4,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4581.358327,4715.327587,2,100,6,6,1.34,4,4,1,2,0,2,31,1,4,5,NA 
+70860,7,2,2,44,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,2,1,2,1,2,2,1,2,2,NA,NA,NA,NA,19075.861607,19725.492117,1,96,7,7,1.69,4,4,0,1,0,2,19,2,4,NA,NA +70861,7,2,2,16,NA,2,2,2,16,201,NA,NA,2,2,2,10,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,17848.732433,20206.102312,2,91,99,99,NA,6,6,1,3,0,2,20,2,2,5,NA +70862,7,2,1,75,NA,5,6,2,NA,NA,2,NA,2,1,9,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,10818.545624,11805.931644,3,90,4,4,0.99,2,2,0,0,2,1,75,2,4,1,NA +70863,7,2,2,77,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,3,NA,1,2,1,1,2,1,1,2,1,NA,11462.813869,11819.991623,1,93,2,2,0.87,1,1,0,0,1,2,77,2,4,3,NA +70864,7,2,1,1,12,2,2,2,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12567.081957,13464.72134,1,97,5,5,1.04,4,4,1,1,0,1,32,1,3,6,NA +70865,7,2,1,45,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,116464.874823,119757.87667,2,94,9,9,3.35,3,3,0,0,1,2,52,1,2,1,3 +70866,7,2,2,16,NA,3,3,2,16,203,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,80091.55101,82618.226829,1,101,14,14,3.9,4,4,0,2,0,2,41,1,2,1,2 +70867,7,2,2,29,NA,1,1,2,NA,NA,2,NA,2,1,3,NA,4,6,1,2,2,2,2,2,2,2,2,2,2,39426.061521,39404.861746,2,94,7,7,1.34,5,5,2,1,0,1,32,2,1,1,NA +70868,7,2,1,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,32980.717958,32895.997285,1,95,4,4,1.21,2,2,0,0,0,1,46,1,2,1,2 +70869,7,2,1,38,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,2,1,2,2,1,2,2,2,37715.365512,45063.17837,2,102,7,7,1.04,7,7,1,2,0,2,37,2,1,1,2 +70870,7,2,1,7,NA,3,3,2,7,86,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,46228.073505,47549.950918,2,94,15,15,4.59,4,4,1,1,0,2,37,1,5,1,5 +70871,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11565.978374,12485.281365,1,99,6,6,1.12,4,4,0,2,0,1,39,1,3,1,3 +70872,7,1,2,23,NA,5,6,NA,NA,NA,2,NA,2,2,3,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,16844.740449,0,3,91,7,7,3.9,2,1,0,0,0,2,21,NA,NA,5,NA +70873,7,2,2,6,NA,1,1,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16371.237244,2,98,14,14,3.25,5,5,2,1,0,1,37,1,5,1,5 
+70874,7,2,1,73,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,6725.306794,7069.929472,2,99,7,7,1.52,4,4,0,0,2,1,73,1,3,2,NA +70875,7,2,1,15,NA,3,3,1,15,191,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71757.969058,71552.342229,1,98,10,10,2.2,6,6,1,3,0,2,31,1,4,6,NA +70876,7,2,1,3,NA,4,4,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9090.947573,10024.256259,1,90,6,6,1.57,3,3,1,0,0,2,25,1,3,6,NA +70877,7,2,1,56,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,2,2,2,2,2,2,2,22157.736644,21832.606015,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +70878,7,2,1,25,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,19163.076248,19068.627285,2,97,7,7,2.38,2,2,0,0,0,1,25,1,2,1,2 +70879,7,2,2,10,NA,5,6,2,10,122,NA,NA,1,1,NA,3,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,9620.269705,10600.850342,2,91,6,6,1.57,3,3,0,1,0,1,41,2,3,1,3 +70880,7,2,2,0,1,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8114.787453,8336.925581,1,94,6,6,0.87,6,6,2,2,0,2,24,1,4,6,NA +70881,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,112935.983109,114691.149811,1,102,10,10,4.76,2,2,0,0,0,1,23,1,5,5,NA +70882,7,2,2,8,NA,4,4,1,8,98,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12120.418061,12942.575479,2,101,6,6,0.96,5,5,0,4,0,2,36,1,4,4,NA +70883,7,2,1,31,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,19117.284298,19849.243802,2,95,8,8,2.97,2,2,0,0,0,2,56,1,5,2,NA +70884,7,2,1,14,NA,4,4,2,14,175,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12721.673656,12746.825446,2,100,6,6,1.34,4,4,1,2,0,2,31,1,4,5,NA +70885,7,2,1,8,NA,1,1,1,8,106,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11399.23838,11468.323492,2,103,2,2,0.22,7,7,0,3,0,2,39,2,1,5,NA +70886,7,2,2,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,29040.300396,28462.118402,2,101,1,1,0.11,4,1,0,0,0,2,21,1,4,5,NA +70887,7,2,2,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,26668.458882,26178.344406,2,102,10,10,4.62,2,2,0,0,0,2,59,1,5,1,NA 
+70888,7,1,1,0,9,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17196.879565,0,2,94,14,14,2.87,5,5,2,1,0,1,37,1,3,1,4 +70889,7,2,2,18,NA,1,1,1,18,220,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16781.078148,17378.8338,2,103,2,2,0.22,7,7,0,3,0,2,39,2,1,5,NA +70890,7,2,2,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,16181.169973,15883.791498,1,96,9,7,3.21,2,1,0,0,0,1,58,1,4,6,NA +70891,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,4,2,1,2,2,1,2,2,NA,NA,NA,NA,14599.561708,14200.006402,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +70892,7,2,1,6,NA,4,4,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8453.214884,8437.321796,2,101,1,1,0.1,6,6,1,2,1,2,27,1,2,1,2 +70893,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,97101.614214,108614.152251,3,92,6,6,1.17,4,4,0,2,0,2,30,1,2,1,4 +70894,7,2,1,70,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,53149.251154,56449.488341,2,94,8,8,3.4,2,2,0,0,2,2,64,1,4,1,2 +70895,7,2,2,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,NA,27602.403077,28546.401551,1,94,2,2,0.55,2,2,0,0,2,2,75,1,1,3,NA +70896,7,2,1,15,NA,4,4,1,15,191,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13276.485807,13240.468288,2,100,10,7,2.05,4,3,0,2,0,1,20,1,4,6,NA +70897,7,2,1,52,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17801.655316,19397.911862,2,95,7,6,1.88,3,2,0,0,0,2,56,1,3,2,NA +70898,7,2,1,71,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,2,1,NA,2,2,2,2,2,2,1,2,2,NA,11755.776731,12827.229805,3,90,12,13,NA,2,1,0,0,1,1,71,2,2,1,NA +70899,7,2,2,66,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,136832.42119,141624.96623,2,91,15,15,5,2,2,0,0,1,2,66,1,5,3,NA +70900,7,2,1,59,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,19093.004278,19032.106491,2,99,2,2,0.19,6,6,0,1,0,1,59,1,2,5,NA +70901,7,2,2,0,11,4,4,1,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4943.553921,5444.191192,1,100,14,14,3.47,4,4,2,0,0,1,34,1,5,1,5 
+70902,7,2,1,11,NA,4,4,2,11,140,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8930.369586,9237.151674,3,91,4,4,1.09,2,2,0,1,0,2,40,1,5,3,NA +70903,7,2,2,18,NA,1,1,2,19,228,2,NA,2,2,4,13,NA,NA,NA,1,2,2,1,2,2,2,2,2,2,17544.592739,19246.751055,3,92,6,6,1,6,6,1,1,0,1,42,2,1,1,4 +70904,7,2,1,19,NA,2,2,2,19,233,2,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15351.501195,16874.029415,2,90,4,4,1.29,2,2,0,0,0,2,40,1,2,5,NA +70905,7,2,2,1,16,1,1,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13322.479814,14708.37523,1,92,14,14,3.9,4,4,2,0,0,2,29,1,4,1,4 +70906,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,78529.577822,84771.37367,1,101,14,14,3.9,4,4,0,2,0,2,41,1,2,1,2 +70907,7,2,1,2,NA,4,4,2,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8064.791396,8892.751276,1,91,99,99,NA,3,3,1,0,0,1,33,2,5,5,NA +70908,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,74379.731652,76506.539917,2,92,15,7,3.67,4,1,0,0,0,1,28,1,5,5,NA +70909,7,1,2,13,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6587.470541,0,1,93,9,9,1.77,7,7,0,2,0,2,56,NA,NA,5,NA +70910,7,2,1,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,18404.681357,20079.255367,1,101,3,3,0.61,4,4,1,2,0,1,38,1,2,4,NA +70911,7,2,1,59,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,146786.506338,147038.128142,1,90,15,15,5,3,3,0,0,0,1,59,1,5,1,5 +70912,7,2,1,67,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,5,2,NA,2,2,2,1,2,2,2,2,2,2,7290.319536,7407.101145,2,93,10,10,5,1,1,0,0,1,1,67,2,5,2,NA +70913,7,2,1,28,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15507.441472,18423.236589,1,100,9,9,4.92,1,1,0,0,0,1,28,1,5,5,NA +70914,7,2,1,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,20568.024192,20733.954282,2,92,1,1,0.02,1,1,0,0,0,1,40,1,2,5,NA +70915,7,1,1,80,NA,3,3,NA,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,37318.801462,0,1,98,6,6,1.95,2,2,0,0,2,1,80,1,3,1,3 
+70916,7,2,2,78,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,1,2,NA,2,2,2,2,2,2,1,2,2,NA,17318.187297,23904.945555,2,90,6,3,0.92,2,1,0,0,2,1,76,2,1,2,NA +70917,7,2,1,48,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,22165.906062,24153.500588,1,96,7,7,2.78,2,2,0,0,0,1,48,1,2,6,NA +70918,7,2,2,7,NA,3,3,2,7,89,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21910.300386,22612.118571,1,99,4,4,1.02,2,2,0,1,0,2,27,1,4,3,NA +70919,7,2,2,7,NA,3,3,1,7,89,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,64166.795496,75335.746424,1,101,5,5,0.89,5,5,1,2,0,1,31,1,2,1,1 +70920,7,2,2,46,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20303.639991,21936.687597,1,97,7,7,1.48,5,5,0,1,0,2,46,2,4,1,NA +70921,7,2,2,29,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,13976.989712,14629.714123,2,94,77,77,NA,6,6,2,0,0,2,18,1,3,NA,NA +70922,7,2,2,2,NA,5,7,2,2,28,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5588.504831,5743.223814,2,91,15,15,3.7,5,5,1,2,0,1,50,NA,NA,1,5 +70923,7,2,1,63,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,89103.220446,89938.887954,2,98,7,7,2.16,3,3,0,0,1,2,55,1,3,1,4 +70924,7,2,1,0,10,5,6,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5528.06179,5490.918184,1,91,14,14,4.48,3,3,1,0,0,1,31,2,5,1,5 +70925,7,2,1,57,NA,5,6,1,NA,NA,2,NA,2,2,7,NA,3,1,NA,1,2,2,1,2,2,1,2,2,3,18416.819037,20059.719948,2,96,5,5,1.36,2,2,0,1,0,1,57,2,3,1,NA +70926,7,1,2,15,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,66,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,17848.732433,0,2,91,8,8,1.85,5,5,0,2,1,1,39,2,3,1,4 +70927,7,2,1,63,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,9946.027787,10495.972559,2,93,8,8,2.62,3,3,0,0,2,2,64,2,1,1,1 +70928,7,2,2,6,NA,4,4,2,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,7814.742747,8095.069503,2,95,6,6,0.97,6,6,2,1,0,1,54,1,3,6,NA +70929,7,2,2,29,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,17350.142808,18091.024448,1,92,14,14,2.42,6,6,1,3,0,1,30,1,4,6,NA 
+70930,7,2,1,6,NA,2,2,2,6,79,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9807.589376,10028.771,2,90,10,10,3.13,4,4,1,2,0,2,39,1,5,4,NA +70931,7,2,2,44,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,36945.168658,38286.242768,2,96,10,10,3.78,3,3,0,1,0,1,42,1,3,1,3 +70932,7,2,2,43,NA,5,6,2,NA,NA,2,NA,2,2,1,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,18396.558383,19503.669771,1,90,15,15,5,4,4,1,1,0,2,43,2,5,1,5 +70933,7,2,2,46,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19150.604366,19251.871661,3,91,15,15,5,2,2,0,0,0,2,46,1,5,1,5 +70934,7,2,1,0,4,4,4,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5089.464235,5316.261144,2,93,6,6,1.15,5,5,3,1,0,1,29,1,3,5,NA +70935,7,2,2,46,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,20084.755052,20430.629009,1,99,15,15,5,4,4,0,2,0,2,46,1,5,1,5 +70936,7,2,1,27,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,2,2,2,2,35669.2076,40318.090187,2,94,8,8,2.7,3,3,1,0,0,1,27,1,3,1,4 +70937,7,2,2,12,NA,4,4,2,12,154,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12857.456314,12921.235691,2,97,4,4,0.57,5,5,1,3,0,2,33,1,3,5,NA +70938,7,2,1,49,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,52756.101074,53793.249362,3,92,8,8,2.97,2,2,0,0,1,1,49,1,2,3,NA +70939,7,2,2,28,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,2,6,2,1,2,2,1,2,2,1,2,2,1,39550.779175,41765.24601,2,96,14,14,3.36,4,4,1,1,0,2,28,1,2,6,NA +70940,7,2,1,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,131567.279219,134004.437006,1,97,15,15,5,3,3,0,0,0,1,53,NA,NA,1,4 +70941,7,2,1,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,87954.465296,90754.904886,2,100,10,10,3.13,4,4,0,2,0,1,45,1,4,1,4 +70942,7,2,2,16,NA,1,1,1,16,194,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15690.47168,16249.379042,1,102,8,8,1.33,7,7,1,4,0,2,32,1,3,1,2 +70943,7,2,1,8,NA,1,1,2,8,105,NA,NA,1,1,NA,3,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,10449.970381,10975.442173,1,90,2,1,0.17,5,2,0,1,0,1,25,2,2,6,NA 
+70944,7,2,2,0,0,1,1,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7798.643057,7634.568742,1,102,8,8,2.06,4,4,2,0,0,1,28,1,2,1,5 +70945,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,33558.731068,36590.680559,1,101,3,1,0,2,1,0,0,1,2,65,1,3,3,NA +70946,7,1,1,2,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24594.444896,0,1,98,5,2,0.42,4,3,2,0,0,1,24,1,3,6,NA +70947,7,2,2,4,NA,3,3,2,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,59430.588937,63158.057955,2,91,15,15,4.2,6,6,2,0,2,1,63,1,1,1,3 +70948,7,2,2,19,NA,4,4,1,19,231,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18905.695776,2,101,1,1,0.05,2,1,0,0,0,2,19,1,3,NA,NA +70949,7,2,2,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,128590.415432,131137.819973,1,102,15,15,5,2,2,0,0,2,2,68,1,4,1,5 +70950,7,2,1,50,NA,2,2,1,NA,NA,2,NA,2,2,6,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,34422.302712,34961.637568,2,92,2,2,0.4,3,3,0,0,0,1,50,2,4,1,4 +70951,7,2,1,7,NA,4,4,2,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8050.419807,8503.565046,2,99,4,4,0.41,7,7,0,2,0,2,36,1,3,5,NA +70952,7,1,2,64,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19125.640099,0,2,95,77,77,NA,2,2,0,0,2,1,64,NA,NA,1,3 +70953,7,1,2,9,NA,2,2,NA,NA,NA,NA,NA,2,1,2,4,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12789.411811,0,1,102,7,7,1.9,4,4,1,1,0,1,29,1,4,1,3 +70954,7,2,1,61,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,28559.076421,28710.827523,1,95,5,5,2.2,1,1,0,0,1,1,61,1,3,3,NA +70955,7,2,1,14,NA,5,6,1,14,169,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8915.81491,9312.338117,1,92,12,12,NA,4,4,0,1,0,1,53,2,5,1,4 +70956,7,2,1,15,NA,1,1,2,15,188,NA,NA,1,1,NA,9,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20398.562455,20481.595361,2,94,4,4,0.81,3,3,0,1,0,1,49,2,3,1,3 +70957,7,2,1,12,NA,1,1,2,12,154,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,20398.562455,20710.36162,2,94,13,13,NA,5,5,0,3,0,1,32,2,2,1,1 
+70958,7,2,1,1,15,2,2,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9237.934626,9085.733547,2,90,5,5,0.89,4,4,2,0,0,2,26,2,2,4,NA +70959,7,2,2,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,44296.789734,46174.107604,2,102,4,1,0,2,1,0,0,0,2,43,1,5,6,NA +70960,7,2,2,8,NA,4,4,2,8,100,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7120.704736,7311.203604,1,99,15,8,2.7,4,3,0,2,0,1,49,1,4,6,NA +70961,7,2,2,74,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,NA,55881.349278,57792.483909,2,92,9,9,5,1,1,0,0,1,2,74,1,5,5,NA +70962,7,2,1,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,NA,NA,NA,1,2,2,1,14321.1466,14549.731107,1,96,15,15,5,6,6,1,1,1,2,44,1,3,1,3 +70963,7,2,2,17,NA,2,2,1,17,209,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,18556.092615,20535.833102,1,103,7,7,1.03,7,7,0,3,0,1,50,2,1,1,1 +70964,7,2,2,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,27738.890335,27824.17823,2,101,4,4,0.73,5,5,1,2,0,1,40,1,5,1,5 +70965,7,2,1,5,NA,2,2,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11161.439403,11514.748174,2,99,13,13,NA,6,6,2,1,0,2,31,1,4,6,NA +70966,7,2,1,20,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,14313.345971,15054.191928,3,91,5,3,0.92,2,1,0,0,0,1,20,1,4,6,NA +70967,7,2,1,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,22723.53676,24747.854656,1,92,NA,99,NA,4,1,0,2,0,2,56,1,4,4,NA +70968,7,2,2,8,NA,3,3,1,9,108,NA,NA,2,2,3,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,13450.606713,14078.286101,3,91,4,4,0.65,5,5,2,2,0,2,27,2,2,3,NA +70969,7,2,1,45,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,3,1,NA,2,2,2,2,2,2,1,2,2,2,31233.526521,31366.480522,1,100,8,3,1.03,6,1,1,0,0,1,33,2,3,6,NA +70970,7,2,1,31,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,15867.258653,18200.873488,3,91,3,3,0.76,3,3,0,1,0,2,24,1,4,6,NA +70971,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,20486.987447,21605.827026,2,101,3,3,0.6,3,3,0,2,0,1,39,1,4,4,NA 
+70972,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,5950.975297,5997.525697,2,99,77,77,NA,3,3,0,1,1,1,61,1,4,3,NA +70973,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,18404.681357,19079.901799,1,101,7,7,1.83,3,3,0,0,2,1,67,1,1,1,2 +70974,7,2,1,49,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27356.080541,30234.306382,1,101,5,5,1.15,3,3,0,1,0,1,49,1,3,1,4 +70975,7,2,2,22,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,118761.81384,123645.102237,3,91,4,4,1.02,2,2,0,0,0,1,22,1,5,1,5 +70976,7,2,1,17,NA,2,2,1,17,211,2,NA,2,1,4,11,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,20327.892981,20116.618863,2,93,4,4,0.69,4,4,0,1,1,2,66,2,3,2,NA +70977,7,2,1,3,NA,3,3,1,4,48,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,98584.614475,106037.774242,2,101,6,6,1.35,3,3,1,0,0,1,42,1,4,6,NA +70978,7,2,1,15,NA,4,4,2,15,183,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11834.781205,13035.394237,2,90,2,2,0.31,5,5,0,2,1,2,71,1,2,2,NA +70979,7,2,1,46,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,18230.629439,18292.13185,1,93,15,15,5,5,5,1,2,0,2,40,1,5,1,5 +70980,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,114168.79702,117438.65007,1,95,8,8,1.28,7,7,1,4,0,1,32,1,3,1,3 +70981,7,2,1,50,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,13525.38183,13953.637486,2,95,15,15,5,3,3,1,0,0,1,50,1,5,1,NA +70982,7,2,1,44,NA,2,2,2,NA,NA,2,NA,2,2,1,NA,2,1,NA,2,2,2,2,2,2,2,2,1,2,39096.402803,46193.238956,2,91,4,4,0.76,4,4,1,0,0,2,25,2,4,77,NA +70983,7,2,2,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,71746.365541,83232.546386,1,93,15,15,5,2,2,0,0,0,2,39,1,5,1,5 +70984,7,2,1,73,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,96955.880155,102625.661518,2,101,15,15,5,2,2,0,0,2,1,73,1,5,1,5 +70985,7,2,1,74,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,8601.453077,8734.617022,2,101,5,5,2.02,1,1,0,0,1,1,74,1,2,3,NA 
+70986,7,2,2,4,NA,4,4,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9268.093277,9507.318489,1,99,14,14,4.05,3,3,1,0,0,2,32,1,5,1,4 +70987,7,2,2,22,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,130601.953362,132156.64994,2,91,15,15,5,4,4,0,0,0,1,54,1,5,1,NA +70988,7,2,2,1,14,4,4,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8754.193667,9419.610037,2,98,8,8,1.8,5,5,2,1,0,1,32,1,4,1,5 +70989,7,2,1,16,NA,3,3,1,16,193,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,30943.024697,32205.010072,1,101,4,4,0.58,6,6,0,4,0,2,41,1,3,5,NA +70990,7,2,2,6,NA,1,1,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15048.411882,15335.781033,1,103,14,14,2.96,5,5,1,2,0,1,34,1,4,1,5 +70991,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,25520.996455,28503.986081,1,93,3,3,0.75,2,2,0,0,1,2,80,1,1,2,NA +70992,7,2,2,1,14,2,2,2,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11412.410776,11898.666241,1,97,3,3,0.44,5,5,2,2,0,2,26,1,4,4,NA +70993,7,2,1,38,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,15399.847485,15604.102319,1,103,8,8,4.3,1,1,0,0,0,1,38,1,5,5,NA +70994,7,2,2,13,NA,2,2,2,13,159,NA,NA,2,2,3,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,12680.621719,13709.581084,2,90,99,99,NA,5,5,1,1,0,2,40,2,3,1,1 +70995,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,2,1,2,2,1,2,2,1,2,2,1,31335.13799,32015.561405,1,95,6,4,1.26,6,2,1,2,0,1,28,1,2,1,2 +70996,7,2,1,41,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,49072.976757,48585.714967,1,97,15,15,5,2,2,0,0,0,1,41,1,5,1,5 +70997,7,2,2,62,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,9570.416297,10279.373686,2,98,2,2,0.83,1,1,0,0,1,2,62,1,3,3,NA +70998,7,2,2,6,NA,2,2,2,6,73,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,10762.400563,11527.105697,2,90,6,6,2.01,2,2,0,1,0,2,26,1,3,5,NA +70999,7,2,2,50,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,161992.272945,161249.908749,1,91,14,14,3.15,5,5,0,1,0,2,50,1,5,1,5 
+71000,7,2,2,0,6,5,6,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,7065.624287,7145.357301,2,92,6,6,1.34,4,4,1,1,0,1,40,2,3,1,3 +71001,7,2,1,6,NA,4,4,2,6,74,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7386.979006,7738.237294,2,99,5,5,0.65,6,6,2,1,0,2,53,1,4,3,NA +71002,7,2,1,13,NA,1,1,1,13,165,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,24228.858782,24226.727855,2,102,7,7,2.16,3,3,0,2,0,2,41,1,5,3,NA +71003,7,2,1,78,NA,3,3,1,NA,NA,2,NA,2,1,7,NA,1,5,NA,1,2,1,1,2,1,1,2,1,NA,12184.871688,13005.96831,2,103,2,2,0.45,1,1,0,0,1,1,78,2,1,5,NA +71004,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,14809.997435,16069.288743,2,97,1,1,0.13,4,4,2,0,1,2,62,1,2,4,NA +71005,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,11355.3308,12230.621635,1,96,8,8,3.14,2,2,0,0,2,2,64,1,3,1,2 +71006,7,2,2,4,NA,1,1,2,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13366.393396,13546.155547,2,94,5,5,1.3,3,3,1,1,0,2,34,2,2,5,NA +71007,7,2,2,1,16,1,1,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14326.094268,15816.39252,3,92,15,15,5,3,3,1,0,0,1,34,1,5,1,5 +71008,7,2,1,50,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,25969.864445,25963.066565,1,97,14,14,5,3,3,0,0,0,2,51,1,5,1,4 +71009,7,1,2,4,NA,5,7,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15046.736747,0,1,101,6,6,1.07,5,5,2,1,0,1,30,1,3,1,3 +71010,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,43813.24867,47156.814318,1,98,5,2,0.42,4,3,2,0,0,1,24,1,3,6,NA +71011,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,175997.804296,176135.086149,1,101,3,3,0.73,2,2,0,0,1,1,60,1,3,1,5 +71012,7,2,2,24,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,144794.522788,148845.957701,1,97,8,8,3.57,2,2,0,0,0,2,49,1,3,3,NA +71013,7,2,2,5,NA,4,4,2,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9986.420822,10886.623262,2,97,2,2,0.21,7,7,2,3,0,2,32,1,4,5,NA 
+71014,7,2,2,16,NA,5,7,2,16,199,NA,NA,1,1,NA,8,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11747.462633,12227.159724,2,99,6,6,1.18,5,5,0,3,0,2,38,1,2,5,NA +71015,7,2,2,49,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,23342.391629,23465.824854,1,92,15,15,5,2,2,0,0,0,2,49,1,5,3,NA +71016,7,2,2,5,NA,1,1,1,5,62,NA,NA,2,2,1,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,17282.036547,19079.832121,2,102,15,15,5,3,3,1,0,0,1,41,2,2,1,5 +71017,7,2,2,10,NA,3,3,2,10,122,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15442.305642,16400.253331,2,94,5,5,1.3,3,3,0,1,0,1,43,1,3,6,NA +71018,7,2,1,5,NA,3,3,2,5,62,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,57680.74785,67586.530677,1,99,5,5,0.89,4,4,2,0,0,2,31,1,4,1,5 +71019,7,2,2,10,NA,1,1,1,10,130,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,15962.145468,16061.826248,2,98,2,2,0.27,4,4,2,1,0,2,32,2,2,5,NA +71020,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,30212.098573,34791.026267,1,92,3,3,1.01,1,1,0,0,1,2,80,1,3,2,NA +71021,7,2,1,64,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,6011.560142,6320.246286,2,99,3,3,1.29,1,1,0,0,1,1,64,2,4,3,NA +71022,7,2,1,11,NA,3,3,1,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19590.665143,20475.022602,3,91,5,5,0.87,4,4,0,2,0,2,38,1,2,3,NA +71023,7,2,1,10,NA,1,1,1,10,122,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13822.148996,13905.918164,3,92,15,15,3.15,7,7,0,4,0,2,35,2,3,3,NA +71024,7,2,1,7,NA,3,3,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,47596.305609,49235.587472,1,99,14,14,5,3,3,0,1,0,1,35,1,5,1,5 +71025,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,7514.993062,9722.610426,2,90,1,1,0.28,1,1,0,0,1,1,61,1,2,5,NA +71026,7,2,1,14,NA,1,1,1,14,176,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19570.996814,19477.677478,2,100,4,4,0.81,4,4,0,2,0,1,56,1,4,1,2 +71027,7,2,1,10,NA,1,1,1,10,129,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13927.458372,14147.852881,2,98,3,3,0.33,7,7,2,3,0,1,40,2,1,1,1 
+71028,7,2,1,2,NA,1,1,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10736.26749,11503.13577,1,103,5,5,1.02,4,4,2,0,0,1,25,1,2,1,4 +71029,7,2,2,25,NA,5,6,2,NA,NA,2,NA,2,1,3,NA,3,1,2,1,2,1,1,2,2,NA,NA,NA,NA,13820.210756,14687.211885,1,90,8,8,1.43,7,7,2,0,0,1,23,2,4,1,3 +71030,7,2,2,13,NA,2,2,2,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14437.97544,15197.369043,2,90,14,14,3.45,4,4,1,1,0,2,34,2,5,6,NA +71031,7,1,1,27,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,34087.731449,0,3,92,4,3,0.52,5,4,0,0,0,2,57,1,4,1,2 +71032,7,2,2,12,NA,1,1,1,13,156,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,1,2,2,2,22424.988432,23223.784759,1,94,7,7,2.1,3,3,0,1,0,2,48,2,1,1,3 +71033,7,1,1,26,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,17579.006909,0,2,103,15,15,5,1,1,0,0,0,1,26,1,3,5,NA +71034,7,2,1,11,NA,4,4,2,11,134,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8451.853606,8465.026785,1,93,7,7,1.97,4,4,1,2,0,2,33,1,4,3,NA +71035,7,2,1,7,NA,5,6,2,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10810.913614,11461.625841,1,97,14,14,2.87,5,5,0,3,0,2,40,2,5,1,5 +71036,7,2,1,29,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,105262.341027,110623.017272,2,98,8,8,3.67,2,2,0,0,0,2,54,1,4,3,NA +71037,7,2,2,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15497.844354,15358.163767,1,99,15,15,5,2,2,0,0,0,1,56,1,4,1,4 +71038,7,2,1,60,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,2,1,NA,2,2,2,2,2,2,1,2,2,2,8609.250304,11228.904188,2,90,1,1,0.27,2,2,0,0,1,1,60,2,2,1,1 +71039,7,2,2,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,81385.450947,85814.999376,1,93,15,15,5,2,2,0,0,0,2,30,1,5,1,5 +71040,7,2,1,63,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,36541.405352,36735.571242,1,101,4,4,1.22,2,2,0,0,1,1,63,1,2,1,2 +71041,7,2,1,9,NA,5,6,2,9,112,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10810.913614,11461.625841,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 
+71042,7,2,1,38,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,3,13147.594977,14207.241654,1,102,5,5,0.92,5,5,1,2,0,2,44,2,1,1,2 +71043,7,2,2,9,NA,4,4,1,9,110,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13192.206605,13608.834197,2,102,9,9,2.68,4,4,0,2,0,2,32,1,4,1,NA +71044,7,2,1,0,2,5,7,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9570.577309,9675.725524,1,95,6,6,1.15,5,5,2,1,0,1,29,1,4,6,NA +71045,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,NA,10947.445281,13128.299802,1,94,4,4,1.43,1,1,0,0,1,1,80,1,3,4,NA +71046,7,2,1,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,144353.133634,150786.620773,1,91,14,14,4.03,4,4,0,2,0,1,52,1,4,1,5 +71047,7,2,2,52,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,NA,16058.142925,17306.419461,2,95,5,5,1.18,3,3,0,1,0,2,55,1,4,5,NA +71048,7,2,2,52,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,NA,NA,NA,NA,24870.513993,26515.471818,2,98,1,1,0.13,4,4,2,0,0,2,52,1,2,4,NA +71049,7,2,2,20,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,57438.356061,66140.327263,2,101,1,1,0.08,1,1,0,0,0,2,20,1,4,5,NA +71050,7,2,2,0,4,2,2,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,4481.392842,4343.672823,2,90,12,12,NA,7,7,2,2,0,1,39,2,1,1,1 +71051,7,2,1,78,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,79754.311902,90534.066519,1,97,15,15,5,2,2,0,0,2,1,78,1,3,1,3 +71052,7,2,2,18,NA,1,1,1,18,217,2,NA,2,2,3,10,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,19993.78712,21491.713499,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +71053,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,9680.216878,10112.428208,1,96,12,10,5,2,1,0,0,1,2,61,1,4,3,NA +71054,7,2,2,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,140431.173819,139253.659476,1,94,10,6,2.42,2,1,0,0,0,1,59,1,4,6,NA +71055,7,2,2,0,4,5,6,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6580.402774,6856.045444,1,94,99,99,NA,5,5,1,1,0,2,21,1,3,5,NA 
+71056,7,1,1,59,NA,5,6,NA,NA,NA,2,NA,2,1,7,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,15409.233853,0,1,92,12,12,NA,4,4,0,0,0,1,59,2,3,1,4 +71057,7,2,2,62,NA,4,4,2,NA,NA,1,2,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,11190.39853,11690.037855,2,99,99,99,NA,2,2,0,0,2,2,62,1,5,3,NA +71058,7,2,2,19,NA,3,3,2,19,233,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,41202.729804,43105.611516,2,91,6,6,1.26,5,5,0,1,2,2,80,1,4,2,NA +71059,7,2,1,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,27227.937106,27501.844882,1,101,3,3,0.96,2,2,0,0,0,1,53,1,1,1,2 +71060,7,2,2,31,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,2,2,1,2,2,1,2,2,1,2,2,1,21306.824647,21870.080421,2,98,1,1,0.19,3,3,2,0,0,2,31,1,4,2,NA +71061,7,2,1,54,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,2,2,2,2,2,2,1,2,2,2,37557.946192,37296.340716,2,102,9,9,3.24,3,3,0,0,0,1,54,2,4,1,4 +71062,7,2,1,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,31291.360507,32453.566842,2,91,12,3,1.25,3,1,0,0,1,1,52,1,4,3,NA +71063,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,NA,NA,NA,NA,85610.546667,92292.669073,2,91,15,15,5,5,5,1,2,0,1,37,1,4,6,NA +71064,7,2,2,26,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,10783.449126,11243.921386,1,103,5,5,0.65,6,6,0,0,1,2,26,2,4,5,NA +71065,7,2,2,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,38739.556504,38769.774143,2,101,6,6,2.04,2,2,0,0,0,2,51,1,5,1,4 +71066,7,2,2,71,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,5,NA,2,2,2,2,2,2,1,2,2,NA,18697.677461,20882.090197,1,93,6,6,2.66,1,1,0,0,1,2,71,2,1,5,NA +71067,7,2,2,4,NA,1,1,2,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14812.229505,15746.041025,1,97,4,4,0.72,5,5,2,1,0,2,33,2,1,6,NA +71068,7,2,1,45,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,108839.502248,114346.356011,2,95,3,3,0.93,2,2,0,0,0,1,45,1,4,1,5 +71069,7,2,2,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,133022.268903,132572.26134,1,98,6,6,1.75,2,2,0,0,2,1,62,1,4,1,3 
+71070,7,2,2,79,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,2,2,NA,1,2,1,1,2,1,1,2,1,NA,12344.929687,13268.288363,1,93,2,2,0.73,1,1,0,0,1,2,79,2,2,2,NA +71071,7,2,2,36,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,14939.690418,15327.735621,2,92,8,8,3.54,2,2,0,0,0,1,30,1,5,1,5 +71072,7,2,2,50,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,3,4,NA,2,2,2,1,2,2,1,2,2,1,19676.781212,20391.029456,1,103,6,6,1.57,3,3,0,1,0,2,50,2,3,4,NA +71073,7,2,2,18,NA,4,4,2,18,226,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12857.456314,12921.235691,2,97,8,8,2.7,3,3,0,0,1,2,72,1,2,3,NA +71074,7,1,2,1,14,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6565.095533,0,1,90,15,15,5,4,4,2,0,0,1,36,1,5,1,5 +71075,7,2,2,3,NA,3,3,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50908.326714,56187.051166,1,99,5,5,0.89,4,4,2,0,0,2,31,1,4,1,5 +71076,7,2,2,63,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15732.436891,16737.04767,1,97,14,14,3.93,3,3,0,1,2,2,63,1,4,1,4 +71077,7,2,2,13,NA,1,1,1,13,158,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,2,2,2,1,2,2,1,12970.724558,13432.752316,2,103,8,8,1.29,7,7,3,1,0,2,53,2,2,4,NA +71078,7,2,2,22,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23614.167119,30467.110717,2,97,3,3,0.82,2,2,1,0,0,2,22,1,4,5,NA +71079,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,78529.577822,81088.445647,1,98,14,14,3.15,5,5,0,3,0,1,34,1,4,1,4 +71080,7,1,1,22,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,88006.905716,0,1,92,6,6,1.51,3,3,0,0,0,1,46,1,3,3,NA +71081,7,2,1,4,NA,4,4,2,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7431.820906,7738.904727,1,99,5,5,0.84,5,5,2,1,0,1,35,1,3,1,2 +71082,7,2,1,78,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,11550.158096,13775.328408,2,92,7,7,1.17,6,6,0,1,1,1,78,2,1,1,3 +71083,7,2,2,62,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,9497.094386,9921.129492,2,100,15,15,5,2,2,0,0,1,1,51,1,5,1,3 
+71084,7,2,1,55,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,23205.758348,23717.246496,3,92,5,5,1.03,4,4,0,3,0,1,55,1,4,4,NA +71085,7,2,1,1,15,4,4,2,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6837.992772,6921.398172,1,96,12,12,NA,2,2,1,0,0,2,32,1,5,5,NA +71086,7,2,1,24,NA,5,6,1,NA,NA,1,2,2,2,3,NA,4,5,NA,1,2,2,1,2,2,1,2,2,3,14979.624397,15387.451243,1,102,3,1,0.28,2,1,0,0,0,1,24,2,4,5,NA +71087,7,2,2,65,NA,1,1,1,NA,NA,2,NA,2,2,8,NA,1,2,NA,2,2,2,2,2,2,2,2,2,2,15207.312407,15896.113669,2,98,4,4,0.48,6,6,2,0,2,2,65,2,1,2,NA +71088,7,2,2,13,NA,2,2,2,13,160,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,16680.090857,17557.412914,1,90,15,15,5,4,4,0,2,0,2,43,1,5,1,5 +71089,7,2,2,32,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,3,4,2,2,2,2,2,2,2,1,2,2,2,49274.703023,52411.531429,2,102,77,77,NA,3,3,0,0,0,2,48,2,2,1,3 +71090,7,2,1,49,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11113.602843,11901.173467,3,90,15,15,5,4,4,0,2,0,2,41,2,5,1,5 +71091,7,2,2,39,NA,2,2,1,NA,NA,2,NA,2,1,NA,NA,4,6,2,1,2,2,1,2,2,NA,NA,NA,NA,45662.166351,45001.161705,1,92,14,9,3.77,3,2,0,1,0,2,39,2,4,6,NA +71092,7,2,2,71,NA,1,1,1,NA,NA,2,NA,2,2,77,NA,1,2,NA,2,2,2,2,2,2,2,2,2,NA,20127.084548,24087.460037,1,103,77,77,NA,4,4,1,0,1,1,20,1,3,6,NA +71093,7,2,1,50,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,185033.990584,188295.9196,1,97,15,15,5,2,2,0,0,0,1,50,1,3,6,NA +71094,7,2,2,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,140431.173819,139253.659476,1,94,9,9,3.97,2,2,0,0,0,1,59,1,3,5,NA +71095,7,2,1,31,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,81642.217375,85347.318656,3,91,14,14,5,3,3,1,0,0,2,30,1,4,1,5 +71096,7,2,2,13,NA,1,1,1,13,160,NA,NA,1,1,NA,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,18515.058419,19360.671834,2,96,6,6,1.11,6,6,0,2,1,1,40,2,2,1,2 +71097,7,2,1,21,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,25989.236835,2,101,99,1,0.21,3,1,0,0,0,1,20,1,4,5,NA 
+71098,7,2,2,19,NA,4,4,1,19,229,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,2,1,0.32,4,1,0,0,0,2,19,1,4,NA,NA +71099,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,47566.45715,54102.989916,1,90,9,9,5,1,1,0,0,1,2,80,1,5,3,NA +71100,7,2,2,80,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,12020.872946,12623.344251,3,90,15,15,4.07,5,5,0,2,1,1,42,1,5,1,4 +71101,7,2,1,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,19541.667675,20187.705923,2,95,2,2,0.75,1,1,0,0,0,1,51,1,2,5,NA +71102,7,2,1,19,NA,5,6,1,20,NA,2,NA,2,2,4,15,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,5904.602463,6308.718622,2,92,6,6,1.3,4,4,0,1,0,2,48,2,3,1,3 +71103,7,2,2,57,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,32709.179605,34590.259766,1,94,6,6,1.26,5,5,0,2,0,2,38,1,4,1,NA +71104,7,2,2,78,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,13141.36586,14020.834298,1,96,6,6,2.46,1,1,0,0,1,2,78,1,2,2,NA +71105,7,2,2,66,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,2,3,NA,2,2,2,2,2,2,2,2,1,2,8725.210615,9371.55685,2,93,14,14,2.43,7,7,1,1,1,2,66,2,2,3,NA +71106,7,2,1,1,19,3,3,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,31199.621619,36557.677601,1,97,5,5,0.87,4,4,2,0,0,1,35,1,5,1,5 +71107,7,2,2,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,2,1,2,2,1,2,2,1,2,2,1,64581.191728,65990.445093,2,94,15,10,5,3,1,0,0,0,1,26,NA,NA,77,NA +71108,7,2,2,20,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23845.8146,22808.13483,1,98,2,1,0.04,4,1,0,0,0,2,19,1,4,NA,NA +71109,7,2,2,0,9,1,1,1,NA,9,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8775.375504,9358.559086,2,102,14,14,3.8,4,4,2,0,0,2,41,2,4,1,5 +71110,7,2,1,2,NA,2,2,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13002.944731,13931.716845,2,91,4,4,0.69,4,4,2,0,0,2,21,1,3,6,NA +71111,7,2,2,75,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,18069.116427,18623.459107,2,100,2,2,0.62,1,1,0,0,1,2,75,1,4,2,NA 
+71112,7,2,2,55,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,22631.175755,22215.258702,2,96,7,7,2.58,2,2,0,0,1,2,55,1,4,3,NA +71113,7,2,2,26,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,12412.338679,12980.380959,3,90,15,15,3.7,5,5,0,0,0,1,56,2,3,1,3 +71114,7,2,2,20,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,132307.499792,154386.779868,1,98,1,1,0.31,1,1,0,0,0,2,20,1,4,5,NA +71115,7,2,2,12,NA,4,4,2,12,150,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11600.051433,12143.645666,3,90,14,14,2.97,5,5,0,2,1,1,73,2,3,2,NA +71116,7,2,2,7,NA,4,4,2,7,92,NA,NA,2,2,2,0,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,9412.419416,10343.079616,1,93,5,5,0.64,7,7,0,2,1,1,21,2,4,5,NA +71117,7,2,2,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,25241.487585,26857.231387,2,97,2,2,0.21,7,7,2,3,0,2,32,1,4,5,NA +71118,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,34802.051557,36349.694696,1,101,4,4,0.99,2,2,0,0,0,2,51,1,5,1,2 +71119,7,2,1,36,NA,3,3,1,NA,NA,2,NA,2,2,4,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,71111.990643,75738.399435,2,103,15,15,5,4,4,2,0,0,1,36,2,4,1,5 +71120,7,2,1,69,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,28559.076421,28710.827523,1,95,2,2,0.88,1,1,0,0,1,1,69,1,3,3,NA +71121,7,2,2,0,11,5,6,2,NA,12,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4326.150649,4254.537398,1,99,14,14,2.66,7,7,3,1,0,1,35,1,5,1,5 +71122,7,2,1,63,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,15,15,5,2,2,0,0,2,1,63,1,4,1,5 +71123,7,2,1,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,23824.065321,24171.124621,1,100,14,14,3.6,4,4,1,1,0,1,41,1,4,1,5 +71124,7,1,1,0,10,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,4944.997189,0,1,103,13,13,NA,6,6,1,2,0,2,40,2,3,3,NA +71125,7,2,2,9,NA,5,6,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,5064.232234,5712.276567,2,92,99,2,0.31,7,4,3,3,1,1,61,2,1,1,3 
+71126,7,2,1,5,NA,1,1,1,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,14321.363328,14968.737037,2,96,6,6,0.77,7,7,2,1,0,1,53,2,1,1,1 +71127,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,59282.809425,61101.54758,1,101,14,14,5,2,2,0,0,2,2,70,1,5,1,2 +71128,7,2,1,6,NA,3,3,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19998.804,20570.663593,1,94,3,3,0.37,5,5,0,3,0,2,29,1,4,4,NA +71129,7,2,1,16,NA,3,3,1,16,200,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,69211.537407,74163.290023,1,92,14,14,3.16,6,6,1,1,0,1,49,1,1,1,3 +71130,7,2,2,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,19130.568715,18607.010517,2,98,5,5,0.59,7,7,3,0,0,2,50,1,5,4,NA +71131,7,2,1,13,NA,3,3,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,71934.689876,71392.8162,1,100,15,15,5,4,4,0,1,0,1,50,1,4,1,4 +71132,7,2,2,68,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,134694.414609,139412.076132,2,94,10,10,4.3,2,2,0,0,2,1,69,1,4,1,5 +71133,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,82820.100138,94804.847305,1,90,7,7,3.13,1,1,0,0,0,2,36,1,5,5,NA +71134,7,2,1,3,NA,3,3,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,57680.74785,67586.530677,1,99,77,77,NA,4,4,1,1,0,1,31,2,3,1,3 +71135,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,33731.056243,35845.261325,2,98,3,3,0.88,2,2,0,0,2,1,77,1,1,1,3 +71136,7,2,1,21,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,9956.598907,12313.186759,1,95,5,5,0.73,6,6,1,0,1,1,62,2,3,1,NA +71137,7,2,1,61,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,NA,14488.953694,14771.336069,3,92,1,1,0.24,1,1,0,0,1,1,61,1,4,5,NA +71138,7,2,1,0,7,4,4,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10070.421465,10410.321555,2,101,4,4,0.76,4,4,1,1,0,1,28,1,2,1,4 +71139,7,2,1,7,NA,1,1,1,7,90,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11159.151566,11210.524757,1,102,4,4,0.5,6,6,2,2,0,1,25,1,2,1,3 
+71140,7,2,1,26,NA,1,1,2,NA,NA,2,NA,2,2,2,NA,4,5,NA,2,2,2,2,2,2,2,2,2,2,35669.2076,36620.108921,2,94,15,15,5,3,3,0,0,0,1,41,2,3,1,NA +71141,7,2,2,78,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,20131.904783,20903.590301,1,92,8,8,2.51,3,3,0,0,1,2,78,1,5,2,NA +71142,7,1,1,8,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,1,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8943.919305,0,1,92,5,5,0.63,7,7,0,4,1,1,60,NA,NA,1,NA +71143,7,2,2,53,NA,4,4,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19548.136896,19371.951409,1,90,15,15,5,2,2,0,0,0,1,57,2,4,1,5 +71144,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,22758.541444,24464.617143,1,98,7,7,1.03,7,7,0,4,0,2,20,1,3,5,NA +71145,7,2,2,15,NA,1,1,1,15,191,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21062.314667,21467.226385,1,94,10,10,2.94,4,4,0,2,0,2,52,1,5,2,NA +71146,7,2,1,34,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,130589.073376,141489.20172,2,102,8,6,2.57,2,1,0,0,0,1,34,1,4,5,NA +71147,7,2,2,11,NA,4,4,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8757.841043,8910.615224,2,100,13,13,NA,4,4,0,2,0,2,28,1,2,6,NA +71148,7,2,1,0,0,4,4,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6433.441277,6898.375971,2,97,1,1,0,5,5,3,0,0,2,23,1,2,1,3 +71149,7,2,1,12,NA,5,7,2,12,148,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5504.178541,5957.59682,1,99,15,15,4.47,4,4,0,2,0,2,52,2,5,1,5 +71150,7,2,1,8,NA,4,4,2,8,100,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14125.862146,14295.209168,1,97,5,5,0.91,4,4,0,3,0,2,44,1,4,5,NA +71151,7,2,2,8,NA,4,4,1,8,102,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11334.095519,11753.9565,2,96,5,5,0.67,6,6,1,2,1,1,34,1,4,1,4 +71152,7,2,1,80,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,1,2,1,2,2,1,2,2,NA,18812.694081,19206.027387,1,102,14,14,4.59,3,3,0,0,1,2,46,1,4,1,1 +71153,7,2,2,7,NA,4,4,2,7,94,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6655.097829,7106.52929,2,99,15,15,4.9,7,7,1,4,0,2,53,1,5,1,5 
+71154,7,2,2,11,NA,5,6,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10699.45895,11230.540406,1,97,15,15,2.33,7,7,2,4,0,2,40,2,5,1,4 +71155,7,2,1,41,NA,3,3,2,NA,NA,2,NA,2,2,5,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,117676.178911,119449.245993,2,99,14,14,5,1,1,0,0,0,1,41,2,5,5,NA +71156,7,2,2,16,NA,1,1,2,16,194,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,21120.105603,21750.292661,1,98,6,6,1.98,2,2,0,1,0,2,37,1,4,3,NA +71157,7,2,2,26,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,15967.106149,16195.487754,2,103,6,6,1.3,4,4,1,1,0,2,26,1,4,1,3 +71158,7,2,1,39,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,5,1,NA,1,2,1,1,2,1,1,2,2,3,16939.617906,17443.653216,3,91,14,14,3.58,4,4,1,1,0,1,39,2,5,1,5 +71159,7,2,2,37,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,97101.614214,100848.484003,3,92,15,15,5,4,4,2,0,0,1,38,1,5,1,5 +71160,7,2,2,3,NA,5,6,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7483.230909,7475.326886,2,96,7,7,2.27,3,3,1,0,0,1,34,2,5,1,5 +71161,7,2,1,45,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,18178.365056,18317.23991,1,94,6,6,2.05,2,2,0,1,0,1,45,1,2,5,NA +71162,7,2,1,13,NA,5,7,1,13,164,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14831.338089,14976.576871,1,98,3,3,0.43,4,4,0,1,0,2,39,1,2,5,NA +71163,7,2,1,11,NA,3,3,1,11,135,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,49922.147265,52289.098209,1,98,6,6,1.11,5,5,0,2,1,2,37,1,1,1,1 +71164,7,2,1,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,129656.862785,1,101,14,14,3.25,4,4,0,1,0,1,48,1,4,1,2 +71165,7,2,2,0,4,4,4,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4128.726485,4546.845595,2,93,5,5,1.05,3,3,1,0,0,2,29,1,3,5,NA +71166,7,2,1,55,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,NA,32720.69734,33802.428047,1,95,2,2,0.67,1,1,0,0,0,1,55,1,3,3,NA +71167,7,2,2,19,NA,5,7,2,19,231,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,91797.787708,94488.443358,1,95,5,5,1.3,3,3,0,0,1,2,19,1,3,NA,NA 
+71168,7,1,1,79,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,7,1,NA,1,2,2,NA,NA,NA,NA,NA,NA,NA,74473.849242,0,2,98,NA,NA,NA,2,2,0,0,2,1,79,1,7,1,NA +71169,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,121588.761604,120347.630506,1,91,10,10,3.78,3,3,0,0,2,1,62,1,5,1,5 +71170,7,1,1,23,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,106185.516032,0,2,96,5,5,2.02,1,1,0,0,0,1,23,1,4,5,NA +71171,7,2,2,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,12331.419303,13281.948926,2,95,6,6,1.65,2,2,0,0,2,1,62,1,1,1,3 +71172,7,2,2,42,NA,4,4,1,NA,NA,2,NA,2,2,5,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,25774.834017,25516.02368,1,100,5,5,1.05,3,3,1,0,0,2,42,2,5,1,NA +71173,7,2,2,5,NA,1,1,1,5,69,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,1,1,2,1,NA,NA,NA,NA,13560.780585,14415.696661,2,96,3,3,0.46,5,5,1,2,0,1,37,1,1,1,2 +71174,7,2,1,11,NA,3,3,1,11,141,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19272.757026,20113.595939,1,94,6,6,1.18,5,5,1,2,0,1,30,1,3,1,3 +71175,7,2,1,73,NA,2,2,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,16928.800715,17540.589716,1,90,9,9,3.97,2,2,0,0,1,1,73,1,4,1,5 +71176,7,2,1,62,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,7611.107768,7521.80097,2,97,4,4,1.47,1,1,0,0,1,1,62,1,4,3,NA +71177,7,1,2,4,NA,2,2,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10308.095307,0,2,90,14,14,3.06,5,5,1,2,0,1,42,1,4,1,5 +71178,7,2,2,4,NA,2,2,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10308.095307,10747.298537,2,90,3,3,0.46,5,5,3,0,0,2,22,1,2,5,NA +71179,7,1,2,32,NA,4,4,NA,NA,NA,2,NA,2,1,2,NA,5,1,3,1,2,2,1,2,2,NA,NA,NA,NA,27303.803575,0,1,96,7,7,2.23,3,3,1,0,0,1,29,2,5,1,5 +71180,7,2,1,57,NA,4,4,2,NA,NA,2,NA,2,2,2,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,12517.592486,12477.667198,1,96,10,10,1.8,7,7,1,1,0,1,57,2,1,1,3 +71181,7,2,1,19,NA,4,4,1,19,235,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,17606.165994,17558.40257,2,101,6,1,0.23,3,1,0,0,0,1,21,1,4,5,NA 
+71182,7,2,2,47,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,20084.755052,20500.648257,1,99,15,8,2.7,4,3,0,2,0,1,49,1,4,6,NA +71183,7,2,1,38,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,107347.639721,117795.71535,3,92,6,6,1.17,4,4,0,2,0,2,30,1,2,1,4 +71184,7,2,1,64,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,12579.986433,13271.133625,1,92,14,14,5,1,1,0,0,1,1,64,1,5,3,NA +71185,7,2,1,29,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,50647.682308,56120.478778,2,96,7,7,2.38,2,2,0,0,0,1,29,1,3,1,4 +71186,7,2,1,1,17,1,1,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,13386.323284,2,98,6,6,1.25,4,4,1,0,1,1,46,1,2,6,NA +71187,7,2,2,63,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,3,NA,1,2,2,1,2,2,1,2,2,1,9609.522554,10094.833809,3,91,2,2,0.72,1,1,0,0,1,2,63,1,1,3,NA +71188,7,2,2,1,14,4,4,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,7710.515686,2,100,5,5,0.94,4,4,2,0,0,2,33,1,4,6,NA +71189,7,2,1,51,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,144353.133634,150786.620773,1,94,15,15,5,3,3,0,1,0,2,43,1,5,1,5 +71190,7,2,2,12,NA,4,4,1,12,150,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12327.773112,12176.001893,2,95,1,1,0.18,4,4,2,1,0,2,38,1,2,5,NA +71191,7,2,2,15,NA,4,4,2,16,192,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11711.384457,11558.024533,2,95,8,8,1.61,6,6,1,3,0,2,48,1,3,5,NA +71192,7,2,1,1,23,2,2,2,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13002.944731,13931.716845,2,91,9,9,2.6,4,4,1,1,0,2,31,2,4,1,5 +71193,7,2,2,7,NA,3,3,2,7,85,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,19880.837381,19616.828143,1,101,6,6,0.87,6,6,2,2,0,2,23,1,4,6,NA +71194,7,2,2,61,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,11696.973403,11982.125462,2,101,4,4,0.85,4,4,1,0,1,2,61,1,4,3,NA +71195,7,2,2,36,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,16663.714937,17273.763859,2,93,15,15,5,3,3,1,0,0,1,41,2,5,1,5 
+71196,7,2,2,18,NA,5,6,1,18,223,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,1,1,2,2,1,5668.184078,5913.178809,2,92,10,8,2.01,7,4,1,1,1,2,27,2,3,1,3 +71197,7,2,1,36,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15867.258653,16587.349339,3,91,5,5,1.07,4,4,0,2,0,2,36,1,5,1,4 +71198,7,2,2,11,NA,4,4,1,11,138,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11195.065587,11726.17681,2,96,7,7,1.33,6,6,0,3,1,1,74,1,1,1,NA +71199,7,2,2,49,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21857.756498,23340.233721,2,97,7,7,1.92,3,3,0,1,0,1,57,1,4,1,4 +71200,7,2,1,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,151766.599459,151581.541662,3,91,15,15,5,1,1,0,0,0,1,51,1,5,5,NA +71201,7,2,2,57,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,17852.668137,18572.420627,3,91,6,6,1.77,2,2,0,0,0,2,57,2,1,1,1 +71202,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,65706.229298,75664.626338,1,101,7,7,2.64,2,2,0,0,2,1,80,1,1,1,3 +71203,7,2,2,63,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,10391.563983,10953.658293,3,90,14,14,4.96,2,2,0,0,2,1,68,1,3,1,5 +71204,7,2,2,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,46606.430863,46728.795934,2,102,14,14,3.25,5,5,2,0,0,1,27,1,5,1,5 +71205,7,2,2,12,NA,3,3,2,12,146,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,132630.209478,135755.422092,1,97,14,14,3.36,4,4,0,2,0,2,49,1,5,1,5 +71206,7,2,1,29,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,9938.495618,10209.075521,1,93,15,6,2.3,6,1,0,0,0,1,34,2,5,5,NA +71207,7,1,1,7,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10229.206765,0,1,96,14,14,2.96,5,5,0,3,0,1,46,NA,NA,1,5 +71208,7,2,1,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16905.961576,17805.493573,2,94,10,10,4.76,2,2,0,0,0,1,32,1,5,1,5 +71209,7,2,2,2,NA,1,1,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,10099.930724,10530.26456,2,96,6,6,0.87,6,6,1,3,0,1,46,2,1,1,1 
+71210,7,2,1,0,0,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12909.068713,12685.898973,2,92,1,1,0.22,3,3,1,0,0,1,22,1,4,6,NA +71211,7,2,2,32,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23038.68441,23821.989403,1,92,6,6,1.92,2,2,0,0,0,2,33,2,4,5,NA +71212,7,2,2,65,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,49568.196121,50175.172049,1,101,3,3,1.24,1,1,0,0,1,2,65,1,2,2,NA +71213,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,65897.669973,68436.247366,1,99,77,77,NA,3,3,0,0,0,1,42,1,4,6,NA +71214,7,2,2,11,NA,1,1,1,11,138,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,14414.529053,14932.182215,2,96,6,6,0.87,6,6,1,3,0,1,46,2,1,1,1 +71215,7,2,1,60,NA,3,3,2,NA,NA,2,NA,2,1,77,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,19328.482066,19868.466665,3,90,77,77,NA,1,1,0,0,1,1,60,2,5,5,NA +71216,7,2,1,46,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,20197.354438,20310.389983,2,96,8,8,3.4,2,2,0,0,0,1,46,2,4,1,4 +71217,7,2,1,28,NA,1,1,1,NA,NA,2,NA,2,7,5,NA,3,5,NA,2,2,2,2,2,2,1,2,2,1,47487.549895,48753.51505,1,102,7,7,1.89,3,3,0,0,0,1,53,2,1,1,1 +71218,7,2,1,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,87891.395784,92007.216086,2,99,15,15,5,1,1,0,0,0,1,33,1,5,5,NA +71219,7,2,1,60,NA,4,4,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7915.816068,7977.736071,2,93,6,6,1.13,4,4,0,0,2,1,60,2,3,1,3 +71220,7,2,1,4,NA,4,4,1,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11920.911192,13144.753907,2,96,14,14,3.58,4,4,2,0,0,1,36,1,4,1,5 +71221,7,2,1,1,20,1,1,1,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13305.770449,13726.956753,3,92,5,5,0.68,6,6,3,0,0,2,19,1,4,NA,NA +71222,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,191547.714427,193526.009858,2,91,15,10,5,2,1,0,0,0,2,55,1,5,6,NA +71223,7,2,2,25,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,30275.274308,30320.430859,2,101,3,2,0.82,2,1,0,0,0,2,25,1,4,5,NA 
+71224,7,2,2,68,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,44636.780791,46200.180609,2,91,6,6,1.26,5,5,0,1,2,2,80,1,4,2,NA +71225,7,2,1,0,1,1,1,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6298.658963,6412.943673,2,94,77,77,NA,4,4,2,0,0,2,27,2,3,1,3 +71226,7,2,2,0,7,3,3,1,NA,7,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7272.728291,7471.815482,1,98,13,13,NA,5,5,2,1,0,1,31,1,3,1,3 +71227,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,13209.159597,13930.540942,2,95,6,6,1.08,4,4,1,1,0,1,39,1,4,1,4 +71228,7,2,2,49,NA,1,1,2,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,31273.071103,31436.157508,1,90,12,12,NA,5,5,0,1,0,2,49,2,1,1,NA +71229,7,2,1,48,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,4,1,NA,1,2,1,1,2,1,1,2,1,3,12066.381592,12177.318898,1,103,4,4,0.82,3,3,0,0,0,1,48,2,4,1,1 +71230,7,1,1,30,NA,1,1,NA,NA,NA,2,NA,2,2,4,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,34887.439952,0,2,94,4,4,0.72,4,4,1,1,0,1,30,2,1,1,3 +71231,7,2,1,52,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,17796.910402,18163.842064,1,103,2,2,0.63,1,1,0,0,0,1,52,1,3,4,NA +71232,7,2,2,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,32041.645327,32131.786508,1,94,2,2,0.73,1,1,0,0,0,2,55,1,3,3,NA +71233,7,2,1,6,NA,4,4,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12267.215138,12511.113657,2,102,2,2,0.36,4,4,1,2,0,2,36,1,3,5,NA +71234,7,2,2,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,154825.466557,157902.106304,1,91,7,7,2.92,2,2,0,0,1,2,47,1,5,5,NA +71235,7,2,1,9,NA,3,3,1,9,115,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,38712.032122,41122.784973,3,91,15,15,5,6,6,1,3,0,2,40,1,5,1,5 +71236,7,2,2,2,NA,4,4,2,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7338.212404,7527.624117,1,93,7,7,2.16,3,3,1,0,0,2,28,2,2,1,5 +71237,7,2,2,43,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,35469.911999,36868.395052,2,91,12,13,NA,3,1,0,0,1,1,52,1,4,3,NA 
+71238,7,2,2,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,20186.483875,20343.011032,1,99,13,13,NA,4,4,0,2,0,1,55,NA,NA,1,4 +71239,7,2,1,48,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,1,148090.195644,148420.972197,1,98,5,5,1.79,1,1,0,0,0,1,48,1,1,2,NA +71240,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,6512.729795,7017.196255,1,93,2,2,0.69,1,1,0,0,1,1,80,1,5,2,NA +71241,7,2,2,0,9,4,4,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4588.59937,4638.701468,2,95,10,10,3.82,3,3,1,0,0,1,25,1,4,1,4 +71242,7,2,1,48,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,19436.026093,19746.342802,2,97,9,9,1.45,7,7,1,2,2,2,45,1,3,5,NA +71243,7,2,1,17,NA,4,4,1,18,216,2,NA,2,1,5,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19163.050164,19648.623212,2,102,14,14,4.48,3,3,0,2,0,1,41,1,3,4,NA +71244,7,2,2,64,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,118437.327505,122585.585802,1,90,15,15,5,1,1,0,0,1,2,64,1,5,3,NA +71245,7,2,1,62,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,7654.035132,8195.014907,3,90,9,9,2.6,4,4,0,0,1,1,62,2,4,1,5 +71246,7,2,1,14,NA,1,1,1,14,178,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23389.620035,24913.232835,1,91,14,14,3.9,4,4,0,1,0,1,41,1,2,1,4 +71247,7,2,1,13,NA,4,4,1,13,162,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,NA,NA,NA,1,2,2,1,14650.502937,14679.468181,2,102,14,14,3.25,5,5,1,1,0,2,32,1,4,1,3 +71248,7,2,1,4,NA,5,6,1,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10123.286306,10422.822776,1,100,15,15,5,3,3,1,0,0,2,28,2,5,1,5 +71249,7,2,1,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,17420.978407,17335.115714,2,97,3,3,0.82,2,2,0,0,0,1,24,1,3,5,NA +71250,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,1,6,NA,1,2,2,1,2,2,1,2,2,1,20486.987447,22351.0228,2,101,3,1,0,4,1,0,2,0,1,39,1,1,6,NA +71251,7,1,1,60,NA,4,4,NA,NA,NA,2,NA,2,1,99,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,12848.137255,0,1,101,15,15,5,2,2,0,0,2,1,60,2,5,1,NA 
+71252,7,2,2,41,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,4,1,2,2,2,2,1,2,2,1,2,2,2,25778.164795,25912.595732,2,90,2,2,0.25,5,5,0,1,0,2,41,2,4,1,NA +71253,7,1,2,47,NA,3,3,NA,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,125799.478905,0,1,100,15,15,4.63,5,5,0,0,0,1,51,1,5,1,3 +71254,7,2,2,50,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,10964.859884,13217.672684,3,90,77,77,NA,3,3,0,1,0,1,54,2,3,1,3 +71255,7,2,2,2,NA,3,3,1,2,35,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14950.402914,15888.087777,1,102,5,1,0.21,5,4,1,1,0,2,24,1,4,5,NA +71256,7,2,2,7,NA,4,4,2,7,94,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7428.928475,8424.338415,3,91,1,1,0.07,6,6,2,3,0,2,30,1,2,3,NA +71257,7,2,1,7,NA,4,4,1,7,90,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8714.559478,8887.823591,2,96,7,7,1.04,7,7,0,4,0,2,37,1,3,3,NA +71258,7,2,2,19,NA,2,2,2,19,232,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,15442.648697,17482.235263,3,90,10,8,4.3,2,1,0,0,0,1,33,1,3,6,NA +71259,7,2,2,23,NA,5,6,2,NA,NA,1,2,1,1,NA,NA,3,5,2,1,2,2,1,2,2,1,2,2,1,16844.740449,19586.242825,3,91,99,1,0.28,3,1,0,0,0,2,23,1,3,5,NA +71260,7,2,2,14,NA,1,1,1,14,171,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20347.899985,21072.708732,3,92,15,15,3.15,7,7,0,4,0,2,35,2,3,3,NA +71261,7,2,2,50,NA,2,2,1,NA,NA,2,NA,2,1,3,NA,1,5,NA,2,2,2,1,2,2,1,2,2,2,24004.6026,24129.784561,2,91,6,6,0.93,5,5,1,2,0,2,50,2,1,5,NA +71262,7,2,1,15,NA,3,3,2,15,181,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,61479.689958,61628.148021,2,100,15,15,4.5,6,6,0,4,0,1,45,1,5,1,5 +71263,7,2,1,47,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,22685.373982,23664.393091,1,98,1,1,0,4,4,0,3,0,1,47,1,5,3,NA +71264,7,2,2,20,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,30253.427014,30121.660039,2,90,15,15,4.2,5,5,1,0,0,2,50,NA,NA,6,NA +71265,7,2,2,15,NA,4,4,2,15,187,NA,NA,1,1,NA,10,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7825.55935,7975.961244,3,90,15,15,5,5,5,0,1,1,2,61,1,5,2,NA 
+71266,7,2,2,68,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,117131.838201,119452.245796,1,94,14,14,5,2,2,0,0,2,1,74,1,4,1,4 +71267,7,2,2,41,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,20001.392001,20614.835005,1,100,8,8,2.62,3,3,0,1,0,1,41,2,5,1,5 +71268,7,2,1,5,NA,3,3,2,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,26500.001625,29898.299515,2,91,7,7,1.29,6,6,2,2,0,1,33,2,3,6,NA +71269,7,1,1,33,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,12556.207754,0,2,99,13,13,NA,6,6,2,1,0,2,31,1,4,6,NA +71270,7,2,2,15,NA,2,2,2,15,185,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17381.663677,18792.085501,1,90,7,7,2.1,3,3,0,2,0,2,37,1,3,5,NA +71271,7,2,1,57,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,31872.125984,31404.451666,2,96,6,6,1.12,4,4,0,1,0,1,57,2,1,1,4 +71272,7,2,1,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,77440.662578,92128.223811,2,93,8,8,3.3,2,2,0,0,0,2,26,1,4,1,5 +71273,7,2,1,11,NA,1,1,1,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,13933.628196,2,98,6,6,1.9,2,2,0,1,0,2,34,1,4,3,NA +71274,7,2,2,28,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,5,6,2,1,2,2,1,2,2,1,2,2,1,49473.624024,50829.110777,3,91,14,14,5,2,2,0,0,0,1,32,1,4,6,NA +71275,7,2,2,12,NA,4,4,1,12,148,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,20473.346601,20523.769333,2,102,15,15,3.82,5,5,1,2,0,1,34,1,3,1,4 +71276,7,2,2,0,5,3,3,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17070.097848,16606.436082,1,101,7,7,1.79,4,4,1,0,0,2,30,1,4,1,4 +71277,7,2,2,4,NA,1,1,1,4,52,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15457.736897,17065.756351,2,98,2,2,0.27,4,4,2,1,0,2,32,2,2,5,NA +71278,7,2,1,69,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,7101.739553,7379.502561,1,96,99,99,NA,1,1,0,0,1,1,69,1,2,1,NA +71279,7,2,1,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,27356.080541,27285.808403,1,98,6,6,1.11,5,5,1,2,0,2,32,1,2,1,2 
+71280,7,2,2,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,95778.224595,101717.815372,1,98,8,8,2.46,3,3,1,0,0,1,29,1,4,6,NA +71281,7,2,1,2,NA,4,4,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7667.509064,7761.032365,2,91,2,2,0.41,3,3,1,1,0,2,30,1,3,5,NA +71282,7,2,2,25,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,12426.473257,13266.565637,2,92,14,7,3.4,2,1,0,0,0,1,26,NA,NA,5,NA +71283,7,2,2,0,1,2,2,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,8168.833251,8141.91987,2,91,5,5,1.05,3,3,2,0,0,2,26,2,3,4,NA +71284,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,10049.7347,10691.470743,2,90,2,2,0.67,2,2,0,0,1,2,64,1,5,5,NA +71285,7,2,1,10,NA,1,1,1,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13927.458372,14258.502552,2,98,5,5,1.07,4,4,0,1,0,1,52,2,1,1,3 +71286,7,2,2,56,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,22969.116046,23497.145655,2,93,6,6,2.69,1,1,0,0,0,2,56,2,4,3,NA +71287,7,1,2,3,NA,3,3,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,26966.264969,0,1,98,4,4,0.66,4,4,2,0,0,2,22,1,4,6,NA +71288,7,2,1,68,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,140586.349952,144513.945685,2,91,15,15,5,2,2,0,0,2,1,68,1,5,1,5 +71289,7,2,2,32,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,27375.405353,27906.494528,1,94,5,5,0.95,4,4,1,2,0,2,32,1,4,3,NA +71290,7,2,2,10,NA,2,2,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12737.089292,13281.887291,1,94,14,14,3.4,5,5,0,3,0,2,41,1,4,1,4 +71291,7,2,1,50,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,182279.501656,183739.903765,1,100,6,6,1.98,2,2,0,0,0,1,50,1,5,4,NA +71292,7,2,2,36,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,28747.860416,30448.398533,1,99,10,10,3.13,4,4,0,2,0,1,35,1,4,1,5 +71293,7,2,1,3,NA,2,2,2,3,41,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,13353.801759,13870.061432,1,93,6,6,0.99,5,5,1,0,0,1,28,2,2,5,NA 
+71294,7,2,1,16,NA,4,4,1,16,201,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19163.050164,19648.623212,2,102,14,14,4.48,3,3,0,2,0,1,41,1,3,4,NA +71295,7,2,1,25,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,110218.970616,129453.540645,1,93,7,7,3.21,1,1,0,0,0,1,25,1,4,5,NA +71296,7,2,2,32,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,27367.658704,28808.672115,1,91,4,4,0.81,4,4,1,1,0,1,32,1,4,6,NA +71297,7,2,1,19,NA,4,4,1,19,230,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12147.046136,12703.852077,2,100,4,4,0.91,3,3,0,0,0,2,49,1,2,1,2 +71298,7,2,2,7,NA,5,6,2,7,90,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10699.45895,11403.206057,1,97,14,14,2.29,7,7,1,2,2,1,40,2,1,1,1 +71299,7,2,1,2,NA,4,4,1,2,26,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10232.679671,11283.202594,2,101,4,4,1.22,2,2,1,0,0,2,25,1,4,5,NA +71300,7,2,2,35,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,12381.032047,12845.540957,1,102,15,15,4.59,4,4,1,1,0,1,35,1,5,1,5 +71301,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,44417.237618,45651.906489,1,102,1,1,0.24,3,2,0,0,1,2,50,1,3,3,NA +71302,7,2,2,56,NA,4,4,1,NA,NA,2,NA,2,2,6,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,18146.994087,18265.36374,2,93,4,4,1.29,2,2,0,1,0,2,56,2,3,4,NA +71303,7,2,2,4,NA,4,4,1,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9707.610673,10138.677842,2,93,5,5,0.92,4,4,1,1,0,1,27,2,2,5,NA +71304,7,2,1,1,19,1,1,2,NA,19,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,11972.170914,12351.142913,1,97,8,8,1.45,6,6,2,2,0,2,36,2,2,1,1 +71305,7,2,1,7,NA,1,1,1,7,89,NA,NA,1,1,NA,0,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,14820.807433,15055.338709,2,102,7,7,1.89,3,3,0,1,0,1,41,2,2,6,NA +71306,7,2,2,52,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,12441.719186,12605.354834,2,100,15,15,5,3,3,0,1,0,1,58,2,5,1,5 +71307,7,2,1,58,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,132969.642582,132807.505002,2,91,5,5,1.39,2,2,0,0,0,1,58,1,4,3,NA 
+71308,7,2,2,69,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,8493.33267,8821.144988,3,91,7,7,1.33,6,6,0,0,2,2,51,2,5,1,5 +71309,7,2,2,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,NA,25964.813959,27606.791932,2,101,NA,77,NA,2,1,0,0,0,1,55,1,2,3,NA +71310,7,2,2,67,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,11879.290971,12409.688606,2,96,6,6,2.04,2,2,0,0,1,2,67,1,4,2,NA +71311,7,2,2,80,NA,2,2,1,NA,NA,2,NA,2,1,9,NA,2,1,NA,2,2,2,1,2,2,2,2,2,NA,21810.940874,23452.513609,2,93,8,8,2.17,4,4,0,0,3,1,80,2,2,1,2 +71312,7,2,2,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,15214.43631,2,100,8,8,2.67,3,3,0,0,1,1,61,1,3,1,4 +71313,7,2,1,52,NA,4,4,2,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,19374.410926,19463.695548,1,96,77,77,NA,4,4,0,0,0,1,52,2,5,1,5 +71314,7,2,1,51,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,30839.213846,30386.695922,1,92,4,4,0.74,4,4,1,1,0,1,51,2,1,1,1 +71315,7,2,1,44,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,105141.812429,112387.793787,1,98,6,6,1.11,5,5,0,2,1,2,37,1,1,1,1 +71316,7,2,1,57,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,100418.893093,106630.269812,2,100,10,10,3.13,4,4,0,0,1,2,53,1,2,1,2 +71317,7,2,1,0,1,4,4,2,NA,2,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5848.582979,6109.207769,2,97,4,4,0.57,5,5,1,3,0,2,33,1,3,5,NA +71318,7,2,2,2,NA,4,4,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6984.912286,7371.360851,2,93,10,10,2.26,6,6,2,0,0,1,34,1,4,1,4 +71319,7,2,2,2,NA,3,3,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,33502.281129,33942.512387,1,91,15,15,5,4,4,1,1,0,1,29,1,4,6,NA +71320,7,1,1,2,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,10273.602479,0,1,92,12,12,NA,4,4,1,1,0,1,33,2,4,1,4 +71321,7,2,2,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,24713.281483,24036.937705,1,92,14,14,4.96,2,2,0,0,0,1,25,1,4,5,NA 
+71322,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,20949.647122,21006.826162,2,91,4,4,0.84,3,3,1,0,0,2,21,1,4,1,2 +71323,7,2,1,9,NA,5,6,2,9,117,NA,NA,2,2,1,3,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,5855.595238,6345.238788,1,91,14,14,4.32,3,3,0,2,0,2,37,2,5,1,NA +71324,7,2,1,35,NA,2,2,2,NA,NA,2,NA,2,2,5,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,38923.140296,40970.697433,1,93,9,9,2.46,4,4,0,2,0,1,35,2,1,1,1 +71325,7,2,2,3,NA,3,3,2,3,42,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24600.429016,26143.360672,1,91,6,6,1.07,6,6,3,1,0,2,27,1,4,6,NA +71326,7,2,1,6,NA,4,4,2,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8655.162127,8668.652185,2,95,6,6,0.86,6,6,0,4,0,2,32,1,4,6,NA +71327,7,2,1,3,NA,1,1,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16775.083123,16797.750213,2,98,2,2,0.27,4,4,2,0,0,2,20,2,2,6,NA +71328,7,2,2,54,NA,2,2,1,NA,NA,2,NA,2,1,4,NA,4,3,NA,2,2,2,1,2,2,2,2,2,2,26226.50904,26977.257389,2,93,7,7,1.74,4,4,1,0,1,2,24,NA,NA,4,NA +71329,7,2,1,8,NA,3,3,2,8,100,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,63513.013195,68899.034203,2,101,7,7,1.57,4,4,0,2,0,2,28,1,3,6,NA +71330,7,2,1,39,NA,5,6,1,NA,NA,2,NA,2,1,3,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,9585.652672,9712.791327,2,92,10,6,1.12,7,4,1,1,1,2,27,2,3,1,3 +71331,7,2,2,0,10,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,9767.083234,9561.595244,3,92,10,10,2.82,4,4,2,0,0,1,26,2,3,1,3 +71332,7,2,2,10,NA,1,1,1,10,129,NA,NA,1,1,NA,4,NA,NA,NA,2,1,1,1,2,1,1,2,2,1,20495.125801,20710.596821,3,92,6,6,0.96,5,5,2,1,0,2,26,2,1,1,1 +71333,7,2,2,30,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,52521.960331,51104.807504,1,92,10,10,4.63,2,2,0,0,0,1,35,2,5,1,4 +71334,7,2,1,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,15682.233511,17550.107712,1,98,6,6,1.9,2,2,0,0,2,1,80,1,1,1,2 +71335,7,1,1,61,NA,2,2,NA,NA,NA,2,NA,2,1,6,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,6449.12882,0,2,93,77,77,NA,3,3,0,0,3,1,61,2,1,1,1 
+71336,7,2,1,45,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14695.245859,15160.543049,2,92,15,15,5,3,3,0,1,0,1,45,2,4,1,4 +71337,7,2,1,7,NA,3,3,2,7,87,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,64268.100156,67147.933534,1,98,15,15,4.07,5,5,0,3,0,1,38,1,2,1,4 +71338,7,2,2,8,NA,4,4,1,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11195.065587,11638.702248,2,96,2,2,0.43,3,3,0,2,0,2,50,1,2,5,NA +71339,7,2,2,69,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,39030.893313,39804.103933,1,91,5,5,1.2,3,3,0,0,1,1,58,1,2,1,2 +71340,7,2,2,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,3,2,1,2,2,1,2,2,1,2,2,1,24919.497762,26574.647337,1,95,5,5,1.1,3,3,0,0,1,2,63,1,4,1,5 +71341,7,2,1,58,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,14897.510053,14959.811939,1,101,15,15,5,3,3,0,0,1,1,58,2,4,1,5 +71342,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,46965.818538,52174.805072,1,101,7,7,2.64,2,2,0,0,2,1,80,1,1,1,3 +71343,7,2,2,29,NA,3,3,2,NA,NA,2,NA,2,1,6,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,105134.853452,122679.602456,2,99,14,14,5,1,1,0,0,0,2,29,2,5,5,NA +71344,7,2,1,65,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,28953.774534,29107.622897,1,98,4,4,1,3,3,0,1,1,1,65,1,2,1,NA +71345,7,1,1,60,NA,1,1,NA,NA,NA,2,NA,2,2,77,NA,2,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,12845.115724,0,1,100,4,4,0.78,4,4,0,0,1,1,33,2,1,1,1 +71346,7,2,2,74,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,NA,44856.466004,46390.551109,2,94,12,9,5,2,1,0,0,1,2,74,1,5,3,NA +71347,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,32461.799549,32422.216998,1,95,6,2,0.81,2,1,0,0,0,1,53,1,2,6,NA +71348,7,2,1,56,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,25118.469449,26051.81361,2,98,6,6,1.4,3,3,0,1,0,1,56,1,2,1,2 +71349,7,1,1,78,NA,5,6,NA,NA,NA,2,NA,2,1,6,NA,1,2,NA,1,2,1,1,2,2,NA,NA,NA,NA,9748.579573,0,3,90,8,8,1.85,5,5,0,0,1,2,25,1,5,5,NA 
+71350,7,2,2,6,NA,4,4,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9502.76472,9802.874785,1,96,3,3,0.93,2,2,0,1,0,2,40,1,5,5,NA +71351,7,2,2,52,NA,5,6,1,NA,NA,2,NA,2,1,8,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,11876.875273,12162.823626,2,103,15,15,5,3,3,0,0,0,2,52,2,4,1,5 +71352,7,2,2,10,NA,5,6,2,10,130,NA,NA,2,1,3,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9227.090107,9685.088635,2,100,5,5,0.89,4,4,0,1,0,2,40,2,3,1,3 +71353,7,2,2,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,36067.495928,37024.300659,1,95,3,3,1.29,1,1,0,0,1,2,76,1,3,2,NA +71354,7,2,1,50,NA,1,1,1,NA,NA,1,1,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,37557.946192,37666.079092,2,102,7,7,3.49,4,1,0,0,1,1,63,NA,NA,5,NA +71355,7,2,2,20,NA,5,6,1,NA,NA,2,NA,2,1,4,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,16026.446928,16774.880842,1,94,14,14,2.78,6,5,0,2,1,1,61,1,4,1,5 +71356,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,129867.826953,131085.811251,1,101,9,9,2.6,4,4,0,1,2,2,63,1,4,1,4 +71357,7,2,1,29,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15408.94893,15333.002917,1,99,7,7,2.38,2,2,0,0,0,2,27,1,5,1,5 +71358,7,2,2,1,22,2,2,1,NA,23,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,7815.664341,8308.38945,2,103,5,5,0.65,6,6,1,0,1,2,61,2,1,2,NA +71359,7,2,1,41,NA,2,2,2,NA,NA,2,NA,2,7,77,NA,1,1,NA,2,2,2,2,2,2,NA,NA,NA,NA,31640.296506,33247.715312,2,94,1,1,0.01,7,7,1,3,0,1,41,2,1,1,1 +71360,7,2,2,25,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,2,1,2,2,2,2,2,2,2,NA,NA,NA,NA,39426.061521,43773.817687,2,94,4,4,0.81,4,4,2,0,0,1,26,2,2,1,2 +71361,7,2,2,55,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,17852.668137,17947.072019,3,91,7,7,2.58,2,2,0,0,0,2,55,2,5,3,NA +71362,7,2,1,23,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,25815.880139,26503.609729,2,101,2,1,0.05,2,1,0,0,0,1,24,1,4,5,NA +71363,7,2,2,30,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,2,1,2,2,NA,NA,NA,1,2,2,1,16165.066826,16696.869727,2,96,NA,NA,NA,4,4,0,0,1,1,67,2,3,1,2 
+71364,7,2,1,13,NA,4,4,1,13,157,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11666.009872,12200.765691,2,100,14,4,0.43,7,7,1,3,1,2,62,1,3,5,NA +71365,7,2,1,22,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21763.209029,22385.504537,2,92,4,4,1.38,4,1,0,0,0,1,21,1,4,5,NA +71366,7,2,2,20,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,114492.825466,119073.624167,2,101,3,1,0.06,3,1,0,0,0,2,20,2,4,5,NA +71367,7,2,1,7,NA,2,2,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13533.281742,13475.903543,1,100,10,10,2.91,4,4,1,1,0,1,32,1,5,1,5 +71368,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,82363.088109,88909.58429,1,90,14,14,3.93,3,3,1,0,0,1,35,1,2,1,5 +71369,7,2,2,3,NA,5,6,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3842.794137,4026.588867,1,93,6,6,1.15,5,5,1,0,2,2,70,NA,NA,1,NA +71370,7,2,1,9,NA,3,3,2,9,114,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,21087.869274,22686.932895,1,101,1,1,0.1,4,4,1,1,0,2,52,1,4,3,NA +71371,7,2,2,63,NA,5,7,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,28759.144922,29766.431764,3,90,7,7,2.51,2,2,0,0,2,1,62,2,3,1,4 +71372,7,2,1,37,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,1,6,NA,2,2,2,2,2,2,2,2,2,2,51543.062078,51154.050295,3,92,3,3,0.51,5,5,1,2,0,2,34,2,1,6,NA +71373,7,2,1,1,15,1,1,1,NA,15,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9925.934568,9762.398236,2,92,5,5,1.08,3,3,1,0,0,2,30,2,3,1,2 +71374,7,2,2,52,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,12059.495297,12169.751033,1,102,15,15,3.82,5,5,1,1,0,1,29,1,4,1,4 +71375,7,2,1,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,70819.746399,73506.573727,1,98,6,6,1.98,2,2,0,0,2,2,70,1,4,1,5 +71376,7,2,1,3,NA,1,1,1,3,39,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18754.85406,20094.472571,2,102,14,14,3.8,4,4,2,0,0,2,41,2,4,1,5 +71377,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,27199.141352,28737.286192,1,98,77,77,NA,2,2,0,0,2,1,80,1,5,1,5 
+71378,7,2,1,5,NA,3,3,2,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,39061.897809,44649.899401,1,95,15,15,3.62,7,7,2,4,0,1,59,1,5,1,2 +71379,7,2,2,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,27351.473517,27893.312251,2,94,2,2,0.83,1,1,0,0,1,2,60,1,5,5,NA +71380,7,1,2,80,NA,4,4,NA,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,15914.916287,0,2,99,5,5,1.59,2,2,0,0,1,2,80,1,3,2,NA +71381,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,1,2,2,3,12765.378725,13670.003215,2,90,14,14,4.32,3,3,0,1,0,1,48,2,4,1,5 +71382,7,2,2,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,54095.581484,61529.339683,2,94,9,9,3.97,2,2,0,0,2,1,80,1,5,1,5 +71383,7,2,2,3,NA,5,6,2,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,2,NA,NA,NA,NA,5060.292252,5371.954509,1,90,8,8,1.43,7,7,2,0,0,1,23,2,4,1,3 +71384,7,2,2,7,NA,1,1,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15962.145468,16285.000268,3,92,7,7,1.49,5,5,0,2,1,2,62,1,4,2,NA +71385,7,2,2,6,NA,3,3,2,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,28723.999699,28637.704803,1,97,6,6,1.03,6,6,2,2,0,2,38,1,5,1,4 +71386,7,2,2,1,23,1,1,1,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14326.094268,14518.763259,3,92,14,14,3.47,4,4,1,1,0,1,44,1,3,1,5 +71387,7,2,2,3,NA,4,4,1,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10437.988787,11378.89676,2,97,NA,99,NA,7,6,2,1,1,2,56,1,3,5,NA +71388,7,2,1,42,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,49060.272708,48340.388834,1,92,7,7,1.48,5,5,0,1,0,1,42,1,5,1,4 +71389,7,2,2,1,21,2,2,1,NA,22,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11443.671453,12634.120381,1,100,15,15,4.34,4,4,2,0,0,2,35,1,5,1,5 +71390,7,2,1,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,30626.145273,33111.125584,2,103,7,7,2.64,2,2,0,0,1,1,54,1,5,5,NA +71391,7,2,1,11,NA,3,3,1,11,137,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,38712.032122,41122.784973,3,91,15,15,5,6,6,1,3,0,2,40,1,5,1,5 
+71392,7,2,2,43,NA,2,2,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,35485.230645,37305.51435,1,94,7,7,1.74,4,4,0,2,0,1,44,1,5,1,5 +71393,7,2,1,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,108410.783716,113500.095101,1,91,15,7,3.58,3,1,0,0,0,1,27,1,3,6,NA +71394,7,1,1,78,NA,5,6,NA,NA,NA,2,NA,2,1,5,NA,3,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,12320.168636,0,2,92,4,4,1.09,2,2,0,0,2,1,78,2,3,1,NA +71395,7,2,1,30,NA,2,2,2,NA,NA,2,NA,2,2,6,NA,1,4,NA,2,2,2,2,2,2,2,2,2,2,27605.196104,27396.850962,2,99,12,7,3.31,5,1,0,1,0,1,30,2,2,4,NA +71396,7,2,2,3,NA,4,4,2,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10936.097083,12221.030117,2,95,2,2,0.42,3,3,2,0,0,1,25,1,3,5,NA +71397,7,2,1,24,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,4,NA,1,2,2,1,2,2,1,2,2,1,23484.626749,26526.417464,2,96,14,14,5,1,1,0,0,0,1,24,1,4,4,NA +71398,7,2,1,13,NA,4,4,2,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11788.330261,12582.159913,2,95,6,6,0.97,6,6,2,2,0,1,37,1,3,1,4 +71399,7,2,1,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,103893.364631,104494.238956,2,95,9,9,2.22,5,5,1,0,0,1,55,1,4,1,5 +71400,7,1,1,18,NA,5,6,NA,NA,NA,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,7669.677276,0,2,95,3,3,0.99,2,2,0,0,0,1,18,1,3,NA,NA +71401,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,31785.728924,32871.467684,1,101,4,4,1.22,2,2,0,0,1,1,63,1,2,1,2 +71402,7,2,1,0,3,3,3,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21055.122405,20691.125102,1,93,15,15,4.47,4,4,2,0,0,1,31,1,5,1,5 +71403,7,2,2,30,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,31335.13799,31552.004994,1,95,5,5,0.92,5,5,1,2,0,2,30,1,4,1,4 +71404,7,2,1,18,NA,4,4,1,18,219,2,NA,1,1,NA,66,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13276.485807,15665.287637,2,100,99,99,NA,3,3,0,0,0,1,46,1,9,3,NA +71405,7,2,1,63,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,6910.118936,7233.371983,2,95,4,4,1.77,1,1,0,0,1,1,63,1,2,3,NA 
+71406,7,2,2,8,NA,4,4,1,8,106,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8558.832746,8898.001102,2,96,3,3,0.38,5,5,1,2,0,2,30,1,3,5,NA +71407,7,2,2,49,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,20418.635225,20918.390367,2,95,14,14,5,2,2,0,0,0,1,44,1,3,1,4 +71408,7,2,2,19,NA,2,2,2,19,238,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,16995.403684,17639.03984,2,97,2,2,0.27,3,3,2,0,0,2,19,1,3,NA,NA +71409,7,2,1,41,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,116464.874823,116165.700541,2,94,14,14,2.83,6,6,0,4,0,2,38,1,2,1,2 +71410,7,2,2,15,NA,3,3,2,15,182,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,103007.696238,104941.393061,1,101,15,15,5,4,4,0,2,0,1,43,1,4,1,5 +71411,7,2,1,8,NA,4,4,1,8,103,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,NA,9261.557132,9782.87535,2,100,1,1,0.08,5,5,1,2,0,2,19,1,3,NA,NA +71412,7,2,2,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,35269.9609,35902.08788,2,96,14,14,3.58,4,4,2,0,0,1,36,1,4,1,5 +71413,7,2,1,40,NA,2,2,1,NA,NA,2,NA,2,2,77,NA,4,6,NA,2,2,2,1,2,2,1,2,2,2,28726.575428,28964.259011,2,100,3,3,0.76,3,3,1,0,0,2,31,2,1,6,NA +71414,7,2,1,17,NA,1,1,1,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24228.858782,24355.020132,2,102,77,77,NA,4,4,0,1,0,1,47,1,2,1,3 +71415,7,2,1,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,78529.577822,86568.047729,1,98,7,7,1,7,7,2,2,0,2,34,1,4,3,NA +71416,7,2,2,39,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,16411.593279,16445.104385,2,103,15,15,5,2,2,0,0,0,2,39,2,5,5,NA +71417,7,2,1,64,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,12279.19827,12375.250021,2,102,6,6,2.04,2,2,0,0,1,1,64,1,3,1,4 +71418,7,2,2,7,NA,5,6,2,7,90,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10699.45895,11230.540406,1,97,15,15,4.77,4,4,1,1,0,2,40,1,5,1,5 +71419,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,NA,NA,NA,NA,74517.751389,77393.175383,2,94,15,15,4.59,4,4,1,1,0,2,37,1,5,1,5 
+71420,7,2,2,54,NA,1,1,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,2,2,2,1,2,2,2,2,2,2,23953.14388,24078.057488,1,90,12,12,NA,4,4,0,0,0,1,54,2,4,1,2 +71421,7,2,1,3,NA,3,3,1,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,32621.433667,37288.094364,2,101,3,3,0.3,7,7,1,2,0,2,50,1,2,4,NA +71422,7,2,2,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,77778.949308,80709.488833,1,101,14,14,4.21,4,4,1,1,0,2,37,1,5,1,5 +71423,7,2,1,27,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,12540.575493,12881.998163,2,94,77,77,NA,6,6,2,0,0,2,18,1,3,NA,NA +71424,7,2,1,19,NA,3,3,1,19,236,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,110478.18082,109645.964567,2,101,1,1,0.09,2,1,0,0,0,1,19,1,4,NA,NA +71425,7,2,2,13,NA,3,3,2,13,167,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,35589.982806,42898.287648,1,101,6,6,1.31,3,3,0,2,0,1,43,1,3,4,NA +71426,7,2,1,3,NA,4,4,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9016.053035,9035.668246,2,100,3,3,0.31,7,7,3,2,0,2,28,1,3,1,3 +71427,7,2,2,39,NA,5,6,1,NA,NA,2,NA,2,2,1,NA,5,1,2,1,2,1,1,2,1,NA,NA,NA,NA,15951.963269,17051.924396,3,91,14,14,3.58,4,4,1,1,0,1,39,2,5,1,5 +71428,7,2,1,35,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,99741.609501,104268.100538,1,94,6,6,1.57,3,3,0,1,0,2,28,1,4,1,4 +71429,7,2,2,9,NA,4,4,2,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8579.490652,8729.153641,2,97,3,3,0.4,6,6,2,3,0,2,25,1,2,5,NA +71430,7,2,2,65,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,1,10468.473646,11728.156896,2,90,7,7,1.61,4,4,1,1,1,2,65,1,3,2,NA +71431,7,2,2,2,NA,4,4,1,2,32,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7382.686927,8048.181894,2,100,3,3,0.27,7,7,2,1,0,2,41,1,2,5,NA +71432,7,2,2,13,NA,1,1,1,13,158,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,19329.26314,19883.967368,1,103,14,14,2.96,5,5,1,2,0,1,34,1,4,1,5 +71433,7,2,1,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,19020.83925,19736.003798,2,99,6,6,1.13,4,4,1,1,0,1,33,1,3,6,NA 
+71434,7,2,1,17,NA,5,6,2,17,214,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9202.202094,9789.088627,1,90,15,15,5,5,5,0,2,0,1,47,2,5,1,5 +71435,7,2,1,4,NA,5,6,2,4,49,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6564.381324,7362.671865,2,90,77,77,NA,4,3,1,0,0,2,30,2,2,5,NA +71436,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,11355.3308,11862.334174,1,96,9,9,4.13,2,2,0,0,1,2,55,1,4,5,NA +71437,7,1,2,3,NA,4,4,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,9707.610673,0,2,93,1,1,0.2,2,2,1,0,0,2,33,2,3,5,NA +71438,7,2,2,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,13549.492282,14020.967918,2,93,12,12,NA,4,4,0,0,2,1,72,1,2,1,4 +71439,7,2,2,3,NA,3,3,2,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,64475.568487,69419.338511,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +71440,7,2,1,10,NA,1,1,1,10,124,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12665.770043,13116.669484,1,100,9,9,2.02,6,6,0,3,1,2,39,1,4,1,5 +71441,7,2,2,12,NA,2,2,2,12,153,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,14437.97544,16344.869867,2,90,3,3,0.38,5,5,0,4,0,2,33,2,2,5,NA +71442,7,2,1,1,12,3,3,1,NA,12,NA,NA,2,2,1,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,21380.815125,22997.240081,1,103,15,15,5,3,3,1,0,0,1,26,2,5,1,5 +71443,7,2,1,7,NA,1,1,2,7,94,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,12477.812875,12553.43469,2,94,4,4,0.63,6,6,1,2,0,2,36,2,3,1,1 +71444,7,2,2,0,3,5,7,2,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6399.50062,6586.636465,1,101,2,2,0.33,4,4,2,1,0,2,26,1,4,5,NA +71445,7,2,1,2,NA,2,2,1,2,30,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,12005.116852,12547.788444,3,92,6,6,0.86,7,7,1,4,0,2,36,2,1,1,1 +71446,7,2,2,11,NA,1,1,1,11,135,NA,NA,2,2,3,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,16986.005478,17733.622754,2,102,4,4,0.57,5,5,0,3,0,1,41,2,1,1,2 +71447,7,2,2,1,19,4,4,2,NA,20,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7348.24433,8211.623817,2,95,2,2,0.33,2,2,1,0,0,2,21,1,1,5,NA 
+71448,7,2,2,6,NA,4,4,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9453.111053,10094.338553,1,100,6,6,1.39,4,4,0,3,0,2,29,1,4,5,NA +71449,7,2,2,40,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,3,2,1,2,2,1,2,2,1,2,2,1,128187.954928,128313.829811,1,99,12,7,3.81,2,1,0,0,0,2,40,1,4,3,NA +71450,7,2,2,34,NA,3,3,1,NA,NA,2,NA,2,2,2,NA,4,1,2,2,2,2,2,2,2,NA,NA,NA,NA,66218.994273,67969.524045,2,93,7,7,1.52,4,4,1,1,0,1,44,2,4,1,NA +71451,7,2,2,51,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,24217.957803,24014.890408,2,94,4,4,1.56,2,1,0,0,0,2,51,1,2,6,NA +71452,7,2,1,36,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,96897.480702,107123.014093,2,91,15,15,5,4,4,2,0,0,2,33,1,5,1,5 +71453,7,2,1,14,NA,5,6,1,14,171,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11657.164593,12175.606972,1,92,9,9,2.88,3,3,0,2,0,1,50,2,5,3,NA +71454,7,2,2,8,NA,3,3,1,8,106,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,18417.272091,18465.044105,1,94,5,5,1.04,4,4,0,2,0,2,29,1,3,1,3 +71455,7,2,1,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,NA,13838.08267,16302.45001,1,94,3,3,1.16,1,1,0,0,1,1,70,1,3,5,NA +71456,7,2,1,21,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,59473.789098,72577.011806,3,92,4,1,0.27,5,1,0,0,0,2,57,1,4,1,2 +71457,7,2,2,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,23325.73926,22687.369215,1,101,2,2,0.68,1,1,0,0,0,2,53,1,4,2,NA +71458,7,2,1,5,NA,5,6,1,5,66,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,1,NA,NA,NA,NA,8838.066777,9670.229845,3,92,77,77,NA,7,7,2,4,1,1,62,NA,NA,1,NA +71459,7,2,2,14,NA,3,3,2,14,176,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,93307.64449,95903.481223,2,94,15,15,5,3,3,0,1,0,1,49,1,3,1,1 +71460,7,2,2,47,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,19075.861607,19022.257361,1,96,15,15,5,4,4,1,1,0,1,50,1,3,1,4 +71461,7,2,1,7,NA,4,4,1,7,88,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13423.881856,13521.226684,2,101,8,8,2.43,3,3,0,1,0,1,35,1,4,6,NA 
+71462,7,2,1,7,NA,4,4,2,7,91,NA,NA,2,1,1,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10424.657432,11237.830079,1,93,6,6,1.35,3,3,0,1,0,1,32,2,4,1,4 +71463,7,2,1,0,4,1,1,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6298.658963,6651.880632,2,94,15,15,3.33,6,6,1,2,0,2,20,1,3,1,NA +71464,7,2,2,3,NA,1,1,2,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13366.393396,13791.68675,2,94,5,5,1.07,4,4,2,0,0,1,37,2,1,1,1 +71465,7,2,2,26,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,5,4,2,1,2,2,1,2,2,1,2,2,1,18070.666316,19871.407536,1,99,6,6,2.75,1,1,0,0,0,2,26,2,5,4,NA +71466,7,2,1,2,NA,2,2,1,2,33,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,11727.291842,11865.933582,2,91,2,2,0.19,5,5,3,0,0,1,24,2,1,1,3 +71467,7,2,1,48,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,141134.499671,148176.982862,2,101,8,8,2.81,3,3,0,2,0,1,48,1,3,3,NA +71468,7,2,2,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,17191.889135,17295.278265,2,93,2,2,0.75,1,1,0,0,0,2,50,1,3,3,NA +71469,7,2,1,12,NA,4,4,2,12,155,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9313.795042,9381.627752,1,96,77,77,NA,7,7,1,3,0,1,56,1,3,1,4 +71470,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,52209.836905,58077.47757,1,98,8,8,3.48,2,2,0,0,2,1,80,1,3,1,2 +71471,7,2,1,10,NA,3,3,2,10,123,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,44777.275016,47565.734765,1,91,7,7,1.88,4,4,1,2,0,2,43,1,5,4,NA +71472,7,2,1,32,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,16058.989596,15830.0816,2,97,3,3,0.33,6,6,2,0,0,2,32,1,2,1,3 +71473,7,2,2,38,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,34007.843648,34076.908664,2,98,10,10,4.76,2,2,0,0,0,1,42,1,2,6,NA +71474,7,2,1,25,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,18509.151292,19027.688747,2,95,2,2,0.42,3,3,2,0,0,1,25,1,3,5,NA +71475,7,2,2,12,NA,3,3,1,12,145,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,39616.634313,47751.80094,2,101,3,3,0.59,4,3,0,2,0,1,39,1,1,6,NA 
+71476,7,2,2,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,35375.447072,38057.027004,1,97,8,8,2.51,3,3,0,1,0,1,35,1,3,1,4 +71477,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,12842.559946,13702.031196,2,95,13,13,NA,2,2,0,0,2,2,80,1,1,1,NA +71478,7,2,1,43,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,6,NA,1,2,2,1,2,2,1,2,2,1,35406.972937,35535.728875,2,98,7,7,1.53,5,5,0,0,0,2,48,1,3,5,NA +71479,7,2,2,15,NA,5,6,2,15,190,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,12224.9472,12547.183925,1,97,14,14,2.72,7,7,0,2,0,1,40,1,5,1,5 +71480,7,2,2,5,NA,4,4,2,5,65,NA,NA,2,1,2,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10544.002566,11127.362026,1,91,7,7,1.49,5,5,3,0,0,2,38,2,4,1,4 +71481,7,2,2,26,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,50915.06085,56529.78018,3,92,7,7,1.65,4,4,1,1,0,1,27,1,3,1,3 +71482,7,2,2,40,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,19075.861607,18884.31701,1,96,8,8,2,4,4,1,2,0,2,40,1,4,5,NA +71483,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,49191.372812,61149.379277,3,91,7,7,2.45,2,2,0,0,2,1,80,1,2,1,2 +71484,7,1,1,39,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,113827.590402,0,2,91,7,7,1.89,3,3,0,0,2,2,69,NA,NA,1,4 +71485,7,1,1,18,NA,5,6,NA,NA,NA,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,6666.045669,0,3,90,8,8,1.85,5,5,0,0,1,2,25,1,5,5,NA +71486,7,2,2,11,NA,4,4,2,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8549.049441,9128.952244,2,99,6,6,1.73,3,3,1,1,1,2,60,1,4,3,NA +71487,7,2,2,13,NA,5,6,1,13,159,NA,NA,2,2,4,7,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,6145.01663,6410.621394,2,92,6,6,1.3,4,4,0,1,0,2,48,2,3,1,3 +71488,7,2,2,41,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,23409.362971,23966.406748,2,96,5,5,1.08,3,3,0,1,0,2,41,1,3,1,NA +71489,7,2,2,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,23545.494186,22594.243077,2,99,8,8,3.47,2,2,0,0,1,2,74,1,5,2,NA 
+71490,7,2,2,11,NA,1,1,1,11,133,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13490.527555,13974.998066,2,96,5,5,0.68,6,6,0,3,2,1,60,2,1,1,1 +71491,7,2,1,12,NA,3,3,1,13,156,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,23588.786918,23619.012758,1,94,7,7,1.29,6,6,1,3,0,1,38,1,3,1,2 +71492,7,2,2,51,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,32041.645327,31772.976371,3,91,7,7,1.1,7,7,0,4,0,1,40,1,4,1,3 +71493,7,2,1,14,NA,1,1,1,15,180,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22768.423624,22944.003607,2,98,8,8,3.06,2,2,0,1,0,2,56,1,3,3,NA +71494,7,2,1,29,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,17191.065623,18600.361918,1,100,5,5,1.79,2,1,0,0,0,1,27,NA,NA,5,NA +71495,7,2,2,28,NA,2,2,2,NA,NA,2,NA,2,1,6,NA,3,6,2,1,2,2,1,2,2,1,2,2,1,43200.700326,51577.05558,1,97,5,5,1.04,4,4,1,1,0,1,32,1,3,6,NA +71496,7,2,1,65,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,1,4,NA,2,2,2,2,2,2,1,2,2,2,6564.099986,6927.048144,2,95,12,3,1.07,4,1,0,0,1,1,65,2,1,4,NA +71497,7,2,2,71,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,NA,27602.403077,28546.401551,1,94,2,2,0.55,2,2,0,0,2,2,75,1,1,3,NA +71498,7,2,2,5,NA,4,4,2,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10389.292229,10850.629517,2,95,4,4,0.65,4,4,1,2,0,2,27,1,3,5,NA +71499,7,2,1,16,NA,4,4,2,16,197,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11834.781205,12063.950506,2,90,15,15,5,4,4,1,1,0,1,53,2,5,1,5 +71500,7,2,1,6,NA,1,1,1,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13927.458372,14007.413517,2,98,15,15,4.97,5,5,0,3,0,1,39,1,5,1,5 +71501,7,2,2,2,NA,3,3,2,2,29,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,50995.241584,52601.79766,2,91,14,14,4.12,4,4,2,0,0,1,35,1,5,1,5 +71502,7,2,1,16,NA,1,1,1,16,193,NA,NA,1,1,NA,8,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,19570.996814,19477.677478,2,100,4,4,0.81,4,4,0,2,0,1,56,1,4,1,2 +71503,7,2,1,30,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21200.922169,24170.169783,2,99,9,9,4.92,1,1,0,0,0,1,30,1,4,5,NA 
+71504,7,2,2,16,NA,1,1,1,16,200,NA,NA,2,2,4,10,NA,NA,NA,2,2,2,1,2,2,1,2,2,1,22753.900764,24458.613732,1,100,2,2,0.35,4,4,0,1,0,2,40,2,2,5,NA +71505,7,2,1,33,NA,2,2,2,NA,NA,2,NA,2,1,3,NA,3,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,43108.74283,42989.380022,2,91,7,7,2.64,2,2,0,1,0,1,33,2,3,1,NA +71506,7,2,2,0,2,5,7,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9148.090461,9086.623516,1,92,9,9,3.14,3,3,1,0,0,2,30,1,5,1,5 +71507,7,2,1,1,15,1,1,2,NA,16,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11160.282155,11957.436882,2,97,2,2,0.27,3,3,2,0,0,2,19,1,3,NA,NA +71508,7,2,1,1,20,4,4,2,NA,21,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10232.679671,11283.202594,2,101,2,2,0.48,2,2,1,0,0,2,19,1,3,NA,NA +71509,7,2,2,60,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,3,2,NA,1,2,1,1,2,1,1,2,1,3,12627.660237,14547.347393,2,101,99,99,NA,2,2,0,0,1,2,60,2,3,2,NA +71510,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,1,2,1,2,2,1,2,2,NA,46372.402053,52744.849017,1,90,5,5,1.05,3,3,0,0,3,2,60,1,5,77,NA +71511,7,2,1,64,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,1,NA,1,2,2,NA,NA,NA,1,2,2,1,11842.156092,12450.236093,2,96,NA,NA,NA,3,3,0,0,2,1,64,2,4,1,NA +71512,7,2,1,18,NA,2,2,1,18,222,2,NA,2,2,2,13,NA,NA,NA,2,2,2,2,2,2,2,2,2,2,18206.126374,18280.234909,2,93,4,4,0.82,4,4,0,0,0,1,51,2,3,1,3 +71513,7,2,1,18,NA,2,2,1,18,217,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,2,2,2,1,2,2,1,17555.907575,17627.369376,2,93,6,6,1.39,4,4,0,0,0,1,53,2,3,1,3 +71514,7,2,2,1,15,2,2,2,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9955.153132,10089.038145,2,94,77,77,NA,4,4,2,0,0,2,32,2,4,1,4 +71515,7,2,1,55,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27735.830485,32770.478954,2,91,14,14,4.19,3,3,0,1,0,1,55,2,3,1,5 +71516,7,2,1,23,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,79607.896323,82775.96099,2,92,6,6,2.75,1,1,0,0,0,1,23,1,5,5,NA +71517,7,2,2,56,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,15521.115746,15563.161752,1,96,2,2,0.4,3,3,0,0,0,2,56,1,3,3,NA 
+71518,7,2,2,6,NA,5,6,1,6,75,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5139.061504,5511.809249,2,103,77,77,NA,5,5,0,2,0,2,39,2,5,1,5 +71519,7,2,1,12,NA,4,4,2,12,155,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11085.075029,10992.519505,1,93,7,7,1.97,4,4,1,2,0,2,33,1,4,3,NA +71520,7,2,2,2,NA,2,2,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,10099.930724,10736.663476,2,96,1,1,0.06,5,5,2,1,0,1,27,2,3,1,4 +71521,7,2,1,18,NA,4,4,2,18,222,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,13416.172328,13513.882801,1,96,15,15,5,4,4,0,1,0,1,42,2,4,1,4 +71522,7,2,2,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,41983.304745,42669.243011,1,95,5,5,1.1,3,3,0,0,1,2,63,1,4,1,5 +71523,7,2,1,2,NA,4,4,2,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6441.733603,6707.906886,1,91,7,7,1.49,5,5,3,0,0,2,38,2,4,1,4 +71524,7,2,2,16,NA,5,6,1,16,199,NA,NA,2,2,2,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8391.252153,8753.945484,1,92,12,12,NA,7,7,1,2,1,2,45,2,3,1,3 +71525,7,2,2,70,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,72295.614975,73067.691445,1,98,6,6,1.98,2,2,0,0,2,2,70,1,4,1,5 +71526,7,2,2,39,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,17978.142628,18083.929198,1,91,15,15,5,5,5,0,3,0,1,40,1,5,1,5 +71527,7,2,2,54,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,32032.578233,33220.140308,1,91,4,4,0.86,3,3,1,0,0,2,54,1,4,3,NA +71528,7,2,2,30,NA,4,4,2,NA,NA,2,NA,2,1,4,NA,5,5,2,1,2,2,1,2,2,NA,NA,NA,NA,26462.874679,26479.317455,2,99,15,15,4.34,4,4,0,0,0,1,59,2,4,1,5 +71529,7,2,1,33,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,26972.787983,26588.312559,2,96,10,10,4.76,2,2,0,0,0,2,26,1,5,1,4 +71530,7,2,1,68,NA,1,1,1,NA,NA,1,1,2,1,9,NA,4,1,NA,2,2,2,1,2,2,1,2,2,2,11568.876339,11794.347884,1,102,77,77,NA,2,2,0,0,2,1,68,2,4,1,1 +71531,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,109181.566304,112308.582503,2,91,15,2,0.72,7,1,0,0,1,1,49,NA,NA,5,NA 
+71532,7,2,1,21,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,42077.383821,44759.048785,1,102,6,6,1.34,4,4,0,1,0,2,48,2,3,1,1 +71533,7,2,2,38,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,2,1,2,2,1,2,2,NA,NA,NA,NA,31646.000316,42635.226522,1,93,1,1,0,2,2,0,1,0,2,38,1,4,3,NA +71534,7,2,1,76,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,15844.527135,16703.540642,1,95,6,6,2.04,2,2,0,0,2,2,67,1,1,2,NA +71535,7,2,2,46,NA,2,2,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,32606.880052,33604.492246,2,93,14,14,3.25,4,4,0,2,0,2,46,2,5,1,4 +71536,7,2,2,44,NA,3,3,2,NA,NA,2,NA,2,1,5,NA,5,3,1,1,2,2,1,2,2,NA,NA,NA,NA,26763.110196,27429.212861,1,100,3,3,0.92,1,1,0,0,0,2,44,2,5,3,NA +71537,7,2,1,11,NA,4,4,1,11,135,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13423.881856,14179.490667,2,101,4,4,1.02,2,2,0,1,0,2,30,1,2,77,NA +71538,7,2,2,54,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,1,21143.964074,20565.30403,1,100,5,5,0.85,5,5,0,2,0,2,54,1,2,2,NA +71539,7,2,2,16,NA,5,6,1,16,195,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,4445.888073,4534.01329,1,103,77,77,NA,6,6,0,2,2,1,70,NA,NA,1,1 +71540,7,2,2,57,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,17364.980275,17214.230108,2,93,8,8,1.67,5,5,1,1,0,2,31,1,4,5,NA +71541,7,2,2,8,NA,1,1,2,8,104,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,1,2,2,NA,15225.935813,15373.8033,2,94,5,5,1.08,3,3,0,1,0,2,37,2,2,4,NA +71542,7,2,2,67,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,99831.393624,101809.075589,2,94,15,15,5,2,2,0,0,2,2,67,1,5,1,NA +71543,7,2,2,10,NA,1,1,1,11,132,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,1,12789.411811,13899.276526,1,102,4,4,0.67,4,4,0,1,0,1,23,2,4,5,NA +71544,7,2,1,49,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,140129.484883,1,98,10,10,3.04,4,4,0,2,0,2,47,1,4,1,3 +71545,7,2,1,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,39915.513053,42616.161803,2,98,9,9,4.01,2,2,0,0,0,1,27,1,5,1,4 
+71546,7,2,1,9,NA,4,4,2,9,117,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6377.235034,7094.374704,2,90,6,6,0.84,6,6,1,3,1,2,43,1,2,5,NA +71547,7,2,2,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,NA,NA,NA,NA,61710.107686,63275.181919,1,98,5,3,0.9,3,1,0,0,0,2,21,1,4,5,NA +71548,7,2,2,13,NA,1,1,1,14,168,NA,NA,1,1,NA,7,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,26325.414456,26852.811114,3,92,3,3,0.51,5,5,1,2,0,2,34,2,1,6,NA +71549,7,2,1,80,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,7912.12564,8378.94011,2,99,5,5,1.88,1,1,0,0,1,1,80,1,1,2,NA +71550,7,2,2,0,3,3,3,2,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10692.488346,10402.056617,1,98,4,4,1.01,3,3,1,0,0,1,23,1,2,6,NA +71551,7,2,1,3,NA,3,3,2,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,39061.897809,44649.899401,1,95,15,15,3.62,7,7,2,4,0,1,59,1,5,1,2 +71552,7,2,1,65,NA,2,2,2,NA,NA,2,NA,2,1,9,NA,2,6,NA,2,2,2,2,2,2,1,2,2,2,8609.250304,9380.869295,2,90,4,4,1.02,2,2,0,0,2,2,75,2,1,6,NA +71553,7,2,1,2,NA,4,4,2,3,36,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4455.468915,4730.507566,3,90,3,3,0.37,5,5,2,2,0,2,36,2,4,4,NA +71554,7,2,2,33,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,25558.459243,25637.043034,1,101,3,3,0.44,5,5,0,3,0,1,35,1,3,1,4 +71555,7,2,1,60,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,14488.953694,14771.336069,3,92,6,6,1.65,2,2,0,0,1,1,60,1,3,1,4 +71556,7,2,1,0,2,1,1,1,NA,3,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6876.075003,7261.67751,1,100,4,4,0.56,5,5,3,0,0,2,28,2,2,6,NA +71557,7,2,1,6,NA,5,7,1,6,76,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13614.165673,13762.118902,1,95,14,14,3.04,6,6,0,4,0,1,56,1,5,1,4 +71558,7,2,2,12,NA,4,4,2,12,145,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12581.940435,12644.353117,2,97,6,6,0.92,7,7,1,4,0,2,29,1,3,5,NA +71559,7,2,2,51,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,2,5,NA,2,2,2,2,2,2,1,2,2,2,17054.056149,20556.446647,2,90,2,2,0.3,3,3,0,1,0,2,51,2,2,5,NA 
+71560,7,2,1,80,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,14979.892428,17964.055881,1,92,5,5,1.45,2,2,0,0,2,1,80,1,2,1,3 +71561,7,2,2,40,NA,5,6,1,NA,NA,2,NA,2,2,4,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,12544.244874,13057.037707,1,102,15,15,5,4,4,0,2,0,2,40,2,5,1,4 +71562,7,2,2,0,8,4,4,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4543.931297,5004.098497,1,96,7,7,2.23,3,3,1,0,0,1,29,2,5,1,5 +71563,7,2,1,4,NA,2,2,1,4,58,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,15745.774489,15931.923312,2,91,6,6,0.93,5,5,1,2,0,1,34,1,2,1,3 +71564,7,2,1,25,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,27374.067425,29791.861332,1,98,2,2,0.36,5,5,3,0,0,1,25,1,3,1,3 +71565,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,142639.05444,145244.474771,1,100,8,8,2.97,2,2,0,0,0,1,24,1,5,1,5 +71566,7,2,1,48,NA,5,6,2,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19184.316833,20543.822351,1,97,15,15,5,3,3,0,1,0,2,45,2,5,1,5 +71567,7,2,1,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,6,NA,1,2,2,1,2,2,1,2,2,1,114838.671743,123195.718665,1,91,15,3,0.92,2,1,0,0,0,1,30,1,5,6,NA +71568,7,2,1,13,NA,1,1,1,13,159,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,NA,NA,NA,1,2,2,1,22768.423624,22766.421142,3,92,7,7,1.49,5,5,0,2,1,2,62,1,4,2,NA +71569,7,2,1,56,NA,1,1,2,NA,NA,2,NA,2,2,77,NA,1,1,NA,2,2,2,1,2,2,2,2,2,2,22446.308035,22116.943066,2,94,6,4,1.38,2,1,0,0,0,1,40,2,3,1,NA +71570,7,2,1,3,NA,2,2,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,17458.543556,17482.134163,2,91,5,5,1.05,3,3,2,0,0,2,26,2,3,4,NA +71571,7,2,1,60,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,10717.375231,11136.552864,2,101,14,14,5,2,2,0,0,1,1,60,1,3,1,3 +71572,7,2,1,8,NA,3,3,1,8,104,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,54897.892683,57357.850008,2,98,14,14,3.58,4,4,0,2,0,1,36,1,3,1,4 +71573,7,2,2,11,NA,3,3,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,39810.933651,39282.261038,2,95,15,15,4.34,4,4,0,2,0,2,37,1,4,1,4 
+71574,7,2,2,31,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,3,1,2,2,2,2,2,2,2,2,2,2,2,35464.8385,36323.14329,2,96,6,6,1.25,4,4,1,1,0,1,31,2,3,1,3 +71575,7,2,2,66,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,16420.864787,17817.127844,2,102,3,3,0.76,3,3,0,1,1,2,66,1,2,3,NA +71576,7,2,2,0,9,1,1,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8359.077295,8583.668456,3,92,5,5,0.68,6,6,3,0,0,2,19,1,4,NA,NA +71577,7,2,1,47,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17787.524589,17894.667705,2,100,4,4,1.02,2,2,0,0,1,1,79,1,1,2,NA +71578,7,2,1,41,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,126789.52929,152250.956405,1,101,8,8,1.85,5,5,0,3,0,1,41,1,3,1,4 +71579,7,2,1,0,9,1,1,1,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,6910.953528,6911.08091,2,96,3,3,0.59,3,3,1,0,0,2,28,2,1,77,NA +71580,7,2,2,50,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,3,1,NA,2,2,2,2,2,2,2,2,2,2,19969.163208,20694.024637,2,93,8,8,2.57,3,3,0,0,1,1,59,2,3,1,3 +71581,7,2,1,36,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,51543.062078,57043.574316,3,92,10,10,2.82,4,4,0,1,1,1,36,1,3,1,5 +71582,7,2,1,29,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,3,6,NA,2,2,2,2,2,2,2,2,1,2,39084.166385,40378.039385,1,97,4,4,0.72,5,5,2,1,0,2,33,2,1,6,NA +71583,7,2,1,53,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19374.410926,19463.695548,1,96,12,12,NA,3,3,0,0,2,1,77,NA,NA,6,NA +71584,7,2,2,58,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21171.165283,21533.12194,1,100,15,15,5,3,3,0,0,1,1,69,1,5,1,4 +71585,7,2,2,39,NA,2,2,1,NA,NA,2,NA,2,2,4,NA,4,1,2,1,2,2,1,2,2,2,2,2,2,36904.965687,36815.84758,2,93,8,8,2,4,4,1,1,0,1,50,2,4,1,4 +71586,7,2,2,7,NA,5,7,1,7,87,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7252.281896,7644.510498,2,92,6,6,1.7,2,2,0,1,0,2,32,2,4,3,NA +71587,7,2,2,9,NA,1,1,1,9,116,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17053.854294,17379.519997,3,92,8,8,2.17,4,4,0,2,0,1,40,2,2,1,4 
+71588,7,2,2,12,NA,3,3,2,12,152,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,86497.469217,90071.968674,1,91,14,14,2.44,7,7,2,4,0,1,33,1,5,1,5 +71589,7,2,1,4,NA,3,3,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,30883.231636,34843.624635,1,95,5,5,0.92,5,5,1,2,0,2,30,1,4,1,4 +71590,7,2,1,0,3,1,1,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5777.934266,5882.770795,2,103,9,9,2.6,4,4,2,0,0,2,35,1,4,1,3 +71591,7,2,2,55,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,38954.135779,39992.589933,1,91,4,3,0.82,3,2,0,0,0,2,22,1,5,6,NA +71592,7,2,2,8,NA,1,1,2,8,103,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,1,2,2,2,2,2,2,14300.71869,14671.891945,2,94,9,9,2.29,5,5,2,1,0,2,33,2,3,1,1 +71593,7,2,1,45,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,26215.80546,26132.189255,1,92,9,7,1.74,7,4,2,1,0,1,45,1,4,2,NA +71594,7,2,1,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,91704.59836,93129.802737,1,93,15,14,5,6,1,0,0,0,1,23,1,5,5,NA +71595,7,2,2,56,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,207590.346266,205849.702763,1,92,15,15,5,2,2,0,0,0,2,56,1,4,1,5 +71596,7,2,2,19,NA,1,1,1,19,237,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,NA,NA,NA,NA,26325.414456,27702.295836,3,92,5,5,0.68,6,6,3,0,0,2,19,1,4,NA,NA +71597,7,2,2,13,NA,3,3,2,13,158,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,29885.567338,31265.78413,1,94,7,7,0.94,7,7,1,4,0,2,46,2,5,1,5 +71598,7,2,2,27,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,1,1,2,2,1,2,2,1,2,2,1,77792.702831,79765.659277,1,102,14,14,4.32,3,3,1,0,0,1,25,1,4,1,4 +71599,7,2,1,59,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,105379.391148,106074.259408,1,93,15,15,3.92,5,5,0,1,0,2,54,1,5,1,5 +71600,7,2,1,1,14,1,1,1,NA,14,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,12493.910388,13386.323284,2,98,1,1,0.13,4,4,2,0,0,2,52,1,2,4,NA +71601,7,2,1,6,NA,5,7,2,6,81,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9502.15317,9616.069144,2,97,5,5,0.84,5,5,0,2,0,2,33,1,4,1,3 
+71602,7,2,2,9,NA,3,3,1,9,112,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17685.703081,17770.107744,1,98,6,6,0.81,6,6,0,4,0,2,34,NA,NA,1,2 +71603,7,2,2,16,NA,4,4,2,16,201,NA,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10484.6104,10912.740054,2,99,13,13,NA,3,3,1,1,0,2,36,NA,NA,5,NA +71604,7,2,1,49,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,3,1,NA,2,2,2,2,2,2,2,2,2,2,31640.296506,34765.415986,2,94,4,4,0.81,3,3,0,1,0,1,49,2,3,1,3 +71605,7,2,2,38,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,64581.191728,64779.757488,2,94,9,5,1.84,2,1,0,0,0,2,38,1,4,6,NA +71606,7,2,1,13,NA,5,6,1,13,163,NA,NA,2,1,3,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9048.093498,9416.694257,1,100,8,8,2.62,3,3,0,1,0,1,41,2,5,1,5 +71607,7,2,2,8,NA,1,1,1,8,104,NA,NA,2,2,2,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,NA,16986.005478,17733.622754,2,102,4,4,0.57,5,5,0,3,0,1,41,2,1,1,2 +71608,7,2,1,10,NA,4,4,1,10,125,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10138.00454,10211.52145,1,100,8,8,1.61,6,6,1,3,0,1,29,1,5,6,NA +71609,7,2,2,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,21640.010524,20970.872814,1,93,6,6,0.83,6,6,3,1,0,1,37,NA,NA,1,3 +71610,7,2,1,16,NA,1,1,1,16,200,NA,NA,2,2,2,8,NA,NA,NA,2,2,2,2,2,2,2,2,1,2,22203.024273,23172.92708,1,100,99,99,NA,7,7,2,3,0,2,35,2,1,1,NA +71611,7,2,2,15,NA,3,3,2,15,191,NA,NA,1,1,NA,9,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,103007.696238,104941.393061,1,101,14,14,3.25,4,4,0,1,0,1,48,1,4,1,2 +71612,7,2,2,4,NA,2,2,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11429.37307,11583.084593,2,90,2,2,0.54,2,2,1,0,0,2,32,2,2,4,NA +71613,7,2,1,75,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,12824.368043,13581.00423,2,101,9,9,4.08,2,2,0,0,1,1,75,1,1,2,NA +71614,7,2,1,4,NA,4,4,1,4,50,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10040.033098,11070.778245,1,100,14,14,3.47,4,4,2,0,0,1,34,1,5,1,5 +71615,7,2,1,41,NA,5,6,1,NA,NA,2,NA,2,1,7,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15952.616949,16443.31736,2,93,15,15,5,3,3,1,0,0,1,41,2,5,1,5 
+71616,7,2,2,3,NA,2,2,2,3,46,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,13490.92482,14894.343037,1,90,2,2,0.56,2,2,1,0,0,2,27,1,3,5,NA +71617,7,2,1,2,NA,2,2,2,2,27,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9237.934626,9250.417252,2,90,3,3,0.7,3,3,1,1,0,2,25,1,1,1,NA +71618,7,2,1,4,NA,4,4,2,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7604.054172,7918.254694,2,99,6,6,1.03,6,6,3,0,0,1,33,1,3,6,NA +71619,7,2,1,55,NA,5,6,2,NA,NA,2,NA,2,1,2,NA,4,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11690.444016,12095.591726,3,90,3,3,0.75,3,3,0,0,0,1,55,2,4,1,3 +71620,7,2,1,65,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,1,NA,1,2,1,1,2,1,1,2,1,3,7289.557268,7804.776102,3,90,77,77,NA,2,2,0,0,1,1,65,2,3,1,5 +71621,7,2,2,12,NA,1,1,1,12,152,NA,NA,2,2,4,6,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,26325.414456,27702.295836,3,92,4,4,0.46,7,7,1,2,0,2,31,2,2,1,1 +71622,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,40157.007559,44610.827766,2,98,5,5,1.63,2,2,0,0,2,1,80,1,3,1,3 +71623,7,2,1,12,NA,4,4,2,12,144,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8364.097643,9212.617312,2,90,6,6,1.03,6,6,3,1,0,1,45,2,2,1,2 +71624,7,2,2,18,NA,5,6,1,18,219,2,NA,2,2,2,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,10767.566937,11183.935292,2,91,15,15,4.63,7,7,1,2,0,1,36,2,4,1,3 +71625,7,2,1,39,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,18353.275855,20139.588201,1,91,4,4,0.76,4,4,0,2,0,2,44,1,4,6,NA +71626,7,2,1,59,NA,2,2,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,1,2,2,1,2,2,2,2,2,2,30839.213846,31462.493685,1,92,10,10,2.93,4,4,1,0,0,2,55,1,4,1,4 +71627,7,2,1,19,NA,5,6,2,19,236,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,9099.599144,9725.105491,1,90,9,9,2.6,4,4,0,1,0,2,49,2,2,1,5 +71628,7,2,2,53,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,3,4,NA,1,2,2,1,2,2,1,2,2,1,28349.668436,29178.127765,1,102,5,5,1.56,2,2,0,0,0,2,53,1,3,4,NA +71629,7,2,2,16,NA,4,4,1,16,193,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,20473.346601,20523.769333,2,102,14,14,4.05,3,3,0,1,0,1,18,1,2,NA,NA 
+71630,7,2,1,0,10,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6121.794646,6421.491158,1,102,4,4,0.61,5,5,2,2,0,2,27,2,2,5,NA +71631,7,2,1,22,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,37970.860743,38488.432875,2,92,NA,3,0.92,5,1,0,0,0,1,22,1,5,5,NA +71632,7,2,1,18,NA,4,4,1,18,223,2,NA,1,1,NA,12,NA,NA,NA,1,2,2,NA,NA,NA,1,2,2,1,13276.485807,13885.0639,2,100,NA,NA,NA,4,4,1,0,0,2,38,NA,NA,77,NA +71633,7,2,2,64,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,1,2,1,1,2,1,1,2,1,NA,11838.431472,12356.62484,2,92,3,3,0.4,6,6,0,1,2,1,78,2,1,1,1 +71634,7,2,2,21,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,36850.868456,37875.214466,2,103,5,5,1.2,3,3,0,0,2,1,66,2,2,1,2 +71635,7,2,2,42,NA,4,4,2,NA,NA,2,NA,2,1,8,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,22513.236051,21981.065205,1,91,15,15,5,6,6,1,2,0,2,42,2,5,1,5 +71636,7,2,1,9,NA,4,4,1,9,111,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10410.106675,10390.534388,2,96,4,4,0.57,5,5,0,3,0,2,26,1,2,5,NA +71637,7,2,1,14,NA,3,3,1,14,173,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,95925.820151,96188.020601,1,92,8,8,3.17,2,2,0,1,0,2,35,1,2,77,NA +71638,7,2,1,23,NA,4,4,1,NA,NA,1,2,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,25815.880139,26503.609729,2,101,99,13,NA,2,1,0,0,0,1,24,1,4,5,NA +71639,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,22109.546782,22635.660435,1,93,12,12,NA,3,3,0,1,0,2,48,1,3,5,NA +71640,7,2,2,51,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,15497.844354,15720.409191,1,99,14,14,5,2,1,0,0,0,2,51,1,5,1,NA +71641,7,2,2,7,NA,3,3,2,7,95,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,73810.484644,74865.697805,1,91,15,15,5,4,4,1,1,0,1,38,1,5,1,5 +71642,7,1,2,3,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,15358.480588,0,1,102,2,2,0.31,4,4,1,2,0,2,25,1,2,4,NA +71643,7,2,1,31,NA,2,2,1,NA,NA,1,2,2,1,3,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,53303.690379,53390.332272,1,100,8,8,3.17,2,2,0,0,0,1,31,2,4,1,5 
+71644,7,2,2,80,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,45973.010529,51521.910389,1,91,7,7,2.68,2,2,0,0,2,1,80,1,4,1,4 +71645,7,2,1,7,NA,3,3,2,7,93,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,44777.275016,47565.734765,1,91,14,14,3.06,5,5,0,3,0,2,46,1,5,1,5 +71646,7,2,1,3,NA,1,1,1,3,44,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,16775.083123,16335.348923,2,98,3,3,0.33,7,7,2,3,0,1,40,2,1,1,1 +71647,7,2,2,80,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,16076.770364,16942.062244,1,90,14,14,5,2,2,0,0,1,1,58,1,5,5,NA +71648,7,2,1,7,NA,1,1,1,7,91,NA,NA,1,1,NA,1,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,13533.281742,14131.961026,1,100,7,7,1.74,4,4,0,2,0,2,39,2,1,1,3 +71649,7,2,1,12,NA,5,6,2,12,154,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7057.977053,7861.480262,2,92,12,12,NA,7,7,2,4,0,1,54,2,2,1,5 +71650,7,2,1,60,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,7140.457792,7196.312698,2,101,7,7,1.3,5,5,2,0,1,2,50,1,4,1,3 +71651,7,2,2,46,NA,5,6,2,NA,NA,2,NA,2,2,6,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,19294.309,26319.879121,1,93,14,14,5,2,2,0,1,0,2,46,2,5,3,NA +71652,7,2,1,5,NA,5,6,2,5,68,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,4858.407579,4952.635908,1,99,14,14,2.66,7,7,3,1,0,1,35,1,5,1,5 +71653,7,2,2,10,NA,5,6,1,10,124,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6779.975678,7239.998882,3,91,15,15,5,3,3,0,1,0,1,47,2,5,1,5 +71654,7,2,2,17,NA,3,3,1,17,213,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,49337.766842,51601.905979,2,103,15,15,3.44,7,7,0,1,2,2,79,1,3,2,NA +71655,7,2,1,65,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,7323.703412,7380.991723,2,97,NA,99,NA,7,1,2,1,1,2,56,1,3,5,NA +71656,7,2,1,58,NA,3,3,2,NA,NA,2,NA,2,1,9,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,163791.427718,171966.737289,1,93,15,15,5,2,2,0,0,0,1,58,2,5,1,5 +71657,7,2,2,49,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,2,2,2,1,2,2,1,2,2,1,30105.942022,31646.425257,1,96,8,8,2.17,4,4,0,0,2,1,80,NA,NA,1,NA 
+71658,7,2,1,25,NA,1,1,2,NA,NA,2,NA,2,2,3,NA,2,6,NA,2,2,2,2,2,2,1,2,1,2,37987.561992,39245.132137,1,90,2,1,0.13,5,3,0,1,0,1,25,2,2,6,NA +71659,7,2,2,33,NA,4,4,2,NA,NA,2,NA,2,2,3,NA,3,1,2,1,2,2,1,2,2,NA,NA,NA,NA,32799.237043,32564.866412,1,91,6,6,0.99,5,5,3,0,0,2,33,2,3,1,4 +71660,7,2,2,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,4,3,1,2,2,1,2,2,1,2,2,1,19498.713386,19652.836095,2,97,4,4,1.09,2,2,0,1,0,2,35,1,2,4,NA +71661,7,2,1,8,NA,5,6,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,5651.170392,5934.27615,1,102,5,5,0.92,5,5,1,2,0,2,44,2,1,1,2 +71662,7,2,1,74,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,79733.665046,84684.628619,1,92,9,9,3.8,2,2,0,0,2,2,66,1,4,1,5 +71663,7,2,2,9,NA,4,4,1,9,112,NA,NA,1,1,NA,2,NA,NA,NA,1,1,1,1,2,1,1,2,2,1,11195.065587,11548.620784,2,96,77,77,NA,7,3,1,4,0,2,32,NA,NA,1,NA +71664,7,2,1,4,NA,4,4,2,4,55,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6299.35077,6688.213305,3,90,3,3,0.37,5,5,2,2,0,2,36,2,4,4,NA +71665,7,2,2,7,NA,4,4,1,7,88,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11334.095519,11753.9565,2,96,4,4,0.65,5,5,0,3,0,1,30,1,4,1,2 +71666,7,2,1,61,NA,4,4,2,NA,NA,1,1,1,1,NA,NA,5,6,NA,1,2,2,1,2,2,1,2,2,1,7017.945976,7072.842557,2,99,1,1,0.36,3,1,1,0,1,2,55,1,4,6,NA +71667,7,2,2,9,NA,1,1,2,9,118,NA,NA,1,1,NA,4,NA,NA,NA,2,1,2,1,2,2,1,2,2,2,13231.432201,13706.598104,2,97,4,4,0.67,4,4,0,2,0,1,39,2,2,6,NA +71668,7,2,2,12,NA,4,4,1,12,154,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12531.903464,13043.632492,2,100,9,9,2.46,4,4,1,1,1,2,59,1,3,1,3 +71669,7,2,1,0,7,5,6,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6918.643947,7208.455005,1,101,8,8,1.81,5,5,2,0,1,2,37,2,4,1,2 +71670,7,2,1,24,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14860.312419,14555.816778,3,90,15,15,4.34,4,4,0,0,1,1,65,1,3,1,4 +71671,7,2,1,65,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,125558.167126,127168.668278,3,91,7,7,2.89,2,2,0,0,2,2,64,1,4,1,4 
+71672,7,2,2,18,NA,4,4,2,18,227,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11372.489138,11836.874523,1,99,14,14,3.67,4,4,1,0,0,2,49,1,3,1,3 +71673,7,2,2,9,NA,5,7,2,9,112,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7294.840846,7578.01532,1,93,15,15,5,5,5,1,2,0,2,40,1,5,1,5 +71674,7,2,1,10,NA,4,4,2,10,127,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,9502.15317,9484.287948,2,97,3,3,0.4,6,6,2,3,0,2,25,1,2,5,NA +71675,7,2,2,73,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,66567.821082,76362.988288,3,91,7,7,2.92,2,2,0,0,2,1,74,1,3,1,3 +71676,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,37318.801462,40047.525745,1,98,8,8,3.48,2,2,0,0,2,1,80,1,3,1,2 +71677,7,2,1,35,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,29756.291619,30875.098449,2,101,5,5,1.19,3,3,1,0,0,2,32,1,4,1,3 +71678,7,2,2,70,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,98976.420245,102012.919253,2,101,15,15,5,2,2,0,0,2,1,73,1,5,1,5 +71679,7,2,2,27,NA,1,1,1,NA,NA,2,NA,2,2,5,NA,2,5,2,1,2,2,1,2,2,NA,NA,NA,NA,41537.508946,44104.799715,1,102,4,4,0.61,5,5,2,2,0,2,27,2,2,5,NA +71680,7,2,1,55,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,28575.926692,28541.082411,3,92,4,3,0.52,5,4,0,0,0,2,57,1,4,1,2 +71681,7,2,2,35,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,1,1,2,2,1,2,2,NA,NA,NA,NA,68146.615091,70082.037538,2,99,15,15,5,3,3,1,0,0,1,43,1,5,1,5 +71682,7,2,1,69,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,7101.739553,7379.502561,2,100,5,5,1.56,2,2,0,0,2,1,69,1,2,1,3 +71683,7,2,2,1,17,4,4,1,NA,18,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8412.350508,9400.756791,1,100,5,5,1.05,3,3,1,0,0,2,42,2,5,1,NA +71684,7,2,1,0,3,3,3,1,NA,4,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24509.975935,26238.985016,1,94,5,5,1.04,4,4,1,1,0,1,18,1,2,NA,NA +71685,7,2,2,11,NA,4,4,2,11,140,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10994.192555,11605.838362,2,97,2,2,0.27,4,4,0,2,0,1,51,1,2,4,NA 
+71686,7,2,1,31,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16505.268729,16996.380056,3,91,8,8,2.7,3,3,1,0,0,1,31,1,5,1,5 +71687,7,2,2,13,NA,5,6,2,13,157,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,6486.356303,6952.179218,1,91,7,7,1.57,4,4,0,3,0,2,38,2,2,3,NA +71688,7,2,1,51,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,16628.326744,17325.680634,2,102,10,10,3.62,3,3,0,0,0,1,51,2,5,1,5 +71689,7,2,2,17,NA,4,4,1,17,210,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16659.324602,16974.753267,1,92,15,15,4.44,5,5,0,3,0,2,43,1,5,6,NA +71690,7,2,1,2,NA,2,2,1,2,31,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,9382.177239,9806.283152,2,93,6,6,0.64,7,7,2,1,3,2,60,2,3,2,NA +71691,7,2,2,76,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,62212.598767,64340.261278,1,91,10,10,4.3,2,2,0,0,2,1,78,1,4,1,4 +71692,7,2,1,8,NA,3,3,1,8,104,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22651.436723,23515.701302,3,92,7,7,0.81,7,7,2,4,0,1,40,NA,NA,1,4 +71693,7,2,2,4,NA,5,6,2,4,53,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7497.268293,8158.717681,1,101,10,10,3.67,3,3,1,0,0,1,36,2,5,1,5 +71694,7,2,1,0,6,5,6,1,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5024.464768,5303.683185,2,92,10,6,1.12,7,4,1,1,1,2,27,2,3,1,3 +71695,7,2,2,54,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,1,1,NA,1,2,2,1,2,1,1,2,2,1,14680.520497,14758.150245,3,91,6,6,1.34,4,4,0,2,0,1,52,2,3,1,1 +71696,7,1,2,58,NA,5,6,NA,NA,NA,2,NA,2,2,4,NA,1,2,NA,1,2,1,1,2,1,NA,NA,NA,NA,12649.084278,0,3,90,12,12,NA,4,4,0,0,0,2,58,2,1,2,NA +71697,7,2,1,74,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,NA,15176.622228,16685.078243,1,101,4,4,1.16,2,2,0,0,2,1,74,1,1,1,2 +71698,7,2,2,46,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,127315.335607,133282.596235,1,91,14,14,4.03,4,4,0,2,0,1,52,1,4,1,5 +71699,7,2,2,15,NA,2,2,1,15,184,NA,NA,2,1,4,9,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,15809.066118,17342.845429,2,93,3,3,0.52,5,5,0,2,0,1,41,2,4,1,4 
+71700,7,2,2,1,16,5,6,1,NA,17,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5610.22155,6105.185511,2,92,15,15,5,3,3,1,0,0,1,39,2,5,1,5 +71701,7,2,2,17,NA,3,3,1,17,210,2,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,113907.203714,124196.980527,1,94,8,8,1.67,5,5,1,2,0,1,52,1,4,1,4 +71702,7,2,2,70,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,2,NA,1,2,2,1,2,2,1,2,2,NA,12669.609493,13517.506264,2,98,99,99,NA,1,1,0,0,1,2,70,1,3,2,NA +71703,7,2,1,53,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,NA,NA,NA,NA,26240.791307,26334.983911,1,90,4,4,1.34,1,1,0,0,0,1,53,1,4,6,NA +71704,7,2,1,77,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,71070.181743,73766.510327,1,95,4,4,1.12,2,2,0,0,2,2,78,1,4,1,2 +71705,7,2,1,67,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,14488.953694,14771.336069,3,92,3,3,0.95,2,2,0,0,2,2,63,1,4,1,1 +71706,7,2,2,30,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,4,2,1,2,2,1,2,2,1,2,2,1,25423.189953,34604.094756,2,98,2,2,0.34,2,2,0,1,0,2,30,1,4,4,NA +71707,7,2,2,7,NA,4,4,1,7,91,NA,NA,1,1,NA,2,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11195.065587,11638.702248,2,96,13,13,NA,4,4,1,1,0,2,40,1,3,77,NA +71708,7,2,1,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,6612.194774,6663.917441,2,99,5,5,1.88,1,1,0,0,1,1,64,1,5,5,NA +71709,7,2,2,45,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,40760.712736,41570.695949,1,98,3,3,0.73,2,2,0,0,0,1,49,1,2,1,2 +71710,7,2,1,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,8491.292032,8557.713588,1,96,15,15,5,2,2,0,0,2,1,68,1,3,1,4 +71711,7,2,1,3,NA,4,4,1,3,43,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10273.522239,11328.238204,1,100,14,6,1.85,3,2,1,0,0,1,33,1,5,5,NA +71712,7,2,2,53,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,19927.035776,20217.558558,2,96,8,8,3.67,2,2,0,0,0,1,56,2,5,1,5 +71713,7,2,1,17,NA,5,6,2,17,213,2,NA,2,1,4,12,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,8965.57404,9581.867503,1,90,15,15,5,5,5,0,3,0,2,46,2,4,1,5 
+71714,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,29470.441678,31861.649147,1,103,6,6,1.98,2,2,0,0,2,1,80,1,5,1,4 +71715,7,2,2,47,NA,4,4,2,NA,NA,2,NA,2,2,6,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,21969.841763,21566.078744,1,96,14,14,5,2,2,0,0,0,2,47,2,4,5,NA +71716,7,2,2,21,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,14516.765165,13843.248577,3,90,15,15,4.89,5,5,0,0,0,2,57,2,3,1,3 +71717,7,2,2,27,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,5,2,1,2,2,1,2,2,1,2,2,1,23255.074483,22360.893796,2,98,4,4,1.29,2,2,0,1,0,2,27,1,2,5,NA +71718,7,2,2,68,NA,4,4,2,NA,NA,2,NA,2,2,4,NA,4,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,9644.668529,10075.292662,1,93,3,3,1.1,1,1,0,0,1,2,68,2,4,1,NA +71719,7,2,1,59,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,1,5,NA,1,2,2,1,2,2,1,2,2,1,26135.885159,26946.665552,2,101,2,2,0.74,1,1,0,0,0,1,59,1,1,5,NA +71720,7,1,1,80,NA,3,3,NA,NA,NA,1,2,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,6882.452425,0,1,99,3,3,1.19,1,1,0,0,1,1,80,1,5,2,NA +71721,7,2,2,11,NA,3,3,2,11,138,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,73810.484644,74865.697805,1,94,10,10,2.67,5,5,0,3,0,1,40,1,5,1,2 +71722,7,2,2,48,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,4,NA,1,2,2,1,2,2,1,2,2,1,19258.69251,18731.627868,2,99,12,3,1.16,6,1,0,0,0,2,57,1,5,2,NA +71723,7,2,2,5,NA,5,6,2,6,72,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5520.445386,5614.569811,2,100,15,15,4.83,4,4,1,1,0,1,43,2,5,1,5 +71724,7,2,1,54,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,37557.946192,39249.822004,2,102,99,3,1.29,3,1,0,0,1,1,61,NA,NA,5,NA +71725,7,2,1,24,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,9177.295801,9548.31812,2,92,15,3,0.92,3,1,0,0,0,2,25,1,5,5,NA +71726,7,2,2,5,NA,4,4,2,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9268.093277,9507.318489,1,99,3,3,0.82,2,2,1,0,0,2,29,1,4,4,NA +71727,7,1,2,11,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,5,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,8851.712341,0,1,92,5,5,0.63,7,7,0,4,1,1,60,NA,NA,1,NA 
+71728,7,2,2,61,NA,2,2,1,NA,NA,2,NA,2,1,3,NA,5,1,NA,2,2,2,1,2,2,1,2,2,2,11291.029617,11762.265953,2,102,14,14,5,2,2,0,0,2,1,68,1,4,1,5 +71729,7,2,1,33,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,16214.132654,20271.082789,3,90,8,8,3.3,2,2,0,0,1,2,64,2,2,1,NA +71730,7,2,2,40,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,1,3,2,2,2,2,2,2,2,1,2,2,2,33737.181071,33913.1176,2,91,99,99,NA,6,6,1,3,0,2,20,2,2,5,NA +71731,7,1,2,24,NA,5,6,NA,NA,NA,2,NA,2,1,4,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,15265.136017,0,1,93,15,5,1.84,3,1,0,0,0,2,24,2,5,5,NA +71732,7,2,1,63,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,1,6,NA,2,2,2,2,2,2,1,2,2,2,10596.142548,10856.489537,1,94,12,3,1.07,4,1,0,0,1,2,37,NA,NA,6,NA +71733,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,1,2,NA,1,2,2,1,2,2,1,2,2,NA,24816.484998,26631.048773,1,91,3,3,1.33,1,1,0,0,1,1,80,1,1,2,NA +71734,7,2,1,5,NA,4,4,1,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7311.663111,7613.781996,2,93,6,6,1.15,5,5,3,1,0,1,29,1,3,5,NA +71735,7,2,2,67,NA,2,2,1,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,14204.126838,15368.918135,2,98,4,4,1.34,2,2,0,0,2,1,66,1,1,1,1 +71736,7,2,1,74,NA,3,3,1,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,1,2,1,2,2,1,2,2,NA,62359.83753,66231.994715,1,94,14,14,5,2,2,0,0,2,1,74,1,4,1,4 +71737,7,2,2,36,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,1,5,2,2,2,2,2,2,2,2,2,2,2,45655.090694,49537.803198,3,92,4,4,0.67,4,4,0,3,0,2,36,2,1,5,NA +71738,7,1,2,80,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,NA,NA,NA,NA,47254.485787,0,1,90,8,8,4.21,1,1,0,0,1,2,80,1,4,2,NA +71739,7,2,1,80,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,NA,13116.035678,14391.817492,2,91,4,4,1.02,2,2,0,0,2,1,80,1,4,1,2 +71740,7,2,2,20,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,NA,NA,NA,1,2,2,1,20443.961017,19438.354372,2,99,3,3,0.44,5,5,1,1,0,2,53,1,4,1,3 +71741,7,2,1,63,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,5,3,NA,2,2,2,1,2,2,2,2,2,2,6449.12882,6552.435629,2,93,3,3,0.9,1,1,0,0,1,1,63,2,5,3,NA 
+71742,7,2,1,0,10,5,6,2,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,5441.212985,5743.59084,1,93,8,8,1.2,7,7,1,1,1,1,24,2,2,5,NA +71743,7,2,2,0,4,1,1,1,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,2,2,2,NA,NA,NA,NA,8359.077295,8418.658174,3,92,6,6,1.06,5,5,2,0,0,2,54,2,1,77,NA +71744,7,2,2,40,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,144726.059187,145423.66071,2,98,15,15,4.17,6,6,1,1,0,2,40,1,4,1,4 +71745,7,2,2,21,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,5,2,1,2,2,1,2,2,1,2,2,1,130601.953362,132156.64994,2,91,15,15,5,4,4,0,0,0,1,54,1,5,1,NA +71746,7,2,2,18,NA,3,3,1,18,219,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,32725.110579,34226.88515,2,92,3,1,0.28,2,1,0,0,0,2,18,1,4,NA,NA +71747,7,2,1,7,NA,5,7,1,7,88,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11800.806722,11886.381633,1,100,9,9,2.78,4,4,0,2,0,1,39,1,5,1,5 +71748,7,2,2,36,NA,1,1,2,NA,NA,2,NA,2,2,4,NA,3,6,2,2,2,2,2,2,2,2,2,1,2,32910.619435,32637.844845,1,90,2,1,0.17,5,2,0,1,0,1,25,2,2,6,NA +71749,7,2,2,0,4,1,1,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7011.218293,6863.710472,2,97,6,6,1.84,2,2,1,0,0,2,27,1,4,5,NA +71750,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,NA,19552.617734,21912.600615,2,92,4,4,1.12,2,2,0,0,1,2,80,1,5,2,NA +71751,7,2,2,42,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,2,1,2,2,1,2,2,1,2,2,1,110070.015649,117535.388004,3,91,14,14,3.4,4,4,0,2,0,1,40,1,4,1,4 +71752,7,2,1,36,NA,1,1,1,NA,NA,2,NA,2,2,4,NA,2,1,NA,1,2,2,1,2,2,2,2,2,2,45660.210546,48177.324946,1,101,9,9,2.88,3,3,1,0,0,1,36,2,2,1,4 +71753,7,2,2,80,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,NA,33147.414266,37148.276517,2,94,14,14,5,2,2,0,0,1,2,80,1,4,2,NA +71754,7,2,1,0,0,3,3,1,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20173.318836,21596.406788,2,103,15,15,5,4,4,2,0,0,1,36,2,4,1,5 +71755,7,2,1,22,NA,2,2,1,NA,NA,2,NA,2,2,3,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,38474.772527,42632.210531,2,93,7,3,0.9,3,1,0,0,0,1,25,2,4,5,NA 
+71756,7,2,1,63,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,133262.624177,131902.330801,1,98,9,9,3.97,2,2,0,0,2,1,63,1,5,1,4 +71757,7,2,1,23,NA,5,6,1,NA,NA,2,NA,2,2,3,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,13254.861315,15747.113823,1,101,8,8,4.48,1,1,0,0,0,1,23,2,5,5,NA +71758,7,2,2,24,NA,2,2,2,NA,NA,2,NA,2,1,3,NA,4,6,2,2,2,2,2,2,2,1,2,2,2,30253.427014,30121.660039,2,90,6,6,0.66,7,7,2,2,0,2,24,2,4,6,NA +71759,7,2,1,16,NA,4,4,1,17,204,NA,NA,1,1,NA,10,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,17606.165994,17558.40257,2,101,8,8,2.7,3,3,0,1,0,1,53,1,4,1,2 +71760,7,2,2,60,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,14994.337564,16150.13007,1,100,8,8,2.36,3,3,1,0,1,2,60,1,3,3,NA +71761,7,2,1,38,NA,1,1,1,NA,NA,2,NA,2,7,77,NA,2,1,NA,2,2,2,1,2,2,NA,NA,NA,NA,32907.512068,32659.148672,2,92,12,12,NA,7,7,0,1,2,2,64,2,1,2,NA +71762,7,2,1,12,NA,3,3,1,12,155,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,30943.024697,32205.010072,1,101,4,4,0.58,6,6,0,4,0,2,41,1,3,5,NA +71763,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,109309.268477,116308.99629,3,91,14,14,3.4,4,4,0,2,0,1,40,1,4,1,4 +71764,7,2,1,33,NA,5,6,1,NA,NA,2,NA,2,1,5,NA,4,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,20181.692021,21173.351412,1,92,12,12,NA,4,4,1,1,0,1,33,2,4,1,4 +71765,7,2,1,39,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,19144.719218,23354.203452,1,101,8,8,1.81,5,5,2,0,1,2,37,2,4,1,2 +71766,7,2,2,38,NA,2,2,1,NA,NA,2,NA,2,1,2,NA,3,1,3,2,2,2,2,2,2,NA,NA,NA,NA,40476.413979,41456.006766,2,93,5,5,0.87,4,4,1,1,0,1,41,2,5,1,3 +71767,7,2,1,38,NA,1,1,2,NA,NA,2,NA,1,1,NA,NA,5,3,NA,1,2,2,1,2,2,1,2,2,1,39133.405322,40210.578118,1,91,12,8,4.59,5,1,0,2,0,1,38,1,5,3,NA +71768,7,2,2,7,NA,3,3,2,7,88,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,24070.467912,25563.654853,1,95,15,15,3.62,7,7,2,4,0,1,59,1,5,1,2 +71769,7,2,1,24,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,4,5,NA,2,2,2,2,2,2,2,2,2,2,35669.2076,36155.406423,2,94,14,8,3.06,5,2,0,0,0,1,24,2,4,5,NA 
+71770,7,2,2,64,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,10687.292889,11164.469085,2,99,3,3,0.75,2,2,0,0,1,2,64,1,4,3,NA +71771,7,2,1,3,NA,1,1,2,3,47,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18363.136017,19674.774959,1,91,4,4,0.86,3,3,1,0,0,2,54,1,4,3,NA +71772,7,2,2,52,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,NA,NA,NA,NA,37518.850453,37204.255172,1,98,3,3,1.18,1,1,0,0,0,2,52,1,4,3,NA +71773,7,2,1,53,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,4,3,NA,1,2,2,1,2,2,1,2,2,1,23775.734331,23918.947415,2,96,12,99,NA,2,1,0,0,1,1,53,1,4,3,NA +71774,7,1,1,2,NA,1,1,NA,NA,NA,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,10412.950357,0,1,90,15,15,5,5,5,1,1,0,1,32,2,1,1,4 +71775,7,2,1,62,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,28216.191929,29365.059825,1,101,6,6,1.52,3,3,0,1,1,1,62,1,2,1,3 +71776,7,2,2,0,10,1,1,1,NA,11,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,5328.475628,5508.797375,1,103,13,13,NA,6,6,1,2,0,1,42,2,9,1,9 +71777,7,2,2,77,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,9807.668192,10541.248354,2,95,5,5,0.87,4,4,0,0,2,2,77,1,2,1,1 +71778,7,2,1,20,NA,2,2,2,NA,NA,2,NA,2,1,3,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,43798.832177,56306.768819,1,90,3,3,0.79,2,2,0,0,1,1,70,2,2,1,NA +71779,7,2,1,19,NA,4,4,2,19,239,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11125.932433,11147.929312,1,96,6,6,1.21,4,4,0,2,0,2,41,1,4,4,NA +71780,7,2,1,10,NA,4,4,2,10,128,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,14125.862146,14370.909235,1,97,15,15,5,4,4,0,2,0,1,48,1,5,1,5 +71781,7,2,2,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,17152.714252,16683.285229,1,102,12,12,NA,7,7,3,2,0,2,52,1,4,5,NA +71782,7,2,1,5,NA,5,6,1,5,70,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,10498.222836,11004.769407,1,100,15,15,5,4,4,2,0,0,1,39,2,5,1,5 +71783,7,1,1,12,NA,5,6,NA,NA,NA,NA,NA,2,1,3,5,NA,NA,NA,1,1,1,1,2,1,NA,NA,NA,NA,6174.283826,0,2,92,77,77,NA,5,5,0,1,2,2,80,NA,NA,1,1 
+71784,7,2,2,24,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,41018.498876,42180.331054,2,98,6,6,1.07,5,5,3,0,0,2,24,1,3,1,3 +71785,7,2,1,64,NA,4,4,1,NA,NA,1,1,1,1,NA,NA,2,3,NA,1,2,2,1,2,2,1,2,2,1,7101.739553,7433.956549,2,100,77,77,NA,1,1,0,0,1,1,64,1,2,3,NA +71786,7,2,2,42,NA,5,7,1,NA,NA,2,NA,2,1,7,NA,5,4,2,1,2,2,1,2,2,1,2,2,1,18588.235275,19331.159141,2,96,15,15,5,3,3,0,2,0,2,42,2,5,4,NA +71787,7,2,2,50,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,16859.368198,17492.963343,1,99,13,13,NA,4,4,1,0,0,2,26,1,4,4,NA +71788,7,2,1,6,NA,1,1,1,6,82,NA,NA,1,1,NA,0,NA,NA,NA,2,1,1,2,2,1,NA,NA,NA,NA,14880.007592,15997.505807,3,92,4,4,0.66,4,4,0,1,0,1,52,2,1,1,1 +71789,7,2,2,50,NA,5,7,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,13149.417265,14101.663559,2,103,15,15,5,3,3,0,0,0,2,50,1,4,1,4 +71790,7,2,2,11,NA,5,7,1,11,139,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,10332.067017,11032.916236,1,100,9,9,2.22,5,5,0,3,0,2,52,1,4,1,4 +71791,7,2,1,18,NA,1,1,1,18,221,2,NA,1,1,NA,12,NA,NA,NA,2,2,2,1,2,2,1,2,2,2,32262.881978,32260.04446,2,102,8,8,2.01,4,4,0,0,0,2,48,2,4,3,NA +71792,7,2,1,34,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,69548.324117,71540.223881,2,95,6,6,2.69,1,1,0,0,0,1,34,1,5,5,NA +71793,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,170311.62251,168883.560827,3,91,15,15,5,2,2,0,0,0,2,57,1,5,1,5 +71794,7,2,1,4,NA,4,4,2,4,57,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8973.990262,9527.959827,2,101,7,7,1.3,5,5,2,0,1,2,50,1,4,1,3 +71795,7,2,2,8,NA,5,7,1,9,108,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,17706.623308,17924.842987,1,94,2,2,0.3,5,5,1,2,0,1,23,1,1,6,NA +71796,7,2,2,45,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,2,NA,1,2,2,1,2,2,1,2,2,1,135834.019526,140154.312816,2,92,10,10,3.51,3,3,0,0,0,1,24,1,4,5,NA +71797,7,2,1,80,NA,3,3,1,NA,NA,1,1,1,1,NA,NA,3,2,NA,1,2,2,NA,NA,NA,1,2,2,NA,26771.485907,29740.715741,2,100,NA,NA,NA,4,4,0,0,2,2,51,NA,NA,1,NA 
+71798,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,8491.292032,8557.713588,1,96,15,15,5,2,2,0,0,2,2,62,1,4,1,2 +71799,7,2,1,59,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,151766.599459,151581.541662,3,91,14,1,0.41,2,1,0,0,0,1,47,NA,NA,5,NA +71800,7,2,2,33,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,2,1,2,2,2,2,1,2,2,1,2,2,1,45655.090694,44423.220436,3,92,7,7,1.3,5,5,1,2,0,2,33,2,2,1,1 +71801,7,2,2,4,NA,1,1,1,4,56,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17887.570772,19748.358154,1,92,10,10,3.04,4,4,1,1,0,1,32,1,3,1,2 +71802,7,2,1,53,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,NA,1,2,2,1,2,2,1,2,2,1,27250.100481,27216.872857,1,99,6,3,0.92,2,1,0,0,0,2,50,1,3,6,NA +71803,7,2,2,52,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,16063.886913,15924.431895,2,96,4,4,0.57,6,6,0,3,0,2,29,1,3,4,NA +71804,7,2,2,9,NA,1,1,1,9,110,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,13616.85154,13950.045541,1,102,6,6,0.96,5,5,0,2,0,1,32,2,2,1,3 +71805,7,2,1,22,NA,5,6,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,1,2,2,1,14313.345971,14977.822328,3,91,7,7,1.33,6,6,0,0,2,2,51,2,5,1,5 +71806,7,2,2,12,NA,4,4,1,12,147,NA,NA,1,1,NA,6,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12412.374308,12473.945857,2,96,4,4,0.57,6,6,0,3,0,2,29,1,3,4,NA +71807,7,2,1,12,NA,2,2,1,12,147,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,2,2,2,2,2,2,2,20560.901695,20667.963739,2,96,7,7,1.57,4,4,0,2,0,1,40,2,2,1,5 +71808,7,2,1,12,NA,1,1,1,12,147,NA,NA,1,1,NA,5,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,24228.858782,24355.020132,2,102,7,7,1.53,5,5,0,3,0,1,43,2,2,1,4 +71809,7,2,1,5,NA,4,4,1,5,65,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,14467.4421,14643.906573,2,101,4,4,1.22,2,2,1,0,0,2,31,1,4,77,NA +71810,7,2,1,33,NA,2,2,2,NA,NA,2,NA,2,2,2,NA,3,1,NA,2,2,2,2,2,2,2,2,2,2,34887.439952,34624.133414,2,94,14,4,1.47,5,1,0,0,0,1,24,2,4,5,NA +71811,7,2,2,53,NA,5,6,2,NA,NA,2,NA,2,1,99,NA,1,3,NA,1,2,1,1,2,2,1,1,1,3,14813.321679,14891.653673,2,94,77,77,NA,6,6,2,0,0,2,18,1,3,NA,NA 
+71812,7,2,1,3,NA,2,2,1,3,38,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,11179.43727,11533.315753,2,93,14,14,2.91,6,6,2,0,1,2,74,NA,NA,2,NA +71813,7,2,1,36,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17091.959624,18613.193853,1,90,15,15,5,4,4,2,0,0,1,36,1,5,1,5 +71814,7,2,1,3,NA,1,1,1,3,45,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,20874.345556,22365.354744,3,92,14,14,3.25,4,4,2,0,0,2,33,1,5,1,5 +71815,7,2,1,0,0,2,2,2,NA,1,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,2,2,2,NA,NA,NA,NA,4857.494088,5095.295928,2,90,4,4,0.57,5,5,1,0,2,2,80,2,1,2,NA +71816,7,2,1,2,NA,1,1,1,2,34,NA,NA,1,1,NA,NA,NA,NA,NA,2,1,2,1,2,2,NA,NA,NA,NA,14457.854197,14219.651494,1,92,6,6,1.34,4,4,1,0,0,1,25,2,3,1,3 +71817,7,2,1,8,NA,4,4,1,8,103,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8294.996898,8761.908989,2,100,8,8,1.1,7,7,3,3,0,2,58,1,3,5,NA +71818,7,2,1,8,NA,1,1,1,8,105,NA,NA,1,1,NA,2,NA,NA,NA,2,1,2,2,2,2,1,2,2,1,11036.458246,10927.552283,1,102,13,13,NA,6,6,1,2,0,2,36,2,4,6,NA +71819,7,2,2,25,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,4,1,2,2,2,2,1,2,2,1,2,1,2,51600.2972,52672.714014,1,101,9,9,2.88,3,3,1,0,0,1,36,2,2,1,4 +71820,7,2,1,47,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,138834.18124,139448.476692,1,100,15,15,5,5,5,0,3,0,1,47,1,5,1,5 +71821,7,2,2,50,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,NA,NA,NA,NA,16181.169973,16286.716901,2,100,6,6,2.04,2,2,0,0,0,2,50,1,2,1,3 +71822,7,2,2,19,NA,4,4,1,19,229,2,NA,1,1,NA,15,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,18163.985724,18749.311901,2,101,2,1,0.32,2,1,0,0,0,2,19,1,4,NA,NA +71823,7,2,2,4,NA,3,3,1,4,51,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,87448.870731,96516.512941,2,94,14,14,4.71,3,3,1,0,0,1,35,1,5,1,5 +71824,7,2,1,48,NA,2,2,2,NA,NA,2,NA,2,2,3,NA,5,1,NA,2,2,2,1,2,2,2,2,2,2,44123.284536,45145.647683,2,91,3,3,0.73,3,3,0,0,0,2,22,2,2,5,NA +71825,7,2,2,53,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,102322.335011,107871.941405,2,100,10,10,3.13,4,4,0,0,1,2,53,1,2,1,2 
+71826,7,2,2,77,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,2,2,NA,1,2,2,1,2,2,1,2,2,NA,30880.887565,31700.101057,1,98,1,1,0.04,1,1,0,0,1,2,77,1,2,2,NA +71827,7,2,1,14,NA,5,6,1,14,169,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7669.677276,8194.5967,2,92,15,15,4.59,4,4,0,2,0,2,48,1,5,1,5 +71828,7,2,2,49,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,34954.173075,38933.293222,2,98,7,7,1.97,4,4,0,1,0,1,40,1,3,1,3 +71829,7,2,1,57,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,4,NA,1,2,2,1,2,2,1,2,2,1,23857.322871,24256.958876,2,103,2,2,0.46,2,2,0,0,0,1,57,1,2,4,NA +71830,7,2,2,34,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,3,6,2,2,2,2,2,2,2,NA,NA,NA,NA,34898.504426,34609.253559,1,100,8,3,0.68,6,3,1,0,0,1,33,2,3,6,NA +71831,7,2,2,41,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,3,1,2,1,2,2,1,2,2,1,2,2,1,33767.584626,40865.10406,2,102,14,14,2.44,7,7,0,2,1,2,71,1,3,3,NA +71832,7,2,2,49,NA,1,1,2,NA,NA,2,NA,2,1,6,NA,1,3,NA,2,2,2,1,2,2,2,2,2,2,40880.818805,41857.641766,1,101,5,5,0.51,7,7,0,3,2,1,75,2,1,1,1 +71833,7,2,2,30,NA,5,6,2,NA,NA,2,NA,2,2,2,NA,5,1,2,1,2,1,1,2,2,1,2,2,1,20963.809917,22126.306943,1,93,10,10,4.76,2,2,0,0,0,1,29,2,5,1,5 +71834,7,2,2,0,5,4,4,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3861.876549,4016.370213,1,96,8,8,1.61,6,6,3,0,0,1,33,2,5,1,4 +71835,7,2,2,17,NA,4,4,2,17,207,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,11133.192774,11654.909373,3,91,2,2,0.25,4,4,0,2,0,2,35,1,3,5,NA +71836,7,2,1,65,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,6036.688933,6083.909812,2,99,15,15,5,2,2,0,0,2,1,65,1,4,1,5 +71837,7,2,2,13,NA,2,2,2,13,161,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13824.001771,14551.102227,2,90,12,12,NA,4,4,0,2,0,2,38,2,4,1,4 +71838,7,2,2,10,NA,4,4,2,10,126,NA,NA,1,1,NA,4,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,7814.742747,8106.808519,2,95,4,4,0.65,4,4,1,2,0,2,27,1,3,5,NA +71839,7,2,2,57,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,160743.928829,165029.101567,1,95,8,6,2.04,4,2,0,1,0,2,57,1,5,5,NA 
+71840,7,2,2,11,NA,2,2,2,11,134,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,13450.921832,14026.252281,1,90,6,6,0.81,6,6,0,3,0,2,45,1,4,1,2 +71841,7,2,1,45,NA,2,2,1,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,28726.575428,29281.498535,2,100,14,14,3.36,4,4,1,1,0,1,45,2,5,1,2 +71842,7,2,1,6,NA,2,2,2,7,84,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,18107.947773,18211.90239,1,97,7,7,2.2,3,3,0,1,0,1,34,2,3,1,4 +71843,7,2,1,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,NA,NA,NA,1,2,2,1,11507.810748,11454.924313,2,99,13,13,NA,6,6,2,1,0,2,31,1,4,6,NA +71844,7,2,2,43,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,1,1,2,1,2,2,1,2,2,1,2,2,1,19943.783243,19397.969296,2,95,2,2,0.4,2,2,0,0,0,2,43,1,1,1,NA +71845,7,2,1,8,NA,5,7,2,8,97,NA,NA,1,1,NA,1,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,8246.426933,10295.491756,1,99,10,10,3.99,3,3,0,1,0,1,36,2,2,6,NA +71846,7,2,1,72,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,NA,15429.860615,16015.253437,1,98,12,12,NA,3,3,0,0,2,1,72,1,5,1,3 +71847,7,2,1,37,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,94644.050918,104631.781207,2,91,15,15,5,3,3,1,0,0,1,37,1,5,1,5 +71848,7,2,1,26,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,10559.047286,11105.574109,2,92,8,6,2.75,2,1,0,0,0,1,26,2,5,5,NA +71849,7,2,2,56,NA,2,2,2,NA,NA,2,NA,2,1,7,NA,3,4,NA,2,2,2,2,2,2,1,2,2,2,21097.069797,24201.517172,2,90,3,3,1.1,1,1,0,0,0,2,56,2,3,4,NA +71850,7,2,2,6,NA,4,4,1,7,84,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8437.316833,8584.499702,2,96,4,4,0.4,7,7,3,2,0,2,25,1,2,5,NA +71851,7,2,1,17,NA,1,1,1,17,213,2,NA,2,2,3,11,NA,NA,NA,2,2,2,2,2,2,1,2,2,1,29234.272259,28953.402615,3,92,4,4,0.67,4,4,0,3,0,2,36,2,1,5,NA +71852,7,2,1,19,NA,5,7,1,19,232,2,NA,1,1,NA,13,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24231.355333,25219.610889,2,100,5,5,1.3,3,3,0,1,0,2,46,1,3,2,NA +71853,7,2,1,14,NA,4,4,1,14,169,NA,NA,1,1,NA,7,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,12046.265019,11945.683979,1,102,7,7,1.57,4,4,0,2,0,2,33,1,4,1,4 
+71854,7,2,2,0,7,4,4,2,NA,8,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3756.765605,3760.183196,1,99,4,4,0.53,7,7,3,1,0,2,26,1,1,5,NA +71855,7,2,2,62,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,1,60351.409248,61337.452183,2,101,3,3,0.98,2,2,0,0,2,1,62,1,4,1,3 +71856,7,2,2,32,NA,5,6,2,NA,NA,2,NA,2,2,3,NA,5,1,2,1,2,2,1,2,2,1,2,2,1,16369.916397,16695.550655,1,101,10,10,3.67,3,3,1,0,0,1,36,2,5,1,5 +71857,7,2,1,39,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,78529.577822,81088.445647,1,98,15,15,5,5,5,0,3,0,2,41,1,5,6,NA +71858,7,2,1,27,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,NA,NA,NA,1,2,2,1,33510.443506,34234.256724,1,98,4,4,0.89,3,3,0,0,1,2,55,1,5,1,NA +71859,7,2,2,0,9,4,4,2,NA,10,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,NA,NA,NA,NA,NA,NA,NA,3516.231705,3777.543312,2,99,2,2,0.19,7,7,3,1,0,2,43,1,2,4,NA +71860,7,2,1,28,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,16436.212192,16355.203111,1,99,12,12,NA,2,2,0,0,0,1,28,1,5,5,NA +71861,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,1,2,2,1,11862.436765,15907.964192,1,90,3,3,1.07,1,1,0,0,1,2,68,1,2,5,NA +71862,7,2,2,54,NA,5,6,1,NA,NA,2,NA,2,2,6,NA,1,4,NA,1,1,2,1,2,2,NA,NA,NA,NA,10489.35334,11245.538838,3,91,14,14,2.5,6,6,1,1,1,2,37,2,2,1,5 +71863,7,2,1,38,NA,3,3,2,NA,NA,1,2,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,67281.364849,72381.052566,2,100,15,15,5,3,3,0,1,0,1,38,1,4,1,4 +71864,7,2,1,18,NA,2,2,2,18,221,2,NA,1,1,NA,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,16033.31661,16386.200415,2,90,8,8,2.01,4,4,0,0,1,2,67,2,4,2,NA +71865,7,2,2,1,23,4,4,2,NA,24,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6247.52442,6810.692829,1,99,7,7,1.06,7,7,3,1,0,1,38,1,4,6,NA +71866,7,2,1,32,NA,1,1,1,NA,NA,2,NA,2,2,3,NA,4,3,NA,2,2,2,1,2,2,2,2,2,2,37080.526463,37974.333878,2,103,12,77,NA,2,1,0,0,0,1,46,2,4,1,NA +71867,7,2,2,26,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,5,5,2,1,2,2,1,2,2,1,2,2,1,11696.173591,12753.148801,1,93,10,10,3.09,4,4,0,0,1,2,41,NA,NA,1,NA 
+71868,7,2,1,43,NA,1,1,1,NA,NA,2,NA,2,1,6,NA,3,1,NA,2,2,2,2,2,2,1,2,2,2,35406.972937,38657.357615,3,92,9,9,2.46,4,4,0,2,0,1,43,2,3,1,4 +71869,7,2,1,69,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,130234.912635,128905.524968,1,95,8,8,2.7,3,3,0,1,2,1,69,1,5,1,3 +71870,7,2,2,41,NA,3,3,1,NA,NA,2,NA,2,1,5,NA,2,3,2,1,2,2,1,2,2,1,2,2,1,26595.398371,26621.513872,1,94,3,3,0.93,2,2,0,0,0,2,41,2,2,3,NA +71871,7,2,2,43,NA,5,6,2,NA,NA,2,NA,2,1,6,NA,3,3,2,1,2,1,1,2,1,1,2,1,NA,18255.735511,18352.270791,2,91,12,12,NA,5,5,1,1,0,1,39,NA,NA,1,NA +71872,7,2,1,77,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,NA,12468.859946,13204.521198,2,96,6,6,1.77,2,2,0,0,2,1,77,1,2,1,2 +71873,7,2,2,20,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,4,6,2,1,2,2,1,2,2,1,2,2,1,122726.905904,130163.310429,1,101,8,6,2.69,2,1,0,0,0,2,20,1,4,6,NA +71874,7,2,1,24,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,14385.653726,15564.966804,2,101,8,4,1.7,2,1,0,0,0,1,24,2,5,5,NA +71875,7,2,1,42,NA,4,4,2,NA,NA,2,NA,2,2,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,23766.023166,23781.059132,1,91,15,15,5,6,6,1,2,0,2,42,2,5,1,5 +71876,7,2,1,14,NA,1,1,1,14,177,NA,NA,1,1,NA,8,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,22621.505951,24307.488585,1,92,4,4,0.65,4,4,0,1,0,1,47,2,2,1,3 +71877,7,2,1,72,NA,2,2,1,NA,NA,2,NA,2,2,2,NA,2,1,NA,2,2,2,2,2,2,1,2,1,NA,14336.984082,15039.787037,2,100,99,99,NA,6,6,1,1,2,1,37,2,3,1,3 +71878,7,2,1,35,NA,2,2,1,NA,NA,2,NA,2,1,3,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,39943.378263,40565.140919,2,93,7,7,1.56,4,4,1,1,0,1,35,2,4,1,4 +71879,7,2,1,24,NA,4,4,1,NA,NA,2,NA,1,1,NA,NA,4,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,25815.880139,26556.735732,2,101,2,2,0.51,2,1,0,0,0,1,24,1,4,5,NA +71880,7,2,1,46,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,21158.364877,21285.812242,1,96,12,12,NA,2,2,0,0,0,1,46,1,4,1,5 +71881,7,2,1,20,NA,5,6,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,13370.868197,14159.618266,1,97,15,15,5,4,4,0,0,0,1,51,2,5,1,5 
+71882,7,2,2,60,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,2,1,NA,1,2,2,1,2,2,1,2,2,1,12331.419303,13379.957577,2,95,3,3,0.75,2,2,0,0,2,1,60,1,2,1,2 +71883,7,2,1,42,NA,5,6,2,NA,NA,2,NA,2,2,5,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,17210.953493,17934.928395,1,91,77,77,NA,4,4,1,1,0,1,42,2,5,1,5 +71884,7,2,1,49,NA,5,6,1,NA,NA,2,NA,2,2,5,NA,1,1,NA,1,2,1,1,2,1,NA,NA,NA,NA,11601.882948,11708.549693,2,92,77,77,NA,4,4,0,0,0,1,27,2,2,5,NA +71885,7,2,1,35,NA,1,1,2,NA,NA,2,NA,2,2,5,NA,2,6,NA,2,2,2,1,2,2,2,2,2,2,31045.881083,33019.807379,2,97,4,4,0.6,6,6,2,2,0,1,35,2,2,6,NA +71886,7,2,1,61,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,5,NA,1,2,2,1,2,2,1,2,2,1,6612.194774,6870.810722,2,99,6,6,2.28,1,1,0,0,1,1,61,1,3,5,NA +71887,7,2,2,68,NA,2,2,2,NA,NA,2,NA,2,1,8,NA,1,1,NA,1,2,2,NA,NA,NA,1,2,2,1,12689.611047,13462.268549,1,90,4,4,1.12,2,2,0,0,2,1,68,2,4,1,1 +71888,7,2,2,67,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,3,3,NA,1,2,2,1,2,2,1,2,2,1,10346.035773,10764.448363,1,99,2,2,0.31,4,4,1,0,1,2,67,1,3,3,NA +71889,7,2,2,5,NA,5,7,1,5,61,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7483.230909,8143.441843,2,96,15,15,5,4,4,1,1,0,2,35,2,5,1,5 +71890,7,1,2,31,NA,3,3,NA,NA,NA,2,NA,1,1,NA,NA,3,5,3,1,2,2,1,2,2,NA,NA,NA,NA,76271.00266,0,3,91,8,8,3.4,2,2,1,0,0,2,31,1,3,5,NA +71891,7,2,2,54,NA,1,1,1,NA,NA,2,NA,2,1,7,NA,4,1,NA,2,2,2,2,2,2,2,2,2,2,30976.631116,32561.666417,2,102,9,9,3.24,3,3,0,0,0,1,54,2,4,1,4 +71892,7,2,1,0,3,4,4,2,NA,5,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,6327.979262,6425.104835,1,90,6,6,1.92,2,2,1,0,0,2,51,2,1,5,NA +71893,7,2,2,3,NA,5,7,2,3,40,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,8173.816615,8894.95474,1,97,15,15,5,3,3,1,0,0,1,40,1,3,1,5 +71894,7,2,1,11,NA,2,2,2,11,143,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,11113.498573,11364.131177,1,94,14,14,3.4,5,5,0,3,0,2,41,1,4,1,4 +71895,7,2,1,31,NA,5,6,1,NA,NA,2,NA,2,2,2,NA,5,1,NA,1,2,2,1,2,2,1,2,2,3,17165.91562,17929.203991,2,96,15,6,2.3,3,1,0,0,0,1,31,2,5,1,NA 
+71896,7,2,1,4,NA,4,4,2,4,59,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,9304.437652,9588.632035,2,97,14,14,3.91,4,4,1,1,0,1,38,1,4,1,5 +71897,7,2,2,68,NA,4,4,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,12331.419303,12882.003985,2,95,77,77,NA,2,2,0,0,2,1,68,1,4,1,4 +71898,7,2,2,65,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,15207.312407,15896.113669,1,92,9,9,3.97,2,2,0,0,2,2,65,1,4,1,4 +71899,7,2,2,3,NA,3,3,1,3,37,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,66833.888628,73763.947124,2,98,7,7,1.61,4,4,1,1,0,1,43,NA,NA,6,NA +71900,7,1,2,26,NA,2,2,NA,NA,NA,2,NA,1,1,NA,NA,5,5,3,1,2,2,1,2,2,NA,NA,NA,NA,55675.708832,0,1,93,3,2,0.46,2,1,0,0,0,2,26,1,5,5,NA +71901,7,2,2,48,NA,5,7,2,NA,NA,2,NA,1,1,NA,NA,4,1,NA,1,2,2,1,2,2,1,2,2,1,30442.30641,31779.541767,1,101,3,3,0.88,2,2,0,0,0,1,56,1,2,1,4 +71902,7,2,2,67,NA,5,6,2,NA,NA,2,NA,2,1,8,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,11494.286549,11937.924974,2,94,8,8,3.4,2,2,0,0,2,1,80,1,3,1,5 +71903,7,2,2,6,NA,4,4,1,6,73,NA,NA,1,1,NA,0,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,11076.064101,11514.984987,2,102,2,2,0.36,4,4,1,2,0,2,36,1,3,5,NA +71904,7,2,2,30,NA,5,6,1,NA,NA,2,NA,2,1,6,NA,4,5,2,1,2,2,1,2,1,NA,NA,NA,NA,11032.714892,11055.242776,2,92,5,5,0.64,7,7,1,2,1,1,66,2,1,1,3 +71905,7,1,2,8,NA,5,6,NA,NA,NA,NA,NA,1,1,NA,3,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,7405.452085,0,1,95,6,6,1.34,4,4,0,2,0,2,32,2,3,2,NA +71906,7,2,2,4,NA,5,6,2,4,54,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,3788.132941,4021.443194,3,90,77,77,NA,7,7,1,2,0,1,41,2,3,6,NA +71907,7,2,1,80,NA,3,3,2,NA,NA,1,1,1,1,NA,NA,3,1,NA,1,2,2,1,2,2,1,2,2,NA,47098.572584,50542.386793,1,95,9,9,4.08,2,2,0,0,2,1,80,1,3,1,NA +71908,7,2,2,66,NA,3,3,1,NA,NA,2,NA,1,1,NA,NA,5,2,NA,1,2,2,1,2,2,1,2,2,1,93265.413087,94789.216803,2,98,10,10,4.55,2,2,0,0,1,2,66,1,5,2,NA +71909,7,2,1,28,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,2,5,NA,1,2,2,1,2,2,NA,NA,NA,NA,37911.437415,38428.199561,2,103,2,2,0.46,2,2,0,0,0,1,57,1,2,4,NA 
+71910,7,2,2,0,5,3,3,2,NA,6,NA,NA,1,1,NA,NA,NA,NA,NA,1,1,2,1,2,2,NA,NA,NA,NA,17151.556324,17621.071345,1,98,14,14,3.37,5,5,1,2,0,1,27,1,5,1,5 +71911,7,2,1,27,NA,1,1,1,NA,NA,2,NA,1,1,NA,NA,5,1,NA,1,2,2,1,2,2,1,2,2,1,42165.369652,43039.787442,2,102,14,14,3.25,5,5,2,0,0,1,27,1,5,1,5 +71912,7,2,1,40,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,1,1,NA,1,2,2,1,2,2,1,2,2,1,19633.637051,20770.138122,1,98,6,6,1.73,3,3,0,1,0,2,39,1,4,1,1 +71913,7,2,2,18,NA,5,6,1,18,226,2,NA,2,1,4,11,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,7382.152016,8028.485773,1,94,14,14,2.78,6,5,0,2,1,1,61,1,4,1,5 +71914,7,2,2,10,NA,3,3,2,10,131,NA,NA,1,1,NA,5,NA,NA,NA,1,1,2,1,2,2,1,2,2,1,60197.256541,63931.531988,2,94,14,14,2.63,6,6,1,3,0,1,39,1,4,1,4 +71915,7,2,1,60,NA,3,3,2,NA,NA,2,NA,1,1,NA,NA,5,5,NA,1,2,2,1,2,2,1,2,2,1,88961.259215,91446.591982,3,90,10,10,5,1,1,0,0,1,1,60,1,5,5,NA +71916,7,2,1,16,NA,3,3,1,16,198,NA,NA,1,1,NA,9,NA,NA,NA,1,2,2,1,2,2,1,2,2,1,24446.632088,24751.360191,1,94,4,4,0.79,3,3,0,1,0,1,49,1,2,3,NA diff --git a/pandas/tests/io/sas/data/DEMO_G.xpt b/pandas/tests/io/sas/data/DEMO_G.xpt new file mode 100644 index 0000000000000000000000000000000000000000..587bc3c4eb64964dff48044fded8e0e2fc3a5a14 GIT binary patch literal 3753760 zcmeFa3A7wlwf|p$zK4QxnG=TOKp+VrKocSa2vDav7ZM<$xe4QA=nw)#Ah~2fK=ehC zK~M=k5gCnu1A+{WD4-FMK|nwd5Qi5Ak?F~Ow-G^5f1R`UXE%N77M+`?Z@u+j|FiC@ zZ|~{5_t{mqs=B(mdQ!I!-94_Sd(ZuQ!WbF*O|JI%o>q^z7TZ^*?tjqs<9BKPePG93cGxkM2M8IIs%=g>-znWw zs?8p(*nGF&kq&G>zLLZ!UzoOMPi=b7it)h~vDLTGJ!Su+_KtG!)BXSH{!R5$x~FxR z@AwIA6IH>vY?Um_R?k+M)_uU#ucXMvkP>*R&A=FFVs<}5tv=vngy zUd_$uIItqG6iO|tP77gL&q0Uwgc%EFxOuZqoHb+qEI0GCnJ3O7%fLuZtSYaLeo!d2 ztkyF*Oq=ef_e`F%VAi}-X3zT2cHO7WIBE8r84G65o#W;&n6Y5teDb{=$%RhkF7$&! 
zspVZgll|V^Q$x?*vzjZYWPOnD?MN-Rw)Vm_vK~akJd)IqtMsGv>{A zGnyOC%z3kB&6z#tIFcnuF4QVJ^n*gF3p%yr|&yOZYH z`XX0S%V_k2LaF6no3-?GPoK1X&y1O~wwpA2e(#JqvuC=w^JX8{?9w|7a^a$K1^u8< zYFS-t*iI?4I@vu70QB3Dw&1oVSKspUP*TDlLJvj5a+vrlcVg60Zx3ud1* z%k|EhH+$|eBpLw8g-PWc{h&~4d2g)6?|o>x?apfcXS;3OxaC{Q_B-n~P2wh!3zf=~ z&<_fwmNm4NDTQrUi;0`<0L73isilT~P$;#m8KxXCW%7QL51ZE9)6F`@&6v4h_9?U6 zF$))*w)|iGz-i9UJE^JPYwo<6v+SR!yf23gss@~;miIMl@nNq$oz~LKKf@h6ciybo z$IWpw=PsPHVBTp=y$zv3sd63tpipXgf0z=g`}a&8zo+J%d#pQp_PpjM++mOlH!P2F?$g4w6fnlu0S*}d;@6e6>8 z)|}&--DtKY{!h!cLZShXT$ofo75$)4YFWEkOLsVM&mFptSvXUU%_q(}W!8z!f9E}q zpMByn^JdN2?$Egln^o?(P4mC0HOPfVlgy|}Nd_3T}nH|w}L zGv>@ZjcS_b4RLozvINP6TIDaG9~4S0>rb2P_w!TtcKgkkbKJri&6Aks);q1{>@<&D zDUu7l%1h7>3Z<3}nzc0FksWeqZQk5dPjd_BH}|p4|Dm^6vj^$gkX$HL{u=s0q13Wr zvzEzIr}xxiBYNk}YM%A3P(|cQYPkvhpipWVvey9XXs_nU_lc*y^LipzQp+vq2Zd70 z&}Jlgqh>9o>*$>-B3Dw&J?IC8Qp?6u23SW^y7zqN71S(% zTuChtp&t}VEt@oJ8DJf?Ra0C+<+VI9s2XsVS~hLgQo4@bsUm2SzAQsOD3n@;d09vP z?fW{vr=K>t!&)L&Qp@w`2Zd70@Pnr}+cNIZp1l^H*gP1`IH|Y!rhZ&J)o$+d^ewjg zQ1dkJ_ywemNG_Br_n{vYN-gf->CH7Ua4pMMQ9L9KP>Lg0Qp@Y;2Zd70X4ArygKE)`KcTt*Kc#uNZXO|9#P$;#GY@TCInlx?lVbkX>Kf~0kh0T(BXU$#y@7+9? 
zS#bO;H_29Ad+sc2msc4*s2XsVT1GW%*=tJ2TJ$JM;wD0aO5{~4=m&*T%V_D#VSdE& z^)pl7hPV&tg@w8E+-c1>l*ctsu$mp3G>OcmwM-aP4LD0JTWKxT_F9^6hIXQy_&?QF z$MTAxp*oRQ$_wt=g z^GfsbO?KY=xpPL%cgJfJrmB_0c zf__jawQLh>srHOdYvI~UbBiAE8VI?PTI%Qrg;LA5v6kk$`?Qu>$$Ncrg5+wKS2-U2 zpipYrF4od~N1fJkQiu1~`tG4?ZLsLYr_S#lI`7_|Ew;D{p6b6zUcW5CamU8twJuP+vW$3>oxG}!6U0tj zDu@1GM8`t^m507qpSluW-wD$=?}Xp<$cml#!o(T3eRF3Q+cCc+jxYGS>OrTnqDam4p>kEv_N-3r4?NTv`X7m*h5pmmWzn4B zZ|kuxdhZmM307|w#2>UelK%Te1N)c7c_;qHhYV6TGN}5n$?tEhJ+t@BFzog-Dobv@ z&H8^>7KiUk&=0%ifS1m4Ht(9)S$Ze0tRBr%9h2xa&irz*hyF+7i$edM*sEg&tsfPU zsT)4*AbRiAU+Y4TAL#SMFO5S6;X_u0uME3~Z@BP{)sNbzd-!gTTzT&L&r(nI$Se-F z2g8k)w}03zY5$4ynpdJ*`=jc3(U@O8upIh-84YFqXMf0itWWScwBaevhmOZ+Q9UtNwb+UzdfygyHADbbl{A9Dl3YACNfq+hC@H^<=WJiaLFzwzEAuZ*XzvhrD-OGS?iS{;c$_A*T43bGrxjxxTm?vhj9e2?2DtoOYe ze!pXFnAdoYztx$-SLlDQ=E)T+3F^G4uezJsonm5VI}ZI%M2E8ev%Z;+^`Cm+txKNj z0fP90)CHm!@63+x=t2AqokSF5)e~2SVZ-$X+0Q?`R!?vaGnZ9>mf3w36I`Q{qSmvkta7f>Y^S*fK{o&AK|MHh> zPC3!~&-<^%$`6h@eDt60xZrS(v!HmrL;uNl?PNl z?4WnzXEJ%q{43&b|6dxvvfH^Q9@M;HcI~cR&l`GAnBD)vya&#B6~Agdi^KBQRCe+7 zzsKrO9Z-bHl=iqu-72kD#DnTxNXCE`8zDN8bvkJb&&ZpL(+O{)_7m%u6w!`S^Qx z>%IC3&ch9pyi!%`cu*ZG8mlu;WG8ayUlv~!`mgrt=Hkb^3jJ3yr6zo?Z`++&bz;_u?V&^S;Z@d-}3Ctgc!j>-rIGYw4Z5rf={YkIo-C z^gk5?3jJ3<&SR~w)KQl_@M8zP6MHJ7XFZx-=acdwTO9R+ng>7l$}kMOd+8=uU-O+X z?7B&JuYTMO@N)jodidXa|NC^de3bTI`^|jlo1u&0h4-)YqYU8#%lIr_^LQ}re+NBz zLS?m4pV{}EABG*yT9(~A?Q;0-_zSP%9#u6x;7{j=9QvP)p{4yFAPzhKg%A8~K3B$PLaC6is(cCbMb}sAG>08U-6@U>IKoK=v@+*@D}`Q;!mHk zBg1D1pVK%+;>s|*cErZ>KK9oyNK|wG!94l=WvTKh!sDIl6-8<-z3Pom`-WWD{&R^9 z{eOrrtp5oQ>)qf8&?4x^qq>mxs!w21nv$4LEr-e;#!#=zk$9O8c+8_}je6r&tB=ZRanGGyRf*{mt&g?pF|rE5k79zMdccaD!Ud zr@rO(Ul_dzUOs=}ydQ*z?E;jl1~~uGI;-1=2mH4E*F+C*=zlRP3jKHDS6#c^-*|G- zvp(>vKA?IhNFMw^(=TcFLl5Hbo%k8D{s*ZKTR-vM&%buSn6S-NN1XihWhcU4cKtIy zCw~5Qll7-JsYCONwzWR&`ag9p*0{PL^9%hiMTPM@v3pSC@TcAas$X@!#N!X9{-&=C z>~D7J=n_9)CU?@SJ&Zd0@Hd})yeDjZ;8Cluw%StoI&S~HSse69q-MW!UI{W!>%VK) zr^nOK|8jJI*Qfp}7y41+mmV)Hj{Yg4?}TObn4UTmJMo2W*Y5xM7Pnp%wmWC<<6iys 
zGV2$|-&z(2`3Xwe{>zw%RO=>M>fsBU{qoLN z4mxIW7;)hjhwS;2YpBbPzss^Xs3(6e-u`^NcdFOCBDI#@$!oB>x5(rL;YAMpebKSd ze<$(&rgk3cHB9`fpKkXjkNIo;rmqj|Z+7*!c=Dk~20Q4(PXBu9*l(`&%`jn$n)Bt6ivTG{E<^E^xY{xe8^x2eHeTCH`o9Em%kRa{lu4le`EhuUgNm#Z~K{g z7KdMF`RzZaKV`?gn&?@Utq(i@N$b&g<{A3?B~Py~dY9m0k?|*=A^z%DJ-OZA;<}Vq z(e#yp{mq`ldDqTk@(Sw11z-Qg`D0G#3ZDzlefr|DPq2Qi|9#32)>KYVw9W0m!YlQ2 zg0}zfQa$mB9Qqs4p>+EnA@SCaT=_sJGIr1>dSvFy|B?CUSsZMCrl($~cCbWGTp5Pl zo*w$e#nYCAeEMHcS?|#4Uis|v_eEJ8)Zs_Q~QKDM=BkmhC|5`gQ^@aXd zC10=Fonpez{)P2zsQ&O(*O7Q+(CScL^qnw`^P11}$cml#!uhY=uy^-_zHr40XFdC| zKTUwouKz5~;^4ZwAF=%Q^Ow#}_dnzp@75B1;q#YVe6Yh8`u`LSh3(&oe>LfsZ3ns7 z;lr=`Q_-h=zz@XEc{Xy2_v_P^(*>knV|Ew6EQ{@Its z!Fe}*O6M(Jt?k`p&0o0xyrR6+rx^NQlYG5ucZr|s>5FammBe5Dsz-<(f6)5a)ow?R zUy9fbo#s^@kF1!+r*b%T^UEIouPfdPA9?hr*N*!S`&_qrwDK}9#r*Pk`{C#n?!VSi zr7r4(w=Q|8EA;<4Dhk`b##MK2_b>Ec{on(=OX8@b%)ci7?01caIOX%;O6t3Zukz_H z&w1sl?%}U~_qnZaco%&3`Bz^SM`8O<@`_h+$#z2DknR7dE-}n6^uHbzrTy3Mk>Q{6 zc-PKneu*AGtEaa;&Um%2#whxe>Mjyz{eY+5uE!2~*KS8nG4;o8`lKGKKh?V| z4&ur%?AyQBZD(G4qIfs=-^ydh-?~`%KlJM_zx=S<$t%78amrUwRGIoh|6iiP`mc7Y zYYU0PAAO40L9aX}(=SEzApY>0o;c-8{Y=K*!9Ohd$(Z}9pX>@>{M2WT_}6L|`Rw?+ zEQ`ZFf7~X%{hDO!qptQOk7(bvKDBS~dzZvjL=OFL#usJ%=X;pek6d{`r}82}-pAk% zS{+)q=`|iX#Wc>Vzv+>|q%Zix8^%t2b=L6R&kNhUd)?Si`eT^#AS;&f zpbzUm{I3^2x0hUW$y(=+eEwp`-M*%AxxJ>Cx1&g{c$0O0Rp#e34j#3`8~Xnm9jyQO z!;{?4PrnS+pZS44(OdtN7eA1CK+{t%b%86X?;bk!&5b^?#gOiycl~|NvqLwcE_?sm zm&IZ0p{8=)jv~Fo_WuF(r%rehIrRT6Iu`oxB)&RA(E3pk+4`@1*g@}-IP#YH=i*O4 zyepAA=*eG(-Cf6C^wOyN{@mSl<~2({d;g!Ahn;^e)4br;gS`J-=SSacJ~ic~54p&p z|L^fdq5tY9Uae(%9sob{P32TyX2&l@{0*JdQjk@j#;0=Fc)u%#{pvq|9fplt?~8Xm z*$t1~|3ja^u6X_V0oIdYS%0*SLjQBEyDmswK`y_Q-~T+a-QPQr4YdyVK+Afr``Y1 z-@Avr@xiNi{ndT&tn~4>ByZvQC$;vvnO|tE77f;a=BvEbJtPi)>IKoK=rtZaGFZ63 zjJ%3vam16~WS7N3Tp5Nvmc6)O$YYDeySe>VCwlB)XuK;r zmi52W`>(Qjrh4*$*uB<=oFe{)>3ono@W30kc;k{So~p}3FS6GD!@mE%w5e>4m45%F z+fQ|Z@~CX*Er}c&@0NVM>hF>`m8su&a`jWYSLAzt_=DCz&5ynlrg7vkeUiTuUs!+J z`WoZ^aC}(5_snxs20bgXM?cJ1(&Eo|@X7V7m0Gucq(6D&omJnRjUXYc!Ph-%A|+#2acp?4VEd 
z)|ak!KJ(8-kDuw)-#fK~E2-}uep}y#XB_!j_wX+**kIWHr;yJ+e|bprC|VulRTQt* z(yJb<*2D%sdJx{wSUoxx`mgg7mgfGjF0ZWb6_wQwK6sEpt0VEpUWRF$ckOviR(%-{ z`sVfLkF2@dmRp2(V1V~^)n$X`L`369K9xd zw*BW>92^Jo*W>dyYUy?-Ud`G9yP3%@rg^}A@ zUt9QC*k;!EcmM2~{{o-&KhNS|d&uL*U(NfUdsPpTzan~;)LT_Ovb~=mZm~p!#+vbk z@jKCbMfPLNi@FTe4?9S_$>dc;53=7eR9>qe8H5iR?4S=HyY@XV9{2pDaOSFuuNd`{ z#qitphs9YO)~~7h{a<($#jCjVPIUAQew$ZK?94kf-WL_6{nzjD;ctDmpJJHK<>fv1LJW~{dSK6Lo)`scDN4%`3yv%mk?c)#T9 z)x1to=T}wP`czRnnApuP(KAmFKQQgT>9r28e1aous`?5F+pT8ve#jCjV>R0&wMNNFk$9zL$t!OB0|2n_wjuL16fTyl}uH9~aiQcPh z`RiF6=3fyRyXmzK@6-;iq&|$g=r{K*J)$RUx9>d<{^+jh9Ot?I#(4{9bx@z8RMR@% zrZRo=)Jr^ds~j4lDD+?Bs+$V4)d63E*g>!MR7TH!%H^e z`uSt7dPR|%+rQ(LAaddU^IYxJr*de>4a6%zW9-%$_T6 z!UxYvW5O3ca8UQ28(x2B*fH#T)%iDF1+VS@7iV!$pWj9AKX*4=DG}eudrTrIk>;999@`6s}s`^#s>%_KQSbxc@X!<^hM^4e!m*(*xe8^x2eRtPS ze>J)1f}!1OKlH2j40&cGeD?iEt;e?iS{4V#OHi}}Z2y|SaQzJ)wZjt{>j{6a=5dPf zj1!r@aJ({9f9#-7^wvk^#Sf$&{7g^1$_FogCS&j5-#udYbw(U<+z+}(9QeiESHIyc zceOjf>9f9w^>Ux=eF5PxK_L=RsW_t}R(*f-(n@PTt) zdUnejo}-@XtSk=elh)zaj=w)L+$a3v-CCMm{rKK4{Os@Xvw9jEMv}t*AD*&)Fi%DJ zz%-B5QRc6C@G~CrXg%=aXEOE<{^9Fu{cYFPp6&_{{QNuh${jiJcKxR>i^KN+yX*Cb zn(7rrDlVyu;~_HrXI@}v43T`j%J15d-;u$;@PDZO*g>D@t&T3?#}A}F^V9f({6Exp zZ!+cB-nU+VfA^+qe5ro!W3BuD+5VSFeYXD}uh&0w)hmiL!133Kg}zz+8fS7v{6a$> z)LOdzcewunKKclHC;TRB-#huU9tv^F2QNG(tDU$q3}gPh>Mjp|dSux4j6XlWUagP# zvi-k3e}nB4WImNFjI;nxojIsA`9_X_;xB7aZx=y_WJIRjrjBu`0V_XzJs+a4z^oE%`4W{lDZ1_f5OuxzO2X4kQ>Yk{Z~JJ zM}~P3XXj^H2X@dWdSvP&-u&|{4(z6jYAF%xxiNFzs425|5Z^wQ2FQ==r!JC@|OA6lsB<6 zANUO6(>yDx@7{b;|J|?6IlFtazaRGL9lzar{tv(Z*qg;+{TQ=?*B_F4=$qAFQ(pXX zkwZh?C|EwHeu^SBkMp_eD+wx}^;!LpL8~M2$6khMoOha^c_1s6@t_X}jJs~tZ65zx zIAG7GzIVo|H^Fb`|BI9#+*F@`SW`V%5jkCYCmPXW{ww7-K9!jt zh&`3jgJ#$HuwTOdkH&E6s%!n|@r_pxmmGBPTbpmuI{!H`i^I-4yF|`6T>kj~$KPxF zUDb=VxTL@K`(Pu)o=LzQlPa z{snmj_2KxLG+yve?#&o^-)hJzOcpmuk87iz5f{D(V29JIIUeDsN z{oru@{X6&-#jCh9KD};?Z1c+{hWUlYh^Q#^--#dbRpwPuJ*a;0;fD;u1ETMQ_!}m5 zdE}&yGG06Xzvubiov_^q+mHUk&;Mn!ZrvYO%i^$ks2m#dMeRcWyW0Kj`9FysKIY4P 
zU&tVSAo@;-zoGWIAgjI;Uzl+1b=TLse-I|z-hIMLPaW$uuJrmFeao{rY(J!NqHQgy ztEzTn+y3dF@`lEiF}SquXmx0Q(`!6(ifNozf72s_%nw-+ z-Y{mD7oNWCgy!>ySMAwo&Hr8m4|Nx=KWLwR}^iF7ycBPpVv5Ku*@HO8Nz4r%5VBaF5^KTHW_pDnTxBx3Y$(jZ2EIgZw@c} zf4+wS5@*|I+NVm^HmJ+4KTuznxRDRu&=?gB#*d%INmR4{b$OL;2k>-4@`9cGvEy&3 zbrodQ6IX^|y^+5@`{tuQDYI#9SGN7rUvT4i|HI?yyuIdw2c#}L|Dm5X@xac0BsAnf z#nyl2#jl!v{{o)6@~MA1U!Ula4J%n3#E}my=$-guH(WtIe0Ke3kr)j zvq&ElX9jrgSYVUey4^#iqkR!>82@HBq>6w~Kv|8Jdtdg@@F$lyxq!*i!i z`RYj*-xQud_o16_eXvFzyZ(@8aoG00LHzmU^=y4>Uhyg}y%QaMvpDTL+kLD5@}Qc+2>C*?y1sS6oYeYp4FwVpj>wTHsbf4{-FCCBD$ zPo?)?QCB641ApoiZEK01z9G~1R1S@;g|}BYylcmQ`0w9oA6Vbk=Unru-Sqe;b=f=y zsSn=$;?u`G-WRIh+-}Rd-c?r&v*BKDo%DF*d%S zAKr=H@FtP14&qZ(zv>0;{$+8duMO;P_B4+Zf9C0tLHvEyHFvCqGjW^ok0|E~RF{^Hf#-W{(5jZgK+9$9&)FEqA`fo1(KoPSoL zyFE7nUiiR5UiznqA87wiiM_zXJm5nHJLtoL5l_Fd%IXutf*=3x(%rT>7k>NxbtQ{~ zdhGWH;a3!|*3zpUq;K$}H(B$9hP){4e=Uon&?oA2?RN9)61`WuNaoG9FMtc6KeqJ#;y={RWwELf_vm(wm zAJvD3JgBeGf2aP6)St$itbYIS>;KfjJd%2-Yeg|^@z_UHfghZm1JH+=PgwN9(9k<$-5|EX&p#r%(P`yZ0_U%cL7|F3=X zPCUtzVrc9T9ZLJZo5V98jz5OVj~%o;YBxFQuleVahyAYUSugN_E2$5|CS35k_Q$1mMTdWaSGDc~DAO|Nr6pAFZ#j|H<3yFZAE39fSvY1u=Z=-QB<1 z=$7Zh#}DfH<3$U4y~f%3=dvsg&b$3!KYxdh{;O>JFY3-?nk+Qr3#NtsYn*sB*Ygs2 z0tc4W}jd*YA1480RRi$^ZOG9L6{le_1Pf9cWO(_*sJzgB{MC>8?f!0$X z?}TX_c}%bQ(0AesbMM`G_%?Uk6h8ju&%XV_$0opI*FTw;V%hI65J%oj2l~?AKgz|C zIzvMqR94piPTxcJuAL8F>cDPtN&i#7QXb7`e8`~k8=vao3->RZdB(Ya_(FL6&8L5J z$NLU}$Bw_fSsd1nt@Za`l*cPZr?)N88{ZT$BByq?FxUhV#J&xxnQa^Eq>W>{P z&0E(0yghFzkLD}zub@8cx4{z!ufE0Nu-`e0K7Z>M?(rID?|;+&ucPn3>RB8dw?t|k zf2|&!AAK`@uJLN;JS#MI5nl4TcE4&avh|~)cF?uk%`ee=m93t77Ki!QL}tC3zDxBk z!Ijj9h1;D!Z1vH1hEo@Q`NdoB+r%rc?f;i*9_ra7K7a2T>Y{$}YAw-&rTwo+4D$;O zxlmo${+;+$y&?JzyukAS>_6>ciO|{rQErjJPM9Km3R5 z?)S_x^4RuY&*ES^_iO9#FXXBRS^tV+ecSO@-~U>jX`ITTF)@ag^`HAOtWQZ^_^Ya~ zE{$}yzRKcEzj$DOvpcc-6scp7`rwbea>|m+?+oFv7teY1z^}n;&;O}qad4hv-#<$9 z;??Z8DTUuei1+(Q3#eud|6Qt#9r%;hu2nt#533 z>QSFUZ|}eI=x*P4$mQ{$b*oG36tCjaJJA(>e?;~0Fz?WiFF2NM|LF0z{d6vQJiNO8 
zst1c6e~|SAq8IP1u)o5NzajZb`cGULhK+Zf|ED)LSS7gmf8X#whd$<&uk`$r>(6-> z2lE%{9riyf{QOlRrqA99v(0z0$jry=C4SYp?fw==zZ6Yh8Q9rb!Rog(wnd3e=V<*?e?|0j0l7aDSd zmGQ$1_GNM4&;Jkg*?LmH>Mia57S|BF#l z-~8pdM{Lh|oSlCzQ+_ayzrSz>+dU{=@38&LW#v}?bLC||HIYN38ebIpkDn7i>qjm! zb)sUCDHD~i-wANKnfUFw%BDlc`0#-7no=)V*HYWn;`^4G=gogjQ*szN*v%gqWM0T%2Ys0E!Q-9%B*y;4{&8J_}_rN`; z5O2?aSe(Vd_EhMb@{3n->7D54o7I_%p8hev(AX;)O8Y-a;_xTlQ0u`CdY8n}moooc z{HfP?h*Lff4pI+a^Y=b3nRMSiec{dN)n_(6c{KU#^RHg5101ct|4Uv)@oFu->cOh^ z4Sw6cs9P~K_KqZl{;R!Oi)K5X8c!~I?*y42XnECcvhv~w!ULLK{m~l^QV(C)qVJx^ zzB&3gVdO9C8&4m4IDEGMc_@p+D-vhhTSe^YGMyiNGrf}tuLy5w$cifTU;TKFDE@{1 zYag(KX&$Si%sJmAK9|G!hc zqDaN1S37;PI&<;GPM<LeXYcsC6w)Oj`GqX7CeNA2DJU;&IPfP7MP*cAozi8if{2h__<%#S> z4vl@{3;N(y5Aqxv`tncP{+U-LmebnbCG(^%TQ5nyo%EsSnN6SH|2@+~?aE`1KjM2Q z;?Mn8Y5)BSZvXI5ANwEr24Y7JjeWD_qtO3k-uPP{?4XM@S$e&+EDn7qEacNXrbkxn z#1|%<_T=?n{f|S!#E*Y!*pBspW6Lo-W(&#JjchPF|Ou|In{o z{6b^D_@dB%C-!PC$h^|y5cOjRy-VWAtB4yDx@80CcTP&LO-QBu3 z?fLV||1x9}{C57cD2u~`BjW|o#p%4otF<({)>U}^ST4TgW4@sw9~77NUmg$L+8(`k8SJ3%9=he5BYw8kd~EVxrvDUv zyZ&F#;;{Wt$@4$OtGVCMe5uU$hOpcJ5B22|5gJqDi$ed^kLTFa7vs@*?_#*EAM;D} zR)_N92io>iYv;2#)V)MqmG4*ubg51zZ$ zDsO%w9QNP0KXTult?%C*m&IY%9qKBN(C5!rR91eGic9J$eEwL|ctz$N8V5u}Y5(Qz zajPHj^eSJMAbg;A?Rx40@dNQ^{hD6+&91!gtf(HoFurm9Zoj?Rh23ZTXu_J`y$2pU z{#LR$sKcH=OdX2i)mp*>R<&=)-ia@~R!^fR8Vdc#UlD)XPV*RM|Bqh%{?DI(ZhFTf zcka5)BVqXZTkQ9r*I#XQ&(isn^qIaH>U<@twWQxXw+-3iYT`>jLt|Q06#B1q@H?vb zbKYeB>W3Y)yv)Z*UehlU+5Gc%yXn(-*Y4+$!4Cdm>ILIYIP=|4gsFR-zw>7HH-?Km{1M}zeff1O8F^IJbE$^)uDcF^)DPb%XF z+Img+ERMQBc#)GhcvB1){Pz4!fBDF=aM`NMpKSXC{Py{8UlvE<^Os!pI&YC$OWVG+ zF8T(4+5AG|z(`Ws|2-uK^RPTxhj;CE^V2#kzxtWHxIK>T0saCH@gV%jV2K{SFn9OQ z4Lj|)`$O;K>n1&N!|~K(=l_)~4(jmh=aB-x`stX122dN7*eLk?i*`3&l^JVf1>cbW{ob}`F-t$Bleei)dfA5OL@Y(bC zlK$uC#m7(P*1tT9BdtfYZ@d1snff_J)?Y3%^9v2RVW-f4jjPJXW3BTa)%yhD1F@&F z)vfERU{_w#Q+JZjcm}BtTfX}93488#Vi{t;a7cLWUqRQOX@d$pZFuEn9d8{mDPtGesXF4m&?}) zJN)*6X_uXQE$*cKulVnO9VYpDmES2czdn(zADZ8ePwHP)|LW3qe~W`p(ez6O_BVSH=biZ5 zdPN5DM+Q~jy}|FgE_&{^qq{d8chS2qT&^*;~Sscs*Bp?3=p&<|c 
zF5CXO4r}o>;kD}|%x|&it0xNL4_aQW&-5CPoMIa1)!+2Upz@nvq7Os=u-1mNuU$KA zn&0=n=U;meKRf?ln#JK2smJ=$t9GZD3$mmx`~1;~#e>vW6MbkL9$y$geqcSCJMMp2 zA60ofXZ}_{c@Kz3*C< z#o@hv|FNR7qDZatom4K||5np@Md}KTBch?u|1RR32OsF2#G9;s^btLXKmV7e zCroE?s9egCnZ#ldzDS{=P=2Q|NFTgL(KL{s|xcj{IQ zjU!_~q5tscb!@9c^&V6n>>&13M$i9?*{Q3R#bJ6UvQH8JRNp}k`~Bwbzkd8fd6??2 zc=;#K41u?LTown{{XwfE7dz((A{Ce3iO#0zrW-}=nMKRVqjkG=n@XK^s^j`yEvT@uyW4^>Agk4sSfsV_8U zM2ABEneP&b$KU#n-HDy^&{V&z|DZ+=Jq2?Fw z){=Uwqm{=g))c8b7dbR$#usJ%=XaE>4vphD;#~~4Fi z`Y`Ns!$uyp)?s0@HD8=D`L0*tv;OB<95z3mKkQVmC{k*xZ zk3#>|uR1lFt-r=o5xrMG_`pWFp z{xW-+-r`*r$6)1f#f7h|JL#J)TzSksjXR!vf;#N|Z?3$G`NDYr!=2WjUbWK)Co)T~ zGS9(7HY83lG>*-d594=H%sjYAWa~#Rvi*NLl@|%J9pMj_tuOTWrHI{-e9QwG?4S>u zK6Kp0$1NTgh8=kRQ}=#*J+E7Dw85V<<#ldw) zU-tWVnn%1_eW$Pj#)W=DrY|MBaUcHl$XTBV__{^$({sfRC& z8*{-SSN`?v@PU^`{CM^q6XE0YFRnv_#Nh{KI?{Pzr*CF=5`Z1v(2x&I3jNplaUBDH z;tf0Wvk*t$dZnH+yXo6nEq=Lw+2QxYYoGrne0g{C*Dqc=%h4~x#4pp~RbTl1gqxDtuEDq*Vw!MbN z$>K*or*YsSk*yy!wbvE96c-7y9pMjpCw8l+qOu}>DVjfdQXXV5^-J_&=$h|;@GF-+ z6*l_Oun|w4{e?pRmt}F-=kP20{2#sl#OkjpFZHP$8uKJyui9OLxybaz)}!i;Uu8ab z#ve34&5ynlrg7e7ahM)iu@heyz0;UmM;tsXZ2kBS2k(B}j$Y&J{a2pF!94S0<2c^- z&-AIHeom03SGhV=G=(~AqNmQ#m>(6!ukk)X{He<@@%#V3f04zp;@`hm5dF*g&vVSI z|GNFA>lHibU8HS&ZlE&y^mAfcpZPn-5ufJosCU8}#y+s%rwf;!61F?%;YTkTG6o*| z{DtjWG2cbMf88tb)+hYBR1a#N!tbA|p5r)u3Jtko+<5VWr_=qPnioFMYrM(SSLR>I z;_!7vr$`(!xRUyC>Z4;H+M?HmMW;^M>cQVSuY4V^Ka*E6D;c&v`Th%aCi9~&p>axN zD)e9DxQ`ltj#q|S4|dQedg=o41KBQ8J2HHR%uDmEsJ?s4k396?s9P)DBR|==^E2<- z1%A8!vm}ed-tPpx|I?{nQKZ%q93IZvNr-R;`Z>m7}k?ajPj$z-RlPdKQQMAFiGJ z{$MV4mR28m747*0PCU!}LgUovP}YC$d!R4Y_dMZUEOJ#=r|tJGE~(G-6^TPmu~4^5 z@*;!eK?YSHhW+}X?LJqnhT%7k`q@_x`Jz|5ZU22)9EJP;bJZ(~)Ox>VvgS#Dw}pAJ zUXerNv>00Ge=-mJy^~Cai66gvi$9q55q&2t%VT<%#ev+3FKoMZfAe$qtD48(y^nkK z*UPAj_bs+xtq5;EQ@?**(>!1@Z?QJlhw7_N{XzWeqNmQ#I6W!~{nxm{^PkDDJmiB9 z^ctVa=$WV4;iXQEhu`AOg1`Fx!=FD)9iYx{W!Fn^JD#-fhev-tZ1&gR ztiGVy`u_V&nWy!AnaX=KpVHdA1X{Y^)w;y{ZQH+|H(H&!@-dH^$f0pYd{OA9`td%( zw$qC8fa-@G^iKRtRzK=N58C|dC2`8{&?G53qD{>yDp 
z_S|!;rz<{y>^1&h$)A-B=11R_SrBZj{{b8@@@fWGJ zL{EKHo`3GdC!HU1XnZs}6#CEn7E3(yAkHxH`#=Bvqy5hQ`d$y-Gdk@5=DY9R{i;Wq zhh6`yX}=Wh_Xl&;D~dG0`HvGz>F-Zf#7;j$pfXhT*y-pB*knI9A zeeb~jX73U^ao&~4E2s}U&Ujp2|KD}s{l8T44*i#( zm9_IB+y5}1To^)QQG8L>f1V>}{csZJLFK~^Vozo3W72o4ub#zGh;yR%|3eH%{>LNh zegD4W!;JZ37Vh}IcfrSfGxolBNfw9i@cmQqYAwB!*Y^Hb<+AyO#>Zknq5s4!iRRY% z59<$1{Hk&~W&WjotOzg16Z}&2@Td9Erv`mpQnV_y1}e>zOOx%=k7e7_g|vd@2g zSN#0<8tYGQ);9S6LCN-IpT9e?pf{|DJ~TcaUl@N${q)7+)en6q{71!kCv_VivSOz? z!gkO8?%$`}{<^R<_aE8^JO0*`Uome!e6o23bO`4Qh8hPC=$*VOeEyh=KCLe_&Wr}@ zzY{;^mtN1OUc;_|dA;(G4?Yk*X#bxHAO7$es-MZ4uakfG+H|STn^FFoF zd3Op%N=_wc^2o^bcJqo~WSKi6eGww?3&d#-xWiJUHtPy2?P&JQ^> z&WaRe{pUVZ>l5n_)H?91%I&O)xbFQ-2BA7ev! zaQ_GX_Gat*Uo)~eI3EMc?td2V;?g_O@%#;|KNmf`%sVvBjRxy~GOwzWIQ*?odAlAv z=v}+r>QH|CKgf|bekn2^pT=w6!Ro`%E2s89{>A}eqaVL{PxAwK;yM54zH{>C6PkB_4!fnt>eaki z$smrtArqg@FEq}NOojeCNx*Yli7Om`)ek%96TQ`;_2UN`Z&{q`;WfS$)RQM1^XLn| zz2LJqg%d{p;<~>-a6Nq7|8I3vvN-&@@&5C}Eni9h&93#(H`BwXcB`lHDaq3tA@gbn zS&#U$9u3tWzv|0b9K>6k)=fR8Un>5{DdG>GA$(5aS5zOyUiO=HCUiX?ww<%hOVw4M zB+j;b`T(x0*Wc=z2PA(*^enxTSM0Z~e)VJj!8}9b(=niQ``=Lb@Gop1nh!hZ6TQ`; zb>j!x`ea@jZ*jbHXW?oFS&@V~A-`$OHsKGVJZO?!QU`PlX6MOhs5$q!MP{yCA$ zmek90laLGZ%Y`8{J`-P*_CLM2rWpUX1-?jDk z$9q)|>U`;sSl`zFUDRLaiy!NeIz!_^$p^1%kE_;1#@~3zum0GVtl|`nM|K+HbtN*U>*%=9SO(|9x2;_CBDdvif;N(YBW8!NT)5;Gu4n zL*uiNq;&hgNpetsS^vHIRpoSQ#rHp22mC3rJ`A1IQ;=6s-@Wb^hdle$Z~wk~-F@pr z&$(+HJa+%vBIO6!|5sG6C{k;CPvye?zpFi-d4|S+L_=Bss~U&DZKrkRQ#*dupNbxT z(7SejTaQabk6()P$2;*eWIlt`hfP-h(OqZHxFHN%bJPdMd}0E1*!JJ6dB9Ed^RHa> ziXy$k@mKE~@lHGqskh?}}u!E`I@+vQWpdHr}yTxf9cvn{6z2WNC%!Op)cR9@ao>w!%R|2~k z^B1qylDh2qZ}ce_AMC7;(6~4nEU!~PMV@2ByeuAG^}`POM2~D($>K17_!Ld=#LxeO zFdX#It!tip&7yF`+T%WdRL=rTp9kSX20Q4(*e@^n&Dzgy8@5^hGb3)?{+saG{m;Ew9M+Fc{m;dp zrFZhm?mwZPis7dgK(-IG@&steD29 zau`4NsUO!az9;N7^COoZwtMUI-&t84X}5XJ{viIyp>cU+D(gRa@h853 zH7SVojz5_Cqwj=ioOhDP^vH^x_`)tb9I^UqlkN$*-*1gATrWJO*MBzG-yg{}FPQXO ztgZLMcK+iu4kS+H(6}Nxl=YwE2=ik9mHvNX2fgyAGV_JU?0GDsbv;kx%^w*|=HXQj zUm4o>kER_mV*HHC<;2Q5&f3{=n$f0p%Bq{V?=U1&s 
z4(4O~hl=Ro#|~N^r+)NH5kJs)6MKOtgmnmNd2*! zKJ6zmsCkGpO!XDX7e-z5oBNg?(Gy0UoIm`DJLb^``~GjA_}Tm49pnaFn{Qq0EWP8E zAo=b7AGLchsXH{5WXngP|1PRC)}!@9^&V6{>>&13roO`Wij%r5F44nlJk$Y}_`??t zJNIYx-3~rA)L*@S)F-Z;1dnb1l`Ia{i#`7hens(WE#dKMr*Fuq-Rfz4DUy`+pZy^I z#;19VcTuFtIF8{D7UGzv^5B=E#ie~sW#zGYkU{kiyS{$uT7SyV3%hOSzW40!Cc6QhA(Wu1MV~hsIUWVEJ6TUv+(v>B~y*f5@u{AIR|te$x|2 zUEoUU!;b5IedhcVmxZ0adtmLYlOJP!mF|Cb-C_9{PhFsNrMNV{9mU2xK$rJY4`{^-1*?e@E^vGK$Twd7^UfcgHQa;cvfBfg+qwW7Q7kk-~ z`gjhQcj9UB)R&7sG`<{P6#B1zoX1i>{3&7wy%YXaMo&JoQT=2Cw>T8X!F%Rp1J&VKszt>;>?f2bIw*KW}XGy%tPF__jf?j2g=lFx6 z@s;REKg?g__#FZ2H=cwaI|z@-^hFUpX!A>WsS`d!__R*+@ReaW{kRd+YVR2vKK_@( zZ+z{M3jB8ef1brbJ^9`7{PQMc!z8~%z3u()aOJ@-S6SyreWCHyXejhw8W4&h`0QM)Q1P2eBTegIb=w9{)WT%$uH_-d$P}8@+=Pe zoPQ<0|2iJoQ1gm+Yw1;AT~BmYC-bX`p818w*P^1V|Al_!$_qM`>w?wEg7AY@hxQY_ zc>gaxe}34w(|*5^UuC44&HbAef0Zx@M>Q1YAro>#j5tr z^we9);$VKEArEXQ^k4nCj*7qiANC3l{lpHEH#MI^9*(|f^dAqqt&4i>{AY>Q0q&sp-{z`U6sfiJPF{n2ZyK5Tg47op*T=xp{_8yP zr`{C#oVa?w)PX;k`kQ{q!2V`;V)rSQ^xyK7_=ky~ofhU^c4?US{JXC``;@QYZ{I&! 
zl*M7&KRh+nD~i-S-_bfuR)3z`<5jl(SyFdsd?SXI_CLM;kVp5{=kLl-zd)bpsT0Hx zwEA>@rl(#|dEp_CVd|IE5hhmp|7+dPjSCYmJ@TYUtGo`69e;bXIN-}~kLUlrR-f{V zS8IvC@$Drxbojd>Fsl z;p2N8_}g|+iD8*nf9znYhY!RLWWNZSKB*gCcudwhI_bMNnEA=iKK06i?hS6c{E1bU zei?sz{!CvMhizXQ$O8yC5654;ic9SD4Vig?%r7)t;Y#Ym3FD{!ZPTyb6i(c{??*TPTkHIDmh$%|xP?Cd&8c2d zq}I~rr*@tjZ2DY$z1o>yXxtnPW!t|*HUB@B$Sd0}a+Q;Q;s-xybtL}S%P@`eu05~G zsxRX~A2!TmTlz8M3``p^3~{4Ia4-3_OJ6Yy~^+zuB1NnT({=kTm9+saOiGZ z4QagG`u&#~G7mfNS)}r4z5ZEISy7|`&VRHn`eyScUhQnhp&>Vnm-avX{$s9nfKFuk z1^Ps9b!dM4KFX>lkd?zxM_FwZ+f7$-0s(#fwq%izJ zt0U!GGO&Lk&WWD~mB-{2)Q4^FUU<~H=l?uZRvR{d|Bt^$UDp4;EDpQ>p{8=Z9Yt#H ze=59EyVk|?x8b+GsU0~qz8eEe`>(%aV*N<#Q9kUTcT!I(qX*3nFR1ZWKQgF!;W6x> z4?C^(+C>X`Uk*FZe)Jy-)hoY9#ie(mv)_MFnSO)JJ2bu* z14{cpTKMp{elSl(?4Z}YCU^3$NFL+Y_=5aD)Q5A=8$0!yuPqIq{^%P|%xmOc`Rx9a zzATQy_D_9^;`I*4-;v7GrI;tO@`T1M(NNZZoBz@z9$rQ3^F!i?3>r`3kG%}Nd*{5V zy^IGv&p!;?zp~dyzx$)W{=>Hay5>{NkRZi!R92(z` zp@se{FQ4O3KXEBy2dTqk4{?=;7aPlWiOq1@X47AKkUBl^1Jt4M4h(% z_hxZeKT7WZ5wGIXJJD4ug3PyUexdP$=xF&g-r|_&vV;%4qV-GtkinD>eJ4!gyi4jp zR_w$VDi6G{R%O`rVf;QlcijH(JznFi|H-`a&Gh$w^DGX|+r1MxU81j!7mL;JRL%vd zGc<0EibDUL_z_=C>Ov2~m;PQndi+52ozN$Bl*;PYi7$-X`@?_y)60v(4p05--9H`K z1)p92UzEjR$KMh0^RFkY@44EOeu%bpJh1m)PW>wb$A!ji(NO3={7WQ`dC|WDs$c2z zhv8E+zPz0uJ?Nd-Egy0jk|(vRK5TW>u_G5JY?^wiu2Ddtz|{r|nH z2fI|R2(mOjp11Xd(xS2!DrWh9?IhIip2T4>J>$59dE53orgVtz-e5r$UG|| zhsF=%3;N-m=#6i&$X18O+3^H_^{Xyy_qVvNc3#sfA99LT2l3=f{pBCUM3$F%DDpcF^o4p;#149mPi6G%Pt8t06-}=^ z7WY4t!;FzvrO;A--GxggC%2}}~zw~}-i=!`!=sO|x8j=q_WW`Q=!DV;M z`rz%)2KVSLAKK*N*7^U;EDnD62!xM57$*If=+^$oo+3aHHip~JL8K&|JASX z{p(!fY&|%UsT1^x-ukD!_<=Srt;_W451(O)p13j$2fop}>&L!Z3x{0uf&b_|?|S&` z{)hViWA4oZ<*17G{}w^nl*(_)6cg*YM~YcGdhs|GhF^k7#+P$ov{&Garjr@VM>*K4^8Q zF8)qv<53UeR$wQ+u>SE!UG>QEF`d2M>>@lW2a%ltzB^cb-He=<&8?_&v{{($&X^iIaZPhB8>(E6|A zo1c1A4_$Q3);M`p*gfi^GbaD|iDNQd(6Q^!dn91}7^C-}QGKr{tkxPGkiKE3^P_*E z|Nbaq8Fmc(+@A*Xqo*_3pniKEbMbqpsJ#F6 z_h0G&885ZfPn}g5hCP47+dp~t+A#d7orhhqPwV`{UhVVXc+q};)YLfW)Ydx0YP}!W 
z`3LIEm0uI2zR>?*bd>tbeEe$aFq$#~?c9=f(&&7nVRc&H5SyZ^sr&C-AEo7I`CZZf~nFBk0Fcusjb@4BM( zNo~_rKD@fTo!kD(^3VnIEG|{oD^})_Uxn0{#?>FTe`<>*_Idrhu;YkLo;q>98@=*u z|I^IkV#k~M`$OA4a*4C{jz@y@#g2b8?!lzK(Eo5utn9y!Ve6;u_iN(!u03vjW1I?v z2ip9a?Rw0|JMkN$JBR*mxAZq2*x{@zyNBKL{RjRr;sW&T_oodV2V7R~znzRDtkxPG zu&#Z>u9{!ye~Bg&^zIoO~0!6HPvnW`#kTIhaNVFA5=eegiXe6 z`^DZ*r-klCAARP&)6b(nm7l-l^v{0jb|+Po*Ivi7|39UUnsCvvdio!Yfl~j`Sx)lc zlW(Xz;-F9b%oBtMrq3rlL)AlfZvEZEet7a(LpGk!J$#$%=0AGBrO>hC&zV_VIPc5* zu+5h~YF~xjTASVw)lr`1W9JeN{WIc&%Kk4dJnNGa+b3xM|MFIQ-0~P#(fl&J?ceZ& z%qJahZvA0gmL1V`_!42gKMh&^j3a-KuD$+iyxLXU zSLlB%CRFxcpGWHldSKGelxKED|1F-!GBVrV5`B*iqK6H3@Q0~C8u5Pk?K0uU`8Hf& z{k>bizuO~=i|Ice7d!Mn5g%Cpi7R%x{|56)(L2${ zPhB8>(DsjwiagcxAa!CZqF06CjS(OG^dHB{O)s)m-?;vV?_EH2tiF68X_kfg;KD9l zf9`}qoc&1Xe=-KF{|S%ZaabMNAA5Z&pE&3fzm2PXg9oy`f#$F1v*}SU*nuC`ePicy z&RTnq@YPNFNAB|6#q`1M|KH5wg1%U-=PC6Q2Z^)YVlyw0`Gx+cWV{|x@U9)}6UBy4 zy@tvs4*JAzbyW2~&*I{ppy_nzCw#*SK6>H9u6s7Q<;x$2k8XZ{+}~er?SFQaaqYNs zrrHffbQQ(hTHE}b_~@JYbMbp8X!Z2}DLQQXXB_2m9RU3Fb%?(cS{4BBkJoZ0!{L?oO99aDLhvJgXTcWLev$d|${wEiP%|q?b z|8xvk|6Mz;bp1K;1k-tXjpN5Q)DD#5jVvxDzZ1Xz4>4T&?&t%ioH9LJ{k`iSyY_}Y z`oZzPeGjbl<$sMIfLot`wZBqLd2M>DNsM`b=wpZeKSyGz|4#Dx9V>j>?`VBqdBnlg zk8P;)Fn%M83-zZwvt6Xw_l9%o?_T_!U*43TwtM%IUE{9!Xvmkyr!STJAI4S8Di-Qb z`)2dhdGkIH`k#qPrT)X4ECZGDiGx#wSJ!Wr;Dgi!n!nbrj~|3@=!9Rg2k8$Zjz8~= zL(hFPj5zA1Lq1t_53hN4|C1?MTsSXl*B|z%A7tLS_*qjI*S|3jkU0CZ(Epd1TG{`Z zqHOuO*yMYsw({!obDGlm1RGcDHoacS$4)WL^G^8Y#|G&OHmLqE`o2Hk@$!@Z3}fEd z>e=1*I@N33u7At3xG=wbYW)506t-7TSOfe28jmzj$Fb{ws7HAmH!{!A|E!GHqxmku zX=1~tE<^N{S3jqnZ+Y+(&EJ?k-{MK0cfz0R{^w7<-23c0M}&>H(PnsgCDf|5Y^Xjx) z!@XHoiS<$=nlk0AL~2oLnC!}DN%>Lm`&r9bR*=yO|!<7?sYNnwGZ^B+Nd_WO7A7481> zjHj4YET%Vbp8K-}+q=)K0lOi{{6c?Ubdu9#1)4vfJ>TNg0lI(BC$9@m2qwR?^8Xnp|)#Bv07{I zA@I(L0@j+Gpg*EWJ_Ncz*QMY&Ew?0m8x6R)re(J;D3E>;+cqLo?o%BL} z*;UDw;WKXNe^tgq*R|`{R}q_ek!PrU;-F9b z*oJu)mz1A=J~X})zS+!Yuzux*6E7Zr!M-=m3_rMFsf!nzIR!o2{_`v@*8i*H@9$2< z28Gq?x7k`3ee+HthE1`Vcj*6XblCIjBu;Eizh%RayKAC_-f}99Q3MVHsdPd2dO93!$!{#Jsqc$pK-$9zI@K9FYh-q 
zeDc(-1OG!?L*K4HoSDVNjzb%2yLJ?-wf0UPo1XR!JDneW4E?Xi#LE6JCA!paddd8W zgWe^1YFmDu#MLHlI>|WRNnRy(;D;aV@RgM(EO=!&^S%Xd{N+A7qhtN=QGIYpz5hv5 z{fc6>jyF@guK%0kokR?&Pcihr5eb$350Y>FaANxeiG##b8$ZVr7EkIje=?5c!%yQ? ze&vPXKhGC`d%~B)>d(G*%yWD8pl9#DiC(_7UjLkDabY_atF`t{9<%*a-hW=}V}7Ck z&6rr#e^DKHo_ke4Ptb{t9_LN4LFxkWcS87vI$p_6>ZsBU%XJ?)aNK!aSZ?Z1H`r*} znqKql{#TQ;xG)dD`t1A9_`+_j@!S13((M5|mz>Zq7YvsA?Pl_(Q)d;1;av}9UH5$$Ry=Rr`a@$5@S11$f9}oVV%ysgz5h%@{UGBjO0;!6#P=^e zAJ#9;r@qktwv5-K`mP;I*PrAX2c6jHfj;qD9a=9uko}RN=35>%s5%}S?BEaE{j2-8 z7yn~QXk2*8y2GCwPriNsLcU_A3p@~iio|XIk6uHVyzesa(EmR%P__N1=P~Sg%vH|` zq6hlKZ++By;DJ@TmZy5?&aFRu?ZPh{xaZMthp&y_Y5bFi?~R^a|JKan!sigbXvO`< zlIKUVX4(Gr{EyX{tF9vR4*h?NfvWz~7x>;qdS=M;n~R_8-r<8*M@9ePrI_ZG#&t3- z{xmcURZ#&cRf$oSY`2Y(3L9(DhFpWYg}FTLr= zT_0+l|Cyxv%>?=X_nP_@#hT6Wr;~_%{-vH={5+4$GxX1lfl~j;n<07dtslfe<<&23 z=Ubi=zXx-*dj#R7=$-Ny2fGTZc-~HmKuzY_Nkre0$Gtp78p&M~73^SYxRV515~Pd;jgr;$q+beno$OovR;Y97Tz? zUJs@J-)U$*c~0!m|6Y7h>c8^p>kH5NR1=%$-MjX<@e;rFQFY;gywAWhe^M{w!82RO z>*NnxZ2Q1{n;v~eX!xC8{qrjelW*tWda}4!KjQ5-R=CyXtMeCDtKX?j->jZo7^xll z`(wbybIMZ`YhavQ{W`wYqy4ddHH3!^+PDdycomXI+&is9b<9>jyebSU-n{E?rhRVV zu=3OIKC;Zn{m8F+|IIhh@84A)K6PcuLY(V1;aR_$l0bc-zle@f|8;)6kHAMiMdBdw z)W*;AY4KVX7xuH}ck2IN!f?vGhwgCi8a?3$D=+=TyLPmh5q-Wqtt)$6yZ~sVVCH6<#FFv_#kzG_&XtdLmj7NtG|<882aHg)s)-r{+bwd*y^@6?|n^Gf|4>@f7;2lkut_~v2xOMW>2F-M&2 zm2c-C8d+TEljr|;l;;&AX_l4EOY7pkJ@5^wJD23p|M&Qy)PHmq6Q1=$^KAQf61U@j z`8qT6>bE?`RW$z$$;VF7=B0Y*&aFQTTkw$E@BZOgVc6wc-+%Sg<=HRT{%5AtQ#$|O zP(RouHfyU#{Tw%XCk)0hUM~L7|6zPk+5bg_M_;V(nvb6S|D*hzX7K%=*{;txAo|`3 z&rs)SHt`Pr@RiG^thC`n1NT3?bi#h)S34g4%Hz+C=JNWtHML&qX{enGGQZIOkLalE ze|I~-)PL6=N7pBQZ05^&#-~25*ZkD4dL^AX^oJGJJNt^0?_MP=|J?NM+upbP>T&&# zZTB;?xY&Lu8Aqb6wNK*QhsXTM{Ll^kA4L)RVSMI2BZlpIa`Nnb%1NC2RabugMNiTC z+taR(AGGI-e8x-dLHfgx1s9t7!LJVuL(e~c^Dp~_*e|gE;dhUq)lt)VDvI@Qj{mv- zkh;jHZnZ=I$5E=X|GA7*Rfl&H=XJsF?%{)JAMtm>G|$Fk9*nQpNw0gc>EFKioX586 zUUID!?tOgm*8fj^L+3HA9nrJxt*LP*n6AD0Id5$BtKaN~WQP7v;)AOG*OT8dvAq~- zAE*bq{UCbSUJM97TEFJjT^eD}^EZ98_u1C|znR6Qw4Haj{{Zz#_CWugJfd&J)A=#)(Erbv 
zP}P4wRrKJSPEGu#ue|zc?R?AYYUi83F?+tnlRWR*b<7^5zkBhcyT5qK#67wf-)5m@ zulZIF_1p2!Ow|XqKHJ`MiL>^OM}nrO6ToJBPwmkEub5csKlAPp-O7F>Jif<<58AlO zPyN^_dMD#sK6VAB`KcYgw$aEJzT|6RyM@LsHQ&nfdeyP(Pa0WV=vUSKAA~iqz1Mi8 zc{;z+{m0aA^H4j;>7Um1wJy<7n|UyvVG=i9;!nqg2bx|pi;MZGTlG9Rm;SKhHBXH_ zdFI6M^;LIt-x}Jw{*(9LDOp@>d*uFm4fQiF=)}*OdDhjBT{XW@d@iQic;qSaJX$~U zNN=4FRUUEBCw{9#>xKuiy%|sQOZI>04_h5^*A`2CXL{KBk)MCzw|{vBeLMc|%i?0^ z|2LWa^(Vq=?e9~YzM)^5pFW?Vm`BFz5e4rQd46-Ttxsy({?JJroAVyP$pzGT6xQ`!vdmSa?69>KOnBCQ`iyut45A&0!dgwB5vx#@$cMm;(qanw< zw@dfX{$;n_cGCyZsoMYh5%K;5li-0_vfu~l8+O(FLNRYts@nfB4CgDo6HkiPugMa} z2CWY55B^SQ<5cz4$$0QO>4hQRANt=%pLBRww%F#5v-?*iud4s~%N75>rsK-!t+nZ` zC_Zf4{*!rxV!lW$^&j43(P3VulZ)T_s=WHa?R?8aPtp8t_I!&cd0zQ8E;a}s8&rQ7 zd&rA-HGVWbjD2i_FMe=v>-_URSzOZj9h4`Sd z|H`ZEN1|7+i7tFFtrveM^iK3FAG-oO>4mi?Z1m48<2C!SvxP*0+>==EZo1szV&~ns0V8E`E?c zpj+~*9=gU){Nz<(_{~*IEcD=CUASfWgAU(x*fHpF{R!{GAUf&xpXp+CFh8w_^I#TN zzuCD6gkt{qpwxfW<2ur+I=mAd;$Z5xajTxMhUlcn2bQPfpgXtz@L${i<$>*voF4w` zku!gPDssQvaRg^Sz4oH5b2a z52_EZF1Hi4^B|T-9g60kB6`><$6-H4uk!wr zE9mi0GESxo^_i`_()rJZFgOlko}pMU1}gi%spP?D|6rK((|B5k+4QT5U(@=k^3?A^ z^svDSKXrs-51;S6Wg0t&)QMF zpRYHZbN`b+dGK$&*cNC>D-NmHl5;1}gQ5{#W(WC3*C# zijQ8SB2V=^h#ofB!5{elokQ;UWUp&i&gqN&{;`?G1;1Y+-hb(G@6@mQVwKnQyRLo1 zHoatiq4-i1sp|h6pMU;=)Wq`zS{+)y`SaQHElxi{Cw$Yx22)<*4?ArAlRY+HZ*2Jb zu8Uvz?x_c%WBZ>T)d#zd{p<_)X3itcbApxEzcnPz{6g{N=rDcFr>^w< zg7FNyL=Rqlqjr79F+Jtu?}TZdPwT^0?4%dmc3+>o+O>_~Zr$wUux*Z>{r-aaC~h3v ze{3H-Lyc$2LcOK?uhxV}pQtkwi^M>w|4#Vzl?36F@0;THPLMd5`mw2xeB(Dn$KtNS z?%+qS3d5M4|8@17Pka!@^xXKR=`zbmuI8yIFPkmsQ@|+-R>bLWs8n4=3 zLm@(?{;NK}<6}P5Yp6Wppx1HDrrs)kO=dv-r99>VqK6H3@VCDIeg5z_t{JyI||F!k_GuLszWE_dk za{gfz<-=>V=QHn6$OVR#{dbZF-?saj>LqdGCH{{6&sDFf=oG0R8?5j%eiepe_g!(j z#sZ_marx&q-RyvCsLS^MQ*_*>;*#4*!XF31oG#_3~?WQ30g+l&dq||@% zrnd8|`tPR+uRgh*&p6BzG=F3Ee2Y^D=!B1s#|Beg;tzYzJOA6$mv}l%>KlE@q*DrX ztp9ly7xp(*-~W22JVmi)+5Yu?6Mgj3_q|Xo5mQV3S6x0wTR(Cc$@Fx-#6hq5X497{ zKDzKU-|EK(Q{Eu`;lR6&IrhM(UJv^{a?+1p-TMfyc~$2>?f>5!SzLG>h}G(MYSTBX 
zKNm)7hhoVXDD_|GSC??>`a^U;<@!)ILV6w^HKq#pBQE2jCW-97XVpSybByH637*7i&P?R&mnVff|Z{g+N+ zy8tb}#v|qH{OT*W+txSwt9hYVIuhxFckOw6kI6hN4o{IdX#HV4_-exgO*e^~4*6iJ z+riH`VYQzfvfak@abe6ZhwXCprgik~_@^(6iyeP9)OPJCR%`8@JT^VAHhz%0YZ4EI z{K291pEx*Kc+_vtqvqr9gwzGXM-Mwi_=f2`uoct%)DBY~eq^}}`a|-`Q#%yP##HOSi>4hHo1UHrwf&gp6K9_3{W|c&1Mzo4 z_=eQSJg^lz>4l+(UOnYc#~mMr&9le`|C+kB*F3)eO6Ql$>Hqz#KRsDo*lx%d?<~)! zwAG#ucz+gD|XTg;}`$l zrjK5FK-g&Kmp6Xnqi4{yz?Ec1?I;voDU$XIyn<*y4iAmK-<` z@|tJ;pD8@s4>igY-Inqt>C7laQ| z7l>cjnH}Ha2jLq!$tcFTk$XB9)5397{1Z>w!3EQ@30@S^Z!#+A6!NM|BiWs zIv#aM)-3y<^!_IapZ#1YmXE2W{wv>f={MtJcL{^}a{Pu3QYVPN6T&ytx=ObCJL!dG zuX^v)cm6pl3=MmKeXR$+?KRJ~f93^_n*H~`jGrZo>5Y;AJo?RcPo1GyAredd*S^;E zIwJV!8z%ivd8uuAwK%M8_f}t`XZ1VbTfXwlPU97JIB(l$zkbD6^YD{*u3q(xF*}pb zb@R60>B-{aH_-c!p{L_Yw7jNX`sT^Yg-e{ighDQqsO-PKkHF{kYMAuPcsj1x^s56u z-9D(-tDYfxiJ$SRuzSfRC!c@jMc?RN^4c*ohHw5jdiMM4DOp_Xdw})z{wFo{D~i=x zdnb>nm;0t-r`s2HC?v6}|L~ZP=`nvWm+`spF?=x1!`}(Llvh5E!=GR$y|7vRnah5? z%oAa=-PYdtZ=+k+pG?Z)!t)Q#@%5kD54-<1^-WfN`lbF*43}|xG_HQ@*EI1nFWc^G z314~t&;S3<%vGlTe5rk=gqgRm{K}1+JMUy3mA^llC~=NMK=BT|-{w40p7z)7|3kkU z?Qx#pP>hIy()RC^KSR*^k6wz(tN*N>UzKP6p4sy)o{r;$Z`%hp2p=0%f7trCzwUFu z?_LSpHf}oP$UjeEdn)b!XJ&EnYwP=OkJ^f2&8GiOBHV|>tIaqd^9#j_F`=^mTDN^3 zX^4KxN00X}Y|#3pdiXnGnrHK&F6O7$NiVGY=toO0u+}_bl;8H8lOA~q{mS$IAL{+* zYdUXG#}jXB?bTnp{wEg>+dXxLLcS2wKipHm~itWj~&0qhm2?EAEsq-;rk@3gK-swRbHE(CSqH^sJkJ#p^y`$ zmHn5))7EifP4ajhp--KlcfvE9aTW1{Y5&Pb-)z;LTYvXrpRQTk{ru;;7hiaI?>c+# zh>o3q?#tq0uXpx8+Apg!sY9}6IsYKLmDX`XE)2#)Cln)Npwxfu6ZGu2> zFY(*BRsFAt4)1TKqkOM=hI8o;>;Lf8y@&0+d07AHnR~ze#--@l_b$|-m>(F=|DQ-* zhROJuE~dA-IBedz+D$>`7Yg~JzN-H`Pw;KKr%tE34Z-yL0?VUbMf{!6>L;J^Qd|9< z^uo?(?(*m_FX{_>y?)>~PyBl8`12b&k7hgC|Ia4=v@Xe-W&2-Cb*QrfLm?L`miq4` zpYuq}%d6i|b%}$-QyV|~LyJ>iGmDGy+<$1Jt2*KQMZa+O&QG2i&RJ-;i-ukOWBO>v zpEVs%G2csn|J6`G*wtQ_ShMuswZ|EU*AMd!h1_7Hs{d^7@J)yD!J3T6{~p2z(>(m0 z&@&Il!LGngdSRV2ZW{6CB2R>M{`=k~RN>JJ5e8k4m4v z)L-tuoG3DVc<5*WR!0)2uf##GywrxD)?xF}K9}+;#$_CkyebTb{^m>5wtpvtBOg0` 
z{>``9jXXR5kY{n>^_k1xzszs->2@Zp*7k0;lSg$ez^iTf$#xV9d84T8zg!+?@2}QB zbQJCPFOSOjj7uNVJo8TxK6Z+h=Y;1$#>WOb_`}5WNB!;azxIYLw*Si`A6lAt#;GqCI}|e0()RBpUe`S9 zM@{@zkLnQzz2=+ExK(^~lepEddg#unpE|<(tDn5dPp+Rn@cC=YSO0a{ZRptfpPno( z){jwf2egrm>m;78Ew1&`H}unfVTVF)lxX9rzU57pk*J?ML-@+8|5rQT^0Y2=&9C{` zDZ)2oJoK=^4*sy`Z|{8Znd24?`z(9Z@0`hCtz~gxe!1L!>~ed3HYCm(p4mD+ zpIgy~XLc?bp^yQq`k(I4t&XPTqgU#ilQ@2_Hhgp}uJa;pew~lyr#jeR2Yv|mUUKt{ zg+iD(@`kl99@|S_I`04TEG`@uQ70(T*6|R1^G+hn7e00<1c`%Q z{o(KxuKDuP%YGP6zh{w|R}H_Ce%SSIjVvzohq(1UmpE(h zcq9nlj(;@n!DODHSUV<``ma2GM`Qh{MY`R03ti%1>ZdLc9>{i>#<9^eWF9Vy%V7Os z%imnK4){daNw{d*pVsrGePKEvY*c>%`Pj|D7mu z9kumI{XRkBAo0}3&-P?->S|6Wa)Q6qfAptFzV!*c6qQ&1Zad%d&{H&jK6}2!lRWQ) zZ}Y$g;bVj94-k~h=^{qFHi}AH?;w8TmK5@fA`q2xc|FUOu zk+F@i(L!6ie*01vqr?86^FAPXo^cejWbx`ZJ?&fSCk}>UTvW3DCw)lIr`h{bGp5;j z3F-tHH?4nu781cfME6pE9ZGO%7PkCWCbnN(_c`4fU zKa2}@Nge6hYnhE6t`yWpJ+l&;xzqw>nf89%%csq&~}2J#^>RANKs_wkOVa!<4Ys z1(zJW{h>YR+41M(EH3u{zYVpuZpIaFc}<+YVOPyB6!Jt>_FpcKYxO_ZdO#;OeFS~t z$2Ls+oM&;NUejUTPUGNQ`or$|Ye(FEKyTP>!k_n@l(#G;fL^ycDf|ows+wH$OI*@)Cbo?bUs+|H`3%46E!~j&jaIfA0%wXFtu=m-zF}M44aq?_6nS)1 z_CLMAbKG2)W%=h{^@5ki%Vsmleix&sxEqA&j+9W&4fKp4b9z-d+poXjz@>< z-%=!x?Sk!Cs%U+FPVLh9=Zg7-!pGFg{-?+Pxr}7nzZ07}L7(`sZF}fx&ojNGU&dFx zlFl6Z!-kXJdNFGr6Y^V6xb+XaZAm}>)BTUkruBT-@jvzD!m#+#S@3@{GINRBY$%9kB)ou!BF0`TPPmp0>cMVT~sje*bGLT!5aP|DTb? 
zh5z%j`p{JrR%>nZbKqh-wD$^ zuj82?Td|W~82R!l>s)!+hhdfd7r5v7*G}-7XZxStEH3DPDbUTpD^%S|}?M&@DH zU-e~ivGbp+>-FbN^@C1rMX_c%{#jAu)T#Zk`Q#E0h5W#>)PKz@U4Md3vt2)(w|5f9 zk8P+rrFb*R(>g5AN!;dv4R+v%buanx8#kQU4C^iQjk|u(809t3`ad&^OX>Wl@)%cG zlT^z8U^W)>IDC(`e#c?-L>rY`&c7y8yv-NX*X7BXIs#$7v)>v)+@sSed64tmuw zn{hkvEAv#3IP)}{IOAZ0>JO_u*IZ=9(+&%(|M9keJ~eiG^6mOh_=B>UqZ2ERI1wk8HU$c36p-Zy`T?T9SNU!6{dNfd8m%r>aWtlA9i|Vsh=J2 z!y@c(;Hf`e?kzj7Y~TM*$>L)7->9jbC+NgZ*VJ3rIJRxK)ZLKGP{;>@s{WTghfoiQ z-VD*JUnaV-n?{+?|iNG|BsVY zw=Y5c1FCep6ISbZGqp?mpImiog3K=zTSZ4@|EIR|y%XDz`6_RY|Nl3shq}PhIO+JQ zO+6L@+>Ydrk;eD1MYSo7e&uk*}@ zedyc%2Yto7uJ`}Xv$&M@BU*<p-oFma!SZ`$#!6T=qAJax>AKWu&fvX{)mK6mx0{k8b}qu*EV zf3yx^&+_>zz5iH4^{89zP;3(eRsH8Yi1ov%PKv$auOBD~AEd9K`BeuyMfirwFWH0i zhYi=?;FI6&&qqYs;Z*JaSL^KUbyemnl0sP{ip=QQh6Qipds&Sb&*p@Q&1t0Q^7 zyqi7W@-%MwRrVnLVeBMN%e;(7p7KlkwVLWS1P85yZYR`n!(Lk* za7eT7?XXXMoi`WydEqsW@87KtJy~4*V1NIp`plcYq0jTleuw#mVh0(A@mw@p*JaDk z16uuWXuR3ZLl5*$;;Bu2yuK}7o4ua#RnKhZi(e7FDh#V_x6E6={ly+()L~B^{_(L* zuX(or>CNK8_TioU0LJzoxx}m1)K}NOc_&QkKXvPTL$PBd*z*n_oGkgwhv(1u=qaz> z)6Ta%c#7t)&7N=ZB+onHGf%Sz=?`x2zVH5I!|lWH#V-2znj2R|$F9Gfsrq0;?Yteu zn$7;lNrdyCUTy0e^T~x5ik;#E>nD6g{zt(2QIkA-K9xrt^ogH&g7CmneVPv+JwxUJ z5A5LYUUJ1T)35z__3kC#S!ADEHoF!*JO48?i;ErqG}NvosN;yYwWePBhFzMU>V{(H zm|WR^xjjv5|5H&1eFVLen#|UEsRut;s!#Lbqi2X7Jg|emd&KaQ&ph+?x4TE|`(HP& zmyJN*_Wv`pxLE(ae*SH!Us0@ov;Aw|&@as|7llxKJw7P)pLz5Ouhgfe_`Or?6=WXp zL90V`&0mu|>=e^H?}TrDY>>LKLG_2lw>fW}o7P`DEb)#z?d46@;&p7FfBUky*yk|4 z{*3o4L&ix_$FuEUZ4bh4WN`__E)h%phd)zz^u?aXrueN7%B$br&bK_qRW$$P+4C)) zq~fz+y1{Vi;MMtZa@F7D>}>rw7MG- z4~1O#Uh2Qr%Y6(iKbJf^9#9@}&^zIotvtrZ51L-HB2V=^h#ofB!5?dHDCpemw*2?C5wxl z{~s@ZfJ(iFTBopEYwBgY^-f~8-(Y?^-%#ux38ntSn3-x&?HvIxqKl23Pf$4T-{<-Wk;~T0@*y@P8 zwpi*r(?es4Ulgm~ydU-0_rHBvTx|c(?_YBDgQ_Rq*4jIHOyAH?=Z75%`Qd9-|L6Gp z&z#=>CzpEZcZ%Lg+~!ebIgq<$Ff6E;3-phgaTt_lT{+nj3F7Vzps6QWxKUaeWSGb@a$Qtxt?!6F+Oylb==F`?EbyT*! 
zX`Xj7j`^__JL!e)`$ych`)?jr%;UWZK?hw+Jn-bp;Q@q-p`2p@g((+{U{a8CU-$q(zT zb;T|hY;{H$|LZ4qoi=2gS3Nub*&{sa@XPD}k1LP)3ah*(USCdi$ZM#r$oxVvDF#aY zCr=T+ZTERhYrVfKk8$CH)CJ=2gzyb@oRY2nPI_U5asM2+?qN(AcGl5nt@e*cJ#};c znXW7@=;v!J5kCNz=B;_~B&+p)Tp!u40MfCXGhz+KB_&cF@$+*}R*hw#pI{e-ZF5CZ;Fy@fm=Y4XUr>M)0 zKXV;VF^`9zXE$ws)X=ySOxIrhb^d==ZTyUf9g6+4&z7qG^E@&ywok*P|HgAswsqW8 z@q8JdcvYVIJ(%$EqgRFDhV$pS{)<~(8SeLQERw%Eg??22{%hTT|Nfilf(~`iH}fa; zQeP+zhzh0t!*9v}@GJL&%Hy~TKA6^lU)29E{{QLlM_2paD%U?0hQEB;eBWOB1n;B* zd;e7%f4*M4{%YIOyy=gksLZnePrv^{Hy0-J4~1N?R_ed<>x%GgyRV6kPvXW){8jxo zena&@&9^*kQ0MKj!4CfJC7!wa*5dGayO;RaO;?`r+Pvsje*aa|`+rgwsN-eH;?-aJ z{R?^2r*oUrs|(c$R%*I9VQkEWty zpTC+}Tx@=e{^YaeLiFJatGtFs-{4oxFBAtyk*falxt94@UD_wcgIE8h)C3>2I<%kW z@0~s0;`ASM!Z$r^Fy$rwZnx)NyUcgt5#7Tly?WSG_j`2g`oqbp4|4opQ@^5E1N-|f z9%~km#uDe|?#DzUgQk-nGZkP5smf!UJvlpkCF5kDg)58zjGb z*bh%WYskhEx`%zd*wRnkyes;){ZGr{Qab^busd)P`Sqo=wLsWUSp+ zz|Mb8&f>!NPJTt*|2NbR>Nw(U9rsWdee+K9lusNCgc8gIHz+8N_4^CC3?cJPnXjR{?6V)hU+x)KQCO|DS3gMK z=yU1yt@VfEuqaa1f9jzAQvc~gGsgY{zE}jWZv|4AieS*Ns&6UwNT8JSN(B>L;(hRSa7{ z@ux^XL5rgYQupLUHy4{c(;+WKbOz~1FRb;q%Xi)USBHhQA3o;2Cu-~uoNaIeh^!uY+pyX)<4qB?ek66^ z?}XMb)rZgf5_XDdo_A7@`LPw#{L~J+obcAKb~|ajsI>ZL=ihRPP?ujK{{CV$@6@j- zRx3ZXE3ZG){!v#bj*h9;f7SPj-1iE;ZTCD6+D~}(C!$6c&q*UP$>+4$Tm*7)_g=-c_{UeyDK#rt2aW6wt}an|0+BXqD!-#<2# z*A!%)p*SWwO8wXQ@jvqLOZ|84dd5rqR-fv^15H=!G{5q_>KV?ZKO8Zt|CQ5szA+rT zrT#n7uBZLT#c$h#^5_@n z6Tj7=eS-&DeN}ntN7wWQ>1Ujs3-sEv%8fY z+XoL6R%?ylJMqys^P{VAtEZ3)W=s9oKGi3P9(?LDO!SPW`Cj8zhx*}xrq@WuOLbB` zY_NjQINe=WU4856KUko9(Q`gsvv-G2sLT4V(@%Kl=d@^p9Ea-rB?HhL0{6cYjRI2R%k}?qW54!&@$)jJ2=z&&WqasiB zJcu4P*umdD{D`+^=IdY6z2Ys~eR<)ZUV>h!|EgoxKP38D#X`U9+BfW~`Gw-!QK{5_ z9gpjHtxq)>*q#UF5eI$Zw>orQ@Ib3i_06w*^bF_Fk6w7^p%>nNee3zd$2XjvT~zC# zuFCIUR**lyw)tv(S+bx*ee})zxg?-t^%UQUfl~jO-&DziZ+$|~N!%;%|M~mx4PWtd zlOFZ&h2f{(xMZ=%!t}?^KlJE)z~$!f{~xs<^~F_3ZTdzZ!BBiR5-a;ZMRefv{26vh z+}UJQTh0K~?{SH84&?9+^%}Z6|2`QXV!)T_FBW z2;Wf0E7?gMRk~s2yIw!}*JB1hnZAR{Ll5+cpL#)fpgq4;dFn?O9kYpdWpPRUj1xY7bf>V~t<%CkzkBcN 
z8`XD4$F6_q(R^?Xz5lRNzoJ;JH9B77{BNWA8^UFtR!?z443zq>^N_58?SG2q=LtHo z(SwH#S{&8*f5Ot6eRstPJB~!h-hcBfF6^)KtK;`y zr_wjWn()%KcjBolVyF8J>I{YKuuJ_%XQ=4GxAA+#PyNI}#!YSb=vX{&*Ry){`9jZd zF8yKLa@Sq+*KZ#aHaOtKGru#>-i%}0f1lO?cI*8IwBPWVhp<~~{Pz8elUSO^^Bam2 zBcZbYVF#1Ke(yQlQ zfR5e&oH`WqU&hlv`}t-T7f`I$8XfP%XTN_@oB5{OT_{e936=fVGqu*IT=Y#(^@)Q% z@mqaKe^Y*Cp6U@dy*c!UF;o9hoB7Qr!kVYgx9(Xlos6Dcf6~n2LZ5A%D!)^Hf`h;R z?a5J~)PL<$`hN=6_gu$Kc=Z?5?*y&C%40s}pVrR955o6Oc!ucBp+Af{cb=0!{PoDN z=9Rynce^c~M$gVa_hoTmp83Ig{eO@8LB_ACAKQs-|Nn&_UPI!{FBGRlN2&kltliG1 zE<@#^2l~Wsebjp3fu@(#XL+iJ?%euAIP|#jL;pH0gtynd^ZV=7(6jAd$7es0$DhC7 zJsBI!Di+hzzFD2QB#_U%Lvd;hl=`oIN{?sIH%xeSIh|qO*E5dQp?ve_arhG7;^>1; z_@;*qrabkd7gl@k{WD&A?7gtssh_U${_72|ar^(rzAP^G`AgsbZ2QP1&e}U331aiV z3E1@A>du89iqqnQs{Yr#@Tfnn13ls(e(xmT?4r}dQn{|FuGt4VGsPLB^t{dW?tYo7H(^Xzy^ zdBj1V_^m!2A0BAWYgL~5(KWq6`WYv@dBs2LXKpgFyK9jTPn~hv5OnPGZ*LYC`~GV) z{r_358@{kwYy94ckG`2d7r%FcR!{MR=&0=f$acQzCH0^OdiU@1;DPY%xI^=8JZzBh zvB3`haN%iJ{rxw`zaD=1)0bX&ZP)Vk56vttR>u-*=LzaK;%$8oPrbJPN$Y7!2J;TZ z8PQSM|GEAC#kG$^T_EFw_Ihemvtx2)|8vo1Ue^e{1L#lf&v8{o$)!M^B(G`~1cBq_}as{Q0t{E9ad&58h>QDfPc5e)hBUB@~lm z!1}Lx-U+|Hi6DIZApR87I5zXG;-_(J%Xe8^()_{NVd%jR>^J4{%|&IF{m<_IUe`N` zgJQMT_^G!ZB$n1i9DXRyiA3walRV?e(_^2Hc>WCGE3e)o`IY19^Gh5Z>=da3zh^wv zF?*2yu=l+4zde14r^BSa(U(j*r9j_~KO0$G*iYO4A0>YB7+uR`k6c$l!`Gw*)-{Ow_fubsQ| z)?2o|f8RTc3)=&j@1_4gIN1K{c=fa%>S)S1%r6u_lyQ5+@0}t#W5kBfc46;N$|nx` z#E)&*%;J*r+0UAvx}ESnIG6q~=HWZ89(Qb4SYws#zVyvI7emjs|C){iE+#i1u;*XL z6;^o-udaPV-!PXP<`;_dqN8g2=RO|rOa0e+h=Yum+W0|>H)0v~dqU#O$81-@Q$KoD z7`8ur;*n#1b!ymg_>RASYLAKN+VTHn)dxq#>knUIyD-#wQ=eqbvj5S~t>|0d$a9hr ziu2=x%Kqz-GV`(VbIE6X;-GgDH=A)4@q?xdPxH}7&oJc;k{=F!bJz3U{9;eo|Hx}U z*k;su=-cs6EsKlof3A$bfB(?N)p3M1&~L57Y>n5oZ^YC2G4D`Z5K~M2XMWQ}89sFx z>UjKKvwmSa-}2B?G=FpUe2XV}-U;94g$=^T2Gt*4Ib`|{KU!#FcUfp!**8lJ9 zmBpp>`-@HE_rE7v|8n86_Krt_*!KC0dKwx}>`?qD21@;R!e`C(RcRewdDgF~l7S6c zzjS>3oiNQ~JoD@L_&e!s^45#XT|NAJJ8$xSM>n?UP^t%uY8NA{xqK0j1$J+JafXZ zpUo5U7caf$&#&jajw{bUOpPDFrr5k2Qa5X_Ob@Al|1UV 
zI(ia4<<;eOPUYiI^b}3cwd>;t(|Wz~&5sQ-4mPO%;Qqevy3g#{7e*}Ax5amN+1_j1 zzJG6Kak1@d(f9?}JvQ%##*=ZxJInj;N(nDlTl1M;C}f9S+5ah$2jA-GQ9Y+PMX)ZH za}B=#Nj;1MTA!2onxFB(l$ZF!3jP1Hz?qvKABKPP7ZgbR1Cih=a_l3gLm&1Dap;z0+~QIrO6! zcK_3@n_U0gdSRcTXFsy-^ADk0Z>oMzg6r%Se}D&%`3S4E#t+grY~}@8J;lXQq||>W z+W6sn{2+bMy19=6e9-Dpe(J|gG0n5_8J9W~)BMy96JHr}$@mLS4O=bx_i3X(eHK02 z{+n4`*#G2zi>E)Ap>KxC_>$E+9<}?=Cp?=^O)^69QyH&EZ0{5`kKYk9kJ9mv@`!`z zn5{g<#}Arrq6Z&6L-br07yRf|Vc6w?PtRZJ&L_fd_dm7rzz<;1x8wg_)dL6l`$x%| zW&b})b*M9+y&Z~6Vu0=3JLQ2h#b!SC{5RWqbseWZt(|XqPWfp)lV{JjJow~;bLkJG z9^WY}`0hht^wj-+GhvU@z3SQaKQoI{5Eb^1wR)*JkwFWcNz!h(2riY@YOvV&%FG4_}MRBAHK|YdeOD> zKTRD6TvmVo#e5Zo)mnQekEy@3|4-t)-a|1ZCRX-;RnevXs_maR=v|VhHua%n{D$x? zp7OE5ivFWlg<;dJZ{Pf|H=hn$T=l1;Ro3sQax<+4AIkZ2I~)B-FNGMymsq?VWVZ1 zUv~f9TfcwbEsKlocV?>HP^`71Sgp0ysdd@^Z>r64s_}Bk4aKGLL8<@b%@CgT2|ws0 zZuftaxAVat|2Nz983*)E;#PN+JxG7p%(KkKS zXTI>T!MX0gj&J$Q2S0XF2jiv~{MQy+`|O{E(7n&u=eZl6@S11+@6F<3*S{^R_n&O4 zAEXajheT)D|2ySVXFhv76u*dpQvb=DDtXL<_f0;ZcqehrkJe9b=UX0niso<3o^SCa z&pY9lo{vHL!!k!bc}YHf(=cSO&);(Xm=)QM?Dt30RA15R&^m4Y)TQG}) z$ev#u?-uTYU;6!fP4&3`kUA74+FGOIo%rZm>LNMN+h=blq+-&Ml#1F#fb!vX{RL^*G=@0Ae z{K4o4Hy9b#e{t&P{=Ut;=-KzbjVvzI;VXXsE3DT3#%w2#?D|`^d0x0zx&zs&l(zu+~G^M;kj z|L7}b$>P;dy>;yyw)Ktr8X^#itKx%F|CPsn%=%c9JlhZHxWqvlSL0@@E}cPdc8CYk7>T zX#Od}$4-&?c-1pRZw~!o&BGUZ^qk8^hqZq8%gXFuDt(7Lzv7n6u*}7dc^Ntf;F+>QH$<0kcX(hKWs z{=gT;lX(Bbv9|p-G@gtv-q!QM_Mg_neDH_j+L&6} z{+*69SrESUJ=eHXR9;%kgJ)h&9jGQ8RAo0}3Pu&)$ zzGfB|T+W47wftv~F0{ZhMqaNN*v z_@_OqE#2rv$M!$XEG|~ZqHlfn1iW4Kx)*k9?VUWf`)@GsWPbD|6u*fHrT&wrdhn^Y zgsRW`4ty}>HfnmTD2>zcEHltT`{6{{o$l6F7~}kpW0*e z{#Q-4!K6PDZG9f252ZTE(|(2G`j}ePf3BlrKDOO!zIQS*aWM5;9XdWd&~zK^dX}f- zpgXtzu;VpPjXinh#IW1C*R6f=*w*&{4H?&Twa$E~-v3!`;=(SkiPJais`-WDhA2|% zzY}e*$p64py$(|zeFd$KivGh(G0kHf^J{(h)BMy9ORf9h_{&!KPFQBqsh6L9!HeW^ z{*&uKLGr7Pf3jquu6lajt|mlu&<(|nF;MEi_KDBY@Js!#=%;rwp5-Zz`tXCMTa~AN z52A++cJPNW@9wr@eWm@v8b>}e;oTj}zkhFJaiLFssDA%~uA;D7Yvu*gH|(nUh2ka| zuSc}JOK_Ul)+g#~Dv!F9SC`94%g3Ki`6)KVkA8|)KjV;}+Jp3mW&S?z3KzbATUd6F 
zJ&VcBub|K8FSdJ-dhGpJ>y@fo`}@=`{r^Q;UrjPXadUi7+WwW#_sCX9E_qeYr|~r3 zZ2F~$AN0znALb{II5?O7u*t&X#@)61=-|g4{luLMoX5EI+s^y+W^wWB$Lmj4fCoyn zwWe;6-;H9Em)fDYC5n{#uk&k-AdgJPsh#vwyygGXv2hbVeXhbZ&&IFlf0YjYFznGM z{&c`iy#x4hxc-)7(6i&uhR#bde_4Njm#ZIiVyA2Bwd>ERvnEX9Y`>wnH3mxkcfzl0 z9n^38#iqp3BMy2e@zjRT>)GPendT*a%TN89XYs^loUrBi{p;r(Ie*yd;?s7y?pv8x zp7no978eh0spo&3`W3}$tw(Yc~C= z;@4ss_WfxoPxa7E=iAATURddAh7+<$BLj_6h1|6zH({sDc(ELptzsn`C0E%6hFABx{*pDmUB zUpI#Bc*dT`n)uOUexP?r9(_@S2d28_Cr|azWn8m~SMbs6UU~hs_M3Re&D|rrUb=hg zo2}2kdt`B8zi7C+-hUfiMPZlM)K%BMVOPyB6t~N`J)-5EqK?D&nAC52Nj=0tbj%(k zpVwt6kGeqgu)z-gaMDjdzwg~Ae;6+L?y<{m^3&J7=Gp$gFN=$vhge1X-=ls-v0CSw ztuIa@w*6C2P4g9*UnqVT1GfFcSLA&JzP*n$MGt?9-nGZE4f7;UeL60B>AcLQPA59} z$*aOJcA=@KZv5mtlGW;;egE53J#Y#A|G`}Siek;?_}@vS&V1Ci?Vq|e9*R3+LRJ4u z+atP)Rr5^!^ao7iRW^J>=EFR&6{~dchoirK=|T&>|4E$mzzSWv|9nFt-U;RhCcpn> z{u&onYi)Y^zrEPj|E6%Qp5o5vu>L!VTb|CdzO?%D1T~HxJZv!4!`}(hJnwXT9oKC2 zchc*4|E0;Q53Z)~zq$HB##fZ+EYGK2H)Zvwd1{AZT1+kV-$l2r?0VE>T(3Ol4G$Z% zIy4`DCq&=!b-a?D)KR4yhJ1hMe;<9);bFP;kDfm5k#o_v>p$}>F1+sZ+w}jZ8tMm~ z+BzSxX6gTm8c%qCv(Gy@tkh9$v>4??C_6w%^~W%{VEB z;_jGQ>c8r99gEdbi*)p zAAkCcoiW^%&+rep3E;4_sFw~F?*2yu;c}QefI7z?HHE2;jC{B`~eL4_WSpq zEH1V^4vF`lpTzzM6jp10lh$d+|4#L4idr}G3&rnaz{Z0QYCZ7nb=Zt)cKkqoeP{8* z2fa)9i48ABo0rx-w>s#C6+Rg{>EQcc3M2Ygy7q|o-eY^@_>=#o2gzf8hSZ;DaY^gs z`Umnsc-W!1SMs&b-bK^;TG#gx_|$8dJfFr>9kc0Q6+hQ;8-k@g)$<^F*kA{L_Y$|g z`1!kj(CA+Bqrd<4j$=PU&(8n!WpS}SZ=m;It*Kx2#VW7quYLdMG_TQ)%r6x8#ej_m zADk+A@X;@!^5*#W7kRrrtEmePp$L+-^}7- zedtkpSUmneiTw^J(boPawe9zh)KgQv#16$DV!*~F?v$^2)(`xO_&Z_R$4-3WX`W~N zw9Y}+6V|@{6n4SBz7pKM+xh{>Y7e2 zBUnFbVt3sCFir)c2ip6J^36{^b%Ar~4}0zQ+Qr*EGcD|W*_fT{w~V!Ua{jGX=Le3} z;~&OV6jp2P)o*&a#IV!((Z5jKABm;@s~+E@SwGMPlYSXb^UbDTRebbnSzJnas^>xU zu)z-gFm~G6dmjDf(6H_nx9oQ2&nD9!JO9>`#l`wDRL}o3)SoNrIAXPq`%Q1nb{_ra zd0>8_cpwsOJU9D1_~hI1G4riSUV6U{%Y&zgzY{Va!?ZqZ#ZG!*#L0^;v*ju-tbE@F ze;PJ!Bz@ud$G&&z&Ek^3uJ_;T5!?Et^CjQv)A@1zLt2mVa?uOLgYkjsI`JE?DK3NqzH&=dV5Y;e*y}9p~+u z#l_A$_o-cR{@FW?=Yp)M*M9#?oekwDb|@Z-fvWza!+d!EH&i|1pm#E!*^FDo&xKF@ 
zr99R1AbQwf2Y>jc8~xkIdqZg6w%pez+%pzE+y6APxY+A^aXtQNs9#a6)_y0o>!XxU zJ^AeIP&^z1gYJJ#bwDRJ{Q`aBr>`J9(4J47pZTd@_0UDfY~mgGVVjFSUix=Gcs=a+ z#zXg=`$6mbmp!w%Sl^p!FRAB0nXjTm%WLn%S05*6b&}r{Kl2O4BhgXS|I&FVbU-J1 zyl(6Ab3XgNl5wn#uJ-YnFT51fJkNNlWA-5Z-9uLT$94}tcX0QRF=sq>&;Q5Vd&kdF zRsa8^q5>b#H>8k|1TIPFZGq4O%w6uAUX~V$2utWq-t-PZFoYh;hbC%(fJig+j!}>% zC7=RIF%&T%dM9?1$*YJS zG+wRC^u#G2yyP_*dmI0-_>!*=zwL~b!{U4S4_-W=b^Nne76<1Yz)~H`FW#-C)uVR$ zX8K%wSr71ro@vog>c3NeG28ZReah9Y`FNkyuMj=_pw*%Nrf<&P-|X;%PW+7z8BF~W zeOP{@)!v)=2ZUj{JBIIj(Vo-bvHkzlEDoOMYjt#~9;~VC1X+5O`5y}St#1wSgg5j& z8x^JgcZeU~vl|kY3(@)y} z!TIiHK9#>eT_pbgYc=+hhDkkAW$W{p>ma4NlX-@o=VEB7|H@xF{;5eO-be7K^G)^4 z6D;{Rl$ZKkh1^Fy`NE;=^*iI!b!UXbuNZaXysIojJ$C+~sd+)3KcA}}bRwrquX_3h zzkP3Fez`=3p5Mk7rT(j*c+Fg|6Xlily{57gWPaSYhYVUB%8R}org2{LnI2iO9bc#q z{>^D4zCA7Eh5yQlH;;qI?!WEM;%IyRN9RGjM9s4O4^bZc8nc%}&+jB(m)c!#EdBns zrgl*M@vA@IJDz?}CusU9;*Xpn{$BGKc1WDb*xTqkhYUaD_g7Ckt#im5QyYltN zamow+*N=aG9}||gecKbWT?cF=3Q$?g1e$!Gi;Uy}breRyT>Q8(_|KMSuP zzUC{Je7uZTKKuNgl*PgO*w5kpw-PmT``3B$+yG?z{G|`MM1&sMU`zdnCy1Z*(TVIq z^}`NgPi6Fcj+vdh5aJTouc~jJ1Y1=4_|`9{w3#s@FyQ~ z1(L4{!}^1ld*GI9pAQ@SVac2t4H~s|Ag{A*bpm2t|4 z-T3BGA8y|D(bxW3&%%QX+_%U)|IXpF=g&}wqCNj4$&)1`t%tsu9$w{P{-H;1*s1J) zdjEez63_MiNAp@eP2r&q+pl#bdX>Esyac8VHG=xcepLyNh9n{1AhwlNP z)mPK}iXyePcX+`1X!WPg#_Z+L^Kvv)_J5efRn&0HT&Uh(`H_^aj@dR~bE zrT!~F=P|5L4dJ)#zp9^FhsCLE^)!XY>@|^jADCYGy;D0fIG6gcYUkJ)-@Nb6FzVFC z&3i51y8mZaJkg`QWVqSSwlt0(@(qjiuM zKG18t$y!&XKYUejs)rXIld+QrS&=%zMMuAQl}1>mv!-^M9F2yd=>kh;_^)>-yH9RJ`) zTr%I#Gd+e@^`GZ%SU)u0_CGF$+xIi`OY~l4=3Cm&EC%&Prfe|{u#5Ugw8((TRUT>L`M>+waY|4!_s=a1#0xBZa%rStWP-tuaFCI38& z!`6ewTO2Z|JRTWrqn~-)x$#oVOt?L4cGP_j9PeuIv;A}22~ua8C(}W^$xdF;H|*B0 zhS=c^J#R!sssB0;o}-1o@#Mk-KX%YNv71a^6w!nD(|^+wr+ntuM}63&>nmS9cE@?b z=J#EG;L0QBfsf~(SRcBwIPCc|xyp(nwT}NycJgY@|Ns5X7+`smdDT}HKI=y=GV1|- zir$GIde#$&9)#~d?LT}~7{Z~)j~@EgRH<&3&r9wcI{YZ}J@aSH@{Q*sYF4PrfP)OO8F@g03kCg=LoB{b#S= z-#Y#uo5jKUHyokQ|4ix^@76M{hrXqH?<9u$LeE=KQR=_u<9`J3=kvxe;WNKPZ*?dy 
zexUI+de>)h%I87)feg0Mhi%`O_^oHIy&{bH)9OdAv;I{0{=Yl_^Ox+4QR=^o>MXsV z*4{_C*u4{^k6@~|K34T#d5j;qO+U$F*v3C>cJBrY&-dQcu;swR4&3ADcZsv-uaZ|W zUu&B$9zeOR)s<&)r1KH&Ec>%%6F;YNE=Zq3&y1)j^`Ci6>+N5v!-?JQ|M^L8fAXaD zn!Y)Ef3s5u=)~Xnkipb1(T5dwyywmpj@~J(y#3M#zH#C^UhQ`OTQiHpz6Vr1f57r< zK8b359@m#r9_qu-6r+wp%hi^KXqF#i7fHTyis)$SCPSEO0Cf1VeB zUrqg+g3K@Uyb~4Hf9hAPH+uWqep>n9NzuC`F5xZtYaaTF9T`4D_%siC;;JxQGXL!} z|Mz0vOb2oFpZg9$c<=)?+yFgD->m**eyq>X^Vi5>{dZBc>-fa;T!*Ut4|U>)48jAV zZ-@9Bru88!w&M%^Y9F6G|5b~Ie*gUBl95Mj?=_C^Uv}T^xGWC8sQ&)3sWRwPRupNL z?SDjXJN;(+rOwdvZgi~d|3u-zztn%WL-m{6{SUlVXniM-(|M)x9O}b}_1}E{%01o; zORn?is5|-{<~7c~f7O&ye* z6tRQYO(w4*deGKuGmC?G>%aTI(TAf}Jh<`1(C%=;!2KT|*8ffNRBrzxDxQCs%txx5 zrT==}%IeRx{-z-F4n6Njh4mj^Cw`^-fYB?$(+;hUr2qU*zbZbB^GW_xuHr!-wjSH) zxMu$;nGWLZ_`fLz`~02T;~yvSmG@sMztz+8K@70|YrIbof7?#;EDrB_V|}p5_=9O5 z(YM1i&hn+(iPq7MFD$nI1;09O&%?tKpN&~=w_|pNzw-S5aNYmsGB2Ay`4z=F%l4lh z|JO90IF&FUjg9b{frh#$!368@&wI=s_-;9TnA3nLEMum52u zJ{gufX~N;}|9L6$ar|TRYiPc~@%fX7dh#lYwYBuB2l*W${OSCVL(ktLNvZ$JSGxZ* z$wNNo2c~+fL;HpwSlX`0Gnaj~Jj8>_6V4p`m7Uk${q8V+_aVEzeR-2QZU57q#Zfx{ z509dFm6u-a^v&wY#W$5h&xg@a+5e>_uGA;^TyMMiC3?$S)&GX@*!HLK7KaQfk4FaE z=)>e|-yJ*uT^-?>9nR~2)XyitZ@<5on8jiD+pnwV-{4miukzCPs`kS=Px=&kK8hrj z{a;0L;LrQU(Dk;PU!tcj5I@kXe`-fId*U~@`mpCu|6bd9FBkUy;rTxsf7EsG+x>6N zEDk<@^H1aXpN93POYO;e7VXUazv_8zB(it2j|)9L(O~?EAHQR=KI!_SKCk}R!BmfI z^(c?|*RnXUn_lNril1A(@`q9Xes`Or`tK6fJaFg+T|4c>c4YgXrq%%t(EWd|dPR|1 zpSLD!UG&X6v5<%PfuW~}4yFDRH%a2HA36`?OZ@7`_4cFMYn{;vafG zjxS36hewa2tRGI|Jg9!yLF}oV)?@aD@+N%n%qhZGh2fl2etOvYuT2f-y#3KB`71fR z_WLjDP|RPQ%i~Y2pY!(c+vl3vS#P1|lkAI8)qna`9}wMaeG~7U=$UWj`5*OzFO4HU zMf0cb3Z6OT3!6^5Vxftfj}Bux&%gHh%Ub(up8wFy;%Mz(_5Vkl>OrkXw5_FAdujV` zh!5`r<{5fEjfPVHl_&N0=t26DXL0bmd-%b$kLcTB8t0wlF+H+kJHD{~3*(PmW5KV6 z&ON@p!D9!#>ov~y|9KV%^UkNl{!c~zrJ~I9*a-<0zX*ghaNu=J!pC-`Td+C zbtV4rg*A^Eu;Y%uzdo$<<3UGXeqe*`$?kuyX+FjL=J*51L1}*RYAwyK`RN<;slw3n z&qz|$f9}Jke(E)JQiGo&SU*h=e=yB&`h52OX4m;xyc7SDJcs&l&%0ZV-fL7dOx^C{ zy(V7WfQS37cpn?Cr~kjx(7cKweaZ39+DVh&u7uG)PE=OJjcfR0Z&ux-U(s{ 
zQ#~^CP4|PP;{)Y2e`Jt(AcJl6VZ{&U{nf8ue>$u(d7)>oT5K}gk)3~@n#EB%@5{Ww zB#%VRTpz0Ez7g-lQktLggr3i%LuLQd`>$%s2P!{(2YsTaE)YMEK9tsr&IdljKI+4X zpDnS>%}4Zv6W+UZ;9st8{r-jhe>022K4%BY53tj`;#FSaSGxa-el;bAd54~VMMbIq zI=}h`z5T7OT=d=v!Uxi4>?SKOexUI-dh=PF<|*;br9Lcj@z3`x4tq5$^6S68|GV8T zfZxtPH?ug{4*W9u|3_Lk{^HeIq6g_4a@G7o&%Y&KmvDKfsQK!fiEMqso*;J6t38#i zo^e?mW@mnRqIV+u6!A~>6*76k+H1V`&b~7?3ZswOYTzRShEtFAzdMV=t^;p&-WNYW zo#^4!yyDebnqB?u_oph8hxfVFBc~rKzkk)&ksSCFZ>ap(L7(U?zt)c*nC>4N}TQaQ{ojZaYusl3#i8`fGmuDf6)7|8AMDef~C7*7awxtM zNlN=4C*1H^KWeH66F;82fQ1nsa-N_QIbBj0eLzmPFXT{sHByxNuYDrE^na>}oq6-U4;h39MBfg*lNl0c zGWrDD@rBw`Z-2YC*o|S-X50Sifs?m|uj>4>@29_i&$BpeKd$o^ZENYBytebtDx>Ff zg*rp=wHQ$9zs7MM1ApGHhU$kMB%jF@>&xtoScV-Pxj6Zt{ShaL2Z z-tubQ_T=AY;F1&t?Nu4XLdhv|E^gjx||8)Ov^V50H`Tqaw*%za# z|7_>hk4ALQcprJEa!oM(zc!1bUyA74q0K9aH$C|k+wp~d4{!GDH{M?*^#5niF1KC( z06g~nYn;r(j(@oSvrF}$<`ZpeX?*GZpSk#8r>~*-Ml@LenWyXR&;O7R2ez9pcFU{$ z=-Xi$=aafjR((6ZaCzsUcaIsCXF9x-7xw(0ZjnpJKe_5bCvv)^-qP={iKD*_i44Uz z;|uG*6TMfgs~_{C?}o~U9VFgl@>cohSsc_~igRiQ;X?*1^zeln7XR0SPrtHB_=W5G z#yNMlo`1Nz@;4LY|6kx&6tC9Os~)6p%qRWaMh=DC3S8NL<)wb&3=_NgC3>r)s{f5F z4*ML?c#A^@EBf5dKb*Yw(67DnQvYzlzztuTbWQ8|hs`Vw?t2DpKIBsr@AA?+(bW~L zo@9RThGM=LP}P5npDw)K%^u<7eTxhlkJgXA9a=u-PySR^eLKD|WYxom9Q%j)!>}RO z9QpH~kLP`C`~Rs~9CLjBP3w@TS{=Udw^%kcxdc&z%9QdVZeop-UQ#ow^{O0}lo^eswVWnH|-)8#> z@Nxek`T0GS$!vdGhg4Hu(jWW%yZU>unZ*HbC>DrvssB#=_#a#R;Wt!2?4VEd$cA|q zN9qr6N$8(lc>vInlVeb{3B_QRjNZ8CiH9~sQ!>9_Nm$Y7QX^dNmhHe|b1 z48?-k7sL9W*wg(9GVfF7)eh+^*v=n2{)R4=TmGJ`dg7`uxb>#`eSfA;`r(ja`eTwOKRsRmo`N5kC4+fkFa7>HmjLX{FBIR7 zhRXh@_utl(2XrFC2l_;B^=aSm1MTxXv0I$-!P{4T=ipnPJ!{QuLirWd8VpZSIdroZ`M?=C{k%SUjd`@`M`C4AJo2+$~{2Ri9 zA9iF952!qIs)sMU^Ov73__tj)37@ZV$@i~Zc_RF_{~4FXVdp=W*55xS^A@kx(mQ!= ze0tp+xwPKYKNS6AV5$FV=X(|P+kQHaVXe;_{hn5z(OYkE^h?q7Q^g-SMXTTS=IyIK zth)9Q)Bbhl?P1kpM~_*5+Q(jb?elk><^h-2-@ml$Kl6ic6Xi+tY`4ra6mny8RsV%$ z=I0FgbL9n{$ndd#kU{I0)`Pwsrg2{LnI2iO9p8q3+wB`Wt^b2#LdT+Sz5Ch*o5N%G zKTOZ!@Zd^%|3g#tVA|J2t~>Qd-$>+KOUE5|5X!>y&uwkHO0<+LCc%?W3R$A&O6~ZJ+fjI5Be}{`FmH` 
z;qhh|cH*yZz3Sv4%)_of=E|>_UmK4<_p|vo)b4ttNV6RO59w|9PG#+%)l&?LhN}K^ zA0z(81D_&x5WiHmzUe&7&b&IZI83iHe1?70hwJuwaJeD7jtf&ZIPJJ|_IeRM>;J@< zzV&}Y%j)x=bJc_7R}||k{a;-Dono#?Ttnng$PXn;{dZ#LJd*Vz7d`JE`iUL%PW((J zuOfO7|CA5e?8-a0dh&#oFL~~FZ$4fRt1Z36Q-6MT9K81Y4eg6ySod4mv7YL7SZ+RAn)tPKp#;1Klws`#WNRx#^c2uSQs~^9kY*z<%ki4mE zbvGry*>jPxo1S`|+QB~R!)C`ke9qvVE)82wy02%4pZ?A(pZ)%BdKQNr|Ev)I|Kd1% zKQzQ%wWNOf=AC$!`mg#>$b)LC`d_*Z0Ut;orb(XqWI_4|di+7+x@PZRio>7yK4MsJ zosWON%R2Lh(U&f8?S@az>y^j)KQW8Le*eBm{QmWV^+)R!uhwz7$xdFi{rzJ@{6ZlQ zsxf|zxAj;PKkG*>vaL_`#}4{L46sO;dSw53 z;IZTX?koS zhE?k|)prQL#W|HL^yII?u>QiE^}phfHA4OTuRr*`z1M=*u77EMj!W(LU+6WzR55e= z*E&pAJALyi+xDtF)E5dlP%8Cbc}w4ur~^#=_#O{GNS$D+M@|ucL*_$XWU!4s?6CD^ zyKT1CsIc>Q`d$9F2?xPz_uqD9aacb(;{N}5lhrO>Gy9+OO3*vO(*4RcwKoKrXDB4V z_(in!S|8EdpY6g>{owae;RiChyoU3w=P{+)=9YKCl7Kh<~`~TMp zIZajjGw7#z1dhAt5-$DGm`*I%q4b{ISx8WgQSZuj}Uj5nQx|eKye%kln zZmGwPgP3nk^@<|RjCZ^eM7H}M)Xw{c?cVAsKmLk!xjfU_9`h^?c<2Lspm*YD za)-#3{<+$juj}pakwN&770FkHVdIxR-*&k}^04t?_f387q1OGM-_7E%^9~bLUZTzO zAG8kfo@ISjJ^w`KM}47?8+@wz&-)O6@3gKICy9QppFdWUIQUa!eHoHh=k1ZfHu|vC z#Px6f`3>8LWw*TT+-HAr9PxJjt1F9xe&lcH_4h=4=hf78w0&i*zO@_;e& zdHmrC5>H<&uiBBZgXmK}^!S0)liHDuC#i34^|NUa-BM|We#oAh;2kRS(&FW9q5AzMha?!EWf0x8hOXQ0FbKFc{L90XK(6_@h z&Zl)DE4JecE5G^b$y+|~X;`hk(~wJ6A59-PZsYxKI8vYg(9Ghn?`6!tCVH0CTRQ&9 z#REGSishrB)PLpYJ|gBty@nP2^iJ|w9Q9WD=i*OYr8wpDAbiMR8-3@H0UI20)~k(1gReg49)C|>2I@l`$locbCP8w&ZOyi)%)5C0=({ZPFJl@B|J z-DL7s`PZ^II8G?Vkq?9q8Em5uJKr_x8I4; z->hO7-)iE3ziqD#(KEkL$O-1k{_A?gpZ=upL+l`Rq%wNY>{^%A>#{gX@_(og{>cZ< z?RaL_5U!Yi_dQ3?pf7g)y-V|gOY8ktKz2oXv;&G(Y-wd|{EV{cWDBKI;yHUK(*??WT?4x8t8?76m|+0cm}ORq9$&)-Jh$l@R$eGJ9QqVG!VPGr7EFfXrq5PgcN-SSrS-@Z>! 
zXDy4v;+)EgX}&p?!{N_8dCzb5)4Ky(-=FAz6_3@S^-DG7rPuoCo5khgOW(;83YlnW z`-exq9=E=KX@28X89vZ!d@7@7J2JbJVb?RvPkBl*byV3a^x>Kp?^*Ztm73wo-Iw~y zTPr@sJnZ~Wp2flTVAsDIs#g@L^?l1^C$H@Pzf@*F2QTvug*@QAs{iTvN#mhk@K4jc zR|&!oGEdO-&Dr~#y+iE8c~>FNp}uqBA&u*TS>+_d7K;?%IB(KRGF^#<+(S!DWQeM*&r+MII{h6$G;;Jwl zwBofV{Nj6gIC!2tkGTBhiR@2o|3f~-yr$1T*She}bdW!lOV9tz#e;b23&k4IVEs?> z*BcUtKk*>?6ifX{cuW4&r+(N=JWlGV(i2yOVa0i#-skk5F~ZW?e(m$Oru>Teo_hYX zsd`ZB6m9Esn7Zg2vTa`)NB=@0C#p;Rcf!o?h?$R9J%~QV)Q)WRG_yE}Gri8+^iKTz z93pX57*;>%pMM*B^A^I|>YHu<8%Y@3b!ZMm%WsJJHnfSJ~_hu~T0t)`|-2 zf9gN6x4*5&T=d2}N#y!5g7Aap*U{U~eDF&#jq^_8O^*yJzefgDAC|d($kq#8#G#|`}YlAZnTA93%kg9Z|B`=9Aq99+lnUbp{T^`H|uT~aUi4I*3rsaw}W zDAtV>rT(j5UGp$6>w8Ty`^0X3iQei^Ui?7gZDw(pUj4ljyUW!-}*%V=|e;O z+y3rG{o!ke%-4{*$cwDlj<2)-N$>yk^rtWG9QfnUuO7d}o$%W4kGf?ZrQ`ox^&t5b z#X8IJlw4M~<8As#KIBlW7eg!iUlV5LW${UV)(g*l!XHfI(6_@h&O52c^vH_s_`))u zKfd&zm)I#RcmG>gjJWG!c(9`s@H^)6X`xzpAPOFZ@BfHyAb zZ`;jxl&3qvhRS(@PULh+UH1GX^`kG?kwdXTq^RmYeWrd7AE^DrufBDpv+v6mho2&P z@$SR(H^`qLyj2+1AH3WHw_N*t*x(Oq9{ky^uX>HQ`ycWw4vw?)^W*354CZI3ajZXy znq~W!pEYJa@uUCL7m7TFmin)K;(dg_@xhz)GxamMqW@;E#WHMvX?o(w2li1PE**E% z&kx%F?r`TCw`}y=zqhVGa{pnM)&cVSuZHRsMQZh%zIrE?y6jeLKCDmd%r6u^22}N* zIQ)?fH6Jq1y}=)}`Z}aO^zAT>qkhwqU$GruSiJM}J#XDH3&W4!_u^kXF&-X{ckH-x zdKQNt{bU@>&rs(N@<|JMGe9vv$C-zD+P%i@##-1pD-Nc_PxF4ZHa z=$+)Z_$pcTX?!Y&{FNaaetXkRLjGLGGV870dj34yf3Eoy^9SPpZ+2PVu_yDFs9E|i zw^dc{f8YznhB36%e0% zf4pdouxhsS_(_+R|9_`j^MVbP6BKQE$^6(4cqcwKua4ewDC7piQvcPD?{U^A;z8xd z4*EooY*@?UFn{=(iQb9qQ^Y^jw~@nyjk_k@a^JKtadgic|Nfx-`^P+sgY7fFH=ci( zZvAVh-6?7vBF(b>kC?q5+b#1BMQ3y@_1{T+T|dY1w>p|)XMJM_y%T#XqX*5Ni$C>Y zhu>tUepUT%qYw2Zp6@q$=!~$*2`B&X^G{~LYrj8gDnET5k=*~2^jo}JOYh`0eM4q_ zGC$@Wij87GRsZ>4LHvy;mpsNlMdbQfg7|~hN3GxV`Rx78uKpJ9#J?oZp*{?IW`)0P zdB^{T#g1HehXG%2J%5SoUrD|V;@^*N^R8`o4av`veweIvaQx3aKzNWtv2hG7^`APX z3p4&4?;0k4^_zP8TO59hrtg})zuA*G@5H~){on1UyL__fNwqNc)GPK}>-H1iv-1xP zsnBDMB6COdg$*B?7XwtlId`GrDmMJn|l9=SYh-=~a+dJ;Z(eWJI1X+8LX zY)9~$p8Ay!UgMifeb{BE`!C$HI3nzO#Xkm|-k1uH{r;MHf&BkCbtq;PgZk|G8^q-j 
zkj^g@n?^&a{~A}pnl>E)ek%9 z6FstFp2cDQ%rn(@h@W=}_E8_UTyf-KAB~>znfreBKaLF%GT(-Y@3AJ|8I zIP+J<&bz;NT{!!%-@I(AMRV%6^FPxxkD}F)-()b1{4|V1$Hw57gg)|+W%`j$5B4 z_D@}ggX8_5i}OAKB};i}e0twnnjbs*P{;}UQvY3VZv1)w8ESv5UmAxDTHeGTdljZ} zR!7Bpsp3Ju;d|ddspA{p9Jk@;6PI56-^V@%pZ)%*mc>Dz?e|Ab)q~^*o!S+-Z_1;G z7k`yQ(TIUn{pUI;^;4H2{N0fztKTJvKbZE>^b=?AZ}twcTfD1~=TP4{c&!y5xcQU` zorC}S#9{CK>nM2b_*3h#-(fGO&mYb;ucAn^IsVal?f8fKYZ{-(q1YxGs`}4!Jggt^ zfKGVvtN&T^sNUkptBAf{7k}gw>5o@FL-^)WA9n4(+42YcW#;eicKODsZ!S3nKI=d8 zQp~U1_=`Uv*~GRt)n^sM_~d7et?MsN0@!ab&roa|4W<4wzox|DpVndhQNQ|Dz5Oi? zK1KB6-G}Rs@Ht`dittup_{SQTKDFIE6FNIaFMIBWBkR;}_g^)Y4{WplCtjjj$3yn} zJL=4P^HN_Zwu^>R|DE{rJ7WB+`i~v-8gDXnB>gvgF8P=b@g-S#`>O9;XveFLx^Ll$ zoeLecYjeosXToFWAEs*^;HvunBMsFniZrv|HD0M*=SSb*XS+}3P;4IqOZ|sOPEXr$ zEcpzb0gZ&-5$~ zyAIh<+4V+|T1)TbHOPL|>V!w<$9zMvLkukSU-@{BsP!q2?xo+!VF$g&r!soB2eVTj zsPR@mGRQoT!8ZC(zxpqq|N6qAVcSs;j=QZs~}LwN8ro<8=O{D#WYIqc_KtbF=S^L7r~GsO|AQU@^Uzhs3-k9D`XnZ;!Wcmeq%{BtuOoxvU0!-8I{Jpc^-b-}FBH4PfU5qt{r*cIx;5{Eg7AY@ zhx(g--0c0$4nOF`-}sQh)GyI@4*ug>XMBEK|IS6n9Qo?|FSh=FYH#Ij_QuNp|4n!$ zs`YtXm&f|p=d(}znO`X6#?z|)b00hYwjawSBfQwbRF7ymi$Z-~tMzU1%J4z}sD z#V7t@^L1|7WQRX46EEgZ#1FLhRYUkq&w6p17o1Byd|}i_H?H@}EiSCJ-rgGxIXF;X)$dRJ!utGG@+nF+ zttC7lze7Q;nqMe(izJo(Pp`k{lGFH{$jlG4ylOXD>&FlD_$iv6c_((`n?pT(VTsEQ z|Ll*qTo8sY_M>9y-QMvUZ`WTMSsb<>oYVjRqE1EbTd1nj>M3@Yd|jH?DZ-bFjK6mx z8zz2y@4z3lI@BM1J51wBd6RnD@r6-Szd7i$#wKCZa~th?%KT&Cv-AJsvN$|gasA!9 z=vG>wFFpSu7gw8)%AsgRL*@3rh{Q23+b$%`K3}kd-X(D=qo)q@&&3~p)3bhwM+WCq zUlabY)+#^U<>IZ*4x^v^WZcnbO!gXY+kcn%+53B7^8ZtsSG>wg=2h3ec_%)`*ASWc zg<_AWsO-Ny9<;Up&l5f;GJK#<^wb672ioUJvo|09@EK|!Om!;uYUb&M-XqvKTYMS3Fh7L`45w4a^Kc)R^(mLk zwmy{~JLomuWa_B$Pk2jl$_Fp=G#NX2kU`al(N_+;^7P4*!#bai`FQ3ZVH0onA9iJN zSU(2n_4kJA6-AoO_g^O#_QO_ZE_(XL{6ev3G*~{Teu{PZd2nm{uc;mE5E(wuYrM(o zM?L64_B*K^89qb!G|!ysJBM%b;iDgaZ+z$QtG>3!Z8yIUzn%XXm&MU;|1VzUrSWOs zkm#@|Qruu0gu!G)-J(ba???bCA;WNGRnXG=sm)aAVI>HXw zcb0#7sh7i;voBs{k1Jn>&#r&vSse7s&VMF)@hUI96CHg+CQpi?kT2eq+kbK&5p@}A zKIR9K*JS#Y^w;b)sk<^x^I*4m%%wi8fA7eP@4NJ|P`~BsLBm(S9X`AM(v`)*_Td-P 
z&)=jz@hUI96CL+oS)EBd^A5#6F`(4{4&_-w5P$m|s^Bv}m&7R__238E{-;?Hr+glS z4;gHu?;JYc0lzr%;vaMleg2={z3i($q@L3LC(q*WeLeoszQJELzfkNedAc;8Q`C8I zAG`G_S3BrL#t!;KZ*?dyejxt5uT0;jpBAV2EDtiMJYnmfz20;7O2>q4ho8CGO;?X5 zp8qYgc=gX$((Ru*6tjv!JbgnZPddL)>=&6z+rN_pZO^}HJ@gACpUFx6=t0{)@YDRI z`u{_HIQ*@B@7Z+hCgIR4mU?H~5A3>@9sf_y;;`$#dHnyi6X+W#Uaj+;COdgW->m+I z=u@l{j}}0tG7O# zx7T>|%o9WpqOXioKJ2MKdibg^oHOz6AC7(Wm`n#ew*A+{VEtW4pZ`N&6p07n@lG`K z&Gf{po%%v?U<|PStKBO`UxR`oo7F8BFUy-wx9_?=*ihAN1|`?ElyH+iS=s zZywo%-}>KFeo%J*$FrRqI`K=F#+TmzmWwBLctUYdG|)Hi)K4+JkJ`42T5rE}-d^L; zTVCxOdXRc-J;PtoXXDd6X+C&U41b-x^A`8K`)PRp&HD$xk+q)xDW@S@|MRE3cKw$; zT2EFnsLOtTO8v~IDS^}#ii4w~)c+)p^&P$h;bZ@V3{n?}z8&Ik=%oIVton9*!M(TQ zS(6u>I1_i+X7ZZfhsTcpyOkeos9a01L*#U6e0tstnZBfQD83gds`}4y0Q0i-Ks`<> zz;CX9e>zU$;ZM=()%kiS{-#F;Q@=#tIpETH=KtEmw{#BJ`n%s7_RdrA*#2jF76QSTRd_Fw&M#+ zFZ1Epd#8^MOK-m7J^8<$_R7Qg|GNAfC!m#`$OFi|?AJoF~e|&7vr;D}jKi^05G<#z%k6)T*{n2@*+o43Y-Vb*Fxl_KH zBK=Yf#o^Il{P-(!A2su_&!c7xv-OJ|^crt+k{3N_yba+sJ#oqhZ{_-E;~!Q&aMyDO zd}YzF$_9Ua4_|udirN;_!i~4BwEw`+;gy%q z-*i8dKdaB*%dhSw>c7UZUuQnV876-9S2UmMEsng& z`bz&_27i$Gc;z#MZ!Y!W_%~m@?(w@rIC=c38;|{82S59Nj(b3>qn5?NyhSQ6y%SyO z`mgHK?JE>V#emBG>-T7@BNu+Q1Mft}4yJnM3E~GbFX|&6f0JDnM_=`J|KVmAj+?yh z?F+zb*WafqKmF$ZtETF8y^B;{dMCO$zyI*)7*N%J`eJ>`g+J{-e)T2Ajz4Jq()!W2 z!!*u2&96KrtG*py7=Ft9AAj0i4@>@NqZhV0>s#iF4{?HS|+xOo_7KgnL`^N`hZEN$bi9KDW_0-o?9_r9I^HY5&j+J~} z>hF5TwY~pD{plB&=1FDzK%e9_J@sP;`=}55Eb{tgkKX@q=z3`CpLUvhHhHc8r|yQxp*SwSsM`M7&saZl=yW_%g7rlA;m>oZZ&#ph#vN+h@?f6Ie@ux2FZY{Cf|7B8V zEsqRxtA};cbd+^%N&Wh4tTwo&4zWXZ_iEdi=X2 zj{6Ak2aQkHtLZf!ImI;2laK8KxeX7zVd0*=m%jDMLqq>;kM0$=+y{R9{GFb~Vc)-a z{)5hse(?SvUZPs-gXgAVx4uz#Ln13(r*g>y6sf?cWSF&qe zrdNK8Q(o$uTYcE{yd6iswa%_#%!!j9dSh@-ynX+gp2cDPUm;$9KhFB6^@&$`NnN)8 zcjA-w%d0*VCq)NaugQGtX+QGlZoj{#PbqpQe(0$SL=U2`j8i`BsXu!7sxZ_(`TOPr zPkJoVL7d(Hsq;l|$DcI`Krd2ji5{eHR%g;z>I=omk;MA%qG;!JO20qFo*;anS9>a3 zJzZHGW@p}7r|FZr;WZx3GpG9S{dXo`bK_HuaOTw;Zrzw<|0{04fA5ZEW&G~Yw)KT^4L{GZGZ-cXE-p_Tn#N|>!5xyb4J2|MVW_@%P-tt+uppQ6Qeh@W=}_E8_k 
z{OpbS-ahBaF!rhbuS`B`FY2-9Z**y%Tybzb|NM}5Dr;Vm%1h(ZzIi7;^c!S;p*STv zl=`o6+((4J^*vW!^}`POL{D8HexUKDeNAQMg@-(bZT!P#r@nl_M!(3yW`F@>+?31OV?kBr#_WK@%c9Gxj(=*xZ^vJnFP*nf^j3%RR{2vGc8gO!c>AjFTx7s%@80||cNBsJi8$1gi$Iaw792l=Zu7@9(RSfE~=l|31TmtAD^9#jk(NOBYQ~z8Lf7?%} z-aE0Q|3AO}PW>SHQd#xXS%qPV%lE!()0-X(OP(`);aQ*mv!~8uT5p7hz8R8VQKDL( z$8-Mt^B=^}Qva1V-JjTY(bPHerbh--zeFEayKC2;Q=j-OtbW{>@wxjxeX;$2 zGmC@ohxz;Q2E;+O-8Izi6g8hnvuyuM_O@F*_0_~4iqqqZQvaRU>&gAh)N81IrM@M4 z>zC#)`R9@kUhEd9d>&*T$Y2|N=ZFtxd~oxzBRfYfxyilOgpcd`y4d1-|p9f67XTpXbo_5c%Ki;PG{24xf z$H{!y@A}2#|Bt_5^QR9T;+HO|%l7~3XYt5RB13Uzd{OGZ`jwu4o=cpq2PZOq^=WEX zJ$;}q(DYNqA2~(*z493*^O#e8Sn;Em_t}w~)ek$E z>MgJK4L{I$6T8J}9(dyJK(xM!NdJOwtefo?f1W07yel? z&{JQXc;adjfS=V!OXPbkk zge3=FJ!7M5o@ZXx|6KVM?e{N<-n(qJgSt3xgKYb4)<-Vkp*SzTDD~fo-QuSuGCYdR zyXt;6<4OF9tHLzSr*$DKR`H+@YkoXvi4QO92x~v^?*5M)ycXMwegA4?aqvFKU!DE` z?|988-mNA2`cTnX{k7T4p_mvARsH8aF6yUV!-{@VhsCKM^`HlB`)Ej=rYFvc9$t7% z#@@z1oVe^Yqn0{mt#Hy-zrOO_t&V`lol*yrdrb27lH3LUDd1vHmA{ zIIgsQG$d!$alZLE@iRH$HT}fie&|7~U->);AF?8GRT!2zV9S%<>wG8E;Wf@ae{(U= zul!nl{w?zc$zKzHmegz4znt=^AMY#b3&jP|!TOIssQK`>?H<0wuO1@Z9H0Mzw+fk$ zA^AEak4ILl;z1vdoA|l=@WE-}^o>s1=A#*x!fW?m!K;`Lj?dq|ob_Rt_(?Uhoc|dm z8u%@)CVH!QZ~w~vt6%-n-u~pVI+WM+joJH~ojO1#{>Fz4rhbV&Y`^Df zE6zW1ld%0suU+@S(LG-6_W7&zu$}sj_ydfm(s^e(s3Vna`=_3oc;N4y$f39>zNqRy zpX1gKctGXB4*EoooaQ(GJc|Rn>8anT9qgk%?EJ@}2mWb`6~Z1n9{JKyx83iR&+dO~ zW^u58wdYUfst3uhDAv~Z0s7{hSZwiu~>ve~`X{ z=-VOwhU8-&$cpXw!jM5bfBm&zZWD$+JYnmeQH#QB*WbIdI7-(ao2m!3f1+(IjgRxs zPHe6BlghbZC@zjKD*Hc8{FoQ~B~-uq)xG^K&WYZGxyoIF_@(Hb`gJHGS7DW(>cei6 zU;4v{p?ihq3zv@i=<|!b^4RYm^DGXwul#)d{YyjjiXzS2-fO&4yXLXqU%1|S>znFB zaY=Nj>VIAN>5Hw$uHL-ZL9hI&Z28AYyxAKfV>i9d$2+xyebk5A6UE7QUUphob<9!A zy*XtNymtKGt$DyrXK^d|;OPuW_a0&zkap%7??dGr2q>Jv@k+wOnzERH$u zzfQ#JCq9{fC@vG;F6F0wuv_%>h5R7;c4&1dFaDfoM@}(~^De0qSuu@I<*?kzJ8%5p zOY)=JnN(imxc=fN%~Z}DhD+-Ie>J1po&zINE8aV`E86uJ`iQ+De$*9;%cG)l``71~ z;a}Ac?4Z|tsf<2df9yx?xI^R3Kb6yYshxaP7{0&xcTV5`-C8(hjXz)X+SQ*jAG`ms znZ;psETs3pY2R%Bo8*_Mnd?LK_WWU|eE2s-roK@8Fe)nhua5XzpK8kMdh?lIqNgqp 
zKalkSzv-!4`8?Q1edi+o_|djQez|n#pg%nEo8PXtEcMv+mvLDfyf6I*D_a z;b(r#3k<~-lCMkiy53mdOl17&kD>Zw2YsTqIr*az z<5$1>+}?kpdEigc_^HPwd6B`?FVTllhrIU7#wCx1HTOL6ie1)U$E&^a{KHc5{LenN zz16Z_gLst4rTgDBj{O(&48@OQXl4Jqg&F_K{;Oa8zTW;8*CBfPY5H-q_cwbI=biY& zWAYs8L%#J#2P}5VlOccfrC&U<{Q|_>{kIL}SIlpSzdxE_p9f8~QqoA7(5XCKuzrIe{-E_s>oNVf+54NF`aviD#)k~1eu+M8^wDQK z?RV7lu+c%^Sn{-<$GqBY`yZFZVf+98`tKhnNxm-4>w0577uotzQ#+{h#SVIx#Hk;B z!w=+rnc9)zGlWm`%&ER}=r2c(+xyP-I)`oh$-CbevIVmspapWnrT z@T8x6S;b&}^ewFqJNi&u9hv9{da$njwmNFj-Of9~haDu|WcsLx9<=!-cKG2lgwJJh zw9}I(+_3n+9(?+hO~Nl+*Ei0&`)>Gc|KFv2;7C3GcdAzuskQW~2k9I9>HP2u#pLXZ zQQ7~I(QMbV*j@~ielbt)k~sQR%{Yx9BCid=_>s{@JzD zj#KE5?fI8x58HA0Tn~Ti0_V06COk52`f(dd#!uy@4`w?yTvwM z_)YA#|0kbfF6|$MADAV>=I6vvy8k&BJ$++-p|~y@O8uvfro`b-ykX+Ua~JUktv>Zf zFYI6P{6qLbClSVntO#!vhGniFvh{+eZ55Wec*0ZH{-x5{#r5Ks_TP!Up7hH;kD6kK55M{^MW6P; zyWalBGfDKRKQjE@iJxIb|J(ToxA51V{p$UPg(Z&o)7#g6x)gks*B=|2H}8!i&GP*> zsmG50(ZiF-p^y_crTveS44g-`e&nLJ^{IZ?L7(WYFPa}e5Pv?0O;5ebXMS_3hcB$Q z!*_p{t^ZC~bA`?SclTq~pf210OwHo(ip1duidSoi-aFCJw^X0bFBCV%fKvZ;e*BNE z)sYMT|9Su6Z3@5To6A0z=IewHeb{)FQwCmlcX!xi-LFhIZ~RcNcDw&7SAWI)iP=B@ z89woDEz#3AWb0Q`e3@S;ZiO#escp5xy!6H}3eu zhkpG)N4WjR%gwj%>C@o1>+j7h4m;o6Z`WnM_y%nK%hj&)7pb-MPF~m3=U=HFUg`_Q z%`u>=|9+zI;a{o)UiGVY_4c~s1-D@~#!OY2E)kR(U5SI3j)l8^wOZko-0AXX%~1sxK~R_2()#1*tO> zw?swN_Ro65-#dvfVMFxw4FvHAtq!dleLGC!EHCqSk`H}5z7SSDYpD|sJUfKVhVOLB zmD>|%*WV{8A6Se3|2w&~-zC4IM754bt^ZE>8j8fJ9Ew|`q11nks}GVm=2fag{jh^R z(WiaH55(X6G`=MNhx)MN;~Sm#lkmH+;{ungxZ5{d$3J^yaabR^R9;H|e~`S2Vl6MJ zm*)T=dyPxwP}~+tEH8eF+{akzQ{smm^lDFK^rhgT?aF6PdF*WV!<_%5E)X8u?sWT=YRgOdOW&+c=BEg6C~l7=mHiKr1AnU{7rl3a z*uhj!T_AoSeW;AnJlNqe8N2qUN*~-2lX_O3u~ZmdEd2I;-7n*3$N$YN4z>foydM9+ zt0-QrWm*rv12Vn#-8+e4-l4c7Dk}S*_x9)W!0L0o?eO|Uk8JgH^^P-MouBcjZ1Ku# zJjh@h|FGiScN{$U)u+SC+YMNE-hNNOW9L7+vpDQJKtttPZxpGuga@?yf1JkUiqxmP zp|~>|D*LZ_tshAp-U%;uFx68Rh#zS0E3MD;D#K^kM}0VBjZu?NdS*sAZ}R!$Cf+`X zdTje|$b4*jSV*sb)l{!2(rnKEYh9)159AuJcIF$3yP~1gfBd>6&ic_%o~EKxRKNNd 
zk?{waCx~9W=i>Y`b%Eqp8Qv-kOMZI%*>_JLAC{~CV{dtZ&j=X5L=?s4o#<3nkac^{}>VJz_d6;*C%#ZxYAbJpeJH+46C3PSxw&M$n{`uKu zFB@}G81nQBTikL+>-M9;GQS6%-~;zA)e%vSZkuKvu+ zC-ouYSJx+7;}2T@Qoe~&IesZxT$(qT2X(ab4{PoH_d5@{aE-9;cXyll{KMynWBa%M zHl!~5|237di*;tdYrGQlPLSsYBU@d{!#qQAe+($~ze9P_`yZU>eS+|T*i#ujb(>xD zF+bBMc`P3PDtm=K9DU?|f1CfZyTh>q>jN%bA1d4L zuc?E2o8rm*Lh(RUSU&s}Igf$APwdFp!BkIOAby~It~awdh{xY#7tJ=$R1aSjhUUOQ zcWwEn3E{gZ?K$?v(FYN4=bxu3zal&w_ZT|$)4qx{%l>DM&%ZqwL#z6a-s;FTFUWfB z5YwV7Q%vK$lRTzJR!rklIjp?Xf#3L_Tkj96{$u=eowr`#HO{_&P0Hf% z;7alO`$XG5aX`A1U4*~ge>g6S!|uNtrpKQR)hmkB z+TKmpJp6BhcVZzA?;qwDimB0|vj6K!JpSx24K*Kj(7Pl~{ip*!(D-TjRi z`p!X@9<<_h$9=PN&|m;=E)YME_ob}|<;5R9L;8WAB79XCw)p1#4_-9#&9K)_OXbhPrRY#!;kYG z_=8r5_6>bIOyfNH`r7{kC)^O$+vxUi(1-ip^pp9A!f)q);8o1G*6Z)&1=XGhf% zh#26ve$_n*WR-c<0^d*D(>6>@rZ1MD0^`Uq;I#~blS3my8jrqWz zB6iR#kICd!L=RehjaZI-ep?>u0^vgj+vvkZiyZQggFkpITsr@-Q8)hlQ?GIM|I2w6 z2lZ6l|07=IrFWt${r`yS+0QcXQ2aUuRP{f7o*~ z6C__M&!N7vtY*3f&|EEhm9$Zy_|JzW#qDWt||6f)8sgw9b z4uu>rR`s9jnAVS6c|j*Kc93~_wVSN@@dMf3K+{t%JYXO7VdL#S-|3$lz8E(8%ZPP0 z9&sb}+5W$o#ldl+=lp-JdQkI=wzc$5UfcFhow?}K`Gw-qXejmHi9f$1wtm!<2UI`o zpilJ3Ap2SKr%q7gZM~&(g&%xX7#i=5o&U54^Dt)6PyXkG)#tH#xc&~WV!nEO{>HE9 zo1xZ|CByiJivii@0lyP_DCC4$ssH%Z#EiH#`B#rzDy(|OP4C?Io7VjgyJT_LerLMMYwPu|TxF2_iehc;k4pc4r2Vn^C{HLJ zkD;ahJ7MH|IPL>ovXdAg7qdSw7}Zez1)`EdJX~FTMHhZ-*t$ z-ENaDkGc^)JO4H=i^INuEvnDoXsRA`Dl3XK%kk8R-gZ8B`P^ilp?D%XR`tJh{*Cnr zYJT`YpXk#*;s@G#ZNzfyxX|L158l4&I|sbm{m_zIeXn!iY1iL*-n<*p5BvOWYJPCp zc>aGe%ctvCyvj@OL`UB&E*CxPfq94GH_=e)zve03{|pbPe(-_bWpS9Se$;~=wEj0M z;+*I`2p_T{d{r2>KKQWX&+mFMY&GwPz5f4hY!(OCeXX7W`u#UoyXFM5Rz0hRqzWb_HP;|nWX{Q8gQ z_g96LPTb%t7r)p8ubqExW^s6MZlC|=B);wQKecb6ct-MdNt}0zYESQHw(Y*w+drK* zJm{Gxh#s{4n&vgV%J3QXQQx`59S<(N$a(v94!`#I-yhtuJ3#Gb{^C#Dbky~zi)yYE+fRFt@_dSDF zUy?sd20YY7-;mS!A&26*$W+;XJ&$d5u$~jd4pL7lqbI-F^H_%c&ztF;>Qf~DT5rik|B^h1`mp@fhcEEegMJlOnl$f8i+r~~JofwR>BeJ7k_8&X%B zI*Cs)obmgKOPsjOCNjHO&KGe1r*21VcWkfLzg!}%9<9UpRIU$8{L(m;Lm>~$w*EVf zQ{=hY%){zw&fc#shf}4`g|uGdn<6~ODO#M?<3XLL$#bX=D?NJT?)}bsI@G?l*x_4$ 
z|6};={}(1^aqzoaKlHfQW}bmN-adDmVrNNwij}{Aq3)V6gyQ-5qO$+#^OrQv-UrH8 z>YEdO^l5$QL9hOx>8Urd8{b^&!=*>PIc3>p28A1s9yxyJ&05cY{!SK$^`T2;e*c)v zTdb{RT8}+{siJ=77m62p=XJsfANLXA@3Di*j~&D>mEmQ3GJBrIf!*{K_2WmLImNK; zgHGcHq?yJDBRNUsdZ1-n8GTtoe)&xuOp00mE_oj@Yg--wI*e9~S)djT<}Q z;rP?`uZ=7Y-=X`TTxCU(TAv>#YrUo4U)MCA`KTOSw(fy54s4OZ3bW z#1Aw+t96|)c-~2?DwZ>UG+SRgL#1D zW4@vIWAv-+zw6CIoMF;0%ag{@uPXnV!yZt_FdYpfw-lpnOcTMat>zVkV z)PK$6K7Q7ZT=H!HqdelEPyEyc!UIh&&*EZ!>Qy}t&Z9r9GIjHXR{P56Fn-kepLyg5 zPoqcw?R_B6;$r{5)>PXGrfcQ}a{i5ZfW)!GtY@Q?^`Chu)@w2nd|o#o{uI4S@)F$= zzbX8*K5X<1(Q}%onChl>W9VLkZ+hb7Z%I~r|6%7J`cx03uA2J69uzu}dg&WB`_pDi5+Imj1NlvcM>n1 zf5^pe`-_BEm(zLuKmRa8^wPd#TmN+&4>AvIu!}#e^4OE-JhIF5u*%h+J-c_E70|Qu z54~AjtRFq```^NB;>kFYHOK2|ldd5XM_;9KAGP9Aa4Yrfgk zp@<($U!UeDPxa89*XJc{bm5-wJ+<_Vu<`S6U-RnwH=$E+WpUwkY`9sx{&Vki-okEg z@2Op1QT3?5me@_P!>s4ygHr#=n<6~xM=rM24c&Xh_ ze;Dz?4Fk?P=KV0@*M}TGY2Zw+dG`A+bQJCPzpX4Td@c}ccK@}0vzL1}zQ=*@o#cb~Q%vLZ9W3#aafzc-!h}zpyebT1e|6aXhfn`nrVI6O{A1fy zE`NC5G zAHDG8Z~t*|cz^!z(&UAod-mP-|G(^!#l`kJeQGxqYn>=od+nV(X1??dJDnePnDtUj zEcG9~Nuo>r#&4`Q{R5?H!K*e^Ms{i%0Uo{;E zbYjD+%jLZP^T(e$jt7|sHrU1A7`k2UN8y&gG#0x4p<90ZgDdEd?f++Hak2i7kN-dZ zNWc59byQyePkq|IFze4UUayX;`CgIlk=ZWOI^YopQ$OOSf{;=G*&mFMi``3lhhkW9+@9%sL zb=doFpN<2rsrUbEs$Ws8@|yY8S69AMtSR<%+F{mTqNB3^V})=1ppKU6xX!ro62DhF zMdPEZ^_ib~)BJh#hjn+|{Gq`=-aD-Kt0NCu^>34?$F6^ynZ?ESKNZ(M&}UGx=J@=z zjOsYWnj&>K#SXJxjSnjOKTLSmk6diFQ}4tk4yJzU1mS`9y3)GLPrW)0y6BiqybC{U zu*2KGKJ^QWkwo}x4#44}76CZuEI?>U5<{f6e9uuniFKV-2SH1Gc`qWa}38KgEcCbOK zBN?A~6{dMU8Luj@${z;oc=!2NuJD^MaOhu8`t$Sy(X;)3P3FV)m0zjnADW$ht4r&V zX#0GGz5i<5`lj>84zvCmQ?0-7L9K`RkjLk6t3!F+&U?d0uN%TQq%P)xt=LU34F2i4 z2YhXW3qv+L`IM<6zw9;7zW>sBSpRp`^PkNuE;fJViMPG>P9F0=IM{|g5@-J%X1x&| zRsH9CH0DKJhN?##WIVGOw~AlWx{`RQ@2-=_IEIV^PmwwrJ@ZZLnfSv`H+rVLvF>pP z9fH1H|KFFz#r8j&>+w&nenqj`YwzT->FE`A*lEA8!>qr>#H#+6e*fOoaax_@!K+^> ze&!2$m+*77dj;X8X!WOkO>L)oUHt#|{(q-07GBx^T4#TF-54@&<#FD%TOZSSmHl!1 zU!{XTjQD8BcMmyWzA)02H*c5AQobKb=EtF0_9Ha<33^*pg3z!-=^w=3#+aA@Tp6p?KN@kTLO>$4euXTNGJPZ8@>~qt`JAf$lgIlG`k?9k 
z|L*^i8@{vM-dijk+{8;Z-*vBLtnQY~6C2Fs@`uZ7fy43zp0 zza@FrN4ASz@q4G(60C0_2p_EKzpy{X{hys=m>#ww<5gkUX7?-p{`DuPhaDd~bcbOl zw$DHBqPndF2gc*iJ?WdFjz=AmHOKL1)%EA9Pkmw5+cDMpuiGcOJdf~g{6vR1=v|a; zzitv;c%Z$m^!YVEd8(J{!s{=Fafd$s;kgINq-U3C^VI9#s&uTre0I`Y7IeVM-+?Cc z3$tcLHS50a40NL`GJt=LU39CFjB4F{fgayah1 zw^qD*Li_sjJ#{{bUVf}zf2H%Y{g93?S?x7-a^4c#quUZLb%t3*bX4^}eIC>8z`IU8 zbW=ZeiYdQTZ$+O=JeS3#n?LM))Qd0P_VuU3F6({f;SYwizklCJ^OHX3KZ(cxHS0U^ zZvE%}Z`75melnjh>zx=V^^|yUlTU$79)laV%E%j0P_`6|goOE1l#cq0GmC+0St#Q!>VfA9g-_Kg&AL!fnkF_i=Z11_; zerXD}q0U3t?X`FE*v`MHZLiCg#HlmPdM`RE`#-aj&+}&JB+mKr`i-4@#(&=I=+6tc@4vzCkEUjE@nHP-$592_AtYy{%!sL(_H-^{ZW+Y9LH1X@u$;#^3)EqK8OM9f5PK;tn`bz43$qD z^ohT!|Hf}-ae-(4q+WF4neAe_9Zy#I8>62(wR!3~6B}bLEG~Qg%=yu=@4xy~2c$0a z6@^t^o8BrC!!B+AsvBngBND6nPoJ$HPIXhPiJ$v^zz4049^v8dhH0K>T-7mK{oVBJ z{|7f5@V)yldtoa1*8e<<3;S2Q|AWqteo&XN+iUOSF~^PY&@)?kVb+H+p|bx|MUQz| zKJy2aS3jncUzKP6eC~XUqYpaaTOHV7%1ivA@yAP6di%g9!uoqZyw|2f$J0-{{(O2C zm(uSaTj~cHrzU>Z-pQl>AOHL_CRFu5J)c9phB_Yo0)67QeretCKzn^QV>#u|&8mm4 z>3vLpW5`<9ytc#gn>L27SzlrLAseG-=l`ce9LyS6JmWar%aBI?4RPtdC^8 zUeWR{!Kq?fpK^_ZPVK3J_2UHLgI0&m!~FT&`4&&+?Va#V4;zG!4XQs3duge~T0c5B zEdJ`Yo@AND#rR~3|elQtFqI0~T4_7|CG%uHUkdMFZ?{}T%DRN&i>qkxF zV8W|E)0uC1%JU%g%*^5fFGc#}o$w5M5}SAze`D|h-#_iHci(Ode&@S~PnmEyuVdT) z&s1HII?-1YR(b6;Uav_E+qQr8GtW?bA`&b2KZ}SCeA9E{$0iP@e(R(52Oenk>3q$v zeDn;P59=YCgWkJ$b8be=G7m*yk7IW^G$GJdw(x;EN*wf>Z+1oh zEuPDG)MtM3RL_I+=npfF?%8$n@!N(czQ6S797GRWG1X1& zu<>m#9rDqypBAF6kJ|2n_A z>*SkGE`IdU1HF^D+4M^hKiHmcMV{(;5It-~^r|rI=EnZ?H?wcQy8D>s(LcL=I(l~f zPhS=nJO8QLR(kDX$g=NXUsuoct%)DBCm zeDu#pKe<>K{*P}Tf6&ISpl{cIwz9aeAM}HD|I<`In2aycIkx|$m7nk!kLNcO^UHX> z%6AFsc+A76act%NkDvdievt7}Tm95og<JV zPaI5m^+{P=;Da`<^39*moo{jUK_`6E!v<4c;tykn9lrU6OP(0UY#!G6!Sxf-vHs7@ z;==c-{(t-a8^sr7yj~sGC8+bM4^n?qvDL|=UwrR?4Khya3*iB9X#I=ktGCI9;C zFZ^iY{$c66-`?l<<5xh}jz9ZU4`jZ%`avgly7ublzJ=J;+y57%MAi1s@3@&)Y5%X| z5eJ!t(U%`pUw|E6kn3@dUafm_!P5tBe%%sld`zj`|pbV>%U~pas1D9)705i-NX)sI7-{U zQ~ne|>l684!mA(F$*;;Ye{Jr3i!%<$c&Xh_f7syl3%030{dU;s3k%+G#&Tcf`L*A_ 
z=TZ;*i>mK`h1KqNYIptpyN(lz0Wr0z|NmqEe@DL*=`WaW|K7#2x?ayO72}%**^bPs_swT^1MO{UP5qqY-dZT;u?U*o)Aj>J;`o#sywgzxdEsJ!|ioqWsd5kK{qKc73_;z^!&!bivK ze)_|>hrYDeqCejxOc?aad6Uk1nSA^G@1!g)_WrwAeE&Tm?SDGHWVPQ9?D})3dKL2s z#Sj^k-Fp|5`NqGmYkgtxBx z=Dt^vZ`VK6vbeBa=Wprt=jba6tG)K>2RRRE_2*e!Qacnw#ow!Or>OJbbC>nCsqt3A zlb2EZx*aER%TqnZH-BFz4?hUstDYfxAJZQexp=cTfB)VVVbPy|@ZfTfJ%b+Sf7lO! zR!5%2g?hv)uZhz)Z1&sefuWEe3iNybL;rg_>*4cC{b2E<4_Y1CH}lu#&bPSoE#D>M zVuSFpLG_1)-t(ufx!`(Xk)@{}^~96o(Y5>k_hoUh@4vd-f5s>Jrnj2vP)~F2b|~b7 zLRJ4u`=4BOKqof+0)67AE)X7Q``22h9(?o+nU{{!Uw>F(>w_M9;%5tll{S9;&QJe# zEc&+pPxSIbnYW*YaBjkJk`bD z4bwdDvbdNZTd|v77;)bFpI>{U7sJTcmU?W^qpy0+tK9!r-T#69Xnv-PSG#omVND2Z zXVe*rMPs1Ue{@!rJosMyh86uJUdp5HNm*Pho{LRi&F|EoB07nmyebS^op$w&pP#gM z*!GIYU;5Q%Pk7Deds92^nUuxFkJRtK=%1oQ%WLYdkJo&cAa&>B55;2fL8<@BtB(>M z^WnI_&~?U*m-wj*ga>-%r*UkHC%ljAZ;bd%&n3OjPH8Oj*IVy8>E`{>xAPA(vbb3Td(AUjpT|%Piz21|D?j}`DX$;I4}twef=%Z)R~x>oeOW zaUIv=2P^!H6IOm_`u4-Fn?H>E^NKeWLk}UJ;~#t9?aSg~^R1~3_K2OXsn@Rmahlgu z)I4-UAwQHU^_9jVLyPAdQ10TqOJr(u|!O*?7#kwhQ2VKq3RI_ zy_1T}PR7L#R_QSx^bFC{ar*0TEV0@xx4!?vu8k!ZA2(+9?*PcF@0P{IPfc*G`2FM2 zR$ooxRcq?ycU#z&Pu;mFgks6~ptAo>;g$NNb$HhqHy-0?-0Dz2JTR@#{Hac=hYeQn z(F?b}f5utQ9I|@2XSXkA2YjJ?|A*czF4m7_=kfoKnyN!RwZzWF4uy*kY&{xIR;&$NE%7oN=%f z87I`N(`Rl7$T&xehYL9sBUnf9cxBk_{&)Vup#x33dzo~l4 zvwDi*F<|35Ab}|m+fEcvd>@CiB4jNLcXXe^&cHCBUwLcNj znK}+gU5u+JtoGWgpLx?a^P@xEYKKC;C@XFM=uOMwV*SX)w&&BSt-SgPV#5ci3pBsh zft@0JL&odTak0TJ{;;%`V;acYxe%g$s^9& z5VwBSB+fiTAwMuG^iJ8Bd*tyuDkP}5Vo>Lw; zQ*7%K{uGs0zowI4nO74YZzcHx^pzx97u2JQT~t2lOH3*Vh*weX%-P;)hQhO#Rp? 
z8ei+CzLMW%aUpJqUy-~j47KM@eERg;dT0Ot;~STrwZ^kv^X&S!Jc|pjN4x)Iulhmi zb?OJzPv3mvx4M-ViqTP^s{b4ZFrV}|%=D&4ne1cVf1@UO=%;A<>3mb$iH`BHLFT*f zz}4S=<**0C0krZCdcGuK+GQN1*$D^hFf10OuD8|Tm zy{hXvv9$kB;>@?yH|62?p3fxbC}qqig%0mW~Gwi2MJ0)9qha z?c*JEz|#32r@Hh@F%-+kK&k)Auj@pZkJZsqJ>?Mxed5P9tYvXAzS`)UpL(6}nV(_e zC$9>_CSmboFS=t&*z&QT-?#EF+yB43lj`>-*w6EylGT1b=$m({S24d(NTsFz>%8mf z^`Gc~%1h_VxcIGKRebfsE9Ef{5I#28#oriu<|==yUH(F2=;WOa|JCQ)$NxKKak1aM z%v5{j`1{`}oQE`I97SQzvH#b;F`p9FBqJ0n#s`)CPw#)CdF;o$6CL^mrv7w%c%bbc zTb+8A=fv+p=8p|_@i&$@DnDrC_;(viym?%0!UxBrZ^!?wEH2D1kEh>WJJ~z+tG-y} zwRhs<{wL^H%`X(v&{F@=*-Z3Geagk}ogi^A^;;cUKRnRtlXBYUTa}NV;m7ndPFU%z zJ?}qm_?uzeW?#7Mo&na^I``k6p2fv0lE?D_3ah>LP98H)d;g`*yfe;xLm@Ae%KmRI zc~%EHpz0F`z02ZaHg$C2SLUf6apq|@amK+`q>ixK8Sk8U`~p7+YkYIQRUa<%2A}U|MK-@dwZ8i)+hWaDzAQXC%-Ds{FCO+x44sdqnB$c+~q}JN~D> z1gWzM!|I!z)H~l{e+(18vceUY@AIVBeEa@!QWh7x4sMA4|5UH~L8o>_KkWMRF1%34 z1tyjK*L>z4nW}2hBKU@=alj8y30g{22~C zuK%fTn#Bb~kL}!0#}jsY?bXl!##;UAXFEkV6mo%QssGxi^!L=Jmpt$2fz*-O_<0^n z@m3ZW^D~YUzX#{hA1=Rn=S#nJ$SdK;1J51w+_d)gzo+WgI?=9w&eab(vC}pEp>JM! 
zhOI0v%r6vjp?azRPI&#@4$uR==9^8w6!C-9U74qP#8ZAZKjVZoH^1nye@;6rZ1&Lf z<-Y#@Cg|At=T;UMwo^Y$_y11)iek0b=y;9OH}f-pjaxm%>QSWBfA}+FxPAPSi=Dm> z`P^8)u9MF=X&vV8ojc#+)B!pjKeg3QUKNHhn+{rKk-PFR`n`4bKl+%9smsp4p{uyg z7JB_dQ^!>lYcB7 zd{I{Fztiz_eCtz9{b0i5cOURUtD{Hq@pr>C&oiFtn63V9dSTzAMvwi(%cq8em%aVD zhdw$BeY^fS&*H-CJHJq`e`u;7OvaaJ`}wfzui(`*za>bWp;$9IO8qCVx0BEFR^ls< z;|TOY<_V^L>=fY}I;p2*t3S<8?Xct)ryV={{u{wP`tT*;iS^Lo_jb15Z^=0M`EfbV z=BxAcIv)9k>aRB>-~2VTHBMcjSStohU*le}zEKRf_y0|?txn~k2l~X1ZK(4weoqz` zK3A9gPU9eaY_P(QURZgTYrZ;b(b$c$=7krkFD5EFD&u1gTLGJ*bZUIZ(n>+Yvya{*yrC?78kY;zfk=B z*Y9~<8frblZm+$Q$Cc-wn-ZtaP^=Rj{q8?~0ChaC$omfZAoB$AcSHDwj8`#!H@&dz z3nRx}b=kXNxvlS6<;-2~qo4Nv+sfi%{b!sizs4nNj`v@=tReGGB33_ox%fjN532fK zPk;Z|RK1p>>ZSAcPU85n4O)N9kYpd;fK{n?fd-=FZyIy>nFec-nX-<c11k()U2Q`a!4mbiw)=g6Mq2z^B4B){k`LnAa?(Me;Q(9ssB1ZUPo3(O~(P9*u+7f_^pqs3lB`^ zXMW|QXE=}k#-N=x|Kh+`CNviM+zAgXH|rwmvg6OGSzNg8&+1}4MPZfKrZ-%5$ZM*t z$oxVfAJj_ycf#j;O!(-hNF4N~2TKdgVmv19w*oBjV^ zzkBH)r?0#^dba(DCX|^HV#l_Sn_que+fLtM!dJ;AT*PP?YE#$NzF$ z4SRp!{nqADlZ;So5FeEKuex=)J%N3mwf$jB^1TyG=b8GkQzXxHbzJkSo#xp*ToxCL zcj1SLpIY>L5577*th?|DpPoLhg-+%Ezn{PVk}BrtzxK`QY6^q#m~SZLg4xRc>wTZC zA5L{Zov+>h;V+%>QXc)Go<8BjOVR3Oyl#ko%2R(h{D9}5T=dzi!r>dfwb0*Y+Ih9A z&%gN_ard(@{Q{-x_Ws^%9j7jr6}Gp3>Pj#aa)Pt6|LOgQa~a8APfqMo-x9x9J4NzL zubIWg{M4J~&!a!oHu=iH4Sqi}tT^VIyM1!Y^LT#k_i*XxBNXyOnacjl?K!IVKRQ1B0=*Oc)W*;IhsB%1ryu55eamy5ypQV-+kERU zTdeSlJHt+U4c!0q!@okl^?#=7gVg2JuP9b|P5pNNE9LXLA&wo2O`=Gt|C-1D2v|R; zrxnvO^oWCA^HUo>+q=clt*Nf&8y_3&NqFiN1o`(J$U1p0RU zVXx|fqyGKxf614u_Wp>zd990nr`ug9F5UlcWNE#{(Zs`R3R9uv7FJ zxBM!*pZ>7YtbO)BWRa!AIKRXDFSNdku3i7e^RH9CqD(wH_$c7cTu%G%)USOJtGqTnK7T2{YJQ>k zaug}`--&kVK8#KEw-lY?48i)vg7g)nF3|kFbLU%}`avgrbUZeg@)Cbo=$#|?d-R0y zVc{2VyY}Ff4?(Bv^;fMdE<7({wU2ka6OZZ1&nnv6f3q{rb{2{)W1!T3=Fuy8){k6u zz=T)drju`Z@D%Y2```TjG|BT$GMJ~?>PNQE^TzkIcCZWhLjTk!n9|ET@{M_*Ol zR)YNhZ?1ktv1a$b#v_e8!MdW=sXY2leW8#WhL!qHey`}kH@&nk%Huo=d@#*R{n#m{ zd7io$A6qfaPwg;n{xg0t-wxBlxbMAw-;e{BMc>XpH?z3Vr~Kx4{mFE;d&6X0$!fnI 
z?DsDTkMX?vL$Os%t?ECYBbbNn$MTqFuP@@DcM><7ektMyO}8a^RwsF?=fMg;dQ}*< zS#`}-j@zpzY<$trM_RRX%6Hr)xh}V>-b%-^``|q-y@pOLF7mBST z(e&Ni^Wd9KK6hSSE@!Mf?xL^gbwl`uX?@s=-Son8_x$tEzuj|282kGC!zRA}iq|}Q z|7~V*vF-od+26ig_WSv*KS}?E-ClbqkL&FJ_4z@M{!wQrWQT3~nvV{C`0Vct(bu~E z&+q?VKkc&dOAMYK`u_3bkJkCpOy*&qzgp5i+s_Qt`#0`|gLYov@^hr~MRGd!5GVTk22e7mDp?P;4IwRog#2>c=)jU;D{@r{RM(uJ#9iH%#+9 z<8|qCm2UW3{gm1NzuDX9d2Zn88>}@Aef#~#a(5bC;h}GU7@jFEH zd7jKxolxu$Q%n6PN^p z@Il4}&96GxDZ)2&o%8V6Aa!Faq8s*{^qVL4TXuTbf6QsSE_T@?=<>S}+fMT=E_uBD z$>}XSerbxIJnzJ2ji2+7*zo9|)l=*gQ>*&l_4+p_Bclg0UTWi~E{o@)!#vGj(NFSC z&uM-Yzb5gp`bt0C{+!KD4QpNh#17LJU6p)0|JG6+CusNIQ=4%mT3!>UZ=Sqdxas^t zv2zTR`mcF)xje{@=e_z3b-wfqB%a#T$9NW}Z_O+&=67QI6yc}-3LCxf&X-EsKlk$hv z+zlVh*N48n|F*KYcyMt1`fEAsdsE`9z2lJ}w%vb^dUB1c%{)UPFTAS$(`WehJSKYZ z7#BXsJVE^35Wb=7oQKC&?4}onul|KIe{$uiVZ`%WUv=Vzdy>!gPd?X!Vq1S&SzO4o zy6Cf$cwKHQVAqGxAD%~aL$SN)^-7+12~HOqzIS39GVXNoSMGn{DVknO@~~5^nxFZd zmyiC*nDM*T$g}fr)3dm+KPBJ#SCcqv>M+~MqtfwbE`Ipv zg+jh~u;*9v;qg9Vb)W+(pE&3fKYAcMkb2TMHhP9UpE^!|{o%{gzVhSsc0M_5zvxjL zE&97a9@{%Mn8&}L+H-GgFsoRIm;Qf2pEsM2+M(DZDpmGh&tsSRgr1W^h=bH&HvLk> z5AwRCKJ$~OS@m14#l1_Ua!Vo!fT0L>Ib@@@+z-Cho^|1sQ(wge_Z;Qvrawu zj90_bU%UCBe+<~!JE_3F|C%bc2N&(~|L?i*S+ia6{TK5DiSvAhVy~E5x&03iWy^1h zZN~@N&${yJPiWjFd1~AE=uGeAF%D>a@l=hQAzkS&k0JKlVPP^Rw&ElkqcM$TwSg z^o@8rKk5s`J~6e_f9(_J@!(s3dSyhf;}Hi_KepAQ`Nr4z5;worVfi{9ypQV-8&CTB z-q-IvIc$325l8IsjT-&7^Uur+>~j5Wri;~~_0l)<*Mw+w=VFIq-}s=^e<$(u`&{ZZ zR37~Ted4!qwLW+teD-VRSH8tn*YvQ#F8s#mcbN zeD?nrfcp5joypwO+eNFt{DdvLd{UR90^t2xQ-7wAbPR21mwqiHE zaN`!Q{Pl^+&G68QZ|*;Jt@i(K?ykCh2{zSEu_pd>?VWf^`+xF#B+h<36bD2{ssF0O zcFDZ34HMq~`To0&kG*RB8`lfN$KO3=@XZ%^t-tc~S8X1D|0{nhQr`dMSzPF6C=QHv zRsXq<6a3Qlug@oO(7PyW=Rs|It>V{&Uzw+R#7(cCe(DNu&3FC=4{y{LKKM%gg~bmZ zg-+$?uVxk(8&}5@Z+mTVouB>xk=nMsV%H=$6bHozrT!;8>aXfQe(w7RA7q{&{%#20 z&`I4TTm9Yi8iQWm@YdUB?%Ei#*Zwz*+4B~3tp77rzoodSoPc0{-l<>vBUXEDdW&}Q ztp6?XQ)ei8qob<->Gcfkw|F1-u2T=))Q?SljA#5@bcmau?Z9aqtniapg<;G-7i{_3 z8dJjX#`W<4!2d$2t z&N$cEz)LaB^G@>3j}5B6#|G6OroMT{69;WODO|Pdg{L;>dl#Nv|KG~uVz2LR-@jCh 
zS8u8g@my^u$UH-FXmphNuX&~K-)pJ^Di1x-JK>qFJo<(owC$r+k*9hdL=PLR@H0;M zAH?f226 zj;W$s|7NGY<-t=le?E7<#gjbmgm3dAp8Byt<%O+AefoRXes@aPzIUgQJOAZE=41Px z-cEh7X7{zqpTp?PEB*hV@~rNf@I&#n_`vj?_~Ef1vwmnEb;BbL;`fOjet00;Q5wfK zom3AS?8)Ndu@xC7?7QTFZ>@Ei3y1vl7r&YOwYSl?<4^hwE~n=o61^;0d=f96e`rbq z<1yb*$OVI?{-c*m9`%!F$b6OefBy6DsQW)$YQ&&h!tzu0_}-mco#>s+&+b1kMQpC8 z^dt56zq$HB=AEJw9_KBs-!;vLuXZSojDb@B;Z2e}>%SA*g9(rGi10zHL;2=c9qbg- zywbST0a6b(sQ$3Z-Y-3N-Vzu}8sm=bU zaz3gPilbs`RsXvl|2P@BG*04A`&i=FLAhSje#58 zG=7m6?EH*f|J<9!#riRF+`pFbsh+Uf>oiW^to~$v?7u^CbQH1ioM@WfRI#a_JVPgO z-lyxwck(TdekqzipF7{;NuGDYw|QZM@UcPlhu22<>#n_j;qcy1Pp<8B_DrvF+yBqZ z;$qwXX#M`TrGAicYT{>2U3ULTc)2i$v;Pjo*JGfv|Eh2OaH<2MuRK2Y!3S+z<>T*$ zJ{wK_NG=1hvU8&tqf7s@{4?p*( z(ao^k1`9uQ_;N?0Z`*%U>gV&I{r_^Venqj`{Z8%D{pU50@xV|V8&gaDXMQbFrZ2p1 z4B_<(uP#3)w)4)G2Tu{dWc?SfKN&Oru1|fiS_orq-};&FwBGbi>uw3!c?ax;^!}5% z#=&Gh5^X;p+=r7sTfdsZWd5NzE;>s6cf#Yodhksz86RH#4axJGZ+U9tH=XHOT;Qc> zd?&pB)DHQW%Lgp_$0I}iut&dh+@>#5hwXo~Keqk9umAs^>-bOXm{7`FY1y^eD6o1XIO zayvQWm-^V#sVCNdasRXAaWtl zAC~^jjEnxh5N7%Kzoz<%`GI=;nX6w>tl9lf>rmVNf7xkXQxSem>`;6&KCu3~P9FC$ zpkQ7%hRP=n`owQ_>iFax#Y+864u z|6gW2#jIj6y_JQ4ZFQ?Y^9#kdBC*teC;9cdAbhX>6p4exQ`_q4&EjJ5WPav%!t?(_ z7)HGP=<4gPy?9eT%CO^9#k~C{pUb z@=EuAOY76|&;uFQ?4*ADpv|`x%dz(lt6RrGx5|%R6^6Z^yZqu0PrNGhF1F~+Yk#!| z9lQQ2m+^U@%dd*}pLrMCQ0ozPd+pUI=mQ(NXHZ6CR%<;9H+~9+XEM z^oif<(B}aj2;X>`U$Xy0e`Aq9Ed1h6)}Gc_Y}pT%9CP*K=yTrC&O6M^;^HUj^$)q) ziek0je^Z;jq3@kAQacnU#z3k6+9&QKVSUO~2dwI+`fa?#Mh~>@Bhj;br{jX?VS`=# zVVfULd}7v=wZeA09kS9AC;Y=}UgiCVE6#tCmn94J(l_jMejE>l;-sih)ql=|Sf7|* zij$-ee#ZbG^iJZ8!~d&LAG{PTkMYn+?SA^h*pt@&%#qVS6UHui%S(grY2W`%eg=^- zU&F=q{)bJiA56xRX#4eGpMT+jN!_848`jbn4^NTz5&FXO#_=b7;$Z5>rr(TbeCkpx z`L&*s&d2nJbsJZH=7g>C(Ae(k{m=Yid;c##L(gnptt>8nv3UM@oOfy~iq&3w^@H>c z+vd$YYgt@EaY{t`VSE?ec7MN8KhQ~d|KsOB83$y%)K))rR$&MyjsJY(N3F2&Riht$ z<4bGN7d!r(l*Pri|Kaif2cNY4N3L-vn6AD0E5CoI?p!iM@$LAa)PHz=!ec(A?LXnw z@9N}R9y~?!PnkR4;z^!&!l!R$_tPJiKV|gN_cfOdE1v%S-6n5$4m$SxBdy22e_TO7 zf7NtcMX}oZ8?&`uJ~w$M5$l`gGrv%LClV_ApG!Xc((#Xu$M(VZp725D3F7aD@C}{R 
zQ?k|HO)ngOz(Mmp^2Uzg^!x63;KViiyyn^QPoBkv`Plt`n(7Ccw-Y~W>f-xHZ0kRH zsuPM+V`{1Ys>^lk%&XLYqi^V^^JCtjI4vfW`mf{gKdSIerzPW{PaO14 z;;D@vw0JIj-v7+cyq(6ukL#~Ve%Jf|Oww_hio=ude|21mme=%yzIi7cUQg-#Lh;>5 zsO-OvSL%oQZ9KK>zmW0ZgYZD}Pn|p8;*8^j=fQdOcm4mJUL6PI`<+ay1_YJoCo5D@)P)vz|QvX$l=gIn1 zQys8J?Od=vK@dJ@<0>D2H%#-q6MgezD|XWhPmcP==vz+h3oqTe(nTk)In`^Peg8Ex zi;JCySS{ZFb^&`Hni^-`PHfg*?b7}~7e;yi^Su}-^&kBylE=Jk-o!!W)eq_9SLK=C z&7E&?^g-oY9oS&XOZ;KYZ!G-q4d?6_*4g(H6MlPl`};?^4O?d0eP0$A-{{}}k61lA zKHH)BbJe51rr4o4L&oXVxJ$4mw)G>8qo=&|_cE1vszbaAJ>xMBb_G`H;18qLp1jdn zM?M%v|6#Mi@`b%3}7uJ6A?UP=6b1XWx|DTe@#XkQo`>)%dFEG*Ot#t~ky}hFYTK}mx znIC$gI9tZ+6-5tUk?--XPk9!XkG=h?9`%*_k8Y3X+jwew^1*rZhpl(|&U=5pZNaeB z@!#I^#M5U`kL`cx12{~7|C?uVVcuf>oAVE=DBmgOiqx%kD9(w2()O>sbboHIqnheD zLHY&y#LqlIcp%$D8plS@ka_4h{q;8%Tjd*@eRh}L#^R&jcwphBS4H2xf9%WRV(-IE zwewCC>)-UB-#=0(I*A>MbE8yc|A)y)r9QR9PyNI}?<8(E<0|3@*{)JOZ1fD#({cLi z538^Cz=n@){c4ynZPis4f8(F%+xfSdSzPS+bG+Pu((21K?gZ1d>1p4vEuXq`$qdDL z@j<7al7?Dmppr&k*9hdoJW5+=dK@rwBQ}@hKnv=_D2&Y zOhM1S|7vA%vDf3`di||azm6+bdF`F}>Wd1}AN=Ujmr$G^iKYHCznP)~-}H!s%Bx@3 z$*;;YfA8G+7Dpd+!nZoG!IYQy!?u0t9xPAZL%HqQNlOLM= z{=Ft~*3^;OrTb4hVZirlhvI@5DD_|Cb)Ap(qZ#S#?}-v#{XU7q2fgwP>C>bvF7Q%J z>%~ra*u>Luh=||_sSz)m*&rTq;bu& z?|+^8r*y`>`a|*k7%+YH69=i^_G5V#mvp{d_YNO4p4NxI8>V^GV}7S~D0b5eOSvNs zSh~JZ7=Fc|^Dq5yC^~li;SAMNM34Q3p^hi)_S&nT<976|Z%yH%8;T2~qpJV)gm2@w zM8`Wp^uW}QZCLBfv-(`(?-ARp-{zD0(|BT2SJ-fY@mKz1+v~z6KiPVP;}6;dJ^TG{ zUltcT{@Em6|MRxZyD4$j0c{B(YyxHw9b`mcPx$G3i<18N_LgI?pQZS}OWxLCXapCotPm73ClUJQ)#iNzoB4&}l9*8HKigMd z47aZb%*Cb-@6=Xa{cf?*2d(d_YyK&7=UZI)-U-ie9{pkLHn-n0_K0o6n0Nnv%8SMO|^4D<`;^o(NWd^`oK=UJ&!f@I!HFd|>0bL}w+jtsk1le#|SMIGFmeZTwai z7vrPbllay4PW*=R=ntFT@%s^<|8fYMKEChGH$L$Mde;9w9S3YC<2tdc*37T2ee=q< z_iyS}-BA1>N>%lr;{fYZO-AN;_Q%I%&+sp zPSHEjvwZ9-q@Fac{!knA+uEZ~yclX{PdN7BXYTOIod zIC;c%nAq?PrwA_;SHuUV@5JvFyPkhj9(97`r#61J2aDITxLAFz!tUZnuL{GyKm66L z3vK&iIDG3bU9|H?t23T$|5J5ba7DfTCs)6sSpR1Kv$XP^VmiO3_(O4Jd{F9tkHq;L z&3x#aq4J1>-budM%45Fx!Swyf{N$+~y6BiqybC{UbmV=fEwb%-VbiHUeS51Pw%>o{ 
zZ|E}aV_`Tk{{HuQ+um#0ToM(V=Ev`zut9V{{N2#TqwZu}izjtd>4wqc zzU#;Ky&uM$a?0{gJ+(9WcKq3!#l`wDM9=>>)ekzgwJx#R`=dH>PzR5lBZE|v$F#XKekKFz4 z()QofaTL4Ve?!>qwRiHk^!{5DKl@qc7m90QpwxfW>DDKnPk7w-7=6%q+Bf`C)qnB# z@9r9Z!)Jf?N*F%oo!f5Q{VcEZwDZsONipA5&%ZULQu;?8^OEQs$3N-!k2((d%s&*@ z#?(^(wNLzx4?f$Aq4J1>)MvKxs24xTcAv(v(KAHPWpU}RKOD96#C=wKJPXGza@p>e zPMM#1*!RCJ)dPphAJAHTHHovPzSO2~#4Vq?bIAyWd|_DG|53uTK9LW)&baXszgHV9 z9bfCbs5jLi&oCL^{4R@2l^wo*-o}6a^(sB#s0TmeR?YT@XZ!zFOt=3P_e1sif2V#O zSFG}y`tAG=c@^^u#r097)PK!mKW6Jcg?j7|0 z+oKvIb~|#vOSZZY9XtOyC5sE6JN;5R$NV#dkDVfX^bFB+lHXr{W9YLzXTN;sZjFUjKKs+( z`_x0|+xee9)dQPq=bb24`**?+x`h`!iFS0!J6!CXM=3~gX=wT~%(+eyA^6!6maly%9)%|Y$*2uj!qkh}}BzpON z@$>Iw`ew-ZikU8^x0pDr?wZ<7LFx>}jnPrnf39PJZ+gl2#6j;m$H6wtlX!)m@tyF^ zhM)So*x{jVzVza@pPCUKcznT`OAj7JKdX*E^QYtgFORbR^lBZ9pNq}fJ9$Lku&eqO zil4*;)7O0Xe6MJIt%*K$qi^rOFKS$MERT8>%|Ao<*eOy6;~1i+qFHW4`!7dtXoV;G<`Vp3CCW&CfVt`u_XecjEQ6@QW?4JN<+2j3UoI|4xc> z?fY=7s@ESn^(%_iUVHUZubqFR&Rn>}nQtg=j)AJ}-_MXd>M!-*sjl+sayge>FJXDq zp=kbAr#^nr>Ls7?QoEo2aNILzHa|CZMmTomyPw`^_mk1L z8XcyuarEl7PJXxkqf5R|^zg$2+1}DPw&|pL*kDf<7mp39Kb*Pa$Qw>LYgD-48&e*b zaA*7a=RIRu`@BQ0_CP)VgN~v^%WLYTZ`kSlutRZM6e;yz$EmL^V_2W``LO3fdBj1V z_^l4@13a)w&+=3c-Ffvlmi$xC(bxUqm4*u+{rIPkZHA6rf8NaEVz2jM@%qmK9w@B# z8ozhqqi^P?ZyIObp}0LJRQ7*_=)w1zZ&=k&88)w~c!fV) zu;|i1``QoQ4QKy-+VU6vJ@?A9)!mzx47z>9yZv8-xxk^%Q2tZee1?D>%Z~j(yQEru6_RP%i@AR|3ke0 z+j-U>)fZNKZS`oJ>))6Mn9eU0a>1JEJJCds&k>f-{6OUs2Yup44}=F&R~pAg&oCLk zzy7fAV~3x1-H21eKBw(`(xPh)M&HiA^<{Cf&tFZo^G+12yf(d6l~0}MBz7p|hGEu! 
z;!g8t2wI0t2__Z5+-$pLM|A${yT|##eV<)-2lm}|Ee>eajbt@pZTW=A3H_M z(|SCpI%fCNA4a#Pe)Pek^)Tk{)z5yj(fS<+hF@95Q;^x;nywf;1kN!{_ zdG9BFc**NweD6Dd{mZeR;eE{ZKht#_MXQ7Qz+`;Mn*DlEKYjB~BGxzZR3{YEBcZDQ zTnARFqoum&O%=QTEkXDob%Ey3=gzk{`k)g&IvyKLd5J$PHTgFeY=6ofVfgWD-v0NI zc3f}gKWAofvEN_w{Yy*zAmh};&zidGODG>+u69e1`Gw-=(NXHZ6JGjzMz$A2)khEX znr}9BRPl2eFMVAxAM^~-bDF2fcvTo4zxVs2YE!3$Cx7r0`>)p2 zuP9b~dpFz3Bl>3b=i+C6)EA0-VxY4BdLIe+rkBj0IOv^>m)iK#{a~tFnWws@li1`{ zVc2@cPZvD-@=0NvaOsi54sQScm-9cpSzN3iL-hKemiiUNnoIw+FQxmRIL)VTilMkS z25h{9S05~S%*Xa)E%DPg;-GgDPi_3A{?~+$zU5J`>RJ9g`or*FzVXrXmpvMmxo^); z-sYE~e(qb%_+ZufPpP)OMu+{M5!z zJr+l=Tb{we1xz=Z9{5A|(f6d0IQET%)W}kn1v$$A4y6k_1J;(Fl#MA%ZU;mbeQ@4Z2o{LJ5m9+*ubZpWW9v$)vt z4|P$8qOi(q@5D#nY~JW7GQUtf5EJbA)_lwB6FvBBFNRtVpV#YhI=lbhe@*JtXB^P# zV4So*Y%t{|{&4hR#~iWD`=^Glf3VJ{CamAy|L>N?h36m4?~2!-_hN&>p1s}Ic%=C{ zetmeS?VZ{!LFN~V2cx5^|9r0wpZ&I>6FqqKm&Nw#w>2-tH=%w=u#Y0h~vj1zz804X2 znDo2>k&vroqXn`*Z@ zQLOTDr|GS%e5Z&`Pp2IUIbmG6|5;b~)+h85Bo2Cwr#AJmeOWxu;^Lj4`Ked^#7*a8 z`tASkMukIfe5Ch8^z8U^QWh8L^G>fn%+;?bR(qY+Q`f$ECmi0_u$f;d9*u;m{&O4* z-*j@-b&4|u>+*A^{vUr%kv#NMw0Wh^LuxzGF+Mh^{IJ{&>)(IWo8!ZByX>{jN(Z)o zf4#fr_bEAPa?1z`$^2FNng}olzbdH>ZoeJj=3luYIwiW$*S>Ob?Sb@KVVVW{I_bKM>Kpw*$e_`6}6=NXUsuob)M zHHNG_Wahb(UvDh5>Owc4He@7vUHAWa78l$8oY?7_dinkl+qNs}Zc1h-ejOk5yZ^c5 zSsm!r#Gf9o(hsXc`S`nGn&+L4&$!r%-Soo9)#pEI*v3=Bn28Up*>l8t=yTl0b_Cl0 zAL;z?dmS%J7X0;*5!DP?)-|vGu+kfQ{qz^FEf~fx=hk|D{)^G6+W&a&|3=>wrP}ry9k2D#H}mJh zP3ISi-^M^y|0Qeo>#EtAkB)cZXZwPO4O$&qKmKl*<}sf6b$tBY^unO8uDr&}*M2_? 
zx?s<>J{UI;9lQQB>2ofJ|3%g7o_y&-`}0_7|5Fo2+MiJTE(S{d*E+b58S|>DgE*M> zgE~QYAa$j2Z1fE2lgr}LUw>nfGk>%G)z9D6Smg3o`+oJ{;pEx%4}Gc!Hq}m0ytDgV zTe9l ze|GPXGhc2DxNZ0cC%*l4bW8p3&En!Ge9YhfYJH{aPnxQi%s&)Q$~cUt`ChTEdDf>| zq}%gOABcnGo2}zk@^jU7f~7q5dk{Tru)>dCxbzE;?)iy3&kHvlH~yMe-?!gY+xPFS zEH3u`yLkNnoh7|fzoJ;}wO2p$<9Bn|7H>+9)l@{S+^T(dr4Eua?k6(X&$v*V#_=oWo^LYAs)^z3xW)+L+ zY2V=yp8S2&cEfVr`YZOpTce*_j@NE`~L;C@lz*uD4vQ5rT**u(*N@% z&ro^kJc;ASHtfyfV)2^T)MtLD{{EuYUp4y>!CI(9Vcfzl$PGud+BM$n+Z~fBo;eobaqrMbX4_(vynEtTdIoF(d z!(oqv#y{Wu?4%EEzi$6K(woJ_`mt+qm*(|kaj|@jn+`Tu!DpN>Y{{?O@!lRchGExjcih9bzK@P= z|9KV{+uj%cn7@D2@#&j)s#h_;P&_B&^(qfO^O!Dv>Sw=YnDFZIcVhkj{-xEa&p06c zF?7N&+5Pl~eFs1C*7$WU5Bm?fdaDI5ZU6t>j;c$az@GU3U%#^X){^mc{^Ffu|1&({ zy>^FO&{7_TXID4vfGs`@XiImXevl+QfbZ(@TsZjUJ9?}llfcam>@Y{hPR zVc{2!e`(9>HV=z5HvatfKOE{c&+fl5J&TK-|L6YCx%z90IuEhhYw9Xpe@Ndnk3NOs zg_xZ7(>u|uYu^83?#$!ony>$VtE#1{u?La7MGz4TQd=$eJ>H+#8KJ1HBVyk^6jimP zD5}Ay)K2@cwU$u(QcY4s(OsMRCdjGwe=v$pmZ1g~%_^l4DA07zb zc$#0b|4o0`=Le@Re8wk}!oC|`)OYKxQyJI(|EoKTi}ho$-v1<-x3J1<>gBpAZ104d z+F|;iW5C8scwEO~eM;)^$|nw{ejB%{|LTWlI{2vz?595*wDxK<|Nd@xI5_|Pw&!mD zFyq| z)NkWz-{66!*KF6bJSTn+s*c&S=?`liz0s(_i~KsQdBdZRt#e_jHk3jvl->)f8`NTn=_^Ato2ikGA)@Od|RXudkF`IY?e%BH={_aO> zWgB-5+IQi{o;&$LbZr0Mm&K*i@jrROZmqqO$MqqC@Y4BVhv|Qb#LE7s=ih3o3+jC7 z7wDbv%vK)t;0Nt_P4wWSXNaDTGrRuq@w_+pUGcX!x)xk^;>&yXwSIrSR~8pL?>Sy= zet(^-tteLO^KNRFuD{Yen@>$L!t^)egR1`fUg24vaf-K3x9NkzVs`|BsGh9xwk~W0du$JBv$ce8yF* zZ;0H=Rrr` z{O+H-mKyioT8r=aGCKDC?^K-^SatnXG9StM%=_zS)PxJmfQJEISu6ego0~38h3)F^ZzxC=Yq^POn)yrtp85p%$xJr@bMcepE&3fKel0>#l`po z#75WrPW>suPyFOnVK`;u?%o^joDxp`-oj_RxXKK#`S$-u^hq)QUHtvg8}PuaVxfNe z1|Pqn6M-=O{rI4=|LN!7hUD=+;+@#(d{aNRjo;m#$NL?Kp7EXVOFDDt4~^&F>$>F8 z$zhwfj~+etS5r&df1i#6GT#B}R}`zfre53ro#vsV$o#_eX^~Lb|MdEkn(BcA#6}PF zuH8>vAUu%1gXUL#^bGsy4?FI6%2z-4j}yaA@80#~WB2z^k6r)SBXu)Q{=_byUV+}^ zd4eyj@|t+%^(PIJkTi;!o>?2eRG6Gk--t;W3V32Y%T4l&ubW{;FEoYM~()uQzEE^6dOi zUltcT4sEFI+EJ|X+B@;t{gn0U$RxVzy5H@b(22aq&6rVbn(%nu79a@|A)P^xL6;w|M~Iy 
z`$x4cUQ>Q9$eOzN+~l1^@TVB2|2+~+7rr9rQLT@8q}y??R~~UN^<&%kIv?XVvbg-G zxBrfxe@>tA3nC>fS7LQzv@nG_@LFN zeEglTY8=zk@!)mR3&YlU`i!R^J1Pvj{=4`5;R$^yoMs|G(2zKgjrs68+5nM}N0v_2-&j z6QsT{{p0AU?0>@Nb=mLp4^HdQx~ao-8}0R3{p5j~@3FxS{;>9UrjC5_;!A^HsJZTd zi@$_E`+xi2ot`W%Ru{U85-qPyucrAW}G;vn(V#?SU{ z@jQ!*&DW{@?;?Fuop8w0Yact$IfKIC@2`8sp^LPB|G0;YXZ!1<&(uqQgHz7p2dZF;MK1dKYY-5Ixq8&pEcj&^bd5xH$7}H1Y&@qt)vK!wztsOEZanI#KzJZ^v0pPk`P2pW(;s%4_QM^YyXB3r|M4$ResGm7 z=#PE>-pJx&{T>ki0Os=2ycu6nvS#l8)uwOQRr3qQTrys_j_cZy^VrsphQ^zUIv#P* zJBgdEJnF#@+I-R1eE8@Yq6ZJ`;164GKi}D7Uj8U-v*%eq?R&R%{W-rss%3GZU$cAt zxnzCj{dZZ_p`M1?ip)C{pO^8vHSXGx&vEdrALuF)2fgZ;-HD&;xQU*Pr+OYl4;!rT zQ-_`ZtUqw!$cMhP4}G@tpS3J5`1AMU1z0tYzG5VOX64ln(l@I=&*EaX))|Vq#ow)Q zr>OCILu~5z#6jf|2h(v<8y?7fLG!DA)wr|kM=z{(6`g$l90|X6kmves{Oxrl1KgI8z%ipuh+Fac#8Ntq18{m)?3jV|x0IK}O(*e_SA}8p_M@M^_#1D8EkC^Qi9xH6 zqfR^iuVryz`^vA>>#ve=gwxoWaj8SG zlV0#AFS_xBlO}|XE?De}8Cy(5&-OpfEG}$!enGwekM@E7rG1yI&m8{@R2}N9&Dsvd zyfR*Q5+|=m{LG7SLHwOC?IZk7I%%GFNuAh==vHAk>!BKi{Lluj&VJ&^s9~wehokS)4i*&96F^hYcTF zk-RDlV}J0?A6&cbgs}U<8@+MO)|d1A+xgFC78mNvUx_C`pSAvUYn<^F#rv7(li$C) zS@Y>*D83vCrl0cFW6-lDBXXumc_;9qj{m2U;L>qdU*`n`(ye#^iJaVx$YwEgN>`Y zX}ro#^K3loV|>Lb9sFU~{0Gjp++nMP74Cj+t<4|25`BCB9iPR8<1_En&J#?=m*{8q z|9ajYefq=uht*SjC8n0Peqi6#;YACDHD375wilk#`u?{mb=c=#t&9DIz20lWt6F;}k<#|x5WmeQ7dsSESY`h= z7an~{>p+hnMJo78xbQ0d|`qAqe@SQ^*TVSazx)wZe+Ht#`_%3zX z{RjH8xY+a1P`lQSVzt)Z$z#*gzG0_v>`*Ki6D#|_k?2x?ssFA$ZoI^Ab!b1~fwrAB z+x09@^*pFLX3wU-Yv6l_t?<-N&6)Qf{?k>%7e9i0`~5{<78iT}-BkboF3}fOYweRb z*Ueh}4dIse{|m)HW&h>rvF)|0|NKvU{V5$s>#_dQFGcfTFMRA2t$x+>AbQwf2Y=V1 zryM+T@?DR0Ew;|%U)c24*6)w@%;Hk|{&z(^|CwuCQLNVf-#hVuoCmP_bB$A<+M$p? 
z$}9EX3BTSDq%Y}q>XkfLi|xrif7N7s{Q0Tz{xdl?D6G~xt*gF{>Nv$*k-Bx9P%IJyrT)X~mOSQBI{ry` z9LK>2)A~|Bc8Y19cT$h}u@%$&)DA1(we1V%P9GUoeRJ@t7hUmdbnO0P<5eFV74N_I zxOeJT6sxuUo2~KE|L^3&;Q6DjP{@TsrT(kV93OvPF5{pNGEdO_$@7aJgl|Y)Jnz_G z2Y*=Uyv6=x~EDqiwC;d`({pN*${^ukWB-*WT^(_Rf@7rFWP z?;TwJ|Lg87F4hnJemw$hiL!V#BYyEco-;j!I zo(ul#`@bd`xaO=UFoilx#;o<4~UiikD&$Z4!>?-rH&%J$WHxz3L>O92TT2q(Z|3P{5n>cnT z~>QNqXkbJYLR}nv0+P|tU^FhxL zJ$PUTe>m;tFTZ%`fXU(X`k1*6Uv3lh`259jCusH6vbb>kE>?L>y_|m|A0*CvLm@k= z%KoSS|B}m?x!<|?nYXCN_CL)mE*y8{GvfWX8`z+*X7<0P z{G=Yc|E){n@DxKKH%ct^U*o0!UqcU69`gge##5X5GM>e2SzL&lUv>QKqSjO5tH0y_ zzcjPB&}YAx-hVh(zoJ;>HTBas?}Sr&o^-yU5CZ)$zSdjUeCtO;^_q&xBMy2e@zll- zT3mIgzvNdv;)eJ^bP^lAu+1y`ed*|*hp^p=t6a0OZ~gvdToxDG59&Pa_jgT=D~i?H z{>|3$+5dYdky8J2@rPoW_@LB(C-J(xJ-5z3=i;~hzw($L=o3G-q1I`9ov&?onm^lp zo=tw}KKI})=Dlx9IB=n--n%&OL*K6dZ)S19FV@UHJ9(7)iG##B{=v5AwJA*I9g3mR zQR=_;$8>w;k@Y=S+X>ktV7`C6|Hg^7{noO$5ErYprmnhtthL``Sbjse)EA0nqodS+C%n@6H^v8* zhaTt?KXrldK&!7RPyOhkV>a;tSzJ;-cXkg{kPU z{d3+EB+vAKTlPCaFp7O1fAMCAF*1mhuJ^>vqDU*>Oa@@Ft7A|oIb#-%k7Nqc){|h z7i2zDq;hyEqVLHAb>4I84?~Xqz`ghDC&Ds6e{9g<-=BcK{r+om78kEbJ=ULYji=)$ zwq5`1lwV8o)DDGQsA&4|75N^`>S(A=ce@^O&}+Wg13K_?jiaym^v7c>qMKqBAAi@N zg$MoZ&Gn0}LC0Rwb@taz#&6f(>by$2c@`J=VwKn4iI2X)x8p{gANrw?6BVWYYrpDs z(SvU~x%km14&wK&orfPDXvaS~uKCqAom3AS?7$BrUwvrC8pppHR{8m+V-Eb$VCu2& z-}|z-l+OR4uPCh6+N&R|=UH6PPv?gniV?!=R=!KHA-45HZF{~3C|~^@?>nu0^g5x{ z&p6bV+UoD57go9XM+2`s@X)aOd3Wvn!V&wRZ}vJ~whRR2e*9SI;9*Dma!Z%Fn^VmrpRk~rv5m#L> zIZrLYvN~ZdU9H!_4z9o266fnijgr;)qk#| zV?Ne*9oIY2B@U*38&~Uw2UCP7e9F+jPY-(e^gl2^+y9JLeZ~Ly|1T8sLAlg_o;O9dZ}wY;$-Ma-koF<%V;Zlr(>$-^ znIAi;qe?F?T=V9Uk~cH z^Z)cQ7Y5Ha^@XAq1C{-sEP3#)zNYxC56Y{b)6Ta%>QFR)W7d3&Cwblp-^RrT;bVj9 z4@;jkdhGdQwg`j&wDMOM=$Yg-&h=Mz+}W4K#pYX6TT!f5zf+s<&ApR|>EvQFzfi~@ z^;Gqr?-AhJymda<#6j=c$H6wtlepTcp7EXV|6Mzry4Bwo+H2H+@S|_Pv($kbO+n8- ze>HU+aACdwZLWS$=P%yY+B=R9LHY{f z?}YFTo#>Zr^>@+>D{XrIwByd-Agnay{^L$LVi&J@cK*Ma#l`kBi|GH4=jsQUck(-YzqW;-vPw|5f9k8R_3C-Gct;+Cg%SiWoL z&8a_ZcGqLS`0LCwpPS!#)~54(zCpen|MaOoxLJqmf1L1HQ$Kz48mHgHc^?RcoZzeM 
z|JI^x{iumeKKjH#8;9}fqar*ot3&q!>ij9YkV!aW=t?j=iHu+w8%nwZc zR)_Wt9$2Mkc{&cdbLwZD@Qc15{&>0Vr-XaP9X{=Y`C7*--^$`*buiC`;^ntz*mzpM zuv=@3JMqys^P{J6tEX5qCRFub)MmcV_sFACeTg1C^g*jb>&M>-(>$-^nIAi;qe?fd zc>ghTZ?NsqF#OU5j{Wv!d!fVkFSecLSzPi*XZ`yl@`Tk|<1bx*MgMjF^f46jg-KQa z>syI7^GWMtJmR2tNgm@?^dFtdJk=wf@;mvdD{MCZ{u})8$_ZiXb2cBlz#FaOAN~Cu zp94Vh=ogsjVs$V-Cjt6a6-PG|a--bJ{;w-~@Js#I@rZ*y@nc&(%`7g)Z-`CY{K>q$ zYv-A*e)6g?Y;eiq`+RTVi^7ICjoRSD*Uw};j{o`I5kx1Qw^UJHn;zerX%S%KqnB+v1&= z9Dgzo5FPAL$c=(Z{nzpN909+o|HMJ+FguwSevrC&U74Reofo?F&1~Ww_~9EXoIKB; zf71xtk6r7h^L(}S`@3cqm(uyqhWgP}6mNM=UGxpRYJQ>5qN>ohlXC^H9_K&otGqDAvs5 z!kkBfrYCQ!t?yqN?Q!x#v3?Acwtpx5^!r5gQ>34u*L<@p`fqXNSLP*g8-EV{VY#vU zo!Nc;0b%&J*ZI>&+a1MtcK(MxfrCH&`thijY&@MumMj+6dgxo~XFTQ|iVdPtW&b7I z`us&*-BJMh=z-}t*zkEh8b4RPnxOf0zLxKl*H3>~?rYzF^z>CmhUH&;;)f5s-unE- z@4tFfA8aJ!>A2!;t!-XTeD?pdYO}qk``1v&AH|pYujABpeCtO|^wD8l;voJiga`6` zz&F3v;h6_Ghko?JFSlQ8;Jzm`!lXC zLjA_|KR<5m9{l$GSD)4aZXEyrWqzBt)+wyk8Xd6iCC27!h?U#FoSts&e`;DE z=sJv}aT|9)1z+a{uT(EOpz3&Ru!GC+>aL4o$|h^AA(AxY+wIb=B0bDAvsF zUdJ=r$s>+GiPP`Y4n;jCl=_d3+#aIUf9h$ZhGEV!byHK;M3U-IvA1z6bkyy#M^Dv_4^#*QO_@)vRwePpuC- z6dTES-J<24qVl+}5`6Sigjb)^u2Yp~{$Al@r)cB2cHRE^!;XCi9Qw0wJ|1@3;?Vs* z`t2aEx_122qvL>`?!Tn@lGS=WlJ-$^fQzU-ox?tb^$Va&AouYP`wNgPMm{y)#+!tsiI|DO20i?q@Tf9d+q zno!ezhN3G5s`{Vyle!F@=%(}bi9hXQiBEls#4S(t(Cx23)PDTn4c%K$3!@sVFY>1! zEruTFJ2@^etoZ#S<7c{Ho1Pq2+WP~~GsyfxF*+)g`mft*>GyX@pBNWCkUGp(y-I!_ z%gEC013r3&=sC>;JNUZ>FR=UIEqDEXms|AXbw;oE5&HK2+fY4l@woqA!=tY#tn%7x z+`fOOo?N)){%;%uRsF|b)kp7Cw_C9O4MF&zcj7m!iOm{bipFOiPBP4&*lB)hhn1Hc zasMjkT@Y5g;EQJ-aO`!~|D9AfZ^!&d{r$DhkA9E`N>=N0fL;IQRFC=-429g_%5>o? 
z))kot&!1ruH(uhWP7oewdbKPrUi0Cb{crla1`axT`n~&J)HQhG&;BxM;`Qj3zJG6K zaq%m~?VUU-{r)!>KkqBd zFBF@_fQ{#rr~0M-69<)t9_Tfm+VsD)pKoMwA#QohQ}sO9Pk%V+q_?ho`Q_WgNrztX z-mflpjAy^U=!s>tzVCK%|1*ZZfx?=(zo{udsfX)e(YNCjcn!%5h1@8&s{ekx@T?z< z2coCE`abP^%Y&zw`ms}_e*E5vK7O-j)892<+fC-b;fF_cEx5pG_w3noEIM}l->3Ru zm)`$2S3j7{N1~rO{-5pppU4UOs{WUbe;6N3`kD60?4(}&Ag>3|{Ny>Ap$Gfv4?Dc{ z$@o(m7tO>C_CDdFXRZHxseZ1gF);3PrnrLcS7_Hoz{UKwqhr}ub?r)$&G`(|L%_tXx>R#BqVf7RzYYUX7+xr~e+anNhN+4QTT|L}Q!OL?ki zJoM(!AND+8%C|25V3V-Nef!?})cSLyXUG4Ep8fuv@f5R)#q{K|w$|q_bQ_Y${6ZlY z%9Z-B0d4P{3f@KVf*3w6JEcd)zK|| zvp(tg;%)8sz1GM77NK8ypVs_PG@^v*YCd`C|EZZyEv98&`P2!LUxn~M)784nuQuZt z&Y>T@@bT?0-SNyn$9FAs&rU!4!kf!6t{wk(XK}H9ta5ui0J3qN#M8CKwJ!Q*^RG!p zYKLN*7_jkNJFl+aBU68A`&T}3&?kNySL=fZvfqJce&wTQ*iV1hdeiKf51tqnw!W|a z@YZLve*fOg;$r8W`_yhIx^@(+yrzEohMmq2I}~H0NU8s>U7O?3QvbCN)~_DP!v?L6 zgipK*(>$+rnIBuRN(X-!u+PYa&)Z|2u;73-=6>OpG3eR#KfPI8?093?@c0LicD&Kl zxD!m*)Kwp|Dde?9pgf#=QY(K?tP=o3G-VKa-1@oQpJzxk=xX&kKZ zlUIeIYtl7ep6C8NY%+TO?C|dUOZy+{Q2fu||9`tUu~Pq;U$4xY`PlQAi{CrNUcvfz z1>uAA6*PZ7Yre&)19ZYi$76#jFY$+EH$3v|H-C9l7k+%ohxrSub-pdFr*!{~hUzLZzfi~t%c}m@o#erv?d_jFfU4ucqz<-2^liMV{C@gF zxM9)_7x-q__?}N@e*L>P_1OFGL=Th;RSj}6ITexcYt zIx71wwm(Cg)>i>7@!GGAM zb^derEG`^(+Wu8;4^TSKBkb~;@#~99jQN4-^~s^wLB{PCKKb5>U6-GSwfe99p+0m| zBp*Mv>FanFSKYKe<7+)7oy3n`6^4z^{O->;yg*L6XRY&RrR{&}%;3z$UI#9k_IXOI z^4dG`(Kl>#()opA$Cyys{?SRd|D^xur}Ity*eQ}neWiX6$l{Xbm3U5h9r(dNe%I*( zp4cUf-tgpKx;J-Wzi0jLRvmDqc>lwb((#2=Uc;+v->^&bYe;S=z8N2s`p-O?!m~cr z#HMcV)NTsa<#5(sFP4X%V(Q0E(fUk2~lav?YteuYQ6uOt#xtU9{uY6%NK8@{yQD7Dabr*Kb>pbDK-V`@^Nmq`!8Al z#p|DkO*{F{XKtK3EWiIjjR*d^I{mTp4>}L_uYSmVLf^dhqReY>FR!;-GgDPi^>&XK}5IxcPN{=o$9YAN)P9&b96V zuZD2wyl)Sm@CJRc{ZF6N{cmso%d3u4Oy)-)L$OOF&<_t^=T~1=@~t1}G{v9x%Xq4j z+VI)FES}F=&-kk6L9H*f(W}C+@_mo5dHdgQ46A&rcJ#O3d5wHK|IGXp^LY9HW!3+G zoz|muao;xdZ64HJ%ia@Pe}(o<#FF#_+XlczZ0f;p1P88@psY-YfT@x z%zNh#2sVn4t3YWABx@LgR1^> z9L2n<_W#5|??m5h##O`*!l!=olc#zq&uroy_+jNW*T~2Jy(g^n_d}Lm^N@+;bNvreTWzWCnr@Z?0?R?7{(5`F# 
z=B)V^Px8DIzKx3w!p8>HAI6>XkgSYLl^`SN9Q?>u`1LFPnzL#!ynJ(m; zt>bXrzSWrv10CiUie?Oy`tO9#?`YtYZ>ZxD2fgN-O<$_`xs1nrN_nd1LG-Y}4*syb zf9qG<^v)AjDAxSJTkB6m&#r&y&EjI`Kl%LIP(R2x6==u*#A}+5-4Htz^1-{Z{~HU> z`hgDU+T+Gc{I(tF`0zmb4$u6`_o`<&hko?J0r#%4=!5IN8jf7+o6j9|;bg|^c>JG? zpCwCL*X)1)GBzre`mggV?f+{su=UG{&HO;-cRul_b2oBv(MzGXurQ0ar+mp zI%vnssmp%bLt}rS)SsBqtPm#0RDRqccQ!R!2?jj{Vns z%VS(c{9w92^O{e8%>Fn1;ltWPcR#q@fbii5x18|cBjeGv{x`F@Sf4w5{t|X;O}!kq zW0&TaOHL@p#RpaW=XlKel=RW``$Vt)D?#|6@sj@Yy&Aj}y%Rml$F4%^OXKPfn{2zl zvlu;4tgi?)W)Co-Cj>RuH`XL)wBG5`e)w%e7?mRm+u;u-FE$rSNP+v(X;IuOA|aKA7^&uXSOk znC97h`rH2zr!IN;=qEo6BUTtS_hHXHh`#;)G?)6V{}<`^zYX<+PHjc8TIb{J_$T4< zKF2&mv3E?Z?0>H)!>2Ao^p#gXww-Tz^h?qF`K%dMi&GSz5&5sSLzQ+dD9|nIkXs?4Fc{#YFFIx4`o5!%ZT5nx0kGAv4w%zCL{8Hak2fy`8b@78$ar3L52UW*x^`lpX zVVULzYyIZ&E5ouo?lfw<3&)ag$3K&^xRj1RYwA}NtF`8N^iF(r`B{aXpT=+V(fUKN zUrZ=%{~G6a1k9(@f8`Me({WN89%#BcKl7`;)$d}t<$0-}yebTbgnY!Zch3k1|NNcn zZ@aI7u3i5*RrSG!+SH*a-twAx(l_j?`GsQtC{pUb6K&4pF(2MPZ2zM?;-F9b*j7(7 zi;MBoc{+(39~-0}*kA{LSpS5lx7cew7xFKjJ#mMZbGBPM|35X0i(P+`>;Et0>Q@x2 z^?qx%j?d>-?<8XVr@n^xLvcWSQ0hNAOA62WR8u`rdBnj|-;{?R9+=i=e)61-3-;3= zez3;*dv3Y=_2J~M33q(w2i??Tzdvf|IEr@tbK)ltl&seCQ6H!})LBdHTzCZ#v&l$O(hW{s+ltKK4B3?YhK4#xt97 z74d_no5Yz9dWPt^EH1O_55xDHcliF_TqLZxmtW$yUu>e!_m8%pZ>sLn@%+yrrtid` zt}U*0)w_g&e)_yI-%uPHiKYIdGfZ@>Ps}?(;vn^;Hh#vpc!i$vT^5&4HhNVU9@*im zukQHyDdFiQw%%jDA2;ZWeg4gL9L0P>y#N1{^m!51%>BRe%uf2|okZ+;V4gMchvM7u zL8<@b^$CyplyuNjUj43iepR0NduGkIICU^il|7sOu-DW3E%C_HypJnxEQXouhXfyumN^SK_VX7yJCx%;I9(`$GEv3#p$zX&)r(GkU{S2VSmrQ;@ns zad>oC|DD9WVm;}zy&pA|r~2rDR$6P{r|{as6pcy5Ve#$#Pe-uc~omRkBO>aowieL4=i1tYwLzVScw1}gO8aK>)I8=JiX&y5 zZq;{+%B$yMTYgRBpz?@=)KP`-!1VQGe%1F*#|8W8?;8Azu@fgWzR)#f{`qbQT~|}z zY;XTMZ>grWMhCRdziFKf$zgt>kRQxdZvTt7^Q})>2jig!dY9x;haxOcD2U())7UeZtgrx1M*9T0yfgm36#xptnyV=H#j3xglo{KHq8r-fzQE}yJ^ z)X&Mc>mR0Oak2eSE&lv>y7fPKeylF-gX!t-b}e30-KHRQhT?nCQQ7~VcE0H)4?pFVH+!K%3Voz#JDl^?w-42L{7@lQu>C6k^h$^O(n|2B0T za5k^M@=o)#FLnKepVgVS$MJ{am>97BJLM_zJsR^N&ro^9L7(`s4I5cpQobE$JBj-$ 
zWIL#eSNOwGudZ_3mUqnvDYW%Hjf5BvOEllkJ$ z)$>IkPku=?R^ekBVR{PTpDw%e$6{e%1rE91H! z!=ZZpXP(7{{gPO%wRiFu^S1?@hG_>FT{`S=+R z#1ERTcEbE+yZ}7mX_Y=$0anPMpKjVZk=l*@O zEms;DcE0kN{dXNb5*<7KoSem_bp2tjenqibYwzT->7~d2Nu2qH;s-IYvj3}#F7yKRf?6 zS!}*{^&Ee8tG}t}6l;R4*-lFL-^knf^fMHv#6YS4PWeqi_~;uZygB{+E9(E^{wL`3 zJY!cNbyi_m>yNj6wBWAO!+N(&nt#mF&wJIe^Zz|rTsWWTmx<>eF0k#sq4A_1@wWCy zoVW2#`MDx_YKP*7F;Ln6UddxVw%u#KcM@lNsmtkXJ6~scjH_t=n&e}rXwNhGjF;N8 z=?_Dm-1NNbZvSdn=Jahh+46{qUh{1KGd_!peg0Z;*57{#tF^yLZ9D%%ow+bLzGa@F zI5h?;``?s2_}+}pgo)>II>a4=>^^v#V@z1+zVf~x0dG4z1hI-Aj z^FM0i&)3%9ALUtG*#5+t+5Z}kH0}gT=bsxI&jqP36!L~y>Ob?H(#~gmLnm>sy!s{W ze8#bUsjm5(v*ufzIzT6U)58W+Ug8fc+}OMJ>60D|E1vkB*VkQh7q4-ycS!d$`9tyg z+v`(5d0?gsHsAkRoi!n#!~8-qAqGnQcgk0UZ+aE;tS9`cJp7%|tDcREU4fnS!t$@I zu*Q{_%?QJXys^qWyJxN_0f_CHp~>q&6^ z`1ik?c*X^Fd||iNUgPu)+n&FMaG7@~PLGaK|5d-fPCMUpa`Ag7h#r{wt?#M}543oq zm-^A|uRr)Fzj4XQmtP(>dU5d|4|;z)^z8gYp2fxHxu{-$t93K3u*z%V^bOn4Ne=T4 z#Tn62>c8^%-W@*gH--tX{#()a>bJb4e)IPVA3MdgZgl(W58Dr3e*TLu9u;apWb{(r4o{UGyk>QB(#f8o`X*AQf$q3DT@Qvb>8ZRgv(iG#|k>o=?D zgK3`mYwh~@!8Ff1;hP^Dq#kTg{b9|$58vU%uk01pe7-Sqw~>GJ8n^S$S`XV-r~7Y{ zpXq{KI{rkrAw=dEiXTTussB!Re2)X)UYEJVy%Qu3rhe;}_5mJfx>_IkmZ#(VyZ&&{ zg$t~+^p06L`}61R^z_=-qi>&o`?9!{et(gSFRa$*EwhzJ-_TFz$Gk&vW=yT@|3ab+ z-}JPP*!6{lSASK|JHe_v^G|8#;Rn%2uOm{I6J6u0KWzBM!9%V&=e^Li_&--YZqo^D z=XU(x&^)ly{l|11$!dKbv-|&O+~%RW%r6vY#ne*&wO&5g!nfx!ml3^F>=mrb*U7X0 z`8St5^i#CDsmBT5{Md?Verkv1KiPc#t5-iLtnk3YTU^w0KRUMm&sASBKU{zR)vbQ8 zrnaJ3pXtBeHwk^7XKd;U#ZO{tRsZX&iZXoDVSLvfH(ugrTo4}UmCy5Q{sE$Iad`do zcMYDl<9B8}I`j8;PYykN%faVRkNy6xTgL;JdM5q>By}0;_`+_jO;4{sWF8=KbV6}< zOs?ub{kHsE#vPR8$~7lyw7%!G{&zBep4X!TQm z+r2LOw*To9oznilseUjSU!tGc{zoX^DK-?zQ#%yr#6VU5c^_e3)N82v#6h3hWVUt&d;X9Z9ttwu0L_A*HEMn>JPNHkr|XWsQiL<^dJ%H-u>+1baYFS+9 zi&*6~ar%aB{c1=K^9#jAqT4Ng@7j_3$iX+Ayq!lJ^iJZboz`PGv1HZmmsE&j(;`fU6#l)t5 z>NQk7;vo5EGj0_>mvMOiC7Jl=n8 z{6-cRzi2%Fb{{-YqUAMl`eya#!bP9HghDP@sO-PKkH9xQ=Ba4spWo7PoM2U+`TN>= z_(2;_^*qS<*kA{LXe{>j6@R_$urTJ$6KcCZbP;;?{mb|)F0=djOW3V7_14!_9j8cL 
zU`_m?xHLW}^m<&ae6uxf|G%s@+dc0K%r6v|#ne*&(b0ax zxBX5nrrGbHiGyDA&8A<9_(9W6;>-^{L-ceU{ODC-IJ5E5n9CQO9Iia(-RoaCAtR6N zpW}ZJ9e7}tEcn5CLkL!XEwQ!!P+T4ZrT%Li9jgN!(1|W_&?kPYLv`VSY#;Ed;R{EI#x9XP-J`;QMbq zjxNWaZ1?DAx>$c|cmz2v#Y8GHlnG+x_&g>aXXU@r7?qqJFM_qo0Ng&ff74I@DqR zzv;5Ln4b2}>M4F9@owe01QWiE!~8+z@w-3t!88wlCrtA^^)W8CVkf;Y`lr|Ie)WcP zhfRAw*y`)Q8HK(beA--~hV`>*jj zPE&F5PuqXSE;euNo7JWB5q9hQ0RI2lc**>je<-essg?a-O_bsDyc@dqxbYId)uHvm z15HooXMW{-)idm;ziWkOu9<7)XRhuVe(6qMdvg2jsLQ_p8lT05{i9ze9)AwDah=4g z*6`}uH}tFK7mBN+RAvA5f1Ir!4H?84Z`NAt+X?_3oEzNiBXxCpQ^A=Wl?Vb4OTdAI8KB2fa zCRFx+GtsktDuGQOZ-+xRsS2RTa~AN4<_^I_UbiTCr%2v-vEfsfVZy8L-_Ex@c#7tCv*ueo$@9wZ@ADGA_Tf#} zcOQOpSZD52p1$IavFKHP{@PuCe^krjLLbHY7yVyb`P5Io6F>6{#SPI>x&3#w^R4f> z_`MTE4@~{mFRce2Xu64><*6Ut{`$j8pIhkMyDxt*jC||j>n>YtN%U<0pNx~AtiOLu z{8_T3^_1@apG$&wg3LPy>r{#A^*b_n}2WkPtdddPc4fJ+j-s-A8&|11u0PRwO4mQd^|3aO)FyOPDh2rKIDD~e7zpkGn z=!;i>io`+Usg0lUOL6M07{~Ja84vr;d&lleUG{kR!CxM`Z0Ijr*FW(6`{XPxrThOR z;|XgP{Z}4+^GP24XWpT>B}!HHfBhJ?;~8F;hN_Pq$T(i(W@~@pfjl234tgazXA9(D<`kT~c_y6gW`BrZKHHoKd>xb4=Z>WyiHa{nR<`;@vqocC_ z>G>ZW-^NSy(DjL*d4ljj(`#mN@lKKYvB7@&yO#R(<>&tVsBOCjOHW0XJ`EX9f2UV}S;tX7<1kOq z{MQQ~J4N_j^$eNU9QuQsIPjBYZn`ckv(;C>`*8h3^z8g|Gm8s-s`~wfux7FUp>J!c z4t3^gHwBqzC~l9AQvaRs@WZ#h=Nd=9TWroF!UwI7%1`~+DW-YeCF5c%runHImcH+A zu736}!jR^d-@a|QL*K4{7_a)^5dHl{u6{+aX3>8qk$O!KecS)guZF}!aYuYWpS=^m zVN-18Vf%?xUwP^Gm6pf2iugtSU)+Bw$@5M!Y+l%k=vHBPceP`0zxC`!nDN)m&idtH zQ@rNc{--;Oi}ho${{K#{enqin_TTYH5WDpGH`h4xX^0()U&RMDo>QJ8@8is;s{h16 zulZ&(t|ERAzHKL(U)AUS`orF@?D>z&8xzC6+g^Ltm#0oehu5w3p*xFGwyB52{`|Pwzw@KXrllL3>|OUGu9xdWQYVN4vq=t;i@qy`25r6$sLHY+8 zPy1&6=B)V^SALprwv&9b;o}F@ABH|T<>Gf2UnLA{Zh!jI@2*Xr{r>`d#e6&c|LaB; z7xs^0&Eox6$Kkpu?}W)XJP*t-6u*vyQvWrtp2u+O^B402m4_ba6TgkGdGJ8n-f9)& zsUEtfH;4YP>X%>q!lch#9%@f~eD7VyjX}ru|IABq!}#})kK6k+e5tavPU|ZD{{_0q z{6cYe6shVzy4I(LjA`4yQ(JX;--i!UH;BIz!Z&0*=7X)+NiQt^yZ5d=^RSo0z;*t* zUiTyCqFZ_Xr%UgDm}hZe`y~&QXzTSrAH0(YuN(N-p_m*ArT**uO7~x4-U-6{pTGao 
z?xSB=y>ZL1u=|u_p1JC0z4XJbe`~b&EANZ@|6c2RE^*f0@kkK6bp2aRAb!7JCyyN?SH0bak1-Aa^3&u>Q@wN7RUcOPHF#7K6N)lAr$w<2bKNT-w{|pYLZ9Y z-ib{dO#Rju9UmSDA6@fPua0B9e)_|4Q*J!*Prt2&6X!edCr5ucoVx7zGo9bU@%AV4 z!2`33g}D9xBCS*PnO`XGi%O;bI~joMVBp(!nrl4a{m=jZ*K)i5X3tB)DPj4^!_V3N zoe!;FO`Xrw1XtAWABlrbd5U6vX8%9i*Z~P0sq>|89vh@C zY*77SlWiB6Her<`!{)u${IfB1N3U^v|7~V*VgKkCj2A$>m2P*!n%VCfk2Fumsk;8Q z{QH*&WV~+8cZtq_>-t-r2ge1a=bO4fou|hJJNQFBS8dAGznB`f-ekrB+fC{t-_C#1 zhdhgmUrf*cIJH$@tn!-eqICUjP4gRq%r6uVMn~oLpSSaQz73tk**Q>Qo+akUCNuKl!D2QlHiDqz=p1aZ7gUCr|xh ztG#dT?Va{)XnbeeTZZr7`u=fe)o&))VgDoS)^QN^+Vu}k^=gXLM?J;EF;LZi{MILQ zK=isrzy3`@_#kzG=6AE^TO55*`RI6TMe?dJOj>xvbBmokJlvPBeej~Y+yC0x`G=`l zT6WAKU)BmFE<5!TIG9w;x%4+-KaklkH^$f|21CMap_A{|A~WM^UbDSMf@Oq(ELdraeH2LoKAl9 z!l6eTzuz48_W8Fji%aSHlbZS!#cHk7dbn?@`E$)@J`J%$@tgRd zvj6G+C#eH{<_CHw`erk(B7QL4?#M^qY}M_rziY@IpImy&r3-Yq>kfG9TkEub|FW0V zYx~t)?KwUE*ZO9E|MS~8P^tggr+Oa4wm-4gk@~%Bk6S;Jep|n)`du>wAxRkbk^c98GT6^`IUM?|g+iy^x#zXN$ zBv$r6J^$%c7gRm^1$vE}&A3(kn(8KTtDo^e^svDW{;>1O*B*b>S@VZI-nsqRFs1eV zivJHPf=K{{e5b49BB2EPkp&$gknm3P}%>{!m~c21F9Zz(7Pm0dDH_B zgwKA+{N$;g@#fHvUU>T0*Y+5D#T((}$-Qrko9j#H+4VooEH1Vm>Tv%PVYiMunIHG9 z!uC#>>HI?RyBMhIKd&3=Hy!lAT9nCn-+>QW9ojeioiNR_@hke@NiVE@)$tddKYXXK z&UT+HcF!y0(6jGfnlcaT|6=j}hcD4L+y6S{X&qv1v(kujsSs>G+J-$sd|$T(`{`8$2HNUuC|D&%Keux6faB z78k2$3BCUVbtnp}y!K9f^bLK}OXe4fCu2fY|M?xD^+WS{T`(Rz<^dnHI<)@OkDX$g zXXB}k*)EHV@v%YWhu!A+&LfN5uus_K!QUV8-1lnm?fA1>b-;!6|2LcJ2N}O6e%9W} zBktRYzV%=8m}e-SiV0Qy7q!;w%KEP3qK_VNY!DuZzZ1eY)cKWc^>@+>?wBjr-S+lZ z!!j$}^6uJ`TGzkrCNs4@bgR9v-v6PgHt5t=6sz@qZ2$kL{r1W$?|+_-sWvV=Mb2Y0 zFPnFwOC0o?Z#H~I{Ge?gjVvzY+jy$?@A|`~Yu$b4Sf7VqJ+|Qz4_r1CJzlpqp4Pdc z{QPlx>dz_`)6?%wEH9S?-ba{sDE<%wrT**q^`u|cC-f3t{XQ87ebDMi^?GK_FXcJm zc~EuCo=txU4~{tY?A5jl8*lg8?Pq_n_5QnO78kZp!-eAgXU<6HE$o@wdyPk$?*#4o z6Y9)0uE_jC@k|Vq`tOvV^e3&uC%pP$l7~K6*?+AcUW#d+jmNwgAMD@{F8jk{4^RGW z7&`JNn_T(o&VfR^P34)eE((RXdROEnfG7qn|G=QQg==K zp?E$%sOmra!BU@6y{72Zw-tmBruE_%_J8sB7aLsk(VOpWb8pz-n_t{w-ur$_UH1Fm 
zsZx;#H`n7I?H8|8>Lx$6*=~umy`^?2{x7Ch^}jwyl<60*8$*3QiG$uHc|7kO_|$1S z7{+wn>%bYu>Q5nmv8O=x&O};sk?ODcQTH!TI;l)`o3`a9_+XmnT{12<*x~sK!~cB8eA(?Vo%_8Exi%C6`Br@N`_`sa^X2NA;)k4aEyFwW|O1nkZNHL+c<8dL1vd z@t5{%%qP(^zUt90!_<#mf#KHkZ+!QUjVa;osV5FvWsTPLKV!4Fu-$`Je?#MnV$FPB z9FGL4i@p)ZZ>V{pcrhkc^}n)?W=ym3;nn4K_V#^&)vx1ukbXC_xWG%%>V>bE^01Ac zj6*yO-*3xf-|l)dta8}?ecNwf*Y)%LuN{Z#I8xorK6lHb(*8e(p>fT#rbc3A{~MxXeHxJHs!bfU zac7sGtF99)>8Rg>=wgEve)K|kK#vC|2uu z6CLj)Qcr(>P5uCh^F9!Ym!hMx|NpJypS-;;`jyVhJLNGCY_NkrjC`1m_v znrGuOkEEVXdcke-?Aq%-*B6FdFunfn+XK1pE z8S-(@i}SYb|IGKl$$0re$@R|~&yvNfAEa;CHgDxwJ;kfpr%S2-RsE<%x}Ep)PHn~X z{hD!XeAUI@3DZ0qFR2fIC%tgTS@-;P%p+66Q$zmIT>RWA=-d4Vnps@DVm?E!KkQaN z=+xFa#A>}C)K_Ycd)IDLXDI$01Eu~ezvKCT9Unc=YrfgaW4`#o((!@j!$;2$J$PUT ze>i2Q1#f%e&!>bRO}q2RcZTnSzJ32rzT$@Q=U+!nwfbt=zg$djb)jKNb=xw++?V_+^424r<3e+f#oIhGXM4M`uM?8{W@NM{bBgDi#8g0#Zh5I?_Gzj z{KRNa{d-6qX`gdB{ol0t=E~Fgh}BwqCyz_#f3)xH_nBWPUXKZt{a;`7;HUL@*B&=s z;ZGG^iv7di{e_zoJ;>wbwX(E7eckx#Wi8&G?|Q|EmeF)F;jNu03wN#BX&Z{WX3~ zbkhB!<*A+rnFlu5!5_lCOZ<74uRb2eoN(9j{-)vR+wUKzW^u9O|7G>}zuoFr6l>=3 zj^mM_>8;RidzaWXu|x6K_@LB(_`SljKILMk+pC>_KCzu|dGtlm{HlkYBJ(kHIzD>X zV1*yuu|MqyVdA9vGv$(LI$)AhAzc>#Y6xP4!|JPNAdTO(_ zLm__{Sn9v#@j9Y@>wDg=M;!D{;$~BaB7P7)@9*X(PxXw~Pk-2J@t4nkXwv;*&-+hW z>gAh%f*!~JjLUgYYc0~nuS57Ae~MGHxYXr#>gDs# z^h?q7dL$1!MeihDng@Es!4Ceg_MU&g_29GC3>!ZC;x8|l@S@i|jvILX!JPYVH?p{} ze-vxx_Mh`e&^y8UGVL~g@|kBS-igFg|C4#ZXFNmKKFyV;!BG4yCYSn;j*ns6o^5>U zR33VuPyANLfOb9OqnFfad8&u*ocb9j^!)yndB>bFFr0huu>)2(yBi($Kc)FCm|Xv( z^~)?;Yjmiuu6;9q#r#6i7ey-jKTyV?{!)FeJ#M_jZ*^$>@IZS#HAT;BzU5(qs$;y_ z^mi@u!_!~*=fr)wmifar|Bt!zj+>*Z{{Nt;RCyB8NJw&lgkBa15c=HZE?rpZhKNfO zRK!ONDE*NpB|wlOpg@$4fB}&rf)VKghN2)KM5QUkhawsIBK*QV=Y2MNC*vmh{=UAv zf6Ql3`QaS)F+nhsjRlFzYSRlUMcl@jo)mr__J-#|{$j6F&6#fwui8>l1(DN%@dL z)-$pqc^U)r@Ad65=$yvV*WR<)TZ$PWa+%KWWWRR`~mWUHlJJw z!mPLB3(M<7k01XdVDUAzgF0XApjRG~)em0up!L5g{H7;P`QU}eWb9r18zWZUv3b-9 zH#bHM`Nr($0G&d-oqy=h;$T1FS6A67D!)kOC3Vp^MUKd<_*<=gM?ao}My!#11l_|%<;oKKt` 
z|HG@8bN@@Oc~mb_dFh?#_}$Rz&qZI}{)=cRZU0XFrwEq%k<`g^`s#;v`d7u7zBYG% zvpccdJgVe*)Q43Mdvl|kYVU+q|GM$k$NuKKUgPZd$7U7>eX{+3qPKdqeu{p^t@7-_0MRi>VIANOZ|XX5j*G;J+fiSt1|OSc`QzOD`lN$l|3B)z?u_&yj319 z{=@y#j$W)EKRf=+vp9I4=kfN_+5dl?=&4h@+e`1{HRoZFi7!8Y@4XmM*?;{V-};dY zKYZ|G2U9&Vb@TaV{&^OM*Jqef)wpAUC zZ(-K^(NNX@^m`oZ+fe(T&YL>WTiz;v>Z?her8p;gc#Us9^^KvMp7+2ruZ?XCJ#^m* zcMQJ@9{c@;I>6=i__IgzD~i-!!UNJbWcp3tm|vLnuSina|5YUi{=9DtU8mjr61~-- z_2UOp7j>GRc`Kg>7f>Ize(l9oPCoPGuJ@;vIp zCeNRD;MB*r4fU1Y*y5p2U2FZ~`on(Z2b(JAohVZKc*kU|tGqoAJ}dcbckK6loqlf2oJ~L=JNJU%vj+X}*@A^&?k32wzL||MmYr zHRkhgUHNJt#tg^RNjQo{D}wE&m)6f^o>E!zCZp)S1;KZeCRu?fBCK< z#8)2w>=4gCZ-O6~RSfJrHyD}yjm<~r8;Xxbrc(c1RA*dQVtuWt9%R1LL|>mQh(Bm~ z)!+2Zx%-~c>QO%PtI!iyg<-@UckH~!L03z4b8N3{|245tM;;&l|EhgY zW*)?Y=t261?45YROMRjEL^N3c@l)h?eEeB&?7zMGVFyz^a*F29ytS{UPy37vYJTHQ z^~kj>jxc`Q&##|;$o665%@lKKuW#L@#XZrBCei&Ged|eDpCCpNu3` z{g1}k3**T}WA6isb8WGtAD?DuN}VBAJ+KBmA_i?iiyPA`RCp& z4x1m>pEp$xI+Ybgnxp@t)lNOR$~8ge7m7uq!ty!w(>VUe2Y>cohU$kMwEmDkLHt1D zt990AJk$lkhYWVnH%2wDzPfeudyP>S-+tdhJwNmsXP>{VEDqb=SBckOjj}qFU%bjo z{OB8U)%-%SsO0MvF7Fc5eD-@zYFC*)^1mC%pw*##=(}MW=gF@;CMR`N@rIQ)e|h;~ zd;B7-w(CkK{pF_i^=}ihIMP1Z{g1iYK__y$q~6l=_iEzFdZoTlEEWx=?ca$%&vC`y z_WQZm$%h^EPV6SruZsTLysP3=??L#G!3sTfgdHw@`I2WIdp}J0?$JxmJiGn-?><=^ z^vAF!KL9XKP`ukq^xlb%zL~xzdhAwDv3N98_J28v!=HG=s(!jIam~a|-&&&g?*9hE zx!eAD<3Bt&Go1bIQ7>&Y(UHflKk3uF;DC7f)z_^*y=tf4n#e4@lUJqdpYd;roqmR5 ziKsAsCwA6beZ5Zq()O?Z@PWj8wVSMU;0N0Fuk$fIb;AQLpgwGL!Ponz%o-OqJ!;)c z*FCI#{mH&r9M*?km50yg|Bq{3^v&j7Q$9uJ7m6i2^J=_z>YoeZZ+z;9z8n7kit|qD zF+OC)Zgqrl>+koo3$AGi%k1M7?T4NJ=@W|wo$h~f)q^^J(YBY~$?Lk_w}ouSjk(yV zGZdeW3R~|^?98veO{YKg8md2hpilJhf%t*=r*>qsEARa3$rDz4{l2w_?~;c#&Odmc zaaSD!j~)LsvpB3@%jo`xyo%!0UV7CVpY{zowZk8Zr6NhG|M2PdLH*Wu)f>Oc+;@*Z zXns90o!w7>y$aK~(mbeBv5E(Mcx2Y6r<}TCD?EA2Z5yAl-O0?yet++m`t1CFQ{`5I zIuGK-I>+(<%Ic3FJc%3%QIz_xe(8P0rGBU%^8>w;{Hbj5y@_3Uuv;8@<@4bD>NPHW z`_u#aPd@wBaPsJtK7P;-7NLK(|CyS_VZXlz`2!xdSMSsgQU|E|dQd;~wM15ZDCCE$ zQva1lqGqqxlqca!`-Kczyz-#$hH0GFx=fF(*o`ktxbl?#AKtlo*kkS0u3GauPrzf> 
zA5O{Qu=5XV>gO->1$7?c-ClYpuX%1H{C3=;amb+<7z3*MU*AOdn3s1Vr-&WIp33Ms zzBYSOm+4&=hsAZ1;i>QoW8ZmX{1%%Y6jl!3xbdOhli;=UZ_O+Y+ur&A-D3>-;R5P|n>6*E zwO&~LsQ3S@|FvQ3UA*)SXEbWx+w1h<`+t&rU5`JtA6`+a zEibXtH~6jpY#+JAhC=?}uGD}06zh6@Pigyi!b88f4*(goI+PE6H%#Na<}*FAVmH3f z8u;}WZrav`-tVq*@1uWO1RmS|^DGYfW%pkvdheoJX@Q>e!0^MD9(RO7Zd9%6e?6BJ z%nN?Qihg3RjB6&m%0s=CaZcjkpI;2CE&jD_KKb!xs9iVglW*-){{EFKzheH|`26!$ zy4{Iadr943UE{sW;y|BbD3%d>@7(>^?yVoW`YEDM(L3=&pXN3F)J{M2pv5Vl2U%ap zio{i6*d#mg%3mFEYNi8z+y48-Q2PFxt6ov0_V?XXp6BZii{UY}wEa6_tTzRjkG+p_ z(R(L|9ZdDi6T}a+?JMEKA3np1{#W^vCyd*D??E>l@LJet$SV8axOg9YcKn}bao9YU zj@KWqiytUn?In7UzF9pr@x^ZS6eFUcvj6K#9RAj)3O@65NgVxB#1FLjCU&b|`8)_8 zGT24mSnuw07ax#c*jRt3JKk@7Y%2V={m;ze;CFBP9tOXnc$JsNx3Soet#8zqi#-(b zfZ=)Wzf)c?>6i6W?WydEYh`hmJ*ms|_@{_}s_!C)Jr4ZsHj_7ABJA(+oUhyg~z4~$944FJ^XUs1YBO^(v|IBY{G`ElcnD@ku8637KiyaMTXb(Y5h+97Em8H`^UfTx%4knL;l+D1`hn}Ce&lw{}km1 zSCV*Z zWYu@$3%gu-?{Tjjy?WSb&(Z6?`pc)_vCrRrnXK)9w%6xB=c)&t$m!DfRuBs^>&;|c zAEA&79IEKSv^zc?r(AUgDDR(nEEC9;9hxirNuva zcUa-Uv-kVthQIe}xBX9l76M6nuq>VS50|)6jcs|Tqso4e}2cbe&iYtDj#;xJMlA_yjA`QAMy0Z zWS7OUp!#slJ)?_-*380n$6jz_-EwggYrM(oM?L64wyV^R44)xhL3JTT^WAc|1&Sed|!S3 zVJnM+?>WSYcJ}+g$t(2q4cYon-O3*ddB9Ss|LRv)Ug|eKcoeaN)RW5SsmJW_wX!%& z??m<~;yDBgNK5YM%cSaV6$>hH@>iaf7a#BYXZ&+)aWtV&Ff}XJE$3FeSqgOQHvFkruG9TWb`4jQ?mv32ruxmczC93^> zfcsFev%RNsDC9;Z<4<_;=ejoDf7IvIpZVg448jAV?}qprru88wbyV?&!IOS-|MW+` z8;1OR(UA);dym&Rj(@CAc@{_BCus9+sq7SWdlqSq{>#t0na3{|51WT<_E{*_iH1`D zHIC;PGoQ3RuYTCURBw5;Ui?5?uZi8_G!MKBst@^hmRi)`??Slzr7u1Iz4r6p_Riwq zyc=lsX}@^ii+6j8-aFCJH>!s(7(@|0gv5(M4uG%=k)*Y@+=OvKatw| zeUmkR>G+4bz+56iAt#ti{dZ!oZzxD#Y(3XR?_HW5Uza{t z@rDhLIcxl>r!5jTJ^qc2E*YPD<*WSvFaCeLmBnG7!{o1to~3v4s`URCx#;N|^@U=C zXsGIc>Hl|{%F|MGg7ASp(IXpb-R2K(PZo#iRkk=}(1}0mhqx*X<4)TD&egV=5Y}I1 zpV_y;^blvi|ISc8MR+W~<`b{>((GDSeS5K)zNxa>Io=3`JSeWze|R>RxT-qf#}1}? 
zWW(Mp4)f1NrXQwvs-IV6eii;<%;|qRe8v@rg|Ux)?_V!3w}e+e+y6|>;_#qTS?i@w zqMiNuTT?rE73qY%U+g^Ir zm#+WG#V74gDC7qBQva1lqGs>srfPge9n3dnh2r@5UEac=4d0 zHSRk#EdT1G_b+$QKJeJ~-_m)43;O?4a$8;d_h0&!3qN&+LT=Qq>VLYvY`a$;?}QJ( z`c@*t4>C^>eK*A4kbJC9WW{cLVazWMe{jz~gs|e>501L@bL+!v*FVq9;^2F~UuCQ7 zXFmaAYdh|2soiy=NOPS3;rS={sxWN$*6ZJ2?8ZNb?T?&r!MO)KYyCe!^TV&0f9Iq2Z)M+;z3MX^ z#G9;k`i9;1TTSt#zEH@4Qf2?s-yi7{sC|GB^je3>>PJ23L8~v}!yi6F_%zSF>Knr^ z-mG`fTKhCc-}~ms6Bd1uIJ^I-Ka0bTgEm+!-hgEN=@mQ6s`~0XDi3wmRCa>QI}~|T z7{A)#vwbL1)cXNPVFQlCM|o312-IJ^rjW){A$YcJoX0_*p%g&-|OhgWdE!s#m@e&wT2`3P(cRpZC+W|mj;WyvtR)?3fT!Sqd))lMHn(TIl1?Y|~* z)(?1kmCq@vAOE|7Kgc{m^xY7DL-O?``BmSIFRU>5^apNQ`uwoc=YDhHQA4j{e{9#E z^eZpeRJoR5kI3oLJJFP`KY>T%+Wp@$29){_&p_d0Ues%dpFGAatG^)rUiB77UPaUQ zi$8LTrM@fgg6bQ?@`ujY;kqRn!@vIHonPPS80xg&zott)rQctgst0u*)FIY6j(_B~ zmiG7GT;qwWi5v>~!dmLT`f(o_ePO-*ulGNFlHYi9k(rO_sn@9;TtI!;w*RvuW?i>^ zXm0-4iRVvfKmTxVi$#mD@xQH{g=ziGw+lyS0t_}awxWrFDm=5-=nP`HRS=F z$k;)j=;c<;o{|Cm@mRF55fim4shu$9F@oYkfI zP4C3dr3SJ^#>YT&_sn zilNvx8m#}W(~sw9F(1oAJ?f7g^ohQzzvd5LQ}UZ$>x9p60rla>leWBe=V^W6`cYs0 z_42DY>MEW8RDP*u_Ih&i3V-w|Vy}$dwm#BG=Wgez(Z@ zgVY6@e(K!)&91!OiJ##D>cgsj;@;o5b+54c+8eC<;M%Xi$M@Y7t&XPVR}`teUQ)UA z{KJ~Yw*=u0h5S%#{ZHmq*L?V=+dujgz3a?F-@%f9F8-+<**mp^NuAhJ-c)Y%EOzOS z$L%c_UCyS2-|qjKI$Jq=*zX-Y|Jg;gy$?icFTIo3#;1KlPVKxOLh+dxX#Ix=%%j=9 zpBf+Y2i1@J4$^t2ajCsZPUF0jdQ6Y3SjB@rtW})z+%cCIVcfezdl#PcwAVPh{%vL! 
zhkgIek(Jw>hog#c(|B1b+yt$y&Q|u5GRsH8WM*J2bmW#SfM9R*zgY@?*3BmpTlPNZ1d_;*S!UwZT~Z}IC!7u@%G0{pR(ibT#zEaWf{5Ay`cpZ?D`dJuo_I(ZjVAJ#5rF8b~Yi-dKyI{crXTlO({ zIsaez`P-}cTZ%ewk=je@qHoBoH|7O~;HRET^(n#!VmDdqr5^Mk{=C0R z`ZOp#;J&1qGYjG}?X~!Q`dibg^G$x&U?i)+@3fmmVKb(K4 zX�pJ?6WB_2c%(l=!9#50{=D0Yqp>py;)kLyU859`mjlu!M!gFewCTRpv59H~FN zrf1&DSK^sZeOU9uMbFsp#&yEl|6KXBlWu5H5BnWE?wp#%;WyX&uWKqRiqu|zsZ8Iz z6N_O>Wabx&U817Ye0_0DQ}WsR)p+^?pCNqsfnD^CrQcs> z)bd}rqA}>8wN}|<=l1!}iCG->yL+$78zuK&D8E?COX{U>$TqL0#L$;e>>3r7{U6ln zU;6xYopyMAqPIGceiQ{`td+2la%V7k_lQQ|@R#f3=mxVSVUT zd5k{)!Kth$QhS-!Svvl#X}luy4#jTKV0oST^(eLk@wfG;dhf)}extsF$oPX+pYo=9 zeT#ZSDE*b>8Ur(yMX#|!S=(idVk4U_}Zjkf0z)~z5jq~zy0_%@Z0qdc@_uTYu=}FD?z8SqDbxiQGFS;r*$IN#2yNH<5l&) z^!sD3yr2^~owrZ)%oD8gXMGcozsbtGp!(35@TLFRd)$n$#iSF?zH&elUOWD5WpVI6 z^aJiXbM_P9er={E#{a1eOqrsp3rJ?#^2dTqk>aFsx z#WFJHS&Gwn!CRUacJd&Ds&9-u{LL{pZ9BR#^6A@mS!0_|!(+cc_GNL{`JWX(YX4nR zY`$8jc(s>i*F5wMe%?lQaKc@XejmHiGN*xN27l0 zQ!aM+u!G)--DLVz)&H9EH5D};evb^ohYWVnhvWWy*wNSQcz-zIkk!6A;K=szPb-VV zwkxeC-&^lLqAoDWBhhml|EIq{Dj)A7`VtB`@mke?_JjDB`tP*vK0%J-@CU7rTDR#n z9y!G{&a1!ak-?-dsef4ZkdqcW;m$lPf5z>r@3Ys1*r}WTfO$OpoPGS$%;Eq=YWLe@ z&0o6z$caZbX@8fPz%>mx){s*`>>v4>*c_@b)+oX2Kf z>H8CY?4VbElgV4rf8$N;@WW>apXQlYeWT~uLpEIegbNx2_87SJ72o>_{Py|Vm&L*R z-VfFPUu&rzB!5lxERAoB`X_$a*^Wc8Uvw<>-$^{5Bi2XFSJf}`(|D7qLlHelJ=AA< z;*`&W^Qng~{BX%fzPQjY&JQ;oGwzzdy>~15?Ebq}7Kd%;qvQRL*L$aWMUmP|uX^Ka ziVd0lmdUx;L$QB+QR=_Wuhf5dTM`c+^8>vTdn)5kJ!Yq$iY2}B!D~F|lX&w}JA7fC zou2!`xo15R*5CV6k2ap#2|u=fe)k25qh2u6QL01prF}D=q|Q)$F)~&5U;duiuJ5t! 
zG?xOr6NDd3^~hFlvop?kQ$6)M@h|brr#|dFYx)V751tWr+G)A%4|{YN^>jV|-_rbG zQkPig*#EDp{!UTnMLyPJC=Q4YrT%Li=k=|x@U)ay>%g!6n(8&);#Bq^_0P!Sz%NCs zH;p5{n_lhkhG7dW?Eg4?`7rFV4?p+ej!$~E+x1t|vpC2r((JxA=dQ;NdMB^w2QvF> z>HtG=U<|10Kk@k6cACd9`@F}G|9!(BWIiDJZiv63)4XZ_Ro{&-3_D@_*LT?doG|>% zjh_5x^ERt{--K8B^JVn+mzLT=r*ch@rFZhGK3cF;x2{L(48@nCW2yg|zqB9Je5_CJ zBoBO`C$Ia}&yuY#Z=D)rFQ|q;Vf0xt1cHK{D z-btP;89uSw^WUhyCIMD=E^;Wo9A8xR-#hW6e(O^%dha@s^^QMid6WKPufjCW@-eR@ ze-#hlV8X2!r>&;^)7ev=#i7-@oMXhl|+$r`b7Q;zc`qKc#imS5QCdtR=D& zITT-sFG~GaJKrOy-|A>dJo&JLUgJ|4J!tld`ryakWag!L(8E`SVe~r2FBAvG7sijj`jwu) zM|^_me7*9Zr#~Qi5dYMUY;4_joD7d0|MX^YSU)z@ z^FPUa#jCx92c&Pv-ia@=)l+;m8Z2MpSGxbGsXQ%3tqVTTJF%xS`m_$)uPML9F>mEd z<4smOaa9<`uYKi&^LCgVHhE}P-#1UWjCec#nVH35-+#C1jaQ)KN8RF8USg+j7MF`J z`w!|1#lg`~>c12JdV2q_>V1Ov)t{F*?Bt;?(Dc*9A2~(*;WM1i^#V`WeA62~-}l0K z!5_2F$Cp^)ee_)aTsr^E|9|yLo%Bhh_Wm9o>Z2d2KAm4Ez80zAP5sm7IlvF5e8{OC z+4%diILx1ZDw^JjpPxr0t_s6ee&(%je&!}&o#Xow_dh0m_VarE8}(`ZeE+q&lljqy zP)I`SKRn>nEDq*lpCdKV!^7uRy|2^X;*$KP&*$!M_9V_b@wdFlApXdp>cf5K-qE|n zQ+tGa-{17e6P}ss)o$lMXJ&EmyQ}B(RV}ZGx4so{%-N9J>2uewfU!Z?+H)8@|%}_H+hOzKA!)d z`XjU7HcaY~s@nUb^t`e0)H>@A#UW8q)&Kw4|KDj{9QRqh$vnN2`fa@+gI)Z?Hs^ij zzVDB_ENnmF!v3MVO!FFN`~PMZN4w7teKa1f+U~@=z4T6Am#%+ce#+1MLLoP7RrNo8 zo}|s2d4kh4@9Bc@gK3=UlX}pD-U*+@BUd5yq;}PZaev&}ZMN?#VS{zw{lr7}{{UXL zf9pf9<{cql!0h-e7d=bwcqNEjdj1J@fi>|D#iaPc@;cG`1g#&$r>OoscNBk+d4lM> zA^wKcMSf((ZhT?&^ZtDCV*W?7|Nmw16Hkp>3|_nbsyBu^RLE^T50_iSO0k!yTmp( z|Kq{8sNe2?tVu=eZ}Kn3aUlCeL*}ji%)|JquK#R`C;be?VNp@p|5m3zbs6@Eox0NF zRr1*SRDbjm{U7ZAf8F(O-;6D&>Oa?!(yz3S-bn|sgQ*_b;#-|@soi*3zfSy9KaM(irWqe>mR+-j7={*4_-U| zX=QQne$LK?b|{hl!7Fb>GO=eqpbVEw$&}+r0y?{r=v};;?=%t=B)) zRIeyfd+D9LW`1?;o9Pof`cNDZ11tNl^Ra&9(LJ-zVJC83{p!CK8Gq2LpJATGksjaT z4>BK%a~h}l=2suq`1m%*AAjA{u*UEKPx$qAf#05g*r)v9^2z;=x!75H$16eP`Wl_G zt66}GY z6RFq!e@*?YZ;2d=qa%U!-wB`bq}ShKPcWUYS37!Sc=>!Yd!EIC-SpJ!)DBkYiL1hJ z(TD*D?Y+~=;gZuwhsEA(|NlGZKc{FOaC5!>s@9obr1H``(b?}`)JZ&YE`FgnCcY^3 z--*4xks$L)--llPu!E_dzJmCH_``2{>eW2vw}AR^^pwYnBWCo4z6Torn7qiX@Y(mT 
zBu{=%-2XW02D6G`eA+ks(WmnZ#j%m8vj4*+k@bW12)a(Y`6YU*L;HpwXz$Oa@Oh25 zIAl+olh{Hmgv1xRKLpSFX0DoP2&`)Cq=8DxReJOO#Kpl7<%gP+HKFS z6^5UC?oA(i^dPVH%Fo{c`ulI5#lh!hT9;Vo_&nkHOVmSrB8TGJ!rLnh-YKdd=TYek z?;k_;!w$k@GI^`~o1J+}anuFEhYWVnH-;?!)&t@7-p0`3hhFxZJ#X?FXV*XUXK^g> z@4xVkR37SScFN2z6vs)vUbVZqBmSlBAAXHXe=o2&{1nl5!*o0I)CV83VmH1p^yIZy zp7F6ihh@Hh-JS3J;2-eW_pkme4*MOrsd6nr%`e*al6p(m|4h+%jiZmDI6fLG_dn`i z=X`2Sc|f%@Z+`bj2B`}~-wp9MbjduB6}$0;-_|bm8=h1PZ{2hH$-jEWo*!)2pY$m| zI9k8|=Bfvs$m!Cnp8K|}?xuK}oQoWa6XJ`i{_`AN=4JaojfW3Ah`tK(1C2LXpBC?= zUJt6D$@8cW8!Y$3UqAQqabbh$mz=!I!|m(;TdK6j^Narn*S4Ui|LdBoaYrghdg z7MtlS`+s6|EcM?>e5wC6*Bm6{OSCVLvd21D)k?p^!lHg zWVX*=CoPvl8UenJIf8-Rce&zF^@|Zl2`o`e%7hde+ci+$$yyNE&9{IP2 z;kV{IfeRbKJ>c*8xYJ_pPH|fxqhdTkmF{{TX%ZPWXXYGSGwd zrZ^y1%`X%uN5@kCoyPYHQor#re^CAS9Rq*R!w1%c7e4$_v^eq*Ux8_SDu?xIfB4{} z<2MN#K62vGN3H%N^4s-4tt<}CkN6SE`3J3AyxPb6scfIWPAt}c=8=m&6sN=&RsF9I z6+h-dy(wY`y-VU$wzyVe&qcO*JC&&y8C*bpxc4tho&Cq}72)yCzC3A>rOxMlY{&ob zE86eB+IRe!N0toqcD$I@*^&VI6pB-$qO$+`Jq~}%Pd&qy7J1UzB?7dsm;z-2c+6vZ6?{xBr}1YIlNm{fYJm zKdU_2mKx?^LfSQhVuDPyO|#*pRFHKRG&D|CyI!x8qOdm7?WM z>Ox=guW7xC*zx!9fYgbsNL&>*MqRnjwUe8l$aJtB+4ryMVzB)+pTAD^npdRu(&|(@ zeKWnzi+t1<3b|m-`cEFk^g2eXt0lR;`oRaLdSt^qi^KezB3r$#LjGUs!zMTPZ#nvs zkA+R2Smlsk{OkkzVg2vbyx{V4fBqG9ig){S*lT_C4cYqEBN5Cy6lX+5RsYl9XQ|gv z`S7d%|GxeZeo*Jf{B3^os1H*&sc-Py*;B(MXW#a!o76u3JRr7VzMnrs9TMGM4?MRV zJN-}Z>kGx1F|?}xb)6UM%lcl^dK0_(C3>qv`-mS%eded}CHa4;Zwz|rs$Y(M@vDs? 
zCvSVxiGO+8E1zxu{aGBP^KZH8K__y$q~6l`x2AYvXTG7B5)GC8*Es7_O?f~kGIr1> zddu5W;SXP8w>ag4cYgKM)#w?r`PTcN^OMGa7YFTfZ>xR$&-0ghr9SJ&YV&yhhx)Ys zdQ*8+w(T~NLvdDgw0tQ~O?ar^KF4#>dnbq;O!d|;&5s|*_JE)1saNyBi=WBZyZDE7 zr+u_Xe>bdq;v1XqzQzm0+xOr8EDqcLchKuUdsPpTCl@_S@8nf|JwdCpId?e}-;IW< z{?l*kN3J}e>y*ihKWKGm-RQev8t2K!{E-#A@rC7oe&1;;ZrBWCRvr6LeczqfE^Yq< zuVT*qr>!iG()Yuf=vjIvuk8CTe5xlOb%x^X7*N%J{>L1D;tiD_J7|3)FLhM;H>Gak zDtS^qWH8C&RS#cy@`1Mx|Lcrb!V3rPJ7&_(?c;y0f9TEPuzt+%{a>1ozA>M)U%cN# zaZYDGjVG=?Ae!61|K=j|zVJ?D{OS*>UG>ZxKG5{j#2-0D{JruS(&zcqht+lsuik$7 z?EhbT^WjG}ylFT1Z2wcs;-Fu-+v1>ai&q~< zyzPJbvp8)3-&DEQi6XW8ZL-!?UtRs3V!}gRp*Sx(l=`o6JVz9N>pS&mKJ1{^c$3wS zexe7{?aK7TDIdJ>n2f!Pe`C}M@4ejI__)UCQ8P~1>eoxc!*;`coQ9l#sAX|5f04>d z;~TI3)RQN2Q{+&5FTN=CU*qZVK$jd&uN#x7$9{ zcU{X}x%njP*S{a@Ozv`+DEFO5(8hMdNECo!QoKfb8yzj(F3*RlQ$mB;#+ z$fobofBaIk`iWP5(<4{pP5HtygT{?t*kbqL6|y)qBY6N(EWMOFX(^k{B>{wDQVAJnfdhqLC-{LN4Eo4(n}j~-0p zyc2)ZBZJECkwMirmj2yYgAZPD_Wu9Ag}3;@Qv>m{-(O~CaoF#_|L6B#Unu!{HLp|D zdifs#>qoA3(20y4^oid3rM&ooY;PQ2n_m6lGh9G@`0UV;$9%BF%&_yspPc>BqwVMQ zaQvxtlP8ZqKhA#sOf8E86sf&mNM-tF^CrF}p3E;4-;avY_V2`x-_`LazoGU4zyJ2< zZ!`v+a!P*1ZJRa*?mlI!=O?~Je=0wJm#cXG80%kAy45}&;<`cjOZy+~Zzz5cL#z7l z)zA8$M|b<($N1Hcy!eCWr}d)mhH0GFJf=rh?8X;HZ+l4Jk+<&?+%oI^|5Seb$uHjRCHA@#9e(!t+Y~)@hT@{AuzYD;f2Y6kP$&NCSHG>(pFCDa zPp6&hJn%~~jq~JF9+T%$AJ#j1r}IxP_6h5qzyBr=FV#N(+{)sx`88GM`L|AGMUm#R z|51DC{>NM**l#e;P+S}h)_>*&PLVkL(|(}uhE;Lib@Hb?*ezcDOL7+;c*A-htT^Vv zr8kkRbL=nn5>49w{0cq))>6BoNbPu&wQqd?B`=tMzJ=nF7;61@!sHXQ_+0H^;>Z8) z;SXBBv_ACRFpcx%WBWi>?8X-ctvL1%$L@1W82tF(#=UysP_{>|o8deJ2w%EhN_8dm zl>Yyvmc`+dIzus4^t}=6{DpTSr_VE_UPH}GognchQ(u*TF8OR;u0sA_>Kg-g|IuN$ z51i5%aM;AwhzCw3pFMw!K7hmY`6o@S2kcQ<>k+B_exPsgSAYKgFou@4fA|&cdX}2z z1vNi?POle2Z*?di`fixUdCg~fy6J5uYT!#Q#~^CC7w9bYkeiX=1KjGH?=1+d|}*4``@|RHWR}7tL*dXS03$w z&yGLot786WeE#PvX`U<@X4m@bO>rPvYPuugRLv^u(zizEI!y@|WJYW#O>HQuXhT|NRp9+x_p;vp7oUU3ygy zD!*vk$D!VdrgZ&(P4wggLveXDl==_P(h^7gUiF4r2haPizbO9LE90nVy7=RlB0MG3 zyd}NM;xK*U7uLS&l=lW)eOOrM2XAcg`neb4XWM@s!|nTJ9xwknvt{r5rr4{N-igJo 
zKT#PyJj^o`SHyr)|EYVL@ZoPfPV^?LUtKPz{;xg%7FOKxyg%)6=n`S2)$eKfQ^r$2 z$Dh`RUY*}?Spn($UaYgX`x>v*-wARb9NRg4$De+N;>s9cc{ScE^1CAb#2Y4l^+&Tf zyy`8kNA-%PpC)?b6s>;c^B{c4U>AL3=R11b$oPZQ37Wod?*3+1Uhl-uZ~^sU z)LCb)edax%2%}eechHaxj-ao${ns>)V!oF?|D>sU(5b8_(j3SCV><0=-)V@R*Ffi+|YJtvh1uyW_%ylh$4G zpLevM|FCx!hn@GIsq)r}wFEVfXxmHbqHoC7H|lFjOen62FRJ>_@1WL?iaPMCA0@o_ zgI0&;H@)&9r2b|Lx1-u=@a3 z*XJKkzX#F97g7|?|e8xjv zAbiMRg`Pa&%m)W<{Nbrn!iAqX|K0w{?d_lI4`*g^*!!TVGQ5hSZ7;1(wb#?Qn({b7 ztEad&Dk``CX`TMoC-{2A&hc`6N~b@0()vu_oV&l-;Rl`OOXYdghaGwkKKZm=CWhwq zQ%B$b_8982=YQtP50a;*dPR}i$NeTdc~yD-xhZz$7mDkmqSSwQ)Di!x{$mHd#-}p# z<@3nwc@_tD(=&hN^WXyN!?#8Z``Nda?G48~@Z^yb{#Jv}j(^}+wCk_5@612bLA|M5 z*9mwh7UJ+{{-L-&I+Xga^Q$){-ujW$;a#WQ{50P3B{F_s`uSvf;xrHWjCUUO@P*@l zeC;(yN}j=o@l6zjptB5=lz^ zS3d3|rhen2FN)Yf>PcnvOVaC94GvVH-GiR4wBbo^<%#1 zLF27Sy{0G5i5^~fOvc{DKWy{D?Wg^H@@rw6{~5dC(?eRs+x}-t76-qJT3zreidT6_ zT|74o*?4La!~8;VgXHTKy?2V5C;dNJvnPJ=f%v7e)uZ{$u60qT>B*C#)<>R7O#H&0 z-?{F%tIu8{9CY?|J6`+JQN-E#pP5-4cKoxZ-v8B7J?KPcY4cJ&|G&dLK(BV>P}~?P zY#n!gm6)RoScd{uh*!ndaWWrIU+9TbjyVwvZj zAGHYn_WTX%0J;8O>&8Dz272nCZ>ERONubqJ{4^@6`VSxerT!})e*g8q|DJK-*G}Dd z_xrH^VsL;MY$)LoKQ-;FP9c+_FnJUXKuHd^)T=bih9;a=nH{J+kF{b1Gq zkEi*is`lq`>H71uUp0vc#n0l4s{ZpkGV`k1{;`AJ3BSone)J&oWnQKyPWj-)&t$a| zSA~tiy&GJy?k9R0LzX;X_@NKCe}9>f#bM_i`cxj?<@}r0C*JMz5$p%7&RqGJPgCSj z+#Fw&`tQ`gPmuZ8aa68$r`RXRb;S6CR)^+K^~fovao$ND(<6h-4;fT_Saz$$r@m63 z6hXR>kW<9pkh;=3RiDPEau{^ZuU|O)tY65i=Gb5CD;hiQoTBoo@%i(A zw*KWrs@)~B)>U7v({B3>wNqCpZjFxCf913JY907{>?x{Wy*G;kf6(}n{9bwROA))p ztA9zJM}1ght=c|MEOvcZ`-T+N)R@oq2C+wU*x zXYnoZrOr^?9u<}kf95y6)4#O+`5UUu64 zD>Rn*)LLH{wa6;)+y19Ni=%Y^W3GBdk=n;QCTm@G{7IcnjZfrI$VX9S|DD9q7h8|H z@~Pb`^4tmhLFNgf?}qprl8<>HD|X`xgSTp3yzBITg`tOC^84EIfAktRzx}s*w2n*% zdai$l-}W2S-IRb(+!0@t`tQW<6`2?OhKV1?z4(LXuldn;!!(Y1OrPZM#uwII^ur5& zecHld{O8vHcH`3@z+=y!Y-MrS{&lea|8-6Epw5GMv9{k2^Z~nVw@vY+&QRPb`FfS# zbt3oi;qMbWGIlW4TOC?Aejxs~osfrlB6q_of7OR$mpOUsi&nZj9DiWXZom8ORphnp zpLoT5yzc*NSsd(tMQVTEn(X9N9eMe(|22XPUeszo8R7eEbfH 
z40h{3ekrDLUh|nASuu@I<*?&-Z#n#yqZST3opaxD2S0rkJa+!MKa0cm|69c8KaaER zLi@-3#k;-qPF~j);kWHy`wCwurpJJ){@2$RKIUP3@Ms?FpilJ3hP5mX^M_B-^iKTz z|0fK)U%$gMd*8lrn7I7qTfBMS9`M@lzrC6d?1_Irewp=0_2M;qyR7j_<9Y<^V^wyY zdh4IYh2mGy!T9k9n-XvR&^Yq?#IL?(r@zH%UF1jK4e>W5AAHD)-T1=G>o2#+kM{2m z{r}qTbH7-(@ETXS{pgy|u!s>>Hx>XLvUD2`BfBa?&5A(9V*OcEW z&J?Uq6T~01I+WM+eRKCWJ9U6g{EZJ8O#KplW8jkmk38WU^IAoji-X1Zy{rpw#zpE*qV!p3Ff4il6Q1gqn zecVI6^``o(Uphaw<51ik9ZUUp5?|W?=b}&f=p#rSRfr#Gd^&H_Q@_^Z!3ETZEuMJq z?azF9SoqvYr{D12ZN2c>?~lD%9M+H3*o!Y^w{$sDeP&;?Ysb4;Lov_aRtw$C=YfbuL z`~RlqQOy6M+kdWlMUm#%?w#7TE_?ne`dX)*?JX4dM#F!s|5_jCdAs%>f99b`T#Bqe zpZKE>Ydw6b@()Im48$65Ld~Q@*#uD?~y^(hc(`uxI%r*fnm)WxqPhRpjtl|ylVB&q5@yw(ruX~nP%JIHfC@CPlg z_5po2OykI7dY8q4z8l|v-}%o6r2gK{c`zUT$Ci1qzcj4qC-D}ie)J7JSjC4we1`D3 zERK2AH-_9ZmT}+4{WOJI#Hzd(mQz#(l@KWruLpr48?;{ zQMLW2&oi<2QA>H8AbkXVqDMB=dd#1BYJH}sUgfj+1=NRiemr{V4PIS2tpC{IYlLUp z=O1|fPk$DNoquSm>^f1T@{)ejH}Ay9`bOQ#8w$B$sIvcqCBD=TcoeaNcb_!{?fO%$!`tU{`8*LPhY?N|5F_QH?uhCSALe>|J$p25PszqYx{WA zu0KrtSU+GWm1wv81;9G_=CCVL-9y_Vg1Jrw#1LV*ym)eQ;)q0t$)gkpRi=5 z^@Tl+^G+g6kE}>s6^5G@T5iefH|rIa_V*|I{iR>|!EFb{FTm`N40Rq}^BSM(>#Dap zbMZ*!Q2aI;s`_8L{uv&{s(D%-7wPRecj(hNTdz&=$Bw^;2TbOVowzCtoBaLl58j?~ zQlzY|7_>lc~%n4bBA>Q|Sab4uHD8fScc zo&4xQ@5FBL$Q4Lj6^5;Uv(ue-4rzvM)@gov{FSf3YyW@R)O?Eh9r6HZ+wQUVbk-%( z9PcMR|7`2E*6B~)P{9vdSvo4AM;Pk{{rxLfy`o6%{k_RfUfJ^pRJQG(`RIH@ z@mLI~?Ek95$9z0|pz>h{@iUpcRsPtUSsWHeo_R%h@K5rE6PvT9?0m_s;rNMb?6TbS zt-bo$`JY-A2mQ*Q)9Y{5&nrf!ITox(>~$|jWZQ0)pZSI2@#t9UzmxdV^S5)+8=v~Y z2l_;B^(Fl<|2&JM^uADk@6--1pgwFp>Q_H}ZHGx=>#0MA{$;y!sLS>fEzJWC)&D=$ zzOi1!tG$HBJJHpfg3J%ZjvR_7qGPH5IzN6#!{7Rz3xCRIeopKr(=SEzAoGIX^u#G2 z`O^8$tKRlMI~{+=ivFi7!(;oOnOPim{%wWDHvQ-m!1~iGc9!1pO3?UL>Xf}pBBVf|F3`k&maGC z!)kZEe_(jw!*v$^$#>h^|9(2(`R%{<1CUWBdQVmoy%Vf2*D0fCzt22F@pN>o?Eehm!JqYSNMF>iE|2q` z_wT>0PJZ%$)@SlK@i#p(nEEC9(7VZQEBvy!Dtzt1#-{GA>CD4^f9zF$#r(JN__JmG z!47XO{w%30J#H+`N8>{AObj)CCrnF!tRme)EU#-0M|u-@m449&iW!{zX1T@oFz^Uhr+MJSsCUkokq; z+2~m6zve0Rzp1<}MfHOZ^iJ%lY<2b}c6bv$^H)9(s-Ma8s1NIY=DFHJJ5CPk^>2CZ 
zq~6zvuRQ-eBL4s5iR?GPOb78+r`AQ^%%02-o>2TTGL`!8qB^Um{m4bne$P9R@vFs`L+K2|IZR{ zd`@1~S5zMQ*;HAP`G;a=G|*@7qG;<88GqaEYwCx;;(z`1KRfMs#VK3-DGRMb_E~(y zFWzB(+_%s1x#7UMKmV4#gVK#Tw*L{Lf#0@&)`#{l6n~12RsBz&<67#!^09tE@|sNE zD*vX|+Y&6rIkkiEA%k7?jnNC8e)++F|50Pqtmj{N_jB$0uO}*he}dGN@QYV@$@-vg z$b8sj=yZAQ-AHLGppWfi_jlpLfvGOu! zjwIf;|9<5IcZ~a=6RdwtwQC-c+Dq!M*E;>GxBtIH$5Q{D#`g(YKXTQBS_k{f`j~s1>d!n)o=1IHYmcvda`78~5!QL{q{kXJewTQ@x2M~e{r^raivznz z?fs3(PF``{zST*5Q|!zy6fZRU#HWbe1;YM^cqh;EPs`ME_vWB#VMZ$)Accr`Y`2@rQR6vXn#2S6RTeI=?ffn z+4%=}6*rgLFJ@X^&6DXM-ek4YH|)0msfj1^3&l%OG0**1exD$GAof(YdYV}rX4kq( zaoPv;h6|_<-`w%>?TYuG3MW6eSMM=HTkzTOzsur4pFg{Dd;pNut$D?(yd=*0@5G1g z3On--#mmv5)PK#x{~qCQJT>K2KkQ(dC#@ep(0H4|YkK8}&u{_t;ly<>*#9$oPYd5^ zOup@#C-zd8-G8e2+21Vi^(X1~-d-o|gJ~YCqskw?rsO5w;#{YmJSkTBqi-zx!9|}qdg$?u zkw4scw|^dXIC|Uu`!o-@qCWqM`V_^hy!1|V_Wb7_k*#kkQ+Fu-79Fhr_$#K*GvR#$ z!jqzRk{3Pm1kr==rFLZaK;q^X!xqotegs`#wFTq3P5?UV62dBypk zl+WUF@e9SP@rCu@iQX%w`xA>#)>A2u#-mT;(1YZ|&-CO`zSPfT>|OlB21mJXO*-*~ zu<`l3FZ;cvj$*sC4xfy^G~#7S}~4t_s5gyD#;^9v`0(?t5&BnadAanL4ch)S;Nm z>3?VIqnXsxBQi_x)NKF%;}@iF)`wn|2kP%%EtNs~seKgd?ERUOSEc7)<)Wwm^fMH1M1$pZ>IY62 z8GnyH#pzib^|J->2gwVXzBzY)volZ7i9b9Z8BF~Wec1Vd8y>v)A7jF9+uX9{(u>{? zj~)NivN+hja(Vp2f!3d1wZp6aqMf7vqZ7ZT$~~R>Q2aeAjNf(I`P{L7z|&Gc_;Qi? 
z-#7e0<_V(jhWH!0BrmdJH@+}r)`|U_4%#aW+xxODZ@%beuW|PMt3Qjwet+Tqi(b`( z%v+bv81+{>eYAPh#2$+N_@c7^)5MQ?Sv>hc^_%bO-||j=@_?m$PW(-e3{np= zsCv8qsL{COphvzi$*bLd|Lx77=YZ4KPH#_q=(c{PW zXzPc@@&5Gcj~z_)@PYV&wtcj+I82|`@5By|M+Uq2H&)vEgiC+AZ*OCzO?>aOhn)dW z<@x8$;^nuSS>KzIpQYElCTo8B27lH3Lh)7%we_uh7B^Fvtsga!t^dqd{p#0r`db`+ zil*O%(cM+Q~j7;xsRi{Ad_%Nqj@UUI3GZf`$-X1^>B+wb(N+?+d# zcJ}sOoXK@$H1!o*Vh$Z>xa`k31SDm+EW>Q<^M11e5&GzuR`Lg zFzj{EQ{P_gg~_3{@rB#2^p!q%s;+DR)L0lymm{hiv!ixZT;w-fF8 zGl`x$>8GLA#dCYS=1uL$q4-BkSn9vV@jcG^0UxOOu!COhscd!Ye9WF_aj>5>z4Cjf zc5ngpVb6v3{L-##*1}%fefYEMFC7MN<@IkB|9>IXl$XZ0k=T%#7s&iV@osc1^# zdj57(d3!tg;RDHQGW}9S4^kJ~h3ScNo%w7%&Zj=~J#ouJ!@mAX=sWhq9R@tpKK|$X zSHfrazf*^zR8wA37kxvnnqMepMUu+?kCYtNkEY1JrRW5)gFewC8|pmFKhNTzUehyg z*J;PkqgN!Z3d8udubgn+4wJ(s*FCZ4^x^9hZ@>RCzgiZDAE5vLqWt6)Yk7&ibp9b1 zU+mNuiXs{+`>)?4@VE6?!DoI>@|jE@714ui_i25|Uil2+n@@fC=9Nc$^8T+c8;)A3 zcJk)GTNpm;e=Cc_wy$AxKmRXY?c;ruoxHO9PpLB(z0Id4awz^8UsUy<>l&<2@PP35 zMVhRBpdkLBccQOC{8F@f$pcR+t3HiS<*@os$36G?VT*;eHrV+)NBp$?|C=0tPSHBS zW%c`4OZ6c66~#JxeK>hlUqcW%-M)}R@m_SS>Oa3DFptvq?<5a)kh~@*^`ZxQe`mo(5FU-F0XqT|7C@ZtY|)_=|C6)oTYr~bni*4g}k-i0SU9qPaQ{?c3D*zP~apGh70j(Y!Z zvR*PB9{;4yy7sMVJF$9-_ajrO|M2xivt6%Z+i6Yo-YND8avvM~pw*$grvHD;oq7CR z^Y#Dl*V0lgAA$styt5HWFtNpwaNpxS5vnovrIt}jT3R1lt<}_4x=GbuG&EI8Q+!di zrf4ausZyoB8mg71=tph2q4kURInOiZeRDgJE|1^+tmrcb-!1SUhh7(C;R@TsrtF%>hb)~1n<;VeX-hW@8mJa?=Kh^WIXIJ{~s~2 z)PL2huPVCm$v4z`h=bH&Hhrn$*JK>_yQ#iCe_B_`PW_Che(GqfcKjJ%+Hm6s8>{cW z&P&&Rdk?R1JO4jDi;D-x%LQ0gkJc%y@|wE1Z!@;#Q&%pzVg5VuK~?`t_emu`#c85f zKSq%Gf>wv-nZLGZzQxmVobXK#Tammf3|sFrV#2{IULLl2b^IO6K0d~4p7noD78eiZ z`v1#XCw&ywBKAL8Z|VAf@|jOA3PBD(v#S2{KF)j?-_VH;anL7z>z9rX547z*iCdoP zp}VC1Ms2{Rm-oBj>y5Fk&px^SMYGVU{Qjkx#l_}Z6Pq>psm=B0%nyXmd_(a+qTeeD z-YIGy#9H_~=be7jQJ(2Hg@+AR@rhSqn&(|b9qO;r!5;=5GisBe6Hg9!&|e$Dj6mqiqsfm6>d+tbK<}b#`?!fZ z6ybqZU$Y`l^*o3kHrT}{@J=xoVv>`?b!wow~pz4_+vii4vv$lXra#x3^>bpw_{C zQtDspZ8D`JZChm|E3;&ZAmCnlh4Y|2iIV&^zI!HvMP+YVln77T5er 
zJMA+znC90+KMecvy({1Hp?P80yuptwnDIHUJUjlMEM1@D6Ds?k9{=Z}&wj}}vFRh2`fXgT7anMOP0=IY^4!12ANCtD`|RoK?iThQ zF!c1zmpdJOd;guK<7l0EW6--7U^%N#{lY4*O;7LJjBWWX$zk51_+WHY^}n>8>iDLM z-4tE!`;9)B^6_`WG|xL3$Nboe-Soo9zyEyQKMvU|jQ-}{8~tp=Jg<4&f0FN|LGCR8h3P((T=yL<_m`z=F{ke{B_2}y#9w=RCuknNQ4cqjR`GsQnC{o#fy^l?) zAIx8oI7pvT8$b0}oO!omIo0p?PU3zEF`Rkki&sDQ+^ylPe&4%%`_b*sU!4D+s`^?_ zJ~Q5bc!0g%QK!yZ*zGm_=DKC?B*uKvVcwzmP$ZQ4uX&~O|BMf69q55x^UYQs_2CC? z|Cs2(N6!#F9cOX*9sJ>@8-C?x zeQIvFXP>v;I`M#pZS71zOdVC>ZK3ZrT2}dkGGG4FP-Ypub-?r0S zd0K~eV)MQOAGA7@kG~tHd5mLz9T$H$y)bm6vFEMVzZphsxAXHKKK&^2Z2NC!aiL%N zh4KEM%h~qXtMQs(y7o>yoHw&N(aj}JouT+hbeO)AxCb>4zWIrR%KNup|Brr878j3Q zfz(-rVdx3x4BqpmfnmguQCH-9E{C3d{%vJ(VLzQ;9e@8a-}=+5ao34rwf9Hd2hzjK zg@G>hg<{1RFn#zsU;alJKHCL4Uirkq)Q@eb&zJG@EH3DqzpC%=*IyI;FnZ=K=kM~| z+hK!?_I+U0Cq{ZFe)@0MA?D)utLy#$HJ|NSqV2VJ@|ZZsKgiuK5{dtH!fvQKJ zLB=(kajW>bjKjF4Jg0FGJ#4Uxf8qWAU(VKhWlEU#nR|8}@+aGW*!j1njsq^~`Dg7L z^ReSb>dR%EQ1purO8s{duglA$-GAz8N!&X@c=ca&`mK-3E7dby`0!FRKKUT?zy`bc z!`jdO@|OcQIwTBU{_yFj+qquz?DN;GEG}M=dMf+xgwL9K?e{Ml_h3?YDC9(GssGBWH$~a%sEN(~ zi}8qq_^S{eXgsa6Hk$`t#~%tTMbYa;vesA9T%Y=-BaRUly0r`G;g2VU^dWx0%GSt#8aHmv|`T zjiPe1dTUKqW?5!-)Y>1G(S<_704xcX}P zV%MMKSzK&;;PaQ}Q@^m*WSrvu=4-M zbK%jKP^=LhmHi*n$@fm|6ww2Tr#9OM+nvRkpZ3rEYNKaZ<#$oJh91btVG|xN9H$OJ0`W_ope;9eo;X52P@2RlfJKwx( zl|g?($KHQySzOpY?EY`)C5Y~c zwr#i6mrFbp1LK3r{(IqBKWbvLeR$UyH(uhmI&^$^U|OH~saN$pxP<=3kSTY6@<-dx zYz$fX$Or#2^+oEk|396R#l?<4n`*mG6sx`VP9B5y|98}xYdo<-AwO`e?0IANuY4}9q1*SzN0@z1m@E?oD` z{(|QT6jpgnyz=~KGQUs^lJR;)%R5EpF;#5qM@{2k!mIrK9Xv(TbDjG5LFQwq{F1$x z{_yIq>ns?w_4SRObx!`kkMCWU*Rh@dX-PdCpLy=Tl&fD+toHfO)aHA@Ql0ccbwVLG z3My^?P87-KK8UtGv^1_f`jwuq!;cP#zZ=3g)bUDoQb(0;SY`4l7tZ{@M?$|1W*o4? 
zDo3MZ$DebuxY+(@(~AH9qK`VRWVPRq`QKRdt#9NtB_k9fQ0hOtUg0qx(<3hF_WObQ zo}K)vJo9Tlc8VVTbiCB=qJwVOX8MaipLfaDVTU(%o3H4z;dApQ_dfTNG2ys7-#+E;p|jxg{>yc5pw-8G6|;(kdgvSa_?aKNp%@yKs`}6C z5WXF!% zZ=hq}zo4U-e<@yob-hpgnJ(m;tvtK`4E5K9QSSe+7%26hyj=3EA8On4;3STn{$Hx) zp{Izy8!|sb#zPNVv725P(R1-*wa@Mw)_vx?pV@c8bzbxA_ouBaF1-Kde~CAsJj?pi zt8vE9#b)iDJhJ;gz^jSB-2dS*VEUTx6}gWh^ZJkNfATCYi`(ZCPwS=5Dhy|Dw#kfB zemOUs{rl5j{#sA_{~x*k3H=2*{%L7_VA5BKwvUJCo7I_XzUEP1C`QCUW&gL4Jow&; zZJ6k#ywtWl)v+)qkG9x_nKU@%k}-(*N}L!lgbYeBxEe z_=YZ)+kW4{R;<#&ANsE_Wy8}>+93>h=-1Exb*WR)wf%oh^%e8|_5Q0(^@BZX>$qaQ zXZx3rRqZ-++^U)=#HAaJty=?Rm__PyPH}r+#WD-}0ze(fq1|oucWJ?~-w` z!IYQy!_epUJokf-%?ZPPl`pt+`W*DRZ-RaA(x>BWweh>lCbr!*#m}1kvDrF)>HSyp z7!SQrjEt#O{ZIeL)ADs3>{8#HRD|EF4L^My+VhGo{P&5BSB2rMFOQi#xT|I?It+3Pb`oADLJ`=0*GZpE(Kp$^8Q zkD-uF!M1N&)9u6jYTY@ z@8w3__R3P5qr>N4+pbz!Tu>(K%kRx}VLo>LvrhuHA8M-Y6ty0)%4_PPZ`f7y3x!l_{ZGc}`u#8b>!p_Z zmju)KTOC?Iaq7ZOG0pRgr#faYraz4N$VIE)dBas<{ckMwxwn>g=v)7rSzM^A>i?&N z)qej@?b7$}xiHfCg<``PDD~e7pEd8p)SDu@@UX!uKJh9{^Sst&er&}m9sFU{`btMEkDgtBUd!Ub>mrxiU-epldNuAkd1*b1zyFZ|O&{F^sh{nl>U}8v zza+~`^t{@{tI+1vlf{KR^Q#X2Djocx|LtG<$8Hl7f0&cSh4a3? z;`bNi3A??FwVr+a+)qn0|K>chNhN@2-^ogIkKzN`X-^u2Xkq_T& z)vfZQSB2q=H*P)ef!1E(hG|!R;bV8U&%Yg%#l`B#)$Un%_klsT?&6)=iej~ocg)r} zeX}}qVbC|~3&ln;Q0hO=ljgxE-_WV9^7!2!e9-DpUHsiJ&GU>$eb|cK^cwvpjymW2 zzxZ}z_4CiV;K7N{G7tOyy)TPP>HRlXKgjrs5^cX9l)isyYCd^theB@fU)BHmR+7iO zsMk>Sh=blGd5l}d&xOx^&2-39Jr6FSzcFHqu~R19bxUK!`sdDmW8E!ze(n16=~-N? 
zALI4@C&{?NDz8mX`-W|=tCr+2zff!v9X6hmxL2&p*8}!B&h}%yojmkF^3A4Siugg( zZFTBe{qEo658FR}#4U$zct`l?4u832sWV<=J1TAe({;S&qQ@0)dF`F}=$qA#PD|p< zI~1EnN2&kxYfdMhdJUb#dB1w!|35OdQ=hs(tDkxpFSQrb-x$2ni~Fy8*<+2t6F0r# z1BVvq+xK66QjhI_{?q^eQpknsrT#M?Me9dR>jM)W?>q28<_Y5OhVTs;ucH6m^uo_> znDUpio5zRWee{PTkG`XQ{^yfAuigaLTJ-Oas7KiCHGY17oz_E~{ZS~!#pF`|b$Jdu;B;hJ~;5N!+9Os z{->qmfGfu@fM29vps?C&@8mIc(Kqua^Me-(xxld0fAloZ`q9*JT8ase{T_Uf@j(3D z5WXSf^(5n~znfm@x5Ek_`R-GFq2J}>XP>=BAG+3m^cC}X__?#aPw;$^2TC>XIsTE$ zYVCY9&s%z3ODN=rdR6_e=c3HK&@ptKapNU^>IC6|_PR=+?}|R7<8>UvF8r|W?Apgq z{>NTn}d{IB(Y(x9JQ@#5rT!o-vQdfo#kOrY?kguQVxK^7gJT{A$x4bZq}WD~n6%_mAi+3ah>L>Nh>@8+JNB>`=%BZl(UK9;(|VL&^w8nO<$7!TD&HFURUNPPxU;wg#IvUgRxh9X~v8&X0zY*AGg~m^z8dz z^cC}K_4?1mpCt=@@XDia=C7DvDDtRe{ZHpLJBHime=2n7R~1?vst=EGu~SU*ypwv& zkFA*Ir*`=G&3A5d@=Mc1rM>^QCY1WGyt<S?d8t2*CpLOv?K3BS;^JL)41)(gdg^(X?BSJX-#_MATuE zqFC+Mk8S5pB6j{?Z4V}Ohhp29VEs?x)It5&ApUM>^{IY0KJhfKY99E>tHQA1fj10& za?;ar(hFDU+xgF4iC90X&c8VnI??oost)~ZF4_)7hyfeVDNm96*f1aZZdlPzulbfo z9bNdmu1a~Thc5Fpn>gcOgX#~*KQ?Ce@Lm^=J>$x+j5_SYOLG-}XkDa3No1#P9{8|To!wNrnRT%nD zJ@v3{{`^+x|HE^?_Lr5~=YRIo@$&?^{|VzM3ah=g{%V}Qc_$o>H?gTN6mpQ(SRp0JEHe2KBXB@A#)tP5;Nv|)04;r6*Q0M8f!7lzV=Ocf5?W9AeHhPZi{p*ge zeHVR>|2aPiS{->77u)_AM^U2h*`EzozEi9zlBaekwu^zP{-^JcHh!*pPI0Or*RfJJ z$UH&wYd&_0@C}t;vKP}IhMaczh`WCVdTaosHXhPlLCp`Sx++*7RKi)3hT40Sy8K%e-nj-EK2eV#UcQXl!2 zr+VG|=vATp{_&g{-rUB^N`3J2Je;)t-WzelQU+tgFqP@RKZ9D&rUNS%G3&lrc zYE}PD_j-9mU4~Bl=vBR+8ei*y-wo3|uj82?Td|v7So>RRZMMwi4~4(8_9`j=>cGC;%t@!!# zA3kGh7&YtXD_=V2b6)c}ZsYTxA$>@n7sl0iri)jrb!5S$oGLLB_AI z+i823*fp_3u~U3t`U#Kwcw0X-&)$!;Kg2<=I%X@6`tXDHd^Te__WleXJwx<#9Q^22 zVPlO`zkcP4bAQ!X^U=fa8Fk~w(YNitmc<4C69502+}7F7FICMi6f$A!Kl4)LK7P!H z{a5-rA`Yhgz-C_P7@v9Par*zU{)hVH$M62ZdKbwk?c+zg{(qW8?ELdUz5cvc{fd_F zc%*T)EBC`q;W6(}>>M4|f6b>)?8o5KH~J5cIOtW!?4CG`Z4dZC(@p9_A3a0#bezTY zhi3869z%|~FYLC|l!MlN>2zMl_WjG8EG{0@daMo|S6JmWJo<)RHNQ~oBIEUnmhqY2 zH1S)XYRXf@-wjh;_}+Eu;|J3`_~zGq{Nz<(cxwN*ANu-oQ)JZlZ2yOf$BsMu)NU%e zP86%1Z?^LI|7)+d^*`xbD0YphrR_h_VLogxhCMOe_7}#}e6y8D-|&M~dhosK8KU<- 
z{o$DL-~Yn~o4at_c9YLJ{I4&fXZ!y=i;L~A`|10yQ@^5E<+azi^`AOx!eu_pI~2Rc zKxO}P$%AiwLeF)^jhFZt7la4ee6?QlQ@`qYa0&g5fv=BUe(8C=je$=OeD$tt22qdg zf2L<~;eH~1$K>}HI=-;lYp;H=o@a4EKb;@1hfwS;yk6zIPOQuA32gf=^*`yC^-Xom zre9V3T*rl{`BpzRnDW%mcvaZwcjc(7);)P*W7XZR`OxUgCV0)W&tKECxLE%;((iwB z^(%_i{=93p)>W6ERonYrssE}IialcLVsHOWM)F`)KQ&GtQa-igvVYYdCcACVK5#-U>@ex1 z*4_IZPhaf%&sjQ-qSe={ez2yt6J+hxF75wwVX)spHxzrtK&k&aZ~n&<{(o!#W8-NZ z-f8{d68anNl7kL?_U+{wL#~~MHzJF;7 zljj?~Q0yH8RsH94i1kDDJxCv>MVYMrIYIcKjjMd~*A~sUIQn2ZE;g9*5`Wn2&|~*} z<|BuMO=te#T=&jyUgLKB)5_w)>oC7x??2hAevol$>c@6s+x_1XKYeF@q1Y!Tl=_d( zCZY$Qd_&bI4tmW`ZF?SCiLUaATV7g!!t>Z*7k-#9{mJ8=-2JF9X;i-fyM6gm@@@Y= zCyR@H{u*=Qv+te(WxoLmtGuRu+y6M>r2X>h55>nLp|bySdip=V|F3DCT34z6X;5115UtrsG&0Jy~4ff%f`L^ej*P9#kE(yZ9L=%c8gI+fE?{R@|)W7nTd&*Ea+KmY&IR6nTwA>TWVCw;Sc%C8AO6s`E6s{hnM{kENI zzIQP_gIE8&*zm!$4)f=W=36|i+X>(FutE6Pp!&lz&z`x$T6fG3e|=!>8I!i3gpM75 zPS4`Pe0=SF{{BwKD_wt*t1k7`#16%Q@j+Gp)B6cfZ;JE_^iK56W?V)5puOIcxQ(NF z=q{;0{CVgbmlY>o9p?XerBfyhoQ0l!{+*u1#rnUP=iel&eLkALS)IA+(g(Fe@rf9y z>c3k)Q)?VUWX{QiY`G=&$6gW?10ujYGjb_}=o|M*i> zUj5sheEQ*2eoc6M&V-ku<@G2()k*Ctp87-M*4wAv_TAINu)QIi(gC6|1hqiMB8hxevs>@(YL-)cT;jh z@yYn0vj1{>-1hcgle|=q`GGbr<0S|WwEe&8o1Z+V~LwnH!GUX3XhN zeEKHpwBP^g_%^?OdjD<4Q5^@oWLbLv9t<&?|J=f10z_{%H zOZ%T;@%~G{P3zAp7I^dxn{m?lg+eX`sO-P~j>Gzq%fR+}a$*w)ed4z|s`}p)owPp7 zQ$2K-)E~ClW60~5{&Hs6`m)zYEcNmO^riCp+sXR><4Ju|ZFx<7b-ArihRrxucTI9b z@u~Qrvj0PcXMNSYRF62wxMuf=4&&no?fF!m`N?yd4=$m<(>DjN zFme~_vHu^LqvL@5|8Gw&>Gukokq;u zw(_V4KbStB<|ofd-RPoYHt{a}#yU67UhR_FMU8d$c&GkC?_KCre*eAZPSNHy)vC7LU zK78zbS9{H~{?MJ{Y{)PjY7KfVBc>bC8^FN+J?ANvbK#!(dZd-|V#{%xurd1{BE zSH=;r_Y?1gpZ=enjh{=L`b&M&eEjf0{GjP-U0(CyoBc2P!+s-XpFMru-NN1jhMvCp za;Kwj=l>J^e2D)3E6?HrU##-lJMr271@?%|{FpBoiX$Vzwtx6wF8T1$0r7W3>IS>< ziNiN^QcuZNKY3Lc+y__v->DDDLB~QVbnNq2uKJ30|0}i&uqM28?VWh)irDsksd>~F zilbtJ^*_lgZU0G}`q2X!H?{Gn&!72QSzORHzf=Feh~cWwKYM|HV^X;P8wakl%ZUzs zJN}=lCDqCH05Uch98!+1@bmL&yAnox^6JXXl@5SzM?mkL~ZBVebP?jcc9M zA>Q}&Uv4XJuP^Htc{)xgj*;%H9SyQA#3l|} 
zJ&cziJdo{y&k^P)Psj1#`}Ct1zW4T|FWxwCQn+W}rcclR=6vep_!A!AUty=?X33Jq z>rEk`Up2o_92bdI{V(++S6$Gl-4d+J?L6#!k=0MX6!A+|R^k)4`pI{aVSa2y@~SY5 z7_{zY>x{WAjQH^Ox4iz~-@NA0e?L2m3y2PVFw{J+7?T#Ru=)RmQk^Yfq8p0iqocC_ zwNAe2rTtJI_gzFEO!M$}L+@mK%g3(3ZhB$MZ=87S9bs13V!5OI|Bh_me~Hh(Egeti zlOG*F|88RaA
  • v(Z6lSmc+diBo3y28@I0^zk$!{bR2k>)lXmH#@CJP z`pltI!jFFY#2dD|?XU2d{sYpV>5=Q_FYTLr$*R`)=?~q^-!Q+Z|6svlEDrY;J5LZx6?S-;HRz$!<)}L=RJ!)e{y*90dGEN&PIdiW3RvH1g@(0A7DI1 z>88FmKGhA|p5Mw#e^LLTf~8S^{T~zZ`MhoDqzB@ljjM69wSV$JbSKaJn(yHOm(w4n z{N&ZYI`Q5e!<469xa^2EzQK5=|3FqQrbkPEf5*6rl2xs}`i-yG|G%`PHtIiIJhJgz zVMW^&>v;T0@Bi#dx5XI`y+E&dsZBobFD%|sUhT{L$^&0d>{Fh!PW@rV>RUef^Z!^S ztbfKqr`_}0o8YteKZ9Bi^8X7?{fc5$YnzwG(ao#1xus;YZTEj??*afqaWj< z7wDbjr8a)h;w{OyKAqaC8~Kc9nC7MV#Fam6I`Z9juJZRa!e;v~d1kL^=fh+7U(K^} z;rVUXf6||#WYyQkN4}FB_=tmX|6f>SH0r;;)Yo)^zi)ip^4xeoycF}#l=mNA(ezWlbXOfG%+`LnZ!PmR z>`Er{j`}YuJT?85r}jQ9mwM}mypkmzuP&B2{Gg4m`R4B~J>TN+gHG~|4;xJL5`PGH zY;xLpo9`EP-S3H;&U^XcUgM_!KvpiM#|-`dwU+u7#frM=cxF3!RQvun7eB`h<`?xJ zDIzwWE39g}*6u%$^x<8RT^*m`^{((%e)gYJiFIJ@o{o_y3JMo|!`S{cMMg2#MibnmX2%qVciwzI_ z#6j;Q&usKk#19%@SL(b|qz)TgPJejUz@ASm{PaWN?ccxos~4U%;+4<#|G}(W?EFu! z?;o`fsfs$PUHkq$SKh85^X~6!m5!SJ@Jt(@Z^vmV%e)&Ohd1@p7f2pxue;sxeB{Gt zsQsDkvU2H_-#>MwV{Se1{15f7d+6Rby!4CPlW(8DMwJKbsO=PWT(RnF<5S(Ro8}kw zFBxn4Gu|Q@!*t5Uw*589YhOA(-|Dn)`Z52&((^5z)Op6kj}7+lhdH+%`nEgPyD79T z*m~>r9@qgs`~AysRxTdgU|M+tXpY;4$v9FqX8)h9Jmlp|x1;{9h&1Wn-bU(7k6i3j zf8wBbl4mx3DB=f=uhmeed>(`k8|>i^JKa3<{BK=wXV`II&fiXZTMnP;Ka`aV=Y9QD zz5iQR{UGBgN;K-|fb6VYWgcLuj4hHav2AH;-GgDH#_MUKWN7_%`-oB z$_FpwnoYb%emLwMyA1#Sge}6`?|b6qQ(wP?di(xmG%J_d_rJON6~(IbHe1I>H|w*d z_4KD0^)DG4_1{|ROb>WK(GzA{gby3+;SWcCZHV7 z|8e#2jt$!#ee7?3{NnxKwevrtIu6)TyEPug`X~E8*MB;#gQsu29rYh9Jf^2BtcsW8 znCX%9;a!nkJwGguaah4;yUAhcCS28`VSS6n*MHlKG&7QpCa>XPm(%puqmH- z6Q*@O>C0^OH}T*PTR%Hxy{A9h7k0e!@Kam=^)g;(`TYgDfcfW(+rP#8zvWrE80vW9 zt=3-s{BG3xZ%Gcv5BiDv4;K;BpX~*@B3kX=#;0+3df{@{xpMgG`Gehnz4{1S4ITUT zSMIJ?X~%ghkG=k)lVbk$^8Rlvwv(aGKT8(o$$gXHH`F@psQ-e3!t{5A740OCd6}O$ z=*qH;>#oTM(>naUFs<`Cj`^__d+~*Nzk1(Cj~@O?*!uigXRmc&b^ia*tX%B8&!F1e ze=k=XWZo${$+P>95pQWdwqn$OVG*h6?+S0VSYPmf%EP?*+=vaDUP(UjCQR$Rlb+0v zt=PnaKWui|4i}#JrANb@?Vf)1Wd~pHwXSylvy+vJpIrX`?1Q%7C-au9YVDmo=6_qP z&!i5%sQ;p(qNabJ^8CBMf7fx*%ldcHj|Vl+>pWXt$ zoqrq7%EiwA%+~k+9rY`U6?JRzNaNa<{r*&Kj{A&<9rYh6Dr@??!W;eF5~}m@x!CFJ 
z5P9kE25UNMKlp{MdVRqUTEEQ0>G-Lwe(IVqOl!aC^gr$Mn=pOV3)b6imAAof?|-_= z53XKbfA}t&Z!U4x-tkD#_-2i_t)9MfiAViMi$|tEaVK%EBVs;mZxDZqUhDDG7l~y|9@tcm?Pg6e6OX60ieDE%>KWua30gtS)`tQRwCtUK2)qi*_eD?c` zZdNWlpY!tc=Q9iG3zV$5-|Bo)+dltl-1h&jWHP^~f0AlC?UnTOZ0>%kKyu+kv>%DIdH| ze(IVq^ndWTed}cUtw!8`mDlti%*w^y@9m}6|0lXgR((xh=!VU_K>CaNw+a^1-wCHz zOz-1ud`Um>fz+oq{&YV`c^m7rFXLH8JDmBk*?V7k&BAcb@80y>a99A39siU4z^__< z|1~|`?=oHJJGI+--V%QDQjGfNMQu(0L|^Vhmi7Uk>9wd}tM+^ICw%yOA^e7}q_^X) z$5!mc7uNXsq@T?_VtQC>%fEj3?r+^py?y_c@a0G8?@wD8IO)JkaLFcG7yQ!v@p53Hrlse^|K3rtdyA?D>-$fssOW9;8ui~{e7<*L8>$ZQfy7hW`bqjgcjC3Wbli#B z+8_Odncv=f-K!T|A2!(c&)Y7T`7D0B{&qAgm)iHgPW_5vRco()`fsal=GXZWXTDMY z!7MH{{axXW{)Ht}pMT-$ir+iMg@Vg`|A(&Bp?8YbZ=$35oz#0U%}e}Yv+tkymdPK# zJG5Rr?fBQd|5u&d%C2zR;zA){JTitTZyz2hXZ&luWJU0FRX*#}CMID_y;ykeB z<>KdfM}KkuA1)#_Jze3g_{gW;5M5oFUi-fQt0OPbpLi2m|J3U^=EqiS;=vytI%n!1 zW*s>z{OsuATRw2`C_MK4%aF{2^SHiM{`~Yxn{P|w+K*V(+W3+@tJgYSKcoH&is~l) z)AMP@1HY58nIGsAKYfAZf%MZ@r+mcWF`GE^!&YRRaMoj|J+uCtS>Y3he|gT&e%6Jj z@%`uh55vl2~^Wc3f`KD7|RK>a^ul)!0J3*^M z7e(`rj?cpn!ta&O5WZ#fhwZX6=52reg0S7OS6%q|l~#k_K7YZd{JGqIU{*SA*030# z-nSDQ-Ix~`^JH1<+nG{uOWT3B%d|?qnmdU!JlH(f3(PG)PH@cH=T4m`i74< z=v`7rzl!96Q>T2Ex19d4{iU5<{`;HfhMnHM={c*+elz{s@uwqw^FA;CPrd&` z;+L$bTZ>0pr+w9~|92Yi3NpX`zSoqFM*Vx8e|F>f;ALK3<7R7rPs)c4 zHsr$>uGz)E>k~I!5WcqVq3y5SyA6HW@n>OHF161+@G44HwT1_5JpbI08v2R)my9+2 zo#b&GBp-d)PZ^KA_6g(ptuE=`{H>+uTRf@rPV#MBY(?staQHL^?)W2k?>973h^=!t+_>4D6$9+W5tDkYO4fCvA();_7588N6^TA|% zKS6&usx$YQx!*lJymOr!PQPq;5&ZW4e@MpxH`4oGujJuLY8a4L0QyZ$j^qaj_e)i3fj}e$V|69`lXCFyrc)(d6aP$aKliaD-;O)F{Zc;5({au2D@Lh~ zC-^~oUC})AQ|HR^%!AA6@1Jqs=Wp2Wb&vPYeDi(Ve`1d_>EHhUaa6|vU#tIr*HOQs zSoO8>&C~q8@pZ(}G3wtfGHUv}!W(~2?MkT5f40P?-Y0R>E%Dp9+7Ef4@xib4R)-BT zFKn=fKb)|^JxgA6+uy>aYaaUQcdl3+e>nd}y<(<|haXJhnr8b4x>^4%$zYz$JL*4B zL>l#9P3p*}-cb7>4tkf=X&(KM2iogXN9w32-)yIG_^`nq{{E?xr~dKD9skll^|Z_S z&)f1N@Z0O}zC^>zhqaRlktP(8#<||+3KgR3B!7qzv!ixUUgiU@spoj`IE;_^IB)`|GQba z*!!?a<@M)BpqpV!^OEgBU;J+xw(YMSsiVKB|4`vUKg)NL*IrAId>g+z-cKC#ZhRa& 
zMa$Q|Z2Xqgo9)yF`x0CI@HJuB>Ac^*E&IpAQeEvocK)GMBl^L)@$SR>{O-f1U(y#m zApO=}e>;*v937+n!$rjOcZC&gSFGdle^o)|L!IUKNu0d)Jz{$&Xmy&$_~u_QJ`X=g zK75AoIjNthKTQAX<8Qg-ncai??Kzixc7+Sk)$|`#KCq*9J|4xYbTV7}YHz6dP7xlB zqhr*6L6Kp4k`Is8nI0|W12vyGXu2s+YLf>x#m!G&AiUUMgP(E2ar@lx*iQeL5>EN@ zg}bl3yBhe$UrW*lJ#I+x~a)IzT^m{2x_*u%mX0 zed^b7#j2hUcK=WEs88&u|DvL{rl<1OuK$6jTU5n5u6kEkRX)#q%Woxd z*I@sP{;=^@7oGa|1G{1FB~RY@sV^SomCxS)ce8S_*Wb0u|3Cdb{Tk}LCA(UqN8|fX zt%E1(KT=fJ^e3;3j?oc6=!)uCKT<;0{-Pkdg5HVWFjsp}kh~PVlQ{D-8-I#vU&PfP z_FQL+6`wohgJI8O-hbiu-WuR*y#MxO{r|6SRxY+(oZ8AOR<(cG{a2Gb>X~QMf3&Er z>92L|P7xiWe@kq(pLb%z2c~}0Px~bgwD+}1-0E~3c$d}BIN{!7M~|L%^_k(<2M;cO z{q}wE*ztc<=Cj<_|7f4+27fv~?EXIapxmgxo>!xPo4<|&Kj-P%BU!n4C)ilWc3e0< zk9^QOiBA}H`_=N*A6|3qH&4BM(yPK2ColZWoh!D<E* ztJ?34PwyLvjZWCiFX~@1)$~_>`XS$Vh=U#Jr0oUC2h%$Iy)dn#|0e9k7q&h1q-*b8 z)DGKi@~-pG`{~;7*!7=zRxa$H`GWHLlVNl)bdr~@z53hy|GU~Yk31_E`i%OQ88!V& zZ``ikGQB$ggWe!{1CrO?Z+t%E*!Wt9U$U#?8-CC`$*_9t2BfYD!#=CN`Nqi`%?z)9 z?hD_1)fX1SYuCSZb-b2fe!QOl*ZHA`cbcd35vzJ0*ymsJK+R{|sDC+9O@Gbfe+-$A zZU3%}2oG`4>v*Y6KI2)u?4s)5{Pe4QR-gLQcw)mB*1h1}ukS9J8K!S@(@Wg5e}d1B z|DCK{m}h=-`TtWRrh8Z8I=)y{-^RC-<~hZdqSnz*)ITpG=s`UwRb%+e>}V^((+lAP zd*u@+-_S`%HCz4EHDPdf?6l;V;WLG$nwMSwHlY0Ax=a86cN<^zG(PRae*dm<0$lm7l2t`XKT z`X3=4euoOe!}9^OdMA%kUC_-p#L18PcMAs7KgngISPNMCmUH;wbWp`Q6g{RfN6M*X`*bd3I~ z9@yP7ElYo|%{cT0;+Lw5A3sRGp_7W5t$ym7Fl_YUgPwo*!F|K5UC*DBU;89H_WplB z`4#i`>-~olKRU7>OI6fS^=NOUJlYrj6r=t_MP^NZ<$<61c=*61Fa5sQ>c~^X-wV@z zZG8CXL$McM@E=)y*Rwu+R+xX`8dv>w&o9Ae-+y(payoHw)n zH9xhZ{=-E@O@CK-W04;rgh%QIOfMz?8O(_Ypr(1UdNpr!n{)^zvEqZ!DGk&0p$nR zE{{K-L55i$K$AMXlpr7!dHuJG3V#GBB@RsN>D)SuRQ zSBxIV@6l(>6>ObZjam)q6Qq9Bk?@EN@&ocl2my`OoTvl7{r}W#AIQ>NZ7Zo1U z-$~r&my1n)`aJYb;%v|K_wrW9_=@;@q4hyMo^njha*?p>gv@$;KHFRZ2j1ZC!au_oqy|Q<$^yiPrqI95H{OO zvg&K{(2acj%nv=I{-Xs;lm2XP@=f==sET#y&@IiZy}Ka!pyg>ke#!nfzWc>ve`00o7Qd|9LeQJk|-!k{wi*~;>%sKu0yZV>EhQ93g z?|D`(>}UCj-z{E%Z!>*#p5AF*OOUm9@~HOvBl_)1oW7#|WyJWk-Yd41pM3f<)I8!K z^=31^B7V^L(($O%ao|Navx)b}4}1OQ&i7yZ!u?^7+yA=$od?aN-p+pxW#wYm|8&%b 
zS5dt6wRhq}H|sMO|5EnS3VmTy8(Ogh1UDN z`nHu0+ds@(^5%_S_0$UR+V#(!tXw!w=g;Z+=Un|@G9QVKdHrp={=6%h^cna6R^e&V zzxF$jTpn^&JR24pBGe3`m-OY9rws1zx(@l;J4S`;jCQj`v0l=`FBA5Amb}a zH0q&tQC<~QO0Ur5G5H~4M8P5O)acZ&g=^rw%S9`JzL zH}@SSAGAI+AAc`Q>%1$vt@bnT3M<;KSl6F-DSw#xw-m=I;eY}%=L(Marc!Qt* z!{U=}KE)j{6rLM>@S~sp)D-wR{&U<0sk8kz&&q{5P`uUr(9JuEm|pbP5`WzP2a88d z`g0t#KD5sGTu~kChfB!X?-3*)q)*WN%7>jI`G!vTYxV^F;h-hY_nrC5!EnfnxA33+ zdw|E@e|D4~j2w&~TUew}yJf2i;@>7O1?j7RIeE2^vg9R0j2 zyp`X(!iwWCen;#ypSbzScal%sF!58@grRfo_ow{sqn+@E{b#LodEW9`Z|8p)Pch%J z{QULZ+WxO~S+aQbH-7$tU-P2=!&zKv`fHr~s576M{>n!j^jdE=<2L2DViR$JzgFkU zuy@uAi1hpUZA`csc2gKhC$v0$t9oJ(k_Tme(_nr3VbyjbM zx!=04I1dM}egBo{l*`YLjxv1)vU2e{9(sy5>ZtzO@xLP(j7MJ7e_;_Zepgr(k66{3 z@eEZ5<_$kKh#$n?3&}Th(o4-&e=ojp{PD+JIJo;!VW2ZSd(#J(Fc16wYee}$bZe;} z)Om}yT6-st+cO2NPk6fGr_ZSWqQX>OWF=YWlmv z8_)mieeB6MzN8=Gpm#;}nAhK4`5nnOe%E0Ci~exQIe(nI+t*yU<}c5@;};K)qO}dx{O>UNppDxn z`S^QbTIU&0dCXRSFTOBk*2X_RW23K!wXS+aybsgP@N?dr-+jTGmtTLKGSaAzj+^O1 z9oL_v{V*Q>{=O~C$|in($45Tf!_djV#6kKn8@(FzH$L)|mww?hO!Fp~ALhUL#&|upuA5@Tt8PZ}9XAec_r@j)?!ivw%AL{Z}h1 z7xoAH{YB!JtZMC@JO;UL-1vwiY-AH8&Ce|c!V^jE+7+;^{m-_Cz_ zWFDMP&(A53KZ{MD0gW@h&R@Loc%*iFyQSw52c!OZk!k#{u&U!4e&)sgY3Ryyt7jhM zgNz5Je(V&r*>S`|vF@uYGunF#X!QpI_@Yuc6+~|G=x5=UKV%JTXl2 zB&*VG;@3ZH*(qw#*}KAuw(@fw9rLj9TbkztiGx1z!v~TFvcIKqZ1@bBzm79efB*XL zU3|)O$Ia-U`Rr;BY;ac_etZ30n3W6fgRDQNenqkBYva@FHnG$BvH!;Xzgw`>^mm0f z`iBIq59Xbs=CzL>pWjqx{?^j-E$$?4^T2LE>Y6aLFM0UuEBs}4=s&J=(#vm~!v19M z|2sOKV*cmy_g^ja0wpWzqkW_{*C7+H>E9<6QU8JBQIr1sj)wWruc7h~2N~CF%|l20 zV68t_bQSxe&XuOkcX|E&>;CDvyp#eo<6BzzES_d!c)_qypbYWy}zW+-XFU%T>bw2TCvd;v_6#A`~yqRw>W)(jQ9Vj zd-FI+s$%^+Q4~bf7KUv$&J4o<14yxN1LP@Av5O_R03sHPvR-XO6cDsQKtz@nQ9u-0 z0a=BXUDgq~ih$5^kpV7Cqllu2jVu=x;myp5Z{*a;PHU#U{yz83Kk})Fcp^@mC$qAu zva+&bdm;V79s1yIx9omnnDO-wUjOplf1$6Q_kUlCipT64N`OGiVe=v_! 
z^w+$~`G4y4YrQ@}>VaPKW1IOhp2btCW7}8rjgJlLxE>qq;ScS-f42GIGbV*y=6>`Y zD{OTm_3ijGniNafNAdqVuXv|^tuIz}?Vb404V&W@b(wdj|4^QR9>#aMx7@z)>9>Ne zpjWOJt;@sT3+u-5I$n&u>V+v^z2cgIdkzWHF1u>4XTP;C`S$(qKvFCooTT^v?^i$A zP`gi%wRiHU^8ZunXFJemrvGrBQKx@%4XFp8?H3<^h=blmd1~X2`!IheDHgMJerCHO zJmJ@cu->IVef-I-?+RNi^QHUVb;onmwa;Iv)>lmLFYf=@;kW||E7Pq`f7Q)9VH$SC zMyE{wdAX-nec36VPI;Y3Dz@oWr>BeZ(4_}I-al-=Az$nE^uvq#2+c=6eZn%UtQfYr zXO&~G-+6Jb@uq(N%XKH9?H^ISBw9Ri?UVa&Tc5<+l1SZb{~yaewfgIQRN$NLsnqdK zkT_`LqFaRUKy)Y1{N(94j0ewb;yw6b^7-ri_w@5u3sZKv=cu>u^$5pv+yBRsVzJ+0 zx7DV;qIk=-S3i9LCM;+h%S`Rx$ z=3~fsTF1s;NPk%4p2Ls1eY=6M<`XCHwsfQX{o~%!x4rKfP&@wr*@pO8d&eU|>?Y^` zHE#Q@+RQW4znDp_{+*;);F}K2A5`A|`tM(yjBn%C+3KgyIt-ia{N?_|Kl5nV^!AsR z{Oz$LUi0kwTjryY;f9kcn`BsnlgUV~B-F(K0^YHgV@1&mP z*V*dtRWG!jJY)K_8ApWo&icqze_wozy7u`u)$tVV|6d|Md7xxv`?2a#Isb-E5oY=q zmFx89e`J`K?FS7R(K|unVC=`HKgKhDTk2Tc)!6@{KRh%4kwfRa^RDo_Q%|{a-T8ft zXZ!!Cjtj1~RPhH?^lL~Tsqk4_z4-dicz$YU`WKHXb^D|u-{Y80I-WhBobZT)^x-vb zw$_CQT0QN{{K~gFaXoCX2S3bxVc?eA_8tjyZ@=~D$KG}|bvgeJvY(~T=>7NllVXYI zA>MMoCmtWS=@x2GA{GL25bD(4V$lZl=P{o7b$tB2>TUgpd$;=I5<48R^=n&gxz6WT7@>|Gclu>M%)|HT z`RCZLx|1(inNB)CuA9O(-8#ag&rJWp++*W8iF-w^ql3?J(NKBBLGsPkzUT)(X#K&{ zy6~xI81s}*y*dm>?Z3*d?YFHJKA$eO?e+&X_>lM)j&LgMl2c6o=Yo020Jb&w>uRG3u74xyEN?fR1$U;X5# zf4<`;@NEAdN{WT|6Mj{_{?1`^ZB*zqP~6q+ewOr z=fNW1|3~}hx^e1ryg@JQO#hL5z&icmF(39<w{C(ZbZnuYp`GWM}%1mzUT7?3olxT^{?Jq0U>fGX2yB>+Zu4f39ow$%S$i{dJx^kLb%@#~U&-;}HkF z=9?Y$haa@M$}>NCT93MnYc}y7{MO31jD79syFTGyRK>>-6V&WBI9!8Se-1_}vqH(8i7QC0>Vdo_9Kajm~v- z@Q3x5oxA6^Hn=-%u+x!WfBWbY)HnUxS|6OWdj1A<4fHZ(d_~Dx;CROQe|XeWq@PUx zVn%iPa~&^ybmsj5{Sya$QT1PV|I?5eQJ;E-%uB~vSby;M-g;W!{d0J(jO*& zq`Bv@OMN&@`J{XC>}UJE=JC4)d*3sr<1pU{C0ZVDtUv9mxsK**{mTBY^)mg7M|Jx1 zJsP@LolbXs;-J@YW83;1kbH|XKkeWAYE#c}5&hH)>wJ2JyM~weR#dP(che$l z@4JYD-bvhS##O`*T3vXWk3Q5hjCl*m51&romHq$S2I0szHCFrECmtC zN_zjvRBc7E%KbmK(arkTyx7k4FGjL)om4WO_F+1;HQrHlipt|WI(*RT^$FknnvWf0 zoadEqe(WA~s2f&#?1I(bwagu1>O1d#_VF*=OFjGjW118T`$Jls{_MZd^wD|NjgP;w 
z|LeSogPHyV`G8j6ehGFwVo@3*$V-F~5$BzgN95`!}ntf5`=}hB?pw_?v$_?)T)``=8OISa?4BP4)Wc zRQ;gpEZ$5fwZSIa5r0GTogjT?`VZ%xivF5cIscGK{=aSi*FNcw`FN+}g4*Ar`oqjQ zKY8PdX>-DiJ7>C8H~lR6cK@p}9S3Zy-RMTK%C&ticH&zE{paNw75!cAU0}P${q%Ro z(W}`fer(Wmtck;mG0yX@dt9^CALqw*n6~-mQy*P(k1%chU9bGb5+{&v??0I**w*nH z5%!54uj$vW|5TpsuaTYY|08*gI{lk18G(6tCw7d)LE^DZf4sg~Jk`1lLGwHHFDx>@ z8h%*yrTaG7dW%EC>fx}f@9jU)E6=Wfj`HmP@2IPoM2pRfda9dO+w_m-m+3#6WJ^VV z#v9I~<@o^{hd8Lb=4su0(-EGc`P<|3E$$?4<6_q#8b_j;j!qtN-2%4h-;p?VGyTVMPep$xyygmm@a;I)5Wjcb zapOgPc+59G?-2J_;cLBr)!&-*$Pb?W&5<{>mb-4X(fe0ijC%I^Td0xg(MgKMuM@rh zQQak4T{A!X|4rq`^Fyar-zG9nztr+B!a=d&*Xhsa_2y^0`Ibi?iso;O&$oD#=biAG zr`ZeX53R*d-Qo}1>>0M->09gmbjM4`x9{IONwJ{6U4Iz)ZG7!dva;=_HvhwdU5ACq6|wu6ob@BjSu56z8!bi!Ls9Swf!`n%t<{;S@}eCp2sr}Fj7h-?Fw>LK0*jz9YO3*A7T2iTeZMF(~IH{UIl;roJJj)M)a`GD~8TOR!?ntxdM*fH9E zlKOq5O^@ z^UU-wA~r7kKILm3d`~>aG%1#5#J4=^DdO*irZf4B8{6vdRWHn4`l>%Yd(2zI+!=?T z^^=E=r@p=a=@io_kN=a4|6h2@p6?BfGrppD%l)$U^*0qJeP;R>o{IiXc+DNU`Sv{Q zh@bwc2YM&**v20}56!Rhu{`Ff^%&0(KUl-3Uf6bzV>aFIqHl-ozkb<6H=VK@b?p3G zJ1G{M=Q88#FRXIyojeBZ`Um_`UHZxNFCrEFna7~ig>QZJ>v$2b`N?j+<-t=le`9>U z#iKm$gpY1!FQh-X|Ji@zNB15LGnXIT@r!$u|3ACG*42L4-_yPH{99Y&Absh$5?x^b zpRRm(soI*){4)JJd8AH%uA_tRUCu7YK?mZXcR6c;&ke3H6Az|h8((eaZGQL@Mdq*h z;bfoO{M|J>;VU~FwenSW+4m}T{aYg`7WzofEN(z~1kYDcSeXvgkFe>rj#DJA^_gF$ ze}B%X=&yP4@w{$4>VaPK&1QT>{2=q?`DK3cv>tV>k45x{lTKc1=L;{H7cSajrN)!z zj#AIw{}gI0pvQ`Z{-38WFsWIrUPB1hCw5!>%rDb_Aoo=C?-O3-Jb_bxj0585JPP$e z>qG0te(V_IJkPlFhYj}dhsLcVD}Mbmw}kayxbn-Noc0Im+2^l;q*$1@cY6Ips(z4h z6eU_dA2@D!ClQ`EHlI}dnf`dqBoVZ}uztGe3MMd=D<7pL${K>8l^{ zw_Aro<4bRQ*YzJdfO@t1^Zz@Yq*(Y|CRVApcjBRcbTfauJI;JF{fF{Mt^N(kqklV2 zsNcKpxbY%CJo;mN<466OzfbC0zSgxm*kBKSYuUpdUwiv^?%$fU!%u(j_rH`n_WjFP zQY>~I#J1V5kKX^s<8A*}O^U_z=K7zg zja&r7nUR{!|-FKN!s_-O(5eujFWcM>-n-4yYIY!5@tx4yAK z>R~HVuMWcpe>wk{pI!c1_`qKdxMjN+%IhCGNwL^|Kc;qDk?|D8TU~o6KD+-IdDKy) zzfAv;Jflv3&g;N8J*W!~>$qPPgb&7f=5LSBw>b4dCw!}i4aU64Z}*=JyB$4k$(@h) z8n^d914*&i>+h`bUw?)5Pxk-1-ycQ&W%`fiwJQ3eriG(c 
z|FydL)`!;dt~*ZM*pF?PCdCr-|Lyv(dEmEp{o|%XVNL&;FtY4r?C+d^Sy7`ueUW)jIUB15H2bqr{<53S=k-R!=Ewkz|+dc5zzqXb+=GU+9I^!9yd3OH4 zlN1ZjL%*E<{yJ4Z7>zH{1&%+Q|A$9jLzwiJ>0fy2^p9Upy_5VHsRt5|ZPT$|>R4R+ z;(1{HzVXMiIxdR$;D^1pIcTY859kZ~q`&yavadcy9gaVIk80S~`qWhvZ*}dR_|Og8 z_E)XP{4)L1JfosNY7j(%$mr zaPyQfb=6-^er@red(E@!uNq0QP~Wb2ul=q+`#iGy1xr%Ljz9B~ zVqu(r|NkFt+Q|p3=&w5Ykv#fO_Wy|2yttch`odE*fB*P=i${6h2_N0eUPym--`gcm zzV)MbhWB3ky?=b*1Cz+N>wm_yKG;?}?MAW6{lskT*Uo>cZO0#M#ccoY&vPsKJE`au zYxSU>^8U{se^$9-mCX+L;kUx9`>y@m^E=OGes=%u!K7I1_`k+E`SLA$ex@3Cg7Mm` zpZm7iev%5)?1tEx{sZ}=I{mqitLdNS?2MllV8?&rpm!2Co9(2CAGG}=t;y4R9;6;N zSmS4$u-=^0cRTMNCx#7AT;jFt4=|`__dn@RipBJpQ{4V-OL(BL%C+h0#D{L?Z-^h= z=qJ;EFpt=HPI)>H>cIEJBP0&Oi*4&?ASsr*xcRjY>KQJgpL$`-JC@t>jHTZi!cqI4 zamW{j>BrvxG^8)~hxG13|J25H5|7u^0nshaBThe={zG}KivBM5X6K)`5roh7Gj!c? z<3)bP1>u2KPv>v`Mbw%4UdIQi7k0jM`g=b5)Sh9Nr~h)nQ9pSOe&ziS+hLj0iyP3x z1CwaM&v@u&{!|j!uJn`XKb%ME^yhpme5;2p;F#2J-Yy6q^iKSSsoH~r@M1JRC$^_kZnzwYL;Z?kWx8qI*slEkRC(~h zO#gX#Zbg6P^E)PVVfz^>k2vUEUUh-@P0{%9Kzm;s)uT`E@@iS1E?<}P_Aa*;-rt(E z-T6=5{FcABCOy8}J8${ve|nc^XU}uJ{|5V;VO!$fb)#6-HT|L+c051qO#hL*N}c|U z!1n;;89&x*STfKRv_2v}@j8t2yp#Io$5yPXgFj6E*xA=VyWx+*)UTX-^){Q7pTE$* zqxHe%^!-mm{h-c6yyek;qv{D_QCu> z?}TTz@)#dKXwRocO`g{CAoZ{nsaJ>LKMq}g?{kh=I(%~1O`ksFmIn3g{r^BxEcX4& zd-eZM8BbAI)iv`&H|)CkW%`fhRZM@zaZ;N+=3}pusl@U3LhD28_u>t9~e%ATtb>n%*eOWyxk2wA?E@*kF@LGMF6%R~*Cw_R%<;1S!vz==6 z^jgRA)W*;G3yY^ov2c7dKjS!!gA??JGgtW3-~QvzcZIW;nzq3eSC!YFbNyke<0#tu z&xZOH#j38o6JK+VAhuy!{LC-YznDoyesENt zc+`!twamJEzGe7`M_S8l^@C@QyMA@XvFm?ElVb7U+WPy`j`|hF%AP+dj|9CFM7K(x zh1yx3@-qF4M-~0yb&_IX9#)SysJ!Nm-Tb;d{K78ZC*cRZ6GrX0#1+Y_!?5u-GoSqW z5zmKBX20XbcYg5|_9Hw0*01A%{QkY8evo-L#Lt>OIsSMj5&SV``WF={`fFZO>oXtv zHPk+cgN$po@)#dK2tSTvTU`5^s6Xs`@n0Le@8`mS_dc-w;34-?$F6@!lVV}NPUm&o z_{Cekewgj#5xP;|tKV$ZHPb)MGb;L{*I*tk_fL+`6~6MC{oQ=imwJlkZ;a2kc$DXz z@NFL0isaQ{*y|@}Kex+;ePO@LKJ?UVH;etq_Wx+Sl>fihKH*Cj<#8mox&NE>nF@n^ z`pfih=aGt@+DCI4$%9Y6p_4eg<|ATz^;=#vFY^xzA3H|tU+YnKqW*BeF>m?kHJctC z4!Qm175998Me5l3x2Vr_l3xGZkUnkR$`fz7roZO0%BP+aJDy*re 
zrpNll>7PAqzB;b3s%xvKx?$UPYe)|B&h#%vwDFw8ZM>mwKF2>p_{wV@-OXnltEc^& zKOLWMaphaS6Mn^BNPn1q-_ctCuop+n!pFIOv`5%x2s=eky$WujFYx4^j^s?BQ=s zx&5ebd}Ny=T2m*VHG0X8PcxpKe;Z4RWl_)nPzOXe?0=&FU_M~2{_^we^87HBc(IF6m||3i6ZMSqujv*Z7of~Hf{2la`A-bHzRV)x<`C%$q)!&eQLgTO#9zXqz;jrH9 zkN@WI6Kw=O-Qa_9-Aw<{JOf?e*>)?U<^FAc zwiBqlMZW)Ge>QlwFwYn(tk;a{%{r+9Y_3F3zsGaFQmSKYVbrr8j@3Gd1i!@|U-MbLu8udG`HRe^M-_$J7hvXLn$_(DX|s&e}U33DRff z|6i2nJHlh0nf^s*HXc0YF|V8NiN`oEDVEy%Z^wM>PLWoB{DDV5*mYRPQ-9cavAfnC z+hS6f``Nu8Yu~rC*E$^k?RniuiX}ZfyZis@FW(t|K9Oj4+;H;9P{-jseC%iY;Lr4T zNw!q-810t>kpkoY`bb4JJY|YVEPj$Z$$Dv z5Nm;Yt98e{Q@brlf0_Pi?lJvcZk5}S{Qy4O&CnHe`#jc+>RTRqDaL;67};Kij7L3e z#W+8izc`I!}=C6(w5gX4hZAqfgDJ zuT1}To?Fph`SJaP>-GccfnM{?rVmB@pv|}FChJG@(Z}q6(H{=F`%h0_@TJwmLI3@S zvo~Ds6zbdWPsfsCS>*FS%uoBX-+$pxwQfU@d1v}}a!*Bn@)WrbrFS`d0exIy+VKM( z^+Dzd;_roC>yu|T{s?>33+vte!Xr;FeSFyH;2XZQ+WzJ1FXw;Kq*&O`(&FX!jzMfg z)kWCl+BV@<&$RqCdP;^3cWh(}ws> zALa48Klq^Wv@ZT$80UExjf<_=t6po_XSToLN27bUmb?4b7yo$seCnJ216m*a_kaE! z%11Q)(cR_V?EL?xg3K${AAgMAMS0Y%(cg||jhZ~I=RxXWE5?1qcDU$|!;c^Lk0TOY zO#i*b!RH-&p08Zo|K|knG_K={Rj#Q6R=)pgYkoApZ2uq5Bc{Joo??7HC)S_w;(2<_ z#}5z0561eF58rI9Tj!@<9fl7bwdKPv{dI0Q{Mx4un7{iNb?y9rCn*-*cYF8G|9_A^ zBrDU=$s=?l9?y^dGX3Y}85R9?J6C>(;Bt03j(540*&~9@?+8*Kv_7=1`TNJ`Tbw>X zCw!}i4Z;J}Prb0iUk=>*%FB-kJDvT*4{!UsaXDwqu%SmD>Sc^Z9PS<9bd!#;wjm+Kdyn^`kew@ZM{~&TriJy)`#ElR9?%A4rPD&i}8W z|Nq`lzoJ;>8Xdrw7}!O4e;!^(Ut`I%=9m2 zQoH}_=VSbfbm{%BGx@NGLXzUs;Mrp@7Q zzU9GFH2=W(e2YhU-U+`luZ8rtrrf`N^T2N(-I~73`-Zoev=?>k`p*Ha4^Cdh`)_N% zd=8~P+cUN^{Tum+75%9b|Nf|8XME7*+2!kl>E;S|dEP?nF+M!d`fJzZX*~~84;$>^ zZ%uvIYgg^MRkP(@zVgsFZ8Vp9_WDb{qTT;I@+UP5^W%40*md*E^iT6rb^3E2)^uvi zz`TBXr}mg&^HxFlpm*Z0;E?#?#c2IAeZUjIixA!5VS|hZ z;_ro?{uvLu27A>DQ(OD(f7=H)&+x?4-m_+T{{It6vCzNa^7{V^sm4JkcD!bO_Wx_j zV>=Va&h+o(HR|-|K2FTbwqu%SWqGO#anQRwS{|3+f$%`9+pfuT;`d*kl~-=9~h)qg!1qoPwPe(wZ{ zgEp?l&DQ?mf$R_P%+I{315VH%wtV>?|9Qc0hC=J)n}53Bf6Ze&d;c>g^WnJ5@y7H^ zHO}~o;w|^b%I}XFQJ&Vx^dHD0wfb+9M;F+SsROzqY_GGmx@n)x7anNy)4Jv-Psj1# zBKoNp?w|Gc)8@T85FSf@{LOcLw?n; 
zKlNiI4tf{mscm`fDBcj8cwEPm4=$qqe+ld!==7oN9fJ9)GS$N!-`qoTjdz1jKy`12P!fv%uc_y5T6)sE40X5M{Cu^2D5 zoyI5V59gjUecIo@ep2}E>;KsDnsdtEzwf1W(+Ii$k5j**Sk<*{&${bR=({bs+3{yM ze^k+5dHj!q>C})sw!3#?69?&oxY=449%%KnzWJ4Jb#y$dhYi;7sb|NZmzFr|^e5g^ z)qfx<7CZm5z5D^Zji>d6Rj%VWx|v_+#XRXJ(|=xGrJ}#fy_tVR5WXF!Q}KJJsJ!O+ zV#5cm5AEOl1LN~8u6)aP!mrp1=`YVeFZbPTPdTxC|M@YkJFgq959%ulE7M2&Gh5?! z|7ZG7g~9QQx|#kXd8AH%UPs`w{S37}anQTG>H@FZ^iju83wq+6pw%Hy{luBK*~Dx3 z)C+e#d*9r?_n#E*zw1{wyl~hN)Uns!!K7F!-$PPYQCQ`gI^Ky7-OQhg-#bD2$@Cx1 zJr(^KZ@8OZ+5aP6^Mr0b<5(ZcH-CG4zQw5zI^kPAY%u0U{;=BoKV7=t?{5ri-15TT zK7Pth)UoRi2em%9O7Z>giR`zA(KwQo>8Rt`^$%*BZses&v1Iy>f1ub>(}}&!U3@>`OF_w9>1f74|*qc>JVOx#%CPr z#P&k^!!EmiD7|4|gRtvw?|pxA!I|h`_kU|A#ln7&J~969FN9U@?`CUXmHYqDFPKV3 zrhnn7?Elnh2@hT9%Xh@@ogi^A_G2?&@{Qk?I`GU-zfR-W;KKSFl0V`5|ED^>qFCkH zJ9*6fnyQ=m+ZtCM^UL%vJQe+w$N#9p=XqnOIuHkaG!A`%@Ic$HQJ?UsXQ<g9L zhi~5c!#9?`>DBOq6;5ewaO)A|+xdrqq*xa9{ikG=&j)m~ewddc^Um~d51qThc46+tNB(kPuzLN; znAQc`YBQdqc+0iT%Zbn4|EOIzzfAu$$nc%3{tjvmCpPC-kWy9hF0(E3w8eqopO z@q=-mcamX#Y(?_wFl@B{;U9kD9iIpr-EiPz=@Pqn&9nP2jV8sy_j!KH;tSx~>!0aE zvN9cYezyNRt=CXQueR8k{>7tO{WYKY*l{O~>M1Y&za-1kIQ@ad>kyt{TnAgRt`7b% z^{3yRd-<_{4O1VwWVzF?-IBVd|4>paTuFs>r?KLq zJ0z_2{p~i|`lRQmXV*V;l49Zg!NOku6ZeB|=4bv+>eElAe}C?&)1S|Q@GJULcTnms z^6&4Q%L`Qx{Nxt?>}EGZotuq=bh-{ zwSH{#yCLdZep{H#Gt++{_tfd%+^CyxbyD$DpL(Eo5;q%N6!C+0J|pUn`qVR|o{qDy z{;KMzxf;gxZ;3IsBfQt2a;m3_dl!34`8gnhQ@V#vC6e~^4RWwNT1}T z5@+6-{)4%vPJjAS;c9jOb&?c+|rNd-%iLU3T8`)JL=TKl{Eg zbo_)#K^Z60e<*)szdMfk*zspf`vu!- zH@Z=*Os75`XZ zO#gX#Mn!+^x2eA)gU|kA7}Yag<;v0*S~d=VzKw1 zD^KwI_qNudPtb{<`DOZ#=AJtJg}p$$As*AqiA_D~V1vvP#NP|y8|ru!JL;pZZdmJ^ z8~)+mGbV*~Zr=a!T@T!VI(GiKBl9u+`TjRmKj_4c*IxaV_n-0hNF_7Ve=L85ZrQSXPzKDFn)fUKiY4oOC7UaQY>|T>V-{ydeViPU%PbJ zY~J?EoPE-(@a_EDsMZ0|&8c5eta42quW@uUKXnx8r`5M*QKU})MgIQx0_lnRAbnM^ zGd|z)(7z6yO18XUU|IWy4U^>m---mf%tnNd_yNARBZM4suw1W zy>aJ{KRy_i+j_f$XW#y1uX*Xkmv%(jTy>^m#WVAK2iJ z-wMw(&bju^6Aa zI@ffED;el4qMdOjLeh)69pK(I-kx!qn%qlB}u==HQ zCw(bU&))yHlVY*`b-CjF+fU$u!YbEnN3f~7Vb{$s)4!8fsneh1BmG;QG|wuJKk&Hj 
z1wLqfsBZXsVVvh(K5Dt&d6!#dAHC{@S-bVW?UhxQ46A)|9Q;MknOK1*<~M<^KYrv*X>K6ng0EGq)z|%eoD4qbhMrmq#o!aKel0-6pQiI zX8z`9o=*54oS;9f{p5$<^U~-tVV#xV@z|GE*@*G%_%op6fc*ZYP^qeuu* z7kD4j5SzYjT;(;dmpFZc#?!jy9~hr+aphaS6Mn^BNPjr@`DGuxe`GM6`}yH-+&;D) zb?p1!L9GwUAHW~&sjn!kOgHV%Y>nINFa5WLfo{w%(|;(B)alQ4MDWQs)cV9h@A9e( zY&ZI-L93{fF~M75$yWIS+1nG}I49yymAR4j;5WbbRw`9qbt6JkNMq z$Lxjlhs)l+`k_l-eRlZa-%kJWpZ{8(|M^H#ET%)h+T8y~`((e9=s(&2o#xXg`D!!I zO#gW~)5dkVmDw&|my2z`RD5Pf^*F9kAB=Ux-wWeB?{s|Tg{|1DUYInv?C28@d?hTu z!gjOooqD0yJUjlhlVV{W=~>17KbJIp`ZeAVjMv_Yr}Fzd>ZTH>&rJW3+++2f#JwWd zaltp;({3JdkbJZIl&^>%WWPdh^ONVg$AiZ>hBbWZg)J_A<}cSDyif3VZSv;JPh5*U zyZ)rD{&6ySbbsl_HlTX1@B3Uth7_Oz*NLv&GIo4-{r%$Nx18{Wm}_P*~;KJ9!N9zOh#SRQQ?x zWBH?s{!ZeR^FOKhZ96Cr-9aDut&h6x)z-RE-14*@btmc%+s`_C^K0L}LD=pSbB@0) zEkFP6tK%1HWO{(~6^$?K>YBdL4cqk6`Eh(}^~s5%TK!j*F-+G~?6@9r&^zIo&A5vA zLG~-FtNGSHHb^~eu!ldK@~bhy7Uv}T$SNh$0Ui0ku z&-PPHSJ3-!>%7=+ZGTaobd^1CsJF6sDt#)S{<8hQ$f(s{>%sSG*CF@4hYwmG(fGvc zFwV1awT{`^S6vgo7qE3bKYH@_~={OS07i^t_Abf04{b7@x zzudq0XC4ikec(M8U3lQbUgP%uvy&7{uj7xfGTjdg3tI&L`P!L>$a^}F!dbFcaS>`#^Y ze_Z-C-8*Ve|L5m_=ug<0E>0ey8*!UgTjI<+)4xCWRP@(=xsM`zwzr}3h=bncRTnsJ zDUW{Of!^iptdF*!`N?zQ_uvHm;n)vuzu|hn8xE(Rx#Gj0*sIiQAFbc$#&n+At{cUw zu9+XYVS6Xscz&7w19_xYf4z^f>EUvAxt*y`9P}<{EugebhalsPD-KC#wHR zZ#!zS#g|UPiA#O%<0nmCk-B#MPbVoBdmgN*?>`w&QCQVA{o3`Z{-SlH22!`ZscR#s^(tTHN}L z{L};CfmToZGk?^t*Sgd(dm;U;)n9mJ=l@>k6RkBq5bpivHmg&|zJF(4is?i_s(cekj>XDbKA9P})S5tY|Aay|ey%4^kj#sgxKI-a* z6<@#g@MHEkKTQ7ZwDaD7`c~AbJN~2(9+hu^BkXw8k@?2^i+D2~b^blCLu*JzrvJSB zQAL01OmzR(eENwo)`!;vc*L>2%e&0_bosiRr+2xv@c!1ct9DxF%RgAMHEr*kmOu6I z^Qmv2zlMr(_P;lNlhunWAWU!7O<3icI-p(ui4LjOZ3{BLO#hMGQ>TCB`djLOPU^v1 zbxr?{^ke(`a_8jBx9arw zI<91u`$2QI)}hZ-?S>%p%=914Jr(_3?#<4N$G?BneCx**wEzDc|7XuXbLa1!{_ECi z*G>81t3!jP|K2hWJMTQEc3Ux;w|L7nb--pSF>KSTPyJdi(|;`YRP{Typu+e_kopV_y zYIWHLQKH%QP`kO7@||L;NS@l6{)MMbe?CVu zFY3og9Q01ci*5YuHJsxuu6$&OdD#1Z##c;F zF7E%3+VMmE(p9FnjuYGHM%+;InRljtQK43UJ&y!G?t}h`gN$c(Us5c!{C0PoeE4R& z8s0+sTPys2w->K`{u`|oSAO>n|M}yT`nLZMX?-yK^Z%a`cDc5CQ?w5Aj_0R#rhg-6 
z*6P2#)G-}XvDtsU6Pq{~`{@gW2UewmNyF{jx7x3qDc#N)NawXbFn2IGQ^$9yyW)0}DJx!fwBSC!Y_hQ>i1kNTi@ z!ZTZWjE^6*=T%e>KJ^T#r{gTFzqQgMD?WYA7Q?NTo;>@#SNG4MzFq$_m=p`wo%#1n zDn5YO`_YER8DDMjX1b~0e*X*4jh~n4-_9eo`Zp!7qLYrxc*H^PqCEOgga@)eupP`# zp4Ou-nP zVU@3*v-iMz&kn2J|HABVeB*WM_k91^NQwph#47jq*sk1vyRG@!2lLGI@6RI@{hjg^ z;nS~S#QR_W{+;^Tx5utQ`mDn+?H?a~&pNkd*FQY9*PUN)9zZ?QzmpUT$D5SjU!WgI zKZ=r7ZjXxoPU|Y4{xbas@<^TjT*n2U?QiJv?D9Ob@m%4y&&9UAqVbL27Cv>&uY9ld z3@7MsO@HW$_QwyK(VG6)gFiXop!ZVGu0I(|ily@T%c);ctWs~YHI8mJ@0$5#`VZ#0 zwff7?vr8SdzK;;SK;p4&{S>2?+YSD|TmP`?OZRQE^%jSO_3lVdeetk4jBERUG%x%8 zbzf2}^epePy-bp;Rt$*ukohphHbB#^sV#F z^dHVkRrH59l1CTNzaci;(exP=fAbPS`UZ`sb_>rHyIt;7)Xzb(j?|DyH^_q(o-*VTP%)`EaZ0mT6z3#s)>~cS~di?&K{^(bc z{xbbX^BQ&f^F9o|cR9N{9=!OtX?Zo{OSSU(1s`NShE6gnw))Ad!?5Yo_g(X+hp!Kt zZTj?#ryeMeKcCRLV-ap!-2eZ0`+S&cJerSqGhNi*bP=x|*{Rr>{>6xO`tv;w^Ree& zqq`n)&^w8nt$ooSelUK0GCz4*kGj+`TjS)_VQcb+o6X&6`Q2Mnp1tAtZ|;9R^Rep> z3pFww!FEzC75#NQiI!{nLO1Mqe%P%(FZKJShIfj6irNSBsq2G&o7YMne9-#Py5=7o zpKo#c0G;*~+Y9LrYreGa7e009C1LGv&foC-m&(t-`)b`zgf-tk#`BS^^0;85^wpfEar#NsRzCB~^mlor zPXFdId9*w}dzV|~cDC`9*L<=&&N%Qu^N$H1J4X24b=RG!KloE8Eq3f^8p7Z1y!fHR zQtEQwSU*q4QKX-EzY|ubgX(LxlSlUZN3}W5!KYrPf3Za>`fFXTYp_1rQlHmbPyG?` zH@_zcA2c3y8^WUwycn%t@?vya2PBRSsy|F#>g~g?{NT_q^&3a;@J|1Bbm8+a_W=dz z2faWUbsYV59PUHn8NVSJ@q9D=(>zkEKlPT?{DPhFK_`CtY$^{MjCI7{3*$Vmf7U+WHGi2`%j&s&JEEB1s?|TuqYLcEQ6G#)JrI8#!UOGiruEIwc&=!}o>x|P5&dD+ zuU@sq8Xwv$ta`v|FNM)Zsbk+i&eL%^iroLUp|%r@*VF-~bf8(@YAGkc*-LC%}lfL*oY4dHUAJp;1 zTdvU?-Mo{S)zLicO#h*rQLDdxjxasaoSpI00_^pNIOv_k&1PIh{2+Y#GCz4*&v-R{ z^6D^bedxzte{t~m;GbCL&?9djrJlY28Ayu7u78`XpTFAb2N_>cqM0sE9yR0NU#H@a z=a=a}oJVT)kMDo(v`&Qae7(l;V;eS-VzGECHv5nHo%&;B{;|J@9Zr9f`@oS8J{-Qj z$Kp4gdGqfX&p!VSB*kL-^Z7SbzoJ;>+Bx2CMw_p8W zL+w66*51jZ=7xgUhN<}JGt+-0_f+)Px|RDMGA^h*>Ve+nRm<1qSVwpu$6MgMX{=DtH6{U_WtjXQN1u4myZ=O9?eTxv;`8tN%+C-VYtVlGPJf{4M}L|AWBClMEU zeQZB%h~GOw>VdJJd4ljjUYFuHHuVg(KeJs@EWP-xWzXB<)bD)m@YW>%o3C$u*o*LO z|1Z?Y^Z++6^xwexYwI}Cc;e0Sv@dkSwtS6axB8mWcYjou`h(&(ol@n2PHp8iKP5JN 
z(E8Bv&EFWGZ*k>YKI7rX27CC!v{uU@_FiVzDnU14LvG}d` zEPeoB@Ap#iv-XZhg4lNbKmD{d9@&}xg{N-+ublrRKgJQM-~6^9`hm<7G=FD&zQyBl zobajRu|fFQp!!4mygQ%nyS@?HC$9L!7cP3$YrOXRuMPG7GyO@ipp#hHc1n39NMH8- zi&KB2I}R_?zhF?;7uEvLqyMenU!#j6`hxLs&HVJUu*f(uU;Sa$Lw|nYXEzRp*=K!m z-sX2c;FV{`pGHzF*3YsF_xpGB1e>ZGw$;=6%rDcw@YL$xC;hZS19Jao##Iznxu2S? zar^#PZJsCS$NVz=(rk$Mehd@8lUap3AKTw&SW|n;s3Z*>Ak-jvFu52U`&y zX!W9arGFi7qW)IjbN9@j^rL%PlU^SFzFT!u`meqIc?bRdOIzk?&wu8l{YW&^O>O)C zYk1^iHzXs|zdwIe(Vsec9;u>}=A#2~&}+WgjH`$rH2phCv3RFQ9yT~Zf2(h9|M0iJ z_&GQOfj7uavt6c6(QIc{TT`VZtx8_(s| z0^2by%yA@5_OqQpc%ap5bk~DVJwxX2guk%s7GZlvUv7`BA`VZ#JI{nFSPSSex(^1l#+@K*`m)bo8n^w6Jgt-IKa}Uz>Cg8F%*T!sjXW#M(|W`~@ABvZ=Pekwj^7qO z^}MUGd-xf@4(<1kQ{J`3=UzJdPR6t2&zO!2w)%@3Aew$@)R)djyrmw^H6mU-KeaRc zhjV7F{xhWx^Qq|Xy5q)+{LB-C2U=at^PtYp?0?Z8_Wtk}|KkH^HNxSizGK%vAG|01 znEs8VSm-AmP+Rrlc_7|QH|3k{?;{rD4R z-f~*=;Oq7XvwykII&ZmnNt@q3s#kx6E51KJ0kKPE|98TR*H&-3)=_=XojCO}{YP@9 z>FG2N92OgW$uo?2&C|O1mPa3o=I@Npw|JE2o$zfQ*dTmtQ2k+*?_GTOj7d9%S$EuV z>^<95Uw_AvVzJL*ZMCVRDBesrt!uW%`Q18k+ix4fWPX|cqq(O}e_>~F`q2D%dr~ic ze`S8{lYa2`LimPGI;z;}?^Q3nZ`J3X{`*nEu2Sn_J5W%*#8mLHsesacskZq*%zS-JbO0gztMG_3AK8 z{jas(^!(3$F4fBYx99uMXk79^bO+JRlh+U?ar$fZiNK~me8tN3&uy*KQH*%a|L*oP zj#r)`{R|5qUX1i>dDQ8XyhZhg85{1r?M;_n8)i)#nzq>m2T|X?|LWJe;N;@|8(Xly zg2KwSpW~6B)zj-1h?7Sg%=9m2Qqf=M$90V8V*6=B$8&oG2V(CRAB{N(94 z)TNHu#C!0=0hi3a^U!OL2#0+9y7yf7@)&iRdnd(WeKf?j`%kJ(T%y&rcjB|_&*>)> zKjTp+)4#~5=wEoV?W^OO9&N4DQKY`|IPV7^v~iV>zZb@N-bLeLEB2}vrft6Y)JNCc zBTQR=*DJF>!1kI~d;Dol&o2OS=wPVxNTS8+&5!_gW&dwWJk!5Wu2%ms;W00(=frRN zD6e^IH@`No7&Y6zL*w%+d3`#bQ;h9}^oQ+^Ug5g$Jm$g z`QrTBil$#%;;hM!ZT3U&Boc2o>`ebOXH@jp`8A8^0-s}DM$Q)R@^y*m1$vi9$H_xC zorm$&Ww7#PCk*Z%&taAS|Tl;P5`A_d8V$UDu(-wcGe+7HC&lSVl=Npd=#=M2}x2Dhk%@<#L^y=2ipSfqL8SdAn|HqPIVSh99 z+1-badTKjQeZ_)2&dTboFG02+NPL0gPk+v=)nDgn`Ki?9`Qx3~P31LzEH-@5#*OO@ zj?b^;={O$LI%Y4VKWzM#+m^UzRU-tStkQ7U=>p#f@C98ZsRL;LO zv@U(Co$dbvd8DF0ytd?3^dJr@ubFo9>+gWYtM^p8KPVHgA=2rz{9ZWynUc@;r 
zV#gTgc_;PFj}2;lj}59ntoy*+y{0a=R9OGxOHSMIpmP7;J1G{^pM|)VJ^7J30>N#{d5*VU=s|^_E6Pn+f5vUi{`g0i%5Hqsn*EzY=lpV{iNE&x z!?upgd=hvj3?M(lX+*7B&u(P<6M-~0mZ+bO^hYd1M5PvU(Z%7}^ z13T)Yu5MU$pReD2!rjAR_Pu+(?~fzrQP=i=>MEuy>HYuPGB4Xt7*}}`&2& zFTQ_2-R9kn=Iz90?bWvH579f$OC=-Ie=L7gr$6`ctLWd6eD4H_gRvjmwpXV+&)#1& zB0v2*@q2KB{xJ2((=MHM?vyb7=$~GB-qU-sAKLW~^K=|Vn@_5KMX}1Y%}e`2H}8Z~ zH@{Y2CDL3xEHaP|U2NKl4qcGV`(L zVMqLo&-QI@-p#i>>M7#yg;t+@7mbUp*sETce*EOe-ZpS`a4$W++*@|IgZg&-Pqn^c zdS1=%Pjx(DFYx-Pe$!uVo@eMspV|IjFlzPhB*g-seA_?JrC)f>4|MY_51t}^VgEne z|5rYL&6`#^`tdMhudQCX^yEuS|NW#B``&#_?Y3g08^tPLZ)3aX@n7p^`WKb!^yj`N z=*50+=wxJg&HKdm>bJZ;^(&fxNc`9_qK|jNGo;=k`oo&juJFm#+DizNI#! zzWx5FqvI&{y#KR~FInaDpz{7F6$ayZCw8WPnm?-O?{aT;{!_2xLKk};6#kL~*O)7S#e-SbLi93n2<~)Q~KZt(4kiNiPeBz8lp4m|!_{pooFzNK^ zJ1+Cyr4n6Cf3E*bCBXALy>)`uA3CiArfNs}X8L#Xi0SWgE3=i?+)QldQ+NC)4tf{m zsZCwRGk!z(#LXY|>*G4uU=M!SY~AC|_|+bpg}KS^{<79$&oG|de|0n|7CZmbR@-%> zSk<-FQ{B814%?YHnCai2Gtk4k+$xVN&6e1vM=Cb^hj(HV2V+0uf$%{1^lAPcdRm@K ziiP@h{;=9e<0D7EzFS!1kiFM=>84%a+ws4lb-=#j=cjAH1BF$t@iQ*E#r}AHnf?QL zm5Tm4@5=s9U!cpg%j0}SH?3oRs0|PFF0WEP@61oWE5hY@kO}%*bIv_w@azYVXsxyP zg~#0SANSFhz5g80alnlyc>e+R|F}==RL05lAIu-s>L0)Uwk0oq|A}6pcfyNp=1acC zsjK~&Uv26cPS78AI^>-8Yll7@c7Ax-{C_yv9~brf8}pD^mTUaHZ?S%(`7v&$|4^P? 
zr+@AFpH7~Y<+*&F;hv(z&^ zsM8U@cY?&h*pF>EkQ9sYsjK~&Uv1072A$MFM??II)C)6ioxkI&uRRoIt-ko=UpzGg zzuD0`gAs1EaK|6&favC()@v)$x7wNh^YTbVf0ui+_y5uTJneNiEyCsdB;_@KuYM=U zyy*)x|426vKM3DzJwxWPi2jhCbNGuFec_6bPP^iyKvFFB{qGz-|Ikps zqFCkH`qaKE=l{vqI67wfkK|SA^rvn!$|IjRdht0J8-xeq?}hLUoy@sntG`#hF!iCU z9{u#iw}xpuY<+t6J($-#(;uA_(|z^-7y5NvFycux(~)}d?=RHPafd!L{YR5*snvfl zkCyLmY~Fq1_fAoH&BMi}J{aejU*~}xqt%bci*5So;SZZ1`RA|oKeSeu`~1KyPyTrx z_4)rt{=WdEA9OQh97UORrlZ=G*Wa4{GX2N$+=~9xAIhWuME{7_Jg%E>{lily(j z;!fhs%h%Zp=?^oHJ~R8<**}C;m;Lsgvp1YhJ+41NchLIiPl^Sd#L9F_c_c`mcK*$E z`+Y}j>bCmi0~U3?lQ`H^8$Pf1hEd#jk)OUmc%ao|J83?Aj}6AWh4i;3J$lEDFCDgc zYx%{Wyz!3d)2MIfeORmf7s>syX^V)a|XjM|5)R;o#t=HcpQIBhkh9^ z9V@PX7>fPmRrY`F&;I|2exvzu9Lw}C7*-#?&X41m>Cs4v#kPa;iGx1!TfbTt9%%KV zxaDa*>Q2<(n*QepURn3L6IyQXKRq_{^4Zk0{eLtm7CZm5b#ecnrL8}mzp(zv{$F?h zPn{q2GX0B+wfe6nBbiQZ?XRQg1c`$_@?#rzl46PZ@$qq@I@>r-#-UEw^38XgwA^pb z4$b@4efFx4*nLgx{eMUK;Bv+J&qLA6P<0e`x%N&TS3ZBG;*aN->0d-D`a9t_w-Pj+ zimjDxcTjo6K_B^qsp#)=Z}$0j8$tM1NBi)uJ8rzlZ@To=@Kf=7Cunt)kKb?+{nTqMJ9@$!xBVk& zO?td<=Dg_#(pT;IpSJV^4-{{?#!r9fX8u&T#I2vcR(~F;=&$puoPTI&9WdfGf2(mP z$T(hY`myKxST_$o2;cIfI*aOWO@8&)Phaq*)moG9eDafz-**i8_WcX<0+)%tf7JOV zH4FW69|r2%@vJS0%rD#j2XaqEf0uhpKYXi`7Ge9mM}G5z;)f5?7l>cjKa7+wOm*{8%^o z%k&@2bJ5SkSL8m@@Z&m+PaLF<*}C7<^3$YPs9(u*8V9L|4fgPdx!;}k-7jBwVc6`| z552PY3#WL^GyVIMVlh2<|I=2#qF9-3jz@yn=*E1R7s&iF{fF{Q8_(rdX6rnfD~S!C zd_(0C2fd5(lt*27Abj2@n4dhYXS@me!b4d zCw!}i4aU64AKtsdru%btMxp{ys*I@ z{xECrd%pbSo_`6mE*iRak7vr~c_%3r&bxuuN58~PC&p=rpEdo~-hYX_R5CLCNAgEj z*NNZi%n{rA(DB&rUirj9@`;}4)T_g=_HJ)n{)Y!&32XOX`;IAxewODk*Z*_e0jXp9^p8Kjc+2NQ?dRW) zFsa+>lMlF7KbjYH;Cmm+4=OWco+^ruG5fbkRE0h1dL9 zQY`R6`UK7Y|55km0diF3`hO{SK}8+*5JEU41d>3Y+4oSVIYR;jXdvu}4I(=-Y?noZ zQDjFP5EYRtgRHWS3y3Hf1X%GRPtV!ndm4_w&}wsgxb&UcbNZul}f~ z-|ac?dAg>$rn;)SfByLv*YUlRJi}4?!!tw2On7|eV0eCmC+2OtPI>?7rRdO>F@H|I{i5hPQL9I+j&)1=kj%ig9RkbYX!*%y%T@j z595*-qvgkX$9Bw*4QhUv`ro^TkG%3gSbgBY?`^sFSzhCI{(mSb7Q6nZrFJ?J#TqvM zsGrL54?N_lo$24mxzNG8+{$e7dc;N-^AmRxr=I(G$Oq%P*pD4!T;~}t9$)=&eQbv< 
zfAY6W(_I%0Gba9Np+)|-Dg1W))1MRz|M&Pw#r0R??Q^Luan@|tW@}$|{+~Wm$zVL{ zGX0AP`gu1puQ^F<=0TmI<`V~f)v*+*8PeT&fk1@YY|5hHU)jvob`P3V_k#Wmo zT#Z|Q>L(Ah_cMxH9Xw#nzkvR*{n%IL&bxonu>HIz-rnRG=e5qRzilVQV%z)D`uy{T z`W3}0*WSr%u(_$)^x2B+RP0RucK)hX|CJ@rbc*`$Ze-l@7`Fz=15JPJ)BM!yIE**i z_a$uo(MjKZ(H$Ljd2!`qcisMIcsTwvejP8pbitng0}p8DKj}YBiiOWl=9lT;lLu<` zpDuOeQ*Yac8yUB}$nVvrKd#rceAS(}`RSi}aFqVA^4aID{pJaC!=&rSUccC8tI(J2 z|A&%dseJx2uA*d>Yx?#Y=em7t(~Z7cQk&`Do4=~)?{aUs*!iebe73zRpXt>vact1~ zi}H!rVO-~(@S7i7v5p6SSbW#Le)i$7T^p9Tbjz17eda~4b$0%_krWI2*R)%&f9_E~ zsCD!q(M(76+ws577eDU<{bc(0<+XMCHWvKk{f!^g+!|GgNT7O<`%Wr8s z?l-ozj(Njl*df1r+}WT1+waC-(!Iiu-uU^@;MLUG_5b}zvDo&uRrLLf@=I2A?Vb40 z4IAC*n|WvY_vaNA{h5a%=QmCFMqX9E-|)rH%f-*QApTBBzM(6IEsu|}6+7{T!|&T@ z#c4ku2*+*ulP{hA(f0q5pE~KLTzl=Ka{i}Fa?p=HGyMnhNJW3;;l4-m zsW&8VPDYX~%|1c$LHY&F-|E&Hu^ z;pw>lPm^Ma_q)_fwA>!;{!{vCX+3sB>`ebT`Kvnpn`s^$wja^DSby{az00eH)iJIj zd0;#*^Hb*vx^n+DN`Lp5PyOetnb*JEJ?3|duXpjUZ$v-a{}1UnU@IC|`w(xr_Ubpj z?IngC?|+zYrvKbLvsVBA+t*)pN#FlA{a5uDbr1y&f;Gf8JB|LU*vG zem4@uD%al0YxHjVk#bT}t z+v>1ECp?une)1ZU7uJ1h*=uI(e|K1Sps~YmZ=Y;>asIPc^6c}M-yiHzKj_4c*T%>1 zFKZs#J8||Onf^mLN3H%Ugy{hv*p*k8`HlA;`W3~>bkja!d%>@No>7$6>aYJtSM-SHPaLGL zIwTMLU(?@-621n*#1Aa`-hRJ6H%z?ohJANg|Fh)T_2)xc54O~f_{3YT<9T*G|De3x zT{F6JmWuvPxGUG6sNaJ{^#W}Fnn$1L4#xV&{ANevzF&X1`iv%`^@Ckvv-VD2F~4S0?Z{6Y%=9lZYW3IgOs9tOg4zdh(CfHn zYaZj`2l2CCtoW4=UdvxVKYZb=|M zKjz`E-ATv2bA#amCu9lVS^p~ z-76ja;!wKLb=?#1yYerG-Eld5w*OC)Vqt!1vHN-P4D#qlvZ`z1=!VVv7SAu!zb9v@ z)nD)9HeFjXrtKe{*u+5}`K=Gthdj{wi{e(NeDIFeA9ix`G-+JO>ejst|O#j|Iv!cJtz1jIR{T)g2O{cU7C*BDX2V*}r z<1?P+r@}+r{Q7=ay&IYLe*IzkQVX4P%w1Q9Sr>f#xI-r#$aZeuf6Y;TMeD<{f3)?H7VdxF^@nZcr%%wS9~`AW9CYmES3mvq zEn&YI=U?)EEAw>h$OD zjePI&>@45q>vBBunp=yXe2~6C{E}V$|BbkJl0jXqt$EbdVc77scl&{?&(*1jpF{( zKKcSBIv@R=M4I~hJC*IGC2{)6^q-S^YW0uw;`5vB9sZ`~z3<=O>CZ?17u)~))R}&~ z%UQE|xqMyf6jpfs`h8x%eed8tc;Owh?>o)AJeuui_Y$=4-G|iX`^QvmMX^f$*lsp7 zpZ;6(w=@0c=8;^$*;1?j+Ih6xKH2{nE~x&U z-mjL7Q+Z#ga~cOn>F-|g{vBW0@zp!KSG;5Yb^m$T$zJ);pX<&+(?1$Fsafp(RNb(d 
z7YJ{r|4>d<(cj4cmG58F??Kf8K9G29TR-ijSS;QWJMPc&oyNgY>K}Ih@d`IJ9(*|L zdEcb1uJ}x^S3a&=t?d8li}4hto9f#5^!Oi}d4bHkyK81aQmcRb{5SgPk+IQ->#dv5 zsox1Q4tb#Yheqb%2g&!!XQ(=ut#Rt=Fl_SZt1sVh{K{dIYwvqx+sn)6f9{(Ui}5Lc zx<>K)n@ib#NBxprT~p8f*Kr^7(Z66c{iAs`-)yVjF+FA2gjKUdwk$ zv6xLCk)QGEFzo)7?KVC7ZGB39_IIy5cK)HI<0z(I)Z?EX^(%^%)i>1d6uShQ+sxn3 zcwn~u7nP=eVGZj`zei?1)Wt{~^vV<4_!-aQ+BdxBr=R~N4C}wT+HKb#GbU{G&O5CK zx4VIU?EkMcDHgW(^xES4mou*T4JXw2y=-mz?zAyN}v^)pq`;NBKd1f25^;uuE+n zSFB;%t@^F+R5D^a)4zyR^pEQ8`Zvanka=_76&tj1wGMwL^idzRw&r!>3)7~re&yB2 zJQ^Cee(a{VCiK8(+kY!57WUKWH;U(9Eo%DoXq@>s#AeMrxo^TdiQtd-yP5vQh{hk~ z$9mEiJn-A+?;mwMXm!fxLFF-f0sUe1zrFmA@%#0JH8;QQq-&;r z1b*B8`;ua@?|;Xg@KkmM#OG|jZHcp{K1RF$*GZ;*PSAHl{F(mk{8gR)#yeN?y_-Lx z8~1@>gT|xd-;{G}&c`5JvS_m#ek--&IXzl^6S(Q*wBh;G=iZp<&!zc**8 z(|>`lza5ZXnr8~a4_Y7EzxiA9&$qbdTfLL~irv8vU)c81p201?GbZeKnHK1K8Q%s=1aQJr@qdCXow ze|Y7F*wM@ev!nGcc!n zxy4s`Zm|ttr62qJ9a4U4jcf1nBuD$<`{#ufs=|3lbW#hTrDz|^mN10A(5w^cO)2Cnj%@YO52Q5#>H-9?+ ze2ddB=p^6xu)#Pl@`vR=dDjJZ&A2m+n|0vwTkQB5uW>v68A^)9z6aP$e}A?|{fc5` zIyKbarRW6h_c!sUBje03(|>LrsnehH8034GXJ`2?UzhI_anQRwI_$i)D@@z3aecSt z)2E{Osf&^P1@yxg)<1BYwO6_G*08}tpZwZimn^Tp<@=YG_6c(TZ#0f%m1|F2`$D(a zZ`+OLW%>^$*;1>2r|18;yt>qxd3u+7OMg5rd7y1aI&br9pWfwc<@-)NMjOW!>6Psy z%>2w^H|=unm%=u$-}27oQ(q$A?th=Fb&BxY=Vz*M?Mtk3?VY@CjunJIu7@|%e<;ta z)8D&1I!wow*p{z-cqjhmI6?S9>qGPLcfz=iam=sd;_t*4+>h>9?c~9;!ph&@Y18v} zEx&*Kl=N-;okEpN7v|g0IM^k2yr!?p^@s3i9es9pZB96%3Y)BpXrc)|@?-VtU&wcVi>qGm+-wES78&7%6);>D%g-P%2 z_2!X>EgU8gMWXdeH^kPmt%JfmAd!+ z|8HCBV%@Q=55{wnZ+>hr&WrqEpDq8ecI&24(SkZz;c`oqtHxuP9cz z|BCI(`R9h#NBw2`7mOACo#Zz+5HvlC>}*_6`G|wwCBK_^=hJ ztHZFv!LR>zgO~RT+c(bq*($I6p8bbie?BKE7Ox18cZJ)Ir^KqR>A$(I=F?|1KlUS; z{zb&}CttC-k?>d_4dqqDAES3sUBp|LPyfWR;WLC!$H7ls9fn!U%-eO*mHQH1sI&jS zoCNTv#}v>1xr_G+l&tESx~A$==|Ac_)4!dwnEo!eGF$VwkBs^7zQy~0;-Ggq%dq2C zSD41jJS@L8GH(8;Umx>egB|k2#1poEz3XJydF*o4?KV=kfS#zxXr#dvgZkceypJuNB26 zpL%rjZe-l@B0qViTRO7Nc=i1?o-WC=dZ+d%{b7SI&%Si`FPs=QKI*`(=TGfpJM4J< zf2!juidC*{UfP$P|8bIIpO+1>nO~-VU+$^s?<9}&sN~bHp^ir!^jaU=)@NJz>*D5D 
ze)tUEryss>*4Vp0^uWCrhI1a9{NNWpFa)3d|J9PdYmwCAy;ARgM?Xc0W;!%@B}iXQ)eYNtb^hK-ey0E2{8dGN zjaTmfQooN79YNw|Gp-_jkbL`GZzRQH{X4Zm_^`na{&4n5J1@5P54ysYXYGFNVaYA< z+vhLiDyE;E|My>B$4jEc`1H61oA-(NG4D+O!JML2|M>pLs6X%W>T>@CuXnk(^jjaQ z4|$;Vr~KyEeD89$tWTG(GaRKq^zHKJ{-G;|!dHHC@@Zq+^WdpF|7O2`$9Rg;MRo0+ z_&ENAzixh+{zG|1o&M|xO^=4~8=q6VPq2BoAo-y6q5Yd*`LJV*>%0?w^J9a`@3BGk zhk4guJ^rHCZwXI+`>@}9bj9-i7w-S+ONxbg`ziYUW2$~dvC8csw(b5q`q4V;TkY*^oNb_`cl_FW}Oi>zToud zx4hy}wjcqUHXm^8DMD)>Ef; zrhg-kRP-lrQ0h#lRP4&}l;$tx zzswiZJ|(+c8()0<*-~DuW4@XGMWj~$4(I>96JB%#V?TX?)Q`rx3d3lCB?$_Y5)KBs9#a6Ooxu(zxPX(D@rhma$r+?-8^Oo{xAKs}w zAlN)z5Pr~f>>3$op5(SLGCT={+@7>5ac?;@?FRXX-Ni)8={M})*D^`DQ%!6xCZ_j^7bsTVs z;`o0v@<7Qd*ZAqHDYsRZ|F1{$h?M(mvJW|o0`SnR%MW-I|8^7i?j~SV7b?_;gzcK%O zi$`_dNxqGX4U&%ysy|Ho^7HrYw{LG~OkX8@V&Ylw*!Dk^6icV$f62;pbG#BXKK;Lm zc)WdKXZjE1m38{_9And?q2qu~Y~rAg{PYEq2htB{{x0E%&v2Cf?s4ahdFhLTW4qmJ ztDJuK+9$zh`~ShDSnU40mfDP`DBkLt`Jo#&^8%S)rvIFrrB;6(k9_Lm{eM&Qn)gai zw;7`=4l>Bw83BL^t^B=9lR|m{Zm2AL(U0 z?Yydd|84ufPyF%xt&f=v2cG#$KM}!njej8zyA`)$!kbmrvFg>s-nN<@f<(Xqa}5guX)5l?<6m_P0uun ztIhjsb;|FZ7N!%+|uK(0}+y84Gd?59)ZT+;9VzD^9IzRKP4WHpC{bBR9 z&%Se?CDV{DGJC_{%)6X^?E0%A>8o=5(@?*nSmhcXu(`SB(`Re`cBX%kQKvu00j7sj zo)`zk-@IOse2~6C^Y_j_-{SNSI?4BSSeK{%u<^@ZTV|b^tB1`NUhLYxAKajyW?OlO zB5bLhjzqDt{f6U}pz*D&w#MVXAO0RP{fmr>{@O3sv6~+7MMxa<8jo%I) zIZK`XwK}!)s`B%fyyns3kLPdwscyvCF0f;a>%0?w^J9ZG?+f{1>FIY}_w+WmhGiFe zeycM+yq(uN`~0obKTV2-?N+S$Z2wLo3;zAfHfdf_(ck6XavZe{tP&KKP{Zomea zKZw5*l5gmwr;4rqPJCg9%Wl~7=!G}V*`azvH^%5=L zkIM5;Qmv=XiJk3#TKOx}SN&eGxm6a-7UpHTH^lFqAaOAEW7BW^KCxZFZ#+@I6&{xq zOD8{kVcXk&clfKM_m##{jnS3&-CxfUsd#X5^w5xJm%%q zZ>W64LE^EEpMES(UujY-=67QI(IUKBA0{ll-V1-&eX+3e;7z~#%u9E{Ysa5yQY`dm z_kT6iuP9cg8~mV?2)cQ-EiaWg^UL(_%{>+Um9NgQ1jpeZ9MvlkUY>k83!~! 
zby3{-7tjx1nEdi(+w8Ex)G+PWk3ID1kC*!IE93LrTaY>%Py3PVavjIf&HPcH@Mik= zx{V$EASjZQv-2TmW@~U$GT`GS5Z)JX&{{4BRR{!|-7otAU z2R_idoV9!($OFj(`FrR5gLip#IX~}mE3;j`9=*SN+2?nA^@hKGt$WP*$6x;4U)@DN z_W9c`=o57EOBG)~zJczbWL4MRiLdtjb6eufJJWw4_f+)P?Wpqq*S|}Dcv~a;Lm%(N zZ>as5O}vA@d(uX09XRv$o4Z#ze%);vH(dk2>EEa0fSc>@-@&gaS=F`iZ6Yyj-Z!?d zVfxR>Gi&vak3U<=+oRYe2p{O(NWb+_m!C>LeHxGEd#7>mefr@GlWt$}^&MKX!z%yv z;lrL;`(Su&|I^TMz^>x^_bGAzl2xwbxE+7eXF8IXezNU+9t}x5vwYJ!CK7CgF)Pr^J z3-Jiy3p?C;`_DeGb|dWZtyhli`oP2JW5=IENwL`ZpEdU?{s2s$9*MK2-q1;e>!D&l zeSn$%#fZkQ^&adKKJwXr+2>P~*F17$zSVV!pMK1*_1H0zZ^(G?VS_b(c*FRqZ(sJ# zeJ_TUelp<`AKAP7{dazUr#C4U{y+Bat60`{Dku!?QcB@#LTO zg2(QE8BB_W^X}GHL;Z?kRoC8$58dE59_EuuUZ#KHsng#($um8oK8S<-?g{yz^`YbA z?}Tw3z77 zvcCtNU*-NgbkjK7Z>E32P|+Wr`2LGjMxx#q)#ZMk`O`-ok_XoDS-pNYh^@Gv)5*?=hRLv(Zjcnz~ z^e@Puf^f*l5x$iHhk0@>Nwcc)nQocpryY$eZ9M7 zluWONyh4A@yMgf7_&Of-Amf8gFA?t)bsXYscbWd}Jj3*Nxs};2UuV}pw*;*ZctAHY zZh2kexA7vIJn-MW|7V==!d34!zr4lF?ygU~dB)r`$I>7Bf3^#dI`Tl7UAe~Zo%qnr z{Hgf86NER@zbE(9>L1_#l1hD@UhsM+d1iOgzoop5nmXn4Abi+h2Y+Z>`qv8&IQYo0 zX3sC)efzZ!#@oN?(32F4pLj#@4H$W#M5}A>#8)}~OvO(c8po=&=2N zTA23z7d$a~Cwcg7J5YZ7Ao-Tpkb1L~uaXz};j6>2?>A3;XzxEPEbPPn&i4@y`he-| z;`^@?+20!KxbT2p{peQdAANOxnf`rwt?94zaUJvV#6kGf=DDNfgT|-%_&cGE7mZ_n z`cUk|7gpWsy9a-0*WJRZyQZgo=)m#t*!hP!NwHM+{|)iS^T3wqu=jJdD8Hq)^3Z3d ze}5jS)4!?nCg1iuXpLF;USBKRe z`TcF{JoI(?VSjHr^yqlw_5PQJ+KOUj^VdFNyVLWBy_1Se|AG9KjjMjI*lcCNY%x7_ zeEvSY%cEu7@?7CA>$Fe$B@Z+{%`-oB%ICrN>4z`;_x2b3e!{hr!_e-#ynbv~d7QDQ z^lLiwsJ&eA{KGrr?M|ZQI_}?&f9SKJd~HGc$@HI-du&{nTjl%eU2c{7qF)7Fk(M+k z3c?S@`S?3wT<3Kh^J6P^;tOm1afKHantoiEcFiFdZTR*t(ap~P4<^N8$Nwwq^KW@S zKpjuA%e7a(ZU1Upzq}uPQj_UFH-A;9f3qcd%!_^vm5(^+T~w!e^g$kId>!iGg~u?e zgEz)-$+x=KyYIOJ!}qp(>x>VL>xa*-f9_3+#m;|jQGEaPB^yWQAz9@*j-#9T8_Meh z=_}KJF!$8zzv{?*)2W6JUhks1PWo#;{TdH-#KBSeyO$k*>281d&YIoJ&HS&^PMG&B z-bKM~t@{vqxYyCoHw{zLhzivCXG)}KCy4?9Nm;<|Tiur8l? 
z9maJ&>dS2P*YV&FYd-tL$=lxcW?1{rk6d=r>({V7+5Sg$;r~-R{~7tIOQMB-cy5^W zpGqRU^wZt7Z4s!|zenoGk3WZuU-OzfkIc8aE*)PHzj({f7yO{{Q}0Idcx*-L>M*!H zPTu{!?H>&*edkAGmpkoN>h1dTG$|J5nV$Dzz5{yRj{DmZXC1!}-bti#{bxh`@X}wV ze=)N<{VVssq$7E}6B|A-_QyJs2htbY2lZB`gx9E zBfsgW{lEv>{Mt#ec&AAJ*x)Gr;e{l)jxCT{`# z;rz{SpOtRi6E5sMce`bk=9y#{wVf-GS zdT8FsXLzmS{3kY;Hud+%<9X|Rl4zl>xxNIfPwJic=`Yj2m3vHo<)yCE{vVzgy~|n4 z{gQXNRc;^N<<_uxTA23yVfg67JB@?%iLFRo9fs*Y{95xnn+jRB_FBjOpYuK-Jhr{5 zju6nF+3;y0^EF(4O@FQTiu~?1^^6xI*Sj?5Wn|gnUA`WsuPaRR+x~t){P4$U zu(Qy_SaWTy=OMM+VSUH?NgC{tlw1Pti9uvAa?Elzb(mwH`Bi-_f+)n zl05w6+x&>Tk$Ltj1?{Iic-+}-`9kbY~=AHK27(WZah z`JW~A{C_(s77seL6~)SQ)IRO`Z{*Q`gqi-mc`Z77ms{ob>s@XQ+m6%1wDbRr=biZf zpTGWO+@m-C_Qv*e4eK?S~&&+iToDe;Y}$ z*!EvMet!ST@83yQslRvPq2K0qYDa$JV5a|^JhM*!JeDusS3deR8-Ij#Jov-X4}a|+2R(CNSo**Ac;$@4%IgpJPl|>81DHNgEOSkt9?ffv zM6pVpniDkcM&{W(RF6#mxp{{1ldo~^ORv ze^~9sb-(zb_r`~5N9=Ocb6+U$f8qH9sg46KUYvh>$vgEcidC+?`sp9t%%3L3LLAKW zAIt+4{axfj`#oMHDA)W6SO+?QZ#@6$UOWYeR?N(hVZ>lf0)|tfA8%F zn_;!vH~iY)x>tvv|G#Wrsf?E%SX}@1n(5Oce(w}hLDt^Mt7iQDdn$hTnO~;=P#!UU zr+JEEm238khMET-^T7u3gZMik`Gz`P#g6)@;|){p`uxJvcbX7Z-(t=dn;m*2Jof!d zniLB~7@+@8NTloX2{e{Pn2`;vVG&@U)i+4f%32fBGD znTF&mX8IQy75&MZEA^&FOZnQ0PEqrkmx)b2=*fe(p>=)p&$l}A!I%ddjPoLYSpKx9 z5541;8;2E+PG`T|{cW#t+y2v}Sok|n<=@}SO`jf(cNmA~274FfQ>U2e-^eqJpM0=4 z3ucSy;R-YHAbh>z=W~pF(6$rJH^1^=$LKX~^>y|F`a@&MM;Z@4{A^e&efPa>t{4ly z{r}raiiPbweP)#Re@zvh%6!@*{h9u0?y2anI`KUMJl^Hm<>w5#5C^@>vxdEIyibw- zmY){scHg=AsdE|!N9oVLce(z?Ll3$oTzKY6XTChI-2dz={A@p<^^xlMppGxz>VC+M zKk2hi>zyFHnf|TZQ_)}Z_#Gee?YO5QJYMsNgR!6Y2_z3>dm~PL#n$mg>kr3H=?in0 zygKy$=Qkhu$QeWM*!hQcQY_`?n?8Rg_B+i}6sz1W(GC8}{8FjN^l#^{D*E%jrFnGN zcAOSwdHyHLi|^MkeaTbA-wCaM>Ya`s+v@Mc7gpVH(c{N$(-&5oe$OUd$8HI~?f)Cf zubAFieE&XJx7|s0x%N(8bKVkut8Yo1J~RD$a!*BnCwcMpJbW%0c1a$5px1h{HIIJq zgT~huKI+Lg+i4s=Y_Nkrd~(vd_rG(5KmMM|`cr<%s;=o5 z-LS2`Ej7$L)4wfixs};z0SVuGk`G!R+CTnI7}t3xFJkW9=3SysXsq8a0Yz#|8GxHET+fQ z;`;OHwI{E&>ERR21V%v7&)b1B-o+1c8 zXne|Re&xfCF|PAY_|1jhuj87{xE=C&Un_OW 
z2QTw9n>gcOgX#~bzx9Ea?tErU_-a`3i`&jBfB$svq*!cT4YAX`|Ml-LlP}rj`%$_6 zFqI762j-XQKPQjW>aTxC)!FO#Amel|dBlI5 z`Ll~Z-M!Sb$HUgom(PFT_ow=#Z@b=Uw7krM%b;u`~VW=C3OHJBi2NBlG?l zY98|gedNbBJ=3IEEMIMl($2PioClS+&zFXqY^dHP$)#|U$5i*@zo?VWMUc^D~@~raj!n@oWR@W}f z^6w)RJLbdoPU9f+z+ONMr%w37R!4PR7|wik)1{95{Jmc5?EA-v$F4tYB*nttsaVxD z{c_ymo#a&XPsN|)mku z`R={hUgP%vSDF+H+gCbgq>W#^|3d$jv@Y(aC4T0a>0e~j>fbXmpZCwu={TClaUA@h z=~awYZtwWXi;=h?<0+5FR*dUoJ3R5!X&+twr*DRr?_J^YnOn_-*N*=OlVY*s&n@)% zW2yQ<##fYR`F>QMKitxK>eSBkcX_0uf8ou(|JCm?nTP4#$g9fnh=bnc(W=hOmpqW| zisMi3^6HGAeqCYO@phdbzB&vCo;_~82gghfhks|Q^B+I^O|QK6{7Kct&X+A!T>tQ^ zc;3=Y*M&e8Y4fWe-L^N}c;aEFZ~oRC_-pq+R~O&EziIuYNwIh*7_aeH?!QPSlkJQCGX0B4 zMSq=Nb8D$1zpfABpm$LndMT0zqC;(+@)3{oJNX$WtUYG$4ZEH{Ev)yk4=laGCD%}A z`=3TqEVi93ArGLp@w9)*D%Wuw-D1CYQbRwP{;k|o(O>htBKZ~lwIBNBx?l3axDWiD zFs`E?^VjrQ#~Zl*cEz4G`Y!s*uJGCU=fR{{n5SL;*`t0ins?k!bGqioc`fnNU#5RM z_n7|VyWE@Y|5p$s-}Z~`k$&Q!cO&E0pYoFj8ZYBFq~7Y3&x7P)E5cWYVWWc|Jn!wz zuS|6DTF3WY_PzU%Bv@aXN4`X>Yx?E?uecB5@Mik=Y0mek=Yc9wt7%&5!@9+l5J|>~h7Ux8F{^{r}ZSiiN-D^r+(bx9jrv07_Q5 zwsE!JW-2l76q!eanf|?bMy>w(J65JgnrCPEX#sXTLLBr?;$}zV;s?oR9_FV``QkjY ziPz-A7nYqq<$GWM#HC@(DoWC&iHj0CjDXF zj~-rDf7PR$N8R-gzDu9KktW5$b|==b`Or_Kn|G2~H@{5({yf9#s z!}J^yfAfbU^U=Zbv_JFr%|GAbns4<^@+l3;!vhy5D7nf`O~%sTx!9w0yN!}y1Cmh5=xRzdPX z%hUeM-#7n!i)+5sJISxu3+NA1wtDcz?Jis@Ozr;hs;@s^{{H4(%G)2|tm5(Klf6^F zqFCjA!fcIK?tf`X2Kq6-O#iugq*nh(AL*4gK8e^M-b@9H zdFfB>O#i_=V)_$@XF?tw_BomsrXByFQ;gn89)8nLdGUk%U6N;h`cXc3$unEy)YW0w z#uqwV0!TTv#I()Cw9CxKGh97o*#Cm|4`0h<1t?_l@XW^?>~sY z6Ivh2+bN%TT<4wiV}5K!>gq5&v+w~+e(39|u$0@c>0hXk`N5WCqL-nL7xU0pW&4Mx zYb0NHmjq0I<%P%id$o@G2$?T`x7Z+jApTBBz9Ie9U?;w?_+j7P>U(efI4p5$??0yg zHt~!X7y>Cul+4cWw<7dB3pPBvzLq&ghW(ptqZ1;vb zK5>xo%+@?~#19&uj%$AE)DJH_W)rW;hcE2D^3vJmhc4{zUq5-?sz<|P=bzzGOdl@3 z|2mF5Fo_oYUhA5woB113K%9Ou{fmk^{n;MKH=QCr@|p+9IOK!Yhw5W~<-v|IuA_hR z7u}cJT_=CIc=A0Pp8wY~!o{cj^tM|+QXc>8Cw<#~XGraqqV~!B#ar&bV!LwvZK^z7 zifU*27t9s?HIL(1bYXl$Cw#;~ANfref2Myse^t?6 z^SH0qbZkkT^%vzepVPP#v^uqUzij_CI5LlX(DJDVbzF}PcJK%H^1HWuDLphyI3Q`P 
zmc9YM{r}ZSiiQ5tn~LWT&%p*ItGYHmeQv*Zl4D4JsrWPfd-7Km{hh=s`{S1S+lo$6 z{oHqkejt5;_&Xu_hK$ENuoXM;g%$tun~&WvJ=6d8*A9AY-8JCtc>JFx#lrC;b)ZCt zZU3ulzEeau`c^yBzcv>tK00K5>qluw#ttyc2%& zV}r~O8&rSTYVzQ-$F!5s@-Oc>ZQImq+|EDuC&j}5F@Ak{00-MWC|RZ6v2E8s(@#Tr zu~V@#{rmD)75&NUl{|FuPV5Tyiobb?Ao-x_rM%{E%s=1a^Z`1_H$H4I&WrqE+SsXg zt#RQ+VXcc6Ised;Zh*(`zvxkZaE$)`PO5&8aTFz*>FDHD<@z7|^0-?&-pKUt&nqhW zlfPLW9k%_ag=y!1yb~L}z}U|`LGnQMb8#FSK11g3l48LRUmb?k_ndv>_8`xX0e^p%@#(kn|5vKK^xYCW(|;g;RneckB05Zet+UTxCp-}c4W2m>!-@c?+IG^qp@cMtLD&G&D+iCr$%1hr3u`~VW=CA7X zk3S#TUMt&wukdjm3td6$L&rD2lRE4e<2tYT=Eqiy>tj2N`Oinbo!&bmjQ!&2?_IaT zRNi;n{|_a_Vt)rMwbPL(R;hPvH>YU6Q-nw7kB*uCgLy_pf0uhR|C|Es^MX3lA3n`% zUNkbF`FkgR(=AoIEl6IBmd`jbI_0VJNAa-C%4@Fp(@(w-#%z86g`e2@39oVc{!#xk z#-DB<{r+#76btX0VKjbIKR4m2%%dSZnf^ohtBU^0Ct2mX!lQZUMkY2G*WvGkah-QE zLLGME3**+`^pyQ~_-t6=?OA_0@4%zrxAUJZWT-iLJ;uVTD^3{pcCLoE}!V z`OGmVowx`*cK)Z86pQtCY4C)N>NP%0|3=PL(I1}p{<}2K&hpa&?EgLZ zK<^}OHhn1K2hoA~nV&l4gV*N0fPVPGX18qcsfR!J*Ra`|i#>V5b|=7R`=5v}ZRq!p z?W9;Z?hvcgKel-eNu|%0WYSNz{inI7qQB0Y`v}N4of^UeKXK4IiJOg%iugf$KU+0* z%I87&u)z-gaMsUX-~Cg6xjLM8(3ih?Kp3EZJO4a4DHi-5LDHgVKzhuq*UrzGkwehKL*zx?>jx+t+IY&i*mwU78+4Mec)2ShT>b(=2I2ik_ zk4Rt3Plczp&W(&4-~04;PuhLLdKcY1w|mm-yWM}vIZMD}-#_-~IN;jv^Zpm@2i@Q| z{WZ?KGyRK^YW0uLKeV*JwqnEwuaErLhFwXqSU$XLNyZ_f|9rIFT-kJWrc}AW7oChc0w)=+i zb}9A?Ha{mwK1g4n`IQGdM)D1v@K@{w^oPHma=}Ob>-mB3mj@;uxXeLw;j#PghLj)d zmIol(_M1wawRgM{#BQ#uHu~s18IR+PO#i+-v!Z`ghc47vew5eLMDjr!SM%|A!nlt4 zncwBzm!FTF_`*T^UG(SU9)2ereaRk2u05-K{uR$ZaRr@y-lnsP`!61_dABsqyft6E zSs&`>xiQ#=ZONq1O#lAeQ_<7q-g2??fvNb|9^mPOHva#|`fu^d-v6=N^iz*xX7pvB zze74daF#y*xut%vOKm5}+W6x8-{ZQ5RAl-OZ^^iAL{qs)Cgf;Gd_LHAJV}JN<|I?Eci_Nd*`6ph-lk9SBeB;H3 zZToHXN+q7@KPP`>|*vyOV-cb4M_mBP{`QAl!jH_sTb4TXk2d#hQ^B{cK zU1Wcm;0nYH@6JUZN{nulj z|NF+*{_6O!{IOU3eCqf2^iKQc`p;gm`QFK|ruU!f{NnwHWS492=KN(Gw(+Fv2A?ip)Q`6`IX=5mDhN%!4CN$JwDuj=n2{HpPoDS zo^4J!hjH!v=U`GSTui|3y^@(gtx$u8IQg>K$SEUx44p}T9xBB7$c)^QwbdNd+F z%_9!_$WLD&c_4p}aU2^y!wx!Ip7sGx_k>FxzvR&i9_U{AxznDxYijxVyGK$iwx1nR 
zyQP?pM6t5%)~P@02i+LAZho2mMP)^Q<+E`HbexuAkD?Pa9p{X+&98j)gTE7!Z>Zx{ zZ1s2I3*)}__HK*3`%oBvf$R^x%d6&7=L&c2-@7NyIM|^2yC<%A#X2W`^M>w;JNa&P%}K)&%Yf3P9>yzk_LaUK3n=$-IdeVwiTPJChQ8K3*>>o4_& zb)H(}o?ooe<+aYf|7|73!hX8$_jjxxoxfC-dNs!>kBbn!)Sv0!muJ-KKP2_ci@t38 zP#%sKnm3Kiw>rjEG=Km6^DQ3Lc_;Zce{7I^Y*77Sxn1^LKYQ>}Sbp-fr#2b$J9zB> z-=3scOpoP?|9@As{YNTs)*KI-?c|kx{;KW4hQyg)rhk9#vGH7PmG2`ulOOj%|Kv4y zl6vw%%TpcjOLqA=fggPDs8X z<3;^3ekZ=L+?1dE>VSW49>)G?-+>p#Yyhuq|Lvq$*q+jV>i3W62R0-xUVA5=Cf~oP z&HKi9=$Gj~C$Ff}pZ}wom(??WMdF}$IcvFH$93d^^uzwm{EXuYy2OJue(LHl{PLd@ zeznM{jqsN@p8U}juZ@Avu78O5?e`ZVe^RqBPjthsn_s5?+?;BG^-pDB<8xxi^YxM6 z`lx%qQsMcZ>Tl!d_#V`8L%8vK7cTqby+YXX$uA#q*y_Y=(P)D_D6y=ueS2JlHYT&CmQ!#`7TK zV1w!plb*S8@e59TD@^*$ktg>qF-Toy``5aZ^Uv+1SlBO#mFdvnmByW5W&f}K`RG5C zSJdfW`5sQkV_bBo)s1m={z^^0_=X1%6%tJwz4OnvIQ*cKeBMWo4aRwqKkRzpKR^7` z)K=Ji=9v4AnPSiNslET@34Q)sniLD$oml1HU2NO;k4_@gS>M`cw*40w75$ka#@&cEqA%KgRs zpT8RSmFR+>=XPS-{<4)EW^nf^fd)WFlHtoF^t;OY4N7yODz&0>6;NCI{| zKkQ8ZcAiQ{c_+c@&ac`b=&`WK89{gtmdR`N`bhVp`% zM;!Dns?$9BBM-EBv}@{=&x7z`E5cWY;iO}CJm})jJrH_7d;Ey|kNEw;o}^f8|2pNH z`En84EBzblxKcH2|D(EDUMd;z(qE>3Zyu@DUq464w?5i=RXHAU(7Qa^`SaKETar(| z=C9G2Jnys)?2w;r|6^Cb^{Cauo`3w?w0~^S2M^bO+I3fh%Dcg-`3dOLZ2L&X&)Pd) z2^t@tzw{H$4;?f8`|`?){!aKR`~Qac?RZM_&d#!eYPSy6+6?vKYvxL|B8}l zdNjl~{gt0M=v`DtA9eYuC-|p;dj{nIGY)5wgbt-xE=a($@Kc{-9enqj$wO2pb zRNb(RkNGHO`p-$SrB;7Ek7|18Jm?#K;$ZAIy>wjiz_>s2)33{#%ogwRb-A5+ms<<& z?{-h`|DDY~`d0UZ5Bm)sZA|bkE6$dV$N$m1rK((eC$GWE?@y%~XFh6Y`p?ZHwfeWD z&h&`-Kv&|RcTpYVDv}4<`x(XIhtCi`9jB9@al-G8KIe^#zdID(`O#*-dE*1eQCD~U zp&zUFpGM;((E< zb6Ur^*kA{L*m29&xeIUX!p=**v+tc-+=33a{Wp|fG5z#o`2}$EIBqm#97U-r|1NoM ziI4kgOD6No^dHJSb^14_j?AZDLmeMJ(7UKE>X$q)-v3Z<`YYf6)ZaZ}rPIFr<)6OP zJ#p8EKJ@P5<^9+DNxy7whBNj4yHw+fVr9BHUI}8OoAsY+932{BcX!P$UR3ngJf34# z(Fr~$Jj6jC`K=G-B@eXsvo(^>>XZ-O(fY&8rOtkD>G4O0t(QCblKU517#=(SIY-8? 
z9Dk}_^eI{8_HMS5S55MDzSMEt&%87Ji;Rl?@Jtpy@~OAKL(L-&`pA#Xd>PO3TS>84 zT0e}2c(sn_crYLOjgdI$wcc#TRm2b4{)hKL>unrt5I$_MgFoE;!mHo8 zetKWHZ_cfse|PPE`0fASp`=)B|3g0w^(%^1uHgZj8)<&m$U5T8H`BjhtmvG<~dvF67T=~DQf2?b4yQ01F`w48Y zLw;!e_tFozNA?RlEb+sxeYbtsYn>hcq)D;Z_OO8ee@RvOdBA-Tra!#&r*@`)E3d8S zuX)W`d34zSziDBX=UtuHyq}N$$oWHM3|DHTz`p5HXOC9r}4#eLHZCvFiU)amPFZ^I!=bcoTA6t>S zIt&xPbJZ~`Ev0|e<@;#Y|MV$8IN_=M0O)D9FGF-vl&WETb?5)7OQj;yzc+tn`n%l9 zY?rTx@hzct8lmzr-}v`3@S9#yKJhw?>%7zPnFqFF9S{C6?ucvqAN$;iVZ{%AV&nG) zw)a|R`lm^;uzjWS_<;jWpB{}j6rCVzuQt!kw*J-6`$s>S{(X6^@sp?X<2ioh^Y?0~ z`NTmV`K=G-B@d)8(EOSYpW!I|Vb;#qy!5m0?G<*L<_7P3y!`y-`!9GEQ+fQ`>4r+5B5Vj%x zDQ5Z)B-v8Y-{s!SKO~4Qw%s?x@13INHGd{H`5^NI&EG%&e2aG}zl(5`{%*I=r}y~K zMMrn9w8Ir2xc-H+=*RTec`$FbSN2DsbW`eWw)WLrOZ%nI*8J^E|2cW2R)6^5X($h< ze)MWaHh$&_;_rmy8#*0d$MM*To%q5J?>lbcw-@gXKfiL?85qqnf^n0hUrf}dHs2`JU;g>x3YYfuZLlOVJ6Mv z1>^a9Cw{|*+KO>LHuEurhyJj^4*oFtt&fk{?WcQ$DL57ha&3L;_{~&e z=68{;{F(kmMMZzj=RRW7F;yN=$0H7UCwXRT9{u14A5S1 z@Iu$mzPIOY@Y(ezebTq>|JTv)9~$O7KYa4dPyPKvr=A%0-2a0+^nd6s`0VQ@x2{QNV!OI|e_n(q|V zANQTl0nP+LiBL8j=uiZ<+qBJW{8BGe{k}nC_|g zy%Qu3#(vXF=S3bE?;p*t`S2Nz(jQj;@!=;gyTRH;@M= z(So0OR_=dnNC9!?o9W-qBenXwJo+!Te^-R<{ut9O^4qv|`lrIf_7UfM9O zclLxu`ocj={rXQ&y|XCe*zqSiDcbR8*GPMQ8ad8AhVRi%!6 z-Zw)xGH!X1pFTnIK>8x?wZ3Bi5B=RsthLvN29A5Ydx_1jf9{JnPi7pu|DsT1m>x^$ z{g3UWSomBPE6daO#cb8vu76OQJeyBTDl+|h^H&x9;aOhtOs7rL z2YStCe&(lt;@~L#Vf9yk{pB@wm=#ug?!8|u{DmIIW&gvt>_4zgpPr;xn2%VQeo;S_ z-~V+Z>v%sh{rmEW@sp>>_XyTUE6>jI+XdLT#6j;Q9^3dqi^B_QeLOGfM(Yn-9r13s zXJGZv`tH0RpE2uD`0V?a-lSOU?{T6&|I?|zOR#S3okY+L+xT>T%sbP+Kd-3MzjFMU z=Gj?(T0r8R;(%cDbV2w*(@Xm|f7B0tFs}1X_|1Wp$cI9P}=a4m)mQ+`9aCf!^Tg0E#EXJq0VcYy#QpmhB{ReZ8=^yvsn@1OXJBBw# z>qGg83wz0rAB^j~lM3@=D^gd7VZ$%H*Zlo!`wB}oZ>_V>--blcE6tz&oZSIn`t)dA z>&T1ysXhKwe)`MwAIcexAKtFqn{EH`_0L)#j|(5@U0z-4YW*oMd7#CWzv6cVo#|!y z@6#W4`|iBUS`YPx-3~u%#=1wm>9yX@f2K*X*nW1|=>9M5U$Uxe@5E=%Kh%8FOXJM1 zyK9%CprXI#aa{xXb{y3sS$Nb8J9b;T)<1sJB2Rrz~n7{3E$Ueuu z5SCl_Z$rz@E`R^=lhU_+?>?mVR=WMCYAcFW{{6&u?fFmoYe_|>f8nX 
zVqv@Uo9OemQ}ru~^)K{~uYaJP{?yL&cR5!@|H7N?f7Z#P!}cRBvF&(F`OpjWPV!>g zbZbZPhS_di@oIt%D|IA5>#lC-GzCG&K ze#9!*-pOnFvg@DmH%7*J-!uJ-NJW1gr=mYRpz_D_^^u>xz`Fc6jt!ro_Gh+^*C{`o z^s5JMoWA8b;iM}!$j?KNZ|9%Wq*&Ph+w(u+SCp*k+G`x$;J5vD)OV(TnzPjDPrdz~ zUXh*UgUW}V@%>`>;Q{e?Lh=n=)@-&^Y*)A|=V4?1!0#W8z5gt?)R8B_W5+)&;X$wT zbp8IfNBy9V=M^QI^`&;@^S4hj*l*BRrhhSFt^PV5`Se>s&1>#1Nf+`r)m z<2vu8!u;5Z)YW0|hn#cK@4s?OXdZpu8|yr{pVvCmU*}MzoCc02>c9*BEEYVuLk)#tFR_ zTsiT?-`^X~=sR!r7TY!8YaWmkOXc`y1O5E%(YT^mrB2>yT(P;m+Vs;H8K<92|AMii zzw%VR2Zb-jxx(N4wjlhV>8N$)Z_hv9;_*06@{JE0Bp(}8e>mv&>tFoQQT^f2d(Lil zzkVG&_W3)Q6pJ1I)cyWlG>%l2?+3qI0e`%|#?JKb&1)RA4V_G zJBgc3zl!)l>oba*U-{sDzkbFE&Bu>Cb%}{%!j|_>{KC!KEexN1{&gWeU<&VD@|A0B9RTaxFU zqCOvH|BL?ciIvw{|KeGL;YSzEy?68bJJ+ATL&s}$WB$-Tzy6bXNmkUu$s=?lp3aYX zNBxJ3j5_@*zXQ(;cI4+J*m&@o9~VFMLF+^HG=F%Co#s)8Jhh|#^9n|t{>@D#-}InP zTm1A-9Q01&sg0l4x5cU3m*lB!c~0VtyQuyUezE(*XTN-M=)YsdJHEEz<Oa3=RP@(8K6f!6tJBcB$|DZ?#BaK5fAB!I2Sd%bJZw{U4THoa{+{yt$GfZ#tzVKSSb5DuH_K}YnfWpAsQ+jYspzlc z@Hra(e@y>|_NU|8cxrp6ad0vH{nLMSVe7)}XZ3G%#q(D_dP(Q{AMQUersIIEiN_Uh zb&cNWhHbiO-^?%SUyf9(|6~~hzIS39CiPOD*~+6I{2;H#bo+pB<1s$%W-&ZS-e>Sx7wSHE!&~M%EaG5XI-*;eHsiMD=0h+lWeDvY{lktdy z-X(cz!v~F@=vL>aF8O9_ee&usOuhex177*gN@2C%y!+{G?|y_jeE#LQ9i)!Ux1Dt@ z-ic|b^JwlUe)2)a1Ecwach|A)G#9^jg4737KjVS$K&z|$n?HG8s7oEQT~;o= z{PDWa;|_dRwno@&pCw+q;mm8ux9@*PwGP-)+jXN@owaxJnDLvtblbMwGM`-HQGZuF zs_3sg{zssq2X)Hq&gU%SC4R5Aq0YzU670{>(o# zG2i0kIpKM5G5!60SMIg%k#F3mzwfyHH;p$SqaVBf*qDw3&eH2YTk2O7D{i-aJkq!m z_P^{~Mn{;OZZU zZ_i^}{M4r&=$*t zeyxL@qSYtgCF5d)DKGJdHT#a4x!$SUgf$<1-J7Q!zpdA}{r+euD;N9zcO5G{uw`a#u2yq&do^0>K%AhzkI zdGs0eA1s)4`d5B`+A7$Q-!8%4f2ap~C-Kxa9rGlf%**n$U+;u(nEJ87l;4p2u;uS& zUwqMKhlFiryy~J^PoKkaiJkv!3(w{^N&o*MSHGfIowaxJnE6%izpdjF2c!N&MP{x3 z>q=eIgF0Yeciecc+wavjY-ik&wAt& z^QqHw|KHGY6~*eT(GzTP{|BdeVA5aIf4E>&^jALj@iskL1$*4~?@H6&AB>myz1oI4 z598C9V#QB>5{Cyee;X&^haK9le*MS4vLGCE?(hGw@zU$SxBJhGX60hvKeGQHP(P@3 zy;DD^ey-cJIQ#^o{_~0oray7&jTO<(=PU0DEAm~j?)aI19eP)=J9)hB+0XXOOZ!S~ 
zSC+4A2Wj1~F^%Jbl8Rxa@Aw}Q%RUfs>F%QOGL#C(f8iQ9PCb*Os!G_L-z=f^kt!Ugl@hSmme zI^g(c9d+&fH_5aAKW$~@!v0aLs9Vk>LGJ|l-U^#>;4{Cd|NJ7OqQCab@0j3Q9gSlX z2N~DwUi`f1qO(0!@?06V*B#^fMf8WcZ+k~`<)5AxzP$I8hpv8flsfkRccVHE*kAnp z&biKBuMLgse8lRkt==l#Jai|H9rYhAm=*n9;f>dSZX{@WFh1zI#!4@~1; z^UaS9Y8~S(q(8Lwxg-AV#Y3TW@rZ%U-Noi z|K_AF^}w_rv$ZZfko_t=^V7eMX;WMA?pZxMVe^`C5D;Up?f7)5O z*mgcm@BhHKio)uwz51=5UN?wcH@~QVIg^V1T95mv)4$bY{))sw`blm4jA!whddAm& z%!Z%(d)T49i^yWR4{Nw56m zp|J1DpSf(G+qb2@9sjhmab?r)#hN%Kv|@jY@c zU5SHU>zIu$iughJ^k;tZ=nGs-e>nNJGwx_TwsSaT$p!a6@c7+~XU9K7S-IHxx3}p1 zuNYTRSk<-Hc;)v;xo{Z|Ufll6NS*$C4m3SlTBogu4r5~X{Qo2P4b2}Eq<)IlFL_S$ zQhOo&Vda-zXkPo1t;3YRt^4wQ&v&jrIb7?ux-q|9fB)51zoJ-iyVZG^t$kHKe<{!A zlS@X_zZ|imzmquMLqc@(|R6Mp4sZBULE#NUH#K%e(#ot`qv1@&VTivzd*k2|C9OU z=a)YK&e03ZbfG`9ojgJ};_3XbJ2c;hKv3krOR3681@WC`M^<$^#os4h! zbvFJU7@D8?<=0>Jw;3VKK6CY@_UxR0o}=TGYIO9-|6Kn6-4xS@IP-2u9&7L95#NJY z-^w#v^P~QQMTXUPh1J>qdA`ww=grWSbUP2i@BZL}>A3iNVVdU|kNIOO_No_-J#G4o zC4PHhIN|T3=N$U?G3wV{|8M_4($31oj*E0WuUH1-3cGUsVJ-~v=`-p-R75KJGrwWU zgHOK|R9^GUZhl>!`SXeS7N-x;3E#%YRwS3qE_vODLe=@YE*Tf(>Z zcc-@2_t@ZK`ukUSX_Ymmz5R>*laJqY!`b_uMO}OU9m~qa&Og&vu6`X?tm>Ni@x39o z%}eVs|ET}GqDn=79f#jh!nb-F$0iPXS5#dD{qvG;+mqE{9H((`G5z84!LMC+{4uwL z%Rju_wQpm4%kw=(T!sLlkI;!<@YJ(iLLdb{v$=CqQ5J=ar<9U(DX?9 z__yirWE|@GI{RPrhv9AB|BVBV9tmH4&t+Smy+r5x$9L%X$^7j2C-Kv#WOdf~EBk-) z`*a*981;N`|n?C*DEH!?cA06-7}xr<`=*CuIYvT z?XvC8^#!Y`?YdE{&e}V9Z1rY!+Z@Lb=lCV+KUy$r^`9wqOjmRV7gm2Kbv)P-yHYo) zXMFNN<<-XFB^VCbe!KVo{iegip~swg!cY5eB;T$-Y-i=daf)Ac;m&_bR%d_2eZ#zy z2zs0TI{&EuSiz|1uX&a4Uzm4-=m>hvPi_3XA6cAwt*l&(=N8eXPEB6;z>^>U%CgrU z9nSg0wM!iM=LZ?b&i}M^yoR9dfAHtxXU%qPwv$Kb=G9JhXWspNa>Ia%{>($sK2Nly zzHJ{Gr(XJgf#1eY_{8fl&8y6Z`6<@b!5`Lo^t;z@JaeV6-V)azyWSI>>kkjnc?~Dn z(CgoF^@C3AbnVq|*I(&;d7f-Os$)GR zI*#Qpra$a@_bqFD^;`489_Jsw@j*XpGoF3^8qUhajz4Rze;`lToi%=R^G;%>8-441 zqy8nMPJgSnlGe!+)OzT}eJ8L%>VWurA$-HK+s=O0V=MNm7gnBr$KSrT+xlVUcdzvD zdN;2_zU}`9vT`v!R@d|Y4fTUMZ}E24-pOP8{k7Va{lC_U`j?MvJgsYWc9eR|%j#%8 
z{uIf_Z~AF{{2;Ffc;-*~p)NeLT~;pS)nRz?{U_(Up7fNkJI|wC|1%D*p=-+mw42_d}R7N@q5Mpwfj%DbX-u!xBjugMSNbU(?8|(bNAlvXZwiym!69LjHmVBr`x~PXWVJs z^(~Kjiuik>)hA!aF+a9quXpYP$O%-uG;{pO=?nRf#%pL@<@0)v`nQWnMSoX#qkpsnyROgbQO8LfUh^y6eDv_}z&tCL^!zq_ zuy(w3eCqb_$M@YAe*Vy^JBQXz7j5&Ck5sS!Y-nGK`JmdZZWODdx7j+r-T&5U9@rL} z`9=K)3Qt9U>a5kx_fG5-sRt5IZTw&*-p0W_#y#MSoHM!Gc-Q-xc2IpC<^vvOVha1h08P zH@_~={H=-k7ND-}SNIhiUJ< z>BN^N9YP(u{(m4V7xc)_{%G+Jz(d%E4dvy6tgYS_8h48Hm0;ZdhYN<)b%hme9@I==F{hp+HFDR7xkZ4cq;lk;id17?7!^uFZBn7-#ku` z`XGIQ=64hGElzz<`PA{)V9HDUVckpnmpN?dkA)4^TH@LFe{vT2_WPr;tX%B(*SnVA z|4uf21~i_GBi?b_?K;YHin$_r4Y8yCBgG@DPdvr>!ngO4Tx@jeg{GI*?Zqda=6NUM znjc${ygKCmpHd~xTl4JxOKRiK^Q>Ibc@w7(VU4S|zUHB$Q@bTde^LKZS<@e$BG*yF zx7T4q>nV>o=o3GEg783_pZ05hwW(*gn0~wdWa>J{pSJq!j{Y3~4`=0K=YMw8-@iEZ zD~eTJdyS(T^G)Z6Kk7ePRH^8%^Q&C{oEPlK&r7iW(F>%09l`_cdHoOR&p2VDRhK^H zt~uWd8}D`Uq5iN_y_0#^_5UrM7s&s=E6*zmtFy+>e9;X%oga48f2^odtN$b!gZ^zC zCw}TTmDjva{Ti?2slN974GAA!iq!X7uP0JJ<*PsJw1zu)nJ4kuKKZ5b-)FC#e5&=EOPsa!$GD13FAVP#iEDl4+240?NvPF-isZxhPV5wkgT&2d zd`0}A&95c(%}<^aKXq;1i|7yMFTG&Fg1IB%;@#i#hHGaHP|wc4wX<^Z;0ndz=SSyQ zf4Ri#*4{~^`4&NJ>$fd_<`?xZXIRnS36K3C{In0B@D};{pN5P>{S>Y5^m#~a`sm>g zYklB-pFZT{w}ffGIDNC4JiQALBH{&)Vwgc?0WTdE_(CsJ|<6 zEBaG^poot99=#>DZ68i;T|D?B`AYc2+JnKkk2? zt6x#9j^1YL_&vXWS6Xc??Kp)1xJ}_2<-9UUPOg-}0!ZnEJ6(OrJN;c+|sI zO!HH_=k}k}%j1tv<1bME$n#{#_==e>Uh^y0zvV(m@4FNAZxxX`{hN9ok9pYm4XNjy zAaOADr#iv|ZNIDYF~8c>Gh9r6IQF5*pWT1trSQ&4{Xbb|&Fb~Hc~&m;lb1h#`NdM{ zyoD8Y(>_ug-H1~!wWI#+BDbQyE4=agH@%OX>Cq6s_2a}Q4*JAz<7&U~K=`2f7gJ~I zfYb~7&;9ukW6uqT+5M;P_WIvlNZgM92eNW8J(kz=4{h};iWPN35BgNJ{SW?J^AkJj zKTt$$Jf}QxNNoDI=P_3v^_16qS2y4C;3=BFF)`obNuGDYx91ZZgpaLAz2Ls$uY27c zPldHN`r9ox9CoSKxSf9>UojW?&#?EsR#qq@uHyUtI&@RQG7vE0jZmkJO6=8hv~ zZkc<{x6fb0S`Xa1JpWny|EI#P_JwZNPp);DkJ?fHp(0Yz-xc1t|4HuG;a%aaj>r5! 
z?+Vtq<5pLiw!f^LS8Zrqc~*yh$p`EF#6jgzFYM$;Z~5yUH-_Cm`@kW49y8{ZXZq8J zVt#e;>1%@p@W3ot@PqdIcPB*KzO*j=ME!>gMn!+^o8Jk*x7T62$clLhuX(cg;e*Cg zzWMWs`4*>s9XiQR>wEm5=7%XKto)aiPag=Y{Cd;>dgFNq!L#qbT3NX;&%C_+?aBvD zp8<{2k4tRzH=F9OiL)Px`p+wvRzKkvv0`C+r7zzu-S)aSy@te(4W|0w?}cfecRFr5 zfA#mO7gqehCC#Hg@R_jEu{*uv!EN73ef#`7ruD%+CjR}Y*YSkiSzEo$#h3O&9Dme* zq+nL`cZE0Ze>M<=Z?B_V{N4!?2U9=eg783lURzR+eE4QNjo0Ykiy!v*?7Vm0v|Ju` zJ8N{qA3xl={-LdP8{KHXzjNwW6sz<4H(TTAX8kZPMdls#pI_wG>2LX?1*2J865>oCppPR27owqji!{9(p|JFk6Z&hcTBqb5)PWaHaj^X&I8Nxl4_PZrDN z>@N(JmnDl=zg@4a`q*|`)qk`oQKx@%r6M}+eKar4&iVgDf9C0(@XV%<9(;QpXnvi} zi|Y?NUVmTTwim4v!YOm6fAia&|DS40zg%|+TK@wY2X!9e9k*RN@!0uK<=OLwtr)le zu_B|Qzs9+a%lc?Z{kr|X@tp8d+p8b6c%GFD`kP<-fNxmiC$A3e{*zmuxYVh?d;Tlv zV&{K`%QiC3`}#%w{tMmG|9^2>SL5{8-}lxsP^UkB`1ET?-2tgzd;Sx>lJVJ&sh=Wo zL&l>Xwqlx}+TlyTKl`$aztj%bo%PCLgRh$6HP3$kKAM$_UH`MOUjN)yzoJ-iyUlqd z=$#^wef?McsnZ>;){yZtHUt(^V4tn!JM7L`{tbg z?C7N3sn2z9Aluh}{`$AKx-2dg{dN1|JO=ZjU&Ca3PI;+qd5t(|TrA!ao9)H?THia3 zgNx}8@0j<2Ewi0=5AS&H*=rBq$Uaxu=dUpx2W%zd>iFWVuIV4$yz<$v6333)f1{{U z(O>hL>lV>*I+g6m2VEI99Zk2yZ{zmW@LS?Hp5|BV9)8BJ!~W&JbN!)5u5?rX3P0K8 z+NXc8nb&%@-H&DE!f_lr!2^X=T@yz)>pz{J+EM?!s8Z3tukgm>|6a#GuB83z`ZuHw z^F=@FU&ryF_Gk7&`orOGe*Ex1{%&qK{ISD^4nJ}p_3iW5XjU$^|5u%Cd(%FJRb6{0 zKJHsZedc9$F1)CJIYLE$t;2R;dT1W+J63N@>NRf_gby08uRG4?YIrH8dEQCB`LRK* z@3BGkhds90?D+5eaNDrw{p+24-OmC%`~Bl^RxWn@vu%0(!|8SU(}!fm?O4ZYF7)@m zZ*3R3wfeWDGP;=Vx%jmSgW0D7-d_x_d z*Gcn^ZocKgQ#Ak3#C(eiM6R`W40MZ0}|}c~pD;LFdE#qW*(LMn!++aUC6e(>*V;I?p$8ka^W1JkaX4%d+-< zV);(|9@PHKUPyn~`G{q2I^d#n!fvPj{M6rEGK2b+_us*+T>LJof3CKUD^_RzW+%E) zzixg}|DhtcR{!0lvgyz9c}mb zW$(J}-AirGc=q{sFe?|{m;4qBcmDzJG*7X)tJ?ILC${pU{=-G2R)4*Ym;UW}%!{m; zr}c@0-WAbt$F20ygP+#J_O7Vb+1?h{ANK9v`F(?jw8MeRE%WC0?$zgAQ8gatwM)qC zIHjfbq=|q3E78vU&35w0?!Uym8{(&Lbc*`VDESHI}GA92I7)cZfwe=Lhj zo&FrhGB4YYwTrBnSEHwQMReTr=1SA}ynd{1p2Sl=HmKuzY_Nwv?6&Dadn~wqcG!KV z=l6eR?<>f+{m)QVE?jru7x(_p`n@gnnJ;?b@9&!<;|-Kx+dFZ7S5eU^SH8{{Uh}8j zbu15E6wN$|?ApO{O*P3{I z@pkqXwZ}i2Pu-}0*>Uamujet~|HqDha-COO(JA)O)58OM@cUO<=7vkBuRYSg@+X#m 
zd2Hx!ss5Gy{3_-DzpR+JVE@u(7U2)XP({_)&%3b(rw3Sb#=o=OZ;+$jrJc4Gk*T;1)JS}KK67~(rhV`_!XNc7E7s}X+(9bCxBXZye#R#bdM9zS`+D$+=QVjw z^FjF7U=M#-2n9&%XZHG-;O&6vvSFw((4Zg#5SE8YJ-fc*xXLy%n!8r;N`B>Z5S=NV7yn7xqxuA6}Z*l6cg=T|BDj?+S0+pEm?ek6diq4{3ek zpm)MEn?CCJI!@(vL7vv5u01b{=nuOrwdINNf5*cv+dRDHUaev3+5V@Ml?#33&%C)< z8aCft;^{i=hx@>rzh-_>|AC@bt^WU8-#_LuBj)X$+W&6-x4QAp-~957Wy2OLZ2yIC zzxI6F{~WC2m1>OBW9^0e|0Btcc}^ZxzJJNZ&v;-Ka+AMXlw zB|Go(yWIF@#_d@uFt{m3J_le!BUI?3Au-=)+T_{yL^RVw<+7j{L+dzI6X@ z{isfrdQ-IyI_7G(1?e;DKdVb){cI_Tuz&Y>^+{(T@T7t>>n^8Sb0p%*Bu zsGIhY+Lil{wX|;1chrBhh}d|pusU^{nrC`6G!AM#;vjw1Av} z&cIGF&9m{C7vqCH{QZ-TKKbeYdijd}~)E@V55T%RPSD5czihr6ezZO#lCsx{6t{r2U{9^)0UX^b_?jN37_tyvp~FCA;&y zQlHj~}$xXVM?_sb@&NntqH&T>0VrJ^#M^ku&?kCzg2Yg;%fA(f=J;x$t@i zEB!Tu$D01kcJj#X|4*N}_~A3(sDD|pqQBPRb!dH14^&=L?dFJ#10VFtv+;A``C&nL zDS9Vy%fqh2q%RLo{b9%FpStnj-)3Qt|9RIHf8BPBxb1%ivT`v!`2Xd$`W3~B+o9G? z?dF!scL~y;V$?q`7&e|Ota00SyEJWo&Uh769``+{=$MR4T&n*se*RkjgL7W|+L6 zxc#?_?27*I%4lc%v^sf_74s5ab1(6yIv7v+#D(3dj~`6)yps&`V=IzZhhf^%AKm(y z{DCm-^WS>iz3*GiYo48d7|F_o{i2_y_g@`Qe_PQhHUwGI7uzAW>85=8i~5(9EBb4F z?yC*o>f|z_Pk7B=b^9%^PyLGKpD%vw6s><9$Ai?v27CC!W?P>;{~zby7&bd?+XH6J zf1Y}F{NKvTh1Ypr-hS*4=V61wirZgt{pYe^a^Q9i> z(>&#&Cp^&V!qa^C)H6(Z3&{`r9`%P`J^P`T!v0IHzsGtnE|<^WG0a%)I)yvskT?1 z9e<8wGv-1;4Hl5fGQ*0Df;`5cV-F7c+<4sFZV#$@|@q@vceLFlW*_8gIT%Q_RsPEfcinF zHhm??|Hk?ryr}<3QK?Se;CQirLz4y^(%_i*)Gi1INPCD+c24b)PH^! 
zmsHWf6KpGo`3Ef{}6Y=W9`)j`5i8H-S!vtAI;)Y(VuzDE286cY>3Tv zSpe$Z>3e;0@sbMC60gcJnGu<5Blpo^R@N-b0aGk<}Frr?Vb3V@YTl8>x%hB{mYSR^~(j-C{@m^P);ce^+?paecb~Z;3zU z!)rdNaVN+)^aYw<=Zl>pe6RHk7gzsHe(;{RuW-Trq4DZ9-o4k^&r{#dzt!pA&dLS- z#EQB#c%*Ud%Rc|oXRABT^BVOpBdMO=CH(nnHxld<8{OD$utDYt;_rp<4V{j!<9ckx zUiHGRqkFxoJ!2^B^|AAp+vD_ysB7=P$^7!e^!>M;l?(cNr?#S4@%d1@xr%tGZ~Kj; z@2G#fU{>^39-kwam(|IOtj>7ELGOxa=eWlD(7NzItJ|u{bK>`4GLMDzht&`M$JTpY z^V_h-TCe+I^V*+~XUCrdS-F@V>*@Xfo%$8U>a5fGp_}=0;j-N`@2Gz{qK)U2r^tD1 z_&$kKpEw9Fwej=1wYch@_D6loWBkR{|AI}n+4qAVogXfE>h=4*=gKM6wfkQUX60h% zf4Kh)<0%TOy7o?d=!VU_K;|9wA1pHJ^sk%;L8pNtE9TMHNNHxxO9iP98c+G=Z%@p( zIQ2m%e5;2Iro6--8W;TOi4DK`c-ZuJn||&1o3@~i=|86R!Is*2H;UDH{ik;2_=h@u z-F2e=LxrbKf39OOJ@W2+`X>&0Cp@zmR}nu5A2dICT8}v6noYchPrb1I>__k4{Orra z^j-J3;o@7?A>Ypb&(F%mE0Sm9X?%I%-@)pZ*eDa%g3(6w6BC0-hAB&b8kIw_pteMKlBD*s` z?+Pp0u2?U=Kg`_W_+`&IX(Y`2*m`SyX_XtjE2_oq<|sk?-Q%d*Tk7w>=uc6i6ZE6~ zH#gFJ=94G3)`|L$6p@Pl)Y(e%OxK3kYzJ(Y#6jB(IPuG) zZq{K>g_9b;Z=L<)&i~)ZX|T@otK;P(S-CJzP`uSO@uupA?VWJb`9=Nb7m-^1caS{O z!-?&|9(q>t&}~5KSRB9BH9zB|nD9OE)SuX4&Aqcv{O+*!I4HZvWI*6jo>L)eoW@_3@jX zXXVo0_x2Ji`s+OS96|qlez5JpNgd*#PyE<6emg6dluz9JHG0Ccx=!l#@`sJryxBj! 
z{DWcRA3gG=M}GcI^6mWRh}KcGJ_gheI<<2_){KvC-buue`Lx6z^)Eej`U`8^{wNoZ zZ68i->$A~qo1bwN@%KXJW2oa*?4*ynx?z=-ueoaHwa*Q!oF49b(|bQn9lQQvRQk8$ zpdFSjz5rX+Uhg_T@`T-4CgX2!Z+RXBCGSf5C^?0qT`M~ zU1??>91z?1x!Ck?e)=a5Tugrm-(BU2Im_J}c6@S{JMf~;@9)|={#b%7wexNiE6!j0 zNNsfUP9mloedpqj`ZtP475$yWxsJ>9DA|z@YCYnh*L<^;N5A+%>o1pa$%k*Y(>V38 z6{%N;;iv2GIPc~=Tj7DFcJjOKu>tjM|IfS>^F#Fd+m?>2C{{-Yv$YR&qkcL+`iuJK zMQ*MBeMNNKcAS@H<^G3Fg|6AYx(jU5}=!!05 zpRQO(B<@Ocq5Wa#t*gD`#P(>oc=YP5@B0g=W9NTHwH~-rdH#wGW=p|L?zl`NqSq{miRg{ZzO$Y&d=I#~kys{m)=I z&CdTVZurmt|616cwRiHE`=C-Ey*YkFuc-e(kx|iKbxrlG(;r^*TQWZNLF*%Zz6K}e z*X4O9e5;2I(l<7!{{G1~-*eyU=kMP?`L4@PyXCx-yvFVR17lgaFdy_X{c?%3_Krt_ z^wn&1+oqfLLA|K|V8N`@|NrXxtBU@v=rZ={igiTh;YxF%{rxMyah3Kjhkw++^8LsD z{@3sN19j~B!%?jVHp=_IJ#WuLt9v}LI&1IbadQnp>RUd2Yn`b7P{FL#|KGL$ciIn_ zi)}iq&30ja`XvusOn;bj4)*b{AKhLmxrUaxa8I)cRZf@w*SwwaIXXJdYOV z`D|z9VtyyKPmp+uDGz<|hs$>T-d+36-Z^~nyv^h1u)Fcw`A_;#%umqo-}9_ocwWS+ zt{IQt0eL4J!?xJ;9k>6H!c(iiE26!R|EULhS7eRL)8~o);DNS(Y}MpB@q3VZ*kBKT z=>NrQ|8Vyk_6_-yrcf}i%6~h z>Gf9)tphr-?fR=fXk7h_Lth|%uG7Fy5x&=YhJ9JNnC;Z=;qUMJ?Q1q4yW!maNn1UA z?E&Z8eQoXicTDSmEwvlnC{}gto%kyE|H;K~^HDqMKUzd8`n$p#zn9C4^|;0lWA;M&!`A=v%7MQfcrO3En>kk`R zm;PE}_xH8S2NnI{%@dyK;l%b}!fT!@armI=rF`=@Cgxk5`d~UPHkk4ff7s;CPi#0i zW9hKT(9aHC`k|kCjn|%k*joSpD9_5JZu^($IQ?}3-bsZ0zUj~sf7HM1xK97d|6ez> z9$02~_D`mpj%&8^7#}}q{k5dN`N`9I9$ZX+_|iqUp0MSLdHBkPbH2XJ!B^9deg17{ z<-+Sb|805y)uX*rzm6+bb#3*gbo1-x7xi}qv!Z|Ljr;#~i)cq*?+PpOU9s*w-{=K; zS41bsL$_8E&&8&H@6@hC>LhW-ufuTV_LshMjlQAqx#uR`^V_2bz2@8XZv$Dmm>yHg z|KB_#)i3Q+vO2n;n|Bi7bz^zC_@n-f;*pK##P7jTv6&Csg&{hO39ormH{bH$DVl$9 zV!p+bJnw{W^S}n-V}t7NpZbrtKQRBHm-|;c=gAYld&F^G#F9H{iQ$ zz72`9rjOLF+Ob;F3`eDAxYo6WBw>)@?=FcbQTRh40PWUzt zY(?_wFdTgLAHF>2+-<`_du?^>V^?0yJnZ;0sh96pZvSW6_M2y23;8&`lSiC4vp#e2 zQ-}Vd{;eWn`ZHfQF`wrRK6QGb^^xemsCk~g(mvJSt6o^?`M$9S!+e=tXa8yXk4dC* z{knJLk zV|%S<$UGL(&p6?PLi|Fw$x4;CX<^mm0f`sWFn9u2Wg zFQ>NhntEq(_#n?aX#Te3W2Xq;knxiF`i1m|Er0ywnb%zReAseqqj$b{T}Pf>|2&YD zi|JAR{nQ$_*nD;V!ixH6AF18kTIQJ8=P}TQ@lqrXdd*L5{Jf4V 
zPM?bA*E*Kh!zQl|!$IeK|7S}cI5WKcZ<9}2>aCso4#qu>s}LZ$oU> z=)(V2dnXY?@)V=~!v&+Fzs4)~A4~FWyyc53@%>`71bhEL2Y4X<|LyZ{VfNVZ#s40K z*?&In$ItJ+z&q*JK7X~ee~|kxDbFhktGXs`*Iy~0?TyzF^N;$^E2`A$ziJWf?BB^Z z)aQeGpm#;qxIE>h`lo!hJM)w0%5wI+QpYc%zkljRd(FN7zh2e9+PnVy^jE$3Hs;au z_$Sx#6~*eTtsm{Txvj*Q2gvh)9rYh6m=*n9;f?;5Abk2YbP|WxOujF(JakmV-wV@u zdB&q2wqmb(;kfg@8n)Z<#_-&T*YwwXul#1Kh>qKi8)Bz*s0Vr{JhQ2*h#!Oxnx8zaN1S<@t#R_| zFx>QoV_te}MXfqcue#%(RTuI2M=RU?Z|OhJ%4LG%pV1;<`n$sF^yLRjv-UXBbQls| z^Z0H)I(R33!$!9aFGb@sj})EOsq-iC(ErQNz3JH#9tizgZTZtLP9LVe?f>aZF&Fud z_H7*pWIQK+)?RJ9{!nexDbLD= zD;Mt+lX?ED{_vJ7U%kmnmyCq1ZhS*H@B7QrSKa-W^78hRpS;}k8PLAc{*peM{Yn3g z?mXt*-*;#ku=>raOHt`xh^}^lDzjc=-U)>5% zO_}+vv2WZ$zHR>_S-Dhx_w3ZKC{|~k_JeMz-)lboME%Q-YxQ4U#(;0zX)b>169>JM zxY_8Uh#$1NEv>8h)c4pR^{^GGSBIg!=hXKfx9W`Wwyz&}@@abxQ{TRSX=mkP`Y z|6g+TD~c8O8x0<5+zHzLhdy(S>-^|1>hFq3o&H6vSWF*Rnw{f7wW$XW8)Tj!{$A)A zk9pMDNgs7}!>a8UFJEr|N5U#Ued>hAmi-F-*!3qPS-Dj7$knfUlPBKJ8o&Mi)M+05 zgDvq#{mY7#@yHty9`o_|LF$cU<8M@4Dm0OZ+yj)`bV!{Mz01EKlosQ0ti8!_PS3`8^+g<;KCQ`};0marM6I zU!lHT|1+AE3&(vv*Yp2LePMOhK8d3n_3P#r^=}nb>h$-+MReTpKl4s;xTIyxvjmwh zXnkm&`SXeS7Ei}V@rx|9H>uKQ$}($M1jimLGI}e{@7vF6__2{OI!f z+hJn{B?0Y{G>)U~?OxVS6{R9rYh9B6a##&i^yN6z56( z=0^n46|_Dy&-|^4`4&&dal*HH*dTmtQ2k-Xhqhbt)R85_j5WXU#N_X8LLIyRhW3O0 z`Ay~b?<00R+Rna{u;;MQ;&h7N<9C(zaV_j##J3s zKX!^~o_A8;{Md?Verkueoc5QC2Yx;yZ2N}$u9|eksMkFE{!!<_y!{&g{Q76|GhMLl z{xdpXdwu6Zq_3#|aN)7>Tw#rS9%qRSKdplf#6j-j%^bMN6L!-g||^^fnoWte*Q`D-*Q7v^c#pAV=XWc-Huv7Oj<|9SlIS`w$9sQ_PC9;Kr}?QJHvPd9qnGTvN!V)FFDN@WV7wY+e30#p zd3#rk-kFbgg%xdAtQX%O_FDO_TOZmq4@WL@*WsUB@?P(XYH^-#6Rhm#r^*K~?=-F` zR;LfMHI8mPAL;y(LLGR@9!;|IOQEg!oM z=_`$^KeU!}Gtav4-mvd`zW4**w-PL ze-w7-d9ZqX{&HH6{&TVEv%gQC_=^7U2fO*aE)AW;*-tf(?dCI1o&H|);iZ`7!8gC= zFRVYT``C4pE z&!~S{p`t%^^ge#@t)7mHO&p{TvwQIy(l_oaoyvqIOwZ2afok8NMji1+# z#p$n=m5cec56g2BXWonI59cqvV8Md9BjKVe9=iO7-_0lA_CM5D%=4^V?EOId6K`j~ zW44n=z0N-~zo>sX^IHA)kb3aZ%~16r4p#I*$2xx5MVyafZ5^ForG5`T^}@Q}Uh}!V zTl06)eHP5a;p^l<`|C{umC5!c=Jg##vf6e@&{&`WVPJiDnqV?x7t$w?t 
zRj!w2-p14Z@Jm*wK7PYT-26`-_uc{O+V5Z5I-X)) zo_>CAq;~t)J|wI2dT6TOr^xfD&qvh1RpeIm*E~K)FdzC&kvQlz-)zQJ#1E#gC-Q9^ zt@p3`!!6Iw9Np|!E`0kdFMRE;d+w&b9siGJ<>JAb-#>a+(kd(b=tlj@{BlW+`nQWm zb^7D4=#&>(o#z`K`%(CycLjSKU!Q;P3hzJJ{*&j;yMjHgjw{W~D>7akhN+Klar{p& z-Yl$k+P%wt^St%Rv(LXHS-DiMf5_DjCVfdX_T}V}9sj8PPxK!sGSJV%r~bSm+R?|J zPvW5RYR^BrvW)fTv2*d)>18~|A+HX5@WK|G+_~+*J(q^8n zmQu&`Xo$^mig(>{<0XFUqpyaa^k;RH@3o%cV*2}M{QBYLPkwZI|0a)obGxtK@eTU2 z{m+>6SNZ){GQO~?YpciiU*sk88>jzpQKh0k^J^E;ar+5^MWjyu^na|pE7+0migjnabiUpd(T=X(6;_AOc;vA?@IGaJ`gdh| zWn90A{&2#9?oWfCKQjzWy6O6Tp1F&B+x~}i9B@PZ|M6V?iegpQ-ign?|5clLfy^)J zKT>3%hj)e5(UT}ik12jeAv3y`p+*iYW3Iuv4U^= z{f3O=ogi^A^;;i(HT+!s?8nHrJg0F`>lkk#{o%GF4&D0q56=#FUcS^DfBePEsn7Qo zX}x@_^8WMtu-zK!_*t@$U;F=GZAn0O7gwNWm-BjJI|6Ca8H%|YtB2ufr65*3? zs5%e_y(_AY+yA3i9ls@fwtuTbp3^wEnEw7%pPqXDXP&yTf9h#}y78|2UPK?hw`P39 z$>j-%=WV_X>7#D#wZF>uFE#V)?>nq4W%?)c;y#w9M@=2#AbprkT}Aw$?PsaqDBH5* z3F;YA&nZuldUY6fJLa&TJoJmDBr9%rTF2ghhqWF^Ka8hHev00S2i>Tj&X4}0{$)ke zU-QZ1f3)FSowkhQl}{W@{nP{Df$4tL{LEX&p)PgICfBqg{(y;rF_V~)# zYj*B`{?@EqY zf|iFaism2g=HUmef34?1>S2RD{GqYbwRe5*6MKeDzwoN>zUJv`sc*;s16jG49vhX{ zKYY+rU$G>`75Qds+ZR|$4IxgT ze_llD^yhosicZu+hryysy#C}ULHHp3g64M<^DRz&Q2EsH*kH;_{FVFvDBs?H^Q>HK zJL}W;U+O3d>z{1@s|v&V%++oQGQYU}m!69L$vohDr*&$O^M3F_>qG0}?}cfeji2;~ zzgNBBF1YHye{|PSSnHBQFInwNKPQj#=62p^EGrkkgWmtYr8cPJi?_4(P9CTC&7d!i ztFWW~?INS1zbm|PdtONpKCd6+Yahfx?~**_p(i}hwm0Ud`S7V{NIiI94}aMH#98Y- z|He0lu*s@t{osS!QjgDHd=3V!zeZLr^dnYv?Vb404SqU5>PGzsiVV|J^U15+e~0`+sD=K1Q~~Vp!r9-dH6y2-U$yRo?;yzf7p7P!I9IieL8G?;3>bn>CP*t zXYaqGIu1Bf?N)+3&x#U_x;S~%RAk&#FW&E>{)0tEMSsobJ_gLw>f}XMXFTGdcSUsE z=Py^9_Ixp(@pFyqJk8HM$p>rvu=mhjANcl>9lZ|L@f!*DU0ED} z+W)knr?9GP@5G00*rr=Dzo`FEk%1n@cZE0ZfAl_<%*X2FW!U;B4tghXv(ZZtKWKHe zKk}Kc*;;pT{b852UtIJ2*S{w0_`3Vz?U#0^u3i5yoRy26e_K~>z{Gyrkoh>_v!*Y* z{|vlb7{r-x)PJ~$)ag$jrjsk!o$cQ04VUh$d72=6(0Gadybj@|nC5vW_05kBGGA;^ z{oySOKDFF)`T1eHvzMQJ*X=XC#%=#ImX%B8``?!Ob3xYhZ?=w8`~P2UVKUFC|GdIe z(O>KQr;k7BqYh1H`f<{~`LV&2m-xdjAG`hVr*3{c?0xcUpI`fR)&E~;X??KA@4x6n 
zvZ9VU{vv$-8Yyxs`n$p#_dl)#JHOBC#xRMaqfh+w1;PWZ9`n|Dz^9%eaUEx2{o$hL zFFX18TaOMOzkcSI7A(=AzJ2~0$jZg^=<)xvmF-{qK{x8B^JCs|`=4J7Xya)!X#G z3w@&x`4WvfI(bw%|EB&lFX}&9WYp@P{{DqJ3DPI%HEuR~DdGohUOG?nlb4K_&V%uK z`NL;Ea@zVA&l(J$`}+1z?)vg<@;Ki2Ls_{P?zx!%f1$crKRPew$^4@JW5s|K{ax8c z#`l=;?RAt(+&e+yVCu(C(fEnZb$**q!ly2Ibr^Pd-vbBz&(H>;`LMrYms0}u?E2?Y zji?9M%F4x#d)jI@1k<(ETUU8bkve^9=YsuxhnElP^soH>mpTJl$0^PeY<^fUoj-kn z#MA8oKM3ED@t6lT*ux)oy5@5azj^mLVb70T^6c9C{LyP3=O5VbfYwJlD;J&zvEuWT z^GMJ;L9Tx$AB2w`^)EZF=&$o=?jZHxQ{PZ|#6jX_qe~sXCQh9S!gpD@RN^)Mu-2Lv zF7@CaFALM2J?hbSoi;+AZU618T+rD&{ru(BuP9b$?bQ!*UrpO?=r_Trf0h!0&s9~VcL&gZm#y4W|-D@$(-XSeStdm`ImVq+WZ^xh~rK2h=bnABYw964?nLf z`iuJKMTY6m_^$9q|40d)k@sKhu9x`~d7x_S>Lz@lUpY{oR(=zS8H7{-XY+qUdRS&1>$Ql?!~U(-1%X z69>JMxY_8Xh#zELX+3P}8B$Nj>E&mf{z;!+`<82_Ea+c;-D6*K+p5o!XV>43WaU!X z?wtA+#p+tjee8dElV^XzBHx6i+fqiFwsk&HuMS+ZEY^#6Cr zZ%G1uMg0c~Po4gi&%e}3kb0okcxvP4d9-*VD;KsC^J^W;`xotS-cAF<*WWfjd?=r` z{&8nLNL~B>aWpFz&U^Xv|7V@V>()FkoHzE$XTItDqW*(fTx#{-qKJ+=kFU=k`)%)v zXeXZPmfG}3zQw8A*Im!}PW&EJp4sZBULA(bxBJqZ>oy(?Tfh2)A6oS-lc~?~za96C zW#xiiDM~cz*5HxyouFNRrMBr$-!0)q{fCN275z0&STSC*BIe>TR37t%hYd1M5PvU( zZ>Zx{?4*ynx;y;%qK#g)>@K(L@W>0JCtlO;{QmbanHPENzp+i90pT&eSW%Zg^*h1l zo7GPI#%oAC>OWjOvih#DqV0-x$8UXT9?!dX1*^kr9w;_^(8kq%Qa^TzX`Xjd-~8B$ zX?|*lLzX<{p2^oA9S-}|m*$?d%{=PZ@n>7>gOl|7&zAZX#ftM*J z#6jYzO@HZritVU2&t>I8ycXGRTuJY|j@1t9K70RGJ8XA+n10mP9^U=EKc$ZC|C7AD zJpFLxgVuMVw~W%!-Rh}s*md)Z`p++F)avhxXy^U4ZaY9{(7PgQTpqe8!UL^t61RD~ zvRp?`>Mf!_th>p5JKk{Ig0S8%?mu+&dslnSxBFkUvvTp^I{*CoD`7|doIC>g9}4R~ z*Lc!*)PJ;yRPMv{ownL;Lt*_J&bndiD?9)HE2rUY|IK)L^P_V% zcrO0`axxC_ELq@H?tfUT{}E+{ivCUt`(Z)m#kdtzUh~XueqEmVwJ+=xy_0&C@p#_r z;`l-3hpqPf(7{$1drar^S34^g+fQp>cK@-K#zC#;o%mVPr~Usayc9LB zzwd}LQm21hB7s)i14RYhCn)&;F>w z&xV*x~*g1{{WIYcK&Bf>w~+O zzrSmtgCY7TN>)cV)1UrwtxH}@?5KaEcvR8TN!%+kFVnxRaq20L`%b_InJ0+97s5Bx zaVoa@d({h5AN>5?ho6^+HTJmZ_jk7sWIMOt-;L?Kz?Ry1g3Mb{qT}>mU-?dvI^?Mx z_0J1Nt^UJBblh>ID@}WynGVC^Z=NBT>R>z_pE%nSc8Y19cT(T{*dX)62Gt)LEBvf+ 
z{{xSQncF}4?2o2g?lo@Te+_5l!uy_IC%*f}w_x8^zoJ-Cx12|U^u=}f*znA52tVrI zDjwD8U%CFdWJi8m^1Ty;*L)W2OYD*9_(KUzc=V*4L0 zX<73ILF$9viQkZZhQtpqMdQMB#JSTI?sPDb~E|D%<^2q*ZB~y zL+hXUQzy06Usnfza6esTmp2}HZ&>HYzaD(mlt!FB_|Ph>HA*Pf23ek z^mm0f`sWKWFYBW%eygjz=2hK%%j5Y{H2=WFe2XV}-U;8v#RlPHgX$0Ke|Bg0u^mUk zhU=aBsRIstsj~f#W#z*Anawv>zoJ-CAIBp>`m*02Y2512UrYE=|M|tEI{nl4ar!mX z`sfAv#7|!!Jdo{;y5^@}tw&wzm`%I~KYVzLnRCzj@l)Z$vu~Jp_xU?hhu@#3_42FB zk6#PmfmzK$KDxoLn_tv_v?x`lf8~2BSFk(JyLW{Z?IFSD$%5zydMAGCr=hm`;ic#` zZh3X~Li)q3$5uFS*#lZ(`wRCwynpS^{r~y@i&j=HY_EBF`;A{Lg>9HT&yv;Ah5fL1 z60!bu-pn)VKUOd*`cr>U^5J_Yb_EB;-#ktbK4{}>UGuw%`4*=SQ2AC58%%kLKdf`@ zx!>9J^$&&(4&49xn?Gac_3iklk(CRt!~E;z@828PhK!>qS>yEA>(H#vgwOo?`;L_H z26SAP;5@P6+xQLTQCE4*i@N!i2T#%bt%>;-Px8DIzRe38gpUoXKkPE_x2ZdwvrgFg zl-(Y@;_<7gWADF>tX!C9eu)15rKNsFv7+9}H`~c0JO1et+x9=X*vv2LUoz^pe}2aV zKdnRm#6j=6$AJgJ1MPLK^D{sFYdz{x$86#~_~E!4XU4xjIx`%*(WVz3{&MH~lf$!e zF&zff-dewZq^_bwt84l~H|)CkMg3h-#l|hIXfxjYB0BDKlq*ep-}l(cYhEdF^aHJ) z)-`{7V!p-c17y6^UPynK_4&QuaPV6%3*i^1Z1TD!)8>WU@Bh)d%j^@p=Gpfzc~&m;k^i{(`Zu0`OZ?17*qya^@)+IV z;inGsi~8q9Mn!+-(JG?jwtxI7DzCYHH=l8=56#2h3)4LBq`vvF6?@eSdwlKW-#)t5 z-C>Wr_x<$Dxw})x&c6+7eXyawKg!h)In`7UtM@${_PhAc=N|dmG2TB- z|F+fxSJC_5(vPCBI%{DSN|*`3#i>FG*$=XW#c zq6iN(-CL4peUqp4Jh+(t{wb%QyZ3e{|Ga;-q42HUUjKW>K^N0uK>OH6??1+PiW04^ zy%Qh5gP?voKjt0vFGsR*HJ?20V*uZDZ^$@a`NYB0PdyMGnASBv^VV^wOC7U`_uz*! 
zX1?~`n;)1G&N%(lj~;&E0Qnq$GA`RMHay17Y8K+?hFv$msQ+M5s#gDA_dj$+c0BLK z{SWUK+dDz)A6*p9Kd+mIAGH3po(HLi4fgPdO}6>L%Wqin)UfI7ZNGNlw+5+a$3KHv zx!CV7HY~4y9!T{QR@BY$ND$l3ztK;w@wOoIi~0{0o{IiXc+Fh};ir9|4{^|IzS;Dl zh#!Oxnx8zaN1SoZCSJp*UVmTyzO~o;&5Hefk8icskM4bjI&A;x{PObj=WljS$H|g~ zI`Ggf^%L*(bL4PQs-i#h$ct!a|8Dh&gLz5Inwtw!A58P`_d@Tap5A4_oR7`_$Gx#Ok~r?D_|dTmNlg(r4WM=M|oc z{!Vz!$%63hI3*XqcY?&h)NkW!-|#@I%lsNr*Yb26>el(GSBK%fJM6givKKEE22PrM z)fG#2u78uuFl~RrIHte$&A8HCbxj_+!B6K$e^LLDqDrm)dlu1g+kak~_PNhHv8e~9 ze(R%7f2{-0>M(EO;3E2|7qOWdM zs_5?uZ#=Gd?Z56|i--#d+ii|G&heB$~0KJu+UhSn4B@89Y#Ga1jWe;(6uz_(0% z|65^2-5Na7JngG;{-@O)=kr*9-%(|tqCa(}OI}3}jeFM}ukbX^JVAJ%J)i%Nx;KxL zq^i=t104_)v4Do|hNihdH){h4G^^0WO>VK%B#6=|O9HaVRstddN}+(L2n8adA{2|# zIx4NGpn%jMA|hgo$mUimASy^XBEzsazleLz_e9l=j>+yG=l#rk|A;5g_S|!yjEu~T z%#7?#&`v`03Vt;eGcm=eEgCpw3=@j%9K1c^*2EM?aEP-x9a$ zAJR`Qxje71?n3{VQPbZ^KKCQ})=yXYl%F{0oy1d{{?OavokjCmzVdmHb)|Od*EsWp zLrdBI`5>y{Md@rHDTE7d!O29aMtU?WZ{(M z$FW%B^w;m9d8c{wm0+QNUS=4tE3LwI<+i|ug6P70hVW@#``U^5R!5$q`3DxAZ}Ftg zJIO~kvlr7Jwtd@8FMn+P9m4i&Z}spI@4Xuy+yA@D53asbJOO0u%_YvZ_WbK4()jsr zN8+rn&_8;b^l$GvG5?=-{2xqsTVi{c_|aee=w;Z;pYVk(-}udocbU-++n@4=->&nQ zW4*?0|If2HOz+j@1dz?A{YX}|wYU=>x_Py^pRmX2-_45AsQ-*IS{@649Nwv4QLOsb_%_#k`pgsCiCySF zSl&u?@vih1?|*y!|Lo%GPajSBd7RGnC%a#$a~cPi(%(P*zIp2per}KcwV%E0)u%T- z%PXI~{~gTYu=D?$==-mh`W3~hw&?Di_}aS(vJQ|qcA@`JnOW0c*T?6`P-)wk9pxE4tkf=X^~fjEWfX;uJp1z7v0tKtLD)Ud7$woeB^uOGlXvm{b9$4zp>{%KVA@ayn5h*2X6Zg zeD?YKSQZE8v#hUV9?7b2z2>#`eXI4~k=)eo?~|Wmnx0xu9^a!jom$ERYCdt$Cw}-q z@<3as_G$h;olp7Tg~x2-J@We}-@L)V$olv9PgywSqAL%39X$5?_pvMvdmh+TzyHuu zzw(P!-x}ZaiTO?IEA)?yjrymb|K`dIIS)pm3L|VVZX;-Gx*n8 z8-@cua9B(W4ElT zo`1<}pD%uN1#MpK$NZf|=UbfqK_~gfhYhBAi9fhM^yddIKO?Mn<2CP?`->Jl_Widp zJ>>E%Tvcp?_p((x1-}OpjdnjZgWBgFf+N+x!Dr9G0)^ zpfB_HNuJd^wU^Q#`X5>L&-d1lx3Df666Z=-(}G)$}B9u#Aq|k5y;OPx9KwP0U9R>qGO+?-reJ zari;aH$H4I%}e}Y|HFQL;T>NL;h=*z{KdPb55i;bzdBhQrq?7r|JG8!qF9A)%m+G& z)V}{feMjP~uh4&>^q8Jb;vO6pn|wR&~-JcVJ-i$-~ZdPQ{b`V|B&*7oc|wCzoJ;xaoFrWxmA1ppNpUQ(6P{e 
zu#D97hi4b5Gd*&#sfUj^=$+)5t$e-m(>OMKhVbb;i|Y?-{`ke8ZTBBnhH1-w;l^!O zUWvNK&wu~duRnb3P*#kZ{;u>E$McS$>DUt6p3hYW;-Gg)o!X{noOKb$wX{z8J@w!c z`spkD@b+oPzB={F@VnK{eQM^B$I_3Tf9qs%*zspAz5YD&DN0tgg~vPbp&PdGX`FtF z{eQTuXw*OXJ)`aSd0AD|X+Q9R-j&gD`nqUZK0KD+niw}f>rU%ae^Wf^FKl$s$mutI z5ts%RgLa zSFd$;{cT+b`s61qT7T-YWZ>tz!L`0SQb3=D{v)Nw^iS5sc|7vz3&fwIcO~n%{U2V% zTE6zhhdx=PRBf&_5EM{Tz3V2(D?eK9)B-P>%5bC^J6Ra;tMn8 z4Xp6Oddr6mSN-Bk^XG5uwa(7}Xn*kA^`Db@h-b<0npgY%i~7_3tI&TmD@K$4e2;~7 zvA-BL=;>X_I_`c;)&(C(U!eJ^bCIq(-YuoSf67i|~#?KIRm@%vr~PnxIxaLxCQ zJ9m%#*6`(ai~GeTDw6^fc)&*+qO%ZlRl@6F+=>Zvz`l?>GFtFs<`CpZT#Bd+~+YoqabuYp09D z*3bOxq@B;W9Ui;>!&nwa?eoWu`oZLWkZ7TklUw%t-#)Rc$J0c+&_6QO^oK{0b(!uu zzfa5Y{51*Ozre}WDzh*clJNpL@!ei%uMzc709Q(O?{kN9-;a3!IwKYEd-ZD1p z0$E?7f2XW&)c^mh&tGy`L)xG9?=%jg12)*hAHq#vyK?fk4-UJ0{NO+Q>wDhmwa(uE z4rOte{?qjL*Io64%%dpLamSzSG@ra&?Uvv;{kx^7roSt_|HSie^r6W7py`~%&F^A6 zc6?n@Kl6mY?|psijZfU$zw+7_e*KJ(FYwB1&%a%r2V6xzf9a@SQLJj~o!q9cHu<_9 z_ABCGvHuU0l{Njfj?eMw-*~zc=OYe!C-Kx~UXHsKZ!Mb7@|DkntP30L;SYN}cw7HP z53U&IUA*C62VZ|S{C525WO1<0{0_bTOjrGiVime+-)8Il=;ob7$YZ{7uRjONj3)iP zE2HDyN9ED1pMTIk_}l?qL7P|m!`}j`4l^lwSL*#C#hTTS}6JCbL8Xr1x7iE+#86F=)q zkvx$7%_n?Tr+(r~>SvzdK5)TWf4cv&;`=A_cb@ZFyRMSG{z=!jvR?loSx=S>>qqm@ z&HNoHpnv))^dBxGHT_-bEsp=22$FC2e=dIS1c`&GAKS2-#bNoZJL%KldrO^-g@~hRr6n*M!~4-dLf@4FKBPLMd5`myO39?NeD z4{`HrA6D-s<}Il|Z2yP33!m(^!;ars@ao1#e2Muu{@eTfL7iu#c>e7f?k7;9g>Kqk zYNMNX5}}TK)?e)Z3(Jf~{k_zi9_R$RiE+zI{5G%lOCD(bbtm#!o$`55dCXo+fB!n) z{O&4~FFCe<-7oz7q#M@#DD`&!c_53!^q3kiK>R(9JD_A$TkBu@s{Q|OOL@D3th>;E zwDj0~uC%KAwf$zX$!C5;H!*H`=+l7Yf$R_HZGP%?9uF?1KOFV$U+o>vZiVCCGyl_f z9#sAQ>u_0zop%^hyQ9dwisG$r=^x#&)AeB&`p20X^kk(lGJS)E8xDt$Z~Do}JKr)Ne*69*yo&i|OL_g&@5%@|`)F0ie?L+Li$Q)(4|EubX<7S|g5-l~ zo%uV9&bK)Hpp$&#!v@p5#2-4p{rRf*T+j_~x#bhrjlF#U9y|Xqr2HV?KkuqvQLI9z zmim)g`^p7{c_IG{ZOMiRa zCuY8(0}tmv=?khEEPLoS{1^Zg#aC>*-dO1E72sp|Ld`v1QiZ$axL&*ES|7OTERU;c;D zJISGrIQ~NaZkf@fKd*S(7Pl*S9?&9ycDh9RPWSw%Gbjm zraip=pUw|wgtZSo`yE$bRK5N`RO?3*oONcr0F$j(*DqOx4o>r#SFycQ;?EOXc?$gp 
z%1D#`wclSmxno5t~b^669Ow>bVhi^KfX^+K(ut_i~-cYkJ=e;hm%-o4z38_oW5 z_4y0e-^khSl%+I%ifW9#*m_|ANcbu z4)pU*>~u@N{B9cC>d-5f%0mC4@|NjOT(RAeJk~|OApR6>-h>Z-Ex#r6)IRq}>z(Am zy zX;poBS6bs<|K!oM$EWc-@$AEdX^-SYUc3S6k9`j=>Hu2yOYwz(N2OYcLx?$bH zy*K&%@OJRo`Oj_^haK-%TlDo;ZGK&kR28~8xwZJ;KY!PPvZAIxJjw4F*)I$m^z>TK zy3>B~gXFWHm|yuVuDr&F4fe@wcxQgdem#)yMqZMdw=_JwYe= z#)l22d5J%q{HN=uJ-*4o;q=S*=p69n4ZX(g{V(~7`Qdu~pH>zJ>lUlp-&338fc2S6 zM*4hO=)bUx)b!VNa2~_-=qOKDQS*p{-bp;Qtsmv7#Z$lX!)KWKu|b_b%@6B6w%4~m z*YCp2z4sf~_xahxdH&^n7ij(ESsXm>#j0;vAG&!bIffmv;VtwZEj>;8^SYjV?tep9 zRu}bSg6(e$k`K}sX#Sx^=UW^-K_~g}cx*7uOZ@$P-+ye6{V$o?zw*MzR+#ep^{Kb- zpO0m6upai;6eVjB`@hcL`1^PI>j*=kf1I&V|3S$!J+#iApPa;b-NyTP@jnk#X|phtG54Z zKELBY7keCbl~;L)gI?=X+wQjkskeA8HvHySe(y96E~P*C$y>Z--U;)AKY8v4?|E)V zcsc&tyt&TD@86SoC0cE5U7Ck(UTvN?()AVk$Bah()9?RuvN$YXb)aw1N7ixg8_=sM zAKq@Hx7X3>y5L<}fB5{ZSDgOx3NC#0+xN^J+t6NDa@;hXa_wVv{rtPS%G`h5oIw!t~U74-OQ;Vvt|!&&9C)K7;o$f~epGLoiIv;uM_sKl)gVu-k zZ~lDI`4-oFt7ks^*kBKTxas4UKmYV;_lB=*chAT-kD2GS&R%~GXK}C|zu8jm|LZFc z{p5?b3;nxgq^3W0gJraNKQ>*4B(HsfAo_vciN6WSOVRS_rw5PaH{^wFHv8U=13$bX zY;*1Uhkf$W2gtYMe>aPR$vO-D2TD&(f30il{Ybv; z_buV^nnxT={n*w=p2cDL@b!t`JH^yqTz^u>JmJAzKYi-UXLiG{=DphA`dIb*-y>x{ z-uHskp|c_Uic($ZsJ8w7i+)>@K^z?m{RhiPO@H`@%jjb4{|h9qeV!nFgWieXkbZLU zlb539(~pyU^QU%Gp8CU%-3vDP?tNblJFmF=i4T9Sy8huig_rZgApO|;&93CJMNhMx z+-mFZw!GTtO+I#^|4>;`)1SH+9jAY%%r5e~5vtdTn%5o>KYfEXZ{Nf?eUO)8TIZeA zn;#ogevb{RKdf`YX?uQaxm&|}UwiGO@b>HAvF|^RWpVtE&p+`07tG&LUiwqJ(0{m$ zH0jTEq|pVv3|(1W-9MJ+qPu$C%Djr?fyS4_(Z?&FA$*JJXP)r0v0p!M%p-&0Z|=hT zE<5aL`0V+&o5fM>U-9$D8^{ACtJ>nH-}YV-!%o+SUFg4{%&h6J>uYZ+JoHbUp_>@D zyu@#PB>Gx@M|j9Hzvg@8Gh9l4|7!o3cKN_NpY30L+08DV`;`^pv-8hmSsb?i@2+-t zB8pY$mUBzv+84U99$t5*cA@`BS=ppNI-4FXod?wU$ooHk{iWltxcQ8KJl75HJK|dh zzI@!3=wbW+fUXamr2BtY{a~_gi55DlpVuwe=wr5%ibDT|<*g?D+uKWCqyBDU-0~8? zSDX23&u^@!Gm(#ct50~A)F0-K+_(FGo-`>OdDn%PedO^`bm92V>z-PF$-G%ItWWfx zBMIIq>bjT@oeKR&%M8;W9bD-xzRwT@$>;F~;!n}Lqz+!iru>xG)}?%A!s$D`;5X34O=w61piQ9s8a`YQAvE2&I>^1xOZ zt)5?P9@YrSe! zKf~{j2C_Izk4g8J6k~;UiYF? 
zFHGkppY>p;nAUlx^V1);Vp^ZtVWVlcl-GY2mg=~{{?Bm(q~0g-Z05)KbFPe)o*p^rD*FD>92h7Hu=-~)b5}1uDw=x z_Rfp?*Vu8dcRs$t{qXSmlkc5_)Z4t;N7gXNv)4bGZ+hiY2ydZ(r}WhHSDpABCHcLM z|CZ+_&cixE@<1L3X&l=tpP}|=Ht`;Q`~Sgt*Z6OLe*RVP+vhK1SseEMx1)AG5yh$= zKW00*Rr~+JT>Q+(dJFx#Wu&IRE4{_*|BVGrr!0UMpQu)`UgpiS3Bx-v?eiC1FMJ?<7WxmC6~^yMYaIUq zvB_tDF@%3a^4dca^R13NMe}zSop14^&O6CRH?tShA9nj||Be54=7nMRb>6V&d#BaU z|7brPAAFyl|H*Y;MX@TKQoHv5?=7uoJ!%*F50#Oc{+gG5j&JLRM-jb1?@HEjubW&n ztNo@cw&geQc_(=_B)=)%;16fLa>VD3IP1l5_Q??n*O@J+UsC=LCs6o>vdlI%m?BJ@q;zL^1(}<*~GP<)DK_Ce=+Zc zv!-QX*B?Cf^3*@H$g}GYur9?s%KuwCPegu}4E*$kZrC+c{zCtelB%Y^E4{`3zoQ`i zdna~^#6jYzji3Fg7T3ONewW2z^-Xr)f1ej-KL5aWyXEzG4U_r`FCMi(WHNxZ+&#js-jNkBQJfwU^L&Y_gZJ?KU-NG9GC3(7ajG3nkU|B>z&-T z-`}Z?-`0~$Ua|j=mbYs9_etD4LDq$BnB?)iPd;e#YCis6nASDTi@z6NX#Mz)Z)#2d za+tm6{&(GR&i>Td@rU&&=IbTr|6LXbbz)Up{C55gzLw;qkNZOZu`*KA-${O3k#(`3 z8zynfOZ=&h{_xgiuWi3!r@g~L%T2!M&s(41wa$(|@F?2; zpJ#DcpUFHDEp&8ptF37L=i=x1!MgkVPK<$?{>sn&NWSqgzanw4DGxt+pslMTeVU(n zoc0ecr9WIg`Q(>QeD2i2&1^UE~4f6UR&9`-HJ^BAE3}?RfOV_Wy#}GMmTj=jfN0a{iu84fQKhYl?ELn=zOGgWm52kgg zA3H_wgwN`+n~;7Q;$h1tj@e}P;jJ+HEuYSJ^|QR@+54|<7KiNzE9>?5I_g&xtI(~* zEsZ-tJN~FmUb>D#|H#m!e_OBPjV>I&tUu)^4*JB8ZS(6oEMINn=I_hmuzIKVQu;&d z)3;8$>{s6nTkQ1qhmZc^2bs_GA5(s?qjrMgt#6HQ2d#68)b~xa3;pxb)1-gxJUBev zvZ{Lgk=Oo&_|XsaPW*=S(G@>=DY70zC;2sdG5ul6@Ag}+_yt&)+P&g{r+&At*E)_r zJl+kb>E~bFEDrWNvBs_6XQ={A(b zVR7xJR@Wza_zjoP&phF6SDb$HuT~xjhn%qO8XNDkfa8Llf9UFb;AZjvU*=ffI*k_x+V-$|7+?S_TS|*Va>Zfe9lGl*YH|r=by*2IPCM6x$*k{$C^F^8rOB8 zhj=SJ7XSCZCq_o&cghbAi_LoMe(Z|hbkMx^p!mrLEl>M4f4=B^i?dG9Nxt!6gK1vk z57S?ncJxU<{9SNoU$yxu-`L;we?EU7%;MnqW7mHhP(RpG8|+hCk@F9%1GK(#$uIW* z!SYs3e|V-yp4E56wtUSa4tkf=X&&>F2ipCr{N|@l`QU}eY~nrgLs=_F2H=t4d=$D1^- z-7odzgO;cL;O~WLolpD2R_w(W*8JcOpFHx5&xh%cU)z~~i5)-f{nuy~hX=PU|NidJ z7jT?0)OAR9p`Y3y-$V9JGE6t}I^r+%A1-e->EE6zd8~`atD*7{2fa(`n71iEmwfs( z9_p0OgG=cT=Wq4tqxLy`LHNl1BL|)K)I8JcP@R83QTg-9<<~zSc|CoRFIn|1aXvS} zPVYDDLjMINOHF?#+`Nu5JzC=br|aKQUR}4%r?z(*2ba(fU--zp58QCTuLr}&FWG#} 
z_dT{e`q=kxbDalVML&OMUPZ~OwqE_lx2MFgQ@yx93;jpR%$okJZ?KG3uTQw&43oU} z@e}h|xAmd<=68$Ew>bQu<{KY2nC2z^u-=>}9=`VQC&J7felYu+tDfRDZu)n#I5@uM zug3HL1Ly@xR-s#qTUzG??f-Y^NAs+2wF~_hmK9C<^E%4(fCtq1h=V@y+q`|nWW^v4 zWL@N$U-P{ytBO8dxh-%h{r&Fr&+Ps$eXag=E_>_a*Zuw;?@G4fyb0Go922|t{F|#^ zQLL&Tv$ZdDvpzdoPv6Q@=s#LUOn>68^cH>15;Q%W*j^ETifJ4>?T0${Ba64PILvlx zgGrxW{qQwmIOovc?zQ>{?iIF5zsCQ6pDY(3v&VZ*r6B#`#Www&=7IFB>nrphD>F=g z^1!?Z7K8QJe%g)hYJbedZv6jim*lGrZxgZ}LwHyR_G0pdRrh?u8$bW}SXedu?4h%s zy#-#||3|Yp?DbE7{Qo0o`-kdAon%*A{PzEwPGY7TeJf9Y-$^l{QU5WiN1t>*@J`}9 zUfbWAm~VB=t7!hAMdw>Qsq;?qS*O{H=?|O#af?IdUa}x;e$RDp`1>OlQg7Fvghw&o zI-dW$%k&w@;_w>hei84u#}je-&n1KVf%O&o#|-07>e9~>na@!3;RAi*H@&nU@<8Kj zWpS9F{*}*zOX=^QzS7~-_c`R;{&f!d&3!8${dW4Y-(L)7aoB#w=kLk9l2zY&&8z+Y zdoH;=ud}{F|5#DeAD;C6FFc^;!3X-p&pJW!K+`|fHMNx&9_BIZkso&X?IFeAuT2X6 z-W6sZbLHjK+4-Mt76&@{hV%dF{8BY;|KCV?(tbMPXWfPVtP4yeqA$ zzI}k$4?=%1G!boQ>aD*m?q{+RrxK8S<#Ggkd^(%^1 zZM~D*tPkC=)AjK@Q0U(&D^34oUA=yP!MalPu4Enex(QxI^3&_P;s>pN7wLNHPhAs+ zo3s1(+HuYq!dC4U_WYalY3Co9U*puLNWZn~FSR6-;~M=H`ghBS>F-LbuwA(=Q0vII z`!SE<>T#%feD9rnkiJ0ty^wrEm&}W;*o!at6W9LFPyBp#*ki4gzW2&5-|$-3sQ)^8 z{mDFw!`82L;;rsSul_c_zt;N;KlSuk=s!?$)byuru#Aq|k5y;x7w^hwQ9l?VYac5J zKWO?Xule1g^DRyvpynGNb`Ku-`X_Dn`(2i~akzidf4;QWlYK+*nEqqZ53i>=PNXPV z)&6O=_ObZqKTjGgYis%|51;GOzsC=v%SaYS`%{ACgWie1h8^*fm!jn}kCS}!r*@i` z*kOzLA9-ozFP#8$QFMx$@3Fz9^oMs3?ETcpCmszaZ#I43Pd$4t zy!QIPmBm5-`F8u2f52&ZO5AiUUx{tL^BM*Y+GU&`-6ogY4s zcxvMZE#ArEU_It{>IZc`kG+@(U)XW4T{b@cjaP)7=Dl@r&0Y3~$DV(OvN-JbM_cOm ze_HBS6sy{LC%5UJ-#;=h$o`96=s#LkHtMg5G|`5SJy(>DDZ^sV(TUwX?c%xBl%9?Igd zQ7R3*F#P*N0u`A1j*l_Y0(-njSij@oQfD!io7!b>{CZI^W{yJWld$UTlzj zY*787|M6?Kc>bh^!fwa@=$L(0uYP~p@clPGWzpZiwfS4wgyD6*+V%H3l4f&%S5B!`B?9 zzV`l0*O|W*e}Fnl9+)KqKmDT{cDg?FEA)>u)%15V0q-MBk6ipIA92t-$uk=r74d`g z1Dc;Y<%8GOwS@k#IpHx-*2fA6G zE#*~YeTDvc8L|0XX^rDa@{LdD#fA^`u4HL`{lk2gPk+j5e&%tJuJ*diFQGrYf8pK_ z^&c}SoOkPm?_Tli)%72aR{m~+eEyiLUs0?=C+*K{C%4+HM{V{i^1(v?PFaB-p8S+| 
zxJW97?e}>!ZGO{hf%vgO<^%EfLh=pW#CbioVlTch*WM51$_F~NyMpb#1*8^Gya{TY8Us0?|e|T6YzrRTR96wlRq5n|H)u{j0WwhEqx&IAa4BL5Tczxoh zFOWQtzS1~0e1?5l9A-PUd-%iQN9=#zvNsQh!*_Y6J7d#-q24}!&y@#UHGco$eDXla zs&DDnzJEwRx#SXOy@me6Wu#I6Ugw`(SzX;frl*T;yI*Wx?T0*&e$w+(R;PSE;iVpI z%Euq}J>cn=p8NZ~;fP(HeDP}s&cM(94^N8B3uX<2IG=yjcsf#uztDd{>8a`OB#-AK z^69sR!;;@VQ;>YnJMkN~)K(-fMa%C?>Rp4~l&AjCd6S#{fs5`B`>(gk3;B^(@i?~g zKiUueTy}rQeqqS`$}8OzI;x-V4Pw{y(s>H~N3vqn^jBW|=wkDC%c^QV@;Hw|KA6sL zeiv!%ae^OA>%8WhA6t>SCJfW2&Y$zLTAhxux#c}x8L+rf$a%maESahngj0jX=k z;6Fa8`1mCc;ibE8eE8JdD-WN)aNP$G9`ph;9n`0`oqux@X*#|Z`j3_wP5P&w;}}m~ zR#o>CeS+SV(dv1@^wayFJkWSM4RucZ9#kH)7tJNWGS1KlhR-uHN?=YO2~6~(HyUj6ii zZrJJiu>1Q4B1uhu__{G{$7j3$)o=W2|DS*Vwch5}{Nni!zY=D;`{`_y)VY|*3V?=O_6&_6OY>CgKJ_8&V=w^R1rw@MD8L{9(KO{=Va9ZhKAG?mh4Q^w;mM zuK(Q4;$Xdo9rfoEQLI8Y$1OqntmzLA{i$8(pO=+Q`tv%<^nj;ZR#o>qanQRmI&S}W z(X@Vf{8&D`istW2>aan0vBA{e5}vU2HX*!C$NzyW4%1_be*ZI9zoJ-+*#DhG?E4RDvtH)o z{wVbCmK8Q%vaYt)*Ywal{3-ghPHp_`N7P|kf2}MI%Xf=s!`Fo2qf?hTXwG4`3R|V0 zz5dK~9>rW<{xm-S(y#HUE;aqp71Z?=`VW+~roR&=uLuw8F`cxYJmO&L$4=4mJ6Rm; z7nawjewW}<`omF=zx3=s-gA36u5-y5Gajn`f8-G5ADK8$z97E-A5QmQ$*OO?6Cc+t z!M1hjI$3w2|6p0sq(6T0ji*~y6?Nz^Aa;A6Ao-x>X@2U*PBE?XPWa7_t(exQc3AJw zBez@Y{Uc$<-wxPq)=Kry-$#`n%;N_zXPf@H#MyesEkXKfx760S>4sh%i5L10mABB@ zyV5FbS8j{z59}exI+l9>XI@44KwDocnU6Xreh)68pLxQ|KRGb0dFx2{$2s4BdQQ8# z{!DuPhf$rcqo{p(Cw98U5273QN7MQW{fA4gn*O>zen&?C#*>#-)&5T$^sbDKd;No6 zisXTIzjhkxocKKmA2!&-ALbnS-QxPI)5DxK4t(;Rr|u4)J^$ue9IP|HwEX$Seox!` zfv&{a(zn@8ZrS-~`e}(DUe;aczo3jX>c6YhkfNv%eS*b;{?# zCG^ABzuFqVS?!^xU+!Q1Z9o0TF2@`SpPhdg%Hpu^AA0@%1M@0MR<-r&r+>TtA$@iv zmpI3xLjRF6Qq$j+-s1SPgCPBL{~0PjanQS@jy`(i!)rX$DIdJHj>J!06WaH$c6jk^ zFC6~Yi{P{GpLDV~(8u%r|6KiwVpWfSvz^@He*>-0miXbNze4|oWu&IRuDf>qEA<<% z6JGd0pZG0b>&XL~;_%dvK6D<7H~5(+YS*LwPv z)zr7fM?U@Kl1m&c^dBoDHU0Y}zvuDCNgjHE@HZiOAnRp)=GQ*F&SSWg{&3n)4*beZ zKb;g#JNbo|9{$k*^vCC>wqNC09DdFC{|k?#^GdY7C6E0U{-*Wy_q{ukH0r;j%wc*s zv3-KXLE@>6zov61i-Ud}>)gaVj}7*?U&72^yt3oPGw%)Szjvp;HM4D)&whVIAK>cp 
z2R!LKSu)@O`QIF?gOC1lAt>~Zo|^tn;$G48$kh*O+$*;AdynLU^abMYh2$GDAM3zY z?8O(Re)i4VZ5D0`YkYUVwLW@4b^Y@rvp8)3@2b6;UVo#dHpu*n5-pA+PHx%t|H;e6 z&*OkT3;kn7O@FOxcchMd_FF^EBMzEg8c%KVK;vsA^HCqWtk!3Ii|J>caLs4?xBlT% z$A+)Xed)qwrwx0px9|UtWO3Af_fH>+l2vW3AC04%`CH1XNI!-C(X-V0JK=>7^oig4 z(Ei8+o8snIK6sbZ&phFiU%TZy>zzFsuHNt$pF8K#4dJox|8%oB>~XN9pZ_|kZ||l& zx?c8s<^v1;^D?th|3t6a>mOHE7q62vul;?oSue%3+&Z&Q9gvAPK$Gwivqgnk91AZs||MSnk?zxYx zvEclZ!i@bkxNF_Hqn^II(tqvyZ%x0y&^oEAw&+`X|C>t=agHN}{sU#CQU777v;DCn zw&kP$0`a%cpO|lTDGuJx?1(0{Oun4Ye*3fq<2;<&z{Ao=uV z=qAQ3FY$Y|4O>|pmal!)bfy2azQGS)6NY!*df3Ps+dL88wf}#$XB;!ad>sExhXLu& zuk+u({*z&+2H*EHI=Hq@V^dBlKY+hGdg{|x3dn~NSc)Bso@`!`pN!)DoQp67$ zZxW{u_zdCGc^20n*1h>X|F!SKkA@kaJ%846Q>ydN2WN5E_a4U7?kKv6C|0%gPHxjL zx?!j5!!Gn6E-Ov{WL>rE@6k`Utg4@Bl$Z_pf^0#$VXr>|OgO&wq9Ab$|;dI(z*yn8jhAzpSC(f9|RuOztO%7CNY(>+oZt?GFi(4|*ql z!(8o_AbBZTK7Bi_Yr>{H^@mM9wf>J^cYZ5u^7y?|UY!0>)?uH&ca&ezu78rNA7may zi5B;plUsKFS#A8jD{>9SDw$MK^*7T3w z;{C7wj*0c~xZ!?;mpJIPKDEh*$Knn5kL4?$+2kjF_?j?mb^XZCHf#-tt^PRw@=O2y zm-K1pp9ixz?D#)j|9`5ZenqjW{oVR<5>oGof6^Vo7 zr8fRX{qro2#yTf)--N8MAs*JbHedCbT~`V-Zus>_*LwMKcBHhJwoXxs@}o#wG0n18{!YpJNekNR=q0heDmx%Fa6;%ch`*hPn># zB%iJErPp85JmO%X|3F#Wq<>rM(1*GdiGx0^QyV|~yXCjCIE**VUrZamCJZ0F@ZlSu z``m(X-R?(!d&eJF?|-{-dix(~dwtRH|2vXh=%#&}?c^4^d9}$)*H`F2SXMOZpPqk! 
z57hl2U9ZrU;UQnByX{8?WdNn{&3w_KRESMgSUnoR$6(B&_5qtnqGhA zKRI~Azu$F9mpo9qs%dm*#TRBgdBRO^`0E$K1~(u6@CJ7t;kC}Lf7Q+6u>F6P`2XWa+i|C(@nnAS z7CJh)Wxu~BFIWFQg7jJFKU{ih`fFYKe=gQXx2&q>gAeqsj8?~E?@Fu4r=M;#tKZe< zViPwW^4-KZdA^sQcuVTTo@4v1`5#}|J9Kt_@cb*^{t!Hk=RZ4A$MMooc_mtHz53hh zi4WWASWhnTLjMKjttS1`|0gmYt*3wT+Sdy&`hk|G^P7KQ(fJmqU(iXu@nM5$Ug8hy zJ#kv!O7D6l%((NE^`G2rir2V(|8p#h!(RXFq@O-HMQkDX#Ip4>la{{Pet8@=??%inRrc45=i|NNJ^>puh^zrWyl0JQ$PSsZr1 zpeyJ!u6BE-+W1=&<2-H({YT4)=?@<`Tt>&~-w~TW=&u)gr*-hQ)XoK)>eoz}sV z*iAh6!`gH2Soymr9VINqy0s6^|FD08)R{g5IzRK~YSWJ*#~Xgs&meL<~-{H(h_WRh1VYiE~f6Zm*+UM%*|E9x0 z7KdM5|9_#QHmLK9x7vCqx6uu~@u%zS?>jXX*t}X#KG$(IJz6o{_8a(#gVdX?{Wj+3 z6M1WOPU9ea*kBKT_~c0+y7sO^mJgS2Ix^+(wI8J)`~D$3iuU_=conmT;k6Fk;J5v@ zC55c7&_6QP^iSr2m;299=YtPFHfVh)FaBPb)>$9QV|LO<6K|M##3$}M|Goub{reBy z_s!#Cd+w%Os(9d@|*6!;t8|BmVP2U=Mi z>|bIPI_SJ+JGs@~QqZeS9*+b1DfExbHT|g@$>Jd2JFyLQetxgezFgwugEqhBo4>Q@ ze2c>mI?4A<*p#RKaLDlC-L+rdJsh?DuD7js;52wl|IsWC(_=IJ{!dH&PLM6@O6}VB z52^1;ChIHo&r6TZ=Sr*EznA*=cP`RY?+egHk@-RP3*KLwpE~99;8Oa-&NE-wW0SjH z2|FGCk&zFFS@7|Cll_Q3JpX^x&EiP)7H_rnPHxwZ{~hrYXMKhKF;dfC*T>_Cd>(&> zIv;T`ork_b@<7wSC4HJ-`+(1IDgEJ`g>CbE@x6p&Q zS3m0l3;hSmiYEOzuR}h487eP%?a!BN)%!K8Ll;H!$Jy)o7EkKDE7^;F@w;eNuLpYg z!-m%$z2zZ$9vU_pyZq3($BlWdx8u){@`3Be-=F@@)|*S5t#{lK#BS^NHm%Q=#uZs# zq5ohRsp;P*`FxH^e!3rcr+JeEEl>SsDbJ^9KT5B>N7@bUczdmQ9h9DbGL z`zKEP>DKzu^)>$gW3v82|KXCWroSt_#qnPSGM!v>*BaJ}tzy`zMz|cnkd( zlpfQcbt&>aM)H{-#Gj&fC2MtDxA~Q~DW5#z*xr?FMW3$R7D%5in&zjj3Bx*{KJ}Hy zzjm2aRebjOORjv1`G@2A=LM$E0F|2O5=?Zf^_lct=s!|27{4p6YJGf;!+MMl9!27y zcO}a>{arLG-MV61erICb{LJGt4mSCTH(>bLzO&x);F~rKj~+8^*3UjS%z8Ng$@g6i z_tf(b318MQjBh>jdjVq#)rSh z{Sr1h=%QQx{`uKq>%UyG&SCdYF}>cN#lh=1u&EEpuD0-ioPYLCa`0orTj)PpX4Ld| zrMEc#Zzo8;Jx{j8@7=_>kjEqO8Y&;W7GFYt|0=Kh?6x~^aZ>-Pd;a~nzkTShtfzMVXEci=-7m;ze#xq~ z-pOsQn@V17eI3c`?>jBtsOhhI)xQ6r{2p9f{mBQDI$LMzC;y*>;oN2WA6w(rr^7k! 
z>JNVY@V&kA+Vy{ivN&x2pRCtk8c@HYSjB$Z;+DppAi8nPfO z3LhA&tLND?FSTud=w@+PydyS!d8fAWn~lE-oAT8kKH9yrbRa!`$8j6m_W!QLS$CnoD?Ls6`@u3=>1+B7h~GQKK|$U} z!VjA6s)PBp9y`Uf&TGE;u|egxyu=^Y-FWWV$A9cyVP>}6MxX!lt?=0KNA+dD&gaL| zKQEeoc@_tC-igf?ySv)~n5RV6Y9TQ!MoFWW_LkS6YRw@pgJ%EstTl?uU0` zr|VAr*eO!SdO5zCpMG5yht)5o|Nq|q&z{yT7gp2XmEOYd%I&!QKaXZjfA7T4_fF9d z>ag2QSPu{=6PaO1GpW5UzpT%2Q9K_APl>0gJ z8!Atjb;E-HT;cfdgiTNT(L1JHP+kAz7@a?=F>ZfcL%;u=Yg|#RB9Hz+ClPe>YNz)X z>nrphEGy8%yV9z7YrntiXuPZF1mOdH;>S+W^5NBWm|yLtI`S45!;Ftzy5);!E(|lj zvc^|d+-ZGy?fX}wSsZr$zZGBqykPrX(x+rsdU+?F+VMZBgRjtksH|wxpFGxAyT8## z`MK^l`CwXyzZa%;-j&_f*Xv#Bt&WpTyrKVx>;Bszn`U9xs}5ZLA72jam-hV2dK87V zm`~Sd*dj6!>6ia)$M#P0QoGQ9xQx{Fccr&D|MO-+*2D43Q2QhfdY9B`9`ln28lSGq z{M0EQyzrP!yhnc6dFxF+bkOay!<;wVcgH|F~6zK{6mY*x44tI&5PZH^pnQb9}Zjg z`ja<)`|jbei&j1UkX>h?hwc9zoe%7b*FXO;d4|lRC{=}yIxoKi!nQi)WqpPIBUv#_ ze|TN#E%yJN1P)#B^_?|J4?q#pF@r!S7f=7*Q{Umx3SO@$y)Vy|( zdh)?EAAc|OX+KT2`g`$(S!YlF@a~7s37fv=+dCe6^cwK<`J?Hs>*@LX(^fWN7~i_$ z#%BGz&ZW;n|IsqDN&mL;lMj!fE350zd)}4a%I{riRr{NFr8O?D>t+2mpNnbsxc1n6 zOX%0SaPe6;xtG`7Jq+El#j2-Ve;(THi>*?dx#5kXud#8SQ6j@)P z|5%yPsDCc?2=` zj@*TQ_WAFa_6>H_ZcRk7sx3TVJC_)Cx<2gwzW2n$n*L7exsH?RK|fuIdnZU7O#Rq~ zc@~G|v%bE>ueNvMxAmlcjl-8ui~)@>mb|o1yX(2Yuqlra#_4TfX)~pXR4u<*V@| ze(IXAfBJgarT6^wxBct4Pw$-f^p^Bv`j2FB@I733yi>oTSoN*fxc&Y`ZPP2y;()i% zKV~%QpML)nJ-}F9yiU@*_AiCstDkwWYq%hbgS-@L`A+iDtuansf?@BY_PG9%+lRwG z&rbfsO*hV0#iQ%~a*mTOqpKgU?k8>w{>d}ShYWx2{7KiCES$}`pQNN;C zi+KLkzR=A(ty6!xzC!=J%&_@fX;r+n?}4>64k|A?gWgGAYU2kjo=ZOamHFwr7rKe{ zKC$5o{U^UX`|#FPVUIiS+HI95mS;YD|CK8bxU&BK8a_qIsm&B z{X1o#QU4en_d2nqJYb*LtPiw#G@jbzf$WFmnV)*6^MXt0hcA5K$=ANJ%h!JyF8Yt> zCT;bJ`Cj?>+?4&(aBBSjsb{Remh_Qs;i3O_M>4Rjp1zf*(7#(|*7SF!w>YjR*NfnO zGjuV{e&@nEed4!1bbj(c@{uksN$zQy#z7qUNY6JPCq>2W`#-y4CK^U9V64*rwZH76)}a&WM|zex2lda4G#^ zuX9g+|EWKm5%zi8yjvcc9N;tk$8;XBqc-y@inrR*x7RqjVW;cEF7zMHiqWXQ&To3O z%IqQ^bkY3(&+-4G@4Eb|p}R-Jr(gW@3tzi#KDycWPjo$AG5_`Aeg6|4>e{MX>L-q# zh5ie&V$}5SE4_t3k6`*@8zy<_`3mN-J~SVHFHGyalYY#Pt=Nk%Y`E3(H|_hz^TMpR zT(r$PFTDZ}$AA0Waa8BoZ7GjG>G!X4=m#7 
zc5vFCWYxFm(VnI}PLX~=^%wdtEHi5QYntxowo$U3mW9{&DGbAP<= zr=GpHf6_bNvD*FTp95dx@u%VY4_U*YU;F+E{dA;&__*iau`*KA-)VkJ(DYE-9*;>L z|9eF~X!G_>N85!U>PP^6K&r-3~wJ-%N*rERLM( zZ>Vi?=FcUMt#@*(@$(nzI+EAlcY3^Gyjt%S+lusW&%1dR2mKQVy_2}v%&UkWwEI*0 zq8@&;m3L|V;qy=Qo!B{QAbe}^sq1$?d3kv4{6i~?gMRWa#P44niw#QFB91>!BD@c> zKH=5%vF<|uzOtgGfAki|^O?d&zVT@sn>gs*e_ux)NPfy^e&sWp`IpciuARF5IV=6_ z(Qw`Koqyk8;fCP4AC!Q7 z{@`1_|w*BM0OE2E?(6Ga~51zOBnOAT;G5yE14(zDinuuamTkqsH$oDp_A9(sS zPwhhgy!4p<#1+%)nbR+bPAS^F2_ODieoOmug2c)9>NkWp@l)4?{e9oM`@rnS=Vvc(FE_W4(y#bL*b$?^M#=Ud;( zFIm;r;!b?%=G8XrW^u5-LjUNg>CZX_C+2f};P~O4#L+YT+?08&Kjk&Q)?=rb*4cdY z$NXRqf7sxCfBMS6Q~w?|zI?6nI%M$M@u!=`Ve8Z1pV zuG|(l9HIC5H_dCGJu%<{$<){{SIO4`5(UR)aAD!&(8nkSse72%iq6`+VQBX zaVN;us|}(XHacO`ccK4qR*Xjdw=1LLjw3Cx(FtDoK<^~a>|Xa5`fFaTPWj+vUb8h$ zT@&_iy3w`A|M|B^_iwu9h41{qMlZl)$Dgq*4!iz5eJQ_Jj7f{w5Vzl7>w369nUCXm zq5p!Crcr;LpY>2@`-3a1tNX|DTy$5*Ewyd_ZW3pm+OP4b?WqSB*Prl(&HLZK2t4tghfW+(IF2W|hLPpyX^K12A(1AF+xKD!-t&VJ9`8uogyb>fSg zIQVS;AI;*hez^X&Q@`?yRo~JtpTA?L`z`A(^j}!AH0jR_-pMWY3q$q8haVfXKD2-Q zy)dn_J{olH#TW7o*1vPB4^Ihmj(gLS?I-8K&+BG;-!-J`-Aey|s+Fu?=NE6aHNLep zpFTT_whR47%ScUst*gEM%**T|KaXIqlhSp2Cvp7r1>y(EPvh7YSKg)dhc(y!&sA30 z@6BP_JAcyu)x%zb*Y`|w4e4?l92jSyU>5E%&h6}q`o~v&~&1& zuK3|24tgi?)W#259Ns>uBM!gWPUC5w`r&KBaKNd%zh~1gUKkFz=EUJioyIT29OYaBPUFhE` zC$#xooZil}pf~x($2!4W>UiEEA581;_d@T4*Xpqwuoqw0d84(z_`BO*3cGxH#Y;E3 za0p)e{C%O$rwAW97_x3fsVa0-yXW^$G_TMzIa!$QKIAY zpQ-sy5#6X$yU;)0s_CzF{EnM-nGWzM5(mB3r?%C1vp5>#T5tK-AoF7{CWiMPe#8f# ze*dg+?#)*}c;N#BUh8cC&$Bq#&+Y#=6Tf5?dOL0j(r?f64_a5~-z_U^`s@0*j-u(* zDzl4xQ1gg`-j!L^c?tRg$pf4C$oKC5@pkrsmd*A4Ulli%>Wo1{gD++n#>2)i9tP{X zea{#Sc2p+BPLky2s4j!4&Lxt_5tXEzkc60?BuS^_CJ)^q>B+4VlJa!AA@tz)UF-9C zebzbOHLE>y>i*Ur>%Biu@9*dP-fOSD_S$Q&U9eU2ovc6Xd&<}6%-y{e4u9$mKiT*4 zW$@VbC*5+G>VIh0DDOZ2n{@w8op+MY8b7*Wr}M*(`VSPfHT|71@;@3io&MGOcVs-~ zY2)eh>$M-lN&5RYTJXO=c;;Pa^l$jW!RPGowcF6g^dHR1h3}L6+!ee320ZlFW4J?ymm{Fd>dE&Uh{~9 z^hMn4zKML6-w{7~=GT1VaTD`a*5Ciy^*+7i)*J5cpT6#~yIdA_rQY6u$CMwWuU!3# 
zVpV#lHo7q%Ue~D|^&c*3Yx?WFxsK8FXvs+SJUFq5gEo%F%~oFWKwdxeX@1T3@PL!_ z_s{yy)E)O3ex!faR}NX{(YFr4ThqUrm5bk4&%X_*t>cPSt<(AO|0B!;Oy?K%Us|x# z^moF|?})9Qd4uF}{p~|i=hbg@YU8)(ZzL-h@=~;XCwXQkGGt@k@b-bp0_`-i5_mg`@ zA6zZmKd{eB_dd~4Z|DEJS-Di7@8jA3^=~+UJW#T#HGUA?te=MYMg7MLmYV)LKklPN z|E4>9uBfh_AIo#4yZZiD`RIo{(0Db^{M0F*2UpS$U;kQr{P>|KUwmr+Yqp>A(N%Wd zkbdm?^KMoyra$-pWL!nbs@7ip#y4ML*y;T6_xCL>6PxtMPyhBh%9Y^AF3ia;g3QNUnZGvEp{? zcqE8z=ikykyOKv=QU5XnovCNO>2Z`jk1gRL4?Ylo%7>pk(4JTA*ZgV!ZsNGczk>d7 z(nhJQG_^2UXseZ!q6e(Sj$UiO=|U3GPwHT3$!Tx~_M zsl#1T}&rHd>`y59dBUd1d~(tg?!CW44n= z=;qai52T-{|3Fbu(_iPuaTNK~8!A6>&?kQS1jz$!zpvw(Uv2mdC+Y8Bf1jIwb~~wf>F;yCIiG&){u^Uixzw(|ZK)sB@x@!MjgR{eXkI!$?5O`>QQf3J=Rr&l#s}da zE?DCAqVE$VAEYnP{Q2_pEe=2EBp)7+4W@aCKMddXqs^}W%F=MfK^K4VtSgVE-j4r= zvvOhHe%pWi{!6`7MICi~yZRAIPtiNc!;fv_ z48Hlyv`&$_CJgf~Ip8N}e{@W$;=HxazJD2(i0QwA|NlkbU^`bHr-;pb zbiPsl;UdHIcZD_Xd7LXY^YKn>L**k55>IXXU@e~Tkyk_J;j(fe4qp?7Lq7DSQ@?or z>fx{_7VYwIcRF=;{HeOIfADLTzyICL_O~q=k2QLyHs71TZ zo!?3OXqqQ|HDOw3ysjLse%?s^9;8of#gsR-!>0!R9^m%?{wU1i+}&jRlM$O`ybG8z|B_d_t#Q2ZoAd}w)Lm;Vm#&-^&c&2Q$4BUJ`&{H z_D_GVsIGLjJXgBw`;oqhe#_6rZ~RVe&G*>gB>mx_9d_CN{crCJZ~Vkp4nBJJkvxy~ z{@cyUh4T=3dHdT>U1<6w^QPWV`?LGs(q~I~HO{=F{$oYN_%-eoIgTQq=g(02iG$Re zO&?A9t*l&l9n|WyA9&OGuBg9%=0k&@-g4i(f0IM5z44CrS=2RN|KDH!{=(UK+JDxt z7@yuZ-s&3W*WV{6ENlAfIPjSs@PJMlAus*BW_9o>;_rplKlO~4+UoDc7cRc;(2Gv| z?IU674`#k_>z>2#bNzp9|G$ggf3KtS1d~1_8nR-;NroYaQ^BDARJh_bQ%j$pO{$Izn@m5l2o1gYYfB3_m zfBxm#3*S8)=KpcmDGUBqZU5b@TsVISS|1&agGpZ!t)2(F{~md1T`m<-|5ov+QU5uT zXMN;i+w-9O#6j;Q&usc=$}hW!eJIxIln-9?G@Cf%V1w!pyI=By-|f7^p}~LZRR?|H zjPFz5c>bZ2m5Yt1JmRgd$>Tn#*vt!Leo_CtU}@5y;{fuhH*`gHtnZePwf7SwAGAKS zfBceNfBg_o>%5Z+^J6Pg*M#BMr`~n$Zg<`m9$stdey6QlUH^Qz@(v}qk)HqQs$Ws8 zIPaWCg5C+*@xRX3`c3Spf2XLZ=?~9N!pD3V&rtJ-gFf+NTOR{ixmbQnY~tppU#D?! 
zlK#;8+!GhR{g`)$t@oYtmouJP!ggf${~6P9z~Wb8KYjT;d;jc6ylL&7M7VAon{k*Q zcGSOHRMhlO=4X1eitLyVy3#Z|{k^Ky!KaA77h1p6GhS+|zZYM)V9%NFf4u*ZaLJNK zmp=2Q0rf&R545t6Cf11{3pad+mxJ-l+dT;i>8GB#-BveB;SAp5(RF zPd-RrApTxRz9HjHR{zbX{$li!M~)7&|97i3#}0UvS3bM{IsA(GTb6(STk9uz9*pm` zS-DuA@}+u4{RfN0CjHyH713({T+?62Pj&RJs2W#?UQPLVnVxwk2w$y#jZfAe)_MCO zA3pzM>x6ZOpNKz0TEcjZ-@ojt|Nk-|eW8oeDSjWfTf|4coFq;d6!1qZ(Bsk66Q9QGN$`fr_$7J1FH?|+lJobwOetXw$G zvwm9QXN{lFzsv(9jve(c6=>4GoeMMh-id9f^CAusH(T@4@oRCNkNMN_{z*H`JmQ#- z4J)Ux&F6O5`nP1d@q)2Skzt{bLTv1)^ z=TqIxW_(5bpmzmJ?4zUT6sf}o8~pG!VVHaOX(zwpu%CtP*SPe=-DdpVD<8)@*kI1@ ze+RO1F`Zg!Yae38{+&Fs|6fb{%q32LQU7Iy$9SE@)8~!OWd|Y+P4Z@A+)F;EJ`+AKB$L>h1jVXjU$~@A-M<|Bv5k@1MEGonX55>gPTH*j7(}9jT1^ zj}?y^_1F7&)O5h1csR#q;&zVq_-Z;zg7<0-#nRcnho@zw5sqJFPPKmC0tmjyNbbsX+vM!xNz zyJeZZ-iU+VNj$ajgBI^dKK$m_KFBv*S$|9T!`63i8{Tu*qOk4fu3an4c?do`{uz@z z(__2j|6g=m$*Qk;9&G>Pl&_WTd!B%9PaM)pcZQ{Q=E4<~e`^DSP9}15h|BPnk!gcq4cm4f)OZ`qTU3>K# zUwZ$sTq>9+{Y3rC%9{Q0U{{J2ID~c6$Xz@tn+84Tcr+&>tFY2QHWu!@e>P-*jHy&&qhu0nXV49D= z7c#!p>o_%A{k{0Y4nxQO`z!alAZ<g8QBhH^Fc3zv!fxpH-g!%y~WxnYUu53w`lD zEc}LDNuVreg8G2QycJvF$(4%7y(*er|dE)3w(@N8`!(;*C10-}e7W zUOK<1|3FdOq(8rVF+H4&geQAdqY8N=Iy)UXMR!t z!NOzwuCS`-(YwN`&hHoxYj@_~HErRh z^n?DKhvWJk>P??qI%Yp&nDk}$UrqY6d9;Kl>OWjOGJaQB(RRf;9{(isV!JTZIC<^c zq>jEp8#m<}UVeV9&PkpJmB;KA^oO}`yzVa_*<|Oi)2C*Se){^W=*!-J;Z@wd{P|Z?*R7H$MFz zSnPCu?9Zb9BLz!MfB1%qXr(W8wS3KMA2%_dd0QW2=BG~i;3dy&;yv>FXT55(D}Q*y{QgZJJ!#vG&a3|a`tYn= z>~~+oYA@9D4^C}Gv8uIKKYiK%ztB%Exx|@o)PJ;y)b!Un?jve?bd;y7sCmRe?<8(E zeKh66o5ZbO<%4&!{;>Jz_vZfU{LhBjr=NQ7y)Rw}pZ)%QO!~I{5Ph}OuP9c1?Vb4S z`a`wR+xk}CsQ*||QPW@RdS3spdFTxK#BbyFW#vL1Xs;{n%lw)TpW!6^{WI_UljM-pTs=H~nM#!yjJj=Kfh19`O6M<_^PS`VVL2V($Zc>-D!z{fc5$Yp;Ie)Avp6 zbbi=T|FW`4f3^eD3Ee@)?PleYp0Bey)s=qmOI3Azg&!o}&`CwjRzG!37+!zTPBZ7f z{K~NC>_6=NrOW2Q!|{*lFred1)9t^bwxU>Zzun@I#`^@h53qOY*F46ncC^e}4a;Q}SZOM*Y_)qSf|^K50JpTWLQe`Q8cEbX48UzjR_Aevs!EKEuiCKj+c! 
zKlRfu-4k}$_N<4G+}FNWx8I)*Nk8`e;|iXCma3?ulSl1Yg4Tboc1w`?#qGaScv3y7 z<2*L`yxt7mL_W()^}$vo547jC>}K5aO`VS8!IktgPJiE3SKn~fM_$%HW#LD5oPK=A z=E?E@fXv7A;P{8}6eX)#!vmrlcGLW#{@sGb#&d-gZN|$>v-Bvd_l`o zUi`fOs*8BEttq1SA$ZK8m{^zK2N%gbsFL^%bi|a

    zVj8zTl)skWQr@m2cG3^L#^*F&G38C| zaLqxVzU9_$D{Hl#asHFz4Z~^w`2HI*dbOsn+Wilik0O0V{RfL`)87?V)fdkj`L^BX zWt#2p$m6<4@b zSDUwvFWKYtf-n93wfcD-!5j4-Dma?-=YO2Zul1q4#6h3e&C#58nr1n?BvF zT&QC{hEC$>X8xA=m$UyFDI&)23M<;KSjX*I?_+O$l#J1+uBeXn%Fp-C>HO2WG~Q%;SFlI^bUatO z?RDzaZsNfoX20kcQ)jYa`YuB85q6J|gDgKgG*eIB;jfAMaopKvGqcK@GT#{s97*Z;iSJM}AyRjs}H zjZbyMPUnXm^)F{q(_i`0^U2g1st(K#q#v`qonXH%A{k|DR{&Vz2Yp==Kk_`da@^fKU}@aEU$HT{6ChJ zOYQq#&GU+7(sEYx%k>ZBr}JapQU6v!WBSv-VtWS}gnXVq5PypFVdE#fwfwS+*oPwV z8jq8Hn*7u?VL0W-FPwJq&-CiRI3DvnLU}t0ZlvemG>?4As;`No8#eO-=`ZS^7c8c~ zE3B$7-iOHdu879CE7s$%TbdcKJMuy6L+62Cs;bXv_`$T!JE<@~wjy;+7&h7e6TeyX zAFGGWH+u0NSDkc|*E;+DE73pyUU>oZ5XX05rVHD(+3+dW`fN!i{YCxD5u5aH?=dl- z@p)dnjsqW<`ms~AeCFMkm5ce+wmNLkNq#MkpS+gjh0R{n9=rCyqr&XpZF|Y8f0%(j zrayIx`Rh+E{(yFgPyAW3q;b3chdLdf@z62q-z}&b_1EjDYkDv*z|@=ELjFQ1gg`KJnZ0uKkh+8ecan7w;4`-|T^84<+Kf6a(9^i=u2_~e02 zar3*9&i1b>>t~$)wZ1<8vX3s^w*NI#F8tbayL=ZOJO9wtalrrc|KD7)tQfJTzv?92 z#CR@`=-1GRpZSr84O)JK{^X^Y)_EPz{Md?VeQJkk7hU<~J3lc$;(r|z{9^cQ|1*%4 zi|Mh>^3VTMC)w2+e>=VZVJFA5J>#&u4FYQ<2b-^cQg#E9$=2Hh>SMC4bnw3lK_@DVYjqA8#)z{vMkNdzd z-=_IR{mY6*{nrsb(<2w#_Gj7;anL)-GaJ2{@*CpTzw*I5S${Zk%lzQ4UUgG=>$LSh z_@+-(`~O3wU;EtKRlC>kUy{D*tF5|W*RUo0QUCH$O@AkG-p5Txt+V})j!zu)T5q=I z(FcAIzoFLG?0?eVzs~eiKD$r2w11s9JiXIa>;D;kJO1os<-+!wKN#=+Fc5!#o%pHq z+K*IK?+3N*U-PU^^@#fCMQxM*wf})gbo8#Mj`brYWbIE2qATc~_ziQl2L#DW(efGB zXoenENud9l6TbBVL|jz@yncK$)* z>Gp~p_3spwHT{{#(jq$U_@~T{`Q6fO$En;WrG2jWYdXTGNE{yQ6w~d+I~^B3Y_Nwv zY`*jFANce;HxFCB_<(g+|KlM1_W5^A`N7Ga|Ka_YzB`Dqu%nB&ur$A_~C29FniiJUb5NX7Gd_*yKQpbkvZeo z_2;S!{pH;MVIV6P)2U&6`fIl}-zkz0w!|OxA1EFfuM@vlY->LA!Ji^|?eAsfLOw`e zp!tWEpKoy|alfLd{nql;AGSI3l3C9^wO`o&oD07D=xsZbXUG2oS-F@VYw7)eI_g&x zE9$1a-f3KIu0u=x?8lg2)PJz3wDDYFMcWnYxc#U9f0~!>mH+%jpNfnR(idoc>YR=X zPSPL3@u$q2{_sfH2ISS+y4ydxVa+Z45-~wbQ9xZRj-HI@85IHcY@42>OWj~8ud^2|E|ccULW)iALw0? 
zHST@em1gyN)p=Ncp2W#-LUc*u@HJuh?DTg$|Dknv44?V;!P~dIewx>MyZ#fM!S(*} z|BqzJf*$t$m-6#|LY&t@)PHF~QPZFK4HnUHI<~~N?bWG0D9GzLoxhE%yu`heI_wnF zI`Yl0^}YP?hWzt$7ytdqZU|T3b^Ojh8G*;He;&!o#qR&c^*>$p=Zff~DBfzj#7@`7m!5yneBx}!QUB3`qozOk%ZlhY9rMzxj(^Zc^V*k49Q{BW zSM$x^U4FjB;Rl`M8y_~9<|Y0x@034m=zjBT*zKvUzjDnXU2K?_bmpFY}A~j}^5|`g0#+@~Jn}@ri?8$1@vUn({kE7qJh}m8Q+lV}tNv zgFXBqO#Q)mTi&=Z?6I_O>qlnR-+xE4a^ZZ6^`EO>QLJk1ojm6GXfK#(r*ZV_?|Xll z*r>nG$NFd~FR1Z0_&te}57HNC{^8~4TU>d)lRU#o`uo?obM;kkdeLS5Q~z-KTNXan zrXSn>$8;R9*ZBwSORD1bqjv53tBzza-Z=fsnKbFoeJD(iTzNpJ_K;xvU4rC;Hf}lE zxc!;dlb2#z=QZE_*r4)TUg8ha@Bhq}8}D~x*ua13f@8Pan)g-v{7b%K{#HHznfNnZ z=)-KyYbz3G9A4kdGj9K7rs<#b&*y0JdEP+$DS90rKQ{evy`RN9S-B85f6`CNW40^H z?LHOMrx-TQH(!6tM+PK&-1hor@icz_LLZ9MgY@m4c+d?S-N~b`sDG=dF#TO&jq9t| z_wTMuug%*he#QkEAGGa5=VyNAt$gspV>a;~`C-XsH(qqh+d>$8`-wMhykry}`~ES{ z%7yLAPu1U_!mlV<)fyhKcK;Ll0$WlW_0Nk(HT}sOEu!Q07kOz`$3IT(QNi|C1kn$q zFVOr$%g?tseSl8#;qlmDnwR*)cDG!VKY#HWVY^N5d*tp{SNH$#X63^E*09(2@5(RL zF=bT*CYA1UE~^1^4i}LKlz~PrTUnEX!-dTM^BLPQhNpc zVUz5oulv=L4~0$7+2h=kPOpA{^j7JE&!>h{${%3dV*8Jd<|X5ccbxwEzg;$O`qO&m z7xnKJwN3gLv0|}!UYfP-A3i>}V}sU*_Km+6rgh%w_{se7_u>mvmh8Io!>9i;O!?A! 
z``jF=?|+-V|9Y2R|C#H!pc6Y?({JPbXXsmbqW%K~hw1AIE864@iOsxt{TMol^LlNc zFfrfi;8QezzWjWPCw1OQzV(LJQ(2@UpPv6)!$vo3DQm9y|Zg&C12L z|Gx73$5YHE?lezPtf-sX?M=i3zrB9wFPC`Kf3SGeq(A2|OplJ#SwG509Q01|QX7Bz zI!XJqI{H;Ut8cQK^3@;a{Pe0@|NYSC!;Y)Iu=8Dq91WlS{Q$L?e}_(+yAd=KeoO0y8lBfo3I$4PQd!iC5`~*eW%%olpX0U8 zjz5{VV*VQa{d*@X7kge>YHOZY$ji`~F?~CO=CS{Pc%z z=5I*>ar%k+j}(zc{q;V^Xk-Roi=Nb00kTYx7f{sQ_2??L#m!3IC$gvAU0`07Wl?u666yV?0anX{Pw?f56p%EkIwQ|~_szoKMS zYk0iI(arq1J4;+6o+mU_$JDQaX*WLRa%j5r(?R~c+an{x!<0_(?#dE1( zJm%fs_kl7{)8A?SfFSwQ8zy<}gC^!%UD99b$4=4ur=IarTm5N$YKJ#{e3NZ1|MpE` z(QkK|a>0`N|BrN~e>?vFZ~gzf#P6NvDT)=hW9^IU2C+>q`p%^y>R)P5sy@rgh#W<6gV*2G8 zXWok99k=~+|68Z|oup3vQU6vEY0^LaJXPyMdD8j%#GmR|%g?iNu{^D>+5edGK%UdI)~L4`ROq=H=nf(R(2h^e3PD z`SH3j)aM^P+tY*>8-xeM-wVk%)V^!B`g`$(M}9N>+DE7L_4oCiaND96PGBC5$Ny}% zrAl!-1$DeES?H_h`8VZ_`j?NI^lxh(^MXG`;-J_5%4yfKC)zihyKwGy9uNI-GZt~|6bQ&*tjDFOZ>cW zz991j)A7yUU4FjBljqGl$u~Z1kbG=V{b9>R55NCa&s`X{`qAGWyXC=k;j#VCU{)@i zpY@xS*PjpYIsqjs>gISPh+RAXpHGZ4zo`E}5oy%FD|O`4Zw)ms{olT(I{d=^58i*B zWY~Dv4G3=&hOq4!zn?!iErdU;ar@El8idz=|K8Q{z<>7pFZyJ=NHFR@Sd7@He;KXx zp*1NUtac3pLYlb-+N z1H{pv?FXbE-jBgdm!{W4duK_o{#%JXBzDxleAJ|Wd!FQ3AMk)~V%+i)zxC1Ake`d6 z`52Gpd#7=5lK%enUiRkOKl}8?{nN*GocF%Ns`EdGDF0xB`|ACF7*A2Ms%2YVrS=N?``1`?)9|J?|3ALUAz7h9{N^0>c6z8tm#kQa1kA+ zKmHVl%e1V0l3*_#iF>#F{50P9JZSxj#>39;MSnc|lIz1x7ykOtoo>33?Z}QlbLq$Q zp7LmM0nlTfaTFyh>ZUrFt#Ld4RGU0LM=-yr|430=(_iaq_n&MP+2gi<9Unf>yCSRk zc_R<>uE>tOu2?Up&XsP9+k95iA1>Hx@y;6_nT0QW_JND;-6DsF{XfqKnE$E#0a(i? 
z<48Bv8Xoe{E%g&;-ckQ$1xus;Q;X<0JzQy4I;d^`zxL?FIOA9!$@9hQoV*mRE*)2S z{7U-6j&FPJ`s2>uA?)(z!%n(rgL|mA>pweLxv-t)<=+q9xeObWtZF+n+sPw4|D(3e zL;0C!)PJ<7Xw-kbB05h0yfmxNfvN+1pm&mIw&u|Xevo|PjkfaE@_P6gCmgiViLV;E zX-PQfS2wNy!X;-ij(z@WW#wYq|5@ezXL4-AWIj?=y?$%IKXS^O=oR%JDJZ2KS6@xYGSDfX#f zvBu{#4kr1$zx4MFmWif+lF$1H^PtX9^NE8#@iR}5JkYi)ou~Qfo4&wF`orG&S_{9p zNf!3`+V&66|KlS1)Wr_daKjCM*THEjbp=S2p@G|4}aKw``M?x>83eh_s@RgjC&qm44<8U9?r^z z<7q$thVlnsNBB&SGCSsj%1a#di9hY1JdnN`>y(eU@vWeral(I|a?C5=xY^IbdDG|r@b2So zgU^mX2eNW8J?855Cw=Ny6suaN{j@t1^PA=u_0Nk;8_yM1v>7iiP5XU?ZTIT;PU85v z?;rC8jZb;;_rkQ!#-l&RSM0?X)<5Hzc>Vv>;EulQs@1PO5`O#rdp9c=jyv*u_5MGJ z-?l&PL#is>+B3zI_QQD8NBuiRW|RK>jsqTaHf+$-#zCh9;RET1I`dQKWIi5TNk4pH z!~QqUKG^*#Z1~s~t6ut3`&?!F|E`V$Zm8csGOnUzRco()`bRhHbbja;_3suejr!|# zjP&naksb40v96Ff=v@)5-j}Tp)r~yR`smz^ZVOkw z^5>VV`@ql0x8u*2)+y$bz5YMF{uAD=@H6kI|3Kks)L;Kcn|#}kDUWxtI4;CN?+S0_ z_pY$2=Zojx@;e&u6Ewft-f0}1q(97l$3+J|y3=sj^0Vvybj=)#zU)x{_BppY?I%tNk8`e*H~6AoENda7*|oU>T9p@+V@|oH{-z@ z^&cu&n)F9!=7VjB4$9AYZ}LGKSM%}r!nBV0nP2CNzZYNFc*j?s|J5tb4jb=t^%}EJ zxt2QH{|{#6!uLr{*Z=Uk0i|l(_TTjVi|Rz5QUBqhwn=}k<0YT*3|&!O?Pn~{m2TTF znO+V0vptY!Jeu#7&v26d{%OPWMi#AkLH{~OKeOk3&z%5Y&+WgJm5a?=#}{w4rhjz9 zPUnXm^RZ+l(kq0e0Hjv(`n`i~TzM*Vl6m~VXz zh~K-3ad=ZdHvRGXwtVf2xcTW{$EoqGq(96!;yZnhePh3{@zd{_^4uo3F`iw2IF^-5 zZU5g=zoJ;x8XmBA|Lu;}_bI9!^V3hx!ixFKV?b=n$KOi)PHg{ABIAVom^*i! 
z|CM9I{0~3>t;epqf&IB1e-7(-V5__V=;gM(a|FI&oNq@$r ze{91(9VZuT?<7b*Xya=>{$7~Yc_;kl$5!mc7q;8`_C03!zlR+*4DI($yU%N#{r^HI zD;KuYrvD$b@mrG5+Bg;H7elAKn%DmN#C)sk(>z7< z4=q36;z^x%6M4*DL4P>o*>|4(`Wr*?d zf9brJ|M8Bqq^jB<_I&^C=s2KLTjx{b)AI(_Cv}?7^BDCnnN0uE8rRokfB(oh@TEAA zl?(c?9nr7(n|LPa52yU~h|e5x%ClkcleaDV@H*A|Z&&&;9kf3`{U7iDy^}iD^l!)i z8c*lv)gSe56}2|5`n_U%w-}6z>Cq9J^RtXc9Q01|%tkLo{2=?qln)y|L-=%@74?Tb z4*AHRzVyJ1(Es$>tNzcq7r<}V|BPnk;z8!?)UPO3wf5=<(GC7|e%Mj}Jc~=C{>kTI_UXkm1Oege6ki7Q8 z6UVhWc(s47Z{>Sd@=~Oae;T#lu;AmHKk=T22g5$Uz4P~nUv)OTcK?TNRxWH``K@~W zr}kabNqMDe+0@q^^^`Z7Ot z%4d0#^oQf#|IRz6|M1B0?mzu7m}KUs0@TZT)D!?QJ!m{#(npqyB?Mq^3W0c@eGl?>v9J zK5g8*Ov||M4f=tWr@Z)kVOr;%jAMRm#a?`2)`l0&>wDc9@&8AjUH_t|Z-mF*f5(&` z?5N#JuutrCZG5Tzxz=kP`;n;sP!Xx=?=-(F$h@dCgirI@3n%7V9eIlQCA)fl@q^6A z&`CwjRzG!37-oE9!PM=(c}$pj$Zp@i{AFFQbxr$!zn1>~x25BP$$TUlb%c-iU-Hz? z_Dg?J|KThyHT{!1@=f==sETz-9>2RJAGAEp$KMOnI`4{6o1d>SCJg<*xMZ8B-gR%-^XNO?zVF&|*q&_vKcM{JhLha?tdsPatKAW# zzo`GR!c)`VNnU#`LGsyu+54~N5eI$Z$2Q$sS-Dt#F1GPIwfhvY!AbhVmcLx^kzZV} zL)d1o!8dO8vcJ=h?f(aKJh0*aMOWdMs_CzJZT%c!I#Hh> zanQShwc4(|E37zfrXs*P^&<4-Z_KUUP%^mm0f9{*1fWInci4v617LGs#nPxM=z z_Qm+-Un=?7DU$EqMBd5z!?ruUciG?1`dZj_t)2IrbNsXL+Vu}ZIu5w09{=a+R}`z- zE>gSp|ASg*eYd2dzwgxYK~4WYiPt`VX&u|Icam4rE%Dp9%G;FBevP=*DW3!bQNe|-0^th?19{Py`b&&tI+!JPa5s7<~^t2KTQ-LO6NU8!MyQU7u# zHT}t7HZkAh2hlOP)=tohi#_fN$P#&k4D{4OTi~5%tP5QSxQb)dR_bufstE>II2@GDAI zwf5>azK+DO?f9R*bBRa&Tg9WA{+h@82>t(`>7Vp#efEVPedELZ z>m4!Y`b++MH}tXh-!UBr?5Lffc&qI_wb2cJdmj5H@`pM2*?*z*kse&L6|T7^2>|5LA+bN_)n zD;Ilyl01n<9i2SlK47M|`kCLj?SG)CsOb;?U=gkK<@ICe%5Zi68QqTH2dxj~P5sy@ zrgfh2=norg@Wb1`#>|^`dduK`{ZsdR#kQw>efbKTrJ9ZGFx7=w|(PBsZO3)PJam)bw|f z-_|i?a8@ImqW$3Hy(lWV7sQrGzV zQ~CORfsNOfEqAfFlQ6oOU+cWa=_l$xTvXKbCm$RuvSOYq*5gp)@U;^gf7%EBUYOQ- zSBxIVr!QBU)>kjSu-$XF&YQXSys-UepL+b3Zy(HdWamGJln>lke}B|bKiDTWYwzR{ zebvstX+Ci<>c6z8Y|_8>|EIZ*13IzOdHclAcwjA`c_|VnA3j6oqvNcoKO8*mtXtPP zc}7^Y`;&jU=4-R4v;EIlRxY*c&vW%FidC(xPwk8UUt=Dijilg_c@ zdOm+~90Xc_t*l(Ujw4omO&r~<|6FpHv;SXKL>l#&=dYX5&!RxYN)fZFSq_rKcJ^lOQ~X^kFs{>>>* 
zgKkm(GGgPB4-QE^^QmqBNnZQ7iTPHSjBozd^7Ad8)Op6kj}7+lhYeQy*iL_b_xWMN zjTe9Lv=5EK$NQ}v_l#xb!gkW6KYB}cd>uG>)NTn9H=bPl%rENi3Qwc{gGF@Qek3o= z>i%y|?Looz(Sq=U)`#k2e(e)G#k9^l;Ws}vsQexqRDamu*}dL;>ATMe?)mTjW}UMh zBhTJ{$ydy||NlT%E;XIBe~FIMU+b;UTrzkbm}k_#WT^3Kzilsd(pb`~7b>D;K{(arOD0_B(m+#GkJ5 z8=vZy`iX;4|GeNb{SzJVn@*h~yUNGk3mFe2-@AgXdVS#s$v1Rm`s6?V?y=7sc6jBB z7YIw7M@t^r=P%}`m|s!efBtB60Ifdfk)U^i?M>D0Q`B+Te>1oDowRsLnJ^9pw z_~I`4Eo%44?r)B4m7>-n>9_{=fC7M5}QizAhHIKi3a&o376dIi&Z z@3c;l>oBc<;-%Wv_7?T;W^pn7U15#uE4}|}UWTjvzv^JWzj#FBPLTOqAF2Ms6Z6Oi ztxowo*r0!}{IJEB-gwrjpFbjO;Xd^Km;c}w@HL))J6ivLt(BDv+Y5E#t=9O_BkhNI zfl>c~qPnKP&JSjI+3pRUWWYy%*r4^Hy!d-zT4#MIkJ(8dO}t^9g&+U;-+ysbSm(OG zUAD{5@1Wk^e_L6(FpqphZI@u8k3`39w|d^j`p>mqk^Z9ogGHoCf9_*XzVR`CMdF}$ z1#9&>OY6u3nJ;go0k2bLW zTGB_;+W2;s3~Z~XZ{>;l4;7hB`lrWB#s`nnaXNzh?;HAoX&wGv=;2St!=GR;zOdWv zLr2{E@@qr>gSV~uvP*ONvh6?5%7xc=F1P>epo1afYF|<{Zu{R*d6GQdKj<^+KU`$i z^w&DR$0VPA4KhJvYwX21z{&x3)PrkDY zpB;Y=>Np_pzq$HBCw97Ke&~knHJ|y>U(|nT!BNv+pV##FEcQBT6;-iL^WX!$E21m0 z|H;er8Xx@LX&lsXSJY2m;fH;F+kE8w1!39J``5U6!Ijk8`|p^J19sGQ6H%;c?VUWP zUv$If^<=j4M*T;M%0~Tn6khr_o?QI!5eL1KxY>O@@|maR)#{YbgYaR4J^bNK|Fiw= zBd4ty-t_1#|Mi6@FQ$Lfe>5u>4|4yLuKE?lin`@I5;Q*54cqox^rv-E|7AsGlm46! zF+H@7alMPhaUrk$EwRZ5tq&dF{6ovnw>bTRPV$Wp8%*;Of7s&WudMaW&tDm~`rzk( zd)@zD>NRfXf4W(@u%Ga2=Q@vi&b!4UjXOcR|6$sX)-%7T|7cOsq<>ra$+zdR zTU5n5SFEf3jODq~-F*E+8qXykU5zL09~)Gjw2t}|d)|MtqkSmmPn4%0rrGwE^eNfZ znm*9YJBeAn_Dx?=|FI&YroYy;=Sw~L#*>Sm@rZ-oN!)B5w=ut^e0f8i(>MqpHrU{& zkC4A>ue;8B;+ha{o_XfW_Osvh+Wv<=K>q&}UPbAqS~D-NeW089;mf6le){`9SUOC9 z#!()xNPfEghEbY4;`mxzRox7{`_W_ zoqN-zVg7y}-E-rsZU5QsX62H;zU}&3t*`Cx=>v4agKl1J!z3^2UuHDv@0VufVmh|O zrVn%^ul?bP`BulcistVuKi}d>op+LN<6?v4V}t4s?H#^(`l{!QhW1PDyz?a|wYks??D4)6HyoF)EbMN}}z5liZUiMAfbuf$sDEBi z)%5Qxyz%-UO(fs8`TBcE31Fx7%rENSDOhUyJK^Sig#47p zYaVeh^B$3owXWmi&Zc*8&zs`qz(~o&7w!!ZfY|HsPkmMWRV3}^8 z6YzuHNxX*erSTsAu>WJn_y6FmW5WS|+V_@kzp2vyC>>`Y!9KnJTg|U|I=)y@N45F? 
z3+$S1niusinVajBm;N)}b=HzU z-m*#9^u2GN`PJ8q!DHV)4rb-Ty!~|D|8&%^C|15@<2t@r)tWx-`~!XF6YF^0ME!@0NR$5DCy#kiXQ=$dLHcMy@<5xPE4r!P zKdj!B?w;F!Q$FK_yVm-~?SI*2arpVv-*0o_tU<55_WQ>{9S3Ze=YQ;fGT@i2YK%4o%zDLdo8_&4$KR*0q_5VA(|Bhwl!o2;i<@b;4+q^p(XBD;>A$D$e{%IJidE@twv$KgEd{;W_WaRzN8(ZcWyPaZPxNYQp6St& zI{GG$IOv_^ncb&#iugfzz?xtAJP02)*x-jR%<0~6`Il!b3p;=2t`Gd6TYdgLA}g2L z@x|0q|IV+l~+IgqZ@V;M*T+%nwtLbEiIzs_QxHueOHnBYF_&s@iSjA9pC)Q zhn-?tSF>Hx2mRm&)gPwbw#$32`^&0ft-0qM`@0YS&THI0{|;y6V#l9OKYtCVKUZX& z6rIHR-MUvh-CnVy{$oW&O@DY>MRc70d1>1BZr-V_d3^5crDGazvU}>y_=-(D_`{61 z&$wgc%?E@{u6XB32i!BqYn`3{AI!?d^j}Z!Kh{w{*r&D=WKF+4|Nm6=>hC+fEN#?( zsW8(&{590P_PG=Do9fKpS$@97`y_7TxdwX${b7p_?fP$vNB%8rapwWc4xjZwuXXLW z3opklp!K1B*7oCCFV%57R=aloQ~l{YqWhFq3qyBjj?Y;j==j&ZjHLea_6>It056>%c>(`ZZwtY?3A2!eT zKk}FdFAuYy{`Rux7d;D4?ep)bj@L=Xbz-M$c)Z5jc~&mwZj za>OS6(|n%4+VQ6o{`C7Y#$lcy{$5DFp^j6t)!&ORZ1nE!cHI3H*M^N(J7?GSp#84J z&OdZz9yY(G`@iXUl3lIo%ie!A&$chNk6db^{@vnHlm6Vtn0XmbUQ|`D6XKwEMRX!2)54?Bp;+N(EOd{=UbdUKqvX|cx*7uOZ?&39X@*Mb74j}_U6m3>)W{c|Aiy6 zaxonS)b1<)|NWHIFVU!X&Lhotf_A>1ep(vu2r|E@|4`vEekXAcE)$!4#xv~G{87R7 z7boUh9eIl8A6S0A#gjVkB!ANS&wb-{fBDEJJBOV%*zUd~PW(PRcK_QkT3M4-csQd2FzUKWufu69*qX z`-rgh#BuUkaN9mnLQ zY1=`qh_x^V-#gCC*!U`2R0Hhk@|;WSq3# zY|Z019DaNLI+6)*-2Rsp9@AgzsjJ=pq$TzCdeVIOK%e;G1IYuM`Xe7cL)Fb}9j{k@ zyzjGr-;yncL+6vnE_?9{>JUUE1Aqc>OWR^YWkzs(8PTBYp8kccTLP^oHXD3Zu$9X z-0C&IX7}KMFYNTtkG}f&&#n(U@AS}yzqzM6|9O=3VLA+`Jxz~4J8CP66?JRzNaKBi zwg2CA6XU!d`uol(12!&s;BXmk-u{!k_L7PD%-i(R{>+~*Ki}e-Z}p6a9~qtE6UwWGK=RCIQ zkt;9g#3l~<#E(sXY&Vv#{a9SzM4sNHTUk5lX@YMkdI>R)Cw>F<|G9rJbYF_&@6Z5T( zJg_Bsj7wgMo_zYoZo;NK^@n}Wdj7uq*If`gCx7q!_igY9^RVmB^Q>IZ`Tzd+*JqSU zpo4chzG6H5{b`w9?f1xU(hWcJ1o4B+xAuPDLr=yx?2#XqYGR>8MB42C)y5AGeMSAt5gYX%5#s(Y7o4m2AI{YeGLE7|<9@-(BR)4_^E}ynI+7Rl z?-q|5^&css)$zI2C*%8(GA(Po|EH6<2U}`uAF1xxmYv*!4dH+84NXx&JxWw!4nTlX1iwbyUBd|4jR$PBH2~P}J7+r*5!_j?=#* zw&lY&DE{_41kn$qFVOsM`S})yAJlw!JT{o-sUN;D>-uB&e)k`K5jK0_{F~QU|6zF8 zZ`O2PQ?LJTW#z*2MI9*7aohiEHJ^SuYAe!D)PJyun4Ye%Dju<-JuHu`zX7$CA3lBu 
zi4D>xh`$$-Z|EkD>#-Gk@dbC>;BQVnYH`@`fFIrT+`8MsYu~>xFL0{ff3KUBi|uz? zYU}*PiaM#?b`#_5_laXi{fCOoCjHxM2oLkJdL73*iIdmde1P^08C2@!UjS z_-mN-WpVX~g>Ty{{{DA39QdPeY<2M+)%)*Z%0HCgOg;bKQNN;CmENh{ZfU+#%oY0- zonX{|xOmj4|18Nv7oImm;ChIW4+cjo`2*2AK_KZ8W!VI-K_sy3YZ@{Mg5l+k(&NG4%;>Pw*PA>A9;G8zV<`v zcY;=@dGv!n{eA&INWND-L-a|zo#Y!IHkjrm{&28+bkFx3IxQS}_u;?4;@Rr@AFe+fQ+{wk`Tv6*+wXTJ z&Kf<JeM}wbu6WX}S-;|g7(>l-iJ^D;}u*0@{ zp7O-gj}2zJcqb3+`kyhe?L1t=?=RB6q$=vDeYKSroBfX2xl}~`M~g?MzbmY9JiCca z|2*G@ZerZ>62Fbxpug9A(0CejHol~;mp^QD<5TzE|F+x0jJ3b=*B?z^#CUf8Z6qs~ z#@D^}BU#lt9hdtCn7^gGPLTOU{l^NA>7UG(>)6TnuE>u0u2@%}3y6c>711j0U13$v zE8BzR=VjQoAM-o)r|M)W}JSMEwJeuBr^Uvx1XV6d4>a~y5=6Ac^ zNyL!8bMg20eW-k3`aAJ^#r7UzGaq!b{h{&^2dPhO+a5XzZ%b_Wt&V=3#=%MY!(K1l zB42jZm0_PxT((x{_UT^vZ2vQ$a7$g4RDgTE})4^)D-$^yhz^nHSH8p^ir!^oig4(DBIw*&fI zj{lF$%EfdaQhPf+|J+g=)N#aHt?3utu+#aWU(`P@svGt17jDz3Bew1T-Nd-%C4R5A z_0!GD#qx8piJM>N>7B;GN&3SkpWb!LFP?Ei*z67eb>CyZ$r;bS|IKwA#ry~5`Jbie z1xi-6h6m(6%-%^3b;Nl-qW-0#jr#8(^`-}W2@(gr#?3}AMf_mepZTdv#Bv2Vaa1rvGSGF82NJF6I5lUT*#65@+olj|8#X+o(#1) zh~FoC_{jsUKjk$)<+rwwQa!%NMlP?{0@#Cw*W5+*ZS-IHp=Pu>tw{z_Hr6Y0HJbz~E_;&xRw4cO| z`VSSgray7T_I|=_dbGr5T>L3|m((S^wS3}Db?WzEl8-;-P3^GtIm58Slv$bW6M&?}!^{{X*YUjF=I;BuR<_L=EIzp0IG-buvj={pyH)PJOS)TqB6 zN0}ZvzVW%Dy3*P5T?5MUFggb-;$WHh06 zvztkf1}8vF@gOJ&2w@PT3?8|FD2S0s6v2piK^Y8#2u>V96d8{)I03#@ zd#&&4^sZBOlB@UU=dC~L=`}rj?We1&tE;Q4iG##b8$ZWU7N@WOt^V*N{V{HM_pgsT z?vlUk7~VB+`c>24*ggNhM-~^;VNmVy`unf8+KOUzb@EQ*IzInDp8C^y7y6Hul{T&` ztuAkKd9h8WTx@u}6Pvu|{Tf%l)v3+)Vedx^C7-+$EuVT&=jpM*KK?N6BTvWc|EHb$ z)bICOuY3RDP8JvLI|I^>=`$#Kta&}8eRBOjJT~99RKnXDI5|3u-$~r~Mtk#3_k8TU z=C!@~jAMMNkNI^z*eRxU-X-H=gUau*LG_2Xyz9`L?)ues;Vpw7oN@Nu%aYgZWN}G# z@?8IrYg|#RVmoc{NaIe>-hb&Q*SO|0&qDv0QKx@1{r*ww8P_}Eg%3>qsgC4P zhh69&D{J+Cn~Y(4{JZo|^tAC3e>zXI<1D)S&D0NHxa^u8Us!MRDdF0m@Ac6B@4AgV zJN_Ka;(|Z_Wpe*F?MJe@Ytvcd9RDybi0;^h{*98QqQ4VvwsZ2SujEIT;(qn(NgerM z+8_RYnAUmv(s`P#{(gL+vG;!WUhw4u!bTS^v)1u%JQ;rb{l#z=7u)`)>-|4-^@EJB zDA7VkCy(s>8+i@!qaS@1`sZb&qNmnz97R6;rbrz0u4L_gE?C@r@{EW6U8J-5k>?XX 
zb#)jvn!3_E-}cS7U2p?0ACytbeC*Fqw~EOn+<5 zA8xSiKYo39Yps*NvBJuiZ3n;U-_~)!mGu4t@GDAIb?r5dZt&ao+K?LNSLi=jdMf%4 zNFMhwBj0#(iF+qV98CSzhw_pK8gCM}IvoezrS*rc-o57Y=l|r&u=9rFH@fVZqj-O_ z?_Wl;xY+k!ZM9u5id9|HS93$n*ZFv-w(>FWLjR#MQmcPnMi<$S=h3wLKQJDAApSZe z5BxvVf5Sh|`2IZ?+!h+|$(QNu+V)Q7W7nUIh`prxd#81(cQfy`)A{kd7y8dFGb{Q# zsc$CF7j=d@9y)?v>&@0Yo)7#W{@Oa_BcA5>^D|CZu--m{=N&OOJaF(4cOG@iX3WEm zKO0$G@GtTH-${K_b+bO1zsBjO(0{nBsOYcx{Emry>x(`$k2vUE$-2nvhVkq2^O&Bc z$7j~R(>MqpHrU7CTKSWwU-ZVu?ryE}rVHM8-oex0xAUKQ78m^a0dbj=ZoiUMT~o*Z zP+&7J5SUfRl;83e_U7RStF-Lbu$dn? z65ZYXey+C0;p2N#Y%t}+-w)F|?|R2ITmAj`!rH(7-3zP#^vbZ#KW0w8;mNO2Z~Ol| ziwoO({*!qB!;b0K(6|#!*Ixb2l_X;Q=W4eF>9f#(Vd*jbRZs6qZ*lw?1j+YKY!H8n zX&l@7N%}Cqjt`&JxhyXIZ0hPTO!YV2eB&o}6}CmTe?9-q`AQI;bX@AeG>&fON7rP2 zh5jQYhw1N1YmvVG)#v}|qZ5beem~~K=6SGrlTRG1^AoSZ@SdBW`R;R%eK_p#r;k22 z`aobjJO1osaq-~F@%+OY>Gm&K)wS`dZr(|bVbXV@|7e*}tG_Fwi|of6Vp~5>Z1e(s z;-@c=JdoEraq26!@-D4Ee0ZIoJh#j(FNUM9`|F!dTxJNKf7$0R)tBENF+Y&;m~U%9 z{;;&7KRoIAhsD)j>utQmrXP@gcwWs<9lC={=!dVhLi3DoynUJRtrZSE;N?%B@K&B@ z^yhsSe|~wq{`q9nN5{>QC7oybyA{Sy_iKgzkqFPyMOQb?@YZX@B_pVOnS7 z)#%@kFKlqmfm1H{`itSsM}O_~AFkho-_CzFvbeAv*!2%>^@C1rMX?syZ?B?p`pnf< zq|ZWsS4JxOQ#VpZ|H=L*$!lKKo6k7R6Ey$O*!dQRA9RurkH-enyu{y{_}C4P-22;S zS`)9o>dEi_cnf&g{{2uE7sHzWACas=w+4^2PRD2e13!6Y=aOIO9~tWOhn@Qh7>|zY zon)9^^LuUcYhR3yzaNrssN+@aq>nn@Ftfe$n~#}wdYJLMW3rE3bRs;q-x-zuIga+5 z>h*_%>Ia!mE`HX=m;V1_TkEM)yU;%})alRnXw1j<)3K`idLs^cm&L_w##O`*k`J1n zIv45eb0700->^@9Smou_4?p*XUBV>4&lbfWkW**(Kcrq!MlI%9A=#uu~ zU0GdxPUHRqPl@fFpw%(1qWKs0=HUmK4}6C3IjLV$NZA!v<^e!=&pz{oeQA`)F7*d*SZQx1LD7eg2{^#r%}G{paa+ zm+3-(W;=Pr{r{N{d4`%-=s#3eRP@(4diUcYkKetL58Ak@5B`4W=_A!OwKcCFUzoYc ze%F5CYn`ykhkn2H=Z=3h{C5AJb`}@rW5@p;^@EJx5I<|?Y1jWdiP&)qb-DNp{pXgC z>hy1}EP2d}zHIwfKH{KvNgdy{Md@r)nS=*rvxc`}L+iyeSti9urAU2;vv2DB3dipE$pI=s1^mnDVcwbGg|7?q&=fyj*$@`DL z{`1YZeq^5&PrWN_c=JB@=YP1L=g~g@j_UkC=G#!eqF9S;e@^3y{B9k7(|8a?id41nU5Ntme#WoEugu1`=;k%wUO%~H(qFOtFDyM3{hj1B zCkc{oJm{r)#6h3+ttO 
z@5=7F+nINz)t#qzrPa0j@rBlpCVzjg=~>wJCbt$iXQNQ^JSdc|No^AP{*e)_*3-cGY)ng*5tt(-tzLgYkzUYw}vfV zzwxQt-fG7+w*BW>T<9x5E#7}`p&f5@B+i;VvvvH+{fFrbY)D0+fAm!JcM`As{$1;M z{dmn|eqicnI|IoBnFqY)r(YchUUo@#v?}KUP%qXCA{+M?Uq2vATHOY2JVQ@88ec^F6=2 z`sLwp(XJ<*wZ+u#=db-_p7yzGRBi4f&{11atYY3dj|9=Xa{u{uZ=Cm&LjOh?sp#)C zU(xjEc}Y?8c;6u(q%RPEKP2Cfz8D`n>9dYEta0ZqQ}#G(wXo)0XMJk&;{)*8?;l69 zxY+fF8|wdWw$%?N<4d&AQT_J$i@aRxvD;!7`o~DE{#wU;Y`bqLpHtMl^!?iE7*`R0 zKV&|Jj0YdKVn4oc;Jdf}>9Xs5I2`hopImXvGu_``yf2GOx}W#s;{8uvsGGM`b=Tg> zwch&FIDF(`gQj1SPrMG(I_777oiF}69{geA^Vtt) zTs$wV@}ZsI@W6&YLs#4Xk7RMN?Z2V=GjFgVdFh(IZ2NbT!TyFgcAXZh_U?rQ8le)#GzZ1SB?|K_~8kB7}}n|8zd{A{m$ zcKkW2HnsPeB zUN7iieQ3V=vEe2h@YPRxjob0h+$=7p z*VOp`n_uK}7ARSTPL4-{^kvsSr2SyGCBM*rLHVdof6k+#3*#HQvby{HSe}b+dmiWu zBoAbJ0L@Ro%ICqQ^oQ_`Z_Zok+k1v>)_wK#>sFl)pI!emn#INRn4#z2R4@9Mtm>M0 z-~E3>Y76}rmXB;aCw_RuD&l#spEziGX`k32c_99N=v^`{b`AF9E3UtKZj}Q!y(g^l zk8#(Xc_X@So!!*PrK>IS9qU)cJ_JInhng>cF}?qid7^jgR9kLfU&#l_eB|8F|JWEVQB9vrt|dnXxY zH^goYoEjh4xJe%0t1~a2Kidv8->yG=KzP0St&ZnO(fsozA3H_sSNY&wT7Ove%)hyH zHvdRi{o14dbi~dlz-QCM>Q@x2y7o?dmGf`8_|ctt7y8GFTK(T5b>vs{kJa7x2g^(RUTu5+^DHiw z&-_$B^Q-Nh%_qsCvACKc=uX!c6#xVar9%?|18jP6ydS$L;IAh?)GH1 zlSg*{2eqx9zLlrYKR&AH56?QnSJ#KvJmO&LXPzK=Aln7;3hFrUT6_upt(8WuU*@8H zzt@`hp0}U%i;I5-j~#!GWO3nqim&Ey)r2@(g1r#ACtJd4wp@|r&x2i{stc*uh<%(!Ueqf>WXE;J|qc$a(jxPo})_@|x4 z#jmFKKXhs^f|&lEsC5(D<}}{QWSkvvHNj zY!`>M&wF*eVf~Fy_Zy7h1d5j_}z2e?s#AJGsW4V)A@=_1o{S$!lnS zN09ys{iCO%zmq(E$3edB$KccPiGyD2&DK2h#1Gp0it?JDI^}~G96z0_V3E-qJAVo*1S@Xe9-#Pd6|D` z?0kzSdUz-K#)l1(j}5B7wen`uZ@lvOlUu8PV*hs!jqDGP=|7ak#jZbGtIz+xE3Z@) zI;dT_{vlUhjWfSu`=47z>h$Mx1o`l%NF4O8WL;$YchR)t1;(>{?F-(DpZX-v<6le+ z+aCGx4MzWZU)c8kM_)W+#%7FX=bs0&xX@qz$@u@{bFmE@nwJZ*Hoo-!Cu!em7y1vE zwZ`j8tFT?U?k-&4WBbKiV%`awUh{ix^C#ou?}yCCP{*s-Ngs8*Vfp5z-+FnuZ-f+5XX=c{en_BS?RR{!y`t{?sk(&9Bqn zYhLrh-h9R}y_DDdNk90(w9Y%>H$OH=-`Jq~L-?QdkNez4?+oGG54`c-2X^xsxBX8i ziwpYuiDUo&yJU6swtjS+rdOV{AM9M>h5if5M-}~D=`DVLvAv+_(Gc6-2V8I5@)CdA zA9z zcGq70AiBYy&X4W5(0^gcQmen-$DaOq|F-Qzbs!GXM{45-E#5AgC>HuPzw&!mMvJ&B 
z*WEntN^9}`Ve9vv`s~*qUOTj&pYW<*opBE1+4;AjNSE3B@22|yUrzmsVs+PE{l>Sg z#IP&+>wF9SN6O5K{yJ~2BQiZ2$_qNNiGx1zTOZ0x9?14Up7}K&KEoP6b#)k;uX$nW z1Nm^+`spbfeSOWD@Y(VIP!<={e`Wptv7>&F`D-5%?QRE6r}^}mk8KzFkCu^&{!a4K z>pAUtWPHs_=j#(cx`LH__~=*b$%oG{&0Ea;)`SQ58T{CzUusPZt6lPk3qJ?Hoqrq6 z;$qj~w$*MVsN;*byEeWxHDB{=zk#iMt%1{GVnu(=`~ThX=dNe(bj@d8GbQY@*J^(_ zWUE_DuMenRgT46gKmI%|DsB3!?)2rI^uzXQ`-!~QPrMF|PxiO&;LAZ+izRrT1S0_PtSu9H{GbqrJ~sWUHQoL zkJci6tx-l7`5e^{+xA1sm(J71WxNC{`ML5Ye8%H64#I~G_VI_-DeYt4vi^x-`&p+< zn)Bp4;kWM}8#<0+es}!+<6|CvMainJjZbyM{wMm!kt+J@IL&_Ne>AV6XX3X$RG&(I zTjoL@acr-AhRS0$@jm|6jBkGbORsu-?bgPd{&>q7ui6BDyZ&%Ei;I2U4Dt6zcX+3M zMX|bTc)Z5Z&HTyycs&){e_pcG>5spn2lE66OP1pE!d`;pgVu-Ur+(}d(>m{j-~8B$ zX?<#k*?X)rYri*M7-rvi?};z1|30sEcKvOh#f9xPU(jpg7jL1P6nYncHo`Z z#KF{Wy4F2kx$ux@Jc)krl4rK-t*`Tkw~gMu(ryn=3_EUd*~Z|{LirRfa~c0 zk95=zCVfe?s+;whYdv*p7y1vDk&6DV^cMbwf~H4XZ2SD{)YiP_MZNh}H=ucn<{un8 z-{MJ~cfCAjFQz{%>HQzp*|z)x^6g9??bpW5vz`TibBegJE8BlA@k0Nh@{#eo(kg6M zu8aNo3WBwIs6OO1m-PCd1-<-?1KRfEB+r99FW6uof0(tyQ43x*`^{mikL|nMqsK3V z-;RIUSzOS|{{JZPdmT@*yKDT-@#4caR37G6=s&m2tkr+rGWt)BKV1x0zIXMBpLv3e z50X#3)>ht1-eUU0LE~;->5;dc6Aqf=&wb#zzrbVrpFE3;?O#{a-#@~yC|T9D@#(~{ zz1F36q5p8nQqg~)^cLIybV1XpAvW(j-u1>UFY(*B%1a&y58IvjHQ(aOYkb&XpZwN3 zyIeM9%B)RV>m2a%OIKaJ4LqiQI=}I{|AALAt67Xsb;I`b*Oo%&SLi>#^wjGAfA#*W zeet}~U!{NMp*nbMu#Z3NyU~|kIVYSJIuAE5`^ZlRskifQqgh-k=YQZ;l&tESew(Tr zHrrJ?ze4{7B}=XT>HiNVeRx+^clS@Gn~QGSFL|}CpTQ)ai%mS`@lNCYkp61&!n$X4 zj=1wZn}l@_dgOy2UB`LNv(I0778l$9jQj8M0Q6$-)UPO3ckR^=avwbAgYIVMSzHSJ z7nTny`nwp;D!+fs#c%y-9(sX3@!Pnn4|yQ@ww<6OsC<^Ul>V^&T7O^Tl-Im6Y=84B z=H2@6iSXI?U&%Q6X7T@*ceVNESzOZjh_}$u$s;~DS)TggVcv!QBV|QJf94mX-R+xt zLswQ8^&=6o|M=fOuD8qmKm6dHe+yG?TmR4*k9^WQ?fdapmP7 zJ15NDWA&X@*?&3s_}xflek;W5AI?bomsxbz7I)%9H}mJ>_fC+0S_7Yr4%44`fy2G| zwtwjK`r)fX>qB|TW4_oargatDNgw#r`qU1SpLyavXPy3fsVcTR?Z>`jJf8K@R^E=H6C@7$#E%Vf zy@%z;E{f;1lf}jS^y|d$!5Tkxbr_}{_{DcTvH5VA=06-3uJ|?dp4J+MLg0`d1l@(rDi zo6O&C^>yi8>0Nw(*!R89jGTAf6=9#JH$C!@?WTBFR(HR*GaUxS@2BeVk5gMwtm>M6 
zE7zarl1rTV7W&7LY+R>#ip_P!Hk}$82M2oNmZ$Y*YaZj{2kD2n*ZPY6zx0O>PW|@N zU;E&c@WD@9`l0)Gss8?=lf{MiAg=bc35fG4X59GK4>+E5;FK<)X0&C>-_9jp({ zC(h?N>=e^F?}Xp{*dYC2gX#}qhYSC-#vxaPu=ly|zVndo^|yOxaWNeR)#m(LM{SUC z8scY7UzP1&^N4#V6@~u81Ec9PcdMf(6(p&iR2-WSsBYyfP??3+j8_U1z zzEyv8%H*)Z`r~JA_v)$iU;F)Uy!`EnlWn`}r@s?j?E4qB8OKolh5if6ii-X<&lfry z!rv)bisQIl1$ll!8&`SpOLq4-6+f8Pc_$U-$5y1S4#VpIwcP&K&6*w7_|~_#Trm70 zuXTJLw|V7RT=HAv{%3ybmuR7zFti| zU1@dmn*Sp<{Gj!b^35MRzf$KU&-X*+ujHve?DC_nuQ=@W*MvRK{cQN!J6@zOK7ZA2 z=Xn+vo(HkII+(5FRIWd9lEHXjq5o(ZsnehLC)1;$JfQ2fdEX}=WIPamKP2Cf@pPUZ zTd^NsYlUn6G5f2lo!1(7$yo<}>^C2z-rj$s8igKUJBtfC80z>E?XKyw_WKvD<9Tik zoG$!>!r@&nHg$aFW#cz`^N54+n9aC~_(A%vt#iHOrTNsEpYgPgal)65`TNcLAG9D` z(>48a-GbLV z_BZg-XQ6+iYyUzqsVpSQUD z&X0waH~HRSKN|dt*E;+DYcz|?Qt$uaTT6M+r=8fESLmOYkvjdG%`)26)4S3t@?E*^ zVmfc{%IMV@ZvV%&A106Y zUyegT>!YFLD2mlxr~NcJ|L?R;=f&#)-a`KvsnvgLsVBd({VP9l(E1tB@e?EuG~P}! z9(6j72bans6VS&j8Ap5x?BpF7g%ingJq;nf2k_sxjf?i%24}( zk34M9`bhGL*I`=co%W$TW~;xB2Y=XQ#^uXS|Jh|>m#u&Etz(z%p8wxpd502QO@Dvg zRzKJfJ6(Gx9^3z;`(Vx7LGP=lqJdbAg`EY8F2sXba$b7+6NAnMjoo{ja z0G;F;A2yigCH}CEN!)YHK zdGj*oEf+q!=Jqr1n)MicRX%@dex?hrYx41@NWAa;&vl-K{=+3lMSoX%i~Ya;j>GiG zqr0;G69>JMJhRb75kJUwVW{=Y2R=ji$OHTM!|apx9{2T4-W;|Z{^rLn++{9&w*8N0 zak2N|w%U0wid9|HZ{_+Qc<4{C#(eAMSLnZ>9I#gZ z4P!gq^Z&Wnb$razJION}T@>+yw%zA7b;{>K_^`n~{;Q)4iB~> zeBpDy`Q2-8{kaQYe`xFB)t`QVdfWb)mty|x#^ur=56qGUKZtJDCw+lV2#W20w0u;j zKkq~IU(p}F$WlCCHwrKLVA_ZI=Z>9kari+e`NoG0rg@3KHDUgoeRn_p*w#wldHxd* z9XAAzU4K55#iif%Cz4g@Rx@5j|Az89LFU&QI3qeL`YSKr5lL)K5@`HiKjOC zjA!w-@+q2M`+(1IDg9yA6La4>^_ahfSy%0N$P+hQ3ZHHNquK}9R-5q@#amr7KXmg> za!fbnV}6DHkr6#CpSt-{Pd@uUL+yv_gqreq4)%U+b>u0Uzun7^A7nm;^aUR_*vB8H z9kQSQ%%(er>970w)JvYY9$vP8uKNV75AD-*OvaH}bl(r``e*VQl0iMZh5oVgivC*1 zb&TX!_CGpr;-F9b%o8LJq^~rN4WA+N(s35o-&*aZm2U|be6O{}JM!JHd;WI#?eo`Y z78mxrzNNqa%GIwZR#$Jc6W!oX=Z9VB-zaNq_3!up3pyS;f;O&`&djdM&wKM~c~U-X zQ29MJsQz&9gLfXe(HXadLw0}7OXq!gK6!TjZ77S2?f<96_urZ6b|+cg{eNm#KL6&D zVLCXm3;koHR{yOf&-94d#kgRdo-V0Fm%99#c-pU5-lg@2slV#HX|2y39;OX__T=Nf 
zHyJ+DKhNS~^II|g|LLd510}1w#!p{O)eXCXPHGGN+vTHL{nPW$4XLy3L3Ok9&yQ-{ z3EJ~kr|-hvJn}*7TlqW)A2!&>-&+2evkyCT)A6n4?|AValNYQGf9>~QJ0$JgEp?}gTEi9b?LaN-H$I!*#9ec zUcU0`VWs1CFK+*^BK&s#XIT38;Cg!hRpt-sJS4lj_D&vi9V|BEq*&-bSXNZ@ccr(x zSY1PWw*5P?nYT}D^J~BG;O~dz8!CUrt{FGw5BbPV9~}Snfw0wT7rbGQ_nr%no&Ool z;$rW=ZME|Ro!IHx_}0;U<_!;aF7ZPDq4H6k{!P7)F7vScVqR7ib;?H^^sbC9^1T7$ z_R06ItQw=vMYnz4XI#ISe)@stl6 ztjUKjG@j1i_4DJTOZZ{9yC#lE8fNr*@(L zaG6=r-<96te8QT7^k326^~NnP@!Pnn4|$-yu9&~pTOBr7qjSIf)`atS+kW;%_qHbd z<(4lV`REhOhwC5gxU-YR#c!b4P}Fh7s;(Koa{YPUThDw8{pXjFI{mpHU_~eRTv=Vz z&lPMQE=WE|U!eJQUf3y;Z|H=-VlSpYOnPe9|Jvt6_k`81KJ=v*SN^`&I=lX%lf}jM zGb`xvPp*DYp9lJoXrY7p*>16Up5bMlh5if5+KT@0OcZAFjYoBZpE&4UQkV2g9?15< z>)!m-DIdIyYc}yd`K?usyyBFVe{_Fq)qT&7|39nwmF<5hi;J)M{g*_mYx-(#srB@e zCw5!xLjQ&3qdNV0J(?a3uKykm)A!o(-Ln_IKz+L1RL;K* zW^pl{bbRp^IwbvY-YoT--H^&c|IzZ1@jCI7*PJD`>D1PEr#Fu{=$*t<8(zkjmr3yfZmnlf}ih`%$&q zW21PhYx-)cZr({wIzQ|}|CmwHU+bD%NxkWji*3)R&X+joo#dH~E`9Rht<)(Wyv);V z;*5h0sz2mw|LT2LzAX!_2^akK_~*OVKOB(7#dtM8kAFWk<%PQAKB*dG|F60kAARIf z!TbvS8>OeBKl6yuMczl%wtkYl=CodZ#<4y$AAdhg>%0?w^J6Ra;|ragvxm<6<(zQH zWmmtre|`r%_Weu3m&@f3Q`lY&RR`&&yEeX+#fM#|SD}AiX4dJ?=Q!qNJUkDstnNNP zmgl0|zK66vYV>EjC(n2^-z%TtQu9*#irgnv5A8=uExz)-N*xtFNs?nJfP-zY_N|% zY;()PXV!kxWnuTDR-AVGKEI{j&OZ-jaWOqMPVWD#{E}5&({JtZXIpBRccFi$^w@Z= zv=-T)jfzdaz5WKp@13INHNPb``5=9P<{uh6-{J$x54swAG5z7Gv;O+_n_oE^p5OG| zci#PnLHgnNzxMq%*KrlaDs*x@62!LO-_dtl^6Z@^%F2uh4&}^i=d${>tya7&k%qK<`S{?)!rEp>^beb#e2% zNN3yI()z=#|&re{&rN9H0FEyXH$)ckR_rU+9KihlT!g zOO}fM%2&q9h3k9j_CKcMe96ZK)A8~5!?eyj;Wt0FVn4nx;cquAzsBp|5+>X`bJlm) z+KBDk_Wz^G54P2ABsd^;y7o>yO+{?eP3y1={fEnnI{lk@8C_&M&ZF7A{=tcz&f6z` z(@V#%*PQmUWNg;pvFq_%v^c zkALR&@~8S4|A3C?l5w%YG%xXo#{~6BWV%I-R*Z)7t z)vqX4p_}88Aa=9SYoj;gVHf%@EHmr$=RO|jV#g_YFF$e6JBgc(E{ga;dp#v_>sR^U zU0Q$G>E4r`_}y2Ap;wLXf%Wnt|{hD8&539ol-Pm>1rx-SDef+KCP6%P<7Hj`zs~zSs zp56bDJ{0py%iC`h_uokTGHQ2iaqXwMgZ8C1<9M~{tI&V6%&gV_-*x_TupD)f_w}~e z^htl%-f0{hNbJS*SI&QGUG4R6^wrSu6vZlZ(>_vr@y~yr8A~hrI~WD|9+KCMq2`%h 
zb0r@eq)!lkKP2B!$E(;$A9cK8`OANC#1^OIVfkO>7anlZb6)G}_CJ1Qz5XQ8pYgL~ z!O#CzW83RPpD*-l4V)Q0b^15;JRbAmePfCDKa8sgA85R7nUHsi)M0~5=?}9%_lJYC zfBZazt)4#W!Xw-0W5+*(IxaX#|9>Y}zoJJN_kz z_CN4}h5mV2UD4l_-oif;p?mz_5F0-4)E*IRUMUDaXnknk=GS>)rLzL z%(Kuxj#$xuKzWk?kulxrNAutVy_0&gH4okJgErrGO`Y<25I$_MkH0nUk5}d!ymp7y z__hAL{WUAjgU_zNotwpl>-nrNcoik9yJlWsQ+2~Op0?C5??V4h>8aD7db^)lL+b5$ zaALzp9ySOMh`%3_Z>Zx{?4*x6-tgYv{(9k~oje?T#qS^d@xue~*#3V|=3{!SsrR2r z=RF{K>DoK-)c*ca>)4MM`o|G%JgqmL8N$bWOn3UxeBvPWW;3oLevp39!~E3gIPj8Z zHt|0Bt;WQ+T)5&lPj7A1IrPEl^A^Bk-#?CKak1}V+G;m?QLO6PJMlGDH~7=}VHf%j zl@+!6PY^!SgMK>V_fC*FnEI`cftvhW{PsF^YHPm72A9$wW*vFXX{*g27xHbd-Sg5P zzmmRe|39kZf^D_)UKFdkHonC^|90lwlBJ^mfN*misp(2T9f>m@anL)7r#AVFXK{FS zT=T09pW#yaL-RL#zi{~VCx)#b-hIzMJo;PuvGZ>u(l^H)es(%4eQ3V<+hgZj9DdMAzVTs$X`cGw z3vZcs%DF#yF@()mxaThu*1gJW-1N`0xUik)KZ(~rbg)6m>Taj0T{-{hBqQy+(0@T0 zsnvg>)X~4yGf!|t@|u_R=GWDke{k%4i^C5($+teR!89-NhrKS}@xN}kZtd{?pMUYv z`44u_KkTo(jb6;>#2X+!XZw$~#94bMkJ5g)53R*>%~NE4h5if6NUi=!zIR1(v3OUm z7r{K58Sne>gVvwwgTEhoC%kFB`IC4*zOc~=Yrkrr8+HvFPZ*qg%}PIp*N*>HU-ZsL zJLO4`I7nV~h-VldpJqM|f@j-^t>_>puTs{Qkw!uc6aC9Z#%c z`&ZlUf0E=82MhgUMjdanCCsKrLu~e&K2}%vAMg`5Tl<3#WP2me{F-k(Iv#n8>xZwk z=CwDiu+kZGT9YsR>7f@-K81Sw{$)6ei|v1c{{NIyzoJ;(wO2oVp&NEOKi=;O{iC84 z{o$J{W6;0xb#SD8y6cS9~)GEn0CX!k9Q8Q4bvxG z_{7!zVXtx9{ztR8*!j;b_48Ll{hBXUcY8NmNb{JZFbX;j!yK zM|B)f=Y?*D4arN_#YT+C7krV`%b-VtG0KgReWy$fS?_Bj;g(>-hXCLZAG!V+rM`j zS421TsheM+f2XXh=+8XnmeKD1y|Vo$dCd>^<}+{WL-Wnw7(3tM@PkhBjSm}4^AdlU z{Ms`%e>T4_O#aqA_y2O0?Yzcq|1+4y#q=1^`59_|>x?8t)17W>$=gj@-CExK{XZnw1ak2BydHL_B z$ZRLns-XRcY^3?l zc_iqaU~@gS=`&Ybd6-|J|8N8QN;`(aw=T{&7Y9{ss! 
znvVVWTH|MoANl-=Pqrrh^yw>p_LF_Q*4z1qQRM^MYUc?$vC}nuRqnq5&w%o%UFbi* z^wjFFdCZIFzml(c|MA!V&$#8!<2F9~iZFARKVJI7dtPmNasJt^r8KI0p@-Z;EI@nak2SzIh%=Rw^31Cob7eI6Ir4_|0(_S8TAeEx}H z<0DpHcFx?_!^ib+*kB%a|J%;T2BoX+8bAG@8#d1qHvJU(k7UJCr$2S%+c@X~<|Rw< zIYxCPA581;_rtW#yKmlxxG?WR|IyM>r$6`6CEv#HL>kk9IOv_k%|mS-#T#Vm|ovtmeeepgBe>y+x*1*}3 zs-i#h8JJC4u-S8uefYNU z!EYaNo`N1gG|nrE$c+5`5#Sa>9JBthT^l7$>VSC?MQh(Tb=8c=L z`I+yAU0!#=J%9Y_i}2g=&s^mP>1UvKT(PQa@5G00@Tc=*-i7{oSy87y$Mxj%d>hi& zaLG~}{~RMoKIonJ4I66bg5;%W{WFf!y43FDhd0bT<-A=^eZw2WCNFP4>+x;&U>^4Q zH&=ee{L*;+;ha=I?=(+QtU^b%o2!Zk+fe6CKZXA7GP9z;&X4;@lTW{fuB;wof9|5Y z`?(Fh6yXEY`I(XXB0ux{Jiq+j#m>8pWO1>+7*A26 z)wS_W)_UsNYAZ6oLjO(~sntKpw|)kdZ$J<}kUr{=JTT3x_??ai>bM>otntGa&RH<$ zp>+?wEPQGBo}>SExTC)I_m6AD{~y_yJTOZZ{LG`-mIUkyHY8r?KUhAh)87wC9{qFN zYRG&wuQ{hT-|EOyG`|}=-{MJ~*L<4~Hb_1;sQ$3_^wWno{{87;-AzU&jK2DGuW>v7 zJebAB^jOm0Kf=fL=k(K1UPb0t=s#3OY&=(5g{^g*S0>+d&tqD5f2ZfE|Kra;zjgdM zcfV%xx@D}>dEh{VfYkuYb z7aC=Dkq-`t4Ik)TuiyHp%Wq3QdB&sp@EIm-SB^WFFXH zAAguW|F4bF_uUsZJmpoJp8oKsz1G?9UpiS_JP02)h~H&#@d;M0|4-t~uh4&fNl?*W z`FS59zi$6S9P~P#+4QT3A0!`s^HZmMmbZj{_`+(>-0|jlU-({FM(4w)@DbXw`^NRE9R{{_Wk1miFhZNpE9ew0bFj+hvrMPx;8%5 z4IACl`4##vELlu{C!AifxxLuT$9Qt_dnZU7O#Rk}_D>#2K6&QXe2cp*F65gq6T zcvCw&_vLHC#_dl$|Gt%Wq~4A{7exBZE5c*?Hzb~}E$$?OZk0Znk7A+!NLf+QU*nv| zwmy=2_=$twm8{*@hj*n_jN{66fek^+SDWY8{7&-xzX-#5Pn__c!!j4XvBo7Iy6nv- z!)x1rTgL;J*ZZI3>Q@x2n8yH*G_LmIUw?b{Xjxg&-zi^1(DcaF4?49qFTGyK>a;KV z#orIfH)K5cuoe69h4okb`N)?Zn-iu@eRSL>R(!SBI(z?xS2355Uo*)w)OeOG#25ej z-*X~EMgOD^UI*|SY98}$PKb22-^XwICHchbFs)0+HQS9nZps@P@49H{v`_94X3Wf5 zPwlV@{Pz9Ju<&r+%dbE7{||bndCBvDzgB;A&85CIa8C48^mh`c59Y!9hvmaJEcwl2 z1<41QCushB?0k#E4?4+*$76$OUg8fE?%3j{n#yC*XLZWwLFF-fG5ukWAN+0pZKrG(I`4nr*5y8M z1@*T5ce1$H=il)+pHch*`v*2(oxfyt*WSrvu=4p==VkNBCBM)=M(XsJtm66U$Rn#~ z{)(of=3#?r9sYip)_JFW)L=ipFmZ>gE_{Bi>0#xCcm3kp3w{Zoeg5qzzv93D_h0A4 zk?Qns>hs3DyvyQZnCNeL$@9ba8^lwz-}@wdwfZOgX+KuyV!L)c7B<=S69e<^J1T7Y zk^KEXxq1e5y#LZCNT2EUC#>DIy*{+BR5#;sQiEM=|AVEcPJiBq$hUDC!ejFs6}$Nz 
zLGnS%)BemqG^QL2dyX~yjiZ|Z;x~GSZg~z^sA5ngAL;e4Y zw)z#tDs*e`NaIeh_WEb$qjiP;LuEys{!K4@riT;TgUUx7WL}n+9e>Q@x2yY@~V(|_gu4{ePr zGQUFqxn%@ByeqB39#EVwNWS@rgPPa;RBwJ=o%!>z^DXWqZsTItA^oIr^@q3Yd(in$ zy{-{vKDzV$`5*3w*S7zpE^q1Yf7@AHcs|4`bjo=o=$&A5!(N+l$Y*|q{=;Qvt^Px0 z^q(C6GvB$A*E~cJe$d9(eDk}p^DPcPsQJc+4W@aCKkRYNGyClGx+!75?an&no--Tl zKkWL4!7MJO$MSmrx5O`5W9YBrSFV4{m6!e+Vi)?)FCW=>PW;TT-}z_q&*Z%I=$`+4uZ};I;A*=4t8VBhSydg-1Kqq+ zzJ{Xe#eS&Je_GnHr$qZ7*T%>7SM=G|dPU|}=s!|MD*C(9 zTWtR;3X;$BX6PhNUh^@rz513{d4wR za8lR+p4$7LoD+Y4bQG^wP`W5|Yw$?>a)Oof#r66}1#0!*Quxq?@eFl5;-F9b%o8LJ zwC7XjWBvgdA3npS^tYye=?yz>_Ur+zjfeMs`0hV0q#rx~ujAYN#>M+@+({lNT~*ig zS2_QdOD??3uQl+w7^u^q@z}oMPZ7TKep&e80rB@k@(o>VxBEVVt=Nw*%$#@EBR|~a z{;)}F%L^}f;}_wz>(57(AKZED-+y@>PqMpf<6A>~*tUJK-Q*H4^pBo8{hMvcV_rOe zhRR1A^e(Am+`9Z+^5Hce>XgreOX!C${OG%r=Df7>yzub&?I>KY)DleZq;0G;lpm%)Yb(q$9myC<8 zSjU4uthVKrlYTyJkFeUt@3`{L|N4~IIy?Rx&EnGU^Dli#c6W`R>z2Kf7{^V-u?zj9 za<%%;mwNiQ*HNy#PEqrkpB9^Z(8e9;jnfBtDW-MaNxk{8LFM<@p!$pJes~Pl?zxACb7ls|ryW;U>@vpka;Ia98CT636cjIU!#|geE1A?-e$WjF7VZ1 zXngtUc^^3F@UY?F4`2TKbIY3kdq}RA2U!i}e zjM%uYv*Dx-iXizsZ%e%Wqn9Flpz)IDWL)bX8%*=m4__UIS+^gv=dlmW3(X(= z@(({af8yZ!!Jh#nwjSCoF$`c=;oc| zr1N7tF7%&YX4L5~)kS<*9>JfY=@r?!$DQUM=&i@!57TkH>*X<9{r&jDN*_3N!>?}f z=P>cKU#;@_J^l{Aeg17LzheGxzyHSP7L+5_>CfwsdD(uyp?#w3T(O&n3X%^pPtg2s z?0k#E4{AO<9ve*a5`Xyait9f(;iM_y2l-|n8o%#c=401?j%0DM^AFSX{7*;ypi^5> ztVQ-4YiXQ*+KHWuU2Ojg%SW~PFOWRbqaikZn+}@SJgYa~>gYq!{Oz&xEuPeQC;2um zHb_1;sQ%XKJG|++)t1}2wc6ub9(eiGN8z#0zk^v^Ophsg|4DchC9A8q*&450|3+VJ z$z*+iLE3A3JPLn?VXlsS* zf9XH?>|fcQ{@?9?&dtiDPXF5bfOKM+y-wI){@efmh35lw+6Oj>A5?#sa?no4J=J2F{J0nf^)t{Es&I z#-n-oQ=||4{p#xC=6C;mKIwRjn_`%C_~%ZzWXscqt-Jr=_ecDH2S~l?oKxA0-ib}W z>F=hE2YnR_{bQx+507Hy`}ekv(^1qs_(7le=?f$ewEYkKT2DTFhRlOJu#Z2?{$lv? 
z@0tt3+m1VGkK^BUAN+RznUO3myq>Kucoik9y7o?d=mx*>v?b2G3;i3Vr=q`(!|RBA z_Lqj5M;x?qHJ;k!fpu~7JL%hl@M42Ce#QykfBxJfXRnimd#`w5_2X~f4nDSjJMS=> z#l>`kUs0mnwO2pbY)Z`fZzQ(%S?Hgaky`zghyG1>_*_}t|MQmhq2s~_+Uq-sTb+x; zbl-0@&o7}ry!*1(9`}cF8-(}Va_-+B+2RiB?f7RXiwpN1@eMuy;MA`uR(I{yPrnuY zbIB#nd<*^CWu&6NlYD+hW;!**Z_k6~5eI$Zw{a7F(|r1(-s+U^f9nsQ`Qs&z-*MK& zaO(8V+wXtrGWhKHzmvtqE9Sq``!6Ma>r3Y&RfUdfH#ZUwb{!V_N9KzD%42m4l+TTg zrsG11V}r({y!iW}XFU4BuEBnM!JT?ZdEHf5^Xz@kp7fRb;phB6=b=IPY(Lb_;*!os zyoLTw9`U^iwr#J>L$T0*u&k)nU&pKT5v#i6(cj{}@59#*(|Ou_w12Z*F^OX7#}_8L zFMsb5AtgYzpN^jvG zj$re${X;H(?-Yjxn=?@#+e*28`XC4=3+|gO%wrAJ& zT4%@qoh&Z4|G71O|2W_D8PqsB=3=w|sP*>+zI(|=U? z!M56+1nH+Ce%9W}qb1n>hs%mu{gt2j*t{7Z)I6^HB_B-d@b^RS%I=E(7{^7^_Om{* z`|*YCn?L{WuTQ)$Y`^D>t)AXs9`&{7pKI=aPF_~Eg*K7aIQ{yN=LKQ`!!NfnEC<+}U&3TvElXqiDHu)9(^GI90{@kfQL3mR%{v;oNxbjusJ?mpDJ{GRM_tSeG|JvQW=9&I^78m-< z=g0HU!`Pr?6*{0N=p?R~m=yx8!A zo;;9#+hgZf>c~&)vB5MCzhYSbwFjK~w!gd>HeAT5jlGkI<>z8Eze4|!(o?H{UPc$uKaXbj`YYzEe(rk%KWO7?KK_1~ z)_JGnGcLAbKfdthQ(k}K;R_xMGnaYzzEwYR8T@wt$r0rTH`m|4IZdv@pg6hPCV!azrC*LTd_6p`B+e^zZX91 zL-~Dz#6jYzZT-y6;$m_7YS;Lk_&rEIHrU4>)_Lx(&9%7Q=9B4SeChRv4N0)R z)h_gRWu#7jK8IT$F}ui5akL;QmgcpBJ zeJ6%@E%%iTcKQ6p@bvxsmFu{QVs&*eTl=k?e`{+!^HICdzfndi`a8{EAZU8T>>?jb z@|tJ&=3Cu>_?fTyJ7ectJgM_e^5HRiG5xLO=Y052$G>!bYlXl6*XN(Q^%?5z{PU>t zgKf1F6mM4tvj^mn{r{BOcKn2$OJ1RWd{nD{L-I_Iw%GPMbZToJ*IkegnqE3S{(hL& zdB#&7v(?{^FD$?OvJb7k+|;nb>Jwi+aJ|=2Z^u7*78mxfd3pEGS%VjwK7$&s8J~VR z59wW!Pn}|+f4j^u{v@CO@nl}~WvD#FLHeje@<7kHjiQ-iF~9PAS9*&+UAgYY$pdXX z7t<^se>nKTJCEGxj9bFN$KUwzJubY)YaYM9v-g4bOTF=GU%c)mTJ%GIAbf9^jJi0y zh5nr~v!cH%y~X}~njrc1I_iiYe&V2a5>IXNdHq`(Qk<(0{PZtmuyp3wwENyU)cB z59dpo7xm^dzU3*e`8#9hTbw>XC;7&Q4W@aCKWw$)b?3kP^~Z+%oHH+9@JRRnkM>mF zb}#17#``}s(s@f(SO3(u^KbN%_wq8oLjR#MQqkX)-eUVtfB)j5yYjk*5A>Rs+W47| z#hEw#YQ5$Ei|lap0Ux{k+06#S$G7)Q+_P@!v(pC5M!1w0xv%P`zSLi=n)>ia)rFV?& zKacLp`5*W|?O7ctq5u4np-z7vquuS3=g*LFHLv+u=?8w$wgb&Ke|zkFi^C5( z$u~Z1FwIN+VU2rkIpL&-_6n1?{^OP(KXIJ|F|;R-M&rt zhWO!UexP>}Pi^vfeOo-2eEKy166$RHI=%;$r#12BYiHj2?Dnmd-*fh5=gb};-}XPF 
z+6TD9*!Mq|tm@kM^mpsn%**VC)E4^3k!tmyDS4)c*4gtg&>Ocr*Xy@&>+*BSC(n2^ z-#d+iOX+W|d)316Uu?{4t^4Gp*WIwjx{Oz=zwdMZL+wYpE4E{`E8oAgC4=$0+yA^6 zsOYae_)QOZI?9*i{mcLV6rDlFOKtVjXB~!Bem(cz&Hg+uO#0@wv;Oqe``M1{{QpQ6 z7rXxXZF>LJhWZu7>hAAToAU;ycU$Y}TkY1syy!t^?=(-5_i^S!zlO?39E8Vg#;wcG zb=-!cQ=|?XTuOgy(nAlPcEJTRTdPew?!uq_&x>C9?E2^7EG}Gk=C_OAzue)S`W40M zu1#l++xPGE*_K?dadav4kDiMDTKB)c{#?hi{*;&TL0*sKnVz^DXWqZu6+K7te4#JM_P{*NPo0Zx*!!E!V+sAO zao_pPE53hfyEXoAfBxM~NB_-hy`BFV$>L)DOwi+>hWfQ1v8rqOLO1Uuhk1d_uh73; zX4L6lIZlk(MSe$loub;!UB!kUq%RP^WOtum{2=*;PAV$4`l+kKu>KZ*c=)`7*ALT| z-F21M|8WQ&JO4AP{9wQ1pN8bKHa@=p)xOZ3ICi0br>w2hpWiVtA6_?x17&qpKkrI! z*Kgz2<>xWIdi|T`d#7=5DgEK>>-K%(bC1psU;p?=4t?g!BlKnOzYQG+9EkV-Jcc|_ zvZ`z1=!R{tr}qEH+?xkVGM?@Kl^{fjGZ^d4c#IkApfSd7>S@n0$e@YXLyKKPj-3!W zi6sbeB$6POW8ay`CJ8dukOY&>E{+gHgyRi@7vEo1-S_9NbDlbtJ~MfL@Aq4O)YbQP zJ@<2Ub#--hb+y!>XQqEo?y2bSB(FJBkbKkK`Wqg%JgqmoOY-OkKZqZ!_?6Ft@L_{B ze)z)4_k42y!MFOu*^AzrUh^^+;xZcB% zzBG?~sj9At$GXvv%}04N{rhqj)8FORkiPUjHddb&;m+ruyEM-!Vk^Q2+V+-8zWJ$x z2b@QL*dqDK6B~{l7yRe_xflNGeDtv6pB@<>f4Wn#|2dKUHGWXCOTF-OA5LuBZ?vR> z`DOa|=bl>qHyxgDJPq-CH#`n+?6*EtH}XK!UwO^1`S2Oeqd(lWOXPe$|@1~30 zI_dhy>5K2*vB7l38Tk)j>^K#_%(`6Tr@zYoAElCue#|e^e^wr;=&$qRIy&-AC-^jv zIOttQnk{CdS6zNuq}%!QN}ba<2p=}s!5`eJo85l?-gwq{#0^D(0_IwvGH7P4S7BF7G|kqDz-fj%vbZ~`v32=9=&6< z{u#&V_^~}oe{k3PTNeD~%VFYkUtZzfyX?F&pMOn<9vx?F@%_u;w*P5~pS5>962#^@ zaO*$SxFYk+w*NVK#Kv{GmDxH!?z>GsenVHJ+kNVruZiu|Z*^*0U78dN?{nnEX!+E4 zLhakfdFl__J-pTNpE%|G@ae%LjvT*M7kPI5TTfCfrpH*_|FqSwC|0RcY&SR1d>0}8 zDQ5Z)M${r-YJbzZ!V=quBIFlRCSop5?Z-p9$O-ulx# z;vn_0O@HuMocjPlCOEqi-+f1T`@}8uld^}=UY6g^G@=uKWvbEY*78- zg)ctvneTmKtMK;+_xkIMrI&%HH9f{$lz0tDO{!{m!IM z?dkJ){25>W5Iuj)JKO$?XRuCx_Jib`?y2_Y6x)LT&!7JsztlkYN^gH9tk5<8LJv&7 z#5?VKUqR+;xVT>b5a|~`|5Ba39-KVl{DbAC>aW~?;DRDj(Z5Ub`5x5tFP^RPb?4pi zyyh&i$p@_uaS;1&`~4B)f=lY} zFBnfzx~ZfLd>_PnTC`^|K2|n&%0^4_Y7Ezxktn z@Pl!kcfxOeY(?toFf709G1>30KM3O{|7_r2@0b67vAgp2N4T;c|1fV*=i!~?v!*Zm z{gK9PzfGO;Wcv5ywH5s}uep&hGauVN+v4|5kT@9ov15$$>EHbH>ylz2ZW#HgtHZG7 
z^@rcE(9SQ0wWojYgxz0R*S3H3Pjwu{l<$8V>Q@x2T*v)1^}Mlnl4BUnC)2+-X3 zf5**yjK}3wS)K9`2ffRqL-s$eFzx+`*N^3=8gC1lKkC;zi8HR*qx6S|AG-RJN4B>L z4_*CV3ts+>S=8J4=RxHMTWZ6vDBkLtzU=xd>Qc##=bh=_mq%*#kIz3l<%tlzK(F!G zra!K;w0M{D>U_-~jRS8j!V}fO7goG1UEj#cBY?`M_`jY{83-@lj+}|SJvszew}ebJD&?zrV@$)Z6*DMp7)yFFkX!`~+yq^8rd$ z`T958$)hGb^iyMJ`p?QM>hzC)M@CN>5&2)1-5~MGF2TdG5^~^8Re@_b1=s|Cc2@!%&X;f?FMB+skga31|( z%Nc)J{ho=dh8a^&S@ebb%I{we&~f@B+^Bf}eU;}4l&tES`Pu)kIpt%1^sRQL|6ram zYW?Ah5S>A!kxgoiljBfsgW zyySuZYx;lY$KTj(mouk?o&NN#-(T>ZgXqi7KljUg?E3T7^!vw_`W3}0_4iKWIzM!S zKb{}YW2S#mQLF#CJx%T+D~kg2YTh<_|E*)69?zg4_{cx9lUtBdSY1f z-6v1D=e|1`*Y^KuQY`f0mwK*v0^7VA0D*C(Jn;rkGC`kYG8zXVhyMJFt9%%E^{;9WdlD(*U=n7}A z_r_VjU-%vPx&H~*!x*aGHm>%O)GWp~NfNwMgopX5o#|gZs_3tI&9PE%I;F}7Y94XW zJIOO!^XLyhXxo3$&5-kU)F~gl@R&`!Lw?xrvFBR99yb^cy#4lFU%R=VdOrW!`>^ua z{r4jNq-LS7ST~zzG`~#$c1~5NKlhcf`ZUkZ^3wwBa{_tIYs61JNME4&+au>&96doN z`S5sbFwTqoVe`*TeR|9bYlZ1=52VXXn4fz4{Y4`w7WS`o_g|H)5%&K&KEGS6^htj@ zPNsiP&Q;Oh$q1GGPfPrK{)HdCK(F<&O+L?`#Z%>lU+XO&8|>12Y_Nkr?DpX;>z{OB zSNQCrA8dW{V}0=3|9`cW7c74MJK;9()UPO3b?r5dZrIG1{Vem&^zTiwrJ}#iqp5n3 zZ+tGV%IY+aIOts-9kL&Bg=zVGzPJ2_#x>9U)Qu9UtHUt)wF7qj*W$~CRepWYeAC93 z*MI8$x7q)I@GwtMx-HlEL3H!g)FZew5$8TR%EJx|yH;6q#S9|EwfiD*9_4 zzat}`_a(!K&+;O_^`ZTd2io%*>lfR~Ykb&Xhy3u1{rB1Em1lMhckX}vR~Ej!y#8(P zq*&~Itu1!?NOAvxcI@{~;-FaN{@8whN$*bs;iXR<_gWgHp zY{pf@58Cq?#o>p~5I!BJlb>-yGd4cGZ=|7NXRP@(znu_Gx zcG^&0CrDoNKC#IMt-qMBf8_j1os&EdDv#Nt^oOI*`}DCJ{WA?m{op&-zJBbR^u_0| z%6?|?+loH`w*6z83|pwjiEY+ywtYo$>!106@L_`;{M{=*cHVZEpYxaQ zl`mWE;RUuVKY#6$6pNkr8B}{_vH7>Gzf}BnYx+euY^$d~<;nCfJhl1<8Od~_K0@N4 zcmKYQJkXw3xAEwalY^VVkalf8Z{8nsM#(Z+}uO_WcX{|Cah) zg!Bh$ys5fbpAD@ePchTKsHo^4<=OS5E#+w|7S-kFv*t45M?cW|P+t6d1@qrw{UCw0@~~!f$?TFwTqo-DA%A`^SHA$W`5A z4qJ4wResTc$3FiKDnHm#J3{ehx;1#Dd0m3``!BVbC*xsf`loqCMSqujv-AHtF1j$D zp)1nt_raDI`E6Y7hdj`0K4^aWkK)ERNgO^rZDD+R*;=%b|>4C@Ie;Y}$ z*!=YVkK}=pRj%=SCq8tG{lw`f)4x!-qQB0Y-*J#{?<1*<6Z2V~lXz^C4_4wuH|74U zuKzkee03Oh*y5d|zk1f5VaKQZP3yd0{r_FO}L%|DOC&t^UhOp6Nk9ZSliT9Q01&u}wbP zm&M^#G=GL=4J@KiZUEJ-$_`z_@ 
znuiwOKc2Sb@BX^l&tHbW?)b;M?APzvSMcP4i7x1Ewv$KbMm(M${bc(0cD*>I^O?*`iAlh3c??wjYA!%<5uDw{9&sTm;Uj^PrI;9-}isN*cz{*kA45% zPKw2jKVK{EzqgES7cGgirarbufB(bp_vf{B`ooXD)<=(w=$#p4&v_)~6YmG%Ck}d-v(~?FlLs0v{kfvt-j6*t80U@BAGX@~OONgFjYYy% zH$HL7A2xgkUOWEpO^U_Nf6lG`PU_JOeJb;7NM5G@?EF!!{!PiN=#+{de&V2a5;q&Y z6!C-cc4dC*ln-9UHJf;c{IK!2c0T0%N!x|#ORYFFoLK$*)t3|t=NBqMJMp7*FlfGD7e`wd~k}p~18b63`*zmC*V&0kl1362r{`&ck zeB+@{msgkj8Ow8p+ddatAF2;|pgo_hnmXn4pz@fle)#IJd+dWNF1XFH%8)tAG>DUm_MvK>A@quziL-l^^zHXSpNG5z+=~crb)4&ciO9VJ3^?k|JwUzff7(g0aQxZn_+PR!-JLw5FLcAU?VtX1 zzM1}or=mZ3%1i%s`r~g-l0L`>tq;pG%6xj(d95UU}s2AH9=!yvAQS{!Aq^?mN@p<&ld1g*W^BtKTCqFJ701nnxV; zk>AFx(Vu+Kc%toqxbf*YqxFY1esaLa7JK1K*?kB;KI@sUeMlYGyTxdIMB^mU0uNaE z|F4!5u${u2>EFmBwfb)-b>vfT*g;RLL&v)ORL1d6uu`Xf;&ZD%eBp^V{(ACNoAia} z4p?}Hk1tsM{_a2-m*1O%)Y*92zhswdi#zc(w-YpfsF}`pAG<&Lf4+GadfNH7LFENoYNx|dtnz&~w%HE7lZfd?-^!cm z-_A4Y^#5Pm|2MQxMf5ZMoyI}tfem)>hjClp_`sPn{t?D~W#*2HF7ZkD?f##GIu6)U zyD=QaD%al0W6-^y#JDx;e9rr(stC&QKSHJP8ZrJhsurvMpbE;bX zm(HW*_D!9kE5f#&n{JWc`iS(6^Ld?`e;#%AI=-Rugp-cl@!*S2_-#1-%m;q+&4ui| zn(cq0x^(-I|NoBpB+-Juxuyh|2S^+{(|=Z;S<&A~eN&Nq)4d__n9uU!I&@Sd546`4 zd0G#@#|GoPQTn@=*!%F!*VuaZ?j?_X;+d(tJ_N7n-Y2^!x@!|lrJyCv~V|AG7wI(U~`B`^2!Ko{Fidxqx` z2fdTH+32N+A7r}#%}<^3!OJ|&Cf*^xd(tUKo%!+}mvm2h`ycP6Pn5_1dnd(W^J}R+ z%ImMl zu8(c||GO#o^=~l#it_lU?)vjqQY>s2pm;Ogv=6hLJmNgCSKD->Z{_Xoy0|E))xW<8 z+wqOPj#BY^r`RvpJXtWFH+_P{=?gnX@(rEvSL{*x!{q008FSu=?}e3j`PYLN{%w-i zIy?W5K8n+e-Tzy&(cLi0ldgvBHztV3`b^bU-I-^mf5BAI-)VkZkbLy=>i16K*xdJq ze9-#PzVS{PW>K)4;!rUGfvp?d+%W-}IH~pXQaOzss#5eXSxk`Sv_F#1B7l(7UJ(y`u5seEKv$b;<`X!`5PZCk3aWMiY0!1r{@*_KX@GbS5UGuol+hNdM9Z6ANpx%yd}u| zGW`oro&NBXZ}Wy1Om$p6Ur0U}*WvGk-U+YOW7l9OzOZ%k$t!MJ=8s{!la71u;5!%c z%463*^e4q)$Nvqz|9`4}(1{(dz54C@U$yZw5BkjXZztJO(ck6X%->r;dHmB5+g=Aw z?Os7X$HNaYE@*zO$BvPFL(Q+)9sKZy>p!vT;fvgQez{js9e*T>>^5-w= zypEgbf?fOnJEcGGD{bl2SvK`s?Up+~&m>%Pc8vw1qe!x)sk?eBqojh(%6U4TD z8{((WO#j~8W8=Bp%50afGi(=7ZvUy+)`wHOE!h0DApD?>uYJdU>=@%Z?}Xp{*otv| zY=`9sKL6pTe*I(^@4xZ@=-`HJw`yyOU?|;~%enqh|eNrB2T<2$>ztkqr`qsKk 
z|GvD^^d}!|=h5=`ob3gS=T~RjxXMeOu$TP!!MM&lsW3mbB6W4x-8Fu(cV1oWbYaQn zt^L^cKdAg*OYJm5t)pI|8K09!cKttj@L;Esm+9Z1KQjGYZVl;cU9p)Dx*IAVanQS{ zj&bYqQ^{vO#zUR*d2k;6-4izW$xq*VexL3Y&iV3h);RXd@bSLI`<-FU_2)^n(6{l; zkObxd635Q;pOt5t{>)3UIes{gjiYgRVvNV>R7XB}@L^lrCB;(br>+jeH|}5floKYk zg=NV0svg^}8j?}>{|h$0>R^14ZkFdHBepaBXXg>q-{sbj?SFBxnNQpwd`-=3J|X$u zMRnR2`Nr2jJP$uezIT#m2;W@#ySx5++XWkrU9Nla<)>cp`l=p&`~Bm9jsvb++<&!? z_d!F(SCpzDx~-!;Q6A%=W2XO{JhP&|lY0I~)%0kIKkkD#=(Rq!di^K;fzkLtARC-uz3^k95N;-Gi`zK%T5jt|;3b;{>K#>WOb_`}~OzJ2k#U%EED z^S8xL+HBlx_)P!)q*zRkja7fAe&rXdy7o?dmG2)@@w44B?@a%}JYwTH&C_u>k3l}K z8$-<_4%+h+@!=;AG`@z6Z+_+X@PPB^4~OjW`ipDc|3Wx)V9C!veCkyCvHK6etC)5d z$N!EzP`a$HiK82K-Tb<{E-6SV`s+NKEg7StN7N^A&}+Tfo$?zpPHmm?5x04b(oY|u zdD)EpR{7PvVe3DCb<=|m7zdvn|3rN0@AUttqW+U;iTgn}^ViHT)4yP@)qhDDi2mc} z5q{#JcTru`KY5_-e_B#UJ^W@njl+ixcJOzPTVmYnuWa_O?r|qw*8QFJzYf3Y-=q8u zLHqqzs($4ctGf11e6{!gY)PDXXZjZvwfavVo^Lv(;-`Q3K<^}Ow)S0{uYJXFZ1@b} z({V=Y50lq=Xvv4{5=xbIr(?E9C2q*(0wL;7;+R}`yUd-a2r`+vei-)T}T znf{IZL7o2I<n=c2X?V8E-4{(~lFs2W$NB)nQoc z!K=5q^AP=(uH2v5`G-Lrr(aR~iMJ1lR@cmr^Jds}^UL(_%~?!;ms{ob&v5|ru|66j z=QVE^mi|Q@Rrx?{?n(s7vJN)#rOT>JzneV_;XPC z!Is+Ta1^V2{$jhS6RCZj3-yzRmNk6vzYhR|vxRI}akS;Ubw2#>QGo~mpIu1NpSkAG6(fxq%R>Np;pNBz5(8Tjfpf1TFq zUT(cTH#_XicNou(|JzBiaD3q>jqd*QQdQM0)|>H|U#9<@ytbmh6Gr~WjeP12HIF#x zBR{tF)0Y%WoKM{R^Qg0p=Z23H{xJCGhYx(LH$2riV%DjL9Ymh#KU?xEzkhe?R}`yU zd-ciGRT{V7U!)iwRw@eln{-z7DSo9RE8dusL9@6jqc z!KeJhK_B_)3nUM;?L&FZPru3sFFa-w?~otP{ly*Ej{R&qoIB87cGeB${l_@}&`OHM z`Wc(weex|kPGvmFD)%>LJ9*TMzkh_cA#vu{-F0cHpm!3FZSr~k zEuKog#kIa-|A+pt?f6yBSnK7PVcR*Y?fm6ml-K|4r{gm}#q^Hi__N1e?`_Fr%{-ZbPd3;elCbh*)`wFb5dMDg zH+u!i2W?!EzQvuytsc7$=_ih>KdiIj`0KL^pu;)`?{M|kezL09yk7wBEiQhi;(OCHGf zz;6 z|KCgb)8Uw2ru+Z4`oU-%iI&@eU4QGeuAxYsVz&La^GL1!%S#=)nC=k|anQS{PHp^b z@0K6;7w3=Grmvd(aR2j*efyd3y%X+VVB6~#x%nX85AFPOniLED+5a!M)vqX4rd!G* zLG-EUPhX(UH`Bi-udL`#-fZDDol>zG*E_Xm3pOtlBp;+N(EP>d<$el3NWP&H{)#ygfD;=SBXo%3XhIAul_LO(o?V8;A=C()=zz7pIc@6Rji^zZaNsPWH{@&9k%f6w}H(vJuAc@3ZX@qOR^)-o4`PrdTX 
zIiJ5{Hu-k_XR39I={x%WcMbI`iuG@{fBoMI_`Q<~wl^@-e^wr;=&$+BZHCtwpUwvx zKG18u*`4x<*VU;XUdA;$jz>0q*!73o9J0uwU)*UX_4fVaoTOMP+a3KWN>;g!`$0GJ zQ}3jPelq=M=bk$K{jA~nylyNXUd?NsJv^Ur;y%pZ8adzM@PkhBjSm})^CEv(`dxq4 zCW~wtmYq2J_&+{gUVnR_@(xD0iT?jZOZ|#sWx6@_Gp=HDgUC-kd1`0+&&e}QPdB`- za{UQB?csUo1$rm(*f!nLe;>Cx`c*y;&Z9rfSh%^={bMJEt*8I<*+YMp!e`$<_9ex_ z=Muk3^!ZE2m#pgAYrOLLHRn-0ZS{BJ3mcty%=N#U(+rzzx9yt?uX>!D#ft*_V}{P5LbIAhw> zD^Ggl<#5Kcr@VG{*E{gne*Wq$+O^-|`nK4t(ZOsdkI)VNcz*QP-PKXSy)@4J zGW`ort^R#Uv5-$+hRVnLRkLS!zSW_NqWK#m=UY6g^Nfcd8|>f@>+boD>u>BjFKptj z*y`?=%KtyzHz^kOFNR}^-@m+S`lXV`+B+TzqIc!|gU*vU`ephTkvjb=zkg(Y5uz7J zU$KoJw0Oive6A55mlR7}A5P!#s}mP{D-EYy`RDJrZ&tp4wEd7iAJ$)w@_|n6h9GP2 z2A zRvxJ64^I&tqGLmOKqoeOf$-NMd7yo+({atOw%2hC=h6>fSo7%%X21WV{|f7TXQS0m z*>EBD19tsSniLD$hhMz-{q;FEu9JAYwz&4yY)OXAzsAn=Z|9k{`hQAz=-)fBVZ?{|i>#`%mn9EN(e~GVO1Rwi-d*5v*#p0LBzJ1I-0^593_3J#u z%Jfm&JMmP0f2X>jBYkH2_vRH9{gsdVI*@PML63~XxLohsd_nzA(CX+<(fo6U=ivw8 z_sVAo-(32`Mz8ODcCyx_uyOCVZ~EQdABEq}zcrF#L8tVvk?(&cS*7k~J9$(&{!u^r zF~3azzPzHMzvg$m{$KOp1AXLYo*;RkZC|RZ`R7q*#y3=+;8(im^rsd$Dm2q)8i%Y= ze*fF)`jbXdEIcn_m22fc;XTF*K{duHT|E;9X^oaUkJmR2tQJvc4^LfGY z;Z>c?uQq&!bLoe#d(2|5_a5@*=ex&TwL|Z!FO=WE93cIg4n1nGS?vGswsD>KqTZx&HjC_3+1N{WA`I#dZfjyy3m8 ze{)s;hRbv>zTXLN?7qz-@Z0&%mduy^MEbm*|BU?9c^y}(%Keeu|1j?^k( z_$%tT|1G+Mscimz*oKU+nCN1BYm3ABPt|S-(r2dsoZM5VfBZih?6(YceE2}`a@HZw zo959Ed0_l{GCy^$pi4YBkN&Xr0avckSo^53!{T@DKd@Hy{sX-_4p?*lG3FuJ<=Q)W z-0A!$^Ud@h$SZ2~-&FWakA~RPdpA68d6A!fLGr+OyE6Yg>P%mT%F{jmmYHjR{icPw z$M5s=rOrP3Qu?vaU;Rn3a6N~gF}m+x;&CefzuZz@=97w@=|7l1s_5?|UbzmH`WRgg|){O+#HiU$?_HLn@nPuRB8w$#IqUZ8gp zk8S**#aoge^I?;3$UL0ZfgSu|%LzYvclCF#4O{N9#iQ>YQ-1%&^@oGX2hPYZ|9or_ zn|Dj%tgXLj+{*XwTIZcqGVe_PqOzjD=1F$;`e@6e%6K|1eDGj{aUK3n=v_1}b`5so z3**l|`OiO|a&Z{<+O9|M{o$eXW!L|sqhh*rq5s0R{ifQFjxScZ_Uh+21ltgP&CB#J z9-*7%X}lTh+enHf-v9Hv-{x;69_zq7LE`LZv126PJK;5q<}q4-_n3o@`me8ka8dW- z|GnV%{_(5Vsk7go4(d2yOYO#R6f4^yIOizqd$B=KAxOlREl|(fH{{^UaS9#(9xH zOgr%CUH-i20b$ycttop=dD?5d_WaM8#rd~1-tH1zsE=*?{)K)TlEM23^UU;b<&ir5 
z*^iM=z2z%EanQS*wf=b{548D3eDkWaclqeqyj;F6_Y+~a*M4!)-4;``sG zMY!_$m-D2}XEg2vO<%@UH20dA#FL}z#aV+`vJSq?R5C^^T z#5R7mcZTUb)Reo@5{rxF@ zC`wj!O<(Bdo#dEq(fl&~d-IHn{x0`sughfxO^;M;#xa~rKYZcMbyi%a z|Ksm;cdc^PcIzEn-hYqt|AR6g`~2Hd+YLvt%C&d$nE9a_c051qO#lA8@_$@^^ywSk z7yY0IXxqQmnI9g|MSR$b)YV~jzlEzly85kqbdUMo5od4s&^7d9=bz~pZ0h%4(KymY zx%R}h&x-!3pb%r_~anQS{PV*R_JP<$IMa56O@>+hKpS-A# zu<5pIef!HRycql`e|vEA%m3&#UVHq>?=KifF^QJApJq!EVt+ipO#j(=X085936JRk zZ(IEE69>JMcx>a3>umd1eyh{@TAlJzAN#4R!?58x*X;h??am6D{`OzJ$DVN|^|t*F zB*nsh+V213)UW-BRbA5;_hIl(a_Z)n=|3mWsMDYQAoDSvRC%4^pkVV(LGnTG#7}=_ z_llpq7%e}39%9=GkL6>7njfZ}d-NI$fARjX(E$f;czFA}XgJGI+_ocHUbBXOxJ`SFA3W9X!!VymCJ zIt;sf=~p*D^YDpb&y!yH_<>)a1CO2mPnBOWeWdvR_jYBwceyt^{?z+;kk9@L9>yaMdY4z1x_XyeWq;ILeq(st{PgdJ#~Ih-SEQ~E z!<3&~_|%n;z7W>h`Cr%l^W^gL@1Dxv8==$xe^1q~C{}e%U(NM3-zi4(WBcvyy1YoJ z=&$pxTz>)&sQmGKedMPvu#%r9#lrJToP6&z4r+gM>ks?=eAB%@-@jit_+RIKaL3yH z@Z0sD%u6wy9gVBwi?>|E<2BxFNzD9FzsxVwzlc=yhi5zCA;0c*NF4N9Z#LsA;s?nG z%}<^35x04b(hpxq58L{ob6>kLbf+KLeBAd_`0V^=#FrLdKR&jQji>!2(PD9}Z*C_E zvEOT)elq#a{benNTw+x?WcFG5%R{(48= zuRzHv^^g0kTz{Ag zUiY<&E?9NM7{+s3*UE$KRzWLb&n_cv8=bz^!#ZtNdKwJG_G>$|w9i2R?-2W#Pe>}fT|DHTj ztN;Jh@jv}4qAysteV~tbd6(t<=G7n4hnHCA2RptQnosRDbM2E)goon>KJOd0blgUS zIuG%dYvWVhu;ck*XZrW%)ph#Qhw0?<>=ASr6o2z}LGnTCL;E&=-^lqE*LE8CIM0*$JP%wtD~J zUg?WI{PM;1Z>Msc$@4B*=v7R z(E3x|@OQ$v&c=)N*FHM&buTe?ujXm*+|)gGvDd$ITG$c(%K6U$nTOw^IRCS!Js+w1 z8DCMnU$^$yF<9YkY&pg3OenWWTI5vESU7GJA?BEZd zT6p4{XRUQy_|(^X{`~Rv%lpr?m1i)*R>bebu3OV@v!QiyKiC@2^q-w)R`l0AzDGu{ z%68!L>hd_p@?7CAf0xI&isXUzx@y(bDW3=7!v;I}!#3&Inb)nHgssG6N^`ZLU z?}TxkXS^DnJMo3p8p*Y*@4RG~vcoaYed(j+?~l0ud0XeBn0}zwKeW^jHpGtC-ifE< z`Ttb%GW`egM-}~bejLX#FP=Y+W6_5==v~fQzAn7Wts&b{TA1bQgLx?$kM`l6rho{5zyPn=@*DYU$&yGI_bey)Le*W}MZ5>yv>KZ-R|1b{_{jf9r2XmH+ z{!X}?EkW|V`VF-o;vn(Zra#}C6pO`Ev3VVt->E-F9oNVH*pB)O>Fc{4H}3FO=>GkQ z%RJh55%u=@x1AJ=ZC^|2@828hR}?GLjXppp5p?rvGcS<&b$4A+6x8ZJUiiqT-cb38 zgFf=deUb+nUrWY|^Wig$<~v${*!b`5@f%-pNZ4?JBQD(Q7v=T;`|7xiub5t4oc}z? 
zo{zTXIYsS5tn&43-@njjs`(8;=AG$ZWK{Imy5{D?^GzrC-0(QOKJv$Uk_Xns&98j$ z&Z|F6T40^A8(eyQn0)9H_kZfTnef^D=i5oKnBL3j`DdqoMX}1YS3iBB8+HZtdC2rH zDl7WK*G`JXbZUuR$?p+=bEo0?R;SM=ec+eu@_hn7NWP(yii)j%>gq7;vhP)IobdSU zu*;X%z2ojf%kO{r{!w+|c&zUJ4?gNcx+?ca95-5@@S=m(XZjbBik_OseI%GqW&77W z;voISw)N919a}sVn|Yf*>eoBXn@c}@Ve*|TytPAXr?B$HA3yZ@bq|5h-hUg)501GY zUoN!1bzI3R*Kr)(;5Xfrmwqz+)10MFf4&DIKOV>IzMLidzW$4X zw|dR5*rW8@_5T~(+I!9Pqt5hNXV+h~lVV~2W9R>S)DJqf!Kly5?;jhQPoCCg`nU2( zt^U1vwA??~yx{?Ri?oF6xX}-c>ta84jB%ZJGLHGNLHfZ4)gPvOeY5NW^pAxpzuj@p z-m9MCHO~FFx$hoGKO8q2M&l&97~ceORQinSGX2|mX085n^5_u#U1263girJS$M63f zkAq!@^b^O`AGZC%Ka+<}-8F1?&k7q{yU06U^X&Sc-lSMKKKA2v|I<>xqF9-3PW_Cl zXx~2)Z$x>i*qQ!4`J+1h`5#-`4z$kpXF48n(7TK@Tg=uxwg>#6cX@W!M@!KB)H(5c za31|(^T&68?%+-551Ws9|Edl4{3d*M{<)#!D5k~j_x^r5eS?x!T@yz)Y{rS_m+9Y| zv()OpVICc#V_KNy?_X30^a8z;JhRcOE??(Mzm+=WgO@zBi8Bs1sQxhh(qFx_@w7dI z-{<)=rr#2%H~j~aV)5X%#qaO#<+uQptm@i3@p0bL`fQ2cY@KhWe_tM{)1N*}4|v)U z-+jlXNZQQQmamFPt#<UUj5;7`!D?Hu|3Cyy?=Pq z`E$0k=h5~*{nEDwR~z~Kr(~7ef!R(Tp_}!|^BB!P(|>keQLF!E!e=_AV)OjLPaO14 z^33j}e=7OxUn_OW=Rx?e!4Cc~?XWlc7r6I;u;DW+{Q8<*FM+SwPKw3PR_s@7DLO^v z7pq*O12`Awf9B*YwfZk3W0)TF(-uGD5eL1KxY^owZGI~GzOCpKslx{6(H}Ot@{8I1 zA1(=-F4s5qq-geA`~zQiCqp_CJt&D*7|O zp5gh%M;z3==B~r@87Hob{n#;jCwx|4XRAN1kL@t=#PiNN{9CVt$xB>x@5<{>@LJdL z^Ow#Kof2Kp)oj(Hxw3e?Q$(k>*z}d@KbU*q^)9zE8=j`v=@%Z8&7%6 zb~3KzV}qI>+Go7v*ItDWgz zFxBecpA-wapqC+iXkPQ=;rUiao}&31Bj;N@s`F0r(ar2p`oqLKf4$^IUz`>u?SJPp zyDd`Q|6jjh@#FF6Q~j0WPn{>*J>z8h7Zt`I@g}-(9@Msb_%t6rj@z(7 z(@Xor-wES7uj82?JL;p3H*7KGxi!}K;76hRCvRVI$FmQ>$Mw(X4#LC!(on}sqQ&@@ zlYsS^4!7aW^e>dG=*N2n{;hYnz3S_WUKzH#fBNHFf292WWq0YD z&-;ek-<sD#zv-ZMrhl48>h!0NIvtH~AZN+W|6eCaKIonJ z4I66r3X&J2+;ke4*1g{%Rcz0{&3I>k6g9-uXaLLd;d+9S24Yzxc=~T z+uy2RvNGLt9%egv#C0QHZPTeGapsxnUwG>Dm+WjDtusD&72)Ici4EHLUBZaJ6UKEm zzVetI^-;$gmb-h`^QYX}6~>)$LH;{n`0e+<4e1YE>+ZkJxRPD2jc*0zp-*_*;-|k% z|H4zzpSmdDo(J`NmlO-S$KP8p4t;_6rMf&%gdZf|kn!jbTamgt49mW>*wP1IHztfv zf3xVzZ$IL-&bI&ANwM%b7#`E7N8?U06=Y4H&1o8U5z=={{F(j*qwy05HP8BJD4!z! 
zPDo#1r+nh%8!{jI!&aoO4#O9o>w4-{&{NwKhPps@)c(zfAw$+++H?+{$d1uS+D~w*Q{taqq-#Dv}RczuFJ}P8ioQj`?*w{GIs1 zG6$ZP?tAfGVYv%`@Y&5CxD6hTcTC@QQY>ll_16XGn?CWp(HC@5$93TFvp@41r_W6P zzP!TtHSQIgOJ~7sA>Vi!;)kC&=$*vPrawjepuNA5r}gl|XBg*=GCw?V&h3Bli=7`{ zS$(++w|%o0e%t@Fl47y#Y;nE*r=fmO#}{w8Ha^u2yE4C2YBK%%^G6l^m7nX_D>|il zRaU2Y#6kSt%vFL`1-zI0Vx z8{cG!VOxDFe)`MwpPhT^^yhOI`IY^T%d4~cKEb)Z{-+^z@W*KVql=S#^J9Z?UgQt= z53ZbC`ng7U@PUn=JM+Oo=401C3?#+E=N>;z_dlun6~+2D+kf5rFa0S`rvIFrtD?W= zaUK<2=-1HY)#dwx<+;LLzHczDB6*&x7z`gB|=~=NliLdfMPtVYgLxcE^vK z$@6IE|I?&c*gvFK7y8#zPw~>5MI;2ofHf6Opnz6zs8?x zJQ`oTnU3n`zO~rija--MKbS{q^-raad3h(cA$_FcZ>}duK4^VtKK@P^*Hy-KG9La; zd|~=(D_r-N+usXY+_=cOtDW8lf9?0bYZcf3_wjl#WZsI>#Ss1Ji@cWh8QI-kR~3PZ z{#svo|4o$#)Vz4!KJqh9ur9yydZS9Zw%=AY?bFjn-}apL_6uS-MCgAb$+vo()?@Pp_c$Fbov zgwG|#GFpE)>FH(f|K@#DLhm9s|LiL-m;0anlqZdF3BCWnQ@^5E<=Xm;bTfZT>*-tV zO#gzhR{zDNuA)<0`HJf7yr&b+-ReUDzJ%|EJQVSlIuFRqk)hcJj!sKT$j0Ua>R%3ngmxA0vFs zgYgX+PxG38l6=@@(|GIp5!%9z{{QAwuPYSDV``-(0G;0!l+WtrT z#&7rEZYRaU@rzg^=&$o@Zld|j+o|0Wq@PUxH22i%ujf(8H{FY>tj=Y{*+LxjF87vx z(?$Cs4`h2I&-`)!uAsH!437v z|E{{leR?PPtc@=||4cporBadUUqov4-$wFGkA~R1e!LqVw>-wtIDLWSfySqG=8t(| zK5VciAHJ~Zna4cv)fbKk(|`Zdsf%t~p8x+`QY@x#OKtxD6n;gCmTRwmusK6w%r~AN zcBX$j&#cv7|HqB~sWXi9vOJfw&c*iMkbL?zo*F&L^G@r)4*A_H{&nJud%Utp_vB-i z`t(m9-v%Dr{|_o3xYb+v37|LQ*S%zwum9L?uC6@vlSXz!>`ecj{82@JCviSUm>#M4 ztsmtl4tghfX46MqenWX%idv689vg%YTaoeVFzo)pZ4+*|eO726|JOf1JHhU2Xy-qB zl43DE#%E`LzVzHyw!P6O<10#4`Fx<8^-o<(GU+eVzc=^TcrLdx8~z@#$v53o@yB`1 zU54jd9ej%TCA)l`;0KMLdKZn0tw>!RhPD5`!4g~D>O$kB8CQRQp#1&y0m|DNjvfF1 zu_5`ay^}|k-#@0}M?d<@^zX|fb^7x=4)W`^1LB}}IcuqFtRs0KeUWGWSa(;@mfHt5 z*dae0^z4rM z+=trs`(NTQHe{Sk|JnJYivBM5X50Ukg3OEO!_W+?WAnf`NfmOA~}4#>Ct zSejR5buM3L*k3@>{Hh@Np!E@r4%7x`7#~})6JMBe^P^AyV9UM2Iy*hM+|ds$0>6F#IH3GscmDUY zr#f^t)bS;|TpOSM{}DFZS!`$e59ZZ%`g0vG^Rn?9GNyNe#KG8a<7(gJfibW7H6K31 zdGv?FU!QQ&yH8CDr_aB_eFv|R(w80oFfYZlHz}6*bFp~;&GxUqTZF%Ee%)PH3xAJ} z<05Q{UD2aU^FYmu&sW#g;qQdjKjY9}Y^%Q$U)Xs54{mzpjAO#a8~pg#Q;#T*|M$}Q zw1;E*+hUnx+h>o)8NVSmYwzTdegCMo=}(>cp5T}*L?DtbB6n^ 
zPWjN&{Iet*xJYn7CsN?!`-p33#)c?O| z=s1dEW!t~wksx;E{sXDTwVu8*{aqfZ)1TvD@@+qs=2hkRj`wt7Ne{z?bFTC{X-xUsoP~el95%KM-L{Z8sdns;-$Ix?#uj<8_wl-_BWT z^>=x6$o8KWW_kY;CpLP4KJsH5Hj-ko{8ViEH2*y6?49y~$`iJDs%!rvS8RtZ_Bi>{ z>rPq{edc=que>r!x!?5axBK6!&Hg5yU#5Rg&QZ}Ho(Xw$i2klH?R}sA(F^pNXEr)2 z;s>q2bYy<`4B>Ngmmdy)diQ&Nw#pyF5kLO#uY7*FDe&3({{iI#Q@#E(RX@o1F*?bs zoPTZ&uV>zw{=IpmqQCOS*HiJjG1NTxK<{$aA7e8bD_cr_C?CvFI{p8_~uGoa%K7S1=KiE<`9gbpU zx;Y*RVmI}BbNKD~qi^NS^q-YiR`hqdH#`5loglgxPmlP$8y>g3$Zvh<_~e21`i$aM zr+gmNJhMCa8K-;9PmbDt+QD0Qk9qOA4Nf^=3jFr{OPUmm^)sfp0o<$Jsb5j7a_!X* zq8t3i7xkU#KRd~mTK(he-x_&#mJhnZEU$n2i`ZWM%-j0V@y$P5^08yII_2|VG>_5x zL;KAy+&lKLRm1*={dBh*4z%{7HGO(CPv>Fz*ot=i8TrX)-kJUbIivBr+{$d1ue1HnN`mCu z{--T|?*xg1u^&4|%Wnx!MOXTF8V76q@YP}1a^p!ShkJU%j8hi9ZKVhHWIWsc`*j>} zJw5(O)ekbhqC_*@)aG|9-buuCqpl_XO#i|BQAK}vCP^OivF)@We(#3IEidxZ7f2om z5B%oWJ}j=h@XW10eCC7h9S;A?MPb(?@A&Yx$0xyK|G&e$6w_;q>#yuSt&EpM3p~V| zUJ_z|JiqR)Yl_5L{dNAPM@xBo6rEsG^P2aFO+ILS#C&}t=U3`<91kjw*`xG_v5V~4 zd*m)JhNVur>&XpX-vd2t`yWh-h4Z<7MzQ^$67RnyE8A`xJkmNRSh@dXOXJ!%^UL%v zJazih2l-z44V90)<{dH)`QSY3&v=Z3U59mf>JK|?zT0xwuJ~qX9dp`?*Bo3P|9@8c z=JO6{{WmlYe}obpvfo}K%5SNyJn&}vyF5~-e{*@MBj5I8ZSkWkanL)7$2R$VzOgv{ zDVo1a$5B2H&Z9r9^u(urc5$~0lV|SGv;CV>;j_=bgGsU2_wU?)uc3Y&SFGyVJMlH+ z?|)n3M?dD5>0dC{=?@?Iyxt6TJo4iAYpc_~(I0;&B;Szn;KNqz#23z+G5Mr_biW)f zJ@N0e-oB{^Ufch)lVV}}@XPA)f1y&jzoAaD%QgPW`JYsBh_gS-^e-Y6{hj1DTY}7s zIz!cgIOw(BZ1k$jC*GkBUYm#JSNt`6+5aA0v(SMzeKUm9j(YM3=RQrHo&QghVkzgR z|3B;0uP9cz_UZ@G&HAkAC)2-`v()OpS{|)GkLFz-E#s!QE8O<W$H1(vj!}wK~UF!Ys|BCt9{%276!Is)OKk^x0qU9QY<@aCkbSauErf8WUY7LV$@lYAQ&8zdhaRDYN@W0!|6+;QbF?X2z{ zFCCcQYuwI%wvu9DJG1Mb+v-;oE7L9Ik)U^icK^4ypSHxAU#5R=?lJwzSLA<8$fqy1 zfBZ3em$R1pCGT=88wdU#v8gj&jhml*C;1+%@l#iaVXensnD+PW&KI__pW5HQw4!l! 
zeCk1)m-dBjUi0hbm+9Y^bC~|*D>j$NqeFB|3)7x=@5Cmr`HIHXj}CEv<{y-N>=><1 z`8=reG<%f(Fm23cyS{SbK$y1PPRsoGqc_5D-#=!>HQzlq*yB3yY?Z`AumqiPKt%s_dFkeE~z|DF;zrQwKM%^<&ir5o65tyc>WEQk2vUE z&N}3E!?<<%4aw(qYCP0Ajf3;(51;$Vf~zmR`s}d(@$b&}-LsG4dA0pdpN`X3ne&pX@B)WLbi~Fhk{**jL=9%d~ zC&`vt{Tq37$ab6-X1V`YJvlCGDlz<^^{4sxJ7HYsosQ4A*ovL_g1c)<*Zu$M4=dia z^;4gj{9CVemH$6gzy1H9>Og+nSE38?CcnRS5p{0IBv9fU+Jkq!mY)({L=i}8M&nMG=Fwd;$Pkvt>9ipR-$GFC)dCfzI=iBqq zC4TbF??%qIcvR;#A0D$u=@0w<^x*?Oa{Kw=!232k`Q=@{K)rqbjpmomlnc;JpPr;x z;_<~>ZU>d$-#M*I1({!W*R_Si_?^T(I7@8u%}*TEyyj`c^R2E+{Pb=9bmV-CyWw>n zyAByAj;lXxvia(N|Dpd&*le#kGcNl|(`%kx|3JOsbh-TeZPTYGDHi6zb|~IVf34$p zh~7yAe~g*_MTPOl`D&XUE#+$~YTPUGxsQA>9v6QnjO!T3{L%QG_`))8CHJj+<@sUx zi4Wg><88a6pPhebC&l7H?*GZW!9q339lEA3`~KZYl;a-a*qQ!CMn!+Ei_hm!XZ>kj zJZ~TQv2FaGq*$mkUY%DZ-${NaKYVoHb=bjc-U#%J^sGYrRDd(`zZflge|qx z2X-_+*{(%KfMDQ0EG| z#Dg_{>gq7;w{qj+HXXN`oNOsw8Z1cN0+umyCm+7D8 zwH5uD-z;HfJ{A3)#J%P<&l;X@b?_;gzcq5c#iKg!B;V$R4U&%ysy{6Aw@ue*tbJry z{wI6CdchkvdyU)ie>*7_Uf*eP`?tpy!Zu`_hUQ~Cv77PtkLu@j&ipd{TX{uAf6cEP ze=;7ZdGLWg@|#}TCwU-!S)SHc?Elc;z4X9sAGv&=Uv@9M$ORXF_iwkuZ^u6aNwM&G z()wb)ijtM-rt>k|$s=?lZakWgo$24sD{A#$O888ts1NvwgWg4TjH^fr8)m6z%gDyoyQ9;^6J6relv z&h+oeBNhD_Z;sTFPrbbkoy5K7H7_2XZ*}k~n!i1AzQv<78zc2T|>s@YTw#(NUM){0q=!!JUYo=nu z51L-u2mVeN*Lf%W=Eqj-#1|f1{?fl(@$j|bk;BjJdg0S^;9>i>^ZtW6j?UlqkE!}~ z9@L9B(@Xsvw_~GYY-jrS=d~66;R(XbysSPNpE&4URHwGp_eSwlZ2B}F#&H@4>-@xP zFs!@g`2MB7v1eHKy@3Z$9W1Xu*-!cVB6Pa_x7DvGR;F7+{ZT)*{o_xgyq4IR{FG32=f`#Irc{;>ED|ai(P-XvR?ngd=(|DT$?W17rMb8&yVLJ(|;gm zsp#+iKjzLnP>!lv_r;)qOou5XA;2aC2vcYv%v0@VH?z6s0fIPN8pMmYQO<5ARJH;5V%JM1t;FFTI;tuy(@J)iSOR`>W}(*P2XPo z>+0(2>gwvk8~u3+-Rr2-#_yfPvH9O;^aHIA?GJxHOzUhsF`@t*#yt1`;r!c3RxTb~ zue|@#TK0LTBXQPjcV=r}mHS`il40{vyEPzx*jUjYp7imC+_>fd&8V%b{pLLjx*r5?LW`T1zqj`M-sne#qC!6 zFx$x^?wjG&Hf%_oc}D$9hC2Nh`19AW@HXGw%g;DAzLU7^FOz=A2d%E`vO8bqZE<+X z!v@tK-njGnk1g*T;Z4iVp7gr)mZT56{(LYi7t>>s-v6Yneo*LC!t;_}QvUw>3|TaiIYrJy&*_I5R2j&;`&x=T%{!RTKbMmP-)cFzzy(?H3;`o 
zyMnE|y)CLg3|_d*_g7f?sc`a5#~$#|rN_Zz$Dgho#pY|W?IN?HzmxjrWI@w47u)z;Z`|?{e>yICpx1nk&&<|7%_eUV{bBdrcHH{2 zCya&N$9H_!=q-1_Yxlo}S215nuRqDNaOWLGs_4)BCM3`L$i-&+Fh0#|-rk#Ub?_;ge`x;s7EkKDlYAQ& z8zdhaRDam)lx2VM$*VUBn_uAY|Dmj0?EJ&D^7^Zv+3UV7an@`%W;=OQ z`Tn;de(PK9sQ+*gsneg|5tttEfCIuy9JFzq#LceD&wKM~c~U-XQ29MJsQ%WpPwzbS zj2GV8n)c$90}j~zr}Sm-zr$I%us`+Oe+eE%$?C4*0nrV+vc2X~8}%P49#!;rg*Tr6 z+(OXwXo$`8?p<%(@)E!Gp*oNU+V+*itxowosCi~Dpue@nH-324>zn*Gk@-49PY z7=HWwJCT)(eGk)C8-7LcR@cU-x?!7M9jRg7QUB4xQ>*{<-hAU@{)+H{-u?G=JMa3xe&fAQh0poB4&V136O3#3e;&`u#g6~~$FIM=ZmbxvR{!+=pLvlT z^YaqyI2j#5?~PFwcU`kgC+xiXzgB!XKOa8( z{5zhNi|Mc5|Ji(ViL>@j9@)IquAKj=nP1%g=N1fg`tv@-eC&19&~bG<^7tJO`JnZu zx}|>X6w^A-c*F$1Sptc9|tXwRg{uIrx^Yc#f z4HwZLcKrC9eJkm_hFUy-No)p$N za-}oBKKY~YnEoB*2W#&CAX(itJRrJxCpo4Y^U?XY2Cgp?D*9_(`g@|v_OE>C1$wPF zn{n&%^Q>Ixzfz}s9)u4Y?BfrIy#BOpzP9tN;Sm4k<;OqOz5ZuTRxb9v%ZS=Lmghgu zvhBAa{<^hy5v_J>+JfUPF5~9KlVS`C)=@P#r=lkks!9+e^TR> zd1(Jp|1zUi|Az3Q3;Y$-Jid1$A56!`-w$mZ?YGu;S-JG%3yo9Wao*)GUlO)_`p|d% zZn^8>v*Vu;;j!1j7V+N)<5$48{pK1^`VnvU`KVm~)7Cnzr_ZQ=sa!>WH~)N||4P2{ za^G+G!9~`ed4lwn+UjS#It<63xzQ$zJ$XSmdHwG^cF${<(3f5RGm@2yZU3tcUL7w% zKiTG;YdjfWyitGkH(ip~P`e{Ye^LLu@Kp4KXQ$r$y6pfy(7U9Leig|B$!9-ne(IFZ z@)pq_HvDa8#dXhnci8y;+y3>N+1>sB+ofN|H(a%lf6)G?0evOAyT%VTRX1$wyCF5q zFY4beJQe+&IM;eE9yqtq^uN zc;kJ}nSBX-ea}CrZuFTY3x4BM-KMYr+Bf}K|E_Gq-tQLG zA2w`%^vaE2J|Jvz&QBKm>V4h)|B(s5j+e{rZ`(HBfaIlXi)&pomkjfxe=@(Q|6q|> ztN$*-V|u^?y56|uC4TE;peDa9e)5b*^S$yJE~39R?d8pvS>l<&*0e{ay?Fa|YcZZ3 z|BPhiV(-5Tc>J06#s6@@Z|@)Ut@Dlg4;2Ge^e1n)h|aV9;|E3Hy@SeK{%FzdqKP5I!*ULQ8U;;-xPHR&4o?fmm#RxYMT`u^M2xKmVK zv7(M@+xu^lN1S;^{fCQM8!yctEu#O8{yLuh{_djQe8yp1(ERQB=UW_p&`CZ#9ve*a z5`UQWjvaR zQ)GTo|B);%75!b|js8(V^6TDzxnF5>xHsSG&_&Vw?fK_hJgM_e^3(a^r>+je^sOG) z_tI597S6?bWXQ|3xktY2Q)*u_9vnC%n8KnOC~~34?3~awO4=T|KHJ{zH_OJ`i~cn>hy1F9rN<8$d380Sa*>)=v@)* zZr9!wRyV)$x^iVWtMwEAH!+;G#PwHCnmR4Kcl)*ew#&UEUis|!XE-Yt`~H1bz5f;Z zX})At*Iwi3hHco%$_1UG{u70xqW^&8@jVXt_WDcuAr5*c^=2y{+Z%q+_G|5$I_2{q zeAr+ge{1dK|F}W?0HUKfZ9x;|F}`oU_Nn85g|uhzp+R z{{HlE2nAiW<_TQF%=+DNh 
z(V09?zhIyI@YHFyo#oy#9R9Z2J74|UYuCbK-+$4EV*b1G1{@FC>od>F#XG@tjUV0M zw|e?h{-}S+RHr}3^`-~?b!24Y9}~OzDM9i<@5FD|P+R%POVRiTlKRwk8t>x|>m2a? zvljpR;bHpE*WLW3ciqqS#Px@^-x(6V{3-qakwN7L8)}0CYAbU7vvxkYu_vLE}&&SA})&zt=I_bvsG zZU5t0x!Cn5ZM9u5iq&0vCy$M9_g*`lAJ2Q#KQA&X`n$p#uYc3=>7P17SEku@4wjer z=?f$ewEpVq)bGKCd|ntQy!T6coVoAF8R5boZP(c2%9-%l`L|A1E*yv1^$$D`ijoy| ztLX#XES@W`6QrN0f5}+UUwPBlk?}FEB7C4v#=$lm%*w^`bFtw!zf*sTjK83M%?q10 z#@{mbjjhA1l@D3rxy66Zc(vz$Rxi%}?0%zdZw=`uUBlyboaT0tfo*a6&Ltl8?-Y+J z`n$qgT`D}vhh9`*gJ~W9ewfyICnK02Td^NsSnI*V_W9@1zYc5u;hSe3`;%w9)|vhj zS-IHtSL>Ibf1faY1~uMLbb_qut2wPV&T%JkUhh%=lF|5;k34?I$GnWEqx=Jc#6jz_u$@14t2z_qy9rhMn!*Dc%y$p(DZ1CZLdeCw&pc&?ajA3bW}9|@ci>Fp453K z`8E%1kbG=V{jEudG;RzxKiyjWhI?-Q#uvW?kA42qe(dw_hMy~z%g_tVbn(ic+MI`k zpSskJ`j;bC^w)m*9lhz1%Sgtj;}Zvc;P`1t{N4!?2U9;b{V_kwM^D9yU&pENETlhdHL~C8+Z?!2*m}dkp_@;+3qHI4r<0Wn z{rQ^T-$_=~E$JKE?!V!buO=_*KU!ogu>Ni3>nJ)w^a6e2$EH8Fcgt@G4{`I;xAIkZ z7Syl&Vap?z+u}RJqhZTyuQ>GB$9_*ArvHTY0q#)#0qShtmq5v?uIb|=JCDx6&*D{^<$@48P5s7`LRLz!3NbIwm$YfZ+Ue2 zW5U+AyyKC4se`@7Yp*}wPk;a0&dMd#9~~vyeLd7(|3-hx7xkZ8)K>I&!uY@T`|E~` zhps#i*k1cFWF8CYZ%ut+jq|U3>ZaBjPoH`1lAo#m{(3ko7kmF*z~6sK)jay^@7AsV zWPUtvQUCFxwpM?gr|AI?=!)w2KCzPG&jiT_t&fy%bpH93IwyHvk$8$WPLhwmwZ`<5 zetq@Gxve#xf5UZeJL6~g`Tiy4!)Ch&rJJamj+5Gz?;p`CnSa!OqNuItuX&a8Z}?Lj zlMb4n6r^vEd4lF|&p+Sd={QdE;qllY`PiWP!@(0LzWb^}?hS{GWX~RX!qV{A`8Vwc zo&73u17fyUFw=#4vvoW>|ExB3ystCA*1*@w(mMV597jIm87e<<&?kOu!#pdOG#}oI z-$}mzH!*zll9vy@b%R&J@U>%pw>1Xgv*XXXI*y|C(NMplSl#_iYNMNX5-}W5TQTZi zKC087{h;-cYdr|xu=ty&2$Bz?D`@`4{PQgiKj_T>8fEj;{ufZU4hrxvU+wNk+aJQuee2TCm-#=->;G*(MShkn_?cf*bu)i5Kk}mfc|nC9 zmY?)(df-pd6)f>M=>PZYPiFmL&1aU_`HZmX^(S1m?caaGc4+UvL&-vDg%HXN49sDHb7RHuLC{6jzsAJ99=GkZYm6!C+$-6e7O;WLEK zDKFT^->H7zkUAdC?B}Cp8tnmQL?IQ<4gDdx#SW@zo>tw zh*b1+3&le)I+E^AaAs;o7w>d->Mz*yrDQ-?D3W^*>tr zx9=Ut)t;^YKb5PkC|1aOwAZ?i2i>+cq&Cl^@v5KmuH=K3r@Z+4VOr;%_N6>#tG^##n7PhnI}Yssk+A9Fuf+Y( zZSdE=|JLk(7+1RKu8oiHAL%of9OAs+Mg2#LNUi=_-&|em-TaYxbKf~^(8m3Lt^ek$ zulo1td)*isdo1<*vTyhcecAEMS 
z@fF4DZtrG0c~pD<&s^fnFX}(H@YL$RLvKFsONOpD4zExA%o8LJG(Mf5`3Hm_KEp-y zw`Trem2LMPd8)PHSs%LlEB8DKpIv`7(K}xL1-<@uQ2jctSlu=A0@LqJY(8zN!H)Wm z7oJ-E^}Ig$w%zANRjhNxdY<$D_lfPDpw(&L^k@Fj-aPyu`S2OS=cIl?{e8Fp3FQOZ zYUjNuR(JlXUAg}WJOj$3cGQ2O@Kp47lE;3fq6a)lUh_v%M?UB^&yf0&tX#-TaiR5h z!slx+G_$EIe&Mj2!mi;@pWAn84!@oMDbM?S}HE{?yiSO@I1mYrP`#Yz^E{ z1}ge1Z+bmHeWgfV^C=xi$7LL^wqYYHm-O>B`Jm-H&F_cWcU@hWbJJQk`R`s3j$PuJ z%db4=LFU2vXWj=u>!YLND~dIr=gCRL{{K$x^!dY%`j;J7^oJ+i|FnfaJ^oL>fAm^! z{iBPb`A3BhJ4N(?&k#Nx$MRBt5)T_3@o;(n)tM(;K53bQ#^AH--zKth;kq-wlm7oj zTm6b+b@lg7<2ruj{#Qwyexv?nq@q7Oss0Te2UK3u|5qAUKYB7AX#P>j$4-%auY88u zpV`Fw_*<)8y5t*{xa%9O)%H8=U28s5`Tm#dJs7ug{HgqzF65c5ec1K4^wX9MzQ19f zQU68}sng%PB0A4`guFB>$DiJbpZ}eQAEZwZe?KJOP{*m*>hH%FCjWGsRl{4a4pSz7 z@#tS1`%SNPeE)7b3})rx*VN;mQly!G9WYTBUKQBBst}CoAk66)m z@`(MG;ehzz!;cMGA4xv(I!x=V59Kl2_4ZN67u z#;fx(9<{CBNuBXvgMIQte#&*{ZhXoPx;B*k0`TU;8pW`2Af+$5j-oyY@~V zgUu~_?R0+FQU6YnY2&%V>Td7cN6z$UYrLcA1c`$_@nf5Ac~&l#53lNHezmRMNuBXv zgMISDe*0bhn^SiFUU>89=58AlOi@zVHbvB;z znC<32Zpz;pc>B3GT=$XFTPv@%)>(hakD|VI``3Bd{sUXbqdyrv>ZtzaOz{x6diqwL zsQ*ZjS*Jh0BW519o#sVV_xUCcdRIi}Ic{ZKMe;yM zu*a)UJbl~>OMB(D|G)0&IN+-K{};LX!DK!XokxGY4#fIQ>l#uK^&c%BRrGff=RRnr zQ!=jck4XMP|NnSf>fle&JBiykb@l@K!;YVsv*d0`Fr?`u!O$N7HKeIhFtn{PvHQ0F1u?wY=u>uSDJY%6LVyixzslj>q^Z}nCd^%1)KjVaxUby73^-s7aoVnC9Z`$>gm#DMzZ$nwR*ypgV zk0}@OK*{Q^@zZa!EivqLe&`tWcSUBZCw0v-I?r*u+CD+%2NE|My%K#bo@eF4xaOzM zX&hWce>nY%m;C;rW$q2{+h+PT*Z%N8uYC6V(@s_{>}Pz<|Buk8WOvuz$z%Kd3w^f5 zPaXTMsDGn~RP=X}-_&~31D-*N!$)58C*mg`Bo8$I$o%syuH$4P=>q_!RZr+sQY^NafDMQyGAYX~#> zX&>J8#w{=LGf$8_kk=z|cq_K@E~>vZY01rx-uAeEwkDl0<0BtA;zq`??_b8Va8tYl-&}HuGvBCxIYLGM8H7?=HqXutw^6y z|4tF9)qkvr&a?l|OVf_KyiYa`YE~Fp6aP-wDfAI93XM}ft@UbO-`|a-g?@`L% zNs#;hFs`Cxb=O|~^p9@X>HM&x{zC;zo&My}f8G9{yr$l-lzhIm!OBc*KBkZ&7>f7G*-c!BCo1Ls&Oou_Wr|IwSa_4;%86(y^?_UZ@G4SxJ)=UKT#{YQ%j75#PI&8Fm4^gtIy;-HO_=dnk&$jS+t+zUCka=K(ef;6L?O(fO#b+N0CqDPoC(gY7DX(?*`KytY z3;o#jZ@KzG9Y?&~wRiFu-K>9j+Y)EqQU8g;WBMzvcZE0l$4jvHSB^7ze}bnUS|7?! 
zKIa*+Q%vi;lYY#Pt(exQc369z)4y`yJ9A-)^V2%}{=HNq`oWgo|3NF*?$Ua*b$-o; zc)U|=D{34aTLWJ&1ExRuAbgXOdUzC>C)d4WgQi!KPrMF&(udjj6RhLGAEpmIdh`Zg ze0`XH)ly%(`iSoRXO79rh3yK|pMT9YeFinJ(Tifu^L$R%xa-YJb%^?xiWoonsuQ0h z=s&H;CJuV#Np1W*{}#`)a!L6t-)X!bYTtEr;R{o?J7)_Q$-V{w|A4t^Rr)Dfz}zR>iu6FU?EswBF*4*hE}XKJN-| zcf0bguol8jo{unR)r*!}{KT>Fj%$AL=Q%g-<6XfP-{%e!Wd8!@_Z6p~_kRi-ls+cfbIR75$y$H*-Pqy%XC|`H6$X zQ=9qn{#F+^zw*OpSm$?HxmX^y=7$abK5+Z;&&>=Q{kggAt~;Gbp6!1|vT|X2@-_Q^ z$?C4XlgG@j$#oO>8@+L_{-}Sah*b2~ez}jj>CrB-V}7TE%sWBiVCu&<9PF*5Z+MMI z`>;A}&jRb=`sY~K^}|6B}pKELEOA5*^*v^ur}Me|Se=HUmKk5@iJ_!iP1c6;$3 z2OoFZc-V8N{T{si-Iu~=@4xUVf4|?KX31iFdflw`pGzY10;B#TMWmv?jxSX)p39>O zI`P9t92@kS2XelJcpX~*sotrr{B=C|!{l53Hu}uuZNii(Q-5>%;92n7_rG(qa_M{j zF&$5`yKAq0yZ%;fbV-k|qyD2sbwz)zD`LgsU9pbXNKkcPe#Egs<_Y5OhvXYN9bd=w z*oyu5!v0s^__k9odnCMR-&dD>a&q_o2l@Tccvdc)_wrll|1Wpc4>G=@M58`xbN>_U z3O1x7>OWRIs^|}oE27=+2P-_9m(JU}qN={GlHZnm_=#hCr*TlnU08p(=PfJ#{@^9% zhF@>^e7p^){jYEB_b+<G)}%>Zh&_!x8IEEdA-Z`Y%Co-dbnJ|DCK{=*#o}U%C1f z#p-VFsomU4^Id}UtvW{i$BPWp-xXH3ugZ1sT8I9o&sbTOH9squ&foHMeB#U#JH@ol zJK;A!Hpu+2LG`yLJ@MmfzWU_Vt;xGw{FPZZTnvw$f0)S1#m+zPQU3qqOX>D6SzVn{ zyK?`xTrwCB{i6Qmh!y>{-}HJm>I^lnqMMWP@Y}d``EBh_u~Mge9#kH))lXd=wk9q6 z!ad=+!PcZ34*cHjN6e+(_CFI@xtRX+m8)OJ6|1^to~ds1!+r<5H85I|RP=Yk-P~J{ zeAB%le#RpXdY9B`-?jO~>+00+!6Y9);JcXy=QKl8|R!KM#%Ge5je3g|EDUq&kWYkuYYe?#i+c}V!+^@%_2gFKM!f;{sl zeZxzh**ac7e{1!%zHsn!U)-!U`Ld19ynE}9lW)iW<5{`b@elt$o~s|EFGY!V_eYig zA8Bhnb!tcbT@k6(e^aS5J>cnx-#bC#VCu(C(em@GT+qY(^y_-#){u#C-JO7hs<-+!!kCxxRjM(;}^Ovly-e&9g_WL6z5%{cc&5Qas zij0c>TF37i>EC$rqN+PSdCg~g{Z^;rF~0dHBp*9P^1bWjT~vSH&%a5ZyU6V)?Q;U- zN;lQDcj7}2^3g4wU(`P@Dr)uLQ~0QZ$I$i0Eids~A38tsK;vul@>!koc@Ukj!5Tl~ zgq^aV9yt28_lBLG9J%d*2k!7%XZJrD%*w^|*hYW<3$LPNb=PU1P1Vi(x#aSCqo1gM zyNK9$PVTl6>NInAZ8UKWxQ19{k~$#}D|>IcJZDcU^n>wX;6j zeg8c?D;JwrL+t$d^89n<{@e5=S>3gF^0+xgkUZOeDKGs*{X0cwMSoX#yNlJUe#Yx0 z2R`z!LCarQ{gvP9>+GZt#!WFy{m``Uoc`fc!jzZxzx|>YXL_x(>)%u#o`?MW^7g|y zwxP}=OBUnP|KGH}b4fH?$BFt677^25dA%#V(LXB4JiHSd#NQ9exAAl3QLL-C{Isvs 
zcEVT3gFo!_vycA%{DW_j>^MKIv-jVQMEG22+kadAU^2c$=i%Ez^PM93T2DVw|Dhto z^mm07ZSv=eO+MQ<{iS)$FZbqK9ej%BADn-_#gjVkB;Urx2Fb?;)gNX(_quam`q7hN z_HCc~xjV1>|C^kD8_UWi-H+P5+Y)E(9ghU*t8)Io(HrM=6!jl2BDMMtNnNcTIxntc z<2agpkiJ0kyIy|$pw~EcwRT+|e#Nl$UY}d@wg1>7Y_tA(TO9G;Y4Fwk{xSb-dHwm) zrq5tjF4m9gD&FpPYsddC$?u5G{G$FNg~#|^VRd=DE3A3WW9FsV{XPKwo%ow;3BnIr zznYJ~AEtF)$1y*)Vn4pH_MU$|Ia#(qC}%^ zP99b6ztm9w8wKez>OWd|YV}us`#zy9^_H)3`1qd&Y|!*d@`=}BTIZdPuRLa}zm5lg zSYxHZSHAtR--R`IxMII$4!DSV+x{n%A8f0g;(+*Bdnb?Rm+cVSj<2%|MdSvj3+PC?D=MXpm!2aZTyU9ar#oM_?6G<$zMo+So5v_D(?Fb)>>z&pEPg0 z4?g?;tCN+By}$jB|Nr&q++slV@UF1t(LXQE?)A?Nv3dXSt~Xw*8+s{{2io(h{N|@l z$6-9%t`^cC4!ZUY?|*vLO~PC5`pn;dxBb2F+3{y5D;Lv$BmMr1J`^RZyS9Eb-pnOt ze(Z+$nP1%g#|uxL{^(4;>68~$-RFrs&U=s#S|8dU{(hL&c~^`&&$zBMYhQ;cf0#A= z^sbL*ov`J>Kg^c<^!8rsZU3+OFduvWP5ksFqj%TFmwx`rC5Jltiuz9!kvjdG`$`@8 z75!ar-0~7XtNS`MTjTt1qgUH>)Bc!e)W7spwtslqS-Fr;zlNGe9Q29bbWFy#{5&fc^31RK z-f0}H@l#ia;j#<&e7<$(!Qr~_wjKYvSex;<9;T_shaf!Z_|i>xoyME08~nE4pzmC2 zqyDaVRHr}Jp;{jesY~@IulWIqlMh-SI==by`R7}lenBVs#)l22d5J$9{`%AJS@j#o zg+qUI|7rhub^{)}{%uhC!Iky?&mHxHjMET5YwzTd{r?nsx%e56`9=LphKl~wjYu8& zrn`;{pXoo;n{RdSDVjf@f4;?&I`1Uk#>EE7#|G6OcAxpe);p{>81~%!p{u|D*pK0{ zEsju#Z2qAH4a_f9!fwc+;ET z@|g?YGvc+b@A-#DRxa$9=u5oawRiHk^8XiY@l(fqqyB?Mq@ur*eEvtzbV~csJmR2F z{5I}@@RA3z9Wg)iYd(C2i|7w0|JPdYfA!%R;gl<;|HqWGX3!VsZLz_8%IAvZGV3pw z{)vO)ooD;!x<&e=t|6JsFX}&3cxv_EsW+eTQiKomE~!)7>IRcIeJZ9ro_dgc>;=TI zmy4R&-mIG_dzm$o|OyZfl>eAB2dvEp1q{b z`e=yFcIsVk-14-ZIOBojf$8>T{**_@gU9&$_~8rgj=voC!n&Ui({DI_(=Q!(D?E1n zXEHzg{);{ovt)q>Y^rY9b@Pk*j}%mO`ZJ8_o%9Dk^J5%rkUS87KP2DKNkY?9_|T3L6}C%3T)?T<^86_W!RJ_WNIbemDAzlnYQJT|)Yomb>hHeyORsetx7ob% ztX%R}%KLw|E8CxrFIn9+eb(N8qa(TW8TB74JQe+2;f=owPw&6o7QY>*X&>-`-br3+ z(;xG+IK0Y7z2z(4zv~a-fqj1a!GE3_T0dCr2RCfkz5n@9(yv{2JE1mxL^e;p70@a9im^wBH)s^Kk9 zZ~co;|LQ4tO#imdQ!$sz&xhG=4U;~kD(a+uuK$Oh{Vn@#`e_Y(vn;LCpZ9U{EBX(} z$izYVFkAEJ2R~^2F@K#0`S2OSM;_S6AO5_;8lT*6k377%)o(uW*K=mVZ^u9IDBAyz z!>gFpEMDu-4gPd~%sc8|GS=#^_gy02cyeg~e&Qf~n4R>GAGGJSt-P+6A3j6)$OHTM 
z!y!Z4z5e;ez2THI-}CMzH#`(R`~HjZ!O2;{iBl%{3%BL%bA=0{(14JPJi!;=sf2!8)Dn*2|g!&?z_c&LHYvm_e1gxb)1T={(gMnJ1-nP z_sF;A;Rin-xa<2{+V3hj|6|)#N9N;KDewPtBd-TT9Z#~lYp?#w_b;lq^{sZ)zkF1y z|AcTeFB?A>zwv2a^9Q~8RyWX_Z~meA=UY6f^G@<@Tx^hhY*77S$FrYV$36Z^*!iW+ zzIpu<)z4p@tX%AQSf{-I*t6cLUs0^?{>yBQ+vhL(X-Ed!E%S@|cZ!JVPrfU>@$;`< z$3nh$g}3{9!=IveMbX~ADSDUG(T8FspLjYReo%Ss zdBRS{OZl&*$#At#lXXt-N;rXG|izA5iC=_{kTmyY@~VgH5k?+PCJx8}%P5 zGHdlu&;RE|cFfO9u+P2lf!_7T=^G>ugfER_!)MrMUht+Ew!iqG<1d}DuXHz${v7{| zNQCPNJoo>1>Q@x2yS6@gKDXC=`fMk5E_T#^xOimxJMnwP=Bi>d57Q|ZKjRVyy_2}v z^r46!G+z4DdidcpgpWM1k3Sr?-rVy>HaIXGGI{#B+b(@B{FTpN%9BrvcZ~hYec3_K&|GrgdJ&Ge5RsKfbW;!%uwioHdRQ>wf>56=%1PhsVDERejhF z?D(^hm5Xh6I-YpDYp?z$pPO~Q_~E6`sQ*|Nm%jRUimaFiUxM&)+=dONb@=sif4@rVwO|QQlR2xjk$5!O}leCX?`;7XJ7tE%=E3EGG znC9F2P+q2WUk@Fz`QEuQZqgrdsp`H@;0H~A>YY@WA6t>SIt&}1c$7bHn|;Ej%RaQ> zm;TiK{+-`H4k|ymrt05NKRBSaqF7zsxDL?zq^>QQ^cVG?C_J_LC;9BZcwUTO^O||V zR@KwH!rSeSI{K(X@@mnQw6-18@!$_rfAW`j`P+qI&6!gs{p0exc|F_p=i|x;Zm0U^ z>IVnZRun7FPi?#Zv(r4)hd4U62EJ7$RP={`Nb2dovi&=WbKa_XVsF0H!KY~c#{Baw zp453K`8E%1kbG=V{b8-8mOlFCopugupL)^i2R|{zYn;zp%)@Z~$BQT6iFDqXF4U*C z?f>bgAsIa1%rEL+X4L6l`TiX~&>a z?Gy*Z&l(=Espk#6lMH+PXdQOczfokQdU{uQeY zAruXk9UQ)JHJKshgrw&^}bd1pA;Hr zj_z^x=lUN1`<3+i+oXT#rn^r2shoeyC71mN^N#wLGp^{LjK_H(-aiaA4?cb;j}2OX zNj~v9OzXVUdC(uWVjU0uaL{FU4gBb@BjJc||Ni5DdfO)O+wp%x`4ww_|Dxkdc6GaG zwv|WoZ2Q7)NIdG_DIQhycZE0Ze^wHtf1W=({&W&2ula!3Uj0_bxQgZa$Jbh_FvOy)PJa8snuWW=%2m}HQog8keZ@A~?Z+!7f^kwJY2D5T8J!X{mzx_)( zj%58e+rM5n3%~7u8dAypqW;5$$HsFKx4I>BLH7xq8@ z;M3mr<%zKWmoMC@IW)?4WcR;qXXVn}Uz9gM{26(M$^4}%>geQ=UH`1M)hR!HM*T;M ziaPz-uF1FOF)ym-8BhCZ{!skngLV4zIg`8;tu7hr?o%f5Aw)@4s44-`*9`Zk+MFE4*F5>8QNqf!-C_ zv5&lj>if16zXz4a>;?3PH~+J>|H)5W5e{GGmhWtP`GNFh`~P-UF8%gDlGROZ%LRP=Y6-w-69`U+|u_dO#YO!M*g!?e!R*MRnit=Nw*?C|LRAGmVe zi$inr_1`?Rd@gq7;{m@4yUGve|!q)A}w*OqmQOx`N{=#^i z)+M@GpQ=}CNBt*?O4A=6Mb6_fAFJ1Ku!)0S>&;%keD*)o!*90oE~-D=@$92J{`k?E z;m$u#{`tN)4Dh_$@y}pZE~dvOXO$lS((xs$x~8wn_rDFvWxs|lt$~}$fQ{!gPxY$Y z|E;a@PH!H3pm!2aZTjPV*5dT1_zV};ul!-1uh06__De1uHaPyBOWeA~QSjOS 
zFKZspi(UVp_+@t8warWWLO1x~Bait-{mY74{q?*v`P3Ua>4Edk%_qfX+`2mRPe?v? zis?A;)}qe85?6oNX0ab{KC#oPVcTtPzUl54_ae{E|97%-Vf*ka%OBve-GY)8_ZyB! zg4lNdRgK#`=&vpLQU5ZdqQBNvu0MwlRKALC8prR+Pp}eC`0DBw*3UShePMpziu->p z9CYQL(?++OfXB}N4`=1V=MuaBSVR4aVs+Qv$zya*_uGum^M@VvFBunD|D+Gbg%3>q z^aYX!ru%*K59qvHf(z+~ueHkfhhMw>pIK|w-wsS4Tk9S4W9R?dIu1BZ|Nolt6eX*> zhQ~YcvE7zwy=BKIw?8!O{=j<CQj3IsQ-kqaJ_U z{@X>Z>7UMbLYT=<^~c{2O?TxdpX=(dQ%vi;lYY#Pt(exQc4%zyAJ2SsNxeHyoVV85 z?;ppqa;e;ZTl2hPnG{#_k=nNZN6%a`yc49asDG#MnEtM?=ILukvB|gh@rL*rpE&4U zQb!+&{+6Fhe#(dKoyI}d#$h7ef;p%VQam6 zKYRL;XD-)T|ANQAb;A8O!*Az*CbDv=oPT3nMainJjc*f)rF{@*zES_7BD13ZfYkFp z+VD_k$7fFRh=V@yV;eTIa;f<5{_QaE8AB4yqqa$8j20zuo^l@iQJ6^&c)OYxUP(iqL<$|A&V-=yhDP=~ody zXwPdBhaWye_;j2F^@m-qy8DmYZ+&RU7yI4QxBS3SXa7GqmX*tb?*An_>PJ6{mGjSS zc3iV-T&D3pUXJ*I&f;U zy+7E9=}^m z^=E$c7xf=2SZei8-bZXZpifs+caQVo^{()C{nm%-Mjq&0ksbTUOR(cYt8@Q%{Nac< ztgxZ?PlY3H9AJeB1K=ZwFN7&G?F$F7#Wu|FDyUw7=HCx644C{>^r8op)lVnDWv`nrHrD;i1m< zYdRnEt6j_Egva7F`S68J|Ma8yw~s#wTdcADuZGq>jdAV#b3^$Q?fxf;U$WwMn~tY8 zx>;Y;C;3tT(o@mjX&)Uy`nU1m0W~lEzOt?kzp!`Tr}2Z8@tjncA6t>SIt*K`x%|q{ zp78Uq)gDhj_@Udn@4x*1Xk7Why!`&_z5TZTw4ciUr>(rXApJ%CUE!(IzqxB~ex;AH zx~gB=kJfkcCSJXNjJ`Pj=k;L7{~s}) zVwNoMcs$Rk9CP5}_g(qy@2RunpM)=$+aF$I^VNB0y3j{za~+^}60v&p%Ece`FCW$E z-`rC2(8YMP5AS;8mY4YH6C@9`*Hx#Nk9@CuhRkCj{jJ92tv zf#39>$jYVm{DY1wS=F`isczV&o7OY$sDHa)spzkH=t4fWp_3l4`Q1JFp!JcAPrMG( zI?s6Yhpkx0gFj5W?MGMq^nE_8HT07MpL}Av*E)Ou9m>ju*R!A6=lE0mkgBMo+IIap zeKwRAyDfIqzf(M_(?9(l6P*o}kNJV#6|CL&1^NQX1M9XA^1UnAVxO*9cjxI{VJ)~n z9DU-8Cy!@Ht7p6BY6VBf#7pZ2SjzrVZFJB=%fRb6|Hv){1(bICPZ zpO2{jU=gX)pYyP$lRj^i{BhxL-Y7^uXyYc&7vFP}mttDyo$#9<8&rOe4XQt^^ZP@u zf73s*Fk{;v9dqe-_l1Y;pW`=>er$Wov!2Df9@DmuivH+Ee`-hlhYE&@o|?z|i1pEs z@oal_ViN~_;%A;9d7#Zt$2Gs&@EIt67M19VYX|Z+0#OQUBrMQAK|zah^x(BNu<2UY4izW}}xPez1cEBcGftS= z+V}kBXI~Mf-t&g|Ht<<^+263=2dOjtblfai(m1-o&wR;)H|jrDP}S`ycvH%+JvO zU&ym^vH9!x;*I(x{n+o{@l&t7%rELcx8SJNe>WL{eDB0IRDR+h@zlo8>)zsdRxZrL z{7(G~ip-b%q`$D<^T!gP1Be!Kn+ zf8HBsJB#{{7mDm*u&nPW!6OPv&Jj)G40_7ttSP?fkA&U-aOvF=*E1}8=Lt>{U-_*8_yM1_xXf} 
ze70LdSEk#3-trPZ~+dGDZ;nD=Br()%OxK5FEc9oYaQ2-qKiF` z9jW)4M;uK3rl0mp9%%hFgxCDcPsf4R_!iP1-t_cxKmW_SZw&|DapkPF{<yJNf;)_9I!Pd2e&xtJc4^!gv|%eFrq zN3y$X@8mK6TUzO}A#vsx_0J2Bjpqt$p7-CD*yMXBc8bJ7;;BtP_(Ag1_b2i#?y_=O zRDW3SrJrxR>rN+zE$+xKeDRcx;N|?AZ3nrIvwFGz|FDf$r@Q)%Pj$mKtm@w`Myl0+ zzal!%=P0d5C;CE1(7U2)o;vzdBoD0PgWtP?t-D<;te}BzvB|oOqh5oArrq=+KtDxczsEM>ZaDMc&8h-*jq7J^mEE z>y29<%3I0LC7;(fajR2457H;LB6W4xnzG&}XMOtAlQUhs*4g#XgAy=3wmh`F0k`#` z<7#;Y$U`@5dtJ4qg#M!b<%l+(lQ?z%*RFrlzTstj(4N<%Kk_|&f$(92ef;6l(_dbE z_r+bf>^H-&`^#-h!*9p`jjUX}^5x%8{{K$-C9AqNzBMF_yf-WV>_Jq?O$KG+WHT|Yv&(IHKHEihBg0xt>b0M;?>_&gg@1b z{cPO+N3yuo>CgYjFfZd9D5|>KC2`QZB0A6euPe>&cH9x$@@x3Kle`5*`06m6di0;a z{`C)E65c&}(??JHz;VpSe*fOq@f2n-Hq)*WNBlFLKRx?5`jH{QWx^>Ui+m{(sZu+fn|{Q)m1C@vL09?!eEws5k%`X1{IdB#(94 zPwjo+wT^zG{&NeCTKzjkbRPX1V)OiYr}m&=^FTrPLF-SSSNxLQ<;M@Eb>2yZ`LPwL ztHbchlqEVpIP8qp!2OL2CZGIAuXS9vXFBM7{FL(d*K4P*`%D+=Q`_Eub-t!oM>6Rz z>OWq1Y+P4Z(bhWd<4wNtGln}cIzIk>d|{tQhBsP#i>1T$Xcf3AEwKlq~l zrHAd-JIzyUY9FQ(eRVWX`G|vF>r>nM(Rzz3Po*yLTm3@%87FLY<`KJXGJD6c&1JW* z^OY$#F`ga&Yv1_u-gq5YHF^DIKKIB?;e?0(`0Vw!UPHZI|1g%7 z%fEg7^E%2y{|&Vj=`ZSEdMf%m$*+9>lJKA_e4y8Qvj>E)HorapIQj4y!soOO?Bj1u zU*{tiUiiIdTkF2%{@49$rIq2g^Zygd2e#GDdr_>o{W%^9Vxt@UwqK(^<&FC1MP)^Q z&0{}iI=UjeJFa(y)!ok1ywtXShRQPg-roE=Zl%sizVC<3uO=_7d-)B`oi5%Y%sTm< z7cF=Cqwv}J|4vpecAUS)mgPboC|TV#e&&I0sh>FhsDJrP)asx9|7=6XWIq89dV#6m z#!d9K{9Jj)j5rW%Hy%YKK^j(Mlb&U{DW@_?|F6l+V>qaLOs{t+PoTBx%dJ7 z|3`QgC0bq6?}GpS{gzI_QmcP@|BbecY1@Yr8y!KP__67ad0M{Bx0lcIm9N6Hpnl~K zo4tRXoyPI-|X^YapH|3|;g?}(p#kiJ0k56wT{;sZJk=xXc*^oLE`%Wrh^1ruSj zlfJaqVK2-;2h)FC`N7Tf`jfW$6~&roKTLm$mFqtnTJHpzU(|o7@Kp4NZ@4#~ehr<( zIbYj+S8qP!pgU-Ot;bH0d_%^gKWwm%KkR{~9rY`U6?JQ8Jhh|#!$ri#bA=Ucc+%^y@-kf6{wumU$-@sHh#$0h zv}@{=&x6{Z*$e0otxugZd5eF1E$p@XpWb)(SN}mhpMUA6a{Y57D;M&`>aLj=X!l>G zPwLxJ!#Gj@k-}4}|GK^TrW1V14VV*MyIJ_zdCGaTe6yntJ%3u0G|& z$6C`?{N-J@y!<8V?EQB!-}8*{J!%6Qmsk5^7S@Ou-~+wsp}Rxb46*DSApIGH^9k*w;PIJ#l;e8S88qWsX zGaHY5Med_WzVS4K$0vEs|LXNy9r`Jne?s!HQ?%zz$MGP1*kB)jYx41XFZ0|FE@(|T 
z@za0*&YpANxBb8N!{>SS*Xg{oWPt~4HYK5UKDorB{;qgbtN$A%4<25ZhB{y3pm#|f zy42-2B)_&!`G_-5vxzefwjzCn9Zopnf!mI`H0*NpoE@IOvwQtH-+$4EqV0bYf0it1 zKkNt0Pkqu))W3|>>Cfj7`ZvA-X~67pv74iU<(Mf4;@jah&8EA2vunHmLrP zZ+y*4Qy)4b>~{1?uX|?3z3?=T$;!oa7*u;wdHu;n_Bv>bpS5@L$WX`OzRmEb+bgf9 zsDEBC*to8+q7BbxVw)a1KKpy`dgGRt_{oEpddts+hk2S``>=Yac{P6O>M(3{_|$#h zF>!d<==hfg=DhKK`r-V8>A*bu9DhYjlQ(aSZ$9x7PgQ?3pJXeQs;5_doQ+$49SbJp27oCo31W4?m;4 z|FFGI7+12oYy94cubB&)Uv+0Z<`?zv6qyzMnMXr-$hX%~E`H1c8Vk`FJ3me|?uRs3!n7Lt-XP2I~^WUEGT4(=1+s?{`?IHhcdHKWArq7_pb-rSC z*WStF<`#n1KfJm4=`-qIG8(_@t*adWXuUlTnonNy4_dE&#<4yIByRq>y*7T(>Xgqb z5>K%%AAi`$A9}VQK{LxE4zxqfc z>~h&-Z{OtRCVX|Dzx))v{}tmYW;KiPscts!wiKlEi~0{2kvjeP9Bz8#$^$yJM+BSa z2$Bz4AKJh9^ZDmnT=T8oNq)s%Kz~@P`L=WZy#F7A+wT|m{A}mt;HkX-4(d2F_5Qc; zD@wHcervYIIS&B8ZMO}{WPVZqk-}4}e_lkppHF%I>~)Ghd6||qcMyah^iKRK60bw+ zpK;(xZS~jj;18=ke)1}>JiKRE?ctA{dD+_c!C!a%zh70qe^=dj|D;Z4)m`H^{pq(Y zImFR1>OWdUD*EgEnp#KyHhx3-oFH+~#!;TsCJ*$=L!SAmCk`&8AHHzjA-mlE=IwtR zuKD&|>pi~1WaeYXKZ9Ahm>wJG_wS6WC|TXLS3muu8+IDUj{1*fak24SVa?}t#e7qA zWqRd09_9;LA6k#UAEtF)$1^{+Vn4pH&hFdanO)up>pcGR6@P!`#qikm=M!1E*!hS3 z^!meG{h$*&T^rxp62Z3p5BlX2kNVFo9@XjJTvPIx7q1)J|G3_`kBi<)JZ2Gs=78_mSxUku4``)nMAxl=a|8`a`*3U}2-x$Av--A3u9Y?aeYxG7p z?<8h?^rz!Q{l|-pivF(f#^ay$1x?q6*l8ccLGO|}##JN_G(N4Pp81$f96oHYk3Y08 zKI^LSC2tJ}AF}(WcKciouYLcOXXV1UVnuzNJWBn0YS+bym*ShAul3@OZ+8U>y)_`0vRncF^;d^BAZ9kUJpU?7=ajXyJB@gtP z&+}k@&9^vtHGb;qFzhszi{+iF{IOapWVIMuM4!!E~+r9mH9lz55zv&N8{r1(BHaKZCJpJk5em!S| zacuuXAK(t>c3_j(DSfjH_t-fBI@@J$bpzkN zy$IXCE6ck3$&T2@%e)lLPruZIi|7w;KKZVpbJrRVZ+iEA>pXvK+bbXE-#9;PdQEz~ z`~XHCdS*2Xaqin=bv5&g`VSUV75!b|jsAw9=>bn!72{6g|A)DE543Hp_y0$wLbv6< zvF-g}W4lk+?YfO;dKN;=B!o1{ttiW_Bg7))mX^cGCDoEkNwHm{&?32XVHK4lX}Q!v z)Jl{(#czz~^Lc&7TF=;{xvkFm{?7Pgyyxfkem>88jydL-V~#l{dA#qC57HNizZa5k z$asA^F1BJXzQ*f+_EtWfFZ0Qdj_056w*4dXXI$+=stY~TuKoWJyc+jfU+6zr);8+j zEu$08f5HR8*Ntgedk4W(2l@huOLoPNA0*$^OM}pY3>#sT*M=#Z{(0{0m)b!W9_NG$DeAq4w z-Q>9CC4SROdC3EfuQQp?>Xgre%42p9KjZYT@{W&Ou-`5V^{=|>0}F36bC_}M_-7!C 
zi|H{_@4uI;Us0@To%U(_G5XYbc_%g2Pv5P>rN_o|rB&FjTo>nm((8X(;-_!##D)({ z{nnrMLmp_`R}!~69S7dm>t~$ssoRh2&fe$laN<&P`e)DH0vy^=J|6u#UZgkuJj`@M!Nj$Z!pKcQGhz-Bh(XZ1u zI7NTh?||uFdBe7|!-pTwkuI9B{|@{fphF-aCn>YnzwG?fx5TGcS<& z75a~qT#fqcd2I5jPxFa`-j%Eqo;UPTBo8!R@>)`#&da-h%^$Y7d3>9hUw&iQ^5NBw zxb3D_*$!?0GpPLFntJ|$en9P0va2=ywpBOm+Wb0FQ|LchK1%iUuJjiBpXCHir(A4% zJ-Nwo%S-&$hxSb#Xnfk2*LthN29?M17SJE&Y;nZPzr1xcY;yRMZ&>JUmHvBXak0<6 z<7#iM`ghd^b$szw`)m4bZ=w10*-7kN>_Y#s@=;BHt!sP9tLc=B-#bC#Abk)wTY1R? z(>(LjzmAjUnN7S$epu<|Z=JLEPB({@{=WHpKXRg@-uD09EG{0LHvjK`C9BYl@e)Kg zi#w?>I~Tjqf4qED)89#)-<6sk9rX_=GVYMr?IQ%q2W?!4TMzAfVoNZh;0SUdhq;^e~zrhaVtW&29=iP!uM;~MV*#zW`T6Mpc?jgASso%74` zK4jFVpMO)GO!rpyPZ#6U<92LAwhzTZ|M;kB`)~KlaHfZjZ~eH*am!2m=n9etvc08o zY_EKV%;R?-y!y@G0W&h1NgyvD*pzU2Mf(d|`zj-*L>y zo971iu9=H`c(*?I+5S!UZWfn3-hTC>3vIpw;`dIGzCiV}-NJABYn(m{{aYoI@oU`b zM$71g;Kd7rE0?de^uq7pH5=uVi)?yN2aF}Km43WGCg8;F%GEw z#6j<}xR^~JP5JbvSgTV$55k8H_V9;~Z*<7M8$Pf__|)b1@B46Tz~;Gs78lcDK<)MQ z{txgfO0>QO!1c`&>r8a)h;yNDv*Zho^;%A=ByU;(5Xya*pT1URUkF=DJeC6-? 
zyT3lEgRd7_A6n-bFSXU*i!VI&*!j!-;jp3b*Uvn*@64Z_4!>>x1Ja-AF+=x1x%$Cm ze2G@uW8?ilwVpl;{Rhih8_$*21fIpjCcjDlw&t}Tlzi`!I{H;KzIl`L@Pp_BpCNos z>KD`>j=KHaE&JBIFC4q#eYf4Q?L_oYqS}T2 zL#3xt|LKx%dN{E?sC>jh;$|~$Q+}Ss#hzcMHg(wG6#e0rmo^+*|CcV@@%6ub^X~ia zLr1&+=dg|ot`on1d=i}vb$rRL*51iu?n4fLT8CZeKU`MS^k;q}Wpu)RJdb8|{NvOf z5o}*9i0+{Ep}giFn18;-=>v3XYb0`?Hhttqwj#^LOT-Z}Ftg zJIS|ku|e{&LG_1~$G)}a`_}(PSoseNFSqSgPr}pq`|GLRe?GncZJx!2@zAf(f3ytL z^mj2m<9{3)_0V|{2ffy(wvFErev7xnwz$^&Ce(2o;|+D;oC6-Y@9{N8!kOpH{n;1a zU!DKq^RM>BcACep-+y;HHkj$c`<2-`F1mTO$+P+Cd<*@@vSO*}uk+)3oSIHr?_F74 z?Pn~{MR)CYF^o?h=rx}{&98k}oV+Re!SAoDBqA1^Cv`n%Fw_y;3YI<~|{C*#+=_92t=8OQQex73fF zVp`{&@S7hSRDO>Qs=w#;C*xUM?0c||+6juc(5J;C&2xfny>FHENqtx1%(K7mJJHdk zKmVgmKI0koP3E&aH`#A}H09@#53liPzIPf2U#B0w{-xh~+4C2_I-`H-8^3n#E6?}A z+xY$0RImTsM*F4DTI~f_V9;|_xa@U8B@4YLnYFy(RjP8u@QRy4BJnb*_ zW2czbc_-sEqVm_`>JM|5UE}uauKan}>Obdv?C*O&PM&@K&9k_$f5^{`AHbgRUG;-b zZPih%N*AtM^GP1_VV;Hl17&TK{$;FOEYAEu7sJ)(gTcu*<1j9WzZZHZeAJnZKfzvn zVYyFFzxCfvJu)nR@DIOx`8_k>v-8iRSzPeva{KMsZ1+4b){p8SRfSG!+xIU{BDK0) z{DuC5L?*xg1sh_?;@<4liwkGqD51*mV(`=W;rB{A% zM-9KU?lY@~m0n)_+3$?kKmQIW54eH;{{rJFN>;T_$3-{ux0F|rc^CQ*m61mMSCTsN zQ=Q-^4tkf=Y2S_c9myw7ea%+hsrvhu+4$9!j{5RD`EnNKUbd9jet>=#1deM^9+y9@AWO1q8cRyFZqFB}1J9$k1=!Tuf z(XY^dURi1S)4w7-^lv<_tg7CZiG$vi(JJm;X%*wZKO}aV&vsGsJIVJTJlKlVHDOr) zD`THneV2tZUA)$D{FA=E?ffU>DN+yGyflt(@Edldg8mBqM@o@4M>ANa6yi5L2hmX8|sACWwC;dNu^B+l!#eZk~>t7BY6^LOW;Z}FtgJISB2 z{%aon`x{<(Wb*j)br53g$w-a`Me zvZAKHE4_t3k5C<7G9Jjdn%CZVaz5jjUYd`;7p8UI3BUQV6?^f8_kHm8;SYRui?GLq z@BZ0|f4vc&#_P|oj>kW>?_a3%I&P*5e!Krtq7QMNw?hB%vb0hEQ6XktRu3I` z8c;vjr*YTzudQLJNnLPygc^Hv9L>z1G?N54%}hY<{!i_wUDAAIdLT)jEx%oB4Ce zP3KqW9~m3<-%9FC4|qT~Ic|B0-}=!0$pbCkRHuFq_R!h#G@o&TTYkO2Z7^s5u;Sid zf75U0o3-KYhe+zXrd3|23M$ z#Vf+Yb_+^Yp_BHJ+U<>$hdx{Lw+sDaq)C7N$H)4B2Xw+q9Q28wK0)$8=4Gh$DaSKBSRS^pi$V1CTI z&_6FDjrzBxj(py)40XQ5LGO|}bZN?`zot6%dk{Wsu!p~Y#=(pHYJ(m3>7VgmbIv&E zGY_JVoqy=lRcrj-iLbW*ZU=P}*hcZk>ecA5tt zwjy;+7>2&kckd3L9uk&n|83VFjw?UdQM;w+1k<%wKgjP!t$%pBSzPF^(0`zGnEoz? 
zO^2HPxyC^!HhiE@{MJWbL%#YO>y(eU@hzaAzQWpXU#__B?YE)z(XZd}(G8A;&yIic zEG~9@v4HoVPy0bP^Eb?|(0{NTuu1>eOc{6b$ok6FPV^+se8IF2{Jk)(^KSCEW~;v! zUs!DSy}!H2e_S0FzjT{FT>8}S;J59+qx_2bXUf|TZ#&QQ8PK?n=M}|U>BN1g;OBSj zysyw_q5n`>Yy8UR75N`|^4aeg_7%m&LLBt2^j3cBqba{7`S2Q#=6k1caEku^nYWFf zd;C6s?sx9vXYM%THu|#j4_zGxTulFep`(69u`0dc@lGOKhhTpCO|Z~^xU6W>-_H}i znjQ%cdF@js=Qq`vKc9cT#nW+|t;hj$L8n^Es zyIEY=PILMDol(9KzG7sa;h39{J{}cAz=A(-~dvISk?DIc;+oCW3oqp{5 zmvQX_RGqw2zoJ<6wbwYhVe`J2&acpav}CF2-&cBz@89)42BuR>{P1`uHgPcZGf$8_ z(D;mPf|>hFA>er)?s=9m91egGIF50qI~Yy2R( zVK>dM(0{CCsp;=ZZ=wHOLHg%)$@U9BanQS@PHpnp9xNX{6>I*qp8N&%tFB?|z0Ug6 zH=mppw%O_ZAN!@<7R|*7!km^G#YtOY|yz^e)z#kt6sax^l9Oz$6j{ir>=D5+x6$&EH0-1QhNW-mij@) zSCnY=zKCw{r}Japh5nJDQGYMYO&MB|2gIUrqDT zCs#Ysv(P_!YWh3L=e`2H^e3-9s^e(A)zOEd`R7SKc8a#WD4z%6!v=f!!-i+SyxO^Q zPYxSi-#zljyKfAiU4NBlaWS2j{O9+-qCcscp#LVy;}lzpxghf_^pBoK{a2ox&+Eqa zk8W}tUZ41}4YeQ3&$GBNAInSi#0K5uI{2C}?6mDM+aK}VBVpI~?De}%zq~i&wf9w? z@dP_+=LtHo)3sNB?fU;*G8vEOx6nV1(5Qd9|EHg>@;SkDzCQ6|+xUZ7T&T0>wJ-6j z?Vb1yr|1uJmfrKGbGLaqY`XDRuUIbIoBg_d|23}TfSqJqCw98_>IZHAj~|}CEH2Eq z(7#(cYWkBm8q@9bEzg@_lGpyy4J? 
zy!3i1wiiRqgAeqH-}KUc$OFk|zh?flf8{kkY_Lau*yo~`*FWzk3xy9p@YEmg-D4W# z+WEIUiwpeuGJ5>)Ci}&zue}prdjmmu@Tc=D^dBrUYx?Uv_#X$;qg7@X`JjuYy*?~2 z@mn9tOCHE}#CB(X&G*V@I7NT|^1mNmY}eC1+CTk+cW(XI1DDZP-&x7z` zgFXCVr$x5zTlvE`hR!Pszu}~@-1Of&i%U9gzD{!gtCr-k_Krt_*ysj7^8y*C*#3vh z%9{Q<5ALH*zHRr~2mHiA`Y;<^6!C*8ulcESWtT;tu3Q&5MSs}qs*A7se)qnx_tBTX zap7hD?p?`Noafq4Fzv%Hv0&#Pq`K-0-K@{nWPavX=s&NFH0rO{5s}a9kJl0W#6j;$ z)(Pj0Tr?~FI$~RXu5s1X{Azorad3+MuzLH<&wpr_PlVN{?>GC^RjdEMK1lhsFXowF zS$;}8@L-#7SMt&|I(R3M+VAgj@x#mf3jIgQNTdF1NFDjL?IY34@?27Q&;@(5eL1Kcxsc+_S6(Ne-Aww$FN6!nBCg#Lr1;o@vz1L=ifAI z(@WT%?D&63`ZoPH(End@cz;h76x-fp?sk6l$P1U}pUjAe1L`)_Qb`ycuPlkughdOdI+(&m%asomdq zhm1F%r}6ujhU-tLqrXfS{MTG!I{6i;;3;X%}_VVNJmIK&^jH4)3l`idR%0pf&v2(Et{bQt2|E}br z3;otm^V-`_&Tp#2FWJ@S1pJ_Pl40XvHz0LQ7}mUC)gQn0)K*w)+uJw3JgcrhX}tf8 zZvS2FLs6_k@0>@1^vVB*d#8So=R>j3KQA+C`fGlB-7;F~1AmI?e7!5Hn(J!$c}%zS zO2lm(r*V*ZV1qsU;imq+Ri4;LbC0X2^}=v@{Uv(cp~za#ndTdPw(55k8H_V9;yA9(qi zt#ywI@4jY{Z+vo~1t0s(+WoI~j=w)W#r8YeXQm7F^o`ANqxqBh!B^JNQEzoR&cEsp@A=R}OE0&=En%nYkNL;4U#Ncn z+s)!)pSyuSkod_$+a^90+U z5hNe9K9tw|o%!cmoIXG&`NoG0rg@1!eD%4LKY#95yW!dsUi`P>`zY^Nf@{aW zU*3-6Mo_W}-EtmjofE8Gf3E(t??V5JnxG5L0-q~AE}4mY~`J*KO8==<8MdKdn6pT&WxQec;P;HZTnC9+=}b}^DHim zD^`8&o%m|MKh4F@{FryK{m&~SP5OIRMknll@@Q82JGB+L?gD<0zCrxGkbFbNLnmy- zUVLG>dyc(p&bI#%mS6Aw&vo~_(QBQZ|LkUQVLzYW+wl7%#?K|6wRiH!_Wx>|KlwlF zLjRGnqEY{`GCDzj#!Yc7re*DG1XCSMKdmF4e!j*JdMA8VkKKgymB!T{wtD8UnOA@Q z#n8XZKh{0*-DmSYX8Lz@Jg`qcf92{|6sy`#q_!RZIEh&Q%FFx;{YT4;n*M!KSG)cp z7r*Tsi`or9p|MC6Ho*EAM9$OB5>fBlIw7Xec>~q(! 
z+AY2QEmvDntU@>K*K8+`_}oOC{f^nW)Zm`-_FRXzXc_&@&s%ebJ6 z!`i$(wjzBrVOVD4pR~Vs{~yA%wYtkc`rhTJx9cB9v$)jm|Ik%G=+ssetJ0%({)hao zWYS-u|9I(X(!cim_gw1jd2nLG2lBi%A$efBeV9MhUB@$CY_Laun0eu*%ly};b_p{# zeQ&pQR}PQ;{&y^ki=F>}PyGGY-R!48$trYXJkUv`cKt&~{FbkFf8Tdwpi%#oq|Wrn zl?T*uiG$urp4s%*ln-CGq0WilgYaR4J^Z2jpSKKkH+(4^@W}4VY`45a5BvSgSQZ!4 ze?ztN$tYH}y_>D$+wmv;P~VkIbSd#OF!f_I-}HUgcsp5K zjEDZ6_&qp9f7tXQciQCGgwtUy}Jv`TmogEH3aj&9Bfuj#$&*Nj=xm(ZBJ~r{)m{ed4!1`h=G}kbK@( z%&++tS6<`827Ba(&xKvX?47dk?|;mOcK*2SzfJ!I!drm&yEAN1h^n$*w+M;0LXL>YY@WA6t>SCJfuG`shzz z-D<ZpZ&kq^EuS|%(cEFNPmU?(bJ@V``wfC zoAied^e(CE6Ptd>18x7L($|Kjob&jWcr47DH0uGZek zW3JnWKdr+q^dBrM8ucF#KGUNmHuc`At$AEWKt5=FXy2(HJH@ol##0`%wU4wuwZlGd z`M3L*yzbzz&x$XHe|v37`0f3-o5h9s&DdDwQ*6+hz<2fdTH+4QT3ALR9z@?pbg2%nC#p#HGx(-*z>mF4ybvzL47{C7QH z{r>kuSzK&>9kKIo>-Xh{9;<{y=O>=eoOZZhvw z{b9Z9zH!too7@%Ff9Lx?`QeM!g4fQ!CFA6u((A7hzjRYy(^u{MTQ0dgugtU1f254m z^mnDV@Vjzd9iN--d30OcJF$7+fghwV5PvTu-%!V?+3N4b7nWN79`~cyRtif${)V&f z3~yq)wCg{IWgd1Odi5UHzv;MARq18hKigfdyop`tAGg3J{rMj&=GCM>anL&%&uqq3 z#1EoFW1aF5Z_>Y)KYZYq&z$?#zYT{Eoq74iOKdS6K0E(8lEnp{TyDR53wih@t6JkH z58ce)l3e15Ej5O*$w~S8M{#`U{+dX`s*SyrWeg=fc;ygc1b?W!MFyTq_!>l8A z9sc;2$3yG(L%wpyo6qEVx9z{1#fANZ@6+#JI_g&xtI&<{KqnD9|DbkLUZMYZS<$4w zuodxK9@+6hS8XR~I%!z#$Lo%KFs;Mi3ynV=k8u<1#TPcY>f}o< zn!RV(bl2CGzVoU5;pcOs=`fJR#V>wKJON?*qptdse#BepQTzQxOY5|rJ`4R_8L@Gr zRoL)%%jkste;&yFy$>QP_>4*IQ zC|OniX6yKN{ezQ;=|+DY@fX{FUOuYn?@DiR{o5SDratJOIOttc$GD2*fy^uA!-mfg zJ{@O4{h@otcJJz6rW-!^^!Pgt+jU3y?flzt78g7I4{`yV@jHn(t;s_-_|y6Ed=&b} zXQEO6EoCIrsU^0(-!$pzk~;Ki%FmUzsZRYKgby3+;qRY)`U8L4c<&qgS6}}Fn|ykM z>id@ivbgX%Hrz(8Sz0+iyaSS!T(0`!JY|>v?s+sVjHJov*3AKmKyRgUR~8IS(vu8%#i8TEGmr7_{DU4PP1Kj_3x*YwMI z7;N@8sa@zlSaQ_#ccr)3|1Ty;|7;hAPU7UX9}wHC-|AF%#y9^w$;VESeE1CE({UEm zAJ)0?#_Qj8{j9Lgd2jmdNmtzhKgSJr+&`Yh#dPaTw#6!RYw<|qPO$d-FY5a=Pwhhg zq0-Z&zjtM{+McbSZgf}2|4wY~`_ZVQj-SSxZ1geIacZ{un|SbtCANRtJFa+iJS_30 zEk@5+wz~iAUedp9_hV{ruJ@nG)dtZ~QKE&8P9E9+52|gtQP+{YLjU3NQKSB&l1Kl> z(-psWikjDW|2Ohd>`FcSDVomoO?_%Fpg*ke#Dfn`U;n@`bCEASwBNID@>*y6|5g?k 
z#ucm3r!(14+&eNR$L;yO$yY;)Y&_rLu;&%N0_OJ87 z&v?wY(0?Q=mPY;cb6icQysRqf61^VuL(nb^|9wK zIpdPB)pt)g=$>0{hR^o@tt>8VU;q2pKa7l)1J?9+v5k!VA@ebva*d1<40Z zN6k0?;QaF~4nOE5-}tb>G%xXoZT|H4Jr4fFSa|oAJ3n&A5f^%m+wYIMSzK)UUpyZF ze=eQ3WEHx#c%*esu<`nnT;j~H*#5^#Pm})bZ6@b4o}rr@hu0^5Y}0LMa-Gdj^)sFx zdK#~eXZ+ZT@P+?=aQCqMEhFL86Tkn=-1Z*i+xdrqEH0+Un(_D7r+MW8W732be&b7Y zV|?Pwx6psQq_Oc_X%)84o9ig7zN7K3q7x(z`oxb7FWZ;p=UH5cn_v5|dN(=m_4>nF z@49HU<$ruzSo^%+Ph0h%BN)%_e-hOw<_|L86#JwP*6@J#`PWGXdK1U)@4GW5H0r;$ z)SDjgf!YUg&}-am&0~E0Ao;wm%ugNTfm8H{Q@{A8gYG)*m2ld!2mJR|SB^1mb{nzT0&x7z`D^k~l;b;GN@#m=+#8 z=i)0mpRw)N_CLc}TMCOiHH8%J5081`=ak$;)VW^amxBT;YsJ~ z6F+@{wS1jd8pnpuka_4hz5I;VzxZbNJ~XoV_xqPv>BHA9w(djJ@&3zp4^l@Sm?aB- zknPs`Pv*z-QRttSRE_%oZ=e4eFGsJ=Z!llb`gbwSw%4ip!`k27?~r>|yd|u?>RYz` z;lMY%^4j&EgE|h#|8I8H5Ayu0t`e=@58A2T*d57aeue(g)1*Jo6ZzPNecD$p$p5aA z51L+@kG~hDb>7K1=Eqj-#TRCO{~s%^vc%0{ts{Q(?|qw8|G&^x-jT@&pXoE8ah-=) z)%%)v;%RR#XnnR4Tk8w`yJf`qU1?Q(Z5Pezb1Xcd^3W&f(>l$gAM!x*(>S)pm3ONC z{=UbK*z|3uT-3k#i8CH;A952sjo*K*sP{kVW^wUgG9QUf*#1}0eDdK(Y*!~YebOo6wXny6xPLX^=C;T;g0sUe1V;|Z7-;OEnfBXC^FRXRP zr9SzOGoHhhLt^!K|b5B$oOZ~I&SN{je8KX0uBpIv`4p2em1`OB$aQLJk1)zAFe zc@`J!bbi={{zGMAqyDQ3Pfd?x{=`A=k~+2V^ZsG^Ey=fj$xp_y`q%4MzHn2%%aNBI za9_A-^ys@j_|}~9?EQC0`v6zldv^H?NcsgOtG+fqy>FX$l4D5UE%6un50{Uc^k+LT zo$@lf$j>9#c?$B{H;JEokiJ0k56(Z|;`9qT$%n^dgK1vk4_p80g}V=!nTN3Ow|;)) z+}o(P^Pl5cT$m5rslDD?5@$^xsa?DNp(7dOqhF!_yfV_He|s~jGd$gcRq*op%eMP*9jBw%N>FtXZ?!hQ)imEJ!lQNQSm-}m zX4Lf8I{rtAd8MyI#v=}Ttv8!-74d_-9%{!`)F~gljB7UW9{J&8U--cX_nP%g_}Ghm zSAOm2hvBjJ-+?SHrpFSx|L>|FbZRS#Rp_YWw`XcR?WZe#=3VGNR(fnaS6UNz()%Cg zFhGVi&VDbPwZhN)-7|ZIu8|;&Mj}7+lht~1C-SG9>9trE;xbJD7`S=XB zC%gV(TzSFG_4mIW^(%^1Uo$_u|B}%huyLa`VgIlBrbnK|#ojM9-Uh!faq_`*eDe>@Ki}fYYxPd@YxV;A z!&Xb&e93NaKR)E=pL_MlQ)k1&@n<^j|Nj54-PI~*)Tn=Y{yEh{ZH`ad52`+@M>-$o zYyM%$$4=2J&jKU;rFH5LCv0=-28;On!e_4i$th30HV&`t|2tV+*k9*Y#``}s{{I*I zFR7Za-L9oPPLaM6EVlo=%&6(_N^ix-{Q$iyy@lVE>k82=-CvnM85e&q$6nJua?i&$33snQW05nK z8lg`+|JlvrV#gP=h-gKHuI#fh12%GV&6p{tG<8V 
zS9$ZvX#1bUFIh#u+Mn4@9<^0BueM<_ze4|kvZAKH^8P=&|1EuX%5Ek+Z`y}i2it|D>hkfQ!ZB|C#6vN>+VM9Nn~rjFpQYj9{-f<8|UU z9V1({{hGgTay|ZDXyZ_?B}+|z9gq7MlW)4W%BrGH z^N54qmC*^Wo1}m8K>Ix1jb+vK7SuWMdvJ>W{w3C0cJ5upPk@(NY4Cd|9`^+OH~#+@ z_n++Q_#orA#LwD0d1S}`%0?w^J9a|7aLT+{r}yXKR9i-Po7a-e|}&V7uybUvGYMebTf47R}`z-9^1=mJnaX& zCGkT4vGP$(e^+`7zbn@hw&OgSwe$bpiU0rn@9&nL^TRDJn{h&zw%EMOKlk}3tnWRg zf74+=?S}vVrH<`DvM1=jverArToL`%F7zKSBQ~xptqDBo{ogto?iH!fWPVl$5B1n!4}Vzou{S?>%rUow*@tiU)NK##39tSBrIW?Q_SY-K z`;Q$?9{osGeN7xatS*;a<^}fmeJ=)@^yfNY^3&I+@ed1s`vgJqLHYvCpU*$v;^+xF z$%n^dgK1vk4>SL=;g9b-dgrj>%TFA9(~s=)ww?cPWpUws&i;QV@k>^Q@x2(5=NIjXS|M z*PqZ&u5oOwEA)?%n*N%{=V;TzDG#W5#6hp|)W%QW7N@UH78mm?kJVw5kKKexUikd> z-@kj;J+=!M?!NJt-~3#4|M?GQaY^6ba{2jb*WQn{Kgq7Iy%S%1Jwa^4WPXMI(Nojk zNqsvPG(FU|KHcQFZIp=Q1v*XV}9S5AgVCO&K@lNV% z-@oVLPxA`>2g*pJ{w=999Xra?Ra6~_gWgFzwej=%vAB*yf96+ytHTC$9O?{Hzw(DU zOD*=|`~S2{*z&u#96fj71bF%W-Ht1|SzP>*@e9Zw(=RB|LO1QhY$uP}I-XaX{Vn;} zh5mzOMNNO^*Da$Hwtp8*+wP68EB^M*g6IcYp7P?C?1~>hnAUkG73RlQq^=3W#wVTg z{Os$m3>*FBSI>Rthts{*^*sNEo{E_+^kFu9v&7@oMsM=jj}-b3m6FOlXZVP&~ZGLJqze4}vtXOLL)Ax88ouFe!Y~NK( z^4i}LKm4Her}^d|nt#5<;RjtZE;g9vCH}C=k)QbUd)GKWthT~0pI+sf-=d4{f5w#` z?5N$EjAB*#r?wscJFSDKPwiZ=(0^X}s7e3Y_76{2>d_UwK<^};+T^o+SsY&N&-`k` zXY~u}*FMlW?Edb1=6?3k?ZO@xt#Q@ipQ`SEwYT(ZIt-}I@9$b_D~eTL(;vs}-boJg zWxvDx3jIgQ3>(*#R$;qxUEpAZ`uTX~4{Bcf;K})nLtmizbw1cBl5gmQzh*C>KWzEL zk*D7Dt+#}&j{jook`rI$d1>7K*N@j<9ccOt$oQ<$)oks<{{KsD#sQgUq5o)EYy9M^ zuC?u7^@7K1K73&6H@%dXJdpf!`>;A4$MTfNY~nrq;iC&3bm5h|o*xeR<-)%?>A_>* zx8qOh74s$a|ChU2T-fi5Rp_R9sf}*Ly_1Y|eue&HWu&IRE4_ukBWOCQUCZxEUVFRA z`BtZW(hvS#NWP&H{+g}+UVLHZ3qN_sHFFOQE8X$ZM}D#4I`A~!f1df~IxeX55pT8j zP9EF&2er|W@vsa1$IFVEp6JyrqZ8hL^Jvz;f9Z<9{a(RT2OC#;iQ~skk+`7~{+g}+ zv_7@N3Z0Gby?^%~hZ%qV;$|m3_D-*L_W4(B{CP|Ne_{Tuxx_3#(^$Bah(=gJtS zhZEa_%1<05ZZ>^1<>y&kct5MvIgNwxVS_#VVYMX}`QsnoeM(sE#_b0-`|s-WFW>)m zbR3Y+zb*AEidC)A9kkzH^l6<_R9@y+=pQ3B{WV{zs`ml*Tb8f<@Daxb=?lc)3&}Us zzG}Amd+`N#>)vnO=!AoVJ8AJbXU@CNYn}c6cO;98SA>u4!cgZe+11)RdE8!7(E3!} 
zQ@hZ=Rc4x=PU{qF_uq!6t9jZ7eS%)=Q`^QLlzNN9oA6n_@_A76%2uD=@2;==c?etS6p@=pDVVpVIee&gFoV&+fg$97uipO=}9`lsg~ zVs?=ax}vyPm>=j}>8pA`gG;GK;;SBA9eph@Bieyu;XTX zc3#P+c~?e@@A;ape||82|I(!|L;6w_mTJ7(@n=gisAqnK{+%+?q`!A%bb^jCdqTdB z?_HTy%tNnxfgfc4ApTxRzM+f5n4TV6u@_%hak)?Z_1Lf96=uDiIT)Y1*_2|`-ibDTx`KYG9=5ZXwyu2&3i+oqEDmWPqEN{po}cA{vG9sRYjfh&?kuBJI&WT{N#b@c4dC*-~p%T?_Yi6kNkY+ z3x40f=EiTo>XH0QUis|vZzqe3>AY0D0OEV(fs$2U6R+KW-bpU=XTF90gJq96B(9-DlgH$%-M4l=JMBoDOb zvo*Ott9Rn}ppI*H>QCZ~6K1aRy6~(I7Y2REo=$+)){z7f^Ec72KGn(}G-7-31J8p?>uLGxcSMc?o|Dxs2*`@vrp4tuTj-CuoX-3{Tj?LV1c9xuNgxZL(f9hoON&`CZ#9$S&R zCJgJG^y0Huul{t{_>vdC^~WPWK|RNtsm^{W{rzJyPNoZeq&DX*y_1MxM{N2l^dBod zP5P&wr>wtjSyj}jF7ScgmC*^W8yC%LKdrXqYhT7seyV#9KYUFX-hKG1;of;`h0fp^ zCx>TquY7j?XCRA<>9LyL|3ve=VoaLP3O#DyKPK~|ze4}% zHya%l@q_ldO5!$-_5<%!{oxCjjnDk*0h@$Nj=bfhyPnG7vCqFlSzP$tkKgex$1DuR5?FGo4y8GW$Jvh=Zx$`bhM({EqS_ajSD02Q|-Q zE5g@={mU$$kA80dv-_7l>-ZCY`s}vUzuxPgsmqdue$frvUf1-k^DXp`o|^tnh`En9 z^YQ98R6gP$@zl0{2C}$VTzQC_-$|ZNk^BYp!xy%@_AkHM_0b)}PNTmbT7R)0!E4{Y zbh5aZUJLm77asbz`#(74qu&Gz{bNR>{(2u{`nSFYlvnwQgFf+N(_iCp75!(=Un~TD6eDN{+SoJtiJ!|SzK&CqT`9TT6^`|`8Ty0$L7f8YDId;4`~ z9NWLxJ3o8p0YfLiZ|5Jzvbfmw|6G64QNN;C#rtN9M;h0@_}wo2_Pl8vyU;%`E794z z(kg6Mt_#c~*yk(T{=4E&`MK|0I)BU4@zZ#doz{7mjEk+<#DhO9@t==<^IJwvnu-8_CKclVAK5{65XY$&@0v7DPKoX|&*Ea&|F?^ypI1v5n77j$0o3fSPCh3{I{?cj7f3>cNzE z0sSHW@lG$l;mccu{Jy{J@XFuqxRU**={_#~Szo!@ielB*^w0lLcqci=L*E_o7y1vB zj~ew)&;R67XZkB2dV$_ap4p7sl%GpJeb(xf&x7z`D^k~lVgFw|(fPsj@o>;>x9#!! 
zYlh*o^AGSU=9|UqPo5+XeY1vzc*yE41TcjdZzoe&4T zE2Gu>s&}O|p)QYR{e7jC_?_7Pe+a{wKKst4Yj(pqN51gavkt!nUc3HW$7lbVpC7M3 zJjS-SJe#zjcZ&RP0{j+F=2z%HR1!An&vgK%Yo82nd`|6Q!S>05c>5JC7s@5JAPavbdNYOKp2i@eROs=w?ViEy-ui_;&p1B;rBp6bt?5 zl^Hf3d5YXu8(q@Zp^c~aiD*AvvK4%;TsJ>&#y37W4?hUMcamqwJYJ_iZ1n7x54~j3 zbHbb}KXh&1%KO7_-+vA3IN<7X0@}9wj>K70Z?=<1_We73Y90B^uh4&_tZ337y~(G( zhIz?Sd|#&jyF@;i=Hu^$-j%FHf6T*0)1F_S*uD6|y1&``S114D6Jf)#Uw?V!ZL0e( z9i+UYlMz02Fw}7+yIOnox0e?mwqaNN@D}>V5u5afhkVlkJrs$9jF;N@c^%i{+MngQ zEG|>F=~H>a3HM(2pYM6cXgK?>```9#yTg3i|NrSfR!*>{zxK^_q^46|W*7N+1ltaY zgWgFzwed5a#aoikafkU6{j7e9{;<=2dtSZd^DcBoA9?c5H6AhjyE=Y~E9?1Z##5B+ z`r7z5Q9h^GnvBe^*#5`M2zq!|TGi{ly_MMH!=EB?FpZ}+d7$lIwNLZwe7%!C45#Q1 zvzGYN%}ea~beMJTOLwe$+%)>K?LVqf%pdF|<7yw`t=8~>=!V@izy7{^B1x0}_)U+N zjLCNDo!SF}e2ym{v_ASIFZE-mnAUkG_2$P`OzTrSYW*F*B$d( zN8ilXaB;oORq zf7oTlHMc*s;oafAm+pPm*8ed`Uv~W2(Q&{&z5hT<{a~Ni>DoK-*yk_JGrhVJXMTnL zk+D(#jVI^Z>y19#_VU zL+Q`XKaXc|sqO!B^(%^1U({?8q)*S?uwp?_qo>F*?;|IsGjtKTr;OY>6O>bhB6 zEUrAn&F>@+e@{$!()=*(w8gK!?YbX_>4(kCZo6YY;`aXA$>PHHVD~@osvp#G#9JM& zcqblo!{&LxW`2eKal}UbX9yqp^u>N6<+D7O)TvEAucI{I_*+t+_HFe#f3uzB!`Fmi z{WrXIp@YNOVT+~04TC>ClzPrT^S{?1JhuNx`jSzrwehKL*oINbHJ%eDc6_e&(lN9S2_Hd!7EU?uOa5zkT~Ap>@k$cRscM9q`!oZ$nvJ z?D`+(OCO4oRbLyQ>W1Akze4}PlBG$1M(|D^8P9;)+K=hg5Lz16SSh zq=VtL;R92@>E73n zpNpT@m+@#me1=o>hwT>m&g|Ckys*v95B%i#eO{q2+y2M3kD{KV14WF-K!;qOLh-u|7$$xDAP#yHkTqCbA}QuOer&qHc!URs~pVS{Twe)+AZ z?-({Z^P&}Yf2}(Iwy(}3pN#p{r=I^g`}wu=uRn+1&OZ+*zhZuJyaD}?=|jAwdAT5K<75BhB!WN1LjSzX zFn(8B)${s4cKu18@+zV)$m_xSbCdHuHV7ZKB7994&e&tOY-HSpGhcam<$-O77|-o3>16HM2}*X#eMa;Yrzk0Une4?nL1_KSvzo*Xx)^QSK`jW^lk z8#?KzW~;x62Y)zZ?`IGH*cz9IgOAwzeaoI)-G5_m_|y5}FZ3TMshad>yEi>LWpf4;^0bR5t%*bC?n(>4y54Bh?fu>}Tl>uNlnr}oj}r*DwHK=ZfepKoy| zaq4`Ny@3AETHc>veI~m{Vv73;7(zyD=dMmE*^~=6F zEv)y3RI=grU%XxO~*4f4d`j=)&vO zP{$(&oeg19fIN$>Q z|2RA#x?wlXuh4&9IbuzJr+w%=+lt1Q*vyYSY%s0E-wV?^pY&z6`g`$(Ie(tE;)@sc zg)Q&f|M=EJZ{hj2-@lJ#ak1b3avfo=e$a`XuD$x(D@w%r@66vW^dBiBP5Q&byo|3w zf6H?v>(q~byeq4Vz8J?vv;MyGI{o2_M{eEx$#uHn(tn(D>IT;>41c5k4Zpvp&g=Nn 
zUDYq=AuTVLjMOglA1xy_{dL~8@4wI^L39VbD_LKM{w>L;KjUE>r*UwK{_yWNy}0RR zpBo9+Ed1*&x^Eoz%4gSqj_Ww!+~VwWaR_Rk$2t;kTGPM%|0#XuQo+2yLjSQcQq$j+ z-U-j=VrsV%RDSsQA4qJFzCirFkbFa@<1;U8#a?{jkTbSj`?_ae3ZJ<4!4toDSRcH$ z{dbffTwFhYwbT#xiJh*EZyC+kJlp>#cA@`xnOW1Hym`XIyo@Iozjun7*FIfr@(eA#S4D9!Agg2;kC}r|97*v*zphNA8LNhbAqgy zpB?|xXG`-FyT9+g7^vy*B%k{TnI737`o$x-HGzrPrgJU&nQ z)%5>QbM-5VRbLyQe*W@Kat!IaCH_MHn9->J(voL-XdTCGj7J<~JhK^B5kF{ron$=f zVwc5!5}cyHe}xY`c;X5t{jq<>!^hlr8ar<+|ga2^aP~f9PixtL{IuUltek4~7l*AEqyF|9{1MKYaR_-{CMH( zmR#g3@Ywd>QGT%D{(IC(cD43S9=DegBo7^VUg@*g{s+p;n*OR2#{uM1U&|lTagP!t zAGAJ{*Zi&d=UbdUKqvXehYhBAi9am4!Q5ZIa_e`((rd2qwe@d37arUHk1Id8t$zQK zt6x#9YCBAA`~JmA#B`&-j`$1x2g^q_JvFa({--5%_Izp{dV$_ap4pnmeDQ-#eB{Gt z2%nC#p#HGKEx$S9wVyl|cKhX{7d)~4jquy|U!5#2m0t6|{$H~GiT--uNchwFVHf%j zm0UIbos7Wm$Z9&ZBo04u&}+Tf=%t7s{D1ZS8;(7B(Yuzq_4081A9lU%!P)oGzv-Xr zyuhXP{6o^eWL0Z;yb~X~nLiglx-;)W|KT#ysQ(sHN4{+rUGc+D9Q01&sZBn|cNTAW zJ}qDQ%tjCVVCq+!al+Pno%N@0J~=D&zr6IDfAzWZ;j{BU1JbwYv4Hyz>A`~&`u z@+#6#q5r%xV&jny4i~{X^ws$O^W?;}fb|Cv)HPCjUPeUsyC_vEFR)_EuO=Enw= z-(!R74|DfiWy}5F`K2)T_J>aS#~S;Q*Yx=-zc*fgIM4JM$l_w-w-VclUAzB-`qTLq z`j3s#kx@m%tWn_v5|`jiJ7Y{-W%Y`fL( z|9r(e4h=hgVEdELzjzOL?f6IgGCh`y_aA%0UiY1Bz6<*m`bRhT(VaMUq5o(}Xyduk zsr z{m)%Yy&eCwvbfNXU4PP5KbVX!(FwY(srlsPYIg;hccK4S>1oto>&Z9W^RjBfc$(Li z)2a36gFaHedBOvKivQ#KuY27c-+E(rt+3t;&n`T$*rMLa_;&qCOKj8sPx1c$E%HFg zD(0{AG26+b_IiTE;p270JPZBD%gmbo%FA`Ut5ZIB zr|R!t^1a`@chh;R^e;7S`oM>-xeY!$|2dY$#lHXTsO=`BSoJl1p&NEOKc0vFzWXCd zqyB5l7^aizW4wu8mgoL?9eJSFe0a^zc*Mae`oql^UH`|&I-}vfuiW>6uWp_*uD$;b zW^v)V8$aux|NjpCO4Wq@LAw8GDR0tup?~z$^mme9`~N|BK+Quh&?kQS1jz&09*9$4 zvz2$M{{ERuZM4r7M=aIv&RqS>wQrsdj~)MyXK}ILzjV}QzKY`gC;OjNe|Y+oPwhf~ zS9)stYaaJ8Hl12p560^1_|)>W-fNtBg5-hr`s`+LF@MUF@?nEL^7~g_?24aFTjS#X zl~-KktkGp2r61e>k7aSO{ZGU7Cz4gI;qgv<+`q&8=%vW~3jL#}roZyQN51hfPmpm} zlJUt0(>naUFs-8>^Xs_yd+~*}UiscNukC$kSm&T6*SM&42X(gpN%-;)==p!u&EEes zFH07${sq7P^Zj{A(WrlnR@=AD8{I+8OMfqh9%&u^Ug%vG7puo^z+QY|wb?K3_wBJQ z!fHFLeQW;e173N!{*&z%gwOOD$l_wpgU(;P^V$C^zx9t^LsC&}|DE!Y@sqDu#>&OJ 
za$R7|F0dO-^IM;}__4useEhvIt@BR!&5y0vi!ZEv$I%z8^^4K4>Nh(peropi@YwYa z*-(3Bm6FXfS-)xC^rwC8qWV_J6TY75xYhHUN$!A{nytl;f-Q>9CC4Tw>$phg@ zeiR@6@j-R<-u( z2YLO#Z@MLY75Wd9ET(@F=QyCIla7bK7g`_6Pd>+a*eNm}tJiUB_5$`3w!E|Nh2{Tp zO{R<2I`%)NQ%e%^OZEQSY7^J;`sXj5ANyhW4K>d5Tj)Pna+&_FvELo!AS!{~}4t?Grz?VJnLZb@qIA6TkN1o%ju3uV3rK zrE6^VrjyPe377wRrR)Ab>fStFj;dP!E)}ne3A;?s_*&QYcO+K@m^{j);g81Vm&IjevrT5m3;}#iOF42p$y`C-_zEwZ5y)*_EP` zc<<-EZ~akEuj$!qKV4m2U0q#0`}SAEXZ!zA9Y^`|PW=BtM;`N*tooXG?fRdt^7aIo zccK4C>1ooR&spS~PI*~X)VXq9U@t<(aWDBGeSxVTJ4Nyhos3hn)t}aOI0H3$FO)qvr;A`28Kv14w;s-aP+`(p9C0-T#?>TS?z)7y6Huk(&Nm*A7zG*oWp3 z2YuqlHq`MgKhNUA>)ZST>URk)qd#o4Xxr2GS#f5VGIY%4%dcJi{;ntd=nh&R$@r38 zt$7~3=Aj$5J%3%PfwwboPjocuzt{MD<8$K2h7U~r*oM6aYK!K|b3j`x|(PgWioF2b+Axv;3~)6E{EeR=yg~68hl_+wFh)&U;+^rLg^iD|dYM z0~2{aw9j9o+6PEqE%hskRqQtg)DJqb`QH|=w&|vM^i${`Gtk5GwT|N`@@wNIe3qy6 zsm(ZS9~P(oUKSVc`XKpfUg}Tm@SbbF`S;yglfwJ{I`i+JS+)B8%TY2OeSy~Bp!TCE zR-u2+BaQ2L?TyuTF$_#YzuCxlRL~yU;&IOiwpHudU;g&+FAt`ydW7P7{&`n*J>r-~7sNJV}2ZTamgZ4Aa-VWZEA( zbHe6dy<)+ITOSIqeg8O=#f9^Ne)V|%VMo(vP~%RJH9TJZcKt2=cO{cJ{T2Ghi1BMZ zdF;o?H{Dx{=d(QJF`GUV@q@;j#M61v*RuLMQ-1K9)lXk_!_L(0w|MhmUwjT8j(51; z+pweGf9ZS}KT8&Pz;;^_u&Fa!^(*x6m6tDz5Abi+hAAguQ=TFO> z{m_YF;wOIjhX-%{Bz)|Dc;5x-$M>?h7&@`jwO2oSSpO}_gbytAA1Wg?{h3FMj@gct zC*A&ey|#}j*{XcTu|Aa7{4S>3c{u!FTIV(2{MaCUV}t4sSDiEbuFFU74qx7D;gqYk zoe9tXcjrIvnV%I)lm6-b1Wfl{@{*;Gv2F`ivXFuAFeB1x@ln0#f=kf+Xe@W*pS=Ic_*1ouI3Vy54 zC6oCT+y8LsY0_V^i};W{GCt<72p@UaU|NU2AEtHQC4FEk_Tvka-aBop^=F+JCa>^? 
zmA`fKFTK{;^(T227xvfo|3_W*gDtVswRhraH{Jh1>*=%5e?eK%qgus z-x;6ZRA>JAi_f>X@_Q$FhRf&=@A%`Fuf6a<7v3?tOa9uk?t_o>Z|p}6chT=3lkugS zLO1QhY|U$LDIVfHpCI!q^j}zJHtIiHc*wWof}Z%{Ck}ci@zf?ieVrJu&d2JOQD?>n z;S2fmXRZD8#DUPc>?fc9_372`zYdpv{cwVN==p!AenqjWwO2p$$~Ql~b4(DLa|(fk9#L%!iM`olJZ51sJHxi^GuH~Gri-#=%4 z^swt6Ms>WN;)eSDBesq!(fZo>R5$M=$8_t8&HM`eM@mmke^+{|h3%EskD-$s_{hTs z=?lc)56L&w@oIL`M-y*Y<=fA`{rG48JFN1Dk6gFO%d7i8^kg3Pz3+(HTkHMjbG1R9 z|K$1bPV#E+zp8tBTwCZrT2|EbS6=R;!hGy~yd~p!CrBJj{ic`lk_Q@ZZ#nNLIdA{W3HOEb_U>Nyigx|_D|i(1nGN?J*737s@mk0ChSq;BiR?F+cW2<< z7^vwFPx|?bd4uG=PWanT3VQW34z^({i%WW+Q}RK}cbX3-<6B-If5;y?eEO!3J{M+9 zcy{X>p1FbN-OfMvbR0#y|L36kLB>&(Xt5ou-QHgFonn$tUZH=C)bvkzhm|K!Q1eW$ z1ro;w84twY56L%lI=+tUu@(FAg^3@2?8lRT`-L#^$>~#WeBt%f+xPGADrUOa{x%nf zcY^5}|B@en-W!>k^lx`%1m?x|Vi>Dm;`6S?O+TF%e4xEQC2^~R2h==|tq5Neh7W)F zE63hHcV_tL6PG=_@_)3bx8EO)W^t*Ve^!3-C97KFr(bl#PUnYRZ2zs2rBQz`V=y1n zJyun(H{u}un2lbF_(A%iU-MJvqz`z>Gn;sy{LY3aPq}u+JzI7*{NZ;$vEHwigU7c2 zUKSVj)4tE=U&$_XaPr9bR5$E&e%OWnd0E+{Kc7QtdN5CLzKq{~k0AM=^`ZIZcZ<)r zIQ*dI8y_~9<|Y2H%~9{0`^1VzhHbAO`ohldJqI59{d+Hqi|M~gdH2bEewXflB&*Wh zY#qP$`wJ%-jOW!Z^zW9DCjHyWXL_{C>>?l3IC&@|eAZ z{?6n-U3^aQ$FrTu-+1bxfwNYkFWde{v$)juKe_r9#d-<-bpr6Sy{C4ef3M`K>F-K! 
zab9g(LDQopw(0N2$1N}M+qjATmY)j`+nM>7QD?8?8!At@c-s26zx^{G2p4_-58wIG zgJ;o~9seZr%P-UWuO@!!w$M#^&DL?zjX3iHnO~v*U|HL!|7OBWKJ|t=9&ykoe(OW~ zB@Z;dByM%e2k)}_!ydQnyy|P7Sr~TD{T46VE6>!?yhw{k5dH(0@Vs$n+=fBu*Xud;E&{`=Rxr{QdHY zr*$@d+UFAX6Smss@UMUBOTA1N@8kjB|C$bi;`giS{b#U2#?K{>wRiHUcKvfMevWII zU!niPGGgPp(yF%qe)@Y?W*7N+gpA*v!4IZ7;_rvv39r>-H()=$F!PHa`pl=M9uT(Q zZtL1onmvWs|69?@@#@R5fNHsup*24gaOhLv zwlkMo{+es6^Zy5Cap5{RkUASr`6atrr*U*Mzmwc_ew~5)VxXo!`8poo!x$g)RHRSx zu)(wre?Ls?eA1WM>hH&A_utsez3$U%9P<#Q zADuCGr!Q{|JS9`S4hN9>cc(^sd4FH~rzzBbNWsaf2TSy|=yhUqAnS_50t$ zb-eirZlLEMy6Oi9)OLcbz1rx8&3tWp%Coo>`nSpljr!|-q-#3mu{`rmkT{t7ZCve} zJka`Me#&cg*kCe$#_N|KW?lHDPh9)n`$Ff$7vDQ~;R5p6-`nS|kt{C0-|w%r56Lcc zbn>Wn{dq2a=E=MZ{bR)RXI_eYjv$}+4-kKf-pP3QnJ0)JBtMN~TU>dY{O~nl*z1ko zo{YF(j!x2!aNS6WqHVik5SkI>!F ziJ$qBhYeaEIzIk>nAUk6&-~bm{rE!r$FKa(C%$%d@YhZM{mW1NCOmfjc{Gbl?fidB z{h-c6yw%$HHq?C0tDXPpO1#j&S3aug?@Djs*YTK_@$_Q2+FxrP`#thO`U3IyL-Gw> zGA_1aKfbW;(_h>FvVWZ!+;Oiu8ie@_^L$`~4BR7W&7Ln)K&) z#LTOvf5W_e;%A;9e4z1a|J0Ljw(>Ul;cLRM!=&D+n?89?*kSwoPS|?dY4F96r zZTn|jMainKy~fea`s_+>Y8Uzsm64kMszdt?QpbGQzZkmlam#DcQT^nB#@8CpXZ`9p z9!&X{&>vjoo+UCx;|N0+O;WPc=Rm^4cpJ#ojKWkXXLpSWE`4#%lFR5z! 
zI~l+$)^y6p$I0WqE98ULhxU)ZAEtGTV}3WjkA8e%(hWPE`}WP>9VY+$p1&P&{R;5e z_g_8bQ_P=_*S~e`yaREkdCBuZU;J+-Hv1cR=(Es&xMZs7ue|N9j6=Thw8Zb-__*b1 zJ#p(p{p5khn~sVH^pWiQZ~6>8rawH2`PK3F7tfOi zX32sdL^tfF`4##vD5;wCXBdk!zXX}z|M}~0*Ph$D=(OJ*7S>*Mx$j)`%fr#d-hUU$ zjMM8PZT^EAcZ#ZmScQ)2Z%-Z{$8XQae2EwOFDxG!zbmc6R^HnG4|X-)Q*?sp5BkK9 z4R887wEd%wYdmUOz496lHrOXW>@m8}+JF7(8^a!7e#NALM;F4=c>l?b^!|H;SzKy5 z<>F`Uojl?=5W9AKkxO2o|Dy6yjn_$><0#XkC4ROW`X>&0tv4H86!C-P!`EmlZ!K>L z{o(%q-uLPIj^01~`m%Sg@xYuB=3&SG$^7zL;`d*cM?T|c$>OySbR!SH**Z?4|43FW zHT_kmwkxA!&g10KtUix9vC$Fqi61*1-}3V;F2v2ReOSF4pSQIBF#GeLeg8Mt&BE-b zT8G^D$PSEWzrW}yKe$@_{q?6!chya@s{KT2qnmdUNnhXCh5n;uMooX}hRf&}{kvkb z|L{)jVZrtZg6Ib_Ptg3zhn*t%hEDiv_7eKTnk%gI;8mCWEUfj@fp;Bq$uH5>w*MjJ z2d6LP?;o`f&f8d@x$^4s$2>a&_s2k!{v5}W@161_NE{^3>;bJ)#1EnaXnyLF@oZj8 z=?`<>zRDBZ?tf4?`0Ue;IP`#d^l9HeCVcrL@&8XhVB={&S;Jy{`nk#KT2heCuh2g- z*7VnLcpaG@2@m5D2UEZGp}gdQydHU7nP2naGh9Z0*!8+um%Z!h!LawM{(M8{nK#mx zZT}0ixNscj*Vpq8J@w~`jISu(Vn5>KQRDqTdlF}Uh5oUkQU9IB=bKKs_~{=$&^w8n zo%D+zWV-;(Po46?%eZC}?~@-U{qfW{zh}=&!{iD3zu~vv_!RZ_{Yx*43)_L8t~R`i z;;q)!kH*`ooB3PHqsaUU{iCN*|IMY2eB;w`u!)1-y>uOUAo(dDw#AiqS^eRG`>$H* z*xqj8p;dl)=~>$k!^`z={5}q}{_-p?Y=2_C#QU%6R_hbp^m!=skIXgwos8fWt&hZ? 
z^7Fn!J_t{mPrM16>dc??)5L>6Ou6BvYucABA2$7wTk+u+)`G{5f4W&*I9|`6(C;5} z^@HfBDADTqs9pa9FLf=cDD;n!n*QWD$)kUsf5Rk?{T}(C>8|`Mar~I?1Zm^wn;O51ajJY8U$V%FIUn zm4|tGS7ul5Gv1X}VQU`OU6T*mxJ#?Qi}dz+++#N&<27N}@zn2y-|RFg?EKy>FIi)^ zN$}bGZ=S^kz4I@}?|;wY^?;u`$*${R_g`|7(KP=;|G_e&N&j|UM#mig=h5u{`Ac>7 zPV=!F^ym4bPxQk!o|F$8WE^Zz{bBNtzjp8zD}Fn;l`cN)ov%6EYuwJi4Q6pMJvP+i zf6eoXF=@;SKUn+!$Cf19eAF)VA1Wg@o-3{D`D}N^Ha(o!K0)H3jg#b=f4=ZoK6wK# z?e|HZ*{NT7h%-*u{j7EVD;)et=$^j(eLHP&0dYJ2?`3hZL=?EEMBiuu0r^Vf+s@2>E$raraN%{z&Z$Nr0X7y1vE z8BO}*C!cylS5{Z`%!7Q;##cR4KX!^~opRdr_g^vS&Pn|Je?o+ks#l; zgI-ZoEZ&vt>ixm;Ty&crUTy0qPvY7aapTcGtUl$z2K(fP_J#hMQ_q?nIzRi@+s^vu z1&n9<&(Gpw*US1PyZ@n7ReEq7;+;fHH~Q;}ztDeS`KVFV)Mjq`2DqaF}=2;PkuK+o?%Z)=(Es& zr1UiDU%UTvSL&@#)d4=xJIPCJ`eVOoad=Z*Q(JkB2OI2@AGZ9(zAt>G{o62O^MBs( zhbM3FTGu`*iwox+z*>LWr(_qpIUWgOqZ>A_YwKHi3jIgR%0~V7E~8`iBY8CYAOAba zv;NUV5kHvrXMXCG&x1?phcDz$j=uHoEw>80OnJr12MkPvkNsio`p>2P{hjvLR^3v) zvFWEX@IY2BHT{|2Xbe~Hqg}CW`*&(|b% zYZv_GtM3VGe{J)vcD~}P?1ydt-;3>3``bSE-=Ht4Ds`1>OBRi1Fjy^mct>)^A(q1(K3;?>6tQqT4e zvVZhz#^3+mL0{;b>C$A^zJI5Wo)j{_LVs6!n)Js{zV(%tRYjdE*Ht~|5y=PX6HNWs zDUxsK;&8T|d2GeBKDEPkqf@^1#dSUt-tdR_9`(w*FNC-8`iHKLmnVpxiV_{O{coW8 ztr*M#BX zn~s`1@%J}nx_GU(^KbAf=9|9s{U7Mt__Pk);3qGgU!i|q)|&pVw8lJ-JBZDEnzn!9 zpm#}KGB5JL^!06i>XZ*&o6l1E!@#{S{(9lN)(Wfa*gfr#zm2lrv;F_5^ljJQ?lPlX zwn8^U?Mt$&?UBCPsvCAXKXfeg@0J`j{hcs#9k1z;i=TS$#3l}=e(OW~CJ(g!a^a)i z>fHZ~Kg````>jttY9Q=);q?d1+w^ewZNERPy#X-@hchdEEV+?s%PmnJ!-QQ@hRYFEmaa+b#VS`bXtz`m3((t}v5tJT39lKXDMh zXME+uPabI7hvu1|I(WdP^urgv@|rtNdGL*tUuh4&}tZdYOUE!sFoTRJ3->0jf+kRk_Q^E=9!;59f$FZe<}T8&Q))ncfn2rVa}mHo3_s{96WaZ zb5Q1EdT{+oGLB@`*T$#1VW;!s^dl7heR*a%}Jiw=-8AG zZ%g^L-ulM|;ll>|_`|WkI_l=#58W*sd%%f14gANez1G?GpJ#DFuUu}wG#?w3tU{-B zJhjQKm4W}buFnV^j}mys_E|}&iBalZ#-J>HIF!$`mGNgpFGfb6F#d` zKfFupXPj{EFHZjR2AfR{^M~K~*MI&qhsX9m%nMvauRqtm$qdDC{o{x={RgDJ_Wx_@XB-e6LGPsA?4%Aq z$om?7nx8u5gSQcz^3@-Hv;JQ${oCQ@x2 zzD5uG{iD;mTv7XAzMX*wW1yzL)BIe}^ho;fPU7%!-idsWzCir_kbFbNV;pNb5a)Q~!o#rWuRqRL9w*Nn+&#q)J9(on} 
z$Bdf(NgeZ|&M?X2x;yeg8&~u3_rtV~`I%qmi@zUVm@@O#ADp!3{4n($*DwE@dG-50 zw^S#^rt=RrzRpLg#^}GM@@U-N-@4+b&q9A!dYbg-dt~y{aj}VmKC+HE|4bi>- z5{Dl?L-=%@CH05gPMAEnNNm$d`HdG_^V*k=&u^+Te{b>m7IzZ2@vxhae$u%5!)|X{>A7=X z|ADaQ@R$G9UBB=34|e@|FN=%qhZv_Ne%AD9$Dd9j_){$O&&!OO{_qb7 zYu^9)>)$qd{)BUH8aX3uveE1-iYLNr|MvZh&cpPWsMp`-I!|ywZAGyP9i2RCZzPD# zafRvMm3X0ld}QOf@pbT+{w`b>HL^?p?|L|t?92iwY?CN zZ#w1TXFTGdcM>;S`5NN61O_#^PuLL z-N(;3omJlQ^QYeVmI<9zUp{Zz8?Ss0e*65}%i?0s!@BYQ^UHguenqjWwO2pb?nn%N z{ATA8FZ3TOAJz1CrMK{pM5tf?6Q8vrjt-jFzIJ>*^R~QHU*#b$#k9^l;Ws}vNI%%1 z`ooTI`}R7oU#S&#JmWoE{^u^$=ifuKxG-PCEp`9X(>UnVRurq!!>)g$pRVR7cA@|L zGSaC3La9R++wNQ9_fB!4VEZCL@t8u+6 zt-@XyA!}bCh<>2;k?P-Fe121%cam>>*kB+1!^|W8cH_H0u}#?itp|Vb)9XIuwa%`8 z7|PjH4*gN*8{IXnj)GlT7AUZ2t>NPm}(xZV|2jT#(7Td#DL#MY zF}>QKxd!{+^misa-udJmt6k7p?Pc3(WSE_cUFg54eAJ{rufv)iI*#!T z3ExtG|Kg+${uI5&Z5-?-q^~rt{;=sq8{hw`(_3NFEgtysTRyiuymtOGsmu4)?LW`r zQrnI-U!sMMYS-@nuYT&8XQBT{RxFMBx5{X>ecN_0D1Pr0HIM5~;Rl%~h`%3_Z^(G6 zr;V@vetg00^@Z-o4nH?+xc!`+PI}!Ocasl;0J<|5|`;n2aw~WAxYSM&P$P z)rmd}{YT5%M*VYPCg1e$iQhZLT#)}gCm*yvv~T?VFs<`W#xXy(Vn4nx@!A!?^r3a9 zg*AWrr`x`@!vH+?`M0P1-~_ev1kqnnqJ@rXx7X5qm(=ydrq9m6qUfmU?vsP`&GU*eY0S#{(edo_m*(4cOFxQ*{_#;we|Y-ce@6My3-pN}J4MUSv$!x1^V6@> zIGFmeLGnQLhxJyNw8jy~?Hkscz5Dqa?e`Glaowcp)Rpn_&*}GHgJPqTSk?Xp9+2-1 zz0u7bUBj4iWDG~?COKtkA#q%sKwbzAH{}LkOHDNfu`^0Zv zarn$|!ao*#V2gJw=atuv|9e?n=)<=EM1SuzPf@Ju`LO%%k(Wyby!2P--zpXu$@mnAIe31uQe@Wcx zl+S~jXLcVyN9NrUzxAzlq5ohRsp+qEoX2B6wLYBi5(j(+R!=iG$v~bRBu1J+I1Ze#TKgc$uf!#QWrji-*2??a^<)DO}#& zeEBQZs(ycZXcm{0FMo^Pf1{;wMX?IqbUd@2JmR`>;_3Xb3;pMp6*c|g`JcM~hW1H6 z=nJwv80z@sdu$LsY_N|%?EH$GfBddX76yOA1v~!tbq;>}{TDoncKnH7`Ll+_Yn~l{ z(oa_km>=^l^dBxGHT|`Y|1l@uyE41TcjdZ5;-GhBbj*1p7tN|}NNme*X0TOwjWYyR9d~Pc-^4Z=&=2z&y zpv-L4KfV56^??U|_`uXpUm$s)@u{xnUq+o7-%xov(=Iu8{>6`cpfmlDD^I%pmXeUu$OGvojbnS|GlXv`{b9EwR`~jU z=baYzymiHIjC|!p_-+3`lEuZo|K<0Ox%xpJU%b`YJ9*4?$ndB0!!Gn6DJz=v_pXe- z#D3hxuqGm--w!?fO?neo?8g_T%{b3bAN_mSWz9uX*B%@zaNrs=;H8p+~=_s 
z`|*W+f3%mIc;ybE_tyJ&|L~*pnTNgq_OiHmkojSQ_!VPOvGPtH)xQ5Fza??{>t)|R4rOuSdsn}v{{CoC{fc50dgnaSxD%}DpKF}{)GqXo9@Eolo+9@# zFg;ot2Q`m4=o3G->85?8`4-puMqBGsUig|Yym8u%!^gei=&;YoJ6rj7U!mT9f6>k2 z!gi*Af4T_UFd1K}iswW9cK*RBU#^(c75c}FM*Uk-$9#Cc4V8~L=p*Zx_YKBv%J0fJ z^lv=WDIdJ#nN7S;e%Sfuk3PKLo9+qi$)9`W=_gcr>HXhqyC2Hp;@8#l&$-%)VpV#Z z?c`CrC1`yn^W%9e^pBaQKm9B6Kk}xB@_R-6DW-93!@(>r)R|w$t@)kgr+H>;oVq3q z2cG%W4}I|yD+t?|{qA1NdhIv0(ak%F7-~M-Z=ru)W|;o2w8r$6 zo`23`*!Fuozv+CvlRW(7f%rk@%XVRY`c*!7;W3+dpZsvh?qB}+CWkHx^M3sG$={#& zB=vUvLo165{p450{~wu$4NBHaZ2zj8^$BlRDw$uQfAlozpPo-Pz5(qMTp)J)qvQD* zC#^Go9KG64;Rn+??}Xp{*dYC2gX#~h@2s?F_95qnEyAofZ}LL5|2Z;?3)>}_e@1RV z%ywZ2ucB0s*>2YskJq?iGT%b~UREqk`nUD|6Q)P2%r5dl7fri9+VT>=>7_c52im+e z?zP_Pu)#EM3H_Z3Z(FPL!YZqFR(sP~xBlR$BjLBtU!z%EYM*~|^(%^1>7Cl>27hgS zU8yMaA1oi$^k;sG?bk`Z@g;F|__$cr+BtpU*wUqxs%x99&92d||^)&c5N9>;DoqobruZ9)HacecARuU&jHviyv3K z)!I9G47PVvn?7^3dxG>+=pRR_>F+eZH9p^T&o%B8H7~tiDC00s5Pv@;-;nW`7q((Q zzOdfUuf6e0dtMwiIPB-YzvDeW@mh!e+@A`BuQqSSSIl%VzRksv_MP$;`VW_xjrtFj z(dzxt_>%ECZss_GzQMFE^<$^#T{(KuALF@b*6k(qhy4%eJ+R}iCWHeXeSD)=xzoJX z+2`M`@+;=Q(Ce>q^@H@IDAA%%Cy&~b1hEaNa}qD~Ur;`3)W0Qp=wdp+18Uy?`1Q9= z#L{Gr#)uBh$mqA7AmA1Ag{^S020nOrFJs?LCjTA2@!5{i31PNp_{X zS3kd7#7_ICze4|oS+UggS6#V}3i-TW8M?B%dVN?P&x?!UYI{{bd7$wn@w9KB@?nGN zxawz|;C}zsp(ihVHf->Rlh(TEfWO1v?qzYY>pX_lZp8}_ujKgvC0eb$lgGw4bG&Wy zYl)wJ3jG(Ao+kY{4kMrO4W00j*Z!v168giYEAQ~=JH9>? 
zrhjqMR}L)y2Kr(D&pZsf(x1&Y7n`+rJQBpV^FQF-K!as6R>{Mi-1?FSRROiw3ysm*+;x4827v@Y=zHy+LJ=VzR- z<%(ZFZ+{>wqifN;2zrkCx?uDFRZ`WJN68p z9rapg=bu|yT-aad^7pgz*j@~^jy$QVUZ3{;SK4P+GU>C>KW5bQhoAaUsk6_~J+(Ct zKK7f~py{W4`1@g6=XE^uV<&wy@rH?0H@W}RDPId~JbB!4mtDFtJofz;^91$!57TEb ziwkvL#})4w{nOtcbtS{<)GqXo492f~me-Ox=HrQ{sCiuXMLuYJ1LOIKH=&KEb!MmI ztKGzdKWwwhPj7kIv(JXD9=++6Z@YC4e75~B%;Lg!ZpZ(F>IYk5r)%%T!}qY*HvgW) z>8H>?GBxSn)^W&ZKVvvBp3m~!c)yL`l&|CP`Z6BP_sVCujQ+6ukymco+Wfs?|J7Hy z>E83I`>%5S|40@WdmrekJ*ddIU@P%Eu^az?xhrwzSLmOYo+kbAlh6Ln(3RCyJ-_22 zAM~!ws$QSol~!TvJjhFtxS@+>R_C!5)B4m7-@fQ?tG@Y|iQ$oTwtL~~Ri?sA-?iia zDGm4kWL)h()5Z9vio-ia)txxoS)qT7ptI#`9q;4J!*p*cuZ~X~gvV^nV|@G|{qVXs zKXuvsr^Kd-aus{8B^ zwmjvL>wo;ph2-)1i}Pb3b=HTDnGpqF?cK#r=WXL^Kg1=we*TDfTIZcq zm>*k_x+V;hXaD!WDPLMHto!W;rXDqAuGc!Y|8)PFzcYURIt?8R(N*g+UA)>H2U?%G zBv4O(h5qx)NKJpI`8tmMo;6oL=){JPJZ#YVNc1P(glV1E@yw5{*u;ZBtaJOrcW%Gg z!(sB7C(Ii??NG0EcKq4P;(|{0`$x_5pw1&Ek697tdqC?mmn2?S^i$|RTt=GoZzp;g zpZdKktBdm#YPatY8-6gYGynX>=UY4($2-Yy#HM`phb_*!eCpqxn;o{=;FHJwcG5g} z?Dt1K-tpc3VQt@aP;>l0q|0=+A%#?+yU zB6%SFz-Qx7=OUfW*YcLq-&tqM?(bQB{Zl*ZJbC`dZ(jZb@YwNBPsah*e7f8L{m%Mn zXy*li`{>FN}v1BFdnFVv{g6jzooq7 zsa@#5sEpL~*F5$srem%=pc9+C_B~>g4_Y6}YyRQI=UZI!y^}n{W%P%w=1pH`!Gu=W z>ia)?ap#ps=*zx;8P##XuG)EmjISusLPxdx{{BVz3jIgQ+D84;^Uto#F7jQuuEs+z z(7Q4^X8+@&Y5nniahgy6H9z$r^T!4o{P2bF+D&#jalZ>f=ePqOo_=VL@$CHnU=|nC zgZmHU>Q@x2T6-stjZd$e!nXZIE_w7*=s#L|Y&<7%t6L;C`6*AD*S>OmzSW_NqWK3G zpKtM`&O6Dsaj`-2u|f5R@V#j#e{zd`L%8s$l^%X7Vg?XK9ShuXfUc{(rRpm!2aZPTrn#FfXp27eMa9?hq&=1=+y8*TKa58mDWd)WBh z>m6{}^{?iA)y_YU$oO{tjq^A?^@H@2qLVy+x9-(8B)=m=r?&lTUHW?>whKee zLuZij%w}9g{2=*F`dM7ZtL3R5z9tO29Q~o`tA2GTgs&}l)1NM#1TWXk7_au3^ZjEl zi%YuQQEzol^3W~yr*@%#tZ3B#|7X8HZPFcoxZwT+CY^J99&X)m*Q1`C@D%#l_MeM> zwtc;B$?iWaRn_}p?f56z{&?LL`nSs3M*U-S%=@U?)bo5YKhV39bxd6{K6xO=!zmxO zcO_fVrz_XhJiRNeCHHq;cg3`;rkpsdGhyE^F2CueJ9<}E7oQ(F|34JXjN=3UdcFT} zGH=PM#!c-u`OLekylOM=LjSys)b!W9wqD0VzUcvP!e@D|WF4cwi)N*t+LqtI=Nsy* zkEVEoKYV=4?#I78Z&CQLQ#76zslQB%Eme}+SKXEYiGag7DX!F%R%}?J>#|4+sA6}gCu_LzHHt!6~+w|36 
zxN_Y!uS=*?;-p8P(M>2onpm!zfGHn0d zC9CsJ@|G6u{|D!uy7gn9-)%$cx&JWd{Xv^guHz|+RbQib?fOsjYDo>eh5m!3r>4J? zJdUHtx9whe7>_vUwcc#TSHuspJ-|nO%~sxJ^>2U3GqEmG>XiTl@Rxz+=~c zMm5Ih(QyCaw7*Oj{OAUMZGNg>q5n`>+Ni(gnNE3`UF7Ex?DH&f&^w8nt-8|>e$e>3 z4Ry-rLHMx2KK{`8=+j@=<*MDntZ&V|>5ggV!e`(AF3jRmyYFDGenqjWwRiHE`L%Z& zZ>MqQTj)Q(%xu!1;{emCrMw{X9TGq1QOF1B3rzjkDUxsKbbQ9e2K)HKg5AIJ)J&iL?OKjHO$)k39{ZB4_c$jCQ|8N;;(!X~7 zp^j(!A?*V`(7Td#%>JK#oATjJ;^^aD$yW5~%5{P8xoGzF(^qG;uh072c?-7etp2t; z&$#)HSHfeTzebb~-lq@y#lEHY)v(SHG8L{zPX;r*@k4C<2_pR}H#6j;QZZ>);;s^HO;d(wZb{=L;U zfZsmU)#K7$%(-fr=Bdo%SDSA7@{T?_q3%S_`ZAN~bpw3?UgC-UgFdGkB@ z_Sxd6Z_vh7Uh{VspKo#c0G*DT+DqsUn|$@EzukPn&%?AMzcuseS4{L;XZwHchwaSH z|0jN%kJe|p;BTuR+jQ$mBJ=AEJQ5vE`lr7qHJ)5}yc2{EO#Rk}>P8+&KJzyJQtC`y z6NWu^c=G(q{6s zzS8Y0f0w@hYTxK2*<<#DjH77!JLM&wi_QEB{bL5aK3Y}3?B~f(`?Ke_m&GN$UeW3p zUlG4lRr@#mpz%}hq{95ziqth>SodvfZE)h__k{KCI`YCD?>xh6ot=Lk$>PF1oAkHw zTbiHrx8&EKKN1;g`YRvDQOt+e56=sI5C^@JzET@MXz^U~;WvLWFX|ZAY^U{o^25yi z|1$mFH*6fX|Ki{7`}^Q@x2zNRnx{>4cS&nI#0V*Ag_j3)ib zGd*HC2R8AM z?_J4K@VIh4hR;Q_ub*+kdY%0?J-|H?*8AP`S6=aG%WJ*qKcakKRA-)Rz2c*{-2T(*|EIYBCv}Q>r?~p?>C+wiMKIHa?Z#}a zLpQIs>6T}4Vcv!QgQcUUzxK^}4DyYKc_|VHy(?K?LjOF5E1g?n!=Lhar}2Li!|^XW z9FM(&$j;|9Vb`h`v0EV$-Ko|t&NZGzns=Fzn<94uh4&}^wjis zrMKArHxwkF=i5;GAr5+%)FpkB2iktMD|OT}AG4jt;ll>|_`^|$JiEg`W(bsK3ybJy3m!6vb1CqyiZ1U;LQ1gg` z-buaLnnyqQ!Bl_qQ>T3J!ech^KKWsfKU}`=4}NoW*njlYy?5JT0rhtM+h`USKdi|8 zpSxgNl#j}asvq%Y9?+6Q)u-U%Od zjdoL>`r!@Jw|eZ6XQqEEv@ZSO3Tt1q4bQusf6KGDRNGgv_n&;z9PiYxC{{6V)xm6y z+y5`9O&;$b^i$}+psa1wf3S>>*^ldZ#)qwW%maQf%}@QzWZpe^=voTU_Mi~|2&IJ^}0}7`w(l44lRv4#rcX|LHaE8UsQT(`VUAR zYug{@;;}xQ*zj>46dN?Xlox+LOzXUkXMSwOetcoA6F+kPUswE1SnJ74|NP2D_4_Z4 zWO1?UZ~NT;gg&HdjQ-sJQ|HTm&i5pfJ`4RvN>5FHS9%NoNQCM*F&EqPhksQ3?Hk7D zGY)-%<{w&ozQy4Oo#eygvB5Mi@rTx*e!crUzB)Nh1Wao5h9wv|nW@ z??0^L)qa1GD{o7X`4#$)mYyd4CA)fFP4`?P-U-5&+U7?WMg0BHbf#X%Ge5RsKfW;V zinm<*$Il)c23nKCb`$NmZ#AM!w(Un?0e?Vs_N($6?y@As|#)tx@RLU{AwtABaw zgVpcvxc@f%iuV7rjHf7FR%@G|)}fpEbID~tL_dZ8k+D&K-H(xPI>D#o5eI$Zw?4Fg 
z@<95s*S*$T9X5y_*kB)j2*bN>JoT&_!XEFx^|ljS3x50kDLjg~9RBot;-@Z47V_^pqA`X_O#Q$7#&(b@8n{LX|Kr-ZW~I_Y$e zsHVT>@j5b{s0W?!5C?tY$2RO`aj|^(6wU7>&!Jai)uOpiOwPo46?3y;~v8}i``kMDK* zy!^nK;qRaLOmY9iF7ria`ysRqf5$CBw=Wl_UvS51|C)9v^>+P9!k5eKPo_4_oBB)_?Aq~9OA>gU z(`TXof-+LmUv=d=7V^{Qk@1Lwjd~@|m*sc0za(DcapUtC-%#t#c3E8D38yT&{u$lCbQ`_FVWe?ZX*7WywNAJz1CrMK{p zM5xaHw8XaO0Y1%ZUpGGA^d(Qx{DX_nw|G+Lo#Zo5vzO2xjye1rKmYDQ7l-3!o%Yr_ z-#Ho{e*b7XXkY$ydjFrEjterLqC~6xLGAd5{GMboze4{-rN_o~5~r_zzeDrnN0#FK z`ZH1wKWKcar}=fB*eRxUHlFgBO&@*yVfx&gZ=8PI{$bPS|8Vwdw>?I^ZU4P2E*z)$ zb@cx?o%$8Us`NHn^i(Ek57k z={QdEjSm|n9~)GEc;7KUe{8KMt_~-^{>aN`?z{p#cKkV<#l^mFYWV*P>OiTgo{z@& z-=x1n|IxCxroR(L?xSdWPKPcA8&iiKM#5W9y|WYv$(K* z<>$xu-=}PU)YG_AYzeaVP9E9)_tZ|GKjzmN_)+YvN&j{(eB>LCj_cj{xaB4OCjBkH zD?H?xKXzBVK9$dK8U39}U;5_W7d>-#XPxywckE&B-X1>N|Bq&IvGdRL<Vw^q`-f_`Opc5nSrypCPG(KSk@8ew^f+9~(^b5`W*{ zpN=X&*j2kV9>uEm_hvhJRQvrGJOdKv`6%>{%r*U$$19=>uS>%ukK-=#LDNge$KMar zI{GnxGJZe4F!73$etF*E*N3&w{J=*Kdhh$-v+uupSzK&?vs%3U%zJFUxyE%KVpVJU z;=HAI60`cA*z{TGpO>DR{>oqb{cp@J@zovUHi;Le}zyD%9MTyqe%n#kLo90*OA7@pp!)`#Zf?}up}{g_|J#ov!F?6dF8dtbF^LD=tIE5G&D&pZ#0 zo&Sky6x)}g{r^bfr!Gqt{7wJAlO9(U`u9qTCjB{%VqUi0=Vet<=gM`pom-xZZhKwO z7f2pxygEcGhN67@q^5ZIJ%iX zmrR~l=2z%HSVn64>-fBm$Tywf(>&s!cLiy&q_*|bP2#!OX@8dQG!E*x9vkfA4~IYc zv+g~UM#E8OerAms>mNv-XZhg4i5~Sf5&l&HM`e=a-dD`U`6j*ZynhBm+L;*dTro ze?KJOP{*s;NgqwTVeO%>Zg$IF?+R;gKk<;?Y_cId_Wfg2qnhu|@$WZ3_n7H}4lS9t z*Kx%v>eOy4!f($j^?4STLjU3Nf$_W2s>V;Rr!_u!Tujf%Yo8@{I{&nec)GvA52kgV z@zU{^&>v>}a*YSi|J?L2pxdpk@*$+FDN55{axuT z_9v?elJA|^hN=T`kht0C)s){--kzdUqz)TgMt`_!?x}|#yQ>S|ySR1pR&T%9E1!M; zm}hZed(9Wb7r-#Kq1H)uwf5>qH`70vAG#F!FD$DY_1~e4F6MpIMYp})-~+wJQyV|~ zQ;VagqWP7_>afYjUP26;{bRRl?>u>r(7O9SK6JsS_d_51{53y|3%}>_>&4$cPUdwA zN>;Uo2jq8)-bs!o06Pd$&Ww+OIj{*-u@yIlSDr&d&RcW^wVI`1yCx=9{aZ@fF2e?YG-gG|wq^ z6{%CZ(0`#Be2c>mYCb&F8>V?lewe!J zw84|tI5oZ@$omm zySo03&k^IefPNZ)I_zzx=BB|3~v|-o%sf zrE1LnhyTBD%GXt-PVGYf=t=ctyK8Hn>5(fBsC>jh?iVmukB^I0>Fu4yb)M*EeKLQ|qn|?m=xNkn z-$%%2yD)6hpYa+HKG5c!@LB)N4}=#R?BnmOee?SsJo5AVbk-TV@cU1lda&0z`~GV* 
zi%Y-#k7O-o|F7fQ`!D_E%8T6OB+hk2rc=x=^1%VgBMy44H(T@26F+FL zZ}POBeE1C0yd})H`#-O`#k&_hx6%Rd+xzcG78ehCz5XXxzoJ-+(ceh~-QcgyuO)ef z{(1SRroYaQ^GK!>^9Eg6T^%1;o{R4K`8V~my{Gx~Tk})b50kz;`RWfJdGppA&07BI z@Ua!v{`t-uR@a~C{ZBYQ3|b#~78m*!tG=fHCBOgqqur9FroR(z_G8vZOZ>L|YaVgX zCw}?@$pdZsP=52PZtxi{r60a<`Bifsng6T7aQ)s#9{FF7PKD3De;Li*1A{@??>D@cnawY@97MO_}DinqkJes!K^ zJB@?HvB5t6(7F8&Z~FZ^4+^{8_R$p{+2Aqw?fn0!jsxzcpTBzQ2c6jI+W5AX2=fPt zV;A}lmYFsEbsp_q#`Ab5wxQ<12NE}1^V0Dx-c{a~p!unD8V8rrA2w@^%w2fhHsSTF zzGbCnUj6|6*!yo!#{oHxWIRR5s@Cv;=!V_&`Y!YzDp_j!yV6@Nc0QyfK3+%gn_j~b z#|Fs*@%KaW4V{jU9@vWg_`<$l_}+(}I%#^CbKtr&p8j^V{qy^;`N|J&9RL4iCtk0H zI=*CAYwzT7TYra=_MN`36#B=Jn)Gk$ILyn&&t)9=$!mXK{N#h=f#%nFVy8&HcRC)Z zeJ!m&Y_sd!`#(Krrz^@mx1`se=h z-aiuFw9n;Rymhd;|0>)6Lg}aWys7z0&sIU5zuI zqIe4()#f@7_|xqcJq!I8mbFd#FZ2B;wGYOzK2$gSpvCjDn=!{t%c!$=B}=6TYkWVaZzuU4Ttk{ifIKD3BO?kYN{C^ki z|G)M-{0~Qe_aD#0XWu^#>o_3Szvb#z6syopbuin>qqe?p5@-8jUSOgBXj##y|5V{4 z-#f8WBn}c!ZTuWh)Z#jyn%`w{vHB)k>znez>GL)|Y@a7a!=;y>^5F-kSJ%HC5b5o@ zH+!AKmup;6ti^2qn%`F4ypv2r_!K(>kHv(V{#w^gxBon*+viuWdFTbEe%lUoT=GDB ze@fz3r{lo8to{&A+VQX>AH6cPFFoR;PaOYV`0V^cD~k);YvcQ`_DLU7RXrc*Mm(J# z{DuCJsZsxa`~O5Q)6+>ush##?apujuwBGWu!8C6P{b7$CUw+X?_Z|s*9QK1Xo)5dh zZ@)i+M=_Vb-yF2pLpK|@c+JCUd>e>|_)F*?8EX3L{5YC{sGB(MFO@#9*Z=Fy+| zMa_V!ojxBPJO4bSmEA4ljLpCvmfz^1I{n8hKJaY*6_lgDz z!#{s?)8nf=S>69i&p+FF|B);%w%sd_SB$|iEBqkG?KbaZe()Ch=Os;}{u9e+b$mk| z@1O89KhV3fYD}GrX6C`~q5o_x*L~xU;V$ z&#u26%;IA6Tqk}2dk=YxD_Qk5@%9=Lv$|aEWPXMI(bJ?qe)=~)r+nz6eYf`)Klvbi zfvF!mMH`QC7`Fk_`qU0vuY2dz&;In5F!Ry_Zam=AL-5=GZ?>|y@cPb=*Uw+M`a$|p zlxVRXtKHs0^PM8R)aBwY^zW6A(Am4v8uLEBso2cJw$ohv-U$*1Q$KxyjSzK!0zvSu%8DCMNWAxYKM(-q2 zn_pM_h5m!(qniHkJIP}{HT{!3t~(+hG#xb`e?Ls?y!K;$Y{hiti8>Ia?Liein~ZuNVsTAztth5kcjZKM8+gqeBqx-@hW z=YDnV&yCNwI>t@(=Q=a;Qndc5XS~#2LVuWd_h)z8>WwqQQS;XL&hck&1;73N6kf$# zF8{pA=G&6_vGzJ{YTM^8__~q-5A!VapI=6r^lt~LBcJWU(2b8<9^*D3c_4kEhxw`3 zaXh$;{;BuNj|!b*;aaEXooDQaHh@lTrH5+2s@r|790kW91jCz#f!cG&Qm)4#L%YaRpj>SDXsy!PE< zlMh;c$~S-U`Bvv7?w1smzm~85FzfD99)J7ucZR+0zv{A0AFMwAa{i5a#huZ=t9>Yn 
zRp_R9W;=Og+rQf68LGe7{uh-MHT^Z7K3}}v3^fnEK%e++TkZSF7b{P~}T;IsYz zP!<=q56}O9IrS@wRbMkd`~M5gPv?hS=pQp``fFXgD`S|Bx!CFRNgVV}^2|mrMf{-g zwHoS_&x7z`gAIPh30wdA?zykt@Z(|I=f6FipLjQX?W3}|m=1$#uMy8bT!UVqM62x- z-My0tx|u&0f4bi-^p7*Pakbtn_Pzh7&X@Ut)SHbiP5F5i7y7N$IgNwxVS|1AolP$I z;Gge*?Twvj*-8I;dd@wJXTQH5)p5YC+PPv&FkQm~HeUaxe9W)VKQhh-|yCcKk~^`F!g`ghBWCjHwRNImo7^<(J9$1N}Mr{j|c8lU!O{-j^8 zyvBzO_Q?;sfAiuizw`9nVXxV{zTy4@=fPv&zqGQr&`;C%?~+yMruk;;_~-^dd1mKQ zQRv?*A2sSfTk=egq(AtGgZRCZdb71Z@<7}FCvmHT2h==|4L0~0r}6v81GBhrzemGx zUtB!=iZOZ2%B!D#+p{Hzoz4%t(0{PZY|>v?ig?4k@H0Q}qyifx55(UO;iIn6ZWuS^ z50i!`EI4WTXTzkqSKsl_oZaBF^Z%n+Tui5CRh#&OI zQ~O*-9dU3O{b8qD{wS0n zt}j0EpPdc=bl*+i`0VZQ+wsq$EH1VEF5@anR()-JT>q)_^;$=q`4;*wELm#$YaQRC znNIK}NF4O8WF52La?z~b$A`qW{Cs@e{EXu?4lbiVob%HcHl1?s^Wp4+uDtcnE6qd) zyZ(7p#~D$S3s6pEyS4ql_90o-8hz2tYo687cUNi({TG#w8ui!fsB3z_qsaBv?Z<`J zJHe(p^T*Mv=LtW^eBd)=d>v;={ox&3?(&y+opE<~$K4;^^morZ0>6F#o@a4EuUt+) zOf>zw5@$`l*-jpD9~iGT&ojKtuh4&_tf=X)b?sIet@JS-^)o(vpm$}}m^$=QBoCyo zln>jxlC9{|mFoiGbJ1L4fB5`O7ry@=uW{j;o4+@Ebm~N}^>+N3D<8P3{{OhF&1{2WJ+4_cn;min<%OzXVU@s-DH^{4fz9k$!| z*Z$kLjTYCxopM+2#5VkCftQ91o`RT0(!=?75>JXs_{5*nHa2Hy(TEbMV{oPoBkv z?bCDrZN^iStZEGpXy^Z_>nd+cka-vS$Bah(cNm`!e~R#d-X(QvOn6RX3mbQ{4*v zV@6H?q>kSKr1jY7MIJUt9*Dmm*7~Qf2JFWdHvHGZ-ls0TFHD*9o&S7dmuKn6_CJfV zxYUk6d+G;OXYa(%8oTlLcRk6Z&qDwB4A%7LdE97xzCGWus@ng-2hxYx^s9&;G~OhR zKJXdB=d!pgslT(v`RDy+o9}+CGjXSbzW?0vQ>m*Re~v2e|N8$g|0FL*YSLf2DB@jt zWO^`vMboh-dDviDhrb`Db>0bo6ZYc^llMBgbN%@@1(%&M=aOq?@x0sbkA{>VTqEB9 ze+K(YLnnFZ+N+=QHvf;gcMrI1to#30DtUCeF&NB@iy4D4HCyB`4y|j?wV9AMN=R(S zgmhDSNVpwxEZrm#)g6&<2Ms9_8IeONQ!1W_Tgf4Z?hYiCe%D%`&+D`1zSgW~-!nbW z@B3YUtoQz$-q+`P@3q%nd+oK?#;)l%ER}KlFO{w7uRM&$yzG4`mwKP%@xSxrgQ<@A zg{3-9#Sf-+HXhGE<113vgkkDEv;X+EZ+$vUJ9?{kuKLOy;kWPK;Z^K={Z*cgTf7sK z{ZHBl&jbFbf3K)8{gd%i9<~<{o)o<+SSwwbCs@nR%XItyU*hC@r*V)zu@$Ln!m#_- ze!1ysWAlYA&RcoAuM6_v z1HJO3Hh$3J%&(i3i}}<3ox~Z}V}nik${)7c`{^@x3Lg#aD}Vd3e7l@_(|+UZQ`0}0ANlq?wuHyK@yPf7%Qiv4s6ih69<#La8CRALq_hMA3pim(B(Is zUw!}ho~&HbcrGwczUOPE%xB1YIV$^>@QQN40`umrbjAX9^CpP?` 
zJu5}O z_!T9qT6^^ypX!F4&JR24zp!9w)PJWUy4ZG{muB_-W1^So=_Jo=bm@~%zqLB$^B{cK zV1pmNu1d@-}d`0;lrOIec*==#1FDRY+|Ht&#er@}w|6B%4=hqo{pbVJ)PV*G`9|7i*>W>b@L9g{@(}yB{kiJqrZ1@b} z({U#1@2qytOP5}>>rS23zVh>%KKYtssOzhLH!BzV6f5pGT0GLY6GS)bpL+UMJL+G0 zYWkBmBz&e*F1Bs2Ngkgg$p@_u%}@QG+I`4fgTd|Bq}qY4%AQJzCv=V1MP! z6P#53|IwGcQ@^5EQGXrJY>l_q5)W~&ex5((8TBtCP5Spe{#Rc3K*mdL>!+8Mi^W@F z|J%p^{q(=`!0*jkYww}(m3QtraN<{1@J`0H*p^|e`%fCQUAQCt?93IMXXr-PzjlLOGfK+ zSmM~A^*gZSIC&|ib>1yqeaaWsdieG~KYGZI!diER4_tiF&hVT5gTilmOn$Do0PcC) z-{uQ`Rs4b?yK`>#GQQVeM0X}t$^ zeEUAF6_ev)db!fH>;Jj$AG(6ppYq}FhiRSH@yw5{*pDwO2^>n641_P?OWY}8-JV_v3vOZlCm=C!vK zn|#p59awUlycE+q@1)-R*otX=YKObl`oeE6`^lBzo+Hm4cyzZ#@Yw!mO!+~bm%ZL| zjq7+~RcqthWPIL#qyNGp)AUz;yequ%{HOkofO(|)<4@7MqI$9K4d7L*<>zI3bsW)3 zY^OE|AGTu3o7$ZzZ##JKA1}O4*y6mEhw~GJ+Hm2TrA)H&)VU%JN|IUo*!EfPCxFU zA8xkBg^XkSpM^S}qV?BRzoJ;x^JnvN5w2xT^FQAa z)$5Nq=(Rqz8K?IBb}r*se_HRGknxi^<27M8^1;1s{m%M74@cbf&|}xP_J-fSe_5E7 zOYQvUp!yZXs^;&V#ueN7csrdJ{YCxD3Uu+MRq?VPBj0$KzanuEp47(gds(?y9KO`= zB+rB7f%p~SYr=5u-Cut4&eJA`i#C1WZ4aL|%6#nm_ik1$Y@hjq<>%iGunl!Q$*$I3 z{q0r7hi$0*^cVGS6`4)?Q%AnNj`E@^*12L`Vfubyb?BvtzaOHHp(}^8<3^9I*pDx) zHf^hYXMfv=$p@VIo^Nh>0=#zqXOZ$N!dIKO&O@@RwRiG(;{X5kU|v)->Cbh%)<;YD zZGY&*CJy?<&pbi$K$~B8JRkY+8S4Dac3HXf%kQl7p{K_3Ex*>8e%BSxy#E&aUeS*K zllkT4-(ST4A58dVwECLyZTqLsTylB+Fz>kimou*E?5Cf<>kJ|@-(FTO zwtb;PlfNbTti6**cK;c*@!LF&6J|8_SPsdID z*eRxU-U+|?u@%$$)D9bcci@L_4=aY*C+ziN_fr>mt+UTx319y9y~_ijbl#aRUh`9% z^PlPXsh&~);i9&tzv`48&)9yfCnI_%SQ{ttlb0fODX;mpkF>5y|9<|k`ICE`d(NBo z30uwk^US?|eTnUV`2KM$D;IR~{QtF6zxE?meeIq2mO}rLqM}j%9fZ&HXo*eV%$GRm zT~ddRisXT|eI;@D;WLC!$LZ&1oW}dV?UR*D?fv&H<^2y&Gkzy|>DuDjS6g+n{&T5F z?YR9fC?bver~g0LRi2)rj>r5!?c6O{sOeAM0^u{Aaflo}zw%+HX#CVW9Y3`v z=norAZT)%WtFH|kJo%n|S2_D9Uh5jqKm4!%|NDcZSzK!RYhUP$EX#7ojGrnR!zOd$6tAF)ZuYD=3IcbFp_g((4ypJ^=e{%nmJS!LUNarKbYJXI_ z{)hTx`-|KESW#QkpZRJX^XjYrmIYh&dCltJOU5VOgy}rJE7+@fdRJJ{cE!3rZps^` zUh;uwUcdi`f;)K5$bH9s)Vrc8?q~NF>UdH;<~ZTr)CQYYEf z+BjG;xu6)m`F*oV=VkSW zeNXt}qEnw}h2A&ze)C7(+=a)kzsk#Y?Dx(256k~Q%F}sER<)lgv 
z`Ha@|*L?0HM85G2D4!y6&}+Tf6UXH1L}7PE~7sj_Q-!cwD}%;ha+FwzH{@;G5WFlFOBLrAbmOYD~eTLdyVtIE!gaL zK;{?qA1qjE`s@7K%11uir=jK%2N~CF&11g!LHYvCPo4GwFFa-w?~@pPd-Rrp!tWFoNsaE^-l5(m(d@>?Xyn!^v3T9+ds3}w3B``A3pp3H`j3# z?fQq7`W3~h=5Mx>N4y_0U$1`V1xEdci;9~5nx9@z-lRW#px1h{(Mu6OXuRFnOkBJx z*46yHE3ALtAGSSa-Sw_G`P*TugHGCgg&lVFu3(Lye>wgf6Px2=zs*I(@>$boP~xo9 z{eX88;W}{BKNr7ug3LSWKT>#%-xbzk^ByYA>ie$&v5nWMJtW9|WZ?%bU+c}^k~-`Z z(>m`Y-~8Ah{b7UZ51ViHwNI?E#|2^Y(|`Ke4_-SP9{c`lC@UBH{oRJ;1rSf0&f2eJ zRXU}%-G9JI#Ompheg5pu5{-ctIF2(`~5Zj z=NiXui5>M{SUhUf-xbm7_}umrdFi(AAG{Nr>mJb$G`)0u{QWSkV;uAAxcK|=h4o+f z`b$r}Va2fF##{dWrN{n3o$de8N74TOBG1aj=1-mzKWndc?fR>hWbpjaXViaD5vl19 z&&EtX$B!-~GoVyIOlEkL~l9+W42C|7cOssDE-kp|<~-sQ&2Hl;177h<%XfO0#bN zoBqxkpFix9tGC;?v(}d1U+a)pUgnk0jz34Ta`E7H*Oxb-_pWr;_BZ0K*7VPL7;JP; z?YRAq71d4pa~xpXX-mek=fSC6vc%(vo5Us`v~g8m^A9gM-{P8Y^^Auf8|>o`=lp)( z4g7nr3>SXm&bHgko?p|yCH*kJd`Ru?coZw@ruk+&d1U{8L_Y~H^Xm*eR2DSp-<~IY zi!Fw6Xiayu+&V5AV@& z^6{AeGrs%Aw_xwouP9cv_Ufnqw%%da{OC{Lc~&k_|I$;_U-N3ef7d$pkKUCuc6^%Z zW;Ww0;s@>Z*&5Hsd>F?NJ}3G3;cLP$=d4wxWCOQ{)(`Kz@q7DK=bt(L8CJeQMgD)v zsjcIRRjsX0jia0SyIS88q`#=YD?Cm5`!c%N@sHN?x+4zqz7IcWeQ1C9`(awgIOf;> z@b}{j-MP2D|5GP*!#*$j#};${Ym7Sk{FTfvm%l%CHecd8k1Sc}%fA28JlkFqJL+FD zHtFBqR0d#PwLaYVxaB2&`U1%VQ(p7aukv|t8T}pi;m^*0#XzgG&YQmdq3gH!K74lm zc_=Fv`~3A*-TyG2qGZ+A%n#kLo8}kw&kL5C{;u#=i#>lW@fn{J+w|&=x6PmQkG~&U z|J3Vv=Eqj-#~1STXWqQY>662}f4*wlV;(sl9{c`fO#0`zBi%3BytN<6uGZekF@@}mB|;!%_S zJm02AOX~15J~rQbk`G#*j*q_|rgh%w_{;-au^(U9?fpO7|KNWf81`}3JdnR(ve!C0 z|C7uwKfS#F_5$1Px>>m}U$LsSck-C`LF+#ke`-hl2a8Cf{>*bUsmHI#ym{SXgEnrG zPrM1!I`5J`uoatl@Q2m`3(h_Mu!F-U_n-2@y5D^oe%t@|lpma=zrW7a4?3~awed}s zh7V8;ETW4Y|Kz1<-;Xk{en?*+`SgLEBKd|+$EQzh#k4-P!<#P~ znX%2?zZaHj|6$jk3`xY^f7jOUUvl*;iWTb}j|8#V4_M#1#_3z_sQ+*gG5uX(E!LO5 zk1!AKifA>icZF3wKWScSdsk$~`n&}D+~A$q^xIgM zee>4?jMw=2_uz6l#Pk`=%EdEIOKjHo?fsX$u4FJCc~Sq7B4YfmuqypIuWWh@BysFf z@wcxLBp)dwq6Y$vex4o=f zJlJskr({(+neF6JyCvw==KX^4z^MO%BC}EdUJb__ZGk>o51plW*AT6<+!5_-9c1H$B!UuRpxi=9_Dr@l%A) 
zCwVQkHBMhq|Aj@w#)D7uYQK}ni|m-6mtgx3_(1Q*$4x)wCl9paizIG!$_HVu!(p|Ov+VTIWM0nj>UrzmsVpVIee){VBKU`g@jruPt9-00r&s@o4KAw1j z#6j{>8$a{1xayuhua=*Tx2*oK-_P#<@|ULV5qbygcizYTk2AeE|2CSHi|v2rl;3}? zNWbKRl3lI6lgDx5OrHex4Vn zaj?lxyaB^%53cf!kDb3^SmVqWFWC9)6TId%?SJy$mR~^LZ~6>o=fh{mpRKH1&?oOqE53knrq7_pouc+9R@71b_WgU>e^)Y@UuWRq(qa5s zZ}Us?&ELS=Zja|@oU{&qKlEuo*bUf^FU&pnBS+sD7KSZXILd!>gKyIp*KM$WF#W%% z_x~Bp$_2k?eAPjsQGd1h9^CSheDb3HWkr+z?ahUmc{TA72gyrq{LI7R+IMaHcUieu zT|XP1hP-gzxBj%{WuIIahR?Zguj?K>23|Y<>}KUcAHL81mn6Gdqbs_3Co%l+GViE= znNibU>!dn9uS4>P`ZOOt@~}Z0U-iM?57RpDw2uaz`|*XTE6(wsc;AX)%KcmY@W8DX z!f)Tdk10RcRlAkofY|BUJMnNI2KY@kt;3G`m&~c2)NvmP=Ed=eq4E(2>BDTsZOSjZ zh|jZPtxo3!Fa4RVaq5~dOu6{lmG61|=faeq|MQV^{(d#}w*Swwa$$b?3+31EUHSqg zt6JmtPJHdIp!JEZar%q;=S60t{?m%+VxPYfoxCfeRUBUL3UB4NJ_g47(|ooI^J~6$ zQfIh~{;3{1lVAsDH1>tm&_HTt`9w^qV4a&}+TfjH`$rwAXJdHWL@~Q|C&z z9pBmbOX&~$%{=APul{r|?En2Uhvyz$UH`nl@((4*{hyO@B&*WfY|TSA>r>~&JegnA zf3T>n>Ce1}g_(Tn4Rt)eH)OAA&#|HcO!xx@EXV2r`dVl!R znWuHu`jkDN_Wk=GPrUe_ ze|>^_yZ$pT>FqecucY5UI`yl5V%68)i4Wbd)A`X~)PK0BXwpADp0w@0CF7V5Lt^tj z20v(d+JEZDPBE?XPR21mHmLj_8&rR|YsZ71dFHA+!|h*KZRV?I55d#;{nr}h=ijx{ zd1ty%Z?=wSzyGD5Tr$Y#`H1?D6p<$VxsC;0(muQ^s;m9A<+;*b-;XEyTYgLOtv{`= z*$FT86UEM?t3QAIhko(t&T88(+Ups~n)21}T~W1!I#;@Fe`|ab^wU>Zr?dM;d%1_gI#17j z<)!zvyw=**)zzp!9w(x3C-`!N5|lJhOj^8z}_H$H4I%}e~9RnPdq?1eY|sI%IZcfazj->!cD zvcK|rOh@&4n|2(GSfE~U5Emyyy zSk)RH(0>2rB**q&^xYDF)PJ;i)TDpy|Bt#-XU9KI?FEAEj|uWTfb<2LKVNdb#pxGx zk`Iqh?KDs0@P(ECa?twM+)(#h|6j;~N?k2q{pt6WR#%Qv zZ7=xyA^I3P9Uney#eRI@&|_!Ke%WIm4@b`Y&ck>8tGfPnZ{=-`$NcE>`iI-l!H{va z52>o=$NfL8Z+Mfwqy8mRO;62hD>5&89&;Jdm(}reGWkCcKlz~bk@Af!Iloq?<9JYc z%$}e>Z20sIS04R~!7%F&pZ)M|^FGAu(B6MXvU0KCf6dk3f3?)FC|0Gn*-jqW_2+6c z4$mL+i~5(0HT~gvi|~lgD6Nb+i6+jnlW zlPhi+>8x|YqUZD{1U{N z|L4zt{_@C2PkZ>}1>w<^Uw-qKZg<|vJWT&mjm7ks;Ps!vQf(LM{}!2kJ;}t5`VSSJ z3G1I1*)cya!H%n!S$`)y^pm1JzXLj+=9?cIO!E?dm_B;lUc0^NYhn5yR#Cb(H(8aqVJLbD$9Z~xvulR0ErzJ}UQ&9C`kk6X6gZiD4M8ulDK`r=#Paw>VI|4>#gY)^ir^8C+{ zrq7_p8K)&SYwzTd-Ty>wbn-okGry?+NZ~PlS6GYbn*5(T&$po~)9pMOygu>U_}Vvl 
zpz$Sft5ZIBm(?GR+w_%pU-$FL;o=9T{9(D1u7}Uwe^pQVvHM>o<7W+v@ogvx)@PpB zI!@GoK@q9x?%V+v{BGtqvPh9*+(7@rOM>cg>?4 zj(#b;>%Q-w^_DZM?Z20mi;dq^ds(kPd6U-DPb;x=v7`PAi$|uXE39h!PhT(Ww+vmG zZpRDloyF#P08K~j2ft)DAGf4+-bsb|u@&KM!Z7=_7p`{G)mMZ!oV(nI|NbBK^Uvt6 znBS!5pA)}y5p~jWQ=9#<^_fdX+E>(nQ4y)>?UcFf>7$7^Ougg1e_sD9?+eqmU3kDf-`p4;j@#1t+4-MAoiEr@TT!g)`LN%A4UErY zJo=0Jj}{q?`nw`pZJ%j>-WAcs#+@!U{2=3k`1>LGhEDivw)*?=h1E9y(bktuJ0Yxo z)JNX&nmaz}wa(7}3}xkF`=M3!_eWjzyYVPiwf0UPv){(%{RCe6jQWoi6{bJWmm>e8 zO+NMMeu+59xMnB3_(9{Y zda)Mc)BkOUKb;?TXFv`ZO@DY5xsIr&hmH$BanNghYUAg1Wbr&J7oJzkbLyWc(pN)% zIOm}I?|f+1!f@6(+y3Hn@2vm+agmOvX#Mrn4<_SCwAwz<%{z%OU*-pI)W2k^>971; zM@PQ#f^a_Dr*?t z?|WZ=>beh(gv0MRyWM&2Yw(-?J?V%2ME-2~{o`cz*Pvuo`x~>-LqE5AClSU;G3uWe z88!Xk=}JBGuNw!6^8WwZdd5|htZMDmZ+u;eVW;!Mj{29G6V^XhUT{Ec<_Fri8c%KVz^1tQ zosJ8_iw!pT87Hi>@srnWu=cOQ^!4BR>`QO|2mJQ^doL>&wtqiWuRnobQL?JFSHJOf zC5BzYVTnimd&Q$h{q;V6^w0BV=!)v<^MU21I$$f32lBf1u3)S9tZ!GkGY`Ut4L0~0 zCwzL>m0$DG*Ut~1{l{*P|Mbn({WlKC%7yFb4ZG@xS5dsx+WOSE-T#ofuJSlR`ic4v z7M_~^n#cFh(dI62FH(S&)1%?a%z$7j}wiop-`-er%9_utD{Qss7{Ff9}wS z!`eeTY`53D4)hwg>wku_a%?d`|s zdna~^@PWk59?&{P{9x1TX<2nM<&A#T^7t3LL696B)9*KP8OZp+N{a;HSC#{38@A-emapQT7Zz=s@ z>-T@<o@q~!#8-XxA)(LS-JH4|2xTwx;6CI&LzV;#UzjUNBu{Nj7I(S zJU02ZT?{I(6C@7$#BY6SALN0y{kO*RS)KC1yR80j#9dQg|F@&&ha;ZZ~LDe_5atHq5hf zvHY&s^lyHhuXh>;Q$IFH9;p5he(?EkFM4$d^JjkZO_yvsgdX<&%b1P>cGYf;N3p8y z-)tw3IB(|FHr)m!jve)1Sa_QBC(raq`tYu(j`iAS)BXfqLHPP1`G&3>uJLu>kFT@R z&mQ^x9lJf=nY8c7=Fj-+;I-?2ddjb8zrP+-Kj_pR5M*t98;y^9H{PbtsQ;oO(xgAv zv6FAdJ-PBZLE<2NG$DB)+aYLvouAin442U#rhVXh2d(zN>%z28taRkuU))T8w*3#v zd`yom^!FExrzly~8Xgecu$dRg{G$G&1&fX63M<;KSV!!YVBhaH=}%sIzbwYFKD2-Q z!d@Nk;|J3^@1(-~*oxFOVOaOg_w9V)Drbc0!@t`hei7-l&bI$|>H>YM)Z^cAN)(V4cZaqRHo6f{=ZD=H_>J%m z3WIlw%A0U117&d(9&itbO z{GfN@ul19QpS%<;KYbojJLSg)H9xHX+@{ZdV&M6(!4+4wHp;8tpZ2nHvGWeYYV-S- zTx~_Ms{ORtI!^8Xue;;xnP=4B6_F{EGJfJB+88B@6Qc(G9z4eo_BcK~>Y=$pCB* zrbn3_^T8yqeT3AJ4_Y7EzxkC1JH@olGoJF8JwbnG<@JuA|FbXtt+Vp+zj)YP>ub}0ip1ap;lYmj4;7g;{dF9^ 
zS0^9-6v=BZl5xEgY^pQ=$oM?`ppBz^9@PHKRzG!37!G~YyyuTN?apxc(0#Yva?mjR zcK&%RD;K*C>+O2~+g$yMVpaQ_)aLgW@S}TbNBxJ3+D82+2{ZHgU*G@qlJOdJws| zosUPcs_n^a?bq%xlks`|lN1v2H++IOyGYKQ{S%zi0XMm+)D>^3`}I=x3a;{ac=W z{_@uz9(LMuhxk449`M=rKa`aV{dw*`;MA`uR<-u(r(bk4f5ZHu{-ar38uj10h%UAt z&r7rV{i73``GG$1W78j>?=8QfKg(CX8qZSt!|E4&^gQuST`99~tZ`WUq zW#wYWKlIg7zoJ;x8XmBA|9^M}lt=BT|5)Lv>F*?u*OBSe62Bb>XdZFUCw}?@$pgs; z%}>9|M;sori8tiK7Y<)%`@_$kcYZi#e*45nZ>+BW;{nm%_k_TEmAJ1ogCw>n$=-iagIAPt}r%hjU^GKMn z*1ns^FW||y^Z#Qy4%k(@Hy*{R_P_9WCz1A>1qX&u>M)!yz8qAri2X-f6elbTs-VGZpWXktXz0~ z=ZBSlKzpja?{y{4nm){S@~FLuAaOhX(R$_?^)D-G`X@Z(8=ua{YaVeh^HKJJY!TcTxL;C!DaxgL8j>-G$-A zXWifD-G2#w&i|MWgIT%w8G8Q>r?#S4)jFM5?fRcwa+x3Vj{27o8`o)`V%v*NzKz4Y zCaQnG{9MOvHH_yp4#I~G_VI`H_CNHz!3BqhnSZ?RlmEK$QLlA&{NKyUh2t}yp2t*v z$*R_-ixVHZnLigl`!)28`ge;+O@HPwDs|-Bek>Qi@oQfDtK;*n4n9Tm4=y?1;z^x% zl5gW;gXCj_>JM|XlQ!Avf(2pj)CW&J<@8Z_O#h**ToUsDFC>RbFJr{JeyTpMO9{MaBo&FQ)x5Pw#X*5I$_M zk3SqT`{-kLIO6(n$lbpz?t|#H&dxt{vvM(=r|SJ@;8m2Y`r143)vmv4i9elR)PJ~$ z)b!VJxQ?Qx6MRm1iGx1zn~us$9%%DT;#Q}8@Gh&rGxZxg%{l$+D|Dvrcf)F*IQk3l z*!Pb^S-IH$f3x!cUk8}%P4xN7>l!W(~2lzfk7JiRjfKezqo z%GXtNiaH;U4W@oQM;(cf9^)z_RqYx=^@YJ0!<>vukOBYK$rgE}s_fu8?lTt&%>x@jL~YaHE( z8+IiVoudAug{Mh>-iOHduE<{O_2Ej>{vWdaUa`psJ$az^&GSfJidIK{T8}+J46{}| z`0O2D_(zyC(`|S8FTV%B9e=j6a-lD~|B~uv?|<~8`BGKs(w?q7PLcdvY~~sDA1gdH z{mJi*&$s81IH-B;-NxrLPFjawva8n#elV@`PAbfgtw>!Hh7B)0c;?XOb_g5(=A|=N z`uLjg*zxC(@`I};|35pZaps+i&DuM8)K-L_=hJMBcLp9U3!3!jJ}S(|cv>=!S03VE z>bG&VZ}LF$$umFm)N#^0vx)b~4<|0V=G2FO(h472_jjkh=gt=6+3`=Fm5c4KSN`PQ z8^lv^-l<&#aDqW)#ajrynGKf(j*e9;SJywt|e>$nzA_$;6CK+S86 zH{^viHg+%W@z<5Z8W(=xWBdKQMSbJ>&#Aiox3Y3!`x7hfH|Qh5+V_u6>&eT-j{27o z)1Nrll|0i)>)1Z<_rtW$e)+`HI$w6X*u1e7scXV8eT&zA>YG=5Jk!N%ot^(t8-Jc> z<-&Xoo%&TLu@>9^uRlKSo!W_>QU6vEG5yI??0fyE=B4xXi61*f>P%bL5srPwd1w9LmKs+4AG>2?f3_M z2Bk6Y6o&-aj^GFB8%+J!DUxrw4@ZN1jn`Gu<5bu~T*&4(nfY!Zll7R$YI4pz@9-*i~ES$NrzbBwDS#lgAVP|M5qAMMX`2 zS9s(3A03~0)%15|nteZDd5NEXLGnO;=j8j;5r@ZYtuy~XRxZRDC(J$W-Uoj6;_JfJ 
z+Z}w*b^G5>+^&CEq;-na)#k1Ik{$I+#%sSpc@jVSF?i$lKUiee^iTO0Bz*8H!pC(G z*r4^7A3o-T4KB0(^bOKSYO9~|nlQ|mf6i{F zee!2vz2%So{I|EP{{L%VonLP}=H=J#&n>ihtA5N!vX`*^ca(=dK_`Cti~0{2o>Wio z3UBl;EW!3K-ickqh2n32N)UdKenIp1mYi>KCvoaLb`vJ^w0!l4ohF~U|8o~!65f8^ zmM33$_ax%{{>#SE{I?VzzV_bWdDCYwD;LXaiOm|n{r`fK2>ukK{v$<(@w>u`wky^V zdnHu*=VIIbQ2Qj0|6N5tFdY}au>Jpd{$Yc?SK46tZ)_Jf`q0?1m!7gSeR15z>le(+ zuYW)0unn2FBJ=Q09@+h$$!m$9*AerN`Y$LVjrxyB9r-psd`{w=uU+c3y7pRPlh5;K`I<)@^e(AW8$ZvZ<>!(QzxnAutzSle7~22k zuO5A9Q8;D&$L||Hx;p=}zl_i4FVOmEX@5GdSoO7c;^T8GI@FFfy z6C~gGv=6g2zrCi^lMh;+*5U7m)(7JyeOX-n{rJMVLk}Ia{-w)@b-(bcuU)p^XX(q% zelaBK^P*ovCvo!HpC6y!RA>Id zCFffle)?{*C+H8`FMN3Z2VQYW*!j9_*OO2Axz{?||8%o*;dPjQwR{6OgZ+jf{j?;X zHM-dMFHR!7-ih;ii29Ee88!WtpYwR8M_2iJicXL?=o3G-q4K2p^kshS!|Jd>9mnR? zpz+gJLs|SW&lJ{cb$ge*cp61#f5I zcV%It{_6@a`D_>Ic0e5TE~!(S{@ETZzbpB~&7bt`Qyy%vPkz|*>-Qcq|9jVlcc1kB z>;Lns_Pci5|BPkjV%NWItN(vWe_%3>R4umuQQf?gi0MY1@|^jhFs(e4Y=F4W@Y$^oL#czw?YQov{H8kdx0al5@v^&WyYfx;1pT4& zt1oQ)mm_{2I$Lc1vnyte@H({f4})2`m>!eL-#`8sy$qSZjw@Bw^I`X&(YObxQ;hnz zidq|wJjL4eZ+T3POS<1fFVMRptNL7(ZXe`y77!5A;srsg0la4~y$Oc%IC! 
z^RYT?P{*OpF!d{cm^*okXU;w3sIbjj??3Z|iw=a>^dHR1#q?iN0|AfuOqK{#KF|hcp!No z+k?ICm6v?@44DskU>|=t^tsFTx#~Ar_|R&nzUr9I2Ka3M-_6R!<~ezn;s%tvT7Swf zS@kvjHlF`ce&!wZA1YWH^-uDxkCu$>ogjQ*>ZdP|Jdpe}j%{(5l}lg0>O>vmgagN( zJMV(Q$>He9e?ROmVH6&_|1I-Uoaq0bG7iXo1AhEwYkyJy;Vdquzxv7NIF|lX{qd(r zJ^plD{Ge@bI&bqQ{lIH{E-M%O)HPu^>l05NbDO?7#!Bk!^KUOYhLFj&9gZ^Nac~D5#qB zXBh9~k@57@);>+gmiVzj>m$i0-h^qLcjL!3Tm4Nu_=Ee--miM|35NuC(#o5kwct+b z+r6w@((TLcKdu8O@Ga2KW2Kglm}FP;-J_1)W*+v7Vj!=!e{x)XSU{il4FNDLMerfd2Kgy}MxQgv%fG*`jP5Nw~62X{C8tTZIk|77sWi%{>&bh4%&l)zv;pKpSxMPpubp^-ezmR zroWSj2bG8Ubp{?Q6Kp*4;F(^A`(FP5A6U~z`S6nm+UrU4%ugMCfy?L*dtUV9lMC0q zBc)K2`N1FcFK1HI-^mDVou}#1 z6@QvfUi%h_lMh-SDc{JF^PB3tlYHaD29@7qgX#~*J^IjRC@h4sO();(g6COMM zY-QzwK6&}~3s0Tl;ZZD;;);5+HD0^_QdctIWqwirvLe;fD-TFM95-hKZ;USy&h`E|1IUEPVK0FUPNm8yTTi}<`W0KOX?U`kvx!mn|G3Lapkq=6C0!s8&rR|dy7L>Irw$m@RL{neC}NvUI~w# zf2LkB)5Rw|_)|2#X%fL^zr*~PchtXIWTtw0S9qiTdLJ|LP5)jQw*4P*&^w8zw)HcZ z#LF(K?Z@g=2lxz^(I2*(cEY!>I)0BZ{~vF?|NN6?(~s#tCVkbu|IO8}C|2y(@kr44 z((hlA`JrRfzgJY&^k;r$bg}dQts*<-gRV5~d{uiVvC$8tFA%?ESN!-v@(me}{;(CP zYr=5-4m+K6?^m`6cfaawd+z(pm0s)Y{PR#&E*@N0zkkft4>G=@M62zG-=SFFX6T~ZVsUiqC4MKiPm%o8-^UKyU;c%aA6?-cVTZfkJI{Z<`u*u%S-G%( zHe99r`S0HLzMD&4)7m?Upc}T$yC-qx9rYhBJdOIVG(O*WlKI>JFZ@*Uy-VuU#&7dk zI6jZQKpQWuPsTUiKK>BS_{>e;+j=1E_{N2u>CX@GdbaJqC;gcI@HP20E>(-|e`YC< zQ-lZ1#UJ$_DIPWH@5^Yle>UCnqAJ$8VjXd$gsgp{Ao_u(qmFO>e98G1r(e)XzVTs$ zX;Kl|0xj`}Ys zYMb=ue>_c(yvUCEc?lCg{y`T-_(0?93a|O8b2=WljQ%k9yxkAIaK_GIn>jzdVB zebs*d*p&?8ydI+di;762{`x<{=)(Te&^2|1|BJ_;qgh;P`n$p#{X->G=RcWuikipwNazQmE13GRQzYNe zNk27vg8s1WUh8dh;Ojmg@+r%Ie~(|B%64S$zq#@&=J%BQ{~mduWJP@(j|8#p`~y0* zG_E%DjNAWM5vl1<-H6nY&+E<5N!;FlPZ^(Yb?_zQGavF&G@YquywskcKWzNDmCt$m zcRm_6-*}G)F8SagUhA5!f5`8+s`vn~!1Ni+%EkK1#b)iDJmPy>Y%u+Q>dwI9Wr6X# z(zN$gvEuX7Q@>j>q8oYGp!J*RPrM1!I^Qrp{w5y$Va;!Cc2>9hkuYV`OCR2E#;d3^ z{dry$-%?(Fwae4&uMNr5ellH*Pk%Ru&30zE~w*)w_1Dk+x6%4*_9l| zWB(ELFDq*LQ#T}a%!fL|vbuVoXdgB{-|FB~G{4qkr)d3B&v>alL4TMteCBhjedbSL 
zV}I|bPg;J4*E+lZ13Zd(r~Crg*?jYC+~VCh)Ak#jx3vB>4<6K55c$k#lMyWj_{j{&L2-w)F|@1)-R*oyu5!u$R3l=FUlV>ss1H-G2Qu`PH#+x}-z z`N1_x{nth}Lmf}DtF>1@`vGjmvA$KWsDGZtrKZ2?#OsK88IRV(PaI7BHm>rL2im-n zxYfDXWL&%}))6(&yTY2dKWzWU3r=XAen!~gWvhJWxa+F(KV1Kimvq(V%T+Hbmd|>p zenqkBYp-$p{);|y$+f=Kj{0|tNKJoNc;n}+JIB4S}dFCG!9?LJgsB|?y z{WtL}r9XUYmr474;IE_My6Hcf|NX}Y>BsgzW71dc{C_fzWYyPR^Uw{u_P(8GmHwl(f6Ho>{lME zzJKTZ=aBM)Yc~A;QOBbXsj6Ow+&7ze`uaxKsQ+M5+oXSc6=7yx)Nz~%KY8sxNWOPT zo!a=RV>^RCMa!dpKzNtd4{w-0w%?rxo{@+3{=UsQ^JcW^$F6@!#>qMUO#E5H;x&)| zKV@7H{jj6{Lj_fn{`kqK9>3{OK8taGwP(Rryu|E^@xSJZ#F@R*)X;?%Wg zh)urf-_txdK5lu5A3l&g(0G&n$oI-;m_8rG`}o6-r{43bn;#zyJN@?Tn@`!Iy8j0I zpRuf5cwJgw@G44HeNDgUhHX4usbStx|B=FD`X}S?JsSDm71=T073=zTp5~=C09n9j4cNlz%M2x9b02=jvA!tI{d8`5Xj4+f`~u{TCLsrhhuGks?}c z=jnEUPW{lvReth$KCx3w>%1$viT$R1y3%duX;U8TFztawbI#}2(UpFU=l{FHYx8c2 z&DuL23DQ@a?;q(?>v$gMC+febs5JeZ@+k5-j(qDQ*F5DX4tlLOn?9QIyUGV@J@bLj z5I*w2KK`)V1;;$~%!?uHy4>BX{P21Qzg>Sgnw1OZ-96twCj63BUwh3%H~7&VM3<=l zXcm{6{yGniE6Jxm%_pz@RKZqlS5}8Eism1aeC!mhe`0e;-G%FYT{oBoRx0b}2XVibJ@T7WD*VggLH=aS^ zA&)qi`k5z49%!$tq(AcEGaSgu#cZc`gP(CiYl~<8_18;I4I4jp{O9vK=fiLM!=sr0 zJf8i(YLfj=Ir%bLwZ`wY4&BV3OK!Tq?F>9o1{(EGzklp1PfyVaGC$BKe(ba#%U9hk zuJuhebt(4C58J=}#vP9O{r%yc&!2MS?_YX?zU=s?mz9fszU2HH{V7UT+-?VWq;+c7 z?mtPr#(5s1{-vj;KRoLSFY~nRG}rOm__*aIe)QCbEg}2>% z`cpUGHzjlzUV7_?U;P)xweNp>S-F`0E9w3JbM=GDFWzeHojk6+|K{RHKl+LKmlZYr zOKQ@x2zV;eNH|w)2xx|@w)W20kn)F8(@{Nyqg38~XC*zP0rgixH zA^lpt6Mpk2c0azbiwH3vx^fp_^vHSl!iFmLl zHuH@7cMFe=>m=?K`5Z?+{uGIW#8VqT+fyyx%F3nYcjEW`Q0L#27tVR#PUju6(^$A{ zpO;^J!-FfZ{n-Ccm1;yiK=g-KQM#$G85iBK4O>#f{G$H7!c)_q@$w?N*ymsTDduHb z#{cfY4>C^>e?Rn2c&RfRe}et^!Zr_d&iVb5&xW@wI(_TyuAG6 zs>$|#tol(W+0`09-y3=-G5Fx2=kk)agaWmkUY?~w}E6lwY?L+ z;Zph;Cw%#jGe7+0ne)T9&OLv-Q|B+DKfC^_r{jQY%M)PZI*F%ii)&x#27fv~UjI@5 zp@OSX|Md8$TV&VsnIGs~k=6XVArJJf$d0_)zxkI^XX?Ex*dm`R))jB#|9>5%d9Cr7 z%k8)4d#8RyvFdBauO0tr9r4BVA1+vG`fGme{BvGp$Nap6nqH=xlepRFsE8jlzOM3W zz4eO?!iNp^@rQjUZ~d7A_W4kF&zak=c$+hwWW4YBj%Q^@n#H{kaQ2dcX$ZosVy~ 
z+?)R65O{3=Kc?e=^p&e$QLOsf_%_r00Y!KcJL3tX$~N&sMvosN;%N zUo$^+^GhlW*Nd+qI0Pn!;}{r+W8=L5d6y#1B^PneCT<4bn6CSLph)2`$& z9(_jr7Zs5v{ke{ge4Y|DaQz*aIH!|IG6MS|2SPS5d5L?VUU}z8%D-9wd$(^&c%VoAmdth&CU` zz~^0&70(w<5riMKJk90Dq-zwb9i@FqvgWe@|YFpiK64&vF z8xMZ!!6rZP1mO!??sE913m&;PY<*tw`|B&b=Gp(hce8Tgb(nvqy#L+`rq7_pwT^y0 zwqkqxtXyhw`c~`=JSlk%yzP$g(7zog=HiE+IEdf7@p<^k1DS6c$2Ojn4;y5D*r58u zMt2`GYxhZ`VfKtSY&ZPSitzV6{%K|9!gefHwT1^oH|%tN=os}cJG1B438x1ah;2IQ zJd77x^V%O9pWjqx{(Q;#7Ka~nl5gWYK~`b#|Dy%X&x7{l!A`w9miP zE9UL;|Cfh7Jc^kv^kKHf?fegX>SsLW7xgbAHT|`Y>v+lMc{9{J;-J^@%%+b%`5gDv z>gXSY4;$>`5A%-P_oY7%pBm<^`>LJpUHuTRb@u&Zo|Oyp%jNH{d$#>{HSQF3Jh7_d z0(9f~@#Oa;llevcTZN}l|Kxs3Jb#96JRiJ1@naj7qb~No&b)L!#-p~?DQ|6F6ZCgh z`{%B|o!7cA-Unjuho-J`EW8}=*!%8iRxYNyQ(IB2`r2!p;}GjJnIF7S|GZ$S>90KL z|7o&a7^2UDf+ZgRoGyrdp!K2o=FgX$Z*llRC;7&Q4W@aCKWr2R4taXlFNN8!+6P*gFFC(fr{j1~dCXQnbxqh=ZKtu_ZusYCI;(Ac{=0tjmi6e%j(-NTaxpy^ zC-v)iUa>5SE9~0;|K^f`ZuA%R?-dan&lOg*bzH6#BcIm|`)Tir>}o$^d9HNZ^=Mvg zc;T`9T=I#VpZTZtOY3h5f0(|ul2V5=UKVXU(@lAWK}wut$p$TcWEE!1xEdci`tt0%&#lV@X(i` zlQ_?By1%kI_!RN?L;5siJovB``|*W!Z@A;^nOk$wLsk{N` z)3(3uW#!_XVoQ*(ksb5%66|#VALyOLQ`>aZ zzAfHWK2Yl|9~(^PH9>!v|GKN!di&!eA)KXWXy!fBgS9=>ufE)SjR}%zoE* zzCHUt_Xx9J_Q-Xs@9`V2bzFbN=Kv6$$OAK7c>WBX1lnD})bBM8Z`6Nbk=dkwTjxo> z@iBiz^4fP4Y}NDCRA>H?GTn}o@Pjs%xN@hDc*O~*@ZyZ?s9ZF|L5zNr7AqP9{0$--=Ubj7y)q4E(2y-Vse zkABGmd0m3$r%w6cg~x2-ee%QRPwsK8UKGo zb+bOZ%B#ryqW+^rq^7?syz%&FXF>AqdCbKRKXK4IiJOfsiugfvM-TH;r+n}-uGz#J z^5F}cK6AqI8y|H@*zC5y-gWG|H>2K;e|lNDnEo@${r^YE10}0kr~ROt`MZ*v&M)de zRzw>0pDT6rPracVAGf^3Z+)mfJ?Mk$bf!J{^IH!& z^lj9$|F`{4sY=wPo0SWBpm?jb^{IWew~?6n;n6s}oq^w%iH-Veo$1t4UQqdngWgG= z*_uZ`_`#;rQ7)W6KA>7Ue*U+Yi#@TdP5V|DN;;_rtxFX|aDwUc=@@pe}J z^z4}zTzW)j)!S#>bJf50hQIIoZ=RKl?SC0xQKIqnqPAUsOCI&uU8#uryW&wz|I!=x z|LOe)T19ru2VH4eUi)dWz53CCzCiOYlzi+I$%oI7c{r(`s6QNc_A!^`uRbH3^3VH@ z+j8$5e!Kqwyo$Npe)u?>FZ0&8j9#tj1Kr@qpUyApUotl8pML+F%UHG@II&Y+pZKj0 z?Yk+Tc@noe<%4%={qThi=6>*^b2pn7Hro28i&wnnH}LTO%W*&ciQfN#e6aC4t&h}87gy7u}~XFAbeSykyo9P}=!Q`^SxC2@EajYs?N 
z)Pu|D4?A2qxY_l8TM*uL=+(df?vJL>5Bs0S<4?v@lP4xYfY}YM${=(BGNW+4i!R&z;|yb@3nn^Ub%M z48I-!k7ng!-vf8mcH>d3`r7!?>u+M@E6|Av8#&M-tx7FXogFkG0 z*q@(x`I#?-_Ph7HG~al>*E;+DYf$;Y$>sTnI8?YZ=nEBiFH@Wh%7s9M(Z@J*blb?W}>u=Eo zwEI8jS-Ie+PQ2BcJ~?h7&oI#~>c60-JClm^wS|?+sFr zEz|9LEBcK3FDxQW`uj3kozG{#VTcZz*Zx$&R^>B}^{M&h*S@e*OzXTW*kiwp>q^tM zgMR+bn*SejXC5a>Ri*6^!9fKr_6C~f0u45MvMDD%L2Q zfKWJsh=_%>$Ovk)DJo!zY>q6gtU92@qQZbu3XOo<7je&dPFCLNFuP0V`%U~2Z=UVF z=e`jc85tRw`IBQ`-|B*A`X`_8r=>r?!QRmUF#|G29#2*g) z+)IZZ`u1tz{WCxN!Jl2!U_N&Je;|vC>9LId{%YcvtU|Y(M}qX*^ZHLi@(cY(%SSez zE3I++S4(VkVLU@8agIy>=Rbd6b{mVe$iidgGb)XT0`rpZ~UXJ|O>pjSZ@P z=q*(p-R%BbClL?!b=!shW2L7~e||^K^k-a9`G|wwm8_lP5PgE=fwnzcHFZw>9@KHo zUPyl!_{>{xd&jcRg%j_X{LXVW%jw6CKRQ19lV5JLxbQpyGhL`pZFD1EH^2VA7h-8e zec;Ire;uyn+Egl}9`UV)fn`sEsDd`0n&+yB$^&sI{WcAX?sr$^*vg z&hw}FUWt=e^FpN@;InAWFu*!+9X|LmCKw-4LwalrIHFTFQ>cKt!;YkEwY ze@Af(`m2kq|5{mG;J148$|YXt@5)CuUbH&fk@@p`y2g{2RYjgF*PZ?MUw-~izo3i5 z)t>*PdZ%_+d9|@setFI3!>TvD<8V}7N6|10rR2TD~(kET-|`p(sE z3DQ@ge`K!Hzp4MfM1C**Est@Xj*G2G9_W)i^HWD0Ttq*7VVm1l+2AMJt`l}x^Mtz| z-TEx_v+JKDSzIdT-MRV|#ph$M2tm&GU z*+qUHp>rG_6}vfCkbKbeO7$IBaDH8#cam>>*kCe0pZLR!Cx3V65v%7REcu;BKeGL; z#O?P72D7-B{;S5{e>>C8M_UqS&3N)~D8)ol8Zb|3LYuR{s&ntJOot<9OM; ztUJH1&ivXJc8b<7<2d0rKQ@@=CH`>S&2L=Pym9gH(bo^2xA%YShJN<><8T%iJOA7; zUjLb5`&%w?*478(D%$VA;crNV<+sEx^dBrARrIHBuH>N${Te!n^F6WVx!w6z2cM$( z^9AQyJgM_e@@*d2AoIAG$>iaZL?Lz;d(o?JdsN|a-x!ATpIJHLwo8J;7AGAJneDe=3IN##* z3p&X+K5Q_}OZ=hn%`g1+YcsA6n@pQ?_a`^*{Ql%&%A0p%eo?&t|GD(|W8*g@pS5@L zsPg%*=E1}K3jK%6ii-ZO^mZ2NO!r)3^Z_65gRw#KK>WRsd_&hguE$pF#TQn6=-fT` z`RVgv@)~=u`;OnenfluEpJ}@PqaUc_WyxZE>G!oXA06ql(0^`8QPIDz^cJ6IZ7WEA zMSs^Fx4gt}`YA7Ypx1n!&*s;Bi<37&f7pBOKG*Mf$T?wOzu0?jdG;1~?fQQ!iwipW zMg9Fl?F-#(-t^rNe&$!`KT>+?^yhVz{5t*Z`xlpY^IIKyDVo2v;CzcGb>2ySWnK&E zx9?x9|Joluy~1se!)Kqr4=O*na(Ve-{*QQG8`4)p@>zQ)kDBVYKCxR8XMTnL^GZ)e zec6b;knf$?DK<5)IociXS=W?&@={F4 z`Dg9W%vO2ll?UGxX6$`w-($C5iavJzZB+WP^M5BV0Jh)d8rSVCR%h*{M}by_|8Ye-v9Hd z#$oq#zW*{y`G*pGSN#9wS?O_CvI^ZCj|7<~-`gc_`#=5VlHcDaH!Pd}#GS;`?d9<+ 
zqEj!VFOYm0rE`4=KS;hI+K*Hf+e7=R z{r!3RZb?O50B zp^w$o{WtYv8|GPD=)?RvFY~KyaTm*MKU-Kod|~_ff4%FfXNE)byPL0i!q;{)z1qq< zkl?EN{kvTKieh#4ce9;5vitwlMmPEZ3;i2qh3U_H!MVamz8%M6RaJNLQe>VWarQIp z6v;PaKJjF|pT;$E@2jM(I(m-Un6H6N9HuYRjj8^7U578mkTtmy6}f1>`d+MM(DTIX;d zru_KNkDPPxChSk9f3EzB`P}&V@376gC2`i?@ko%qIBzG;{!bh%^pA{n`qLk}7*F0k z9(la(kPk8*h`$$-Z|IV7u@!sqg=r7Y{m{}oe=e;5{KJEL{rEL_Yw!P@d|$cTZ~Eum z;3KJd_Ip?|xqtmvOSZ^*CJKQCE|>lJ$6HTj_Rp}hEeVOr;1IZEfa z;azEU=HH7iY_Q#vm%a0ChlC9;x$CksPMzvqSzYwqj?mH3iJi;e56&_D8r^wfb=Ka= zV>|yxNA)j2|ADfiPXBa&uG2rAx7YFTr#j*XZGNq?o6hTs)w$^I=#$Qicn^Qr`N%oz zH#Yug*!|6ae=2LA`D*EgEnp<_}r##+u z$Kg%=)}Qi{2Qn}6%&+pVcX!2bIU{ zh4hE_UiawcXN}Dahn~OI);FE@QTXli$2^M*`&oW@a{Y(xk&G`@g^ud4{Qh1;dE0`_ zyU>4b>8aI!hwgm(GIZT>czxohFOWRY^lx?Zkq@7t&eLp{#idt%nDVP-&zSk($Aepb z!@W=6dqwKm|LLdl|Ip~CG0<AG{Ow_pklwZ_j*s)+hVd`OzJlJ@UO9;IZ@nu`Di?;}7#yl&tF7 zJMnSd0RD7-*oFS{%8H8q@Jtmx)2Su4Z!0=M;-F9b*oFgHTr9ta*ZOyo@4>#rUPyn~ zW2cki|8MSn%P-!3(T2CeZ`XgulozD0hWf!ivD3BjrSE@}x^c(<(UPN1fA7lZxbv8n z*rvbs;hp&T+!NQ-09Z*bC_oKi}o}uiUZK%<$+3Mvk8~eT;rg|1sqUTWaUsC{{=J)UKTW z!_%icYWMg3E;{P;=YIrD4|v+j;{=I=KJjBSUp`;5e0a4#^Q&$3%4U`;-JVe{|I z-+k!^?+aVbI_;I$-#r44z5gA`;==c-{0#m5XZRH*tF!j%2hk1No;UQ@%HmS!A3e4D zZz_58UwQpiKH?zbn$5U9^5L)4Iq3slbTgYc<6wj84_EJS+JC=mvEeZ17sJPW_J3ON z*zf-|vbcDyv*VB2e}6B};==LKJF(FVO#So?k_VD+ z`-$?BZ}A>}`06ms+J3!z_q_1@aN?UM@AvXsCc|s5zidCnoco^{7u0x`EMDWy^!NAF z&;01G&_8bRI{mq?gnZLI&*I{p;yl6T)q>=Mrla<4{(%MOTU_(4-bsGNUPymfuRZ+t zKR?h6>)*ch#eeMY{QlVC%A0qiU4PI%D>`YuRCQht?DJpxPk5PMp?{22c&Qr`X7Vfg zCwa~9b>~|hd5Y#ATyVa{lREDtf5Q5QyVpDY;!XAmd#(Mq2hUu(bN}HH$~%gWL< z=v~RW2>RzS-R|c$9__TCB{4*KX_~!|4bB-YVpyg@3 z`3Dx9Z}D_IC;7&Q4U&%ysy|HM<)o1%w%;i1aPnbG{`%kN!DFw#gIQd7f9_Y%@BeC_ zrW51jlF!;ZdDK*dzp~vlUg$r!tT5iBj?a;6_0T?ugI;-38$W1q=F`sNVtyyKUs!~v zCO=#A;$k{6PDA{xy^}{=hs3t+(3Uv!EA$^J zJ;v)wtFYl8jb`U}+*)GW^V6w4D%iYH5Ps13l-K-&3(mJVeSl8#jSpLqx;hN2yzT9C zU;V~$VbvQq-h79ze1rYc?*ANCesCTA|3T&rCjCfNN4LuFFXhTho$?g=&nqLf`nRQy z`PjVS0S6?nxlea~T^)YO?wlv#2fdRF8yC9({k$u^9l!Of 
z{lEvZAM*NZ{i@|MD`B>Cq6|^h)?F&n0!5$95nOwC&oesZ%}=!iNp^@b^!C z?d2L5iiVo@r>6x`#;xzTGBUtc&=mWeCSiMs%!M9+<%x$F8eL> zF7%Iz)alRns>!F`5MAdAfAcef$PUpe6*kBKT*lF4yw|w{717YV) zesR@pzq%J*`~Fu$`4#iS{*H=aRvA-icl4A0O4}&-c2}g*ro> zFL}+Ug_n43U9Nfa1j$Pgo(k%?i|Q}_cl~V{ z^2{dAIM|^2!yf8+sUKwCPW-IB z+Liae>PL6-3jHHvMStaQ?vll&R)6Is4yNOnt-R!c_WVxbR;S~5Q1i_0;b)xi+(CD2 zf5nw~nE%HeX1?vf&h>|O78m>8-Kg3vMc0jDb=Ka=WBNrm>~wyd4;A{i%SzKfnOEgL zyu8dV^79CGe#87g@4DmGN20IgC-XFa(ht1Gr{mzKt`5Vd`~U2Q(|&NQbXUwn>$(5X zbQq8fzo~xzh&~jl2h)Dg&HOFpbAt3&=pRS2aW(E0o2z%{+i|BMe)!=7sW)5uuFcQ8 z`KTw~Y*&-FkpBL@?FQyv{)xZ!_x=3sH=e!XTj90OU!of0_Q%!r`*+M2)cHs^owf0) zZq|P;IoRk@=s#FSD*7k<_IaH8;Q`@G&sWW#)Zy=i@Ef|a+oE6iTr_Pz?!_0@n!Wx~ zU*GiDFm-$~pn_UeN? z?z<)*q%RPEFC^bk`>EJTpLM)p$`6japzpzh!s_Q<@!|I93-H+W|ENZx2e^?Qe;Qd_ z*k06$x3k9ITvhY?6sgBCvaZe_Qcuo)cv4)#m@N zu~WV97y8f5ip9orrB&FjTo<29>vlAqTy$F;UgDtFJhRbD5kF}AReQnw@EOAAy7S?y z!*JbxU;NQsxAldau2}D_A2@p+e0KbYS215p-~Tqc$DvNVoi%=RL!Weh^jGLVQdZaM zzkL}Uw;$)xwDTYD#D)({{nm%hi##wru9_b`bR2k%ZxQ`r!?~|F#@_dE*ysT_Wv6AI zh0nhK)ym>x=ZiJJKOk9!ZuAE_i8NI=i|69^PLTN(`p+vp75(8G?9Mlxa`79#=J7rr ze$eui*ZfX+uv1Lyyylx98>DY+MR>!SxBu7jmw$N6F!hMrpFCixw|R}*_a7TsT-aap z{`mWc&h#13xYp5!c*h+#*44bUpO*OPr_g`2^ccS@t#R9RuGr+$mm&O`*F38`-|EOy zG=IL}e2XV_-bp_5HhUrc;mXsmUE#G0?+sT^{`su$3{}5>J*fQPI(q+qu6{+aI(nxz z=ONao)|uUqibDUf@=>k+gOX=@`R4;7M^h)xH*I`;mU*^|7@z?R-50lQh;GKu~N5Z5-4jdc3W;g2W z{C_x$i@o33PXGV9p?*;3A>PiKzM5-kzUJBf+7>_k6#B;u+pey=F8x0dO57HNizZa5knD&RQ z*o!Z$dF!`cz2m6M!`cU5wfdIpY~Z!d-hYi`ak2e>O+Ee(sK2e~6dQu9>C^7NMW4Jo zpFRuyBSS@hC;9)f`+w*|k?}!}hjx6^@tKdu2I0e2gs%=O=l^l`#rqt7|4nWB$@7t$Zzb=Isu|KhQ|!}|Lky5yVRJmR&E^JcDFg4V}C78i69tI#dyksy6G z*HPPb`#G+`Tj(DZfY+0+*nGFtlh1ZH)I8##PyF-+k_X~v|F8I!4_?bp{M6N9|AsSX zeCNe2X7+D%+{KeWx#HdM*ymq)78i8&E9vjg>wZJNWL4MRi4Wb#H%#`oLjQJIQLF#X z!pD3}_lEer>yBGq;G-z08z%I87NGrNbMar#%d^Up7ze!~U*D`tahy!_Qq zz;F7GWpSx|{~dls$?B}V`i*a<#IQ{_`pzX@=s!?Cs_0LilRWx2o`(3n#_{vJE98UL zhw|d@g=w9Qm-L6f7hkyP`k%Uyqn-;7efOE{+CR;v&hCF0%i?1C@2sD{=jsO;Us0lk zj!qug^*8cb;&-v6I}bkyKYWJt;iP_H 
z{h`(R%_FDY^IF(@qvIQYd2K1`?EHUN`M{d@zmiqxrhTL~pPLXz_tY-*j|$Z4?}XX( z$i=qjQxdnl#BbwjzvO{l^Z&Qcf5QH!?EBTvKYV%EZ>y7@eg7Ahq%ZsZ?U5`lmEYZO zsXx`*iJ!IcsUO>8D-ZK7^dHHJrB?r5-+%9+C*xQjb@_SOMbSq)g56hPbx!;qR35Vz z(jN}^{r4|_eA&~&eh>fQ%&`x5UVo3w;==Z4f6e3HkGyb>`O|ULmg>&)q4NFrT>0CA z=u+rEuk=*(caq2d4w6q_hB`iR&})5a;|DFCXK}H4I+G2)-G4}b@U|tB{tEraN{{L9Bn~hC<4nHo2Mx{BeBz)_{P2O~ zfpz-_`S2MsA0205{ryw@{-4?Y?GN{_{q1k~SJwYFb*6t*qtF9vWpN=76mMsp_E|aq zfu|ug%)7r&2CCJ6x$gXm{(TbX_r9AiitU{s<6v8#^fTO@M?PrzPVzjcyDGhIM~$HVF;HWc4+_TO~N+r_>;c= z($5Nd|1IMyQV)9d(+9elA3o-jXK^X?cjW`qKU(AZO22=U#J$?-d{aMmiqx?^Z2Ky2 znr}Aq=#`hm!xumJ&7Hom>b>DV*S+U~ci!ZxD2kFCX<)cshpz*b(fAdqP4#*~EL~+wZR)FzvnW zF8Vb{&C&_6G0YxVE7|7$*eDQ@(?RQ+}!qo>oGr0MO?uE}je}q>tUqiqDo@a5f^8_b$y7ublywT>Z ze(ITDp?|C7sM9|^&U<)3`fX=%Nk6w>9P2~#@k>?bJQY8f)_Ert=Eqi~t`5VNOJDMx zCr%j&Tlwo>zGtb+;IYrYhLj)ND4zd6ZTm+<;;gAR+sUKK{@)Tm`5b2p{o7?^q4kF+ zRuy%wC@vPZQ*%$T)A@TRe#1VoS*Q8f*01JQY$pu(sjI`V_G_n}f6M4uVS{|m+rMzp zmR{>Q|KxQ5gva#BWjxmG&t_|%T!;5gB6dG7^H6`G|3H~x{Ob4Ma26Nqqa}7FU-O!u z=+3t~##JpM1tQbY*p?p5IX*A2hv^@x3dn z|34mo*8R_=4u1Ug^}>1|U+F7Lob(&|vg@CDY{x#=^~=NyKrecyenqj0dFy=4*0|}9 zj_T*QgD!>sLuD;Gd-A9oDT2j9KJ^@Lyep$c{YZptk^ld?Ep_mx$oveQ-RT!{Q{{seFhdhzIZ#^qw@Z%p>=IRcnkf9OONrp(i+FJx!B~J{`BFxt^wjFVa(BM<1z%rx9A59b{qzNr2O4kEANlYZ>bPdREH1tB`bcNs+;wLUdj9l{YT2$TK%UBv+2aV z6C@6L_mAty15JPBH9zAhAG|iNh4jN0Hf)UUH*)=!VY5|^S#GNrrom(1f624B@cNeD zqrd;0_$8~OH#}fdb;C~QhriH&UddI_-^mEv$3_2M{f3ENX`b2WSeM_B@hYz?)YXh@ zeGu=FAAWjP^Oo1IZ-mG0I{SSWyUy=lwX?Wz{6nAo5WW8};g{^{+G`%V!DITjr3PIJ z{YOhrMSqWG6A^!KlH!69$kGrYZj z(hXbQ`S^)zGp-%~hqAcX`T7oe{hx6aC9AWhZxG$Ez1F36q5oLPQqf=Yc^#?f!Tc48 zgI?>+?vnmS(!zn-d#n~5L`vm)wz5gA~;$rvTZWsT4 zVQ=z4$?6<`%y#k!-AwOX{OSDq`~DCE75#M{+{bQuw7PlVCk}cid1j-FB7QJ^-kYB~ z<%5@T&DJ<|br?1~=+f3vb4J4!Pks5L*IsoFJa+sa%i?11!xr`aEA1EE;7{j=UFaVx zYV}`3`e8o2E*a{0#6jmu|Sr=aN)c#@C+AA*c`}dv? 
zzn%Y#W^wW0PMej>=Ir-|I=*Cg*2bs0VW;!MF7$V0bw&T^E$+)s&p-1ryU5QY*!w?p z1ih2E+32N+A2hy}@@l>Hj}5|y4fgPdkImR|o24#VGF*D`?&og$%1p0y_W66B#l`wr zPCfwePW_;cBi_#1YaHFI|6Fp|@0oX@f1`|4^jCg($hW@W(>&s!cLiy&n5}vAgC8{B zR!yA~zX##N27CC!Ub|n?dh~{AVXvPb_}=Azbv}G{|4%!M3&&}@|GA-lMX{=D@5E=n zzeGQ|_!ppmjMVAR>rh1}_?+?%3;v(K{^tJl?Rgh{{DWc5pWXGJYuxt>@3im31x<$m zwU^W1-)O51qHD4}ypufp{42>@fc~wr!p7Bl_S2>c-%EeupjRHV=|d4eNMDR=e(LB8 ztnpJzdOyIpSi?Y)34c19&z6Y z@pOLJ#qpkacs`!N27j=Q!X`Jm<1=ucjXX`Oe%Z+>jWv_7@NT3avuA2(io zewcdQrLS$g*E(M7c>U$NKS;eDkJ?#Wyi?Tqh*jvQ{>t@F)yMkIrJ~S(pnPQdD<66M zkAU@|b!-RxDSB6s77Kp(K>Q&4LmJ2Su4L=Xug(u&9freS3cK&S+H7Gf_;kML&+*o9 z7rp-Dq{1tT)mg&>q8oNPKlCf~A1ql+e^*-L`r1%z=0lyK_D>x2E~!hlD|sN#Gx{|@ zb;_6Wk_S>xo}u!DecBH+zWYQY?EBq|x0`+BWa4)Hzmdg-e)7Zg^WT>GLFJ=fqMiMt z^7$jYT2H@){zGMLo&NBGHb2H!gwH$4#0Jwk{Jju9t5<&WCw4Esu55i~q3}kU(`_Pwo$LYVm<~hYgFRpJC`VW^G#_vjN98dcFyOze= zicXL|L7(`s4I5cpEFWG)^Y=-f)j72%=nq?c`^b;%`N>y9e)o3g-gfYv=!gFHx;v`l z&==z>O0>E*zNr%PP7xmMoB0*`&n+`5`fDE7QOURM-cVjANF4NupT0oyKs!DvulcnP z_zWlL@1Go=xM$>*z4}*s=i3fE*PzexofrS z=*RRQlfLZwPfKlh6vbOz8=vatH9wsn+oRBbUdd9ce|r3Bl-Wf-*e5o6f!=ld=?f$e zO#3sx^2291L4W_++iWs>=I!6^U;9@d*aKZT&haYs3Z+xkp<|Xm4((@~xw#}BG46FQd$zQzn zzD>Qx?fU;f78lcFS^fO0t$szZ#?7B`6`Ol#zEe!%96$Q|{umQ%Jn|LUuWR*SJn+9{ zJn}))OY_V>yx@F`)3^5H>ySJ!%~OBa^1NfN{l(IM3tL^a?wjAg^#${q9Lu};iGz%1Ho7R{2k8qmKXtBq zJlmd&=nwlGG5z&ZzCIH6USa-rzyH$X=xN9QQ5^?tsZAe>;;pW|6JO>3f8kMNeue(A zqE>%RB)_7+_Cp+`53@B7-SC6PoA8kjpCNp)`_6vYD?c1`+^iSA{>%4=gEs!$&mP|W zKKShY?^qU>%K1-2{fc6Bjz6j0+)DH5vz6Gn*oFR)v7*1$HFuRf(bHM$@{qvH=#)C%@9{M-k8)a2z`x6JfE2E2W{Z0FZ-+1u5NN2}eo7Y16 z;R~O?{>f!uJZ4__($}ZF`~EGiVLW^NWnPL4`~9!9A9OQ6^H5&;DfDlZGpp6V*YRI@ z;RC(uVm9N~<>y5c#eyD{Iv2we)*l_hc2}+ze}7}U69(7XFj^rwz|^f7d0by1&3$eLRVk`G!RHTsj6Vp`{&@S7i7 zF|AMS@Z(j#{Pa~1ToayrH9P0ghi-w#UVn4t2bb61e^!1E>O8V!A>Ld?0`Q{`+m*fw z{Rhg-TK&g_hkWDH@x7BcuUE})cjsFjd0;MiTn8jC#oF=G@!{>^4?B(gYUYO*zdY=+ z*;Nm(G^6wVyF;{}k#3|P(=V4eYwvg@NWb>^uf{!?^jGLVm=#M!|0K`)XqDMTemjCa z-*`UoyF=uI^abMYh2$GL9XILEtG^dtSbfe-qu1>EY*_2#53ha2>f6F=-~Sp@evtX* 
z>IWHLQKE$|YV-MH+7EG_2ZjDaWo@1Q{Ejy~=xpd@Wa6NWlk|z7Jka(p%{M>wI*tb? z=+Qe1?#FBNPkH*(x36&g{`6z#KO4hV;W+c{)t}Juai8p(W@@MrMyYp`c*y;!iNpk_~|43?o*%tw`tEe!e3^r zwvNBCbN}-}SzI`d8qyd1ijv(~d-c zc0-rC{Fda?pYc%VG!9PCAC5R?YVooA`@<2F_POoFL$_u;JO3Qkalmk2T)b1iqFB|n z*LdaqS1!4X$Mz`nA1Nah{axuTu4}3;rbk2kw*Nb^iGx1z+qkM5d7$yNyZNk6`Me@^ zDfY?_FTDBXKb*1M*8R&|y2L(*erZ1W&BMCKt?)9w_Al9;wNK*c27lfB3jODmEVcTF zGCFQQPW7+T)5WrdzyBeZeEMbn*xqRz)NvQp-#>L|zb`#8<&^%lcfRYvn|FQ#e%t>C zbR2NKc>Q_%bR5a*tkZZ?b%Q^BeZwyFA1zsG^-u2Sq0Z2i)t&8Qy1D3Hbp0Dr=e6GY z#|9^;f4F4#SN1=7`s3l^Yq$KyQqw!ff8GD>xIdP~h2w;s@3qCx+WJfSX>K7I7SFqF zbSm_ZBUSW=uPt@t+w-W=%}0Oy?;H7`^`X4@g{8Xvh^KYlCF5c%QdfuJ+37#n?Ccen z4D-)_|Ik&f&i~)NUwPXJPLB7#omx9@osU!%`>~TpwfCRr5~siZzL%q;R{td5_Ivev zm&FBtb4#(|2U8vK_rkQ!JK;A!wqh^7F!=`~Cw_X)0b%tU_c`vW>v#29$Ng@$zqhiu z_~oa~FFt`^w@!b_>a4w!$9DZ$ZL3#a`YiO1Td<-(JWb&v-}-BdA03E;-bp;Q$!9!^ zGvC-v=lNjy$_MX6{o&nnHvZ2gzj9=l{)bcNop#2z;j!;uj7r~p?q~B&#+R(l+7s9Q zn_gn(Pv!@2p?|DI56h3<;`ub1#igQCTm0TBjtVxvB}hKVJVEmhE;!%fIv?u1ll+Rk zkp8gIJtrKx+<6a#jqf=0(fs*G;o^|7}{?XxCT+kD=-~VqUaUDmzowaxJxc2`S=r5PNLjU-vqQ8?k@1sqpnm&kw zUhBFD^{f4Oknyp>9{zCX zm5rsY&gO>0-@5Jgla4wAe!KpYXK_KFJbwN9o0B+RfRa_{rsJ9IJ0HK!WXt$@xYre+VR=2 z%_ru({_c@sbg}FIqgh<&&-dx~PuuEug6Z0;pYfXw$w>Xg!9xFmGP73y*o$ALMtatk0I@n%xk)(0{OeRMEdr;;c=dTs*cNoY>5d=LI%s z<0>!yUYOQ-9nbvOioN*4ikE!u;~)O~?6BfX`(F9t9>>FH=l=s)TuhHuj*N@VH`lmR zOm$L!<@L9r`MDr{7WxmB9vja|ocZxSntc2z5(mBVq&9xGr^R(1@S0zFtge?0PfcF< z==V?kLF<{B;iI28?%)6Lmf4JF=l=s)TuhHCdjDZV{fc5$*T%>72d8!Pm0+R&aG7D_ zxzZ|ZSFVfepZYp(dL;GUm06wRjODrLUi9lf@M}Hu@z`LRw~+qu<+(ro(&qcm3v*_? 
zW9iMm_#FIp{*z~MVSmr#-_Jg_B(D>oWEDDTAF0iGBXRgryU>4bSzD`rOPEd9hS>Ds zU3c8_62FbB{gMard?3&Kn$I|fi|B_h+_J@wAGu+V;c)+LcOJXsAMJOA?fhp<=4034 zT57v)6sxoLP9D>LlREmWu?zi2%F2rVTG#XYr<%w7K>Dad@<7`^v`_OZzjtMqo#&pbHrkJ;*{t`5WQ*F1X2 z7oOiO>^A)Hn>PGw=lFkU78mAgxP1Kh>weQOmpsk{*t$Z6Hou)ba0Ya=s#9Is?~pnUoCsa%m+UnT#0+_&_8-A`a8`Z z5@a5155pv{d2DyS)iJK3`5Oz)w|G+L84o`;*ux)QerViUUklSCFSCpMJVIxCFDG{M z*XnnIR>!!C<{#_M!w;g5S3X1d7SSKV-Rqrx@h1C(aLEVW@zCS@!)NFJ@F{=3k^cTb zp2dZJ#Hy}|qZ>AQF)y&tzfrPO^moF|`)Km*JiRS(_=$twNj$aj!(;JW^5L_*3F>V1 zuA4V(aQszYnm_Z!u+dpdv^Ja4x&QD;%^OQ_x*q>q>IdmdQKFsa1G?FKa;>LM?Lzkl)xJ>{{z&RZ(%m=B!%>o0ErpS}JzbR5OJKkom|^cm2&Q`Gs0RqX%j zN4HANAlEJ7 zr)~lI50(|^;9Y4IHuF$^bg9)}^LXBo4;r8H;_roNop)t-wc)?%9j3czvI+`&VB*_Ags?et+pO z8JGM1LF=QTaYeDJYv##$2zK543jK#mmWuwa^cL53^nFBKAMg_gy(_E79skiwkvx$7 z0zU7`>iYe>lILQ&eck|Xiv278X7?FSFEy`!rSpIC$fYZN7an{49TOfq{_Y!}WoE7Z$^EqSYpCPF2YOes zF2eCAkLmWhZ#>HHoyNfl`uo@Y;nU~0fAYuv^){My?pJphVIH;TKP?@%(T!qN*KCJ$ zzpczCm)b)A(ehE9{*~upUS=2hc?3J}Ag_6a_!$qRFVOt$1?O8Fok1u0@OW%6%}e}Y z$Cv-I|Ll*AhMlMH`S|U}+kGW={imJ9h4aL`y!(0D{1JP7YDt_mI+*R`kzN0!pIrRp zGrvOru`*Jpe^bXJpZ `H6$xm8|3Te-};LztcYI_$oRx9}r$J^(Xy>k#`+B@Wqb| zh2KAU*reUwLGL8bY{sq2Z%ICSSL&3{gYaR4J^W$cPcQ%d^}ly%*muVCJJ2pvrSkt{Bi z^H2B{C9Aro|H}U_B=h6>ROlZ&uGL@bOpk_)N#Be|9Q01|%#NcJ`->ue5IaJ(#B=agchm zd*#P2I>*yWo%RE-&1(_;;oTqD^Y!1}^0u)3q}5hF>fq19XZ!zf78eih82|rt)Vt`m z&w)NQ-9*fhbqWP0?;GHOj z1OI!i(Ffi>6b?A^=TlDDaw+<;-~SxW;$r&0OMm~rrG8NR6mMtkojm6J6Ps~TEcB0z z75$Zm^Gef$@!K+v@y`>xIV?y%n2u}yfd%JVyiek3z1glNjvrK>u!VKpR?$SUkOv>OcV)DQyK>#h^RBcO-rv9S4L?0^t8AzK$ww{slb@e`H~jYb@0jv| z^p&e$QLN6mX7|Y>t{cP8`93!DEA$^KD=PXs<wMrdoS?to-Fe_m7hmzd{xvTD)1-aQy_|k(e}8yQz5iDGV&2kaXFo98$s;@d(`QTk z)HA)r$~M+y4Z$2Z!E9IA0}=1rN3`9 z^ZBsy0awpGeDoBrd3O9kC&fJe{M%KJn?3`{cs1iRo0?Y>XFC-7&n+{JzfbGHhS=yr z9f-ddnl8%QE1!5;=Uvhtwjy)qL{Ox|YNX{YT12roSt#aXkHEGoQNi6yjh-H~LT{4`h2%XMXCO^x?r8KXr9z zpTDg3ZPjjQ8l$zpNstEsxdpUw{*3;pMnRCW3@Ov-P( zIv(?79Bi;IpLiWs#&yz>`4hX22Y*;&r)__feX$YNn)=3ZvtR4|{iBQG?+-j@$K95W 
zuk)r4@s8VXCu^QlBtOAI|Isp|qQ5J>g+GsApR1-inyq=<_s)Dl%hU1k_d@Sve5Ou8el(x2S&l2rn=Gg_%FuWi0%8 zpXM{G9oM=4hu{Akjl`wzM?pSu0E&_6QN>fh`8?@mTe=WF98ed4dpPx#39>NkXUA^nUO z)>!hqjehmpSz*mTpK{@s!^7~^9)C8~`)?UfF-sQfQ{$EIzf-UM(odm(^i=fMJkDdu z_v$xPUg99})W*;A&*F_NE)~C1e~QU`Jn=fe#>0l2F8jNw2dx)2UjCJNU;o-KsjGee zwW@ppxNiT?bTR#PoaQ76Fh39;>_Y!I^IH8A9_kHsJorGL_^m(Hhdj{uTGBW5R;PUb ztUv7eK)&{A7ws1IxbXfjKKz!`;Ir!wjIWr>-+v9UUw~Q7Vtjht!1~XnfO_U#=pQ3> z`b$-DT%#{|KqndSao&s#S|5Gg`NZomt@Apb`LPx2c<_g1o|}Bh-)_1(Ec4>b^^DHjx5BUjk1Ker)3~0R3jbe4q51OlMT<7}_j{mJP)A(I?omk`ahc7|q_b=B! z>BnQ&Abr$fnDlD%iYxQ|!YZ?_IO@EcI^Vx*XK|r_L*|*r@i(N7wRiHUsff*XhL`?| z{l8sSRP=YHx48dJ6Un#jsN=#*9Q3ZNp1}2IotN=2PZ#Mj4^Ge@&e?YLBmVm@&xMOV zxcu^Sem0l+*z0dw$5G60i|>C2OrHUbJ4Nk7ta1ClP5^y;lF#$2&_6O7KY5CLj#<%z z`706!885Z*b3C(n4WH#}KW3Ak_~EO=@c!fWS?r)U9uCKRWAUd~K4Js-?Ej~Rv$)vj zk9_}VK>Z-&=i+CL-W-3@K8TOgf3U2r)8F#v$q3XLY9EY~eqL#Q`c3kQ*P)G1z0*EY zTm5xB_`@3R#1F06+$F5_)#bi$@SAN_);})|DUEj+E=O< zcCr7*k?QnMdFj_s`I(YBX&9mwKhdaSVE``@UO?9Lj0(}@q;5FIpL=s%nli;bs#4>pQmu`n;w0Ul8E{?C8^ zWyUApdeGG8?+m+~{m2qe{jl@?m+znIJa}EmKeAc5Y|j2_=%Or@75X$+kdSu?w#7!z zLjSp?r=mZ)=H2=B{A-J!@#u^D?%)S4PkHh8!nDr2WL#{;UVLHN8kcR?x7S%=y=C_N z;#yDc13$07{O>ggAM-OrA4TbELdTzxvbIkD<}$);eI(;r|C-nQndEzy)Txc%w&QSj z9^-(P?=`okWR&kOS(+CS`m+opYE&v)+sdB5@xBv^C(Q?d%(8a&cE9ml?Z z=hP2sob6cb|MSX>I{l@(vt~R)Co$7Y`LRK^6NtYTl5eQvRqUjXI^M9~HurpZ$@}&X z`+n#4U*CH66Y$vmZ=*6FyZ*L)y#MWKn{PwoI#01WYvW5l|7vyDvHunNkCu^&{;u>E z=l?SVnGf5|uugB6)M+05!3WY0^E5wo%ICoe`oo4_zV+0_9=RfH{IlO4^VY-POFwr0 zIhe(T&$InP{{9s_-bp>WnZG6ebbf{YV`ZeGzs`@>5z_;nw(@BnanL)7o2_~DgCDf* zsr=@rPWj-4$86#~^24@^Uw82~x11BU`*vfk!*A&5KP!ui?f0WzjuPf!PJj!IM7}9|84)@^fPaN;ah!E!{&S6`H980zXILt`ty+V#sBize4|Rm zaY9kDI&1viiI4lnvFqkn=pQR8`X}SC-7Bc$neGi49~-p(lox+5OzUhu%42rYM;&iC z;O(cqd*G%0!+!sMlM`^`}T2B%azf{y-KNi}#6LTj#ptZ(r)$pe-4IAvY;>FF$i_?$o z|H(M{UHbbc^rtA@bk^|DkG=lFN4?t2yU;(+ilt6}z6VaeJ&$5lQRj-{Vj-{jU9rgr ztv~I<{DTY5w>W)+PV$Wp8%*;Of9Ly;Kfhs0dwTWvuNum)Soi-6lGT}iYIEPX^$BlF zGMQhYfArMpUwQwPOTBFeCpLM_pK4tFjAMQDN!D$i#<>U8L 
zO&_)oeddzS+W4G!uq*RxNW9QLG8#X1ip>=z&va^u&F47q69>K4r#61j;@S_dOT^(f z+huWCWWVx<9X`DMIbeKUcq^ScPtmM}pXu|6gcnJehx?|413B=&yNP$6!7@ zZwy^oU0f$!PVs*Py%V%L`~MNzAN-2>*82ZbxyC`Ak9a$4 z@8ogi`a>>$UN`Bp(0{ay)alQ5B)^ybmgh>=`p=EY1MT?Js;P70_aNI78|>llUu%_% ze!l#%yY;X6#_A{S{ij#hj&}SX%;I9_|GVq|Kf*iPJKUP-M>c4^T znNAI{Z98Z`#6jEj`7Lq(Z;=OPHH+6e{x_5SbbidczwfokRMB7Ok^Y~rZO3*jv)3Q^K<^};+G#%) zZz-RSYyKX3GLB)7{IKbNotf=<{iLw@fhT|KircH-KONIPz@7E}w_N>-Vio(X>c5@TnH~-00b_ONIAeJ(snb04ClAD*9v{uGd>(`k8|>i^M_sk- z$Cuk{`S78!4Q_to?i@aQ|J%yqV*Be#@&2nb$)g|1>a6jT*W5&6=5MIo7G&Oq{;u@Y z>aYEfZ^s?@V)X*_?<-k5`-k# zZ}8alpIpZQ`TuLDenqi5Yp;IeQ{AvDsQMN9$43?Y;Tz22LjQHgA6~Ec9GiSF9pC&q z4t9!Zo!5NxV}rVVEidthb$@cg$A2($Fl?~OV)OsH)9vus{pZ73Tdx&EoP2OCKoK1H^JEScE6ZhcB#gWxm{tJH8{V|HN-k zdh)OVc{Bzg<$*>aXu>>EE75v8t%k`SyJO>&j6&`S$uY zCVu!+gx@>KGpy0MSAJOeiaq}L*z0?RRet#Ad)IyNN%-vk!^3qP<x#Nzo}oc zs%yqa4{YX}&acpapk%4&?}WRV-hYydA3pd~UhgCiKel0`CY~pLov+n7i8Jn^`oosr zJpGKzZ`eC*`|GogzvYb1`49Ks=E@JQxQO3>bW)FQUTwoXiwn9G`VW?livBupK3624 z)+JK;6dal?$4&pG+VT|$^KaQdz9*t&ZC zbDq{IS|7Rk6~!uab378HU+%x69wg5GTj)PjR@UmTg;t;IIG_`oIOr3$gfp7}N3!vjvxANF|U8=pJvjcH-e zKYwGBWybDgJp2EP(JU@@AL0)2`@fIdd~=D{t-X^7|62{e9WUrRm%Kv%x#go;{dbT& z(<2w#`f=TH%VV4xBo8#cMmL}Juj61# zE3LwI<+_99o9=lGSMvDZJ@P@*t553i_rkQ!I~`Yf%vOIdzA$O;k1xNh1W`&f?<1zEjFCV4P|;e$`R3J8Sy2-~XXc&4-6R3;oB+>N@?okA?g?{fUF# zm8_lX67&U<2io(qC3W5@(mys>&-QUG)7|OZ|#sRoBM1ndbL(*AYj@LjRai ztADT8U*$(H(8h5|A9eY8cU~<|%7+ayUu;GA!mX$OWXexZYlnNz_{#bB|2+2^H~rgL zT-ZOn)9Y`V=M`hpxD|fp&3Oy99XIH&A;pFMu6$I{KYEMzwJVp=amNv@WBb9+{6O!@ zs&RFUt4JPL>CeS-I}h{NAbi-0@YP|s|H*w`xbK7ehX-#uBDU0qaXZW%4>e=bR2l$ zF`Iag{QlKvF8PV8zWdGo)!%>5j*l*T4?OnzJDA0V&olgP@%8sK@<7R|u8E@?cHR66 z{qvHgqQ5J>#r{8C(DcZ~PPZp<&^yU98(kFfgLQn?zw*I5QGZx_qx*Nf;h5LLI*&bY z*yvr?z-Pywkt{BY`u{K1kIt{6e@l7MAs4&QKSpZxPw#)wI@8Mu4?2QA@u&Tf2ipEY ze_C&K*kGEskp3`p$`7x6agUFN9oIg2@Q_0r@Z0_mk76Eo|5HYN;?HUp@+$W~G^Bv> zm~WwfyNp!y*ZFZByP^|(uB;w+|JzGqdnagh=%r}>dEI&VLF-rfJP02)*ux+8TlcI- zZhz+CaO43i9d+q0o%5f=v$$~n<&`i0p8o$x9>ex}Bvy4zzvzaI-pmUu^dBf$YW45+ 
z`s<_z_&}fdtq;|QJdplC^Y_q`aSVIphf7bKzt~QTxp3K|Lrc8+y(Q_(&i~t4TiI>=em2WIX1Dt(exQcK`AlJu~A?UmNaU z{`EI@{dM0Ey!36?okz2{_$}lI5bN~!PV!h|SML95Ne1I_oGbJnE+gpdU1=4zE7t|a ztut=_M|aT0G|OwAD0Vt;8@I1J&h||6vC}&5q~844pz?ccQ2pWCVcqh3@8R3u+-LT( zTX&BC@5|z1pSz5y-C8h;x6nsjW&WBcv+S(t z3*E4-|F+~Z?_&QSDLobaUFj{(YxH}Rgq7;^1x-^`QD53!tQ5p^2pH0b?9pU|JcsrV)L7<|9?k+ijuW}{ohFh-Mre2 zW7|V{3jODm6}9^R|N8%LGFKYbnYt~+jdiJy6bR4@3FxNZkO<$zxRLt^4Et$>$0=ATH}%J;IDlCJDkPE`bx%?Xm#y1kIzBiH=efC zFz-VDvC>n~U+33UB;R-jga>}|n)h_`Tb=TdZ~oy0=UbdQCwU&6pg%nKg;m}@|FLtz zug^XAiQ!Mork^JF{|xFl;L7^_U*@YQS%q#nj|9CFto;8}E`G+NuD|b%7^vv)B)^&7 z|Ikn#Q1j3W^oifb?UQ=)K>8ui{F)D+;ROBtQy#spb;L*3=%4b$!#|pR^zQK4`>(+) zE_VHGYq)^5T|FJACb{^VNJMTuZiv8d5NYMDw`~Tt5 z`7z%@|Hx3$UwOHXVLGYbCz!t85VtzURW$#+EH2n7G9UO1;d8Ovbm)~IX0NgOrssX= zoN&zjzkc~sW4DsuQ~!pJqbOE&ZG7qXUvjOdZ{;cUkB=()YhJS@b?9RIz1A}xaS*?E z$+*f(9%y-*Z+^{B;}w3btMkJbj^1JF#$SEw*5UX!zj)9=1D)gl`?9#04g+d$qo4oM zhoVF~Ya3VN=w^PWe2Vl_=%1Gn8&~6Ak?%2)&vDC8^N54gr?&Oe&f;S6hS)qW&Ck4@ z#=#mtb#)lpo9yw(`4^0ZBX|6dhmTojgnsP%*YGRmm+Sw(B>pT};PIM=ZrHZpwxp2$ z3jL#}qQB0sa$IRhz42)td?4eRt$Fl`A0+>OUH@~=S@Gd3zcD*p_NCX({6wQoKlb@! 
zuJZ%wJDIm+75lBm&35vL-(eQ4OEJGX{{G}_>xa0G zpCya&Z7Tunw4Qkv`VW+ub^2GXzvapcY9Hh^BijP5N06t;`1FOHqV-R`)A3WghacXs z){Xc6=)x_J4(qIb$I_dBv2*_qzdt;x{NV2T|050cD~dIN{a@?pr_g_}%h0X^VyS_2j4$JcSG(!%(J*~oD{3jslg+SqsLa=HgzEL zEc72PGb{STvx@MLZ+*4Jk3Pgf?wWdE*Wk13KZ99Z*iZdB)5{BR+mpw9C9Aq7&hJ)PUCsOo{pXe}wfgIQ)TT#XW*7N+ z1UoJe2fdScYSSO%S-i1eKFe1=4=$qqj1$&4^w1~6a_rgh%QIOfMzOzTrSOqufBPKO+RcyPyl<+zi7FzU6A`%n1a1(5p6yxIPWnJ&hc z{{AH6>@JD$7W&UCJ$3q5{(mqT*SoU1`nu;`>FxMUFRf>M(B_xKti)Kf{xI#f zSC-!W?pa~IOAguh%g5gBwccKT$8;QUOa1~1ORh{vPgWi?V3FzOBW%ho;`gP*>U^?%G^oJe3e9v#U-EvmQ-}KD$ z-}!-qpXV*-(Y~F<#ZO(h?;lBaM-Oy^-}W!%MW;glxTP!lJ7KKchnK4#M28XaH$Nu` zKWKeuzWJ30JH@olJK;A!Hb_6%p!&l`+yCm~fgLsq8~(;$fAyo6dX4wI{+4HPK}WF) zy&aDP>5I=FEAwef9{&EmKSxKM{>{8QAN~~K1HJplb>xA@S68Qg5BAX6@{;_p-k%oR z_NSj&Cv3F#PgY%gb|B9_e{5%QvHf2^e?@mg9bd9L`#tlheE-yS^D^&3|HxFU|Bg~e zzK174;vjjcO@EAMar9I)e=-id##0wh{P2b82b_7~P1DZ}`+W1ojV>*|f7i(3V&^|2 zVppE`ZS^aP)meKdkLjzax}|>NV4;7kM2|{-JBthb+xE}J4-fkb=UwE3mY3)&ES=X0 z^1-yuyJTE!Me6D>OdXti&JK?(7N&l9%0usb%SK-7?D(H&abcc$y!>iWcwp5|{YIN#z)op;?lW-p{a zOg{dsx1Di&9;UQE^}xM*c7Ff)aOG_$xJf%tnN`G(40 zv1`Un`NNt!wb%H!@0=0VT<>GwJAeP#^lSG&jAn7M_rHApm8&0gV*fwx-aJl@s_OqQ zR#^lmVGAUL3kf8U0L{J#p>A^|nEt0uGxXh>UCkf&^JbkbwtT6fka}$TA=z z0zyOt9u^%D1rc;u_3=UcRnj4rcmya*jIvo8$9nxqmjEhWNb`jOQ8q84s+>Cr*9E*74@oAAWT3{+qr0+#cb9 zFTC;8rkg(nkKKQ-SLS1StX%y6$SSt|rW)5i#46Wb{pbciezR4-O#fm;8&B&kzb`2k z^s@RM@q4G(C&=%3$Oq#(^J_hJj9%kbk6njxU*xGjZ1U`F?~K{^?Xbo7?|A>tD?jZu zZl8Z=CB-BE zQ$PQEMLrn!fxjEZb>2neVk>sz3#%Qy!KPa+c}tkI*m+wv`)=`CXZxQ{QY?I)^y6yo zf39_sU9Rc3IZ=GxDW;0(rFN$OKpv^+uX)_Zl6l#FN9(=IinE0{=w0qD{odtPHcm9Y zV&0UL}1D}2Wa$r&{*zm9)V81I_<@OZYcK+W<#QN`u4R5CZ zVD72tug??vG4j33v$K4cugmd?gWl!QQdjSCtIX&6V#{Z{P&B_Q%F6NP)sFbWM!U7P zUg4(u!lwIvq_xU@_j={C>)!^FVyT>e(>$+OBn?~P2b()eka>XiJZN8;{zG|YMSmyt z@%14*|Axv#Uh_n$V>~b(*ZhsS=UW_pkiKGjl>YFc-oK9D_VmeN=JDGxIVVSs+%5o_3vJOJFNEU&F}x}UI%%tv+ci=6bt)ZKS}R@ zqJ3H)jIStF+4JG#QRVX&^@{YB>0dC^>0i13MqBu;zoNR_&%jF`v2Fc#gva8k*u>4R z{N9a>lQ(LAL+ZoUdw=nbo1dQ?T5m1C$aCjk1dm<+)6sFj)#L=Ucj{LZt6Y2agXo4` 
z*Sp^OrY2b=x0x zcEh&UgNtb8*`Pp5XQ zCpH-6!B>Z2qvU%JeDhb&hD~-L-fum{_regD{}h~`VwRK#v$~DiUcj9XX zL2ScR{PdUU-_AX?`foHc-}Fhv@0}ogVC=`XK8sPy_YHI)Zag|)t9MDUkTH^jE>mcH9ko9W-1KdRHe@;M}xI^%O{4+u8DEQqe4^`YaN zzccrIi_EGONWWMpF;)fqT&^w8nt-9CdHI_`^2m?ft^S?r&k+RmQCJgO^_M zT4$fX=mQ+1-#@~unA9xH58beN-Jl=3Wcv5#R2BVQ?#=w20&4f;#;zdB=|4Nq ztkpji9-c3I{dL4|e458~7vzJMr~TmXhH;&D(YV-(-T1+;g|WT*E-Jsvt5AH*?c=mv3MsKukkm>O2qnZhz%du z>iI_zsMBAnvge~Ck4*nwwN(fB;K2s1k0_sb9maLujU3l(_1E#>51a3>hY<|bO_g^L~*>}YO}_D@NwIi068YRsK4^U?FaB;A*V%YcfB3ucg=N2R)?xeq z_^`0-Pp^6PD-ZpJI=*j@$4gh#>kk_`Pmu8yC0g#cEBBvFwZ1J#pPBvzW1aq${h-#{ z{z%7%5A<&2IPtv518u(+@mZbn!8@;h#tHtm>+iYq>pfwc8@F69w95Nm?IZo#b)U1; z-aPvKwe}&=a_!YmU+9J%&yVdm(?88wD*D6MpGSus|2M?ub>yAe{esO?1eq^reZ=}I zA9*pxb>0cT`LRLe_t>ENTRmHT`itZ5c&pX3%`0nNb<*DO*#2iIDHgl`SZf~t|6Rvv zcKiRAwv3bMUp%VluRM}fzOGF7hW6HX?!P*i6pNjIF4W29&wN3hk7So?@8mJZ?eJH&f91{e@8lH~{ax-Y z@9WPzc-^5W0+tk_l|5W)D(?^T*{~enz@oxIZ+rJyh%RDpvd-F)G{{N}pU!>Y!tZQt$n!Hi^ zTPwfz_m3>J%h9cg8=wA>T^2rv?b^2gA>{|#YQwK6-s+m|rS|$C=95Zorhi}l$n+UxyzhGTG<9H`|hV+R)>a&h7Y}@?R#9eNfAZ%G5TF3D} z$3Y-Gw*P2TDd?Rd{lKT^?d#%WdhTj$`i*48*4PvK5BtO# zzlYzx{~bz-#g0GQYBwU(@x@!Njc+B**F4h;JC%5*|3LnzqQB-f(>yxt_&+Vo+Wimn z_fGO+oBkNj;;G~lH$VL%P2XTMm6&&msUm&1#m@AflRrWa?{X`%UA`{S^w2tcow_2;@|$Xt57HNizZ;To z$au^HyJp;&KdkVh^WNQcosWhUzCUTb58t>3y!QReP*N=KdHY{odFT_}B0JN6Fwd;$ zuX&tDVjevIhAyuzUmup|3U_(lT5TVp#u ziMONioY+=}t@D^yf4Fwv1DY#5c2>CM$@iZ;{iL&)2gjfE1*YFDuD?AC8$Y*>*H!^N{=u?B_f%baReDhPU<9KjB{b8l6)_-Q= zRa3(FogX@J$^HJDaqakXNakbPf0y$=l3hLzJReQf4V!VyPNg!_znDp_{(4_e)1#sN zflh4h6WaW_#?^0i#VBQ;=I@t$>=>OVS4rr~g{>6)2u+Y!o z@A~;SO^Su>PONf1v2FL?rk}RfM|P&a%Okb=>;DL#3;o7O9P|Zi`M%IKKkCbP@Po<= zzwzQH4~+T<=j`x5V-MM|C!F`b_h0w)g9Gr``OijDENrjo-}V0=m4|%ED%Wuz&9=nM zU(-*fe=%aM{(2nSO%LKAea1F^=2wYppB2B;`MgIAU;OAst8cT!4dKeqt+4#(etH)D z@cl2_H&}cAP5Y3p$~Ag(-v;;%qvs*hKh3Kv`n%j)zP@aKNPUFp)r|DRPhTMZZs=X! 
z#jrl2alOl|K_fGQSHU8!} ziC`Pjzms^Te>;Cv(Vx5l$zxvLiES9=H7}Gn`5?N3<}XGs_fz;m@(mfEak0TJ{;=tN zy<30ko;|`A$1nfdA%lx~t+V6*!K7I1{Qsu<`*-b=`gii$ zivBM5X8zfNrbj9^uTSek^J;(p$heBe*B;4_A2fdIosJ*dqx6TV`;3{o=(n~B(~ceb z%r&R&0KZ-TJfQrF)<;AAiei;|o9*OL?e(|xtvs3jy?I4Nf6e1}+}1~1_~U%ypm&mI zw(>Dw{2+ez2Nl2a!E5>R=ns95-}dmhqh1Q9|8b!)_y1xL9y|XN@ukNVzdyQn|y3TCp}>EyC?EN z>qDOh{M|6F^Nh#yiLKa;FRXFpUzfOGmlMO3hyL`(|Nh9*@Y(l|YzNAp%GdALqMP-j z<0a8ze9KB8&gcDuJ~RFM^UON^E59pkD{rr&_6Z;8oy23?`cWQ>r;;D{XZgzKK^@oZ zQToHC>s`OsB|DrOHXk@|)4y!^1ohni&CUZUpY4Crq*&-jtm>LL*B>&!I?VK+owHQ* zcf!r@kf}!>%SVSk$>(zn`5=9Pu^&4|@(t;Wak0TJ{xE6XtDbrD3-5&0-NOrXc5ZvE zv-3YgNwL`Rc3bUqB#M>k=6EDXpI!g|QF$}{2l7hO)8$slOCRWBufvAs^$0e>0kO#k z=?gUfthwh~TzRSUPVy`EDE+Omo3H)7kF9W5YwQL0_qZ7c!DGiiL&^`f)s9fSnNAHJ zX7+K# zJN|A+UtqU<;^Z4T>8E0=pSn71E%V}Q8~pbxy~0v%zjpquPx--(=YIbU#)k*=P99ah z|3$a9#OW{7e<=5u{!Zc^91xp)#;u^{H7^;NUsq@T&fN1Y-Xn1v&(+wY^oNzoj_@BijM+N^L*w)Si1f9T&y2G@l$zgEvb zi$IaplL9gSPO&@jn4e9UymClTBsN;rP`gVQis3#T+cijBW1>^3yjPW>b zuzZ&kODaEqpUVE)Q2Ujva&3GoiVr*13p>-l=&V+M!%aN zQ?cpGJGGVH?Dyzzt?<$wZ`|_di(BKq{F!TS{JDLuwf)bKj@wb3wnT9OwDqO^NVK|U ze!OpDTfOq4OQyfeS!(r9^JuBB@ifHmogjQ*?6*F&fAT=%jpA0P|YF*Eq(yDrj4t8NOrllxc1+iAu;O{y)@2zGyRK5t^R8Y zkLi>uFX+T34*JM%edze)fi~Y%_^7u!<$JIGu-$3TetDa_lklPYcA9d*j6V2l_y5c2 z{WsF2Sm;x%Y`b+l62!LO-=TM^aYg2x>0fv%`fFX~|Hqy3Lr~CdI=0 zp84rt`8+tE{&3<78zny!~!@JzdZ1kTbHu*gNhMGql^e(DHue$u2IDNopNWP9UT7NkH`V$v@;HU}V zV@#6cTZ<7VslCbjZj6*(XeP|v2 zZWz~jC;aBeR_w+Xw*Te3JFa%}!eOTw=RWYyAAAoUJO4bW{NN1z{FSO7bYjPAul~_r z|NqavoTE?s z^V(kzz1B;Aod37)-3PVrdHw%!ogd#rnGUJ)p+j5jZ2O;`KdR{OBwo1>SE_!{sXa%q z`87dw1+5Pq-~64q=UZI!t=>s~#U7*!p)R9lW71X@uu_N=X&WYcHHT{qeT0Z?y zAKRn!hs}O}*`MCMXN$1ee(z3t-{Fg*hrR!Hl49ZgDScLMfMWXeYMk*?u~~a3kD8MO zu`7CMeWw4MJk$7HZe_O1*V*q+HP7^Dh@ZaUC9nAtiIWeK2bzD@-19B2<9jE0hV$tU zmk(TXz-}A&hTFgJ^Cb>Fpa(wN|3vdkFD=ghEN1&hjY~JBPG)OgmG8gkwv%Q~Y><3xMfk!||J>}n!xo<%PW+VT9zZ|U{rHeSp#U3lo9x)_Or^bzObCl8GKGk=eaU&k}A{&3P|+rRjW zje5f8R=98GrYFBiyD%bFU%`GH`9nTLt(?88KZ9JD-1Ux~G1B(i!PC^kw`1PEstkpIKASKQo@9WL4MRi4Wc2#~;rx)4!8f 
z)alRrgz3@9v$K4VzS`pFdr$H~>qGPLcf+{OI~m9P*oxiwxc^>w_InTCHhrh+^$)$u z53W>m|3k*r`AAi!i~89QSf6dlFuNgkrhhS#>92g`H8qcU+51Q;_4s4-Ze$!ju#&Ii zRGx48Q9gL#FlZ(dwGM&<)$R*S6Hq zSEheo?lJvcZVl@z{`{L3Vf%fQcVfc_#(wKV`zH@1AN|c{-YzMY_v#OuPx{8XQ#Slt zXnyEp({8`)75MD?U)9I@sQLfjcs|lyRX59Xk^wLC&h(#^M@FrGLwUd+vEc)~8|k+` z>hj@hOC9xbUB&N4<}-dBwpP03+W)#`^=51Qz%RFc#GM1bz5fm=A4oq9^(%^1T^paC zx5q|zvs0e<*dCixk&O1SLAP+SE(8xUeAo=haYJXC!(4~L*RC)d3!BM^PZ}o7o*nQIBm+b1AIC@~C8}kA){b%Pa75&{v zZo3}~^NtWc@~}bcL+kK&!?@0CU*^YF?8X;XKJT<&e}DRwVU^Rjzwq+Uj)TXx|3*?Q z?61=!mn}Ykp`W3SC)wrN_~PHcz}t~r`ponn$USxXH`6>i?D)Sew!Kf)=;@v0F%Gt2 z8pYv__?VB?IjNgpe^_V6k;lIN+T_r9-(EA9KDYe;+5MEaFTx4M_WyI6Z%5*+D}8AH z&Fz%ODMs^Sf0XGzC(o$We_N?H9UICA_Kb{Mo*U`6KI`&R$tTZvG~YXogY)QzFAN-h z*nMZ-+z9vYa?|<$`^Bm7viBak}sz$9Am(?9=%jBT$Q`c8!)(|<63RMDTjS(0ac zG{olh>7CmBg3S{I$p@_upRUFUaOFK~T-Yq{k=vCJNa z-w2QW{%%P5L0|m;j$?ZTB`edd!6U8H@$LIBwW(t~>`eb+q&ofKF+FrV#)XeK=$-Uq zHhn1K2g&DkZhq>N&+_Ke-x~kJ;~qWa>pQh3oPNi;&)sk*eD?dV!K7I1{KF=C{c~IW zU^Ko&v**LfBfI|+d8zp0`L)I@Py}lA*YQk`hVp>QPaO14^32w}+UKjSyb&My@EOAA zv<|HC(?|H+lMn9k-M6<0XMJ;lm%q7ix&P@X&rpPIwcSV*t6Y00kHKbKhux!jYG?Wv zo?89KOFsScy5V_5AL1Z=#5R6jKNhDyMe`TkmD`oo&#S*7^RVBaZgKPDdk?I%F?^Nt zpW26CS?@pTq@MYTRb3Oe>;I{PPm%d$`WG3s`fK0hqnlxm^gtZ+1?#ZmCe5Qi@<8%= ze$7vv@>$+|`onn_UA_2gugwhSZF1(C2R!-|{n+`pMp7({lgjN6?RTo=OICGF9oJ!! 
zk00HbU#5RCVx9hc4@ADbkEb%W@ehgJ{GlNEpyjD<=I@((zQr})>Ye0Q>{0r|HV<64 z*qpP+g>5c9dAn2kmW7AoAM^q1j{l{L@^xvp_FK9BIh72?gFn;1h*b1Csl6 zj$)*z6wD{dIo)j|TZXZ-$yj9Hbw!HIM%AgY*^0vEeg> z&n3k&T7PS$<8L}=#h*RV8sGlw&wBrQJN)+j`z+-H*Vq5QXs93T5j$QR-vrIqym)@t znf{$TvsQoo9Bn$Kd3Khc7GU4^69>JMxY_ifh#xe*c1@k~c@REqu!}$JeMkS%SMB+5 z*k`X@r|&oYS@^jA*^WC0lVb5fuYYK$tteJ?ZTiROA5yJv3o`Fa|K8kF(cej4Gs?H^ zUi)A?_&~4qX7>nRZ9aT;b?WyZeAr-(pFYCw7hW@C++nAO!=Jlhy*po;PCvH&!=q^L zzwjz1(E^XxI&?FCLkft~Po{rg9;xWB;-dRau?O*X3~zyx!&B((hevmHCWk z`O*9;epi&+cs8$5`r!*J@Ad9xJKpq%u*&TxUjF#>Exp#;`TxPBSnPH1A^ras^i!0q zavk@>bqn4}j%~NmJTv`g>{&_cfxOeY*6_c8~XpV@F*s_(1+O?uU!A%mJD=bo|*mwd8DGh_S0r$`(9@uwS^&v?d5|B4lVTu*-Fr>+je z^jj}~=GrG-2-DwKZQJ9|d%|nIeg0}B#e%NsF~#=3p>1!e#N#!(f=$iyE-4oLF=qM? z<_YNGU2c{7<7Yl`y?1$bStj1?^6YKzjM?|? zu-T3;?DqLr9;e>+KfTHej@AFa=%^oz`jKd=PNI-v&AmV!?yoH z*!{26w*9u&VQ2amo{IkPM89X}c{9{J<_G%7Z+hwYc5TT znNC{A>(;xGam$PRHf~Q%KD<%f>XgrenrC(wKYg`&o;hyGdq2H?Ys}(b-2A(f&w$^K z|ND|+vERQ;)AwKcQ@a5-(Ej$i6vVr-}r@< zKCr~c;Ir+2C@B`cce1{m`W3}0*YW(&&HQbxr*E}0{fkEx{dJuG*yk_$({cY@`iIr$ zY;wjW`;-Ggq>#*|{QUBzDydLS-{M5OEF7eb zkNPryG>+Ae)(@}dhaIk2W4kGz>sJu^M<%KgGh zmw$WTEg$#+JZ$e=ziqf`asA;$)2BBn7RF=yhX;S{=ijs_x9?Lj{rhu|@w?m_w*Bkx zD4Cb(o)&4gKPC=(Cvme?clyK++B`Znb;{>K_^`n){xEUV^RGYi-YH?_=N@R?{?>-{ zWBT_c#bU>w6ZHO{ZS^aPmFeWv&$x<}-+!f1UPJ6m|JnJYivCXG?8hoPX}#Ax;$ZA& zo*;Q3`JnlAKKKpi(;xP~;D%d{eSIh#w8~a19(B+A;j`nLNX4t6ri1jc0 z#e!bz?D${xvF(3MvHxEs-tQ*5u-`NrKAnK|nMwxv%rDb_FppI9ce%IR&iFqQ<3o2v z_~QSI!5`P*?}l-mcX?N3U*6?bX1jb{ecogKu;FEo{O%_kKM^(`bLvrF{m3Km+xfRy z!sEep^!G;%^@BZP$7|!8tobfNt>gL3^dHJID*9_)<^J=j@_|n5c%DA;(-&CDZwpTx z$A-_4dFVK!^@s5Ozn}2i3#W(XPv5%siw94p&dxvgCBQ@x2x~5-r!?xEG zed~N%V-_k%YV}`T#y~HgH$yivZh4X4`q1&o1HI<^PEsu9r(fdWeEPHV&;QtM&8N~Z z|4^)@eO`X8OB4Ql~%naV6gu+2#Hgn>ZNzv8|uJq*yGU`MAih zwjV7$jpDE(oToz6S?+&8`w+#?TN z6&^3|e{!JoZJ&Gl)&94C{#_`|N2=4G|1mc`T%J8_`;YXD^J3fdObZ%2?lgZR@;kBp z|04{O@7Q7G>t}y2tiJObo6Vm5HoRQ7!G0f1FVg#8rAe`{{fU+7z;+NJx_Kv=mKV)0 z+y2{mMn!+Ev$7mdQ$*cYUcjfb-_QCcBe~j>39rg5q4R-N|&JIW4eeg5ygag0# 
z#lJl>a~8aI{NG86#rF4{e@oS`C|0R|Y;)a|cM`FA&|fP4O#e>)sG>i4?L1l@S66tV zJobCFI%<9#ud}_&M=$5;U2bKz%hy$`UoGaq{;G{4ne;wnT>xXSMy&_kkSpLYJuX&pL6nCah}XH@jpy7)ReUYCZN$NWGa`RNlR4`jY^ z92-7E=Aq+^*56v~>*wG4a&u~H_125$_MUkn{C4~~loU(l`^QxMiei<|zuDRs|Jwq8 zJU{GA|GvDoqCY%q3A5>xip_q|JF$s_vETYoUh+WWOGolqofE$YmB(!L!&ir)dG0=M zFMsrgp?Up-(;wP=GW>S@IhYiSUH{WooADIITU{HU>V{o6zfAvGIZLhn8^{>Uhv&`q zk8Wh#@*=-i+xqDw#bWuJG%xbuGmP_eyl(kn|J_eHV%JL>;kfgb+T|~w+=qNS{|2vODyQEn zpMMiwsE_R?+dq8t*OpA?o#{W2dn)>C9oNy3AMeM!%d5-dBg>1&#a1K_^e$(~`bZ0? zelBq0_aJ=OU>AQl_R-yM{?;#+4acqeU~QQJ4MZF-Yquyp!K1==I@_-zQySSbdqm;*kGI&`NJA3@93`I>e#TxXP;eU$)`RG zkA40cQhu8yUB}$nVu=zMLC|KYU@5J8W#YZnZG^Gj5B$=REI~&whWJCdFd&8>hd2WIRR5D%UnI zjiZ})lEZxC`L)I@TqIQV*Sh%kq|_T~9`gfzI^vh@W|9`n%jy(O<{mb2RzJljc?B`wel>yF6MR7g-<5 zOCHE}0h+&yp5Eo7lzrGZu5jD?A^xz_G4EgSOn>sh*(38~{o(0`^aYa7aUOPzd|uDn!cyvI@4th}53Z~K|Jzo-qF7mdgGUE7@z7!-0ItsNMD)$g{Pvw^2guj#@j#R z!3V~E>qB|T1MB*;IwyU@JHP%AUU+5udq3R>tp#>k_z&aez-OO-J30=yre6Q4eKW3P zRoBGP4ZCiBnf}E{wfe{BKN~Wp@j0>45%iJY`l!=C6(07l#>2do&x7;nZ>=z=vFM)L zz113b>0uvV_o60!vHx#=MEU@&k7(YKRb3lj-TBXs)WDnR-;AWvzklcdkA{+Bv3|Jzuvh)cFIKs>J~bX6H`+YgQiGl8 zKP>>L2faQmJPTL{o!5LD7aJrWTM@qC4p{TmEB|p*SoO_?Us(0zsqonE zU!uCS`1rMDug$xij99$p<21fW;vvrSMx57IrvL0bvsV8!kCyuv<4Fs5c^s-XpSzha zXyd9r_`6|T=bet*Mdv!+u-?Gh>mB^d%fkk5-M;bpiF~w50i6!Fc|$j>LJLW5?*7@L7GGt^T+^w!;=j zeB;!MF5fL|dFKzNUj3Wfz1G?GKadoQSESzb>6JL^N?%SQ%~a4kMf6e|-c0{Fd4}<8 z+$&b@zt_l;vxV1>cezzQKk#~&drQCdseOXK#Vd9@p7?-wI zS#!zd(uG#1FFXE8lVY)c`27pxDN0tkhQ~Ycp_}oSv&KspaJm{X9eex1R;o-9$oB8Bmx&LSXlP1N&JkviH zUqFt-_DK|YKpmnB~%d5-n-1KyX`#*mC$@p~`{G=bB z^~@s2hV;w-+~|!b%i~Y(zt<;yvtRTp>;K=i)eoYh6F+Oye-q7*@`&?%w#F<{Bv$m- zI__h`e0crvIn_HEhd3Dfv2A>vhvlmczxm_-bv)z22D{{keRf>@)(cjh9QOU+fqx!% zNqPR67mT&~j~SWoo!Bwj^`DQ9j57{> zf#x5OeC!y>kH?!|f9NcJ)m!)9Ga>B%ouztC`NqO*hj#w0BmJ;_{rmrab`h74SgZe7 z=?7hGzmtj|9r%4%^Lh0|LE)Vh3;`5>Bo!vkG*O7^lD%5red@9P9C8f{4p={&h$_73gdUV zmDw&|XXpR_|91bSiLbuB=TL{op+LN^TGzn#|G8kT6V^FFJ0*M$6L!Cv*k0- z?0dG?cgWzuABCV({Dbhwv+=o9gfH(xg~y 
zKdy1=C0afo&E+*e?z1g^`poq2%RLqSUGB~N4MFm4{8apmPhNBC$b74#4@Lal(Dp?_VSa{A^K?F9m3>vtKc^$};mPzL%p<11@>yT{Kl02c-v05& zNWFJCd)XKMN`6|T+50bX^1ahINT1k>)YV}){?q?l`|8!+5w@}~yZ$+qh|QDhAC#Yd zs0Z=X-uTiyT5ea=SKf!xA}wj^d2jeZ?<5{0@jA3VV!dNq z`RjP_hw-Owe(`}fz8NO${FT|uu6qFe*!2&yl49X~&(H7w-#O(0+iKId+O08*7J)kb zc^%PzMStZd4*JNCZP-YP#qy*6%ul~g@;x}8{;PCa&h7m3 zppFC57vm~QR&~wxf^J^(O*i^h-c0|(Q_-KghKyl)q+)Y?;GNo<_kaHU&(w>zd-RN@ zUJuj0@$R{Yt$QT>+y1{-=L7!R@BhDOpVwA;_W=z-7lg>|mKGdm8w3qCvl&`yfQw%3XB15~zaP_oK3e(%JGZsu=@ zKb~K<{TGp1{g;tC`nT;qRX)|Bsd>#;)b9kXPV?x;{6i!2@Pp)gL59>dbT;>I4`WKOk{yIMA zG03;=q9c6X2@(fmKeqML7+Gh$I@hVeiD+b=i$>t%^9)YHm;Bb)75?(eW4`sj zt>x12(I*!uuRpK5{$$(Y{{ORWJmpVn7V^*yn|XoEFVnv_r>fOo|3{d7>f_^o;-Ggq z>#)xat}x5@Pqi&S)p%Rb{9W|)PV2xf`C;>eKls&2&+QtvIP?!gi=J2B|NJ1~VgCQLIci9WS<<`nlCRi5SvfL;RWkefgt`{x0`s-^(p0Xnmw&+kQy-h=blqp4p6B zm!C?$J-=Gd{5&=YA2!&+JZGdc|~-{{LE<6bt=` zRb6{0K6E4B^lD2ST{8V=<(`WEIzHDCk#Bq&$0iPXm$MGr{zsY5>&keja|K=b`k7CE z_}%8)z1sTYxbV_vPrUY-SU+P6`Oh~VCvn!)o2_wlBW`@r{4)J#=e4!^ciaEyc<2TC$Zvh94&;H> zUps3iTfEEH8EU?Fxz*hdUmb?64?lJK*qi%8`0ngo-neptcR6deopJqbr!W&n|XBDenjhx&lO?2&X#%m$Zvh<_~e1SF3B@L z{VE^4O!cQ7dyJOA8P+l@rA%C&d$807q? 
z^$8FCsh#ORC$FsNPhNi>9i~7282gK~r1^P4^aHIAtuuc*_k4>xiCaB(9nx1ESAS?d zebstz9{G4^ZMw;0*R9phc4YhihK{G0o~qZMwAByxsIB9ORlYvi|9g#F{~gI>ewqG* zxu;hDG>;C`pYc0|yWCHtV%MI3re8(I?}pYl^-jl)ZS{BK3oHEn7k@eZ3%7@re!uq} z-+uTK_&MJ6{mKu1SigT%eh=#SNwg5>zHQj{`ff-8eP;R(<(@kIH&D{|3`Y2A6zN={wtMu z-P-tcV%YKguv=r^S5TS$kxqUrg?8-9HH z%Cnbk2ut}qHap_6{r;fZ>;Aj%zwy385!2u4xQdng?`h{=@eC8onyKva+4{Z13MK{owUH=KMVxmhtU;H3E-iZg@%-;|{Jj^fK{)>uQ z{iFZ0rruD;gAeqP-}=ybkq6rTk@;)A)nS8i-YEU82_HG6_np5j-CB9ct2<{s@i_c; z{6CZwi(QA?Ry!SuVr8!%$0I@GQ{AxH-=VF{cK0I=bvltzb&(F;}~k5*~Gi}TT^Cy?%9(b zUaK|bI=|+)$SCp*k+H1Ua|3iNbsm=6n=Z|bWt*5Wb`!Der z2a;lGUMk4*3mTt}Z~kKR^7+LN#&zBazxlC2<@ea2`ooN?cUo$%y)Oyj$>X>D#rT)V ztKI%Z{#k6dV4@5A0kfSvYAROxY)G8>W%_q=Po4hU$C7+|9Vw4@d39Ak?{aVHw?0${ z_(0}Gp7}N3yPU22eu;YT^6YAUYnfx7Tl+&xAKF@WyZc}Ahu%oO9sdt$9k{LD|0-3# zqF5z=Y}@{a{@Yp~*_r;md8DGhlYG{(KJ@K1&-Cg@1vY5;IzIkx7}qhL`E`8!-T1;v zr%m|b@n;+pR(x}(yUu@ODs^`J-aYJnD$B7-!+ed!-0xS7#;i;`tKH~70O`Q2*D>7bdnFkO5$*bFq zZ!LT9Ghg}5(?5mBj(^gmSlF+4{rx|Apk$S6{N9NV-OOJzzfAvGc}1=M@$c`LUxdsL z^e$%|_Plk=Pm6T>Tx&ekIgNw!=?|NH)}3(d883v*9)7L8_3zh5A3Oik)^Wh)|MmH2 z$u9Rp#9?63_JS&mUFv*Sx0oVS1!_c9x$OV8_42LGL6U+w{l&%;Kr!6E{Eo zDxU}E(GOpkGW+Yho&2Na!_?!y**EFB^8X(@NwIL>H;_8hr&sb=+kPPGhy6c1%ol_= z(|>l(W&BQ1<=yZwXZ_wR=&&tQb~1;3(Xm20nl5Z$bQc<4{u__H;?v2s{;nTx;v>1}TDTF3u?aU29vZ~AnS zVqtu-$~FGV^;Zqafe*~|pOZ(7-$`D~Lq4ycIG^#Fzt?)@u{!iqH2;v~W5-AzUil2+ zn@4|`cHE!8`|4A-hG}1X^~Hmxl;=M?NwHMUKXd=#RO5{1k77N&%e~q6a`FB58sfM8kn&Y@bCQSO`l!o~d9~i^utDYV*kBib*!c@zdgSbb zd&54fOz&IexE}cJ{9Bq73-52~---(m+csbAQ?mYr{(3zo{N70gdVrb!LwTg4zs`g2 zk;#wy@Gh?|_fMAR3b&n?us)QRJdpW<=GT0$e1`Ms52wue&gs8-q!IeIS!2J4XEf-` z&OgAbnDYP2kzZz4T^pZXw}8#OK<3vPvsgh=r$4V7@~Jm;(gS(C?~o5#A36{G-7v26 zjE5fBirx6aBd{c3Fr>K31m2JoB=f2t2e<~U1 zM4y@d1w%!Dc-m4&eqA5LL9g{@qn9FnkbKbm)F~fvo7X7)@P*Bn{MHp0y?sUa;A6k^ zpS@-*Jf?pmDHdMmX>t3Lw+6@qC97QH_fCAxwqWchPCuFcMMXt_<~<}lYVw7_GV1E40hrAd)b&OMID!-S*HT;lY*(y-c>el-1n{Vs=(<4yJtVA}2cr;eNGLchFkVlz%`XZojk zX`TLE??0eA#Pel5{5Eb~emn0X>jQL!X~%aS8&sZo^@nY~{qvqJzBD;(J$tLW#!f8H 
zKkN0kmG6HW#SZ9g>x=$${3KeKXXX5VR0m(Ce-WwG-^&1|qZ8XlNE{>{+w{lzMT<9- zSJC{@IPe-z9Pd#c{E;1|zxc-;uUO^G(D>lzKeg)H55i~Lf7C}>oPIvTv7Z8ElyV*S z(_BMh@KXmezfAv5o>`|q`y29&&*fEF9rH^IGih!tNIqzJnvcI5#&zBazxlBhyYYn? zpM2w`h0gw4XzsoLHR*aYz1G?B|6o!q_Bz;5?RJDt?08LImE#}k(M#=2|K6OVqQBPH@>cTGBCUGA8_oAlGekiHoweYCj#r(^wT|B_W* zQ^)oH%nxLK*qQ#ba+W&%8NtS0P?tTg$)4x~dV|uJy`~YbtuM&{=t?`bcQ`9`p z`;iYCUytPD?}l+*#b*3b@`kCCx8AmqZXVWN`G9S;(+Hi|@!G4u za{qx;G8vEUIMaV1k5u$`xi|AS1ks5)Lnm?a{?FfkZ|zI}IrEf|&+=VSZu`6W^@qJ4{Kv(U?|wY&J?SG)FTelc@YwHvXX!ZLq~iGJ zO!_rs9@>{wW%E-T-LT{NVQ2aeCfQP}e{?)=d@ipl$3u6}yFB_Y^zRhm^8BTqtvt55RkQ9sQv6;UAGOp?_S>>9(!REFS^DaWI zXMUOfLwSacM?TMQUml%{j;?Uq^Udd3uH%3oq%Y9?4dKC#k$gkOr9W)2i$83B_YzmM zTBnB(KKQePUb^iC)1Ui~b&_K7ivRxmpD!-s_3F4zQRl&Z6w$?Wh~`Ng^vV<4_<6o9 z4xgg=mB;GdqaAj?Wrt_>|Ixx>w}W0g{3rkWH+b#(tKOto=+7^w+kZ#>iehEkt@az+ z{0_)Fi5N!nY>ipG@Yr}!ep9bwF+HMsc!`6tpFTnIK(@m;jt!q7eYm7pM(Yn#mf7y` z#kag9OnLdjM!MNe@N@iG`TloJ)xWLdD2i3;ACJ%XQ1DmYHyXl_>F@GK75#O7mG6Jm zZ@fiyxqmV}BfnSMQ0HO!+7GXL^T&1X(GDkFw*8C0*r+F*w8!mZj&~2xk8S_6l49X| z3BR@e{)PT@e95Y=y~dkSKK^)qnf^s)o&J7~jA44f3+i~}HLn|)Z*@K5=Xo=KXYTnH zkLtXWe0a?6;)ky__Q|m)oqF$`t+78#_g?9UPrzfJe+QKxoT0z}O4Y9@R<_-$4rV)f z#C-$c=XeL(`Wdr$npf27KTG(?XFNkEakiJ{r$^>nU8JM=wSVjw>DQ3);KK&H_**@@ z%zXd1&pxp=X6f_pIe6CR;H}la=Khm1%em;UsncJovUtBd zGTj?$J3(|K4;!>`wIBT5Fs}1Dp82sAyYYpCpIzjTNBx`Oh$Z{}e#O`!`0V`WP*N=2 zet$%rWS49FT!&_T7Aj{t>il^>$n@{z)ph#wKf=t5=gqK2Px`Pr#;wb5$NiBHpCNoY&S?E%&F4BFm~{5hVcKK2zx%|7yHdyJMr<%GZvX#ZcAlDk zl4!vXq8m1R@%%FVXXR8C{ax?AB1IY)?Po44+hsSK< zHTm#`-7mam#<;^y4+mWHo6qlb?WXY9@n<6`7MtG~J^qJRQL@T4Jl=^9-C{p+`pNX~ z&nxQmr=IH|tS|c1e&B1akh9I@^FY);`Q4E54P8NBzK>xmcH;}TKDWq~3oi6h_+eOc zt(|`7;5Ge+lpp-CUjLb@A9P~JYvXH31l#r-=+KsUrvL2xQJwyk^AAqyJg9u}youw- zwti+M#bWV>*u>4R{8sNq=FO`=?D@l$zu$P|@v!elKmN51H+VnwcK%uW;&|F`rRP5x zS22ke`t}+}H*DK(nU7+o|3H#075&K@%%kP@>|JhU`7U2)809r@6hHcbmZ$mV@60{l z;!fgLk6nY*)nPd9i07YL>DM=e<39Ap3V#{2_to0#-}wL2w)UYYR;h#8I=|++n(q|V zZ}U()(|=B$QPE%PxQ`$6vDZ7b zM$=B)^NtO}jOicv`NP*Q4Ik(KIS&L@`p|Kuo9f#5xc_0?XG3b>&GaA4Jr(__8_1)> 
zbZm>w@uNMT1LALfSrC5E@|4&7ow?^*oIdK%Nqx-k@q=0)R{Fzc+us$A4HGx}*;_MT z`ET-U`|nMP#q?OQxB&6T@pdO!nQo3pg7jhE|7x7=ia2(r|4?3O<1tUI)FEE9h*#r-;8BTEEoC{b4J1 z;|tTbc<%BKEU{l$`;wnre)>t1z1G?1uSQZVY!B%lYs zaf!!u@S0!8!B1TshOO?oKELinx+}LMJO4l*imCkl!b$Lh)PwjzbhAF;by7ipnf__+ zG5uX`m3^@vBtO>Q_#0BkaWDCxo21F06m>aqpga z+7|HG`|ptQgIoS7{{Z$1`%6Qe2l_}=sT;Cvr$680FfYb4bizX% z^pW5C(DBIwE3Z%b1eFh7?EJ%EQY^Or*+_l> zWBR2UcY^VnzR(TZ^kP13sm%26_@Y((+8Yexj(EmR2=qIUJ$m9BF^5gk2?@a&RoT^TL zuInM+c+$M8e7%v!aUA)e^%3dsUCvT|P9PtQ>%7aevvHNjyWASqM;CvXc;;P`zwz{j zVbw1ldGyb=-_*OjIy=wH|1T734AWy=asL13_WEo~K5Mo^vvvIDMDbXk>D=v1|Gqp@ z(Vuzr=h0Fh>!VY+6YmuJ1)HY{q915|D6jb&bI-RpeSl8#jSpLqx;hNwKK-k&r4MWy zRyuH@+b&*v68drd8{d0@@R&Zmb01&4<#y3rQ}eVh#^L<~-c0{ld8YA`4>t1XDD>An zes>8!XnOUGjWIv)I6zjo$TGtR!@tgZk2+xzccVEyv4eDa!)O1^hd9pfq*-@wQ` z{2=r5PVx-ln@4~6(#s#4>JPmkT=uO8nr>s;kJ1!df%#?n&(0$i{k4wI5#&=(AKvBF7j&`#t3dh>@vd#lIJxNNWT{;T`z_(KuWSE_!H@f9UHZ2OeSBkAILN6 z^hamYqoI5td>!#Om6v=FT|xX(Rek=5lW*vxqGGF`x;hL$T{{kKJn{bvP!*UoBNi-Z`*B;@+)tq|D4=YtH0)Re;?!ND38XOFL~IY^%3P0 zufw>`yOHCXt^PV5{9)W%$xqh5_Oh_jr*>TGsdb-3SG)daQ2D{S-(NQ*FJ9BHU4P{y z!*tX6V`usg<{5SRH@BC1`lrtFYxMLks$<-`{I=xNzl}$o@_BF`{qTj=K6u(CQy+RP ztoGux=bxDLCHU;~Zzm}hUiW@Q{ruZdzoJ;>+BM|Wr@}*@=GT1q4Cm7yrW|+nv_g_CH4=Tu%%Sd7xyKYy2ReAFR)Y zVdosav&R;=T}A0{pJkIz4P!U#Fk8#r#mMFdfV7oPp^h|UyzE#8l4}NU=$@F)5W<~$PoB0Rx=&=2d zD@@yN;3Kd39f`va`nV2z*4*(0(w!Ek3)$xD5C2aNV$$c+e@@KDgcK*Me z6pQJ+Y;pgeHQDcgl9lPBeZ+Rx-@iDi$nPEstM<9K)#C0bq6 z7rJ3HFOd0V`lmTdMSqujM{)h3E7IQk`)@;d=$HPmy^dpu9;5Wb7j9qngV$d8K!14X zp}VI%^!6b9cKkUjDHgu>^FgnFi^h?xa_xz0U%YR^kM3scd^7#qIZLhndLEDdjVH~k zvO1Tq%l(t(xx#JFi;b)Okp~)Y(M`!~b;{>K5gQM7BnfALQ3dDPkFOFgJOVF$PM+52zV2zws6^2vA4EcZW$=y(Go zk>}0y;rXFo<(H~*zfk%8ORBtWL3lI$XXPH_cM@kF{EmryJ1(OS%_k1}$d3&#JeIG0 z5jTHNQY=>Q)Sgd&m~r!IdtdO^r9<#neB|I4I*iBnFY)nDTAY6P(^;{fex#dHcl`GK zmy-C1b?Xj`Y!fEGea|1Zd2KiHYp=iBwK)I(S@Z%EU8s+3K8JcI5%Sm%(RZf* z>^!4Re_uq0?Z+F+2ll9~dChahh99)?HQ)U0x#wFPe$eT7u{}zEYfSH2OMPPbiLEgo z`O_IIompQ0aIo_BN4Waj?>}JkX-Gb6{FUoZobu9lDt4y-K>ny!|4tqqra$#D_DWuJ 
zS3&dx=?la!*(E=IkbFZY6%||k)YV~_vDfNb9<=n=f*<#tm$E+~^jcT<`76Cz_1F2; z>J;h0eH*OL#@zeP^q-SQ>UiVpX?gt^>bUWIy~|lkU9FG0eEM@m*}w1jXX2e-+5O{J zToYEEu))xzKfdgBJUjoU{n2ka>B;;A(CB}E2VYa>0T0KYjE7#C{)2gDMStz1ndZ@9 zI=02O_an`N5A;s*V%z%ZjpE8f-0JWvA92Ic`WrG{SoMzY{GhS#C1KK~8>ACbPa-&HxgPSGck`+I+%_pLwb>9y9g z)?WMR>gww1>S_=k$om@eHa~sqIMk(%*~Gi>!|dhmoYoka6Lz@$;h&v+V0r(U!;)gL zx>`rP*?gTm0>!GXiKCm9NBrIi5(i^Hwqav1#|v(+<*&Vbnf=4$z1I5dw@y8aJbrIvuj`K19g|)C`QqtoRQ&O{ zkm0i3zG1)JS2%vs(QW#- z`%ku$VnLs@xco4?|3KuoK6HG^%5=~^?D_|dv)vQ->d*9Vd0zdNhYpJ7pCf$i7@05iz?g?! z$HyNwIsVV@dvdw!visk*4_@&-{nRu4J4vzd`c8+6-`@>j!xvU{%{aB!zjY*selz`x zXSSlh>eN;Lh}V2t>U%eQ9QpzoKmL4;AB0amLmk&_mlR7ke%S2`Ut8_5N1qD2ue|2j zKOTP<`L*|-*By}Q-;i-wGhS??8*#>AUSOtwC(o_u?{aVEPYWnN4>_?thz_avt1J#6fg0WZsI2 zF3hvJkvOPNJ>t~K^zY4?b^15kQiplj`%p*x)FTdhC-K;Z&v+JRK5<^;xBSKQGfvp_ zgoAgOK66&s{IYu{Uq9hC`n1nqIzIli`1{{q&yL4UqQ&Bxk8ae7p7C6f zR_1A6^aBsH*Ol_jPoCEE;9~lz7mhh_?jOc)-U!FFAKvxgCk|sieE-OC4@jPktK%op zVsY&Y-I#AYKkQ8ZfxMLIAI+<|RvukM|F+nC9;Ck*y%QdO?6@C`QE zsaJ<#=Jp?7{tKU)m+0c1Jh1DZ2gT-kCcn*M{{D`7s+(6Ey_paFW%|#_M>PH6E3zMh z&-P-ddJzY`=9`VKb$sG=dFrPw^DsNgW84_SIm_Jo&)-d6HVmx3{eQjv553f}-(N)I z*!gGrQ%s_TI$rb9&HUtRoW3&s=Ox)v(I38|{hqIpvok)Zaq7Xt2ID^Pcf&Z(yL|L) zd>z-j+{$d1uglkIUEOf@fA4za!>2dG<(F)^&V}3dQK$C!bB133+?M*b9qatXTdu9% z6y?(=`H`LJKR;(y^mm%yFUWkTUqR(HKQ)|RmuLRQ$b5@CiQ9PCHAr3^+V?N%rJc^d zcKnLhI`;jG_QUp-^8I6PQY;mnlrPb8yQtj%zen>U%=913BbBp(vejGn$`VZyV75$ym^nHRAJ@<@4&kf>6<4=h56=^68)P47EOS(8fWJ2;qTNuMv$$o{r}h|62W1 zm%O$|Sas&~7{Y-9vBOzA_6~gK&iUco<^FQT4_;l?E8q0*B*nt#8^3aX`>Sn^w%0={ zan|1PNRa-~jkx7UeP{YN^2}QOr%7GYqb;`e=Z43P7x}%~hFZt?)a^-%#r$epo|8E9 zTwMQK_WNJ$L%J!~^i?_jiRQ;~Y^HyjXV>YE&J{h-1?=Qivg_q`6@(Ac7l>cjZ{7PZ zb?Y$9`1@~9`qAG`3>$oRy-9aHGR-^bz^=bVC&lzz#SNIoqXS6a>_@1>{OtN4t&cy( zO#fn}ivHvc$wE^GL1!ZOJn|)aG^Q-SD{aB0oI()Pz|?~sgd&jWo$;|QxQ-T!Y17wPaf{RF%RYZKv=7Df@5L7|gYo#nD%aEj`P~Y(Yqx5O#gxWk&WwetK6>R z&qvfZbOqi1pQm|}*lcg0@%8yPe>yVX;%qOV6Ta2M24kN3sTa0B;gt(}ADag&KWS(_{b`*{|2cWY#&fw<*5y1B{d3&9 z#QT3&l()}~mAdo?FSh?pf7t1O9<0^`` 
zx@Nnm-2aw3%t!4^|9N?pTK$!0IyGcW`u0w2;$ZBjFAyGR^|U|pt4%$_#q?7zob$0S zU*3QJVEF377hn6%RX5O&egB&##bWPs?=60RcRD;!SmheOcjB}6U;1o_A0GW=`p?fJ z75&K@kUaR-S1Nw*6bA&GeS+{o#s$rvj?A|>eSl8*)bZG0%!~ZNO?h&&pZ@vqFy-Ef zS8mwf!}D(2KlzI382$dGlN3w*x+h0jT_AeKRnQ$SLK_Zd>x1JsAD$qF8tQID_*+6B*vy70ibKl9VC)}yY~TTFjzrHM)N z50|{VHEyqq{M^GAP{*!+8%m0$qJKmEp!Ox+a&7gv{y};1{IE0q3r4N}ns0ivwU1s! zC)iY8^Cx1%2k8qm|D2Ke7T3Do3D0l|{o(!lPu%lFw{Q61e#`89&&Ou69ohcBt>Y-B z*A@SNdW7lID{F_#U`P2i+uS0ks+XH;_ z_vm;!4&%Wyn|K#~SnH0~$p^mH2y3t?NwIK##Qy)6`ijCT*Vd1Yk8b8~ z3)k#a>`ebg{-~nAlekwjJyP|98uyA#MfjkNtNr2chH)O_m|w@k->qI)YmHC*qVdW7 z!`eUm#Gik1Sb6`+{gPtg^#GM0*T_3AKe^U~>md*Im1!nTh+ddl?o|Cl^G|J+H6r98gV z?|+^86~!voUj1ORP_^7Xc|8%QzfAv5k}b9RPsyW;oX2d4ZT0H(bi!jC)3J^p_pAAq zhYe~Sj}3P5x5j?%;XSV3|7WeS=O6H=^KM$3`gZ6c2pZjIjPh8=IO*qQ!)d97OgXUa&XM=G}Uqw^&W+PF^n=u*dT zD6b<}TgNGnde~qWe>n7&u-p4rJ0g5!mF<2tWzu2vX}>?6C-wO~nDr6W7glxcmDk)@ zV$?@(_G{G5^zYA^75%jiI>Wa*=*4{7G9KSM!w0Po<>T*$ah|6y#>H0bRxkAabc@fv z>*cS6lRtO*oja}a5cO^U(@Ba2o%~As7bhUl!BEE&cDc5CtBMcXyOHfo|A9PG(O>6R z`Ts%H+4`%|)4ROtBJUfnFztAWd7zi|8Pzj?oUc4TT7N^v4;xK+{fuW%yCuw=5WfHA zJIl|%`%B!$)B0&s|9^pT6op-`z52;Vx7be{%=Dj=SE=Z)^>`hbPHE20_-O%&cY?&h z*w1(%JkaX3hu4EoJww&UY?l;EH+~35pEPUI^Yg;?k6!T9J9dASd^`Rb(mL}5tv{!J z9apUCn(>>P4d>U*FVlZs&aCLK<8U33^})O&Bo2C)R~=#dcZJ)YznIVKs^TZF8`4(| zFP!xHAy*u7(!$Vp>4Wpno-pKb8@>NbniPxa$T*4;&2)6~sB-;nD*kwWnf~+h zNJW1qeCpKdq2m(=z2?U@e%?nc&U`vau^7+Q*zlt`_3AKOyz`A)|LVts;iA`0etxU{ z9pl*VFWNeuVtQ$D|L0{*pI(hKenV{5w*7Pd%|-l<*z}j_KbU*0zRRu5*80sLHhgq5 zR335AyC{!->-cTqGp^MkPwRPb3H>d%!8iSL`?g!|EBj0c-`tz=O#dP618l3!xQgN} z*IxY~x>3LKy3x9s{>4ZY{i)ZJ6iagzL93^J@5GN@@%38x<2?M`kouOdbAzLSU4*Bqx(9; z`L^9R#83a!1HFs#c)k?jf$-yc*cR8iOX_b;xNG}s?qB-)*2LF-bM7BMbQg8)_jdzH zvDo)t|LgbPTd`22R)5{@O{Y}*1D)8!K_B@|uR8r3QYU_YvOKLv-6i#h-LHS~qP5<= zcW6Jg?E2RZY(YIc{)xs(TgCM!u5Pa|u{=N)?n-g2#$vcS{Y8*S$ zKg}7n`cKWHiyX(354s|3_ZcwVBEMJLbZZRH<8@|n_-`?Wb0#eH!EFzk8_qar)dPQj zS;uRAJO9v|6pQKczyAB972ElMHl8bnPrM@EBbcrQdl7z*@QH)uo2@+b#Sg-_=RwDZ 
zZ}BdE>eXS`Wy|GP+kc~d!`@Rj{?J2@mH+>Ecv39(`evTvivM5O)B0)%kF|F^62wNg zO8=Tqf7$lm$usNpXMYIa`s>ZJvb-L}A;IR|g787(Dc}5kBl9gzeb5Qt>S2R1FYchYfb&wX5+XWrdPwPvxy0&`p=OvvN^JIRR{(X6sivBM5X20iJQxJaq zI%GWJpm$Lox+uZ}ZGIie^G=aGY_P^py*g}7828SpADVPiYr;DBAAS6-yALf7bm!_x%B_k4{o7UQw)4@3}ZRv;obb8vz|9>s4Ot*&eqjBv2HzPmeftmjE@(LCGb$;>v4C#y4p?7(9 z`S~E$Ew&k#@hr}~bzJkS?Oi@f)~CzY87`qe?6ly{FTQf_OJS#9-n-*EPnG9?xc;iI zD4%fO5x>@(fB){C@LAIrx)C?s=({Z$nf~+hN9bXEmwU7K-}MAdk5p{i54quS<3)b2 zwqYYF7UOF_b=#BWImu(5i|Y?NfBva2?eL#T*m-yV+(%al)Uo|fzt#t*%MB=PzNy4n zd&eU|tGAij8aMq@@iXsC|H0f-r$2e{z027nY=^2J_x*qm(ie!o8^Skq#qj0pAhNsF z3)>#~mlxiC_QKHI@8GM_jb?kzx8t8QDHiljFD!n4@q)eHI~sS24MEo4$)n~(L2TQu zdc;qknf^n$r%wO)d_H{{y5aSx>m$GE7maWHRO;}2lW%!i@89)@?LU6n^szVhh4e#5 ze&W(wd#GpUKUGikvio0kl47yv!HFHOz51JqmHu`9)NhRuM@4_-Rla|W`lCKNg7jfF z<0|3@?fGmc#bSQ)m=Cyw{;MskL$7b5j+3Wknz#S?tjj>ioz<_ z=-@TZ|F)Pv6)ts|U#5RClREt?|3ge2&}rTIg3YT1sSjEos+;+HN9J2x`Ihg5U$IB& z51U;&w|5z)8QyqxgR!L`?dQ&sBN$7$j7V+|9VHGQG@g!+zZ=GR-s$+v16#3Mz1G-^r>}j`fByTV`+j&2_3Zaw14*&i_m89f{T+QOqFY^` z^pokI<^xvrceyt^{@hLw{;2hLQC`#^^+0<))#a(*gVe(YyZFQ8oo>8#VzPgj^518l zx8q6ksAt#Trb)5bJh}fBbrpqGu4A3i&HN3m>jas1rhhy4RP@(zxb6YI=|nx{5eL1? 
ztCpY3tdBZ=TF@)M53xL_aZu}cY_N+z^d0i{cMm=@KYV_j-~F=x*z)}UAxW|Ld5Wpx z_~`t<)|Y5??Vb2y-AtFZ#IZB|JGrN#zstRuzaa=8+t5iIoB#cR51NkJKmKkQ=Xu7{ zI%ccCTfOkH6V{u))brAErsMu>ITb?>g!So!W|GWqPU2aijI0 z3WMWU`poq2%_F8i^HOY%kv#b54C0T`yC^TJ3;(VD{|oBYVXNndzuIZf3noZ+i|BQr zcsSo^Sa<(nogeyIJ#^#xH|E{Y`q-)1nf`tGBh%mIR%TO2|Hpy(pu3?f=yrb8c#)ra zAUx3OwTIV(@3o#G^H@xOYs#Fx)?e<>545K4_{S&K-1Ii;Rjz;NB*o%q=>5-o)z)#v zD%aMh#`*sX<_B_|h@I)*pI52q@1!<$n_3q;M(V|{7yNa6;&mA3`KT|m)n8W!f7txB z@st1j)t<1;gNL2mcyt+_cf0>mZ&EC#$K*G^|1kYY)*`PbeT5Nce>Q^t19`5E=W=V2 z?LRu6r(Z)?(5u`3D4#EMv3iFkH_i>azq;AwFTcBd|7HK* zNs5L2w0Dd7`#bH6&q1btL+jFaDt5O0&&eNE^mh{HIwJbF=P{Ky^@)StN!)D4Rm2a% zFXt`!)VH|%_x$0w2bcXq`tc-u=A{=uf6~rVscYxo+FD;Ry|K9eOy&Qtg_Y@+@<{WX zAi7oh??{|^XZp{}J+=Bz8_u_SqtugeypuYHI=@Psy6|Ir3H^){PCWdC8~?IJZ#eCg zzfI`;eLVfx`G?-5SWJ&ey8Wl>R}?GL&GASOeeCyl8joM!Uj3Q=^Ycs_PyJRWKK?#I&zy4YLh9N6e@Mpx zH`V+9^r~M`tZbesk2KyR$oD4RslTD9apsljKbS`<`n%kleg4%vbYVP0SJ0~a|H#jH zAoW1oKH9_U!S`Cvka;YoKkWSd?Zz(m^1QI~xb=VYAID9jo_+r9ONzz5|C&%7|J-N$ z#g4`qU-c1hx!s|g^{>1U-hYSk+=~8DebWhlj1B4FE&u;bCv|Ne@wmhl>9Y<)>%Z2# z_kbBm*x~Yhmi^aFf#=<}|6UyrTtzMbLN`PD&~YUz(^37+aau>?_+xC1ksVA$f6Z%Z zJ?7)77a_dn14*&K2jg+g-#;?n;?X?46Ta2M2D|XX79akf+mE_vv#|962cB@*8b9}% zXXoE~lVUMFCaV6a`W3~>^ltD-<4&-0{ZCut%4dF={yllb#&gOmyxIQ8iw)nN|ITn8 zanL)7$F}L#iQ@F9<655D-U;9G7uVmA{IJ%^dp~sJw$FrhzIgtQGmozB|NC?taP8v$ ztIwe`D6DdOx4xW2np+9lJlbL#Pwh?lJwNc?vs= z_sb*t0;$st>-1mTJnyuRs89Ue>TUnV^-I6{!1GT?m26&49`Sp-iq5O*?=Kp{v)2XV z$4I?d$|s%<&!^8!|8^cR{hji_A+h1t?fz4^W2e9NOA{b2Wx%(p!FAbGJpN`IK~ zvG--4X!i^ob{>28b!+#sUDO``jMx9)Or=k@JF&{=FShOae>Xgj=a>0q`gd|hMSq;ovohs!MLpnli#!Jz2WVx38yc6*}2aQ;J5vM zCn*+mN*5O2zqC!CUX43N?N6*sN44$qZ`{Ay%rDcwH_x^DPAYlDW?O9d_+umv5|3^A ztGur?WSmN#_G@`=I1fI^cy$=kk3F=_4!3+f>^%F@+ou2E3AQIY{_NBG;3j(i6Q_Pf zvC6eqKZtJFjDt;onf`rAw$$lg(J9T@89yz+_Mh|t;^Ssk>d0w!= zF8(n7)$O)@=>Cs}3BUTtR!hILy#LGrTDLPC(_a+VA0E%^!LXq`9Z#%9w*R%nQ#&8c z&-CxlnRWU%(>z)pU$TF&K6L!bIIc*u^AcWd(=(0YT8B9Guq*lSV?f$`OtSS z++ji3?lW5*bm*+-*)Hw-myYyh^Bbe*e^T`;idC+?lgISiY^jaEF+2`0(|;h3RP<*a 
zb0m-c>DSOnobM5v=M3jt9`zK>-yWH7@hHzb;nTm_qx6T{mYTf6x3=vKKbrmZ=Qmn7 zK)#*->DT(;y88V~s(wYWGTj`H1nJApztMj>JkI_j(|=ALsnx$Dd8S7yHu>JE-7DDK zTM#}-Um$*Am#;7Upm)L`uhv!`d36}pSo3{P-TmZOf?MIy-|ct*uGF>t|DcWsZm9eJ zw)(*ywVfbqtG9;6;l+8W#54Wp<&P@*yWE?de^^D3`FJOGjKo3WX48iveh|Lx-&|5G z%*W#R>yUBcxcb8vPkI0H2j1Ef&Ux;kU3Pm%dHngIq*&~_s{yswdGp_Y39DRtCyy)F z|I=55nf~+hj9UFCOFh%6A$Gj~Ar5*MRU-#@T$+q*%xU#hdBk~ z^Yc#phT5Om#Jl)g<34letP3{YqBVZsJ>Q;QcrW$s{KLGYShxfpQ)G5 zn03PR@PQLPbJo6Ze40A;`M0h0!Lh~t=MT5-P5TvAw!LV7vCZ{A#4TU>ydE6>Nm+jKdzjahckmng2#2-B$_I#>t-X+C?-!K(_rhg-URIC4%!sGd)Uqd%M zZoJ5E<7&U~K=zC9%+EZvo(GrE-&*&uYacm#yE(1(*7)=myFaiQ^=$heN{WT^xz-os zDhjJy)3?_+_W?G4Tez{E>7VA2TKzT8^k`@u(1}ePv~hZrA0a%@YaQ~;Pabiw#!p@y zh9lS9a@+x@j}ON!`0cUl9`_*Q+UMU!QY>s=>0gTD&jvhDSmhc&*lbIT`BczJJk!5m zR`l06b(l|G9pa$Ze6!I@5kF{kweN~w>rvPEqx4fRgsuJL;(K6Eob^N;42>EFpS>h$Nhp8mbd*%{yE>kO$oN9^Vo z1mT0mQ@;7rk@*&cVlm@!G@k>4@Fx8uY#DHiVpiG#7<`mE94;!(ZWPu(T;2lwfLS2uiN?J#BYNv?P9 zeCpZ$U;CqeDyQG3VS`Du;J13)3Bml-b>c^-O#lAeQ_&xOXE@)E2h{JK#Mxe&dx=eb zFzzGvW5*ch*?9EF_=<6UY=`x>`u%fXJ951+{f7gauXgn*)VJ#&hNOQx{^9`~k1G1R+?$>M)c+A+9`-tFNSywOgWgHpY{pf@4_5lq zeCkurkb3aIF8Ete?Lc>IZck@s?|= zH%a-*tIV%0@l5|Y`J-C>wT|i3(7K=#n>gqrKYfAlz)F8TQWrk;42kPFqxHAexaf@Y zCvLYxYw{l7UiP>nR-wM_e}WyCid3koN{(2u>)2XHp;-GhV z)$)54`U2sB-sSA959aF%vpl}S26g@(8|>l_7f=4iwPP+H3Z>bZbkT{xbasbC1<`xs};2UuXOOEd=3Pord_SPaO0v%2OLZ&zJGFuectz zUhF5Y4#VVw4thGQ^noz-vp-&U zja&cD_GI7x_9w++@4s8=`Deye6jr(R>ZgBn!;a&O+ZwY{QKX{3*5iL6F2{2>QBCn=VToGVXMKOhxXZj*|7ETcRanxt_R|`^AAJH2kDDC zioz<_)bSceH*C|5`Doot|H4zzUwKV=ddll~8Zsu&ALGJn{$AtSM@2`iYySDd$BvQu zUh5fZ9kYpd@rV76e14z3u6`-(_srAx-TlGx{sV_3#lrp^Oy`a47jL$o*73}C@~GJm zB<|I3w#GC4i%3O(l6(G4CpXgWsEC-FLr^E~6$=wDX{f0#7>>18&(W}UF=sb_s>pS4bbMQP{-1e++Ly4(_W`TN^$$+#M0vapGW|O_!}Qm> z=)m*H{JhKA8QVsRq5Ub653)VeKe~A*jQIT$UZ#I<&Oi_Ea%+*k;`dp@1o9l^Tmj;s%@i%ybN z*2fRVdEQBe`LPwrtHW^pA5OdE!s|QX+o!y7Xy4n$d(E@+|AR@f*!%Dd{r{Ja`W3~> zbW3?8=$)Yb{tg}0ANQT5uAuUo`$`->SmA45!hRF|oiO6Qsk^us zrYv*fZU^)&24K%*YxHI3ALi+J4M98pZx8p2mFX7E1KZ9&JIw=AvFSV0e_rmX 
z)4%fnYuZ=5{ZkM0F0WeZN?#y6&|aUNBCUMC(fN5Ne#0g7hmY@f=7+<-wg|me%;R9jyyVX;`9$X;afdyFy=-6u*R+L+wH`sR|)QbJx}z%J;iI>^zTiI#q{9+7nJ7}3sSbC z53@DiR3whxJdeyT(|;(>wef25;FHhm$iq*QVrgz72p_aQR3H4^FwXN%>YE>1v0J?` ze%%ef_{)+7%gUmk_KmR)!>%ct0I9_MNH)K40 z9zC{VT^;Hlj>iCJT9Mw-f zyZ*U9DHi+wtLFEoHh%OxSiSiCPpWm5&wiveW}M3-75xiu_W5`HJi5sKzb$sW|3NR% zJK@E)_1zoAsY`z`YCR7U$5v##It*L?bjFH})o%*v<)5GV_>~>{vE!e?q*(0ue^WjF zr#~{nv0jbPZe_O1*X8T9u5LJOvw1hH_kr=@ z%fCE!nX}Kd?^*2kFTGL+J<>;t3qU*AhE92@AZz;M`op*n;yj<3{_Q-sQdf25IEp&n z| zKX(3qpeUEv@31#0?!VWMUWW9gC|Ow_YNH!=JU{GA|4yD;r+?*lm<<`pwtuJge8J`w zg6IcYAKIV!J0tTgu6)aP!mrq)^tZ+w`j6Ls{mjj+vB%GUW5;9m_nK$NKmA%Cta<+x zR{6Y}t^JPv{tx4N^W2L5PAbO7^IrXiIv#p~#A6#j@86YpniNaL@5Jx_O$^7pamWwu z{#-kpK4#o)Cm%CJJv;u<`SAS8sO5GY`-y{APd^7?8>UIIFu!d3@5>#gKjXXHoB0O| zD9^_<#Af@aPB)})5PtkUFn$oeq0{l>KGjcN9kwQ&y1~@3k7#zOtDXOBYkjb1aU7d|45_Dm;*aabc59=d{^Q%v zuF~4*?`Q9K(~KeN+x~wjDVECTuV@@$Ro7m5=tljz`DOYK-@Nn5c9Bl%wLf> zSjWQ;59IX}$FaT3yUhA@`MTV0z00l9`@;rLynWS7_ps5}L;mn!=L6p5+1clu!v*dA zHC3DQZ%%DRvC6eqKiCWsv;G?++nN4z@<>I0Cw#7BF`ZKJTc36MYrf?vkACojRn%U`%`IOc^@C%>`5Y_EBC{68-#7V9Uz|Gd^0R(0*2_|VPz zPsPvcfO%*7&&wmV`tKrn@X3$)#6j<(Jakcn2mWtw|BTZbv)zMdd~D3itueQ~{O%hM zy8?ae`^S#-&GCVs_!eD%+t9k`pmwJJ{5%rt>0R#4UYGIjkJ{q5{+!s%5A=~AoB6VR z89z;m#hzcMHvM9QOXv^VZF=(Mx9s_F2)&lK8W&r!TfH!I^1A1K;^66F z)431axA!T_c+IQZ|J&zZ)dAheOQHoo|J!T*r$S`=qR&kKp*&Knf1l*RxAEx*U6jXp zboik4q5Rm79b=s5o%CaVY>&g$N%l5SkTF@ zRNQ}HlX%|3%5>AZW;=Pr`3K@wkN$L=))@JLSDpS%<)cg72le4K@0Ic4gYZD}_m9lC zIQ{CpJh+7Z)~es%_{tOC_K()8U-3Ff7t9J$L{mY3WtZy z?)d0a={xuJ8n^df@)gq!M!x@8q6_(EYhH6T@mT+<+8sgWnd$FxPeuR2oB8MG(M8Uq zwZ)F#f8jN+k~sB2>qGl9f8WS_i!0yqo$xF6DE(nd>&hp7dBwZJ)Q8Vs%Rlh~b$I{f z{z#zpk?OdLVrBX`9tmP|-iG>bvj1u1nYH@6Ji3Vf1v}$|t_WKnT=zoXp!K18;_rrW zo@YF*W48Lc)e9fE;P{hXzGGV0=a%btfBo*gsc+}sn3rODtKNSm@{^ZD3;xRWC#evL zFS7qBA{G60e(~=~O?UcnQa7HrjiYh1wQhyqmOAi=V|(g=T4%I=>V-$HNOu`{Aq`KR z`q{Vtp|||~?_o)?*m;K`wdu>LtteKx_Ufl!bi=M}|Ec6=`=4TjivC)!xw-J@zoNhD zKpga%Z?^L22R|6sH9vV;kGj+`n|K#~IAVpL{r5@l-XeVRqW)vf*}A;{&ml>%m=3*a z|F8f4cjZn#l8x(fE3;1NVS`=#VOHbM 
zFR%3Fjl-ZrcyH<2ZJ<{rBd~ivBM5W}koC zf^~g(H#}~<$WLD&Jdpii9LHuH!!9}-FX|)w;rf3y&)a@>t7pZ9XU#u*BK2+mKbRB? z=iMv&4INil<(fL)i4Wb(pNb!wd1w0f<&ld1Iu5_%fbU(-&iF20mq;A+E{`tqzU>OL zd|m1GX8d$`-2CLdMHtro;JJ7F_1c+Xy$7GV`J6F_qX)-7eBTFJ9}VqKQLJ)J9kBBM zJE`Wk1*x0q-=BLb`a9uO`oJIKJmEJl6Qn*!U!eJWN9J4HNt`@iXOGe!HcOIE_Z%}W zY%}@c^jGhhNSa^CbidRniNaik9f=N(fr_pSNso665$AC`IgPK4w2`>*JE5mu&K505m@334Ac z;-;JOd44nf=j0i6`tv{XJYQBPm2s#~9Q01&W;3oLevp1_f7=*dpL&MWbIMbsULA&y z9Q?;w|JbA_^v=9)lXvtiPd)qotF84wu0K!JuPD}==s#8Y^x05b>(XDQ|GYd>r@v%n zaVL-Jkb3Bd4e~sJ_`9Kv$2>;aM_t`8=G4P3++~%|g|VB>n(@dE)%{N=DHdJ_=?%s2 zPZyd#y&8AJQLIv@=J?@p+ipAJr@u`9`MJmHJBg!Hb6c_DqpzXziGx1!QxAj(TD@rd zfKNTcy62;dKWuy4tq-5R$t_{qhi>}YnO{7S`u6*mMp7)+&p6%xP*+h{<(fJmx?x+r zw&XDHO#i{$Q_e=-Vy-Bf{9%J?Qj~(?Zik0cp;E~34{L1-f<=K4N!q4;{${*Qy zPWx%tIAju{BYF7tRe=BMTPhy8Wl>2OT%E`ER1Yx{5F=&C4L*>)zilMNg5NLh{$Kk+H|syux*DgyO#i}D(VzJZN*;WxM;ugM^P9u@ zmgmIp!Bp*DL3lBGr#!~NuERQ>`op2$J?e}ftlbwry!BUqzSDy8|9=nEaoWSN?)UGp z4wALV_CH1II7Rx3Fw?)0Gb;L%S40;%j}YZsAIgj0FVNF?nvcI5#(AE;=mT4^TfH#r z;JdD!J>`th{KmZlTTIxM?YwULul@Y1^G%|~Yag7qwEmUP=NtOW^iT5=75$mtU>+TX z{(}WAX?|Of`k?ipblIl43Ex6Wfm#sT1+37q-7{!$04$(BoeTS3+b=yaSKJ#9Ylz%Kl-<4aRd zKj5lQhY#Gj!(%%g)8jSI^rwzudSh_|va{fUNwmcMG*cm%pLr=RqJMABtku8U{a2lg zLp{((e(OW^fd}$>1kJB}>KQJfKODK@U*Gf9H5P`C-Soy~A3A$3^-TXpQY`c_+T$O| zD)&c~>z|{#%rD#i`|{jc{p0OFs)Mf71HH?uF7moTFGYBuZTIk8k#5J+9vh5#qx83? 
zUbEo}SAF-Y*3^Z|pEr0>`ThGLNwM%b-f+WW`#-}|U$G#MutJZ@^=}O!6KB4e{{4BR zR{!|^S5E6hh>oDwxY_8Xh#!PcUuKi1_5NLdIOgXMeeWkn%nlcQr&sT zt$dFSE}_3Q=E&o2xbc10w#Kf0{4wAD$2+KN=bs0XVzKMrHh%N#uY{fH5%tsDRO>|k zn3w54Cudgl*F1hlQ>UYMd3N?a@mR&@1icfi%QJs5dbyv%4^n5;7|&PxVBTSe2S4}J zM;G27cKhjdUtj-`e*Cumldrg4arm>xOKi8G%&yekY#q<8Ka9p_Jg@#t|9MHaRP@(4 zuOrhV&Dj}0Ex?|4;-GgDH#?dae$d|U;Awu`FLjsHA7*cL;xG63;__kdi>};a-_6SX z57+;6q;Id7{!`EYwAHUDR;ELPM;dp6=w^K?FSaxN=jRy}{d=^I3S>H^;(yEaZ)@F- zppB<}cqe|tCG>|wrkpr*(!ct`!H<4${BPDO@4v+Pw;>$|+_Cuo<>lG#L1Dd#{(9dA z)4Q#8JA%wF(|<7cnEuR5u{mZq-*iuh`>6+dH#}~AXkFvYAI`%MTAtSPio_Qe!}`bm z@z$50xLrog=FL1i;;E|_&zn5Z>N$DT^*#Wp@G|{}@<*n>6TesFI$rv!>w`E*zS%lY z`o$0OI%fMKAHLbz*OL0f``&y1_nz7?4ey)w`Gq(9V3x0ZVFEBfnr#6chVu?;&(u^7K0Hhr3(=gnyxTta`?`SJr= z@A>D0VW&es@q+{28PLc6|2oxiz?Jp>V~nRLtm@i3@ikR9Y~}?rzfAw4qUp~#)Y1RZ zfN$GHN5;V)Bje)7X1+X+7H>#B;^vR~snl_rr$}BMhVA$J;$LolPOnZ_WdC!J)}ar@ zy8j;+cA4*WeEM&Ct%tv@HuKT?nf?XC^oI}jOCIy__(AF^ulecWe9MEUX#U2?e2d59 z)Y+r-hn>&e_>|v|n;v%Sthm9@xAyj$XTLuhN{YpbTRPWaUE*kH_y{H^g@k6rcPJC|vVUw+@^Hd<;u>e%nUhLU3Wx3~ZJ_%qeI z^w$IVsDW_) z*u!pmZkM^#x9z_z{gvD6$j3jzD%bJ$X#anz^Pyh+yQWP4PF|%}fBhX9^YJccm#;JW zCk}d-vle;XxWcsKZT5f0kLGQD`X^tJc#QPpBR>9c$<9B#fA4)~hbxv{^Wy7%I3FIz zpK(2G`h}m=ER56C`yY}IQU^QJzc(*c(cj4cF(2D7;&I&t^+Eaq@pnV`hK#qE&kN&( zX&3MO$muKmIjnccmD85__6}a_+4cX_RZP>QSZw=ls2%adTW*)li4o7K-4>*uO#i;z zQ>TB|-ygX=yWGyH>s{_G{idJx4G-k?$vn-^JY7-FjyJ5{V)|R_Y$NzN6?A2L(rc{gsZ-xYUo4`4}=D{b7S${9%n3ZoBL!58fN5oO8qe zo1Rnt|H}c=Km8a^EPns8jcs>2e_?03>9}U=_{|N)L)`M|PchSfUe2`fTyABy#>L9) zwmh=;-BfKSh;G!u2B`<)?}qRVb-aol^-)(htg=tB)s=0NBr$G_n$@`+x~}S zKDHlX-W~OWst@^wPI%4f%7@n&+0OKzpGVNmyWA@EZ|)#A^XaC)@m!QgABylmdw!#M zJTI?x7uU}?VYS2mbn}D%I5VvF_k*_^zy1Bx>8d~DDkjn5)o=AwH|%(R=$Pq0n3t;6 zU+dC8`)$K0ZannXxb+d~Yy31R7Wy=QTqmxF4R+zT#yz;)@2`1wc5CI%5j(l9Hm0x2 z`|nWEPWig0uA)S%YpWOclL|MUU#5RCQmy`a9A$b?2Xs=GI7lB}<7R9B@IbZ)o>%iT zZ|Z zjQmikqQ4V9*D;zNjp20|k2vTx-|VP={2<#!To0RihSbw>y7?I={AKB{&i={HL*XBL zH-EF*(bK4JzrPqtilwsukH!&Jx%R}hFRnjlT#)gYZ>E2dQK!Ejlrbzn72CECr}m&= 
z^BaQjK^s^5Gk@>Me2XjJ@}2N2_9*>f!|&X4$J;v7!_3+5`ic9;nqKql{7;$`3-e1a zd@(-(`T}|x(wF8-R<<8kyYl;A{JRO0`DOYSk&6CWKR%CPrxMl{ic`Jg$MGw zglGO}p45eBw)WA@A3pKUAMd&EmyQT0ov_N6UVC904Yv|kM=tt*z6O857HNC{&Zx% z#ivV?VkBON=wnD7=8vsdR|kJs^N z@U3k>9^45V9DT&X^8RlJC&gmNJws}@YJUHy;|Z%=(`V)WCvB}q-)d+2_vVpW{re@4 z{%yaXir+iM0m0_+g78850?pqTnQ!qPiIeADjXg?#Snb8#jvYVeXJPfHH@a}28^1?g zKL7Il6li@EDwR4(7n!a7Q^#u`d=E|?+iu&EN!?8UzTER~>)$VRo1YM*K4|^*43E=4 zycpv=?@ZgIdSz zQToFcAO4@)kGg2Hu=N24o^aV3KPTUg|9g{SF+KSFo2p+?taAG|+sUKK{b$ z|A9PWpQ)>T+IZTBcRDV(g#Hk|w8DRGKKB<2>*3 zZp-b%yWGlbm#@q3JL5XoVcn|^U%T&$ox{4%{p;*8=a%<>J6!VZcSn6{udK)aIzRNV zzEa___D&u(6^*C5o2~Io|9N?a)m6Wbkp6l77)Crk_rV8k+@7RZ@OQ&F&oiFZFe>BQKe*Gy2fp_=yHcO+Ca#xmp}#*(lVY*=2dB37Ay&B^RDORAzb#Dq%=Dk1dn)>C z-KHXZ8$Xpg)<66Q#19{Y2b#ZcWWL3Nf``=BiZ#=C}KlnjUToK*i+jiSY ziiQ3%{fBag>F)x!@DH2rm4^Q&!j;(FL%4WD{p#_H3K|NGqoq4D|UR=oWK zM^KmhZ}52uBo7{#L<@cp-Iy=Bo1IF5O#hj)wOrxLpPguDt>JG%k(cY>hwn!_}GR; zcDDc0`uy%5KFGKr{%#20kny@~=XG_%TE9B*!S`KoPFUy6fq_}OyoE`OO1P!I`;khKvFDr{`U)^ zA~ff(dFwatdV)H3{yEk9is?{s{pSGN1t_d?{$@LQH2U+O6WVzOI(wH}RekM#60QiB z`m4=+`Q8B=WS$`YZV2Cy@p;~{qdw~DhRMg=|INdGurN%%@NM6{L;%b!%gemwck6>>z8WfJnZ}5R3hxJ(>c2RM}F^w7q9X2y`lAA zGrvs#-n@qC?{cf`i_as>hu5E>E9eQl#XkS6`P7fm`j7OEdDzCMFHrNt2QOZGqigQI zD|~eNi;pgNWoh_){>27Ux&6jTY!{%+I@3w(#x}o$Anxg-Elm2&^zX|(75$lSV>sX5 zCws;3oucxZ>xxZ%(E8B2_`6}8XX9xdv)#z!#`VL-_y29&CKuflX72H)>wft2WvFlZ zx22BFbER(;&wy=jI#0$GcDcsSewh0BsY_p({{4Bj=^xFjsr|qwpX~sDjEsjL+xqFv zM;&4RULA%_PdIpo=`&}^DCO&%&tG202h(ErbG5xsGd}qse)7-_ zJC4&|rvE@*!}NE#RrcjwZY`oyLu}g~oZ5=K?^7SN@l`kc-7wDcj2G*#{%-X``j7MO z`Oc~1!;W)L{q@%4pQ3*4{{NBvr)6g#I=mkkGQMJ>i`9$!Oob3{x0(KP@<^@z^CXXX zneGkod#5;0uz86fe9$}b)1S?wBYt=>8lU;bb+Eyh7x`ObS8d;L)l2KP#_n_VuRnF; zD_-NK|4>pa_C0)C?Q}SbRc@!T-E1hIey9`Knf~)~W<`H^{Za>AOt)10-YF`td92v* z!B|K0Yd&_2ah_Mc`LRLz#s<|Nrfhfo*0;TTW|(^HF&od`;|8yB`~I;nDHgU<|K4Kz z-{0ol&^Y5Linn||D#t(V;dPm3rvLmrQqkWDKi;3zZU58*z2=)8^@|_0_Z4`Wzoa@d zzMef}LvilwssH`EVmJ@J-ntGAZ&l~>qaB=JDqyKUi-6 zoPTR1#bVpp*d@IFd~>ZsKdIV^Yl_t3SH! 
z`;u2y3htcdli$7KDX)2^f0`5v+n0U*)jrWtSlM>l;F0F(_;&vd`cWS5E6lSsW@5pp z=ue&K^AzJ#2VJNKdMEwFHh!=Y*Ky6SHhji2bV;#P;*pDN0uMd^mYz`m1fak*EGl|Kd?ae~mXcN{WU4 zZTwXH=s+CAUx)BOtJ{voi~99$__$_|(jSiA^HazE@vAq4PmllX*KRrfT>7)^zhCQv zTNS@Q-8LRaSk*N=bfbRV{4)I;d6kO(%zGe@miuR`lNN3}|H1k4=E>qmSJ3*o0A2!q=%&IsT-d)%5;voupXci&b3{x9bm`aBN;$mw9LU7oLj# zI*-cv=Z4k;l}A0$>$qkskN)t3Y#;P#e)6;)b*W=^G#>dehQ34I{_eqN=7-O(^SfX6 zAKRb~-@ouZBuGB*vy7M2EW|6s@Xwvj5Th7{IsfpfQ|B9Q01&W;4Dbevo<4zxm12dentyHt{a} z)}-AI?|psYds?gQe%pEDcU^}%Z2!EE88)K(&!ow)#k*mq?T_vL5A>4?kMYnk)4wl| zRP-mWA$g`pTkHy7dHn7GK4{}=UHsiJ&ht*k*E(jazgxYq!HmnsjQz~(VdEFKyMOB$ z<@b+#{!O*MV!E)n{EUUXSVJ`RV8Vr?KA# zg_Y^n;F0F39+mfB<=H&c&bI%7JQJP0%dO0&PQ3qbYrLbVym+48NgO|Rj25SERF6L5 zye0LA-9K{Mz`(0dggyR#-q{}=xC#B(ZcK+>>EEvt{r=I3zi#a{&UN6}Ja5*w&NtJ4 zPF~B#)qL`rJLl0wj{h5C)33g=z2m-l?s~ z>yG-M^--fgycpv=8!ze)f1DrNVdWh=Uw{9+e}h$M2F8K6isC^Izz00dE^1LYzJ>h}g{l68gicci+9&z90WaSZ%pi2S40chI)4Xf%=N+FHX!)03A=?U=l6(L3G3RS}&eo zrvG4Gs-nO0`5!UUqmi>SKIjV5&et0+@>?IO8$6J{;F*63b@nyH90o>Jta) z$82;|#1B%(kok~@4KATS9C`2YCm#9u3*o4b|MpA&nD7F9+VRg&QY;nyQ}ru~m2HQP zM}qX<_5Nepe{0ODMPjY~@$pYX#{sntI)dH_&urzap_vV|v_dI`k&R;<^6-eJM(`T*rM@?mz5=>lK-Irhn0~jT`YQ zzdw!oi0e@|_FI3&XyyBY@zbPOjOS|Xf79O@v(HtZxpQoPYwSix&42YhA7VWF{5z=Q zg4-TmEb;4JSml1$Y$uP$f)SAJU&NPbV_9$Uu2J|~W-Ykz>;JQN*!hiPue<28j(Sb*|In8d3;Sp9^!U@MU&j@zx@Ml}hHd&s z^UL%vJazihhv}jDw!hGR;5Ad}2|j3jC?9_}jPtyc`sT-0>{c)A^@*P!a`-hDjT`eTg7_i?<&4hOj}{LgYn&J73s#~0pt=j~IOkL`aNNwM%cNc+?-RLc5S z6mK~nv$ZdNhvl6_3_D^o?@a%~Q>Q<9@X^gs$Aic3p5TMb6U5&Q;TyW7SSq&qyVVO* zx7_Mqt6e`QtTpYDOTYE>!(Q{)|B!F`Ust^S9%1us6vLPM6`j9$v*!c-^}b=)j1zyq zk?G%?=T`KO>c;i0uJWw!$d3Kw;qQjlx7Ib=Nk_5WtzKAtw^LfTTsSwZG5zsRkAM6^ zbhht*Q?09*URwPB-Lh==hE92rPRx(*4SmET&bXQWeR-ui{rMdY^Rn$A&9lnaA92ve zaRtq`3;ggvn^!~l=C9Lz3H{;l_dfU4AKi6-IATh8?+q66 z3ai}S&8A*_|L0T~BW(Zud8AhV`1-eo)&ZT^cKzEc8drZs#~z8Be_*(cALRM+PI!h| z$86$V{Nea#m)Y~2-?hVee_Zhpzs+3goBrf0{_p?(Wz~TsTWa;!J}P=deKyr@-j`P^ z-*@Zs%-=sU-{MZ*_eoz2e|>b{u-n}m_YD1hdDH(e9j7tk;Z!j`5w?a^@AE`Ka%Z#=H!fu{>tZboaxcd*%`l6K;oSsaWM8{Ghg~PenaY5 
z+;y>;N5rFE9fleEzH#Fpe(_q^?Bu^av%=v+^kK)J%ug}>WAXE&@#tl!<4c$2n*N&& z?VtWqwKb3aGX3Y}k&6Dx<2qFMwx90gSy`UT*BK5LkTh=+gb&geX#T#D`4*=S&QHQyYPgqsAJ!M4JE~5=ig?k-5X&vjzmj6*lwwBx{=qG zj7#accOX&^8*Y68$zvGfOUFhGg zKUBWuN4jPD59I~x^tXC^o}eD6_0W;u-C%>%0r7W3_=eHA9$T?ny)bo?$1Zs6Cntuf zb56MT@kh=ezjFOyp-QI9Y`y*=O^T(Wqt0KV8DH(n|BsWeab8!gF_WZjuT=1EI5yR0 zUgZ7T@4p=xr&5>xbX?}+vB8)(N`FY7fBLL@pG?B`AJ}}s2^ZeQc3!*x-?6y=;TKHz zG$|JE1miXR+WqJ0vn@>G=$PqWFzU8{j_XaQRO^6FYk|jbH=91{`0b=v*e=LNH?v(qv)B3J`omtA9RK3W ze-B~Lw>`AdkLNn-bKch8cl&gl9g6G!XW2N~zeKBRc<4rbo0rZLT{8U}c@@)N>sy`i zd9?gI#p@NMPK@5=S&QURSFyrR3%Y%tp}zI+G!9Y^TQRO1+pRS&I^+C_+wCCL7TI6# zFCN>jQnknF^`A~{P{$WYIxx2Ff9Sg{yiEU2{s{fN z%dJITmt)0d9yTs@6r0LxJ}Z3hqCB1We`yKH0?_aS-C+v6E zqi_85#_?=NcKp+je(gA>%7b)*`yCqjl&Hen8Ro}UB(Dy`>QBz!@XO6+SmWF4{P_BP z|HEsZ?SF=nVzKiNHOHUA%5-x)5~Q!D>L2$(oYzC9e_x)7&fewLBF|&|`-`^5JBm)Q zGEU_8Y8y6^Vj+)y=+FH6d|958Ja~)i4`KT9R}8kd2;0A${6E~id7K@X@Q z**w{A+J0wL=e3Le{v}siQLK(msa^U13$3$xw4|cYf2e#^tH0JU57WIN_1-Dwf_(2! zK4|0iNFM%fnAUkG_2$P`?8etSdE0yD9z1YG@04vmcX502Jg;^3{@c#tV(+t4^!KN^ z`azwCcj9M_T{-`dO9uMUXQBUa8L8;6{Ctie-}cje%2%VOcM`Wc`mM{)B_IAuo$`4Q zK5VdyzjxIS{p627`}N%3X}`RBhi@*p13q52elUxR;TC%St)YHJv8rpf3v|P_e?E_sZ#=p1c&9ib*!-p-`Ji{=H)KBj;wLXf>%S+dPi?31F8;99n|A$h z-$xz~TOGgc)>|ER9sKtGJEA;bOKmqE#VT~u@yyn|Cg0nr&2~>d^DOiqDQheGYaRPB z@{OlaRuy%wTvv}Dy(_&Pzjvk8>6h2H@#ZmX^Px{er*W{0&YnC_^TV^>JLh$uIqrn; z*F(0x@B2qzOkVBrXDf?KI&blI=9Aj?`(OIZCBx>UcA@`h8L8EOYpFAx=%+1y_=tnv zNj$a5ujt=cIG^P!pV`c7QT@svW?p|z?|~zOVdm4 zCaoGfw(&IXLG6cp`ZY}Qn(xlyLOy7H#L>p=mlB(NFs<{9r*&qlpSn8i?RnoJvp)NW zq28Xaul|==y`BGGes2~Rt|K;Fsr>udX*byR(b7B}kN(8l*&o~SKYiv}hpjxt_8*z* z^f%rCt&iEAalG2hm*WO(kUl~D-H?1k#%CVbNgs8*VY#C>Sg^taPlhSeci;1kpZ=xS zI=lX4B#R5j9ezW-{<*DwknvM=l4rkvQQMwBwdt?Wzfo3H^jF^Iq%t~YdyUyeKImdN zBd_^uvAy~k$MjQP^AAZrc8XS~eDF@x-#hh`nS0*3;2*tH|9bcpTi^dPcufCM>Bl~Y zwbahXqgaIwIv=y0JmR`d;-EE7#|G6O*8J|A$s6Bzc$hJ=+9fwV_!h5mu7BpZz;KHG|F!msZZ>`+ z8@CwWG;v#AuD0egze4}WT&I6?J*gwVqQ4s-x4gt}ee~4iYhS#sj7Rgm@)=IhALj0V 
z)?c5v;cekVuk^p?{(;W^e}C!Abm&vN%l{8DFUc;pS0|6qjX3iHnO~uQyR5KrU1=4z z&aWB7CZBpk%_9y{@14|}U6-G0p61u`Xx#X)LFM<@p!$24|MV{NuKBNdz03dLuoa*E zSRXug{NK*vV%z_8J^v4{qGWZ}@PO!sZQHGr8s=B%-&cAn`fFZ#KF4%#2oL=5fnMv) zPWs0WT7SxGe(IDDUdA<>c$fU%X@i@7{aX($+q>G-wa&h1kBg|c@81WrxNsa%+21I? zWL4MRi4WbZ&s_ZI$NUQY`^!kJ{%c4b`PNTc{O}V8y_0xq<4^lA-c-NTR$j(6zD4ve z_VqulEH1WvKqMYV$uJ#7(cZ zWHP@(|Dn=T(ceiPYug{@;<4AGjt^gYylQ^*O7e-UcZx%T z%`*kz2k8qme{13S7Dso`Nj^Lt8%*;Oe;9t-=eB$J78h>$__qcRe9H~g*M9%Cv-|-M z`whcnoJ<$vTT>j?XDhKAVi)=^C?8eySN`Ufl4p82v3-KXLE>gBAN}A5(|MVnx@0_f z7{{o28Z4K(lKHABewh1 zivwGEt+UU+iOzm3t|7=Jiy3=Q&|7aPh=&w3)97}#he}>F@8h* ztc`CA%}?@(qhs$RIZtVzG{5p;r)d3B&v>c5i2iW@ z&PQK<>*jO9Zw?(kdcSQ);kW(&i1csgKevg$f4mdDK*=h$TgM|o?B)!$>9e(PyU;&o znEvD|avmF9=oiGFB7NY;rmnJmF>XWptkgO28&8c*T^)wA_TF^Hj&GSSY#sf${)fIm zc+efBK1G|K{H(>EAN*{$@D}>VnVA03D*8hIM(rfko)fV-MYi!3!ixF z!6SQh?*Bh;;p61;^`kTWv`@+Iyna#}-Mo{C)#qZfgV&`HP`nSqQb^3E1JM*aM?}Udq=o7!`rQ?$a+I}sGTb=U3J5hh{ zMDr?VbG5X{&@Sr@&*s{~FEWvdG8(4asNiojj`i{;MT^#$&#P{_QeS(chKc z;=GR5nNB*MJr6FX+3zzfFY(*BJvI5c_`MS}9?i#ZxR`$U!aciP_~05}of$^oGxx<0 z_t^g2u77CB_~_&L{|m-bl&sF$=I6wRZsyO$@0}q16#Dm-o{IiD4!>(8-*j&X5B%gc z|Db*+Xm!d*zWEo7&%+Or@0HIGzQy$SuJrOFk6m%<+Py3N==VoGboWW{+5QJ!#e9`` z{oy*~(N9*h5JwN}y8UvYe}73;(chKcV*B4w(DZ1CZO?-nAGf^3Z++;z$OGw*JoBT6 z@_BGE{qTif%$WDYkG_0gxaWWkzw)7dI`_XlD2t0-chax+W_tfgr?#S4owZj#{a5b4 z-I845%(u{gpp4Y&zq-`Xzv&qo~eE~!H=Me;!Mtv{`=*#DwG?DP1Umo7VcY1se5 zheqFb`7ZF=_7A^eeu7?q&U`_gN0uzcH(dhQ-l^RdWZs4TgQcfdfBhUs{(nsWyzHXr zL+9mPX)VNd(Ou13M1Sv;>FeF`^}o;Ro${XFU;Ohe&-Geo`VVGt`CmN#87c>?)jz%d zp{4z`6`df@59kv=wxRM^ej|$u`kTK;{Vu@?`onrJEP3ngkK7&BZ*0B(&)vanPkjF3 zb1!IpXurHKOE-mX+Lzhd7srjn8Hf2{7u)}ESzD{W@>w5wnO)@P5o|lO>#yz-Klz}I zoAM1UJik)sbUaXbJhmcrbr>3ddSa);ubmb)`qO1kz4mzL`rCuExY)dMvGe;sUoID? 
zeu*wbe5#Zc_7;Z+n4#N zbCJ%rH^%jg=?@sXyW@ft3SMBz{h3CmVBtg`U`AEZvQI&1uu|Nl+q$Man1A0sxd6E5S~RBY2R z*Es0JCJy?Vad z^v2)+_SyE9i$7h{H(2@qqlWU)xAGMF$43?YHLtmy)R`VyXZ>g%agaXhkUS6_$TvUz zI_blM6ZD7mp5JeJbIrrT`o}%6a5`b(G5GDANm#g zw@Q{;{in$orbk0;<8$NVmY4XgkDi+RT>Ss>!ex+Q0Aozs}p9 zN433m&7@9MYDe#bw4cXaO>t6urjA6>t|^g2Z5Vds4Y)#my)r?#S4oi#jQGnbfm zis(h(x%dnH`^rZZ{mFBZSJ8tw*pNKF_aq;rFA#q>^g2FuwYKJU;|r^=e)H=;@WY>m z)lYxjcXzt>4tVYQ|Nbm4b{=}wc>duFY`2Cw9`#bSkmDcnK=e|((7!(`mOA}8kHNfb z{B~JY)F~fv(7Q4^=5w=)rscz9`K|GB^C$iKlm}apx;hNu>4BM#{r%#wYREJ6(JA+x<7xX1?f!UFbhha@6Xd{{9iZ1nvCWFH5%0c7P5x zuIg$2!I)mze_34fy^}n{3Hrn4ue)Qz(QTItTWtQF>wog6o#C_NpJbdo&*Fk!pm;mm zpV>|x+4bjY^L~pC%&*XYu&k)lKRsW^_?7&2$x{8km~j{v#4lBy*9m@*d_yM{6Y*arhi*1>95d# zsPt6ybdtyW5&6cGYn<(HQ0(T%1<41k56w4!zVLjD!w)*iH$H4I%}e}Y&1Y9R^5t1B ztbOYl{mXp4bN?~UKew~Eu-*A(_4@N%{fc50x;1#DaVOYRv_3VD?U(r#`VW^C75$lq z){)P6hDjdRJ(3SvADWN98>V&MmECr>JMT)XuwA*Xe;qMSczU-tjSl>!(c5#;_n%+q z_4~kY*Pr(ZkLj^e{QsA8(aq3lp3XI_KY7EF z$9&+gpyo9{J3haz&iw6#=UbdUKqvV&J~o)@yynEb7S!kx7T+re&$!`9~mn8!?WS|{FDcN_&~4qW}}ND ze$aRu$#~Q$AH0lfHt{a`;X`{IecW@OpC69eqJK&E{?7Mbb;mz5_5OSCDrPkc{Z!6B z=Tg9an|T-dyE0OzfBJrE`-wi~i`7N_u-MIu1>pz16Tj(}tKAkPFGb5|{!Z&sdlCI% z-M62&#vP|T7G{0pj~{&Jlbd<1tNr}D^~UA0iOsvU@bSglc|D~6Tgtp_KFU+*-zaM< z`ZK=~VYYh4Z;RjfM#XMkKR(~;;8Qez|HAVvp453K`8F;#NIo{G{;aL3hRe8 z+jBqj>*u$C$Id^GWO1?m|JHi`GgrT&ScPtmM}o$u=Pilb>pPb`=2z$+J$3qX9+iCC z4*G=0J3-=L>ZdP|JdoFA8pnpuu#3)?=d!rK6Xt&HH=CS3Iw!pU?CD!v|FL87oBj>W zSIm#Qwfq3^Bb#?i<4#fg5Npi#ujlRIPv_^gzR*81*6HsT2(#(L_-*lfr>J?&L9xjP zy%WFnldIh)NM4GT-=q0XF|`-bAEv+Q{6Fus?8RZmk-uB++RfI7-;RGql^<-WouGIN zy&F8zJSSMW{w>#dkD~Gw`nO9@o&J^cpE@4ndRJCgKX-apdOLpR308F0e%Q{4lkZ*0 z)_J{8)Ze@8RfkO9aqe2ZlTUm0&7a)h!zp zy^}AS`K685`V#Z8{ZBiKiyeQ?()~ZYijvh?r~ROt`D^A^=-*$m)at*Hj6whOYv{(u zEids~AKE{8Aln6b=4al@=fMg3d)GeuwEh2n!I{15yy15zzUgnzFrFR%jAU`K*TLfc z{#W~2jN_kya=?oI@TB*DXlQ?+6C1ripZJ+4NFGQ&&#U>h4~r`=Jd5iO2OPM^X>a=0 zU^sC9Reycklnbfn_rHt}=H<;lcYpc}8&CVmY8LXiZw5Bwz{~uK?SHVOs?)!Wm5cGT z)NU(kKk#`cer(X{lYHWJnAX8>ejN{g9S{Dn!lt)9`~OaQb6DZKSFgI~T@O*$oR`G~ 
zeZc$x{rw~J1{;!}uDufv$Bo#AI)9Ew3;l=6iaP!IUmg0l{X}0`)p=gXYyMXKPSENY zR}nww39(bG(|Mx)uJTyC1zKDqxM@53Pd@O*$WyXu<0?D&Uy z4@fTc@D}yI4rX!TdnX_C{tr(5ieh!vUj1ORB{AziPi&1B`Y$LWb^24^ zTwd$*1XTy-M;z+gD|X`xYi_yKYpY%}6lVPT^>co*;`-Fv^@pm9 zz0UvR_n&biS+UgV&wX^6mu;tcS=HJ95C^?0qlW&lpdQ&XwmQsrT5R@`M9+ z`s(@X9JnAHaL)7(pY(P|p8fu!KZ}c9e=_`ynCc`JUG#f7?Bq%RlK zZTloYT(IZ<=WqDqIpMQ!`prLnIAsLC9e?&|9XMrz_uo+6;5Vc{#X^6V70V*)pO@K1 zejcH+zfN^CI~f;0SjU%cFW!wG*X%{~hdpPmKeEf4E)09_x?}6*ymS2t$NzoG53Z>9 zKhM>#DAqsO|Hy8|^hxK3UFhE^xhncQ8G-i^(b`Jm`>Cdi%XsMl#N3jYWv|qEGJVH0(>HM$@ z{qwS-R(}=9bZQ8{>96Ax2fdR#vo()?@Po$Js;N^x55k8HcJYV(clq+$9{ki^q5X~n z&iSMN0DN}+kB*N%b@xBal7)F0-&T^4`r&2Xh5m8GTK&`e4?E>eko$cs_Wg$!2p|2V z=#z2&RXc36$JLWwH}uA^{lM0bBe^U>*iPJ-&bZ<^mkHU`Td3JWBEE? zbOgQDn~g4A^69ryr+n~|XEt%h!3NdeJL96AZn^%>wR&d^-hBU2mkd*H`nR&Uu%GZ# z;`fiYkq1gvbxj=IDt*#lF13aJ{pBOm)0I|XYhH6Fu}zPL*tY$<@o~#b{P2O~fwq0< zxaOx{<@4ZT`r!+&e)8By)}8Hodyd{FyXN+<(U0jrn#HB^{>!+ElGRyz^@HdJe>y+x zLjQr1rJ}#`aUXm7x9y^@tSahUx$c~2usj#tc3#fA04~EXVafvzfW!Im#bzH?f?tWezwt2TSu6>Es*E9U7N4{4+!(={->JQs|`o6V4+n0x}9=`rl8y{ld zt8l(KomYN~UVqE@iSC&$X`fBi&HOqq=E=MZ{fA15TK&8I{!#mZ5A?2N?Yu6mkGlLk zrf0sb=oB^IV}ld)ha=9uXN$*wv0pgq;IA&R>)n^qm+gPrSzN4-<>LA0yX<+%HSPq{ zwO7BL|Dn%ZGT}w1LjU11Ql~%Hos(~`!#?GMKe80x7YvA>e2{s9=5H-L-{SCtPV(XL z*kGEM_``B5uX_KxmR=C13>XB zADHp6^Wo?C-*g|9KK(o5^$%~e?XIPM9glkP7CLHQ{0(-`k34th5}ZsRI1c_8`lo1cD^&+=;g)YT#1ze{&x^g38PX`kE6<+m@Te(F;s zuc^9WdncLc{0jX?%ZTYuo+7`aXFhfNLE@m-db80>5kE*@p!umw`l`%}c$fUJ-nWiA zed(WlA#8BVou|xt>`Zv<`>(z%E~dwt@c`&055Hne8nc4OYaHFo-;m^Ve!Y{{hyfc< z>%C&-_>=j8nulH>^=6Z=h#$1)wUxz%dh6f)d;H;whmX8_-{zb!zwz>$U;AZee|?bh z=Ze~A&i#K9zhqU{-ieRTzswJ0KZafCA4jUy|9|WJLqqzeKj!go*Z-UQp85W(_qii{ zck}b!{I|h5jAQS=1KLMhakipU)aOO4>Y9GJZ#TVH$P*Yg&H zSxcRH+KFo(44<8UPWbZOimT6yQ$RM~Jd2BWiaH;$3LVwoTzPyPKRonV=%1IF|2F-% zDA}spbE+fbcf+*KyOO==m*?F@)8^fcFRZi7NuRy*ll@`c;s2R*>B#Y3>rMZ*@_~%g zQa|X_*0@+>^q1S}b^6RTzb#0gh5nJTR{td5=B?v;Cw%Ounsda4A581;OLq17BA(WH zClz%_T^)vv|FFW(&%0uF*ksd7PTYC=R`78BC+Gb^c$gn3T@-pZcqC|i>Gv<1XY(15 
z{6hbB`N;G~S8$|^cJ#6L$98nv`8E2iLmO9l$zwj)DW-Ma$++gnR!r+tJIp-%T_-== z`;@SZ*qRfUdfqZ_t&l9Ael{{3a7qCY%}rej`a7x{UF&UobCDt_`o8#m<} zSa^P=&gpod@_1~pi$82X`8$`+ecQ)Ee&Kmv9e(s|`m*EC(JU@@9-`*_gJcytG`hRlWYFp?~u|EO}%e`cal+g-Q?mg^dBrA zRrFW>rjEzFcs>kWS>4%>Se}b+dw=k1(;u&I%Wp|Oar0{*R-f`4R(O>6V=s#3u)alRb(Dcwc^z*K)F6u`G zYk&XiBwm?szxd%#k$l61$76#%SzJ6esQ$3dJMLZa_A4F>v#0I&x=Y@29lZAYBb^8H zG2QYkE^K#V74vpH62xBg|36)0xU4k&>0hyO{b8%jF7n$EZ2PY0=Eldp+J<=+7wT-D zI<7Rm`6h%+4<*TvArTZHM&Z z?SDa8VfvG=Sh@a$c_&Dppx1h{7crl{m?u1Jr)DdEYBOFPhLeBt&G`Mx$y@FH>Ma`# z!fW@RX=QPt5BvTx@lyv%RiUGkN9_M?zt#{x`Cy^{NExZyY5tn=>h!0s z4#V+hKHgmL{+Z#!>#Y6E_deeF{&)W@E^PN;E`PtgB(@>_YJXBaMt?mH^cuH*^?9Jb z-bvGAX|4YHyCd>#{B|s}<7eiiY9#@kX}t+&3hLHMv0;j6>2!;4Eib^qF} zFz1}L|7-8{8@B!P`L{2Ni|Mhko_}LJMainJy%Qh5!?ONc;%A=pSLh!ZZ9J{_irm+e zeEcbr*Ibarg}Bve-}GbtA<4&1k$m_J;d4^IsQ$3SNqhbM`1MW;?|93k*(ZMbZSdRs zZ(I2k^Pflk+q_;u$tt$@29LDP3EKDX^qFfMTk{J2&0JOXrKmY6sw~vM`mj2Rnjh|i#zrFvCY9IZIdGh}+s-HxwYwyHY zd;HUuIP)&_ZCm z{b!8Nhd)L5K<|<|wed4Q%SX?oKg&<+$xr=V>~O}n?|f?N=8bUX#w*R<`+wT-+xPD} zKK7_i(yU;&+>hw3>w(_`zBYf;Xut6JN`@`Q2(>l-i z%44?ryYYo7Oa1=uXRWnmSpMnvJodKJj-lT6|NU89?DwbB7k>ZYv@fX|bNr+GZOhA* zw=GDYh5pe~(ceiP*HMsf`-xoQ-U$*1Q@{05qrdScajVmD;GL*H%-(meZ|t`8X<_}j zd!K#e0iE+d?JO>K-eFYjmZBSvVs+Nu$zu@Rtj}EIIzRFX{rk#DMSt>Qbj<#^p*)~d zTl3hD@;rdnM~~#Ee(V&}I?s5@W48L!`qU23eDm^GZ+LYi{4K0@#?yE9!Efh(2D7+$ zaJK&cR7?GeVimeIc%*UdtM>e}#@W9W`uCTWb^2F6f9ZIPOaJLS@z-G`KQFr|`q2J} zV|!Odi?}P-ojmVKYtjATh%XNhePD+_hYzf=NAsK=c7fl%|4rtX-}G>K0(!*et8p2% zx;8#Of9ZVE*Ef8H{sXc9KH;L?JF&Tr#q`kesmGrp_4tif`@s*gJs2{M=EG-L@MfxlBA1o_sxBvgG&%b%u zMbSsnpY`uz_+Q`t*Lv*pyT82BK4Iock6!rpXMP92UH_BlWZQpR#|L}FW(^N$`+p}H zY_G(}=s#3u)atK^XLEE1C6gGb<}s8r{y`VV;pQy{b9#%-qo|sXVwfm z3~m4YWv4yqHE#O%WpOb*me>DZpf6DQB)hZrP99gzKd7JA0rM{OUr<)qc;tb7Wwf(> zQ*Wq!P|tBR{GjQm`S^vs!;c?K>%5Z+^J6PgSBK%C@7_J{bvIue4!wQbQ=feI2fWtV z?_U~OT$o?Ji{5`pbzq+9_);}yyVd&^rg>~<^jGLVQq~%;E3Gj+Yluz$zr6o(!k6}8 zygZ-erS`w*4;w8x+W7N@=Z8%`c+!IxJk`1X(!tu-Xo4-Z^91QjQKE%zYTNHGoYuiZ 
zo!W)|qh&^&{(KHJ{Ts>$_Nd)2$aX|NNME4&oz!8cNWP)wSL{XfhnIf)c;ly6%nX12 zi~HruKb#4V?SG;gg&tr_`eS=BWZsIBHAa7~e;^N@#O|H6W-P7fPoB;TU3gs@YTPTP z-xpgQe2VzHA^I3H9(>q}-T1<>Z<+Mn=Z`ry^dGv}JNEv^lknR2go9aJ?DN;!@%+!% z?e*ExxKq?V#2Taj>f_^9Pv6Q@=pQ{5{guC|*O4+W)4egCpE&5X-fYHI#1E2>9<{de zR`M3nAC6q-#^=6rp8vmYs|_-rhWJ?<-&&fV#9B`NTn=_~8S|1MPKPSEqh>;W3+d(wD{6A1>egL)Tt& zNAExL#@S24Q+NFD`5qU3#Y`9KQyblg*Uhiczfsm!^moG8Yzdl9F}uhIHIF#x6TkJ* zGoCNaC(rzv51-*;`r!)~&3nr-2Y$X4zH!;7irfDTGoGD)XiINmcY#IP%<`Gx*@nOUp9zK_tq>E0--iaJ-WtH+PtmEMj&9Unf>yE41rOLa|c7t^YF zi|7x#w!XgY-zuiPDVyY&?kP=Re8w+?e&?&txoyiov6Qeg=v@V^X%mt^saEnniqWjgY)6xxXrFR z9LeJ1H`47ts#NF%I*n_;Vs)Mmbb~*gA3TNr?XtF3f4z@QMW?*1>WoJm^sbC9#`d4b zbeo^?=={9XI5ve|Q!1BjWwHPct3WFWpqv9D38W?iN5%|@$K-^em6gV+gkgColm3A&!Zh5XdZFUyOMRx{>MeL z^S;><+w#?>Kl3ZUryiW3KkWRQ2lqbir2WFq=Up|@eyy|pcRT;UxQbcLLjUOIHQ#ij zZ=G+U|6oZ~r+*nM7eAy(J?O-5dJT`a%}>4}{%+`<@G&p5@h8}gFHHTy{BPd*xgEob zZ+rg2bAH&l|NQ%OoK}L97JmOvuj5K~XHB2A*FPjW6#5U9)fN4naB?3f=4H=gpTxc9 z5eHL0eSzeG_-pHQ9O7wyH$UTqE06o@m6y(*8m|7x>BszK&LBK?{MpXp!t47#fB&mB zhsyzL^-s?~B+noHGe6L~l64{b9~a&B`>`}1K2P4i8SVFX^MC)$3nzZd-oNbkcY|46 z?EFtd|Nk*pKj_3x*Ixacx4|}4{g`*L{V&LhrA~kLJLK2B|B}}{t7PlwZgt7?X8zW~ z^DUm#c_;a`&lCCNf$9&_FTduv)tB2Ptm%Jx_o4Up!(-Q94Jbdj(YEC`VB7J22})L> zoAReN|3Ae%KV^3?Z!Mn9GB-$%2!qp570>ivHSv{?oYq z=cbR2ngVe%{bYd_>Q^_S(;TOE2SntwEl3wDY& zFYO23iTcAAhA%&G?q+@Ai}Uw-_O@I0fzLkwwqiS-^NW+>4`9AP9^*<@bxj;Su-U%S z`4##{MJoEMPR$i%4C?CiCl1oD*~&*h_(8UdG>#3QA$%^2%cA;wSG?{I$KH37?_K$W z>%F%AQS+&@^Phu}F0=RF9rg2_Y#@SkYg{Y3?Xvm`=IaJfGf) zO&m=9)`#jw9%%8_cs}#%IPfm6pK-!Q8}9w+0~?$jHhtj(OZ8vZgvZ{0a~a>}w`{!r z?O_|&NjzP{Lms-BpY0`?U!i}afGsH;#`Pk2sk6v8|uJV$fo-{5+cW`PlsQ z?KBQf&>yz_*@^po?7oXb^R@^7->+A0(GS;c*!RApI$mqxDBeQ129Gq)3EKaEscp|E zeJgLFe_mEp^w+%R4rO#9I_1$_(W_QB##JN_WFDaTsdG9WI6;3{<%LuB{?I$W7^W?^ z&sI-;_w!!)Z2NEMIEwi%;vW#&bwcDzR-qexB#3SnPk5PMp?|C7s_5@zg!F!%#+Qr- zALz9{wej=%u{iy~tM!(j+I4wd{Py|lvOjFJ-%BSt`0evop2dajHNRB<|GlMtMX?sL z{p4} zt`5U?hkf|?$9oPBJDk4Zx>wz9_w~2!zb}i6>9Mx{|Cdw0qFB{6+h;TV-dN|w{+s>^ 
z{o{x>p7MJ|&LfdeJ=-ti5eKPHZStwNIQ{ixaiQMw-M_~l_Bv+HaqIo}8DZ|Nhdg)2 z@3&wa+x|y&9FV?p^(%^1Y=4eNg7nMhUu<4aW-D)@e}7q7tAG0cBlOcQt2*P`|8L$u z-f#N$s9(|i3&f9|qV?;>^G?(sHW+^4#Oq%BaoF$;8%+83F;BtMJV5zJ5}Xyk|2p0D z>618X?|39=e2aYj)r^6%()gY7fWu;=3;c#jUh~57`E_;XZ!J9E;_&xKp0BeP(I2*X z;`DExvivi_-#4_=Z=YF{dD!t!JBtgiPd`cj|3dq;*MHKFR24d^pZ87gB!VAa=2vY0 zgJniVe|YqJH1c`hFmz>g=lIC-Ty%GiFX>m2Jka`U{qs7nyp|7d7eC{K51u=DV9zh# z9QxL}=f1O7?OcELei_&Jl`sFDe*R({Mak~0(|#)7KT_Y48u}^pA1Xby`u}hJ{~-Oj z@%?$VE12Y4yw0yYVfD-M$*!I;EkAK!l^_8s7I*r%< z|DcnaG5Qaeo{Iji^cL6ur`vxX-FE)oJF(#dQ-9hYd0_hfWd1rmC+Po=|Nc??Y6j88 z=FRg;-?_w#?SDb}sG>i4G1}R`*mlv7dhZlR1e@~((GRpfl-KM_sTBIYw6@5<_;FX~)0?RamZ{;v05>eF!) z^G_82{`KOm1Evr0hUVphtm(_{|L-J%KgB}-(K5sM$yek$I@2{Tl8eRm7cQFiK5Kc2 zKOL7mu(EwK`JnZo`S`nGTIXGCxAt|>jW5i+ z{ngcHOfLR^`OPPO`I*nZ03W}<;5r}>K8_m=lRl)Y;`va2W&hKV44Y3bcAtO~&EH;lzQvO|?#ldaG4HcBeuQeRF22Auk?4j^n-rb zh5nJz^moDwU$Z4P`RViMHJ><0eI1eqrg;^=j>CACzleVL!dkPJ`oUS3d?n0$Y2b~Y zJM=s7aQzk6$AZ+Q^Ujhb?F-%DuV6z63jL$P75$yWscZJAohRtTrcd&)L3lv?-H?1k z9j{_1ebn)WsjuB~+)3~Ka#-mPtA1(sbGL`b?*H7D`SAM8e--aPb^^Ac>PNj)jd?xE zWewHiLUh#nLjO2oo&M?nDWtDM?_}h3zNw!+!Mc35GvegKXUIHU7ME^*_`+5v-t*gE z{%x7C&5rYaUc3QQXXiirBVFfu?^$~OAH5YNtFun~M7PwR&acqFuVks!e=`|_{@H)o zcHqXxEidtVwXL6a78lEJiA~)6J?eJ}PS78EZ~p3Am-ta0c3gSc`1{9(@Ye4Cz25&k z8DFY8+q>DCSNZ%^GrvOr{<5}K{~gNcn9s2;nwbYx2lN7or#AZX{8=156Fzt|-FEE_U&&U4*Y-b+EG}%PmH)riaU`p=ws~n^75(Wq!9xFmlBJ?Q zd3`d5>C_OL{=8GWUy$Qy@)lfrMI)#e#eQ=gUZJ| z)9u>)jGN>WuS4sf`ea<|L;ZC;_`}*yef)@%f3-?j_q@-o_nno8sJH9iMzgq7&c8L( z4{AT+?X2mS?`^S7Kdr+q^dBm#>-6V52K^f!Jc`6Y?@HD&`>k&I4aqltS7ZN+{_vHz zzqa)o9_a}~+wA(YJzJgM-_4WpIPL^(KDqXzC|04H&MUPmzrS{pK|Zg$LjU11Qqf;| z`5le*(JHfx{B{I;y%7h!lXz;|_VXV7x8|1#Oo%_hwuOT@9)CY zdFOrjpvS%zR(f^UPvqb2_fF=;=P&dFw~vSa)%))em+H{~yWX;z90Xn5$n=tU|Zs^?=<(AC24RCTu4Rh5n=Eqgwst^t64BgFi)d1idR+ z$L#-IG;M!XIloI^?eTfu)!5|M#cTXw!AF1k(C>G+BmDG%n@@Sy(I+qu&OclK3uIig z;nQ&?T3s6-`5^tar3T*KNoz+(t^Qg^zU_aM$HvpV=8wnoTb&a>`L;a_EIi-p=$m?Q zg8nf5%=KCuPunW2Ip^+^|N7fC=*O-U&??a<{KN9#*^QqhI*d>}}z*?LQeu{(hl2{z?2*d&%Xbnt|0}M 
zr*@%#UPdbVyVBcP_w%`toANk7khoT)Lw2e@4u&g*zX_dv!Q$%XWqs3KUhX;_1~_H zj(H!=qiOrST0NcQF%Eeke$e#Se$7w+%ICoe`a{0vLkE8Somtr7^-Dd!_KWtrM*IEI zNER3S{M%C7jYqLMYwzSS^K0hgZQdt{Gv7l0p)#|gzs|4egvaz~h)umu;+B{AZCvFg z4>Z1(@S9)r8OLyf{;>Nce_!F2vz`olzA(4{`Pnn!v+v)Bv$)jiuj5KqbEepjyl$^PHPu;p>w3qP3li@zJDb)NB< z7q((IzOcfN4tA$}{3l_hb9dZj_en><$NBK|^_|Dd|Iclket8xb`Vy7*{>_%uk#Db~hWOEeIOttcM<0shfy}G6PWgy4uGz#H2V0SG!trlhahv5%4dK+E z&wJq2NuB!-9GJzWGQY|42jn)M@=JDSZE+_)bTfZ0e(wb7r_jGqdTRCm|J(mh{nv-D zod3upH-v-VasHQ2eD5>fm9sCdGngmH_Gvgd{{G^Vw%=(<9&7J-B#6y*3l;s@K61$~ z^pB5f_1{?Xc)lvzhZ`Tayu@#P==kJ;Hoqipb;{>K=79}%@%PSb{&3?fkKU?xoxM+6 z^_rpU;kW(IXcm`l*S|?tb!~iGC=dO#7H${%$4H(2jKFa{{3)7_gHnMF*5wngL+^yo z>alCEjt75O;gf&e>E4?T4a?v8{TWvgKG5`TB;!%%#P7ig`a}58iQBC5_;3ghJka~A)#k&;`5!)q zfz}`6DM~lhwehKLmF=k^HSiYt_m`fE{yGl#^&#K8GP|?g!%trGq4DwBI_-n+mB~vH ze(%QfPShV-C!es#?Oz@WtwVmZ!CAli5j^()3vC@oF@G)|e@?acr_Y#6GNYou)^Q&jbg@3#WmQq9e8fTT%4lc5L>@>UXs;`spLb<-hu^!>sy^@D zmEJ}7hh1ksyu??RdnW9<_xIoLR$r2Sxc<#_=u_T}^!Q)rNxnp@YwyH|Zse!)W8Q`S zgJp${>q@IL@5=Y@Ngv*o*&UtX^{(`G{N9z;7@gYDtiC^}{wa@llHV2SFU?nfIBV}s zXYBZv`Qe?p^Jm2C@;#QAchiPBrsXuJ;wTq8!{^XUg>B@J` zz2c*t{m%h9PJe+F+eA5X&H$JcS_(%EZ*Z3AlJ$8!ZdmYCRzQy$SPCDrCFFpLbYkQYH zVZlG%aZKm_^W6V!Ad3s1kNkAK|6xo0ppGxz&YFI?4-B?#w_3;ZS?E7nR#)_Q!pZk& z=t7K-eFYjmZBSvVpZ4lU-|!Wc<4Kq+TKa>L&;kGXGk9NsciqsM;!Dnsbk!_{3K32 z^D$d_C+hE=^zO@#|K73#y~|#C^zc<*t6u-y&f;R*SIzw=C9BwOojgkWVZUwubbe|V z`o~8V{mJhae$%y~DW-K^^UaS9(l<7!{&4+;%l=}^9j1mG zZ~4;)_xnb}YuxVtHkif5?tituzW?UxR}`zGe`;60f7CkbTkS%BSJu|)&;OX9i|xm> z-n*i>Scro*4tgX=9%#IpXMXBj9Hw@A^TfO4hjsgx2#s%^5N3Vp=9hoG+k@2E_wNHb z4%qGgchd3HMmN(TS6=cPVi)?yM-}~@#Q7bE>7jMDAJRPHpilhPhmKDkX!^HgTi%8&Gk+V$$vF5!^VOSnn}7Xq=zZ>j*M^RrkKfKe=bEpW|0JG&7_!%=`X#Hg zh6m(-1HF?RtFP+cE;B0nYaaVC()vqX4p@YsRwJZCd zhSqET82$UpNJW2FdOM44@9=<5a!kk8c-#DptBAiFGCxBduVN>C)bWPix9s_;(dkcy zoo2o6f|qVRjOX39NlFx4QGs zGxhgJ315~hK8aWEKbA{@^{sst`VW?oivF(jF690Xd368#_y6#=mES4W=xOsH5A2d3 z_W9Ochdr~$m0`anulvCvzkS7P9p67p#!NE#%>(ugdi&PBJ*IWqT;} 
zA1Wgi{dFGA?WB%;UN`V~H$HB8iJv_BV?LJO5FYZ(-$hUI4ZGxrRi?hL_qy|L2-A)| zcG_m|e1?AP_jh>~7y9!nFXr`EiEh?c&HM`ehsyzL^;aQG4|qUVRxjl9uZwQ`T+BQ{ z_&|GJTQzmc=fNJyGkX#J;fS5Ddg7WL=7%HRxWeg=Uf)MQcKid6qW%9=;?HUpH8-A75a~q6&3w; zeC{Jee#!$sanQSxwez|l4*iPJKU$7dtG`}HXF56UD?#F*>BIQst4$tg+gC&Q&7amMJRTeD zk{>?x>XEyB^t_A1=~oQjcJc>1`yaml9nIoWIsSxKQL?IQ=H2A_Lnk?P^Xr`?UzAn! z*SgC6HyX+ZYCq@&daXBG^XLyhXs@ffI`zW~kJ-c-2OCs>@2XF1GV=q6oYuSgJx5>n z;~7seuATqv%i>~sEaLAk(tgm*{0-&RewcTmf9%Z0(>QffHRg2>4;ZV*>}S*uKL{U) zzZ;TonDpnd6}$0;{STWp`rz?rhkbu~$2RX=>PdJ_|IsWimF=JTf<2O#u8nVX<7NOu)W0V1eC1K8XoV&*K7%zzoB+6$ovZZW29F9 z<;Uk6AM;m)5A^Pz*O3R>^V+JZQ$7zeJ~r6JA8!2k*|$Hv;TGWsXC8gUwNn=`AG`j! zoyEmI->jh5Up3UPC{||;k9Xp0ZYhXuar(<8Ug#em)#+cmJ;DRFg^$nO=nAIe_XWRZ0zWjvU%Rc~ak!Q%f6{V}re!7|N ze{#u4^(^#nm62NgozyWen|Ct4*SzLRxu@$@Vg~mzWIAW=T=7tTv zxz4`NZQI%Z)cyWth4SyG&fU*+Z^-;?{5t(t(LDOg)z*6YEc9=e5z`-CUFj|SLxSYn zej*?5CtinXzvQLc1AZ{A^G?RC^HW!cVfwf3xb57{4i}a&+bj3qXe&R+|G&4?uP9ch z-uiJ8sr>&t{&z{7{tEs3N{{KUdBzu`_3xvW*I%*}=j+-L?7Rcxcqje}_K2Un6fIx# z%yv?j+G%}ihfSCM!ciBU(+Zo_ml?+IN~`m{R?a`W7`D&ZUTyfm)Q`>hTwiAS=$mJ8F~1Yrr%3)H`r!*ZT)n{y z*KK)a*uVY4+aLSL0K9hoKjF)7iuZpQvA%WOELkkB_2}l+ws@Y!g?km&$_q!79Y^P>xU;O@p`GYo}mQ)n_50;NA`ZGVxv-*bA+53@GTl3QM z)m8_eBK~e@{Zh|(sjdEQeBrUb9kI^`<`4Gv%s64%-wxQH8zL9Umuo{O@h51AT#Myv`=yknxxYwqhL*{xEsx zw=R9v1EXQNSFhjn%va{XYoC8xSzMTB{;1ynAy+@>#7@`7x3)yE?e#<7I!>YgaG6=r zpFCGa$GrdM(d_JhoZ5qB|*cf+*KGhWgk{%(9>*{v_%>l0h-6(;X{`{XNM zuHOHDG>ePvhgxc(^UZ7+Zx;Y*R(igfBxBa!w54+HRq^zvy?@DiRJel7AQ0GCtcV$*bXVcR~w;hMl z7f2pxd~K=oPLY1G!3p|%m%IPk#~%I2cYCKSHSd8HPru14ANzm)cLcQl7*{c?S-i&4 zjrpeYgSXIsw4|!&uk+(~1myqQw*S6z)Xwu}edf_!ecqh-Jvc%ALw?MEpEm25T|)EN z#~xnw=K1Ji`~S9%1FjO!KkVV1`W3~huD!;)p8wJQdne6`Bz5}pdNci%pK(&%_}?q? zLCe!|@pr?tt}<@YPdC1>{ooJRo_*o?td?EkarSRGQXEcjTW&ht$KdAE%Z)feD zJZ_Swwr#gshh69&J$3pww--L<<>5<^I7nVMtj^lzrEzrgPIB0e)A<$p$BbJ2 zmCy7@`hdU5xOd2S}v@}{A%*ZnIl`O=&_ zyymh0Hy!%2xcHv<1^kt%U!ui+Tjyc6j$_CFP9o{^LtUYNWH5g6LG1%w?0L+UH_2G1gVu-YX8!(#=UZI! 
zt=>s~#a={z?@D+6e&vVfuhqNq1vegj#+)tSslESCZ`}X%rQ4l!QRt@QnyvlX@hAPa zB!ltTzZLq&NUi>Dsq3nT<~8>mpI=vpU$Q&TFMiPaWgbo{%#W=|T^)uk-#z!#qpp7{ zZ2hxMU%lkA>)^5d|EThVyXf~Xx%xr+QIu%s`RMxpk4`EI{oCcEivG&Wc~s`Z^Ooil z2fZscBPm%E#)vtNs ztRKJSv=i5Chk<#YpEP<~%d4O7jnEI=K3@Lt;xOBbA>(QvnJ(0!nbv_k;GM@1)ra#R1^W9tQu{n$2ss~EIc z(tP6P*FL-}vx`1mxh`-q{qP0%zEf{r>8?k@8h1Us-n_#v=6%P`KlCX-I7N>?+v-;o ztF!h_9)ouLgTJM5`c}Kpf2fSq>c5H9(Z9_b9!27ycS)Vv_!-agYxpdmdh)Qrnm*tM z!zpJTI(o(hH-yjZ{m|TBd~wwD-!F>``vuVce>@)_7pt=!q&D~A@lGOjx)u5lml^2c zU1@cAn=P^FpMDM9__*aI{&aluK(G1mn!k(QR!4nJK73)F>(_bhxSKh1Gap2Y?I{c`&G*QsAotj^l2pLsTSmKZkk0_msFe?gg9(O>y0*B>&!1mOd{)|=fe zKhNS~`!}t({;@&$u)!|=aLe2yZ@TB|CBtpA|L1=mKdc}A+RtCDEH2iU+T!hOr|@_u z5zZT%A0GNsyU>56tf=Tu-jMK_j=9*jeK@r>uX&c(( zecAi3j&Ji@R{wvN@f4-Y>YD!04gR|M^-h`{Now`i{a8hhq#xoSeVE-XpSX+dbdHM> zKfE?C%{PCIO&?*~qd!~x|Jr9l7~J=W!2{n$o$Y_xSzPS+ZpHHQ&)uJ~=c6IG5v`@_=4tghXvo()C@q;$MgpYjq4B^vp7S$i#waX6% zdQN>Xy#J>4_dWIt55ix2{+ajRb`}?VUKqb2e%8jfmgXmU#J!Vvp?`c-(O=`-N6~c3 zrOsYY%1<2hT5mRe)a9qVTA%g{??nCKs>gbcY#q`UuDRldJMKEDbN-+IKWb-jF`bu< z_g^~K=9^2LHQSBZP9C9~^_hzw9&{=6Z5cjD`I{KNbT{qwS- zR)3u*`L_RQmsLfbE7!$&I?Zd|E;jj~^_TMXFFe0e=Oi!X@!kAMJgocn^VYcIl*ht) z%iX`p8=u;ldVBvJ&f-$J{;f~_Z9&#NPu__KA(NPbz$Rs*VuIXk39#Eeg8h9bqQCN`*ArH@|3oj-Q|nXP`e_$~7E2}WqPyc)ey@J3 zUtE7fc*FYJ9d+yr&#xI8Z+`EbWzIemUemu%@=T9u+r}@zY=5gflGRzKecArsDWBG% zAM-Bsk0aW6b#SDn=dv78kCM_iO6;hnD&k#VWR2$0I@dGW~U2ws+##h5r3z zWv%{N&wX&IGt@YGrROW~+qg+S@j6WFyeqry?DxDYt&R?LJov-ZC;zh5SMT~rnEKLh z9{%GoKlHAwF5bWR{Yyh^w)cED{ryp&`a$xX_*r|koBaN$Mf|}QCj?b^F!`}_P6FwUky9T@Qh0UMe{|!BR-w-zY$e(6>YlTs-Je+^vxQpc!O~&;uCzKl>{rM)9nc*dlDg(+1jz@}I`g*{ zo^NsZK_~gfhYhBAiQoQzEnj7cTle|(XF{#UXJofpb`!ndy<2*+qUH;oo=s!~Aqy8&7TTbUbh|{qTjg*V%oaJDz(X z%r@B^-|8uSHQB>YS|KZY8(chKc!apEL|2*G@ z@M&K2`0@ExN1mej8w=03cv9z`B%3E#@`)@gYualR!7@j7de{}5o&L?1@R84WhN=&7(7U8g^O!Gr zAbr)=DIalo%+@%3br>3d`RhBEKlzTZ(bv}h?im+7MZLZM=2={rU;gsKe}64mowaR` zPJHNQeX5RLc4Cm9dn%C5ZlI6T{%AbzkmP#Pk%bU`o?X0C-1!d4V%s1 z7=7&ce?;d8ZWPb|45ZsXb)ZxgI-qSSc{@u>MC4BJu#BcqnZsdWy-a+#(rp`8wj!z%>!-xJj?c~4y 
zcFl0iukL==E;n}WzsLSRsmr&}-(TceT<8xz@pjhU$z$$YPM+1ZCC>Z`{o@QP`ZMn& z-*|H2@lLTV_<#QTPdELyo89%oyfFQTD_yq1Ko75HJO9SK74u1Y{HgP^*M*b$QZH7q zA5nkh|G(5v{uupZq)z|z|E%luC$IVQEG|B>j(Oim=EeA3um7h``QWwfYBBv`)^X2$ z>)HFh6K0+D>=OsCeFr>z{__1RRGw{>Ap+C_T0M`|LwHca}k*HvFuE|!i%+TDpLR<*zJPUG5FdkeMk!=rKL8};uNnT`6(8##5B6`r143wN*Fx)A=#)sQ*AwQPW??X?KK=eAA~Te(xs6Eids~AIeJ} zX#FK|t5ZG?YM$9W{EQR6eA)vazU3$9gu6a>`^cV$FAu-{|Hx=oE_VK{qqdufVpVJJ zNZ2a8Bee^+>;f3O7GzD=iG{N5=J3bv0Ggde0Y(EM)le2ddRsQK`C zY%tAB{9(qA_qyz++gA#+-m&lQKYs2H)Z6vvovd8gKlnBE|7YP*l&q+aj%T*Uxo*d+ zZQHH#Gry?+P*K~Yf6wh-$A=GOT(dQge({6&d({z7^J{*_b6L4CPPqG<4;=gccT5Y9 z{(9CczINR8@YwmcuH@Ts{(tKJlMVeu{fCPY8};8=`l0`(?VmX4T~f#RisXUD+f=81 z55k8H_V5Syw&7^JBhI|B<4yroZNKAG4ZH14UJ=(>&s! zcSUsE=U-QvmS6i`p))aVJj(B>2OIp9z>b)3$hzChxsjUOa# z=#p`<6{%~&;Evw#=__BjG0c2snMY^7X9IZc{By#W%jG8nY_Fhn5p~miv-{)`zuP5_ z-i*iVF6uv8R5a<&|L8FvJ&HQTDKXfFQ=MUbf|5y=e)PJ_rk>hsjnlLTIEdF{x!)LWg7)5{NE6NV?RJ1l+xDMl;;eO@Gyi&mrU+PhM2TI#;YC zc1y_q?|=XD_uCeJ^gYLiSwFh#vor7exp&(48w5><0kv0oW%&-w{0z~jC3&p9lSlUX zH_4ks|B}JR)p~e9(;vPRwT}0F@rZ-%zzC(n;}>Q7PgYVjWaut{t5(2<+B37f8b*osdtbwBe!fBFJ#J}vD- zQLJhW4~TBoUoN@C!KiKeQ2lNfn7l^+Xl5gmAe8$CA?8O((+GgbXjSiU>KK9UQ%boin z`yRtSe|42#G5@;W|7t+}pi^7>5G(4a{Mf0!QxR(f6e2%hMFGmBzf(Jgx5R4raJR4n3#tjWIpg2 zGQN}g#r22nx(}cBo$GfB+wFhpRmXj$y8qalm1kfg=4a{mUy0vd|GDI|reE%Z>YYUJ zrx^7gDl(e%XB^W5z7$&pOWgl&BuGA(j*Gt+rgh#4zxlBhd+~+Vsc$*=6My?yXq`QG z>c?Jnhu1p${w3kd-=q5URk)p+D^T+#Y)!_So#O_bc9d>d3eLi5~XYMO#J!^Fk@)GKy#M&|KmCG?pV~>E^qXRs z`=!r4I)CZ=!{&!ie_-!d_R+sx|2&qJi=BUHxc{Wr@g%!i<3|VhYsXC;sfqfJ6pw29 zGru9pV;=TA=HmBGQS;iTiA_FeeJHQ_Ta)KoT=T7-@$h4VJ^W$%7VrAjMh~wQ*8S&s z?>%XsKfr7E-|J@O!t2xCe>>_2lW`;(byR=P-@mk^BI>`ec+{kSTi?e`kB-!(`NTo* zB+qQ^i~jI~wqNZw)G40_;ll=d_`_xI_}fx@Eak!#4-PH+>{sj8KXkKlLI1qz{(Cx( zWLLU-C!Th{AoDfUIP;GBj~1D!o|doU)Xpa{Peu5+?>#n{^5O4=X`OdPx3OPWtn1i| zFKo2%b59-e(}iK<1M(FQ-}*uC3YK`>&@G|*T~#i2zM$Oy58M8uqj^p+U3>L6-hV!q z3i^!tj}@Mp{!a4P-;i(HX-nft9`8HkgQ<@AdtqAV8Bd>gv(?{=ufOk2SHJ${Up~8k zR&Q6ZU4NE$Tyx`MueX@nEL4xBoDOLRabb; 
zzl1t7zM=Aj9j@5?OGmtFB<#8OF?X#0hP4^jj{gT_eCqPM;@y{?{D8xzf61z^iPx?_ zbdt;S%KW1KWu&IR_RD!}@@x8Q9&ym?cxG!J{ox1EA>BU6htCi`9cOXplmva`8J9oB#CN$)kUX zR_iow|3B^|$Mo-t&Ag-jdEu$)kG>;Cblm4ybOK!&&bVHn{Ry$*2k9F$e|PeHi_-__ zBp)7+4W@aCKm6kAJvTmepHBGM-|rmS6n7+bIwI zbSAf>{$-@5zmxp7@|Yg#_X!dQiKjOH+I~jo#s00aE*aPOn*7XPAa)<-J+sN|JwD>%qPXcf+ZeDzeSL~ zLHY*G?e!{f0*^07hnhxI>r^AKl2311C1{ouTRHQK6v3Vn|MP$T}`bTdD^9Vw)rxcv_no|^v2IQ&l0yCOU0 zyJB4-eChpC%&&a(gTEJ&Z|KTl?YP@xEB4|Gt3I(;=PL&<3afRdj(mRUpHpwgpWUon zI1ccumgk>Ow*5!0apvR1X6@Cs`!A`@_LY9G8TB73YMb=ub0G6-?4x9f`=65vw(9-b z>gZR|{H{#5*B5@!bf;eP&5sSHd5J&F{@!y}uld`bhdFOJciMu5|Lrwy=bt-Sxv-zM z|DVd$4>FFTM5B&sx7X2p@>*(l1({#ef4J~8>ED)*$EJ_<)h(*3@!$i!E27o&Kwltv zpz(GZ>YVsJ$h@$@9{#Y+j?*vx%g~Kso8|w$-O1fc=*RZ|V_CV_`Dg8iJW#UgYvSmJ z-88?b|AKPm#R#6WWIpY^pQ=*u*^ippBz^9%LTa zU=M#d_`F5s_0I?Y^Ti*2dr=>J_W4))vHj03`uVq$l?&USSoJmiq6apwALa$d?SHgj zsp;boNIbQzpRVxK;(3GLiQj`gbhbRrXPj`rTc$j+?ffIdQRklh z#-k5Emb}K}pC0Ewvt%(o)eYM)&&q{i$iCfZT|GNVy{tusB6K73hSKYmI5=nBFIqC*Yo=>HT_-Tt^74TTp70GU+TGzn0%1& zz|@bOBKd}l+n{q=pW5LyQ;z!bD(lY<2Yu=XLmS@If!FT;ljy(1=O5BOTdG%AkiMe+ z<%sC#oy0+YM?k(k-|%TZagchm`;=D^Kgjlm9_FV`$AOnTvx)b}?_c|Go4;b|CkFc0 z{^i<#{l;gf!_#yB55HoTEMEP_x2q&z*WQ0S5|8?Ki${(6Zzy@DM@wvb9(2CMLGO|} zbZN@ZCEuP0t+)QMLHMx29{%vIM=m}6&<$@1@89y-kKc3P2>f>ZnP=rv)2pR^MX@G% z|8)|v-(S#AE`H{RPEr4XB4YZJ?=)Y>Bj5VUC62!rGEcBqK5_C5=_~D1{nRyKm@{?b z1s5%SopfL6XP>`vi7?OnbH(3JJ$Vkg8LBSufZoZYwj#D|uZf;f|G}cd^mm0duCJM5 zGavdgbQ9y2m-xNfrekMfoy{xt_eq}Br##qTkNohO-4>npzVnX^2W>s?n1i1fqaQo} zJerk@oqyAQyi>oTSk)RHuy+4>cofko>OWMlH0rPS(IP*69=$87|AWV$t%-ch$2;*G z_9Zs)9{&F6&&+?vqVJv6KjSBF-v92tKf`aIzq-l`{;%Kvr!`!R*rY$lTc*D&vSYq0 z*468Tymm+Y+Jmh zNLDW13FeoUzyDp{UZ3ihXf=PcHO}X+v=6pl{BiqVP}J7+hiCO7TIu6mVb%4A5A?3c z8dt}5sYo7Z`!jw1yeq1!`FU4Z^>Mu`yo>J-cmHmsi+^?LqHy=J``)m^O~;}K$IYg* z_L2Xvy#G=+^;0LKS8MO&G1np6c9M%99_AhOA1NX={n2Z*h&Jo!T@j7-V}k8l1mOp* z59Kxg@Z|XxcM`YyCVMgcVgH*x^1`c^J3Soq>qB0(^WKkot+V5wZdNY5o}0e^m8|M{ zH(Tdd`~7t;8RV2}K; 
z-Zg)|bB!mD2^;ulPhMvKHR0j9AG^+AI4c*w-3H|^0NCC^iALRYJhPoVs$Kut5$d9!Njw^J;a<=RxH$dolg}Gxq+<`{(R; zV*gCPUgx&f4e;CX&sbJ2yq>Ku_!T9qzBaz}|BpJ7%lw#k)W00DQGcx?pXc9D`ymec z#7|!!d0;wE^Y>}r$_FnzW)pA7hcDdUxBKYtmf17>=BD4DanjeT`(GWMl?&HVg4Ee~ z$}ic~+Tu=pZPm^E*cykoe~SE3Lrs4jr?&lTy$3Z9K9G27;|DFCXXV24V1DH%-*9pL zNnW^Q@4LQp(EL5amCMii_-F1~2%l~L17#Z-etZ9IsbBfTs;|)--QY=&FPLA{zhrFG zzawLi4}XfpLB=&(`=$;*h@aP$`JIdppCNo{yulA&*!J{Q4?Xm!H-sIon)Z$>-r8Y2 zJO0VDaxuNuT)h9kw*Gbe+V)RhE#ar1sJ|;brayV5H}3yCMReTpkJi&SJSlotRE?{H zSFx6#m+9I6bo)HMPprJ|<4)?1z0%)=9JKwfv`gT~jOKY1zIc**l;e)?EU-mt+k4_&a%c?X3JR{p}V z_dk0IeKnr{AJgC8q5sBZ?!f)jeD?Exq80giTdYRTx$Aj9rrOq7q9*liG##b z8-M!xp>N`ibuKFx@=~O(3Bzl@bH#4|I&vr+=t^TF@ zwLY}3TJd$?ee9OdnRm*er)~aA_-y|(n3W6H-TRHppWki8_5w;)rN7xu9<^r(@_ex0 zVY_F3QU7j{i5}h+RuYA}krgb*%OVwZV!;W)r z|H=zrZ-w@2kAMA2D-OYD+rRdMzt{gC(O;$ub?ufou+fp{fq6##2a3#^{@QPQE8!uZ z=igBCh=Yu4w&u|bp z{r`L7XC9JWt?~0di0z%^q}OLg{RfLkqyG9i&h&6a_PE!FD^0sTI?YRMulY5d@prRw zX{;k}N%g;G+ebeA>bJUZ?QtjlVe3am(Z}9@hh%))UUx0OfBB$iTt&&MuZ@q-zeyd( zRm?ByUyj(Q|GqK?`RHb-{KP@}@ag#Irt`FX@-%LK@`-~l(GOpEcE4Y}c+24R{e73M zx=!ECFVbh@{(mdI{+V$Vvt;q=H$K(P{7IbsP}F}oi%X;a?IPNI9M5>n5A?368dv8^ z)6Q4%{$}~;nd(nnisYyM9(LI7vS;S(z0)ya`+xrB_}9AQ*^cb<*I-sI^yk;n`ww)~ z59;{h9k)MFKmSARoz|%zy}+pdf+C})zt;7-{vRH61ijXqos5qkWP5;*`kJl0OX}}m z`LOJl2Ry!M|H^MZ`Ol{u`(ws2{l~I$`5&Kuo1;9;yEC~R^&crBP5Mh!jOX%*?Zr^} zQysBE(<{j*-h^qLciIQ@!d7hJ!5{yB`L`z@yXX&LgZJ#P@+tSuV;-h|SNRqH_wRqW z`NCqvCjB{&P5<<3*r2C(1?#x|4|+A_=ViLx57Ileslx`B(BJQVam#TVz4DU&Su3ve zt)0JIUH^QfjyIU#EWQ3eSHGfIQMZOZ+8xa&-cs8MqG#OxM+;A*{`xv1-*ksh`H6$x z6|65o|BmF>=A-=HX&hWafB)(~A39@&8Ta;28~OC9hdy>Py!QRqsEz~fqyPWXQa`BU zi?>?Sf9?J=9j(_o_P0_0u_Dr_|5T|O!%!1e-kplG_L+| z=67%Y*}kt{7|z_`;1|BW*)Y8J{X63;=AY5)|GQbaaQw);#an+JaNVGHl0iN;^X{Lr zMVZj3|3*?zKDrsYiE+zI{5HPoKpx2UV0l_!vwQgAYr_6@FZuQ=|9EDD{yF}hV=lgD z7=F9{c}(V0d;iVVuP9d3-SJ2e+wMQ0^R3NKpWmo|nNibU>)MLUhkjEeul-%=gM2W} zH~-+|`4-plnU~=b`osLOX*lC;o_TWu@UgLKBIh>V? 
z>9L-ke=b$3wnNE^w(_R7{r=ra#OB)-n|Vh4^TK1}xx%W(;d3DQ#*>$6Ha>a0kC6}3 zCun{rJlH9cZ>ae-dolfCPBGPA z{k8MYx#o8TnP=3$WUT40yzTV=f7NgER~tUiyNTmqTV40x$5S4wcT%^ccKEk z0=+9(m;CtOc<9%abauR9&*Mw<_s{(OtjCUcVuk)$&%I^g3I|lbzd-*n9S7{Foliuu zs{KT2bN#3F2~VH$s2%kmC_FX&o#fSi|JxEj&pZ6+1$wPFn{gHKgKQ7*QD3u_cS-&I zD}Us+^GCM5xPRrZZgI$SZ+|D_u-~-z-Lb4({I>f4I|FJfiWPO!`FN*s#rFPc6K_q7 zGw-PXU=gY5Pu*w{9k(6lrCFU1a%ztXw!bI{KWKd_ulWZj&$l>zfKKv_4;xJL5`S3# z^^4|T@%?+khOKoj%$7gKYuv6s%(HS~d&s|A{s3?Ullm6Z$mpy{Rk;O~Vtj@Fs&%3-Sgk=niZ!i>A$xkcyJ*}-l3>-m2f zJrX|CU-hx~+o|RCpBviiy`|%7KVnsD=E;5oenaM=81)}6s+;t$-T#5{LFGy3&3O3f z3&aoFd=ozM;WN}cvt3p$z4F5~uig6oBieh0YubzeEB{{EXQ56J(2IrS@wRjqAY z?Q8M>e|C!n1xus;>G!`*#`IvKm+7hT)V4kcvvRR`M{MHeSAOp_4lbcT4E|%ecb@*y z(eVC1_dWiDr7QjU{57cKfQ$M4msCX^wXgOD%HtH1`LVsl?SG`msOiuA7D|0h58|Na zwTCC>Tb&cX2U}`)1<6a%JBc%|8aBo8gW6{}?k}JF!X-zz@YcJ2@PTD781)*r?|3M{Yi^FFw=#8%y#mK`v!Zp)BP9ojQTGuDr)*`U3-rrI_~{9>5uW@1HCJ%#?|$S z-Ix!r<{1xlPV2#!=!Y-dzhCRC4?Q#z7M`{3q3f)(C;iyxuMr(bkvi|xuP9cv_Ubo2 zFEMQ1x6Ri1M*T;N%trn7ecbwB-k>Y0E1fORm2SH(!mDkVXXRq~YSW+jo#goxlktgD z*M#9+OW$zqYHO|?-u0Oae>8jcb!?Y*{-K+dOYQS-lVAIgs&V@t{oLf8M3|RhF8-+h zSn;T)zw&Y(gZbF=sP*s>2fd4=aWNac6!C+`+i9qC;`bnY*kBL8-GBbb!w;H!)ZFU& zw{BK0cHVPT?eOn^{~}q{8a>+jy$N>H{Q9TJ1%`{QKl1~fjE#<|i|G&h4R7|! 
zlkPe{9QeT2AGu}cbEvofKN`)-#a;*7?@@dJ+mr1Al&q+?)#hOHa?YEtN{-vJzs2%lh6FKkW9w@4T(=)TyC=|B*eu{c1a|H2vXM z%wMnDe=92&<}cPH`fDA!p%1Sc=7&yE|GeO;>96zSdlmAjxBVpJO(|I7^Yv3f?*y$5 zy%f#AU}7GA(E3+C55k8H_V9;k^DlYF+Lx{r*8Iv1|L9y?-T#*T|3FqQrpFq3|Epa6 zieg3GT0GLY6RaKotDk)47xnKH6*eAupw7qse=3j3ak1x9YwzT- zZLexG4*L!Ii~0`~6*c{ppZ{?q-?sa_sH)~q9Q3Y;R(ja@%1a*TU6CF6I)diceDB0> zxP<;N<))XN_MXG94y*n8qR)Tl-h1J*>z}(ij$;0h-hU(U+k9G@pY&tje`N?;kC~md}PFrZaJL42Yx&M zFsA&V&WHWBVV~rsYvWr_c{H!4pYlfihl}S$jR8Bj}yvnXP@&FMg1G>r?B=xA_0iAKv`> z|K92OE&IZ;7p&EP#;c~nYsa6%S-IHp{~SI3?5bZ;tf-rlM=92hKgmvwOxWPJBL|*2 z_Ymss{*&FTTMHDI$&fcchManC`jwy;IaY zj=RVQQyua5!nDrDV;+pJ*o!Z$`<*La{*U1hW-oYlg)6Vz3O@V(aUd%fdmXN!|Nqib zzf;ul#j4ic$zzVgt^Zv7@X}}0e_;_Z{gv0d!W;b^LGq1H^YHh=|5TlK(vR_BEB5Lm zY&u~^Q_WrxR+yI*W zfT4~@y;N1(t=)f;K0C^r*!@%F4YNuAbbsEo|4--XwLkpy3E~HhSNk+S{VE^4@R&`! zM}F93=~vux?khG3o38a&_x!BAskisvJS!LL=cV3%#z{T8S)aN1dH$GR)W7Vwra$x0 zI`XNv;{i93m*0Jm4_Y6}i@z79b)NC)4_mPpUzol6ox5&*^)q3EcfVwjz8@7(O>=*XtkLcQD*~=bl5|t;XQ7&%a5ZIs5-s*_NH}q)xom8h?9j z&4%dEQeCp|+oE;8c{=eqF=F;m-FU~)Y z>HNTs+WABjtJ>bpR^8g0Yrfj)_KzL)&x_2O{;u#=i=B_9p8<(^C&=^3eG{-j<_Y5O zh2$IRcr`oeqlq`HcE??t4fUNJR{Q6HXTA5DYv8fpf5ESqpI(0cvip?T>r~@evUv5| z^@r%6OCq|{U(~-;w5R^mE58SoA3oj}utDYt;_rpz8z$pgAL{SL7gql6C*Qm7eHVt6 zxBS&tW(@oqUc3G|&&q}O-~7Dt13+&2B-@>LlFu63&Od0JaZ-Jv{@tS1_{mq~JSy|z z`GW^u;-J@hv)K-k=gZ=GRxa>)*I@VX!`Fmir;*QGwbt@igk5(!{}MiyieJEFRVLr*5Q(jywMe59rEp#{cxRuM`{oK>7sD-<>?);`9MJ$%n^dgK1vk z5BuGI-1Dzpx-abeg^ynIrqynt-ah{hWaVOdto)nu1gP1>87G%~*51jZ#sB@+RzpRF z@oT+T2*)cya!S+Ate7%!6ej8VL@q;$MZbO~&d60QwgFXCV_wOwH)9jV54~Lz;%EsS+ z-J|f?_m6{Fx!CdEHhTY6r+!7T>TBkSZrDxpi~FAi1xus;iC(t<@5- zr*DL}asR)@OT7O8+o9@K>r>-=PKx?3%;M6dKY2Bs&;cAOSmJTeX@cZ~)`yO7{?_FA z7T0{McamST7t>oz{r#g9!s&!5HflFq(wtK~b5gUkaP?BNf){cX7?eztxm?0LcZZ#j6=7Td3# z|Aa>||7v;oGirS+f7Y;gt>Zd0Yc5i=L_Yl*>UhLK#x)yV6!C-U z_F;bNbR2k1kC*80pLy(ZU*G&IxAo7u_#>x%{*uqYXZnw2ss6Zy~!^iJYtD<9+I2W|V%Jo8hheDK0!Ht`<$VepHa ze08lgy5X!ZZnfsB2UYi<>1O4U&M*Iv{{Fh7aj+$Jy7o>y?KK6h&wO$_>hFq3qyBnb 
zv+2S31Ikxc$LERbpMN;fZ*_g@S2X{K__0&8{+-r?@L_{J{9%)k8-M-CqCLZ=zu4^3 z-Ir>kcjNZINBRBB*|vS;*@UI3zuNr=w2twZXVkw{5H{*RRfZ$q_7h$4Gd^+9JBgc( zE{ga;`hws5)F~gljB7UW9{FMCGY;9hv)f}~*A0IC<1b#ZHua|ea8@q%`FBIT{-L9O zMX{o8I$mnGTjF6JVEVon_0NmUn*Ofv#_f6)L95sC7}q=e^+ z;9XLGnD@rLuKmaz!(rcFg*9(HZ~=UF{NIs&*be*@z5Y%0WIK_pN%VITvFoqY_F$fs z3-gTnmm}8n?~^=^qsX_ux)S$JkT{t7u}!z`#5&{E`5TY+Vf9Yxj0YR+ksr3dd$SdH z`q^i~4!0fku`3RlkDhk?Ija0%!~G{Et6F;}kLjzOi_JVh;@DCDZjsrfKd(dby(_X~ zzAM(_aJV$H_F01DgNzHBKc76`;!fh!dF&?arT><{U;T$4y>@QcYU@wDd+&93fX}x7 zgf~C7{QI?M$OAK7tgmF;+WntfQowe`{G$E?MWm*`ll=5NM*2SFH4i>8^{0K32O6*T zY5rth@EV_vzqtOe#fKhw`fG68~$)p+E!4-`NAp!K2s;+O2|JT-nWt@BPQn*7u?VVM5G zZ_K{o=Uastt?#dM*9yOf&%S?wUomfgq__d@P&#kvqMCndbN(~!gXa~0)PJa`t?92i zaUBc!-WA!^xadF}^sdONboH*V#?>X;0ez)>-W65j>Rf5safiolLY)VB;ez-5*D1UF zW+Ytj$G^TW|5c8k^Pi@}Kvph(oyp&S=~J?*HGXvSPGaf&cs`^4!$pRTOTUV>?|)m$ zr-)7_mi-w0XUEs0n-^L_R`~2HcKj@S| z0Ui+Du+#ZrNBtKR6sEruLJ#VA%!lXAFv;V6hkVfb=o5bYy)dn_zLdvo?V}f8nDxF_ zKD%hF9oBu-fqUNh_yGL2|If2>VSe`ccR>AMOKt5#tm<`I`~0hU=uV$e|B<4$Nq=}j z`Z9D9fsgrMgZM%Gy^wrE9j|65eKhgz_KR=d+PCk)ZnvkV-TdaKPVaiHGyUOL%v$wVcWl^zh~k3OY`9az4DmN^RI{>Bp-hBQ>T2E zw}k#M@8BDr`sU3?hW&nV%a@P(%piP?@4ufb&p(`FI_kJt!$KaqVKYuTzo`FcL6z!B z9p_PNdN6-Q;vnOh-7CLUbP@aL2-fPH_&o?8HrT@-KJn!H4?Q_^;R`c9ebV)tR`=gH zIx82`VL`su zUdz*Q@Wa=H;ffs}TJ4$b_LT18cBl32H;KotJ0Deh-}3$g-{JW%)V?IETGJQ0Vb|u@ zk(#J~$z=LF@q4f(HuJIPF&Dq_YaYj4I}`v2c;p_4aOJobghB zsv~)zZ6BS9d{(F9z`LaW{%Nn?Z0?quy{v!rMTh_NZ^ykG9y|USmhn07<@YPEzun2k zbrMh4X&l|)Z<=4+{!1kr_1{cJGM#k$HC{I{Zh48{t8LiL%Ej^%Uh_N2^NWk{G~|cZ z4u9nGpSkobs{-%fM^rKf77EcyMLC{;H#XMX{>=S8Ch+x1H9}7pOc@|Gdbk z=}+F6)SDjoQ`EfnmnY^o)tP^2@_dV@92MCj)VOF_5F|bZ{w-$wI9PJ z^!KmvvuTfhY3Bv~Yb^TL^h1C4OLVd8KgY6i>2>}=vZ8L!&NE7+?(=@)%L?~1IL*Day?y>ZIpBo2?q1{?AjC){!7_oqK_MmK!->`$Hl zgP-Np^ZskcKf_tM*nZoot^8tDYwzSSeW4q+#k-P6KT-dI!c)`VNnU$pLGqi91ITNS zO1^i3R);Q%<{z7whaarMpYxA>`)Jr}=lvi4#wnM1jfue}o=dXSH9%nOYA4;B?Q{dN4d-p7)Bcnn=pT|b`muJBfV>m$+Mcw3Us z{>}WF@14fMCG>}R4}I(dch1Yh{$aI`Z+`F!@S5%eIu1Cy{Qm1%@<7R|*6I9e_djn* 
zF0Uu%7xf=1A~pS;Teh4@Rx9`$zo^SY-_{@7LLOogB0 zAD#!$`sFu^s-&%fX1eaVn< zw126Jx~QM~He<8hQ_no3{tJrQCjHwTVJ6=@u~Q@t5>IXGXGnM~-csIlygv221Ye?` zal+p3`qAv&TgQY0F6!R7Xxm%7^0K|7kKx9;|7Sc!>84uSyx{9d%&TqNzxwGX>OWFs zn*PcUUlA)7tItc*wtskfA$(x3eB$IAIvt-ru@$Ln!Z7VkAAQFg9=%am;=HuZ&OZ-H zgx9lQTlfE6^@EJBDA93z+i5;|x!Ns3`iuH6EIg*aleo=yP;BzO6Wb8}LGg1OO+ILR znxFcyQ%vh><2o64as6Sd13&r15eKdiwiSo66*?)~V7zX{WqJLIT;T|CWe9q+%UQ=XMe{=@SBf4k@mN;K-GdYkR!QSJUe z>PI*Fi~5((c$5CM|9?;Q*YV*4y^fdK_?d^r;Z69MpVcYvlKMkq~`avglx~4C5^O|R`ADtg||CDV@ikkjfSNr}8ow{Y2J^$zk zdaX}w{Gi3*1+|{}kZ;I5$OD`Ft*l%^d**|y?6meD!u}&`-g?xZ|LL`k&tK@m^OBc8 z|9aBCC!-%>tFMhOxo$a~CwA1o?AY|Df5mo3#vq@*K>R7v2Y&hn@q^^2acqmbtX!J> z@HJuBxxM3eyJxh8rP_a({$1q<*DinmYrhlrPU7hrKYgJab{fYY^>+oA>0esWR^&d$ z%m>{K^?4!=Qg3$hyy6FKzuV2qg?jol+m&H^-Mv(Q`2NVt%HMze__4=+eb)~jhTqQr zHL^?)W2jl{S%#dJ(6!cx$xufh4cmX$|p{~p-bk4tw>!H zhUwRCd&3W|I$K!AZU0^I_>qYS4>}k+$zz@NXWzeQp4I8|2yfIsFE~tp@_8N>6w&JV zB-P)$A{y&8ul*sh;RmfhQ@x2dR>^U$@d()W2kG(x2~fOpjc7KqoeF z&?kNySNkCkwC6R6Tb=U3yQKcG&)D}@pa0uwVgC)jxYh$te-b|X{56`Di=BsVxc{4E z)z{vM&+b2?^GWB29rf=P6*c``;f;P*tQTYZcV$|}eLv6@G`$jii8o{9gXB z*6HE-OTTt%Sm(m~9^3HabKtY{4=tI8ZU6G~qw8${(a|`%DvGz#tM>g%p490$QU8G= zQqv#)j?^(P>!T}v(?|2#yHCuwI`|av_d?^R-X-H=EB4|G>#lmiy|=7#Sy=zD?_Y7o z>er+5|BuK2gT;)R^l$H0M8|Fa=#XGK@1D91mbBIPTrIJwgAdy~jf0Gjy_gs_eR%1$ zZro%nY`*f*Pu$Zz#A}_Mf9Pc8V%yoOdj3=MykeO&ZiOG@I%I4^`qufz?SCkXOQZhs z^Xy6|`s)@|)$0wtK<|p^xaZB4W_8>+D7NLp+n4y&_D=kUOXv@Kpa1Qp-}8oL!o0_S za@q=OUB>hKQn!EUrka0h*RKCe=Ew6J^&c*3Yx=vw8-M?}s-Wq_yg@fHZh48Hd4lAD z^kb;?9`SPB%Px#3uXp zulCgU9-4K;Y1G;EC!<-p*zwQS<^JbR(?8d^6HM3458bfS`LUfv{YMIpCjH^}U3p|Y zEw!DX=`}FXHb1&3;_rplKlM7E`LPvy@rCx!mi^YnU%Ec{o9F&%srTIhPtW~NOXg+s z*M7uXt&ML(&8P3qfpFW!MTawTGjE6d>ac~L! 
zVeP-X@8Fa7xiUOWdg)%16T zH~NQ5sK5UXh~M}%ul@ds`HW+Es+;-q$@49a&Y+WgWlevb2hU@p9-2oS z^sZnXxBa`)td4)WVyF4^SM#U!*dV%t_!A7XSNZF0tIT^e%zpMaEB>iF=1-pP1oCpzK>$!9+1r%w6OJhO@S$PX8M z?3!2n`OkZXi+BIvyAHVVbjIU*YxDu_{#WoTW(|w+Z7vDe-c4>t{mV#A|0JLL``C7$ zPvk={uDinqt&fK1i@X%mI-m4qw))fh)DCN&@X=+zaQty$opa7u<^#jupx*XB-K<>f zcyDUC`~N}Hyy;J>s`rE1?@x2(W&3D}9rZ6C)%4fA+V&3*sCnr;z01l4KYfDuLAJLv zjt!roj%&8d%B5F+IPIQSf9;3+ofF>qihcfj@ZEP%Z{L3nD-XC!x&PnA`YY9voMO#* zc)XKG=tkV0Kl)alsDEBm)bw|SH~NPJO(*J8)V%h|6Z4zu%-@wPV#MD*dY1X zp!!2+CAZa?=ie6&JnimfUVh&h=xX;rAI!?d_W#p#`|qkBWSo}xS$ijs?Eins>xkd_ z&c%-Umyss@+gnPW>7nD%w|5icmY4W#TUv~UCAoDRjw)$J~0pMBkK*_4F=?~q=Z<=4!zgw`_c&@Ob?TU3g|I?&L zGJo^}y(_9GIsSB|yLSDY$KREB%42cOBc5WKm)hah*GxU?q`!}b$2Z(_=-SRg^x^xj zln>i@blzFRVtmsjfqIbl73`>g`Ha`}*Ks(GBHy z_zaiO9|kUX#n)F{`48cg)xPuU!L+W6ZdNXAr+&tk{pc_1KU72-_1F7Yl23h_ zPaO2FU>&#rK}SXM!2gu~=;2*4dbJ@0RoVW(Tb5<% z?{uxdL~qHi){K*0w`TL~NG^8Nf4K0ZdU{uQqkn-Q`S90J^V%PrnBP=q{(SO$i#v(i zcun?V`orF*9=g;f4_zPT9lh+g-?j5C@Y?t9xsIop%ij-9v+YjxldPyuACI)o3EJ_G z+P2@$CC>b!{-tuJzw*IbyZ%bY=k*CsieAUXj}32HZ`+shTOIuQ?xwBN&Lh?<4b-w*VIqcf2622{as;I&ujX5)}F`KPY~nrq{cC*x$%D4N>n;6j{N(kY`1YUj$3{2!z^H$zKuv$fOZ4&hLFTJ@?N4Uq0zYVdXukQoljmC;e$Yw2@nM5$ zUg8h?p7E78toyZlLg%f|KGEKw`u_K5<;^EzJ~Gkfbt>L!J2l(MqjpQM)+haGJnBDM zWH#zwM#sI6=0$eQ&r8U>Qydj+-z0+qjrMgjXJb=qR!#<4A)YyXYMVY!ay1mOpL;wTq^y`3*b(+?Jbu$YwA-(H}kio z!t9RNQU7u#jrz}!Jo>kK=C4Q`^zPr+kq45`^I(3)Q9jFCLVp;XzjXV)O^ywhAGY@6 zFV5eN`PljA0qGk(^0V~*tBIdJd0wQddOv7yqdZQLeiMxPml=%b$ydyDvGaksI2hME zu}!a`iMIKZaq;&;^fA=&YIf2`6K|N({>+#Ewfq;tDof40e5*g*=e5p`|65tPpl^Pn zTmWSA%{A@>)3sNBdv%G_`s|3G{-XZnh)w$QKZfMfm!axP9HbAkHIIJqgXG)i94CE} zZ}I=3KOBDDy+g~pZs?x)*vsZz{uKPS|1Z_3-dCISm+q?V-E8d--Qf4?XMW5(>YrzE zsp+qJ)$Tu^=m7BlY*N!;kl$d*F{&+8{h| z+hg8XTvN|6eB=@X%+}zgt9V`s@7K^Q4aaw7(`j@i!rSpz-Rs=5Ns1c%Ag; z!G?VJ!Zy!ccEJfd9v`;bWzSC@bS<<{;=ic8~oz1L(dD_?EmD_JM2-N ze}0|vGcUzlZolek^VadDs#?>({r?Dk=E|SgQUAdr(xgA*njU$P9rN=Ns{Oh4%kRF) z2k8sM-wVk%bUH5cz*g+V7uNgpD-XQ)a|^;oWAEMV)W5FgwXSjhBY!`*JUSTa_?a$g 
zf84j${JA80CrF=B|DnQT`ZF&u$v55eqN;j-g|7*%59MvjOZ{n`cg3i&-=sh93UBXz z!nE&iJ2f1Btz<_YCy(s-6JABT|J8uxnNC_~dQb&KKU%C3h zKD8Cas+gt}5654;kJCSK(7S@Q+8)UR$pej7`|_@+uIA%i zVMW^&>t;WEO&E4PX_xh$U2(pa*R*DSZ8@!D z*DV{5K3~{T|B-^LroZNSS479@n3raK|L?@#R)im`}M8 zt?$&Z>bW~SwBkvd!Nd7a&f9?0n?3_FPwIF-5O1|zwA1_lw3519?5O|3;*s$?@f*KB z4}1@nZU66dh*kHGK6BglD|{3l^tbaY zgIT%wru&bno|09qjc=ybJH?Ko>WPj~|Is4SsDBw>e$4v!z+?J%l?PmtLZoUM1eDa<@?yB}}&_v-sE?!U+OsaR-J zEa+xPo-4z)@@m`X-z1OsIrQqEvO`&G`jZc~gqnOCKcDC)-h{RtC~s3<>QC#ulX15)rs>+%p=v`H_SWrn_dn2vt5v9<7qy8hD+!Vd%kgnTMxPL z?6A-251#&;i{~<)eg86=m5T>=Tf+U%T;-w9&g6F7{#_Ai)PF0fGd)^j^S)32#6kSt zX+C`@k_TE|SMtqIofE$Ym(U+x^T@6zuJgow;eeZ0J^teV>Vwa|e`)DB%Abq;8<5Am zB&%BE2W!8-c9P3+JM)YBmm@am&j{4nb~>PZ$_pQP*dTp@_+JJyo|Ox)^Za||_5ZnTZ>3ssd+n3FbdA5gh4N?~e5pQB z|Gda-(!bpi9_CZ)!%d7^9^*D3d7$woe9XtI-w@u#^fOMFb>zi&JUa8tu-@FQp8dwQ zkHBa8x3Y4ve){zOV~nRLS=HM5)Hu4CA6_Rl^b_^(6rP&?npZpjTxQ4oZc$Zj2k?R3 z7143Wt=bp;kO$iKp?#X4I#<$V9(;*@_`+VxzV@`2|MB6l&jmNH)xD`Y{y9?l2NK*~ z|NoKk6eX)#GcWJNhi>N2#n1CfKT-c~5oy#v{r){KvSWT;g6RPt=$*vPPWs0W(wFVu zwBGv12I0d7d-%f%@4Ix_x81UQIOXJ{U;OR03%%Ca_79Ju{r_nzD;N3{E9&5QB#7O3 z|ACI=G4H7VK;fzBuk+w_L_Xsix`}+2r}d^^Vv`4Yut;uWd$jH_5Cjazy38(&9)*l8U7qW*(LW~2VQ7143~k-Ri} zZ~y4ifXoYQdOg+pcRD`2*kBKTn0fMNkNNZ`mkTp*`P2^k9DJziCjI#z2j*kj zX*y!QW7x%lCs zzqtL66p@<#uJA^`E7sNa?OkD2e(wsavKxPQqkYjY-Y%O`d(zOd0Y zhi&qv<1Pzxe|_c6o4@fV)bslzUI!q2zMGYcA^o((&)PeA)SfDcZP*b%yixy!g{P*! 
z^0%i=%r_l0jtw8^-M_CR5ByKsRJ*aAD%qp?Wg_VRkt&)U4Jql zeVZP0%gy`Ha@ z@^%!p-ulM|;ll=d_`|0k?K`3K#)0tpqb|Aoyo(3C*4g)u)GOxw(~BRV)~-LvbfG@A z?f&!dwIpK_{l|((qyGI;XF9?Ix`}biOZ=u|UqgN_ezp_i(R}YT4lbd;fA&S+nLfPb zANuF~{EIt(|5sn7FT4I^NXG&9)8GFxo}y$`Yk0g9Ut4uEe=h!Xe*IH+ECZ&0G7g{P zOplK8bOrIJ=-tFPc8b)oUC^)j=|}mjzQIpj6NbGn-gTq5zWQUCE{wrTo%uwYIqSgKl1J_SY$I z)W6KA>96x>D_S3URxa>)CpLNQJH;j+wEj}Q;mPxBbx!g;nCiTk{;>JYH@@xm?d}Yl zfA7bg6CS(@KHL8cWaVOdY^47`3%{ad#r-yYfEsV_EEyKh)ouwgzo>udvGJVdfw|b^ z+v`txe3JJczy6tdfsB{h>Zi{p?4Nr7b_46Lw0-|-&knA0>9ZT7kLf>_l}pe6e_?z@ zsTz0O!u1d2b(9yoC3e(5FCNwO*SxlTJ#5zz*>+!6RpX`eWnN8K%h!I$Cywo1Y%(t1 z73+x1!ASFSO2&9_v)KRR4_x(UuK{{ZGG@6@j-R(rXG1rO-TusuJfTjDpp`u=@BdB&spUil1{(BD5}zfHH? z`3o2J&sfm;`)cz}Wjs6oGa&t$9-HX>ZrYoM2Fv)A*AaVFX%XbpDeg?%R zFGcSpUfYiFzeEi4zqQNm8!UB3IBejwc>jU3z1G|P9|p5>;rPr?(chnzDvi^LdAB5= zwRiHU&Hi6)>%S{;<{9-LEIc*+UEz(_rKX>MJK|5bQ}{sdBrmn`qrb)BWnNluePe@Z z-eUU0etVq0_bE?291hy|h$pwd`bhX+^82rjmnDnwZ6E>f6w~>s9rYh7GHUuWKh3M@ zG@!grQS;LC)m8_eBK}^;d<+>6K5WHad|~x}Jn*NpKlIhG#=6ViaoMNVg_rYfcHPft zRxWM7m(z=esqyEFiqniG%@W$;wz5iZbx~uchI$k<&?<5aD zeS!Ev^7(vce&x5g^1|~{{o(a{eeR+Sk6sv#x%{WUS^nG4P;cjdhO=_vzSn+J{rz>W zenqh=z0G#=2;FLZs_x7$>c60IXXU@e~XN8aKhd`%eISFQNE z?>=@*Xg~YH&xST!0I!|@?`Gw~{x!e8y#L`M(`P{AZX${mbyUBd|5Tg(Exhy>^&iRN zV*KRy72ayG_oG~V#;1AkvE5*UwjCt-#G5d!^G?TC9<$Zo#DhPqcHb?3x_t1Yu*SB3 z`{hH+tw6oq|7|2Im)iG_x%xrISCr^D{nyfbr5fuWWHdkqxtj6^DR!lpp$&#!v@p5#2=r#XA?1fTu>G2zSipZxlB>gXfW z1%I3UKhK+!1oWfNsDGK!q<_03b>vfTsJz5M`tlk#yFq_mm*h9{qJuwf#SJ8b{Ztf2*jq@tiPu#kTC8s^gy|&UooOsmD(qh#w@MzRXX*4dYs! 
z#;I$ckA%mc1vC^1X2I; zQKSBP9?AMBpRM?OflhdcgFf+FA38pHplx6Dr}b8c4W@aE>F=NV^E+>U=G0~SR~@}^ z^M_Wx27degcPuNH+V?Ll^@BRTc&oMX>3K8kn*JTBiTan18ui!zF{$Y^p#3;O;-F9b z)`#{@9!NgVgZVYz;>v4$*oxFOVK{f2t*$%shkpnkery$Y`*H*D*!ll%RxWJkdHM18 zZzJ?&sC`LxwcqhhJhkf|l6lZy)W2I)H0r;$@R4uhmsQoc#6juKd53KQOJ^H|B z2p@T14}WN#_s;c?`r{R0%O4H>;1X9||9oUtF1Fo|s@+j^ipnomeNDf$;~#jsQp3EX z{sV=lroWEEef-F`{Z31G7>_vUoy5&nzQ+7q^3yoBcNz!b!v=f!!v*VoC?9-oC|tPb z@;C2()Hz=3?EG6dD;KtZ@2)N0fUomT{fc7M*Iwg2-+yzdjrtE3k81k6!W++jb_7jF zC$+Hv1nRo=2UX|Izu{ z>v2kX{o9OmyUUU#-A-!P-_mb_QU9TW3O&3ltZKg^R}4d%#;70^_qv@Q2kN= z;VdpS{WV@Y|CaRUlf3p|RxYUyHm*Kz#F-CvifNs9!f$?TQ29MJsQ&Qe9Dn2I?wS^! z``S6J*M5AIyq?EDc~&m;D^|5#nyuq-9gg+c(R%vJ#g6(fC?3`HhexlYKo_(hHYkC zmOX#{Sz+6+{c8VZ-H+hwseeoQO1C@lR%`UI`>#5!>nPH<+EM?7MWj*x?WB(R@cJ`! z6XTYb_~{EI4>UgQ&;0bOd>&jvf7oxU?FS#dOWf4HtMf=riUxCW4zC_`(|qP* ze)^>ze2ISe!mfASIend(r-%HvhoAWK&$hhs+40X{Rxb8^->iTC|5K73b!cgw_SNpF z?GjA-j{1)k88!X2uDyxW)4y$}x%j;kBo3y2bOp%+ZNH}dnLp_lUUgl~ zkEQ9+(s2~=r|6yJ;kQ1NA3vCGKju&Rf!FwS9Q@QZVfgXAA^!j4Lg_B*r~LN$E0+k{ zS^h$K{&@g?P_n8ue&gF$V%X{Yu%rHE2d2N%I*|U-`>CrRe&+js{{C091|NCj`kz`Z z%-Qf2FWxfv8T#k^C%=0KnV0D^p!&2F(E+^_xem(XCl6GA)W3XW{N#h0Z=WZqPmn&z z!v>irh`$$-Z7#k+_ce@Td3{okzZa%;-buaru@!sqg{}9$>&|)k-eKFNmRUG^V3yZ9 zJN_BS%Ek1Uq2GVC)DP-B=tH8FF82R-mOX|bsZ&~%?UADeEZ27SxN5*DUzyIa>lR?R|*EheT z*7{U$$&Px^mlF@iEnaQIJS!LG7xnKJjz;};eDcxF&=u9yalYlb(p`OTNq>sufi~Zc z)Y*KfQ$7zap+6k*z#3a;7u^>ImU{ZV&)s?qe0Kda{EGRZ<@L8KkY}jlXURg`e*aFN zElHeY|1(fTYWh<*D0QY2e$Yvr&nxZYCgwNQnP2O%Q}i0Q=cCD9On=z$b(`JriM_50 z8=rdTKI=|h3SPVZr<;|Fy}qZEzkm5Qx)~<(m#XS@VE;esl#l$b*vvC-|AU36N&ohg ziTO5uSNz@ylGlE4qMvb?Cusf!l8>Dt`QApT@moKdN1vci{MLt#PabId zvu;){=GT1h#BaES{&4W?zxCOlA9hAKV(yMVUH41%{ZB{70jKEw-&*Qd6sy|a&35t# z-E7{u_<3D1zo`Fk5vl3lr}+y7$u~aYpysu&nV4^NPWix=+Fe2NQuI#ZHZFD((oY&! 
zf0#dZ{3%x-c3(K+Wji1E&VO9yHP5bp>ty9(uXDct%GIwZ)+GAtIClQuX&rqf81-LJ zWYqMhE-#|h`>W}Y@UZ=I-mj)(lAp$#?Amy)Y^Qd-;nm;7gFoc^wWj>|&1;9-?VUWb-+z(U5x>nR7dz@dQaq~ZPu-B@Ss(a8Cvnba zwojgz-&AM**5vsXXB?36QhPD|q4o8J(=LA79icVv%%l5Hdb`&;`}{kam5T@E3&=Cr zzCg)}x;Y*RVw?UN$B+KZFY3RrsBF}KxQLG1{ySn@J~|AG|0Q4l-;z4|Ns)dHo#fZ- z#q@{SSAY8V4-5{4**CR+GC1Sy@YwmEuJVI3|NZZOsh6ti`Di=kL7!ag^nE?*KUzfo zA9HUWCr4H8{}+OaAUFwxT{r}iKp@zV0AUMtn$t-@mKKrACN_$QqA=isSGh8>30#2z z5s=MM5V;^4Kr}3o5kWv%f`}lZj2ogNjx1iTfPSm$dEQS=pPI6h@xJcwSAW!}pY3y= z^Xcm9>gwuh)6?ZvW@{by(M1>hF%k#8%UQ-eZ}g!^9$3d`epk@h_BK&}_|)XhPq^Wy zD}@v0UU1cShZe$P`j6;1;O=_)?bbO z#v8@0PRD_FqW&;@&24`*W%kiw&Y6pMzi8=v`0V_T>cf60mEF(E*kGay^|6g^)@Ldi zW~+Xg{>7t;{!a4w9|6-99#Hd$gFf=xxRJh=51*p>H6K313Hrm%XFN9J^ZjYq`LFN0 z^Ft?<_kVj&QY?1dIihxYU$NNsQK%)ce9gS@cqfn0jX1}xAoI)gPxFde{oBGve%yz5 z-Eqr{{MLu+Mjl9CF`v~bAMxe&Gfueqfselap0{?w!~4H_)YP-e|3BjVbAM7SrpIi( z|C{#BxRO<_@p~t}rs`(?RQ%ov(od#;k!j;;oV;eQ>(3)Tc&U$V{5+2qkNZ_#>mM6L z4{SyF>M-oO;>|xj`jX+`zj^*HKfm6=&vChJ|NTj^`0b+mkLkD)t**TjAG%?)-(o!a z%k=N$6&3xN-=aJ^<~Y7Bb|qi)nrBNKevolN^D7^AjN}_S9luWz8|>i^AG-8E{`jWr zUI-sK=GpnTuP_H*yZ$p(Ud2>Cehu2|Q~i>a>E?JOh;7$D(@(1Ljv({Q^zY9-rYHG| zypG6cy9M#b=v~e_=6Q>F>+;cqIJS2=TRGo4KXr8&zWvdM{_~qZK3&){KIX~s|ByuN z_%rDK->F|wta9zuZ+vZuVaIWv&rJV;Jk#`dxmC_nta6R+hE8J4m+`Pc{2=~bNWP(t zSFxi$>UhKcN8K^=OXtrG`%QlM<>@DU5FY#eQNPT`^jKG~|4-EqM%$A_$2_0YG@rai zWVgl6^dHP0*?3O;)>nM}Cp=)E%%7Y(15I>xXMnZ|9#!lVYh{|Hgb3C9BjswmA=M{j`<0Pf_hm|DoJd(ck6X%s(h- zI#C~^<~5J+&abO8e`EZ7i^tJRe{nzqZ~t6v`cIyXGM$D`+ajhp`c zNwJt7Q}p|nj`|hF%JgaQNaH#V_YJl_HIMVx%rDb_QC?}|IptC0dwlX4&rtJ-gFfDY+ zQ2nj7e$n^&r*3<9Ywc}5|Jrj`EdPJu;G|gWeRoLhEyw@=1&NkAn62YfuD@z4AN{GF z>Ay6O)apMVb*4uuHt*lwsXZXre7_+1py{RKo8OI}Z*lqsHQ)HK!8kAShmAK`;fM$Q zb7AAtzd7ZFL*EFGoqy|3ipBJp==E`ecW{E?04ax1enkMA)}kA}uU zCpK}=M}GJ~@<4k&bzJkS4WHoz{dWHO)RSKR!3F1DO+R-0(@u)Tjt7?a`*-aN-QbVs zhn?v^nh#jf-xX~n&8dPFJ(#~DanNghY~yEqi^JDRipBC=jZJ}vj6?M zCtTUy=#ejMx*7Xn`~20fxf~U|i>&RG1%Ik-9nzGk5&`uP!-iW|;Mweb;;AUk2c@ z^Pi*25AIp)|J!UYhJD?Ah&5)vvAM>bVyZ}8TkK5#!c(VzQ=XpkIzo6rCp__dedK4J 
zU?o55i#Yl48IrH#EUQ1P_xBBtyWp>H3mf=%?YLl*YpJvSPsEoFC&glY=(yr7*Kr)( zVn6#e=9lU3a+W&%Ij$j}*Q;Ssoz)NLEZOhn&l4mcjQcQuXZ(DNqci9v-}tb>I4|;t zk3adkeb4+|I~@PwpO5?WtCzxK`gf9I;W#~gu(UWRlvnfEkCG4CxSAjPv15$uywmX*7h5r|kL_^DfnQkZ#_v5HuKe4( zZ=X8syw=(EKch*pR6hT<)vqX4rgwu!8h3(~@87BK(>%2^{nOl2r+-uTW6Xo`3|(Gb z-G5hfV_Ze}K(+_!%uk&whDkg)L4Rw*7Y;c7r5~Ks+GzBvN8P>E492tHe+}w5;O1Y* zmy4~xRN}0?j%D0&XVnab{8ZcWS$^?$*w+s z#K|{wQcNN*6;J53q29jdo_Y8i+ z;{My`*t~1{l&bReP}%PDM{Ri?#rl-rT%w`^gVpnt}4r<HHFc`N>X(MPVP&h$@{V&V0eUb*2BBJs(hY^gEo%#6Wip0%$Gd#Q}2r5Y=1$XUrv9RK54HT zRvkDZOuz2^TWs;_H>tP%|EP`wE-d7K!TL)j9ba@`C3Abo-Odm;ITjK_Si6?^f8YiFL2ZPA0_yKlZ`y}Lf;yw-8tY{#9W zNwN6dX6MTs-%%TMVzc&Y(>`N>##qsDz|^0kEZ_b2Kit- zKK@=9*LjzZI%Zr~nD)Bp#TPc4^WuWPG!6W7bKkLt}eEyC3uo)Ljbb;4wCy%)Q9`zv4H+H7~(!8Rgzv{&I z2;@_580l$w+K1ViN5A;Nc>6Lxb;{?#3HniPdpQY`Er{MyC!Z>M^vejQh=>e@T; zp_}Qie&)x#GyO;Nidy~S-=8*wKi1!VfBJyNogm{_AKJh97j@_12dz%|Jg7WoFQdP; z?%Y{RzW(XcTkHPu2gm*C@8$1b-jx&!*Bcnl?D79QI<8b_I%;2)-#<2#pT5;@O-hSE zo&Mex;o9w(@yKhwxjVn2qmGZi7aBi(Yd_}4R_w(W-twDYoVeGLm%=u`f8>f!y?71V zk)3}VNQ%Wif4y1H|FqQ)YCq!5baxt8f3vA}14G zcwGFwFs}1Dp82t(KI(YG0lO`^`O#NT3HvYEz|FdJ6dt?(#%NM3b{yJPn|bRv)Je2l zdyU)m|9uj({eMSn`pfh$D(duaYCZXk8zXtmd%F4S>dZehe!j)&zv!~Mo%!YThYxJC z+h319?Vj+#fk*HB?t9Do{~RKH*!%9F+B@s@4-K_-Jh96CjoBKn-2X6@jOzYBEto6% zE00uVk8eze=y@Qo`IO{)C&)N)o%u(* z^YDXKr+gk{d~C3XKWua7@GHNYe_xoh!!5u1=(V4Mulc^DSSr{5tXF*eTGRCD*F45a z#b!;vT>r*AK;j%^Qzm2Q?kq6rQOCu?kO25kILFF-f8U5jq+Yb8Vx&4oY_x^3onNRF_DfR5{d4DwI z`$xu8OllV6)8lZPcUuaW7xT{aAIKxM`p=g-@~Jm;-Eqr{{MLu|PabG|QQYd3&x4w0 zb`L+}gp1F3_<_&=?dov(75feDb9DK6iQ}I%DHhwlChPYv@GDAIxwbyFugdl3)YErE zYBT)@^G9|1SMGzEN}cuR)Ltssyi$<)f^mQ5?;k(k;(f{wx*B^K{bBnrt$W6jlTQiT zfBdCCeQ(J@^kvt7jw(OcR=d%SVr9DZ@krxN(5`>exak$ynf^n0MXmliZmk~hX&&FZ zkq^=*h`$$lmv>jr)4SZtY?rUA*o)8ZKNAjFX=ty>w*R)zzd8?IkExvgakhVKN8{@J z#as5ne%m|E*F5GAf2RMUJfosN`E99ZUep;zdEEDae9*?#eEhvIuJca%F+a9qFTU2K zKQ6lBtSjzltvPpvEw1>?f$*6A1IiDsU3~vC+ujd48fV@Ou~{46rsL-k=k<{3Kb&V) z^mn;8JO3G9|KkdGx&77X=`{~MuxL%V@(_hvB@_-*m*=KC~!&?yX<{ 
z`g3=e-@mi}?7-s!W&@&7vU_AgoG_HMTJ-+YVmI7Q|aVW$7mJflv3wsYoV`oRN# zL&o8}H~C;(hrbubb)NC!@zvjpFU;BQ@SDDHO(*Pd<}{Qz0_T8=FNE^ zY|zG4KK#AV#$&!U$EWK&~v|j#aio8XV?D^C&j|+GyQdO1C+;1 zpMH%u1mm@L;%TlYh;8%lh@U<){fn6yzmvEJN5m$d*Fz;=^P1On=UW~6DVo23{CtZ? zb>2z7%?BGK9~)GEm>q7r?&8M|4YN;u^s-|nHN3`6|8`O=HqSM5|I<*vqFCkjZnl$0 z9Jg4X)b~l;YhI>*@u;Gw=J7xBrUyKr@(>4wwR~T|HtZzDV)<>cy%RLQ_JQ9p_G5#} zLq2@r?S8Yfw>xL$@Q(8@`1K>N-bdW_|507~3qAh>uVNA{rnAP;4Sst*m7jhx{Tn$| zt^TrmD!+eh=h<0)rvTgE!3W0u#5Q@Ly`O0R=2zQ09S@wKKWx3pE2lkq(hXtuqf@ut z^@L~X%Z~p?bR3ZRGOnUzm20nl(Ek5|KGURF7?1g8`WKOk{!a3nZxu8>8e-e~iRKXp zedM=swIA|8`hnN{^sjvI8s9Sd;S1lGd;Fen_}$sohPRd9Bk#6*p>EF&XEBb34pQFjQ*HNdNpE&59#LcD;Mf{-g7Tt{5Z&9aw z@W%69R)5&)-mCWb&Ijg%#*5egXtVu0)Y*fOrhg}A zsnvgqjA1%Cu|25z5C@6JcHED})1+AF)BOM2`k(vZH=c1$=YVk0DPPF`{`Ua-wC~?L z+6TC{UVoUXUs0?~H|;OB?f1Vg2#S;H=z8WeEs-I)2C3Y)E#}K zs`NLV^7JX9TV!YY59Arf?{X`%UA``nc`=?Le6C2Z{9fDY$Wz4M3(?0=$LUp97Z2;6 z^Z53c96ub^|M{(-{pg+L^>0T=|9-d|;W2&sHQrEkf~>vTwfEm^OD4RT{)4&4__dz- z#^*C_yJ!5qZa#Ra_Yog{@<8@GaU9!tVm@q8`>D%sZTQu9f9I0mwU7Q$y$P@Qz~}6{SybhlRUH0 zMG-$}eDtaH)-N^)A2!&-A3A%lf8tT=Z5a+e_x&sW^uP9lzj=64ES2M*&Gr7z^ra}- znQl6s*-jo+zW=3O^O<+1|DwF2PJdx5$6>q};qy)^u)(@~;&m9;!Eb&Y4}TpG{xE&U zl)ZoS!#{*+2VQj8?>FC+*Rvh}3?#*3=RY^o-yb#952}vhE!W=3JMZwy{cm6C z%&`5IH{LcluiXD1B)m=i-$oD~dwr&o$J#p{31V{`VtuwX&V1C)^k14sD*8Lk*F5V3 ze~g+Jzh78gpYkchHxD=6v}(pLpyy+l1|3*!kF}?w<+29e=j9PsO_X z4^!uLT&c=*RNLNv)wcYGWYTA*|48ns)4v(z$J+tC@PXdttmAx+>g)DfzRs7|f%!Gx zJE=3Apg$bD?{Ob}_6t{sV|Ll&l95Xm)4%QiM|B*q9gXY6j@R&bjia0S;ZbCMnf}E{ z75)2U1U^TT&;HU-^N54=VYcSc4}K8cYwL7g#NjcUIOAZ0>Thkn;H}@h{hS9{n}2A> z^pU530+0RvaU?00%I9Br6(y^>Hoo}$LtA;BAoFfbY8H-){+ieG`V;zED!k1ry7?K0 zzCiPL#?QAn{GgM3csw>3=SBXI{^ZvKPt5*E_CMGQKeoq{FHvvTAErsM@H$UFQ+)ni zBxeb-{X|FOt{cS~v;SFN<8eQco#|hAD*9_4-=moxZRPDKIzi%~kNnsm*PB^Byo%=6 zKBzO@m)GA&iY3gt=Gbdj-v6mE>&qWJ_1BL#cpY>8nfG1L`bedo=S8e??VUWPzh+Bq z{5mhMHoTesg{PuFbxV_CA>Z~B9q}_BuaoAG_{j&o6TjuF9&AtK#c26`S|9VocATei zc*E4MuX)e4k312kzwy&6UU1gOOn<%p0o_6BOrQR!k5p~ORpj#*^&s=X&h&5O494%2 
z4;&Gje4c+p#?`#$*Shnqjyy&4cgD}RcvR<|Q@x2+~34@<^M-gt!F-JXZjb9D*BVxkUG;t>r8(qaodh& zbmvb~@8ogi_+S0-(r2cBJFlqKzle@Gj%)}&dV8m~=KcTv`iFo2$>ZPOZqY^I?C-tr z+Q(P7^9OeRGn!v|RdN6SBW=FK`()nnn&*Yjt=Mt>82vjrN3H$?d322as~NRMFo_oZperzwvY=?wufUF!o~`rb)3_ zzS`au?!=IOm(dSj9frLZe0-1N{_t4Xck$z^o;$0&|MOuw?m&b!=YJ$C)6MZn5F6dF z+3#S}U#9;+k}b9R>vfFeQ*WsH5C@6JHh$3JjigxEe$4OGKT(XHzi{mJs}1k>=MYYK zcK*tHyzLOLymtN}O^OA*(ue+-zX3mu4N6vZP5-w4camfKANtYxX8I5285R9Cuc`H> zht?Ti6t}#{PhTK;puMi@>eTN+?a%CG^taZWx9>>*;-#%ATd%OnJx`rYz3u<|lVUMF zw$k6f!w>5Cl3lKuAG%@J%`ek`D6h8hTyAAH^HXeMw>92TR6ja$9|CMJ=EL6$<2vuU z$2D91z4*e$zq;f4S##D3(^t6u1Ji%IknN~;|G%SN|I^TML7lgF%e8m%xbpwwsrYR^ zYG?W{$|H69OVv1WClUCVH}AvPAbBAEUg)E7%*G#KFTODOBYi(U{?`8qlUs+q_TV$h z|6h1#QY?J#0^u`#`X!I&O{_87s}qlX|Drbgt$6;K{=<2N@sl4tzaD>#nrD4Bl48LI z85hLg3q9lMIA-IIuoqvL{MW;Owesb2!Wt*dcy`rQ%Fkb_Ke~eHp}PIIlVY*`h|W{I z<(m1~{ZE|4@N?WqpPBwk^Nc$ExsIKD8^6=dOC0o0;%1|tB7V@ek9JL+@_7(GY_Nwv z?C_m0KeYBun}wZkz3WS}zxx0>SN1=HNwL^`BYufi*XWFH*md*E^dHGtYV}`F#xR{4 zV$(PL#6j<(I&@Ja53J*ZA3j6)bev`NhYxOi`#*khTwgfmnK#~h_`3(Fv-8gbNwKgW z^<4kcuYRy0cD$xvyZ)IzHJ>>1&GaA5IV$=)VQ%gqSkVbS%_9!_$Zz9nKjeY7eME7q zQ$Bbn>Tk`M_o*j7dEd;|W}m-i(#}U70goO33?#*3`fp#H|2fUZbrP>z8=rn}i*5Bf zKjzz-v|B+{tN$i4lIi5c_F$w}oM$%U*5x;pm%NDI#_Qo{ygCeLzxT0welc@6T=l^7 zdyVe(82Z@xw~?e+?E9Bl2xnYy5Wqfx7wVd^7!vj5_^|SMzOu*e?<91eq_d zD{PRyK>WRsd_x_tVn=<{@rJc-yX!53eW!%AUU~aTCw}Qm@YwcWsFCdt6w{4Wf94Nr zTxMUcspI||*lh2yo#|h6UZ;Ok$74QLuj6AA2Ytah=Dd;Gj8i$jtKoyEg5<*s*7-Fa zKC{n1-hAxLzHs_VS8lq?eNWRr+dqAQHlIdPENow5m1}rFu0!xnaw__3pPBvzGrCwl zbwg5beYC}{i^Q)ljd!cqVEfiNw3-R58U zWBL6%*S|HCUopM9xc}`-reCV@XdLlouNx)8mH+?j{nzDP zl&^dGv;JKXw)sue-*Kxp^yW|(Z&GXUtl2u(B zpB}ehTfORvo$24sS?ctU@8?$84qRTH)ej0bj}b&a(E8Bv&EFV5-{P8Y^-l6D_A>gz zCL64F&VL_xWZ2}oFYk2m$Q@qmZ2v!y6pJ1I^tk_?caqPVzM9*M2iv>I)_G_8ck)O@ zf0ui+>p$b;Pgl6h?NDv>0)6DSJ|g`sKTV2-{j2%WU->*ZL4P>*iyu1f-!FFI_+x%} z^ydyP@BjARVtBhB1bz5P`v0e@7xR$p^7U%A_J?ld+jgrw%rDcwKd-IRU#?CZ^FG>^ zkt+Ga;%{Ck2tR0j^mWI5r`skUjO)CUeDh<2%I~p3^@p3c{OeWgK0Xw_bLO*qz2Wjc zc%Zh0Q;_^cm 
zgK^$6`om7IeRad;FQ|-WC4z6;Cj%}#pCAxUEL3G2e%&*jc zD9M&O{jER!9hmWS@~ZOnhK}*?1@OZI;_rpz8@giH%InUnzZYMaG;-2yU)lbhVe+l- z{rRsydK$bO|J&~_Ms=KBiv9nFHgDC9I#9ApJu1JyOO=o9M(u3-UzA7c^l$DXbtt~+jdk)J+6@<8L&{>)Fm%ICr5^ure(ns&tR?)>6o;o-x!zUJ65|Nruyq*&~{ z!-(2#Mc0jDm22yBHV>-IBGkUY?9ejLZPc$BxSe#Qy!8rtut$K8KIIO6Wk@$*kV zfqMJ=)k%t_+|TR#uT#IGSmoNQpME)ifn7n(&$j=OBwH%_tFAqN|Ng(Ff0}oZ_0bSS zFYDWh--9}?***O5g*nfEZI_>4@50<&-aCK&4+Z$`{!3|6EViAkmHqih_8V04K*=iC z_(62THojD}^lp5+*Zu$DZA}UVf$6V&9_%l|_4nV1@|yd1=Q9rT z1o2CD`Tl|*B;Szn*gmiosjI`_ZhY>c+duK+u*sAg9?Nb33a=f1j!H)p-v4VKmH+>0 zD=&Sko#|hA>h$M*ocVa=H&j02Ao18{zVUWXKJiLjJnpjEQGa29A6fjDy}lXty!^+9 z?*0Bz#f1W1A zV)NEI`jTj-i~8;QPx#1hOD6qf`Zsb`8un!)lZ*w7+t_*r&FlSeXuLSMER7?#}0Rl<8kYD*8LgZ|*F}e5f&@0Y z`oj_zdCGahBB|w)o0VJCA+lnK0|3+kds|rjJo)`~RV&SnT}UyyE!tSN3{u zNIYJ9Cm#B3Zr5$c^TW>c@8p>k{h41|cuc2MY}4PVt$DqE_e6j2^+NIu8IS(36?^f8 zO%HwjIorK|Vc7Jzh57Hm;I;G5X;LiAFa78p`31Olnm+v+Z*-$r<$j^^{X6;EH+^RM z7mUX5lvlC2rP$=tm!akn2YuwnX1+ZCmfuc_#o}6DvHwMXSUC59*6}~xFUM6`Q5a4l4E+Q{>(qqe<07O)qiFlEsxKQCoSCe zevgizcM><7aTW1{_WEvj^D-a!4B>O#`S8_Yc;`#+`tR4CdO|q#j8&J+*rmMw|2@hB zucCO%{W|^NkI_5vRDOTg5I?%pU#9vf>ySLqo>v{${AznA zeHbpMAHHzJ(}(<^|DjjH(IYR;yYUMzz-Pyw{W=b~*?q-A9w=Gm8b9+zH|%(R$zyWE>y|1d|8e8w|$-Eqr{{MN7b zLmp_a&vrMT)hV9`nHM(L!{3^Iz`yPB&gV95O`rP3RVSqv!*9p`{YkNy9vc?tA9k_% zHY6Ue;qgu)%YObdEX^xyJgxVN&2|>dmWobk;ZD30Bo4-Y>#wgSU;VXpPU?so-!l5? 
zD{Ow|BiUvD9}HVwwQ#%LPJaeI+y6Io97X&7m;Mwbt6azZpqu%rcTz(?+4et@dn)>q z-`|}dZwJP&dAyF{2Q5!|V?TC`ah-S4kNL3`*qy8nUT*u?0TkQ8v zYUrmmsZ}^?^^fwG`S>%gi~47L(0J3L9ow(f^*>R6IAP%nvkrWu6Hff@OY7Y9nywe?Km%vM>gYx z1>3$7c3AU;u;AcruQ=znA^6eXbm&it#jmHwKW()Y#maPZJQ6g%`2UZn@9WOX^l#)I z8`pKmc^%Qe?H7xxtWNoegZS%^JkUqunxB4&gA??J1CIT{SzmpAX*h8HCNIDLqCM%) zu77AJ#lq__y{S0=Hf-BlN8^lNgXjjocTt^=lj)!4k&6B<_m+OsBQ4zazVDsd{etXA z$p@_u?brNTj~!!N=QZE_*gbgQ4bykK@w{~|c`R&p__g1=Ysu>HG&@PLa33#_I-U>1 zNO!3&x5s5a|I;d-i8}q8@%`sh?Gtoj(NRF1O0(k?$eM_b!i)(Z>~LLSFL(vB?KbNA1V_>G=5;cM`XH>^fw=aa{dj%Q=tz z_)lAYCp0d5DN%lCl30^&$u9YAoGRS{5l`_3@7Mst$Xbu|G4R_DXn#HS>?=6_}|ix{rd>o4KI2yE=nI4o8|>i^yKT4c>jz#P4hwcXFa5~j`+2Ri z&%bF>EX*%`T7Uo8P`{#B)wOrxYi=#beDKG1rvFf$Ibr=>UR{1ZFx_0?w%3P^tNM@! zdd-iw533`tNZs;cNbleK#TCA|b9mb`&)$5@#^wEQ4@!!K-yOxgUdN>$$*!)6v)y7_ zeOqea&-7oEdusKM^4UKdx^6yrZCuqmw#ftQ_^eLH@gTg|V2z(~!rZlwyYTiazaP@2 z7yakeySCxC@826qu~=Oox88*VF%h zZ>y~+R;HWkV0NE8vg<$9_D=lPcPe(K|I+-CjqAj3d1{+Z#KDN4>n_L#jZgFO_rkc& z*Nlt57hl-;lXL%Z_=9JMsb`)!?W&W$MV+007)gr7u7BhFPh0(9G;fJ!I;x-RMzGnh z#&)LvNM2jf-{szH`(ICxc^OYf{N4!?2V=jDtNoD&8ZYxxUaP|fqxmylul(TeTXJ^a zgNKB0=_>Df&(2S?{o3(Ayo%`t#s80NZoKMGY8LuHH>+z)0rOz}bf#&1YIYx+eu zY_=r?a0PaXY(6ZE&HUVP@~Ryh2W)+XDo^U|u<96&!@H^6yB!|jUw{}DFd zM)FS=$9cB8bd1kHtdkc^0q2pUWt~+jdk>9Irs63XBo{AMe^--Sj_3*DxU`@=x@45q>k`om^e&H%dEa1M zMe;ytDX~ch8=i@_gI&uj89uQ}q2e8eh7p zuD#~j?_bnr9QH%>m+9Z1v()Opc^<9T5nlK}@A9fKb*?b&^C8CvN;EYFTVSZ!w0V@&;PtPDHcDhSg4ZevO{$LLmgMLs%!LE4*dsmmOB0M zn;vZ$6aBnXTl;PPo7m)omZ$w=OLqA@kqhouRHtvJF5I(OZC_J*}R!YL-JXBCyy$}|84PG-)d*u|6m?LXYX<=v-<@39*6nh zkC8a&oz%xR{@UM{>$r7w)Z6$vj`?F-`G|*;-?`gszy09T;nT<5|I6pUGJxNH|2Ui! 
z3+JJIA^(C!aZ{fF{MMSqujv-3Yw1<7asW$3!&mKXUM7bFj~ z{cWTd`Cj=9<2>R${NdpKKWudNj>F+y-&y=WZ#Zf(eD?XPKPeW|V_n_zZY5`HTu`p$93N2-Ddsv=(CP5d}i8qw_LVSryv=_W1oNfB~sBN_B+i}6ziXC z|EcCT1nDo+e`)Tq@tnlHVskgK$wzlX%_9y{Z+4&Xpc{S=e{G$PLp;u3R)6@~1E0C+ zpy4!p{h4_)AO5|A$FBeBB*o%Ex19I?aZ-zXG4_{|A#|vGYF- zz5ceLenqj$wRiFu~-efe)7b63qyOs**iXT)~4{+egB&dNPlb>pm@vujoCUr=gqv6 zh+#);=GU6Epm5ab&vrn*?ROf&Fs`jy@!-d_I=__DRERF85!YM=z=((L`0yyhd_e&+9;`q8be_98*@Vzhp#2jRg6d-#KUZp|B4 zJ?QGN$*O<*>H(Mk9e(ydd@cv+$Mj2+uEo19)AkF^ZNyL9bfdqv#54WV{86p`^7c@F z{g3j|C+J2-G;@W|=m{BMq|a>7CPz;D+-3?;>4`yc-QF;zdvIEoS-v)^!< zPhNX`JJY|DM{4!g>qt$T3JqMO;od*p}R zKY8UlUcPCau*Vrc{AswRy#AK!KO0H0&`sdToN`s@ti5?iqpbIXMbYF zPoF=G&V2r2zYkg;TF>~xR<7wAx>P?yfuFK!tA|%(O6-h&xUPB=YReq{m*!3PC_kV9z4@)9rqt*y9eR3{cV~Qi+6(Y8h>+biB$URh#%fe|DoJdr$4Vx z^05tFUY*sa1tiUeAo-y6@&C2{KR)TJyPa}NU-;!u?mGAW4F>4T&VPNlz}C=az;uKeqNQKWvuQe{%fU(Q&{x>;LaCuA*d>Yp;I#Z?+|d9nTN_vh9C3 z&#dV0a&P8eBuM|%8KQ&cHBar%w>t6^&7Y2+Z}F(kJIS|sVT0sjgX#}=9J}z5^Pf91 zeD`Nd4*vPaj)bRn`)?=35^w+F&Gc#TNb{WFvR{9`U};`a(O>)Jyk@N)QC{=6GQM|$ zb#>++?aspwq7QtAI5mb?SHl{{s86~ z)2Cm?Va;}Jw)SPezjhMgd9waAFWde{@(km5xs}=Qq=i}k`PX&FYjsmTc*z58`_eq~ zQ>Wv2aDx8U#y5Ru^4jMd*_wLQBS&93^-TD9|25r5lVY*H7*|oE<=U&C`8Cs|Sg?(U zzS|Pd^e;xL=&`(!-oEnoTmRe#v#*ZckbYt27| z?cO_d(5K(=6Zq`_V{+Df00qEe|-I0syv|bqZjC%q=^~}d?mlVrH{b8RU z&Hd)>=iC?e`tmESIZHkckA41%`jpYK?YynyDw@B+BaJ&jyZ#NnRO6AI>0eY-^w&H- z$C-{g9?|L2-Eqr{{MJWb zO+I{4-0GCigPLddGWuJat-kry_b#5_n(@bf-)8E$<^K=X{r+@TasAaD@i_-RfL#i?zojl@qqww=N0bcsb^zY;qb^3E2n|#xq@m*fM4Eujqq*;G{ zKy2e}Yh34Jezm>RI5K)t7 zhUWJv!V}q<{{4AIMSmxC{EwCC0Z&Kb-U$*1V?VZS4+GtGyv~Rl5B)mH_h5~mx;hN| z>~qCWK0NQX@Qy``UU%?&e$041-+voAzM@!}ZrX?0+8?@kwd46=XZjB$*;3J8c`Ls^ zg)hQ*zFy<_u?=+|7T59O*QezxUoRWJIt)iWvd!!#XI>hPTIr{ceP!XM@Y(N=hLd99 zb)FWt|2fs67kAS?{8 zyubra(BE3;t9Q)#@2S6Pt#j{NpMUW!chHyVU#OAk0k(?!-(F<>>G(;szymf_H*Bk? 
z?}iX$`VZxgD*C(Jo4udL`lp54&iBKQUZ8i9XEx(1;s?o(`LHdnyc6|@Xz}a|C7S7$@<{WYpdEkqslU;U%rDb_IFH!4F1Iq<K)c>DK)eka`qC`vG?EdFY>)MLcsXxNCm&*L#4bTeF5{n4u~zma#5_0blz@u+j+_uvHmVVAv@?lS*|4~AVn z{`PxT_Rqp+`+psuKJ5CRG$|J5EmpaXx07ak{%0Q3}eFx2s+n{rKG zw*B`>%yf(F)}-Bwgo>V;$92@q%f?TYSM!L2-bvhS#;wawC7(Vkb;{=ziO0zN$urb( z!@;)=9rwip?+b?e_1@-Kzc$p*v`OXg>a47}qgB^Xq)^_u>oh zNapvE3KYz^6K%;}9>X1Z>@vSQX?22AmpXpybs_3se z;b&gPqxHt$FXQn$Lh?b&Q{LE*9b;VQT{JIj#kfAU!%pA$(ZAih<_%$&!@tvB?a;H~ zx8u)tH-EaVp8v<6cE_m~Z}vXw2LYWFN#~8^1(Y%e|XDlw|LXYf**%jw=Q|x=lait&-Oo~NwL`XUwiBQ_xjZj zYMYr?^mo_AapR$1>cJX6b#)lF ze*cuu&D-|-VfMCf`LCIc{dqn2y#6^&iY3-tyycqtRsR3fNe*$ZcBX$PkJRcP|Nm(t z&(88eSD63$_MZw*%p3RXo#xf(-z&eh;iJDk@s-DKXl=Cm4}QD)@#m4xe$zhp4(d3w zi~nDE%=*%KOSHQ7PJDL#6@8}ShllMW)4xBD)at*M)R9lWF%k#8`{#A!foxay{-*Ud z4mQZVu)!Yw@U7L4-s=sQ_l0kr{=+-=yW$?Nb+-Q>Ns7gefAs$st*^GmonXAC-^%x2 z)X|^Xnf?PgM@4_lTh{Ba(GjE%?{fBxpK%q*18sg$-0ECGWAAUv>t~#>!45ya>!f!q z3LD)0zKg!S{(r*5@xSf&M|HfmV%m*jm220V%Llt^M_4$-)XI#uDv>J`sf2U?*I5}yw=(F-${za&M&Sv z{{0UbU$V=!ck-Cet>oG3yHDagADR9`xu>GP%e|SuA;>(av;LgK$>Y2$`JnZo^8ajjcv#atz5XB9nPhz*r1NNZWBRO~|4-Eq>UiQU*YwwHXueZS z6{9{g{TJntI{ho(Kem;pqZsj_r*{&MZTjPRw0J7{7T5ZU-NO%G9fs|9{L=7#f1V$< z|F?nsJcQ|gh>kzdjrRW+{ptrBYJ+`hD>io#JMP2glS(|(e>i_s(H|cD90!lRj?%m; ztJ6HG2+pei|;`bnY*kBKTIDA%m(2dvJ77qK#?4`F4olpOE{rN~z zEcX7pk9+|3PW_5vRoBKBfBsD+H{R|t{g>vEivCXX`vpxW{2=qyJg(y=AEYlZ_G8CL zzF|Brwqjf#+hN|vSO3fY>t^?#S$pGsmR$D{uXWrv!F161_>J`czZyxg@OrU+bp8^} zbae8_zJJlU)uj@ruT1}u++*Xq+$wqSlW#oq1Ao8NF%R;=xZl{19b;VQos46CY{j@f zw!@P1AI~m(o*R~|xaz~}UsC=3%ZT!WUVnd)s$Ws8Qm5E%ZmRj{(~j(h*qQ#L`6G0$ z%xh5cn1}5r(r!QTIyBzC?mC{&Wv%m0>WvRuv5p6SSnr5AQ(ic9RbeUhv+Mu0Klpbo zzW%-9F&~X%^_p+C=2gyrrjk+J|Ljo&Onipz(F2 z&O1fwu)!KXe03P!ci6K#zdXAy9C78+n_pdGziY4k{;T-<_m>ON%`oaix*hX;ZYUm} z$9R79*P66PF_T*Trw9-E-YIW{#6j}RMlVJDp!L^|_^FG=vw1D2Kb*Mk=?gELac4OB z$iDp_dGKob$o4z!+R$=)L}F4O#dRIqQA~ts{hYs+O zhYjKf@%KXV4U2B;`{~6O);M*G%MRURZdmiO_x zAF;|ce!Kq+x~1K9-ie**-^d?T^w+p#m9Hy%{dFYbognk(dBz6m3&h_G$v4#TDt6RI 
z9dFp=J0HDp%SV@nO{Xnh?KgK#hNt%W&z9<+$~>8$*YU(#u8nUK@p-4%R^<6mJJUbS zBX#=of1u3AyF7c$^Wh3JA+I?kHu)fZf#&awpKoy|aq2vF9WuW-uKuw1$=_e=Eo=3M zeVaE;TjQo-cy0ePniPw@4pb+O6F|wzbWq;dZt8h^?<7JU`E19T{>2Ct{gs#haW*{~ ztF6?H~)02S3djxaU>}g`~1~b z+jXN@11d#9**&4R5w?_hQCDVo0__1H05|I{;HY%ilf%sBp* zC$HS~sW4;oXG3q>t=#`~qz~KgjH=yMOuJF6az7l~%^8|cpQ+j%LFSq1KahKB^&iTk zrJfZ%;2$c|lIAA`;RoY7^QYtITO5ASNxt!6gK=Ku4^vM#|Fg%Q_lGd`V>cc1%A`Z# zvCm)q%CDH7RQ&(rpy@-rp?RqwYva@3O?xL1{4r+w59S%hALa8u=B5YZf|^ep^pQUv zmpqX7qxf}db@T=HCB@>g!5;qBMz_vB>!5=#Xl-=WJFdL;sVm^GJ^pML!`S;Q`Qojv ziK82K-TX5BhjNx${p0(OF~11W5%ex+9rLNx|KYqAoxRJgF+A&uO+I}Yy6(8;MSl7M z$ph(&&za`e`FQ0soS?t8_LSMreC?rATWcS?${W}H)c){s-kk5dLF*$LKdD)aFTVd+ zLki;gW%@78BNhFf<`>a1`;k=rAUbGX^NSLvZxG!<^LNJ2w>bQulYDqQHW=qc{xJXf zR}MJxV|&GaA1 zBX#;qRTdwTN4A}&YCA#GYf-mtesod9-wTadBV}HaG_Soe$7oN4xNZ9LjZ`h&ljyvJE_ut{9Sol3# zrN30;jH4*tQjd!Mah=+&NpCGmYxUnqn5~b7*!Fz7?zrVee&z|12UhfF9OkKf9-N>* z{Am6cKK$}0I^l^6KYa6yAD92X`JSX$*e<~I*LwY1s&UYX9k0Fm`5$)XTbW;5Dl`3y znbhjPiR4vuO2rR9anL)7n~h$I_(AKhBX!;>Qilyr&>ueg$ejyT85j)bOu7Gt-(LI@ z{nvf};`{E)m-}sdYjpP|R=M_09^3yv(q}4u_G`>L)4!;w=uh3C)RAB5qo~eacbeDy zaCg4d!KY|`t;dcLeGC~7K5VduKm6#h$Mx1iIAePaLEVv(Y6Q-{NiUujppX@fLMX0tAGNwi$!2hk0i{Tcc(?@a$RXQ|Ua ze!UnU^9MUQOZIzt`8v6JJv4cWj4xHC-uS`Fcup$JkF7{u9frNu-r;@6zkVp}{Y<-m zw?kj?T337h;gsV30~ezknCLk?@wuJ8Kzsi!RLXQplW~_= zm!@7f?wzFbyv5sdrhg~TsOaxZ^ySD8?4j6hd&(KzvolKpL;AE zv(<+CT==JZ;I+?RoupXM$*;G#_yaK0uOacewRaLR{plwaKhG=MU8aA39;wrx?-j|9 z`|vKW&gz#6Hoq!JJ{WHw<{ub8-{SNSI>|RaY%tD?{4ICbdk%W-#UE+8MaTT+{6|)Y z$G-m^NQ%X-f7n9LzctjaC{{La$0I@P<`!zx52*FbFVlY@&#cvdD36x=XI_7X@D3Ge zN%Iqe@Po_~G=DmNzQy4Oo#eygvB5Ym@`uJP7rpY*YIlZhR^IP5AN=kQ;IZS+Mp7(n zXX(-U|C_1$6~!7ye;vQ}_>=y0oJ{}0oU5Wgd5dHu(?jd%&pU~8pYG<_-T77rpQ8CY zx`a^T; zPaZgJ+Yok}yz}+jURdh?9^vJ@Cs^r2$Cd2rn)#udcap=rKzK9#7v&ih{k2Z2vv`q} zVM9EIng<{61K1#af%tnN`Gz`P#g6)@;|=ciZQpeFn>%6S4}9TgH$QkWJa+zVI4KtV z=_I}Ww$tq=4oWoB$H}ASn+1v6{<|0^iw_N3ZoU2NA8h?Jd|dw!BmJ=5g3?Vn|JZJ> zDIV*e?Y1L+cr*Q%=AMfFPV)F26ZsYWRR`jr*Lt(jt1ds4@tB|WO`Y<2aDx7@x7+QE 
z!**$eJr;hlbNf51_kSKviluV?vtRvSLv1I>+N<5omrVbWJW|o0xF#>w%{)Q#4@*9FjO2SKd4|ku zIsIYQ?e9M7`x`F~vz~nMvRekr`wwvZgHDQd$Di~eU6pJ6wf}$AmR#o9nzU!(sOYcr zYexB%{g3PBgV%IZp4cW2tbISHUdQp^a{3u3tiSKuzCQ1Rdxi}b-}2h~?zsd$_WRaH zBPo{jDZT##<0(qCT*vc6H}fl>cTz(?nf?W1MSsofdH-SU2R<+!$NH$tr@u~je^#gc zz&lZY7<_i8$#-Pmzg*yMK5NQ!`(2Bje`w42jFaA?*B>&z_AgoGI_?ME%wIFVO#gzp zR{srT4AYhVI^y?EkT@9otq<*=JdpjOe(|pRA8Ad#|H>D)TkSRQ+x3ToNwM&KlHYNc{0sPb z#_uFvw>G|BzrSk>L#BV4KdRH8?Z9*@va@^;9Y!*eY-!#oNIpnkp!xg9&$l@Ipp$%f zJT@5TMgB1J!~++6=V!--EstCEvELwW`wit~o|*ogyu$QXKJRjGwmtW_ z{}ntjdY4y^Id257VkJK<((QFc-1>JK2jRn3q^=Ibyf3YN^6J_<8>5FVnw2kC^^W^O)bD*yI}@aZvM?`}_AsH$UTmmGR>K zutEC42Gt*S_{{@1{^_xAg`HYcUVL!V3%thd`^WyISWJ&~iXUKn(q0d3jYs2%cg*t{ z@Bdv?*AP3?e;|KkYn8C&8nb^sd(T6?*4gn7yo%|@`Ojan-~Xmbu^4Kdc*p3a{>u07>ZhJQGyMm1 zj#~YT=$QRTBhSwA!9KM$uX()K@PnqG=9^!6uw#ttywmX*7aOec!yAqmc=*RhO&<;) z+~Ta_{?D4v^|$B)rp3=M&gm@o_CK|z&uG4x{zG|YMSqujGyk9<{hRK}g`V~`PoHY>+JZalN5`6zci(|{`Od#Z>n*0bYipiYS&(W zsJhc%rvGqWTc6RwN66agoNxWkB@WWS!VULraIB)*#N!V+( z|9JVZIZNTUvTPwA^e*>ilHaq=Nu7^T`>6AW*5X?SZu!b7VeeTW z7Cz3yf!0Sy;`X{=oQC*Wdnb?V`xmwGvmb*u(|;tdsOXQLZ_T5np43|(t_WB3^N}B$ z?WOX*k|xDMoc^$_-gW0Ks~^5Fv-zK2U-8C2gstBE?7-YFp2GW?o&QgjU(v4rcj{LZ zt6Y2a({EFCGk=;C3vv3%^dHRw75!b2o>abvZj1k4*1sb>^ug;BJ#}Y zAC1TNamJ_PU^Bn?e3AJhKK#AV=0!c@#o%a>DKe^KMOEu2;isH?-W3`(u%4?|I5v0#d|DvK!fBwgfe6~+R*Ue{n+K1OT zeSzeG#;0}Wk9lG~Y_Lau`Tv7E|KfFzZnT}(Iy?R!Op1m5qu=(QfBz`irS9H|r}F); z`k6oT&GauK75(9f^=Jq`^*+jL{!rrR2imwX-_ZE^l{y{AgUVy}GWuIK!pApEgy;}@fjd0$VHVzKv?h#$Ux6~o1^dDHEWuD^2l!ZS}?^6tAo4xerR{Y4w) z`yt2wZS{jrZS7O6@^xVQpD2&{qf4fLn%7qJSKg*3l5g96Bd;o-f8wBbd321v$`_9x z=f`nu?{cmLu-#W)W9R^PP$KI2ER{s8l@88p6nDYGA8WX(#r%tTWCptJ8 zC)2;3KdRM#y5yNo)JI4h^e(DX+xqGJ^LWhTsRtP!8?5tdJbdo$?>uko?=K9CUO(%< zu3oA9{Ch-FET%)h+Pwc_gV8vWU2Y$C{fUDoLiK0*ck+mhtA5i-$1^<|d3Kf$y27;j z=Kf-n4;r8L9s99kjO#q(DUaFekLzPQeDnOt5ACqqI^pIe&mVKhH_H1T9-0(O<^D6f z7v~@Pm-+Zd`!N0Kv#tD*o$247N9y$FJ~qt5bZ;o16C@7$$RF!S9%#JUpZV!m`QSCa z<@ASHv;Ow6Z~v(o8h0&x{TYYNgNM(%_WsjJip6vNXR5ZMSmpL^w)Tr|@W=DR&h#J1 
zYfb-XzFbFSdbDLE`otfjcan!6+prPE;RQ8r{X5CG`o754IOEk}@b@h_yYIn6q`G_^ z+xgFa+?I`_S#3b^jcSY|2MAVY$wHne$>&2L^B=x zU5>=ZPK>AY_7 z1Eh_ZpP}k6+2z_hdCc!d;kUYu#OWu~e`)Ti)4w^lJKy+pd~EnY@1i=j@v|Sb{2D&X zkLxFBhyBmG?fe@D?hEf&c*!RBo%I{~=ld`AL!kB7pA<_)r-t}hdnb?V_jhVD4hV0i z|43d@;Z;3&9?7rU{)vO$<*eoQ$haVRpm%w8xm}x|=fxFa+s@(f%jpk${A`U=UY&V( z*yDtu&)+^8sJHW(fb` zyygD=X``DT-9hV@=OISMuk%OoFzx)-s_$9lys-I^KV7uqkst6Hx8u)FQY<#Vn*EPt zmD@{f+xa&q5!?UJXG8q0NqZL$>hzbaavbB)di06)!Uo9$@%KXV4V`pUvDM#;FKl$f zy%(H1d6VDT0$)N##TMt^Jl^G>{F>A<&I8zkGjcWCaq^kdh* z4JF0G>)G$A_dkSRQL?IQBCM+`bo{aJttzJOAHFiiQ18TKxO(i^J#!N>;YL zr#uq$PLSh3>pK-c`OGuZzmZ2O`s+M6jwPRZLnpk%K_B_859K8fWL}{8d+5nHhCTAb z9@j0rd-#QR*z1gW?|)$FH>tDx{|qF>!glAE@&2n)HD>>#$Bj1cw(>eb=9lT8=AJtJ z`5uUT)4ixF$0M)#jhwAK?y)*_Q8a)5`1ux(>b#SDn-?}nJ~pWSu=Vv{`^e(CcZ98f z@NoM>Ke^m%oa0Zndqci|Nt0r+`A6eOwA6#`*7~HrEt$+O)4%Z4>Oat(Z+wgoYF_gL z-T90Y*O|XDe!j)w2c6^_A2t~0MgFkmXZ{~^=N@R;SpWakO>#ZMjJX)&G0Yfau#I61 zW2|R;wtHi{CYNcGYl_?wbtkJ z`s{O_HLE>y`hNSh{#ft*xxJsy^WJN(z4qE`uf5H33;UJ}Gw#}>|9#U=^BT9`zmTt( zZmH)Vx|3o-C$TcUYdq4p6SVW6YFocei8H@U|5omS*Sp*r@;t`>zmpbW+wZ{(ALyOr znT;-r_(9vQv_IhE~}pQz7tV)6dFf%T_xnO%8JpPc`+K2yn!_ph1$-Fc)^ z|E;C2q*E$>_=$twN!;vcUid-!iuthNGlWma!4F>*h9hPif8x$Z-w+O~?>qhVht8s| z^7v;b{ryEWPExTLpXz4)H>CiZ{xbc0@`&lrycDG>i!YK#yxu@~Iw9kNo$`s3Z|JlS z`ovbGt_s6;GalP;!`H7BmLacmzW*AK2=8ZpBh|mDe$c6{eTbFusa-n%TpOOx>nYQJ zQ64eB_Vx`=*mm;`^WQy&c`M#|>BUb!gx{`z zF1E%YdTbV5|DfBqu$R}yx0S@O+1@#>V*ARr|HXM`rT!ZV5BbKEir+gy;$ZBzKD2-G zK=O@Wd94l`R358SKYUdf=DhRV?D~_Z!t8}_x%}LlFQcCGo!DSn)9VkrlVUNQYHDjA zVzr;|((f;v=BI-6m+9Y^dyLoRR%SE5R$;c!zolZAbkaP&_kkZYKIO$PZ0+qGKN#0} zCl%($R-~>9!#eMn{npEO><;d)dmOaoo9*k*4~=-W&vd+Af7sGEh(1pJkzHSN$^N+S zO#l8ov!uV1`qKT+YvSkq7ruDDUhDCb2jU0GCthhQZz*pn{f)8LK6vjvtIlXlIPKtL zzWU=!n1|i}yp`< zMc7^+rYGZQ-1<;Id7zK-%pdbac~;lK4`2AzCFw4`Pp9GGPoDC&XS#Q!-p+saB*kLq zAGXlzKb`s&#cHp;`oVgd6pPI}ww=T?{Ri_$CH*zd?+EDMbgxN0{KP@8^=6}^B7P7a z_)C7}gV*wx(hpxa>Yl@ED?f2tIObD_-L>Mw?eBj%{*3t2vx@tl_uIG{PogD`qnq`~ 
z{8cylX^h&fU@GZP{-6v*zU}u@@f*M9{ilEb9yWXUrv=`m(5%CldL7|e^fX7{gK*M-;zr7%=9lZ+T-c?@aXj~262b-V%;Q>eJ4~@@!^VT~@pA~kw{ZGgI;^y}C&s=}oE%UMc|0MnXv8H|< zU##-lYn<=RZQfBm^UL%vDk}BgLB=2-{uqga#A6#j^DD(w*OK2Q#bWhUw${ga@P&<@ z{n>ASd+VKH=9*jG*R%EsJnwe>Ltj!X@qILnPx&RQy*8aSURT|$&PgtL^pojd%OfTI zHNRe$I`Zk)Q1gg`KJqgzNFK;`53l(v^n{0T3_IjE);VIiA8q=>>l%~4c<%AneEACM z?EBxrq*%C~$@+5YR}`zgj{8A3^Eb7=Cdm9U{nOl2(qHFM@AUcC<<;%uBY3^by=}ks zp}gdQ-sRa@AKIt+HQ&2D+TPz9pDWyn2Rr14oxk*!aR)pT%KFjqVjJCvdnXz3{4)KE8JG0ee5uODS*-PGggU|ggbwYRguX_I_uYQ({OFn4hM*6cokr!iJ=biAI9~-1^Y*77S^5Y9Gsjc4?)*CbR zzBP}(lKq|Sf6}B_Y@VZv>;I4SPW_5vEn)lD_|o5hd$;bKtE4|Xoz8!f$Nh(&$k{SJ zm#>HHe_Uaj-{#jZ{#bYHW!54;wg*F=&zAIA z@@rh8?dQY(e~0nm0aK~Sw*Q{|QAvNz<30wahs(3u`g)gJ*?qFm|4(9jCuntyt7!g3 z!}IWi%d9`+==dH~f7tr(J@cElKNtKn?>ys2zj`}yJN`+NVzKRGw4Q%+>Q@x2y!IL| zZU5?LKg9ep{R;(3`jfAH*O^xZ!uKD4|D{QJ$FVIQ zzX6iX-lC9Az3h;6(65WSsbFrHUC(|>Uu zsnVZ3(<#lfv;4Gx_Ws<7&3QlaK^s^5!QTnvI{UtNI`gdY7{fdH+@2 z>5n{+{UT_7>RdtBe!fTOZ>%=$rfVO#(KlAR=E}=^-a6hZukC*Zb)0U+t)lbKI<7>^ zYvxylv%aR5A>kiJ0j85cW7@(t-L?o<77eQbyE zH!VE=eQ$h7W|z%ddF=iti*(-L#17a0=(xtGHs^oh@#F0)(|;hZMF;P4Ye-*H#fGP} z{k!3D%ZvQX6C@Axn$LdC{1rOGV|CPb$PYU_@tqGHdG6C;{xSZ2H|>5hJhuO@CB?!# z|L^~Qe7C`Tz)Jn&^AAyf-sRQp*N5rm3b%bP!#qLqK(>oGj_sAtu!GK)N1f(3*1dn) z_`g2-Tw~oQA7APBy|duADBkkg_*6IScz)Q8Q8GZK{@ckIrjrxf zgI!`12Z_hF^^*>-qo2w;C;7yUC-!%+!^D$zzkA{-PlkyvjDOXZ*& z=4m$Z4*6m37mt1WZWAVi9sWM|^W%Qf{{N9fl47yz%oeFVU*CV>Rg`FXOo}G-LVsxPyWFN9KH|R6eEsEJ?%ie@ ze0KZ;uj01F+wbnr(Knb>EX0@o^KY}3Q7HUHtxcGaJ`sMN3$`6Q}Vthui2y96Q_p zq&zEq9^Q!^BXN*;Y}@#Ygva8Mp5}Lw2VX~wc*ut@Y$^%U_G8merhoBFmGswn)N8^+ zzU_BX@yC3YhYl4;9taQHo%yME!^cAhtJ64jRT%d9(jQ;C=JEByg7@}J{Y-KjJa+wI zw~hnODDHoLQap}imDk2sQ=Yj0miXx})4%YP^mmenpM16p!zi!*Ua2D=q%Sb`W5-Co zA>+mKQh!_@+hOl7@A}lZU(N{o?loz@t9D4?<$ObF`|8rqUyQ4m=t8~O8ZTY{mP!V? 
z(O0H_@l2z$*LuSu+CILg-Vhx$?>}DuSFiZ*Z}e_CuCdx7AN$LGJO2v(?DJPsby6(- z|26S0%~QR^8uEU!w#K78#sf3`d-BXG{Y&?MWBgWL)qbDg`2@YoqeGrISD5yGSK7~{ z!{f#i_aF0MgB|k2Zo6H6|8etf4tpVDW*y4iM z54bnCah}N2>JqE{_h` zj+bAhx6Eezb1b8D#vld^D6Vx z;d$_p@14d$?JxFISA~r+eJ`E<;=!NE{{3a+%~wsIqTY`GyLB9Ja`6Wki{g1pR(l=y zS)Z*u^jVASRP0RuzWh;@{*sl&d*u=PzpDKY*FnVdxBj%BI9_GPb>3+o%nMtwiU)rf zbHS>=UbxYgVa$i0U1iN(u7lsU{{iI(i@$%_VR5WKb&}m)+4?7J0`syRTwPl#U{|)m` zop2xZcKolp*gVJFn!f;kN*?_Z+lMa%8@!}`()H-!It=$56_!n zlvm$*c)ry|_VE>d(4H^qbv)wBieb{}KVI+K56uc|Upnf}7k~Q?uY7j@jaE`D zyuYQd6rX>a*oIn1e^Qlg$J%Fo3+18jn%XTv`p)z(GD_ROlRU0tA>a0k%ENfXL9g{@ z(}yB{(4J3twH|)>4B;aW?BEaE{cOE+yS{aC*#4^J>T_Bjf#1$Q4?XU?aPntd;Cga{Ta8n{Aad=&(44LCdFdMdo%U-7peL|^l{>6 z?bWvZKY2~bU^}C)O#fOQDd`W-45=gEbbwcpIOw(B>`wVb7g-s+(5D^$^d`k(`p?z#pY*3FS?zvf+phm)KGa8crhl5(R_Y)B{<)?m43d0T$pYvjEv#*C;H@WJBi8r_Z|7*XbSUBD@ zq_0%tAbl!IG}F<^Bm4ce+P2@OPI)u^i$|6E>qMB3@w8+d^dSy<9oKBeRm2Y(Zxn|g zK129)oTc@Loqu`J^*27(A9mU7(J3Q~h8~ZS6y>OeeKDZo%exW;>&wO#kk@wxqukMxICV zOZsabanQ!qxY^ntc_4j}XMWB1@PH%qhxs4*$u4Vr{Eo2OFTb|jO}}g3|MvY!vDkJ$ zsCH8^9gbpUx;Y*RVxt@LiRXu%>EDxAmh{*8@jXsSk7)kHLHaP8d`0}AZ68tG`geJk z?fF*vU6E$G_{a{Q+k5jV+plnCxa8ww*ZBC)?R#C$KilW79_86k&wtj`2DMM|w%7E9 zZrJhs*p4&(7vd9AbWU(%#l&?o(T@df>K`zP8uGaL+p1-d~$M zz8x(9Qf#d252qFVlZ?b$|~fZZ>*#$hY@B z#-R=y9HBp4^zolBeB1T|;qoc-Uj5Y5J@jeE{{u;}*yrEPi}Rni@_I02d_}2h?;p{v zq<>2?(IwNrFZY!6S6=SpM?Tw&q06h=pN}oi6>j?+Y<(y%d7#a!Sy88a9#kH)m(t&G zlV1Mlci!{mhP!_I*I!K@r!U+83?{{5@59=U&9^4;sDIfE(dgjk~;H^?eJ{Z@T zf6~f7oW{g?;asw{h6^#DkW9@{{e)zlSM*PlRKNFTi#*eY!Qy_^H^e zy^}|EMVog`{AO!B(|<6J7{B^0uP2YT_b=vWd{=}M-mmIMij96?TxWjm3p+*+e>@)k z7_0KsAEy2Kl@I*k@+-siM~|Jk?ejC}%g(>Sr}^oI`uq20QY>EMVr9B%UTpKbE%FUp zlF2+9qxL8qCHkCnVoc`zqP$@5C1-o$&Qb53YSQtaZ=lFI@eg zN8qvhulC4{*`B;BZh-zb_Sc3wo@BSz#uxwoOY@0y9GdB0JcA|u^?Btuig}gx|H?}o z^pW4jRbKMInAiN851-)({o&L#Zd$)_%d~LL^WT}X*1op=+VyX>q*&-ztoD9B_FFva zr|yUIRLw8b-{qO6KRjR&9kTzSKSgxvg!Bz|$|p{~q4ra&JRCdJ}4BR#6v|D0yqJ@eOikvwEYU#0)Qpspp!^p)vfL`;7taSx_qlOI2iaUSKIqi{QZ)T9i#Q{ 
zB+rBJVS^p~Vf&X}+xLhQ2EtAcet%MHn|FguzTe$bxRh|l_W(g(aF^*7c&=5MRd#zoF)#BS<{9@$)*ic$yRo z{hD9-to zufHs;Gkeh$58Zh)^;N&WNRKFvKVL=%Fwuqj*e?D5jy`W}_w<$N-;>wE8_%gclSIunR%FFdXTz3w_ zLmrq!3x4{lr;jKs^0STd8y=C%6`J8 ze?9VpA09tB(FK0n|94Bk^q8dIKc?yj=|@qbnT~3gzJHACYEqHu-9SXbS!tLB&Kzc{C=(x3C-rbkl-=Ka(72gG0hmLU0{JPhp=iJv! zgL=?OobQSM*T4UTKOPT%j8%E+Z|ncm0bg_vp69jR^zToKrF8${rur4d%5T0$9HBp4v;3Vi-tyF$;o1dje(r^{o`%nk zf6}B_=u@mA+nbJyKSuAwW7i+jXH)#VZ!*8es6C57mHwOuC!hVIp^ir$-{X-FG9S?V zHR%gGM)D0Ak9lE(9sFU<{%iC7ewcXfthEo>X&-p)^VgvC!+bo~|D@{Ie#L4(FJ|lb zoVSPH^lC{a^UU-wDysC4e@|iCz4ie=e4uy3#~}|S547#SSy88a9@Kf7-NDZ|;h~-0 z`+*&g+cG@X_p!&0|8+{e9sev!iiOWT))(U`N>+Q#yu1@1x|zQze)#Ao)4!-F>96B( z9~<)Rc}$hBsBV8gw!Fx1eJC$^pgpfq-0GCiN4(U7Rr&bC?7vJo| z$^d@5{$wyI7LMo8i~WY7_DQ{=_E*>I_RU{YUh34&^e-Nj^oM7Z)Y<&h??L6`zPI&X zOFnU{V_Ze^_ewr?jMl%CJl_c!C(cuU*zx4CKe*&!7n-wwv;B-&bK$rDzw6dx@ z|BsnI>X)o+|5M|U)^!P%&cCUj@xV;~G_Npzms{=qT76xy(S`90m6tf^T~rs1PabI7 z8F^YypYR#Rc}tleKDgf>o_pgfCx;I%-l+Fuo6msH&i|)LvG6)iZ_IA~F#4nc>#wQt zXngTzx~ZT22Da5xr#zYd#iNq`QJ(4Kbo>~VuRc%W0ieSGX5NpVG8{hvV)%=zqeP;R>o+|zM zA0P7Vc~leKZGN?4TFKJh|4OolZ2zOhV|}J-w*=`c)4x0SRO&x|c)sbBiXVRXK<^}OHaaTe z2gzst=BG~iEN_JVuln$m`7wV*;vnP2HvYH|Z1^kd;`zijeZ_vY!%1&B{m#*!yE2?~ z`p4dO%KP8WczkYTdu#Wj$3Ly4SjY#(+g{_h>;Ij^jISj&^UL(_%{`U+>-z}###0j> z?*xg1^g-P0s^<&7)^I+nbK>`4mChafVVnP{&mNP$J?t><(Tksc@FUE}wtxCiOy%

    (OAwth{TJt+O8qm4*d{kwwB zUf=NeW%P&Ua;G~8AoX_qS<`V8)8h6U7hb^g1WHz0Z?m=E((kWR zozQ*SY_fkJE zVeGtrT+?;*AHvuVoqpcvNni0=&-*Rk`-0Yo_EplK{vs@W|CMT=$^&nve}5jS)W6gA z?}QgV&_{mLOZy}b{I}`9gWi9B=QEyqc-M;;(U)!i-7+83V=cY@Ty=}}m+bO(VfWwG z`7(~7>XYd|kaO60>L-u$c&1a-2YJLnuRLZ)ec%U~SImbEpCNoY&eHnB2Ob{%)xG;( z3nxsNf6Pj6jE3LzZzjcJ$2t7|H=4I(Wx8n}v5juT{1wI%(XFxJ-)Ea^l) zIzDmGYrWZ;M?d&Mn=g4-qr>OJeyqf7m9E|en_iDYyOLdxW^II(WEh!5zk0Dj$!c%^ai84R z%R9;BdB$d*nf^s*rR|@2)TEw#<59njn@V0i%C|cB6!CXLOUxT2AgK4+q%(qnQj_yynII*#H01KY5DGFVnx4WJ{I)+{b`?>!X!dWpyrJ zw~sR|&lPSvF7;|#pWRVh=SAFjD*7kSc$J^JDhxYp@wLg%ta@MA;UnwxJ^Sa^;IaMB zp!CJ|Wb>szFzQRH+IlSg`RBdTytYz*mq*+B+Uu_-e(wb1`Nn?c36}EV(|HjmA3noq z{!8m`toi6!eHW~_USr}_&m8vMyV}41eP2>6cK)G9ZMpqb>H8P!E0sLf`0GwQ*rpqO z>wL5AznN!N>OWI>OphWv%WsJve&V2a5;vQ174d_%okel@;WLC!$5~o`nDv{3Zn^m2 z%fc3)ynN=4yI)D2U4J;36iaFQuc;r@@x|L-)31I1rFm8J%k*#M)s_0k_aAe4c9!q* zbsNzU^e&GMdEL0eY}aXDaX$T*{Bix#+RR_`!v!}SK55(wH-u}C-Dbv?AF6rvR~>)a z`JXf?7JJ{T(jPuOZ|~KnF8*Ad>EE3bm-N><&SRJ!wLCk^2VG&>e%|sTKe~eCf${d? zwVr&l|3!ae)M-;5^q*d?F?x^XfAr?IUGR6@|1+*)Qn7fAqZ|CC`89Q@x2yr$pM{fC*C_Q$+4{rhs3D*fSWZx1pi+dFai=o=d(55(UI z$v1R5eud7R_`>9wo1VA9*k)Mozc1eK;M4Z=%4ffS?@@kmVzK?--D&%fs!T`itF-;6 z%1hr(u`~Vq^G7B9;ZbB>rR_h;tN%oJ$p`5RG=J}s^DVCY-btR}2>oH2l7Zsf0uhR zf2#o7ztC?9HLt$Q@O;LJ^YKe|`*Q+*Fs}1XD$I|qNL>|%t?oGT*zr5`hpqqk^S}Ij z-*NER^@jt>4{oyL^FP#iC;6=L*C&eyo9D^wro=P-2lGc&`qwp&`INSQCp^SKANg(E zu8Ms1SJo*XapPM`KYfMwFMf0E)wd0XW8U-HF?T;d3m!ZEuO-F8Jkvh4-Eb7Ey@tm- z@u8dfE9Tc2wRcfarN7k=2(NcbM&|b)zyF_c-jUNTx#qR7@k!r4=l1D!=4bbRi}mj= z&i{X%{f1$rgQ${i$LhD=zo2_68N3eAE7QNoDCw_t^{G-vzK!3le40lb^e(E?JmyOt zXuQpeI_2{qeAr+If0(=e126yVsfWVcmnUrc#lPK3KeqqrNs7hJ!v#J5gjZ3r%4_3O z-LR|Xm+9|vmXiL3H}m)B(f0n?yWDC&&)(%$JFkAJ*ysv+Cw{}4+ATryVzhkb?^`81_A)4zCB(qG4kujlvbH&j0K0*RZ=xK;VJq*yraDAhTQgYaR46@K`_l!HI; zh3>^44Aa(s-?tvW`BkrV_W27Q#q>Ts{)bmFi5AmY>(I^oH7SVam+9ZiBUSqIx}$&Z z^6d70&b!>o>{bB@=l#eBtq<)Vzf`r~U+{z8Nru%|+3KgR3d8gj4*T3Lue=sEdgq#} z{PnP%z1Fe+XC8*G*#Ar;4@`8SFSDIIDjolze^cW0m+9Y~d#d#3dmQq4{tVr4KFf>z 
z^a+v&#;;HF>+|K6&v1nP#`+(X*O|NAG!V&_=~)Sg*fe}0(t zr}{}W(@py@+sPwzBX0Hdtvs3jJ$Xfy{=(MYzKy3P5%^6n^<#tNf%rQi`IYFF+(#8} zSo6Y5f8BUs_ps)l_Fp)9<@>zW^ZN_i?pw+`ruYDGti2zn>PN>|f3-Uv|I~yh(|=L^ zsHDI0&=2z|_2GnvJkEQQ561f8m+F7A|Dg|%@nT#3@K#}%_|U7p>$)$8Nn6dmW!LY0 z#%mqNo3?#5lVVA)D7OD2n4h86F)zu^bX33H|AanM$zZ=hf0_Qhd8AVReyJm$@k*$9 z^=}N%uc|YDYsvW*r+?5%zKxF!#(9xHEcp8G9$a_gAz{H~H|}-q*>kmdytVrJf326QA^KMx|I{Rt`DOYq&OJ7slQ{FM&lj6~ocotV4 zw!e~J`K+G&rS!uWHo4%@segI68D`%7_diTu?hyL4&%f{~rpM|1KN(LkiI%u8bo0t% z@un2gPo{rg?m-Xla%;%*82z5KwExk5-~+wOS=##*8^1$7{KljC-sNoVzKqWmZhPH# z@`pJOzWJV~x*reoZr|zIRj+x`YrP%+GoE6)j$VJB>Nud*i8t$uI`x;1Kh+=4FVnw2 z=cv>_&7(tfObgS#m-J3-^a5kQ^{0K32UfkFtWL*)cclI>WBADNA zS_vMz|3FVtEO!29<~I2o@H`vWNjzT1@p>v5@W=BbKhu98&#clve!h$^dcMi~U%&sb z%eyOYU)F~!!nXbMd{$wYwZ$PnxbWMpFzcE1fBol6-tby)_g`u%A2?dS|Ej4U>{44% ztgKHbk2r6GZP=7J{bl+O=AKIZmA|fN^Nwuz;_KD%$94ESVO(ePV0@Q%-=23TzA*c5 zKe&DU)j=|k(&w+H z`fI~ctn%7B@zpmIG#%if?^NQM{)NZ%C+@Uf^GpwTx+RXk6Ivh2-zlGXT<4vPYkq7+ z>Z&ko^|PD5a@;vb3rl;yX5as|B;vtQlkz_RUYqUJ&}p8|N30=ylZMB6omt;CiD&w| z{E_KjSnbzyeQX{b^8TxJF<(4i@A9f4b=oK6m-5ph-OeLu*f|`@_FK^kb(rHcXCfJ@T^&@Z0uZONxbg+WlvuaU`p~h6k+Al^8aB zW;Z3CZU1Ths8WCZ9k=Ne_2*q)-QLbE&lT><>n6wpO@HmnYrXZ04R$5P;<3RF{>HlR zz4E|)UYgNZZ=Xl^`0EHB+ zUoqwX@4Ay>De0toO0>N{sB69TnMwva(PyTAE02_TUG81N|9=(FRyn`krTIS>gdem% zG|&7!OU}2r=6ff3h9mTcjs9@LF>BqvQrP6D)BbSvN(aMd=igdMvDo|XYQ_Dp9z!=! 
zvN9cNJkmNR$aM=gZ}n5p{4)K!^NLFSHx)keski=A2jZZ2Q5||!SiHtd`=5&WW%~EzR8{(;cS#TC4=O+B zJ;(>W%SXw^arrt!ts`EA-sRcteD88=2#+gFEn%JlcMLwef4>_fD}_uzr#t z^9AV(G=I9}e2ddB=p-K=j}69okw47cWB$CeAKEdbUFk!+KHWwAvYvlPbfJ&fF5UkY zy;_pV{4)J}b5BWs=FuFUU#Y+5Rc`K_(3Q6#)l2Yd67TN+i%J?hph6|Fz-kAp7#3mgT2P>^B4JwX`?v)bf#aL z6pMF?I-XeV{h}TJkOwv;llf)(_vM}{{p(SFY5Ujl;RC(PS%>VmG!Na#1C7@eW#!Ke z>h~ag*kFa9al#32A3t~WsUe*4$K`%_%sS=w-&#^E)=!sSe+93iWVP4K%RBL*oB83@ zIQ?Y$_vaNQ{dJsrnn#D|*c982f1KFxfj;tMTR%NXu~>d8HuE;W_Tinz!4dky*pHoi z=tuXQ6vi#ycBKoCIssnW|I~CGaI{{3%Xo^C)n41YG+s|7=AB}ysJzTC(|;h3RO#;* zOC9;>W2obiS3hHTzSWVZX#RA``4*4rypw#JFE&U%HmLrv>*?R#{#SQQ3%mW#JAN_m zviq2a{r;$#6pQWuf1jUzn{-3G|BR$4(du)<7P~*Y(tr0YF#QjZdD?Y{g({gYT>sOpaYeDRc*-MYny{GV0>Cb)SmwzJA|fF@1wc zwBQHP4V!V|`DOaMoT^HHwu6!$QJ>`17w2s4_v@-U^S6q0dp+O>t$)UIl5c)&FwTqo zVb>48xyfdC*21o1t~qArk8blCxBIWANwKhf+5aC!e#y#ob378Hugd!mNBw2`*Ye6r z{dc|ssR;PNDu0K>iaWK%AIGcgxXvrT`LQd;jd{ZcgW;q(pL%PU@xtVv+;hnt@YwZN&7@e^PuTtUQuTvQ z?09W_-2VjKHJ>=IuT1}9#FGA+UsoRHVSTk^WUqO|!Pt*&P|8d`1 z=1f10OICU9o%py93G=o2MSW-b_v96(zss%m^XU5u(~eJ!4<4{z^6D24&qs&2&iu_K z=Ud!K-0HEbkbWxS;XN0;{;tQSH^Y9{-|^(%)As-GEYNYfBV1?6_n(xkOeY=BY$uQE zH9@a7d4?L#^k0-`n*Pd19>1e8J<>co%TEii=Y=@v-S9YkAbB9$8{3)r=|}nCg~x1- zQ&)u{T=kaQe({Ty5?!db_uoMY*yrG;+Vr8Q0*-)S|8fK`O_umTRfV#cam>>*dY1Xp!&l)Z+qy|&uw#cSZCC7zdPur z<>29SyXnxK6pLS5c0e}YRQ=I7;>~n(^2q*wfxJ}wJP*t-(|>Uuv2mT|QP(Xt`D~xQ zrFl+K^SF;7`5^NIV?TC`#*=t??ad_?ehk>M6 z?EQD6{OsHA&)K}25@(H`W;=OQI{w!>#)B`@zb~(-(x3m)W*&@Zs5%e_y~|nK$3Zr3 zRenwK>C1RD-#d+iBlI`MfB(skJaD6LtofF~?QeO@OnB|{SC5VZZl&ixo%$8UDzCl9 zOZQ(&B^TY8U#5S59x3VXB)>jOko?l?Q1gg`KJs_eKTV2-JmVRm&fX~>s61irXC7Gp zOWkSM`i%{qyKL|2jA!TH2DJ`sMm)+V-tyY`w$wTu*K~{QO#gvAv!uV~aUGrM;qvS( z-{tEz5(mA@qeHfTSD1FZ!1ZNuKK+;c)OSMWSCJRi`C7XAbK9>HHhlPx3m-YC+iRX3 z|5LAM-#Pxorj?~{+{rBh39=+LljnOY1^ZK!icYw#rnoc#bJL<3XHhyHo2YRQxnrD9Mh=U{ahjV^(`0Z0yZH03`@rAk1 ze6W50t3!4CG{V*O_=j;7C9A!T=ZS9Sr#|XC)4%Xk>CgY*njTFVvvmBkNc{C~LGnTC zqZn<-eyO(P{Hi+dB;WY3LFM<@p!z!={|qL@V%z@|{rz3H`ayV8@w4_$9(6qaNhL4S 
zzo@9vpYJhEkEYa_{#{}d2YtahL}#_}Qy=G}tNE4R>fl*ce=RANu-X0>G>`oHK$!iB zZ60~oelIefUH{gUJoXp%|BsQMI`1T(HMaf#uf{Du>O0fF=&Ynab&G`A^hm`ny`O7d z{pjKOR>!!Cu^&6es=m$dv>%XhutD{Q&CZoe7OH2+NhB2v;{^Q5YMoWphz=cAi4t1zyE z-~2ir{wf~)+5Km}c;~0@TpXtS_YZFxeE0--_}s|r(vbf@YRbH9{)`*zq&A0dC%dQw-{amly$-J15J;}Hj`_ilI|e)2%$O(oy_%I}@{4VTdmUzq;WV_$#a zv@^qIJMMbRJ%8Gse(d_sRL221|Ic`elGR?r1EL#t)%-I3TRBUm{_*Fpnv7|DPHg4} z`p9p6X#eDawtYl#t5ZIBN9u2^J8!jh4?O(F#(Mo5JpbLR+W$XWb^n=Ezkf`VVxdp5 z+WQ5wwSPXhTA!(8nB5dR)4w}^RMOu`9G;R+sl>e#Bo4-Y8#mI|@@vAw>)!mD51-*O z`r!-nZd~unH(hW?*zTgE4*KuAUWU*1KTYieDI*~jXS~mY_&Dc^Tz&* zelq=g^30{yAHE3T1HH>xhaCSbWj=ix4|T4fOFUTNr>+XaK~qot(u!Ydg~PwN@E2X5 zxEek?{{ye$(%%0;x@*54*lu}#7$=@zrvIY6wo?Dm!pwY(FY1#x$arRV%1>n+wh!uU zJg0FGK5VdqKMX9`e`^2yR-sPy7E7^5{RpO#j7sZApK4<_k0PviIGZ_`Mq*w>-wt zIDLWSfyS$K=8t(|K5Rwms<5&8!mq!6)=L*Q)|hZm*9lvVryo22KbRDYeg18#t^8h5 zyzMo9#RX6D27Z=8n)t&i@cSV}t8#LwD0d1Tjrs?9hcyqW&} zc}0m=>$r}QeD*JfnnxV;k)Lrv@<8L$`Ix^eDHiw)N9Yez`e%Q7_m#H{>-T)<(iPTh z|NmO67$#xA!*;>`Iv!WD+iT`U9=e%7m0Y$v=9lR|kVh)@*ZX*oPrae)KpgZgXI+Nl zAJzT;>i82K8k2W?@#lX#`t-&+Z|-;Y7T30~f8+Xx-lFaHeJG|z-+$?RC9Aw<9PWdJ zZQEB%YS1aue=zq{=`Yz?Je5bbJ+#zzg7A@t4O$-+&lh~n&@~A#WkhpEHS`Tky)O(9W(_i(W zj^~kl>!T(7_+#{LcpMvE#-q;qi=H3LSH2QY9+Z8BT<+br`tvpV#rii|&*qQ!?2R*zKKN$a>l<^HUpFTnQfX8edpFGf>SMAgM z^sD2P_?FQhHoWezI|nE%1QoX7g@J0oxh8nf@-1 zRO-LF)R9lUVNsnOCuv^&Pm=G|Z*}OXX#PRT$BxnZS3Y=0>JPmy-gw_J`}Tw@54!lQ z=G$L^&wl^aN{WT|A-n%P`YB3Q`}O0Uc)&XOUFuJV$C+oQe=U!c^w&D><4V5iUR1Tm zCk}d-vo6E?FMTQ+4}MqB+5UCJ`hQ^m7oOVaXHSHqKJ>uOoBZ-McjTF1OI{hK*ONq?Om*AbC# zdN6-Q;vjv*w$*p%qYim}q=jkkhow5F@lHrz6?x$cKX`G@<>&T=&o6$@oDKfGrPsX5 z@4q(t?F!i)Kp%)-_esUV{Ll@4+rH>86@pCvR{p3`|9|)S|C;n=`qQryzXwO?4<~JX zK)&AzAHV(c6VJKY_Gh;L@6&O#56^itY#mp!+G}(NOXok+;k?W*)4w~9l=OF!&+iDx zkGBJ_dBnlkPhTK;pdJ6Tl47Bre6w8QU_x+84(?6Z%)H28+;b$B#>Q|zky&r7tuhQ~Yc+4bjYGhgP% zyfgh5#tf#vDopa z)AwJ-Rg|pq+G~92@4x!q-khaU{~hw^koQSfn6}^aYNHnz`>{>Wo}^eTA3gPXFu#*L zzqANXln-Cn>0Q?xJ?4yN*!>G*$4))F{r@8e>Nu?kr|bRy;Z>BZ_S)v<#8+3{yxJCz 
z=9g{%i}Q*q{dwKczwyuy{AtdTeGb#_J;?{-I{cl`yPP%aFCNbo?vC3*%pc}Fyj9=* ze+^-qPp!~%$xeH~Yv=z{ED-ROG$s_ zt#6b^hiv~fvF&}>i47lU<7wP%Nn(yHON9b=%_`Z9enq#+uK3esA`h%Z1=(XN#^isqR z+VdI3;fK!{-!w;L;r0hND>RwRiH^ z_~s3_OZwL&p6Nf3KdRJ!H_0=dQn8KC4Ub!1Z2yKk6-C`TkXZ*lPd9w`~2)7sA$$-n{#jN2c)E`RAUbSonR2 z-*Op`f3*Llzy5ze8LwN%b;GfA|NkhCKIjN~&5LdFdA==P({WQl^DB>!F@BzickqW@ zAO894pZaJTcKhZ#Kl`DpucjaS{b?&H7Pb#RzWDw1+O~hxJ|!#LZgo7fojgJ}ueQ~v z5@&vmQTr7P=wbO<$M@>w+kQuR$S1G{J2l8XP%k4esw@^#859@lv%aXI#AAa`mo|m8T-f7`?A6t3+?B%b7pYOd)r)E+t|M&M_-7n1ts?wj|G10&E*Il%i zcqd34jQz|LBoDOxTJ-#o51(O${+;r}HXk|Wfi9UgoC?MaHoj(@oSXSe#nn%c@MR;Htq zM|^LHZJ0`&`DOaIa!-~1^)-g)+jg3Y-#bD0z}R1=9Fj`$I zHu?5Es^2?_mbvAah_r0k*`=f|CwgV*~0b?FY^Pv%d>{)k6w!8f%Fse zVSAUeW#hVhJ!IalFqhgNri>rA!FP6>6{h}VovH8lc3zq5&+WSZLFH{m{5qa^+iUAn z=hyN3ubR|m`uF9JO8UbSeV<}FX+7hjBZ%L-oPCI1%1a(-&u3bs+xx^@shw zaqGI{X1*2<_{O#q4!G zejP7tb?T)Ld~BZ^!rV2FJMCLLoCUvKf2Dc&?f%;lzl>U5Q-^Mzx>Rz}k9lYM59X1Q z{!a4iUXXnFV}Jq?`Gb$Ljt?J8XUUZvXh>4J(9sxZ9uaR^^J^4gmgwEzECz6NREcWF?^X)30}QLJpc zbvzQp<~*eJ2@n0Lo#|hAs`O_+R;h>b5eMlbwi&1XZt2(JHSIrDbR$IV$5+&czLWmz zAI%@`8}7R4%oT51dm4G{f9MM=^{0JGcjdM5F&`&6`0e@7c&2}mfi7_#|D$JpG?lNV z=md#_KJsJR_^qT^EFa#k$gj3{;y0}D!&il2!y_-+^VvDQVdGO~9I^MFgB-ss>-m4_ z=AUf;8$|s^^W%LW)4!Hymh^X0&+mwtk5|8;>OdSM9^2@TA7p&v2yre}BRB zFTHx@mEpU;*x=AlTz3KdcKkVz6pP*eXKwNLJA3dvF}`GFx@jL~J9&g|UhQ~(%sbOR z%_}PPkFS4Y-VwqFdY7{f`TXSy)ApC8*Ef8c7xVb2epzk!!u(6ee0^}0!LVTL@wdIU z*30O}`Dgk9sWbi3q*$mEtNlFSuXEir^QviGOAy{n|7Pwf>92Ksk4(O82k^P!e3qy6 z^b;d_pz*eb^I4sa<3aeb!4Ce$Ml&vb@aV7fH8y(XF;uHc0J+0>-yWY7|uRV+Vj#uPw%u2?2zB__rHsj58Sr6 z|KaH#enrVDuhE;|jbfYrO{rminf~3mr=)+Ek!^*ceULl;H! z4-U`64>BLGe1`BXqd#o_(rf!3al$~@X`9_1`0{h9ulYx)v)A!K_`(l=dHg%? 
z*rgTj-RJeACZFBD{|4tD7A3`E$NxJ2xPQrNuPyGxSC8wkYZ9lQO#i;zv();-)5@!| zI+w58$NBJjmwVfO?{X{4?^0|DT0Xok@{_NA4=$shal*zgKYHT(FPsrJ-ECj@n}ff~ z^IiG**GK=qqkT%3?fKihw6FSf$uPgxnVm{J)4xA|RMH=wPRE~4c$gpPBfpKSyySu8 z^ExoU=388OjSm~_kRNv4``gdm^1TDX{NG)B{emlc;I-dhz@wPMrbknGT8i2a?>Cm`Bp%yd{p=6+);z|?50VeB`KePr%NwCTeB$cRH^`7N+ z?xi2@|6{thl49w2{|zU3ti9UYw;XmwRJ z&abRfKH|o=lzzquo9%tzw~~3!ge`9S&3R}4WD-1;$N%eRci+f90+0tL(So1;?D~g_ z?Sy_Z{fmrB{p&Ii`S2Td(6h46Rq)m-{F?8V6{Gow$zwKu=8^ZV6xRF7IcMK+;~ntX z?~ewOVzJM^Q}z1WZuNuk#^@xkuDW@(4QmodmrVa6qojYtYsa%%&wc`4_~Q2g{I-24 zFaAy#*D;>?i|*UUA)WZbiH}}7`GZ@Z7tXo+nvd4^>4V44zcrI$;e9gwy z$7|zTS0dQ(#dfBDp=e2e<^Q)`|H-_IZrXZUzuJ!nyOLrtdnx_lwZ^7n));j|qif>f zYmdMGDC%whKUmNvrTZV!myRo0 zE+fqrvo)_WKP}SRzprs>Q-=+X&>s$*(*K#>_4f+z9XI*3EthZq|Lnm!Uc|p`@$R-CTYaQSJ+J1i^DHcDsIR3oL^iS3A1mm?=e?9*GF_lc4 zPoa8}Wcs)ANJ)R5Hr!seTakJMRhz+75c~XG9K!b4_?MKn|MV&d|~@9t@X=ke{O{xZ#n+Vcg;V5db|D; z9>w&l`u+>AViGNJKj>!uR0_Nkq@PUxp4?NZe|-ICnrCPEX#xNH`~RBo(x3J3bUY9} zu)z-gu)g&&umRqxog} zFUo0bJSU`HF@7Jnw0i@AF^ZzWISL^V)+> zKjY|0^ig^KZEOAgFMTOWSLHR&Lw!q$d8b%Yq;It|{d@CBrT+2p4?Ljqp(E&B&N}3M zL-Xj5JkVZGO{w!vkveQ}g#Lzm_v|;PJo&-Kl<&@BhQ`54?)$ZcF$1SGvt~ z)c*J!#C+rVG4D+O#d&R&{=P4d4$(0!%=Z4rsof`7|FR(bp!K2iGJkW)`4-oFt9O!L zvX{~yHfpXs{kpRT!zRakW9I{3T(7kK4=6vliGKgCee(L3thV0PkM>*NNqL-NZMe<+ zGX4AVNTvSWQb)c$kE!^*Q`9`pBa#nV9{Gw=-F|+_2jh8_Z1{BE)Ky_v@3x~BJaF#^ z!?eRkA9US)^Wd}d4^8O^ol}ngb$;kX-#!}O__+Tz@iZwGcs@ZOpVzu8 z%=YJ~RBU@5qBy)h^3xYc9%y{bU+c+-&#)^g7PFn$@Ks?r_`;L#S@6i)!x4YldhSNo z+(5mZe^4I$_Wf_9KXG0^i7xoj&H7IzVF~&VFeFaE5JMTD~eTKdySXAe~IQtUbg)Y<}4-s zRR``PVS3c^>?|MDJmR2tc~*PB;$3bH(Jw8`^6M(%8>!CT3BRH8X7l~TgIAx}-B|6E zD-Id+@&e*qf5`VihE3&3BXnZNYx+euuX*EO&`H6${ zQHA7z#;5$|r(fm)j?f>DId}C%dtQ2T=$?A-181$ZnEooie=JTvKei)zps3JZ;|J@i z8+O(FGX0C0l=RnmGJ;<$k9gjEOKm4;di4#r&5vG+_&cHXPrZ(3er&}~e4*>9sf(WP z-zao_^)p|1W{*L5?7E+Uq*&}c5ci+#RzFx%Tl*3#dwr|Tc^GWdP2<>^{w}XA=?~Ag z!pywl=h3_2am!8*6=c*G^F*~ecBkDdP+ zl=+nQKhZdn)n0qz+Hc4EpERX5)4!HKs?=Z4u~XK zy(BTv!% zwI%0UJgVdQGjuwBoQEIOcv$_8t&jWe|GXot(cJ#-al5YXHO}Wodmm80oqy;~ip5?B 
zHMK$Ir!V{eFXh2+sCAkCt(>W(KYYzRI^;MKe~g+}55x1Tbj05YtzX7-!tX1v6JMD9 zmjmC{wf7BStA#gz=OZt^2#=lrY$e6Q_LbhQ*S~eEA9QMKUt$e;J@Nk|QC>WsO#gzh zq`%hHw-;vSwbbvwT5omQC*$J>?RBMj=BJK%fXnEIFZ}TfzuMxYRTqWluQ_Yu&0c69 z|M2;1ASo8x|2Nfk!%?jE+Bv0dmW8c3=^Gg@#_b*Ky zS5d4?H_bO&`>6c?_f#_JFVlZf?y1th-Zea*@eDOiUj2^Y`HaInLGu@*FNOY2cs;1& zn!S|%M%R-!T)5?!360g(oOSsN(>(pK|F`XaASo6|i0<=*W2fBhYq>6I4l_W5V!Ck}cid1gm_;0NvXnO4*(p9kT?20Qq}&NqJk z{P~CWg&n{9*6(feKP$m!_aA5`#X>*n`T5`PWcOd{RzFx1J6?My9&|Ik;Y}sZyfgh5 z=N{vC61Td3vB~FoGwjlQ&8uHLJm2cbQ#5~T$@vzK>b#SD>kk_w9~)GEW7N6NzU!tB ze4{by=#^Jp|E3x|cKkn>6ieyzSE_zRvD({vY?t)cI>zHTD$~C&uPy2Ca&NZ%N5?a! zdrjir38EJm`)yp+fjp4yjXd*fK7581e(I{Q;T}KivTas;wK4f!e)j#f$?)0!zc(os zyZ$QZ^(RjKiek0bUj6i6I{%YOE^+3Y>EEA6D)nDa>X^^}x&BS<(v-6+y(-c3tqi3f7KprXS zPhCyw$gk2LKi3_R57IY?zY~&g=%k;Lt^Q7Y;j!i|$Blcf+34z-cFH%Wp9~M5ySP3O zgpd7&A^IpvSMB`~zuWcN7y2+BcBcPeUR%;%>*C`{>I`*!`UJh!n;rFwAFSfDew7d2 zW%V;oIQ6Hi96I>d-f-4C>Q8O@N-uo&{##3mg?>`bKRfj+iq&3w_0t#6A9e|KoW`gH z1xZPN)r;%6=%3dQpAX?B4$_a=ORImgXs7+X40X&89HBoa1C8=XU%LuVR`XeP)NCasSdydmYE^ z{11GoxDLNmEyd?w#&J?%eAtTcR$F@4&E$sfy^Vj4SJ&AKPK_3(vmi3+JpP zRYP9q2Ph9ZDW+f3>z}(du4whC`n!f>eXiK?_;H_^{sp7y@03T8|8an)wEb%yanMJ8 zY{qB%vixRJEX2*vyp*rRvyA?*>jCSoc*(6Vhh46E>DKKprEj#ZsShQ3AJ(P|o z(em2(=4idR>m=^ML9xj?{d~5uN!o!%1?`Qd%YVEbxz~p2>p$AyH1Cw70~I+Je-^or)sc57IZKH|0Ujg?5LPx!Sy)4xB@tkS>q`J2e*bb>yxk?bc;%06+y86ao;UhyN=2ssK>nyw|DHTLWIvu3roHcbr*@AZuVeHB znI~v|t;dd$d_&DI*-PmUvp0J92N!<36=wf-^Rt$}egQnT{i`nMl|G%{{bbrJmHO9Y z-mJa$QJ*0m_^n>~nP;Z|V4hjhUwKN$>1m#w<);PM^}ghB9tD2T`sk9n*pD4!T<2Xh zE;gwA9vf7Dn6dr16~ zNdgB1iQ*9v5kUt7Nx-9sf+)@fK`%jvBp|^N$)X$-6$SAa;GzP4ReP=9>T`BY-JTim z`@UcOQBSY*?6se+uCA`Gu5L@3%}4D*e^*9~H(G@a&Mkt)LjSxj?RDs4IO90A`DwAq z2W?#K$Nbs{c8Y18XFTOGdm;T{)`KU1Zo5#e`~_6cRl;l`+nJ)IP3F&x_JM|@Y?o2l*PsNL-fUXijq}bqc6I7Cpos=Dj)MM z^v}zTivF(j7T5pic;v%x=wh1XHSZFee9-b!zBvoduhi){9#kH)7t$a07#P3ILwg?( z_Iz!pmmd4ceEPEQ-#b}c?Df6+4e8LaW;PY{ntI$9hpuIa;wK-ZFA#q(B;U{pf5ldRFTOBwi!CNRv(|oL;)d_Nq4*-+Yn^@m z>Q{bnTJrxp4UP8+rfd4L&tFdS+lpGp_EzXWr;Jqe*Su6u>K1wb!?=p6PTrMW6n(S> 
zi8CMX#BT`SBKpJjZ#(d$-45I+Y=7ZaN8j=5H{j=Ui_NRUc>nYDx715^cY8!P;@&A= zGQUFq!7@^(f8{)^)>}S0%oqOV*96fIv_3T7{GA2oTO5ASNxt!6gK1vk51akzeHUiy zP79k4eE!=%J@{?#*zr$8`4#iy^!nRe{fc6B^)}ndBisL{{j?>{{0jZ&mLAj7Nu0j; zTtzm z=*#r)WO1?A0slYJR==WHg>Fv$$+*04!e57l{_{$PI{ljs;Ws_hHvQe`xaB4ObbRta zJ3iKVm_M19S6<`827BbUCjD)nQ(ph^*{#*TapA-N{k0e2vFqQ4vbfms&qiwJqfx8{ zZ2vkwy1}2$&pTl#^q*fos_3tA$tvQxJgT6MXL_|I4;!>Tl6>NInAUlxeJGFF>aXL$ zANt&X{o|#3-WkT+_0*}qf9*Qz>+Zkg`{MrpVcXssqsJGkyEeXwqw}mj(WlUVsLZVB zuk+(Re)P}#W13GK^sZ#>o|i~$FxG^T}1-`poa(H~rgL zTx>hz^KV1_ieh!w@OURa^f15HrSofz>BIulzfbC^YwjjCb#>1panQS@F5x8)Ovg7r zb;_6WlDDwfT4CKQFFxzObEUiP>xuW@Iv&OasR!|=x?$JNuh2iv(Be)QscY)*2n6eeAA<$<0#@!(MQ%r*#GCs+ZKe+`d2;=YJX;{pSn71jeGf0|IK^1 z$aJCJKL6_Y=;NJ!{$)Hx>Qkg&bi=me7UieELjOj|Vfs5^^osnBgM8!3N9Pd-sW&_6 zA3tc@m*$zDI_(Euo7W=xLu0LD_C6t-6gGVRFE7tObr1M#|KG{tV#oih#qYm9KprSr z-8Fvts%-zv18hibp?_XJs?}fr$IA4G*+o96e8fTTvbdx+ezq@*wRgIH@^(OUH?BPiwpnb;TQ7%m*D}?4LhA5&tswgKsjKo{^{{&US=2hc?5eNm>=k! z#LY$*Mf{-gHEQaV&x7z`gFXCV;%(zE{?3x02@?-_c)5daS_wY8{vpreV*RWVufO_) z>8SjY)m^9Y%KL9Fxx5aTccK5BGGh9t{c9ch-WAEk;$6A!?w5$yVcK6^Ug}Tlyemg7 z#!vfn(QVgTq&(PRjj-wmmUVhp{q8($`yUb>JO9>**B`#d_G*}nFIC;XD*wNkD=&3z zu?zhN%SWcaE3Ix{YzOr3T^St_chR)hL;8Cqt7BY6{Jqe0rk?RqTm8NG&_B%j=Pn<7 z>9((Xtuy@_SzOpoYu|r0PqMpf@8ofFtf2L;ex5)2EcBmSW}5!Vyqa1^KFfc>h&k|18QIJfX2u9 zPp5osMXh7Lh5qx(h>hn;tFSe%^8cH84BPhZo!IF-Q$Kc!)G;1d@w+T8R?j>Z(hpx~ z{N|Ex&${e=p>fAIA3pQd?(e@kk-q!CYlCV}(DTpzYJ-fgDAB5J-f3Nu&+Dtue}0)! 
ztABd_Ggm%Pc{$(Ld`aU@ka?pkX#U~RdH6x{z493{pTtjH9ft6uHP8Cu`g?|O@e%L& z*;D(#XWzecvbZpAzIi&vN<(@FbaJmMh!IwTLY z=T*lwzuMlFT^4=1a$VqJ`dh1Qy!n>9jNP|2;jS%CT;a(NF(3Q=dneLo9$aC;|Gz0& z-F{NLnSTCql3{)4Vz;ciRg|pm8XmBjOAMR+4$n8b6#7TTTK)BX zoP6U;`XLT_C*zylE59+i-%6eGc@REqu!ld~`{WJR-~ONT!_Q}*e$o0bF9Tog@y`ss z{sex-tY$%<%KcZ9`SE%$^pA{n`WxRE84x=~^lGjkdDviGKJhwC>wN6Ce*aU)gFiGL z{{8ZeN#6<^?X<^TcYJgY`1tnTaRTGn z^|#45x$OSiHlEHSOBRc3J-V5n`8TAHc^CTUrKe7Rs~;+}io85R7nK)2jvuf=<4N*~ z*I`=cosQ3Zuodfg@P`RceCL@Hzi~&HH0hir|8>+7UhDYXrtPQOSzPkr#r*%%$x?6q z=W2HZ>8H>?j#SZK=g0pjkh5>UcJipXmZ0em4}Gg$=-(+bEBd?ATlnWi zu=84V+dp}W{QrZU(fp}y)<1na$u~bXnC2z^(3pAeAFf|&Mp$o7cINV}RlUaT`$y^( z^K}<|{U>$wk?De;<96cs+0K|}p?`l_TC4wX8SS3utmyAzIBROZ&3R(O4_Y6ZXZ|@0 z&bPScTfLL~ioKBju+9(Pf6R|2e?P1k0Vy}S04P#%l7*X8sFxu#BLyccnFgXDhMEXaAMv69>IZ>d-6E-|}iCjf=#})tbvx#Vw;>hGuh4&P z=}GnUuJjh?-=+zYZ~Aw{4?l6xJBg<@`RR6GyvaN)U-{sz^D|x@2EWSpmY#6hcf+=) z-F?Bf$IpY$u74huzIz@2Fur7W*Z6CH|I7T^Qd8(ZuY6RifAV?Obn28<-RGUpz0K#; z?*!3-zCiQOADxFEB;PBaA@jhG4dMsYAI45wZ}r;`{6kn}+&VuxcGE53v-3ZLSzPS> z*XjKy`_-Q-GERz4;?41zr#7!^n@>aHh5qx)M-}~*x3c|fo$)C@dVw~M@}xF-VEVc? zKlM6}2N%;Hw)$st)0Ok3LjK^s$8G;?_4lXkEH1X4jfwXkTiyE0CC=JA9tmQjo6Q?u z`pzZ4(0{0WRMFp+-oigu(DcZ~wtnE#yyp3%^R13NMe{2kc8bZc#Y=1vRtGau$z=s#R$*6Gjp z2;|##(XaeYkT~cQKjVVrf$8&Ue&(fo@WNv@@gDhMuS4#JDnePYs~v&X+?jXN6+&QnulJXchZsBnnyqQL3>`? 
zHFe78LHMx29{$$!cOP}h`}bI?HGPSvzIn~PucND-e;dx?QhEP{S5dO6YvzY;*md(O z^pA`c{he_0JXUn-NZdO?;$Z5>Htb|^vHWCy6~B{w50Z}!_V9<+?bp9+>3j09!v)9O zu*ov>EBbe`xUiq`6ZHCX_!T9qy7n4JH*9+z=uffGKUUQ0-_GJx9ru|Z*<7|trbpN_U)&?^)>pkZdRMw+owb z%x+6f1MxSBby%WGP}s{M6mIRgWgFzwej=(SsY#+*ZgW* zo$}heu)!YrVaB6p4L!P5JIs83#l7~Kye2%A^ADXYE`FNce*;_jC0bn@-xP^?r`R5i z%&*YDzl@mv^bf}9i2X>eJc{u2Liz&9Pi_1laYLu$(;v1Xb#)kKzPiyX7xuj-EXBN) z$3A}zNyN7Q^>qK=R==XvH+ZCR__Tqv55^m@{STCtroSt#5q-I6cEA79@z{QS5@)>B zZ{uozQ0bQLO5k z{+pVIoz4%t*#75~EEWB!SFGs4{6Wp*dk6AC>qGPL_d-t}$vEaGuGot&%>2+R+y3#3 zmxozXw!L+uZ6AZjw*O8R7xu5U=l|)aA^EJCr(J*IB!WN1LjS=sqoTj^OLcc`+r5qp zAM;M_)L$1TFGcbVo%B+%)t}aIQFkt+Vr==&G2{Ud;Pn zrPu%GSzLHt>8sFxZW*ZP?_zq!>xg__H}IeXanNghYLm}#gvE0i&-k_8^07g5!UlWz z!@h5QXXCSu=nDtF`^_Cc{$%&}r`-Rolf}j6$N3-l6(y^>_8M zuB)=ozv=u|EdPD*>L=6wvt+TjlR#5-OZ{Hs%&*XYepykg|BN#Fe?kAYjAPrKQ@e+r zo;WZr=9QP2SzT|Jb229{*bkqLXcJ4auupqj&A^zdBM0Z=wHC z>8aDda{izBr8rl3|JTp|JK<$s9=i_dD~+o^%-;2r{Y#znOxS$Sb+7r_QBT45|M2~H zI4hQl{@QQ)KVs>2K>s|SVCu(4$IABAE}AHobpJ@ccV)DQyK-Hi_UBz`ExbSMJaER5 z-#+r3@E=#4a?1W6tDOI#FVN=0c#1M=cWwP>9NoN=9OebW+ZrP;yjuM=k$mcHKcV9h z2N^fD@xx>BT>DJ;Y+g>|pyqjOu!leF`kVc3Kk-*5h27qF=;MEUhdrOR{SRbuvG0F3 zS-AUeNL6>cs2qRh%1eK07y3s}t^O0F&UB(aLE@lyNuAm@PCJP=#I8Iq)GaL1SDLT> z@LwOj;NF+kpB;X1!pEQ4ecXJWUpxQSuYG`%F|LKFi5-jwO8Kz5; z&vBIL+K@4=AI&EY`ovFPAbB9VgXX7S<%1U@atCVNf3{G8U<4}a+7v6uXX zdb|Jpa2A(;`}?oS%0oYGwH29Pp?{-{)at*b)RAw;vAOusfjH=$#8aDm_Foo{U36c+ z=I_&Sl+S~U=?{}$n*aO9$G5}kvu;0r#u}f3&(6PfvbfNnUtz)5-%3_@?VUVkewF=y zGC#K8LjSz1tkWO8O^=ve~ore%t4>PS|jt zJG14!xVhIl+y4(`ak2Anb@xBlI?3*?>8o=6NiI3Wd7T&fx64Sa{;F@~|0K~}k$LmG zS8UM6P4bD?VOr;1*=_fA;$3MKwky}&&nb0$VZ*=QyJhhQ@L}Vt{`SYa@7Ry+(9S=! 
zg$I3d`T6^*_Il`Oydjvby%P_=hqnH6@uz(i`gh7mMSoX%i|Y^dK9uu9nqx0;o@KvsV7|h~geL1ytT(PQa z@5G00Ht)9h)A<$p_m`1c{l`h2>5+?_?th4b-btR>=%t7sG(P&&dg~t>gby3+;SV3) zcDv1%yJ(s4;r`=x`O(?k^UqxW+{ohMl`sG4OXUq{Uc{!Kn6!Wud6oZv&n21XlX(~V z50sHw{e#qzZ`*y+Cvnibqz+va$ph&Ne)ChOeDI=!*~EL~hZ_#K@A;RSS@`a$>u!12 zMjh&H`|o6NVLSCJ#qpUd8h^t*eCmHY& z#|H6(_y*yQN*&sh7uW#F^l ze|56B*#2kCzxn?=I-hiY*oFS{$^k3-yV6@6|LA=@Oegq27t?wl|7f1+)<3$={K{+N zQI}#ezSlT)br?>cwZevv9Qw6z`a^5%al-A-GOk^JH7|<`*VDk0ZvT?i-QLZ1@(A6$ z+KdCjTj)Q(tf=U(^J_NBX!p3<`shTry)VEAdMEMJ#?SL_@m%uL{;;hM-9b0Ht~Oui z5!Rb>$lN85JUXm*(e798GI$sH_WxhG@+sQ!kJj_Lm#pgA_{evX!+eQ@h5kcjhUw3| z6uFOz>6FCrr|4bDI^ubA(ab!Ui%lKKA+=E4|qW3Ck}e8H(T@Q4?k%7E3f&fQ$Bd%F`Iag{P2-k*K9Q7)Y;)9 zAAew+YxUgK3?2Qel2toPEUEADWk3ytR{t~&V6*Syx*`TvA3m&@PQ zu=(a$T+;EyJ7Rw}Ey-)B-4>+3LVs6!D*8w70`C6?52$(Yfj;qDzkOL;$OG+tJ?YQt zl+T08WA;M&Thkx9l+=1kGvGqI`5?3{MexKdu&ks z;gZ?QY&QM2Q^Vj_@BizYpU)ZJ^dHXRQu+R+qkfQa8scZ|ojkJtUnZ|De(PK9LjSys zRP=YHxBGfRm&)tV#kB5m#A;%b58Ale5B^@5)_KNbUf7Dg_`-y5Zqgb5!1-a~5nuk% zX78$xz3a?C#n-dEC_RZLtmMyDfQz{_&BG=fqDw_pxDK zHhyDt9&wO*vl&+rKWNWu!izre8N%nXxZsDc4#W8u{{GZ^HqFAEr4FCsZl6t^ZU6Hk zUFPQt!ejH+aV6SaTb~-|xY7K%}qJOS_MX|d32eZ*X z`QF6lN#Dv-=-*$~R`iFbAy(dpn!m9A*0h;dPu~CX z{aVwu-0d@yPFM;aJO9?s;==o+pAhdqX1~)TU$Uxe;!Si)=Ix!-Fz-VDfznf}zxG4E z=?=dus~52Sr{g9ze4x$OMY`(iM*SXy4;!rUGfo)Sf9ubl+V_Dl{-_PMy64<)n_dTI zaq;jd=5zJ_Z}2L{zZhzY-tT5@O&mTE+eXn(P{&O&kiwC!o3!p38U61FX_WLib zL%%}*I8sG_&Es{%yefUT*sg7_mY4WVuNwWm=Chp`5B(Ac7t>jrrlEuZgvpH(767T=Kq3P4F{<`s<#4G>5+|atVAoDBqpIds2-$@+3ntI<= z@_F72wGX@h%u`ygJXV+RqH}tEFZm#ScIjc_g2chpk8S<* zkFHB!PsXEtSiO@v#(8+^8_PH@p`$jWb1DCb#>++i0PI04~xSOI?1>BVuNX3 z;t%^Dc-5^}uHFfU?7qd9Rvdc(Jofo(D2og8_FL=q&$;>)#VT~u@lw08|8bH*KF>#? z|8N1yxzUGn7xM8!+2j-oA!(YN? 
zd!O>m&3AY2zw`m&HQk5Q<~)8|Z7}IeqJ@r59yJvix1v`=@>*l$!lPRKbICJ37{4R_ zl%MZC$pm{xaj_M9@r9Y6-f`!BFaLU2z76$* zePX9;@5EC%{{xTKb@xAqM^Byp%~geudD-)ri{Cpz;$Z4$o*;Q3`(gSrKmF=B@WNv@ z@gDghEccI7pa1;YVcYLb{PJ=4&ZXY&|Io4WoK zmfx0q;^wCxaOt{pZ;#b{5A6{ z^v_Ec)8CcWi1(3MVlyA_#7>boNIbRiSGMy!iwpf*o>PB{i}<|27Y@5|&BhYDT@VhR zed)(uIH7y}?S2a$C(qT7ev0BPbkaV|cJj!Mf7G_^R`b!N&_6Po{u=l2r2jwF5PtS& z@T5pRer)=!=nt=sZ*@sO6&{zx1wVCl*qU_9Dc4=P(|e?rhh5nuLk?HSBYsB`yf!NH4_Yd^*Zgkx85sxS~ycE+q@1)-R*otX=YKP4~aK!Y3H(o7l-8c5s3+L_w zkDdQ%kLJ(0{}ujxbewwej@WMXxRJQ+f0T#53jGJl+FJelg_(Tg$;I!TqULcO4f&w; z(I@$-A3Mdg&c;(7v)yRklm|OZTzI7d`p|s*!rpy-;Rn+?@1(-~ z*oxHEVVJV)XE*z6{#cmuy+@yTcC{V6*4gjxbiSrXpML+9_%mJ5!)*Abi^uwGsI5qU zh5mzO#Kv=_HR5?p|9{iPaOM-__h!u(N5>h*`skCm`3Fba_(7|4lIKC?F?%8XVaq+f zK4yuzONFr0mYZ*|=HJ+U*>3E6mw_xUKIr}55`KwR*YwNprm-1^*AMe6^q*U@)at*A zj6wcFzyGHCU@O80T7PXB&pSoxu))Rj2mjC=pXvL_0b!T3x1M$1Y1c9?`yW1+pj)1v z|JQM)yXx9&f0h5gXh<&lG4DeEd1a(df6hmdPhS-rDp`v2pVteL52pF%A6Rg{#nBmb zl5c$2V49cs!)|B%_VAc3zWMl*pB^;*4^ZCW1UJz0pZ)4r6syoJ=aI&pAjctI zZN>qaU!ni}vZ7A^W_tVyAJ|t`SNGrEmENx3yV5Gg(fqUz^S33B{=5^rg7BpAI=|+H z1Mj(f+qVvw6AtqCKlSDT--nm&pZx=vzY>rCS7p03OvcG{F+SA|yKa7k{zGMHMSu8* z%jgIl8)BoMcWP^1^JcNp547=>*Zczu&bPScTfLL~ioKBjFg$baSH>Pb7=~}3{=#>6 za$f6dKY#6@<8?-(SPR(xwcpD3ziq8&K57^G50{ZT{ke|R^k^s#=)@)t`owQ~_0{CJ z#n1C?>DX+5P_I{W?CpisJ3+T-o0=$QWIZZZ_|> z#x~Bn}ccTjxVR_(A4nuW!bo4jZiT!&isl!|NQ|{_%w2 zaPoE6zTSRrCHl1EpW!SncK&T!{r~H>`avCEyxp~T@;KdZ(+~SK>_Y#@RHr|EkZ(HW zGLCnO!-CD*1jz@{9W?*og7Yn|^Q6u@$*Cb)C$!GsysN)j{eKJnkCwZVfuL+;k zDIdIx>u*iC^p2xiAG)?Ran{|h-1k8D{KG+6TsYn{Omt&>$?mT4(^uvHUvkMM&U_2~ z8)c+c|MdJb{d7k2p(E&>#8cbGkE4!wUx&9(d9~j1vB5NNA^l<3$%pN?&WEzlKKJpv ze!Y!tzxMt+kj2G=v-JFf_RDrBS>5eFwJYbJ)A^}g=%1Ih75$l?=9!KS8Hql;Q(N=6 z?8QN;d!g}bU9GKs^x_LgoRb}W*598EC;fPrZ!h`uweZ>TXMYwK(_9O&cry@M$^SS}?r|4bDI%4}ycq{o0$xq*(?0It< z2jRn3OnFl~tiRmG>m7L86~b1`TkGs|?|g|=u0QXnA9QMKA7YK*TYYpKz0&y=`gh7q z)8Ca=mxuQu=HXo#9T9iY%*bp0m)PWkrla<2{(%MOTO6H1C;7&Q4W@aCKWzJ(>wbIt z`zM4Qk6iiJhyCDwc&Wf7+5yf5xNy 
z-f0|MOn+;I7ni;4r9&@jjXi70FWma%Q}EjFzXo(1aHHh>r;~Wy+H1VInq)8^+yBtF z&bQEiPMKM!e|mkg@xcSmmGPRN6huETtuueV;Cze24?4*=K5Q_}OZ;JzBTnA?m+$^S z*yPy3pML0{r+bat?_WAuTx|QF^grK!l|H1ZyFY49Q6Boq)outfze4}P(o@mjmEPif zrv8qPe8w|$F|Bg_rceBqul>LW+UJ~(@KJAd%J=X3!=3)i^WJ&+tu&w9dQOj=j#!R(~(Pu;n4QU$ev1b3^mGI|pZv z+l6}D|0@svycK`{<4?9>)JRrJZ zTmQ-Y3;l;nmRkMSkTI-}hSafAZ^tV=d|I|x{|G0H))yEHf*WG7-1wBmv0UZ~d zssF#$P(P^Si?_QrzKNPopSju{LFQfPA2TZYQ|CtKSN1;|M?da64L?XzuH+`(8>43 z^A8W(cHhvr6HM1${pgQv^?4Q-`YN{n=&94c8KWc4A(zDkKF}wA)2l{*o3GB_ zcr+hA!^QN6nf~^bce%6^X5D@2*;l;zD13JQQ~SoBUtivS`=#@+!7N#9eog|-T+sYF zFXqYo3jJfGqCfK+6dv-8k2t7#&5w-Eud6eEzTkX|!w)*iw?44JG%xYn`JYX19QejI zC!Oy#ZtuVCEH3Ee$He3R)opvrCC-{YQk%~~-bsW!!-n_^{X6BOivF(j7UzFv3z{BI zY!B*uiG##b+r}Tr;$rb!Y+k44@1du6S_k&X5BvVt#G4zBJQntUVET^N96mt3o&W48 zAGlWh`@3bWZ{?S)>e@T;p&Pb6e+`K zoJDs(NqvgpsP}FA_}iwRB-!2fHQWAkiJ(`0Z9M*Y9ox{sFdDu3n+yH_n@0?k45q)< zGw+_)KWKhBPoMbV11tIMEG}sr8$QDt{d?uNrtEReWk-Y?TT^#fee$`NZUUdZ{|?7C z?E5_14>|QKiq&0v^)o)YVH*#9>wJste@@9#(O>zRis;35Z>W64L7&z!Ze4z!#l?8F z-ulM|;ll=d_`}hEI^y0`Cj=9na3jfTWb%Ty7c`gOlqzD^BA%@1onbJF%*3*f^LX8n58AlOi@z7zIP}A~ zsjdEAd|{(m59cesyj|F2-Q{mx;+0qEtM2$Sf3x`W<>C+&y$p5yELn_Czqj?;mtk89 z=(Es&sPxq7-_&u)x9vVJt42KEn%8_l{N#i51)6_w!TAIPx8!ONPla!7r*n#uYGM!YxT>Y`uZ6M zPk`UH|9<5KH`MdbjHf7B)wS`dZrFA6YmGTFl2r6}!rfFfof_h|_unLLd5Pcp(EiB- zjjyBpT5omOAbMbfJ^bP5-(5TSvmNJ!{`0=I{H^D$0lyvp=UH6PEB{H^r(D>$#&chFq@0HI`$2FUH z4}VziL+|>@MOUvK*8lP|lfO6gE$Z#~C(q)-Jo6Xz^KV1_iegpQ-ieRdh8U_I?s5?Jn*OWsU2oLc=AVnxWhnL`_miE zoAFS0|8qzd7rX9fNbPA0{{6M}S}!v^b|f94v;pAUcS^;bLU@n@dJ#hw?&SCr_8{lDr4e`S7csVMZ1 zk81T#@Bh}&KEOV))A@Qg+HZZ-<>!)beQLecVS~!!u@%!kQagP6V_QCU-r2+9#`nIs z{B?IW;5Yr@Rm|n@-}~+OK*!0F1s<<)&KsD&AqDWzSD}BWjMVC%p8wCw>>@vpV4wHk z1HF@YYUAhq!{YRpCw1uI#P7ky^oQ+!_12kZ?$HU`?|a~+z6)~r?D#X!;$rjT_b*QU zieh!Qce6EKIsacXze4~1vbIit`Y@gHGP|ljc|7mrgSH*0Zuom)TIXFkdUxL5l~!T9 za=j4e|EIrn#a?|syCBTG{M(;=_chOWS5_7KS?)jI7Ms^$9)JE-eE*oMAJlo!pG3QT zRsR3fX}$6@??V59GE&h$;paG%@eEyA-JLgi@Pn4Gy!d;ecO`pwUfz{fS8wl1t84e- 
z3+vvq)}3!(^|;X3X4)73(psCo?EaGjkuLKf_doBbA9QLfiq-9d^JZS-hKa6){&ULO zTK)UWXjfnQt)S*L_Z^*&9%(*)$?o#w2fdRFtH-WE>gq6T_QE4K{qd<=!sb7D-_2ir z_p@GkZ2zzGFg+&88vy%lFw=#;Qk(N&-buu;BR2gN`VW>K8_$*2i04u7ze_%^OZMO1 zmDyGOyeqw3zjvio@b)Qo1S`B6H$VAK>%hhIhsI^!pYXX~{2;8q)qYQW;JUND^4aIF z;Vdq8{drsM#%L6)JD=2UZm#)G5uU!$cA@{=(o@l2^WZT(8d?uJv5A8|@tf|-OCD(Z zSLHRo=EG;WnEsag@Z~!!)z@gb51zWxng5sppPhebXK}IZY^-koPW_5vRoBcD-LUKC zSLi>lWU1)yN^kMLlwSW#f1Mbvec$s=;-;h8)=wviqo<cuTUwNy|{>1d}m%dDomG%EOJL*>ytGo729)ouON&Ib%)3@4%{`1R-jYqy>bBi)M zVn32cbJ34K8CMZ~LGP5eA!vT;h=Yshhc6ud!TASY`RWegm?wUG`+H`0e}8@W=y5Ce zpG?M;?CP5BtMdPw)F<;R^dBl&YW3f?jQ(HHKaayyUQf)&yV3E*^@nwj*y$rH?mjj& zPMf>K8J9f-42O^ z#8cb)nIk-P@rvI`KD@@0`fF_X!d@E;|6aN+3(N|M-^G9~b&ZPep%t zdggq7;e8W*2oVe{uVV8H@xcu?UJj8hH zf7tJX@Sqnc-Bj1cr@DFiP3Fh(RiS^(sMUW%8N>Qu-U$*188@}*kMS&C!v{|V$#+>? zD)AbBIQ5iGesINko$%osZ=Ct(TjtP@UH_j)I(vQRcP!@lPu0!(qTd7y{Tt1jz&OgB3qKAoIWmd-z-9u0P;kH($7NYkdEfH|_hc zHsjg#pPei&_C8$m`!C5Vba3(r9_-5fH|Q(DLjSyEsMUY7GCD&4w%C@heCP#wCwXQk z@UHx;u^k)y=KdmN{(q8Hal6ZT~-*#bsfy|B>uMzhvB| z{%>sRPwhhgcFAFSI$6O^#r$0sfy^uZ(mA_)wjGOX@u}^(*>j_`- zVU=HO+}QW9)xFl)`L{fa3;X%}qId%IMfTT*%%>swti6**+&9Vk)I93xtI)qwR#fzN zrMK|Aay?@E&!cJkN8@+m=XYoDgO>OITL1Ao{QlFQ-|n(7;lTqsx2<)Xr*Fl+1%44?rd+~*{f4TkZWB&GfIOpPzul3-;FT!uIWHLQKH@bM029% zlSf@g{PbDqKTvv1fAT?BMn`P_ZL!%N;OT|T6YP~wocvmJ+K={2T^)usCSSf~-|pv1 z)d;-~P@Z|C5gxWzL-bLs@J&`8r>IdUEsIl&!H?Xb{)!Y zdi@*xijq}b8{a}-|8~@1$x^HT$}&csPTq}V}m{N!|sn? 
zbKL`JMX|yY!HA zKKj0}>H}L%-uRjB|Bv(g_n|B=SQrnLIoz}G#wGZZ3=s&NF)ahTj z{>PQs-EqAut?qsXUhhh8*Y90vji}RpEMNO#`!K(YWwt-`*yLj`B!)}hHT4huJI@P) z%iehY>tnmue{%dYCyR^y4>tFI$kneXR(C#TYhUQ*okW;diiQ63%Z!Tt)HS5u^ibPg zk4av0&C&T*r+qO#{$5DFA>+Y^t=Nk%bXGm~XK#%=DSTk{1NO?6`vSbS|LJ6LvDY`h z|4seqpnXVH_xZ5vALuie40!3Y(0{0mRP?8=Q$|N@$9Xin$Nx_4e!=FRg7AaP6T~ms zU4Hx^`G!s^Dz^HmtHUt)C$l~_^Tvn5ZMoy#cN&jVCC(M#{Ag@AOA~Mp8gF7+;_AftuG`Yjl2H9sXYET`~@K4ff&-{*x;6|+vUsiMzWvySoh&Z&*&1_nv@7~M$*UZHs^5dEKYSqZ z)VAj@&*EZnwb`$k-$|ZNk>@@2_prkUPCUDP;xY$>qYm*eZ2j~;@Y?q;{aIX0kMVK< z#CC3|<4bn2{U-hJ|Fg^kq#nD_KUUg!PRECDKy32S$B=x@<2)Amp!KJ`_=Wv{Z~tTC z(3c|Ph1bU|+4e0@jtfo&-o*G$RiGV`AJ`k>{tFU{e*9<{PES73e$&w z{DmWzIFUZ>^VeV&7n>jVAMRIwuBd%lAJ|Uf_Wr9j^R@hjm;jqUI*oo z)ia)=={Q&Nu)(wre=qbd85g?-d+~)CGgsdA(Fgw!rtf>{A-|tB4P7ew59+w<#O;3) z`wc@KU$VMu=E?6Myyh_uaqL3>ysWO%AKjUk)hGIs*Zf$?R^6YZb;g&+bUWXGAM{S* zX}#HXar~h68`{gejn2B@q0s*Ar4KY8n}ZIfeq$+Q;aGX&ks7};xF{?l#eRY+S7vv|!=IveWmb1Tg71PlEKN+#3al~z|L z{>RPwXluNqsC>jh?o% z{m-zD1Gd#}j7G7#I;A$cc_$Hj-|ic27y8dBJ$3ryH$9R*yeq4__07G-CLcss5Wi%1 zUnlrM@(rC-RBZKASBK%B%ZvU?}QIK zMe+?bzhW<>Kdg0I-;Z|-%Z7D6eAAE4J>&+jbzFai4d%z`{ZCZqir!kE=|Ua9!>aVz zmPF=R=s&mg)ahTj{ygaee)vH8FuPC2?UkR#vEeg>&nYk1!yhjF@C$Qit+8tO(wVD$ zX}?2Xhu_YBwv`WDG2Z`bLv#lvtGk8=tX%(J(_f+gypp9>|7m5kyMJJOLl?uZ_|52t3JdE4i zMgr!~6I=NU{pXjFivE3)&+}+H(N{;}-U$*1Q$M!#(?7Z{-Jcnc_F?r->Wl{)?2#Wf zU;pgSJ-*-OVe{jE^7AeJ+5P{)!~I^u*n1QgB8Dv!*t)5;9p2TeBpa1|Mb(}eeAsO;3JQW+u(^| z#;ZO5wr2eO(bhg0N4n{*Ev|hv+Y*Dnf(?lm`VW_nD*8K#a~%u)+x|i8z2*@IQ$M!# z(aGXs`Sc4qiCaE4*r#!9u!p}j?wf!7M)3#0t%-jyICpuIjDSzOG|^R45+YkUjo zhcA3*_9?r(^X~cK=1neK`Sjb)gU{~&IWLQgUH{Nl+bL>4Vs+PPKj>zDcp6eeKZX9$ zQ_)|?;W}#a|BvbKq%YqY-JkVO9Mt|iwjzDjVL0pIhrYA*b{B-RzP{;&>+asY{w@9f zUHR<(V~nHYW;F}_q8qlSz9WV7SLmOYo?89W`=2oH1mOd{D_Oh8Mb?Mbkq3HLW_R}o z=4Tui!~g#N|NWCrUFRp;O$&$LJ%5KE-27dyyte=E=s4h5{r|y+`W40M_M6&v|C6-; z#4hx2m$jxpamD7Y!fblP>>|H2+D{zxZgd>mo`+5nXI%F``>j67_emXnq!?zb^o4_d zbL7`$lwuwYdBlCwINxG8Q9pk%Z$;`s>&q!$di`foPhW-powCC8ccoR>u3T4jPTz;T 
zE3>+J&0WNXAGH4ZM#shKzD~#o(>m{@!u;5Z)YV~_KI!gz=3aGan6cAye}3_|%fMsj ze;UfKn7JM zcxsc+_GR&0@`;un{OHT; z9waYC%ct*@2U{_%Pwg=6U8{`AFP|2sKR$Nu{pauCwa$(|RTt)&|2kg(^OWhAXQLMS z!|9zovh81O=1D$wq5qt+qE7#Ge?pz1@~89kiQmRm9V_{bqKRVhu3UHbM^@*e+v1Dr zXS{I1ug`jQ>N;zLIfJ|W_Zx5I@Ywk``T)8AnD$M+bkkkq_fCB1W`6iIPCteIgJnfU ze;tqW*yI~u(hqU4E)PF>Aln;ge#X(hJ-C?u@VT4+vf0(2ofkeoWuH%k`g1#;TK@m%uh&pWjj zQ)jDBc*53~pLo=BmwYq$bDux!*|WOm-wx98h7w%+^ykYv(Aa$25@+q5JhFLdzxMqv z{pU%Z@)Y{dDb6Wt6k_nzvQabUw_AEdcXs^ zqPSRygWi?iuHX95@yP>iyHDa)=OW!=@BiTmTQ>gk#}&V@LD=Fq*$>y6dO3bO|JkqO zfHU;_$7FoTs;<4}+4YAypLBlMh5kb&OGSU>vHr%ESw)^J*CQ~|tC{>?s_9D~Nj~v9 zWPC#xhp+ab{yHA~VbUkRxk7%@VZohr=#6jPdz{xgJN|4dzheIN`1$up+y8VluJaJ9 z&_iwS#AEv(wbMBL6#5UB6&3xJkNfzM@8L_3I7nVXPx`Pm#Jh3};-r z^Y4DTabNh@bKjfvomcv)xASjFAKS>!FJ7Z>L-HEgs0E$T4SxJ7w#Ixg)*G)AI_kKO zH1kR8$s-P?er!YSJIyzKm&L{M7t)5W4#VYdTmFYnj$bBx^Ua^W_fN-k;5YsAEH1ns z+4;A`?_Er*tmwaT{jKH`_i7jV$7ixu|4n5G=EHu=UXRK{9E3l$ZTwCRZ~DB%?ndnI zbUfq127Ba(*6r87Yw3IPu-*6-j(=kJxqObX>mT~1FVka|-hU5%MainJy~geSZ)%%v z$@~iaV}^}KzRn}vpR-*UY92a+UU|&cJaoek8ecn$i}|TjJ`XOYKOFYt^X(r`7!F5# z@`_b9obo~V?D!|)-zNV3>_hX(qyMaCASNo#>VRIL z*ZS19emcT$ad<(kw|s0x>Qb!B#~-$tK4$#cTb>*I>|cKU*gH=0%E$S)ln>kV%d-V8 z@Pl^$J>}gSiy5yz2vH0^hP#;1ABD9_X&1Ihdf{W~Q~MSoX%i~Db^ zBxpJ|#7e;)b5KQC>q{N1Pb%}>1;9{c`rD2t0-hqZM){r0QMyc6A}s;V3O-bsep`g|1n z&nY7n{mECIO((6h}KJH65o*GWt?zr<-{`IBkVf!EE1+Jm@-(Xxt>888(>ZgA@{>0yq zT;jZ+6#CCABk18>X%)84kNXJGKmHVngWi=aBR)5{Xm($xZLuw1$K&;H{-(?vYug|FYCkfY5!?S1@pz4!ULEl>ze4}{ zrKe7Rs~eU&vl(9zKJFWc4W@PYd!cv9xOKMrd+~)C_x@>{r#E~d%$RccUmiTnt}C(Q z|F(_?j?wc!4fTV4VyA2G#KU<5Y@TP+LGudzhsuhI{u*aL#=PwL@03;D*9mdZyE3{6 z+kYO@?Q^y9=sdmCIJlVpu-^Ey9=w0vBVoNy&D-|IU(7}i`~G)W#{sv}`wz6$5B908 zC{|YwyZ)TKe02VZ{m*b2spzkD%^juA^k^s#=tjpaFYz-^kUY?yR~^^187p^ir!v~ePP_jQS%Jn(;O`-iU% z!_nJJTko28&JO*4$8Dd_I`qT#&-Zd5JYMGsX32uz_*6ISbbi>y_U}rnI{nGBJ{o0q zkq^3P+V!~29mFOdv_ACtz~2kgI?s5@W48Kx@r6sj_?stw@`=81!%@H8X4=!;<2!afIJ2-hcaL)2CnKIv%eJ@piW}{kmgidD`qF-&{3(@YUV^bi--WsI%`M6TbYhc>T|* 
zHlFsAC5y$i9^K5}mV$JCh5mUNG5wSIR`x$JyU15Wrxd-*;)37$P=5Si`g$^d(ht1G zr{mzKt`5W2YyRP)`~NU6(}g;_{v^@KkBk3*aRPaumK&t~pqu#{l1e@O75c{!O@F6( zI`5{|kLPPe_MU>x?$rFWRKqX z!|wk-?x*}i3HG}Gt@a^R#dfJSx_PI3xgveH#V+*kl#eR(>aw`@ZkhQ~K=lxb1)DWO3nqmft4+ z|MeB6>yrw3S4_Y6IzQpS=t*h9s?7ryV zmFw>3y*j?I;ypY4bm>>t4=eriZ~NW!%{$<;^FITT#_l`f$Hw!|&zn9`t?qb|-CY}> zUWewL#MoaGN3TNvfilDRU1@ds@iQN*R~^mPzW#sz{ma(-P2cLU_godWxiT9wXXh=^ z!OlORFSv@{|Ff;SD2g>gr#_84K|BA3KOY@O&qDt>WdxnQE3FZ{ooL#5b^0}Q66f{6 zb?@+l_B?4n_@%mjyG`pn<1sI6MR@Bl^l$jDm$!S*Q{nhueD~Pb*K+XN|G#vyxG*2z z7mt6=Fq^p3JVmj(x>fG~)7E_Y&c!bDA1oi$>aY1+2SmRqGGDHH!3OK{iPvFT=Uv%t z(J%9G(Omfb&!(@fI{7b`^@Z8D+(IycW4Y9){ESOJXn8t5{$7~Yd6$fft=Nk%ta0y~lc%qEOIZ8F z-+Zob_5oh&*#4>Kcof_8>11*7PEqG$eX76m`%~4C_fL2W{pXcT#;^78rT3G^pCWvq zS01yIKJkO}1+V$3qc5<=PhA~`AAYShfBm=b6K;R((DMhEsQ&+5e-;H6aPmq)_-`%U`kPRsVeZ`VHzXK}If53QR2AJp+AySw)4xBp+z`BwCAOHHBw zQ2D5$zt%N1kN(+T8fqSK(7Teg`@YINLGnP`uen&(eZNedC$0>E9N9`YH4uEN zs~rE(S3~kxdnb?V_ow8+lh~~>$Hjt*{_w0Ne5Qx`eS*Y6;$}0hB7V@e5Axto?SIoB z&cAYtcU|yC7B2d)kALfg<>s=T+2`N>EH0);&Heu+tJrQGj|7cx?a_8+er<^t`o|gC zcy4qZzhg3;8X5;{`*`4#%R(qsCQulRrL{u}MleV{x2(;v2X zl5aMBCVuMbFf`m+Yu$S3E5g>@e(m@_*Kxox3wi$2X^?wajbqZ{o$C$H`wsGHLnQAEcNtL*X+d;pae=lTy zhKvUvwqh^7u;%wyedf3iXJNfN-f{B}kA9EWI=lZs!j~TtKmVSC4u+bSB@2D>yE$yD zYfAxr7Wzj|MSq=NdOpE)&xHqm_(1&Ljn2bQ9+>i)zeZ&!0E)T4&pTCyNXF$Ncr( z%P)Z2*oIo?6=PAcqA&IX*tY#MpIj;m{i6bP`tv(J)1x7E#^=N)4*JAzedze)fwp}l zajR24co)~-8h7AckAJd#S8Lqazkc-YbAJY(=|3lni+%qxRnNb*)emZ);_a^K3*E5O z`SH3c^dBgzO@HO3uBrc2U^+2=r>v?TcUqo{;p*pD<+r@~qx0~C^x>7ykUkSXb#)jf zzWVJ$Uj4yAnJ(~|{tXGRedTZJ`3I+dMX{=D`b9Tv_|m=#{pXY{roR(TugK?U>fkZd zJmMhrW}}xPevo|nH9vJaPMT*n@gDi%%vrZye)r)a3>Q@x2yM_nkx^Zl;b*WwGKUlI<^w)Wmv2roJ8;R}2HoY{C4cfTMi@z79bza9a zKel2ozOctOtMB!v)t?RRpSh*Kv)#AgvEN?|XK}If5ZmkVPp*E@iJh*EZ*_@av%g90 zLjSpCW<`G|^&H1CugdeFdBj1V_-))8{cU^GIQ3SieE+ULoPNaA_qN};b~qznV*4GB z?&{C=SM4k==#|IIuUhEiYt=8Z1t6x}Y5`3J0 z=6e^=`eo@lNa%o0`{rWpv!?&`Z($^CcfU zMeEm%=B@Lm^{E}2haR!_=dnE`f<{? 
z`LV$?PyO(QY3qDuaKqnyCd}AmXzcLPpYa;!_|tfmCzsowoMF#HJBy3uX}x$09o1fh z{ZC%jR`iEwY8maG=dtH8k8XRunIGt##LY$*Mf{-6k36kU=LzrP`opL0ef7=rpZR{c zVC%6T{f|3dhtH0GhO)TW{)g`$bM=Ex>~u|E&1n+BHr<$CTjIs`A0Jip*F3(*H=UgF zf|^Gh^cqiX{FUEV^1Tx@ z9g}?X&mEnIAGE%e&x7z`gFXD=ps)S>>8TSA3kOeq`^OVvBtPibY`|xgb-0~7XeS+kH%nM%g)35S*u*Oea z9ky0nd)(G{?XY}n4)DxnhsHwuD%sNeJEzSc(t2T!~uWZ{0jXC%1q;T zrB&FjTo=cmGXAguMv^Y>Y<(Y^lp5S_|Jd~!J=5phx}MZWK~Ua!Y%pYvX` z+B5U1-|w^jSkL`>y`Hbv`}KbAwbx#I?X}k?4*JBO>Ie_C*C+jIzU5(qDK8n1x+%6+ z*yMmW&O7np)(XE};ogG|JKt-bef}NE%Ek6S@2I){nei1RYn1-$XdQU%1=~^op(0YN z|4hka9;OF%-PpMC62Dj5@;h0%q8{C!#{nP0w(-hZQCA&#%pOGTBqQBOy zT>spbk-EnRVxuGI-B>^K1mS`3(>S)pweI5jL*v!+-ua1x#)nP!c>Rnyemr&A|MR&E zwEj9-xuo+JZ+GpTJmz~_>$4$#>Vd6s{bit5|MdL7*0I+?pN_}*|L4W_PLOe|59OPG zWGoLqXn9)CgIdSzh4i;p{o8Mz`pPG#wpP9E@ee+F?(N9*h z5U-qn%O!z%fl>c*q>BDd_*_>A-*j(iJmLMHzy9zuZ~Vta8=Tz^SAOmtU)g2ukm<$# zzoYX4$Lsaa%vVuZ-TjT(rJ8ZSt-o8LzUh_=6FsB;uJBa!*E;0ExBXvUWW~IM*W6J2 z@Im7#AAc`Q^SqP#=Eqj-RWGdlwq?(|{))51+B;vp!uls)>ou?T{M+uj|L@Pr#dK<@ zt@&bgpO5BbjVJn8-+c_9L>D!eX^643-ySD-bp;Q@uzuKmwq)LUYb|u zXS_NLvvz;^!n?=kp?S&kU)|z>v)R7v_bb!;W&W{T1 z9)Ig4_uMp09o~QK{k2mLm-&h1{SVi$`8LGQntZdJJgRK}x%lBT->84Ph*b1<${!T0 z=#*>RDJriiU+3HPDwfx$JVo zfy|>ePwNr4dW+}})1LZB{^+*92^-FzHhu5$!_>3Szx`Rcm>v_$FJOLb^UXER^Pi%V zIJ#M%4b9Uy^N#wLBieWx_X)zU?L+xZMfjlgq5b0Tg=wDk#k_0!tg9O)KXBdu9DC(0 zVU7Nc1~2}@KGdsy|814~A7}cMYIV1B`pa~&dV1Xmas0dw&|lPlph&g)uCPYcpD#9i ztH=C7Tl?EL?5 zRxbAbyRjbswABwL<482>rvBRRU&zxsQUAdrx1v9F)|ASoM=rL#4&2zd@e)6Mf$%`v z|8&OIgYUJTA@fN5)T_g=(=HD@yz&_vgk2|OfBW5|cT&eQ6)! 
z9i_kOX1s>xbp+`z>OZ&e)asvp|J#;)>(hyC-~T?KarHBf^`ZTmf4=asQ?xv-=RvJw z_Cor@+Aqzx`GfPmAJ*Mw<$29L&!WEV|3|WNvGdPuwG$L?)Xnio(CYF1ug2N$5XX-C z&nq(P^k+Lq7xGg5;icD$T3+AS{_yug>z91yliKRM=iio$B~9tM1xsA9nq}+W66rx>5h3EG`xOUEz(NqtpAZ=B2y){i73` zdZ17IsgCeKn_pd?`l)O67SSIz*nh92}o^%Qv0!v3}~+q4lTr>v*X@&8yf>`oN#&r*>H9 zxjFCZ`{mh^)!l#C?~jHuH=FYMeTt*LaitzuNY?X$zCSqW;5$$MkoFHL9=e z#fG2iUpH^>lDuSI)B~-q^2|@3)}t=tnoYchPrb0&A5L9*lVcALGjIFrogdkMCi$lS zP*yIB`u!Jm=!^ScW2bTE8}%P42CUPc=Z*fc4PB8P^BaQy=g+_WY*0^KJK&39cxsqr4<4;j}&5w@d z*X5bNv0%Q%({Y^enWx7F;bVj951V}CCu@HCmeazFt*?I1uGefz9sB$>n3ap||DFE+ zsH1*GvAWwGb-a^^oqwaBT>QpsiyieZ85RAV@`nUX5Bw=AuX)i}zU9$}qWSw5%(pn> zkY8snq(8jveXo4?=<{ZVZ6A8Spm;q_ei`y*jR{c|1(dM8-f|Es@x z{Bukhsp!vq!6B(jUxpety_AOydZ+ndk_RtE+g|7+$@kcbX?|*lcb+!?8_R4mE$sWN zZBB@Pz(ale{5z7BOXc{#p?*;N5pQ>G^;T8BQ*0}09?wJE{>zHB`u9s7^Ps*V^_9nY zT=<~zb^O$ioua3{=9!H@!8AX$L*thD6V5yS=CH|s9lgUtZ+x11^v(7R=3iRy?_X>_ zI)0{$)#Lheji=iy`BDG8NUi9vc}1*PEWfR`>g}D_%o83q$aoR&2a(Vsl62j8o0nDF@A2OngfApTxhHx6|yu6^~Y7dHCsYQJA{)`?-0CGULj!#mx` z_G0%R>tyA^=llE-{r|gu^@Gei7e8z7$#SwXR}* zZg~TOXV`8*VRiQlX6rcoZqYl5m`+K5QUCseQL8`o^!gHe{pDKM38Gv2f6e&Q@$vV< zG|%gJ=Eqj-RWD5b_N=py*!bw+R=n(6Cw_Jw_3Zb*Nj=;DB!0%vlEo{}?iY;SU|SOC zGwMH3cxv_U6wy)Je_on)+(jScH}{e_^+DrlANYmct&bl}^SqM`^J6QLSBGKBA0`*~ zoext#xcAkE-}^Fk?D)T<^}&_P%YU{>w>x1iVEcCxsr~=Es{VsT2KsqdSkZRHI-Y;i z>!_Iz`%6Pt(t3X1%RE8qfi}N})bm;wzS;jve`}4Mf4IXTpI@`J#yh{b{qLUMi28Q^ ze<&*#(_?GB{wEn%Sk<*x9=cIKoged#+yC66ijAlFUXkx{;G6E~)EATELLBt2@OJ&) z6;}6o^{%i+#q-j%?~lAwdl8ZG>#)@~{kmt)x_6yc--QznIpqy!F^--8>DO_=3Htdr zS3jupMqi1>`KjOTKj0ReM}JZOc?F|Z{~3}GzoNez8#i9!r!Np5=#|fLhxwT&aj?cu zULA&mF8a%+=ly)CaNxa9{_*zN6BxJd_}@=h%qG%Cw(xUA1GpgPp0b$!~kDKv7 zf2&@9*vQJo`dOj;{rfre4GOEf#t)(!w$)4K*BW=MjMp!8Wi0)Y)9>m>#*-1)bQ$L7(`o4;>#KX#LgYsh_%5ZxQ`r|J82E zr(C~vn0?tnr#*gegF3b6-{kLiCcp!;nuY$FsvG>e`9=N98C3M=d1}i*rjy#0_h0qb z-Otea(7M#?g=rr1Gk@8A+wW@Xf7Nj`-{KD?fP*`S$)hqV>VH+KsU&R`+>NZLWi|KFROXI*L*MyztcN 
z&wV^hkCGkvpo__IAr5+1c)Nb&TD>H0d9EyH$9s$G4?BJGk^*M7zEcK3P04_bZlon)9FTammv44WPO#I5ZM?g*Rxe5*IT{K3<`=Gp##AS)M+r~SI+ z|DXQGUhfTwv!;*K=65T^ZTsp7lm4Rqr3d}Ilep#eiw)oEsaL!I z-+VE@{~e!|i}l$UYtv`cf1rp|^mm0f`ZW)})l2k;$L}uTgJ~ZAUg%xXMeHLT&z0`( z`+?fM>V?f_zV|y9eZ3PlfBd?8A71%PuX(0_TkC@3^!H!6`avglx~5O=8{n0PKgFp3 zU{Rw^f4)a%UUmB);vnOuHh!Kji!{9d^C! z@V$2U*a&*q`TvgA12<5cx{BhhuDuf%rdVM(W zM}3fag7|wOd_z|b-+g`(yH~xi;e9)OcUtS>u+hFRyuQ_AYkAG*eG41R8+!hyot2B} z*bqBi)2IFajuQrcL(Px+&np;p`Ztxwyx49H`=mZ`(8h5RH@l9n<50J@PFfEe)cPJ9 zRDWx&w^nTAw)RU6DQNbweL@eC_K$t$)~V>XVo5`_j!}yK{Efy7Pv)-W6TOc^)X}-B`?r z)K2;rZ*}dR_|VO^yPEk${pS}M75y1+Na~rcx!9(^Q+r6Td5Iu=(E8B2=I>uH-{SNE zI^kPAY%t{|{xId;KRe^`16~SKzB>2Y`#o*%>wNy=yaQ-`XrHDN<20ln*7Q~T|JUT@ zk`eVEE*@E3S6I<@#X4>mZ9(fp$74HXJmR2tNgm@W!UN%_^{_3jbr;poIN@i1d|=zd z{`^q*&7&tZ-}IL5{of9ee(iJDfZAK=|6en%qC~rEuYUSMH|$g|_)-6nqDn=7^qN~l zN9{)%Vp~0@_FTc{$%4!mv_2C3c^$z^G0pQ%>YE=Mq;G6c{o&eYw?FLIUk!&F?zws5 zIjiN=vG?EptXxcw)qDK@QR_?AsO>hr|4Ccx=7P+#HSW05VdFW8TVHC!M=!%9ZoI@# zJrEvf&!_5Sezm>UGh9r6n04c8d$0FU9=3U4{F68TazE+CORR`jr*L<_ltB&8+x}fG;zt|x4u)!YwaN2_p zzS6#7cKFofXD;1rMxWO_yZ`OntXw>}i(dcf)UPO3bxps`>B>+0>4+a)qW-S%RP@(8 z?jr%;bkB>d?(2;>=v@&Vwcnx-MR=fX|LvMQCw>o74;$>^4?FIp1!(^Eg83GwKInu` z9ghvByu=?a9{Tbjd(Z3-H=c3Nn-4pryZv+jt43BX%rC#7{QcK5w%uu5Sl#W7{+Z18sgu-14-Z2bm8x*ux*r z+W6SxcK^iR!uelY`q`UChN*A+|ADMr>^Nwe?tdEUR}?GirsJh{<@_`G%4fb&|8|jE ztA82o>Pw!XlabL8^ogH7L3kj3u;SNx)P-j@aps4uNWHM>^mArk`r7GX^V8q->WAKR z8~L{XnV*$Q<-T`X2fnbnYy4nSb+bO{OObw}{+*&qMSta`=QDfNqb_}zT{C_qUUoC; z`6kcBCgb8=vF>hn-WAru`@>ngy|TigoArgW?z{1V&+b0#U6CC>=WzVrFShA1q5SI!65mib$>gnrC|C1v~Qd61v-?^0@C4e9$}PGrroy>kxemYwGxgtQXd6WLL!- zUxoEQe84p~AG#ZLYd?RjQrv$2D-WWVVIy1MLLVHrW1C*gKbQ2V|6uW`PJhm0qYwS2 zNF4O8sJejth>i~rwD)uRb7i`{zj@3!MOyZ)*_ zD;LvaRk;D2>6dGq@fF1zbyS=C24h#|*N}{;|J>q{jpqt0+OAl~{bKt2i@bDKwtsX4 zy%V09#hYqH9&pNzu9rmKvpi^Ddq_#<4Cls z8}-xqG4H7Vydt+&|F%?yZ_i^x{N9a?8!z!&AKEuOkUH?puY8MZU3d%Yr(PJEdT{?G z#|?%T9({Ll9TxMc-TzEo@cmD+WU)T9-)36~l|GaCME!>frs+?dI#Z<%{nIaqKSl47 
zyrgc0-w=LUx6;4Xt@EdKQ@b_kt}Ty!$3YWhv{BpZ{yKhtf~)KAzmopQ2fdR=V6!E5 zI==OtOFZg7zj#!u{|dsRj=leM#`=ka-i?jhxP6I#-dLM_(DJmN2eppb>L;%b!>Vt7 z%ei0>w9ca{b9}by|q04S^KI# z{dBw5e0a6rzfaQh|Bb9%c)o0W#shUc?i*&jd~81RjQWohky`!dOCEgmvg3coRbKP5 zv3$#O;`d;#cE2FJ6s>>8Ns)SW{v;kIEU}b-bk()O#5Z5I;x+rf;5BZ?Kb@>xY<}a* z|G&5=-R^`Hb<=UpcJip%5cFzSo;QuR#vNZ0>-6V$+~|__!T7{M#x*<92R~TpkG{#n z1{c#G_CMx;=f3{fd&7a39`3%f?Dy%@j(^%&x!86#POraes9#a6f1!VR|5ft)B$IhZ z{Yy`+{`yV@zoNezThDljpXUjL2U^{>)bpAT-|YXTKkWa?sh@ZyObN4_v$op#U58WG z_CEtzx!7^&7W(zZ@!G*oFj;SU^=e(I}7Gpocf>>zSYA9 zQ(oc^2TZy1xD%R}hXb}-GEDmZZPc;r5Bs$~I9{*6>Zl)NoQC*WTfKFapYVu#C-JC% zUOcMk?+R}`F4Nx;n;trzJ?~ndIOtuGz4+%JtPcIVvRvlD8b5h;7*1L5tDCKR;@iSU zC%^Kii$zOX^{huI@X9edfncZYrFX3xLp$Ys37?fmmV zRxb4A8_DnAl`pKQo7Od3Q<_V})<;BV|^1eBz*Yv1nXU+q=RW z^O8QSZd>A(r}Ohp{Dw7t^6D^N|JHZ+)w3Q6J6?6b6<3{bKl<7IPjr0iXR_{p7+=Sg zZmMgmH%Ve?AJoMk_3tm375$y$^FJE!z4}um4iZmo`s01f;*G3a@RM(Rr*TkqFy2D? z!&O7q?>P9mGsB%9S#GOSzTT$3o&Rs^IAG21ugL=?tGhim(|(fqG4H7VK#^O~zpwDd z{l6E4@158w5(kN=HvKZ5#dSRPr{=F2kNOp)PEB6C|MtWSul@e=Vbbz%edqLd43KZ% zKMrN(!t2wg*Z*sMVRhFwuS7TRgk$ksY~~mBA1plR;ay=x+ZF4m{|svFuZ~g`+=l=-qF57VYe*gg$iksb5fg3TaEebB~N-SGFqG|xM!Z+>jW zUiHGX&+l~Kr^wU$RDRx4QqgK2>*g zqR*)Rydt-vzt(Rm!ng5r8PPjIc+H=P4Ii{V(t2|j%&+7*;dxN&n7xqxu+dhxzqahs zCx(sJ_|skooOlKKcK&lDD;K`cMz6}e8DCMdqK<0Y@86x)Yb%nccGQ2Uh}7xN`(Z^7 z>VTc1O59)VAqXF&FA%@5yT?iRLHLGFGAg$E$*aRK^RN^4d2HE3!_4m;@ksur*@Uq$Ju25GF#;ibo`mZyC& zKK@<^-;nXBhn@6US2x@~W1EML{Zk(9`N-Wfj$N(oHP1eO4QJ)T=N|9N`%gY!+5R<8 z*xfaKR<6HNy@~UDM*WA2?27(6KlUrk$MTtf!fW0ty}$?4Jo67Mm~U~$al-T9V*0}_ z$G*PtjJq0PmzAzLded9F*I&I?>*r%JzpwoNqfV;7u%d2`M}o|=^7$(t8~0A^sQ*au zs8;{gg=c!?Vp~6MY}|N>pT0nNpw(-Stq0$0J;TM-f6w{PuNVg28xDH^`X73_?*Dtla8;gs){of(^=nAHFt)ICI z=GW#q;oH2h!5+^`nEk}l7oNDuQQ_dTzPQ?1d!0rd`}|8C#eCoL{Lh8fZ=Q`=$j9lO zJhJmYYU5|W!~CNDWyiJK|C&+{zST+U5(nX>HvRJcVey8HL)`qyIMjt_w(`iU!*JDm ze=vR00R!R6KfL=LYiu|~zU}|n4#DZ=_wTddftfD!k=pzY7e4caPk&K=SL9aoFTF)X z7Vi|>iuBhfHuV?>8)RG%e=me@NFO?`$5!lBFRXp@D$|a;c`&Sf>SdSyV9uG;weKGX 
zvvRTh&!(z>L;awRFW&CjJ9%8$|Fp%=c)Sjx{$<6A{#xJoBS}7S9glgZ`z!obNAvLa z!Zgq8c;?4W`lzcL*1vGax9|S$!^1}3Ir_nmT|1L{cK#=+XZIg#WaYy4MV@%OYy6e} zAJ_b}uc&`sWcSja`)a5ALB^wQdcQdQrq}=9`cM7aHUE5W!#9TwR-EzZj9m_*fBXHl zi(LouW*w{_P^#{d{V#znXe!zh?JI9s0x%T3zLtKgsiMY#pt+f@e8ZE zHa+xvbMJ&>&!6g#9rYh982>i?AJ*}lAbQdlX#RO)dH6y2UhDNl=9lu-AHsRh?ES0z zz7fK&4?5?l7oVq|>EFr9#a`b%{(nK(-8KDI&VM$vF8!$;^`BcrD*C&^8~yM{)OHI- z^x}Vju|cb&_3-yX&v^7vXRE(gy|C_+Q)gWDmA{3lNBq~1AN$=Wyyo%!3;Sn~db}PC zH7`pRul}YYw&gL8JS&%||GeTsMStb-KdRP;E7Q9BAGL{tKJjDQ^DvN=i}4#`6E{Eo zI*o&i=?_QkbN|I(-)epsc*99^Zv5_!UhCQC-{GuW?EL3;<xcu_4_0QHj{QOyc=)>`k)z^Bt{QT?i zw7;xoAs^kSpXM{~sQ++Ls!o5l1Nc0D_^p1Ys1m>b((j$&gT_nrm#psb6?`zw^G-6% zkF7{v9ft8MF8_lYo;okAbjE$t|Jd5oYaajq$bKKBj?KHBjakUYiEZD1If+<4+lSVP z`i~ThivIA1B;WeT#kTF=sXZjP$oIeMNFMc5G@a?k3E%wKV9HDUVf|fK+C%jzzybhRWYuty*Kt+G2d`0*^ z@zX#4^#779k3JOf_d@EX<5X}+^|%+d`@`oi4(pA4?cu@ouBE>1f0918EiXU( z%aCoq?W|nLqYv>${Wafyf9fQH?!NWDe$hs_S(V6}PU8)36MpLl)S<%g+n`j2Gg((C#MVRiLRZM*-ElL&e<9(L5f zWK{Imyymu&Z+bMe9;iIxpilfZuIdI4q#t< zy7zxOI4hURcFy(B%vVv^-8FrCCq8sDe=dHr+hRxk%Ze5K`;_klO^;mtpi^71xrW&A zK^s@=;_roNp4V~AkFD6NUYNMvP4^u3+O}cU8PBYD?d~(EW5@sfS-F@V|MB||d?+sl zwDDYFjd~wR^s?=vQ_{NoJLNTR7k{dQcgjz3uJ}_vHv0H9PsdwSe{i?wpPe}S3t_EI zUV6(mJ3U2xJN_Ta%BAwVlk|B>$4~6$hFZrdHWcBh9k>5>5vkRGd&x6Br~|sOapNU^ z<_W?B|I_;)#%Yba;v)}?EdAfDaSzU4Q$87fJP@}3 z${!wo?z7$NANc+INLDVD_h0%`6js!&!6VJ<6SV6e)HdA`JL=zGWYp?EQ|g&c4Y3*5 zyRmWOC4SRY$A<^n_OJELuYATaTugsB#hw4A6-Q1CAHQww=eE48LtnQ4&$Du&AN&7n z?aSt^;|sgH_D&u*RX6HWk9_7A^&coQYW4qjU4P|@QQ|o0mX~Jt_c3j;Z98#l!^Z|| z{LHHk!(j(K{`;3dIS>vzdXI3^s(^;9?W=UKTB2c!P;ia@RYYfGN>(GZ*cn0I62#!LK+2f_nw`)`k} z2j6Qw!$s9!`K?v2e0cfu?>MP7>Bx8N@xs?XKt0?4k7VV7KbOlt+oq4!5mt9?eQG?t zZ>P<(Ejidx|DnQT^<80gx6|}`o^%{+>Ve)BRTg3Y(-3}I58FG9gF4Se^@qz3JfykG zFU}0tKKRyCHaNL^{oA2hzmwof<^KN|+dguMv!*|@ojhtXAGK|JrEkTk|NJ7OqQB;G zA34)CFW8Zvmtgx{;-GgDH#->@KWOzDW9u?M>KRhcjpb9X4#UZpX5+te(Ko^spV{)Y z*`JzEeLMe2T}7!L=bMaU{u&;-Q9qp@{YCwUi&_=^wQh448412Uj}7sAH#Tm(#BY6Q zU3j3y>+;m^L9JtU4?p9CZJs*)r9EyvKlp9WI_#5k9%5Yk{Z}U|7Z1AR_rFf!>DsFw 
zY;GqE>KiJLexm*(1+${R@|sG7@6~Tur>F8NdFa;3%EjVsvFX$N@SVoN#q@`d9rVP` zE1!Bt_+&VB$zOf=La+5~`ybBA#lHXDOznIuiq(Bxq&B*FClNcYrte(*t#KbNAJpov z-+!7O4Xp<{v5A8|@nfSe{TRP3b?o&;pP<&O)Jgo*tHW^kXC`lc?}TaL$aQ9(y4)+> z|G(z95K6#`M<~F7uQsVM|G>~llevc8%3>J{dGR@ z>C4`qG~NV%ESt*lx>bN<>Uh}yAkmuiUaqqvkPF60|H_S`7ZI@C1PT{H3pW|5i=l$Ez71`bG z+<2~Z+c>6|K5y_qn{N`gJgw)!^!Z;%e`~_QAKGQdYhG+k`1QEW=iGN4^-TYftX%9o zXj^SJ7RBnWy_3fvx>^6!(Y~2?)W5&Ttkd7mEuvk0t)A8~d!F!`rwLLYG+r`3uS<9- zdMEWPzs_Drf7s+NM;vwR#I?fAowvX1@DH9!UCzJJ513zFUjK6@HYi<1-E=;wZPy>t zS1t^;Gv*ofA1ERf{k3j-9?7=*hSqa})B}Cur%wXk-Mco{a1hJbNt4*JIVmHK&`VSV5YV}X< zqeZ_dq9f>CQMLYc0}o`opsxAp-<9QTzVQ4a`oks9eR7{;G8e8malIp-d*X2NP5)Aj zs0WxY=Kp`C_n&EN|I8;BJL*5TcvPo9eV873!H)dAgsvXiZ&Q9wZ~J}AV_Ze^H>3`B zil#I9PRC8{h4hCB&u+bbW8>q(r0*X5{6&B4UjMeA*6obNT>k!I8K3yAFYVuKC;9wu zPU>enW1dm}c|}G=f9k9x_2Aonf^n5c9Q29b`cU2Af%bg1WjyjNPwVyaQ?CxgAD5eS z?(RG0;jim&^toC4+j#}s{uy8Amp`rdUs9WXq|54>{y1+BKb;@_Mg510Ds}p&*9Tg? zz9K8;fy1SlHLnt+K4?7Uo4n74@1J<*ujYospStg>({JC4 z?a8kHY3O*0`6tdPZh$h#c59f7BUw=o9hd81v9143RxZph>Oa45RP@*S&GuNnJ&(Eg zy%VG!Xyegmg784&C-qW4<1MP6al-1)xb^;cwZgP7FL&y3UpbUIw*Seqa$z36uYCVquQG4dS=imR)k}Z>;)KKd2z^HVM~X;A zf345B@Uac4+mQNv?*kvCFA#q(gm1`r^oOn3t6sS9^S^!IzEAaqYmaz(+bIv*{?opH zDbYP2UcJG@cv`>{~VO@(aSS# zr^Zjb7c$=(URd)>`HIhPzg(Dd`D16T_K)+u^6dBTc~&mn?f+k&e-?IpzMMP)`5h28 zdRDjp6U&b4^e5kR%CmB@{+!wk!R8b}_@Iri`rz+{X`Xjd-~8B$z3PQ=hn@K5`P*C) zRygV7FFy4CL0-qEzs>`Heo)Q%w`5%Ds{1^kgNK(31KsE|ZvQ2tqQB-EizB5-1lMlS+n4RZ*kG|~tuRJRk zUXOWs_w}zs>!@E*tf*Vg zBSEY8wz0P9h7Ot+_3sqSivF(fM*mO=c0Pyk44uT`{h$B;@lDnl8QOQvrNSof-S2(F z8-3kt|91bej?PEXu76;A0}~x3E9&Ls5%;0x`QY`!^T75I^)D;d>0kN(5v|WSAoal1 zk8SirN z;trG(Z2MK+((xrL>Z0?r^Uvt$v@Ui-?5O`x@u*IJ{zsMm*>4$YJ>sBuMb%Ny8{^jT zwIBMoI^?;soW0)R`9<`H_rEZ%`QtlB!tn<`GT@i&>UBs`U+d=O?avRGR?%Pkm8|ZX zzR(Rj)eAf7KflPX=-?Gx z;Z=0%h~GOw;$Z5>wto7@=B52v9qq&No#a^^Y_JDE>@>2+q`zLXOK5NVi%(s+U-$j@ z-CB1f!M56su_#toZ?l~|s{H<)I(-snzES^?!c(jN+QK(Il0Li}8#i9!H@$Rxcp$Gw z_Pge%U#;iC#q_r(|JVC}c)}Mx)mnYUn?82MW3SSe?f*xza-kpl{FRI=tm@k8>F;*2 z(L0@AYuragU>$yc5SHQ)Y!aP|vF 
z-uLn@e?w}2J_@tj|| zJpLIEk8y=nT@$Yy|2Kq7ocTrlT@k6%pX~#_FWC!t-nIU}{rgA8q2)^W*u5`sYQJI{i5gGhL|z>Ui)L`TBFsr+$joKjXNf z+wSw^U12TY_2^1>W&a!=IOeI-AOFO>Fud%;7q9X2*4fm`bTzvt>#6stR~ z+1jUl|482r&BxX{QU7)ksp#(tZ}iU-L>HbnLnm=~&C|#7Est>(&0mh*{hWp${I~0W z$tPZ1a_H3IaM7{NUmo(TJwJB-e?;d4_PGCscfx0l-CRdJ)_=ZWJL=ylB6a%nI}X#s z73}WwPJQB_cLl4ft9OMpDz727@ztiDcM|`1V>s)~2d;eai}S)~pPX{-l{-8{eLMft z(Q(1?YUc^|sjVnhoS)k0=AH7=`KcZC?=Kj&`Zp!tbjroM7E`YPkv_5Y2mT<9bJkzRk8_=VNgn|WD1)h+c?m-$8g z2Z~yC`WLZc;W}um$NUwk$941AV48=&7p8gM72S-Qw<}HCkMyb++?6ZddGWJT!kTAX zy~YdQnvWiK{NL7kin*NroZvg^2ODCiYwyHU`Tv(({Aqtt|G^?s(Z5gl{EmP;+wOCT zqYrTqe;vXDt**}7{Mv_iIv%)~{?I!4^qoKS@qd zquAVC>m+{f#LxVq{&NdYt^Vtc<+HsQYJKW~-X(cSAMn7sdiMNjJ?bv5-+uqH&idEw zdh}kqP{)ox2c&O~=lp8r?|(10&jW3Vv!-veb)HStjr#D+&Lt!2Kd*RHr@ydbd{7=) zzK(-!I?fdyHV6;I-wVBydgPgnKfzx0!m9uL^QuQ|@r$tP>*K$B+Gn1lp1uDzvT{MM z{DSiS|I1(->imV>U3({wE7zYWKmFVv^&cuSD*C&^8^3?tRFHXjC$^#1B@Pl#ZPT$| z>R7y?b(4D3`FCO1ZHe`6fBmuJL;HcHcbn4+)UEyf?|Q437a*GM+NX5eUE930f4lzH z3CHp^jve)%Uoa~AQzyBeGaUyT9YODks-uoS8CMY=NIz*kZ10L{u}@d5BT~xvPcN4x$f*YOnX{$r|_y$@(T@+GUgeQ@2ZPk8D4qW&XAZms^C7tyZ1cAS!zZrkqA z3-nInW}|~5e$eKX#F-!U45_E%EUZ85eC-jN9=qN6u*;jiwfxb`+)aJk{|{y5!u4d< zUqk(hVs+Qv$z!XR+~>sVbcBc98h29ZsOay6mwumK(Ld3Px?c0mMz1=4F5}UEB~R;l zkb2l)4}aM6`susBdcpZ&uhrJr;)L&yXFNOp9LdV1a{Wm|{fc6B*VF-6pXbryc~&m;Z#?>U8VBqA#A`4NPW$T?+r6g|&bjZQ ze`GfgdF9o<|57K<%7wnIPwhjZ-S-2ITfCD5t!ZYHPiIMRZWRxsUka zgK591A3Mb~&%0z?Y{fJ`wZr7ScAUQV_~EeHBcHhL^_k!BnrEMXJ6XA~pZ1gf_4RM` zCt0KR|4!?Gx!Mgu`ilCO9(49jdElVf@ENay%4;4!mS2}={-huLU}YTo>XUIiHrT@- znqT~%>)-I#4Z?O$erokky|xs`U8es)RxbAWYc2i#ORj!JvATMjt>ag||8l~x`Q&0p z{mVzS`VR@u^iaL%+dGN#e$~8iEZ_3bMbZ471@kSQ&bSnlN8!peXD$)3O1?0K*G+<(l@vkYq8_4NEtt~ThzPS;-j&9xKQJkKU_F{tsfmR z=fp7QZ5O=p;RCz(U)^8&vf~t;pZ))=+Keyk>Y8|SWr^9m+iG_NnP1d@pzze`&-)O3 ztC!BdtP;-`9adE9eqO7~Grue8_Bk0p=rwNR*4g-bVA$f3@4Y(iz?Z|8yFaz}$DhC7 zE8ng^8O+MXUI$b3{y*pkY94(_R@71bmCwHoVZaBY{^f`j{riN^b#$gju601=5eK~! 
zp4rM{T>PMIcXfH{_aODK!5;o_=wbJ6I``16!=d9F$1SyF_xGoq{~656#q^jYH=r>) z*SJ&E@xKKex!W`mV4>)tMx=>C_P0_Cs!L+<1xK`p|yif$Voc^DEzL zJ;TNHw^n}7r$6_^i^sQCzIo0=Zy5S2ec9))k*r)Q*PnFMuj7hUU0c2M{ts=KQ7}Y?qY__3E&->9y}WYqu@t zv^MpZ%sTD;2T|XS|3{=w<@|%z_lm-*u8E@?_0##WokjhJiYm4GuUJI8`*W+u{1y4W zv-t<%dnbqvX`cCYzSt>Rp4Ovooj=V_ZM*-^Q8RbG>6sH>Wjiwc+tN4s{O7;_J!yV1 zVx9h#`!6-LKhUXNR*CPwmx`Uv+w|%i8)w{-TVI^l$8tpfcZ%I{yP`jka2R!W9^+h>Usb3w(#QiKU_Srx~{OI?TU5O zv)BG#`#>+yyCS>(bpsEqygr#9=*n`|*W&8mTH&8>nf0^p{k^r~NA^4W&^J5k*!3sF zIu6(>zkhMOP7Jj#VRzS7Z=%+r|9ruA)PJOiRP@(8zDF~i8d?W*ViO1H!)x4ZtqTw2 z^$ySc%C|acJ#4TCzcum3nawGO{h&4Rx0kQ`>Fg(rS9$*($;!pgTJZf(BpP+o@yvGe zsPg|`ZSlirzO8X5mw`I{jX#{^Gfzd+OL^F!)k*lo>oCppF6jeXv91pO(75N>kN*&{>Wc^*E{LJzJF|s%{=Y?lZjtg z-TicG+vhLlooij}hS*VmS3IidS$d;?u!wf`vHhRsn?A~G_K)S)>1ckpV7|rsByQs< zzhW<>KeSf5{v5X&!s-&QU_l1PKm<@ZQQipyan?sc}{p9)H-G_q(4mi^PKHA`07bv z`rKQN-|)We$+!C-&d1U3*rvIRhTdERuS*^SQ*)j0wUs%;OJamJf z&X0LV{X0dKivG-dei0qD9n)8*bldaK>$Uk6@lzl4F5x%S?iYlYqV?aWe5aV&3+WGQ z&tB!lk4~8trrhz(8-IDlQq;HW9|p8OxSrmBl71D1^)I%6uK#peudPVmYDfM1i%3O( zr~CmybfMo0DzEnUM^5Va8l--T#;5O^{9ZijhUR`t9Ja(4;`Pt1%Z|GD;N!`&&tG{~ zE^M#)xuyQku)hR_6?JRyNb|H$yZ+XV^_y<$kNOW3jEerucWx0Kwf*O%Y47`1Z?5>8 zrwB4%(0E$c{P}|U7N-x;3E%2rgDEfZhp87YvEG^IoF1k=`KeQWxZ?nIYPbK1SF68XH)y=J_T3R=eo_Cy!c)=T36JY|;j>*BDvvnm6TkJL`oII3hw(JOV)yV< zuMWdrXTIT^*B^O**yHz)_usO_N2zb0e>+*Zpm%kmQAG-5gmMTXUf4~`VkQTrbq zkLS<3A{z6R*ZiT_)CUg&`4;b!IC4gWlHn%;{nCWtUiKi_5zApJe+V%*uuHF5c_;H^x;IR(H+3Ky<@4)O`Ah`VSXX z>hwo%_}GT%kQY^|zn8H*c#8OYq4iC^D~GZ7IghQ_t6rG;wq-stdFR2f{;b>I-uL0{ zsc*+W!&$l5_rF`{{g-m}gHG&pZS~fY2sX#9sU7tnDVTNoa~#XOtWIZaed3^Z5>IU# zzdwn$#U^ffsqR`Ap2r4z@WZV37e@~I@-88N`yR7C-s&UYj(!KZ#5GA?}3 z`p~-gdtsVq<0k##?^Q2MS?R^?uGrz9Vai(X|M}7{G{`gk8(F!aSAKnY{doi1knt5I zt9v|Bx&L0mr_ZRrD{^b~&x`1&_tCsGyPtpESUdf`l5tWU@%O?s&odtL!B*^5FRb?M zWgpvO_5oq_7yOL5`6<-nc;7x>b+U5tedXt`lPms)#M3qXR=$5xK6U6b>R(3c^sik1 z+19=~icXMvpilhR^oMQyJS!LC=I>L#8ykn`6F+%%7&iR+F;_3M_tRnHZ7(=^zu#}l 
z_FTLD59zq*rzqa)n!eD@JKel(Sewh4`|Nh!)mrM$)fApu!glBy)zZ8{M`}+&(fq7Og>GQxipm)k+JnTBG8J$hYr*b-wmGUp2Y^o{p0x3w7uh-LTX7 zVMqP@i&Ay^^SdJYxBXaNWObi!c+Ela!w0Po)yMogU+fgqJny8w`LRK*@3BGk2ltH$ zudjLiwPEd7FI{HZk3R^{_Wzx%TzFmfdjF*l$r`oYZm4x!f=Pc-|AB&0(ccx`_<42( zL3FY0q9cCBhu8eI`kkQVsqIsoo0SW^6shl>@C=iAEUZ6FU;f;+?)dmqVg2h~JbLG6 zW|C*W{~FP8z@2pa?^i$Ar?wMhO}~8q?lo@PEqM*ei24r}k1F~ruQ^qC=)&v9(2b27 zFY(hC2oFr3SM$@a*7M+E`oj+2-2T!JKGz?1yZQ^~@3U3+{}=Yp%Effb#kTujsZCs> z-PeC=H#ZOu^Cgeh74wVw&n=j>`oCT3!1qpUL)C#eNIbQzpMI%h@my@;=3i8uoz|(r zaN>t&{$S3Top93AryjEQ6W#xx<@XnJH6Prl`1<9f-6#3ydqMoe|r6STkCWbouFNR{`^>fMMv$&{6oUWPSN^xW9u%iKdk+ki{3r3>xE&R z*Cs7-!p=)m$M*k&Iu2O#`#WJp-E=-?J9$*O{zU!odA&#dhl-4f{>o2(PwLfgnCOXq z#LZS7{TzU^6bw(@Z9I04ii^8<>oVf zy*~9#|6J=U=5xyb|E>Q2g^uSHGhMJb4~cEq79xE{{YMHYDkX8#eO-8K*T)F4(EnKRy1>Wz5R{M|sW1HSPo%$NJFz%|A4jhaa>&t>+bqr&!0w zA7(uA(E}fPWqO#oP2c`Qt9Fn7_4c5iY(?7ZyU+WMDeOYzX_Xf%M z@Ic$X+A<#bmHBDi#r21CUV7}EYd)QY^ZqpRce4(kz&QMF-{wajE9(7!8rqklSlu;s z=&RXOzDuxgtR3|)E1I4Ozd2p_6`k_2b%=xBNj$abuX105w$@AfGrrdIV9Hxaf7s`b zTYunzFE+xv9_{<)_6K!8fARTuBrBK7_rD$WgUR?3jqis}9-$kyJ+I`og%|bDi$@jx z;SCDUbS&AC4<@|k(PR0R2T#%bZozzuCwX4^i~GEUX7{)0 z_b-@laq5Fk_*M@aOnHgFwaSqPuCe!(*R)nSd1$Y9f4BSpQ=ESo$jXJ+r{7e6|41E0 zVMX0^JhL@!*B>VREa3gOQ{>j^-_-jUm>zk-u5SOtLHhEpV0Zh(4-aH}OScdB-W641 z9apSJ)pMn3e(Kd>c<+NxwC|cc5)M0K&MGt3I?8K4_kXtUf#*wI>cFEPiB{Ld`QJM1 zy7|TJzrUzb(cek!W<${W$i=pLZfx9miQoFr?ExNWuc!9ddX}g4Jg9ZdRzLOXFzkKc zXI}Z;_um?3ANjQv!WC1fZ`c0}W#z*68Pu`TYfUXsQ*BbQLFz9 zsb_uUV*lIq*S_f2#;eg8o_E?0*n=Oo{`5mrKi{8+Z4N(t=$5;FK^@!fhqWF^UrBvo zRo7m5_W#TDLmjo5U(|oFs8Xjte)#r0Hj1p6=ZbYVpZ`&T4>C_M^<$?9-_VuA*?!k! 
zE2jCW-CE&|Q*T{$+}f=bc9=AM^Yc2?t^NK5-SVtlY`aq($(LxPMfT-k=r%VXDIuRg-t|9SG8=3o0iQgxJGhlc;DWdc;BRl05WMga@YmnV&qZM_tA>n|KYMdg1!dzu{YN z+U=@vr+;?&{I_$dWBZ@}tXxcw>H7T_^%aHHU3>Lgy|%=#)A?aX{pS}<8!xGYpZ-nv zhSrC#eRCWEAGALDBro-2r#?v}xt3S<8?Qre0+aGr9uZF`7_uM@3oYivboBqRD zx!C#thJOFjR==WHQMU$QA1NYM-ziVA*%q65@wzco9&yko{$Bdm)w4XUN8LsB(^r^v z_fKY?x7AXixzX}3Zn;c%|G!^WF1FndtF80*PHjc8x@+r4_2s_(=BG}gXKUQ4Ws!>h zS_eP80a#|D@MEJO4kFl}qLGZ@>CM zr*2NBzr4o&Nlfs_BtSUfuq}c#Knn@Idyr^lyIh%Pzay8$7>= z{xIpO6E^vEI5dSQjXAHL6o=RO@)oc-my9(_Der}q5M^zsK_gQicvj^7yDhgeaM zK6z9*|BP-LN3W=V8L|3K>wdCMCbeYe7AWYuj_3z%aLihi_dVT-Z$jXKNq*z_OQ=9+I^iCqC8~wG#ANB7P zkLvVqjuW2gk@SHM@S498zIRC;<0@Lcd1HC_LF!Y_ka|w?7uFw!Zn$mtTOV(PJJ0>y z3)>t#OrCxJJEHZ#Ef)O$7sBf5PaUxG|Cjar!)W5&*RP^r?9>1f3Z|}Rg#L;1b*kBKTn0(2%SGaHaFNVqYzxKp`|Kpp~MM%3y0&^-Nz6M%>hz7Z zqyB@1r=q_T9@p`j9_sfA5(kN=HvMs)(c(#e=1<0O73EHuEuFUsf(= zJGE>4)T_fVu*TNcK0IeAd~CBv@7XXs!g!oFvE#aqjFZ1s-v6+l{kEa@E3EF?J9&(5 z*tWl>?_79M|Dobht^R8ZkNH>~9pAgLapNU^<_W?BsbkMC_32mZ8E-NDVVCLOo%`V> zj|#io_Q0F>JYqKWc>lHW^Q>I_xa9tmPW-HykJ%cp9RI7I`pi4(KflPW=+FEHifDKL zV$Xl4blc|*_Lt2g#7}*YK0))l1@kRVAE5H78q|Bbtr`^C?GPaQk|HmH4oTkG#Hoca~T z>aM-|LA(A_=fif-cs##R|B<3fMStpPAM|f^@**qdQD1q@e(}Qxji-F`y9M(tPW?L6 z{ItHu4{Cl`|E9jXb`Nh38&1CTx$FIJ;BoxJ^=Y8>(Vvw|y4|6NL`QA^PCVp;^oiXX z_c5tUea(YMefZdh@bj`v#_#^%gH}&%YPF6OMb~x9f%tpZ)Wr8?QRoJL!P!Kb>EGg=Fipc2r|E@f1~hJ^moFm zoF`!3V8UyDNb;x;S|6Hc{(QlFi&H-xFSYA<_!Yy9>ppn-4QK8WHb3j4)%JOPj#q!( z@n?Q@dH=mLZF|eJF^hL&OnX19eE+Dtbo+|>=S78D{fCNZcl%EJFkOdATDHjFKXxP! 
zy;HRMsotsWq>k~iLFI?WpMLj__g}Mm*z`|d`u#tje${K-?teI#l?&UQpW5U4H~Ijn z_jav=zp-FD>fbIR75z0&vPQ*AcC~kzxri9rpHF*{fB4S zxH`VDy6ZIF%q3>}*Vs}2fr4q{xx(sh*Zhv&^vKI__xE2;Y^V(Q!R&u~^Q_C5SodisQ=u8S5sM`*-;`ZTo%nWS*e;^Ri5ZZ*lqno$#sSvB8v=_=Elb+FRf8-M`%R z68U!g*~rR;?X~Xr*W|&Itgaq*{y*)fEllPY^`BRGD*8L&HQyl!pYaTpM;!Et-}KUc z;DJ^ziCdo5qwb>m8K>2kpR)G4Pps5h;qvR|ziGu0#X zs7AJa|qcs`>3!$n3#f6Z@hUqnak{}~_Ddguju zS7eRKW86A^Tln;Ab;xrX2N%&#z1D>3OW&~PFE(#Y_}#tVX?}TYuk}p-5gi9?tF1h* zDBkWGKmD`c#8IN&XRxZ8#)T_hr zW&eqtc6_Bjd}GJEZ(L(%$9(MjuUyAb{LkNiX`mdiqQA~NJ)h6(#&BWvN5>v~^guss z@5(mq^Y)_psTYpE><@3h?%@-|(Wk65;mLLGXCC(bSFYnU1oQIr+ov4c3;Pj?R@dH% zkN?53K6CM#t-RK_fl`r*{u<|dWcsIHL*)?%ed4!q>-2AD<&yHu)_NY)I%cb%@$0bF z_m%Y?JmPEPTH_|Wi*7x3UGnYo*GN__cK(6B8tMo8#7@`r+gxAy%CqM$v7`R3U{>^3 zUb7{2m=Djtq4J1>^ihZKK<{K6_~xfy;^1QX!=zhodFSk-z7{6`a^T7bj@#C2Jv;x| z&dSB~TFCn!QiuN04Lgmq{YL#8#efz4UEz&>ohR4Vn(lcSwm5om9s(Pr4v4=O!Z&nd z$Mx8Xz3K(`$i6o|b=0e2%J*-*CH??_y0!ZE>F+N}m7*S?>LT5A*IxVRb^jmw{C~~8 zf1qVk{r|s{(ht=gW|(1&#SDf)XEGQ+n%V2jx{M!jOvuma_>o3Mm;9JA#+67yHzXvk zlHX}mew5-Si6X;WQK>GyMM5_rq57`9*6Z=w_pUv=Gjrd4-tX`JV?WR9^?JTu>$RR| zpMCb(XP&ou1 z#jkC$(!0vz|M`UqUWZnKUH;#{-raLK^HP*~(?`r2w!O;z@Q7Oz8EJoB|1e@jf8nS1 zlhpC^SkETB=9$r-j-!2~&zHM`z23r8TgI>4e?xfGVSZl!c9C1rpE~On(P78&t!Srw>Veio zp4#Xlh#%DLE{UuDQjfZ$^}GDz?x!F5#3I)`^q^h#z33`l*Lwb;H}$u73XNmvO*t{{8=7M|QbJ4|KC8##A@@mic=9 z`wB)yf63#xNA<`Hw&&*oR8M%#2gPq9Y98YXs(;By9)6JdSgEH-y%qF3yZU=4tn=HM zuDRR5FV@~^=rjA>)P$V?)bkQGnt>)zrXNQ^iSpo-bbwJeBs=m?mXmP-Lv`ob&e{xf4=|imvKP1g4f?lUwZ#r z`pl(nVtf4uibzF&c)dlmv;G_}G#8BIGjDVS)o+%~*EoHE!l#bK22)<*cl(_1(fh`( zwZQHF>u;U@>Gk{Bf9U?FCkqRDUs~i=>Y7*) zp60j2-xf52c|`tq06wVWiaz)|VVY-697p}wf}QHQb#`+8_b<1*b$@&FcmA@`OVrWx z&;3##n0XAbo-OJ0bj9 zG)4Em|D=z)x^DIU<6eEU+18D{`T8feYE81H$oAI_91xN3v6+V*Bga2I63+!?JjPWt zFi&oQe>)3{>OW3?f0T<~Fj`(W!=s4IuQ@|(i8BsyuzSqO zfvEZ?{a1ef+mgENi0G7}H6!ELig^-G#-$#8Y92h$#5(ZEt3x;a@NwtP+VZZL<@IaC zM|%C+fYcMD4*Ov+vZ`xqq7U7$=?|N_UjHx?)nD@A^F2O%`c)J@aWEZ+dLTRyKUne8 z7nthb!B0K6-P}ue-s|kEoZW2fD!1NL-v9OkSy<3lakX&&jn7%?3kLGAl@&kzqZ@Vw 
zh41wbk1G00ocpNIzv?dAjTIhoF!iV7!vl4{Cc3JB1$EYOq%Zow-@Wd*E$4sj=yBcV zjE&FO?52tM_58zN78bq!EtmVh<>D8N)^hBB=%0M-R;=LlZxoN}^yhP+>S1D@1*ta_ z{mok)?ZxJR^B^NIz*@{O;X9=z4bTzg_R<-1f+kZ+d>b6`p?nYGh$y zJF9&E*otx1*6=8zKA#(lEpdH4(O(|pUjOi@R)66)$&>N*{Ym`j#d!#9kiJ0toe;hv z^PwMX!A|wuxTj~{+&kk)x8^V3dEyn5R<)9+$3H_^SlI6Lb=eR5qNY>BfAli9AK-v4Z~YweR1edHBVPx+l%seaXAW z&ZmBp?_YXl9B{3g-`^$U$1J}eK@X09;N?;`vAzE7B2ue=di_scusuHypyQ?bSrg;< z=?laU>hoEbCw>bu4{Wf;&p2-Si5C~|yKcsPCtvib6Q)p)>)-eu9wd*x!7N$uYdz6T z{f(GFoVs5Bo+47wU*?g1o>iS{>QPt6HAVGuyTA_*)aSDu=z2Xad~2d#QTkI`;?%1{ zcUbRF&K-Z;cz4)uUw!P0@86XAdiS2Q&{O*w58~t~D@^1I( zO~>13A1jYP56;43sUw)bPriRi{FyBDr?&9WjkwM$k4$v(`Y$Owb^7CnpVra(J+UsI zW8j0z6MfWgVjgyiX`U6n`mqJm{M0V*|8U&5?mF!ty{^33&ced?-5~k3^OpI&EfyG67@LVVSL3&^8fFQCzvG*^8%ZBBw%a3R`fGJum94* zQ_*{(oUG9Y3pCwBGbcz}9?pFfs1+A1oeK^cP-wKikUd&=lF_>m$`uZN?SE z59+*=c%^?6hSS%l#ReG%8x+4=Z`#gxeCaO-yY-H^>FeKKsr>(=b`}=>+&d(8OVEtO zXq9Vg;$!+nH|%tN=;-wiBh~3op6Zm#IH1f69@qWC2el93g)BNgHpdOxXy*eauM^hM*K2x3^;Je7jIT@Id;3uYUSv9^eZ4 zspmd6>8LUHZ+U=QbnF$IpY+jP8Bg#3+|I(H+t<4Pq2E7>j*Q3i;q?!SR`i#7avyK{ zCr?p$#6g?*u@&1{Sd^bfo999OE2y*1&qzH`>bSigc=*I0U;CQdbJp0YpC3N}U-v(Q zk_Vz2bp#`;y0#KWH*D2Q>M=jBf1dfJR{wEDv^>7iI(cws)3l^u z>M4%W?`D4chmZa0gjL;EtIz(*ZErY(*KzIsXSZG6M_g+;fAvGV&1BYweX`R!KbqqfBL`V;0Ye&&ZRUjKHHTdTh-qW^*I--I&ydSQO_ zfz5o>pZ1RpQdey9>d_eI#&A8 zeDagwK8p zq)v)Do++xAuT%ULzKml{!CuDmW`etX{m^IPPwS?(TkG+UJ-F`kPsD0vUwZzhA>)AD ze}M4>$p@Jih;G>IcdUsS^yT&MD?F;d#4WhQ2mJyc-4#s;XZ%jAc}BG1gW8Amum0Au z`5G6#<})6CY_NmB_WytR{<0vvt4d0z}YL6!aW{5@>f^u_lo@fOJ4_#EG(AePi(0lqvaai!De#b zV4Y7Z=3smMmlmE{{l|~wSGIrYhkBqjNuJpB$LmG;(iipBFZHRXI7+{p{L5Xw^W|>i zHrREKp50$4_di^JJ{0?AJJ9?8H^eXFMytBECi*J(A8SQ_IzO-fU=gY4ZwjyXOP=bH z2X|%v4==r6kmgC>=#Re>!dEmhKJ~B#JJoYjW~}*G)?Vl~n7qg7i*H|_x|Qv|5y#0N z2-p8VWs`Bpi>z`Duenb2!AtjF^y&2=|``BXh}W6{4&}9_lRFm^Kpv#==Am+-7$APrmLr+@wYOFwxe6KA{g`Ugfuf63#2d{htWfF{-@4%)=8 z<4Rq4Abfc0r+=xJ^3*2Yf$wJTz21u#-+#T^{hi~cU-jpOYzKP&tv3sce*ZFKIe-6{ zjN8nmj!7_?zt=x7D*8(vpCeR{w4T_+L7Vuo6&qPtlu!TSSDuNy<+iC)ljlykXY>88 
z-7$Bc`uL=^-*iz%U9OwqeE`({+F4lG@3Nin{Kcr>pPBfmIW{7;Vk`Qo>-7&aR{fLy zEBD`)eEOhHij0RJTl>yO_QC5>F?3b#pEcjaJgtK*$ar<=w!3Wo2Uj}(+p$VnU(de{ zNqy$Ij{N>B>4*Fjt%)9V!`698UHbF-=S7C~L8&W^A%C+iZ zqL2H}Ge0ohZoU4kBBNIS5bb>ahx(xQgB}UO1C=lR*ZHl)Q+_8u^;~;s(bR`#^t%PC zth(rsjmrPu_33!k76+tL&?C{10DrqCc;HPmx>E-^7Z|IT7Jo@hi&s#6jX} z3lBZTb=r25eN2z)=3byBi!XpmShcyMe+%E9L1Jl=&`lUX3))du-)ic4=?Z3j4 zIQ86Lzxay<2j0A(yKCA>Pw%(Gm*MO0j|Qb4*b>`}#AuakYvN)%&o zR`i#8%_*@i{cF8M7vi9m{M5$J^QUp@)y!A(r0!^K_s#dMx%DmoHROJ9@_m;ay!TA{ z(9gel78dmB`2R)xx(t6k+I$;Ykj@x4UH?Keal!lwR z^#eO@^yqC?;(Y$nKJqLqw#(oBaMCUFtPxu zb~z69iG$V@tnzuWrm%+PnP8UBPb=EYKdobhuPFVgoyKcyH(|PYWuL#T<|bV5(Nhn4 zsQmtgcU!1F zJrYlz59)xG^AGgdiuv^E^&cucS|6TZ5i6F;>n9nXdOSbaAbo-OJE1kj=ze_0Gr`pT zo=xmd^_;nK^?NS*;}mBeziRQC2aaVs*W;hI)Caqk@Bi;&mhaQVN8C3FoBq@P749DM zsgO{of93pVBi5&H>ZbE%T>S7r{9xtzH8M_}?xXZ~Pu{Ej{E=h!?w;KH$~o?e_32YT ze+|jFU{kI?X^S6Ze8CtUrhoeXQ@P}mC$`r=cxv@eufJ_b9neG@9YLG;(H(>bs{XB5 zk9_!QOWo1>yVrgE(){I9W^|i5KRIlpMLSSekAH@;uvD)9&&4kot?HV-bo=j;Jmw`f z^Y;3if>EpgYB3+aZugDo*YSkc{I&Q^l1CrO_{@{~DN^4`Jw@uRpx>SJ^NG7Xd1t>n z^_!1h*to5WdbhzCnAJ5|>##1!WPd)m?29+=U;_rlMo|W;`k1g1#o?H8ntsh_Y{^#9V z_uu%~;u+tep8o$}I}3|$2Yml!#1AIA$LO%_c5UGcPxT9Gm!FHg{_TQUr++iySN8v= z$S&(Ke`^YF=~um^A9x_on>9t&GV)At>+zk|TTVaYxLwzuW7A3xDwB`HQUN z>;6B_!lHUD@BL5EnR=p|_K7WV`tkbr6a&`kzey1trbi>%`aGCuQxCL>U;F5);pfq> zJjt)v|3iQGTEA=O6E^u+_rx1s+H%hI*D?=1{uz`$z^&x_mt6dU(JHrhwPk)Bw`>2B zm)c(c-Xgc6KXs-Q(P8?FU+dKAX+oO5FIce^^DHds^F>|tGoFcj3)cCG*P#360bBmz zeYdpS_ol!1omX%BGUMs*FZ!~uSa5T>|3gFkU{|!$wbm2etceUo$>a6q_3tYf75z=& z?R@^(nu1k6KdPGv?oxlo6@&-UkFCoSKXK-%HgU$m7GxZE@(CY$+uj>L;yyodhkG9Q z=-uT1kH3FO#%;E;uxOvT*d`+Vc>VhePep&J7un^S*Q=uNsKkCxowPVlNe2 z{GIB#-R|4t((QZhc6%PMU~+TH0xNm?``;m{54Ob46Ex9I*Yw44qZOWRU!oW9cV7P` zMU6WBn`;!&a{IQXusq)sYmZVdowqeb^xxz)YT~WLFZHd7e#IJp+DB@;^LPHyoqO&x z*IjY?mQNh;j&l3w{AZ7h(}<|&KgDLgFe1E#4i}Fa&5)0_0s!~wIY`|uLrOH(jro;|C%wcqEkEisYe{NCdN}6Kl?9@)1RRF zyJ8*9Gh&a@@3y$>(UT_Md$xP`DeFu=`R%9EzrO$WN_~*?4~!=mS=F`H6Wy$d9NoTJ 
z(Pn;L|G~mj(ccta|Nfi){=x)zx&INHdZ11G*z{M?KhMHq+k$44h)y;6?zqj4X#Hg3 zkURG3t6pyXZmboae*SG|VWE%ws&M}KW!0xg;$|d9t9%`zn}wH026fTN>pxUPw7wCZ z%%dp?pVy5dyfHDad2dAcpz5C1>t8m%l4l~%f>KBA<@7su*Cva0ftD=k(S$_LX z#zi;vH>92*^Yi+L%!>X}kNM~v(y4`+$!_=LAI>pVqao?9FuksP{ zI_`h+IG?mn@?*5zAMv{_YijcH==a-yP_$P6jUtcvpquJ%M#hzw_^sIVOMc2HuKrZ# zv<^1df$w&{`<6-9es77}f7==3j@+=tc)I`T&BCJZ!xNY9`UmP*6Z84~1$O0m6Q0+< zU1U`Bmw4s<9{$Rw*P7`r8+zTXAG-V& zD|PkypH>zYwzK@Va{WUteo*F3zcD&&`)B`eB#*k}HKO0^-%~uQ=r6pA{^Wxu)=B4U z6F+@|6@Dw$VSh{;8Wr3yW_5JIVcj z=ugItEU%m4QADk`;Yge3Go7E;zqepk^f!gK^s62X@q$R-(MI9WIl?F zCpuVcLGtR*&41(Z_l&(^fji{aS6+Vfw&neY-=Bqr>oFAh|6iG3MW;OGv8K<;{g+yi zLEM^Xd;R;0N0quVKR!n@A8QJB`Mg_GSXKS3DZHiMn!*~E*NC>(&7;kBqJAU({}8&* z?zQCxJG|v8ch+_b-n#d(<>z01e>W)O^$IqU{lAI+x;6Tun-#w9|K)jLeqR6n%r6!F zP2u(DfAV5IOvgN!dfaPGw5bQCe)l&to07hy2m0;RV3_6lMI# zF4xw?$Cc~f^5`d@{=EJJMWmv?DZGCBm;X_MPrjlFw933~;%A;9^+4uJUG>-KOdZAn z$*V(m&e^}bd8e=6?LNP!o%h!}9-)q&|CjBEKJ04Y{_~H)1GAciKF|$*-Tb`%ON&w! z{bk;q$74QPCoi(f?T|RA!%2#05L`;ig{qSO533r^Tw}_M_+jhh#jkC3cEk;uoycGZF_AKHHnuf2d%p z{t3T1tB4NMzY%SHeVS;e^R|ho<3v>W;qg9jztLFQSfq z{%vPr;q$p2FTejv<{eq(^G6*J-LS2R8EQ+t?lEywsOT^JnB~XmPx8A)+N#%*=*I?i zT*<@V3DZ1lMvkkt_&e2eW0!vXjvw66<;H#LUUTP{uAshN|FBf*3sO&?kG8~R9@a#g zwbqmC)`;u%MCA3w46lFi)ahS2PoKv;UO&{QPcZe<7YGl8pT@B@ZnChfsGo7%hPz+; z_RCH=+HJhq?DNdG%Jctx{%T2m!MgJgkyWm(iH|{atMnPvE?+lZ|BzYHUwD<@KPKZa z9`gfJKeqPM$ikw0u`8dy=+{KP1xM+3*RJvNH=niRLig+E?|fk15#{&q`$>KJ5X^rP z?th-^xH5iZmusDu^u>Mf(mZ&~+v^`js?~q>I0k&(?(^uUep7hOKacclp7fQD&*yx2 zDZ*zQMe3Q6<8|V@X~#bIt!E#($xZw0vritnQF;IEw$w>+b@}~uE^)zV`R&;7C?a;{ z`QDN?52kt6#QN&T20J`2ZtKk+z45|p z+HU6hPyO)Hf3&Hi*PjnbeXu2VJ`$s~4E<&NW<&U5^SWj{Y_ES9p`yQu`FxI0J#wiF zN`2y>HIb(_ebn)(7rGgCyhWbWv*0NGZccX7FK3RM=C)Yt8{6`9oa`nL;DMgOkI<9ULQt;YiikNqBeQ2P)*{!WU#Zq ziW;gvad4oB4%;3j5B)pMm$)@W6+aGj8)8d6cquBMycCVp5r3MW+HU-$xmP{7_77rK zx&Pq%M~+)S@>M_SSM%wIJ_MV>BOgrX^Xg!J-{=)Gyt>o$XpLP}&wlBT^TTA?4 zBiiZOn&?3{)m!+f?e*^~GAjCaNu4~R>MFMG&xA)D)Nv%9+VDVY;y667>L;H#I7+`; 
z`}^O2?>-;7+O6~E7Y0Ajc%zki{Qi;c9<1~s<7UZ{&aatAg8Gfvjfm9s`u7)}ivFhX z`tzUiIS#(Q4trvp`ouwNVm!6sGoHp%XM%pocYuRn~33E86gNyU(NF znux@~)UUdC)$sG^r%$aTeCjEV((mlA=Qp=$ALHDfFMauIUwnwZ^!t~k(ubh-k&7Rc z@uRm~TN58Q*NRB}R5$8+{RawWMStekE~54OajoADw5&NVBK1M#NnQMrUDn4BS`!&* zzWS3md3ETvHLv^2nU~(=W}SHQqR;QL2KzhSe^Z_G`sZBw5R8`BEqNZW_4w0B9(pF| z^

    rD*8)aW&3YQy&gfSm(JHpK7RTF@q;?=q#k_gDGE<*8Lt!HZE@g-9(e3cce<^| zUH!Z4+Ly`K^KV00SeAGH6WQh3n)n#qw4XfsQ`_r5SVZddw@YH4>ePz1^3g$f&CibH zYaTp7^|zMI*Lad=P2}r5utE6Pp!nU?1IA2!<2QD2)5br2_E9^2h&sIg>Uqk(EG%|% z^85Ez^t0ymqqd2Ubo-~DJo*`r?ak{yR76xyGm^)BBx?1L`tX{+k$kCBm#6-L$j466 zN}UQauQXog7oJ=E#>ej2{qTF;;+5Ze(5-KHjk@~zw>JxmUjJj{``=vrg3+q3t%<&7 zBO*3>^L#PS?lGSZ#9IB=k9FXauV_Zbm6!PSc^7@)fxPd6>aWvtl>Y9nSAO))FYI)S2-WrB3(8=Y9Oe``&s}_r}}J{QCyCE@D1<{lice zmdgI$h+i;T)wPv4x-p-0e%N0BkWtZ}dFNSJR1dNB^<-ikn{nZT+K1G|-wD$^%XsvM zE!e4^oBXzaoP7U6|Nk#rzj*V^KRif1{r<5h3ybP8HvIp>edwhq<3)CP9Aw4c@%^_E zGrj)dk&Y+%)L}o!ymWl}?22`G-)ep&+VH_t$JCFVVw$IYF|LvE!47`cy~(AYpZM23 z-3}{1eBVD`eS!M=`*-RH=DW)OU$kT#Fc~LC%jcoV?;qiT?Z{-FUjMxC)al>cY$Tu8 ztD+ehr>;%>%oBtM^7^UGlX}Fdqc(Boi4BUsd%cbOcYkH^P2KBn{k7NavTu3+bN+v- zKMM=rXV{s`zWyq*s%!eHod3xqm+hB%d;MEQq@us{+YHh2^O5S_kb1(S9!MW`2oKb{ zCe~HI)VJaXiKnRJ)$rYda~Bfj@k6)ZSUK^y#Iile@^RR>-Ldn%UrC9Zm_vU zw8;nQ58LbCE||6Yzk6hzv=3`W#;Ke785e{H!cXJa8aG*3M(cO$P22g7Fa70Ux84yq zef`@jJxHC7=YJSiFm_R{t@y#p^(T227RH0;_3tSHwfb)t()9X5-A>v5&B(awnQph( zg783Im!SH|lW{CKN`LnTZ#(>p2hUr%dxO_Ka?RI%^fL3& zw5n_JYOjNB#T@46_3te_75z=&_2)m=iwK|kDG~>*N%F+jymp5;{5l+^{(ArUuYY>x zk&6!=RQ=ii_r$(bkGkWZWc--rb(DV54O@{sL9c&b<`*5$6joV>{f+9H(SP&)YeHD}Pt4yYe)bhOFimR zM{VLA`0hhTPx#KdJ%jE;t>1m@>?6wiPacwmh4Fb^=>Kmf^&`8ww!*6%|D%69<}g36 z|B}K}(ceTK=aJy6PI-)@198xr7*`uz1o4B+7gRraQjfZft2Xftd^hX%->>@JzaQpi z%{a2}+HdbhzP|tFSy-57{(1TT&9?XjqvdsL@JQk&V(tI$dc5k8pasoyM{pT;#`__hw|Cyk5WZQ1zcKTkVld$-l%w_bJb^nU8<`L}i! 
z7G4K=c>8^Efa3x%vb=7FM-jE2_-%^x*NA+t|I*@7MStN{?tj&adGyD4=mlC6d1_0& zT7Dk+%&(Fs^(;s|Y_NmhZT-i6K7aPR=DTfAe(+OkeBykzUp@Zm$-<&~@c&-#j-W`hhyG@YUa5Hech^ z2TkN_J!~-LC4Sdf^!^90*yJm&@tx-P%>hu$e#6XQA#_9)S{T7UTY2|s?>wQhas<_qT!TB)P@_heyFJtidoKbXfj zYioEEkv_R^2607nm3eoMIXxumcqW*7997x=$xm@0=KZhf&-nB!m_C20?dD(p>t9Zu zc&K~-zdw{=uW; z!H3t1Jm$f1fnvh@|NZ|D&OGI)&5obm=eF76sd3ZZ(qm=5dj2_&^VRvS9=?A(6`rEZ zJF?3)@#ebG$LoQ5>2*b3e^W5)^yhP&>L1v{-XFv!4%)-{YL!%A#|tj z*wcT*txMgf^Yu6S@Zv|T)YbFPgIQS6%dRQ<%XUbAkyTw2*Ylqya2W{fV*8UF3!lL}HX!Clm_>IKjf%rlEDK=uh+vYd(_C336soQqi zmG4_;ow-)>(4XxS)IQoVKb?2-EozjEesBFMY$ereF`-{u5q$ zzYfiVCy2ijT2qYX$0OeavucaKQ$08B+K&!B`|i`+W^Zf#yfOC@D|veVlff)37Ti4C z{;f}SZ%f=nWS#bx{%WW~zE^HfosPdh%?p*hPI-XLnuzII>rIIf>~wzEUjKH%tkXYz-Dtf;Pk8C~ zW%|4&eEglTGM*{AUB>Id1b5{;R9fHdamGH!?SI9Pd*6ZkE$zMO810+$pM$ZEZvXQ8 zf7LygxQUpqt@wH0)c#wMnc80eo+47Gf6OY^*2KptGEeGbgZM%Goe;ia&G_`wsh(Ty z?~50mHDO=3`jfl-;Lm?AuYcQ5=9P~`J^zD#U{~a^wkAFToAB@ppE!Mb{d+UN)ajq{ zRR`*Url{h-FIYb!^}#d`e6Ugl_~Jg6Ji-*T1ib)au_G^O#rVbx1woaUCUmkiJ0mOCQ)N z!dEo0pNhSlerFE**NhuiKiaLo%~Rj%{pm-nfj7U;0kJM@BbA;h`gFO;L5&c{|}TUw9zLV`&`Q znxa~{eT~-Nz1|b+|6%{9SM6T^t)K4y^iK|@j_!YkWE`+1c7oCC`)TkJPGw8*IFZ=_DA-@nLs=m=WLS6g`WgCB&S#<4Xnb?f}pt3x+?%W0?V`@I=%_9Zu; z_TaM%sjK?8v#{vrEhFFm8u1H8tGc!luYCU~e(k#vbG`n9#iLsNJKcXkbk+M0JQ3?! 
z6A>MB+~oP!t@=SS4fU4vrpBKLo2hz-g-qPJYrS97xP zg~$GeICl4#&jeycf5~ggc=T_L)J>2$2rsqq^ZaSNC3R(7^(W(KzVOtp<0XD~-TreQ z-uJoF-I9I3_Or9T*dSlOf0X&sU;f>2|2Mr(2!B?yXuUNe0b5b#$^5+jVI~#*P0X*H z|4jPh`J^7mcxs1H{B|mcAB3OAu{Cb8u#DEt>i)l9 z>Vxl+|6j|+FBq+Io%VxnsXw*7{-&r>tN(lA7^>6%YyCqv<$fgX*Ghav{q*k^_WyqC zV{aMjPIzd-Z+|!GP1MofzbD(t?C|#cRlQFl{HRK~#!r8ozfS$c>Bs9I6s_n_AA@lW z`0Tgz_#ojmuO7+QJa~fY?_D-u<4K-1kzY5jPJXw}i(fzJ#oOjPv(g1W*z=y#sHf*2 zhNM2o{hxF33r4Ga-$`xu+uBb{@|lm=UjId+(hy7K*3F1BE_%Kd3- z>-X>U(~$hc_WHMqh>mMU^3vZE>Aa~Uh+d$TI%+Q`pW~=X9({q-!v;I}-GS5RU3uNf z54rgRFD`uI&S$LTao!3W%#RKKzjG%xnAI%kgKpU5rStRpw~JC0{Y~Mmp3l^J%u^7( zKx>NZVf%3tOg&EII70aiiMJD;SuvY>b?8o?ck=r0crkaMo}T}4*2D{#kG}u*%6QUG zetmfV9k2Ua`jmM`cDdGi>q#9WNM1YI^yl^ODLl3MZ#JOTJ!Jo;DJ0JT37Nb z_W#iDPI>YR*Q|Q|0(a?UYkl?W!|c z^cQ5_UjN>LS*w4i{l6))%k7-H))d~-Z%twOak>QC5tWZW>Cc+Tt04Tkc#Yp(y5xZ` z&)8?F`|8P;?|j#N<^BIZAmg+~V*W|_{VI_jgA8g3+q38DFn|7QP-g(swK7dj0#0M|Ju~mX8ac_YXxA8LDG@ zq^*9&6~x~OnUA83SFy$4sotEw%zxuPvrb(&=hZhnbLjC4%k$6Dw|?)^n}x+r2=Bi? z)crraMvSLx`qJ-zO=PHk=#WRh*MCXzs80W;{I4VPQr#ur8lUVJc+LBxZB0bYV_ZS? zhtbRZ6n>ETP*0J1GS2e)-Lw;@@B7s^f68rk+Ee#lJL|XXKlJ=VQZLW5u%z>jUaykmrPmXteOOauSM{@|@RoknQR*^2sCARL=9y4V=Q~=z zTl2gn2mWzjvRm_qPyMmys83p{tM^|Tl5xP6*ut}d(Oa(Z(-*p7r}N|W;PqcxRH^81 z3a|Hv=rA2kaF;q@3$J-ujH4^4eRPeCGcLRo(>%*~Qb+CO^t-LH?3k{Pjde30UTfN! 
z$LevtUjH+Yg{5-caV~zrX#Jb*e?#FL!Ca8O#rFCS77=u?rm)Joybqy^K96~zmCr+e zw3~|~!Ut0w)t@h$uW|YXP2_7mY%t{|emCi~+ooLes~K+HYkv0XgeTu+C9d~B$+NI9 z&-{wRie&+|Vlqz5Ds^kFEp^~E#BN1oeqR5f!c(U|pX1=Ozf?3M>nShst6tI%JdoET zsD9yFsi!zffA>bS7C(Bzy_p`ltQC2h%(qca-|O8E5?IADf@Iz-{%D z{0oa7I*j_d|7pkm(LcXGeE-tM1|!RFw+$Xio{7kHfYh&h-FW@OOe*@z{HUXPNPPlE3}-Q`FV-4}&rexMev0 zyhHWNCC>PQ(d(ZNvC+*M;kN|I6TjC#JgU>5=UerVJey)q7MA9o5#fXM1*U%N6yYl} zZZZ!m{xm>zNqx=n9;=F+=<<%sL?=h8TS<+!gce$a?5b))5V6o1|CU()T_>z^0775%9*t%wdg z{>+0}xegm0L2DvUZFCgG52|kPBwzj5V9HxgzkAP~)N`O(Xh?>=CPl{|g_ zrH)`84uAadPvXyN7I^ymFZxW7d3*g^MX8GZ@Oz8sF#Q928T{Tr%lIDy>VvAI)J^@^ zDW-YW#BtP*4bnF@D1K+QI%1a-rk?MnZhhkKZ@KJZ>ge|0pM{0vZae$o;sod;`uc3d zIBW7#o8L`X6C;Z4Xfsc*fAG}l-<&&=pZ3A{)B~+a^3X*P9>{S}S`V9giqw;FI{6vL zZQuRq9&>I#+Rgf0{@CoD%k%#SXJO%U7f4<@@5nCK*2Ks3i*DHI{HW*k?@k#XfEe(mG`NPoufUUThy>E{kTvwOm47oGIrA9tjGJ^wQl=fibjs#`<+pv)tB z%QfTZ|6dDF>m;_ zYyJ77eQw%8=e+#0Z|!ZRo__z{n}vn`?51KHK^Zq%)iv|e`)`vc^Wu46eqR5+qDn=7 z$xH9&W5u5$^+4jOO@EB1@kSOFj<3{j#J{{K`q&!2n|Jt_-)_Cn0dD?hKJ|fv_tW=P z{rp8AAlLs>S1@)}T{AAaVY9zU=jZkBFRGx2@=f9O{&oQUT^0F?^wkr2D|-ANv( zbO-hMVjL4Q)Q>GlUL88~;vQ@N?d!X`^-e$OnT3bkW+hMWKagi(VSal2BlA<88p1OX zSzEEWZokrJD>CWN>%XM%XkE!CkMsKQ@uvu{`SV!cN*&FkE=d1j^nas2{N=^2EG#zV zi{BmnuI_K%`LBm=B@|yx(M2(sT2lz1L7jKY!&}SeRe_>2Lwi64i(2 zry)Fzi1~3UVJ&=EdtzQR)*1$yYn+13y?-Py3dBs5@G} z`_t_U?)=1>FS|cq_^~_YJX&7=^MNcZ+D9YW`73h$xskYFw92&=KYgJaHqWQpQono5 zXG4)%{iUwz)CzPxF0x5nd5IsMqKvP6>UL#eQC?~riI39nX8+~s3;ucdRA;w7^Kai7 zY@>(%{-s~)gWKK`egTs{FXTmbxyE04|4rv7w%0$*q@usfkN?qAof^^~C_LhzHIb*b z@aPXe$m=qVV^dF&dNR)P`ny-(bi%AV=DwqQ^?O%YZ~pC9Q(vz?AIic~`TW}wzhJcd zext!7iJOR(|34+a9plW~>mM>|^_TB4;WM6M$S&WXm6!OLCkPMJdeW!*yJEjq>Vd>l zq&_@FspCHRhoe68r57)7r+(Moa!ocKzUrT6VPSuf|Nh^fe~WDYJS2TKx0X8iTO)1y z_WC!9NJW1W`K-AQC4DIhk9wRp#0IrL=?8x&O!I6qUZ?ytKIhjzys_&&dzm@UjKAT7 z&n#?HPv3u~KK`8h-}Yo-QJsQnrS386Z4)0=&OeJE-RR5fpBEWg7oOQ7 ziYD^({Il2=gx`uhuYXVRsG`5{())?&`(z{LTN5#zZ$%%ehrhzlW1TdPZH>f1>S2Q& z{M}tgjQhohpIp*C=GvtX?|$^=R`T@x!%!BM%JEM_{DRT)y2*H{jc&wse<6HquYYfm 
zTdV(Su`+sD6YUha-`ooS|JssRkNPQU|LM5QV+H+g)@6IVZNL4`clk!Ei0Z$~D5dj0zgkJdMZHLQ*aX1V{D@#q_*9%$1%vGKDXQ+}R>rQ$c@Pmw-Se+S$3 zJ#yMzJ09QX&f9yhkFPTC5i5NC{oOzo7Jk30{RxlxMpkuAT+e@!mq#vf>U#Z`6p@Pl zCh|Ft1fTbB-aoDIh=Zx0aY1;X)@{Xc$%n7Dk$9c{9sF*yC7bpf`>n^_^anOv^TAJ* z*FV(#{$+Yk_ysomYcO_EuG2pG{LB16c-UV5fuc%9f0+lLBjA&tB5}}~qUx~YCKF73 zzvB5*eq&@@{p2k#*6A<#?u_03zS?0ky4)9ExBr<}P3YtGrr*ExNgv=wa{rB7{DRT) zx)~ltL>F`;&N#$*9=-lci_BX6yJKC|qY-VbXGX@Am-uzut{Q$T`r&CE;ajPvI7)x_ zx<9@CqK&2=(!K7&3l4LS9M5=q{Z(%k7JdJnwtUwgrhPRAiCb^v|-Q4h2x z##5X5lCN>;OP^QCSAJ@D#rk$R{fy&w-2HnyAN}+rZpS6v^FB9WRr2-o*FYAQ%Jo;w z8)SUJnB{d5J3VjA_{6ch$DA3*>ydGdAa&CFpX3q;g-1Qmn#faIc;Uft{d zc*fs8b=ao#rQg5w%D8Po{(sqsE#pS3y0#|znxY%^)A=!PuYbs>=r7}>pQrSFq#ehh zKJ`EwsxD*yFaAorm4$`+T4$6xQ(sZ)xS7+h-0%F|&UD+~`?ucXKXWP7Q1$M z|J?!}^NrE!nmD>?Ud{Zx{*9tat^S+DF;tH{+WLH&k#XfEek-=3^rL+G>&n8Sez7&r z#5nU@QNKIj7jJlO#%pbNz!UG7yu;q*{a5#wx-Eozj5__fuB4&|RLqzzXjw^ii_b!{Saq5F6^0gi|nDP?8+vmWOPkY@~ zZ8!g~C$9d~*cNs4_&?9W!uFM45qIWmH^$d^o`r?D`qTND zkvxkHcHp~Ch!M=(aq*Mr`Fz|`bP z9j|{FQOAQXb$K6AJtU9UXR4R-q>kE2zxY9|*UG}8e)6QA1#A4|)uB7|;m2A(nKI>@qXYk9oSe^FmB zvZ`yXClkO<=g0f8*T1i*QmcP@{NIXW>VD8f8@)iA__Ytw2Oh}&h`!X{K~Kg}?7(+3 zw*75$>V=Ek*1KP_!}Z&4OJDqMO~3aV%ECfFs$U-cb!)9B|Nm}HWNIDyYem1;zrT1? 
z(cctae>}5&MAajYw)SI2#+8@&=?jDhTH&X0Y>g+p74^Huv=6`UsBDs(G3Uj}cW!bR z`Fj4Xk%firAs-OijKpY_Yir_T`b9VFG>+}{Us7aN^q1#}<5<;6p0^d={!KATxxZq2 zsMM8l)i1U+MfR}#QTpASZC@Ba>GdzW9p3)zwYwaBC;D*yhw}!Y_K}C-^>+c}fidd) zlfKZ+8p&%3(zn=N|A8V>tN-+vr+rYT9sP_)9JD6JQyYIpfBF*CJp59R@e~t3d3ESE z+vD*!J=R*yZT7u0Z@GSY`Tf@cv0rotsiUv=Jo4(+^riPdF_CFO(T}=b|D^?^R{zOG zw0wW1zM=_XeV?VSP5ju3NgwKOXJJX}XxzkjI({V%FG2V5?#Ir)@@HMH_tl>*Ib^WB z|0?$%$g{AZcOL%!<-!Z}_1>1a8Hv#vw*8B4HsMi+`FZ^Zi`-iM-(5t9>7NI)d>xu- zQxCL>U-gph86L>~w)S-)^{A`$R?zPzJT(2$8y8M=YyIN>&wgU!hScNp7Wybo3IBik z2^~-R&yppLH$^w~H)29MKd=8#5vk~JBA@HHR44J<1c`&hQ(OD#&BCH_nHTYj-$cF@ zB%UJq@D!!KoBsKWk2v?_d2Yt}?)Luk+VJ)IlROIx+rJ$X{{Q_Mtz%+5U29y%K{xg1 zF@y2I?lEVDK&}3BVjlCh|9lSr5LIdWt+>E9iGqKlJ9QAKrJY z+ho&e2R@jTkZ)o;ZAA42!>?6STGKK7bBY5Fx6z2}8q zD|Pkvk9{&8xW)4Q{vu|T`=O@jMt$9Hi(c4X|Gdbp=r6n?RxE5MUtBi7}A2jGJ`zVwa16Q+5U`Ov3er+RMP zXCHXm^|LGePKlSe<}vH$S;_Y{#j{dpfzJ?Nu7vL12Jnix-Q{Gi6EEB&fp zZ1{>R>ThIWadY1G^kJ)=(&u(~^Ng847;_!<^!)QsQYB-;vM*U{`p<=UiXHLXIw`<=bt(625KMF7tE5y zil4sF4LhA5w%5PEC{?S!EuzED|1iG<=@Yc3$g0nS2htDys-JO8D5u94@aziu-F6?` z{qIlS;(vcPew_)2A9xOR^!kTB83&vl?mu%{I!onf+{^xnj_4+R<9@Xj3eoysi z#60};kInb~@ImEu#eDpoFwL_@#+N#3i@#I7?yfy1ZT-MMKhV9}O0zfm@ZyKa*WX|F zNPUq1U&zG|GQMDp4%`1E*Ui!w?q#x>PJtI89v~FsIv^BDK~4;GJ7J*_Fce*2#jQFUrXo7a&wBjd_T z{P1-A-jR9wyvp;Wb$H&SUZu_o`rW2~*lGVAj{mmX{IX|$I{u4SP*0Eld$X|Ud7zr} zKao|gt%;ABUlX0^zcDh-e7*idMWjxDbXGlbsRL5K9sSLnBEkph3&bDU<@+>#5Wb>` z85LXn54|4&JM>Vejb90yzbZYS|P+H4orh)w_4VCt{wgML6aZT!uz z-?(>EH|?FXHvIgdxs`f){L{+9LLWK*f7&B{P{y-H{Mdrcrub{(?BBfpAw%`24tc$? z4*ke(_g@&8^XmK7ocF~=r}O-C{*%vLApNO6Ju=^9{OBF_e9G_UEd8Q?J2I*3^$!uP z4`1r@J_4V96@^b6RK4gkL3p6n6Q26Xqc3m;{nT?ezjDrbzd3rQ`_Ykyto)lB%j?hI zpM^#5bJQoc-LbdyLkAKL%fQs;Q?u6Vl7? 
z1*9+ffd}gAsxD9b7Nj0F*umeu#*fyx_;u@S)xGAF_bz3Kx&IRLOOSeCI*#@sdGJ8^JP+z;9Fv7bc{P6W z>d>9H=0{&S;?H-x^Cxbx#*tYc_4NJ&CJd9|uYCTJcw|-A%(L?UN0P^Wn*O~0?V?IW zf8iIgVnLV6_HRPC+|Ju0ZN{N55Pv6xuPEbHZ1H!h=bHaI`@^&U_?mM+Uv17cA1|+e zI5_sL=RF6--cqhVX^9Oc&wq>#+yAHk|CQ#6?e*^|ax3~veXgTpUOI1yV-p9hDXNyo z_3%J=pgzA+SN)7*LOI+%0!78ZRTW{BNN z&_p|3(-*p7r}M-1`u7$!YW0`gfKzorT4MYsWvp*7?f(Bdgrr)i&`FzrUtF`yI7qo?idH zBBP=|b<+DUwPJnkPwMFXmmZh6-f3wmPCY?qVmZHWn7C5cJRB2zu55UQ{6(h z?g5MTzvRrh)YtP5gIQQ~``=WqKWvM?6_K?y@ey@Eu7kp^=r47={{2OTI{i5gK$mno zu%^iN`TYU1<|z^3gW8AaqkicFJH<55npj``*r3$6*r52`pEj`9UHhZ)?(esr-Z=0J zgYatg-z41sVXSU%?JO)hek0nf@ps(*^O)iFUs61(=r8^9I|6j6)1NqKO;L3e`~Ox= zo{4@7N`GoEr@wpLEn_#ht^d2-YrOaHg?CI^hxzdNi|t5J`axGPOBVbfx?!`w2hqvv zKTwpa)nDcbpMDi3-URQ8jll=CkF;LjviX%f6L}VtI%+Sc-@SkFWlKNxRKvBualn?J v*s}cmH9rfB?kDnS=RfiPKK1Zha-DA@`dKrt)Nab}CWu=T8Ps8ZCRzSJ!|3@a literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/DRXFCD_G.csv b/pandas/tests/io/sas/data/DRXFCD_G.csv new file mode 100644 index 00000000..3fceacd1 --- /dev/null +++ b/pandas/tests/io/sas/data/DRXFCD_G.csv @@ -0,0 +1,7619 @@ +"DRXFDCD","DRXFCSD","DRXFCLD" +1.1e+07,"MILK, HUMAN","Milk, human" +11100000,"MILK, NFS","Milk, NFS" +11111000,"MILK, COW'S, FLUID, WHOLE","Milk, cow's, fluid, whole" +11111100,"MILK, COW'S, FLUID, WHOLE, LOW SODIUM","Milk, cow's, fluid, whole, low-sodium" +11111150,"MILK, CALCIUM FORTIFIED, WHOLE, COW'S, FLUID","Milk, calcium fortified, cow's, fluid, whole" +11111160,"MILK, CALCIUM FORTIFIED, COW'S, FLUID, 1% FAT","Milk, calcium fortified, cow's, fluid, 1% fat" +11111170,"MILK, CALCIUM FORTIFIED, SKIM/NONFAT, COW, FLUID","Milk, calcium fortified, cow's, fluid, skim or nonfat" +11112110,"MILK, COW'S, FLUID, 2% FAT","Milk, cow's, fluid, 2% fat" +11112120,"MILK, COW'S, FLUID, ACIDOPHILUS, 1% FAT","Milk, cow's, fluid, acidophilus, 1% fat" +11112130,"MILK, COW'S, FLUID, ACIDOPHILUS, 2% FAT","Milk, cow's, fluid, acidophilus, 2% fat" +11112210,"MILK, COW'S, FLUID, 1% FAT","Milk, cow's, fluid, 1% fat" 
+11113000,"MILK, COW'S, FLUID, SKIM OR NONFAT","Milk, cow's, fluid, skim or nonfat, 0.5% or less butterfat" +11114300,"MILK, LOW LACTOSE, 1% FAT","Milk, cow's, fluid, lactose reduced, 1% fat" +11114310,"MILK, LOW LACTOSE, 1% FAT, FORTIFIED WITH CALCIUM","Milk, cow's, fluid, lactose reduced, 1% fat, fortified with calcium" +11114320,"MILK, LOW LACTOSE, NONFAT","Milk, cow's, fluid, lactose reduced, nonfat" +11114321,"MILK, LOW LACTOSE, NONFAT, W/ CALCIUM","Milk, cow's, fluid, lactose reduced, nonfat, fortified with calcium" +11114330,"MILK, COW'S FL LACTOSE REDUCED 2% FAT (LACTAID)","Milk, cow's, fluid, lactose reduced, 2% fat" +11114350,"MILK, COW'S, FLUID, LACTOSE REDUCED, WHOLE","Milk, cow's, fluid, lactose reduced, whole" +11115000,"BUTTERMILK, FLUID (INCLUDE KEFIR MILK)","Buttermilk, fluid, nonfat" +11115100,"BUTTERMILK, FLUID, 1% FAT","Buttermilk, fluid, 1% fat" +11115200,"BUTTERMILK, FLUID, 2% FAT","Buttermilk, fluid, 2% fat" +11115300,"BUTTERMILK, FLUID, WHOLE","Buttermilk, fluid, whole" +11116000,"MILK, GOAT'S, FLUID, WHOLE","Milk, goat's, fluid, whole" +11120000,"MILK, DRY, RECONSTITUTED, NFS","Milk, dry, reconstituted, NFS" +11121100,"MILK, DRY, RECONSTITUTED, WHOLE","Milk, dry, reconstituted, whole" +11121210,"MILK, DRY, RECONSTITUTED, LOWFAT","Milk, dry, reconstituted, lowfat" +11121300,"MILK, DRY, RECONSTITUTED, NONFAT","Milk, dry, reconstituted, nonfat" +11210050,"MILK, EVAPORATED, NS AS TO FAT CONTENT","Milk, evaporated, NS as to fat content (formerly NS as to dilution, used in coffee or tea, assume undiluted)" +11211050,"MILK, EVAPORATED, WHOLE","Milk, evaporated, whole (formerly NS as to dilution, used in coffee or tea)" +11211400,"MILK, EVAPORATED, 2% FAT","Milk, evaporated, 2% fat (formerly NS as to dilution)" +11212050,"MILK, EVAPORATED, SKIM","Milk, evaporated, skim (formerly NS as to dilution, used in coffee or tea)" +11220000,"MILK, CONDENSED, SWEETENED","Milk, condensed, sweetened (formerly NS as to dilution)" +11320000,"MILK, SOY, 
READY-TO-DRINK, NOT BABY","Milk, soy, ready-to-drink, not baby's" +11320100,"MILK, SOY, LIGHT, READY-TO-DRINK, NOT BABY'S","Milk, soy, light, ready-to-drink, not baby's" +11320200,"MILK, SOY, NONFAT, READY-TO-DRINK, NOT BABY'S","Milk, soy, nonfat, ready-to-drink, not baby's" +11321000,"MILK, SOY, READY-TO-DRINK, NOT BABY'S, CHOCOLATE","Milk, soy, ready-to-drink, not baby's, chocolate" +11321100,"MILK, SOY, LIGHT, READY-TO-DRINK, NOT BABY'S, CHOCOLATE","Milk, soy, light, ready-to-drink, not baby's, chocolate" +11321200,"MILK, SOY, NONFAT, READY-TO-DRINK, NOT BABY'S, CHOCOLATE","Milk, soy, nonfat, ready-to-drink, not baby's, chocolate" +11340000,"MILK,IMITATION,FLUID,NONSOY,SWEETENED,NOT CHOCOLATE","Milk, imitation, fluid, non-soy, sweetened, flavors other than chocolate" +11350000,"MILK, ALMOND, READY-TO-DRINK","Milk, almond, ready-to-drink" +11350010,"MILK, ALMOND, READY-TO-DRINK, CHOCOLATE","Milk, almond, ready-to-drink, chocolate" +11410000,"YOGURT, NS AS TO TYPE OF MILK/FLAVOR","Yogurt, NS as to type of milk or flavor" +11411010,"YOGURT, PLAIN, NS AS TO TYPE OF MILK","Yogurt, plain, NS as to type of milk" +11411100,"YOGURT, PLAIN, WHOLE MILK","Yogurt, plain, whole milk" +11411200,"YOGURT, PLAIN, LOWFAT MILK","Yogurt, plain, lowfat milk" +11411300,"YOGURT, PLAIN, NONFAT MILK","Yogurt, plain, nonfat milk" +11420000,"YOGURT, VANILLA, LEMON, COFFEE, NS AS TO MILK TYPE","Yogurt, vanilla, lemon, or coffee flavor, NS as to type of milk" +11421000,"YOGURT, VANILLA, LEMON, COFFEE, WHOLE MILK","Yogurt, vanilla, lemon, or coffee flavor, whole milk" +11422000,"YOGURT, VANILLA, LEMON, COFFEE, LOWFAT MILK","Yogurt, vanilla, lemon, maple, or coffee flavor, lowfat milk" +11422100,"YOGURT, VANILLA, LEMON, COFFEE, LOWFAT MILK, LOW CAL SWTNR","Yogurt, vanilla, lemon, maple, or coffee flavor, lowfat milk, sweetened with low calorie sweetener" +11423000,"YOGURT, VANILLA, LEMON, COFFEE, NONFAT MILK","Yogurt, vanilla, lemon, maple, or coffee flavor, nonfat milk" +11424000,"YOGURT, 
VANILLA, LEMON, COFFEE, NONFAT MILK, LOW CAL SWEET","Yogurt, vanilla, lemon, maple, or coffee flavor, nonfat milk, sweetened with low calorie sweetener" +11425000,"YOGURT, CHOCOLATE, NS AS TO TYPE OF MILK","Yogurt, chocolate, NS as to type of milk" +11426000,"YOGURT, CHOCOLATE, WHOLE MILK","Yogurt, chocolate, whole milk" +11427000,"YOGURT, CHOCOLATE, NONFAT MILK","Yogurt, chocolate, nonfat milk" +11430000,"YOGURT, FRUIT VARIETY, NS AS TO MILK TYPE","Yogurt, fruit variety, NS as to type of milk" +11431000,"YOGURT, FRUIT VARIETY, WHOLE MILK","Yogurt, fruit variety, whole milk" +11432000,"YOGURT, FRUIT VARIETY, LOWFAT MILK","Yogurt, fruit variety, lowfat milk" +11432500,"YOGURT, FRUIT VARIETY, LOWFAT MILK, W/ LOW CAL SWEETENER","Yogurt, fruit variety, lowfat milk, sweetened with low-calorie sweetener" +11433000,"YOGURT, FRUIT VARIETY, NONFAT MILK","Yogurt, fruit variety, nonfat milk" +11433500,"YOGURT, FRUITED, NONFAT MILK, LOW CAL SWEETENER","Yogurt, fruit variety, nonfat milk, sweetened with low-calorie sweetener" +11446000,"FRUIT AND LOWFAT YOGURT PARFAIT","Fruit and lowfat yogurt parfait" +11459990,"YOGURT, FROZEN, NS AS TO FLAVOR, NS TO TYPE OF MILK","Yogurt, frozen, NS as to flavor, NS as to type of milk" +11460000,"YOGURT, FROZEN, NOT CHOCOLATE, TYPE OF MILK NS","Yogurt, frozen, flavors other than chocolate, NS as to type of milk" +11460100,"YOGURT, FROZEN, CHOCOLATE, TYPE OF MILK NS","Yogurt, frozen, chocolate, NS as to type of milk" +11460150,"YOGURT, FROZEN, NS AS TO FLAVOR, LOWFAT MILK","Yogurt, frozen, NS as to flavor, lowfat milk" +11460160,"YOGURT, FROZEN, CHOCOLATE, LOWFAT MILK","Yogurt, frozen, chocolate, lowfat milk" +11460170,"YOGURT, FROZEN, NOT CHOCOLATE, LOWFAT MILK","Yogurt, frozen, flavors other than chocolate, lowfat milk" +11460190,"YOGURT, FROZEN, NS AS TO FLAVOR, NONFAT MILK","Yogurt, frozen, NS as to flavor, nonfat milk" +11460200,"YOGURT, FROZEN, CHOCOLATE, NONFAT MILK","Yogurt, frozen, chocolate, nonfat milk" +11460250,"YOGURT,FROZEN,NOT 
CHOCOLATE,W/ SORBET/SORBET-COATED","Yogurt, frozen, flavors other than chocolate, with sorbet or sorbet-coated" +11460300,"YOGURT, FROZEN, NOT CHOCOLATE, NONFAT MILK","Yogurt, frozen, flavors other than chocolate, nonfat milk" +11460400,"YOGURT,FRZ,CHOCOLATE,NONFAT MILK,W/ LOW-CAL SWEET","Yogurt, frozen, chocolate, nonfat milk, with low-calorie sweetener" +11460410,"YOGURT,FRZ,NOT CHOC,NONFAT MILK,W/ LOW-CAL SWEET","Yogurt, frozen, flavors other than chocolate, nonfat milk, with low-calorie sweetener" +11460420,"YOGURT, FROZEN, NS AS TO FLAVOR, WHOLE MILK","Yogurt, frozen, NS as to flavor, whole milk" +11460430,"YOGURT, FROZEN, CHOCOLATE, WHOLE MILK","Yogurt, frozen, chocolate, whole milk" +11460440,"YOGURT, FROZEN, NOT CHOCOLATE, WHOLE MILK","Yogurt, frozen, flavors other than chocolate, whole milk" +11461000,"YOGURT, FROZEN, CHOCOLATE-COATED","Yogurt, frozen, chocolate-coated" +11461200,"YOGURT, FROZEN, SANDWICH","Yogurt, frozen, sandwich" +11461250,"YOGURT, FROZEN, CONE, CHOCOLATE","Yogurt, frozen, cone, chocolate" +11461260,"YOGURT, FROZEN, CONE, NOT CHOCOLATE","Yogurt, frozen, cone, flavors other than chocolate" +11461270,"YOGURT, FROZEN, CONE, NOT CHOCOLATE, LOWFAT MILK","Yogurt, frozen, cone, flavors other than chocolate, lowfat milk" +11461280,"YOGURT, FROZ, CONE, CHOCOLATE, LOWFAT MILK","Yogurt, frozen, cone, chocolate, lowfat milk" +11480010,"YOGURT, WHOLE MILK, BF","Yogurt, whole milk, baby food" +11480020,"YOGURT, WHOLE MILK, BF, W/FRUIT& MULTIGRAIN CEREAL,NFS","Yogurt, whole milk, baby food, with fruit and multigrain cereal puree, NFS" +11480030,"YOGURT, WHOLE MILK, BF, W/FRUIT&MULTIGRAIN CEREAL + IRON","Yogurt, whole milk, baby food, with fruit and multigrain cereal puree, plus iron" +11480040,"YOGURT, WHOLE MILK, BF, W/FRUIT&MULTIGRAIN CEREAL + DHA","Yogurt, whole milk, baby food, with fruit and multigrain cereal puree, plus DHA" +11511000,"MILK, CHOCOLATE, NFS","Milk, chocolate, NFS" +11511100,"MILK, CHOCOLATE, WHOLE MILK BASED","Milk, chocolate, 
whole milk-based" +11511200,"MILK, CHOCOLATE, RED FAT, 2%","Milk, chocolate, reduced fat milk-based, 2% (formerly ""lowfat"")" +11511300,"MILK, CHOCOLATE, SKIM MILK BASED","Milk, chocolate, skim milk-based" +11511400,"MILK, CHOCOLATE, LOWFAT MILK BASED","Milk, chocolate, lowfat milk-based" +11512000,"COCOA,HOT CHOCOLATE,NOT FROM DRY MIX, W/WHOLE MILK","Cocoa, hot chocolate, not from dry mix, made with whole milk" +11512500,"HOT CHOCOLATE, P.R., MADE W/ WHOLE MILK","Hot chocolate, Puerto Rican style, made with whole milk" +11512510,"HOT CHOCOLATE, P.R., MADE W/ LOW FAT MILK","Hot chocolate, Puerto Rican style, made with low fat milk" +11513000,"COCOA & SUGAR MIXTURE, MILK ADDED, NS TYPE MILK","Cocoa and sugar mixture, milk added, NS as to type of milk" +11513100,"COCOA & SUGAR MIXTURE, WHOLE MILK ADDED","Cocoa and sugar mixture, whole milk added" +11513150,"COCOA & SUGAR MIXTURE, REDUCED FAT MILK ADDED","Cocoa and sugar mixture, reduced fat milk added" +11513200,"COCOA & SUGAR MIXTURE, LOWFAT MILK ADDED","Cocoa and sugar mixture, lowfat milk added" +11513300,"COCOA & SUGAR MIXTURE, SKIM MILK ADDED","Cocoa and sugar mixture, skim milk added" +11513350,"COCOA AND SUGAR MIXTURE, REDUCED SUGAR, MILK ADDED, NS TYPE","Cocoa and sugar mixture, reduced sugar, milk added, NS as to type of milk" +11513355,"COCOA AND SUGAR MIXTURE, REDUCED SUGAR, WHOLE MILK ADDED","Cocoa and sugar mixture, reduced sugar, whole milk added" +11513360,"COCOA AND SUGAR MIXTURE, REDUCED SUGAR, REDUCED FAT MILK ADD","Cocoa and sugar mixture, reduced sugar, reduced fat milk added" +11513365,"COCOA AND SUGAR MIXTURE, REDUCED SUGAR, LOWFAT MILK ADDED","Cocoa and sugar mixture, reduced sugar, lowfat milk added" +11513370,"COCOA AND SUGAR MIXTURE, REDUCED SUGAR, SKIM MILK ADDED","Cocoa and sugar mixture, reduced sugar, skim milk added" +11513400,"CHOCOLATE SYRUP, MILK ADDED, NS AS TO TYPE OF MILK","Chocolate syrup, milk added, NS as to type of milk" +11513500,"CHOCOLATE SYRUP, WHOLE MILK 
ADDED","Chocolate syrup, whole milk added" +11513550,"CHOCOLATE SYRUP, RED FAT MILK ADDED","Chocolate syrup, reduced fat milk added" +11513600,"CHOCOLATE SYRUP, LOWFAT MILK ADDED","Chocolate syrup, lowfat milk added" +11513700,"CHOCOLATE SYRUP, SKIM MILK ADDED","Chocolate syrup, skim milk added" +11514100,"COCOA, SUGAR, & DRY MILK MIXTURE, WATER ADDED","Cocoa, sugar, and dry milk mixture, water added" +11514300,"COCOA W/ NF DRY MILK, LO CAL SWEETENER, WATER ADDED","Cocoa with nonfat dry milk and low calorie sweetener, mixture, water added" +11514500,"COCOA W/ WHEY, LO CAL SWEETNR, FORTIFD, WATER ADDED","Cocoa, whey, and low calorie sweetener, mixture, fortified, water added" +11515100,"COCOA & SUGAR W/ MILK, FORTIFIED, PUERTO RICAN","Cocoa and sugar mixture fortified with vitamins and minerals, milk added, NS as to type of milk, Puerto Rican style" +11516000,"COCOA, WHEY, LO CAL SWEETNER MIX, LOWFAT MILK ADDED","Cocoa, whey, and low-calorie sweetener mixture, lowfat milk added" +11518050,"MILK BEV W/NF DRY MILK, LO CAL SWEET,WATER,NOT CHOC","Milk beverage with nonfat dry milk and low calorie sweetener, water added, flavors other than chocolate" +11519000,"MILK BEVERAGE, NOT CHOCOLATE, W/ WHOLE MILK","Milk beverage, made with whole milk, flavors other than chocolate" +11519040,"MILK, FLAVORS OTHER THAN CHOCOLATE, NFS","Milk, flavors other than chocolate, NFS" +11519050,"MILK, NOT CHOCOLATE, WHOLE MILK BASED","Milk, flavors other than chocolate, whole milk-based" +11519105,"MILK, FLAVORS OTHER THAN CHOCOLATE, REDUCED FAT MILK-BASED","Milk, flavors other than chocolate, reduced fat milk-based" +11519200,"MILK, FLAVORS OTHER THAN CHOCOLATE, LOWFAT MILK-BASED","Milk, flavors other than chocolate, lowfat milk-based" +11519205,"MILK, FLAVORS OTHER THAN CHOCOLATE, SKIM-MILK BASED","Milk, flavors other than chocolate, skim-milk based" +11520000,"MILK, MALTED, UNFORTIFIED, FLAVOR NS","Milk, malted, unfortified, NS as to flavor, made with milk" +11521000,"MILK, MALTED, 
UNFORTIFIED, CHOCOLATE FLAVOR","Milk, malted, unfortified, chocolate, made with milk" +11522000,"MILK, MALTED, UNFORTIFIED, NATURAL FLAVOR","Milk, malted, unfortified, natural flavor, made with milk" +11525000,"MILK,MALTED,FORTIFIED,NATURAL FLAVOR (INCL OVALTINE","Milk, malted, fortified, natural flavor, made with milk" +11526000,"MILK, MALTED, FORTIFIED, CHOCOLATE (INCL OVALTINE)","Milk, malted, fortified, chocolate, made with milk" +11527000,"MILK, MALTED, FORTIFIED, (INCL OVALTINE)","Milk, malted, fortified, NS as to flavor, made with milk" +11531000,"EGGNOG, MADE W/ WHOLE MILK (INCLUDE EGG NOG, NFS)","Eggnog, made with whole milk" +11531500,"EGGNOG, MADE W/ 2% REDUCED FAT MILK","Eggnog, made with 2% reduced fat milk (formerly eggnog, made with ""2% lowfat"" milk)" +11541000,"MILK SHAKE, NS AS TO FLAVOR OR TYPE","Milk shake, NS as to flavor or type" +11541100,"MILK SHAKE,HOMEMADE/ FOUNTAIN-TYPE, NS AS TO FLAVOR","Milk shake, homemade or fountain-type, NS as to flavor" +11541110,"MILK SHAKE, HOMEMADE OR FOUNTAIN-TYPE, CHOCOLATE","Milk shake, homemade or fountain-type, chocolate" +11541120,"MILK SHAKE, HOMEMADE/FOUNTAIN-TYPE, NOT CHOCOLATE","Milk shake, homemade or fountain-type, flavors other than chocolate" +11541400,"MILK SHAKE WITH MALT (INCL MALTED MILK W/ICE CREAM)","Milk shake with malt" +11541500,"MILK SHAKE, MADE W/ SKIM MILK, CHOCOLATE","Milk shake, made with skim milk, chocolate" +11541510,"MILK SHAKE,MADE W/ SKIM MILK, NOT CHOCOLATE","Milk shake, made with skim milk, flavors other than chocolate" +11542000,"CARRY-OUT MILK SHAKE, NS AS TO FLAVOR","Carry-out milk shake, NS as to flavor" +11542100,"CARRY-OUT MILK SHAKE, CHOCOLATE","Carry-out milk shake, chocolate" +11542200,"CARRY-OUT MILK SHAKE, NOT CHOCOLATE","Carry-out milk shake, flavors other than chocolate" +11551050,"MILK FRUIT DRINK (INCL LICUADO)","Milk fruit drink" +11552200,"ORANGE JULIUS","Orange Julius" +11553000,"FRUIT SMOOTHIE DRINK, W/ FRUIT OR JUICE & DAIRY PRODUCTS","Fruit smoothie 
drink, made with fruit or fruit juice and dairy products" +11553100,"FRUIT SMOOTHIE DRINK, NFS","Fruit smoothie drink, NFS" +11560000,"CHOC-FLAVORED DRINK, WHEY-&MILK-BASED(INCL YOO-HOO)","Chocolate-flavored drink, whey- and milk-based" +11560020,"MILK DRINK, WHEY&MILK-BASE, NOT CHOC (INCL YOO-HOO)","Flavored milk drink, whey- and milk-based, flavors other than chocolate" +11561000,"CAFE CON LECHE","Cafe con leche" +11561010,"CAFE CON LECHE PREPARED W/ SUGAR","Cafe con leche prepared with sugar" +11710000,"INFANT FORMULA, NFS","Infant formula, NFS" +11710050,"SIMILAC EXPERT CARE ALIMENTUM, INFANT FORMULA, NS AS TO FORM","Similac Expert Care Alimentum, infant formula, NS as to form" +11710051,"SIMILAC EXPERT CARE ALIMENTUM, INFANT FORMULA, READY-TO-FEED","Similac Expert Care Alimentum, infant formula, ready-to-feed" +11710053,"SIMILAC EXPERT CARE ALIMENTUM,INF FORM,PREP FR PDR,WATER NFS","Similac Expert Care Alimentum, infant formula, prepared from powder, made with water, NFS" +11710054,"SIMILAC EXPERT CARE ALIMENTUM,INF FORM,PREP FR PDR,TAP WATER","Similac Expert Care Alimentum, infant formula, prepared from powder, made with tap water" +11710055,"SIMILAC EXPERT CARE ALIMENTUM, INF FORM, FR PDR, BTL WATER","Similac Expert Care Alimentum, infant formula, prepared from powder, made with plain bottled water" +11710056,"SIMILAC EXPERT CARE ALIMENTUM,INF FORM,PREP FR PDR,BABY WATR","Similac Expert Care Alimentum, infant formula, prepared from powder, made with baby water" +11710350,"SIMILAC ADVANCE, INFANT FORMULA, NS AS TO FORM","Similac Advance, infant formula, NS as to form" +11710351,"SIMILAC ADVANCE, INFANT FORMULA, READY-TO-FEED","Similac Advance, infant formula, ready-to-feed" +11710352,"SIMILAC ADVANCE, INF FORMULA, PREP FRM LIQ CONC, W/WATER,NFS","Similac Advance, infant formula, prepared from liquid concentrate, made with water, NFS" +11710353,"SIMILAC ADVANCE, INFANT FORMULA, PREP FRM PDR, W/WATER NFS","Similac Advance, infant formula, prepared from powder, 
made with water, NFS" +11710354,"SIMILAC ADVANCE, INF FORMULA, PREP FRM LIQ CONC, W/TAP WATER","Similac Advance, infant formula, prepared from liquid concentrate, made with tap water" +11710355,"SIMILAC ADVANCE, INF FORMULA, PREP FRM LIQ CONC, W/BOT WATER","Similac Advance, infant formula, prepared from liquid concentrate, made with plain bottled water" +11710356,"SIMILAC ADVANCE, INF FORMULA, PREP FR LIQ CONC, W/ BABY WATR","Similac Advance, infant formula, prepared from liquid concentrate, made with baby water" +11710357,"SIMILAC ADVANCE, INFANT FORMULA, PREP FRM PDR, W/TAP WATER","Similac Advance, infant formula, prepared from powder, made with tap water" +11710358,"SIMILAC ADVANCE, INFANT FORMULA, PREP FRM PDR, W/BOT WATER","Similac Advance, infant formula, prepared from powder, made with plain bottled water" +11710359,"SIMILAC ADVANCE, INFANT FORMULA, PREP FRM PDR, W/BABY WATER","Similac Advance, infant formula, prepared from powder, made with baby water" +11710360,"SIMILAC ADVANCE ORGANIC, INFANT FORMULA, NS AS TO FORM","Similac Advance Organic, infant formula, NS as to form" +11710361,"SIMILAC ADVANCE ORGANIC, INFANT FORMULA, READY-TO-FEED","Similac Advance Organic, infant formula, ready-to-feed" +11710363,"SIMILAC ADVANCE ORGANIC,INF FORM,PREP FR PDR,W/WATER,NFS","Similac Advance Organic, infant formula, prepared from powder, made with water, NFS" +11710367,"SIMILAC ADVANCE ORGANIC,INF FORM,PREP FR PDR,W/TAP WATER","Similac Advance Organic, infant formula, prepared from powder, made with tap water" +11710368,"SIMILAC ADVANCE ORGANIC,INF FORM,PREP FR PDR,W/BOT WATER","Similac Advance Organic, infant formula, prepared from powder, made with plain bottled water" +11710369,"SIMILAC ADVANCE ORGANIC,INF FORM,PREP FR PDR,W/BABY WATER","Similac Advance Organic, infant formula, prepared from powder, made with baby water" +11710370,"SIMILAC SENSITIVE, INFANT FORMULA, NS AS TO FORM","Similac Sensitive, infant formula, NS as to form" +11710371,"SIMILAC SENSITIVE, 
INFANT FORMULA, READY-TO-FEED","Similac Sensitive, infant formula, ready-to-feed" +11710372,"SIMILAC SENSITIVE, INF FORM, PREP FRM LIQ CONC, W/WATER,NFS","Similac Sensitive, infant formula, prepared from liquid concentrate, made with water, NFS" +11710373,"SIMILAC SENSITIVE, INF FORM, PREP FRM PDR, W/ WATER,NFS","Similac Sensitive, infant formula, prepared from powder, made with water, NFS" +11710374,"SIMILAC SENSITIVE, INF FORM, PREP FRM LIQ CONC, W/TAP WATER","Similac Sensitive, infant formula, prepared from liquid concentrate, made with tap water" +11710375,"SIMILAC SENSITIVE, INF FORM, PREP FRM LIQ CONC, W/BOT WATER","Similac Sensitive, infant formula, prepared from liquid concentrate, made with plain bottled water" +11710376,"SIMILAC SENSITIVE, INF FORM, PREP FRM LIQ CONC, W/BABY WATER","Similac Sensitive, infant formula, prepared from liquid concentrate, made with baby water" +11710377,"SIMILAC SENSITIVE, INF FORM, PREP FRM PDR, W/TAP WATER","Similac Sensitive, infant formula, prepared from powder, made with tap water" +11710378,"SIMILAC SENSITIVE, INF FORM, PREP FRM PDR, W/ BOT WATER","Similac Sensitive, infant formula, prepared from powder, made with plain bottled water" +11710379,"SIMILAC SENSITIVE, INF FORM, PREP FRM PDR, W/ BABY WATER","Similac Sensitive, infant formula, prepared from powder, made with baby water" +11710380,"SIMILAC SENSITIVE FOR SPIT-UP, INFANT FORMULA, NS AS TO FORM","Similac Sensitive for Spit-Up, infant formula, NS as to form" +11710381,"SIMILAC SENSITIVE FOR SPIT-UP, INFANT FORMULA, READY-TO-FEED","Similac Sensitive for Spit-Up, infant formula, ready-to-feed" +11710383,"SIMILAC SENSITIVE SPIT-UP,INF FORM, FR PDR, W/ WATER, NFS","Similac Sensitive for Spit-Up, infant formula, prepared from powder, made with water, NFS" +11710387,"SIMILAC SENSITIVE SPIT-UP,INF FORM,PREP FR PDR,W/TAP WATER","Similac Sensitive for Spit-Up, infant formula, prepared from powder, made with tap water" +11710388,"SIMILAC SENSITIVE SPIT-UP,INF FORM,PREP FR 
PDR,W/BOT WATER","Similac Sensitive for Spit-Up, infant formula, prepared from powder, made with plain bottled water" +11710389,"SIMILAC SENSITIVE SPIT-UP,INF FORM,PREP FR PDR,W/BABY WATER","Similac Sensitive for Spit-Up, infant formula, prepared from powder, made with baby water" +11710470,"SIMILAC EXPERT CARE NEOSURE, INFANT FORMULA, NS AS TO FORM","Similac Expert Care NeoSure, infant formula, NS as to form" +11710471,"SIMILAC EXPERT CARE NEOSURE, INFANT FORMULA, READY-TO-FEED","Similac Expert Care NeoSure, infant formula, ready-to-feed" +11710473,"SIMILAC EXPERT CARE NEOSURE,INF FORM,PREP FR PDR,W/WATER,NFS","Similac Expert Care NeoSure, infant formula, prepared from powder, made with water, NFS" +11710477,"SIMILAC EXPERT CARE NEOSURE,INF FORM,PREP FR PDR,W/TAP WATER","Similac Expert Care NeoSure, infant formula, prepared from powder, made with tap water" +11710478,"SIMILAC EXPERT CARE NEOSURE,INF FORM,PREP FR PDR,W/BOT WATER","Similac Expert Care NeoSure, infant formula, prepared from powder, made with plain bottled water" +11710479,"SIMILAC EXPERT CARE NEOSURE,INF FORM,PREP FR PDR,W/BABY WAT","Similac Expert Care NeoSure, infant formula, prepared from powder, made with baby water" +11710480,"SIMILAC GO AND GROW, INFANT FORMULA, NS AS TO FORM","Similac Go and Grow, infant formula, NS as to form" +11710481,"SIMILAC GO AND GROW,INF FORM,PREP FR PDR,W/WATER,NFS","Similac Go and Grow, infant formula, prepared from powder, made with water, NFS" +11710482,"SIMILAC GO AND GROW,INF FORM,PREP FR PDR,W/TAP WATER","Similac Go and Grow, infant formula, prepared from powder, made with tap water" +11710483,"SIMILAC GO AND GROW,INF FORM,PREP FR PDR,W/BOT WATER","Similac Go and Grow, infant formula, prepared from powder, made with plain bottled water" +11710484,"SIMILAC GO AND GROW,INF FORM,PREP FR PDR,W/BABY WATER","Similac Go and Grow, infant formula, prepared from powder, made with baby water" +11710620,"ENFAMIL PREMIUM NEWBORN, INFANT FORMULA, NS AS TO FORM","Enfamil 
PREMIUM Newborn, infant formula, NS as to form" +11710621,"ENFAMIL PREMIUM NEWBORN, INFANT FORMULA, READY-TO-FEED","Enfamil PREMIUM Newborn, infant formula, ready-to-feed" +11710626,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRO PDR, WATER NFS","Enfamil PREMIUM Newborn, infant formula, prepared from powder, made with water, NFS" +11710627,"ENFAMIL PREMIUM NEWBORN, INFANT FORMULA, PREP FRM PDR,TAP","Enfamil PREMIUM Newborn, infant formula, prepared from powder, made with tap water" +11710628,"ENFAMIL PREMIUM NEWBORN, INF FORM, PREP FRM PDR,BOTTLE WATER","Enfamil PREMIUM Newborn, infant formula, prepared from powder, made with plain bottled water" +11710629,"ENFAMIL PREMIUM NEWBORN, INFANT FORMULA, PREP FRM PDR, BABY","Enfamil PREMIUM Newborn, infant formula, prepared from powder, made with baby water" +11710630,"ENFAMIL PREMIUM INFANT, INFANT FORMULA, NS AS TO FORM","Enfamil PREMIUM Infant, infant formula, NS as to form" +11710631,"ENFAMIL PREMIUM INFANT, INFANT FORMULA, READY-TO-FEED","Enfamil PREMIUM Infant, infant formula, ready-to-feed" +11710632,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM LIQ CONC,WATER NF","Enfamil PREMIUM Infant, infant formula, prepared from liquid concentrate, made with water, NFS" +11710633,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM LIQ CONC,TAP WATE","Enfamil PREMIUM Infant, infant formula, prepared from liquid concentrate, made with tap water" +11710634,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM LIQ CONC,BOT WATE","Enfamil PREMIUM Infant, infant formula, prepared from liquid concentrate, made with plain bottled water" +11710635,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM LIQ CONC, BABY","Enfamil PREMIUM Infant, infant formula, prepared from liquid concentrate, made with baby water" +11710636,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM PDR, WATER NFS","Enfamil PREMIUM Infant, infant formula, prepared from powder, made with water, NFS" +11710637,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM PDR, TAP WATER","Enfamil PREMIUM Infant, infant 
formula, prepared from powder, made with tap water" +11710638,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM PDR, BOT WATER","Enfamil PREMIUM Infant, infant formula, prepared from powder, made with plain bottled water" +11710639,"ENFAMIL PREMIUM INFANT, INF FORM, PREP FRM PDR, BABY WATER","Enfamil PREMIUM Infant, infant formula, prepared from powder, made with baby water" +11710640,"ENFAMIL PREMIUM LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil PREMIUM LIPIL, infant formula, NS as to form" +11710642,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR LIQ CONC,W/WATER,NFS","Enfamil PREMIUM LIPIL, infant formula, prepared from liquid concentrate, made with water, NFS" +11710643,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR PDR,W/WATER,NFS","Enfamil PREMIUM LIPIL, infant formula, prepared from powder, made with water, NFS" +11710644,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR LIQ CONC,W/TAP WATER","Enfamil PREMIUM LIPIL, infant formula, prepared from liquid concentrate, made with tap water" +11710645,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR LIQ CONC,W/BOT WATER","Enfamil PREMIUM LIPIL, infant formula, prepared from liquid concentrate, made with plain bottled water" +11710646,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR LIQ CONC,W/BABY WATER","Enfamil PREMIUM LIPIL, infant formula, prepared from liquid concentrate, made with baby water" +11710647,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR PDR,W/TAP WATER","Enfamil PREMIUM LIPIL, infant formula, prepared from powder, made with tap water" +11710648,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR PDR,W/BOT WATER","Enfamil PREMIUM LIPIL, infant formula, prepared from powder, made with plain bottled water" +11710649,"ENFAMIL PREMIUM LIPIL,INF FORM,PREP FR PDR,W/BABY WATER","Enfamil PREMIUM LIPIL, infant formula, prepared from powder, made with baby water" +11710650,"ENFAMIL LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil LIPIL, infant formula, NS as to form" +11710651,"ENFAMIL LIPIL, INFANT FORMULA, READY-TO-FEED","Enfamil LIPIL, infant formula, ready-to-feed" 
+11710652,"ENFAMIL LIPIL, INFANT FORMULA, PREP FR LIQ CONC, W/WATER,NFS","Enfamil LIPIL, infant formula, prepared from liquid concentrate, made with water, NFS" +11710653,"ENFAMIL LIPIL, INFANT FORMULA, PREP FRM PDR, W/WATER,NFS","Enfamil LIPIL, infant formula, prepared from powder, made with water, NFS" +11710654,"ENFAMIL LIPIL, INFANT FORMULA, PREP FR LIQ CONC, W/TAP WATER","Enfamil LIPIL, infant formula, prepared from liquid concentrate, made with tap water" +11710655,"ENFAMIL LIPIL, INFANT FORMULA, PREP FR LIQ CONC, W/BOT WATER","Enfamil LIPIL, infant formula, prepared from liquid concentrate, made with plain bottled water" +11710656,"ENFAMIL LIPIL, INFANT FORMULA, PREP FR LIQ CONC, W/BABY WAT","Enfamil LIPIL, infant formula, prepared from liquid concentrate, made with baby water" +11710657,"ENFAMIL LIPIL, INFANT FORMULA, PREP FRM PDR, W/TAP WATER","Enfamil LIPIL, infant formula, prepared from powder, made with tap water" +11710658,"ENFAMIL LIPIL, INFANT FORMULA, PREP FRM PDR, W/BOT WATER","Enfamil LIPIL, infant formula, prepared from powder, made with plain bottled water" +11710659,"ENFAMIL LIPIL, INFANT FORMULA, PREP FRM PDR, W/BABY WATER","Enfamil LIPIL, infant formula, prepared from powder, made with baby water" +11710660,"ENFAMIL A.R. LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil A.R. Lipil, infant formula, NS as to form" +11710661,"ENFAMIL A.R. LIPIL, INFANT FORMULA, READY-TO-FEED","Enfamil A.R. Lipil, infant formula, ready-to-feed" +11710663,"ENFAMIL A.R., INFANT FORMULA, PREP FR PDR, W/WATER, NFS","Enfamil A.R. LIPIL, infant formula, prepared from powder, made with water, NFS" +11710664,"ENFAMIL A.R., INFANT FORMULA, PREP FR PDR, W/TAP WATER","Enfamil A.R. 
LIPIL, infant formula, prepared from powder, made with tap water" +11710665,"ENFAMIL ENFACARE LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil EnfaCare LIPIL, infant formula, NS as to form" +11710666,"ENFAMIL ENFACARE LIPIL, INFANT FORMULA, READY-TO-FEED","Enfamil EnfaCare LIPIL, infant formula, ready-to-feed" +11710667,"ENFAMIL ENFACARE LIPIL, INF FORM, PREP FR PDR, W/ WATER,NFS","Enfamil EnfaCare LIPIL, infant formula, prepared from powder, made with water, NFS" +11710668,"ENFAMIL A.R., INFANT FORMULA, PREP FR PDR, W/BOT WATER","Enfamil A.R. LIPIL, infant formula, prepared from powder, made with plain bottled water" +11710669,"ENFAMIL A.R., INFANT FORMULA, PREP FR PDR, W/BABY WATER","Enfamil A.R. LIPIL, infant formula, prepared from powder, made with baby water" +11710670,"ENFAMIL GENTLEASE LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil Gentlease LIPIL, infant formula, NS as to form" +11710671,"ENFAMIL GENTLEASE LIPIL, INFANT FORMULA, READY-TO-FEED","Enfamil Gentlease LIPIL, infant formula, ready-to-feed" +11710673,"ENFAMIL GENTLEASE LIPIL, INF FORM, PREP FRM PDR, W/WATER,NFS","Enfamil Gentlease LIPIL, infant formula, prepared from powder, made with water, NFS" +11710674,"ENFAMIL ENFACARE LIPIL, INF FORM, PREP FR PDR, W/ TAP WATER","Enfamil EnfaCare LIPIL, infant formula, prepared from powder, made with tap water" +11710675,"ENFAMIL ENFACARE LIPIL, INF FORM, PREP FR PDR, W/ BOT WATER","Enfamil EnfaCare LIPIL, infant formula, prepared from powder, made with plain bottled water" +11710676,"ENFAMIL ENFACARE LIPIL, INF FORM, PREP FR PDR, W/BABY WATER","Enfamil EnfaCare LIPIL, infant formula, prepared from powder, made with baby water" +11710677,"ENFAMIL GENTLEASE LIPIL, INF FORM, PREP FRM PDR, W/TAP WATER","Enfamil Gentlease LIPIL, infant formula, prepared from powder, made with tap water" +11710678,"ENFAMIL GENTLEASE LIPIL, INF FORM, PREP FRM PDR, W/BOT WATER","Enfamil Gentlease LIPIL, infant formula, prepared from powder, made with plain bottled water" 
+11710679,"ENFAMIL GENTLEASE LIPIL, INF FORM, PREP FRM PDR, W/ BABY WAT","Enfamil Gentlease LIPIL, infant formula, prepared from powder, made with baby water" +11710680,"ENFAMIL ENFAGROW PREM NEXT STEP LIPIL , INF FORMULA, NS FORM","Enfamil Enfagrow PREMIUM Next Step LIPIL, infant formula, NS as to form" +11710681,"ENFAMIL ENFAGROW PREM NEXT STEP, INF FORMULA, RTF","Enfamil Enfagrow PREMIUM Next Step LIPIL, infant formula, ready-to-feed" +11710683,"ENFAMIL ENFAGROW PREM NEXT STEP,INF FORMULA,PDR,W/WATER,NFS","Enfamil Enfagrow PREMIUM Next Step LIPIL, infant formula, prepared from powder, made with water, NFS" +11710687,"ENFAMIL ENFAGROW PREM NEXT STEP,INF FORMULA,PDR,W/TAP WATER","Enfamil Enfagrow PREMIUM Next Step LIPIL, infant formula, prepared from powder, made with tap water" +11710688,"ENFAMIL ENFAGROW PREM NEXT STEP,INF FORMULA,PDR,W/BOT WATER","Enfamil Enfagrow PREMIUM Next Step LIPIL, infant formula, prepared from powder, made with plain bottled water" +11710689,"ENFAMIL ENFAGROW PREM NEXT STEP,INF FORMULA,PDR,W/BABY WATER","Enfamil Enfagrow PREMIUM Next Step LIPIL, infant formula, prepared from powder, made with baby water" +11710690,"ENFAMIL GENTLEASE NEXT STEP LIPIL, INFANT FORMULA, NS FORM","Enfamil Gentlease Next Step LIPIL, infant formula, NS as to form" +11710693,"ENFAMIL GENTLEASE NEXT STEP,INF FORM,PREP FR PDR,W/WATER,NFS","Enfamil Gentlease Next Step LIPIL, infant formula, prepared from powder, made with water, NFS" +11710697,"ENFAMIL GENTLEASE NEXT STEP,INF FORM,PREP FR PDR,W/TAP WATER","Enfamil Gentlease Next Step LIPIL, infant formula, prepared from powder, made with tap water" +11710698,"ENFAMIL GENTLEASE NEXT STEP,INF FORM,PREP FR PDR,W/BOT WATER","Enfamil Gentlease Next Step LIPIL, infant formula, prepared from powder, made with plain bottled water" +11710699,"ENFAMIL GENTLEASE NEXT STEP,INF FORM,PREP FR PDR,W/BABY WAT","Enfamil Gentlease Next Step LIPIL, infant formula, prepared from powder, made with baby water" +11710800,"PEDIASURE, 
INFANT FORMULA, NS AS TO FORM","Pediasure, infant formula, NS as to form" +11710801,"PEDIASURE,INFANT FORMULA, READY-TO-FEED","Pediasure, infant formula, ready-to-feed" +11710805,"PEDIASURE FIBER, INFANT FORMULA, NS AS TO FORM","Pediasure Fiber, infant formula, NS as to form" +11710806,"PEDIASURE FIBER, INFANT FORMULA, READY-TO-FEED","Pediasure Fiber, infant formula, ready-to-feed" +11710910,"GERBER GOOD START GENTLE PLUS, INF FORM, NS FORM","Gerber Good Start Gentle Plus, infant formula, NS as to form" +11710911,"GERBER GOOD START GENTLE PLUS, INFANT FORMULA, READY-TO-FEED","Gerber Good Start Gentle Plus, infant formula, ready-to-feed" +11710912,"GERBER GOOD START GENTLE PLUS,PREP FRM LIQ CONC,W/WATER,NFS","Gerber Good Start Gentle Plus, infant formula, prepared from liquid concentrate, made with water, NFS" +11710913,"GERBER GOOD START GENTLE PLUS, PREP FRM PDR,W/WATER,NFS","Gerber Good Start Gentle Plus, infant formula, prepared from powder, made with water, NFS" +11710914,"GERBER GOOD START GENTLE PLUS,PREP FRM LIQ CONC,W/TAP WATER","Gerber Good Start Gentle Plus, infant formula, prepared from liquid concentrate, made with tap water" +11710915,"GERBER GOOD START GENTLE PLUS,PREP FRM LIQ CONC,W/BOT WATER","Gerber Good Start Gentle Plus, infant formula, prepared from liquid concentrate, made with plain bottled water" +11710916,"GERBER GOOD START GENTLE PLUS,PREP FRM LIQ CONC,W/BABY WATER","Gerber Good Start Gentle Plus, infant formula, prepared from liquid concentrate, made with baby water" +11710917,"GERBER GOOD START GENTLE PLUS, PREP FRM PDR,W/TAP WATER","Gerber Good Start Gentle Plus, infant formula, prepared from powder, made with tap water" +11710918,"GERBER GOOD START GENTLE PLUS, PREP FRM PDR,W/BOT WATER","Gerber Good Start Gentle Plus, infant formula, prepared from powder, made with plain bottled water" +11710919,"GERBER GOOD START GENTLE PLUS, PREP FRM PDR,W/BABY WATER","Gerber Good Start Gentle Plus, infant formula, prepared from powder, made with baby 
water" +11710920,"GERBER GOOD START PROTECT PLUS, INFANT FORMULA, NS FORM","Gerber Good Start Protect Plus, infant formula, NS as to form" +11710923,"GERBER GOOD START PROTECT PLUS, PREP FRM PDR,W/WATER,NFS","Gerber Good Start Protect Plus, infant formula, prepared from powder, made with water, NFS" +11710927,"GERBER GOOD START PROTECT PLUS, PREP FRM PDR,W/TAP WATER","Gerber Good Start Protect Plus, infant formula, prepared from powder, made with tap water" +11710928,"GERBER GOOD START PROTECT PLUS, PREP FRM PDR,W/BOT WATER","Gerber Good Start Protect Plus, infant formula, prepared from powder, made with plain bottled water" +11710929,"GERBER GOOD START PROTECT PLUS, PREP FRM PDR,W/BABY WATER","Gerber Good Start Protect Plus, infant formula, prepared from powder, made with baby water" +11710930,"GERBER GOOD START 2 GENTLE PLUS, INFANT FORMULA, NS FORM","Gerber Good Start 2 Gentle Plus, infant formula, NS as to form" +11710933,"GERBER GOOD START 2 GENTLE PLUS, PREP FRM PDR,W/WATER,NFS","Gerber Good Start 2 Gentle Plus, infant formula, prepared from powder, made with water, NFS" +11710937,"GERBER GOOD START 2 GENTLE PLUS, PREP FRM PDR,W/TAP WATER","Gerber Good Start 2 Gentle Plus, infant formula, prepared from powder, made with tap water" +11710938,"GERBER GOOD START 2 GENTLE PLUS, PREP FRM PDR,W/BOT WATER","Gerber Good Start 2 Gentle Plus, infant formula, prepared from powder, made with plain bottled water" +11710939,"GERBER GOOD START 2 GENTLE PLUS, PREP FRM PDR,W/BABY WATER","Gerber Good Start 2 Gentle Plus, infant formula, prepared from powder, made with baby water" +11710940,"GERBER GOOD START 2 PROTECT PLUS, INFANT FORMULA, NS FORM","Gerber Good Start 2 Protect Plus, infant formula, NS as to form" +11710943,"GERBER GOOD START 2 PROTECT PLUS, PREP FRM PDR,W/WATER,NFS","Gerber Good Start 2 Protect Plus, infant formula, prepared from powder, made with water, NFS" +11710947,"GERBER GOOD START 2 PROTECT PLUS, PREP FRM PDR,W/TAP WATER","Gerber Good Start 2 Protect 
Plus, infant formula, prepared from powder, made with tap water" +11710948,"GERBER GOOD START 2 PROTECT PLUS, PREP FRM PDR,W/BOT WATER","Gerber Good Start 2 Protect Plus, infant formula, prepared from powder, made with plain bottled water" +11710949,"GERBER GOOD START 2 PROTECT PLUS, PREP FRM PDR,W/BABY WATER","Gerber Good Start 2 Protect Plus, infant formula, prepared from powder, made with baby water" +11710960,"AMERICA'S STORE BRAND, INFANT FORMULA, NS AS TO FORM","America's Store Brand, infant formula, NS as to form" +11710961,"AMERICA'S STORE BRAND,INF FORM,PREP FRM LIQ CONC,W/WATER,NFS","America's Store Brand, infant formula, prepared from liquid concentrate, made with water, NFS" +11710962,"AMERICA'S STORE BRAND,INF FORM,PREP FRM PDR,W/ WATER, NFS","America's Store Brand, infant formula, prepared from powder, made with water, NFS" +11710963,"AMERICA'S STORE BRAND, INFANT FORMULA, READY-TO-FEED","America's Store Brand, infant formula, ready-to-feed" +11710964,"AMERICA'S STORE BRAND,INF FORM,PREP FRM LIQ CONC,W/TAP WATER","America's Store Brand, infant formula, prepared from liquid concentrate, made with tap water" +11710965,"AMERICA'S STORE BRAND,INF FORM,PREP FRM LIQ CONC,W/BOT WATER","America's Store Brand, infant formula, prepared from liquid concentrate, made with plain bottled water" +11710966,"AMERICA'S STORE BRAND,INF FORM,PREP FRM LIQ CONC,W/BABY WATR","America's Store Brand, infant formula, prepared from liquid concentrate, made with baby water" +11710967,"AMERICA'S STORE BRAND,INF FORM,PREP FRM PDR,W/ TAP WATER","America's Store Brand, infant formula, prepared from powder, made with tap water" +11710968,"AMERICA'S STORE BRAND,INF FORM,PREP FRM PDR,W/ BOT WATER","America's Store Brand, infant formula, prepared from powder, made with plain bottled water" +11710969,"AMERICA'S STORE BRAND,INF FORM,PREP FRM PDR,W/ BABY WATER","America's Store Brand, infant formula, prepared from powder, made with baby water" +11720310,"ENFAMIL PROSOBEE LIPIL, INFANT 
FORMULA, NS AS TO FORM","Enfamil ProSobee LIPIL, infant formula, NS as to form" +11720311,"ENFAMIL PROSOBEE LIPIL, INFANT FORMULA, READY-TO-FEED","Enfamil ProSobee Lipil, infant formula, ready-to-feed" +11720312,"ENFAMIL PROSOBEE LIPIL,INF FORM, FR LIQ CONC,W/ WATER,NFS","Enfamil ProSobee LIPIL, infant formula, prepared from liquid concentrate, made with water, NFS" +11720313,"ENFAMIL PROSOBEE LIPIL,INF FORM,PREP FR PDR, W/WATER, NFS","Enfamil ProSobee LIPIL, infant formula, prepared from powder, made with water, NFS" +11720314,"ENFAMIL PROSOBEE LIPIL,INF FORM,PREP FR LIQ CONC,W/ TAP WATE","Enfamil ProSobee LIPIL, infant formula, prepared from liquid concentrate, made with tap water" +11720315,"ENFAMIL PROSOBEE LIPIL,INF FORM,PREP FR LIQ CONC,W/ BOT WATE","Enfamil ProSobee LIPIL, infant formula, prepared from liquid concentrate, made with plain bottled water" +11720316,"ENFAMIL PROSOBEE LIPIL,INF FORM,PREP FR LIQ CONC,W/ BABY WAT","Enfamil ProSobee LIPIL, infant formula, prepared from liquid concentrate, made with baby water" +11720317,"ENFAMIL PROSOBEE LIPIL,INF FORM,PREP FR PDR, W/TAP WATER","Enfamil ProSobee LIPIL, infant formula, prepared from powder, made with tap water" +11720318,"ENFAMIL PROSOBEE LIPIL,INF FORM,PREP FR PDR, W/BOT WATER","Enfamil ProSobee LIPIL, infant formula, prepared from powder, made with plain bottled water" +11720319,"ENFAMIL PROSOBEE LIPIL,INF FORM,PREP FR PDR, W/BABY WATER","Enfamil ProSobee LIPIL, infant formula, prepared from powder, made with baby water" +11720320,"ENFAMIL ENFAGROW SOY NEXT STEP LIPIL, INF FORMULA, NS FORM","Enfamil Enfagrow Soy Next Step LIPIL, infant formula, NS as to form" +11720323,"ENFAGROW SOY NEXT STEP LIPIL, INF FOR,FR PDR,W/WATER,NFS","Enfamil Enfagrow Soy Next Step LIPIL, infant formula, prepared from powder, made with water, NFS" +11720327,"ENFAGROW SOY NEXT STEP LIPIL,INF FORM,PREP FR PDR,TAP WATER","Enfamil Enfagrow Soy Next Step LIPIL, infant formula, prepared from powder, made with tap water" 
+11720328,"ENFAGROW SOY NEXT STEP LIPIL,INF FORM,PREP FR PDR, BOT WATER","Enfamil Enfagrow Soy Next Step LIPIL, infant formula, prepared from powder, made with plain bottled water" +11720329,"ENFAGROW SOY NEXT STEP LIPIL,INF FORM,PREP FR PDR,BABY WATER","Enfamil Enfagrow Soy Next Step LIPIL, infant formula, prepared from powder, made with baby water" +11720410,"SIMILAC SENSITIVE ISOMIL SOY, INFANT FORMULA, NS AS TO FORM","Similac Sensitive Isomil Soy, infant formula, NS as to form" +11720411,"SIMILAC SENSITIVE ISOMIL SOY, INFANT FORMULA, READY-TO-FEED","Similac Sensitive Isomil Soy, infant formula, ready-to-feed" +11720412,"SIMILAC SENSITIVE ISOMIL SOY, PREP FR LIQ CONC,W/WATER,NFS","Similac Sensitive Isomil Soy, infant formula, prepared from liquid concentrate, made with water, NFS" +11720413,"SIMILAC SENSITIVE ISOMIL SOY,INF FORM,FR PDR,W/WATER,NFS","Similac Sensitive Isomil Soy, infant formula, prepared from powder, made with water, NFS" +11720414,"SIMILAC SENSITIVE ISOMIL SOY,PREP FR LIQ CONC,W/TAP WATER","Similac Sensitive Isomil Soy, infant formula, prepared from liquid concentrate, made with tap water" +11720415,"SIMILAC SENSITIVE ISOMIL SOY,PREP FR LIQ CONC,W/BOT WATER","Similac Sensitive Isomil Soy, infant formula, prepared from liquid concentrate, made with plain bottled water" +11720416,"SIMILAC SENSITIVE ISOMIL SOY,PREP FR LIQ CONC,W/BABY WATER","Similac Sensitive Isomil Soy, infant formula, prepared from liquid concentrate, made with baby water" +11720417,"SIMILAC SENSITIVE ISOMIL SOY,INF FORM,PREP FR PDR+TAP WATER","Similac Sensitive Isomil Soy, infant formula, prepared from powder, made with tap water" +11720418,"SIMILAC SENSITIVE ISOMIL SOY,INF FORM,PREP FR PDR,W/BOT WATE","Similac Sensitive Isomil Soy, infant formula, prepared from powder, made with plain bottled water" +11720419,"SIMILAC SENSITIVE ISOMIL SOY,INF FORM,PREP FR PDR,W/BABY WAT","Similac Sensitive Isomil Soy, infant formula, prepared from powder, made with baby water" 
+11720430,"SIMILAC EXPERT CARE FOR DIARRHEA, INFANT FORMULA, NS FORM","Similac Expert Care for Diarrhea, infant formula, NS as to form" +11720431,"SIMILAC EXPERT CARE FOR DIARRHEA, INFANT FORMULA, RTF","Similac Expert Care for Diarrhea, infant formula, ready-to-feed" +11720440,"SIMILAC GO AND GROW SOY, INFANT FORMULA, NS AS TO FORM","Similac Go and Grow Soy, infant formula, NS as to form" +11720443,"SIMILAC GO AND GROW SOY, INF FORM, PREP FR PDR, WATER, NFS","Similac Go and Grow Soy, infant formula, prepared from powder, made with water, NFS" +11720447,"SIMILAC GO AND GROW SOY,INF FORM,PREP FR PDR,TAP WATER","Similac Go and Grow Soy, infant formula, prepared from powder, made with tap water" +11720448,"SIMILAC GO AND GROW SOY,INF FORM,PREP FR PDR,BOT WATER","Similac Go and Grow Soy, infant formula, prepared from powder, made with plain bottled water" +11720449,"SIMILAC GO AND GROW SOY,INF FORM,PREP FR PDR,BABY WATER","Similac Go and Grow Soy, infant formula, prepared from powder, made with baby water" +11720610,"GERBER GOOD START SOY PLUS, INFANT FORMULA, NS AS TO FORM","Gerber Good Start Soy Plus, infant formula, NS as to form" +11720611,"GERBER GOOD START SOY PLUS, INFANT FORMULA, READY-TO-FEED","Gerber Good Start Soy Plus, infant formula, ready-to-feed" +11720612,"GERBER GOOD START SOY PLUS,INF FORM,PREP FR LIQ CONC,W/WATER","Gerber Good Start Soy Plus, infant formula, prepared from liquid concentrate, made with water, NFS" +11720613,"GERBER GOOD START SOY PLUS,INF FORM, PREP FR PDR,W/WATER,NFS","Gerber Good Start Soy Plus, infant formula, prepared from powder, made with water, NFS" +11720614,"GERBER GOOD START SOY PLUS,INF FORM,FR LIQ CONC,W/TAP WATER","Gerber Good Start Soy Plus, infant formula, prepared from liquid concentrate, made with tap water" +11720615,"GERBER GOOD START SOY PLUS,INF FORM,FR LIQ CONC,W/BOT WATER","Gerber Good Start Soy Plus, infant formula, prepared from liquid concentrate, made with plain bottled water" +11720616,"GERBER GOOD START SOY 
PLUS,INF FORM,FR LIQ CONC,W/BABY WTR","Gerber Good Start Soy Plus, infant formula, prepared from liquid concentrate, made with baby water" +11720617,"GERBER GOOD START SOY PLUS,INF FORM, PREP FR PDR,W/TAP WATER","Gerber Good Start Soy Plus, infant formula, prepared from powder, made with tap water" +11720618,"GERBER GOOD START SOY PLUS,INF FORM, PREP FR PDR,W/BOT WATER","Gerber Good Start Soy Plus, infant formula, prepared from powder, made with plain bottled water" +11720619,"GERBER GOOD START SOY PLUS,INF FORM, PREP FR PDR,W/BABY WATE","Gerber Good Start Soy Plus, infant formula, prepared from powder, made with baby water" +11720620,"GERBER GOOD START 2 SOY PLUS, INFANT FORMULA, NS AS TO FORM","Gerber Good Start 2 Soy Plus, infant formula, NS as to form" +11720623,"GERBER GOOD START 2 SOY PLUS, INF FORM,PREP FR PDR,WATER,NFS","Gerber Good Start 2 Soy Plus, infant formula, prepared from powder, made with water, NFS" +11720627,"GERBER GOOD START 2 SOY PLUS,INF FORM,PREP FR PDR,TAP WATER","Gerber Good Start 2 Soy Plus, infant formula, prepared from powder, made with tap water" +11720628,"GERBER GOOD START 2 SOY PLUS,INF FORM,PREP FR PDR,BOT WATER","Gerber Good Start 2 Soy Plus, infant formula, prepared from powder, made with plain bottled water" +11720629,"GERBER GOOD START 2 SOY PLUS,INF FORM,PREP FR PDR,BABY WATER","Gerber Good Start 2 Soy Plus, infant formula, prepared from powder, made with baby water" +11720800,"AMERICA'S STORE BRAND SOY, INFANT FORMULA, NS AS TO FORM","America's Store Brand Soy, infant formula, NS as to form" +11720801,"AMERICA'S STORE BRAND SOY, INFANT FORMULA, READY-TO-FEED","America's Store Brand Soy, infant formula, ready-to-feed" +11720802,"AMERICA'S STORE BRAND SOY,INF FORM, FR LIQ CONC,W/WATER NFS","America's Store Brand Soy, infant formula, prepared from liquid concentrate, made with water, NFS" +11720803,"AMERICA'S STORE BRAND SOY,INF FORM,FR PDR,W/WATER,NFS","America's Store Brand Soy, infant formula, prepared from powder, made with 
water, NFS" +11720804,"AMERICA'S STORE BRAND SOY,INF FORM, FR LIQ CONC,W/TAP WATER","America's Store Brand Soy, infant formula, prepared from liquid concentrate, made with tap water" +11720805,"AMERICA'S STORE BRAND SOY,INF FORM,FR LIQ CONC,W/BOT WATER","America's Store Brand Soy, infant formula, prepared from liquid concentrate, made with plain bottled water" +11720806,"AMERICA'S STORE BRAND SOY,INF FORM,FR LIQ CONC,W/BABY WATER","America's Store Brand Soy, infant formula, prepared from liquid concentrate, made with baby water" +11720807,"AMERICA'S STORE BRAND SOY,INF FORM,PREP FR PDR,W/TAP WATER","America's Store Brand Soy, infant formula, prepared from powder, made with tap water" +11720808,"AMERICA'S STORE BRAND SOY,INF FORM,PREP FR PDR,W/BOT WATER","America's Store Brand Soy, infant formula, prepared from powder, made with plain bottled water" +11720809,"AMERICA'S STORE BRAND SOY,INF FORM,PREP FR PDR,W/BABY WATER","America's Store Brand Soy, infant formula, prepared from powder, made with baby water" +11740310,"ENFAMIL NUTRAMIGEN LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil Nutramigen LIPIL, infant formula, NS as to form" +11740311,"ENFAMIL NUTRAMIGEN LIPIL, INFANT FORMULA, READY-TO-FEED","Enfamil Nutramigen LIPIL, infant formula, ready-to-feed" +11740312,"ENFAMIL NUTRAMIGEN LIPIL,INF FORM,PREP FR LIQ CONC,W/WAT,NFS","Enfamil Nutramigen LIPIL, infant formula, prepared from liquid concentrate, made with water, NFS" +11740313,"ENFAMIL NUTRAMIGEN LIPIL, INF FORM, PREP FR PDR, W/WATER,NFS","Enfamil Nutramigen LIPIL, infant formula, prepared from powder, made with water, NFS" +11740314,"ENFAMIL NUTRAMIGEN LIPIL,INF FORM,FR LIQ CONC,W/TAP WATER","Enfamil Nutramigen LIPIL, infant formula, prepared from liquid concentrate, made with tap water" +11740315,"ENFAMIL NUTRAMIGEN LIPIL,INF FORM,FR LIQ CONC,W/BOT WATER","Enfamil Nutramigen LIPIL, infant formula, prepared from liquid concentrate, made with plain bottled water" +11740316,"ENFAMIL NUTRAMIGEN LIPIL,INF FORM,FR 
LIQ CONC,W/BABY WATER","Enfamil Nutramigen LIPIL, infant formula, prepared from liquid concentrate, made with baby water" +11740317,"ENFAMIL NUTRAMIGEN LIPIL, INF FORM, PREP FR PDR, W/TAP WATER","Enfamil Nutramigen LIPIL, infant formula, prepared from powder, made with tap water" +11740318,"ENFAMIL NUTRAMIGEN LIPIL, INF FORM, PREP FR PDR, W/BOT WATER","Enfamil Nutramigen LIPIL, infant formula, prepared from powder, made with plain bottled water" +11740319,"ENFAMIL NUTRAMIGEN LIPIL, INF FORM, PREP FR PDR, W/BABY WATR","Enfamil Nutramigen LIPIL, infant formula, prepared from powder, made with baby water" +11740320,"ENFAMIL NUTRAMIGEN AA LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil Nutramigen AA LIPIL, infant formula, NS as to form" +11740323,"ENFAMIL NUTRAMIGEN AA LIPIL,INF FORM,PREP FR PDR,W/WATER,NFS","Enfamil Nutramigen AA LIPIL, infant formula, prepared from powder, made with water, NFS" +11740327,"ENFAMIL NUTRAMIGEN AA LIPIL,INF FORM,PREP FR PDR,W/TAP WATER","Enfamil Nutramigen AA LIPIL, infant formula, prepared from powder, made with tap water" +11740328,"ENFAMIL NUTRAMIGEN AA LIPIL,INF FORM,PREP FR PDR,W/BOT WATER","Enfamil Nutramigen AA LIPIL, infant formula, prepared from powder, made with plain bottled water" +11740329,"ENFAMIL NUTRAMIGEN AA LIPIL,INF FORM,PREP FR PDR,W/BABY WAT","Enfamil Nutramigen AA LIPIL, infant formula, prepared from powder, made with baby water" +11740400,"ENFAMIL PREGESTIMIL LIPIL, INFANT FORMULA, NS AS TO FORM","Enfamil Pregestimil LIPIL, infant formula, NS as to form" +11740401,"ENFAMIL PREGESTIMIL LIPIL, INFANT FORMULA, READY-TO-FEED","Enfamil Pregestimil LIPIL, infant formula, ready-to-feed" +11740403,"ENFAMIL PREGESTIMIL LIPIL,INF FORM,PREP FR PDR, W/WATER,NFS","Enfamil Pregestimil LIPIL, infant formula, prepared from powder, made with water, NFS" +11740407,"ENFMAIL PREGESTIMIL LIPIL,INF FORM,PREP FR PDR, W/TAP WATER","Enfmail Pregestimil LIPIL, infant formula, prepared from powder, made with tap water" +11740408,"ENFMAIL 
PREGESTIMIL LIPIL,INF FORM,PREP FR PDR, W/BOT WATER","Enfmail Pregestimil LIPIL, infant formula, prepared from powder, made with plain bottled water" +11740409,"ENFMAIL PREGESTIMIL LIPIL,INF FORM,PREP FR PDR, W/BABY WATER","Enfmail Pregestimil LIPIL, infant formula, prepared from powder, made with baby water" +11740510,"ENFAMIL PREMATURE LIPIL 20, LOW IRON, INF FORMULA, NS FORM","Enfamil Premature LIPIL 20, low iron, infant formula, NS as to form" +11740511,"ENFAMIL PREMATURE LIPIL 20, LOW IRON, INFANT FORMULA, RTF","Enfamil Premature LIPIL 20, low iron, infant formula, ready-to-feed" +11740520,"ENFAMIL PREMATURE LIPIL 20, W/IRON, INFANT FORMULA, NS FORM","Enfamil Premature LIPIL 20, with iron, infant formula, NS as to form" +11740521,"ENFAMIL PREMATURE LIPIL 20, WITH IRON, INFANT FORMULA, RTF","Enfamil Premature LIPIL 20, with iron, infant formula, ready-to-feed" +11740540,"ENFAMIL PREMATURE LIPIL 24, LOW IRON, INF FORMULA, NS FORM","Enfamil Premature LIPIL 24, low iron, infant formula, NS as to form" +11740541,"ENFAMIL PREMATURE LIPIL 24, LOW IRON, INFANT FORMULA, RTF","Enfamil Premature LIPIL 24, low iron, infant formula, ready-to-feed" +11740550,"ENFAMIL PREMATURE LIPIL 24, W/IRON, INFANT FORMULA, NS FORM","Enfamil Premature LIPIL 24, with iron, infant formula, NS as to form" +11740551,"ENFAMIL PREMATURE LIPIL 24, WITH IRON, INFANT FORMULA, RTF","Enfamil Premature LIPIL 24, with iron, infant formula, ready-to-feed" +11810000,"MILK, DRY, NOT RECONSTITUTED, NS AS TO FAT","Milk, dry, not reconstituted, NS as to whole, lowfat, or nonfat" +11811000,"MILK, DRY, WHOLE, NOT RECONSTITUTED","Milk, dry, whole, not reconstituted" +11812000,"MILK, DRY, LOWFAT, NOT RECONSTITUTED","Milk, dry, lowfat, not reconstituted" +11813000,"MILK, DRY, NONFAT, NOT RECONSTITUTED","Milk, dry, nonfat, not reconstituted" +11825000,"WHEY, SWEET, DRY","Whey, sweet, dry" +11830100,"COCOA W/DRY MILK & SUGAR, DRY MIX, NOT RECONST","Cocoa (or chocolate) with dry milk and sugar, dry mix, not 
reconstituted" +11830110,"COCOA POWDER W/ NFD MILK, LOW CAL SWEETENER, DRY","Cocoa powder with nonfat dry milk and low calorie sweetener, dry mix, not reconstituted" +11830120,"COCOA W/ WHEY, LO CAL SWEETENER, FORTIFIED, DRY MIX","Cocoa, whey, and low calorie sweetener, fortified, dry mix, not reconstituted" +11830140,"CHOCOLATE, INST, DRY MIX, FORTIFD, NOT RECONST,P.R.","Chocolate, instant, dry mix, fortified with vitamins and minerals, not reconstituted, Puerto Rican style" +11830150,"COCOA POWDER, NOT RECONSTITUTED (NO DRY MILK)","Cocoa powder, not reconstituted (no dry milk)" +11830160,"COCOA-FLAVORED BEVERAGE POWDER W/ SUGAR, DRY MIX","Cocoa (or chocolate) flavored beverage powder with sugar, dry mix, not reconstituted" +11830165,"COCOA FLAV BEV PDR W/ RED SUGAR,DRY MIX,NOT RECONSTITUTED","Cocoa (or chocolate) flavored beverage powder with reduced sugar, dry mix, not reconstituted" +11830170,"COCOA FLAV BEV PDR W/ LOW CAL SWTNR,DRY MIX,NOT RECONSTITUTD","Cocoa (or chocolate) flavored beverage powder with low-calorie sweetener, dry mix, not reconstituted" +11830200,"MILK, MALTED, DRY, UNFORTIFD, NOT RECONST, NOT CHOC","Milk, malted, dry mix, unfortified, not reconstituted, flavors other than chocolate" +11830210,"MILK, MALTED, DRY, FORTIFD, NOT RECONST, NOT CHOC","Milk, malted, dry mix, fortified, not reconstituted, flavors other than chocolate" +11830250,"MILK, MALTED, DRY, UNFORTIFIED, NOT RECONST, CHOC","Milk, malted, dry mix, unfortified, not reconstituted, chocolate" +11830260,"MILK, MALTED, DRY, FORTIFIED, NOT RECONST, CHOC","Milk, malted, dry mix, fortified, not reconstituted, chocolate" +11830400,"MILK BEV POWDER, DRY, NOT RECONST, NOT CHOC","Milk beverage, powder, dry mix, not reconstituted, flavors other than chocolate" +11830450,"MILK BEV MIX, W/ SUGAR,EGG WHITE, NOT RECONSTITUTED","Milk beverage with sugar, dry milk, and egg white powder, dry mix, not reconstituted" +11830500,"MILK BEV POWDER W/ NFD MILK, LOW CAL, DRY, CHOC","Milk beverage, powder, 
with nonfat dry milk and low calorie sweetener, dry mix, not reconstituted, chocolate" +11830550,"MILK BEV POWDER W/ NFD MILK, LOW CAL, DRY, NOT CHOC","Milk beverage, powder, with nonfat dry milk and low calorie sweetener, dry mix, not reconstituted, flavors other than chocolate" +12100100,"CREAM, FLUID, NS AS TO LIGHT, HEAVY OR HALF & HALF","Cream, NS as to light, heavy, or half and half" +12110100,"CREAM, LIGHT, FLUID (INCL COFFEE CRM, TABLE CREAM)","Cream, light, fluid" +12110300,"CREAM, LIGHT, WHIPPED, UNSWEETENED","Cream, light, whipped, unsweetened" +12120100,"CREAM, HALF & HALF","Cream, half and half" +12120105,"CREAM, HALF & HALF, LOW FAT","Cream, half and half, low fat" +12120110,"CREAM, HALF & HALF, FAT FREE","Cream, half and half, fat free" +12130100,"CREAM, HEAVY, FLUID","Cream, heavy, fluid" +12130200,"CREAM, HEAVY, WHIPPED, UNSWEETENED","Cream, heavy, whipped, unsweetened" +12140000,"CREAM, HEAVY, WHIPPED, SWEETENED","Cream, heavy, whipped, sweetened" +12140100,"CREAM, WHIPPED, PRESSURIZED CONTAINER","Cream, whipped, pressurized container" +12140105,"CREAM, WHIPPED, PRESSURIZED CONTAINER, LIGHT","Cream, whipped, pressurized container, light" +12140110,"WHIPPED TOPPING, DAIRY BASED, FAT FREE, PRESSURIZED CONTAINR","Whipped topping, dairy based, fat free, pressurized container" +12200100,"CREAM SUBSTITUTE, NS AS TO FROZEN,LIQUID OR POWDER","Cream substitute, NS as to frozen, liquid, or powdered" +12210100,"CREAM SUBSTITUTE, FROZEN","Cream substitute, frozen" +12210200,"CREAM SUBSTITUTE, LIQUID (INCLUDE COFFEE WHITNER)","Cream substitute, liquid" +12210210,"CREAM SUBSTITUTE, FLAVORED, LIQUID","Cream substitute, flavored, liquid" +12210250,"CREAM SUBSTITUTE, LIGHT, LIQUID","Cream substitute, light, liquid" +12210255,"CREAM SUBSTITUTE, LIGHT, FLAVORED, LIQUID","Cream substitute, light, flavored, liquid" +12210260,"CREAM SUBSTITUTE, FAT FREE, LIQUID","Cream substitute, fat free, liquid" +12210270,"CREAM SUBSTITUTE, FAT FREE, FLAVORED, LIQUID","Cream 
substitute, fat free, flavored, liquid" +12210305,"CREAM SUBSTITUTE, SUGAR FREE, LIQUID","Cream substitute, sugar free, liquid" +12210310,"CREAM SUBSTITUTE, SUGAR FREE, FLAVORED, LIQUID","Cream substitute, sugar free, flavored, liquid" +12210400,"CREAM SUBSTITUTE, POWDERED","Cream substitute, powdered" +12210410,"CREAM SUBST, LIGHT, POWDERED (INCL COFFEE MATE, CRE","Cream substitute, light, powdered" +12210420,"CREAM SUBSTITUTE, FLAVORED, POWDERED","Cream substitute, flavored, powdered" +12210430,"CREAM SUBSTITUTE, FAT FREE, POWDER","Cream substitute, fat free, powder" +12210440,"CREAM SUBSTITUTE, FAT FREE, FLAVORED, POWDER","Cream substitute, fat free, flavored, powder" +12210500,"CREAM SUBSTITUTE, SUGAR FREE, POWDER","Cream substitute, sugar free, powder" +12210505,"CREAM SUBSTITUTE, SUGAR FREE, FLAVORED, POWDER","Cream substitute, sugar free, flavored, powder" +12220000,"WHIPPED TOPPING, NONDAIRY, NS AS TO CND/FRZ/POWDER","Whipped topping, nondairy, NS as to canned, frozen, or made from powdered mix" +12220100,"WHIPPED TOPPING, NONDAIRY, PRESSURIZED CAN","Whipped topping, nondairy, pressurized can" +12220200,"WHIPPED TOPPING, NONDAIRY, FROZEN (INCL COOL WHIP)","Whipped topping, nondairy, frozen" +12220250,"WHIPPED TOPPING, NONDAIRY, FZN, LOWFAT (INCL COOL)","Whipped topping, nondairy, frozen, lowfat" +12220270,"WHIPPED TOPPING, NONDAIRY, FROZEN, FAT FREE","Whipped topping, nondairy, frozen, fat free" +12220280,"WHIPPED TOPPING, NONDAIRY, FROZEN, SUGAR FREE","Whipped topping, nondairy, frozen, sugar free" +12220300,"WHIPPED CREAM SUBST, NONDAIRY, FROM POWDERED MIX","Whipped cream substitute, nondairy, made from powdered mix" +12220400,"WHIP CREAM SUB,NONDAIRY,LOWFAT,LO SUGAR,FROM MIX","Whipped cream substitute, nondairy, lowfat, low sugar, made from powdered mix" +12310100,"SOUR CREAM (INCL W/ CHIVES)","Sour cream" +12310200,"SOUR CREAM, HALF & HALF","Sour cream, half and half" +12310300,"SOUR CREAM, REDUCED FAT","Sour cream, reduced fat" +12310350,"SOUR CREAM, 
LIGHT","Sour cream, light" +12310370,"SOUR CREAM, FAT FREE","Sour cream, fat free" +12320100,"SOUR CREAM, IMITATION","Sour cream, imitation (nondairy)" +12320200,"SOUR CREAM, FILLED, SOUR DRESSING, NONBUTTERFAT","Sour cream, filled, sour dressing, nonbutterfat" +12350000,"DIP, SOUR CREAM BASE (INCLUDE BUTTERMILK-TYPE DIP)","Dip, sour cream base" +12350020,"DIP, SOUR CREAM BASE, REDUCED CALORIE","Dip, sour cream base, reduced calorie" +12350100,"SPINACH DIP","Spinach dip" +12350110,"SPINACH AND ARTICHOKE DIP","Spinach and artichoke dip" +13110000,"ICE CREAM, NFS","Ice cream, NFS" +13110100,"ICE CREAM, REGULAR, NOT CHOCOLATE","Ice cream, regular, flavors other than chocolate" +13110110,"ICE CREAM, REGULAR, CHOCOLATE","Ice cream, regular, chocolate" +13110120,"ICE CREAM, RICH, FLAVORS OTHER THAN CHOCOLATE","Ice cream, rich, flavors other than chocolate" +13110130,"ICE CREAM, RICH, CHOCOLATE","Ice cream, rich, chocolate" +13110140,"ICE CREAM, RICH, NS AS TO FLAVOR","Ice cream, rich, NS as to flavor" +13110200,"ICE CREAM, SOFT SERVE, NOT CHOCOLATE","Ice cream, soft serve, flavors other than chocolate" +13110210,"ICE CREAM, SOFT SERVE, CHOCOLATE","Ice cream, soft serve, chocolate" +13110220,"ICE CREAM, SOFT SERVE, NS AS TO FLAVOR","Ice cream, soft serve, NS as to flavor" +13110310,"ICE CREAM, NO SUGAR ADDED, NS AS TO FLAVOR","Ice cream, no sugar added, NS as to flavor" +13110320,"ICE CREAM, NO SUGAR ADDED, FLAVORS OTHER THAN CHOCOLATE","Ice cream, no sugar added, flavors other than chocolate" +13110330,"ICE CREAM, NO SUGAR ADDED, CHOCOLATE","Ice cream, no sugar added, chocolate" +13120050,"ICE CREAM BAR OR STICK, NOT CHOC- OR CAKE-COVERED","Ice cream bar or stick, not chocolate covered or cake covered" +13120100,"ICE CREAM BAR/STICK, CHOCOLATE COVERED","Ice cream bar or stick, chocolate covered" +13120110,"ICE CREAM BAR, CHOCOLATE/CARAMEL COVERED, W/ NUTS","Ice cream bar or stick, chocolate or caramel covered, with nuts" +13120120,"ICE CREAM BAR,RICH CHOC ICE CREAM,THICK 
CHOC COVER","Ice cream bar or stick, rich chocolate ice cream, thick chocolate covering" +13120121,"ICE CREAM BAR,RICH ICE CREAM,THICK CHOC COVER","Ice cream bar or stick, rich ice cream, thick chocolate covering" +13120130,"ICE CREAM BAR/STICK,RICH ICE CREAM,CHOC COVER,W/NUT","Ice cream bar or stick, rich ice cream, chocolate covered, with nuts" +13120140,"ICE CREAM BAR/STICK, CHOC ICE CREAM, CHOC COVER","Ice cream bar or stick, chocolate ice cream, chocolate covered" +13120300,"ICE CREAM BAR, CAKE-COVERED","Ice cream bar, cake covered" +13120310,"ICE CREAM BAR, STICK OR NUGGET, WITH CRUNCH COATING","Ice cream bar, stick or nugget, with crunch coating" +13120400,"ICE CREAM BAR/STICK W/ FRUIT","Ice cream bar or stick with fruit" +13120500,"ICE CREAM SANDWICH","Ice cream sandwich" +13120550,"ICE CREAM COOKIE SANDWICH (INCLUDE CHIPWICH)","Ice cream cookie sandwich" +13120700,"ICE CREAM CONE, W/ NUTS, NOT CHOCOLATE","Ice cream cone with nuts, flavors other than chocolate" +13120710,"ICE CREAM CONE, CHOC-COVERED, W/ NUTS, NOT CHOC","Ice cream cone, chocolate covered, with nuts, flavors other than chocolate" +13120720,"ICE CREAM CONE, CHOC-COVERED OR DIPPED, NOT CHOC","Ice cream cone, chocolate covered or dipped, flavors other than chocolate" +13120730,"ICE CREAM CONE, NO TOPPING, NOT CHOCOLATE","Ice cream cone, no topping, flavors other than chocolate" +13120740,"ICE CREAM CONE, NO TOPPING, NS AS TO FLAVOR","Ice cream cone, no topping, NS as to flavor" +13120750,"ICE CREAM CONE, W/NUTS, CHOCOLATE ICE CREAM","Ice cream cone with nuts, chocolate ice cream" +13120760,"ICE CREAM CONE, CHOC-COVERED, CHOC ICE CREAM","Ice cream cone, chocolate covered or dipped, chocolate ice cream" +13120770,"ICE CREAM CONE, NO TOPPING, CHOCOLATE ICE CREAM","Ice cream cone, no topping, chocolate ice cream" +13120780,"ICE CREAM CONE, CHOC-COVERED, W/NUT, CHOC ICE CREAM","Ice cream cone, chocolate covered, with nuts, chocolate ice cream" +13120790,"ICE CREAM SUNDAE CONE (INCL DRUMSTICK, ALL 
FLAVORS)","Ice cream sundae cone" +13120800,"ICE CREAM SODA, NOT CHOCOLATE","Ice cream soda, flavors other than chocolate" +13120810,"ICE CREAM SODA, CHOCOLATE","Ice cream soda, chocolate" +13121000,"ICE CREAM SUNDAE, TOPPING NS, W/ WHIPPED CREAM","Ice cream sundae, NS as to topping, with whipped cream" +13121100,"ICE CREAM SUNDAE, FRUIT TOPPING, W/ WHIPPED CREAM","Ice cream sundae, fruit topping, with whipped cream" +13121200,"ICE CREAM SUNDAE, PREPACKAGED, NOT CHOCOLATE","Ice cream sundae, prepackaged type, flavors other than chocolate" +13121300,"ICE CREAM SUNDAE,CHOCOLATE TOPPING,W/ WHIPPED CREAM","Ice cream sundae, chocolate or fudge topping, with whipped cream" +13121400,"ICE CREAM SUNDAE, NOT FRUIT/ CHOC TOP,W/ WHIP CREAM","Ice cream sundae, not fruit or chocolate topping, with whipped cream" +13121500,"ICE CREAM SUNDAE, FUDGE TOPPING, W/ CAKE","Ice cream sundae, fudge topping, with cake, with whipped cream" +13122100,"ICE CREAM PIE, NO CRUST","Ice cream pie, no crust" +13122500,"ICE CREAM PIE,COOKIE CRUST,FUDGE TOPPING,WHIP CREAM","Ice cream pie, with cookie crust, fudge topping, and whipped cream" +13126000,"ICE CREAM, FRIED","Ice cream, fried" +13127000,"DIPPIN' DOTS, ICE CREAM, FLAVORS OTHER THAN CHOCOLATE","Dippin' Dots, flash frozen ice cream snacks, flavors other than chocolate" +13127010,"DIPPIN' DOTS, ICE CREAM, CHOCOLATE","Dippin' Dots, flash frozen ice cream snacks, chocolate" +13130100,"LT ICE CREAM, NS FLAV ( ICE MILK)","Light ice cream, NS as to flavor (formerly ice milk)" +13130300,"LIGHT ICE CREAM,NOT CHOCOLATE (FORMERLY ICE MILK)","Light ice cream, flavors other than chocolate (formerly ice milk)" +13130310,"LIGHT ICE CREAM,CHOCOLATE (FORMERLY ICE MILK)","Light ice cream, chocolate (formerly ice milk)" +13130320,"LIGHT ICE CREAM, NO SUGAR ADDED, NS AS TO FLAVOR","Light ice cream, no sugar added, NS as to flavor" +13130330,"LIGHT ICE CREAM, NO SUGAR ADDED, NOT CHOCOLATE","Light ice cream, no sugar added, flavors other than chocolate" 
+13130340,"LIGHT ICE CREAM, NO SUGAR ADDED, CHOCOLATE","Light ice cream, no sugar added, chocolate" +13130590,"LIGHT ICE CREAM,SOFT SERVE, NS FLAVOR (FORMERLY ICE MILK)","Light ice cream, soft serve, NS as to flavor (formerly ice milk)" +13130600,"LIGHT ICE CREAM,SOFT SERVE, NOT CHOC (FORMERLY ICE MILK)","Light ice cream, soft serve, flavors other than chocolate (formerly ice milk)" +13130610,"LIGHT ICE CREAM,SOFT SERVE CHOC (TASTEE FRZ, DAIRY QUEEN)","Light ice cream, soft serve, chocolate (formerly ice milk)" +13130620,"LIGHT ICE CREAM,SOFT SERVE CONE,NOT CHOC (DAIRY QUEEN)","Light ice cream, soft serve cone, flavors other than chocolate (formerly ice milk)" +13130630,"LIGHT ICE CREAM,SOFT SERVE CONE, CHOC (FORMERLY ICE MILK)","Light ice cream, soft serve cone, chocolate (formerly ice milk)" +13130640,"LIGHT ICE CREAM,SOFT SERVE CONE, NS FLAV(FORMERLY ICE MILK)","Light ice cream, soft serve cone, NS as to flavor (formerly ice milk)" +13130700,"LIGHT ICE CREAM, SOFT SERVE, BLENDED W/ CANDY OR COOKIES","Light ice cream, soft serve, blended with candy or cookies" +13135000,"ICE CREAM SANDWICH, MADE W/ LIGHT ICE CREAM, NOT CHOCOLATE","Ice cream sandwich, made with light ice cream, flavors other than chocolate" +13135010,"ICE CREAM SANDWICH, MADE W/ LIGHT CHOCOLATE ICE CREAM","Ice cream sandwich, made with light chocolate ice cream" +13136000,"ICE CREAM SANDWICH, MADE W/ LIGHT, NO SUGAR ADDED ICE CREAM","Ice cream sandwich, made with light, no sugar added ice cream" +13140100,"LIGHT ICE CREAM,BAR/STICK, CHOC-COATED (FORMERLY ICE MILK)","Light ice cream, bar or stick, chocolate-coated (formerly ice milk)" +13140110,"LIGHT ICE CREAM,BAR, CHOC COVERED,W/NUTS (FORMERLY ICE MILK)","Light ice cream, bar or stick, chocolate covered, with nuts (formerly ice milk)" +13140450,"LIGHT ICE CREAM,CONE, NFS (FORMERLY ICE MILK)","Light ice cream, cone, NFS (formerly ice milk)" +13140500,"LIGHT ICE CREAM,CONE, NOT CHOCOLATE (FORMERLY ICE MILK)","Light ice cream, cone, flavors other 
than chocolate (formerly ice milk)" +13140550,"LIGHT ICE CREAM,CONE, CHOCOLATE (FORMERLY ICE MILK)","Light ice cream, cone, chocolate (formerly ice milk)" +13140570,"LIGHT ICE CREAM, NO SUGAR ADDED, CONE, NS AS TO FLAVOR","Light ice cream, no sugar added, cone, NS as to flavor" +13140575,"LIGHT ICE CREAM, NO SUGAR ADDED, CONE, NOT CHOC","Light ice cream, no sugar added, cone, flavors other than chocolate" +13140580,"LIGHT ICE CREAM, NO SUGAR ADDED, CONE, CHOCOLATE","Light ice cream, no sugar added, cone, chocolate" +13140600,"LIGHT ICE CREAM,SUNDAE,SOFT SERVE,CHOC/FUDGE TOP (ICE MILK)","Light ice cream, sundae, soft serve, chocolate or fudge topping, with whipped cream (formerly ice milk)" +13140630,"LIGHT ICE CREAM,SUNDAE,SOFT SERVE,FRUIT TOPPING (ICE MILK)","Light ice cream, sundae, soft serve, fruit topping, with whipped cream (formerly ice milk)" +13140650,"LIGHT ICE CREAM,SUNDAE,SOFT SERVE,NOT FRUIT/CHOC TOPPING","Light ice cream, sundae, soft serve, not fruit or chocolate topping, with whipped cream (formerly ice milk)" +13140660,"LIGHT ICE CREAM,SUNDAE,CHOC / FUDGE TOP (W/O WHIP CREAM)","Light ice cream, sundae, soft serve, chocolate or fudge topping (without whipped cream) (formerly ice milk)" +13140670,"LIGHT ICE CREAM,SUNDAE,FRUIT TOP (W/O WHIP CREAM)(ICE MILK)","Light ice cream, sundae, soft serve, fruit topping (without whipped cream) (formerly ice milk)" +13140680,"LIGHT ICE CREAM,SUNDAE,NO FRUIT/CHOC TOP (W/O WHIP CREAM)","Light ice cream, sundae, soft serve, not fruit or chocolate topping (without whipped cream) (formerly ice milk)" +13140700,"LIGHT ICE CREAM,CREAMSICLE OR DREAMSICLE (FORMERLY ICE MILK)","Light ice cream, creamsicle or dreamsicle (formerly ice milk)" +13140710,"LIGHT ICE CREAM, CREAMSICLE OR DREAMSICLE, NO SUGAR ADDED","Light ice cream, creamsicle or dreamsicle, no sugar added" +13140900,"LIGHT ICE CREAM,FUDGESICLE (FORMERLY ICE MILK)","Light ice cream, fudgesicle (formerly ice milk)" +13142000,"MILK DESSERT BAR/STICK, FROZEN, W/ 
COCONUT","Milk dessert bar or stick, frozen, with coconut" +13150000,"SHERBET, ALL FLAVORS","Sherbet, all flavors" +13160150,"FAT FREE ICE CREAM, NO SUGAR ADD, CHOC","Fat free ice cream, no sugar added, chocolate" +13160160,"FAT FREE ICE CREAM, NO SUGAR ADD, FLAVORS OTHER THAN CHOC","Fat free ice cream, no sugar added, flavors other than chocolate" +13160400,"FAT FREE ICE CREAM, FLAVORS OTHER THAN CHOC","Fat free ice cream, flavors other than chocolate" +13160410,"FAT FREE ICE CREAM, CHOC","Fat free ice cream, chocolate" +13160420,"FAT FREE ICE CREAM, NS AS TO FLAVOR","Fat free ice cream, NS as to flavor" +13161000,"MILK DESSERT BAR, FROZEN, MADE FROM LOWFAT MILK","Milk dessert bar, frozen, made from lowfat milk" +13161500,"MILK DESSERT SANDWICH BAR, FROZEN, DIETARY","Milk dessert sandwich bar, frozen, made from lowfat milk" +13161520,"MILK DESSERT SANDWICH BAR,FRZ,W/LOW-CAL SWEET,LOFAT","Milk dessert sandwich bar, frozen, with low-calorie sweetener, made from lowfat milk" +13161600,"MILK DES BAR, FROZEN, LOFAT MILK&LO CAL SWEETENER","Milk dessert bar, frozen, made from lowfat milk and low calorie sweetener" +13161630,"LIGHT ICE CREAM,BAR/STICK, W/ LOW-CAL SWEETENER, CHOC COAT","Light ice cream, bar or stick, with low-calorie sweetener, chocolate-coated (formerly ice milk)" +13170000,"BAKED ALASKA","Baked Alaska" +13200110,"PUDDING, NFS","Pudding, NFS" +13210110,"PUDDING, BREAD (INCLUDE W/ RAISINS)","Pudding, bread" +13210150,"PUERTO RICAN BREAD PUDDING MADE W/ EVAP MILK & RUM","Puerto Rican bread pudding made with evaporated milk and rum (Budin de pan)" +13210160,"DIPLOMAT PUDDING, P.R. 
(BUDIN DIPLOMATICO)","Diplomat pudding, Puerto Rican style (Budin Diplomatico)" +13210180,"PUDDING, MEXICAN BREAD (CAPIROTADA)","Pudding, Mexican bread (Capirotada)" +13210190,"PUDDING, MEXICAN BREAD (CAPIROTADA), LOWER FAT","Pudding, Mexican bread (Capirotada), lower fat" +13210220,"PUDDING, CHOCOLATE, NS AS TO FROM DRY MIX/RTE","Pudding, chocolate, NS as to from dry mix or ready-to-eat" +13210250,"PUDDING, CHOC, LO CAL, W/ART SWTNER, NS DRY/RTE","Pudding, chocolate, low calorie, containing artificial sweetener, NS as to from dry mix or ready-to-eat" +13210260,"RICE FLOUR CREAM, P.R.STYLE (MANJAR BLANCO)","Rice flour cream, Puerto Rican style (manjar blanco)" +13210270,"CUSTARD, P.R. (MAICENA, NATILLA)","Custard, Puerto Rican style (Maicena, Natilla)" +13210280,"PUDDING, NOT CHOC, NS FROM DRY OR RTE","Pudding, flavors other than chocolate, NS as to from dry mix or ready-to-eat" +13210290,"PUDDING, NOT CHOC, LO CAL, W/ART SWTNER, NS DRY MIX OR RTE","Pudding, flavors other than chocolate, low calorie, containing artificial sweetener, NS as to from dry mix or ready-to-eat" +13210300,"CUSTARD","Custard" +13210350,"FLAN","Flan" +13210410,"PUDDING, RICE","Pudding, rice" +13210450,"PUDDING, RICE FLOUR, W/ NUTS (INDIAN DESSERT)","Pudding, rice flour, with nuts (Indian dessert)" +13210500,"PUDDING, TAPIOCA,MADE FROM HOME RECIPE, MADE W/ MILK","Pudding, tapioca, made from home recipe, made with milk" +13210520,"PUDDING, TAPIOCA,MADE FROM DRY MIX,MADE W/ MILK","Pudding, tapioca, made from dry mix, made with milk" +13210530,"PUDDING, TAPIOCA,CHOCOLATE,MADE W/ MILK","Pudding, tapioca, chocolate, made with milk" +13210610,"PUDDING, COCONUT","Pudding, coconut" +13210710,"PUDDING, INDIAN (MILK, MOLASSES, CORNMEAL-BASED)","Pudding, Indian (milk, molasses and cornmeal-based pudding)" +13210750,"PUDDING, PUMPKIN","Pudding, pumpkin" +13210810,"P.R. 
PUMPKIN PUDDING (FLAN DE CALABAZA)","Puerto Rican pumpkin pudding (Flan de calabaza)" +13210820,"FRESH CORN CUSTARD, PUERTO RICAN STYLE","Fresh corn custard, Puerto Rican style (Mazamorra, Mundo Nuevo)" +13220110,"PUDDING,NOT CHOCOLATE,PREPARED FROM DRY MIX,MILK ADDED","Pudding, flavors other than chocolate, prepared from dry mix, milk added" +13220120,"PUDDING,CHOCOLATE,PREPARED FROM DRY MIX,MILK ADDED","Pudding, chocolate, prepared from dry mix, milk added" +13220210,"PUDDING,NOT CHOC,FROM DRY,LOW CAL,ARTIFICIAL SWEET,W/MILK","Pudding, flavors other than chocolate, prepared from dry mix, low calorie, containing artificial sweetener, milk added" +13220220,"PUDDING,CHOC,FROM DRY,LOW CAL,ARTIFICIAL SWEET,MILK ADDED","Pudding, chocolate, prepared from dry mix, low calorie, containing artificial sweetener, milk added" +13220230,"PUDDING, RTE, CHOCOLATE, RED FAT","Pudding, ready-to-eat, chocolate, reduced fat" +13220235,"PUDDING, RTE, CHOCOLATE, FAT FREE","Pudding, ready-to-eat, chocolate, fat free" +13220240,"PUDDING, RTE, FLAVORS OTHER THAN CHOCOLATE, RED FAT","Pudding, ready-to-eat, flavors other than chocolate, reduced fat" +13220245,"PUDDING, RTE, FLAVORS OTHER THAN CHOCOLATE, FAT FREE","Pudding, ready-to-eat, flavors other than chocolate, fat free" +13230110,"PUDDING, RTE, FLAVORS OTHER THAN CHOCOLATE","Pudding, ready-to-eat, flavors other than chocolate" +13230120,"PUDDING, RTE, LOW CAL, W/ARTIFICIAL SWTNR, NOT CHOC","Pudding, ready-to-eat, low calorie, containing artificial sweetener, flavors other than chocolate" +13230130,"PUDDING, RTE, CHOCOLATE","Pudding, ready-to-eat, chocolate" +13230140,"PUDDING,RTE, LO CAL/ W ART SWTNER, CHOC","Pudding, ready-to-eat, low calorie, containing artificial sweetener, chocolate" +13230200,"PUDDING, RTE, CHOC & NON-CHOC FLAVORS COMBINED","Pudding, ready-to-eat, chocolate and non-chocolate flavors combined" +13230500,"PUDDING, READY-TO-EAT, TAPIOCA","Pudding, ready-to-eat, tapioca" +13230510,"PUDDING, READY-TO-EAT, TAPIOCA, FAT 
FREE","Pudding, ready-to-eat, tapioca, fat free" +13241000,"PUDDING, W/ FRUIT & VANILLA WAFERS","Pudding, with fruit and vanilla wafers" +13250000,"MOUSSE, CHOCOLATE","Mousse, chocolate" +13250100,"MOUSSE, NOT CHOCOLATE","Mousse, not chocolate" +13250200,"MOUSSE,CHOCOLATE,LOW FAT,REDUCED CAL,DRY MIX","Mousse, chocolate, lowfat, reduced calorie, prepared from dry mix, water added" +13252100,"COCONUT CUSTARD, P.R. (FLAN DE COCO)","Coconut custard, Puerto Rican style (Flan de coco)" +13252200,"MILK DESSERT OR MILK CANDY, P.R. (DULCE DE LECHE)","Milk dessert or milk candy, Puerto Rican style (Dulce de leche)" +13252500,"BARFI/BURFI,INDIAN DESSERT,FROM MILK/CREAM/RICOTTA","Barfi or Burfi, Indian dessert, made from milk and/or cream and/or Ricotta cheese" +13252600,"TIRAMISU","Tiramisu" +13310000,"CUSTARD PUDDING, NOT CHOC, BABY, NS AS TO STR OR JR","Custard pudding, flavor other than chocolate, baby food, NS as to strained or junior" +13311000,"CUSTARD PUDDING, BABY, NOT CHOCOLATE, STRAINED","Custard pudding, baby food, flavor other than chocolate, strained" +13312000,"CUSTARD PUDDING, BABY, NOT CHOCOLATE, JUNIOR","Custard pudding, baby food, flavor other than chocolate, junior" +13411000,"WHITE SAUCE, MILK SAUCE","White sauce, milk sauce" +13412000,"MILK GRAVY, QUICK GRAVY","Milk gravy, quick gravy" +14010000,"CHEESE, NFS","Cheese, NFS" +14101010,"CHEESE, BLUE OR ROQUEFORT","Cheese, Blue or Roquefort" +14102010,"CHEESE, BRICK","Cheese, Brick" +14103010,"CHEESE, CAMEMBERT","Cheese, Camembert" +14103020,"CHEESE, BRIE","Cheese, Brie" +14104100,"CHEESE, CHEDDAR","Cheese, Cheddar" +14104110,"CHEESE, CHEDDAR, REDUCED FAT","Cheese, Cheddar, reduced fat" +14104115,"CHEESE, CHEDDAR, NONFAT OR FAT FREE","Cheese, Cheddar, nonfat or fat free" +14104200,"CHEESE, COLBY","Cheese, Colby" +14104250,"CHEESE, COLBY JACK","Cheese, Colby Jack" +14104400,"CHEESE, FETA (INCLUDE GOAT CHEESE)","Cheese, Feta" +14104600,"CHEESE, FONTINA","Cheese, Fontina" +14104700,"CHEESE, GOAT","Cheese, goat" 
+14105010,"CHEESE, GOUDA OR EDAM","Cheese, Gouda or Edam" +14105200,"CHEESE, GRUYERE","Cheese, Gruyere" +14106010,"CHEESE, LIMBURGER","Cheese, Limburger" +14106200,"CHEESE, MONTEREY","Cheese, Monterey" +14106500,"CHEESE, MONTEREY, REDUCED FAT","Cheese, Monterey, reduced fat" +14107010,"CHEESE, MOZZARELLA, NFS (INCLUDE PIZZA CHEESE)","Cheese, Mozzarella, NFS" +14107020,"CHEESE, MOZZARELLA, WHOLE MILK","Cheese, Mozzarella, whole milk" +14107030,"CHEESE, MOZZARELLA, PART SKIM (INCL ""LOWFAT"")","Cheese, Mozzarella, part skim" +14107040,"CHEESE, MOZZARELLA, REDUCED SODIUM","Cheese, Mozzarella, reduced sodium" +14107060,"CHEESE, MOZZARELLA, NONFAT OR FAT FREE","Cheese, Mozzarella, nonfat or fat free" +14107200,"CHEESE, MUENSTER","Cheese, Muenster" +14107250,"CHEESE, MUENSTER, REDUCED FAT","Cheese, Muenster, reduced fat" +14108010,"CHEESE, PARMESAN, DRY, GRATED (INCLUDE ROMANO)","Cheese, Parmesan, dry grated" +14108015,"CHEESE, PARMESAN, DRY GRATED, REDUCED FAT","Cheese, Parmesan, dry grated, reduced fat" +14108020,"CHEESE, PARMESAN, HARD (INCLUDE ROMANO)","Cheese, Parmesan, hard" +14108060,"CHEESE, PARMESAN, DRY GRATED, FAT FREE","Cheese, Parmesan, dry grated, fat free" +14108200,"CHEESE, PORT DU SALUT","Cheese, Port du Salut" +14108400,"CHEESE, PROVOLONE","Cheese, Provolone" +14108420,"CHEESE, PROVOLONE, REDUCED FAT","Cheese, provolone, reduced fat" +14109010,"CHEESE, SWISS","Cheese, Swiss" +14109020,"CHEESE, SWISS, REDUCED SODIUM","Cheese, Swiss, reduced sodium" +14109030,"CHEESE, SWISS, REDUCED FAT","Cheese, Swiss, reduced fat" +14109040,"CHEESE, SWISS, NONFAT OR FAT FREE","Cheese, Swiss, nonfat or fat free" +14110010,"CHEESE, CHEDDAR, REDUCED SODIUM","Cheese, Cheddar, reduced sodium" +14120010,"CHEESE, MEXICAN BLEND","Cheese, Mexican blend" +14120020,"CHEESE, MEXICAN BLEND, REDUCED FAT","Cheese, Mexican blend, reduced fat" +14131000,"QUESO ANEJO (AGED MEXICAN CHEESE)","Queso Anejo (aged Mexican cheese)" +14131500,"QUESO ASADERO (INCL OAXACAN-STYLE STRING 
CHEESE)","Queso Asadero" +14132000,"QUESO CHIHUAHUA (INCL MENNONITE CHEESE)","Queso Chihuahua" +14133000,"QUESO FRESCO (HISPANIC-STYLE FARMER CHEESE)","Queso Fresco" +14134000,"QUESO COTIJA","Queso cotija" +14200100,"CHEESE, COTTAGE, NFS","Cheese, cottage, NFS" +14201010,"CHEESE, COTTAGE, CREAMED","Cheese, cottage, creamed, large or small curd" +14201200,"COTTAGE CHEESE, FARMER'S","Cottage cheese, farmer's" +14201500,"CHEESE, RICOTTA","Cheese, Ricotta" +14202010,"CHEESE, COTTAGE, W/ FRUIT","Cheese, cottage, with fruit" +14202020,"CHEESE, COTTAGE, W/ VEGETABLES","Cheese, cottage, with vegetables" +14203010,"CHEESE, COTTAGE, DRY CURD","Cheese, cottage, dry curd" +14203020,"CHEESE, COTTAGE, SALTED, DRY CURD","Cheese, cottage, salted, dry curd" +14203510,"P.R. WHITE CHEESE (QUESO DEL PAIS, BLANCO)","Puerto Rican white cheese (queso del pais, blanco)" +14204010,"CHEESE, COTTAGE, LOWFAT","Cheese, cottage, lowfat (1-2% fat)" +14204020,"CHEESE, COTTAGE, LOWFAT, W/ FRUIT","Cheese, cottage, lowfat, with fruit" +14204030,"CHEESE, COTTAGE, LOWFAT, W/ VEGETABLES","Cheese, cottage, lowfat, with vegetables" +14206010,"CHEESE, COTTAGE, LOWFAT, LOW SODIUM","Cheese, cottage, lowfat, low sodium" +14207010,"CHEESE, COTTAGE, LOWFAT, LACTOSE REDUCED","Cheese, cottage, lowfat, lactose reduced" +14301010,"CHEESE, CREAM","Cheese, cream" +14303010,"CHEESE, CREAM, LIGHT/LITE (FORMERLY CALLED CR CHEESE LOWFAT)","Cheese, cream, light or lite (formerly called Cream Cheese Lowfat)" +14410100,"CHEESE, AMERICAN AND SWISS BLENDS","Cheese, American and Swiss blends" +14410110,"CHEESE, AMERICAN","Cheese, American" +14410120,"CHEESE, AMERICAN, REDUCED FAT","Cheese, American, reduced fat" +14410130,"CHEESE, AMERICAN, NONFAT OR FAT FREE","Cheese, American, nonfat or fat free" +14410210,"CHEESE, AMERICAN, REDUCED SODIUM","Cheese, American, reduced sodium" +14410330,"CHEESE SPREAD, AMERICAN OR CHEDDAR CHEESE BASE, REDUCED FAT","Cheese spread, American or Cheddar cheese base, reduced fat" 
+14410380,"CHEESE, PROCESSED CREAM CHEESE PRODUCT, NONFAT","Cheese, processed cream cheese product, nonfat or fat free" +14410500,"CHEESE, PROCESSED, CHEESE FOOD","Cheese, processed cheese food" +14410600,"CHEESE, PROCESSED, W/VEGETABLES(INCL PEPPER CHEESE)","Cheese, processed, with vegetables" +14410620,"CHEESE, WITH WINE","Cheese, with wine" +14420100,"CHEESE SPREAD, AMERICAN OR CHEDDAR CHEESE BASE","Cheese spread, American or Cheddar cheese base" +14420160,"CHEESE SPREAD, SWISS CHEESE BASE","Cheese spread, Swiss cheese base" +14420200,"CHEESE SPRD, CREAM CHEESE, REG","Cheese spread, cream cheese, regular" +14420210,"CHEESE SPREAD, CREAM CHEESE, LIGHT OR LITE","Cheese spread, cream cheese, light or lite" +14420300,"CHEESE SPREAD, PRESSURIZED CAN","Cheese spread, pressurized can" +14502000,"IMITATION CHEESE","Imitation cheese" +14610200,"COTTAGE CHEESE, W/ GELATIN DESSERT","Cheese, cottage cheese, with gelatin dessert" +14610210,"COTTAGE CHEESE, W/ GELATIN DESSERT & FRUIT","Cheese, cottage cheese, with gelatin dessert and fruit" +14610250,"COTTAGE CHEESE W/ GELATIN DESSERT & VEGETABLES","Cheese, cottage cheese, with gelatin dessert and vegetables" +14610520,"CHEESE W/ NUTS (INCL CHEESE BALL)","Cheese with nuts" +14620100,"DIP, CREAM CHEESE BASE","Dip, cream cheese base" +14620120,"SHRIMP DIP, CREAM CHEESE BASE (INCL CLAM DIP)","Shrimp dip, cream cheese base" +14620150,"DIP, CHEESE W/ CHILI PEPPER (CHILI CON QUESO)","Dip, cheese with chili pepper (chili con queso)" +14620200,"DIP, CHEESE BASE OTHER THAN CREAM CHEESE","Dip, cheese base other than cream cheese" +14620300,"TOPPING FROM CHEESE PIZZA","Topping from cheese pizza" +14620310,"TOPPING FROM VEGETABLE PIZZA","Topping from vegetable pizza" +14620320,"TOPPING FROM MEAT PIZZA","Topping from meat pizza" +14620330,"TOPPING FROM MEAT AND VEGETABLE PIZZA","Topping from meat and vegetable pizza" +14630100,"CHEESE FONDUE","Cheese fondue" +14630200,"CHEESE SOUFFLE","Cheese souffle" +14630300,"WELSH RAREBIT","Welsh 
rarebit" +14640000,"CHEESE SANDWICH","Cheese sandwich" +14640100,"CHEESE SANDWICH, GRILLED","Cheese sandwich, grilled" +14640200,"CHEESE SANDWICH, HOAGIE","Cheese sandwich, hoagie" +14650100,"CHEESE SAUCE","Cheese sauce" +14650150,"CHEESE SAUCE MADE W/ LOWFAT CHEESE","Cheese sauce made with lowfat cheese" +14650160,"ALFREDO SAUCE","Alfredo sauce" +14660200,"CHEESE, NUGGETS, FRIED (INCL BANQUET BRAND)","Cheese, nuggets or pieces, breaded, baked, or fried" +14670000,"MOZZARELLA CHEESE, TOMATO, BASIL, W/ OIL, VINEGAR","Mozzarella cheese, tomato, and basil, with oil and vinegar dressing" +14710100,"CHEDDAR CHEESE SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Cheddar cheese soup, home recipe, canned or ready-to-serve" +14710200,"BEER CHEESE SOUP, MADE WITH MILK","Beer cheese soup, made with milk" +2e+07,"MEAT, NFS","Meat, NFS" +20000070,"MEAT, BABY, NS AS TO TYPE, NS AS TO STR OR JR","Meat, baby food, NS as to type, NS as to strained or junior" +20000090,"MEAT STICKS, BABY, NS AS TO TYPE OF MEAT","Meat sticks, baby food, NS as to type of meat" +20000200,"GROUND MEAT,NFS","Ground meat, NFS" +21000100,"BEEF, NS AS TO CUT, COOKED, NS AS TO FAT","Beef, NS as to cut, cooked, NS as to fat eaten" +21000110,"BEEF, NS AS TO CUT, COOKED, LEAN & FAT","Beef, NS as to cut, cooked, lean and fat eaten" +21000120,"BEEF, NS AS TO CUT, COOKED, LEAN ONLY","Beef, NS as to cut, cooked, lean only eaten" +21001000,"STEAK, NS AS TO TYPE OF MEAT, COOKED, NS AS TO FAT","Steak, NS as to type of meat, cooked, NS as to fat eaten" +21001010,"STEAK, NS AS TO TYPE OF MEAT, COOKED, LEAN & FAT","Steak, NS as to type of meat, cooked, lean and fat eaten" +21001020,"STEAK, NS AS TO TYPE OF MEAT, COOKED, LEAN ONLY","Steak, NS as to type of meat, cooked, lean only eaten" +21002000,"BEEF, PICKLED","Beef, pickled" +21003000,"BEEF, NS AS TO CUT, FRIED, NS AS TO FAT EATEN","Beef, NS as to cut, fried, NS to fat eaten" +21101000,"BEEF STEAK, NS AS TO COOKING METHOD, NS AS TO FAT","Beef steak, NS as to cooking 
method, NS as to fat eaten" +21101010,"BEEF STEAK, NS AS TO COOKING METHOD, LEAN & FAT","Beef steak, NS as to cooking method, lean and fat eaten" +21101020,"BEEF STEAK, NS AS TO COOKING METHOD, LEAN ONLY","Beef steak, NS as to cooking method, lean only eaten" +21101110,"BEEF STEAK, BROILED OR BAKED, NS AS TO FAT","Beef steak, broiled or baked, NS as to fat eaten" +21101120,"BEEF STEAK, BROILED OR BAKED, LEAN & FAT","Beef steak, broiled or baked, lean and fat eaten" +21101130,"BEEF STEAK, BROILED OR BAKED, LEAN ONLY","Beef steak, broiled or baked, lean only eaten" +21102110,"BEEF STEAK, FRIED, NS AS TO FAT","Beef steak, fried, NS as to fat eaten" +21102120,"BEEF STEAK, FRIED, LEAN & FAT","Beef steak, fried, lean and fat eaten" +21102130,"BEEF STEAK, FRIED, LEAN ONLY","Beef steak, fried, lean only eaten" +21103110,"BEEF STEAK,BREADED/FLOURED,BAKED/FRIED,NS AS TO FAT","Beef steak, breaded or floured, baked or fried, NS as to fat eaten" +21103120,"BEEF STEAK, BREADED/FLOURED,BAKED/FRIED, LEAN & FAT","Beef steak, breaded or floured, baked or fried, lean and fat eaten" +21103130,"BEEF STEAK, BREADED/FLOURED, BAKED/FRIED, LEAN ONLY","Beef steak, breaded or floured, baked or fried, lean only eaten" +21104110,"BEEF STEAK, BATTERED, FRIED, NS AS TO FAT","Beef steak, battered, fried, NS as to fat eaten" +21104120,"BEEF STEAK, BATTERED, FRIED, LEAN & FAT","Beef steak, battered, fried, lean and fat eaten" +21104130,"BEEF STEAK, BATTERED, FRIED, LEAN ONLY","Beef steak, battered, fried, lean only eaten" +21105110,"BEEF STEAK, BRAISED, NS AS TO FAT","Beef steak, braised, NS as to fat eaten" +21105120,"BEEF STEAK, BRAISED, LEAN & FAT","Beef steak, braised, lean and fat eaten" +21105130,"BEEF STEAK, BRAISED, LEAN ONLY","Beef steak, braised, lean only eaten" +21301000,"BEEF, OXTAILS, COOKED","Beef, oxtails, cooked" +21302000,"BEEF, NECK BONES, COOKED","Beef, neck bones, cooked" +21304000,"BEEF, SHORTRIBS, COOKED, NS AS TO FAT","Beef, shortribs, cooked, NS as to fat eaten" 
+21304110,"BEEF, SHORTRIBS, COOKED, LEAN & FAT","Beef, shortribs, cooked, lean and fat eaten" +21304120,"BEEF, SHORTRIBS, COOKED, LEAN ONLY","Beef, shortribs, cooked, lean only eaten" +21304200,"BEEF, SHORTRIBS, BBQ, W/ SAUCE, NS AS TO FAT","Beef, shortribs, barbecued, with sauce, NS as to fat eaten" +21304210,"BEEF, SHORTRIBS, BBQ, W/ SAUCE, LEAN & FAT","Beef, shortribs, barbecued, with sauce, lean and fat eaten" +21304220,"BEEF, SHORTRIBS, BBQ, W/ SAUCE, LEAN ONLY","Beef, shortribs, barbecued, with sauce, lean only eaten" +21305000,"BEEF, COW HEAD, COOKED","Beef, cow head, cooked" +21401000,"BEEF, ROAST, ROASTED, NS AS TO FAT","Beef, roast, roasted, NS as to fat eaten" +21401110,"BEEF, ROAST, ROASTED, LEAN & FAT","Beef, roast, roasted, lean and fat eaten" +21401120,"BEEF, ROAST, ROASTED, LEAN ONLY","Beef, roast, roasted, lean only eaten" +21401400,"BEEF, ROAST, CANNED","Beef, roast, canned" +21407000,"BEEF, POT ROAST, BRAISED OR BOILED, NS AS TO FAT","Beef, pot roast, braised or boiled, NS as to fat eaten" +21407110,"BEEF, POT ROAST, BRAISED OR BOILED, LEAN & FAT","Beef, pot roast, braised or boiled, lean and fat eaten" +21407120,"BEEF, POT ROAST, BRAISED OR BOILED, LEAN ONLY","Beef, pot roast, braised or boiled, lean only eaten" +21410000,"BEEF, STEW MEAT, COOKED, NS AS TO FAT","Beef, stew meat, cooked, NS as to fat eaten" +21410110,"BEEF, STEW MEAT, COOKED, LEAN & FAT","Beef, stew meat, cooked, lean and fat eaten" +21410120,"BEEF, STEW MEAT, COOKED, LEAN ONLY","Beef, stew meat, cooked, lean only eaten" +21416000,"CORNED BEEF, COOKED, NS AS TO FAT","Corned beef, cooked, NS as to fat eaten" +21416110,"CORNED BEEF, COOKED, LEAN & FAT","Corned beef, cooked, lean and fat eaten" +21416120,"CORNED BEEF, COOKED, LEAN ONLY","Corned beef, cooked, lean only eaten" +21416150,"CORNED BEEF, CANNED, READY TO EAT","Corned beef, canned, ready-to-eat" +21417100,"BEEF BRISKET, COOKED, NS AS TO FAT","Beef brisket, cooked, NS as to fat eaten" +21417110,"BEEF BRISKET, COOKED, LEAN & 
FAT","Beef brisket, cooked, lean and fat eaten" +21417120,"BEEF BRISKET, COOKED, LEAN ONLY","Beef brisket, cooked, lean only eaten" +21420100,"BEEF, SANDWICH STEAK (FLAKED,FORMED, THINLY SLICED)","Beef, sandwich steak (flaked, formed, thinly sliced)" +21500000,"GROUND BEEF, RAW","Ground beef, raw" +21500100,"GROUND BEEF OR PATTY, NS AS TO %LEAN","Ground beef or patty, cooked, NS as to percent lean (formerly NS as to regular, lean, or extra lean)" +21500200,"GROUND BEEF OR PATTY, BREADED, COOKED","Ground beef or patty, breaded, cooked" +21500300,"GROUND BEEF PATTY, COOKED (FOR FAST FOOD SANDWICHES)","Ground beef patty, cooked (for fast food sandwiches)" +21501000,"GROUND BEEF, LESS THAN 80% LEAN, COOKED","Ground beef, less than 80% lean, cooked (formerly regular)" +21501200,"GROUND BEEF, 80% - 84% LEAN, COOKED","Ground beef, 80% - 84% lean, cooked (formerly lean)" +21501300,"GROUND BEEF, 85% - 89% LEAN, COOKED","Ground beef, 85% - 89% lean, cooked (formerly extra lean)" +21501350,"GROUND BEEF, 90% - 94% LEAN, COOKED","Ground beef, 90% - 94% lean, cooked" +21501360,"GROUND BEEF, 95% OR MORE LEAN, COOKED","Ground beef, 95% or more lean, cooked" +21540100,"GROUND BEEF W/ TEXTURED VEGETABLE PROTEIN, COOKED","Ground beef with textured vegetable protein, cooked" +21601000,"BEEF, BACON, COOKED","Beef, bacon, cooked" +21601500,"BEEF BACON, FORMED, LEAN MEAT ADDED (INCL SIZZLEAN)","Beef, bacon, formed, lean meat added, cooked" +21602000,"BEEF, DRIED, CHIPPED, UNCOOKED","Beef, dried, chipped, uncooked" +21602010,"BEEF, DRIED, CHIPPED, COOKED IN FAT","Beef, dried, chipped, cooked in fat" +21602100,"BEEF JERKY","Beef jerky" +21603000,"BEEF, PASTRAMI (BEEF, SMOKED, SPICED)","Beef, pastrami (beef, smoked, spiced)" +21701000,"BEEF, BABY, NS AS TO STRAINED OR JUNIOR","Beef, baby food, NS as to strained or junior" +21701010,"BEEF, BABY, STRAINED","Beef, baby food, strained" +21701020,"BEEF, BABY, JUNIOR","Beef, baby food, junior" +22000100,"PORK, NS AS TO CUT, COOKED, NS AS TO FAT 
EATEN","Pork, NS as to cut, cooked, NS as to fat eaten" +22000110,"PORK, NS AS TO CUT, COOKED, LEAN & FAT EATEN","Pork, NS as to cut, cooked, lean and fat eaten" +22000120,"PORK, NS AS TO CUT, COOKED, LEAN ONLY EATEN","Pork, NS as to cut, cooked, lean only eaten" +22000200,"PORK, NS AS TO CUT, FRIED, NS AS TO FAT EATEN","Pork, NS as to cut, fried, NS as to fat eaten" +22000210,"PORK, NS AS TO CUT, FRIED, LEAN & FAT EATEN","Pork, NS as to cut, fried, lean and fat eaten" +22000220,"PORK, NS AS TO CUT, FRIED, LEAN ONLY EATEN","Pork, NS as to cut, fried, lean only eaten" +22000300,"PORK, NS AS TO CUT, BREADED, FRIED, NS AS TO FAT","Pork, NS as to cut, breaded or floured, fried, NS as to fat eaten" +22000310,"PORK, NS AS TO CUT, BREADED, FRIED, FAT EATEN","Pork, NS as to cut, breaded or floured, fried, lean and fat eaten" +22000320,"PORK, NS AS TO CUT, BREADED, FRIED, LEAN ONLY","Pork, NS as to cut, breaded or floured, fried, lean only eaten" +22001000,"PORK, PICKLED, NS AS TO CUT","Pork, pickled, NS as to cut" +22002000,"PORK, GROUND OR PATTY, COOKED","Pork, ground or patty, cooked" +22002100,"PORK, GROUND, GROUND OR PATTY, BREADED, COOKED","Pork, ground or patty, breaded, cooked" +22002800,"PORK JERKY","Pork jerky" +22101000,"PORK CHOP, NS AS TO COOKING METHOD, NS AS TO FAT","Pork chop, NS as to cooking method, NS as to fat eaten" +22101010,"PORK CHOP, NS AS TO COOKING METHOD, LEAN & FAT","Pork chop, NS as to cooking method, lean and fat eaten" +22101020,"PORK CHOP, NS AS TO COOKING METHOD, LEAN ONLY","Pork chop, NS as to cooking method, lean only eaten" +22101100,"PORK CHOP, BROILED OR BAKED, NS AS TO FAT","Pork chop, broiled or baked, NS as to fat eaten" +22101110,"PORK CHOP, BROILED OR BAKED, LEAN & FAT","Pork chop, broiled or baked, lean and fat eaten" +22101120,"PORK CHOP, BROILED OR BAKED, LEAN ONLY","Pork chop, broiled or baked, lean only eaten" +22101130,"PORK CHOP, BREADED, BROILED OR BAKED, NS AS TO FAT","Pork chop, breaded or floured, broiled or baked, NS 
as to fat eaten" +22101140,"PORK CHOP, BREADED, BROILED OR BAKED, LEAN & FAT","Pork chop, breaded or floured, broiled or baked, lean and fat eaten" +22101150,"PORK CHOP, BREADED, BROILED OR BAKED, LEAN ONLY","Pork chop, breaded or floured, broiled or baked, lean only eaten" +22101200,"PORK CHOP, FRIED, NS AS TO FAT","Pork chop, fried, NS as to fat eaten" +22101210,"PORK CHOP, FRIED, LEAN & FAT","Pork chop, fried, lean and fat eaten" +22101220,"PORK CHOP, FRIED, LEAN ONLY","Pork chop, fried, lean only eaten" +22101300,"PORK CHOP, BREADED, FRIED, NS AS TO FAT","Pork chop, breaded or floured, fried, NS as to fat eaten" +22101310,"PORK CHOP, BREADED, FRIED, LEAN & FAT","Pork chop, breaded or floured, fried, lean and fat eaten" +22101320,"PORK CHOP, BREADED, FRIED, LEAN ONLY","Pork chop, breaded or floured, fried, lean only eaten" +22101400,"PORK CHOP, BATTERED, FRIED, NS AS TO FAT","Pork chop, battered, fried, NS as to fat eaten" +22101410,"PORK CHOP, BATTERED, FRIED, LEAN & FAT","Pork chop, battered, fried, lean and fat eaten" +22101420,"PORK CHOP, BATTERED, FRIED, LEAN ONLY","Pork chop, battered, fried, lean only eaten" +22101500,"PORK CHOP, STEWED, NS AS TO FAT EATEN","Pork chop, stewed, NS as to fat eaten" +22101510,"PORK CHOP, STEWED, LEAN & FAT EATEN","Pork chop, stewed, lean and fat eaten" +22101520,"PORK CHOP, STEWED, LEAN ONLY EATEN","Pork chop, stewed, lean only eaten" +22107000,"PORK CHOP, SMOKED OR CURED, COOKED, NS AS TO FAT","Pork chop, smoked or cured, cooked, NS as to fat eaten" +22107010,"PORK CHOP, SMOKED OR CURED, COOKED, LEAN & FAT","Pork chop, smoked or cured, cooked, lean and fat eaten" +22107020,"PORK CHOP, SMOKED OR CURED, COOKED, LEAN ONLY","Pork chop, smoked or cured, cooked, lean only eaten" +22201000,"PORK STEAK, NS AS TO COOKING METHOD, NS AS TO FAT","Pork steak or cutlet, NS as to cooking method, NS as to fat eaten" +22201010,"PORK STEAK, NS AS TO COOKING METHOD, LEAN & FAT","Pork steak or cutlet, NS as to cooking method, lean and fat 
eaten" +22201020,"PORK STEAK, NS AS TO COOKING METHOD, LEAN ONLY","Pork steak or cutlet, NS as to cooking method, lean only eaten" +22201050,"PORK STEAK OR CUTLET, BATTERED, FRIED, NS AS TO FAT","Pork steak or cutlet, battered, fried, NS as to fat eaten" +22201060,"PORK STEAK OR CUTLET, BATTERED, FRIED, LEAN & FAT","Pork steak or cutlet, battered, fried, lean and fat eaten" +22201070,"PORK STEAK OR CUTLET, BATTERED, FRIED, LEAN ONLY","Pork steak or cutlet, battered, fried, lean only eaten" +22201100,"PORK STEAK OR CUTLET, BROILED OR BAKD, NS AS TO FAT","Pork steak or cutlet, broiled or baked, NS as to fat eaten" +22201110,"PORK STEAK OR CUTLET, BROILED OR BAKED, LEAN & FAT","Pork steak or cutlet, broiled or baked, lean and fat eaten" +22201120,"PORK STEAK OR CUTLET, BROILED OR BAKED, LEAN ONLY","Pork steak or cutlet, broiled or baked, lean only eaten" +22201200,"PORK STEAK OR CUTLET, FRIED, NS AS TO FAT","Pork steak or cutlet, fried, NS as to fat eaten" +22201210,"PORK STEAK OR CUTLET, FRIED, LEAN & FAT","Pork steak or cutlet, fried, lean and fat eaten" +22201220,"PORK STEAK OR CUTLET, FRIED, LEAN ONLY","Pork steak or cutlet, fried, lean only eaten" +22201300,"PORK CUTLET, BREADED, BROILED/BAKED, NS AS TO FAT","Pork steak or cutlet, breaded or floured, broiled or baked, NS as to fat eaten" +22201310,"PORK CUTLET, BREADED, BROILED/BAKED, LEAN & FAT","Pork steak or cutlet, breaded or floured, broiled or baked, lean and fat eaten" +22201320,"PORK CUTLET, BREADED, BROILED/BAKED, LEAN ONLY","Pork steak or cutlet, breaded or floured, broiled or baked, lean only eaten" +22201400,"PORK STEAK OR CUTLET, BREADED, FRIED, NS AS TO FAT","Pork steak or cutlet, breaded or floured, fried, NS as to fat eaten" +22201410,"PORK STEAK OR CUTLET, BREADED, FRIED, LEAN & FAT","Pork steak or cutlet, breaded or floured, fried, lean and fat eaten" +22201420,"PORK STEAK OR CUTLET, BREADED, FRIED, LEAN ONLY","Pork steak or cutlet, breaded or floured, fried, lean only eaten" +22210300,"PORK, 
TENDERLOIN, COOKED, NS AS TO METHOD","Pork, tenderloin, cooked, NS as to cooking method" +22210310,"PORK, TENDERLOIN, BREADED, FRIED","Pork, tenderloin, breaded, fried" +22210350,"PORK, TENDERLOIN, BRAISED","Pork, tenderloin, braised" +22210400,"PORK, TENDERLOIN, BAKED","Pork, tenderloin, baked" +22210450,"PORK, TENDERLOIN, BATTERED, FRIED","Pork, tenderloin, battered, fried" +22300120,"HAM, FRIED, NS AS TO FAT","Ham, fried, NS as to fat eaten" +22300130,"HAM, FRIED, LEAN & FAT","Ham, fried, lean and fat eaten" +22300140,"HAM, FRIED, LEAN ONLY","Ham, fried, lean only eaten" +22300150,"HAM, BREADED, FRIED, NS AS TO FAT","Ham, breaded or floured, fried, NS as to fat eaten" +22300160,"HAM, BREADED, FRIED, LEAN & FAT","Ham, breaded or floured, fried, lean and fat eaten" +22300170,"HAM, BREADED, FRIED, LEAN ONLY","Ham, breaded or floured, fried, lean only eaten" +22301000,"HAM, FRESH, COOKED, NS AS TO FAT","Ham, fresh, cooked, NS as to fat eaten" +22301110,"HAM, FRESH, COOKED, LEAN & FAT","Ham, fresh, cooked, lean and fat eaten" +22301120,"HAM, FRESH, COOKED, LEAN ONLY","Ham, fresh, cooked, lean only eaten" +22311000,"HAM, SMOKED OR CURED, COOKED, NS AS TO FAT","Ham, smoked or cured, cooked, NS as to fat eaten" +22311010,"HAM, SMOKED OR CURED, COOKED, LEAN & FAT","Ham, smoked or cured, cooked, lean and fat eaten" +22311020,"HAM, SMOKED OR CURED, COOKED, LEAN ONLY","Ham, smoked or cured, cooked, lean only eaten" +22311200,"HAM, SMOKED OR CURED, LOW NA, NS AS TO FAT","Ham, smoked or cured, low sodium, cooked, NS as to fat eaten" +22311210,"HAM, SMOKED OR CURED, LOW NA, LEAN & FAT","Ham, smoked or cured, low sodium, cooked, lean and fat eaten" +22311220,"HAM, SMOKED OR CURED, LOW NA, LEAN ONLY","Ham, smoked or cured, low sodium, cooked, lean only eaten" +22311450,"HAM, PROSCIUTTO","Ham, prosciutto" +22311500,"HAM, SMOKED OR CURED, CANNED, NS AS TO FAT EATEN","Ham, smoked or cured, canned, NS as to fat eaten" +22311510,"HAM, SMOKED OR CURED, CANNED, LEAN & FAT EATEN","Ham, 
smoked or cured, canned, lean and fat eaten" +22311520,"HAM, SMOKED OR CURED, CANNED, LEAN ONLY EATEN","Ham, smoked or cured, canned, lean only eaten" +22321110,"HAM, SMOKED OR CURED, GROUND PATTY","Ham, smoked or cured, ground patty" +22400100,"PORK ROAST, NS AS TO CUT, NS AS TO FAT","Pork roast, NS as to cut, cooked, NS as to fat eaten" +22400110,"PORK ROAST, NS AS TO CUT, COOKED, LEAN & FAT","Pork roast, NS as to cut, cooked, lean and fat eaten" +22400120,"PORK ROAST, NS AS TO CUT, COOKED, LEAN ONLY","Pork roast, NS as to cut, cooked, lean only eaten" +22401000,"PORK ROAST, LOIN, COOKED, NS AS TO FAT","Pork roast, loin, cooked, NS as to fat eaten" +22401010,"PORK ROAST, LOIN, COOKED, LEAN & FAT","Pork roast, loin, cooked, lean and fat eaten" +22401020,"PORK ROAST, LOIN, COOKED, LEAN ONLY","Pork roast, loin, cooked, lean only eaten" +22402510,"FRIED PORK CHUNKS, P.R. (CARNE DE CERDO FRITA)","Fried pork chunks, Puerto Rican style (Carne de cerdo frita, masitas fritas)" +22411000,"PORK ROAST, SHOULDER, COOKED, NS AS TO FAT","Pork roast, shoulder, cooked, NS as to fat eaten" +22411010,"PORK ROAST, SHOULDER, COOKED, LEAN & FAT","Pork roast, shoulder, cooked, lean and fat eaten" +22411020,"PORK ROAST, SHOULDER, COOKED, LEAN ONLY","Pork roast, shoulder, cooked, lean only eaten" +22421000,"PORK ROAST, SMOKED OR CURED, COOKED, NS AS TO FAT","Pork roast, smoked or cured, cooked, NS as to fat eaten" +22421010,"PORK ROAST, SMOKED OR CURED, COOKED, LEAN & FAT","Pork roast, smoked or cured, cooked, lean and fat eaten" +22421020,"PORK ROAST, SMOKED OR CURED, COOKED, LEAN ONLY","Pork roast, smoked or cured, cooked, lean only eaten" +22431000,"PORK ROLL, CURED, FRIED","Pork roll, cured, fried" +22501010,"BACON, CANADIAN, COOKED","Canadian bacon, cooked" +22600100,"BACON, NS AS TO TYPE OF MEAT, COOKED","Bacon, NS as to type of meat, cooked" +22600200,"PORK BACON, NS AS TO FRESH/SMOKED/CURED, COOKED","Pork bacon, NS as to fresh, smoked or cured, cooked" +22601000,"PORK BACON, 
SMOKED OR CURED, COOKED","Pork bacon, smoked or cured, cooked" +22601040,"BACON OR SIDE PORK, FRESH, COOKED","Bacon or side pork, fresh, cooked" +22602010,"PORK BACON, SMOKED OR CURED, LOWER SODIUM","Pork bacon, smoked or cured, lower sodium" +22605010,"BACON, FORMED, LEAN MEAT ADDED, COOKED","Pork bacon, formed, lean meat added, cooked" +22621000,"SALT PORK, COOKED","Salt pork, cooked" +22621100,"FAT BACK, COOKED (INCLUDE HOG JOWL)","Fat back, cooked" +22701000,"PORK, SPARERIBS, COOKED, NS AS TO FAT EATEN","Pork, spareribs, cooked, NS as to fat eaten" +22701010,"PORK, SPARERIBS, COOKED, LEAN & FAT","Pork, spareribs, cooked, lean and fat eaten" +22701020,"PORK, SPARERIBS, COOKED, LEAN ONLY","Pork, spareribs, cooked, lean only eaten" +22701030,"PORK, SPARERIBS, BBQ, W/ SAUCE, NS FAT EATEN","Pork, spareribs, barbecued, with sauce, NS as to fat eaten" +22701040,"PORK, SPARERIBS, BBQ, W/ SAUCE, LEAN & FAT EATEN","Pork, spareribs, barbecued, with sauce, lean and fat eaten" +22701050,"PORK, SPARERIBS, BBQ, W/ SAUCE, LEAN ONLY EATEN","Pork, spareribs, barbecued, with sauce, lean only eaten" +22704010,"PORK, CRACKLINGS, COOKED","Pork, cracklings, cooked" +22705010,"PORK, EARS, TAIL, HEAD, SNOUT, MISC PARTS, COOKED","Pork ears, tail, head, snout, miscellaneous parts, cooked" +22706010,"PORK, NECK BONES, COOKED","Pork, neck bones, cooked" +22707010,"PORK, PIG'S FEET, COOKED","Pork, pig's feet, cooked" +22707020,"PORK, PIG'S FEET, PICKLED","Pork, pig's feet, pickled" +22708010,"PORK, PIG'S HOCKS, COOKED","Pork, pig's hocks, cooked" +22709010,"PORK SKIN, RINDS, DEEP-FRIED","Pork skin, rinds, deep-fried" +22709110,"PORK SKIN, BOILED","Pork skin, boiled" +22810010,"HAM, BABY, STRAINED","Ham, baby food, strained" +22820000,"MEAT STICK, BABY FOOD","Meat stick, baby food" +23000100,"LAMB, NS AS TO CUT, COOKED","Lamb, NS as to cut, cooked" +23101000,"LAMB CHOP, COOKED, NS AS TO CUT & FAT","Lamb chop, NS as to cut, cooked, NS as to fat eaten" +23101010,"LAMB CHOP, NS AS TO CUT, 
COOKED, LEAN & FAT","Lamb chop, NS as to cut, cooked, lean and fat eaten" +23101020,"LAMB CHOP, NS AS TO CUT, COOKED, LEAN ONLY","Lamb chop, NS as to cut, cooked, lean only eaten" +23104000,"LAMB, LOIN CHOP, COOKED, NS AS TO FAT","Lamb, loin chop, cooked, NS as to fat eaten" +23104010,"LAMB, LOIN CHOP, COOKED, LEAN & FAT","Lamb, loin chop, cooked, lean and fat eaten" +23104020,"LAMB, LOIN CHOP, COOKED, LEAN ONLY","Lamb, loin chop, cooked, lean only eaten" +23107000,"LAMB, SHOULDER CHOP, COOKED, NS AS TO FAT","Lamb, shoulder chop, cooked, NS as to fat eaten" +23107010,"LAMB, SHOULDER CHOP, COOKED, LEAN & FAT","Lamb, shoulder chop, cooked, lean and fat eaten" +23107020,"LAMB, SHOULDER CHOP, COOKED, LEAN ONLY","Lamb, shoulder chop, cooked, lean only eaten" +23110000,"LAMB, RIBS, COOKED, LEAN ONLY","Lamb, ribs, cooked, lean only eaten" +23110010,"LAMB, RIBS, COOKED, NS AS TO FAT","Lamb, ribs, cooked, NS as to fat eaten" +23110050,"LAMB, RIBS, COOKED, LEAN & FAT","Lamb, ribs, cooked, lean and fat eaten" +23111010,"LAMB HOCKS, COOKED","Lamb hocks, cooked" +23120100,"LAMB, ROAST, COOKED, NS AS TO FAT EATEN","Lamb, roast, cooked, NS as to fat eaten" +23120110,"LAMB, ROAST, COOKED, LEAN & FAT EATEN","Lamb, roast, cooked, lean and fat eaten" +23120120,"LAMB, ROAST, COOKED, LEAN ONLY EATEN","Lamb, roast, cooked, lean only eaten" +23132000,"LAMB, GROUND OR PATTY, COOKED","Lamb, ground or patty, cooked" +23150100,"GOAT, BOILED","Goat, boiled" +23150200,"GOAT, FRIED","Goat, fried" +23150250,"GOAT, BAKED","Goat, baked" +23150270,"GOAT HEAD, COOKED","Goat head, cooked" +23150300,"GOAT RIBS, COOKED","Goat ribs, cooked" +23200100,"VEAL, COOKED, NS AS TO CUT & FAT","Veal, NS as to cut, cooked, NS as to fat eaten" +23200110,"VEAL, NS AS TO CUT, COOKED, LEAN & FAT","Veal, NS as to cut, cooked, lean and fat eaten" +23200120,"VEAL, NS AS TO CUT, COOKED, LEAN ONLY","Veal, NS as to cut, cooked, lean only eaten" +23201010,"VEAL CHOP, NS AS TO COOKING METHOD, NS AS TO FAT","Veal chop, NS as 
to cooking method, NS as to fat eaten" +23201020,"VEAL CHOP, NS AS TO COOKING METHOD, LEAN & FAT","Veal chop, NS as to cooking method, lean and fat eaten" +23201030,"VEAL CHOP, NS AS TO COOKING METHOD, LEAN ONLY","Veal chop, NS as to cooking method, lean only eaten" +23203010,"VEAL CHOP, FRIED, NS AS TO FAT","Veal chop, fried, NS as to fat eaten" +23203020,"VEAL CHOP, FRIED, LEAN & FAT","Veal chop, fried, lean and fat eaten" +23203030,"VEAL CHOP, FRIED, LEAN ONLY","Veal chop, fried, lean only eaten" +23203100,"VEAL CHOP, BROILED, NS AS TO FAT","Veal chop, broiled, NS as to fat eaten" +23203110,"VEAL CHOP, BROILED, LEAN & FAT","Veal chop, broiled, lean and fat eaten" +23203120,"VEAL CHOP, BROILED, LEAN ONLY","Veal chop, broiled, lean only eaten" +23204010,"VEAL CUTLET, NS AS TO COOKING METHOD, NS AS TO FAT","Veal cutlet or steak, NS as to cooking method, NS as to fat eaten" +23204020,"VEAL CUTLET, NS AS TO COOKING METHOD, LEAN & FAT","Veal cutlet or steak, NS as to cooking method, lean and fat eaten" +23204030,"VEAL CUTLET, NS AS TO COOKING METHOD, LEAN ONLY","Veal cutlet or steak, NS as to cooking method, lean only eaten" +23204200,"VEAL CUTLET OR STEAK, BROILED, NS AS TO FAT","Veal cutlet or steak, broiled, NS as to fat eaten" +23204210,"VEAL CUTLET OR STEAK, BROILED, LEAN & FAT","Veal cutlet or steak, broiled, lean and fat eaten" +23204220,"VEAL CUTLET OR STEAK, BROILED, LEAN ONLY","Veal cutlet or steak, broiled, lean only eaten" +23205010,"VEAL CUTLET OR STEAK, FRIED, NS AS TO FAT","Veal cutlet or steak, fried, NS as to fat eaten" +23205020,"VEAL CUTLET OR STEAK, FRIED, LEAN & FAT","Veal cutlet or steak, fried, lean and fat eaten" +23205030,"VEAL CUTLET OR STEAK, FRIED, LEAN ONLY","Veal cutlet or steak, fried, lean only eaten" +23210010,"VEAL, ROASTED, NS AS TO FAT","Veal, roasted, NS as to fat eaten" +23210020,"VEAL, ROASTED, LEAN & FAT","Veal, roasted, lean and fat eaten" +23210030,"VEAL, ROASTED, LEAN ONLY","Veal, roasted, lean only eaten" +23220010,"VEAL, 
GROUND OR PATTY, COOKED","Veal, ground or patty, cooked" +23220020,"MOCK CHICKEN LEGS, COOKED","Mock chicken legs, cooked" +23220030,"VEAL PATTY, BREADED, COOKED","Veal patty, breaded, cooked" +23310000,"RABBIT, NS AS TO DOMESTIC OR WILD, COOKED","Rabbit, NS as to domestic or wild, cooked" +23311100,"RABBIT, DOMESTIC, NS AS TO COOKING METHOD","Rabbit, domestic, NS as to cooking method" +23311120,"RABBIT, NS AS TO DOMESTIC OR WILD, BREADED, FRIED","Rabbit, NS as to domestic or wild, breaded, fried" +23311200,"RABBIT, WILD, COOKED","Rabbit, wild, cooked" +23321000,"VENISON/DEER, NFS","Venison/deer, NFS" +23321050,"VENISON/DEER, CURED","Venison/deer, cured" +23321100,"VENISON/DEER, ROASTED (INCLUDE ROAST ANTELOPE)","Venison/deer, roasted" +23321200,"VENISON/DEER STEAK, COOKED, NS AS TO METHOD","Venison/deer steak, cooked, NS as to cooking method" +23321250,"VENISON/DEER STEAK, BREADED OR FLOURED, COOKED","Venison/deer steak, breaded or floured, cooked, NS as to cooking method" +23321900,"VENISON/DEER JERKY","Venison/deer jerky" +23322100,"DEER BOLOGNA","Deer bologna" +23322300,"DEER CHOP, COOKED (INCLUDE VENISON CHOP)","Deer chop, cooked" +23322350,"VENISON/DEER RIBS, COOKED","Venison/deer ribs, cooked" +23322400,"VENISON/DEER, STEWED","Venison/deer, stewed" +23323100,"MOOSE, COOKED","Moose, cooked" +23323500,"BEAR, COOKED","Bear, cooked" +23324100,"CARIBOU, COOKED","Caribou, cooked" +23326100,"BISON, COOKED","Bison, cooked" +23331100,"GROUND HOG, COOKED","Ground hog, cooked" +23332100,"OPOSSUM, COOKED","Opossum, cooked" +23333100,"SQUIRREL, COOKED","Squirrel, cooked" +23334100,"BEAVER, COOKED","Beaver, cooked" +23335100,"RACCOON, COOKED","Raccoon, cooked" +23340100,"ARMADILLO, COOKED","Armadillo, cooked" +23345100,"WILD PIG, SMOKED","Wild pig, smoked" +23350100,"OSTRICH, COOKED","Ostrich, cooked" +23410010,"LAMB, BABY, STRAINED","Lamb, baby food, strained" +23420010,"VEAL, BABY, STRAINED","Veal, baby food, strained" +24100000,"CHICKEN, NS AS TO PART, NS METHOD, 
SKIN","Chicken, NS as to part and cooking method, NS as to skin eaten" +24100010,"CHICKEN, NS AS TO PART, NS METHOD, W/ SKIN","Chicken, NS as to part and cooking method, skin eaten" +24100020,"CHICKEN, NS AS TO PART, NS METHOD, W/O SKIN","Chicken, NS as to part and cooking method, skin not eaten" +24102000,"CHICKEN, NS PART, ROASTED/BROILED/BAKED, NS SKIN","Chicken, NS as to part, roasted, broiled, or baked, NS as to skin eaten" +24102010,"CHICKEN, NS PART, ROASTED/BROILED/BAKED, W/ SKIN","Chicken, NS as to part, roasted, broiled, or baked, skin eaten" +24102020,"CHICKEN, NS PART, ROASTED/BROILED/BAKED, W/O SKIN","Chicken, NS as to part, roasted, broiled, or baked, skin not eaten" +24103000,"CHICKEN, STEWED, NS PART, NS SKIN","Chicken, NS as to part, stewed, NS as to skin eaten" +24103010,"CHICKEN, STEWED, NS PART, W/ SKIN","Chicken, NS as to part, stewed, skin eaten" +24103020,"CHICKEN, STEWED, NS PART, W/O SKIN","Chicken, NS as to part, stewed, skin not eaten" +24104000,"CHICKEN, FRIED, NO COATING, NS PART, NS SKIN","Chicken, NS as to part, fried, no coating, NS as to skin eaten" +24104010,"CHICKEN, FRIED, NO COATING, NS PART, W/ SKIN","Chicken, NS as to part, fried, no coating, skin eaten" +24104020,"CHICKEN, FRIED, NO COATING, NS PART, W/O SKIN","Chicken, NS as to part, fried, no coating, skin not eaten" +24107000,"CHICKEN, COATED, BKD/FRD, PPD W/ SKIN, NS SKIN EATEN","Chicken, NS as to part, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24107010,"CHICKEN, COATED, BKD/FRD, PPD W/ SKIN, SKIN EATEN","Chicken, NS as to part, coated, baked or fried, prepared with skin, skin/coating eaten" +24107020,"CHICKEN, COATED, BKD/FRD, PPD W/ SKIN, SKIN NOT EATEN","Chicken, NS as to part, coated, baked or fried, prepared with skin, skin/coating not eaten" +24107040,"CHICKEN, NS PART,COATED,BKD/FRD,PREP SKINLESS,NS COAT EATEN","Chicken, NS as to part, coated, baked or fried, prepared skinless, NS as to coating eaten" +24107050,"CHICKEN, NS 
PART,COATED,BKD/FRD,PREP SKINLESS,COATING EATEN","Chicken, NS as to part, coated, baked or fried, prepared skinless, coating eaten" +24107060,"CHICKEN, NS PART,COATED,BKD/FRD,PREP SKINLESS,COAT NOT EATEN","Chicken, NS as to part, coated, baked or fried, prepared skinless, coating not eaten" +24120100,"CHICKEN, BREAST, NFS","Chicken, breast, NS as to cooking method, NS as to skin eaten" +24120110,"CHICKEN, BREAST, NS AS TO COOKING METHOD, W/SKIN","Chicken, breast, NS as to cooking method, skin eaten" +24120120,"CHICKEN, BREAST, NS AS TO COOKING METHOD, W/O SKIN","Chicken, breast, NS as to cooking method, skin not eaten" +24122100,"CHICKEN, BREAST, ROASTED/BROILED/BAKED, NS SKIN","Chicken, breast, roasted, broiled, or baked, NS as to skin eaten" +24122110,"CHICKEN, BREAST, ROASTED/BROILED/BAKED, W/ SKIN","Chicken, breast, roasted, broiled, or baked, skin eaten" +24122120,"CHICKEN, BREAST, ROASTED/BROILED/BAKED, W/O SKIN","Chicken, breast, roasted, broiled, or baked, skin not eaten" +24123100,"CHICKEN, BREAST, STEWED, NS AS TO SKIN","Chicken, breast, stewed, NS as to skin eaten" +24123110,"CHICKEN, BREAST, STEWED, W/ SKIN","Chicken, breast, stewed, skin eaten" +24123120,"CHICKEN, BREAST, STEWED, W/O SKIN","Chicken, breast, stewed, skin not eaten" +24124100,"CHICKEN, BREAST, FRIED, NO COATING, NS AS TO SKIN","Chicken, breast, fried, no coating, NS as to skin eaten" +24124110,"CHICKEN, BREAST, FRIED, NO COATING, W/ SKIN","Chicken, breast, fried, no coating, skin eaten" +24124120,"CHICKEN, BREAST, FRIED, NO COATING, W/O SKIN","Chicken, breast, fried, no coating, skin not eaten" +24127100,"CHICKEN, BREAST,COATED,BKD/FRD,PPD W/ SKIN,NS SKIN EATEN","Chicken, breast, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24127110,"CHICKEN, BREAST,COATED,BKD/FRD,PPD W/ SKIN, SKIN EATEN","Chicken, breast, coated, baked or fried, prepared with skin, skin/coating eaten" +24127120,"CHICKEN, BREAST,COATED,BKD/FRD,PPD W/ SKIN, SKIN NOT EATEN","Chicken, breast, 
coated, baked or fried, prepared with skin, skin/coating not eaten" +24127125,"CHIC, BREAST, FF, COATED/BAKED/FRIED, PREP SKIN,NS SKIN EATE","Chicken, breast, from fast food, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24127130,"CHIC, BREAST, FF, COATED, BAKED/FRIED, PREP SKIN,SKIN EATEN","Chicken, breast, from fast food, coated, baked or fried, prepared with skin, skin/coating eaten" +24127135,"CHICK,BREAST,FF,COATED, BAKED/ FRIED,PREP SKIN,NO SKIN EATEN","Chicken, breast, from fast food, coated, baked or fried, prepared with skin, skin/coating not eaten" +24127140,"CHICKEN,BREAST,COATED,BKD/FRD,PPD SKINLESS,NS COAT EATEN","Chicken, breast, coated, baked or fried, prepared skinless, NS as to coating eaten" +24127150,"CHICKEN,BREAST,COATED,BKD/FRD,PPD SKINLESS,COAT EATEN","Chicken, breast, coated, baked or fried, prepared skinless, coating eaten" +24127160,"CHICKEN,BREAST,COATED,BKD/FRD,PPD SKINLESS,COAT NOT EATEN","Chicken, breast, coated, baked or fried, prepared skinless, coating not eaten" +24130200,"CHICKEN, LEG, NFS","Chicken, leg (drumstick and thigh), NS as to cooking method, NS as to skin eaten" +24130210,"CHICKEN, LEG, NS AS TO COOKING METHOD, W/ SKIN","Chicken, leg (drumstick and thigh), NS as to cooking method, skin eaten" +24130220,"CHICKEN, LEG, NS AS TO COOKING METHOD, W/O SKIN","Chicken, leg (drumstick and thigh), NS as to cooking method, skin not eaten" +24132200,"CHICKEN, LEG, ROASTED/BROILED/BAKED, NS SKIN","Chicken, leg (drumstick and thigh), roasted, broiled, or baked, NS as to skin eaten" +24132210,"CHICKEN, LEG, ROASTED/BROILED/BAKED, W/ SKIN","Chicken, leg (drumstick and thigh), roasted, broiled, or baked, skin eaten" +24132220,"CHICKEN, LEG, ROASTED/BROILED/BAKED, W/O SKIN","Chicken, leg (drumstick and thigh), roasted, broiled, or baked, skin not eaten" +24133200,"CHICKEN, LEG, STEWED, NS AS TO SKIN","Chicken, leg (drumstick and thigh), stewed, NS as to skin eaten" +24133210,"CHICKEN, LEG, STEWED, W/ 
SKIN","Chicken, leg (drumstick and thigh), stewed, skin eaten" +24133220,"CHICKEN, LEG, STEWED, W/O SKIN","Chicken, leg (drumstick and thigh), stewed, skin not eaten" +24134200,"CHICKEN, LEG, FRIED, NO COATING, NS AS TO SKIN","Chicken, leg (drumstick and thigh), fried, no coating, NS as to skin eaten" +24134210,"CHICKEN, LEG, FRIED, NO COATING, W/ SKIN","Chicken, leg (drumstick and thigh), fried, no coating, skin eaten" +24134220,"CHICKEN, LEG, FRIED, NO COATING, W/O SKIN","Chicken, leg (drumstick and thigh), fried, no coating, skin not eaten" +24137200,"CHICKEN,LEG,COATED,BKD/FRD,PPD W/SKIN,NS SKIN EATEN","Chicken, leg (drumstick and thigh), coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24137210,"CHICKEN, LEG,COATED,BKD/FRD,PPD W/ SKIN, SKIN EATEN","Chicken, leg (drumstick and thigh), coated, baked or fried, prepared with skin, skin/coating eaten" +24137220,"CHICKEN, LEG,COATED,BKD/FRD,PPD W/ SKIN, SKIN NOT EATEN","Chicken, leg (drumstick and thigh), coated, baked or fried, prepared with skin, skin/coating not eaten" +24137240,"CHICKEN,LEG,COATED,BKD/FRD,PPD SKINLESS,NS COAT EATEN","Chicken, leg (drumstick and thigh), coated, baked or fried, prepared skinless, NS as to coating eaten" +24137250,"CHICKEN,LEG,COATED,BKD/FRD,PPD SKINLESS,COAT EATEN","Chicken, leg (drumstick and thigh), coated, baked or fried, prepared skinless, coating eaten" +24137260,"CHICKEN,LEG,COATED,BKD/FRD,PPD SKINLESS,COAT NOT EATEN","Chicken, leg (drumstick and thigh), coated, baked or fried, prepared skinless, coating not eaten" +24140200,"CHICKEN, DRUMSTICK, NFS","Chicken, drumstick, NS as to cooking method, NS as to skin eaten" +24140210,"CHICKEN, DRUMSTICK, NS AS TO COOKING METHOD,W/ SKIN","Chicken, drumstick, NS as to cooking method, skin eaten" +24140220,"CHICKEN, DRUMSTICK, NS COOKING METHOD, W/O SKIN","Chicken, drumstick, NS as to cooking method, skin not eaten" +24142200,"CHICKEN, DRUMSTICK, ROASTED/BROILED/BAKED, NS SKIN","Chicken, drumstick, roasted, 
broiled, or baked, NS as to skin eaten" +24142210,"CHICKEN, DRUMSTICK, ROASTED/BROILED/BAKED, W/ SKIN","Chicken, drumstick, roasted, broiled, or baked, skin eaten" +24142220,"CHICKEN, DRUMSTICK, ROASTED/BROILED/BAKED, W/O SKIN","Chicken, drumstick, roasted, broiled, or baked, skin not eaten" +24143200,"CHICKEN, DRUMSTICK, STEWED, NS AS TO SKIN","Chicken, drumstick, stewed, NS as to skin eaten" +24143210,"CHICKEN, DRUMSTICK, STEWED, W/ SKIN","Chicken, drumstick, stewed, skin eaten" +24143220,"CHICKEN, DRUMSTICK, STEWED, W/O SKIN","Chicken, drumstick, stewed, skin not eaten" +24144200,"CHICKEN, DRUMSTICK, FRIED, NO COATING,NS AS TO SKIN","Chicken, drumstick, fried, no coating, NS as to skin eaten" +24144210,"CHICKEN, DRUMSTICK, FRIED, NO COATING, W/ SKIN","Chicken, drumstick, fried, no coating, skin eaten" +24144220,"CHICKEN, DRUMSTICK, FRIED, NO COATING, W/O SKIN","Chicken, drumstick, fried, no coating, skin not eaten" +24147200,"CHICKEN,DRUMSTICK,COATED,BKD/FRD,PPD W/SKIN,NS SKIN EAT","Chicken, drumstick, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24147210,"CHICKEN,DRUMSTICK,COATED,BKD/FRD,PPD W/SKIN, SKIN EAT","Chicken, drumstick, coated, baked or fried, prepared with skin, skin/coating eaten" +24147220,"CHICKEN,DRUMSTICK,COATED,BKD/FRD,PPD W/SKIN, SKIN NOT EAT","Chicken, drumstick, coated, baked or fried, prepared with skin, skin/coating not eaten" +24147225,"CHICK,DRUMSTICK,FF,COATED,BAKED/FRIED,PREP SKIN,NS SKIN EAT","Chicken, drumstick, from fast food, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24147230,"CHIC, DRUMSTICK,FF,COATED, BAKED/FRIED,PREP SKIN,SKIN EATEN","Chicken, drumstick, from fast food, coated, baked or fried, prepared with skin, skin/coating eaten" +24147235,"CHICK,DRUMSTICK,FF,COATED,BAKED/FRIED,PREP SKIN,SKIN NOT EAT","Chicken, drumstick, from fast food, coated, baked or fried, prepared with skin, skin/coating not eaten" +24147240,"CHICKEN,DRUMSTICK,COATED,BKD/FRD,PPD SKINLESS,NS 
COAT EAT","Chicken, drumstick, coated, baked or fried, prepared skinless, NS as to coating eaten" +24147250,"CHICKEN,DRUMSTICK,COATED,BKD/FRD,PPD SKINLESS, COAT EAT","Chicken, drumstick, coated, baked or fried, prepared skinless, coating eaten" +24147260,"CHICKEN,DRUMSTICK,COATED,BKD/FRD,PPD SKINLESS, COAT NOT EAT","Chicken, drumstick, coated, baked or fried, prepared skinless, coating not eaten" +24150200,"CHICKEN, THIGH, NFS","Chicken, thigh, NS as to cooking method, NS as to skin eaten" +24150210,"CHICKEN, THIGH, NS AS TO COOKING METHOD, W/ SKIN","Chicken, thigh, NS as to cooking method, skin eaten" +24150220,"CHICKEN, THIGH, NS AS TO COOKING METHOD, W/O SKIN","Chicken, thigh, NS as to cooking method, skin not eaten" +24152200,"CHICKEN, THIGH, ROASTED/BROILED/BAKED, NS SKIN","Chicken, thigh, roasted, broiled, or baked, NS as to skin eaten" +24152210,"CHICKEN, THIGH, ROASTED/BROILED/BAKED, W/ SKIN","Chicken, thigh, roasted, broiled, or baked, skin eaten" +24152220,"CHICKEN, THIGH, ROASTED/BROILED/BAKED, W/O SKIN","Chicken, thigh, roasted, broiled, or baked, skin not eaten" +24153200,"CHICKEN, THIGH, STEWED, NS AS TO SKIN","Chicken, thigh, stewed, NS as to skin eaten" +24153210,"CHICKEN, THIGH, STEWED, W/ SKIN","Chicken, thigh, stewed, skin eaten" +24153220,"CHICKEN, THIGH, STEWED, W/O SKIN","Chicken, thigh, stewed, skin not eaten" +24154200,"CHICKEN, THIGH, FRIED, NO COATING, NS AS TO SKIN","Chicken, thigh, fried, no coating, NS as to skin eaten" +24154210,"CHICKEN, THIGH, FRIED, NO COATING, W/ SKIN","Chicken, thigh, fried, no coating, skin eaten" +24154220,"CHICKEN, THIGH, FRIED, NO COATING, W/O SKIN","Chicken, thigh, fried, no coating, skin not eaten" +24157200,"CHICKEN,THIGH,COATED,BKD/FRD,PPD W/SKIN,NS SKIN EATEN","Chicken, thigh, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24157210,"CHICKEN,THIGH,COATED,BKD/FRD,PPD W/SKIN, SKIN EATEN","Chicken, thigh, coated, baked or fried, prepared with skin, skin/coating eaten" 
+24157220,"CHICKEN,THIGH,COATED,BKD/FRD,PPD W/SKIN, SKIN NOT EATEN","Chicken, thigh, coated, baked or fried, prepared with skin, skin/coating not eaten" +24157225,"CHIC, THIGH, FF, COATED, BAKED/ FRIED, PREP SKIN,NS SKIN EAT","Chicken, thigh, from fast food, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24157230,"CHICK, THIGH, FF, COATED, BAKED OR FRIED, PREP SKIN,SKIN EAT","Chicken, thigh, from fast food, coated, baked or fried, prepared with skin, skin/coating eaten" +24157235,"CHICK,THIGH,FF,COATED,BAKED/BROILED,PREP SKIN,SKIN NOT EATEN","Chicken, thigh, from fast food, coated, baked or broiled, prepared with skin, skin/coating not eaten" +24157240,"CHICKEN,THIGH,COATED,BKD/FRD,PPD SKINLESS,NS COAT EATEN","Chicken, thigh, coated, baked or fried, prepared skinless, NS as to coating eaten" +24157250,"CHICKEN,THIGH,COATED,BKD/FRD,PPD SKINLESS, COAT EATEN","Chicken, thigh, coated, baked or fried, prepared skinless, coating eaten" +24157260,"CHICKEN,THIGH,COATED,BKD/FRD,PPD SKINLESS, COAT NOT EATEN","Chicken, thigh, coated, baked or fried, prepared skinless, coating not eaten" +24160100,"CHICKEN, WING, NFS","Chicken, wing, NS as to cooking method, NS as to skin eaten" +24160110,"CHICKEN, WING, NS AS TO COOKING METHOD, W/ SKIN","Chicken, wing, NS as to cooking method, skin eaten" +24160120,"CHICKEN, WING, NS AS TO COOKING METHOD, W/O SKIN","Chicken, wing, NS as to cooking method, skin not eaten" +24162100,"CHICKEN, WING, ROASTED/BROILED/BAKED, NS SKIN","Chicken, wing, roasted, broiled, or baked, NS as to skin eaten" +24162110,"CHICKEN, WING, ROASTED/BROILED/BAKED, W/ SKIN","Chicken, wing, roasted, broiled, or baked, skin eaten" +24162120,"CHICKEN, WING, ROASTED/BROILED/BAKED, W/O SKIN","Chicken, wing, roasted, broiled, or baked, skin not eaten" +24163100,"CHICKEN, WING, STEWED, NS AS TO SKIN","Chicken, wing, stewed, NS as to skin eaten" +24163110,"CHICKEN, WING, STEWED, W/ SKIN","Chicken, wing, stewed, skin eaten" +24163120,"CHICKEN, WING, 
STEWED, W/O SKIN","Chicken, wing, stewed, skin not eaten" +24164100,"CHICKEN, WING, FRIED, NO COATING, NS AS TO SKIN","Chicken, wing, fried, no coating, NS as to skin eaten" +24164110,"CHICKEN, WING, FRIED, NO COATING, W/ SKIN","Chicken, wing, fried, no coating, skin eaten" +24164120,"CHICKEN, WING, FRIED, NO COATING, W/O SKIN","Chicken, wing, fried, no coating, skin not eaten" +24167100,"CHICKEN,WING,COATED,BKD/FRD,PPD W/SKIN,NS SKIN EATEN","Chicken, wing, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24167110,"CHICKEN,WING,COATED,BKD/FRD,PPD W/SKIN, SKIN EATEN","Chicken, wing, coated, baked or fried, prepared with skin, skin/coating eaten" +24167120,"CHICKEN,WING,COATED,BKD/FRD,PPD W/SKIN, SKIN NOT EATEN","Chicken, wing, coated, baked or fried, prepared with skin, skin/coating not eaten" +24167125,"CHIC, WING, FF, COATED, BAKED/FRIED, PREP SKIN,NS SKIN EATEN","Chicken, wing, from fast food, coated, baked or fried, prepared with skin, NS as to skin/coating eaten" +24167130,"CHIC, WING, FF, COATED, BAKED/FRIED, PREP SKIN, SKIN EATEN","Chicken, wing, from fast food, coated, baked or fried, prepared with skin, skin/coating eaten" +24167135,"CHIC, WING, FF, COATED,BAKED/FRIED, PREP SKIN, SKIN NO EATEN","Chicken, wing, from fast food, coated, baked or fried, prepared with skin, skin/coating not eaten" +24170200,"CHICKEN, BACK","Chicken, back" +24180200,"CHICKEN, NECK OR RIBS, NFS","Chicken, neck or ribs" +24198340,"CHICKEN TAIL","Chicken, tail" +24198440,"CHICKEN SKIN","Chicken skin" +24198500,"CHICKEN FEET","Chicken feet" +24198570,"CHICKEN, CANNED, MEAT ONLY, LIGHT & DARK MEAT","Chicken, canned, meat only" +24198670,"CHICKEN ROLL, ROASTED, LIGHT & DARK MEAT","Chicken, chicken roll, roasted" +24198690,"CHICKEN PATTY, FILLET, OR TENDERS, BREADED, COOKED, FAST FD","Chicken patty, fillet, or tenders, breaded, cooked, from fast food / restaurant" +24198700,"CHICKEN PATTY/FILLET/TENDERS, BREADED, COOKED","Chicken patty, fillet, or tenders, 
breaded, cooked" +24198710,"CHICKEN PATTY W/ CHEESE, BREADED, COOKED","Chicken patty with cheese, breaded, cooked" +24198720,"CHICKEN, GROUND","Chicken, ground" +24198730,"CHICKEN NUGGETS, FROM FAST FOOD / RESTAURANT","Chicken nuggets, from fast food / restaurant" +24198740,"CHICKEN NUGGETS","Chicken nuggets" +24198840,"FRIED CHICKEN CHUNKS, P. R. (CHICHARRONES DE POLLO)","Fried chicken chunks, Puerto Rican style (Chicharrones de pollo)" +24201000,"TURKEY, NFS","Turkey, NFS" +24201010,"TURKEY, LIGHT MEAT, COOKED, NS AS TO SKIN","Turkey, light meat, cooked, NS as to skin eaten" +24201020,"TURKEY, LIGHT MEAT, COOKED, W/O SKIN","Turkey, light meat, cooked, skin not eaten" +24201030,"TURKEY, LIGHT MEAT, COOKED, W/ SKIN","Turkey, light meat, cooked, skin eaten" +24201050,"TURKEY, LIGHT, BREADED, BAKED/FRIED, NS AS TO SKIN","Turkey, light meat, breaded, baked or fried, NS as to skin eaten" +24201060,"TURKEY, LIGHT MEAT, BREADED, BAKED/FRIED, W/O SKIN","Turkey, light meat, breaded, baked or fried, skin not eaten" +24201070,"TURKEY, LIGHT MEAT, BREADED, BAKED/FRIED, W/ SKIN","Turkey, light meat, breaded, baked or fried, skin eaten" +24201110,"TURKEY, LIGHT MEAT, ROASTED, NS AS TO SKIN","Turkey, light meat, roasted, NS as to skin eaten" +24201120,"TURKEY, LIGHT MEAT, ROASTED, W/O SKIN","Turkey, light meat, roasted, skin not eaten" +24201130,"TURKEY, LIGHT MEAT, ROASTED, W/ SKIN","Turkey, light meat, roasted, skin eaten" +24201210,"TURKEY, DARK MEAT, ROASTED, NS AS TO SKIN","Turkey, dark meat, roasted, NS as to skin eaten" +24201220,"TURKEY, DARK MEAT, ROASTED, W/O SKIN","Turkey, dark meat, roasted, skin not eaten" +24201230,"TURKEY, DARK MEAT, ROASTED, W/ SKIN","Turkey, dark meat, roasted, skin eaten" +24201310,"TURKEY, LIGHT & DARK MEAT, ROASTED, NS AS TO SKIN","Turkey, light and dark meat, roasted, NS as to skin eaten" +24201320,"TURKEY, LIGHT & DARK MEAT, ROASTED, W/O SKIN","Turkey, light and dark meat, roasted, skin not eaten" +24201330,"TURKEY, LIGHT & DARK MEAT, 
ROASTED, W/ SKIN","Turkey, light and dark meat, roasted, skin eaten" +24201350,"TURKEY, LT/DK MEAT, BATTERED, FRIED, NS AS TO SKIN","Turkey, light or dark meat, battered, fried, NS as to skin eaten" +24201360,"TURKEY, LIGHT/DARK MEAT, BATTERED, FRIED, W/O SKIN","Turkey, light or dark meat, battered, fried, skin not eaten" +24201370,"TURKEY, LIGHT/DARK MEAT, BATTERED, FRIED, W/ SKIN","Turkey, light or dark meat, battered, fried, skin eaten" +24201400,"TURKEY, LIGHT/DARK MEAT, STEWED, NS AS TO SKIN","Turkey, light or dark meat, stewed, NS as to skin eaten" +24201410,"TURKEY, LIGHT/DARK MEAT, STEWED, W/O SKIN","Turkey, light or dark meat, stewed, skin not eaten" +24201420,"TURKEY, LIGHT/DARK MEAT, STEWED, W/ SKIN","Turkey light or dark meat, stewed, skin eaten" +24201500,"TURKEY, SMOKED, NS AS TO SKIN","Turkey, light or dark meat, smoked, cooked, NS as to skin eaten" +24201510,"TURKEY, SMOKED, SKIN EATEN","Turkey, light or dark meat, smoked, cooked, skin eaten" +24201520,"TURKEY, SMOKED, SKIN NOT EATEN","Turkey, light or dark meat, smoked, cooked, skin not eaten" +24202000,"TURKEY, DRUMSTICK, COOKED, NS AS TO SKIN","Turkey, drumstick, cooked, NS as to skin eaten" +24202010,"TURKEY, DRUMSTICK, COOKED, W/O SKIN","Turkey, drumstick, cooked, skin not eaten" +24202020,"TURKEY, DRUMSTICK, COOKED, W/ SKIN","Turkey, drumstick, cooked, skin eaten" +24202050,"TURKEY, DRUMSTICK, ROASTED, NS AS TO SKIN","Turkey, drumstick, roasted, NS as to skin eaten" +24202060,"TURKEY, DRUMSTICK, ROASTED, W/O SKIN","Turkey, drumstick, roasted, skin not eaten" +24202070,"TURKEY, DRUMSTICK, ROASTED, W/ SKIN","Turkey, drumstick, roasted, skin eaten" +24202120,"TURKEY, DRUMSTICK, SMOKED, SKIN EATEN","Turkey, drumstick, smoked, cooked, skin eaten" +24202450,"TURKEY, THIGH, COOKED, NS AS TO SKIN","Turkey, thigh, cooked, NS as to skin eaten" +24202460,"TURKEY, THIGH, COOKED, W/ SKIN","Turkey, thigh, cooked, skin eaten" +24202500,"TURKEY, THIGH, COOKED, W/O SKIN","Turkey, thigh, cooked, skin not eaten" 
+24202600,"TURKEY, NECK, COOKED","Turkey, neck, cooked" +24203000,"TURKEY, WING, COOKED, NS AS TO SKIN","Turkey, wing, cooked, NS as to skin eaten" +24203010,"TURKEY, WING, COOKED, W/O SKIN","Turkey, wing, cooked, skin not eaten" +24203020,"TURKEY, WING, COOKED, W/ SKIN","Turkey, wing, cooked, skin eaten" +24203120,"TURKEY, WING, SMOKED, COOKED, SKIN EATEN","Turkey, wing, smoked, cooked, skin eaten" +24204000,"TURKEY, ROLLED ROAST, LIGHT OR DARK MEAT, COOKED","Turkey, rolled roast, light or dark meat, cooked" +24205000,"TURKEY, TAIL, COOKED","Turkey, tail, cooked" +24205100,"TURKEY, BACK, COOKED","Turkey, back, cooked" +24206000,"TURKEY, CANNED","Turkey, canned" +24207000,"TURKEY, GROUND","Turkey, ground" +24208000,"TURKEY NUGGETS","Turkey, nuggets" +24208500,"TURKEY BACON, COOKED","Turkey bacon, cooked" +24300100,"DUCK, COOKED, NS AS TO SKIN","Duck, cooked, NS as to skin eaten" +24300110,"DUCK, COOKED, W/ SKIN","Duck, cooked, skin eaten" +24300120,"DUCK, COOKED, W/O SKIN","Duck, cooked, skin not eaten" +24301000,"DUCK, ROASTED, NS AS TO SKIN","Duck, roasted, NS as to skin eaten" +24301010,"DUCK, ROASTED, W/ SKIN","Duck, roasted, skin eaten" +24301020,"DUCK, ROASTED, W/O SKIN","Duck, roasted, skin not eaten" +24301210,"DUCK, BATTERED, FRIED","Duck, battered, fried" +24302010,"DUCK, PRESSED, CHINESE","Duck, pressed, Chinese" +24311010,"GOOSE, WILD, ROASTED","Goose, wild, roasted" +24400000,"CORNISH GAME HEN, COOKED, NS AS TO SKIN","Cornish game hen, cooked, NS as to skin eaten" +24400010,"CORNISH GAME HEN, COOKED, W/ SKIN","Cornish game hen, cooked, skin eaten" +24400020,"CORNISH GAME HEN, COOKED, W/O SKIN","Cornish game hen, cooked, skin not eaten" +24401000,"CORNISH GAME HEN, ROASTED, NS AS TO SKIN","Cornish game hen, roasted, NS as to skin eaten" +24401010,"CORNISH GAME HEN, ROASTED, W/ SKIN","Cornish game hen, roasted, skin eaten" +24401020,"CORNISH GAME HEN, ROASTED, W/O SKIN","Cornish game hen, roasted, skin not eaten" +24402100,"DOVE, COOKED, NS AS TO COOKING 
METHOD","Dove, cooked, NS as to cooking method" +24402110,"DOVE, FRIED","Dove, fried" +24403100,"QUAIL, COOKED","Quail, cooked" +24404100,"PHEASANT, COOKED","Pheasant, cooked" +24701000,"CHICKEN, BABY, NS AS TO STRAINED OR JUNIOR","Chicken, baby food, NS as to strained or junior" +24701010,"CHICKEN, BABY, STRAINED","Chicken, baby food, strained" +24701020,"CHICKEN, BABY, JUNIOR","Chicken, baby food, junior" +24703000,"TURKEY, BABY, NS AS TO STRAINED OR JUNIOR","Turkey, baby food, NS as to strained or junior" +24703010,"TURKEY, BABY, STRAINED","Turkey, baby food, strained" +24703020,"TURKEY, BABY, JUNIOR","Turkey, baby food, junior" +24705010,"CHICKEN STICK, BABY FOOD","Chicken stick, baby food" +24706010,"TURKEY STICK, BABY FOOD","Turkey stick, baby food" +25110120,"BEEF LIVER, BRAISED","Beef liver, braised" +25110140,"BEEF LIVER, FRIED","Beef liver, fried" +25110420,"CHICKEN LIVER, BRAISED","Chicken liver, braised" +25110450,"CHICKEN LIVER, FRIED","Chicken liver, fried" +25112200,"LIVER PASTE OR PATE, CHICKEN (INCLUDE PATE, NFS)","Liver paste or pate, chicken" +25120000,"HEART, COOKED","Heart, cooked" +25130000,"KIDNEY, COOKED","Kidney, cooked" +25140110,"SWEETBREADS, COOKED","Sweetbreads, cooked" +25150000,"BRAINS, COOKED","Brains, cooked" +25160000,"TONGUE, COOKED","Tongue, cooked" +25160110,"TONGUE, SMOKED,CURED OR PICKLED, COOKED","Tongue, smoked, cured, or pickled, cooked" +25160130,"TONGUE POT ROAST, P.R. 
(LENGUA AL CALDERO)","Tongue pot roast, Puerto Rican style (Lengua al caldero)" +25170110,"TRIPE, COOKED","Tripe, cooked" +25170210,"CHITTERLINGS, COOKED","Chitterlings, cooked" +25170310,"HOG MAWS (STOMACH) COOKED","Hog maws (stomach), cooked" +25170420,"GIZZARD, COOKED","Gizzard, cooked" +25210110,"FRANKFURTER, WIENER OR HOT DOG, NFS","Frankfurter, wiener, or hot dog, NFS" +25210150,"FRANKFURTER OR HOT DOG, CHEESE-FILLED","Frankfurter or hot dog, cheese-filled" +25210210,"FRANKFURTER OR HOT DOG, BEEF","Frankfurter or hot dog, beef" +25210220,"FRANKFURTER OR HOT DOG, BEEF & PORK","Frankfurter or hot dog, beef and pork" +25210240,"FRANFURTER/HOT DOG, BEEF & PORK, LIGHT","Frankfurter or hot dog, beef and pork, reduced fat or light" +25210250,"FRANKFURTER OR HOT DOG, MEAT & POULTRY, FAT FREE","Frankfurter or hot dog, meat and poultry, fat free" +25210280,"FRANKFURTER OR HOT DOG, MEAT & POULTRY","Frankfurter or hot dog, meat and poultry" +25210290,"FRANKFURTER OR HOT DOG, MEAT & POULTRY, LIGHT","Frankfurter or hot dog, meat and poultry, reduced fat or light" +25210310,"FRANKFURTER OR HOT DOG, CHICKEN","Frankfurter or hot dog, chicken" +25210410,"FRANKFURTER OR HOT DOG, TURKEY","Frankfurter or hot dog, turkey" +25210620,"FRANKFURTER OR HOT DOG, BEEF, REDUCED FAT OR LIGHT","Frankfurter or hot dog, beef, reduced fat or light" +25210750,"FRANKFURTER OR HOT DOG, REDUCED FAT OR LIGHT, NFS","Frankfurter or hot dog, reduced fat or light, NFS" +25220010,"COLD CUT, NFS","Cold cut, NFS" +25220105,"BEEF SAUSAGE","Beef sausage" +25220106,"BEEF SAUSAGE, REDUCED FAT","Beef sausage, reduced fat" +25220150,"BEEF SAUSAGE WITH CHEESE","Beef sausage with cheese" +25220210,"BLOOD SAUSAGE","Blood sausage" +25220350,"BRATWURST","Bratwurst" +25220360,"BRATWURST W/ CHEESE","Bratwurst, with cheese" +25220390,"BOLOGNA, BEEF, LOW FAT","Bologna, beef, lowfat" +25220400,"BOLOGNA, PORK AND BEEF","Bologna, pork and beef" +25220410,"BOLOGNA, NFS","Bologna, NFS" +25220420,"BOLOGNA, LEBANON","Bologna, 
Lebanon" +25220430,"BOLOGNA, BEEF","Bologna, beef" +25220440,"BOLOGNA, TURKEY","Bologna, turkey" +25220450,"BOLOGNA RING, SMOKED","Bologna ring, smoked" +25220460,"BOLOGNA, PORK","Bologna, pork" +25220470,"BOLOGNA, BEEF, LOWER SODIUM","Bologna, beef, lower sodium" +25220480,"BOLOGNA, CHICKEN, BEEF, & PORK","Bologna, chicken, beef, and pork" +25220490,"BOLOGNA, W/ CHEESE","Bologna, with cheese" +25220500,"BOLOGNA, BEEF & PORK, LOWFAT","Bologna, beef and pork, lowfat" +25220510,"CAPICOLA","Capicola" +25220650,"TURKEY OR CHICKEN AND BEEF SAUSAGE","Turkey or chicken and beef sausage" +25220710,"CHORIZO","Chorizo" +25220910,"HEAD CHEESE","Head cheese" +25221110,"KNOCKWURST","Knockwurst" +25221210,"MORTADELLA","Mortadella" +25221250,"PEPPERONI","Pepperoni" +25221310,"POLISH SAUSAGE","Polish sausage" +25221350,"ITALIAN SAUSAGE","Italian sausage" +25221400,"SAUSAGE (NOT COLD CUT), NFS","Sausage (not cold cut), NFS" +25221405,"PORK SAUSAGE","Pork sausage" +25221406,"PORK SAUSAGE, REDUCED FAT","Pork sausage, reduced fat" +25221450,"PORK SAUSAGE RICE LINKS","Pork sausage rice links" +25221460,"PORK & BEEF SAUSAGE","Pork and beef sausage" +25221500,"SALAMI, NFS","Salami, NFS" +25221510,"SALAMI, SOFT, COOKED","Salami, soft, cooked" +25221520,"SALAMI, DRY OR HARD","Salami, dry or hard" +25221530,"SALAMI, BEEF","Salami, beef" +25221610,"SCRAPPLE, COOKED","Scrapple, cooked" +25221710,"SOUSE","Souse" +25221810,"THURINGER (INCLUDE SUMMER SAUSAGE)","Thuringer" +25221830,"TURKEY OR CHICKEN SAUSAGE","Turkey or chicken sausage" +25221860,"TURKEY OR CHICKEN SAUSAGE, REDUCED FAT","Turkey or chicken sausage, reduced fat" +25221870,"TURKEY AND PORK SAUSAGE","Turkey or chicken and pork sausage" +25221880,"TURKEY OR CHICKEN, PORK, AND BEEF SAUSAGE, REDUCED FAT","Turkey or chicken, pork, and beef sausage, reduced fat" +25221910,"VIENNA SAUSAGE, CANNED","Vienna sausage, canned" +25221950,"PICKLED SAUSAGE","Pickled sausage" +25230110,"LUNCHEON MEAT, NFS","Luncheon meat, NFS" +25230210,"HAM, 
SLICED, PREPACKAGED OR DELI, LUNCHEON MEAT","Ham, sliced, prepackaged or deli, luncheon meat" +25230220,"HAM, SLICED, LOW SALT, PREPACKAGED/DELI, LUNCH MEAT","Ham, sliced, low salt, prepackaged or deli, luncheon meat" +25230230,"HAM, SLICED, EXTRA LEAN, PREPACKAGED/DELI","Ham, sliced, extra lean, prepackaged or deli, luncheon meat" +25230235,"HAM, SLICED, EXTRA LEAN, LOWER SODIUM, PREPACKAGED OR DELI","Ham, sliced, extra lean, lower sodium, prepackaged or deli, luncheon meat" +25230310,"CHICKEN/TURKEY LOAF, PREPACK/DELI, LUNCHEON MEAT","Chicken or turkey loaf, prepackaged or deli, luncheon meat" +25230410,"HAM LOAF, LUNCHEON MEAT","Ham loaf, luncheon meat" +25230430,"HAM & CHEESE LOAF","Ham and cheese loaf" +25230450,"HONEY LOAF","Honey loaf" +25230510,"HAM,LUNCH MEAT,CHOP,MINCED,PRESSD,MINCED,NOT CANNED","Ham, luncheon meat, chopped, minced, pressed, spiced, not canned" +25230520,"HAM, LUNCHEON MEAT, CHOPPED, SPICED,LOWFAT, NOT CAN","Ham, luncheon meat, chopped, minced, pressed, spiced, lowfat, not canned" +25230530,"HAM/PORK , LUNCHEON MEAT, CHOPPED, CAN (INCL SPAM)","Ham and pork, luncheon meat, chopped, minced, pressed, spiced, canned" +25230540,"HAM, PORK & CHICKEN, LUNCHEON MEAT, CHOPPED, CANNED","Ham, pork and chicken, luncheon meat, chopped, minced, pressed, spiced, canned" +25230550,"HAM, PORK & CHICKEN, LUNCHEON MEAT, CHOPPED, CAN, RED SODIUM","Ham, pork, and chicken, luncheon meat, chopped, minced, pressed, spiced, canned, reduced sodium" +25230560,"LIVERWURST","Liverwurst" +25230610,"LUNCHEON LOAF (OLIVE, PICKLE OR PIMIENTO)","Luncheon loaf (olive, pickle, or pimiento)" +25230710,"SANDWICH LOAF, LUNCHEON MEAT","Sandwich loaf, luncheon meat" +25230790,"TURKEY HAM, SLICED, XTRA LEAN, PKG'D, DELI","Turkey ham, sliced, extra lean, prepackaged or deli, luncheon meat" +25230800,"TURKEY HAM","Turkey ham" +25230810,"VEAL LOAF","Veal loaf" +25230820,"TURKEY PASTRAMI","Turkey pastrami" +25230840,"TURKEY SALAMI","Turkey salami" +25230900,"TURKEY OR CHICKEN BREAST, 
PKG'D/DELI, LUNCHEON MEAT","Turkey or chicken breast, prepackaged or deli, luncheon meat" +25230905,"TURKEY/CHICKEN BREAST, LOW SALT, PREPACK/DELI, LUNCHEON MEAT","Turkey or chicken breast, low salt, prepackaged or deli, luncheon meat" +25231110,"BEEF, SLICED, PREPACKAGED/DELI, LUNCHEON MEAT","Beef, sliced, prepackaged or deli, luncheon meat" +25231150,"CORNED BEEF, PRESSED","Corned beef, pressed" +25240000,"MEAT SPREAD OR POTTED MEAT, NFS","Meat spread or potted meat, NFS" +25240110,"CHICKEN SALAD SPREAD","Chicken salad spread" +25240210,"HAM, DEVILED OR POTTED","Ham, deviled or potted" +25240220,"HAM SALAD SPREAD","Ham salad spread" +25240310,"ROAST BEEF SPREAD","Roast beef spread" +25240320,"CORNED BEEF SPREAD","Corned beef spread" +26100100,"FISH, NS AS TO TYPE, RAW","Fish, NS as to type, raw" +26100110,"FISH, COOKED, NS AS TO TYPE & COOKING METHOD","Fish, NS as to type, cooked, NS as to cooking method" +26100120,"FISH, NS AS TO TYPE, BAKED OR BROILED, MADE WITH OIL","Fish, NS as to type, baked or broiled, made with oil" +26100121,"FISH, NS AS TO TYPE, BAKED OR BROILED, MADE WITH BUTTER","Fish, NS as to type, baked or broiled, made with butter" +26100122,"FISH, NS AS TO TYPE, BAKED OR BROILED, MADE WITH MARGARINE","Fish, NS as to type, baked or broiled, made with margarine" +26100123,"FISH, NS AS TO TYPE, BAKED OR BROILED, MADE WITHOUT FAT","Fish, NS as to type, baked or broiled, made without fat" +26100124,"FISH, NS AS TO TYPE, BAKED OR BROILED, MADE W/COOKING SPRAY","Fish, NS as to type, baked or broiled, made with cooking spray" +26100130,"FISH, NS AS TO TYPE, COATED, BAKED, MADE WITH OIL","Fish, NS as to type, coated, baked or broiled, made with oil" +26100131,"FISH, NS AS TO TYPE, COATED, BAKED OR BROILED,W/ BUTTER","Fish, NS as to type, coated, baked or broiled, made with butter" +26100132,"FISH, NS AS TO TYPE, COATED, BAKED OR BROILED, W/MARGARINE","Fish, NS as to type, coated, baked or broiled, made with margarine" +26100133,"FISH, NS AS TO TYPE, 
COATED, BAKED OR BROILED, W/OUT FAT","Fish, NS as to type, coated, baked or broiled, made without fat" +26100134,"FISH, NS AS TO TYPE, COATED, BAKED/ BROILED, W/COOKING SPRAY","Fish, NS as to type, coated, baked or broiled, made with cooking spray" +26100140,"FISH, NS AS TO TYPE, COATED, FRIED, MADE WITH OIL","Fish, NS as to type, coated, fried, made with oil" +26100141,"FISH, NS AS TO TYPE, COATED, FRIED, MADE WITH BUTTER","Fish, NS as to type, coated, fried, made with butter" +26100142,"FISH, NS AS TO TYPE, COATED, FRIED, MADE WITH MARGARINE","Fish, NS as to type, coated, fried, made with margarine" +26100143,"FISH, NS AS TO TYPE, COATED, FRIED, MADE WITHOUT FAT","Fish, NS as to type, coated, fried, made without fat" +26100144,"FISH, NS AS TO TYPE, COATED, MADE WITH COOKING SPRAY","Fish, NS as to type, coated, fried, made with cooking spray" +26100160,"FISH, NS AS TO TYPE, STEAMED","Fish, NS as to type, steamed" +26100170,"FISH, NS AS TO TYPE, DRIED","Fish, NS as to type, dried" +26100180,"FISH, NS AS TO TYPE, CANNED","Fish, NS as to type, canned" +26100190,"FISH, NS AS TO TYPE, SMOKED","Fish, NS as to type, smoked" +26100200,"FISH, NS AS TO TYPE, FROM FAST FOOD","Fish, NS as to type, from fast food" +26100260,"FISH STICK, PATTY OR NUGGET FROM FAST FOOD","Fish stick, patty or nugget from fast food" +26100270,"FISH STICK, PATTY OR NUGGET FROM RESTAURANT, HOME, OR OTHER","Fish stick, patty or nugget from restaurant, home, or other place" +26101110,"ANCHOVY, COOKED, NS AS TO COOKING METHOD","Anchovy, cooked, NS as to cooking method" +26101180,"ANCHOVY, CANNED","Anchovy, canned" +26103110,"BARRACUDA, COOKED, NS AS TO COOKING METHOD","Barracuda, cooked, NS as to cooking method" +26103120,"BARRACUDA, BAKED OR BROILED, FAT ADDED IN COOKING","Barracuda, baked or broiled, fat added in cooking" +26103121,"BARRACUDA, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Barracuda, baked or broiled, fat not added in cooking" +26103130,"BARRACUDA, COATED, BAKED OR BROILED, FAT ADDED 
IN COOKING","Barracuda, coated, baked or broiled, fat added in cooking" +26103131,"BARRACUDA, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKIN","Barracuda, coated, baked or broiled, fat not added in cooking" +26103140,"BARRACUDA, COATED, FRIED","Barracuda, coated, fried" +26103160,"BARRACUDA, STEAMED OR POACHED","Barracuda, steamed or poached" +26105110,"CARP, COOKED, NS AS TO COOKING METHOD","Carp, cooked, NS as to cooking method" +26105120,"CARP, BAKED OR BROILED, FAT ADDED IN COOKING","Carp, baked or broiled, fat added in cooking" +26105121,"CARP, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Carp, baked or broiled, fat not added in cooking" +26105130,"CARP, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Carp, coated, baked or broiled, fat added in cooking" +26105131,"CARP, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Carp, coated, baked or broiled, fat not added in cooking" +26105140,"CARP, COATED, FRIED","Carp, coated, fried" +26105160,"CARP, STEAMED OR POACHED","Carp, steamed or poached" +26105190,"CARP, SMOKED","Carp, smoked" +26107110,"CATFISH, COOKED, NS AS TO COOKING METHOD","Catfish, cooked, NS as to cooking method" +26107120,"CATFISH, BAKED OR BROILED, MADE WITH OIL","Catfish, baked or broiled, made with oil" +26107121,"CATFISH, BAKED OR BROILED, MADE WITH BUTTER","Catfish, baked or broiled, made with butter" +26107122,"CATFISH, BAKED OR BROILED, MADE WITH MARGARINE","Catfish, baked or broiled, made with margarine" +26107123,"CATFISH, BAKED OR BROILED, MADE WITHOUT FAT","Catfish, baked or broiled, made without fat" +26107124,"CATFISH, BAKED OR BROILED, MADE WITH COOKING SPRAY","Catfish, baked or broiled, made with cooking spray" +26107130,"CATFISH, COATED, BAKED OR BROILED, MADE WITH OIL","Catfish, coated, baked or broiled, made with oil" +26107131,"CATFISH, COATED, BAKED OR BROILED, MADE WITH BUTTER","Catfish, coated, baked or broiled, made with butter" +26107132,"CATFISH, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Catfish, coated, baked 
or broiled, made with margarine" +26107133,"CATFISH, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Catfish, coated, baked or broiled, made without fat" +26107134,"CATFISH, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Catfish, coated, baked or broiled, made with cooking spray" +26107140,"CATFISH, COATED, FRIED, MADE WITH OIL","Catfish, coated, fried, made with oil" +26107141,"CATFISH, COATED, FRIED, MADE WITH BUTTER","Catfish, coated, fried, made with butter" +26107142,"CATFISH, COATED, FRIED, MADE WITH MARGARINE","Catfish, coated, fried, made with margarine" +26107143,"CATFISH, COATED, FRIED, MADE WITHOUT FAT","Catfish, coated, fried, made without fat" +26107144,"CATFISH, COATED, FRIED, MADE WITH COOKING SPRAY","Catfish, coated, fried, made with cooking spray" +26107160,"CATFISH, STEAMED OR POACHED","Catfish, steamed or poached" +26109110,"COD, COOKED, NS AS TO COOKING METHOD","Cod, cooked, NS as to cooking method" +26109120,"COD, BAKED OR BROILED, MADE WITH OIL","Cod, baked or broiled, made with oil" +26109121,"COD, BAKED OR BROILED, MADE WITH BUTTER","Cod, baked or broiled, made with butter" +26109122,"COD, BAKED OR BROILED, MADE WITH MARGARINE","Cod, baked or broiled, made with margarine" +26109123,"COD, BAKED OR BROILED, MADE WITHOUT FAT","Cod, baked or broiled, made without fat" +26109124,"COD, BAKED OR BROILED, MADE WITH COOKING SPRAY","Cod, baked or broiled, made with cooking spray" +26109130,"COD, COATED, BAKED OR BROILED, MADE WITH OIL","Cod, coated, baked or broiled, made with oil" +26109131,"COD, COATED, BAKED OR BROILED, MADE WITH BUTTER","Cod, coated, baked or broiled, made with butter" +26109132,"COD, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Cod, coated, baked or broiled, made with margarine" +26109133,"COD, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Cod, coated, baked or broiled, made without fat" +26109134,"COD, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Cod, coated, baked or broiled, made with cooking spray" +26109140,"COD, 
COATED, FRIED, MADE WITH OIL","Cod, coated, fried, made with oil" +26109141,"COD, COATED, FRIED, MADE WITH BUTTER","Cod, coated, fried, made with butter" +26109142,"COD, COATED, FRIED, MADE WITH MARGARINE","Cod, coated, fried, made with margarine" +26109143,"COD, COATED, FRIED, MADE WITHOUT FAT","Cod, coated, fried, made without fat" +26109144,"COD, COATED, FRIED, MADE WITH COOKING SPRAY","Cod, coated, fried, made with cooking spray" +26109160,"COD, STEAMED OR POACHED","Cod, steamed or poached" +26109170,"COD, DRIED, SALTED","Cod, dried, salted" +26109180,"COD, DRIED, SALTED, SALT REMOVED IN WATER","Cod, dried, salted, salt removed in water" +26109190,"COD, SMOKED","Cod, smoked" +26111110,"CROAKER, COOKED, NS AS TO COOKING METHOD","Croaker, cooked, NS as to cooking method" +26111120,"CROAKER, BAKED OR BROILED, FAT ADDED IN COOKING","Croaker, baked or broiled, fat added in cooking" +26111121,"CROAKER, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Croaker, baked or broiled, fat not added in cooking" +26111130,"CROAKER, COATED, BAKED, FAT ADDED IN COOKING","Croaker, coated, baked or broiled, fat added in cooking" +26111131,"CROAKER, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Croaker, coated, baked or broiled, fat not added in cooking" +26111140,"CROAKER, COATED, FRIED","Croaker, coated, fried" +26111160,"CROAKER, STEAMED OR POACHED","Croaker, steamed or poached" +26113110,"EEL, COOKED, NS AS TO COOKING METHOD","Eel, cooked, NS as to cooking method" +26113160,"EEL, STEAMED OR POACHED","Eel, steamed or poached" +26113190,"EEL, SMOKED","Eel, smoked" +26115000,"FLOUNDER, RAW","Flounder, raw" +26115110,"FLOUNDER, COOKED, NS AS TO COOKING METHOD","Flounder, cooked, NS as to cooking method" +26115120,"FLOUNDER, BAKED OR BROILED, MADE WTIH OIL","Flounder, baked or broiled, made with oil" +26115121,"FLOUNDER, BAKED OR BROILED, MADE WITH BUTTER","Flounder, baked or broiled, made with butter" +26115122,"FLOUNDER, BAKED OR BROILED, MADE WITH MARGARINE","Flounder, baked 
or broiled, made with margarine" +26115123,"FLOUNDER, BAKED OR BROILED, MADE WITHOUT FAT","Flounder, baked or broiled, made without fat" +26115124,"FLOUNDER, BAKED OR BROILED, MADE WITH COOKING SPRAY","Flounder, baked or broiled, made with cooking spray" +26115130,"FLOUNDER, COATED, BAKED OR BROILED, MADE WITH OIL","Flounder, coated, baked or broiled, made with oil" +26115131,"FLOUNDER, COATED, BAKED OR BROILED, MADE WITH BUTTER","Flounder, coated, baked or broiled, made with butter" +26115132,"FLOUNDER, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Flounder, coated, baked or broiled, made with margarine" +26115133,"FLOUNDER, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Flounder, coated, baked or broiled, made without fat" +26115134,"FLOUNDER, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Flounder, coated, baked or broiled, made with cooking spray" +26115140,"FLOUNDER, COATED, FRIED, MADE WITH OIL","Flounder, coated, fried, made with oil" +26115141,"FLOUNDER, COATED, FRIED, MADE WITH BUTTER","Flounder, coated, fried, made with butter" +26115142,"FLOUNDER, COATED, FRIED, MADE WITH MARGARINE","Flounder, coated, fried, made with margarine" +26115143,"FLOUNDER, COATED, FRIED, MADE WITHOUT FAT","Flounder, coated, fried, made without fat" +26115144,"FLOUNDER, COATED, FRIED, MADE WITH COOKING SPRAY","Flounder, coated, fried, made with cooking spray" +26115160,"FLOUNDER, STEAMED OR POACHED","Flounder, steamed or poached" +26115190,"FLOUNDER, SMOKED","Flounder, smoked" +26117110,"HADDOCK, COOKED, NS AS TO COOKING METHOD","Haddock, cooked, NS as to cooking method" +26117120,"HADDOCK, BAKED OR BROILED, FAT ADDED IN COOKING","Haddock, baked or broiled, fat added in cooking" +26117121,"HADDOCK, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Haddock, baked or broiled, fat not added in cooking" +26117130,"HADDOCK, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Haddock, coated, baked or broiled, fat added in cooking" +26117131,"HADDOCK, COATED, BAKED OR BROILED, FAT NOT 
ADDED IN COOKING","Haddock, coated, baked or broiled, fat not added in cooking" +26117140,"HADDOCK, COATED, FRIED","Haddock, coated, fried" +26117160,"HADDOCK, STEAMED OR POACHED","Haddock, steamed or poached" +26117190,"HADDOCK, SMOKED","Haddock, smoked" +26118000,"HALIBUT, RAW","Halibut, raw" +26118010,"HALIBUT, COOKED, NS AS TO COOKING METHOD","Halibut, cooked, NS as to cooking method" +26118020,"HALIBUT, BAKED OR BROILED, MADE WITH OIL","Halibut, baked or broiled, made with oil" +26118021,"HALIBUT, BAKED OR BROILED, MADE WITH BUTTER","Halibut, baked or broiled, made with butter" +26118022,"HALIBUT, BAKED OR BROILED, MADE WITH MARGARINE","Halibut, baked or broiled, made with margarine" +26118023,"HALIBUT, BAKED OR BROILED, MADE WITHOUT FAT","Halibut, baked or broiled, made without fat" +26118024,"HALIBUT, BAKED OR BROILED, MADE WITH COOKING SPRAY","Halibut, baked or broiled, made with cooking spray" +26118030,"HALIBUT, COATED, BAKED OR BROILED, MADE WITH OIL","Halibut, coated, baked or broiled, made with oil" +26118031,"HALIBUT, COATED, BAKED OR BROILED, MADE WITH BUTTER","Halibut, coated, baked or broiled, made with butter" +26118032,"HALIBUT, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Halibut, coated, baked or broiled, made with margarine" +26118033,"HALIBUT, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Halibut, coated, baked or broiled, made without fat" +26118034,"HALIBUT, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Halibut, coated, baked or broiled, made with cooking spray" +26118040,"HALIBUT, COATED, FRIED, MADE WITH OIL","Halibut, coated, fried, made with oil" +26118041,"HALIBUT, COATED, FRIED, MADE WITH BUTTER","Halibut, coated, fried, made with butter" +26118042,"HALIBUT, COATED, FRIED, MADE WITH MARGARINE","Halibut, coated, fried, made with margarine" +26118043,"HALIBUT, COATED, FRIED, MADE WITHOUT FAT","Halibut, coated, fried, made without fat" +26118044,"HALIBUT, COATED, FRIED, MADE WITH COOKING SPRAY","Halibut, coated, fried, made with 
cooking spray" +26118050,"HALIBUT, STEAMED OR POACHED","Halibut, steamed or poached" +26118060,"HALIBUT, SMOKED","Halibut, smoked" +26119100,"HERRING, RAW","Herring, raw" +26119110,"HERRING, COOKED, NS AS TO COOKING METHOD","Herring, cooked, NS as to cooking method" +26119120,"HERRING, BAKED OR BROILED, FAT ADDED IN COOKING","Herring, baked or broiled, fat added in cooking" +26119121,"HERRING, BAKED OR BROILED, FAT NOT USED IN PREPARATION","Herring, baked or broiled, fat not added in cooking" +26119130,"HERRING, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Herring, coated, baked or broiled, fat added in cooking" +26119131,"HERRING, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Herring, coated, baked or broiled, fat not added in cooking" +26119140,"HERRING, COATED, FRIED","Herring, coated, fried" +26119160,"HERRING, PICKLED, IN CREAM SAUCE","Herring, pickled, in cream sauce" +26119170,"HERRING, DRIED, SALTED","Herring, dried, salted" +26119180,"HERRING, PICKLED","Herring, pickled" +26119190,"HERRING, SMOKED, KIPPERED","Herring, smoked, kippered" +26121100,"MACKEREL, RAW","Mackerel, raw" +26121110,"MACKEREL, COOKED, NS AS TO COOKING METHOD","Mackerel, cooked, NS as to cooking method" +26121120,"MACKEREL, BAKED OR BROILED, FAT ADDED IN COOKING","Mackerel, baked or broiled, fat added in cooking" +26121121,"MACKEREL, BAKED OR BROILED, FAT NOT USED IN PREPARATION","Mackerel, baked or broiled, fat not added in cooking" +26121131,"MACKEREL, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Mackerel, coated, baked or broiled, fat added in cooking" +26121132,"MACKEREL, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Mackerel, coated, baked or broiled, fat not added in cooking" +26121140,"MACKEREL, COATED, FRIED","Mackerel, coated, fried" +26121160,"MACKEREL, PICKLED","Mackerel, pickled" +26121180,"MACKEREL, CANNED","Mackerel, canned" +26121190,"MACKEREL, SMOKED","Mackerel, smoked" +26123100,"MULLET, RAW","Mullet, raw" +26123110,"MULLET, COOKED, NS AS TO 
COOKING METHOD","Mullet, cooked, NS as to cooking method" +26123120,"MULLET, BAKED OR BROILED, FAT USED IN PREPARATION","Mullet, baked or broiled, fat added in cooking" +26123121,"MULLET, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Mullet, baked or broiled, fat not added in cooking" +26123130,"MULLET, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Mullet, coated, baked or broiled, fat added in cooking" +26123131,"MULLET, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Mullet, coated, baked or broiled, fat not added in cooking" +26123140,"MULLET, COATED, FRIED","Mullet, coated, fried" +26123160,"MULLET, STEAMED OR POACHED","Mullet, steamed or poached" +26125100,"OCEAN PERCH, RAW","Ocean perch, raw" +26125110,"OCEAN PERCH, COOKED, NS AS TO COOKING METHOD","Ocean perch, cooked, NS as to cooking method" +26125120,"OCEAN PERCH, BAKED OR BROILED, FAT USED IN PREPARATION","Ocean perch, baked or broiled, fat added in cooking" +26125121,"OCEAN PERCH, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Ocean perch, baked or broiled, fat not added in cooking" +26125130,"OCEAN PERCH, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Ocean perch, coated, baked or broiled, fat added in cooking" +26125131,"OCEAN PERCH, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOK","Ocean perch, coated, baked or broiled, fat not added in cooking" +26125140,"OCEAN PERCH, COATED, FRIED","Ocean perch, coated, fried" +26125160,"OCEAN PERCH, STEAMED OR POACHED","Ocean perch, steamed or poached" +26127110,"PERCH, COOKED, NS AS TO COOKING METHOD","Perch, cooked, NS as to cooking method" +26127120,"PERCH, BAKED OR BROILED, MADE WITH OIL","Perch, baked or broiled, made with oil" +26127121,"PERCH, BAKED OR BROILED, MADE WITH BUTTER","Perch, baked or broiled, made with butter" +26127122,"PERCH, BAKED OR BROILED, MADE WITH MARGARINE","Perch, baked or broiled, made with margarine" +26127123,"PERCH, BAKED OR BROILED, MADE WITHOUT FAT","Perch, baked or broiled, made without fat" +26127124,"PERCH, BAKED OR 
BROILED, MADE WITH COOKING SPRAY","Perch, baked or broiled, made with cooking spray" +26127130,"PERCH, COATED, BAKED OR BROILED, MADE WITH OIL","Perch, coated, baked or broiled, made with oil" +26127131,"PERCH, COATED, BAKED OR BROILED, MADE WITH BUTTER","Perch, coated, baked or broiled, made with butter" +26127132,"PERCH, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Perch, coated, baked or broiled, made with margarine" +26127133,"PERCH, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Perch, coated, baked or broiled, made without fat" +26127134,"PERCH, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Perch, coated, baked or broiled, made with cooking spray" +26127140,"PERCH, COATED, FRIED","Perch, coated, fried, made with oil" +26127141,"PERCH, COATED, FRIED, MADE WITH BUTTER","Perch, coated, fried, made with butter" +26127142,"PERCH, COATED, FRIED, MADE WITH MARGARINE","Perch, coated, fried, made with margarine" +26127143,"PERCH, COATED, FRIED, MADE WITHOUT FAT","Perch, coated, fried, made without fat" +26127144,"PERCH, COATED, FRIED, MADE WITH COOKING SPRAY","Perch, coated, fried, made with cooking spray" +26127160,"PERCH, STEAMED OR POACHED","Perch, steamed or poached" +26129110,"PIKE, COOKED, NS AS TO COOKING METHOD","Pike, cooked, NS as to cooking method" +26129120,"PIKE, BAKED OR BROILED, FAT ADDED IN COOKING","Pike, baked or broiled, fat added in cooking" +26129121,"PIKE, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Pike, baked or broiled, fat not added in cooking" +26129130,"PIKE, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Pike, coated, baked or broiled, fat added in cooking" +26129131,"PIKE, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Pike, coated, baked or broiled, fat not added in cooking" +26129140,"PIKE, COATED, FRIED","Pike, coated, fried" +26129160,"PIKE, STEAMED OR POACHED","Pike, steamed or poached" +26131100,"POMPANO, RAW","Pompano, raw" +26131110,"POMPANO, COOKED, NS AS TO COOKING METHOD","Pompano, cooked, NS as to cooking 
method" +26131120,"POMPANO, BAKED OR BROILED, FAT ADDED IN COOKING","Pompano, baked or broiled, fat added in cooking" +26131121,"POMPANO, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Pompano, baked or broiled, fat not added in cooking" +26131130,"POMPANO, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Pompano, coated, baked or broiled, fat added in cooking" +26131131,"POMPANO, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Pompano, coated, baked or broiled, fat not added in cooking" +26131140,"POMPANO, COATED, FRIED","Pompano, coated, fried" +26131160,"POMPANO, STEAMED OR POACHED","Pompano, steamed or poached" +26131190,"POMPANO, SMOKED","Pompano, smoked" +26133100,"PORGY, RAW","Porgy, raw" +26133110,"PORGY, COOKED, NS AS TO COOKING METHOD","Porgy, cooked, NS as to cooking method" +26133120,"PORGY, BAKED OR BROILED, FAT ADDED IN COOKING","Porgy, baked or broiled, fat added in cooking" +26133121,"PORGY, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Porgy, baked or broiled, fat not added in cooking" +26133130,"PORGY, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Porgy, coated, baked or broiled, fat added in cooking" +26133131,"PORGY, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Porgy, coated, baked or broiled, fat not added in cooking" +26133140,"PORGY, COATED, FRIED","Porgy, coated, fried" +26133160,"PORGY, STEAMED OR POACHED","Porgy, steamed or poached" +26135110,"RAY, COOKED, NS AS TO COOKING METHOD","Ray, cooked, NS as to cooking method" +26135120,"RAY, BAKED OR BROILED, FAT ADDED IN COOKING","Ray, baked or broiled, fat added in cooking" +26135121,"RAY, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Ray, baked or broiled, fat not added in cooking" +26135130,"RAY, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Ray, coated, baked or broiled, fat added in cooking" +26135131,"RAY, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Ray, coated, baked or broiled, fat not added in cooking" +26135140,"RAY, COATED, FRIED","Ray, coated, fried" 
+26135160,"RAY, STEAMED OR POACHED","Ray, steamed or poached" +26137100,"SALMON, RAW","Salmon, raw" +26137110,"SALMON, COOKED, NS AS TO COOKING METHOD","Salmon, cooked, NS as to cooking method" +26137120,"SALMON, BAKED OR BROILED, MADE WITH OIL","Salmon, baked or broiled, made with oil" +26137121,"SALMON, BAKED OR BROILED, MADE WITH BUTTER","Salmon, baked or broiled, made with butter" +26137122,"SALMON, BAKED OR BROILED, MADE WITH MARGARINE","Salmon, baked or broiled, made with margarine" +26137123,"SALMON, BAKED OR BROILED, MADE WITHOUT FAT","Salmon, baked or broiled, made without fat" +26137124,"SALMON, BAKED OR BROILED, MADE WITH COOKING SPRAY","Salmon, baked or broiled, made with cooking spray" +26137130,"SALMON, COATED, BAKED OR BROILED, MADE WITH OIL","Salmon, coated, baked or broiled, made with oil" +26137131,"SALMON, COATED, BAKED OR BROILED, MADE WITH BUTTER","Salmon, coated, baked or broiled, made with butter" +26137132,"SALMON, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Salmon, coated, baked or broiled, made with margarine" +26137133,"SALMON, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Salmon, coated, baked or broiled, made without fat" +26137134,"SALMON, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Salmon, coated, baked or broiled, made with cooking spray" +26137140,"SALMON, COATED, FRIED, MADE WITH OIL","Salmon, coated, fried, made with oil" +26137141,"SALMON, COATED, FRIED, MADE WITH BUTTER","Salmon, coated, fried, made with butter" +26137142,"SALMON, COATED, FRIED, MADE WITH MARGARINE","Salmon, coated, fried, made with margarine" +26137143,"SALMON, COATED, FRIED, MADE WITHOUT FAT","Salmon, coated, fried, made without fat" +26137144,"SALMON, COATED, FRIED, MADE WITH COOKING SPRAY","Salmon, coated, fried, made with cooking spray" +26137160,"SALMON, STEAMED OR POACHED","Salmon, steamed or poached" +26137170,"SALMON, DRIED","Salmon, dried" +26137180,"SALMON, CANNED","Salmon, canned" +26137190,"SALMON, SMOKED (INCLUDE LOX)","Salmon, smoked" 
+26139110,"SARDINES, COOKED","Sardines, cooked" +26139170,"SARDINE, DRIED","Sardines, dried" +26139180,"SARDINES, CANNED IN OIL","Sardines, canned in oil" +26139190,"SARDINES, SKINLESS, BONELESS, PACKED IN WATER","Sardines, skinless, boneless, packed in water" +26141110,"SEA BASS, COOKED, NS AS TO COOKING METHOD","Sea bass, cooked, NS as to cooking method" +26141120,"SEA BASS, BAKED OR BROILED, FAT ADDED IN COOKING","Sea bass, baked or broiled, fat added in cooking" +26141121,"SEA BASS, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Sea bass, baked or broiled, fat not added in cooking" +26141130,"SEA BASS, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Sea bass, coated, baked or broiled, fat added in cooking" +26141131,"SEA BASS, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Sea bass, coated, baked or broiled, fat not added in cooking" +26141140,"SEA BASS, COATED, FRIED","Sea bass, coated, fried" +26141160,"SEA BASS, STEAMED OR POACHED","Sea bass, steamed or poached" +26141180,"SEA BASS, PICKLED (MERO EN ESCABECHE)","Sea bass, pickled (Mero en escabeche)" +26143110,"SHARK, COOKED, NS AS TO COOKING METHOD","Shark, cooked, NS as to cooking method" +26143120,"SHARK, BAKED OR BROILED, FAT ADDED IN COOKING","Shark, baked or broiled, fat added in cooking" +26143121,"SHARK, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Shark, baked or broiled, fat not added in cooking" +26143130,"SHARK, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Shark, coated, baked or broiled, fat added in cooking" +26143131,"SHARK, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Shark, coated, baked or broiled, fat not added in cooking" +26143140,"SHARK, COATED, FRIED","Shark, coated, fried" +26143160,"SHARK, STEAMED OR POACHED","Shark, steamed or poached" +26147110,"STURGEON, COOKED, NS AS TO COOKING METHOD","Sturgeon, cooked, NS as to cooking method" +26147120,"STURGEON, BAKED OR BROILED, FAT ADDED IN COOKING","Sturgeon, baked or broiled, fat added in cooking" +26147130,"STURGEON, 
STEAMED","Sturgeon, steamed" +26147140,"STURGEON, COATED, FRIED","Sturgeon, coated, fried" +26147190,"STURGEON, SMOKED","Sturgeon, smoked" +26149110,"SWORDFISH, COOKED, NS AS TO COOKING METHOD","Swordfish, cooked, NS as to cooking method" +26149120,"SWORDFISH, BAKED OR BROILED, FAT ADDED IN COOKING","Swordfish, baked or broiled, fat added in cooking" +26149121,"SWORDFISH, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Swordfish, baked or broiled, fat not added in cooking" +26149130,"SWORDFISH, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Swordfish, coated, baked or broiled, fat added in cooking" +26149131,"SWORDFISH, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKIN","Swordfish, coated, baked or broiled, fat not added in cooking" +26149140,"SWORDFISH, COATED, FRIED","Swordfish, coated, fried" +26149160,"SWORDFISH, STEAMED OR POACHED","Swordfish, steamed or poached" +26151110,"TROUT, COOKED, NS AS TO COOKING METHOD","Trout, cooked, NS as to cooking method" +26151120,"TROUT, BAKED OR BROILED, MADE WITH OIL","Trout, baked or broiled, made with oil" +26151121,"TROUT, BAKED OR BROILED, MADE WITH BUTTER","Trout, baked or broiled, made with butter" +26151122,"TROUT, BAKED OR BROILED, MADE WITH MARGARINE","Trout, baked or broiled, made with margarine" +26151123,"TROUT, BAKED OR BROILED, MADE WITHOUT FAT","Trout, baked or broiled, made without fat" +26151124,"TROUT, BAKED OR BROILED, MADE WITH COOKING SPRAY","Trout, baked or broiled, made with cooking spray" +26151130,"TROUT, COATED, BAKED OR BROILED, MADE WITH OIL","Trout, coated, baked or broiled, made with oil" +26151131,"TROUT, COATED, BAKED OR BROILED, MADE WITH BUTTER","Trout, coated, baked or broiled, made with butter" +26151132,"TROUT, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Trout, coated, baked or broiled, made with margarine" +26151133,"TROUT, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Trout, coated, baked or broiled, made without fat" +26151134,"TROUT, COATED, BAKED OR BROILED, MADE WITH COOKING 
SPRAY","Trout, coated, baked or broiled, made with cooking spray" +26151140,"TROUT, COATED, FRIED, MADE WITH OIL","Trout, coated, fried, made with oil" +26151141,"TROUT, COATED, FRIED, MADE WITH BUTTER","Trout, coated, fried, made with butter" +26151142,"TROUT, COATED, FRIED, MADE WITH MARGARINE","Trout, coated, fried, made with margarine" +26151143,"TROUT, COATED, FRIED, MADE WITHOUT FAT","Trout, coated, fried, made without fat" +26151144,"TROUT, COATED, FRIED, MADE WITH COOKING SPRAY","Trout, coated, fried, made with cooking spray" +26151160,"TROUT, STEAMED OR POACHED","Trout, steamed or poached" +26151190,"TROUT, SMOKED","Trout, smoked" +26153100,"TUNA, FRESH, RAW","Tuna, fresh, raw" +26153110,"TUNA, FRESH, COOKED, NS AS TO COOKING METHOD","Tuna, fresh, cooked, NS as to cooking method" +26153120,"TUNA, FRESH, BAKED OR BROILED, FAT ADDED IN COOKING","Tuna, fresh, baked or broiled, fat added in cooking" +26153122,"TUNA, FRESH, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Tuna, fresh, baked or broiled, fat not added in cooking" +26153130,"TUNA, FRESH, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Tuna, fresh, coated, baked or broiled, fat added in cooking" +26153131,"TUNA, FRESH, COATED, BAKED OR BROILED, FAT NOT ADDED","Tuna, fresh, coated, baked or broiled, fat not added" +26153140,"TUNA, FRESH, COATED, FRIED","Tuna, fresh, coated, fried" +26153160,"TUNA, FRESH, STEAMED OR POACHED","Tuna, fresh, steamed or poached" +26153170,"TUNA, FRESH, DRIED","Tuna, fresh, dried" +26153190,"TUNA, FRESH, SMOKED","Tuna, fresh, smoked" +26155110,"TUNA, CANNED, NS AS TO OIL OR WATER PACK","Tuna, canned, NS as to oil or water pack" +26155180,"TUNA, CANNED, OIL PACK","Tuna, canned, oil pack" +26155190,"TUNA, CANNED, WATER PACK","Tuna, canned, water pack" +26157110,"WHITING, COOKED, NS AS TO COOKING METHOD","Whiting, cooked, NS as to cooking method" +26157120,"WHITING, BAKED OR BROILED, MADE WITH OIL","Whiting, baked or broiled, made with oil" +26157121,"WHITING, BAKED OR BROILED, 
MADE WITH BUTTER","Whiting, baked or broiled, made with butter" +26157122,"WHITING, BAKED OR BROILED, MADE WITH MARGARINE","Whiting, baked or broiled, made with margarine" +26157123,"WHITING, BAKED OR BROILED, MADE WITHOUT FAT","Whiting, baked or broiled, made without fat" +26157124,"WHITING, BAKED OR BROILED, MADE WITH COOKING SPRAY","Whiting, baked or broiled, made with cooking spray" +26157130,"WHITING, COATED, BAKED OR BROILED, MADE WITH OIL","Whiting, coated, baked or broiled, made with oil" +26157131,"WHITING, COATED, BAKED OR BROILED, MADE WITH BUTTER","Whiting, coated, baked or broiled, made with butter" +26157132,"WHITING, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Whiting, coated, baked or broiled, made with margarine" +26157133,"WHITING, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Whiting, coated, baked or broiled, made without fat" +26157134,"WHITING, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Whiting, coated, baked or broiled, made with cooking spray" +26157140,"WHITING, COATED, FRIED, MADE WITH OIL","Whiting, coated, fried, made with oil" +26157141,"WHITING, COATED, FRIED, MADE WITH BUTTER","Whiting, coated, fried, made with butter" +26157142,"WHITING, COATED, FRIED, MADE WITH MARGARINE","Whiting, coated, fried, made with margarine" +26157143,"WHITING, COATED, FRIED, MADE WITHOUT FAT","Whiting, coated, fried, made without fat" +26157144,"WHITING, COATED, FRIED, MADE WITH COOKING SPRAY","Whiting, coated, fried, made with cooking spray" +26157160,"WHITING, STEAMED OR POACHED","Whiting, steamed or poached" +26158000,"TILAPIA, COOKED, NS AS TO COOKING METHOD","Tilapia, cooked, NS as to cooking method" +26158010,"TILAPIA, BAKED OR BROILED, MADE WITH OIL","Tilapia, baked or broiled, made with oil" +26158011,"TILAPIA, BAKED OR BROILED, MADE WITH BUTTER","Tilapia, baked or broiled, made with butter" +26158012,"TILAPIA, BAKED OR BROILED, MADE WITH MARGARINE","Tilapia, baked or broiled, made with margarine" +26158013,"TILAPIA, BAKED OR BROILED, 
MADE WITHOUT FAT","Tilapia, baked or broiled, made without fat" +26158014,"TILAPIA, BAKED OR BROILED, MADE WITH COOKING SPRAY","Tilapia, baked or broiled, made with cooking spray" +26158020,"TILAPIA, COATED, BAKED OR BROILED, MADE WITH OIL","Tilapia, coated, baked or broiled, made with oil" +26158021,"TILAPIA, COATED, BAKED OR BROILED, MADE WITH BUTTER","Tilapia, coated, baked or broiled, made with butter" +26158022,"TILAPIA, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Tilapia, coated, baked or broiled, made with margarine" +26158023,"TILAPIA, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Tilapia, coated, baked or broiled, made without fat" +26158024,"TILAPIA, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Tilapia, coated, baked or broiled, made with cooking spray" +26158030,"TILAPIA, COATED, FRIED, MADE WITH OIL","Tilapia, coated, fried, made with oil" +26158031,"TILAPIA, COATED, FRIED, MADE WITH BUTTER","Tilapia, coated, fried, made with butter" +26158032,"TILAPIA, COATED, FRIED, MADE WITH MARGARINE","Tilapia, coated, fried, made with margarine" +26158033,"TILAPIA, COATED, FRIED, MADE WITHOUT FAT","Tilapia, coated, fried, made without fat" +26158034,"TILAPIA, COATED, FRIED, MADE WITH COOKING SPRAY","Tilapia, coated, fried, made with cooking spray" +26158050,"TILAPIA, STEAMED OR POACHED","Tilapia, steamed or poached" +26203110,"FROG LEGS, NS AS TO COOKING METHOD","Frog legs, NS as to cooking method" +26203160,"FROG LEGS, STEAMED","Frog legs, steamed" +26205110,"OCTOPUS, COOKED, NS AS TO COOKING METHOD","Octopus, cooked, NS as to cooking method" +26205160,"OCTOPUS, STEAMED","Octopus, steamed" +26205170,"OCTOPUS, DRIED","Octopus, dried" +26205180,"OCTOPUS, DRIED, BOILED","Octopus, dried, boiled" +26205190,"OCTOPUS, SMOKED","Octopus, smoked" +26207110,"ROE, SHAD, COOKED (INCL COD ROE)","Roe, shad, cooked" +26209100,"ROE, HERRING","Roe, herring" +26211100,"ROE, STURGEON (INCLUDE CAVIAR)","Roe, sturgeon" +26213100,"SQUID, RAW","Squid, raw" +26213120,"SQUID, 
BAKED, BROILED, FAT ADDED IN COOKING","Squid, baked or broiled, fat added in cooking" +26213121,"SQUID, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Squid, baked or broiled, fat not added in cooking" +26213130,"SQUID, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Squid, coated, baked or broiled, fat added in cooking" +26213131,"SQUID, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Squid, coated, baked or broiled, fat not added in cooking" +26213140,"SQUID, COATED, FRIED","Squid, coated, fried" +26213160,"SQUID, STEAMED OR BOILED","Squid, steamed or boiled" +26213170,"SQUID, DRIED","Squid, dried" +26213180,"SQUID, PICKLED","Squid, pickled" +26213190,"SQUID, CANNED","Squid, canned" +26215120,"TURTLE, COOKED, NS AS TO METHOD","Turtle (terrapin), cooked, NS as to cooking method" +26301110,"ABALONE, COOKED, NS AS TO COOKING METHOD","Abalone, cooked, NS as to cooking method" +26301140,"ABALONE, FLOURED OR BREADED, FRIED","Abalone, floured or breaded, fried" +26301160,"ABALONE, STEAMED OR POACHED","Abalone, steamed or poached" +26303100,"CLAMS, RAW","Clams, raw" +26303110,"CLAMS, COOKED, NS AS TO COOKING METHOD","Clams, cooked, NS as to cooking method" +26303120,"CLAMS, BAKED OR BROILED, FAT ADDED IN COOKING","Clams, baked or broiled, fat added in cooking" +26303121,"CLAMS, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Clams, baked or broiled, fat not added in cooking" +26303130,"CLAMS, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Clams, coated, baked or broiled, fat added in cooking" +26303131,"CLAMS ,BAKED OR BROILED, FAT NOT ADDED IN COOKING","Clams, coated, baked or broiled, fat not added in cooking" +26303140,"CLAMS,COATED, FRIED","Clams, coated, fried" +26303160,"CLAMS, STEAMED OR BOILED","Clams, steamed or boiled" +26303180,"CLAMS, CANNED","Clams, canned" +26303190,"CLAMS, SMOKED, IN OIL","Clams, smoked, in oil" +26305110,"CRAB, COOKED, NS AS TO COOKING METHOD","Crab, cooked, NS as to cooking method" +26305120,"CRAB, BAKED OR BROILED, FAT ADDED IN 
COOKING","Crab, baked or broiled, fat added in cooking" +26305121,"CRAB, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Crab, baked or broiled, fat not added in cooking" +26305130,"CRAB, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Crab, coated, baked or broiled, fat added in cooking" +26305131,"CRAB, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Crab, coated, baked or broiled, fat not added in cooking" +26305160,"CRAB, HARD SHELL, STEAMED","Crab, hard shell, steamed" +26305180,"CRAB, CANNED","Crab, canned" +26307140,"CRAB, SOFT SHELL, COATED, FRIED","Crab, soft shell, coated, fried" +26309140,"CRAYFISH, COATED, FRIED","Crayfish, coated, fried" +26309160,"CRAYFISH, BOILED OR STEAMED","Crayfish, boiled or steamed" +26311110,"LOBSTER, COOKED, NS AS TO METHOD","Lobster, cooked, NS as to cooking method" +26311120,"LOBSTER, BAKED OR BROILED, FAT ADDED IN COOKING","Lobster, baked or broiled, fat added in cooking" +26311121,"LOBSTER, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Lobster, baked or broiled, fat not added in cooking" +26311140,"LOBSTER, COATED, FRIED","Lobster, coated, fried" +26311160,"LOBSTER, STEAMED OR BOILED","Lobster, steamed or boiled" +26311170,"LOBSTER, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Lobster, coated, baked or broiled, fat added in cooking" +26311171,"LOBSTER, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Lobster, coated, baked or broiled, fat not added in cooking" +26311180,"LOBSTER, CANNED","Lobster, canned" +26313100,"MUSSELS, RAW","Mussels, raw" +26313110,"MUSSELS, COOKED, NS AS TO COOKING METHOD","Mussels, cooked, NS as to cooking method" +26313160,"MUSSELS, STEAMED","Mussels, steamed or poached" +26315100,"OYSTERS, RAW","Oysters, raw" +26315110,"OYSTERS, COOKED, NS AS TO COOKING METHOD","Oysters, cooked, NS as to cooking method" +26315120,"OYSTERS, BAKED OR BROILED, FAT ADDED IN COOKING","Oysters, baked or broiled, fat added in cooking" +26315121,"OYSTERS, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Oysters, 
baked or broiled, fat not added in cooking" +26315130,"OYSTERS, STEAMED","Oysters, steamed" +26315140,"OYSTERS, COATED, FRIED","Oysters, coated, fried" +26315160,"OYSTERS, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Oysters, coated, baked or broiled, fat added in cooking" +26315161,"OYSTERS, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Oysters, coated, baked or broiled, fat not added in cooking" +26315180,"OYSTERS, CANNED","Oysters, canned" +26315190,"OYSTERS, SMOKED","Oysters, smoked" +26317110,"SCALLOPS, COOKED, NS AS TO COOKING METHOD","Scallops, cooked, NS as to cooking method" +26317120,"SCALLOPS, BAKED OR BROILED, FAT ADDED IN COOKING","Scallops, baked or broiled, fat added in cooking" +26317121,"SCALLOPS, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Scallops, baked or broiled, fat not added in cooking" +26317130,"SCALLOPS, STEAMED OR BOILED","Scallops, steamed or boiled" +26317140,"SCALLOPS, COATED, FRIED","Scallops, coated, fried" +26317160,"SCALLOPS, COATED, BAKED OR BROILED, FAT ADDED IN COOKING","Scallops, coated, baked or broiled, fat added in cooking" +26317161,"SCALLOPS, COATED, BAKED OR BROILED, FAT NOT ADDED IN COOKING","Scallops, coated, baked or broiled, fat not added in cooking" +26319110,"SHRIMP, COOKED, NS AS TO COOKING METHOD","Shrimp, cooked, NS as to cooking method" +26319120,"SHRIMP, BAKED OR BROILED, MADE WITH OIL","Shrimp, baked or broiled, made with oil" +26319121,"SHRIMP, BAKED OR BROILED, MADE WITH BUTTER","Shrimp, baked or broiled, made with butter" +26319122,"SHRIMP, BAKED OR BROILED, MADE WITH MARGARINE","Shrimp, baked or broiled, made with margarine" +26319123,"SHRIMP, BAKED OR BROILED, MADE WITHOUT FAT","Shrimp, baked or broiled, made without fat" +26319124,"SHRIMP, BAKED OR BROILED, MADE WITH COOKING SPRAY","Shrimp, baked or broiled, made with cooking spray" +26319130,"SHRIMP, STEAMED OR BOILED","Shrimp, steamed or boiled" +26319140,"SHRIMP, COATED, FRIED, MADE WITH OIL","Shrimp, coated, fried, made with oil" 
+26319141,"SHRIMP, COATED, FRIED, MADE WITH BUTTER","Shrimp, coated, fried, made with butter" +26319142,"SHRIMP, COATED, FRIED, MADE WITH MARGARINE","Shrimp, coated, fried, made with margarine" +26319143,"SHRIMP, COATED, FRIED, MADE WITHOUT FAT","Shrimp, coated, fried, made without fat" +26319144,"SHRIMP, COATED, FRIED, MADE WITH COOKING SPRAY","Shrimp, coated, fried, made with cooking spray" +26319145,"SHRIMP, COATED, FRIED, FROM FAST FOOD / RESTAURANT","Shrimp, coated, fried, from fast food / restaurant" +26319160,"SHIRMP, COATED, BAKED OR BROILED, MADE WITH OIL","Shrimp, coated, baked or broiled, made with oil" +26319161,"SHRIMP, COATED, BAKED OR BROILED, MADE WITH BUTTER","Shrimp, coated, baked or broiled, made with butter" +26319162,"SHRIMP, COATED, BAKED OR BROILED, MADE WITH MARGARINE","Shrimp, coated, baked or broiled, made with margarine" +26319163,"SHRIMP, COATED, BAKED OR BROILED, MADE WITHOUT FAT","Shrimp, coated, baked or broiled, made without fat" +26319164,"SHRIMP, COATED, BAKED OR BROILED, MADE WITH COOKING SPRAY","Shrimp, coated, baked or broiled, made with cooking spray" +26319170,"SHRIMP, DRIED","Shrimp, dried" +26319180,"SHRIMP, CANNED","Shrimp, canned" +26321110,"SNAILS, COOKED, NS AS TO METHOD","Snails, cooked, NS as to cooking method" +27111000,"BEEF W/ TOMATO-BASED SAUCE (MIXTURE)","Beef with tomato-based sauce (mixture)" +27111050,"SPAGHETTI SAUCE W/ BEEF/MEAT, HOMEMADE-STYLE","Spaghetti sauce with beef or meat other than lamb or mutton, homemade-style" +27111100,"BEEF GOULASH","Beef goulash" +27111200,"BEEF BURGUNDY (BEEF BOURGUIGNONNE)","Beef burgundy (beef bourguignonne)" +27111300,"MEXICAN BEEF STEW, NO POTATOES, TOMATO SAUCE","Mexican style beef stew, no potatoes, tomato-based sauce (mixture) (Carne guisada sin papas)" +27111310,"MEXICAN BEEF STEW, NO POTATOES, W/ CHILI PEPPERS, TOMATO SCE","Mexican style beef stew, no potatoes, with chili peppers, tomato-based sauce (mixture) (Carne guisada con chile)" +27111400,"CHILI CON CARNE, NS 
AS TO BEANS","Chili con carne, NS as to beans" +27111410,"CHILI CON CARNE W/ BEANS","Chili con carne with beans" +27111420,"CHILI CON CARNE W/O BEANS","Chili con carne without beans" +27111430,"CHILI CON CARNE, NS AS TO BEANS, W/ CHEESE","Chili con carne, NS as to beans, with cheese" +27111440,"CHILI CON CARNE W/ BEANS & CHEESE","Chili con carne with beans and cheese" +27111500,"BEEF SLOPPY JOE (NO BUN)","Beef sloppy joe (no bun)" +27112000,"BEEF W/ GRAVY (MIXTURE) (INCLUDE COUNTRY STYLE)","Beef with gravy (mixture)" +27112010,"SALISBURY STEAK W/ GRAVY (MIXTURE)","Salisbury steak with gravy (mixture)" +27113000,"BEEF W/ CREAM OR WHITE SAUCE (MIXTURE)","Beef with cream or white sauce (mixture)" +27113100,"BEEF STROGANOFF","Beef stroganoff" +27113200,"CREAMED CHIPPED OR DRIED BEEF","Creamed chipped or dried beef" +27113300,"SWEDISH MEATBALLS W/ CREAM OR WHITE SAUCE (MIXTURE)","Swedish meatballs with cream or white sauce (mixture)" +27114000,"BEEF W/ (MUSHROOM) SOUP (MIXTURE)","Beef with (mushroom) soup (mixture)" +27115000,"BEEF W/ SOY-BASED SAUCE (MIXTURE)","Beef with soy-based sauce (mixture)" +27115100,"STEAK TERIYAKI W/ SAUCE (MIXTURE)","Steak teriyaki with sauce (mixture)" +27116100,"BEEF CURRY","Beef curry" +27116200,"BEEF W/ BARBECUE SAUCE (MIXTURE)","Beef with barbecue sauce (mixture)" +27116300,"BEEF W/ SWEET & SOUR SAUCE (MIXTURE)","Beef with sweet and sour sauce (mixture)" +27116350,"STEWED, SEASONED GROUND BEEF, MEXICAN","Stewed, seasoned, ground beef, Mexican style (Picadillo de carne de rez)" +27116400,"STEAK TARTARE (RAW GROUND BEEF & EGG)","Steak tartare (raw ground beef and egg)" +27118110,"MEATBALLS, P. R. (ALBONDIGAS GUISADAS)","Meatballs, Puerto Rican style (Albondigas guisadas)" +27118120,"STEWED,SEASONED GROUND BEEF,PUERTO RICAN STYLE","Stewed seasoned ground beef, Puerto Rican style (Picadillo guisado, picadillo de carne)" +27118130,"STEWED DRIED BEEF, P.R. 
(TASAJO GUISADO)","Stewed dried beef, Puerto Rican style (Tasajo guisado, carne cecina guisada)" +27118140,"STUFFED POT ROAST, P.R.,NFS(ASSUME GRAVY,STUFFING)","Stuffed pot roast, Puerto Rican style, NFS (assume with gravy and stuffing)" +27118180,"BEEF STEW, P.R., MEAT W/ GRAVY (POTATO SEPARATE)","Puerto Rican style beef stew, meat with gravy (potatoes reported separately)" +27120020,"HAM/PORK W/ GRAVY (MIXTURE)","Ham or pork with gravy (mixture)" +27120030,"HAM/PORK W/ BARBECUE SAUCE","Ham or pork with barbecue sauce (mixture)" +27120060,"SWEET & SOUR PORK","Sweet and sour pork" +27120080,"HAM STROGANOFF (INCL HAM W/ CREAM OR WHITE SAUCE)","Ham stroganoff" +27120090,"HAM/PORK W/ (MUSHROOM) SOUP-BASE SAUCE (MIXTURE)","Ham or pork with (mushroom) soup (mixture)" +27120100,"HAM/PORK W/ TOMATO-BASED SAUCE (MIXTURE)","Ham or pork with tomato-based sauce (mixture)" +27120110,"SAUSAGE W/ TOMATO-BASED SAUCE (MIXTURE)","Sausage with tomato-based sauce (mixture)" +27120120,"SAUSAGE GRAVY","Sausage gravy" +27120130,"MEXICAN STYLE PORK STEW,NO POT,TOM-BASE SCE(MIXTUR","Mexican style pork stew, no potatoes, tomato-based sauce (mixture) (cerdo guisado sin papas)" +27120150,"PORK OR HAM W/ SOY-BASED SAUCE (MIXTURE)","Pork or ham with soy-based sauce (mixture)" +27120160,"PORK CURRY","Pork curry" +27120210,"FRANKFURTER /HOT DOG,W/CHILI,NO BUN (INCL CHILI DOG,NO BUN)","Frankfurter or hot dog, with chili, no bun" +27120250,"FRANKFURTERS/HOT DOGS W/ TOM-BASED SCE (MIXTURE)","Frankfurters or hot dogs with tomato-based sauce (mixture)" +27121000,"PORK W/ CHILE & TOM (MIXTURE) (PUERCO CON CHILE)","Pork with chili and tomatoes (mixture) (Puerco con chile)" +27121010,"STEWED PORK, P.R.","Stewed pork, Puerto Rican style" +27121410,"CHILI CON CARNE W/ BEANS, MADE W/ PORK","Chili con carne with beans, made with pork" +27130010,"LAMB W/ GRAVY (MIXTURE)","Lamb or mutton with gravy (mixture)" +27130040,"SPAGHETTI SAUCE W/ LAMB, HOMEMADE-STYLE","Spaghetti sauce with lamb or mutton, 
homemade-style" +27130050,"LAMB GOULASH","Lamb or mutton goulash" +27130100,"LAMB OR MUTTON CURRY","Lamb or mutton curry" +27133010,"STEWED GOAT, P.R. (CABRITO EN FRICASE)","Stewed goat, Puerto Rican style (Cabrito en fricase, chilindron de chivo)" +27135010,"VEAL W/ GRAVY (MIXTURE)","Veal with gravy (mixture)" +27135020,"VEAL SCALLOPINI","Veal scallopini" +27135030,"VEAL W/ CREAM SAUCE (INCLUDE VEAL PAPRIKASH)","Veal with cream sauce (mixture)" +27135040,"VEAL W/ BUTTER SAUCE","Veal with butter sauce (mixture)" +27135050,"VEAL MARSALA","Veal Marsala" +27135110,"VEAL PARMIGIANA","Veal parmigiana" +27135150,"VEAL CORDON BLEU","Veal cordon bleu" +27136050,"VENISON/DEER W/ TOMATO-BASED SAUCE (MIXTURE)","Venison/deer with tomato-based sauce (mixture)" +27136080,"VENISON/DEER W/ GRAVY","Venison/deer with gravy (mixture)" +27136100,"CHILI CON CARNE W/ VENISON/DEER & BEANS","Chili con carne with venison/deer and beans" +27141000,"CHICKEN CACCIATORE (INCLUDE CHICKEN W/TOMATO SAUCE)","Chicken or turkey cacciatore" +27141030,"SPAGHETTI SAUCE W/ POULTRY, HOMEMADE","Spaghetti sauce with poultry, home-made style" +27141050,"STEWED CHICKEN W/ TOMATO SAUCE, MEXICAN STYLE","Stewed chicken with tomato-based sauce, Mexican style (mixture) (Pollo guisado con tomate)" +27141500,"CHILI CON CARNE W/ CHICKEN & BEANS","Chili con carne with chicken or turkey and beans" +27142000,"CHICKEN W/ GRAVY (MIXTURE)","Chicken with gravy (mixture)" +27142100,"CHICKEN FRICASSEE","Chicken or turkey fricassee" +27142200,"TURKEY W/ GRAVY (MIXTURE)","Turkey with gravy (mixture)" +27143000,"CHICKEN OR TURKEY W/ CREAM SAUCE (MIXTURE)","Chicken or turkey with cream sauce (mixture)" +27144000,"CHICKEN W/ (MUSHROOM) SOUP-BASED SAUCE (MIXTURE)","Chicken or turkey with (mushroom) soup (mixture)" +27145000,"CHICKEN TERIYAKI","Chicken or turkey teriyaki (chicken or turkey with soy-based sauce)" +27146000,"CHICKEN OR TURKEY W/ BBQ SAUCE, SKIN EATEN","Chicken or turkey with barbecue sauce, skin eaten" 
+27146010,"CHICKEN OR TURKEY W/ BBQ SAUCE, SKIN NOT EATEN","Chicken or turkey with barbecue sauce, skin not eaten" +27146050,"CHICKEN WING W/ HOT PEPPER SCE (INCL BUFFALO WING)","Chicken wing with hot pepper sauce" +27146100,"SWEET & SOUR CHICKEN","Sweet and sour chicken or turkey" +27146110,"SWEET AND SOUR CHICKEN OR TURKEY, WITHOUT VEGETABLES","Sweet and sour chicken or turkey, without vegetables" +27146150,"CHICKEN CURRY","Chicken curry" +27146160,"CHICKEN WITH MOLE SAUCE","Chicken with mole sauce" +27146200,"CHICKEN W/ CHEESE SAUCE (MIXTURE)","Chicken or turkey with cheese sauce (mixture)" +27146250,"CHICKEN CORDON BLEU","Chicken or turkey cordon bleu" +27146300,"CHICKEN PARMIGIANA","Chicken or turkey parmigiana" +27146350,"ORANGE CHICKEN","Orange chicken" +27146360,"SESAME CHICKEN","Sesame chicken" +27146400,"CHICKEN KIEV","Chicken kiev" +27148010,"STUFFED CHICKEN, DRUMSTICK OR BREAST, P.R.","Stuffed chicken, drumstick or breast, Puerto Rican style (Muslo de pollo o pechuga rellena)" +27150010,"FISH W/ CREAM OR WHITE SAUCE, NOT TUNA OR LOBSTER","Fish with cream or white sauce, not tuna or lobster (mixture)" +27150020,"CRAB, DEVILED","Crab, deviled" +27150030,"CRAB IMPERIAL (INCLUDE STUFFED CRAB)","Crab imperial" +27150050,"FISH TIMBALE OR MOUSSE","Fish timbale or mousse" +27150060,"LOBSTER NEWBURG (INCLUDE LOBSTER THERMIDOR)","Lobster newburg" +27150070,"LOBSTER W/ BUTTER SAUCE (INCLUDE LOBSTER NORFOLK)","Lobster with butter sauce (mixture)" +27150100,"SHRIMP CURRY","Shrimp curry" +27150110,"SHRIMP COCKTAIL (SHRIMP W/ COCKTAIL SAUCE)","Shrimp cocktail (shrimp with cocktail sauce)" +27150120,"TUNA W/ CREAM OR WHITE SAUCE (MIXTURE)","Tuna with cream or white sauce (mixture)" +27150130,"SEAFOOD NEWBURG (INCLUDE CRABMEAT THERMIDOR)","Seafood newburg" +27150140,"CLAM SAUCE, WHITE","Clam sauce, white" +27150160,"SHRIMP W/ LOBSTER SAUCE (MIXTURE)","Shrimp with lobster sauce (mixture)" +27150170,"SWEET & SOUR SHRIMP","Sweet and sour shrimp" +27150190,"LOBSTER SAUCE 
(BROTH-BASED)","Lobster sauce (broth-based)" +27150200,"OYSTER SCE (WHITE SCE-BASED)","Oyster sauce (white sauce-based)" +27150210,"FISH SAUCE (BAGOONG)","Fish sauce (bagoong)" +27150230,"SHRIMP SCAMPI","Shrimp scampi" +27150250,"FISH MOOCHIM (KOREAN STYLE), DRIED FISH W/ SOY SCE","Fish moochim (Korean style), dried fish with soy sauce" +27150310,"FISH W/ TOMATO-BASED SAUCE (MIXTURE)","Fish with tomato-based sauce (mixture)" +27150320,"FISH CURRY","Fish curry" +27150330,"MUSSELS W/ TOMATO-BASED SAUCE (MIXTURE)","Mussels with tomato-based sauce (mixture)" +27150350,"SARDINES W/ TOMATO-BASED SAUCE (MIXTURE)","Sardines with tomato-based sauce (mixture)" +27150370,"SARDINES W/ MUSTARD SAUCE (MIXTURE)","Sardines with mustard sauce (mixture)" +27150410,"SHRIMP TERIYAKI","Shrimp teriyaki (shrimp with soy-based sauce) (mixture)" +27150510,"SCALLOPS W/ CHEESE SAUCE (MIXTURE)","Scallops with cheese sauce (mixture)" +27151030,"MARINATED FISH (CEVICHE)","Marinated fish (Ceviche)" +27151040,"CRABS IN TOMATO-BASED SAUCE, PUERTO RICAN STYLE","Crabs in tomato-based sauce, Puerto Rican style (mixture) (Salmorejo de jueyes)" +27151050,"SHRIMP IN GARLIC SAUCE, P.R. (CAMARONES AL AJILLO)","Shrimp in garlic sauce, Puerto Rican style (mixture) (Camarones al ajillo)" +27151070,"STEWED CODFISH, PUERTO RICAN STYLE, NO POTATOES","Stewed codfish, Puerto Rican style, no potatoes (potatoes reported separately)" +27160010,"MEAT W/ BARBECUE SAUCE, NS AS TO TYPE OF MEAT","Meat with barbecue sauce, NS as to type of meat (mixture)" +27160100,"MEATBALLS, NS AS TO TYPE OF MEAT, W/ SAUCE","Meatballs, NS as to type of meat, with sauce (mixture)" +27161010,"MEAT LOAF, P.R. 
(ALBONDIGON)","Puerto Rican style meat loaf (Albondigon)" +27162010,"MEAT W/ TOMATO-BASED SAUCE","Meat with tomato-based sauce (mixture)" +27162050,"SPAGHETTI SAUCE W/ COMBINATION OF MEATS, HOMEMADE","Spaghetti sauce with combination of meats, homemade-style" +27162060,"SPAGHETTI SAUCE W/ MEAT & VEGETABLES, HOMEMADE-STYLE","Spaghetti sauce with meat and vegetables, homemade-style" +27162500,"STEWED SEASONED GROUND BEEF & PORK, MEXICAN","Stewed, seasoned, ground beef and pork, Mexican style (Picadillo de carne de rez y puerco)" +27163010,"MEAT W/ GRAVY, NS AS TO TYPE OF MEAT (MIXTURE)","Meat with gravy, NS as to type of meat (mixture)" +27211000,"BEEF & POTATOES, NO SAUCE (MIXTURE)","Beef and potatoes, no sauce (mixture)" +27211100,"BEEF STEW W/ POTATOES, TOMATO-BASED SAUCE","Beef stew with potatoes, tomato-based sauce (mixture)" +27211110,"MEXICAN BEEF STEW W/POT,TOM SCE (CARNE GUISADA CON)","Mexican style beef stew with potatoes, tomato-based sauce (mixture) (Carne guisada con papas)" +27211150,"BEEF GOULASH W/ POTATOES (INCL BEEF GOULASH, NFS)","Beef goulash with potatoes" +27211190,"BEEF & POTATOES W/ CREAM, WHITE, MUSHROOM SOUP SCE (MIXTURE)","Beef and potatoes with cream sauce, white sauce or mushroom soup-based sauce (mixture)" +27211200,"BEEF STEW W/ POTATOES, GRAVY","Beef stew with potatoes, gravy" +27211300,"BEEF (ROAST) HASH","Beef (roast) hash" +27211400,"CORNED BEEF HASH","Corned beef hash" +27211500,"BEEF & POTATOES W/ CHEESE SAUCE (MIXTURE)","Beef and potatoes with cheese sauce (mixture)" +27211550,"STEWED SEASONED GROUND BEEF W/ POTATOES, MEXICAN","Stewed, seasoned, ground beef with potatoes, Mexican style (Picadillo de carne de rez con papas)" +27212000,"BEEF & NOODLES, NO SAUCE","Beef and noodles, no sauce (mixture)" +27212050,"BEEF & MACARONI WITH CHEESE SAUCE (MIXTURE)","Beef and macaroni with cheese sauce (mixture)" +27212100,"BEEF & NOODLES W/ TOMATO-BASED SAUCE (MIXTURE)","Beef and noodles with tomato-based sauce (mixture)" +27212120,"CHILI 
CON CARNE W/ BEANS & MACARONI","Chili con carne with beans and macaroni" +27212150,"BEEF GOULASH W/ NOODLES","Beef goulash with noodles" +27212200,"BEEF & NOODLES W/ GRAVY (MIXTURE)","Beef and noodles with gravy (mixture)" +27212300,"BEEF & NOODLES W/ CREAM OR WHITE SAUCE (MIXTURE)","Beef and noodles with cream or white sauce (mixture)" +27212350,"BEEF STROGANOFF W/ NOODLES","Beef stroganoff with noodles" +27212400,"BEEF & NOODLES W/ (MUSHROOM) SOUP (MIXTURE)","Beef and noodles with (mushroom) soup (mixture)" +27212500,"BEEF AND NOODLES WITH SOY-BASED SAUCE (MIXTURE)","Beef and noodles with soy-based sauce (mixture)" +27213000,"BEEF & RICE, NO SAUCE (MIXTURE)","Beef and rice, no sauce (mixture)" +27213010,"BIRYANI WITH MEAT","Biryani with meat" +27213100,"BEEF & RICE W/ TOMATO-BASED SAUCE (MIXTURE)","Beef and rice with tomato-based sauce (mixture)" +27213120,"PORCUPINE BALLS W/ TOMATO-BASED SAUCE (MIXTURE)","Porcupine balls with tomato-based sauce (mixture)" +27213150,"CHILI CON CARNE W/ BEANS & RICE","Chili con carne with beans and rice" +27213200,"BEEF & RICE W/ GRAVY (MIXTURE)","Beef and rice with gravy (mixture)" +27213300,"BEEF & RICE W/ CREAM SAUCE (MIXTURE)","Beef and rice with cream sauce (mixture)" +27213400,"BEEF & RICE W/ (MUSHROOM) SOUP (MIXTURE)","Beef and rice with (mushroom) soup (mixture)" +27213420,"PORCUPINE BALLS W/ (MUSHROOM) SOUP (MIXTURE)","Porcupine balls with (mushroom) soup (mixture)" +27213500,"BEEF & RICE W/ SOY-BASED SAUCE (MIXTURE)","Beef and rice with soy-based sauce (mixture)" +27213600,"BEEF AND RICE WITH CHEESE SAUCE (MIXTURE)","Beef and rice with cheese sauce (mixture)" +27214100,"MEAT LOAF MADE W/ BEEF","Meat loaf made with beef" +27214110,"MEAT LOAF W/ BEEF, W/ TOMATO SAUCE","Meat loaf made with beef, with tomato-based sauce" +27214300,"BEEF WELLINGTON","Beef wellington" +27214500,"CORNED BEEF PATTY","Corned beef patty" +27214600,"CREAMED DRIED BEEF ON TOAST","Creamed dried beef on toast" +27218110,"STUFFED POT ROAST (LARDED 
MEAT) W/ POTATOES, P.R.","Puerto Rican style stuffed pot roast (larded meat) with potatoes (Carne mechada con papas boliche)" +27218210,"BEEF STEW, P.R. W/ POTATOES (CARNE GUISADA CON PAPAS)","Puerto Rican style beef stew with potatoes (Carne guisada con papas)" +27218310,"STEWED CORNED BEEF, P.R. (""CORNED BEEF"" GUISADO)","Stewed corned beef, Puerto Rican style (""Corned beef"" guisado)" +27220010,"MEAT LOAF MADE W/ HAM (NOT LUNCHEON MEAT)","Meat loaf made with ham (not luncheon meat)" +27220020,"HAM & NOODLES W/ CREAM OR WHITE SAUCE (MIXTURE)","Ham and noodles with cream or white sauce (mixture)" +27220030,"HAM & RICE W/ (MUSHROOM) SOUP (MIXTURE)","Ham and rice with (mushroom) soup (mixture)" +27220050,"HAM OR PORK W/ STUFFING","Ham or pork with stuffing (mixture)" +27220080,"HAM CROQUETTE","Ham croquette" +27220110,"PORK & RICE W/ TOMATO-BASED SAUCE (MIXTURE)","Pork and rice with tomato-based sauce (mixture)" +27220120,"SAUSAGE & RICE W/ TOMATO-BASED SAUCE (MIXTURE)","Sausage and rice with tomato-based sauce (mixture)" +27220150,"SAUSAGE & RICE W/ (MUSHROOM) SOUP (MIXTURE)","Sausage and rice with (mushroom) soup (mixture)" +27220170,"SAUSAGE & RICE W/ CHEESE SAUCE (MIXTURE)","Sausage and rice with cheese sauce (mixture)" +27220190,"SAUSAGE & NOODLES W/ CREAM OR WHITE SAUCE (MIXTURE)","Sausage and noodles with cream or white sauce (mixture)" +27220210,"HAM & NOODLES, NO SAUCE (MIXTURE)","Ham and noodles, no sauce (mixture)" +27220310,"HAM & RICE, NO SAUCE (MIXTURE)","Ham or pork and rice, no sauce (mixture)" +27220510,"HAM/PORK & POTATOES W/ GRAVY (MIXTURE)","Ham or pork and potatoes with gravy (mixture)" +27220520,"PORK & POTATOES W/ CHEESE SAUCE","Ham or pork and potatoes with cheese sauce (mixture)" +27221100,"STEWED PIG'S FEET, P.R. 
(PATITAS DE CERDO GUISADAS)","Stewed pig's feet, Puerto Rican style (Patitas de cerdo guisadas)" +27221110,"STUFFED PORK ROAST, P.R.","Stuffed pork roast, Puerto Rican style" +27221150,"MEXICAN STYLE PORK STEW W/POT,TOM-BASE SCE(MIXTURE)","Mexican style pork stew, with potatoes, tomato-based sauce (mixture) (cerdo guisado con papas)" +27230010,"LAMB LOAF","Lamb or mutton loaf" +27231000,"LAMB OR MUTTON & POTATOES W/ GRAVY (MIXTURE)","Lamb or mutton and potatoes with gravy (mixture)" +27232000,"LAMB & POTATOES W/ TOMATO-BASED SAUCE (MIXTURE)","Lamb or mutton and potatoes with tomato-based sauce (mixture)" +27233000,"LAMB OR MUTTON & NOODLES W/ GRAVY (MIXTURE)","Lamb or mutton and noodles with gravy (mixture)" +27235000,"MEAT LOAF MADE WITH VENISON/DEER","Meat loaf made with venison/deer" +27235750,"VEAL & NOODLES W/ CREAM/WHITE SCE (MIXTURE)","Veal and noodles with cream or white sauce (mixture)" +27236000,"VENISON/DEER & NOODLE MIXTURE W/ CREAM/WHITE SAUCE","Venison/deer and noodles with cream or white sauce (mixture)" +27241000,"CHICKEN OR TURKEY HASH","Chicken or turkey hash" +27241010,"CHICKEN OR TURKEY & POTATOES W/ GRAVY (MIXTURE)","Chicken or turkey and potatoes with gravy (mixture)" +27242000,"CHICKEN OR TURKEY & NOODLES, NO SAUCE (MIXTURE)","Chicken or turkey and noodles, no sauce (mixture)" +27242200,"CHICKEN OR TURKEY & NOODLES W/ GRAVY (MIXTURE)","Chicken or turkey and noodles with gravy (mixture)" +27242250,"CHICKEN OR TURKEY & NOODLES W/ (MUSHROOM) SOUP","Chicken or turkey and noodles with (mushroom) soup (mixture)" +27242300,"CHICKEN OR TURKEY & NOODLES W/ CREAM OR WHITE SAUCE","Chicken or turkey and noodles with cream or white sauce (mixture)" +27242310,"CHICKEN & NOODLES W/ CHEESE SAUCE","Chicken or turkey and noodles with cheese sauce (mixture)" +27242350,"CHICKEN OR TURKEY TETRAZZINI","Chicken or turkey tetrazzini" +27242400,"CHICKEN & NOODLES, TOMATO-BASED SAUCE (MIXTURE)","Chicken or turkey and noodles, tomato-based sauce (mixture)" 
+27242500,"CHICKEN OR TURKEY AND NOODLES WITH SOY-BASED SAUCE (MIXTURE)","Chicken or turkey and noodles with soy-based sauce (mixture)" +27243000,"CHICKEN & RICE, NO SAUCE (MIXTURE)","Chicken or turkey and rice, no sauce (mixture)" +27243100,"BIRYANI WITH CHICKEN","Biryani with chicken" +27243300,"CHICKEN & RICE W/ CREAM SAUCE (MIXTURE)","Chicken or turkey and rice with cream sauce (mixture)" +27243400,"CHICKEN & RICE W/ (MUSHROOM) SOUP-BASED SAUCE","Chicken or turkey and rice with (mushroom) soup (mixture)" +27243500,"CHICKEN & RICE W/ TOMATO-BASED SAUCE (MIXTURE)","Chicken or turkey and rice with tomato-based sauce (mixture)" +27243600,"CHICKEN & RICE W/ SOY-BASED SAUCE (MIXTURE)","Chicken or turkey and rice with soy-based sauce (mixture)" +27243700,"CHICKEN IN CHEESE SCE W/ SPANISH RICE","Chicken in cheese sauce with Spanish rice" +27246100,"CHICKEN W/ DUMPLINGS (MIXTURE)","Chicken or turkey with dumplings (mixture)" +27246200,"CHICKEN W/ STUFFING (MIXTURE)","Chicken or turkey with stuffing (mixture)" +27246300,"CHICKEN OR TURKEY CAKE, PATTY OR CROQUETTE","Chicken or turkey cake, patty, or croquette" +27246400,"CHICKEN SOUFFLE","Chicken or turkey souffle" +27246500,"MEAT LOAF MADE W/ CHICKEN OR TURKEY","Meat loaf made with chicken or turkey" +27246505,"MEAT LOAF W/ CHICKEN OR TURKEY, W/ TOMATO SAUCE","Meat loaf made with chicken or turkey, with tomato-based sauce" +27250020,"CLAMS, STUFFED","Clams, stuffed" +27250030,"CODFISH BALL OR CAKE","Codfish ball or cake" +27250040,"CRAB CAKE","Crab cake" +27250050,"FISH CAKE OR PATTY, NS AS TO FISH","Fish cake or patty, NS as to fish" +27250060,"GEFILTE FISH","Gefilte fish" +27250070,"SALMON CAKE OR PATTY (INCLUDE SALMON CROQUETTE)","Salmon cake or patty" +27250080,"SALMON LOAF","Salmon loaf" +27250110,"SCALLOPS & NOODLES W/ CHEESE SAUCE (MIXTURE)","Scallops and noodles with cheese sauce (mixture)" +27250120,"SHRIMP AND NOODLES, NO SAUCE (MIXTURE)","Shrimp and noodles, no sauce (mixture)" +27250122,"SHRIMP AND NOODLES 
WITH GRAVY (MIXTURE)","Shrimp and noodles with gravy (mixture)" +27250124,"SHRIMP AND NOODLES WITH (MUSHROOM) SOUP (MIXTURE)","Shrimp and noodles with (mushroom) soup (mixture)" +27250126,"SHRIMP AND NOODLES WITH CREAM OR WHITE SAUCE (MIXTURE)","Shrimp and noodles with cream or white sauce (mixture)" +27250128,"SHRIMP AND NOODLES WITH SOY-BASED SAUCE (MIXTURE)","Shrimp and noodles with soy-based sauce (mixture)" +27250130,"SHRIMP & NOODLES W/ CHEESE SAUCE","Shrimp and noodles with cheese sauce (mixture)" +27250132,"SHRIMP AND NOODLES WITH TOMATO SAUCE (MIXTURE)","Shrimp and noodles with tomato sauce (mixture)" +27250150,"TUNA LOAF","Tuna loaf" +27250160,"TUNA CAKE OR PATTY","Tuna cake or patty" +27250210,"CLAM CAKE OR PATTY (INCLUDE DEVILED)","Clam cake or patty" +27250220,"OYSTER FRITTER","Oyster fritter" +27250250,"FLOUNDER W/CRAB STUFFING","Flounder with crab stuffing" +27250260,"LOBSTER W/ BREAD STUFFING,BAKED","Lobster with bread stuffing, baked" +27250270,"CLAMS, CASINO","Clams Casino" +27250300,"MACKEREL CAKE OR PATTY","Mackerel cake or patty" +27250310,"HADDOCK CAKE OR PATTY","Haddock cake or patty" +27250400,"SHRIMP CAKE OR PATTY (INCL SHRIMP BURGER OR STICK)","Shrimp cake or patty" +27250410,"SHRIMP W/ CRAB STUFFING","Shrimp with crab stuffing" +27250450,"SHRIMP TOAST, FRIED","Shrimp toast, fried" +27250520,"SEAFOOD, RESTRUCTURED (INCL IMITATION CRABMEAT)","Seafood restructured" +27250550,"SEAFOOD SOUFFLE","Seafood souffle" +27250610,"TUNA NOODLE CASSEROLE W/ CREAM OR WHITE SAUCE","Tuna noodle casserole with cream or white sauce" +27250630,"TUNA NOODLE CASSEROLE W/ (MUSHROOM) SOUP","Tuna noodle casserole with (mushroom) soup" +27250710,"TUNA & RICE W/ (MUSHROOM) SOUP (MIXTURE)","Tuna and rice with (mushroom) soup (mixture)" +27250810,"FISH & RICE W/ TOMATO-BASED SAUCE","Fish and rice with tomato-based sauce" +27250820,"FISH & RICE W/ CREAM SAUCE","Fish and rice with cream sauce" +27250830,"FISH & RICE W/ (MUSHROOM) SOUP","Fish and rice with (mushroom) 
soup" +27250900,"FISH & NOODLES W/ (MUSHROOM) SOUP (MIXTURE)","Fish and noodles with (mushroom) soup" +27250950,"SHELLFISH & NOODLES, TOMATO-BASED SAUCE","Shellfish mixture and noodles, tomato-based sauce (mixture)" +27251010,"STEWED SALMON, P.R. (SALMON GUISADO)","Stewed salmon, Puerto Rican style (Salmon guisado)" +27260010,"MEATLOAF, NS AS TO TYPE OF MEAT","Meat loaf, NS as to type of meat" +27260050,"MEATBALLS, W/ BREADING, W/ GRAVY","Meatballs, with breading, NS as to type of meat, with gravy" +27260080,"MEAT LOAF MADE W/ BEEF & PORK","Meat loaf made with beef and pork" +27260090,"MEAT LOAF W/ BEEF, VEAL & PORK","Meat loaf made with beef, veal and pork" +27260100,"MEAT LOAF W/ BEEF & PORK, W/ TOMATO SAUCE","Meat loaf made with beef and pork, with tomato-based sauce" +27260110,"HASH, NS AS TO TYPE OF MEAT","Hash, NS as to type of meat" +27260500,"VIENNA SAUSAGES STEWED W/ POTATOES, P.R.","Vienna sausages stewed with potatoes, Puerto Rican style (Salchichas guisadas)" +27260510,"LIVER DUMPLING","Liver dumpling" +27261000,"BREADED BRAINS, P.R. 
(SESOS REBOSADOS)","Breaded brains, Puerto Rican style (Sesos rebosados)" +27261500,"STEWED SEASONED GROUND BEEF&PORK,W/POT, MEXICAN","Stewed, seasoned, ground beef and pork, with potatoes, Mexican style (Picadillo de carne de rez y puerco con papas)" +27311110,"BEEF, POTATOES, & VEG (W/ CAR/DK GREEN), NO SAUCE","Beef, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27311120,"BEEF, POTATOES, & VEG (NO CAR/DK GREEN), NO SAUCE","Beef, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27311210,"CORNED BEEF, POT & VEG(W/ CAR/DK GREEN), NO SAUCE","Corned beef, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27311220,"CORNED BEEF, POTATO & VEG (NO CAR/DK GRN), NO SAUCE","Corned beef, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27311310,"BEEF STEW W/ POT & VEG(W/ CAR/DK GRN), TOMATO SAUCE","Beef stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce" +27311320,"BEEF STEW W/ POT & VEG (NO CAR/DK GREEN), TOM SAUCE","Beef stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce" +27311410,"BEEF STEW W/ POT & VEG (W/ CAR, DK GREEN), GRAVY","Beef stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy" +27311420,"BEEF STEW W/ POT & VEG (NO CAR, DK GREEN), GRAVY","Beef stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy" +27311510,"SHEPHERD'S PIE W/ BEEF","Shepherd's pie with beef" +27311600,"BEEF, POT, & VEG (INCL CAR/DK GRN), GRAVY","Beef, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27311605,"BEEF, POT, & VEG (NO CAR/DK GREEN), GRAVY","Beef, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy 
(mixture)" +27311610,"BEEF, POT & VEG (INCL CAR/DK GRN), CR/SOUP-BASED SAUCE","Beef, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27311620,"BEEF, POT & VEG (NO CAR/DK GRN), CR/SOUP-BASED SAUCE","Beef, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27311625,"BEEF, POT, & VEG (INCL CAR/DK GRN), TOMATO-BASED SAUCE","Beef, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27311630,"BEEF, POT, & VEG (NO CAR/DK GREEN), TOMATO-BASED SAUCE","Beef, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27311635,"BEEF, POT, & VEG (INCL CAR/DK GRN), CHEESE SAUCE","Beef, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), cheese sauce (mixture)" +27311640,"BEEF, POT, & VEG (NO CAR/DK GREEN), CHEESE SAUCE","Beef, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), cheese sauce (mixture)" +27311645,"BEEF, POT, & VEG (INCL CAR/DK GRN), SOY-BASED SAUCE","Beef, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), soy-based sauce (mixture)" +27311650,"BEEF, POT, & VEG (NO CAR/DK GREEN), SOY-BASED SAUCE","Beef, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), soy-based sauce (mixture)" +27313010,"BEEF, NOODLES & VEG (W/ CARROTS/DK GREEN), NO SAUCE","Beef, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27313020,"BEEF, NOODLES & VEG (NO CARROTS/DK GREEN), NO SAUCE","Beef, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27313110,"BEEF CHOW MEIN OR CHOP SUEY W/ NOODLES","Beef chow mein or chop suey with noodles" +27313150,"BEEF, NOODLES & VEG (W/ CAR/DK GREEN), SOY 
SAUCE","Beef, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), soy-based sauce (mixture)" +27313160,"BEEF, NOODLES & VEG (NO CAR/DK GREEN), SOY SAUCE","Beef, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), soy-based sauce (mixture)" +27313210,"BEEF, NOODLES & VEG (W/ CAR/DK GREEN), TOMATO SAUCE","Beef, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27313220,"BEEF, NOODLES & VEG (NO CAR/DK GREEN), TOMATO SAUCE","Beef, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27313310,"BEEF, NOODLES, VEG(INCL CARROTS/DK GREEN), SOUP","Beef, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), (mushroom) soup (mixture)" +27313320,"BEEF, NOODLES, VEG (NO CARROTS/DK GREEN), SOUP","Beef, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), (mushroom) soup (mixture)" +27313410,"BEEF, NOODLES, & VEG (INCL CAR/DK GRN), GRAVY","Beef, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27313420,"BEEF, NOODLES, & VEG (NO CAR/DK GRN), GRAVY","Beef, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27315010,"BEEF, RICE & VEG (W/ CARROTS/DK GREEN), NO SAUCE","Beef, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27315020,"BEEF, RICE & VEG (NO CARROTS/DK GREEN), NO SAUCE","Beef, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27315210,"BEEF, RICE & VEG (W/ CAR/DK GREEN), TOMATO SAUCE","Beef, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27315220,"BEEF, RICE & VEG (NO CAR/DK GREEN), TOMATO SAUCE","Beef, rice, and vegetables (excluding carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" 
+27315250,"STUFFED CABBAGE ROLLS W/ BEEF AND RICE","Stuffed cabbage rolls with beef and rice" +27315270,"STUFFED GRAPE LEAVES W/ BEEF & RICE","Stuffed grape leaves with beef and rice" +27315310,"BEEF, RICE & VEGETABLES (W/ CARROTS/DK GREEN), SOUP","Beef, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), (mushroom) soup (mixture)" +27315320,"BEEF, RICE & VEGETABLES (NO CARROTS/DK GREEN), SOUP","Beef, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), (mushroom) soup (mixture)" +27315330,"BEEF, RICE & VEG (INCL CAR/DK GRN), CHEESE SAUCE","Beef, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), cheese sauce (mixture)" +27315340,"BEEF, RICE & VEG (NO CAR/DK GRN), CHEESE SAUCE","Beef, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), cheese sauce (mixture)" +27315410,"BEEF, RICE & VEG (INCL CAR/DK GRN), GRAVY, MIXTURE","Beef, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27315420,"BEEF, RICE & VEG (NO CAR/DK GRN), GRAVY, MIXTURE","Beef, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27315510,"BEEF, RICE & VEG (INCL CAR/DK GRN), SOY-BASED SAUCE","Beef, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), soy-based sauce (mixture)" +27315520,"BEEF, RICE & VEG (NO CAR/DK GRN), SOY-BASED SAUCE","Beef, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), soy-based sauce (mixture)" +27317010,"BEEF POT PIE (INCLUDE GREEK MEAT PIE)","Beef pot pie" +27317100,"BEEF, DUMPLINGS & VEG (INCL CAR/DK GRN), GRAVY","Beef, dumplings, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27317110,"BEEF, DUMPLINGS & VEG (NO CAR/DK GRN), GRAVY","Beef, dumplings, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27319010,"STUFFED GREEN PEPPER, P.R. 
(PIMIENTO RELLENO)","Stuffed green pepper, Puerto Rican style (Pimiento relleno)" +27320020,"HAM POT PIE","Ham pot pie" +27320025,"HAM OR PORK, NOODLES, VEG (NO CAR, BROC, DK GRN)NO SAUCE","Ham or pork, noodles and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27320027,"HAM OR PORK, NOODLES, VEG (INCL CAR, BROC, DARK GREEN)NO SAU","Ham or pork, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27320030,"HAM/PORK, NOODLES & VEG (NO CAR/DK GR), CHEESE SCE","Ham or pork, noodles and vegetables (excluding carrots, broccoli, and dark-green leafy), cheese sauce (mixture)" +27320040,"PORK, POTATOES & VEG (W/ CAR, DK GREEN), NO SAUCE","Pork, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27320070,"PORK, NOODLES, VEG (INCL CAR/DK GRN), TOMATO SAUCE","Ham or pork, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27320080,"SAUSAGE, NOODLES, VEG (NO CAR/DK GRN), TOMATO SAUCE","Sausage, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce" +27320090,"SAUSAGE, NOODLES, VEG (W/ CAR/DK GRN), TOMATO SAUCE","Sausage, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce" +27320100,"PORK, POTATOES & VEG (W/ CAR, DK GREEN), TOMATO SCE","Pork, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27320110,"PORK, POTATOES & VEG (NO CAR, DK GREEN), TOMATO SCE","Pork, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27320120,"SAUSAGE, POT, & VEG (INCL CAR/BROC/DK GREEN), GRAVY","Sausage, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27320130,"SAUSAGE, POT, & VEG (NO CAR/BROC/DK GREEN), GRAVY","Sausage, potatoes, and vegetables 
(excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27320140,"PORK, POT, & VEG (INCL CAR/DK GRN), GRAVY, MIXTURE","Pork, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27320150,"PORK, POT, & VEG (NO CAR/DK GRN), GRAVY, MIXTURE","Pork, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27320210,"PORK, POTATOES & VEG (NO CAR, DK GREEN), NO SAUCE","Pork, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27320310,"PORK CHOW MEIN OR CHOP SUEY W/ NOODLES","Pork chow mein or chop suey with noodles" +27320320,"PORK, RICE & VEG (INCL CAR/DK GRN), SOY-BASED SAUCE","Pork, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), soy-based sauce (mixture)" +27320330,"PORK, RICE & VEG (NO CAR/DK GRN), SOY-BASED SAUCE","Pork, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), soy-based sauce (mixture)" +27320340,"PORK, RICE & VEG (INCL CAR/DK GRN), TOMATO SAUCE","Pork, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27320350,"PORK, RICE & VEG (NO CAR/DK GRN), TOMATO SAUCE","Pork, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27320410,"HAM, POTATOES & VEG (NO CARROTS/DK GREEN), NO SAUCE","Ham, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27320450,"HAM, POTATOES & VEG (W/ CARROTS/DK GREEN), NO SAUCE","Ham, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27320500,"SWEET & SOUR PORK W/ RICE","Sweet and sour pork with rice" +27330010,"SHEPHERD'S PIE W/ LAMB","Shepherd's pie with lamb" +27330030,"LAMB STEW W/ POT & VEG (INCL CAR/DK GREEN), GRAVY","Lamb or mutton stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy" 
+27330050,"LAMB, RICE & VEGETABLES (NO CARROT/DK GREEN), GRAVY","Lamb or mutton, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27330060,"LAMB, RICE & VEG (INCL CAR/DK GRN), TOMATO SAUCE","Lamb or mutton, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27330080,"LAMB, RICE, & VEGETABLES (INCL CAR, DK GRN), GRAVY","Lamb or mutton, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy" +27330110,"LAMB STEW W/ POT & VEG (NO CAR/DK GREEN), GRAVY","Lamb or mutton stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy" +27330170,"STUFFED GRAPE LEAVES W/ LAMB & RICE","Stuffed grape leaves with lamb and rice" +27330210,"LAMB STEW W/ POT & VEG (INCL CAR/DK GRN), TOM SAUCE","Lamb or mutton stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce" +27330220,"LAMB STEW W/ POT & VEG (NO CAR/DK GRN), TOMATO SCE","Lamb or mutton stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce" +27331150,"VEAL FRICASSEE, P.R. (TERNERA EN FRICASE)","Veal fricassee, Puerto Rican style (ternera en fricase)" +27332100,"VEAL STEW W/ POT, VEG (INCL CAR/DK GRN) TOM SAUCE","Veal stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce" +27332110,"VEAL STEW W/ POT, VEG (NO CAR,DK GRN), TOMATO SAUCE","Veal stew with potatoes and vegetables (excluding carrots, broccoli, and/or dark-green leafy), tomato-based sauce" +27335100,"RABBIT STEW W/ POTATOES & VEGETABLES","Rabbit stew with potatoes and vegetables" +27335500,"STEWED RABBIT, P.R. 
(FRICASE DE CONEJO)","Stewed rabbit, Puerto Rican style (Fricase de conejo)" +27336100,"VENISON/DEER STEW W/ POTATO & VEG(W/ CAR/DK GRN),TOM SCE","Venison/deer stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce" +27336150,"VENISON/DEER STEW W/ POTATO & VEG(NO CAR/DK GRN),TOM SCE","Venison/deer stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce" +27336200,"VENISON/DEER, POTATOES & VEG (INCL CAR/DK GRN), GRAVY","Venison/deer, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27336250,"VENISON/DEER, POTATOES & VEG (NO CAR/DK GRN), GRAVY","Venison/deer, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27336300,"VENISON/DEER, NOODLES & VEG (INCL CAR/DK GRN),TOM SAUCE","Venison/deer, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27336310,"VENISON/DEER, NOODLES & VEG (NO CAR/DK GRN), TOM SAUCE","Venison/deer, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27341000,"CHICKEN OR TURKEY, POTATOES, CORN, AND CHEESE, WITH GRAVY","Chicken or turkey, potatoes, corn, and cheese, with gravy" +27341010,"CHICKEN, POT & VEG (INCL CAR/DK GRN), NO SAUCE","Chicken or turkey, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27341020,"CHICKEN, POT & VEG (NO CAR/DK GRN), NO SAUCE","Chicken or turkey, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27341025,"CHICKEN, POT & VEG (INCL CAR/DK GRN), GRAVY","Chicken or turkey, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27341030,"CHICKEN, POT & VEG (NO CAR/DK GRN), GRAVY","Chicken or turkey, potatoes, and vegetables (excluding carrots, broccoli, and dark-green 
leafy), gravy (mixture)" +27341035,"CHICKEN, POT & VEG (INCL CAR/DK GRN), CREAM/SOUP-BASED SAUCE","Chicken or turkey, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27341040,"CHICKEN, POT & VEG (NO CAR/DK GRN), CREAM/SOUP-BASED SAUCE","Chicken or turkey, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27341045,"CHICKEN, POT & VEG (INCL CAR/DK GRN), CHEESE SAUCE","Chicken or turkey, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), cheese sauce (mixture)" +27341050,"CHICKEN, POT & VEG (NO CAR/DK GRN), CHEESE SAUCE","Chicken or turkey, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), cheese sauce (mixture)" +27341055,"CHICKEN, POT & VEG (INCL CAR/DK GRN), TOMATO-BASED SAUCE","Chicken or turkey, potatoes, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27341060,"CHICKEN, POT & VEG (NO CAR/DK GRN), TOMATO-BASED SAUCE","Chicken or turkey, potatoes, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27341310,"CHICKEN STEW W/ POT, VEG (INCL CAR/DK GRN), GRAVY","Chicken or turkey stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy" +27341320,"CHICKEN STEW W/ POT & VEG (NO CAR/DK GRN), GRAVY","Chicken or turkey stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy" +27341510,"CHICKEN STEW W/ POT & VEG(INCL CAR/DK GRN), TOM SCE","Chicken or turkey stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce" +27341520,"CHICKEN STEW W/ POT & VEG(NO CAR/DK GRN), TOM SAUCE","Chicken or turkey stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato- based 
sauce" +27343010,"CHICKEN, NOODLES & VEG (INCL CAR/DK GRN), NO SAUCE","Chicken or turkey, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27343020,"CHICKEN, NOODLES & VEG (NO CAR/DK GRN), NO SAUCE","Chicken or turkey, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27343410,"CHICKEN, NOODLES & VEG (INCL CAR/DK GRN), GRAVY","Chicken or turkey, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27343420,"CHICKEN, NOODLES & VEG (NO CAR/DK GRN), GRAVY","Chicken or turkey, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27343470,"CHICKEN OR TURKEY, NOODLES, AND VEGETABLES (INCLUDING CARROT","Chicken or turkey, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27343480,"CHIX, NDL, VEG(NO CAR/DK GRN), CR/SOUP-BASED SAUCE","Chicken or turkey, noodles, and vegetables (excluding carrots, broccoli, and/or dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27343510,"CHICKEN, NOODLES, VEG (INCL CAR/DK GRN), TOMATO SCE","Chicken or turkey, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27343520,"CHICKEN, NOODLES, VEG (NO CAR/DK GRN), TOMATO SAUCE","Chicken or turkey, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27343910,"CHICKEN CHOW MEIN/CHOP SUEY W/ NOODLES","Chicken or turkey chow mein or chop suey with noodles" +27343950,"CHICKEN, NOODLES & VEG(INCL CAR/DK GRN), CHEESE SCE","Chicken or turkey, noodles, and vegetables (including carrots, broccoli, and/or dark-green leafy), cheese sauce (mixture)" +27343960,"CHICKEN, NOODLES & VEG(NO CAR/DK GRN), CHEESE SAUCE","Chicken or turkey, noodles, and vegetables (excluding carrots, 
broccoli, and dark-green leafy), cheese sauce (mixture)" +27345010,"CHICKEN, RICE & VEG (INCL CAR/DK GRN), NO SAUCE","Chicken or turkey, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27345020,"CHICKEN, RICE & VEG (NO CAR/DK GRN), NO SAUCE","Chicken or turkey, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), no sauce (mixture)" +27345210,"CHICKEN, RICE & VEG (INCL CAR/DK GRN), GRAVY","Chicken or turkey, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27345220,"CHICKEN, RICE & VEG (NO CAR/DK GRN), GRAVY","Chicken or turkey, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27345230,"CHICKEN OR TURKEY, RICE, CORN, AND CHEESE WITH GRAVY","Chicken or turkey, rice, corn, and cheese, with gravy" +27345310,"CHICKEN, RICE & VEG (INCL CAR/DK GRN), SOY SAUCE","Chicken or turkey, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), soy-based sauce (mixture)" +27345320,"CHICKEN, RICE & VEG (NO CAR/DK GRN), SOY SAUCE","Chicken or turkey, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), soy-based sauce (mixture)" +27345410,"CHIX, RICE, & VEG(INCL CAR/DK GRN), CR/SOUP-BASED SAU","Chicken or turkey, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27345420,"CHIX, RICE, AND VEG(NO CAR/DK GRN), CR/SOUP-BASED SAU","Chicken or turkey, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), cream sauce, white sauce, or mushroom soup-based sauce (mixture)" +27345440,"CHICKEN, RICE & VEG (INCL CAR/DK GRN), CHEESE SAUCE","Chicken or turkey, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), cheese sauce (mixture)" +27345450,"CHICKEN, RICE, VEG (NO CAR/DK GRN), CHEESE SAUCE","Chicken or turkey, rice, and vegetables (excluding carrots, 
broccoli, and dark-green leafy), cheese sauce (mixture)" +27345510,"CHICKEN, RICE & VEG (INCL CAR/DK GRN), TOMATO SAUCE","Chicken or turkey, rice, and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-based sauce (mixture)" +27345520,"CHICKEN, RICE & VEG (NO CAR/DK GRN), TOMATO SAUCE","Chicken or turkey, rice, and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-based sauce (mixture)" +27347100,"CHICKEN OR TURKEY POT PIE","Chicken or turkey pot pie" +27347200,"CHICKEN, STUFFING & VEG (INCL CAR/DK GRN), NO SAUCE","Chicken or turkey, stuffing, and vegetables (including carrots, broccoli, and/or dark-green leafy), no sauce (mixture)" +27347210,"CHICKEN, STUFFING, VEG (NO CAR/DK GRN), NO SAUCE","Chicken or turkey,stuffing, and vegetables (excluding carrots, broccoli, and dark green leafy), no sauce (mixture)" +27347220,"CHICKEN, STUFFING & VEG (INCL CAR/DK GRN), GRAVY","Chicken or turkey, stuffing, and vegetables (including carrots, broccoli, and/or dark-green leafy), gravy (mixture)" +27347230,"CHICKEN, STUFFING & VEG (NO CAR/DK GRN), GRAVY","Chicken or turkey, stuffing, and vegetables (excluding carrots, broccoli, and dark-green leafy), gravy (mixture)" +27347240,"CHICKEN, DUMPLINGS, VEG (INCL CAR/DK GRN), GRAVY","Chicken or turkey, dumplings, and vegetables (including carrots, broccoli, and/or dark green leafy), gravy (mixture)" +27347250,"CHICKEN, DUMPLINGS, VEG (NO CAR/DK GRN), GRAVY","Chicken or turkey, dumplings, and vegetables (excluding carrots, broccoli, and dark green leafy), gravy (mixture)" +27348100,"CHICKEN FRICASSEE, P.R. 
(FRICASE DE POLLO)","Chicken fricassee, Puerto Rican style (Fricase de pollo)" +27350020,"PAELLA WITH SEAFOOD","Paella with seafood" +27350030,"SEAFOOD STEW W/ POT & VEG (NO CAR/DK GREEN),TOM SCE","Seafood stew with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy), tomato-base sauce" +27350050,"SHRIMP CHOW MEIN OR CHOP SUEY W/ NOODLES","Shrimp chow mein or chop suey with noodles" +27350060,"SHRIMP CREOLE W/ RICE (INCLUDE SHRIMP JAMBALAYA)","Shrimp creole, with rice" +27350070,"TUNA POT PIE","Tuna pot pie" +27350080,"TUNA NOODLE CASSEROLE W/ VEG, CREAM OR WHITE SAUCE","Tuna noodle casserole with vegetables, cream or white sauce" +27350090,"FISH, NOODLES, VEG (INCL CAR/DK GRN), CHEESE SAUCE","Fish, noodles, and vegetables (including carrots, broccoli, and/or dark green leafy), cheese sauce (mixture)" +27350100,"FISH, NOODLES, VEG (NO CAR/DK GRN), CHEESE SAUCE","Fish, noodles, and vegetables (excluding carrots, broccoli, and dark-green leafy), cheese sauce (mixture)" +27350110,"BOUILLABAISSE","Bouillabaisse" +27350200,"OYSTER PIE (INCLUDE OYSTER POT PIE)","Oyster pie" +27350310,"SEAFOOD STEW W/ POT & VEG (W/ CAR/DK GREEN),TOM SCE","Seafood stew with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy), tomato-base sauce" +27350410,"TUNA NOODLE CASSEROLE W/ VEG & (MUSHROOM) SOUP","Tuna noodle casserole with vegetables and (mushroom) soup" +27351010,"CODFISH W/ STARCHY VEG, P.R. (SERENATA DE BACALAO)","Codfish with starchy vegetables, Puerto Rican style (Serenata de bacalao) (mixture)" +27351020,"CODFISH SALAD, P.R. (GAZPACHO DE BACALAO)","Codfish salad, Puerto Rican style (Gazpacho de bacalao)" +27351030,"STEWED CODFISH, P.R. (BACALAO GUISADO)","Stewed codfish, Puerto Rican style (Bacalao guisado)" +27351040,"BISCAYNE CODFISH, P.R. (BACALAO A LA VIZCAINA)","Biscayne codfish, Puerto Rican style (Bacalao a la Vizcaina)" +27351050,"CODFISH SALAD, P.R. 
(ENSALADA DE BACALAO)","Codfish salad, Puerto Rican style (Ensalada de bacalao)" +27360000,"STEW, NFS","Stew, NFS" +27360010,"GOULASH, NFS","Goulash, NFS" +27360050,"MEAT PIE, NFS","Meat pie, NFS" +27360080,"CHOW MEIN, NS AS TO TYPE OF MEAT, W/ NOODLES","Chow mein or chop suey, NS as to type of meat, with noodles" +27360090,"PAELLA, NFS","Paella, NFS" +27360100,"BRUNSWICK STEW","Brunswick stew" +27360120,"CHOW MEIN/CHOP SUEY,VARIOUS MEATS, W/ NOODLES","Chow mein or chop suey, various types of meat, with noodles" +27361010,"STEWED VARIETY MEATS (MOSTLY LIVER), P.R.(GANDINGA)","Stewed variety meats, Puerto Rican style (mostly liver) (Gandinga)" +27362000,"STEWED TRIPE W/ POTATOES, P.R. (MONDONGO)","Stewed tripe, Puerto Rican style, with potatoes (Mondongo)" +27363000,"GUMBO W/ RICE (NEW ORLEANS TYPE)","Gumbo with rice (New Orleans type with shellfish, pork, and/or poultry, tomatoes, okra, rice)" +27363100,"JAMBALAYA W/ MEAT & RICE","Jambalaya with meat and rice" +27410210,"BEEF & VEG (W/ CAR/DK GREEN, NO POTATO), NO SAUCE","Beef and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), no sauce (mixture)" +27410220,"BEEF & VEG (NO CAR/DK GREEN, NO POTATO), NO SAUCE","Beef and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), no sauce (mixture)" +27410250,"BEEF SHISH KABOB W/ VEGETABLES, EXCLUDING POTATOES","Beef shish kabob with vegetables, excluding potatoes" +27411100,"BEEF & VEG(W/ CAR/DK GREEN, NO POTATO), TOMATO SCE","Beef with vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27411120,"SWISS STEAK","Swiss steak" +27411150,"BEEF ROLL,STUFFED W/VEG/MEAT MIXTURE,TOM-BASE SAUCE","Beef rolls, stuffed with vegetables or meat mixture, tomato-based sauce" +27411200,"BEEF W/ VEG (NO CAR/DK GREEN, NO POTATO),TOMATO SCE","Beef with vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27414100,"BEEF W/ 
VEG (INCL CAR/DK GRN, NO POT), SOUP","Beef with vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), (mushroom) soup (mixture)" +27414200,"BEEF W/ VEG (NO CAR/DK GRN, NO POT), SOUP","Beef with vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), (mushroom) soup (mixture)" +27415100,"BEEF & VEG (W/ CAR/DK GREEN, NO POTATO), SOY SAUCE","Beef and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27415110,"BEEF AND BROCCOLI","Beef and broccoli" +27415120,"BEEF, TOFU & VEG(W/ CAR/DK GRN, NO POTATO),SOY SCE","Beef, tofu, and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27415130,"SZECHUAN BEEF","Szechuan beef" +27415140,"HUNAN BEEF","Hunan beef" +27415150,"BEEF, CHOW MEIN OR CHOP SUEY, NO NOODLES","Beef chow mein or chop suey, no noodles" +27415170,"KUNG PAO BEEF","Kung Pao beef" +27415200,"BEEF & VEG (NO CAR/DK GREEN, NO POTATO), SOY SAUCE","Beef and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27415220,"BEEF, TOFU & VEG(NO CAR/DK GRN, NO POTATO), SOY SCE","Beef, tofu, and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27416100,"BEEF & VEGETABLES, HAWAIIAN STYLE (MIXTURE)","Beef and vegetables, Hawaiian style (mixture)" +27416150,"PEPPER STEAK","Pepper steak" +27416200,"BEEF, GROUND, W/ EGG & ONION (MIXTURE)","Beef, ground, with egg and onion (mixture)" +27416250,"BEEF SALAD","Beef salad" +27416300,"BEEF TACO FILLING: BEEF, CHEESE, TOMATO, TACO SAUCE","Beef taco filling: beef, cheese, tomato, taco sauce" +27416400,"SUKIYAKI (STIR FRIED BEEF & VEGS IN SOY SAUCE)","Sukiyaki (stir fried beef and vegetables in soy sauce)" +27416450,"BEEF & VEG (INCL CAR/DK GRN, NO POTATOES), GRAVY","Beef and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), gravy 
(mixture)" +27416500,"BEEF & VEG (NO CAR/DK GREEN, NO POT), GRAVY","Beef and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), gravy (mixture)" +27418110,"SEASONED SHREDDED SOUP MEAT","Seasoned shredded soup meat (Ropa vieja, sopa de carne ripiada)" +27418210,"BEEF STEW, P.R. W/ VEGETABLES, NO POTATO (CARNE A LA JUDIA)","Puerto Rican style beef stew with vegetables, excluding potatoes (Carne a la Judia)" +27418310,"CORNED BEEF W/ TOMATO SAUCE & ONION, P.R. STYLE","Corned beef with tomato sauce and onion, Puerto Rican style (mixture)" +27418410,"BEEF STEAK W/ ONIONS, P.R. (BIFTEC ENCEBOLLADO)","Beef steak with onions, Puerto Rican style (mixture) (Biftec encebollado)" +27420010,"CABBAGE W/ HAM HOCKS (MIXTURE)","Cabbage with ham hocks (mixture)" +27420020,"HAM OR PORK SALAD","Ham or pork salad" +27420040,"FRANKFURTERS OR HOT DOGS & SAUERKRAUT (MIXTURE)","Frankfurters or hot dogs and sauerkraut (mixture)" +27420060,"PORK & VEG (W/ CAR/DK GREEN, NO POTATO), NO SAUCE","Pork and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), no sauce (mixture)" +27420080,"GREENS W/ HAM OR PORK (MIXTURE)","Greens with ham or pork (mixture)" +27420100,"PORK, TOFU & VEG (W/ CAR/DK GRN,NO POTATO), SOY SCE","Pork, tofu, and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-base sauce (mixture)" +27420110,"PORK & VEGETABLES, HAWAIIAN STYLE (MIXTURE)","Pork and vegetables, Hawaiian style (mixture)" +27420120,"PORK & WATERCRESS W/ SOY-BASED SAUCE (MIXTURE)","Pork and watercress with soy-based sauce (mixture)" +27420150,"KUNG PAO PORK","Kung Pao pork" +27420160,"MOO SHU (MU SHI) PORK, W/O PANCAKE","Moo Shu (Mu Shi) Pork, without Chinese pancake" +27420170,"PORK AND ONIONS W/ SOY-BASED SAUCE","Pork and onions with soy-based sauce (mixture)" +27420200,"PORK HASH,HAWAIIAN--PORK,VEG(NO CAR/DK GRN),SOY SCE","Pork hash, Hawaiian style-ground pork, vegetables (excluding carrots, broccoli, and dark-green 
leafy (no potatoes)), soy-based sauce" +27420250,"HAM & VEG (W/ CARROT/DK GREEN, NO POTATO), NO SAUCE","Ham and vegetables (including carrots, broccoli, and/or dark- green leafy (no potatoes)), no sauce (mixture)" +27420270,"HAM & VEG (NO CARROT/DK GREEN, NO POTATO), NO SAUCE","Ham and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), no sauce (mixture)" +27420350,"PORK & VEG (NO CAR/DK GREEN, NO POTATO), NO SAUCE","Pork and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), no sauce (mixture)" +27420370,"PORK,TOFU & VEG(NO CAR/DK GREEN,NO POTATO)SOY SAUCE","Pork, tofu, and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27420390,"PORK CHOW MEIN OR CHOP SUEY, NO NOODLES","Pork chow mein or chop suey, no noodles" +27420400,"PORK & VEG (INCL CAR/DK GRN, NO POT), TOMATO SAUCE","Pork and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27420410,"PORK & VEG (NO CAR/DK GRN, NO POT), TOMATO SAUCE","Pork and vegetables (excluding carrots, broccoli, and dark- green leafy (no potatoes)), tomato-based sauce (mixture)" +27420450,"SAUSAGE & VEG (INCL CAR/DK GRN)(NO POT), TOM SAUCE","Sausage and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27420460,"SAUSAGE & VEG (NO CAR/DK GRN/POT), TOMATO SAUCE","Sausage and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27420470,"SAUSAGE & PEPPERS, NO SAUCE","Sausage and peppers, no sauce (mixture)" +27420500,"PORK & VEG (INCL CAR/DK GRN), SOY-BASED SAUCE","Pork and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27420510,"PORK & VEG (NO CAR/DK GRN), SOY-BASED SAUCE","Pork and vegetables (excluding carrots, broccoli, and dark- green leafy (no potatoes)), soy-based sauce (mixture)" 
+27420520,"PORK SHISH KABOB WITH VEGETABLES, EXCLUDING POTATOES","Pork shish kabob with vegetables, excluding potatoes" +27421010,"STUFFED CHRISTOPHINE, P.R. (CHAYOTE RELLENO)","Stuffed christophine, Puerto Rican style (Chayote relleno)" +27422010,"PORK CHOPS STEWED W/VEG, P.R. (CHULETAS A LA JARD.)","Pork chop stewed with vegetables, Puerto Rican style (mixture) (Chuletas a la jardinera)" +27430400,"LAMB STEW W/ VEG (INCL CAR/DK GRN, NO POT), GRAVY","Lamb or mutton stew with vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), gravy" +27430410,"LAMB STEW W/ VEG (NO CAR/DK GRN, NO POT), GRAVY","Lamb or mutton stew with vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), gravy" +27430500,"VEAL GOULASH W/VEG(NO CAR/DK GREEN, NO POT),TOM SCE","Veal goulash with vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), tomato-base sauce" +27430510,"VEAL GOULASH W/VEG(W/ CAR/DK GREEN, NO POT),TOM SCE","Veal goulash with vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), tomato-base sauce" +27430580,"VEAL W/ VEG (INCL CAR/DK GRN), NO POT, CREAM SAUCE","Veal with vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), cream or white sauce" +27430590,"VEAL W/ VEG (NO CAR/DK GRN), NO POT, CREAM SAUCE","Veal with vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), cream or white sauce" +27430610,"LAMB SHISH KABOB W/ VEGETABLES, EXCLUDING POTATOES","Lamb shish kabob with vegetables, excluding potatoes" +27440110,"CHICK/TURK & VEG (W/ CAR/DK GRN, NO POT), NO SAUCE","Chicken or turkey and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), no sauce (mixture)" +27440120,"CHICK/TURK & VEG (NO CAR/DK GRN, NO POT), NO SAUCE","Chicken or turkey and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), no sauce (mixture)" +27440130,"CHICKEN OR TURKEY SHISH KABOB W/VEGETABLES, 
EXCL POTATOES","Chicken or turkey shish kabob with vegetables, excluding potatoes" +27441120,"CHICKEN CREOLE W/O RICE","Chicken or turkey creole, without rice" +27442110,"CHICKEN/TURKEY & VEG (W/ CAR/DK GREEN,NO POT),GRAVY","Chicken or turkey and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), gravy (mixture)" +27442120,"CHICKEN/TURKEY & VEG(NO CAR/DK GREEN,NO POT), GRAVY","Chicken or turkey and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), gravy (mixture)" +27443110,"CHICKEN A LA KING W/VEG(INCL CAR/DK GRN),WHITE SCE","Chicken or turkey a la king with vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), cream, white, or soup-based sauce" +27443120,"CHICKEN A LA KING W/ VEG(NO CAR/DK GRN),WHITE SAUCE","Chicken or turkey a la king with vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), cream, white, or soup-based sauce" +27443150,"CHICKEN DIVAN","Chicken or turkey divan" +27445110,"CHICKEN & VEG (INCL CAR/DK GRN, NO POT), SOY SAUCE","Chicken or turkey and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27445120,"CHICKEN & VEG (NO CAR/DK GRN, NO POT), SOY SAUCE","Chicken or turkey and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27445125,"CHICKEN & VEG (INCL CAR/DK GRN, NO POT), TOMATO SAUCE","Chicken or turkey and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27445130,"CHICKEN & VEG (NO CAR/DK GRN, NO POT), TOMATO SAUCE","Chicken or turkey and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27445150,"GENERAL TSO CHICKEN","General Tso chicken" +27445180,"MOO GOO GAI PAN","Moo Goo Gai Pan" +27445220,"KUNG PAO CHICKEN","Kung pao chicken" +27445250,"ALMOND CHICKEN","Almond chicken" +27446100,"CHICKEN CHOW 
MEIN/CHOP SUEY, NO NOODLES","Chicken or turkey chow mein or chop suey, no noodles" +27446200,"CHICKEN OR TURKEY SALAD, W/ MAYO","Chicken or turkey salad, made with mayonnaise" +27446205,"CHICKEN/TURKEY SALAD WITH NUTS AND/OR FRUITS","Chicken or turkey salad with nuts and/or fruits" +27446220,"CHICKEN SALAD W/ EGG","Chicken or turkey salad with egg" +27446225,"CHICKEN OR TURKEY SALAD, W/ LT MAYO","Chicken or turkey salad, made with light mayonnaise" +27446230,"CHICKEN OR TURKEY SALAD, W/ MAYO-TYPE DRSG","Chicken or turkey salad, made with mayonnaise-type salad dressing" +27446235,"CHICKEN OR TURKEY SALAD, MADE W/ LT MAYO-TYPE DRSG","Chicken or turkey salad, made with light mayonnaise-type salad dressing" +27446240,"CHICKEN OR TURKEY SALAD, W/CREAMY DRSG","Chicken or turkey salad, made with creamy dressing" +27446245,"CHICKEN OR TURKEY SALAD, W/ LIT CREAMY DRSG","Chicken or turkey salad, made with light creamy dressing" +27446250,"CHICKEN OR TURKEY SALAD, MADE W/ ITALIAN DRESSING","Chicken or turkey salad, made with Italian dressing" +27446255,"CHICKEN OR TURKEY SALAD, MADE W/ LT ITALIAN DRSG","Chicken or turkey salad, made with light Italian dressing" +27446260,"CHICKEN OR TURKEY SALAD, MADE W/ FAT FREE DRSG","Chicken or turkey salad, made with any type of fat free dressing" +27446300,"CHICKEN GARDEN SALAD W/ TOMATO/CARROT, NO DRESSING","Chicken or turkey garden salad (chicken and/or turkey, tomato and/or carrots, other vegetables), no dressing" +27446310,"CHICKEN GARDEN SALAD W/VEG, NO CAR/TOM, NO DRESSING","Chicken or turkey garden salad (chicken and/or turkey, other vegetables excluding tomato and carrots), no dressing" +27446315,"CHICKEN GARDEN SALAD W/ BACON,CHEESE,TOMATO/CARROT,NO DRSG","Chicken or turkey garden salad with bacon and cheese (chicken and/or turkey, bacon, cheese, lettuce and/or greens, tomato and/or carrots, other vegetables), no dressing" +27446320,"CHICKN(BRD,FRD)GARDEN SALAD W/ BACON,CHEESE,TOM/CAR,NO DRSG","Chicken or turkey (breaded, fried) 
garden salad with bacon and cheese (chicken and/or turkey, bacon, cheese, lettuce and/or greens, tomato and/or carrots, other vegetables), no dressing" +27446330,"CHICKN GARDEN SALAD W/ CHEESE,TOM/CAR,NO DRSG","Chicken or turkey garden salad with cheese (chicken and/or turkey, cheese, lettuce and/or greens, tomato and/or carrots, other vegetables), no dressing" +27446332,"CHICKN(BRD,FRD)GARDEN SALAD W/ CHEESE,TOM/CAR,NO DRSG","Chicken or turkey (breaded, fried) garden salad with cheese (chicken and/or turkey, cheese, lettuce and/or greens, tomato and/or carrots, other vegetables), no dressing" +27446350,"ASIAN CHICKEN/TURKEY GARDEN SALAD, NO DRESSING","Asian chicken or turkey garden salad (chicken and/or turkey, lettuce, fruit, nuts), no dressing" +27446355,"ASIAN CHICKEN GARDEN SALAD W/CRISPY NOODLES , NO DRESSING","Asian chicken or turkey garden salad with crispy noodles (chicken and/or turkey, lettuce, fruit, nuts, crispy noodles), no dressing" +27446360,"CHICKEN/TURKEY CAESAR GARDEN SALAD, NO DRESSING","Chicken or turkey caesar garden salad (chicken and/or turkey, lettuce, tomato, cheese), no dressing" +27446362,"CHICKEN/TURKEY (BREADED, FRIED) CAESAR GARDEN SALAD, NO DRSG","Chicken or turkey (breaded, fried) caesar garden salad (chicken and/or turkey, lettuce, tomatoes, cheese), no dressing" +27446400,"CHICKEN & VEG (INCL CAR/DK GRN)(NO POT), CHEESE SCE","Chicken or turkey and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), cheese sauce (mixture)" +27446410,"CHICKEN & VEG (NO CAR/DK GRN)(NO POT), CHEESE SAUCE","Chicken or turkey and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), cheese sauce (mixture)" +27448020,"CHICKEN FRICASSEE W/SAUCE,NO POT,PUERTO RICAN STYLE","Chicken or turkey fricassee, with sauce, no potatoes, Puerto Rican style (potatoes reported separately)" +27448030,"CHICKEN FRICASSEE, NO SCE OR POT,PUERTO RICAN STYLE","Chicken or turkey fricassee, no sauce, no potatoes, Puerto Rican 
style (sauce and potatoes reported separately)" +27450010,"CRAB SALAD","Crab salad" +27450020,"LOBSTER SALAD","Lobster salad" +27450030,"SALMON SALAD","Salmon salad" +27450040,"SHRIMP CHOW MEIN OR CHOP SUEY, NO NOODLES","Shrimp chow mein or chop suey, no noodles" +27450060,"TUNA SALAD, W/MAYO","Tuna salad, made with mayonnaise" +27450061,"TUNA SALAD, W/LT MAYO","Tuna salad, made with light mayonnaise" +27450062,"TUNA SALAD, W/ MAYO-TYPE DRESSING","Tuna salad, made with mayonnaise-type salad dressing" +27450063,"TUNA SALAD, W/ LT MAYO-TYPE DRSG","Tuna salad, made with light mayonnaise-type salad dressing" +27450064,"TUNA SALAD, W/ CREAMY DRSG","Tuna salad, made with creamy dressing" +27450065,"TUNA SALAD, W/ LT CREAMY DRSG","Tuna salad, made with light creamy dressing" +27450066,"TUNA SALAD, W/ ITALIAN DRSG","Tuna salad, made with Italian dressing" +27450067,"TUNA SALAD, W/ LT ITALIAN DRSG","Tuna salad, made with light Italian dressing" +27450068,"TUNA SALAD, W/ ANY TYPE OF FAT FREE DRSG","Tuna salad, made with any type of fat free dressing" +27450070,"SHRIMP SALAD","Shrimp salad" +27450080,"SEAFOOD SALAD","Seafood salad" +27450090,"TUNA SALAD W/ CHEESE","Tuna salad with cheese" +27450100,"TUNA SALAD W/ EGG","Tuna salad with egg" +27450110,"SHRIMP GARDEN SALAD W/ TOMATO/CARROT, NO DRESSING","Shrimp garden salad (shrimp, lettuce, eggs, tomato and/or carrots, other vegetables), no dressing" +27450120,"SHRIMP GARDEN SALAD (NO TOMATO/CARROT, NO DRESSING)","Shrimp garden salad (shrimp, lettuce, eggs, vegetables excluding tomato and carrots), no dressing" +27450130,"CRAB SALAD MADE W/ IMITATION CRAB","Crab salad made with imitation crab" +27450150,"FISH, TOFU, & VEGETABLES, TEMPURA, HAWAIIAN","Fish, tofu, and vegetables, tempura, Hawaiian style (mixture)" +27450180,"SEAFOOD GARDEN SALAD W/ VEG(NO TOM/CAR), NO DRESSING","Seafood garden salad with seafood, lettuce, vegetables excluding tomato and carrots, no dressing" +27450190,"SEAFOOD GARDEN SALAD W/ TOM/CAR, NO 
DRESSING","Seafood garden salad with seafood, lettuce, tomato and/or carrots, other vegetables, no dressing" +27450200,"SEAFOOD GARDEN SALAD W/ EGG, VEG, (NO CAR/TOM) NO DRESSING","Seafood garden salad with seafood, lettuce, eggs, vegetables excluding tomato and carrots, no dressing" +27450210,"SEAFOOD GARDEN SALAD W/EGG, TOM/CAR, NO DRESSING","Seafood garden salad with seafood, lettuce, eggs, tomato and/or carrots, other vegetables, no dressing" +27450250,"OYSTERS ROCKEFELLER","Oysters Rockefeller" +27450310,"LOMI SALMON","Lomi salmon" +27450400,"SHRIMP & VEG (W/ CAR/DK GREEN, NO POT), NO SAUCE","Shrimp and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), no sauce (mixture)" +27450405,"SHRIMP & VEG (NO CARROT/DK GREEN, NO POT), NO SAUCE","Shrimp and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), no sauce (mixture)" +27450410,"SHRIMP & VEG (W/ CAR/DK GREEN, NO POT), SOY SAUCE","Shrimp and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27450420,"SHRIMP & VEG (NO CARROT/DK GREEN, NO POT),SOY SAUCE","Shrimp and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27450430,"SHRIMP SHISH KABOB WITH VEGETABLES, EXCLUDING POTATOES","Shrimp shish kabob with vegetables, excluding potatoes" +27450450,"SHRIMP CREOLE, NO RICE","Shrimp creole, no rice" +27450470,"KUNG PAO SHRIMP","Kung Pao shrimp" +27450510,"TUNA CASSEROLE W/ VEG & SOUP, NO NOODLES","Tuna casserole with vegetables and (mushroom) soup, no noodles" +27450600,"SHELLFISH MIXTURE & VEG (INCL CAR/DK GRN), SOY SCE","Shellfish mixture and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-based sauce" +27450610,"SHELLFISH MIXTURE & VEG (NO CAR/DK GRN), SOY SAUCE","Shellfish mixture and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), soy-based sauce" +27450650,"SHELLFISH & VEG(INCL 
CAR/DK GRN)(NO POT),SOUP SAUCE","Shellfish mixture and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), (mushroom) soup (mixture)" +27450660,"SHELLFISH & VEG(NO CAR/DK GRN/POT),SOUP-BASED SAUCE","Shellfish mixture and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), (mushroom) soup (mixture)" +27450700,"FISH & VEG (INCL CAR/DK GRN, NO POT), TOMATO SAUCE","Fish and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), tomato-based sauce (mixture)" +27450710,"FISH & VEG (NO CAR/DK GRN, NO POT), TOMATO SAUCE","Fish and vegetables (excluding carrots, broccoli, and dark- green leafy (no potatoes)), tomato-based sauce (mixture)" +27450740,"FISH & VEGETABLES (W/ CAR/DK GRN), SOY-BASED SAUCE","Fish and vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27450750,"FISH & VEGETABLES (NO CAR/DK GRN), SOY-BASED SAUCE","Fish and vegetables (excluding carrots, broccoli, and dark-green leafy (no potatoes)), soy-based sauce (mixture)" +27450760,"FISH SHISH KABOB WITH VEGETABLES, EXCLUDING POTATOES","Fish shish kabob with vegetables, excluding potatoes" +27451010,"FRIED FISH W/ SAUCE, P.R. (PESCADO FRITO CON MOJO)","Fried fish with sauce, Puerto Rican style (Pescado frito con mojo)" +27451030,"LOBSTER W/ SAUCE, P.R. (LANGOSTA A LA CRIOLLA)","Lobster with sauce, Puerto Rican style (Langosta a la criolla)" +27451060,"OCTOPUS SALAD, P.R. (ENSALADA DE PULPO)","Octopus salad, Puerto Rican style (Ensalada de pulpo)" +27451070,"CODFISH SALAD, P.R. 
(SERENATA)","Codfish salad, Puerto Rican style (Serenata)" +27460010,"CHOW MEIN, NS AS TO TYPE OF MEAT, NO NOODLES","Chow mein or chop suey, NS as to type of meat, no noodles" +27460100,"LAU LAU(PORK & FISH WRAPPED IN TARO/SPINACH LEAVES)","Lau lau (pork and fish wrapped in taro or spinach leaves)" +27460490,"JULIENNE SALAD (MEAT, CHEESE, EGG, VEG) NO DRESSING","Julienne salad (meat, cheese, eggs, vegetables), no dressing" +27460510,"ANTIPASTO W/ HAM, FISH, CHEESE, VEGETABLES","Antipasto with ham, fish, cheese, vegetables" +27460710,"LIVERS, CHICKEN, CHOPPED, W/ EGGS & ONION (MIXTURE)","Livers, chicken, chopped, with eggs and onion (mixture)" +27460750,"LIVER, BEEF OR CALVES, & ONIONS","Liver, beef or calves, and onions" +27461010,"STEWED SEASONED GROUND BEEF, P.R.","Stewed seasoned ground beef, Puerto Rican style (Picadillo para relleno)" +27462000,"STEWED CHITTERLINGS, P.R. (CUAJO GUISADO)","Stewed chitterlings, Puerto Rican style (cuajo guisado)" +27463000,"STEWED GIZZARDS, P.R. (MOLLEJITAS GUISADAS)","Stewed gizzards, Puerto Rican style (Mollejitas guisadas)" +27464000,"GUMBO, NO RICE (NEW ORLEANS TYPE W/MEAT, TOM, OKRA)","Gumbo, no rice (New Orleans type with shellfish, pork, and/or poultry, tomatoes, okra)" +27500050,"SANDWICH, NFS","Sandwich, NFS" +27500100,"MEAT SANDWICH, NFS","Meat sandwich, NFS" +27500200,"WRAP SANDWICH, W/ MEAT, POULTRY OR FISH, VEGETABLES & CHEESE","Wrap sandwich, filled with meat, poultry, or fish, vegetables, and cheese" +27500300,"WRAP SANDWICH, W/ MEAT, POULTRY OR FISH & VEGETABLES","Wrap sandwich, filled with meat, poultry, or fish, and vegetables" +27510000,"BEEF SANDWICH, NFS","Beef sandwich, NFS" +27510110,"BEEF BARBECUE SANDWICH OR SLOPPY JOE, ON BUN","Beef barbecue sandwich or Sloppy Joe, on bun" +27510130,"BEEF BARBECUE SUBMARINE SANDWICH, ON BUN","Beef barbecue submarine sandwich, on bun" +27510210,"CHEESEBURGER, PLAIN, ON BUN","Cheeseburger, plain, on bun" +27510220,"CHEESEBURGER, W/ MAYO, ON BUN","Cheeseburger, with 
mayonnaise or salad dressing, on bun" +27510230,"CHEESEBURGER, W/ MAYO & TOMATO/CATSUP, ON BUN","Cheeseburger, with mayonnaise or salad dressing, and tomato and/or catsup, on bun" +27510240,"CHEESEBURGER, 1/4 LB MEAT, PLAIN, ON BUN","Cheeseburger, 1/4 lb meat, plain, on bun" +27510250,"CHEESEBURGER, 1/4 LB MEAT, W/ MAYO, ON BUN","Cheeseburger, 1/4 lb meat, with mayonnaise or salad dressing, on bun" +27510260,"CHEESEBURGER, 1/4 LB, W/ MUSHROOM SAUCE, ON BUN","Cheeseburger, 1/4 lb meat, with mushrooms in sauce, on bun" +27510265,"DOUBLE CHEESEBURGER, PLAIN, ON MINIATURE BUN","Double cheeseburger, (2 patties, 1 oz each), plain, on miniature bun" +27510270,"DOUBLE CHEESEBURGER, PLAIN, ON BUN","Double cheeseburger (2 patties), plain, on bun" +27510280,"DOUBLE CHEESEBURGER, W/ MAYO, ON BUN","Double cheeseburger (2 patties), with mayonnaise or salad dressing, on bun" +27510290,"DOUBLE CHEESEBURGER, PLAIN, ON DOUBLE-DECKER BUN","Double cheeseburger (2 patties), plain, on double-decker bun" +27510300,"DOUBLE CHEESEBURGER, W/MAYO, ON DOUBLE-DECKER BUN","Double cheeseburger (2 patties), with mayonnaise or salad dressing, on double-decker bun" +27510310,"CHEESEBURGER W/ TOMATO & OR CATSUP, ON BUN","Cheeseburger with tomato and/or catsup, on bun" +27510311,"CHEESEBURGER, 1 OZ MEAT, PLAIN, ON MINI BUN","Cheeseburger, 1 oz meat, plain, on miniature bun" +27510320,"CHEESEBURGER, 1/4 LB MEAT,W/ TOMATO/CATSUP, BUN","Cheeseburger, 1/4 lb meat, with tomato and/or catsup, on bun" +27510330,"DOUBLE CHEESEBURGER W/TOMATO & OR CATSUP, ON BUN","Double cheeseburger (2 patties), with tomato and/or catsup, on bun" +27510340,"DOUBLE CHEESEBURGER, W/ MAYO & TOMATO, ON BUN","Double cheeseburger (2 patties), with mayonnaise or salad dressing and tomatoes and/or catsup, on bun" +27510350,"CHEESEBURGER, 1/4 LB MEAT, W/ MAYO & TOMATO/CATSUP, ON BUN","Cheeseburger, 1/4 lb meat, with mayonnaise or salad dressing, and tomato and/or catsup, on bun" +27510355,"CHEESEBURGER, 1/3 LB MEAT, W/MAYO, TOMATO, 
ON BUN","Cheeseburger, 1/3 lb meat, with mayonnaise or salad dressing, tomato and/or catsup on bun" +27510359,"CHEESEBURGER, 1/3 LB MEAT, W/MAYO, MUSHROOMS,ON BUN","Cheeseburger, 1/3 lb meat, with mayonnaise or salad dressing, and mushrooms, on bun" +27510360,"BACON CHEESEBURGER, W/MAYO/SALAD DRSG, TOMATO/CATSUP,ON BUN","Bacon cheeseburger, with mayonnaise or salad dressing, tomato and/or catsup, on bun" +27510370,"DOUBLE CHEESEBURGER W/ MAYONNAISE, ON BUN","Double cheeseburger (2 patties, 1/4 lb meat each), with mayonnaise or salad dressing, on bun" +27510375,"DOUBLE CHEESEBURGER(2 PATTIES,1/4 LB EA) W/TOMATO/CATSUP/BUN","Double cheeseburger (2 patties, 1/4 lb meat each), with tomato and/or catsup, on bun" +27510380,"TRIPLE CHEESEBURGER W/ MAYO, TOMATO, ON BUN","Triple cheeseburger (3 patties, 1/4 lb meat each), with mayonnaise or salad dressing and tomatoes and/or catsup, on bun" +27510385,"DOUBLE BACON CHEESEBURGER (2 PATTIES), W/ TOMATO/CATSUP","Double bacon cheeseburger (2 patties), with tomato and/or catsup, on bun" +27510390,"DOUBLE BACON CHEESEBURGER, ON BUN","Double bacon cheeseburger (2 patties, 1/4 lb meat each), on bun" +27510400,"BACON CHEESEBURGER, 1/4 LB MEAT, W/ TOMATO, ON BUN","Bacon cheeseburger, 1/4 lb meat, with tomato and/or catsup, on bun" +27510410,"CHILIBURGER, ON BUN (INCLUDE HAMBURGER W/ CHILI)","Chiliburger, on bun" +27510420,"TACO BURGER, ON BUN (INCL CHILIBURGER W/ CHEESE)","Taco burger, on bun" +27510425,"DOUBLE BACON CHEESEBURGER (2 PATTIES,1/4 LB EA), W/ MAYO/BUN","Double bacon cheeseburger (2 patties, 1/4 lb meat each), with mayonnaise or salad dressing, on bun" +27510430,"DOUBLE BACON CHEESEBURGER, W/MAYO/DRSG,TOMATO/CATSUP,ON BUN","Double bacon cheeseburger (2 patties, 1/4 lb meat each), with mayonnaise or salad dressing, and tomato and/or catsup, on bun" +27510435,"DOUBLE BACON CHEESEBURGER (2 PATTIES,1/3 LB EA) W/ MAYO/ BUN","Double bacon cheeseburger (2 patties,1/3 lb meat each), with mayonnaise or salad dressing, on bun" 
+27510440,"BACON CHEESEBURGER, 1/4 LB, W/MAYO/DRSG,TOMATO/CATSUP,ON BUN","Bacon cheeseburger, 1/4 lb meat, with mayonnaise or salad dressing, and tomato and/or catsup, on bun" +27510445,"BACON CHEESEBURGER, 1/3 LB MEAT, W/TOMATO +/OR CATSUP,","Bacon cheeseburger, 1/3 lb meat, with tomato and/or catsup, on bun" +27510450,"CHEESEBURGER, 1/4 LB MEAT, W/ HAM, ON BUN","Cheeseburger, 1/4 lb meat, with ham, on bun" +27510480,"CHEESEBURGER, W/ ONIONS, ON RYE BUN","Cheeseburger (hamburger with cheese sauce), 1/4 lb meat, with grilled onions, on rye bun" +27510500,"HAMBURGER, PLAIN, ON BUN","Hamburger, plain, on bun" +27510510,"HAMBURGER, W/ TOMATO & OR CATSUP, ON BUN","Hamburger, with tomato and/or catsup, on bun" +27510520,"HAMBURGER, W/ MAYO & TOMATO/CATSUP, ON BUN","Hamburger, with mayonnaise or salad dressing, and tomato and/or catsup, on bun" +27510530,"HAMBURGER, 1/4 LB MEAT, PLAIN, ON BUN","Hamburger, 1/4 lb meat, plain, on bun" +27510540,"DOUBLE HAMBURGER W/TOMATO & OR CATSUP, ON BUN","Double hamburger (2 patties), with tomato and/or catsup, on bun" +27510550,"DOUBLE HAMBURGER W/ MAYO & TOMATO, DBL-DECKER BUN","Double hamburger (2 patties), with mayonnaise or salad dressing and tomatoes, on double-decker bun" +27510560,"HAMBURGER, 1/4 LB MEAT W/ MAYO & TOMATO/CATSUP, ON BUN","Hamburger, 1/4 lb meat, with mayonnaise or salad dressing, and tomato and/or catsup, on bun" +27510570,"HAMBURGER, 2.5 OZ MEAT, W/ MAYO & TOMATO, ON BUN","Hamburger, 2-1/2 oz meat, with mayonnaise or salad dressing and tomatoes, on bun" +27510590,"HAMBURGER, W/ MAYO, ON BUN","Hamburger, with mayonnaise or salad dressing, on bun" +27510600,"HAMBURGER, 1 OZ MEAT,PLAIN, ON MINIATURE BUN","Hamburger, 1 oz meat, plain, on miniature bun" +27510610,"HAMBURGER, 1 OZ MEAT, TOMATO, ON MINIATURE BUN","Hamburger, 1 oz meat, with tomato and/or catsup, on miniature bun" +27510620,"HAMBURGER, 1/4 LB MEAT, W/ TOMATO & OR CATSUP, BUN","Hamburger, 1/4 lb meat, with tomato and/or catsup, on bun" 
+27510630,"HAMBURGER, 1/4 LB MEAT, W/ MAYO, ON BUN","Hamburger, 1/4 lb meat, with mayonnaise or salad dressing, on bun" +27510650,"DOUBLE HAMBURGER, PLAIN, ON BUN","Double hamburger (2 patties), plain, on bun" +27510660,"DOUBLE HAMBURGER, W/ MAYO, ON BUN","Double hamburger (2 patties), with mayonnaise or salad dressing, on bun" +27510670,"DOUBLE HAMBURGER, W/ MAYO & TOMATO, ON BUN","Double hamburger (2 patties), with mayonnaise or salad dressing and tomatoes, on bun" +27510680,"DOUBLE HAMBURGER (1/2 LB MEAT), W/ TOM/CATSUP, BUN","Double hamburger (2 patties, 1/4 lb meat each), with tomato and/or catsup, on bun" +27510690,"DOUBLE HAMBURGER,1/2 LB MEAT,W/MAYO&TOM/CATSUP,BUN","Double hamburger (2 patties, 1/4 lb meat each), with mayonnaise or salad dressing and tomatoes and/or catsup, on double-decker bun" +27510700,"MEATBALL & SPAG SAU SUB SAND","Meatball and spaghetti sauce submarine sandwich" +27510710,"PIZZABURGER (HAMBURGER, CHEESE, SAUCE), ON 1/2 BUN","Pizzaburger (hamburger, cheese, sauce) on 1/2 bun" +27510720,"PIZZABURGER (HAMBURGER, CHEESE, SAUCE), WHOLE BUN","Pizzaburger (hamburger, cheese, sauce) on whole bun" +27510910,"CORNED BEEF SANDWICH","Corned beef sandwich" +27510950,"REUBEN(CORN BEEF W/ SAUERKRAUT & CHEESE) W/ SPREAD","Reuben sandwich (corned beef sandwich with sauerkraut and cheese), with spread" +27511010,"PASTRAMI SANDWICH","Pastrami sandwich" +27513010,"ROAST BEEF SANDWICH","Roast beef sandwich" +27513020,"ROAST BEEF SANDWICH, W/ GRAVY","Roast beef sandwich, with gravy" +27513040,"ROAST BEEF SUB SAND, W/ LETT, TOM, SPRD","Roast beef submarine sandwich, with lettuce, tomato and spread" +27513041,"ROAST BEEF SUB SAND, W/ CHEESE, LETTUCE, TOMATO, SPRD","Roast beef submarine sandwich, with cheese, lettuce, tomato and spread" +27513050,"ROAST BEEF SANDWICH W/ CHEESE","Roast beef sandwich with cheese" +27513060,"ROAST BEEF SANDWICH W/ BACON & CHEESE SAUCE","Roast beef sandwich with bacon and cheese sauce" +27513070,"ROAST BEEF SUBMARINE SANDWICH, ON 
ROLL, AU JUS","Roast beef submarine sandwich, on roll, au jus" +27515000,"STEAK SUBMARINE SANDWICH WITH LETTUCE AND TOMATO","Steak submarine sandwich with lettuce and tomato" +27515010,"STEAK SANDWICH, PLAIN, ON ROLL","Steak sandwich, plain, on roll" +27515020,"STEAK , CHEESE SUB SAND, W/ LETT, TOM","Steak and cheese submarine sandwich, with lettuce and tomato" +27515030,"STEAK & CHEESE SANDWICH, PLAIN, ON ROLL","Steak and cheese sandwich, plain, on roll" +27515040,"STEAK & CHEESE SUBMARINE SANDWICH, PLAIN, ON ROLL","Steak and cheese submarine sandwich, plain, on roll" +27515050,"FAJITA-STYLE BEEF SAND W/ CHEESE,PITA BRD,W/LET+TOM","Fajita-style beef sandwich with cheese, on pita bread, with lettuce and tomato" +27515070,"STEAK & CHEESE SUB, FRIED PEP & ONIONS, ON ROLL","Steak and cheese submarine sandwich, with fried peppers and onions, on roll" +27515080,"STEAK SANDWICH, PLAIN, ON BISCUIT","Steak sandwich, plain, on biscuit" +27516010,"GYRO SANDWICH W/ TOMATO & SPREAD","Gyro sandwich (pita bread, beef, lamb, onion, condiments), with tomato and spread" +27517000,"WRAP SANDWICH FILLED WITH BEEF PATTY, CHEESE, LETTUCE,SPREAD","Wrap sandwich filled with beef patty, cheese and spread and/or sauce" +27517010,"WRAP SANDWICH FILLED WITH BEEF PATTY, CHEESE, TOMATO, SPREAD","Wrap sandwich filled with beef patty, cheese, tomato and/or catsup, and spread and/or sauce" +27518000,"WRAP SAND W/BEEF PATTY,BAC, CHS, TOM,SPREAD","Wrap sandwich filled with beef patty, bacon, cheese, tomato and/or catsup, and spread and/or sauce" +27520110,"BACON SANDWICH W/ SPREAD","Bacon sandwich, with spread" +27520120,"BACON & CHEESE SANDWICH, W/ SPREAD","Bacon and cheese sandwich, with spread" +27520130,"BACON, CHICK, & TOM CLUB SANDWICH W/ LETTUCE+SPREAD","Bacon, chicken, and tomato club sandwich, with lettuce and spread" +27520135,"BACON, CHICKN & TOMATO CLUB SANDWICH W/CHEESE, LETTUCE &SPRD","Bacon, chicken, and tomato club sandwich, with cheese, lettuce and spread" +27520140,"BACON & EGG 
SANDWICH","Bacon and egg sandwich" +27520150,"BACON, LETTUCE, & TOMATO SANDWICH W/ SPREAD","Bacon, lettuce, and tomato sandwich with spread" +27520160,"BACON,CHICK,&TOMATO CLUB SANDWICH,MULTIGR W/ SPREAD","Bacon, chicken, and tomato club sandwich, on multigrain roll with lettuce and spread" +27520165,"BACON, CHICK FILLET (BRD, FRIED),& TOM CLUB W/LETTUCE & SPRD","Bacon, chicken fillet (breaded, fried), and tomato club with lettuce and spread" +27520166,"BACON, CHICK FILLET (BRD,FRIED),&TOM CLUB W/CHS,LETTUCE&SPRD","Bacon, chicken fillet (breaded, fried), and tomato club sandwich with cheese, lettuce and spread" +27520170,"BACON ON BISCUIT","Bacon on biscuit" +27520250,"HAM ON BISCUIT","Ham on biscuit" +27520300,"HAM SANDWICH W/ SPREAD","Ham sandwich, with spread" +27520310,"HAM SANDWICH W/ LETTUCE & SPREAD","Ham sandwich with lettuce and spread" +27520320,"HAM & CHEESE SANDWICH, W/ LETTUCE & SPREAD","Ham and cheese sandwich, with lettuce and spread" +27520330,"HAM & EGG SANDWICH","Ham and egg sandwich" +27520340,"HAM SALAD SANDWICH","Ham salad sandwich" +27520350,"HAM & CHEESE SANDWICH W/ SPREAD, GRILLED","Ham and cheese sandwich, with spread, grilled" +27520360,"HAM & CHEESE SANDWICH ON BUN W/ LETTUCE & SPREAD","Ham and cheese sandwich, on bun, with lettuce and spread" +27520370,"HOT HAM & CHEESE SANDWICH, ON BUN","Hot ham and cheese sandwich, on bun" +27520380,"HAM & CHEESE ON ENGLISH MUFFIN","Ham and cheese on English muffin" +27520390,"HAM & CHEESE SUB, W/ LETTUCE, TOMATO & SPREAD","Ham and cheese submarine sandwich, with lettuce, tomato and spread" +27520410,"CUBAN SAND, P.R STYLE(SANDWICH CUBANA), W/ SPREAD","Cuban sandwich, (Sandwich cubano), with spread" +27520420,"MIDNIGHT SAND,P.R. 
STYLE (MEDIA NOCHE), W/ SPREAD","Midnight sandwich, (Media noche), with spread" +27520500,"PORK SANDWICH, ON WHITE ROLL, W/ ONIONS, PICKLES & BBQ SAUCE","Pork sandwich, on white roll, with onions, dill pickles and barbecue sauce" +27520510,"PORK BARBECUE SANDWICH OR SLOPPY JOE, ON BUN","Pork barbecue sandwich or Sloppy Joe, on bun" +27520520,"PORK SANDWICH","Pork sandwich" +27520530,"PORK SANDWICH W/ GRAVY","Pork sandwich, with gravy" +27520540,"HAM & TOMATO CLUB SAND, W/ SPREAD","Ham and tomato club sandwich, with lettuce and spread" +27540110,"CHICKEN SANDWICH, W/ SPREAD","Chicken sandwich, with spread" +27540111,"CHICKEN SANDWICH, W/ CHEESE & SPREAD","Chicken sandwich, with cheese and spread" +27540120,"CHICKEN SALAD OR CHICKEN SPREAD SANDWICH","Chicken salad or chicken spread sandwich" +27540130,"CHICKEN BARBECUE SANDWICH","Chicken barbecue sandwich" +27540140,"CHICKEN FILLET (BREADED, FRIED) SANDWICH","Chicken fillet (breaded, fried) sandwich" +27540145,"CHICKEN FILLET (BREADED, FRIED) SANDWICH ON BISCUIT","Chicken fillet (breaded, fried) sandwich on biscuit" +27540150,"CHICKEN FILLET(BR FRIED) SAND W/ LET, TOM & SPREAD","Chicken fillet (breaded, fried) sandwich with lettuce, tomato and spread" +27540151,"CHICKEN FILLET(BRD, FRIED) SAND W/ CHEESE, LETT, TOM & SPRD","Chicken fillet (breaded, fried) sandwich with cheese, lettuce, tomato and spread" +27540170,"CHICKEN PATTY SANDWICH, MINI, W/ SPREAD","Chicken patty sandwich, miniature, with spread" +27540180,"CHICKEN PATTY SANDWICH ON BISCUIT","Chicken patty sandwich or biscuit" +27540190,"CHICKEN PATTY SANDWICH W/ LETTUCE & SPREAD","Chicken patty sandwich, with lettuce and spread" +27540200,"FAJITA-STYLE CHICKEN SANDWICH W/ CHEESE, LETTUC,TOM","Fajita-style chicken sandwich with cheese, on pita bread, with lettuce and tomato" +27540210,"WRAP SNDWCH W/CHICK STRIPS(BREADED,FRIED),CHS,LETTUCE & SPRD","Wrap sandwich filled with chicken strips (breaded, fried), cheese, lettuce, and spread" +27540230,"CHICKEN PATTY 
SAND W/ CHEES,WHEAT BUN,LET,TOM, SPRE","Chicken patty sandwich with cheese, on wheat bun, with lettuce, tomato and spread" +27540235,"CHICKEN FILLET, BROILED, SANDWICH WITH LETTUCE, TOMATO, AND","Chicken fillet, broiled, sandwich with lettuce, tomato, and spread" +27540240,"CHICKEN FILLET,(BROIL) SAND W/ LET, TOM, & SPREAD","Chicken fillet, (broiled), sandwich, on whole wheat roll, with lettuce, tomato and spread" +27540250,"CHICK FILLET,BROIL,SANDWICH,W/CHEESE,WW ROLL","Chicken fillet, broiled, sandwich with cheese, on whole wheat roll, with lettuce, tomato and non-mayonnaise type spread" +27540260,"CHICK FILLET, BROILED,SANDWICH,ON OAT BRAN BUN(LTS)","Chicken fillet, broiled, sandwich, on oat bran bun, with lettuce, tomato, spread" +27540270,"CHICKEN FILLET,SANDWICH,W/LETT,TOM,&NON-MAYO SPREAD","Chicken fillet, broiled, sandwich, with lettuce, tomato, and non-mayonnaise type spread" +27540280,"CHICKEN FILLET,BROILED,SANDWICH,W/CHEESE,ON BUN","Chicken fillet, broiled, sandwich with cheese, on bun, with lettuce, tomato and spread" +27540290,"CHICKEN SUB SANDWICH, W/ LETTUCE, TOMATO & SPREAD","Chicken submarine sandwich, with lettuce, tomato, and spread" +27540291,"CHICKEN SUB SANDWICH, W/ CHEESE, LETTUCE, TOMATO & SPREAD","Chicken submarine sandwich, with cheese, lettuce, tomato, and spread" +27540300,"WRAP SNDWCH W/CHICK STRIPS (BROILED),CHS,LETTUCE & SPRD","Wrap sandwich filled with chicken strips (broiled), cheese, lettuce, and spread" +27540310,"TURKEY SANDWICH W/ SPREAD","Turkey sandwich, with spread" +27540320,"TURKEY SALAD SANDWICH","Turkey salad or turkey spread sandwich" +27540330,"TURKEY SANDWICH W/ GRAVY","Turkey sandwich, with gravy" +27540350,"TURKEY SUB SAND, W/ CHEESE, LETT, TOM, SPRD","Turkey submarine sandwich, with cheese, lettuce, tomato and spread" +27541000,"TURKEY, HAM & ROAST BEEF CLUB SANDWCH W/LETT,TOM,SPRD","Turkey, ham, and roast beef club sandwich, with lettuce, tomato and spread" +27541001,"TURKEY, HAM & ROAST BEEF CLUB SANDWCH 
W/CHEESE,LETT,TOM,SPRD","Turkey, ham, and roast beef club sandwich with cheese, lettuce, tomato, and spread" +27550000,"FISH SANDWICH, ON BUN, W/ SPREAD","Fish sandwich, on bun, with spread" +27550100,"FISH SANDWICH, ON BUN, W/ CHEESE AND SPREAD","Fish sandwich, on bun, with cheese and spread" +27550110,"CRAB CAKE SANDWICH, ON BUN","Crab cake sandwich, on bun" +27550510,"SARDINE SANDWICH, W/ LETTUCE & SPREAD","Sardine sandwich, with lettuce and spread" +27550710,"TUNA SALAD SANDWICH W/ LETTUCE","Tuna salad sandwich, with lettuce" +27550720,"TUNA SALAD SANDWICH","Tuna salad sandwich" +27550730,"TUNA MELT SANDWICH","Tuna melt sandwich" +27550750,"TUNA SALSUB SAND, W/ LETT & TOMATO","Tuna salad submarine sandwich, with lettuce and tomato" +27550751,"TUNA SALAD SUB SANDWCH, W/ CHEESE, LETTUCE & TOMATO","Tuna salad submarine sandwich, with cheese, lettuce and tomato" +27560000,"LUNCHEON MEAT SANDWICH, NFS, W/ SPREAD","Luncheon meat sandwich, NFS, with spread" +27560110,"BOLOGNA SANDWICH, W/ SPREAD","Bologna sandwich, with spread" +27560120,"BOLOGNA & CHEESE SANDWICH W/ SPREAD","Bologna and cheese sandwich, with spread" +27560300,"CORN DOG (FRANKFURTER/HOT DOG W/ CORNBREAD COATING)","Corn dog (frankfurter or hot dog with cornbread coating)" +27560350,"PIG IN A BLANKET (FRANKFURTER OR HOT DOG WRAPPED IN DOUGH)","Pig in a blanket (frankfurter or hot dog wrapped in dough)" +27560410,"PUERTO RICAN SANDWICH, P.R. 
(SANDWICH CRIOLLO)","Puerto Rican sandwich (Sandwich criollo)" +27560500,"PEPPERONI, SALAMI SUBM SANDWICH, WITH LETTUCE, TOM, SPREAD","Pepperoni and salami submarine sandwich, with lettuce, tomato, and spread" +27560510,"SALAMI SANDWICH W/ SPREAD","Salami sandwich, with spread" +27560650,"SAUSAGE ON BISCUIT(INCL JIMMY DEAN SAUSAGE BISCUIT)","Sausage on biscuit" +27560660,"SAUSAGE GRIDDLE CAKE SANDWICH","Sausage griddle cake sandwich" +27560670,"SAUSAGE & CHEESE ON ENGLISH MUFFIN","Sausage and cheese on English muffin" +27560705,"SAUSAGE BALLS (MADE W/ BISCUIT MIX & CHEESE)","Sausage balls (made with biscuit mix and cheese)" +27560710,"SAUSAGE SANDWICH","Sausage sandwich" +27560720,"SAUSAGE & SPAGH SAUCE SANDWICH","Sausage and spaghetti sauce sandwich" +27560910,"COLD CUT SUB SANDWICH, W/ CHEESE, LETTUCE, TOMATO, SPRD","Cold cut submarine sandwich, with cheese, lettuce, tomato, and spread" +27563010,"MEAT SPREAD OR POTTED MEAT SANDWICH","Meat spread or potted meat sandwich" +27564000,"FRANKFURTER OR HOT DOG, NFS, PLAIN, ON BUN","Frankfurter or hot dog sandwich, NFS, plain, on bun" +27564010,"FRANKFURTER OR HOT DOG, NFS, PLAIN, ON WHITE BREAD","Frankfurter or hot dog sandwich, NFS, plain, on white bread" +27564020,"FRANKFURTER OR HOT DOG, NFS, PLAIN, ON WHEAT BREAD","Frankfurter or hot dog sandwich, NFS, plain, on wheat bread" +27564030,"FRANKFURTER/HOT DOG, NFS, PLAIN, WHL WHT BREAD, NS TO 100%","Frankfurter or hot dog sandwich, NFS, plain, on whole wheat bread, NS as to 100%" +27564040,"FRANKFURTER OR HOT DOG, NFS, PLAIN, ON WHOLE GRAIN WHITE BRE","Frankfurter or hot dog sandwich, NFS, plain, on whole grain white bread" +27564050,"FRANKFURTER OR HOT DOG, NFS, PLAIN, ON MULTIGRAIN BREAD","Frankfurter or hot dog sandwich, NFS, plain, on multigrain bread" +27564060,"FRANKFURTER OR HOT DOG, BEEF, PLAIN, ON BUN","Frankfurter or hot dog sandwich, beef, plain,on bun" +27564070,"FRANKFURTER OR HOT DOG, PLAIN, WHITE BREAD","Frankfurter or hot dog sandwich, beef, plain, on 
white bread" +27564080,"FRANKFURTER OR HOT DOG, PLAIN, WHEAT BREAD","Frankfurter or hot dog sandwich, beef, plain, on wheat bread" +27564090,"FRANKFURTER/ HOT DOG, BEEF,PLAIN, WHOLE WHEAT BREAD, NS 100%","Frankfurter or hot dog sandwich, beef, plain, on whole wheat bread, NS as to 100%" +27564100,"FRANKFURTER OR HOT DOG, PLAIN, ON WHOLE GRAIN WHITE","Frankfurter or hot dog sandwich, beef, plain, on whole grain white bread" +27564110,"FRANKFURTER OR HOT DOG, BEEF, PLAIN, ON MULTIGRAIN BREAD","Frankfurter or hot dog sandwich, beef, plain, on multigrain bread" +27564120,"FRANKFURTER OR HOT DOG, BEEF/PORK, PLAIN, ON BUN","Frankfurter or hot dog sandwich, beef and pork, plain, on bun" +27564130,"FRANKFURTER OR HOT DOG, BEEF & PORK, PLAIN, ON WHITE BREAD","Frankfurter or hot dog sandwich, beef and pork, plain, on white bread" +27564140,"FRANKFURTER OR HOT DOG, BEEF/PORK, PLAIN, ON WHEAT BREAD","Frankfurter or hot dog sandwich, beef and pork, plain, on wheat bread" +27564150,"FRANKFURTER/HOT DOG, BEEF&PORK,PLAIN,ON WHL WHT, NS TO 100%","Frankfurter or hot dog sandwich, beef and pork, plain, on whole wheat bread, NS as to 100%" +27564160,"FRANKFURTER OR HOT DOG, BEEF/PORK, PLAIN, WHOLE GRAIN WHITE","Frankfurter or hot dog sandwich, beef and pork, plain, on whole grain white bread" +27564170,"FRANKFURTER OR HOT DOG, BEEF/PORK, PLAIN, MULTIGRAIN BREAD","Frankfurter or hot dog sandwich, beef and pork, plain, on multigrain bread" +27564180,"FRANKFURTER OR HOT DOG, MEAT/POULTRY, PLAIN, ON BUN","Frankfurter or hot dog sandwich, meat and poultry, plain, on bun" +27564190,"FRANKFURTER OR HOT DOG, MEAT AND POULTRY, PLAIN, ON WHITE BR","Frankfurter or hot dog sandwich, meat and poultry, plain, on white bread" +27564200,"FRANKFURTER OR HOT DOG, MEAT AND POULTRY, PLAIN, ON WHEAT BR","Frankfurter or hot dog sandwich, meat and poultry, plain, on wheat bread" +27564210,"FRANKFURTER/HOT DOG, MEAT&POULTRY,PLAIN,WHL WHT,NS TO 100%","Frankfurter or hot dog sandwich, meat and poultry, plain, 
on whole wheat bread, NS as to 100%" +27564220,"FRANKFURTER OR HOT DOG, MEAT AND POULTRY, PLAIN, ON WHOLE GR","Frankfurter or hot dog sandwich, meat and poultry, plain, on whole grain white bread" +27564230,"FRANKFURTER OR HOT DOG, MEAT AND POULTRY, PLAIN, ON MULTIGRA","Frankfurter or hot dog sandwich, meat and poultry, plain, on multigrain bread" +27564240,"FRANKFURTER OR HOT DOG, CHICKEN AND/OR TURKEY, PLAIN, ON BUN","Frankfurter or hot dog sandwich, chicken and/or turkey, plain, on bun" +27564250,"FRANKFURTER OR HOT DOG, CHICKEN / TURKEY, PLAIN, ON WHITE BR","Frankfurter or hot dog sandwich, chicken and/or turkey, plain, on white bread" +27564260,"FRANKFURTER OR HOT DOG, CHICKEN AND/OR TURKEY, PLAIN, ON WHE","Frankfurter or hot dog sandwich, chicken and/or turkey, plain, on wheat bread" +27564270,"FRANKFURTER/HOT DOG, CHICK/TURKEY,PLAIN,WHL WHT,NS TO 100%","Frankfurter or hot dog sandwich, chicken and/or turkey, plain, on whole wheat bread, NS as to 100%" +27564280,"FRANKFURTER OR HOT DOG, CHICKEN AND/OR TURKEY, PLAIN, ON WHO","Frankfurter or hot dog sandwich, chicken and/or turkey, plain, on whole grain white bread" +27564290,"FRANKFURTER OR HOT DOG, CHICKEN AND/OR TURKEY, PLAIN, ON MUL","Frankfurter or hot dog sandwich, chicken and/or turkey, plain, on multigrain bread" +27564300,"FRANKFURTER OR HOT DOG, REDUCED FAT OR LIGHT, PLAIN, ON BUN","Frankfurter or hot dog sandwich, reduced fat or light, plain, on bun" +27564310,"FRANKFURTER OR HOT DOG, REDUCED FAT OR LIGHT, PLAIN, ON WHIT","Frankfurter or hot dog sandwich, reduced fat or light, plain, on white bread" +27564320,"FRANKFURTER OR HOT DOG, REDUCED FAT OR LIGHT, PLAIN, ON WHEA","Frankfurter or hot dog sandwich, reduced fat or light, plain, on wheat bread" +27564330,"FRANKFURTER/HOT DOG, RED FAT/LIGHT,PLAIN,WHL WHT,NS TO 100%","Frankfurter or hot dog sandwich, reduced fat or light, plain, on whole wheat bread, NS as to 100%" +27564340,"FRANKFURTER OR HOT DOG, REDUCED FAT OR LIGHT, PLAIN, ON 
WHOL","Frankfurter or hot dog sandwich, reduced fat or light, plain, on whole grain white bread" +27564350,"FRANKFURTER OR HOT DOG, REDUCED FAT OR LIGHT, PLAIN, ON MULT","Frankfurter or hot dog sandwich, reduced fat or light, plain, on multigrain bread" +27564360,"FRANKFURTER OR HOT DOG, FAT FREE, PLAIN, ON BUN","Frankfurter or hot dog sandwich, fat free, plain, on bun" +27564370,"FRANKFURTER OR HOT DOG, FAT FREE, PLAIN, ON WHITE BREAD","Frankfurter or hot dog sandwich, fat free, plain, on white bread" +27564380,"FRANKFURTER OR HOT DOG, FAT FREE, PLAIN, ON WHEAT BREAD","Frankfurter or hot dog sandwich, fat free, plain, on wheat bread" +27564390,"FRANKFURTER/HOT DOG, FAT FREE, PLAIN, WHL WHT, NS TO 100%","Frankfurter or hot dog sandwich, fat free, plain, on whole wheat bread, NS as to 100%" +27564400,"FRANKFURTER OR HOT DOG, FAT FREE, PLAIN, ON WHOLE GRAIN WHIT","Frankfurter or hot dog sandwich, fat free, plain, on whole grain white bread" +27564410,"FRANKFURTER OR HOT DOG, FAT FREE, PLAIN, ON MULTIGRAIN BREAD","Frankfurter or hot dog sandwich, fat free, plain, on multigrain bread" +27564420,"FRANKFURTER OR HOT DOG, MEATLESS, PLAIN, ON BUN","Frankfurter or hot dog sandwich, meatless, plain, on bun" +27564430,"FRANKFURTER OR HOT DOG, MEATLESS, PLAIN, ON BREAD","Frankfurter or hot dog sandwich, meatless, plain, on bread" +27564440,"FRANKFURTER OR HOT DOG, WITH CHILI, ON BUN","Frankfurter or hot dog sandwich, with chili, on bun" +27564450,"FRANKFURTER OR HOT DOG, WITH CHILI, ON WHITE BREAD","Frankfurter or hot dog sandwich, with chili, on white bread" +27564460,"FRANKFURTER OR HOT DOG, WITH CHILI, ON WHEAT BREAD","Frankfurter or hot dog sandwich, with chili, on wheat bread" +27564470,"FRANKFURTER/HOT DOG, W/CHILI, ON WHL WHT BREAD, NS TO 100%","Frankfurter or hot dog sandwich, with chili, on whole wheat bread, NS as to 100%" +27564480,"FRANKFURTER OR HOT DOG, WITH CHILI, ON WHOLE GRAIN WHITE BRE","Frankfurter or hot dog sandwich, with chili, on whole grain white bread" 
+27564490,"FRANKFURTER OR HOT DOG, WITH CHILI, ON MULTI-GRAIN BREAD","Frankfurter or hot dog sandwich, with chili, on multi-grain bread" +27564500,"FRANKFURTER OR HOT DOG, W/ VEGETARIAN CHILI, ON BUN","Frankfurter or hot dog sandwich, with vegetarian chili, on bun" +27564510,"FRANKFURTER OR HOT DOG, W/ VEGETARIAN CHILI, ON WHITE BREAD","Frankfurter or hot dog sandwich, with vegetarian chili, on white bread" +27564520,"FRANKFURTER OR HOT DOG, W/ VEGETARIAN CHILI, ON WHEAT BREAD","Frankfurter or hot dog sandwich, with vegetarian chili, on wheat bread" +27564530,"FRANKFURTER/HOT DOG, W/MEATLESS CHILI, ON WHL WHT,NS TO 100%","Frankfurter or hot dog sandwich, with meatless chili, on whole wheat bread, NS as to 100%" +27564540,"FRANKFURTER OR HOT DOG, W/ VEGETARIAN CHILI, ON WHOLE GRAIN","Frankfurter or hot dog sandwich, with vegetarian chili, on whole grain white bread" +27564550,"FRANKFURTER OR HOT DOG, W/ VEGETARIAN CHILI, ON MULTIGRAIN B","Frankfurter or hot dog sandwich, with vegetarian chili, on multigrain bread" +27564560,"FRANKFURTER OR HOT DOG, MEATLESS, ON BUN, WITH CHILI","Frankfurter or hot dog sandwich, meatless, on bun, with vegetarian chili" +27564570,"FRANKFURTER OR HOT DOG, MEATLESS, ON BREAD, WITH CHILI","Frankfurter or hot dog sandwich, meatless, on bread, with vegetarian chili" +27570310,"HORS D'OEUVRES, W/ SPREAD","Hors d'oeuvres, with spread" +27601000,"BEEF STEW, BABY FOOD, TODDLER","Beef stew, baby food, toddler" +27610100,"BEEF & EGG NOODLES, BABY, NS AS TO STR OR JR","Beef and egg noodles, baby food, NS as to strained or junior" +27610110,"BEEF & EGG NOODLES, BABY, STRAINED","Beef and egg noodles, baby food, strained" +27610120,"BEEF & EGG NOODLES, BABY, JUNIOR","Beef and egg noodles, baby food, junior" +27610710,"BEEF W/ VEGETABLES, BABY, STRAINED","Beef with vegetables, baby food, strained" +27610730,"BEEF W/ VEGETABLES, BABY FOOD, TODDLER","Beef with vegetables, baby food, toddler" +27640050,"CHICKEN & RICE DINNER, BABY, STRAINED","Chicken 
and rice dinner, baby food, strained" +27640100,"CHICKEN NOODLE DINNER, BABY, NS AS TO STR OR JR","Chicken noodle dinner, baby food, NS as to strained or junior" +27640110,"CHICKEN NOODLE DINNER, BABY, STRAINED","Chicken noodle dinner, baby food, strained" +27640120,"CHICKEN NOODLE DINNER, BABY, JUNIOR","Chicken noodle dinner, baby food, junior" +27640810,"CHICKEN, NOODLES & VEGETABLES, BABY, TODDLER","Chicken, noodles, and vegetables, baby food, toddler" +27641000,"CHICKEN STEW, BABY FOOD, TODDLER","Chicken stew, baby food, toddler" +27642100,"TURKEY, RICE & VEGETABLES, BABY, NS AS TO STR OR JR","Turkey, rice and vegetables, baby food, NS as to strained or junior" +27642110,"TURKEY, RICE & VEGETABLES, BABY, STRAINED","Turkey, rice and vegetables, baby food, strained" +27642120,"TURKEY, RICE & VEGETABLES, BABY, JUNIOR","Turkey, rice and vegetables, baby food, junior" +27642130,"TURKEY, RICE, & VEGETABLES, BABY, TODDLER","Turkey, rice, and vegetables, baby food, toddler" +27644110,"CHICKEN SOUP, BABY","Chicken soup, baby food" +28101000,"FROZEN DINNER, NFS","Frozen dinner, NFS" +28110000,"BEEF DINNER, NFS (FROZEN)","Beef dinner, NFS (frozen meal)" +28110120,"BEEF W/ POTATOES (FROZEN MEAL, LARGE MEAT PORTION)","Beef with potatoes (frozen meal, large meat portion)" +28110150,"BEEF W/ VEGETABLE (DIET FROZEN MEAL)","Beef with vegetable (diet frozen meal)" +28110220,"SIRLOIN, CHOPPED, W/ GRAVY, POT, VEG (FROZEN MEAL)","Sirloin, chopped, with gravy, mashed potatoes, vegetable (frozen meal)" +28110250,"SIRLOIN TIPS W/ GRAVY, POTATOES, VEG (FROZEN MEAL)","Sirloin tips with gravy, potatoes, vegetable (frozen meal)" +28110270,"SIRLOIN BEEF W/ GRAVY, POTATOES, VEG (FROZ MEAL)","Sirloin beef with gravy, potatoes, vegetable (frozen meal)" +28110300,"SALISBURY STEAK DINNER, NFS (FROZEN)","Salisbury steak dinner, NFS (frozen meal)" +28110310,"SALISBURY STEAK W/ GRAVY, POTATOES, VEG (FROZ MEAL)","Salisbury steak with gravy, potatoes, vegetable (frozen meal)" +28110330,"SALISBURY 
STEAK, GRAVY, POT, VEG, DESSERT(FRZ MEAL)","Salisbury steak with gravy, whipped potatoes, vegetable, dessert (frozen meal)" +28110340,"SALISBURY STK, GRAVY,POT,VEG,SOUP,DESSERT(FRZ MEAL)","Salisbury steak with gravy, potatoes, vegetable, soup or macaroni and cheese, dessert (frozen meal)" +28110350,"SALISBURY STEAK, POT,VEG,DESSERT(FROZ MEAL,LG MEAT)","Salisbury steak with gravy, potatoes, vegetable, dessert (frozen meal, large meat portion)" +28110370,"SALISBURY STEAK, GRAVY, MAC&CHEESE, VEG (FROZ MEAL)","Salisbury steak with gravy, macaroni and cheese, vegetable (frozen meal)" +28110380,"SALISBURY STEAK W/GRAV,MACARONI & CHEESE (FRZ MEAL)","Salisbury steak with gravy, macaroni and cheese (frozen meal)" +28110390,"SALISBURY STEAK, POT, VEG, DESSERT (DIET FZN MEAL)","Salisbury steak, potatoes, vegetable, dessert (diet frozen meal)" +28110510,"BEEF, SLICED, W/ GRAVY, POTATOES, VEG (FROZEN MEAL)","Beef, sliced, with gravy, potatoes, vegetable (frozen meal)" +28110620,"SHORTRIBS W/ BBQ SAUCE, POTATOES & VEG (FROZ MEAL)","Beef short ribs, boneless, with barbecue sauce, potatoes, vegetable (frozen meal)" +28110640,"MEATBALLS, SWEDISH, IN SAUCE W/ NOODLES (FROZ MEAL)","Meatballs, Swedish, in sauce, with noodles (frozen meal)" +28110660,"MEATBALLS,SWEDISH,W/GRAVY & NOODLES (DIET FRZ MEAL)","Meatballs, Swedish, in gravy, with noodles (diet frozen meal)" +28113110,"SALISBURY STEAK W/ TOM SAUCE, VEG (DIET FROZ MEAL)","Salisbury steak, baked, with tomato sauce, vegetable (diet frozen meal)" +28113140,"BEEF W/ SPAETZLE OR RICE, VEGETABLE (FROZEN MEAL)","Beef with spaetzle or rice, vegetable (frozen meal)" +28130000,"VEAL DINNER, NFS (FROZEN)","Veal dinner, NFS (frozen meal)" +28133110,"VEAL, BREADED, W/ SPAGHETTI, TOM SAUCE (FROZ MEAL)","Veal, breaded, with spaghetti, in tomato sauce (frozen meal)" +28133340,"VEAL PARMIGIANA, VEG, FETTUCCINE,DESSERT(FROZ MEAL)","Veal parmigiana with vegetable, fettuccine alfredo, dessert (frozen meal)" +28140100,"CHICKEN DINNER, NFS 
(FROZEN)","Chicken dinner, NFS (frozen meal)" +28140150,"CHICKEN DIVAN (FROZEN MEAL)","Chicken divan (frozen meal)" +28140250,"CHICKEN,GRAVY,DRESS,RICE,VEG,DESSRT(Z MEAL,LG MEAT)","Chicken, boneless, with gravy, dressing, rice, vegetable, dessert (frozen meal, large meat portion)" +28140320,"CHICKEN & NOODLES W/ VEG, DESSERT (FROZEN MEAL)","Chicken and noodles with vegetable, dessert (frozen meal)" +28140710,"CHICKEN, FRIED, W/ POTATOES, VEGETABLE (FROZ MEAL)","Chicken, fried, with potatoes, vegetable (frozen meal)" +28140720,"CHICKEN PATTY, POTATOES, VEGETABLE (FROZEN MEAL)","Chicken patty, or nuggets, boneless, breaded, potatoes, vegetable (frozen meal)" +28140730,"CHICKEN PATTY, TOM SCE, FETTUCCINE, VEG (FROZ MEAL)","Chicken patty, breaded, with tomato sauce and cheese, fettuccine alfredo, vegetable (frozen meal)" +28140740,"CHICKEN PATTY/NUGGET,PASTA,FRUIT,DESSERT(FROZ MEAL)","Chicken patty, or nuggets, boneless, breaded, with pasta and tomato sauce, fruit, dessert (frozen meal)" +28140810,"CHICKEN, FRIED, W/ POT, VEG, DESSERT (FROZEN MEAL)","Chicken, fried, with potatoes, vegetable, dessert (frozen meal)" +28141010,"CHICKEN, FRIED, POT,VEG, DESSERT(FROZ MEAL,LG MEAT)","Chicken, fried, with potatoes, vegetable, dessert (frozen meal, large meat portion)" +28141050,"CHICKEN PATTY PARMIGIANA, W/ VEG (DIET FROZ MEAL)","Chicken patty parmigiana, breaded, with vegetable (diet frozen meal)" +28141201,"TERIYAKI CHICKEN W/ RICE & VEGETABLE (DIET FROZ MEAL)","Teriyaki chicken with rice and vegetable (diet frozen meal)" +28141250,"CHICKEN W/ RICE-VEGETABLE MIXTURE (DIET FROZ MEAL)","Chicken with rice-vegetable mixture (diet frozen meal)" +28141300,"CHICKEN W/RICE & VEG, REDUCED FAT&SODIUM(DIET FROZ)","Chicken with rice and vegetable, reduced fat and sodium (diet frozen meal)" +28141600,"CHICKEN A LA KING W/ RICE (FROZEN MEAL)","Chicken a la king with rice (frozen meal)" +28141610,"CHICKEN & VEGETABLES IN CREAM SCE (DIET FROZ MEAL)","Chicken and vegetables in cream or 
white sauce (diet frozen meal)" +28141650,"CHICKEN & VEGETABLES AU GRATIN (DIET FROZEN MEAL)","Chicken and vegetables au gratin with rice-vegetable mixture (diet frozen entree)" +28143020,"CHICKEN AND VEGETABLE W/ RICE, ASIAN (DIET FROZEN MEAL)","Chicken and vegetable entree with rice, Asian (diet frozen meal)" +28143040,"CHICKEN CHOW MEIN W/ RICE (DIET FROZEN MEAL)","Chicken chow mein with rice (diet frozen meal)" +28143050,"CHICK CHOWMEIN W/RICE,REDUCED FAT&SODIUM(DIET FROZ)","Chicken chow mein with rice, reduced fat and sodium (diet frozen meal)" +28143080,"CHICKEN W/NOODLES & CHEESE SAUCE (DIET FROZEN MEAL)","Chicken with noodles and cheese sauce (diet frozen meal)" +28143110,"CHICKEN CACCIATORE W/ NOODLES (DIET FROZEN MEAL)","Chicken cacciatore with noodles (diet frozen meal)" +28143130,"CHICKEN & VEG ENTREE W/ NOODLES (FROZEN MEAL)","Chicken and vegetable entree with noodles (frozen meal)" +28143150,"CHICK & VEG ENTREE W/ NOODLES, (DIET FROZEN MEAL)","Chicken and vegetable entree with noodles (diet frozen meal)" +28143170,"CHICKEN IN CREAM SAUCE W/ NOODLES & VEG (FROZ MEAL)","Chicken in cream sauce with noodles and vegetable (frozen meal)" +28143180,"CHICKEN,BUTTER SCE,W/POT & VEG (FRZ, DIET MEAL)","Chicken in butter sauce with potatoes and vegetable (diet frozen meal)" +28143190,"CHICKEN, MUSHROOM SAUCE, WILD RICE, VEG (FROZ MEAL)","Chicken in mushroom sauce, white and wild rice, vegetable (frozen meal)" +28143200,"CHICKEN IN SOY-BASED SAUCE,RICE&VEG (FROZEN MEAL)","Chicken in soy-based sauce, rice and vegetables (frozen meal)" +28143210,"CHICKEN IN ORANGE SAUCE W/ RICE (DIET FROZEN MEAL)","Chicken in orange sauce with almond rice (diet frozen meal)" +28143220,"CHICKEN IN BBQ SCE,W/RICE,VEG&DES,RED FAT&SODIUM,FRZ,DIET","Chicken in barbecue sauce, with rice, vegetable and dessert, reduced fat and sodium (diet frozen meal)" +28144100,"CHICKEN & VEG W/ NOODLES & CREAM SCE (FROZEN MEAL)","Chicken and vegetable entree with noodles and cream sauce (frozen meal)" 
+28145000,"TURKEY DINNER, NFS (FROZEN)","Turkey dinner, NFS (frozen meal)" +28145100,"TURKEY W/DRESSING, GRAVY,VEG, FRUIT (DIET FRZ MEAL)","Turkey with dressing, gravy, vegetable and fruit (diet frozen meal)" +28145110,"TURKEY W/ VEGETABLE, STUFFING (DIET FROZEN MEAL)","Turkey with vegetable, stuffing (diet frozen meal)" +28145210,"TURKEY W/ GRAVY, DRESSING, POT, VEG (FROZEN MEAL)","Turkey with gravy, dressing, potatoes, vegetable (frozen meal)" +28145610,"TURKEY, DRESSING,POT,VEG,DESSERT(FROZ MEAL,LG MEAT)","Turkey with gravy, dressing, potatoes, vegetable, dessert (frozen meal, large meat portion)" +28145710,"TURKEY TETRAZZINI (FROZEN MEAL)","Turkey tetrazzini (frozen meal)" +28150000,"FISH DINNER, NFS (FROZEN)","Fish dinner, NFS (frozen meal)" +28150210,"HADDOCK W/ CHOPPED SPINACH (DIET FROZEN MEAL)","Haddock with chopped spinach (diet frozen meal)" +28150220,"FLOUNDER W/ CHOPPED BROCCOLI (DIET FROZEN MEAL)","Flounder with chopped broccoli (diet frozen meal)" +28150510,"FISH IN LEMON SAUCE W/ STARCH ITEM, VEG (FROZ MEAL)","Fish in lemon-butter sauce with starch item, vegetable (frozen meal)" +28150650,"FISH,BREADED/FISH STICKS,W/PASTA,VEG,DES (FRZ MEAL)","Fish, breaded, or fish sticks, with pasta, vegetable and dessert (frozen meal)" +28153010,"SHRIMP & CLAMS IN TOMATO SCE, W/ NOODLES(FROZ MEAL)","Shrimp and clams in tomato-based sauce, with noodles (frozen meal)" +28154010,"SHRIMP & VEG IN SAUCE W/ NOODLES (DIET FROZEN MEAL)","Shrimp and vegetables in sauce with noodles (diet frozen meal)" +28160300,"MEAT LOAF DINNER, NFS (FROZEN)","Meat loaf dinner, NFS (frozen meal)" +28160310,"MEATLOAF W/ POTATO, VEG (FROZ MEAL)","Meat loaf with potatoes, vegetable (frozen meal)" +28160650,"STUFFED GREEN PEPPER (FROZEN MEAL)","Stuffed green pepper (frozen meal)" +28160710,"STUFFED CABBAGE, W/ MEAT & TOM SCE (DIET FROZ MEAL)","Stuffed cabbage, with meat and tomato sauce (diet frozen meal)" +28310110,"BEEF BROTH, BOUILLON OR CONSOMME (INCL BROTH, NFS)","Beef, broth, bouillon, 
or consomme" +28310120,"BEEF BROTH OR BOUILLON, CANNED, LOW SODIUM","Beef, broth, bouillon, or consomme, canned, low sodium" +28310150,"OXTAIL SOUP","Oxtail soup" +28310160,"BEEF BROTH, W/ TOMATO, HOME RECIPE","Beef broth, with tomato, home recipe" +28310170,"BEEF BROTH, W/O TOMATO, HOME RECIPE","Beef broth, without tomato, home recipe" +28310230,"MEATBALL SOUP, MEXICAN STYLE, HOME RECIPE (SOPA DE ALBONDIGA","Meatball soup, Mexican style, home recipe (Sopa de Albondigas)" +28310320,"BEEF NOODLE SOUP, P.R. (SOPA DE CARNE Y FIDEOS)","Beef noodle soup, Puerto Rican style (Sopa de carne y fideos)" +28310330,"MEAT AND RICE NOODLE SOUP, ASIAN STYLE (VIETNAMESE PHO BO)","Meat and rice noodle soup, Asian style (Vietnamese Pho Bo)" +28310420,"BEEF & RICE SOUP, P.R.","Beef and rice soup, Puerto Rican style" +28311010,"PEPPERPOT (TRIPE) SOUP","Pepperpot (tripe) soup" +28311020,"MENUDO SOUP, HOME RECIPE","Menudo soup, home recipe" +28311030,"MENUDO, CANNED","Menudo soup, canned, prepared with water or ready-to-serve" +28315050,"BEEF VEGETABLE SOUP W/ POTATO, PASTA OR RICE, CHUNKY STYLE","Beef vegetable soup with potato, pasta, or rice, chunky style, canned, or ready-to-serve" +28315140,"BEEF VEGETABLE SOUP, MEXICAN STYLE, HOME RECIPE, (SOPA / CAL","Beef vegetable soup, Mexican style, home recipe, (Sopa / Caldo de Res)" +28315150,"MEAT AND CORN HOMINY SOUP, MEXICAN STYLE, HOME RECIPE (POZOL","Meat and corn hominy soup, Mexican style, home recipe (Pozole)" +28315160,"ITALIAN WEDDING SOUP","Italian Wedding Soup" +28317010,"BEEF STROGANOFF SOUP, CHUNKY STYLE, HOME RECIPE, CANNED OR R","Beef stroganoff soup, chunky style, home recipe, canned or ready-to-serve" +28320130,"HAM, RICE, & POTATO SOUP, P.R.","Ham, rice, and potato soup, Puerto Rican style" +28320140,"HAM, NOODLE & VEGETABLE SOUP, P.R.","Ham, noodle, and vegetable soup, Puerto Rican style" +28320160,"PORK VEGETABLE SOUP W/ POTATO, PASTA, OR RICE, CHUNKY","Pork vegetable soup with potato, pasta, or rice, stew type, chunky 
style" +28320300,"PORK W/VEG (NO CAR, BROC AND/OR DK GRN)SOUP, ASIAN STYLE","Pork with vegetable (excluding carrots, broccoli and/or dark-green leafy) soup, Asian Style" +28321130,"BACON SOUP, CREAM OF, PREPARED W/ WATER","Bacon soup, cream of, prepared with water" +28330110,"SCOTCH BROTH (LAMB, VEGETABLES, BARLEY)","Scotch broth (lamb, vegetables, and barley)" +28331110,"LAMB, PASTA & VEGETABLE SOUP, P.R.","Lamb, pasta, and vegetable soup, Puerto Rican style" +28340110,"CHICKEN OR TURKEY BROTH, BOUILLON, OR CONSOMME","Chicken or turkey broth, bouillon, or consomme" +28340120,"CHICKEN OR TURKEY BROTH, WITHOUT TOMATO, HOME RECIPE","Chicken or turkey broth, without tomato, home recipe" +28340130,"CHICKEN OR TURKEY BROTH, WITH TOMATO, HOME RECIPE","Chicken or turkey broth, with tomato, home recipe" +28340150,"MEXICAN STYLE CHICKEN BROTH SOUP STOCK","Mexican style chicken broth soup stock" +28340180,"CHICKEN OR TURKEY BROTH, LESS/REDUCED SODIUM, CANNED OR RTS","Chicken or turkey broth, less or reduced sodium, canned or ready-to-serve" +28340210,"CHICKEN RICE SOUP, P.R. 
(SOPA DE POLLO CON ARROZ)","Chicken rice soup, Puerto Rican style (Sopa de pollo con arroz)" +28340220,"CHICKEN SOUP W/ NOODLES & POTATOES, P.R.","Chicken soup with noodles and potatoes, Puerto Rican style" +28340310,"CHICKEN OR TURKEY GUMBO SOUP","Chicken or turkey gumbo soup, home recipe, canned or ready-to-serve" +28340510,"CHICKEN OR TURKEY NOODLE SOUP, CHUNKY STYLE, CANNED OR RTS","Chicken or turkey noodle soup, chunky style, canned or ready-to-serve" +28340550,"SWEET & SOUR SOUP","Sweet and sour soup" +28340580,"CHICKEN OR TURKEY SOUP WITH VEGETABLES, ASIAN STYLE","Chicken or turkey soup with vegetables (broccoli, carrots, celery, potatoes and onions), Asian style" +28340590,"CHICKEN OR TURKEY CORN SOUP WITH NOODLES, HOME RECIPE","Chicken or turkey corn soup with noodles, home recipe" +28340600,"CHICK/TURK+VEG SOUP, CANNED","Chicken or turkey vegetable soup, canned, prepared with water or ready-to-serve" +28340610,"CHICKEN VEGETABLE SOUP, STEW TYPE (INCL CHUNKY)","Chicken or turkey vegetable soup, stew type" +28340630,"CHICKEN OR TURKEY VEGETABLE SOUP WITH RICE, STEW TYPE, CHUNK","Chicken or turkey vegetable soup with rice, stew type, chunky style" +28340640,"CHICKEN OR TURKEY VEGETABLE SOUP WITH NOODLES, STEW TYPE, CH","Chicken or turkey vegetable soup with noodles, stew type, chunky style, canned or ready-to-serve" +28340660,"CHICKEN OR TURKEY VEGETABLE SOUP, HOME RECIPE","Chicken or turkey vegetable soup, home recipe" +28340670,"CHICKEN OR TURKEY VEGETABLE SOUP WITH RICE, MEXICAN STYLE, H","Chicken or turkey vegetable soup with rice, Mexican style, home recipe (Sopa / Caldo de Pollo)" +28340680,"CHICKEN OR TURKEY AND CORN HOMINY SOUP, MEXICAN STYLE, HOME","Chicken or turkey and corn hominy soup, Mexican style, home recipe (Pozole)" +28340690,"CHICKEN OR TURKEY VEGETABLE SOUP WITH POTATO AND CHEESE, CHU","Chicken or turkey vegetable soup with potato and cheese, chunky style, canned or ready-to-serve" +28340700,"BIRD'S NEST SOUP (CHICKEN, HAM, 
NOODLES)","Bird's nest soup (chicken, ham, and noodles)" +28340750,"HOT & SOUR SOUP (INCLUDE HOT & SPICY CHINESE SOUP)","Hot and sour soup" +28340800,"CHICKEN OR TURKEY SOUP WITH VEGETABLES AND FRUIT, ASIAN STYL","Chicken or turkey soup with vegetables and fruit, Asian Style" +28345010,"CHICKEN/TURKEY SOUP, CM OF, CAN, RED SOD, NS W/ MILK/WATER","Chicken or turkey soup, cream of, canned, reduced sodium, NS as to made with milk or water" +28345020,"CHICKEN/TURKEY SOUP, CM OF, CAN, RED SOD, W/ MILK","Chicken or turkey soup, cream of, canned, reduced sodium, made with milk" +28345030,"CHICKEN/TURKEY SOUP, CM OF, CAN, RED SOD, W/ WATER","Chicken or turkey soup, cream of, canned, reduced sodium, made with water" +28345110,"CHICKEN SOUP, CREAM OF, NS AS TO MILK OR WATER","Chicken or turkey soup, cream of, NS as to prepared with milk or water" +28345120,"CHICKEN/TURKEY SOUP,CREAM OF, W/ MILK","Chicken or turkey soup, cream of, prepared with milk" +28345130,"CHICKEN SOUP, CREAM OF, PREPARED W/ WATER","Chicken or turkey soup, cream of, prepared with water" +28345160,"CHICKEN OR TURKEY MUSHROOM SOUP, CREAM OF, PREPARED WITH MIL","Chicken or turkey mushroom soup, cream of, prepared with milk" +28345170,"DUCK SOUP","Duck soup" +28350040,"FISH STOCK, HOME RECIPE","Fish stock, home recipe" +28350050,"FISH CHOWDER (INCL FISHERMAN'S SOUP, SEAFOOD CHOWD)","Fish chowder" +28350110,"CRAB SOUP, NS AS TO TOMATO-BASE OR CREAM","Crab soup, NS as to tomato-base or cream style" +28350120,"CRAB SOUP, TOMATO BASE","Crab soup, tomato-base" +28350210,"CLAM CHOWDER, NS AS TO MANHATTAN OR NEW ENGLAND","Clam chowder, NS as to Manhattan or New England style" +28350220,"CLAM CHOWDER, MANHATTAN (INCLUDE CHUNKY)","Clam chowder, Manhattan" +28350310,"TURTLE & VEGETABLE SOUP (INCLUDE SNAPPER SOUP)","Turtle and vegetable soup" +28351110,"FISH AND VEGETABLE SOUP, NO POTATOES (SOPA DE PESCADO Y MARI","Fish and vegetable soup, no potatoes (Sopa de pescado y mariscos)" +28351120,"FISH SOUP, WITH POTATOES 
(SOPA DE PESCADO Y MARISCOS)","Fish soup, with potatoes (Sopa de Pescado y Mariscos)" +28351160,"CODFISH, RICE & VEGETABLE SOUP, P.R.","Codfish, rice, and vegetable soup, Puerto Rican style" +28351170,"CODFISH SOUP W/ NOODLES, P.R.","Codfish soup with noodles, Puerto Rican style" +28355110,"CLAM CHOWDER, NEW ENG, NS AS TO MILK OR WATER ADDED","Clam chowder, New England, NS as to prepared with water or milk" +28355120,"CLAM CHOWDER, NEW ENGLAND, W/ MILK","Clam chowder, New England, prepared with milk" +28355130,"CLAM CHOWDER, NEW ENGLAND, W/ WATER","Clam chowder, New England, prepared with water" +28355140,"CLAM CHOWDER, NEW ENGLAND, REDUCED SODIUM, CANNED OR READY-T","Clam chowder, New England, reduced sodium, canned or ready-to-serve" +28355210,"CRAB SOUP, CREAM OF, W/ MILK","Crab soup, cream of, prepared with milk" +28355250,"LOBSTER BISQUE","Lobster bisque" +28355260,"LOBSTER GUMBO","Lobster gumbo" +28355310,"OYSTER STEW","Oyster stew" +28355350,"SALMON SOUP, CREAM STYLE","Salmon soup, cream style" +28355410,"SHRIMP SOUP, CREAM OF, NS AS TO MILK/WATER ADDED","Shrimp soup, cream of, NS as to prepared with milk or water" +28355420,"SHRIMP SOUP, CREAM OF, W/ MILK","Shrimp soup, cream of, prepared with milk" +28355430,"SHRIMP SOUP, CREAM OF, W/ WATER","Shrimp soup, cream of, prepared with water" +28355440,"SHRIMP GUMBO","Shrimp gumbo" +28355450,"SEAFOOD SOUP W/ POTATOES & VEGETABLES (INCL DK GREEN LEAF)","Seafood soup with potatoes and vegetables (including carrots, broccoli, and/or dark-green leafy)" +28355460,"SEAFOOD SOUP W/ POTATOES & VEGETABLES (EXCL DK GREEN LEAF)","Seafood soup with potatoes and vegetables (excluding carrots, broccoli, and dark-green leafy)" +28355470,"SEAFOOD SOUP W/ VEGETABLES (INCL DK GREEN LEAFY)","Seafood soup with vegetables (including carrots, broccoli, and/or dark-green leafy (no potatoes))" +28355480,"SEAFOOD SOUP W/ VEGETABLES (EXCL DK GREEN LEAFY)","Seafood soup with vegetables (excluding carrots, broccoli, and dark-green leafy (no 
potatoes))" +28360100,"MEAT BROTH, P.R. STYLE","Meat broth, Puerto Rican style" +28360210,"SPANISH VEGETABLE SOUP, P.R. (CALDO GALLEGO)","Spanish vegetable soup, Puerto Rican style (Caldo gallego)" +28500000,"GRAVY, POULTRY","Gravy, poultry" +28500010,"GRAVY, MEAT/POULTRY, W/ WINE","Gravy, meat or poultry, with wine" +28500020,"GRAVY, MEAT, W/ FRUIT (INCLUDE FRENCH SAUCE)","Gravy, meat, with fruit" +28500030,"GRAVY, POULTRY, LOW SODIUM","Gravy, poultry, low sodium" +28500040,"GRAVY, BEEF/MEAT (INCL GRAVY,NFS;BROWN GRAVY;SWISS STEAK GRV","Gravy, beef or meat" +28500050,"GRAVY, GIBLET(INCL ANY POULTRY GRAVY W/PCS OF MEAT)","Gravy, giblet" +28500060,"GRAVY, BEEF OR MEAT, LOW SODIUM","Gravy, beef or meat, low sodium" +28500070,"GRAVY, BEEF OR MEAT, HOME RECIPE","Gravy, beef or meat, home recipe" +28500080,"GRAVY, POULTRY, HOME RECIPE","Gravy, poultry, home recipe" +28500100,"GRAVY, MUSHROOM","Gravy, mushroom" +28500150,"GRAVY, REDEYE","Gravy, redeye" +28501010,"GRAVY, BEEF/MEAT, FAT FREE","Gravy, beef or meat, fat free" +28501110,"GRAVY, POULTRY, FAT FREE","Gravy, poultry, fat free" +28510010,"GRAVY/SAUCE, POULTRY FROM CHICKEN FRICASSEE, P.R.","Gravy or sauce, poultry-based from Puerto Rican-style chicken fricasse" +28510020,"GRAVY, MEAT-BASED, FROM PUERTO RICAN POT ROAST","Gravy, meat-based, from Puerto-Rican style stuffed pot roast" +28510030,"GRAVY, MEAT-BASED, FROM PUERTO RICAN BEEF STEW","Gravy, meat-based, from Puerto-Rican style beef stew" +28520000,"GRAVY/SAUCE,CHINESE(SOY SCE,STOCK/BOUILL,CRNSTRCH)","Gravy or sauce, Chinese (soy sauce, stock or bouillon, cornstarch)" +28520100,"OYSTER-FLAVORED SAUCE","Oyster-flavored sauce" +28522000,"MOLE POBLANA (SAUCE)","Mole poblano (sauce)" +28522050,"MOLE VERDE (SAUCE)","Mole verde (sauce)" +31101010,"EGGS, WHOLE, RAW","Egg, whole, raw" +31102000,"EGGS, WHOLE, COOKED, NS AS TO METHOD","Egg, whole, cooked, NS as to cooking method" +31103010,"EGG, WHOLE, BOILED OR POACHED","Egg, whole, boiled or poached" +31105010,"EGG, 
WHOLE, FRIED WITHOUT FAT","Egg, whole, fried without fat" +31105020,"EGG, WHOLE, FRIED WITH MARGARINE","Egg, whole, fried with margarine" +31105030,"EGG, WHOLE, FRIED WITH OIL","Egg, whole, fried with oil" +31105040,"EGG, WHOLE, FRIED WITH BUTTER","Egg, whole, fried with butter" +31105060,"EGG, WHOLE, FRIED WITH ANIMAL FAT OR MEAT DRIPPINGS","Egg, whole, fried with animal fat or meat drippings" +31105080,"EGG, WHOLE, FRIED WITH COOKING SPRAY","Egg, whole, fried with cooking spray" +31105090,"EGG, WHOLE, FRIED, FROM FAST FOOD / RESTAURANT","Egg, whole, fried, from fast food / restaurant" +31106000,"EGGS, WHOLE, BAKED, NS AS TO ADDED FAT","Egg, whole, baked, NS as to fat added in cooking" +31106010,"EGGS, WHOLE, BAKED, NO FAT ADDED","Egg, whole, baked, fat not added in cooking" +31106020,"EGGS, WHOLE, BAKED, FAT ADDED","Egg, whole, baked, fat added in cooking" +31107000,"EGGS, WHOLE, PICKLED","Egg, whole, pickled" +31108010,"EGGS, WHITE ONLY, RAW","Egg, white only, raw" +31110010,"EGG YOLK, ONLY, RAW","Egg, yolk only, raw" +31111000,"EGG, YOLK ONLY, COOKED, NS AS TO FAT ADDED IN COOKING","Egg, yolk only, cooked, NS as to fat added in cooking" +31111010,"EGG, YOLK ONLY, COOKED, NO FAT ADDED","Egg, yolk only, cooked, fat not added in cooking" +31111020,"EGG, YOLK ONLY, COOKED, FAT ADDED IN COOKING","Egg, yolk only, cooked, fat added in cooking" +31201000,"DUCK EGG, COOKED","Duck egg, cooked" +31202000,"GOOSE EGG, COOKED","Goose egg, cooked" +31203000,"QUAIL EGG, CANNED","Quail egg, canned" +32101000,"EGGS, CREAMED","Egg, creamed" +32101500,"EGGS, BENEDICT","Egg, Benedict" +32101530,"EGG CURRY","Egg curry" +32102000,"EGGS, DEVILED","Egg, deviled" +32103000,"EGG SALAD, W/ MAYO","Egg salad, made with mayonnaise" +32103015,"EGG SALAD, W/ LT MAYO","Egg salad, made with light mayonnaise" +32103020,"EGG SALAD, W/ MAYO-TYPE DRSG","Egg salad, made with mayonnaise-type salad dressing" +32103025,"EGG SALAD, W/ LIGHT MAYO-TYPE DRSG","Egg salad, made with light mayonnaise-type 
salad dressing" +32103030,"EGG SALAD, W/ CREAMY DRSG","Egg salad, made with creamy dressing" +32103035,"EGG SALAD,W/ LT CREAMY DRSG","Egg salad, made with light creamy dressing" +32103040,"EGG SALAD, W/ ITALIAN DRSG","Egg salad, made with Italian dressing" +32103045,"EGG SALAD, W/ LT ITALIAN DRSG","Egg salad, made with light Italian dressing" +32103050,"EGG SALAD, W/ ANY TYPE OF FAT FREE DRSG","Egg Salad, made with any type of fat free dressing" +32105180,"HUEVOS RANCHEROS","Huevos rancheros" +32105190,"EGG CASSEROLE W/ BREAD, CHEESE, MILK & MEAT","Egg casserole with bread, cheese, milk and meat" +32105200,"EGG FOO YUNG, NFS","Egg foo yung (young), NFS" +32105210,"CHICKEN EGG FOO YUNG","Chicken egg foo yung (young)" +32105220,"PORK EGG FOO YUNG","Pork egg foo yung (young)" +32105230,"SHRIMP EGG FOO YUNG","Shrimp egg foo yung (young)" +32105240,"BEEF EGG FOO YUNG","Beef egg foo yung (young)" +32105310,"RIPE PLANTAIN OMELET, P.R. (TORTILLA DE AMARILLO)","Ripe plantain omelet, Puerto Rican style (Tortilla de amarillo)" +32105330,"SCRAMBLED EGGS W/ JERKED BEEF, P.R.","Scrambled eggs with jerked beef, Puerto Rican style (Revoltillo de tasajo)" +32110100,"EGGS, A LA MALAGUENA, P.R.(HUEVOS A LA MALAGUENA)","Eggs a la Malaguena, Puerto Rican style (Huevos a la Malaguena)" +32110150,"SHRIMP-EGG PATTY (TORTA DE CAMERON SECO)","Shrimp-egg patty (Torta de Cameron seco)" +32120100,"EGG DESSERT, CUSTARD-LIKE, W/ WATER & SUGAR, P.R.","Egg dessert, custard-like, made with water and sugar, Puerto Rican style (Tocino del cielo; Heaven's delight)" +32120200,"ZABAGLIONE","Zabaglione" +32130000,"EGG OMELET OR SCRAMBLED EGG, MADE WITH MARGARINE","Egg omelet or scrambled egg, made with margarine" +32130010,"EGG OMELET OR SCRAMBLED EGG, MADE WITH OIL","Egg omelet or scrambled egg, made with oil" +32130020,"EGG OMELET OR SCRAMBLED EGG, MADE WITH BUTTER","Egg omelet or scrambled egg, made with butter" +32130040,"EGG OMELET OR SCRAMBLED EGG, MADE W/ANIMAL FAT OR MEAT DRIP","Egg omelet or 
scrambled egg, made with animal fat or meat drippings" +32130060,"EGG OMELET OR SCRAMBLED EGG, MADE WITH COOKING SPRAY","Egg omelet or scrambled egg, made with cooking spray" +32130070,"EGG OMELET OR SCRAMBLED EGG, MADE WITHOUT FAT","Egg omelet or scrambled egg, made without fat" +32130080,"EGG OMELET OR SCRAMBLED EGG, FROM FAST FOOD / RESTAURANT","Egg omelet or scrambled egg, from fast food / restaurant" +32130100,"EGG OMELET OR SCRAMBLED EGG, WITH CHEESE, MADE WITH MARGARIN","Egg omelet or scrambled egg, with cheese, made with margarine" +32130110,"EGG OMELET OR SCRAMBLED EGG, WITH CHEESE, MADE WITH OIL","Egg omelet or scrambled egg, with cheese, made with oil" +32130120,"EGG OMELET OR SCRAMBLED EGG, WITH CHEESE, MADE WITH BUTTER","Egg omelet or scrambled egg, with cheese, made with butter" +32130140,"EGG OMELET OR SCRAMBLED EGG, W/ CHEESE, MADE W/ ANIMAL FAT","Egg omelet or scrambled egg, with cheese, made with animal fat or meat drippings" +32130160,"EGG OMELET OR SCRAMBLED EGG, WITH CHEESE, MADE WITH COOKING","Egg omelet or scrambled egg, with cheese, made with cooking spray" +32130170,"EGG OMELET OR SCRAMBLED EGG, WITH CHEESE, MADE WITHOUT FAT","Egg omelet or scrambled egg, with cheese, made without fat" +32130200,"EGG OMELET OR SCRAMBLED EGG, WITH MEAT, MADE WITH MARGARINE","Egg omelet or scrambled egg, with meat, made with margarine" +32130210,"EGG OMELET OR SCRAMBLED EGG, WITH MEAT, MADE WITH OIL","Egg omelet or scrambled egg, with meat, made with oil" +32130220,"EGG OMELET OR SCRAMBLED EGG, WITH MEAT, MADE WITH BUTTER","Egg omelet or scrambled egg, with meat, made with butter" +32130240,"EGG OMELET OR SCRAMBLED EGG, WITH MEAT, MADE WITH ANIMAL FAT","Egg omelet or scrambled egg, with meat, made with animal fat or meat drippings" +32130260,"EGG OMELET OR SCRAMBLED EGG, WITH MEAT, MADE W/COOKING SPRAY","Egg omelet or scrambled egg, with meat, made with cooking spray" +32130270,"EGG OMELET OR SCRAMBLED EGG, WITH MEAT, MADE WITHOUT FAT","Egg omelet or 
scrambled egg, with meat, made without fat" +32130300,"EGG OMELET OR SCRAMBLED EGG, W/CHEESE & MEAT, MADE W/MARGARI","Egg omelet or scrambled egg, with cheese and meat, made with margarine" +32130310,"EGG OMELET OR SCRAMBLED EGG, W/CHEESE & MEAT, MADE W/OIL","Egg omelet or scrambled egg, with cheese and meat, made with oil" +32130320,"EGG OMELET OR SCRAMBLED EGG, W/ CHEESE &MEAT, MADEW/BUTTER","Egg omelet or scrambled egg, with cheese and meat, made with butter" +32130340,"EGG OMELET OR SCR EGG, WITH CHEESE & MEAT, MADE W/ANIMAL FAT","Egg omelet or scrambled egg, with cheese and meat, made with animal fat or meat drippings" +32130360,"EGG OMELET OR SCR EGG, W/CHEESE& MEAT, MADE W/COOKING SPRAY","Egg omelet or scrambled egg, with cheese and meat, made with cooking spray" +32130370,"EGG OMELET OR SCRAMBLED EGG, W/CHEESE & MEAT, MADE WO/FAT","Egg omelet or scrambled egg, with cheese and meat, made without fat" +32130400,"EGG OMELET OR SCRAMBLED EGG, WITH TOMATOES, FAT ADDED IN COO","Egg omelet or scrambled egg, with tomatoes, fat added in cooking" +32130410,"EGG OMELET OR SCRAMBLED EGG, WITH TOMATOES, FAT NOT ADDED IN","Egg omelet or scrambled egg, with tomatoes, fat not added in cooking" +32130420,"EGG OMELET OR SCRAMBLED EGG, WITH TOMATOES, NS AS TO FAT ADD","Egg omelet or scrambled egg, with tomatoes, NS as to fat added in cooking" +32130430,"EGG OMELET OR SCRAMBLED EGG, W/ DARK-GREEN VEGS, FAT ADDED","Egg omelet or scrambled egg, with dark-green vegetables, fat added in cooking" +32130440,"EGG OMELET OR SCRAMBLED EGG, W/DARK-GREEN VEGS, FAT NOT ADDE","Egg omelet or scrambled egg, with dark-green vegetables, fat not added in cooking" +32130450,"EGG OMELET OR SCRAMBLED EGG, W/ DARK-GREEN VEGS,NS AS TO FAT","Egg omelet or scrambled egg, with dark-green vegetables, NS as to fat added in cooking" +32130460,"EGG OMELET OR SCR EGG, W/TOMATOES & DK-GREEN VEGS, FAT ADDED","Egg omelet or scrambled egg, with tomatoes and dark-green vegetables, fat added in cooking" 
+32130470,"EGG OMELET OR SCR EGG, W/ TOMATOES & DK-GRN VEGS,FAT NOT ADD","Egg omelet or scrambled egg, with tomatoes and dark-green vegetables, fat not added in cooking" +32130480,"EGG OMELET OR SCR EGG, W/ TOMATOES & DK-GRN VEGS, NS FAT","Egg omelet or scrambled egg, with tomatoes and dark-green vegetables, NS as to fat added in cooking" +32130490,"EGG OMELET OR SCR EGG, W/ OTHER VEGS, FAT ADDED","Egg omelet or scrambled egg, with vegetables other than dark green and/or tomatoes, fat added in cooking" +32130500,"EGG OMELET OR SCRAMBLED EGG, WITH OTHER VEGS. FAT NOT ADDED","Egg omelet or scrambled egg, with vegetables other than dark green and/or tomatoes, fat not added in cooking" +32130510,"EGG OMELET OR SCRAMBLED EGG, W/ OTHER VEGS, NS FAT","Egg omelet or scrambled egg, with vegetables other than dark green and/or tomatoes, NS as to fat added in cooking" +32130600,"EGG OMELET OR SCR EGG, W/CHEESE & TOMATOES, FAT ADDED","Egg omelet or scrambled egg, with cheese and tomatoes, fat added in cooking" +32130610,"EGG OMELET OR SCR EGG, W/CHEESE & TOMATOES, FAT NOT ADDED","Egg omelet or scrambled egg, with cheese and tomatoes, fat not added in cooking" +32130620,"EGG OMELET OR SCR EGG, W/CHEESE & TOMATOES, NS FAT ADDED","Egg omelet or scrambled egg, with cheese and tomatoes, NS as to fat added in cooking" +32130630,"EGG OMELET OR SCR EGG, W/ CHEESE&DK-GRN VEGS, FAT ADDED","Egg omelet or scrambled egg, with cheese and dark-green vegetables, fat added in cooking" +32130640,"EGG OMELET OR SCR EGG, W/ CHEESE&DK-GRN VEGS, FAT NOT ADDED","Egg omelet or scrambled egg, with cheese and dark-green vegetables, fat not added in cooking" +32130650,"EGG OMELET OR SCR EGG, W/ CHEESE&DK-GRN VEGS, NS FAT ADDED","Egg omelet or scrambled egg, with cheese and dark-green vegetables, NS as to fat added in cooking" +32130660,"EGG OMELET OR SCR EGG, W/CHEESE, TOM & DK-GRN VEGS,FAT ADDED","Egg omelet or scrambled egg, with cheese, tomatoes, and dark-green vegetables, fat added in cooking" 
+32130670,"EGG OMELET OR SCR EGG, W/CHEESE, TOM & DK-GRN VEGS,FAT NOT A","Egg omelet or scrambled egg, with cheese, tomatoes, and dark-green vegetables, fat not added in cooking" +32130680,"EGG OMELET OR SCR EGG, W/CHEESE, TOM & DK-GRN VEGS,NS FAT AD","Egg omelet or scrambled egg, with cheese, tomatoes, and dark-green vegetables, NS as to fat added in cooking" +32130690,"EGG OMELET OR SCR EGG, W/CHEESE & OTHER VEGS, FAT ADDED","Egg omelet or scrambled egg, with cheese and vegetables other than dark green and/or tomatoes, fat added in cooking" +32130700,"EGG OMELET OR SCREGG, W/CHEESE & OTHER VEGS, FAT NOT ADDED","Egg omelet or scrambled egg, with cheese and vegetables other than dark green and/or tomatoes, fat not added in cooking" +32130710,"EGG OMELET OR SCREGG, W/CHEESE & OTHER VEGS, NS FAT ADDED","Egg omelet or scrambled egg, with cheese and vegetables other than dark green and/or tomatoes, NS as to fat added in cooking" +32130800,"EGG OMELET OR SCRAMBLED EGG, W/MEAT & TOMATOES, FAT ADDED","Egg omelet or scrambled egg, with meat and tomatoes, fat added in cooking" +32130810,"EGG OMELET OR SCRAMBLED EGG, W/MEAT & TOMATOES, FAT NOT ADDE","Egg omelet or scrambled egg, with meat and tomatoes, fat not added in cooking" +32130820,"EGG OMELET OR SCRAMBLED EGG, W/MEAT & TOMATOES, NS FAT ADDED","Egg omelet or scrambled egg, with meat and tomatoes, NS as to fat added in cooking" +32130830,"EGG OMELET OR SCR EGG, W/MEAT & DK-GRN VEGS, FAT ADDED","Egg omelet or scrambled egg, with meat and dark-green vegetables, fat added in cooking" +32130840,"EGG OMELET OR SCR EGG, W/MEAT & DK-GRN VEGS, FAT NOT ADDED","Egg omelet or scrambled egg, with meat and dark-green vegetables, fat not added in cooking" +32130850,"EGG OMELET OR SCR EGG, W/MEAT & DK-GRN VEGS, NS FAT ADDED","Egg omelet or scrambled egg, with meat and dark-green vegetables, NS as to fat added in cooking" +32130860,"EGG OMELET OR SCR EGG, W/MEAT,TOM & DK-GRN VEGS, FAT ADDED","Egg omelet or scrambled egg, with meat, 
tomatoes, and dark-green vegetables, fat added in cooking" +32130870,"EGG OMELET OR SCR EGG, W/MEAT,TOM & DK-GRN VEGS, FAT NOT ADD","Egg omelet or scrambled egg, with meat, tomatoes, and dark-green vegetables, fat not added in cooking" +32130880,"EGG OMELET OR SCR EGG, W/MEAT,TOM & DK-GRN VEGS, NS FAT ADDE","Egg omelet or scrambled egg, with meat, tomatoes, and dark-green vegetables, NS as to fat added in cooking" +32130890,"EGG OMELET OR SCR EGG, W/MEAT & OTHER VEGS, FAT ADDED","Egg omelet or scrambled egg, with meat and vegetables other than dark-green and/or tomatoes, fat added in cooking" +32130900,"EGG OMELET OR SCR EGG, W/MEAT & OTHER VEGS, FAT NOT ADDED","Egg omelet or scrambled egg, with meat and vegetables other than dark-green and/or tomatoes, fat not added in cooking" +32130910,"EGG OMELET OR SCR EGG, W/MEAT & OTHER VEGS, NS FAT ADDED","Egg omelet or scrambled egg, with meat and vegetables other than dark-green and/or tomatoes, NS as to fat added in cooking" +32131000,"EGG OMELET OR SCR EGG, W/ CHEESE, MEAT & TOMATOES, FAT ADDED","Egg omelet or scrambled egg, with cheese, meat, and tomatoes, fat added in cooking" +32131010,"EGG OMELET OR SCR EGG, W/ CHEESE, MEAT & TOMATOES, FAT NOT A","Egg omelet or scrambled egg, with cheese, meat, and tomatoes, fat not added in cooking" +32131020,"EGG OMELET OR SCR EGG, W/ CHEESE, MEAT & TOMATOES, NS FAT AD","Egg omelet or scrambled egg, with cheese, meat, and tomatoes, NS as to fat added in cooking" +32131030,"EGG OMELET OR SCR EGG, W/CHEESE, MEAT&DK GRN VEG,FAT ADDED","Egg omelet or scrambled egg, with cheese, meat, and dark-green vegetables, fat added in cooking" +32131040,"EGG OMELET OR SCR EGG, W/CHEESE, MEAT&DK GRN VEG,FAT NOT ADD","Egg omelet or scrambled egg, with cheese, meat, and dark-green vegetables, fat not added in cooking" +32131050,"EGG OMELET OR SCR EGG, W/CHEESE, MEAT&DK GRN VEG,NS FAT ADDE","Egg omelet or scrambled egg, with cheese, meat, and dark-green vegetables, NS as to fat added in cooking" 
+32131060,"EGG OMELET/SCR EGG,W/CHEESE,MEAT,TOM&DK GRN VEG,FAT ADDED","Egg omelet or scrambled egg, with cheese, meat, tomatoes, and dark-green vegetables, fat added in cooking" +32131070,"EGG OMELET/SCR EGG,W/CHEESE,MEAT,TOM&DK GRN VEG,FAT NOT ADDE","Egg omelet or scrambled egg, with cheese, meat, tomatoes, and dark-green vegetables, fat not added in cooking" +32131080,"EGG OMELET/SCR EGG,W/CHEESE,MEAT,TOM&DK GRN VEG,NS FAT ADDED","Egg omelet or scrambled egg, with cheese, meat, tomatoes, and dark-green vegetables, NS as to fat added in cooking" +32131090,"EGG OMELET OR SCR EGG, W/ CHEESE, MEAT & OTHER VEG,FAT ADDED","Egg omelet or scrambled egg, with cheese, meat, and vegetables other than dark-green and/or tomatoes, fat added in cooking" +32131100,"EGG OMELET OR SCR EGG, W/ CHEESE, MEAT & OTHER VEG,FAT NOT A","Egg omelet or scrambled egg, with cheese, meat, and vegetables other than dark-green and/or tomatoes, fat not added in cooking" +32131110,"EGG OMELET OR SCR EGG, W/ CHEESE, MEAT & OTHER VEG,NS FAT AD","Egg omelet or scrambled egg, with cheese, meat, and vegetables other than dark-green and/or tomatoes, NS as to fat added in cooking" +32131200,"EGG OMELET OR SCR EGG, W/POTATOES +/OR ONIONS, FAT ADDED","Egg omelet or scrambled egg, with potatoes and/or onions, fat added in cooking" +32131210,"EGG OMELET OR SCR EGG, W/POTATOES +/OR ONIONS, FAT NOT ADDED","Egg omelet or scrambled egg, with potatoes and/or onions, fat not added in cooking" +32131220,"EGG OMELET OR SCR EGG, W/POTATOES +/OR ONIONS, NS FAT ADDED","Egg omelet or scrambled egg, with potatoes and/or onions, NS as to fat added in cooking" +32201000,"FRIED EGG SANDWICH","Fried egg sandwich" +32202000,"EGG, CHEESE, HAM, & BACON ON BUN","Egg, cheese, ham, and bacon on bun" +32202010,"EGG, CHEESE & HAM ON ENGLISH MUFFIN","Egg, cheese, and ham on English muffin" +32202020,"EGG, CHEESE & HAM ON BISCUIT","Egg, cheese, and ham on biscuit" +32202025,"EGG, CHEESE & HAM ON BAGEL","Egg, cheese and ham on bagel" 
+32202030,"EGG, CHEESE & SAUSAGE ON ENGLISH MUFFIN","Egg, cheese, and sausage on English muffin" +32202035,"EGG, EXTRA CHEESE (2 SL), & EXTRA SAUSAGE (2 PATTIES) ON BUN","Egg, extra cheese (2 slices), and extra sausage (2 patties) on bun" +32202040,"EGG, CHEESE & BEEF ON ENGLISH MUFFIN","Egg, cheese, and beef on English Muffin" +32202045,"EGG, CHEESE & STEAK ON BAGEL","Egg, cheese, and steak on bagel" +32202050,"EGG, CHEESE & SAUSAGE ON BISCUIT","Egg, cheese, and sausage on biscuit" +32202055,"EGG, CHEESE & SAUSAGE GRIDDLE CAKE SANDWICH","Egg, cheese, and sausage griddle cake sandwich" +32202060,"EGG & SAUSAGE ON BISCUIT","Egg and sausage on biscuit" +32202070,"EGG, CHEESE & BACON ON BISCUIT","Egg, cheese, and bacon on biscuit" +32202075,"EGG, CHEESE & BACON GRIDDLE CAKE SANDWICH","Egg, cheese, and bacon griddle cake sandwich" +32202080,"EGG, CHEESE & BACON ON ENGLISH MUFFIN","Egg, cheese, and bacon on English muffin" +32202085,"EGG, CHEESE & BACON ON BAGEL","Egg, cheese and bacon on bagel" +32202090,"EGG & BACON ON BISCUIT","Egg and bacon on biscuit" +32202110,"EGG & HAM ON BISCUIT","Egg and ham on biscuit" +32202120,"EGG, CHEESE & SAUSAGE ON BAGEL","Egg, cheese and sausage on bagel" +32202130,"EGG & STEAK ON BISCUIT","Egg and steak on biscuit" +32202200,"EGG & CHEESE ON BISCUIT","Egg and cheese on biscuit" +32203010,"EGG SALAD SANDWICH","Egg salad sandwich" +32204010,"SCRAMBLED EGG SANDWICH","Scrambled egg sandwich" +32300100,"EGG DROP SOUP","Egg drop soup" +32301100,"GARLIC EGG SOUP, P.R. 
(SOPA DE AJO)","Garlic egg soup, Puerto Rican style (Sopa de ajo)" +32400060,"EGG WHITE OMELET, SCRAMBLED, OR FRIED, MADE WITH MARGARINE","Egg white omelet, scrambled, or fried, made with margarine" +32400065,"EGG WHITE OMELET, SCRAMBLED, OR FRIED, MADE WITH OIL","Egg white omelet, scrambled, or fried, made with oil" +32400070,"EGG WHITE OMELET, SCRAMBLED, OR FRIED, MADE WITH BUTTER","Egg white omelet, scrambled, or fried, made with butter" +32400075,"EGG WHITE OMELET, SCRAMBLED, OR FRIED, MADE WITH COOKING SPR","Egg white omelet, scrambled, or fried, made with cooking spray" +32400080,"EGG WHITE OMELET, SCRAMBLED, OR FRIED, MADE WITHOUT FAT","Egg white omelet, scrambled, or fried, made without fat" +32400100,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, W/CHEESE, FAT ADDED","Egg white, omelet, scrambled, or fried, with cheese, fat added in cooking" +32400110,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, W/ CHEESE, FAT NOT A","Egg white, omelet, scrambled, or fried, with cheese, fat not added in cooking" +32400120,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, WITH CHEESE, NS FAT","Egg white, omelet, scrambled, or fried, with cheese, NS as to fat added in cooking" +32400200,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, W/ MEAT, FAT ADDED","Egg white, omelet, scrambled, or fried, with meat, fat added in cooking" +32400210,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, W/ MEAT, FAT NOT ADD","Egg white, omelet, scrambled, or fried, with meat, fat not added in cooking" +32400220,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, WITH MEAT, NS FAT","Egg white, omelet, scrambled, or fried, with meat, NS as to fat added in cooking" +32400300,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, W/ VEGS, FAT ADDED","Egg white, omelet, scrambled, or fried, with vegetables, fat added in cooking" +32400310,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, W/ VEGS, FAT NOT ADD","Egg white, omelet, scrambled, or fried, with vegetables, fat not added in cooking" +32400320,"EGG WHITE, OMELET, SCRAMBLED, OR FRIED, W/ VEGS, NS FAT","Egg 
white, omelet, scrambled, or fried, with vegetables, NS as to fat added in cooking" +32400400,"EGG WHITE, OMELET, SCR OR FRIED, W/ CHEESE & MEAT, FAT ADDED","Egg white, omelet, scrambled, or fried, with cheese and meat, fat added in cooking" +32400410,"EGG WHITE, OMELET, SCR OR FRIED, W/ CHEESE & MEAT, FAT NOT A","Egg white, omelet, scrambled, or fried, with cheese and meat, fat not added in cooking" +32400420,"EGG WHITE, OMELET, SCR OR FRIED, W/ CHEESE & MEAT, NS FAT AD","Egg white, omelet, scrambled, or fried, with cheese and meat, NS as to fat added in cooking" +32400500,"EGG WHITE, OMELET, SCR OR FRIED, W/CHEESE & VEG, FAT ADDED","Egg white, omelet, scrambled, or fried, with cheese and vegetables, fat added in cooking" +32400510,"EGG WHITE, OMELET, SCR OR FRIED, W/CHEESE & VEG, FAT NOT ADD","Egg white, omelet, scrambled, or fried, with cheese and vegetables, fat not added in cooking" +32400520,"EGG WHITE, OMELET, SCR OR FRIED, W/CHEESE & VEG, NS FAT ADDE","Egg white, omelet, scrambled, or fried, with cheese and vegetables, NS as to fat added in cooking" +32400600,"EGG WHITE, OMELET, SCR OR FRIED, W/MEAT & VEG, FAT ADDED","Egg white, omelet, scrambled, or fried, with meat and vegetables, fat added in cooking" +32400610,"EGG WHITE, OMELET, SCR OR FRIED, W/MEAT & VEG, FAT NOT ADDED","Egg white, omelet, scrambled, or fried, with meat and vegetables, fat not added in cooking" +32400620,"EGG WHITE, OMELET, SCR OR FRIED, W/MEAT & VEG, NS FAT ADDED","Egg white, omelet, scrambled, or fried, with meat and vegetables, NS as to fat added in cooking" +32400700,"EGG WHITE,OMELET,SCR OR FRIED,W/CHEESE, MEAT&VEG,FAT ADDED","Egg white, omelet, scrambled, or fried, with cheese, meat, and vegetables, fat added in cooking" +32400710,"EGG WHITE,OMELET,SCR OR FRIED,W/CHEESE, MEAT&VEG,FAT NOT ADD","Egg white, omelet, scrambled, or fried, with cheese, meat, and vegetables, fat not added in cooking" +32400720,"EGG WHITE,OMELET,SCR OR FRIED,W/CHEESE, MEAT&VEG,NS FAT ADDE","Egg white, 
omelet, scrambled, or fried, with cheese, meat, and vegetables, NS as to fat added in cooking" +32401000,"MERINGUES","Meringues" +33001000,"EGG SUB, OMELET, SCR, OR FRIED, MADE W/ MARGARINE","Egg substitute, omelet, scrambled, or fried, made with margarine" +33001010,"EGG SUB, OMELET, SCR, OR FRIED, MADE W/ OIL","Egg substitute, omelet, scrambled, or fried, made with oil" +33001020,"EGG SUB, OMELET, SCR, OR FRIED, MADE W/ BUTTER","Egg substitute, omelet, scrambled, or fried, made with butter" +33001040,"EGG SUB, OMELET, SCR, OR FRIED, MADE W/ COOKING SPRAY","Egg substitute, omelet, scrambled, or fried, made with cooking spray" +33001050,"EGG SUB, OMELET, SCR, OR FRIED, MADE WO/ FAT","Egg substitute, omelet, scrambled, or fried, made without fat" +33001100,"EGG SUBSTITUTE, CHSE FLAV,OMELET,SCRM,FRIED,FAT ADDED","Egg substitute, cheese flavored, omelet, scrambled, or fried, fat added in cooking" +33001110,"EGG SUBSTITUTE, CHSE FLAV,OMELET,SCRM,FRIED,NO FAT","Egg substitute, cheese flavored, omelet, scrambled, or fried, fat not added in cooking" +33001120,"EGG SUBSTITUTE, CHSE FLAV,OMELET,SCRM,FRIED,NS AS TO FAT","Egg substitute, cheese flavored, omelet, scrambled, or fried, NS as to fat added in cooking" +33001200,"EGG SUBSTITUTE, VEG FLAV,OMELET,SCRM,FRIED,FAT ADDED","Egg substitute, vegetable flavored, omelet, scrambled, or fried, fat added in cooking" +33001210,"EGG SUBSTITUTE, VEG FLAV,OMELET,SCRM,FRIED,NO FAT","Egg substitute, vegetable flavored, omelet, scrambled, or fried, fat not added in cooking" +33001220,"EGG SUBSTITUTE, VEG FLAV,OMELET,SCRM,FRIED,NS AS TO FAT","Egg substitute, vegetable flavored, omelet, scrambled, or fried, NS as to fat added in cooking" +33401000,"EGG SUB, OMELET, SCR, OR FRIED, W/ CHEESE, FAT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese, fat added in cooking" +33401010,"EGG SUB, OMELET, SCR, OR FRIED, W/ CHEESE, FAT NOT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese, fat not added in cooking" 
+33401020,"EGG SUB, OMELET, SCR, OR FRIED, W/ CHEESE, NS FAT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese, NS as to fat added in cooking" +33401100,"EGG SUB, OMELET, SCR, OR FRIED, W/ MEAT, FAT ADDED","Egg substitute, omelet, scrambled, or fried, with meat, fat added in cooking" +33401110,"EGG SUB, OMELET, SCR, OR FRIED, W/ MEAT, FAT NOT ADDED","Egg substitute, omelet, scrambled, or fried, with meat, fat not added in cooking" +33401120,"EGG SUB, OMELET, SCR, OR FRIED, W/ MEAT, NS FAT ADDED","Egg substitute, omelet, scrambled, or fried, with meat, NS as to fat added in cooking" +33401200,"EGG SUB, OMELET, SCR OR FRIED, W/VEGS, FAT ADDED IN COOKING","Egg substitute, omelet, scrambled, or fried, with vegetables, fat added in cooking" +33401210,"EGG SUB, OMELET, SCR OR FRIED, W/VEGS, FAT NOT ADDED IN COOK","Egg substitute, omelet, scrambled, or fried, with vegetables, fat not added in cooking" +33401220,"EGG SUB, OMELET, SCR OR FRIED, W/VEGS, NS FAT ADDED IN COOKI","Egg substitute, omelet, scrambled, or fried, with vegetables, NS as to fat added in cooking" +33401300,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE&MEAT, FAT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese and meat, fat added in cooking" +33401310,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE&MEAT, FAT NOT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese and meat, fat not added in cooking" +33401320,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE&MEAT, NS FAT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese and meat, NS as to fat added in cooking" +33401400,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE & VEG, FAT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese and vegetables, fat added in cooking" +33401410,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE & VEG, FAT NOT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese and vegetables, fat not added in cooking" +33401420,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE & VEG, NS FAT ADDED","Egg 
substitute, omelet, scrambled, or fried, with cheese and vegetables, NS as to fat added in cooking" +33401500,"EGG SUB, OMELET, SCR OR FRIED, W/ MEAT & VEG, FAT ADDED","Egg substitute, omelet, scrambled, or fried, with meat and vegetables, fat added in cooking" +33401510,"EGG SUB, OMELET, SCR OR FRIED, W/ MEAT & VEG, FAT NOT ADDED","Egg substitute, omelet, scrambled, or fried, with meat and vegetables, fat not added in cooking" +33401520,"EGG SUB, OMELET, SCR OR FRIED, W/ MEAT & VEG, NS FAT ADDED","Egg substitute, omelet, scrambled, or fried, with meat and vegetables, NS as to fat added in cooking" +33401600,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE,MEAT&VEG,FAT ADDED","Egg substitute, omelet, scrambled, or fried, with cheese, meat, and vegetables, fat added in cooking" +33401610,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE,MEAT&VEG,FAT NOT ADD","Egg substitute, omelet, scrambled, or fried, with cheese, meat, and vegetables, fat not added in cooking" +33401620,"EGG SUB, OMELET, SCR OR FRIED, W/CHEESE,MEAT&VEG,NS FAT ADDE","Egg substitute, omelet, scrambled, or fried, with cheese, meat, and vegetables, NS as to fat added in cooking" +41101000,"BEANS, DRY, COOKED, NS AS TO TYPE, NS ADDED FAT","Beans, dry, cooked, NS as to type and as to fat added in cooking" +41101010,"BEANS, DRY, COOKED, NS AS TO TYPE, ADDED FAT","Beans, dry, cooked, NS as to type, fat added in cooking" +41101020,"BEANS, DRY, COOKED, NS AS TO TYPE, NO FAT ADDED","Beans, dry, cooked, NS as to type, fat not added in cooking" +41101100,"WHITE BEAN, DRY, COOKED, NS AS TO ADDED FAT","White beans, dry, cooked, NS as to fat added in cooking" +41101110,"WHITE BEAN, DRY, COOKED, FAT ADDED","White beans, dry, cooked, fat added in cooking" +41101120,"WHITE BEAN, DRY, COOKED, NO FAT ADDED","White beans, dry, cooked, fat not added in cooking" +41101200,"WHITE BEANS, CANNED, LOW SODIUM, NS AS TO FAT ADDED","White beans, canned, low sodium, NS as to fat added in cooking" +41101210,"WHITE BEANS, CANNED, LOW SODIUM, FAT 
ADDED IN COOKING","White beans, canned, low sodium, fat added in cooking" +41101220,"WHITE BEANS, CANNED, LOW SODIUM, FAT NOT ADDED IN COOKING","White beans, canned, low sodium, fat not added in cooking" +41102000,"BLACK, BROWN OR BAYO BEAN, DRY, COOKED, FAT NS","Black, brown, or Bayo beans, dry, cooked, NS as to fat added in cooking" +41102010,"BLACK, BROWN OR BAYO BEAN, DRY, COOKED, FAT ADDED","Black, brown, or Bayo beans, dry, cooked, fat added in cooking" +41102020,"BLACK, BROWN OR BAYO BEAN, DRY, COOKED, NO FAT","Black, brown, or Bayo beans, dry, cooked, fat not added in cooking" +41102100,"BLACK, BROWN, OR BAYO BEANS, CANNED, LOW SODIUM, NS FAT","Black, brown, or Bayo beans, canned, low sodium, NS as to fat added in cooking" +41102110,"BLACK, BROWN, OR BAYO BEANS, CANNED, LOW SODIUM, FAT ADDED","Black, brown, or Bayo beans, canned, low sodium, fat added in cooking" +41102120,"BLACK, BROWN, OR BAYO BEANS, CANNED, LOW SODIUM, NO FAT","Black, brown, or Bayo beans, canned, low sodium, fat not added in cooking" +41102200,"FAVA BEANS, COOKED, NS AS TO ADDED FAT","Fava beans, cooked, NS as to fat added in cooking" +41102210,"FAVA BEANS, COOKED, FAT ADDED","Fava beans, cooked, fat added in cooking" +41102220,"FAVA BEANS, COOKED, NO FAT ADDED","Fava beans, cooked, fat not added in cooking" +41103000,"LIMA BEANS, DRY, COOKED, NS AS TO ADDED FAT","Lima beans, dry, cooked, NS as to fat added in cooking" +41103010,"LIMA BEANS, DRY, COOKED, FAT ADDED","Lima beans, dry, cooked, fat added in cooking" +41103020,"LIMA BEANS, DRY, COOKED, NO FAT ADDED","Lima beans, dry, cooked, fat not added in cooking" +41103050,"PINK BEANS, DRY, COOKED, NS AS TO FAT","Pink beans, dry, cooked, NS as to fat added in cooking" +41103060,"PINK BEANS, DRY, COOKED, NO FAT ADDED","Pink beans, dry, cooked, fat not added in cooking" +41103070,"PINK BEANS, DRY, COOKED, FAT ADDED","Pink beans, dry, cooked, fat added in cooking" +41104000,"PINTO, CALICO/RED/MEX BEAN, DRY, COOKED, FAT NS","Pinto, calico, 
or red Mexican beans, dry, cooked, NS as to fat added in cooking" +41104010,"PINTO, CALICO/RED/MEX BEAN, DRY, COOKED, FAT ADDED","Pinto, calico, or red Mexican beans, dry, cooked, fat added in cooking" +41104020,"PINTO, CALICO/RED/MEX BEAN, DRY, COOKED, NO FAT","Pinto, calico, or red Mexican beans, dry, cooked, fat not added in cooking" +41104100,"PINTO,CALICO, RED MEXICAN BEANS, CANNED, LOW SODIUM, NS FAT","Pinto, calico, or red Mexican beans, canned, low sodium, NS as to fat added in cooking" +41104110,"PINTO,CALICO,RED MEX BEANS, CANNED, LOW SODIUM, FAT ADDED","Pinto, calico, or red Mexican beans, canned, low sodium, fat added in cooking" +41104120,"PINTO,CALICO,RED MEXICAN BEANS, CANNED, LOW SODIUM, NO FAT","Pinto, calico, or red Mexican beans, canned, low sodium, fat not added in cooking" +41106000,"RED KIDNEY BEANS, DRY, COOKED, NS AS TO ADDED FAT","Red kidney beans, dry, cooked, NS as to fat added in cooking" +41106010,"RED KIDNEY BEANS, DRY, COOKED, FAT ADDED","Red kidney beans, dry, cooked, fat added in cooking" +41106020,"RED KIDNEY BEANS, DRY, COOKED, NO FAT ADDED","Red kidney beans, dry, cooked, fat not added in cooking" +41106100,"RED KIDNEY BEANS, CANNED, LOW SODIUM, FAT ADDED IN COOKING","Red kidney beans, canned, low sodium, NS as to fat added in cooking" +41106110,"RED KIDNEY BEANS, CANNED, LOW SODIUM, FAT ADDED IN COOKING","Red kidney beans, canned, low sodium, fat added in cooking" +41106120,"RED KIDNEY BEANS, CANNED, LOW SODIUM, FAT NOT ADDED","Red kidney beans, canned, low sodium, fat not added in cooking" +41107000,"SOYBEANS, COOKED, FAT NOT ADDED","Soybeans, cooked, fat not added in cooking" +41108000,"MUNG BEANS, NO FAT ADDED","Mung beans, fat not added in cooking" +41108010,"MUNG BEANS, FAT ADDED","Mung beans, fat added in cooking" +41108020,"MUNG BEANS, NS AS TO FAT ADDED","Mung beans, NS as to fat added in cooking" +41109000,"MUNGO BEANS, COOKED, NO FAT ADDED","Mungo beans, cooked, fat not added in cooking" +41201010,"BAKED BEANS, 
NFS","Baked beans, NFS" +41201020,"BAKED BEANS, VEGETARIAN","Baked beans, vegetarian" +41202020,"CHILI BEANS, BARBECUE BEANS, RANCH OR MEXICAN STYLE","Chili beans, barbecue beans, ranch style beans or Mexican- style beans" +41202500,"BEANS AND TOMATOES, NS AS TO FAT ADDED","Beans and tomatoes, NS as to fat added in cooking" +41202505,"BEANS AND TOMATOES, FAT NOT ADDED","Beans and tomatoes, fat not added in cooking" +41202510,"BEANS AND TOMATOES, FAT ADDED","Beans and tomatoes, fat added in cooking" +41203030,"BLACK BEAN SALAD","Black bean salad" +41204020,"BOSTON BAKED BEANS","Boston baked beans" +41205010,"REFRIED BEANS","Refried beans" +41205015,"REFRIED BEANS, FAT NOT ADDED IN COOKING","Refried beans, fat not added in cooking" +41205020,"REFRIED BEANS W/ CHEESE","Refried beans with cheese" +41205030,"REFRIED BEANS W/ MEAT","Refried beans with meat" +41205040,"REFRIED BEANS, CANNED, LOW SODIUM","Refried beans, canned, low sodium" +41205050,"BEAN DIP, W/ REFRIED BEANS","Bean dip, made with refried beans" +41205070,"HUMMUS","Hummus" +41205100,"BLACK BEAN SAUCE","Black bean sauce" +41206030,"BEANS & FRANKS","Beans and franks" +41207030,"BEANS, DRY, COOKED, W/ GROUND BEEF","Beans, dry, cooked with ground beef" +41208030,"PORK & BEANS","Pork and beans" +41208100,"BEANS, DRY, COOKED, W/ PORK","Beans, dry, cooked with pork" +41209000,"FALAFEL","Falafel" +41210000,"BEAN CAKE, JAPANESE STYLE","Bean cake" +41210090,"STEWED BEANS W/ PORK, TOMATOES, & CHILI PEPPERS, MEXICAN","Stewed beans with pork, tomatoes, and chili peppers, Mexican style (Frijoles a la charra)" +41210100,"STEWED RED BEANS, P.R.","Stewed red beans, Puerto Rican style (Habichuelas coloradas guisadas)" +41210110,"STEWED DRY LIMA BEANS, P.R","Stewed dry lima beans, Puerto Rican style" +41210120,"STEWED WHITE BEANS, P.R.","Stewed white beans, Puerto Rican style" +41210150,"STEWED PINK BEANS W/ WHITE POTATOES & HAM, P.R.","Stewed pink beans with white potatoes and ham, Puerto Rican style" +41210160,"STEWED 
PINK BEANS W/ PIG'S FEET, P.R","Stewed pink beans with pig's feet, Puerto Rican style" +41210170,"STEWED RED BEANS W/ PIG'S FEET, P.R.","Stewed red beans with pig's feet, Puerto Rican style" +41210180,"STEWED WHITE BEANS W/ PIG'S FEET, P.R.","Stewed white beans with pig's feet, Puerto Rican style" +41210190,"STEWED RED BEANS W/ PIGS FEET & POTATO, P.R.","Stewed red beans with pig's feet and potatoes, Puerto Rican style" +41210200,"BLACK BEANS, CUBAN","Black beans, Cuban style (Habichuelas negras guisadas a la Cubana)" +41221010,"BAKED BEANS, LOW SODIUM","Baked beans, low sodium" +41221020,"CHILI WITH BEANS, WITHOUT MEAT","Chili with beans, without meat" +41301000,"COWPEAS, DRY, COOKED, NS AS TO ADDED FAT","Cowpeas, dry, cooked, NS as to fat added in cooking" +41301010,"COWPEAS, DRY, COOKED, FAT ADDED","Cowpeas, dry, cooked, fat added in cooking" +41301020,"COWPEAS, DRY, COOKED, NO FAT ADDED","Cowpeas, dry, cooked, fat not added in cooking" +41302000,"CHICKPEAS, DRY, COOKED, NS AS TO ADDED FAT","Chickpeas, dry, cooked, NS as to fat added in cooking" +41302010,"CHICKPEAS, DRY, COOKED, FAT ADDED","Chickpeas, dry, cooked, fat added in cooking" +41302020,"CHICKPEAS, DRY, COOKED, NO FAT ADDED","Chickpeas, dry, cooked, fat not added in cooking" +41302100,"CHICKPEAS, CANNED, LOW SODIUM, NS AS TO FAT ADDED IN COOKING","Chickpeas, canned, low sodium, NS as to fat added in cooking" +41302110,"CHICKPEAS, CANNED, LOW SODIUM, FAT ADDED IN COOKING","Chickpeas, canned, low sodium, fat added in cooking" +41302120,"CHICKPEAS, CANNED, LOW SODIUM, FAT NOT ADDED IN COOKING","Chickpeas, canned, low sodium, fat not added in cooking" +41303000,"GREEN/YELLOW SPLIT PEAS, DRY, COOKED, NO FAT ADDED","Green or yellow split peas, dry, cooked, fat not added in cooking" +41303010,"GREEN OR YELLOW SPLIT PEAS, DRY, COOKED, FAT ADDED","Green or yellow split peas, dry, cooked, fat added in cooking" +41303020,"SPLIT PEAS, DRY, COOKED, NS AS TO ADDED FAT","Green or yellow split peas, dry, cooked, NS as 
to fat added in cooking" +41303500,"STEWED GREEN PEAS, PUERTO RICAN STYLE","Stewed green peas, Puerto Rican style" +41303550,"STEWED GREEN PEAS, W/ PIG'S FEET & POTATO, P.R.","Stewed green peas with pig's feet and potatoes, Puerto Rican style" +41304000,"WASABI PEAS","Wasabi peas" +41304030,"PEAS, DRY, COOKED W/ PORK","Peas, dry, cooked with pork" +41304130,"COWPEAS, DRY, COOKED W/ PORK","Cowpeas, dry, cooked with pork" +41304980,"LENTILS, DRY, COOKED, NS AS TO ADDED FAT","Lentils, dry, cooked, NS as to fat added in cooking" +41304990,"LENTILS, DRY, COOKED, FAT ADDED","Lentils, dry, cooked, fat added in cooking" +41305000,"LENTILS, DRY, COOKED, NO FAT ADDED","Lentils, dry, cooked, fat not added in cooking" +41306000,"LOAF, LENTIL","Loaf, lentil" +41310100,"STEWED PIGEON PEAS, P.R.","Stewed pigeon peas, Puerto Rican style (Gandules guisados, Gandur, Gandules)" +41310150,"STEWED CHICKPEAS, P.R.","Stewed chickpeas, Puerto Rican style" +41310160,"STEWED CHICKPEAS, W/ POTATOES, P.R.","Stewed chickpeas, with potatoes, Puerto Rican style" +41310200,"CHICKPEAS STEWED W/ PIG'S FEET, P.R.","Chickpeas stewed with pig's feet, Puerto Rican style (Garbanzos guisados con patitas de cerdo)" +41310210,"CHICKPEAS, W/ SPANISH SAUSAGE, P.R.","Stewed chickpeas with Spanish sausages, Puerto Rican style (Garbanzos guisados con chorizos)" +41310220,"FRIED CHICKPEAS W/ BACON, P.R.","Fried chickpeas with bacon, Puerto Rican style (Garbanzos fritos con tocineta)" +41310310,"STEWED BLACKEYE PEAS OR COWPEAS, P.R.","Stewed blackeye peas or cowpeas, Puerto Rican style" +41311000,"PAPAD(INDIAN APPETIZER),GRILLED OR BROILED","Papad (Indian appetizer), grilled or broiled" +41410010,"SOY NUTS","Soy nuts" +41410015,"SOY CHIPS","Soy chips" +41420010,"SOYBEAN CURD","Soybean curd" +41420050,"SOYBEAN CURD CHEESE","Soybean curd cheese" +41420100,"MISO SAUCE (INCLUDES AE SAUCE)","Miso sauce" +41420110,"MISO (FERMENTED SOYBEAN PASTE)","Miso (fermented soybean paste)" +41420200,"NATTO (FERMENTED SOYBEAN 
PRODUCT)","Natto (fermented soybean product)" +41420250,"HOISIN SAUCE","Hoisin sauce" +41420300,"SOY SAUCE","Soy sauce" +41420350,"SOY SAUCE, REDUCED SODIUM","Soy sauce, reduced sodium" +41420380,"SOY YOGURT","Soy yogurt" +41420400,"TERIYAKI SAUCE (INCLUDE ORIENTAL BARBECUE SAUCE)","Teriyaki sauce" +41420410,"TERIYAKI SAUCE, REDUCED SODIUM","Teriyaki sauce, reduced sodium" +41420450,"WORCESTERSHIRE SAUCE","Worcestershire sauce" +41421010,"SOYBEAN CURD, DEEP-FRIED","Soybean curd, deep fried" +41421020,"SOYBEAN CURD, BREADED, FRIED","Soybean curd, breaded, fried" +41422010,"SOYBEAN MEAL","Soybean meal" +41425010,"VERMICELLI, MADE FROM SOYBEANS","Vermicelli, made from soybeans" +41440000,"TEXTURED VEGETABLE PROTEIN, DRY","Textured vegetable protein, dry" +41480000,"TOFU FROZEN DESSERT, NOT CHOCOLATE (INCL TOFUTTI)","Tofu, frozen dessert, flavors other than chocolate" +41480010,"TOFU FROZEN DESSERT, CHOCOLATE (INCLUDE TOFUTTI)","Tofu, frozen dessert, chocolate" +41601010,"BEAN SOUP, NFS","Bean soup, NFS" +41601020,"BEAN WITH BACON OR HAM SOUP, CANNED OR READY-TO-SERVE","Bean with bacon or ham soup, canned or ready-to-serve" +41601030,"BLACK BEAN SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Black bean soup, home recipe, canned or ready-to-serve" +41601040,"LIMA BEAN SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Lima bean soup, home recipe, canned or ready-to-serve" +41601070,"SOYBEAN SOUP, MISO BROTH","Soybean soup, miso broth" +41601080,"PINTO BEAN SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Pinto bean soup, home recipe, canned or ready-to-serve" +41601090,"BEAN SOUP, WITH MACARONI, HOME RECIPE, CANNED, OR READY-TO-S","Bean soup, with macaroni, home recipe, canned, or ready-to-serve" +41601100,"PORTUGUESE BEAN SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Portuguese bean soup, home recipe, canned or ready-to-serve" +41601110,"BEAN AND HAM SOUP, CHUNKY STYLE, CANNED OR READY-TO-SERVE","Bean and ham soup, chunky style, canned or ready-to-serve" +41601130,"BEAN SOUP, 
MIXED BEANS, HOME RECIPE, CANNED OR READY-TO-SERV","Bean soup, mixed beans, home recipe, canned or ready-to-serve" +41601140,"BEAN SOUP, HOME RECIPE","Bean soup, home recipe" +41601160,"BEAN & HAM SOUP, CAN, REDUCED SODIUM, W/ WATER/RTS","Bean and ham soup, canned, reduced sodium, prepared with water or ready-to-serve" +41601180,"BEAN & HAM SOUP, HOME RECIPE","Bean and ham soup, home recipe" +41601200,"LIQUID FROM STEWED KIDNEY BEANS, P.R.","Liquid from stewed kidney beans, Puerto Rican style" +41602010,"PEA AND HAM SOUP, CHUNKY STYLE, CANNED OR READY-TO-SERVE","Pea and ham soup, chunky style, canned or ready-to-serve" +41602020,"GARBANZO BEAN OR CHICKPEA SOUP, HOME RECIPE, CANNED OR READY","Garbanzo bean or chickpea soup, home recipe, canned or ready-to-serve" +41602030,"SPLIT PEA & HAM SOUP","Split pea and ham soup" +41602050,"SPLIT PEA SOUP","Split pea soup" +41602070,"SPLIT PEA SOUP, CAN, REDUCED SODIUM, W/ WATER/RTS","Split pea soup, canned, reduced sodium, prepared with water or ready-to-serve" +41602090,"SPLIT PEA & HAM SOUP, CAN, REDUCED SODIUM, W/ WATER/RTS","Split pea and ham soup, canned, reduced sodium, prepared with water or ready-to-serve" +41603010,"LENTIL SOUP, HOME RECIPE, CANNED, OR READY-TO-SERVE","Lentil soup, home recipe, canned, or ready-to-serve" +41610100,"WHITE BEAN SOUP, P.R.","White bean soup, Puerto Rican style (Sopon de habichuelas blancas)" +41810200,"BACON STRIP, MEATLESS","Bacon strip, meatless" +41810250,"BACON BITS, MEATLESS","Bacon bits, meatless" +41810400,"BREAKFAST LINK,PATTY,/SLICE, MEATLESS","Breakfast link, pattie, or slice, meatless" +41810600,"CHICKEN, MEATLESS NFS","Chicken, meatless, NFS" +41810610,"CHICKEN, MEATLESS, BREADED, FRIED (INCL LOMA LINDA)","Chicken, meatless, breaded, fried" +41811200,"FISH STICK, MEATLESS","Fish stick, meatless" +41811400,"FRANKFURTER OR HOT DOG, MEATLESS","Frankfurter or hot dog, meatless" +41811600,"LUNCHEON SLICE,MEATLESS-BEEF,CHICKEN,SALAM / TURKEY","Luncheon slice, meatless-beef, 
chicken, salami or turkey" +41811800,"MEATBALL, MEATLESS","Meatball, meatless" +41811850,"SCALLOPS, MEATLESS, BREADED, FRIED","Scallops, meatless, breaded, fried (made with meat substitute)" +41811890,"VEGETARIAN BURGER OR PATTY, MEATLESS, NO BUN","Vegetarian burger or patty, meatless, no bun" +41811950,"SWISS STEAK, W/ GRAVY, MEATLESS","Swiss steak, with gravy, meatless" +41812000,"SANDWICH SPREAD, MEAT SUBSTITUTE TYPE","Sandwich spread, meat substitute type" +41812400,"VEGETARIAN POT PIE","Vegetarian pot pie" +41812450,"VEGETARIAN CHILI (MADE W/ MEAT SUBSTITUTE)","Vegetarian chili (made with meat substitute)" +41812500,"TOFU & VEG (W/ CARROT/DK GRN, NO POTATO) W/ SOY SAUCE","Tofu and vegetables (including carrots, broccoli, and/or dark-green leafy vegetables (no potatoes)), with soy-based sauce (mixture)" +41812510,"TOFU & VEG (NO CARROT/DK GRN, NO POTATO) W/ SOY SAUCE","Tofu and vegetables (excluding carrots, broccoli, and dark-green leafy vegetables (no potatoes)), with soy-based sauce (mixture)" +41812600,"VEGETARIAN FILLET","Vegetarian, fillet" +41812800,"VEGETARIAN STEW","Vegetarian stew" +41812850,"VEGETARIAN STROGANOFF (MADE W/ MEAT SUBSTITUTE)","Vegetarian stroganoff (made with meat substitute)" +41812900,"VEGETARIAN MEAT LOAF OR PATTIES","Vegetarian meat loaf or patties (meat loaf made with meat substitute)" +41813000,"VEGATARIAN BOUILLON, DRY","Vegetarian bouillon, dry" +41901020,"SOYBURGER W/ CHEESE ON BUN","Soyburger, meatless, with cheese on bun" +42100050,"NUTS, NFS","Nuts, nfs" +42100100,"ALMONDS, NFS","Almonds, NFS" +42101000,"ALMONDS, UNROASTED","Almonds, unroasted" +42101100,"ALMONDS, ROASTED","Almonds, roasted" +42101200,"ALMONDS, DRY ROASTED","Almonds, dry roasted (assume salted)" +42101210,"ALMONDS, DRY ROASTED, W/O SALT","Almonds, dry roasted, without salt" +42101350,"ALMONDS, HONEY-ROASTED","Almonds, honey-roasted" +42102000,"BRAZIL NUTS","Brazil nuts" +42104000,"CASHEW NUTS, NFS","Cashew nuts, NFS" +42104100,"CASHEW NUTS, ROASTED","Cashew 
nuts, roasted (assume salted)" +42104110,"CASHEW NUTS, ROASTED, W/O SALT","Cashew nuts, roasted, without salt" +42104200,"CASHEW NUTS, DRY ROASTED","Cashew nuts, dry roasted" +42104205,"CASHEW NUTS, DRY ROASTED, WITHOUT SALT","Cashew nuts, dry roasted, without salt" +42104500,"CASHEW NUTS, HONEY-ROASTED","Cashew nuts, honey-roasted" +42105000,"CHESTNUTS, ROASTED","Chestnuts, roasted" +42106000,"COCONUT MEAT, FRESH","Coconut meat, fresh" +42106020,"COCONUT MEAT, DRIED, SWEETENED, SHREDDED","Coconut meat, dried, sweetened" +42107000,"FILBERTS, HAZELNUTS","Filberts, hazelnuts" +42109000,"MACADAMIA NUTS, UNROASTED","Macadamia nuts, unroasted" +42109100,"MACADAMIA NUTS, ROASTED","Macadamia nuts, roasted" +42110000,"MIXED NUTS, NFS","Mixed nuts, NFS" +42110100,"MIXED NUTS, ROASTED, W/ PEANUTS","Mixed nuts, roasted, with peanuts" +42110150,"MIXED NUTS, ROASTED, W/O PEANUTS","Mixed nuts, roasted, without peanuts" +42110200,"MIXED NUTS, DRY ROASTED","Mixed nuts, dry roasted" +42110300,"MIXED NUTS, HONEY-ROASTED, WITH PEANUTS","Mixed nuts, honey-roasted, with peanuts" +42111000,"PEANUTS, NFS","Peanuts, NFS" +42111030,"PEANUTS, BOILED","Peanuts, boiled" +42111100,"PEANUTS, ROASTED, SALTED","Peanuts, roasted, salted" +42111110,"PEANUTS, ROASTED, W/O SALT","Peanuts, roasted, without salt" +42111200,"PEANUTS, DRY ROASTED, SALTED","Peanuts, dry roasted, salted" +42111210,"PEANUTS, DRY ROASTED, W/O SALT","Peanuts, dry roasted, without salt" +42111500,"PEANUTS, HONEY ROASTED (INCL BEERNUTS)","Peanuts, honey-roasted" +42112000,"PECANS","Pecans" +42113000,"PINE NUTS (PIGNOLIAS)","Pine nuts (Pignolias)" +42114130,"PISTACHIO NUTS","Pistachio nuts" +42116000,"WALNUTS","Walnuts" +42116100,"WALNUTS, HONEY-ROASTED","Walnuts, honey-roasted" +42200500,"ALMOND BUTTER","Almond butter" +42200600,"ALMOND PASTE (MARZIPAN PASTE)","Almond paste (Marzipan paste)" +42201000,"CASHEW BUTTER","Cashew butter" +42202000,"PEANUT BUTTER","Peanut butter" +42202010,"PEANUT BUTTER, LOW SODIUM","Peanut butter, 
low sodium" +42202100,"PEANUT BUTTER, REDUCED SODIUM & REDUCED SUGAR","Peanut butter, reduced sodium and reduced sugar" +42202130,"PEANUT BUTTER, REDUCED SUGAR","Peanut butter, reduced sugar" +42202150,"PEANUT BUTTER, REDUCED FAT","Peanut butter, reduced fat" +42202200,"PEANUT BUTTER, VITAMIN & MINERAL FORTIFIED","Peanut butter, vitamin and mineral fortified" +42203000,"PEANUT BUTTER & JELLY","Peanut butter and jelly" +42204050,"PEANUT SAUCE","Peanut sauce" +42204100,"BROWN NUT GRAVY ( MEATLESS)","Brown nut gravy, meatless" +42301010,"PEANUT BUTTER SANDWICH","Peanut butter sandwich" +42302010,"PEANUT BUTTER & JELLY SANDWICH","Peanut butter and jelly sandwich" +42303010,"PEANUT BUTTER & BANANA SANDWICH","Peanut butter and banana sandwich" +42401010,"COCONUT MILK","Coconut milk (liquid expressed from grated coconut meat, water added)" +42402010,"COCONUT CREAM, CANNED, SWEETENED (INCL COCO LOPEZ)","Coconut cream (liquid expressed from grated coconut meat), canned, sweetened" +42403010,"COCONUT WATER (LIQUID FROM COCONUTS)","Coconut water (liquid from coconuts)" +42404010,"COCONUT WATER, CANNED OR BOTTLED","Coconut water, canned or bottled" +42501000,"NUT MIXTURE W/ DRIED FRUIT & SEEDS","Nut mixture with dried fruit and seeds" +42501500,"NUT MIXTURE WITH DRIED FRUIT, SEEDS, AND CHOCOLATE","Nut mixture with dried fruit, seeds, and chocolate" +42502000,"NUT MIXTURE W/ SEEDS","Nut mixture with seeds" +43101000,"PUMPKIN & SQUASH SEEDS, HULLED, UNROASTED","Pumpkin and/or squash seeds, hulled, unroasted" +43101100,"PUMPKIN & SQUASH SEEDS, HULLED, ROASTED, SALTED","Pumpkin and/or squash seeds, hulled, roasted, salted" +43101150,"PUMPKIN & SQUASH SEEDS, HULLED, ROASTED, NO SALT","Pumpkin and/or squash seeds, hulled, roasted, without salt" +43102000,"SUNFLOWER SEEDS, HULLED, UNROASTED, WITHOUT SALT","Sunflower seeds, hulled, unroasted, without salt" +43102100,"SUNFLOWER SEEDS, HULLED, ROASTED, SALTED","Sunflower seeds, hulled, roasted, salted" +43102110,"SUNFLOWER SEEDS, 
HULLED, ROASTED, W/O SALT","Sunflower seeds, hulled, roasted, without salt" +43102200,"SUNFLOWER SEEDS, HULLED, DRY ROASTED","Sunflower seeds, hulled, dry roasted" +43103000,"SESAME SEEDS (INCLUDE TOASTED)","Sesame seeds" +43103050,"SESAME SEEDS, WHOLE SEEDS","Sesame seeds, whole seed" +43103100,"SESAME SAUCE","Sesame sauce" +43103200,"SESAME PASTE","Sesame paste (sesame butter made from whole seeds)" +43103300,"SESAME BUTTER (TAHINI) FROM KERNELS","Sesame butter (tahini) (made from kernels)" +43104000,"FLAX SEED","Flax seeds" +43107000,"MIXED SEEDS","Mixed seeds" +44101000,"CAROB POWDER OR FLOUR","Carob powder or flour" +44201000,"CAROB CHIPS","Carob chips" +44202000,"CAROB SYRUP","Carob syrup" +50010000,"FLOUR, WHITE (INCLUDE FLOUR, NFS)","Flour, white" +50020000,"FLOUR, WHOLE WHEAT","Flour, whole wheat" +50030000,"BISCUIT MIX, DRY","Biscuit mix, dry" +51000100,"BREAD, NS AS TO MAJOR FLOUR","Bread, NS as to major flour" +51000110,"BREAD, NS AS TO MAJOR FLOUR, TOASTED","Bread, NS as to major flour, toasted" +51000180,"BREAD, HOMEMADE/PURCH AT A BAKERY, NS AS TO FLOUR","Bread, made from home recipe or purchased at a bakery, NS as to major flour" +51000190,"BREAD, HOMEMADE/PURCH AT A BAKERY, TOASTD,NS FLOUR","Bread, made from home recipe or purchased at a bakery, toasted, NS as to major flour" +51000200,"ROLL, NS AS TO MAJOR FLOUR","Roll, NS as to major flour" +51000230,"ROLL, NS AS TO MAJOR FLOUR, TOASTED","Roll, NS as to major flour, toasted" +51000250,"ROLL, HOMEMADE/PURCH AT A BAKERY, NS AS TO FLOUR","Roll, made from home recipe or purchased at a bakery, NS as to major flour" +51000260,"ROLL, HOMEMADE/PURCH AT A BAKERY, TOASTD,NS FLOUR","Roll, made from home recipe or purchased at a bakery, toasted, NS as to major flour" +51000300,"ROLL, HARD, NS AS TO MAJOR FLOUR","Roll, hard, NS as to major flour" +51000400,"ROLL, BRAN, NS AS TO TYPE OF BRAN","Roll, bran, NS as to type of bran" +51101000,"BREAD, WHITE","Bread, white" +51101010,"BREAD, WHITE, TOASTED","Bread, 
white, toasted" +51101050,"BREAD, WHITE, HOMEMADE OR PURCHASED AT A BAKERY","Bread, white, made from home recipe or purchased at a bakery" +51101060,"BREAD, WHITE, HOMEMADE OR PURCH AT A BAKERY TOASTED","Bread, white, made from home recipe or purchased at a bakery, toasted" +51102010,"BREAD, WHITE W/ WHOLE WHEAT SWIRL","Bread, white with whole wheat swirl" +51102020,"BREAD, WHITE W/ WHOLE WHEAT SWIRL, TOASTED","Bread, white with whole wheat swirl, toasted" +51105010,"BREAD, CUBAN (INCLUDE SPANISH, PORTUGUESE)","Bread, Cuban" +51105040,"BREAD, CUBAN, TOASTED (INCLUDE SPANISH, PORTUGUESE)","Bread, Cuban, toasted" +51106010,"BREAD, NATIVE, WATER, P.R. (PAN CRIOLLO)","Bread, Native, water, Puerto Rican style (Pan Criollo)" +51106020,"BREAD, NATIVE, WATER, P.R., TOASTED (PAN CRIOLLO)","Bread, Native, water, Puerto Rican style, toasted (Pan Criollo)" +51106200,"BREAD, LARD, P.R. (PAN DE MANTECA)","Bread, lard, Puerto Rican style (Pan de manteca)" +51106210,"BREAD, LARD, P.R., TOASTED (PAN DE MANTECA)","Bread, lard, Puerto Rican style, toasted (Pan de manteca)" +51106300,"BREAD, CARESSED, P.R. 
(PAN SOBAO)","Bread, caressed, Puerto Rican style (Pan sobao)" +51106310,"BREAD, CARESSED, P.R., TOASTED (PAN SOBAO)","Bread, caressed, Puerto Rican style, toasted (Pan sobao)" +51107010,"BREAD, FRENCH OR VIENNA","Bread, French or Vienna" +51107040,"BREAD, FRENCH OR VIENNA, TOASTED","Bread, French or Vienna, toasted" +51108010,"FOCACCIA, ITALIAN FLATBREAD, PLAIN","Focaccia, Italian flatbread, plain" +51108100,"NAAN, INDIAN FLATBREAD","Naan, Indian flatbread" +51109010,"BREAD, ITALIAN, GRECIAN, ARMENIAN","Bread, Italian, Grecian, Armenian" +51109040,"BREAD, ITALIAN, GRECIAN, ARMENIAN, TOASTED","Bread, Italian, Grecian, Armenian, toasted" +51109100,"BREAD, PITA","Bread, pita" +51109110,"BREAD, PITA, TOASTED","Bread, pita, toasted" +51109150,"BREAD, PITA W/ FRUIT","Bread, pita with fruit" +51109200,"BREAD, PITA W/ FRUIT, TOASTED","Bread, pita with fruit, toasted" +51111010,"BREAD, CHEESE (INCLUDE ONION CHEESE)","Bread, cheese" +51111040,"BREAD, CHEESE, TOASTED (INCLUDE ONION CHEESE)","Bread, cheese, toasted" +51113010,"BREAD, CINNAMON","Bread, cinnamon" +51113100,"BREAD, CINNAMON, TOASTED","Bread, cinnamon, toasted" +51115010,"BREAD, CORNMEAL AND MOLASSES","Bread, cornmeal and molasses" +51115020,"BREAD, CORNMEAL AND MOLASSES, TOASTED","Bread, cornmeal and molasses, toasted" +51119010,"BREAD, EGG, CHALLAH","Bread, egg, Challah" +51119040,"BREAD, EGG, CHALLAH, TOASTED","Bread, egg, Challah, toasted" +51121010,"BREAD, GARLIC","Bread, garlic" +51121110,"BREAD, ONION","Bread, onion" +51121120,"BREAD, ONION, TOASTED","Bread, onion, toasted" +51122000,"BREAD, REDUCED CALORIE/HIGH FIBER","Bread, reduced calorie and/or high fiber, white or NFS" +51122010,"BREAD, REDUCED CALORIE/HIGH FIBER, TOASTED","Bread, reduced calorie and/or high fiber, white or NFS, toasted" +51122100,"BREAD, REDUCED CALORIE/ HIGH FIBER, W/ FRUIT/NUTS","Bread, reduced calorie and/or high fiber, white or NFS, with fruit and/or nuts" +51122110,"BREAD, REDUCED CALORIE/HI FIBER, W/FRUIT/NUTS,TOAST","Bread, 
reduced calorie and/or high fiber, white or NFS, with fruit and/or nuts, toasted" +51122300,"BREAD, WHITE, SPECIAL FORMULA, ADDED FIBER","Bread, white, special formula, added fiber" +51122310,"BREAD,WHITE,SPECIAL FORMULA,ADDED FIBER,TOASTED","Bread, white, special formula, added fiber, toasted" +51123010,"BREAD, HIGH PROTEIN","Bread, high protein" +51123020,"BREAD, HIGH PROTEIN, TOASTED","Bread, high protein, toasted" +51127010,"BREAD, POTATO","Bread, potato" +51127020,"BREAD, POTATO, TOASTED","Bread, potato, toasted" +51129010,"BREAD, RAISIN","Bread, raisin" +51129020,"BREAD, RAISIN, TOASTED","Bread, raisin, toasted" +51130510,"BREAD, WHITE, LOW SODIUM OR NO SALT","Bread, white, low sodium or no salt" +51130520,"BREAD, WHITE, LOW SODIUM/NO SALT, TOASTED","Bread, white, low sodium or no salt, toasted" +51133010,"BREAD, SOUR DOUGH","Bread, sour dough" +51133020,"BREAD, SOUR DOUGH, TOASTED","Bread, sour dough, toasted" +51134000,"BREAD, SWEET POTATO","Bread, sweet potato" +51134010,"BREAD, SWEET POTATO, TOASTED","Bread, sweet potato, toasted" +51135000,"BREAD, VEGETABLE","Bread, vegetable" +51135010,"BREAD, VEGETABLE, TOASTED","Bread, vegetable, toasted" +51136000,"BRUSCHETTA","Bruschetta" +51140100,"BREAD DOUGH, FRIED","Bread, dough, fried" +51150000,"ROLL, WHITE, SOFT","Roll, white, soft" +51150100,"ROLL, WHITE, SOFT, TOASTED","Roll, white, soft, toasted" +51151060,"ROLL, WHITE, SOFT, HOMEMADE/PURCH AT A BAKERY","Roll, white, soft, made from home recipe or purchased at a bakery" +51152000,"ROLL, WHITE, SOFT, REDUCED CALORIE/ HIGH FIBER","Roll, white, soft, reduced calorie and/or high fiber" +51152100,"ROLL, WHITE, REDUCED CALORIE/ HIGH FIBER, TOASTED","Roll, white, soft, reduced calorie and/or high fiber, toasted" +51153000,"ROLL, WHITE, HARD","Roll, white, hard" +51153010,"ROLL, WHITE, HARD, TOASTED","Roll, white, hard, toasted" +51154510,"ROLL, DIET","Roll, diet" +51154550,"ROLL, EGG BREAD","Roll, egg bread" +51154560,"ROLL, EGG BREAD, TOASTED","Roll, egg bread, 
toasted" +51154600,"ROLL, CHEESE","Roll, cheese" +51155000,"ROLL, FRENCH OR VIENNA","Roll, French or Vienna" +51155010,"ROLL, FRENCH OR VIENNA, TOASTED","Roll, French or Vienna, toasted" +51156500,"ROLL, GARLIC","Roll, garlic" +51157000,"ROLL, HOAGIE, SUBMARINE,","Roll, hoagie, submarine" +51157010,"ROLL, HOAGIE, SUBMARINE, TOASTED","Roll, hoagie, submarine, toasted" +51158100,"ROLL, MEXICAN, BOLILLO","Roll, Mexican, bolillo" +51159000,"ROLL, SOUR DOUGH","Roll, sour dough" +51160000,"ROLL, SWEET, NO FROSTING","Roll, sweet, no frosting" +51160100,"ROLL, SWEET, CINNAMON BUN, NO FROSTING","Roll, sweet, cinnamon bun, no frosting" +51160110,"ROLL, SWEET, CINNAMON BUN, FROSTED","Roll, sweet, cinnamon bun, frosted" +51161000,"ROLL, SWEET, W/ FRUIT, NO FROSTING","Roll, sweet, with fruit, no frosting" +51161020,"ROLL, SWEET, W/ FRUIT, FROSTED","Roll, sweet, with fruit, frosted" +51161030,"ROLL, SWEET, W/ FRUIT, FROSTED, DIET","Roll, sweet, with fruit, frosted, diet" +51161050,"ROLL, SWEET, FROSTED","Roll, sweet, frosted" +51161250,"ROLL, SWEET, NO TOPPING, MEXICAN (PAN DULCE)","Roll, sweet, no topping, Mexican (Pan Dulce)" +51161270,"ROLL, SWEET, SUGAR TOPPING, MEXICAN (PAN DULCE)","Roll, sweet, sugar topping, Mexican (Pan Dulce)" +51161280,"ROLL,SWEET,W/ RAISINS & ICING,MEXICAN (PAN DULCE)","Roll, sweet, with raisins and icing, Mexican (Pan Dulce)" +51165000,"COFFEE CAKE, YEAST TYPE","Coffee cake, yeast type" +51166000,"CROISSANT","Croissant" +51166100,"CROISSANT, CHEESE","Croissant, cheese" +51166200,"CROISSANT, CHOCOLATE","Croissant, chocolate" +51166500,"CROISSANT, FRUIT","Croissant, fruit" +51167000,"BRIOCHE","Brioche" +51168000,"COFFEE BREAD, SPANISH","Bread, Spanish coffee" +51180010,"BAGEL","Bagel" +51180020,"BAGEL, TOASTED","Bagel, toasted" +51180030,"BAGEL, W/ RAISINS","Bagel, with raisins" +51180040,"BAGEL, W/ RAISINS, TOASTED","Bagel, with raisins, toasted" +51180080,"BAGEL W/ FRUIT OTHER THAN RAISINS","Bagel, with fruit other than raisins" +51180090,"BAGEL W/ 
FRUIT OTHER THAN RAISINS, TOASTED","Bagel, with fruit other than raisins, toasted" +51182010,"BREAD, STUFFING (INCLUDE HOMEMADE; STUFFING, NFS)","Bread stuffing" +51182020,"BREAD STUFFING W/ EGG","Bread stuffing made with egg" +51184000,"BREAD STICK, HARD","Bread sticks, hard" +51184010,"BREAD STICK, SOFT","Bread stick, soft" +51184020,"BREAD STICK, NS AS TO HARD OR SOFT","Bread stick, NS as to hard or soft" +51184030,"BREAD STICK, SOFT, PREP W/ GARLIC & PARMESAN CHEESE","Bread stick, soft, prepared with garlic and parmesan cheese" +51184100,"BREAD STICK, HARD, LOW SODIUM","Bread stick, hard, low sodium" +51185000,"CROUTONS","Croutons" +51186010,"MUFFIN, ENGLISH (INCLUDE SOUR DOUGH)","Muffin, English" +51186020,"MUFFIN, ENGLISH, TOASTED","Muffin, English, toasted" +51186100,"MUFFIN, ENGLISH, W/ RAISINS","Muffin, English, with raisins" +51186120,"MUFFIN, ENGLISH, W/ RAISINS, TOASTED","Muffin, English, with raisins, toasted" +51186130,"MUFFIN, ENGLISH, CHEESE","Muffin, English, cheese" +51186140,"MUFFIN, ENGLISH, CHEESE, TOASTED","Muffin, English, cheese, toasted" +51186160,"MUFFIN, ENGLISH, W/ FRUIT OTHER THAN RAISINS","Muffin, English, with fruit other than raisins" +51186180,"MUFFIN, ENGLISH, W/ FRUIT OTHER THAN RAISINS, TSTD","Muffin, English, with fruit other than raisins, toasted" +51187000,"MELBA TOAST","Melba toast" +51187020,"ANISETTE TOAST","Anisette toast" +51188100,"PANNETONE (ITALIAN-STYLE SWEET BREAD)","Pannetone (Italian-style sweet bread)" +51188500,"ZWIEBACK TOAST (INCL RUSK)","Zwieback toast" +51201010,"BREAD, 100% WHOLE WHEAT","Bread, whole wheat, 100%" +51201020,"BREAD, 100% WHOLE WHEAT, TOASTED","Bread, whole wheat, 100%, toasted" +51201060,"BREAD, 100% WHOLE WHEAT, HOME-MADE","Bread, whole wheat, 100%, made from home recipe or purchased at bakery" +51201070,"BREAD, 100% WHOLE WHEAT, HOME-MADE, TOASTED","Bread, whole wheat, 100%, made from home recipe or purchased at bakery, toasted" +51201150,"BREAD, PITA, 100% WHOLE WHEAT","Bread, pita, whole 
wheat, 100%" +51201160,"BREAD, PITA, 100% WHOLE WHEAT, TOASTED","Bread, pita, whole wheat, 100%, toasted" +51202000,"MUFFIN, ENGLISH, 100% WHOLE WHEAT","Muffin, English, whole wheat, 100%" +51202020,"MUFFIN, ENGLISH, 100% WHOLE WHEAT, TOASTED","Muffin, English, whole wheat, 100%, toasted" +51202050,"MUFFIN, ENGLISH, 100% WHOLE WHEAT, W/ RAISINS","Muffin, English, whole wheat, 100%, with raisins" +51202060,"MUFFIN, ENGLISH, WHOLE WHEAT, W/ RAISINS, TOASTED","Muffin, English, whole wheat, 100%, with raisins, toasted" +51207010,"BREAD, SPROUTED WHEAT","Bread, sprouted wheat" +51207020,"BREAD, SPROUTED WHEAT, TOASTED","Bread, sprouted wheat, toasted" +51208000,"BAGEL, 100% WHOLE WHEAT","Bagel, whole wheat, 100%" +51208010,"BAGEL, 100% WHOLE WHEAT, TOASTED","Bagel, whole wheat, 100%, toasted" +51208100,"BAGEL, 100% WHOLE WHEAT, W/ RAISINS","Bagel, whole wheat, 100%, with raisins" +51208110,"BAGEL, 100% WHOLE WHEAT, W/ RAISINS, TOASTED","Bagel, whole wheat, 100%, with raisins, toasted" +51220000,"ROLL, 100% WHOLE WHEAT","Roll, whole wheat, 100%" +51220010,"ROLL, 100% WHOLE WHEAT, TOASTED","Roll, whole wheat, 100%, toasted" +51220030,"ROLL, 100% WHOLE WHEAT, HOME RECIPE/BAKERY","Roll, whole wheat, 100%, made from home recipe or purchased at bakery" +51220040,"ROLL, 100% WHOLE WHEAT, HOME RECIPE/BAKERY, TOASTED","Roll, whole wheat, 100%, made from home recipe or purchased at bakery, toasted" +51300050,"BREAD, WHOLE GRAIN WHITE","Bread, whole grain white" +51300060,"BREAD, WHOLE GRAIN WHITE, TOASTED","Bread, whole grain white, toasted" +51300100,"BAGEL, WHOLE GRAIN WHITE","Bagel, whole grain white" +51300110,"BREAD, WHOLE WHEAT, NS AS TO 100%","Bread, whole wheat, NS as to 100%" +51300120,"BREAD, WHOLE WHEAT, NS AS TO 100%, TOASTED","Bread, whole wheat, NS as to 100%, toasted" +51300140,"BREAD, WHOLE WHEAT, NS AS TO 100%, MADE FROM HOME RECIPE OR","Bread, whole wheat, NS as to 100%, made from home recipe or purchased at bakery" +51300150,"BREAD, WHOLE WHEAT, NS 100%, HOME 
RECIPE/BAKERY, TOASTED","Bread, whole wheat, NS as to 100%, made from home recipe or purchased at bakery, toasted" +51300175,"BREAD, CHAPPATTI OR ROTI (INDIAN BREAD), WHEAT","Bread, chappatti or roti (Indian bread), wheat" +51300180,"BREAD, PURI OR POORI (INDIAN PUFFED BREAD), WHEAT","Bread, puri or poori (Indian puffed bread), wheat" +51300185,"BREAD, PARATHA, (INDIAN FLAT BREAD), WHEAT","Bread, paratha, (Indian flat bread), wheat" +51300210,"BREAD, WHOLE WHEAT, WITH RAISINS","Bread, whole wheat, with raisins" +51300220,"BREAD, WHOLE WHEAT, WITH RAISINS, TOASTED","Bread, whole wheat, with raisins, toasted" +51301010,"BREAD, WHEAT OR CRACKED WHEAT","Bread, wheat or cracked wheat" +51301020,"BREAD, WHEAT OR CRACKED WHEAT, TOASTED","Bread, wheat or cracked wheat, toasted" +51301040,"BREAD, CRACKED WHEAT, HOME RECIPE/BAKERY","Bread, wheat or cracked wheat, made from home recipe or purchased at bakery" +51301050,"BREAD, CRACKED WHEAT, HOME RECIPE/BAKERY, TOASTED","Bread, wheat or cracked wheat, made from home recipe or purchased at bakery, toasted" +51301120,"BREAD, WHEAT OR CRACKED WHEAT, W/ RAISINS","Bread, wheat or cracked wheat, with raisins" +51301130,"BREAD, WHEAT OR CRACKED WHEAT, W/ RAISINS, TOASTED","Bread, wheat or cracked wheat, with raisins, toasted" +51301510,"BREAD, CRACKED WHEAT, REDUCED CALORIE/ HIGH FIBER","Bread, wheat or cracked wheat, reduced calorie and/or high fiber" +51301520,"BREAD, CRACKED WHEAT, RED CALORIE/ HI FIBER, TOAST","Bread, wheat or cracked wheat, reduced calorie and/or high fiber, toasted" +51301540,"BREAD, FRENCH OR VIENNA, WHOLE WHEAT, NS AS TO 100%","Bread, French or Vienna, whole wheat, NS as to 100%" +51301550,"BREAD, FRENCH OR VIENNA, WHOLE WHEAT, NS AS TO 100%, TOASTED","Bread, French or Vienna, whole wheat, NS as to 100%, toasted" +51301600,"BREAD, PITA, WHOLE WHEAT, NS AS TO 100%","Bread, pita, whole wheat, NS as to 100%" +51301610,"BREAD, PITA, WHOLE WHEAT, NS AS TO 100%, TOASTED","Bread, pita, whole wheat, NS as to 100%, 
toasted" +51301620,"BREAD, PITA, CRACKED WHEAT","Bread, pita, wheat or cracked wheat" +51301630,"BREAD, PITA, CRACKED WHEAT, TOASTED","Bread, pita, wheat or cracked wheat, toasted" +51301700,"BAGEL, WHEAT","Bagel, wheat" +51301710,"BAGEL, WHEAT, TOASTED","Bagel, wheat, toasted" +51301750,"BAGEL, WHOLE WHEAT, NS AS TO 100%","Bagel, whole wheat, NS as to 100%" +51301760,"BAGEL, WHOLE WHEAT, NS AS TO 100%, TOASTED","Bagel, whole wheat, NS as to 100%, toasted" +51301800,"BAGEL, WHEAT, W/ RAISINS","Bagel, wheat, with raisins" +51301810,"BAGEL, WHEAT, W/ RAISINS, TOASTED","Bagel, wheat, with raisins, toasted" +51301820,"BAGEL, WHEAT, W/ FRUITS & NUTS","Bagel, wheat, with fruit and nuts" +51301830,"BAGEL, WHEAT, W/ FRUITS & NUTS, TOASTED","Bagel, wheat, with fruit and nuts, toasted" +51301900,"BAGEL, WHEAT BRAN","Bagel, wheat bran" +51301910,"BAGEL, WHEAT BRAN, TOASTED","Bagel, wheat bran, toasted" +51302500,"MUFFIN, ENGLISH, WHEAT BRAN","Muffin, English, wheat bran" +51302510,"MUFFIN, ENGLISH, WHEAT BRAN, TOASTED","Muffin, English, wheat bran, toasted" +51302520,"MUFFIN, ENGLISH, WHEAT BRAN, W/ RAISINS","Muffin, English, wheat bran, with raisins" +51302530,"MUFFIN, ENGLISH, WHEAT BRAN, W/ RAISINS, TOASTED","Muffin, English, wheat bran, with raisins, toasted" +51303010,"MUFFIN, ENGLISH, WHEAT OR CRACKED WHEAT","Muffin, English, wheat or cracked wheat" +51303020,"MUFFIN, ENGLISH, WHEAT OR CRACKED WHEAT, TOASTED","Muffin, English, wheat or cracked wheat, toasted" +51303030,"MUFFIN, ENGLISH, WHOLE WHEAT, NS AS TO 100%","Muffin, English, whole wheat, NS as to 100%" +51303040,"MUFFIN, ENGLISH, WHOLE WHEAT, NS AS TO 100%, TOASTED","Muffin, English, whole wheat, NS as to 100%, toasted" +51303050,"MUFFIN, ENGLISH, WHEAT OR CRACKED WHEAT W/ RAISINS","Muffin, English, wheat or cracked wheat, with raisins" +51303060,"MUFFIN, ENGLISH, WHEAT W/ RAISINS, TOASTED","Muffin, English, wheat or cracked wheat, with raisins, toasted" +51303070,"MUFFIN, ENGLISH, WHOLE WHEAT, NS AS TO 100%, 
WITH RAISINS","Muffin, English, whole wheat, NS as to 100%, with raisins" +51303080,"MUFFIN, ENGLISH, WHOLE WHEAT, NS 100%, W/RAISINS, TOASTED","Muffin, English, whole wheat, NS as to 100%, with raisins, toasted" +51306000,"BREAD STICK, HARD, WHOLE WHEAT, NS AS TO 100 %","Bread stick, hard, whole wheat, NS as to 100%" +51320010,"ROLL, WHEAT OR CRACKED WHEAT","Roll, wheat or cracked wheat" +51320020,"ROLL, WHEAT OR CRACKED WHEAT, TOASTED","Roll, wheat or cracked wheat, toasted" +51320040,"ROLL, CRACKED WHEAT, HOME RECIPE/BAKERY","Roll, wheat or cracked wheat, made from home recipe or purchased at bakery" +51320050,"ROLL, CRACKED WHEAT, HOME RECIPE/BAKERY, TOASTED","Roll, wheat or cracked wheat, made from home recipe or purchased at bakery, toasted" +51320500,"ROLL, WHOLE WHEAT, NS AS TO 100%","Roll, whole wheat, NS as to 100%" +51320510,"ROLL, WHOLE WHEAT, NS AS TO 100%, TOASTED","Roll, whole wheat, NS as to 100%, toasted" +51320530,"ROLL, WHOLE WHEAT, NS 100%, MADE FROM HOMEMADE/BAKERY","Roll, whole wheat, NS as to 100%, made from home recipe or purchased at bakery" +51320540,"ROLL, WHOLE WHEAT, NS AS TO 100%, HOMEMADE/BAKERY, TOASTED","Roll, whole wheat, NS as to 100%, made from home recipe or purchased at bakery, toasted" +51401010,"BREAD, RYE","Bread, rye" +51401020,"BREAD, RYE, TOASTED","Bread, rye, toasted" +51401030,"BREAD, MARBLE RYE & PUMPERNICKEL","Bread, marble rye and pumpernickel" +51401040,"BREAD, MARBLE RYE & PUMPERNICKEL, TOASTED","Bread, marble rye and pumpernickel, toasted" +51401060,"BREAD, RYE, REDUCED CALORIE/ HIGH FIBER (INCL LESS)","Bread, rye, reduced calorie and/or high fiber" +51401070,"BREAD, RYE, REDUCED CALORIE/ HIGH FIBER, TOASTED","Bread, rye, reduced calorie and/or high fiber, toasted" +51401200,"MUFFIN, ENGLISH, RYE","Muffin, English, rye" +51401210,"MUFFIN, ENGLISH, RYE, TOASTED","Muffin, English, rye, toasted" +51404010,"BREAD, PUMPERNICKEL","Bread, pumpernickel" +51404020,"BREAD, PUMPERNICKEL, TOASTED","Bread, pumpernickel, 
toasted" +51404500,"BAGEL, PUMPERNICKEL","Bagel, pumpernickel" +51404510,"BAGEL, PUMPERNICKEL, TOASTED","Bagel, pumpernickel, toasted" +51404550,"MUFFIN, ENGLISH, PUMPERNICKEL","Muffin, English, pumpernickel" +51404560,"MUFFIN, ENGLISH, PUMPERNICKEL, TOASTED","Muffin, English, pumpernickel, toasted" +51407010,"BREAD, BLACK","Bread, black" +51407020,"BREAD, BLACK, TOASTED","Bread, black, toasted" +51420000,"ROLL, RYE","Roll, rye" +51421000,"ROLL, PUMPERNICKEL","Roll, pumpernickel" +51421100,"ROLL, PUMPERNICKEL, TOASTED","Roll, pumpernickel, toasted" +51501010,"BREAD, OATMEAL","Bread, oatmeal" +51501020,"BREAD, OATMEAL, TOASTED","Bread, oatmeal, toasted" +51501040,"BREAD, OAT BRAN","Bread, oat bran" +51501050,"BREAD, OAT BRAN, TOASTED","Bread, oat bran, toasted" +51501060,"BREAD, OAT BRAN, REDUCED CALORIE/ HIGH FIBER","Bread, oat bran, reduced calorie and/or high fiber" +51501070,"BREAD, OAT BRAN REDUCED CALORIE/HI FIBER, TOASTED","Bread, oat bran, reduced calorie and/or high fiber, toasted" +51501080,"BAGEL, OAT BRAN","Bagel, oat bran" +51501090,"BAGEL, OAT BRAN, TOASTED","Bagel, oat bran, toasted" +51502010,"ROLL, OATMEAL","Roll, oatmeal" +51502020,"ROLL, OATMEAL, TOASTED","Roll, oatmeal, toasted" +51502100,"ROLL, OAT BRAN","Roll, oat bran" +51502110,"ROLL, OAT BRAN, TOASTED","Roll, oat bran, toasted" +51503000,"MUFFIN, ENGLISH, OAT BRAN","Muffin, English, oat bran" +51503010,"MUFFIN, ENGLISH, OAT BRAN, TOASTED","Muffin, English, oat bran, toasted" +51503040,"MUFFIN, ENGLISH, OAT BRAN, WITH RAISINS","Muffin, English, oat bran, with raisins" +51503050,"MUFFIN, ENGLISH, OAT BRAN, W/ RAISINS, TOASTED","Muffin, English, oat bran with raisins, toasted" +51601010,"BREAD, MULTIGRAIN, TOASTED","Bread, multigrain, toasted" +51601020,"BREAD, MULTIGRAIN","Bread, multigrain" +51601210,"BREAD, MULTIGRAIN, W/ RAISINS","Bread, multigrain, with raisins" +51601220,"BREAD, MULTIGRAIN, W/ RAISINS, TOASTED","Bread, multigrain, with raisins, toasted" +51602010,"BREAD, MULTIGRAIN, 
REDUCED CALORIE/ HIGH FIBER","Bread, multigrain, reduced calorie and/or high fiber" +51602020,"BREAD, MULTIGRAIN, REDUCED CALORIE/ HI FIBER, TOAST","Bread, multigrain, reduced calorie and/or high fiber, toasted" +51620000,"ROLL, MULTIGRAIN","Roll, multigrain" +51620010,"ROLL, MULTIGRAIN, TOASTED","Roll, multigrain, toasted" +51630000,"BAGEL, MULTIGRAIN","Bagel, multigrain" +51630010,"BAGEL, MULTIGRAIN, TOASTED","Bagel, multigrain, toasted" +51630100,"BAGEL, MULTIGRAIN, W/ RAISINS","Bagel, multigrain, with raisins" +51630110,"BAGEL, MULTIGRAIN, W/ RAISINS, TOASTED","Bagel, multigrain, with raisins, toasted" +51630200,"MUFFIN, ENGLISH, MULTIGRAIN","Muffin, English, multigrain" +51630210,"MUFFIN, ENGLISH, MULTIGRAIN, TOASTED","Muffin, English, multigrain, toasted" +51801010,"BREAD, BARLEY","Bread, barley" +51801020,"BREAD, BARLEY, TOASTED","Bread, barley, toasted" +51804010,"BREAD, SOY","Bread, soy" +51804020,"BREAD, SOY, TOASTED","Bread, soy, toasted" +51805010,"BREAD, SUNFLOWER MEAL","Bread, sunflower meal" +51805020,"BREAD, SUNFLOWER MEAL, TOASTED","Bread, sunflower meal, toasted" +51806010,"BREAD, RICE","Bread, rice" +51806020,"BREAD, RICE, TOASTED","Bread, rice, toasted" +51807000,"INJERA (AMERICAN-STYLE ETHIOPIAN BREAD)","Injera (American-style Ethiopian bread)" +51808000,"BREAD, LOW GLUTEN","Bread, low gluten" +51808010,"BREAD, LOW GLUTEN, TOASTED","Bread, low gluten, toasted" +52101000,"BISCUIT, BAKING POWDER OR BUTTERMILK TYPE, NFS","Biscuit, baking powder or buttermilk type, NS as to made from mix, refrigerated dough, or home recipe" +52101030,"BISCUIT DOUGH, FRIED","Biscuit dough, fried" +52101040,"CRUMPET","Crumpet" +52101050,"CRUMPET, TOASTED","Crumpet, toasted" +52101100,"BISCUIT, BAKING POWDER OR BUTTERMILK, FROM MIX","Biscuit, baking powder or buttermilk type, made from mix" +52101150,"BISCUIT,BAKING PWR/BUTTER MILK,REFRIG DOUGH,LOWFAT","Biscuit, baking powder or buttermilk type, made from refrigerated dough, lowfat" +52102040,"BISCUIT, BAK POWDER OR 
BUTTERMILK, FROM REFRG DOUGH","Biscuit, baking powder or buttermilk type, made from refrigerated dough" +52103000,"BISCUIT, BAKING POWDER/BUTTERMILK TYPE, COMMERCIALLY BAKED","Biscuit, baking powder or buttermilk type, commercially baked" +52104010,"BISCUIT, BAKING POWDER OR BUTTERMILK, HOMEMADE","Biscuit, baking powder or buttermilk type, made from home recipe" +52104040,"BISCUIT, WHOLE WHEAT","Biscuit, whole wheat" +52104100,"BISCUIT, CHEESE","Biscuit, cheese" +52104200,"BISCUIT, CINNAMON-RAISIN","Biscuit, cinnamon-raisin" +52105100,"SCONES","Scone" +52105110,"SCONES, WHOLE WHEAT","Scone, whole wheat" +52105200,"SCONE, WITH FRUIT","Scone, with fruit" +52201000,"CORNBREAD, PREPARED FROM MIX","Cornbread, prepared from mix" +52202060,"CORNBREAD, HOMEMADE","Cornbread, made from home recipe" +52204000,"CORNBREAD STUFFING","Cornbread stuffing" +52206010,"CORNBREAD MUFFIN, STICK, ROUND","Cornbread muffin, stick, round" +52206060,"CORNBREAD MUFFIN, STICK, ROUND, HOMEMADE","Cornbread muffin, stick, round, made from home recipe" +52207010,"CORN FLOUR PATTIES OR TARTS, FRIED","Corn flour patty or tart, fried" +52208010,"CORN PONE,BAKED (INCL HOE CAKE)","Corn pone, baked" +52208020,"CORN PONE FRIED","Corn pone, fried" +52208760,"GORDITA/SOPE SHELL, PLAIN, NO FILLING","Gordita/sope shell, plain, no filling" +52209010,"HUSH PUPPY","Hush puppy" +52211010,"JOHNNYCAKE","Johnnycake" +52213010,"SPOONBREAD","Spoonbread" +52215000,"TORTILLA, NFS","Tortilla, NFS" +52215100,"TORTILLA, CORN","Tortilla, corn" +52215200,"TORTILLA, FLOUR (WHEAT)","Tortilla, flour (wheat)" +52215260,"TORTILLA, WHOLE WHEAT","Tortilla, whole wheat" +52215300,"TACO SHELL, CORN","Taco shell, corn" +52215350,"TACO SHELL; FLOUR","Taco shell, flour" +52220110,"CORNMEAL BREAD, DOMINICAN","Cornmeal bread, Dominican style (Arepa Dominicana)" +52301000,"MUFFIN, NFS","Muffin, NFS" +52302010,"MUFFIN, FRUIT","Muffin, fruit" +52302020,"MUFFIN, FRUIT, LOW FAT","Muffin, fruit, low fat" +52302500,"MUFFIN, CHOCOLATE 
CHIP","Muffin, chocolate chip" +52302600,"MUFFIN, CHOCOLATE","Muffin, chocolate" +52303010,"MUFFIN, WHOLE WHEAT","Muffin, whole wheat" +52303500,"MUFFIN, WHEAT","Muffin, wheat" +52304000,"MUFFIN, WHOLE GRAIN","Muffin, whole grain" +52304010,"MUFFIN, WHEAT BRAN (INCLUDE W/ RAISINS & NUTS)","Muffin, wheat bran" +52304040,"MUFFIN,BRAN,W/ FRUIT, LOWFAT","Muffin, bran with fruit, lowfat" +52304100,"MUFFIN, OATMEAL","Muffin, oatmeal" +52304150,"MUFFIN, OAT BRAN","Muffin, oat bran" +52306010,"MUFFIN, PLAIN","Muffin, plain" +52306300,"MUFFIN, CHEESE","Muffin, cheese" +52306500,"MUFFIN, PUMPKIN, W/ RAISINS","Muffin, pumpkin" +52306550,"MUFFIN, ZUCCHINI","Muffin, zucchini" +52306700,"MUFFIN, CARROT (INCL W/ RAISINS/NUTS)","Muffin, carrot" +52311010,"POPOVER","Popover" +52401000,"BREAD, BOSTON BROWN","Bread, Boston Brown" +52403000,"BREAD, NUT","Bread, nut" +52404060,"BREAD, PUMPKIN (INCLUDE W/ RAISINS)","Bread, pumpkin" +52405010,"BREAD, FRUIT","Bread, fruit" +52407000,"BREAD, ZUCCHINI (INCL SQUASH BREAD; W/ NUTS)","Bread, zucchini" +52408000,"BREAD, IRISH SODA","Bread, Irish soda" +53100050,"CAKE, BATTER, CHOCOLATE, RAW","Cake batter, raw, chocolate" +53100070,"CAKE, BATTER, RAW, NOT CHOCOLATE","Cake batter, raw, not chocolate" +53100100,"CAKE OR CUPCAKE, NS AS TO TYPE","Cake or cupcake, NS as to type" +53101100,"CAKE, ANGEL FOOD, W/O ICING","Cake, angel food, without icing or filling" +53101200,"CAKE, ANGEL FOOD, W/ ICING","Cake, angel food, with icing or filling" +53101250,"CAKE, ANGEL FOOD, W/ FRUIT & ICING/FILLING","Cake, angel food, with fruit and icing or filling" +53102100,"CAKE OR CUPCAKE,APPLESAUCE W/O ICING","Cake or cupcake, applesauce, without icing or filling" +53102200,"CAKE OR CUPCAKE,APPLESAUCE W/ ICING","Cake or cupcake, applesauce, with icing or filling" +53102600,"CAKE OR CUPCAKE,BANANA, W/O ICING","Cake or cupcake, banana, without icing or filling" +53102700,"CAKE OR CUPCAKE,BANANA, W/ ICING","Cake or cupcake, banana, with icing or filling" 
+53102800,"CAKE OR CUPCAKE,BLACK FOREST (CHOC-CHERRY)","Cake or cupcake, black forest (chocolate-cherry)" +53103000,"CAKE, BOSTON CREAM PIE","Cake, Boston cream pie" +53104100,"CAKE OR CUPCAKE,CARROT, NO ICING","Cake or cupcake, carrot, without icing or filling" +53104260,"CAKE OR CUPCAKE, CARROT, WITH ICING","Cake or cupcake, carrot, with icing or filling" +53104300,"CARROT CAKE, DIET","Cake, carrot, diet" +53104400,"CAKE OR CUPCAKE,COCONUT, W/ ICING","Cake or cupcake, coconut, with icing or filling" +53104500,"CHEESECAKE","Cheesecake" +53104550,"CHEESECAKE, W/ FRUIT","Cheesecake with fruit" +53104600,"CHEESECAKE, CHOCOLATE","Cheesecake, chocolate" +53105270,"CAKE OR CUPCAKE, CHOC, DEVIL'S FOOD OR FUDGE, W/ICING /FILL","Cake or cupcake, chocolate, devil's food or fudge, with icing or filling" +53105275,"CAKE OR CUPCAKE, CHOCOLATE, DEVIL'S FOOD OR FUDGE, W/O ICING","Cake or cupcake, chocolate, devil's food or fudge, without icing or filling" +53105300,"CAKE OR CUPCAKE,GERMAN CHOC, W/ ICING or FILLING","Cake or cupcake, German chocolate, with icing or filling" +53105500,"CAKE, CHOC, W/ ICING, DIET","Cake, chocolate, with icing, diet" +53106500,"CAKE, CREAM, W/O ICING OR TOPPING","Cake, cream, without icing or topping" +53108200,"SNACK CAKE, CHOCOLATE, WITH ICING OR FILLING","Snack cake, chocolate, with icing or filling" +53108220,"SNACK CAKE, CHOC, W/ICING OR FILLING, REDUCED FAT&CALORIE","Snack cake, chocolate, with icing or filling, reduced fat and calories" +53109200,"SNACK CAKE, NOT CHOCOLATE, WITH ICING OR FILLING","Snack cake, not chocolate, with icing or filling" +53109220,"SNACK CAKE, NOT CHOC, W/ ICING OR FILLING, RED FAT&CALS","Snack cake, not chocolate, with icing or filling, reduced fat and calories" +53109300,"CAKE,DOBOS TORTE(NON-CHOC CAKE W/CHOC FILL & ICING)","Cake, Dobos Torte (non-chocolate layer cake with chocolate filling and icing)" +53110000,"CAKE, FRUITCAKE, LIGHT/DARK, HOLIDAY TYPE CAKE","Cake, fruit cake, light or dark, holiday type cake" 
+53111000,"CAKE OR CUPCAKE, GINGERBREAD","Cake or cupcake, gingerbread" +53112000,"CAKE, ICE CREAM & CAKE ROLL, CHOCOLATE","Cake, ice cream and cake roll, chocolate" +53112100,"CAKE, ICE CREAM & CAKE ROLL, NOT CHOCOLATE","Cake, ice cream and cake roll, not chocolate" +53113000,"CAKE, JELLY ROLL","Cake, jelly roll" +53114000,"CAKE OR CUPCAKE,LEMON, W/O ICING","Cake or cupcake, lemon, without icing or filling" +53114100,"CAKE OR CUPCAKE,LEMON, W/ ICING","Cake or cupcake, lemon, with icing or filling" +53115100,"CAKE OR CUPCAKE, MARBLE, W/O ICING OR FILLING","Cake or cupcake, marble, without icing or filling" +53115200,"CAKE OR CUPCAKE, MARBLE, WITH ICING OR FILLING","Cake or cupcake, marble, with icing or filling" +53115310,"CAKE OR CUPCAKE,NUT, W/O ICING","Cake or cupcake, nut, without icing or filling" +53115320,"CAKE OR CUPCAKE,NUT, W/ ICING","Cake or cupcake, nut, with icing or filling" +53115410,"CAKE OR CUPCAKE, OATMEAL","Cake or cupcake, oatmeal" +53115450,"CAKE OR CUPCAKE, PEANUT BUTTER","Cake or cupcake, peanut butter" +53116000,"CAKE, POUND, W/O ICING","Cake, pound, without icing or filling" +53116020,"CAKE, POUND, W/ ICING","Cake, pound, with icing or filling" +53116270,"CAKE, POUND, CHOCOLATE","Cake, pound, chocolate" +53116350,"CAKE, POUND, P.R. 
(PONQUE)","Cake, pound, Puerto Rican style (Ponque)" +53116390,"CAKE, POUND, REDUCED FAT, NO CHOLESTEROL","Cake, pound, reduced fat, cholesterol free" +53116500,"CAKE OR CUPCAKE,PUMPKIN, W/O ICING","Cake or cupcake, pumpkin, without icing or filling" +53116510,"CAKE OR CUPCAKE,PUMPKIN,W/ ICING","Cake or cupcake, pumpkin, with icing or filling" +53116550,"CAKE OR CUPCAKE, RAISIN-NUT","Cake or cupcake, raisin-nut" +53116570,"CAKE, RAVANI (MADE W/ FARINA)","Cake, Ravani (made with farina)" +53116600,"CAKE, RICE FLOUR, W/O ICING","Cake, rice flour, without icing or filling" +53116650,"CAKE, QUEZADILLA, EL SALVADORIAN STYLE","Cake, Quezadilla, El Salvadorian style" +53117100,"CAKE OR CUPCAKE,SPICE, W/O ICING","Cake or cupcake, spice, without icing or filling" +53117200,"CAKE OR CUPCAKE,SPICE, W/ ICING","Cake or cupcake, spice, with icing or filling" +53118100,"CAKE, SPONGE, W/O ICING","Cake, sponge, without icing or filling" +53118200,"CAKE, SPONGE, W/ ICING","Cake, sponge, with icing or filling" +53118300,"CAKE, SPONGE, CHOCOLATE","Cake, sponge, chocolate" +53118410,"RUM CAKE, WITHOUT ICING (SOPA BORRACHA)","Rum cake, without icing (Sopa Borracha)" +53118500,"CAKE, TORTE","Cake, torte" +53118550,"CAKE, TRES LECHE","Cake, tres leche" +53119000,"CAKE, UPSIDE DOWN (ALL FRUITS)","Cake, upside down (all fruits)" +53120270,"CAKE OR CUPCAKE, WHITE, WITH ICING OR FILLING","Cake or cupcake, white, with icing or filling" +53120275,"CAKE OR CUPCAKE, WHITE, WITHOUT ICING OR FILLING","Cake or cupcake, white, without icing or filling" +53121270,"CAKE OR CUPCAKE, YELLOW, WITH ICING OR FILLING","Cake or cupcake, yellow, with icing or filling" +53121275,"CAKE OR CUPCAKE, YELLOW, WITHOUT ICING OR FILLING","Cake or cupcake, yellow, without icing or filling" +53122070,"CAKE, SHORTCAKE, BISCUIT, W/ WHIPPED CREAM & FRUIT","Cake, shortcake, biscuit type, with whipped cream and fruit" +53122080,"CAKE, SHORTCAKE, BISCUIT, W/ FRUIT","Cake, shortcake, biscuit type, with fruit" +53123070,"CAKE, 
SHORTCAKE, SPONGE, W/ WHIPPED CREAM & FRUIT","Cake, shortcake, sponge type, with whipped cream and fruit" +53123080,"CAKE, SHORTCAKE, SPONGE, W/ FRUIT","Cake, shortcake, sponge type, with fruit" +53123500,"CAKE, SHORTCAKE, W/ WHIP TOPPING & FRUIT, DIET","Cake, shortcake, with whipped topping and fruit, diet" +53124110,"CAKE OR CUPCAKE, ZUCCHINI","Cake or cupcake, zucchini" +53200100,"COOKIE, BATTER OR DOUGH, RAW","Cookie, batter or dough, raw" +53201000,"COOKIE, NFS","Cookie, NFS" +53202000,"COOKIE, ALMOND","Cookie, almond" +53203000,"COOKIE, APPLESAUCE","Cookie, applesauce" +53203500,"COOKIE, BISCOTTI","Cookie, biscotti (Italian sugar cookie)" +53204000,"COOKIE, BROWNIE, NS AS TO ICING","Cookie, brownie, NS as to icing" +53204010,"COOKIE, BROWNIE, W/O ICING","Cookie, brownie, without icing" +53204100,"COOKIE, BROWNIE, WITH ICING OR FILLING","Cookie, brownie, with icing or filling" +53204840,"COOKIE, BROWNIE, REDUCED FAT, NS AS TO ICING","Cookie, brownie, reduced fat, NS as to icing" +53204860,"COOKIE, BROWNIE, FAT FREE, NS AS TO ICING","Cookie, brownie, fat free, NS as to icing" +53205250,"COOKIE, BUTTERSCOTCH, BROWNIE","Cookie, butterscotch, brownie" +53205260,"COOKIE, BAR, WITH CHOCOLATE","Cookie, bar, with chocolate" +53206000,"COOKIE, CHOCOLATE CHIP","Cookie, chocolate chip" +53206020,"COOKIE, CHOC CHIP, HOMEMADE OR PURCHASED AT BAKERY","Cookie, chocolate chip, made from home recipe or purchased at a bakery" +53206030,"COOKIE, CHOC CHIP, REDUCED FAT","Cookie, chocolate chip, reduced fat" +53206100,"COOKIE, CHOCOLATE CHIP SANDWICH","Cookie, chocolate chip sandwich" +53206500,"COOKIE, CHOCOLATE, MADE WITH RICE CEREAL","Cookie, chocolate, made with rice cereal" +53206550,"COOKIE, CHOCOLATE, MADE W/ OATMEAL & COCONUT","Cookie, chocolate, made with oatmeal and coconut (no-bake)" +53207000,"COOKIE, CHOCOLATE FUDGE","Cookie, chocolate or fudge" +53207020,"COOKIE, CHOCOLATE OR FUDGE, REDUCED FAT","Cookie, chocolate or fudge, reduced fat" +53207050,"COOKIE, CHOCOLATE, 
W/ CHOC FILLING/COATING, FAT FREE","Cookie, chocolate, with chocolate filling or coating, fat free" +53208000,"COOKIE, MARSHMALLOW, CHOCOLATE-COVERED","Cookie, marshmallow, chocolate-covered" +53208200,"COOKIE, CHOCOLATE-COVERED, MARSHMALLOW PIE","Cookie, marshmallow pie, chocolate covered" +53209005,"COOKIE, CHOCOLATE, WITH ICING OR COATING","Cookie, chocolate, with icing or coating" +53209010,"COOKIE, SUGAR WAFER, CHOCOLATE-COVERED","Cookie, sugar wafer, chocolate-covered" +53209015,"COOKIE, CHOCOLATE SANDWICH","Cookie, chocolate sandwich" +53209020,"COOKIE, CHOCOLATE SANDWICH, REDUCED FAT","Cookie, chocolate sandwich, reduced fat" +53209100,"COOKIE, CHOCOLATE, SANDWICH, W/ EXTRA FILLING","Cookie, chocolate, sandwich, with extra filling" +53209500,"COOKIE, CHOCOLATE & VANILLA SANDWICH","Cookie, chocolate and vanilla sandwich" +53210000,"COOKIE, CHOCOLATE WAFER","Cookie, chocolate wafer" +53210900,"COOKIE, GRAHAM CRACKER WITH CHOCOLATE AND MARSHMALLOW","Cookie, graham cracker with chocolate and marshmallow" +53211000,"COOKIE, BAR, W/ CHOCOLATE, NUTS, & GRAHAM CRACKERS","Cookie bar, with chocolate, nuts, and graham crackers" +53215500,"COOKIE, COCONUT","Cookie, coconut" +53220000,"COOKIE, FRUIT-FILLED","Cookie, fruit-filled bar" +53220010,"COOKIE, FRUIT-FILLED BAR, FAT FREE","Cookie, fruit-filled bar, fat free" +53220030,"COOKIE, FIG BAR","Cookie, fig bar" +53220040,"COOKIE, FIG BAR, FAT FREE","Cookie, fig bar, fat free" +53222010,"COOKIE, FORTUNE","Cookie, fortune" +53222020,"COOKIE, CONE SHELL, ICE CREAM TYPE,WAFER / CAKE","Cookie, cone shell, ice cream type, wafer or cake" +53223000,"COOKIE, GINGERSNAPS","Cookie, gingersnaps" +53223100,"COOKIE, GRANOLA","Cookie, granola" +53224000,"COOKIE, LADY FINGER","Cookie, ladyfinger" +53224250,"COOKIE, LEMON BAR","Cookie, lemon bar" +53225000,"COOKIE, MACAROON","Cookie, macaroon" +53226000,"COOKIE, MARSHMALLOW, W/ COCONUT","Cookie, marshmallow, with coconut" +53226500,"COOKIE, MARSHMALLOW, W/ RICE CEREAL 
(NO-BAKE)","Cookie, marshmallow, with rice cereal (no-bake)" +53226550,"COOKIE, MARSHMALLOW, W/ RICE CEREAL & CHOC CHIPS","Cookie, marshmallow, with rice cereal and chocolate chips" +53226600,"COOKIE, MARSHMALLOW & PEANUT BUTTER, W/ OAT CEREAL (NO-BAKE)","Cookie, marshmallow and peanut butter, with oat cereal (no-bake)" +53228000,"COOKIE, MERINGUE","Cookie, meringue" +53230000,"COOKIE, MOLASSES","Cookie, molasses" +53231000,"COOKIE, LEBKUCHEN","Cookie, Lebkuchen" +53231400,"COOKIE, MULTIGRAIN, HIGH FIBER","Cookie, multigrain, high fiber" +53233000,"COOKIE, OATMEAL","Cookie, oatmeal" +53233010,"COOKIE, OATMEAL, W/ RAISINS OR DATES","Cookie, oatmeal, with raisins" +53233040,"COOKIE, OATMEAL, REDUCED FAT, NS AS TO RAISINS","Cookie, oatmeal, reduced fat, NS as to raisins" +53233050,"COOKIE, OATMEAL SANDWICH, W/ CREME FILLING","Cookie, oatmeal sandwich, with creme filling" +53233060,"COOKIE, OATMEAL, W/ CHOCOLATE CHIPS","Cookie, oatmeal, with chocolate chips" +53233080,"COOKIE, OATMEAL SANDWICH, W/ PEANUT BUTTER & JELLY FILLING","Cookie, oatmeal sandwich, with peanut butter and jelly filling" +53233100,"COOKIE,OATMEAL,W/ CHOC & PEANUT BUTTER (NO-BAKE)","Cookie, oatmeal, with chocolate and peanut butter (no-bake)" +53234000,"COOKIE, PEANUT BUTTER (INCLUDE PB WAFER)","Cookie, peanut butter" +53234100,"COOKIE, PEANUT BUTTER, W/ CHOCOLATE (INCL NASSAU)","Cookie, peanut butter, with chocolate" +53234250,"COOKIE, PEANUT BUTTER W/ RICE CEREAl (NO-BAKE)","Cookie, peanut butter with rice cereal (no-bake)" +53235000,"COOKIE, PEANUT BUTTER SANDWICH","Cookie, peanut butter sandwich" +53235500,"COOKIE, W/ PEANUT BUTTER FILLING, CHOCOLATE-COATED","Cookie, with peanut butter filling, chocolate-coated" +53235600,"COOKIE, PFEFFERNUSSE","Cookie, Pfeffernusse" +53236000,"COOKIE, PIZZELLE (ITALIAN STYLE WAFER)","Cookie, pizzelle (Italian style wafer)" +53236100,"COOKIE, PUMPKIN","Cookie, pumpkin" +53237000,"COOKIE, RAISIN","Cookie, raisin" +53237010,"COOKIE, RAISIN SANDWICH, 
CREAM-FILLED","Cookie, raisin sandwich, cream-filled" +53237500,"COOKIE, RUM BALL (NO-BAKE)","Cookie, rum ball (no-bake)" +53238000,"COOKIE, SANDWICH TYPE, NOT CHOCOLATE OR VANILLA","Cookie, sandwich-type, not chocolate or vanilla" +53239000,"COOKIE, SHORTBREAD","Cookie, shortbread" +53239010,"COOKIE, SHORTBREAD, REDUCED FAT","Cookie, shortbread, reduced fat" +53239050,"COOKIE, SHORTBREAD, WITH ICING OR FILLING","Cookie, shortbread, with icing or filling" +53240000,"COOKIE, ANIMAL","Cookie, animal" +53240010,"COOKIE, ANIMAL, WITH FROSTING OR ICING","Cookie, animal, with frosting or icing" +53241500,"COOKIE, BUTTER OR SUGAR","Cookie, butter or sugar" +53241510,"MARIE BISCUIT","Marie biscuit" +53241600,"COOKIE, BUTTER OR SUGAR, WITH FRUIT AND/OR NUTS","Cookie, butter or sugar, with fruit and/or nuts" +53242000,"COOKIE, SUGAR WAFER","Cookie, sugar wafer" +53242500,"COOKIE, TOFFEE BAR","Cookie, toffee bar" +53243000,"COOKIE, VANILLA SANDWICH","Cookie, vanilla sandwich" +53243010,"COOKIE, VANILLA SANDWICH, EXTRA FILLING","Cookie, vanilla sandwich, extra filling" +53243050,"COOKIE, VANILLA SANDWICH, REDUCED FAT","Cookie, vanilla sandwich, reduced fat" +53244010,"COOKIE, BUTTER/SUGAR, W/ CHOCOLATE ICING / FILLING","Cookie, butter or sugar, with chocolate icing or filling" +53244020,"COOKIE, BUTTER/SUGAR, W/ ICING/FILLING OTHER THAN CHOC","Cookie, butter or sugar, with icing or filling other than chocolate" +53246000,"COOKIE, TEA, JAPANESE","Cookie, tea, Japanese" +53247000,"COOKIE, VANILLA WAFER, NS AS TO TYPE","Cookie, vanilla wafer" +53247050,"COOKIE, VANILLA WAFER, REDUCED FAT","Cookie, vanilla wafer, reduced fat" +53247500,"COOKIE, VANILLA W/ CARAMEL, COCONUT, CHOC COATING","Cookie, vanilla with caramel, coconut, and chocolate coating" +53251100,"COOKIE, RUGELACH","Cookie, rugelach" +53260030,"COOKIE, CHOCOLATE CHIP, SUGAR FREE","Cookie, chocolate chip, sugar free" +53260200,"COOKIE, OATMEAL, SUGAR FREE","Cookie, oatmeal, sugar free" +53260300,"COOKIE, SANDWICH, SUGAR 
FREE","Cookie, sandwich, sugar free" +53260400,"COOKIE, SUGAR OR PLAIN, SUGAR FREE","Cookie, sugar or plain, sugar free" +53260500,"COOKIE, SUGAR WAFER, SUGAR FREE","Cookie, sugar wafer, sugar free" +53260600,"COOKIE, PEANUT BUTTER, SUGAR FREE","Cookie, peanut butter, sugar free" +53270100,"COOKIE, P.R. (MANTECADITOS POLVORONES)","Cookies, Puerto Rican (Mantecaditos polvorones)" +53300100,"PIE, NFS","Pie, NFS" +53300170,"PIE, INDIVIDUAL SIZE OR TART, NFS","Pie, individual size or tart, NFS" +53300180,"PIE, FRIED, NFS","Pie, fried, NFS" +53301000,"PIE, APPLE, TWO CRUST","Pie, apple, two crust" +53301070,"PIE, APPLE, INDIVIDUAL SIZE OR TART","Pie, apple, individual size or tart" +53301080,"PIE, APPLE, FRIED PIE","Pie, apple, fried pie" +53301500,"PIE, APPLE, ONE CRUST (INCL W/ CRUMB TOPPING)","Pie, apple, one crust" +53301750,"PIE, APPLE, DIET","Pie, apple, diet" +53302000,"PIE, APRICOT, TWO CRUST","Pie, apricot, two crust" +53302070,"PIE, APRICOT, INDIVIDUAL SIZE OR TART","Pie, apricot, individual size or tart" +53302080,"PIE, APRICOT, FRIED","Pie, apricot, fried pie" +53303000,"PIE, BLACKBERRY, TWO CRUST","Pie, blackberry, two crust" +53303070,"PIE, BLACKBERRY, INDIVIDUAL SIZE OR TART","Pie, blackberry, individual size or tart" +53303500,"PIE, BERRY NOT BLACK,BLUE,BOYSEN,RASP....,TWO CRUST","Pie, berry, not blackberry, blueberry, boysenberry, huckleberry, raspberry, or strawberry; two crust" +53303510,"PIE, BERRY, ONE CRUST","Pie, berry, not blackberry, blueberry, boysenberry, huckleberry, raspberry, or strawberry; one crust" +53303570,"PIE, BERRY, INDIVIDUAL SIZE OR TART","Pie, berry, not blackberry, blueberry, boysenberry, huckleberry, raspberry, or strawberry, individual size or tart" +53304000,"PIE, BLUEBERRY, TWO CRUST","Pie, blueberry, two crust" +53304050,"PIE, BLUEBERRY, ONE CRUST","Pie, blueberry, one crust" +53304070,"PIE, BLUEBERRY, INDIVIDUAL SIZE OR TART","Pie, blueberry, individual size or tart" +53305000,"PIE, CHERRY, TWO CRUST","Pie, cherry, two 
crust" +53305010,"PIE, CHERRY, ONE CRUST","Pie, cherry, one crust" +53305070,"PIE, CHERRY, INDIVIDUAL SIZE OR TART","Pie, cherry, individual size or tart" +53305080,"PIE, CHERRY, FRIED PIE","Pie, cherry, fried pie" +53305700,"PIE, LEMON (NOT CREAM OR MERINGUE)","Pie, lemon (not cream or meringue)" +53305720,"PIE, LEMON (NOT CREAM OR MERINGUE), INDIVIDUAL SIZE","Pie, lemon (not cream or meringue), individual size or tart" +53305750,"PIE, LEMON, FRIED","Pie, lemon, fried pie" +53306000,"PIE, MINCE, TWO CRUST","Pie, mince, two crust" +53306070,"PIE, MINCE, INDIVIDUAL SIZE OR TART","Pie, mince, individual size or tart" +53307000,"PIE, PEACH, TWO CRUST","Pie, peach, two crust" +53307050,"PIE, PEACH, ONE-CRUST","Pie, peach, one crust" +53307070,"PIE, PEACH, INDIVIDUAL SIZE OR TART","Pie, peach, individual size or tart" +53307080,"PIE, PEACH, FRIED","Pie, peach, fried pie" +53307500,"PIE, PEAR, TWO CRUST","Pie, pear, two crust" +53307570,"PIE, PEAR, INDIVIDUAL SIZE OR TART","Pie, pear, individual size or tart" +53308000,"PIE, PINEAPPLE, TWO CRUST","Pie, pineapple, two crust" +53308070,"PIE, PINEAPPLE, INDIVIDUAL SIZE OR TART","Pie, pineapple, individual size or tart" +53308300,"PIE, PLUM, TWO CRUST","Pie, plum, two crust" +53308500,"PIE, PRUNE, ONE CRUST","Pie, prune, one crust" +53309000,"PIE, RAISIN, TWO CRUST","Pie, raisin, two crust" +53309070,"PIE, RAISIN, INDIVIDUAL SIZE OR TART","Pie, raisin, individual size or tart" +53310000,"PIE, RASPBERRY, ONE CRUST","Pie, raspberry, one crust" +53310050,"PIE, RASPBERRY, TWO CRUST","Pie, raspberry, two crust" +53311000,"PIE, RHUBARB, TWO CRUST","Pie, rhubarb, two crust" +53311050,"PIE, RHUBARB, ONE CRUST","Pie, rhubarb, one crust" +53311070,"PIE, RHUBARB, INDIVIDUAL SIZE OR TART","Pie, rhubarb, individual size or tart" +53312000,"PIE, STRAWBERRY, ONE CRUST","Pie, strawberry, one crust" +53313000,"PIE, STRAWBERRY-RHUBARB, TWO CRUST","Pie, strawberry-rhubarb, two crust" +53314000,"PIE, STRAWBERRY, INDIVIDUAL SIZE OR TART","Pie, 
strawberry, individual size or tart" +53340000,"PIE, APPLE-SOUR CREAM","Pie, apple-sour cream" +53340500,"PIE, CHERRY, W/ CREAM CHEESE & SOUR CREAM","Pie, cherry, made with cream cheese and sour cream" +53341000,"PIE, BANANA CREAM","Pie, banana cream" +53341070,"PIE, BANANA CREAM, INDIVIDUAL SIZE OR TART","Pie, banana cream, individual size or tart" +53341500,"PIE, BUTTERMILK","Pie, buttermilk" +53341750,"PIE, CHESS (INCL LEMON CHESS PIE)","Pie, chess" +53342000,"PIE, CHOCOLATE CREAM","Pie, chocolate cream" +53342070,"PIE, CHOCOLATE CREAM, INDIVIDUAL SIZE OR TART","Pie, chocolate cream, individual size or tart" +53343000,"PIE, COCONUT CREAM","Pie, coconut cream" +53343070,"PIE, COCONUT CREAM, INDIVIDUAL SIZE OR TART","Pie, coconut cream, individual size or tart" +53344000,"PIE, CUSTARD","Pie, custard" +53344070,"PIE, CUSTARD, INDIVIDUAL SIZE OR TART","Pie, custard, individual size or tart" +53344200,"MIXED FRUIT TART FILLED WITH CUSTARD OR CREAM CHEESE","Mixed fruit tart filled with custard or cream cheese" +53344300,"DESSERT PIZZA","Dessert pizza" +53345000,"PIE, LEMON CREAM","Pie, lemon cream" +53345070,"PIE, LEMON CREAM, INDIVIDUAL SIZE OR TART","Pie, lemon cream, individual size or tart" +53346000,"PIE, PEANUT BUTTER CREAM","Pie, peanut butter cream" +53346500,"PIE, PINEAPPLE CREAM","Pie, pineapple cream" +53347000,"PIE, PUMPKIN","Pie, pumpkin" +53347070,"PIE, PUMPKIN, INDIVIDUAL SIZE OR TART","Pie, pumpkin, individual size or tart" +53347100,"PIE, RASPBERRY CREAM","Pie, raspberry cream" +53347500,"PIE, SOUR CREAM, RAISIN","Pie, sour cream, raisin" +53347600,"PIE, SQUASH","Pie, squash" +53348000,"PIE, STRAWBERRY CREAM","Pie, strawberry cream" +53348070,"PIE, STRAWBERRY CREAM, INDIVIDUAL SIZE OR TART","Pie, strawberry cream, individual size or tart" +53360000,"PIE, SWEET POTATO","Pie, sweet potato" +53365000,"PIE, VANILLA CREAM","Pie, vanilla cream" +53366000,"PIE, YOGURT, FROZEN","Pie, yogurt, frozen" +53370000,"PIE, CHIFFON, NOT CHOCOLATE","Pie, chiffon, not 
chocolate" +53371000,"PIE, CHIFFON, CHOCOLATE","Pie, chiffon, chocolate" +53371100,"PIE, CHIFFON, W/ LIQUEUR","Pie, chiffon, with liqueur" +53373000,"PIE, BLACK BOTTOM","Pie, black bottom" +53381000,"PIE, LEMON MERINGUE","Pie, lemon meringue" +53381070,"PIE, LEMON MERINGUE, INDIVIDUAL SIZE OR TART","Pie, lemon meringue, individual size or tart" +53382000,"PIE, CHOCOLATE-MARSHMALLOW","Pie, chocolate-marshmallow" +53385000,"PIE, PECAN","Pie, pecan" +53385070,"PIE, PECAN, INDIVIDUAL SIZE","Pie, pecan, individual size or tart" +53385500,"PIE, OATMEAL","Pie, oatmeal" +53386000,"PIE, PUDDING, NOT CHOCOLATE","Pie, pudding, flavors other than chocolate" +53386050,"PIE, PUDDING, NOT CHOC, INDIVIDUAL SIZE","Pie, pudding, flavors other than chocolate, individual size or tart" +53386250,"PIE, PUDDING, CHOC, W/ CHOC COATING, INDIVID SIZE","Pie, pudding, chocolate, with chocolate coating, individual size" +53386500,"PIE, PUDDING, NOT CHOC, CHOC-COATED, INDIVID SIZE","Pie, pudding, flavors other than chocolate, with chocolate coating, individual size" +53387000,"PIE, TOLL HOUSE CHOCOLATE CHIP","Pie, Toll house chocolate chip" +53390000,"PIE, SHOO-FLY","Pie, shoo-fly" +53390100,"PIE, TOFU W/ FRUIT","Pie, tofu with fruit" +53391000,"PIE SHELL","Pie shell" +53391100,"PIE SHELL, GRAHAM CRACKER","Pie shell, graham cracker" +53391150,"PIE SHELL, CHOCOLATE WAFER","Pie shell, chocolate wafer" +53391200,"VANILLA WAFER DESSERT BASE","Vanilla wafer dessert base" +53400200,"BLINTZ, CHEESE-FILLED","Blintz, cheese-filled" +53400300,"BLINTZ, FRUIT-FILLED","Blintz, fruit-filled" +53410100,"COBBLER, APPLE (INCLUDE FRUIT COBBLER)","Cobbler, apple" +53410200,"COBBLER, APRICOT","Cobbler, apricot" +53410300,"COBBLER, BERRY","Cobbler, berry" +53410500,"COBBLER, CHERRY","Cobbler, cherry" +53410800,"COBBLER, PEACH","Cobbler, peach" +53410850,"COBBLER, PEAR","Cobbler, pear" +53410860,"COBBLER, PINEAPPLE","Cobbler, pineapple" +53410880,"COBBLER, PLUM","Cobbler, plum" +53410900,"COBBLER, RHUBARB","Cobbler, 
rhubarb" +53415100,"CRISP, APPLE, APPLE DESSERT","Crisp, apple, apple dessert" +53415120,"FRITTER, APPLE","Fritter, apple" +53415200,"FRITTER, BANANA","Fritter, banana" +53415220,"FRITTER, BERRY","Fritter, berry" +53415300,"CRISP, BLUEBERRY","Crisp, blueberry" +53415400,"CRISP, CHERRY","Crisp, cherry" +53415500,"CRISP, PEACH","Crisp, peach" +53415600,"CRISP, RHUBARB","Crisp, rhubarb" +53420000,"CREAM PUFF/ECLAIR, CUSTARD/CREAM-FILLED, NS ICING","Cream puff, eclair, custard or cream filled, NS as to icing" +53420100,"CREAM PUFF/ECLAIR, CUSTARD/CREAM-FILLED, NOT ICED","Cream puff, eclair, custard or cream filled, not iced" +53420200,"CREAM PUFF/ECLAIR, CUSTARD/CREAM-FILLED, ICED","Cream puff, eclair, custard or cream filled, iced" +53420210,"CREAM PUFF/ECLAIR, CUSTARD/CREAM-FILLED, ICED, REDUCED FAT","Cream puff, eclair, custard or cream filled, iced, reduced fat" +53420250,"CREAM PUFFS, NO FILLING OR ICING","Cream puff, no filling or icing" +53420300,"AIR-FILLED FRITTER, W/O SYRUP, PUERTO RICAN STYLE","Air filled fritter or fried puff, without syrup, Puerto Rican style (Bunuelos de viento)" +53420310,"WHEAT FLOUR FRITTER, W/O SYRUP","Wheat flour fritter, without syrup" +53420400,"SOPAIPILLA W/O SYRUP OR HONEY","Sopaipilla, without syrup or honey" +53420410,"SOPAIPILLA W/ SYRUP OR HONEY","Sopaipilla with syrup or honey" +53430000,"CREPE, DESSERT TYPE, NS AS TO FILLING","Crepe, dessert type, NS as to filling" +53430100,"CREPE, DESSERT TYPE, CHOCOLATE-FILLED","Crepe, dessert type, chocolate-filled" +53430200,"CREPE, DESSERT TYPE, FRUIT-FILLED","Crepe, dessert type, fruit-filled" +53430250,"CREPE SUZETTE","Crepe suzette" +53430300,"CREPE, DESSERT TYPE, ICE CREAM-FILLED","Crepe, dessert type, ice cream-filled" +53430700,"TAMALE, SWEET","Tamale, sweet" +53430750,"TAMALE, SWEET, W/ FRUIT","Tamale, sweet, with fruit" +53440000,"STRUDEL, APPLE (INCLUDE STRUDEL, NFS)","Strudel, apple" +53440300,"STRUDEL, BERRY","Strudel, berry" +53440500,"STRUDEL, CHERRY","Strudel, cherry" 
+53440600,"STRUDEL, CHEESE","Strudel, cheese" +53440700,"STRUDEL, PEACH","Strudel, peach" +53440750,"STRUDEL, PINEAPPLE","Strudel, pineapple" +53440800,"STRUDEL, CHEESE & FRUIT","Strudel, cheese and fruit" +53441110,"BAKLAVA (INCLUDE KADAYIF)","Baklava" +53441210,"BASBOUSA (SEMOLINA DESSERT DISH)","Basbousa (semolina dessert dish)" +53450000,"TURNOVER OR DUMPLING, APPLE","Turnover or dumpling, apple" +53450300,"TURNOVER OR DUMPLING, BERRY","Turnover or dumpling, berry" +53450500,"TURNOVER OR DUMPLING, CHERRY","Turnover or dumpling, cherry" +53450800,"TURNOVER OR DUMPLING, LEMON","Turnover or dumpling, lemon" +53451000,"TURNOVER OR DUMPLING, PEACH","Turnover or dumpling, peach" +53451500,"TURNOVER, GUAVA","Turnover, guava" +53451750,"TURNOVER, PUMPKIN","Turnover, pumpkin" +53452100,"PASTRY, FRUIT-FILLED","Pastry, fruit-filled" +53452120,"PASTRY, ASIAN, MADE WITH BEAN OR LOTUS SEED PASTE FILLING","Pastry, Asian, made with bean or lotus seed paste filling (baked)" +53452130,"PASTRY, ASIAN, MADE WITH BEAN PASTE AND SALTED EGG YOLK FILL","Pastry, Asian, made with bean paste and salted egg yolk filling (baked)" +53452150,"PASTRY, CHINESE (INCLUDE 9-LAYER PUDDING)","Pastry, Chinese, made with rice flour" +53452170,"PASTRY, COOKIE TYPE, FRIED(INCL POLISH PACZKI)","Pastry, cookie type, fried" +53452200,"PASTRY, ITALIAN, W/ CHEESE (INCLUDE CANNOLI)","Pastry, Italian, with cheese" +53452400,"PASTRY, PUFF","Pastry, puff" +53452420,"PASTRY, PUFF, CUSTARD/CREAM FILLED, ICED/NOT ICED","Pastry, puff, custard or cream filled, iced or not iced" +53452450,"CHEESE PASTRY PUFF","Cheese pastry puffs" +53452500,"PASTRY, MAINLY FLOUR & WATER, FRIED","Pastry, mainly flour and water, fried" +53453150,"EMPANADA, MEXICAN TURNOVER, FRUIT-FILLED","Empanada, Mexican turnover, fruit-filled" +53453170,"EMPANADA, MEXICAN TURNOVER, PUMPKIN","Empanada, Mexican turnover, pumpkin" +53500100,"BREAKFAST PASTRY, NFS","Breakfast pastry, NFS" +53510000,"DANISH PASTRY, PLAIN/SPICE (INCL W/ ICING)","Danish 
pastry, plain or spice" +53510100,"DANISH PASTRY, W/ FRUIT","Danish pastry, with fruit" +53511000,"DANISH PASTRY, W/ CHEESE","Danish pastry, with cheese" +53520000,"DOUGHNUT, NS AS TO CAKE OR YEAST","Doughnut, NS as to cake or yeast" +53520110,"DOUGHNUT, CAKE TYPE","Doughnut, cake type" +53520120,"DOUGHNUT, CHOCOLATE, CAKE TYPE","Doughnut, chocolate, cake type" +53520140,"DOUGHNUT, CAKE TYPE, CHOCOLATE COVERED","Doughnut, cake type, chocolate covered" +53520150,"DOUGHNUT, CAKE TYPE, CHOCOLATE COVERED, W/ PEANUTS","Doughnut, cake type, chocolate covered, dipped in peanuts" +53520160,"DOUGHNUT, CHOCOLATE, CAKE TYPE, WITH CHOCOLATE ICING","Doughnut, chocolate, cake type, with chocolate icing" +53520200,"CHURROS (INCL MEXICAN CRUELLERS)","Churros" +53520500,"DOUGHNUT, ASIAN","Doughnut, Asian" +53520600,"CRULLER, NFS","Cruller, NFS" +53520700,"FRENCH CRULLER","French cruller" +53521100,"DOUGHNUT, CHOCOLATE, RAISED OR YEAST, WITH CHOCOLATE ICING","Doughnut, chocolate, raised or yeast, with chocolate icing" +53521110,"DOUGHNUT, RAISED / YEAST","Doughnut, raised or yeast" +53521120,"DOUGHNUT, CHOCOLATE, RAISED OR YEAST","Doughnut, chocolate, raised or yeast" +53521130,"DOUGHNUT, RAISED OR YEAST, CHOCOLATE COVERED","Doughnut, raised or yeast, chocolate covered" +53521140,"DOUGHNUT, JELLY","Doughnut, jelly" +53521210,"DOUGHNUT, CUSTARD-FILLED","Doughnut, custard-filled" +53521220,"DOUGHNUT, CHOCOLATE CREAM-FILLED","Doughnut, chocolate cream-filled" +53521230,"DOUGHNUT, CUSTARD-FILLED, WITH ICING","Doughnut, custard-filled, with icing" +53521250,"DOUGHNUT, WHEAT","Doughnut, wheat" +53521300,"DOUGHNUT, WHEAT, CHOCOLATE COVERED","Doughnut, wheat, chocolate covered" +53530000,"BREAKFAST TART","Breakfast tart" +53530010,"BREAKFAST TART, LOWFAT","Breakfast tart, lowfat" +53610100,"COFFEE CAKE, CRUMB OR QUICK-BREAD TYPE","Coffee cake, crumb or quick-bread type" +53610170,"COFFEE CAKE, CRUMB OR QUICK-BREAD TYPE, W/ FRUIT","Coffee cake, crumb or quick-bread type, with fruit" 
+53610200,"COFFEECAKE, CRUMB OR QUICK-BREAD TYPE, CHEESE FILLD","Coffee cake, crumb or quick-bread type, cheese-filled" +53710400,"FIBER ONE CHEWY BAR","Fiber One Chewy Bar" +53710500,"KELLOGG'S NUTRI-GRAIN CEREAL BAR","Kellogg's Nutri-Grain Cereal Bar" +53710502,"KELLOGG'S NUTRI-GRAIN YOGURT BAR","Kellogg's Nutri-Grain Yogurt Bar" +53710504,"KELLOGG'S NUTRI-GRAIN FRUIT AND NUT BAR","Kellogg's Nutri-Grain Fruit and Nut Bar" +53710600,"MILK 'N CEREAL BAR","Milk 'n Cereal bar" +53710700,"KELLOGG'S SPECIAL K BAR","Kellogg's Special K bar" +53710800,"KASHI GOLEAN CHEWY BARS","Kashi GOLEAN Chewy Bars" +53710802,"KASHI TLC CHEWY GRANOLA BAR","Kashi TLC Chewy Granola Bar" +53710804,"KASHI GOLEAN CRUNCHY BARS","Kashi GOLEAN Crunchy Bars" +53710806,"KASHI TLC CRUNCHY GRANOLA BAR","Kashi TLC Crunchy Granola Bar" +53710900,"NATURE VALLEY CHEWY TRAIL MIX GRANOLA BAR","Nature Valley Chewy Trail Mix Granola Bar" +53710902,"NATURE VALLEY CHEWY GRANOLA BAR WITH YOGURT COATING","Nature Valley Chewy Granola Bar with Yogurt Coating" +53710904,"NATURE VALLEY SWEET AND SALTY GRANOLA BAR","Nature Valley Sweet and Salty Granola Bar" +53710906,"NATURE VALLEY CRUNCHY GRANOLA BAR","Nature Valley Crunchy Granola Bar" +53711000,"QUAKER CHEWY GRANOLA BAR","Quaker Chewy Granola Bar" +53711002,"QUAKER CHEWY 90 CALORIE GRANOLA BAR","Quaker Chewy 90 Calorie Granola Bar" +53711004,"QUAKER CHEWY 25% LESS SUGAR GRANOLA BAR","Quaker Chewy 25% Less Sugar Granola Bar" +53711006,"QUAKER CHEWY DIPPS GRANOLA BAR","Quaker Chewy Dipps Granola Bar" +53711100,"QUAKER GRANOLA BITES","Quaker Granola Bites" +53712000,"SNACK BAR, OATMEAL","Snack bar, oatmeal" +53712100,"GRANOLA BAR, NFS","Granola bar, NFS" +53712200,"GRANOLA BAR, LOWFAT, NFS","Granola bar, lowfat, NFS" +53712210,"GRANOLA BAR, NONFAT","Granola bar, nonfat" +53713000,"GRANOLA BAR, REDUCED SUGAR, NFS","Granola bar, reduced sugar, NFS" +53713100,"GRANOLA BAR, PEANUTS , OATS, SUGAR, WHEAT GERM","Granola bar, peanuts , oats, sugar, wheat germ" 
+53714200,"GRANOLA BAR, CHOCOLATE-COATED, NFS","Granola bar, chocolate-coated, NFS" +53714210,"GRANOLA BAR, WITH COCONUT, CHOCOLATE-COATED","Granola bar, with coconut, chocolate-coated" +53714220,"GRANOLA BAR WITH NUTS, CHOCOLATE-COATED","Granola bar with nuts, chocolate-coated" +53714230,"GRANOLA BAR, OATS, NUTS, COATED WITH NON-CHOCOLATE COATING","Granola bar, oats, nuts, coated with non-chocolate coating" +53714250,"GRANOLA BAR, COATED WITH NON-CHOCOLATE COATING","Granola bar, coated with non-chocolate coating" +53714300,"GRANOLA BAR, HIGH FIBER, COATED W/ NON-CHOC YOGURT COATING","Granola bar, high fiber, coated with non-chocolate yogurt coating" +53714400,"GRANOLA BAR, WITH RICE CEREAL","Granola bar, with rice cereal" +53714500,"BREAKFAST BAR, NFS","Breakfast bar, NFS" +53714510,"BREAKFAST BAR, DATE, WITH YOGURT COATING","Breakfast bar, date, with yogurt coating" +53714520,"BREAKFAST BAR, CEREAL CRUST WITH FRUIT FILLING, LOWFAT","Breakfast bar, cereal crust with fruit filling, lowfat" +53720100,"BALANCE ORIGINAL BAR","Balance Original Bar" +53720200,"CLIF BAR","Clif Bar" +53720300,"POWERBAR","PowerBar" +53720400,"SLIM FAST ORIGINAL MEAL BAR","Slim Fast Original Meal Bar" +53720500,"SNICKERS MARATHON PROTEIN BAR","Snickers Marathon Protein bar" +53720510,"SNICKERS MARATHON ENERGY BAR","Snickers Marathon Energy bar" +53720600,"SOUTH BEACH LIVING MEAL BAR","South Beach Living Meal Bar" +53720610,"SOUTH BEACH LIVING HIGH PROTEIN BAR","South Beach Living High Protein Bar" +53720700,"TIGER'S MILK BAR","Tiger's Milk bar" +53720800,"ZONE PERFECT CLASSIC CRUNCH NUTRITION BAR","Zone Perfect Classic Crunch nutrition bar" +53729000,"NUTRITION BAR OR MEAL REPLACEMENT BAR, NFS","Nutrition bar or meal replacement bar, NFS" +53801000,"CEREAL BAR WITH FRUIT FILLING, BABY FOOD","Cereal bar with fruit filling, baby food" +53803050,"COOKIE, FRUIT, BABY FOOD","Cookie, fruit, baby food" +53803100,"COOKIE, BABY FOOD","Cookie, baby food" +53803250,"COOKIE, TEETHING, BABY","Cookie, 
teething, baby" +53803300,"COOKIE, RICE, BABY","Cookie, rice, baby" +54001000,"CRACKER, NS AS TO SWEET/NONSWEET (INCL CRACKER,NFS)","Crackers, NS as to sweet or nonsweet" +54102010,"CRACKERS, GRAHAM","Crackers, graham" +54102020,"CRACKERS, GRAHAM, CHOCOLATE COVERED","Crackers, graham, chocolate covered" +54102050,"CRACKERS, OATMEAL","Crackers, oatmeal" +54102060,"CRACKERS, CUBAN","Crackers, Cuban" +54102070,"CRACKERS, CUCA","Crackers, Cuca" +54102080,"CRACKERS, GRAHAM, W/ RAISINS","Crackers, graham, with raisins" +54102100,"CRACKERS, GRAHAM, LOWFAT","Crackers, graham, lowfat" +54102110,"CRACKERS, GRAHAM, FAT FREE","Crackers, graham, fat free" +54102200,"CRACKERS, GRAHAM, SANDWICH-TYPE, WITH FILLING","Crackers, graham, sandwich-type, with filling" +54201010,"CRACKERS, MATZO, LOW SODIUM","Crackers, matzo, low sodium" +54202010,"CRACKERS, SALTINE, LOW SODIUM","Crackers, saltine, low sodium" +54203010,"CRACKERS, TOAST THINS (RYE/WHEAT/WHITE), LOW SODIUM","Crackers, toast thins (rye, wheat, white flour), low sodium" +54204010,"CRACKER, 100% WHOLE WHEAT,LO SODIUM","Cracker, 100% whole wheat, low sodium" +54205010,"CRACKER, SNACK, LOW SODIUM","Cracker, snack, low sodium" +54205030,"CRACKER, CHEESE, LOW SODIUM","Cracker, cheese, low sodium" +54205100,"CRACKER, SNACK, LOWFAT, LOW SODIUM","Cracker, snack, lowfat, low sodium" +54206010,"PUFFED RICE CAKE W/O SALT","Puffed rice cake without salt" +54207010,"CRISPBREAD, WHEAT, LOW SODIUM","Crispbread, wheat, low sodium" +54210010,"CRACKER, MULTIGRAIN, LOW SODIUM","Cracker, multigrain, low sodium" +54222000,"CRISPBREAD, RYE, LOW SODIUM","Crispbread, rye, low sodium" +54301000,"CRACKER, SNACK","Cracker, snack" +54301100,"CRACKER, SNACK, REDUCED FAT","Cracker, snack, reduced fat" +54301200,"CRACKER, SNACK, FAT FREE","Cracker, snack, fat free" +54304000,"CRACKERS, CHEESE","Cracker, cheese" +54304100,"CRACKER, CHEESE, REDUCED FAT","Cracker, cheese, reduced fat" +54304150,"CRACKER, CHEESE, WHOLE GRAIN","Cracker, cheese, whole grain" 
+54304500,"CRACKER, HIGH FIBER, NO ADDED FAT","Cracker, high fiber, no added fat" +54305000,"CRISPBREAD, WHEAT, NO ADDED FAT","Crispbread, wheat, no added fat" +54307000,"CRACKERS, MATZO","Crackers, matzo" +54308000,"CRACKERS, MILK","Crackers, milk" +54309000,"CRACKERS, OAT BRAN (INCLUDE NABISCO OAT THINS)","Crackers, oat" +54313000,"CRACKERS, OYSTER","Crackers, oyster" +54318000,"CHIPS, BROWN RICE","Chips, brown rice" +54318500,"RICE CAKE, CRACKER-TYPE","Rice cake, cracker-type" +54319000,"CRACKERS, RICE","Crackers, rice" +54319010,"PUFFED RICE CAKE","Puffed rice cake" +54319020,"POPCORN CAKE (INCL PUFFED CORN & RICE CAKE)","Popcorn cake" +54319200,"PUFFED WHEAT CAKE (INCL QUAKER)","Puffed wheat cake" +54319500,"RICE PAPER","Rice paper" +54322000,"CRISPBREAD, RYE, NO ADDED FAT","Crispbread, rye, no added fat" +54325000,"CRACKERS, SALTINES","Crackers, saltine" +54325010,"CRACKERS, SALTINE, FAT FREE","Crackers, saltine, fat free" +54325050,"CRACKERS, SALTINE, WHOLE WHEAT","Crackers, saltine, whole wheat" +54326000,"CRACKERS, MULTIGRAIN","Crackers, multigrain, made with whole wheat, wheat, oat, and other flours" +54327950,"CRACKERS, CYLINDRICAL, PEANUT BUTTER-FILLED","Crackers, cylindrical, peanut-butter filled" +54328000,"CRACKER, SANDWICH-TYPE, NFS","Crackers, sandwich-type, NFS" +54328100,"CRACKER,SANDWICH-TYPE,PEANUT BUTTER FILLED","Cracker, sandwich-type, peanut butter filled" +54328110,"CRACKER, SANDWICH, PEANUT BUTTER FILLED, RED FAT","Cracker, sandwich-type, peanut butter filled, reduced fat" +54328200,"CRACKER,SANDWICH-TYPE, CHEESE-FILLED","Cracker, sandwich-type, cheese-filled" +54334000,"CRACKERS, TOAST THINS","Crackers, toast thins (rye, pumpernickel, white flour)" +54336000,"CRACKER, WATER BISCUIT","Crackers, water biscuits" +54337000,"CRACKER, 100% WHOLE WHEAT","Cracker, 100% whole wheat" +54337050,"CRACKER, 100% WHOLE WHEAT, REDUCED FAT","Cracker, 100% whole wheat, reduced fat" +54338000,"CRACKERS, WHEAT","Crackers, wheat" +54338100,"CRACKERS, WHEAT, 
REDUCED FAT","Crackers, wheat, reduced fat" +54339000,"CRACKER CORN (INCL STONED CORN CRACKER)","Crackers, corn" +54350000,"CRACKERS, BABY FOOD","Crackers, baby food" +54350010,"GERBER FINGER FOODS, PUFFS, BABY FOOD","Gerber Finger Foods, Puffs, baby food" +54360000,"CRUNCHY SNACKS, CORN BASED, BABY FOOD","Crunchy snacks, corn based, baby food" +54401010,"SALTY SNACKS, CORN / CORNMEAL BASE, NUT /NUG, TSTD","Salty snacks, corn or cornmeal base, nuts or nuggets, toasted" +54401020,"SALTY SNACKS, CORN OR CORNMEAL, CORN CHIPS, CHEESE","Salty snacks, corn or cornmeal base, corn chips, corn-cheese chips" +54401050,"SALTY SNACKS, CORN OR CORNMEAL, CORN PUFFS, TWISTS","Salty snacks, corn or cornmeal base, corn puffs and twists; corn-cheese puffs and twists" +54401080,"SALTY SNACKS, CORN OR CORNMEAL, TORTILLA CHIPS","Salty snacks, corn or cornmeal base, tortilla chips" +54401090,"SALTY SNACKS, CORN/CORN-CHEESE CHIPS, UNSALTED","Salty snacks, corn or cornmeal base, corn chips, corn-cheese chips, unsalted" +54401100,"SALTY SNACKS,CORN / CORNMEAL BASE,TORTILLA CHIPS LT","Salty snacks, corn or cornmeal base, tortilla chips, light (baked with less oil)" +54401120,"SALTY SNACKS, TORTILLA CHIPS, FAT FREE, W/ OLEAN","Salty snacks, corn or cornmeal base, tortilla chips, fat free, made with Olean" +54401150,"SALTY SNACKS,CORN/CORNMEAL BASE,TORTILLA,LOWFAT,BKD","Salty snacks, corn or cornmeal base, tortilla chips, lowfat, baked without fat" +54401170,"SALTY SNACKS,CORN/CORNMEAL,TORTILLA,LOWFAT,BKD,NO SALT","Salty snacks, corn or cornmeal base, tortilla chips, lowfat, baked without fat, unsalted" +54401200,"SALTY SNACKS, CORN/CORNML BASE,W/OAT BRAN,TORT CHPS","Salty snacks, corn or cornmeal base, with oat bran, tortilla chips" +54401210,"SALTY SNACKS, CORN BASED/CHEESE PUFFS & TWISTS, LOWFAT","Salty snacks, corn based puffs and twists, cheese puffs and twists, lowfat" +54402080,"TORTILLA CHIPS, UNSALTED","Salty snacks, corn or cornmeal base, tortilla chips, unsalted" +54402200,"SALTY 
SNACK MIXTURE,MOSTLY CORN,W/PRETZELS,W/O NUTS","Salty snack mixture, mostly corn or cornmeal based, with pretzels, without nuts" +54402300,"SALTY SNACKS, WHEAT-BASE, HIGH FIBER","Salty snacks, wheat-based, high fiber" +54402500,"SALTY SNACKS, WHEAT-AND CORN-BASED CHIPS","Salty snacks, wheat- and corn-based chips" +54402600,"SALTY SNACKS, MULTIGRAIN, WHOLE GRAIN, CHIPS","Salty snacks, multigrain, whole grain, chips (made with whole corn, whole wheat, rice flour, and whole oat flour)" +54402610,"SALTY SNACKS, MULTIGRAIN& POT CHIPS(W/RICE FL,POT,CORN FL)","Salty snacks, multigrain and potato chips (made with rice flour, dried potatoes, corn flour, and wheat starch)" +54402700,"PITA CHIPS","Pita chips" +54403000,"POPCORN, POPPED IN OIL, UNBUTTERED","Popcorn, popped in oil, unbuttered" +54403010,"POPCORN, AIR-POPPED (NO BUTTER OR OIL ADDED)","Popcorn, air-popped (no butter or no oil added)" +54403020,"POPCORN, POPPED IN OIL, BUTTERED","Popcorn, popped in oil, buttered" +54403040,"POPCORN, AIR-POPPED, BUTTERED","Popcorn, air-popped, buttered" +54403050,"POPCORN, FLAVORED (CHEESE, BBQ, SOUR CREAM, ONION)","Popcorn, flavored" +54403060,"POPCORN, POPPED IN OIL, LOWFAT, LOW SODIUM","Popcorn, popped in oil, lowfat, low sodium" +54403070,"POPCORN, POPPED IN OIL, LOWFAT","Popcorn, popped in oil, lowfat" +54403090,"POPCORN, POPPED IN OIL, UNSALTED","Popcorn, popped in oil, unsalted" +54403110,"POPCORN, SUGAR SYRUP OR CARAMEL COATED","Popcorn, sugar syrup or caramel-coated" +54403120,"POPCORN, SUGAR SYRUP OR CARAMEL COATED, W/ NUTS","Popcorn, sugar syrup or caramel-coated, with nuts" +54403150,"POPCORN, SUGAR SYRUP/CARAMEL COATED, FAT FREE","Popcorn, sugar syrup or caramel-coated, fat free" +54406010,"SNACKS, ONION-FLAVORED RINGS","Snacks, onion-flavored rings" +54406200,"SHRIMP CHIPS","Shrimp chips (tapioca base)" +54408000,"PRETZELS, NFS","Pretzels, NFS" +54408010,"PRETZELS, HARD","Pretzels, hard" +54408020,"PRETZELS, SOFT","Pretzels, soft" +54408030,"PRETZELS, HARD, 
UNSALTED","Pretzel, hard, unsalted" +54408040,"PRETZELS, SOFT, UNSALTED","Pretzels, soft, unsalted" +54408050,"PRETZEL, OAT BRAN, HARD","Pretzel, oatbran, hard" +54408070,"PRETZEL, HARD, MULTIGRAIN","Pretzel, hard, multigrain" +54408100,"PRETZEL, BABY FOOD","Pretzel, baby food" +54408200,"PRETZEL, HARD, CHOCOLATE COATED","Pretzel, hard, chocolate-coated" +54408250,"PRETZEL, YOGURT COVERED","Pretzel, yogurt-covered" +54408300,"PRETZELS, CHEESE-FILLED (INCL COMBOS)","Pretzels, cheese-filled" +54412110,"WHEAT STICKS, 100% WHOLE WHEAT","Wheat sticks, 100% whole wheat" +54420010,"MULTIGRAIN MIXTURE, PRETZELS, CEREAL &/ CRACKERS,NUTS","Multigrain mixture, pretzels, cereal and/or crackers, nuts" +54420100,"ORIENTAL PARTY MIX, W/ PEANUTS, SESAME STICKS, ETC","Oriental party mix, with peanuts, sesame sticks, chili rice crackers and fried green peas" +54420200,"MULTIGRAIN MIX, BREAD STICKS, SESAME NUGGETS, PRETZ","Multigrain mixture, bread sticks, sesame nuggets, pretzels, rye chips" +54430010,"YOGURT CHIPS","Yogurt chips" +54440010,"BAGEL CHIP","Bagel chip" +55101000,"PANCAKES, PLAIN (INCLUDE PANCAKES, NFS)","Pancakes, plain" +55101010,"PANCAKES, REDUCED CALORIE, HIGH FIBER","Pancakes, reduced calorie, high fiber" +55101015,"PANCAKES, PLAIN, REDUCED FAT","Pancakes, plain, reduced fat" +55101020,"PANCAKES, PLAIN, FAT FREE","Pancakes, plain, fat free" +55103000,"PANCAKES, W/ FRUIT (INCLUDE BLUEBERRY PANCAKES)","Pancakes, with fruit" +55103100,"PANCAKES W/ CHOC CHIPS","Pancakes, with chocolate chips" +55105000,"PANCAKES, BUCKWHEAT","Pancakes, buckwheat" +55105100,"PANCAKES, CORNMEAL","Pancakes, cornmeal" +55105200,"PANCAKES, WHOLE WHEAT","Pancakes, whole wheat" +55105205,"PANCAKES, WHOLE WHEAT, REDUCED FAT","Pancakes, whole wheat, reduced fat" +55105210,"PANCAKES, WHOLE WHEAT, FAT FREE","Pancakes, whole wheat, fat free" +55105300,"PANCAKES, SOURDOUGH","Pancakes, sour dough" +55105400,"PANCAKES, RYE","Pancakes, rye" +55201000,"WAFFLE, PLAIN","Waffle, plain" +55202000,"WAFFLE, 
WHEAT, BRAN, OR MULTIGRAIN","Waffle, wheat, bran, or multigrain" +55203000,"WAFFLE, FRUIT","Waffle, fruit" +55203500,"WAFFLE, NUT & HONEY (INCL EGGO)","Waffle, nut and honey" +55203600,"WAFFLE, CHOCOLATE CHIP","Waffle, chocolate chip" +55204000,"WAFFLE, CORNMEAL","Waffle, cornmeal" +55205000,"WAFFLE, 100% WHOLE WHEAT OR 100% WHOLE GRAIN","Waffle, 100% whole wheat or 100% whole grain" +55206000,"WAFFLE, OAT BRAN","Waffle, oat bran" +55207000,"WAFFLE, MULTI-BRAN (INCLUDE EGGO NUTRIGRAIN)","Waffle, multi-bran" +55211000,"WAFFLE, PLAIN, FAT FREE","Waffle, plain, fat free" +55211050,"WAFFLE, PLAIN, LOWFAT","Waffle, plain, lowfat" +55212000,"WAFFLE, WHOLE WHEAT, LOWFAT","Waffle, whole wheat, lowfat" +55301000,"FRENCH TOAST, PLAIN (INCLUDE ROMAN MEAL)","French toast, plain" +55301050,"FRENCH TOAST STICKS, PLAIN","French toast sticks, plain" +55310100,"BREAD FRITTERS, P.R.","Bread fritters, Puerto Rican style (Torrejas gallegas, Galician fritters)" +55401000,"CREPE, PLAIN (INCLUDE FRENCH PANCAKE)","Crepe, plain" +55501000,"FLOUR & WATER PATTY (INCLUDE CHINESE PANCAKE)","Flour and water patty" +55502000,"FLOUR AND WATER GRAVY","Flour and water gravy" +55610200,"DUMPLING, FRIED, PUERTO RICAN STYLE","Dumpling, fried, Puerto Rican style" +55610300,"DUMPLING, PLAIN","Dumpling, plain" +55701000,"CAKE MADE W/ GLUTINOUS RICE","Cake made with glutinous rice" +55702000,"CAKE OR PANCAKE MADE W/ RICE FLOUR &/OR DRIED BEANS","Cake or pancake made with rice flour and/or dried beans" +55702100,"DOSA (INDIAN)","Dosa (Indian), plain" +55703000,"CAKE MADE W/ GLUTINOUS RICE & DRIED BEANS","Cake made with glutinous rice and dried beans" +55801000,"FUNNEL CAKE WITH SUGAR","Funnel cake with sugar" +55801010,"FUNNEL CAKE WITH SUGAR AND FRUIT","Funnel cake with sugar and fruit" +56101000,"MACARONI, COOKED, NS AS TO ADDED FAT","Macaroni, cooked, NS as to fat added in cooking" +56101010,"MACARONI, COOKED, NO FAT ADDED","Macaroni, cooked, fat not added in cooking" +56101030,"MACARONI, COOKED, FAT 
ADDED","Macaroni, cooked, fat added in cooking" +56102000,"MACARONI, WHOLE WHEAT, COOKED, NS AS TO ADDED FAT","Macaroni, whole wheat, cooked, NS as to fat added in cooking" +56102010,"MACARONI, WHOLE WHEAT, NO FAT ADDED","Macaroni, whole wheat, cooked, fat not added in cooking" +56102020,"MACARONI, WHOLE WHEAT, FAT ADDED","Macaroni, whole wheat, cooked, fat added in cooking" +56103000,"MACARONI, SPINACH, NS AS TO ADDED FAT","Macaroni, cooked, spinach, NS as to fat added in cooking" +56103010,"MACARONI, SPINACH, NO FAT ADDED","Macaroni, cooked, spinach, fat not added in cooking" +56103020,"MACARONI, SPINACH, FAT ADDED","Macaroni, cooked, spinach, fat added in cooking" +56104000,"MACARONI,CKD,VEGETABLE,NS AS TO FAT ADDED","Macaroni, cooked, vegetable, NS as to fat added in cooking" +56104010,"MACARONI,COOKED,VEGETABLE,FAT NOT ADDED IN COOKING","Macaroni, cooked, vegetable, fat not added in cooking" +56104020,"MACARONI,COOKED,VEGETABLE, FAT ADDED IN COOKING","Macaroni, cooked, vegetable, fat added in cooking" +56112000,"NOODLES, COOKED, NS AS TO ADDED FAT","Noodles, cooked, NS as to fat added in cooking" +56112010,"NOODLES, COOKED, NO FAT ADDED","Noodles, cooked, fat not added in cooking" +56112030,"NOODLES, COOKED, FAT ADDED","Noodles, cooked, fat added in cooking" +56113000,"NOODLES, COOKED,WHOLE WHEAT,NS AS TO FAT ADDED","Noodles, cooked, whole wheat, NS as to fat added in cooking" +56113010,"NOODLES, WHOLE WHEAT, COOKED, NO FAT ADDED","Noodles, cooked, whole wheat, fat not added in cooking" +56113990,"NOODLES, COOKED, SPINACH, NS AS TO FAT","Noodles, cooked, spinach, NS as to fat added in cooking" +56114000,"NOODLES, SPINACH, COOKED, NO FAT ADDED","Noodles, cooked, spinach, fat not added in cooking" +56114020,"NOODLES, COOKED, SPINACH, FAT ADDED","Noodles, cooked, spinach, fat added in cooking" +56116000,"NOODLES, CHOW MEIN","Noodles, chow mein" +56116990,"LONG RICE NOODLES(FROM MUNG BEANS),CKD,NS FAT ADDed","Long rice noodles (made from mung beans) cooked, NS as 
to fat added in cooking" +56117000,"LONG RICE NOODLES, COOKED, NO FAT ADDED","Long rice noodles (made from mung beans), cooked, fat not added in cooking" +56117010,"LONG RICE NOODLES, COOKED, FAT ADDED","Long rice noodles (made from mung beans), cooked, fat added in cooking" +56117090,"CHOW FUN RICE NOODLES,COOKED,NS AS TO FAT ADDED","Chow fun rice noodles, cooked, NS as to fat added in cooking" +56117100,"CHOW FUN RICE NOODLES, COOKED, NO FAT ADDED","Chow fun rice noodles, cooked, fat not added in cooking" +56117110,"CHOW FUN RICE NOODLES, COOKED, FAT ADDED","Chow fun rice noodles, cooked, fat added in cooking" +56130000,"SPAGHETTI, COOKED, NS AS TO ADDED FAT","Spaghetti, cooked, NS as to fat added in cooking" +56130010,"SPAGHETTI, COOKED, NO FAT ADDED","Spaghetti, cooked, fat not added in cooking" +56131000,"SPAGHETTI, COOKED, FAT ADDED","Spaghetti, cooked, fat added in cooking" +56132990,"SPAGHETTI, COOKED, WHOLE WHEAT, NS AS TO ADDED FAT","Spaghetti, cooked, whole wheat, NS as to fat added in cooking" +56133000,"SPAGHETTI, COOKED, WHOLE WHEAT, NO FAT ADDED","Spaghetti, cooked, whole wheat, fat not added in cooking" +56133010,"SPAGHETTI, COOKED, WHOLE WHEAT, FAT ADDED","Spaghetti, cooked, whole wheat, fat added in cooking" +56200300,"CEREAL, COOKED, NFS","Cereal, cooked, NFS" +56200350,"CEREAL, COOKED, INSTANT, NS AS TO GRAIN","Cereal, cooked, instant, NS as to grain" +56200390,"BARLEY, COOKED, NS AS TO FAT ADDED IN COOKING","Barley, cooked, NS as to fat added in cooking" +56200400,"BARLEY, COOKED, NO FAT ADDED","Barley, cooked, fat not added in cooking" +56200490,"BUCKWHEAT GROATS, COOKED, NS AS TO FAT ADDED","Buckwheat groats, cooked, NS as to fat added in cooking" +56200500,"BUCKWHEAT GROATS, COOKED, NO FAT ADDED (INCL KASHA)","Buckwheat groats, cooked, fat not added in cooking" +56200510,"BUCKWHEAT GROATS, COOKED, FAT ADDED","Buckwheat groats, cooked, fat added in cooking" +56200990,"GRITS, COOKED,CORN/HOMINY, NS REG, QUICK, INST, NS FAT ADDED","Grits, 
cooked, corn or hominy, NS as to regular, quick, or instant, NS as to fat added in cooking" +56201000,"GRITS, CORN OR HOMINY, NFS, NO FAT ADDED","Grits, cooked, corn or hominy, NS as to regular, quick, or instant, fat not added in cooking" +56201010,"GRITS, CKD, CORN/HOMINY, REGULAR, NO FAT","Grits, cooked, corn or hominy, regular, fat not added in cooking" +56201020,"GRITS, COOKED, CORN/HOMINY, REGULAR, FAT ADDED","Grits, cooked, corn or hominy, regular, fat added in cooking" +56201030,"GRITS, COOKED, CORN/HOMINY, REGULAR, NS AS TO FAT","Grits, cooked, corn or hominy, regular, NS as to fat added in cooking" +56201040,"GRITS, COOKED, CORN/HOMINY, FAT ADDED","Grits, cooked, corn or hominy, NS as to regular, quick, or instant, fat added in cooking" +56201060,"GRITS,CKD,CORN/HOMINY,W/CHEESE,NS TYPE,NS FAT ADDED","Grits, cooked, corn or hominy, with cheese, NS as to regular, quick, or instant, NS as to fat added in cooking" +56201061,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,NS TYPE,FAT NOT ADDED","Grits, cooked, corn or hominy, with cheese, NS as to regular, quick, or instant, fat not added in cooking" +56201062,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,NS TYPE,FAT ADDED","Grits, cooked, corn or hominy, with cheese, NS as to regular, quick, or instant, fat added in cooking" +56201070,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,REG,NS FAT ADDED","Grits, cooked, corn or hominy, with cheese, regular, NS as to fat added in cooking" +56201071,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,REG,FAT NOT ADDED","Grits, cooked, corn or hominy, with cheese, regular, fat not added in cooking" +56201072,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,REG,FAT ADDED","Grits, cooked, corn or hominy, with cheese, regular, fat added in cooking" +56201080,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,QUICK,NS FAT ADDED","Grits, cooked, corn or hominy, with cheese, quick, NS as to fat added in cooking" +56201081,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,QUICK,FAT NOT ADDED","Grits, cooked, corn or hominy, with cheese, quick, fat not added in cooking" 
+56201082,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,QUICK,FAT ADDED","Grits, cooked, corn or hominy, with cheese, quick, fat added in cooking" +56201090,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,INSTANT,NS FAT ADDED","Grits, cooked, corn or hominy, with cheese, instant, NS as to fat added in cooking" +56201091,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,INSTANT,FAT NOT ADDED","Grits, cooked, corn or hominy, with cheese, instant, fat not added in cooking" +56201092,"GRITS,CKD,CORN/HOMINY,W/ CHEESE,INSTANT,FAT ADDED","Grits, cooked, corn or hominy, with cheese, instant, fat added in cooking" +56201110,"GRITS, COOKED, CORN/HOMINY, QUICK, NO FAT ADDED","Grits, cooked, corn or hominy, quick, fat not added in cooking" +56201120,"GRITS, COOKED, CORN/HOMINY, QUICK, FAT ADDED","Grits, cooked, corn or hominy, quick, fat added in cooking" +56201130,"GRITS, COOKED, CORN/HOMINY,QUICK,NS AS TO ADDED FAT","Grits, cooked, corn or hominy, quick, NS as to fat added in cooking" +56201210,"GRITS, COOKED, CORN/HOMINY, INSTANT, NO FAT ADDED","Grits, cooked, corn or hominy, instant, fat not added in cooking" +56201220,"GRITS, CORN/HOMINY, INSTANT, FAT ADDED","Grits, cooked, corn or hominy, instant, fat added in cooking" +56201230,"GRITS, CORN/HOMINY, INSTANT, COOKED, NS AS TO FAT","Grits, cooked, corn or hominy, instant, NS as to fat added in cooking" +56201240,"GRITS, FLAVORED, INSTANT, NO FAT ADDED","Grits, cooked, flavored, corn or hominy, instant, fat not added in cooking" +56201250,"GRITS, FLAVORED, INSTANT, FAT ADDED","Grits, cooked, flavored, corn or hominy, instant, fat added in cooking" +56201260,"GRITS, FLAVORED, INSTANT, NS AS TO ADDED FAT","Grits, cooked, flavored, corn or hominy, instant, NS as to fat added in cooking" +56201300,"GRITS,COOKED,CORN/HOM,NFS,MADE W/MILK, NS AS TO FAT ADDED","Grits, cooked, corn or hominy, NS as to regular, quick, or instant, made with milk, NS as to fat added in cooking" +56201510,"CORNMEAL MUSH, MADE W/ WATER","Cornmeal mush, made with water" +56201520,"CORNMEAL MUSH, 
FRIED","Cornmeal mush, fried" +56201530,"CORNMEAL MUSH, MADE W/ MILK","Cornmeal mush, made with milk" +56201540,"CORNMEAL, MADE W/ MILK & SUGAR, P. R. STYLE","Cornmeal, made with milk and sugar, Puerto Rican Style (Harina de maiz)" +56201550,"CORNMEAL DUMPLINGS","Cornmeal dumpling" +56201560,"CORNMEAL STICKS, BOILED (INCL CORNMEAL GUANINES)","Cornmeal sticks, boiled" +56201600,"CORNMEAL, LIME-TREATED, COOKED","Cornmeal, lime-treated, cooked (Masa harina)" +56201700,"CORNSTARCH W/ MILK, EATEN AS CEREAL","Cornstarch with milk, eaten as a cereal (2 tbsp cornstarch in 2-1/2 cups milk)" +56201750,"CORNSTARCH, DRY","Cornstarch, dry" +56201800,"CORNSTARCH, HYDROLYZED, POWDER","Cornstarch, hydrolyzed powder" +56201990,"MILLET, COOKED, NS AS TO FAT ADDED IN COOKING","Millet, cooked, NS as to fat added in cooking" +56202000,"MILLET, COOKED, NO FAT ADDED","Millet, cooked, fat not added in cooking" +56202100,"MILLET, COOKED, FAT ADDED IN COOKING","Millet, cooked, fat added in cooking" +56202960,"OATMEAL,COOKED,NS AS TO REG,QUICK/INST,NS TO FAT","Oatmeal, cooked, NS as to regular, quick or instant; NS as to fat added in cooking" +56202970,"OATMEAL, COOKED, QUICK, NS TO FAT ADDED","Oatmeal, cooked, quick (1 or 3 minutes), NS as to fat added in cooking" +56202980,"OATMEAL, COOKED, REG, NS TO FAT ADDED","Oatmeal, cooked, regular, NS as to fat added in cooking" +56203000,"OATMEAL, COOKED, NFS, NO FAT ADDED","Oatmeal, cooked, NS as to regular, quick or instant, fat not added in cooking" +56203010,"OATMEAL, COOKED, REGULAR, NO FAT ADDED","Oatmeal, cooked, regular, fat not added in cooking" +56203020,"OATMEAL, COOKED, QUICK, NO FAT ADDED","Oatmeal, cooked, quick (1 or 3 minutes), fat not added in cooking" +56203030,"OATMEAL, COOKED, INSTANT, NO FAT ADDED IN COOKING","Oatmeal, cooked, instant, fat not added in cooking" +56203040,"OATMEAL, FAT ADDED IN COOKING, NFS","Oatmeal, cooked, NS as to regular, quick, or instant, fat added in cooking" +56203050,"OATMEAL, REGULAR, FAT ADDED IN 
COOKING","Oatmeal, cooked, regular, fat added in cooking" +56203060,"OATMEAL, QUICK, FAT ADDED IN COOKING","Oatmeal, cooked, quick (1 or 3 minutes), fat added in cooking" +56203070,"OATMEAL, INSTANT, FAT ADDED","Oatmeal, cooked, instant, fat added in cooking" +56203080,"OATMEAL, INSTANT, NS AS TO ADDED FAT","Oatmeal, cooked, instant, NS as to fat added in cooking" +56203110,"OATMEAL, MAPLE FLAVOR, COOKED (INCL MAYPO)","Oatmeal with maple flavor, cooked" +56203200,"OATMEAL, W/ FRUIT, COOKED","Oatmeal with fruit, cooked" +56203210,"OATMEAL, NS TYPE, MADE W/ MILK, NO FAT ADDED","Oatmeal, NS as to regular, quick, or instant, made with milk, fat not added in cooking" +56203211,"OATMEAL, CKD, REG, MADE W/ MILK, FAT NOT ADDED IN COOKING","Oatmeal, cooked, regular, made with milk, fat not added in cooking" +56203212,"OATMEAL, CKD, QUICK, MADE W/ MILK, FAT NOT ADDED IN COOKING","Oatmeal, cooked, quick (1 or 3 minutes), made with milk, fat not added in cooking" +56203213,"OATMEAL, CKD, INST, MADE W/ MILK, FAT NOT ADDED IN COOKING","Oatmeal, cooked, instant, made with milk, fat not added in cooking" +56203220,"OATMEAL, NS TYPE, MADE W/ MILK, FAT ADDED","Oatmeal, NS as to regular, quick, or instant, made with milk, fat added in cooking" +56203221,"OATMEAL, CKD, REG, MADE W/ MILK, FAT ADDED IN COOKING","Oatmeal, cooked, regular, made with milk, fat added in cooking" +56203222,"OATMEAL, CKD, QUICK, MADE W/ MILK, FAT ADDED IN COOKING","Oatmeal, cooked, quick (1 or 3 minutes), made with milk, fat added in cooking" +56203223,"OATMEAL, CKD, INST, MADE W/ MILK, FAT ADDED IN COOKING","Oatmeal, cooked, instant, made with milk, fat added in cooking" +56203230,"OATMEAL, NS TYPE, MADE W/ MILK, NS AS TO ADDED FAT","Oatmeal, NS as to regular, quick, or instant, made with milk, NS as to fat added in cooking" +56203231,"OATMEAL, CKD, REG, MADE W/ MILK, NS AS TO FAT ADDED","Oatmeal, cooked, regular, made with milk, NS as to fat added in cooking" +56203232,"OATMEAL, CKD, QUICK, MADE W/ MILK, NS 
AS TO FAT ADDED","Oatmeal, cooked, quick (1 or 3 minutes), made with milk, NS as to fat added in cooking" +56203233,"OATMEAL, CKD, INST, MADE W/ MILK, NS AS TO FAT ADDED","Oatmeal, cooked, instant, made with milk, NS as to fat added in cooking" +56203540,"OATMEAL, MADE W/ MILK & SUGAR, P.R. STYLE","Oatmeal, made with milk and sugar, Puerto Rican style" +56203600,"OATMEAL, MULTIGRAIN, COOKED, NS FAT ADDED","Oatmeal, multigrain, cooked, NS as to fat added in cooking" +56203610,"OATMEAL, MULTIGRAIN, COOKED, FAT NOT ADDED","Oatmeal, multigrain, cooked, fat not added in cooking" +56203620,"OATMEAL, MULTIGRAIN, COOKED, FAT ADDED","Oatmeal, multigrain, cooked, fat added in cooking" +56204000,"QUINOA, COOKED, NS AS TO FAT ADDED IN COOKING","Quinoa, cooked, NS as to fat added in cooking" +56204005,"QUINOA, COOKED, FAT NOT ADDED IN COOKING","Quinoa, cooked, fat not added in cooking" +56204010,"QUINOA, COOKED, FAT ADDED IN COOKING","Quinoa, cooked, fat added in cooking" +56205000,"RICE, COOKED, NS AS TO TYPE","Rice, cooked, NFS" +56205002,"RICE, WHITE, COOKED, MADE WITH OIL","Rice, white, cooked, fat added in cooking, made with oil" +56205004,"RICE, WHITE, COOKED, MADE WITH BUTTER","Rice, white, cooked, fat added in cooking, made with butter" +56205006,"RICE, WHITE, COOKED, MADE WITH MARGARINE","Rice, white, cooked, fat added in cooking, made with margarine" +56205008,"RICE, WHITE, COOKED, FAT NOT ADDED IN COOKING","Rice, white, cooked, fat not added in cooking" +56205012,"RICE, BROWN, COOKED, MADE WITH OIL","Rice, brown, cooked, fat added in cooking, made with oil" +56205014,"RICE, BROWN, COOKED, MADE WITH BUTTER","Rice, brown, cooked, fat added in cooking, made with butter" +56205016,"RICE, BROWN, COOKED, MADE WITH MARGARINE","Rice, brown, cooked, fat added in cooking, made with margarine" +56205018,"RICE, BROWN, COOKED, FAT NOT ADDED IN COOKING","Rice, brown, cooked, fat not added in cooking" +56205050,"RICE, CREAM OF, COOKED, NO FAT ADDED","Rice, cream of, cooked, fat not 
added in cooking" +56205060,"RICE, COOKED, W/ MILK","Rice, cooked, with milk" +56205070,"RICE, SWEET, (RICE, COOKED, W/ HONEY)","Rice, sweet (rice, cooked, with honey)" +56205080,"RICE, CREAMED, W/ MILK & SUGAR, PUERTO RICAN","Rice, creamed, made with milk and sugar, Puerto Rican style" +56205090,"RICE, CREAM OF, COOKED, FAT ADDED IN COOKING","Rice, cream of, cooked, fat added in cooking" +56205130,"YELLOW RICE, COOKED, NS AS TO FAT ADDED IN COOKING","Yellow rice, cooked, NS as to fat added in cooking" +56205150,"YELLOW RICE, COOKED, FAT NOT ADDED IN COOKING","Yellow rice, cooked, fat not added in cooking" +56205170,"YELLOW RICE, COOKED, FAT ADDED IN COOKING","Yellow rice, cooked, fat added in cooking" +56205190,"RICE, WHITE, COOKED, GLUTINOUS (INCL STICKY RICE)","Rice, white, cooked, glutinous" +56205200,"RICE, FRZ DES,NONDAIRY,NOT CHOC (INCL RICE DREAM)","Rice, frozen dessert, nondairy, flavors other than chocolate" +56205205,"RICE, WILD, 100%, COOKED, NS AS TO FAT ADDED IN COOKING","Rice, wild, 100%, cooked, NS as to fat added in cooking" +56205210,"RICE, WILD, 100%, COOKED, NO FAT ADDED","Rice, wild, 100%, cooked, fat not added in cooking" +56205215,"RICE, WILD, 100%, COOKED, FAT ADDED IN COOKING","Rice, wild, 100%, cooked, fat added in cooking" +56205230,"RICE DESSERT BAR,FRZ,NOT CHOC,NONDAIRY,CAROB,COVER","Rice dessert bar, frozen, flavors other than chocolate, nondairy, carob covered" +56205240,"RICE DESSERT BAR,FRZ,CHOC,NONDAIRY,CHOC COVERED","Rice dessert bar, frozen, chocolate, nondairy, chocolate covered" +56205300,"RICE, WHITE & WILD, COOKED, NO FAT ADDED","Rice, white and wild, cooked, fat not added in cooking" +56205310,"RICE, BROWN & WILD, COOKED, NO FAT ADDED","Rice, brown and wild, cooked, fat not added in cooking" +56205320,"RICE, WHITE & WILD, FAT ADDED","Rice, white and wild, cooked, fat added in cooking" +56205330,"RICE, WHITE & WILD, NS AS TO ADDED FAT","Rice, white and wild, cooked, NS as to fat added in cooking" +56205340,"RICE, BROWN & 
WILD, FAT ADDED","Rice, brown and wild, cooked, fat added in cooking" +56205350,"RICE, BROWN & WILD, NS AS TO ADDED FAT","Rice, brown and wild, cooked, NS as to fat added in cooking" +56205410,"RICE, WHITE, COOKED W/ (FAT) OIL, PUERTO RICAN STYLE","Rice, white, cooked with (fat) oil, Puerto Rican style (Arroz blanco)" +56206970,"WHEAT, CREAM OF,COOKED,QUICK,NS AS TO ADDED FAT","Wheat, cream of, cooked, quick, NS as to fat added in cooking" +56206980,"WHEAT, CREAM OF,COOKED,REG,NS AS TO ADDED FAT","Wheat, cream of, cooked, regular, NS as to fat added in cooking" +56206990,"WHEAT, CREAM OF,COOKED,NS AS REG,QUICK,/INST","Wheat, cream of, cooked, NS as to regular, quick, or instant, NS as to fat added in cooking" +56207000,"WHEAT, CREAM OF, COOKED, NFS, NO FAT ADDED","Wheat, cream of, cooked, NS as to regular, quick, or instant, fat not added in cooking" +56207010,"WHEAT, CREAM OF, COOKED, REGULAR, NO FAT ADDED","Wheat, cream of, cooked, regular, fat not added in cooking" +56207020,"WHEAT, CREAM OF, COOKED, QUICK, NO FAT ADDED","Wheat, cream of, cooked, quick, fat not added in cooking" +56207030,"WHEAT, CREAM OF, COOKED, INSTANT, NO FAT ADDED","Wheat, cream of, cooked, instant, fat not added in cooking" +56207040,"WHEAT, CREAM OF, MADE W/ MILK","Wheat, cream of, cooked, made with milk" +56207050,"WHEAT, CREAM OF, MADE W/ MILK & SUGAR, P.R. 
STYLE","Wheat, cream of, cooked, made with milk and sugar, Puerto Rican style" +56207060,"WHEAT, CREAM OF, INSTANT, COOKED, FAT ADDED","Wheat, cream of, cooked, instant, fat added in cooking" +56207070,"WHEAT, CREAM OF, INSTANT,COOKED, NS AS TO ADDED FAT","Wheat, cream of, cooked, instant, NS as to fat added in cooking" +56207080,"WHEAT, CREAM OF,COOKED,NS AS TO REG,QUICK, OR INST","Wheat, cream of, cooked, NS as to regular, quick, or instant, fat added in cooking" +56207100,"WHEAT, ROLLED, COOKED, NO FAT ADDED","Wheat, rolled, cooked, fat not added in cooking" +56207110,"BULGUR, COOKED OR CANNED, NO FAT ADDED","Bulgur, cooked or canned, fat not added in cooking" +56207120,"BULGAR, COOKED OR CANNNED, FAT ADDED IN COOKING","Bulgur, cooked or canned, fat added in cooking" +56207130,"BULGUR, COOKED OR CANNED, NS AS TO ADDED FAT","Bulgur, cooked or canned, NS as to fat added in cooking" +56207140,"WHEAT ROLLED,COOKED,NS AS TO ADDED FAT","Wheat, rolled, cooked, NS as to fat added in cooking" +56207150,"COUSCOUS, PLAIN, COOKED, FAT NOT ADDED IN COOKING","Couscous, plain, cooked, fat not added in cooking" +56207160,"COUSCOUS, PLAIN, COOKED, NS AS TO ADDED FAT","Couscous, plain, cooked, NS as to fat added in cooking" +56207180,"COUSCOUS, PLAIN, COOKED, FAT ADDED IN COOKING","Couscous, plain, cooked, fat added in cooking" +56207190,"WHOLE WHEAT CEREAL, COOKED, NS AS TO ADDED FAT","Whole wheat cereal, cooked, NS as to fat added in cooking" +56207200,"WHOLE WHEAT CEREAL, COOKED, NO FAT ADDED","Whole wheat cereal, cooked, fat not added in cooking" +56207210,"WHOLE WHEAT CEREAL, COOKED, FAT ADDED","Whole wheat cereal, cooked, fat added in cooking" +56207220,"WHEAT, CREAM OF, COOKED, REGULAR, FAT ADDED","Wheat, cream of, cooked, regular, fat added in cooking" +56207230,"WHEAT, CREAM OF, COOKED,QUICK,FAT ADDED IN COOKING","Wheat, cream of, cooked, quick, fat added in cooking" +56207300,"WHOLE WHEAT CEREAL, W/ BARLEY, COOKED, NO FAT ADDED","Whole wheat cereal, wheat and barley, 
cooked, fat not added in cooking" +56207330,"WHOLE WHEAT CEREAL, WHEAT & BARLEY, FAT ADDED","Whole wheat cereal, wheat and barley, cooked, fat added in cooking" +56207340,"WHOLE WHEAT CEREAL, WHEAT & BARLEY, ADDED FAT NS","Whole wheat cereal, wheat and barley, cooked, NS as to fat added in cooking" +56207350,"WHEAT CEREAL, CHOC FLAVORED, COOKED W/ MILK","Wheat cereal, chocolate flavored, cooked, made with milk" +56207360,"WHEAT CEREAL, CHOC FLAVORED, COOKED, NO FAT ADDED","Wheat cereal, chocolate flavored, cooked, fat not added in cooking" +56207370,"WHEAT CEREAL, CHOC FLAV,COOKED,NS AS TO ADDED FAT","Wheat cereal, chocolate flavored, cooked, NS as to fat added in cooking" +56208500,"OAT BRAN CEREAL, COOKED, NO FAT ADDED","Oat bran cereal, cooked, fat not added in cooking" +56208510,"OAT BRAN CEREAL, COOKED, FAT ADDED","Oat bran cereal, cooked, fat added in cooking" +56208520,"OAT BRAN CEREAL, COOKED, NS AS TO ADDED FAT","Oat bran cereal, cooked, NS as to fat added in cooking" +56208530,"OAT BRAN CEREAL, MADE W/ MILK, NO FAT ADDED","Oat bran cereal, cooked, made with milk, fat not added in cooking" +56208540,"OAT BRAN CEREAL, MADE W/ MILK, FAT ADDED","Oat bran cereal, cooked, made with milk, fat added in cooking" +56208550,"OAT BRAN CEREAL, MADE W/ MILK, NS AS TO ADDED FAT","Oat bran cereal, cooked, made with milk, NS as to fat added in cooking" +56209000,"RYE, CREAM OF, COOKED","Rye, cream of, cooked" +56210000,"NESTUM, CEREAL","Nestum cereal" +5.7e+07,"CEREAL, NFS","Cereal, NFS" +57000050,"KASHI CEREAL, NS AS TO READY-TO-EAT OR COOKED","Kashi cereal, NS as to ready to eat or cooked" +57000100,"OAT CEREAL, NFS","Oat cereal, NFS" +57100100,"CEREAL, READY-TO-EAT, NFS","Cereal, ready-to-eat, NFS" +57101000,"ALL-BRAN CEREAL","All-Bran" +57102000,"ALPEN CEREAL","Alpen" +57103000,"ALPHA-BITS CEREAL","Alpha-Bits" +57103020,"ALPHA-BITS W/ MARSHMALLOWS CEREAL","Alpha-bits with marshmallows" +57103050,"AMARANTH FLAKES CEREAL","Amaranth Flakes" +57103100,"APPLE CINNAMON 
CHEERIOS","Apple Cinnamon Cheerios" +57104000,"APPLE JACKS CEREAL","Apple Jacks" +57106050,"BANANA NUT CRUNCH CEREAL (POST)","Banana Nut Crunch Cereal (Post)" +57106060,"BANANA NUT CHEERIOS","Banana Nut Cheerios" +57106100,"BASIC 4 (RTE CEREAL)","Basic 4" +57106250,"BERRY BERRY KIX","Berry Berry Kix" +57106260,"BERRY BURST CHEERIOS","Berry Burst Cheerios" +57106530,"BLUEBERRY MORNING, POST","Blueberry Morning, Post" +57107000,"BOOBERRY CEREAL","Booberry" +57110000,"ALL-BRAN BRAN BUDS CEREAL, KELLOGG'S (FORMERLY BRAN BUDS)","All-Bran Bran Buds, Kellogg's (formerly Bran Buds)" +57111000,"BRAN CHEX CEREAL","Bran Chex" +57117000,"CAP'N CRUNCH CEREAL","Cap'n Crunch" +57117500,"CAP'N CRUNCH'S CHRISTMAS CRUNCH CEREAL","Cap'n Crunch's Christmas Crunch" +57119000,"CAP'N CRUNCH'S CRUNCH BERRIES CEREAL","Cap'n Crunch's Crunch Berries" +57120000,"CAP'N CRUNCH'S PEANUT BUTTER CRUNCH CEREAL","Cap'n Crunch's Peanut Butter Crunch" +57123000,"CHEERIOS","Cheerios" +57124000,"CHEX CEREAL, NFS","Chex cereal, NFS" +57124050,"CHEX CINNAMON","Chex Cinnamon" +57124100,"CHOCOLATE CHEERIOS","Chocolate Cheerios" +57124200,"CHOCOLATE FLAVORED FROSTED PUFFED CORN CEREAL","Chocolate flavored frosted puffed corn cereal" +57124300,"CHOCOLATE LUCKY CHARMS","Chocolate Lucky Charms" +57125000,"CINNAMON TOAST CRUNCH CEREAL","Cinnamon Toast Crunch" +57125010,"CINNAMON TOAST CRUNCH REDUCED SUGAR","Cinnamon Toast Crunch Reduced Sugar" +57125900,"HONEY NUT CLUSTERS CEREAL","Honey Nut Clusters (formerly called Clusters)" +57126000,"COCOA KRISPIES CEREAL","Cocoa Krispies" +57127000,"COCOA PEBBLES CEREAL","Cocoa Pebbles" +57128000,"COCOA PUFFS CEREAL","Cocoa Puffs" +57128005,"COCOA PUFFS, REDUCED SUGAR","Cocoa Puffs, reduced sugar" +57130000,"COOKIE-CRISP CEREAL (INCLUDE ALL FLAVORS)","Cookie-Crisp" +57131000,"CRUNCHY CORN BRAN CEREAL, QUAKER","Crunchy Corn Bran, Quaker" +57132000,"CORN CHEX CEREAL","Corn Chex" +57134000,"CORN FLAKES, NFS (INCLUDE STORE BRANDS)","Corn flakes, NFS" +57134090,"CORN FLAKES, 
LOW SODIUM","Corn flakes, low sodium" +57135000,"CORN FLAKES, KELLOGG'S","Corn flakes, Kellogg's" +57137000,"CORN PUFFS CEREAL","Corn Puffs" +57139000,"COUNT CHOCULA CEREAL","Count Chocula" +57143000,"CRACKLIN' OAT BRAN CEREAL","Cracklin' Oat Bran" +57143500,"CRANBERRY ALMOND CRUNCH, POST","Cranberry Almond Crunch, Post" +57144000,"CRISP CRUNCH CEREAL","Crisp Crunch" +57148000,"CRISPIX CEREAL","Crispix" +57148500,"CRISPY BROWN RICE CEREAL","Crispy Brown Rice Cereal" +57151000,"CRISPY RICE CEREAL","Crispy Rice" +57201900,"DORA THE EXPLORER CEREAL","Dora the Explorer Cereal" +57206000,"FAMILIA CEREAL","Familia" +57206700,"FIBER ONE CEREAL","Fiber One" +57206705,"FIBER ONE CARAMEL DELIGHT","Fiber One Caramel Delight" +57206710,"FIBER ONE HONEY CLUSTERS","Fiber One Honey Clusters" +57206715,"FIBER ONE RAISIN BRAN CLUSTERS","Fiber One Raisin Bran Clusters" +57206800,"FIBER 7 FLAKES CEREAL, HEALTH VALLEY","Fiber 7 Flakes, Health Valley" +57207000,"BRAN FLAKES CEREAL, NFS (FORMERLY 40% BRAN FLAKES, NFS)","Bran Flakes, NFS (formerly 40% Bran Flakes, NFS)" +57208000,"ALL-BRAN COMPLETE WHEAT FLAKES, KELLOGG'S","All-Bran Complete Wheat Flakes, Kellogg's" +57209000,"NATURAL BRAN FLAKES CEREAL, POST","Natural Bran Flakes, Post (formerly called 40% Bran Flakes, Post)" +57211000,"FRANKENBERRY CEREAL","Frankenberry" +57213000,"FROOT LOOPS CEREAL","Froot Loops" +57213850,"FROSTED CHEERIOS CEREAL","Frosted Cheerios" +57214000,"FROSTED MINI-WHEATS CEREAL (INCL ALL FLAVORS)","Frosted Mini-Wheats" +57214100,"FROSTED WHEAT BITES","Frosted Wheat Bites" +57215000,"FROSTY O'S CEREAL","Frosty O's" +57216000,"FROSTED RICE CEREAL, NFS","Frosted rice, NFS" +57218000,"FROSTED RICE KRISPIES, KELLOGG'S","Frosted Rice Krispies, Kellogg's" +57219000,"FRUIT & FIBRE CEREAL, NFS","Fruit & Fibre (fiber), NFS" +57221000,"FRUIT & FIBRE CEREAL, W/ DATES, RAISINS, & WALNUTS","Fruit & Fibre (fiber) with dates, raisins, and walnuts" +57221650,"FRUIT HARVEST CEREAL, KELLOGG'S","Fruit Harvest cereal, 
Kellogg's" +57221700,"FRUIT RINGS, NFS (INCLUDE STORE BRANDS)","Fruit Rings, NFS" +57221800,"FRUIT WHIRLS CEREAL","Fruit Whirls" +57221810,"FRUITY CHEERIOS","Fruity Cheerios" +57223000,"FRUITY PEBBLES CEREAL","Fruity Pebbles" +57224000,"GOLDEN GRAHAMS CEREAL","Golden Grahams" +57227000,"GRANOLA, NFS","Granola, NFS" +57228000,"GRANOLA, HOMEMADE","Granola, homemade" +57229000,"GRANOLA, LOWFAT, KELLOGG'S","Granola, lowfat, Kellogg's" +57229500,"GRANOLA W/ RAISINS, LOWFAT, KELLOGG'S","Granola with Raisins, lowfat, Kellogg's" +57230000,"GRAPE-NUTS CEREAL","Grape-Nuts" +57231000,"GRAPE-NUTS FLAKES","Grape-Nuts Flakes" +57231100,"GRAPE-NUTS TRAIL MIX CRUNCH","Grape-Nuts Trail Mix Crunch" +57231200,"GREAT GRAINS, RAISIN, DATE, & PECAN,WHOLE GRAIN CEREAL, POST","Great Grains, Raisin, Date, and Pecan Whole Grain Cereal, Post" +57231250,"GREAT GRAINS DOUBLE PECAN WHOLE GRAIN CEREAL, POST","Great Grains Double Pecan Whole Grain Cereal, Post" +57237100,"HONEY BUNCHES OF OATS HONEY ROASTED CEREAL","Honey Bunches of Oats Honey Roasted Cereal" +57237200,"HONEY BUNCHES OF OATS WITH VANILLA CLUSTERS, POST","Honey Bunches of Oats with Vanilla Clusters, Post" +57237300,"HONEY BUNCHES OF OATS W/ ALMONDS, POST","Honey Bunches of Oats with Almonds, Post" +57237310,"HONEY BUNCHES OF OATS WITH PECAN BUNCHES","Honey Bunches of Oats with Pecan Bunches" +57237900,"HONEY BUNCHES OF OATS JUST BUNCHES","Honey Bunches of Oats Just Bunches" +57238000,"HONEYCOMB CEREAL, PLAIN","Honeycomb, plain" +57239000,"HONEYCOMB CEREAL, STRAWBERRY","Honeycomb, strawberry" +57239100,"HONEY CRUNCH CORN FLAKES CEREAL, KELLOGG'S","Honey Crunch Corn Flakes, Kellogg's" +57240100,"HONEY NUT CHEX CEREAL","Honey Nut Chex" +57241000,"HONEY NUT CHEERIOS","Honey Nut Cheerios" +57241200,"HONEY NUT SHREDDED WHEAT CEREAL, POST","Honey Nut Shredded Wheat, Post" +57243000,"HONEY SMACKS, KELLOGG'S","Honey Smacks, Kellogg's (formerly Smacks; Honey Smacks)" +57301500,"KASHI, PUFFED","Kashi, Puffed" +57301505,"KASHI AUTUMN 
WHEAT","Kashi Autumn Wheat" +57301510,"KASHI GOLEAN","Kashi GOLEAN" +57301511,"KASHI GOLEAN CRUNCH","Kashi GOLEAN Crunch" +57301512,"KASHI GOLEAN CRUNCH HONEY ALMOND FLAX","Kashi GOLEAN Crunch Honey Almond Flax" +57301520,"KASHI GOOD FRIENDS","Kashi Good Friends" +57301530,"KASHI HEART TO HEART HONEY TOASTED OAT","Kashi Heart to Heart Honey Toasted Oat" +57301535,"KASHI HEART TO HEART OAT FLAKES AND BLUEBERRY CLUSTERS","Kashi Heart to Heart Oat Flakes and Blueberry Clusters" +57301540,"KASHI HONEY SUNSHINE","Kashi Honey Sunshine" +57302100,"KING VITAMAN CEREAL","King Vitaman" +57303100,"KIX CEREAL","Kix" +57303105,"HONEY KIX","Honey Kix" +57304100,"LIFE CEREAL (PLAIN & CINNAMON)","Life (plain and cinnamon)" +57305100,"LUCKY CHARMS CEREAL","Lucky Charms" +57305150,"FROSTED OAT CEREAL W/ MARSHMALLOWS","Frosted oat cereal with marshmallows" +57305160,"MALT-O-MEAL BLUEBERRY MUFFIN TOPS","Malt-O-Meal Blueberry Muffin Tops" +57305165,"MALT-O-MEAL CINNAMON TOASTERS","Malt-O-Meal Cinnamon Toasters" +57305170,"MALT-O-MEAL COCO-ROOS CEREAL","Malt-O-Meal Coco-Roos" +57305174,"MALT-O-MEAL COLOSSAL CRUNCH","Malt-O-Meal Colossal Crunch" +57305175,"MALT-O-MEAL COCOA DYNO-BITES","Malt-O-Meal Cocoa Dyno-Bites" +57305180,"MALT-O-MEAL CORN BURSTS CEREAL","Malt-O-Meal Corn Bursts" +57305200,"MALT-O-MEAL CRISPY RICE CEREAL","Malt-O-Meal Crispy Rice" +57305210,"MALT-O-MEAL FROSTED FLAKES","Malt-O-Meal Frosted Flakes" +57305215,"MALT-O-MEAL FROSTED MINI SPOONERS","Malt-O-Meal Frosted Mini Spooners" +57305300,"MALT-O-MEAL FRUITY DYNO-BITES","Malt-O-Meal Fruity Dyno-Bites" +57305400,"MALT-O-MEAL HONEY GRAHAM SQUARES","Malt-O-Meal Honey Graham Squares" +57305500,"MALT-O-MEAL HONEY & NUT TOASTY O'S CEREAL","Malt-O-Meal Honey and Nut Toasty O's" +57305600,"MALT-O-MEAL MARSHMALLOW MATEYS CEREAL","Malt-O-Meal Marshmallow Mateys" +57306100,"MALT-O-MEAL PUFFED RICE CEREAL","Malt-O-Meal Puffed Rice" +57306120,"MALTO-O-MEAL PUFFED WHEAT CEREAL","Malt-O-Meal Puffed Wheat" +57306130,"MALT-O-MEAL 
RAISIN BRAN","Malt-O-Meal Raisin Bran" +57306500,"MALT-O-MEAL GOLDEN PUFFS CEREAL (FORMERLY SUGAR PUFFS)","Malt-O-Meal Golden Puffs (formerly Sugar Puffs)" +57306700,"MALT-O-MEAL TOASTED OAT CEREAL","Malt-O-Meal Toasted Oat Cereal" +57306800,"MALT-O-MEAL TOOTIE FRUITIES (RTE CEREAL)","Malt-O-meal Tootie Fruities" +57307010,"MAPLE PECAN CRUNCH CEREAL, POST","Maple Pecan Crunch Cereal, Post" +57307500,"MILLET, PUFFED (CEREAL)","Millet, puffed" +57307600,"MINI-SWIRLZ CINNAMON BUN CEREAL, KELLOGG'S","Mini-Swirlz Cinnamon Bun Cereal, Kellogg's" +57308150,"MUESLIX CEREAL, NFS","Mueslix cereal, NFS" +57308190,"MUESLI, DRIED FRUIT&NUTS","Muesli, dried fruit and nuts (formerly Muesli with raisins, dates, and almonds)" +57308300,"MULTI BRAN CHEX","Multi Bran Chex" +57308400,"MULTIGRAIN CHEERIOS","MultiGrain Cheerios" +57309100,"NATURE VALLEY GRANOLA, W/ FRUIT & NUTS","Nature Valley Granola, with fruit and nuts" +57316200,"NUTTY NUGGETS (RALSTON)","Nutty Nuggets, Ralston Purina" +57316300,"OAT BRAN FLAKES, HEALTH VALLEY","Oat Bran Flakes, Health Valley" +57316380,"OAT CLUSTER CHEERIOS CRUNCH","Oat Cluster Cheerios Crunch" +57316450,"OATMEAL CRISP W/ ALMONDS CEREAL","Oatmeal Crisp with Almonds" +57316500,"OATMEAL CRISP, RAISIN","Oatmeal Crisp, Raisin (formerly Oatmeal Raisin Crisp)" +57316710,"OH'S, HONEY GRAHAM CEREAL","Oh's, Honey Graham" +57319000,"100% NATURAL CEREAL, PLAIN, QUAKER","100% Natural Cereal, plain, Quaker" +57319500,"SUN COUNTRY 100% NATURAL GRANOLA, WITH ALMONDS","Sun Country 100% Natural Granola, with Almonds" +57320500,"100 % NATURAL CEREAL, W/ OATS,HONEY & RAISINS,QUAKER","100 % Natural Cereal, with oats, honey and raisins, Quaker" +57321500,"100% NATURAL WHOLEGRAIN CEREAL W/ RAISINS, LOWFAT, QUAKER","100 % Natural Wholegrain Cereal with raisins, lowfat, Quaker" +57321700,"OPTIMUM, NATURE'S PATH","Optimum, Nature's Path" +57321800,"OPTIMUM SLIM, NATURE'S PATH","Optimum Slim, Nature's Path" +57321900,"ORGANIC FLAX PLUS, NATURE'S PATH","Organic Flax Plus, 
Nature's Path" +57323000,"SWEET CRUNCH CEREAL, QUAKER (FORMERLY POPEYE)","Sweet Crunch, Quaker (formerly called Popeye)" +57325000,"PRODUCT 19 CEREAL","Product 19" +57326000,"PUFFINS CEREAL","Puffins Cereal" +57327450,"QUAKER OAT BRAN CEREAL","Quaker Oat Bran Cereal" +57327500,"QUAKER OATMEAL SQUARES CEREAL (FORMERLY QUAKER OAT SQUARES)","Quaker Oatmeal Squares (formerly Quaker Oat Squares)" +57328000,"QUISP CEREAL","Quisp" +57329000,"RAISIN BRAN CEREAL, NFS","Raisin bran, NFS" +57330000,"RAISIN BRAN, KELLOGG'S","Raisin Bran, Kellogg's" +57330010,"RAISIN BRAN CRUNCH, KELLOGG'S","Raisin Bran Crunch, Kellogg's" +57331000,"RAISIN BRAN CEREAL, POST","Raisin Bran, Post" +57332050,"RAISIN BRAN, TOTAL","Raisin Bran, Total" +57332100,"RAISIN NUT BRAN CEREAL","Raisin Nut Bran" +57335550,"REESE'S PEANUT BUTTER PUFFS CEREAL","Reese's Peanut Butter Puffs cereal" +57336000,"RICE CHEX CEREAL","Rice Chex" +57337000,"RICE FLAKES, NFS","Rice Flakes, NFS" +57339000,"RICE KRISPIES, KELLOGG'S","Rice Krispies, Kellogg's" +57339500,"RICE KRISPIES TREATS CEREAL, KELLOGG'S","Rice Krispies Treats Cereal, Kellogg's" +57340000,"PUFFED RICE CEREAL","Rice, puffed" +57341000,"SHREDDED WHEAT 'N BRAN CEREAL","Shredded Wheat'N Bran" +57341200,"SMART START STRONG HEART ANTIOXIDANTS CEREAL, KELLOGG'S","Smart Start Strong Heart Antioxidants Cereal, Kellogg's" +57342010,"SMORZ, KELLOGG'S","Smorz, Kellogg's" +57344000,"SPECIAL K CEREAL","Special K" +57344001,"SPECIAL K BLUEBERRY","Special K Blueberry" +57344005,"SPECIAL K CHOCOLATEY DELIGHT","Special K Chocolatey Delight" +57344007,"SPECIAL K LOW FAT GRANOLA","Special K Low Fat Granola" +57344010,"SPECIAL K RED BERRIES","Special K Red Berries" +57344015,"SPECIAL K FRUIT & YOGURT","Special K Fruit & Yogurt" +57344020,"SPECIAL K VANILLA ALMOND","Special K Vanilla Almond" +57344025,"SPECIAL K CINNAMON PECAN, KELLOGG'S","Special K Cinnamon Pecan, Kellogg's" +57346500,"OATMEAL HONEY NUT HEAVEN, QUAKER","Oatmeal Honey Nut Heaven, Quaker (formerly Toasted 
Oatmeal, Honey Nut)" +57347000,"CORN POPS CEREAL","Corn Pops" +57348000,"FROSTED CORN FLAKES, NFS","Frosted corn flakes, NFS" +57349000,"FROSTED FLAKES, KELLOGG'S","Frosted Flakes, Kellogg's" +57349020,"REDUCED SUGAR FROSTED FLAKES CEREAL, KELLOGG'S","Reduced Sugar Frosted Flakes Cereal, Kellogg's" +57355000,"GOLDEN CRISP CEREAL","Golden Crisp (Formerly called Super Golden Crisp)" +57401100,"TOASTED OAT CEREAL","Toasted oat cereal" +57403100,"TOASTIES, POST","Toasties, Post" +57406100,"TOTAL CEREAL","Total" +57406105,"TOTAL CRANBERRY CRUNCH","Total Cranberry Crunch" +57407100,"TRIX CEREAL","Trix" +57407110,"TRIX, REDUCED SUGAR","Trix, reduced sugar" +57408100,"UNCLE SAM CEREAL","Uncle Sam Cereal (formerly Uncle Sam's Hi Fiber Cereal)" +57409100,"WAFFLE CRISP CEREAL, POST","Waffle Crisp, Post" +57410000,"WEETABIX WHOLE WHEAT CEREAL","Weetabix Whole Wheat Cereal" +57411000,"WHEAT CHEX CEREAL","Wheat Chex" +57412000,"WHEAT GERM CEREAL, PLAIN","Wheat germ, plain" +57413000,"WHEAT GERM CEREAL, W/ SUGAR & HONEY","Wheat germ, with sugar and honey" +57416000,"PUFFED WHEAT CEREAL, PLAIN","Wheat, puffed, plain" +57416010,"WHEAT, PUFFED, PRESWEETENED W/ SUGAR","Wheat, puffed, presweetened with sugar" +57417000,"SHREDDED WHEAT, 100%","Shredded Wheat, 100%" +57418000,"WHEATIES CEREAL","Wheaties" +57419000,"YOGURT BURST CHEERIOS","Yogurt Burst Cheerios" +57601100,"WHEAT BRAN, UNPROCESSED","Wheat bran, unprocessed" +57602100,"OATS, RAW","Oats, raw" +57602500,"OAT BRAN, UNCOOKED","Oat bran, uncooked" +57603100,"RICE POLISHINGS","Rice polishings" +57603200,"RICE BRAN CEREAL, UNCOOKED","Rice bran, uncooked" +57604100,"WHOLE WHEAT, CRACKED","Whole wheat, cracked" +57801000,"BARLEY CEREAL, BABY, DRY, INSTANT","Barley cereal, baby food, dry, instant" +57803000,"MIXED CEREAL, BABY, DRY, INSTANT","Mixed cereal, baby food, dry, instant" +57804000,"OATMEAL CEREAL, BABY, DRY, INSTANT","Oatmeal cereal, baby food, dry, instant" +57805000,"RICE CEREAL, BABY, DRY, INSTANT","Rice cereal, baby 
food, dry, instant" +57805080,"RICE CEREAL W/ APPLES, BABY, DRY, INSTANT","Rice cereal with apples, baby food, dry, instant" +57805090,"RICE CEREAL WITH MIXED FRUITS, BABY FOOD, DRY, INSTANT","Rice cereal with mixed fruits, baby food, dry, instant" +57805100,"RICE CEREAL W/ BANANAS, BABY, DRY, INSTANT","Rice cereal with bananas, baby food, dry, instant" +57805500,"BROWN RICE CEREAL, BABY FOOD, DRY, INSTANT","Brown rice cereal, baby food, dry, instant" +57806000,"MIXED CEREAL W/ BANANAS, BABY, DRY, INSTANT","Mixed cereal with bananas, baby food, dry, instant" +57806050,"MULTIGRAIN, WHOLE GRAIN CEREAL, BABY FOOD, DRY, INSTANT","Multigrain, whole grain cereal, baby food, dry, instant" +57806100,"OATMEAL CEREAL W/ BANANAS, BABY, DRY, INSTANT","Oatmeal cereal with bananas, baby food, dry, instant" +57806200,"OATMEAL W/ FRUIT, BABY, DRY, INSTANT, TODDLER","Oatmeal cereal with fruit, baby food, dry, instant, toddler" +57807010,"WHOLE WHEAT CEREAL W/ APPLES, BABY, DRY, INSTANT","Whole wheat cereal with apples, baby food, dry, instant" +57820000,"CEREAL, BABY, JARRED, NFS","Cereal, baby food, jarred, NFS" +57820100,"RICE CEREAL, BABY FOOD, JARRED, NFS","Rice cereal, baby food, jarred, NFS" +57822000,"MIXED CEREAL W/ APPLESAUCE & BANANAS, BABY, JARRED","Mixed cereal with applesauce and bananas, baby food, jarred" +57823000,"OATMEAL W/ APPLESAUCE & BANANAS, BABY, JARRED","Oatmeal with applesauce and bananas, baby food, jarred" +57824000,"RICE CEREAL, W/ APPLESAUCE & BANANAS, BABY, JARRED","Rice cereal with applesauce and bananas, baby food, jarred" +57824500,"RICE CEREAL W/ MIXED FRUIT, BABY, JARRED","Rice cereal with mixed fruit, baby food, jarred" +57830100,"GERBER GRADUATES FINGER SNACKS CEREAL,BABY FOOD","Gerber Graduates Finger Snacks Cereal, baby food" +58100000,"BURRITO, TACO, OR QUESADILLA W/ EGG","Burrito, taco, or quesadilla with egg" +58100005,"BURRITO, TACO, OR QUESADILLA W/ EGG & POTATO","Burrito, taco, or quesadilla with egg and potato" +58100010,"BURRITO, TACO, 
OR QUESADILLA W/ EGG & BREAKFAST MEAT","Burrito, taco, or quesadilla with egg and breakfast meat" +58100013,"BURRITO, TACO, OR QUESADILLA WITH EGG AND BREAKFAST MEAT, FF","Burrito, taco, or quesadilla with egg and breakfast meat, from fast food" +58100015,"BURRITO, TACO, OR QUESADILLA W/EGG, POTATO, & BREAKFAST MEAT","Burrito, taco, or quesadilla with egg, potato, and breakfast meat" +58100017,"BURRITO, TACO, OR QUESADILLA WITH EGG, POTATO, BRK MEAT, FF","Burrito, taco, or quesadilla with egg, potato, and breakfast meat, from fast food" +58100020,"BURRITO, TACO, OR QUESADILLA W/EGG, BEANS,& BREAKFAST MEAT","Burrito, taco, or quesadilla with egg, beans, and breakfast meat" +58100100,"BURRITO W/ MEAT","Burrito with meat" +58100120,"BURRITO W/ MEAT & BEANS","Burrito with meat and beans" +58100125,"BURRITO WITH MEAT AND BEANS, FROM FAST FOOD","Burrito with meat and beans, from fast food" +58100135,"BURRITO W/ MEAT & SOUR CREAM","Burrito with meat and sour cream" +58100140,"BURRITO W/ MEAT, BEANS, & SOUR CREAM","Burrito with meat, beans, and sour cream" +58100145,"BURRITO WITH MEAT, BEANS, AND SOUR CREAM, FROM FAST FOOD","Burrito with meat, beans, and sour cream, from fast food" +58100160,"BURRITO W/ MEAT, BEANS, & RICE","Burrito with meat, beans, and rice" +58100165,"BURRITO W/ MEAT, BEANS, RICE, & SOUR CREAM","Burrito with meat, beans, rice, and sour cream" +58100200,"BURRITO W/ CHICKEN","Burrito with chicken" +58100220,"BURRITO W/ CHICKEN AND BEANS","Burrito with chicken and beans" +58100235,"BURRITO W/ CHICKEN & SOUR CREAM","Burrito with chicken and sour cream" +58100245,"BURRITO W/ CHICKEN, BEANS, & SOUR CREAM","Burrito with chicken, beans, and sour cream" +58100255,"BURRITO W/ CHICKEN, BEANS, & RICE","Burrito with chicken, beans, and rice" +58100260,"BURRITO W/ CHICKEN, BEANS, RICE, & SOUR CREAM","Burrito with chicken, beans, rice, and sour cream" +58100300,"BURRITO W/ BEANS & RICE, MEATLESS","Burrito with beans and rice, meatless" +58100320,"BURRITO W/ 
BEANS","Burrito with beans, meatless" +58100325,"BURRITO WITH BEANS, MEATLESS, FROM FAST FOOD","Burrito with beans, meatless, from fast food" +58100330,"BURRITO W/ BEANS, RICE, & SOUR CREAM, MEATLESS","Burrito with beans, rice, and sour cream, meatless" +58100360,"CHILAQUILES,TORTILLA CASSEROLE W/ SALSA,CHEESE, EGG","Chilaquiles, tortilla casserole with salsa, cheese, and egg" +58100370,"CHILAQUILES, TORTILLA CASSEROLE, NO EGG","Chilaquiles, tortilla casserole with salsa and cheese, no egg" +58100520,"ENCHILADA W/ MEAT & BEANS, RED-CHILE OR ENCHILADA SAUCE","Enchilada with meat and beans, red-chile or enchilada sauce" +58100525,"ENCHILADA WITH MEAT & BEANS, GREEN-CHILE OR ENCHILADA SAUCE","Enchilada with meat and beans, green-chile or enchilada sauce" +58100530,"ENCHILADA W/ MEAT, RED-CHILE OR ENCHILADA SAUCE","Enchilada with meat, red-chile or enchilada sauce" +58100535,"ENCHILADA WITH MEAT, GREEN-CHILE OR ENCHILADA SAUCE","Enchilada with meat, green-chile or enchilada sauce" +58100620,"ENCHILADA W/ CHICKEN & BEANS, RED-CHILE OR ENCHILADA SAUCE","Enchilada with chicken and beans, red-chile or enchilada sauce" +58100625,"ENCHILADA WITH CHIC & BEANS, GREEN-CHILE OR ENCHILADA SAUCE","Enchilada with chicken and beans, green-chile or enchilada sauce" +58100630,"ENCHILADA W/ CHICKEN, RED-CHILE OR ENCHILADA SAUCE","Enchilada with chicken, red-chile or enchilada sauce" +58100635,"ENCHILADA WITH CHICKEN, GREEN-CHILE OR ENCHILADA SAUCE","Enchilada with chicken, green-chile or enchilada sauce" +58100720,"ENCHILADA W/ BEANS, MEATLESS, RED-CHILE OR ENCHILADA SAUCE","Enchilada with beans, meatless, red-chile or enchilada sauce" +58100725,"ENCHILADA WITH BEANS, GREEN-CHILE OR ENCHILADA SAUCE","Enchilada with beans, green-chile or enchilada sauce" +58100800,"ENCHILADA, JUST CHEESE, NO BEANS, RED-CHILE OR ENCHILADA SC","Enchilada, just cheese, meatless, no beans, red-chile or enchilada sauce" +58100805,"ENCHILADA, JUST CHEESE, GREEN-CHILE OR ENCHILADA SAUCE","Enchilada, just 
cheese, meatless, no beans, green-chile or enchilada sauce" +58101320,"TACO OR TOSTADA W/ MEAT","Taco or tostada with meat" +58101323,"TACO OR TOSTADA WITH MEAT, FROM FAST FOOD","Taco or tostada with meat, from fast food" +58101325,"TACO OR TOSTADA W/ MEAT & SOUR CREAM","Taco or tostada with meat and sour cream" +58101345,"SOFT TACO W/ MEAT","Soft taco with meat" +58101347,"SOFT TACO WITH MEAT, FROM FAST FOOD","Soft taco with meat, from fast food" +58101350,"SOFT TACO W/ MEAT & SOUR CREAM","Soft taco with meat and sour cream" +58101357,"SOFT TACO WITH MEAT AND SOUR CREAM, FROM FAST FOOD","Soft taco with meat and sour cream, from fast food" +58101450,"SOFT TACO WITH CHICKEN","Soft taco with chicken" +58101457,"SOFT TACO WITH CHICKEN, FROM FAST FOOD","Soft taco with chicken, from fast food" +58101460,"SOFT TACO W/ CHICKEN & SOUR CREAM","Soft taco with chicken and sour cream" +58101520,"TACO OR TOSTADA W/ CHICKEN","Taco or tostada with chicken" +58101525,"TACO OR TOSTADA W/ CHICKEN & SOUR CREAM","Taco or tostada with chicken and sour cream" +58101540,"TACO OR TOSTADA W/ FISH","Taco or tostada with fish" +58101555,"SOFT TACO W/ FISH","Soft taco with fish" +58101610,"SOFT TACO W/ BEANS","Soft taco with beans" +58101615,"SOFT TACO W/ BEANS & SOUR CREAM","Soft taco with beans and sour cream" +58101620,"SOFT TACO W/ MEAT & BEANS","Soft taco with meat and beans" +58101625,"SOFT TACO W/ CHICKEN & BEANS","Soft taco with chicken and beans" +58101630,"SOFT TACO W/ MEAT, BEANS, & SOUR CREAM","Soft taco with meat, beans, and sour cream" +58101635,"SOFT TACO W/ CHICKEN, BEANS, & SOUR CREAM","Soft taco with chicken, beans, and sour cream" +58101720,"TACO OR TOSTADA W/ BEANS","Taco or tostada with beans" +58101725,"TACO OR TOSTADA W/ BEANS & SOUR CREAM","Taco or tostada with beans and sour cream" +58101730,"TACO OR TOSTADA W/ MEAT & BEANS","Taco or tostada with meat and beans" +58101733,"TACO OR TOSTADA WITH MEAT AND BEANS, FROM FAST FOOD","Taco or tostada with meat and beans, from 
fast food" +58101735,"TACO OR TOSTADA W/ CHICKEN & BEANS","Taco or tostada with chicken and beans" +58101745,"TACO OR TOSTADA W/ MEAT, BEANS, & SOUR CREAM","Taco or tostada with meat, beans, and sour cream" +58101750,"TACO OR TOSTADA W/ CHICKEN, BEANS, & SOUR CREAM","Taco or tostada with chicken, beans, and sour cream" +58101800,"GROUND BEEF W/ TOMATO SAUCE, ON A CORNBREAD CRUST","Ground beef with tomato sauce and taco seasonings on a cornbread crust" +58101820,"MEXICAN CASSEROLE W/ BEEF & BEANS","Mexican casserole made with ground beef, beans, tomato sauce, cheese, taco seasonings, and corn chips" +58101830,"MEXICAN CASSEROLE W/ BEEF (INCL FRITO PIE, NFS)","Mexican casserole made with ground beef, tomato sauce, cheese, taco seasonings, and corn chips" +58101930,"TACO OR TOSTADA SALAD W/ MEAT","Taco or tostada salad with meat" +58101935,"TACO OR TOSTADA SALAD WITH CHICKEN","Taco or tostada salad with chicken" +58101940,"TACO OR TOSTADA SALAD, MEATLESS","Taco or tostada salad, meatless" +58101945,"TACO SALAD W/ MEAT & SOUR CREAM","Taco or tostada salad with meat and sour cream" +58101950,"TACO OR TOSTADA SALAD W/ CHICKEN & SOUR CREAM","Taco or tostada salad with chicken and sour cream" +58101955,"TACO OR TOSTADA SALAD, MEATLESS W/ SOUR CREAM","Taco or tostada salad, meatless with sour cream" +58103120,"TAMALE WITH MEAT","Tamale with meat" +58103130,"TAMALE WITH CHICKEN","Tamale with chicken" +58103200,"TAMALE, PLAIN, MEATLESS, NO SAUCE, PR STYLE","Tamale, plain, meatless, no sauce, Puerto Rican style or Carribean Style" +58103210,"TAMALE, MEATLESS, W/ SAUCE, P.R. 
OR CARIBBEAN STYLE","Tamale, meatless, with sauce, Puerto Rican or Caribbean style" +58103250,"TAMALE, PLAIN, MEATLESS, NO SAUCE, MEXICAN","Tamale, plain, meatless, no sauce, Mexican style" +58103310,"TAMALE CASSEROLE W/ MEAT","Tamale casserole with meat" +58104090,"NACHOS W/ CHEESE & SOUR CREAM","Nachos with cheese and sour cream" +58104120,"NACHOS W/ CHEESE","Nachos with cheese" +58104130,"NACHOS W/ MEAT & CHEESE","Nachos with meat and cheese" +58104150,"NACHOS W/ CHICKEN & CHEESE","Nachos with chicken and cheese" +58104160,"NACHOS W/ CHILI","Nachos with chili" +58104180,"NACHOS W/ MEAT, CHEESE, & SOUR CREAM","Nachos with meat, cheese, and sour cream" +58104190,"NACHOS W/ CHICKEN, CHEESE, & SOUR CREAM","Nachos with chicken, cheese, and sour cream" +58104260,"GORDITA, SOPE, OR CHALUPA W/ BEANS","Gordita, sope, or chalupa with beans" +58104270,"GORDITA, SOPE, OR CHALUPA W/ BEANS & SOUR CREAM","Gordita, sope, or chalupa with beans and sour cream" +58104280,"GORDITA, SOPE, OR CHALUPA W/ MEAT & SOUR CREAM","Gordita, sope, or chalupa with meat and sour cream" +58104290,"GORDITA, SOPE, OR CHALUPA W/ MEAT","Gordita, sope, or chalupa with meat" +58104320,"GORDITA, SOPE, OR CHALUPA W/ CHICKEN & SOUR CREAM","Gordita, sope, or chalupa with chicken and sour cream" +58104340,"GORDITA, SOPE, OR CHALUPA W/ CHICKEN","Gordita, sope, or chalupa with chicken" +58104500,"CHIMICHANGA W/ MEAT","Chimichanga with meat" +58104520,"CHIMICHANGA, MEATLESS","Chimichanga, meatless" +58104530,"CHIMICHANGA W/ CHICKEN","Chimichanga with chicken" +58104535,"CHIMICHANGA W/ MEAT & SOUR CREAM","Chimichanga with meat and sour cream" +58104540,"CHIMICHANGA, MEATLESS, W/ SOUR CREAM","Chimichanga, meatless, with sour cream" +58104550,"CHIMICHANGA W/ CHICKEN & SOUR CREAM","Chimichanga with chicken and sour cream" +58104710,"QUESADILLA, JUST CHEESE, MEATLESS","Quesadilla, just cheese, meatless" +58104720,"QUESADILLA, JUST CHEESE, FROM FAST FOOD","Quesadilla, just cheese, from fast food" 
+58104730,"QUESADILLA W/ MEAT","Quesadilla with meat" +58104740,"QUESADILLA W/ CHICKEN","Quesadilla with chicken" +58104745,"QUESADILLA WITH CHICKEN, FROM FAST FOOD","Quesadilla with chicken, from fast food" +58104750,"QUESADILLA W/ VEGETABLES","Quesadilla with vegetables" +58104760,"QUESADILLA W/ VEGETABLES & MEAT","Quesadilla with vegetables and meat" +58104770,"QUESADILLA W/ VEGETABLES & CHICKEN","Quesadilla with vegetables and chicken" +58104800,"TAQUITO OR FLAUTA W/ CHEESE","Taquito or flauta with cheese" +58104820,"TAQUITO OR FLAUTA W/ MEAT","Taquito or flauta with meat" +58104825,"TAQUITO OR FLAUTA W/ MEAT & CHEESE","Taquito or flauta with meat and cheese" +58104830,"TAQUITO OR FLAUTA W/ CHICKEN","Taquito or flauta with chicken" +58104835,"TAQUITO OR FLAUTA W/ CHICKEN AND CHEESE","Taquito or flauta with chicken and cheese" +58104900,"TAQUITO OR FLAUTA W/ EGG","Taquito or flauta with egg" +58104905,"TAQUITO OR FLAUTA W/ EGG & BREAKFAST MEAT","Taquito or flauta with egg and breakfast meat" +58105000,"FAJITA W/ CHICKEN & VEGETABLES","Fajita with chicken and vegetables" +58105050,"FAJITA W/ MEAT & VEGETABLES","Fajita with meat and vegetables" +58105075,"FAJITA W/ VEGETABLES","Fajita with vegetables" +58105100,"PUPUSA, CHEESE-FILLED","Pupusa, cheese-filled" +58105105,"PUPUSA, BEAN-FILLED","Pupusa, bean-filled" +58105110,"PUPUSA, MEAT-FILLED","Pupusa, meat-filled" +58106200,"PIZZA, CHEESE, PREP FROM FROZEN, THIN CRUST","Pizza, cheese, prepared from frozen, thin crust" +58106205,"PIZZA, CHEESE, PREP FROM FROZEN, THICK CRUST","Pizza, cheese, prepared from frozen, thick crust" +58106210,"PIZZA, CHEESE,FRM REST/FF, NS AS TO TYPE OF CRUST","Pizza, cheese, from restaurant or fast food, NS as to type of crust" +58106220,"PIZZA, CHEESE, FROM RESTAURANT OR FAST FOOD, THIN CRUST","Pizza, cheese, from restaurant or fast food, thin crust" +58106225,"PIZZA, CHEESE, FROM RESTAURANT OR FAST FOOD, REGULAR CRUST","Pizza, cheese, from restaurant or fast food, regular crust" 
+58106230,"PIZZA, CHEESE, FROM RESTAURANT OR FAST FOOD, THICK CRUST","Pizza, cheese, from restaurant or fast food, thick crust" +58106233,"PIZZA, CHEESE, STUFFED CRUST","Pizza, cheese, stuffed crust" +58106235,"PIZZA, CHEESE, FROM SCHOOL LUNCH, THIN CRUST","Pizza, cheese, from school lunch, thin crust" +58106236,"PIZZA, CHEESE, FROM SCHOOL LUNCH, THICK CRUST","Pizza, cheese, from school lunch, thick crust" +58106240,"PIZZA, EXTRA CHEESE, NS AS TO TYPE OF CRUST","Pizza, extra cheese, NS as to type of crust" +58106250,"PIZZA, EXTRA CHEESE, THIN CRUST","Pizza, extra cheese, thin crust" +58106255,"PIZZA, EXTRA CHEESE, REGULAR CRUST","Pizza, extra cheese, regular crust" +58106260,"PIZZA, EXTRA CHEESE, THICK CRUST","Pizza, extra cheese, thick crust" +58106300,"PIZZA, CHEESE, W/ VEGETABLES, PREP FROM FROZEN, THIN CRUST","Pizza, cheese, with vegetables, prepared from frozen, thin crust" +58106305,"PIZZA, CHEESE, W/ VEGETABLES, PREP FROM FROZEN, THICK CRUST","Pizza, cheese with vegetables, prepared from frozen, thick crust" +58106310,"PIZZA, CHEESE, W/ VEG, NS AS TO TYPE OF CRUST","Pizza, cheese, with vegetables, NS as to type of crust" +58106320,"PIZZA, CHEESE, W/ VEGETABLES, THIN CRUST","Pizza, cheese, with vegetables, thin crust" +58106325,"PIZZA, CHEESE, W/ VEGETABLES, REGULAR CRUST","Pizza, cheese, with vegetables, regular crust" +58106330,"PIZZA, CHEESE, W/ VEGETABLES, THICK CRUST","Pizza, cheese, with vegetables, thick crust" +58106340,"PIZZA W/ CHEESE & EXTRA VEGETABLES, NS AS TO CRUST","Pizza, with cheese and extra vegetables, NS as to type of crust" +58106345,"PIZZA W/ CHEESE & EXTRA VEGETABLES, THIN CRUST","Pizza with cheese and extra vegetables, thin crust" +58106347,"PIZZA W/ CHEESE & EXTRA VEGETABLES, REGULAR CRUST","Pizza with cheese and extra vegetables, regular crust" +58106350,"PIZZA W/ CHEESE & EXTRA VEGETABLES, THICK CRUST","Pizza with cheese and extra vegetables, thick crust" +58106357,"PIZZA, CHEESE, W/ FRUIT, NS AS TO TYPE OF CRUST","Pizza, cheese, 
with fruit, NS as to type of crust" +58106358,"PIZZA, CHEESE, W/ FRUIT, THIN CRUST","Pizza, cheese, with fruit, thin crust" +58106359,"PIZZA, CHEESE, W/ FRUIT, REGULAR CRUST","Pizza, cheese, with fruit, regular crust" +58106360,"PIZZA, CHEESE, W/ FRUIT, THICK CRUST","Pizza, cheese, with fruit, thick crust" +58106500,"PIZZA W/ MEAT, PREP FROM FROZEN, THIN CRUST","Pizza with meat, prepared from frozen, thin crust" +58106505,"PIZZA W/ MEAT, PREP FROM FROZEN, THICK CRUST","Pizza with meat, prepared from frozen, thick crust" +58106540,"PIZZA W/ PEPPERONI,FRM REST/FF, NS AS TO TYPE OF CRUST","Pizza with pepperoni, from restaurant or fast food, NS as to type of crust" +58106550,"PIZZA W/PEPPERONI, FROM RESTAURANT/FAST FOOD, THIN CRUST","Pizza with pepperoni, from restaurant or fast food, thin crust" +58106555,"PIZZA W/PEPPERONI, FROM RESTAURANT/FAST FOOD, REGULAR CRUST","Pizza with pepperoni, from restaurant or fast food, regular crust" +58106560,"PIZZA W/ PEPPERONI, FROM RESTAURANT/FAST FOOD, THICK CRUST","Pizza with pepperoni, from restaurant or fast food, thick crust" +58106565,"PIZZA WITH PEPPERONI, STUFFED CRUST","Pizza with pepperoni, stuffed crust" +58106570,"PIZZA WITH PEPPERONI, FROM SCHOOL LUNCH, THIN CRUST","Pizza with pepperoni, from school lunch, thin crust" +58106580,"PIZZA WITH PEPPERONI, FROM SCHOOL LUNCH, THICK CRUST","Pizza with pepperoni, from school lunch, thick crust" +58106610,"PIZZA W/ MEAT OTHER THAN PEPP, FRM REST/FF, NS TYPE OF CRUST","Pizza with meat other than pepperoni, from restaurant or fast food, NS as to type of crust" +58106620,"PIZZA W/MEAT NOT PEPPERONI, FRM RESTAURANT/FF,THIN CRUST","Pizza with meat other than pepperoni, from restaurant or fast food, thin crust" +58106625,"PIZZA W/MEAT NOT PEPPERONI, FRM RESTAURANT/FF, REG CRUST","Pizza with meat other than pepperoni, from restaurant or fast food, regular crust" +58106630,"PIZZA W/MEAT NOT PEPPERONI, FRM RESTAURANT/FF, THICK CRUST","Pizza with meat other than pepperoni, from restaurant 
or fast food, thick crust" +58106633,"PIZZA, W/MEAT NOT PEPPERONI, STUFFED CRUST","Pizza, with meat other than pepperoni, stuffed crust" +58106635,"PIZZA, W/MEAT OTHER THAN PEPPERONI, FRM SCL LUNCH, THIN CRUS","Pizza, with meat other than pepperoni, from school lunch, thin crust" +58106636,"PIZZA, W/MEAT OTHER THAN PEPPERONI, FRM SCL LUNCH, THICK CRU","Pizza, with meat other than pepperoni, from school lunch, thick crust" +58106640,"PIZZA W/ EXTRA MEAT, NS AS TO TYPE OF CRUST","Pizza with extra meat, NS as to type of crust" +58106650,"PIZZA W/ EXTRA MEAT, THIN CRUST","Pizza with extra meat, thin crust" +58106655,"PIZZA W/ EXTRA MEAT, REGULAR CRUST","Pizza with extra meat, regular crust" +58106660,"PIZZA W/ EXTRA MEAT, THICK CRUST","Pizza with extra meat, thick crust" +58106700,"PIZZA W/ MEAT & VEGS, PREP FROM FROZEN, THIN CRUST","Pizza with meat and vegetables, prepared from frozen, thin crust" +58106705,"PIZZA W/ MEAT & VEGS, PREP FROM FROZEN, THICK CRUST","Pizza with meat and vegetables, prepared from frozen, thick crust" +58106710,"PIZZA W/ MEAT & VEG, NS AS TO TYPE OF CRUST","Pizza with meat and vegetables, NS as to type of crust" +58106720,"PIZZA W/ MEAT & VEGETABLES, THIN CRUST","Pizza with meat and vegetables, thin crust" +58106725,"PIZZA W/ MEAT & VEGETABLES, REGULAR CRUST","Pizza with meat and vegetables, regular crust" +58106730,"PIZZA W/ MEAT & VEGETABLES, THICK CRUST","Pizza with meat and vegetables, thick crust" +58106735,"PIZZA W/ EXTRA MEAT & EXTRA VEGS, NS AS TO TYPE OF CRUST","Pizza with extra meat and extra vegetables, NS as to type of crust" +58106736,"PIZZA W/ EXTRA MEAT & EXTRA VEGS, THIN CRUST","Pizza with extra meat and extra vegetables, thin crust" +58106737,"PIZZA W/ EXTRA MEAT & EXTRA VEGS, THICK CRUST","Pizza with extra meat and extra vegetables, thick crust" +58106738,"PIZZA W/ EXTRA MEAT & EXTRA VEGS, REGULAR CRUST","Pizza with extra meat and extra vegetables, regular crust" +58106740,"PIZZA W/ MEAT & FRUIT, NS AS TO TYPE OF 
CRUST","Pizza with meat and fruit, NS as to type of crust" +58106750,"PIZZA W/ MEAT & FRUIT, THIN CRUST","Pizza with meat and fruit, thin crust" +58106755,"PIZZA W/ MEAT & FRUIT, REGULAR CRUST","Pizza with meat and fruit, regular crust" +58106760,"PIZZA W/ MEAT & FRUIT, THICK CRUST","Pizza with meat and fruit, thick crust" +58106820,"PIZZA W/ BEANS & VEG, THIN CRUST (INCL TACO PIZZA)","Pizza with beans and vegetables, thin crust" +58106830,"PIZZA W/ BEANS & VEG, THICK CRUST (INCL TACO PIZZA)","Pizza with beans and vegetables, thick crust" +58107050,"PIZZA, NO CHEESE, THIN CRUST","Pizza, no cheese, thin crust" +58107100,"PIZZA, NO CHEESE, THICK CRUST","Pizza, no cheese, thick crust" +58107220,"WHITE PIZZA, THIN CRUST","White pizza, thin crust" +58107230,"WHITE PIZZA, THICK CRUST","White pizza, thick crust" +58108000,"CALZONE, W/ CHEESE, MEATLESS (INCL STROMBOLI)","Calzone, with cheese, meatless" +58108010,"CALZONE, W/ MEAT & CHEESE (INCLUDE STROMBOLI)","Calzone, with meat and cheese" +58108050,"PIZZA ROLLS (INCLUDE PIZZA BITES)","Pizza rolls" +58110110,"EGG ROLL, MEATLESS","Egg roll, meatless" +58110120,"EGG ROLL, W/ SHRIMP","Egg roll, with shrimp" +58110130,"EGG ROLL, W/ BEEF/PORK","Egg roll, with beef and/or pork" +58110170,"EGG ROLL, W/ CHICKEN","Egg roll, with chicken or turkey" +58110200,"ROLL W/MEAT&/SHRIMP,VEGETABLES&RICE PAPER(NOT FRIED","Roll with meat and/or shrimp, vegetables and rice paper (not fried)" +58111110,"WON TON (WONTON), FRIED, FILLED W/MEAT, POULTRY, OR SEAFOOD","Won ton (wonton), fried, filled with meat, poultry, or seafood" +58111120,"WON TON (WONTON), FRIED, MEATLESS","Won ton (wonton), fried, meatless" +58111130,"WON TON (WONTON), FRIED, FILLED WITH MEAT, POULTRY, OR SEAFO","Won ton (wonton), fried, filled with meat, poultry, or seafood, and vegetable" +58111200,"PUFFS, FRIED, CRAB MEAT & CREAM CHEESE FILLED","Puffs, fried, crab meat and cream cheese filled" +58112510,"DUMPLING, STEAMED, FILLED W/ MEAT OR SEAFOOD","Dumpling, steamed, 
filled with meat, poultry, or seafood" +58115110,"TAMALE CASSEROLE, P.R. (TAMALES EN CAZUELA)","Tamale casserole, Puerto Rican style (Tamales en cazuela)" +58115150,"TAMAL IN A LEAF, P.R. (TAMALES EN HOJA)","Tamal in a leaf, Puerto Rican style (Tamales en hoja)" +58115210,"TACO W/ CRAB MEAT, P.R. (TACOS DE JUEYES)","Taco with crab meat, Puerto Rican style (Taco de jueye)" +58116110,"MEAT TURNOVER, PUERTO RICAN STYLE","Meat turnover, Puerto Rican style (Pastelillo de carne; Empanadilla)" +58116115,"EMPANADA, MEXICAN TURNOVER, W/ CHS & VEG","Empanada, Mexican turnover, filled with cheese and vegetables" +58116120,"EMPANADA, MEXICAN TURNOVER, W/ MEAT & VEGS","Empanada, Mexican turnover, filled with meat and vegetables" +58116130,"EMPANADA, MEXICAN TURNOVER, W/ CHIC & VEG","Empanada, Mexican turnover, filled with chicken and vegetables" +58116210,"MEAT PIE, P.R. (PASTELON DE CARNE)","Meat pie, Puerto Rican style (Pastelon de carne)" +58116310,"CHEESE TURNOVER, PUERTO RICAN STYLE","Cheese turnover, Puerto Rican style (Pastelillo de queso; Empanadilla)" +58117110,"CORNMEAL FRITTER, P.R. (AREPA, P.R. AREPITAS)","Cornmeal fritter, Puerto Rican style (Arepa; P.R. arepita)" +58117210,"CORNMEAL STICK, P.R. (SORULLOS / SORULLITOS DE MAIZ)","Cornmeal stick, Puerto Rican style (Sorullos / Sorullitos de maiz)" +58117310,"KIBBY, P.R. (BEEF & BULGUR) (PLATO ARABE)","Kibby, Puerto Rican style (beef and bulgur) (Plato Arabe)" +58117410,"CODFISH FRITTER, P.R. (BACALAITOS FRITOS)","Codfish fritter, Puerto Rican style (Bacalaitos fritos)" +58117510,"HAYACAS, P.R. (HOMINY, PORK OR HAM, VEGETABLES)","Hayacas, Puerto Rican style (hominy, pork or ham, vegetables)" +58118110,"CORNSTARCH COCONUT DESSERT, P.R. 
(TEMBLEQUE)","Cornstarch coconut dessert, Puerto Rican style (Tembleque)" +58118210,"CORNMEAL COCONUT DESSERT, P.R.","Cornmeal coconut dessert, Puerto Rican style (Harina de maiz con coco)" +58120110,"CREPES, FILLED W/ MEAT, FISH OR POULTRY, W/ SAUCE","Crepes, filled with meat, fish, or poultry, with sauce" +58120120,"CREPE,FILLED W/ MEAT, FISH & POULTRY,NO SCE ON TOP","Crepe, filled with beef, pork, fish and/or poultry, no sauce on top" +58121510,"DUMPLING, MEAT-FILLED (INCLUDE PIEROGI, PIROSHKI)","Dumpling, meat-filled" +58121610,"DUMPLING, POTATO/CHEESE-FILLED (INCLUDE PIEROGI)","Dumpling, potato- or cheese-filled" +58121620,"DUMPLING, VEGETABLE","Dumpling, vegetable" +58122210,"GNOCCHI, CHEESE","Gnocchi, cheese" +58122220,"GNOCCHI, POTATO","Gnocchi, potato" +58122250,"KISHKE, STUFFED DERMA","Kishke, stuffed derma" +58122310,"KNISH, POTATO (PASTRY FILLED WITH POTATO)","Knish, potato (pastry filled with potato)" +58122320,"KNISH, CHEESE (PASTRY FILLED WITH CHEESE)","Knish, cheese (pastry filled with cheese)" +58122330,"KNISH, MEAT (PASTRY FILLED WITH MEAT)","Knish, meat (pastry filled with meat)" +58123110,"SWEET BREAD DOUGH, FILLED WITH MEAT, STEAMED","Sweet bread dough, filled with meat, steamed" +58123120,"SWEET BREAD DOUGH, FILLED WITH BEAN PASTE, MEATLESS, STEAMED","Sweet bread dough, filled with bean paste, meatless, steamed" +58124210,"PASTRY, CHEESE-FILLED","Pastry, cheese-filled" +58124250,"SPANAKOPITTA (INCL GREEK SPINACH-CHEESE PIE)","Spanakopitta" +58124500,"PASTRY,FILLED W/POTATOES & PEAS, FRIED","Pastry, filled with potatoes and peas, fried" +58125110,"QUICHE W/ MEAT, POULTRY OR FISH","Quiche with meat, poultry or fish" +58125120,"SPINACH QUICHE, MEATLESS","Spinach quiche, meatless" +58125180,"CHEESE QUICHE, MEATLESS","Cheese quiche, meatless" +58126000,"BIEROCK (TURNOVER W/ BEEF & CABBAGE)","Bierock (turnover filled with ground beef and cabbage mixture)" +58126110,"TURNOVER, MEAT-FILLED, NO GRAVY","Turnover, meat-filled, no gravy" 
+58126120,"TURNOVER, MEAT-FILLED, W/ GRAVY","Turnover, meat-filled, with gravy" +58126130,"TURNOVER, MEAT- & CHEESE-FILLED, NO GRAVY","Turnover, meat- and cheese-filled, no gravy" +58126140,"TURNOVER, MEAT- & BEAN-FILLED, NO GRAVY","Turnover, meat- and bean-filled, no gravy" +58126150,"TURNOVER, MEAT & CHEESE, TOMATO SAUCE","Turnover, meat- and cheese-filled, tomato-based sauce" +58126160,"TURNOVER, CHEESE-FILLED, TOMATO-BASED SAUCE","Turnover, cheese-filled, tomato-based sauce" +58126170,"TURNOVER, MEAT & VEG (NO POTATO), NO GRAVY","Turnover, meat-and vegetable- filled (no potatoes, no gravy)" +58126180,"TURNOVER,MEAT- POTATO- & VEGETABLE-FILLED NO GRAVY","Turnover, meat-, potato-, and vegetable-filled, no gravy" +58126270,"TURNOVER,CHICKEN/TURKEY FILLED,NO GRAVY","Turnover, chicken- or turkey-, and cheese-filled, no gravy" +58126280,"TURNOVER, CHICKEN/TURKEY- & VEG-FILLED, LOWER FAT","Turnover, chicken- or turkey-, and vegetable-filled, lower in fat" +58126290,"TURNOVER, MEAT- & CHEESE-FILLED, LOWER FAT","Turnover, meat- and cheese-filled, lower in fat" +58126300,"TURNOVER, MEAT- & CHEESE-FILLED, TOMATO SAUCE, LOWER FAT","Turnover, meat- and cheese-filled, tomato-based sauce, lower in fat" +58126310,"TURNOVER, CHICKEN, W/ GRAVY","Turnover, chicken, with gravy" +58126400,"TURNOVER, FILLED W/ EGG, MEAT & CHEESE","Turnover, filled with egg, meat and cheese" +58126410,"TURNOVER, FILLED W/ EGG, MEAT & CHEESE, LOWER IN FAT","Turnover, filled with egg, meat, and cheese, lower in fat" +58127110,"VEGETABLES IN PASTRY (INCL ALL VARIETIES)","Vegetables in pastry" +58127150,"VEGETABLES & CHEESE IN PASTRY","Vegetables and cheese in pastry" +58127200,"CROISSANT, FILLED W/ BROCCOLI & CHEESE","Croissant sandwich, filled with broccoli and cheese" +58127210,"CROISSANT, FILLED W/ HAM & CHEESE","Croissant sandwich, filled with ham and cheese" +58127220,"CROISSANT, FILLED W/CHICKEN,BROCCOLI & CHEESE SAUCE","Croissant sandwich, filled with chicken, broccoli, and cheese sauce" 
+58127270,"CROISSANT W/ SAUSAGE & EGG","Croissant sandwich with sausage and egg" +58127290,"CROISSANT W/ BACON & EGG","Croissant sandwich with bacon and egg" +58127310,"CROISSANT W/ HAM, EGG, & CHEESE","Croissant sandwich with ham, egg, and cheese" +58127330,"CROISSANT W/ SAUSAGE, EGG, & CHEESE","Croissant sandwich with sausage, egg, and cheese" +58127350,"CROISSANT W/ BACON, EGG, & CHEESE","Croissant sandwich with bacon, egg, and cheese" +58127500,"VEGETABLE SUBMARINE SANDWICH, W/ FAT FREE SPREAD","Vegetable submarine sandwich, with fat free spread" +58128000,"BISCUIT W/ GRAVY","Biscuit with gravy" +58128110,"CHICKEN CORNBREAD","Chicken cornbread" +58128120,"CORNMEAL DRESSING W/ CHICKEN & VEGETABLES","Cornmeal dressing with chicken or turkey and vegetables" +58128210,"DRESSING W/ OYSTERS","Dressing with oysters" +58128220,"DRESSING W/ CHICKEN/TURKEY & VEGETABLES","Dressing with chicken or turkey and vegetables" +58128250,"DRESSING W/ MEAT & VEGETABLES","Dressing with meat and vegetables" +58130011,"LASAGNA WITH MEAT","Lasagna with meat" +58130013,"LASAGNA W/ MEAT, CANNED","Lasagna with meat, canned" +58130020,"LASAGNA, W/ MEAT & SPINACH","Lasagna with meat and spinach" +58130140,"LASAGNA WITH CHICKEN OR TURKEY","Lasagna with chicken or turkey" +58130150,"LASAGNA W/ CHIC OR TURKEY, & SPINACH","Lasagna, with chicken or turkey, and spinach" +58130310,"LASAGNA, MEATLESS","Lasagna, meatless" +58130320,"LASAGNA, MEATLESS, W/ VEGETABLES","Lasagna, meatless, with vegetables" +58130610,"LASAGNA W/ MEAT, WHOLE WHEAT NOODLES","Lasagna with meat, whole wheat noodles" +58130810,"LASAGNA, MEATLESS, WHOLE WHEAT NOODLES","Lasagna, meatless, whole wheat noodles" +58130910,"LASAGNA W/ MEAT, SPINACH NOODLES","Lasagna with meat, spinach noodles" +58130950,"LASAGNA, MEATLESS, SPINACH NOODLES","Lasagna, meatless, spinach noodles" +58131100,"RAVIOLI, FILLING NS, NO SAUCE","Ravioli, NS as to filling, no sauce" +58131110,"RAVIOLI, FILLING NS, TOMATO SAUCE","Ravioli, NS as to filling, with 
tomato sauce" +58131120,"RAVIOLI, NS AS TO FILLING, WITH CREAM SAUCE","Ravioli, NS as to filling, with cream sauce" +58131310,"RAVIOLI, MEAT-FILLED, NO SAUCE","Ravioli, meat-filled, no sauce" +58131320,"RAVIOLI, MEAT-FILLED, W/ TOMATO OR MEAT SAUCE","Ravioli, meat-filled, with tomato sauce or meat sauce" +58131323,"RAVIOLI, MEAT-FILLED, W/ TOMATO OR MEAT SAUCE, CANNED","Ravioli, meat-filled, with tomato sauce or meat sauce, canned" +58131330,"RAVIOLI, MEAT-FILLED, WITH CREAM SAUCE","Ravioli, meat-filled, with cream sauce" +58131510,"RAVIOLI, CHEESE-FILLED, NO SAUCE","Ravioli, cheese-filled, no sauce" +58131520,"RAVIOLI, CHEESE-FILLED, W/ TOMATO SAUCE","Ravioli, cheese-filled, with tomato sauce" +58131523,"RAVIOLI, CHEESE-FILLED, W/ TOMATO SAUCE, CANNED","Ravioli, cheese-filled, with tomato sauce, canned" +58131530,"RAVIOLI, CHEESE-FILLED, W/ MEAT SAUCE","Ravioli, cheese-filled, with meat sauce" +58131535,"RAVIOLI, CHEESE-FILLED, WITH CREAM SAUCE","Ravioli, cheese-filled, with cream sauce" +58131590,"RAVIOLI, CHEESE AND SPINACH-FILLED, NO SAUCE","Ravioli, cheese and spinach-filled, no sauce" +58131600,"RAVIOLI, CHEESE&SPINACH-FILLED, W/ CREAM SAUCE","Ravioli, cheese and spinach-filled, with cream sauce" +58131610,"RAVIOLI, CHEESE AND SPINACH FILLED, WITH TOMATO SAUCE","Ravioli, cheese and spinach filled, with tomato sauce" +58132110,"SPAGHETTI W/ TOMATO SAUCE, MEATLESS","Spaghetti with tomato sauce, meatless" +58132113,"PASTA, W/ TOMATO SAUCE & CHEESE, CANNED","Pasta with tomato sauce and cheese, canned" +58132310,"SPAGHETTI W/TOMAT SAUCE & MEAT SAUCE","Spaghetti with tomato sauce and meatballs or spaghetti with meat sauce or spaghetti with meat sauce and meatballs" +58132313,"PASTA W/ TOMATO SAUCE & MEAT/MEATBALLS, CANNED","Pasta with tomato sauce and meat or meatballs, canned" +58132340,"SPAGHETTI W/ TOMATO SAUCE & VEGETABLES","Spaghetti with tomato sauce and vegetables" +58132350,"SPAGHETTI, WHOLE WHEAT, W/ TOMATO SAUCE, MEATLESS","Spaghetti with tomato sauce, 
meatless, whole wheat noodles" +58132360,"SPAGHETTI, WHOLE WHEAT, W/ TOMATO & MEAT SAUCE","Spaghetti with tomato sauce and meatballs, whole wheat noodles or spaghetti with meat sauce, whole wheat noodles or spaghetti with meat sauce and meatballs, whole wheat noodles" +58132450,"SPAGHETTI W/ TOM SAUCE, MEATLESS, SPINACH NOODLES","Spaghetti with tomato sauce, meatless, made with spinach noodles" +58132460,"SPAGHETTI W/ TOMATO & MEAT SAUCE, SPINACH NOODLES","Spaghetti with tomato sauce and meatballs made with spinach noodles, or spaghetti with meat sauce made with spinach noodles, or spaghetti with meat sauce and meatballs made with spinach noodles" +58132710,"SPAGHETTI W/ TOMATO SAUCE & FRANKFURTERS/HOT DOG","Spaghetti with tomato sauce and frankfurters or hot dogs" +58132713,"PASTA W/ TOMATO SAUCE & FRANKFURTERS/HOT DOGS, CANNED","Pasta with tomato sauce and frankfurters or hot dogs, canned" +58132800,"SPAGHETTI W/ CLAM SAUCE, NS AS TO RED OR WHITE","Spaghetti with clam sauce, NS as to red or white" +58132810,"SPAGHETTI W/ RED CLAM SAUCE","Spaghetti with red clam sauce" +58132820,"SPAGHETTI W/ WHITE CLAM SAUCE","Spaghetti with white clam sauce" +58132910,"SPAGHETTI WITH TOMATO SAUCE AND POULTRY","Spaghetti with tomato sauce and poultry" +58133110,"MANICOTTI, CHEESE-FILLED, NO SAUCE","Manicotti, cheese-filled, no sauce" +58133120,"MANICOTTI, CHEESE-FILLED, W/ TOMATO SAUCE, MEATLESS","Manicotti, cheese-filled, with tomato sauce, meatless" +58133130,"MANICOTTI, CHEESE-FILLED, W/ MEAT SAUCE","Manicotti, cheese-filled, with meat sauce" +58133140,"MANICOTTI, VEG- & CHEESE-FILLED, W/TOM SCE,MEATLESS","Manicotti, vegetable- and cheese-filled, with tomato sauce, meatless" +58134110,"STUFFED SHELLS, CHEESE-FILLED, NO SAUCE","Stuffed shells, cheese-filled, no sauce" +58134120,"STUFFED SHELLS, CHEESE-FILLED, W/ TOM SC, MEATLESS","Stuffed shells, cheese-filled, with tomato sauce, meatless" +58134130,"STUFFED SHELLS, CHEESE-FILLED, W/ MEAT SAUCE","Stuffed shells, cheese-filled, 
with meat sauce" +58134160,"STUFFED SHELLS, CHEESE AND SPINACH FILLED, NO SAUCE","Stuffed shells, cheese- and spinach- filled, no sauce" +58134210,"STUFFED SHELLS, W/ CHICKEN, W/ TOM SCE","Stuffed shells, with chicken, with tomato sauce" +58134310,"STUFFED SHELLS, W/ FISH &/OR SHELLFISH, W/ TOM SCE","Stuffed shells, with fish and/or shellfish, with tomato sauce" +58134610,"TORTELLINI, MEAT-FILLED, W/ TOMATO SAUCE","Tortellini, meat-filled, with tomato sauce" +58134613,"TORTELLINI, MEAT-FILLED, W/ TOMATO SAUCE, CANNED","Tortellini, meat-filled, with tomato sauce, canned" +58134620,"TORTELLINI, CHEESE-FILLED, MEATLESS, W/TOMATO SAUCE","Tortellini, cheese-filled, meatless, with tomato sauce" +58134623,"TORTELLINI,CHEESE-FILLED,MEATLESS,W/TOMATO SAUCE,CANNED","Tortellini, cheese-filled, meatless, with tomato sauce, canned" +58134630,"TORTELLINI, CHEESE, W/ VEGETABLES & DRESSING","Tortellini, cheese-filled, meatless, with vegetables and vinaigrette dressing" +58134640,"TORTELLINI, CHEESE-FILLED, MEATLESS, W/ VINAIGRETTE","Tortellini, cheese-filled, meatless, with vinaigrette dressing" +58134650,"TORTELLINI, MEAT-FILLED, NO SAUCE","Tortellini, meat-filled, no sauce" +58134660,"TORTELLINI, CHEESE-FILLED, W/ CREAM SAUCE","Tortellini, cheese-filled, with cream sauce" +58134680,"TORTELLINI, CHEESE-FILLED, NO SAUCE","Tortellini, cheese-filled, no sauce" +58134710,"TORTELLINI, SPINACH-FILLED, W/ TOMATO SAUCE","Tortellini, spinach-filled, with tomato sauce" +58134720,"TORTELLINI, SPINACH-FILLED, NO SAUCE","Tortellini, spinach-filled, no sauce" +58134810,"CANNELONI, CHEESE & SPINACH-FILLED, NO SAUCE","Cannelloni, cheese- and spinach-filled, no sauce" +58135110,"CHOW FUN NOODLES W/ MEAT & VEGETABLES","Chow fun noodles with meat and vegetables" +58135120,"CHOW FUN NOODLES W/ VEGETABLES, MEATLESS","Chow fun noodles with vegetables, meatless" +58136110,"LO MEIN, NFS","Lo mein, NFS" +58136120,"LO MEIN, MEATLESS","Lo mein, meatless" +58136130,"LO MEIN WITH SHRIMP","Lo mein, with 
shrimp" +58136140,"LO MEIN W/ PORK","Lo mein, with pork" +58136150,"LO MEIN W/ BEEF","Lo mein, with beef" +58136160,"LO MEIN W/ CHICKEN (INCL TURKEY)","Lo mein, with chicken" +58137210,"PAD THAI, NFS","Pad Thai, NFS" +58137220,"PAD THAI, MEATLESS","Pad Thai, meatless" +58137230,"PAD THAI WITH CHICKEN","Pad Thai with chicken" +58137240,"PAD THAI WITH SEAFOOD","Pad Thai with seafood" +58137250,"PAD THAI WITH MEAT","Pad Thai with meat" +58140110,"SPAGHETTI W/ CORNED BEEF, P.R.","Spaghetti with corned beef, Puerto Rican style" +58140310,"MACARONI W/ TUNA, P.R. (MACARRONES CON ATUN)","Macaroni with tuna, Puerto Rican style (Macarrones con atun)" +58145110,"MACARONI OR NOODLES W/ CHEESE","Macaroni or noodles with cheese" +58145112,"MACARONI OR NOODLES WITH CHEESE, MADE FROM PACKAGED MIX","Macaroni or noodles with cheese, made from packaged mix" +58145113,"MACARONI OR NOODLES W/ CHEESE, CANNED","Macaroni or noodles with cheese, canned" +58145117,"MACARONI OR NOODLES WITH CHEESE, EASY MAC TYPE","Macaroni or noodles with cheese, Easy Mac type" +58145119,"MACARONI OR NOODLES WITH CHEESE, MADE FRM RED FAT PACKAGE","Macaroni or noodles with cheese, made from reduced fat packaged mix" +58145120,"MACARONI OR NOODLES W/ CHEESE & TUNA","Macaroni or noodles with cheese and tuna" +58145135,"MACARONI OR NOODLES WITH CHEESE AND MEAT","Macaroni or noodles with cheese and meat" +58145136,"MACARONI OR NOODLES W/CHEESE & MEAT, FR HAMBURGER HELPER","Macaroni or noodles with cheese and meat, prepared from Hamburger Helper mix" +58145140,"MACARONI OR NOODLES W/ CHEESE & TOMATO","Macaroni or noodles with cheese and tomato" +58145160,"MACARONI/NOODLES W/ CHEESE & FRANKFURTER/HOT DOG","Macaroni or noodles with cheese and frankfurters or hot dogs" +58145170,"MACARONI OR NOODLES WITH CHEESE AND EGG","Macaroni or noodles with cheese and egg" +58145190,"MACARONI W/ CHEESE & CHICKEN","Macaroni or noodles with cheese and chicken or turkey" +58146100,"PASTA W/ TOMATO SAUCE, MEATLESS","Pasta with 
tomato sauce, meatless" +58146110,"PASTA W/ MEAT SAUCE (INCLUDE AMER CHOP SUEY)","Pasta with meat sauce" +58146120,"PASTA W/ CHEESE & MEAT SAUCE","Pasta with cheese and meat sauce" +58146130,"PASTA W/ CARBONARA SAUCE","Pasta with carbonara sauce" +58146150,"PASTA W/ CHEESE & TOMATO SAUCE, MEATLESS","Pasta with cheese and tomato sauce, meatless" +58146160,"PASTA WITH VEGETABLES, NO SAUCE OR DRESSING","Pasta with vegetables, no sauce or dressing" +58146200,"PASTA, MEAT-FILLED, W/ GRAVY, CANNED","Pasta, meat-filled, with gravy, canned" +58146300,"PASTA, WHOLE WHEAT, WITH MEAT SAUCE","Pasta, whole wheat, with meat sauce" +58146310,"PASTA, WHOLE WHEAT, W/ TOMATO SAUCE, MEATLESS","Pasta, whole wheat, with tomato sauce, meatless" +58147100,"PASTA W/ PESTO SAUCE","Pasta with pesto sauce" +58147110,"MACARONI OR NOODLES W/ BEANS & TOMATO SAUCE","Macaroni or noodles with beans or lentils and tomato sauce" +58147310,"MACARONI, CREAMED","Macaroni, creamed" +58147330,"MACARONI OR NOODLES, CREAMED, WITH CHEESE","Macaroni or noodles, creamed, with cheese" +58147340,"MACARONI OR NOODLES, CREAMED, WITH CHEESE AND TUNA","Macaroni or noodles, creamed, with cheese and tuna" +58147350,"MACARONI, CREAMED, W/ VEGETABLES","Macaroni, creamed, with vegetables" +58147510,"FLAVORED PASTA (INCL LIPTON BEEF, CHICKEN FLAVORS)","Flavored pasta" +58147520,"YAT GA MEIN WITH MEAT, FISH, OR POULTRY","Yat Ga Mein with meat, fish, or poultry" +58148110,"MACARONI OR PASTA SALAD, W/ MAYO","Macaroni or pasta salad, made with mayonnaise" +58148111,"MACARONI OR PASTA SALAD, W/ LT MAYO","Macaroni or pasta salad, made with light mayonnaise" +58148112,"MACARONI OR PASTA SALAD, W/ MAYO-TYPE DRSG","Macaroni or pasta salad, made with mayonnaise-type salad dressing" +58148113,"MACARONI OR PASTA SALAD, W/LT MAYO-TYPE DRSG","Macaroni or pasta salad, made with light mayonnaise-type salad dressing" +58148114,"MACARONI OR PASTA SALAD, W/ ITALIAN DRSG","Macaroni or pasta salad, made with Italian dressing" 
+58148115,"MACARONI OR PASTA SALAD, W/LT ITALIAN DRSG","Macaroni or pasta salad, made with light Italian dressing" +58148116,"MACARONI OR PASTA SALAD, W/ CREAMY DRSG","Macaroni or pasta salad, made with creamy dressing" +58148117,"MACARONI OR PASTA SALAD, W/ LT CREAMY DRSG","Macaroni or pasta salad, made with light creamy dressing" +58148118,"MACARONI OR PASTA SALAD, W/ ANY TYPE OF FAT FREE DRSG","Macaroni or pasta salad, made with any type of fat free dressing" +58148120,"MACARONI OR PASTA SALAD WITH EGG","Macaroni or pasta salad with egg" +58148130,"MACARONI OR PASTA SALAD WITH TUNA","Macaroni or pasta salad with tuna" +58148140,"MACARONI OR PASTA SALAD WITH CRAB MEAT","Macaroni or pasta salad with crab meat" +58148150,"MACARONI OR PASTA SALAD WITH SHRIMP","Macaroni or pasta salad with shrimp" +58148160,"MACARONI OR PASTA SALAD WITH TUNA AND EGG","Macaroni or pasta salad with tuna and egg" +58148170,"MACARONI OR PASTA SALAD WITH CHICKEN","Macaroni or pasta salad with chicken" +58148180,"MACARONI OR PASTA SALAD WITH CHEESE","Macaroni or pasta salad with cheese" +58148550,"MACARONI OR PASTA SALAD W/ MEAT","Macaroni or pasta salad with meat" +58148600,"PASTA TETRAZZINI,DRY MIX,PREPARED W/ WATER","Pasta tetrazzini, dry mix, prepared with water" +58149110,"NOODLE PUDDING (INCLUDE KUGEL)","Noodle pudding" +58149160,"NOODLE PUDDING,W/ MILK","Noodle pudding, with milk" +58149210,"SOMEN SALAD W/ NOODLE, LETTUCE, EGG, FISH, PORK","Somen salad with noodles, lettuce, egg, fish, and pork" +58150100,"BIBIMBAP (KOREAN)","Bibimbap (Korean)" +58150110,"RICE, FRIED, MEATLESS","Rice, fried, meatless" +58150310,"RICE, FRIED, NFS","Rice, fried, NFS" +58150320,"RICE, FRIED, W/ CHICKEN (INCL TURKEY)","Rice, fried, with chicken" +58150330,"RICE, FRIED, W/ PORK","Rice, fried, with pork" +58150340,"RICE, FRIED, W/ BEEF","Rice, fried, with beef" +58150510,"RICE, FRIED, W/ SHRIMP","Rice, fried, with shrimp" +58150520,"DUKBOKI / TTEOKBOKKI (KOREAN)","Dukboki / Tteokbokki (Korean)" 
+58151100,"SUSHI, NFS","Sushi, NFS" +58151110,"SUSHI, NO VEG, NO SEAFOOD/FISH/SHELLFISH","Sushi, no vegetables, no seafood (no fish or shellfish)" +58151120,"SUSHI, W/ VEG, NO SEAFOOD/FISH/SHELLFISH","Sushi, with vegetables, no seafood (no fish or shellfish)" +58151130,"SUSHI, W/ VEG & SEAFD","Sushi, with vegetables and seafood" +58151140,"SUSHI, W/ VEGETABLES, ROLLED IN SEAWEED","Sushi, with vegetables, rolled in seaweed" +58151150,"SUSHI, WITH SEAFOOD, NO VEGETABLES","Sushi, with seafood, no vegetables" +58151160,"SUSHI, W/ EGG, NO VEG/SEAFD/FISH/SHLFISH, ROLL IN SEAWEED","Sushi, with egg, no vegetables, no seafood (no fish or shellfish), rolled in seaweed" +58155110,"RICE W/ CHICKEN, P.R. (ARROZ CON POLLO)","Rice with chicken, Puerto Rican style (Arroz con Pollo)" +58155210,"STUFFED RICE W/ CHICKEN, DOMINICAN STYLE","Stuffed rice with chicken, Dominican style (Arroz relleno Dominicano)" +58155310,"PAELLA, VALENCIANA STYLE, W/ MEAT","Paella, Valenciana style, with meat (Paella Valenciana)" +58155320,"SEAFOOD PAELLA, PUERTO RICAN STYLE","Seafood paella, Puerto Rican style" +58155410,"SOUPY RICE W/ CHICKEN, P.R. (ASOPAO DE POLLO)","Soupy rice with chicken, Puerto Rican style (Asopao de pollo)" +58155510,"SOUPY RICE MIXTURE W/ CHICKEN & POTATOES, P.R.STYLE","Soupy rice mixture with chicken and potatoes, Puerto Rican style" +58155610,"RICE MEAL FRITTER, PUERTO RICAN (ALMOJAMBANA)","Rice meal fritter, Puerto Rican style (Almojabana)" +58155810,"STEWED RICE, P.R. (ARROZ GUISADO)","Stewed rice, Puerto Rican style (arroz guisado)" +58155910,"RICE W/ SQUID, P.R. (ARROZ CON CALAMARES)","Rice with squid, Puerto Rican style (arroz con calamares)" +58156110,"FRIED RICE, P.R. (ARROZ FRITO)","Fried rice, Puerto Rican style (arroz frito)" +58156210,"RICE W/ VIENNA SAUSAGE, P.R. 
(ARROZ CON SALCHICHAS)","Rice with vienna sausage, Puerto Rican style (arroz con salchichas)" +58156310,"RICE W/ SPANISH SAUSAGE, P.R.","Rice with Spanish sausage, Puerto Rican style" +58156410,"RICE W/ ONIONS, P.R. (ARROZ CON CEBOLLAS)","Rice with onions, Puerto Rican style (arroz con cebollas)" +58156510,"SOUPY RICE, FROM P.R. ASOPAO DE POLLO","Soupy rice from Puerto Rican style Asopao de Pollo (chicken parts reported separately)" +58156610,"PIGEON PEA ASOPAO (ASOPAO DE GRANDULES)","Pigeon pea asopao (Asopao de gandules)" +58156710,"RICE W/ STEWED BEANS, P.R.","Rice with stewed beans, Puerto Rican style" +58157110,"SPICEY RICE PUDDING, P.R.","Spicy rice pudding, Puerto Rican style" +58157210,"RICE PUDDING MADE W/ COCONUT MILK, P.R.","Rice pudding made with coconut milk, Puerto Rican style" +58160000,"BIRYANI WITH VEGETABLES","Biryani with vegetables" +58160110,"RICE W/ BEANS","Rice with beans" +58160120,"RICE W/ BEANS & TOMATOES","Rice with beans and tomatoes" +58160130,"RICE W/ BEANS & CHICKEN","Rice with beans and chicken" +58160135,"RICE W/ BEANS AND BEEF","Rice with beans and beef" +58160140,"RICE W/ BEANS & PORK","Rice with beans and pork" +58160150,"RED BEANS & RICE","Red beans and rice" +58160160,"HOPPING JOHN (BLACKEYE PEAS & RICE)","Hopping John (blackeye peas and rice)" +58160400,"RICE, WHITE, WITH CORN, NS AS TO FAT ADDED IN COOKING","Rice, white, with corn, NS as to fat added in cooking" +58160410,"RICE, WHITE, WITH CORN, FAT NOT ADDED IN COOKING","Rice, white, with corn, fat not added in cooking" +58160420,"RICE, WHITE, WITH CORN, FAT ADDED IN COOKING","Rice, white, with corn, fat added in cooking" +58160430,"RICE, WHITE, WITH PEAS, NS AS TO FAT ADDED IN COOKING","Rice, white, with peas, NS as to fat added in cooking" +58160440,"RICE, WHITE, WITH PEAS, FAT NOT ADDED IN COOKING","Rice, white, with peas, fat not added in cooking" +58160450,"RICE, WHITE, WITH PEAS, FAT ADDED IN COOKING","Rice, white, with peas, fat added in cooking" +58160460,"RICE, 
WHITE, WITH CARROTS, NS AS TO FAT ADDED IN COOKING","Rice, white, with carrots, NS as to fat added in cooking" +58160470,"RICE, WHITE, WITH CARROTS, FAT NOT ADDED IN COOKING","Rice, white, with carrots, fat not added in cooking" +58160480,"RICE, WHITE, WITH CARROTS, FAT ADDED IN COOKING","Rice, white, with carrots, fat added in cooking" +58160490,"RICE, WHITE, W/ PEAS&CARROTS, NS AS TO FAT ADDED IN COOKING","Rice, white, with peas and carrots, NS as to fat added in cooking" +58160500,"RICE, WHITE, WITH PEAS AND CARROTS, FAT NOT ADDED IN COOKING","Rice, white, with peas and carrots, fat not added in cooking" +58160510,"RICE, WHITE, WITH PEAS AND CARROTS, FAT ADDED IN COOKING","Rice, white, with peas and carrots, fat added in cooking" +58160520,"RICE, WHITE, W/TOMATOES/TOMATO BASED SAUCE, NS AS TO FAT","Rice, white, with tomatoes (and/or tomato based sauce), NS as to fat added in cooking" +58160530,"RICE, WHITE, W/TOMATOES/ TOMATO BASED SAUCE, FAT NOT ADDED","Rice, white, with tomatoes (and/or tomato based sauce), fat not added in cooking" +58160540,"RICE, WHITE, W/ TOMATOES/TOMATO BASED SAUCE, FAT ADDED","Rice, white, with tomatoes (and/or tomato based sauce), fat added in cooking" +58160550,"RICE, WHITE, WITH DARK GREEN VEGETABLES, NS AS TO FAT ADDED","Rice, white, with dark green vegetables, NS as to fat added in cooking" +58160560,"RICE, WHITE, WITH DARK GREEN VEGS, FAT NOT ADDED IN CO","Rice, white, with dark green vegetables, fat not added in cooking" +58160570,"RICE, WHITE, WITH DARK GREEN VEGETABLES, FAT ADDED IN COOKIN","Rice, white, with dark green vegetables, fat added in cooking" +58160580,"RICE, WHITE, W/ CARROTS, TOMATOES, +/OR TOM SC, NS AS TO FAT","Rice, white, with carrots and tomatoes (and/or tomato-based sauce), NS as to fat added in cooking" +58160590,"RICE, WHITE, W/CARROTS, TOMATOES+/OR TOM SC, FAT NOT ADDED","Rice, white, with carrots and tomatoes (and/or tomato-based sauce), fat not added in cooking" +58160600,"RICE, WHITE, W/ CARROTS, 
TOMATOES +/OR TOM SC, FAT ADDED","Rice, white, with carrots and tomatoes (and/or tomato-based sauce), fat added in cooking" +58160610,"RICE, WHITE, W/ DK GRN VEGS, TOMATOES +/OR TOM SC, NS FAT","Rice, white, with dark green vegetables and tomatoes (and/or tomato-based sauce), NS as to fat added in cooking" +58160620,"RICE, WHITE, W/ DK GRN VEGS, TOMATOES +/OR TOM SC, FAT NOT","Rice, white, with dark green vegetables and tomatoes (and/or tomato-based sauce), fat not added in cooking" +58160630,"RICE, WHITE, W/DK GRN VEGS, TOMATOES +/OR TOM SC , FAT ADDED","Rice, white, with dark green vegetables and tomatoes (and/or tomato-based sauce), fat added in cooking" +58160640,"RICE, WHITE, WITH CARROTS AND DARK GREEN VEGS, NS FAT","Rice, white, with carrots and dark green vegetables, NS as to fat added in cooking" +58160650,"RICE, WHITE, WITH CARROTS AND DARK GREEN VEGS, FAT NOT ADDED","Rice, white, with carrots and dark green vegetables, fat not added in cooking" +58160660,"RICE, WHITE, WITH CARROTS AND DARK GREEN VEGS, FAT ADDED","Rice, white, with carrots and dark green vegetables, fat added in cooking" +58160670,"RICE, WHITE, W/ CARROTS, DARK GRN VEG&TOMATO/SC, NS FAT","Rice, white, with carrots, dark green vegetables, and tomatoes (and/or tomato-based sauce), NS as to fat added in cooking" +58160680,"RICE, WHITE, W/ CARROTS, DARK GRN VEG,&TOMATOES, NO FAT","Rice, white, with carrots, dark green vegetables, and tomatoes (and/or tomato-based sauce), fat not added in cooking" +58160690,"RICE, WHITE, W/ CARROTS, DARK GRN VEG, & TOMATOES, W/ FAT","Rice, white, with carrots, dark green vegetables, and tomatoes (and/or tomato-based sauce), fat added in cooking" +58160700,"RICE, WHITE, WITH OTHER VEGS, NS AS TO FAT ADDED IN COOKING","Rice, white, with other vegetables, NS as to fat added in cooking" +58160710,"RICE, WHITE, WITH OTHER VEGS, FAT NOT ADDED IN COOKING","Rice, white, with other vegetables, fat not added in cooking" +58160720,"RICE, WHITE, WITH OTHER VEGS, FAT ADDED 
IN COOKING","Rice, white, with other vegetables, fat added in cooking" +58160800,"RICE, WHITE, WITH LENTILS, NS AS TO FAT ADDED IN COOKING","Rice, white, with lentils, NS as to fat added in cooking" +58160805,"RICE, WHITE, WITH LENTILS, FAT ADDED IN COOKING","Rice, white, with lentils, fat added in cooking" +58160810,"RICE, WHITE, WITH LENTILS, FAT NOT ADDED IN COOKING","Rice, white, with lentils, fat not added in cooking" +58161200,"RICE, COOKED W/ COCONUT MILK (ARROZ CON COCO)","Rice, cooked with coconut milk (Arroz con coco)" +58161320,"RICE, BROWN, W/ BEANS","Rice, brown, with beans" +58161325,"RICE, BROWN, W/ BEANS AND TOMATOES","Rice, brown, with beans and tomatoes" +58161420,"RICE, BROWN, W/ CORN, NS AS TO FAT","Rice, brown, with corn, NS as to fat added in cooking" +58161422,"RICE, BROWN, W/ CORN, FAT NOT ADDED","Rice, brown, with corn, fat not added in cooking" +58161424,"RICE, BROWN, W/ CORN, FAT ADDED","Rice, brown, with corn, fat added in cooking" +58161430,"RICE, BROWN, W/ PEAS, NS AS TO FAT","Rice, brown, with peas, NS as to fat added in cooking" +58161432,"RICE, BROWN, W/ PEAS, FAT NOT ADDED","Rice, brown, with peas, fat not added in cooking" +58161434,"RICE, BROWN, W/ PEAS, FAT ADDED","Rice, brown, with peas, fat added in cooking" +58161435,"RICE, BROWN, WITH CARROTS, NS AS TO FAT ADDED IN COOKING","Rice, brown, with carrots, NS as to fat added in cooking" +58161437,"RICE, BROWN, WITH CARROTS, FAT NOT ADDED IN COOKING","Rice, brown, with carrots, fat not added in cooking" +58161439,"RICE, BROWN, WITH CARROTS, FAT ADDED IN COOKING","Rice, brown, with carrots, fat added in cooking" +58161440,"RICE, BROWN, W/ PEAS AND CARROTS, NS AS TO FAT","Rice, brown, with peas and carrots, NS as to fat added in cooking" +58161442,"RICE, BROWN, W/ PEAS AND CARROTS, FAT NOT ADDED","Rice, brown, with peas and carrots, fat not added in cooking" +58161444,"RICE, BROWN, W/ PEAS AND CARROTS, FAT ADDED","Rice, brown, with peas and carrots, fat added in cooking" 
+58161460,"RICE, BROWN, WITH TOMATOES AND/OR TOMATO BASED SAUCE, NS AS","Rice, brown, with tomatoes (and/or tomato based sauce), NS as to fat added in cooking" +58161462,"RICE, BROWN, WITH TOMATOES AND/OR TOMATO BASED SAUCE, FAT NO","Rice, brown, with tomatoes (and/or tomato based sauce), fat not added in cooking" +58161464,"RICE, BROWN, WITH TOMATOES AND/OR TOMATO BASED SAUCE, FAT AD","Rice, brown, with tomatoes (and/or tomato based sauce), fat added in cooking" +58161470,"RICE, BROWN, WITH DARK GREEN VEGETABLES, NS AS TO FAT ADDED","Rice, brown, with dark green vegetables, NS as to fat added in cooking" +58161472,"RICE, BROWN, WITH DARK GREEN VEGETABLES, FAT NOT ADDED IN CO","Rice, brown, with dark green vegetables, fat not added in cooking" +58161474,"RICE, BROWN, WITH DARK GREEN VEGETABLES, FAT ADDED IN COOKIN","Rice, brown, with dark green vegetables, fat added in cooking" +58161480,"RICE, BROWN, WITH CARROTS &TOMATOES/SC, NS AS TO FAT ADDED","Rice, brown, with carrots and tomatoes (and/or tomato-based sauce), NS as to fat added in cooking" +58161482,"RICE, BROWN, WITH CARROTS & TOMATOES/SC, FAT NOT ADDED IN C","Rice, brown, with carrots and tomatoes (and/or tomato-based sauce), fat not added in cooking" +58161484,"RICE, BROWN, WITH CARROTS & TOMATOES/SC, FAT ADDED IN COOKIN","Rice, brown, with carrots and tomatoes (and/or tomato-based sauce), fat added in cooking" +58161490,"RICE, BROWN, W/ DK GRN VEGS, TOMATOES/SC , NS AS TO FAT","Rice, brown, with dark green vegetables and tomatoes (and/or tomato-based sauce) , NS as to fat added in cooking" +58161492,"RICE, BROWN, W/ DK GRN VEGS, TOMATOES/SC , FAT NOT ADDED","Rice, brown, with dark green vegetables and tomatoes (and/or tomato-based sauce), fat not added in cooking" +58161494,"RICE, BROWN, W/ DK GRN VEGS, TOMATOES/SC , FAT ADDED","Rice, brown, with dark green vegetables and tomatoes (and/or tomato-based sauce), fat added in cooking" +58161500,"RICE, BROWN, WITH CARROTS AND DARK GREEN VEGETABLES, NS 
FAT","Rice, brown, with carrots and dark green vegetables, NS as to fat added in cooking" +58161502,"RICE, BROWN, WITH CARROTS AND DARK GREEN VEGETABLES, FAT NOT","Rice, brown, with carrots and dark green vegetables, fat not added in cooking" +58161504,"RICE, BROWN, WITH CARROTS AND DARK GREEN VEGETABLES, FAT ADD","Rice, brown, with carrots and dark green vegetables, fat added in cooking" +58161510,"GRAPE LEAVES STUFFED W/ RICE","Grape leaves stuffed with rice" +58161520,"RICE, BROWN, W/CARROTS, DK GRN VEGS,TOMATOES/SC, NS FAT","Rice, brown, with carrots, dark green vegetables, and tomatoes (and/or tomato-based sauce), NS as to fat added in cooking" +58161522,"RICE, BROWN, W/ CARROTS, DK GRN VEGS,TOMATOES/SC, NO FAT","Rice, brown, with carrots, dark green vegetables, and tomatoes (and/or tomato-based sauce), fat not added in cooking" +58161524,"RICE, BROWN, W/CARROTS, DK GRN VEGS,TOMATOES/SC, FAT ADDED","Rice, brown, with carrots, dark green vegetables, and tomatoes (and/or tomato-based sauce), fat added in cooking" +58161530,"RICE, BROWN, W/ OTHER VEGS, NS AS TO FAT ADDED","Rice, brown, with other vegetables, NS as to fat added in cooking" +58161532,"RICE, BROWN, WITH OTHER VEGETABLES, FAT NOT ADDED IN COOKING","Rice, brown, with other vegetables, fat not added in cooking" +58161534,"RICE, BROWN, WITH OTHER VEGETABLES, FAT ADDED IN COOKING","Rice, brown, with other vegetables, fat added in cooking" +58161710,"RICE CROQUETTE","Rice croquette" +58162090,"STUFFED PEPPER W/ MEAT","Stuffed pepper, with meat" +58162110,"STUFFED PEPPER, W/ RICE & MEAT","Stuffed pepper, with rice and meat" +58162120,"STUFFED PEPPER, W/ RICE, MEATLESS","Stuffed pepper, with rice, meatless" +58162130,"STUFFED TOMATO W/ RICE & MEAT","Stuffed tomato, with rice and meat" +58162140,"STUFFED TOMATO W/ RICE, MEATLESS","Stuffed tomato, with rice, meatless" +58162310,"RICE PILAF","Rice pilaf" +58163130,"DIRTY RICE","Dirty rice" +58163310,"FLAVORED RICE MIXTURE","Flavored rice mixture" 
+58163330,"FLAVORED RICE MIXTURE W/ CHEESE","Flavored rice mixture with cheese" +58163360,"FLAVORED RICE, BROWN & WILD","Flavored rice, brown and wild" +58163380,"FLAVORED RICE&PASTA MIXTURE (INCL RICE-A-RONI)","Flavored rice and pasta mixture" +58163400,"FLAVORED RICE & PASTA MIXTURE, REDUCED SODIUM","Flavored rice and pasta mixture, reduced sodium" +58163410,"SPANISH RICE, FAT ADDED IN COOKING","Spanish rice, fat added in cooking" +58163420,"SPANISH RICE, FAT NOT ADDED IN COOKING","Spanish rice, fat not added in cooking" +58163430,"SPANISH RICE, NS AS TO FAT ADDED IN COOKING","Spanish rice, NS as to fat added in cooking" +58163450,"SPANISH RICE W/ GROUND BEEF","Spanish rice with ground beef" +58163510,"RICE DRESSING (INCLUDE COMBINED W/ BREAD)","Rice dressing" +58164110,"RICE W/ RAISINS","Rice with raisins" +58164210,"RICE DESSERT/SALAD W/ FRUIT","Rice dessert or salad with fruit" +58164500,"RICE, WHITE, WITH CHEESE AND/OR CREAM BASED SAUCE, NS FAT","Rice, white, with cheese and/or cream based sauce, NS as to fat added in cooking" +58164510,"RICE, WHITE, WITH CHEESE AND/OR CREAM BASED SAUCE, FAT NOT A","Rice, white, with cheese and/or cream based sauce, fat not added in cooking" +58164520,"RICE, WHITE, WITH CHEESE AND/OR CREAM BASED SAUCE, FAT ADDED","Rice, white, with cheese and/or cream based sauce, fat added in cooking" +58164530,"RICE, WHITE, WITH GRAVY, NS AS TO FAT ADDED IN COOKING","Rice, white, with gravy, NS as to fat added in cooking" +58164540,"RICE, WHITE, WITH GRAVY, FAT NOT ADDED IN COOKING","Rice, white, with gravy, fat not added in cooking" +58164550,"RICE, WHITE, WITH GRAVY, FAT ADDED IN COOKING","Rice, white, with gravy, fat added in cooking" +58164560,"RICE, WHITE, WITH SOY BASED SAUCE, NS AS TO FAT ADDED IN COO","Rice, white, with soy-based sauce, NS as to fat added in cooking" +58164570,"RICE, WHITE, WITH SOY BASED SAUCE, FAT NOT ADDED IN COOKING","Rice, white, with soy-based sauce, fat not added in cooking" +58164580,"RICE, WHITE, WITH SOY 
BASED SAUCE, FAT ADDED IN COOKING","Rice, white, with soy-based sauce, fat added in cooking" +58164800,"RICE, BROWN, WITH CHEESE AND/OR CREAM BASED SAUCE, NS FAT","Rice, brown, with cheese and/or cream based sauce, NS as to fat added in cooking" +58164810,"RICE, BROWN, WITH CHEESE AND/OR CREAM BASED SAUCE, FAT NOT A","Rice, brown, with cheese and/or cream based sauce, fat not added in cooking" +58164820,"RICE, BROWN, WITH CHEESE AND/OR CREAM BASED SAUCE, FAT ADDED","Rice, brown, with cheese and/or cream based sauce, fat added in cooking" +58164830,"RICE, BROWN, WITH GRAVY, NS AS TO FAT ADDED IN COOKING","Rice, brown, with gravy, NS as to fat added in cooking" +58164840,"RICE, BROWN, WITH GRAVY, FAT NOT ADDED IN COOKING","Rice, brown, with gravy, fat not added in cooking" +58164850,"RICE, BROWN, WITH GRAVY, FAT ADDED IN COOKING","Rice, brown, with gravy, fat added in cooking" +58164860,"RICE, BROWN, WITH SOY BASED SAUCE, NS AS TO FAT ADDED IN COO","Rice, brown, with soy-based sauce, NS as to fat added in cooking" +58164870,"RICE, BROWN, WITH A SOY BASED SAUCE, FAT NOT ADDED IN COOKI","Rice, brown, with soy-based sauce, fat not added in cooking" +58164880,"RICE, BROWN, WITH SOY BASED SAUCE, FAT ADDED IN COOKING","Rice, brown, with soy-based sauce, fat added in cooking" +58165000,"RICE, WHITE, WITH VEGS, CHEESE +/OR CREAM BASED SC, NS FAT","Rice, white, with vegetables, cheese and/or cream based sauce, NS as to fat added in cooking" +58165010,"RICE, WHITE, WITH VEGS, CHEESE +/OR CREAM BASED SC, FAT NOT","Rice, white, with vegetables, cheese and/or cream based sauce, fat not added in cooking" +58165020,"RICE, WHITE, WITH VEGS, CHEESE +/OR CREAM BASED SC,FAT ADDED","Rice, white, with vegetables, cheese and/or cream based sauce, fat added in cooking" +58165030,"RICE, WHITE, WITH VEGETABLES AND GRAVY, NS AS TO FAT ADDED","Rice, white, with vegetables and gravy, NS as to fat added in cooking" +58165040,"RICE, WHITE, WITH VEGETABLES AND GRAVY, FAT NOT ADDED","Rice, white, 
with vegetables and gravy, fat not added in cooking" +58165050,"RICE, WHITE, WITH VEGETABLES AND GRAVY, FAT ADDED IN COOKING","Rice, white, with vegetables and gravy, fat added in cooking" +58165060,"RICE, WHITE, WITH VEGETABLES, SOY-BASED SAUCE, NS AS TO FAT","Rice, white, with vegetables, soy-based sauce, NS as to fat added in cooking" +58165070,"RICE, WHITE, WITH VEGETABLES, SOY-BASED SAUCE, FAT NOT ADDED","Rice, white, with vegetables, soy-based sauce, fat not added in cooking" +58165080,"RICE, WHITE, WITH VEGETABLES, SOY-BASED SAUCE, FAT ADDED IN","Rice, white, with vegetables, soy-based sauce, fat added in cooking" +58165400,"RICE, BROWN, WITH VEGS, CHEESE +/OR CREAM BASED SC, NS FAT","Rice, brown, with vegetables, cheese and/or cream based sauce, NS as to fat added in cooking" +58165410,"RICE, BROWN, WITH VEGS, CHEESE +/OR CREAM BASED SC, FAT NOT","Rice, brown, with vegetables, cheese and/or cream based sauce, fat not added in cooking" +58165420,"RICE, BROWN, WITH VEGS, CHEESE +/OR CREAM BASED SC,FAT ADDED","Rice, brown, with vegetables, cheese and/or cream based sauce, fat added in cooking" +58165430,"RICE, BROWN, WITH VEGETABLES AND GRAVY, NS AS TO FAT ADDED I","Rice, brown, with vegetables and gravy, NS as to fat added in cooking" +58165440,"RICE, BROWN, WITH VEGETABLES AND GRAVY, FAT NOT ADDED","Rice, brown, with vegetables and gravy, fat not added in cooking" +58165450,"RICE, BROWN, WITH VEGETABLES AND GRAVY, FAT ADDED IN COOKING","Rice, brown, with vegetables and gravy, fat added in cooking" +58165460,"RICE, BROWN, WITH VEGETABLES, SOY-BASED SAUCE, NS AS TO FAT","Rice, brown, with vegetables, soy-based sauce, NS as to fat added in cooking" +58165470,"RICE, BROWN, WITH VEGETABLES, SOY-BASED SAUCE, FAT NOT ADDED","Rice, brown, with vegetables, soy-based sauce, fat not added in cooking" +58165480,"RICE, BROWN, WITH VEGETABLES, SOY-BASED SAUCE, FAT ADDED","Rice, brown, with vegetables, soy-based sauce, fat added in cooking" +58174000,"UPMA (INDIAN 
BREAKFAST DISH)","Upma (Indian breakfast dish)" +58175110,"TABBOULEH (INCLUDE TABBULI)","Tabbouleh (bulgar with tomatoes and parsley)" +58200100,"WRAP SANDWICH, W/ MEAT, POULTRY OR FISH, VEGETABLES & RICE","Wrap sandwich, filled with meat, poultry, or fish, vegetables, and rice" +58200200,"WRAP SANDWICH, W/ VEGETABLES & RICE","Wrap sandwich, filled with vegetables and rice" +58200250,"WRAP SANDWICH, W/ VEGETABLES","Wrap sandwich, filled with vegetables" +58200300,"WRAP SANDWICH, W/ MEAT, POULTRY, OR FISH, VEG, RICE & CHEESE","Wrap sandwich, filled with meat, poultry, or fish, vegetables, rice, and cheese" +58301020,"LASAGNA W/ CHEESE & SAUCE (DIET FROZEN MEAL)","Lasagna with cheese and sauce (diet frozen meal)" +58301030,"VEAL LASAGNA (DIET FROZEN MEAL) (INCL LEAN CUISINE)","Veal lasagna (diet frozen meal)" +58301050,"LASAGNA, W/ CHEESE & MEAT SAUCE (DIET FROZEN MEAL)","Lasagna with cheese and meat sauce (diet frozen meal)" +58301080,"LASAGNA W/CHEESE&MEAT SAU,REDUCED FAT&NA(DIET FROZ)","Lasagna with cheese and meat sauce, reduced fat and sodium (diet frozen meal)" +58301110,"VEGETABLE LASAGNA (FROZEN MEAL)","Vegetable lasagna (frozen meal)" +58301150,"ZUCCHINI LASAGNA (DIET FROZEN MEAL)","Zucchini lasagna (diet frozen meal)" +58302000,"MACARONI & CHEESE (DIET FROZEN MEAL)","Macaroni and cheese (diet frozen meal)" +58302050,"BEEF & NOODLES W/ MEAT SCE & CHEESE (DIET FRZ MEAL)","Beef and noodles with meat sauce and cheese (diet frozen meal)" +58302060,"SPAG W/ BEEF, TOM-BASED SAUCE, LOWFAT, RED SODIUM, FRZ, DIET","Spaghetti or noodles with beef in tomato-based sauce, lowfat, reduced sodium (diet frozen meal)" +58302080,"NOODLES W/ VEG, TOM-BASED SAUCE, FRZ, DIET","Noodles with vegetables in tomato-based sauce (diet frozen meal)" +58303100,"RICE W/ BROC CHEESE SCE (FRZ SIDE DISH)","Rice, with broccoli, cheese sauce (frozen side dish)" +58303200,"RICE,GREEN BEANS,WATER CHESTNUTS IN SCE (FRZ DISH)","Rice, with green beans, water chestnuts, in sherry mushroom sauce 
(frozen side dish)" +58304010,"SPAGHETTI & MEATBALLS DINNER, NFS (FROZEN MEAL)","Spaghetti and meatballs dinner, NFS (frozen meal)" +58304020,"SPAGHETTI,MEATBALLS,TOM SCE,APPLES,BREAD(FROZ MEAL)","Spaghetti and meatballs with tomato sauce, sliced apples, bread (frozen meal)" +58304050,"SPAGHETTI W/ MEAT & MUSHROOM SAUCE (DIET FROZ MEAL)","Spaghetti with meat and mushroom sauce (diet frozen meal)" +58304060,"SPAGHETTI W/ MEAT SAUCE (DIET FROZEN MEAL)","Spaghetti with meat sauce (diet frozen meal)" +58304200,"RAVIOLI, CHEESE-FILLED, TOMATO SCE (DIET FROZ MEAL)","Ravioli, cheese-filled, with tomato sauce (diet frozen meal)" +58304220,"RIGATONI W/ MEAT SCE & CHEESE (DIET FRZ MEAL)","Rigatoni with meat sauce and cheese (diet frozen meal)" +58304230,"RAVIOLI, CHEESE-FILLED W/ VEG & FRUIT (FZN MEAL)","Ravioli, cheese-filled, with vegetable and fruit (frozen meal)" +58304250,"MANICOTTI W/ CHEESE, TOMATO SAUCE (DIET FROZ MEAL)","Manicotti, cheese-filled, with tomato sauce (diet frozen meal)" +58304300,"CANNELLONI, CHEESE-FILLED, TOM SCE (DIET FROZ MEAL)","Cannelloni, cheese-filled, with tomato sauce (diet frozen meal)" +58304400,"LINGUINI W/ VEG & SEAFOOD IN SCE (DIET FROZEN MEAL)","Linguini with vegetables and seafood in white wine sauce (diet frozen meal)" +58305250,"PASTA,W/ VEGETABLES & CHEESE SAUCE (DIET FROZ MEAL)","Pasta with vegetable and cheese sauce (diet frozen meal)" +58306010,"BEEF ENCHILADA DINNER, NFS (FROZEN MEAL)","Beef enchilada dinner, NFS (frozen meal)" +58306020,"BEEF ENCHILADA, GRAVY, RICE, REFRIED BEANS (FROZEN)","Beef enchilada, chili gravy, rice, refried beans (frozen meal)" +58306070,"CHEESE ENCHILADA (FROZEN MEAL)","Cheese enchilada (frozen meal)" +58306100,"CHICKEN ENCHILADA ( DIET FROZEN MEAL)","Chicken enchilada (diet frozen meal)" +58306200,"CHICKEN FAJITAS (DIET FROZEN MEAL)","Chicken fajitas (diet frozen meal)" +58306500,"CHICKEN BURRITOS (DIET FROZEN MEAL)","Chicken burritos (diet frozen meal)" +58310210,"SAUSAGE & FRENCH TOAST (FROZEN 
MEAL)","Sausage and french toast (frozen meal)" +58310310,"PANCAKE & SAUSAGE (FROZEN MEAL)","Pancakes and sausage (frozen meal)" +58400000,"SOUP, NFS","Soup, NFS" +58400100,"NOODLE SOUP, NFS","Noodle soup, NFS" +58400200,"RICE SOUP, NFS","Rice soup, NFS" +58401010,"BARLEY SOUP, HOME RECIPE, CANNED, OR READY-TO-SERVE","Barley soup, home recipe, canned, or ready-to-serve" +58401200,"BARLEY SOUP, SWEET, WITH OR WITHOUT NUTS, ASIAN STYLE","Barley soup, sweet, with or without nuts, Asian Style" +58402010,"BEEF NOODLE SOUP, CANNED OR READY-TO-SERVE","Beef noodle soup, canned or ready-to-serve" +58402020,"BEEF DUMPLING SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Beef dumpling soup, home recipe, canned or ready-to-serve" +58402030,"BEEF RICE SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Beef rice soup, home recipe, canned or ready-to-serve" +58402100,"BEEF NOODLE SOUP, HOME RECIPE","Beef noodle soup, home recipe" +58403010,"CHICKEN OR TURKEY NOODLE SOUP, CANNED OR READY-TO-SERVE","Chicken or turkey noodle soup, canned or ready-to-serve" +58403040,"CHICKEN OR TURKEY NOODLE SOUP, HOME RECIPE","Chicken or turkey noodle soup, home recipe" +58403050,"CHICKEN OR TURKEY NOODLE SOUP, CREAM OF, HOME RECIPE, CANNED","Chicken or turkey noodle soup, cream of, home recipe, canned, or ready-to-serve" +58403060,"CHICKEN OR TURKEY NOODLE SOUP, REDUCED SODIUM,CANNED, RTF","Chicken or turkey noodle soup, reduced sodium, canned or ready-to-serve" +58403100,"NOODLE & POTATO SOUP, P.R.","Noodle and potato soup, Puerto Rican style" +58404010,"CHICKEN OR TURKEY RICE SOUP, CANNED, OR READY-TO-SERVE","Chicken or turkey rice soup, canned, or ready-to-serve" +58404030,"CHICKEN OR TURKEY RICE SOUP, HOME RECIPE","Chicken or turkey rice soup, home recipe" +58404040,"CHICKEN OR TURKEY RICE SOUP, REDUCED SODIUM, CANNED, PREPARE","Chicken or turkey rice soup, reduced sodium, canned, prepared with water or ready-to-serve" +58404050,"CHICKEN OR TURKEY RICE SOUP, REDUCED SODIUM, CANNED, PREPARE","Chicken 
or turkey rice soup, reduced sodium, canned, prepared with milk" +58404100,"RICE AND POTATO SOUP, P.R.","Rice and potato soup, Puerto Rican style" +58404500,"MATZO BALL SOUP","Matzo ball soup" +58404510,"CHICKEN OR TURKEY SOUP WITH DUMPLINGS AND POTATOES,","Chicken or turkey soup with dumplings and potatoes, home recipe, canned, or ready-to-serve" +58404520,"CHICKEN OR TURKEY SOUP WITH DUMPLINGS, HOME RECIPE, CANNED O","Chicken or turkey soup with dumplings, home recipe, canned or ready-to-serve" +58407010,"INSTANT SOUP, NOODLE","Instant soup, noodle" +58407030,"SOUP, MOSTLY NOODLES","Soup, mostly noodles" +58407035,"SOUP, MOSTLY NOODLES, REDUCED SODIUM","Soup, mostly noodles, reduced sodium" +58407050,"INSTANT SOUP, NOODLE W/ EGG, SHRIMP OR CHICKEN","Instant soup, noodle with egg, shrimp or chicken" +58408010,"WON TON (WONTON) SOUP","Won ton (wonton) soup" +58408500,"NOODLE SOUP WITH VEGETABLES, ASIAN STYLE","Noodle soup with vegetables, Asian style" +58409000,"NOODLE SOUP,W/ FISH BALL,SHRIMP,&DK GREEN LEAFY VEG","Noodle soup, with fish ball, shrimp, and dark green leafy vegetable" +58421000,"SOPA SECA (DRY SOUP), Mexican style, NFS","Sopa seca (dry soup), Mexican style, NFS" +58421010,"SOPA SECA DE FIDEO, MEXICAN STYLE, MADE WITH DRY NOODLES, HO","Sopa Seca de Fideo, Mexican style, made with dry noodles, home recipe" +58421020,"SOPA DE FIDEO AGUADA, MEXICAN STYLE NOODLE SOUP, HOME RECIPE","Sopa de Fideo Aguada, Mexican style noodle soup, home recipe" +58421060,"SOPA SECA DE ARROZ (DRY RICE SOUP), MEXICAN STYLE, HOME RECI","Sopa seca de arroz (dry rice soup), Mexican style, home recipe" +58421080,"SOPA DE TORTILLA, MEXICAN STYLE TORTILLA SOUP, HOME RECIPE","Sopa de tortilla, Mexican style tortilla soup, home recipe" +58503000,"MACARONI, TOMATOES & BEEF, BABY, NS STR/JR","Macaroni, tomatoes, and beef, baby food, NS as to strained or junior" +58503010,"MACARONI, TOMATOES & BEEF, BABY, STR","Macaroni, tomatoes, and beef, baby food, strained" +58503020,"MACARONI, 
TOMATOES & BEEF, BABY, JR","Macaroni, tomatoes, and beef, baby food, junior" +58503050,"MACARONI W/ BEEF & TOM SCE, BABY FOOD, TODDLER","Macaroni with beef and tomato sauce, baby food, toddler" +58508000,"MACARONI & CHEESE, BABY, STRAINED","Macaroni and cheese, baby food, strained" +58508300,"MACARONI & CHEESE, BABY, TODDLER","Macaroni and cheese, baby food, toddler" +58509020,"SPAGHETTI, TOMATO SAUCE & BEEF, BABY, JUNIOR","Spaghetti, tomato sauce, and beef, baby food, junior" +58509100,"RAVIOLI, CHEESE-FILLED, W/ TOM SAUCE, BABY, TODDLER","Ravioli, cheese-filled, with tomato sauce, baby food, toddler" +58509200,"MACARONI W/ VEGETABLES, BABY, STRAINED","Macaroni with vegetables, baby food, strained" +59003000,"MEAT SUBSTITUTE,CEREAL- & VEGETABLE PROTEIN-BASED","Meat substitute, cereal- and vegetable protein-based, fried" +61100500,"CALAMONDIN, RAW","Calamondin, raw" +61101010,"GRAPEFRUIT, RAW (INCLUDE GRAPEFRUIT, NFS)","Grapefruit, raw" +61101200,"GRAPEFRUIT, CANNED OR FROZEN, NS AS TO ADDED SWTNER","Grapefruit, canned or frozen, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +61101220,"GRAPEFRUIT, CANNED OR FROZEN, UNSWT, WATER PACK","Grapefruit, canned or frozen, unsweetened, water pack" +61101230,"GRAPEFRUIT, CANNED OR FROZEN, IN LIGHT SYRUP","Grapefruit, canned or frozen, in light syrup" +61104010,"GRAPEFRUIT & ORANGE SECTIONS, RAW","Grapefruit and orange sections, raw" +61104200,"GRAPEFRUIT & ORANGE SEC, CKD/CND/FRZ, NS SWEETENER","Grapefruit and orange sections, cooked, canned, or frozen, NS as to added sweetener" +61104220,"GRAPEFRUIT & ORANGE SEC, CKD/CND/FRZ, UNSWEETENED","Grapefruit and orange sections, cooked, canned, or frozen, unsweetened, water pack" +61104230,"GRAPEFRUIT & ORANGE SEC, CKD/CND/FRZ, LIGHT SYRUP","Grapefruit and orange sections, cooked, canned, or frozen, in light syrup" +61110010,"KUMQUAT, RAW","Kumquat, raw" +61110230,"KUMQUAT, COOKED OR CANNED, IN SYRUP","Kumquat, cooked or canned, in syrup" 
+61113010,"LEMON, RAW","Lemon, raw" +61113500,"LEMON PIE FILLING","Lemon pie filling" +61116010,"LIME, RAW","Lime, raw" +61119010,"ORANGE, RAW","Orange, raw" +61119020,"ORANGE SECTIONS, CANNED, JUICE PACK","Orange, sections, canned, juice pack" +61119100,"ORANGE PEEL","Orange peel" +61122300,"ORANGES, MANDARIN, CANNED OR FROZEN, SWEETENER NS","Orange, mandarin, canned or frozen, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +61122320,"ORANGES, MANDARIN, CANNED OR FROZEN, JUICE PACK","Orange, mandarin, canned or frozen, juice pack" +61122330,"ORANGES, MANDARIN, CANNED OR FROZEN, IN LIGHT SYRUP","Orange, mandarin, canned or frozen, in light syrup" +61122350,"ORANGES, MANDARIN, CANNED OR FROZEN, DRAINED","Orange, mandarin, canned or frozen, drained" +61125000,"TANGELO, RAW","Tangelo, raw" +61125010,"TANGERINE, RAW (INCLUDE MANDARIN ORANGE, SATSUMA)","Tangerine, raw" +61201010,"GRAPEFRUIT JUICE, FRESHLY SQUEEZED","Grapefruit juice, freshly squeezed" +61201020,"GRAPEFRUIT JUICE, NS AS TO FORM","Grapefruit juice, NS as to form" +61201220,"GRAPEFRUIT JUICE, CANNED, BOTTLED OR IN A CARTON","Grapefruit juice, canned, bottled or in a carton" +61201620,"GRAPEFRUIT JUICE, FROZEN (RECONSTITUTED WITH WATER)","Grapefruit juice, frozen (reconstituted with water)" +61204000,"LEMON JUICE, NS AS TO FORM","Lemon juice, NS as to form" +61204010,"LEMON JUICE, FRESHLY SQUEEZED","Lemon juice, freshly squeezed" +61204200,"LEMON JUICE, CANNED OR BOTTLED","Lemon juice, canned or bottled" +61204600,"LEMON JUICE, FROZEN","Lemon juice, frozen" +61207000,"LIME JUICE, NS AS TO FORM","Lime juice, NS as to form" +61207010,"LIME JUICE, FRESHLY SQUEEZED","Lime juice, freshly squeezed" +61207200,"LIME JUICE, CANNED OR BOTTLED","Lime juice, canned or bottled" +61207600,"LIME JUICE, FROZEN","Lime juice, frozen" +61210000,"ORANGE JUICE, NFS","Orange juice, NFS" +61210010,"ORANGE JUICE, FRESHLY SQUEEZED","Orange juice, freshly squeezed" +61210220,"ORANGE JUICE, CANNED, BOTTLED 
OR IN A CARTON","Orange juice, canned, bottled or in a carton" +61210250,"ORANGE JUICE, W/ CALCIUM, CAN/BOTTLED/CARTON","Orange juice, with calcium added, canned, bottled or in a carton" +61210620,"ORANGE JUICE, FROZEN (RECONSTITUTED WITH WATER)","Orange juice, frozen (reconstituted with water)" +61210720,"ORANGE JUICE, FROZEN, NOT RECONSTITUTED","Orange juice, frozen, not reconstituted" +61210820,"ORANGE JUICE,FROZ, W/,CALCIUM ADDED,RECON W/WATER","Orange juice, frozen, with calcium added (reconstituted with water)" +61213000,"TANGERINE JUICE, NFS","Tangerine juice, NFS" +61213220,"TANGERINE JUICE, CANNED","Tangerine juice, canned" +61213620,"TANGERINE JUICE, FROZEN (RECONSTITUTED)","Tangerine juice, frozen (reconstituted with water)" +61213800,"FRUIT JUICE BLEND, INCL CITRUS, 100% JUICE","Fruit juice blend, including citrus, 100% juice" +61213900,"FRUIT JUICE BLEND, INCL CITRUS, 100% JUICE, W/ CALCIUM","Fruit juice blend, including citrus, 100% juice, with calcium added" +62101000,"FRUIT, DRIED, NFS (ASSUME UNCOOKED)","Fruit, dried, NFS (assume uncooked)" +62101050,"FRUIT MIXTURE, DRIED","Fruit mixture, dried (mixture includes three or more of the following: apples, apricots, dates, papaya, peaches, pears, pineapples, prunes, raisins)" +62101100,"APPLE, DRIED, UNCOOKED","Apple, dried, uncooked" +62101150,"APPLE, DRIED, UNCOOKED, LOW SODIUM","Apple, dried, uncooked, low sodium" +62101200,"APPLE, DRIED, COOKED, NS AS TO ADDED SWEETENER","Apple, dried, cooked, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +62101220,"APPLE, DRIED, COOKED, UNSWEETENED","Apple, dried, cooked, unsweetened" +62101230,"APPLE, DRIED, COOKED, W/ SUGAR","Apple, dried, cooked, with sugar" +62101300,"APPLE CHIPS","Apple chips" +62104100,"APRICOT, DRIED, UNCOOKED","Apricot, dried, uncooked" +62104200,"APRICOT, DRIED, COOKED, NS AS TO ADDED SWEETENER","Apricot, dried, cooked, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +62104220,"APRICOT, 
DRIED, COOKED, UNSWEETENED","Apricot, dried, cooked, unsweetened" +62104230,"APRICOT, DRIED, COOKED, W/ SUGAR","Apricot, dried, cooked, with sugar" +62105000,"BLUEBERRIES, DRIED","Blueberries, dried" +62106000,"CHERRIES, DRIED","Cherries, dried" +62107100,"BANANA FLAKES, DEHYDRATED","Banana flakes, dehydrated" +62107200,"BANANA CHIPS","Banana chips" +62108100,"CURRANTS, DRIED","Currants, dried" +62109100,"CRANBERRIES, DRIED","Cranberries, dried" +62110100,"DATE","Date" +62113100,"FIG, DRIED, UNCOOKED","Fig, dried, uncooked" +62113200,"FIG, DRIED, COOKED, NS AS TO ADDED SWEETENER","Fig, dried, cooked, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +62113220,"FIG, DRIED, COOKED, UNSWEETENED","Fig, dried, cooked, unsweetened" +62113230,"FIG, DRIED, COOKED, W/ SUGAR","Fig, dried, cooked, with sugar" +62114000,"LYCHEE, DRIED (LYCHEE NUTS)","Lychee, dried (lychee nuts)" +62114050,"MANGO, DRIED","Mango, dried" +62114110,"PAPAYA, DRIED","Papaya, dried" +62116100,"PEACH, DRIED, UNCOOKED","Peach, dried, uncooked" +62116200,"PEACH, DRIED, COOKED, NS AS TO ADDED SWEETENER","Peach, dried, cooked, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +62116220,"PEACH, DRIED, COOKED, UNSWEETENED","Peach, dried, cooked, unsweetened" +62116230,"PEACH, DRIED, COOKED, W/ SUGAR","Peach, dried, cooked, with sugar" +62119100,"PEAR, DRIED, UNCOOKED","Pear, dried, uncooked" +62119200,"PEAR, DRIED, COOKED, NS AS TO SWEETENER","Pear, dried, cooked, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +62119220,"PEAR, DRIED, COOKED, UNSWEETENED","Pear, dried, cooked, unsweetened" +62119230,"PEAR, DRIED, COOKED, W/ SUGAR","Pear, dried, cooked, with sugar" +62120100,"PINEAPPLE, DRIED","Pineapple, dried" +62121100,"PLUM, ROCK SALT, DRIED","Plum, rock salt, dried" +62122100,"PRUNE, DRIED, UNCOOKED","Prune, dried, uncooked" +62122200,"PRUNE, DRIED, COOKED, NS AS TO ADDED SWEETENER","Prune, dried, cooked, NS as to sweetened or 
unsweetened; sweetened, NS as to type of sweetener" +62122220,"PRUNE, DRIED, COOKED, UNSWEETENED","Prune, dried, cooked, unsweetened" +62122230,"PRUNE, DRIED, COOKED, W/ SUGAR","Prune, dried, cooked, with sugar" +62125100,"RAISINS (INCLUDE CINNAMON-COATED RAISINS)","Raisins" +62125110,"RAISINS, COOKED","Raisins, cooked" +62126000,"TAMARIND PULP, DRIED, SWEETENED (""PULPITAS"")","Tamarind pulp, dried, sweetened (""Pulpitas"")" +63100100,"FRUIT, NS AS TO TYPE","Fruit, NS as to type" +63101000,"APPLE, RAW","Apple, raw" +63101110,"APPLESAUCE, STEWED APPLES, NS AS TO ADDED SWEETENER","Applesauce, stewed apples, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63101120,"APPLESAUCE, STEWED APPLES, UNSWEETENED (INCL FRESH)","Applesauce, stewed apples, unsweetened" +63101130,"APPLESAUCE, STEWED APPLES, W/ SUGAR","Applesauce, stewed apples, with sugar" +63101140,"APPLESAUCE, STEWED APPLES, W/ LOW CALORIE SWEETENER","Applesauce, stewed apples, sweetened with low calorie sweetener" +63101150,"APPLESAUCE / OTHER FRUITS(INCLUDE MOTT'S FRUIT PAK)","Applesauce with other fruits" +63101210,"APPLE, COOKED OR CANNED, W/ SYRUP","Apple, cooked or canned, with syrup" +63101310,"APPLE, BAKED, NS AS TO ADDED SWEETENER","Apple, baked, NS as to added sweetener" +63101320,"APPLE, BAKED, UNSWEETENED","Apple, baked, unsweetened" +63101330,"APPLE, BAKED, W/ SUGAR","Apple, baked, with sugar" +63101410,"APPLE RINGS, FRIED","Apple rings, fried" +63101420,"APPLE, PICKLED (INCLUDE SPICED)","Apple, pickled" +63101500,"APPLE, FRIED","Apple, fried" +63103010,"APRICOT, RAW","Apricot, raw" +63103110,"APRICOT, COOKED OR CANNED, NS AS TO ADDED SWEETENER","Apricot, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63103120,"APRICOT, COOKED OR CANNED, WATER PACK, UNSWEETENED","Apricot, cooked or canned, unsweetened, water pack" +63103130,"APRICOT, COOKED OR CANNED, IN HEAVY SYRUP","Apricot, cooked or canned, in heavy syrup" 
+63103140,"APRICOT, COOKED OR CANNED, IN LIGHT SYRUP","Apricot, cooked or canned, in light syrup" +63103150,"APRICOT, COOKED OR CANNED, DRAINED SOLIDS","Apricot, cooked or canned, drained solids" +63103170,"APRICOT, COOKED OR CANNED, JUICE PACK","Apricot, cooked or canned, juice pack" +63105010,"AVOCADO, RAW","Avocado, raw" +63107010,"BANANA, RAW","Banana, raw" +63107050,"BANANA, WHITE, RIPE (GUINEO BLANCO MADURO)","Banana, white, ripe (guineo blanco maduro)" +63107070,"BANANA, CHINESE, RAW (INCL CAVENDISH,DWARF,FINGER)","Banana, Chinese, raw" +63107080,"BANANA, RED, RIPE (INCLUDE GUINEO MORADO)","Banana, red, ripe (guineo morado)" +63107090,"BANANA, RED, FRIED","Banana, red, fried" +63107110,"BANANA, BAKED","Banana, baked" +63107210,"BANANA, RIPE, FRIED","Banana, ripe, fried" +63107310,"BANANA, RIPE, BOILED","Banana, ripe, boiled" +63107410,"BANANA, BATTER-DIPPED, FRIED","Banana, batter-dipped, fried" +63109010,"CANTALOUPE (MUSKMELON), RAW (INCLUDE MELON, NFS)","Cantaloupe (muskmelon), raw" +63109610,"CANTALOUPE, FROZEN (BALLS)","Cantaloupe, frozen (balls)" +63109700,"CARAMBOLA (STARFRUIT),RAW","Carambola (starfruit), raw" +63109750,"CARAMBOLA (STARFRUIT), COOKED, W/ SUGAR","Carambola (starfruit), cooked, with sugar" +63110010,"CASSABA MELON, RAW","Cassaba melon, raw" +63111010,"CHERRIES, MARASCHINO","Cherries, maraschino" +63113010,"CHERRIES, SOUR, RED, RAW","Cherries, sour, red, raw" +63113030,"CHERRY PIE FILLING","Cherry pie filling" +63113050,"CHERRY PIE FILLING, LOW CALORIE","Cherry pie filling, low calorie" +63113110,"CHERRIES, SOUR, RED, COOKED, UNSWEETENED","Cherries, sour, red, cooked, unsweetened" +63115010,"CHERRIES, SWEET, RAW (INCLUDE CHERRIES, FRESH, NFS)","Cherries, sweet, raw (Queen Anne, Bing)" +63115110,"CHERRIES, SWEET, COOKED/CANNED,NS AS TO ADDED SWEET","Cherries, sweet, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63115120,"CHERRIES, SWEET, COOKED, UNSWEETENED, WATER PACK","Cherries, sweet, 
cooked, unsweetened, water pack" +63115130,"CHERRIES, SWEET, COOKED OR CANNED, IN HEAVY SYRUP","Cherries, sweet, cooked or canned, in heavy syrup" +63115140,"CHERRIES, SWEET, COOKED OR CANNED, IN LIGHT SYRUP","Cherries, sweet, cooked or canned, in light syrup" +63115150,"CHERRIES, SWEET, COOKED OR CANNED, DRAINED SOLIDS","Cherries, sweet, cooked or canned, drained solids" +63115170,"CHERRIES, SWEET, COOKED OR CANNED, JUICE PACK","Cherries, sweet, cooked or canned, juice pack" +63115200,"CHERRIES, FROZEN","Cherries, frozen" +63117010,"CURRANTS, RAW","Currants, raw" +63119010,"FIG, RAW","Fig, raw" +63119110,"FIG, COOKED OR CANNED, NS AS TO ADDED SWEETENER","Fig, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63119120,"FIG, COOKED OR CANNED, UNSWEETENED, WATER PACK","Fig, cooked or canned, unsweetened, water pack" +63119130,"FIG, COOKED OR CANNED, IN HEAVY SYRUP","Fig, cooked or canned, in heavy syrup" +63119140,"FIGS, COOKED OR CANNED, IN LIGHT SYRUP","Figs, cooked or canned, in light syrup" +63123000,"GRAPES, RAW, NS AS TO TYPE","Grapes, raw, NS as to type" +63123010,"GRAPES, EUROPEAN TYPE,ADHERENT SKIN,RAW(INCL TOKAY)","Grapes, European type, adherent skin, raw" +63123020,"GRAPES, AMERICAN TYPE, SLIP SKIN, RAW(INCL CONCORD)","Grapes, American type, slip skin, raw" +63123110,"GRAPES, SEEDLESS, COOKED/CANNED, NS ADDED SWEETNER","Grapes, seedless, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63123120,"GRAPES, SEEDLESS, COOKED OR CANNED, UNSWEETENED","Grapes, seedless, cooked or canned, unsweetened, water pack" +63123130,"GRAPES, SEEDLESS, COOKED OR CANNED, IN HEAVY SYRUP","Grapes, seedless, cooked or canned, in heavy syrup" +63125010,"GUAVA, RAW","Guava, raw" +63125100,"GUAVA SHELL (ASSUME CANNED IN HEAVY SYRUP)","Guava shell (assume canned in heavy syrup)" +63126010,"JUNEBERRY, RAW","Juneberry, raw" +63126500,"KIWI FRUIT, RAW","Kiwi fruit, raw" +63126510,"LYCHEE, 
RAW","Lychee, raw" +63126600,"LYCHEE, COOKED OR CANNED, IN SUGAR OR SYRUP","Lychee, cooked or canned, in sugar or syrup" +63127010,"HONEYDEW MELON, RAW","Honeydew melon, raw" +63127610,"HONEYDEW MELON, FROZEN (BALLS)","Honeydew, frozen (balls)" +63129010,"MANGO, RAW","Mango, raw" +63129020,"MANGO, PICKLED","Mango, pickled" +63129030,"MANGO, COOKED","Mango, cooked" +63131010,"NECTARINE, RAW","Nectarine, raw" +63131110,"NECTARINE, COOKED","Nectarine, cooked" +63133010,"PAPAYA, RAW","Papaya, raw" +63133050,"PAPAYA, GREEN, COOKED","Papaya, green, cooked" +63133100,"PAPAYA, COOKED OR CANNED, IN SUGAR OR SYRUP","Papaya, cooked or canned, in sugar or syrup" +63134010,"PASSION FRUIT, RAW","Passion fruit, raw" +63135010,"PEACH, RAW","Peach, raw" +63135110,"PEACH, COOKED OR CANNED, NS AS TO SWEETENER","Peach, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63135120,"PEACH, COOKED OR CANNED, UNSWEETENED, WATER PACK","Peach, cooked or canned, unsweetened, water pack" +63135130,"PEACH, COOKED OR CANNED, IN HEAVY SYRUP","Peach, cooked or canned, in heavy syrup" +63135140,"PEACH, COOKED OR CANNED, IN LIGHT OR MEDIUM SYRUP","Peach, cooked or canned, in light or medium syrup" +63135150,"PEACH, COOKED OR CANNED, DRAINED SOLIDS","Peach, cooked or canned, drained solids" +63135170,"PEACH, COOKED OR CANNED, JUICE PACK","Peach, cooked or canned, juice pack" +63135610,"PEACH, FROZEN, NS AS TO ADDED SWEETENER","Peach, frozen, NS as to added sweetener" +63135620,"PEACH, FROZEN, UNSWEETENED","Peach, frozen, unsweetened" +63135630,"PEACH, FROZEN, W/ SUGAR","Peach, frozen, with sugar" +63135650,"PEACH, PICKLED","Peach, pickled" +63135660,"PEACH, SPICED","Peach, spiced" +63137010,"PEAR, RAW","Pear, raw" +63137050,"PEAR, JAPANESE, RAW","Pear, Japanese, raw" +63137110,"PEAR, COOKED OR CANNED, NS AS TO ADDED SWEETENER","Pear, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63137120,"PEAR, COOKED OR CANNED, 
UNSWEETENED, WATER PACK","Pear, cooked or canned, unsweetened, water pack" +63137130,"PEAR, COOKED OR CANNED, IN HEAVY SYRUP","Pear, cooked or canned, in heavy syrup" +63137140,"PEAR, COOKED OR CANNED, IN LIGHT SYRUP","Pear, cooked or canned, in light syrup" +63137150,"PEAR, COOKED OR CANNED, DRAINED SOLIDS","Pear, cooked or canned, drained solids" +63137170,"PEAR, COOKED OR CANNED, JUICE PACK","Pear, cooked or canned, juice pack" +63139010,"PERSIMMONS, RAW","Persimmon, raw" +63141010,"PINEAPPLE, RAW","Pineapple, raw" +63141110,"PINEAPPLE, CANNED, NS AS TO ADDED SWEETENER","Pineapple, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63141120,"PINEAPPLE, COOKED OR CANNED, UNSWEETENED, WATERPACK","Pineapple, cooked or canned, unsweetened, waterpack" +63141130,"PINEAPPLE, COOKED OR CANNED, IN HEAVY SYRUP","Pineapple, cooked or canned, in heavy syrup" +63141140,"PINEAPPLE, COOKED OR CANNED, IN LIGHT SYRUP","Pineapple, cooked or canned, in light syrup" +63141150,"PINEAPPLE, COOKED OR CANNED, DRAINED SOLIDS","Pineapple, cooked or canned, drained solids" +63141170,"PINEAPPLE, COOKED OR CANNED, JUICE PACK","Pineapple, cooked or canned, juice pack" +63143010,"PLUM, RAW","Plum, raw" +63143110,"PLUM, COOKED OR CANNED, NS AS TO ADDED SWEETENER","Plum, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63143120,"PLUM, COOKED OR CANNED, UNSWEETENED, WATER PACK","Plum, cooked or canned, unsweetened, water pack" +63143130,"PLUM, COOKED OR CANNED, IN HEAVY SYRUP","Plum, cooked or canned, in heavy syrup" +63143140,"PLUM, COOKED OR CANNED, IN LIGHT SYRUP","Plum, cooked or canned, in light syrup" +63143150,"PLUM, COOKED OR CANNED, DRAINED SOLIDS","Plum, cooked or canned, drained solids" +63143170,"PLUM, COOKED OR CANNED, JUICE PACK","Plum, cooked or canned, juice pack" +63143650,"PLUM, PICKLED","Plum, pickled" +63145010,"POMEGRANATE, RAW","Pomegranate, raw" +63147010,"RHUBARB, RAW","Rhubarb, raw" 
+63147110,"RHUBARB, COOKED OR CANNED, NS ADDED SWEETNER","Rhubarb, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63147120,"RHUBARB, COOKED OR CANNED, UNSWEETENED","Rhubarb, cooked or canned, unsweetened" +63147130,"RHUBARB, COOKED OR CANNED, IN HEAVY SYRUP","Rhubarb, cooked or canned, in heavy syrup" +63147140,"RHUBARB, COOKED OR CANNED, IN LIGHT SYRUP","Rhubarb, cooked or canned, in light syrup" +63147150,"RHUBARB, COOKED OR CANNED, DRAINED SOLIDS","Rhubarb, cooked or canned, drained solids" +63147600,"RHUBARB, FROZEN, NS AS TO ADDED SWEETENER","Rhubarb, frozen, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63147620,"RHUBARB, FROZEN, W/ SUGAR","Rhubarb, frozen, with sugar" +63148750,"TAMARIND, RAW","Tamarind, raw" +63149010,"WATERMELON, RAW","Watermelon, raw" +63200100,"BERRIES, RAW, NFS","Berries, raw, NFS" +63200200,"BERRIES, FROZEN, NFS","Berries, frozen, NFS" +63201010,"BLACKBERRIES, RAW","Blackberries, raw" +63201110,"BLACKBERRIES, COOKED OR CANNED, NS ADDED SWEETNER","Blackberries, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63201130,"BLACKBERRIES, COOKED OR CANNED, IN HEAVY SYRUP","Blackberries, cooked or canned, in heavy syrup" +63201600,"BLACKBERRIES, FROZEN","Blackberries, frozen" +63201800,"BLACKBERRIES, FROZEN, SWEETENED, NFS","Blackberries, frozen, sweetened, NS as to type of sweetener" +63203010,"BLUEBERRIES, RAW","Blueberries, raw" +63203110,"BLUEBERRIES, COOKED OR CANNED, NS AS TO SWEETENER","Blueberries, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63203120,"BLUEBERRIES, COOKED OR CANNED, UNSWEET, WATER PACK","Blueberries, cooked or canned, unsweetened, water pack" +63203125,"BLUEBERRIES, COOKED OR CANNED, IN LIGHT SYRUP","Blueberries, cooked or canned, in light syrup" +63203130,"BLUEBERRIES, COOKED OR CANNED, IN HEAVY SYRUP","Blueberries, cooked or canned, in heavy syrup" 
+63203550,"BLUEBERRIES, FROZEN, SWEETENED","Blueberries, frozen, sweetened" +63203570,"BLUEBERRIES, FROZEN, NS SWEETENED OR UNSWEETENED","Blueberries, frozen, NS as to sweetened or unsweetened" +63203600,"BLUEBERRIES, FROZEN, UNSWEETENED","Blueberries, frozen, unsweetened" +63203700,"BLUEBERRY PIE FILLING","Blueberry pie filling" +63205010,"BOYSENBERRIES, RAW","Boysenberries, raw" +63205600,"BOYSENBERRIES, FROZEN","Boysenberries, frozen" +63207000,"CRANBERRIES, NS AS TO RAW, COOKED OR CANNED","Cranberries, NS as to raw, cooked, or canned" +63207010,"CRANBERRIES, RAW","Cranberries, raw" +63207110,"CRANBERRIES, COOKED OR CANNED (INCL CRANBERRY SCE)","Cranberries, cooked or canned" +63208000,"DEWBERRIES, RAW","Dewberries, raw" +63214000,"HUCKLEBERRIES, RAW","Huckleberries, raw" +63215010,"LOGANBERRIES, RAW","Loganberries, raw" +63215600,"LOGANBERRIES, FROZEN","Loganberries, frozen" +63217010,"MULBERRIES, RAW","Mulberries, raw" +63219000,"RASPBERRIES, RAW, NS AS TO COLOR","Raspberries, raw, NS as to color" +63219010,"RASPBERRIES, BLACK, RAW","Raspberries, black, raw" +63219020,"RASPBERRIES, RED, RAW","Raspberries, red, raw" +63219110,"RASPBERRIES, COOKED OR CANNED, NS ADDED SWEETENER","Raspberries, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63219120,"RASPBERRIES, CKD OR CND, UNSWEETENED, WATER PACK","Raspberries, cooked or canned, unsweetened, water pack" +63219130,"RASPBERRIES, COOKED OR CANNED, IN HEAVY SYRUP","Raspberries, cooked or canned, in heavy syrup" +63219600,"RASPBERRIES, FROZEN, NS AS TO ADDED SWEETNER","Raspberries, frozen, NS as to added sweetener" +63219610,"RASPBERRIES, FROZEN, UNSWEETENED","Raspberries, frozen, unsweetened" +63219620,"RASPBERRIES, FROZEN, W/ SUGAR","Raspberries, frozen, with sugar" +63223020,"STRAWBERRIES, RAW","Strawberries, raw" +63223030,"STRAWBERRIES, RAW, W/ SUGAR","Strawberries, raw, with sugar" +63223110,"STRAWBERRIES, COOKED OR CANNED, NS ADDED SWEETNER","Strawberries, cooked or 
canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63223120,"STRAWBERRIES, CKD OR CND, UNSWEETENED, WATER PACK","Strawberries, cooked or canned, unsweetened, water pack" +63223130,"STRAWBERRIES, COOKED OR CANNED, IN SYRUP","Strawberries, cooked or canned, in syrup" +63223600,"STRAWBERRIES, FROZEN, NS AS TO ADDED SWEETNER","Strawberries, frozen, NS as to added sweetener" +63223610,"STRAWBERRIES, FROZEN, UNSWEETENED","Strawberries, frozen, unsweetened" +63223620,"STRAWBERRIES, FROZEN, W/ SUGAR","Strawberries, frozen, with sugar" +63224000,"YOUNGBERRIES, RAW","Youngberries, raw" +63301010,"AMBROSIA","Ambrosia" +63307010,"CRANBERRY-ORANGE RELISH, UNCOOKED","Cranberry-orange relish, uncooked" +63307100,"CRANBERRY-RASPBERRY SAUCE","Cranberry-raspberry Sauce" +63311000,"FRUIT SALAD, FRESH OR RAW, (EXCL CITRUS), NO DRSG","Fruit salad, fresh or raw, (excluding citrus fruits), no dressing" +63311050,"FRUIT SALAD, FRESH OR RAW, (INCL CITRUS), NO DRSG","Fruit salad, fresh or raw, (including citrus fruits), no dressing" +63311080,"FRUIT COCKTAIL OR MIX, FROZEN","Fruit cocktail or mix, frozen" +63311110,"FRUIT COCKTAIL, COOKED OR CANNED, NS ADDED SWEETNER","Fruit cocktail, cooked or canned, NS as to sweetened or unsweetened; sweetened, NS as to type of sweetener" +63311120,"FRUIT COCKTAIL, CKD OR CND, UNSWEET, WATER PACK","Fruit cocktail, cooked or canned, unsweetened, water pack" +63311130,"FRUIT COCKTAIL, COOKED OR CANNED, IN HEAVY SYRUP","Fruit cocktail, cooked or canned, in heavy syrup" +63311140,"FRUIT COCKTAIL, COOKED OR CANNED, IN LIGHT SYRUP","Fruit cocktail, cooked or canned, in light syrup" +63311145,"TROPICAL FRUIT COCKTAIL, COOKED OR CANNED, IN LIGHT SYRUP","Tropical fruit cocktail, cooked or canned, in light syrup" +63311150,"FRUIT COCKTAIL, COOKED OR CANNED, DRAINED SOLIDS","Fruit cocktail, cooked or canned, drained solids" +63311170,"FRUIT COCKTAIL, COOKED OR CANNED, JUICE PACK","Fruit cocktail, cooked or canned, juice pack" 
+63320100,"FRUIT SALAD, P.R. STYLE (ENSALADA DE FRUTA)","Fruit salad, Puerto Rican style (Mixture includes bananas, papayas, oranges, etc.) (Ensalada de frutas tropicales)" +63401010,"APPLE SALAD W/ DRESSING (INCLUDE WALDORF SALAD)","Apple salad with dressing" +63401015,"APPLE AND GRAPE SALAD W/ YOGURT & WALNUTS","Apple and grape salad with yogurt and walnuts" +63401060,"APPLE, CANDIED (INCLUDE CARAMEL APPLES)","Apple, candied" +63401070,"FRUIT, CHOCOLATE COVERED","Fruit, chocolate covered" +63401990,"BANANA, CHOCOLATE-COVERED, W/ NUTS","Banana, chocolate-covered with nuts" +63402010,"BANANA WHIP","Banana whip" +63402030,"PRUNE WHIP","Prune whip" +63402045,"FRIED DWARF BANANA, PUERTO RICAN STYLE","Fried dwarf banana, Puerto Rican style" +63402050,"FRIED DWARF BANANA W/ CHEESE, PUERTO RICAN STYLE","Fried dwarf banana with cheese, Puerto Rican style" +63402950,"FRUIT SALAD (NO CITRUS) W/ SALAD DRESSING","Fruit salad (excluding citrus fruits) with salad dressing or mayonnaise" +63402960,"FRUIT SALAD (EXCLUDING CITRUS FRUITS) WITH WHIPPED CREAM","Fruit salad (excluding citrus fruits) with whipped cream" +63402970,"FRUIT SALAD (EXCLUDING CITRUS FRUITS) WITH NONDAIRY WHIPPED","Fruit salad (excluding citrus fruits) with nondairy whipped topping" +63402980,"FRUIT SALAD (NO CITRUS) W/ MARSHMALLOWS","Fruit salad (excluding citrus fruits) with marshmallows" +63402990,"FRUIT SALAD (W/ CITRUS) W/ PUDDING","Fruit salad (including citrus fruits) with pudding" +63403000,"FRUIT SALAD (NO CITRUS FRUITS) W/ PUDDING","Fruit salad (excluding citrus fruits) with pudding" +63403010,"FRUIT SALAD (INCL CITRUS FRUITS) W/ SALAD DRESSING","Fruit salad (including citrus fruits) with salad dressing or mayonnaise" +63403020,"FRUIT SALAD (INCLUDING CITRUS FRUIT) WITH WHIPPED CREAM","Fruit salad (including citrus fruit) with whipped cream" +63403030,"FRUIT SALAD (INCLUDING CITRUS FRUITS) WITH NONDAIRY WHIPPED","Fruit salad (including citrus fruits) with nondairy whipped topping" +63403040,"FRUIT 
SALAD W/ MARSHMALLOWS","Fruit salad (including citrus fruits) with marshmallows" +63403100,"FRUIT DESSERT W/ CREAM & OR PUDDING & NUTS","Fruit dessert with cream and/or pudding and nuts" +63403150,"LIME SOUFFLE (INCLUDE OTHER CITRUS FRUITS)","Lime souffle" +63409010,"GUACAMOLE","Guacamole" +63409020,"CHUTNEY","Chutney" +63411010,"CRANBERRY SALAD, CONGEALED","Cranberry salad, congealed" +63413010,"PINEAPPLE SALAD W/ DRESSING","Pineapple salad with dressing" +63415100,"SOUP, FRUIT","Soup, fruit" +63420100,"FRUIT JUICE BAR, FROZEN, ORANGE FLAVOR","Fruit juice bar, frozen, orange flavor" +63420110,"FRUIT JUICE BAR, FROZEN, FLAVOR OTHER THAN ORANGE","Fruit juice bar, frozen, flavor other than orange" +63420200,"FRUIT JUICE BAR, FROZ, LOW CAL SWEETNER, NOT ORANGE","Fruit juice bar, frozen, sweetened with low calorie sweetener, flavors other than orange" +63430100,"SORBET, FRUIT, NONCITRUS FLAVOR","Sorbet, fruit, noncitrus flavor" +63430110,"SORBET, FRUIT, CITRUS FLAVOR","Sorbet, fruit, citrus flavor" +63430500,"FRUIT JUICE BAR W/ CREAM, FROZEN","Fruit juice bar with cream, frozen" +64100100,"FRUIT JUICE, NFS","Fruit juice, NFS" +64100110,"FRUIT JUICE BLEND, 100% JUICE","Fruit juice blend, 100% juice" +64100200,"FRUIT JUICE BLEND, WITH CRANBERRY, 100% JUICE","Fruit juice blend, with cranberry, 100% juice" +64101010,"APPLE CIDER (INCLUDE CIDER, NFS)","Apple cider" +64104010,"APPLE JUICE","Apple juice" +64104600,"BLACKBERRY JUICE (INCL BOYSENBERRY JUICE)","Blackberry juice" +64105400,"CRANBERRY JUICE, 100%, NOT A BLEND","Cranberry juice, 100%, not a blend" +64116020,"GRAPE JUICE","Grape juice" +64120010,"PAPAYA JUICE","Papaya juice" +64121000,"PASSION FRUIT JUICE","Passion fruit juice" +64124020,"PINEAPPLE JUICE","Pineapple juice" +64126000,"POMEGRANATE JUICE","Pomegranate juice" +64132010,"PRUNE JUICE","Prune juice" +64132500,"STRAWBERRY JUICE","Strawberry juice" +64133100,"WATERMELON JUICE","Watermelon juice" +64134000,"FRUIT SMOOTHIE DRINK, W/ FRUIT OR JUICE ONLY (NO 
DAIRY)","Fruit smoothie drink, made with fruit or fruit juice only (no dairy products)" +64200100,"FRUIT NECTAR, NFS","Fruit nectar, NFS" +64201010,"APRICOT NECTAR","Apricot nectar" +64201500,"BANANA NECTAR","Banana nectar" +64202010,"CANTALOUPE NECTAR","Cantaloupe nectar" +64203020,"GUAVA NECTAR","Guava nectar" +64204010,"MANGO NECTAR","Mango nectar" +64205010,"PEACH NECTAR","Peach nectar" +64210010,"PAPAYA NECTAR","Papaya nectar" +64213010,"PASSION FRUIT NECTAR","Passion fruit nectar" +64215010,"PEAR NECTAR","Pear nectar" +64221010,"SOURSOP (GUANABANA) NECTAR","Soursop (Guanabana) nectar" +64401000,"VINEGAR","Vinegar" +67100100,"FRUIT, BABY, NFS","Fruit, baby food, NFS" +67100110,"FRUIT BAR, WITH ADDED VITAMIN C, BABY FOOD, TODDLER","Fruit bar, with added vitamin C, baby food, toddler" +67100200,"TROPICAL FRUIT MEDLEY, BABY FOOD, STRAINED","Tropical fruit medley, baby food, strained" +67100300,"APPLES, BABY, TODDLER","Apples, baby food, toddler" +67101000,"APPLE-RASPBERRY, BABY, NS AS TO STRAINED OR JUNIOR","Apple-raspberry, baby food, NS as to strained or junior" +67101010,"APPLE-RASPBERRY, BABY, STRAINED","Apple-raspberry, baby food, strained" +67101020,"APPLE-RASPBERRY, BABY, JUNIOR","Apple-raspberry, baby food, junior" +67102000,"APPLESAUCE, BABY, NS AS TO STRAINED OR JUNIOR","Applesauce, baby food, NS as to strained or junior" +67102010,"APPLESAUCE, BABY, STRAINED","Applesauce, baby food, strained" +67102020,"APPLESAUCE, BABY, JUNIOR","Applesauce, baby food, junior" +67104000,"APPLESAUCE & APRICOTS, BABY, NS AS TO STR OR JR","Applesauce and apricots, baby food, NS as to strained or junior" +67104010,"APPLESAUCE & APRICOTS, BABY, STRAINED","Applesauce and apricots, baby food, strained" +67104020,"APPLESAUCE & APRICOTS, BABY, JUNIOR","Applesauce and apricots, baby food, junior" +67104030,"APPLESAUCE W/ BANANAS, BABY, NS STRAINED/JUNIOR","Applesauce with bananas, baby food, NS as to strained or junior" +67104040,"APPLESAUCE W/ BANANAS, BABY, 
STRAINED","Applesauce with bananas, baby food, strained" +67104060,"APPLESAUCE W/ BANANAS, BABY, JUNIOR","Applesauce with bananas, baby food, junior" +67104070,"APPLESAUCE W/ CHERRIES, BABY, STRAINED","Applesauce with cherries, baby food, strained" +67104080,"APPLESAUCE W/ CHERRIES, BABY, JUNIOR","Applesauce with cherries, baby food, junior" +67104090,"APPLESAUCE W/ CHERRIES, BABY, NS STRAINED/JUNIOR","Applesauce with cherries, baby food, NS as to strained or junior" +67105030,"BANANAS,BABY FOOD,STRAINED","Bananas, baby food, strained" +67106010,"BANANAS W/ APPLES & PEARS, BABY, STRAINED","Bananas with apples and pears, baby food, strained" +67106050,"BANANA WITH MIXED BERRIES, BABY FOOD, STRAINED","Banana with mixed berries, baby food, strained" +67108000,"PEACHES, BABY, NS AS TO STRAINED OR JUNIOR","Peaches, baby food, NS as to strained or junior" +67108010,"PEACHES, BABY, STRAINED","Peaches, baby food, strained" +67108020,"PEACHES, BABY, JUNIOR","Peaches, baby food, junior" +67108030,"PEACHES, BABY, TODDLER","Peaches, baby food, toddler" +67109000,"PEARS, BABY, NS AS TO STRAINED OR JUNIOR","Pears, baby food, NS as to strained or junior" +67109010,"PEARS, BABY, STRAINED","Pears, baby food, strained" +67109020,"PEARS, BABY, JUNIOR","Pears, baby food, junior" +67109030,"PEARS, BABY, TODDLER","Pears, baby food, toddler" +67110000,"PRUNES, BABY, STRAINED","Prunes, baby food, strained" +67113000,"APPLES & PEARS, BABY, NS AS TO STRAINED OR JUNIOR","Apples and pears, baby food, NS as to strained or junior" +67113010,"APPLES & PEARS, BABY, STRAINED","Apples and pears, baby food, strained" +67113020,"APPLES & PEARS, BABY, JUNIOR","Apples and pears, baby food, junior" +67114000,"PEARS & PINEAPPLE, BABY, NS AS TO STR OR JR","Pears and pineapple, baby food, NS as to strained or junior" +67114010,"PEARS & PINEAPPLE, BABY, STRAINED","Pears and pineapple, baby food, strained" +67114020,"PEARS & PINEAPPLE, BABY, JUNIOR","Pears and pineapple, baby food, junior" +67202000,"APPLE 
JUICE, BABY","Apple juice, baby food" +67202010,"APPLE JUICE, W/ CALCIUM, BABY","Apple juice, with added calcium, baby food" +67203000,"APPLE W/ OTHER FRUIT JUICE, BABY","Apple with other fruit juice, baby food" +67203200,"APPLE-BANANA JUICE, BABY","Apple-banana juice, baby food" +67203400,"APPLE-CHERRY JUICE, BABY","Apple-cherry juice, baby food" +67203450,"APPLE-CRANBERRY JUICE, BABY","Apple-cranberry juice, baby food" +67203500,"APPLE-GRAPE JUICE, BABY","Apple-grape juice, baby food" +67203600,"APPLE-PEACH JUICE, BABY","Apple-peach juice, baby food" +67203700,"APPLE-PRUNE JUICE, BABY","Apple-prune juice, baby food" +67203800,"GRAPE JUICE, BABY","Grape juice, baby food" +67204000,"MIXED FRUIT JUICE, NOT CITRUS, BABY","Mixed fruit juice, not citrus, baby food" +67204100,"MIXED FRUIT JUICE, NOT CITRUS, W/ CALCIUM, BABY","Mixed fruit juice, not citrus, with added calcium, baby food" +67205000,"ORANGE JUICE, BABY","Orange juice, baby food" +67211000,"ORANGE-APPLE-BANANA JUICE, BABY","Orange-apple-banana juice, baby food" +67212000,"PEAR JUICE, BABY FOOD","Pear juice, baby food" +67230000,"APPLE-SWEET POTATO-JUICE,BABY FOOD","Apple-sweet potato juice, baby food" +67230500,"ORANGE-CARROT JUICE, BABY FOOD","Orange-carrot juice, baby food" +67250100,"BANANA JUICE W/ LOWFAT YOGURT, BABY FOOD","Banana juice with lowfat yogurt, baby food" +67250150,"MIXED FRUIT JUICE W/ LOWFAT YOGURT, BABY FOOD","Mixed fruit juice with lowfat yogurt, baby food" +67260000,"FRUIT JUICE DRINK, BABY, W/ HI VIT C + CA, B VITS","Fruit juice drink, baby, with high vitamin C plus added calcium and B vitamins" +67304000,"PLUMS, BABY, NS AS TO STRAINED OR JUNIOR","Plums, baby food, NS as to strained or junior" +67304010,"PLUMS, BABY, STRAINED","Plums, baby food, strained" +67304020,"PLUMS, BABY, JUNIOR","Plums, baby food, junior" +67304030,"PLUMS, BANANAS & RICE, BABY, STRAINED","Plums, bananas, and rice, baby food strained" +67304500,"PRUNES W/ OATMEAL, BABY, STRAINED","Prunes with oatmeal, baby 
food, strained" +67307000,"APRICOTS, BABY, NS AS TO STR OR JR","Apricots, baby food, NS as to strained or junior" +67307010,"APRICOTS, BABY, STRAINED","Apricots, baby food, strained" +67307020,"APRICOTS, BABY, JUNIOR","Apricots, baby food, junior" +67308000,"BANANAS, BABY, NS AS TO STR OR JR","Bananas, baby food, NS as to strained or junior" +67308020,"BANANAS, BABY, JUNIOR","Bananas, baby food, junior" +67309000,"BANANAS & PINEAPPLE, BABY,NS AS TO STR/JR","Bananas and pineapple, baby food, NS as to strained or junior" +67309010,"BANANAS & PINEAPPLE, BABY, STRAINED","Bananas and pineapple, baby food, strained" +67309020,"BANANAS & PINEAPPLE, BABY, JUNIOR","Bananas and pineapple, baby food, junior" +67309030,"BANANAS AND STRAWBERRY, BABY FOOD, JUNIOR","Bananas and strawberry, baby food, junior" +67404000,"FRUIT DESSERT, BABY, NS AS TO STR OR JR","Fruit dessert, baby food, NS as to strained or junior" +67404010,"FRUIT DESSERT, BABY, STRAINED","Fruit dessert, baby food, strained" +67404020,"FRUIT DESSERT, BABY, JUNIOR","Fruit dessert, baby food, junior" +67404050,"FRUIT SUPREME DESSERT, BABY, ALL FLAVORS","Fruit Supreme dessert, baby food" +67404070,"APPLE YOGURT DESSERT, BABY, STRAINED","Apple yogurt dessert, baby food, strained" +67404110,"BANANA APPLE DESSERT, BABY FOOD, STRAINED","Banana apple dessert, baby food, strained" +67404300,"BLUEBERRY YOGURT DESSERT, BABY, STRAINED","Blueberry yogurt dessert, baby food, strained" +67404500,"MIXED FRUIT YOGURT DESSERT, BABY, STRAINED","Mixed fruit yogurt dessert, baby food, strained" +67404550,"CHERRY COBBLER, BABY, JUNIOR","Cherry cobbler, baby food, junior" +67405000,"PEACH COBBLER, BABY, NS AS TO STRAINED OR JUNIOR","Peach cobbler, baby food, NS as to strained or junior" +67405010,"PEACH COBBLER, BABY, STRAINED","Peach cobbler, baby food, strained" +67405020,"PEACH COBBLER, BABY, JUNIOR","Peach cobbler, baby food, junior" +67408010,"BANANA PUDDING, BABY, STRAINED","Banana pudding, baby food, strained" +67408500,"BANANA 
YOGURT DESSERT, BABY, STRAINED","Banana yogurt dessert, baby food, strained" +67410000,"CHERRY VANILLA PUDDING, BABY","Cherry vanilla pudding, baby food, strained" +67412000,"DUTCH APPLE DESSERT, BABY, NS AS TO STR OR JR","Dutch apple dessert, baby food, NS as to strained or junior" +67412010,"DUTCH APPLE DESSERT, BABY, STRAINED","Dutch apple dessert, baby food, strained" +67412020,"DUTCH APPLE DESSERT, BABY, JUNIOR","Dutch apple dessert, baby food, junior" +67413700,"PEACH YOGURT DESSERT, BABY, STRAINED","Peach yogurt dessert, baby food, strained" +67414010,"PINEAPPLE DESSERT, BABY, STRAINED","Pineapple dessert, baby food, strained" +67414100,"MANGO DESSERT, BABY","Mango dessert, baby food" +67415000,"TUTTI-FRUITTI PUDDING, BABY, NS AS TO STR OR JR","Tutti-fruitti pudding, baby food, NS as to strained or junior" +67415010,"TUTTI-FRUTTI PUDDING, BABY, STRAINED","Tutti-fruitti pudding, baby food, strained" +67415020,"TUTTI-FRUITTI PUDDING, BABY, JUNIOR","Tutti-fruitti pudding, baby food, junior" +67430000,"FRUIT FLAVORED SNACK, BABY FOOD","Fruit flavored snack, baby food" +67430500,"YOGURT AND FRUIT SNACK, BABY FOOD","Yogurt and fruit snack, baby food" +67501000,"APPLES & CHICKEN, BABY FOOD, STRAINED","Apples and chicken, baby food, strained" +67501100,"APPLES W/ HAM, BABY, STRAINED","Apples with ham, baby food, strained" +67600100,"APPLES & SWEET POTATOES, BABY, STRAINED","Apples and sweet potatoes, baby food, strained" +71000100,"WHITE POTATO, NFS","White potato, NFS" +71001000,"WHITE POTATO, RAW, W/ OR W/O PEEL","White potato, raw, with or without peel (assume peel not eaten)" +71050000,"WHITE POTATO, DRY, POWDERED, NOT RECONSTITUTED","White potato, dry, powdered, not reconstituted" +71101000,"WHITE POTATO, BAKED, PEEL NOT EATEN","White potato, baked, peel not eaten" +71101100,"WHITE POT,BAKED,PEEL EATEN,NS TO FAT ADDED IN COOK","White potato, baked, peel eaten, NS as to fat added in cooking" +71101110,"WHITE POT,BAKED,PEEL EATEN,FAT NOT ADDED IN COOKING","White 
potato, baked, peel eaten, fat not added in cooking" +71101120,"WHITE POT, BAKED,PEEL EATEN, FAT ADDED IN COOKING","White potato, baked, peel eaten, fat added in cooking" +71101150,"WHITE POTATO SKINS, W/ ADHERING FLESH, BAKED","White potato skins, with adhering flesh, baked" +71103000,"WHITE POTATO, BOILED, W/O PEEL, NS AS TO FAT","White potato, boiled, without peel, NS as to fat added in cooking" +71103010,"WHITE POTATO, BOILED, W/O PEEL, FAT NOT ADDED","White potato, boiled, without peel, fat not added in cooking" +71103020,"WHITE POTATO, BOILED, W/O PEEL, FAT ADDED","White potato, boiled, without peel, fat added in cooking" +71103100,"WHITE POTATO, BOILED W/ PEEL, PEEL NOT EATEN, NS AS TO FAT","White potato, boiled with peel, peel not eaten, NS as to fat added in cooking" +71103110,"WHITE POTATO, BOILED W/PEEL, PEEL NOT EATEN, FAT NOT ADDED","White potato, boiled with peel, peel not eaten, fat not added in cooking" +71103120,"WHITE POTATO, BOILED W/ PEEL, PEEL NOT EATEN, FAT ADDED","White potato, boiled with peel, peel not eaten, fat added in cooking" +71103200,"WHITE POTATO, CANNED, LOW SODIUM,NS AS TO ADDED FAT","White potato, boiled, without peel, canned, low sodium, NS as to fat added in cooking" +71103210,"WHITE POTATO, CANNED, LOW SODIUM, NO FAT ADDED","White potato, boiled, without peel, canned, low sodium, fat not added in cooking" +71103220,"WHITE POTATO, CANNED, LOW SODIUM, FAT ADDED","White potato, boiled, without peel, canned, low sodium, fat added in cooking" +71104000,"WHITE POTATO, ROASTED, NS FAT ADDED","White potato, roasted, NS as to fat added in cooking" +71104010,"WHITE POTATO, ROASTED, FAT NOT ADDED","White potato, roasted, fat not added in cooking" +71104020,"WHITE POTATO, ROASTED, FAT ADDED","White potato, roasted, fat added in cooking" +71106000,"STEWED POTATOES, P.R. (PAPAS GUISADAS)","Stewed potatoes, Puerto Rican style (Papas guisadas)" +71106010,"POTATO ONLY FROM P.R. 
MIXED DISHES","Potato only from Puerto Rican mixed dishes, gravy and other components reported separately" +71106020,"POTATO FROM PUERTO RICAN STYLE,POT ROAST, W/ GRAVY","Potato from Puerto Rican style stuffed pot roast, with gravy" +71106050,"POTATO FROM PUERTO RICAN BEEF STEW, W/ GRAVY","Potato from Puerto Rican beef stew, with gravy" +71106070,"POTATO FROM PUERTO RICAN CHICKEN FRICASSEE, W/ SCE","Potato from Puerto Rican chicken fricassee, with sauce" +71201015,"WHITE POTATO CHIPS, REGULAR CUT","White potato chips, regular cut" +71201020,"WHITE POTATO CHIPS, RUFFLED/RIPPLED/CRINKLE CUT","White potato chips, ruffled, rippled, or crinkle cut" +71201050,"WHITE POTATO, CHIPS, REDUCED FAT","White potato, chips, reduced fat" +71201080,"WHITE POTATO, CHIPS, FAT FREE","White potato, chips, fat free" +71201090,"WHITE POTATO, CHIPS, FAT FREE, W/ OLEAN","White potato, chips, fat free, made with Olean" +71201100,"WHITE POTATO, CHIPS, RESTRUCTURED","White potato, chips, restructured" +71201200,"WHITE POTATO, CHIPS, RESTRUCTURED, RED FAT/SODIUM","White potato, chips, restructured, reduced fat and reduced sodium" +71201210,"WHITE POTATO, CHIPS, RESTRUCTURED, FAT FREE, W/ OLEAN","White potato, chips, restructured, fat free, made with Olean" +71201250,"WHITE POTATO, CHIPS, RESTRUCTURED, BAKED","White potato, chips, restructured, baked" +71201300,"POTATO-BASED SNACKS, REDUCED FAT, LOW SODIUM","Potato based snacks, reduced fat, low sodium, all flavors" +71202000,"WHITE POTATO, CHIPS, UNSALTED","White potato, chips, unsalted" +71202100,"WHITE POTATO, CHIPS, UNSALTED, REDUCED FAT","White potato, chips, unsalted, reduced fat" +71202500,"WHITE POTATO CHIPS, LIGHTLY SALTED","White potato chips, lightly salted" +71204000,"POTATO PUFFS, CHEESE-FILLED","Potato puffs, cheese-filled" +71205000,"WHITE POTATO, STICKS","White potato, sticks" +71211000,"WHITE POTATO SKINS, CHIPS","White potato skins, chips" +71220000,"VEGETABLE CHIPS","Vegetable chips" +71301000,"WHITE POTATO, COOKED, W/ SAUCE, 
NS AS TO SAUCE","White potato, cooked, with sauce, NS as to sauce" +71301020,"WHITE POTATO, COOKED, WITH CHEESE","White potato, cooked, with cheese" +71301120,"WHITE POTATO, COOKED, WITH HAM AND CHEESE","White potato, cooked, with ham and cheese" +71305010,"WHITE POTATO, SCALLOPED","White potato, scalloped" +71305110,"WHITE POTATO, SCALLOPED, W/ HAM","White potato, scalloped, with ham" +71401000,"WHITE POTATO, FRENCH FRIES, NS AS TO FROM FRESH/FRZ","White potato, french fries, NS as to from fresh or frozen" +71401010,"WHITE POTATO, FRENCH FRIES, FROM FRESH, DEEP-FRIED","White potato, french fries, from fresh, deep fried" +71401015,"WHITE POTATO, FRENCH FRIES, FROM FRESH, OVEN BAKED","White potato, french fries, from fresh, oven baked" +71401020,"WHITE POTATO, FRENCH FRIES, FROM FROZEN, OVEN-BAKED","White potato, french fries, from frozen, oven baked" +71401030,"WHITE POTATO, FRENCH FRIES, FRM FRZ, DEEP FRD, FF/REST","White potato, french fries, from frozen, deep fried, from fast food / restaurant" +71401035,"WHITE POTATO, FRENCH FRIES, FR FRZN, NS AS TO FRIED OR BKD","White potato, french fries, from frozen, NS as to deep fried or oven baked" +71402500,"WHITE POTATO, FRENCH FRIES, W/ CHEESE","White potato, french fries, with cheese" +71402505,"WHITE POTATO, FRENCH FRIES, W/ CHEESE AND BACON","White potato, french fries, with cheese and bacon" +71402510,"WHITE POTATO, FRENCH FRIES, W/ CHILI & CHEESE","White potato, french fries, with chili and cheese" +71402520,"WHITE POTATO, FRENCH FRIES, W/ CHILI CON CARNE","White potato, french fries, with chili con carne" +71403000,"WHITE POTATO, HOME FRIES","White potato, home fries" +71403500,"WHITE POTATO, HOME FRIES, W/ GREEN/RED PEPPERS & ONIONS","White potato, home fries, with green or red peppers and onions" +71405000,"WHITE POTATO, HASH BROWN","White potato, hash brown, NS as to from fresh, frozen, or dry mix" +71405010,"WHITE POTATO, HASH BROWN, FROM FRESH","White potato, hash brown, from fresh" +71405020,"WHITE POTATO, 
HASH BROWN, FROM FROZEN","White potato, hash brown, from frozen" +71405030,"WHITE POTATO, HASH BROWN, FROM DRY MIX","White potato, hash brown, from dry mix" +71405100,"WHITE POTATO, HASH BROWN W/ CHEESE","White potato, hash brown, with cheese" +71410000,"WHITE POTATO SKINS, W/ ADHERING FLESH, FRIED","White potato skins, with adhering flesh, fried" +71410500,"WHITE POTATO SKINS W/ FLESH, FRIED, W/ CHEESE","White potato skins, with adhering flesh, fried, with cheese" +71411000,"POTATO SKINS W/ ADHERING FLESH, W/ CHEESE & BACON","White potato skins, with adhering flesh, fried, with cheese and bacon" +71501000,"WHITE POTATO, MASHED, NFS","White potato, mashed, NFS" +71501010,"WHITE POTATO, FRESH, MASHED, MADE W/ MILK","White potato, from fresh, mashed, made with milk" +71501015,"WHITE POTATO, FRESH, MASHED, MADE W/ MILK,/ SOUR CRM/CHEZ","White potato, from fresh, mashed, made with milk, and sour cream and/or cream cheese" +71501020,"WHITE POTATO, FRESH, MASHED, MADE W/ MILK & FAT","White potato, from fresh, mashed, made with milk and fat" +71501025,"WHITE POTATO, FRESH, MASHED, MADE W/ MILK/SOUR CRM/CHEZ &FAT","White potato, from fresh, mashed, made with milk, and sour cream and/or cream cheese and fat" +71501030,"WHITE POTATO, FRESH, MASHED, MADE W/ FAT","White potato, from fresh, mashed, made with fat" +71501040,"WHITE POTATO, DRY, MASHED, MADE W/ MILK & FAT","White potato, from dry, mashed, made with milk and fat" +71501050,"WHITE POTATO, FRESH, MASHED, MADE W/ MILK, FAT & CHEESE","White potato, from fresh, mashed, made with milk, fat and cheese" +71501055,"WHITE POTATO, FRESH, MASHED, MADE W/ SOUR CRM/CHEZ & FAT","White potato, from fresh, mashed, made with sour cream and/or cream cheese and fat" +71501060,"WHITE POTATO, DRY, MASHED, MADE W/ MILK, FAT & EGG","White potato, from dry, mashed, made with milk, fat and egg" +71501070,"WHITE POTATO, DRY, MASHED, MADE W/ MILK, FAT, EGG & CHEESE","White potato, from dry, mashed, made with milk, fat, egg and cheese" 
+71501080,"WHITE POTATO, FRESH, MASHED, NOT MADE W/ MILK OR FAT","White potato, from fresh, mashed, not made with milk or fat" +71501090,"WHITE POTATO, DRY, MASHED, MADE W/ MILK, NO FAT","White potato, from dry, mashed, made with milk, no fat" +71501200,"WHITE POTATO, COMPLETE DRY MIX, MASHED, MADE W/ WATER","White potato, from complete dry mix, mashed, made with water" +71501300,"WHITE POTATO, DRY, MASHED, NS AS TO MILK OR FAT","White potato, from dry, mashed, NS as to milk or fat" +71501310,"WHITE POTATO, FRESH, MASHED, NS AS TO MILK OR FAT","White potato, from fresh, mashed, NS as to milk or fat" +71503010,"WHITE POTATO, PATTY (INCLUDE POTATO CROQUETTES)","White potato, patty" +71505000,"WHITE POTATO, PUFFS","White potato, puffs" +71507000,"WHITE POTATO, BAKED, STUFFED, PEEL NOT EATEN, NS TOPPING","White potato, stuffed, baked, peel not eaten, NS as to topping" +71507005,"WHITE POTATO,BAKED,STUFF W/ BUTTER/MARG, NO PEEL","White potato, stuffed, baked, peel not eaten, stuffed with butter or margarine" +71507010,"WHITE POTATO, BAKED, STUFFED W/SOUR CREAM, NO PEEL","White potato, stuffed, baked, peel not eaten, stuffed with sour cream" +71507020,"WHITE POT,BAKED, STUFFED W/ CHEESE, PEEL NOT EATEN","White potato, stuffed, baked, peel not eaten, stuffed with cheese" +71507030,"WHITE POT, BAKED, STUFFED W/ CHILI, PEEL NOT EATEN","White potato, stuffed, baked, peel not eaten, stuffed with chili" +71507040,"WHITE POT, BAKED, STUFFED W/BROC&CHEESE SCE,NO PEEL","White potato, stuffed, baked, peel not eaten, stuffed with broccoli and cheese sauce" +71507050,"WHITE POT, BAKED, STUFFD W/ MEAT IN CRM SC, NO PEEL","White potato, stuffed, baked, peel not eaten, stuffed with meat in cream sauce" +71507100,"WHITE POT,BAKD,STUF W/CHIC,BROC,CHEESE,PEEL NOT EAT","White potato, stuffed, baked, peel not eaten, stuffed with chicken, broccoli and cheese sauce" +71508000,"WHITE POTATO, BAKED, STUFFED, PEEL EATEN","White potato, stuffed, baked, peel eaten, NS as to topping" 
+71508005,"WHITE POTATO,BAKED,STUFF W/ BUTTER/MARG, PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with butter or margarine" +71508010,"WHITE POTATO, BAKED, STUFFED W/SOUR CRM, PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with sour cream" +71508020,"WHITE POTATO, BAKED, STUFFED W/ CHEESE, PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with cheese" +71508030,"WHITE POTATO, BAKED, STUFFED W/ CHILI, PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with chili" +71508040,"WHITE POT, BKD, STUFD W/BROC&CHEESE SCE,PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with broccoli and cheese sauce" +71508050,"WHITE POT,BAKED, STUFFED W/MEAT&CRM SCE,PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with meat in cream sauce" +71508060,"WHITE POT, BAKED, STUFD W/ BACON&CHEESE, PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with bacon and cheese" +71508070,"WHITE POT,STUFF,BAKED,NO PEEL,W/ BACON & CHEESE","White potato, stuffed, baked, peel not eaten, stuffed with bacon and cheese" +71508100,"WHITE POT,BAKED,STUFF W/CHIC,BROC,CHEESE,PEEL EATEN","White potato, stuffed, baked, peel eaten, stuffed with chicken, broccoli and cheese sauce" +71508120,"WHITE POT,STUFF W/HAM,BROC,&CHEESE SAUCE,BKD,W/PEEL","White potato, stuffed with ham, broccoli and cheese sauce, baked, peel eaten" +71601010,"POTATO SALAD WITH EGG, W/ MAYO","Potato salad with egg, made with mayonnaise" +71601015,"POTATO SALAD W/ EGG, MADE W/ LT MAYO","Potato salad with egg, made with light mayonnaise" +71601020,"POTATO SALAD W/ EGG, MADE W/ MAYO-TYPE DRSG","Potato salad with egg, made with mayonnaise-type salad dressing" +71601025,"POTATO SALAD W/ EGG, MADE W/ LT MAYO-TYPE DRSG","Potato salad with egg, made with light mayonnaise-type salad dressing" +71601030,"POTATO SALAD W/ EGG, MADE W/ CREAMY DRSG","Potato salad with egg, made with creamy dressing" +71601035,"POTATO SALAD W/EGG, MADE W/ LT CREAMY DRSG","Potato 
salad with egg, made with light creamy dressing" +71601040,"POTATO SALAD W/ EGG, MADE W/ ITALIAN DRSG","Potato salad with egg, made with Italian dressing" +71601045,"POTATO SALAD W/ EGG, MADE W/ LT ITALIAN DRSG","Potato salad with egg, made with light Italian dressing" +71601050,"POTATO SALAD W/ EGG, MADE W/ ANY TYPE OF FAT FREE DRSG","Potato salad with egg, made with any type of fat free dressing" +71602010,"POTATO SALAD, GERMAN","Potato salad, German style" +71603010,"POTATO SALAD, MADE WITH MAYONNAISE","Potato salad, made with mayonnaise" +71603015,"POTATO SALAD, W/ LT MAYO","Potato salad, made with light mayonnaise" +71603020,"POTATO SALAD, W/ MAYO-TYPE DRSG","Potato salad, made with mayonnaise-type salad dressing" +71603025,"POTATO SALAD, W/ LT MAYO-TYPE DRSG","Potato salad, made with light mayonnaise-type salad dressing" +71603030,"POTATO SALAD, W/ CREAMY DRSG","Potato salad, made with creamy dressing" +71603035,"POTATO SALAD, W/ LT CREAMY DRSG","Potato salad, made with light creamy dressing" +71603040,"POTATO SALAD, W/ ITALIAN DRESSING","Potato salad, made with Italian dressing" +71603045,"POTATO SALAD, W/ LT ITALIAN DRSG","Potato salad, made with light Italian dressing" +71603050,"POTATO SALAD, W/ ANY TYPE OF FAT FREE DRSG","Potato salad, made with any type of fat free dressing" +71701000,"POTATO PANCAKE","Potato pancake" +71701500,"NORWEGIAN LEFSE, POTATO & FLOUR PANCAKE","Norwegian Lefse, potato and flour pancake" +71702000,"POTATO PUDDING","Potato pudding" +71703000,"STEWED POTATOES, MEXICAN (PAPAS GUISADAS)","Stewed potatoes, Mexican style (Papas guisadas)" +71703040,"STEWED POT W/TOM,MEXICAN(PAPAS GUISADAS CON TOMATE)","Stewed potatoes with tomatoes, Mexican style (Papas guisadas con tomate)" +71704000,"STEWED POTATOES WITH TOMATOES","Stewed potatoes with tomatoes" +71801000,"POTATO SOUP, NS AS TO MADE W/MILK OR WATER","Potato soup, NS as to made with milk or water" +71801010,"POTATO SOUP, CREAM OF, W/ MILK","Potato soup, cream of, prepared with milk" 
+71801020,"POTATO SOUP, PREPARED W/ WATER","Potato soup, prepared with water" +71801100,"POTATO & CHEESE SOUP","Potato and cheese soup" +71803010,"POTATO CHOWDER (INCL CORN CHOWDER)","Potato chowder" +71851010,"PLANTAIN SOUP, P.R. (SOPA DE PLATANO)","Plantain soup, Puerto Rican style (Sopa de platano)" +71900100,"PLANTAIN, BOILED, NS AS TO GREEN OR RIPE","Plantain, boiled, NS as to green or ripe" +71900200,"PLANTAIN, FRIED, NS TO GREEN OR RIPE","Plantain, fried, NS as to green or ripe" +71901010,"GREEN PLANTAIN, BOILED OR BAKED","Green plantains, boiled" +71901110,"FRIED GREEN PLANTAIN, P.R.","Fried green plantain, Puerto Rican style" +71905000,"RIPE PLANTAIN, RAW","Ripe plantain, raw" +71905010,"RIPE PLANTAIN, BOILED (INCL BAKED RIPE PLANTAIN)","Ripe plantain, boiled" +71905110,"FRIED RIPE PLANTAIN, P.R. (PLATANO MADURO FRITO)","Fried ripe plantain, Puerto Rican style (Platano maduro frito)" +71905120,"PLANTAIN, RIPE, ROLLED IN FLOUR, FRIED","Plantain, ripe, rolled in flour, fried" +71905210,"CANDIED RIPE PLANTAIN, P.R. (PLATANO EN ALMIBAR)","Candied ripe plantain, Puerto Rican style (Platano en almibar)" +71905410,"PLANTAIN CHIPS","Plantain chips" +71910110,"GREEN BANANA (COOKED IN SALT WATER)","Green banana, cooked (in salt water)" +71910210,"GREEN BANANA, FRIED","Green banana, fried" +71910310,"PICKLED GREEN BANANA, P.R","Pickled green bananas, Puerto Rican style (Guineos verdes en escabeche)" +71930090,"CASSAVA (YUCA BLANCA), COOKED, NS AS TO ADDED FAT","Cassava (yuca blanca), cooked, NS as to fat added in cooking" +71930100,"CASSAVA (YUCA BLANCA), COOKED, NO FAT ADDED","Cassava (yuca blanca), cooked, fat not added in cooking" +71930120,"CASSAVA (YUCA BLANCA), COOKED, FAT ADDED","Cassava (yuca blanca), cooked, fat added in cooking" +71930200,"CASABE, CASSAVA BREAD","Casabe, cassava bread" +71931010,"CASSAVA W/ CREOLE SAUCE, P.R. 
(YUCA AL MAJO)","Cassava with creole sauce, Puerto Rican style (Yuca al mojo)" +71941110,"SWEET POTATOES, WHITE, P.R., FRIED","Sweet potatoes, white, Puerto Rican, fried" +71941120,"SWEET POTATOES, WHITE, P.R., BOILED","Sweet potatoes, white, Puerto Rican, boiled" +71941130,"SWEET POTATOES, WHITE, P.R., ROASTED OR BAKED","Sweet potatoes, white, Puerto Rican, roasted or baked" +71945010,"YAM, PUERTO RICAN, COOKED (NAME HERVIDO)","Yam, Puerto Rican, cooked (Name hervido)" +71945020,"YAM BUNS, P.R. (BUNUELOS DE NAME)","Yam buns, Puerto Rican style (Bunuelos de name)" +71950010,"TANNIER, COOKED (INCLUDE YAUTIA)","Tannier, cooked" +71961010,"CELERIAC, COOKED (INCLUDE P.R. APIO)","Celeriac, cooked" +71962010,"DASHEEN, BOILED (INCLUDE MALANGA)","Dasheen, boiled" +71962020,"DASHEEN, FRIED (INCLUDE MALANGA)","Dasheen, fried" +71962040,"TARO, BAKED","Taro, baked" +71970110,"STARCHY VEGETABLES, P.R. STYLE, NFS (VIANDAS HERVIDAS)","Starchy vegetables, Puerto Rican style, NFS (viandas hervidas)" +71970120,"STARCHY VEGETABLES, P.R., NO PLANTAINS","Starchy vegetables, Puerto Rican style, including tannier, white sweet potato and yam, with green or ripe plantains (viandas hervidas)" +71970130,"STARCHY VEGETABLES, P.R., NO PLANTAINS","Starchy vegetables, Puerto Rican style, including tannier, white sweet potato and yam, no plantain (viandas hervidas)" +71970200,"FUFU (AFRICAN)","Fufu (African)" +71980100,"POI","Poi" +71980200,"TARO CHIPS","Taro chips" +72101100,"BEET GREENS, RAW","Beet greens, raw" +72101200,"BEET GREENS, COOKED, NS AS TO ADDED FAT","Beet greens, cooked, NS as to fat added in cooking" +72101210,"BEET GREENS, COOKED, FAT NOT ADDED","Beet greens, cooked, fat not added in cooking" +72101220,"BEET GREENS, COOKED, FAT ADDED","Beet greens, cooked, fat added in cooking" +72103000,"BROCCOLI RAAB, RAW","Broccoli raab, raw" +72103010,"BROCCOLI RAAB, COOKED, NS AS TO FAT ADDED","Broccoli raab, cooked, NS as to fat added in cooking" +72103020,"BROCCOLI RAAB, COOKED, FAT NOT 
ADDED","Broccoli raab, cooked, fat not added in cooking" +72103030,"BROCCOLI RAAB, COOKED, FAT ADDED IN COOKING","Broccoli raab, cooked, fat added in cooking" +72104100,"CHARD, RAW","Chard, raw" +72104200,"CHARD, COOKED, NS AS TO ADDED FAT","Chard, cooked, NS as to fat added in cooking" +72104210,"CHARD, COOKED, FAT NOT ADDED","Chard, cooked, fat not added in cooking" +72104220,"CHARD, COOKED, FAT ADDED","Chard, cooked, fat added in cooking" +72107100,"COLLARDS, RAW","Collards, raw" +72107200,"COLLARDS, COOKED, NS AS TO FORM, NS AS TO ADDED FAT","Collards, cooked, NS as to form, NS as to fat added in cooking" +72107201,"COLLARDS,COOKED,FROM FRESH,NS FAT ADDED","Collards, cooked, from fresh, NS as to fat added in cooking" +72107202,"COLLARDS,COOKED,FROM FROZEN,NS FAT ADDED","Collards, cooked, from frozen, NS as to fat added in cooking" +72107203,"COLLARDS,COOKED,FROM CANNED,NS FAT ADDED","Collards, cooked, from canned, NS as to fat added in cooking" +72107210,"COLLARDS, COOKED, NS AS TO FORM, FAT NOT ADDED","Collards, cooked, NS as to form, fat not added in cooking" +72107211,"COLLARDS,COOKED,FROM FRESH,FAT NOT ADDED","Collards, cooked, from fresh, fat not added in cooking" +72107212,"COLLARDS,COOKED,FROM FROZEN,FAT NOT ADDED","Collards, cooked, from frozen, fat not added in cooking" +72107213,"COLLARDS,COOKED,FROM CANNED,FAT NOT ADDED","Collards, cooked, from canned, fat not added in cooking" +72107220,"COLLARDS, COOKED, NS AS TO FORM, FAT ADDED","Collards, cooked, NS as to form, fat added in cooking" +72107221,"COLLARDS,COOKED,FROM FRESH,FAT ADDED","Collards, cooked, from fresh, fat added in cooking" +72107222,"COLLARDS,COOKED,FROM FROZEN,FAT ADDED","Collards, cooked, from frozen, fat added in cooking" +72107223,"COLLARDS,COOKED,FROM CANNED,FAT ADDED","Collards, cooked, from canned, fat added in cooking" +72110100,"CRESS, RAW","Cress, raw" +72110200,"CRESS, COOKED, NS AS TO FORM, NS AS TO ADDED FAT","Cress, cooked, NS as to form, NS as to fat added in cooking" 
+72110201,"CRESS, COOKED, FROM FRESH, NS FAT ADDED","Cress, cooked, from fresh, NS as to fat added in cooking" +72110203,"CRESS, COOKED, FROM CANNED, NS FAT ADDED","Cress, cooked, from canned, NS as to fat added in cooking" +72110210,"CRESS, COOKED, NS AS TO FORM, FAT NOT ADDED","Cress, cooked, NS as to form, fat not added in cooking" +72110211,"CRESS, COOKED, FROM FRESH, FAT NOT ADDED","Cress, cooked, from fresh, fat not added in cooking" +72110213,"CRESS, COOKED, FROM CANNED, FAT NOT ADDED","Cress, cooked, from canned, fat not added in cooking" +72110220,"CRESS, COOKED, NS AS TO FORM, FAT ADDED","Cress, cooked, NS as to form, fat added in cooking" +72110221,"CRESS, COOKED, FROM FRESH, FAT ADDED","Cress, cooked, from fresh, fat added in cooking" +72110223,"CRESS, COOKED, FROM CANNED, FAT ADDED","Cress, cooked, from canned, fat added in cooking" +72113100,"DANDELION GREENS, RAW","Dandelion greens, raw" +72113200,"DANDELION GREENS, COOKED, NS AS TO ADDED FAT","Dandelion greens, cooked, NS as to fat added in cooking" +72113210,"DANDELION GREENS, COOKED, FAT NOT ADDED","Dandelion greens, cooked, fat not added in cooking" +72113220,"DANDELION GREENS, COOKED, FAT ADDED","Dandelion greens, cooked, fat added in cooking" +72116000,"ENDIVE, CHICORY, ESCAROLE OR ROMAINE LETTUCE, RAW","Endive, chicory, escarole, or romaine lettuce, raw" +72116150,"CAESAR SALAD (WITH ROMAINE), NO DRESSING","Caesar salad (with romaine), no dressing" +72116200,"ESCAROLE, COOKED, NS AS TO ADDED FAT","Escarole, cooked, NS as to fat added in cooking" +72116210,"ESCAROLE, COOKED, FAT NOT ADDED","Escarole, cooked, fat not added in cooking" +72116220,"ESCAROLE, COOKED, FAT ADDED","Escarole, cooked, fat added in cooking" +72116230,"ESCAROLE, CREAMED","Escarole, creamed" +72118200,"GREENS, COOKED, NS AS TO FORM, NS AS TO ADDED FAT","Greens, cooked, NS as to form, NS as to fat added in cooking" +72118201,"GREENS, COOKED, FROM FRESH, NS FAT ADDED","Greens, cooked, from fresh, NS as to fat added in 
cooking" +72118202,"GREENS, COOKED, FROM FROZ, NS FAT ADDED","Greens, cooked, from frozen, NS as to fat added in cooking" +72118203,"GREENS, COOKED, FROM CANNED, NS FAT ADDED","Greens, cooked, from canned, NS as to fat added in cooking" +72118210,"GREENS, COOKED, NS AS TO FORM, FAT NOT ADDED","Greens, cooked, NS as to form, fat not added in cooking" +72118211,"GREENS, COOKED, FROM FRESH, FAT NOT ADDED","Greens, cooked, from fresh, fat not added in cooking" +72118212,"GREENS, COOKED, FROM FROZEN, FAT NOT ADDED","Greens, cooked, from frozen, fat not added in cooking" +72118213,"GREENS, COOKED, FROM CANNED, FAT NOT ADDED","Greens, cooked, from canned, fat not added in cooking" +72118220,"GREENS, COOKED, NS AS TO FORM, FAT ADDED","Greens, cooked, NS as to form, fat added in cooking" +72118221,"GREENS, COOKED, FROM FRESH, FAT ADDED","Greens, cooked, from fresh, fat added in cooking" +72118222,"GREENS, COOKED, FROM FROZEN, FAT ADDED","Greens, cooked, from frozen, fat added in cooking" +72118223,"GREENS, COOKED, FROM CANNED, FAT ADDED","Greens, cooked, from canned, fat added in cooking" +72118300,"CHAMNAMUL (KOREAN LEAF VEGETABLE), COOKED, NS AS TO FAT","Chamnamul (Korean leaf vegetable), cooked, NS as to fat added in cooking" +72118305,"CHAMNAMUL (KOREAN LEAF VEGETABLE), COOKED, FAT NOT ADDED","Chamnamul (Korean leaf vegetable), cooked, fat not added in cooking" +72118310,"CHAMNAMUL (KOREAN LEAF VEGETABLE), COOKED, FAT ADDED","Chamnamul (Korean leaf vegetable), cooked, fat added in cooking" +72119200,"KALE, COOKED, NS AS TO FORM, NS AS TO ADDED FAT","Kale, cooked, NS as to form, NS as to fat added in cooking" +72119201,"KALE,COOKED,FROM FRESH,NS FAT ADDED","Kale, cooked, from fresh, NS as to fat added in cooking" +72119202,"KALE,COOKED,FROM FROZEN,NS FAT ADDED","Kale, cooked, from frozen, NS as to fat added in cooking" +72119203,"KALE,COOKED,FROM CANNED,NS FAT ADDED","Kale, cooked, from canned, NS as to fat added in cooking" +72119210,"KALE, COOKED, NS AS TO FORM, FAT 
NOT ADDED","Kale, cooked, NS as to form, fat not added in cooking" +72119211,"KALE,COOKED,FROM FRESH,FAT NOT ADDED","Kale, cooked, from fresh, fat not added in cooking" +72119212,"KALE,COOKED,FROM FROZEN,FAT NOT ADDED","Kale, cooked, from frozen, fat not added in cooking" +72119213,"KALE,COOKED,FROM CANNED,FAT NOT ADDED","Kale, cooked, from canned, fat not added in cooking" +72119220,"KALE, COOKED, NS AS TO FORM, FAT ADDED","Kale, cooked, NS as to form, fat added in cooking" +72119221,"KALE,COOKED,FROM FRESH,FAT ADDED","Kale, cooked, from fresh, fat added in cooking" +72119222,"KALE,COOKED,FROM FROZEN,FAT ADDED","Kale, cooked, from frozen, fat added in cooking" +72119223,"KALE,COOKED,FROM CANNED,FAT ADDED","Kale, cooked, from canned, fat added in cooking" +72120200,"LAMBSQUARTER, COOKED, NS AS TO ADDED FAT","Lambsquarter, cooked, NS as to fat added in cooking" +72120210,"CRESS, COOKED, FAT NOT ADDEDS","Lambsquarter, cooked, fat not added in cooking" +72120220,"LAMBSQUARTER, COOKED, FAT ADDED","Lambsquarter, cooked, fat added in cooking" +72121210,"MUSTARD CABBAGE,COOKED, FAT NOT ADDED IN COOKING","Mustard cabbage, cooked, fat not added in cooking" +72122100,"MUSTARD GREENS, RAW","Mustard greens, raw" +72122200,"MUSTARD GREENS, COOKED, NS FORM, NS FAT ADDED","Mustard greens, cooked, NS as to form, NS as to fat added in cooking" +72122201,"MUSTARD GREENS,COOKED,FROM FRESH,NS FAT ADDED","Mustard greens, cooked, from fresh, NS as to fat added in cooking" +72122202,"MUSTARD GREENS,COOKED,FROM FROZEN,NS FAT ADDED","Mustard greens, cooked, from frozen, NS as to fat added in cooking" +72122203,"MUSTARD GREENS,COOKED,FROM CANNED,NS FAT ADDED","Mustard greens, cooked, from canned, NS as to fat added in cooking" +72122210,"MUSTARD GREENS, COOKED, NS FORM, FAT NOT ADDED","Mustard greens, cooked, NS as to form, fat not added in cooking" +72122211,"MUSTARD GREENS,COOKED,FROM FRESH,FAT NOT ADDED","Mustard greens, cooked, from fresh, fat not added in cooking" +72122212,"MUSTARD 
GREENS,COOKED,FROM FROZEN,FAT NOT ADDED","Mustard greens, cooked, from frozen, fat not added in cooking" +72122213,"MUSTARD GREENS,COOKED,FROM CANNED,FAT NOT ADDED","Mustard greens, cooked, from canned, fat not added in cooking" +72122220,"MUSTARD GREEN, COOKED, NS FORM, FAT ADDED","Mustard greens, cooked, NS as to form, fat added in cooking" +72122221,"MUSTARD GREENS,COOKED,FROM FRESH,FAT ADDED","Mustard greens, cooked, from fresh, fat added in cooking" +72122222,"MUSTARD GREENS,COOKED,FROM FROZEN,FAT ADDED","Mustard greens, cooked, from frozen, fat added in cooking" +72122223,"MUSTARD GREENS,COOKED,FROM CANNED,FAT ADDED","Mustard greens, cooked, from canned, fat added in cooking" +72123000,"POKE GREENS, COOKED, NS AS TO ADDED FAT","Poke greens, cooked, NS as to fat added in cooking" +72123010,"POKE GREENS, COOKED, FAT NOT ADDED","Poke greens, cooked, fat not added in cooking" +72123020,"POKE GREENS, COOKED, FAT ADDED","Poke greens, cooked, fat added in cooking" +72124100,"RADICCHIO, RAW","Radicchio, raw" +72125100,"SPINACH, RAW","Spinach, raw" +72125200,"SPINACH, COOKED, NS FORM, NS AS TO ADDED FAT","Spinach, cooked, NS as to form, NS as to fat added in cooking" +72125201,"SPINACH,COOKED,FROM FRESH,NS FAT ADDED","Spinach, cooked, from fresh, NS as to fat added in cooking" +72125202,"SPINACH,COOKED,FROM FROZEN,NS FAT ADDED","Spinach, cooked, from frozen, NS as to fat added in cooking" +72125203,"SPINACH,COOKED,FROM CANNED,NS FAT ADDED","Spinach, cooked, from canned, NS as to fat added in cooking" +72125210,"SPINACH, COOKED, NS AS TO FORM, FAT NOT ADDED","Spinach, cooked, NS as to form, fat not added in cooking" +72125211,"SPINACH,COOKED,FROM FRESH,FAT NOT ADDED","Spinach, cooked, from fresh, fat not added in cooking" +72125212,"SPINACH,COOKED,FROM FROZEN,FAT NOT ADDED","Spinach, cooked, from frozen, fat not added in cooking" +72125213,"SPINACH,COOKED,FROM CANNED,FAT NOT ADDED","Spinach, cooked, from canned, fat not added in cooking" +72125220,"SPINACH, COOKED, NS 
AS TO FORM, FAT ADDED","Spinach, cooked, NS as to form, fat added in cooking" +72125221,"SPINACH,COOKED,FROM FRESH,FAT ADDED","Spinach, cooked, from fresh, fat added in cooking" +72125222,"SPINACH,COOKED,FROM FROZEN,FAT ADDED","Spinach, cooked, from frozen, fat added in cooking" +72125223,"SPINACH,COOKED,FROM CANNED,FAT ADDED","Spinach, cooked, from canned, fat added in cooking" +72125230,"SPINACH, NS AS TO FORM, CREAMED","Spinach, NS as to form, creamed" +72125231,"SPINACH, FROM FRESH, CREAMED","Spinach, from fresh, creamed" +72125232,"SPINACH, FROM FROZEN, CREAMED","Spinach, from frozen, creamed" +72125233,"SPINACH, FROM CANNED, CREAMED","Spinach, from canned, creamed" +72125240,"SPINACH SOUFFLE","Spinach souffle" +72125250,"SPINACH, COOKED, NS AS TO FORM, W/ CHEESE SAUCE","Spinach, cooked, NS as to form, with cheese sauce" +72125251,"SPINACH, COOKED, FROM FRESH, W/ CHEESE SAUCE","Spinach, cooked, from fresh, with cheese sauce" +72125252,"SPINACH, COOKED, FROM FROZEN, W/ CHEESE SAUCE","Spinach, cooked, from frozen, with cheese sauce" +72125253,"SPINACH, COOKED, FROM CANNED, W/ CHEESE SAUCE","Spinach, cooked, from canned, with cheese sauce" +72125260,"SPINACH & CHEESE CASSEROLE","Spinach and cheese casserole" +72125310,"SPINACH AND COTTAGE CHEESE","Palak Paneer or Saag Paneer (Indian)" +72125500,"SPINACH & CHICK PEAS, FAT ADDED","Spinach and chickpeas, fat added" +72126000,"TARO LEAVES, COOKED, FAT NOT ADDED IN COOKING","Taro leaves, cooked, fat not added in cooking" +72127000,"THISTLE LEAVES, COOKED, FAT NOT ADDED IN COOKING","Thistle leaves, cooked, fat not added in cooking" +72128200,"TURNIP GREENS, COOKED, NS FORM, NS AS TO ADDED FAT","Turnip greens, cooked, NS as to form, NS as to fat added in cooking" +72128201,"TURNIP GREENS,COOKED,FROM FRESH,NS FAT ADDED","Turnip greens, cooked, from fresh, NS as to fat added in cooking" +72128202,"TURNIP GREENS,COOKED,FROM FROZEN,NS FAT ADDED","Turnip greens, cooked, from frozen, NS as to fat added in cooking" 
+72128203,"TURNIP GREENS,COOKED,FROM CANNED,NS FAT ADDED","Turnip greens, cooked, from canned, NS as to fat added in cooking" +72128210,"TURNIP GREENS, NS FORM, COOKED, FAT NOT ADDED","Turnip greens, cooked, NS as to form, fat not added in cooking" +72128211,"TURNIP GREENS,COOKED,FROM FRESH,FAT NOT ADDED","Turnip greens, cooked, from fresh, fat not added in cooking" +72128212,"TURNIP GREENS,COOKED,FROM FROZEN,FAT NOT ADDED","Turnip greens, cooked, from frozen, fat not added in cooking" +72128213,"TURNIP GREENS,COOKED,FROM CANNED,FAT NOT ADDED","Turnip greens, cooked, from canned, fat not added in cooking" +72128220,"TURNIP GREENS, COOKED, NS FORM, FAT ADDED","Turnip greens, cooked, NS as to form, fat added in cooking" +72128221,"TURNIP GREENS,COOKED,FROM FRESH,FAT ADDED","Turnip greens, cooked, from fresh, fat added in cooking" +72128222,"TURNIP GREENS,COOKED,FROM FROZEN,FAT ADDED","Turnip greens, cooked, from frozen, fat added in cooking" +72128223,"TURNIP GREENS,COOKED,FROM CANNED,FAT ADDED","Turnip greens, cooked, from canned, fat added in cooking" +72128400,"TURNIP GREENS W/ ROOTS, CKD, NS FORM, NS ADDED FAT","Turnip greens with roots, cooked, NS as to form, NS as to fat added in cooking" +72128401,"TURNIP GREENS W/ ROOTS, CKD, FROM FRESH, NS ADDED FAT","Turnip greens with roots, cooked, from fresh, NS as to fat added in cooking" +72128402,"TURNIP GREENS W/ ROOTS, CKD, FROM FROZ, NS ADDED FAT","Turnip greens with roots, cooked, from frozen, NS as to fat added in cooking" +72128403,"TURNIP GREENS W/ ROOTS, CKD, FROM CAN, NS ADDED FAT","Turnip greens with roots, cooked, from canned, NS as to fat added in cooking" +72128410,"TURNIP GREENS W/ ROOTS, CKD, NS FORM, FAT NOT ADDED","Turnip greens with roots, cooked, NS as to form, fat not added in cooking" +72128411,"TURNIP GREENS W/ ROOTS, CKD, FROM FRESH, NO FAT ADDED","Turnip greens with roots, cooked, from fresh, fat not added in cooking" +72128412,"TURNIP GREENS W/ ROOTS, CKD, FROM FROZ, NO FAT ADDED","Turnip 
greens with roots, cooked, from frozen, fat not added in cooking" +72128413,"TURNIP GREENS W/ ROOTS, CKD, FROM CAN, NO FAT ADDED","Turnip greens with roots, cooked, from canned, fat not added in cooking" +72128420,"TURNIP GREENS W/ ROOTS, COOKED, NS FORM, FAT ADDED","Turnip greens with roots, cooked, NS as to form, fat added in cooking" +72128421,"TURNIP GREENS W/ ROOTS, CKD, FROM FRESH, FAT ADDED","Turnip greens with roots, cooked, from fresh, fat added in cooking" +72128422,"TURNIP GREENS W/ ROOTS, CKD, FROM FROZ, FAT ADDED","Turnip greens with roots, cooked, from frozen, fat added in cooking" +72128423,"TURNIP GREENS W/ ROOTS, CKD, FROM CAN, FAT ADDED","Turnip greens with roots, cooked, from canned, fat added in cooking" +72128500,"TURNIP GREENS, CANNED, LOW NA, NS AS TO ADDED FAT","Turnip greens, canned, low sodium, cooked, NS as to fat added in cooking" +72128510,"TURNIP GREENS, CANNED, LOW SODIUM, FAT NOT ADDED","Turnip greens, canned, low sodium, cooked, fat not added in cooking" +72128520,"TURNIP GREENS, CANNED, LOW SODIUM, FAT ADDED","Turnip greens, canned, low sodium, cooked, fat added in cooking" +72130100,"WATERCRESS, RAW","Watercress, raw" +72130200,"WATERCRESS, COOKED, FAT NOT ADDED IN COOKING","Watercress, cooked, fat not added in cooking" +72132200,"BITTERMELON,HORSERADISH,JUTE,RADISH LVES,CKD,NO FAT","Bitter melon leaves, horseradish leaves, jute leaves, or radish leaves, cooked, fat not added in cooking" +72133200,"SWEET POTATO,SQUASH,PUMPKIN LEAVES,CKD,FAT NOT ADDED","Sweet potato leaves, squash leaves, pumpkin leaves, chrysanthemum leaves, bean leaves, or swamp cabbage, cooked, fat not added in cooking" +72201100,"BROCCOLI, RAW","Broccoli, raw" +72201200,"BROCCOLI, CKD, NS FORM, NS FAT (INCL BROCCOLI, NFS)","Broccoli, cooked, NS as to form, NS as to fat added in cooking" +72201201,"BROCCOLI, CKD, FROM FRESH, NS FAT (INCL BROCCOLI, NFS)","Broccoli, cooked, from fresh, NS as to fat added in cooking" +72201202,"BROCCOLI, CKD, FROM FROZ, NS FAT 
(INCL BROCCOLI, NFS)","Broccoli, cooked, from frozen, NS as to fat added in cooking" +72201210,"BROCCOLI, COOKED, NS AS TO FORM, NO FAT ADDED","Broccoli, cooked, NS as to form, fat not added in cooking" +72201211,"BROCCOLI, COOKED, FROM FRESH, NO FAT ADDED","Broccoli, cooked, from fresh, fat not added in cooking" +72201212,"BROCCOLI, COOKED, FROM FROZ, NO FAT ADDED","Broccoli, cooked, from frozen, fat not added in cooking" +72201220,"BROCCOLI, COOKED, NS AS TO FORM, FAT ADDED","Broccoli, cooked, NS as to form, fat added in cooking" +72201221,"BROCCOLI, COOKED, FROM FRESH, FAT ADDED","Broccoli, cooked, from fresh, fat added in cooking" +72201222,"BROCCOLI, COOKED, FROM FROZ, FAT ADDED","Broccoli, cooked, from frozen, fat added in cooking" +72201230,"BROCCOLI, COOKED, NS AS TO FORM, W/ CHEESE SAUCE","Broccoli, cooked, NS as to form, with cheese sauce" +72201231,"BROCCOLI, COOKED, FROM FRESH, W/ CHEESE SAUCE","Broccoli, cooked, from fresh, with cheese sauce" +72201232,"BROCCOLI, COOKED, FROM FROZEN, W/ CHEESE SAUCE","Broccoli, cooked, from frozen, with cheese sauce" +72201240,"BROCCOLI, COOKED, NS AS TO FORM, W/ MUSHROOM SAUCE","Broccoli, cooked, NS as to form, with mushroom sauce" +72201241,"BROCCOLI, COOKED, FROM FRESH, W/ MUSHROOM SAUCE","Broccoli, cooked, from fresh, with mushroom sauce" +72201242,"BROCCOLI, COOKED, FROM FROZEN, W/ MUSHROOM SAUCE","Broccoli, cooked, from frozen, with mushroom sauce" +72201250,"BROCCOLI, COOKED, NS AS TO FORM, W/ CREAM SAUCE","Broccoli, cooked, NS as to form, with cream sauce" +72201251,"BROCCOLI, COOKED, FROM FRESH, W/ CREAM SAUCE","Broccoli, cooked, from fresh, with cream sauce" +72201252,"BROCCOLI, COOKED, FROM FROZEN, W/ CREAM SAUCE","Broccoli, cooked, from frozen, with cream sauce" +72202010,"BROCCOLI CASSEROLE (BROC, NOODLES, CREAM SAUCE)","Broccoli casserole (broccoli, noodles, and cream sauce)" +72202020,"BROCCOLI CASSEROLE (BROC,RICE,CHEESE,MUSHROOM SCE)","Broccoli casserole (broccoli, rice, cheese, and mushroom sauce)" 
+72202030,"BROCCOLI, BATTER-DIPPED & FRIED","Broccoli, batter-dipped and fried" +72302000,"BROCCOLI SOUP, PREPARED WITH MILK, HOME RECIPE, CANNED OR RE","Broccoli soup, prepared with milk, home recipe, canned or ready-to-serve" +72302020,"BROCCOLI SOUP, PREP W/ WATER","Broccoli soup, prepared with water, home recipe, canned, or ready-to-serve" +72302100,"BROCCOLI CHEESE SOUP, PREPARED WITH MILK, HOME RECIPE, CANNE","Broccoli cheese soup, prepared with milk, home recipe, canned, or ready-to-serve" +72306000,"WATERCRESS BROTH W/ SHRIMP","Watercress broth with shrimp" +72307000,"SPINACH SOUP","Spinach soup" +72308000,"DARK-GREEN LEAFY VEGETABLE SOUP WITH MEAT, ASIAN STYLE","Dark-green leafy vegetable soup with meat, Asian style" +72308500,"DARK-GREEN LEAFY VEGETABLE SOUP, MEATLESS, ASIAN STYLE","Dark-green leafy vegetable soup, meatless, Asian style" +73101010,"CARROTS, RAW","Carrots, raw" +73101110,"CARROTS, RAW, SALAD (INCLUDE CARROT-RAISIN SALAD)","Carrots, raw, salad" +73101210,"CARROTS, RAW, SALAD W/ APPLES","Carrots, raw, salad with apples" +73102200,"CARROTS, COOKED, NS AS TO FORM, NS FAT ADDED","Carrots, cooked, NS as to form, NS as to fat added in cooking" +73102201,"CARROTS, COOKED, FROM FRESH, NS FAT ADDED","Carrots, cooked, from fresh, NS as to fat added in cooking" +73102202,"CARROTS, COOKED, FROM FROZEN, NS FAT ADDED","Carrots, cooked, from frozen, NS as to fat added in cooking" +73102203,"CARROTS, COOKED, FROM CANNED, NS FAT ADDED","Carrots, cooked, from canned, NS as to fat added in cooking" +73102210,"CARROTS, COOKED, NS AS TO FORM, FAT NOT ADDED","Carrots, cooked, NS as to form, fat not added in cooking" +73102211,"CARROTS, COOKED, FROM FRESH, FAT NOT ADDED","Carrots, cooked, from fresh, fat not added in cooking" +73102212,"CARROTS, COOKED, FROM FROZEN, FAT NOT ADDED","Carrots, cooked, from frozen, fat not added in cooking" +73102213,"CARROTS, COOKED, FROM CANNED, FAT NOT ADDED","Carrots, cooked, from canned, fat not added in cooking" 
+73102220,"CARROTS, COOKED, NS AS TO FORM, FAT ADDED","Carrots, cooked, NS as to form, fat added in cooking" +73102221,"CARROTS, COOKED, FROM FRESH, FAT ADDED","Carrots, cooked, from fresh, fat added in cooking" +73102222,"CARROTS, COOKED, FROM FROZEN, FAT ADDED","Carrots, cooked, from frozen, fat added in cooking" +73102223,"CARROTS, COOKED, FROM CANNED, FAT ADDED","Carrots, cooked, from canned, fat added in cooking" +73102230,"CARROTS, COOKED, NS AS TO FORM, CREAMED","Carrots, cooked, NS as to form, creamed" +73102231,"CARROTS, COOKED, FROM FRESH, CREAMED","Carrots, cooked, from fresh, creamed" +73102232,"CARROTS, COOKED, FROM FROZEN, CREAMED","Carrots, cooked, from frozen, creamed" +73102233,"CARROTS, COOKED, FROM CANNED, CREAMED","Carrots, cooked, from canned, creamed" +73102240,"CARROTS, COOKED, NS AS TO FORM, GLAZED","Carrots, cooked, NS as to form, glazed" +73102241,"CARROTS, COOKED, FROM FRESH, GLAZED","Carrots, cooked, from fresh, glazed" +73102242,"CARROTS, COOKED, FROM FROZEN, GLAZED","Carrots, cooked, from frozen, glazed" +73102243,"CARROTS, COOKED, FROM CANNED, GLAZED","Carrots, cooked, from canned, glazed" +73102250,"CARROTS, COOKED, NS AS TO FORM, W/ CHEESE SAUCE","Carrots, cooked, NS as to form, with cheese sauce" +73102251,"CARROTS, COOKED, FROM FRESH, W/ CHEESE SAUCE","Carrots, cooked, from fresh, with cheese sauce" +73102252,"CARROTS, COOKED, FROM FROZEN, W/ CHEESE SAUCE","Carrots, cooked, from frozen, with cheese sauce" +73102253,"CARROTS, COOKED, FROM CANNED, W/ CHEESE SAUCE","Carrots, cooked, from canned, with cheese sauce" +73103000,"CARROTS, CANNED, LOW SODIUM, NS AS TO ADDED FAT","Carrots, canned, low sodium, NS as to fat added in cooking" +73103010,"CARROTS, CANNED, LOW SODIUM, NO FAT ADDED","Carrots, canned, low sodium, fat not added in cooking" +73103020,"CARROTS, CANNED, LOW SODIUM, FAT ADDED","Carrots, canned, low sodium, fat added in cooking" +73105010,"CARROT JUICE","Carrot juice" +73111030,"PEAS & CARROTS, NS AS TO FORM, 
CREAMED","Peas and carrots, NS as to form, creamed" +73111031,"PEAS & CARROTS, FROM FRESH, CREAMED","Peas and carrots, from fresh, creamed" +73111032,"PEAS & CARROTS, FROM FROZEN, CREAMED","Peas and carrots, from frozen, creamed" +73111033,"PEAS & CARROTS, FROM CANNED, CREAMED","Peas and carrots, from canned, creamed" +73111200,"PEAS & CARROTS, COOKED, NS FORM, NS AS TO ADDED FAT","Peas and carrots, cooked, NS as to form, NS as to fat added in cooking" +73111201,"PEAS & CARROTS, COOKED, FROM FRESH, NS FAT ADDED","Peas and carrots, cooked, from fresh, NS as to fat added in cooking" +73111202,"PEAS & CARROTS, COOKED, FROM FROZ, NS FAT ADDED","Peas and carrots, cooked, from frozen, NS as to fat added in cooking" +73111203,"PEAS & CARROTS, COOKED, FROM CANNED, NS FAT ADDED","Peas and carrots, cooked, from canned, NS as to fat added in cooking" +73111210,"PEAS & CARROTS, COOKED, NS FORM, FAT NOT ADDED","Peas and carrots, cooked, NS as to form, fat not added in cooking" +73111211,"PEAS & CARROTS, COOKED, FROM FRESH, FAT NOT ADDED","Peas and carrots, cooked, from fresh, fat not added in cooking" +73111212,"PEAS & CARROTS, COOKED, FROM FROZ, FAT NOT ADDED","Peas and carrots, cooked, from frozen, fat not added in cooking" +73111213,"PEAS & CARROTS, COOKED, FROM CANNED, FAT NOT ADDED","Peas and carrots, cooked, from canned, fat not added in cooking" +73111220,"PEAS & CARROTS, COOKED, NS AS TO FORM, FAT ADDED","Peas and carrots, cooked, NS as to form, fat added in cooking" +73111221,"PEAS & CARROTS, COOKED, FROM FRESH, FAT ADDED","Peas and carrots, cooked, from fresh, fat added in cooking" +73111222,"PEAS & CARROTS, COOKED, FROM FROZ, FAT ADDED","Peas and carrots, cooked, from frozen, fat added in cooking" +73111223,"PEAS & CARROTS, COOKED, FROM CANNED, FAT ADDED","Peas and carrots, cooked, from canned, fat added in cooking" +73111250,"PEAS & CARROTS, CANNED, LOW SODIUM, NS ADDED FAT","Peas and carrots, canned, low sodium, NS as to fat added in cooking" +73111260,"PEAS & 
CARROTS, CANNED, LOW SODIUM, FAT ADDED","Peas and carrots, canned, low sodium, fat added in cooking" +73111270,"PEAS & CARROTS, CANNED, LOW SODIUM, NO FAT ADDED","Peas and carrots, canned, low sodium, fat not added in cooking" +73111400,"CARROTS IN TOMATO SAUCE","Carrots in tomato sauce" +73112000,"CARROT CHIPS, DRIED","Carrot chips, dried" +73201000,"PUMPKIN, COOKED, NS AS TO FORM, NS AS TO ADDED FAT","Pumpkin, cooked, NS as to form, NS as to fat added in cooking" +73201001,"PUMPKIN, COOKED, FROM FRESH, NS AS TO ADDED FAT","Pumpkin, cooked, from fresh, NS as to fat added in cooking" +73201002,"PUMPKIN, COOKED, FROM FROZEN, NS AS TO ADDED FAT","Pumpkin, cooked, from frozen, NS as to fat added in cooking" +73201003,"PUMPKIN, COOKED, FROM CANNED, NS AS TO ADDED FAT","Pumpkin, cooked, from canned, NS as to fat added in cooking" +73201010,"PUMPKIN, COOKED, NS AS TO FORM, FAT NOT ADDED","Pumpkin, cooked, NS as to form, fat not added in cooking" +73201011,"PUMPKIN, COOKED, FROM FRESH, FAT NOT ADDED","Pumpkin, cooked, from fresh, fat not added in cooking" +73201012,"PUMPKIN, COOKED, FROM FROZEN, FAT NOT ADDED","Pumpkin, cooked, from frozen, fat not added in cooking" +73201013,"PUMPKIN, COOKED, FROM CANNED, FAT NOT ADDED","Pumpkin, cooked, from canned, fat not added in cooking" +73201020,"PUMPKIN, COOKED, NS AS TO FORM, FAT ADDED","Pumpkin, cooked, NS as to form, fat added in cooking" +73201021,"PUMPKIN, COOKED, FROM FRESH, FAT ADDED","Pumpkin, cooked, from fresh, fat added in cooking" +73201022,"PUMPKIN, COOKED, FROM FROZEN, FAT ADDED","Pumpkin, cooked, from frozen, fat added in cooking" +73201023,"PUMPKIN, COOKED, FROM CANNED, FAT ADDED","Pumpkin, cooked, from canned, fat added in cooking" +73210010,"CALABAZA (SPANISH PUMPKIN), COOKED","Calabaza (Spanish pumpkin), cooked" +73210110,"PUMPKIN FRITTERS, P.R.","Pumpkin fritters, Puerto Rican style" +73211110,"SWEET POTATO & PUMPKIN CASSEROLE, P.R","Sweet potato and pumpkin casserole, Puerto Rican style" +73301000,"SQUASH, 
WINTER, MASHED, NS AS TO ADDED FAT/SUGAR","Squash, winter type, mashed, NS as to fat or sugar added in cooking" +73301010,"SQUASH, WINTER, MASHED, NO FAT OR SUGAR ADDED","Squash, winter type, mashed, no fat or sugar added in cooking" +73301020,"SQUASH, WINTER, COOKED, MASHED, FAT ADDED, NO SUGAR","Squash, winter type, mashed, fat added in cooking, no sugar added in cooking" +73301030,"SQUASH, WINTER, COOKED, MASHED, FAT & SUGAR ADDED","Squash, winter type, mashed, fat and sugar added in cooking" +73302010,"SQUASH, WINTER, RAW","Squash, winter type, raw" +73303000,"SQUASH, WINTER, BAKED, NS FAT OR SUGAR ADDED","Squash, winter type, baked, NS as to fat or sugar added in cooking" +73303010,"SQUASH, WINTER, BAKED, NO FAT OR SUGAR ADDED","Squash, winter type, baked, no fat or sugar added in cooking" +73303020,"SQUASH, WINTER, BAKED, FAT ADDED, NO SUGAR","Squash, winter type, baked, fat added in cooking, no sugar added in cooking" +73303030,"SQUASH, WINTER, BAKED, FAT & SUGAR ADDED","Squash, winter type, baked, fat and sugar added in cooking" +73303040,"SQUASH, WINTER, BAKED, NO ADDED FAT, SUGAR ADDED","Squash, winter type, baked, no fat added in cooking, sugar added in cooking" +73304010,"SQUASH, FRITTER OR CAKE","Squash fritter or cake" +73305010,"SQUASH, WINTER, BAKED W/ CHEESE","Squash, winter, baked with cheese" +73305020,"SQUASH, WINTER, SOUFFLE","Squash, winter, souffle" +73401000,"SWEET POTATO, NFS","Sweet potato, NFS" +73402000,"SWEET POTATO, BAKED, PEEL EATEN, NS AS TO ADDED FAT","Sweet potato, baked, peel eaten, NS as to fat added in cooking" +73402010,"SWEET POTATO, BAKED, PEEL EATEN, NO FAT ADDED","Sweet potato, baked, peel eaten, fat not added in cooking" +73402020,"SWEET POTATO, BAKED, PEEL EATEN, FAT ADDED","Sweet potato, baked, peel eaten, fat added in cooking" +73403000,"SWEET POTATO, BAKED, PEEL NOT EATEN, NS AS TO FAT","Sweet potato, baked, peel not eaten, NS as to fat added in cooking" +73403010,"SWEET POTATO, BAKED, PEEL NOT EATEN, FAT NOT 
ADDED","Sweet potato, baked, peel not eaten, fat not added in cooking" +73403020,"SWEET POTATO, BAKED, PEEL NOT EATEN, FAT ADDED","Sweet potato, baked, peel not eaten, fat added in cooking" +73405000,"SWEET POTATO, BOILED, W/O PEEL, NS AS TO ADDED FAT","Sweet potato, boiled, without peel, NS as to fat added in cooking" +73405010,"SWEET POTATO, BOILED, W/O PEEL, FAT NOT ADDED","Sweet potato, boiled, without peel, fat not added in cooking" +73405020,"SWEET POTATO, BOILED, W/O PEEL, FAT ADDED","Sweet potato, boiled, without peel, fat added in cooking" +73405100,"SWEET POTATO, BOILED W/ PEEL, PEEL NOT EATEN, NS AS TO FAT","Sweet potato, boiled with peel, peel not eaten, NS as to fat added in cooking" +73405110,"SWEET POTATO, BOILED W/ PEEL, PEEL NOT EATEN, FAT NOT ADDED","Sweet potato, boiled with peel, peel not eaten, fat not added in cooking" +73405120,"SWEET POTATO, BOILED W/ PEEL, PEEL NOT EATEN, FAT ADDED","Sweet potato, boiled with peel, peel not eaten, fat added in cooking" +73406000,"SWEET POTATO, CANDIED","Sweet potato, candied" +73406010,"SWEET POTATO W/ FRUIT","Sweet potato with fruit" +73407000,"SWEET POTATO, CANNED, NS AS TO SYRUP","Sweet potato, canned, NS as to syrup" +73407010,"SWEET POTATO, CANNED, W/O SYRUP","Sweet potato, canned without syrup" +73407020,"SWEET POTATO, CANNED IN SYRUP","Sweet potato, canned in syrup" +73407030,"SWEET POTATO, CANNED IN SYRUP, W/ FAT ADDED","Sweet potato, canned in syrup, with fat added in cooking" +73409000,"SWEET POTATO, CASSEROLE OR MASHED","Sweet potato, casserole or mashed" +73410110,"SWEET POTATO, FRIED","Sweet potato, fried" +73410210,"SWEET POTATO, CHIPS","Sweet potato, chips" +73410300,"SWEET POTATO, FRENCH FRIES","Sweet potato, french fries" +73421000,"SWEET POTATO, YELLOW, P.R., COOKED","Sweet potato, yellow, Puerto Rican, cooked" +73501000,"CARROT SOUP, CREAM OF, PREPARED WITH MILK, HOME RECIPE, CANN","Carrot soup, cream of, prepared with milk, home recipe, canned or ready-to-serve" +73501010,"CARROT WITH 
RICE SOUP, CREAM OF, PREPARED WITH MILK, HOME RE","Carrot with rice soup, cream of, prepared with milk, home recipe, canned or ready-to-serve" +74101000,"TOMATOES, RAW","Tomatoes, raw" +74102000,"TOMATOES, GREEN, RAW","Tomatoes, green, raw" +74201000,"TOMATOES, COOKED, NS AS TO FORM, NS AS TO METHOD","Tomatoes, cooked, NS as to form, NS as to method" +74201001,"TOMATOES, COOKED, FROM FRESH, NS AS TO METHOD","Tomatoes, cooked, from fresh, NS as to method" +74201003,"TOMATOES, COOKED, FROM CANNED, NS AS TO METHOD","Tomatoes, cooked, from canned, NS as to method" +74202010,"TOMATOES, NS AS TO FORM, BROILED","Tomatoes, NS as to form, broiled" +74202011,"TOMATOES, FROM FRESH, BROILED","Tomatoes, from fresh, broiled" +74202050,"TOMATOES, RED, NS AS TO FORM, FRIED","Tomatoes, red, NS as to form, fried" +74202051,"TOMATOES, RED, FROM FRESH, FRIED","Tomatoes, red, from fresh, fried" +74203010,"TOMATOES, NS AS TO FORM, SCALLOPED","Tomatoes, NS as to form, scalloped" +74203011,"TOMATOES, FROM FRESH, SCALLOPED","Tomatoes, from fresh, scalloped" +74204010,"TOMATOES, NS AS TO FORM, STEWED","Tomatoes, NS as to form, stewed" +74204011,"TOMATOES, FROM FRESH, STEWED","Tomatoes, from fresh, stewed" +74204013,"TOMATOES, FROM CANNED, STEWED","Tomatoes, from canned, stewed" +74204500,"TOMATOES, CANNED, LOW SODIUM","Tomatoes, canned, low sodium" +74205010,"TOMATOES, GREEN, COOKED, NS AS TO FORM (INCL FRIED)","Tomatoes, green, cooked, NS as to form" +74205011,"TOMATOES, GREEN, COOKED, FROM FRESH (INCL FRIED)","Tomatoes, green, cooked, from fresh" +74205020,"TOMATOES, GREEN, PICKLED","Tomato, green, pickled" +74206000,"TOMATOES, RED, DRIED","Tomatoes, red, dried" +74301100,"TOMATO JUICE","Tomato juice" +74301150,"TOMATO JUICE, LOW SODIUM","Tomato juice, low sodium" +74302000,"TOMATO JUICE COCKTAIL","Tomato juice cocktail" +74303000,"TOMATO & VEGETABLE JUICE, MOSTLY TOMATO (INCL V-8)","Tomato and vegetable juice, mostly tomato" +74303100,"TOMATO & VEGETABLE JUICE, MOSTLY TOMATO, LOW 
SODIUM","Tomato and vegetable juice, mostly tomato, low sodium" +74304000,"TOMATO JUICE W/ CLAM OR BEEF JUICE","Tomato juice with clam or beef juice" +74401010,"TOMATO CATSUP","Tomato catsup" +74401110,"TOMATO CATSUP, REDUCED SODIUM","Tomato catsup, reduced sodium" +74402010,"TOMATO CHILI SAUCE (CATSUP TYPE)","Tomato chili sauce (catsup-type)" +74402100,"SALSA, NFS","Salsa, NFS" +74402110,"SALSA, PICO DE GALLO","Salsa, pico de gallo" +74402150,"SALSA, RED, COMMERCIALLY-PREPARED","Salsa, red, commercially-prepared" +74402200,"SALSA, RED, HOMEMADE","Salsa, red, homemade" +74402250,"ENCHILADA SAUCE, RED","Enchilada sauce, red" +74402260,"ENCHILADA SAUCE, GREEN","Enchilada sauce, green" +74402350,"SALSA VERDE OR SALSA, GREEN","Salsa verde or salsa, green" +74404010,"SPAGHETTI SAUCE, MEATLESS","Spaghetti sauce, meatless" +74404020,"SPAGHETTI SAUCE W/ VEGETABLES, HOMEMADE-STYLE","Spaghetti sauce with vegetables, homemade-style" +74404030,"SPAGHETTI SAUCE W/ MEAT, CANNED, NO EXTRA MEAT","Spaghetti sauce with meat, canned, no extra meat added" +74404050,"SPAGHETTI SAUCE, MEATLESS, REDUCED SODIUM","Spaghetti sauce, meatless, reduced sodium" +74404060,"SPAGHETTI SAUCE, MEATLESS, FAT FREE","Spaghetti sauce, meatless, fat free" +74404090,"VODKA FLAVORED PASTA SAUCE MADE WITH TOMATOES AND CREAM","Vodka flavored pasta sauce made with tomatoes and cream" +74405010,"TOMATO RELISH (INCLUDE TOMATO PRESERVES)","Tomato relish" +74406010,"BARBECUE SAUCE","Barbecue sauce" +74406050,"BARBECUE SAUCE, REDUCED SODIUM","Barbecue sauce, reduced sodium" +74406100,"STEAK SAUCE, TOMATO-BASE (INCLUDE A-1)","Steak sauce, tomato-base" +74406500,"COCKTAIL SAUCE","Cocktail sauce" +74410110,"PUERTO RICAN SEASONING WITH HAM","Puerto Rican seasoning with ham" +74415110,"PUERTO RICAN SEASONING W/ HAM & TOMATO SAUCE","Puerto Rican seasoning with ham and tomato sauce" +74420110,"PUERTO RICAN SEASONING WO/ HAM & TOMATO SAUCE","Puerto Rican seasoning without ham and tomato sauce" +74501010,"TOMATO 
ASPIC","Tomato aspic" +74503010,"TOMATO & CORN, COOKED, FAT NOT ADDED IN COOKING","Tomato and corn, cooked, fat not added in cooking" +74504000,"TOMATO & OKRA, COOKED, NS AS TO ADDED FAT","Tomato and okra, cooked, NS as to fat added in cooking" +74504010,"TOMATO & OKRA, COOKED, NO FAT ADDED","Tomato and okra, cooked, fat not added in cooking" +74504020,"TOMATO & OKRA, COOKED, FAT ADDED","Tomato and okra, cooked, fat added in cooking" +74504100,"TOMATO & ONION, COOKED, NS FAT ADDED","Tomato and onion, cooked, NS as to fat added in cooking" +74504110,"TOMATO & ONION, COOKED, FAT NOT ADDED IN COOKING","Tomato and onion, cooked, fat not added in cooking" +74504120,"TOMATO & ONION, COOKED, FAT ADDED","Tomato and onion, cooked, fat added in cooking" +74504150,"TOMATO & CELERY, COOKED, FAT NOT ADDED IN COOKING","Tomato and celery, cooked, fat not added in cooking" +74505000,"TOMATO W/ CORN & OKRA, COOKED, NS AS TO ADDED FAT","Tomato with corn and okra, cooked, NS as to fat added in cooking" +74505010,"TOMATO W/ CORN & OKRA, COOKED, NO FAT ADDED","Tomato with corn and okra, cooked, fat not added in cooking" +74505020,"TOMATO W/ CORN & OKRA, COOKED, FAT ADDED","Tomato with corn and okra, cooked, fat added in cooking" +74506000,"TOMATO & CUCUMBER SALAD W/ OIL & VINEGAR","Tomato and cucumber salad made with tomato, cucumber, oil, and vinegar" +74601000,"TOMATO SOUP, NFS","Tomato soup, NFS" +74601010,"TOMATO SOUP, CREAM OF,PREP W/ MILK","Tomato soup, cream of, prepared with milk" +74602010,"TOMATO SOUP, PREPARED WITH WATER, OR READY-TO-SERVE","Tomato soup, prepared with water, or ready-to-serve" +74602050,"TOMATO SOUP, INSTANT TYPE, PREPARED W/ WATER","Tomato soup, instant type, prepared with water" +74602200,"TOMATO SOUP, CANNED, REDUCED SODIUM, PREPARED WITH WATER, OR","Tomato soup, canned, reduced sodium, prepared with water, or ready-to-serve" +74602300,"TOMATO SOUP, CANNED, REDUCED SODIUM, PREP W/ MILK","Tomato soup, canned, reduced sodium, prepared with milk" 
+74603010,"TOMATO BEEF SOUP, PREPARED W/ WATER","Tomato beef soup, prepared with water" +74604010,"TOMATO BEEF NOODLE SOUP, PREPARED W/ WATER","Tomato beef noodle soup, prepared with water" +74604100,"TOMATO BEEF RICE SOUP, PREPARED W/ WATER","Tomato beef rice soup, prepared with water" +74604500,"TOMATO NOODLE SOUP, CANNED, PREPARED WITH WATER OR READY-TO-","Tomato noodle soup, canned, prepared with water or ready-to-serve" +74604600,"TOMATO NOODLE SOUP, CANNED, PREPARED WITH MILK","Tomato noodle soup, canned, prepared with milk" +74605010,"TOMATO RICE SOUP, PREPARED W/ WATER","Tomato rice soup, prepared with water" +74606010,"TOMATO VEGETABLE SOUP, PREP W/ WATER","Tomato vegetable soup, prepared with water" +74606020,"TOMATO VEGETABLE SOUP W/NOODLES, PREPARED W/ WATER","Tomato vegetable soup with noodles, prepared with water" +74701000,"TOMATO SANDWICH","Tomato sandwich" +75100250,"RAW VEGETABLE, NFS","Raw vegetable, NFS" +75100300,"SPROUTS, NFS","Sprouts, NFS" +75100500,"ALFALFA SPROUTS, RAW","Alfalfa sprouts, raw" +75100750,"ARTICHOKE, JERUSALEM, RAW (INCLUDE SUNCHOKE)","Artichoke, Jerusalem, raw" +75100800,"ASPARAGUS, RAW","Asparagus, raw" +75101000,"BEAN SPROUTS, RAW (SOYBEAN/MUNG)","Bean sprouts, raw (soybean or mung)" +75101800,"BEANS, STRING, GREEN, RAW","Beans, string, green, raw" +75102000,"BEANS, LIMA, RAW","Beans, lima, raw" +75102500,"BEETS, RAW","Beets, raw" +75102600,"BROCCOFLOWER, RAW","Broccoflower, raw" +75102750,"BRUSSELS SPROUTS, RAW","Brussels sprouts, raw" +75103000,"CABBAGE, GREEN, RAW","Cabbage, green, raw" +75104000,"CABBAGE, CHINESE, RAW","Cabbage, Chinese, raw" +75105000,"CABBAGE, RED, RAW","Cabbage, red, raw" +75105500,"CACTUS, RAW","Cactus, raw" +75107000,"CAULIFLOWER, RAW","Cauliflower, raw" +75109000,"CELERY, RAW (INCLUDE CELERY, NFS)","Celery, raw" +75109010,"FENNEL BULB, RAW","Fennel bulb, raw" +75109400,"BASIL, RAW","Basil, raw" +75109500,"CHIVES, RAW (INCLUDE CHIVES, NFS)","Chives, raw" +75109550,"CILANTRO, RAW","Cilantro, raw" 
+75109600,"CORN, RAW","Corn, raw" +75111000,"CUCUMBER, RAW (INCLUDE CUCUMBER, NFS)","Cucumber, raw" +75111200,"EGGPLANT, RAW","Eggplant, raw" +75111500,"GARLIC, RAW","Garlic, raw" +75111800,"JICAMA, RAW (INCLUDE YAMBEAN)","Jicama, raw" +75112000,"KOHLRABI, RAW","Kohlrabi, raw" +75112500,"LEEK, RAW","Leek, raw" +75113000,"LETTUCE, RAW","Lettuce, raw" +75113060,"LETTUCE, BOSTON, RAW","Lettuce, Boston, raw" +75113070,"LETTUCE, MANOA","Lettuce, manoa" +75113080,"LETTUCE, ARUGULA, RAW","Lettuce, arugula, raw" +75114000,"MIXED SALAD GREENS, RAW","Mixed salad greens, raw" +75115000,"MUSHROOMS, RAW","Mushrooms, raw" +75117010,"ONIONS, YOUNG GREEN, RAW","Onions, young green, raw" +75117020,"ONIONS, MATURE, RAW","Onions, mature, raw" +75119000,"PARSLEY, RAW","Parsley, raw" +75120000,"PEAS, GREEN, RAW","Peas, green, raw" +75121000,"PEPPER, HOT CHILI, RAW (INCLUDE JALAPENO)","Pepper, hot chili, raw" +75121400,"PEPPER, POBLANO, RAW","Pepper, poblano, raw" +75121500,"PEPPER, SERRANO, RAW","Pepper, Serrano, raw" +75122000,"PEPPER, RAW, NFS","Pepper, raw, NFS" +75122100,"PEPPER, SWEET, GREEN, RAW","Pepper, sweet, green, raw" +75122200,"PEPPER, SWEET, RED, RAW","Pepper, sweet, red, raw" +75124000,"PEPPER, BANANA, RAW","Pepper, banana, raw" +75125000,"RADISH, RAW","Radish, raw" +75127000,"RUTABAGA, RAW","Rutabaga, raw" +75127500,"SEAWEED, RAW (INCLUDE BLANCHED)","Seaweed, raw" +75127750,"SNOWPEA (PEA POD), RAW","Snowpeas (pea pod), raw" +75128000,"SQUASH, SUMMER, YELLOW, RAW","Squash, summer, yellow, raw" +75128010,"SQUASH, SUMMER, GREEN, RAW (INCLUDE ZUCCHINI)","Squash, summer, green, raw" +75129000,"TURNIP, RAW","Turnip, raw" +75132000,"MIXED VEGETABLE JUICE (OTHER THAN TOMATO)","Mixed vegetable juice (vegetables other than tomato)" +75132100,"CELERY JUICE","Celery juice" +75140500,"BROCCOLI SALAD W/CAULIFLOWER,CHEESE,BACON,&DRESSING","Broccoli salad with cauliflower, cheese, bacon bits, and dressing" +75140510,"BROCCOLI SLAW SALAD","Broccoli slaw salad" +75140990,"CABBAGE SALAD 
OR COLESLAW, FROM FAST FOOD / RESTAURANT","Cabbage salad or coleslaw, from fast food / restaurant" +75141000,"CABBAGE SALAD OR COLESLAW, MADE WITH COLESLAW DRESSING","Cabbage salad or coleslaw, made with coleslaw dressing" +75141005,"CABBAGE SALAD OR COLESLAW, MADE W/ LIGHT COLESLAW DRESSING","Cabbage salad or coleslaw, made with light coleslaw dressing" +75141020,"CABBAGE SALAD OR COLESLAW, W/ ITALIAN DRSG","Cabbage salad or coleslaw, made with Italian dressing" +75141025,"CABBAGE SALAD OR COLESLAW, W/LT ITALIAN DRSG","Cabbage salad or coleslaw, made with light Italian dressing" +75141030,"CABBAGE SALAD OR COLESLAW, W/ CREAMY DRSG","Cabbage salad or coleslaw, made with creamy dressing" +75141035,"CABBAGE SALAD OR COLESLAW, W/ LT CREAMY DRSG","Cabbage salad or coleslaw, made with light creamy dressing" +75141040,"CABBAGE SALAD OR COLESLAW, W/ FAT FREE DRSG","Cabbage salad or coleslaw, made with any type of fat free dressing" +75141100,"CABBAGE SALAD OR COLESLAW, W/APPLES/RAISINS, DRESS","Cabbage salad or coleslaw with apples and/or raisins, with dressing" +75141200,"CABBAGE SALAD OR COLESLAW, W/ PINEAPPLE, DRESSING","Cabbage salad or coleslaw with pineapple, with dressing" +75141300,"CABBAGE, CHINESE, SALAD, W/ DRESSING","Cabbage, Chinese, salad, with dressing" +75141500,"CELERY, STUFFED W/ CHEESE","Celery, stuffed with cheese" +75142000,"CUCUMBER & VEGETABLE NAMASU","Cucumber and vegetable namasu" +75142500,"CUCUMBER SALAD, MADE WITH SOUR CREAM DRESSING","Cucumber salad, made with sour cream dressing" +75142550,"CUCUMBER SALAD, W/ ITALIAN DRSG","Cucumber salad, made with Italian dressing" +75142600,"CUCUMBER SALAD MADE W/ CUCUMBER AND VINEGAR","Cucumber salad made with cucumber and vinegar" +75143000,"LETTUCE SALAD W/ ASSORTED VEGETABLES","Lettuce, salad with assorted vegetables including tomatoes and/or carrots, no dressing" +75143050,"LETTUCE SALAD, W/ ASST VEG, NO TOM OR CAR, NO DRESS","Lettuce, salad with assorted vegetables excluding tomatoes and carrots, no 
dressing" +75143100,"LETTUCE SALAD, W/ AVOCADO, TOMATO/CAR, NO DRESS","Lettuce, salad with avocado, tomato, and/or carrots, with or without other vegetables, no dressing" +75143200,"LETTUCE SALAD, W/ CHEESE, TOM/CAR, NO DRESSING","Lettuce, salad with cheese, tomato and/or carrots, with or without other vegetables, no dressing" +75143300,"LETTUCE SALAD, W/ EGG, TOM/CAR, NO DRESSING","Lettuce, salad with egg, tomato, and/or carrots, with or without other vegetables, no dressing" +75143350,"LETTUCE SALAD W/ EGG, CHEESE, TOM/CAR, NO DRESSING","Lettuce, salad with egg, cheese, tomato, and/or carrots, with or without other vegetables, no dressing" +75144100,"LETTUCE, WILTED, W/ BACON DRESSING","Lettuce, wilted, with bacon dressing" +75145000,"SEVEN-LAYER SALAD(LETTUCE, MAYO, CHEESE, EGG, PEAS)","Seven-layer salad (lettuce salad made with a combination of onion, celery, green pepper, peas, mayonnaise, cheese, eggs, and/or bacon)" +75146000,"GREEK SALAD, NO DRESSING","Greek Salad, no dressing" +75147000,"SPINACH SALAD, NO DRESSING","Spinach salad, no dressing" +75148010,"COBB SALAD, NO DRESSING","Cobb salad, no dressing" +75200100,"VEGETABLES, NS AS TO TYPE, NS AS TO ADDED FAT","Vegetables, NS as to type, cooked, NS as to fat added in cooking" +75200110,"VEGETABLES, NS AS TO TYPE, NO FAT ADDED","Vegetables, NS as to type, cooked, fat not added in cooking" +75200120,"VEGETABLES, NS AS TO TYPE, COOKED, FAT ADDED","Vegetables, NS as to type, cooked, fat added in cooking" +75200600,"ALGAE, DRIED (INCLUDE SPIRULINA)","Algae, dried" +75200700,"ALOE VERA JUICE","Aloe vera juice" +75201000,"ARTICHOKE, GLOBE(FRENCH), CKD, NS FORM, NS FAT ADDED","Artichoke, globe (French), cooked, NS as to form, NS as to fat added in cooking" +75201001,"ARTICHOKE, GLOBE(FRENCH), CKD, FROM FRESH, NS FAT ADDED","Artichoke, globe (French), cooked, from fresh, NS as to fat added in cooking" +75201002,"ARTICHOKE, GLOBE(FRENCH), CKD, FROM FROZ, NS FAT ADDED","Artichoke, globe (French), cooked, from 
frozen, NS as to fat added in cooking" +75201003,"ARTICHOKE, GLOBE(FRENCH), CKD, FROM CAN, NS FAT ADDED","Artichoke, globe (French), cooked, from canned, NS as to fat added in cooking" +75201010,"ARTICHOKE,GLOBE (FRENCH),CKD,NS FORM,FAT NOT ADDED","Artichoke, globe (French), cooked, NS as to form, fat not added in cooking" +75201011,"ARTICHOKE,GLOBE (FRENCH),CKD,FROM FRESH,FAT NOT ADDED","Artichoke, globe (French), cooked, from fresh, fat not added in cooking" +75201012,"ARTICHOKE,GLOBE (FRENCH),CKD,FROM FROZ,FAT NOT ADDED","Artichoke, globe (French), cooked, from frozen, fat not added in cooking" +75201013,"ARTICHOKE,GLOBE (FRENCH),CKD,FROM CAN,FAT NOT ADDED","Artichoke, globe (French), cooked, from canned, fat not added in cooking" +75201020,"ARTICHOKE, GLOBE (FRENCH), CKD, NS FORM, FAT ADDED","Artichoke, globe (French), cooked, NS as to form, fat added in cooking" +75201021,"ARTICHOKE, GLOBE (FRENCH), CKD, FROM FRESH, FAT ADDED","Artichoke, globe (French), cooked, from fresh, fat added in cooking" +75201022,"ARTICHOKE, GLOBE (FRENCH), CKD, FROM FROZ, FAT ADDED","Artichoke, globe (French), cooked, from frozen, fat added in cooking" +75201023,"ARTICHOKE, GLOBE (FRENCH), CKD, FROM CAN, FAT ADDED","Artichoke, globe (French), cooked, from canned, fat added in cooking" +75201030,"ARTICHOKE SALAD IN OIL","Artichoke salad in oil" +75202000,"ASPARAGUS, COOKED, NS AS TO FORM, NS FAT ADDED","Asparagus, cooked, NS as to form, NS as to fat added in cooking" +75202001,"ASPARAGUS, COOKED, FROM FRESH, NS FAT ADDED","Asparagus, cooked, from fresh, NS as to fat added in cooking" +75202002,"ASPARAGUS, COOKED, FROM FROZEN, NS FAT ADDED","Asparagus, cooked, from frozen, NS as to fat added in cooking" +75202003,"ASPARAGUS, COOKED, FROM CANNED, NS FAT ADDED","Asparagus, cooked, from canned, NS as to fat added in cooking" +75202010,"ASPARAGUS, COOKED, NS AS TO FORM, FAT NOT ADDED","Asparagus, cooked, NS as to form, fat not added in cooking" +75202011,"ASPARAGUS, COOKED, FROM FRESH, FAT 
NOT ADDED","Asparagus, cooked, from fresh, fat not added in cooking" +75202012,"ASPARAGUS, COOKED, FROM FROZEN, FAT NOT ADDED","Asparagus, cooked, from frozen, fat not added in cooking" +75202013,"ASPARAGUS, COOKED, FROM CANNED, FAT NOT ADDED","Asparagus, cooked, from canned, fat not added in cooking" +75202020,"ASPARAGUS, COOKED, NS AS TO FORM, FAT ADDED","Asparagus, cooked, NS as to form, fat added in cooking" +75202021,"ASPARAGUS, COOKED, FROM FRESH, FAT ADDED","Asparagus, cooked, from fresh, fat added in cooking" +75202022,"ASPARAGUS, COOKED, FROM FROZEN, FAT ADDED","Asparagus, cooked, from frozen, fat added in cooking" +75202023,"ASPARAGUS, COOKED, FROM CANNED, FAT ADDED","Asparagus, cooked, from canned, fat added in cooking" +75203000,"BAMBOO SHOOTS, COOKED, FAT NOT ADDED IN COOKING","Bamboo shoots, cooked, fat not added in cooking" +75203020,"BAMBOO SHOOTS, COOKED, FAT ADDED IN COOKING","Bamboo shoots, cooked, fat added in cooking" +75204000,"BEANS, LIMA, IMMATURE, COOKED, NS FORM, NS AS TO FAT","Beans, lima, immature, cooked, NS as to form, NS as to fat added in cooking" +75204001,"BEANS, LIMA, IMMATURE, COOKED, FROM FRESH, NS FAT ADDED","Beans, lima, immature, cooked, from fresh, NS as to fat added in cooking" +75204002,"BEANS, LIMA, IMMATURE, COOKED, FROM FROZEN, NS FAT ADDED","Beans, lima, immature, cooked, from frozen, NS as to fat added in cooking" +75204003,"BEANS, LIMA, IMMATURE, COOKED, FROM CANNED, NS FAT ADDED","Beans, lima, immature, cooked, from canned, NS as to fat added in cooking" +75204010,"BEANS, LIMA, IMMATURE, COOKED, NS FORM, NO FAT ADDED","Beans, lima, immature, cooked, NS as to form, fat not added in cooking" +75204011,"BEANS, LIMA, IMMATURE, COOKED, FROM FRESH, NO FAT ADDED","Beans, lima, immature, cooked, from fresh, fat not added in cooking" +75204012,"BEANS, LIMA, IMMATURE, COOKED, FROM FROZ, NO FAT ADDED","Beans, lima, immature, cooked, from frozen, fat not added in cooking" +75204013,"BEANS, LIMA, IMMATURE, COOKED, FROM CAN, NO 
FAT ADDED","Beans, lima, immature, cooked, from canned, fat not added in cooking" +75204020,"BEANS, LIMA, IMMATURE, COOKED, NS FORM, FAT ADDED","Beans, lima, immature, cooked, NS as to form, fat added in cooking" +75204021,"BEANS, LIMA, IMMATURE, COOKED, FROM FRESH, FAT ADDED","Beans, lima, immature, cooked, from fresh, fat added in cooking" +75204022,"BEANS, LIMA, IMMATURE, COOKED, FROM FROZ, FAT ADDED","Beans, lima, immature, cooked, from frozen, fat added in cooking" +75204023,"BEANS, LIMA, IMMATURE, COOKED, FROM CAN, FAT ADDED","Beans, lima, immature, cooked, from canned, fat added in cooking" +75204100,"BEANS, LIMA,IMMATURE,CANNED,LOW SODIUM,NS AS TO FAT","Beans, lima, immature, canned, low sodium, NS as to fat added in cooking" +75204110,"BEANS, LIMA,IMMATURE,CANNED,LOW SODIUM,NO FAT ADDED","Beans, lima, immature, canned, low sodium, fat not added in cooking" +75204120,"BEANS, LIMA, IMMATURE, CANNED,LOW SODIUM, FAT ADDED","Beans, lima, immature, canned, low sodium, fat added in cooking" +75204980,"BEANS, STRING, CKD, NS FORM, NS COLOR, FAT ADDED","Beans, string, cooked, NS as to form, NS as to color, fat added in cooking" +75204981,"BEANS, STRING, CKD, FROM FRESH, NS COLOR, FAT ADDED","Beans, string, cooked, from fresh, NS as to color, fat added in cooking" +75204982,"BEANS, STRING, CKD, FROM FROZ, NS COLOR, FAT ADDED","Beans, string, cooked, from frozen, NS as to color, fat added in cooking" +75204983,"BEANS, STRING, CKD, FROM CAN, NS COLOR, FAT ADDED","Beans, string, cooked, from canned, NS as to color, fat added in cooking" +75204990,"BEANS, STRING, CKD, NS FORM, NS COLOR, NO FAT ADDED","Beans, string, cooked, NS as to form, NS as to color, fat not added in cooking" +75204991,"BEANS, STRING, CKD, FROM FRESH, NS COLOR, NO FAT ADDED","Beans, string, cooked, from fresh, NS as to color, fat not added in cooking" +75204992,"BEANS, STRING, CKD, FROM FROZ, NS COLOR, NO FAT ADDED","Beans, string, cooked, from frozen, NS as to color, fat not added in cooking" 
+75204993,"BEANS, STRING, CKD, FROM CAN, NS COLOR, NO FAT ADDED","Beans, string, cooked, from canned, NS as to color, fat not added in cooking" +75205000,"BEANS, STRING, CKD, NS FORM, NS COLOR, NS FAT ADDED","Beans, string, cooked, NS as to form, NS as to color, NS as to fat added in cooking" +75205001,"BEANS, STRING, CKD, FROM FRESH, NS COLOR, NS FAT ADDED","Beans, string, cooked, from fresh, NS as to color, NS as to fat added in cooking" +75205002,"BEANS, STRING, CKD, FROM FROZ, NS COLOR, NS FAT ADDED","Beans, string, cooked, from frozen, NS as to color, NS as to fat added in cooking" +75205003,"BEANS, STRING, CKD, FROM CAN, NS COLOR, NS FAT ADDED","Beans, string, cooked, from canned, NS as to color, NS as to fat added in cooking" +75205010,"BEANS, STRING, GREEN, COOKED, NS FORM, NS FAT ADDED","Beans, string, green, cooked, NS as to form, NS as to fat added in cooking" +75205011,"BEANS, STRING, GREEN, COOKED, FROM FRESH, NS FAT ADDED","Beans, string, green, cooked, from fresh, NS as to fat added in cooking" +75205012,"BEANS, STRING, GREEN, COOKED, FROM FROZEN, NS FAT ADDED","Beans, string, green, cooked, from frozen, NS as to fat added in cooking" +75205013,"BEANS, STRING, GREEN, COOKED, FROM CANNED, NS FAT ADDED","Beans, string, green, cooked, from canned, NS as to fat added in cooking" +75205020,"BEANS, STRING, GREEN, COOKED, NS FORM,FAT NOT ADDED","Beans, string, green, cooked, NS as to form, fat not added in cooking" +75205021,"BEANS, STRING, GREEN, COOKED, FROM FRESH, FAT NOT ADDED","Beans, string, green, cooked, from fresh, fat not added in cooking" +75205022,"BEANS, STRING, GREEN, COOKED, FROM FROZEN, FAT NOT ADDED","Beans, string, green, cooked, from frozen, fat not added in cooking" +75205023,"BEANS, STRING, GREEN, COOKED, FROM CANNED, FAT NOT ADDED","Beans, string, green, cooked, from canned, fat not added in cooking" +75205030,"BEANS, STRING, GREEN, COOKED, NS FORM, FAT ADDED","Beans, string, green, cooked, NS as to form, fat added in cooking" 
+75205031,"BEANS, STRING, GREEN, COOKED, FROM FRESH, FAT ADDED","Beans, string, green, cooked, from fresh, fat added in cooking" +75205032,"BEANS, STRING, GREEN, COOKED, FROM FROZEN, FAT ADDED","Beans, string, green, cooked, from frozen, fat added in cooking" +75205033,"BEANS, STRING, GREEN, COOKED, FROM CANNED, FAT ADDED","Beans, string, green, cooked, from canned, fat added in cooking" +75205110,"BEANS, GREEN, CANNED, LO NA, NS AS TO ADDED FAT","Beans, string, green, canned, low sodium, NS as to fat added in cooking" +75205120,"BEANS, GREEN, CANNED, LO NA, FAT NOT ADDED","Beans, string, green, canned, low sodium, fat not added in cooking" +75205130,"BEANS, GREEN, CANNED, LO NA, FAT ADDED","Beans, string, green, canned, low sodium, fat added in cooking" +75206000,"BEANS, STRING, YELLOW, COOKED, NS FORM, NS ADDED FAT","Beans, string, yellow, cooked, NS as to form, NS as to fat added in cooking" +75206001,"BEANS, STRING, YELLOW, COOKED, FROM FRESH, NS ADDED FAT","Beans, string, yellow, cooked, from fresh, NS as to fat added in cooking" +75206002,"BEANS, STRING, YELLOW, COOKED, FROM FROZ, NS ADDED FAT","Beans, string, yellow, cooked, from frozen, NS as to fat added in cooking" +75206003,"BEANS, STRING, YELLOW, COOKED, FROM CANNED, NS ADDED FAT","Beans, string, yellow, cooked, from canned, NS as to fat added in cooking" +75206010,"BEANS, STRING, YELLOW, COOKED, NS FORM, NO FAT ADDED","Beans, string, yellow, cooked, NS as to form, fat not added in cooking" +75206011,"BEANS, STRING, YELLOW, COOKED, FROM FRESH, NO FAT ADDED","Beans, string, yellow, cooked, from fresh, fat not added in cooking" +75206012,"BEANS, STRING, YELLOW, COOKED, FROM FROZ, NO FAT ADDED","Beans, string, yellow, cooked, from frozen, fat not added in cooking" +75206013,"BEANS, STRING, YELLOW, COOKED, FROM CANNED, NO FAT ADDED","Beans, string, yellow, cooked, from canned, fat not added in cooking" +75206020,"BEANS, STRING, YELLOW, COOKED, NS FORM, FAT ADDED","Beans, string, yellow, cooked, NS as to 
form, fat added in cooking" +75206021,"BEANS, STRING, YELLOW, COOKED, FROM FRESH, FAT ADDED","Beans, string, yellow, cooked, from fresh, fat added in cooking" +75206022,"BEANS, STRING, YELLOW, COOKED, FROM FROZ, FAT ADDED","Beans, string, yellow, cooked, from frozen, fat added in cooking" +75206023,"BEANS, STRING, YELLOW, COOKED, FROM CAN, FAT ADDED","Beans, string, yellow, cooked, from canned, fat added in cooking" +75207000,"BEAN SPROUTS, COOKED, NS FORM, NS AS TO ADDED FAT","Bean sprouts, cooked, NS as to form, NS as to fat added in cooking" +75207001,"BEAN SPROUTS, COOKED, FROM FRESH, NS FAT ADDED","Bean sprouts, cooked, from fresh, NS as to fat added in cooking" +75207003,"BEAN SPROUTS, COOKED, FROM CANNED, NS FAT ADDED","Bean sprouts, cooked, from canned, NS as to fat added in cooking" +75207010,"BEAN SPROUTS, COOKED, NS AS TO FORM, FAT NOT ADDED","Bean sprouts, cooked, NS as to form, fat not added in cooking" +75207011,"BEAN SPROUTS, COOKED, FROM FRESH, FAT NOT ADDED","Bean sprouts, cooked, from fresh, fat not added in cooking" +75207013,"BEAN SPROUTS, COOKED, FROM CANNED, FAT NOT ADDED","Bean sprouts, cooked, from canned, fat not added in cooking" +75207020,"BEAN SPROUTS, COOKED, NS AS TO FORM, FAT ADDED","Bean sprouts, cooked, NS as to form, fat added in cooking" +75207021,"BEAN SPROUTS, COOKED, FROM FRESH, FAT ADDED","Bean sprouts, cooked, from fresh, fat added in cooking" +75207023,"BEAN SPROUTS, COOKED, FROM CANNED, FAT ADDED","Bean sprouts, cooked, from canned, fat added in cooking" +75208000,"BEETS, COOKED, NS AS TO FORM, NS AS TO FAT ADDED","Beets, cooked, NS as to form, NS as to fat added in cooking" +75208001,"BEETS, COOKED, FROM FRESH, NS AS TO FAT ADDED","Beets, cooked, from fresh, NS as to fat added in cooking" +75208002,"BEETS, COOKED, FROM FROZEN, NS AS TO FAT ADDED","Beets, cooked, from frozen, NS as to fat added in cooking" +75208003,"BEETS, COOKED, FROM CANNED, NS AS TO FAT ADDED","Beets, cooked, from canned, NS as to fat added in cooking" 
+75208010,"BEETS, COOKED, NS AS TO FORM, FAT NOT ADDED","Beets, cooked, NS as to form, fat not added in cooking" +75208011,"BEETS, COOKED, FROM FRESH, FAT NOT ADDED","Beets, cooked, from fresh, fat not added in cooking" +75208012,"BEETS, COOKED, FROM FROZEN, FAT NOT ADDED","Beets, cooked, from frozen, fat not added in cooking" +75208013,"BEETS, COOKED, FROM CANNED, FAT NOT ADDED","Beets, cooked, from canned, fat not added in cooking" +75208020,"BEETS, COOKED, NS AS TO FORM, FAT ADDED","Beets, cooked, NS as to form, fat added in cooking" +75208021,"BEETS, COOKED, FROM FRESH, FAT ADDED","Beets, cooked, from fresh, fat added in cooking" +75208022,"BEETS, COOKED, FROM FROZEN, FAT ADDED","Beets, cooked, from frozen, fat added in cooking" +75208023,"BEETS, COOKED, FROM CANNED, FAT ADDED","Beets, cooked, from canned, fat added in cooking" +75208100,"BEETS, CANNED, LOW SODIUM, NS AS TO ADDED FAT","Beets, canned, low sodium, NS as to fat added in cooking" +75208110,"BEETS, CANNED, LOW SODIUM, FAT NOT ADDED","Beets, canned, low sodium, fat not added in cooking" +75208120,"BEETS, CANNED, LOW SODIUM, FAT ADDED","Beets, canned, low sodium, fat added in cooking" +75208290,"BITTERMELON, COOKED, NS AS TO ADDED FAT","Bitter melon, cooked, NS as to fat added in cooking" +75208300,"BITTERMELON, COOKED, NO FAT ADDED(INCL BALSAM PEAR)","Bitter melon, cooked, fat not added in cooking" +75208310,"BITTERMELON, COOKED, FAT ADDED (INCL BALSAM PEAR)","Bitter melon, cooked, fat added in cooking" +75208500,"BREADFRUIT, COOKED,FAT NOT ADDED IN COOKING","Breadfruit, cooked, fat not added in cooking" +75208520,"BREADFRUIT, FRIED","Breadfruit, fried" +75208700,"BROCCOFLOWER,COOKED,NS AS TO FAT ADDED IN COOKING","Broccoflower, cooked, NS as to fat added in cooking" +75208710,"BROCCOFLOWER,COOKED,FAT NOT ADDED IN COOKING","Broccoflower, cooked, fat not added in cooking" +75208720,"BROCCOFLOWER,COOKED,FAT ADDED IN COOKING","Broccoflower, cooked, fat added in cooking" +75209000,"BRUSSELS SPROUTS, 
COOKED, NS FORM, NS ADDED FAT","Brussels sprouts, cooked, NS as to form, NS as to fat added in cooking" +75209001,"BRUSSELS SPROUTS, COOKED, FROM FRESH, NS ADDED FAT","Brussels sprouts, cooked, from fresh, NS as to fat added in cooking" +75209002,"BRUSSELS SPROUTS, COOKED, FROM FROZ, NS ADDED FAT","Brussels sprouts, cooked, from frozen, NS as to fat added in cooking" +75209010,"BRUSSELS SPROUTS, COOKED, NS FORM, FAT NOT ADDED","Brussels sprouts, cooked, NS as to form, fat not added in cooking" +75209011,"BRUSSELS SPROUTS, COOKED, FROM FRESH, FAT NOT ADDED","Brussels sprouts, cooked, from fresh, fat not added in cooking" +75209012,"BRUSSELS SPROUTS, COOKED, FROM FROZ, FAT NOT ADDED","Brussels sprouts, cooked, from frozen, fat not added in cooking" +75209020,"BRUSSELS SPROUTS, COOKED, NS AS TO FORM, FAT ADDED","Brussels sprouts, cooked, NS as to form, fat added in cooking" +75209021,"BRUSSELS SPROUTS, COOKED, FROM FRESH, FAT ADDED","Brussels sprouts, cooked, from fresh, fat added in cooking" +75209022,"BRUSSELS SPROUTS, COOKED, FROM FROZ, FAT ADDED","Brussels sprouts, cooked, from frozen, fat added in cooking" +75209500,"BURDOCK, COOKED, FAT NOT ADDED IN COOKING","Burdock, cooked, fat not added in cooking" +75210000,"CABBAGE, CHINESE, COOKED, NS AS TO ADDED FAT","Cabbage, Chinese, cooked, NS as to fat added in cooking" +75210010,"CABBAGE, CHINESE, COOKED, FAT NOT ADDED","Cabbage, Chinese, cooked, fat not added in cooking" +75210020,"CABBAGE, CHINESE, COOKED, FAT ADDED","Cabbage, Chinese, cooked, fat added in cooking" +75211010,"CABBAGE, GREEN, COOKED, NS FAT","Cabbage, green, cooked, NS as to fat added in cooking" +75211020,"CABBAGE, GREEN, COOKED, FAT NOT ADDED","Cabbage, green, cooked, fat not added in cooking" +75211030,"CABBAGE, GREEN, COOKED, FAT ADDED","Cabbage, green, cooked, fat added in cooking" +75212000,"CABBAGE, RED, COOKED, NS AS TO ADDED FAT","Cabbage, red, cooked, NS as to fat added in cooking" +75212010,"CABBAGE, RED, COOKED, FAT NOT ADDED","Cabbage, 
red, cooked, fat not added in cooking" +75212020,"CABBAGE, RED, COOKED, FAT ADDED","Cabbage, red, cooked, fat added in cooking" +75213000,"CABBAGE, SAVOY, COOKED, NS AS TO ADDED FAT","Cabbage, savoy, cooked, NS as to fat added in cooking" +75213010,"CABBAGE, SAVOY, COOKED, FAT NOT ADDED","Cabbage, savoy, cooked, fat not added in cooking" +75213020,"CABBAGE, SAVOY, COOKED, FAT ADDED","Cabbage, savoy, cooked, fat added in cooking" +75213100,"CACTUS, COOKED, NS AS TO ADDED FAT","Cactus, cooked, NS as to fat added in cooking" +75213110,"CACTUS, COOKED, FAT NOT ADDED","Cactus, cooked, fat not added in cooking" +75213120,"CACTUS, COOKED, FAT ADDED","Cactus, cooked, fat added in cooking" +75214000,"CAULIFLOWER, COOKED, NS FORM, NS FAT ADDED","Cauliflower, cooked, NS as to form, NS as to fat added in cooking" +75214001,"CAULIFLOWER, COOKED, FROM FRESH, NS FAT ADDED","Cauliflower, cooked, from fresh, NS as to fat added in cooking" +75214002,"CAULIFLOWER, COOKED, FROM FROZEN, NS FAT ADDED","Cauliflower, cooked, from frozen, NS as to fat added in cooking" +75214003,"CAULIFLOWER, COOKED, FROM CANNED, NS FAT ADDED","Cauliflower, cooked, from canned, NS as to fat added in cooking" +75214010,"CAULIFLOWER, COOKED, NS FORM, FAT NOT ADDED","Cauliflower, cooked, NS as to form, fat not added in cooking" +75214011,"CAULIFLOWER, COOKED, FROM FRESH, FAT NOT ADDED","Cauliflower, cooked, from fresh, fat not added in cooking" +75214012,"CAULIFLOWER, COOKED, FROM FROZEN, FAT NOT ADDED","Cauliflower, cooked, from frozen, fat not added in cooking" +75214013,"CAULIFLOWER, COOKED, FROM CANNED, FAT NOT ADDED","Cauliflower, cooked, from canned, fat not added in cooking" +75214020,"CAULIFLOWER, COOKED, NS AS TO FORM, FAT ADDED","Cauliflower, cooked, NS as to form, fat added in cooking" +75214021,"CAULIFLOWER, COOKED, FROM FRESH, FAT ADDED","Cauliflower, cooked, from fresh, fat added in cooking" +75214022,"CAULIFLOWER, COOKED, FROM FROZEN, FAT ADDED","Cauliflower, cooked, from frozen, fat added in 
cooking" +75214023,"CAULIFLOWER, COOKED, FROM CANNED, FAT ADDED","Cauliflower, cooked, from canned, fat added in cooking" +75215000,"CELERY, COOKED, NS AS TO ADDED FAT","Celery, cooked, NS as to fat added in cooking" +75215010,"CELERY, COOKED, FAT NOT ADDED","Celery, cooked, fat not added in cooking" +75215020,"CELERY, COOKED, FAT ADDED","Celery, cooked, fat added in cooking" +75215100,"FENNEL BULB, COOKED, NS AS TO FAT ADDED","Fennel bulb, cooked, NS as to fat added in cooking" +75215110,"FENNEL BULB, COOKED, FAT NOT ADDED IN COOKING","Fennel bulb, cooked, fat not added in cooking" +75215120,"FENNEL BULB, COOKED, FAT ADDED IN COOKING","Fennel bulb, cooked, fat added in cooking" +75215510,"CHRISTOPHINE, COOKED, FAT NOT ADDED IN COOKING","Christophine, cooked, fat not added in cooking" +75216000,"CORN, COOKED, NS FORM, NS COLOR. NS FAT ADDED","Corn, cooked, NS as to form, NS as to color, NS as to fat added in cooking" +75216001,"CORN, COOKED, FROM FRESH, NS COLOR. NS FAT ADDED","Corn, cooked, from fresh, NS as to color, NS as to fat added in cooking" +75216002,"CORN, COOKED, FROM FROZEN, NS COLOR. 
NS FAT ADDED","Corn, cooked, from frozen, NS as to color, NS as to fat added in cooking" +75216003,"CORN, COOKED, FROM CANNED, NS COLOR, NS FAT ADDED","Corn, cooked, from canned, NS as to color, NS as to fat added in cooking" +75216010,"CORN, COOKED, NS FORM, NS COLOR, FAT NOT ADDED","Corn, cooked, NS as to form, NS as to color, fat not added in cooking" +75216011,"CORN, COOKED, FROM FRESH, NS COLOR, FAT NOT ADDED","Corn, cooked, from fresh, NS as to color, fat not added in cooking" +75216012,"CORN, COOKED, FROM FROZEN, NS COLOR, FAT NOT ADDED","Corn, cooked, from frozen, NS as to color, fat not added in cooking" +75216013,"CORN, COOKED, FROM CANNED, NS COLOR, FAT NOT ADDED","Corn, cooked, from canned, NS as to color, fat not added in cooking" +75216020,"CORN, COOKED, NS FORM, NS COLOR, FAT ADDED","Corn, cooked, NS as to form, NS as to color, fat added in cooking" +75216021,"CORN, COOKED, FROM FRESH, NS COLOR, FAT ADDED","Corn, cooked, from fresh, NS as to color, fat added in cooking" +75216022,"CORN, COOKED, FROM FROZEN, NS COLOR, FAT ADDED","Corn, cooked, from frozen, NS as to color, fat added in cooking" +75216023,"CORN, COOKED, FROM CANNED, NS COLOR, FAT ADDED","Corn, cooked, from canned, NS as to color, fat added in cooking" +75216050,"CORN, NS AS TO FORM, NS AS TO COLOR, CREAM STYLE","Corn, NS as to form, NS as to color, cream style" +75216053,"CORN, FROM CANNED, NS AS TO COLOR, CREAM STYLE","Corn, from canned, NS as to color, cream style" +75216070,"CORN, DRIED, COOKED","Corn, dried, cooked" +75216100,"CORN, YELLOW, COOKED, NS FORM, NS FAT ADDED","Corn, yellow, cooked, NS as to form, NS as to fat added in cooking" +75216101,"CORN, YELLOW, COOKED, FROM FRESH, NS FAT ADDED","Corn, yellow, cooked, from fresh, NS as to fat added in cooking" +75216102,"CORN, YELLOW, COOKED, FROM FROZEN, NS FAT ADDED","Corn, yellow, cooked, from frozen, NS as to fat added in cooking" +75216103,"CORN, YELLOW, COOKED, FROM CANNED, NS FAT ADDED","Corn, yellow, cooked, from canned, NS 
as to fat added in cooking" +75216110,"CORN, YELLOW, COOKED, NS FORM, FAT NOT ADDED","Corn, yellow, cooked, NS as to form, fat not added in cooking" +75216111,"CORN, YELLOW, COOKED, FROM FRESH, FAT NOT ADDED","Corn, yellow, cooked, from fresh, fat not added in cooking" +75216112,"CORN, YELLOW, COOKED, FROM FROZEN, FAT NOT ADDED","Corn, yellow, cooked, from frozen, fat not added in cooking" +75216113,"CORN, YELLOW, COOKED, FROM CANNED, FAT NOT ADDED","Corn, yellow, cooked, from canned, fat not added in cooking" +75216120,"CORN, YELLOW, COOKED, NS FORM, FAT ADDED","Corn, yellow, cooked, NS as to form, fat added in cooking" +75216121,"CORN, YELLOW, COOKED, FROM FRESH, FAT ADDED","Corn, yellow, cooked, from fresh, fat added in cooking" +75216122,"CORN, YELLOW, COOKED, FROM FROZEN, FAT ADDED","Corn, yellow, cooked, from frozen, fat added in cooking" +75216123,"CORN, YELLOW, COOKED, FROM CANNED, FAT ADDED","Corn, yellow, cooked, from canned, fat added in cooking" +75216150,"CORN, YELLOW, NS AS TO FORM, CREAM STYLE","Corn, yellow, NS as to form, cream style" +75216153,"CORN, YELLOW, FROM CANNED, CREAM STYLE","Corn, yellow, from canned, cream style" +75216160,"CORN, YELLOW & WHITE, COOKED, NS FORM, NS FAT ADDED","Corn, yellow and white, cooked, NS as to form, NS as to fat added in cooking" +75216161,"CORN, YELLOW & WHITE, COOKED, FROM FRESH, NS FAT ADDED","Corn, yellow and white, cooked, from fresh, NS as to fat added in cooking" +75216162,"CORN, YELLOW & WHITE, COOKED, FROM FROZ, NS FAT ADDED","Corn, yellow and white, cooked, from frozen, NS as to fat added in cooking" +75216163,"CORN, YELLOW & WHITE, COOKED, FROM CAN, NS FAT ADDED","Corn, yellow and white, cooked, from canned, NS as to fat added in cooking" +75216170,"CORN, YELLOW & WHITE, COOKED, NS FORM, NO FAT ADDED","Corn, yellow and white, cooked, NS as to form, fat not added in cooking" +75216171,"CORN, YELLOW & WHITE, COOKED, FROM FRESH, NO FAT ADDED","Corn, yellow and white, cooked, from fresh, fat not added in 
cooking" +75216172,"CORN, YELLOW & WHITE, COOKED, FROM FROZ, NO FAT ADDED","Corn, yellow and white, cooked, from frozen, fat not added in cooking" +75216173,"CORN, YELLOW & WHITE, COOKED, FROM CAN, NO FAT ADDED","Corn, yellow and white, cooked, from canned, fat not added in cooking" +75216180,"CORN, YELLOW & WHITE, COOKED, NS FORM, FAT ADDED","Corn, yellow and white, cooked, NS as to form, fat added in cooking" +75216181,"CORN, YELLOW & WHITE, COOKED, FROM FRESH, FAT ADDED","Corn, yellow and white, cooked, from fresh, fat added in cooking" +75216182,"CORN, YELLOW & WHITE, COOKED, FROM FROZ, FAT ADDED","Corn, yellow and white, cooked, from frozen, fat added in cooking" +75216183,"CORN, YELLOW & WHITE, COOKED, FROM CAN, FAT ADDED","Corn, yellow and white, cooked, from canned, fat added in cooking" +75216190,"CORN, YELLOW, NS AS TO FORM, CREAM STYLE, FAT ADDED","Corn, yellow, NS as to form, cream style, fat added in cooking" +75216193,"CORN, YELLOW, FROM CANNED, CREAM STYLE, FAT ADDED","Corn, yellow, from canned, cream style, fat added in cooking" +75216200,"CORN, WHITE, COOKED, NS FORM, NS FAT ADDED","Corn, white, cooked, NS as to form, NS as to fat added in cooking" +75216201,"CORN, WHITE, COOKED, FROM FRESH, NS FAT ADDED","Corn, white, cooked, from fresh, NS as to fat added in cooking" +75216202,"CORN, WHITE, COOKED, FROM FROZEN, NS FAT ADDED","Corn, white, cooked, from frozen, NS as to fat added in cooking" +75216203,"CORN, WHITE, COOKED, FROM CANNED, NS FAT ADDED","Corn, white, cooked, from canned, NS as to fat added in cooking" +75216210,"CORN, WHITE, COOKED, NS AS TO FORM, FAT NOT ADDED","Corn, white, cooked, NS as to form, fat not added in cooking" +75216211,"CORN, WHITE, COOKED, FROM FRESH, FAT NOT ADDED","Corn, white, cooked, from fresh, fat not added in cooking" +75216212,"CORN, WHITE, COOKED, FROM FROZEN, FAT NOT ADDED","Corn, white, cooked, from frozen, fat not added in cooking" +75216213,"CORN, WHITE, COOKED, FROM CANNED, FAT NOT ADDED","Corn, white, 
cooked, from canned, fat not added in cooking" +75216220,"CORN, WHITE, COOKED, NS AS TO FORM, FAT ADDED","Corn, white, cooked, NS as to form, fat added in cooking" +75216221,"CORN, WHITE, COOKED, FROM FRESH, FAT ADDED","Corn, white, cooked, from fresh, fat added in cooking" +75216222,"CORN, WHITE, COOKED, FROM FROZEN, FAT ADDED","Corn, white, cooked, from frozen, fat added in cooking" +75216223,"CORN, WHITE, COOKED, FROM CANNED, FAT ADDED","Corn, white, cooked, from canned, fat added in cooking" +75216250,"CORN, WHITE, NS AS TO FORM, CREAM STYLE","Corn, white, NS as to form, cream style" +75216253,"CORN, WHITE, FROM CANNED, CREAM STYLE","Corn, white, from canned, cream style" +75216300,"CORN, YELLOW, CANNED, LO NA, NS AS TO ADDED FAT","Corn, yellow, canned, low sodium, NS as to fat added in cooking" +75216310,"CORN, YELLOW, CANNED, LOW SODIUM, FAT NOT ADDED","Corn, yellow, canned, low sodium, fat not added in cooking" +75216320,"CORN, YELLOW, CANNED, LOW SODIUM, FAT ADDED","Corn, yellow, canned, low sodium, fat added in cooking" +75216700,"CUCUMBER, COOKED, NS AS TO ADDED FAT","Cucumber, cooked, NS as to fat added in cooking" +75216710,"CUCUMBER, COOKED, FAT NOT ADDED","Cucumber, cooked, fat not added in cooking" +75216720,"CUCUMBER, COOKED, FAT ADDED","Cucumber, cooked, fat added in cooking" +75217000,"EGGPLANT, COOKED, NS AS TO ADDED FAT","Eggplant, cooked, NS as to fat added in cooking" +75217010,"EGGPLANT, COOKED, FAT NOT ADDED","Eggplant, cooked, fat not added in cooking" +75217020,"EGGPLANT, COOKED, FAT ADDED","Eggplant, cooked, fat added in cooking" +75217300,"FLOWERS / BLOSSOMS OF SESBANIA/LILY/SQUASH, NO FAT","Flowers or blossoms of sesbania, squash, or lily, fat not added in cooking" +75217400,"GARLIC, COOKED","Garlic, cooked" +75217490,"HOMINY, COOKED, NS AS TO ADDED FAT","Hominy, cooked, NS as to fat added in cooking" +75217500,"HOMINY, COOKED, NO FAT ADDED","Hominy, cooked, fat not added in cooking" +75217520,"HOMINY, COOKED, FAT ADDED","Hominy, 
cooked, fat added in cooking" +75218010,"KOHLRABI, COOKED,FAT NOT ADDED IN COOKING","Kohlrabi, cooked, fat not added in cooking" +75218400,"LEEK, COOKED, NS AS TO FAT ADDED IN COOKING","Leek, cooked, NS as to fat added in cooking" +75218500,"LOTUS ROOT, COOKED, FAT NOT ADDED IN COOKING","Lotus root, cooked, fat not added in cooking" +75219000,"MUSHROOMS, COOKED, NS FORM, NS AS TO ADDED FAT","Mushrooms, cooked, NS as to form, NS as to fat added in cooking" +75219001,"MUSHROOMS, COOKED, FROM FRESH, NS FAT ADDED","Mushrooms, cooked, from fresh, NS as to fat added in cooking" +75219002,"MUSHROOMS, COOKED, FROM FROZ, NS FAT ADDED","Mushrooms, cooked, from frozen, NS as to fat added in cooking" +75219003,"MUSHROOMS, COOKED, FROM CANNED, NS FAT ADDED","Mushrooms, cooked, from canned, NS as to fat added in cooking" +75219010,"MUSHROOMS, COOKED, NS AS TO FORM, FAT NOT ADDED","Mushrooms, cooked, NS as to form, fat not added in cooking" +75219011,"MUSHROOMS, COOKED, FROM FRESH, FAT NOT ADDED","Mushrooms, cooked, from fresh, fat not added in cooking" +75219012,"MUSHROOMS, COOKED, FROM FROZ, FAT NOT ADDED","Mushrooms, cooked, from frozen, fat not added in cooking" +75219013,"MUSHROOMS, COOKED, FROM CANNED, FAT NOT ADDED","Mushrooms, cooked, from canned, fat not added in cooking" +75219020,"MUSHROOMS, COOKED, NS AS TO FORM, FAT ADDED","Mushrooms, cooked, NS as to form, fat added in cooking" +75219021,"MUSHROOMS, COOKED, FROM FRESH, FAT ADDED","Mushrooms, cooked, from fresh, fat added in cooking" +75219022,"MUSHROOMS, COOKED, FROM FROZ, FAT ADDED","Mushrooms, cooked, from frozen, fat added in cooking" +75219023,"MUSHROOMS, COOKED, FROM CANNED, FAT ADDED","Mushrooms, cooked, from canned, fat added in cooking" +75219100,"MUSHROOM, ASIAN, COOKED, FROM DRIED","Mushroom, Asian, cooked, from dried" +75220000,"OKRA, COOKED, NS FORM, NS FAT ADDED","Okra, cooked, NS as to form, NS as to fat added in cooking" +75220001,"OKRA, COOKED, FROM FRESH, NS FAT ADDED","Okra, cooked, from fresh, NS 
as to fat added in cooking" +75220002,"OKRA, COOKED, FROM FROZ, NS FAT ADDED","Okra, cooked, from frozen, NS as to fat added in cooking" +75220003,"OKRA, COOKED, FROM CANNED, NS FAT ADDED","Okra, cooked, from canned, NS as to fat added in cooking" +75220010,"OKRA, COOKED, NS FORM, FAT NOT ADDED","Okra, cooked, NS as to form, fat not added in cooking" +75220011,"OKRA, COOKED, FROM FRESH, FAT NOT ADDED","Okra, cooked, from fresh, fat not added in cooking" +75220012,"OKRA, COOKED, FROM FROZ, FAT NOT ADDED","Okra, cooked, from frozen, fat not added in cooking" +75220013,"OKRA, COOKED, FROM CANNED, FAT NOT ADDED","Okra, cooked, from canned, fat not added in cooking" +75220020,"OKRA, COOKED, NS FORM, FAT ADDED","Okra, cooked, NS as to form, fat added in cooking" +75220021,"OKRA, COOKED, FROM FRESH, FAT ADDED","Okra, cooked, from fresh, fat added in cooking" +75220022,"OKRA, COOKED, FROM FROZ, FAT ADDED","Okra, cooked, from frozen, fat added in cooking" +75220023,"OKRA, COOKED, FROM CANNED, FAT ADDED","Okra, cooked, from canned, fat added in cooking" +75220050,"LETTUCE, COOKED, FAT NOT ADDED IN COOKING","Lettuce, cooked, fat not added in cooking" +75220100,"LUFFA (CHINESE OKRA), COOKED, NO FAT ADDED","Luffa (Chinese okra), cooked, fat not added in cooking" +75221000,"ONIONS, MATURE, COOKED, NS FORM, NS AS TO ADDED FAT","Onions, mature, cooked, NS as to form, NS as to fat added in cooking" +75221001,"ONIONS, MATURE, COOKED, FROM FRESH, NS FAT ADDED","Onions, mature, cooked, from fresh, NS as to fat added in cooking" +75221002,"ONIONS, MATURE, COOKED, FROM FROZ, NS FAT ADDED","Onions, mature, cooked, from frozen, NS as to fat added in cooking" +75221010,"ONIONS, MATURE, COOKED, NS FORM, FAT NOT ADDED","Onions, mature, cooked, NS as to form, fat not added in cooking" +75221011,"ONIONS, MATURE, COOKED, FROM FRESH, FAT NOT ADDED","Onions, mature, cooked, from fresh, fat not added in cooking" +75221012,"ONIONS, MATURE, COOKED, FROM FROZ, FAT NOT ADDED","Onions, mature, cooked, 
from frozen, fat not added in cooking" +75221020,"ONIONS, MATURE, COOKED, NS AS TO FORM, FAT ADDED","Onions, mature, cooked or sauteed, NS as to form, fat added in cooking" +75221021,"ONIONS, MATURE, COOKED, FROM FRESH, FAT ADDED","Onions, mature, cooked or sauteed, from fresh, fat added in cooking" +75221022,"ONIONS, MATURE, COOKED, FROM FROZ, FAT ADDED","Onions, mature, cooked or sauteed, from frozen, fat added in cooking" +75221030,"ONIONS, PEARL, COOKED, NS FORM (INCL PICKLED/COCKTAIL)","Onions, pearl, cooked, NS as to form" +75221031,"ONIONS, PEARL, COOKED, FROM FRESH (INCL PICKLED/COCKTAIL)","Onions, pearl, cooked, from fresh" +75221032,"ONIONS, PEARL, COOKED, FROM FROZ (INCL PICKLED/COCKTAIL)","Onions, pearl, cooked, from frozen" +75221033,"ONIONS, PEARL, COOKED, FROM CAN (INCL PICKLED/COCKTAIL)","Onions, pearl, cooked, from canned" +75221040,"ONIONS, YOUNG GREEN, COOKED, NS FORM, NS ADDED FAT","Onion, young green, cooked, NS as to form, NS as to fat added in cooking" +75221041,"ONIONS, YOUNG GREEN, COOKED, FROM FRESH, NS ADDED FAT","Onion, young green, cooked, from fresh, NS as to fat added in cooking" +75221050,"ONIONS, YOUNG GREEN, COOKED, NS FORM, NO FAT ADDED","Onions, young green, cooked, NS as to form, fat not added in cooking" +75221051,"ONIONS, YOUNG GREEN, COOKED, FROM FRESH, NO FAT ADDED","Onions, young green, cooked, from fresh, fat not added in cooking" +75221060,"ONIONS, YOUNG GREEN, COOKED, NS FORM, FAT ADDED","Onion, young green, cooked, NS as to form, fat added in cooking" +75221061,"ONIONS, YOUNG GREEN, COOKED, FROM FRESH, FAT ADDED","Onion, young green, cooked, from fresh, fat added in cooking" +75221100,"ONIONS, DEHYDRATED","Onion, dehydrated" +75221160,"PALM HEARTS, COOKED (ASSUME NO FAT ADDED)","Palm hearts, cooked (assume fat not added in cooking)" +75221210,"PARSLEY, COOKED (ASSUME NO FAT ADDED)","Parsley, cooked (assume fat not added in cooking)" +75222000,"PARSNIPS, COOKED, NS AS TO ADDED FAT","Parsnips, cooked, NS as to fat added 
in cooking" +75222010,"PARSNIPS, COOKED, FAT NOT ADDED","Parsnips, cooked, fat not added in cooking" +75222020,"PARSNIPS, COOKED, FAT ADDED","Parsnips, cooked, fat added in cooking" +75223000,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,NS FORM,NS FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, NS as to form, NS as to fat added in cooking" +75223001,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD, FROM FRESH,NS FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from fresh, NS as to fat added in cooking" +75223002,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD, FROM FROZ,NS FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from frozen, NS as to fat added in cooking" +75223003,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD, FROM CAN,NS FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from canned, NS as to fat added in cooking" +75223010,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,NS FORM,NO FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, NS as to form, fat not added in cooking" +75223011,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,FROM FRESH,NO FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from fresh, fat not added in cooking" +75223012,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,FROM FROZ,NO FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from frozen, fat not added in cooking" +75223013,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,FROM CAN,NO FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from canned, fat not added in cooking" +75223020,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,NS FORM,W/ FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, NS as to form, fat added in cooking" +75223021,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,FROM FRESH,W/ FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from fresh, fat added in cooking" +75223022,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,FROM FROZ,W/ FAT","Peas, cowpeas, field peas, or blackeye 
peas (not dried), cooked, from frozen, fat added in cooking" +75223023,"PEAS,COW/FIELD/BLACKEYE,NOT DRY,CKD,FROM CAN,W/ FAT","Peas, cowpeas, field peas, or blackeye peas (not dried), cooked, from canned, fat added in cooking" +75224010,"PEAS, GREEN, COOKED, NS FORM, NS AS TO ADDED FAT","Peas, green, cooked, NS as to form, NS as to fat added in cooking" +75224011,"PEAS, GREEN, COOKED, FROM FRESH, NS FAT ADDED","Peas, green, cooked, from fresh, NS as to fat added in cooking" +75224012,"PEAS, GREEN, COOKED, FROM FROZ, NS FAT ADDED","Peas, green, cooked, from frozen, NS as to fat added in cooking" +75224013,"PEAS, GREEN, COOKED, FROM CANNED, NS FAT ADDED","Peas, green, cooked, from canned, NS as to fat added in cooking" +75224020,"PEAS, GREEN, COOKED, NS AS TO FORM, FAT NOT ADDED","Peas, green, cooked, NS as to form, fat not added in cooking" +75224021,"PEAS, GREEN, COOKED, FROM FRESH, FAT NOT ADDED","Peas, green, cooked, from fresh, fat not added in cooking" +75224022,"PEAS, GREEN, COOKED, FROM FROZ, FAT NOT ADDED","Peas, green, cooked, from frozen, fat not added in cooking" +75224023,"PEAS, GREEN, COOKED, FROM CANNED, FAT NOT ADDED","Peas, green, cooked, from canned, fat not added in cooking" +75224030,"PEAS, GREEN, COOKED, NS AS TO FORM, FAT ADDED","Peas, green, cooked, NS as to form, fat added in cooking" +75224031,"PEAS, GREEN, COOKED, FROM FRESH, FAT ADDED","Peas, green, cooked, from fresh, fat added in cooking" +75224032,"PEAS, GREEN, COOKED, FROM FROZ, FAT ADDED","Peas, green, cooked, from frozen, fat added in cooking" +75224033,"PEAS, GREEN, COOKED, FROM CANNED, FAT ADDED","Peas, green, cooked, from canned, fat added in cooking" +75224110,"PEAS, GREEN, CANNED, LOW SODIUM, NS AS TO ADDED FAT","Peas, green, canned, low sodium, NS as to fat added in cooking" +75224120,"PEAS, GREEN, CANNED, LOW SODIUM, FAT NOT ADDED","Peas, green, canned, low sodium, fat not added in cooking" +75224130,"PEAS, GREEN, CANNED, LOW SODIUM, FAT ADDED","Peas, green, canned, low sodium, 
fat added in cooking" +75225010,"PIGEON PEAS, COOKED, NS AS TO FORM, FAT NOT ADDED","Pigeon peas, cooked, NS as to form, fat not added in cooking" +75225011,"PIGEON PEAS, COOKED, FROM FRESH, FAT NOT ADDED","Pigeon peas, cooked, from fresh, fat not added in cooking" +75225013,"PIGEON PEAS, COOKED, FROM CANNED, FAT NOT ADDED","Pigeon peas, cooked, from canned, fat not added in cooking" +75226000,"PEPPERS, GREEN, COOKED, NS AS TO FAT","Peppers, green, cooked, NS as to fat added in cooking" +75226010,"PEPPERS, GREEN, COOKED, FAT NOT ADDED","Peppers, green, cooked, fat not added in cooking" +75226020,"PEPPERS, GREEN, COOKED, FAT ADDED","Peppers, green, cooked, fat added in cooking" +75226040,"PEPPERS, RED, COOKED, NS AS TO ADDED FAT","Peppers, red, cooked, NS as to fat added in cooking" +75226050,"PEPPERS, RED, COOKED, FAT NOT ADDED","Peppers, red, cooked, fat not added in cooking" +75226060,"PEPPERS, RED, COOKED, FAT ADDED","Peppers, red, cooked, fat added in cooking" +75226090,"PEPPERS, HOT, COOKED, NS FORM, NS FAT ADDED","Peppers, hot, cooked, NS as to form, NS as to fat added in cooking" +75226091,"PEPPERS, HOT, COOKED, FROM FRESH, NS FAT ADDED","Peppers, hot, cooked, from fresh, NS as to fat added in cooking" +75226092,"PEPPERS, HOT, COOKED, FROM FROZ, NS FAT ADDED","Peppers, hot, cooked, from frozen, NS as to fat added in cooking" +75226093,"PEPPERS, HOT, COOKED, FROM CANNED, NS FAT ADDED","Peppers, hot, cooked, from canned, NS as to fat added in cooking" +75226100,"PEPPERS, HOT, COOKED, NS FORM, NO FAT ADDED","Peppers, hot, cooked, NS as to form, fat not added in cooking" +75226101,"PEPPERS, HOT, COOKED, FROM FRESH, NO FAT ADDED","Peppers, hot, cooked, from fresh, fat not added in cooking" +75226102,"PEPPERS, HOT, COOKED, FROM FROZ, NO FAT ADDED","Peppers, hot, cooked, from frozen, fat not added in cooking" +75226103,"PEPPERS, HOT, COOKED, FROM CANNED, NO FAT ADDED","Peppers, hot, cooked, from canned, fat not added in cooking" +75226110,"PEPPERS, HOT, COOKED, NS 
FORM, FAT ADDED","Peppers, hot, cooked, NS as to form, fat added in cooking" +75226111,"PEPPERS, HOT, COOKED, FROM FRESH, FAT ADDED","Peppers, hot, cooked, from fresh, fat added in cooking" +75226112,"PEPPERS, HOT, COOKED, FROM FROZ, FAT ADDED","Peppers, hot, cooked, from frozen, fat added in cooking" +75226113,"PEPPERS, HOT, COOKED, FROM CANNED, FAT ADDED","Peppers, hot, cooked, from canned, fat added in cooking" +75226700,"PIMIENTO","Pimiento" +75227100,"RADISH, JAPANESE (DAIKON), COOKED, NO FAT ADDED","Radish, Japanese (daikon), cooked, fat not added in cooking" +75227110,"RADISH, JAPANESE (DAIKON), COOKED, FAT ADDED","Radish, Japanese (daikon), cooked, fat added in cooking" +75228000,"RUTABAGA, COOKED, NS AS TO ADDED FAT","Rutabaga, cooked, NS as to fat added in cooking" +75228010,"RUTABAGA, COOKED, FAT NOT ADDED","Rutabaga, cooked, fat not added in cooking" +75228020,"RUTABAGA, COOKED, FAT ADDED","Rutabaga, cooked, fat added in cooking" +75229010,"SALSIFY (VEGETABLE OYSTER), COOKED, NO FAT ADDED","Salsify (vegetable oyster), cooked, fat not added in cooking" +75230000,"SAUERKRAUT, NS AS TO ADDED FAT","Sauerkraut, cooked, NS as to fat added in cooking" +75230010,"SAUERKRAUT, NO FAT ADDED","Sauerkraut, cooked, fat not added in cooking" +75230020,"SAUERKRAUT, FAT ADDED","Sauerkraut, cooked, fat added in cooking" +75230100,"SAUERKRAUT, CANNED, LO NA","Sauerkraut, canned, low sodium" +75231000,"SNOWPEA(PEA POD), COOKED, NS FORM, NS AS TO FAT","Snowpea (pea pod), cooked, NS as to form, NS as to fat added in cooking" +75231001,"SNOWPEA(PEA POD), COOKED, FROM FRESH, NS AS TO FAT","Snowpea (pea pod), cooked, from fresh, NS as to fat added in cooking" +75231002,"SNOWPEA(PEA POD), COOKED, FROM FROZEN, NS AS TO FAT","Snowpea (pea pod), cooked, from frozen, NS as to fat added in cooking" +75231010,"SNOWPEA(PEA POD), COOKED, NS FORM, NO FAT ADDED","Snowpea (pea pod), cooked, NS as to form, fat not added in cooking" +75231011,"SNOWPEA(PEA POD), COOKED, FROM FRESH, NO FAT 
ADDED","Snowpea (pea pod), cooked, from fresh, fat not added in cooking" +75231012,"SNOWPEA(PEA POD), COOKED, FROM FROZ, NO FAT ADDED","Snowpea (pea pod), cooked, from frozen, fat not added in cooking" +75231020,"SNOWPEA(PEA POD), COOKED, NS AS TO FORM, FAT ADDED","Snowpea (pea pod), cooked, NS as to form, fat added in cooking" +75231021,"SNOWPEA(PEA POD), COOKED, FROM FRESH, FAT ADDED","Snowpea (pea pod), cooked, from fresh, fat added in cooking" +75231022,"SNOWPEA(PEA POD), COOKED, FROM FROZ, FAT ADDED","Snowpea (pea pod), cooked, from frozen, fat added in cooking" +75232000,"SEAWEED, DRIED","Seaweed, dried" +75232050,"SEAWEED, PREPARED W/ SOY SAUCE","Seaweed, prepared with soy sauce" +75232100,"SEAWEED, COOKED, NS AS TO FAT ADDED IN COOKING","Seaweed, cooked, NS as to fat added in cooking" +75232110,"SEAWEED, COOKED, FAT NOT ADDED IN COOKING","Seaweed, cooked, fat not added in cooking" +75232120,"SEAWEED, COOKED, FAT ADDED IN COOKING","Seaweed, cooked, fat added in cooking" +75233000,"SQUASH, SUMMER, COOKED, NS FORM, NS AS TO ADDED FAT","Squash, summer, cooked, NS as to form, NS as to fat added in cooking" +75233001,"SQUASH, SUMMER, COOKED, FROM FRESH, NS FAT ADDED","Squash, summer, cooked, from fresh, NS as to fat added in cooking" +75233002,"SQUASH, SUMMER, COOKED, FROM FROZ, NS FAT ADDED","Squash, summer, cooked, from frozen, NS as to fat added in cooking" +75233003,"SQUASH, SUMMER, COOKED, FROM CANNED, NS FAT ADDED","Squash, summer, cooked, from canned, NS as to fat added in cooking" +75233010,"SQUASH, SUMMER, COOKED, NS FORM, FAT NOT ADDED","Squash, summer, cooked, NS as to form, fat not added in cooking" +75233011,"SQUASH, SUMMER,YELLOW OR GREEN, CKD, FRESH, FAT NOT ADDED","Squash, summer,yellow or green, cooked, from fresh, fat not added in cooking" +75233012,"SQUASH, SUMMER, COOKED, FROM FROZ, FAT NOT ADDED","Squash, summer, cooked, from frozen, fat not added in cooking" +75233013,"SQUASH, SUMMER, COOKED, FROM CANNED, FAT NOT ADDED","Squash, summer, 
cooked, from canned, fat not added in cooking" +75233020,"SQUASH, SUMMER, COOKED, NS AS TO FORM, FAT ADDED","Squash, summer, cooked, NS as to form, fat added in cooking" +75233021,"SQUASH, SUMMER, COOKED, FROM FRESH, FAT ADDED","Squash, summer, cooked, from fresh, fat added in cooking" +75233022,"SQUASH, SUMMER, COOKED, FROM FROZ, FAT ADDED","Squash, summer, cooked, from frozen, fat added in cooking" +75233023,"SQUASH, SUMMER, COOKED, FROM CANNED, FAT ADDED","Squash, summer, cooked, from canned, fat added in cooking" +75233200,"SQUASH, SPAGHETTI, NS AS TO ADDED FAT","Squash, spaghetti, cooked, NS as to fat added in cooking" +75233210,"SQUASH, SPAGHETTI, FAT ADDED","Squash, spaghetti, cooked, fat added in cooking" +75233220,"SQUASH, SPAGHETTI, NO FAT ADDED","Squash, spaghetti, cooked, fat not added in cooking" +75233510,"SEQUIN (PORTUGUESE SQUASH), COOKED, NO FAT ADDED","Sequin (Portuguese squash), cooked, fat not added in cooking" +75234000,"TURNIP, COOKED, NS AS TO FORM, NS AS TO ADDED FAT","Turnip, cooked, NS as to form, NS as to fat added in cooking" +75234001,"TURNIP, COOKED, FROM FRESH, NS AS TO ADDED FAT","Turnip, cooked, from fresh, NS as to fat added in cooking" +75234002,"TURNIP, COOKED, FROM FROZ, NS AS TO ADDED FAT","Turnip, cooked, from frozen, NS as to fat added in cooking" +75234003,"TURNIP, COOKED, FROM CAN, NS AS TO ADDED FAT","Turnip, cooked, from canned, NS as to fat added in cooking" +75234010,"TURNIP, COOKED, NS AS TO FORM, FAT NOT ADDED","Turnip, cooked, NS as to form, fat not added in cooking" +75234011,"TURNIP, COOKED, FROM FRESH, FAT NOT ADDED","Turnip, cooked, from fresh, fat not added in cooking" +75234012,"TURNIP, COOKED, FROM FROZ, FAT NOT ADDED","Turnip, cooked, from frozen, fat not added in cooking" +75234013,"TURNIP, COOKED, FROM CANNED, FAT NOT ADDED","Turnip, cooked, from canned, fat not added in cooking" +75234020,"TURNIP, COOKED, NS AS TO FORM, FAT ADDED","Turnip, cooked, NS as to form, fat added in cooking" +75234021,"TURNIP, 
COOKED, FROM FRESH, FAT ADDED","Turnip, cooked, from fresh, fat added in cooking" +75234022,"TURNIP, COOKED, FROM FROZ, FAT ADDED","Turnip, cooked, from frozen, fat added in cooking" +75234023,"TURNIP, COOKED, FROM CAN, FAT ADDED","Turnip, cooked, from canned, fat added in cooking" +75235000,"WATER CHESTNUT","Water chestnut" +75235750,"WINTER MELON, COOKED (INCL CHINESE MELON, TOGAN)","Winter melon, cooked" +75236000,"YEAST (INCLUDE BREWER'S YEAST)","Yeast" +75236500,"YEAST EXTRACT SPREAD (INCL VEGEMITE, MARMITE)","Yeast extract spread" +75301100,"BEANS, LIMA, & CORN (SUCCOTASH), NS AS TO ADDED FAT","Beans, lima and corn (succotash), cooked, NS as to fat added in cooking" +75301110,"BEANS, LIMA, & CORN (SUCCOTASH), NO FAT ADDED","Beans, lima and corn (succotash), cooked, fat not added in cooking" +75301120,"BEANS, LIMA, & CORN (SUCCOTASH), FAT ADDED","Beans, lima and corn (succotash), cooked, fat added in cooking" +75302010,"BEANS, STRING, GREEN, W/ TOMATOES, FAT NOT ADDED IN COOKING","Beans, string, green, with tomatoes, cooked, fat not added in cooking" +75302020,"BEANS, STRING, GREEN, W/ ONIONS, FAT NOT ADDED IN COOKING","Beans, string, green, with onions, cooked, fat not added in cooking" +75302030,"BEANS, STRING, GREEN, W/ CHICKPEAS, FAT NOT ADDED IN COOKING","Beans, string, green, with chickpeas, cooked, fat not added in cooking" +75302040,"BEANS, STRING, GREEN, W/ ALMONDS, FAT NOT ADDED IN COOKING","Beans, string, green, with almonds, cooked, fat not added in cooking" +75302045,"BEANS, STRING, GREEN, W/ ALMONDS, FAT ADDED IN COOKING","Beans, string, green, with almonds, cooked, fat added in cooking" +75302050,"BEANS, STRING, GREEN, & POTATOES, FAT NOT ADDED","Beans, string, green, and potatoes, cooked, fat not added in cooking" +75302060,"BEANS, STRING, GREEN, W/ PINTO BEANS, FAT NOT ADDED","Beans, string, green, with pinto beans, cooked, fat not added in cooking" +75302070,"BEANS, STRING, GREEN, W/ SPAETZEL, FAT NOT ADDED","Beans, string, green, with 
spaetzel, cooked, fat not added in cooking" +75302080,"BEAN SALAD, YELLOW &/OR GREEN STRING BEANS","Bean salad, yellow and/or green string beans" +75302200,"BEANS, STRING, GREEN, W/ ONIONS, NS AS TO FAT ADDED","Beans, string, green, with onions, NS as to fat added in cooking" +75302210,"BEANS, STRING, GREEN, W/ ONIONS, FAT ADDED IN COOKING","Beans, string, green, with onions, fat added in cooking" +75302500,"BEANS, STRING, GREEN, & POTATOES, NS AS TO FAT ADDED","Beans, string, green, and potatoes, cooked, NS as to fat added in cooking" +75302510,"BEANS, STRING, GREEN, & POTATOES, FAT ADDED","Beans, string, green, and potatoes, cooked, fat added in cooking" +75303000,"CORN W/ PEPPERS, RED OR GREEN,COOKED, NS FAT ADDED","Corn with peppers, red or green, cooked, NS as to fat added in cooking" +75303010,"CORN W/ PEPPERS, RED OR GREEN,COOKED, NO FAT ADDED","Corn with peppers, red or green, cooked, fat not added in cooking" +75303020,"CORN W/ PEPPERS, RED OR GREEN,COOKED,FAT ADDED","Corn with peppers, red or green, cooked, fat added in cooking" +75306010,"EGGPLANT IN TOM SCE, COOKED,NO FAT ADDED","Eggplant in tomato sauce, cooked, fat not added in cooking" +75307000,"GREEN PEPPERS & ONIONS, COOKED,FAT ADDED IN COOKING","Green peppers and onions, cooked, fat added in cooking" +75311000,"MIXED VEGS (CORN,LIMA,PEAS,GRBNS,CAR), NS FORM & FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, NS as to form, NS as to fat added in cooking" +75311002,"MIXED VEGETABLES (CORN,LIMA,PEAS,GRBNS,CAR), FROZ, NS FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, from frozen, NS as to fat added in cooking" +75311003,"MIXED VEGETABLES (CORN,LIMA,PEAS,GRBNS,CAR), CANNED, NS FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, from canned, NS as to fat added in cooking" +75311010,"MIXED VEGS (CORN,LIMA,PEAS,GRBN,CAR), NS FORM, NO FAT","Mixed vegetables (corn, lima beans, peas, green beans, and 
carrots), cooked, NS as to form, fat not added in cooking" +75311012,"MIXED VEGETABLES (CORN,LIMA,PEAS,GRBNS,CAR), FROZ, NO FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, from frozen, fat not added in cooking" +75311013,"MIXED VEGETABLES (CORN,LIMA,PEAS,GRBNS,CAR), CANNED, NO FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, from canned, fat not added in cooking" +75311020,"MIXED VEGS (CORN,LIMA,PEAS,GRBNS,CAR), NS FORM,W/FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, NS as to form, fat added in cooking" +75311022,"MIXED VEGETABLES (CORN,LIMA,PEAS,GRBNS,CAR), FROZ, W/ FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, from frozen, fat added in cooking" +75311023,"MIXED VEGETABLES (CORN,LIMA,PEAS,GRBNS,CAR), CANNED, W/ FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), cooked, from canned, fat added in cooking" +75311100,"MIXED VEGETABLES, CANNED, LOW SODIUM, NS ADDED FAT","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), canned, low sodium, NS as to fat added in cooking" +75311110,"MIXED VEGETABLES, CANNED, LOW SODIUM, NO FAT ADDED","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), canned, low sodium, fat not added in cooking" +75311120,"MIXED VEGETABLES, CANNED, LOW SODIUM, FAT ADDED","Mixed vegetables (corn, lima beans, peas, green beans, and carrots), canned, low sodium, fat added in cooking" +75315000,"PEAS & CORN, COOKED, NS AS TO ADDED FAT","Peas and corn, cooked, NS as to fat added in cooking" +75315010,"PEAS & CORN, COOKED, NO FAT ADDED","Peas and corn, cooked, fat not added in cooking" +75315020,"PEAS & CORN, COOKED, FAT ADDED","Peas and corn, cooked, fat added in cooking" +75315100,"PEAS & ONIONS, COOKED, NS AS TO ADDED FAT","Peas and onions, cooked, NS as to fat added in cooking" +75315110,"PEAS & ONIONS, COOKED, FAT NOT ADDED","Peas and onions, cooked, fat 
not added in cooking" +75315120,"PEAS & ONIONS, COOKED, FAT ADDED","Peas and onions, cooked, fat added in cooking" +75315200,"PEAS W/ MUSHROOMS, COOKED, NS AS TO FAT","Peas with mushrooms, cooked, NS as to fat added in cooking" +75315210,"PEAS W/ MUSHROOMS, COOKED, NO FAT ADDED","Peas with mushrooms, cooked, fat not added in cooking" +75315215,"PEAS W/ MUSHROOMS, COOKED, FAT ADDED","Peas with mushrooms, cooked, fat added in cooking" +75315250,"COWPEAS W/ SNAP BEANS,COOKED, NO FAT ADDED IN COOK","Cowpeas with snap beans, cooked, fat not added in cooking" +75315300,"PEAS & POTATOES, COOKED, NO FAT ADDED IN COOKING","Peas and potatoes, cooked, fat not added in cooking" +75315305,"PEAS AND POTATOES, COOKED, NS AS TO FAT","Peas and potatoes, cooked, NS as to fat added in cooking" +75315310,"PEAS AND POTATOES, COOKED, FAT ADDED","Peas and potatoes, cooked, fat added in cooking" +75316000,"SQUASH, SUMMER, & ONIONS,COOKED, NO FAT ADDED","Squash, summer, and onions, cooked, fat not added in cooking" +75316010,"ZUCCHINI W/ TOM SCE, CKD,NO FAT ADDED IN COOKING","Zucchini with tomato sauce, cooked, fat not added in cooking" +75316020,"SQUASH, SUMMER, & ONIONS, COOKED, FAT ADDED","Squash, summer, and onions, cooked, fat added in cooking" +75316050,"RATATOUILLE","Ratatouille" +75317000,"VEGETABLES,STEWTYPE(POT,CRT,ONION,CELERY)COOK,NS FA","Vegetables, stew type (including potatoes, carrots, onions, celery) cooked, NS as to fat added in cooking" +75317010,"VEGETABLES,STEWTYPE(POT,CRT,ONION,CELERY)COOK,W/FAT","Vegetables, stew type (including potatoes, carrots, onions, celery) cooked, fat added in cooking" +75317020,"VEGETABLES,STEWTYPE(POT,CRT,ONION,CELERY)COOK,NO FA","Vegetables, stew type (including potatoes, carrots, onions, celery) cooked, fat not added in cooking" +75330100,"VEG COMBINATION (INCL CAR/ DK GRN), NO SAUCE, NS FAT","Vegetable combination (including carrots, broccoli, and/or dark-green leafy), cooked, no sauce, NS as to fat added in cooking" +75330110,"VEG 
COMBINATION (INCL CAR/ DK GRN), NO SAUCE, FAT NOT ADDED","Vegetable combination (including carrots, broccoli, and/or dark-green leafy), cooked, no sauce, fat not added in cooking" +75330120,"VEG COMBINATION (INCL CAR/ DK GRN), NO SAUCE, FAT ADDED","Vegetable combination (including carrots, broccoli, and/or dark-green leafy), cooked, no sauce, fat added in cooking" +75330130,"VEG COMBINATION (NO CAR/ DK GRN), NO SAUCE, NS FAT","Vegetable combination (excluding carrots, broccoli, and dark-green leafy), cooked, no sauce, NS as to fat added in cooking" +75330140,"VEG COMBINATION (NO CAR/ DK GRN), NO SAUCE, FAT NOT ADDED","Vegetable combination (excluding carrots, broccoli, and dark-green leafy), cooked, no sauce, fat not added in cooking" +75330150,"VEG COMBINATION (NO CAR/ DK GRN), NO SAUCE, FAT ADDED","Vegetable combination (excluding carrots, broccoli, and dark-green leafy), cooked, no sauce, fat added in cooking" +75340000,"VEG ASIAN,ORIENTAL STYLE,CKD,NS FAT ADDED IN COOKING","Vegetable combinations, Asian style, (broccoli, green pepper, water chestnut, etc) cooked, NS as to fat added in cooking" +75340010,"VEG COMBO ASIAN STYLE, CKD, FAT, NOT ADDED","Vegetable combinations, Asian style, (broccoli, green pepper, water chestnuts, etc), cooked, fat not added in cooking" +75340020,"VEG COMBO, ASIAN STYLE, CKD, FAT ADDED","Vegetable combinations, Asian style, (broccoli, green pepper, water chestnuts, etc), cooked, fat added in cooking" +75340160,"VEG & PASTA COMBOS, W/ CREAM/CHEESE SCE, COOKED","Vegetable and pasta combinations with cream or cheese sauce (broccoli, pasta, carrots, corn, zucchini, peppers, cauliflower, peas, etc.), cooked" +75340200,"JAI, MONK'S FOOD (MSHRMS,LILY RTS,B.CURD,W.CHSTNUT)","Jai, Monk's Food (mushrooms, lily roots, bean curd, water chestnuts)" +75340300,"PINACBET (EGGPLANT W/ TOMATO, BITTERMELON, ETC)","Pinacbet (eggplant with tomatoes, bitter melon, etc.)" +75365000,"VEGETABLE MIXTURE, DRIED (INCL SALAD CRUNCHIES)","Vegetable mixture, 
dried" +75400500,"ARTICHOKES, STUFFED","Artichokes, stuffed" +75401010,"ASPARAGUS, NS FORM, CREAMED OR W/ CHEESE SAUCE","Asparagus, NS as to form, creamed or with cheese sauce" +75401011,"ASPARAGUS, FROM FRESH, CREAMED OR W/ CHEESE SAUCE","Asparagus, from fresh, creamed or with cheese sauce" +75401012,"ASPARAGUS, FROM FROZEN, CREAMED OR W/ CHEESE SAUCE","Asparagus, from frozen, creamed or with cheese sauce" +75401013,"ASPARAGUS, FROM CANNED, CREAMED OR W/ CHEESE SAUCE","Asparagus, from canned, creamed or with cheese sauce" +75402010,"BEANS, LIMA, IMMATURE, NS FORM, CREAMED/ CHEESE SCE","Beans, lima, immature, NS as to form, creamed or with cheese sauce" +75402011,"BEANS, LIMA, IMMATURE, FROM FRESH, CREAMED/CHEESE SCE","Beans, lima, immature, from fresh, creamed or with cheese sauce" +75402012,"BEANS, LIMA, IMMATURE, FROM FROZEN, CREAMED/CHEESE SCE","Beans, lima, immature, from frozen, creamed or with cheese sauce" +75402013,"BEANS, LIMA, IMMATURE, FROM CANNED, CREAMED/CHEESE SCE","Beans, lima, immature, from canned, creamed or with cheese sauce" +75402020,"BEANS, LIMA, IMMATURE, CKD, NS FORM, W/ MUSHROOM SCE","Beans, lima, immature, cooked, NS as to form, with mushroom sauce" +75402021,"BEANS, LIMA, IMMATURE, CKD, FROM FRESH, W/ MUSHROOM SCE","Beans, lima, immature, cooked, from fresh, with mushroom sauce" +75402022,"BEANS, LIMA, IMMATURE, CKD, FROM FROZ, W/ MUSHROOM SCE","Beans, lima, immature, cooked, from frozen, with mushroom sauce" +75402023,"BEANS, LIMA, IMMATURE, CKD, FROM CAN, W/ MUSHROOM SCE","Beans, lima, immature, cooked, from canned, with mushroom sauce" +75403010,"BEANS, STRING, GREEN, NS FORM, CREAMED/CHEESE SCE","Beans, string, green, NS as to form, creamed or with cheese sauce" +75403011,"BEANS, STRING, GREEN, FROM FRESH, CREAMED/CHEESE SCE","Beans, string, green, from fresh, creamed or with cheese sauce" +75403012,"BEANS, STRING, GREEN, FROM FROZEN, CREAMED/CHEESE SCE","Beans, string, green, from frozen, creamed or with cheese sauce" 
+75403013,"BEANS, STRING, GREEN, FROM CANNED, CREAMED/CHEESE SCE","Beans, string, green, from canned, creamed or with cheese sauce" +75403020,"BEANS, STRING, GREEN, CKD, NS FORM, W/ MUSHROOM SCE","Beans, string, green, cooked, NS as to form, with mushroom sauce" +75403021,"BEANS, STRING, GREEN, CKD, FROM FRESH, W/ MUSHROOM SCE","Beans, string, green, cooked, from fresh, with mushroom sauce" +75403022,"BEANS, STRING, GREEN, CKD, FROM FROZ, W/ MUSHROOM SCE","Beans, string, green, cooked, from frozen, with mushroom sauce" +75403023,"BEANS, STRING, GREEN, CKD, FROM CAN, W/ MUSHROOM SCE","Beans, string, green, cooked, from canned, with mushroom sauce" +75403200,"BEANS, STRING, GREEN, SZECHUAN-STYLE, FAT ADDED","Beans, string, green, cooked, Szechuan-style, fat added in cooking" +75404010,"BEANS, STRING, YELLOW, NS FORM, CREAMED/ CHEESE SCE","Beans, string, yellow, NS as to form, creamed or with cheese sauce" +75404011,"BEANS, STRING, YELLOW, FROM FRESH, CREAMED/ CHEESE SCE","Beans, string, yellow, from fresh, creamed or with cheese sauce" +75404012,"BEANS, STRING, YELLOW, FROM FROZ, CREAMED/ CHEESE SCE","Beans, string, yellow, from frozen, creamed or with cheese sauce" +75404013,"BEANS, STRING, YELLOW, FROM CANNED, CREAMED/ CHEESE SCE","Beans, string, yellow, from canned, creamed or with cheese sauce" +75405010,"BEETS WITH HARVARD SAUCE","Beets with Harvard sauce" +75406010,"BRUSSEL SPROUTS, NS AS TO FORM, CREAMED","Brussels sprouts, NS as to form, creamed" +75406011,"BRUSSEL SPROUTS, FROM FRESH, CREAMED","Brussels sprouts, from fresh, creamed" +75406012,"BRUSSEL SPROUTS, FROM FROZ, CREAMED","Brussels sprouts, from frozen, creamed" +75407010,"CABBAGE, CREAMED","Cabbage, creamed" +75409010,"CAULIFLOWER, NS FORM, CREAMED(INCL W/ CHEESE SAUCE)","Cauliflower, NS as to form, creamed" +75409011,"CAULIFLOWER, FROM FRESH, CREAMED(INCL W/ CHEESE SAUCE)","Cauliflower, from fresh, creamed" +75409012,"CAULIFLOWER, FROM FROZ, CREAMED(INCL W/ CHEESE SAUCE)","Cauliflower, from frozen, 
creamed" +75409013,"CAULIFLOWER, FROM CANNED, CREAMED(INCL W/ CHEESE SAUCE)","Cauliflower, from canned, creamed" +75409020,"CAULIFLOWER, BATTER-DIPPED, FRIED","Cauliflower, batter-dipped, fried" +75410010,"CELERY, CREAMED","Celery, creamed" +75410500,"CHILES RELLENOS, CHEESE-FILLED","Chiles rellenos, cheese-filled (stuffed chili peppers)" +75410530,"CHILES RELLENOS, FILLED W/ MEAT & CHEESE","Chiles rellenos, filled with meat and cheese (stuffed chili peppers)" +75410550,"JALAPENO PEPPER, STUFFED W/ CHEESE, BATTERED, FRIED","Jalapeno pepper, stuffed with cheese, breaded or battered, fried" +75411010,"CORN, SCALLOPED OR PUDDING (INCLUDE CORN SOUFFLE)","Corn, scalloped or pudding" +75411020,"CORN FRITTER","Corn fritter" +75411030,"CORN, COOKED, NS FORM, W/ CREAM SAUCE, MADE W/ MILK","Corn, cooked, NS as to form, with cream sauce, made with milk" +75411031,"CORN, COOKED, FROM FRESH, W/ CREAM SAUCE, MADE W/ MILK","Corn, cooked, from fresh, with cream sauce, made with milk" +75411032,"CORN, COOKED, FROM FROZ, W/ CREAM SAUCE, MADE W/ MILK","Corn, cooked, from frozen, with cream sauce, made with milk" +75411033,"CORN, COOKED, FROM CAN, W/ CREAM SAUCE, MADE W/ MILK","Corn, cooked, from canned, with cream sauce, made with milk" +75412010,"EGGPLANT, BATTER-DIPPED, FRIED","Eggplant, batter-dipped, fried" +75412030,"EGGPLANT DIP (INCL BABA GHANOUSH)","Eggplant dip" +75412060,"EGGPLANT PARMESAN CASSEROLE, REGULAR","Eggplant parmesan casserole, regular" +75412070,"EGGPLANT W/ CHEESE & TOMATO SAUCE","Eggplant with cheese and tomato sauce" +75413010,"KOHLRABI, CREAMED","Kohlrabi, creamed" +75414010,"MUSHROOMS, NS AS TO FORM, CREAMED","Mushrooms, NS as to form, creamed" +75414011,"MUSHROOMS, FROM FRESH, CREAMED","Mushrooms, from fresh, creamed" +75414012,"MUSHROOMS, FROM FROZEN, CREAMED","Mushrooms, from frozen, creamed" +75414013,"MUSHROOMS, FROM CANNED, CREAMED","Mushrooms, from canned, creamed" +75414020,"MUSHROOMS, STUFFED","Mushrooms, stuffed" +75414030,"MUSHROOM, 
BATTER-DIPPED, FRIED","Mushrooms, batter-dipped, fried" +75414500,"OKRA, BATTER-DIPPED, FRIED","Okra, batter-dipped, fried" +75415010,"ONIONS, NS AS TO FORM, CREAMED","Onions, NS as to form, creamed" +75415011,"ONIONS, FROM FRESH, CREAMED","Onions, from fresh, creamed" +75415020,"ONION RINGS, NS FORM, BATTER-DIPPED, BAKED/FRIED","Onion rings, NS as to form, batter-dipped, baked or fried" +75415021,"ONION RINGS, FROM FRESH, BATTERED, BAKED/FRIED","Onion rings, from fresh, batter-dipped, baked or fried" +75415022,"ONION RINGS, FROM FROZ, BATTERED, BAKED/FRIED","Onion rings, from frozen, batter-dipped, baked or fried" +75416010,"PARSNIPS, CREAMED","Parsnips, creamed" +75416500,"PEA SALAD","Pea salad" +75416600,"PEA SALAD W/ CHEESE","Pea salad with cheese" +75417010,"PEAS, NS AS TO FORM, CREAMED","Peas, NS as to form, creamed" +75417011,"PEAS, FROM FRESH, CREAMED","Peas, from fresh, creamed" +75417012,"PEAS, FROM FROZEN, CREAMED","Peas, from frozen, creamed" +75417013,"PEAS, FROM CANNED, CREAMED","Peas, from canned, creamed" +75417020,"PEAS, COOKED, NS AS TO FORM, W/ MUSHROOM SAUCE","Peas, cooked, NS as to form, with mushroom sauce" +75417021,"PEAS, COOKED, FROM FRESH, W/ MUSHROOM SAUCE","Peas, cooked, from fresh, with mushroom sauce" +75417022,"PEAS, COOKED, FROM FROZEN, W/ MUSHROOM SAUCE","Peas, cooked, from frozen, with mushroom sauce" +75417023,"PEAS, COOKED, FROM CANNED, W/ MUSHROOM SAUCE","Peas, cooked, from canned, with mushroom sauce" +75417030,"PEAS, COOKED, NS AS TO FORM, W/ TOMATO SAUCE","Peas, cooked, NS as to form, with tomato sauce" +75417031,"PEAS, COOKED, FROM FRESH, W/ TOMATO SAUCE","Peas, cooked, from fresh, with tomato sauce" +75417032,"PEAS, COOKED, FROM FROZEN, W/ TOMATO SAUCE","Peas, cooked, from frozen, with tomato sauce" +75417033,"PEAS, COOKED, FROM CANNED, W/ TOMATO SAUCE","Peas, cooked, from canned, with tomato sauce" +75418000,"SQUASH, SUMMER, BREADED, BAKED","Squash,summer, yellow or green, breaded or battered, baked" +75418010,"SQUASH, 
SUMMER, BREADED OR BATTERED, FRIED","Squash, summer, yellow or green, breaded or battered, fried" +75418020,"SQUASH, SUMMER, CASSEROLE, W/ TOMATO & CHEESE","Squash, summer, casserole with tomato and cheese" +75418030,"SQUASH, SUMMER, CASSEROLE, W/ RICE & TOMATO SAUCE","Squash, summer, casserole, with rice and tomato sauce" +75418040,"SQUASH, SUMMER, CASSEROLE, W/ CHEESE SAUCE","Squash, summer, casserole, with cheese sauce" +75418050,"SQUASH, SUMMER, NS AS TO FORM, CREAMED","Squash, summer, NS as to form, creamed" +75418051,"SQUASH, SUMMER, FROM FRESH, CREAMED","Squash, summer, from fresh, creamed" +75418052,"SQUASH, SUMMER, FROM FROZEN, CREAMED","Squash, summer, from frozen, creamed" +75418053,"SQUASH, SUMMER, FROM CANNED, CREAMED","Squash, summer, from canned, creamed" +75418060,"SQUASH, SUMMER, SOUFFLE","Squash, summer, souffle" +75418100,"TURNIPS, NS AS TO FORM, CREAMED","Turnips, NS as to form, creamed" +75418101,"TURNIPS, FROM FRESH, CREAMED","Turnips, from fresh, creamed" +75418102,"TURNIPS, FROM FROZEN, CREAMED","Turnips, from frozen, creamed" +75418103,"TURNIPS, FROM CANNED, CREAMED","Turnips, from canned, creamed" +75418220,"CREAMED CHRISTOPHINE, P.R. 
(CHAYOTE A LA CREMA)","Creamed christophine, Puerto Rican style (Chayote a la crema)" +75439010,"VEGETABLE STEW, W/O MEAT","Vegetable stew without meat" +75439500,"CHOW MEIN OR CHOP SUEY, MEATLESS, NO NOODLES","Chow mein or chop suey, meatless, no noodles" +75440100,"VEG COMBINATION (INCL CAR/ DK GRN), W/ SOY-BASE SCE","Vegetable combination (including carrots, broccoli, and/or dark-green leafy), cooked, with soy-based sauce" +75440110,"VEG COMBINATION (NO CAR/ DK GRN), W/ SOY-BASE SAUCE","Vegetable combination (excluding carrots, broccoli, and dark-green leafy), cooked, with soy-based sauce" +75440170,"VEGETABLE STICKS, BREADED(INCL CORN,CARROT,GR BEAN)","Vegetable sticks, breaded (including corn, carrots, and green beans)" +75440200,"VEGETABLE TEMPURA","Vegetable tempura" +75440300,"VEG COMBINATIONS (INCL CAR/DK GRN), W/ TOMATO SAUCE","Vegetable combinations (including carrots, broccoli, and/or dark-green leafy), cooked, with tomato sauce" +75440310,"VEG COMBINATIONS (NO CAR/DK GRN), W/ TOMATO SAUCE","Vegetable combinations (excluding carrots, broccoli, and dark-green leafy), cooked, with tomato sauce" +75440400,"VEGETABLE,IN CHICK-PEA FLOUR BATTER,(PAKORA),FRIED","Vegetables, dipped in chick-pea flour batter, (pakora), fried" +75440500,"VEG COMBINATION (INCL CAR/DK GRN), W/ CHEESE SAUCE","Vegetable combinations (including carrots, broccoli, and/or dark-green leafy), cooked, with cheese sauce" +75440510,"VEG COMBINATION (NO CAR/ DK GRN), W/ CHEESE SAUCE","Vegetable combinations (excluding carrots, broccoli, and dark-green leafy), cooked, with cheese sauce" +75440600,"VEGETABLE CURRY","Vegetable curry" +75450500,"VEG COMBINATION (INCL CAR, DK GRN), W/ CREAM SAUCE","Vegetable combination (including carrots, broccoli, and/or dark-green leafy), cooked, with cream sauce" +75450510,"VEG COMBINATION (NO CAR, DK GRN), W/ CREAM SAUCE","Vegetable combination (excluding carrots, broccoli, and dark-green leafy), cooked, with cream sauce" +75450600,"VEG COMBINATION(INCL 
CAR,BROC,DK GRN)W/BUTTER SAUCE","Vegetable combination (including carrots, broccoli, and/or dark-green leafy), cooked, with butter sauce" +75460700,"VEGETABLE COMBINATION (INCL CAR/DK GRN), W/ PASTA","Vegetable combinations (including carrots, broccoli, and/or dark-green leafy), cooked, with pasta" +75460710,"VEGETABLE COMBINATION (NO CAR/DK GRN), W/ PASTA","Vegetable combinations (excluding carrots, broccoli, and dark-green leafy), cooked, with pasta" +75460800,"VEGETABLE COMB(INCL CAR/DK GRN),CKD,W/ BUTTER SAUCE","Vegetable combinations (including carrots, broccoli, and/or dark-green leafy), cooked, with butter sauce and pasta" +75460810,"VEGETABLE COMB (NO CAR/DK GRN),CKD, W/ BUTTER SAUCE","Vegetable combinations (excluding carrots, broccoli, and dark-green leafy), cooked, with butter sauce and pasta" +75460900,"CHOW MEIN OR CHOP SUEY, MEATLESS, WITH NOODLES","Chow mein or chop suey, meatless, with noodles" +75500110,"BEANS, STRING, GREEN, PICKLED","Beans, string, green, pickled" +75500210,"BEETS, PICKLED (INCLUDE W/ ONIONS, BEET SALAD)","Beets, pickled" +75500510,"CELERY, PICKLED","Celery, pickled" +75501010,"CORN RELISH","Corn relish" +75502010,"CAULIFLOWER, PICKLED","Cauliflower, pickled" +75502500,"CABBAGE, FRESH, PICKLED, JAPANESE","Cabbage, fresh, pickled, Japanese style" +75502510,"CABBAGE, RED, PICKLED (INCL SWEET & SOUR CABBAGE)","Cabbage, red, pickled" +75502520,"CABBAGE, KIMCHI (KIM CHEE) STYLE","Cabbage, Kimchi (Kim Chee) style" +75502550,"CABBAGE, MUSTARD, SALTED","Cabbage, mustard, salted" +75503010,"CUCUMBER PICKLES, DILL","Cucumber pickles, dill" +75503020,"CUCUMBER PICKLES, RELISH","Cucumber pickles, relish" +75503030,"CUCUMBER PICKLES, SOUR","Cucumber pickles, sour" +75503040,"CUCUMBER PICKLES, SWEET","Cucumber pickles, sweet" +75503080,"EGGPLANT, PICKLED","Eggplant, pickled" +75503090,"HORSERADISH","Horseradish" +75503100,"MUSTARD PICKLES (INCL CHOW-CHOW, HOT DOG RELISH)","Mustard pickles" +75503110,"CUCUMBER PICKLE, DILL, REDUCED 
SALT","Cucumber pickles, dill, reduced salt" +75503140,"CUCUMBER PICKLE, SWEET, REDUCED SALT","Cucumber pickles, sweet, reduced salt" +75505000,"MUSHROOMS, PICKLED","Mushrooms, pickled" +75506010,"MUSTARD (INCL HORSERADISH MUSTARD, CHINESE MUSTARD)","Mustard" +75506100,"MUSTARD SAUCE","Mustard sauce" +75507000,"OKRA, PICKLED","Okra, pickled" +75510000,"OLIVES, NFS","Olives, NFS" +75510010,"OLIVES, GREEN","Olives, green" +75510020,"OLIVES, BLACK","Olives, black" +75510030,"OLIVES, GREEN, STUFFED","Olives, green, stuffed" +75511010,"HOT PEPPER SAUCE","Hot pepper sauce" +75511020,"PEPPERS, PICKLED","Peppers, pickled" +75511040,"PEPPER, HOT, PICKLED","Pepper, hot, pickled" +75511100,"PICKLES, NS AS TO VEGETABLE","Pickles, NS as to vegetable" +75511200,"PICKLES, MIXED","Pickles, mixed" +75512010,"RADISHES, PICKLED, HAWAIIAN","Radishes, pickled, Hawaiian style" +75512510,"RECAITO (P.R. LITTLE CORIANDER)","Recaito (Puerto Rican little coriander)" +75513010,"SEAWEED, PICKLED","Seaweed, pickled" +75515000,"VEGETABLES, PICKLED, HAWAIIAN","Vegetables, pickled, Hawaiian style" +75515010,"VEGETABLE RELISH","Vegetable relish" +75515100,"VEGETABLES, PICKLED (INCLUDE GIARDINIERA)","Vegetables, pickled" +75534030,"TURNIP, PICKLED","Turnip, pickled" +75534500,"TSUKEMONO, JAPANESE PICKLES","Tsukemono, Japanese pickles" +75535000,"ZUCCHINI, PICKLED","Zucchini, pickled" +75600150,"SOUP, CREAM OF, NFS","Soup, cream of, NFS" +75601000,"ASPARAGUS SOUP, CREAM OF, NS AS TO W/ MILK OR WATER","Asparagus soup, cream of, NS as to made with milk or water" +75601010,"ASPARAGUS SOUP, CREAM OF,W/ MILK","Asparagus soup, cream of, prepared with milk" +75601020,"ASPARAGUS SOUP, CREAM OF, PREPARED W/ WATER","Asparagus soup, cream of, prepared with water" +75601100,"BEET SOUP (BORSCHT)","Beet soup (borscht)" +75601200,"CABBAGE SOUP, HOME RECIPE, CANNED OR READY-TO-SERVE","Cabbage soup, home recipe, canned or ready-to-serve" +75601210,"CABBAGE WITH MEAT SOUP, HOME RECIPE, CANNED OR 
READY-TO-SERV","Cabbage with meat soup, home recipe, canned or ready-to-serve" +75603010,"CELERY SOUP, CREAM OF, PREPARED WITH MILK, HOME RECIPE, CANN","Celery soup, cream of, prepared with milk, home recipe, canned or ready-to-serve" +75603020,"CELERY SOUP, CREAM OF, PREPARED WITH WATER, HOME RECIPE, CAN","Celery soup, cream of, prepared with water, home recipe, canned or ready-to-serve" +75604010,"CORN SOUP, CREAM OF, W/ MILK","Corn soup, cream of, prepared with milk" +75604020,"CORN SOUP, CREAM OF, PREPARED W/ WATER","Corn soup, cream of, prepared with water" +75604600,"GAZPACHO","Gazpacho" +75605010,"LEEK SOUP, CREAM OF, PREP W/ MILK","Leek soup, cream of, prepared with milk" +75607000,"MUSHROOM SOUP, NFS","Mushroom soup, NFS" +75607010,"MUSHROOM SOUP, CREAM OF, PREP W/ MILK","Mushroom soup, cream of, prepared with milk" +75607020,"MUSHROOM SOUP, CREAM OF, PREPARED W/ WATER","Mushroom soup, cream of, prepared with water" +75607040,"MUSHROOM SOUP, W/ MEAT BROTH, PREPARED W/ WATER","Mushroom soup, with meat broth, prepared with water" +75607050,"MUSHROOM SOUP, CM OF, LOW SOD, PREP W/ WATER","Mushroom soup, cream of, low sodium, prepared with water" +75607060,"MUSHROOM SOUP, CREAM OF, NS AS TO W/ MILK OR WATER","Mushroom soup, cream of, NS as to made with milk or water" +75607080,"MUSHROOM W/ CHICKEN SOUP, CREAM OF, PREP W/ MILK","Mushroom with chicken soup, cream of, prepared with milk" +75607090,"MUSHROOM SOUP, CREAM OF, CAN, RED. SOD., NS W/ MILK/WATER","Mushroom soup, cream of, canned, reduced sodium, NS as to made with milk or water" +75607100,"MUSHROOM SOUP, CREAM OF, CAN, RED. 
SODIUM, PREP W/ MILK","Mushroom soup, cream of, canned, reduced sodium, prepared with milk" +75607130,"MUSHROOM SOUP, MADE FROM DRY MIX","Mushroom soup, made from dry mix" +75607140,"MUSHROOM SOUP, CM OF, CAN, RED SOD, PREP W/ WATER","Mushroom soup, cream of, canned, reduced sodium, prepared with water" +75608010,"ONION SOUP, CREAM OF, PREP W/ MILK","Onion soup, cream of, prepared with milk" +75608100,"ONION SOUP, FRENCH","Onion soup, French" +75608200,"ONION SOUP, MADE FROM DRY MIX","Onion soup, made from dry mix" +75609010,"PEA SOUP, PREPARED WITH MILK","Pea soup, prepared with milk" +75611010,"VEGETABLE SOUP, CREAM OF, PREP W/ MILK","Vegetable soup, cream of, prepared with milk" +75612010,"ZUCCHINI SOUP, CREAM OF, PREP W/ MILK","Zucchini soup, cream of, prepared with milk" +75646010,"SHAV SOUP","Shav soup" +75647000,"SEAWEED SOUP","Seaweed soup" +75649010,"VEGETABLE SOUP, CANNED, PREPARED WITH WATER OR READY-TO-SERV","Vegetable soup, canned, prepared with water or ready-to-serve" +75649050,"VEGETABLE SOUP, MADE FROM DRY MIX","Vegetable soup, made from dry mix" +75649070,"VEGETABLE SOUP, FROM DRY MIX, LOW SODIUM","Vegetable soup, made from dry mix, low sodium" +75649110,"VEGETABLE SOUP, HOME RECIPE","Vegetable soup, home recipe" +75649150,"VEGETABLE NOODLE SOUP, HOME RECIPE","Vegetable noodle soup, home recipe" +75650990,"MINESTRONE SOUP, REDUCED SODIUM, CANNED OR READY-TO-SERVE","Minestrone soup, reduced sodium, canned or ready-to-serve" +75651000,"MINESTRONE SOUP, HOME RECIPE","Minestrone soup, home recipe" +75651010,"MINESTRONE SOUP, CANNED, PREPARED WITH WATER, OR READY-TO-SE","Minestrone soup, canned, prepared with water, or ready-to-serve" +75651020,"VEGETABLE BEEF SOUP, CANNED, PREPARED WITH WATER, OR READY-T","Vegetable beef soup, canned, prepared with water, or ready-to-serve" +75651030,"VEGETABLE BEEF NOODLE SOUP, PREPARED W/ WATER","Vegetable beef noodle soup, prepared with water" +75651040,"VEGETABLE NOODLE SOUP, CANNED, PREPARED WITH WATER, OR 
READY","Vegetable noodle soup, canned, prepared with water, or ready-to-serve" +75651070,"VEGETABLE RICE SOUP, CANNED, PREPARED WITH WATER OR READY-TO","Vegetable rice soup, canned, prepared with water or ready-to-serve" +75651080,"VEGETABLE BEEF SOUP WITH RICE, CANNED, PREPARED WITH WATER O","Vegetable beef soup with rice, canned, prepared with water or ready-to-serve" +75651110,"VEGETABLE CHICKEN RICE SOUP, CANNED, PREP W/WATER OR RTF","Vegetable chicken rice soup, canned, prepared with water or ready-to-serve" +75651140,"VEGETABLE SOUP WITH CHICKEN BROTH, MEXICAN STYLE, HOME RECIP","Vegetable soup with chicken broth, Mexican style, home recipe (Sopa Ranchera)" +75651150,"VEGETABLE NOODLE SOUP, RED SODIUM, CAN, PREP W/ WATER/RTS","Vegetable noodle soup, reduced sodium, canned, prepared with water or ready-to-serve" +75652010,"VEGETABLE BEEF SOUP, HOME RECIPE","Vegetable beef soup, home recipe" +75652030,"VEGETABLE BEEF SOUP, CANNED, PREPARED WITH MILK","Vegetable beef soup, canned, prepared with milk" +75652040,"VEG BEEF SOUP W/ NOODLES, HOME RECIPE","Vegetable beef soup with noodles or pasta, home recipe" +75652050,"VEG BEEF SOUP W/ RICE, HOME RECIPE","Vegetable beef soup with rice, home recipe" +75654010,"VEGETARIAN VEGETABLE SOUP, PREPARED W/ WATER","Vegetarian vegetable soup, prepared with water" +75656010,"VEGETABLE SOUP, SPANISH, STEW TYPE","Vegetable soup, Spanish style, stew type" +75656020,"VEGETABLE SOUP, CHUNKY STYLE","Vegetable soup, chunky style" +75656040,"VEGETABLE SOUP W/ PASTA, CHUNKY STYLE","Vegetable soup, with pasta, chunky style" +75656060,"VEG BEEF SOUP, CHUNKY STYLE (INCL VEG W/ MEAT SOUPS","Vegetable beef soup, chunky style" +75657000,"VEGETABLE BROTH, BOUILLON (INCL POT LIQUOR)","Vegetable broth, bouillon" +76102010,"SPINACH, CREAMED, BABY, STRAINED","Spinach, creamed, baby food, strained" +76102030,"BROCCOLI, CARROTS & CHEESE, BABY, JUNIOR","Broccoli, carrots and cheese, baby food, junior" +76201000,"CARROTS, BABY, NS AS TO STRAINED OR 
JUNIOR","Carrots, baby food, NS as to strained or junior" +76201010,"CARROTS, BABY, STRAINED","Carrots, baby food, strained" +76201020,"CARROTS, BABY, JUNIOR","Carrots, baby food, junior" +76201030,"CARROTS, BABY FOOD, TODDLER","Carrots, baby food, toddler" +76202000,"CARROTS & PEAS, BABY, STRAINED","Carrots and peas, baby food, strained" +76205000,"SQUASH, BABY, NS AS TO STRAINED OR JUNIOR","Squash, baby food, NS as to strained or junior" +76205010,"SQUASH, BABY, STRAINED","Squash, baby food, strained" +76205020,"SQUASH, BABY, JUNIOR","Squash, baby food, junior" +76205030,"SQUASH & CORN, BABY, STRAINED","Squash and corn, baby food, strained" +76205060,"CORN AND SWEET POTATOES, BABY FOOD, STRAINED","Corn and sweet potatoes, baby food, strained" +76209000,"SWEET POTATOES, BABY, NS AS TO STRAINED OR JUNIOR","Sweet potatoes, baby food, NS as to strained or junior" +76209010,"SWEET POTATOES, BABY, STRAINED","Sweet potatoes, baby food, strained" +76209020,"SWEET POTATOES, BABY, JUNIOR","Sweet potatoes, baby food, junior" +76401000,"BEANS, GREEN STRING, BABY, NS AS TO STR OR JR","Beans, green string, baby food, NS as to strained or junior" +76401010,"BEANS, GREEN STRING, BABY, STRAINED","Beans, green string, baby food, strained" +76401020,"BEANS, GREEN STRING, BABY, JUNIOR","Beans, green string, baby food, junior" +76401060,"BEANS, GREEN STRING, BABY, TODDLER","Beans, green string, baby food, toddler" +76402000,"GREEN BEANS & POTATOES, BABY, STRAINED","Green beans and potatoes, baby food, strained" +76403010,"BEETS, BABY, STRAINED","Beets, baby food, strained" +76405000,"CORN, CREAMED, BABY, NS AS TO STRAINED OR JUNIOR","Corn, creamed, baby food, NS as to strained or junior" +76405010,"CORN, CREAMED, BABY, STRAINED","Corn, creamed, baby food, strained" +76405020,"CORN, CREAMED, BABY, JUNIOR","Corn, creamed, baby food, junior" +76407000,"MIXED VEG, GARDEN VEG, BABY, NS AS TO STR OR JR","Mixed vegetables, garden vegetables, baby food, NS as to strained or junior" 
+76407010,"MIXED VEGETABLES, GARDEN VEGETABLES, BABY, STRAINED","Mixed vegetables, garden vegetables, baby food, strained" +76407020,"MIXED VEGETABLES, GARDEN VEGETABLES, BABY, JUNIOR","Mixed vegetables, garden vegetables, baby food, junior" +76409000,"PEAS, BABY, NS AS TO STRAINED OR JUNIOR","Peas, baby food, NS as to strained or junior" +76409010,"PEAS, BABY, STRAINED","Peas, baby food, strained" +76409020,"PEAS, BABY, JUNIOR","Peas, baby food, junior" +76409030,"PEAS, BABY, TODDLER","Peas, baby food, toddler" +76420000,"POTATOES, BABY, TODDLER","Potatoes, baby food, toddler" +76501000,"VEGETABLES & RICE, BABY, STRAINED","Vegetables and rice, baby food, strained" +76502000,"PEAS & BROWN RICE, BABY","Peas and brown rice, baby food" +76601010,"VEGETABLE & BACON, BABY, STRAINED","Vegetable and bacon, baby food, strained" +76602000,"CARROTS & BEEF, BABY, STRAINED","Carrots and beef, baby food, strained" +76603000,"VEGETABLE & BEEF, BABY, NS AS TO STRAINED OR JUNIOR","Vegetable and beef, baby food, NS as to strained or junior" +76603010,"VEGETABLE & BEEF, BABY, STRAINED","Vegetable and beef, baby food, strained" +76603020,"VEGETABLE & BEEF, BABY, JUNIOR","Vegetable and beef, baby food, junior" +76604000,"BROCCOLI & CHICKEN, BABY, STRAINED","Broccoli and chicken, baby food, strained" +76604500,"SWEET POTATOES & CHICKEN, BABY, STRAINED","Sweet potatoes and chicken, baby food, strained" +76605000,"VEGETABLE & CHICKEN, BABY, NS AS TO STR OR JR","Vegetable and chicken, baby food, NS as to strained or junior" +76605010,"VEGETABLE & CHICKEN, BABY, STRAINED","Vegetable and chicken, baby food, strained" +76605020,"VEGETABLE & CHICKEN, BABY, JUNIOR","Vegetable and chicken, baby food, junior" +76607000,"VEGETABLE & HAM, BABY, NS AS TO STRAINED OR JUNIOR","Vegetable and ham, baby food, NS as to strained or junior" +76607010,"VEGETABLE & HAM, BABY, STRAINED","Vegetable and ham, baby food, strained" +76607020,"VEGETABLE & HAM, BABY, JUNIOR","Vegetable and ham, baby food, junior" 
+76607030,"POTATOES W/ CHEESE & HAM, BABY FOOD, TODDLER","Potatoes with cheese and ham, baby food, toddler" +76607100,"POTATOES WITH CHEESE AND BROCCOLI, BABY FOOD, TODDLER","Potatoes with cheese and broccoli, baby food, toddler" +76609010,"VEGETABLE & LAMB, BABY, STRAINED","Vegetable and lamb, baby food, strained" +76611000,"VEGETABLE & TURKEY, BABY, NS AS TO STR OR JR","Vegetable and turkey, baby food, NS as to strained or junior" +76611010,"VEGETABLE & TURKEY, BABY, STRAINED","Vegetable and turkey, baby food, strained" +76611020,"VEGETABLE & TURKEY, BABY, JUNIOR","Vegetable and turkey, baby food, junior" +77121010,"FRIED STUFFED POTATOES, P.R. (RELLENOS DE PAPAS)","Fried stuffed potatoes, Puerto Rican style (Rellenos de papas)" +77121110,"POTATO&HAM FRITTERS,P.R.(FRITURAS DE PAPA Y JAMON)","Potato and ham fritters, Puerto Rican style (Frituras de papa y jamon)" +77141010,"POTATO CHICKEN PIE, P.R. (PASTELON DE POLLO)","Potato chicken pie, Puerto Rican style (Pastelon de pollo)" +77201210,"GREEN PLANTAIN W/ CRACKLINGS, P.R. (MOFONGO)","Green plantain with cracklings, Puerto Rican style (Mofongo)" +77205110,"RIPE PLANTAIN FRITTERS, P.R. (PIONONO)","Ripe plantain fritters, Puerto Rican style (Pionono)" +77205610,"RIPE PLANTAIN MEAT PIE, P.R. (PINON)","Ripe plantain meat pie, Puerto Rican style (Pinon)" +77230210,"CASSAVA PASTELES, P.R. (PASTELES DE YUCA)","Cassava Pasteles, Puerto Rican style (Pasteles de yuca)" +77230510,"CASSAVA FRITTER STUFFED W/ CRAB, P.R. (EMPANADA DE YUCA)","Cassava fritter stuffed with crab meat, Puerto Rican style (Empanada de yuca y jueyes)" +77250110,"STUFFED TANNIER FRITTERS, P.R. (ALCAPURRIAS)","Stuffed tannier fritters, Puerto Rican style (Alcapurrias)" +77250710,"TANNIER FRITTERS, P.R. 
(FRITURAS DE YAUTIA)","Tannier fritters, Puerto Rican style (Frituras de yautia)" +77272010,"PUERTO RICAN PASTELES (PASTELES DE MASA)","Puerto Rican pasteles (Pasteles de masa)" +77316010,"STUFFED CABBAGE, W/ MEAT, P.R.(REPOLLO RELLENO CON CARNE)","Stuffed cabbage, with meat, Puerto Rican style (Repollo relleno con carne)" +77316510,"STUFFED CABBAGE, W MEAT& RICE, SYRIAN DISH, P.R. STYLE","Stuffed cabbage, with meat and rice, Syrian dish, Puerto Rican style (Repollo relleno con carne y con arroz; Arabe Mihsy Melful)" +77316600,"EGGPLANT AND MEAT CASSEROLE","Eggplant and meat casserole" +77513010,"SPANISH STEW, P.R. (COCIDO ESPANOL)","Spanish stew, Puerto Rican style (Cocido Espanol)" +77563010,"PUERTO RICAN STEW (SALCOCHO / SANCOCHO)","Puerto Rican stew (Salcocho / Sancocho)" +78101000,"VEGETABLE & FRUIT JUICE BLEND,100% JUICE,W/ HIGH VIT C,+ E,A","Vegetable and fruit juice blend, 100% juice, with high vitamin C plus added vitamin E and vitamin A" +81100000,"TABLE FAT, NFS","Table fat, NFS" +81100500,"BUTTER, NFS","Butter, NFS" +81101000,"BUTTER, STICK, SALTED","Butter, stick, salted" +81101010,"BUTTER, WHIPPED, TUB, SALTED","Butter, whipped, tub, salted" +81101020,"BUTTER, WHIPPED, STICK, SALTED","Butter, whipped, stick, salted" +81101100,"BUTTER, STICK, UNSALTED","Butter, stick, unsalted" +81101110,"BUTTER, WHIPPED, TUB, UNSALTED","Butter, whipped, tub, unsalted" +81101120,"BUTTER, WHIPPED, STICK, UNSALTED","Butter, whipped, stick, unsalted" +81101500,"LIGHT BUTTER, STICK, SALTED","Light butter, stick, salted" +81101510,"LIGHT BUTTER, STICK, UNSALTED","Light butter, stick, unsalted" +81101520,"LIGHT BUTTER, WHIPPED, TUB, SALTED","Light butter, whipped, tub, salted" +81102000,"MARGARINE, NFS","Margarine, NFS" +81102010,"MARGARINE, STICK, SALTED","Margarine, stick, salted" +81102020,"MARGARINE, TUB, SALTED","Margarine, tub, salted" +81103020,"MARGARINE, WHIPPED, TUB, SALTED","Margarine, whipped, tub, salted" +81103030,"MARGARINE, STICK, UNSALTED","Margarine, stick, 
unsalted" +81103040,"MARGARINE-LIKE SPREAD, STICK, SALTED","Margarine-like spread, stick, salted" +81103041,"MARGARINE-LIKE SPREAD, MADE W/ YOGURT, STICK, SALTED","Margarine-like spread, made with yogurt, stick, salted" +81103060,"MARGARINE, TUB, UNSALTED","Margarine, tub, unsalted" +81103070,"MARGARINE, WHIPPED, TUB, UNSALTED","Margarine, whipped, tub, unsalted" +81103080,"MARGARINE-LIKE SPREAD, TUB, SALTED","Margarine-like spread, tub, salted" +81103090,"MARGARINE-LIKE SPREAD, LIQUID, SALTED","Margarine-like spread, liquid, salted" +81103100,"MARGARINE-LIKE SPREAD, STICK, UNSALTED","Margarine-like spread, stick, unsalted" +81103120,"MARGARINE-LIKE SPREAD, TUB, UNSALTED","Margarine-like spread, tub, unsalted" +81103130,"MARGARINE-LIKE SPREAD, WHIPPED, TUB, SALTED","Margarine-like spread, whipped, tub, salted" +81103140,"MARGARINE-LIKE SPREAD, TUB, SWEETENED","Margarine-like spread, tub, sweetened" +81104010,"MARGARINE-LIKE SPREAD, RED CAL, 40% FAT, TUB, SALTED","Margarine-like spread, reduced calorie, about 40% fat, tub, salted" +81104011,"MARGARINE-LIKE SPREAD,RED CAL,40% FAT,MADE W/ YOGURT,TUB","Margarine-like spread, reduced calorie, about 40% fat, made with yogurt, tub, salted" +81104020,"MARGARINE-LIKE SPREAD, RED CAL, 40% FAT, STICK, SALTED","Margarine-like spread, reduced calorie, about 40% fat, stick, salted" +81104050,"MARGARINE-LIKE SPREAD, RED CAL, 20% FAT, TUB, SALTED","Margarine-like spread, reduced calorie, about 20% fat, tub, salted" +81104070,"MARGARINE-LIKE SPREAD, RED CAL, 20% FAT, TUB, UNSALTED","Margarine-like spread, reduced calorie, about 20% fat, tub, unsalted" +81104100,"MARGARINE-LIKE SPREAD, FAT FREE, TUB, SALTED","Margarine-like spread, fat free, tub, salted" +81104110,"MARGARINE-LIKE SPREAD, FAT FREE, LIQUID, SALTED","Margarine-like spread, fat free, liquid, salted" +81104500,"VEGETABLE OIL-BUTTER SPREAD, STICK, SALTED","Vegetable oil-butter spread, stick, salted" +81104510,"VEGETABLE OIL-BUTTER SPREAD, TUB, SALTED","Vegetable 
oil-butter spread, tub, salted" +81104550,"VEGETABLE OIL-BUTTER SPREAD, RED CAL, STICK, SALTED","Vegetable oil-butter spread, reduced calorie, stick, salted" +81104560,"VEGETABLE OIL-BUTTER SPREAD, RED CAL, TUB, SALTED","Vegetable oil-butter spread, reduced calorie, tub, salted" +81105010,"BUTTER-MARGARINE BLEND, STICK, SALTED","Butter-margarine blend, stick, salted" +81105020,"BUTTER-MARGARINE BLEND, TUB, SALTED","Butter-margarine blend, tub, salted" +81105500,"BUTTER-VEG OIL BLEND","Butter-vegetable oil blend" +81106010,"BUTTER REPLACEMENT, FAT-FREE POWDER, NOT RECONST","Butter replacement, fat-free powder" +81201000,"ANIMAL FAT OR DRIPPINGS","Animal fat or drippings" +81202000,"LARD","Lard" +81203000,"SHORTENING, NS AS TO VEGETABLE OR ANIMAL","Shortening, NS as to vegetable or animal" +81203100,"SHORTENING, VEGETABLE","Shortening, vegetable" +81203200,"SHORTENING, ANIMAL","Shortening, animal" +81204000,"GHEE, CLARIFIED BUTTER","Ghee, clarified butter" +81301000,"GARLIC SAUCE","Garlic sauce" +81301020,"LEMON-BUTTER SAUCE","Lemon-butter sauce" +81302010,"HOLLANDAISE SAUCE","Hollandaise sauce" +81302040,"SANDWICH SPREAD","Sandwich spread" +81302050,"TARTAR SAUCE","Tartar sauce" +81302060,"HORSERADISH SAUCE","Horseradish sauce" +81302070,"PESTO SAUCE","Pesto sauce" +81312000,"TARTAR SAUCE, REDUCED FAT/CALORIE","Tartar sauce, reduced fat/calorie" +81322000,"HONEY BUTTER","Honey butter" +81330210,"ADOBO FRESCO (INCL ADOBO CRIOLLO)","Adobo fresco" +82101000,"VEGETABLE OIL, NFS (INCLUDE OIL, NFS)","Vegetable oil, NFS" +82101300,"ALMOND OIL","Almond oil" +82101500,"COCONUT OIL","Coconut oil" +82102000,"CORN OIL","Corn oil" +82102500,"CORN & CANOLA OIL","Corn and canola oil" +82103000,"COTTONSEED OIL","Cottonseed oil" +82103500,"FLAXSEED OIL","Flaxseed oil" +82104000,"OLIVE OIL","Olive oil" +82105000,"PEANUT OIL","Peanut oil" +82105500,"RAPESEED OIL (INCL CANOLA OIL, PURITAN)","Rapeseed oil" +82105750,"CANOLA & SOYBEAN OIL","Canola and soybean oil" +82105800,"CANOLA, 
SOYBEAN & SUNFLOWER OIL","Canola, soybean and sunflower oil" +82106000,"SAFFLOWER OIL","Safflower oil" +82107000,"SESAME OIL","Sesame oil" +82108000,"SOYBEAN OIL","Soybean oil" +82108250,"SOYBEAN & SUNFLOWER OIL","Soybean and sunflower oil" +82108500,"SUNFLOWER OIL","Sunflower oil" +82108700,"WALNUT OIL","Walnut oil" +82109000,"WHEAT GERM OIL","Wheat germ oil" +83100100,"SALAD DRESSING, NFS, FOR SALADS","Salad dressing, NFS, for salads" +83100200,"SALAD DRESSING, NFS, FOR SANDWICHES","Salad dressing, NFS, for sandwiches" +83101000,"BLUE OR ROQUEFORT CHEESE DRESSING","Blue or roquefort cheese dressing" +83101500,"BACON DRESSING (HOT)","Bacon dressing (hot)" +83101600,"BACON & TOMATO DRESSING","Bacon and tomato dressing" +83102000,"CAESAR DRESSING","Caesar dressing" +83103000,"COLESLAW DRESSSING","Coleslaw dressing" +83104000,"FRENCH OR CATALINA DRESSING","French or Catalina dressing" +83105500,"HONEY MUSTARD DRESSING","Honey mustard dressing" +83106000,"ITALIAN DRESSING, W/ VINEGAR & OIL","Italian dressing, made with vinegar and oil" +83107000,"MAYONNAISE, REGULAR","Mayonnaise, regular" +83108000,"MAYONNAISE, IMITATION","Mayonnaise, imitation" +83109000,"RUSSIAN DRESSING","Russian dressing" +83110000,"MAYONNAISE-TYPE SALAD DRESSING","Mayonnaise-type salad dressing" +83112000,"AVOCADO DRESSING","Avocado dressing" +83112500,"CREAMY DRESSING","Creamy dressing" +83112950,"POPPY SEED DRESSING","Poppy seed dressing" +83112990,"SESAME DRESSING","Sesame dressing" +83114000,"THOUSAND ISLAND DRESSING","Thousand Island dressing" +83115000,"YOGURT DRESSING","Yogurt dressing" +83200100,"SALAD DRESSING, LIGHT, NFS","Salad dressing, light, NFS" +83201000,"BLUE OR ROQUEFORT CHEESE DRESSING, LIGHT","Blue or roquefort cheese dressing, light" +83201400,"COLESLAW DRESSING, LIGHT","Coleslaw dressing, light" +83202020,"FRENCH OR CATALINA DRESSING, LIGHT","French or Catalina dressing, light" +83203000,"CAESAR DRESSING, LIGHT","Caesar dressing, light" +83204000,"MAYONNAISE, 
LIGHT","Mayonnaise, light" +83204030,"MAYONNAISE, REGULAR, WITH OLIVE OIL","Mayonnaise, reduced fat, with olive oil" +83204050,"MAYONNAISE-TYPE SALAD DRESSING, LIGHT","Mayonnaise-type salad dressing, light" +83204500,"HONEY MUSTARD DRESSING, LIGHT","Honey mustard dressing, light" +83205450,"ITALIAN DRESSING, LIGHT","Italian dressing, light" +83206000,"RUSSIAN DRESSING, LIGHT","Russian dressing, light" +83206500,"SESAME DRESSING, LIGHT","Sesame dressing, light" +83207000,"THOUSAND ISLAND DRESSING, LIGHT","Thousand Island dressing, light" +83208500,"KOREAN DRESSING OR MARINADE","Korean dressing or marinade" +83210100,"CREAMY DRESSING, LIGHT","Creamy dressing, light" +83300100,"BLUE OR ROQUEFORT CHEESE DRESSING, FAT FREE","Blue or roquefort cheese dressing, fat free" +83300200,"CAESAR DRESSING, FAT FREE","Caesar dressing, fat free" +83300300,"CREAMY DRESSING, FAT FREE","Creamy dressing, fat free" +83300400,"FRENCH OR CATALINA DRESSING, FAT FREE","French or Catalina dressing, fat free" +83300500,"HONEY MUSTARD DRESSING, FAT FREE","Honey mustard dressing, fat free" +83300600,"ITALIAN DRESSING, FAT FREE","Italian dressing, fat free" +83300700,"MAYONNAISE, FAT FREE","Mayonnaise, fat free" +83300800,"RUSSIAN DRESSING, FAT FREE","Russian dressing, fat free" +83300900,"SALAD DRESSING, FAT FREE, NFS","Salad dressing, fat free, NFS" +83301000,"THOUSAND ISLAND DRESSING, FAT FREE","Thousand Island dressing, fat free" +91101000,"SUGAR, NFS","Sugar, NFS" +91101010,"SUGAR, WHITE, GRANULATED OR LUMP","Sugar, white, granulated or lump" +91101020,"SUGAR, WHITE, CONFECTIONER'S, POWDERED","Sugar, white, confectioner's, powdered" +91102010,"SUGAR, BROWN","Sugar, brown" +91103010,"SUGAR, MAPLE","Sugar, maple" +91104100,"SUGAR, CINNAMON","Sugar, cinnamon" +91104200,"SUGAR, RAW","Sugar, raw" +91105010,"FRUCTOSE SWEETENER, SUGAR SUBSTITUTE, DRY POWDER","Fructose sweetener, sugar substitute, dry powder" +91106000,"SUGAR SUBSTITUTE, SUGAR-ASPARTAME BLEND, DRY PWD","Sugar substitute, 
sugar-aspartame blend, dry powder" +91107000,"SUCRALOSE-BASED SWEETENER, SUGAR SUBSTITUTE","Sucralose-based sweetener, sugar substitute" +91108000,"SUGAR SUB, HERBAL EXTRACT SWEETENER, POWDER","Sugar substitute, herbal extract sweetener, powder" +91108010,"SUGAR SUB, HERBAL EXTRACT SWEETENER, LIQUID","Sugar substitute, herbal extract sweetener, liquid" +91109000,"BLUE AGAVE LIQUID SWEETENER, SUGAR SUBSTITUTE","Blue Agave liquid sweetener, sugar substitute" +91200000,"SUGAR SUBSTITUTE, LOW CALORIE, POWDERED, NFS","Sugar substitute, low-calorie, powdered, NFS" +91200020,"SUGAR SUBSTITUTE, SACCHARIN-BASED, DRY POWDER","Sugar substitute, saccharin-based, dry powder" +91200030,"BROWN SUGAR SUBSTITUTE, SACCHARIN-BASED, DRY POWDER","Brown sugar substitute, saccharin-based, dry powder" +91200040,"SUGAR SUBSTITUTE, SACCHARIN-BASED, DRY POWDER AND TABLETS","Sugar substitute, saccharin-based, dry powder and tablets" +91200110,"SUGAR SUBSTITUTE, SACCHARIN-BASED, LIQUID","Sugar substitute, saccharin-based, liquid" +91201010,"SUGAR SUBSTITUTE, ASPARTAME-BASED, DRY POWDER","Sugar substitute, aspartame-based, dry powder" +91300010,"SYRUP, NFS","Syrup, NFS" +91300100,"PANCAKE SYRUP, NFS","Pancake syrup, NFS" +91301020,"CANE & CORN PANCAKE SYRUP","Cane and corn pancake syrup" +91301030,"CORN SYRUP, LIGHT OR DARK","Corn syrup, light or dark" +91301040,"BUTTERED BLENDS SYRUP (INCL MRS BUTTERWORTH)","Buttered blends syrup" +91301050,"FRUIT SYRUP","Fruit syrup" +91301060,"MAPLE SYRUP(100% MAPLE)(INCLUDE MAPLE CREAM)","Maple syrup (100% maple)" +91301080,"CHOCOLATE SYRUP, THIN TYPE","Chocolate syrup, thin type" +91301081,"CHOCOLATE SYRUP, THIN TYPE, LIGHT","Chocolate syrup, thin type, light" +91301082,"CHOCOLATE SYRUP, THIN TYPE, SUGAR FREE","Chocolate syrup, thin type, sugar free" +91301090,"SORGHUM SYRUP","Sorghum syrup" +91301100,"SUGAR (WHITE) & WATER SYRUP (INCLUDE SIMPLE SYRUP)","Sugar (white) and water syrup" +91301120,"SUGAR, CARMELIZED","Sugar, carmelized" +91301130,"FRUIT 
FLAVORED SYRUP USED FOR MILK BEVERAGES","Fruit flavored syrup used for milk beverages" +91301200,"SUGAR (BROWN) & WATER SYRUP","Sugar (brown) and water syrup" +91301250,"MAPLE & CORN &/OR CANE PANCAKE SYRUP BLENDS","Maple and corn and/or cane pancake syrup blends (formerly Corn and maple syrup (2% maple))" +91301510,"SYRUP, PANCAKE, REDUCED CALORIE","Syrup, pancake, reduced calorie" +91302010,"HONEY (INCLUDE PEAR HONEY, RAW HONEY)","Honey" +91303000,"MOLASSES","Molasses" +91303500,"SUGAR, BROWN, LIQUID","Sugar, brown, liquid" +91303750,"CHOCOLATE GRAVY","Chocolate gravy" +91304010,"TOPPING, BUTTERSCOTCH OR CARAMEL","Topping, butterscotch or caramel" +91304020,"TOPPING, CHOCOLATE, THICK, FUDGE TYPE","Topping, chocolate, thick, fudge type" +91304030,"TOPPING, FRUIT","Topping, fruit" +91304040,"TOPPING, MARSHMALLOW","Topping, marshmallow" +91304050,"HARD SAUCE","Hard sauce" +91304060,"TOPPING, NUT (WET)","Topping, nut (wet)" +91304070,"TOPPING, PEANUT BUTTER, THICK FUDGE TYPE","Topping, peanut butter, thick, fudge type" +91304080,"TOPPING, FRUIT, UNSWEETENED","Topping, fruit, unsweetened" +91304090,"TOPPING, CHOC FLAVOR HAZELNUT SPREAD (INCL NUTELLA)","Topping, chocolate flavored hazelnut spread" +91304250,"TOPPING, MILK CHOCOLATE W/ CEREAL","Topping, milk chocolate with cereal" +91304300,"TOPPING, CHOCOLATE, HARD COATING","Topping, chocolate, hard coating" +91305010,"ICING, CHOCOLATE","Icing, chocolate" +91305020,"ICING, WHITE","Icing, white" +91351010,"SYRUP, DIETETIC","Syrup, dietetic" +91351020,"TOPPING, DIETETIC","Topping, dietetic" +91361010,"SWEET & SOUR SAUCE (INCLUDE VIETNAMESE SAUCE)","Sweet and sour sauce" +91361020,"FRUIT SAUCE (INCLUDE ALL FRUITS)","Fruit sauce" +91361040,"DESSERT SAUCE","Dessert sauce" +91361050,"DUCK SAUCE (INCLUDE CHAISNI SAUCE)","Duck sauce" +91361070,"PLUM SAUCE, ASIAN STYLE","Plum sauce, Asian style" +91401000,"JELLY, ALL FLAVORS","Jelly, all flavors" +91402000,"JAM, PRESERVES, ALL FLAVORS","Jam, preserves, all flavors" 
+91403000,"FRUIT BUTTER, ALL FLAVORS (INCLUDE APPLE BUTTER)","Fruit butter, all flavors" +91404000,"MARMALADE, ALL FLAVORS","Marmalade, all flavors" +91405000,"JELLY, DIETETIC, ALL FLAVORS,SWEETENED W/ ARTIFICIAL SWEETEN","Jelly, dietetic, all flavors, sweetened with artificial sweetener" +91405500,"JELLY, REDUCED SUGAR, ALL FLAVORS","Jelly, reduced sugar, all flavors" +91406000,"JAM, MARMALADES, ARTIFICIALLY SWEETENED","Jams, preserves, marmalades, dietetic, all flavors, sweetened with artificial sweetener" +91406500,"JAM PRESERVES,MARMALADES,SWEET W/ FRUIT JUICE CONC","Jams, preserves, marmalades, sweetened with fruit juice concentrates, all flavors" +91406600,"JAMS,PRESERVES,MARMALADES,LOW SUGAR (ALL FLAVORS)","Jams, preserves, marmalades, low sugar (all flavors)" +91407100,"GUAVA PASTE","Guava paste" +91407120,"SWEET POTATO PASTE","Sweet potato paste" +91407150,"BEAN PASTE, SWEETENED","Bean paste, sweetened" +91408100,"CHINESE PRESERVED SWEET VEGETABLE","Chinese preserved sweet vegetable" +91500200,"GELATIN POWDER, SWEETENED, DRY","Gelatin powder, sweetened, dry" +91501010,"GELATIN DESSERT","Gelatin dessert" +91501015,"GELATIN SNACKS","Gelatin snacks" +91501020,"GELATIN DESSERT W/ FRUIT","Gelatin dessert with fruit" +91501030,"GELATIN DESSERT W/ WHIPPED CREAM","Gelatin dessert with whipped cream" +91501040,"GELATIN DESSERT W/ FRUIT & WHIPPED CREAM","Gelatin dessert with fruit and whipped cream" +91501050,"GELATIN DESSERT W/ CREAM CHEESE","Gelatin dessert with cream cheese" +91501060,"GELATIN DESSERT W/ SOUR CREAM","Gelatin dessert with sour cream" +91501070,"GELATIN DESSERT W/ FRUIT & SOUR CREAM","Gelatin dessert with fruit and sour cream" +91501080,"GELATIN DESSERT W/ FRUIT & CREAM CHEESE","Gelatin dessert with fruit and cream cheese" +91501090,"GELATIN DESSERT W/ FRUIT, VEGETABLES, & NUTS","Gelatin dessert with fruit, vegetable, and nuts" +91501100,"GELATIN SALAD W/ VEGETABLES","Gelatin salad with vegetables" +91501110,"GELATIN DESSERT W/ FRUIT & WHIPPED 
TOPPING","Gelatin dessert with fruit and whipped topping" +91501120,"GELATIN DESSERT W/ FRUIT & VEGETABLES","Gelatin dessert with fruit and vegetables" +91510100,"GELATIN POWDER, DIETETIC, DRY","Gelatin powder, dietetic, sweetened with low calorie sweetener, dry" +91511010,"GELATIN DESSERT, DIETETIC, W/ LO CAL SWEETENER","Gelatin dessert, dietetic, sweetened with low calorie sweetener" +91511020,"GELATIN DESSERT, DIET, W/ FRUIT, LO CAL SWEETNER","Gelatin dessert, dietetic, with fruit, sweetened with low calorie sweetener" +91511030,"GELATIN DESSERT, DIETETIC, W/ WHIPPED TOPPING","Gelatin dessert, dietetic, with whipped topping, sweetened with low calorie sweetener" +91511050,"GELATIN DESSERT, DIETETIC, W/ CREAM CHEESE","Gelatin dessert, dietetic, with cream cheese, sweetened with low calorie sweetener" +91511060,"GELATIN DESSERT, DIETETIC, W/ SOUR CREAM","Gelatin dessert, dietetic, with sour cream, sweetened with low calorie sweetener" +91511070,"GELATIN DESSERT, DIETETIC, W/ FRUIT & SOUR CREAM","Gelatin dessert, dietetic, with fruit and sour cream, sweetened with low calorie sweetener" +91511080,"GELATIN DESSERT, DIETETIC, W/ FRUIT & CREAM CHEESE","Gelatin dessert, dietetic, with fruit and cream cheese, sweetened with low calorie sweetener" +91511090,"GELATIN DESSERT, DIETETIC, W/ FRUIT & VEGETABLES","Gelatin dessert, dietetic, with fruit and vegetable(s), sweetened with low calorie sweetener" +91511100,"GELATIN DESSERT, DIETETIC, W/ VEGETABLES","Gelatin salad, dietetic, with vegetables, sweetened with low calorie sweetener" +91511110,"GELATIN DESSERT, DIETETIC, W/ FRUIT & WHIP TOPPING","Gelatin dessert, dietetic, with fruit and whipped topping, sweetened with low calorie sweetener" +91512010,"DANISH DESSERT PUDDING","Danish dessert pudding" +91520100,"YOOKAN, JAPANESE DESSERT MADE W/ BEAN PASTE & SUGAR","Yookan (Yokan), a Japanese dessert made with bean paste and sugar" +91550100,"COCONUT CREAM CAKE, P.R. 
(BIEN ME SABE)","Coconut cream cake, Puerto Rican style (Bien me sabe, ""Tastes good to me"")" +91550300,"PINEAPPLE CUSTARD, P.R. (FLAN DE PINA)","Pineapple custard, Puerto Rican style (Flan de pina)" +91560100,"HAUPIA (COCONUT PUDDING)","Haupia (coconut pudding)" +91580000,"GELATIN,FROZ,WHIPPED,ON STICK(INCL JELLO GLTN POPS)","Gelatin, frozen, whipped, on a stick" +91601000,"ICE, FRUIT","Ice, fruit" +91611000,"ICE POP","Ice pop" +91611050,"ICE POP FILLED W/ ICE CREAM, ALL FLAVOR VARIETIES","Ice pop filled with ice cream, all flavor varieties" +91611100,"ICE POP, SWEETENED W/ LOW CALORIE SWEETENER","Ice pop, sweetened with low calorie sweetener" +91621000,"SNOW CONE","Snow cone" +91700010,"CANDY, NFS","Candy, NFS" +91700500,"M&M'S ALMOND CHOCOLATE CANDIES","M&M's Almond Chocolate Candies" +91701010,"ALMONDS, CHOCOLATE-COVERED","Almonds, chocolate covered" +91701020,"ALMONDS, SUGAR-COATED (INCL JORDAN ALMONDS)","Almonds, sugar-coated" +91701030,"ALMONDS, YOGURT-COVERED","Almonds, yogurt-covered" +91702010,"BUTTERSCOTCH MORSELS","Butterscotch morsels" +91703010,"CARAMEL CANDY, CHOC-FLAVOR ROLL (INCL TOOTSIE ROLL)","Caramel, chocolate-flavored roll" +91703020,"CARAMEL CANDY, NOT CHOCOLATE","Caramel, flavor other than chocolate" +91703030,"CARAMEL CANDY, W/ NUTS","Caramel, with nuts" +91703040,"CARAMEL CANDY, CHOCOLATE COVERED","Caramel candy, chocolate covered" +91703050,"CARAMEL CANDY, W/ NUTS & CEREAL, CHOCOLATE-COVERED","Caramel with nuts and cereal, chocolate covered" +91703060,"CARAMEL CANDY, W/ NUTS, CHOCOLATE-COVERED","Caramel with nuts, chocolate covered" +91703070,"ROLOS CANDY","Rolo" +91703080,"CARAMEL, ALL FLAVORS, SUGAR FREE","Caramel, all flavors, sugar free" +91703150,"TOBLERONE,MILK CHOCOLATE W/ HONEY & ALMOND NOUGAT","Toblerone, milk chocolate with honey and almond nougat" +91703200,"TWIX CARAMEL COOKIE BARS","TWIX Caramel Cookie Bars (formerly TWIX Cookie Bars)" +91703250,"TWIX CHOCOLATE FUDGE COOKIE BARS","TWIX Chocolate Fudge Cookie Bars" 
+91703300,"TWIX PEANUT BUTTER COOKIE BARS","TWIX Peanut Butter Cookie Bars" +91703400,"WHATCHAMACALLIT CANDY","Whatchamacallit" +91703500,"NUTS, CAROB-COATED","Nuts, carob-coated" +91703600,"ESPRESSO COFFEE BEANS, CHOCOLATE-COVERED","Espresso coffee beans, chocolate-covered" +91705010,"MILK CHOCOLATE CANDY, PLAIN","Milk chocolate candy, plain" +91705020,"MILK CHOCOLATE CANDY, WITH CEREAL","Milk chocolate candy, with cereal" +91705030,"KIT KAT CANDY BAR","Kit Kat" +91705040,"CHOCOLATE, MILK, W/ NUTS, NOT ALMONDS OR PEANUTS","Chocolate, milk, with nuts, not almond or peanuts" +91705050,"MILK CHOCOLATE CANDY, WITH FRUIT AND NUTS","Milk chocolate candy, with fruit and nuts" +91705060,"MILK CHOCOLATE CANDY, WITH ALMONDS","Milk chocolate candy, with almonds" +91705070,"CHOCOLATE, MILK, W/ PEANUTS (INCLUDE MR GOODBAR)","Chocolate, milk, with peanuts" +91705090,"CHOCOLATE CANDY WITH FONDANT AND CARAMEL","Chocolate candy with fondant and caramel" +91705200,"CHOCOLATE, SEMI-SWEET","Chocolate, semi-sweet morsel" +91705300,"CHOCOLATE CANDY, SWEET OR DARK","Chocolate, sweet or dark" +91705310,"CHOCOLATE, SWEET OR DARK, WITH ALMONDS","Chocolate, sweet or dark, with almonds" +91705400,"CHOCOLATE CANDY, WHITE","Chocolate, white" +91705410,"CHOCOLATE CANDY, WHITE, W/ ALMONDS","Chocolate, white, with almonds" +91705420,"CHOCOLATE, WHITE, W/ CEREAL, CANDY","Chocolate, white, with cereal" +91705430,"KIT KAT WHITE","Kit Kat White" +91705500,"MEXICAN CHOCOLATE (TABLET)","Mexican chocolate (tablet)" +91706000,"COCONUT CANDY, CHOCOLATE-COVERED","Coconut candy, chocolate covered" +91706100,"COCONUT CANDY, NO CHOCOLATE COVERING","Coconut candy, no chocolate covering" +91706400,"COCONUT CANDY, P.R. 
STYLE","Coconut candy, Puerto Rican style" +91707000,"FONDANT CANDY","Fondant" +91707010,"FONDANT CANDY, CHOCOLATE COVERED","Fondant, chocolate covered" +91708000,"FRUIT PEEL, CANDIED","Fruit peel, candied" +91708010,"FRUIT CANDY BAR","Date candy" +91708020,"SOFT FRUIT CONFECTION","Soft fruit confections" +91708030,"FRUIT LEATHER / FRUIT SNACKS CANDY","Fruit leather and fruit snacks candy" +91708040,"FUN FRUITS CREME SUPREMES CANDY","Fun Fruits Creme Supremes" +91708070,"TAMARIND CANDY","Tamarind candy" +91708100,"FRUIT SNACKS CANDY W/ HI VIT C","Fruit snacks candy, with high vitamin C" +91708150,"YOGURT COVERED FRUIT SNACKS CANDY, W/ ADDED VITAMIN C","Yogurt covered fruit snacks candy, with added vitamin C" +91708160,"YOGURT COVERED FRUIT SNACKS CANDY ROLLS, W/ HIGH VITAMIN C","Yogurt covered fruit snacks candy rolls, with high vitamin C" +91709000,"GUMDROPS, CHOCOLATE-COVERED","Gumdrops, chocolate covered" +91713010,"FUDGE, CHOCOLATE, CHOCOLATE-COATED","Fudge, chocolate, chocolate-coated" +91713020,"FUDGE, CHOCOLATE, CHOCOLATE-COATED, W/ NUTS","Fudge, chocolate, chocolate-coated, with nuts" +91713030,"FUDGE, CHOCOLATE","Fudge, chocolate" +91713040,"FUDGE, CHOCOLATE, W/ NUTS","Fudge, chocolate, with nuts" +91713050,"FUDGE, PEANUT BUTTER","Fudge, peanut butter" +91713060,"FUDGE, PEANUT BUTTER, W/ NUTS","Fudge, peanut butter, with nuts" +91713070,"FUDGE, VANILLA","Fudge, vanilla" +91713080,"FUDGE, VANILLA, W/ NUTS","Fudge, vanilla, with nuts" +91713090,"FUDGE, DIVINITY","Fudge, divinity" +91713100,"FUDGE, BROWN SUGAR (PANUCHI)","Fudge, brown sugar (penuche)" +91715000,"FUDGE, CARAMEL AND NUT, CHOCOLATE-COATED CANDY","Fudge, caramel and nut, chocolate-coated candy" +91715100,"SNICKERS CANDY BAR","SNICKERS Bar" +91715200,"BABY RUTH CANDY BAR","Baby Ruth" +91715300,"100 GRAND BAR (INCL $100,000 BAR)","100 GRAND Bar" +91716010,"HALVAH, PLAIN","Halvah, plain" +91716110,"HALVAH, CHOCOLATE-COVERED","Halvah, chocolate covered" +91718000,"HONEY-COMBED HARD CANDY, PEANUT 
BUTTER","Honey-combed hard candy with peanut butter" +91718050,"HONEY-COMBED CANDY, PEANUT BUTTER, CHOC-COVERED","Honey-combed hard candy with peanut butter, chocolate covered" +91718100,"BUTTERFINGER CANDY BAR","Butterfinger" +91718110,"BUTTERFINGER CRISP","Butterfinger Crisp" +91718200,"JIMMIES (INCLUDE CHOCOLATE-FLAVORED SPRINKLES)","Chocolate-flavored sprinkles" +91718300,"LADOO, ROUND BALL, ASIAN-INDIAN DESSERT","Ladoo, round ball, Asian-Indian dessert" +91721000,"LICORICE CANDY","Licorice" +91723000,"MARSHMALLOW","Marshmallow" +91723010,"MARSHMALLOW, CHOCOLATE-COVERED","Marshmallow, chocolate covered" +91723020,"MARSHMALLOW, CANDY-COATED","Marshmallow, candy-coated" +91723050,"MARSHMALLOW, COCONUT-COATED","Marshmallow, coconut-coated" +91726000,"NOUGAT CANDY, PLAIN","Nougat, plain" +91726110,"NOUGAT CANDY, W/ CARAMEL, CHOCOLATE-COVERED","Nougat, with caramel, chocolate covered" +91726130,"MILKY WAY BAR","MILKY WAY Bar" +91726140,"MILKY WAY MIDNIGHT BAR (FORMERLY MILKY WAY DARK BAR)","MILKY WAY MIDNIGHT Bar (formerly MILKY WAY DARK Bar)" +91726150,"MARS ALMOND BAR (FORMERLY MARS BAR)","MARS Almond Bar (formerly MARS bar)" +91726410,"NOUGAT CANDY, CHOCOLATE-COVERED","Nougat, chocolate covered" +91726420,"3 MUSKETEERS BAR","3 MUSKETEERS Bar" +91726425,"3 MUSKETEERS TRUFFLE CRISP BAR","3 Musketeers Truffle Crisp Bar" +91727010,"NUTS, CHOCOLATE-COVERED, NOT ALMONDS OR PEANUTS","Nuts, chocolate covered, not almonds or peanuts" +91728000,"NUT ROLL, FUDGE OR NOUGAT, CARAMEL & NUTS","Nut roll, fudge or nougat, caramel and nuts" +91728500,"SUGARED PECANS (SUGAR & EGG WHITE COATING)","Sugared pecans (sugar and egg white coating)" +91731000,"PEANUTS, CHOCOLATE-COVERED","Peanuts, chocolate covered" +91731010,"M&M'S PEANUT CANDIES","M&M's Peanut Chocolate Candies" +91731060,"M&M'S PEANUT BUTTER CHOCOLATE CANDIES","M&M's Peanut Butter Chocolate Candies" +91731100,"PEANUTS, SUGAR-COATED","Peanuts, sugar-coated" +91731150,"PEANUTS, YOGURT-COVERED","Peanuts, yogurt covered" 
+91732000,"PEANUT CANDY BAR","Peanut bar" +91732100,"PLANTERS PEANUT CANDY BAR","Planters Peanut Bar" +91733000,"PEANUT BRITTLE","Peanut brittle" +91733200,"PEANUT BAR, CHOCOLATE COVERED CANDY","Peanut Bar, chocolate covered candy" +91734000,"PEANUT BUTTER CANDY, CHOCOLATE-COVERED","Peanut butter, chocolate covered" +91734100,"REESE'S PEANUT BUTTER CUPS","Reese's Peanut Butter Cup" +91734200,"REESE'S PIECES CANDY","Reese's Pieces" +91734300,"REESE'S STICKS","Reese's Sticks" +91734400,"REESE'S FAST BREAK","Reese's Fast Break" +91734450,"REESE'S CRISPY CRUNCHY BAR","Reese's Crispy Crunchy Bar" +91734500,"PEANUT BUTTER MORSELS CANDY","Peanut butter morsels" +91735000,"PRALINES","Pralines" +91736000,"PINEAPPLE CANDY, P.R. STYLE","Pineapple candy, Puerto Rican style" +91739010,"RAISINS, CHOCOLATE-COVERED","Raisins, chocolate covered" +91739600,"RAISINS, YOGURT-COVERED","Raisins, yogurt covered" +91742010,"SESAME CRUNCH CANDY (SAHADI)","Sesame Crunch (Sahadi)" +91745010,"GUMDROPS","Gumdrops" +91745020,"HARD CANDY","Hard candy" +91745040,"BUTTERSCOTCH HARD CANDY","Butterscotch hard candy" +91745100,"SKITTLES CANDY","Skittles" +91746010,"SUGAR-COATED CHOCOLATE DISCS CANDY","Sugar-coated chocolate discs" +91746100,"M&M'S MILK CHOCOLATE CANDIES","M&M's Milk Chocolate Candies (formerly M&M's Plain Chocolate Candies)" +91746120,"SIXLETS CANDY","Sixlets" +91746150,"EASTER EGG, CANDY-COATED CHOCOLATE","Easter egg, candy coated chocolate" +91746200,"M&M'S PRETZEL CHOCOLATE CANDIES","M&M's Pretzel Chocolate Candies" +91750000,"TAFFY","Taffy" +91760000,"TOFFEE, PLAIN","Toffee, plain" +91760100,"TOFFEE, CHOCOLATE COVERED (INCL HEATH BAR, SKOR)","Toffee, chocolate covered" +91760200,"TOFFEE, CHOCOLATE-COATED, W/ NUTS","Toffee, chocolate-coated, with nuts" +91760500,"TRUFFLES","Truffles" +91760700,"WAX CANDY, LIQUID FILLED","Wax candy, liquid filled" +91770000,"DIETETIC OR LOW CALORIE CANDY, NFS","Dietetic or low calorie candy, NFS" +91770010,"DIETETIC OR LOW CALORIE 
GUMDROPS","Dietetic or low calorie gumdrops" +91770020,"DIETETIC OR LOW CALORIE HARD CANDY","Dietetic or low calorie hard candy" +91770030,"DIETETIC OR LOW CALORIE CANDY, CHOCOLATE-COVERED","Dietetic or low calorie candy, chocolate covered" +91770050,"MINTS, DIETETIC OR LOW CALORIE","Dietetic or low calorie mints" +91800100,"CHEWING GUM, NFS","Chewing gum, NFS" +91801000,"CHEWING GUM, SUGARED","Chewing gum, sugared" +91802000,"CHEWING GUM, SUGARLESS","Chewing gum, sugarless" +92100000,"COFFEE, NS AS TO TYPE","Coffee, NS as to type" +92100500,"COFFEE, REGULAR, NS GROUND/INSTANT","Coffee, regular, NS as to ground or instant" +92101000,"COFFEE, MADE FROM GROUND, REGULAR","Coffee, made from ground, regular" +92101500,"COFFEE, BREWED, EQUAL PARTS REG & DECAFFEINATED","Coffee, made from ground, equal parts regular and decaffeinated" +92101600,"COFFEE, TURKISH","Coffee, Turkish" +92101610,"COFFEE, ESPRESSO","Coffee, espresso" +92101630,"COFFEE, ESPRESSO, DECAFFEINATED","Coffee, espresso, decaffeinated" +92101640,"COFFEE, MEXICAN, REG, UNSWEETENED (NO MILK)","Coffee, Mexican, regular, unsweetened (no milk; not cafe con leche)" +92101650,"COFFEE, MEXICAN, REG, SWEETENED (NO MILK)","Coffee, Mexican, regular, sweetened (no milk; not cafe con leche)" +92101660,"COFFEE, MEXICAN, DECAF, UNSWEETENED (NO MILK)","Coffee, Mexican, decaffeinated, unsweetened (no milk; not cafe con leche)" +92101670,"COFFEE, MEXICAN, DECAF, SWEETENED (NO MILK)","Coffee, Mexican, decaffeinated, sweetened (no milk; not cafe con leche)" +92101700,"COFFEE, MADE FROM GROUND, REGULAR, FLAVORED","Coffee, made from ground, regular, flavored" +92101800,"COFFEE, CUBAN","Coffee, Cuban" +92101900,"COFFEE, LATTE","Coffee, Latte" +92101910,"COFFEE, LATTE, DECAFFEINATED","Coffee, Latte, decaffeinated" +92101920,"BLENDED COFFEE BEVERAGE, REGULAR, SWEETENED","Blended coffee beverage, made with regular coffee, milk, and ice, sweetened" +92101925,"BLENDED COFFEE BEVERAGE, REG, SWTND, W/ WHP CRM","Blended coffee 
beverage, made with regular coffee, milk, and ice, sweetened, with whipped cream" +92101930,"BLENDED COFFEE BEVERAGE, DECAF, SWEETENED","Blended coffee beverage, made with decaffeinated coffee, milk, and ice, sweetened" +92101935,"BLENDED COFFEE BEVERAGE, DECAF, SWTND, W/ WHP CRM","Blended coffee beverage, made with decaffeinated coffee, milk, and ice, sweetened, with whipped cream" +92101950,"COFFEE, MOCHA","Coffee, mocha" +92101960,"COFFEE, MOCHA, MADE W/ SOY MILK","Coffee, mocha, made with soy milk" +92103000,"COFFEE, MADE FROM POWDERED INSTANT, REGULAR","Coffee, made from powdered instant, regular" +92104000,"COFFEE, FROM POWDER, 50% LESS CAFFEINE","Coffee, made from powdered instant, 50% less caffeine" +92105000,"COFFEE, LIQUID CONCENTRATE, NOT RECONSTITUTED","Coffee, liquid concentrate" +92105010,"COFFEE, MADE FROM LIQUID CONCENTRATE","Coffee, made from liquid concentrate" +92106000,"COFFEE, ACID NEUTRALIZED, FROM POWDERED INSTANT","Coffee, acid neutralized, from powdered instant" +92111000,"COFFEE, DECAFFEINATED, NS AS TO GROUND OR INSTANT","Coffee, decaffeinated, NS as to ground or instant" +92111010,"COFFEE, DECAFFEINATED, MADE FROM GROUND","Coffee, decaffeinated, made from ground" +92114000,"COFFEE, DECAFFEINATED, MADE FROM POWDERED INSTANT","Coffee, decaffeinated, made from powdered instant" +92121000,"COFFEE, FROM POWDERED MIX,W/WHITENER&SUGAR, INSTANT","Coffee, made from powdered instant mix, with whitener and sugar, instant" +92121010,"COFFEE, FROM POWDER, PRESWEETENED, NO WHITENER","Coffee, made from powdered instant mix, presweetened, no whitener" +92121020,"COFFEE & COCOA (MOCHA), W/ WHITENER, PRESWEETENED","Coffee and cocoa (mocha), made from powdered instant mix, with whitener, presweetened" +92121030,"COFFEE & COCOA, FROM MIX, W/WHITENER, LOW CAL SWEET","Coffee and cocoa (mocha), made from powdered instant mix, with whitener and low calorie sweetener" +92121040,"COFFEE, FROM POWDER, W/ WHITENER & LO CAL SWEETENER","Coffee, made from powdered 
instant mix, with whitener and low calorie sweetener" +92121050,"COFFEE&COCOA,FROM PWDR,W/WHITE&LOW CAL SWEET,DECAF","Coffee and cocoa (mocha), made from powdered instant mix, with whitener and low calorie sweetener, decaffeinated" +92130000,"COFFEE, REG, PRESWEETENED W/SUGAR, PRE-LIGHTENED","Coffee, regular, presweetened with sugar, pre-lightened" +92130001,"COFFEE, DECAFFEINATED, PRESWEETENED W/ SUGAR, PRE-LIGHTENED","Coffee, decaffeinated, presweetened with sugar, pre-lightened" +92130005,"COFFEE, REGULAR, WITH LOW CALORIE SWEETENER, PRE-LIGHTENED","Coffee, regular, with low-calorie sweetener, pre-lightened" +92130006,"COFFEE, DECAFFEINATED,W/ LOW CALORIE SWEETENER,PRE-LIGHTENED","Coffee, decaffeinated, with low-calorie sweetener, pre-lightened" +92130010,"COFFEE, PRE-LIGHTENED, NO SUGAR","Coffee, pre-lightened, no sugar" +92130020,"COFFEE, PRESWEETENED W/ SUGAR","Coffee, presweetened with sugar" +92150000,"COFFEE & CHICORY, NS AS TO GROUND OR INSTANT","Coffee and chicory, NS as to ground or instant" +92151000,"COFFEE & CHICORY, MADE FROM POWDERED INSTANT","Coffee and chicory, made from powdered instant" +92151100,"COFFEE, DECAFFEINATED, AND CHICORY, FROM INSTANT","Coffee, decaffeinated, and chicory, made from powdered instant" +92152000,"COFFEE & CHICORY, MADE FROM GROUND","Coffee and chicory, made from ground" +92153000,"COFFEE, REGULAR, W/ CEREAL (INCLUDE W/ BARLEY)","Coffee, regular, with cereal" +92153100,"COFFEE, DECAFFEINATED, W/ CEREAL (INCLUDE W/BARLEY)","Coffee, decaffeinated, with cereal" +92161000,"CAPPUCCINO","Cappuccino" +92161005,"CAPPUCCINO, SWEETENED","Cappuccino, sweetened" +92162000,"CAPPUCCINO, DECAFFEINATED","Cappuccino, decaffeinated" +92162005,"CAPPUCCINO, DECAFFEINATED, SWEETENED","Cappuccino, decaffeinated, sweetened" +92191000,"COFFEE, DRY POWDER, NS AS TO REG OR DECAF","Coffee, dry instant powder, NS as to regular or decaffeinated" +92191100,"COFFEE, DRY POWDER, REGULAR","Coffee, dry instant powder, regular" +92191200,"COFFEE, DRY 
POWDER, DECAFFEINATED","Coffee, dry instant powder, decaffeinated" +92191250,"COFFEE, DRY, ACID NEUTRALIZED (INCLUDE KAVA)","Coffee, dry, acid neutralized" +92191500,"COFFEE & CHICORY, DRY POWDER","Coffee and chicory, dry instant powder" +92191520,"COFFEE, DECAFFEINATED, AND CHICORY, DRY POWDER","Coffee, decaffeinated, and chicory, dry instant powder" +92192000,"COFFEE & COCOA (MOCHA) MIX,W/WHITENER,PRESWEET, DRY","Coffee and cocoa (mocha) mix, dry instant powder with whitener, presweetened" +92192030,"COFFEE & COCOA (MOCHA) MIX,W/WHITENER, LOW CAL, DRY","Coffee and cocoa (mocha) mix, dry instant powder with whitener and low calorie sweetener" +92192040,"COFFEE&COCOA MIX,DRY,W/WHITENER&LOW CAL SWEET,DECAF","Coffee and cocoa (mocha) mix, dry instant powder, with whitener and low calorie sweetener, decaffeinated" +92193000,"COFFEE, DRY MIX, W/ WHITENER & SUGAR","Coffee, dry instant powder, with whitener and sugar" +92193020,"COFFEE, DRY MIX, W/ WHITENER & LOW CAL SWEETENER","Coffee, dry instant powder, with whitener and low calorie sweetener" +92201010,"POSTUM (COFFEE SUBSTITUTE)","Postum" +92202010,"CHICORY (COFFEE SUBSTITUTE)","Chicory" +92203000,"CEREAL, BEVERAGE (INCLUDE PERO, BREAK AWAY)","Cereal beverage" +92203110,"CEREAL BEVERAGE, W/BEET ROOTS,FROM POWDERED INSTANT","Cereal beverage with beet roots, from powdered instant" +92204000,"MATE, SWEETENED BEVERAGE FROM DRIED GREEN LEAVES","Mate, sweetened beverage made from dried green leaves" +92205000,"RICE BEVERAGE (INCL RICE TEA)","Rice beverage" +92291300,"POSTUM, DRY POWDER","Postum, dry powder" +92301000,"TEA, NS AS TO TYPE, UNSWEETENED","Tea, NS as to type, unsweetened" +92301060,"TEA, NS AS TO TYPE, PRESWEETENED W/ SUGAR","Tea, NS as to type, presweetened with sugar" +92301080,"TEA, PRESWEETENED W/ LOW CALORIE SWEETENER","Tea, NS as to type, presweetened with low calorie sweetener" +92301100,"TEA, NS AS TO TYPE, DECAFFEINATED, UNSWEETENED","Tea, NS as to type, decaffeinated, unsweetened" +92301130,"TEA, NS 
AS TO TYPE, PRESWEETENED, NS AS TO SWEETNER","Tea, NS as to type, presweetened, NS as to sweetener" +92301160,"TEA, DECAFFEINATED, W/ SUGAR, NFS","Tea, NS as to type, decaffeinated, presweetened with sugar" +92301180,"TEA, DECAFFEINATED, LOW CALORIE SWEETENER, NFS","Tea, NS as to type, decaffeinated, presweetened with low calorie sweetener" +92301190,"TEA, PRESWEETENED, NS SWEETENER, DECAFFEINATED","Tea, NS as to type, decaffeinated, presweetened, NS as to sweetener" +92302000,"TEA, LEAF, UNSWEETENED","Tea, leaf, unsweetened" +92302200,"TEA, LEAF, PRESWEETENED W/ SUGAR","Tea, leaf, presweetened with sugar" +92302300,"TEA, LEAF, PRESWEETENED W/ LOW CALORIE SWEETENER","Tea, leaf, presweetened with low calorie sweetener" +92302400,"TEA, LEAF, PRESWEETENED, NS AS TO SWEETENER","Tea, leaf, presweetened, NS as to sweetener" +92302500,"TEA, DECAFFEINATED, UNSWEETENED","Tea, leaf, decaffeinated, unsweetened" +92302600,"TEA, LEAF, DECAFFEINATED, PRESWEETENED W/ SUGAR","Tea, leaf, decaffeinated, presweetened with sugar" +92302700,"TEA, LEAF, DECAFFEINATED, LOW CALORIE SWEETENER","Tea, leaf, decaffeinated, presweetened with low calorie sweetener" +92302800,"TEA, LEAF, DECAFFEINATED, PRESWEETENED, NFS","Tea, leaf, decaffeinated, presweetened, NS as to sweetener" +92304000,"TEA, MADE FROM FROZEN CONCENTRATE, UNSWEETENED","Tea, made from frozen concentrate, unsweetened" +92304700,"TEA, FROM FROZ CONC, DECAF, PRESWEETND, LOW CALORIE","Tea, made from frozen concentrate, decaffeinated, presweetened with low calorie sweetener" +92305000,"TEA, MADE FROM POWDERED INSTANT, PRESWEETENED","Tea, made from powdered instant, presweetened, NS as to sweetener" +92305010,"TEA, MADE FROM POWDERED INSTANT, UNSWEETENED","Tea, made from powdered instant, unsweetened" +92305040,"TEA, MADE FROM POWDERED INSTANT,PRESWEETEND W/SUGAR","Tea, made from powdered instant, presweetened with sugar" +92305050,"TEA, FROM POWDER, DECAFFEINATED, PRESWEET W/ SUGAR","Tea, made from powdered instant, decaffeinated, 
presweetened with sugar" +92305090,"TEA, MADE FROM POWDERED INSTANT,W/LO CAL SWEETENER","Tea, made from powdered instant, presweetened with low calorie sweetener" +92305110,"TEA, FROM INSTANT, DECAF, PRESWEETENED, LOW CALORIE","Tea, made from powdered instant, decaffeinated, presweetened with low calorie sweetener" +92305180,"TEA ,MADE FROM POWDERED INSTANT, DECAF ,UNSWEET","Tea, made from powdered instant, decaffeinated, unsweetened" +92305800,"TEA, FROM POWDER, DECAFFEINATED, PRESWEETENED","Tea, made from powdered instant, decaffeinated, presweetened, NS as to sweetener" +92306000,"TEA, HERBAL (INCLUDE SASSAFRAS,LICORICE)","Tea, herbal" +92306020,"TEA, HERBAL, PRESWEETENED W/ SUGAR","Tea, herbal, presweetened with sugar" +92306030,"TEA, HERBAL, PRESWEETENED W/ LOW CAL SWEETENER","Tea, herbal, presweetened with low calorie sweetener" +92306040,"TEA, HERBAL, PRESWEETENED, NS AS TO SWEETENER","Tea, herbal, presweetened, NS as to sweetener" +92306050,"TEA, MADE FROM CARAWAY SEEDS","Tea, made from caraway seeds" +92306090,"TEA, HIBISCUS","Tea, hibiscus" +92306100,"CORN BEVERAGE(INCLUDE CORN TEA)","Corn beverage" +92306200,"BEAN BEVERAGE (INCLUDE BEAN TEA)","Bean beverage" +92306610,"TEA, RUSSIAN","Tea, Russian" +92306700,"TEA, CHAMOMILE","Tea, chamomile" +92307000,"TEA, POWDERED INSTANT, UNSWEETENED, DRY","Tea, powdered instant, unsweetened, dry" +92307400,"TEA, POWDERED INSTANT, SWEETENED, NS SWEETENER, DRY","Tea, powdered instant, sweetened, NS as to sweetener, dry" +92307500,"HALF AND HALF BEVERAGE, HALF ICED TEA/HALF LEMONADE","Half and Half beverage, half iced tea and half fruit juice drink (lemonade)" +92307510,"HALF&HALF BEV, HALF ICED TEA/HALF LEMONADE,LOW CAL","Half and Half beverage, half iced tea and half fruit juice drink (lemonade), low calorie" +92400000,"SOFT DRINK, NFS","Soft drink, NFS" +92400100,"SOFT DRINK, NFS, SUGAR-FREE","Soft drink, NFS, sugar-free" +92410110,"CARBONATED WATER,SWEETEND(INCL TONIC,QUININE WATER)","Carbonated water, sweetened" 
+92410210,"CARBONATED WATER, UNSWEETENED (INCL CLUB SODA)","Carbonated water, unsweetened" +92410250,"CARBONATED WATER, SWEETENED, WITH LOW-CALORIE OR NO-CALORIE","Carbonated water, sweetened, with low-calorie or no-calorie sweetener" +92410310,"SOFT DRINK, COLA-TYPE","Soft drink, cola-type" +92410315,"SOFT DRINK, COLA TYPE, REDUCED SUGAR","Soft drink, cola type, reduced sugar" +92410320,"SOFT DRINK, COLA-TYPE, SUGAR-FREE","Soft drink, cola-type, sugar-free" +92410330,"SOFT DRINK, COLA-TYPE, W/ HIGHER CAFFEINE (INCL JOLT)","Soft drink, cola-type, with higher caffeine" +92410340,"SOFT DRINK, COLA-TYPE, DECAFFEINATED","Soft drink, cola-type, decaffeinated" +92410350,"SOFT DRINK, COLA-TYPE, DECAFFEINATED, SUGAR-FREE","Soft drink, cola-type, decaffeinated, sugar-free" +92410360,"SOFT DRINK, PEPPER-TYPE (INCL DR. PEPPER, MR. PIBB)","Soft drink, pepper-type" +92410370,"SOFT DRINK, PEPPER-TYPE, SUGAR-FREE","Soft drink, pepper-type, sugar-free" +92410390,"SOFT DRINK, PEPPER-TYPE, DECAFFEINATED","Soft drink, pepper-type, decaffeinated" +92410400,"SOFT DRINK, PEPPER-TYPE, DECAFFEINATED, SUGAR-FREE","Soft drink, pepper-type, decaffeinated, sugar-free" +92410410,"CREAM SODA","Cream soda" +92410420,"CREAM SODA, SUGAR-FREE","Cream soda, sugar-free" +92410510,"SOFT DRINK, FRUIT-FLAVORED, CAFFEINE FREE","Soft drink, fruit-flavored, caffeine free" +92410520,"SOFT DRINK, FRUIT-FLAV, SUGAR-FREE, CAFFEINE FREE","Soft drink, fruit-flavored, sugar free, caffeine free" +92410550,"SOFT DRINK, FRUIT-FLAVORED, W/ CAFFEINE","Soft drink, fruit flavored, caffeine containing" +92410560,"SOFT DRINK, FRUIT-FLAVORED, W/ CAFFEINE, SUGAR-FREE","Soft drink, fruit flavored, caffeine containing, sugar-free" +92410610,"GINGER ALE","Ginger ale" +92410620,"GINGERALE, SUGAR-FREE","Ginger ale, sugar-free" +92410710,"ROOT BEER","Root beer" +92410720,"ROOT BEER, SUGAR-FREE","Root beer, sugar-free" +92410810,"CHOCOLATE-FLAVORED SODA","Chocolate-flavored soda" +92410820,"CHOCOLATE-FLAVORED SODA, 
SUGAR-FREE","Chocolate-flavored soda, sugar-free" +92411510,"COLA W/ FRUIT OR VANILLA FLAVOR","Cola with fruit or vanilla flavor" +92411520,"COLA W/ CHOCOLATE FLAVOR","Cola with chocolate flavor" +92411610,"COLA W/ FRUIT OR VANILLA FLAVOR, SUGAR-FREE","Cola with fruit or vanilla flavor, sugar-free" +92411620,"COLA W/ CHOC FLAVOR, SUGAR FREE","Cola with chocolate flavor, sugar-free" +92417010,"SOFT DRINK, ALE TYPE (INCLUDE ALE-8)","Soft drink, ale type" +92431000,"CARBONATED JUICE DRINK, NS AS TO TYPE OF JUICE","Carbonated juice drink, NS as to type of juice" +92432000,"CARBONATED CITRUS JUICE DRINK","Carbonated citrus juice drink" +92433000,"CARBONATED NONCITRUS JUICE DRINK","Carbonated noncitrus juice drink" +92510610,"FRUIT JUICE DRINK","Fruit juice drink" +92510650,"TAMARIND DRINK, P.R. (REFRESCO DE TAMARINDO)","Tamarind drink, Puerto Rican (Refresco de tamarindo)" +92510720,"FRUIT PUNCH, MADE W/ FRUIT JUICE & SODA","Fruit punch, made with fruit juice and soda" +92510730,"FRUIT PUNCH, MADE W/ SODA, FRUIT JUICE & SHERBET","Fruit punch, made with soda, fruit juice, and sherbet or ice cream" +92511000,"LEMONADE, FROZEN CONCENTRATE, NOT RECONSTITUTED","Lemonade, frozen concentrate, not reconstituted" +92511010,"FRUIT FLAVORED DRINK (FORMERLY LEMONADE)","Fruit flavored drink (formerly lemonade)" +92511250,"CITRUS FRUIT JUICE DRINK, CONTAINING 40-50% JUICE","Citrus fruit juice drink, containing 40-50% juice" +92512040,"FROZEN DAIQUIRI MIX, CONCENTRATE, NOT RECONSTITUTED","Frozen daiquiri mix, frozen concentrate, not reconstituted" +92512050,"FROZEN DAIQUIRI MIX, FROM FROZ CONC, RECONSTITUTED","Frozen daiquiri mix, from frozen concentrate, reconstituted" +92512090,"PINA COLADA, NONALCOHOLIC","Pina Colada, nonalcoholic" +92512110,"MARGARITA MIX, NONALCOHOLIC","Margarita mix, nonalcoholic" +92513000,"FRUIT FLAVORED FROZEN DRINK","Fruit flavored frozen drink" +92530410,"FRUIT FLAVORED DRINK, WITH HIGH VITAMIN C","Fruit flavored drink, with high vitamin C" 
+92530510,"CRANBERRY JUICE DRINK OR COCKTAIL, W/ HIGH VIT C","Cranberry juice drink or cocktail, with high vitamin C" +92530610,"FRUIT JUICE DRINK, WITH HIGH VITAMIN C","Fruit juice drink, with high vitamin C" +92530950,"VEGETABLE & FRUIT JUICE DRINK, W/ HI VIT C","Vegetable and fruit juice drink, with high vitamin C" +92531030,"FRUIT JUICE DRINK, W/ VIT B1) & HI VIT C","Fruit juice drink, with thiamin (vitamin B1) and high vitamin C" +92541010,"FRUIT FLAVORED DRINK, MADE FROM POWDERED MIX","Fruit flavored drink, made from powdered mix" +92542000,"FRUIT FLAVORED DRINK, MADE FROM POWDERED MIX, W/ HI VIT C","Fruit flavored drink, made from powdered mix,with high vitamin C" +92550030,"FRUIT JUICE DRINK, LOW CALORIE, W/ HIGH VITAMIN C","Fruit juice drink, low calorie, with high vitamin C" +92550040,"FRUIT JUICE DRINK, LOW CALORIE","Fruit juice drink, low calorie" +92550110,"CRANBERRY JUICE DRINK OR COCKTAIL, LOW CAL, W/ HIGH VIT C","Cranberry juice drink or cocktail, low calorie, with high vitamin C" +92550350,"LIGHT ORANGE JC BEVERAGE, 40-50% JC, LOWER SUGAR & CALORIES","Light orange juice beverage, 40-50% juice, lower sugar and calories, with artificial sweetener" +92550400,"VEGETABLE & FRUIT JUICE DRINK, LOW CAL, W/ HIGH VIT C","Vegetable and fruit juice drink, low calorie, with high vitamin C" +92550405,"VEGETABLE & FRUIT JUICE DRINK, LOW CAL, W/ HIGH VIT C,+E,A","Vegetable and fruit juice drink, low calorie, with high vitamin C plus added vitamin E and vitamin A" +92550610,"FRUIT FLAVORED DRINK, LOW CAL, W/ HIGH VIT C","Fruit flavored drink, low calorie, with high vitamin C" +92550620,"FRUIT FLAVORED DRINK, LOW CALORIE","Fruit flavored drink, low calorie" +92552000,"FRUIT FLAV DRINK, MADE FROM PWDR, LOW CAL, W/ HI VIT C","Fruit flavored drink, made from powdered mix, low calorie, with high vitamin C" +92552010,"FRUIT FLAVORED DRINK, MADE FROM PWDR, LOW CALORIE","Fruit flavored drink, made from powdered mix, low calorie" +92552020,"FRUIT JUICE DRINK, REDUCED SUGAR, 
W/ VIT B1 & HI VIT C","Fruit juice drink, reduced sugar, with thiamin (vitamin B1) and high vitamin C" +92552030,"FRUIT JUICE DRINK, REDUCED SUGAR, WITH VITAMIN E","Fruit juice drink, reduced sugar, with vitamin E" +92582100,"FRUIT JUICE DRINK, WITH HIGH VITAMIN C, PLUS ADDED CALCIUM","Fruit juice drink, with high vitamin C, plus added calcium" +92582110,"FRUIT JUICE DRINK, W/ VIT B1, HI VIT C + CALCIUM","Fruit juice drink, with thiamin (vitamin B1) and high vitamin C plus calcium" +92610010,"HORCHATA BEVERAGE, MADE W/ ALMONDS","Horchata beverage, made with almonds or other nuts and seeds" +92610110,"COCONUT BEVERAGE, P.R.","Coconut beverage, Puerto Rican" +92611010,"OATMEAL BEVERAGE, P.R.","Oatmeal beverage, Puerto Rican" +92611100,"OATMEAL BEVERAGE W/ MILK","Oatmeal beverage with milk (Atole de avena)" +92611510,"HORCHATA BEVERAGE, MADE W/ RICE","Horchata beverage, made with rice" +92611600,"HORCHATA BEVERAGE, NFS","Horchata beverage, NFS" +92612010,"SUGAR CANE BEVERAGE, P.R.","Sugar cane beverage, Puerto Rican" +92613010,"ATOLE (CORNMEAL BEVERAGE)","Atole (corn meal beverage)" +92613510,"CORN BEV W/ CHOC & MILK(CHAMPURRADO,ATOLE DE CHOC)","Corn beverage with chocolate and milk (Champurrado, Atole de Chocolate)" +92801000,"NONALCOHOLIC WINE","Wine, nonalcoholic" +92802000,"WINE, LIGHT, NONALCOHOLIC","Wine, light, nonalcoholic" +92803000,"NONALCOHOLIC MALT BEVERAGE","Nonalcoholic malt beverage" +92804000,"SHIRLEY TEMPLE","Shirley Temple" +92900100,"TANG, DRY CONCENTRATE","Tang, dry concentrate" +92900110,"FRUIT-FLAV BEVERAGE, DRY CONC, W/ SUGAR, NOT RECONSTITUTED","Fruit-flavored beverage, dry concentrate, with sugar, not reconstituted" +92900200,"FRUIT-FLAV BEV, DRY CONC,LO CAL(INCL CRYSTAL LIGHT)","Fruit-flavored beverage, dry concentrate, low calorie, not reconstituted" +92900300,"FRUIT-FLAV THIRST QUENCH BEV, DRY CONC (GATORADE)","Fruit-flavored thirst quencher beverage, dry concentrate, not reconstituted" +93101000,"BEER","Beer" +93102000,"BEER, LITE","Beer, 
lite" +93106000,"ALCOHOLIC MALT BEVERAGE, SWEETENED","Alcoholic malt beverage, sweetened" +93201000,"CORDIAL OR LIQUEUR","Cordial or liqueur" +93301000,"COCKTAIL, NFS","Cocktail, NFS" +93301010,"ALEXANDER","Alexander" +93301020,"BACARDI COCKTAIL","Bacardi cocktail" +93301030,"BLOODY MARY","Bloody Mary" +93301031,"CANADIAN CLUB & SODA","Canadian Club and soda" +93301032,"CAPE COD","Cape Cod" +93301040,"DAIQUIRI","Daiquiri" +93301050,"GIMLET","Gimlet" +93301060,"GIN & TONIC","Gin and Tonic" +93301070,"GRASSHOPPER","Grasshopper" +93301080,"HIGH BALL","High ball" +93301085,"KAMIKAZE","Kamikaze" +93301090,"MANHATTAN","Manhattan" +93301100,"MARGARITA","Margarita" +93301110,"MARTINI","Martini" +93301115,"MIMOSA","Mimosa" +93301120,"MINT JULEP","Mint julep" +93301125,"MOJITO","Mojito" +93301130,"OLD FASHIONED","Old fashioned" +93301135,"ROB ROY","Rob Roy" +93301136,"RUSTY NAIL","Rusty Nail" +93301139,"SALTY DOG","Salty Dog" +93301140,"SCREWDRIVER (INCLUDE HARVEY WALLBANGER, SLO-SCREW)","Screwdriver" +93301141,"SEABREEZE","Seabreeze" +93301142,"SEVEN AND SEVEN","Seven and Seven" +93301150,"TOM COLLINS (INCLUDE VODKA COLLINS)","Tom Collins" +93301160,"WHISKEY SOUR(INCL SCOTCH,VODKA,APRICOT,BRANDY SOUR)","Whiskey sour" +93301170,"BOURBON & SODA (INCLUDE SCOTCH & SODA, RUM & SODA)","Bourbon and soda" +93301180,"MIXED DRINKS (FOR RECIPE MODIFICATIONS)","Mixed Drinks (for recipe modifications)" +93301190,"RUM & COLA","Rum and cola" +93301200,"PINA COLADA","Pina Colada" +93301220,"COQUITO, P.R. 
(COCONUT, RUM)","Coquito, Puerto Rican (coconut, rum)" +93301230,"SLOE GIN FIZZ","Sloe gin fizz" +93301240,"BLACK RUSSIAN","Black Russian" +93301250,"WHITE RUSSIAN","White Russian" +93301270,"FRUIT PUNCH, ALCOHOLIC","Fruit punch, alcoholic" +93301280,"SINGAPORE SLING","Singapore Sling" +93301290,"STINGER","Stinger" +93301300,"GIBSON","Gibson" +93301310,"MAI TAI","Mai Tai" +93301320,"TEQUILA SUNRISE","Tequila Sunrise" +93301330,"GIN RICKEY","Gin Rickey" +93301340,"GOLDEN CADILLAC","Golden Cadillac" +93301360,"LONG ISLAND ICED TEA","Long Island iced tea" +93301370,"FUZZY NAVEL COCKTAIL","Fuzzy Navel" +93301400,"IRISH COFFEE (INCL COFFEE ROYALE)","Irish Coffee" +93301450,"LIQUEUR W/ CREAM","Liqueur with cream" +93301500,"FROZEN DAIQUIRI","Frozen daiquiri" +93301510,"FROZEN MARGARITA","Frozen margarita" +93301550,"EGGNOG, ALCOHOLIC","Eggnog, alcoholic" +93301600,"GIN FIZZ","Gin fizz" +93302000,"RUM, HOT BUTTERED","Rum, hot buttered" +93302100,"ZOMBIE","Zombie" +93401010,"WINE, TABLE, RED","Wine, table, red" +93401020,"WINE, TABLE, WHITE","Wine, table, white" +93401100,"WINE, RICE (INCLUDE SAKI)","Wine, rice" +93401300,"WINE, COOKING (ASSUME COOKED)","Wine, cooking (assume cooked)" +93402000,"WINE, DESSERT (INCLUDE MARSALA, PORT, MADEIRA)","Wine, dessert, sweet" +93403000,"WINE, LIGHT","Wine, light" +93404000,"WINE COOLER","Wine cooler" +93404500,"SANGRIA","Sangria" +93404600,"SANGRIA, PUERTO RICAN STYLE","Sangria, Puerto Rican style" +93405000,"WINE SPRITZER","Wine spritzer" +93406000,"GLUG (INCLUDE GLOGG, GLUHWEIN)","Glug" +93501000,"BRANDY","Brandy" +93502000,"WHISKEY","Whiskey" +93503000,"GIN","Gin" +93504000,"RUM","Rum" +93504100,"RUM COOLER","Rum cooler" +93505000,"VODKA","Vodka" +94000100,"WATER, TAP","Water, tap" +94100100,"WATER, BOTTLED, UNSWEETENED","Water, bottled, unsweetened" +94100200,"WATER, BOTTLED, SWEETENED, WITH LOW OR NO CALORIE SWEETENER","Water, bottled, sweetened, with low or no calorie sweetener" +94100300,"WATER, FRUIT FLAVORED, SWTND, W/ CORN 
SYRUP & LOWCAL SWTNR","Water, fruit flavored, sweetened, with high fructose corn syrup and low calorie sweetener" +94210100,"PROPEL WATER","Propel Water" +94210200,"GLACEAU WATER","Glaceau Water" +94210300,"SOBE LIFEWATER","SoBe Lifewater" +94220200,"GLACEAU WATER, LOW CALORIE","Glaceau Water, low calorie" +94300100,"WATER, BABY, BOTTLED, UNSWEETENED","Water, baby, bottled, unsweetened" +95101000,"BOOST, NUTRITIONAL DRINK, READY-TO-DRINK","Boost, nutritional drink, ready-to-drink" +95101010,"BOOST PLUS, NUTRITIONAL DRINK, READY-TO-DRINK","Boost Plus, nutritional drink, ready-to-drink" +95102000,"CARNATION INSTANT BREAKFAST, NUTRITIONAL DRINK, REGULAR, RTD","Carnation Instant Breakfast, nutritional drink, regular, ready-to-drink" +95102010,"CARNATION INSTANT BREAKFAST, NUTRITIONAL DRINK, SUGAR FREE,","Carnation Instant Breakfast, nutritional drink, sugar free, ready-to-drink" +95103000,"ENSURE, NUTRITIONAL SHAKE, READY-TO-DRINK","Ensure, nutritional shake, ready-to-drink" +95103010,"ENSURE PLUS, NUTRITIONAL SHAKE, READY-TO-DRINK","Ensure Plus, nutritional shake, ready-to-drink" +95104000,"GLUCERNA, NUTRITIONAL SHAKE, READY-TO-DRINK","Glucerna, nutritional shake, ready-to-drink" +95105000,"KELLOGG'S SPECIAL K PROTEIN SHAKE","Kellogg's Special K Protein Shake" +95106000,"MUSCLE MILK, READY-TO-DRINK","Muscle Milk, ready-to-drink" +95106010,"MUSCLE MILK, LIGHT, READY-TO-DRINK","Muscle Milk, light, ready-to-drink" +95110000,"SLIM FAST SHAKE, MEAL REPLACEMENT, REGULAR, READY-TO-DRINK","Slim Fast Shake, meal replacement, regular, ready-to-drink" +95110010,"SLIM FAST SHAKE, MEAL REPLACEMENT, SUGAR FREE, RTD","Slim Fast Shake, meal replacement, sugar free, ready-to-drink" +95110020,"SLIM FAST SHAKE, MEAL REPLACEMENT, HIGH PROTEIN, RTD","Slim Fast Shake, meal replacement, high protein, ready-to-drink" +95120000,"NUTRITIONAL DRINK OR MEAL REPLACEMENT, READY-TO-DRINK, NFS","Nutritional drink or meal replacement, ready-to-drink, NFS" +95120010,"NUTRITIONAL DRINK OR MEAL 
REPLACEMENT, HIGH PROTEIN, RTD","Nutritional drink or meal replacement, high protein, ready-to-drink, NFS" +95120020,"NUTRITIONAL DRINK OR MEAL REPLACEMENT, HI PROT, LIGHT, RTD","Nutritional drink or meal replacement, high protein, light, ready-to-drink, NFS" +95120050,"NUTRITIONAL DRINK OR MEAL REPLACEMENT, LIQUID, SOY-BASED","Nutritional drink or meal replacement, liquid, soy-based" +95201000,"CARNATION INSTANT BREAKFAST, NUTRITIONAL DRINK MIX, REG,PDR","Carnation Instant Breakfast, nutritional drink mix, regular, powder" +95201010,"CARNATION INSTANT BREAKFAST, NUTR DRINK MIX, SUGAR FREE,PDR","Carnation Instant Breakfast, nutritional drink mix, sugar free, powder" +95201200,"EAS WHEY PROTEIN POWDER","EAS Whey Protein Powder" +95201300,"EAS SOY PROTEIN POWDER","EAS Soy Protein Powder" +95201500,"HERBALIFE, NUTRITIONAL SHAKE MIX, HIGH PROTEIN, POWDER","Herbalife, nutritional shake mix, high protein, powder" +95201600,"ISOPURE PROTEIN POWDER","Isopure protein powder" +95201700,"KELLOGG'S SPECIAL K20 PROTEIN WATER MIX","Kellogg's Special K20 Protein Water Mix" +95202000,"MUSCLE MILK, REGULAR, POWDER","Muscle Milk, regular, powder" +95202010,"MUSCLE MILK, LIGHT, POWDER","Muscle Milk, light, powder" +95210000,"SLIM FAST SHAKE MIX, POWDER","Slim Fast Shake Mix, powder" +95210010,"SLIM FAST SHAKE MIX, SUGAR FREE, POWDER","Slim Fast Shake Mix, sugar free, powder" +95210020,"SLIM FAST SHAKE MIX, HIGH PROTEIN, POWDER","Slim Fast Shake Mix, high protein, powder" +95220000,"NUTRITIONAL DRINK MIX OR MEAL REPLACEMENT, POWDER, NFS","Nutritional drink mix or meal replacement, powder, NFS" +95220010,"NUTRITIONAL DRINK MIX OR MEAL REPLACEMENT, HIGH PRO, PDR,NFS","Nutritional drink mix or meal replacement, high protein, powder, NFS" +95230000,"PROTEIN POWDER, WHEY BASED, NFS","Protein powder, whey based, NFS" +95230010,"PROTEIN POWDER, SOY BASED, NFS","Protein powder, soy based, NFS" +95230020,"PROTEIN POWDER, LIGHT, NFS","Protein powder, light, NFS" +95230030,"PROTEIN POWDER, 
NFS","Protein powder, NFS" +95310200,"FULL THROTTLE ENERGY DRINK","Full Throttle Energy Drink" +95310400,"MONSTER ENERGY DRINK","Monster Energy Drink" +95310500,"MOUNTAIN DEW AMP ENERGY DRINK","Mountain Dew AMP Energy Drink" +95310550,"NO FEAR ENERGY DRINK","No Fear Energy Drink" +95310555,"NO FEAR MOTHERLOAD ENERGY DRINK","No Fear Motherload Energy Drink" +95310560,"NOS ENERGY DRINK","NOS Energy Drink" +95310600,"RED BULL ENERGY DRINK","Red Bull Energy Drink" +95310700,"ROCKSTAR ENERGY DRINK","Rockstar Energy Drink" +95310750,"SOBE ENERGIZE ENERGY JUICE DRINK","SoBe Energize Energy Juice Drink" +95310800,"VAULT ENERGY DRINK","Vault Energy Drink" +95311000,"ENERGY DRINK","Energy Drink" +95312400,"MONSTER ENERGY DRINK, LO CARB","Monster Energy Drink, Lo Carb" +95312500,"MOUNTAIN DEW AMP ENERGY DRINK, SUGAR-FREE","Mountain Dew AMP Energy Drink, sugar-free" +95312550,"NO FEAR ENERGY DRINK, SUGAR-FREE","No Fear Energy Drink, sugar-free" +95312555,"NOS ENERGY DRINK, SUGAR-FREE","NOS Energy Drink, sugar-free" +95312560,"CRANBERRY JUICE ENERGY DRINK, HI VIT C & B, W/LOW CAL SWTNR","Ocean Spray Cran-Energy Cranberry Energy Juice Drink" +95312600,"RED BULL ENERGY DRINK, SUGAR-FREE","Red Bull Energy Drink, sugar-free" +95312700,"ROCKSTAR ENERGY DRINK, SUGAR-FREE","Rockstar Energy Drink, sugar-free" +95312800,"VAULT ZERO ENERGY DRINK","Vault Zero Energy Drink" +95312900,"XS ENERGY DRINK","XS Energy Drink" +95312905,"XS GOLD PLUS ENERGY DRINK","XS Gold Plus Energy Drink" +95320200,"GATORADE THIRST QUENCHER SPORTS DRINK","Gatorade Thirst Quencher sports drink" +95320500,"POWERADE SPORTS DRINK","Powerade sports drink" +95321000,"FRUIT-FLAVORED THIRST QUENCHER BEVERAGE","Fruit-flavored thirst quencher beverage" +95322200,"GATORADE G2 THIRST QUENCHER SPORTS DRINK, LOW CALORIE","Gatorade G2 Thirst Quencher sports drink, low calorie" +95322500,"POWERADE ZERO SPORTS DRINK, LOW CALORIE","Powerade Zero sports drink, low calorie" +95323000,"FRUIT-FLAV SPORTS DRINK OR THIRST QUENCHER 
BEVERAGE, LOW CAL","Fruit-flavored sports drink or thirst quencher beverage, low calorie" +95330100,"FLUID REPLACEMENT, ELECTROLYTE SOLUTION","Fluid replacement, electrolyte solution" +95330500,"FLUID REPLACEMENT, 5% GLUCOSE IN WATER","Fluid replacement, 5% glucose in water" +95341000,"FUZE SLENDERIZE FORTIFIED LOW CALORIE FRUIT JUICE BEVERAGE","FUZE Slenderize fortified low calorie fruit juice beverage" +95342000,"MONAVIE ACAI BLEND BEVERAGE","MonaVie acai blend beverage" diff --git a/pandas/tests/io/sas/data/DRXFCD_G.xpt b/pandas/tests/io/sas/data/DRXFCD_G.xpt new file mode 100644 index 0000000000000000000000000000000000000000..15de11e8f9f4960bf4b2ed96c90c497ce02e8b32 GIT binary patch literal 2195200 zcmdqK3!G!wUEetn7R)ONp6RxiC2;WC7uo*55*#Wt;ybXA&GNi~wHdwOtitEyC8 znpR0=J-XfAJPP9wFAzhBc^M217~A7V_b?uI?ZHbb#4+s`IAG|(Jf>|-mW2eohGe~% zko^AV+;i`_?{lwIb?-%={&eYibkA4+$2tG=KmW5*DV8fO)~b}6t@885tzKJh6TkJJX4duST@J&i`AxoEK2G%a%kinT+rFH$ZtcbC^-= zXaLgWgZ8$&Q;qi0dW-SM_mO8e{a48=Kkd`C)su~816a5enbs-ldNuh>F7|iXXv~JA zVKzA#kG$!c!pC8s@6{C+A6S0&2km7Og3tBKr@-g^L41m(TDiHlQme1Sr)g`JXxWcX zceB4W+S}<5X7Fj5*DTQw@c9)jJ~4$Ul21;A$#~=ko*u*}rpQQ<k1CTwFio%Y6=vron&Kl*Z;4-EzD z#Zsr)uEe%DPGo+32Hnl+XwqZj-qviB#zHv|DDW{t`M{sc!6G|b>#Q(225|ClKZ?Nz z^A9n2pP`G772hx| zwS*Ka;GxAQ2LTV`kss&t)8+L}r_vHpARhi^rz=LS#ptDIv9V-nnY-&< z9z`_6KRK{A*5zAwj7Q$`Dv}=$pE>=z51$-(a|?@g2Ye0!_{5x1SWWUfK74XuZJd3e zN5&&R@$3LT^MqHK51$-Z8$Uj8y+wS4<*7D{oy7#8g?+v->Q3_=fHod^@4n5a+&Wj_ zYtxN(r`B2TP=Gbr8ie4pHNH~7bYgQfoJ{-E*_0*~$=dV&vPbib@yIWHJe*H5H4EmG zFEukBdF*B3d@yI#3pqP}&|p4TzR27x-ErxT2jKJOa6ZYVVZnUzH4HM|{q@T>pURoy zTC-IYcBjpd+tO+B1&dO%(Wx|a7A{Qsf=};KcW*TA3cJ&0-3gnHcnlgAY1$i3*$GVD zcYEW(71oe1Zz29lDF+2MoAkC=e~2Za?d@KVB1O|)w}4hAv)vw>4f#8JTQNqh4$;th zV7&Vqp6|~mncb0J>g6NsPJAGT+I@Msfq#v6KmCpVeC8S+n)7#6e$3%v2%vOH_#-mj zeN2fztNO*GtNtI-c-AXFJh~b}CqSQ4hGx9`K4!}g%uKn`Xw$H#eYR48$w6!*(M|Ay znc3=)VU#iqpPm};dE&t4({A$dVX=H}snc94w`vU}1vQ!-wp?63 z=kw{{rv#tLh>s7uTUVB*qou8Je~6@@;b_V>x*J!XtC=Df{3S>5it(Ob;`1*tzuviKj|K{sld?NKr2b01lQopi{34Hck 
ze4_PB1CYWeTE8+3$awG5ZdrV4tF=zCQ)@N~A_Ebm$Rb&;zCf-_PsIeE{%(KT6_W)! z2#w@Acs0x4Q+i_e(r7$kqv=i$bBG#G<6xqVj>-4}eigI$b( z+`cPGdue?JeEuqmkA^yVE|qMN&33WmLsM~~R54mi=dH%IZ|3n0$!D!ztTh5K z#koSsXK&E$BUIs!DNchay%&6blJv{PM+7%$P@V9)Tzo`ugUe3B22=W&aqU|OkDHGf zR80=Z#m5Y)<_{3~Z29mpgQ^STV+K|8$ftN-<#VRksMYI5>?)|h)x&c&4Woi&GRtmr zWZZa%O1zYxOWk3AFhEpv(1WWlkYzT*rNTgG-8323Dl0mC;u<7c)?@0d@q+bdjv7-lRh=?(E8UdrCdH5)k50Q`vc9fJj8uxqF zyks1z-V1xtxVHLC9r=lCuw(Pds0BB!@$^H>zX!>W!bg=K#Hjs$s(JWiOMW_ktjdoy zJ6BwmI@R!};eA%}u_oeKcU0ak@Mnx`4}LO$Ph6Mf_y!+7*>+TptG6E**Ivc##G$R;|*(T2I_zq4i~w&-QrMpR!9` zxSlDZV$ON0HJ`UWmbp8|wZVfSd=l$e0H56Hm~n0IR0tns*c0!k4d9c}QDAW2jBDfj z^!V_N2H-`T4XoAYfYBji(R}>8_)j42iI4DG#Re+3hGLCl0j1C*RY&W00&cndx zMSgs&fwR=Fm?I%KA8X(&^(!YMA@Din$48N$m}G>|yZI>clM^zcAK-JpA0J010(bC~ zM6bV|;pP)=f3k}Ve7@6{k6dS#`W17e=;q_mubhk&!Dq*hkD^~O$wcZ`PRLmL^%jMX z(c;4&r*>h3ce~kIu5?a{KT9QS6N^avb<0nDLaac^l8yT_0Zoazno z!}mAI_?QWZC-u%FL-F`zSEP(eo|z|CcF0#78O@?od@N?zH0K@r)0{^((u`AU|cr{xD0w-2N+OB+)HD z9{-h-NFw+&{P@^PW|oQ6ANp{?bn{X4D<@h&H1}?ei~DHyz{Sc{mL#f@w$GY`PW zH@+(5$H6DoEIvLTUf7o+)tO3C1fQ#J`Jt>shjzsxBYt{+K9+uE6NZn}8rNU&QI(JF z$O~-Q@cDq|Q<9JExeI(#C2G~-0D}D7z~@uJd}z&-ul(G}R@dvDTD67jdRD4nYhk?* zT4*Gck2&oUxeK%gv^yJ2`xnMYwP%|>?57*Bz1g@2gC6=FM?o2U{y>M12M>DztF@Ye z#rzkD(oK79Sc!$(~eOhCvZKgtqUo)pZu{>}r^uM+nC7Yi$ns1MC9 z5KmlXDBFf8>x1OY6tfiidzP^8AG>yTP-{am(o}k5dpz1@DBK2NdToL2cDH&W2yGjv z(1wrrMdSLr9w$CdGHmUx)?Ed*inG?Je6KDP<=R)AVAf{6F-k|Z`kUCiJ(*siJt|p8 z!+QfhzZ}U2y2h()adV_V2)P@J4|LD6+aYbH@se@QZd>GJL@e3 zfH;w2xy&=}P>qZ>HEU7g`o(DOFqvKGj!BuOm@+_WX#a9|Ym1jclr_g3o14?X5cr+O z_4o4n8v%S2&Ek{-+i|}T$VV|VoRa&48G8&quMCkNSsz=jgE3u7fqdNKAR$W`qhenU z`0)J&0rI1Id0jNT@jyO`pU*fN-VgBM@r?jJ_9!tH9fl(&;otr2k2Oxr10B>izJBd1 zZT+JBUYCWI8pfaYH`ul`+V=waNd4kwJNxDue7-ggA6FrP7}20q8)@XNty>=T^M=DWc94+(1cm#5qLB5Wx*A)NvW< z&Et$uGmPuM@Ii|Y#R?mS#fH)noV#%mVb!E0{5#LyHIq{gU{KO3hDx=U>YrX#vUq1#^OUqcK1*fK!=ER#vVSDA-OWHKmL0*9~nP{9Ek-D z>+Hb-TU)QRI!)H9m5PC@?zR#Kj1S#M4PHY1(cS)V!V%(5Z`>W=@Yu*8BQD1i(YC*Q zf%lB-?>}$xfp~bO2S$glUBq)}oR3R>d~)P1h03aXGp-+O5FZ{?Uap*h2@1481+H9H 
zetqb#MC)H{qjw1calu@`iD8En4NJLvt#SQB&s6zTTE!~j@~*_X_-i~4K2R8+Wv-Na z9{TfIFM`i0$%hUeq&ll@*6gf6tvV~PG}gcF}W`+0rg|beYYVGOphsJlwxiLPDLNZMEJ|28P0DQ)#Ri zH^FDOJK$$q&4$YG*s~SPc5WP2jBh#ow;IIUQfD3HjrU+;hBqPCI|@|g!iC}Jf@)ms z(Xvf3`}d6NfBB#M_#o@qJzS2}8808S=pHaDN6Ga(JfDRRh!NzocswZT7m0Ow+HW=< z{l=$B`^?%a#nUu-ckX>>I2nW|)@e72HI!x*<;0stJUOVd9#S-QhOPE_JjDGbir{%KO(MJlTYS1>>7{$FF)qw!_L+^D|{wq zj(EhVkq7J%eXW%$tc3l8tKs(2<|DG>X(SziG?Oob&tE(F*goD`LRB_~>2|{AqxyJj z5j8hk;f+VXiRT~ku!ZnnatNg0iKAJ9U?iW+YnJGT@#sJJJjqX~*lL|yYOb3#>H<8y zuJj;(wL2bPSsKmE?d$=bUcbGx-f2Ag&7ELA+V0A8`Tl$|@2))k0H5~<^NFul@(cX= zRo4%`FE3FsH&2V3(G-uuA_D5`R=f^HvFXZfw@o0qy zSevb4qgrA2uh(nqu>z?9{cVoBLoA%$KO6M1gFU}DI9+`^|2?rBOGdqQ-#cxxRit*UByNOQ?r9>Op?%r&nQVWi+oY#`phTGjCHoK3;sO^J|@bk8p7BcYR z`D1OIZC)>ySmnVrq!}V>wN=4s-nG?AqqDwRU>kjQ2E?ar@4f~QGH9h#N>mTQCnA* zrlY0pUbafRfKM-)55$M7UV-cqhKy{j+!EP{L5dTTk7b{D^@+XF<*gpl%I)DKPe@Ed zOTlM5A)iih%}j^(5&i}9nRfTgv}m92I?SiQ=R!0evOhwAr0}riPCb|oV+-cPk4a}6 zqv>=&PLL;%A0`&7ZSdKN=40Ve^=rAfe2x(L9_F-QJ{#SQD@Tb%;dA{>DxYE*C!m(F z*cIDjSxxdgl2369Csc1@r7Jfc%h?a(vFkse#V68ZIoplolUa}D><9S#U=SaBv}mf; zDy!EXpjk+XLLX)}z2O_6{ns?{&^RE-~Sw`4HYzK29=Z_Yu zU#WR_&gaeAd>rNnWg|8G`$FU=HSf;({BRJTQ2P_AT??>3De9D}4zSO>ez=^U29AH+ z_Bm9$7QiP(opSN{(M0kiwJYE()R^|i>%UUeDHk7}-{O)Vq()VXjT*)|3CGbcKCC&u z&>i+Sb2pBLe*L(Q`~>sS97k*O$@n-Le10N~kMGPhV(d)vsm&+j<3#ZJ=>&Wtyw?Kx zr0mDQ=V}5z5#DQod{Xvf;PW#{_?TnE0PnRxJ}LV#@cCJlPrK4+*E+Q`^G1NA{b~1x zlm4`SDX$SA@cB7SKAN6Oj2<-jWZZL!et^&0g87&+XKU=N9saTWSBU&LJy-Y)KE?Hm z>p!p0$Bg}mX?y4aDjM?RSZ%_m%bQuFfAuXiNl6Yk;{ z%qKN34?gcq$S2grFPKkiULJfN(dT2@A5yL0&>>gc*nGV9CpDdN*`IgC=EK5ui`riR zpVV~9#pm4#_=M`#g7~DSQ!YO5@#RB9AJ$&0b(W|aYwReHM}8<(inaIp)1`G(W6jNK zD(ugD6Yz-~OULAs*|9YE@B*VUerV0VEZMOlKvwDoRokL6h++9SG5KV6tO!2u^Oql~ zTNWR4mLYPZPKFN;z4kd=zoPi0=F7n6@kD$gT-bu;CpBLNKEIfZPq+(PFrU=C3w-qc%T-CpX@U)=wY(Dph{8_(XcIn0zwpy*Qt* zj^Yzwp|$dVwB#qsI;R-vfc$)YLO!7$OxmA?@JY>karybX3HgM2u!ZtT&3kb^e=jK? z(}Stg73=;&`K0E(IG?|-@~Ji%9a2|oeyAVDp+GW`f@VPxEpHRCj+g%8s6z#I?2l(6( zmrwZ2LbSXPJ}KH|g%3Y}r-J>1*dd2~?UmKq`YLNw&Mr4wjl?#(f(>xkA!oPOySy

    =`O)U1F}EQ0+I%uRx1fH256@3<^AS^0E#wTST20lDLA&iexfL6Hx!HIQHn?5jqeqnr4_F>?| z^Ap1Ogyf0yEMJfC;E9FtN!f>i56@3<^HJ=N1CL!>Ghk!43Cqo=#y525&V7-6=J^S_ zd^FvcM}C6%WZZobAD*8O!pCiXtRxp!5_V^YDz!xO5&t5)h*c=z1Yku=nW&GpzUtE5q z?Kx~dY1kYWAD+JvSAHV4$1O~L(y%!$K9BmzPrbHQt0#{Bc=gL{ub-E?NRkok&tt)S zJP~?(Y#3fX!6QEbeB!vTR7^Ga@cbXQeu@5g_>k?<*d3=WKTbYg^PCD3DLeERe0ctk zCLcw9!abPVe=R_MQgLPA!}EXQ^9lE0hrlNlR|Y=+M>6{p?!gX$Pb#hqeE!cEe1!c8 zS1qo-iTHT!Pbxa)vOm0jxTbyyKH;jhNIt3Pl#9;?V#`msYAupaDmvxj^MuOB^fO|5 z!Dkkt+-Ow2OtnqAFZC_$kAHgT2tNmVh8XzpT&4Q9vrx%g5pq zE%UUfDHop)seFoeweEtKnlHGDFOYmhd;Pus zAb+_D#0mQK?{)da8AR*w$-^KTe16r<#~YTH!)LyQfyb|93)Eps-oQfpx2%BWP5HWpYcd5`22b-KAHwvn@`3KH2D07 zC_YXDZO!3B_jhaa$+&?ApWjgV_>6y?cr5=FK1h^&=x^BgCkjv6V?^-z&18JSo!FxI zr0v7N=TnLJh@BUa`%~tXpR|1#`23d2$IpK`70j|fp$aDDC(?gqT*1KSw-fRSRj`He zNm;?b=Vn4a)&U_DM{?g5#wTS31D_{VKIFfscmm&8i(R@8c38WEGY9$fo7wv<<*Ga# z2ybKjshwa#e!))>S zg*D={Rw>ttiK~g4jVXWDdRzVOByZJ3l@8c^9&qw;r#@>2oLxWYbV`8Yt*6QnRd-{%=>S4n|iJet@V~7o_jz(LEW^~6>w8xji*xFz=$&WaJ z56|!Q@^RyFl(~tNCNG~*Jeidy@Hrs;3g=@L)w1>%$Rw$t{Dhc%oMY$keDNb4M}rT~ z?+xce(Z4mEUm#-hQEW)b=Hio~R5I}4`Mu$MEIf+*gibH!;}b4FM>@{{AD-VEpHJxY z;xPFf={y5`cz$mJKB4iSx%4YsevWjW0X{sxH=K{#{)Fq+A=sZI)hTO`f;-^z;b{Nm z<`b@4hrs7Zb;`wu=Re1jpXd#`bLp4Ye;uh#x%lw>XD^?%RF&1wF|&pdxMijU>tbr!$y1H|Ry93XlH=tu{JiU7g>@cg=%d?F3>qWPp{puy*p z{`T3aUoQEHG|-FYla_%7AD(|0Q+^^1^rHEsWuU=_=O23c-0KN&xD9m3@|TbOL3=q2 zaDWfbAB*JUQn1kERbTrY!6%~z8hm(uOI$ut2HLA%bMi^aK!eYx{q3_$zg+SYWuO<% zCnW<7K0N;?uKYw9=!Nr1$v}h8?*++^)5H5LXauJx_}b?@j-bJZ=STU;kCTr}!6FQ_ zpZtXL$)jG%3R#-jr*J3=T5LXI2!lZ^0+(P%FnW(HG>DA z|2LSAJ9BImN!<^yQnOKts=}(t$7z(qzDkr=nBR_RwGO`CRUG#3vP(2*9h;9%jjilWFu>=E4j(i6 z!(@305grRJ(dLyO9X?(Mmdd0{F+kw+x;T84`6-i5UTMy{n~YDm{G{RR!RLQV%11hR z3yN~*F!-e5?7`*ul-3wrxZ0H|Nc*t z@R7Q8*nHB^DHorAmWWTp0D4jKlZH;Y`0T5Etoj@6<~euy4SSbLRP_y8zlamBztJ9D zVL9Bw4E_3U$%oe^3FA`0AW<{FPRdU_KF)BN*BTxDXb61%MI;|<6fH-I1S#A?9}b_R zx7OhE#$M80>VOmI=X!=l{gx6CLG<$0xf{4)Edm zKjD0&T~=nN5&13g_&EJm_$>8keHi%sKG~m8K9>K|_wV!Y$%0>(V?^-zgGfGHd|VN- 
zaQDvU;S(-DN9*Ikhv$!l%8wtPQ18wTkWJKvKoj_T>G=@;O?al6|1R;KTDX6Y&X& zA1;>9(dv|o&!4IGr}E%h1;sb`5g|x#W~d6(YOPd?zGLW+{AbCh^4h%~if`}|+1sNr zEBCwO@lLOsmoYTt=g*Vz39m64pHDW&(BQ-K6Qq7sn+%DWOooDd#irP1oo3kPCJ3nH zQynoRX0j@ksLk2bi>n#7@zc9yX5m_@%cn7UM!MN zdahma`B~!Q-Csa~daJgUd+TYWiT3UUYC!sX@d_*~cGBZ3@CfW8pAf*OyHbA;%%Ku0|)v?U09-WSQoGlGuJ|5+@b^ei;^ zJf4hCsCo9SzY(loQSy_Xg$AErOvp!?XWtRfV&x}23vE31ypYZAFkvlT*+0^8tpZpU~ zznpxsBulYH8CNBH)mQX$x9_R2d!oA1PuQ>8^^sXNA^Akz z5ub4Pen@KrB8^7w3^4L&^oCkY?2&s3;QwP*7F zLiH;}6AeDRez@cV6H~3UJ5()T4(n{kH`d0z3%$vdss+gQa%7zmKYZV_dHwJhe6#`_ zQhqe}WIDhBKD<75Fdtv<#Uj#st@(uppD62`qW9wR!|V5g&uS4m<<+ksKB4Z5`0RF} zQw!jeqW9u_`2M)~d_vvVBKf4~y*M9Uzc&G&&>8#!W@LccDvIVPQb9+8(^@uIVUI5OygC*i$67}49$m$M z#o&`YR|YU3P(nVL z6Cm($)n9elpZgYUf6!je?2oJdY6KrW@lnTzPWuzeCv)OM{_A@}?KAqbQ2T@Sa%O+5 z`j9jy_4s$)v*7r_6HXWY-@~Se#6m}jeDD;A*S)u*|djaz|dZI!lom3d1o}}af`Iwofa4h zm<&g#*gCZvO8K7g_)FeT`lawWyV9&zd@#kjQiYGnz=J7PduhGdc>Lv8U3@THHj14& z0V#aouDEdI5|HuusW-a#G@8ixSs)*wF+O^hmQ2L?UgPl*A3vX6slfEK&sHiR12Yt- z4ZruE-RZ&fOfL6&Fe@-ae(%Y{bH?M}-68oYHA~GRJUO8uTg2{U?e%IAC2Yhq4?6TM zx@V=)=BUd~z~FA~;O?M1?LAL8c7nAF>&@!|O=cImV*zlt|Jn@782Iq{yWq3dJX@}` zfC>8s%AAuJJ(r3oR7Q6q0}N>r#2aXuIday6q0QUNGXlWf}G?vv9O^{3rk zyo+lb?)1jp!Nf<`P{L}4Vs9hX!pW*bHkn=-^g=IU0WX3NpI@l@FKy?6$v=a$)WSBED^Y);735V%!v5np(2+4&WQw>xmZ8iA(u*HWFA7Vo_*vLu~D#1m1F-6Ht$?_+a zfDgdUHUTeFoWO_YXA1pt^P$MIu+yDJs}R7b>_;d8A6W3E%sDggQRZK(#X6slf<%F> zH+)Sqx6@?K{jgZiiTTBDcfjYPmTQ^~E!*s4l?qH~;Dn8)JH0WR?!XLN<0q@_qr3w? zze4&Y=BExmbI@TdfKSfoV14KD@BWhjK2m=8m`AZY1r{kifqbImCtYFj5lrym=Pw7! 
zPa-;je8S}?b96|4-mov_M|dti#o&Wv$2dql(2yMQBanVsZoFXn@vt#+25k@Z1biNI z@Zn=-xOU;zE7dAy95otND;gH+*?stkVY0(-Q*@E09lZ+|FbU~TsWOLmqNP z7%V5ajQ4;3*^2yV>rXHl@rubOCt4KT8}NCaKOYfU#Y#r-heKZR<0CQ?vb6da(gE`& z@VP6953w;?ussQG{(Okd_86zZ=jLX7;2HO+e8^FmZfveTNIt?}<=8%d;2C^>w`rfb zBS%KUqU^Kj&vR^_Kk$lMUiq>3B=la4IpxRVlQZvy`PTx%OY5!RQ`6=Hmqz1hoom(N1BXV# zYMu7ddJp(Ch>s~atkYaut2L^aiWX~FYF;k3X_zQI8CIaYa^)*lXT^vRIX-)P{ow^n z%e#FnEN^rtG)$C!41te~%a!rOiMLtugP_K;nG~hW!Hib(p-Q8Gx*M3A(Im{A;)njE zTz;?=v0)}fDO0iS@#u9uB!`PiV|+3umU*%`rZ%2vKJL#)UP`FtpYPl3<<{(Q`-d=fb5rd2WGflo7(PeL&Y1m>ymrr)b$YC7>AD*AU<3Gawn6psK zwfIsq2ssuqu)Pqgsd#@_v%&s7XPCg}Fa71m)+viqC=ut2UiqO=Whb|n9*k` zN6!97J@VmW#-3%eLR1Ucc-?s7fu}|BaZ8fUm51?hOOj4|X}t%0?D(N${G<9Wors3B z2VOp^e-|3&{6sb%fsY+O4CE7Eg2MUaM}mweE}bX+^286Z%ZY~&l|WgkQ9fz39y+Nb zO`iB6l3I8OQ3;eG2_Earh$fl=Gh{97cDH);c-PFRLdXmm_bG}N_mC6$z@Dz8OzkJ`usk3?#}yP_xdL$07n`A$)QbNCcmUqWC<7$|Pdbr8QD)*5~De zNf_UEnQ7_4`qvYehf#b)eBN5b(CSg7{Md^aTJ5FvCh&Q!CLi13Yh0`bAKT$;w3pUf zz~}o_K8|^q?ZaS$R_*y1a$KXXSMu4UJUd66-gab}b5=f`0v}%AkNmqxoTEWd!6IXB z_QrlfEr~^K$li(XDaOoL+rzxo@$R~ z*qY0QC*U3Vcp7~8{vDeSvQ>4HR71LG^Fh*TK2lYEdt?04&2J+6qw>M7xb+gkJ`{mY zdQ@O67y%wA1diKN`}mWid-lX7qi(>2RL}pBrvIMy+0_ ztRF5@WlH$r8A0&*%j=y^rKMw+R6g7N!GPxJ6Y^ndg1~BHHl6myS++~Y2gi34 zpK@)@1R_R=lo%yuTa>#YxX>QDrOvsv3d6f2B%s{iGl2*vPl-{7`%52A>)^xVhkkqt zw7;8sGVEb)wrb%k7S8MQ<5NHxDY1oHZc}!CCX0{2=gnNdkX2VKt&n6aT#pi7nB+ry1iL6ff~}<&dpV?K-1r(k|EiVD zeM^Xc8tsHol#kUmdt!RAp!SrekMyo_<7@f+OW}hG^y_u1j3hS-^13cDuS(&A3iPu9 zRY4L_`3!CT*XR@Y@cb4(KD3E42^;!aKRy}Uz8y&je0Y5%l@E*$ZBe08E^XHAtRQi| zvx2~U99}9P7@vF!59ooSpc8hX_;~3>+W>%K_xb^lv6C1X&Rr%5-EI8iA^l!bhox> z8lNjh;KS$dihjBIB=Ynjd~!8R1fQ2h$WJ006v8K0HpsYf@4XH_*b>*GOrT-$+S}i`<5;vxL8`uZ7iJCT4%PXh02t0v7u`H z42Q%=P4T1oaGAl`@V$W)8?$o%a5kM}h7|bl`~)YT7N%gL#*g)Up|b*o6Hjth;av#g zGsa|0QLw%p2AS?a0hO1yt|3qz{ym5ND)?OS*q;LDlN1fVTE5a%XLOw*CPX+O@c_>0;KbbiP zX54sT!6QE)q3_9^T;wAMAJkIIruUXt8~hi1UgX1vJ9rxMG}fzCD%sD^P${+68)Uef zSY2&YgB!&wESV(6R?;I87iM) zPv{j5jPn*k1fLK4@u9(voQnF;iqsV+AGH{$>B5z9PL`R0&nKh!kRQXkJTHhS=krm3 
zBKeTb#^OA$ikRVu5cquBj}L-mRw*FKB#w%?`5=gBl>*8z4$sdHKA(%^lOPBk#wSk! zR`B^VKR$eh)m8!>_B_MG*zXA^AJsnFlZn2Zy!$nfk>|kYi++5Z_L)yj?IDuJwLAIv z*k{%LWEh!ooy5`iR_oRQr=*9G+hue7??)k5hiQwwd#HJw|RmHWNSp zZoir}odX|UKiuwhdmS53shn2BOwh!+MC+?&M7mh7%gm1OVYJO>GDAu)h2Akt%-;=s zm{We*&2lmEu-O8j@EL7&vpfQ{F4^}E_&nc_k8N{&KNk6n=tXiQk+ zAoB7NQJ-AFffRtxqyBub?R~9SI*mipC`Z9O(?NTR;pKyE?|a?Ni#Qc+i%qZWEf8j=+p{g49+v}Q0w34rO5tHZLyH<=Ow`xBeEdX)mNm>k zP)6;vapPt8IrVF;hVUV#7g(%lCkoATOg5jrJ`iAffyIhRUZV2$FKyiThad9tF=Nmi zQNbxl%0_Uz74JP4AF-SxBGDWaMvP7qL>o0SC_~1L+Ma_C6bn0<5>~CVIaz!F0;dP( z4NiIErw$Hy&K!^0bTN_~L$r_e&4q8RN!7FXw#t!34HmIS0%qaMtH) zrBy$t&CA0lG$@~4Sm0y$@v+q^J{$TOJ$!5($}S8a`2(LdKRz@9nkP%#!zZU%;^4#U zYl)f?wtji}#G9c;@(I;9pC2-%r@)8j_eS!u6;A6)Bl+0+rqy0rZ!&It2j5@m=HuF= zVs1~-w?!U4>Mkeu0%3+N68P}_m7#p(fu8z!y!Kh;6RKO;f(1T&e`PQq5#2b^DYiQ} zN)Gii<=H$BtXC?Hpg~Kh{MfpcEf_vZ3_g5+Wf&hy2$JI=c0wFn6bc956T*it^+c>_ zWQH5~e2<@hx9tyK2{YFQ^%X3dPd|2d-fXY(R|G2kv#=IKdZ;b9w6E*gD+Eu zk8Odhd46z%DZOIc*xT3Q!(-CQjLZ&)_+GgNA0CxfcJ3;HGv6jtdD6JC{|4gY)*;s} zLZq_d*bAJ5p&RJ%*dOOMOG|W?XmbHfQDz*v>sJ7uHwEx9eY)H@C^GJJ3F-2IRpwhz zm2}@su`+Ia*Ju6sh%9j`m82~)4)Pv8l>Y-Ar3Uj6ITYP7ZoHnaFT3^2H zC)7`65ghpV#}D}~1KVfCVMF8t_*ix)FH!_Pdhsz+pmbBAJ$&5pLsQ?{tUJLPyjgq8 zp`w|106wqwvp=5rp;O@^uU(T*sJ=yh$q~E(d^F>SE`^J@HcdXZzD2Z`*300-^IP2Z z+01fLW?^KDPFid6s6C2`fWa$c9zG(Cm1>UK)sk#FS}8$k*l+Op5kLL%@^R$bMd9(< zXFopCDR*fgafmATyi1P{MaV?H6iJdy401H4JkXM#1SwNT4Ger958=a6V4JSYm0~B( zo@^>PWI^FoJ^2aNFLr|F`J>s?ne+0zBOexk&o77Yv6RXKW)b$sp*v(qmA|KDYGv1eA#kxMCeX znV-C)Z^n&Z|6~xKcto6_{P^U7hqD))53ir#r(b+>;r@W;V?RFjbV9R{47?5U^SL1T z!3ue?jftqaBRAI7`tfnExW{TRr8gTle&Yo3ar<}LX3%`7f|EV@DlX9=h9u=d#sMoIN%)EQ11d_<yE(HwPaLv7zlvK!HMlgAw-oFpE|A z*Btt*;KR?~anIkC^>@l}QKg6ISZmKgM-+PIU(WUSFxj!(To%^mjT`@EjpSz;dna)+ zQN7qcU0lSEvW%URTdX+fPA+yA(Oz0@7$13<*Z)~tFPFtBaf>*eh_SWV))qe{ZXu@~ zr9}th!;k!{zajat_$(t7hCG&9qg1DpV&8~8$xW>4jyOW`!OZacPHbY=z#9IB$?vM#adqSj?G`rrV?%rt3 z^MCjth>Y}jw#!Z|&mepVkFdS&FmMSV`~nvAD)?~!j{Sr6<|?eMB}i*`weG@|u0!pp zdL4hYR%&WqF7^)&M!T5An^X$WIEC0`dIf=2lY@M3e{-bS`hq&I(G2*M9s0Fec~Fs| 
z6Q$x>t<~%l%f)z?Zt2%*@3o2yohWtp`s2~GyVZ@?o<>&=aCZ;2CLm>*y@Aib!G|X8yb1~8W7hnXPJb_BYJr2zPqsQbMHDPP)@X*eXC||Z zN4q!(^$OeVzm`@r$b?^6nvRxwnL`CW-|OK+l0u_p3OnPlMocO?d`t*nprl2}kB>4w zM(}VnoOb&|s{2gmH*WVg``v*(Y|aWz@Od-wf#bsFoVs-iJnSj#bGvg63t%T!i;eqn zYRGcE*w8i5a9lmaovl;g{q6H^cla=l57`)WhZ(fZr1ap!=NBau+bFhB=|{M&)gpK` zs16xQh1csbjWgOUq1Z-ujMIqx+}3Ir^oFQP)i3>On`;g&5 zg<3tkBMq&kUtvMjtSFJZXrI5}^H>0%FdIacIux2FpUj&i@cAI|G4;!bV6vx7{Yv)X z41EfG`1}-W74sbT?&*;Mlk0ghhff$E`PaPup*5}Gj%@ypSo(znck);_QxX85mc#zo zj@~M(PP6%P4f18K&+E(#oP4;WcWkk!4Y&G`Fmr`or$u?iNBb9i-r(TV!Gx<>Di-Wz ziR_yidkm>W#xkuybw=$oy^YNc3T zS}wLLHGs1YLeliq|;sBm*M9`5$KgQbn`1i}105nqgK+S7-_d;)y_ zse{kj`s&(ggbe4rS$y_pyL%TAEY0Z+e4gNZD4HtSn8pZO2WixZC_Q2w#pU8dF{Y`G zX{lsnN(kDe(IdjkSPkrMbYGX@2`cz}i1-+-N}EphYBiWWm0wq4_9E1wEd|E7hl+lr zL2NU8D*w9f?r1#50ALk`qDQPT>s`uF7~1&A2mY?AUvl~`LWsP^F&zeLP2@2zE%31q zKb$!jm!)6f^LJkKm`;f02_3fMr<4wUTyyX@9L+1>^GubG?begCp)0reC~iITFno}M z53m0t$3Iej3Q}T_t8tdcS`lci8MRW)O5!+sGGq{K@kt~<@xmTr?Tiiw_vzrAPk|4w z|0DU3{AjR=R56QBwEScTh7U{d;q`wc9|{wSe_^o(p$g; zY-_ejmM3#-7HY4+=lfLqlOZGET zn>I(g8~x#742C1F+3#iJBcFMjDnAIs7t80CI?bg@5xeEgXtdua!=DS_V@B)4Kbxgj z!RJ~uA7qSU2#pwsPU;WuUN|3YGRVc~5b^nsevLCf0SgqOD8QZUOtC?^8|-Y+sI&qr zLHhsN79YNJBMJeKN?qy>DPx0O?r!(S8LI{9*AwHTOP?h9S#7Q(&aBM5{N5bnxz*7O zab{)072~s6daLo#yY5r@$iWStO3ZPgk`I+2u}$zC4W{%_@Zsx&ruwu z*$U7jX*1TsOUYdJ3JpFRGE9#*$7)^Y7h24Fz-Nc+ms%eH*%YVxBHa4yF9}koOtBO*HE(+M09kx zbrNqDhggClJwS@*Xgck}eD!*hw8BK-^F@-MPOVj3t+m&6nCiTz(;s(t`;(c@d$ar= z<73a{`!8gEoi)#pj;w$kh09PfD_OkVX;J3k{oy+ZWq#eB+~*`_4{S6=;R!a~!TO+L zx;MHTSJ?JwgnUSg6;TJqz|=l=CZ4YH$g^T>}bpY%kx0H`eVDIbU`SJ;`PZVhi8~7oF#0>~`_6bzT6~cIoZadg(S0&*-oxvwOZh1kS1YT_RD^84o8(jK?)G*!s0dj; zZ{YKvT=JtGBMj@YJ_W?- zMcju1&k1fWK6ccH0?#?RfzJzl`Cw^>&|tJ2H#EvQm7PId`1IIJ@b15AD-W9`7h4r{^Gn&-pS{FoI{;YM#jw-uc`85poW%RourCW zcX0k74XtB_^dujnhX_}WZ^q4+i1>y%zc8AOPOT9U9MtHq#b-2}_J{dJ1U@`JQ|cF& z7>)i%y+!i5fa95>-kasujhiohOx3SybG=+7uT?1*BTCqWc1!Z9j%Hh3@>-Rx?r!KO zviYKM^JRMuKCSh0mDc=2Xp7HycBMDY?-1I!`HHuweCoATEMin6>(fR3+LBMbzq>IT 
zUqH+`pEu*?D<4$(tinD+e&!wmNj|Hv&yb(o4uOoD-~I+aKJ!j6+a(V+FzJJh?ACu3*{JY5~zy7`Quqr>W$*UFA$1YH%x&ket zc92|Wd3hB2D>ig8$)xcSgyh6^A7oGHP1BbI14uTBU-Om@-rS? z8VzuEx7Me!_a@_JRpdXL@#i(kCy7y3x+nSUNj`Zp%1S?so3)lIKkc)%cFe6PN^ACA z$)|m}Kgq#16#K$m_rT{>9zM40B$;~gZ*cIjWhYNlFWwLE*$&~8aP{5xUtE4{J~>-` zC;wG*#OJwwA(S=8)YMOY@;Eg$WUUiZRi)#BS#L68#bNK^5j)YnfJwWQq)k5ah})jp_EFw9ZvIR` z;nOabD=iSB<7HU0_+SxRz?MX5HtfB?0X@1a7$%=dcdIuZ9c70W{9VS)pJO(k(n@V* zy@-D%q18$Qo{V-L#FZk!r?k`GnRW4R!B>5B%^5e}@idzcRrxDHd{%1hHEixKnWAG* z(5w}Uk>EpB{x#NP7PYbC=v0e;lkw!Ej{G0KwUqeqy{6H1NP;fc!-sDz?O}u9po?@4 z$_Se5(%IFUvrLB-jVFIW%rAu8m^OwYhGcS{n|LZ;qOG;yAiOz63=yXvKR0(%dE?2) z9Pw4H&B;MRqdYI=M{Yt^Udq*7{vhL)6xZ8jKv4_!*h~~>Dz1L z)8FotAJorW#rdbT>PqKiog#-PP(=Y#)jEcY#d@78waG{sou`Yyy{{*7@WG_Le_@Aj zJ{?fmk`pMTfZ6H*BSuW&5t}Y?isCb@&OM!PtG-=0)F+MK{fxtZp*#Sls5mtRrN?kC zEG@=kg3&0)onlBnMR4HfFH>OfV>|d6A z=2-gTU2*ft;nElH2l)Ju>c4D0^DV)9`Q&p69(+FQ&qq4+1cM)t_wtbrJx7Bd^uzew z&pu@HVQrkcQ^c%YiV^KZ#n76U|$W#c6TehnrCF zG)_-JG1jbWo$3Gc{#@K zJ?#y){OH=}=*9&fUHcr}URv(}p93F0VrHRuthm?b;v;4jo3_V_+g@7l2cJK7@_~0( zT^G6XYW%Bn@ll-@Ir9rMEHlS%_2@qVK3{P1;o~1CAwHX@p%9*f$p+3==1l*|`5Y8VI4bac*wL@2~LugxYGYQ|#25 zIQ~=&d*T#c`Nbdnxu}oi%HP29d$m#rF*-jN@1^8KHwwr!ySAucJ_!5 z&YOvmuMnG0MhDi$AADY^Z$n%f>z%flzG{ts>h-wcV8KU>o` z8TWVh*jC;#fzN-navP>osQ1*H4H?B{+Yc)1kvh$d7RC+-Gg>^ap*m*W24e zv56Dn(I(tK&*so+F&z;yo;rTQ&4+7Nv$KLwMrQ?sLv=8%dzC65u375A7pmInHki`; zz=y{VJI%E<+Frqr?KbVQC>fRP!aAe9z5eimIM;ns$kAROwWN|Ym)fVmhu24P@Ua{@ zON_|D$MW4QcZk5}D(RPl&nnJlOpb>C)($?qJ!D4Z4h`=I_`J)X4=pe!7bGtq8YJgW zkc_8Z{wd;PnjDm;E#rWP`ETaIJ8OdDd2b_oC}NSVa{&GQ(G&( zd{&ypYAv}Ta>-AD?Toq?a(GmT@znSJrGpO+09(XQZ1KSXV2fxktu~COzVA*CpH=Mg zrcjW`FH97XcJSH7E^iSEGPCG6a}$#WpSy`qv5vjV<>s8H#@WAeaj=bqpHWUvjkEh< zJoT6(KcTT+#ilRpMlo77SZ5LVSuQpZ*yyn37TlX&EJDZ+@>gaTu;pt)`%U-yJ*2u| zKMJJ;(N>;~?nU~8m$q?=Q8rDt@zj%#lKj|pS50*)unuZLcA9j)XS-JC8x5ND_Y86u zs*xuct&U{fXKM@UbhO)@jtYz>8VGsx2WCpgs6XJ(T|yGW1+1!W(OMbJ{j+d0Zp}Kx z2jgcoNets?#9LRItCTojs;$w6=3=7(iNY!>rI?*t>NJ~ki-nnQlnBr%Mm5pUfY 
z?NZ|WW`7SEV^}6MPd6oby0SDKEn%_eQpT(~_`IL^ELW_=9&EQDIKoI%wo8%<*~>kI z2~9IB;MhhwPZhGOw?F-p#D_u}i`>M&ijc-4+e=H=z~@uvEk3TJggfU*v0|PqEP5jN zD2KvY=LS!&kak%Qt@CnWrZaXnEcFC@e(S2q2ge8Fa7Nlm={#|nFtMhAia`2jr4b)C znc{Fp@>urS!WI|8#75hU$IjA{j_B_NpWoXDpK7bQj(`|fvqhB3sE$W7_$lgruGOr1 z$5;N+oqdumep07RhHc#P+X@3lF+mz2mFy zB|fOhUp%d-l4_HaXj*&P>t0k8Of|=O(lqdSA=BoguW5RGvae~x=O7**ElrETC)1h+ zKCb~D(|@gD{4+;TRWVP({_J7=lf$5@(hu;tzHjldJURF8Jj9{cBywOFB35+LS^exY zk7DrlrlUOgF2Yp3<6C}G^1=AoX{2cYL`e&)mCi~t-av@>VEpVf(lh{~z{PHFx-&xL zCkF!|@OdjU3pGZF0H_s$LZ#{^`N6S3N;FD?dOniRsyo=9r3xPrr3R^BF zoMEW?wJ}DB5b;yY(9Mu)4iw9@(ctqo;NkvD>X(;K48=0r4d;^s#ZvfO)#Ib5So(aj ztyngn&)jnH@$>8E+qM6w@?-n;?u6yXwe!A0zdpO~&nLc8DfE2!5OVck^kX-fGf?9rHUJe8#A=O6rxLX_rL`e0~aeT>i`C zlR&Rz!@hiSq*t;Z#OG=#pLlvD+YRQE551E8sNS)*w{P+(u;zoEVy%u9c(HUU-<04l z@j+_XYp31*04wlf=`=w*X@3NKt^ZI9YUbgy(H(E}HfJ;p;`^B=U1Zj0 zU){jxrOZ!$TzulHnJ%BKs~hn-(B%_D&0_J%u)2ZI!}}JW5{jv>hyqePcP#nnL-5%| zG4&l@Kr^ocpz0kvpSJn5n#DE}>-eAfCSZckc+{OBv5x=A=L8IVK6BoZA2*-4R>zh% ze?D2aHum+)d*H+C_lo>XUp_Icj%-!tAG+m-^U1Kav9Dj=1E0U}=$E2bllEt-cRc)n&1bFI5yl5O;vxt{`%#wpa(uiokl?d7nhN7XyP9|qh%dtP<@oH6 zedxCVp9gLG6V4}&HPYde2WtdA=U|@A`MU-mO>3mhC*#(rddJ1DxA`D}?5x;hFjw+C z1HJc?;x|2c?poz+cGE9s7}dG?hweH=Iw# zl}hPH^^U58tXxb+OHj%CxyK zA2;aNjRWu@!-Fw%t$n&O=gv6MZ?exAGxsMKdpX@1C;9c6VqB!che8^ANRhfCm*JJj z28z{g_J&i&4U>H=p_K?@o&tk4xVQ*i(XAk+O1?*+vVfBX{%CLioW z;dtb9i;tOk(Wqh>8=lN!i0#@#578^({>H&aPPfSVh=NhwEnWdJ!-8Xi1E2p4Jf{4V zsVF4mXQhUNDsgAMku=d)@Y$lekC308K90S>o!Kz^M4S>ue69xaG3nrttEl~O&Q|66 zmz$4C2S-~`X{0>dz~?VcnS9v&mDcGvMUX-|NbA33*cvUaH>lmKW?>^0S6% zR;##LV<&{YZm)_8WNj37iHi6*J1g{S57Vr1cel?@Y>0T!uBbpZL1CAS27H{|fPH@K znKmC+VLyU}YOB1c=>6-B8pcGrH^FDar?6kf3m&>U2tNNav*btcv9P$_8|R^ikA)>p zE9t+N_#C+S#1s|ZPI&laur{Xeg}a`t-tnbxJ_SB&&6Zl8Ex7)nW`U%&(O50dmgD+| zU;5@-5qy+Y9!)xoh4JCT#N5+s>LJlc4o`Q&7k2m1BwAU=M(yLAcq$xr@v zchmgqOV42e@?-KzqF%o8WAe$9dT~An8hm1^S0taDs2A~heh?pPb;nQ9Tz=5~&`-bo z>Xc+%=fLL$=L6)&%_mO%!x(K2 z%NQuSRY&Ww;A7SoRTh*nP}KT-R^9|YKMXwP_}N^Aw-?}(hR00%d_h!McQ3$arSlrs 
z0X{!*%EgBc|Kio?ZBCY|=>ik-z@iQto2r>^vl_#DLH z6T`np$q(m~MW^oM^I;bsZ~0YAd0Nu1Up?=XA1|Lcie<77<&y`+GW!8O zH-Lwae@y!vz(-TDwE1LQu@pX6-F$d?NZNv$ybiQOe)xHMyjZ4HJ~J;Rp&>sXIi<}< zU(sUo$*9f|pIhmeTHs=arK&sASO`nu$8 zRjgl_d=A)%cR%6N?ul(d;G^ljIybC_R>EdI})#p((g-;&Csun2X zbIU0|Za#?=O^;7Tot8xh>pRD8|J1$)pSX$^gHQIHmXpu#xcMxg{IeS0@RT>U0v>rd zU+>uMpJp!kaq~%{V5=Rob9Z;^%`TDg~tK7f)KH@W!S%8$@J5D}nnA`!Vo&3De}GD4fMN z=#CDbJSZITIf%nY(~oKM$+*H*j}@N3FZuAQ0E<}w<9uLwx)+6FO$Stm!x|apW+@DO zUchwu#89x9d@`$Q#OELuA3X(2!Y7NW20lwy9ejl2>eQ*u&9Zx{p}i93&nJIVG4Q$T zls+GA4b$Y4S-p~enfTm_$wx=S;_%6$UMYO;J_zRHSOLR^!g=*8FUwvM59VKmd(La} zp-HQ*f(6KrhfmfO41Ded9x?yo;~$58cF2##M?=A4@yW1)fzOMB`6Q3f`}4`)QW){M z8Yn*|A6=!gwAPGoc*f89TM7f8mz>h#qoGu>_+(hAlzxEED=a=pb{4}zaRRE9!ErMF zG2Z_A<+l`;?er<)b4$uk6rUJ+HV>aH`84o(<$f$adU`fDpNyy<_>?R@KK|WIm5S}i z^!a4lr-4tIN%@K7qpx7G`D96{oc6hV5RZ?Rf+gdV5v6kQxoaf(pnzzlQLeP=O}h+| zGUZyWbXJ;W<+EXTi4W?B_J&)%@nB>ZK?;l^hBcPrYt>_S?VUH}$H~W;#u4`ty?m^# zDcs=1Z7;3&gU|R@03UuzY{Cok<0B4GN!VaY9|NCDz{BUKF8QI-y@{{NhYywRP265u z9|WH(EP#*Wl=GMu=*P!#%6ZK8(t3OKSot6AgU?EFb)MtiJ(s`I-Ob~1@1A~u&v%mi zsC@KCIiBXMCsaNe9okse8g_^H9Qg6k8s!AH?BSEWp-u3`E3bghtItb5PM59>ef0M= z$)-6Uzv+6mrxWt>t-s(yc!Ykr`OKkcQGD{EbHwLrD4%4Cro|@*ItM;)-j{rgR;9fn z%ZtvXUd*&Vy~&O&FRIKeGMw*YDVBK$e17750G~P3%a>1HbV_9eK0gUO(!V?8hx19M zUcr2Fpi@ph)&FT<@)6}A=gh1Vt#kdF9NrTUG&PVc5?2i^5CdP-}@sD{>W43LMDy-mhC-4OLFDXCv zNLmw*4xh~094DXJKM9r}vknpfE0m9@Sf`(vDKJIz$%hRBpI_bA=c7Fa z(&UqQ8w5Uo2`sw-PO8_ohTJsjS4GQSgFz~HyPg7 zDTW{2#G5wgWFodlUTkw`HoQ1Nd$ZmcGmTasqi7tcdu7l=>)r9N$F_QGvp3!vF)aQ} zy9Kt}o#5|8JWTgo%j&ULkM>>i(_U$=*O3P_Po!RzpUKW>Hb5RwUSb+jcIYqo?6F8b z@oi8zpZwS$;&Y(KCzcJ;;FAj*R6W+cweRM$fDP~tJ~p-3~8fj(8>7`pVedk@-3%qK2|WJb8fA|ng*leJ>@vaf~s+6S)6=| z_At)FsBn5^ug6B){B%z_4$^2Q^|ye})qU{cBWF{HEJ!?`JLxFq5_#QRzf9S& zAZ?F%K8H8#io)v|&20sr-*EHsEEDP;TA)x<`Q(X*(gl7Oe0~e!BkT`lCbVmqYl+iT z9O*#r4@NxZ`bDKlCVkAV_V{iCQ+&k2X~gHV@^baqo4(;H_^A5jksmBGS3s^=uC1?X zUU;bdV41K7a(tm#v%R$520ou)QhtyD-E6H|+qmtcm*ZP#l~0b3U&j3OhaTStpLVg{ 
zG1V%8wekMC?e1V|s#UJ^$=wt1dEx+kD0m9@g)=NoJSS?6Qhgn^Xr)aGsT^D8HN-EnW+-#~OM zAL%U0FD&kY&ns9MpBQsewQ;gP1?Iyi2Xj)jAH?S%l8@e;RBbn$PqyczYCpi|mFNBV zEH6J$U}sOVc5%H_;cK8|qO=vv!)K#A-so-4sL<%;{&a^;y0gumvbLJh)gJKqcHr^a z9~U1*&9oTl^2xfo5udBNe7Krv;W5vTQ|wQG{BS-QRyXkZqRpq&f{U-?%rDD4XAnH=64RaIDtaB?jl; z(FI3*Sg$(+I*6&@Yxvk2N{hnNB6OM?BI7uzxe+5<>K-=xb#k|SZmBtTY|oo=dW&b zH?FYl(TE)TblmL^^M|Q=?BCrFJh$;!VY^eK2oPaHBvHGTjo+`Z+Cu)qWAW%j9L_GTQ2>+-6{zv^@QCLi5~r&N#o5 z#Y)d)#K&_!EDfD=7AFIrKVnWkR>Y8}hf7+@1m1<%_hBTTtWVs{UJ;)IZ9e*OGCe*S zp150lo^};@O#4Hby+K+QU%v{BGM`N?%b8wDrKlc%+EoH1?6VJ_n0h7K753SOPfqko z_5*x==2RpfeZ7+HhV#k3Udet`kAL-d>|1g^SC>w+9*egwUb#bjt_JY2 z28!`qJ8xackCTs`M~1l>=4ZX3av>SL3qIe-0{A2_J613K_~gj!Sp5K>Gm;NdHRcZ- znk^F_mgm5s*^lb+uY1!eTYkg_T{-7+D45>5pagHqkJzA_)3G!4h4|bG;-e^7JUUF; zXD6Q=Dj4|uz+S5qk*f;1oscG~yk^$Fn|&ZE(m%#2&)w4z0;s+LvKHBR&V3e3a>!s-nL4b@=2# z&%kHxd>lTSdZx`Mg%2M@=R=#k{UAOEzI@_nleb+jpFC)j zw;$m1iSxmHG_}dwZUCQ*Ym>Jh;PW3`^0O=s&7Duf6h7twmHE^#@c9jw{8)VE(J((g zdDJlQ`Aw)7pI?~cA19xL8WzYWXBt*L{;gkg)#78G54(U7qTn-|4ti6*3x3+`A`{UX z5z?GK7ehtx`Px(3eDpOgCZEiz9`U&qkB^qdCE=4r)dQd7`%XT*%0#D9q)gw$uFRiL z{#+UOJcC8^(N`>U2ob?2v-;#70TG`AO+H$R6@yO}^~r}%Em(e%PsRM@Cx25h$WJX; zeoQ{GwTrP(KDp5@T7Uo6`_BjJm&zxOcIoiRgLV<0XMNXIlMl9jQT~P_K(7%hbgL|X zO!9;LG0IP%k^t7gFvFoj-+qA4>rZ+4B+w{l^DaI)(kN#?h|jG6KJhfl*{&a-d}x%j zAJyZ}=KD{@`q$#}Hw2%*4fz|-{tMEtRixHo{*L`|l?JO<<}RSHiaohlzrg-DWMK|^ zxh(ItF!)m3WdLne=0w{GP*=`0DYO{VBxfwpMX@xrPiNE3{E=u2yKj zCC$dp*6LXPibbo{-Pq_O1ITh*Tch0`?YE?8$L0P2J}nots>lD~$peLt#G;TQF7`5^ z(|iR!l6)i=hE3MP#w8SLvO%6T}_r@2m1im%XFM!Wer@)8& z7SiYI&1$2#hzl;mZy|ktFuE}8E~34(+5n$FCHdieN-NDZYaU_QX4Cca)n^UMEPBHE zZ0?Nqax#wy;txK5b_;ye@ee9U*%f?)J{a-5L%(vI2Ms=703HV)Dv?wRrxXZ6p$b>hV|pg9GqcZ8qB|L#ykaBBY10)zN5z^0T>^qla|{d|pI+mQgx$fq2+5 zYG?i}z(ex$s#D;DWAtjv&Gm&L!ohj{jnORkh`{G}h|e<3P8UXpv1OW_=AI7OpHts( z06u199>w?<1F1S5&4ydp;e8?Bkich+#ZnwOSQxk5iaU*Lkh|npuB5?qLlz-!2UD^9hn4_^&heiJm$H!rue`b;&-_GyJ_- zezkhM__ZWIC>)Nl&*Cg}tGfxo$-jlJ9$zk#{1jWOI5@6eZ!X9J700_cKW;D>hba~lKeDLm9|z|S&;O= 
zf1x04e{(12(nI5)_WD)u5%nb&SHM@)m#`|+(iAn91$?W=*FQ{rME=I&_=xzg{^NkB^p? zCFYYcMT7kO^L>?%2+eb07kY*a9vy|Uh>JTxd_<%^o4Vxc0)GfT-@`)rXeyMe@hCo7 z)FoFJEItPje6$qG)wmX)jOmiA3zE-=fXDV?nMKyq4B|stMPoreB;=Vu<32s!8~5m}-7WL*Pt3$zig(h~$0MGg zfzLkE=c5M)mTeJyNbyp_Vwq~7B7lwm;TxjZy>u9fH%?WA;;+^=0id==uL2-nw46fj%EoFB0g7R^D$lj;qplni>hDWeJT+j z7OG|okRQn>CCxJB=NY#>=HNp~t8LoIGhYh5#fOqsC+bF?T!jx=6?|@~@?-I7GZAN{ zlqq}r4xQ<(GhNl^lj-3h@p<2VOg=h;#F%_i)gjB+_}r;J^AiVxcia4DQOG!6!7ju}D7YDj@W$zh?5GdXOTxVHlK%NP&8hwt+}}je_=86p(&U7m!8AKc{S=e2yNA^;;94 z&rAI(nDPVhG4;!&!-V`O;$wQUKr3{3e2!M%z~|vpl24@}3Hg{G5*W?kJm`D)MCua> zNrnRRrUVh6Tk-f1yO7AdXm}nzgpv|Cvv5Qz_*~piz$e0e%_TpQ&(Ug6> zkCt`#aY%<$dJe7{&a_tNGWqQdU%AfY2tHL;lt-&zJU0|3)|yqi)CO~Oybe9KXLf{ z5ANPQ&avgH?>)W=VDRgJZD!{1DhwR=fj*uC+#9&OBDGqrmRl{!l6uDDv#3VPl)iBN*;) zRqfil_Dj3=)*fk>^~bcjTdGlgd#zQgR;^lV#aNtqfX`(iF`u~ThMs*+&1Z!Q2tL0% zRro~cSF+%t=K2-OhlFO$Aw}?cJ4?VvD|nbaA40j7kRtJ!C*>2bUfI|mSAHa)6>63+ zL6s{X!s}x@_usc$2m4f)AKlTGN%3{oFDG=UtU(5)_=3;3s`|xLK5}>JOYzll6%+9x zIayWdb2Uo8BKYX2musID`?H8oUR8|vMCn%~9}OK#DnFc04pj_%Za}@9{gVKmwZ#;--6H0LM$H*h4Kv^%O{7rVUM4nA|A8YvJek+b*gVYxe4fj~_(a6!b$4Led~&`6BR=zF ze6)68$@r{YjYw2NAK>%l(-1xpu~jPS#FzHa!IU5R`7DY(yOpByCrse;6-=Lxb|^6k zpZrmA|Iox|o`jE z4W>Mz_~coal#lN`1)pCiEaao9P|k?r$wEFk)g|8)__viSAO0cJ5I(`!JgqYN$@G3c zCdTFo5cqs3)8ms!)in4Rp=!iuuFEHZs_F8{yQ+cDPXmv$eu?c@NNgivhOeJb?A|no zv3Xm+etMRGj}x0OYvw=O`_m+RNb&Lzo45J=Od&C!_yojd3F&(FH? zd10O1vkP&yV|U=$^c`L-RImDW9kR;qEtXDa0f@%hO0D=ap@z()J|#M);< zkh9o4=>hn>nI+=m#O4>!3F=n@KJ?PH-Ar7DX zEaH=E6$795PZRLb)2>AF!};V@#VS{x^Xc;-pJl`I?)on5kE!8#_=nH=jBYd^eTDLx zwJ1Kh)+OH*__yHmnZV=3$87&c%8&MaFaOv+`y=Hi*Sh4N1APxZ=Y=4jWyAA+`7t#- z&-q+x@kyp?KKTjZV}`0heqJ-x$?LL3Q{a!vmFIriT;apFU(1K*{d{7# z?>P<6L%*K8+12Nx8=hZ=4=G+=!t=!Eao};*uNe6eeBy!|kW8O`C6ONwpLHuB@oCz8 z(7TEUH-h1LdpUChta2k3S;)o5$JFpVm!Da3KCyb1As>%CtvEc-`Mf8Zk9zq+Ioy$I z7F`JFLGf9&zCpkKWNP!-Lwp{$ElPX#+P2IWhtHvfs=ekuDpy|cPdt3;W$?jiSF5_! 
zULp{ll?=m?~FZ_{#-Xepshitz|I{htI_7pJp-On#KU1Us2>o7?un^ zi_J&G1Tz?KT|Ff zwOgC4h8r0j_9)vfw)Vu&<3FvVUq{wpKz(=4urjmZ;Pig{d-L=P@VUHn`LI@_R@=nC z=n+el(xBnfbNQU0?0j#)M#Iz7P5g@vD(GTv%maMX`fTV;-RVGNcB-{n8C`U-kaf!S z?Q#oi88r{tCTMgfMqhUTk?m}U&Q8x1(ZxbG=?#v1BlMr9&W*<; zT6AVeU%gH?yUs?v@x;0qS%dYdDDjyq^27OD7kI8mp$R48{b6!G-uLI_JMj6vuEIwM z5Zz)b?Uu{!azJb}Ke&evNL95*E6c1X(y4LzH~n5abi~(nLS=@k#XFJiQov{%8t5Ji-H;gTj!=dn)Z7)b}7n&6ly( z%h(3}c(>SUHR|Oy+b**voPfq-3#YL&>SGx{vKRNGlZ(M!2vPH*H^M=Cs}HY;jVJd{ zdl($AIk85g;h;BW$31oqM_{;k@P+?#w!f@gdC`dYbPig3Wi0H=-piGru5&TE+rz@1 z)p&B;ReuGaF;n;mhmf3S*Uw3GF~y#M--6Gi8_Os8wmz26ssp9EKzy$QpNpBFkKz=10;~xXEsT$|p*Xa#pP;>l zeXN>eg##a+pDFFLYJZ#!mj{xbwk6_|V}&C=mwx%-L5wIN(mRJw&L^J=2R`?^5qz8| zM*_W)>Kn$#NwUkaKFQB7zk<(Gz~lHos(v|q^i)a?$-ge-lVg4IOak8!pIJPg#0ywF zpM2_52%j%h`8Y9DC5V_py_n>4Y>n>P?xBh5Me;K(DDo4=Cxv<~T5^(+CO zTx%Nme0f*p)49Ga4gujmkF(uQyb{ z=PQ86>mOcMR_{{bA=%|%5uHeIj`{OmwQhYd`d}@_54e}3PukvZ{ zi`;{R>f{b+KU*k2XCfa#<|X7=&%o#Fx{CZrKC-sP!Xf|xD((-G@X50vfzQ_ikF&me z_-N$GyF|Rt67$LFc?S6Wu*%1+kL{%Sq*67>M`>?mf;xHs$oP)<%oOg9>XV%f-T}>l7c~g zeykhIC%J;f@-algz~{%O;e0$xedI+isi?lMA$-O@w=mCck24$ac`H-+*yZ_U3U3@ZQE5TAK8AHBn5bwK-h1Ro=0Mx74$JX2W2C&{I5W#C18a_TB8 zpWb~6KEFOy_|Ww)nK|aZD)}?XhpvCg%(3L7tp6S0^KO-oxS_kW`ElZ7qI`>yTMPcj z0iX9UMSi?|(s(}NZ!YD>%f}?oN6drx%op%U?)k*>F~sw6`1}R%IPnc3KhpJ)dX_vI z5jlk z!_lBWK4EvPvmQI?IX<*$z9R9NE##AKDU0S~)=~yOA6|&zlV&N4;bY8F20lB}P(F?? zluE&Ze9ZI}!KcC&^GT&(3-}nNV8mx0$0vn?#qlvk!NBLM3gENdxPj$8ux8N@Q1ciU z;yb&jlBECG9zKcXJ#cPHKcL=TXT8bE(1fZ|#OKoCBdP%DQ;2@0;KS=jM8C*@-+<3A z&cWvu2N@mg*mHixh2aM~_B<+AcAi}TpXP44*hXzJsbkCjX`3fKYmCB$Qpc7ZZ(Z-N zT=|+Wo`R2ES&J?WZc!O2*&nUlX%(>*+onp9FF&YP@kQ%l*B?VQKkM*5yFDCYo8cyp zO-2^>W4wgRt1kw9++WG-1AM-O1^GBw(thqRK4Xa`?VxLTAMu$7`6LmQ&`%x4C+DIP zI*-bguYJ}Od}RHfjMU3HA2eLOa6b68{|gK7Sz5iq_?W0(oX#RY;PaC+M}AmVcx?GG9uMT_y|<^} zvsEtdpj!Bia*OJY6|0CsFZDOu>h*3TUF^x;hzc58eMF&`8gE^12A?}~AD=9CVJaV! 
zcVXajzW_ec{$wRW(*Bq(Lg4dHS&+}tBIM&^tO$Y6yQbj71`Kuql1fTXz zG?w87S5Y=R14!+AXe`4E(xPm*!H_Cf{^pZNe#qykXM=|SQMM?;V>TK-kII$5eFO37 zH0qTDCqf@DcX6-i37ggS27III`H||%;AI11RZ%drTH>yT~I4Su8OQ!Lu02J`tZcGEpP-u@IfXgu zdv@$gRa0T=ye5Ab*3^DA919}gGN1?+L&FfB>MOyT3fBAS3b&il!k5HSOvUuO&W z@VH`%zfTmOLy9XJ^dLU-I6jZUFGB0eB#(uBs2ue95QW$C{{Wx=20YyU2=jB@u{m6J z1RHskS$jfSe31FuJH9wVX!cX~$dF`@o0a-`T2BvRHNswsne>XJ)>FfluR-<)sTQBE!-t}WlM|$F^vt}!eD!&)F7av98kKqx$#vWz ztTk?OXE61jc58S#yfd(nT*n>4)8ReZr@9o_&+#cZa2282m4WPUHEf{ zkK_L!-FG}Z?q8fOd|%#v3Hf;?@VNeu9dMS8Vv3_dK1GL*9dMS8qCrQ4d>+JSrt-;V zd%+w&M(-}Te%&gleE9nA9GW(6Qph>Q!{Ni%cPTZf0BzVt1M+k05`0R7|e)j5( z(q4uE#J$1r=u15}+@r+sT+JRtCS{S7+SSlk4s=k-(PJt?R zDjx(9k9x@69o!wOFEQdd^y?c6lFy^!&?^fy<9y8Aa(Vu4;PZ`h@M#xo#r#U#Mc>eKM^*#7}Q&-{BZtP@ICP4BT4{sY?CIEcC8F(D|*>2t9_jhm?Y^gty zM(r^a8= z1s~pjr`QWo0X{!eSi~pgW+Rf1VVe!&bLsM- zLyJtdszE;T49u)uEBO4|smsSb-w~M#o6LXAqbW}TKAZf;=_d2@VS92 z@AaZI#wgcPtW(wJ)Q=mu@;9iMMgm%T9!*ijMgm%T zymbvwx%z@1CO)-;I!?ckcSsjO~J&qQ2VJ{?-PnQNH%AinW} zH_wFpFaaf*9S&ndKZE=rSB!|5VTYsU0X{!>Y4dqhCb$r&s5fZ-ocNHdY^S&oD$RH1 z>t*2c7Djv;D8I@jD4Wl>J4E?aEg+0c}5J?^IhtB!~=@*IipJOB|(l7X7bgp(H;Pd}XNq)#b^q3y( z!2qdA{$d-u4}8T^hHO?i!*oQCT!uxDbbLg1Yo0iP&o48<=QFg zqO?EfIEVMyXdOYy&32af%%%KrKxCz{S)~@JQSzj*x##jjaBM5hS9=3NfzMmJDfsX( z(1HsZ4j&Qmac&tTHc3K~+$hnC?{I+6e*zv+e?G(fOkRKfgCalk1^E550cM+c%_}nO?M0f`KE;+9oJro5ucGo3mLHUO- z_#XvVeyD*;3yD(h3sOF_<%b%mjF2dma+LMI4SarQ>hck3WEp%cn-8L@Cez{a^LvW? 
zxaWiD0)~#U6oMwxoH#^g^HJ`9QL~V-aUp2P&+om?k)P`_IYGh+MCx@xMe{K$SPwq$ z>4J~A)*W)dqYK2RlUuy3jrj*W;Pc)~@F_J~be=3Q@rchdr^}2{|96~riQDf8I#Zm- zhtP;`D_1}2V}a)(UdOl%Ft|oY+ldCcJ!~&=n7WT+Q#J+|TqA@LLZ6Tc0GW4~3O+C4 z@?*C`qXi77@B-8le_?rkK?@k_vXqIDD}tU@r~o& zlYO+>Q3NRUczBdShvet?uNJ^(2e(ggGSOzmHtRH4=N5HyY87vijHP+( z;PxpW&lZ(xFU?r%d=K%tbohwIgFa3S5&K4C582UJ?>oCFL#puz4fq*wC;-4 z>~J)sg$*Z#xFatLY~lxt##`6#h!5?*LiKAApREJx7Z&-k6Z1K|pnhSIpXj2WV zY>=HMKI9;V+Gh_R+Lg(53`N}1*k>I+IkVHmr;wNryLmm0eb(WVGdoRunrZmN80fV6 zl{Y(0e7ahEpiI6gbAY9i-3&OYBB^^I_zF%RPNOkzGU#yK&c zoEc?&e)%9imx=kr7-tN=P`?85lQW|%;ovLrc~4S4F}9h8PsAs0M%l~fV}NI||Ksl3 z;~i&v>_mL>W|X~r`1ww#f7tGni~H%$!9w|rCq3)TtaC8v*T>GH&V3~gK`S@uRpPg1? zA5k70{OnNot0a$5KDS51GemiC@N=8GUzzX#pO+MDKCInA;yf~bik%KG#Zf<~RLay( zI5{Tb!^RV&%_FPVnoM|=q`}3VJ3Z#rhxd0V;`)2z1{zQYj(DWY zqkpzTGb`fy`|wG$pEmGJNBzry&p(<&eq28Bce~X0M?H7>&wm9Tl~3^0iw-J^+uQIJt99orH4-`hg~BHo zZKi_?>-ZSHVt?T5Ef$ZruHO=$hjsYSrNIP@V)@WjN&^@XpP83WWK^CXcxf_H_(VsY z4Ui+^^K=qEA#xNZ``UbRE=S<=>xFPWJgOQtcb%Vw@ZnK0o%iPHHQ@6bQ!gJG)! 
z=99_NuJAE>X?OV0`Yz-roKJ{JuJAW6_(#Z3IG>!GWY4Tfer67zOqO;&zSOU%gaiYY zc0Lc{L+iV=KT5)envlR=f8Lth<|*PvM>ptv`5pfm;sg6^ z^9e5T?&)bv&a;Z%;S*fqbD#69&V%>}|3`yQeA~s=7dA8Hw^GqR(Pl}vp)$RxQ1aek$dSOtVgjm0xX=?G&Fv`l< z@qBV@l$Cjq{B*VWXc%Q>+;~1YHpZ8>4xEPYrkkpNwK- z4j;o~W0KFqI((8X??QeS@iAk0=kmjJ_#|826ZLjZ9=?QM^Fz0rf*7tc5KALt~n@`T| zH1T;R0UupEt;;9xcAEHHCg7uMr*-+{-A;qge!8VlAH!o~G!O7;FeyL09st^*p*8HXS$3QcuLpoOx@Zk+ z)TIIfV?T(`Jd}^%#9HP3#tm^Z?j}y=qbown5yjUaAHj)@dS}BY#m%^T*nQ-$BBXa7 z;M43%K1ug8`MNNKy&o6L3C;+`zZe4nm z&?#=8LVhHlV2st2s|8%_iO<9Fe3D!(;N!;dG2v=~!)K=Q@x;hBlUzCVm>Gvpbd1ah zIU@OaI+9QND=M0NaxX{V^Gk&=K1*Iv3GgxWiVFDf{<)5Q&d7&$_!#a(SFXOZ+=cv< z%eBnbb}~fe-l_3xJIw=pcBhh0MzZ7ZFMqm20Os-uZZO=#!Tjg25+B-MxbhRlr`p+#E2^%@r5>OB$LWa=)8?Zer`O|?|2RGI zc|w;@f;hbv9|Pj_#HXOkCqbNEi;n?udg3$l@bPTmJsVW_f>XSmcKJka;7zd8#E16p zu6@?w6MvqXj8A^;H1U}y<`ZwHlkv%~ohCldB;})FrxWqXv7IJ9mr40(*y%)ka%`u; z=goysKFcM!1o;G#IP&j2i~ZM|n|gfWjdG~}EZ469pZpqSWga9yT|GYWMp+qGLw@pW zl$Ci9pK~of8b(q*eEOW0H2@p^2sL1>hdu=$O=Ah z>3ZZRn;@&g$LJ)5%GG!ML*lcGy1Wf4&Yw;A5dFK4kB3y8-|X-q;;Zi}0FTQj7#>TN zpWyHbh7XN6SC%G-_&glXCvkp444>SeE315FDxc`>dm=e<_(=EAP&p#`d0L;3wj62l z$-NwbPq7fjXUY7803T!X6W+BoRrzG&{2N z2ju6q6d!Z+YZ0FW5i&==7V$A4LPmVLx_lBu$h7zv5FsNz6d%)(pCl179X=*R$cWEW zlTVTenGPQlB4otpQsoo9|4L-19X`?fFC*+U@p(^DJ{op95uY5}Y4EuZJg)uGksl2^ zorq74?KJUuI3XV$JDr42p6xX8VF~%@*y$vE@@%Jx&oqqBa>vWA|19lu;P}*t<7L`^ z-G7}H9}T1Ic;!rkPmYbUG7sW2)8eCHl$CJ<`W29$92;e29>nKq9X>inSs6E$Po9mk zG7sW2*WsgMl$CK~`Q+IsEAs%KrLM?X&su)t}&e%B@zl zUdb-L+8d4fgF7D#@zwvkOZp}GWEEePe9Vup{v`0Ye1h@AMCYc`wCzeh!T6zR=kGts z_g_+e!uim-=|NkRDQULK&0?$AsW$48M(}G4pWL6D${At(`pKCpKhgX5L~`Wtk^2in z<%sxj|4_=$Vm|uk?^3@O^U1v&fzMAB!sKV!^LIZVbI;%Z<5cC-td{m_WfWVl)>)}l zF7Bh8)Iq5%^-=e$qhII!qr0a)6kG2P*wLtGouQo6#ZgapoOQnr@-t^DpNwj3IeZMS zu|<65Dj!>Zmi@aqd~EqycD!}HAAEkgtMU>1i%qsyZ8poT)MeTnK4O2d$?o>g&wC@| zs%=%S{^4hn{OlJ?D6d6DII^h^ux}lqycQMVFugv&AO7zoKQ5naHW)4+vo{!!pU(jv zg-^Hxjr3^YJrh1`WaI3C%g0d1(4HXiq5Xv`KjD0mZZP8blkX@>%Ra>&lVK zXR!l~G<#7jlGG3ZkvO?r_3uf3<_Y-dZZLHD!E#{Pq=rODD=#A 
zSIUps-m~Dt{pW>zbiE$OtU2-%&L{6)k2?>NpP3>**?7;Yenqu#GUJ$s`0)LOV}GLB zzti!Bz8p#YTD-f+zZ`+jj~7DZXF2aVC_iR;&)oj}hpECRqx22O{urLVf%WVE{fs&I zG)m=Sogt!H+RY?{*f{E01BQs|(TUL^MDY2{uAff^PLPL>!A=nP{I9^{m7ffpAP=AL zMLzGLAHK*FAM$@Z`W4PcfAPY|8hrAdUjf$PrmIm@wrUI zN6${{^U1fJ2A>}(1ont7%P1@3#_`FuQC8+be9q(f=ow{Y+!#LjHp)SB6F9KYxL>e{Pp%IvTr~U{@tHY%qPOo!v)B5$jbK=w3 zZx-u~OmcrnGMdA)b89d(I`@a>0Y2Z?RrzFeLg?@@{DcsEz8`p8`N_xya`+hT0udkX z|497`=aXdVBC&M!E1ZuBOBbI9@tLXelaUML$WQbMh7qnb@!|fDl%K_XwB<;XPwwRi ze14!1CO=EMKmk4m=Khd^fzJ<4RX!QHKn@?nT_EuJw_OjPZ2E(^e2i|72Ko8pN9UYR zt8yzd?~vB7(VhFodxxZ7fBe#}%O@l6(BWgacgXo{01x~_EQ0SNN54rp- z=955 zWx@L~es$-3z6*E+pUhTwn~(9UyWm6XJC~nuJ_)R{yDoD13Fl*gRd(kA`}18h$!GEE zm0)#u^vm?)FWO&x7p?EM{fWw6O?LSm_-*^Mh>sC+1U}zg@bg)Eb@%ZxH1PuLuM+Qr&_qn=%Wqivm@4F~22IKbx4<4b(V|B?E&h>zCA3!9q`9}{wXi4W7{lO)GihmQ$4 zzQpGVJwAzYd^PwOk>g8z3VM7J<@jpwF(Sv8_{gnV@DbP_&!w$sGtnPhyl>~sP?xwg~9=Q0@|EjyinPp<7W_iy=!AHv|E91uT$+b~d<^evRtn$e!gedu#9zq14 zPhpDu@b*>;zqC&!AKu<7?I1aLAMu$he6p#|udMGzSLf&P=SRB=AGd1KeZr;YYrwQ?Jz zwi@-ac-Exua@*9>@+MB8Tb>cZzj67D(U$47H-@Gi4hKE)?3}vG9aBrodpLm}g%6R7 zpH{B^>Bn$B=Q8X`B;e0Lv4po@PsGn-DUd2yL^HtU`AZr zv89*zJgmVdX%bKzA0v`Cz=-(FJba=f&`IRT>Zu-l8k3BCoBH~KHt7~;YOQYspEnddeB=_poQZb%$oPhtCK`OcvFqWJO@C^akJ%?+ zm8dNYzT5J% zh)<#naSc93WQddeFg-qrGQ>6b7?B}Pe4fzalPp6#o{t$B;>4$*#V1*Ycsw67GQ^3` z%;gj98z!{VHlJwU*$_KTe8_)x*7vCM3zv_Uold|f*LIrt%#-oaveOCpFzJY`7+;|n%RHwJzvw*;G<=f z9j{#3e?{=gwNX~)LGshp;G<=fm2rLg6_B4?8)ao4#OFMokDgIh#*N{VZ=AG53T6Q4hO_MG?} z)Qg*Jr&XpC!dCGnv0LWRxfocR?DnWfCxj#G-ZEpY^S$8n;aq-HJ{g>#diWT8f(kw# z0rBz5PcVv@5b<&4C{lW`zN_|mF&|fs7L2#9UxCka3Q_X2#0hFB9|KQN z!RNVCKc5V|L=PW>y+rVN9`p0b;10ZpkHJ^q!RPs1Kc5V|Xb&HQQ|!R!OI<#qHozvU zw^^~xIt^YT0C@`H);ryT*R_*^kEjj6$Fs(4GUO!!kf$JSz0)mt-SO7-I`H|*xg$T4 z4^>V`LL>O;y?m&0f(bN&^8lZ(3d&D%dGU|z56g7jK^3<%N_AEwJE zL6BaHj{!k?;`4+ipCmzg9X=)m>4{H4lTVT$y$&A}g7m~^rt*o7Z6vbO4xi|~yAgJp z_)vU9S>K~#8!jIWJDrG6j_ow@nJ49=VW$)E$+4X#KF=iNqhqI&@X52CCO(%5`RLf` zBz*F0r@`k#3SoSf%y0?t31ldkQSFcTG`09>7-eO17tbfhMp>B$@#$*u(J;!&xbb{) 
zY?PIG5TA1$J~~EO88?qjGRRG!aXbA}>%5N?4uf+OB%hheC!_e7!^iyi*yqyz%aNZ& zd=f12T6_#x;z_^ex_lBW@mhQgSmKEf?Z0&7C&?18!^earp7>m9@=3D9>+msQi3gw0 zE2w;8;$!S)wX>^HdEMa?6CY#u`je9!S6(MRwEuGSD~1opqhY6Y`EWcrw$sF?o0N}+ zole9j$99_d(Ecll{OH)}Bz*F0r-{!rAs-z(orF)G?KJVZ4CAw8d@R7n-1yk%y+@0W zhEb*si~We_lVhVSCxNe!pU(%Lp#2HRkA_iJ#*OEbW23ChgZMnG!$-#`E91uU$+J;b z=0SXz4j&z(tc)AWC(lM%nFsiMfrn2<@iB*w@$s<_d_foTv(su+Sgl+^10E+ilR{<4 zDP#NP&Tb=N5viTg@D4le-9ZB$j3<&aDO84>GIrLRoD2;qB2~Hif#TH5M@o<)Wyzl^ ze53>=A0!9w0H2>A`Dv6ojpjjS?VDKRXfiy%Fur{g*7pzm6v>anCll#mE+3<%hy0%p z{70tp$>44Qsf8my2H!29c@Up@fDiSKtJYE#_M<`Z@baPFas5-H3j2v^5TAKh<&(kP z3lATI?_SV6Dp&vZvx!fuQQl-|GP5mDz!-0V3a@aR;dhB!<+O&qO*THUj^zoM{r52K z3;4WgF8S=1TP<>UGI(%4C%w^#93F!om8*aE=?)*+#6ZXnwa+aTZ&Zt|#L~l4A*<742>cQFs} z`H`v1NA6B>CwI9WhRsLrPH`vK%pC^!{3vtzWVAa)eK^QNxLM_lmE;KNA!V+{r-ZrdFKm1_?@4m_xj)aul*MGd59rU^b{ z^v=a-a@u1X6Kuhds5y8{?o$nmM^~=B_|r&!id)56qaFrG`h&?p6%Vb`;Q)Jh)ALua zz4-Bh%4et6IA|d& z`AV>?JNN}3;M_@r95;n)IwQ z<6_a^^Vr~Bh5EnYxxsK#s8y*73$aohTeEtpaxO|p(eK;bAPq>R{9pj*fc4hg_j-vg= zm48G2vm-y@d=k6RF?>vLefZxQe7>!q^2x?LGl!4a=9%Q@|D39Pq)}cn1IXbcjk2K` zK;ScDDjz97;d-XMm3R0^`7zWE5uYdY`DoiAO+LA|L&Rt9;Zv;FQ}4Ru>dqAVWA+xK za_#C>;oxty3jK{rG^H1Qx=UP|glhFyh!^iLwc<}kysme#LU#_a8dgky^ z)-N;jTdsb?9DEwLXnT>Jel_mX_Tqy=zy4KM<&%+qIeg65uWtk%SALfCd!#Ztd<^w_ z#eA=aAt=4G5fzHN3Y#&*tr^9nN&}KV`m21~t1Uw3# zB@MG}`CUGS8fJSQ#E1MtSAN3zXd7ltKDjr{3ZI#Wk8}{13qZT_BOOFD1JID47Z*Hy zvT+byK4v?J;PcVbP(I;$rt2UE`Iu>kh!6RPuKkhn6V6B14(amAyBz|b*L8`{ZmYWA z%xZPV{qB=d|LolK)t%0lue~0496mv*aoY|i$%vMGf?8!nTSj~J5`6gjZp+UiK1nJZ z>+mt5!ZFDY)0Cei6^?cIm{8%E_&lMs+M#Al}P$;Nwj*DtfZXKtUrPL&_&{qXSWCQG+?;jb>`$KyS3GP4#h z%mecCbxlQnBKag)yma`Ouz1;gx;lK4EM7W%Ojx{ZKIa;I5-naDe2iGUY(7&BK8Y4D z4L(LJUN)cCMe{*f`h66MYy-vmV{59monn>bTU3#OIZRwxn9bM&pa6)t)(mhpIk3x;PVEi@X4w`q~v3I zfk@)xk)Ldi-`)L}*~jnLe?9WNF3C^5Sf%vm!0B?j>tC!ru=>Ik5%|weAl2&o3sE;b4=U49|KL z33+@xxsM{8>wJLEAI+uwR2m1hVtY4>&%@>C&hX;Y8lPk_-nzyBpZ5Zf(66n7R^_0+ zeT!{yb=yKE&B{TwQg7hON*Yvl8n z1@PG~zp{$z09>i);_^+@k!`SMqXTPHhTRr+$yPp%54+!cZT|?WBQ%8LL8$fKCL0Xd 
z`EUY@)Pwb2Xqz@l)@aaUAUd{=EjGq&)pP6I8n2Wri&wNqC6nY`Hvt0pc?QpLVfUZNq;i0OjHy>6ags=Y4VCw5`+r z82N4*XQ*G;4=gs)Xx;v>flFX|G?Wjh)^px zOvyieiKGSrpQDpL_6@wfg9x=A(oWT!2l!kRz^8q)yp8f>=+#~9pjAk%%^jZf%GSQu zJ4X329_m4>kkhg4@R+7-;PdSyKXU!rK%t4{17;85Un#xL+>~HScpN+GN@yTd7 zJbR3dhZpCDtz_WyJ5z;EyK!sz;3`nMe8$83rUq9r5Ab=%9DKy8juHyhTgAO9Dc2Ig zXN=oMwamK=B^3Jit-F1pT*d{TIiKJDT&- z1vQGTt#avL3C~C1aP{lZ8XfkIE{yYh#5};~iMhfD>sYzN9wp<90+Csq?~L#DdJ{&Q zQkZ8Hi8Skk2Yf#B5_~A+Os%Zip zMjtRdyE7UhR*wKs?=}YXgr_%}h*cl=abcAm_t+5+JLAcy_f-5wPyZvOdL{UL7Vt>> zTx@lUE$rT@kCQ?O0#&Y5l6+X(K3k)SHNx(l`Zy_oxH$Fh+(|NGs_8BLCh+-ok{`z} z++@v{vDVAjMzOZlsBc#*#WuqsPrJBXY$uq`K8^snUx@K8dZWpZwUqOWjpFIyaB$qe zV~yqc#A=Tvw7G%LcT9!+kbZ@#SM#9U>NHr39{_2dpbGtR)JqkfaAK~Ci36SCCPVW} zpVlS#18Eaj{6g?~BXjsLch5y~L}Jt_wu>)s*do->{(v?gch410rDKh)R}XC&0uo_+ zj(SJ^fh8qN`~O@#({#TjK6B3JU}pzr87G3Q)j-0k;ImoZX>SzU?SuWYhzD)9J9uHW z9%qc&lZ)H8u`k70AR7&luo}h(QR$sF+pxyti?f~@BI2tZjnE&g*+zlScNf5i@4v*k zDaj2jS!n(4M#;oUDwelqv4srR*uQCr(V>-#qGdQl2mc=Ee?bD#Yi~$H(VG>w)&Y zD!|83HyV6CW)40cznKe?Z^8+`Mf^wgnz*CE{`g{KJRX#EFBZ@*L&Id?^YH~ApDc0^1AG>yt>i+LLO%rY zxdb0<)!W5Nnbau@JgjY9jIBGiQ)4I_0tP;xFje@7L%~p#iGC>gh$F#7Z_U+|FIs5d%x_8x1?uY21kcEN`e z!Lg~mWGx-0Z<1q(L(>ziW86_TG)xcqc?s|cJ{iWJIiC-T`19L7g@Mlwu1xOj9JFwP z!LILOle*ogY~H-i(-JpDt_u@Rk;qh}UoT35*};{`ySFc}l^Ss|!Z!7Ic*jYLLPjP} zWkt%%Vm)2=Zy`S~oq74RB_Do4V~-(yn#w4SHn^VLksPlorKeDY)LO(*dA+JfW*|9StQ)1gIb*-Ql=A7eem%C*Zf z$&bB$RT^|%P@L?Pid(H}r@_i~hH6PA+_;Qg)m_oF*RMN6EA-@o;qubqsE=dA-hiPr z{Sj_o;&g{DwG56&SnTPX`K15kFu{N6{^uD!1D~HEJ~zt6T9)?tM$bAm*DAY!iO*c} zv5SmW>s9v!DW7pZV^Ji#Kj^0%W&LjhpP%hYK3LyHtl3U%sn$z1%AM!J)hsq!)jeEK z)e?DEenkGWVQDSS9|nBh3_Om0*=5EAAm>%dpSXPN>MVu=B0e+82ZdR1&!Cu`M(`b+ z&%QMpTc=j=x90o=e14%T`QYq$zgnpl>&qkJd~ia1*1yxYkZWM-1AKnzQt~M^THA1b zwrb^rW#_~B91Rg59k9dG-o>)xt?T`jYrp&V80S;3BC+oJb{PRO*y&{tF>dq*eI(Xh zKkgww#^W63#C+7h1^9dj@W4KM`7C>m5#(dwjgX~ce?oiP#d>$I# zcv2NAikRj9wr=Ix+dq3M_>}grlPDESr7CJBP?Dx}f549a?&fu&b>ggfu^R+FM|ZJz zz!3&2wcPF9M{3y7Q6F^^kgjRa1AIQG3qB$7jV7v<;6|kz;z%;B(D=qV>Xl4J_eFu0 
z$9b*V^;c+FJS*_|+!^uV`Og$wwnfFZ&_Y(+3~0^tOuB!8{Ab!@axsdq(1Ao~=a%Y5 zb4cmXcCIFmPjds`TP(JVC!6B8;B#{hJ|Xr;vPx=t_@@Z9Ka!TKbh$S|;Byanh>sMU zOhT;E{&2xDbLrxLXYl!F1%;1@^t8*GWfCGKR`PL0MMQbVJ)_Js*7vu6(G+}8U~I2^ zE4v6#M=V;Vix2VnG2$cj3n5k)l_%KYlnjWJ8TA)I`QfOH3wD6d`+>*dgRMnadR^jL zb`gAx%d)Fn`@PRl<;S^UsR~1i=RrQsUCWK5fbYm<%a;MO;)Oh7j^OheRen7BwYBvM z>HoC%s&$5xdbrqHk02k;z?%h)>tT0MRSZ{s;A!XcL40P4el6rf&8;*UCE-IouJXzV zd?rlca}!rjXceOrAW@1<$H*+N+N8M0htJ>M6(%3i3 zm2#(u7~{gCc5!?}CK**Ed$M<@H?a_7%w5SxuuC?R-yDdPQs1$Ip-|isjO2z)RP>4)kiP!t2^8UN6V4LiC zba6JGAX-mHK8Gkn7Jelj(Ul97-K6}na~`2bIqlxj$;BNDSHDkBdjl(GW25)+v|m-Z z_WO5<&khx5bMk+HM5%v>Yy8-abqo-nxb$ zK9@Ehh7Q#zK!!v_=jhZ?JK#$s*u;mSLp2JJAraBs2lxya_;CH|RQFN)uuNLC-#BRN z)_M+`J=d>E{|q&5dZa^V!;5jgD?NwJg7Oo7e=r9hX;tdwo7CmOl_2?x&TbjWQ`?P} zo*30^n=(L?T04!}UK}OKM?_W87X|j$oL!gu zhXbG26u<{XGhuy}&zqMci~d&3^za7h*J~~nK8?~|2c4YQh7F9CeEBoJqmL6D8<6BP zJi0qUCnvUn8ly1R_L47u$|Xv3L`ijsJf4G33AerES(k8~Lvd>Vu!Or_!XP8k z%nOHWzcp7cg#26pkF?LwFu71I7eBP^57#ice=v0Yx|k6kPhC;5m@V^%^qpVW)_Iu` z*YjS;&y$zn~k3PGn^UZnqH%go^j?czik=yc<)hYt6j?@hs{af@%tc?|%2 z>s~4^0jP1GZ^>;uO6vl{AVGd!%fN^4zigAURjf1`u_wrj|Dt^VC56YjGaRnFkm2Hw z8~I!Cx!(mJ+a1IWx&3A}^40pku<3sTI z=OjOJ^2^>!kRshq z!K3{oBd`f6a)M$#f zM4}JlQXAB^C=DRn5}B6P;^+eSe18{waJ9QyNBXYq=4_O35{7=2sW(fw+Km$FROp(! 
zIU6OMgrO;8>hYWpyy7R|^8>)c&j;HKInF`jjCv)>3-r_Z*O(!>IkKR7%l{R$HqPb+V>k<$wFm1}?S+ay1-9KW}=I&XW61|gjW>)dMM zV0A~-?T^KS1~S$}_&)wC1!#7St{|xVGr50`qt)A@SidVo@nf&@72xysg3X8b)r&yF z8w`sTP3z++{Zm4L*p)PMA9Af+d*{b6n-4oK8lvuhG#4;!_!^($Mbj>p7_ke`EZl{sB+)Jh4XmqHTY0IT*K%} zw|nOz=XgkdURxkOVto(m--K$lqAaVdG49u;u6-xBn8QS-o|}qZ_5M`|O;T zvR$!11U}!yz(=m{{PKIm8kXc7-nBpc@;e{Vpk>|plUJ_2uQ3H5-k*pBMfiW1Ps6^U z;`Tas-lXw__h%$kl71g|2Xnui2WAF72Xo1Xl#BvoE~VgZEHFa&4>%tjf!z~MqD#rJ z^xLgu!Lr7GEcjmHyoV_w4&4?bT5^^)sXz0ugl&40yxj^PmXYe#+t!{IUR z{u}B(lYYIPxqSAE@BfdxHyoLY(xtGU zVvz&7nEWJh#^h~e?C@dlcIpAw^hV-y z>EUCe;R(GW25krE6Me!cK6YMjNFX}V@GE;U_BZ zp?>%T>6oTIz~@s7;G>=|H;bLlttBU zc&2r1xhKo)aER+bE4xVsJ|D%vhf56Q|Je5n6dvd$?a!@aXr^0Sm_ruAhYL|KC6QWH zuRV<}@{bKZSG!z($~Pe_iW!m@F?L*rV%+U7=JZE%(_F&tII)eFD2D1KvK?An z@?PX@vi}A6{EHd*1pS{~6qMSa!mG7|I$T9qBcW+}pCN30KS4>U4Jy2PdNF{8z#a+3 z(i?F-uL7ShDu55sc+?Hf+u4UyNmD6Qo8Sy%7cA%?Ssv+#?1Yd~wK?C)6Q4_mj|~4T zwKe5`Xvv2M!Z;iQH>a2h_KS z^D)mxgU?sbLisEe9%JEr%nOl$&%eC%@Np6pmhypAJ|_D>;PbT%d_4YP1{n&{|FPK^ zo1p+c-_UjVkbjr~9IRie|08X*iIBkO+b;zl+=qj6=-#oUbv|_xqMwfwx8T0CIY+7B z^Bq(031Qn*m$unXx!jTA4Fp@O9h4}+p+>3P+HS}z zmJ6dlPJH9Mf9IuRcDvV`goihpC>Aimy=J;-(HkAhhVQGrL%Ha04?b_{az3K}4woYy zK(qlZdh>$c$^JWh1ID9;p@0^QkdI%1&o5qrkG!8N3L|hz_H9D@mM*X5OILOEE*W_5 z=Wq&3xzg#37^EVYM^ zrO5D1hILV`i&k{=rcb+4?eF39`I3Q zMRHks#jHUX)eVh$Y;`f_R!K@afx zYv6I#FGqehmpfw)@==_`n6OWt@Kvt;&xZ;MA6I^syR5sAkDbvmyq}jUW6^&R@cD$P z!Y8XMy5W52B;Kr39`N~;Irw<)=SpWW8f)xtJoj^t?9E2(yYlw}@cE2FKz^JJX{y>k zG|OoD5z1xCWjXNq>}e<;pGzou?P*`dh~r~~bBMn)_xoD#(;sskuLPgpRr!F(_Q8IWt~s?6hEzQtU-EI|W1P(K#o0Msc^WIHfS&i|?HllU z=Twm&l9&#Tnkltc@1!(*MiG3B%kl-EKbb3hT>rUL+(RyqxC@4#CQSYf@;S2Z!Z+l1 z!8R%EYDV@i_`Dx@9Q)jEKzM4)XFvz}jEA`1bZS~IH2C~=0et-DUpTM`MC`*gDg38l z=U+#Gn0@$r^Yl~V^EwY7I?M5+Nq$*Zi;n@9bt~8Y=GRGnN=V>9*;RRsaOpo`>C`$y z(N%e8aOv^ZH30FsBtD|?7q9+9hhSu#m-$R3(fEs3|KZjc)_Ixn*7aWS`Hd;@X%)AY zhN4V@`s<-Kv6)bmd4SJIjI zc%p?0Z`r6ZNDf{?eCFU&DeqKkxY?8fMd&Olz1#g$+-x!kMfg0f{mp;xf=|0x+ehA* zM}pW!wO*Jm1e2mRIlnlV< 
zw=Ri~?4Ro#VDK1X>H^|U|6JwR!j2r8aO@@VApY|=zda>BqW`jTXpt%{!5P=?f2z@cAEI@aY`Xi&@@J>0Asfb8n@{zcOFHeg}92 zAMftMeNl>MoR9Il3(n{5416d)7Pb4@F5jru%G>dqE`=k-$IRb-2|jc1Y23nfxE9-K zRZ*X$rA##C)5d*Vha0imqdw}B7<1bqI9b@A-=BfcPOWiJ-v*hR*LkSSIRp#7Me8R! zr^AcEG05;^!6S;6IcHy51LWjI;PZ|G_|zI(T!0Xr-$JRsZReD5a|@By?Zlt9Haz44 z#PJ-WZq%{EXA?o!yQo@c#Dn-;f{z`aFBRKWbaz<(!Q=C+WR3f1?O^Z&eBL<+A2hMr zE4RuupN&pEvwaKKQ%1d0(`_{PysHa7yT$G8MrkiAJg6^md^|k5YdRkCpZ^qiw5toFA5Y`h0Vd}b~mIy|M5o@JLWX7e$8_2T}|pD~vY zFG#ydZcvIfOEp$DA6}4lliVOv7A$oVcndu`>ev9N|W>$+UMBwv3 z8Tj!1ySOljHLTq(w;IR-;^qj2q5XA6-Sk!_@SRlg2XnOBW3J;(#OE@|$FE6WLH^^yzbgN26dQxbJho2HhJ(=3Y#Vk8%$N>ZYW!6B$BK#`xM{@Fem9)Ca{d1(--R4#K6A!qiH-Jx-e&XqjXnTJ?$ z1%rPhQaBQRKH|eWSK2wwJj9BNDG|8rSBa`p3ID4H`f~8u1D=rldAnm8FZGxFBRKNI zeEqG+C*tM)l8?ELcMzYMhYv;Q%Vj8`7T!$_Pr}Q!9k=;U)fBd&zGW zh`(|aAHIwc83XPX&`gNWCHU+X+q+rWW*T?b8lObi8v3;{5Af-c{M@LP>&Q^B%k;Mq zVCCyQlr2%)CTHF^`n>@%6vm=V{}_i~e7)xZ4p}HNa@nVgJ|{>e7fb(Ii`5bzlAl`j z27>SMYNa#H)K6;tCvkWxuT!QTX6JM;{C#2uiM!Pp2juC-99Fl`L&ZUP4K5v|ZkMy6*MO1_Y zr%H>?-zQ{fR)Sj$6#_BWTfKCAM*axxNSO|Sd(E7@daqr~Cl_#_7vS^rGw@NG#KBmgD;gJ|^zx&$g)>@=z~?Q65I)6DLm;9a za;^+%YnUdV+=~(T{PHFEIQ<#vOeb7^h>a(mHN-4CKjQsi$>|YiI?@njWrtQ4%Lbpf zP9ynvg+~}@AkwhVx_r_I(8}Nl`}3=80Uxn|Z8BvA(~5WK@=3Xltt=1%`T5oP0zSg{ zJUBjSlb%?96su@E-Z7FeA4UHsyt`$hbBHV zl~1(zlv>x3{nFGc)b@bnM>&JV-tDB18{~Yy!7JvRU5rmg!{Hf2Qq6frr6!u>zTXgJClHvaSAP8v^7FgXP(DHV$&ybR`B^y}TYjDil)-a|GEYOqp0$dlnY8(*Lk4?iT7d1WE*&C`#-=O121$&U#B zsJaCcq!Ap$1Rsokt=tsV~k&frs;MezyBPonrU9ajyHaom;J z@z?4-XzGdA`uR|9R7Kov!s(!Y{h}a8tO|kJaP6pjr^=0j^NW|K|Z#F7~Es%#tBO$@Y2;`1aD@E4$(0{ogn#+X1;!QfBl^i0yScFWxbma&Sz>b$!zYdKtYvc{_{?XH zeyMzxaDHO=q>PQNYkmLeLMR{A_NejIB>r=_e#Oa8L~hi&`1~vbA6YoORNUGsR>}<5 zb8EcE9#8C`vSi5`gtRB9#*h5NBkS`V`7>q<6>U%iB^<@tg2>&;z- z4^}ALN~x8LH_9#^``%@ec}PB3bZ{vJ>2XgYtB!Bnjlx6g`_Es3kBA?-_NUw_Zqd^)`B~Tg z;;nk}lY~z8^0O8=LVkX=8>(Nyy?gTWuW z3mh&#&--*{^Wk~Vlzr&*b)ZWZdpPmI1!nv_$pss-KXmq+Om^Yh*){FYp^e}=o5lOe8kou2jv zIV+?9`T3Iq`0S#H7G1j_^OMD67bUbl2+R-s=jZ*&>uf&Mxrwd{%F}j!%p(`FOU|IaJeas{H%B*20ni}1s}mi-anOaL=4(< 
zJ0(uBY+-70#gf;)Oq3ik5%76W*Wu$-M&g%qrDC~0wD$2k>LPiB->7>Nd-KLehJ z?7|7*wmAi$wkVviAcZ51a)p2iGkzKVjQGr=`KX6HvJXQ}A)@50)_J3YcS{$u(5d z5aYLDj(%mngspZH20rg+QG9IQCpjI)H29^7H;V_&D|_C_Mb?G8yM25!MG} zf7Ze0&%2>~RL40f9meAL7$H1De*TK7d~m}H$EJ1;P>6lTj}~rBZQtWT@LA!u%bo?d z&wnT7C(HbK;`70gKmUA4kF-DT`Fq&;X_5%Nke?5R^NZ&{Gz;b9w?9efNc$7cCynh{ z-}%M!pH&Fu<2XOg`XyaMo%@v!Kz>&4JChNrJo5Z!U54`U%TE$I*~`yb;0Qh+G6NsP zpLzL&kmbvt`=hmjA-=JwyhIumTgUQ6^7CvK#YgPk9|WJ2oA=ct0-xs;z^8q)T&AL9 zNPH4edb$;tEalMg8R$v-UavRd0aX-x5OI2uUS~qmp)!-oBM<){GvX7dZ-a)GTgnR) zJ`JmHgO-+u35Qwx%faWLT!Ig$K}8}-Zk+mKelJMBj!TVL;DG1sVlo*Hc==T!KXN}G z2-c@mIk6*qU>w1x%WOVq-c&`{i|;L*{7kWo!3HIZZ+Q7^@?73v{5g%8UhNi@_{>B2 z_$4P%a20<|Jw7RAW`&DA_#6Tc*DoPIc7VeXAn7MMN2glXc(nMWE;IUol}gc8@Hx7) z`S7C8KKrA3iTc*ZFF!i=C%vvI)5-iP_}rO+kDI^Y&7bFXJJbC6Q?lHqTrYrs`0%UQ z0{M~Jr?O%?%os$(aNxu1vvEF(e`x3a_|=OJP&KrRG0x|MqCV}zy#81OAE92G$wNT8eB!i9 z|E^qw04(|A$|E27tz>`D*mSGfQCD~+(jk4J1pikH_u~2!4ZD+I%yqmJeBMNS?E3ry zlQciJ&0*VRRzlVOmuqEaidl&I{A|QAg`1xpZk>9(^gc@XzdC1`nPMK`^P2_mxlyiH z(YfIIb{TCj6gvbd5*sp4sdcPm+z>^tVmEq&J{lKXKklIohQW3SeBRd8;^SNCGv*U# zhr;g6J^0m~*7uLRocj1c9U5r3teg}2m$CRQm?}Su0<9^%!j)~}!RPm8v3z{8qfuu< zmrokGSs5IdAn^Hvd4P{BHx^ZABIWZ!#(pAkv@FG_Bg%U8rRxw5Ym^jRQig z=K@2R{C@j@=0T)R{@9H=NCTz;RrrK`H9av6kcaSJ}KpA73&z}=lR_TK7RSp zr<0IR8u?iT99w?q{*G;*W9$!ad#+Wbe--v;74_&LKQ92DP(FV7(WkQl`B?=VAwMr@ zM(_#RA9=eeJ_Bea@^jMbjqL^x52~RY$ zjD4$42ZKa>Qm$Vi`}fQpY4CZt5W&YUKl*f5V1HHtN7$cFxODlbp|OY%eSB<#e4v2* zJis8CSnBD2&He%TVbgFva+~27BP};56`wSA=t05(Zt&UcF5=^q@%5dcMu%4UA2Ai5 z?Celxf&@N~1CR87RQs5ClpoHY?eoLVmt-zJQNkdi3ao$j<^kX~bqFa3F`^^TuTIqh+3h z`n7;hO8HsI4wd*Mksm!e4_f&L~VY**-6-4FI)ME$Wd4opYQFu^5d*ukN%Skt`n3``!v6e>B0tP&QFMK|{zL&jhF-RW z{Ji-xM1E}NIp`kB5ZU_mYb6mN@cFrEG#|fyNjhuSua&^z`t_m@V=5o_3P#u!16}ty z%s&jjjGEFpT%G^?q7R!#@$pNKHl5J*J)BP(O3sQU}DM8VvR_|y%Ix`2;Cxf?W($|Enjrtnd($c5Qw z6baCY{w&~=QSQd7>IHz$BU4v?ynOunrA;RRpEUM)Rd9s-Fedn5e=1apXVrzoodcUx z62OQ@z<-XqVwrM%U&-babA$Y_xsQ(>&)D#b&jaC;Mt)WWNAP(G@VNWa#s1+k{&Tqh zlU9CKaQ05B0r`2!%+JRTZUp3Kx%2mMKB?qqMQ{Y4N4tJL(T5ky#5cnEq?Di4#W!B` 
zQs4>KFTVh3(|N$_*Q($M`T2^u%g6N(c@c&5)x-nqm!Z|fVSirdksp4MnPTrzx$~$l zuHhmE_-rCU0WH(|gFC@83MqAL728zeGjrt!Z9qB)J3HVLVWYL+KrVxPqVZ^%Xm)2r zmHyFb?e=}tVq66qOoP_1*B8S0M9YuXE;Yy}d-++#HkJ5XF5=_2(OPuELF+~ylYL|;U zjm9>a@6?Z`g>EsHC>b@znll_8rygsaZzMjK;3JtRSC{1qrfL{3aRWCx_I$Z*Gts!Z ztfgE&>#TUt+2@R@e-H54V#H^+Ro!p8$EV~-FLQ$0J{k4T&J8^@RR9s6xx%MaZq&+7 zZ9k_`nvL(}sA*TM6>k+|3yDZRqu%foc(V&{k5fY+rOPu5uz0h)b&XPWlsh)x`4w*Zd9fnSswvwY}?4ZV4U_YKITm zPJeuoLCTNM=ULUMyZc80pK>9BkGgPC?I?XZERIi_HEZ>7fQk5Ag3ngtpb7_Qt5}6@ zx%g5)+ZtZ<;Q$?4edt!|QP%%P@Y$P!PvaImA5^AQyL-x?iMD&OOO|Icjr;I?P?xrE zG91hZd|F-bS>PXvD#uEB(Tx2=-OcG*Ttx7B-jwrL3?&#>Jowo1FVDCHT~nP1j=8QZ{eU#=HB zMXDsVRV=|pjN7M{hR3(DgxMRgi8VSpxzE&9Q;A7Vxv&BdPvSo!;4yx_qcAU(pQ?l!i9SJcC2ec(bgx$I5kFDeU z#WrX@;4=lDhqkKiQt?)u3bAoT^AjpEL_cm+UsWnr>+!oU^cU`rkF5KH9@8RZF~mY{ z^q)Gi`U5N3LaF{y9>8uU6YUo#az?IKyZJYse@}%;}6)G>m z^$Yupopwf_i}WkY@iO{YiH9LRBtMnLL9NKQVA&w53@=VC+Ja?=Jn`JJcmw$SCfX|Ns5AqWlH`#uJKh?aG3HOs8AgC7^s zlMtSfEZe5>AC*pq_t;sl&ns+U5kF_+3o5dO!L2ca<1@K`-ebetc#So&<7cvswZ`x( zdkOKGfscsKXZP0@@%ik=TNx1Fe*`}Nt^ht;t%G{|CTg_7eMS?Er5{_Pi^2FFYP7*O z?%i8@xU~%cKL36SK8}95_nJ0u6kFBCL7VGWJE4L_>DQCis6V`bhC#i?X6VI zv4#b?*e2QOHnXum92^e^ck;H&0H3?0UzLOXtpu`r}B}byF7J@uH&A=eVUB0-id=i3gr*)jx3Qx zo%q8nVZFfTHFNNBPPT{qFY=v>dzt*r+V^nqMZU9jHay{>Z_6pEq%M1O~z(7W`Nk>1h5>I0K?#68J2+UvN41> z3G2WNV*_HF7%-0~%O8oChkSqUd){;2qq85WO7EF^X8NjBs#7{2{r30w{$8$MSaa{g zGxN?~40l=F8I7i^27NEo5Nfio0c^!rh;)k%Rsgq zn{#50M#Bl<9gl`5C&PYEbd8^yqbKS*m&ZW+AiO>00wnPHz5FttrN#v5z~EftEq>v; z{(RCb1$q!IFPPmnpFf-}^HCkV?cS5n3F5Q5{M;!VEC^NDwm&i z?8Wtn&mC?)6?{IWBl*zA%AH27P^eYP_}MC7HXmtUR=L;)f46k+Q$H1WxP8vCR&B2> zvJ6)gWa}7)(>};?35pZ{kV86~u4lW>ro@Q&%)zJmaY%e$F+`lJSyC7-d_6O@0 zzKa;h$aP$+p!dD}1wLC0d>*wa^)zbh(yvF;!8xm&!;b*{`mcb8%a5%8sNv6Y_>2fK z)&vgQf2rx|>fz7bI(G;YO_T+w`L_Igggn3b5uyW+BCxSQ|6r`v)2)Qk1HIC*eA z1&(_6>PnnEnC$`w_*~4uNBFN&qt>pXy$%^hu|!+EYt?eCnoxB32P^dJd^DU6&|b$A z`R@5SuSgFEz2RWPZH2Lg4}6}?Ej}pgp!qQNuZF&m>lg2T3VoUOKLnq_jpWlZ3N^Ne zK6=!dKa+cUxv79mHywR}f9W;RD;S*5#5@_YGxX78LG&}Zr^k*IK3hcA!tgJ>X6<9q 
z*Tx-uroi(|v;C-iWIQUP$+lH)&^FW}a#efLr^(8&)6%uaFsRMxlis!Yq{p_#lirBV zpGE=k*k{oP9WZd(f>4a>uI9{OMKMOM~M&PqTyHf<+FO=bGLACC*bp&3-H1I3mh*Dox(PP;_fazpnU|qJg?x4 z{Sf%M{o^)Wq0zdQ=k=mthRL(e?brFlc} z`4E%xLrol3!^YC`AVf-Zbg(TfJ)=#U_d~yg6~ll&7jMUgus*z7YN>9YV~$|iYlT|{ z4vcq3zcfAVZHO7<=U;cghmP=+78z{hDg~d@YlT4;IT1}q2~q(^cuI@RafpQn%TtkI z;$e_q>x)*Q5%~OHb47j-Jp&I93!4^@t)22-t5{&gYN5D;6|z_x4Y>Ug%Yz&br-9`h zAGvbYAGUUSXOr#;>kdx3$5=blVADbA-u=hKr(o>t7$x2swU4d1`?bQOW}3KJ%2fde zznkaQj@%3Ll3ff>o{Y5;4f$3>{{^4JJope4>`$)O8tBkmLm0geB)rdQgbq#G!xQ>0 z^!*|DynO~fdkv%dXs_MCCd(#)+Q+8K-CC)M91Ov7vGJ&3v|9-TYHwr?o}5ib*kp+| z%n`(A!cK?hW1fK_T<(t04mO==FHqofbOSzIaN?|g!IMR27mU@>E%%YpA9eq)0?#w0 z1|q>G*=AMKlGS`vEh*8ou(3$o$^1xNbBwY2YJDF=e%_I{47-{FRO^-J69J6B|=v-<@;gy)%q z8vN3J8y>ukf6EVeBt(T)Bwj|fU_44VbmCw*WX+2yt~bTMJzS3)Bod(^+&adS4d`s( zD|2vSKGAr_0`ZxH4;LHi-w>&VpLQh@0$aaCVv$MRq8kRE z)a65U>W`x0I2tcdQ z3CJ%p2nc*GnZ*bF&360PG9!F13WyKvRScILE`KB3m4E-(>X_~>hytHMN6OFo+J8iT z`VrCpbAJdtF8vB_e|D8!mqo_)z3tys54D!W>ux*5AwR?U5+7B!?A8|KrSa*kWPd`v z*WJ?L_Ibp>2Q{50)mQzO2p>=g1p6Z(#VG(a&FPrxtEqjwG7a0~;_na=L~e zZJP|1P~*UXrUmg?-8SDL9NY)^j5{tqq7w*<7hn{pQs^=}_BSe@E8Yu)#f!TSU+njj z_5t`z^WdY#KTGymRK#3`j_AL3d-nP6d^_DG@mVbKaVb|SI=5$^@0Jed^Xg3UaqT}X zxAs@qfy(9QBVzyQ{lCq?NA4OdU`}?kRjcopt3|nfTG%&^Y8czwzzFtYHQoq?CLNZ7 z1?CgX$sSLJS5)uuUp*~cnAbzh?j2!LKzE?u{Dk|zJeTr=__@wEp&nu>p6{=85Vb34 zY{g_Cn$z}@(b#<pqs=+DY-Xx%DYO#7IEjP)`a-S&z(!cdOx z=)oeomCMq}N7QdYem=DD=L1Fa8^iH{wt26}XWfbF?I8l6KeXj1 z!1Tc6K-rLF$U*cm!fGx*OE!5;2Io#^))OCFek324=}AL}F@HX*8=O0ZBka!~G3b}@ zU+bGs!tL`%M11(hx03u6_A$F|8&f16$r6WZe$*-uge!)A$@7We;xRkoN%#78Djm0; z@s{qr@pERrd|XN zNU!{4Q`sOtUkp4#zrY8*8{*)UnwZoUaQl%@d-=e!+DGxw5_~pJ;{l(aSU7yR1%mLn zmQtOSFyZDOBd^ZX&xD2t0EnAglUN*zTxBHu{ox_ALcaW z(OQt7e+N9a{o(Zsix1`TE*V1aM%)9OeB_4;75;oy7p2>QBjo4bFBCqz<;U2uy!w}y z&uQ;f>|D;;SLt5vX-DDXzy76!Kk=5|{qze>KTdsYO}W^um!rYwHQ=%J%i32F)B1ceJ!d* z*qPD4#K82d)8X>-UXmXvHg=1ha*hF2pA(MP@`-=4*=z^>`iYssN2&jalhRN|JiziY z7m5V`@%ne`q_j&Do85Wg{u1lTg8>e>)OdRV*`J^I1&5EUZ-_0Zht#7`ZMU$jT;1IU 
zy1hobjPGK61@IAM2Ga@OXi1a!&qmW8zSH#!e17uA;e*mLjktl((EOhjzC!uXB%q9Z zfzLmDR^j8PUzJvJ6${{#V~&3Hd*>Gz{;ao_5uZ*3AF@9W`C(4Gjb==#iowSr^N=6r zB({N{(pEaE66x_peBKkr=fJ?3p3pZ-h~jdJ!Dm^1w4?++{|I<&|L&t-{BZLl7GHMg zlaIlN*R-ygW&*P$w!}xx-+lPtc%qg#0X_kw7<`2MXikav%){hI>Q}jCRA?CDE^XeW z`gHhL3_j(F*{5NMS_=^I`EVRQus;r>Xe%9qkB}eDDS^*F&d1@ihw~fvFk~U79g4xn z99-L{5T4^&`y}Z=bzjtd`f7&MCr#46GcI6TWl!gWyE+2=H+va9!nH~gcsjB zv1&r7DpC|JdKl!?KdQyjv-UQ_;p76tIGyS>+5oWleiw=fKL4~6%m*tx911m(0abiJ zDfujeP$qAQY9TLUTiot~67f;vpJn;M3Dvb~uCQM$Hu?D}=t+>mgseW;=ST423F^y z#3&aA$jfKzc+^Gij%loXSmXodQaD|K5udru2jx|_`Edj%l8D>id--^Ah*6_~QX?mk z-le#>Zvx*DpBtM`p!_7}RqGD*&VOP=&F&ZDio{v%n+eM>f_i zt-mY#mxZN{++X13b1UobmPHr+Pq#6!g!m}(Bl#e-vG-n7edzGHto{-yo{i?SdjH0) z!@;eT?mc{%SL6rkR4|%N=SK-zjy$B@VtZ)$~~i=I*I<%6j3#2k;iXnx~% znSbUykCHQc{rAktpJIdI)e!S0(Jj}#GDjF|*1rzlqZju1#r+xaS@`kc=9s4Vp<=df zKC-bp1|6X}q??=J1U}z0Cq4}W`v>{rPF%F1uPcnqV=Gq=eVy(<1fTECgHNTlgWZ*l zwIQkukFlo`O{5u~O-r90@mYWmLgV8hV(fBf)J+S~x#WNRPT?>MRMmU#;(V#OD`0eA28a zyYJDT&+?L>`*(Ewj`%D*eE531Tq=#Vp0j*!{(Kzy(Q>hL`6lo^@uBq{w|*sEP4-^c zpU<-V=uZiJ-VHpw5AVe?FQomdZD;{7ZbOe(1G7t0D5|lZho|g$}pR@6LPq7}X=jJECfPEaA$sxV%4~ zOe`sT_;kR>il5h(p<3~C*3&QY@81tRbiUPIt%g&r)|A5Ie5#pYhz*D{|Bwe-3^72>l1AItuz zD}p3QY1>;clb`4Bu0`c7oF#{(KF93!KTx}^#sb$}rvCFF@3J>BCv7@C80U z*a4rtGL9~QwCuHWVvWZ>rn*%;sJ9!2a$a?u``YV`FZ|En!L!}VtLX@5GrCVU(DJM+ z@cH39_(=c0Y=Ep00Z=n)aKUONZ~BQYc}V~6GC*QoM|gh43dPi~JL{=aKY!x02;pOa ziOoe`I|-kdjbEGM^WIqqA9(s4&6&tCIcWmK9*`7;cS0xOBfOZNQUv+=5f;pc<4Ge! 
z$@%ChMd0&ebMUDh(ehW5HE?icagPSkHl(Vg`nPsX%U@&Gz_FFxGa5ZRN_Cx0{}TB8 zWCwgIwSzJxU$h9ej<3q_vPa1mErMnF3w-`5@W}W%U~UxI8!W;L_Z4G08|Kf8`1w|v zTIBNmT?7Qgr|#hc9Ps9TM-;_H6u@Work1;e1HmUga}OU%;n6X_`v{Y?T%&2}$UUfG zJ8FE?t;)|`%x#f20epTc@8J_BKk*3Nn*7`)93ekHwQ%^jh90gmf!^Yi&7p@uaX`I&qOJr1X?wd-7*5)R+rheEdw2eiTG7w0ZM zJcRbl&7|y|N6@5jF(RO7K5LPmB^_OVL=8T_l=tD|5*u;A6_JU3N>MbQ)#c~b;RyM8 z-wpV1(-S!y5|xgaM}Sd%^>$2-sDro$tKP=O0?U#?etxC1#K$E&+9XIp9#W?AOg>gCU8EhcB#D&L~;2>p8h!k>>zjD+V}kxxAN2|Ic4 zZs7<%ADBr#Ys!N<**2&K*{Nq45ISsu!CDW4A^-BQw#${v#ivN3spi*k#wG9qvPWgSYoRZt@xMVR`VW7;T&^*xs_vK^2C&Vbtre+T|*18I9U@ zvtF(mg?&~j8jp+3a3fSPr@Yxq`>FDCi!`(T`3o<)uJ#!Y)ieN%Pz;3SCkQ5L4xjLo=;N3;PV*^@G+{ba^1kmo(vvr-@v3F5+NLjCMMfZwM+)q zvHpZHnDnkpoa`w|V;ANnrbY0c7s*Je1;x7JW?lq7dmZqplpkYxnRxKo;q;(J%0;`u zNPL>;3aVA9S!&BFpoMEfrS~e9muVM2pB+vQhNM~6=4BEfu64r!wM*S@j<5<@@YyH% z;e2vzr&!#h9m@ry0uw{6QXERGk+iP2!TIFaakqO$M&`uq!?@%eBpQ`aUGIDO8}K<| z;6q2B!W5%Ts#&YzL8(z|V_r7#vZA5o-wgl0DNcnQcg^u|0E2u!8ctDjCSLyDzK)Rk zwAb$s8EtOY<}_XK`6l3ze4t(}^l?-$;1JRy@{2;-cnO2)+vO&*jBu@Tm|1+FUK8|j z^f6Es(&HB;o=nYGhU|RW8zaYvdv)jU-1Z~z`IVW&r&NCVWsH!r*&HC9D87U~&87v& zi16$3{aSp^d#}Haxpe_-4vD;qp-=%dy&{Mnq6unqD3cx!30Ff?L(@Q&t-x#u*?wyuPwle8zH>BZu(_ zqbJ-x(QvgZ94tE-UCWtkWVwbH;F;jXc(p@>(zXggokZa11dJjM&|*sq_#+LWXULquxT ztRT@iVlUN-@LN^3(~i}^$@v_cqvP($l#*0oh$xh9_K|2@vzLZ+O5lJUPql8~g!~*b zFQ4|#fzc@QbY7b5_-sfcmiSDMFU?Vpr}NTe>(7Sn1AN{=d_2|x67wPNwYiuN@w=(Xm;p9l+Z_#Ah@N6jy2$xFnAw7x@&Jqd$Dz*qg3DEnv_{7E4R z`Iq=yE%71eQhV7G9g+`=qgwp0seCMFW!1&(R7^Wx<%bD;PJqWHKe!(yezt|lPBP*+ zvn4)i{LD2gE7C-K=HTNK|3J;sYgs(|qdz6^InDd?5#ckP4J*^s*wHhF2YBN{cl;A+ zpZQv}0Ei7eNMFUNI(MA25}yV5sP@^dUu3QMyjn@g*0Zo=K1#owNu#_6h)>~iiXHkS z^Wf8c(TC4locvvSd7*4EWoVhtTb%qoQgY<7pj&7*Q|BN)KK^UlhmXuuo4GK-v7Eqa zd`N!e?m1dJ-CUWP_{<n%$8$B7tY z%g-h}KKPtJ>&TDS3{li#i4m$c%fYl?NY0U> zj>E^-3f;Ylao)7gBk;KZ9$){ZW?_!x!~^E%zg(*r+#2gFIRX~(nS&1+4=ZI^ceeaC z-^AR;-W2phiysjs{>sQkL7Y$2@5$v;&1AwyOah+*pI#n(S~%wv#S5{#tnv{&R6KMP zY9yZ);vduk#qx5-`MeW+EdL&ApA|Q6=dKa{J=8w?+8@oLAd;U&7#|`*>FG8Ppkv3+ 
zsqJ$NK6jbDg3l{2hVfDSJLHF_Wg<-EvGsOPK`|B|x}N4iH1U~%k5{o|l_Q-1#q(eK zS2>8!F9h;&W${9{*c-Q%l6|1v_vd4ai?vN96aM`P1E28v7~k*|lC66&E!Q`c{ta$n z_=cwp_%+DSCGafqL6{@_7H6eNLsvcBG>d4K-)Z>wke@5&=Hp6Bofd_FLRi4%6+SNW;8lqHCz5X-4eVNA3BEF)%=qkl>?v8@A&g^uX^R!?oK6sgG&rPVZouRRLBeTr<>`5 z0iO}@sQydQFREvxvqeGrl{I5TeCBRGZ+_v^o%W4oK=G<$F8}rBhi}?`Gj$t^298KV zUfJ28sREzz!p+A*5|@O(fmY-v639juxcuL~{Ve!QW^O)C^N-`a68Enve0Zm7CYygi z34Eq3hz}Dkj7w5NLjHWh{g#dmQ^e&qoGBDj#R5D>G@ks$Z{m!ugN` zOKgln`Gk3P&5RM`=jj}LmgB=!)70?zCp%Fz_`H_)<+GX_3+Iy^mqvURVSLgw*Fk$% zmtl8e-JH2gZm*Bwx z=1Ux#{XV~~@s)Xg(Z$9lQBq}N&vxbnSH9bqg3lLpz^6`wZLC$~ExXF$%pc>szx>|R8MI60->h%h?JEvS4v(T+vRKBn4|L`cB3s%W7q-8H*Onu~+ z%E!n4Ag{o=V0{d#o3d9hre51G@cD`v_+UP*@rW|grMX({kWL}AtPg|c$#QhW9bvwx z`NS^~}@xZK8m3_f4U96t7dNM~t>xP5$rH!1k2X2wU$G9XLf^Hm+2k79`B*t(R0ke6KJw!%V`!^%#AgmZdj?H;e%NdsQF(`^tkaB0%GRw2v5p_JoyIPTt+3W_M3@+= zZF?rob$%$0*l|}mlrV@dc3t6x?3mJYcU-N7^#-4>S@_BiPB7qS7S)S&YDA>j{7qgz zj3GTZznlB7IQ9p}5}{CZ)y4FFoEJ~Op?0lq+L(jS-<;Web7dvE>d8Th#7KV#H0#?Hg~P;AJJ z5$!-dlHT9RR)Z^vuKC6o@a&9#=EjJ|v|)U@m5h;6S&2Iy(!vcd-+QUw{WIY6F)T43 zC5aEHv?k%>KTkpo$=)Gm13kNCh+;0 z1^9?WUD{_dKnY-Y=wQAvpD!T|kS4`;pZ6K{6!`qlGnYN4>fiBL9B9Df zAKmJNL7c$nXPM+fs)XZ-=-3^K{|XhW(kgwLmBzm@hG=m5v{Y_o_kqf{D4mecQmK`PiCSqFg!!RH zACGe74scLNxzvd1+T!aMO0AbTZKsEIF^oFqiM*#Sv^iBHK6RUqu+JiVZm}&&*F~24 zAg7pYp25><52P%f{Sg85gl+LOolcSCSt*hUMXtEr)(8!kEaKA%CpX2j!E8phLHu0HDK752$s&w6>#lk+*2IWkp>z?}npPwf_Xc=209ln2ukDbdl z9qQkB>o46{d{Vk94|93~{qwe0&u(_X`TTOnhmTzG;ubg{NB0F|J*n~$j$B>y+MGWM zd(VD9gZ#V?c({J~XJ6?^5g+nRR)t5JUER|O4r{gnsqWIw(aa$#3iPTFB3@ENuC)+F5Kwc5x$z17k(aHRf9n*DqmzayT_s z6kcOqbOSE~pI@B?^07v?08>&c6v&4{qA5!9x&_Js?7`>#jQG@AjA9@k8=Yvbjr=qF z!wI_xveAN(e5uy|5}%u8`2mq)wN$|Y=j@=pw^xpJqMrLUm*fXTii7h$4md|LdUl3H zG$UW&^MTGXAG874jT=hSp-4yL1w%`GygO3d8m89f zEtmGi8M--saJT4H*g7cgmJL>|!M7#gbI>~-^v*BTGjz8Ox~Dyp4Tgvrvx0~6uiqp- z^;!e%b@r+SEYKm2IxjNObqJ}E4GMJ>@GHe8bZuwnZTJl;1NnT_aon^W`tUZ5r#R}o zYjquR=Ox<^=+t+1g*tv;oKwJ*V?n1UU7w5J;ZwwCA^EKF{8P?Hxlv2GGI;*!TNg9Q 
zN6OC{J|CC-tl@f_x&-9sx0r_y^eOGe3+qN)d@Ns;t(_O*Mu^W1_=wnWxiBB!UoC&W z(!XbMO;Ej;(!IiWFTjVdToi?er{&W0WW?1m{~5=fsUDu&WZm7liiSTXJ|7O^W2LKU zYGeU?ta1=lKC*!de7^6|R(Sii2#i-^yS!iRU(@uflJSw(w!&<-8rzH!Nich-3q2JO%>?)5hH zI`Dbdvq|_+wIjw7S!_O|!SIvH7TVG7YnM;&bK4 z$E{jKNbUVmQ2Dn1;QUJA-y`*l^I2L))`%1MeBWH*!y9>RYm;L%mMu*zB>C`0UfbH_ zm_38dim7$?^5FCCd?26nOV0j$$kAtOL7w<5g82x;=Xf|x!IjQG4q z;UnVb9CpIh%Gja;61I*h8=1L#M~fo%dt(!w--b;IZY$;sd1up9h}kkXpxMbcZ|kS@J>WAqRxQyd-2*rD=MU5F@c8-jCbn zEIIL+OFmX(W{$(jNy)*EN-H!xJmmHII3Hq6`H{8;Hd@TtibZNL96VHJ-A%5qy0R`1 z=e;F)4fyCpdnlZ3a9RdVuX2cH?|V=*+<KCD!5Ac(cAJ$`-p%yTU#KF*RC1D`*i59K2yh8KEbqm2s5x9s~b@mYXRp<(Q> zg7K*65hCATi+r*$GLPAb`2>Xrt8rBdQRJ^`^#bsDGx2E}joorprIUK;g5O`$9G&(C zDjhXl&xY*KjeyVRbik+8t{M;>S9GWlO8l9&rUMhAqi*0?THHx|$&LI1_~e1dm(MD1 z$cN7=m)qFy5}&!`b5N{^25~-+WTp4S`CNAUPJ{SLm)qDcgO9P0e4t;l{=;26ML9^+ zt**^P?@^VXpcq=+k!pp{iT=i2yqNfWSm8t2shnzKb!n<;h8UU<16UkBOF0HN+PZNn z-P`{g!1GL{T?GdePwAi;#m86oFk1_TIzi~eKi0~0fZNBi1RXr6_;`N34AnZo=c^as zvs0_oO5BCX!K*2tC<4DT><`bm3v(s#Sqh!`-GI;6%oIMJ{ex+Q2UnJSs03)m%TAz} zE&2I7H{in$PT8%M*w!A_xE}4b8?9nvn|58&laM8P0q<16BcoNWmLk@_a9j#I9iFqT zGpuhtIh&3q-4V6_TwvoRJuj*pcoiS%f(eo4q&GMZzo;Ugcl8kR^LH2EQ!keo_EBO4 zZ3TFW=wMYiQ9raq#i&wU$M8tJ8iy8^_Wf_E-aBX5MTx<)ecgVJ9*ELO;&FNe0cvVHcB>) zQc-Eh+$vWK74}m3;NXbu7P;1mmssuL;i}Ozr(^THoB0_);KTb@B_E8D*xkhlaaSow z1se0QE%fag~B%UPFhM5g(ynLOAUB2ch&TGXf+ZCH_G$oxK?W;Pakmx&5(} z2WMC|+13Fzr%==*w^|&O-;P98WMbipWqs(951nC&Cd^Co6kTGy$%WnieA#=An{E+2 z#=7Pvh(7D`{2X-52gK~_CljXOBs}^r>7PjSRd!hcCo$U94Y>(*sk|W@fg;vY3&TDVFC4iT81pPuH#CfWh-2wq zInTgHlr_X&N@NrDTC0T|!d2`@f}PR%xw2;xS=ZHYGC>mIDt08g-lkp*K3@zx&-iCs zImYWX&KWatTI=|&?D=P<(i~YAb`DU%>fxpkg#KOPGmpgUucW>%*S-EZlr#)yJ(|hx-+bbfFe#Ds?SC7RB}E}AH8)J5B`KX#!&W!fSo^Ve8>KGu+I!V_+UlJZeTWNT%Fh|f(3A37_9^#iQ}EH! 
zff1j%o6oka#bcKoX_t+thPsfom(X@%K^`mxA1xgi`23B$n~%$XIia)HbxB$A`!^*Y z?Hm~KS%mOew$E`LSYke!Ixz70vRMS5Z5sZU(22$3!wY0rm;~rLe^0AS41B(vMesr8 zaG6k~KTFR?FGogvZX)=2cb?sL%*W|}7$JPnIX zz*ga-lOqG4uLPdJ_=ga=^KtwaDHr^gP%fccRv9t3a{Vw&ztm9^8!jdipBpzHSN_FUcxXtA z#H83D>B(+Ee#pgZ;ljY@Z+24hk)CYrd^B=l;PbV>6Bz%~{F* z;lzl~O$eVxaTkqJyKE1;df>k*bjFHmql5CWNdGnJp5pM2Q+9?v82GO~9k!yM`=a%o zukWnDhkRHn`HAAAhx;Nv-vB&;@lTZeP$}`2_~_uiIG?$jkK?}phZJjDXjLKywz7U{ zl7m5hzA^9S5G^pQH6l2lqwt^NouY_)rdZEBd8}`yxL7?JR^3`K`T1u^7i^ zshlG9i~QEvsN2=hXQ}rAK7X4f^Ks%I8q6;>Yu)=Vmh?-Lbe%I%{{#8?7T^htf0pcz zTrypK|Aj9fEtX7GJY=80Wgedo4}Bhr{VAavFp}CI9{M~Ke017?0Y2ZF5971V9v4f+ zmeD9-pFBHv^Ied5rP&_^|$xyUQGe<^zZrpra^=HhO@U=gxBXb#i>zjCd zeC>}Qq=_p7pLr(*A2o+xGas$8c<}lAz!Mn%NClJG7lt9yIbht8`EilYMaqv0av}=Ov%}tXL|sD$t;0C_wRz zC^en|W)(?Ji-92$KQpG7Di zU_myDmH0$ok$h|{i7tERF9x5RSty^E+SM||CgE2kAN@qd_YT45_d1e~9SrU$`n7`m zaHA|d_VKcQWk-HKG!NmE)=+c#3FDIyQ(e0Ejz7!52LWH<5zc44Pa{~13}-!5iw$X_ zQ;1N(Y5!)U-bt6?w1+`=IIkr>@HI(YnB*f|mmi&MI56U~h~l$? z`~>sKg7i>)_>NDUMe#}D!lLCT8x9P7-a2>qARH9YAq6H>R$NhC}URS#JzkTxzeD)f(myuq`yaHoaS9^nS)+*PVbZ~)#hO1jzK5Wvv z8jHmp3rz-?sOZ4wUn~?p(hddlAuCjg{A4ZpVaK%2!|RBd<@4(|#K)+Vn>+2s5iNol zk6eyEjVr_XJH7GobaYLNVCEB-qgM@eBxv=++`wbV&u?^m_=Kuh3=vx9lT{G{pWmE& z`Q#J>#BEOwXB0J?I3o)&>+T-MDsC4Id+OY2RJ%R7ds6;i~JHF%*qtzha4CD zR*okf>*M3gWsF3GHo{+;C+0|=8z#@l*c>$CvsmH-`y-q>v2n1}gxusRB_1e-TaJYK z)pG5CO^ok3pyIq2eJZ zQH;2%{qfJZ z;6rgCjvT6B@pL)2NEJX+{*`0nK9)|Gatj|&+DZ@A$A0erydn8P16iw4-r>o`&d%G! 
z0asj%L_W`J_96p|VSX6TPsbO?%(1+DOszk|L09Y-9FW55tB%4IPgOUD3ofWieoI}X=-+CA-!FLHDQ ztVM$(vbv_Uz~Hev@(J+yfATIq5|6OZN3hH`8>`|G$36$}v63)1A8m~fAeHX@{MUBC zXF30Z$l#QBsb$==+x4Ir;=?@l`Ph77TO#X_cT1QQ&#oUS@Zsx&GH=J}dd1euILHAl zW3*PtlM8I$!7_7_@{h1TTugAfUU%|3=3yc@rqx2lf$6PUT*XTFe)-)HpJyJUXt8qA zE|nJlK1P{2vq`&hAEkTm8urF*~nDS7ab>x1iB;*slvSzqD-pXWO+K98}F z(yAE7aa4+~st+ALuVV9LmMVtoU66nO)rWJ3Pfn!myV!%#%yIuEYaFPIC8=*%d^~Zp z6#$)LQ^vSAIL|G+G=DAIT%iMe-V8jpemT(~A|PTT>xKzsO3Dwl{>VU?yL-h3ZLaF& zmXWaUz~`lo!zYIY!LsGR=k19(*eKWj@>0b_p{9a&@EGGID#3cDa=_{#oJ^AwQhjl{uc6{KzYn zA3~+kw1?5Po3U2lQ&;3?$^N*Pc9NLrxb|mL_Z?=F)t`aS=Q2lrY(8>nN4hVk?h|kQ zVl_US^XDoXdJI0Fr^t`6KlrbcXb3+qtIuefieY`2+deZV(OAyO6zig2aYOLwJq!71 z6&vLv<53yQ6k;O}uq^jNCt=j;j(XSTlO9b*IC+9jJ8ojU-O#!>1wnrLihfaR3t@SP z(9m-02j#b;c^uWBMD;OgpKUA;9Z9+Dy*8PSy1Jv`ss=v8Jmkj_8%Jt#Oqp(czggQY zw+|dj;rY9V4|hy9Bjgpze06g1mEnpN-F+&*|TM8Y?iDRH`KCWFmw1jbjn%N}<)MCyL zFG6c4HufhCK3~Ry_!w@1_?S~VmT3Y^u?rI$O4&@s*8o@3`i3AEIJCGf`R1KDnKC3#mK>%(+9HE8Ma~in12YLQ3@bgZg%HnT3T!H z;qz^*{4~c7+Rgn&&1r2pELV1&P7aZc#bBh!&*gM{F&Yjp?dGV9-UO@4FicN-{nO9)9J6|TekC-}9rmFB653e5{n`7)}vKoH8 zRUr4~xlQE~UOGJf;q!lFeFJ=I4Wo+9f_7^iFL)&N@Cu*d$Q)p+prT)9|8fZN;hZ#g z^pKzbBkz_U-kr(q509ZCIrq^-pR*1Fv+6)t)p$9_?U5gKxBXda#k4b1wNr}J&03+* zD!wk=d;c3$KEg{Wp&`O(r^FFI2$J&S_EBnnD&rq|g{XN53De7ufzRU^&%;9M}_=+ z>B5npRuR)gUw*la9ZjqC*jaogT})Sf{q^3Ur&(NV3rNI+(!CFS9q>G}haDH|^IkZg z2sQZq>8R&nsn08PVyJ z3a>&mFGjt~D_-R}>6@2hT3HrtbM9IP2PIQ64Pzki`6`khn-3Lws0g%NUQK9&6h3zN z%poad4jYTc#;A1fUwpz1_|X1JR;d|#YjX0me~|Tu=2^y_JotQK$Ku2Bz<(ihhGr$R z$BN144;?tbP+-?{sH@Y$M!4^Kr=QY~TKtIFy{%#doV5<)jA5k*P$9K9C< zcGbl|l2JB@c=+T64le*R=Q7!NNmyYA0P*YCjRyJjvvL`4$9xHbcW=rK{Dg0>gY zff(;j#QcOXJ{a9*VkAALEA-IiRId(CGYK^)ly{k-U z81b2V_|#Sgw9IEXomfUWTZq8t-5u~DUq$mbxQ1bsx%l=nOb;s1M-@0TRgAWpwYq_A z8O*5c)T+DXlHn&=a=rxDFkG+uS`X$1`PX$H&P)}et>*B`#M&1#`j}yI+B-ML5w_p& z@7>mO;PXSkBlJtOLs^LGFEn0YHVlkOsumf}y(;e(Yf;V%ZSXw@3cGA2@mQ&fBZvpOMVzNukzJo7bwxY$|Z?yJw^*h)i@|Ni>$t1 zgQE-htan3vsgm(?$ZY-bp7z1xzHsOy5ykLRWQScl7q 
z8QENZqJBN_=UjelKIFzQOUNrhfxoZI55VU)82E^cf*O9dFea&9e6XUcuk(xo3ps3S zGJ>yqkWp7pd~U$!pjd71qT0j9tE=WXes;MH#;^T885H? zZd`n1pnNszVy7IkRvr;OHL0x(83zq9xqdhqKE&Y6S2c@@O3yvR=Nt3<3$-z8(-)7U zl~M<3j6KqBN}B3@9)o&+aJ3!CX`F5bYS2Z?Z)K^8Ch_e6FnU~p=q`}0FXpC zP7iz*dB~3spRgzh9j!bJ!fFua2KjvX@Cl28=z8SqaXqsSy+kEtt{lXbLwUSY- z?cu0cE;_26ab+2{A*V5AKG?1Khm+Cp+#C$ga8xW89k-ED+?p(QXlJfV&wbQ?L45WN zECBL;L#*+L0<^^_U7WaYV%<+zUWyV$M~g)%OK~FcxsiOVv_jcqnVJ#jBhrec<9D(J z34HDW&oemnDRFOvLlK8M@1I=EG>!E4jup6E|tp;+|jcg?eFk(bDQm`#d~{QuxB zJqtdM=iswrpb{^wGH>XuVV-ZjQN*YTEc-x|4h`Cv5%+7yL>*pQWwB!!^L+gR=(tzh z%z^7TLwa}=+*C*H(*5G&e`k)A>F|>BaXjFvY8N9;Ewy*+! ztTZA(Zpdh3hHt+M3DNlre14__K7xoyBi3P`E1ZaxL=X%|&4jDZh!ZP4_p$#k$&cKB z0ZU`GiZ3mOapP>ShDCnlvVR^XWilN->0W1aEew4AzgZZc!*XlC)@~&&*az|+}%aw%MLx6<2=E2Z- zLhYN{e}T_`$cM>K1^wdNa!!C)VRj{7E&qcG1LcQi(2FeV6ocpGTpNw^uaEs>rtsm* z7a|CBFYwfBXj!0k1!gobWGl)mOFn%0BG6;m3p`gt^e<5B0;9n3^{A`go!CKs{`f}W z!*vPKpqvaKlT~bxV@Wbq$;U}7WH+6Yu67Xz>F(xB&;6;7=_q_0|6Xbz?1+MWnompe zas2!F^zzuJV6UxaiMs}$|8(KY$F)Z2E(D3D7oM#Km>(bi3&u5a)HlXg5yeM$yDUF*;-RGOET5PsNda-3$Z0HXy zJzJJJwq)=0MyJn@*`Pa~h~#4HL=4J?{?O7hNt0!rV_(znbGYs2z=tisXP=K6Ls)2K zUp(4rqs!U)LA_i!qOu|YVssLs$B4VyN9SSovoD^XehTpTtY0{(g{^5#-XSE;dr&el zzt7NXf1K39*0eE4Xr136+J5hTh1BoeJI|G#`?xnUhY!y%in24RaKLC473B;*N*r>7 zojg^31n5Dz@@P9`N!6bZ42r#$DqHB}AaSvShhN_uqk@lECERK3LQXX!4l&%pE=+`AOU1l9Z1Qy5#qffX}DR6h2P=?hG0gwKAWkAS!vlR{LGo z%f}frDq1ydN2awPfzPKi@Y!u+^_Yh|es`Pvx!oyNk2719ckZ|J+@J1}{8%Gfu*M#@ z!t7kb?-8GG!`Il|3Ax{Eb$$+f&PaYZ9~#e3Gm=Cpm7Xd#4vZ@DGLkG5TZ_hCt%e?Z zdR=&e!THc|ewvY#;V+niH#W$pemL!iq%qu%0E$G1+-+M+RpV$$*B&^!39pV#KsH=P!%1|#ld->=lPn7 z5%Ib4@Nux%b&j|TI~F)M9|ubmSKWRNKCgDb2P=au>~&jiT@|L9>Ojq^!DrsYsQu7A zVm{;qt=uFUb~z)9{vLez{0;8ENOA09kF?e`Yr6`~W|N}<>cwWkz;@Lmc3?EhexEt_ zUtT&n6b+q)b*DY5J3cY7E$$jcjZtrWqR$?9@Zs|}MEt|OI9~}AmF8e3=qC@I5+LFq z9w{r^D!b@NkTU`5Hj@L*n5dsO_`GXI_NTT>h{PTzkD4Xx9i!_PhNowI#GkXr$*pJl zw~ld4;PZXV;UiKEr0e!dX_BTo1-!*am>e!a_FAd#Ep^IWQ2qHo`X4&P$L-&RCs!Rk zl??6j4~Dg3(@NNv+kYvZTygg9#MIvI%_~4**S^c2fzP)Ck3S!Zbi^dG=z{9fn-1aQ 
z#-bICrJKOF#AhDKCuY%5xqNIsx)y${djX&C$cOU@X$~(MyGIXO3Ksie2%qHz3yr%o zx%_-LmmeoTU8St*@Nx1}XGtehnk8-p^7AeR`Ki=)(9?s-b>-&U+AHLx5>RD$jGmqm zJMN7?e|ooDBIR3H%fH0uM&Tp6!dJhv~d2C2G8jFwSu_4_D`1~mFNc~!u`;vUJ?!GvmIq|WUZuk-*w92mkC>2E%$oP086wC+F9yfO!Lpp30mRw(ULT-_2Q{m#vP zzk7a{dk5U_*#EoIb1!@p@H}Iks#mYIE3L+n|7(*xTN;_K@;#E%{$zBW2=V>&eF!pW|-#jP#4^ znfzIn{so_pVGf^Cd8blr2{r~Muu6N)xnN3Lz0ky#1B~xCTH$JD@j34u_c13RyI;iT z#^J-qjA9v(03<&&MZw@BKbcfA@cGzI5T8UlP8B$K=*veLxaxkjzTbh*p8}qx_-FMV zR*R4A*E5KQe`w(Iae0T2-4wp+Cu;Gro5EMU-ll#Zd_KP8@WEPX@hDY5xO4*+AFP#j zud@;hF5LqE7JLlm@Ui_n&V0u}afF2_VfoshGX&FTI8e^-jcEED_>?*hA3Hu=jgD=f zZTpi!G?x6lIG#&B+dO|jLBh&X_Zm1_&)PW$S;C1$&1MmMx?}WAarvRhke}yC!+dxG zW*vTdHX2^KMvpxd1AR`!9%l#{PU#ur%$bdUm7aTXl9znw!~&zm8|KmX!|Bx9#YU?} zgGtZ>jiNMb)Nq_`^xKy98TqMTlEY2gP#!jvQ;NpcQI2%j%4mpVAEK{*cdsBmi@1EO zA@yi3i~LH!hvy)=E<~4}d-LxCkBEOP{eni(Hg|OMVk2{*nB$`}x5b%+xk96gG5&@9 z@bp5&hl+k-KNw~OblKK;h;trHjE2nNED#!@2_1CKZ=?Yf;^^v#>qmY-YkC3r{6QXk zM1R7=y^8U;Mgy_M`NHY{i0Dsvc-A*x9ny(1;#lGI*W1)@mY#cyN%C`0L-%~Wwu6>K z13k)g`tnMj-~o>LfKfii=87SNh6HA%>ur$FDU*C27aORBt$_~e^Tg-X?g(4_GfW3@ z$|rvhuWuAfB~0%-+=m#UN8E^Efx|!1;`ur1h8LiOV=&C8;V;(sr{Gf{{c`as)M}6D zP*|tCrHCQf;SJAA_o(tY84jP&Ik0wf3q3)H&RljeEBkcmH}(1!e7=X@_ zK-1D)Z&SYoK7RsuT=qwipPhCq$*C#MT?Ftso=&JMIy+zB^HKR=K3J4LzyW$>gRH7O z+r{ypbsVAQKX+co_Q%3qp&Mm9*U`kdnU&ntMdvHVr7{so^uISb}Po}M2kS8o`J zR^$HJ3JH}&xstp%IDa0hliwH=IJSXes6%0ks?;qN~@eV zLwx2zd}`eA#Oeokt|%V)@F6S%8vBd|qW?d{X+bAU;`hV#Mbrgik`Tu>~rOPnMim>68D-U-U+uqd|z%x149cZ_>^vJK08H7 zPPq^xKact}IG>&FpnKXo(KtPix(xt()t95N3l*H>L3VC)oi)-f$N>-Yse|KqbBK9zE5Ux-jzO1^xS7EhsQxj zSjtH!JP-t5K0F?})>~kfgqZle$HfQ30bqVsCFILT$WNvyfzNjX5BYcW)JgwUZW)y_ z8dlQ_kT0L|#O(JlyH{(A;}#&|Q}^(J0Ii6~myeL2Oi>~}a}OWd8pqui_QFvGi0?B< zBuvByGm_l#k2$zz+;I)h_!2YUYoNCJ>YofCb+i;5wJX-NZdGM*%8jskz z;NT|e!_gBeA9{r7u5WO~zeju);A73NTN@#3{#fP-fzO*};KR4(tc?z1J2V&|^K{Uk z@P=>ubdn!5&0+rqc3-p%tdG_XiWn1MZG_uu)f%mGrD8B_J}_t?f29&Retw1B z7ZVdrKEq3#c|Y-;1hmx}jwZc+-(=W|V4}Yjk2M-{13rJ5flm_$%p72L!Y&yhegfl5 
z#Rj^h7ztLa9fNK~Z=Q_IOUzCoZ0?U>aI=X)rwHc>KR7?i3I6xGAKJ5-ne3(U{ z)!m=PZGHznryY_XK40`9S)e)&lsRHttGG%Pi~~f}Rh-^bh*T^4tqS}6kSx#@4wSjZ z^`J*wi3;W=BI*GfcTYAg9roJ|d{BDB-~S~`eqdg({k8$KQ)r_Rv$6ZIQhtQc8Nsxe zp?RM*+vui`rEQQO=m@shj$n39rnD#U^kKjE1lxI>dbF6INipKGUbjE|EVkb@U+oT_AJcPGvAs=c1D_AfAU`h~J4UHet|i||;E(=g^VmG^_lDWo zN#K7Ur8j(s>|eFK7BzyqtTmD^1D=Z?s}g%Z2FZ``T6`0#YOPfX(tbY%@tH$@$nXfS z9?B>7evFuWwBL^bKC*u`4xiZjF=F!3em@5Id=A;481l0nuo=Twabxn)em@5I%f<6$#uKSoSGvU^_J{TSe5bYkZnf*ICUPzQjE>Hp_$*TL33K*o_~_{D z!Dn}tl24ekPs2w?XAeHbn@B#}&N>Fyu5G6HdO!8z>+gZ_bAW#7=vo9d9O5lbo5@FPX%~l*ym;a3in{i^-D+Z#rYiM6Z6@Yt}8H4 zP0mM0?i}R`7MDp1d$(PmOA`7nDR<(7N_=lfXv}~$1O2lBF z>z%ZGLR?t1eu@0lmybrS41C@OJQ4O;<`?X~W2Fzb!#vnZd^B=p;M2(a@nNXAV(p6; z4S5gQvU08Jze40kBUc7K%~=Q^%kqfMI@|C>y06&zsjq%%T92C z+rhQgRrE_IR|YB{55pzq#sDO0-YcjkvC&TU-gMl=jcSC&U zDfm<|3fgAm11MU4+m*`GkDzv6_}3`QV9Re;hvcTuluHiTErc`E0XYqw(lr z3FG>!bcC8q$^7ud;G>+nXdj}cpR$3^+h@^yeDO#f^8*yEUm<)nQa128V$pm&^5fDm zA3&-3Xr^q$=f;nZ6(4R(w}tRgWgy=T-Y(m?>=XmDFN7^4dGnI}^&A$|dT6!4GN2zaYR)v<{ zx%*r2IbkXIglkuv`g0(kyH+7Lkfra4&rL!;mgfo`J(^N}HladGMBLwk58q$l>Ccq< zWp!@c=fPpj zd^V7e(p?b{8{)tOMDi7d}RJ*X;#AYD~5fxmk5n%I^5L5JY+l*GXT*~7iBc|n0txghKz{U4HsW&=#V6-YzLX{ompsaE;_=Zv z`{Mp<+)2yF(lNphd_La!~ z1ZNiyH@^@DKGS>(J`wIKiG5DT=Z^G^_$(6gQB*6yyWg_>+@ZdKk39d&JzqlFA7^J} zq7*FJ{)qJ*>;U8Yz;sQ-z~>8DnEZtE;c6yrvik4peC|kr)Hm((nE2eJ<72Ct&t+rS zpD_8kLk04C^U8DJ^F^I7KC(Vfc^DRXSgFAm!E#n_W>5hrjr;@z(i`pC1fMSkp5XW= zjL%B(p;vxF_-stsh|fF?A6vUHTFU2ZjqOyQ9)!oB# zVc+l7@t@lp^~7v!9P%|decC&@i2u7fy%KzW5qQY{IER7k8wWYmRCkO5#y;bJJMC)H z_YC=Qjsm$bFLS(+<=8yI;Ai~rcsfXW#m&5)_{`mWKn8$_ORw$*1`CR>QCg&TQ+=tXaXCLid??S~A1)G{L`IY(5CF-JDPNwaLh|4s4?9jr+Zm z?ikl3gCu?^vEipQ$G?XkYh}NQ&rI?WwLmV6lTxeDY{YLs_(ah_bk zcvMUlSO@YsAN5X8airXd`GgM8)jhT*J~x67?2fd-6f>ufeaI7o54Xb7{!+}GvCxMH z6yWnK9gqAtnMGPk9{F)ni%el6KJN+OlU}?0{1;EXGNxVBKla{V1s*j&CI2on)6^MQ zu>52z10z25U_Plc42929=AJ3{&gEw=_^5KTqIGucb4JY$x6Sk)$j|#1HXp05u`(Tp zkIYLmWt43?oDV3s7H>a1C7*A;$gACml>j)qrkJe*#oy@&?C zS9-(Gebf#3;9SZ|xgZw6K*&0ya;sUZi(_+Nihr~xzpIitLa~saFieE}kEU!6uf(yr 
z8$Sj%@RKOL;X`jEJ|sVf`{fpD&eqw&w#MbCDg>&eH7uK)a3t+i0h}O2v5$`FUZHh>yKt#hx(gT+MlunvXuMQT%4WXM2`}k8_|w z=pe*YeDrFKl6*cb5ua_w+vy|J%dtOk^h<03(@?v(eg1ToM1E905&Khke3*=nX4)nA z+$7<{V`uDj3e_$pK8(*tL+#>xJ~I)Y?RcYn6WJf9F?=&adL6goc?A3XS)C;Eqw-mo zel7FSNXx+Ivw_En4V$9!2Xn)jEH_L*wF*gSNYIR)llYq~9 z{Z|;DjVT)Vw1G#-zhd%HG;Aqk;NK>)Kf=EU@!6=NfzR0td=83@a<$YhHvMik#d8PU zQEzZQ?T%CYu1>#F`U|i9Hu0hL-Ulh=N4r&K;xmuUr?$=4b-Xw{&!pp{-zqcs{Hq&>PqS5SwOd8n%G@ln zy^8TT=lJ`Boah3I2n{)_Wj9 z3FPPNJMrYl;S;G{Y4pqCqmOn`{QSXh03I11hS_JqCzf`l=cAW)aX#M^#K)=6v%_r! zM6S7qn71W!d8t$Czsc>79YJa}0}=A`P4fWx3E<;sm&HisA|J2LM-MFnpKs2`=i|~Y zDMFHo`tNFd^wBc#`PNu`M0dh{$4Cbr4a+el=c6t6D+CO(Te^5fy-sF}Q$ z`tO?fY+T8}=i6q9`MA_f#bQ5R10OAv41B&l0Uyi`pvC3r0D2`p8o4s?`3@FeepEi8 z3bs~08`Lu5bCaA;q=K!D&xW-Oe7+|ZAKuifiqE?JS6~y+MtvCgd~YX?{CN0;s#nVV z)YAk+3mF^KE}mb!3wV6*BLv?c#iBdu}uz5&!VBRO0qWsrC8z^+ORH zQZKy@ZVdYMo_Sh6mWtUykMba`eGX~@+LWS!&ky9&@KIDu!D2sLH=j)@TIu;W_h;a< zQ#9z@hTX;y)|P9vN5x&f0Zyy~wvOspa>iQ}K8vcWQC~Y8w^ZGVmF9AU}b8qL*^)YpD-{`D6l% zeJkL@&tDGYlSr%V%ZA9$raZOt-jX~5pKFMZB0unA1*2Le2Ue*aV&2wnxsBsO$a~pM zEzvf}B|j(TU_cJ6KYWULTc^D#jtjA!ncnh5eCBcZIC>__&ec-L2NB6fZ!GqmfzRvt zxO|+HG?Gy)KH7UP?%#QSx>GRx1|(HXpqe4Sc?j^eZl(Sj}UQu4H`lRW$J7 z`Kii>@4K)SEZCog^3*ppl@&il0G}UY0em7<$nSFYbAEg>gvGuO z;&T(gCuQozuN}lkLlv@F_&orh-=Bd`rF>w(D~DAP6KPqccWLXFe-SYo(gZ$#z(VB!AB!4vT=AGfX}=9 z`S>|3M~tEt2)%qV;H*4!XnpW4?^^iE&t34*U(LYh`(}ywIE&0axb3In$q(_-Tg|}d zcZg53c4QR~Wp$o(DINE&m6STj$si>Y>e%z zIS&q|gLCQGvq?tW3-OsR@o}uMbH7O+E%V7xNyBxk;PaRAOMDWxLb?ZK$xy}NPmF%*!?USHbJH5UwS_SpB)1&;kE(Vi_ziZ!tD8G$3#oG zG(gqWujly#ck~4Dxmo6eEtJI;revbJk@m~QM{G11^-wv?+E?lMM}IC4K85{qr7Y!V z$7t*n3+Ja?pse$ z{ne_yK6E|5ah%}uX~c)Sc4ky}F?a32Xw`}^L#xLPPnvU^a@r(CIaeHGO z-)vmXx-v*ywWZ=^%^4Y4wW3A!M zG`jqRW#O^baJ@}kg7{o5^AYnSoa8;K80PclU3|n|IK2xQKKDRR5ucl7J{T8eopKuR zThULu_=wr*)&QI6-__|A;PXozKRz_*XN5QF!-o$@SmBZz`Bm`wW#Dn!AHN+=3E!QI z5A`W5?{G?Zy-mHG_{_nF;%By7uIEU%)?lnD{$Z!RE9_^t4<{I9&}M_r`xfA{-#$2K zubj0zpYs0n@^YH>!!K|<;Pb0DBtL3=*e--8pALq=N3QsAdeRMiq2@m*Jzsg2}TvyucpwT|Sow6zV#LRuew)P-4P0~vi(<7rnmM+y;p{P93f`1 
zzKPeGqmlU_;P2hm)8O-G^9~l#B|qHsNIRslu7S^gHUl50{sYHVXz#4hy1~`gssAAE zD@>0OP5Z<=gYNlA33x;x$X|WufuL%1l*gs#Yd_A2kFzkS&ec+0ne3sfzr~rWmFyxK zyqfsjfRD%?4$G~589&qGTDv9Y_9lL(qQ8Y7T*WIKIc|RFe9CMsW0amh$Vh|f(RpY)y3!F;l{%nUwX z0z71Y3Xj&PVX!|ZPd)-PjQGq$_@vdaAU;{sFz}h>L-?fCurNMZ(lGG(vL!x#8=HJu zHN%@;6h7Xy_YAda5}$=ze*E}2Y8H;g@$ZiP@#B*jH3Oe7pT*;os9!2VelpX_3O@gk z_>>yOVs-ndSm85!>y>f~rFd_=7<)rTY1HiwsO|f@+vn-URlhf3J`PQDQsDECnB+q( z@==H!O|x~$??ECy8i$O;=LUS5fg&U4KF8hpE@;K(qxt41@cEaVk68cWYG!FxyC?s7gwCX?ST+M`9XVXnXKSNk7{?y3& z7eCr)8xZ(>VaMU4lv!7yW3Qi{IAxAZhR@Ki?iT@%lplo;Z?Y4eE%BF?ZE3RPN8!WQ z!ZNU;Y~KQZ06t$b10NdT_gu&*MG3A%sUYUp78z+Xxj)Y(XS63bwk@7)YF{w zCRm3)?Xr{Z=yZ688V5czrt;w%c$(;7EjRbERkz(VO6X}#tZd8jBVxnml{x5*F&e;} zj?Hu06YrySL6lA|hNB+X-60+FU)|Xa_=w%7jub(Bc8o#|r^zKo!{rjar)bTz)l1+x zHX#jBOt5ct0s$hqnG8>QgYIOLy$ay-eaIGI;dG}SJ=FNC^!yk7`kdsa zcEqaf)_RH?xL?d4V}t1=!#h*NhemupZ1I8hsjmwV@qzWZ$~vIZ^RucYKhzSA*0DxR z4GLEZYknO&nU1v9pYXdN|22CX6Y}Hm5odKr{c7mXU3@aR^IhB?__Xrib5L&9DBQu$ zQ&{OjrTzbN_a;!TWmkQ70|7IW1QLdPm;=P!)kfuduO1FDCbn*=RJY`(lGGYrbv^Q} zRku{CD*34-Swp|BX0U9G0fPf1Aq#LwDvKcm97Zz*3^K;x0podKh$#Nx+hN^K35i> zwVix*4^wz_Qf9nJR?*P&$K|tj(i)s&3Xd*oDB?w8D2(xRjPmA+^>KU?`nf#;pORLu zmst>?YI(bnt4FO;$r$0VP7t6$?|9rCMJ?}fL@>8#@cDl)!DqKzsKNC>w6M9~2>0f0 zuRUzTr&skB20n*V@S*X~(%=9g_3z5VK|R2yc_s7V$qQhU$plem;c#;CeahyOp~N6C z(`14uv#{@z>3#uxT9$qe%6g-kBxs9NRpN8eI~@;3(u)c{1fNsL=e~NrS~$>d7Ce1C z=3NDJXj&1oc{3(rxE{aW8niErJ8jh)8S%OF@k!pu+te|)CM4J&I(Nb6UpQpY zYW0~2*cP+AReg>4OnrQ|s(EcY4^Xigx%nIoT8NT#j5uw4S#edBjq!KE=XIA7AA}uS zN{|VQ4^HbS9b`g(;CbJ~z~>fbUWFXoGYbe~0PR3I+=*eK$S~>;NN|i^C|B%NHe$d2-B9Y)!=#vFZXgCVK>UTa#5?B>mIC* z57z62Mf1yg?_^BoruP`!e2-hhA%aWbdB=F9HyEc9JxXT+sn}bIp@1gCTG3#7H~A% z{gz6Az~_0uBl34kjVvJ^X6Ij48xPihUi$ni@Ui9>qCxT1dhD0tw0F$Q8Xt-Vbxy1G z*exBM#Vl@RUjv_Sn1Bz@@5zt8n>Ht*fOvjJGrl^vIqJ5r7clUVnijXxdsZD%%Y}~zrReMUC zqD&VCJ}%9Fa=3z$^XEd;NrFt9$KHrpe^O2AQ)jZs5 zlKqhkPPp%;=~LkI%@d0c55VS(nZ2vf+%<6@Q`PE5b&;n~Q2R1tth@JnGMLx5`UXw) zxgo!csp=p-k^-M^nFjGmsGdetxJ?*fv+i#92ajI_8M}hsE}D#_eNT 
z;J~Cyc=)>0F)QPL_|lhOf)7p<@a$Gg>b@Ax$63)0&AuIRMAhrQXnyg_S@6-Z{il^~ zixh-3#<5&|Lky>b?5XAHEr!tKxN4p>P;4Ybe6GO9KHn{(WO6D&^$z=5&58%$Qc>7% zr{{CvGi5G5Gzdz5e-0m-p(ZbHRV9GWD-L~p_;5K%_s-mWtl_eEpE>27JAM@K`PEAo zpXCgK96p{&hGK&t@cA1CKIX|mF~g9?KGwv`Gi9GSFSXi%IA1y27^is3ZdHN*t3U8# zKbHj`C-AD-VMQG8xC+ISeLL{8$tYmFvsUZ%u{4cSOJ`CXaU9v`Hq#M29QDv2`2Fvk zfDb>cgSmQ@QWQ~ct0FeRyp@jde2cHj4@U!@qNrtza8OdAZBx6Z;PW3ZZ9ZFtI_B(4 zz{3QeqxJ}Mc8Zv(C?3+!e*vER$YbN?4xF74e!g@wQ-QnhL1v>?#GZUCU|ZO^xpM+% zXM~^cB5whX0FK&N_Q6PT*uloUD%8N|M<(FIhs$|vw%g)Kwynp{SGKU(Ziz?HeM0d0 zQ5M7}gY#Z4Cm??8fEAH_m?XBX&EJ6!;G?7y5cs?ic<$Tb$4%=9qA5wpwo=g_?C@iz zBLvZOBow`r)qewges4m2DDT$}+#b7Q-#$7i)eS-(!;-YXWn&Uzzu+&fi| z75MxS>8IF03UdmbAWX7lh1MzaSH?0*(RrK9Hr$19Qgv>jD|i!p{(|&FK(@4EF}`BlH@}BI{mAHa)KiSR9^ms= zm*7*&0ZzFhjedLv+Wo}^&uUG;>Gh@2PX-&zE-%eBC$lAWa%?z08X^wBI92DLz~`?S z_z1VUDxF^5YE*S$g~TYx?()f@4DXf}py6-dYMtWX`hX5EAB_j6!V1wK=)zG{hIO$k zKkkMpZ&jZGpTD~z{p=SopMZn7h7JtD0raYN>mWg6tKDs5K7o~I105LOm_Vk2M-8J;3K*5Fe?1Ar%M$A4Y-UEfmkC_Jx=&tlw4vMnFBl=ZVbC z2faBG#jSBKXtXry*(4#&A2!>6LE?kHywhe%SY!M4?FZoV#aZy-{;T(s_cL1q6gaoQ zJ8EP5CJYfKWw6U4!OVkXo5GwmB>z>uG;jMq57}C~+w6>w+uc)mAP(Ryh29NLQ%(5y z!tVtQas$JlV`FeTFy|Wf`^oh> z}p@<|&j}bcU3tr(AkVMgQgy8e# zz$5daP&dJ1i}0$$hi@-O_-4933O--KynL!^VSp&GmP!YA%zEe#{Kd$k~=(UF84F z=Uwu2i_bI6U@lntNcq%@S4v;IzrnzVVn;1=iMWK=QJ8<8sJN#i|Jva5a29;D;$FG5 zy*xx(=e*ZF2BKuk{PL}?Uc3MMfX8WH)MqV$kGvfm}~^`adI>gpvSplODZ0Ge&|r$Wb|Ed?#*r_jQB6bOb0_QNvxGk0#Yf>j9(-OjAwIi^t|fY}C0J+J=QBjtq5#lv30Hgm z^}uU@2l<;2Rl>|>PGO_+h|Ztn${{KX@bGO@isRBkRDKmN=;yUrAD?YR^}`->Z9ku{ z=S%$IE`kFgjF+-Ozm(#DV$4L)BFjkTY9ut!3f-yZXi%4F!BY;ZWwY6(I@HoQ7ue zIZAyBqB*tTe4a3s`PiKot!IRvPk8@`&ByM%#HNn-b7_AL{d}@ZKQ^EEdplHTKOb^V zpw3I3y&b9t_1_qbRo1Onfe9@`$svcQA(g&3mAmNZ-yue1#A>aH2e^#Gr2*5N||50~Pf1Ha?hIM*(5hqE-{Bdi~d}h|q2b-mGj=lR4so#8|fk=rV%&muy zJLW1sSZKdAJ~h&7!S%IgYHSXlkT`tAX7Nci3|}p1e(}t$!`Xaxa5gvAHG|Iy;v^__ zflq$o=X0x2$7XX0!8Gyzd=)vbb`XHQumiV+aMknhd89qUW-}7OBceF|Z4Hq1>I5O! 
z+b3{b2v_22J_SB^eSCoBSx9v?KSIm=`*rc*Se}h!SE&onGQSdd7kqw!Ir>>zidd76 z@+o4$=a;hJgJdB4C7Qd7$Qx|Uj!F_?QB3+Zwg$bsU7EWyHREnrRngb>Tatdv@v|L` z!}?g{f8FhCXhq|QT+XV$06yP;3H^xuL2LYM*`N5F+wm_j#3zU7KYLElUfV;HiJc6> zh{;(ce|y2rTYQ>`{&OAy5TcHV>}e_S<^-RjtRJ4d0UB=NjY1uR=~_O&9dU20M84r75RV8y#$BM{C_ZkF_;7mI zY8@-EKL$SgQ!gK!NRbI)AD#eZc!CTk=ckO>_jn#3m==P=Ln5U<|LNK{-vu78eqt!b zjmXDG5xux;f%f&_7rbl&K9$B^lioJv;q&*Cd3+>)MEQG*9?#?>`6DhWBMtBC5B`hmoKL={ zAtALG_wc!>$o9@d0mYap)mlS?BDJV7N}l~ofX|m-l71vUu*AFNq6KUAcjvt$^MNHk z>vaUIdEZInd*JgGQ;Cm;ZPR7kr^SbdL|sOKD=Gtgo&h|TeuRI^67I$1)2Gl{Y;7o7 zo9WHL10M3F4o6pMUF`s$?X1JcT%TX7YrBO~;X#ql@+Q1{^TlWrc$1GwWNl=ewY%*H z%>bI5&7Ibu+v=p1izN~8>63nTirT~6I$Lif{CB5gJgUBRrupgr^JNq85$8`6Pup#( z;{2EL-51lJ{F(10K9uc!3qi2=w?*m#Og0)}{8(%to(DN&&^tmf>$}Gybpa+DlC#}ZnWwxh13!7i@dojvCh!tj(XAIe>$IgvsV;Ne#UX1u$&F7N=ugDR7 z=kUpZmogE3!RIR^{ZQsyN}RG$X;hJn2UbXfD+eYR$W>%(D~zg4KF(C@tcT@}L95wD z+8v5RKOQvCcvcTsCW9Rrm~J3@QR?(Yd?fwM0=7&{#lshhUa6Yw#|&tiVMgj+ZJclGC| z*M9MB%)=*MkL89c^HHnEa@Pa;dB>%XkJXRy3{EtX)@pD;lt$C;LIR$-`9$l-0*YJN zOW^a)>|#DHHese?N%*8mZ~C=g`fRU$$U#%=po)bc$YCaA=pX1d}AfJdvDFmN1$z&#c@ZskR%>FCaz`+Y=eYJ)d0fpLb1{)jIdP8p@ zD_F>dR+CR|jAIuw|F{BgfX|y?eyskT^KqnNFGNQMCG$~2C^LR5{o2d<`3r0PP^%Yi z<*`n$Aq)p(^OWjS*ct_z$W;rN;g>_IhcS~+$r!aCXI1W;l5INC}lDDe6GiPOG{d;;dk zW;>6S=u#cQw<7+wCZ7(UfX$N4aSQ9x-4Q}@MJ)Ys5Xy`q!RHHD5Fd|hqJmn$$0M8c zf&!l}nmT;24WF-JtSp7%2Th+(Gho2wZDd)6YuApTYB5xr0VF{Z!THRDtrKsU(Po!(7 z*EE{Vwt@0S27(`Uv3F{Wj>N8NzhmHNj}jO7d`A}gDQUEFSSW3~(8RAo$)J_PLigB( zCVqLV`X>0ihB@ua6i)_GCh|}dkZIMue2Jh%yUg}wiU;M<)+r&=2Is9VK~v)bpVtA8 zWq)S#iD-!y@=?SRfzNME9X=HRTFMV=^64Y)&r-@;RUzQ>hC?@>h)M~*bBT{4N(rt9 z_`K=L&4-r9#L4d9uY`J4;v<$1#mVka-%Znpz~^niW7!|xzAQdTgcQt2Jt^teetCyE ze2``jYlO}MV!2c(WATT-8flJg@@by6T0@l`G{I+A(vKkF2*+MPj0(!dM?KltO&ok) zDeI?zN!K1Elk~Fqv@z*A@2N7{@z)xBUOkoCm&wObkpC;8Uh(izRzdo;U&+e)DOXE- zd4xMe+$gqJE^1f|ignh+)nW?#krT>$Vl{+`$2Sei?YI z_Qm-mYFH*)nU9hU%d7|Z{Ds8F3KO2wie=&4>_2r zo2PI|XYJE7cG5-$V(W+)UQA`0RN`~#=w~*ch@y(eCw)o-pTEw|=EEET!D+!<_t>CZ 
zR}1tMmk&jCaiZLj*1AX{zu=EOj&Bp6tJ!>Z3!-%q9KI&R*<`qf-Q2rcE}zxuO277s zCrlka+`kv`{E!Z;AcCszHHz5u(Rl9n2!_by!~J^^&#zCb^E{;b`M85kA0A&-SfII> zSNRT%|&W6cxbD|C{a09REn^t!PBtQzr?te(jZ?GMU3?!-Nx6QX%*# zp(^nCLFVwOQBA`B!Fa0geD1bdBZS#fb`(T>E`#|bQcwUNB^9J!`?W8gIDBs9^?Y61LX4z@ zm&xam)@f^G9N`>_q8Ipl2@BwpNIYQS<)fr{P!I4q1RmI*Mh(sz^(DF2fzNmd=a!M; z?seevs>3Yjlh=?Wg?H^4xQ~TSG&rN(nEK&edj<=pDNi#vqZXu}-{AhMB7Tdtd|2K| z7@p0HzhNyO_D&QH&qjRSMEddZNoasXd`;E%O6Q;p6W0eqAU z5&%Bjzb-&O-n}qsugBpq>rnuog?ojw%39^?;8XJHXAYUL%`Og~G&Vu-dEvxse|B*S zHQ|ru;zMD85|+2BuY=Dwu^>KSg)-yDeSFd=jv4nZzYIR_yR!Lg7xGCvcC&E7=eUj8 zsnQqt{NbU^2kv8x?-VAkli&mQIs}VCq|wCXt*QXQ zS)0$&+*i%{sq*ftq@T&e=CfPY^a8@$)EZlRTD4HZ!}w;RP=xCpE+KIpMVy?!6vJIWJ7MiDo2d)#TZBE@Bp@BLmphJL>F z(95URz(m!LC<-GNpWzr2RW%WT&$j`OSU(iQL-bz4jkU(^X9bLf^h|USx%lw@OSrNQ z`APUTMnr08Nx$~G(FA;Q<(-{;p5>5>D8ml&$k)Vp_JX(?&h<`CS}oQ@-k}V;Xdzz{ z(U|UcOIf)_Yi$UYzfo_wPuNGCny`a978XD){{8Ea`{%*a#?83;9aLzdU@n?=v^n zB|m>5@qzn>%I7cM`1x5c zA3MP(7ZOf*M+fHOgYrZcc&;Oy@Ky&H^z->wUOt2a8DXmV3K=5PY3hDfp;-M`LkqJ) z{}=0Y3i~ zc)b0W6HGMbQjm`iw?FEw1cA?GHXKqwxX(7i1l`rsl>lAz_4>h81bniw?%yZPQlZA zh>djjtYu(A8!$*ZBMdC9J;^Q(u4w|qsipnRgYSOR0?Pp0pwqqvk^WZRLveC00KXZuO4&xE+ z3DOWcgznPz5#mGo3FH%2ECH1=`!6@2w2CC){;ik5=fl7w{Kw3ZkJPlRk4661@{dxp z*K(>$d@g76iLIbnd{mPV_h%KY0%=eS?S7%Y z8`BHgK9>>rgq6|~`K(lJ;PVrbe#Gd{Yk#;meYDd&3-~D0l@T9FKa2T<6i@^{E7gZ( z4`%%fKCeEU*}mo^x|&s>`T7Dr%H#qApVvt3%VVFx2O$xQm}}%WkRFZg6t?nJQ$h0q zCFPSAQGw5EvyOiJ2|`>W>hLROCLeDC6>CLNnH4kQa}|%zTuT&#Pg*47Y8c@2Tfk$D ze}sQ#OwGszn|uQNSJhf&B|ewo`OMW!3_hvUj9d@!c|+FA$7QAEPltc((T}pTFyeDH zmrrQPxZs80lX~q?g}~>nTtC+SSE$uf7mC0qTh{)Unr?@7orn+DkHjaQV(cRM1DTH! 
zLUG@|{S18GaY*gU;$t8EO1M90=|?*MrB=@+5P{D-foCzFh@!IfGmDQRq5_}anaF%# ze_~A6Z9Q5KY(B6*7&Ny>XDUTKv}(b~KL0M8!zZF_?1E?WQA9XqO^DAlh!2mFO&JVI zje3lzNdbIB@B|8|)g6yi37iBzzjwHhPjnS6=QWovY6MxL|Lx`=Ufb&f>l2%Sh#OG>0pIAzo z!$&D4fzJ~ri}=JigE5znLc<~O`D7NJPiQTL=96+YfzSPO_;`21yyk1>^BH`U+lePW z)7kp*@=2;6FQ1gFhxcC(90v1AG?Nd|&#am1Obw>t4d`bLc!K)(S$sliC<32UYl!$< z2J;E>bYWq;vH(6oYtX5m+Jn!7+1Y#?9HCU>w1`-IQm7>2a}~^IcKebY?;IqqM*)0h z*`HKv2z;JA3ByNtitlfjZZ3=D6q`@VGz32H1|G40iu?|C{sNkT%}@|~l1nOqPx$^0 z@?Ri-fkL^=8u*kZHXqlDNE;vl;l9@d2s_5#xx?`n^p5 zpTR|I)+f|>o%oP`96k$#5wcnipVj^LA#Zi=$H3=v4jn#wT6GJtjY&qx*VNdnTeP2=-^YtY4{LzU#Q;PcOb$I{PCK2cS*n2#E|0-t}Lb@)Iy z8HBUhZsgpa^ZKsvou_lDxgsm(Yqj}-#pU*;%wd9nmPUN8!t)6&qey&GEG6*yBH;0_ zKPT+TeCt1o_GHB8at5E!VhPY?5T6vwB%lW3CGc_i&nM`zTz|uCUw!2|ELSbC|8@Nn zUHI@@I`#49zHST;v*pQQAjSndg#|btl)znx%Sx&xa>ApM*wP zDp&AP)F?~!0H2Rd1NkItWqv+twle+t7k(WBAJaaUwfbIOD@p)kJ~Zu5&lsJzj84o# zQt&n6L;7*^Nv0jDmYa{7+M#-Y&vSuC;=^M5%u{*R^{4(J10QpJW3N%H7kK)V2rPo^g*m@CA9qG=o<1f0D1xei{_ClK zI1S;LLW zm%)4ztpEk^QE~+ceD2SN<`X`EZyY{p6qSDcD_;aW=KS61zGk<`aPNruh0}e7`&Rls zMSLy;_{1Bq+>p@4jR6cvzu z{oj0CHkgkS8(@a^=Di%iN2z(2RX5^uwTMp)EiK@qjGDkFIDS+@OXQoMDr$+qCpdnT z!zYG%7@NaK8SPLT)32ZWBBS5kwxv~z`H(4=ttJ}yuQ@(EG6tR2Mc^mYc#-%_ zy?kOR#$8_*AEgxIu7`g8^pQ)4Pp!NR@!(IGe1<)h#e;f)&)uwx&l1Gr;-j#51fQQ_ z5}!tCr&!+4S6RY+R7*eO?n$S2w>4mjx{*OYKQo=hCxMg#`6wtQ@R8RKtA*Th4tSe< z2JNQW2fV>YUOyy02~>jyYhDr`1yy6!g6#9z^$FTnq4cbLRby+~ULK6J(t53afP>!o z`fj0Iaq8w)T-$pNw_^VrudOcs=D->_C`auQOsk9R!UjJ5m*c`W!+TqB!)9#+Ke(&xv zdU?cOJsOWjt-*P_bBB#C`YoP@#2B(sPl_sa-hMR02Cb7p`xLEoguJrHz47T;hQ5E+ zJ4Zl#Mmcoy@H- ztp>RPom?I*t-MtgM0_rxACpf)tK-zdZj7*IfHU&91!1_6iR>-=Fm; zg8gv(_rmtgo8(*)q@j)eT{ofundcQ_aI7>85qUAYcdHWpl4se1a@8 zdcYGnQLpL(pQlYc`U&j3Oz{vXlhfQ=UJ{?Vf8$=f2|i;lAE$peH6(TJx$+)H*tvpM zEFREUU{)^|flsq{jzM;_ZFD*pG#p4(M+ZJHyY%SC;o~0vh=7_rn%%;mE&?CN{@mN4 z&B8j+2f*iNUHXwiM20G0#)oV^chBq)kuyG;>X*Uie`RI;DK`GKjMj}?g4`B02U5}#C$Zx&Tezy6vh0*|8~l>rd1+5&9i~$y; z)mm!RufO*5E-gM4j1e5d33C42DTU9CG^ZE(+)XUX6jEoj?`*`8))_a2yjEu3Y@)rKV$LRy$^OmWL56x7Q 
z|1pP8lSV?yUf}aPz$58L3=-FPs$z~{Y(ElMitnxzq$;ag*`t^5z+NH#28Ef-&qZS{P*XF4n;PdHO zFP{t}Kh}jlEcE?Zmy3_^V_l`2%B}`5E$7Nbt&Vk{-9lyI*Jpmf;?q3q zHG3VnShD6>yFc^eseJ={u1zHUIDC@bvY$_~@>cb2@Oc)K_$+61=-3~%M~B@0oK0PP z@{3&OLj9DMe)zN^)Hl-kA?W9ufyeIOg?@MhsYEyI@FDjf9ziNmd8_&|@ws&IA^q?W z*?ftxJI!h>TRYic$=T0R)OLEV&`6%w5N&5MhS&0v2n85+Cgwg#Z{ooi64uGxd z=TH21!RLWPiO=mu4im3ZA!)%@`+98L#Kf!HPFmioz7IavrV^i=R;`xnW|~}Q;Ik2O z*kf12;?pz+gWl?nm)Q3c_kn)>z0Y9aQz=)&M8m8xI*^^g9m3HVs? zZOXNJxr83RydQUERS+@H=Wg|eqh1%$N_uzIv_atW(-I#vqvibKCZFzD;kHF2z~?Wf zj(*JUOV}Ux+$-7wy2*zUq^aC}fzQXTGl$QzmY=QoKdLW36QBP%aroHXyYR28J=D

    u5W(- zyBYY9XA~V?rdSCXtj(LN^cjSn-;Z-|Ir5A;g6C>$fJg|&-3->}t(E#X<*n*F;PakC zA0I#=zr(r%D(e0D`E)T%RF4t(yf16=A&=)=qavhKsxhs`>SZ2uBPt!nI3M^tH^+T) zvIFfJ6HzuO=tlJNR`m^i^9R0UYU@WU>G>ktDVGrnaet%C3OG#|?;r;8$uOgP+Ul^A zUJs#eXc4i8;ZVCto~uBogcB?HJP16d{c-b2fQReH&qpyh5H0v**epI`G<3h#OusO4^=r?2K~SWTqy^g5RAtJQ;$(|A9hj?uh>u71!Oj@X(#P+Z3X{$Ows zvQ)a`dwW4Y-!n1!V16N%$8rb;y~iqr1ZOdb56TnEV@(`nK4<;5LgPZ~R{)=PWdrmh z)}d*Vv847T7NS+OKkpCXgJXlVP;W*Ih(3_q#r zpd7bV4P}S+^&#N#+2{^nn!p{g<`8nqkMSN7|hShJ0O!F9eN` zPt93t>I@U{`3cewoy_&};f~^SAk7&Y#pIK^1vKz^FdM)}M@VBF-VrLo#*WYnR{cV* z0{Qsph-nNbO?zUYNJj=fPo0{4gynI%bSNkGK#2HAmPe+62Byw%2z*ACe!zz&UIfT$ zIcRb9a-|Y|724!O6R&_d#i-ZsD-u6p^9SEGfqrTwny-uDv$Xrgb5o19Mb$A&bkpec5OI7sqwst9vcCC4UkE%d{kS#;0gCdQN7azb$F(ykTOtKC1U^s9&fw!% zYCiN4jf;!O642~`l2Q4jRYTzOrNoC@p6xOg$Y7AG_54~1Ax6m%ahn8-%^#BiV&;C& z{3SnOpSOEQy&G# zE_jXyfoWlxtM~x;ym(^ragCpK^y&F3_xy=-U&M!C*!%CN=+9e&BMKQAr@U2t2Yi0h z(GO*B5{u9Gi*{8lQy?k3!w-L#LO+zfNi05#UJ1+wJqjh|w5ISyuH=*8^X9CnpA34b zKt8SxE!=G`;N$Aj!j-t1PwAWg{Zl74pJyTN*a0KaB)uEK=h=umcEKf+w7gY)AAAbH z!|k(de~Nj;$d+ds5pU3Ee>yD$$qtxmSiM!m^6%Bh!Ka*^$0y$Qj(`7D;*+{Xo;k3= z=f%?iK5S2`ZWZ}bm^4=nKLPXU=iC?^b<9<;)UEQ&1_VC;5qNy<%i$Aeb`h|DA@fm4 zK;ZMeR{?y?y&X$CvjNX|$EP2AZ$}~Vpl;yvlF4j7F|AL4er!GpiAV7HZ!CaM(u2%C z{U~~n8T$GD%OF0Lyhew4%p^`TTG8y6eSG>YgAVhE?0U1mm$rAn=huP9oS#-GSa^a_ zk5fD1gNU>gEL^=ok5dh@KfjUn^NDSDtfGB<)G|9(HHgpKue^L>%t!44%?B<%%FIT+ zwc!2N@A~*uo>F}ZTSGMQhZ-?Lhx0$;;!_#72FTx8mE+e{dfn%YTdQ39^1}#t1^Ri{ zp;tfBGCcPngP!0Fjgvj)(DPOoA8GlWFKxgP8s~a?IC(H=rQE**eEuM7@{#AKQk;NT zV?^TPS|2k{TB>HOiOKK9mffcECSG@p+;#u|Lye;B|A=i&;rLTR1GKyfY~ACdlR zoyI^aUEuRamnI)NEO<*pY>73VV-V8W(W-@#7RQJR`%^U@F%Vl~ji=xdep1Gu-A!vS z(SRQgeExXi@Sz1(9soM&AW`r^>i3q&5~7;ju~31}|8?|({`|JKE#j=>pj=HW-qN9d}7Wt=J8Rd_oDIh4;Qi~pA18m2kZ(akd56) zMEe>d1CQlesMfleRT1zhPXhSFl#NxadHz)9qmpXuYLI>!Qy-t?6I8c;)SaM$&%eC{ zpK4=|3C9F-_bFpkC_H}Fkg!O@xuBktLfpqQ0^6?|HU4j&vBWW_uV8zj9klMhY{vQ7(U1e2Dx zs_%o(^CmW*Mx|CjK-z7JuC;~~@4_J{Ogvie{R=+h{;+-AV#gF+YYq9`#rgMPoC%Bf 
z58R{Y;Pb{SlaD+;Wc#~?I`#DNwg>(D%XiP(BXfMHQcngxZ@RXCk7KPJU)DuGAL+-n z{v+{GX(Xg?{*TW(4B~SDSC=xP+{6@+kIw~MU3v%u#Aoq}1ebsah)>}86q!#90nO#3 zkbuBPv-E@c#V&kTMOVL7sO1_3aljI`s8WGjxaE2m;`E%b&%-lx@J+rIcGMm=$L$gK z$23)hCfD!7E8OwGOJ6uQ@YzE1GRHqN_=Mo$wMSw2q>qNT7QB5u_bP~wGgYOoEUJ90 z`In;~F%?ryK4gFF_$03OCGm+NpD=t>kPq?s-f572u=-58v5z<1Z^i5ncP7l&zIeZ7 zSsd$p6M6k|OsPE+20l}(eYwVmcBDvun+)>`@o|lR6pa+AZ~js#OZq97Zx%3K77-AM z)ONX{@5YaR*z4VCW4tUvAQGwL-uU#4$3hIlH!Yt5pBGLfKFi8ymNWS*tGre50-sj_ zkF6iA2*2m;*4J${Ru)!Lwlo#&>MO*@r5_F9)%lug zvOQFrkAe8=d~T}d9xC|!3glytpB*YlQI>i_;;%~jF~g1tCsrCJR{6R3Jc1{M_n$Y{H*EW3 zHZ5+dx!Y#5g;)xmRn$I{fJP0VNe8q#>a6N685>`U!nmd~oc2cy)B$qsj%g(>T}@pHB91DYv@{)?Q1)(G+^=D*O0F^-!)Ds510*0srL0XeTk3VzHm4t z=Fpk{)IR;FH+1IpApK0|@^M9l3@!87e3a|oCxLw6%wYs8I_>O)ZS@yXZ% zGA&W-(b2$2eG)L}=X(PAAj~11B`g*-8&n7^_D6htc-&ArP1xxeHY)q$seJ%Gzkudt zkAEEd!)*Pq4A!kowB2y-oi=H^B>JM=2Ai{OK0 zK;QCnhT#nJkaF)GGdH~ygzNSF4*KTwA5SGd5tcA~WyvpC`Z3dj$Zsk2Sl@iblYz&! zKit=Fo#L=XnQi&2Z1LgVbL%u0#cT^|@_#bjuMnThS$ue73o~2y*Krn~G|tieckjFc zK3{ev^C{@qpQt(?i22Co(>_&EHg>JSCo}Q$3Ae~&m*Rim;iH;t>P2gGxRl28`(XDb>oN%i@Ei zRxLI>!{MC_yj!ej$CQH^(+rx`b0=VO22SS%K7TNg_=vFB$o5cKFu++*9gB|$i#_Zb z{WOkl7E}UngU=sNWj4~2bJxQ7Ryqlv_4PebC$G7u%aEI#K3 zay0gUNYM*?{u}Vv?MvFM5IbPux0GE396r+ij_{K*HL4u``uWb%qvA@KR_iKL%m ze(Pof@mrF1@D`t9>*&rH@mo~w;KAq5WIlV1V!eQs<^qy~>>`B|#?o7HhSC{kKKRSd?cqj>9CxIY~io&<0CmfOGi{v4~+O+ z`uW&<%3_jH&F>*DD>gOI_b)`lKj7hG?LP!#L&s#2b;fh?zr@-fS*3U;^!DyN9 z5psw6XkEUCkA)@GW96V90DQiB;^*Vmk82o>VYNsTHdjv`K3@H}hS7XrEmDQ0PEp|V zZ>J6)%lufrtDY$an>(kF`Fr^I^g}Z*H>A`}EqHan=evN%w$E;^R>J&4Yb*j4$j7Im^fLmVAGn;whg`&+1&&Vm z)`k2{=J^!2etf4>(%Gn_pACH8GMUN8a{%k7ofnwYy$n2%u z&h!{fFYu+e_;AIzMP&9-Zg*c}lumDY@cAh4IQDrbzXRXrQUi+|A*B-dF^f;-q;-N_ z((V|G9DyH9`HTAItNy8rPvQ1$Y%b>!Po%CDoxuEjb9s?bmg;Kp>9-$y44cbHPi@C1 z8C~Eh-&>AQo>cTbeF#3EmzDL?*sI(uBwhWH?DP1%f2XbL>JRNNzp65k_=pK;@}J1k zkC=caFK<;PfX_qB&&TSxV3u*}bT7KYorjOralt6#)akk9aJQ;jhyT0a)0@hC5NCah zB0qj6IB4m@`MWeJCImmlWN+>IlbmQQ_fY0j$_A4@pv z!sW-Ew;M+XEsuKVo9TK4KChq1d?-Tg7TVN;@y()I*nB9$=n>kK0%rw->I6Q&#RB=n 
zJGW!~4L={X&h1$B0G~Hr&fpU>G&V0fhz}o2s}mZV*F)d@^-l*L+y3Cl4o>G<^y2?( z@xg%|oX(A3&Z@sjd}RFyK3LeGxDq?na*d93(+)=H$XcRCZa(J12I961=^!`xWGWKI z4t&~ENk3*jVE_>)QRDu879TU8Fo1}YsBz0%)%U>X*@xf*zn?;$nL*W;a5H87AaxNdq zQNj9Bh<|7EksK95l(XuO=$pUsNirYNe{FCx?b-ekBSa+C4dbU^@$rm*d^=!ipKgH9 z6S9(ie0(^S4U0*+z6)=%`Ysa2a$nz>Kks}#J}SM@8NA9yXGk1t2Qish_Pu-qe7*sA z?D0=MuVvV?w2D^BC)&WU_>5Wx$}{>#x25C;2Kc;UBJr{MFEhf89Yvg$%cA^l79Xqs zQZsr2t?MY8u3c&QVsiy*$oYnlS^!7Al_cFl`IOAz~^HViI3$#W-k9k z2V?#*i;wC5tR5J%9{T3%?wd+{ta%s0NAXB)aRFI;ta+EJfWYSoz+;bp1Qo3EREqqJ zL0n2UAKsp@%G2-gGY0Cluh)G>*2l*j{ltX>ZMymJ(T|c6f_^@GBJpuKMu zl!Aox&DXbN{ZyV(eF|IK(@OPxPTMZj%QaRh7jKoTh&vG_53}HdFj19pYcT4uYP*TK z)!LrX9krUqaeLGovVO1gNN<1u6e&)~tZ%;c=ZQ~+=Oo%$ys$j4GnC$|@SH@;D^Du= zo<0PhU%IsT6iVBLTZQcgHVA8l+j-jds%ur`h>e~Fj`L}EkK2#5kH^>`9JU|hX$nTh zU=*jkdkRJRW8m|0TR%J4y4@U{gTh&*==fhVXJ~NtGq+68v%xGqj%3IZk!RIf$`r*wi zd?@U|TbKFpW=7Gm13#JW7xm4zeb&^Y9||EQ+Lt(SL5F#&jlC@^mu?uhT$vArkW#mO zz3qz*J$%At5T5_L%Y2U8Efq}RM><`EtL0goU2pg66;L=A*8K2A}hZ)xNf{vU(HY5~>G@n;7SFgq77h z2$wLpP_~JI&vUMV_#|v%K0b;zG4T1esm+I^!+pZJk}_MxMn1DuKB(nOnW|Q+JOx)K zob48TgmlO$T$a*NXWTMXb=12Uwz}q%vvKoI$9iXA4EyHm_Kxr{7y+4?ZtG zbn(eBXZ$nY#>{*7apZGPhg0D5oy@~0VbkK+9ydM+QyRP~Fz?rj{~q|f6nMn=huYI} z%ri6XkKK?|9RY#Qk30Ht_#|y(UOxAZCqYHsx~gc=T9eI zK1*02bG0vZr(bAaZ~y06i%*dUH^#|R#K@#$xwL_?X9kQ6vnU4M*2VdBcyME!JVks= zI+jc4cFxU!krB(Y%DvbBg3ssL`kBiI@#sC&N8dZAa!d zY8ctrpON_}+Ke=sV>L-i1{h_AibWdOFx$V`BvuUvyA2? 
z^HI5Zk^T80;1T1WN}-frl8+1T-}~)uOK~3;@cEI$06s}Y|B`$3O3zSbRiqwxuYDw=WT#O=$(8eZBw6%O|Pv zLC@&oqpI;iJ@n0Yd@1mV{)_gI&Hb;WOVB1CXOCD>A2RT{p9SydDgM=Bd2@HxKp@k!dcEc;{eQMU&JpVpO+PjUn8){nXd z8hl7T!ampPRc(Jc2AcD6ympiw1cA@HCVoEa)r6+V!)JY9H8sTywZ8ezFAUUA!Y1b7 zqsC@VKu@f1zUvX251+fQ*UF8mxjz{2g^(|CK78)JKJ1ML=Kf&FFQ@2Z;PYKst9@Di z8~i{4?VsWW3R7}Fi_DDkvHUmAjblU%=Mfb6HY#4HXeE>*B<=9P=VcR%&z4rge~Gn2 zKR@SlWOVVLvf80~fX~lg1@ZB3Cr8=_^YKyC0)fw4rZ%5Oy`Hb`6^b_rU7}0y8IMM- z!FjuLCsA3e{4)5w_t4@4b5pA!(*Tn0FyX5pUcuqrsO}b){TZe=^kXRO;PdCeBgThP za1!p`5~_sY51D+BYq^8eAL^PQ@cD?OA3Gr$@kwHWg7~Otg7nSbzdo_`VGhuW2WjB&QT%1nXZq4&r`1g`6N+~<-aQPQBghAzyCgu-^=4CU5Q#m%DubqI;Ug2T*)a_I$&NIc7;88O13XijH z=?Wu6VVcZ&hbSWO|9kC6;Zt`qpg7MIQ-bI6#4BQ!Vd@I*n!+@h6OO==R{J9K^ZY6J zY-8k;2bK!*H)%__rR^3HS^*2?Z!(rp-l~cKpC2RraQkD936qX}s4l#H3IAS&doOOE zf9fiLPf{D?<)f+%f`0x4c!Yjj!K0E*<2fJr{~)+LGh_HFsUv;!>eI6}AIrZb85_HO z+5Rmm&o8dNY-00SQt~*<{`3`39*25BKO0vbK1pqlqaVRXRog@L0H5nqi;unkzNGeL zpV3yjeLc(OFwTBv7Epg~xlBF2` z44*SLhRTNS<@ErcXFGiC{nNw^Oz6jsdZ26r1E21ptsf7c1co}0kAj9;-+b@Gj(+y@ zc}&5|b**0J>WcqA!RKzPg(+BXWFUg3nwP%$zVDb=`muuZFKhkK3eKn}6^cn-4;&@9dOIh40ESFjZ@yLzN2sce^0R6m(3>`8>)+&j}wJ{RrQSfcfsc~rVx-= zf41}k2Zdtcp++8#95F99AHHhXX+LM&f-8qo7x=uu)(?*w#kR`zdU-ElEjS+@G>RSd zMx)+&!tz%2b$#>wkJ^01`k@sOAki&xK4SgQiU^RXyj6V}e7^STKG*uH#V5h&(6zp5 z@lkMeNb~pi{~YjG_Q&k9){*YBwu?l%#qxfZP;PahRi%(gr z@8uDEehC-nV)$+#eqwBONs+7##zd9EL2lY4m3D z2cMU*nSA26HPODp@JV<31E1-oj}Of+?Ch0RMl>6iNd7PrIe){=$DUvK@>iNE(fp9+ z7k|&*1)n!&eSEC>sYn^OPATIk@q_78ADz#<_LXjA;N!#}tCx#Kwp(t%CES^zQl7Nv zKNr_ez1Qimv)&lK;I@MBsNIiV-m1Q#Z+_rMAs=&nez#m+-zgp};bu6W;aRV@e$u&E zLV2qy0(^ebZeK{=-C&rz3pb!dx5W7%dG}a&{0K*&L?y1~%i!}z6QLhg+s&hWE$0Fs zHau&geW`tc&!0|ZJ{hK0wOu-!Qbn+JBomHz*?cnWbYPqzwqJ9AVCzUCtkwlSe|F{O zCXgJP-gZZZYDt*& z%|BV2fX`O3P^#ai@eZB%TBm&7iDDpbb=uw0W5|@tk9w`202aJ6)w{swDN~6L@4AxX zVHO|WajA<3>tlcVAF|++D{pNT^N6m+&sd5xJdN$V+0~oxMmWTi>m40+T8OU2&seIb z8|#k2=X)k@K0L>a^?uxcB|ba{Puy}={XOvcuTz;1KmMNlu53O$X+`q#R#gJ{yfiEG zfq|wZAxSTrk2s^7w78XhAAEiVcpUq~V~i%h9h(o2ftb9!Rh0lfzsfv(;*Oc+I>>xf 
z8#BxGAU;zUAKM37{#fM44j(JVU*sZE@dfbtHQ2g;m#aBh+(&c@>W#@`0)63 z_V~vPOp^3|Y(8dSlBDIW>ihcU2VckASG7>9n4?2$kZ8@q;@n$q&>r^9(V;a+v}R#( z%3IZUz~}YUz7VsQVgp!c65r0wpiQv>EHsJBTU7zz^JW(xGX_V}`?2|$F*uTzx2o@h z&s(yxewH+TcKA>Vo>kQ@`QHVfcM_kRus?PNu%soM_Qy`8m$bZ9eII;yd@YmDl4ci- znS2zVU6B9J2j4e!^s}Vdh0RCt*#+nG$G~H?uO*FsY(9#Qeu&S9{!Z|rcv_Xl&dx?Y zSJVpV>CIE`8{C`TTvCK{k@pdzn>(k)#sgo#tLa?D#eYTSgA<0W<1vZo#7I%Lc<}j*OBbISxsUPi8)Rh= zawCkn@d1+gbbCDd2ALK_+(@5|@!h=i0{HynDfqx5Te~{;RBV5v%xVYK2Ij4m27+^x z8J6zh;99F66!TIUK@YX|zzRu%^eHqIBQ|&lfe5QjXAXLZ9x%KZj4_j~j1jJ*$Es~| zc^!@}IxV)gHSUgEo!*ckeBvYRR(I5ku!9nGSJ7VspMP=)KKr|QG?kqq#m@8tieB#i zSqshNq(iYYO+dmC%0?2s%vF2?d_EU=?yHq6TA_knQwm^AOepIg_PLML7uJ&4=z^Z1JTVTWfnnN4g7V!vkHKf0oyEuYbg@jg z^Swon0{PgUF6wN^gU{DYX7NejtPsdYL1zW<`T9$X534n9=dsMXNE{*F=X}_3{8$Tp zdx+1c^&{Z(ys5>cm`8{}3QD!F?R zO!AP22uNPus!D)^@0GEbq1kI4+lAV0WK3Je@CXM74YoFHo%gVqVc2UO$L--+whh`yZalUv0Yjs_PL8WI^W)LBmT)dPIqHj(&fwStBKk$W1}#&~X!tvq(z zF(@jQ>x~+#<>9nJEOD}M?w**%M;o>c1c-Dp5FR0BKkXBCdZY1>4O_Gy50)+4g<p0bM` z^2}K~hiRb%KJPe`_#hfdiPnGY1=gpn7qtV7t1BXQQrO?s;*&eW{C$XpdPhMxZwa+< z&xIhe>3R)(-gznWDVJ{+@=WY6!;vGuEz<2(FegK3@rstaeX|e(lEtUl>)k=939-K{ z+*{HGV8Pvh&+kkmK83ngrIurA~NEd7u#)H2FjRT<#({;Y?O$F-N+U>-gS9)}wgS}KH(rp#xKXD|26 zL~oW_4_5H`00SShEs0?e_k&LMi{(U1>hoa`_o!1d(I@!)iL9SJtx%!>Qo_^jezuRu zWAGK{mO2VCOF!pEyNdu)qPwCF{Vvjd!dF}sQ>|})?4MmhKl!~1QZH|77#iju=3bcA zidnodb^g4M)XT^4uG?!p%6%?J=8BnGV`}jET(*EuoEe$JCjgK_Q!@R=Cw^NNe74}m za&re02R?hCnHVp5pJ>Q~kyhJ_0=VZEGbl{u9&G z3XZ6hHfj~}01^X4jEA_Rc&O12W7~+2gw!&^K4(}TCsg>fYuKmJvpN^}{4MFn!v{2y z?SFar@X?`?`(HOc`KgmYK4y;}^QR8tV{R43EO9ko({Ft08SvbRr3R9N*iOAU z?Pi`ca_@aaw5e5|}Fm}ZcI;-QlGoS8xBG0h+asf7xB`cpR_3kn6Zvr^3DXm54lp+V_K zmiSmuD43mziZy9(73*TDzrg1?hoSj!$4zYdz)&q19EIb90|@PYADqznWA6v{Ds8hg z@EHJ)WS?!(NbL(;3iVw{PYch?Nqj8PNbSoP^EtJL=D=q(0iWD%qgpN3%)y{F2g_9( z2w9u2Msi){Z63jOR<^Q9atjKE~S9s9w~pw(@j;j{(E6uX#}e-V6MN_?F0 zvt*wsM=UyZYYZL5N;~b#?$kw&Saep_7+M7@4L<+#(8b4uvJr1yVg29^A4;4nL9srd z$iLw8W55$iKT)dW@R_ZjDCMo{v&84p#mCpE>;{KKA)!an4j+$x6dOf@&yQzae4d37 
zPn;9o?qAs9^K1m^SL86ay9)dn`24h6KW12)gg!0~A2X_rqCPI*^D?)7q~;XwXeH+^ z@v-I?svWJg^z*VSSNn47$L!M=&bPeMl=z6KNTzAwv;(*9Rnt43aJcFMCJIs0ED zW1!4xvvv2*zr^R#$0q}K!~L1Dp`35ee&ptpfxF?|RK&Q2cD z0I;}GKdY*;lBlZl_J9yM@ z{Ok7t&wV=uoMmDN7DWj557?HLEK2`Q8)unVx!Yp!l)8JtwhT3+^n=d_vfy(Qv#zqP zKTu=Xo2VAnkp!$zVmWN;AYW_@ST`~2>YbiGFl43iXwY8QF-`5V95!`~j!LkI&*4ly z2W7p1-hOF(4tl3!^!5sgiTF%r@)4$({N|ymNSK24vqPsb_*rJCN(oHh^FiXXSHNL` z2a>cXw}IYk9~&Fg)P`b_Ck@K-!z72ZM!_H3sY~ukoYj0PraBk;h_J*SpqG# zh6Ep89*2YetJ_09LWN8-|Fwxv*5+gBr`o_~7#{@1RghahgE2P4_#j9%1reV^n-BlJ zO*qLEBS9q9G#zt&yB?JE8Eq2Sm zF7t&shOT3b*kOC^N0~aTh|l2+J~9>V-(n6IDc$*^Xrn=VWGdXD)f_J7dd4WN(ME~S z#K%WO`LoVb9wgRJjYs*T;}SUH($7N; ztc+Ec)VYJ-Lu0Ir4NSu$H>?nB)ASMIL;7*?dD=CW(~4!BK8?|`MD1LBs63<7LvVG4 zT9&8>@tJ!0Y(Di#teCIixO+p#I#8Te=HkQ4LyBrv8=qn#rjne9&y|2O#-3pq z-R#k-VWO+nHJW#5#-3pq-JH{^p^_K)l(OJs3rAaykESIYPDo)NP4KBtynMvwU?SgB z7ay@Xn5eu}i4S}l%*&@#E+yJLMP*!ky1lNFo2OKd8&7+oPd`@nA>O^m``KN5tjt40 z>fYm(w?h4V+tjNc>jVn7)yP+5auS{|dcGPW0kMmZb@YVW>LG%z3r@oG<+L?OaROrK z=S@ES*e+!2G*ze25|J&ei;wNXy52-*);f++-U{{eX6Dt8IIM%SU;MzOmsY4-b@36g zTX6P^AGq|=3RT{!J_P-|dFtjP&N9&vFUd>FR8r{Ml=z4QJao28@{%%@6uP`seGq)! za_Ht`SsjxQZ(%00a%tUG6TG44ER#umEURM@;w{W%*6Xg@YQmYq6fTp2&s#6Oe1z4B z%tZc;U3`SqQHF`D9ygx$j;WW=Zb9FL4+}gx(85gpY#{5NP!9sL@m=}w`Z;T#o+0uY zJUY7RK zeOd5v;*-ExV+sAQ)=Q`~Z<(v@$ z>H$8bDfm>%`}t}NKe~V-`Ae1F-PS-gXN`c$fKLN>?yD6Gd&s5?Yh$-CzQ`IES*_DP zXFC+g#%^JIin(TTTHy25S>jWIr#u2BAhK~)t0OBct5nPNJbYN=5vq0JDc>3l*`6^l zkVv@8Dudpr1s_)Q_W{3CT3;eQSAl%;rF>P7?FjAXlkc_$rx%Lh!TRCTjS2B7HxPgX z!LSh|pjfy??itD27xFBhv8|SYAOXepBjlc;YWq53!F>2EEN&~S`T6Y9EX-Dxgp}}Y*zEL!({KEn z?gV_W7*;LV3~=;|TOTB?E^4{_UcSV==OfHpOD+aD`o*_Uk+iyFG+XCLbuac-Bd=?_ ze%yQTM88q~c;X|SLA`Uqs;^ZZNV z!{g4Iv3fITND+BRX!ye%k2`P1>dinbitzB(1AM-5D)C7S#{2^oA7#Oq^#Gr5XA+-! 
zK40JExzRW&FV4`SdA18{zm%!r50uIkrBAerJB5j6HKYycl=>l9l& zp~E8Cf6U8A+B#JyxF~O5-~D{%;)DIJsjw z96q`65i(k-dV$Y39D4ZVlBd^`^phJm6;7iIJ}&?stN*g?Gmj)eq1p=Qe_~vYeuRBC zH@_%YTf3`}r!V+?V>XaavIs7IK59mAAwE}ue8@u+QAcBjS@-iH56zPSV!9}GfzP*2 z0{PT%{A<6E+g%sA(!{KOZq7lu;(k6u9RIr8Zl0}+Txnuf-|kRiX8DULd#vC1i63Py zK6_gIcA0M=vs!t((1I-~MB>iOA^=sFj#P)X)NCE%PYxPVKQ`PA0%;aO1clWH63W?tMbQ;5homRZy$H)s$0 zJkX)tUnymmt=|NE{x|8zS~z8U4Ge#6gLh3#L9k8DPZ(~>;-AdbhZ^vn0d&)ak8gc7;SKrLR}+@Es;?8DDe);+a^)&Qhtt}#@WnKd@t5y+R;ELAUZ_l^*KhU) zU6Im!Rh3)9-vyr+0*|F1K6I8LiMX2Kn8>$d>PLh?;(6cYchz|eKHp9H;Z~=j;RMfe z?w#A`zR_2=XBQmw8^85b(vNTbInnIPJ^$kC&q~g|XngovJpYB+sbdIa`ftSYs&lmG z^k1g`hVzzEkHF_cjO)jmU)cUFiI!M>d~lRUMF%qQ`SZ(QK9Unh^q)L{kL1J=y}VU@ zL%;Ec4d9XI@Ad{^l6!La$Qy*`#09mu#z1$DB1vN?nIf^8zl0#~4K$oeUF^5Z8~!Bbu{P z^cJipruhP&mrD8}^XziVjC(-T;p6ekj9cJ}AcD_NWgY!w+<08_;aT;t7#->|`5;!N z+ig_{h$$l=;&UbOvGwC=TP7U7u^MK%J_?^SZ~lPK&rc*iZvB{>g;5NTsjgXk(w>je z+c@+aZ~9W;k@_#$f7Scx@XsClvSl~c@a5mDPlC^-#0Sn%h|E@H zwBXz*#C0CS%{zSVA~f589kqu|QWp8x&LUz+vv zN$eoy;iH&p94SCQUp;a7;H)whS;hKlt&W&C+u{|o`GsFcbolUPR_6~Y_DH|+wod~d zseLUc0*Av#?Fbyi=MsDnJ&F=k?;!LB{ewt0Fh#UNkALD2(^~^dP`!iGZ}bl$*$h!0 zoW}8wO2qUV@Az7wACVD_yB~rHZ)aOtje_&X;UhvtT`=yx3MRaj9T`Ik&aW1qcR!Z_ z5Um;>h{HP;oed7jjABPeR%oSTqtwtdtX``xn5T+7dM-MMyg`(BumB>Jp+H+hMgcj- zr>89nPtV8Nsao;SZ@l}t(_lUbvPCK)K&GB@Vv|0M2WKcgVuwuZ)YD@Gqi?l_*q5f4 z(qYKK=XsZ5`Iz>ozF(--7p&ih=hG*n#Fq_6ciY3!@abG-7Ao(%_nI2`e8VIJpL)4k z$KhoSGkQ3BEWXQ7d`7*&2up}Yngrvi8u+|`1@hUT#)WhabX-nQ$*@L=>{OUFvQT{H zwyz8ucZWC(wOVsTe5PJLyz}y!XODswoD2)a$E_cn??Hsk5nDULd9h;=_O3%&3w!NO z_<^bC_fPs1eE9ymyZ>^tKvX+3mI9B;A#$KE_`|XISUY6o)M;&Wlsm0ehJOK{Z@LQ7 z4{u&h=WY?haprx{%52@7-@(V?gELS-L<1yCKy>1esIBY*pKoDv`D6mnWUxC=ikp4g z%jL5=TLeDe3OoVhLm{9dM$Z@<;xwx1*2;0gUN7jxLh<1-ILN>CXkg%Yq1znIW`8f= zAwHLZd^nix4YLE|LmzfO6JsBKym0QJg8)8!cEP*@8VXFt2ZjDOM02~BH2i-He14Vm zV>P1aJ-NS3dL@95e}3T>%<39Za25Eg;PaY^TR-rif4JVL<}-WcTD^F{CmCp9m1@3z zJ726(TcfGi{6V6`hn=?{9gPQ=Z=Lssqt1nxt9of+-pZR>f6y8|*6Ls~W}OJPm~*x_)t}8jdk6ezB?y+q7=BI&w|hIWoPigPCOS6 
z2Ze{>W)`9%L45cmi_b%aDN4~abS8c9dC$bhM?M5E9h9TPm!W1~Ui%~axJU=($TrhV zea#sTigyVjsZR2N;cnF2XepV?X;5uPh+uz_`oIAw~ zUd!-F$WC#_jk)#X@ZlQj_eKT+viji24B!!iNrw1j3r&>I)m(hgWZ7_J44P*%?qQ}V zBD?`U@1KHCrBDw&HxE;Mr9DdXv^-1|_-DZ9uP4Mu_=jVSxLm1Vf{w%~bl8j4YLSA2T6Gd zmpF|YO3gMOqdizR8Lh#`*L3`$Mb#b=69-a56*<2mkJG!vXDajY=qH|f?DGXa{ivlL z@_&B!-vN))z8w8v#Mr~UBmkiJk=H=Y&P?e^|>p^@jWj;Ga?N%AV0oa-qDPU{s zA-E-$8(6Nd=CwVXFf1Wg$bu$e^Ev4lkMt0I<8jje<6SKH{C!sD<7r>!a!=$?KsF!K z_E`4WTH;Y9+zs)$TF58*uQZ=e^zv5q4gJP@K90@d6MjYBU4vPC(y-?4zSH^7z~|!+ zWj?iru2os>pxUTVubx9XxxIW5^Ds&TKff&bU7OEve2N@1!;8VVPg|u;V_=-OI;y1a zM*sDmPXHdL|C-B(ug@=B+aONi_?T-ND_y~me^viu@cI9_dlxWSlB>S+0^(sLmH{s| z{??#m7hUzBzPcl6rmwJAsO;+M>YA?C)Z_N_Sa>_TyKZ;Qt*+{+$K2@wgge3l`q3UDsT<46(Gcf+-I9a+81XKXfRE(zW$^jm=EP^GRjD=O`!Aw8(>@*d z&(6gXeQKTeM@JSmZYI5miBnX=5BdG_+y0-&f#-(wo*LJaK+=?M((7sOA!CtWM|>78 zK09VB!8j!w@;q_)oLXaDC(q~;_&iZ?@o6`U<4!i(H9W-+%X(JPI;ttBa(dYLDa=(qTz-l>jbfvUC28V?sr?<250{^#)6vmrfJ`uL7N&MTz^AxS`Rq5!-4=Gd zQk)s5;+K3bMkllJ6j+|`>F7lSpY2PBPn)(#DI!y(oZ81SHfo!Vh!YuAI#|EU`Lk{r zr|y1~-}Xz7Ux5$L`f7J7#a%G+Cds)!6X&lzmi0B6_Sq^`-z38F+dlT+F2F|_uta!^ z;jGE*)0uA~rd8AdjPL^0WZ8Kqyye`HDd?Joy|3Z%ZStAPvkLR)2m+tA0{Apq6&%uS zR*k0FLLvqjF<%%j5CDm4m)h_n8bZKIonp8YHL!g?oPxo*HJ)A&Bik3o3k1--u{VJy z5j*Hy#IPAZ=-cN(*u2`3`?6?OB#O5P&Qyl3+Jtf;Q68aSn<<0Wj{`316;3I~uxbZ7dH@DA*t=x1o z>UIO)FTd?qUqgI0%`$o6fhX|NL~m?bC%pl~ljvi)ySV)L-#VUyPt&YZX+P4PD$WKm z*tkx`X`MIV=!)u6;o>6-&!hei8EvdnE9D&|UQ+sH z{D;Fwoa})5KO@*$cs!70fzm&eewqIV@i}+#Ve*qXzuV!%nThqI&mybNou1ts9-z~@+% zAFSW)oaKNI#C+M|LxH*a9smzM_hEjN`A?>@kdmImM~U*5@9Oh}_%ws~WEkar`Di%G z6Q4yepVXs#03Yo}dGL9z;NnA(m|Kl5=FC|h4xb6Cgc`@A879pdKEUU}+{Fi{4_-q* z96sc&(DzY({O|r7;hFTjjax!j0eZ+RgJKJd6RH}dId*t_svq`}|d^BnL<{ZckgGx-t)ANRsb10bGT z1fOqo@wt}Og<(iOTCXmceg5gWB0ozzgRs%cYq8I36nvI;24SO>*K&VeWBKv#{d84+ zL>UQ>m?i#G@DbG`5;u^O4}ecel^-^q-*TTHM;4kY0<`$K-m#3e;KRoATQt1qq{9hs zqx|^yeTB-$_CBXxcZshTd~ENtcIz(D5AfMsK*3z|=jiey;^Tq;didm_)JWjHhP?qk z?*bmlNAdn-q9phz-XGM@V)c*0ZYOu|l^_47Z=ZwDcB@n`?ij4Nx>$ut2Ysti!+Dle 
zOe&V`p-yg(d&8qsUhq-pRT%iZW8vW=!xAzqR#iSSBtgT)YWeYxGXD>c*Vt}V=%5tZ zWqxz(%~^zf69GQQ27*)QpcL#fH)p*uN=o2ZFpOlA>BXRDtaV1?aqn4#tejXlN9DxA zWo2v7KkDNW@yXZ%o8@LhK3jhL-+mA27gCd((!C)S%ho!34?fAg7RSBwKLzd$_&0w2 zb;M`8N(tOI4Ssw<)9fHfN@6PRRO(bEG6AP;I$VgYO!@f*=hhTqQu;l>XBT*EK2-6C z6T(8`?tX^uc==!<@w}FNp7@M^Nbiz)ZGP&ha(V2hMFdAFRYFe&vuS@gnoV>&i@E&7>t7E%oDX~%wh1-_ zD!XD7Gv2rHJ`PSp8vG`XO$$%kWqjbvIOlO>3M!{al!));rJ@c|XZ2x6r$+zW;PZw8 z_-r-WCJk6y6*H>zNOYsPVrw+9(BAMElbJ^~omXJni zZX(`JP7;&CT9%*ZdpP6EI;5aGw5rQgk@zfJeE9U)K}2NgsFBU*LP(vXF&~~rl9J#@ zC?Ab9417w3P(I249~Ta=Me@-^!^%&*`|ZGUqmC09tCU`udPeA$r8-Vz#NkW2FTY@) z-~IN5FP|)&R+s!BfPN35VwehMdN_PeM-Pm% zo~|V(z-M#?K-C6T3~aO1o+++vBdQMRJ9wGxmOi>pD>Do{wme0gNKhTY>CU!FhkT}J zA3}6*JUT-X58h<^rH|&_eGfk8b3Z=HOgi!41<8xc$C+T~(=LI}b4ECyR0b#=pZwV+ z@Ohs2FtdYARF{t73@f%5mSTcfB(rmGHgxHjBP`lX5Q)!aAfNOT#0Wn5R3@Jh349(L zMDt0kV=?&TPnmrAeBE~f&y9A|Ebo*$o$K0QZl7Bxr@iS^_YLOq6JPh8ivT`p#x3y^ z`0&w2r$islbALXGbxLhFfR7eBrS_xz#Mkd#`SWqDEsBpW$_76BMhy8$s%&xi=%8%nC%*N6zXTs%fmT^mY=_6EhAJ1{EcU1Z ztui6Z*k084y;|>s&%e0x;Zv!%J7ygR8A!tuJ9U}es+UiHh@)JGILJWGoO7SL%=!U7 z|M~!YHqBPGw6DmIh^})KE5YuUVQgCCLGMD5AAPn=U7`p+KLb3=d=lDYSEKmD%SRV` z?Cl5m{M>^0$f|Ya7INVIHVPd{@;2i0_ol2`cQQr}ytW&K;Pdl^a6YOev6L`l*KF^Y z;n{M*eDsne;&TYUq5)#9n z>Y{l6I!>-`^Io{+2jH9f* zLmi3SijacOZ_QOcEgan4LIEI~%{HK=+2OekK6%hY@cC^+;R7E$oMMXfKy z4GMn}!AFWs9vFo6g!o)4eC*@`e?*}g<^Gl=d{(47?i&vH3w(a(ATA#{mnKl*kGOoK zAzGo03F`^?{4Vf#?6X_N5{Qlbst7(3i&Qip`>$@fBaglJK#I_8e*#?SFmi@4pnAPkaqa z!6%2>#rZ4}@rg5fC9^-a{N!Oe=%-(wFk^eLYFH{h zIn*%XQ%J}sj)tY?lfUun4-OD`5Vi?=aY?4K8c1c zNhlqk{An8TxeVnal`3ZBAxD&)Pd+sbd_J`x`Pha=%nsLaWbV3M)UH;kVslx-ijpc1 zAFozP7}9rW&xq9h%Dydpr&4<9}zb+_z9MxiIZt^;jRBzot!S(ocjs+i8wHO+zfXHe^dP*L* zdCm15Ym72V1`N-c)qgQO2J2xEBR*FOA43#^&KBNl2tLNKT?qPNbmmp~4)FQBxyq;A zK@=2%el{DGDheO+fI4Zt%U!chwbmnlSjlHHMcfl2r4C2^K}62#tI)m7O@hzo1CO-N zI25E(E8Xg}N@jD{xyOR4h89-^=?fOEk5PbXR4-YtktZEWt=i)ie#Pq+pq5v z;PZtAn-3zwTR0;G1+w{MhY;0i#l?f`6MZT1L73baXN0g=1U`cyNJH8qwTvScU~jCw z)tDYm&e^iVz8i#pVOn+l_p5qfn#dT`tuCvR?Lp8%i7 
z4!{Q*RV^Vd)z_85S6+$o3u=8FiGp+sQT=cgYlG{63O;WEo@GAqU4_24;pLNeKcR0Y z@FMu!DfsYl)}M*QMt+SSpZs|*Tlu`2=L?^wwKJ&+84gZL~IKGFc$n>r4VIJ<(*mo4#O{tIL6Ed=Ck z7`1V5Y;W@DYMd6%ZALx=pJBm|4+#$aptiUqqhl<~j}#ldEH(Iy=6-z0HI^!~fd3>* z@*}ZGP+X!^hgc4cIQX0!etaN5uDpVT0<^?OV#y(d5G^A|6f3Twyv-V&cM`;%i_#OF%oBkhhO zKi`7<`2KG2`w^ZbFzr)D;B6eEvQ z5bh4!3tcn!n~j*hJe!a0JhaYn9xRVh5H1g&d&%QV;PchM z6`t>|?xEsXR99kLGnyu8GR`NBLJ zmP~+__{i}+=RiC{1U_F=Q27MdpQN)P$tSh_$;TXt_%u~MVfH7%K8EBI%l_npg-3MQ zpTy^2iBF*YiA`vUPZIl+BV5Gie2Gtx{fUid*}ocUe{uxGBSggKO63z^fADgm)rORx zRQ5-HXyt1V;`55iC&>OtK=CZKP?E3wWIM1>@sue-aT|;uF{Y1k!^=k_m2&lk_$&hWq!1g~wjZDTt|H`Z zFFgUDUz>vu9Zlq;UF2$A`b?Y`TKyv&1S=-biMLDa6WrGm5zdm&xI@9d2rgFDwzE2vYMcQ5c_1`^6Fnrd@iH;Y<4@HL}9+E_#Dor)0{>B5}&JR zJ~gvdHe1NNi2IAB;&Wz=PblS}7x!CU{p-NzT2K4)@q=hSDeX@}KKZmiAD>6_Noju)^2w?FSp@OPY=3>At%#cHBN6AuaCA7~ON5Ae|7wKEG0s@*{#X?G!oD*%bfUd`6Tl zxbCFLrD$ojA0a+hl23cDL`hC-?$rip?KJA8{g|OYtV5j7t z7fBji5hn2Y&AH^m^o!EsgaA1X)^4fQY0&AGW~`HS75yRs($>ogKEGwS`MB&4ndmg@ zc!kfB{n2S14?e%M0H6I*wTgq6-HPS2Wf9jCwBH*Ha004Zv+})F;j6*m^Sf6;d@_vh zzI-%vl7Y|f8)1CXC>MT-Kt4Jt7y1D{pEv*?U;j_J+U>wC-EG?etgN=eK2yS|p@-Wa zkN@XnFq^_HJ)8K7axPy4pFaQ|o6mOZ8Dp!|UavRmTV|!Tk0%{e$ZiyEKtvJg@t8!$ zCU5$7gzK4UMDI(a~st3IcfoMSRFV zZ_7`mioyLYk3IGVWdZjh^C)C;EV)1IC*bp6=0SYOGAnGzPBS#93l#MT<|ECp z3x>4Ks|Mof82J22!RCWy7wzvLs%4MaX|bA51o6>Iem>2Rd|dGfm))fY06zs?}l9B{neZ zv7HA(rx2vQQ`D*hnWYU376+|Qh$i@)7J~Uum75fF3?q<_%p=r8$B55WFdxo_C~-+* zRixsGYakzqMS`*=N-cCFFIxE;AAI_AA3m~-OUgPx+4$sVehog)8ex27J&t6nj6gn8 zZ1Te(Odp8PWf&hjRV5jn5c^|mlOA-)|NNo*2Vs0~7|d3_ioT4<8$gq~c5wjYf-|coo)a$oM6($bsHx zen_@EAF>5Lqf3Vm>OQ0Dyt4U~YI6tZZ23XmXH=aRE7Uv{W(og!<-6c>egHn3-DF z=D5KvXgr)xK6FPg2>X}#EW+hS>@UYN%=VWB^N|K9KSm}55crG>;KTeshHZYl#yJrg z{$&C7M=v<&3h}uDpCY1I=>PEQRn3Z$V?f$PQ5>96Yz-?SiscCZUpLMNR^O9jpizJ? 
z_)O+ueB=n9Oo#&cXeL46GrbJslgc_9IDaQZe)KWVq+hcG@Y&mGpzI;HLj1TCF_tl3 zaiZESWP$chM<{vtfV|N5Q7K}q*T?rv{Q#fmfhU|#ib-M^ADw1E#Agw~$5AmN8t)Os zM;8?Xp9clT$GyKJ{m;_Bnv7E5`cy8vw3z;oem%Gf;ltgl4x6|Gd>|jizp8@_fzMZS z`B}El*gr)iS+%sUnCiHu$WOnxwI+`k0?GzGM6u>S9teEiWia{i;X`|>hz1gn94U&n zLCN{#Q<{j+WlTQK3=xyOh(|I$`JFU^&tE-Yd`MR0%!fh^owKU>dQ2ugMWA7CGyCJr zd}Mf`dQdgboy)hw9(?A&W3Nv``6O}$#pjdn*%0wr1oK%s&&koTa4Cw3J9*h=1b6DVh)DlW%o$z}U~g=U*6peAGIKi7Yd3IFOHfxXQh|p7>lw^GU2_ ziTLDL-@xZz9z^p=tYxwJ0s3#zN-iO!7u>HJ(+ ze&XBT06aHvCU>V>1ELZbC9E~(*PD88GM$}?QY22dK8ErbVS!0Zw-L)pCeK|RlMg|IZd|^yS z2Kl9F)0q`8iP7GwE&-p`tS$}1|3v)2IwMpwQ%^%?NtH1+3&5~H0UPqVA z!8r;8=>I5h%pT?P)7&v{ZB{yM+YDvw8u*-Aw>JAzjcQ5wbs2oV1Ybv7Cr z#nWDI+#l)334Bf=J~vFp=Sj1;n_YH@6X)}!b#z}pChG4DKL6POpG~ul{~7fzrira? zy|^RH^jfpgMv7(9$EG#J|BU)JQ%+od;yamwzs@fdRokTGTs;KHw=%y zmYD-6Z@lmU;!`x6Z^W{~H8sqFsA!$P5z7ibHO$h`ia!fJKYHom15GOKv?}dR4Li#z zk*R#3P{q@6e=NHpsAwOY-CPZQ$f(l797 zmP}fT*~dDhkRcDBX3rX8DYnTEkkLqn${R1fmdVdG%n4Ofq4S(j?FaZg$>hfwy0UCr z$fnuQ^`~Hi2YhM<=@;W8++(tvsh==D(mj^CnU#MKe3}Kur%`NFO4K2#H!w! 
z20nM^E5+Zvx~=xPO@uV?ZjOi!m_l8YnjL|U4j z&d4GchprQ8@c9OVb(JTuD^ZOpRl z2l)I1m!HONrF5%E21r=uwMxBM?QWF}1ajEfV1fQr*`WQTh`_>r?^clvkoP^E{sx~v znUnmm2@mFlG)q+wHsTNZY}jqxcJaXX_L`$9|85cJ;3J==3YL|j#ujLxcE35 zU#Z($)d$NPU-P`=bA8hvZkLGyG_%|1+e>gwfG# zU}b1$)gLWyyzAqPPYa=S)k^)1h9^@u-dcJ_?Y3+IE;80%6y{yFGIa{79C3)f@CrPIZf5B|Zl(K76MKjzhl7lYq5)sl$iw^oYG2^%@2_*hfmAuPx{=4D0jScAj9twe7+WVZ2NP^ogTMtpn_c$NppAMfiCSQ z=@sXL_G}l~PR|-3TkbCH3|=H@XvNop&wJ+J!^FjvJ7~KRWytg6kG+8lmGGh=Svz>= zldVVv97^kULY`Se6!xsV9o*3B`TT2=A1A-NQ9~vn;(&N9UjZYj%S^_H7e{BvB*ZDP zyds%kqKnI{yz#d`ljNsvcDgMTCh?P_G>I5>YdRZS11vBUL17sfjv(3F@JsTPrwlsq z`8wjW4gbb&$#!vMn>sU%*anyTJ=?{xVjaoy_2rG1Uc>n`P-MQ^Xf&^9e|dX68cmJr zXmqao{_;y-x?u9dgXv`SVMZAO9~Mlfcl06oH(n|N5A=)ksa5KgTg)^|Ibu6uN%xoq zpIU#|zr{?mo^k{}m4d>Dk6NfRfhdX+gm%d{^nqR{SqG$W~b&3CcHhthdB^M zn4OwJn6SN7T@F59HD~hEtx&CS_<<0Oz*Q{E@4h{r^{3Qc|8U&H;)2{kk7j9bMS#HP z6Ty5?>ShZ$2RN07uj=vSrm<&Mk^2;3zJ2-dQa8sIj5UJF*t#B1s383VYk>Tx9NMb# z#{0g=kn)3|`MW5DDCH;Vs>}J2IiDSC{5%RF%1Ujzth$_jfX^2fB%c-%-ytJxy)A<4 z(#8L9J}o4^LqhZ`%O(MV&zH<4pS_(*t9mUXJm<4_+8+;cs5P?tkx#(qO@`n@X(<`& zB8e|#d?+nN)4E8aAK){S^0SY;(`yP4xA|xvAPzo12JyL3ZdA8W;IG^=cTD6pWu8Nq zM}uP&_$!aCQ%lFZCP#kWKO*@7m^$(Yug#VM%y2ZYuB|g@Y+j9 zzvv7ZIaSVr%I3po%zO(B6CwRzl{Y@{CR@LnrCV&>kl8j9dGI;!-9q^~t!F>vKm0)7 z)-S=w-UZ|GmG-|1AHJoNwxJb&4tz$oehEGuC|K2~QLr5kM@(~z3ZD*Eq64FbU^@-N z5y6L@Us*<&$TDs?QtxCvTki+5VGH%mCpd!3&F%C3oiVe8JxA9bsdPpAyEdvg4=n7T^j#7X}QF%E#DhbdhGssbC+KPL_*D#gR0X4}!^N zNV7z?r5;S2UXcCy;KxY6cvQqe^i~xRt^6%bqZ^WJ}#5$M_EeKHpQ2d{}w)Yf19v zd{}vPt&5C;&wrSE`D8n~fPr`M(evm6)(`Oc72vVwha^Ak9hBeL!gQI3x`?gjWYZse zrcCnFK1KPBV@#J>sEZ!69>?gFH@;=hk)L*r4tU7yJVf(ce%fbrz=P^MbfPo)d+%^v znZGdK^2Xo$u1m-dJ0A@{F0WPR@JsuQosWk0c&)m$t*riY;Ip9o6n=i3*~KA*bz8GC z-5ftZ&Ya;8LULn}vqQ@V@i~xuunUI5wAaxQ&gc5tyN-KG(+2T5_wvc$Hx&2a;-jJ8 zQ1pZN%mesv?~k1i53h8RQ!)JbaGP!C!(+OrLv}Oxd^hme_NUxvY#G}qi4Bj2Aeio7 zo6l%;Y;2<>_V8HKeGKBWkbIEN)an?W2IV#32^Sz%Q4{kaDm)SU!TBJoX*{LlJoytY zK&+xB=0j9?BKHsC^GXmOB4Y3Mm?$~noFO7yiNW3TR`m|} zd{4pQg9P1uR1>Y^uvnz(W_Z|qSQX9oY>2~R^-R?TpMTEyAhOQ5Tj}7Oy#!T^g8}X$ 
zvd*~MpIT?w#?kmu-uT{6xg`0y#@ZW%L+3M_)SnZ4UPJR|Zi=ttWGmh}H^ur>0-wzS z_*5&~A}kz7;4#j5DMfvC)W8;jAtVMVuT@`AM>E+IkDjL*XQ7a!*V;f z8n&q`fQioEqF6*vshXlXXdaUiR>D*IK7i;>jN> zA9t;FDkZMZ1AP95;pKx`;ww2CKU52GKU)<$7aywicfqUJ>0_B4J_`>YF&D}b z$g1)Yb0HlASxJ7#|D)O;!G~%E80}^QPLIT6ydNK`6=1Z_M{s&*Gsc6@dkRZ@kgL0I zWHQihKFHO*(9=MJ&->>dK71{Yh*hLI8Ew?(ZzUhYR6cwyk0@TGIvMTfX4Y7rF${dZ z?b4UeO?Da&b3Ui2tFX+6R`IkS&)m=2R8|0=1@O4+kFqK?YDn+iPch7^d}`Ksa(af! zV<00^SCs2Dl_8M3B%?i@bK&lUWzO!7jeDHhi6~#Qe@(NUH%21UnqF^*txo{(+r%W&G}h!?9C8o zsmjOB)#axdC;>q1v?BIuNJA?I2|mAc>EWYpn5Ok`sC?87Qyp?H)Sm==eie9J<5vfz z+A1aN!(*CAhsmwA=vY*ioD%zow)vcq|FDBnZT%kh;W16r?Jzm;`Gmpvpem%DN?^Nv zI1$8vp)#3@G}iN)g{qKtMw5v0W$8Ma1t69;zVFXn5+9`Kl#sS;4_#{s#bt?E4JkT3 zq%EIo9$*JPUjRJdgSvI?J(MSTMs9mi=0)20a8|dj{Q%`jUNoft8JQO{O;D3kH{kOX z2gIjaYFFhRbM~?awOMa6K*}lK+sWSEl7K#>7r@87l6;UMir^>;k%DQy8C@kR_$E1@ zb>kSpQO7uXnW}Bk3T#Lj-_97%FXLNd$W=w5XhZ|s1*Yh7iygi)i9IX?-%!eTz-Q-@ z@u_wyd`iepkxSW}{8()^nD+UUQ0r6Vz=xe*Q0sz^orHTGHGmi&J26@38bIK~&M#o~ zjZ30XI6(1gF4$vcx`rI@WZK5+8}~%LaDd{~Tv!6CN86;#P~v@*f>1o>h0jiFx-A5nlK_+ydpxcGJ0 zF7kF#UjROL4@iD?-iW*}*Um*+*9N9|WIYo`cU@9>eh#;t&J5 za;G@NbSE(L_UuCpxYNhZ^w#Sj=JLkhf6XQMASN7VQ{#-!*1q9lklcJ&9i$V05dIC% zid58X%O4@|`6iN|Mzd3?b!%`$&_OCCl%a2$(5KXo#`&~=HakOmBtkuup>J9^pGdn0 z_U(H6dv|oxdk83~Z^{~RQ4;8lUisKivk0xr4 z&U+WVIPI-UG0Pi2@J`~>Y&EvJ#g6foFHO6maQC1&9v#n)rqrI+5AgYR;JHEj7AWm^ zJ#avTwngDA)uIMl*bfLkzq%V7I04&ivl$d2!&r zMmtKxXW`~!&w{dHvgL=lpv5dmlPzV}ZwNj=Klk!sRw!lQhTXctN3)0x7ars{{m|zb zZa(YC7wjOrDbvHnr!$%&3`old_3la1w1ny(-IDD8hn1^iujaD zZRXWLeI#Op?LwRf?+QUo)Wue>H(_3lo`sMEWH(KxI1iqOAlmu5iP~G$#pR73ejV`K z;K9zf}g#|ln9hty?5%76(F8J_-rfZq6Fg`q?=~~)bq5S-m? 
z2ju5tmqLC7AEQIn`o&^C)kMg}$LLVCezBO>YQ9+B_>oTo9_HUb;8>=d3&EWJB|iE` zXVUofBU^Lu@l+LkBNB8G1C-t!cqGH$es(ybEpGRDLoGNu$ zdgJhMs?=p^Z&jZqK1~;&;ttA_R4J{}*eX>k<(=fo7wC+`r+A9;Bm+vTG`4z!{>iBZ zRS%tCgZP}g_+We8zzK46;ts{II+rGT=J2T^>vS7#dOL6AIC_oJa%IPf&nqrIBHnqwQSM^MPx)~8u(U74Pw$UTW*G7{dw|a; z6Q&-l{%Ed|q<#k=3>Mp=d_JgD>okIDBMw6FYZD zwY@cHpM_^=~W5S}5{5LxA!9hrjg3>|H<+IR5zL*Tg~YT77A%VhFviJCu0Bh-$% zH?Z#aC;~{oeZum_|M+&2AHhes5Yt&5hrHk;T!=dEeNk_~=Swfa2eY1T5i=j2{qLq_ zFUeq9OVw(lTt;wWx~B&h8;K7o}FViL%k*krr7s4FK_(QS4e)Yqh^Bo1!7P_u(PPYs=lqw^YX?& zJCl5{q{z0b>~x#)!IDDj&1H0_yz$XT3rv1!OZgfU%SNQ}ma_9B+k9yL%(vW&ShMpZ z^)jT!vGzHiBAm|rWIYX2<5)i+KPODTTCzBRi(lG5I%5yY|pxP4}jSGN9QrDym86uw@`Y4xd|J@ zk2B9wnvJQ#9?}b}L)ajG#5=FeW{c9QbS2IAg3n)`L%(bw*B0Pn1JU|yPvQwa9|9hm zPr1|*1_(-#<$z(%=cG4Ac9YHr41>=vIP&A-gPcLWTD@uKH7Aizl@H6R0TFh-C;mmrUNYIGI(!~dkP`fk zUOp(hwr~$BdO&!4g z7kp!5>%ld(x2k5q=P%lPr1TUUjol2Q!@5F=l|p)sMx*;0iVo`s_^|p>EO52is8;CY z%5sbiiijVS#rK|%27NlY@62eEt*&5cteVzm#SBI=mca zF{6g@W&49u#D%XLM`PkXUJ2 zohT8XOD~_>kefRGmqCD*_#9e8{7)w|Y%m3%A0R%Pt;SwmP5WJts4(c zN7yRl<^6OU@4@G13*aN-du{h&W(ji7fAp0g@cDOhg%7~6G0Sggr@|9kTUFEzh<^C- zVS`uT2)}MjN5{tl?8WElQNHsxw7M z7D8RJ1S}Wr!*-PE;Iq~k@h({!U4dYM&zBXzXK8(5kL+nlDf1tfM(G9#e14SVM_s== z3k(x6(MV154ECH^_>Z|fzVii!g?K4;kc!`M&*6nP{E*)TpWmE=j~KtC(MgYn^+4EX zwpXer8rF~Uoi{v2^5fF4Yk;Kamui@G6CbSK-_R(y`H*@kt5Y%3$I&WB_?P%7>r)?t z*VfWhC8ws<=&XW3Xzn1 zJVWmC1Yw3lL<1S^AzaZDnSmjPx5x%pbV2#f>HC1^1~!&kl}-awK(mPdHd@A0I1$X; zVyTu8s8YF{=nk>5Jnm0NnDkjk`0r?JJckp(tYaK2XK{t=otz|UaWxkcpJq%xH*pkU z$LuuX+-W>M6fSj+;%(DWoHkc+4e?pT=Cg^qK)c%}&TB-K*3d1D_%oU zAgGLK*E&rYU5xzhHx?hn-7)1_=QGAU3KICd|B*;Ow){AVHe0()#e&2K1^r^!=P*7w z5hUVMh~&fNXA`v%+v{UFobHfPJ>!(gjui5g$hw?lc z-laSCe*t{HZSKgAzBEK96zVEuLap>tpPX*aqpM7ora@|5D?&u224jS=nAL)w!j58`vV z%m<57E;=`jbexv>U{UJWVtqJ)&$lb%yQ5!{5b2r22m^X7WsA!v@5%-~-vRLv>r-K$ zB^{jkRe=vN@gtlM+gDB8<|;lwe3~JAhz`w|9SMqem%)694WIGopk?6mcjj(B#hnVG z59{$b1YPCgb9CBA{GpEJMQQx{u1kMDa)1wF5caw+9}TBK;IsIMKcD1Er2q@$qk~c@ 
z{Q#fu_Shd=zZ4Ul)cBx%KR!AR@x@&Fv?X?&A;7N4it|?@rUi)mXyoA`~eNCbEgZM0baPNUUf`+`LiIhL&k>Xu-Kux(b`=6Z354x(9)+%GJi z_6OE;vp$OnpN_^;wlR2QA<1$~(TF_9+Zs$Pr1zZCVKjWrk+)q#@cFRe;bSZVLR3>D zl8Gvp@%b>jEf5VyM!pDv&p#mfDb<+-ZJA;PPrEqW1%yblTl8j$cAcH;+AIP_WuiVW z4YBANSjYBeGV?&o|9sgvvPGsh*5c z1fP>}uQ$|z5cvG4;pT(r7SLhEwvsVXe`_B;8cB`Q6XJ93<|E{19nmg{PZks8FF*R3AmZ~8Hy^Cg`RFD+Z2bC3m;9_7Pj<1VJUW&IjBHB z%$=dnCK&j9>=E#3H?})0c3%27JOYvM&-Uovl*P_#9$`m(9&_`_s97AO40 z9Ju*p)T}^0nrW89hwP7{UqXIZ1fsLGoNO1vM}9nf^bjY=&#x_(`3MgOhnV8O9zJ6I zo}#7Iegu3z?y=7#KQ^CaGNbmypN}3gqxOUN6x{M7{TrzVcYJ|AACHQuHa)Y42PJMfGj)rr z`5^fG_G93qjPIyWl7$TKhsXYCNQU5D$h|kMmeA20x|NKzHQ4DUfgwCYs11y%f86u}VwD33=oDyq@z-Qzo8 z)6p4~LE;g-G>x9p4)mmFO-4g@g2-rSSjN$4JfuP#DD6L{u!faCz~@sg!KYSwx>7Xj zhAU*A(Mn~0j9Tx7{*g7rf#xD&{>(bI!{$#Mc2bm7d`?8YhanHKCx(az>7PH0{2%an z%|R%iwF-`oMb{Yym_&D;Tf+34TDU|5Qn_F+D*x#brs%(-g zCDue|qYR22sF((m!NSIHy;!!wH)PpRY8eeuc`9Sfa)H2Em7&{q8S6 zVhNHrB(DB}&%u@C<6fSoTw1X9ZTsUaPnr1WXQ{#Gy$kR`ned%Pn-*L=^CH{svSHM% zqtnrZD;3YZ&@VQMerWJ{|J;v{OUjb};ln3+BRTj4_acLLvDj(7>$7ia=vQ++p#OKPzM{ZkWLZtGs^-D`c;PV|MKjlVitI{#o zu}W`Z%1pE69kbePvU(*cA{EM`@o|5O;x&`eIZD@Hj(l`#4QA(dy%KFA6^IYXPYgbZ zR4o=C4O9($zH=VM#~$*NIDjJf$RR)PVbI|7cZ^tkq{)i)Bo3cg4XyYh@L2#)xPCcP zqSzu7!AG4EgILR{HlETOF{$yxxRUZYPA2Qs0xRWDejckRFaB^r_Bb)e~ zFY`$;qPu_LB|h4W=<9S#@PV6;s$Yrb z%(78DQTVv^OM^MH+7Iyg$Hd1uZO~J|g)}WiedDL(X@da;Zq%E|@pJI`C%_ZTCqYcF z+RZ>d`8pm>y=C9$#Ao5+L+ZuMv+wnBUv&6z^L6QDOy?l-| zJpzOHyyW4d1dXLG5D>seia|Ipd-*)yJLyfWLu@rC zX=U}-g3pi7!}#1xXN*Gl&-PDc5I!V3`8;gdryt<+FXxb-ZSzTNFS}hoS!U7O*0b1N zUUvQHHjf6MpINy0*n?IUIQSh@KGLmH6L>K(hT(BlvA`PoMlJ`~E5osNOv zc+Q6cS@fmD^y_C26h35qu9FYh9=-WI`+0NXQ+ehYsa^=9ZzGjz~HXQzVf z<`|jXTyL8F7hgnl6kkRl4P)Fpw@_!^;O^(~=*8Y}-I$*Ck%^p-MD74}FW-6g^Ine0 zr??v*P+UGo_wx-1eEyuV#AmxzGg_r~=l1qCGI-jC**4(&=OEad;+8;y?HH&_#tsyC(==Qt7UUXG9vpUWjaT(yXh*=?Ei4#gvGn<$#F z-DqsZ(zQfBp-Shd=G*tliKYS6wZpHFLKh=|WiG5I91LX3~t zKJE1;Jyw2W8BgBr5b^m)Og>4hP+UHFw?o9|Dkh&KRwyo?yxSr0dAB<9*1uPiL=r^M$#$dA}wW_e9EpCm>{X+DHc-c63u3+5kt{dpLlBt}PR 
zIfPH%O^(tF&SxIRCyCKfS`Oipcax*^g7H}^2tFcpC(VN0)vCSza#DBnS+INh0sFJ& zn*UIMLI9u81-qwRCO@(GjYBlPW^-P=03ZL`d`FU@QD)V;o zj#k^u;Pa*fg^z>ByT^Px(FSuGK6xD2h>tpdj>ji@P)oxnj{_U{tV4Xb|A*{zJV247 zS{goi9N37DoImsO#tNbI+syua^xI}8J_|>FD78}IlYX1opO1dq%;0mYkdRNDaXty3 zJdg0;bK7wE*s0x(&Q7U?kVBL*CTXH9U3WlPWGI=!hlPiarl-9z0uE8vS*!7$_*^P{ zSoD0o(Q#`Thr?sZDCRDD(Rv+x@-W_m&ts1y=7ZqGa(Haeb@0ivsy3LiGS)8@On?XFA06ZkywNHRWgmZv;E z%JDz5-S=30ax`|4I1!%%e?F=APO04}`OzqTYy)_l@!iocyMk2G%mShl%}1Ak4Dz$l zjNp@I7>kl0T?R7Zvxwl6W*Cd&qsu@>d;;b_Y7801vSgEKg^x3T4v3M;vn>LjI}jho z{y6Ik51(X{=_GveY>SAGYkUvm6KiB&vd@w7lV@EcK1MA0NoJ*!$WNYaHSsB^eC+W( zTlAR!_^xUE*qy6jKFN#@S1tBmMSkS^!se4_bEEWtKZX3f{jp#^$&^ZIJ%mr5)k*1r z!)G4MC(YD{H5|ex&*s>V&zBWcKK!gEwt#o7EY>P@xSl&^v0<=UCXX&P<$6Kk!_R8+ zF5orMGU(TrU4`+HV|p?`aroqHsqXrH#UJ4Fb8P7@X((3%L0 z0Av3PKDEmrK2ml(AByp(FCQHQ#?v?O=@!6eZ>Q2Jam(T|JD$s9|G9VCpZ1J%UL{FS z(P4chK36_`GKdaq*ULvk(Gh(9Ch*)Sn$>3-b(}ni5Id1%NmFTd^aVG~?M|yv+iX-T z5knViw`dJs91U^upppUR9Hhc7(c(XlpTD{A@UfQ|9FxJ|bhk=4)I2pGIfb@U<+MP< zZW4UH!2lmNr!iWMYPIbU;V*8&GPUE#jM{5HzK=(Pfo28y)lT4f`Of`6a|J$Bik9d& zhZA`&O!#N%9rw@96SuPZ4}i~W z4KJU~Qfd2kv(eg3e}5i6hrQlC@~+-Sz5esj_fSe{aSZ&yKlBCMa&*eWUj1H@aA@_5>dUj1>57^ zJ|)rNYPPQIG&7g|xPOGXq=hUNV{JG>Rm5?>cbs%Ycy&mF&&~n(>^15}2mfEg6a4!q zh4g{}{^6B;dEgy(oA~)I5xM!V-6GC!cIU9Vw>k53fy0vDtQZM5G_zrluEDtAz60Bhc z$AfHZc6L7K4^MCa{1h~Ls6rb{ay-qf{Lk{8!R7(@bj+GrEg40#-9|KT6${non_F)- z*7)mfqlBm^^OFzPcteGEaoz6Bb);h>bR8b zexd|@t@i>mISwqZOFHli8p1e!#fS zO%Il5FwD&GBy9a*km26}KJPgIAKTgN7@dq3y2wfzC$iAb;YjOI-Yhiue3bENwCa$d zDy=X&9X5oq`Jst}LSVGnWRafZ5Ug0f=;CNRgl!qnvVA%Y8p2FEHsl?=bsQgZjw7*h zCQ0D)≤=f=|1Hn!PrVwRWS`tyVGrfzOpa#Sx%vGE_4w&xH0w@|jFgd*6S6Z;!?^ zjP4U^ojt{Ypc7KLGpqk%XzLGEzy8$~`0Q3TH{sRciwg3u!WwUOtL1L%QDd!HH8B}7 zF?Eb>hIadhhj8imEh>bs)i|6DPG)0je_&zdVPYbgyBQ+n=jRM2KU>?C_KsJ*Hcf0C znoMd)@fxvoi+~$-UDgH}w$%}_y~_Bw@zA)%Z4SK-+b#DC?fr}4 z$e1unZ0{ocimk#|HS6>y09u7@|UL=G!R`M@R5oh!Yw!x3{VWh|i_UhpSiS+mU>@dSz~JRSlHy z3~wyJXBUf$U3h;)DVwcQt7c}nsa*_E_u>5!1#FIc<1;Hmd#m~=_`K$T_|yRi!xJ;k 
zSTVvq*4me|Ay(Foy%lMg?T+jY*)+$Np0#uM;>Q=R*${s#ZhI@_^Qpii_=uQHA3pr8 zxc#yD*zR{fKK!k??X8l}!r?=cA7AOAHKfP|mcJaq zhq9g!KL$WffU68;@Voa2K9uu>h%o?i0$gP%gI{~AdJTNO%7FZEgCnLtV%nVku4spX zV!||>GP^nrK3}sS{i5yD-3I*c9g_!$8|7B1v}?4RmAYBnxn)~!EW~1&jW%r)-ygx_ zKGktpmyE#YJq1U8gv~*8FRj8#w5!}KkzbKTPRWl&J`wVx8XZ2O_a-bj?7TEo$cAFAas9>qR;^osMjwQ07I(G;JO!$(Id&Tm6}7A`)_1;k@jk?Uo96p^2V=Lt-;!-tJu zuJFQJVys897pB#;{X<)=_=8#QSnE5d2e0lk@cF%h%7^gWvZ>sHp2_VVv8Ip5$-{@1 z7i=5MlbMrya|AyBFm|^bKFt33&3|%8WBE5BK9_O$sN?*)5lJfspXKp=-N+p?@cH8d z7av={Sf!E#1C|H9!^gHicBK-X))v&I@}1FZfJd4CNEf1kniG7vDyNGc2qAwud{hr& z?*s{jq;QJ9g*1|bw}{UoHXor_$fJue(Gh&q5iEyV<@_+TkL zG#d2tZitA_B8U&7!c(AO!WP8`A>nyP!}>PPJ zz~?U=sC-DT(ETX3KRHgf#10dk9-#OI2D=Y$M_Y0WU1bEaFFy^Fw1YJ3OQGoSrfhV%uQkO2ZjxhTcc;wjQ{sca4MutgU`m5%7=v?@))Ts3MTo`*e(MwwNfqi`+(1#c_1H*X10!Dy}_2PaV5x~ z&$4P^#lf6Id6gvML;fFWe_Zktzz1pIQQnVKd~)am0iU-U!Fi|G;i zRXkAnc+ykS4)K!Da+G{tRmpcefc$I$kE356K8}XPwLJW5g7_?3Am7`{&5Ok6vG{yq zSe}G@a%gkZABXrX!ucc`w?x~a@{{WkOZ0&MOMG4lWn@Hn< zOEGd`YTsR${ILGp3wOpS$D#y24+$=8aWZV4nA6=K;&bW4hibN{5_64w+`lFc+X+C#4;&60+Cn}TJ|?<{y})L;K!MLU z9R%><9-o^Cn>Xvb+uargwcg&r@zh(5$gHOj`C)wSjjiGRd$TbMYlSwQ;#lhA(Mj%h zDWIAJ#%NB6czMb}F5y!$BaL z9{D*MAfJ*sR+SyqF`^M6IuB0!(;S`*BK7M73ojo+;V=rY?BOqa_z((*QTRu5^)BS+ zg9R@iE+d%;dH8S{(U1`MeCW!}XUU5}zA{mdHSUk=;j^?x71dbdwzsP5z~}o8z^7)` zE5!!<8(3oPluGTA^o%9(8alnGS;PL(2>uNiT~-@FE3cu`5AgYZ;0fX5ad|jXMyC;< zXTf}yc6glmqE9=keu?-jqWGlP!HD3axG+{tApHBm=fedzA1v3Ys+LF1_HM&1M8{jL z6D>q*hKFk^eB@I77MmG*)Gg=Y1U|of1wP{3l=e=E`*|`Nq4sok?;h2CJng{=rmqnK zpO4L>`EVly@rh}Ej0iqCml{Xsz~@(tSbUNhp*VbWFhbz-{{l~dei`aMp&>VVVhs5K ze3%8YT^XW`Caav8*PssY`87ZJfkN3?Ji>>_jvjObFXEY(4^t>Rk4KN4FWy1$`K<#V z`2inxbmh(4Sd%iNO@GE`<;zDsm5~{4ewO8uF^XXSFyAiUdG1q5emV`fcdFG&9TRA> zG;;dvnI6Wyx6TNzoxz|#!~~k`jGR8_8HP{ih)**CpSU|b@%iM@>*IhTK9?Roo|&PT z50Y}JJI_udKF?G>o*5yD6J>;HVTOp$D{ek+Jd%zZ&U(Z~VLCqflO^!^v;#LEdu73F zv9dLI(~vb5eB{OZQhYvg`N7ZSc^>Dk^iReJZRi=tq6mYx^@sO2_xa_lAfS9?a z^%e()V`wTk*{KuZWKF;)UqC$QiO<5r$2ES%md+dEaou8M$=g-W;`J|Gd5I$N-5cs_Iz{AHU_AvIi 
zE`E8Hk63T!KUTuYB%xz~i#dUOs6AD2$Iz0t7yvvj879ovAixdBM#OJcwC1 z6{#4>U@)Tfg>8RaYj+__d1nPaUwlA(b{c!ecDF8b-C>n!97`D9~J(o~gkVeVF)Ms(fx^tG%_G`EJ~N&PU_>nVVcm1K`s;aPwiOR%X2! zHy?ITW!5HF@_F#N2Ry<)!v%)<4?Cd9;HNwADpHBhZ6dKzi%w@VJojpx8&x-)?Jy}Th)il zcgF9$0v}iYAQgi~2rCWr>qhg87U{b4(ilfG4dV#8K zxqX07{{VbyX3=cHy+H$fr&~uVA>*>fUdJ1#UPotQn0PPNSYNY_tTDVBc6#M(X!&wk zW7pwA)Usm-V@zkmSnWS7pyfN054!kZyIquIV{;*j)l6|Sk`DsFML{<1c;+WLrD$>` zA0a+-Ren@HtTq4(B!h#)+$tdx12J|@v$%`XXSR%5<>|nCj^G3L2BoFk8;{O#IM~sB z965tr$^Hw0?XBt!;zRitQomF_w{2fy+)a0-6Onv0sdh|!UJl};%1_B`??Z@+MrXen zL*XL%l&r~xfvb$^#d)sn^W=lH|L!Y4Dj!?FY6c}|&|x9Ankh1)K8@t#=ofM`W=Bvj zIxi%5x(50AX5jJXbKBN0IH74wQ~MwFJc1AN)nlx(qS{&Y`@|=P{IEF0B(ugid~}#K zg3q@E%a7dZAvCD1*`lMbHoL7d&R|BMbQAvsJmT`f!QSKExkdR;Y%trg&JJhe6CA_5 z(;J-QpA`>k#HSEPzZx}4Nsax8!^ckQiQVQZE(D)%ohy89FHa6#38-=NPvh_j%ClSr zhoJ8SpKmi1K9oW!5jo=*vp-1=BaFjGkMjp7A39L^IExBOQQ@A}Ah;o~$KfNqLRf;> zhZTi1xT3cpKR;gpA6DOnEl#CGeZ4PfzQ7+ zTzt&dW}|Ml%=B#Nr%?G=X)_!x%5M_Ip1R7l)WR`T)sDfmzUBzyl$qOvWuJ+F?}-m0!5K9`C3P_>(A zZ*+V4df#=3PcC^YSixKD6#`egw zq<1VXlGcyf$7WX9GHQqs=OGF-FI-1y?8zysYp75CsA=;GeAMxsi%{FFnp-TV(x{pH z30(8+R}=b0_L+;Q$~_P#q$#;`6%LnNC{a=d+t{DNuIyZ`0oQ|%-t6ob#8e{woS zoIPGPMSMMA+rp4@7((3+Ds*NlqvRA1*&R zr$l@T;e7atY(J6s#E~DiFuTaN_|W+ER5OAP1c)tKlOT#MKRKsFd=4V`nDu=gHP+Z> zkx|>NQYlKBV(_ts7d&`ubdSYG-5d9EQF6mI>n1+)U_N{B@RI^gz`BWTZjBu&!CUz=5@$V`yh%> zihR^iJ{qN?f=>r{g4P$@{xJO_zgT=j6v~I`7x~5VZ-|J`B8rc2{Unh3P(C8?Cqa9w zx)gl6g=jvBvoIp~^m@7WYB;|J_zVuh_}FDQoKM7i7Rrb1p~h=x)mMSfXbwKy=hG>5 zTIMs)P?gB77SahSPp>yyrKU-aSL_|`nK&XO-W~J(!D(+gwqAUZRf#+vUr?~F641-M zWDhJH5fZP>Ra^x=f1Bio$7CAK?$#C#edoE|6gsxsEtjehzGlDst|Ofid2i>l4E3T{3kHlaVzt&9Trd~g9i?MAIs$F@6s&ZJ2k*Sb+f zA-^tas?*4h_<827k5zx7Jv!?ld6In2d`%%Y2?r=`hzZAu6)jKkT0!TIXmF+dNjjk|sm$+xr&rxdNY7g`{M=RViW5 zB6U=+(>juGAtLw2{T}UE*cDfux3zf&KL6ps%jX(OLb&)`Lwl10<%* zX^WJ&_}GcL{iFM^NqRkq&%%dKEF&D!)nz{Uw7(%;U~hrX|6TCmleGxYG9Nu_0D;fP zu6+2UaA~-Hie)|;IW?4?fX}bY!Dp+xyV=;S7zoMilp6FLe`FZzw`TVbNB8^G-n2KO z=lDazvA%p~_S#GEX?NQ@*H#<}{M)n1srJQ@&=2r=-GRcVK`~J%`h*ZxUgPFAO`31Z 
z6CM=N3w>(yLA<;Z5ycMzu}j^o;VpLF12$OYSq~`|dZBlb{Y~)sEZ`CHgOUAO`LK~) zXF|kh;pKyXUM4;nf>>?&@$lg#YIQ1>1U~;&!ON%AF*i|he%)v_s#UmtDp&AFA7$E8nQDvwp^`MvgxL@LPrSPFZUfc38E94~eB-3S;k6m4hnIA{jlC`(0 ztHI~9=L(-(O3^KQg)RwT&Rqoy(nTnPul@D`dB(>Bz5w-H2=RfTLd}#enjvss7 zCCW#ctybe1RynE(ERFc%3(XI?n@HS>>oE6mJy2{f8w_;Pbi#$xo-d zy$$=a#A&NhgF71K#f^4nzZ#>m4Zm}7e$#lF)7Iz=4r!CKdt?&pL zYy2YRnq11ypPiHZ*cA^2r)Ia*>NH54l1h#(KlUJgZgbM9W}PYtke|O`5T8c3xxcJk zyojCIZZyq?f$fagR;WFionII}+T{ak&76$Rt&xF(WaoLaLg4fI12-S+dZB2qYOOI} zY&JTm-$HXDLKba`6h7GWLiq@{wPq8=^U>6rj(Rxg$_JYIBd+i{_`G2OKFlt|&Q!?< zjU{{wH?6AJnpv$io<#8z7W>T!P)3p>k#HCPNOT}1U_#y zz^C0Q?b*W@7ZGEPkL~4drEP8{7}g^ZRV)79r-0pe%R`3^4zSCJq#ZeTH}nUZsrj-%FZkt z_mwv>0-vob@L@YU%s#u}khUcj9#Gn*ot;SWA>GTd@Sq>yvuh~wbGK5e)7;1GwoxO~ z&58=JVuzKc*gIx~Bsuc)e7}b*9m-#xph{+xAYsRqCfui1P9zC@SbVRTH=~Yk9S$)M zAgMO7pE!I_$#;mXuTVg-n|2QrTzE#L)Rz%5fo@H2kcC5^jPY|y$!x5lBe>S^z8GdGL1NL3~teA0s=D z2T$vCR)l`DKfb`Y&RvMQIh~b$fX|m-3HgD@@PaMM1u368_J0QA8 z-m`ecF(Q_%#ns>iA69>Z2w<;kr_!N+SUD{mQiLt|Hr8VXc6#VY2Xku+VuomiVgs>e zpc(ui{lZ??X@5%p^rxrTBpuUnsBNsrP-m27EL+FN5HrLo9E}ibmdi8^;WxHb7_=ZhbjZbUG&M9p!}Q{Rn z!sq(*i}TS?zdA+)pX<>thmU6Z)w%TLgY)ECjZRt>^RYj)+cniq#bAHB2jP5DsaOaf ztyBzr?gEeB_%7rp;mn5*VG(?^Q8D7PSmHxvj?MPX3@WzFM^2rObW$|%*()sZq4_gg zTF^pXQc9v|aro$ISzJDOS2XZ>dcMTxHb2~<)V7}(n`Rp;FZ<9L*P2Yu znB7m7`S8N(IOlW?EBf2Wy09y)A6gUI1hkOT{Am8>kKnU!gz&-AlpXx#0_0XSHyHb` zw0!cWbHwLznU9SnK!BVP&6f?mn1xTCbPjx;IauOD_Ib;M2eFJ&Y;_8ymnj)8<>J=J z57IK9V{3fhI6)1zp)2Jn=ME3>`OCoLGyie(!93cnRm=p%F`v{vxvb* z%8#R4(og1TlKq*8Po5OY(;4u2XJLsCwwA^0S&U@s#+z=VPB#8D+C`7w!_y}Hxy;9& z7AogCERAKi@;8_*jK>d;-}}+o|M-_&j_S5-5ECB|cY6e3bd2ty*j-yU7Q#+x7%8 zc=QV4vu*?^mn(wG?E_iiyb*j3=W+S0+khxdj!JZd01A|!YvA)R)ewA+jM#kSNpeI8 z(Tc%_nJU;UuRep#MAFkrCp|6h1=V9qdOdLy>fzPx4 zd{8H#Q>ms>E?++Od6jyomE=Txn!fTQ`6L&fa6UPlCEn_Ek%DQHm^ZaG5AN}LwuEkD#J;PZ`h zDL}EG_fVHoo`$2TSr3mk-yu>*SL+og?{~XX1kc zv3wqBNyzXMJbe5NHT7odE&~L)OM?Xjdn?ecOpm+qZ9H zosv$e)K?{`^|(bfnYd0>scxySN|N=s*VU}x%32{z07E-4LkNUu5g1mI$*ktF?8yKb 
zE&KqCAsrr>0ESgy#=)=#%p`U)u_w&`+xtBBc|WASI=j_t_0@Bi&QITKfBW0tw(B!p zZY*Db1}r{GLVl|A7$4hSY;dDegU2l#X~GEKK!hqi+K4{U~)N$Ohb{jnpt;1$er;xQ5S!H63du-4%?qQeLbT&Ud zwdyz~O7$WGpYQR@&vkqMm+a@QjF1&bhuI$b8A9b}4fVYba!N~l7yya>TGINmwYn1y zC_(+RT;F2a=QZ$okYEAKX!eu9qw4}+%x?OED%q^=RxX;``1hUh~(!g1)n5r=&vW`lQVbj0|!1oXz^jC zR^!>Oj?;;JZ%h3I@nOf4(L3fnoKBSbZVx+?2@ zy>V|mro)?fi6+i-nn~|b;V$_6B*~AP4=-Wmq2Z$$IR)~WkWf*DraGX&=cg{i`4l)F zoP3eCk$5Pd0*8acFQzJp57qztG?Vy<7@72ICGlYvAF*m`>X(}G^V7>vKAFWQoR50) z^K*yb!=!5qs{x#{GeDx}NbiH460WGYjx3S6dzP_>{GLm2!VFGtW<1%Xpg_ zr1IoDKko{Bc52$~1~pmdf@B?Gnf+Vs42*lD333|T=2%BqW*=`;p9G(WStuWv>C&#Q zx6=oV!FtaBO3Jj<f*fjQe$tsS{GZIw_CbaPT;!uZ^q{rCpb`fn$L4lk_s01 zyyeQ`L$h|R3PNeK8UDY<`S5UXV)VgCjckVhI>6^`%;Hmq_~@<9bsVycs$J09X?3f? z^#}fu+|LO4f%x<$Glpi*I9J&)H^QGz=6z+-h~V>V(d2`yp?p4(m{^$A^Cc+Pp09X5 zdAaY|`Ub@lem>LD zh1|zb5ltgLi)4IKX<7n4N@yDR+%6{PlUUOd^2xoXfzJu>nEGYakMa86HN=Om`Y~1G z!;?ieKDNGv+n-c?a;a~SpI$LLpP2YCHJ{w+8}YdcL4NMAI6jHJmh*Rn{|e!gJ1ug)xAz|LSqAc1M|>#rQ8qq2I}GHbVX?~- zK?m!a7de0ImmeP=B{U84(+8e__$QD~VogiPC-<5LKI2Q9kDNagow3dG1b+OED~Bk~ zC?bgpy!qp2xij{$GO;4~yn|)rW2svZpcVP#M)!!%G8-Sl6B8fG`R7#fLws_fd*CxU z4C7<&EZ|!#!^6cO@d?OpIDB|Hha%x3_)LK(ApheWIw{&);+S$oa!{#E0_y%kf_Yrc`|Rt<096bbKr&^D?K$ zLccQe$&J22e!g{?jZcF3a3wyuaBJZ6ZHHlelEsJBwDKdzKY6db1D|gPo`CuLI^x4% z`H}hLMv;PkWIRWF7Ag7Ij*E^Hp>n){Ta^9r^2voF1zZC?1wQXC#_-7=9|p^hV)5a- zuhR3eTvzBcEJ}U?_~cg2AV1%6nT<~<8xw+q+3idI^ht>yFP2j5j>#8$;~g=^1xVu|I{K^rvT;#E-9fi(fG(>h>{7TbX=73jc5D=d1H@v8CwI*je3 zKN5<1hbQ;@$7k4}(jN@021&H}y}-txH=Cf~4#mHxkg9^~=tjafq22PEzJbrfmw|i| zf(W^Q2tLk4j63>VUXDMNupmQR0S?x=2U-EyedzA?++87n%34Tb&WO^m>2yh%n|ksvaNDe?_j4v z<2k)f-6m>@>}k!I8h3_WfzLm^bofBO1f54MHU(>IE~XTZ)-^}8qU9%^kF!OWCs*On&8@n*k~Z-9=ZD~9 zdviz0(wZEXY~JqO>CKFz0eVDQN~VIjaa{vF0H1$x1wPMpOQqdvz3Sk}WR~smpPL^a zpY?}*2ahr)*}evo&&Po0p*;;P53%*%aoUb1rSV(V;CsfgF&TjmMZ%{4%9Y|+Uo_@l z$j`?a_-yO?4%pNijWW7Lk((C=NDgx>V$UUT^V_}NDQ7brjZV-lN@_qRtg|BAh|dyy z&>x^eRESf7p^KDzy|mWYhkfRK6wDq9Xd=*hu7C+Xf}?;N_AlQ#L+iRe&97XH_y*?w zkcp|PdrQV(bRWjrvD}-t*x>W~!1ItKI55=#gd%D90o7%(k!DM 
z69$r>R-e@W zDS`hA@Hw2MoUM~U(tH4>j?ARjN3bWx%sKMo&-rlqz+2A{zt_+T@4wNa}UOy6~Fr&_D& zW$x$c)In~V?HNY``Dq#V`jXGOCKAwQA5B@ma_rj)68M~x{IseSaKQXx<;Yo}Z}smO zGn!v`#Yd62*%Bb|86Jkp579!n2%jc8m2B-imoSGIDL;0+$Y&s;UrAmJvIWNc7koww z@Y&Pq)l!4}JCKMJkfU7b%`@9GhW+Ca<`-5}sHld4&twTcB@K~Itp*JXGC}F>Psu=> zGZ;X_)CL4T7r^sS4RtDA%Kz9J)z-Bg%;xRD*|yG!99a!*qvn+V;r7Q?s43T|Y0o&t z{ET8}z5@Yo8uqNPIbR+heEwGkKD^&yLBMg=N6k#M*VQNRUrpW((hWz>1*zie!RNOY z;KTDjdL21Lw2snNm{^#0jpu)QL*x+AH_A|MT47sh@cHeck52*p>v(q?K5a$!%$-&h zP1^X()tZP!=jYvg$OiQXeP(V-5vzZb-sz-|i3Ojz=4L`+lM#2o=XVal$J8W8g3?V# zJ)<&K58m=@u-n1W4otQ_b3)!AocTCZ;W0;)XueZR*bKXBnFP@tNOofhdB>olmoF0&PC8*>yWh|@q$C!}Ra$Ww)# zVs;38{@@UNnp(Z2-6B2~smy4&o;8i(v2hpcIy4_eu)|>~#zyGZkG>2%549WJrpS8o z%Uk(>?a_R^&hb|MLKz3BJo%&lwg^6AeFtk>Yr-VfcTjSmI41D<=q32@^_{iR5%tgN z(;@%$V_(d`XIpF4^aE3Vb{l&-g62}SiC*h`W{*rFDp;w(@xirD;~Kiy-i|)=VcVFX zbk`(wHahR2;rwxbjH0d@=UtF7wuAz(Vz0KFE2B{$Ud?oO>`Ly?S9gE5P z{Fpr)28Mqd-43I5umaQCR6^eF9Mqx+x%f=)_j*`0=Cf5SH`BkP`HYDLFl|aDgPk&qi&j#ofz?CSuZdeP!j?(Q`y5frXw ziE(j0C-d|1fcl3EGaaC2xyMC(t}H&1kq$6HiP;c*&WGmd3bUc>0H3xkKQ8@}2+Svl!3Eb3jqS3L0CT zkn{Lr__$9evcTlfn}N@Jmf&Mr8ywu!=Iv^a#b?hRz$S%G#dE)!DUiD!O?Rg2vj0Ifk^TdBvzjM@xRN!2WO&n zw0g%%(DQ9PLD!x1x#|$7oepPaRA|mklYTfy?*pH2IRqb(n%`@*J2h+SzS1FbVtwa) zG@T8ssk@R#$Pu5HLil7j7S{gdgZTLNE!pqR?;Y^@9~R&fs9z{2(<|8QgfjZ-UX${N zmKl$|pg{fN3HsihJJ?fjHtC;_$75Bz0S692md~{s+Zp% z(GIvF+qyp*;@`(q_444u>wlzdp>XQSe>D#e&eS!lUI|sJWxPKw0aB)8@uA;$|LJo_y~oiBG%H)L2_Dp@~!(?S9CeJyu|Qy>~i^npEWm z?a|m^)7~+fNS$Es8_Cg1<86ix$i2R*Hnq1 z5I88`ZeV=K`Lu}-c2DkL3+2d1``o}w++-6$5hA&x=uJV9{x0!ZT6`=tOsjNJAAFf}3q_wb>QYf081tRTm%actg z=pr2b0wiOCPBuc(>@1>R+j2<=d_Hw)@d27nqt(IIFQ4pK&vVPgHyABZEahegWB%2u0PWX=HY$T z>Mv0+`vB1VoRalN=&OJ|rpD0)qb(S)`E7>sR?6ScaFR7DUnppNem?BupB&u4=hF=G z6Uzs1(tO>-d~7UfMt^YcB|gg}e3A-|`PCEi$&aW6LIWyS6MLpxuOiEl;AZcCw;Y(&Eg{f}<<|&-58r2~x&Nk_4B z?HbIrO$*D(jgDTeo1LYim23|kTiA4dG@bQl^BI*r_fWPvxW>JhjSDNvqgk&%6kVjy z=9h0Mj2`*HzFN7Ve^n8DN?J|ZYoI-L9a(2>f8{SxG6u%^2<@?l$T}PMR~~Ou1Ax!d z41BQPw257&-6}O110UW`w2KLu`~sTxMDIFvuA+ke&)8o&?^8E1wfR(m$Hm7f#uS?? 
z`6LN@3Ofz+JiYdQ2W^ua7yzWUFp*=l(irIIich4@y zJvKVEUwZ)IB;@CrVl1C-9Z0a@fcM;LYNcBNMBGnB^N~ncEw{$T@#d>F+}A*lh|g6l zA2c7URV%x&;0G}=~;g?M6N>>n^d{6^SPHU`El_L&)i^`T0BMyp^iL!4IwjeEt^6kIje9 ztb+DXEdsSnLQaf2XC`%9wg%gLgrVV;2X?~#w`E^P<;Lz`W#CiOF}uj(sO*v4 zcWOP%F4S>T?k_Jr0-x&A;=`I%T0E`sJ%rMmGTh~S*tp;0N6XM2LN#yT^YI1osqP`a zvyPdm;85$IW74illoS5jRc=&&kb#c~3D-xc!2?3|2@#*A%|{BEE$jSDmyVK2H0P3k z1&fapF$+b5PaCU*BfN{cy2yZx$(@%;e)9b z8by}17NyDjX$~uRZqcf6^v#l=bL8WV31!LD%yNLwJD^^s{fXg|Lf4Y;Q9;v)&mt+G z6lsnmd~!9B2vG;{nOr61Q%=5=nuO1ZS+EiE)pGH4<;JbAxC9?;E~w}=6xFRM1nN+} zXMmzQ#X}(Q`HRfr1G8)fiFSJ4GS_C#47K7~r`_Gt;_~#I5A3q)L1N{`fBz%IN6H^^ zD@@0u=#%Mp=)%E9M6Fweokn$Vc4Aa_HZ+eE!7}Iymd%YJ>2c^V~_$&kYz=ex7 zi=9SmFZH$f`PhYGr=!Vv>hU)90r2^WVjv$YfZj&eY_y`Mj0gOehOnqb8xdk|em++G ze1xvqbQUU>d*Hwf#6{Q;Ka3e}V=sfxF9!258;fqC&lYm@V%fPY>Yw>KPqYQ{^Dz&! z*g~HzWa!1R^NA>*;yab2$J-R3%8kEy1o091A7+}K)l`dLipX!6rbkWH;@3fZ7G6H9 znPxv9HBGa{XL#x51=Es&d!x)un0KECwvS#9n`zm}VinsYC@4)FQ?g^v%4pi$6Ts&@Byn*O>OCSFfA zH}&)3MbO8_;J80OXS}S(%GYC4My}_p;Pa<``B`ml>gS{8+*HWV;pahoJoZ`eL1fqH zP@imR{!X7sa&zmK;8Vbk+!^)BmgetC#@gh2DmM`Fl8Lh30w;y1 zMHy@Kw+`mx*E5#$d()idW^&!4uh01u z*a>auGRHHf(>ant^Pwo&OgG*GMB@5W;B&mNY^~RJBg#4WM--+(( zNRe?)Y|DIXsv1r&nw_EZ4=weak0yL;&g={oiz6HiiX$!W+^`SzHpXb&F{1P0PjG-d z{uvu%;{shP#yx`$ljnCP^beM7t&_mVllhQd!ZQD~Kb&UzLBQv92|oM`b6b8anMnyn zdb>uamD}=T$&B;{Mec%6|0i5vKw1CK|3 zgvH^RWB0x|vH2o;YaG9BHy>*^ioFnKZ93u0sM3au-0X|9emw9Q7DM?Y-;ZLAXZ;(@ zM&=? 
z<$S35<5hVKD+s>YN5S+RJG9H z^Vl+iPgV;Z#z!>^4L*P6FoI833mwKsH46o_}?8mF&3yTJKhgL&_$ zhqH4jz!$R%+vcfs2YkL4c+h`-jc@<}=Xa|Q2iE~UzqKU!(dzh*?bL9HE=iESdw^>2 zPTI0nTQs(fA^u~h13E+(64X1pKsoqKm9ka)I>6`mNPbK{YYTu(KFS9`m8YKiZ;Rkl z!nxh(&%YkyL;8htyV0LtZR5lB>#66K#0PZNX`f4=^XjnAPkk@RPZ_qxR=PFZDZ|#R zVVI2%QF-cShva9ciaz{nnp@CC?ex)w-^{M6n;j$nb@Lm5$HiyWxrLXHs&fmv4)FOK z#SlJO=N4gnRGV8^e6B+HWSv`t@lkDVVe)x5@H|vIpth^l)M86K3MAMAsxVE5n}W?ted_OO2ty9X7Xd5P-)pYOT?pQhH- z4$xJ7U4WYA`jqN`Do;KANRjyHT4{HU9==KIcCP{t4?ch4(#L0Y5AWuqu7?MoFJFf8 z$?oBU`KagN!RIRuL-}O)@WFi4^YGyF3#R%Nf74@!IG8la?l z6!yNRRi~tQ3=KZJOOl@&?X_q%(7L#-)vR^qu&a-MzB!nmqjGD6ro~fZfF{|B2GZd3 zEnI$DUFuD;whUR5YTI4~4h($WeM$1;;^T8*TDgp+o)m7(&Brp(D%MAR-FGZQ`DAxs z!F<$nV342hJPhTN-GK%3QO|)@o_gCh$qzbbwQ+{7b8bPYTCdZI57$Z!zKm+VA!&B2 zC2DHp4SCMr0Y0TehYz+3ivGQ;er=Nvwh8h^;j0d}vCo6gHv$j#H|XqP(Y zJKVhXJ+}PN0gO2OBBRT4Ni5edI`NdBvSYifJRGUx`krwQAV2@$(vcsNkLp7`CjJZlgVYd#&z>i%4NB(1AKnK=EE=ps~{rWpLQnX3EdFe|(pk(JFfbpMN6pS%c&V6eb^q zB}ZIG<>uqB75ZiF|60=w)!x6O_zV?%9zNvyW%4oeV^Rc^aem4-^xOF{DFU*cy%R|1 zd4Br%7XXj5f6&jzIn}d(W)n`zEYhaf^5c&XC1;=W!##0@ke@GDMDsz{@LgTIjZX9_ z;^+uI=o)_3Gwz`iy#i4*`0({*d;h6leo~wY;)qlcvlqXz8@DUA>6gl7ueBQq!yx9-eQ6*NLWG6OxN*tu~spNtZY=UDy+dk=j6^h)A` z4XAa!O@wextjNZfwA<*{R&DPV%KKVtr?7)btlo;bE{jj;3`nL#NX>I%D>tt{bSd#^ zQF=^BPqkjEb;~+4rDzW=qRUCe$I&lpKjxAibkc%hioyF}g(TvyfzNB0m(Lnzsh0dq zh>yavRPcH2p~QzL5F>adeaYg(6Rb%;l*9Ld&%+Ce52@BV5@Z%1QZ1DQ2z*|5<>Ry6 zs8W_}g`1nt(WtLzmJEEpj7fa9wN6KGT`O0cO|sOPEs@3N$e7J~lWQma@mNVq1U?tU z2km-0SRn0E*t6Gd-`c}rg!RWHH&337KZ(_!op!AIDWQ1+7DeY2_MFeBch7Mc;Sl4g z=H|iY2No_qBs7iZ^g7$x)@rqQ62VI7@FD9ndZ9ODTSvxVkgbbl@ZWe4&w|en6*-@l zwzu7=X>6=9%a0YZPsU2d#5g}14Ggw5of#9pu&ki?V19A^M-D^znAM29WpBDh ze<6IVLTF_c#K7l$3(lwA*0wdv#AFv7Q|L?R)@vLYjN2Xc%#N+P;PYuleC+;FsMFNi zG!1LmZ_04T=^uqUO=F5aK*LdnVK)Ae%FQRHZa(cs7dz#|JPdQxj5mkRbTr3KLO#z> z&3WP4!Dn`8$Qdxjz_CS9K+C;4@zY@)5m_t*WuK z7w6|AdK+7nVrj?Q)ce8b!j>Q2+)vUkv6hiy)yuhxK>f13m$U-rd?KIsz~__9k{`im zA5ES;fwN=1`N>xaLTHK4{az3IACz^c?#Vx1ZC^Ph`}`P8bF-A 
zy^a_t?#B5j8WYOb20niRJVJi_e588AG|{2if7Q=Nnt7=Z9a4Pw#y?qv^GOvG%G^Wb zM~RqFz7FvDe~N++HWv5=C{ytv{s8!3V}W0Q04V=Kp#-RM^RIpE5_~Extx1jNVWOq& zP1$#OS0=_-oK1G0k7wX4v=LKpmpAzQ9+UX!D1kyRvN|I)1+9#*#!t3&mv!eEwAt zd~Q|utD-l;dH~((-|vfd32Fl(K3Cvl_2*xg0kZn@t8Re6=L44#pTHTIDE+}wM)DnU z7N5Wwm^k4ROBwm!AqPIc&A?~3QP&U3`u@5KfObd2-o;7pzUl=)m78z+oJ9biRcjC} zK5ADV(){90-$3$Xo`10}^TW*ZFH|=_oX^tXWA#s1Im$5kybAg!yyH@+0wCiwsG8l$N2& z%{O;ReysC%)@7ir^LJD?(BQMbkoZ(ux?Z;v_1U%F;&W$$iZMG;pMAVdeG+{5`IVOZ ztSf%D_^2O0zxlg_{IuKEMtyBl)FyHo{n1eQDJtaWml^4oINxer=7-L|p!2O%H$U)S zZ~k|O4j;dI*(>l?W1IN3cd`{GAHRC3kd&8<9r*k@@L2NW=OZQM)2zUU@=+lv@8PqE z;^WS)N}r6r8_LI>U5y?j2d_eYexn$}M^t4Z{j;Z+tKGdsS)eFBVgVlMpYz^He}10p zED-UzisF;1D%1JFL-{CCmFZju_5Lo7M~SLT=Q_ZL*N2<_U8KdN3XU{A z)tymo|0Rkur3wz!8q|UhuMhX|k-V0C)yeOh@?-IlyjJot^6*aLBiSEIepYT!>*1r! zeq8Tw0zSMx+?1cS7|$ zz2Kw#^pyDA1|IW#SU(@h(WjZ7n&*4^`ACjli2`xrvxwl6ae5lcN3rQC$v-Z(*zzDA92E`)EU4Z1jV*ch~(oBf^yp}oA@l^_+$uzBKas31cA@9#RPoP z20?LrR0)EJ&s7|s3_(yNABBP-@M*I+J{f|bNInV$LE!Tt&ZpMhTWkJjFh75F$wwjAZt;1UMexbs+9UZWF6<0M+3jLgh9>GdGF4|7#jR^8FiPzz-LK(TD#qCt+l-t zHYAv>v-y!RIZ{4E1fLHb%6x)D#1x$qxY{~=6b})>=VyV(ksm)FDMS?4p5*_w{P_7u zA!3q|Hv1mpvxw%CDMXCmqg03pK0jBC<&!={jOL?Gh)8^{qWNSB5hM606(WMq{{=jb z{jn;@vQFGhb7AwbD(n=>$;%cGd_GW@S)x$; z={dQwuFChf(YvMZG_YHFJbCN~dB1W~|EWuvk8PhBZJ$>g??Ez={}%ZCF(W=T zG?kZyNPbg>``ZKK_%1a7vVz_Le2oZ`vt!JPMJQ`|fwKHQlcdieyK z=Qis!Y>A57#q8pvu6ZUtQ{aL9k>tnE$IOu>FA5tbGo)sL_QpHDF2(>Q4B_4Q5Qw?`M#-cbDseC1~4b-?50 zv+@MY!$;W(82I!q!KZ}U1Y3GwM>QTT^JJnoS`1cH&$A`eCfL$r+hydUtzKjW=cql; zM|1TMd`_8{&l(cYEKyM&kq65 zLuhSmtJfODhn0KxZS_)Ed=NiZzmoXuc1yQvYs_%$&X4a7UIiHr@Oh4bPpwgr4D{*} zR2$uq479ou1U~N~`Em1E*}XG2A7$M;>DT{B^0U{it=0bQ%?Ga%`}0iu5`0=(yD0&>x z@@e5j^A+njTzn?9V4J2z^8NQlb^T%DE9bM=H}yf@V2SWc>n}HUEuNP*XjXK93RP4 z=Q;o)J_|pepa3ZO;c-xfI6eq~@_%?7t^<5{eT&=vSPm?E0OaPQZU97l{PJV*$sPa& z^O3Y_Gv+q+Uj?68(WhS)pX@Fyn2&lc41B&Fcpf@vbnBHh#yhSFV< zwLW-pZG@HPJ3Th(4f<1@$TJ_B!BD#KHuY}s`L!kV%i&{XTCqy1bxV52FXr%>n3SL9ghd+M_K0B>$wF4qG4LPx$mfqf_3>oHR1-7MseW}Ju)lRG1ehjI< z1}nGPm6$KQGnw~iY-$V)_;t;GU#vYE? 
z3En%U*Ya@#pN}kJ_*Cob<nz2MzXV4{d$t5#&dc-!%r#X=Xi8{ zcV_elBs=H*ciM;PW*BfoAA$V5d}zy$#m7E?5S-+8qfUxGZiedSV-@{4H9xAP=)vdT z0FP;ZBtHHWeVX(JT<^soK39q4CtVwkc==JI z4F~xADoeyCU8lBqK5BGotK2+(ZPDgq#?Q^iTaUAL=b)w|c&_s&HCEP1_c~fkVWt_U z2tH=~+?@9&vk_}S;gB|*T?~3`Yp?&#*?iJtB3SHA*%6wR4UK65{WQnM1>F%dH{9vX zjvs#vUuDS0GuTOw)OKb}+00bq-Za_QN*zU2Zr*!810Su~1PzXY7B0$|VYI9D3geZl zU9j2LYUM_2N4zEe%V^^6hKW`@rWzMT-x@W)Q0CWk+mET1!LQ z62|4L9WOW5C2&6QR-n*V`2aBDa|J%!t%J{Q1LbsRf(Q?PTSuLgb-ii-<@N`B&PFJw zLoXM2_NHpj^NzY14ki9UchK18gq_?sCZ}d(Q(ysMP~!16 z^+xddTo%ZO!l2TwuD8*!Ha#gn9}xx}pY?jvo;s|+=jI{jW9OgQ7E;V+x|tWbP*U27 z7?b^yQv4jAa5hU`C`s+)ohj95T#SZ8qd(nzHXnSR1|H$x&G?xoni<5O?c%UZQ{V1o+!?kV^T zN5d1NKe^!6oIuS+<8fYAFu>>Qie5f${X%2J_U@imt2OqM&MuglUlsGO&;=c#Uucv# zJv%oBgVFu`%`}M5m6s1$X0hBw_NUn`b6pD(lhEf}@)Ne8BlwKxCwwt1bgWIj06uTK z^zyOg$1yx1y%97)yD{PYxQXoYW;o9}!d z1D{G)D`|TTz**0&GB=Km^O4$cfP(le!Kbv_?bP*y^}xX>rL*~L*t=NUc$*ria`WO% zBtLe2omE3tYM=^Ht7XkV+x2x;4cYMsRfxtwwG1@){3!9U&le6@1IzN;+2>a(z6Msg z`NCgcf)7k{lgh;?3$%_~do&+YxfoR;s(h>5{Ki2Md;}S>#*TxWtjf9q{5voA-KlA} z;p!9Lgnt)n>^R8Ds;sjXjoInIxCb|%`y27OisEB}5uU!Yi%hghEc((y`Iulvv$NiW z&CakOZx%>LkGH8;!Dn(A#;3rL^TC>GO|*$b%^{Wdc9IYZ<>Ty!yzz-AzJ>{ppZ?oPeAv;TM+X)0@r1`uKi;N3 z0Y3LGgZc2qE_>dQl2-s9?yru`zM2<`@B*KI%D_j|*OjVe|w#i`o*+xC%5-%u$MpK4tfK7MsJ#NEjss>?>m&l}zkJP$RU^WWE^WYYQX zN-J6A>Bnv?h!3w{U61T=`Qi1eYRit0pT`c0;A8i{`@|Ip@PFtzPh7kG&6g%=}M#uYp8!RY$dIwSJ3lEf9$zq~?Y0HR`nkUf{}05W}Ns zF+ZK2kB}in>huI>4Wonaxq+=)e4cvBS2mEL7k7jrj)sE^wna@8cs=5HGCG+b&rs*F zxm)GwxBf=a=2O>89a_9tJpm;64132jTD(|&yiF0RJbmL!82D&Sl*wZsa)D)>D0=NR}j zb*;3!Mn27+aeVfw;B!-75Fe4>SeN@^W`09y+d~RgdHU(sFwRHLZ>)~Vo!?Lw6Zm`+ z=c8+_HL4ebeobBl>IM1vx=#@w)JL`2jV9ZIBdco^4L!CJF4RX&rlT>&n`0+4^^SN< zNnDkupa1wJ_}oT*tO5~Q*X?$H*t-J}QvFtW+yD2`BKYuWXaQSuw-44bUE=ev0=DKJ zU9i*92+PZA&cTS!mBj~mFgfMx%Hl+a+trS?hqEtAf*jjvG_Ww=X_U(~tg}Yl#Qc<3 z*75neIMLx=e`cJc9}>os$jwG4Cxe`>vx3j>UV@LOf7G510+1w^p@cc_H~~JtzXTuAf5cL-BoY!OJ&f`7Wwh>*faF%P| zzYgNF2;`GlwW9c_q*@lAPBD;AX4Q(~qmpV_e5k#Rq+e*oW+v;~L8?X@E}w~x4QXIU 
zzjz}yL!|574qpX72K{<@5yvNkswLs0g0g|nzbVG?$)IXU_^6<4#OKP*2iv>FYLBd6 zmIz%>$V%eF=UsOX>d=koxOfCr@MfB@e z4}E;lcdUhmZMK4?7a%Vm^c|aEkC)qlDIh?wKfeY%vVW)UKOR2m1t<|81q6urEMob< zl3F5^&_E~Rqkw^~yzNgPIRqceV>t;i3y5r0lHV8g3CgLH?*hSRkL1VXL;K=HstjQ? z)>KkcwvzcU`<@Vd1Rk86 zf;6gq7?>0R<=;Sq&xe6WtiL-B3zk7^BN6WqNtidZK=Ji=kGoR9+*n`V{+IYH{Crk3 zJ{UcSPyVIF#$w+ApI<2Y`J{K~_K4z65FZ8Hx#ucSp8!5rem*NNUmi3vi0HmZpCCReS|7n@>E*Ms^$FsmqV?f?zVOh?XJzXX z#79Nz!}(NzN9^Cp8Z7&)KKRi79hCy*fNP+qD#h2l_7Z%8Cl%QZwV0oVOeqvH)!_3v zER;`nLmkOSAyZ9!mZ5yI8|p|t3Ylu~(SV2MU#&LH*{yx3E3AEy-XcDnm@{@1`S1z6 zk_*kmXA#9Gql(4xQANeTXS*21C!>nR@li#^D#gF>)4(JAm)IjHG>epEg&Vg&Vr!+O zS1Sy-p^p-ug_jRy=(Sp@+TF{{%Ed>j&_u7g1!y(#5%p2m#Mza6 zbGw@2L-yyZfycBzVizIr1h5u9W4?<}aXxP-I((ciCF|kibSY6+ey&}D&-ye=*eI#L zmZD_L>nMKt4d8i5teLvP9_q~|`qXBSB^UN ziKxNnotNNaHU(dYY3732fM>tcQ`W89flQ_N6@TpTxhA&ptRajRe6AT(xuJ3xT`B&> z$00sEKjxZO&{jd4V){Fj-%wwAD1QE`?;<`zaN7I2-eJu~N9#1M30>aVMdQ{oO)*%;sz) zF$IC?tbgZ>*_*xDcreFW$H@sIN;AL67Z$gzw`}lO)JpOC4-ubct-EJ0FK1siwZ*?V zn4i0smlawxwXcHw*YzJ-c==>p(~*W{em<(K=*U;W`TTIv%O~TSjy$rTk18uV@>K{v zS2CYETDQuTbHRIQP8jVbNQjnPn(=pL+k9};5gya z3r>Nrn`_)7QQso0@yM8+tW%5cvG# zqMy&I;=_XYC@4N;pFeu(@Cm4pl11ryg3bU!j!ZtH08!xzDe(C%7Re`@hmYr@hOs6- z%V<8CbS<8b8rlXvzkMk2(QwcX`SyL<9_MaAmWq?iha}}bZI5#|AX5TRrMUY#;4$+X z=K1ey@M0prVV?i4uoojfi!eSb1c~8%)UZ2Nv;sa~TnyurN~5e1#lPWvlu#@CV!-Fm zGn-Gl+idB3INm{Tw{6?pacM()T*0S3A5VJcINqT*o%R&UI@_0l_-FUO zIRqcRzGJ52y?C;y8DHNqlkov~l$pST&zD|-kGXy!FuBAhQ~OLaerA0ARB=9RNqlNu zoxAik$G_?VTerN%Z6V?+~Q zvAC1nft9GA%>_Fi9pUs`oG@cA2NtlHn#)e2j!l;Dfluucd_?7;2ah~86}@{CcTqrn z0?sc8!lPD}1AO*a7@sVT%Y+}!N4fZLZyClXyLyTK5`prglzJilsXn{_AJe}#yXA7V zUa{;?b^#LeQ#PKTob-oxT=qvP0RkUh-)r(&hwI`RX7W+lb@A%}pKpfvSpMs_R%FBZo~6)y%p1L7m%!?l@b)4wZkp5fo0dDnvYSo_{r zn|9ge+c|&PpZifJxq+(zpKm7l5$DI*i48vgvM)Bn9dUk~pG}6LHu@3p`RPmW>2y1t z>NUPUrBijg_e>tFGoQ`+*G}ma*jYc&y{qBDz~^5vnGY~|@nmse@qEd4!m+dT& z{l2{40iVBH1RvA0)AFgF% zsRog+Ux)@b$7lWHyS9myIVsn&tx|gLYl+YPZndMEJHHU{>?B?&wFJiV@BUeT z)-!j0tz$gT@DJ=E_}sYk@j-nH-kfE2W=#Bk*|Sr%a_k&BlINZ|OLYk#>5w^ireV 
z?o<(ICpm-A&F6$lf5xNxCp|d(0wWHS-tlNSo%Lt)8T@~4Z#456&F9FtOI+MSl4`t7y%~J|*;3|H5NtRaj-`N!Qm4UaH=#K6lg|5WG;T$I z<8VG)f%u%xm@;fjrA>um?-vgKJRXhiBA*{U_Je#CeE#`iG#?^ELgb=@MUKd?8Ow*T zke=li4Zl9{`Cov?u|E_Tl5$A`Z|I*cXalXEr%2z>sq)51T?~uv5isDuI=GQlV2E`EW)-HB3{I5jXRmO6gyJ^`(ao zFf^()GSHTD=jK8jXh7PQ;3o3{gi(J$23mIQYM3DKc|A+O$8Ch-0VUz1f(ar%%Y=M{ zfsSWG??}og?~H9-^M+EFKKn0^j@GY9IV|aQzKS;6<%m&-G#= zKJhvwLtBMUp7acS4uQvGe_9Q#jWh;#E9tmcjNE)CBV&p*MtRl+SbZiJ4M>FJ?q`ayi>FuqpW;$jtx8`*6PD+p}WMzpoh_oF>*k6 z=KZO0VoXzv?tK4O`X2FFfR6yf8ui)%76==Af{)mBP_DLj(VHXHttoo4jfR5@bkQ80 zyZM~+?wTk4=^3Fzm*0CAd?``18b2PLk4MAaa5hC2c08I;0(ROP8xy3aG28>afzNLj z9X9YOO)mf+<3^ijJD`$R&{p#pi(jhTh|e;NPj?4t)$;XLwb`V9 zOUSg|s_BV%1@eK-;c3K4pW3n_T*PYOF_Nu!;mmRh4SfFm!o#QF2vE6Og4d6mj0OmU z%%|W8&|}fqW)C6lKnjXLBmK zQ{x6cUkN-e|0UTUhYz_kZi%jogjvkx#quF{=2#=%{?B|YQe`pYKd6_9&mtKg%Nixp zwgh}EYm{qk1E0UllJVi|&Pji_1blM7aw@JAe7?E}K4#DGZLJM0;Rt*-oVQOpFQ9Swm;o^TdO51hw_abC_nSz z)EMNt9Ljec;L|4gk>?k-eYW^;+ng>^jO63C&A#uHrzgON@BfndSmAT6TG{O&ACyIi zWIk5-JV00M84@@u2@&{wnDoock2Nvng5g283B9df+reJl)RB+)^-cRT#*_<&2j`^q zroC&Y*sH5d;}R><;072znM&ztFWf8mfvmC`5P<>h15 zNAWaA$mimomibuqQ9R8N_raXKUMc2{)IR4wp@&o&WvQ(>n4CWb04r!?{r zjo@>!g#3i?sdjcvV;zsg^^OQW!dScBo!Q@3pNqqAK76iD0+jX^!pB^!AQ4jI zRw;k^?_T=&AgNkMyArJF&{DHq8pT%+@mhQL`cCV)$P0AyK~i;yb|qNTq4iRy28tF| z@!Ir^GWA$Xe4&@OR3i9%jK%PAuoTc;L2qK4d^Jw>qWJJdRgR^ArgptC=a$oiBJuIA ze-$VxFUgP0r*Rt_c@kuTlJXhdgUIAK6GVKL$>gW;9BtbrKse_c;ZbJ&GUbP~2}@x4 z1_VC8cNoiO4-)m9P%hjFB;`X3J4Xh#qQ;K6nKuxht0a7? zwJPtfK#8*iUo`=r{-B@#c?S4smr41M;NS$%dICTR`H^j39;zIZ-T8SU`#^WAVzQ`7h(EE6pqf9LLd^A9QwDvCBZZ-Df4RHt|4<7?l ztfR^3J_179q07?@+!xW}grS!JLw#$=xDWbi@Zsn0xc&RC&4=b1sP^TP(lp;KoR5Pg z&3K!7FYyV`FFzleU8MMuv3znkR}Z>8@Zsn0#K;dCaxfH&2aOQ?TnwLlDN*nx@Mpp2 zccbKoig{B9&lowJk4n)q#m|4~%iz9T`DbhW3uwIk0TN_>#E&8KvDUvh8RNv=AAr1m zPIEtWCE)X>Vjv%q9J6Zzr@}pbVo^%R$MpI+oRfi1`x1Q2^>^M`g2;FU$5FFK6V1R@ z;ghF1S*830Ki|P^pZDOu`1ZlfeYRviR6vKFI<$Q-^LRpQAN>EpXODUGi|1FV8;ON! 
zPb=#rRD0E0Oxqv(0^EF@n8#t|l=z)P=&bJ@XgUuNx;eu36GnRa>O zXKtT)lRwLod93pWBqJ1w>jmPo@bTf(^x8R|5AV!YNyWea?oMIWC~{j#J-uSB=^^UP1<^vjW+-klsZKZFbs zpR1I73XJeXt5|^iMA#qBr@#nK&QuJ1p1t((@z@{gz~PFav2q&Y%L4eA6&sd?rsj|y zD@{c%Ioo;(J}u_s6QN&b8=xRtCO;RzCsKZfqS2^4L@w@w&jIjw{ClafhaJmk2yNyK z{jO{u9cx<=R}jF5+C~g|=n8En4}+#+_t8RP5;5!pe+hivUi9&C$&X!?z^AA2s>QD% zfR9Ul>{SdtInAMBnZE?_x$^Px+8^r#u;hdS_;~D(8ic^-!_3F0sdYLBY^z!?)w)P+ zn8~wJtMP1C?{xG8jc(k0#>Q;+fE%x@NfRsOH||{e`Or3=mHvW<4{hUFX}nE+7<}IE z=ObDiqiqW~Q67;uJGL8%^EJ>;qlq)ILZd`CAG4i}*{wICd**C39_On+Hu${kl^=nJ z)1kECb{9I;y1u8iaPTMmJEs-%gQnwSwFBZS97j0%2~G?)``nn^G0>l5gVi07pARrE zAG1ZaB|l(f&b)LwJ0uaI75GTgv6vs>0lZ9nmNFl+|GdP;oT_`ZEc(l>z$aG|c<}k) zp^J}ScmN31V-JuIRhH%C20kAG9w&ab`p-)OghH3nqh`JbjiU-l6l?|gK^@HT@yK0# zx3N89#AgxCr_7giaLxmxQyp4+XsngY0Ik6194A09I_rV2V{D9!5Mc26`K5~woL9L( zz6)reSfv7qPeF)}IMu*(^pP5{0w3o*tQ-hZrTo`k3p|c~iT=;x{1kWmlj;za1bn;) zh2$>wApsyhO)sCQ_=g8S@u-sX$#3jq@mYBJ_+6Nlc{ZKcE5;{Rv5(1zpTF#`PjJsK z$VQv?N5szYcs%*%!1;x#UoQE{RqSK&xr&z`R|u2+~77ayN)?Ll516|oN*)zVRN2?Qvkea=H6X!NHWOb#ZvEaEfv@wqPOSZm5PT3${^DV7g! 
zMurS(&M6U}rI!x{%v^jdKt$(S=b%Xos_oF6y!>;-@^Q#qn_Y~1)U*wfc7h$<(>x{U z!RLQ_#LLHv$CRHmgkt%4Ho9CGh!|z~k{>9`jS}XtgSuV<$B}N#v(GGX{Nh#?GC^aSIUfq5PQJ zzkB#VfYKsLC_lNUM0^%;e6;!j7Rm#;jF^~LEwHR&Kp(vUnpL$)Z zCTc1nU8b9lRA-p?fKa*yx=Vb5T6V&vpp#8gz@ppPfo*T`%;L{VGy6x5urUKzZ1qM_W_~%GRXeC`K2H} zS%PM1>@Yrxgw6JqfX~+gk1IapX&)yb)Z6WdLOOdS_nk04xeo~4mqGILwap+t8DeI6 z=rBG)e$=5vd=^1`GQ`YD_$U%K+m}))8|>2I)70uE)Na@>XTD>~&)66q8+Y@aLXEse z@Oj%3eCmzXzFwh8dQIPHqhYu?Va0yR8g0l;3$8wz-0$6?33{z}Iz_wiu{iLPZ+08d zHf}Z0fzR84$Fk4OZn>P^7zg|Umi)|5PV$s44fv+fPl3-rx&$BJ`MG=g*YopFGajMq zJ3n`?_fEx*&gJ01X8}ACA0+2HjlF`&$Abr*`|AuZV|P>ZaljrvVu@!qIyYt`Y5^Td z3+?=ejK8-R{YRpI`n9|YJ|A`SL4h3(6Xuh54uT$~zeVy%KitMXQ7K=1>m>kL`NKxH z$+uVFRO(K{%yV3~wl&3Vij%IwihoA)F&_`-P~&uJDbGPYNw9?=cN^lQYoNQ}^KHz_ zhtIuOV@Ir?CU4pzjqTziDs@Mv@M1V{+Mu3?yb>YLja$rXAwEklpJq#MYSa!Ad7usL^a*>t@z50A8a5G-;2?T_y!ANS zYB!o1PAp)}n%2?kjWmdw12jc9uuFi%2II|p6WHgL90IjRV*}?Gu<^i{8N*SU(bHYy zD(|k8-}6&Nk{^=~$}<~P9Ev76G2We)nyKTQ>clfMix0|qMty!3wY?wr4pwqRZ`wZU z5^{b6pP#vO@iCkE;d0>MGqIsdcKGlHhmwz13J>h_dp^a$M}Y835pTgCdzM3x2voR` zKy|p5Az~I{6!g|yKLftg}Th*4cRYD5_mFzxex~ zIs_k~UmhtEULH)yyVGQ>+iIXycC{1l>VGu7~%gTe)K5yz&0X_U5)I;#$^*=J7)W#;@%ewg}Vr+u0qx{`B4k15H9Jowp zQM)<8fdzaf$x}`2GoiDnMT98i2vKy92dm7{fn+pg_j;3)-ju-%^>E(hQ4f19H!ot9 z^7p=O0X`+I-PUeHwGO%^jcvmkm9)p4gLE+)6B$>nTn5FM=5+vAGNx1Gp24;*=Env* zf;T%hFrIagu02`hT{BNkK$1EF;F#TA0UTfI z;v}?ILhQs!D_15ibIhpMtaXK*0z1k#VAB<<< zuss~YbFY7rFIy9!2CzTh|Km%+2YvkNusp&b<9=7KHQKbiLx9povf#sx=EIPXd3!#b z_XdcWvAi<`uC${EUCAr+T&4Vv{%`?4$bi+Wddra-tGDQZ*6mca)ECe(hQodjc^*@0 zUYT$I%8yzpFTRg~Pf4%ot*VCZV&>f4X}(HJg<7+kT({_3mwJQVq;DLn4hVdHo#dxX zClb;5+;ZlbdeiPt$>e;l0xwLjiSs)CaBSf7 zo0s6z(OM0&^)O_4~r~^Jh zg_&^r#x%7zc;(1~&##jFgz>=@pkc3>oKBIKPRgf0JRZz>2{2Z9gvaAk5jK@3DE!ce z3yP@tbWY^l5LCL7|5LX03UCIWUt{XhQ+66EVA7w2N@mYrR z$!vi__$X(Az^8H;%|}=u>d)V3+)BFnGJ=m%76^R4wFo|?U9E+p`87m7!oHQxjLC_@ zc1K(Vx6dzLI(&R~M+gt8S-P9aM_IdLT`lD2JzRCE@Dl2IVS&`1eBnAOqy_D3Z?dqtT~l>JE&QTyd5!2T!@QA2+AnlhhY`;&r9Qv0I-9ASU<4#WA__9rbM 
zzx=r3AJQ)+0FnHR!}v({XH9%$`=bb-55A^Id@z+*+vF5-S)xwhuZsNG2S5MP;j^B} zsl&&be0hws^DysvRw;k*^I!Dyk*23K7a$LxPQy(3vG~Zrp*nE5eg5F*zZAm9G(VXD z1@WsLDI3Faf~mnwjuUq40qRYv8vUaO+}?0Smq%BcA64f<#_t9Fqc z_`Dx@0{Hk`y$~VNwgfjnAInrbrYP^RGhHU}Sw!;5;=LI5r+DSZ;-ixH;(R_(jOCNo z`o!^3#rhDR4^4pQAsy|JZet$`N}$nI^q|1>b_pfX*auIIA<+=KTG!ElU#aY}^;dMXs1f2~*&Vg)vUMHc^DY+1C!1Zi#`W`2%Pw2jL41~>eA3%x zYusQy>eyxLI>6`h(9MSqgGIB1)ov0WbZ$9ElLR$y;PaVF@Ufz1UsRZ>`UifI`qLr- z?YBNERsV4R{+Sm8_{bAfSk}~YR4*Sn0i=?pCiz)}^JzTC9a(BTKKXI2eklZq_`DRx z$C|)r5THQ$u_o|p@%bzZ;Dbx>Cdg9dSiO84Bx)s~AwQpeB7l!cWh83ZVhk7a)p5+IVFWdNT5`;#h|_VNj^KS~7C z#OLJzK3@Bi3JyYLFCVY{Q34KK2l!lt^KtA?N$L;CbklMqQ%TmEB0AvYh{yE~DP8ta2UdxHh@;l4d9pg=x~?QNnT zo#f|vVUn3vOyDKF?#Ajjg39~lk7Iw7 z0YvgM4&o!(pEdE3?T<=)-gyZ=HEnOZ{cKljb(|*atJJvEjPs-EJLbk@<}_L7iG{Fx{b{HS8 z{mC^PTnF*FisNJ3pX77Z7(R~u$vGh6^T}vFlKoi|AKCuop3l#{;SzkBja$0Cby=Ex zr8~9TiG`Y@yFGjBvVZcG-wB0t3H)BA{Bv6@j8Dp#M;*NIAm|t)DQm?E#strjLo??WKY%955Eq`&p#viX*a8N zt+cx)I*hf){h@JurZ^q0Uq1>w4j;)j+ljFZM%w1%x6Mjzkc0gE=pzyz|HKp#Z_3z^ z1k=Yy4h)?jI`@8z_^?nu84_b$Ys2{{lNd9vgZMlV!Y5N=ERv5E7^)*c#AhM#u@hss zgjF`k`Si;{qDEp&lAo6(K7RX?ElBk7k?oH{aU#jjGL(-!F-@NMiPSGZ;_dm>0YrRW z4&ftBOxMK6vOh}k`LUwJC(8b$nQHj-E5QD!Fx7zk{8&@s6KsFdkP!;z| z5TCdry0t}veWD0W7%o7_$(bh>`}|il(cqR=M7jhDZXW&(Z&}& zSh<-wHm1`a_0-HX;D$Z~`FY=!#0PD%F@^7RP$O=U%Jj{e#=yACn#Qo#n@~sEwqe|{ z9&T09yw(^#mTBMz{2}mpzgK>$rCY42YdDT4!@`C6{oV4@KfXKe8EA({i61fXdcfD2 z_0NIN#~?lrVRJ!)T9Vw>fyG(_bEj65Z!W->>7Y9;wIsRMlbS@TlNPgaR?0vB)tAI) zx7zL?E?gI)&RKstgQ-y+5%>%^pKhyOZ3b1iWLrh;%qPSCIHJNukyTXU^CHQQi;q8x zh%`kiV-&(ijuM^Bhi#PlL*CM)KfGfL_=fKgpG6EG9wVj%6wOCOi7EghJ}*V`F=NCv z^0A^sMfjXEA0KB|L9i=}7y1zSagZpoiVFETf5OMdZ-3mbJ~JQL{wU$?iBB3x<4QtI_DtRQ^_3bm+G>c?(A5?jvt5`C&X?I2ny*(q!IaikyeJnr%RSE*^>F zBj@UI9Z3+y^N~$aPT>gq!~2uC=BHlyvDZ{ZN0)LL$E(ChP+~qgpNf(EJdsF#V26_9 zVF~$IrYN@pM0^%;eDLj^C@5Kw7|+K+lIN)y@p&nZkIViv5+#D-`N;MszmYWYStjJe z51UDbC#ik`QoajXiOhvGxo{;Z9U*Z$-Xj);F=I84Zg>`y{GN%c#%Ke+@%@-t4vhufc|c#`sw?N46$ zysPNtgK3vmuanuSHTGFu6LDdibsCB07Bn$)*&j5bH-^LBNr4SU_fb-I(x0CPHK)(j 
z%!{sq=QrMU70svJC|A3CLbZ|whZr{npWON`lh5U4az2SO94YzaQnyS#A1i{-zJ|u4 zW)sbIZ1CPU&{!1Zs#9b%RNmkX`}_%#ADK_^npdjw4JPxkN;Gq=NF*!)w@UfLr^IKw z+JOM=>9t0^u-j;%bPNpx+Pj5kx*feB?z0*dzfi&q=T(y@soJ}sK#&CAlJD;E1&mNhF z8*#z_IK6M2j~`gy(n|R^-?Ak6X*EivMyPtNnH(k1Y_;8QFH@_|0c`b8!>sd8atKc4{o%I&Pn zp+tNrf9SYwdv?)k>>+cgBT!776?4Y*^YQAJ0&s9S#OE-IPnx8mgC{_KOg@f&DFKN1 zTm|y+=vT@uPJa3E=$9f}oQTgS1Nq4QDd=>YlV$K}7ceyIQm^0UnX`S|sVx8q2nT!Hcvq+h(BLVlG?$j|l@fqXprl?F~A zAHV;iRxY`QWAZ6R@v-$Q9iSxom4E5s@$)+t5+AIbTKl@UXwHG9(?+9Q&$W~Oc-%YT0&_a){mOxY_D3o zRbab~J)O1mQnjgD1q`gw!ZSfvYVn~4H{${uPkLix0xH6>_Xl?i>}+)2W0T%-e@wE( z3mB-A0JQDo;@WI<4NGPBHr{Iid_GzX=0jD`d|5T<`s1F9hhC9jDlF%e5@WnX~ZZ;lq%)__-ha!Y5kp6xu8=J9?Z6?d(EJ$kAKcte`iON(dc2teXC32hY7Fpck^Gdk)~##&@Jd>4-#K8n z^@`rnw$XBeEVgNpdGI*Oy93?*X%vxBQwxi|{fQPJP~9}Ycy3YbLHw6Z8l=$6qaiNm|r(yDEgyYLs| zB3zkI0h7>yp-M#Pwshe0|1LuKz=dheCX~wkkwlMz_;8zJjK@%`L}TRPO%?rpPhQG= ztmQ9H`UekFtq|IL)GlvL#z(R87S89TP(D@}{kr&AW%TL@5aj1AEQk*-!CRh?s4&wssy_K~kf>DtL-O;M zCxZCM_J=HU8aQ$GN1gH?&Zii`N7$eAfPC`fTHa9y5Xnz7l#gY9*2TxNKWgz=2Js27 zKPk$i{Qf<_{-{vkLh|!+5FfAoNdqT|{ZRpqh=1O46~V``Kj{F)+aEOmk^Fozl#gtG z*1<=%Kg#fV>tPTdkNx4RrzzHXeDdS5KdPK$0Qq_AIEasIf6~HMPlWQZ?9aORSoTLPK8qke9{ZEF zLfj`mp7@97>aDf;JfFV<`FSabk8FR^!ilp#s=zVr&oY9Kus`Vm`Q*nH|EL3q?9a=g zd@TF3E98SHgQ~&sQ$JJT^yss7r+#mq7t8R03SL0Neu_W;O8SR zz$*d=*Fk(P!}(-Q|Ag}q3-HPSB0evM@v*|63<4A)KUM&w5})UR$7g@6G8b5lOb1-~ z`KVb41o?UXkqADr9U@jqJ$*PI*$z22JFkHv@nP|Nq((Q%0-;zwiWCA7pC{t@Sav9d z0LAdJ>`?9ni1;ib_z)8NfSD9EF5!F}Bq|gF5ucYL_{jDrO%+EtAKCuoe$`Ii+RdhJh+;fVRy-NSf3miv0Qcz(gpA56k0@#HHpA97#079Yz0 z3?&@x7fG<|@61;|hC-8=EQcM1DrCRQ2tzjYdWXab$Ej}7Tk(9K-mVQB9>8=X@V z=xL5uk%O*4JO@7C4?HFxR@$vLvB|QWsi%beSIqqU@mYV2O_nDLb(D~=g7T~X`Zmdr z*()54BSPBKT5+QAUu23RZ%xq~Jm{1QJ}u3^Wfqr5d|nLVBL{|d5SB+ z)l$aPK&xI!N>%>q-hbSPxN)zM#3w+q)biM(e}zbNSQeA*=fS-sKC=E4Sl192PV+Bs ze7b3rKkN~=LHhHclEH`TPjNtLe8OpI**m{;M8v0+%17wWy7&nF(Z_&@&pe4wg8nE@ z-5K}=`$^EBa?iX%`+1ziC$2w5;Na2=eB}72{L@kn2mdAexz6Ad>Q6C1X?#Nc(ZPB+ zpI1`($ojJmKC=Gk!si$N&K3AnQPF10eBLxRIxVw_&Q^v$BYRp*$pVv8dQwG2n*;Yn z7uUOR 
r+1)q`OdVHl-=`y$eL$>vc-+CR|4^@Mt@{pLrx1MP~v;5oh{mVctNGcDB zIecsCjj+to?!$=6T1{*5e!=G#_Q6N)|Ajm|(fTW>#>#jt{(9;wk^Wy)Wz{#Iu->R1 z#BA%g<}>2MTcV*3TNCDka${(a=Jyrm`(Ips5D4Av^lY=83da9NhqXf6L|MOc>LWL~ zq#~JT=#EBRKEtsd>#NlAY*7?OeXAzB51~j3Lg8cvTP`mPg4A|X@FY!3<-RGW?zxdQw3}&pAR&td^i?9a`5@j zR|$MpE%TSiN5fiww)NXzF-zdHWd?2E;aP>{$4KOJ;6Bt$o-x1p?Z0kh@L5uxGx=yF z&y3GJolj9&4*WOEel(J2;`2B5v-uQ}WpzG!$TRV&0M9~vDD+3__>QupEHYFIfwJt! z_pkT_6=g`v7bk+xS6@kd{P`CtaupIC*hA4LoR64(QILEHaV~n5+y8m+dA}j?5%UXX zKU75AueVhpuUjzNQ1;o&Q;&%^$JRyL?E$#6O` zXdu&L2m_xt7z%s}xEA}2WB!%MM+5gVh|gS+kD_Z8;#2k^41B&YjSo$~xOcHYvCu%4 zWIufR#eIu~ijovXwtPwi4Sc?6KgE91`Q&?BQ9fn$HSpO4p7{KN>2o%pY;RkJPl?PX z7$?E!aFGx4Hnx}P${1WWHhy8t{YamljNx4%zXg1*V)jFJv(xU?@?vE9I`QYS{cznm z9!>j`_>YzT-{A8-vlt)Iol=T=I(uz2Nh|P>`H1e6Qp7X99O=EfE2D#h93c+!|Me;%k^BA z?j*pJn$?y}b1OKxOD z`5ceD^TSixE!3#>Ap3yqo!5WzY=O_7RmZ`1p2NCaes7e|r8gK1A4M}V^~m$Jcm9jN zOMLkGWxu;Qt5;_@JGdd0KIciE;GfUs-OX9GI%9aOVV`sG`R|R84{P_{rW&KX54;ko z3_h%o@rY`SYD&u9dGC{CKm5?Ug}m@pGwV35E<5z@Aun9Rq&n#X2OkG`BtEQ3^~#t+ zKCDT#wwUalfA(V3ene((>;PEs&xY0Ab+#qMiBCg5qOMK2LKr;&f97G$zO>DUiO)+B z`w@KP17HQ=fY>4*`2bkJs@!mJAH?V7bUxyMQ(-_Ue8d4KZ2*DK`>rB<0u=mU_g0&) zwI+H|`imi-0L3x$tAi2S>q-C3a(##RMD0iRt0m#!7DGO=Uugyh_W?d1Fe3KTjvU+Z zs{<6S=MwT6MN`6gbpUPZxe%Xwi+p4wT52*M<0Bi9R+D+~`J<@)#Pp|wlb3a;RiyV9 zDM-jCra$9D7jv--9eRJ!eGs3h{m7=XB%BBz*>p66!}xsoORggR6}O+R^`NNnMEJz* zXW~85#dv%^>El;tIXDqM5&O{#j?d?D#D186iMViSK%xFHJ|Zq0>juc)`Eaii;Umz5 zl_7z#@FV0S&|rZKiyTM&=~>~6-Qq8R&(w(U@#pVi8WxcwW24rr*4eu2_S2c~I^^Te z-xH^)nU^TVH;GSl{EG1jhck6JxU*C~p&XUs{`1}h@tG&tPpekFj~v9j-%tBK=d)?} z)^pK6e25Ihw4W~T>)`Waz$2}XvH8Wy1tLTHVe<=Z3q;yGzxq3&{j9IQ73cFY(BEq3 zSO3j*U_YGCrm@=CMcyLw*);SVyXZdbonQMr;PL&-x1Xw0z27x!b^3uNA2J^lv^jh@ zar=WMuXUTh20rdA^S$k6bR!Yy?~7dbjnv z;PXFLlK8M?vgO~^0w1;zwEXpM>o>vYd#}NVjbE62wc%D&ZHy0R%R^%Rg{jvFZbfwt z`EaHmBzxx%KJkk9v`l-~>URC$2h&p21gKk`CDVHiRHZ#R^(GTP_`#$UH30_Rczj!i zu;kwbpHHeR@-Z+43uf|2N)(cPr_tr9eipA`1s4GZL3bT)!8a)4_N${G0Esy?dPnH^1R!hzmL2D)mGQ{0Y2YI z_OoTSx2$Rp=L$?L{H%UITkhz7hXodb8QhbTw+d_VCCw!GHl 
zUsbW?rM-Xs`)|AkpH|OmcO3&qn@k+)w#<&xtTk;uZ0%wzqv#{r&B1fW!}I?62_Lpj zv5}$6!`}JB`F;kUy9UvDN(^Mm)|JPHMs)6W&jXPDdmR1X9DvP_VbkC+;3x_NAL-<4|z2t!lm|*_zWM8u+t;> zgz3*A1&;N_{{V96duDA`IKIWz~}dYCybw~y=t$qWw9-0bK8JH;ru)N?YUah zQs_G#dIKathZw#@ClJ6-1v+w94FuMs{$&KMic9-%?V2);BzR;zn-d1o@bGxkO= zlyhkse2y0Er`E*oV6$sb=Biv{>#o6AWrFd)8R64E$L`?y#P5QdKuhI{xAQaLb2f|H zk2r!F&4QLm)w;W!6|<{^w;tgmj-W=fpySoZN@-W%GoBaWqbNs9@Hy)b9wv~!-Iu`U z$_V+e;}(1n3q`1aTLm}c^I?ZA_#md0TLt$4J|DS?@o75FHj;k)M7<-KBbi9;FAWQzi5Pf*5O+0+CCqhU5niZ_`LZl;+)X4|xT@URsvhu^RY(~1QvE{jFCL=r7#&~#nK(AsdaW*|aDSS!!@;CMze2xVl*iNV0M)uRD z;TPFj-js+BY-ca_;4qn5`Ke6Bv9LH`W7QTkyC?gMW48Qc2=h5h{F&Taz;PYKG;?s7jRc8lo; z;PXA#j86|Oo9dmYMh9kA_{kQ=^szS>M>V?eWp4c!z~@KEeyZlymT4pUSy`F+Emhrv z1NQ{ck6++VR|^84SAZwvgE9e49QIn(#{xdpQ*5P=0ZLCowte+X#C})>j@9)i;KM3# z=*fmW0el*BWD6oN8@|A2I_T>^e8GOcXM^$a(+b1OByRHJeu5*y`1{R+VOU-5u}z+#47N)4_qR z(k%FxWIs4dS*x#w47)7OpN=UPOn37k`?2TXgJB)Vw>hBDzx)@M@vqbV3;K8eV*d5) zHSs}lq-MA6tjmO|D2{YK84YzeA=pn7c%Irl~H zdqm*#6t^Fr&lbwf(*e{KL!p4r7K+W&@l#EQdE#@;{LAOlFq@7^mbKa=;B)4l58c(S zb(_BpK3_1C`IzmV-K(#>2^R2iN7Iw(K-*0)@Nt3XiAL>BR8HMQiCVryzv5^fZoYpr#EA{I2P9DrFC5=V&HRE=F@Vvune^NGOOTo zF+9LBkiN^T;PYS4CHuk2+x9y7biC0>b3Q%bf&Os+f;_J!XIa<}u(sKLjbE%I48izCKI+bj<7`6b{9{A;(x1$L4wbL zdyfC<8wT0-_dQAW(>AwJ7<+XtLLb`h5ej4L%SGG=_`H+x>7gm3X|KU@_?6pJv}ANo zG`1XmAK>#tbJ!0`_ClGrxd8=HA;m(~T`Q*-wO}oO?RYMf`AB;M0-q1;6Q8E@0CNmu zgZ|TUwx3?n)jH?HM;Dl57#sMnaWOo?1$CT?fzQta&l8+*?e6AAsNC8mbZO`|rl-*iWnS;go=$hth+u#3* zGw{KxPqWrq-d<$wq37M{=)8ZSpS{HI1AKlKc>ML}-~_7Q5_ZGs?xGY~cNd4Bc!g*c z+o8X~*Pp}V-K@2+u@Ss5HinZ^)Fhjn;>6R0^$AhEbo>@x)qjJ}&sV^Qr(!PCDL47; z@%&7E`s60x1+D-5;J+k3{Co=QeM(yx5AIgYFyR+NJf+lcnN{o@KDCWzDxJI<+5CJ8 z>wQW^>v)|=Ip_EA)MIJ&2kznU+&JjdX&F=jtB9%j_|1?!@&n%owJ^uG%}e>BVH z6ZUTs%#3^&%it6CZxYPP=2Q|c;`3OA&t06ow0E&M?b}rjBPBkA{>dqpr+v#RB_;6r z5A$q3DBn`+n)MpS`0aKlzjXnluf(S|aR+^j?qsH(U1HRw2v0P#VK7TUL<72j32!`&q%^KonY(ul* zT~@Kh=c#;nDD7TcARMBA`7TwibNdLH5kRvryHtQ|`yahN-+o#+v&$g)Z1QFobiOi0 ze>j5+9NJ}!%E(0iw87_-Dxv+LZvj?#EIPl;<`|n)oGm{Zl&{F2`+RUcaekT2G0Ki( 
z;YPsc&(DdEf4;KQ?d|L!(+*hptaRZw)LszhE60=R@i8*(_ym>BN!2b;lAi*fF9n|9 z{0>!`w`>cW8K}V8#F2weZ>e`G&hPMQ^4^Ju?F`glJ;$Mg@pP#x-p((9&vO--kMCCD zqzh$gF#N?vPbeR2oQzelPPV6$@>0+@&nk< zx9rFHQ1s03EN??e{>Au2ji~fK20lL}*$+IcR>!EtJdEvk6$uInxc>R-X~=qO;XTaN@FI?m?x6DKWdEZ2Ro(;Ewl#cYbH! zUIu-wHU|92rN5*Y2GlNA=+6%N=k5eYT4*TGNBGucwE#C_orXC8sja)Mf?u<{NpRwPu1DlS{oc( zygEEM&>jx%!?u6oxB@=mECK;?2eS*rJ$`?y;Gac$R6i${G$08<$K1)~1?j~QWuf79 z!OOJtT-HW}#OFGPPto3$WSa_nH0fXY6DPB5KBCuop&pise9CGz^bxZM?B{P2AG2jML{3nCqfoE*& zjJ)&1Q(SOSpNAvV?k}fY6Q7sJezN)S94RD5Aw^mVkpdr{CxzrFq)C?(4s?O|Tq*N$ zo>#`*}r`4-0^l@GPZ2CFOIqFZ1zt;A>68sns(>oXl?q_Tw+d z_p$QcA7uVYDZdH(`E}rt`=g2V9>i~a8L`;Rl;5V*A5E+IX#7JWK2Ij|ktLZTXtjaM z{{snpH?e7OD; z1(aYvKA$iiJV))eGR8bIB=WDv8GMBPtc#D(A6*Oxd_Gc1=98d53KLTweuDiZ=ug?L zhmG$aX(jWC>yJ7dHT@|+9NY)l&weJKP=AU5O0b^={m}ss@wv$0BkRvP_{jRB2cMt( z>>2oMq1A=s7@b|mp+2!<4i=SVOj_w1Drr+&?%Ba`XpB#XEXP~S!E}-3UEIhHOj9DM%7$57^@K9^VHQ~tR;@wunSM~r1E2BgAA zjAdmuAmVeC&xerk{1(MzyBt0tl9JEmiO5r22VE*N~ zRFGxRrzVYy^9yD_vi_8q&weGwCy*aMAVeWwqvLGXdJW37Q<{FI@^R7D8)q+2%hwnW zkNVTnCiePsOW4o;bpbwVqgT59B=9No@I~`4*1sTa{4&uNetT20AT@a|Vn4*EtnNj8 zzEkqAPPdKKuY{y|ls$BC{#{Xh0{ap=`uS~z@s|IYnTf0FpHNku_C!oO1Z#MU)R42bMU8s8WB@JU5cJehpN zxUPc%5ufW6`@#DU7FB)cQk;FQqCe%@btXQqr0_}BA2mcv>QA}gaQ*o%;1TQdi~a>j z{LoG@3(Dq`u0Lf11pE1}C$sp(^heElQvEA3uSn9LGV+G5t}(lV(3YpJe?hBOtP$m$Uf9 z^+yd)CZDwUr=)z|Z7lNf+wUZ8FRM+#V*cf~-_dS+ndYbOzL(1~vDxWY_GuMce_=MX#r3FNMUQy#C z^he2n6!-}JDYXHC&v)6s5|Ikz-rp? 
zU1g4i1Um1LVm}Gnr=b>=Wz9#vH|Wn5@QC%*WIjqGmVDKP^)aE(34BUDd{KOOMS%SM z<#ax|9<>Ue(z+M%`DMv|#QMBwznrwdq2y#S{}Sz&wOimIK2L`Blg39L;2{|Kh;sPI z6O2*|5b-gT`S?q%`FP^%8*%&5W`TqF+*9Nu26z<%Qn4Q~z?a#8h|g6%AKb!H`ST(_ zA#Oh*l9DfQ5TBRx`9$__NN$@%F(T~eS6caeWc^V>l*31ke@eZ;L45X=`H=o7 z;VG;?<+L8QzVRy;ihQ6ys(6(6$of-WKJWXCna{`QwAxNjKK>ra4xSWS;UNDq#up>B zslqV>?WSG9WZSmi_Zcsx@{wf+h!lN`E}qOsmYq5uR)EdY`I%v1R!_?JrXoN7M_{#Dk2-8UiP^Ej1H+`kmzr1Fu+uTqE2 zfrAmBSF-uY{-q2^(Z5P=JrqB`uM0fbzp!F~p?{82bWI75RVp#s@;to*!_dFFPcHJ2 z9V-+e#ko6OI>twKtWbo?92iP`j8r~B!eK${N#hgCd)7dL$iMET@DakZi~%Ltj~Li> zG9cn}wa6z#!t|$5=)){C#wSFgOW-W|*UO81Wc?`)CtH8Chzor@Z2X$1@(B}uauW{4 z1^urv`|ExtKG2b4)y@0ti$4-#kGH{Kbd$V%x zw|CmTS~qa7s4f?+A3pNjqvO%EKMB1{KkH%f&tw+kv%{L4a~)aszgrTYV~v}fLw~+z z&iFXhs!^7b*>iBu6E4G-Qppib$_EY$*;$X>q^z$&- z&({JE^k>n2mN1*Z9n<+}6Z!~!{`MK;)9ZAsdM76bXnWh*W?3!)h?FN=qv?3;4aU0X z0KtB~{iRGk@=^yWk{Y6PKJrpW`KMyU=Sqc-`q2w!QMr6H+3EqG?=Uj?;4?0q{e$>W z)xSzE*~I5wCLh_qq9a<0^r+d7H$LU{M9aNWPkbsWd_-!m;zmZg{phuoL44+!e8MO( z*eX<5?MUYnYI3kuSmGg{_&mcQtn z=ESGh-gc_@gULG!0z<7)o_7h&-kOe%hKCQssaMIzDe=DJU%}@Wf#->;xwU257WnMe znpTINpi>-5hcCB0Z&clb1NX$kWlsC&-WX5xoTCz-t4u!XW4g>fosV9I1U|o1$>pOc zIuf94J|!Noq71+W%JR;yBMD)@Py-6o5rlw#x8Qkz`sW1ReFwHbRY1q=2a@6 zWxXqnk8a+@_&AkJKFfGl79X9wi}88KOy<)upL6zB_prdf#_kKlOMN{I{`F2HgU_-a zmc~an4VAY4iDI6nK-NVT=@#L}|aUqRe^Z=jl z*k|?=<+H3`@vD+PNBQXHSK>av=kEd!;-5u6%lMVJZW14z{7T#h`1~B_)2r9~?RPmm z^s6EjIoi!GU3I5}e){%1TUym&E=GKwWcHK5M_yXR&5+Nj+EgMRd1+M#IQXQAkCDSC zI0aS!P!=Cewg#CW5TARQd_)#>F#}5HBeJ0NFd*V{mBxpV1e-eHVzkh%tgnfDLL|Y~ zPPumnxqrQ!#z)qlf&+dcA6b7i8Su$|<~e-AQyA(tl*K39+9|vBaQk_j$w%l?(=v~7ih8zDYVF7S~j7ML_GfhfvHUf2jGVdaMd zO2o%V;}a~b76+8bC$_Mv10dpaFPV>+SO^1R$zP#l2PT;KB!Q2ZSZHNH#OG>(4{jl< zKfErs`tn#Y{VB0*6Q7qC_$291F+>UW<1cK)&8Nh0eEpfH@rmkBAwY?IlJ%$5fXIFx zC-YgNqj}AwngMt6@L2)&ie&{ZT~} z<&zcvlo$@zpW1#JpCtWJ1eC}pU4KeyJuE(~T_p2K)}Q71Wa&?t`P?@GKF|qyLnrtr zi=Q!VMG=#R(dq8hEuq6{U%CC{=*}UEbetLE$>qRHyUuOD4*j_w>JQT$$!<8T;&jCP zl<7{I-Dp6E`B!~Eu%B(J+#;4YR{H3;{Txx{7O}js()DiZmtjB6O28+pt*7eDi{A<7 
zlhal&bidLL@WdzduQ;FBI9~u!GM~h8zSILe@d^Dag- zNw3$!XF;#a&gVK`e+W#0)eSLz<>(KADR~MI+0QGfd}RG8hA7j&G^xEoBMtaCz~isa zC;As4J#S(0z7U{HK3sn^upZ9m$!tDCf7ZoE=#MshW~qD-po9smiZibQ@lTnBncoKU zua{E!#PvrVQKo+d`cv9zsZWRd*Hs>$P=AyGW%3F2r}ThGpI^@ABkRvP_{jRB1D`kU zFYpN#Hn{o}Sl1AS8MU8qqQ%x%wV1i{JAwVYaZ!Jm?g+bCf=)a>jI48LM2G258|D*< ze~hP$2fMYdl*bW_WAsGnl#^^fhI@WwJUZ=9q)d*OMTP$?*-Rex(~ieKi+tp9Obt;U zA6;hh#K*|BAD@q3BT~hJ6#0~XCQp3ssqujxsi!C?@ew0gX}yc<&s8oT@+o!%qbXIS z7eiIz*E~KUl5!sfBm0TRKRNoNgeZ@XtUqO*h!LN8uKmdRqlCxLZ{h1dN#mE#Cyb;^ zx(-Bq9;@-;`lE(NjgQcu5*rZsbSk-g0{u}Lx|km2`B#Gelya^?d|J7D}Ta^dk{-=`_XjL9^3`z^HLfgdDx;MUTI~6Up$eIJZOaje92ZcBsh2qeePc6@L6K} z-H#7r_7e%3^#DYCUe4qrWT=<{rQ45?A#M2dj5I#D1v%Z0{7ZEcK9Ns|q}-Es$;Dtl zy?beVWc?`u2iH&JBkNCz=UfsV(jV4;nUC+W@yq8EZhxs-Pd1-0XfElbT>?aWTAB7E z^rx5srQ46tA3Y3+_{`JzBWd>$AbXS>#G6im;I>|fvjMgkvg;-9wwPb7Z>2UJW{yTL&fw9}37DXPy| z_CtKM(C6`-`1Gp1#uj=DqnN}heVc^eRi}s3vje?*3)8*8zb5;Me2UE{1Q+pjk&hn! zB;LRH3O-ZdNw6Q{qwY`Qdg5y`pYr>Yc>m%n_>uoGdw>w z#@_hAJ@31l#`vvMms;rJ2L}BCDoK{pzTjW)nSqaOw(GU37~KByt|_Z9OhpicTojsOQYfvzW!Wi@)7z|2#=Efl;3)|f4!2yN7kPrgcAHKlBGcUqY0l+m`i*%5%bjO zpt9ITWnRYWBGRy={1AL_y>Z`_S{X1(tS+MRwQlnVp+8%|^MrH1jYJIr1yUMJO^eOD z#JxBVM_7bE91b5+fykINhyN_S6%n5&7x>81Oss-cAaVP-U!#1qoSL$GBtAwOpXH{e z?7E43^qQImeC{Rl5z@Sr0VVJep`k|J#m29#s|7xUBxqS#Y+~ViUX)LWBnS<4m|Wmq z$iH4*;3Ml#(U}DY6y+o9k1jI{ejmhVp2kOnhD%sbA|Jh`rVJ4Ad7R8g=+9CHl)y*m zk5&c*KGn(spNRex37ff5;=fTo3HqZ$@QnK)KCJ~las4TdD9R_UKbpYt`Ru3h5&Bac zPt1ONKB4|-1BmSBBAJh@KZOY;*pI9~I`Od#@TptfZm%kptc$90!QExUsC$#i^iV2T zr}GCbkCJ>1pEys{q7d4G70&E+$9G95`~6dTFHf0lG6 zPkdg=;3JP@#7cE0pT7dfYr;^V{M2gBY)f_%yq*(HG4&pP<-~)e%Cn+e! 
ziphK^8+`0eCtkv$G+9&m!balrID=2zzm(v>^%VHTU9Eg@`1tj;*ZF*ef2rarz$bLI zG6I7AeBHjpr)A-wffP9N;mg>lHLG=_Rjb~wTibW5PW67*tks`R*Jxos7ak58gn@Hp z_)7on@_dK*u=%MUKgall?o|*`3ZKxY%7Y;Doe-Z{y8ZZkoaYPT3H&Q&KRzGwsm!-k znr{*xd47>(KUnA}iYJo~U+5?eg3LDpK7R}56UKaD*p5Pv-FnRfyM8*xt6b*;F+hJ=32QKA{Me91d=e_*~`j z!3XZAD=4o634K;Epfb-hh|kM;d?NOvhA5MdY(Qm(!|mtmD|vhZ`%zy0DWX5+oLKnM z0sHy-RvsVOeiRX@>QAZRaQpd&xx{DN+TGi3n_UR-N)MmUk#~A|G;$~2(Mngly|%1!4Nf>28)m1|YgpnuHyhbArWBEzXt{$k;}nyNNpQl+*l@?=>6w>$ zl3VrKkw5e*{{wv9dBym&J9R5Kz`PP0ddn?$G#+@t0cI`P(0zc7RT7mKK zK&NAL9luBk=}v(M@j35b1jbYTNGJK`h>x(JD4!y$sf*WM%+6-jd2n~9X4SXvZq?1|eQVF!qy^{g_TFao{`RH_ zgXn9Xx3xT%`PL&$;PK?R?+uO&{+yNMa1S4PmmYh?*q}9NmJ%&(vEq-oV`DNjj&Jp} zD)FKDy9yseii7ePt;|P`u|rljK7%DbR|WZCu{95mu`WKhy^%Yb3wx1Bmt!}gupL>dYWclH<_;l@GnUAbLw-_1_pGq-4 zBtLn0*5qG+gq7;stZhPkX#TFEKg@oZ{N&rcVu)m>!-w>zARYz%(E||i zxya;$fQRW%F+%C~BlJffKG$h{67)wkK{Jt0g8tl2)R*B>jgdu z?PUTj;*3bGvmQ}C3GH`6Eh_u`qB>LIQ*8K(`BwrTJ%%sRpKU{xkGgxQ@hQK15udj+ zJ~i8Nng&~jmh*a-o?rA&yy3aOqb!c)hCZd!{NnBR$bORe#K(8l$rp|9Nqpktd&$p! 
zk-n1sv@-d~aOy<+3zJt=i z6z*MWL1A!Jnt%EJHKL=YivyS*-o^C!14b&JWxXqnk8a+@`OGuW0hY-}qYW_P^EjK22%rlYPzE0nKx<(@;Pca!R6c}+Rd4YV#uS|kcA3m4 zM54__O276wFou2cC0`cq(J=aYt495!sy14 z+kZGbmrlUxGJgHZz8arG{-wxA1ONKamjKTb?Oxa1GVN#&l_l2>$)(!UiF@FlB=%4# z?P5lP7TETOzT_&A51EVleop+-i+r?jr}%APed9x4T1n-jIN4gbYLfjBpVGV2!kvK6 z)BvB3S?|<#_Kc0^EZc&s)lma_uhX^KOA2(y9gO?Om&V2m-iZfS8=waC<#^(aG!tm> z`7z)DpPto5QS2W2FJl(4g7WZ>pyN)x5sG3@rT)v|>_Q(?B0g7fK1C*?3l^EqM@Iz$ zpMO$`@hLQr7v!Ux0)fx_;(Ww}qV5oIL{*`>K`rtC=qbd@e@bg^MIa<57QdYK(`M z)Bxkq%lzdMeG`1Xq$2ZKNX-m2r%02ufRDbZne0;{KG#Wn2urxUS_BQd?gF3EM>@$y z3qD^uOXI`G@xpBAFD3FRvk)ia!Tjr|&l#UL_DtIULc!jK)sAF92N{}078N^how~ID zg@V0j-Y6*pT7$_um=T|s$bM4z$k7kY#8fjh)A-22PZ%hcab<%W6!E#r=fh?f1@IU- zeDs*S`}Xs49v`uwSipcX`G^b!4GakOGc+>z5Rzct6^>^qY8Jkn#wSFg!QuwDpCJX% zG5y)I>UHOV;k1MEV5r}V`m`2lcy#CwnaIy#d@jAgVEBmIqa~LF@<_|e7y^*}R2KOJ zCwP2)7PBv93ySdxPVo5p98SKt1U@r@{xkIreUlyd8ML@x5i*yXXa9llHne67nwfN5JR8Jvl`$_xY5o ziEgir_A-%mA35#`!XQ;G>P|;!zmp_7daU~h3nD&GCh(Etp2Bc&@i-qj?g{6p%%kpYqYTqW=cd@7uGDbL-BeVk8- zM2~eJ*?wM5;3Ml#foZ$A>;fNIe{>kP#Ep>s%+vXZc~>FZp=-zON4Iq!na|@iKJvV) zAfZG)LVq+eAlT18t|ahD&>zK_DJLK2lb}B(pPBOeAU>@GK5_ji1Sj3Uf^}vs;NU)p z&we@|p+80O4g>P%~f^E+~YoGYkngpm})g8plJPF9ys5Pr~K&!D6yZ-mO zj%_w`TLC}lpA(=nZ!kpvE}jIW4HW5(g3lKM&l7v-@7d)*dfS$<)wUj3?JwvU?Dbq4 z(gppeduZ#avDpMS1U|J&z{h%1w{2FtMh97T=C*G?bfU0P>sp&e!)((N6vMMy}=UF3_&r>+d+)b3{Aizv{lFWx9p$h~++Q>8M&$IJXJ{Z4R zn3y?+Z(dOaNGOWcZb!1&)LJ~+uVa8X^|PhN#{-4zL} z1|Rsj6{%hEc76(cIm6s*%J7xtUxE)s z&1D_7f{z}2jw`8r&3^9_bNALT_LWj-aIyOTE% zpSda@Mcbk47UEN4mA36)G_SzNvh7ygY<3NdPFV6WI#sMU`);*NqNa6naxrkvCkB!b z`GOBW*R4fRNFT8O0H3d!fe&kICDaEC&?4;d5rvkcG`hnsN1jLR<5(9SwgN4Nb$Lcn zV#5je{L&@yY1G~XLB>`^$JijP-efEFY~9ChZ()I~YEmggs3|2(0U!~}cr^NNLrs97 z_~F>tpcQ(5!Tx~Hz4#gsI}j$OV{9qk9gT*QapDsGjVE{7_Agyt$$WTm$ZSeTUO>xO zVwDPMt$ZTMLpM9%5ucZf@)3DK8HCo)M{M5TIG+~{nNPCiMBNLv!w33w2U{zj5Xp@v zFT#FatjK%{_*WLAwepGh*Ubh@#HUr1PvBqagx1eT_}9(znag~V{VO(pZ9cf0F3rs8 z*2*WrziuirAp5zN`KbFBW^3t?*2-tWzix^S%`blG#aD{*5&o4yX#IR-|GH5=Uo%hO 
zgULlolk@Fu1d{27;}2=;;j>WKmPK9e7*iHjP2kn%1pArotMlQ4tdOnws`0s5c_#n* zTHp!i7ry=^+YgzI0vy=TtIFrqH=7`^v+e(VJOdw^Y593k!Mr_OQB}){3NDH#E$puc ziBaLCJsgR2SbWBPfX~E8<)iFQG8*1a4SlMMm|P-X#U0gE7^XQ8pU$?XyRkw^S2D}sj~JL9agosX}0`?sKCW! z6Y_U37JW*7rOMi0jQy&;=kA4AftR;0G^oM>KHpFg_9LewWT8Qdw9F#ud7#gFrPD3k z3h{YK@G+a)Qo>a2J)<~ek{dW}2jBLIuZZ3n?^Ekv*(X$^xJE2WUK~ z^8YWO?tG63Bpcx=d|ZUTSiPP z(3bI$F}b|(jE(;J;b3}%BTsS~J9vu-}O6>8h|FMnQz^MMjKwrWkYivw}~(Pbq4+%>lEqX;xLsseh`z)Q{!`seNhhvH=aIuu@YFdE{}?x4R(%hi-%f8>rH-r-q61J6CaloEl? zhawB@rhJb0To>k(zijqu^SP$a}%)v_`JbL;sa+|-n)!YpQHZe zy-kc?$^KO?=YmoYpLsz(`QG&^^SQxF_2Bc={t|q$z3V3U+=O?L{rt;!0T0DLD5PFP z!9%BAvzlGAjzeF2b!)Rqb1tP!9@Cu@cCvwVdR37xas;8xc(MB=Z;j#gk2u zpie(B9rTaUgM^wzU9jQ22un~rB zuzIb&XSBPW&8=suz4rFzgJ-I{tOi$h6`*I`KCU-BfA|GsV`n%#GB%`rut9%tX<$!m z0?VLWdgE|9!a3Eb>Ws7hK;Dh5>HFYwH3uIY<2I{X7D`A3Csp`L8CE?ujjdW2)wlct zTUNK4|JGZWQXC$56Jx^@Csks_Ne=oGsR9@Ku-9}?>%5F@|6e~y_7lwC4V=V4v22{) z;hijb4KjAJ8Q#`wVwKVA#YB_$#jI#O3O}`w*A|k3iz1qZmqiO+^0e|oo;Vu z2T6vjJj~Ige|S24NM&rslj-p>POIqn0H3izd{76VZQ4EHp=gJ-v|{4Sk6Dsb`y#E@ z@6#NktEYQ11)gxK%}xg}O$IHfGV{2IG?xw`@wpe}BbOFk8j-}u&t7JwMfHM%PmB0e z68SjKTT}^=2KXgxXps*ut*#p&;?qjtBeI{@#Ybd!YslwEuD~Z!->=p{%`rBH#kDAu zp`50}{`q&R@3(36&rorUjbYkp5$xwjUQXwephZ$NouQ^Fe6-Ud;xo_X6VW0iK-qls z(IVpWIEPP6i&o~NlNN!`k5AE84f=ktmY@`=?y43x(Y6P2Wyse|udvHGGh+0h_ag#Em7FO|>I;g7JN z7#~@7G=RhP=aotxpW@+9HXnV$AJXSnr12}}U(9}%34gNq2-(rXfQZjL)qbMkkNW79 zRv%If?=c-Ju|N}_$Ju;j5h^;q$Mi?=k#(rVaD4r_R^n4A{7Lbz7$2@fr3OUy^NIqW zg5i$}AE85K=kuR{N6ar``rLWms_yp8<{iEl&&mSiWhSujCx=hxMep!*>Ym^6H{x$I z!;rtlSlu{Rh~>!#t95;bBmvoM~V>)iIYi4W7~IG+S%F7jR%`6Nkm zkt^QL&k~<`lKuF6BxNr222=S&rMb{GZ{vr-=XVV7shV3`rcE_z{SD<6-%Qm#IB-vp z)uM?XrQNP=|IP{L)2r8Z>JGZopc$LvSqlb}6C z5k>guDLnz7N(vuIdy3*QlK5yQJprFqBA?i@VL?KReBx1In#Y&xb@H!yoX?g?>6mx6 zYps?=`QgZ@FPd!^;d9_pD(0P|KB{FSPxW}z*CyKze7^6B_^{KZYa70dDmz1}{qTkL z`QQ0)h4}2&>Q={STR6bnq~vfuRNdL36U{}92QE@Q?GL=MG4gPL`Fx1*nWq`vIi?fM z$c7dZvt=!-P(>>=@191nbc)=1*xgP{dy3w1Zp5|!s?D!$7FK0X+}Sm;rS3F?Z< z)8Cw4ZCGq@Q~ 
z7eDI{9%f$oX1_^%E*AL66O82$Mfu1R42|I6qaZ%BG(P^8;4&7J$Vb`_WYc#?iKimQ(s~fUukjyyA=J2 zFox?pxnTbFyR5#1;A3FYNX`E*9)ZLps+z@=m4$ceLI+d3CM^ zKCC`Kz$b{8F{R+W-A>H*XK;Ivp z4o2<)4(aN>01ZCxxdI=N7r*+>7ytMgVb$JPaHtnA;`4Hpj~pB>ZA=m$IXKh{4nAt) zGf(6bNm?!J(~Ep`Tz%$z9w+dLtv;{JM+Aqu84&E}suJb1j^o{iWItl5urT8?`~va0 zmiVkIewO%X9YBN6hsb`M`)zYwZ0K`$9*%JERBJTMe8J}<2Jva4s^7W>cj9x7dVW}z z*LrZreGs2}5}&mU?h+rZ2X~*(T<|g4Sa0S=#Fr0a8--gYRu}IoZ5J~8F-EAgx@bf( zuRCzD20{zeW!yI;`=Rlj`Iq1$8&SAvQGkx`UxJTpL^{Fo?dLj$4=o=qiHGY?j1P|x z^#WwuzxO#;;Dcq2PP5j6b1glk0PXP6a!2Fy{so*%w}>#j2YLoRUsfSLmWg$Yy1B$w za`b}!ztGIQ%`tMMtPgn_HH67x7e5D>kFHA_0iSumN7{dvbtrl;SE)c7w;!=T7@LJL$v*Q}V9R|jgY1XmXRbej zkE}yWzybLvA6bVqf`j`YKG$h{xDG7^D3On}|E`Dia6Ydj^AS3(3H!^7Tg#I9z|;GE3vb^=BzSQTy@rS<)X} z0FnK?l*~uy&${?T^hYB;?*^WQ{&8E#I^3pIM7AmhLo4|3`+tr7_Hq3DGA$CaLi{P2 zy?iCH;9q)me;oGn?yGb@l;;~RktwaL^7k9>kERBwd@HQr^WBvMK2r8i=wAhr=;M5X z?4L-(Jt>F&)ir~quTFffbNRGusFhGCA1IZNpMN;&A9_0E1A))~Fw5m5pQ}{+DDwD} zyDD~gkKpt7jRZci2(j>DP@GRJLeyq4i1^$~;3G$fh2a2yoR2(1)dmjkgZNa^`7FIs zmcmE7l``VfO5-CU#3c+Uk&lQFH8LRLGf&{-A7cuIs^GIYpYaeq9yIKQ3O?_tfDg*4 z^X{x(c(zXFq|@(!!j=M>}Db_*^XTSwWZ+ z`DiH2A)oJ`gO3=Zs=Mu4r|YzKYfT)yXg$+@#@MLtntM*yGEAdxQu;!}%s;JCrPF~( z>p46f^~aOp1$v$1;6-cdjV42*-3KthT60DjDdUVzcI)&2pI>42v+Fzn5Hw%JiUt}v zwTw>B+Cz&o9DYaP0NR^(@PE!WRS{5lR8NPGK%;+7Yd&b?bYYCA-X#v20uTxZU~e90 zzt7P)jj9Ng_yInDQsMT)`s!u%U&ca5$Jx8HWp=E*_62GEVZ;9BLCd_bhh={MWw!M( z9$wx#aK~_7cDVP|g-d*{llUz3&yCQ@-9ruiE7G5OmHxRm4+rRN`=hV9;(V;GxmAZ{ zpi$a=_>;fF%In(D@%T;j8*V$@O&d#!mf6f!(U8v=jnW>%pG2*M4atUB5rM#xXtj*@ zK|MfI!m{)q{OIdu5k6h3(dxC$70ou}Gx5$YrXzR7>)qC`!+t(-&iTmt)A7%sQj6-K z|9iO5%G$tZzU7dQC22NnQ6pAI}> z{If~@m3JXQcPM2O743Rr2V--iW!`t%=F^)B`xt@!KzVrGC<9|Xs{I^3ymNuvO|){I zj{Nf(o5se4`*1jNpDtf*De(F8l|}nWaWD0C=zveOYpyKLq9<wo~2oP3XbhtsW~dwPgIW9{V|%vZ7QsF!^G9{_yp4&PV8v)7V;^ zrOHmmyTp_Ar9Tq=YfbSXYYLzKr`P%&+y28pwV%!>AxKoAlj2|N7{6{34oHK~Cj(Cy zAF^3S;9kMxf;tspezBT=t;v2O4n~vOSCJXmuRw=j#OUtsk`QQf#dK27?zHm}*cANSc;#K)BC z^RK~(BK5;HHX`u(YZb=F?|MiO;pnr%g%W!hm?3*m*$8{1i67u$JgI63LV3fX|3ZX6kD|;PYiOna}-N 
z1Iw$%2EMYA;ZI}3XAj#x1s?&Q`~5Sls^WV6GZbC-yr(rW9RGg7=PUMQJ}6|@MRzOI z$~NoWWh&3}3kQ5~5N?9*R;ZQj4klWKKI}%o=Xrzp^s2qa7HVU2*Fq??jmrE5jYj(M z>hy4WcHp6EI135cD*a#}{eUJSlej*>H#!G+7i+qrwsYS)r z=q-cKTYx9Qzxb#X{b}`oCGdHKmV4?KD9KNP&wo1uA5ot+9NSlWgQ7mI&SN_f0H3ck zz-QNKqnfd~y{`Gi?r?-^#_rL_!2IGX=iuWH>tgs4`=?kHx&&@{X6%ga0dMsJc!S_ zu9y1jRcdck@HrT`hh^$l zrS|T_pCUfj5+7ziVRyao2MazEeEf|;ZELK9PtyP&nm)8F^gC^6QPUmiY8pMa0Qz`UaQNas`Z+!qx04?c~#%qO%TzE(y_c6JR*XSHSx8yu>NkzhaKPipnm z-u?LBy9OU#pClN)R=x*bza<#GR=(bCUBKS`giovxpHA<-)o_}Qlo7>8`sLPxy5s3X z?`(KJlrp0DNMGu501F=j_&hxWpXYm3T3<&0%JodZo}V5f?dKf*EB*Lq;VmU6hoVkRcVC zwS7l{jt6EJ3n5t#bBxBh?#Xm)Aa5f|s$;lg1EFKAYd7k2JaQ+<7%J5Rd~73&kDyZU zU8nQ8714I*1sA^IC*aco9^{9!{AXW%cAbWW zK)PCMp@q4DIax6!?^VXlM5UK3^~cAKQGs zWg^pXT_2w9zIfrHZr!@BcU#v1AJYJzx@Fx@v>yr!*|ehAJFI)&Lj`p%cF|k-Y4G{r z417d>ozH0Hv=otF=QCRQdbfav{Ymio5ZO2(Esv%9SCFru#qt>PTR!V&W-=dOz(REOKC%$w;j}O!mEk_HALd!CuAhgJ&#|?{ zsF#&-tqgqrnIZFuL&53>J7w?;IodNU|3wUEcQd>#;1fq8N{I&tF??Susa+ADxiX(x zuMuidrWqCHQ&Lj`pMQR}$ftqkXuKQqHdZ~56QmR@NIw+g!^DQQGC0Dj2Xca>A1(D$ z@cG%wA|HSJ63$h85R35%$FDU1irPx=h|Nlh95n;yoa!CM#U~1(jCvfs z3(C*@?AKIcd{Ba;SzWP=i_B+dzukTFg7b)<@SFWe7<%@e0cuq+7S8qtJ)(1pZ{!t z&w~D#%_d9t4YCtxgOE?S;ycYrpYnqKxaa3A-8W&~r_4Le_U;=#?ka)Ls$+bdkFH}p z_`K#ife&jV=g+-14S$^s)6*ndNqQ}p&!!QslqFrDB(H(b>-Q7*NSo|Sz0)`!X_I}a z>)qBbfzO}5j_~0nX#H`1X-pv>UV_#i=e5HGJ}(lV2GvIBww)##6tM)vpfZWG%~pL1 zW&t(OFK#>;4bM^NkS!X<*TXc)0s^16&yx5oZ8_W%7WimqIs87r=c|n*KHOg9t({y; z7?wdtmY{4tqyC{7!!qcURf520HjD5H^v7Vitb|MC_$I2gk(g5z+E1WAJYk-2i5%bL z9p+L^$H3?7_9J}6?rDl0VRJB?rFM6cY=>S8`G~z!J?#j5zP=LSL*XGe9YM-Z;%HcJ zs=?!Uy<<6xHHlHkhr&Z{I)ap+!qM>F7WNsAWU(3>o#BORv|TLgqln^dX5JH@>j??~1I@~;EHgvb$O*=go@`>nkaui)+KLekix{B}#^hY+4?85{J1{CrM^e1W} z3BOaum%!)!QTvhjB#&R2`|jc%6Zs^MU!~qA1fLJgxc#6jt{*v5f(CapoMv8bB9_31 zix79WF^%V}tx(CW3Vc3jB=OUSNOcsecmkVg0Gu@(+%qHc_XSDv?GiO8zvDZvU_YG}dU{r&$L!<t zdf*;h8pp#So?_wN#rnMtedNIByRN~9+Q3#(q!xh8HVzeZ(2t0>qZXu|?KNxISyp^( zjfRJZ!vX4V9P&Qh1~r=G&7~u(zG{k0ypk17knYjmeeciAz{jT|U<6t7f&BnY5kl3V 
zdwFO!L5g!=KLAsHJ}f^x#wWlcp)B+?iBEu~(3NiYhhaY~KRm{#h^ZvrKoXyln@Zw+ zfX|2buhi1RD9y^<-M51XEHoG!m0SYUoF-5?cE>xIN%Zb164+*CT8uc zjb1q4EvfQ_=c0<8uH)AavGtA48+#^Aw>X!T<=N=Gb?Q>zL5Yfv@{IY{53%))D4&(( zIf;*s^33?m;a^ccE6Z~dA06eH_XDolM$bJHlL-OERT;aP6j^TUCHLN)HF4Zk1lgn@cBQ0=Ls%6OoeEU z8VP@v8OQ9Ahk6MIq(Ur6OdRPoa58)M*Z-Ws?8oZ*(cm&T78Gd<^)vO01{ZDv^NU}9 zX$C%gS`k~Emq#wXO0OCovifz&i}SjST$~m7d>h$MBA=y3E`D8_UnKLUd3e30dWECon)h$(ofz@lqWK4tYf;xjMCN6G7o@+qs=fzLP3z(-z{ zUt!n^lnwP0qQiSx9VmFm$xp!NTZ|Z=6^5#&# zl+}TPcbxnLeBQAiKDtb}V)tS1{>EPeo+sinu2mVek`|mbWMuFD!4os^ z36fSRv4!XM78uQA(DBb8KZ;LJc~-9!@8}{&;PW?_{a{xaWRM2*pegZLym7@>$iLXc z9fwgKof|PeiWe!tkJ+_Pd1%P?Ly*j>v+5pLU_Ts^dpOi{4-9<%5!p|eHAbfl#M1Nf zBfXrTKpC2m#b;fd3-${>zcq{7k3?wMSTSZlaYEWei{SIyMh2f{EhvYNHWoyD<{5mJ zjTLkFXcH~kwNHHtc!K;@$g_W}ay1PS`Kvrb;S}ANJuL+qe5?xiME93hrxe{k)t3_Z z9L>Ndhz}Rh$P%r%Iwr-3sc7_&Xz=ljY(7g#bX0%*lgkY^IIH2c!Sw`h6;KL59o&Sxp#qU&b!(I)(1eCFwVmh!E9 zK6>~T-3RRFU+sgBSpT6TaQxg32C~4>iYL^I^&e4L!t=%+8_MFYR`N9=@cFHIgb$U` z@(EGCW_a+46iFFWDj%$O_y?aPJt{9GyY^{cX@HL$|J2RKmJsM=1t_XN1NZD8BGB3h z5cvG+EXD`D&hJ||JGeN6DTjwKeV$B5553C>Ax(xb@cFg4{Y3dJXF*Z>N#LW21reWl zru{58gyr(lWC*is?_m0~)2>mepKfnw2Vcc#hI&8Gw4X6H+E%@8HJy%ut|2WGC*yNV z7VV7sIDb2yOplN8HOwAsO}!C%celgJNVvK;7o@?wJzG1cQ%R52K}qo#^Sw?#@I8SH5*Pd=N|a@B}R1Pc+{VucR9A4lK=rG zJP-@|J|N@L_$v2oc%Jh&Zi0|q`!>@E_=vS1qg6vuHf}pqN5P_l@C3-gk$9zd?eh^O zwJ!Q7x{-vY<)Rb^9g8P4LmZG-y82C!;eW%fz31Ovfe+8mY}L(X7xOF@CAQ7#{W|i* zk?X=qHJlx%X{+(!`I#33_k4n*?tYXwat|L4kSU&GMvdX|@cg7)gzVbYZ|#Fm8?CyH z08ijz!iH*26aOpl*|yQD+X(PvyH^XXXz;%RSHDU7`JO*T_7mm9dJoDzRs)_w??1}t zj2d{S`&b`%3SIps`TW4g8{ktlJ00^mlT~d&EAPOkxcXR3p7(lHQ}s5g?s)9J;2K_)74Z>~PZxb1YB=*AS0J-q zHCw$l3UKAeKJ-a+{K#j5z7BnydCyiLb8zThOh+ikbsG$lEr-8q*M8^|_Q5BOiyZ#! 
znZ0gJ(Zjk*T;#y#(w$EFrSmYm_9I_@1wO5w)$Tf!xq%98f#_@m&)`oDv!e(H=p$-F zb)&oiWM)Ii5xLHpJ1!lL1yizX@Bfd)NBCFO+}bj21cwkF8pXKqV1mIyL&KkF;FNRI zKuSZ?O4DoMUx)6&fqR1BFzBG0tT}CuMTf?S&ovC>dr)PZb1&@=EZoVf^MYObz+bpd z20&Z*p%&WFu8cs@|)k`(|vLSegt)F9bJkcwEm_+^F(z4wep z|8#t5G`zv_bg(w6;PaPe;A7b~iw+H1Gi2cpjrdmEsTVBl=YNdW$q9=N84VWx9Ad=x zMnk>I*J1tn_x`Lw_QUHPVqCW#aGx?Zs!p}G?HCq)#tNWyxD-lJsOs16VNM?=FJ zV=y1C?*m`6o7uHL+9f^_eP%TwH#!*8o$9V*+%-C8lRZ^u6wzl^4RWL74i1Nhr$f3P zDm&9t_3N$qwv{huKl}Ep8TiQS&pYj2t&3_}REQpDF`C<(UwrT08~6*G(Cyuty=y#& zAE|DBkzs9`Nq;Q5K6X5s_9qDPs1*GH)_PE*ja`zx;%oP5|K!wop+9lYFrTer?lwgN zb5`-M#6O$#R#tk=Zanw};6eODwKI2Uz-sPfmo`-TdtDbCjAIIs*K$3j zKl&>C5cvF?3ixdGXiaQ=H?uXR^|19_?^U>UrAyk4H+}L<;?wD(w>eIrn)Pn+qKDF7 z6nw@Lq(-6Qp*xspQSVT?1@;ttKE;suJlL(ZT2vmg+ry-Ec}#-OqtpJy1(k=KOtAx| zA12~6PvWzpAuaIH)R4gE&&YgyEWPIPro-$~@bR%s&-F7M;a`7tKgoWWe=Uzm@QM1D zewbiCe-3zp@hi}u6%A>DkEVtMK3_BkpE_c`t|3PG6~zaZSx2-tF%JAuUQ_YGeSpvA zej1{k#KsLVdnduiZy~M)dSFF-u)qivm7(2K|TV&xocw zE@r9PX%YPE9Z#n7VQt{CWVq+py*Ad)lzFA_VQt{qR?p?|WIDp?S;>ik&pRtJAAeW_9~J@|a9 zk|k(@EP>qI_;BA>$2WTeCFwVmLBU<_-HrQgU`1CPulpt^eR~jALd^= zNf7b5O6L>R>H=Pt!Y8WL8h9D_e0wFG57%d^Ep8QSa+Sh|>oe6}_cZBm)p*mt$gm&k zFI+Y2o5o+o1~c1TRUFY$_=x_(hwfmA69}7zi@s?Sc6GMbTDtKK_VX`ZPPHF_N6?9d z(_qDCDU1JIlFwp%eapna=YPxaF99gDAH^BQl6*=x+W?=RGg9pb`EywAL^ z#OGRtPr;c+E+0*18sPI=WItj4m{Y6Y;roQEtdfQKW5fR74&Nu#ag~htT<7sA=v!p7 znS3?vH_5h#X1)gmG3izlSQl|YZ@F}lB*^RwF zZGaC~oIAlbs-L*3zThl?`(Jtf#+@^0eC8Q^)Ga1@{WL!1wUp@l z7ykmEH%NRij2Rffxb=viS>~(H=LJ!q-@q`&ZD^UR-QMqjk98H;59Sj%g;BK{Ruh{! z2oCR1aEN2w54KtIFPfaUEu7uybo1UZ=Y!)oIE8WOoq6Zj%0X~=hk`>K8H_!WL?tqXOXhMB7LKZ6>?9Rhly6=O}j|0yW zbu_(L=bNje+r>JscU#v2pMOjC)7f>}SR-U@<4}4A^(1hbPztKsIQYaYO!19&PKP6` z5wf;%K`9$ajYc?2!7NPidbjms;PW5mF+L%d<=>UeC&aS+^=|7o?Z#U{Cu%>%EG7A! 
z%Y5{(l;ryWpT9zU>|I*Ks6vKnJ5=e$pP?rIlESO@DJ^0g4v=GaOqFg9SSG8&FP7{_ z?8Y};&xnuNuGgyAC+*?T*BTzSI~w#4u}?b1kuQxO;PV@E@Tps<=@YDLtWK%!p?*)W zuAwg_yYYAa(iQmZqOuQ;=9;w*lA~6~W9*`q585xgeUv@a*@(dBFVDaSCrq~=)T+Dw zn%K%7ffJ@jkNStF{+igz*SoC?fY0X|;L|nR_`kK74!f1EJDRv7txbpB2l1JM55hy7 zb~mX2z*->f4o5ic?$XhJts#NW=k0?}%jzItUWf8|dLh#vfsvKceaCU4QFc`AWC@gLdOxA4hz4ou;+7 za#R@J%kJ>pyVN!+4DZ8ky!09ae9UcU%b~5nPSr6sYRzih;4f9%HRYmy;bjf;Xm~KB zt-$f&PyV_+XBKB3Y>J<7$fve13Sw_;kD2m#`4@ti{Eu!^vcL zKK2mwXzgO)^Ye4?*{PdvS|1(5*crHQ)1MCWuipcnC!BiiIX057NlcwV{{>HP(_tj+ zX5$NI;Di3l>m0re^soF_7`~v-joKW1+9qbGLVbAr9BMTRto2Yv+$>PSx9wh_BZ%(m zL;ls++Xo*X##2Ve+1r9Gt*X*hK5WqlHXdFcz?$@QHt>1NEZ~EKOgNR)j&I8n{ zU1mVzd~lG-hr>Wk&yNSgN7#ASXGjB|KJZ|Db!P<{i=y6fkF{g5co)pSet_(!V|C2N z8V9hBH+IiH1_m(L&;QHpXFakLd;}exMJKomW)xxhG4YfFY_u(_}`SdyXJTU8PjSnBVgO8E;u=)8j;>5G#bno?gIMbR6eHyJ~&!xZX>VG>U4OH)DBua>>$;SeWQ4uUB?}`N64%5 z#$%o%Mb|q;}97^NLCOgRq%SZ_2b|(o)aIF zO6~_RHg=tEk?o3wx4-35efR*z#_4df()AYpzzw|sJ}&}~&*v#*Wj0J~GzXxTeH)w) zML(0_nTxIF0M)Wryq(_zpYI?(RTC2oN_^%5{r`7&E{~#PO|q-rnA>g)x9T=ot3?Q{jG1U^{wx* z_RSPcY%s|2St#hW+uK$H;PZhg@j;;iqoTA0A)3S1yQ3ls6^wf3Q`re=y7JK^KR8*g z!8-~Xx~Mc2ltn`cqIG0xZ)o-59R&?tG{+{2qM_Q5jIsnimoxAY{t4BV0WT?U9h6>% zgnz>2WCSlMZyl7ry={FHe7>6G2iDU{8D>LAXn28{L*&zhV=F2#CpM9lNoZg+bc7~{ zayzKG!&8qvL0U~D*W}d*vTi8@pP!n7PgOb6Yc+DuB<~-s1xM(${$y42deHBadnS4R zoKN5gy;ghM`m&}MK7!=O&!=34G&NCjaE+Vt^XXoqs>K*Z2G?kBTVDmAeUhI>3mY%r z_*(aV8WU{1eB*1~_O|tX@cC;&`PpqAp_FBCKbHLNJo(ujU!#;I-rYl6TwPr=eu65s2!W4s3qJPw&PLb}So?fOYBtml@cA;} zx!2s+TNH!A%GibBpLIbsF9wqlZQsk-h2bArL4nVYPr*kd=v^BNA?J@Ho}O1)EKWBd zKM%hm4?e;9JjZxFj5Yjn!}ir4)x%h$J@Z* zshIfLhwQ6TG4b~Ue7JqyzkgqAVA+j(`!8wH@ScFr+io$w{CsTtPmZy6xe1$V8rN1o zA7Atr+TDXz$r1g~^hY0`2cOXXeO)}^ z{kyDqu>X4W)+zWb+P|j-wP^n?8z}I37s=0o4g;ch{6z<;$c2bwRK%aAc^nL2K=h8k z=pYrj5OIu(Lok^h;Pbo0CuCnuD>iP|L-tkK#K!H1reFO!@rkj2cXC0y-(B`3#{S*O z1--19t$zkSzcVHI3GcrW(U|YX-2O`n8uR@CpFfxppWyigPY%g5htGc{);n|KOIkiJ zeF*R@w{N77rKR=_iCAg+OCO#GpGEtN6ll17E!tm5gogJ6d_HOlKJ)vFv}DBao8Mo^ 
zMn?1ld_HalK7RYgdSpbq-u|8ErR7Bn#3yq7AACLucp~;+>rtBDe@QDpZ^(mB_}4GX!20;5#)ndj5Di4E{N9fO{1KhU>&qF z{{WvKy7BlF8hU#jw@kOcHlLH>;HivmnQlM8=Qo+hr>fK|=?p$@i{gpR=hD3D$z|{f z^+P-S#m~xne2Qf_K_e4c=&Ix|Z9bhYoS>13EO`SN`3Cs>g{jAiuaz1{wys{)>Am5kGagWKW4o9qCi@Z1 z`S||{ip-2BG_RcwuQOf@1AM*@c*OcAz=G4+Rg5}i_C@N6LPf2WY((vqGZ~kk01HlM z*Bo6@W?!V9IO$pa)663RpI@1BKE(zSts3fXK`}6qFA<|vqQgHoCY>Q_oQ|yB6LW-# ze2EyvYHwQ?X@|e`=>hr4v3<2sKq(dVVY8tWn|@Ma85HYQ=jG?Z8lIq{iuLk%Xm-Ya zU|B9VLws(y{7@hhbj>n8q6Xo{`G``4v6{W}i@@h%3O@Y&LeUf?X|pM;H^TV&g>r5p zNt@k$IY*dw_h=)*jpk=f~7&=IQ8*qNWX#2aQ<`?2yIiTbD5xN|ecNQ|>>b#jg0B|bOc1N&GRdv(%2YqaKl z751?%HtD2&yf-C$20oQM_z1&1=d<$}FTC(ZcxS>=|D0PRM10P@sA9=al^f;-AJHH; z)yMj7mN0@(bqYSkeH6f;6G2A_HTTOk##1Szy4~Uh3SiKQpfiJwFS>okQz@isZ(Cm? zKJQ%0C!H(nVm@-YvJ#)06@0|rgIB7K{byXpN9LDsPYVi`WiuKP~&wPSF3>@!pv6;|G4<)I+?DihB z6YB{oIG$%a5i9t#T=`);+>A)$D=@$!$u57>9$(ylL-O(UkO7cb|JfT+@4F((b ztijEyz57_shCUVRL$V9<4Jw{I54k}xCo8mBwf7%85UWdT*t-^$vFMA!_rqRde&!p+ z>*rSJz14S?>!d;_-VK78G4W`6mhDm*)Fb-n5i zjJfhdRd{xuT5?u*p!M_XKJ3`f$1Y?krZHPbr%=%@pDR(wQcPp=>1gozaNzOgcdnmv ztW|UT>(VU~T|Qht=h&q0_}9s`Oe8)xem*q6Ghqi}`^qDgm%TFT2#+t>$AuVyYNetq z-;KF^-1(ik#-I!HnZ>JLjjoWSBip>d=XC)-+!W?6f>`@`R#X8e=>pN^!%blxB%Xb& zV}XktOGNPb_*qzf_(2#PeQ8cB>PSVOyjqCM$3>H4@IEAS5K__i&o?tme(+lXpCSXF zG9q_0*q_~F!Zm-ojL4mH*q@!%^P0_V?d!ycTVP816 zF!UDH9Hf1QYivD)P$8;NrRXsFWckc4{?Z18eerX_(O^PV2PYi~PLt=Ql=zVR*#193 zFxaIs{(%t(NuY7t|3_9VZePIXH-P6}33bs_R2|&Z7PW-BXcnptN-H=vA>wmGd|JhW z@VG+wMcJs8;}0sAI%B0t=dtu~&>!&m&0FxPBT0e)kz)$4#A7s%)(G{kBT0hbgZ6kO zp3(SP!ccFm{ps(Wg3kkNsgBSU(m4DOe$A7f2Uf3lonx?z+54`n1y=ZEec4m+;ri!+ zQpM>hD(b0?Llqe%)&=#zyhIuaRMfMEPb_4VkQJ2He)IDrKVtrJtUi7^?liB|$@xcI zvT{BX^Os}wk(CbVpEvh`2mT2S7@d(#bbZm`;#!5{kBLgl2y&Mf4%g3b{$7$FTmLw# z=K!UgXDWla1L5%HJ#5!t^042J8o##waaPX(O2`yoCk;3i+13#e-#zPgy0CdTpJb&P zALQq!W*#5-Mqz;rOT9>!R5fG5Ey4;)&rl9o(euj&YRzTfasvm_HOkJw^b9^Qa*H6 z$VVL8@r}4uO)ehHAH+|<=lL137#M*dXLJE{!or{luHlM#P>+gV1 zKMy{|ebicp0nAZBc7_{vsA?*YbWhlkBEogBxo~GNJHrioVx3!KigZud-nK5++J*l) 
z1s_dCa1$b$L<$?942irFiEmG{5Zr`_CXr~vCqp8yMB?_g^#!e6{AA|i%c(A5zgZkp|8THN1V@YirydU5*^YL-y$6XMn{KGkX9Qonr z%3>jyvc>Iu4f6B)<7hrOE3DVC7vn*vN$)O_4^9iOt`MTqq2j|yTinj|;PYpJ$JNhs z@`F&YG@K&&2>GGd3Ta3YpPOhtLT-xtD$V;zACY{7+;lE1n)&5;fDg+@^WjH@sRkFi z9?6Fv6-qR?s2|{S8_h>-*zF_Lq?{u8c;ibNQsDC`)8%|pjk6Veq#0@8^9HtrPtMz6 zz-V@#!MQHh>S!^aoVUk-)fKuL9J{@pXlhm6l;YjuKBosWB>lll1A;|o>| z+tV&bw)u)qsYYGH8Dg7PJsdvv`0}4wI23HP6bWcm^LO?+@tG~+!`bj<5JE0}h*0%m z)eSD;<84)aW+9=%v&Ra2zUFulAM(`XjGQf4d=8GXL|7Y=^OAVDYde(&z$XPy;Ului28D*2ma4y zzdO2M&i3?ba(XIJ)R)%}t*!mbJop^x`hil*u?GrL0FlAjAyuUc=h$L+2lN!B!Lzw} zt;|0=8Vnxe1b*j;{()XG*#i^FfXMLdFuSB$=UkXh+{1bivBB%~%(uCXceJ+tr-)Ax z=`$&`+n%A23L5UdZ)v;*DYwdMjiLUIp%h|8rifk?kv@|`yY1Ntsi4u`1Q7=V*61RQ z6cg^^JB6;*XQ;+AGEZ>D8=B-0Gh*k1!70O~m)0A$>=gt)Z=8V-l9Q;yfw$N)nh30P z@W5NV24P~jU`0r7;Llr!dxCjI#nC#G(HMc1(RlWHKm{>EE)#RhBm%eZzR=n){tc3! zeWg_|D{RM+BU=HbzUO^&a@952PREubTLGnR&%NP0exkJx&w$6VkLC6n`lFn^9nGPR z97=x`54ejsBNt$uX_Y2}PW>8u;QJ$YZfxp2`260D zkRQ%xuUx6XCjk>W_v1xo0K~0$eyi*d6?tG5TA9er;d9pQ_27}fr*ZDTORfoiZ+i9w zHF*%io6%Hm;PZzwkB`rBOxRW38cs=!tKe?)$@=zT<@nIr$GrZRQKNEJsF%C)8}TkM z`eb)Uz1(=0XZ9+sJ$f&VFXR#}AwF(n)wodq;uP_58*6V{0Kw-2dGM*;ufEBEqCss~ zctp8bCJ5VVWfO7p)ZU0GJ=BIhWkom31Yygns)^_Y>F3dJFz^v%aK!Gi*)8hGXAE7P z>LJL$eBou_=nRn07`j@j2l1Kt`S7hL*zn@!_QFtaV0&*1*NSza(|MNkKvenQL^eskOQ`ehDS!MLr@~vjk zRXZrL~rufPEu3pFmU8ez@=KUTZo|znUxl#LWR>D2K3b!NcP&;xs%+*J zN)eZMA1z;SroE9hzXv{VXD%OiMJRI47t?bRopqZ)~L&*dYgcA>`X#&&pGNZINT zg3oinfIQII5B z4)RlokRKlUwN8bQ5TA(9FGtd{{V({OPXTDp-fFFTZsGKH&)#Y|uVwugd@fj+k5B)I z!Ij+0*XHB1$iq@Ep^GN?d^_;i^+D>YTA@=OatcrB_-qTx9xqFMWm!EsS7t&(q@Fgx4%j-`<- z@cGUu@oCkB_`r#*p;9<^3kRyGIVkq?ex$LU?@juQGa9kt&_V#WF~NcAWk#dm7Iz7J zzWW$_ns80P8j$R$8*#B~!Zl&oC6`QEKrjCNtbYPN-`2mt25tibvcII&jQ&6ddc^}A>D$eDX z_3ns=rMPRJdFA66gicQ}^JkYA_`C;rR`cO87x3udQl;&$g0zwkqbLk`bO?9E%nB9w z{PGlhv{tp$K-9l)>73N1B_PMzsfZ!RPikg3r2QALg@ed)xXx`1}I!M2s&d zL?zvwh55)KE%SXPJ~tkppoj1L<+0zf`3Q#uChB~~{&I#c1fO5bFW{3*;{-bu#YaXN 
zB0jespF^Z+t5lR=U(>v@`8+|IwqDOn(?T}B13tero#V5P<>vY^U^buA?vrl6JC=8YN_=J>pF%@#BdHco-?xKIe=XFNTj?tnH4c7#Ya05{y_y zXWae)J|@YJQ9~X!74cphGC{lTA?WKa{>~q)-vFNr;JH^&3P-HbB6oRiAY0c%ykMSO zv&IBI&vHM&r=JI(&v@YlrZs3-5gCz9;?wYE?ge}h$6+;u!PlPD0t7zacmqEBO65@5 zN0Q6b5h+1H`)2Qnd4USm(nX{M`vE>Tw?00uSHfD>AI2O$A+H4KP6@U4%bx%|_jrsl zm{bcepzb68s?b9aC`9G6J)#W)jkC%!_sqk0wfp6zv}mcmS|T7K7X2p`EV5- zLL;X7*isdrk8jS;poI~9J~&hcrKjWtt^ z2KW?*-I1iJM#1M(r{MFTT&+?fKQ|V|Q`91j6x0$zRLix4iW(3>KBCyeI)s9JM3R0P32E(Te;j!3*-4L_mGj!W)Edus^h(&D zKKq#4IX*ZWv@MBgm65|oWK5V#rkTN=ggc}HPar?9W8g#a@36|Zl_PEd3S5)sg=mjA zC%OIS+3RL5AL>uFT&k6|eQepuP`0Nxs%oP`ZxcYefCr?mGMRaod+h;~mtD)dK(uEL zGikSn{Cw;&@u6H}B2q96hCW$^V@||Gdm@PhnV@}OsJ)Yn;+X3P_{?MTI9c**Q|t z0-wp~u{E|VSRI?qF0X`x=g^t`|1FNf8Lc>fZj^OVzRntVkH(aBYCd{ufXJLbm++mu(vN}l2 z#&*OJJRz~p&#BlIh4-MS0SvG)TU=8iru@3rep_)0`LT1^q+Kr0{d%3JhLqmD7vt+~ zB?dmCeu{mVL%Xn4RZ~1nqJD~?gJ!!B1_^8<}!zyf@n1f#CD zmIo8$r#u56cYFovr+D&^^qD)pxZ;t~vLSj5K9yTPpAFSdarub)DbnoM!*T#V?*JZK zel~G3D1O1#JlJQ2e^IX2;PaJv;)61$P1;-dRP;KgjyejPj%n>|gOX~|?E4df&ur?+ zPoYt6HnFo<`{BsXusa@OXCdtaeEv1?IO7X@E#DHz4VM!K@<}@BiA~h;1=fs`1YLs9 z|MT-{H8IMB8uEdP?Y7|}bmV6;#wd$IO~Y;zTw#1@QUfQ$L@wT1*|(?L+DE=~^ARJQ94Jz*F$~n3<0cueY31 zX8bKXd<-hIob4^T|F-9zgU=@dk2}8h6a$udl+8_%6Vl(f!)Ff#-B|%ebEV4+DS87w ze}?(^aFgK?EQhUHaes{kki&v*G#%)5cs@44?aP?>=YnOKCe466qvv4 zysI(}Gw}I>De-A2m2z!m*=eWyEB~zz&0e=Jvu1Y3*Ovg#y*l71^*R~l$$10Lt8Zz% zg{99xRtFsO>IzF|z7|A#6S#{FSc7N%8;!3eSyXH7w|@=;pN3L4%4=1;2#Qq0?2fvD ziWhQj;lbyNNPb*C8?tY>e5AK;y#2Rsz{gMx)IYW7E!)hoV<`JdDP4S=VU3U;+@7~! z?l&*Y({83M@j>}_UTN(w{Q(9(PW}z&{dNC!@^7r$&bGf#d}iR|gFv)E(_Bg`26;i##i}aMQx@Mm4G}tFO2TXJ!>qLs5rMC z_!oSB=NNoc*fn5Sg*yT{Z1~*6y|EVf-l`WW>>7ytf-?d+Y>>5?yb<8A;ElCaO=RM8 z@cF+aKVp0#C1mrFTG_x5v(J}@*7%v#llTB$YyXx120X|Qxwp4|)(!7Z-Zan7TJC09h$pE=N1 z*SB)`cWGLE2E zys(~92_op5jkFi>6L<0!_&l11`EX-d`njEMJiv$h_{ul8Q$N7x-M|x+AI=BH)?`yU zHRnF&`5-ZggsVlZ{j-laAAA3W*c8QJO`SE5>g%{^7VZ5h+A~KC2g~E@D;eE1!RI

    kRZxd|uc23MI!JjVh$ZZV!{^dGwNP4SaLEK0q`VQt61KOk%faVIXFfh~9c!zl z98+IxDHT>%8n8)&5W9~IKt&kl{4?Nj$5*q}I4B$Y;mZ=gcK9?W!^ho`yje7f&yA0diX>W?xb(z8LSJzB zSjeh{iE9x0SelpM^W(P>e6W;GG@im84)e)nbo_+xJNz1ac>IMkf7x|nxmwP#R*lzz zS8LEXJE-|d=ZEo5?HoRKomf{b=h&prYr$K6EEsoC_w%ybd;Ix4tQq=;f>QgeXJsZ$ zH2C}k^T|(;Pg*)yKilh{c|J1Hq4o36{=_Vb4Oc}a83)8KJ_?}K#>@u@H4L%UOg)2w0FioAC1{W`p)tE=v3 zgM92GY2-Ayx+3BTmbzT^^Ben|_}nbxW3PjfNe^Q&_ykr&wxDHBiippz$KsPrfHux& zOHu?r|MnJq-2Inc+}}WXD_1{X4muYSm$xGQ{O_h=J~_tCh|o5ME;1h`9?#DF#rgQP z@@R0)HF4q=xAO(9{qH|6z{g!b*Y$QurRB11e|D#`SU2kOaqX*DgQuq!EthSxv%95_ z)!w!)0-q+t#~okX^eoWi*jrzCBjd+Bwq-TJ!Z8jXYM!6^*oM_Ct|P(c>-_R#JO32* zT2ZYv8?ZIw=m9D08Y{Y3d0Uc@?sviGUCbvx-ucBm zsU*KJFF$gU0-p~6k2}7UB3#F6Y73%MDLjJI)b00qS@d;R+`Kg{bP8DZ`&p(^`_yo00wfr#Ge!jh5 zrj2{!KKJX$#TsD_A5UQBfLwN8_BIV;oM#OA=AApkT5eeOASXk-?x2P1)Ln8OY< z#`vmTKB4uqbd-n>+2Vx-aCjWM`51?|N5X0H1$5bNS58?}$kCB#JK^%vP~jh7oKdk0O$1PCA_~EM8JSh|l{;eq26DJ)xqz@A1isr&IKOu6h@I zg#E|ICoslVHL&1wKcB!DTX20=-zGkMf4YcIN^x4qM>28J+Q0I(T>sedgQdn1bKDc$ z6*L8a;bbs|S<_8%Ylclgdw&Cr9K->K&a6 zI)JqHuYRP*N63YJ?CyDM+)r+XcJrc; zV~dReDhea>hVF8E&4ABenGzpvOU=1~ao&=-t~PYM&r4n@Z6%Iz72PQcOR5tWx4jOo z`ODz*6~`_g7Z2H=3y2p)&2t@j+k9R@F@2!JeE9O_1dep5dOjGVZnY$C(BQ-Kw^Vr~ zfp>K31Xv39@DegyP!3nud8#6kz;Eht1<5Ii&ZK6d!b4JcsD~Gk;(V}cIFm#VFX{*Q{OB~u$1g>xU08j5q|h@<`25&xIiE;1w5UDj z{iS>|sArt}4*v`J`4_MB<>R@8!{Gb~w$wlx7k@I}l?sCM@ocHEHa~+I)tAOa zl64XI7&AwHxEY?%a4`0o*$X1140is}whU4XJ|Vq5ua~kg?!f14>hSU8C!9^fgA^zD z)EQr%{DhMy1RXRoJj}r7oWez5d*dtS5@;o#mD(p8#vt(NF^A6*`Ds6&D1CyM-!No4k_l;^Vj_NS}8wSn7#P;dTA#3u-p>)v7P60wwj*f9DXC| z_{yrfhWzmQJA8i{NB?YUeqSO#S(@&_=i4AYV*c7!8wG#BprIIsve!_IT!n&~aYP^6 zTZ+rig*7~pD_k%<3~2uP_I!{}Q02s&ufh%c_ypI28J^3-eW2IG=XMnzds7iJ^$-7< z6@0Rxe4~Jb-UFZSm@ebPl|ga~;zB-IQZ6ADQ7^&gJDJ1Bo4<-mgM8_jp{k`+RR}pB zZ~fCThvrlBnvE>$RH`b3;Pc%#4xjyUp==aesepvw3;F4ubVr?uR6s)g0H5#4gAb;s zn(II2M!fTPG4;ouw%pc>gJJ)b;Xn48ADn_uK}FdV*Xhs&#NSQ>s9>RNitBXfVw3I( z)1iH0nxSUWVrP_s<+YNC{chQ3EYOp72Ei!SxSP^Ir~7 z_CvCq*#16&&o50KKJ!*XpH)owZu-Bw>^bK%zbo}^vxM*FvNp2t8TfGjUOv9&`8bEB 
z*tsLx+j&Lq94q4E7VvR=p^$6UEg0*(H_Re2?cs<0F&ba{N@b5nQ}QQ{qRuh?5*x1) z1HtF{tBP9HYgFc7*<;`Ao$)9){=`w#Id(zs!#DbrjV+U3z~4C=PP$|E#S`+HhTnMq zG27``mxDg4A0(eUm)%#=&%ozHj%_~o9X3m`a})?&clv`R4pQA7tI2OqsiT2)RggBOyKr z#h9F%!`(APTcm1lTVK*1e#%GPf)6%a1!xs2Zqdg78#!ASm-teVHT7D#n0v8Ru9a)5 zxMI0pD4N3)sE;ue%~Sjzy4O*|*{(Mj{yTur$FnFt!P>CpTCu{4l~w^2c}j7%8}od8 zQ+xO(GW;5RKJg}!k29HLYIkz6?jrv)%!n!_bE)a>tEleH^Ks?J4$y_tb#t*IflGhj zzH+X-s~p%-_wT$1KA&_9K7RdN)GNv^g{U|erTEvce>#Jnxl17`DcafgSBTGhBKR;7 z!h`xvEn57nJ}?b+B+Q425FXTTn$Y4W8;5_PdyaYuKA#Lc(eks#EEeX&8l1{p92UI?pHIyP_@q`j0hS)0 z?8`{t#zQZ_XXh4t{QIxHMyuT1-K!{vI`$ViZ)upo8sr~e+<3Zs)-#_Bu)oN83quL4 zLE!V;bRi$VEG6T#jE~rS5uEI^0-rY@FXSWShmwdZdwVLG#^OHa_^=51LAb%#?Do6; z^SI64`E}rv1D>Vh%g4trP)qMMA)m}DDe!rofsaafqa2kYy3NFfgcFOw! zJ~NjO@WhsorQ+k`lUdPN-U0hl$j@7k!H4o_!GB|Q;=Wj|GS)yAE%&=f>r#H;ND*;p!}04{(C>#O!x_ zcvmchRyB~3&%x&ccw9d2I7&A+Ac0Xsi!8xQa{QC=9 zL`!;niuwIM-@p6(Hy*Som!{(jWH1q2z=7#|One%mG5^wc{_gV%^Lf>AV1qN;XKcQK zjJyLrLO*-{EyZ#Zj=nYsjw!{x`uKFZ;~{+LeV9_Tx2>;$&$mxW|9E_AdTo6~K0f_H zUtUDo!_R#;=^rs~`G(e-f3Uqdhs!k^+|t*<=bLW8r>RsGcpsJ=y;FY+jsbTXDkd+Z zs3ZE6Ialbh^mNm_G~tJO>i8hmC)RL0U`Xo=fBKz<1?%CcGeFrUHny)E#A^4hF@X=^ zvE`@UsulNfL`vSf?qXU96>1p3b1e4l3HiC2^g9coZkE)VVhLdxn%uDd_EJiasu_;ED zw1l*WpZAt2_z1=9&R@=Oqkx`Nf4b28We+M5+LfxoEqzIQ_=WwOc|P{s#RCh-!IvFE zw5nxf{EK;?AwC?89auo_z3d4@>$2OA-CoR}-S}nTGrk3%VneAF)CQs^{d-i{>5F>t z0P}l=SA=&^t?pKFX!cL6A*!SK?HX7gcLtrua7yS&0t)aZfC$Oy2HZ)?H z=z7v;d#HiW-y%NIObAHfGoAypWONqG&-kLt_0Eo?fT(#wm{F~yJyc%JTt4pnWs8lM z(8Ue?M)Ah53`V_<*3Vu-7dJE>^-!a}pOyE(rwBaZ`F$xLTU9R|Zb|s$GCbC_hsDpi zb@>GJvo95GVDgHksw4EXFG^b;Weq-`J6*{qxwJ3mlf1oceFJ>HfaE8}`p1^1fGjPX z&SI>8q>v}@sSuwE7RgQcZ76UnQ$-@v6lDY)5?Q&qq9v_ut+9* zsR=hkdx_{Jnef|vcyfzAUJUxYR+F2{9e5K^PTJX!8 z)l}f~MGSmG`*(YOkH!+`4x;CGNrcANLE%9)0KQ;w2_5Nz~_g7$0t9| z394`H+%6#_Qc#wbE)LPLXg<;F=Rg9Rj4h3b&rN_2#aygl6hod;^T~qH`0fFG{!!lL z6Iee7awqUrkJzrp+N}yc0sBTEcY-Y@ncuDwpWAqR%1zIVxH6C@CLd=mTG^iOPkN_M zwTB0Pa)JCr7*~<{xB}NHrBuQg#QLyUZdIdiH-b;ZE)=Pcy;&UV!cKQ`8QtEt-X%T@ zEzWASSFuwNn2C_rqc*jd!=Xkj&6`Z{% 
z&EmU17$Q3)j(5CWh3DmB_Hg=+JiahT55|;=qdysAElB<@SanpG9K=QqViH^-M2wv?Xu zNXzF_W)XbIeGHjc(;W`t&mHC?_TtFJip-D7cmTUST6GP4ySg8Y2`G59Re zKUB9M@x3k8KSOd7lj8wC3h+enSu0Is1fR9q+ZIjmd2|Cl!su005W>9?6=YC4NSM0% zmfR``e7OI!($sM_xI+om1A02Gb`Uydr$cGh1ByCrgIf|H@cGBbkRK=iJn3Z~!*idj z=N(F!U2ioSN=eTNhH(9*3s&?qn17xxoxMOFTImQ?T|@@$tI2R^o(^)%)!T0H;ff!* z17_O8>6cEyC*bhYPLe4h`X<}mKC(wh>-OVz3VePHcvImd+d@EhKf2cISmzC>1e?8Az-Q|}soy(SL6L_T@!!#Y-fJVY$~8trZC ztHkFPeE9hV&kf*P|42Mw7kRQW5M# z(&W)%I~o>2SPZQ!Bp2cXv)FJnW-m=F1U$juvqYY7X3DM~LiR zQMW4YeHMCy!Ra+anA3IKuVnH%;&TN)u&gRj&A}I-j-6MKdH2WqLGy-&blM`|D zCuJ^Q;xpxZP!6Q1uMXUfU=j6J(L_0r;vl<$+fg?i@gDfR3wZ9ifw>Xaqps za)!P`d~U!;D_1M4vm(U5ajkpVLqY`UAK>%#dCsQ>t;5S$ZKO}gzsj{vbI_MACf5&u z_}mg7RCzb{bt>%O|2Xw?ZD@{07XvEnkh+a5d=Y&9))ahroM1tzR5o-|`xIn)V)l9) zYHwT35TEyh&jF%x4wOe5+PyP&U|x0~o6lr4sRsTO;PbZ`_*9h|+@oNH4t$#aMb+%X zJ!&j>aba|y_{=yTCqF=XCjM`F`2qaz%J&?6{?4)B(?pV%jdcKg#>gzcvG%sr5b=2r z@hMkzV}lVN)$ZkBWNxchoBLhx`3B$tpBm-Rs;KqNv|a7?sa|l;y4p;8+iHdQ)QOM& zpxo3qcNf*c7k81bPWlJ&xd9)&Qet}wPMP%@T))?U=sjcwFh>{Nfi$M($W9ZVUk9It zUSJLVDDs*#Z#D)e$k8vi?T7jeKJNyed&s!jJYqE@B1uDI{yUAyXnf6TNJKLKnN05x zpBKQ#fJ@5}E9u&XZr*@P%QY(vvRgSOUPp;fp7@{u7{cJn@SbOG+UI?xaR{H6HnwF2 z3T#G{oKewt`L)ZB-w4Y(DD;V3FTv*WDJXo4EoKEZ`W3 zu3FBkebF5~hJW2?AnVDp_&b8nH!<)jpvMKBY^*$uv8$p5CHrk;h(x@gxMVpVRsffi zff!+#Phz}8q>0ZAe316Y30uYhLta`+mRZJ`PgO6K_sT`3iIugn0*dcM%tn5{fk0B8Y=r8} zNc;ewzXv@2{GuD_jW56GM%vp}BjEGRdGIOf$lKf0?XXJ3sqk=1u5eNfg5m&qd&dZ> z2!stHUWJEU=Gbs@nX28p=0tpM!3XPNm7xeB+bchMw1Jy2dIO7{;}Gnu`|Q~U+S^tW z;PdyV;8Un5#RE)FhCxmW>pu!Tv-6l?mpURR1$iIfbHl&~>mRsgZjKMuKXA=_MezBS zWAG9Af9(1Np0j4^KfK5fVApq$HTg4o5Bd34;JIfYzr0dM?iTFODb@VC4+Hg2%`4Pf zW(M{c@>)5F&kgax=?)c$-PFUt=?)c$mHh!e{~!-OTDf3gd$F;H);$?vdm;UU_}qd| zRVkywPI+VfshVBZ#DB6sz~>)M!KbNWeTZ_EMyu8+8|%9%G%c(Tv9&NJ{b6?`ubTq+ zd>aEFS{lNHK_-rkJjg!^#hS>(A@zg!%!m&Tz+g5k!ig1CPvd5pU)*(V&^v_zu?Qzt z)I61~s)N@r@c9nlxmVFKsh5okSy+*8ql7@zwNB$J15E1WQIGcU$hL9H#+H=(c;a&d zK6|Z4kLdjCP_4kAy1ozn-sIUcI{$iN$$bC@KHoJ3AJp72_CaQE55-RHXiD({r>Y3f 
zNV?fBLn~jv0)2L7$*Y_(%)sY+Nq(H5CQ2{SP?ZH)*SvAL>RH7OY7&_xvQzJD)qRIQ z2cI99fsa!##i`#EzAowOaDAer-_OeR;PakikB?V|XU!Y5`CPhXc-CxhTVDsC_uhh! zs%bSH83H!2b?04C&(He(2WDK0g9He0{hf)1hcre$%1D=id+?oL?XfByuAZ zTFs_PDQeey;QS)TE|3o4WHKIGNWHpVb6fj1xvH(`G0r?9voIkD?W_)%))$xG1`@dG*1G2o^oS!nr62C z9qs6K_dGs4)uDIxt{jw;uA7|>2|f}fI&^M4^e_1E{KjH@6?Of9tj{|Lc4}2A;8H(O zOKBJhKApkfF3ZD*CZvrcXBhrs9Q)a4_psBEHn!H4H(+eC9)YXf}v z`jDse!*Hr>=nRAS&~WNWeue=)eEnmzjssZZL1yd`2~_6Iv?ERgU$zE2@mZk$AE|I zKO5~1`LDhb9^i97Z}ZvU@(`Y5m#>87A?zEuzdQvW5o)oyzxd(c^s)S_L(IcR-}Z$h zKWzn8)f`-0HWa_yM!H@kh#Z@;$1flK*mwH)6!d1ZqLy;3RZE!|(iTCR&&gms?pdcf zHt9>27}D;CcJyQ4KlS9t<>S&pZeuF^Mb-5Xlu_f^e0lvi=Za{byf7b^j-Ak$vi2e< zqd#!Uow|uNv*nWqK0iG3_yo&XQg+d14bnuX&ZzvrjH5=YjsPCeLegXYX*3>Y^En&B zi=LhJ%qMWwgcyY}@v7#ys52hGiHcW)7+nu1S3GvZm-*QJ;$73zj(+aT^Wal&=ykYm za5c0EEnOcDuHd@CF>Ru~ZM6YDe|-u*8l0k3rL{3Y8l0jmb0R$;@cEh<_!xRYMP}MP zbwd(l3(kGWxc5^lM*| zhy2*BDAedM0)x851w!bCZIPsHe7vloYv0(ND^U7YJczNJ)_` zMBwvfQ}9u1Myr9o`W29|uN>g5R0>G#k0wL()vtg?7v|%PbwBcN>wgn`-oacx5eg_J zB*7;_0Z9f4d}hEy=NCm4Dri#@g!7Az1r;Q_1cA>VO~L0tt&SJEYg-f(YKlBA8Kh>5| z#8D}ggiWb&g1xCuMjafLR#6Z(TM9RLn=4-tpPMiryUboH#ccm#fR9~fFJn{pN;Y(X z_UMN{o8$*kxm8N=*A$}(u0B+axuMo+UtdMNG?FT3$rEEmji}s9wujY@7)?1*KQZ|DmpltrgVqE8{6FTj2Bcz;mx`=yezn zHzq&j(clUOL}ss#SyuZW<`I4kKHr!Jp9uTUXTIRl5{Ie~qwv=kzK*c}Pk`J{;W+=Thqi{tck7z^;R*TwSDA^Xoy ze+$V^$i5-gIcoq4?cZTY!~#%4K*WdSCz6kE9f$zAMC+kQK3QA;oz6Y%-g{31Td zru4usMe>njN)Plyd-QK!3p^3_ADXQqZ9qbjR&1;1_ovumMcaVHs9Vuq;HK~J1@XB_ z#>bH=#CDY$xdj%o1bm{TDl@dSM<4idlAqwV3m%!Y7vmO;bhqlPbR6M$N_#PG!ARHM z9g0lgr-%>9PnZww#n!;%t^^%EF!0ET2lM*}{%SsgkG%w1FnR)rBVUB&$6f-ZZZGmj z?%+-0a~r{D-HwF$tlQolposhl?a_bzgemy!wNU!BxsMZf#1}BNrZzN`D{7Q$2a#V} z`R3jPrB9m|*l!`efT?|Jc#d*K3PnlRYQK-8Y`)7rc~?ef~pQC{jmvWjgs|Lv?r$sfzR$S_#p6PPgOQ2KeYkd zv&>h7{OrER$ET_{VM?s%N@+a_a`;pS;|pup89Q74w4wGC#gU@Hph!1ko!tZ%w3aByYJci%1^a==kKKB-U zc>E5Roib|d*+TT7g;dAsRzW;|hs#bEHTG;FdN4t%W4TsA;KTh750zG>>2O(NUFZzn zLvzv_J6t4O7drg_pYjy)vmv$PH2Vr5Bf0`U-2NQ3ex@)mWL$3)++N3j619Ftdjn)# 
zAD+Z-X6sji&v)b@KUICn`WgN#Elp|cQn*FTJ6^hehCj>X+#K#wxJAtNwsjf!eD^W< zM6I7wU~#+QtPlOuyF^&reh?qAeh#h=*I`4Ahu4QPvY~!}&-Xxl?iEqEv!FH_M@&rW zetnI^zGZe;Zn7dm-wPBCPuT*N2f*uyUPdIr)zG|VMdV7_wFQZupIF1;wWGl^KIena z_f5eka{au95Jj$^B@`m?`Jox{iCRCe!zXI}EGM6T0zCJ``VhJ6^vx_1#rp7(H5@nt zNzO$gp%OeifN&EM1U^)PhX)YKOiFw7-@bAR zKAO_h8_3QJ8-2NfD4rKvYOM%M8N%g|xz|X%N!85pU}&CNu+evih~jy1V)Z+)l(7+t z?u|zx7=A4Yy_+wC&o|}42Z!n^=aZ60_-^ZirxxcU8IN#3z~`Tk{P4s-yL%Po5H%W6 zy(uI`1@#d4w79zq#(4e8T_^Dm8w)9V$&sQH>k06kXMJ};>%;&4YU1OLFYPnu;P^#~ z_N;tuxFU6lJHDvBB||LhsY}4;6Z7EXjxRpNry&xQOm}?oDc(QGvQ8w}5AgYHk{@q; z1vN4}fEJB4SCoP+idUiWN{T{OnZ_mxB;fU%aVWQH#xnUO7VH zVMDL9D6LBripcl&dWg$Gh^a5$)aqHC@o><)M&aSnpf{nkE|K5O(wo|&|M@2t@OkSS zDZz13M>R{Ft0v4V6~PDXog-@iC1S(5sw8=(v`4@Ccc&yjEtutvidsWZ3R&p)u<9xA zl`)n1^|45ACwBPDr!>)C&*~#ki7fPISoL(zx;XXmXQdPA?Y@|s?Xx+hfzN*g9`a96 q4pD$dDWXVhfjF_{09FhF*jLR%6yPyCC=z=@oLDvitmsGhYcaj45F0?Jf&BPytrMih-;F*p&8*I7`p zo*Ku8lxSicQ>YIgAW$0PghM$*aV}BfM6@)D6Y9Ht_CC8j-*P?8`|I@|z3=aQ-`?lk zd-px}Hp}JckwJ1~kM0?1CiEnS(9hWYM)U+dhv?DY*4pSN{=fcc-5d1kzMpT!sGQcr ze$j2m4^B-@^-NIXEquqXU3Q;fCdP*j?~V`OwR?Dc59IGi`Im;gbK>(IH*(yF=obYt*e(SpRj_#e*JvE4~V=|Z+{J*|_zQmUWYqIcv0wkLX~&B>fF^QdVv=ju7rW}P@~=C7yenJ3OTa@y?gzRgj# z{c)<}|5K#3{HVA0;L#(ab{#!!c4pcfy`QW7ebLS7e9Zma_`ecejqi)!)z1?~&@7SszgcdMoIILGK3L0Phd}F~DQNKOXcMpw9#j!G9s} zMc{Ym@6Puc@Gl0w8+aMiTMqi8psxjf7W~hFUIKkR@EgEy0dE5S5_q%ErhnQF1lk5= zs`A8qearw70t(6u$dK;1#-a+GzlMZBboPs$w+R)!09^FBHUXR?cJ=Czp?NO;MZq&pSlxfWPdJ`oEPH&2nE?o4v8T`#;44l6eIi>o30^RppDbt#< zG0-+doU;dkwn3SJ340J|8zSa>nl|FJ6#{L8GCFM!YL#4#I`>7oN!Ul;hh@_xSCpIrW6&evRTa0>jGU-b3uvOaGgCyv>m z%=V$L46*sPj(K9|--!-nzMT@JYa5i=DGglq z`4?HAceYr0UUtrqPSYr}a|r$tFxPkfc-+_H31ZQ+K6ht)e|H}Ag3r5@h_wyM?3My9 z`n-FAIA((~LqnhU$PsHBl-V;u?EHI%J`b~4xn9G_DEDJng8Z6%W>^lmOzi3pPy71t zoX@|E180CkpZ5ZNucEK-o%MO26mZGseJxh{bKeB%F&a1fW`McA(-Q^KwGGN7ioj)` zN3ibl4IKp9+LSSAdmz`_gv8p0NY4^We#z%#oLJkSOtS3ruhKq`B43~>no((c5NI2e z8O8HC9-Xp6Ae7V{=@gWi8V7&Q*N;f}`Vkr65IE=akud*}3DUI<${dvg&ii~c`2tPR z939$&K--|qF_bR{*^T 
zT=Mxh1)q;+cJ*e^L7=TonHh0=AnTiv^?4?<^Uuta?l_b3{j=i031V%7GP4Rk&xZQ5 zS&y-QjwZh_)Sr_eoovd?C8M0r+>D=pqQ%Pda$?rkPohGSe^Ltc0x{W?IXUm^r=)%T z6rSJtPmTHdsR^Iwfj*BCWj*t<74KM)AOJgeEuEq8CI@bpEID|89B;# z_0NF((8`tl+iTpX>Jh8SR(#t+y zSpo+A>V&Ufo$~oV!M`Zx>x(#_hLyQ4OS*8*_b-P0#c|)i82x2GKc|61VmIIQ317b+ z^}Meyu~>P2mQbNU)3{j@+5@?tOJF@WFjJZ`H&R}pDViJUpu(XQDqIAHdVk1}?&|+B z^!cWgpZ_QA^UZPJe{&x6qOaeQ@bz0lpKpy3Q&8sCvd_1%-|4rpuB}a(+f(Eh4uSK) zMW6480c&C!RwfVrrB<%o-=&bhG~wqj4T-f4$}G+Kd}oT-&39+$>vzS0bHI7v0&t1g z)xWFk>vu!`e`S3CvV`wnmiGA`sJGn8mHl~n-1jdp5y!}9mP7su8kgr|MT-2I{VUSM zE`LQ9xajjr&{yU`FA!@Rlv!E!`CiU<`n{YlTmnDjuZsElsyJ|(m~6_d%J}+f$Y0HV zZEeb|&e;RGUaJc}ud!I^*EI>!HI17!DPlMOnzFAK(!O5E02he04ayWs#ID|bG#+S* z<~}+IC@6DZLiPOwy6<=NKEDseE*X7`2kH#L74}*Ud#?F^VbaNjvvhW z{s(iu{*d(nUoRGjwGGNV1^QDV=`R22EU~shnWu}s{tO8L z1!bOrde4Na??>*(GbLheL*y^}{49+Jnxc7@4l10mg4Fw48aMDCSB9GS)U3T-L-Clqn~G)4)8xw!@L1 z>pA`q_#^Vk{rM)H-0vpEZT-si74 zKgRyA6Tbd65O|F8`an$H~}_fVU*pHmGFmC!p7Y-T>ST{x;AD1Mdku9Qaqj z2N3i5BxAjxPX_%^&<_KBI_SrP9s-{Q{&PT|5Bi0mUkrR1@Rg8%EpQI}_kjK&u-^~K z*wc{z67cJgzX9|QK>yO`{{4J^|AC-y2fPpX6TkuZM+1)m9t%7X_&{RUKmAVzeID>{ zfzO5f^MSMA=liSeuuAq{1pXUASk8AH=&u032L2N8 zdf+#K>4hF>I~>hVFVqVE(+U;-6qx%-*84T-!r%B@wS}KwwH5G>!O#1dw! 
z54;1hwjuJzLEjbhy@0vDwH=P~_XRxx4EjiW z5ApX0y&CGl`*1)V`1$=P^%^0c_e0T#fZh%D_lGl?<-%g{uyu;F!ztP!zx+b0XpnA)o}l-hk~EKm)!cRp&zPYzo{Mx`TTt) z`TLWuZHU$f@6+lO=*JUF{yCs803G(1YQDdce--5a5g6W&)i*=^Tfl!i=>H8|06%;` zRlh)b+=l3SyiP3Zh5e@bEzma-Ya62cGU%H?=l7x1{|xlaz|em+yuZ2pnt${CHPD|m zKL>qVU@8=7JFJp5wWLdZ=-(RdSJCGT>_<|2oifz&C=Q_e*VuqwDc!@Gk@Z zYS8)lkn2?h{YA*<=Rw=yXg=6)Yj}SZ{X@zZhW)nYpQOwD{5&|Wh5oPY=lg40fCmz5 z8&tBE`&qaP^qqkB1m2ri*2C{(VR#?cPJw*xPx1c->hb$kbm+%g?munQqVs|MxAsDq zkNZRX@b^aTZQ#Efcsa4GXBFscKraG64-EZN`;A|(4)(J;*w5qddU7nr}dv>lGt$NQD=LExVZd^EAU-gW%_CHc?~b7pLH*TAHE0b%85`3Haw z{aDZaD1O+V>Zd|J>{s>2gFXwG`%mhhL%J~ZTRrq!J-i?5vygu!@FHOT_ZDr3qw9Ys z>B79f$$D0iF8nC)6OjKm&|d{!54;ib{{i|Zz@LI2z7OkRzijxipWpBkKfmFpz_7nI z)PUXq+yu=1s_n2!HtbKjyM7I0fMI`YIFxj4LzI6M@HAl94;!X~ek|~D#Il|c^eiy% zU)l~w>$w*695DQQUPGRAnGgD_VHxP$Z`uw=^Fe<#JV}1hpCXp}uwOOsekJ&4E(jvjav}A`5U(c-U_${^4oz214F+x!uz`s{yn4--q(%$!Tj8h zvR>{-;bXwh{iN-%N;b|0eIBvs@V;#fK|hmN<~tko`JgWV9rnjYejiJ{>%q_OPi=>z z_5T_4rNolY`=#&-&{sl!0rba!`F*SHa5O*kYh#K0qQ4IO7O|`c_Rq$T!O#1t)cXqZ zVgGE}#@Czr0rP(9=5P8L>DmUBZ2CF)t3a;?y$zWAU+Q;5{%+tO4jh1gG%)Ya+73tS zJrMLM!2CRj|8UTc0OsdK{QSKt=Q9`l@b`4nnV_Esd=atKyAJqf@WX!FQ~>=!;75q1 z{#wwV27Mjy3&1Y{zecQWh_3(Zpu_h<(+1FCzi#@FbZtXakN;j4hW>Bb1b*1ho4J2o zelvd$xa-l}=;t?YPb@lruS)(X(7FG_KMv|aKQ%E8;eNKj{cOpRU)Fmg4D>^W8d`bn(OU+sgeQ`Nx9)IH*4Zcos1CkG8|n`J6(! 
zFg)+A3qZe$Sk`kJ@EzcP2y}SfTOR@aQQ)Vb-t(Zp3i_MC?*eZGE)&c7!26>0W6-|< z{?e~Ea2sDAxE<(2fDZ=$A)u#0KM8mq@L9m;5^Eb&av*;%xbq!&0qBnWfg1^LH=&d;;9!z!s~gCE}KIt2Y3Vr@f|&+iAhK39Mro_Bo>hVMfS&%fRT`JWQI`P=#d zZwovSSOf1wOv5VKHWYXdV0hoO4F}y2%li25eQk%M`lG=Q{n|Da^rMNTUIz5vf*yj- z`;WH6(fsFw&VR2<{Yya40$&OY`&HZRq)R>CpM;@5+g3t7?oVxpqxBX@mwevOgn2&` z|8wLQegpU|;J1mT-akO+zi+f1j@ASH+6MjF_8HXU_m$M&?AISu1bW1@YjLA3G`-Q{vMV2VLxc+`=M=`O1AG$`NHFY z4*xl37vSBY-X5Uy`%l|pmF!4@ ze-to353+vf_l_yx|1~h|Umepy=YE&^r-Bas+X3&l4(>l~)1vc%{i=ii{uUkjsbd+; z$L}{;4}Y&q{#x+A0u1k~4(Nx@t;p}{cMkCLJ9h(y{i}07(0ifYWZ=WV57)o*IM8Q9 zKJQQ34y$A*-yc~&>_?q^zeHaQ^>e^Wh{b;cFx;Qcm5~1ka1nSN)LRb>-@l!I2mKwW zw-NY5;Ljm{v#)n;4ZN-I?`j2x?~N{g{ho6XVf#yr~<&R6F)#Jd|QI5fZyX;v-SlnT;W_?}nIi zwB2#vT0tq_DQHyDo9p5ak~ikA*-aj5H(X+0T3)Y$pINJA^WSz~HF<4o!w=+@l`od?p*{c{>r zSk5et>%rQ(_*!0P)9fR>&Xcc&lWwe9^@j={$o0m49z&XV%gu~&em*~=aiNl*0}hp( zSK&iul`_$jSSv9?&Ry!1>#nb0)+p@xrwVIi-i)^&AXYCQb(H2dKKV@MU%e-SbhBv! zvIetec3N-Jl*#GGc%Au0TstFkJJ$!2wlt}*ky&``80WP%_>MFC@itzsaW z3Y%o!s+>NQSI>w#!EDTMUIRW;BT+CyPz1iJJ7uy?g|F$t}_%aemGxmvGI*uwP?I*Js4^60k?I%D5ELl;>VN!|PmFcYwTAIfnPh7}YIa zz z=e&sV_l)^XAHJiw=Y0OG#5Zugrj!8kS~?iUg#hQt`MVd}t$U<3Nub0&g(*B)W}v9_OT`vTj` zf!8p{a{Ljl+j;Ve^ZkBrp)zfrg8vQxn3CxpKhNtao`U@a-Z|a^rD(cRG~4f+?T0+n z=6YW8D#aeZ7vu5x$uIPKi@me_K`n>Id8QQ=EBS>qCL7x?czpSOPl2t--v-gB-(*<4 z6cnju_ubTad%^8X|OK~#&u{S}%5Fde!{c|Mi1o-REDNvA6!KwU>bU zC^l+D2A??sBDiWtjZl%5O)GOtmbENi*mTp%+^4t}HLYsm*PSc>>8|HbHrX93<(~6l zJ>F?tt#hLSko>r2dIPQ-8rCdZogr{MWH;`sb7zBEgo2l2c}o_8va&)21% z>r&5^aq?Zr@R^q^k$iA7z03LV+|cV?{ujx!n;zlHHt${=5ZI(Ekbd=uzztE_0se!+ zGa_8s;@vw6o9&hPYY8MO=XzJ0Jo@$C?JfyC$GiIEvE1R^#6tojf&y0n z4?H38>77*B1 zN&;t{Kd4@s^Nh)^!y^3+Qv&*|)ihYr+-=@Hq(oj2rOPY5n^g_F)uMMC-vsoln^JPL5XT4G*$!YVy0T9Sz%YDWW$ zm~2pK%MN*ROjB%xU@oszo>)^NA--NMP5NA^Fo>KJ&mcWGAkS8SY`vFP^MsZ>q)Qsv z2zMxKcGJKm3WFzOlVu7wA`K;3((Gn&cN`Vil(0jLRJ#r4$oP%I785%E+Ebzg9IQ39`PPs?K~qGbD5Q%qA^K@3K86=u%&?!yiaGErI>_RdtAIU-I-oxFv)hQ zWs#r26%Y}BoX#lVILv-|9?Zc6OPBQ?dQ zgwj7%7)cY|9MOYHA0q@)yuDwRwAx`zeyDbvJA|hyoN^xSOs{sHCG``HpczQJU)EiL 
z^%Gk1J0+VDnQi7oNenc`x^<{xg`snis z#NV0azVwN|ljHF=Tpye}cW$mc^EQ@0Mvn;hQSy9g3g=p*1?P89YRqpQgj?T8O_S`+~op*Kwk)F-lx;OOld@xCDx#$f+Yk&~raLGl5 zO`t}E8FQ{T2LJTVc<`Az^$sXtpA(Gt4}sy9xj@M(>|f`>{&^lOqv$jG@E)$f8R-89 zuHWKbBWqpOFw%Vm&%5cgTq?_t)A>Wlx*3mfAzXz2*RJ&0_hPou-I4gm`#8BXCak$m z`TR+^g|7te)P!Kw@uOTdp_V5kRjc3y@1tQ+DitC(ctLsmg0zfRN9z0^`rnB3cN^no zgxianl25*e)-EF{-{#$l#|5^L#C^oE0|KXpI;P1fo%I-wIg!9fdRFK0H{+5HgJ9aD zA~M(D+q`>S=Y!3j0yf0c^x$N6d)VD^sM?ZV943!c>8A8cssy^l%L2T7>j{yb9PQ?C zEDi{4(j3$C4+{)SLmdC?pGc=7xXQbh#{^Doh*x7LrevFI zfd#x>VOx@?#=W|vNprKkMbnUy#{TDMNtQGN>5f-R8Z&7qRPP{3pE3HT2$fc!=p+1! zvMLJ?C^apY+!M~%G-S(lttQQ+xix=KVM{SL+cQ+jjA%&(^x1V(yo8dJfm;+d1H(~Y zsW~%BlY(51-4fV>Ni#d+&%?o+@is?lz5pe)i3@Z-9>>q0`3t4N#u4q_H6SrD zR?+UW(l`oP_+F7_V8tEQm@J5PmoWr1Ca&4wl@{1Ssz~6a&a|?R(v0_pesL#T z4J*_@gFSNjOLgaJa~^h28x$|68w@XB6gWi)(x;&GF|x5EpEx2)EDhw~dSz9XYA(-J`YUv+xAzY|a{ohk+mJlCz`T?8qy$BLf~QpGua zdlyG(j*Mi<@=#5)!LTG3syM%Kqp_+04kpsR@%X|hO|nf3?qrm32&j_N$b2rbgk-xu z5CXalW=EbLio`cIf<7Y6P35UT2*8l8Pn3{V{ncb+k3AGRM508b2~Q2Jx7c@is&ts&mXyit>xUmr3LyHT#S@adUdAG|2xLuw<9`L1o<)TJnN2q+9$uW zp3FrD0^P7Eq9Th9_Mij<@nNFb!guD$?1t`aBI^378y%&|Vt6x7uK_Oh{W%3AKW87M zG&+iLmbrgDP#G|)UILX|Z^7N2o793e8ao z8_nnt6ErdNl7EayCOr&X8l~yw&fq)w^+B_Qe@^ zL5c35c3_||mPD=zF~M&ZL}}lOJ)ZQR7Sm1ooRQgm+T3BNPJ|^hp<>JXV~{TP-2&-) zk2P?$d-q((4h%$*$S#6^i8)j96mSJgLjeZv4iSXiwj_^xr$TlBTc|2|{QnGzcb^fW z(uzO+D3Mb`8iCy&U@Qqs#6Z&i`VeoJVlkoaQpp3uHfLcB72lwAJwg~t3}!A{LgRq6 zx#L%PR|wJ|o4LaX>fiJW%~3Z~9Gg-KMqo!5Jr>NgFlHy&LLuI%B@wFS8Y+WXX~_`* zWHWvfez-*4a3^ITlq4HPv`2%PhDEB|VDfu?$wP&2>@}Fx&>iB9;?CAik{uL@6*t@C z{J=r*1Zj}$f zQiBv@_lz_Sjz+ZvO1`Mw-t2bkF}sCT?%J43!bOFfGVrq{&()nv#RSq6E>r4_Fnl(BF=wJ&H5F@bb*ggHsx?)Siu;^qj4+3=Vrnzn@SOCs%z-&7LcRW?T@ zq|bR^w#Q1*Q*+u9(r4ZQA5fQy3Fxy&VOx@xJfoKQxMT*-N>5XFj3>p7sh3TVmR#ns zhxoDfK@dVC*oOBlng*i}U#v{lz?vnl75~#urJJJ?Chh)`cj3w=Ew)~RIlP86mrJ4& xc8FwhLyav--EV2AQXE9Q?^8>RRV+X~l_cg2{~V6Ma0G@UFdTv52z>St_%D}_T}J=_ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/datetime.csv b/pandas/tests/io/sas/data/datetime.csv new file mode 
100644 index 00000000..6126f6d0 --- /dev/null +++ b/pandas/tests/io/sas/data/datetime.csv @@ -0,0 +1,5 @@ +Date1,Date2,DateTime,DateTimeHi,Taiw +1677-09-22,1677-09-22,1677-09-21 00:12:44,1677-09-21 00:12:43.145226,1912-01-01 +1960-01-01,1960-01-01,1960-01-01 00:00:00,1960-01-01 00:00:00.000000,1960-01-01 +2016-02-29,2016-02-29,2016-02-29 23:59:59,2016-02-29 23:59:59.123456,2016-02-29 +2262-04-11,2262-04-11,2262-04-11 23:47:16,2262-04-11 23:47:16.854774,2262-04-11 diff --git a/pandas/tests/io/sas/data/datetime.sas7bdat b/pandas/tests/io/sas/data/datetime.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..6469dbf29f8eeeafb4703f01657eae4d0872ef10 GIT binary patch literal 131072 zcmeIy!E0Pa7y$6uG_|QjNibjzl`cUf(556Ypaq4jB(VY0M!VHk1REh>Al?e~Ac){W zXbvg`59Z)O`~zAqf?_Du9z_sAk>Ei*cn)gQHtWpnd)dueN>l`e--MU%n|a^Ny!pMC zgwSeNetF~U<$?aMK6|$(?7ud)GI+4Be`qL-4^_IxySK~sRCbJyS7OQIA@uF3lqDx? zC#y5FPfbtN>eKbvx#{uh-)nJxEzNx3`He%LeED7N<#$ef^l6NFpTRFCTQO~2i9fA& z^vLMM=-AxYa}STd_{jV-Ct7#@T9)>Hdf@%z<<&X+^6JQX8Y`c%^S(X-0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1pY4qmsUsC%V%BwJ!W0~;hq?(TOs^dY2F!#d2dnl)6Eb@i}CrG zUrgh%KM?cBiu~%05ax^g?U+v#`3q$p`@?Pd#%56y$Jg5Om5#hvCWYKSICOn2CCykp zyz*YSa`D@Ta?Aw>zs~n_zKdUzJ>N{GvSwWSd}8VQ+Q&a%%ds^c?{922>vF7qc;Ra0 ziMVf$xvsdMa*Y510t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C7 z2oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oU&Z0$Vp4jdF#%W71Q0a(pvXVtg&!wPUn> 
z&VjH$+#j!2JD2AycdXPt=e}5+u2wsptLN`9XJ0Jd`D{~ppUKeOS|(=g`~3Ml6M4?z z&hs=Hjp6Xa>uI5Cy!M8Dv2Jhb#&)Zbc1XQS*xI(b8Rtx;+eORI)?LZZSmu54UYQS; zc~2$pQI41M<8qDBvJ)OGc7L>p$BMI`4e3lFyt?Dmx8mHlw&%t(bhKxe7SGogURs`7 zIJY&~2+o12OcKNIsbHlAXwK3(?fORp@ZetmXsdOOCtG=6NUS)1;q`uf7s`MKHS zPo6r_TrUnaTLenal3e~GQSne?gV*8ZM~q4IU*?jLC&^^5=j literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/load_log.sas7bdat b/pandas/tests/io/sas/data/load_log.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..dc78925471baf4cc3dea8d568d27c59327c7d578 GIT binary patch literal 589824 zcmeI53%F!gS*CY~iya0YVL-*m<%lg839-+mZvsv=Bp^dBNIDMq5W_LunY4D&q5A}a z2oy7zAjAPBOpyB}ARyN?GAN)$AmqY;10)h6LPSJRK@bE%B(wHewf5R;|KIwn`XIaN z`=3=joIbT{RcF2HTkl@CI;X2DKCb%vXFc)A-R|<|@B6*Y)v?zsUV8U?-}x@f_o+yqh58v&AUJTw@-M~t>3(PcT=7En}s(Is;d6aZ>l!e&s&dOCv08Wx_HX+ z$@|a0@S@iBr4zQFu)Or3ZCh7PSloWX(h04X*1x@I=k~LkH**Uo?fob{f@uzxMR{6wtU+^KXKFe`uNXl|J6T#LMT4Ie*F0Q%R1LT|1E#v z7dqFk7+)WR>*GK7X0C6^T>tzx{TsV~&lQ>Ldo$O!WUhbyU+~`_u8*r-g=TWkSDt^~ z&AazJcK4~LFDxuPZ1?GBEp1)gzPQyr0T1{3qfgpXzi@_D47^iZI(+cL0|zeLx+q>4 zUpRej_58i7hxRU3xqmYAp}I%^ zc-If1e;VOS)(?-`b4q^k;O-WW+H=w={-)vcLyl@UmDX#$-Cx@04-FrZFX7`{KLkEF zI3#>D^UvgbCGh3;m%9Bk6LYH@X20#Le`3B&#R>V6^+S`N9lm7!V303aKN#do)(;II z8r+lnC+GSh@J%~DWSSv+(`>!a*Zng&UkQBEjt{haQ{F#m`7#ynj(?eAt!|k8cDDU9 zQ*lB*v3>~aBeXWC`RdaL*3U21_m{wj6pVbl>xVHP93mng@A@I+H%xaOHhWO|o@A_fPH`Vxn+9176W}o$S{}k(o zkl!@p12f+=;{!7vs(bX0^ZcPb|1`q)og2XCBMp5%OYnvJ3(Al1d@SK>_FoX3yVckJ z`8z(1e}YfTkMRC-D9jWm$Lq$x_A9UWm1gnvzhQU{$(m&_fKDA&irb=;2%Ql`iIJ76H8y{E5B1eg~q>3 z#iM_I{g)}0lHg}EaQ!1ud)C zvi#M){1bd3KZGd#rI?jJ(z`p3rC_=n(te~f&=KZMxzkAUxszk$ZT%*3+mU#3`o)Qhk5<)7dS z`JuXZ{X=CUg8JP|_!|E*6_?|m%AmbCGd?Z9u78-a-4tq^Oa07{O0s(U+W{m7xF`OkN$D;HU1$u*Zy_?^fB=AY57I}X!!o& zF!B#=?z-Tje}4VGZT`>y1YgJxAx8f=`5ONaT;d-upO#FXEfXH(M1Vl(3l;~zro z`iII$6nLdCzUN$n)_<9b6Z$9kwEPJDOZW`>C-}7d2>si9+(jGN|D?Hp3O+4ALjMvz zgZ>FVEx+O)9Lv$gzQ*??Kj&BT`6V>O7ynRH@Y+oBtM4zy{JZ~;J=OI1C-}7d2>na= z432+-Ps@*R{Cm!?BmcI;dtU4O&!2B*is3h>SNpnu3cipZs(bX0ldtg)!MXOY`=^hA 
zmru(t`bWd}#XsASe}4U!i8(b5KkLgs!KdXH{o~}D-alSGEx+g=4d3&wLjIw{73Y!f zpS4h)Z(IBPefv>;7^W}(1YgJxA$I*^<7@mwa4xkt{&D74^J)2Y{bS3|^*{gl7qmHH z%s+%^#yX>kfVfEnmnF zA)4_I!5Q-pA)4_o^ZTn(=QZkk4JRdIoz5lkGj{Cmee^Yh84ZeR=ZFcW`RdufozTdy?+-l4B_5R0H zcl^HI|B&kYYyacp`+fYku2k1Pv#!$d<9cnb{!_JSy{+mCx2e8;YgK*k*4vG*eaAif zuKRy{e0Ti!JZF5VZsq#1{^xCvsqXBbRtxo??)ld7^E=dkZa${^x$*N&r+k+W7fG{ipl;UOIk07(X9BUH@ZTwOfCoxwvJ_zcT*wL`Pdco=_dT{^zCD zUDp4+wyM=}k4?8-|6`%)_v=sHmwzAs@-n0AkJtsZp!2Vx7fAze5 z)yd;O&)aw2`twu9f9_pfTRpsQZO>CL*jJtCzh9`I9K2xPq1F0#9J{~%wDA4HntNs8 zey85=q37>=%JB>LJL7&2z3>9}H|*PcLQ~%TCHwa5cmKtz{`>YVte(GjVa+{XIC##& z+PUtJ@RzyQckelA&!bK`skwCe!3z(ay|40rxZmD-LcjyfF|MBa4_q+d+`!Ae*?&|sfec$1#I{V;x=j}Uxt$O(Q zPv27Guj9@W+R(Rk)%4M%?0>+MJC5ZEeKtgQ`|;o1eI7Cn*F*if&qF^8*KgPP`@-|@ zjSGfy!R7Ar@vS-W4B>xg{P&^P+Sk54Wx4iyn7{v~d(`z0_oBe^E%&Iu|LXX{6Ym~g z@qgZB>wVV0xn+ELar_tfd&Bsm`=k5!hI{8JO_7kOzwo2ur=g#OKbzv^8?^;SM=tL+ zxmrH=Vo2qfae?rBe6M$7oPYQ+Am;q=PWk_k%8dWc^pDL|xW`o0-CF(Zj_cpwqje>H zU;6B}j%1S?$--R8zq%#hRiIxtuhnHgkZJGb@SoY#{9T#fpTBJdEfLJDUlE)e@9XuQ zp98S}{zROgne{9BaIOo^_s=>(&iq{c$N&D%wiSeE#y{}7@X{}7xp{}7@X|ImkXjroTV&G?7ljQNKU&G?u3SeNgg zE=d0Khh>Cl#y?G~*wFGv*&c zB=qkW?)Dou@80v+-KU=J$G5YVwk~d8M4L1*yY z$*YI2Z0)mVcCd^}lbcRlM$>zM;<0ignu(J)cQ_t$)84 zcF~r9EPTi8x9y+cGv;5yXV5>vr{zcJ-?jUmW_tV+d|G~l<6pvO&_BVa%l;Jk zhdx~3W+8F?ck251o+*arE8S~-&0hpx$Pd*$`p3!F_=n(J0&)Jy%ctcR{iEUg)4Tm9 z82`}WX5wF1KOEY3&cXv1#xfqLf0Weyr0Hr`Ht@VJ{{&yi4+gkzK|cdg4U{S(!_>mMqUVq~B7#rMTuMf1;0#R>frd|G~l{v~_{ z{S$mzeuVy=`w83r2|g`9LjMvzgZ>FVEk8#8-uZN}|6*tSJGaZH<;Un>o6n+uT|O;8 zLjMk*`3&G+X8%miKU@2sI~V40|5NaV{1Bq_kBhJI55c*mPTfEKiYKi9`1rK^q<>s| zuKyj?{^yPbKE9A2+CMYnAA&RHA3`+a-_-L@UjGoH8UGNRG5-*v8UN5Pw{iLT{#nmp z{QMFC`X~6b{0RL^_ze0d__X{8{rkqT8}iS8zAmHbG`E<(?k|E*%P;!J z$v3@!ynI@I(LWl#LqGXUu>YA6i*WoCd|H0dKbrY? 
zEx+g=3tx4?8?fARgZp27J0OQu*|VA7+f{ZGjk z{6lr``iII)0vo?R6TW8r%TzqhKRNTO`Lz7H{$-kBnh1N!uX_EzoAyugY5DQ_m-5;3 zPx5K`@%cA8d(X4L{%7XK!8iU%J}o~!|584C{z*P9KR*Ae2cG&rEc++hp*A`4_hF4lAieJY5gbpwDm*RKQ_KF|IFoA_fMaUpPw=EY58^iBj9`P z%WeB7__X{8{Y&@^`X~6b{P_GEeft}x{gZrJetiC=eD?g4d|G~d{#B3u>cw*ZGdF+X zn}13^Ek8d0Qa*eBNj@zKDY#oe@l4(B#wXn zd^1xFzd60y)BcO(3;ChCNB=na8vhWSYyY}``WSfmwEUufEPTVCcoFc={oo>eME}D6 zS?l~^X&%qtCEqwdh#39j`% zmtWmKeKLN2#>l7T*Y%Hp?>&!r&PMiMXz~+$T7HE7C42_`6MR~Jg#LYH*KeElPw;8^ z5&D<#8T3!^Y5DQ_H(337)BZ_5Ek8d0Qa*eBNj@z%Z3d z-|{@3|4F`aeh@MG$H^D`gK&v|ynI@I(LYW;*Z+>{=WmzVzyF%dKlBj;GyWksWBwsT zEB?X5OrDekC9Kyuj?NH-<8{*E6?AzqQTvDt(JdhpL1wo|KYVX4KUAXXqy{l{c_Aa<9OIuejIJC5NaeMy1SNX?K z@Qw3>h%Nsp`GS8C&b3v`KUzL5zm|V2e1E+I_?NqXw*0Gmf38^GiulST|4v_9J%8`& zp}h-dtZNtFIOTW2Kf$NnKU@A$@&*5L!Lna=4EiVd zwEXz|tA74D&y(}d-2Ibp{2LF0|G|G;b>#BR=&xK37dLI$H2zk!uhxI~{7?IjJ^v-2 zmLH%0!#7`Q+JDKX<;Uk=%4g3%$*1MV=ilh+TabS%c$F8&U;p{OOfmfC^lDG*PstbZ zLv@e-aq>0(Avo9mb^r7+@bYQ-MgM5{esSBSVE#LK|8=~+ZhiiAWgb6&Iud*#KZF?l z^PgW_ZhyXyfUofn!6p9j@@e@+|2X+v|MS0p1|6=t{Nnh>%NO!Ph-UmlaK`*Yh-UoD z9GqJ64+UnHjkG=AV*J%a6~$l+T`jl26Nz(7*S6+O~g! 
zPs@+czl6`Ae}YfTkI%p1U4Qm}f$?t}UV_K*&!2B*is3h>S9@B&OTLgFs(bX0ldtg) z!MXOY`=^hAmru(t`p3do{p_=lf9P<<1sDAb`)955_igid{x11Keh4x8$H~|Dhu{+b zc=@#aqJNxxuK)S-cXYU7%s+%^#y5>MM%(@gJ}o~&{}Mif{s}%UKR*A4ul?u?!2Ty1+}&MKj(@HB=k|Hr zf02A4KZGd#Y4Zd zT`>M-ez95f&!4Ykip|mbLGp$BC#rk&kCU$%{}5c_A1|MlU-XZKZ+O<)?`_0Czkbie zJV*XXJ}tlKA1B}R{_*l@`9=R&_(m_f$h3cwPs@+bzm(6Of09qjkI%p1^FECH+kuC$ z?))=T48J+O+SC5E#`o6#(vErD|CD?oKZF?lFEk8d0(){fCC;7De`24FjUu)Vw$*1MV=U>Wa z&p*kh<;UmW-~sn~5g7k=;+1zC|NQw%rWk&6dbOwZyW|V`p}I%^IQbg?5S(lOx_|l@ zc=@#aqJJ!W!xPU#{-MoX7hLqupRep_|N96#=kfec@`d~mV)T!bukjDTCI0d9Y57I} zIQd-v^XGq4um4)-?_~ZVL^J*&IAi`HL_YsUH(Y?mzs&rlJN{*g`6-V6wWs-~d{qKq%kLj8Dt2>)-tI4IcPG)BZ_5Ek8d0Qa*eB zNj@zW&FR&i*6)%p zGHEQ{vkv& z{vkMH{vkv&{$&m>E%}EK&G?7ljQNKU&G?sDKUne)A)4_I!5Q-pA)4_I4en+Bd0=&I z-@@M2wN=*?lOBET?ZF^q~e*Pg-aYFwDpOzn?e+i#K{{)|wAD@52kNxWJ zTlP=#Y5DQ_m-5;3Px5K`5&C!He%t;DJ}o~&{}Mif{s}%UKR*Ae8~+6Ox4acE0pk2~ z{pUM!#qgWct39nBB;PncSoi23CtvUn!nyXZ`=^hAmru(t`p3dIT)V?d!2AoJ)@h?}*PjU3GJjbo`^^3*%p|;&J}DK0f4%>B;$5 zXOdsdr{&l2kAQFVsk2P`C;7De`20)x?D;48wEXz|8(jTIru~zAT7G=~rF{1MlYCl! zeEyAYJ@Qhy|B@TH`R1RJPs@+bzm(6Of09qjkI%pAySINC@NW?>0iu6?e>YPMzd60y z)A~X3h5S(6qko)yjeiKvwSV0|eGI&OT7J<#7QVqHj|2WKq0J>1T=Xxj-&^~ii}QH? zAo<4mLB!}ECtvUn!X^Ik@@e@+|2X+v|64zQn123A>mNk4;va;wr#Sl8p5~vDPn*AV{bS<`<6kbnIREtH10$c7U)R6+=c_*PcGLbzJ}o~! z|584C{z*P9KR*A4Z@t~iH?sf2a{rWkT7G=~rF{1MlYCl!g#PXPcia96J}o~&{}Mif z{s}%UKR*AeJ0J53%j2Kq)AHlo-@`e0R-J^e;e2sqy&b5EtKYa|md|H0dKNh~Kx&rw(@#p`-`fuLPA0%JM4`=6N`2jBQt{Xia9AGv%p`iqyt#Z6l_`EO^G|2*H)LhJl-c`kqcQ1FHP5MtLq zHonF`1n2UL;~!^!HJ_GW*FU!WT>tauk5lje@cM@k&G?7ljQNKU`TQIH<7sI8%SPx5K`@%fkX+4E2GY55WQcjVQ! 
z{S$mzeuVxdd;CCu;N{cu zi~iB@J^6*mKeV~)f{Xrz{j=8j!?t-me;)|GkRL*f{&Dg({vo);KVCj9zvv$)pX-1A z{C(>6AFqE1(Tslx&X|7)k+%MVoPw;8^5&D<#8T3!^Y5DQ_H$3y< zuiwc23(Nge@@e_;`Iqw9^H1_=`4Ree{f<8{?VsS&@+0&w;WOx;;M4Ns^KbN_focCF zpOzn=e<`0m|0JK5AD@4NTV8zJJ!Q`SX=bG5qHAYESET$rtiNb&vjW@-_Y; zIM@Dl|MW5N@@e@+|5*5{5UuzJ;cWQ_5zY9QIXJQ8A3`+aAA&RHA425wZ}_<{ zfcfWgX8q9hFHWa&p*khahJT&^7uCp zd|G~l{v~_{{S$mzetiB7&wRdV|0JK5AD@3IpFRI1pOzn=f1?k77WuaWZ=Lx5S-pBBC;t%azhnk(zWHY*__X{8{Y&@^`X~6b{0RL!{At_%2|g`9LjMvzgZ>FVEk8p4 ze&qN!gYj?T&;R-Jl}s^Z+1|X=)B3#~uluL3A7_3wpO#L zt(N_hd|G~d{-u2O{F8iIetiCowmitRf09qjkI%o9&z^shPs@+czZ0I0{M&_>sJ?&J zule)MOfmfC^lDG*+mYZ4`JuW;|2X*?{}7yO|GIzr7jp-^u(#=eK73LvY6Y zLx^VlLx-C#KTG2uLNwzaf-~kHLNw!F=HSwje+bcxe+bT)e+ZG#zo*@R)_Cdf-kHeP~E%!p)wQ6e1VzpHS51j#q0j*tLN88nPSr*&WumXuj?NH-)FYG z4V?dFDo*I1;M4LW^e^Ev=%3)z^5gSwc;!<~`zQId{P_G!`Rw^8`Lz7_{2RUO@Y_v~ ze}YfTkI%m}KYRX3J}o~!|EhalWZFN;r{%}zU&?3CKgp-%$LHVZ{Eq{9xUqf1G^5KM3dAzwVzt23|fbzvv$e-|!*Fy#vfY(cz{G zF8UYt&*JA_T;>N>*Y+*!U0qvERfErZ@=x-G`zJz-{&DgJ|EB%-H+Qu@Kf#b+&8OuT z{o~Bf^*{gl7j(F3%s+%^#yCw~rPx5K&_l|#bd|~~U zt9WPrnJYFayZZH+yK}E=9+2$B%dw+Qa*eBNj@zvr{%}z-{8#aO#3JK zwEXz|OZn{iC;7De2>rYGxFcZvTg0o>IR5$17i5azH>X#7TEAEDaocK>``=xv`z5b# z*_1q0o_z4g<(tvV%j1i~P2ukvi1rQr2en=FpQU|MzQ%t9=kl-p@ZEryPs=a*&%!sl z#{uB~GCEv!!A1Yq&yU?Sxpn@rIFFydk$mI)AY$~7lP~xO;S&FN`Lz6^f1G@-|M}0~ zpu<&T{vkv&{vkMH{vkv&{-MKFWBwsTGyWksWBwsTEB;Nt|4KXmgosxBgK)O|gNS_o z4R#*{>%W!E`rY?W7v%W!H<@B}fk}^^_J1UwwtnyW$Ho`df4ThX{^^79>mx=!Ex)dR z^Uqh^{C?B^Nj@zveUOp|q=pPN=&;2p-Z{qKN z3hTet`NPs&e*Q)9h5Qg=^pBIT@ejcz{_*l@`9=RY`CR{7|NKjC|2mF;ynG=)v^Q)-tIRWJPEcY*!SOvU;9lYCl!eEy|;_WYB4T7HE7-Q#TA{s}%U zKSKW!K7;-VJ}o~&|K7f4+dsjlmNci;~#=E<{v`j^RK$`j%fVL%wM|WU#6I!;^<#{ntw{ZaQ{Sg@A`+z z=vd9GGvRB-zf8sB{L_yQnPLru^3BZnwEVjM%|GApU1yp0Px5K`@%fkX+4E2GY55WQ zck4TC`zQFc{0RL^_ze0d__X}^{2P4qjeiXGUorzX-~3bZY5DQ_m-5;3Px5K`5&HMO zWBvsAw}NN>(LcYxn<<9hoL=o|{V))GAwN|2=pQFv;~#=^?O*p#9|JF+mS6Obg|E8m z4CG&C|2p~?*6*$T&y{&Re~^43KZF?l{{9(EM`>)CT zL;Kfe{6lcY{6mOl{F{3H$2lDqif7|dB+4s--HGjU6DTd#iUhQdqJrsN)KUDYVA17bqAA)o3 
zU-wTR123PJU-XZKZ*C-}7d`1~84 zeCnTC_D}L@`SJOe^4arG@@e_;`B&ZPmB_#CcnRS9XZ@N#U&$21Z%(iFw0@9$AwN|2 z=pQFv;~#=^?O*p#9|JF+mS6Obg>P`$H;{j5bIAo4{R{KY*7@J|c|8AE4}mgpKJssC zDoc14{R`t?YyWfSJbwOVDELBt2r>G{$=CRY;1d6M`Lz6^f1G@-|M|~(d|G~d{-u2O z{F8iIetiB7u7B_cH?sdpbN>{4T7G=~rTN+OPx5K`5&C!WgReI2pWxH-BlIufGw7e- z)AA$qZ`bpXf4lG!!1vGkHGjU6DTd#iUhQlBAoxOlsP54}PQJ!J1n1hn?w>veUOp|q z=pPH;@TYG<{$+lCI{FvppRM!1U2}QG{$=CRY;1d6M`Lz6^f1G@-{~guu zf7#jo{S##Vq5W$!{vkMH{vkv&{$)09EscK&(Tslx&X|7)kpy({r+oJOmwZ}&eEyHV{oHFzkH3OX%a6~$G(UU(Nj@z0zHxrA?$JL^zTh8(b4_3OPoDrUpO#U5Y709;Eef)5DESJ^dcDla`TtY_?Ih|ANArZeT{#DPn*AV{G;Ow z<6o}go$)VMET)wIZYKHFd|G}T{|NX-XFl7sf09qjkI%o9&z^shPs@+bzu|*!GVPz_ z)AHlIDk)5pNer{x#@W8oX!_Zh&y6|}kJ zf{XsG@9(-{vbF!YIFIKKl5dF1xc{y{`5 z{y{if{y{`O{|0Y-5g7lrW#%v4@h?-%PjU3Gea$}wpEiH#`p3o>#=l&CasKJY2Sz?E zzpj7t&o_G94W|8*d|G~d{-u2O{F8iIetiB7PyOi+Z)E?4<^C!8wEXz|OZn{iC;7De z2>l!0_y3yqPw;8^5&D<#8T3!^Y55WQ_u8i*|1!Tn$oJ2BVdwXEGsX0kpMSNd_1{qN zh5S(6qko)yjeiKvwR_z^ee?73Y57I}Soj7XxE}bolF2Xn7v`VyzW*ut#`!_S=pQFv z@DIWz{_*l@`9=RY`CR`ynxB7g+ibl4p(9Q+{vkMH{vkvw{=vf)m!F~W4#y4>gZ1CGDc66QVlL^4kDm5l27*spzjys(;|uG* zTz+-`^vU@35hI_LU)MhZzF%AT2snSpRJ=R>Wr|Id&lTv)Kf$NvN9bR|XV5>vr{zcJ z-~K1t_D}F>`4Re;@EP<^@M-xG`uFIykD4C;1fP~4p??XVLH`7wmLH#gqeCB({_WU` z7PoHF80Vk$`DV7*9L+x^pU)51J^IJVH}((1xm4=@>BHgW)AEb{vG5IU{P8~re+Vw|kC#u&FZ##H=lY-j`M-(dA1`0X4+dbv{GrN9B{mT}s1)cQh>HIlao)&FZGXsZ*baqru~zAT7G=~rF{1MlYCl!eEtoeewAteB%hWa zpMNQzJ^v)1mLH#gqnob!e`5bLJ8<*OKP8`*AD@3IpFRI1pOzn=f5T_q?PI_{w77LE zy6B%jU&$1kwDzlC?`!=h_(FcD?$JL^zQ#WU=Ni54pFSL3J}tlK9}8b~yK|*~+jF1a zjsAu8d+YpfX&%r2B%jX@BS!x?`NsZXxWqqRJ}tlKA19ydfA#s_QOrMC{lkbx{KIgj z{KJTR{tXWvkmKKut(o~tcl^r~^HUuCYhUwE!Kck%y8f~8O+Wwi;{zj~mS5LD0>0+!+Iz{{uQ7so#uzK30n z{6jz7;ew0)h4o+S{9$<>&;Le(FXV?1qko)yjeiI(@sF2J%P;!J$>;i?KmW^Y+*fZGamC>=9S7*Z4jDMMm z$N8rpA2P)n2<4lZ@oD*W{bS&}(+yz%GgEO!|GIoyevJOL`7HX^<asKJ| zcQeKCo71a3tsf*`$Pd*$`p3!F_=n(J``7)`$H2>{<;k%{!3o!nn%_SFH^e?R6 zTjvie^LYL+5PTs&gc$wf{8RFU`zNY<*FRK7$7)`k312h*Whx%$pMHGE6l)-q 
zZ)V1)<=6FZ{`m$M{E}(^B%hWapMNQzJ^v)1mLH#gqldrQw11LM%a6~$l+T`jl26Nz z(7(Tb>nFkfOJ?Ban}3c3pOzn?e+i#K{{)|wAD@4NuYC{sw++wyqkn#XH&YD1IlbD` z`a$xA{7~Jaf1G@ce+bUCf89TQ47_|=e$hV`zR_nNdlMM{(B_g0F8UYN@2&mMZS#2k zAo)Um2r>G{$=CRY;1d6M`Lz6^f1G@-|M~NWspp@({vkv&{vkMH{vkv@|AtpT4vl}A z`Ac{F%M|ld9Q|ug^H0ea?w_dcUH?!S9jkeDCVb8Km#KK1fBNwuQ>=kdzL^=HmS5LD z0=~63*!EBGY55WQm+%?%Pw;8^@%dLh_q(S3lYCl!eEy|;_WYB4T7G=~4gcx8p91?Y znSq;d{wesh{P_G!^RwrlsW$`vw&R(9^w00_W{TlAr&oJgKS;iiAF6xw zkCU(Q55c+iuluKuftOFqFZxHrxA)b^zp2N+*8b=Ac|3m@2)>XXLX7@}{Ru9<#y9;Tt{m3glnr{u#$V&zC8dj&%50Pvf8D3;ChCcl|?UFpju76TZg3OvU5) z$C+Qvr{&l6FVhSnEO4Qx{HkC2rfL5qpOzn=e<`0m|0JK5AD@4thd=r+H?sbtxqk{i zEk8d0(){fCC;7De`24G0@aRul_D}L@`SJOe^4arG@@e_;`8WLF%aDINz>L^k^8K@Z z&F?Q|is3h>S9_XYOTLgFs(bX0ldtg)!MXOY`=^hAmru(t`p3dI_}#A}|1$e$(Z4YN zZ0!&3n9Kc7!58vFh|xbzzQ#WUm-xrar{x#@ejtwz9l3ln z`iqyt#Z6l_jlUJ`tMwl~|I_|s&wt6Ma;6x5b9%L}^^4#O`JuW;|2X*?{}7yO|GIzr7e1kju zm(POn5B+ih7hLo&>>sqwKX%UL`G?>O`60yUA17bqAA(E#P2B=VtsvaK`*Yh-UnodjFNzKZIz;KLlsYKZHo=-;X~G%|A2i2j4$kkPCi%&lIZ* zOnUUR{u>CsFn>XH@A`+zOadFfJ`=uX{+X$G-9LT(IPCdPd|G~d{-u2O{F8iIetiB_pZ&XA!2V}u;O3iu zN?Q{19UFkCU(Q55Xn= z@$zZ;MgKVYT>taue`s^pn12Y-jDHBun12Y-jDJ(F|9HngglNV;1ZT`YgvjUL=z+hB z=AW7MgYTa%$np7KrdVBI(xa#KpX3Yk7gYDIf2hnPu<`3N;cMofnTpr_)7Q_>&oafP zL7W+%mS5LD0=~VUv+bYY)AA$qFX1!jpWxH-d|)*SXl!_$T?a`Af$?I=(Re|^$#Li@ejh;@(&{N z`8PUk8yNo#=l&C-SLl+Ps^|C-~96p-u*1o z{z*P9KR*9bK70O2J}o~!|Ed??Xxcx?r{%}zU&?3CKgp-%$LHVZ;EjJL_g`}J7sC0c z;M4Ns^DoWMo_~^0%a6~$!JY2=Md05OUcN>D{QhpH7=CklwWsx;;i?KYz$<9L4dEmoMap5Y709;Eef)5DEQz@Nc5=FEfAX`j;u@r#Sl8p5~vG z;0yktx_A9UWpu3O)tT@$<6oxYasKJ=pP6C}g!0YI__X}G{>?w%;Kq-d_D}L@`SJOe z^4arG@@e@I`ghx(`qD=BUs&#+1Hq@|N9bR|XV5>vr{%}zU-ka`{=H@YB%hWapMNQz zJ^v)1mLH*i=R6ttw~S~0zJJ!Q`TgBYG5qHAYG3OI!58vFb&vjW@-_Y;IM@Dl|MW5N z@@e@+|5*42uekyEw>9(ccSQff{Im7>>*aYoe~^6R{2*fVkCQL>2jLR`c=@#aqJNxx zuKyj?`NLBC{Ey2&_z@o~{y{if{y{`D{$)094UK<`2+@px2+o*)2$9df>MNfF^UuZ1 z`l0JzrkG!1=wJI<{|P>A{m}J~jW5hUbNSW%)5qfHXN-JWeqI0OpKoySz5fC1e`YGq 
z=bz-$^5gR_<+JCX{ZE9*=ilJnKLPeX7c=|UzJJF2GR4*tp75ow^9R8f@BlPbrSK0PY@M-xG`j_w-^iS|< z`SJNTeER#pBG2D*^B2DPr{vS}M`Lz7_{7d=l`6v0b{P_H)T7J<#7QX7^@7|Dqe*T$>d5-*( zd|H0dKTf{s{p01+@{9hl@QogG>_3C~C;H(kXO-`twO*cYTl@Td`#he%OTLgFLhSm- z#@G0V;9P2P{Nv28=F{@)`p1@^>wiae{?5o3@x0T+$OCJ$-&c@`d>) zs(aTzRAwTXFEA6nX8o6`c-=pJ^;q^N()`-%KZMxz50#m({D!Z*$h3de59D#(k;^xu zzxb|KkFRce`1l*yzP9{N`Rw^G`Lz7_{I9M$^Xp*$HFM+Q8-FFAmLK2voATN7Px5K` z@%cA;-4jjwC;7De`20)x?D;48wEXz|8+`Ze$iE$UmLA7H|M`YYG5qHAYG3O!!58vF zb&vjW@-_Y;IM@Dl|MW5N@@e@+|7iGr`j~Hk`6t@ka=}Ia!u~<)_mA$F$MgSz;0yU7 z#ONOhY-#9hv1C)hY$(<+j@I6{$=Jb-SIC| z%ujLjuYJuw1z)&-qPlneLuGWV=GB?-HRE5V;&J}z$A?U@215B}W_((HUH|5vZ*#e5!lYCl!eEy|;_WYB4T7HE7{rOw|1?<0M25!Fj zXC?Tw{0RL^_ze0d__X}^{2TmU^-bX4PCWCE{`vjgOfmfC^lD%02f-KeLv@e-aq>0( zAvo9mb^r7+@bYQ-MgLg%M$bNDL;m^odnV@8H2kb5|0JK5U-XZYZ+icD`Lz6^e=K~% zXFmz~mpMO*(Kbbe&UKLlsYKZIz;zp3|MTIcU%;~zpa;~#=E<{v`j^RGJQO=$j^S=jmhSuf~V z=GT%hjDHBR>mMpp(@*i}X?>gWHS^C*#q0j*>&}^9d;WG*)l%-Q`V;T!kQY{k>@AzN%3#J=vI zf=|mY?Vl8UuNb^8?Vn3q7q>5G_Lt)PtUiCp7MrL11YgJxAx8f=`5ONaoJ%G8$IGYX z7yYB*d-4mBf9RL5x%{Gk_58ZNzcl+3wTuT=*Y+*!U0qw9LcqT6pMo#shY+KGoP3Rc z2rluDmru(t`p3!V`d__2pB=a@`8VDClk5J{_=n((`G*ks{2RUaay0&B<}Y3UGR3AC z{`%T~k$mC)iR#`R|4^A};9QxW#`l!3@h?;HIREtHL#Eg?h%@8U^6UCXz<2JKZ2Kqp zwEPJDOZW`>C-}7d`1~85`S90oWdDWb{wevi{P_G!`Rw^8`Lz59{kwk0ADH$}@M-xG z`j_w-^iS|<`SJNT`q03%f09qjkI%o9&z^shPs@+bzrigpK>nc*f4Wt<@1OPZ+@G&x zip|mbUGjzeP~D?{oP3Rc2+pNa_fH=VFQ1lQ^pAzFTD}GNx0Lx{S@bWg|62R=bNTP{ zNxpG@5Hb44$rt>CaEX7sd|H0dKTbZ^|NQx1=KR)>f6EBbihmH!mVXe@jDMMf6HERf zL^J*&IAi`HL_Yt9pZfxse=cX%4_*H<#rzUO|Ju{}t>n|z4_*J*_`>`%mtWmKeJp-{ z#>l7T*Y$7y`9@#b{swUVkf}JIf09qjkI%o9&z^shPs@+bzro6-ru~zAT7G=~rF{1M zlYCl!g#I0O$r~+?e*?j%lg`NI7Z)xGNna=4EiVdwEPJDJN#+e z{s}%UKSKW!K7;-VJ}o~&|9<57H-qtS;?Mv2^Oa07W!c`m)YJOC5_};)RQKo~Ctu?q zf^$t@_fMYyFQ1lQ^pAyaaL;ET|1#%Ca{k#mf0)aEA4u|r{19UFFYFI;@iqP-xWqqR zJ}tlKA19yde@Aux*ZTMIwl9-v{zhG`<~E5En>g>C-?pOzn?e+i#K{{)|wAD@4tO+WLNjqHE2+&?9s zmLH#gDW5(6B%hWapMQhvw!GD{f09qjkI%o9&z^shPs@+bztNTlnf6ceY5DQ_m-5;3 
zPx5K`5&Cz+(~*C>@DkPc&-yigzL_b8-<)3UX?;5qd?7zn_vjxdU*jKwbM0UEPagv> zpO#Ex+g=C!gzo zM|J+bv;FT+llh0vZ_W6J;Eef)5Y7094mVwXmc~DXXvRMTXUso@XvV+H!KEet5TY6X z5S%gp5F(*}PrCuF|1#_Mu78o9b@Nnc?Q|P^LyTpbIx<#^PV$v#whjZ^^gC#Z^utouK3q~`13lo=${jZRz0_T zMM|mPP&F#?sMaSft(+F@Ztt%Dz0NJ0rUmC99qm2s^*^frky52CU;E2<+aHlStJki7 z7E!qEw;B_Ti5DAxkV?MxYIoNwFH^awWI!??8ITM}1|$QL0m*=5Kr$d1kPJu$BmKVJ{25^INL%k8+3P#{&unXJ7twDSyb4|iZ>pzl|F=>9E;tEJp41JfKv%#^q@v>UDry5bvp3V(aZdd^h4F;8;zaEia+p1K?rsD0l)q z4W0upf@Sb3)^i=a3El?pf|KABs1{)V!R6p8uzrEt--ZQldpE!v!8F(kZUiH+1KbR5 z0lOBsd~XAHfV;tw1unm1Ugz8PJol(L$K}{93QYA+yJ(M9bgyO3+{f#?f(ck2A04B;9>A6cmh2A zjLYXa@FG|SuY%W6|0Z}FybDf(Q=qDI+f@fH2Umgh;JP|@9ZADC)-ATzjm?PnfIGl^ z-Ev#Lx9*oL5260iI(I92y3XB-UPSx~;_o0nQRn7|53s%u!H0D&54DRNmx3$7HDCj{ z0ZfA%!47cCBDeoNVDBO~zh=SwBDWny_+D^7;)lRD!Q&`D1wXsUUANA`FT&qij*t*#5 zZv@|h_B~+lVwd+Um|yJ1!5HfA2M;5D9Q972{aN&X0elO*0=^Bt1HK1NfFFP#f)Byk zC2qTyf-9G}{ayn$fE&OxxDo6Cw}3rhFPH`MU=iF4?gtNnZ-U3cQ{dSpF5efio#iF& zI9vg*E^)`{I(QSjjrMopli(Ekt6l2aEnVtvp;y7{m*V(>tzZY(1@@wT7R)0)w$z=c z2M|BJ)E)n$;EAPfUN{TCfO>CXJFbH7ApRaW0e*n>Of7ZesCJoK-^yiff7UE>>uUfv zEOW;-4Q@od1HJ|A?4)CV{SV$-Z1eAZb+jHYU(0;Az5A8tRaZyH%g=XqsLZaM`Po%$ zZBy;t-J80*+rb2w0@Gj<*bKI)ALeqee{a+C-|rtD+G)P9ZhBtxD zU<;T?WNiIJq7Uo`vzmoL9kw+68+ZK{23@~}!KRFhH=&;Ew=md(cnjjKXy2x}>G>B@ zx&B>+;#g4`?k*_H5pYjo)VgM7YyiteS7%SL-JF{z4lQ+umbybr-JzxK&{B73sXMgP9U3OlpIIL5 zA1(|Qb|&;sQvam%Pg?&p>7Qo()1rS`^-r7rF`Jw#fi4-ChC zFzeWi@@AAbqr4gAdH~sWdH^}1yana0C~rl1E6Q6@-iGovS8k4j=_jf8!xGz>)W^@d zK5kvFY}EcO0c>ix3R+i$72(^7AbWuh_Tm;jSt z3QU7dU^CbPwt{Vr$;OP?X4_}7u`jmVA6w4GmhHLX>L+4t6S20*SUeez$NEgh`b@_9 zOvT!!Vr|p0csdquip85^@#a{(IUbK~Lvw5!ke?*-lSF<}$af0)P9fhZhWxIr^l_O9-o$a99rt}W|?U0b4-9qFa@T;Ca@W70b9W~M}3?tV^SaI z*m8etIU8HHbD`aaWFpo!5o?=_#gp-Ptj}bu&t$C6RIF_();1lBr(^M^SiC6~Z;r*A zZ_f8!BeCy`Qa(5^^~Zez=_y0$D<*UvM>f?KaHvg=P{Zc8UzKj|cx z0@EPQu{6%HG|sU!&apJkv2>!*^_xiGQl~8SJ{g)ZZm2J5j)?22*t)san{sol&y4Ep zuCBMUFW3LNc5|;}?vKoTXzpk9{fE7e(AQdbUDNTM_JU(r$n7$2H^wt<`yL%A<_ekq zT%mJN-_B)Kp{v9EV(RB*B{`?!+oLPP; 
zr(0@n%IgExmp8Fpxgot-N(~J4k7jhh@Qu+CL%qU5!-DRuXc{7xDe87Mo*6a6$Sh}e z>e@-25WTE>wCxKyormne=!nUmp}vG!n!Fh|`5G9?jhWmSntFz&o}sB{X!2|5>zjKi z;MxsG))z0qDp4VP_xu^5RP4S=q(l6buI#6SNKYIPQem%1A(r@DV4?WhbG~j+K z1%9F4FHEO@zT`Q*R7^7PRms5D)z|f=X5EJ(>i8;bH_o>P#3S0B3HPCi;`pgRJR68N z1mY9!!?&6J2jba4yde;usC7FuWBUW~Y#`nch)-a`nbChBo(;qs0`Z9jf$a~(vw^tl zWq!Z^_aiq8&b1e5FZnYWknuCO49NNO#hyP8-ReI+;IExHjONl%s}|sULCStV$GCfb zqLg_aQLF#+ofW9>eKq-7^7Z8AeVcjt_U3(oLuGvov0d&tfE0Q2(e z-{-XA2g#3+A0t0Weun%!`6cqp-lPAdA$lJ*~$+wbkC(n=%knbTMC*MbY zko*YwG4hk-XUNZ!Un0LuevSMF`7QE0-?7UroN2d_8%Byp6n_ zypwz@`F8RQ`2hJI@^SKgJozQ^%jDO{Z;;<2ze9eH{66^ua(uD2 za{a>>Ybw41?;-v8SWUi`d_8%Byp6n_ypwz@`F8RQ`2hJI@^SKg=u$`^XQHA0ak>4P{MSh3;9{GLp z2ju<>>Hhr27w;aO|M(2dm#>bw84qj8*OMp6+sNC=JIS|_Zzs=?50LL6A1B{OevteK z`7!d7Hi^-MajV~%!d?l5yA#WhxK%OSw zNZvudg}jHnmpn_JCohukCErhei2P0R!0YFPks%=vQQaKl)V)#G^pmei`T!`YEq;eMXPEwo9czJPO2BAYOhp z(0?Ev1>!0YFaL6&|3Ewn#8n_(UJ>X&5RU?J|1dam-Bo^$A=e=fY< zQXn1$;wlg?yFc2UIX(mNC=geHco~0-H)Hz)@hA{ifp{6$*ctr?;!z;30`W5L5oYus zh)03A3dGB}2c6M>ARYzcDiAN@Jt%T zHWBjo|86|U-~YQj$lw2CJXC?af0Orb^8W3~eg7usznuSa{>%AaKi_@zjrPc{S?q6-@7E^1IGK-(IO`T3n^zE~*r@o3;D) zpkLo?`KWo+{=Ju8e&xmXEjCnsC35U@RNKM2vNEp|zO6XeUYtM`hjDFJn!gtuC_Urw>p;k;IBGH zh(|u&<>Ctc`24*;;o_zLuq;hFs(=4C)W`9>or71~&o8QcIkqoE8V7#d`#h*>FTY=0 z&&QcukKNY~#Y5${F>i1?@4lU<-)~PJpQ3(zoco!t`+k|Vk?pyR@$ISBxOnCG@OGmg zUoQNy&Eqd>9_-GUUzpE%KYd(bJo|F*>YI`6q7sLzG{xbnjpF3KpHG`n#;v#X`;LB| zj&{I%h@VFu!0r7d z0re{FeZ5(7MmxX0&x(G#=Gy<{cf0bL{#@U%>%eyx;O4)HY(3Arf9+gtEZeweG(O#3 zq5h3}K20I2Rn-35aL;vAPV4vUsc-}8gMPi0a%#`z-jD8&lzE-_BS-JP-19K%={wy_ z-)Hq^!ETB2tNg0d?TwoA{`fZwU-mrAcR#=E`}d@8U*$^Y4|IrriVcHC?l&*&%ek?9Ps$JO_55s8`DKk^|Fl`+Hw)cvJ4*a!!M~aC=M~fM_v~4Dv%pP!*FNrD-Yocxe75xY zp6goX_GZD4Y<{!g<4nJG9G|cIemy^}{(L{&uYYs!SZ@}5Cql`9WI!??8ITM}2EH^I zsHw4=@)>=khYz9s4aHZT{;17$Kjb65U+m|0a~&6-ce`1Cr01VQ`+*;#e;?*qtsa;A zcAi`w^3eAJd_SI{_FNuvA7^rYxZH1_kE4DG-&d*nkzS?$$nL@CJx*kOU(9?}8LoKP z%*Ubf{2$}Xmn%QsFm5VwKaS|5I{#6gYMMU2y!-9&{e0Ry(dX8ya6Tyiyx)J+hj_`y 
z{dvUn8#;Sd{YZ~jAs^|@KDqO$>GM9HTixb{w4V8KTxq5BEB0%R8-6iFBu=C>n1p(lgy9=27FEiuA1YRjFW^T$8$V< z1ePvm<(i$_*}8*oz(?RC*#GL=smoYtZd_T5>gw+L-Ky?;yHh6%HCwGUO|zCPH>=0J zEG<&gHm{n6{M->Q<)|3bvMmG2weE4ooI7!n$CtXj(XXXai93p#JKI>kJbQY+JUc&E zo}W9lyRRTq4L!7`Y`yhbo!lN2Tkq_dumiKF%Db5CdDT|ZTxmCpJuf_sTiLf2d!=bt zTiC~{ZQK9#{xxj9)_T->&pouztYb`RkNX(*mG-o17aMqy&2+HY?_E!OLo-N!8Ku2z zsp*Y=UQf65*X(GPFdO>+sYNb(OOLs|Z?$dPR6E0tOK79pS?_Lihc``Y*cpxn=E6qj zhY&B{hL~Y2>+cKWQ?)*q$6YRoT{331Gcdy+Q&aEvy2JjK>Gn+0+32osb%x!3?|cY3 zZbPgvkoBJl<5RUhmnU2{(IT$a^df# z#UIJA+u1P7>w|Rb2JeSyO!{O(tX;W07sjV*{l4sTxhyPSa_7B2+6o(kJ$jUcSivvr zzZAx&YJDzGx?JjujZ5viyJv@K$c8fuVbnIn3X@sigz>3b|Bjw=xhyu?wMI3)(RIgo zu{TV61DvDFosCf%BK^A!vBFH&e>03v)%skXcDc0LwH1@)(i*0lCh3pdH4~Q*D|lu7 zcf%YsezV<48&iU4rUiymQZ>|%xWV>+-YfUq&(mH@)WfZGBH8w0il5H<$f z5kS}&up@x5G2pHM!p4C60tg!e9ta?81fUrKgpC3F$3Tva0S5#SHU>N^fUq&(pa8gOY-I??9vYPvt*3MxQ!E6Qeur(>pmjhB7Xq8ThstE&Jxs9>Sn9xzt>Jxkh9a=^ zDa_a|oqz)ifu%ZZyA7?cfe$^VO9Ym>Fzqd99pJNtX@6nRJ(#g?X%-GFe4J?kw!b^{ z2y@=sX#z{!kDa)a0>C#pSaCYGt)~P4n;czpnr1Mnr*s-qECe<=j9uXSU;=Q)d&3KC zQr>~dqk4q1Zr%%fCC9d1FZSX-%-r2$h4o{4gmVH28w2<*Wd{;A27DwDz|wE%$6nmW z0toBJ^fmaKj$hOm%?usz2L^!8BS-1Ld3dm|H869c0~L6903StObil&J1C_kDvvq*r z<$;w#KqA0eA;90(yi1#ffSN?ORS38s!0kf7X98>&0{peYYq(PgSd<7mg@DfmxLXML zLV){)fF%JQ6axJ9jz#<1D+lS{Qezx}iq+oB0vyOa9i|>(MSxt%%BT)p6hP{B_MZ@; zDL}E>yCuLO|KPFKQ)&x<+B+{ga0xDaMDx`fb2@NY0IA#Af5M_y1wiec7d^rixcHQ$ zZugiDTopj-_89P`0I0q5qDS}&Ezp|J$_oKLnMVN&o-= literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/productsales.csv b/pandas/tests/io/sas/data/productsales.csv new file mode 100644 index 00000000..1f6a4424 --- /dev/null +++ b/pandas/tests/io/sas/data/productsales.csv @@ -0,0 +1,1441 @@ +ACTUAL,PREDICT,COUNTRY,REGION,DIVISION,PRODTYPE,PRODUCT,QUARTER,YEAR,MONTH +925,850,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +999,297,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +608,846,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 
+642,533,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +656,646,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +948,486,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +612,717,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +114,564,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +685,230,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +657,494,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +608,903,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +353,266,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +107,190,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +354,139,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +101,217,CANADA,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +553,560,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +877,148,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +431,762,CANADA,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +511,457,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +157,532,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +520,629,CANADA,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +114,491,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +277,0,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +561,979,CANADA,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +220,585,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +444,267,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +178,487,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +756,764,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +329,312,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +910,531,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +530,536,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +101,773,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +515,143,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +730,126,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 
+993,862,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +954,754,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +267,410,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +347,701,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +991,204,CANADA,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +923,509,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +437,378,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +737,507,CANADA,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +104,49,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +840,876,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +704,66,CANADA,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +889,819,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +107,351,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +571,201,CANADA,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +688,209,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +544,51,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +954,135,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +445,47,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +829,379,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +464,758,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +968,475,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +842,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +721,507,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +966,269,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +332,699,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +328,824,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +355,497,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +506,44,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +585,522,CANADA,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +634,378,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +662,689,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +783,90,CANADA,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 
+786,720,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +710,343,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +950,457,CANADA,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +274,947,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +406,834,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +515,71,CANADA,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +35,282,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +995,538,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +670,679,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +406,601,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +825,577,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +467,908,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +709,819,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +522,687,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +688,157,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +956,111,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +129,31,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +687,790,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +877,795,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +845,379,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +425,114,CANADA,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +899,475,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +987,747,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +641,372,CANADA,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +448,415,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +341,955,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +137,356,CANADA,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +235,316,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +482,351,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +678,164,CANADA,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +240,386,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +605,113,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 
+274,68,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +422,885,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +763,575,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +561,743,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +339,816,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +877,203,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +192,581,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +604,815,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +55,333,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +87,40,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +942,672,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +912,23,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +768,948,CANADA,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +951,291,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +768,839,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +978,864,CANADA,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +20,337,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +298,95,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +193,535,CANADA,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +336,191,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +617,412,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +709,711,CANADA,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +5,425,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +164,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +422,948,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +424,544,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +854,764,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +168,446,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +8,957,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +748,967,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +682,11,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +300,110,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 
+672,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +894,215,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +944,965,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +403,423,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +596,753,CANADA,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +481,770,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +503,263,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +126,79,CANADA,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +721,441,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +271,858,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +721,667,CANADA,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +157,193,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +991,394,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +499,680,CANADA,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +284,414,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +705,770,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +737,679,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +745,7,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +633,713,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +983,851,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +591,944,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +42,130,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +771,485,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +465,23,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +296,193,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +890,7,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +312,919,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +777,768,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +364,854,CANADA,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +601,411,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +823,736,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +847,10,CANADA,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 
+490,311,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +387,348,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +688,458,CANADA,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +650,195,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +447,658,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +91,704,CANADA,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +197,807,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +51,861,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +570,873,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +423,933,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +524,355,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +416,794,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +789,645,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +551,700,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +400,831,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +361,800,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +189,830,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +554,828,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +585,12,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +281,501,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +629,914,CANADA,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +43,685,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +533,755,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +882,708,CANADA,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +790,595,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +600,32,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +148,49,CANADA,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +237,727,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +488,239,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +457,273,CANADA,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +401,986,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +181,544,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 
+995,182,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +120,197,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +119,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +319,974,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +333,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +923,688,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +634,750,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +493,155,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +461,860,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +304,102,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +641,425,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +992,224,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +202,408,CANADA,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +770,524,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +202,816,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +14,515,CANADA,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +134,793,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +977,460,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +174,732,CANADA,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +429,435,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +514,38,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +784,616,CANADA,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +973,225,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +511,402,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +30,697,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +895,567,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +557,231,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +282,372,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +909,15,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +276,866,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +234,452,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +479,663,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +782,982,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 
+755,813,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +689,523,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +496,871,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +24,511,CANADA,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +379,819,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +441,525,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +49,13,CANADA,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +243,694,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +295,782,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +395,839,CANADA,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +929,461,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +997,303,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +889,421,CANADA,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +72,421,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +926,433,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +850,394,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +826,338,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +651,764,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +854,216,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +899,96,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +309,550,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +943,636,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +138,427,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +99,652,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +270,478,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +862,18,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +574,40,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +359,453,CANADA,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +958,987,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +791,26,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +284,101,CANADA,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +190,969,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 
+527,492,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +112,263,CANADA,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +271,593,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +643,923,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +554,146,CANADA,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +211,305,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +368,318,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +778,417,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +808,623,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +46,761,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +466,272,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +18,988,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +87,821,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +765,962,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +62,615,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +13,523,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +775,806,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +636,586,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +458,520,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +206,908,CANADA,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +310,30,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +813,247,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +22,647,CANADA,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +742,55,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +394,154,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +957,344,CANADA,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +205,95,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +198,665,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +638,145,CANADA,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +155,925,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +688,395,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 
+730,749,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +208,279,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +525,288,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +483,509,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +748,255,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +6,214,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +168,473,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +301,702,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +9,814,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +778,231,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +799,422,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +309,572,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +433,363,CANADA,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +969,919,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +181,355,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +787,992,CANADA,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +971,147,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +440,183,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +209,375,CANADA,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +537,77,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +364,308,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +377,660,CANADA,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +251,555,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +607,455,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +127,888,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +513,652,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +146,799,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +917,249,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +776,539,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +330,198,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +981,340,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +862,152,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 
+612,347,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +607,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +786,855,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +160,87,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +199,69,CANADA,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +972,807,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +870,565,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +494,798,CANADA,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +975,714,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +760,17,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +180,797,CANADA,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +256,422,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +422,621,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +859,661,CANADA,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +586,363,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +441,910,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +597,998,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +717,95,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +713,731,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +591,718,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +492,467,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +170,126,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +684,127,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +981,746,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +966,878,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +439,27,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +151,569,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +602,812,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +187,603,CANADA,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +415,506,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +61,185,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +839,692,CANADA,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 
+596,565,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +751,512,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +460,86,CANADA,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +922,399,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +153,672,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +928,801,CANADA,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +951,730,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +394,408,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +615,982,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +653,499,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +180,307,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +649,741,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +921,640,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +11,300,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +696,929,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +795,309,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +550,340,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +320,228,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +845,1000,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +245,21,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +142,583,CANADA,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +717,506,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +3,405,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +790,556,CANADA,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +646,72,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +230,103,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +938,262,CANADA,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +629,102,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +317,841,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +812,159,CANADA,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +141,570,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +64,375,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 
+207,298,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +435,32,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +96,760,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +252,338,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +956,149,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +633,343,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +190,151,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +227,44,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +24,583,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +420,230,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +910,907,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +709,783,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +810,117,CANADA,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +723,416,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +911,318,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +230,888,CANADA,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +448,60,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +945,596,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +508,576,CANADA,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +262,576,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +441,280,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +15,219,CANADA,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +795,133,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +301,273,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +304,86,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +49,400,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +576,364,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +669,63,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +325,929,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +272,344,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +80,768,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +46,668,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 
+223,407,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +774,536,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +784,657,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +92,215,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +67,966,CANADA,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +747,674,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +686,574,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +93,266,CANADA,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +192,680,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +51,362,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +498,412,CANADA,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +546,431,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +485,94,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +925,345,CANADA,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +292,445,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +540,632,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +21,855,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +100,36,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +49,250,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +353,427,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +911,367,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +823,245,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +278,893,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +576,490,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +655,88,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +763,964,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +88,62,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +746,506,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +927,680,CANADA,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +297,153,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +291,403,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +838,98,CANADA,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 
+112,376,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +509,477,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +472,50,CANADA,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +495,592,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +1000,813,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +241,740,CANADA,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +693,873,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +903,459,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +791,224,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +108,562,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +845,199,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +452,275,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +479,355,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +410,947,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +379,454,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +740,450,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +471,575,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +325,6,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +455,847,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +563,338,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +879,517,CANADA,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +312,630,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +587,381,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +628,864,CANADA,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +486,416,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +811,852,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +990,815,CANADA,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +35,23,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +764,527,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +619,693,CANADA,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +996,977,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +554,549,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +540,951,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 
+140,390,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +554,204,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +724,78,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +693,613,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +866,745,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +833,56,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +164,887,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +753,651,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +60,691,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +688,767,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +883,709,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +109,417,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +950,326,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +438,599,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +286,818,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +342,13,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +383,185,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +80,140,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +322,717,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +749,852,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +606,125,GERMANY,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +641,325,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +494,648,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +428,365,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +936,120,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +597,347,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +728,638,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +933,732,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +663,465,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +394,262,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +334,947,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 
+114,694,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +89,482,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +874,600,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +674,94,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +347,323,GERMANY,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +105,49,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +286,70,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +669,844,GERMANY,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +786,773,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +104,68,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +770,110,GERMANY,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +263,42,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +900,171,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +630,644,GERMANY,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +597,408,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +185,45,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +175,522,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +576,166,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +957,885,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +993,713,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +500,838,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +410,267,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +592,967,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +64,529,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +208,656,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +273,665,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +906,419,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +429,776,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +961,971,GERMANY,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +338,248,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +472,486,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 
+903,674,GERMANY,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +299,603,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +948,492,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +931,512,GERMANY,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +570,391,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +97,313,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +674,758,GERMANY,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +468,304,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +430,846,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +893,912,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +519,810,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +267,122,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +908,102,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +176,161,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +673,450,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +798,215,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +291,765,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +583,557,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +442,739,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +951,811,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +430,780,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +559,645,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +726,365,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +944,597,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +497,126,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +388,655,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +81,604,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +111,280,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +288,115,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +845,205,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +745,672,GERMANY,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 
+352,339,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +234,70,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +167,528,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +606,220,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +670,691,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +764,197,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +659,239,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +996,50,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +424,135,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +899,972,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +392,475,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +555,868,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +860,451,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +114,565,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +943,116,GERMANY,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +365,385,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +249,375,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +192,357,GERMANY,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +328,230,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +311,829,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +576,971,GERMANY,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +915,280,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +522,853,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +625,953,GERMANY,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +873,874,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +498,578,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +808,768,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +742,178,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +744,916,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +30,917,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +747,633,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +672,107,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 
+564,523,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +785,924,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +825,481,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +243,240,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +959,819,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +123,602,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +714,538,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +252,632,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +715,952,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +670,480,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +81,700,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +653,726,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +795,526,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +182,410,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +725,307,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +101,73,GERMANY,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +143,232,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +15,993,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +742,652,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +339,761,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +39,428,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +465,4,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +889,101,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +856,869,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +358,271,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +452,633,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +387,481,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +824,302,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +185,245,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +151,941,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +419,721,GERMANY,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 
+643,893,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +63,898,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +202,94,GERMANY,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +332,962,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +723,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +148,108,GERMANY,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +840,71,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +601,767,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +962,323,GERMANY,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +166,982,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +531,614,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +963,839,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +994,388,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +978,296,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +72,429,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +33,901,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +428,350,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +413,581,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +737,583,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +85,92,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +916,647,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +785,771,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +302,26,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +1000,598,GERMANY,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +458,715,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +896,74,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +615,580,GERMANY,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +174,848,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +651,118,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +784,54,GERMANY,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +121,929,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +341,393,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 
+615,820,GERMANY,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +697,336,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +215,299,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +197,747,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +205,154,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +256,486,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +377,251,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +577,225,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +686,77,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +332,74,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +534,596,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +485,493,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +594,782,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +413,487,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +13,127,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +483,538,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +820,94,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +745,252,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +79,722,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +36,536,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +950,958,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +74,466,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +458,309,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +609,680,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +429,539,GERMANY,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +956,511,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +205,505,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +629,720,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +277,823,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +266,21,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +872,142,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +435,95,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 
+988,398,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +953,328,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +556,151,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +211,978,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +389,918,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +351,542,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +14,96,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +181,496,GERMANY,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +452,77,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +511,236,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +193,913,GERMANY,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +797,49,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +988,967,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +487,502,GERMANY,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +941,790,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +577,121,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +456,55,GERMANY,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +982,739,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +593,683,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +702,610,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +528,248,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +873,530,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +301,889,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +769,245,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +724,473,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +466,938,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +774,150,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +111,772,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +954,201,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +780,945,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +210,177,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 
+93,378,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +332,83,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +186,803,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +782,398,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +41,215,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +222,194,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +992,287,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +477,410,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +948,50,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +817,204,GERMANY,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +597,239,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +649,637,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +3,938,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +731,788,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +181,399,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +468,576,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +891,187,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +226,703,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +28,455,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +609,244,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +224,868,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +230,353,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +216,101,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +282,924,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +501,144,GERMANY,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +320,0,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +720,910,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +464,259,GERMANY,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +363,107,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +49,63,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +223,270,GERMANY,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 
+452,554,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +210,154,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +444,205,GERMANY,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +222,441,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +678,183,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +25,459,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +57,810,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +981,268,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +740,916,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +408,742,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +966,522,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +107,299,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +488,677,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +759,709,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +504,310,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +99,160,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +503,698,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +724,540,GERMANY,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +309,901,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +625,34,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +294,536,GERMANY,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +890,780,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +501,716,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +34,532,GERMANY,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +203,871,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +140,199,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +845,845,GERMANY,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +774,591,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +645,378,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +986,942,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +296,686,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 
+936,720,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +341,546,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +32,845,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +277,667,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +548,627,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +727,142,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +812,655,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +168,556,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +150,459,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +136,89,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +695,726,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +363,38,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +853,60,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +621,369,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +764,381,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +669,465,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +772,981,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +228,758,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +261,31,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +821,237,GERMANY,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +100,285,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +465,94,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +350,561,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +991,143,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +910,95,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +206,341,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +263,388,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +374,272,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +875,890,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +810,734,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +398,364,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +565,619,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 
+417,517,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +291,781,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +251,327,GERMANY,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +449,48,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +774,809,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +386,73,GERMANY,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +22,936,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +940,400,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +132,736,GERMANY,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +103,211,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +152,271,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +952,855,GERMANY,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +872,923,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +748,854,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +749,769,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +876,271,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +860,383,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +900,29,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +705,185,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +913,351,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +315,560,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +466,840,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +233,517,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +906,949,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +148,633,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +661,636,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +847,138,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +768,481,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +866,408,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +475,130,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +112,813,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 
+136,661,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +763,311,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +388,872,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +996,643,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +486,174,GERMANY,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +494,528,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +771,124,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +49,126,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +322,440,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +878,881,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +827,292,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +852,873,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +716,357,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +81,247,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +916,18,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +673,395,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +242,620,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +914,946,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +902,72,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +707,691,GERMANY,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +223,95,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +619,878,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +254,757,GERMANY,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +688,898,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +477,172,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +280,419,GERMANY,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +546,849,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +630,807,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +455,599,GERMANY,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +505,59,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +823,790,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 
+891,574,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +840,96,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +436,376,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +168,352,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +177,741,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +727,12,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +278,157,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +443,10,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +905,544,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +881,817,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +507,754,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +363,425,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +603,492,GERMANY,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +473,485,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +128,369,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +105,560,GERMANY,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +325,651,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +711,326,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +983,180,GERMANY,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +241,935,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +71,403,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +395,345,GERMANY,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +168,278,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +512,376,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +291,104,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +776,543,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +271,798,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +946,333,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +195,833,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +165,132,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +238,629,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +409,337,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 
+720,300,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +309,470,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +812,875,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +441,237,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +500,272,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +517,860,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +924,415,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +572,140,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +768,367,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +692,195,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +28,245,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +202,285,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +76,98,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +421,932,GERMANY,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +636,898,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +52,330,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +184,603,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +739,280,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +841,507,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +65,202,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +623,513,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +517,132,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +636,21,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +845,657,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +232,195,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +26,323,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +680,299,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +364,811,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +572,739,GERMANY,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +145,889,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +644,189,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +87,698,GERMANY,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 
+620,646,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +535,562,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +661,753,GERMANY,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +884,425,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +689,693,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +646,941,GERMANY,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +4,975,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +813,455,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +773,260,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +205,69,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +657,147,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +154,533,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +747,881,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 +787,457,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +867,441,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +307,859,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +571,177,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +92,633,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +269,382,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +764,707,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +662,566,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +818,349,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +617,128,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +649,231,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +895,258,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +750,812,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +738,362,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +107,133,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +278,60,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +32,88,U.S.A.,EAST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +129,378,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 
+187,569,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +670,186,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +678,875,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +423,636,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +389,360,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +257,677,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +780,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +159,158,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +97,384,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +479,927,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +9,134,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +614,273,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +261,27,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 +115,209,U.S.A.,EAST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +358,470,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +133,219,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +891,907,U.S.A.,EAST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +702,778,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +58,998,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +606,194,U.S.A.,EAST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +668,933,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +813,708,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +450,949,U.S.A.,EAST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +956,579,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +276,131,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +889,689,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +708,908,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +14,524,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +904,336,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +272,916,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +257,236,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 
+343,965,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +80,350,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +530,599,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +340,901,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +595,935,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +47,667,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +279,104,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +293,803,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +162,64,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +935,825,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +689,839,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +484,184,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +230,348,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +164,904,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 +401,219,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +607,381,U.S.A.,EAST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +229,524,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +786,902,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +92,212,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +455,762,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +409,182,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +166,442,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +277,919,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +92,67,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +631,741,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +390,617,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +403,214,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +964,202,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +223,788,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +684,639,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +645,336,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +470,937,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 
+424,399,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +862,21,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +736,125,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +554,635,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +790,229,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +115,770,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +853,622,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +643,109,U.S.A.,EAST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +794,975,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +892,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +728,123,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +744,135,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +678,535,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +768,971,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 +234,166,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +333,814,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +968,557,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +119,820,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +469,486,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +261,429,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +984,65,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +845,977,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +374,410,U.S.A.,EAST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +687,150,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +157,630,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +49,488,U.S.A.,EAST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +817,112,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +223,598,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +433,705,U.S.A.,EAST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +41,226,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +396,979,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +131,19,U.S.A.,EAST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 
+521,204,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +751,805,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +45,549,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +144,912,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +119,427,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +728,1,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +120,540,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +657,940,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +409,644,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +881,821,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +113,560,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +831,309,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +129,1000,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +76,945,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 +260,931,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +882,504,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +157,950,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +443,278,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +111,225,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +497,6,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +321,124,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +194,206,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +684,320,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +634,270,U.S.A.,EAST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +622,278,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +689,447,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +120,170,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +374,87,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +926,384,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +687,574,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +600,585,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 
+779,947,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +223,984,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +628,189,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +326,364,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +836,49,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +361,851,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +444,643,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +501,143,U.S.A.,EAST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +743,763,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +861,987,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +203,264,U.S.A.,EAST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +762,439,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +705,750,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +153,37,U.S.A.,EAST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +436,95,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 +428,79,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +804,832,U.S.A.,EAST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +805,649,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +860,838,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +104,439,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +434,207,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +912,804,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +571,875,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +267,473,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +415,845,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +261,91,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +746,630,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +30,185,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +662,317,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +916,88,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +415,607,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +514,35,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 
+756,680,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +461,78,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +460,117,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +305,440,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +198,652,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +234,249,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +638,658,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +88,563,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +751,737,U.S.A.,EAST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +816,789,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +437,988,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +715,220,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +780,946,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +245,986,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +201,129,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 +815,433,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +865,492,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +634,306,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +901,154,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +789,206,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +882,81,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +953,882,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +862,848,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +628,664,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +765,389,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +741,182,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +61,505,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +470,861,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +869,263,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +650,400,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +750,556,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +602,497,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 
+54,181,U.S.A.,EAST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +384,619,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +161,332,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +977,669,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +615,487,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +783,994,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +977,331,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +375,739,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +298,665,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +104,921,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +713,862,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +556,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +323,517,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +391,352,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +593,166,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +906,859,U.S.A.,EAST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 +130,571,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +613,976,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +58,466,U.S.A.,EAST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +314,79,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +67,864,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +654,623,U.S.A.,EAST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +312,170,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +349,662,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +415,763,U.S.A.,EAST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 +404,896,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-01-01 +22,973,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-02-01 +744,161,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1993,1993-03-01 +804,934,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-04-01 +101,697,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-05-01 +293,116,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1993,1993-06-01 +266,84,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-07-01 
+372,604,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-08-01 +38,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1993,1993-09-01 +385,783,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-10-01 +262,335,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-11-01 +961,321,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1993,1993-12-01 +831,177,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-01-01 +579,371,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-02-01 +301,583,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,1,1994,1994-03-01 +693,364,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-04-01 +895,343,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-05-01 +320,854,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,2,1994,1994-06-01 +284,691,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-07-01 +362,387,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-08-01 +132,298,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,3,1994,1994-09-01 +42,635,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-10-01 +118,81,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-11-01 +42,375,U.S.A.,WEST,EDUCATION,FURNITURE,SOFA,4,1994,1994-12-01 +18,846,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-01-01 +512,933,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-02-01 +337,237,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1993,1993-03-01 +167,964,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-04-01 +749,382,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-05-01 +890,610,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1993,1993-06-01 +910,148,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-07-01 +403,837,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-08-01 +403,85,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1993,1993-09-01 +661,425,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-10-01 +485,633,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-11-01 +789,515,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1993,1993-12-01 +415,512,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-01-01 +418,156,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-02-01 
+163,464,U.S.A.,WEST,EDUCATION,FURNITURE,BED,1,1994,1994-03-01 +298,813,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-04-01 +584,455,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-05-01 +797,366,U.S.A.,WEST,EDUCATION,FURNITURE,BED,2,1994,1994-06-01 +767,734,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-07-01 +984,451,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-08-01 +388,134,U.S.A.,WEST,EDUCATION,FURNITURE,BED,3,1994,1994-09-01 +924,547,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-10-01 +566,802,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-11-01 +390,61,U.S.A.,WEST,EDUCATION,FURNITURE,BED,4,1994,1994-12-01 +608,556,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-01-01 +840,202,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-02-01 +112,964,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1993,1993-03-01 +288,112,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-04-01 +408,445,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-05-01 +876,884,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1993,1993-06-01 +224,348,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-07-01 +133,564,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-08-01 +662,568,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1993,1993-09-01 +68,882,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-10-01 +626,542,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-11-01 +678,119,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1993,1993-12-01 +361,248,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-01-01 +464,868,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-02-01 +681,841,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,1,1994,1994-03-01 +377,484,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-04-01 +222,986,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-05-01 +972,39,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,2,1994,1994-06-01 +56,930,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-07-01 +695,252,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-08-01 +908,794,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,3,1994,1994-09-01 +328,658,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-10-01 
+891,139,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-11-01 +265,331,U.S.A.,WEST,EDUCATION,OFFICE,TABLE,4,1994,1994-12-01 +251,261,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-01-01 +783,122,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-02-01 +425,296,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1993,1993-03-01 +859,391,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-04-01 +314,75,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-05-01 +153,731,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1993,1993-06-01 +955,883,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-07-01 +654,707,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-08-01 +693,97,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1993,1993-09-01 +757,390,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-10-01 +221,237,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-11-01 +942,496,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1993,1993-12-01 +31,814,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-01-01 +540,765,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-02-01 +352,308,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,1,1994,1994-03-01 +904,327,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-04-01 +436,266,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-05-01 +281,699,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,2,1994,1994-06-01 +801,599,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-07-01 +273,950,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-08-01 +716,117,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,3,1994,1994-09-01 +902,632,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-10-01 +341,35,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-11-01 +155,562,U.S.A.,WEST,EDUCATION,OFFICE,CHAIR,4,1994,1994-12-01 +796,144,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-01-01 +257,142,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-02-01 +611,273,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1993,1993-03-01 +6,915,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-04-01 +125,920,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-05-01 +745,294,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1993,1993-06-01 
+437,681,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-07-01 +906,86,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-08-01 +844,764,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1993,1993-09-01 +413,269,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-10-01 +869,138,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-11-01 +403,834,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1993,1993-12-01 +137,112,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-01-01 +922,921,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-02-01 +202,859,U.S.A.,WEST,EDUCATION,OFFICE,DESK,1,1994,1994-03-01 +955,442,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-04-01 +781,593,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-05-01 +12,346,U.S.A.,WEST,EDUCATION,OFFICE,DESK,2,1994,1994-06-01 +931,312,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-07-01 +95,690,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-08-01 +795,344,U.S.A.,WEST,EDUCATION,OFFICE,DESK,3,1994,1994-09-01 +542,784,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-10-01 +935,639,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-11-01 +269,726,U.S.A.,WEST,EDUCATION,OFFICE,DESK,4,1994,1994-12-01 +197,596,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-01-01 +828,263,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-02-01 +461,194,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1993,1993-03-01 +35,895,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-04-01 +88,502,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-05-01 +832,342,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1993,1993-06-01 +900,421,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-07-01 +368,901,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-08-01 +201,474,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1993,1993-09-01 +758,571,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-10-01 +504,511,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-11-01 +864,379,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1993,1993-12-01 +574,68,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-01-01 +61,210,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-02-01 
+565,478,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,1,1994,1994-03-01 +475,296,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-04-01 +44,664,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-05-01 +145,880,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,2,1994,1994-06-01 +813,607,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-07-01 +703,97,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-08-01 +757,908,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,3,1994,1994-09-01 +96,152,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-10-01 +860,622,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-11-01 +750,309,U.S.A.,WEST,CONSUMER,FURNITURE,SOFA,4,1994,1994-12-01 +585,912,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-01-01 +127,429,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-02-01 +669,580,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1993,1993-03-01 +708,179,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-04-01 +830,119,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-05-01 +550,369,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1993,1993-06-01 +762,882,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-07-01 +468,727,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-08-01 +151,823,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1993,1993-09-01 +103,783,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-10-01 +876,884,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-11-01 +881,891,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1993,1993-12-01 +116,909,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-01-01 +677,765,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-02-01 +477,180,U.S.A.,WEST,CONSUMER,FURNITURE,BED,1,1994,1994-03-01 +154,712,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-04-01 +331,175,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-05-01 +784,869,U.S.A.,WEST,CONSUMER,FURNITURE,BED,2,1994,1994-06-01 +563,820,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-07-01 +229,554,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-08-01 +451,126,U.S.A.,WEST,CONSUMER,FURNITURE,BED,3,1994,1994-09-01 +974,760,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-10-01 
+484,446,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-11-01 +69,254,U.S.A.,WEST,CONSUMER,FURNITURE,BED,4,1994,1994-12-01 +755,516,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-01-01 +331,779,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-02-01 +482,987,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1993,1993-03-01 +632,318,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-04-01 +750,427,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-05-01 +618,86,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1993,1993-06-01 +935,553,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-07-01 +716,315,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-08-01 +205,328,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1993,1993-09-01 +215,521,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-10-01 +871,156,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-11-01 +552,841,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1993,1993-12-01 +619,623,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-01-01 +701,849,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-02-01 +104,438,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,1,1994,1994-03-01 +114,719,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-04-01 +854,906,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-05-01 +563,267,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,2,1994,1994-06-01 +73,542,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-07-01 +427,552,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-08-01 +348,428,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,3,1994,1994-09-01 +148,158,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-10-01 +895,379,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-11-01 +394,142,U.S.A.,WEST,CONSUMER,OFFICE,TABLE,4,1994,1994-12-01 +792,588,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-01-01 +175,506,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-02-01 +208,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1993,1993-03-01 +354,132,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-04-01 +163,652,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-05-01 +336,723,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1993,1993-06-01 
+804,682,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-07-01 +863,382,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-08-01 +326,125,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1993,1993-09-01 +568,321,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-10-01 +691,922,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-11-01 +152,884,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1993,1993-12-01 +565,38,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-01-01 +38,194,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-02-01 +185,996,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,1,1994,1994-03-01 +318,532,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-04-01 +960,391,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-05-01 +122,104,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,2,1994,1994-06-01 +400,22,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-07-01 +301,650,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-08-01 +909,143,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,3,1994,1994-09-01 +433,999,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-10-01 +508,415,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-11-01 +648,350,U.S.A.,WEST,CONSUMER,OFFICE,CHAIR,4,1994,1994-12-01 +793,342,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-01-01 +129,215,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-02-01 +481,52,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1993,1993-03-01 +406,292,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-04-01 +512,862,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-05-01 +668,309,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1993,1993-06-01 +551,886,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-07-01 +124,172,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-08-01 +655,912,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1993,1993-09-01 +523,666,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-10-01 +739,656,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-11-01 +87,145,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1993,1993-12-01 +890,664,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-01-01 +665,639,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-02-01 +329,707,U.S.A.,WEST,CONSUMER,OFFICE,DESK,1,1994,1994-03-01 
+417,891,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-04-01 +828,466,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-05-01 +298,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,2,1994,1994-06-01 +356,451,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-07-01 +909,874,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-08-01 +251,805,U.S.A.,WEST,CONSUMER,OFFICE,DESK,3,1994,1994-09-01 +526,426,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-10-01 +652,932,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-11-01 +573,581,U.S.A.,WEST,CONSUMER,OFFICE,DESK,4,1994,1994-12-01 diff --git a/pandas/tests/io/sas/data/productsales.sas7bdat b/pandas/tests/io/sas/data/productsales.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..6f18c5a048115634347d0994b4879d5417beff29 GIT binary patch literal 148480 zcmeIbTacv5b>Ei*V#O8>Sq%}67~8O20U_u%%|J8&UPcs6e1o~5m)J{VEh}zj5 zY%j38C@mKBP!{QrO^R%RAlaet!-PX&l`ghhB4xKFJxqk8h|-l43dw4{C|YmzAlYNt z;pi9JBU$3Qs`CHO?>p&}ee!%|N9bWXVyAlN^6OLQ%RKqdb53T3(0%=lfA$OGzx1IG z{)4~v$2Y?7{_T%{=kkBB`N5BTBz*Fdm)`e@_ou)1U6;h~fAZ4r|77@Z2%Fz^iGStx z)!U<+&%bbE_ty2>S6{d>8v1qp>Yc00f4k)F{@Rbc{i)yj$5;8f`dOpDH%qTe14kkJ zzSVEUU;eXizT;DE#=7<00-oL^Bmj5`t*~RyN=JbF6 z)bjU)ZOh+y_VvsI|C)Q?Ph;#Z&D>f3^1j{G92))mAvJB#msaQXe>Yt;MxfBUz)-+$Em z{+D{+@Akg`aPRvkm%D%amwVs0+4swz3A00Z@||Y+|KaG{@im;U=5P4upN#+itH0e` zyZX}A>sOc2=f>6Bcb2b<@2=mycJ!&)>Ow>&EEz%QshlYc%?={^aye z{pmlm#D*)s5jjWi#Mg}9{+C1ezw_P_9lw6DS-nnl``hCoe4v*vo4?eo{_nVruXu#X zKabYrx8QH%HRu2Kn*8anMZ3p8uiD@Kqc!;>_<#SG!QY6l7(eYlU6bE_De|Vjpz_22 zF@(SK{7-!Lg%`u}YyXc%?(EI@8Z!Ie|9HUoKmK!n{x^T?FD&ul5YKf^|8K^hL;rm5 z5dQnV>;2FDuJ2y)1N?sl^EgO;`29op_g;PN{#RdL@i%`a+B==Z*PQ=}A^gAbH~-e( z{yV?D;?I6Ea%ZoDkNC3{<4=2a2!BP6zl>o2_!X5OKDQ>n`C{abzOM7XvL=5AVKBMApX!l|9A-h zfBK)F{4f9Pe_bIDG43D!db9ffod0A9Kay6zwwSj&2TlCl;ri#(H?BuW9vue1NG zS-qYxeoDSa|ET=ttC2t9^Ue4U{iE`yn78w6{!$-0^pDCvdNG(bWpF%$$ar|d| zhyLmKmzG~~{4g#|e-{23u75iI$}{nolW#@t6X;y-zZm}=`bXtIMO<#*Q0)&6{nO{q ze=pv{8RqXP)_KMUt}hP#)8~V}NK_#{PybWckN7~oL;rOAk8N*n<0ouj+??q4d-PAo 
zzxnb@clZvVpW7YSkNCjn>(D(Dh?SIkL14_-^63s z{qT>0kNHdgIP{O?M||M^1dU0>G zdwa!i5f?^ZgMXxc2xk80<(oI3zqU$j-?{o}WVkPd7jE3a2!;La^D5ugKN^2!dHjX( zc|O+prv9<`h!5>&z(;(j);}8mQuhX)U|gPL@rQgf|D*9cdK}tYgML2a z`Y!pl{xSG1__Hj2a(tTl$KY=xKeNg8(W`$9{*>IC@W15S`5%LyrrHO?i}&CD*66kS z-+c9*Hzx1C@r}1XhTUNPviHL6TYr7=@4owYr+>Wq=6C+@_flT_yW}6G|GRwYyBFVY zUV09{)BiW`Pk&4M=KYuW=ly@Pq5akLdr!mir!Bww|K@w*Z~ec1eG-;d&0YWIboFok zR;sDg{G7kM`ZvE7XnW1S`4g*u^IwGD1*>cR%`dI~%^y+U&j&(Wo^vO>x%vrT3Y4$? zpHuobFRcDe)A``N_ul)#@MDXr(4wZa`0snd?^)DqI5UPy!YO}T>Sby;a|PC zs8EOBdGDX6vGXq%|Gr)RV|=ar^!`gLfARaR#hwSN-_x*0~`{l-k zoP|(H-;a9VFTP!UElhcDkN*>{!2fsP(|7Uu9sK^E#WVe@cs;|PpYs`D3~!|IzeNA5 z#m^XZOX`R7`F8!q+c)+WZ{X&)-g*0tcfa+{{n0mHdGr1^N3Xr|?kiWX-MM@9g%Ga3 z`tG-0d2@7o`3Kiue&L0yw-#H2TQ{yhf9+1#edqpbZ@l{M{nz3j2mblBm+!uG=hhd) zwQqdu?RVe#+3?Y8JIj9-Zr%9IqA<32z5n_m3WW6g*Ps9V^S2j&a{Y~;dE=XlKjGhB zl;f}8`Qq-4-FLq6+P7YPcl7ShPVV>r?C#2j)t@ES=k8v;b?3&d@VRfj^3J>W-w9v5 zarM?0@4xcSi;JDX&ccQlzw!3F`^(ptUwY~4i#HbE;cGECmcClX0?uvz(M7Kxz+YaD zt);7%^N6oY|1ZB^GWojrt<~=>Bf|3cmao5q-&>-Vzqfo{guFCv7yrBZ`WMRY{d48_ z`1k++eEmNcUz~aW*W&AZ@x%Tt{J;-xeI|t43 zx5924U-LYQnddNc#OW_aZYd98<~a=h3F?rCe-!qI_*$Lk(Dwl>~lKb%ySt0_G;u#;U~^t?qeKzj>R(n{H>M9=#Br& z$U~oI@qzn1v(KfoU*(@;{$Anym3bPw&&A;L`93~?{}CVPAG6M5@E>FUxKZFc@*Jvv zwtp_#Jx0Em@rV95>pX`2TZj)SkBIm~z9Y|}+Rt_60iUnr+jSnpe&+dD&kmCB$#bao z&k=WKKZWsw_(1g~IG1Dgw{iE{Fem&a1_bYh58Q-k)82;Zt zd~UKlF8yQnxfuKa-Ehu2A?7dn4*jG0XBYX^GwKJ)x9dEH{i9c+--^p9%) z_^rsDv5!FN6OaB;`6KjK46gGS{@F&{Z(oCdr2TfC$KZ#zBX{%we5oHib)M@tZhtoB zSbF;B$wA~cUsCy=I*-byz75oe(teNrQTb!Uw`0~hhwC4ee}=g8^cUctjPKAtDxZB) zSF-rV_1B?)RQ@*NL(4h<<}djU{iE`^K91jjeRHJsqw=Tl`|Jzx=Wu@nzDNJ4{1M{MqgLg6^pDC9@XP#l@TGq6=pU88 zgZ$)-`cv{f`5%?f{q+^*_cFdC|D*E5hoWD&kKz1f{4C^uuD$%y?Yl4DSfqQjb52YB zgYi$@Hvby@pXrlA{->HxeUir?uD^vomuh|s{SylMq>%rq=AR($l=_GZ`lp&7?nl4R z(XUgP|EcGXBL4~3F{w{V=bSF)hwf_(-FAg>RPsyur<%XZb@%(?`(S+I_$l=HR`X|w zpRF(dQ_T;se=>pnh!32z2T{Bx{ZY5r#Xg8s4iss7($+-&jrN_|q&KNf#k|6dKF zZN%jX=dt9M^pDNQ5S<=GzaMgck?{-ppNsjd|1W;VCid5x&{>?nTz?Dt=VE@`rLNw= 
z1bskh{M6~6i}|tszv8pMeRezgDYL&+|GJpJ>_K?>`ijp!wIjwi#0NwFSo{@x`O7)x z)hYF%e--^TcC7^gFSLH|_qXQ+G4so$hNDdm4Oe%D7FYyYeFumRnc=C9XCJ?_{#O0|b{JPb+`y#x%Jt2mzxvN7y}ifK?>l%-j8Eh{ z^jF94<^LXGU3)I~Xxr|@RKULemiTY;KKw8BODX?U?Voe#-XrQq$uH%fs`)eat#N(K_zwN0`ezEiT;V*B`pDK_ zU45@RXVqIDdA{li=Yizg`pe*-A}_SVIGyp${FA}wzIBWF70h3*FQ)!6_~SN?OP)7z z>mP$3u{xSGh@O_|w;;zt44kss2{|e0S;p0^iI(8TRiSMEk=QUZp;<^^d_n!v1m> z``BEc6waAf`-k_V9O?S4bk4k*-$MWMJ}AZq?(a?gWB7;nhdmsse+>Q+;_`^=mskH7 z{FB2tZXR+RVg6D-6zXr){-2TCV0=oxt$z&r+k25)Iwx=HAA>)AEAk)F|588L`p4k6 z(4TYsoGia(>mP&9`;#`=A1&=K^&eF~-%Z5-(=7kZ_1&z$b@r?MS<|m#+#rrJ{&0OX z>u(01{oOPEoZ3bqMSK{)ig_?x z|LFE(-#F*~Lh?QON96~MpZ1HY{dWGx@DKZQ^ZGy6_rg7cT71*($N7)&ChV8`%Fh27 z_BU@tZp-{k#y9gn2A_HRG{2;NXdU`T^-r>&`ZnX6`5(jn@B`65JV(U%Nqti2KdOGd zvvK4XI{$?3>kx5srsroo`bXu@?nnFQ)F)D3mHLk^_D^SiIK}=qtuInP zc=V5I|7IKQ57+Q2`5yhF@>@LD25~yqS7!c48=t*<6ZY|ZM_GK}`tHy_U4K(N|G#<) z=h%0RviQLD#i4&BpU?Qmqu(DhKQLVXbo^fZ|0(t_kEq`eALt*4{^|I=`u{%n*YtnJ zcj%vv->d)gTw#9xgZ?+`U)}iW_`Ul7{&z6$KZe&#KREPHpU|2^FzR^Dp{nO_YAI?$#dc^#g)F+<&PoJOa{}aTA6Y5*Z_voKKKiSW`z&_W< zjPJ<*sC=IL-eVk<`oWX`>GLn8H-Pwah2ux^J^H85Pxy*&)~)~lw-?p_!a4U|V#FzR-*5AHxqwaGqdgJpP>+>e_E0Qnk zZv*&8*mtM?vy3n5Zv*(Jh&v_zw^t=G@;%zO26u;5V-zK4X``&id^ zQhyEL&nMCTIs2$C(q9Ak=ggaZ0^>*OBdNcJ@HuX-t9+@y2Jk1SukLbvk@m~_+W`JK z#wX9|XZahMe;UAlbO^uvC|;#LX@&k7z-NCg?`zNOm-=T2ANA)Q?msYpRsA!7|DcV2 znIVsSq5c`b4_}J>)PJAZFZ9mYo98*8iJ(s{K;` z4B(Hy7P*`3Uz7U5qkq)?rht1d<~rY_e^mZ1=IweRt>|)jvFce$4$Z;sf{|{iE`q z!0(#{`$hg~h<{#=&-V()kJJw`|1^Y;eRay;NWMq^sQ$Tv`L#{|XMB-=8el(l=k7uD zQ0z{= z_joWqaQzkiNA>)4{-627IrBi7J`w%b9zMnp^V%ouv(NaV{#DEG)&H5-NaypUez5hA zHhy~bfA)vw=RfFwQ~zlEUj6?eo^N0un6%&4KN`PR|KG#DWqdD=BgTj7{EyDZ(+#0T z-D1xER>TMTN8Ep1`+VcQ|Er%d;`)L-L2kd9|1s=;+(y4Nx8WbDAME^(&QJA!p1*9U z(=e+>Q@>(*WtAGp3a^iRKk>Js0;(}U>uNtPex`Xc&|E*?L9 zKJhiL>(>AOn~UoIv#-SWH_Q4P`2NiLTlaj__qT!Zo9Y{gAACLz{iX7od(nQbL)>4I zZ|0vg`+N1zW7I!K)JM{OkN)cH@9`VN*E9Hu@q_+1^G}BTQ{1b_eNM)=^G^o<81cKw z_J6eBp}$oBr1}!~uTsBw^q0yXVSjc)`z7DbKN1Mh{5<-0oCj;@ 
zFV#QcHN=OzD&NjO8TQZjBe(gS&Uff9)&4QYXF8vZ`OEcR^dHs6@80~~M%+x#H{(0> zmumkU`@eHMXT~?`Bhmjo!2SpD&u*cATJ#?c;Lo_P|2UrSMtr6J9r{c4PYYf5g#M9y zJO5;i&oGJhru_}(Fa7^_`=8T3z(3^fBM!OiZ-)J6hzrN->o3*+%zNkiC$7H^{iXId zvA>U>ch306^~IsTRQ~Zne7@V<-we+`sr-G+tCYW#`oWWbQu)01=#29><2&?^%6|a= zOmx0S|EPS{<+ls@8;AZ;`3=s8@E*H0^pDCPBQHc9o$>Adqi+7HeCn%R#!;ysJo-oF zZzKLssQ)G3qkmNXgnfg5FwQf^H^v8t{!#h!dy)H?`by?+?E0JG|5J?rG4peh@6kW1 z{o~i8{b%?&jBi|D9QsG)^K&)Q=f!6F#G!vwe*03ie}wmu=@YyE$nZb=il@xKO8Y(e zC)NHb{GRk%rXL*oN98}@yltbO7#{}cAC=E@buIOIX1^oaH|HY-Q_*T$A-TA}P{o6P1hUFLQxB2;4 zL;qCs+1HWI|J3Q9YW_ZS=eAzID(RnUKK1`L^Otq{$L6E{xdZ#RIFBX2q<^aUk1@Xj z^*QD*pKn3`RP)34#kj+JtQmheJ`44?YW@uKmgh7weo6mS^Un^W{j38q{?I>#{_AQ! z`_fbXs!sn@^8@m4t*`!8&FAN%HQD}!>r0{jR?VNj9sM7E2(NYer9e0 zi}}6!|2Fr(4aTR`4<7x~`KMR^=eezvXU+J9{LjVq_v-&G>RyHOa~}QE+25=G&oO_e z_uwDI2d?i9{nO{;>4w1j!6&TmNPemQb+LbX_5T_6z4LW{DVz8=DVf#+MKekt`oRr~J&o_ji< zlkp4nw~P6``X0w^>W`3okN)cX)8hyBl^~BsJxP7x&|fP5kn!~Z&v&@~Qu#CP>vevq z{#Nb(r0d9+`lZzWRLy67sbzn#HbW~xAm975756Uf0^+M{YRbu4Su@=zhwI}=06JkN7ej*amhY9=D*1| z^_OA)6YS60m+?Ljf4Ke^&i_~2zl;1tDSut-lQWx8avr3qNK2LjA4Ue%>#4NdHKEWa}@( z{_q9(AAPpmpK*OG^nX{|KiiFb)_=`x|6u;|`I`F2;Pdk%rtI&Rd^`VS@JG-;GtOVhxAl*~-^6o0 zr+&u#rGE=9zlnK!!tsyz zL;pDRkILu%@jQzU+k5-b-qLYsQf^?sn3V&AC=F(uT;O1d^`VS`2Q*L zCu#l7_zwM}+JA)joccVZe(>lYmA?)CHrL;b@6bPzkNW>M>M||wN0WSy{!#h7Kd5DW zBhwEK{iE`EZum6oujKk(=)cz1UzOjI`#k1XX1`f~D~(V1{~U3rJ&^Nvfc{bKXWg9l zNUWiMRQ?F{^AX}u&NuTvhJTKsn+u;`=Fva0zX|@og7|R4Jmqlxqw*ge;<>(w*NpGb zKPo@`ROB9I`y;Nu4*jF@*?+-vENkc=mCt*nx0#9tOrA z`rn~{`utS?=egJi>|c=j!IS^#^V9kNhww|PKTEzx|42TD=mY5Htv1GqHS#|lzgPcn zk>5Vy{!!`&kN)ZSz54&&LA3jn`XS>x^iRj{)&HlooBIpN_voLF->d(Rxv${4vb!{TIilb=$}5H_|RUB&o}@4Yw{iWpFTg; z|L4#pyeE_Kf%?$wKkD>JpP%gC!}y_2$@vcbqw+_;5&d(>{hibgOa1ei8@FD(`qCE{ zljg?Ny8Ev~xWqTGi}L${Lc_R@injO*8l(PMfHE?x7f$d_{aVILVvCG{@PdK z^BME}0OAMNpN0Nfi_hoNz7cBuTKpE=r+mIrA1(FQTKtF9N04bE&c}b_i3Lg^^r$^sr^m7|Ds`?K=M8MOXbh7?>J@sC*wQxkIHYD z_xNcUAGp3a^pDCv#`8`0*JS!?(SKC?d{zDoai8~?GJaAYI`ogq--3Vg&u=8(p?_5V 
zBaE9(=BcGV@#r6w&-0n7Pf_wc`bXu@5r0ztOzH=Z{!#hum*TigpFbt}%ly+?qui+imKTjPKAts{PGR#^=jA zJ?Agi7l-~)`P3)v=i<-F`40V~^4b45=K45X|ET<xlY6+V9an zs{gtE7S89}`lrjEsr}c~&FxM2C(|cp{>k9a;rHEE=bQS+;D>|AoxKKrrcX@$WAL9q zKO8eZknv6ZQ{tokxq<7L|A;@}+xo}g zw|Kr+n17XgTmKk*p8q`L_>}s=);|{iYtb)l79Z#zQ~wxzo||}F;G6o#;O`=SZnJ(O z^@FW{4E`y&yr(qNSGN8!_!}5EyazhxoBGG#r@CR*pUCys)ISEF{V%DGh4@3hsecSU z`!A?N8GpdH^FIdv1pQNIn8%Dii zDe68G)-R-fu=S6@Z_rnEggSAapE321!RNR+VSZTJZ|fhOpX&b&_tmst>JyLt>H4R7 z=l|!3KNISI$+z=Ao&A!JAq_@74b|u|G@q z$4dJ>`b*_E&=u(%1me>G{iX9!ADh!Zl5f}FwE5BV&kT8|mi?)c@6lhX{Q>%PhCFqq zk39NI<+Fd6pEH;7?fjGBpVW7ObtvbX^*4hbP`BD<9arijkN)cVt9sA3c`MGVl;4$n zkN#5mbFBLW{tl^+mY@H&7N7lj(|y%` zyZ@*gKdSu?VgDS@IoB@^{iX8RpBJ8o{fH0rzuA9e*xzD*FdwUYPyR`@|1$H_%x_73 zAC>-JM@pr=ealb0c852q<^~ejmG`kyx(CH&pG3l^iMUv;kZ1M z@yF0V)qH-wbb~w#_eWfR3;L&;-(X)qW&RBDf&7C0spf|tkAC6jux0j_^iMT^5BBq( zNX8%fzo37r`2o6r_LF$Nb^528KgIriuT}Xa{Zq|9#JHUE`AYpz(m&Pw#~A;ctPe z-)U}P{$~7w{;B5AUjg?m86UX*7S6wEeA(ZG@z47!*$1D|h5S#o z{mhF!X8i&4m;Q0+AJzW$TJ$^n$TNLX%Kuc`pZbG=fXU-CWrN9FVWg`^Mh9O$1y{q5rC+qoxfBQNm(f&NkL zpFx+*kw<6zu(|dbMg4;)&F@f>?ZZO)F-9$XBYE(_5bFL=>OFJ zF8Ln))A^@Y|DPfMv&sBwrXL*or_aaJ4T1NM&wTyY7yGAo|9^Wg`iJ+-Fn)6UIP{Nd ze}nj$?3en%qksDRRR4c`5baOrMyN3w4E_=9Z@Io@`p(QhY5ZROGeEbq4>q&k)?Ws{ZJN~(*kb(7^oyy#3_f+~ zob_SJFZDll^V{H`;(n*CgZMre|2TdM{ZG~W4e0hOXOV8 zasF28FT?(TdE0V-FZGeFzYIP=4zf^y# ze!jeaK71DAQ}Rpow`zWa{ObnyA5y>A`pfXoF5>bL>%)?7>o0@P@iS$9I^&!A%iup| z-jeH!)JGourS@mBua@T&a(z-b|E4`(mCwE!)`4=qL;vV}?4O!fF+MYW;?O@TKTIMw zKYzmY%b|Z%{t@o)7-#2axV{(qzpMSfaS-hvy$Szd{*v#|KdSu`jL+t_%J=9WmCyZa zivOAY4*jF@Pughr^Z@o_{?b2Y{;8Y4DxdfNpR&Fo`F8!y;2**6$)Re$NB^kyQ-4lq zztj&N{iE{3_eZ;Dygx}9|e^fp{A9%+6L#7`b`bXu@ksq6LeWafwKCQ z-G9`@2c7@5IBu9n%=jMtqw;s*_Y>y#GJR#%-wgZDvF|!(e3SabqkmNU!w+Cy?cr7O zJ^DxGPrnLo7T>u3n)NrsKdj#-{g3%OApfJ<&$!>dh4+#19r{P*kM<+?kmE=4J^3G% z|Ku?8+a1`C_(1i#juPiDVae>3>3JDu#6XUnBK{)IVRiQG5S(s{bDz zG%IiIG0!CVQvZ1Pcn0U-ZWii$LjTnAd-eZxu1(LM$n$5l{9gT^{qIMN|A-INS3>{P z@_Y6FQ`FViC&c|7*LRWsspa?T{~MTBsec6Xmwcgr>iKxOA@Kh0qilaf|A_Nv_5A1f 
z1Gw+n&GvV+U+ABDKJj52{+Zot;_pfQAoWi@Kh^(ves`3`H~L4^zv}tP{!{p6gghwY z8~H;24B+!z{vrJ@^@-F!_54eG0}b@yh&n~`rT(es6JPVXZvFqyT~z;XuUC#EG)wg zItTYp^uN$w1NgJAM}D&(Uo*bUKMmjq^mm-soc|xdKSkWG>i-_V=lxXad~&9bME+?2 z{}AJ{&Eg-&r^r7I;2$5v=bFxMOMT?gUtNDyZ~X7Df3cvCME&hz`&B;2V_H9@{WAYF zfPaR()|Bg8rjJDaX#k)5*K_7qGQLB9ss2g#YvDOFeo|jJ^q0y%Lw;+FILhZizC(Yh z{04Qsr_}EoalCT`{SN)5@|mA*-;O^g(=Q(VrSb#(Q#zmT&|fN_ z=c+f+C!g6b>u&?bf8hFpb1s>F@aQkq{wda#bpCL-{!;n7%p2Up^Ue4U{iE^&`kO=g zia&?(-=Tk0KJ)eyJwGJ-kA^&7tiRh6_-DBOQSCp$xXjPT4$wa;pLs3z$umCC|04f1 zz&~@0|2h4G_(uCh{%HW8`AdGz73VMcBL6gmkNr*RPZ_R%RR7O#{~POAnf>zq$pQ9L z-yY%TF#d3T5$E4DewY92`oDYnXB+F*Jlp?qeKhrt!4H2R&a2Yr>zer|gWtXo`BnE% z4)D+MuSfojmP&9d)s*qk@11g*VI1-e;fYUZ4p1c`p4iiZl?Y? z$(QHf20Y&u{V7}2Z-@`{kLW)dz<-3i@fLJC;{)ffsecUrv{>KAS^on0X8y_G&sZ0G z4bNBV2V4Ib{0ZzorT-=0&OaG^=12KCkc^*PUrhaD@Yy$#&NoQ?Ap4I7jGsg9ORvKJ z8Q;wRlwThAH#m?+cD>t)F*cS$KVI}eM(en_5Xd$t4-DihwGn?->d)ge%n*>F@L$fJM>SV z&&L~{U|ywrR3zV{fBO8c{(l(#&V4ZB8|^pqKicz6)`!p{KV10y4%vTQKmJqwe|Lg$ z^C`Rz*FSy#q<3(P_%?k@=bQB}!#@Fe=Tp`nhwGnyf72zt0gjt=enRr?{-e(RKA-rS z*LCav#~0QAc`p2r`XYOO4*jLRzo*c5bHs7RKdwJ!{mt+n^WLd{TIwTt|I)?tqqD!4 z|C?d|K^>OaFZ;g-@F$pm>3#&nrvds)wSUaG%K0Jr9{r{A_pmSHK9=#1_B-^K%4gp4 zocmX)kL>!JHhy}~mwg?xLVe7Ue^Tw|z14fvr_z3p{!;myxc_{sMSPX|#iPGeK0kkE zAM4y2`b*{WTp8S)_pFv-YQ77a6 ziT-iqpHzN$HQJxugQ z{;B3~U|k=*4!+bUZ6W_u&F8rpo|9sHp#CiMA64^DP*-ZHpELUl`lp)D{&L;}$N0nX zS*X9&^RX{W`XSRN1^rXa-^KVj<~oq^3-z~ZetR#Do5$2wQa_aRPc{D-`GdL6FV)|w z`414+8t#vz{U!ZV%@2rco4g+Z^OyRhkbkP?Zz8_2j$WyMs`)4AKWfG@{&4;Mwf^U) zbo#KGAHEd1`_ylkzvLJ6Pdy*cmE(x-H6lf64dg zAC;f_;|hF-{!#fO?CVl5Hh{ZPvP zRP*`yz$fhAm;6%wt(xD!FJtP5jPKAt>hnEx)DGsP;3j$2i3JKzJXqW-zb z=bY&WhyLmO6S^;+-)d2ZW_+W69QsG)^K;R7k9)>1<$o^rPiKB$-^C&G517BOzf^y# z=AR%gT%rDye0l!=VtuRnhv#A*9W?P>W&A?^$Fe`w|Buk0*uDt9)F-z7>GJbk|8)qd z{?B`ZP7YPRt$#}V=#9YhPv_jfOZ!Xb&pQ8T{9gS(?c3QW>eW9QzgPcnpsU)U`5%qn ztN*iqgr76a_(OeC=s&vHKfU@t&sA?RzGeEs)IXa2c(%(I_V0wPCjPGE+xkc6KNn#k zj8RvczN+$V{iE}V4=2>k9|d3P2V4K>{8aznVch5Vmwa3Q==@|qbve($a{onrZt5R{ 
ze}edP#PK8bgROsb{w2Nv_U{~_&Q@9fvc^C0HLvT||Nr}o>i=g4&B}%u^Fz{qcK%8C zUwU6h$lvUe@6A6M{D8brp+Bi~{_W!D)2k0R_}s>n-<9^;`b)FF$KS>NnD=Do`o+{= z2LB55^_cZl#J_6&W$+)NzoY$X97nnRX8y_GkFbBb!u^fZFShW$CP#^;*!MW&Am{omDm_7?=qv&?>v z{!;C44x{}~@jf#BV&|Va|Eqk~_0I8ga=w{=GWgurpK|^p{tVDxs{P~r=${SlZxA2I zcjzybKYb13XB=M{A2>eE`kUdO*;|o&%=}7bzgd4X_(zBfo7w!Of6V%u!RNVw8S{(7 z^_S{@p1&Gn9bQ9!seJaYvk#2(m+Q+_t3G4+hv#}z|7fO93jN>J@uz`)4D64~_-6je zu%C5R*6A4^=pQrxWbk<}!a3J3#0T=t{FBAUzMw@NE3@CDf7J2A`s6wL?=XL1e{kp@ zl|My(i*b(ehyF41PlkVZ|44p+9qo7MAJzWmjTqljf3wsN9{r>8!)?T8<|icIqkmNX zF7{0$UBB7&H^cv|%cuOM)F+<&liJ@Dej@s3%Jo#x~=Wbk*O8@Lb8`40V~+TZL(|8O2;e7pW;*#7{!VT1ZG(+>{)quS5- zpZXLf-;;k*`5ga&_n%9?C;z1K_wjrWv-&*8r$hg!`~dxy;`4C*qw{|>j-UMVCprEd z`6rdn&n-yzFG&61$v>%l_NDK!J|Ova{mmHvbFA--6O2Dxza08UwSROk`ai!vmg}!W z|LA;-%PHb0;{*8){UiCPZ_n;V`_ugkQeS!WkIH9Vc$@QUxc*W3j}X_6@SNAsKPsPn zO^3|CX7)SukIJ8-uOsDOr9ScKAC-RyeRzyl?vFTr%>JX&{KEL*bLBnaIp2~0QSB%1 z0rGI1zl?8-{LkHw-Tv6sk1cCPH?G#*zrD==z<&0{v5uJWm-=Td{?=ifU(F=G=KO{J zS&Kh~er`SoKISj|v(P_l@wqOJ@9F%7{#lF9d%2FOPo(`z{j(N-5A&9F8pa2%?+g9o z=ZEg=A)fCM&*30G(Ep44&szM;j1R1nN&Zs*ti|6(U*#F|V^W_i>u+oE&k&bc2jzUG ze-{1MYw^Pu;h1- z6Z4nrze9hie6GvmufzU~zsNt;u5T*;4Ekt~<5TiI`b*`XBk!?`IKlbL=j+g4DnGm! z{j!bE3CZ}&^Z#r4=lme@n|=5n@ssvD^p|S?9P8tjUcXxApVqRU`IQu(hwCrZ{t@=w z`T1Lpe~123`E&T?GM)qD1LLQuzq{o7d13yNZ|Wa| zPkq&*&cygb|Csv6;7{O}E#_Y{`_255!4G%hbLBl`wimGJj+1AA{e1Ci*|!Un=>_`rBIZVM5)O^*?g`GWCyP|1SEQTeP7wAucP>#sxqsC?E9Pk$x859X)2{yOxJ z%4dJ>jOP!A>mSKy{U76h%KDPz+xeev{HXk0#Q#TJA2EOFe~122`Fy@J#%IZ2_8$!! zpYYEki_V|;Qph|KV5%Q@BIH9`Ohi!|8V^y`FI91)J0}L4gX|(hyLmKz50KH`86Md zFZILn{8{b#((!xsf7aL7=a=g%NB*bd_v-&VpRvdMhP2#4z{@)-kGvhuI@qz2R zL;v*oc)B6*+}woYNAf-Tr_XjeN5=Q)FO|QEeKGIx&G?@D zlge+eN4p#3Sr|VC=r5H&!~XIi=Xa)$%>E}We)q;ts#|>${?GUh{iWK!(?)Ll2KZ9H zl=4sA`B>xr?FY~=TU>Z3yat(qU8d*>KOoWJB3^j9@M^~ZDnE%i}Je^v9D{~IxnAo(Tz zRn2E#{4vLWo&KukH;DV`ei_Lx>91=3Q`pUW02n{1{enJ0K^LhVR z>JygylK!dY&ybfGaec@9<@#RGKh^wA-tTZZj#I`r`lrx;RL|$R@IwBqkbkP?v#)8! 
z^_Hg8r%IGjEp0r_>J~{iF6bu|9mx`j6y$ z^pDEt`rF)xe=@#9|EPTK?|6=t@rUuhkblzrukz2StJt55_&~lx|EPTKAJhGul3&U{ zRog#1h<@L}E91ie{iE8ycNqB(8K0#-@#r6w&;1?qAi4d8`dhVsrkKCe*W>%h^g}8C zRL$qTbUf#t@jd!S_0I_Tb?)PHej)!$aCc!pQTAFjU+{iE9d1b#o`{$1*m<^7LZe%-bo_3bg%-InW@ z$x-MNGK94A(y@pLrRcgJXQ4{~h{A<&R#CaiRS*;zMS?L;t9J-fx`lZZ|Wbz{t@QYocdYn2Rr{`@LTAg^qe!ksecSU&xaSz z&)NFN;GZC_@t$7Bhid(!@k94@j{X|fQ8K=*e+)kBOS8|*_(S_m{bTX5Zl(GH<}dk$ z{-cY>f9IYMemvSe()E?Ce+>J1zWo&U7%;vuKA8GP=coFA>f5LPrGBvWkIql$|2cl1 z9^m;(zO8>sd_05AHgemmI^WFyX#8INe~R(Ld!pR>N8|VE|EDK~2Y ztN+iyZ%~KM?6>ug#_!etH!&{Tq54PX`s zfnTbBRmX>O?7xm^ztj)5{?YAE^?#nrf5`faBb?g7Xby59)5A~&%=U21$=g?p3`{Vg@ z-eb-9!S%bE24M%Fnp`#t(gkjYn;QXb3%>0w#AD(;2&!3R*&|j+k?Mrc8J7OJ6>N}7AQu*wUXC5urFJ}GC z@(<$A1pCa4U+O=q9zVMrH@yEG@rUEn%s(0SGjBH5^^r$^ss2Abh|f3GDTnJXl^;GE z`O~k)SH>TXPlx`}`KbHsL8mhQFn&7nPilYD=(T9~jP=P(pE&fF%0Gl|c*1d%@g4d{ z_=jPJ=msr+4xn{>bEaQ&n5n{l+CdE_9Zw9}i?qvLz`a$*|b)VmR zxIYeW8^CYA82$4Y=S&zMsINr*&BI6illP~DEdNdai2myV{3-WEcjNnD zd>~)wp8@;^`f7fl@}>S6zz^3VcboUGUZj5p@Her(Q|D&-LG~XF;E&<|bLv~f2aX?+ z{~5sFz`Wfo@P+;vz-NDVx<6d%2U&j`z`u-flb)~SOZ_u|e+qqmh2t~hi~P?3{xQ#= zQ6D1yaD5T^p8IbQR2Jl;~D|vpH<45S90esre zeKzAK`6B;Q&rkLL@QLW33CE|@Cm#LN^=I|Y|MT-_x2PW^-=lvdA47B#_l`{8itl3$ z{nPP#_5V5cjU(!};rgfJ_v-)q(6w`}zZu`5e>#4z{(p*n%O>|1k}vZ=7muHg->d&m zpqtx!@PDQs9Qvov$I}gA8~S{U<5TiI`lru-jz56>rl8+M|53evh!4lu-{qfwM13gg zU-kS{|L5njmp&iFp?~`R>FhsjR^MN4<16D2^_4^asC@447$-P?8Q(N00te`A-qQ&ynZN^@~G)seGOb*+w5%#`owimESU;G@z%zj&c8T{~OoVWS?vD6o){xbMe#Dyv6uUCH=eCCa3 zI$ze`20Y*Jx#*wNUnBEFcK*q*{|NK}X!ak7mm;Mp=PY#H0{Ct`5D|o(M{bSgFG>P2QKa%lH{bTUQc&5iwqw>!XcOG(mk@}={=pU8O`h5EQQpvaTPsaFUeItENn&f-*k81xh_NUy( zF#b>k8PApDl<|%0i$njY{BSwi{}6GA`xowi9r{P*^WOYff$z{iDu0LT zFZJPY{iE`EzLEQ2?qBGChyLmKq5EQgYpd7CJo-oFkFjr`!%w;WX8o<3zbgNf`8~`7 z#vl5}p?_5V7IbIB`Gxqv@#)C_sC?EZH<{na_zwNk=coGrh;}27%lV7=?9o4cemehu zig}g#>oa}g&_9xoA#4@{*U;7c+l`2tzOA@ z=%0??tN(|s$em)HWc=j%D?WeZVtv^0d-Z?byU9Ld&R_B!`lru_FBa$jzYy&|qJES5 z#G`-u{O9-s&fx!+{*io-{^|3H56la<_&JO}^p8XT^!cg&e+9bmsjjc=`j-_y5jRh` 
ze@Fb`_;l!>zWvER)BX5-rw8$s^Oy0nQ2*-A7i#xkhj58MU=Dqr&M(&KAB#_X&Fi}L z|G#)q{h#-T@Sf4kfAaHhyZK%H{-~?jC&~E1=Tp#M)qLJxG3NeW>Z4Nq?P7kf{v5Dw zjyb<2zm$Kf=8v$x?ehL`#1HC=g8r)J2h<;Uj-K(A{uk%}YoBk=KfE7d%Knke{(}Ch zw*LgWE}dVJ`lVEVtLF1w=5vl8$uH%fs`&x>?gY<)@tgiH=&x#ix)&97O3q)#*FyhO zHJ^P8&DUgn<@!_5U)B6$?2G3dKbgKO)ZeQ4C*Y?1y3F6m&mS2u{=?1amvnzY#xLlv zYWqj`BEMyRTJlTzr)oa?vYsBqpTqdT=UdQU)%>T>U3q?*<5S$fG{FBUKh6C&;t%aF z^nX{|ANJrE)_o zZ`J;v;d~?O(2PGEpN0HWHGhnCWy(pnn|tN9D7xyTv-0@k{xqYX3~o|21QN zUFL5}=ijRN?2DT-KPvg9{8K$2`=S~9zcRi<|ET_d2)~S4jDM+5Z2i;a&(!{F&ab^g zm2c}GgFpKD$W8a>OZ)BokHKf%cFz6+sUK|pWALeOPpA(wzNvpoe4hV@E>HC<$+z{7 z!Ds$;CyNhU-%b5v@L8Wfq5q}6D%Ib*`DO5@Z^ZG#K1IeK+HdB64E`K>2Y$|BrXTG5 zkHNnJ-ILCDdG(LMZ}41Ge{se)^FIcE599Lz_m5IPlK9Qe4qUl?MPknMSLHtMjq5n<&WAGdF zZL-e9`Afd3e>8sRz8IG|4>EqK{&w;6?eKx;R`@v{8Q<1FhW+imX7$r%uf|u#ANt44 z{}_DMMe_VG__qEr_*0(GdquTh-hWg(K0`?L{~2`2_yByVA3XY}>u>6v|L4A9%KW_K zd-RXw2Yg|DN$2M?zM22&{NM3=_5Tt2D|t@^;{)>#X8lX!_v-&Vznkv2m-@u6e`)-F z{U3F&hgtmO`d+yINSnW%e|q(Q-Ve2lb%yhoe24z&^Z9hcQ>e`G!1WPd>1EajIR`loMy z;kNz?9Plo->yUwXEB;TXIRQvh42v;~h z5&!5PhyGIeGw6zRkBH=Z^q0zS;P*NEM>4*de=_|4nEH!+sb4($tLv}oji1ptj(hII z86UVl7tX(F>x0VQME)uK1m>5t-;;k*`D4tlM(>~Y!&0Cr1Iw& zKdF8z`JVif%D;mA#0l#^QXiGhzg0iqfV_0e`hetn@=vP$58$6vf0lf^{?^SemA}`< zae2!4Ib46K{L9E6w>sbMKQip+=RS|_VSY*b?fRR+=lx(a+K>6m^~J2e8GPQ`p88WI zzjXe;dVC&1SEuut8Q<(bGVI?(UOLr3F@NcwLjQNQ{p?R2p^nJ?CHW5hqmEDB-^4mo z#`owSmET~0ocNf(T;Cn~N99wW>}K(e{&DCZmCy6%{2bO?znT3<#`u{a{!Cflkov){ zzZv{7=Iw~*10~<1e^mcG!TPdwAmb1H@6bOg|5^S2F7FRl>yyvw{}=m*`SoY@|B*U= zp4I=2@$;h($ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test1.sas7bdat b/pandas/tests/io/sas/data/test1.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..951173ce4d9f9395cf5f7c1e2c51ac4bd51ea51c GIT binary patch literal 131072 zcmeI24Rl>qmB(+|P$MgACc3gTOFNqBL?%kC`FKfQhJdG#LcjpgW-vOLnJB@D7$9Oh zE85^Zq)LT~5vqnU9g7r5C1BcyHb7`!(xB1R!6?NBsT!n6fC6=>QZ&Gvv(NvWcka3E z^SvycHO}nI>f8J7yZ1R~|IX*VAqY~}#4}g7?)5?~Wt6D0G7k)LYXj!467tOro^o{@c 
z(-lTiJsW?NOi5LBj{Hyk>Z@*fd91wR{EDWJU%sXXHYF;T*DSB9e0Ozi)$-cq70WMi z9+BD;uleViSjwvEyx?nVTFt-I6RpiZlI&AjsBOLz-!lOwzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k z025#WOn?b60Vco%m;e)C0!)AjFaajO1pW*Nyz)r0&ouMR_egEgn&1;s6u%k-vpM=n zq15x?bZ9;Z%3<&PZ4mS>kq_j3>_z!+vDUkv4}!^Dt=CIEhS)qu-~U|7zS+^c96j#n zxsE=4I2AkK=$(!pbM(N`_dT16?RWGJM=y8u`Dar8Q;xpT(c2xp%+ZfMor;}w^gc&# zbM#V2KRTC+O*;ApM{jlX5=TGsR4R7N(bqeAi=!7i`r%)tVn-c)oufB9`cg-qdomR} z;^+xSZ*ue^M?Z8Z6+7(cy^h}K=!K3x`$Q^s$kBToy}{879R1)gQ?Y}NzS_}+qvt#N z%;TxpEsoyp==F}C=ji)qQ?Z*Jz01+#j-Ko2)4xc?4mf(JqsJUQaP)mYPsR2-dWWNz zJNo?3QvOqpzR}U!9lgxaj~z_KPC9y@qqjMFsiPl#EESt{^bL;Q>gXkoe&naA*fB?6 z@8~U#UhL_CymHi!y#BT#Z)oX<54~64{kim77W6FsrLW*tz!G87w-7(`<~MF#QU`U@-5(f%C#mudTJyTM^j+f6-V-x8ndR6PuTQ_k_5A8xZwc1`YDjt@zv9%8Up1}B*{_RgrFMI6e zU&6nX{1e+IYT=AuY8?=3s-cFqmveI{N z-}FOmH{MUT>aIvt~c|vNtYwrt6J0c<)3PQ)Bd7eLfZ`=a67gM^WquQ`+i-{ zp8Ldqsb#Mf{hP$R{^T}s1x+OINr1!HPOEi_K^cSUykRbbN57_&3^M%y`4RPJCEA`jFb#?nk~@ zH~Mc7w9HRo{WRsw?iP)5wY~cstn)#EF8{4N!tD%%zF)o<<9rs}n#rEHdtA`AcIq!s zFUH&1V?yova!r9h_T^WG(Vm}ZJ2)q<*^7Q0Lpc}fdN1B7XnUMC9L#=thu6-)-s}Cb z8|7gBHtrR_`>X=jk9sj4WdFTY&Xy%tKcekspOtnyPOWl+acH00iDMn@aJCj2y*9#* z<)~sq?IS9~hDM*;M>c{KYYyZk*U8JC`n^0snEm2(&24BztfdXuoQrU-7`RQmU2VT_ zGy6WyEyY{JgL0LW6AwHwo-eo)`-s`Eu?|o53))^T$GVrqIH}r(^&I@oe44vcsME>R zzha*X&e82Wv zA2{HDCiy3RGmiD&jB~8ppZm5h&$K&0y>Cl~?dF_Qyho_ne)`Gg9Qa=CxBV{cJD6Xl zyc_NnwCz7_<}b#7^LDR%jOXG{i+{gaU6|JioD;SWVg8!^5&8bZHt`?o_*`qo1J22b zHbKoiuooOEbbZh40cUV9>(%|qr57F zwh{KZtt@P@j)A=7I@xf2mz*&eKj!^nY?Dx@ffw#D`#9#ydBa%8_JP|en%lDPU>`@n 
znEoyB`q7+QesZghGkyX3QQqtK3o{;Ye!F59<_qjejEBUH;;(38nDq_kw}*G&T)i9Z z)a!ZhnGx?iiT&|)$?*G{IseG^$Vn=X9GFMJ5tO%9sMF--Z#3^s*k1}Zp?sW6O}_gl zggQlaJ%#taV7bm;zOU&0@o8Od0{hVqHVaxFcH=x|I7t~g{|Ky0GdBoYt$NLVg#DuL zTAatsc?tXG^i4ubl^Vci~cUhV#jP^;DMZ(fJ|(avRCF<M;^Jv5$nlCog)*7xuP8YRK((CU<;A#5n z>T#jpYp|Z1{+hZ4`->T$SU<&A@V<$0V$NeU$b7m0N_NHw(|Lj4( z!(WzvCcPkGy-lFr+Ium7&3wc-`QU`erx!3YuQA`}#<724ekEW(X*j&U8+-4~cu$`M zH|*mrw}jvKO}{+(K$sisi62kkJssz_1on}{TA|(&Ii+Zu&owr5k zbp!jLz^H06ELn`9uL1~nfHZ}d(qAW%E$bY?V{J> zF0y-GvyWq6>)aatu4wj`w@=`G9C-!kue_V^-mzY{ zU*NmS-Gic^UPH}3i1!rPUOoQJcK|s=)OFrnI9HqX2mNx{{o*`YAk6tKCdmJpUBZqt zJN)d#pmAsiY*dLx*s)B`;X75VwS?0#F;{w zgYs_(daXGh@9BX)=On~9&aviPQKFl56iwL=(EJ>XlDW(HYQO%zPFqDhwsJv z5jdNEnYj-8vN;FhT=z&)JWrqh%|3|pZLlMJUNYsc+=hNM^B3Pky01ZAQ{azwCKKYY zd~5yuv*vp#zCUceU+DQ|&h2PV;g-~VvE$T^L$cTbYt2@%L)0qPnTD_}(JD67I@(aH z&?EcYMo@|B$3C(VRJ?Um^TJ;z&AEE_5YA)wfg9GhnyvW0WcH&{JRxZZsMPVp1NiQ} zP3M)se0kH|I5%QFH|4J|`zF30CehCC--C74j33OSoNf5-Vb_^?_6Ox85v-Hz3aBr-(T$T3oCYb+5u}vzkOuKs#VCx7Gy*1 zbNk3ztU}Jw=e9Ciz4f688z0C^u9NzE2=--@ckOQP9EEk>JU22ZC!UEX#)bN;-t@9N zF~9JAzU6b6%sz{8`>cPC zO`Q+Je+NOmWrN}J1MJrq?hL;Vn)6lhC&K5+0N-I|2L*lJejMwYY47CM!++;8f9E}N zAJ&sEXl}-T)3$Jb2|eEve>s8nU|%-xS>=1NZ(=<(`>ZU_j=JbLq#Y>??XY5g(YNfC(@GCcp%k025#WOn?b6 z0Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k025#WOn?b6 z0Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k025#WOn?b6 z0Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k025#WOn?b6 z0Vco%m;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(@GCcp%k025#WOn?de zc@cQ^_pj0Aw>n=MibKYZCAspxoqkgfGLLVj$fgYAUoWdr|5BG&k|S^G={IG~y#8{k zxh%u_qxlz8Zt%DjHT`Il48Wf|5V&A*r;mo7NIfFkoVjF09YQ0DWw3!eYEH2>*$ zLGzy?4=y-6-$#)fGK`O|{|U;xcft90 zQ_Wo&)*sEkog&*79N$clO&P{V`(IGz^$X6woN6x1u>NTN#T2=8!SMwYnV(^NH2;7y zpI^M-`M;RvKmDfWztYqh!dRktchQNZ^El_06UVfYcT1X>Xnu`!zG*nl`R>H^#Hof@ zntG!7l#+KzRxw57U6>{&I$!fBliaNGIYzs~(c|n9CuT55-iNY^nIZ4}X=0-NGe!H) zWSu4O5P<|#T1cuVVam|K6#W$ZdUmmqt*53an{%qkI^~u zK9p6=40-QQ6BEs6iq`MR-S((dpo*cScBmKA0o#Ls`YlkoW#HG0}Xc=;Skbobykb`9n2~q^T#`KZE4GC99Zz 
z^4^#xCYsNB%4A(u`Sef?tJBmI&8L&RJF<#tCGVCrG0}V)DU*h*@`+Oou{8BW^C>0o zlB{Bi$h$C2Of;W7$|N_de2y(rXY8Z&YewZCI$zI`_o1v}X2^SgnwaQ%J+(-^IZV=T zrfnOg8b;F86YZZt^4^kFOh0*VOcN9BpY@cWSu4O5P<|#T1cuVVam|K6#W$ZdUmm%c1#0znPBFIr2V~Rm==| z?@tpG&1Wiy<`4a5+O|=uVI)mG(f%1E?=4xy^pp3-G%?Y9)>9_yvdX82YFM47o@hRu z#WR*{xYKWz&Cz?+wd6#4rQ$*f{X=0-5bslAsn^iu?0-8Vc zd;BqavNRW`9!YJEyk~|8$UdTUPzkOfgMq>WStfD3kiE@+qep%Dj5! zEy~ioP)3J9udXJ-D?f54g`+7(3bM)&R|9;25+0h3b{U*nM*s+f~ zdeYGc9RDfDKJDl;j((ftKkL}%9Q}x+-!1(O&tr~V4XHeNuf{;0qwjFaFL3Ndj$Z8O zV~&5RV=s5~xT8Pl_zTD0=;+Ok{*dF}>e$;Iz0=VTIR4#^y~oiLjy~)7uXpTyj^6L+ zPfLHpbF*U~bo60If8Hs7)UhWWeag{aa{Q+q`;4Q{I(o1;HGk(E`w>S!=IF0?`~z8+ z9RrSD;OK93{EHlWv7?td`dg*H;ZyF|O*T%}qaD1iLzRH*v@3pV8#*6pbR~h%>z4ldk46D5MRaJ)LtGxDA zdHkxp_EmZOs=W49dHkxp_QgDYF|U0wk6+AdU(Dkd^V%2l_{F^T#XNp7uYJ`XziO|2 z)gHfUuYJ`XziO|2)gHfUuYJ`XziO|2H6FhjuYENhzZ$Q7H6FhjuYENhzZ$Q7H6Fhj zuYI*1zgn+-wI07(uYI*1zgn+-wI07(uYI*1zgn+-agSfzYhT>s7x&s1_xQ!V_QgGZ zaj$)Gk6+wtU!BLV&TC(t$FI(7U!BLV&TC(t$FI(7U!BLV&TC&qdAZyp%o<|$oK1r+O!&$wvs?(p^zKm4+nbLD%Q&O79rp}$}nUb$cn-rl-{cqJFgS`3O zacnOi$L7cVDeiwOuMQOalDw{$XW+|l|Az8qx#be1`W9g>AWkzH9OJKKt%-_Bp?M_BrR?;5gpZapKMys|r5z>iUQBoKOCtWI^E<2YqJ95T|sA z6D^6bTP%`hFO7U@i1T@UH^weg#!M-?;;NdlwPUB$mJ}Ba_^KXLJH}Ktp~vk0*F~d8 z{3+kc)N6~3*|E|w(Xj`tRgEZ46qk%IsrmlRx6TD`{24i-d_?KUua#Alj;I(>GU5i` zEl}rHmj7FM!n5Z8X~bxC*O)r{&%1-#!ZZ8sHJa^P@V=CVfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb z0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@?4+IvT**D+o=jzWz9p_f(2gY))$Z;ZF zM$>wM{>cZ`IeS>!i&6i}EXT>6=R{2D-Gt-B5~%k!I8J;s>7(>bea~FIdHRR+cRGFg 
zQlDP$(<^;?+|zru`}D;=UH8Yf-(%uXtRAge{p41k-ssb3`gAiss(a$BPv7Fx7y0xV zK7F{Szuo84H~aL3K7G1RALi-D&-nC|Pha5E>wNl9Pd|3rr?>g^`98hYrx$tpQ3H#{ zezeu6-|f>U`t-q`e&o1MZ}I6#pI+nB3qAetVV}O-r_b~0<9vF7ryuI_>CHZUu1~M_ z>4QA|;9j5J>6Jb`?&$}%`}D;= zeU?v8_;jt>XS#o@PjB?;Gktoor|&!K)3^BaMLvCoPap2-d;5I)W}m*$r%(6k!#sV@ z8K0i==?i>%olhU?>AO$+^fsS9->295^rC={OP4mfhp!vloLBNsYN{^iOD#3s()M-V zE_3I-^PoyV(K&zq9NG^ZxOAqjV#})Ibkf{#n+AP%{Of=Iy&4Q|j(U7QDDJUqUViG- zl9lS3B$U;%o6+8}ylC@po;ak;sPv_tECT5s*2XFCzp4tM=%|iENuzPRQDkr50gAh4 z&Dh6YN!_K&$-Z$l+B<1n-%LVT{r(EH_r?n!p3`=t8V^NBH6KU&$!Pvndd%vd$^QIT z(VjXoW6Zr3HEJ5!Q`j-LbE~=jq=t}v?VISo)L?k}tQtf1r?8@XxbtuCt{M9~Rjc5J zYX34w9@n%K+BaPX((P%fsvGp(Z*qT{&GgcoU%R(L6Fsdar=YAJodeQ+$8__WKX89G zoJad{`Ww=enYa9VQPK6YZ@K=KncuI!p+5@L zFl|7qT;i(c7F-;RZ?PpkDx1TO={;vh7@Z#P>n2?;ep`*B@wVOtcigw;ng@IZ^SRY5 zG1|XdKlR66Y3KQwdIjxAE`D3rIHN9yqN7$Rv>&Csn2cq$_I9*yd*uEJI`!1GXt&SP zUAyhA3-7rksw%+EQLW|Zf80!;@jDdFfA&B4!JZ-8kIYbJCDNDbSdlc+-sil#ugjPn zx5rGYmz#Lr+B|v*=MUFgY?dnRzwcAibcWrku7;wc9>N574-b2<<5$zaqQ)lShPu8B zBy%f)qN7r9*wS&0`>?;L(PUq_5dC+}Hh`W~n9jQYm!SRCB`d$CL-RabiTUMr-go(b z-kJMlRfFx={<^z$>pFxjtF@HZ2hpy+NpY9-Vf!a2FD8+F-A1%`8_ci&kE%^US*`dX z+FvrO(r0f`S0~ZFhVrg==)SN2!+&p3Q^2gBJ3a`W)aldD%xbXkJPj-1s_7PPk}Cr)!!5p#>E-|B8temA*=&Yam{RUuy~@m9{*P)*5(cbNZ-L^jr@WX%_BG?+zH6x6 z{K);hkF45>_Fl46UfAd6?*1uEXj@ht$Du(k*FtqiEx#7qNts<|N*_O$2WEhs{ii0O ztmYSiboVtpzVm|7KUC8c+Sh&q?XOdRs-Wno*(lvk*k1mF^ZL3Z`oCzlwQXY0u;B4=J`ajw?CIOntR|9*x1w* zC_1W*_Cegn^VBuQORodV{|3^1llnuP)uy|W=7!rw&v6129ku3qW7g}TUQF~7!Tae` zxSw)A!OL4;+y>eo?S8R|o_nL&m4EtIHAz7YTi<=-&Rm@_7ukKy+IgO`@)7fp?{ng? 
zBTr|x!0hE3JeR`?SLIvK23HN==UZF@6gHbt4}D0v-TL(K;jx=|-D_zE9`0e-esAY< z#S@Fs{>~T72K+@nCoC%>yGgsEm()}RW%Ud$XzqU6N9;P>xCQ+W>ZH>fjGB@{{}rWZ ze*@QZ%UwN#&(}TrW0)xFN^o=3lEdiVLGuBZeLb%>qkSiAU)QNi$^XSW(B4kx84TyB z742w0rIST(Y?{5S{kzR^@6>PkoID-xyPNc?q&uogz|Bz`PoVz+;%wKe)Qe!}S;rGD z-4f0Ywy+~lYQ6=HarSW2tgs`u!Omb|GuPnz94y_!j%HL{8aP1dpWjyPK>G$dSL1YYRLgB>x97>hP}Jw8PoUktuhF{7 z{W+x%8#=Umn*47^>FzPR?1TfnzbwJ=x<_bVw&U&S#P$zq%K9XwaJzBTv=rLkru|VR zp{$m^g7#NU)T%n$4=Y`J{;})Q1G~`wfO#F6y<9;_???A6M0+ot$8ei))Wh4!A2%>Q zcfD{9?K||#h+Yuwx}k1sK>IHNLzPlcbkx)I+@GR+sY${Ob>C@_JpFhIv%*sle0U1; zeV(vv;Tn94n@?|p?}uxAT)uEsj+!1rk1F|IyK;NY@qA93vd^piP8hFy6yIy?b5f7d zzVHU^FLvF2P@zBU`ucryGM3eozd-vucs=@w8VNP)VBTrQ) z;Rd(g4X;O+@j2~<3b^m$bRJ~iBUHz=Xx|U7U-{m9^l`NBq5W%e63S|M4%+ShZtaiG zL;Gg>USYpKJcb8X`@V1cr`8_OZVY_528{Z4~E6Yo@ zp@Z8)&)2T2ElbdU4b5A7URrk&?OpI$WwNs8?Z;k5`(b!JnyBpjY5xH2_C0G56dmhRtNB{z-og9Y)3eZSzbj5v zP*xpZMEenXAGgoz;XJg@r}qx}uA-WrBY$`iZM5GLY=7MoIRC*(|EOmvUrx}u+CG;T zZ^CxE&DWXytL=AGwe|$qd7km~hDvRC1ITEH^m^`-R86ChsXGw(~cXEdx*}l1>mOFs|NJnN#C_- zU1I({`s3T&&U5^|`W~$3z6qbJXYu<;J=t66y$PqQqwb^ghRqkdPCmUI> zjY0cf+BYYs(0}zG(T>NitCh{86%n*|(|3&E4bXGhxFr{w}fN6gapi%~wBt?o#Nlo@_?@F_dy0Jzoj~>_IKkW_aB%+|pU>OyMa3XLn?&} zg-d_8RWxtaMmlHOb8PT@X#Wmk-wT?M54VqLrN0AXhh6(VXwO$aD1!K|Jx^jfJF2M* z?d!~66L0J0_8!@c_HC4>qoC-hb*It(3p{_a{|Y)k+vioMz)y92I%yQw?#p(4+k_Lq z?WKKlnlf%YPk7da=OIi{zRweuXC_-%;WpBza5!JM4Q`%&K;ahoK3mdb7;o_57R})P zxLRGEAHC$B3*VxrNQd0W3iVRa^4YT<>?S+GD_EswUV31{{f`_Z`%q6jE;;qo%hymlw!cB?Ohx~%KA{-w zM<~pj9}I)%Tg;pl@ZlD@D&OZ@Y+(hQ!}mEDhi4zC!Sx-yerqsP96@e&{1qG2jcs!t zE{v}s&b7$F-nY^z27B^QySjJGj3e8JJVX9hqyO+{i2qS~F5}Suj!nuumFs5iA^$7U zf8iPhvv31MNjWP#1lD-_YDUt$hsIyP8z@Z3WL= z*}V?^ZGSgELV3E4+8KrZr`IV+mwSGZ?!RP3`twql@5xVhP=8)R|KwzBr+AYxS=unV z7wz^XM#tw_7oLYOMfpBYSe}_|VTIdBpTgmM;WoH=_5p=ky5&}X% z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# 
z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2>ksJIQQXsxPm>-*M_1rV|{L1muJDRcccIH>!4N*vJmC7f zpk`-3>xZBJmK^i^H^c8h`ritXTlyJ4{QR4s=CT3TZ-knQ`dL3b{`oNSy9Ye}xe$3y zKjVkTKMQKk9B}I+RInmGf;qecHnnMR%zYuB`^s|0={CP0) z_<+aX7l-_Z-+|=Maj1EC!1Z@Q&CY(-508IK9P%H22h#smh}_c8_~GZ@1T~ipxPBwl zT-49{;q`w$jQs8akAE&ip3~3x;qlLcnllGnzYc2F_OpKY`HzEtpKF zL7mzRbwUHp9p~so&#gAU-t{qYDj`lHL!9tc$%QjvaISfLY+k! z>V(HLAI5WcR^yoqapq)*6CMv_PR`7%>eNA;+6;BV;c> z72>pHh!Y+UyKG02}- zR{0a#1#vnv#0iZj2Kf`qDt}_F5T_+WobY%ce_~nXPplE*EXoikJRZoOSXTKHn+tK~ zWQY?U59CiQtNe-8L7dtQal+$){E20iKe0-PlgJP!JRZoOSXTKHD}*=&8RCS;1Nozm z%~|A6w9iC{p2-j=G@dBrPc*CiiSB|pof+bU*6S$bPc*CiiMB$VmJD&i|KM20xw z@j(7Wv&x@nA;c-j5GOny$e(Cd`4j0g5h7Bj$tr&$b0N;03~|Eaf&7VNl|PX>h*O&( zPIx?!Kas5RCsGM<5*gxz#{>Bj$tr&$g%GD8L!8ih9fACbWR*WoA4E8lA$hh9?&5SpgzXvPgr1LsnUl(9KXqCl!txApLeIy+%vqXMokifiFhia2 zcrbI4S&e57L}b=jqWOX+4Mf zGtsjeEx69$l<;>%YabJKKHwE>Dj>;q&kE?0U`MF*>I`J^Gy1EzdsB4o7HW zqSrO98(pP!>Q7{lXV>c``$UR7J=&&q>p#r1M~Z!VrKd+<^7$*z9vSD;Cwh8xo7Qdn zb)G#k!>7;k^yp5lTmJ^nuJ=i=|4C1ec4^)E&-d(+g+9H})1z-|op?r;diF?@Phalo z(O#|F_FFxBB<0h$czX1Gty}-?o?V}hy#9B2dQ7YAf84W2PW$vePmjSWgZYIyPwPOQ z=Mx>|>9Nmho%*BCoZN?Kkxw7y>9LEnPJE)po?V}dy}HWNV?|oG?T_>9(TPTf4Oc7X zGXFUyW+LG=)cFJ`36j+$Ye~)`IhW*ol8q#rNVdYN=QvwP?jqSk@+`_oKFPr(hm#yd zvWDbzlJz9#kz7D>G0A3kK_W9i%B+?PO_flJdz7Y zE+*MbvW?_cl6y(^kUWbrH=pESlEX=kB3VOny8l0G>6H0fZi7u3=krB@t9nfBm?^bm zCtiL_{jE3MT;g0dc1o@JhvkvZP+ish!*Xfhe!>|)zPkEb-*Ui?K4k%)@<1CEf%}z# za#f&QQk>q0l9Kd3mW&Mdp`Gww#2g-pyjvSd@e`KJ4 zBNKu8f&Psw57ZCzZ)9blexQG)0fwc4{*{hQuU{JIUul3}X`p|l0e+={{*?y!l?M8k z2=Ge;`j-gsO9c9t2=Ge;`j-gsO9c9t2=Ge;`d1d5$In^?C%YuQ^F zWj>zk%z4i=5TUnhkY+yvTO68*n73SQI}f%TvT3PJ-v6HgGuS>Pm4){ED;GS!4}!M; zi}GLerQ^&rvEJ90{;S1(TD{4X<3ox+qyGwMFq^Lt_5xol=)b4HD;<tR+2Pdp!D%SdA=_r1$btHN_f7a)78f|V_upf{*xBe Pcs5Ya|EzF!lEeQ3c5b?k literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test11.sas7bdat b/pandas/tests/io/sas/data/test11.sas7bdat new file mode 100644 
index 0000000000000000000000000000000000000000..072aa683f66d992a2e96fe872af583a915e8dfc9 GIT binary patch literal 196608 zcmeI2dwf*YwfOfrGZTi08bYq68g)!lODk&L4+Uo<0mDmPV@zp^Q4>lnkCc*rsE~Gi zpizqQHYih|A`gukAVRy@(J61tAd-1O!@X)r#h~_TKB9IdkUD^v?a`_xb(q zUDJ=*>#Vb%-@VpeYahy3=+}Dcq4JF>J>T87z|FeL@V!Ms z&5V+X(bWueLn^!}BPSzsMCR~cPN|+DD&sHfro5Z7vu@1I&%P=Drp%iv4ZTs$D9F1f zFUNRd>_}E-|An*0mm2?t-Y99_sfBL|$(Wn{Jv|cvB0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm`7OATaOL!d;vlY9ZZvQ3jFP% zEOLcK&Irk8E?Q(U zXYA+9B8yyZkzqbYozq`gn-vOi(F)plS1;bLl$|hMV@Yvxkc^~l8+v+$g3=Jl|}Ank=-HrlXi=|!Xi(y$ORVJ z8In8qSmb3Ed7?$mvB)4AHrerxMXs~R6&5)oBp#;2DeDWRV}W$fGQBYDoU* zq(yGD$bO4F+#>f5$p>zL^X+q|Joi4h73A|uZodcQLrUKJAjrp+yyrQPk0^Qfc91_- z@;je`d_c+n-UH;1l>C{qyX>we~QVzk4;K$|P(4Y2$O1A44MbZ1B z=u@KT+dqoZe-ynhiasTZzWu`}{YTOJqUcki=-UrO=|76z7e${EMc=+ZO8-&xz9{;X zDEjsfqVyj{?~9^OiK1_RKT7{m^u8$ilqmZ4_9*>F(fgw4Q=;hG_eJSHiryDRpAtpi zzBfw$QS`nj`jja8_V=RnA4TtrqECsUZ{HK8|0sH26n)CS(5q44^g`iBneO`3amT#s z@h&dQ3oH2i&u4ja$8SY_j?#bE)diI+Zas2T>ET(__n&X&FaCA!i2IV(V*ONA|M2sC zL*U$+H;0{2djBw`pR$S{ec`z_PeUu#Pgi=+YX0t*W8sxCpWagMKiaC^<9LkqQ_uTa z_#?r$R}R{A5c|nf`qy@FwXSv?_fDlB{33s#dHTZM&a+s5gwmg^=djixJ@nJ4zeVYP z@+u#m|HOdu<3ZGyD1GlB|BrXX5Bq0nO8>!TesP@uu>$K4Q2LTJ{Lh0neerEOu8aGW z{)a89G}aZw{>Sa)cixv&TG)6dd_HaU{5;Uif8sr*4~|jor!VDy0qubD&#``*(syk~ z4kwk~7^nHq{5KQD4r6_zpMSRSWC;}c1koP$y~a~sxuf&J6ocCk)qmfmyy=?Rr$;v( zR{i@2D}BFpysGlvX&&*rx2kGVWtdPg^%ow`{nMu2KeghQlPdo!f@~rBkiD8uW>fJ> z{>;J5SEeR>R8t*UF?{A`MW%kAT`wGFFRr&(t@n{8K3Ht;j1Q4N^2w;?Ey6s%=tMo* 
z=i5Pk0cwAC{c+R}RQ>Rc=ogxtW90jhy?;Zq(nc>cf8OH#~mDhr2gpKlw_Z z*TnaMH-CJ!uEO{simyk@fxSTh0EM{3*X<9dr0vVQvG^4h_2nKT`Jd zhU&+#tI}n>8oaCIvX8O+YPN82EB1Z;9aFR?C^&h_zx2vT0C%$;itn@KQLc0o3f`rQuU4b z_ukBv-r$4Wz6zjUoWgNOtA2KD<$n{$UMcQx5dHes>-kjaNXW;J9GW-r?aQyc`QvoM zkH)Hg`fTK;QHPtKuxhC>?KDz|Y#OdHdoOAy+7(~C=hI-^#yot|!W{Mz(oH35<--vqTX7~l{n>6R&hwr2RWUJz8 z>?^~4xx0aXCBjP^p6ABCls^?--;d{>r!#`)kmzzS7+HjV#5 z1{lNM(69aD_=meiV0#_ci|0n&Q(PYNyYT!p`svuti{Z3=TbI(ECbn#97dr zc4`Vy?9hB~I*wVEugXWGB0Ri*btGq2_rkfc9X^>)IN)Ft(9%_v1d^s_NIgj`>Ur z>a&%;Wj&W+?aOmGfAn7%e~d~vu>S3`zW?tnc>Wu9j_3BnFRK2Gc{3dMnqauz$a6e9 zxbXJ=iAEmiRrNN!jByA3%joZ^EnJ5E&kX-X`yaKz)<4>_=XdfLB+}=6!yYhC_Lp%v&HX!(L#7(<)DGl~9E|ho_27RJUralcCK%}*rpByj z8nyXKHZ{G+v}IE;E1G?nm8Hg1n=Q=u=3wC(vm#T!qB)bmW6*w#`$cnri)`TB8Y7OQ zzYJ}_bL<`DcD>@3_A174+!teilWhBF)815rHtNVpv517AA-HiT%`XJha ze*wQ*I)-t+Vg9ynBj(lHah~ctXAd^o@=1)3-GbrwH6#Cl`GAu`%7K2w&SQTwxyUA` zBpdf8j4w$6>>u+|W4-OITx3y`PT;+d-K5qJ_Z1Z%KU4ksF&@3Wg3IG!JLY2sCn&@6 zACBkJfv31Ut!5eV2;;@P$1xu>@+FL$dlqsPzOcTLcYU{o%j0nNJ~baBU#)tOi_>cR z@+WcqIM1|o=r3z<{+J)0pTkY_GfmhOOcRuKOcOTWn+9uY%ogS=v-|Kq%=eKk{*}L& zP0dlm$58IV8orMV_Z{XH_Fcm8qaH!L*JFO}FH-$}(Zt^p z=6^HhnXEwRhp)%{X9w;()`$LYmlq^FZ~ZuK{ucCK!ynO3?r!BhF?8TN!@eH}>WDws3B!_wQ}RdphQCevBjjnOxiwAp=$Y z8*w4H70<~xaa@ddLsxNiZeSebm=C}>6Xef``xs|;!TnTR>c!8r+$eUaYQbOAV9ge0 zAEqGdm``R!Q!x85>x2vD989&@!W7KTWF4~wQ@Ye&(~te5f5E)uwRnw(Gq>UQHzQ8rcapVrwtn#3IJ1D)h_91Fj5v;QZQ{D{?}|oz zxwaMW^{6sq}rUYDag*u z$sh`uj{Oz$fB)@jy&2DOd94|N_jIQ696xWtJl4o7QdIq_nOuF>GVTj_E&>1V#AmZi zr1ACmy&zZS+?iu$HHVN-djcEUP9v+wz?k~nUxn~{T>v5ij{ft_J_h_6C$9xRt zqrSVof%b#?`HgBFhJ737?E@?DJAiS&Kzo~5AO2m-sK2KQ&u1h5!ThlA5-vYW^v8Mn zk;9@O_K)A&jqw|{;Qa_W8~bwLNsP-z9*B9}`$2wIe*QP&Am+DhV|cz~^gn1l?xW$q z_&ucR5nR_U3 zeqS=;Q6FVzla|`@+!pXao(1yaZ}*?s{FuV*cHav##h7F#W3zn*b#nS`LX}RR$Kj0-na_B8ti`_zqhm7 zRlRfTG4C?`9q%3WtMNQB{#^|7$(gU=z8mw*R{bp9hX0Oc%PR~{M#<2>e)2---(O7g z3l~iDG!53Ye)E-StEM39m>ro-&G+UjQ)3FUa^!n+Fmrgbj%;DJXX>ZEhhSVb)}6oI zmPg?^Z@f23mLYlikG62}uX=mZUPAxE@AG|M3cm+o{_n5Z$>l$jRv70b+WEXq7^iR_ 
znOpV0x)uEb@4tS`qgE`y?`#j_xcD95i(vS<$}rz63h?jAwB5*CFb>|@%(LYW?^AgH z^$*`E$7jZRjkwk)i1RVxEZXg9dmbCg55xZ*1jp^WHoSj^@p|N@@cW>VUwM8No=-CT z4%4xg%g@`Lc)l6)ZF?sC-?@x`=RLm_&y&X#H=}-FefWNHTX8-ZmyLT?#!ifz zcpe&Y7Wy--F47LAiJI?CgEcMF)R=>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0V43fmq1VN@%&+L zt~vLouCDKN#<%C|j1_t}IeRah-uwXfIoMonHscu%=GJ)2QNPB#FQx4{UOwOnKGeaI zbpBFboq0b^t#@Z${~0g#=`3kJ-&s?_yay}B-J$V29E@+82Yse_Pwf8iipOuzxf@ii zt0(L4rhAUuI#TB)WsLC!E1;W)r#v&)Sv>N{t$Z}p34Q^pH3PMZalD9)1Vx|-6vvN_ z{c`t*-{amVdV;F?R#5G8WOH8-CckrdNy5sEz-2zsW3UgrI$Gnda528)w7)kzmESjF z?_bwljOFqTv^I>)~kxQUS4v!_l;Yw9^oES#&T1qT#VB0cbe|G zx2h_md{U+6S@>&0p7jb7ImQ*o3~ISv>M|?~W27K);P=Phw=IY6Y^1Gpj^%`h8 z=TDs0;CTZ&Ed|xa6i|Kg%v`Xi3Vw$>9aOJ2!e%5)`ZutM3Gnix5n3J?#5X7S-PMV$ ziSSM@0@Gk#R&-MAN!2QTyXgBI)IFV?4u)5l$T?_M*MsK7t(Oehnp`}d53g~(HdBmM zc`eU|x7(Kbr->HAtpXe7Cd_64;x2e`-6t>nL}zM`m%z4_i0$p+xwnT_RVB8&PLsPV z_IL^F`fjxTb4Q{lpw+>yE@8UoMooV%d z*weP>RtNN|8u!^X=el1P*l^=zi+e2*2hT?Xdx7iRRddPpLxPLo`NU*bpxc%Pkd6*? zmN*xGpZ9#SI_coR#o)mobKwfX_jTr8;qBZy-kWlk52x&+1E6UK9e?)4}yZE{A84w#3}^XJPM_JD0FJ#}d8H(G^_6O0_ZI-^;<3VWagj*Ag8L z%eLkRe*g8sAMi{VX2-WM3@b8DFL5k2{uorJ*%0AZ^={?p%~Le*nSmO(_>cG?VGxaQ z^r(~E?T52mIKbA^pz0VcR*}rc@R2q8$~QrCOzznzUaXC&)>^&>%^}!(v9(DKmK@|; zz;C?%>O?p=XLt7=nKWnMVz>XH1b#hqx#R-86DQ=yH+N@O@e;A`O)wSo@OcXZu7D4! 
zfYE_n-dW?`co5EpgdZjIvDMo1%^-^avrRbAxnZvHJy+%OJm|J=tqP0QufL*+BRBV6iwV*k^^6g+Udn9>k;L3$=c&-&8?DzlVLwej@ zqpy8M)EKT$spEqyyUlicPqpce2`rp!^eD@NMJe0rkift2ae6z0Oj*r&GQN5fcc)I*0x_H~Er)&D35{3Id>jsZKD-O=- z&76x3YIz6(V^1S`s$E>UO9spL!OTw2zLwt#bC?fEVgE(Y6=7-2WH^8x1J$frzt8zQ zd4Rs03eWqbnfEIG7>MZgu&hABz#4b!e(`E{9V_X^i(&tCuK)?Y%|{2HNQPMidq9O# zzDpnAMb*wG5p-G6wwte9zWujv@d6lr{cuq2NY%jBGA^I12V9*8MFwy?FRpfa+*=@F z@NWO@uExUBBo4;}Z{^+Z2GwC1?0fJrHSU&^a4>ni2fy461~8`D+2DfropA6b-~P>s z$&0n6v*5XHm^j^Ufet)9R|e6!qHG$j46BXD4_+bxh!Q!kf*N zX-R(AOovCE$$0wyplecP3KLQIm*9}|!C6P^&#@)?5+)*VSI2yiO4xk&&!pQfVb3}t z-s-TCEizCR@nK?f17OP5!1#U_%mY z9UJr0e2kd;RA_h@xu>(yK^StuWT*Fi5$?ua>zDsI_q) z)Y>jg>6#`H%pqZGd=ONfUu+t*^6eI$3UxdQ!ET^3tYUDbE8sqt%)tN(x=DAN2`A)1 zaj5sq8hM0QTn1~K0KQb?ntNGmJG5lQMN zih6a=O0DO*I$or|>@t!P6hULI9P_@c*JU^{Sp70HA zpxW>2HclQLuk$34ZM7(0;*%1f>lXi&pgIRxG^FF)H~Ese-MqstyT<)+(-@iVwEVrK zhxZ5=Z9yU*`0Sm+#a?_Ej%ml@ZozAWIo?%s*$#hNb+6NqA;B(7CwqAV^r?9|RIFa? z*dp@2!;sK9ANcl*O)@LoFiM>0qibBht8u;eB-58Z%ZF4a*4Ma>3EN5dA@?m9U9GME z88qx1-t*j_0x4yaHSf;-C5d42G7iF7>^&l}!IfZONo6k{d|eko)^Ol+j5*8{3O9B+ zq&Ol^uG7Xjm+Qi3;miiFepbJSslXf1I^e`U2U_r9Bk&FI!!`szEMe?A?>6+7(SpUM zV(Hg!W1b^CXrnk{i~VP-wJ}`BjOrr@-)VaW*B+k>r`E~w&H#hE6mJkmW3WxT6yny{ zulP_n==f9R;O-FVG>@~^1Ft_8P{DOdz;!kc<+p)qZ;e~zBh$bU+eP^3?tzpYf-xTl z9{+lFH`q&FT&qK1c&ZMnAC{-_eSEfaJ9l@0;yp;*9_%a@(VTC*s*}M+Bu;X}h3O`E z*`e?QWH=yr%PVlg9>Ij|b~aWlxp^>DlR137n-{f-)Tcq@Bp=kctFPcYA1QHon{Q;F z&%Y~mWEty!<^Aw#uQJLX1m)JR0n=Ta=oG1SmAHc?J_fh2!PU;4)z{V)^@0#Rdcvgv z{e8AmrV&^1G>B80ktZh<;7;D7wpwf2;jj=XOCMX?EQ$fW!?^Yq1zWu@@^FgR^ zgty)Rs&|F!cjJR=TpNA}h%&tG=%2$O3?+I$db;ebw2^Kw>&+7`hdU4CbkJGgD`+_& z`BZi)_=UW_Z`}c!&Y~;!x*z?Ux}LB9HE7;g`F1~jDrA~NYuuY4M9N?Qv4)Rzu(Tw; zaYAp4KZ}hozZ_EOdqfh3`wbaaABTR9bzS_qLzB7n{DkG~SzYAOy%;3Y^Yu~(?01^? 
zm`IKxs}OgZ6&GMER%Apu$nDe~9qrA*vNIgQP0x$Z8*4=*cKiSOq707DABJWp^U?E%%XEN$$U@ABSrg`2e=EpY@A#0P;E5rYqI8l+zj z`ROAvx!v&^sM@m<2VHvSqpS?-$jv>g0z4lfLwCJ6?oW$L^7vel7eVNL^(UYT$~ZgrqeS9NuHXz6bre=X~x1%jGSwr;9Hw z34*wD9Bl7A?-`N0-FIieT`xihq+R+Dk$6GorZ1fin$L?8hy5kszJAE|E_gawnY1|J z&tl^~A9nS`-z;{f^=bJhJe?ghJS7SYxzke|fm*ez{F;3)qGGv>+akcTtHmNb%xM0rWKl7K6B-hg;uJtf6g6m?8aU#~^ zIxpLtiK|VAY;!0^n~42!?GY<+y%W!RTtPWbbBu8!*V7=|)W_9kg>17dMw`g>%#-W+ zV_fU;%W-DK7$`^CUQN}e_V0-kBiH3`o$P0ay`<2Tygo2%Ol50 zjWJH-dZhoj;_@GGK=2>*Suw_mT#xi0XI%c{ESKYy#TX}Yf2997 ze{{bbXI6}HBG)7RM~};Y^l~{)S&VTa*CYK$kIR2_F30H?W1PtK zNdM8}@*mwJ$4QMbPUL!||LAe~4_v9if3%A+#)(*uCjCc?%YU>EInJRN<3y}Slm4T{ z} zjBz5@BmGB<%YQU3$LSYioXGV^|Iy;|AI&4jNsTd1#Cfer|Iy;|AGn}{|FDZO#z}rJ(KZWN^+i<`(}cJxj94y+g8oFUUrHPe{-DSmcb5tUmywU;l-yhPmT8lg{B1xx*okRDD8bB*kLNY-Bg z+2HwANDt?dSzmL9WPKyZI3GCgLUnpuWKT%en?W|}_X+8>42xV4lJz$%^?69I6F$`QNJpr*JfB`e@NClKsM^vhV?o)F_h zb69JU8$z;v7G$IUrjQ=;kWkxQAzA+tWTXC`kX}1rkvl>%+;2klPloi6gN6G4DkMAP zDWm*L`8=Kj?ob^)DI`0RLB{#$y+eA)Kh6I2J|Wq070Ad(&j{)D0*lN;vcm(i(SLDB zua6S4+j8`E!OtN^Qop{en-uzCIULI}EUU2eV>u7Y6<9W48I-3U1Ah|)80*0Df+{s9 zmMK`KW0`|x5tcqIE3ll7<)c_G!g3Xsjaar}*^cEgEH9{1cVd}>WjdBQSQcUF!?FU) z=~zCBem2DjiNNQ?N|OG6%~dEPYs3U^yMjN3mRlj*VZAZGJg6emS=J z<=FV;*yfjO_!Zjb zS7_r`Xq#VVhDi8~GXzBW>*~4)rHn!j%(?JQ?4pOt@8G+K>_EfYwk$s1)G#^6szP9WBZ#@44elSjj80$;;!M|S20r9uu z*_oDc3;b)~k3ke)CB#o8@v^RbVvgRHlzAYmOa9cm|F{0mJGF3E95d3?Ckj}d_@W^8 z{#RH|$FiHe)5?ec!XLNg3rVWXoqEW>D$l|!>tFUGE>D^@>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW4zA4DJ#)}pcL|ASUUOCbV8fCvx)B0vO)01+SpM1Tko0U|&I zhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&I zhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&I zhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&I hhyW2F0z`la5CI}U1c(3;AOb{y2oM1x@ZTr!e*kSgciI2| literal 0 HcmV?d00001 diff 
--git a/pandas/tests/io/sas/data/test12.sas7bdat b/pandas/tests/io/sas/data/test12.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..e2a9db874948d00fb14f4f9e9dc1ab2288330aff GIT binary patch literal 196608 zcmeI2eRP%8mH(f6lThl9)nU4_9M(uRL(#!XX+qvBn1cjF1c?xlT7LaSjSjU4Da9@- zsFxSzO%ae*XkiM1ylBJ(0t5(f6QWIB45CC{1R+3>KmgIica+~g=j?Ny=RUXh+B<*D znwj;zF7DZPKj-yx_TFcoCpwP%sXI5dWMk?-zO`dvN9VhL$#|yAb)EmQTQ?`On-k0k z*lJ0E2z_SYhuxeX(9UZ^Nc&sorNSnO8?R4n>5D>G&un!r_y&PEnjXXeY%!EEpeO9(le;9_^8P zxbnGnj~w>Mk{hq(+$fLS-IdQ?^2jwFd6q{m_Q+|j+;ZL{S9|0c9=XUPcXj16Egreb zBZoZl5Rcr&l|N|q$Q2%Wibo#gkyBmybdyIe_sEkxa-m1=?8?nYJ@Q(QJkcWykKD}Igkx4W*)3nDUFZRf# z9y#5Wk6-r44IX)sM=tTmJze?O=N`G=y-Xm9g^+pDYTC} z3j=Cg$4Na(Gop`B-|koA5iR@mi(=@DW9U<3=o{aQ(SHnmaSVNG41MF_82!i47st@2 z#?UtsQTY+-+x=P;Lth+2pBh8ocu@Bl)qf0qaSVNG41ME)82!i47st@2#?Uvu8>9ai z`r;V+)EN54#u)v_&=<$hr^e7X?vK%b41IA7eQFGS<2y0>kD)J)p-+vWZ+tsO|1tE% zG4!c1^o{#s^dCcC97CV_EqXl)Wr8gHYevWNuJ<+<=%05BRkkk|3ocA4$o|DP)@Nz` zH&?Du<8;;qj{&?7USE=nIdRi~fxc#hLY|K49Krib5LF<|o%?k7juZbbKv-_2t4zqrw)^`bu z|JWlx+`rRJ>kn)Zm)A)U%ej6(tsl8Yyf|QU+t-b}E>W!Y*VpROtSij@-@9Afd4K0o z`Bmp5=Tl!UE)mV#*>7uoc(`sqaf$c~Y5SF2;QHOP{>o0~(4zHboSGNKZ^pGX$!|NTS*E_vh43yhD=`izWKIxB&T4|nbCs@z+`DR$OQ|+@ioMwH0-H+JF ze({0U3*O(!TLd}XuxDS`%ldnCecI39O*5}Pymt%tldJVP)nY$+wb(~jan)mCGQ&fz4y8Q z!Mfk>3&oA2pR4<%2it#WfYvuX;NzB|^?|VXw1;e9?7y$p&)*2`&zG@Zoa1rt z(*5k(CO(#7ZxruunEm?Km14YdB<2%h4mF#_&OiV2Pu}lt{Ah&kr^iNNjXKi&q*W`8 zwbN)Jx@n}w_Gr~su-0ugwz7I;pKJH>e%ltdj%sSvj{f{h#RQ5pM|od#JPaKUvtR6B zJ@Xv2Sv>o~I7yB<^Eq;GBkP%)@eA5FWzJ7T?qjH>PIEQ;>hQkYQz1T=$4foObF(ic z^CIhqIPRs*62I(IU8@LqYaZRhoJ z+*mwMs4(Ba@zeCvv{MYCXuJM4j`L1u-TyTkBj@P|{kzw0w(}1<%a#~OO zq0h%M?uY$%&~EYN6B68F9+&Nb>R&_N(qZbmT5rx-(px*VLJT{!&#mLwiq@R1Mp>_I zVQo2DuubhF)rV~w{oFpXEtFu@j{ZDy<}1n|%z4pQb1bhClhpxi@*>VFmcAselgID1 z=G^DJC2f^>ouVW;9`j^-zVj76N6dN6akyczP{+$Sj(cIYlgvtv=gi;uQ_CwtW+x55 z<#WopP0zD=8QX&|udA;R(%6PQdyx0>W?ldB?VQilvOZJmYb%91);{}?=g+_`!Cwy>!0m8?RVmjOXP?BI>dRxhH~~_b3XEVe_kp6U8c{u#vV9N4viLS 
zng@3?hYUU5xn0beIhc60J^VM$7i)*g1f#vfYHUSo)b^2TYQ4wWvK4Ga+lQ^JG*)d} z*ynbzNR6%N=uhF?vE(sqKjwZ>vrNcr;KMcM9A|$ST)}Z{4|BUgbL;jRpX0nQW`8^T z_R-`m2UhBKrk=z5NWb;{!q@}nZ{xSHzpy^c_7GYqexMw~#5d00_HX38dMD3Q$8+bg zDqlXy=i_(6k@q!||ImEMNh#%EKXNW{f0KpGCdYl>+?)7(>AZ~l=e*Rcx3NyhENbi- zzV|u3^!n+(qR+>(y5A6=M{h0{Dm?7ue9Um7G9v#G9G4Ew6DnFwG3ODV7xQOwK4$VI zJ~#J0FZA)n^-bRO%~qkp;Zytdd`!Mt_Lz{-YUiKl@cMb4-PW_PI4!tFbNYBinssANF~4i*NH6+tiL4Ifix@M_qqMGEcK#Q)`93*Kj;H z`_-_5&lh8#96!YZzHhRfn0)N%)#7)mqbba}x3-h6uL^mJ|GvN37vA^5<&l0A;(X~~ zUF1GGyzexxi1!l4k2;0CRb7xc z-iCPG+^y`t#vj>E?x_=<)CJ7=HT!!@EuUZPUm@0~REUW4-RP$*;d^>LbK`ToctzxW z-|Wlj9g*BvA9|;b@9CVsh4>r^O&0Q&NExW^-<%8KZ5$`x;Boo98@x*BxWVV3;Cz6_ zc}RWCxzFcp1Km&MrCxrl<;Ji>U5mc0!P*wK4=boT_Lr?_1>1+M6Dim^Sha0o1>2db zV_P`-^TQ)wP-bfOh0ocqckz3k66SV`-j5rr*shqH*{@q_cz^jmYvSs%Dz@9r+z+4c zt=ojob2#4!alZY=3Q?pkNG4BUyZWM*&o#bhnfpT3R-R{w`)B{6d8uphSQBsg9BluC z(D}CUYrgN?P%Y%!v72VHotyrPm$Cit)%~0I3`aIb+GmLEr^a_~8a|VE@cTD&PVsw^ zwTpfIaNIb*P&_W*PTp?LaX#0^tc!fFXwH}G>i9m+>vDL%IzG?$j#+yCg5RsGUn>@? zYp6K~`JQ6!-@FIVHsTC^mGf#7e|TRW*)9gF0AcdCEWMs{TZFYUYkt--lpR`wjTWL? 
zSX)+=BY(+mttN5|`O9`6StnX;Cu{}PnVk%yQ0dr5od1Vz)$2|FlTg>1p?ptwbehCM#_-yhY&JTP3L8#XfeR-ZC=CCNt{qy^FGk(QZz8^7XvoD9{@VRXAK+fyl z4U3EF^}jg>Ilpx_M&?VV{{fY}kH&xbeMs4KURMM2=Xr)h;ym42pK3MlOZol7y6r-r zFD7s2`J}FL{l(g;wL_(`25Z$;uqJ8+TUnW~?a>OhsjX<6T0uRs&ut6Uqx`auYzx)B zt>}0@hNH==cb0QLwvD-Qe9KwK@0ZMZ)I;0Z*d@L^Ia}+UBBA}!*iTQbpR}6q^=5xL zUcFi&BK?c+vpY8Fc4nTYoL9|xxbahd5AgU_-+9qnx35^r@4YMaxk1P zzn?ib`CJS0Jpa0h0zxOclc9b4BmENza7aYc}_?)eJEplBBpF0ojii}qw z?*FPfUwt=kTt#mU4zA?)?ar;b-iMW(cNu@@dq??djwj~tVw_J--o*QE=9#JcS+axw zj^@iN3@1}M>|Z~6$^H9_HNQx~nx{2bYyI|-wN)#qI<_O*)IPV5ti}qea`baMm>u5M zQ7vqHM}PGD5I&d9x^s8>@+gk;=D8`Ibj34ts#eIq>h0_H3i}topYQofh<;sj&EkZ^)E#Joy+{4_tG|wCr@i`rv9?Z$o&%9 z-$VaU$MfNH+1#_zck{W)@z9*J)StDwXggFUYM)z!wU%i$RePHtvOIhaJCJ+(nJTZKPNuf0o`WhLK`fAt_Fu8u8iQ!~Y}-NKtch+th;n zyECVW;;iH6ntR7z#4V)wRhm>x?a=-GpIjsQkz#EVEq>dH+t#N(c0%)e zp`NPjRjVi!eW=Q|OH}1)T6m}1x9KXC%RIzJ37UD`KS&&}^GL(x51=+B%6CwsI^zTN=?p_Jxyx-MhvAYn|A2+ z^X^v9qvr>CW2JpG-J`56iX|p&q~?B*X^Q6@?3v4Y4#{9{hi*1ntr4?s*Lns z#qcoI+4NWHMj2_R!Qvk3rZPZ^6AQ*ns7QNVj3ULxR8pMe-R(|_*Q$)c-@bWcSLXDm#`?l~(Do%QmQm*srPLZhHRGkJ|JGdcS9?4J2c@z{aCC`j2);t)NUx zRzClX)ybcy7TBLwIHgC3v^cMEwzwuYr=X6v{8qWDKc&;E45(ZgSA>iirzDw`2bJ?p zE~6?3IcAWzYk0;g?j}Q;NrrSFyI{(N1!E}6Oci74chzIk87IrgCFUNI8%$3pRQ!vM z2Ru`bP@4~#V?MQ+E;%;v?4*H& z!lXFd`qZ`6cbyk^kYf8f3Zo}?KQvX1u`0FgS-ZtL(#((iEn`ZI2*M^It?MjX0vC~1 z;TfiY_$|Kk>zpABqf``>wBPF(#zT5M6a6A!=3UN=Hqk z<`&YNXFnex*;g&2M&^u@Uc9Z9sx;jtnG6@hWR(?fkfvD~N|6pqwO>#pV@;i@(bg(b zg#4$;@v(E|hKU=f#vj_L6LVb17%r-Bpf)B@$xfCPQx$FGoy47Tkias*bsq$)9sR_jw@kd9fDA4q?_ut3}urYhl1k{;(*g|bn#%yV}9>EF&ZdOQpe47%UH`ZKYi#yFrls_p|c}J!y2Pog6 zIIE-TD&YkppOGBG$5~tia zP||^ST89GH7-p)VAS01Clz8pO)@_OmJzOWM-zIm|IR&*UJt-7-NY=}0Xf-D}n#-)O zVlXN8@WCP-{k3kA+NjPH3_jM>HgqLKF?+?@ovT;8tc1pv#wwh8X3Sy6u;zZl#0W03D6NnVOvDKGa0(8mZS)l$w#1tK(o~m@<^JDh>U$_NC=-Qlq!D?Wx7e zFUnOZDW;)xjG1?+ZC1f?c`{F^5T8sFGv`ycCJ7|R5YNk#_&BFV1||49}Zl{k|w38=ae7U8iZeKu(`i!>hB zV$Rc6uwV6+ds9x!ZRS)&TFgsJNpXRrjr6lsX|xuFhcZhQN?y5>G{-oss=Zj;K$??$ 
zBN`zYRc4arU7adxJA0*tI{ttyN2bEdnn_Wou4lPIo^2KKLUNF=Q8LhNm&tOic5xvy z%F0^m^+OI6lDzDCc7e9F;gqLRdiII*`s19OsB`M&X~v{wr{Qajz27Y-x=UV^T2(T4 zl?ni_ZlE!mbQYp#zEvji$7F`ywpk_l8;VR9$^(9vMkmhObxWrIQD!=H(~vzhtHbqr z9d{~!PCcB^mshpa(b8#lbMB+EgLBm8IImqDz~SxQK%@43VdUjTr#ulcMI)@KS&O6v6?~?1@Jct&hj$+DLrJrv+NQ=72 zN$hamF3K(`C!zyF!lfwP!dKy`Dif}dheKGV03$vZgJrMtN(^k3Hq4Ud_`ddX9m2QV zLc_kpqvTWUm_V_kk?(bMcNX``sK1@lV09+VTTD%ksUzurAydztO{92RU5*E-c(d^u zJ*xEP-M#2x6Ypn{w6XQd#yA%rA!k`Lkm~3l(NU$U6<^b!2Y3itJ6-yx%T_fUj8*Qo z`2lM64!eN7KCWre!-gmnNj`H5$~bM4VN&cKM4H8%|J)^XU5dVZ&+n!qws*TZ4A@&0 zA40xp!#l0*KsQ$gXTL5eX_hi4b>~vQeKq-VKd0awz62`P)zfK>hn4dTlxK=~G@TUY zIFN~9_)Sur<}0w=u#JM2`2lBw(r(0-$)qvwDrDlP9e)d7y5z;BI)|#XP?B-~(ETbu z)m2U^TPvXpKG|BOwY4tPVr~<)I4|Fsys=iOIR5fL(!8$jX}^)p>3Tkj=Jq~c`Q=Nb zh25!2vwRIR`bMFG-viQi%mSq^hvi-TysXjVQz3)w26?8KYu7+h)ZIj#yu~p}?d*o@ zsY)Z;7-dO%^WW1iBlJ|ZbfGHd6_89tmVHi&b9Abb?W^}t+jdd6C-mViUo=s`{Iy&V zXVA)>Hp>*joPZ;!myJEVafa>X#mGowgNha0d`4S_sxQAzS)1_&7dE@u%7D~YNs6i% zWuJ5!agrx*fXmxywkA`cd|hMVH)Zk%ZR#a?A73jonvY&qVQ1wrTJ#zH-a+R@tHx0` z=4v62n$a&q&rcV{=@fSHY zlD=Cm54cvo@W_+%wI7iptP=GhVk9ZTUFe`_tlKl~KLoj|*m8iLFK7RjXYZPkG)S{N zW(K^d4YKxnZR*nmWjEyi_fm`VGLydlP9cXVucsEA1nXRS$waDhfiDMgl$}?oN{gPk zHl!D8NOM73IVa#>lI9{?k~$VQNdOl)ca~neM7|d|##x;**>!#R^$qVK7jj#hUbBYs z_XyIM+z937DrdM$U$oL;%CGeQ*1^eRaVM`_`RW_{Cc6_!Y4#Th9VR--^j#*u-OsnQ zDN$`k$7%Cj>P!B!pAJP%fW{djzunKbj3d-Ied3H0y`FT{re{KJ(o~zSaoVKNoaI0J z>CjoNr&B`fapVXom*b2RwVsqqs?GU?+BB&)N8_}K+MkqtawREy5?N15SdCK?XPoHu zRH!!P3AI_S+ANLJCVD;d)q0*uXgwh{&XhRgM6XAAPRiJX+LWkqM#pIry&mO1DGB*c zicsV9i8D_0dX)d9B;-FSX= zCwe`~f07gOpX3rX&geMfMDLICpX7x6Ct0X*`otM0dOga2k`wZu#u+DSe}c+?f(iLgFr>zr5@(#~^(g-d zCgeZC5;e}~IO9aGNBK`MA^!;qHBO&6<3z7V`A;w*{|Tn4ak|DCCwe`~e}W15PtcJg z1TM!JCu%(bY=0@}EFhjZ+h6oapr^{|O}IKY`_HoTYKb ziC&NLpFl$X69}nsrorws_NXUNzLXFcW&N$KQQT`K1 z$bSN9YMicd#)*p80p&k|g#3pt=;S}n61i_XHEM+FIO9a^kE1t!IiFVN#rNqW4F8 zPH95>GfIt69A}*9^$b!gDNJZRd1{0#|2S7JCs%u|$?2G^x($vZ?RS$Mit~=sQU23< 
zr?W@y;>yAMNH+D;T)or7Bd5D^@L`fo{XAFi2#-9-m4m+_Ifds_z?|;)dvQ7$;^|^cy|tKJ#vLB2QQLr`mc8Nl!v%&8(cZ~ z8Of&pK35+&0k;`T-+jZF?naflzyK|YvWg(ZvT$XzOhDR>*b%#mHuI1McM)Vu;vwQmwACl(# z6!t6bH==kzQUCE{AAMw8hI8wH5ykSK{@vT@PL1V1{X5h5Jj)q6w6O4JKXa5`*U9$P z$?^4(>wBK(`EoB_n_rfXUzTrvSw4PQzWHVO_+|O#m*wM^<(prYk6)H=e%U^L*}nN@`}k%1 z=9lf`m+hNhwvS)7Z+_W6e%ZeH<@osJ_~w`6*JT}n_sSvU#@R{xjufmzWL?)_~rWMm+Rw~>ziMmk6)f|etABAdA|AO`S|7e=9lN= zm*<;bo{wLiZ+`hce)+!n<@@;M`{tML&09We=C0{cT4{%{Wb7L66IS7`H@My zs;hoENnJcCbAMWw`sio>y}sw4?ek0mGYaTe6tp_|MnUfVuehAZ<#*JbR{i)pzB+o| zkaShKQz!MU^DNG?zO^5Dc?x(luD{vB?3H||t&^(}gXcUggLl=XUCKF zHv|v>0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5cpq2;3`^6z?t~JXhm2G2!H?x zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=9 z00@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p z2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?x jfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>m=L!6OLuo81 literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test13.sas7bdat b/pandas/tests/io/sas/data/test13.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..b1dc6f9f8eddc611a3c18d412d173660dd8b120c GIT binary patch literal 131072 zcmeI2eRNgTmB8{tdpGSUm8a}Tmw-Ne$$(z5Ag^)=->8hHFi8T|; zCf<}&1ht;^|FR``xAe@*e-_-C{@LZC3jrY@1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF# 
z2nYcoAOwVf5D)@FKnMr{As_^VfDjM@LO=)z0U;m+{;v~QdgkSezWZHQFQ2MRdfx5c z*G&0HFW#lb8w<*j4C!OT_O}`NSW^6x1sC-H{E{|Sfx6Au3;9R5!@{2zDtKko2<+~NPY!~Zdd|6>mS#~l8T zIs6Yf{0}+&4>|k~Is6Yg{0}<(4?6r0I{Y7X_&@6Kf7IdssKfsehyNoE|3@7Dk2w4f zIQ$Pd{0}(%4>F~eP;eV&Y{|<-$9S;9H9RAtP z+b)0b`f;tJ%Klo-!7Y+nc@A#;jsH;PFaGt9RVCUlTzD7lM-PAIM%~2ds{3p@DDYQ} z{?hcF|Mhb<4yt+Tu|Bx*k6-ihQ)gDJRoA3JSC8L<_U^WlEkAwYsJaAHPd!VB1^DEJQx?t=> z3p>B6ri1FK))Qzyl`OtWk6HaK*?;*-v}cCqPrk3FUd<(Y20P~WYzwacR2P$d!yc@^ zGQhBUL`^39Q`pcS?D_qh>!d6`{wIG`oryYjibNxdG1f^7PRlU zuSOF+t7cLCM;4;}*FiV0d4>D)!UeRSKn@(|XPs0(c)AO|!G5~lM*C^N`2jU21G;*a zIG-MK$H3>F8KbTOv!@=Jfc5)2W}f=zk94Rj(^!8sKA`5CzIa83b0&?4{wURmR&K@nUWjEmS3j>_g}#cmeLz3;M_=vY`I&hY?L(Kop z_SYW%(X~4D)OBb#&(q)V+P)9o^Qok&foh)WsK)vyg6T7Tm!kR4^^d%D;NtB=^A&yZ zsqQuDpn)mpyt}_YFnj)RFs)u*#q-wW(MvdgxZQI6B=irxZ?4X;+tt<3+*1!?g8PHx zf7AUh^FE=brlCOH&=1l#+{!@pR0cmF{o$bc_}{9_$i8+d*6+P70Q987boNyDr_lcT zinX8Cp?Myz#r*Pn?!V$+?<)GZqBo-HufJb=*CC9qHc(z4LA&}q#a(e8{hy?~m`V1H zFQR=Qz`XOnRYL}JwdU(+e<^q?ec)DgbsFvKDeq2Sa_DdV@;_csv%##NJ3a`W)ZytD z%xbXkJdNH&n}#jykIIj^rz zWBuoYW$m?n{N8w=1?_!X;YF{EpBMVkzC*uk?--b-MuV#7$%o*E`>Dvjc_Q5S{WQ;K zkiF}fba3Y%{uC^1YBs2z>ZElLm+?{RnxK}a`TQQFzlZumoYm&L(?NmXNzd_GP(8K& zhQRFkJN3&%zan@&eG1o8?kCjBzF+)iS|81Nv6-HGORy?`=WR7pK?~E~-+fn+&X^CG zb07I4Gn614jRdiU_o@tgU* z*WL;|{6ScL@8o^O6U)*5>yHEr@JD%{@OTN?gS0DoNzGB9tEcgW=J(M$V&20oTe1ET zopgGEQL{5xe@zA2cjNoqa92;`^YxE?6DErK98~kvib1U3P4fX?`+8n&LHk}L?`RV#iI=J%>)BB&+JN5JLK?T!} z<7Pe*w$I6cu3jtw>7TmjvLNG?52~kru@>$9xbCpO=N>?NU$Bh7xt{0$T1-H$H!}nE z)VE5o{*Fh2>)+H>X|%VJ|3S*%2C}dDJFGttZ1>LmJMWX{VY$Cqzm;@HRT)(C)Qcyv z{$b*5-dCCD!OpXeCtTPP%?`G3MV{1L1kN~n*frm9MfSm!!NO*4!Q~t*Y~hM#ed*HK z(sch6{=Qg2>+F%>tK-w}<9unu4T-;v-eZ>%w_je2+;-qP&d+V_Xt1+x^5rqy=d+#m z!$$Y}==)+8`QLCW`tPHBG4tWUG}?CuySnQ?%=@=>yU_ju?W=J*d8+*mw443pI8gO| z={;yS-)r<<<^G(}n++Y>KTGxRMCl(0R@rM0^ZK#^$LkN#x@^YV-Gly*YRY;irEt0N z)Z7f(-=OtTr9oFKUq$=tLDafN(+?wkv;Q&gr3ZSk{^8(r(G| z!cz}zr~0^n@xJT1chSB}e~joCqIqwqJDSn{L%>j{6sVqBP0#%)%9r{y6sY^pg5>GP zQ1a1pzQ8}`BF(H8$(zHn2H8je9j}`d%mz!ndjAY7mU|G 
zhTk>jIjKi!UD!?Qi+OMVSYdrw_4W7WEY#JLKS29p_&oZSDhE}+KXIb^yMk@_C5L!D zYQ=oycvqyMf$H9ie#~>E=WgopxrE81Ofl9!NqILV4Z7-h6YZOV-S#(srD{Rd>kEDe z_%FgS#wax<4F&9f06vc{=Y869HBj#3bRKQKBUJZwXzzp1uj1@?^a->dp!I838g$iG zh<3BS8~Y=R(Y}SwE6n-BqquSP>3iSwPi-24`bIZWb?;0E1^yoTPHzC!Q+J~DVfXpL zdw3taF{5ki`fxg<-;-v&dz{{rr|`X@W-F*?;`qnD3>SYNZNPcLQ1SIxkXY9sJ2TtUpBi?YRnc)qxfKK7GibPsp3E!Uf4cL+5YRP|Z^Bp0LrIXbYapP&upxZrKu!5oqi_ zTqkPg4)YB-FWa)lW;2!~Wn{^qu3p&*{Pi$UjQ^*fCH|zpt9H z{$4uQqW2Q>AJ&I&i+bMW^XhxBoxcZ;s~7P1NE6xH>AMN1tEcX#{f5aG^PXJ29pmn& z?|L(To|-}Zq3`G!3UuzzOLXo^cGdplP&vPESAG@!oT768Tt4*qGtFCbu4VGBnfh~@ z_OUh4+*3c9fPVJFXUb)q7nx;fKaT!Q|I~ABXg^BdN9MWwkT@Kmb=mCeGP|+<0Bk5v z^LNTRFnd1jm)QTBsaXFEJx6nnad$J?x6*mt9Bi-B8MN;UPK{>{^ZCQ`tI>Xh))&*C z$8lottmAnaw(x{yGvDwO~#ih|9SJcntn<5w`&M*@|dqj%K{A zFCd4FxF5^D-|)rgZ=v(0@lZ{l!{bEv%|5w`>^N;b|1{+zP8M}!hnsVN=jglMtQTeMyj@OL!RUt99~@QF`CeV%oW8=js#VR`m*5zke=fjaDwn{qi9 zv4wA-aN7KQb|8(#x$mIL@ zcZ#R!y-MFfy#L?)ZLEJF_|wk~V*=B;Ozu)OQ#e9ceOZF8w!TcfExc>$3&pYu$%->Z37k~e5#)-t^ZAgP7mBAN< zPrutLnz!mj+Gm=5ti3-pzk`_Xf>p?e{bO3`b71VSZ@vf3{_1Nb5Z^cZNla%?t?Ebn z#^Be)I|kU_!&}h)8s+IEP(8KrEZTp7`%kXFhW5|qc{M7ir@H?<9o+b4T{iEx%{T%4 z)3k2RRY5h*U!HZ*c?eUK%Xz}`%w!ASun%hrn}p$}e8cWd6_o6e%h?i+5mbjd>@n*o zY==?HA0Z$Fgn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@ zLO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@ zLO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@ zLO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@ zLO=)z0U;m+gn$qb0zyCt2mv7=1cZPP5CTF#2nYcoAOwVf5D)@FKnMr{As_^VfDjM@ zLO=)z0U;m+guwp~0`IULj1~@@uTA(5A83>yZu;bKPG1TLOsUdA74eIVEhI7j=%7H5V`PN z%=nS<7oLFjgL${_h4wu$+ee;%;nrZ}gKAe+egQL zFO2{Dd5?b)#9tUQesuhh`Gq&;-M$gxH^gio9Y5rMVcz*)SPSthW5$n;AM(F2@BA+u z3-QOqj2|68re+u$Hm3RK9 zdLe#K%=nSEOpUOM`Q;Q(}!kF=+ zbjMu^`KGk$dZkpHQ?^FLJ!@hfA-kB%SmKb3d>r^Z73F)`yu#}E0R$~*s) z=Yz<}b1~yb#-D`zPv)Kf$zF)x6El8f{YygrC-ct#WCz4=j~PEYe#rl1-ua(wf%wZ} z#*aMzB;u7^ApaA2=YOID z;sP4<1YS@G-c7 z{P+63qFihI^H;A2B5#iwKl1!N%=}Eg=U-ksL~e^2Kl1!N%>0#kw_gS=m&R-#9Y1D% 
zI`8o>gviY?<42!AX8ydqpZ^SqTpu%jbo>fNJ}K|$&Yq!sXt0OD_%U$**cXw|#B6+*6hwj3%dl$!}=A zFsJ`B!sT8tzMT5r{IIU~j^X$V!sTAGr3dwM{3Y+Q+NZ6x&eT(0%p_FKc{-gZmx4eQA#toHrka{X=%@$$}w_2e^J z&wYO9!{z#ikx+ZRR)qBAI<4pQKQTI7uGgke{X|JvPj+fOxBd8Vd7{+PYr}f-C9Ay( zmnWuK`i!uid`;`Q@f*YCiTRekAgm|%YCX4obGTgZjl$zkhxKH?)^pq68!k^Qwe*&- zo_s^=InOV#GF+ZmW$A5UJ$YK|x$!%~<%x`?Zw>3o-)TL!{q}IV-kXQV-yhafS`D>7 z5iU=hwe<60Jq2&`?E44aqq*;gQQ`W@(P2IH0j=lsUw=!5`kO4V^zmUm^&ze2@K2V8 z%k{Tm*q;jPsS>T{#-A20PtFK**r4efbK&0T!i7}O8@w*@-UBz4B&U#UAi03#B9ix# zY$3UdWQP|J^SrGjdr1zH96^~VCOMAe1d@|T){~q^vWetkl1oS~C)rA}ljJs%2T2Z- z96^~ZCOMAe1d@|T){~q^vWetkl1oS~C)rA}ljJs%2T2Z-96^~XCOMAe1d@|T){~q^ zvWetkl1oS~C)rA}ljJs%2T2Z-96?!7OmZB_2_z?xtS32-WE087B$tp}PO_C`C&_Ij z50V@vIfAmVnB+K;6G%=XSx<5v$tIGENiHF|oMbD>PLkV59wa$Tas*{jG0AZxCy<;( zvYzBT>p#hOlsmsKYKD0p{P}Q+bxoPvFnM;v)EQUY+I0KPx0HF8Po3Qm{6oFmyF@n% z{-IuBl~;Pxr%##kg)ewu$2wJ3oodTRja6Q2>2;P~RvPX@Sy{M`W#!R6lvP-DDlH#X zR(Z9h*I0V3)xIuVUS4YHRv*jD!|lth{*_l+?XCWmS6l6^{*~8S?XCV*SPUzy{#BHR z+gDiqtFZW0SpBQ8_*Gc_tFZW0SpBQC_*Gi{tF-u4TK%iE_*Gi{tF-u4TK%iE_*Gi{ ztFri2S^cZB_*Gf`tFri2S^cZB_*Gf`tFri2S^cZF_*Gl|tG4)6Tm7rH_*Gl|tG4)6 zTm7rH_*Gl|tFidiSpBQ9_|;hbtFidiSpBQ9_|;hbtFidiSpBQD_|;ndtF`#mTK%iF z_|;ndtF`#mTK%iF_|;ndtF!pkS^cZC_|;kctF!pkS^cZC_|;kctF!pkS^X<34K{=3 z9ingf!Zm1wbY6*4!5%DoHh{(1y)8HYlW&S~Z)ny&6F3o^xa7oR-K>!mC_yJ1!Cp4F zjIPW+6wTURX!!#@Y2w=d7qpSu88M&yy<-f4w8tOxm+HNpTTkCF3hT+csNP$-_MGv8 z+19gl;cOtQdR|MeV(#tswr=Wa*_Lx3+;&(xsCIjM{5fs5=iCRky}#Ahg$w+DZf~UW zrr+r+$9|UVH!qcaJIrA;yHHGpm;oTncmY{FMkTIt{*zNfmEe}r+Dj!Afn83#v f+`EH2+~?CSA1r_DPv9(J@A$pdR+C(&q0amd*Gd0P literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test14.sas7bdat b/pandas/tests/io/sas/data/test14.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..5a958df51f0ce029ceeb9b0c171c9508144c38b6 GIT binary patch literal 196608 zcmeI2dwf;J_2_5sb53%Is3GK9s!@-TT3S)_ekgbb2^e1TNQ^0sG-^Vr<&jd-4;506 z4-};+AYf2Vfr>n8)Bur}0Zu|x^jbyUZxSAYyjy8~p}A{j*4~`GPxt4Zp8L=J{66dW 
zf!VWWX04g;tTnUtM#e7xnom7ZzACxXryFK@S%(kO|J>!q#7>#1EHjho>6+QirCH>e z+Rd5ldd7S%vrAF0B46Kr1N#*BDJo9S^2Pis=vCZH3>Kz)?R#)`&)oUf8D;3R*>Xlk z$%J)t223|<`M0NKrKJx_ANZTG)f43i^;gF2Ik#tK+?Ji2d3)~d>9<#gHOa?`c{#tz z$+|4PZhmgh2ey@#8h>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpMBx8-0yECuHvaL)oo2b}L1?Ll|L}Kdk9u%*W11lo z<)skTx>)7kj#6^H)GGh-fRe}k*($%VP|00AtnyO@*uTsscPlyLR{0k#4)UiRSBBM$nHIOuP4(BI~uzs*5^ zn}hyV2mP%M`dc0Jw>s!Q?4bX!gZ{$~`VTwkKjfhQkc0k14*Cx{=s)P7|Dc2ZgAV!+ zI_N*(p#Ok_{sRvB4>;)G@1TFbgZ}*v`u98N-{+uzpM(B=4*K^w=-=z0f3Ji7y$<^K zI_Tfypns2p{yh%*_c-X^?Vx|RgZ|wP`gc3%-{qixmxKOY4*GXF=-=s}f2V`~oeuhU zI_Tfwpnr#h{v8hbcR1+Z?x26WgZ}Lf`nNmi-{zoyn}hys4*It_=>NsZX($2aq`QG1ujq(dt`Tw4V@&i(SaUoar50{|)xRn3U-mdB&e1P)9LLRu#%vJrp zQz$ng!tb-7C+R_Qk8ty8h&Z{Kei2ecp>)hrQ{_``Ct32{_^`B$CR8G zEBQ}w{sz|&>L0$D-}i80X@0{Q>wH@3_=TnK56FG)GbIlVQ+g)M;~#*uSNU1&pQ_~T z8hQ zq*k+L4i!!EXBsn`Oo`R1DUR%Dow?bOsoxj32*=un>n&94eQGW5E4FvyUgVE_(yDnA z&|Ljx8_IK3|BpiaBJ}>^mg6Yzt>k%L2 zCfuzT`k~>s5J$>?-cx!Ezbc*2t0B5dt~!YN`zt+NXYrqvz1aM(Zs`AkKB)f@8@F^N z*FyZ?-NXRH|9dL=jFo(ez%zai>PLUOwV8v_+g1iYRdT}*f3lMQB<_hhy{8#*I#B6> z`HI<;y|_>HH|F1E4Oeo551t!e2Qe;Aq1~ZM&!+YKpAhYdrMSN#jO*X6;NxTUIrTis81wUn5C8u<`8)G9GGnqu?8^lnpPc9uplifHmh;QzB1gG2kQCPMIyrRJU8~G`~_?O0G@lksr+|66L!B=h5pR(e7<`P z&Iiu{maoRM@%(Xl!5IFAaUB@W_kQvfSbi7Ri|0n|3tS%aTk!le^t5i|g>c%wbuXUt zEK%vdZk2VOjH~+Xo6*l(k=qa{54_dH<+XO}VYG|>*0+(1+Y!zb>_0&1`Mw_KvrEa5 zbKt4XTKUy1KeC$3>v5~W!MIPCs&{z-tVMZ4$^ z(EmD!mbfQ&D!Fmbg52~|^TELn&2}?z%#Nlzn-b~0<_Oc5BOlD6W|P!m4vlO#o6HgN z!4ykCjEy<*9b^#3ebJLkaa{wSBrjk*KN;o~^IzvTitG0VHAF@C}RFqLp%|9fQrz`vUC{5Sj@ 
z&+RAwrt}%}W@z_@kX3HvIlfI?M0?K!BM zAN|?)NB+t@@x!|0QQT7B#(Nz1#n|6O+ddk3%dXedIHMoK zeT2GtzcBm(^S5zpFkVm|LVpO%;@8W-FwQs3-?p#9ym}+fQ=RARu?AZ{iT7iCh>L6< z-eFXKBmaTEk;#(jkM#f)b$A2aeLyf?SL#MSkM z{f)fqhjm;Yhf}tz`55_X)ni4P>UqzTiFk{U-d*Q;yv3R{Qa@2J|cBX6)D9jkv$~K5Lw-!3Ol()u;#W_tWdS z%5yN^2w=Yb{vtj^z91QS0{YcAO?a>2dzSIO(6A2Y89@CQUobEET0Gh~Z}A?yIG3w@ z+lXs?-??Qi7hlJ2eHQ)P&|e%x|NlhkH@-9MUuE^r0Qyg(?cOx_Oxl3o-;8?-zmqJh zwduig2ZWKoWvTU?TEk60Gu_V&4C#la!$v+t zjxc>$cDB}pT2o@RA!??2YoAE5IbribDl;d8C}cYJ59A-XN3A#QRW4s^2H|@;Q+ZB+ zH(?%YvvlLT5c&gqmdnqX*?8_5?;~t9zBe_X|KoYM zXPPx$jB|49a(u7Hc^dvRWGTK!<9s;gV=y1}-Ss{6AC%8*Q0p-K+c<9@U5MWSjQ0!l zw^4Q0?^;IxtyOqF8~G3Bhu!CL`B|bT&NF}4#EcI;<%+A50fDAI#3ug-wm-gE`dfXbv?WWRux$j*uF~ zUuKgzLaH}Avap}Qk>Pqb)?q$|`Mi<0=PbwXOU8ZFP5IgAdA2+`TgllFuHw<~pYBRN zaS6WH8~cmr)tmL)8ejN6yWt%*&X}iBzOWYaa3fCfJ3!4k+kKIt#@Els@7^nvKLjvd zu6YadMm*0A{k@EP6YsSU&hx`}@LVK>Z2Lw*Hs%#>Mc}VE1eIy`9~o`kh~ad6yCIm~YoD!Se*;G=TZ!q<3)N zjd^A&J@Ynj@izPZ09#&Ra574U@%8i9FaP_C>3-G+(>+axHND?#GJVy2kbO)==1{ZU zY%(S0gX|pHZW=Sqn|o~sP=y@DYBkxbi-yan&UyBc|>{Nei)zW)XWZkG1NcUC5T#=X`pg!3`(S@he} z_B{4-erWx75VYHUnN>f-`+D$d>-(UQU-^D(%_oiTmaWUU{Jh>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm=kS(8+&1cfk8g&;O;p{Rf@#jk!8w`To`JE-y`Je3Tcv*mP|Q<7qDD z)p*mjB-xu%5q_g4(RSL64( z7+*aDbf)@Ge6shNXK&TH7eviV#^|4R^&P%zu+B@$7~``Rf|@7CzC7JsIQZ~gd?@q@ zeF>tay|s$rynqb`L9hS>$B!0$^~v5JasP9jK-72_h_<^jd2t9Pzj$ZYJPD}{IW5frfWPI6s{hd z;D6HBKRI{62mEfR|JEmLJ`vZ|%@5vj9q$FAWvw7u-*)d?$w>u|wY5)4dAS7ypOkf~ z;60#uec`+dP=6-lnvVU}2lW*}odvHgYxK9z-i{`C{UZ?=SLx{f1P>zjX~rJ}$fJk3mmkFL*p~r$@iZoBmTiECdGM z`4B|lO?P`bvHtu4(6B-SN!!d(fa}kPu7n4rn zKLc(tu>*ts6Z3o)c70d2fAZOxqq;J=XQM&V@R;!VNxb+uH}sjd2Q(eHQy*V{6E80I zGVartZw1l5&{Ihe#{7QpcJ>TJ`;L6*sn$vl%IC%E;?hK)Gf6W!Q z^b5^~`iU`~VEnpzcsknKUE-eeW8Ufc>csuM=RgD>JO_p9-& 
zf~745CrKIxP8#&A=mUe#SEuAd->0fmw)~iZ`94=pzcv7l=ihsJ$_;CWo=F|W2SEF~ z%RzL+H8SX3Jn|;kI@aZpaboize#Lg47u(y`?()sn=9;_y3hdni_gq%%nyc5k+Cy_$ zsWuGadjW(pY_wkHnXAKL+0yvvpT60DGfxLIo4yA#tiU+E#Ie-yGZ39-{X|^Vc~zh{ zj@A5UdTS8kKjnRdLo~qAqfT;f0M2p|0P9bKsCB4VMG_mv2iNF}-UrDsxo1Oop*E~q zYx)Kx2Vn2T)+V}GQi!jExbX+76X4*S(!qat^t9e{yn!d;_${Dv?nP)5C*;99Im5s z0kyTubT&u7zuGa*q=}hdbPwlLT-79?ZQ2yF4woQcb9lDM?K2#S+^#T^>YY&v@qP0!$ zs_B185aIiZ7b5nYI5;OXaxONg=?Sv5cW^^ z3y=`od}!#oB$!30BXl@beC4COpxV7wT)M1a!yVTy*!YJJc^;TwF%U$XQZ(?jw5zA< zK~LL$kpbMp3#;8e?>a~r{2TwUr6IpGk;5^;n|X%^Ky*l6_8s}K8gJ7{IGB9?{a<|o z4lu0RUGIVRHaK{b?)mP-m^s?K$xv?@AWru?L4mK+BH=He!D8QEdSQA8zTvpeTHsg) zH)-Bh66fEQEKed{JoPzo(NC&`XfGP!Rj#*wOaklbmnZLSSBV3TFRuKt|MWBc3Oa9T z;6-EfDRKVKT_gE@(6Xj7H8B92>GG*F8BgDv+ecR>GjR+51_E*>1nW@!RW?_j%f!vw z-Z~SWO4v;AFJ;)yWv{s5zSUtPo8(1Vzz2xU4T5V!ARiHT0L(;W4)wy-Si#2_TOxYi zP8CaKeM=J|H~FV>ft87{b*#^;tj6=&E{FoHpS+X~shlj8@nK@_<6z(+KEOcIag`RV60!q^^aa_a7NMHBh4QBC(SzGC{CYW?9}qwQ`F$AdLK<%N_5; z66LiVJO-j>nU3eC7RVF6z6C_Pi{ppOqvKtkD6*|46-#_{9H?#zTnnP}kVQi}&Wp!f zF+JWt;Hn$EPp%y%)19V&m2~tU2B*zS;JshDPlVXto`hrCH77oFgK)?DYp&W9NUiRC z8Zso&z@)cyjOU? 
z>V&!)&oSXUaX;b3vxZh{D}D(B+Xi+z|CeBL*%-~gd3Q+yxV*dv;Vkwame}Bxpm#}S zXC8W27niINz~>ornJE-*>;gz}M4nu$4RkIXV6+SsELIoFxc&(991%gQ#1UHQ|88bH>?JRps>8+bLM?PZBv0dq`4sm??rjCZXOOr( z)>bHPbH3`jHU=A!Fxm?*Ot(X0tHKYE;eg;xZ@~$B7!$U8SdmzAV_)bdbND(hFK7{| zPrb-VcGY;Rui=}YDslN6Z)0a?-k&nKjP<9x^oiTBDJm(?_def zz*|_~YWL>q8*2+X!xcSrLuppF>LO<|6qJxA7X0ynG^32yX!1H6EzcWW@T{$Pi0nx=hE{ zj*Ro4gj@3ALECw`_rA)6pngR7$nkRd_SL*%OJIZs*>D#yJFOKoo)P!``<>V_IP&88 z-^3=oCzJBu&(P<%=9fbKAijvfyEtUfv$}%ln0Tcv`x)<32EjgR#v?VJDV;_1gv1-t znSpw@fAgMGbr7ta0Rl{}d4-oH_T$aBg6LBb`tiJPjc4Ug08yHM2gc_>xP}t^`(7;j zFm(({cF6d;fj=(0o*mb$i-3<`Kpm!!s1zMeEHRoO8-hEVYuIradmU)=UMxu2VI)X zt!Kt9V6W&RkM7JMk)ElSx?sOk{l`Rd3|WPEr&)LrY_S64mV?|*?a|RL96USCCBpQA z_`I=9+{E6%KmH~!$JY+QuoLo4E-|o@4-f&ncr3`8#Y@TSoqR{-sk_7nqGK6a(N~}H zF4IMrH6JZ;1>?jAfhKVW?_b?VzXkHsr(|-w=^YU5$VljO<$X`HGUy{W_nf-m`5<|9 z*NNl)w0KD#kuCBfxVqo|Ifz0s4;;u#U|8XEp^+W@JM@qLE<%Q{fo#FQupqGmgYXhp z($bMG|1sg;eg4{MCNBoh+#0xSv zecl9+oGnNg@IlafOTUfnP&-+fI4AC}V&l#ZxPH{{=eSe5HGK=UbA1LTCqd%rd8hiC zbIFhCKJl_BqiSAvXg&lvh{r3kkiON|EFb%072L4DX-vE-%@v{TGj3@=Bw!-37XST9 z*L+2uSK-q}oNs2Fcoi;k{lFlpzJuvQ;ZsMcOB~|u^7q{j%>L^L0$;c=ZUS>N7FQ+y zzWnepAzMG z53EuAF@JH1a{V1+UOy9-J(r@iAF+PV1v&mq%;Q_-_ybXnkJx|DRWkE?ikN+w~T*x%<=DT zmG%!rX+L89ZW;gXnB(6alJ*;;v>&nmZW;gXnB(8QP}-j#rTvKW-!0?c9drD<1JeHF zDD6kCU&g;X=JzDEGiaGvW0cn47l=dUnFXP`8bNsu?rTwxf?MJR(#=k4(_;+z>zekkz zBiAqE-xYKGyL{4qN|g2^*DvGW6?6Q%n6Rv0iqd|>`gIxqdd%^!w@Uj5qO>1z|LHRR z^_b&d4@vutQQD7Ozl?u9=J?kaO8fJpv>&nmx{QB4=J?kG(*EQq?MJR(#=jnO{Oje? 
zep!_EBiAqEUynKdbuR7qh|+%K`epp&;C8UK3B@eiM@ApW&WQQD7K zzb4~fi#h(aR%!o0l=dUmugUn=Vvc_;B<(jwX+LuPGXAxg<6m1S?azNa{V&?wV2~yE0^}mqO>2mei{E-%<->rX}?F5_9NFX<6nz8{xzSppAx10 zi2F~I@vp@k|M2k);-6iL(tZN;^}(OZKNLV|GAV-V>$$i2Y|O z^0&nL{fjk9%WI>wAF=;TMgFpw$Iq7|Yoi<=xqcP-Q)6ELWNCR~l=dU{UqyaJ%==#| zEf+^=KXUzra^(dvuRl*(&iYUG+b_ixhI_Sq_PR||dMQXcxEb7`>gRRqgL z{UNA_kM~*ISvOlf&05zVg?jk-JZn4Swz^nN*!=Y0L){ZL{~^|PCe|0$pOstd`b4OQ z+aG0ZXH~Yk=pUv}f5J9CU~OkpZS@(}y8b-W!|m5v+u1@}eVMhczXbL0@%7erw$@e; zS?l_IsE3c=Vr^$zZS_6Yy8gOte5Uh`V(`YNc0 z%^&U(YyU2`y3bnI8=)RPzMHjOOS9GUtabf8+jwqm*9vX*A=bM73Dm>wms;B)x3I2f zl(nw!hI;t;Dr>to(N+&w>v}8H!^cmxwrewN^;&CP{{re^`v`2eyg=z+heP@TI=xbz&ie>fBm)`Jf(dztGyQ4-s{* zd0#*cekwO=Z+D5=s9$#$FCThfIS|V-EUT~#U^xTJg;>^O8DfGMgL6lftyo@ErRK&m z8OyF%W?@-?Wigf&SWdw5X)I@Bxfsg^EL*VLf#oqQFRD^^W0{O)S1hxzEWolD%L*(f zVEHtbv$0%^WdoKiSnj~`7?u}R>2hP4jAd6Wv#>0{vKY$>EGJ<3G?ufmT#RJ{mMvKB z!15TD7ggzYW0{O)S1hxzEWolD%L*(fVEHtbv$0%^WdoKiSnj~`7?u}R39c^v!jp_; zS1hxzEWolD%L*(fVEHtbv$0%^WdoKiSnj~`7?u}R8Ry0_8OyF%W?@-?Wigf&*8kVT zjGgb}CQ3gSe_dbHtLPWQdk@R-*?tARihC6m_ZiZA-012tW7F9^eTs_3A63s_U16~J zqw1Nq_AEANP(i^je!-+%^~tvN$+79kwYBHj>iM>MdYW|(>FL%vre{RXAwAR9C(EWI z+t!|AtLNJ4dA9NS*7l4vTirItj124e4BPxNvTWmR^UKJwjknD&BhNP8Hor_8!%W-! 
zGBd2>Gi~$BwDHTd%`el&FVi-^OdG#U+x)U@{IYEG%d+vyvdu5c#xKh@zbqTSEZh9D zZ2Yop^UJpJ%eKug+r}^3Hot5eziiw5vTgjbZS%{v@yoW&FUQ6&$2Pwl8^0Xe{Bmsk za%}U#waqWr#xK`4zg!!?T-*HeZ2a2D!(kPu9i%hcAEj|0lE*#~DRA_WRBdh|<1(F<m~r;D@s5rEeS>!SbE6!rG587?=5HGNttH+xHo;;< zWATxj8#Z2cw)<;jyHy9ad-04BI#Of~-%>Y5g-CYfCvx)B0vO)01+Sp zM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+Sp zM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+Sp zM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+Sp zM1Tko0U|&IhyW4zpF$u37oxEV|5KJk>mUL|fCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F z0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F d0z`la5CI}U1c(3;AOb{y2oM1x@c%^M{{R|9#4`W@ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test15.sas7bdat b/pandas/tests/io/sas/data/test15.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..c028d8041a3d3dd2849b6b3bdeaa1dd94c22018b GIT binary patch literal 196608 zcmeI2e{@yVweR;i36%O6z0|?5j4Rb#ie9Xg67su(S!h5+P?U(&^7^8p*IJa6Vh0th zC!htPDx$5>!YwNDLlF}QzXO~EwAA5Mgh2Qu1QH|=Kr~o?6y?n|*IX-S@6$cr>3RRW zcgOd1khRy`Yt6ORXU#d+T-$nH`>*a~McJyZ-+ph?oKD_1|C0T?ZkKoYc1{m3C&!Cs zM_jXvh)2aPPv&dW;sS2D14pzf@P9=QL1K0lzS z_N*=A??2BovZbd-cQf#!wpP@qZ*Jf0;n~A}_RzzVxQF@EuTOrToPIyZE6C|n&?mdk zy$P@QaZ+La|H{v8m!7RH8c_4hsL}Rc`y2B#yQ#j5ezNlmzDFGdKmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY{( 
zUnelTss7uKJr*|0H=60q!}LG-o!@I7GJ1ItE7RrqxYWA&I_BlnbFeqvjQ{P7UE=ugrUqQ50Xe@lq|mJt0dA^MMp=szB! z|9FW0<01M#4AK8#i2e^l^nVzl|5%9rVqGR{hv=^l(O(~;e^-e9T_O5+ zh3MZEqJL+I{+%KEcZTTS8KVFF5dH6m=zl*%|GOtlz7+jQrZ3-n=QWXE>zBVhRpeK( z{Mf48kypZKzU$|iEZ+%DPIa(e&Tc=EY>wb|B;P!`J7I|&8$ggDiLyJVdy~-O$i3l4-m+Zf{dB{PoujQH|B;b@{|z;);K_ky$KoO%W#rxB>c8I6Khf{? zF!DWX)%j%~?;L*jLTNwH$VV+vFAZAL_Emkek*iW8ze473xyD8RomRKYMygv9z~( zzN}RjPm90(;vnG@zuoBBwOVyual?^Ex>{~G8hP(|YTac|wBEYzkkKC-Y~=lysq%a8 znvkV`ca@iqyVv(HcKm;2W!*Dj{5|8x{%qX6sXTBGt%J3?n(L}x@y`wBUO9<>R3}bu z=%2Z3=$YS%E45+ml=TiV>z%ev4c6N`X`k>HK7AinwM27G+YyoHoAz(V)j4W?;>u$p zzuw5zD%JGLpSERHerV)M_IL7HHTIgrw>8a=&M zDQDDv^V3$%5@)B$56NBq5~tCL-Gj4kCvlDWm3wd8EAg-E;o5LlC-%(m-zq0iq&X=2 zV&Y+JUtIiRlgNeVkTvS57ar2&gmZVpr(u=Ig`4#Y+BY`m-}v`2)>3Es*?sk7U+$<> zpO4fYCh^?vOWD)@_A!ZjSu@lx23*wea=F;ElK6b>TA7c;0k6pP_a>;v_<*tYCVm~e zQ|){AGa7lLtXJa3+@}?X`Rx)vt)9lsY6wN!6*own_qrJUm#*^9(^ID2XRFwGqi`F+ z^4N-6#mCy!hh08kH;OB zgFR1|nD>r9eV@@ks+;q%T=ald$!=9?q7NA6!@WigmcI$?x4O*MlFNm;rg-gUA0)8H7DA1}GLUuU3=! 
z0G9aaNnSDk6?M5jelN4S^TLDwQ_2Tb^nV`87uV=tjwBYDJ!7 zZ?Ydowq0BUKaPuZJv|0MQLvz zaFNXei%q+gugke3{$=$&y92b8XS}Aet z9pQGR;nw2~ImcyR?EZEM?4!+F_PlKR*>;}nBh}6Q!rFu6Z{ydBzlc08_7Iz+zRQkb z9RodOI~W%TVJPi7B%j8leF(+)=&2pb3UGsapgREd!gd+uvzjk%ZW-~{-Y!= z?R{Etw3=eiBRMZ-PnUem=1X#J?s!g_<4fAxyzA@rio@X(yUcuSzFPi>($Q-3pJ&Sa zWu85jiN7q7`AdFyc9wGH=SEO)4Y6kZvCipT<-OfpT|m!-nMn>uiE^7D0!w= zXyn7HB>&kW`!4NC|8;ypl6V`FaSPUq|5|?(JGrAyb><70^=t9>mRdQ##J^%9?@+0H z=ew0pnJ4%3ZNg2?@zNLl_kFuB$2R%7i9EKmPVVWFzs2MniA`4emPi?>(QnU%_y&oS zZ^^iF-VI%(Ox%!jP)R;OQS(6pkz&R|^+r^7vP8&`7; z-GkHN+V~&b9Guwoa1Txyw{bl@^ZVX`FDNs$`y%J;S6k#ePnmGL*6hbs)nZq|&Fe@*K%GVv=va^`aWV7bKe}h+TbAE9aWrv+RALdcDju zCi=y{XkL6R9%tjNoP+27piI7P{aWrjSFTg~>)6%P#m=q%(gkAw?;8E~JHvrhe*27x z{nP}`P0MHUCi(tm&nfv%vUF}hkHn2rbJU~yw7b!s<8rRuzf8WLNd9Thm&@zqJ}&F> zWWPE+C-;sSX8uaPtE^b6=JGYvo`Z5varSS&0~j0ehQ2O&wT(ZrFAr{1Lpea${4Lk4 z=j2-D?97>;a}2gaXRyf+$vvDcb7Ox^t926p7`o=v``aXo-Gto(D|3?(6r7HIEc|2F znf3Pllj3X5aJi>@CeMkfTFGN=UeVRGFQ2T;cP)Egkhnzrzt*44vh8;vu?KIu;?J3R z68G$V#2Y8~rfRW&iHCb<`TfPl$sNn&UN7^s_A_FM+@obaO7bz9kNNKUrr3|jpRG3Q zu=Z`^?E?$tJAl1kh`rsv-2Yw6w%<`M@!94-k{|Z|gW}H;17w~t;qZK1^dC3#vEx^+ zm-~@$w)?Vorku+*50t!acU+y}bkd%KlHYo({P~jAKd4If(fY4^4=JA}>)I~-WuEbv zIz_kECtB_IQpwwwZB*ubv3a}9r|Y72e{pu|?2skSV4c`KI1_abt}&ai)94;tSJ%*W zbr1Z?y>~rWqyFPwxgMe9~gM*W3M-c=dXv^81(EXE&`j{p>t#`Sd!;!>ymncYsIN2hNLrrhnyp z`R-k1>>(!pa>)wG8zr7w{R8c}Dd$>T=K0sv5?8JLh#y6&#UHO?qW_}0K>PN*aS?qrXn0w^w|m!_b{|zq-evt=^6lk|C7y_% z#w4Gdyju3%&NIj8nYT&l+w5C61@a2Z$(Ek@*Y{s(|L-r({QM8jJe|Qh>vyl5t-1$p z;}p5B?!9~EB<_J5C%<=tx#3+K?&11-=4ZZ#$hmCSU9dTjM@gKw?``SzWuCF7TBZN1 z-i{uxNnDce^Sxj5--9Iok3G6o@qbPnYvZKY`Rp}vPRTxcos9m)b>bIt|BXo=wQ!Dn zXPYYH%6EXaxIeCXlJAu)P~YLS-R3QF4qj8Ea`-3rDY^f~hHd5X^>IhXA{tM67hHzgk0bC&cut4p>+Hc|K9 z8LYERCvgu>hu?&g{l)2U>RlUta2nlvC-IM=TXP$yQSU!~AOHd&00JNY0w4eaAOHd& z00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4ea zAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd& 
z00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0{`C=m|1+R;HI~h zeDt3eE_~fKRc$V4d$M>n_{0lS^Qd$9mmQBKB2>jvfCWjqQ)kv+kKN{+{t;e>U!3 zdeN-kHk+i?a9eUWhi|l(cKt1l0#VvYE%NMEZ^Z*~VO)qZM7JbR|9aLYu zoD>{qTluLPrWLH7tVb00%AqEk=24SY?Xe41@m6w z!yElax1;WXO-naE^*bli)uv}%)jc$s5&RaHXq+wWCO5OVPPY6yy{bR;rBzu_1v;)M z9W$CVnU@>b`6icBlLmia{Af(Daur+aa6BncSS8x)z6Ccu^+01575L~ zU*R~!Ramw>WZRz>nLwCwTt;E^=+^rxc#K6|+n#b;Y$DBUe_h9vkO(3s zp{?tLSb`LOD@n9X-MEqMvE5vbbKSIhiYMWwK+&bbzsMGpt62yY1$>DLc^oFS`sl^}8krI1c=oqfnT}gdxpwdbflu{F8yznzs2@+pCL4cTvNrQ{5fZ zUWvtn__WQn>; zsme~Bs_dbBhvKY>s*_{Xhr{6|(@p%_phFCwP!#`krnX4iOsihv_G&d!S%dme2c4Vg z{o3tq?IzLG9tX-g%1)b5&~6OVIVk8zlo(19?Z>rk^oJg4lccw?ebkx7wVa-msGBtF z1vRvqqY}+^)>k={1n9-7+dE))ztG1OLev4+7B=jA=JWObuG$qtDP+@_a5 zp86k>-DESF6Qdz^bME_HK0#LPBEe>DSeLD%6pDh_BP2M|wr0?aZ`Z2sB#3{5lvrD{ zPD{bUP^Vx}p(fJ_c|ae2w;Wa+eY#MWW1@p6T31h6nXC99dM4XU(GKdfOjEWO0#1%*oox%tWm|avgD06kDoG|?7U8x&i%75} zb2b?>^BYgQ#hjw85WgCr_hvaQw^>pVZ85LRC&6inHrme?Wzkv~4`r4dN?yB}B!?ud z@?Ol{PLiW?BO0k0RppRmw@He!HIN z7JX4_bnGW4F zw1#FrT(2;3r|Ku9;fT4s@>0iWr`amG4`&A_sn20qJ0HODjfsJV_kE84a$`~+U(7f? z$7LVf)-*{BJig{~bRE^&lZ~g<*wAs)y*)u4sISiPeVX$o@yJ{Cy4MV$Me$KgIji>b zRSmSLGm^yKCfh~XCFMkPKxnuWrCa1GT)~-er9K?uIt3W{xf-gqK3!&EbJ{RNo8t$@ z%S{Mhdo2ySQ${JG*fD`(N4?zZ=Imy88$|tNb{LlTrO#w4wGu@5R%N5{O49>+EVnDdwvfSu@kpj z!hpL~$srVoHnP*k4or78IQQo?Kh0os;yV}D11;rpKeKqJTmqT(x@ok=ee67g^_ij` z?n?rD9O%R_{x%7Y$rV^{*eXTK{7^DMZ8vJ|WRlo#6*}=Vj=xqeUHam(E}xpTP?B-? 
z@LimrnkGlt*2?IDPqtR&Y^@vhnAJ!(4M_u2LNQ?$Nem7pQ$XuJ7Wfbc>REjsU#c^jk6-1m^YS5D^l|gu!Q@4Y z9wIgNYN3ys#X9OY>nQWtd#al@4f{0?s9#=waSG@pPX-ep9FDE)Q722B#3vTgQC9fovHt&$W_(aJ@kH|__scLmyDu8 zn)NX==p|#2wO1HZpQo1>gWDlj#+uHbs zGn7;wv@x|usP zjK`mn;`qtyXaCPk+y67mc>FObj-R~$?Ejf*`+uh5@%yJZe)9U+|1;C}|I92Nzk7<~ zC$FFVKQnFr*Us;G8Rt_RKWY6L?Ee{Q`+r6wkAE=5@srk{!Tz6-w*P0udHk9b$4_2A z`+r8-{-3ds$Dg0#_{r;M|IbL<|1)Ad{*)BQPhLO!e@5E=pHar+k4bU-u3MZNZbE2vUvRNDUP4Ke)j*2wEaKB(<4XEr#ODn`lIas(X{987@1ElL$?IqTkEZSa zQBRK?IiKSAN$ZcW|3}jH|41W`e=x=Ilhz+$|Bs~Y|B*P4Uz6ha$?IqTkEHGYk%c_| z{1nGeUO)SPByInX#CZHEDUP4Ke)j)J+WsFYavE*FS|vo|NMF z$@_0Se{9z9>^$+2dm!!S^LLNEyAB}(Ee8(Y)R?W|yGTl(o(Iok1rblO} z`E{Zq;l0--Q19liNAIF~V*f1vd#_iZ-q&A`K0x)v{)PVco(k0UY7*ub{Uy~qB+P$= z|GlTzm(boT^Vg%3sGd0f{r>k}d7!S_C+Le-1p3GP@4Xp;`fPtaI+N;& z6I4&wKgtFD^IaOKSNiMGGgMC;f1Uq5<+gr(+x_+EXH-w@zr+7NvNup~^w;Uzfxmx? z|2^Fe{NsP_uV-*HZ;wA5M-$^iCx82B7k@qDVyY+1KibXzp6*t@@uR)`^^8lYp1?oa z*Z)3R7^o|MJtK?iiQ^CPzmJa4^-k`*pc?&5Zgk;7hMtZ2_4dBO5B;SaCgm6@%cYD- zIa|twQdUYC_cSq&;*KdBr95ZKNT!rsrR*tXu9PKGmP$ER%861wF6Hx5E|RiZ%575C zOW7>tIa5Y6rR*wYPbqVyERnKQ%CS;Tl=5*YpOT>!oa#@|-C%Go|b*Wlt${r7V%MRLZeZPL%R- zDW8{ek(AX^Zj-WJ%4R9gnUYMM?V>|hDSJwpD`kn4rBaTSa-x)vOZmK%i=?cUa+{R( zQZ`F@&XgT9rR*wYPbqVyERnKQ%CY|cuZQ!UuR2a*JJ)}{J96O2AK!WXZNstxzmkEa z14ot)8gc#jaSuQEP_}p7ppm8epQ-ordQxZo&(w1Q?{mH3!%Iqj{9}*hrcGX;O@2T} zLEwF1pk5THXZQ8bAv@bY$LxN|bI8sKw8;(V$P2vB57Y|+^};~^BLDk-eFOEt9Q*b2 z_wN^&U%%Wy|G@nE#WT zf%)YH_~ixWmlxod7nomufM0%Ke)$1@`GNW62l(X&=9eGfmmip4et=(oV15Mweg%Q~ z6$JPd1m;%|;8zfsUqOIhL12Cb0e%I6`4tBE6$a*47~oeJm|tOlUtwT=g#mtrf%z2% z_!S1`R}|n^6qsL8fL~EyenkO(MS=Mh1^5*O=2sNpR}`3Ec3+(o+89EQ?q~A2^HO&w zQijgO+P@K)*X=p3>;6wabT&Dn%^&T+5&h*QVLVrtJo76VC?a1tbWW>(wytbGYQ!Y( z5YWf{|Ox}{cKSt`u=VOx(u#g&sW-gnb?k=+fR0W5v`DRUnI6mSTBo%_4c;> zji9~id2;kLv<;t_J%D*Pqa5N5Zpy-1|5BUAW->zqe!M{llNp zuU#I=7WuEFG)0fTKQa#)SBYGIu}l2%C5r@BZ{j<8&gjMh{&e7bb*;4%Tl(Ij5=vlp zQ~kF?U;gtiBJH0WxpTy$uYd{m7V&=W^S6jM(KkrM`$qESCiLgVL3>Sp@7E#kJ4uYu zI=XWX-&46ZImGZL$KCAOHd& 
z00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4ea zAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd& z00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JQJ--W3@xRNGunrIa0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X e009sH0T2KI5C8!X009sH0T2KI5ct0$@V^0YC4q|o literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test16.sas7bdat b/pandas/tests/io/sas/data/test16.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..867c3c51bbdddf25b39e2b17e1bf0b406fb263b2 GIT binary patch literal 73728 zcmeHN4|J8qm7hSM(Ua3+&ug3A`X@`#jWuGD_maGHFasrYK|vcBT^SV zQLudqRw`&wv4#p;ED?ePOdvqO051VGb*Tn$}o8H{BQ?&~t4bVBxJS5%CNjhPV3FUk)5l$FLxJ!Nm`=k~qyNaIgm zD^*muGjDsMk7j${(%gUgQ!+9yoR>c?zv4e8-&T84Q^Clhk>P^#3-cl)^9x26j6A7H ze65WZ{k$mBFZIT~|LH{^ssH^g^4pzTzjLGaP4ka-vb;sWB481)2v`Ix0u}*_fJML} zU=gqgSOhEr76FTZMZh9p5wHkY1S|p;0gHe|z#?D~un1TL{+AG#hTpBJ`9~K)GU#?z z<9_WvA^Duakb@*3&(g)L`co;W9HciKI41d-Lm|fx_6l z9x}+2ZyM|y#lKWRK=m(4VL+?(% zr0{1OAdg7-7mnk0uDbBe{Zj7oUjO*f4*t|%Umkb;umOlVyQl7npzx440w@LYvCFJq_9kLzcF*Zu_JKFh-#0yW1S-Tr{ zilltaW-jNI#JE>U`DIV>U$jkqBrE+r^dBeXJ&oL*>lK-UjzIn|QvSoIc}4MWOD7#n zKz@ysXC?Swws{Ziv*A+y>Nh2YnJjqjahr_^CafQV^V&0yQFGf3E01G z3%~06VU;Bw2Cg9K`bA$bHf*1<{hsZ2cWpS-yzbD7KZpkRBPd5~ zYHpA=Z9Fxuq`IHFFM8WRHuatZSuwyRoYAc?`5ZdOOWc=%d%47YMk~L}3$xl?hy%Q* zyxVxYmnO&dL5@6mA;CXzdmlRIAmo=yJA5_r^KB{Th_h+yxSRY}Kk(R#kY6kNyKyz> zy!*bp>&11jQ!M30t$c^ODpZyr{^H0(xBsMrUBh>O@)r8%i2m{Bjy`)#JTm{;-r2mt z3;R;!gUTN_pJg0BEA6QGu3W%xbF=)KZ@v!uW2M~@5A$;;{;uQ09OPkqjFj)af#8-e zozrdVdin@@PbQy#YP!{3dG{bbvTV_;GK$-HQE9#mA%v_5N|}1KG3qFE2WE=i({I zD~|o@;&t!`>k%uF`s`NzYq8;~xX1buzmD&E(c9>s!F;iP)ji0?w%3XMMA_N9iI=;( z&&tcO@3CRh{`Xhweljlkkz0^&dx6_{A&;+Y=VF^b?@f%0xWBB8dsUFTwxK`jl+T;s z&#O`noZ~aLaQPnFVCQN%?hR|XC>OdE4ywLbO8jJfysQ_dzz)`t@-6(+UwOdQVO+$6 
z`~JS~X6?|sUy*WEcervr^!hme@!6*$!d=f|-HM(MdYc~ex=p=pAWk1a@2QWck1&v) z-cz@!b9zrEpPS$L%&qLy{&|sne~m4CnkbA^y^4Bo!IS(fuPi!K)nlk9v+Mb@?yli3 z0C=y+tE-k{UsZJ!_NC4`F1Fjr*as4b*KjlTH^hO;>z?J@s~S6R$G*)jmwp~th`1#6 z_SPou1^v|z>_j|WDEr^_4C>5w$iq_J-ps`||MC0qAM1!a{*PrLi2kER|M-XP*bh~{ zVIRBaDQQpn%`onn30`=FD|-*F=b{Wcq}CDS;rLa= zKJqI24*rMv-h=fej=E`8Bi0dBFJQj^*3AFQJJuAdc)0S@irdwh1DVts6 zW-j7boj=6tF?p67dzpc*I*l&j|gngVB)O?d2+^cSD-od>98!6}So@Zsfa7fyX zV}E{O5f{tUCe+CaCzon2{Ka}M zmgReQNPpBhV#=M|TQxWR@qWx7ehy!Tyj=?aQMVtR&GoSCOM|{IqFs+jU4>+Tb{5s6 zd?1^8PkjWP)5p{GbU*brT~Fr*($lTzbI?80IhnRhT+>{G_p0LN?shKEz1ZJW+;l#H z{Z{1(_7naP?nMxxfzx5Uc^>Y{(`~TQ_F4r&A1swH}JI;;bQFRdN zj?Q(a{nmec>d{J~TpgE1Cp}kmPS?}h^sLgkf%J4MI;YQ1Z_{n+oIbj)r`u%m`N8f_ z+>?un57a-OZ^pgoB)q?iWn7%wg80V!Q*m=~8{!Q2E^41y*n<373p=R0j%?tv{zM%Z z$9e3zC-``AfL8Sz;`@_!?DIH>tMg>bdiWWKedM|8m$<&EQTq?-f)9Sr<#|l4FE|gM z)5^Wuu4{KGQbhzFO_uxX){-BX+QU^*+q+ z@^165P<0LV`RZl*J-pg)&+5Rv5$474zA_%cxpbEFpX2^-WdpAh*A%KQz&W-r57m8@ zj1e|=1!2OUdQ!d)5EeaCH469&l$`KyLvw8HvPBtx%f3# zhJp0-(Fs6WJ)v9CXNy-N%H+S|d>9`sanF04i(~IN+$S+vf5v$`&N-^y3(5Xdrg3=> ztj?2Ir`AL-%YwZ`HohwG8)9@KeR{ z_@%h-fj=B|vg?ohz2sTMG2{=m$a$zdQ~S>^7U8~1oj(!x)s6Z+uCBfelKijFg0Ot<2TCZc3 z{V~mWk7^ykz1@@((zSAK@2o%tK_J3Ohg6i$jVCLei!hCWefTD#Ia7*lc)u3oF(7pS^7`zF24XSgri+rY)oQv0yZ zDu1^=rvFZ^{$Azi2CQqpk+`Y;3!C-##pSva|5gY5L7k$`;dxt7M_^r6b%$$D5B$C$ z)x~-w57?&Vims<~eM9MKDc1?jirzubF~VuPp{}QMx-SFGXh1);@pNC9eJlbN0gHe| zz#?D~un1TLECLn*i-1MIB481)2v`Ix0u}*_fJML}U=gqgSOhEr76FTZMZh9p5wHkY z1S|p;0gHe|z#?D~un1TLECLn*i-1MIB481)2v`Ix0u}*_fJML}U=gqgSOhEr76FTZ zMZh9p5wHkY1S|p;0gHe|z#?D~un1TLECLn*i-1MIB481)2v`Ix0u}*_fJML}U=gqg zSOhEr76FTZMZh9p5wHkY1S|p;0gHe|z#?D~un1TLECLn*i-1MIB481)2v`Ix0u}*_ zfJML}U=gqgSOhEr76FTZMZh9p5wHkY1S|p;0gHe|z#?D~un1TLECLn*i-1MIB481) z2v`Ix0u}*_fJML}U=gqgSOhEr76FTZMZh9p5%|AA;ET^3@zAj{T|u@ctQnf_^3~$A zzrnwLrO=G|)c>YCg!jMyDIc2VGG*ekU+RDSs2DlobNtl#=LpRaf%UV5X2_?0>ijdr z%+mwU|M(E^{e2)lgW%UAG!F#U?-rU#pZclu?;PU2|E=OPi2to(io;a%*z7LKPpC!_#8iV z{y9Q(L}2|ap&9b2pE~~xG4u4m^FN+0@?U%g$)6sfc_6TUx6n-b)K8s%XS&FL@fpPb 
zRxxsm&+${=e}mAh53FA&H0Sx$Po4iPG4q*$=U*#Ep6YY_)cIEn&8ooql|nP-Q$Kb7 zp*bS3ewNS-`P5IHe}io;a%*z7LKPpC!_#8iV{y9Q(L}2|ap&9b2pE~~xG4u4m^FKaF zLk&Osvo#b*%zTgAvNKF3de{|!R3KCphB(46N}KXv}I z#LQ;~o`04E2eJWb@k_zaRiJwo$9VEt~Pne?fjI{(fzk^kZ|i2to(io;a%*z7LKPpC! z_#8iV{y9Q(L}2|ap&9b2pE~~xG4u4m^FPi+{)^AA&Yx4=F4_Iv+;2TXb1xgJN&Vly zCWU^N&+${|zm<9UpAet@F8Ck6MU33!bNtlzUoSKl1=gP@H0S!%KgFG*_rL#{DP|rI zJpZX;xp<+D3=5WzHMUD^9bM!=80Mc3yV&ITyJ74mwG7@@XYAkI|5vgPoadZL>@UuKx8-@~=dLW_tZ=`- z?yPg(bFN`qoKIYCkF(PK>~T7rB`$r_IqrTQaR#&H&il?{_xla(u+!$!kDL?kXN>*C zwX@cF(rI$Jmz)F6xoo}rO@B@Rw{m?sU>ROF_l}}cx+#CdNgpKTZj)*s26KwfBK(ce z7dhbSXOs3E!sikG7BYU6v~$9j6aFruKc2K#5`GflXA}Ku(msXowS=Ef^y8#`7UAa- zekswfBkhX_-$3{^M8Ao&w-P=<_$@@gleBjczMJsdiT+;F-b47KgnyIhA1Cc<8OiYE zZoFn7?-lmYOEpOf}-!jC8X_lSNaX`e*+ zYQmpQ^rw*aTEfQ(A13;^9=S^+u{v<7rK=p4`mppBqypxZ$AfcAo(0L>hP@j-JzM}by=P6Vw1oenw&bOC51 zXdCD@&^@5NpeH~x2V;EDT+mUV6`&JAYe1)i&H-Hj+6dYPx(##>XfNmq(9Cp<51I=) z3bX=rB4`chbkI4V3qTt|+d#L0?g8xuJpr0I1mlC|f{p^M0G$Y013Dda4(I~VM$k6U zZJ>KVdqGctW)8*ppt+!HjD3#enbKdEosmtRveH=TgxHwzm)=ry+s%{n z+2}D7V%~qr7r5ExWgKIY50mx?8#k`3>?dxK*soJa`V0>|;R0PhO#BNI{KCY)Fu^ZO{0kHO!oo^FGBo_5d0#691wEzbNr9O7M#k z|DpuHDDf{!@QV`vN(g=>#J>`PUkUNAgy2^~{3{{&l@R|*2!18Rzx=#BcaKnOh$zq0 z2QCNH9+VEbtl?CrzaPznVRa{=s!6p4QeN$mO0N8)ysAT$d zU{>EbM7>d|T?iv8P89tz+g6r}z_hL3xiMAx^(E@9;7G|7e-lh8{$|48PIy^!fBo^l qS|j76TjvnYn4VUq_5b=<`258#&DY literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test2.sas7bdat b/pandas/tests/io/sas/data/test2.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..ba0b8e8dcbb91de3f8f919a2465393c1bff9c2be GIT binary patch literal 196608 zcmeI24{%l0wf_$oDRr2>JSXF0D%Ct5`mm-nBq4t)%0@vEqee-QT7Lb-iha~ZNhx-y zV0!~97!V`U3Y9!jkv|$SfdByl+ytbQ;bD|0SWu&2iTqJoV$~Mqx7XTh@4N50y|>)o z%x~uX-h7Y4-RrEg_P@_wd+l{j41(CzcC@-;P1ZlZx_N%@;H$qIJh$(~ng86se~{O| zN6)+rXLe4HIDK9ZZE{r*WS-N*nv5AfCi~j!OGlQE98;c`{eORr7+yY{YTw-d8&@v) zUw;^GOYUdEYtdLqUH1rj@-J(Hl0mrzxr1*UT>9gC@0)5={F^tZa8Q2Uw}uqu4=Nfo 
zc+g$mBNnF?7yj?Uf|!>2Jg0K$i|}pZ+fhvcC+1&K_FO|FX2)haPG9@aZ7P6@A5Dg5bekMxS#k2nxYP3_vBhn z9`DHmJ-Pd6tZvkk=X&xuPafdOCqInUZSmyUo?Pz9*`9p7D^|DJlOOctGEeU3$z4Zc zb(=gn;>o3++}D#2|1nm#!IP(Z@+eQv^5o7BVs-01d8#Lm@Z?NSK6p4*cZDZU@nq%6 zeLQ*pp;+Ceo?PY0Lp`~-CwIIbtGmRLD?Pc`lQTTI{k>S-#hyITlM6gK@Z{Z{vAPRA zd4ebBdh+RaWA-~ed4VTacyf*>fA&tS?siYE_2luMJkXQ7-;ULddh%RP9_PseJo)6o zSlt#+p6$uyo}BH;#}CBnHhc1eo?Pb1{XDtr53#yUo*ePyQcv#d$(#2D!S{OxK}L@t z_|+RhaFxho_XfdbB46}c5EP4id`}SMiQKR|2!@J0XIBthBJ%K^K`>h6%pE~6Smd|2 z1wp>Z3(gG!ioZQ{zjS437IZ28js0}!dD5l$H}szL|>jnpOr-4p?#Vz#lNwi1xfVfN%UDs^c@|h&4l)o=*yGnvy$jL^gg6Z@o(&B zK@xp=5`9(@eaGvj&4l)o=*yGnvy$jLbnKu@@o(&BK@xp=5`9(@eMh@#Gok$?`tl_D ztbd@lBbRU${QKbE_5E%?Jk037Yr5}Vq8>jneb|s6Y!rQg(SLd7j5eNd#k+@$Uf)x_ z$jLVK!k=Eh@rRi!#J-=gf9E;1y6)rUJ8n2)^pP8k{@$hP(DP4i&u(ZF`vFFuy-dCO z=@|XUj;Al{BZu0|bLmeLh1JX{R(XZXA%)C0J-Qbhs;Zr8~r7%rnK{lO8d8OQ#buEb6iQ&@%ZsUUZ@yyAq|_c!`8TZBWG(c6Ano>f1aNGmM%ww=GWs`DAB@QI2)>|Lv} zR$cer{aKdV^``xtMXLFthmPIS{Elfqa;?!1U8$<>x?^%SU3XMfRo)dROqlc^+1Yna zo^udzBL2YoW?#@-EM|`=?+#{!n@71Xj4dX}d3Q}bt=>n*e_k=|Sic&#Sly?0*VwPUE$xps?GBi)E*t+?+b09X z|05%fzVpryx4}l=BdY!~kSbXJztZSySF2?dHYV?v_Ql^WZ&SL{Z>_F-)#$AsUb0$U z(yRK{*UYi|wA8et$4jFb|JWaly&Zqw^~&fiAK~^{o$QOF((f&%ovj92qL?E!WljWO*ET%(**$GabSHEW!oCJKpF<29~Dr*?w# zZl`g^{K!2w+sprTHJlAsb?U%e|JpEFN1B7OE+!sE-ipe;*erVCIclAH^vQdPoN$(N zWZxRm3pcwj^t!P-|1y3ZBVBEVt6kSX*5$Sa^%)&6+a;dcb*Y#aZy%Akmpxnk^vd)0 zJy#|EtRy~PwO+;}aUdu$^@GXkSA4)&f0KP3xm~@rhYq$)GGB=s3+E{g^E)Me+IBj( zs8KrFuDVX*e2{6{zi>_bI0I$w2W=BS?-XujtdFc}ReY>n{;u>Z{&sDPqElbT6tS0c z`HKb_&znXsoFg;0DYK3(($3AM-#gbSJ|5dG2Ya3_4~cTT`wxiPImrW&Z*!!GtR?x;t!#` zZhM0wXS?~)eX@>M82bk{Nj}pm`h25rZB%@$efqJCU-qx=|5lT5i2c=UANizJ;=lEC ziQ6+?FzwlK2h#6F(RjVhbF#N8+U-}KXY;^e#%}dXa_-3fvhB^=p!nGT(C%OH|FYGg z_QjvGf1!T6h%VXJ5y=x)*USF3=cCN``;F>9DSa-o{vdgBWW3^G?%O6D2AlqlZWYeL z!N#ji(Z6%PI6q_;Oxzt#;|!fsyGLBr?H=dLPH={<4QI?6r*<{mbJtnC#u*0Y8ZoC* z?-=nP`+m_VxsbF=*kWsWbgw|Uo> z8x)7bhxVHB*nG9>enru0%hNS7e;H^0m9k%!%lIWfJUK@>_j4}n1m}Wm<6PK1cMj_` 
zu7-Q$nvZY8Jx{FhkM@hJ>Uxdu!)%v;Uw_96PrF{#t;)RDNIbXeweuM{U#x#h{8W$2 z`=IrhdTLXoKuuyC20* zZfjG0_yx@FYuWEzt#W?J{)&kH+y)hQzFYnDMe?4$UAW0PUj9t{`@UV54>reh6Mf|M zHhE8%{4FBqNMxF#w?v(Rn)dCv5Zx$oa)vxyhn7@XSGaDr=$ZCs7OT>p09bDf#m zb&+%Si>>mVr$V@0W!B@eCh;rbX4mVg7Fl0;pS5wdu1WlMowOt8`>~D6B^xoP=K+brMT>^UXhNmeWjwIgxk_S<(^GyGzSQqzZIDI99^%RpE>t)8-xAOIc%bkSi|`;8^_;KvrZG= z2i>{m<82bvZop39#@t{81*coW0Qv5k{h{QenJ`z;QdsCD6zr@4+bK>`ljg##w<-K0U zY5k{cxx7cqc$DO0dOYU4>nq|vqJO-}%)|P(jkk9%k?#QZ{X+b0VtxE~Eo8yU3%eGb;Ho-9SJesp$US#8xJCMLk6aCI-WdkoWjWfsdP}|J zV;hB=#J9qg@_oskM+1$YRW1tU$wQ1jC{t!XTK~y0`l-w0z22^`#H*JYRQ&#u_u0*B zO+7nKTh42iJlyV6`3~^FhR}JDXX-aBmhaw;W?m85FBhzmyiwx0ZGX5uH|1Q5$~b?& zR^qDlAK6De8s)o(jkn`WzghbGDu3XxekJE@)64Pm3gq1R(bo8Q6_NJOYYW-O^2Vk5 ztHHkKK?wXyrSQSvUk-{rldewoA*`*$(PC#S8Ib+_ZpH|;FiEPqD}IddB-$UeF zw(~C963U|_&fDj<404%g$Lh;5P0owz!%GMEuJ89p-9%&***vgK zk>d6V2ei#~+NO4$>UsIB3*OpuL@Roe$+9*@23NfMD%BdRZI;YeSG;@kqfg#DL)+Y< z6{n(#3~qjOxmMh)6(j1@zr3~Q(=mrW(28MtXjinTZTZ!EbqmpQ_2-$9(rtFU5k2yn zfl=w>khUM8700%!(<}e<`i(!V(28rcLQm_lwoe9*f9!o}F=vHop}RUuKfZaL`rQph zG+q7pDy_)=iMsIHZx85IO^V@K(YZpscYoHZ>)zXF`E7qv@t~jFqfLftlam|O#_Rj& z=|5~u>KZ*_Y8>JCr#9*76~b|=u9mw^U3uQV=c=y!NNbb^^spXm)m?W?KJ?%nRaKRD zy+4z0W=xp$ADuDr@SHm*Pr7r`gdbPlr5_xor-zOGk=gFakzYH}3Dj6cPqz13GgXg0 zN>8($E&D`w^&maUE%h>$je52xhn=b^T&aG={x25O7SMam%lNGpb@?nlbhoXR7A zTlput&(Q``m#O=-yN&(Ydv@wCHK;E;rfR1;u1#*#3$l5wT~~I|dCjt}`}A{iuLm{= z;&-$nPb(T0tL`tTiAHL(OlvM!rGCL~x?5|wmEkSw-`PtKYE6`o|9-6+O$&9wTe_Ls zw4z6&dL_GI^2EpA(~5Cgk+nt^m;7ab9#qrI)?W(s;~#BRG_%)l`~!8XTbOZPn|)6H zr~6%6MXTDSt6!}ZA2%x66`zhdHmtk-R89Rd^#k^mBUHas8%$d(e5u2Htyr{Kt*)aT zdraoRlQH}AyV^;<`MhGkFpFq@XYulBS2!WA!^`|zx1LU5$0Fn7bDfS`d)(bR;gD%N z-rs!J?20+uEVJTmG%K2R?o*CacXbi@ziL(F>*wvGtx8+3YMMk0T4TM}iCxSozN`Ic zmYVqZiRr_Jyr~VY)AyBX8g0b;oAf>H{{3^*ztY;Bp!MZ=(7saX@N)jlC@qiDv}($h z3-EoUAwSXGm@Q8mku)j3G%KF#S^56_Y&VX$+(mKG3G8u_}*AO%7bu2nDuW5 zKL6$2Efib^>&blK2XqS1mo+Iqc&@P9qvk5R&5sk;k=mxN$vffnCayWQQQb;JZs{Zw zI%#%1qsrJ-4wHsr!{@Eeo4TtrwW4W*oFx%G7CB33vF231s}+}Oi|*gJ!$KV6(q=DC zQ6uGb$^@J})Xz} 
z$hD>i?ITBr{jFy7OA-N((la7+&nAhQ6eJ7OsmG44kGT*Blh_*b!p2tjxEGH)Jz0*d zV=;5{&^U$|CXY$8IDZ+d56(zLN7d?9@3>`*JD*V(>*a2~=nXyOEA$-H3}{vl zFO&0Px;DwviheK2ZlrXBNX{5t@7UA&?`u=s=0kgRjZs>myW#-#1Ii7el+pBwisvyr zIuTAatKa{8%hO-~_GVqFSSv~!#eugHUpX|crhMxF?|N#uXoJceSJJfe=Ua3oIwfvv z6E}K?dZOWXwJOfHK0QFqkmlS5bq^m-?wFmvNDe^SP2bTb<-lnXxcnyy5kW_+;O zPEp_LF#UR);uxFtCN)b@HZo04FbZ27{h}Ks`t8uoQsf@JRMA+U{yObRMP98N)9&;~ z{am~JLp#?i3Myv5$D%+*FIi~wAm$6RI3&cXnCHIxINQ$nz#}#Z?RLg`oi&2bs#cnx zj*c}gbut?Ykd*O_ZjeAgTa31O|CiODbxN5z&QZ%;ZK+YbnenFV;qL0|wYi?V$*(C_ zp@o^WURIMAkK80OUwwl!Uvf`6KlZLs9G{wL!ReH`ze%<3dS>LhQ?%#$X~kEfD&}jn zWvL%SokGRQVcm~RQ=DC&Pp1jRp3FLFgLgRv#*Q|{0qh7J!gSz7$Eu%l68fQRrCm#u zaXiY}br0MPx$oNpI8CJ(w11xBV|V(y!#;`V``TGv0!Z%L{)D7&@9IW$;(n^;%MEVp zXb&Mx)%}Vc{OKA_jQX$i4mwJVW}KX(lBKa-y!?;4Gs7HXp^V1(Kt&I>jdKXOp$Fz* zit{gd8grQAv+TC&uD()_yuL~Dki!}xIn3hcCD@$M3fgAbOC_s3LHmJ_-SnaaRdNA7 zqjoNom!bEGk=`>k`Hhk$zo9jZzUxJC9NI{f1|P0d{Ib&CrWJIsZEp~4ltfXSdtj67 z@K&wJ*NWCgi4z~wSctrC+8l<>+T?1=%|2(Q-x^Wy zZlma-dD7YN`7bH8)HnJqxXZnB5UJZ5)MsNT2Z&Ou`h|T)#XP%S5niv^vdP~TP&HSr zmtJ1i-InMybF#eCb?AFK>LRyG_PUAMq&>ZGp3)IeZ&_MV$~!x^s8Rf8{F+wia5Lk= zHKtd3&l$8$*0WPTETg%vYL!HseT@A5+7>woDV?H7{6&LwL1&LXqjjyB+awmZ&^Rb@ zyK|jv0UA1aR@ZXVEr%k_$Grbnkv&DpQQIW>SxAi%b8TZ@@hSNB_g=f>!hn0Fl4C0r zZDgg5ADD{laPH>>OS9RXT#h7dIU{+mpJhI;yiQ{#)-#*cwY+^OgQxA0d!Kh2(0k~v zsJ#ACjzW5ox*0dm(I14F6vpI)Wn7} zkZfYPEtWX;wr+#I9}SqV zE*npu2zYbTIn;TlH$mFVJ)-I_`Xf!-C-gypf^6+-_s-Q_P3w4ho5U#ooJ5*SR*R33 z(^8V8dp92APM|CYlb-@3T6^sqx68T0zdRb)%f^lr-^C_0vX4pJ_t0ooZUv zDSHBTS8LCGXrTT`;PM$cFP@E$G%lz(Ay&h92HVrmA?FT$a1mE|gdK=~D$Zp?=`ZG= zv&=^hI*n+zcQ0{?0%z-l#e9gP1Ww=RZFRCuy+B{dGFRwI6n5TuPCh#DcL~~z_bzqs z6(;^>FH^7TFLTd7MIH4e*Er-{gT8|8ZB_KrF>)QHALOHJw@UWeK_H2~_7#&E(QC8b zyER?)^0q@K37wqZd|8$92^ZB09nxp^jjI1@+g(jdthZ{-o9pGBY>8IT**kW*dRAvi zUv!Xfkweb7q}9X&y_~v%n%lRjn>cWhXHgg1>%AP9Q|T#-Z52loYE^pmsR{5dyu+L1zV3# zQRl0=Df$La=)28l6aSuaN^piNBT(-_ht4=ms_e)XdTs>s^7rQcfCVkSH zPryB#K9`8cuG9=pvi0$_>U6U8!4!28*QfnlTA$tgd$wbXat|#j`bk`$2DYwGtIiU( 
zUYw#%;(Tg(CUeu8PlS7zo}!<``BbrWWmLkvmJ%jfT|DN^!;T~F2 z^pm(g4QySXR-Gkmy*Ndk#Q0XrGnt##d?MV#^c4Lh&Zmm4E7Ph|!Petb)Ja^QGM-6k zTJurd!_XA{B+e(7t#i_&mq1 zRIv5<6m=5kQ^qqXO=~`idl;IcpTzm(vUN^cb+XyIUy3@3^U36y^hs+z0rzmaXIlG* zt&gWwr<1J@rl^w`uiJa_{^8%V-ap(!ONxFH*QbH4>(i>Ugsm5+sFS!pwLFu#Y0W3X zJxov0PvU&4*t#;UIu&d^K1H3x`IPZYO4FK;;vR;k=qGVLxon-2R-J6N?w6uY;(RiB zCVkSHPryB#?vd91Ve8{*)#+sGgDL7H&ZoTx?;rj>>;1z$w4~@KaeW%tx<0KsOW1mG ziaLq&spXl>O=~_8?qPb0eiG+X#nzQ+)u~|X@hR#g&ZmrLQkvF$6!$PRML&u2$z|)D zwCZHDb-xsK6619y&!kUU^9i_z(?Rn6bDnnH?3nv%=_Ffs1-;_5slPcK+3E3=dHQjlT;a)6z4j-1`YKPJ>dDof zeZ8@a!8reX}P=J$bQbzthvVd-8rye%`b1^z>bxeA1Iw zX?x4_Gfz)0)mz__5A^orHD3Ffp1!XqXM1vsXFt%>=X!FnC%@v^D^EYllgm7Lk7qy5 z(^q)%L{Hx5*;je`sh%A1B;AN_I*8lwkHqt}O4%gkb}2igJS=6mlo>sxe<=q@ zSs>*IDa)mtAmtP(ACz)|luM;-l5)G09a0{avRlfGbEJPM2S`~UEDJMudMal=ITp;CADVwC+E@g+5ho$V6GNYIDFXaF!3#1$& zWx13SdQl!>uP^kTnHozYuNpIa%=d1;=GM~e&^2Os`S3C2Bg?LtRC(V$_YMxO9yz9* z{+au{V1RCr{+avy(DQ=e#v4bB_}=#d)*G84A)CTb8%3e##i9F>(EZ@t_%RF~96!dv zd5L2foFB3&2(>XJ^t>>1Ulh784%IJ-f1Z~cx(|&pFE74+UTA!I1)=((@#PhU>W9Xc zR~)Jz8ee{hVSZ?Q`FZj6^F!my5An+njW0jMFF!QC{1CtV(D({M{0c(jD+uu`2#v2G z#IGPUzJd_Hg3$O1Li`Fs;~NsIG`^A$zmm}S zN<#cfLgOn5@hb_9uO!5;Bs9Lkxw-m`urb7*L4khxq%(+8HcXIn@T&6+NHwn7*qrlqIeP7OxY4-Z8*8lVr_+rI>v!A(-?WjH5A<>Lzu2kMF zj zUbWhuy1h4kWuK!20T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5CDPy$pp^pp=USc zfAS15HxK{;5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X s009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!XIC}#B2Mu)>?EnA( literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test3.sas7bdat b/pandas/tests/io/sas/data/test3.sas7bdat new file mode 100644 index 
0000000000000000000000000000000000000000..a061b1ddd0d45f93cf1c9a6c89a1bef6f71fa8ab GIT binary patch literal 196608 zcmeI24Rlo1wfN7ynS78&8ZeL2n#z<$8*7Ro-w4qI1Pl-$n20H*G}erdYDQAZxKNn( zT8*LzhL1*2Qes6uG-?770tT3bpy;z2Aqo~hLV=2cLZ7s1ADXw%x%=KZbEla)|F!;W z{r_G2y0~Z0zWaRr&fe$jdn04PtMj9~E4QT#dVTlG1UBHMkq-?WmON>5c6Uxf7-T&uT(b)2_qW1awhZU~=>7T}0rR-<* z8-ZXcy4IL3?^4S~jmXT&965Dl$uDmAEi_f~o;4zGM0VDXa`UrC>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&I{y!k_{oD4}S;O4WDI_2K$C&#o#(Fgn;9m;JN#YU6 zCoV9SiTcXFGj?yBsn7V7v78uFp8E-7T`p730XYXmQI{$5s~-pTYZSRkkuw$f!kM7` z0YzS|$d!tmp~#<~4%Y2brrO4gK zf_2v^@?1sEQDmmbul5G(u2JMFMb1>@3-1T*4=D0#MXprj3`PF@y=puDArvHwE3_aI|0qx>Il1oh)zXY3&AfBi?s_M^Px zzZiQ7<-V60YeTu%M0 z>T^u}SABh8Ts8Xrdi+aYnPX5$i4^I7g@@BM8HSOPff^Tg&yXnO#ADDV$imAVS z13&)cV_oj%PP9)m_3n-Q_0P)Sku{#7Qg0mZG@oOCjEymWAL`)u1YUW3!uF%sPM)cM zelIuY)r0*`GxZan;xoBAI<$4l}b7+ZNVfclxH zerSOI@l|nQ`)s(We`^Q7utk7qMEkL(e�rhY8z1{jvw=#mi0oWgTW|%`1TI&)UbQ z{XDs1RO^}0@pLuub3ilyfj3Nj;Ci$Eg0=i5(2lMA8109f`o2BL;gqSj`f2|I|J7Wv z!f0=`^N$WbSOP^p0rZDM&-0XLuRC^Eip6b`*?z`a-gas2=^1VBne7`BP5qcHyvB3W ze7Cr6s;Q~=ga}ph{?qNgdH%ec=T-fp+Vgb;SwplTTeW}LRmCHD*`3*sY)xpdwmRG} zbmX=nGp`RX7Xfn+=UZ&f_ntOBQ7rGmw~;^c$@K9K;hvu!Lp}QED*=8UtUtW`B?==p4?p_FpV2;D+8ZsMc6&@-X56vfrF^q+x3TDlmfu1gnf|liY{&Aeignxv(KU16 zyV(9@v)#0n{EFERcm6FM{oj~i>U(c?a2sjrwE+Knx~O3Jf1#!#lF!^^hv%i`|--T0+eoR*mFz<9-IWh^=TYQk)=M5Ma zA7Q^U%y#zf;{PqSy$W340LJw%TlhQ~NXW-P4(;3dp7r0k>fJOej>^n-(zkKjsYBgQ zc(v5nei|->R}IzJE!t{Zu)W*X*v9ga{oD*M^={X&ZDdtj&CKh2>WWc?Z|U+z*Yn z0~i;(QI9-}xASF>+%Cu=XFNv^Z$mwDv*H5QO}g{1L)Xzb)oF6I)|KJ9+}O-N7spE% 
z?&sFJR4xy-H*nu`FX6u|Og#KV4f-?3{rQ?5I3C;w*eJ6;o6mnE4;aheFs_YR{O#Ao z!S({q7x#@d%ema<58(c3wbQ$Y7sGD*>~*-$vt+aVOSXlMlX3I+h<)hiy~wRp>WybR zxIEUbd>{Kof1B9O#ip0r9_rHMoOb6<-O?^iTm&e*? zXL0-(ztI2nX2OB?k}b`=NBf@aV8LwxR9R_9I!+>HC!tH|wg zliTp;@Epf=vDP=)v5r>W^41e(JaI>K-B{lfAG%-`niz<5D@0R6#O$-gTD!`k04 ze>=1d^Xfe~PIEtJceOh5Njx9p1EKG0R{jIyfgpvH1LKID!}b<&kxkw{*m`fm^Ch_f z+sC}rnr}}h7g<#G2l(E{Mws)5_Z9Pe{LpOI!1L&pMlQF9J(!PKoS+QJe>m<-N0xKB zTP?QEBRnta?#F!0%9rrm>|Vvq;|uMryz8r-TyBT8hs^O<`D)EwT_Zr;Kt@S$aIG!(-f8zehSK#|5`iYf~-M5k7DH~dhoU=?nNqvTk zQ~Y=1t#!e5pWGB0M+W9ghdV>pk>R?-xI(^5SaFmT!1sF0&y6Cp-A~*2%fkJy!aS1| znEH|y%zyUcx}!a`KTf_N;eKmizxg{ceyw;!Ke@4!C&?EuE3Pr#Pj%q=h4E#eKDL>M zobQ%?@mhRO??P^Pj+Z|k`o3?i%gNoL+)!^E?8NtU%-;+=M~p>Wyd^>gYPN5k3xQp@ zPriu#;(0fD12^{#JO?@E1JKVc^0Ll-JZBHU`>A-T7nkk1{rI721$WzF?HYC)wjgcn zU$&tw*lpN0p@KaITW!~{1-mh6W7lBj_5Gt?KxS&K3!bxI?#1ssmB{THb3LwTMZZFB z)_Psjj_Zr>v(~=a(29P$9oxb4{q!zw<~f*e7?^Lr_&6_>FGyCNfPVEw2cB#Ao@Ko+ zwC==l8rVL@7mQ237FS#QEuMqt>$#b4TXBu=JD0a{@pbIV`_a#>_RAa4|6eoPx4ttR z-4^Pf2KrCCh>AzmsfU<7fx>jWa8`UwoaMWS!%9uFc&N`d!gFUxsz! 
z`#8>v;d&*k!uO6P=J+{&S9x|bUn5^bt#c6HQ*8gYz5|#(!X`h1d9}6w;JSEU;FIMB zVdZZ*=6pWd!EHaY-OmmT>4&z%h6~{}Y+shfp}$14wkEU>@yl*L)FxbQ57-uDWAP%|=?^ zh0q_^{ak*|%*B1rdLLoc_}$6!3> zch~*sKd4{PYR<#*Z)?B3wGqDqSnn6;Z*!YMziV0c-8HyBTlo*>hZ*%;ewHZ2aT>^B zbpYGP@9kFq%{%e^2svBpa^yigm#sVy^SUiH{9Ryw&3?A z>pV&~{j7SeBTvpX^{kYe@o4!^hN)k;5#Q^r^~L?_nPwguU-&+|`+2jTHBPHs-iCR& z6{q+ez`xURUSyf|o7drY?-p}j2FA;fXEATY{oHDQtaWbUxfZ~2zWhAytCs&@9BD21 z-NV{%E6jdV;QOllz+w3np0lmbh0crNxpT|j(EZB5_7giD_QAYy1AH|&`~-e)XX8w} zvn`l+S@DkV9Zef?Ke7HUhWX^8=W*Swab}zCtlf=&M|0#A7ALD@7+*hpGWhow+xz`Dw{2t%yFN27^Lq%M%htT}_c-z> z+~=+5R>?9YPvdw87k}039{x1O7k;15cslew2=jl#zmLm5lU7;#B>H*Xc08wW9a)0e z{>Dy>3w-}IFpp|niQn0Ni~ZtvfKLOV`zphHuc(24OQ!8s-h$`g=yskhFMOZE_g|xA zpX{Gm^R>>k^Z<^>I%m;uPdoG2V15|-cM$A1V{>Tx4A1N7+e6<6t^CS;TWCJX@Hzsx5Y_ALVL+PUSbK7BU&$KnRV7C$K!r}2^w_!JL+epD~(SB}gLi-Tb(#CF)(M1G^ z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO) z01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5%@PJu;lk=H@!F| z|Kzngd!w&UXZd@$mU?UZf>lFZ+dUd9=Q>-a)iRc8B;KGI$2<4~V=GT~4}aRPPrgB8 zEG^?{{^8{#6Pi;0;?ay^F!2xP`}lZmR@G6cejV70|PfGxkjh(zH zbyn42Hv0Y7q1H?=YFx=jzdwE1BWV+E|1H#;;n6+~@G1KAWt)83bWju*HSlk}{n}?` z$NdA@wH|GBJKy|9O!gtK9`Ft{O7?lRWdjYcOAq)HinZfjofUcX(|h@aEq^^Y_2-p7 ztulMOM=OkZxbtu6vmgGmPxHlRY;MqE81GK;vgzCTOH=asfB5xlJevD9en~95>fN-s zyO(`qtViqJ%#Yob^6Yg52aoOY>Vw1J|U3|@;*Py>KUj1C`%3XZdq@;o&_j$Bs zdPB>{-{BQM)r~9IH#_|a`@OnxJxnljA1_Qi{KSytn!-;!8uxv>&{gBPX}<2hsivm7 zG63DKXI1n5)9pUZH22N(=iNN7YFOVdsy#o|G{y)Uo}sUIJ;3Dh#jp{uUVEC%B}rkayRMC4=R7QL3;&H=)2=$*%UHTsuU^Yd6rcFC zTq7N3Qp%TBfABwB{#vVLc(jEZx#sqP56m2ROiaL|J=4s;>bbsVVL|*EFrMnsc0cc7 zwTZQCc^hxi4Oq_6?;rN-*LigRPC-LYNxAa=_k3EGM{8ck&wn{Bwgr~D^xm61+K^}Y zo!RjTO|Js3*s<;W2Sv&0lV-i+(E`52m!Id8OJaw-4Sn3`aj{~ph3|Ja&!4-(cM+@b zXerxxZB|wyILe>>dYVUTea^#{#j!juyM>kQ?2k?C^lA+{2ghL>Y6SXI 
z;1uIL+SwMajT6rJS=niy#ywioMm}fGpbu7oXq0%gMbG<&)h1?pw6(kW)`qi}atIHf zYgv3PgDAMctN-vx4hy&N9$311SdE!t)so~Y!Wyp8*l*0mgQcryGxVRvB)S{4HQIXC zXVf#@{Uc3(G_E36bFa{KcS8(YVnXkP@H582+GRe*A7JA(m;?~Whgjhq{O}Vsg()oY z@Nu7(0-jW}2;xahpj1EX(;yZgZoleT2vK;(r`=$rK*js{+!Y@$zBczQpLU&3tL7SC z1P*-cu2!E`?$M4c<=<)yhbJ#zctdjLN-F89ZlZsK#q3SQ|?;H4f{ zVZuu>SyS>M?)Q6Kwc3i-22E#trkCByiW|9%$tO<(+Z#RF+$Iju{a6>Q*bxy6-8FpR zkG3|XTzz&EI6d=R%rfdBbn-9uB^tiO)w5Fr{@4#7rUvM&c_(+xyw2qtQv2;A53)Kg z!tZzEk3zd6ya{gx_$Lpr(J6Cw9fJ{1f~K}{;c(NlRxaBlyl6s;FZC>_M|t&{dwdri z2qZ#;Ck70DW=7kkwWr_qX;ZxVmJId+kGTbc-=C1Of#W)9%d0&i24VHiinWY!uh!~W z&9FWho%QTdSg7@`dbTE}UWb3ILc1O!^a5KC;Rx%h&yKCvS>5-Ke(~#D+q)p#M#3t7 z2^)`YSX&S5DOa>|*dk{Qime)bS}vH*t^vm%d7TE)JEP3Y z+Mj@6*J|0K-6?B%?wskdXCXNAJo@G}px2EXyeyWVS;_s^PS;=c>#*a@-NKJQ`B)+Y zSLpOI?$?KP@_FE5uRPuu3!&`)PQohw41mpNj{QDS1m>d&Y>7PeW_}`Kk;5w9-NChF`*4$| zyY+A;6FFV}H%j0YPIx7a~b-B6>+Bv&Ct^gc>NdbQA1~&7ekKkDmoQG}vsSa50*FJ+|8ayVpna2$9%KdX$tWR6Kmj9sSTeG^nx;vg-?Tsye zoSzeE!UJ!>Aqvr2z)tRlBbWJ@aj=uiy(Q(X6$4=7E%)fgBA%5fj%w4-SXyA0+y)@g zfT6&KHF?AEJ9+EX1J{2Cf`OO8YRARg;v3GUc{Dxwzz!~>&^#SQkh*>|uL`!)6ef1r zUqHyMfN-3lKgR0yz6(03omhw1EXylm)>pJn9^W#OB#=y~tvAG`h z(=}RO?>F2lAhm(0ouP{mhU0Qg%&PcrxLKX)oX22?ErgY9YUPV`LpmPpg|Lw8p5((I zE>pC&OF#B$SHrHl!O9$9gB5`{?P=Uc-UBzC@PON+9azK5wcEiH0^oz$E!?-t#YM^k zp)D8w)YD>32e8%ycl-tBcAZy0(ZIz)crv}byVIvZ0@BsYXU&X8nA{O&=tnRjy_$)@3_I^;B^062ZRdvBXBpS!P!2h@wZRGt{W4> zeDRkCPHF-FAopILc-Un?4v=^nob6hVzV#U{&e)-P^L#iNmbsSieWlPid{J{!^_-v@<;WxZ+KkpL~(3xZQ_^7L&5)D!$0sR7Y%OVzdt)|&@y)#Aeqg} z6A$l$E=ukm>Cryg>l@-r7U#qZ0W%H61nVsR>^vyo=va)>u=PTe z_8E`rKVxDY*Xs>h9GtIkR4#$sLmXHlCdYD(8E*#wn;8nbecBA5P`sTl(LyUX&aZhTo-VT=2z531DO|R>&O`iIw z?$J(d^040}vIb_p-PHJ86YqxPxOks9Qu8ie1EBow!kA00dUwEmkVc7+1LcJ&A}#@2m4TfpC7G%c=^djmxRo>i97+a1PCyGy+~dc z!Tv9g``3ILq?B*;vARAp-`xRi!cLmk#t-S0utislIt6^-&?wa9!Iw7c3$d%G1MQa1 z8yG7Dml*10y{vdC-z*NVv1ee_Aqpox1!?N9WQx2W7BZ${9fbZ+*I+ht;JeVkWUrpK zl3y`fydX^)djk5H;L&<-{@cY67FvM+J$;&9r~BerA#9XeWhjbt*Tss`Vb8v@lV`<4 zM86KzeA;DOxmI9?To1Hd0=aJDl!Vz2|LOSM!gqBxbO&tC%Le%_{=9)hdX%49oC9eO 
zy#KoP?t+78;t>CLVGsCaOAh1n=IH)GhA+8&I~QpH?B0#ze6BK&D}9@KV&0UbS&M7O zX+7)ft7qvk`dRkV7)>U)E8^BeYAcf2iy4IH3cVu65NlfZ1MxCH>jdnxH^7pfZ{#cV zCE{IT&Mu#pBZKd19TuoYgR@3P-?>iylz7Qb-YimCmg?2tdqTW9LB8+-K$s#@+1oeZ zGIs(6*f!m94{qeIe^yqYElU^*>vdeLmp_iVW8saO9qQmBQ=G8@x;+Pm<@Hojv(GdPc=bGDqdJhb$FwLt^bG5@e;XB2YlMh``~aG8sL9~#59%- z_h@hJfR6$blS^HVejOgo+{FI?-uY#Z?;>9QD4czlbwChb1TQAD_VH=L$yzI2XM8Sy zLRV8T2MSz&ug4p+U@dCdTB2K(xmc>9udll1Xz-NJq7_sqw^*?=m46h_8-PR9TE{JnMStIYk z2a)SkO6%DX>I{IU#J}KG8V6|wE0Oo0HgZ2)_An+wKjHJql-3zh)p1Mf)ChHAVaVcN z@Jg05Ns4MdO!jagRuB(fkr|wm)@P!s(<`mtiBKneeY#`C`n)RdUw3Q)*+YATe!|zM zSz0$mRi{x}uZvJ8d_Hw@CJ#k5A4B%AI6^<+^Qn>6)lt={l-9E&)Cr$YshmkkRP*7o zhcOZQ37=12}HZ zk@v41KeC7R2>pbwPqVacimFbdv|bmXPWb*-Cuj0dRP!-p4~rx86F#3BX(mH!!snALXOa}ve3ir%_t3i%=(gK6P>?4@ET}L-w#Z zLOP;zwGaiKp@y)HtX z@b#&aGkGYg`53Z?#S!`mpHGdnu8yisrL>+Mp-%XGO65#SqM8qvJ&cLaPxyQ?rFBMB zb==ZAHA0>6`6SDkBtl}&DPx$)03SZ$FJ0S1Dt|Ir-E_-N;&`x= z(D9p}OJ_o7>5~dx*}&P@~w(}fuiS%T&&2y0(<$*4#rce=qnVtQjr%b z?ax*8HHy4Yk?&UQ4Mo31k?R!sLB)QJqHk2>&5FEAv2RxNZHgRFd*jqe5SM@A*+ihi>qH!Jcd zO8ae!KA^}46!|m7zFW~BQRH4lW--C|J*DW+De~uvJV3FBKVt=bK#`Lbd9Y$XRMEQ? 
zIbD%20eg#2rlK!UWcc%x-LG3|zgW?iT5$I_+AIAJMRreT?bWf7L;SXN=V0Ly!^T#e-hEL*Yc!mxd6+1v0RPi1}s~#?834K%M)0h$I=yp{bQMiWe%1_ zSe9d1h2;V)@5OR8mK(5a#j*>_9xP8_c^*rb3;V}14a*!Xi?A%mvI@%uSl)}}YAiQk z*@|TsmOWUW!16qnu2}3J%QP%=uq?u|9Lp*!7hriWmaDPcfMqL|U0C*Dc>>GxSi0h{ ze=O6m%)zn<%W^EM;zS-{U0*`XEDWZR*OZMd`|+&t*JrsMSJBw=v1R2GO2^Nu_IYm~ z$;M46D;NLF-&rgT8WjJ`-`S4mIc)0GqM{%Fm`S~9lk2d_bF`7~cwXT6J<9QWWM=3X zMve>}O01l zmFKAM7++R_qrPK&*$#%;j`3w@h1Sn@j4#{4FWWJ`YzMz=$M~`x{IVV6%W?3_af~m= z!7s-#z8nX?9LM-_9Q<+|F}_>}zg)-oavl6~9plS&@XK|KFW12@ z*D=042fsYW`0^b5@*LyKbMVV^j4#i@FV8W)JO{r#$N2Ic{PG>+%Xje0cZ@IJ!7tx2 zzI+G2e8>3m9sKeg<129RD{zdjz`?J;F}?x^zXHej3LN|j9OEl+@GEePZ0>v?AmEv3zvI}*h1eMf@{8Ky=Pr;!jG?YB`9A=Qf026B=0Hmo`%`~ziP?_eQJ*XYf7zSt&^gY zuSeEzx57ZKTAdE>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&I zhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&I zhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&I zhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&I zhyW2F0z`la5CI}U1pb`~Bx*3b1^>=7q`46RB0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la d5CI}U1c(3;AOb{y2oM1xKm>>Y5%~H9{ule8n}GlT literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test4.sas7bdat b/pandas/tests/io/sas/data/test4.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..addd6edf90830b320270ae03ce68afc85088139b GIT binary patch literal 131072 zcmeI24Rl>qmB(+|P$MgArn<5;Yh*Om%0!7ZA1}$vSnw253>cu=3`VCjQ>8c&14Jyd zqAkuts#K|1p=uc0u}Fbb0;X+f1BCW9EgD@NtWs@|szr(fs8EM0MGDNl`~1&&=bqd5 ze93ZVIWzyu>O1@HyZ71W{La_DHv~cM8hz^O#ak*~^TYdZC<|Wwy}DJEZ<_g~q@Zcg$bdk*b>d)jF?ZUgRCs`_AKU zK7GwU{A`{nCDxiBWpiZ|z9jrF`qfvMpHV&{>Fz3HZZYD(p_QOt`rpWkWz<(_EihNCa8j-*)Id?VhI011!)36KB@kN^pg z011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!) 
z36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@ zkN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg z011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!) z36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@ zkN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg z011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011!)36KB@kN^pg011%5 zpE-dWkG{CtH1YL!htjkwgO7(r)yqL}utYzp2<37Z9XcKawa|C{J_wf22p@#^sprCf z)3x05Oc3lY)$-g>PQka#k@x*3r(f^L-Hx1gvmbNhHIBU4k!u|J*poTm-HyE4krz2~wId&$$oXa+d6gq~I`V8sKJrA) zcf^rbI&z02S2^9eIT#FL2~j9C_mLobQk$XB@fRkt-ef(4m~~pd&ALuEbPC0Vm$a{W~^X+rwC5~L{$j5)4vmbNhHIBU4k!u|J*nyny zZbx42$cr4g+L4bwlJm_v@+wE}bmZBNeB@_2-w{V%>Bt?9T;<6@c$Gvy;q^ClWdo;t z=+L|M-QOuGkFHl=`OWfoH~!leLZ<PDD;@0Kgg=1q4kreJ`Vj^x*oL!>*Cj1uQ0yLwyAK@UGSx^KM4H? zw0$_9(@wkgbq_zd4fQl@ebcadC|r%5Eg1I<=6`6v`0>579(mzcu&);T%%)LWUm%xW zcfrByRDU$x=3yS1c^j@HJ%1k1^_Y3pxmH~nuC5FJ;^(OUTwU+%8`RqteR=eE)tLX8 z`C5PQB9B|0)|X_}@2evRGyhN5`qi7&ZPB)I#W?E6d^>Gag+br3dEF1S-ps?dZdPxd zarIZvy3VZAc3n@{-l#Q;zWg(7Z`xnEO=-R118&FGVO>0hxbM^T?6_C`XSD5gVtlh$ z*Pqy=E|-M_J{fQr*{ZgG^RM3d^Vwz{E!6c?Z&7wq$ERO3t3_kyX;MhK#x=G^tG0rj z-Bx3b<&k}^mzVo%J*xI+D_9580SVO?iFq#k}SI@s<*f3a`$U9V)DAH)7> z>N&VwwS~LwUGKp@AI#MCzhO(foq^K#b9P~#9|X4rqR-s5Q^~P*+Ak3o^X=>r6`lIw zo&tNE%P$S0JwMfYaL!z_3*$I~dM?m$FWRc)c-(6^nDg{@ubqLt*XLso>cRSL+ok^S znFw4T;$l98br4?RK6&XAFNPU}IWJDv+y;i!GC6=vUWB}2?XBuf(ee96bM7N=soJ0( z2zSYF;ejXS^Z9q+95LrL_TjNUCCAHU*!QxSCk>mhpM$?yPZM{jC_5SZ7o1bUdvrSw zt;2lq^19uFDw=E;d}%+%@m<>fs{4@7?1a8S>vwKaa;!c7EZUFt8^(XXPB>tHrr2kG zw-funndjKIKl^Q6pJ{i1xNpqH^(N1$+M%M=e)`EK4}7<_+k7X^9jq@?-}Uz>Irg74 z>lgEX!Dg?1%;&04seijEy0ES@$P+dXVEvl&5&ix9P3qsH^tsv02jt0_MM|2ve-}8^ z>3C1=0B3M8`_+Bf6Pz!09?BFgL53?V#ar-H;yK6 z`ROg%&)5YR$M9aiUzquT{O$5>STE3LF&{EFsK1tlVfHuVZx3xjUcDXd)cbkx=^-zl z#QFHDZ2W!AsE5UyFRqwZ455@}C_Tci4yZpD8a$ z*l#n4+k6k!uUU_nCm$G9-FSYH|FPa2G$??%7;CcLNb1~;7J9XH3{ z_szIGet(=B^qC)z;yoStTL$MyW|@lK62lBs*Kf{+?7i40zl*py@6O$z^uB>}P$3@( 
z<9t*ubME7u9Sh%2qnG;VvNJc&Jk(aikGv3Ore<7l&c3t*-+2~;+ZlR1-Zq4J1#V`%&KSY?;(gZatLug^Z?~cz zobONHt8|`&d?SN=`@1)*1@eMq@&wGQ7k0+qv&{R#&^>5p2K8foh3%5p;%>9w;v78s zHKp@yv##;JbLy~)zK)%CE#|qYzhfQd|AV@I^PS<5Et-eve+Khs#5*_5^JVwr`(RngbUbIdu8bFFJ*{JWw#U*0r|_i^+qz<8D2i1&__y8Q~@RqpCn zeexP=&OyAVgzeSy-+TuMr$}_2dnfX0v;SaRF1=5kD;tE#-%?8KpW3GEJhRi!E)1E6 zcETowq=%i$VjRDVs9|CgiBW_9Mf5avU0t&-1~joA0h`$*7@ z_ogAtf9!|j*TvV1*(djI#Ctv3Y39#@+wmTa_9*0IVSDs<*9S0vp#RE{?uVJ*X1~2; zJ-!2&_Y2Iou7UV>En~m82m7@R{qr|r9L@U0_mG}z(62G@M?13_bvS%${oK>$ zdnvv@Y`jnD^=0yQw5MW2ZoSxfYUiP7?1Z&yE7&P&1#2u**w$zT>uL?Hs}QMI0D-8Xr8a--G<3$Px|{Hf9U z-rMkAZ^jq<)t!SXzP|82d;i_q&y2SzuOCJpZq_Nj16*~FcV5(M|G~BR?!8I(D}(j& zy1S4!Vm~+a&ok#Hz8_}M&hOuiebvk#tfP`m`0io$+fE&~BK*E8A2`gs!Z|ziop`?j zoI4-g5#O&esQ=_q&pwwoZV0~`?7s!y+k-Q;-LspJcbWB$_l|+vu%DQ}iy@y}b~nb| zw6j6ibJP9!JDQhQ7*3`Pu)f}YYwqtacKXEyJ3Z}$wX@$ovUAl6Vq+_^uJ*ZoWHnY0 zrq)W^`iFv+yE6yp5V^F5+zikxj0`I>WD{h#1Bk=&AC>cMSILSi+TIBpU39%!}#Ap z5Vxj3UVnh|`hu$9^iIS*xu+UIt{+L>uJRpu7>^^DjG2g=b|IvNY#P`ep>-%Il zLa}|)^grJ|X`Y+%|I0pU`a9KqGW0LFPn!DwRQsfTm;#pCo|4CsNqkp-p zOnP2A<^CT#DH^|{@|&x*czzS&d1%W1?qPd+5a}_dFzz>pO(m} zBI76fUoD=qr|e%To)tyQtrPKl!@ zI4kn|FY`;1G3eFfqq$0vUtmOsw0L>MX%C&?gxzLh^tpF~(wAWpJB`SYs$ae5`f z(gJak{mGv{<&V=T5jqOQN%kjyo|HdMS|X$h#7Xuie?F8yPNhVsC=e&vpZs}G{y4{E zmpyudy*9s3{`@At&$vX`S0GNZKl$^R{BedQ!cc)Y$??gbujG%@ClS^Zh?Csk^5-S_ zAoz9G`x%-Y_Yj zKCxa?z$e+CmC}!kXVmI$c=agzP1 z7VFuQ@~IT-iUK~#{**~iN+;EyW78zpmfvFM=!94wnv~DDSnn&~liaVzrpfw|-(uS~ zED?qZ#7T}%zgTaWluw^nuPNY@9G{ialNFQdPp?E+S|CocKV4$IWKuqzV%<@|C)uAi z=}GIP`jeIjsRD75{izn~*^~0A6zhrtKFR)+Nl!{A)t_S}vVP>Z*f}~O)`uqLGcMNq z3iu@ZGgcz&M}CWK+pt6!Di9|*KK)|7VNyPQV!ft-PqIHNr6(&U)t_F8u(Uv&WPiHE zddZ}GI>ox9fKReNZPJt0N%bc!5mE)>B>Ph>*0U$&Qz_OJ1$>hGb(!>}bW;5}7RdUM z-xJT#lS6e??olXBi1oo>Moe2cui7U*dke%#j?eBOT0dF&&4nr)XGkIp7KoD^pABNY zeo{Vb#CmlBpJablNKZ18>d#V%&{H5za(tGE^`c3Q&jRsjFAyi$A0<7RJE{KEN`x9O z&hhXs%frz`#8noR;rGkVY53c2>9ioy8U0L0u5{!NJN8wMzS@y%9r+T+KJDn0Beyy7 
zCqw&a-34KL7C8D&M_%m6y-xjIj=sl{dmZ^|$3EleS32@)NB*K?-{uNee%`U)>*&WF`Jf{Q({t;0!qFdbTh%O3yfSAHXc{>KgU#u z_WGI~yh?-=%2t#eD7#ShqFjlx4`n~fVa*^ILphG}Fv{a7%S(h+a0<#AlygzGqg;ft z8|8A8t5B{*Ie>Bm`BLb(>@0Ll@Rdr%%gc^Kt!l;x#}kFo~kT$Jr7 z7oqG%xg6yxlxtBApd3ND2ju~jhfy9!S$-1YqpU$W7iBxjMJT&bE=RcvjC`VB4 zL3se>VU)*Fmd`+Zlr<>lqHIUGXhxJrnCqo*&Gh8b$TOuTd| zsH=;&v93PZhPnpNCgs)9=sj=p?wh^)wCCRve_mhf-Ft1UuaEoJd+n=FdH!De>YF@& zuYL7t&);ibgU7JJYhOcs+`qwVUxUZ5!E0ZG$FIR_UxUZ5!E0a2%+SlyyYxdgL?D1>%+SlyyYxdgL?D1>% z+L!kDrM>p0J$`AgeQA$h+G}6hJSXF0Do^t$`mm-nkdQwWWuu^oQKO_tEx*24v5(p)sl^Tz zY;QmX17bv4p^_&m@<$^k5FkK+n}C)wJd6?r3u+WBkw2Q2SjD2ez1CiP-+j;Rz2&}{ z-%RJt_c+|W&N^%V`|P#XUgyLhh+VBms>;`7{qw7v=l2f2`n$n%`(Bv&&;9!cd3im0 z4#{w4XZ48F=k@qTUT{SaWS-T-nv5MWHv8&pOGcHA8e29r`+t9p98orcYE!$VUw-5A z1^@34BW%fiEqE;&E2;Y)K~Mf=ZBRTYH$Qjq^@B@(eD}Rmjf#JV3@R9uH{@GG3-bmQ z4jMe@4(}0*Q;Q1zqaZ(~r9S)BmyEanVo!`z-*vKvq`0w#jc^YFAOHd&00JNY0w4ea zAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd& z00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4ea zAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd& z00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY z0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4ea zAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHgY z-~<}FPS)5VUb}^)d-Z=oko{#4bo9`I{+FfYKJ-Y-hfW1SuIS7E5(E$QGWwhoL6Coz zktcl=1lu!=oUi44EmGaUlXo4D=@)wPL{HB3Ip z@&r#F=*itjVs)dQJlB)Qd-4EJ?)orRx7m|tdvcj4XM6Io&RE?hPkz9YOFg-tCwCr> z)ot|Th$okLa$iqA^v77;dQYD2$)i0v%ac1kh}Et0Sp4|R^tnLy|uJGg{PtNe{}%#Of~e z&d6yjoI(;dA9GdAuhN@Z_!o zvAWHkJlm7YJUQEwkL{1uZSv#?Jh{}9`+0KbA7XVIJvrjZC7#^ZlQ-`Tg75bXf{Y$P z@T)h1;0lq)?FoWQM84p)ASe>~*zO=0B69t%AQ&d{oSi{%k;o%<1i=`QGhYdU!6LuC zEeP^NUT}60Q2gzo`=u*Ov!F}yZ|tXC&yz02zpszL|>LfpOr-4zAs7pN%Un&^jS&t?b<)+Qv4hHS&&3umPDVGMBo0V zX)~eyB>J)>`m7}ScJ0%2DgKT9EJ&g+OQO$8qHk|EZ6>szL|>LfpOr-4uJ<8bihpB2 z3zF!|lIXLN=-Xd6Z6>szL|>LfpOr-4u44yXihpB23zF!|lIXLN=-b;&n+feF(U&FB 
zXZ;<$9l3<7;NJ)LuIqQ}q2WgVUDJK{67|^e>BEQqV58{sjsDBir?v6K%icX`^!lFa zMY>wm^M88%`X6Sl5c__{{+(yl>e`Q&zjEDSqmNu?^mi{+2cLU#dv<-R*bgxJ>}BfJ zPsi#tuC<{mjse6LQOXc#|oXBe34!;5Ldye{({ZRR_( zNnK6LJM}H$FMM+ERW0P6C*Bji_~*{3>ekjDUi^XRuQKhZHL@@MX!J_fciMWTH{Feo zJh5B!w-|f9o;@#}b>3UM*GoHvMqkjR_UPR>zF5{hBL1)K%RgxAzyIX>Vn2}WBMq&t zy?`&X?}CmwY6ZF52=PPfw|XBL|9RQ8WBqFUVs)?HUE{v?wzNOSv^!wFx@5wmt)C1O z|Bs9^`i|Q}+y)zckEr^~K&oK<|8k?RS*?~)*tln(v@ia4X{*wmert8@t444A@S@e~ zqFzk$!J7?QGqs{!C$SysU3j z_VrI2)MVaB!Y3jen%AitfPSpLa!UU^DpDqG1A#;xY~6MWL<8lSD(@GvR&f2U6=BC@%9mkd)c$qPcJ`b z@3WQS&r0I+73*a@5(k1}Q$M&z{fZA5>u<8JBe$xzcGJPON#-kYW8pl-VSb0iPuot% z7ByN&+f~;{oDVWh`{%ETA7`M<{h)2)=N-bWl=YESEsBq|OW&1##ow-OR&?s?m?HLa zE`L!k<9XBQg>z))Hf7eaS=zbL^n3d{#m8ft}1E!<(vwxG2=W`EB+A5 z>$cY`a<&^E*(>XKnX$irljJikqR%t>mIlSg+NU4O_+|g<{%r1J zm$*ISdDEU9cOd;<5RKQ{JSTgrqTPP^IW`X*ZtPaSDCdssFWcU{4T_Kb5AFUH|1Vt~ zYG3?0`xolBi|CSl9g#d?b)D>Adp^p1zu%z#i_+&p>kpDAMVX*1%$X4Mj z9BjPW6#X0Li}OQv!NlF+G|tdDwR^->-R^O|>;z}%+Hl6KacWn?J$IeOYn)+Vu90&p z^o|k#vF{hnwTiNVkC)qXT=vVDdWmDZgxkf2TmP5j9G7*m>zf%`N1M02`K+mD?E+ax zeQ(|`tUpNpHhI157tu$>A0qSBzw*Yg@lEo#J!>Sd-Xh~P@jSS%F_cfr`S_J+{QH{C zfAo0tCe4EHia8%Y zH0?&@JleTLad_Aw`IzOT%eeeUNnF}DPjR%GZqFk*FKT8&2Xxu0`kCpZ^m8|T9AxpP>j zaW&i{*L-{%?s;O3zqempRo82LA7;A*{Q5gic-r->xUHm?ZNWQeUHGUlfS$93IxbG5n zAN7gKd%fi6k&&j|6HV&(kSm44vTGa5nLR8-r838cuMHv5l({nClz+PwLFn zu8W+rUu>1{Jmtde3bP)UG>TsdH@jX}G|T$R`>c(twT!cky-;ZuoCeM+4BO>|s zD^IIZenGN%g80?R7CG1CJorm{jyr||J|m2`<-F`nt1<=i2pQ)&P~f_+GhFwX3r`4PO@TQs2zzL$L6d1 z>FeaR_8gaUZPLp4?~3+(xv*8<$7NoDtXJ>H<-KFJ8NZV6Dyvqgh5Q<7&p~-lasF?= z0~j9(#=IzbwT(ZrE_ZEGV>m$A{4L+i=g4~H{LHzZ+ZgPJ&S4XU#2U_***N}=nsu7^ zKIqOhA8(VWb^~?-H|7Q-C^#MaljQ%AE6sd!pH%!>bG^K$2PV&ns20g%ZC;UO>?@}! 
z^IglnFGyU{{J*5nW`pf_A@PS`mg3KuLnZFn_mQAN-kTc5|0N#on-jlZY@BRcDev_% zPU}CV%jG><#-k)3)8jGUU0)Xe5&dJ0W**kRZM?m6iF^mJ?-$~4lj`EXYgzlYN{P=l z|B?JK=QoN!OI$AFj0lGXQE6Ykx7+^fH^}>uaJK8RuUgJ!n+Hl>_eNBG#Gn7|IVkyU zuqHlVvh9y*kae{CSH6c-&XjrW5dJdGXha>;Z>qhmsh!|l)Cta*UD&ng1XtA=x~fj#NA9_+!7b8{d*o_x^Ug5vF3Zv8)m!Q$ zAKNJ0B)%1_lsEqUX zYbCB)|B-#vqd~rV*myhM^qZx>ukr^D>sNBlHog=;uRzY7A8n0~R}pFdoYs(iEN@(@ zzZ&d)R=&3fR~oyI8zk?t`(55U>Xu17v40nnd~(`aS$8|mJk!pi&GL7&P+nm<*)ov* z_08vEe}8fA7cV&XbPnsh-#v1^>IAlN4Y{iBxqIX^PGIB2=dLr?y|dvOu6|%H^F2h) zWjpV}EulP0;=Fxs%OIC|Mh>#`xzfJL*C)BZGM&g zllXixknb=ZD-?g;eox|?9q;xh;{VQN|IXXBQR2x%hMTpoZHQklW%hgIpIT)+axUBV ztlVvKZc03~=d5ned0nC(vWvRs&S9NrI*k)t8}TljxL;fwu6bv}f@{$|cbfP<$eL|j zivcbW009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5cmfoFzdfQUjE8;g&zz*G2^wLwzi$>#oT ziWIj_+^=n}(Ka>fRL@Iio%h!6!&=dsOqR7OGPvyBSE<%GZL?&)y6oK>A9>>L8QSJ1 ztvC@?WN_mn%eCT0tr%IW{^hORpN>8FfmRIHL%Xb5ZOg0Lqg#lUsXxz*lx(x(jp&hA z4~$A52eti3tvI?>om%;)*RTI!xmH}I6?$5awtg~j!lUm?i#aPy3*A*&`tgnH)bFk< zr0MF%S7=4{Pt^I}etST#DpHKlijEcPz5B9OUGv^X%WwM=iU<9~Zf!D5n{;hd8?Wu7 zr~i;OscrCxsquv4pIW7-mkGzsx?1iwb@@4apRK(7Bdt*y&_jB#m3Q2B&%p<7tE{ZJ z&idTeU3Jmx=h`x-EG|0-m_DGv0i=IK2P|aUXaae?Ygpy&TEo&-K(FId)>c5 z5Wk}pL$soPvFiSUnrNUlOSR^_Rq7Y)rn|I;TN%-;{++$_fYw9_`R~`NF|<(Uy``JE zMJswVsF$PE z|8&1gD`{0bb@eN?;^PKIyW-QaM~8Q}ov5x`rhdS_a+vCuXoG2Mg)eoOrxlAftJSr% zV~@%_crs>xepfrmH=k4N7iJO7?<`(E?Fz@mb$FS7>({w)cY_8LBYmd2GCmb|w z$NQV_nw>F+n`KtKm1afL&VABx>aHpz|5q)FeEpogv{h;ARZf$LL2In{I+`Zrp;npSn` zasj@VG~_3`8?)tUBa&vuV~u7B%`U9IbBW>@^qgKYc9KbTYT1+9wVUjt8WhReD%ID% zMXN+EUZ@p0zmdJnenA~Ch)OH8((I1)8s~nxmM44jf;@d%BTk6__3~2tm{429@SUaZGMcnj?y-@jot~TH*xjRjp}A1a#IJH z&`I;k)2ftRU9GrSTXg@%9Twsk7dLrv ziW(`aRVLuVFLeGfIU7I9oQjpA7CmubrKITFkOk!)y3maSAV_rP!@MJl% 
zj>gQ*L*p1?m^>!U;{0WtJ~$&09aXDZyyKQJ?s!^VsF%Cxf;aS#FVk~SkGX?hGoV>L zv`o&6>Dpw7R`h#Ob|a-5L~_QMI>(;Ye{ZYeHXq!hYmC+k-4zF@A5d-(rHrOeR6LL2 z(TQ-PN&WukTb}y*w>RrbMOsnPAP&5h_{yPiCFNWDdDm0Jg&S1n_~OPLKi{G&(J66D ztGLlS)DsQAvqf>f_33_UhBRl_tGoDca>wlSMREYrZu*WkDSKLVQ7-uKYq}DFn(@JA zJ4Jn~!}RN|ieqfno75~t*~m0G!6 z>gU?!AKbBCQBX1aJr)JZd&xqZ2Qgom#UUY9#XR@j$Juto2OhCWXtyKQ>#UJ{R<+Rl zbabq4u9ewPfTWCXOuYmG+G4cL`@f|AtW(O&@s3*NYDh$lVEdPD`=Z#FO{tFIPC{QcEbx2RLKSS zjM}kKUWVQ$MtaXw=QT*0{D#&r`py@`acCn^8hof$@ykkEt5(p#w!L1kQ4&RQ?*2`( z!&|f>Pb*p)Bu;!xV%AhX_G4{H~XYTG80-aKBQ(mZycN(opC|bx_gu+lf6~( zd&@bLAkqF`{i38GbiDG&=WS5yPEdYMdw>%BQnzC$PR`8!h5BvlqAJ=@5!y4W>(oVj zw(@aIqx^n@GViUl0cfg`36l71(+Wxrj%<~=)2JyhJ-8|A#Tj1r`J5Ps93?Jf9G~x= zvaRtY;}PaJ&RhBD`L+Ws|=xpsKG} zFTK33yDipf<~{OG*RJpBsEgbx+3O~1llJt&c}hn>y=7@dDevsqqDJ$Z@oQS4!_AEI z*O*@EJ!jB1Sd1)V+mjMlYgZj)HtOyi)$ z?e=xD1!(BxS)I#Gw;YNzAM^fSN%j;aM{kqlXCXC8%(abq#i!ug-+S$j3j^+zN{+2i zw2_rIeqbuH!?~a1EX`(jaygQ;<&5OLewO*X@;Z%~SkG)ySM&Cv44$?}?!De=K<}YD zqw@MoIUX&TS|e#7eWT(0W|}^9`Ev_po}BgRiJNaU>vbhMbqbo)9y-^GJE>=VQWG1_ zL{8FSIFP=$G&o=DtDkqwQ7b5a7(uxJ! 
zT{3|_5%A`wbExBXZ-TU!dqmY=^hcW3Pw0aH1=*U_?wzZqa@-DQV)P>Zc8oKGTF~J5@KY zQ}zVxuF{_S;6VM6z~wV?UOW>YXFH^7TFLTd5Ngeei*Er~0gT8|8X;JjjF=`#9ALOHJw@UWeP9TZC_7#&E(QC8b zyVad_^0q@K37wqZd`Xq?2^ZB09nxp^jjI1{-Bm?PthZ|Qo9pGBY>8IT**k8zdPZkS zU$m2Nkweb7sKvwsy_~v%>RY#|8#r*0XHgg1>%1J8Q|T#-ZEpyp$7&ss^HX?C=0a~4Tb$A`Xl*@uR$G~aGy4W*!E?Rd#K%9#k{ zOfe6LF}~ICOy;IFp9uFbJw-o>^QmO(inQvKv-N}&brRR7lxI?s z)_fHAFf2tsiSx;2>zuUeWV3a@6m=5klgTsblh%9!?%~u~Y3(1jK9*LU4z@m!qE6y` z+Rozr!@p;|f4GO{6#XQwPd!`LrB!DMTQ5#gCviSCJd?R;%_qV=Oi$5I;(RLEx+1MQ ztkuv>0s*vDe5G~>$aY}fB5%|_Ye2boT8t^^{Hp;y0q#nVe7>y>LjjD4bNn5TJwo; z57SfhlQ^GBwysF4PB~joNKq$oKBYX9lCiz@%_;gxT%UTju1l-V61HBP zqE6y`YIr7d)0$6&dzhZ0pTzl8vUNpTb;{X#LW(+x^C{(-l%zEu#XSs5(NE%ha@jg3 ztvcCk-7iI*#CV;_GwGAod;;#_RFHiCoTFVgJLbMx>SF88pjVtW^*4tDTxVa3eiGMb zS3vt`2mi*pO5IO0_t2E0pTza4W9t=Z)mhBe3scldoX=dI$?UY|Go5>wnxdb?^{HU% ziD|9Rc&<~HqMyY1jOLk)Olv+x+(Uk-pHrt#pKkJ8us1EYeh(SnQ@;6S^bAO6^_iaB z*OPDa?6W=nKu^x~!APrt;IS9tQ{o_)QiZ}Q}*ColHwcX;|XPu}Or&w2J8p1#wQyF7W7 zwzoV#^Yr9Wz4bl$KyOc8HB(ewkJ1x_5(eAt|u3H^2?sR^7NxUxzv+)d-mf! 
zeYq!3^5ngqeWjd6sL?(pnqd-@trUg*gmYJ1CbiKk!T$@QN6k=K5cr;mE_4p08n zvv2eC`#ibBlY_Hj`?u56cX{$>p8QqMKG1zRAMoT%Pd?YP@9XKaJ$axfpResLpIlF0 zfRy=Cj+C-Y%861=k@5j47f88O%0?--OW7{vAt}41%;+ioOF2Nw zd?`mtStjK~DW^#JfRqcQTqmp3H7eqLyNc_Dszq4DK~_~nJhmlxuf7aCuFh+lqaeEA`M`JwUU zhxp}(#+M)BmmeBmeu!UwXnaFM{Dy|cH#Ed=XlQ&xL;Qw@#y2#?Z)j+ILqq(AhQ?PA z;#UwFUqOgpL1=shA$|p+@fC#l6@YP##a>LR}>mwQHWnrXnaK>enp}26^Hm0hsIYN;#V9R zUvY?EacF$SA%4Z7@fC;o6^F(*I5$_H5jKX{Gbqq6pL7OM>Sl5__avpzM~vy+m9juO z#%xLF8p#==qf)C_NBJi`qw-6w(?#5+|JRxX-RFNpck*xO;u`w@bn_Q|O_a7j)z=)I z3;$g1zmwZP;}Q*$zDdxf9|hKy?)!3nOtaT#wf?84z!xk2!G0RLPS%|1kZ49US1Rw6 z@+VSGk@742RNtJ@)!X|*(vNffKKz@-zXK%u|9eek=stUd{;w&&j$J2D$FE2BeznJd zSFNU}Ztsm>+2<%h00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1VG^bGJ$h?=-EyA zzdS?C4Fo^{1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; sKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;&YZyi08+OfH~;_u literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test6.sas7bdat b/pandas/tests/io/sas/data/test6.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..2d9b4b0466047481d0fa63bbb29c2d3b234326bc GIT binary patch literal 196608 zcmeI24Rlo1wfN7ynS78&8gL$^HPtDNHr5nFz7e7a2pAwhFcDKqX{;F^wHZk%<3eHD zYZXNi3?Gf4q{NDRXw(EE1Pm|g+8%rADXw%x%=KZbEla)|F!;W zz16j^i+lF$yU*9}?0wF@H!>EwIzPO#YMXn|Yr9t_u>miRcwp!yse^_MV>vmRp6#;D z;>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; 
zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vQGj}!P{)cHDVh#NYEnkX`S@Q$b(`4JMSlBusP1M(p0CJUky8};@Uc+c4T@Z=$fFfGNs)X05~|y%$Tf;wsK_ow z?mil-yH1hkC~~eMGev%-H&l0xB3CPNmLi{jFJ!-8kyk5nl_F;<^5^e{>UJq|og&Xt z~-{YZV;_@E4t?ox((U0EUkG|&(v(1S1`_WhQqj&eC?}0cG7nk?Y zZFN8TihlI&e)K&D%{C+2??+$JkKWynz6WAkTwLBmx7GdVEBev9`_cEnejzR{@1fi3 ze)JXn=-vJ3d%DdwBiiprU(t`={V(*^$Z@-3KNykJls5DDHKzW0v-@+6e8oqLuF0Fd z3-!6C{;R$|Fs>f;-Z4`TzePRcY$t!>ZwIFQJaseLrj^P^mYLb(+tyKgLFzzYlisyMix2I)3{RY$xB; zKf9Nk^XkEVr<(c+Pw-pX=dT*-`WWq}nEI1V9OgPPML&i5t4#e5pW&qi_l>DK5k&nA zQ$IAw|MZHuuzfbn)W5ldpWh-tG@|_&Q$J%9|Ks@WpM2Sa^Wqhz{?ZP!wB{AW_Gj+n zQ-7XXIkNS1_;|XS_*tM?aNp~uK6ss3fBstjB521{eT4SIOnu)T1&VP;kLLsPTRsLDmp$$X4xNc2)66UUp~pBU=;RtF4YS z3?I2|$js}5%S6B&!1HPvJ;(K3xPwE`O z2m7iM-NTN2@fWnukoHDPr`;Ztml=1gcPZa2+-(f{q2;#_N2dQgZ?s(!oBH0H9Nb2jdM(KRo*^n&{$FJ3>$dWZV%wN^7~4mG zyP}grr$5@-@S3T&{P5DP{L+Lwe?M-i6{n?UJ1|}`npqG2*|fLDKXeB-^%fuG_IU%w z#fRAMbhDkkyZC>LZLbp7H;8fls}??21`_fykVE@+zGwY+u6!rmilcI~os4bVcIt5V z6J9Mfwx31{kyXPrc8j*!7HsdfHMX&QWIs2+9J?G4=bJWKemijogKR*U}3aeuyg2aX5#0XEXC&*t&p$^*voH;ijzCV%Ty zaj-py^TmB*&2lcc`Te+mTJ7}i;U%!!K7B3j^DNbD|DtW-<7C|YJ$xVfc`tG+lX~On z4la+iE8fF?(cdPtb8+g!o`UvxE`QmK<9XB6BWL5TecW8fc5G*w+3!u;xjY`bEe_Us zy2&w4Oq*am%=57p+rjuP*~kC!8v$+&_KW@i_7_34B;5IiskhEq(A$1$3;p<^{oD>5 z+t7ArTO+;Ku3`Ifq+nOIAIUcCs*%s_M|KS<*lK28zW4tJG6?IuC^9)Vwep4X0JicX z%q!MC&My(i@5R=+k9mt{1AiWNNeDdTiT*tGNjyia^BVWz{cE^9UT(#GFNl7U(}Mds z^0(sYuTo0*`_T!gzGrUv=2Oo`Ai4ub4-0l3zx^* zXJ>Hy7{Ac}b!NhW_G6{J@o5L{|CXQQzWuu=%=WBtGwk=`V7T7Ob3A*wi1wmnD-XQJ zwA=a=o;w&{R(s2La(V23V8t)`f7wV}+xf$5#f5QgV4kqG3FFr~A921vY2kkm z>2rbQ511z#v$!1Qp?%0b&g}aOlrn`>=op8 znaOR~GkA{Ux>)O*>R3lBZ+Y`Evz}$ga2?^d`F>&f1LkjYcVN7rK8XHctmNO7fnn`$ zn7=6!tcW5dn)!~2SPK7L@fYv6hGawC`9!ye4XEKX2{^|0td>mVe^@$yea}Ci;n$kKMD8-!2ns;oMMf9O%UNbj;rjJV%U$T)ZVh25Po%oeRNT zxKF--{o;8yX#+R+4Lk=q<^#~r&GNF&eLQFP!~3atsTY^+x&8Q|X$5!NVeJ}r8@3>A 
z>|eH_E!b_?HsOLj23u{{um!s@X=B%5=Jov}UqEJRtqY#BU+%^4JXOf;YI8j=jyqcZ(DJV?>m>Zaq)HRihI$|t@bM#(EndG+qb?m z9N8A`p9cC*yW`xn_$=Iw-`}ir3cr(VUgKy7_l?slc|d%foM@fnc&^Ra68>G$I$tj7 z#P@NW7sK^RT7~Z&OU&_e{I2r!X1+$ghFa$!zNgs!Z+! z4Z_Oba?SaCxP#k%X1kvq7}5`Ihm8~>YuLUljl+M5W^GM)AL5tYe7H@d+8(ei$j0o! zAPSj|{SEVfW2`yftVg(ft(k)F>CDV?4Bmlxtd&=|P5at~-2AR(y)WRt1pL1epUp;C z--Xa0*u7kS&dkGo&w3wWHTd4tivExL;o+s>@nY?h-COXz9>;04gZuTR)hHTz8WXff6wDRgbO_-1ELTo>2%@7^uuybO$&Ax~r8i2J$K{ut}r#B(i(<9z8^+*d9C!8p=d z@Vke#-&UIay5ak({J>%P6`r%L&xFs5;kk43-thg(!1j|n9rmHTaRYocIP@5PZ)amo zyE84AcUkd{?;TAWaX+#CE{6H!!e?>at#RgmK$b#ut8{&wMicJqYuEBe0LlKa*Bl`y~2#-F7^ua2;8a z+5W~(j0=4KH878AT#4V=euw?ycYsfV;rlAXe6P5He@mwAR^Ec=;HY+S@X5dwTvK+$2w=xZ%;Y%*ie2L{&x`UH*<4%`wY+PY1_l!2d(_d^ULsjlHqrl z-pyQo-ad-^n>F69hr<8PW&NG^>@M6-?lHMp_6;rJ>&4A@H@?}44(xq?dP_`+Ma1^Y{70L+=V0K#csoH-nNl~-J<>6)`a&Vtfh_JBBP54 z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la z5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0V42kPGHF&&TM*N za>0pfboP2*pUw*Qa4qeY_W7%Zy0&{YR>5_)Ok2!YmXUnDW*qC__l>DK(LLR(oO28yAjPK*)w3*dMp!&67Q@5S#SH!b>hrIRb`+hA6Og47% zrnH&WgW0I}UV~aQz^HL0ANAg}We=r~zx8)eZ@O3eB*-W0)0S=WYtukcQry75_13GO zl^+WXWY>7LQSE&5>v1^;eR|M0&?w#K)0Pc1z%C;gNGj2e`E*w7)lco^=ePXrz?7d? 
z`L(K?abB$`?!nGaGiE*b7r*9D%-r0d#WCLP_OWT(`HPba_ax3U^lC$% z=C|i0CN;eRxZ=mO^B)waW=x#($P*aBZw`#?Q)6`8Dpe+Ptr*X-i25pVD zp7j~^O!xdq(;rT#jMqFXbluYs$CjATJ0bjxv9NYokMR50I1MHN1PUNlcn3f9SZ$G; zB_BHG*WBPqwF@Dh#01LpLw*fn0pj+no&^wvr~TUXh8rs0%jc~4Xwfx!Z~C=s{aOvz z_(E{tqj$9WwF<9xcq#v0^SsS^Rva)l{95-GzU-k}2j2zwxp7$_zHAepEmrV~KoT$W zx{8usjLV)}0CE4k*R@z%(b}NtjL-0~TUbdWmofSHDPVhpSDVwsA-W&wf)zU~Vxha1 z5B$;A2KQBGHi6SK?}aS09zv(!LVvR1PhLGMEf|P@A7W~N&YE{}*Nkgj{vnIMefWM> z$3^)4e%ujgcepRIL~UW zp4DE`C!@2TJq!!A-c`@m#MSHYk5y{dL4=-X>meLrUG-V<^*XEj{*f{>AeuZbW4upqXyu`8H{BTQ)lTi= z*NM?j?}bUjCins?dz6>Sr8@4{ASS=);NR0HiRkzUR+6PcJnR%?wQ<2Rua-AJ5L{aG zo?pAntDURo7wKH?YA0a-nc>wgYZGyrQVh=Yu3%Wv0NXaW6?8Ur-8!FsWSf`e!ER@i z`&j#95bWAwws5z5Ezg@hE&dDyXTDe8yax2TalMbl^V2JN;F@XrD*+vLoH<+gvBw`t zX5b2)KE?z3C7paOxY)~&HpW9J2fmZEivI(M&0n=7px@w&A290a&3sMHkl@P!-IxG_ zPl~(wL!TbcSlOhfxLE3C5Bb4azwLXDPuk384>}sqbG@z)cUa3<u;cX?ewH~>?E{FV)D#swe3vtl?8+xSbj z?OFewD|fFL2yGX7wbB-T)j$Y?y&}|#;>X4@aWWq{Dh{`TojkR2Wb6KPcH17XF7>)@ z=;RR6uy4HkN7(Lz{?wBlu->nJ2FEmbOnfts8{m`s=dyUewrDN?LFu<zqP38n*bPT6^E2Z>CzpGR`^^;tVB@Xu>c&Ezoh*)O)6ZCXaF^T$ zAkl!Kz=kzx!?4?V>s15Se+Pnrm&0l&#NF&4#-@5TJ$3&ME~C&q9Yv73eml1swo^9~ zyX-F@^HdoeV45-oQ1F z*pbd``U?yN4);mTuouQN5kjxXI2rZ~|8K|`QioMn!d&H|r*0Kfg&1XTKDK{=2n05< zftkVjF2n(>IV+jF!vubtUlWUGtb|SXyK;|uBcK;}wS(I@#BwqN*V*IOAUyAAr>8aq;=$#X z4eaTJ{ZfYd`+ywwOFcM)|4J_whhFUmFcvu3UT);G2JU(7b8xi0B%S54tCCEE99py{05s~}^WaA80X!uc_%_^X|K+=Uk}KInCE zzgBw(pZu*o>!%OCA1eEkhi&2GT#&axzu0wy%_OoVlv@XdLNB244pXvElD z5BupFt*`eRo)wVVK-5mxMF_)jxh8H^;x|03&UDVBu)`L?N;b9fg}Naf5B5S>$aRnN zOCT=YTHD1R`L(NHSKVM`4zR(Bz?=Fc?j!Gln~uNF;BvR3Uj;Gryp4#laI=FfhoomnR=`8IS`cp8{vQ#;b3AiiLVXvaMoQ4o>$m&+YtSeeLUBmnT^qn_HXsrSeb+eAfsJ{K*A_TlgQ&OdYh$lMYB` z@rvX_`=E=`J4bl65BK_q_*2C>@m$ah$P{LV=e0tg^%6P%s(%Olcy`V3PQw~>9~-_8K*uu?l)VAH3|9K-4w+O9 zX8ILkvCCna6WV1`P(AU+QGSh0_G(`?2a?|O>HZ`cUz&(&giIZxWPtViL z^{Zz$rqr6t_A@4!ifp_E1T|G6PJM5>+PLPKAbn9J@ZfM+epuP~B(crGCYKNUP>`P+rGIeQiAI-%%(sa=0kQ-LFn*m# zUKhgtFOU0I{Tif{ulKRKJ~Q9l0d2xgn%~9`>Q%5sSB^XheBjV1(&fRIKJyE)t7ic1 
zmd@)LD*~4o>SMjEWGUY)4zDq%VbviDCp-aZ>aS&r{5&jVT;)0l{h_YGY{tNMpn*v~ zJ$)s=e3p1YnmFb-^fBJ6_1^U9g%B26kpDens$Qr26Il^#lv`yeigee-iZfu(zM_+7 zCqhKO2G#u9rCYgHXog%5v|I|gZt~=$Sr7jC*qy?6bvASdY|hID`7ivufkS#!kXDil zX%D>ry7umZgJ{B#z;|H}_*F|T<8x>0fkB2pwS7AmX#nirjbr_;a<406n|fm2n4(#W zYsYCl>+7p$8FBg<_R}~`Cb%mS)P+PX2^=$xhuYQdySf)8Bneyg5O>@IFA8EK=E9 zH{ddN0tMJMJqh=3p(ZSaL!)JJ@+@^zmZN@!tI1CN)KS5#| z&xU!mH+R5CfeEQ)uEu~4k7jJ*e+2LRvd4b`uXq^FzDqkGh%bN_lbQSYRN-W;Rj$*1 zS0JgYDU<^Rue;mpi<`feznG(qsrm?(FsW}3q*Pw*u6q_IwEvO+dd{@(2K2?Qe{}G{ zv)zF~K_fljIruE5N)YZ7y@{D?;oO!9e%rggc#-vv-ok5BtL)DLz^PtDsK*obRw?q@ zdiFEOVawzD&OgG$*194=yknNfK}LHe)2G8{fyy|s@qYC`eSHkC6evB`4_VGna6wrs z@1Y0L>r_eWSyAc?fTqO1&{dWIX$32l_mDPvKV0@OI!Zs0^U0FdnK9MzNb9sHb>d;j z;$P@Wl`~0+X+BK$a6VoT4_(n2oR!w6W2)0Dt>2DPCvttdCBJW>!Y(d#Wdz5}6 z*QZ%pH^o$^QChEyQYUggb#f*T#55m6_OK{QKaumPmDV*e)v1!!v!c|AoKKmYNoh>; z;j)L(QTmCTPnNXKjH!-CTBk*+6FHw$Ig^x_=EGzU=Up+ykF-7=Q=MLE{dSZ(k@M+x z$@r1?uN^v~G&2PNTG57o|?*{#GYv@<2@UF=P*mqVy9vpIT{M6H}ck zX+0}SoyhemlQSueX+B)`Fgi*P;ThX6@1d@u_tP$WXp7QM&-FMStqU6M5z-wp9kbj zmc%rlMY4wlQTmBopBiah9n<<$$~qNM`iY!RiJVDsO!Fy}J>)w2Ip5dU*QU5&l2~qd z52@7go1aT(LTBky6?v#4->BGo6n%ywXDRY6ihZG?=ZajS$iD`A`OOZ-Q>N%E6}d{0 z7bxw|QS`Nnyg-rfRO}5!zeJJi6#0I|evP7UROHQyyh^ceR`hL(98~0Wiv4~?->t}p z75Q<+zE{zoROGXY{505GJU>_T!ljbnxA1`^Mc$^gpQ`AGDzZnB+ZFo^MW3a}g^K*V zV$T(Qi6WON@~et{rJ}D=1Su{?*ROOtWJ+*qb#nTusHmK9i5V>utoyRlr2|BU|Egjd@S$Aay6D4ux!P$3(FoXk7IcbOP34#$1)wuTr7*RtiZAw%lTN|jpb@A zH(=R{Wfzt`SRTjn9G0$l>>tZ?EOW6e#EUObl9${TyLd+}(rIA;ck17B0%yHLcdmLBsn2Isw730gs&8_kK zZXLnKjxVne|IFXnEFBsY|IFVxj_0{-%9P^bAODz1y=jx@u*r9{QQ&x9==eR-@q0v8 z_!vfv2p{8!?8q^U$Z^=@I@-u{JkNLhE^z!VbkrXi{yaO&@!K)R?CkLR*^cpL=Q`>; z#+RM%sP7nGcA=xbV|+OdhB=P$3m9sKeg$M^~z{0bf8D|GNHbc}DLgWpKU z_(nSTjdYA}q=Vl`$M{A%_>FXoZ={3YNXPg_WM#n_VeKK-8N}e_Q=CB}bu)7|`$<$G z1&zhKtH=VyF=mzGTobYjb)p27)QS94JQL+tpo1dpg8#K9Lidk!--OTi(+#`~O!>xu8$Y@qbOJb*yz# zbn^Ac`u(B8K(1Py4(+}1TkAQMM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x 
zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vQGoe3msFuVEx&NHOB5dk7V1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY zfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CY ffCvx)B0vO)01+SpM1Tko0U|&IhyW4z`UL(Lz~Z0t literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test7.sas7bdat b/pandas/tests/io/sas/data/test7.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..785b12cf175e39d9a7aa73475f6ebc8ffec3a8c3 GIT binary patch literal 131072 zcmeI2e{@|{mB(+|P$R2rEUvel%!rz)l?hU7e*YK?oSk=h{Y9c z(Fati^1}*M!`O~RN=YGvGGL_eRa_XtG)MmUfFn8WX_B^WwXv~nVFtBCtY7w_|r5aJtMRZtG;H|$)_&= z_eW<~shhL>x7jfnR$mnS7kwJ1Rn$~eo?Y4Ul?9h{9+Ooyt!`R%)kkXUtEbgZtDJVh zF%&Crhfrq>?4jvO`V^6}BqS?EinYH$AQM$tMhnsMM|lU_0b`XVE{4;wk% z`iH*dIisH|%BKz*eFMsQkhxK#S4Ozspid|0QwjRq(+U0)^yvhBDnXz7U4s7veL6v(O3>$? zO7NecPbcV83Hsdr1pf*8bb>yWpwB&-;6Fj1PSB?k^tmSz{3qzs3HnrmKKFQn{{($H zL7z&{=k_J|Ptd0m^r-}W?y&^_3Ho$`K9!))J(}P@L7z_0rxNtJM-u!e=+g=MRDwP? zlHfl&Z$L7#g#!GD52ouE%8=yMMx_)pNM6ZEMBeJ+>aKS7^P z(5Djgxd#*cC+O1&`c#5G_dtUG1bsR|pGwf@ew*MwL7z_0rxNtJJqi93^yvhB>TUF< z7SA5Lynn7N={fQ92R?4}|EjcnWtIBz9~OVS_6u8~uQB>JeLpDN_OU0QHu@kPmd_j> zQn&u`fwR9fsT<`_H07VTNo`p9;w|@l@>!$LeA4I_tX5Co{PSI9y+bHJ#puh{sQX`? 
z6J*);lxltE=^>Mk`aEyCNk2HKuE^edAXe&tZ_r;L8qt?H`-3$HIN zegWmrHu}*X74-GYiQ}G!{$Gs#!*{5b`X9`g_e>W0bB(?Jb0%{ zt^34N-%eR>XBz)gZ&3a3|K9VT?tjAg&&)FV>1$QT1?OE{7Cz^7bhKX(d1<@oKg!C^ zzxbl_FKYWr`vv*DXp7K?_Ucm3G|bYEtITCNO|(|0ju(u!+!gf9bMT~a!aRWfHk~WxN2RYE(vDWxqtUK{GVm~PPtB6)S7VFxOfEq5pSmqsi4x2Y*=}}(c3uu z&<6FP377rkjH~QCZ83g=_J*yQ`=duqdE5Td&C2L4A883 zVtlih*I!?+F47YTd@|rLut{zI#Xo-Z@hNs5%`tw;H!2s@(eMkST5DXK#$V#5QH}HH z)b7Pax6`=7I?Lso>2>U{vv6g!sZ)FA`KR8CgOz3<#>K3MnMbmi7u%o*&*n|)nx8EQ z$$>NWk-ZzC2RAz}f^pN~{GI4HW=4k$S39mA#^tlU>PWb~?816($7SBN(efFrdu2=2 z*H1lu?@b+uXNC3ow9RM_)&Z~4*n1bN@9GW4#vA5!=JV>2`@)UwF7y}cM%T4UFZ07# zKdqnr+f{S0+OGQq)_HG|@&De9(RO;uq^IpfobLy>*;=1jH>mWscFL2e7x6Z0K!v-0 zu%@6q_T@Kv(VmBm9-K2*>{MnP2jJ%%Q}6kkl-?fqSPphSy+yRsb7`|bcEAtjZ}U#| z`gcNbU8ooF5RBh@gV{3SvWJY`?z4g3#i@Hqh(nj}CXOrU!r5tb^g0U{%kdXy>aw&C zXBy9USBr*6YOV)tvT!^2%lZ!Zh5?qv}t)$6gIgTI|mqqnK>bTa%+>{H$+Ogj&( zL_COd-L7602HQE;?8P{K%#^?ME}Ulup|3Xj!SzaSYp=eD_GA7A^?$}3I8gp{T|V=t zL9G8a&arM^eyj0k+wGy=_h+Med(J7_p~BgI>hbm*_;FKi!|m92Fu$z7Yqu!9?H{!B z7x6!PgYb`dF8h}H&l|!A^E!ib!iFBqU%Nk|zyG#g{a1K=uD9`kb8=>`(#_nv6C5f{ zeb4OxXK=9V)m_=Q*k4>6>JW_24ySPiT~NC$ZR%!^i)HuX3OXOIu+})WvvB#YvZ%%t z^vu(AReLaF5I^>QF|bmFr-2u5vHLjY%UQix$999;Nrv0xJFt&qT})v?;~1pP{ldlr&TkiO#(aT3i+IRfr{1S0hF#xqetU2u&ehw|PP3kS-|iFVN$ijB z$VTsL_WTpHCzzzckpuI{I}Cq|RCtKyJ+F3rTZOCD_Fw!E{YN_|uf=@11?|WA@bFd2g`W#y_u@iO zmvJHN@?F3>jk9oB&VAH}%a2>U-F$JTuGVNBX1aL#`g<;T+VQ$uT)^j^v!#84o zvGIxZQ~enCO~i>kk9~iQ`j&RI7@Vt2oSb;O3U~3JoMFcW<9=38bRK1JUfMep9Y+u2 z9<(d+UBb?ziCNt1aX!y98NV<0t6zuV|52PXy#}LiS&#G24vag>2mVjj7bL8=8Pr?9 z1@qU=N5sizhtx!U0kiWO^L=y>`xoX{2KwWARb+p+`o%Zkp1up*u#cy2jNbR{xID8h z$_@I=14Fo{$CxkFYdE;U0vCSxZMOl*x#Sus?0eD z=Zy@`+xOh4X6p-*JtrWpUK_-|hI^L1FZ69eJ2UW)`4zNFUyIxAdW(JVm7gke-nR1^ z_nnjaRrq!6lq(VE)_-~>;{QJ5-+pI!Y-2P&Gl-u7v2R*Fi?-qWo871Ious=<_`$ky z=sI<2_;vD3yN_dEo4+>tUD56@9~i=Y9R2bzUM1J#-m%oQU*WsTx^C5_uc3Ay#688u zzx@tiV#GV^cATs2`h#(~@Gfp z?)7M=ji1@K;2w?kD4fTF_L%RkcO!nF|8bw`hmCK$-hOozz603%1>$XfPxQN%Ex)G& z>$5%o;Cwjsr%Hd8I2G;8fWz`E{NsDOt-p5*?nmHk$7SS)*q7}&5a+swvg!r>`QPq? 
zINy33qvs{-f986Oqn*F_9@23I`ZWyxXlFK~4h6T??>%q7m*V@w+Pjq5U+lRZ?Mba3 zn=dX-T^wqS3s|RiFD^vgiz}={*m-m>&eRojrtU>&xqN4#J%&Fn%UNjmuAq0!V>#M$ z_4Xc|$F_nS*0;K~_`YQKqjD2x?Kg;Xa;?#Ovz3{THh!iWedik7>+Sery}G?uMdugp zv)k@8cD9{XUfYjzxSgl?4shufv0qdf``#7!?!DghD}(v+o^?1kVm-J1XV`rc`&t(5 z{Pmq!S8e=Y9u=*}cMrSXwwiiV!S_}Dfy2fX_SwEWqW$u)?|gYjbiK;J|M5ej{Mfm1 zb@0_-?@jpL?wxMRy|^CdE<4|G@90^B^~C;N4Cl#3cVgUaJFAVK8@A!^XyRO9Ia%pp zetr1nvA@5#@Qc2<@N@y|qTgk?SamPDjC15nUB1h58uy|L$Maofu6kESTR3~qJmz}{ z_GR1e`t9Nzg>~NMTj^D3o|&fyRrpuEJ(F+4{KEJ7skcS%K{)?sF5Rj0pGn*7I*B-6 zwh8+b#?dP={?`m)Uf}+l!8vNxb@$9R&4G?T-5QuwS3EDS98Y=c}@> zMbDESzQgSAR{HbyQ&`_@dw2aT`gbn-cizKWv7UV2aI@uCu8)qFGV?w2t|7Ds`?9@f zRqVvRiS^L#vw=Ssb@4dVA?os7z`DqE8u#LSL_;_}Uz`u;-j&fW&ZEnBnrIziXf4u+Y+Hy+4 z_NCe~m8bo2K^Mb+V^4|hd2zx0KXP0+ey`}?F|Yaa8`YKv3br57mbpCbVoaNwB_2Wu%T%8B@&1?So);I~|0BhE{_EdD=Fh0M zJW#Oxh_=k-X&>+ZaIv2M`nQnw_v@Pb@~l5T{@vPgb;0&s+H!fG_VM+9sqXobg8Sd8 zYhIXV{qg?K*OqMs+qY`Vbe{I{@o&~WZz{O|4Z7x~t+rMl-!3hsZWu6bde^~d`^Ut6{nY~QLa(|Ov*``@g4-c)e^8+6SzdDb8A zf4R1tQm}ogwoK(|AMbyO?s;*+{Xa5J&wu?}$ov`AmIn&9AJLY%JniHA-|#p+|MhPn z?eEt$_vKlCeEhq$kMq&zlPFe}k^MCeQlg{V&&+Qwp{()t0F|?c@C~(LFCNxc^6r^!(Sqh0LE(ZF!(z z`w?xK%hNvI|KTD%|MhPn?eEt$_vKlCeEhq$Y5kk zS%19$^R;DL!S=1%GM%S=y#LL*=S>Cozd_erlV|<${+DaZDFxe?YRgof_VN9%MEAV7 z;Qk-+^!(SqZ$E#I4~mqH6${=*wdHLNj|AyI z`Qw6j&pR*p_et*+{~mvy_Z$D|Am>;97Xw|^zdd+=+~4fK;GgU5^j`~7`}}pm@2EfI zuLgWt6GxxmjR|7O28NZs!r@K5%(1aHyj zD7ZC8mjkaPEEasDXf@3=f5RyrXY@gq&7TB!HeD*x=L!2VkzX#-6(aprVc#I~l}I;> z^fzFy{{|DZf40bP73p~*-6`syFY-G?x>KYt6ZRRAzf`1`iS!SJeV52zCDPp@eZ8>n z75V)lofYX7!hTrf?-A({k-k~j?-%)_B7In-*TLS7-w}~-mywcCAH-`3=DkgC6!lLM z`K2OVCej1KzFg#2h;)NU-!1Hw$Zr4%Sy(0aBsJ~z2XGMBgq+b>Gdqn<-NbeVEZ@dY(a3Ax6^*q4g@GLbG9>G#6k@~;s24I-^Xx=hsHEb?a`n=V1; z4KnPxzF!U>o;SsNht8{kY=TTf&WG%TTngC**$vr`eP|eR1o9B%QOJo!nsaa(Fmfv7 zbjTLSxsdIUiy^OuTmjhwIRLpEav$U&$fJ-G$Dw}6sgTnlTOj8`wnHw4yc%)^WDn#3 z z8*(4yA;_bU6DOd4$f=OiAzL8lPSEQM=1nNxz`3kr>|FM~GVwIcNY9v)o;mxBi`p-_ za6zSa`ph}$@SjOn1+y)jah_+=)gr&fJNxXWrq2b3#4(#%QKnA#s2BMSBHbv`l@-x8 
zR8~gYSXmWsLuIunQzLxTiu^i}t{3SBVc!_duc{Df(Z;H(sC|`aUsa8;7wxO66ZWEg zRSm*kw69t)tQPI7u8P`Mi}qCue$}FV)q-EOXkWG9S1sCCBly*b_SFb}HKKhrf?thj zUya~bBidIZ_|=H^)e3&KqJ6c3U#)0gt>9NH+E*+1)r$7j3VyYseRYCgooHX3;8!Qw zS10(@iT2eAes!XKb%I}=XkWeHS1;OEFZk7q_SFl1^`d?Cf?vI8U%lX0FWT22_%(?3 zH3)tUqJ0g5UxR30gW%U7+SefXHHh{#3Vw~EeT{-&qiA2F;MXYH*C_ZkiuN@MevP7i zl@%4i9%0vz;C1Z5#{s(s6(h%-;l_&nU%gF)WBX0Qo+j-UDARV2w0he;%o_DJ=d5gHV IUDsv)2jy@;?EnA( literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test8.sas7bdat b/pandas/tests/io/sas/data/test8.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..67db5a143de07fed6a733e602d70c3344da236a0 GIT binary patch literal 196608 zcmeI3eSB2K)%fS$Y(j`D2Fycgjk=}L#!4~d9U(eEzyJ#b6OmF%gJylG%}PpH9~8D; zt0=yJ;iU*lN>t>fMob{`W`Ioqr9LS}KmkDs0u=>?R;({Y_c=57+#7bb_w(#$f4@I| zzkbgBeAu~jX3o5P=bV`{H_BM&)B4GS732_4EGfp1_PwG0X0o-!~`wr+EcAeGB^b>+8FyNWRV} z%>QM6Zl_fAtJe-4Z~f~mG2hM~Jy#>%BF3jyqC62G0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1x zKm>>Y5g-CYfCvx)B0vO)01+SpMBx8n0$&_GSM#ricXpj6y_ek!uLm!f`OE~AYi1fk zc`p<*lu!A_6pYs+>mT{t%s-hFDX%+c<_9Y67k^^PCo1*bADi+*?9b}A?UX6!X2LA_ zlMZsPgWT;PZ~MrheFwSMLGE^txBbk^Z6_SscaVD> zgS_pKL;DVLuY=s}Aa6V9(7uD*>mYYK$lDG$wC^DII>_A) z^0s#!+INt99pr8YdD}Ypf&sqf!m%6BR9l`W?H4MqOhc2mAdk^gwxzph_+i7B_@ zVGZ@}Zl1odm+P~e&C*d^rl;z8-lQ$V-TG;i-)PEz@(TABJT|D}WC-QsOnI*m|4W;A z_M^PFDSvk(zp&?-hw?$DeB3Jj{Ll@bf4l#LDd**;{F)Xs{Vp74;t65<6L#>? 
zcchN*-*g7s!SU>@=jT?vKCrn-<>Vq0pPp4Mjeo+sthACQ{HDO-*naN z(_=S%V76}zGvx!<@M_=WX&&)RuCA`~MZ8o_{ZEhQ&S_KcoLc#-Dqk!wQbV*MTeWkx zswk3=-I-lvOCo!<#nBZbM{ZYS=JWBj0_a|xZ;3hIhc@wHVtHpALjK4nE5KWT=7!IY zp*-KL|7M7vhuR-sdlKbCOgUfAkH7frPEW&MO*zN)ow<=$4mdhy?$dive!E#8)>FU! z(VmC)Y{Yg7OnLq$zHgK;AY6B2v|YdAWP9(!Uw?@8Go{>UY_-doe0rJrjwkqPVQz!a z4lTPK_9wKTU1mF$U5#JH1K?fby1kF>k1*TGSirBF_*Cl`nP~sUP;CEBgs#aRwUL{0iw|=9suum?6YO`a+0OPY{O{oHspD~dL+IDPY2;I-BOxCHIW%wJ z+m`>}hW9fpKPofZ$z0EEqmDE`VbxM%+iCPmv}&ZpZqXLoFSd2t61%c2vdc~Py7<@D zuvDSo|Ef%Q81CoRx>U@ItZ(4H=b6KQQyKi;Xm7)sxi}SIPr>>l&33+R!126m%8|42 z&<<{{V>7lh#;kwm1}=}sc8h~`o~|;>ZT`&8o9AOSwuAm#vV(u~I{|J9_KWra`d5Ir zBt7_!DQCUyc5FMfUmWbvF1H=Wu4tRHEs@r1YuL6N{bH-yMY0WBHM-m`vNhz3EoSE9 z|LC6(L0IQSkvZS`CO%Uhz_@=hj4PJC$gdX1?^V{hk8y%$1>Xg`Bsd=OM0+0n5}qU0 zd5!z9vFZbZS7eYJ9X~g{;`CIFG>Lo6slil~?ImK=<$9bX_?V%OdE5nrUY~aG! z#>_o{@&Tqi_!`DDEhx`1d|9&2Bo#qp#6LjSj#0SDF}EbALzwBY`4**WgpzyFij zo;7ZU{azJ{lv{C*XFC_(UX)_Rfj67=*1e494*HkX-n`9R9{V3#{)_fsx=v{y?b&l5 ze`cw8(60@Q6V}zE|61oG&iChy{Eufvz-QS5@-!xLIm`n)kV8MSzfZO!XXIe*SFeTs z#ra~}p)|p0@319yMcb(DBB^S7k8R8Li(S!f!>%kPw%FFN%k9o0C3Zz-KE+Q|fybcz z7`Tr$*K!dJoLyy|{yT6r-D5~nC<5<71Ie)mXnCIii z*e{+(Z`N_SJ#52x%;E%TMEt{XUphFC%iU_0bsph)QS&IqV^+L`=VtptZXRD)--^4w z+sx&5ID4Nt9xGn0zMqTTYTNS1ar`*W-fPfbR^j+DK0NmXx6RKsVf)24L0QK(VY}Qm zSX*Li*hO~pk!{%J(Hj4ozu2mFuaSM2?!x5tcO3FGki&y5+`QM|er~PT?&t7)vFsD~ zPd*>-n`kFiJod;+exGb;7IMxu?IitWE>7`346)V)*L_5Nq#qd=FCA!&Tt|lM4&#cj zbIXt1A#VB|#^*+{+3x3?_@9OO-+*x@D>UWaMvQ;9M(SI#} zL_4{?mAmBy%<^mW_fsu+exZLEC{JwQ5$C%lpS2Y4={u1dp5x`uMc((Vbvd~;k{im6 zy{&jp$N0^_bHtd*#VrvcP_upOTnKH!ee!ke7tgy9E4aCD;5o=K9)N!Cl8<%n<2kz< z?x*5XFCN=+9qiDo1;4hz+8TBn_KU1z=j@91i`|A@C-TJ}gDti->=(N+S;y93=JTV& zUqfVStqY#B-)_h6JQc|8CUZTmYeKt1Zq|C;)Qszk_gQOSt!+ZP-GJ@j`F?r}H{%?P zHw=uoUw@95$_tVeC!k$@-Gb*D-m|RxLepj(r-ALGf5EuqwYbXKZ}A*FznGixw&mA& z-??@Z7hlJ&e-!Q9YQMY|?SGHizV)5q@cKymG|+yUm2=bLGjl6`f3wah{7$lZiP8@4 z8)p{qpg8S@Tjw~QYg5+X_Y;git@GvTR=khnycn)m@{})qIJ(hFa$! 
z-cxM*x4r|IHo``{jB&NK|KPg#U*jX>24Tf-x#oO6*~o1>v(3+T3~7h9!A8GCYuL6d zD@W!;v$iC%50SH*kE|0dwg+s#$j0o!APO0d{SEmWgU$J7JsQa@=65aYzJU7@@c&MHHtT177eaerk8=4rGY|JY>psG&@ZQvf_K*AF!6zd9 z#o8y^*WkS#$7$J5=_fzu1+f3EM5&FSe>((N?uzWRYENYsePGk6mPI z$mZ>e?4rlw$Z)>f>MPD`^e?>6Zhh6%v&Lzq^EP1|Zuu#G2MBIf&WmhQzhN1E_ii-p!9aiMxfbI_ z+|RA{2U+JPo@*f-=bvB2eburb^dqeizk68w?Rc}_H2A(MKX6!fh39P3E0ObJc<$V_ zJ#xP?u>F))rG97JxB|W!9C!i0x3j@!y|axNcUk_9@pk=6+)vO?4U8vezKZK^jWfq= zXX#chZnLkCRN@MYla(^`ub;fw`S%yw{32g$^Rx}twtlUW&s*hI%CaO+<9G`ff7NU6{Sxj=_@SMVRWXWdxD_hYo@cwIH996dfzq9=w`^E17pNAs% zRfh3iaV`Iz4BM@^1<%0&%{)gwc%Q=iui@Px`)AgCt#d6igyXT!S+v{JY8=}cA4dKi z1pDo?IzViG!GM==)TXwAG^7HmF+~2J6?tD7(?_6x4+26S>xSu>? za>Y5g-CYfCvx)B0vO)01+SpM1Tko z0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko z0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko z0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko z0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko zfq#DjbN+aC)$1b*PTs7u@4{i76>Q^L`rXab7xr>(@M)}^>ujzzo3Sh-(>hl3WA~@6y`^yPu`PbRC`o5KpXS+@-_(5%^f%D2pG#b@ zg>M<|F6{Y;Pn)aPHh%g8KK>WFaUHv?HJH51uN$|*1haPVqLc$K^h~WT`pl>C!1s$> z)xOEobkF4K>Z*zmbi15YPW?}h=QPticTSsn=hVup!@sKX{X)|`i`jga?pdrSL7l~H zS>h$0<*r)oE4G{+OVT`J_2sU|m|VUD))&@mTfMm?X)Jxq%?a$BkL9o756Cs$S)=CZ@Fd zwOXBn;jk6e0{tm4iorhZY$MkO3uFAU>~uimKCONwpERlaM+@QA@cOiwuLiE3os#3z zmTu+iYR_K9!94=5xm{*6@Pga?`cGcuuy8XTf~8vwt1(Wjnp>_Stl<)kJzy>#EZt%@ zR{uppilb`Kh^YSlEx=$p82}&sZC&WOz5rPe#T%}yR2vVV{C{9lK=t* z;48d^A9$g=zGfA0Q_Q(S8`&~ZQY;As1t)??R&d=^7PuJC^-FS8t7(Me{&iX6{*D1I>kYWT<7EMeK1rt94pX#c!hRxhH z?iN>|=j`tve4N#A;eLM@av0j}>rZ|s#6NqC4M>}`Fg#j{C>A+;7uNvG1RZu zHu27FH^b=R(@t;Uw~En^?SM(cCiprleU6vPr8*JNz$d?M;Xl+z2=Dk5R+6QHKWr9c zwIQKWpO)8E5PYKQXh6Hxr=4HSuh6;N)lS0xGtQ@7yGi(|yBLh=1HrJo7Pf6LE9h+W zvSoh#@OmH1gWb+3^Rwm`z}dCgZ06Rqr95xan8dT-ocTU|^%9Wl#%+F<$j>a`!JEhE zZ9yG&oGEMg@fV*>VPFcae#V3P)vbIgnAn@o)g^)}2Y-;fkbeWjrZ1cm)Nl7Eb{(*G zHD8j`GxTOqH-^FBlN0Xx#IGkZRyyKkE|z-k(*ZEn@B3flBUZCX-H!$JT%YTcjn*<& 
zxvut!4fPU_+(pG23HQ7~*SmyqdB^HvZ(YukJmYjxdQ8vI^mFWwDZ(+INoI58sWfcllky5akv$1=BeZR zH|@?~_ilsQUZ3msRt_!=`^E>)!*(AEq@HSl_1^O(9MfPii48oVt6%P)a}xvFtflDN77*p2?g^5^(D5hgtLHXNeht%dC5RycB5fEjyRx!haQ-kskSHr{fdZp`G_ zDdMO$?TlrFw#aP&0u2}nY*-^!^uCWb-PmpU55O6C8LW0v!d-#hY_w0)Q+IFV(hJSg zQ8=mVfvJ_Sou)Cd%l-;nZa%o_}%N+u1 z@KN^e3%k1tM_|Kindz(_fFHn`vy!PBP2l$iG_iQb0@!q~lzY@WLA}7I?OV^mms1#+ z&bELC?s?7R& zaFC_?_0-y6BADFVZu?tdzm%^25g>>CQV)&eztM}up;!Gej0H}%H|zMsZrk4a3JfhT zS!a3d#$?zFU{mIBq#c~c(+t=~vfM{OpZTA_4d~A=$K{=4pdkr7rg18O(1*<#fNq&MSVL&6#L|E5h zmrd&;+%y4>M)b|)u%9l`!X1}*=0j)$UOQG7E)2)zl7xj_F7dD$(>Ra84qF5(S>MEG z>V`Bt*b8AHm%YfZ2ERCneKK5h3BUas8( zmJk9P%xUC-g)S~a9&l~B@TXo9Yuc4H?Y{S~Ft=O$`iWXD4#Ja}4$6z?!zi0th0eb}Q z#%MU(2iE=mPq6DIB(Ol2t3oHWP_VmaJ5M>_G9U&>ISs~kvrk|5G8boTFTG(JoD6ea z^EUGhpMMJxKroGYXOHshrR&7592)Cqp8NPS`qH<3E>DU$HrLhjYviF2ywV7E`}w6k z8u=g3j_y9!lL1I3^751eJD>~igZ+HkC))!(1F7Picr9c)q??&8d6wA`&g9F&=E(_h zsXPuRxR&d}2Vv_4FAW>d=)YuQ9hd91S`wVEa8%BL*h3sx!Y2oDjTwhRfX#G;tpRN; zP$=2J=jtP5Nb)A20^c~WKA;I><=w#9U~R-c_x0}}Mk->^LM}ViDq{I^KfAqwe^mx2 z&(6?7a1mpKPpg>cW5EO_&M40uaElSrE#8Nn8|HS?#$fVZ=z^#8hu>)!CGUq56c!}W zGeLwm1zlc)3^>k`dHjylBqp|1I520sI=1l=c{3>UydBV>yDQ+t1Q+Li)^`VhPGrI< zJA!&Ctn}Iz8C3OP`t@S5%V3(rnq^Q>IehYffW}7pv~L@N$?y90Ku_bL9o&~*qBld$ zF@F8d4W`w#&rTinjPBD;t@5!yq_A3M-fpS`u3-;Ca9pxO9I5%2F9A?NPf@}ZH@x5V z5eTEiN!8o0=V|8pEoPJ5)#kGOl1VQVD<;;oOIU8S=0&3{!Cp!@F(H0sV_i!Uk}#Sb z#};X9j(Irl6-!wHPWjL#u6c$@TND927+h8WRyHwNY;&;5<-(2!B>mJYtR{3b@0Mre}XXew=zWD1q+!lei^ua zFINvXuG{<2zzDydv4CGUQCyIQ4>|#T4E1RpcYbj>xP=zt|HvGz*XV&RtOz#Bb2V2CxX`LVdn zPdEYl>}{~5=j-@pDZ|0FKisjEdO%hLV&2QP@5 z6T}N20fdnvl)YyKE^{kTfNj&0^!Q5t)|X|4+T7$`uwKW-dIgi1ClPMcY+nl(k>bz| zhXXFKnKa|o?c8@M02GT2izklW9;}xOb_8(~Jyj1OskpF|>Tsijt^X&laIf5^Lji5v zBXBtM3h}={V4BE!`?PmA!bgE&sim&EpbkajR`KV-I=|f?xRjUAhsC(2#b-;2f%`8iRp!No)kA*J-;}dRW2Ke@O{|w8A zDUTcSo(S3WBp2kXqFN6$Y? 
zYW9g+-y=2CW7JQCDT;rcPpX``JMQ^2+4F@&v5K8f?EKD2%` z|2Fv>&-g>K=jIsuk6!-(@!mWijeU@BcM&=1;~we?#^> zE5`n#=U*)~tK!zLkeU-?)Q?{OQaN*P-1FzM=YcWyA3gspso5uPeUH>kk5NB*{;6{2 z?zrdAWX~58;`V>3c_wcC4yk!0M*Zmdwr&$E{x{HJ8Px zA3gsXIrAsup1&b`o)u&N(etmCnpJV@S4ho?G3rOpzf{iL8~6OV?0H~}{YTF~OKSFs zTi+u!(__?+o`0&GxjXLpGuiV6J#PP(nrGtH?~s~DV$_d5|Jrrw|ME93|Cc>C$Jl@L z`Zq|;`ndJ$q~@|1^`qCnM$Y`nxaV)ko@d3_fAsvTrDj#!`V~@hVvPFH^DmV%_r^Vc zE_)srWB<|f&yt#b;@0;_&GZ=cqvxM0XYP)B{!I3KL5th}rRJHq^*f~Ikr?%(=ijbL z|ChgU`M>PBImZ5@*S|q(*2k@1CpDMFs2@H58aeYPsLt4 zi81O&&%ad8+#C1&x$JphjQvN?KTB%%iCfl2&!5SjFR_2+_+u+Z6jO~`cokNJd|7O|qrWpH=UjKTjxjJtB zWm0oVjQY{@e^SnTPTcdKC3~I`WB<|XUnMmw<6i&qQolUL{-ft#B4=J4_xuZG&$(*< z=AS>p8QSg#gjJ;rBZH@sJp?^f!rxKEvo*}vn(znVcfyU~OtvfhWq5D+Soj90cQU*m z$}VFa;XUF14xb4pvfbfxAbAu1y&E3SUI~95elJ|fymH*@LCVA79PiTgujN|k?>mhJr!;ZuY~ky_$&B59Zp~`h0lgx zfcHDuC*fvD{}H|bzvb*_(9VYNi{S>yy%jzYzK(5%x5(#$2_{?;xgD@%Q4PLPj5mjA z{td^an{p_!%2Tm9E8k1WPge9jN_nP|&rIal%}O8&Q~FaHJu<1bap$1C{? zB|k&ye~MCGt>kAY`3Dt!Ln)u5#G&RPr54o+X$DC(bbj`^PDz{G5{iO38Os z^x?k;k&k7c@E__$3K|DI`)rg2Bx`~7GqkDX(gu9F?|@* zMVPL@v4NJ zOe-;+j_JdgF2ZyLrcIdc#B@KVConyasVfQl$20@eTuh5GEyuJnN$xM`HzN5KR6eMzd}!&AsZ|00J^k3=p=IUbpPA2Q8PJ~iXXbO1@?17* zRB`doAXx0w$y4g&D{T}g<%LSVzmo5l6*-1}{UXQMFFSe+{c@B#xk?*(N_oDLFHrJ@ zihlpd^6V@nuZ%G}J5oPe8DDm;qOXiEJ73XP#+O~F=quyPQ5fbZn@C$~}GR8$V%V0)@u&3_$ZJyq}j*Ze==5o51ZCh#RZLevj||CdsJV^(h|xeo6Y z;v4714_-;4+O@t3_OkCI){2_iH(?{V40dvH+w|^C;ESW@YMe`BFR@m|T1PAYYiz>G zS1I{`l25ihJNCX~tC@V!*XMM3=XA^8H2Ir>x6uEsEf>Y z5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y z5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y z5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y2oM1xKm>>Y z5g-CYfCvzQe+L378qOl{?=Tyh0}&tsM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y z2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y z2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y z2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3;AOb{y Y2oM1xKm>>Y5g-CYfCvzQ|8oNW19*37QUCw| literal 0 HcmV?d00001 
diff --git a/pandas/tests/io/sas/data/test9.sas7bdat b/pandas/tests/io/sas/data/test9.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..d76a1f28033f47520f110d73b74f63b9b51fedcd GIT binary patch literal 196608 zcmeI2eRS2uwfBEPq?CJBo9kkExs_@zMK4y033;y|!pagN z_qpr)TZ=Ql*)w}yKC@@fo?r01=+k(zv}kSje|&rM{I1@2-^`fX9@H}%Y{xnBcJOd5BkUhKJw<{KJEL+gFf0*A9=t>-rVZbzK=ZUBM)L`H)ZhKJuWCJm4d5KIqfFk38rj5BSKN-}7nTM;`Q%2Ylqs2YlN1kq3R` z0UvquexLSzKz(?M^&!>GKdC*55@R2v~^=aQn9`un1eB{mV`n2yO5BkUhKJw;0 zKJEL+gFf9=pzsK$eZ8sY2Qa4^pOXAu{a5-M%064`fc zyeZJ>6w80A&1l?f890emz69ZtmR{t ziI;}2JNM0=qgpNswfxEkoqijMc=Uw1{X3h)Ex$}1n_qjH+u`wSs}g6Iy>WBy6Ypqw z_;#&7d7*eyN(UF6;rhL_JhDanYev_qo_Drsx#_3=CGoq7PoJ4Ki0hknzHShg%n0vz zaX9k{|4g^D`!&&h^^G4s+}&`yNz3~#6m^%+Jat>$2fF>x4O%{6g($h_?g!H7xx1vK z_@0=T36uUkEp7Y*lg3Y)@ay7x5_wS?)P`!+=Bz3esmFF^i>xHJS1XRM7&~%X(bLa~ zs|e^Wp6>`f-x+n{23p=J2be$eNqK`xgzFC9`s8yvS$>DEFW0lvHM1@~uyZ}PldI)9bz=7@G9X^}&@Hxp z(Xp0Z2hV@R_4_D!sHV}Dd+O<-^R08lax%BUY=_2fZ+M06=XKqVv8%C*#Dj8ojrrdD z-2O=2PVf2R>Typse%^=eKQx@%AMbF>(DIb9_@WOf82cZn<>hO{Qo{4Ver})b?V3g* zJN?O;inq1g*x{9H#Fbr2|2TAx*{4C>jvOzIX53Tn>H22;J=O~?H+-1eR~5W3PIAAu z>2|hm6rai6o;sG-H_ZF`w>4st+DObN#2o6^i7hYu;M(_ln|(A|x6@~>utptgeq_~3 zV(m2kC0;dFVq3Ig`(mxzN^E6SWXtvT>iBClY#pU)#h!ltrTPKsX%6waXnzxbCyrOg(<9oTK(%Oz~jg8lRL>v=rv2fTc(?>!(MRR@f*H{REwJH>&WU&@|0 z@qF2DESM*ho8Qj<)3npNMU0T%cJ+VPGzq$Muzmg8Tnsx4A&Sk?G)TVyrVixqqNx$pSjWCUT(i-CH+RkdQOI)K@KdK_0QS|YBX zALwJH2}&KGNk$^_$ghn3ih)~IcfQnkCs+OmDI6>S@~ zvXWS_)v)EZvsj6(=;>$JoMO3S*nUFn$LcEtMFXEMGv_$(myy-%$C{bjRhnC`SNR<0 zbusIk>a3%QTlT)J^-MjF*HPy6{leG-$8VF?^L}A@nC&4nU;I#Q4CCK8e%rm4YsaQhsW zn)&W&6cj}jA3wtN`|0`1`-(mvPjJ6{9_^?U$~|o1c+7B;X-xbh*e~s$CzQ9EX3ir% zFUn_eJZ9o0J~vyQ5&HPz`X=uBc7ssvaOQ439uu#YJS@mtZF!-L$Is*JwSxD{G9Ew2 zhiB&qYkt;*?Ta-*RmYmJEw=`1C04^0+2&*0u;uX@|J8o6s_5fReBWd{G4a^rOU3V1L(`aZe{Cl{R|z`B|2WjF3$Odg zs@Q!L;&^FaW9&M5yzX*bF?MeDQ6MbzzT@~jG)%X9u1>r~=6@~6nO>fj2WvR~*~;tA z^=11*)CGzCZHW8L-N5_T>_@hfI~qkmUBJw~=KbE*z~>k5uMo>Gt`;%pyOB>@$oKSZ 
z%#F|S!WU!j`(|B^ZI0!}^3bkEzNd5i7UFXxG*!?oQAVJ;eRD2^H?p66gZt(4ZscO2 z{RW?dg5v?%&#%;D&V4>-x6At}UFzwvmg{4Ox|aO425U8J8}>!ju{m4OzSuTwo!A#U z1}nB2_Qf`)>R1g=KR-HnUPh*7UHF{+W-EW^DPnHd>-D(0mhFnUnf1E9p4XS}v&OGh z)Uw^K<97IbKebWlIEUkn5XakZyeJCP1C`3#1-9j{gSCdf7dei1@=pl|F`tnEW>;kVtepr3H3QMoBf`-k9fs= zZ>nYcXMebVPV9a$ezIi+-|Kmt#(oNx@jaTyBRC$D%_nzrDAT zzXO>21>4)is@U&ZrhZEa`)3pXaD3SJPeOf`7|7!cF^6Zv-2QPr9@BsI2EHFLXR|K* z%lKS2aUjQa?}o)EDx5UuAjh}f+Squ>v_HIt*U{`>{vJ{?gXguK`SUo#A#qyXT4$az z-%B}eU$IH(^TovNJf7~0qx;3$skK8Tu?B0!_Qjg0eX*653ELL!i&eE1t*U)dMYh~( zs21tR7Fi9|yshYUcnn96=ewnf<1vosP28Tdg1;}B^Qe!uv*LwLoSd!YUV+g2(b!L4 zEuXTK@AYPV*^+UvbJhpR2|!pRkh`| z$V%*ssvKW#JG0%}I!eRpd-~DeL-<@a^UmGk#8K?$O}RQ@oB|SKa>7M&1{E{|#{*RXLx(vrXrI`8&Y5 zaI9bTINlppA^u5)?Iv#FbMU%)k*OZOPx1XX6l_xcduG1oT%#$42AB z*uR5tzkQd-w(s$Iee1f|`=E)h(tZ;gPnz$Rt;>b_y!|2jH#6RC&&B?o%NwBkJG+tn z$>W-vsb5hOyIw-?_t5tnc|3eBn|oGz6Q7&x56w9%+p|^|Z->f6ZMijAYnfJJUu+w( zCLF(CY#X+DTSvXv7Hzqe#P&h8RUO-+hX({e00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1pXTmnDrl@E_>tV++%~zP5(9$Iaiju zMci9I`I#P_*9~efEfiWxMeU{O;lTX^oL}D(8#q4&VCZ zbN5e|b#9Y_b74UhZhdl@6x=EW!z#o-9oYHh=p)CZV2~Wzb@ietvvjv?AzUavn-K~& znem3?$jkbKxsM~V{xB&xwN+eL@z-6Wepw_1L#04Y>#4@i`;2?)BW|(pa@|6EX?IzC z>pJn~&ABvPS$w?|r2R%*cJ2GUyOfe(uoSc|7au;{ef5nWZZ!P1Jtx$lpW7*`43JgM zZWJ4D3drd{ZmLw&bRZ_i5{~aTa!;=lj@zYLdXpH~ZQsi!13!@xApt!u2U~K_-47gj z?Cz41;(I=tp)%7aO#1iM=nV?~`* z{SK{Z)l*MWX_~F8Y@)ripB&|uDxS(lIoq>?&Xwh?5RWSR*(L>jWR2p5LL>e}3bIA} z69r;U@$tW{_$%f6$_i7KiU*~+jrrb(X6mb|#kYH=NTWI}tBjHhviUW$uF6E`)$zLS zljUSyk8EJXA4oxeDX3l~+P|SDYN*WuDYm71t~S+^|a~-CO-Y46pWRE?rV8*$zFQPLDjx$>?KDQ|7xqCne7_&9(5~Q znBJ|?l#~6*eixU}sAyV*Zji6oe<>*s`+FQ<*RV@{FDO)*C`axM?>TArGI?R-U zg`35i3fi%!cphppdVl^*8p)5B2xS*~5%upZwS3YP&ammIW&X2WPy3<_iz=UOJ)EWP55G%K2R`tz1kducA&zi1F->)rOzR;8_1GL=0Bt+Cwe 
z#4c(SKa+MeOH6$F%(Ow-du4?iWxiNUrH%M-t<2Nz-#!l8IGf>GRv9nd~PG z@?Xgjj(QTIk>Ny+|r*EsfeC8=K@|)$< zR%{YYzbYmBl6{nSUo7krYd=#Sn|w!%aaCOhKShlcRtW8Ic2YmL$kjNw)>vC;w0x_7*<_WGOx-Nx^2^hNmREY5>B|?G z@(?ZV>G|RjGLV~^WjVRyi7UjBC1jnn_ms(B(I_U#EwJN7bEb8DMtm)iPJU)KRUx;x zdbwCY+hhj?1)&>s57I_X4*FZ!$W`nCPf{6?xo;DDO>&Z1;@ndw*GElAIg{ubtA&lO z?&%KxsI|j8A6ciO)pemZ46f+!lV+*-WsE#HLm}x^OB*^Iw+iF-7sVBFx$7=}R}T3) zIS28yJ?JF^n$_b=`Mj7WtMr$Go~w8_Qn*1Rr;n_%>}mb?H44?{%-vFBgcQiGl!Lm9 zVuLV6G=VT%o`Odw!nr!})ogdO=P_@As$|DC)ustqY>IFiW|FsERJ{Xg;=XYpmnpI&tl` zMthw#Or2E?G(YJb%jzq5Hsl~F;u~4bj)1lpZS!8QiqB+7nL5@|OI>5Ak-wR~m-ldc z=}oe_oV$*%DORC{nY5l)Q!O5uNocqG892!`Gdo zJ=aqTz7rNvTca&Y{b=gsD$WjSe`2ao(e)*Cnvm~Ft>8A)E+@yhr%@;ecAO4jI&i{c z#BWs)dV;soj!K~okD_+jgX)IN_x;`~OeG(*f1Xgs?zF>$J`c%!`K%5BAenE=GaSAh zmW{~3T{g18ZXIbMBq@1VC51W82j7KWWbleT+E~jkbZJ z6je9IAUHz>`e2H&FD{K5%;{NXTeX)Clq0XI!l8<>91%5SB*}; zJZ#K8Op{65D%8ED8wE(T|JSVI5QL6bHS&2I#JY17pVJ1;Uv2MR4^M&5$k?45&1-BB&R8ci`km|B$=_UT3Q%zA~? zdd=od{yu{$yM8_QvP*WGFT>0S_)fP+=IN*l-O17GCTf%R^n!UpdO*2lX+QRTdV|+*dbnAg*kT?EQv%J_sqCB2WBHHFrU0k36HL z){G|h#q~4}3f#u8<1Ii#C(CMErn^ckCd`%nZ= z+avwK4yOU#LwAJv`b#k$Etpu#VIX~@QSr@GdFZOoEfjgGs83E@f1_D1mFU#TsS~^D zT+45xp5;kRY*Zw2o({u4^u?vd+FB1;-a1Dtr}$wo#oEf7-0+I{ow}ycbxNwAD;v9r z{n-1m4f=l6d%n1O9DO2Co14y|*6|%CNP9UYEWVH*X&OJL4+7+5%h%XDS9>X~<28-! 
zqtxdll3cllZH$bTf+X3y{t&@CT;<>Zs&uS z`J;pSEAoQq5W^#Qt`I@GPmS;>Zk{q#t~~8^c8G(gP=>t@O2b^kd3~!m7~u+ z1d`}WThWmbU7O|JEo-ad+YX&1baMXqRS{GtTv!UEOP|prEdJ8iTuMtUw`$qm^?WC* zlma??$1D>s$tdZYJ!D&CkkhYh(EdO!r);3?&L(k-a$ICt)Wx={4l%F_r6(`sy8)fw z)JtRwkDT^}vg!Jl*vjcdS4v+uuqBbBR>$J**En>h+0m};yO4uAb?8fzeSE+Q{q2U= zkP}+g)>o~eB=SPW6t#e8ta6;2bi9>zQ?Ag>Yd6-x;y(KJZxQu)&J&RNv>4gsE}dn* zMEyn!64x(Mn&T4G|E}zU{zadHE-5k-RKHPa;{6NN^MC~VkDq_K((IeGewxzknV|l~ za*FgX`lPCv2a=w@r+U6{F|A_sNu1wVrFlAO{Z^%UC_(-B^>4YD*1uW(CNuu9>bX9_ z{^QrbT4`1#tzW4$7bU14KmT$y^SMdSKcsq|mSF$!^Dj}F#YyWIDa~;S>c`K&K+Qau z^!$bDc|d~w$Im}qY4%N8KTT=&Oi(|5{;6u_fu!f}sh%%%PI~_<&C^Niw<^s;3F^ns zzooO<|LQlX{jYkiPq6>^^{-Z%RY~htD$PX+>c{*4ay9e0NzXr|dY+bG|MBxLQJTd` z>lZ1_aS7_juYZA>c`)hu3)S<01pAMlf4b7_o3wtK((IX_e*FAX)yxA)&)-u$U$`jg z{jW4nC#~PAG!G@HA3y(=i`4#Cze(+X)pLD<{l~9=wbHCgTE9|hE=o{8e*WcZ=5v#t ze@OK_Ey4cd=U<{Ui<8zbQkvru)Q_KkftqG=!Q^MC~VkDq_K((IeGewxzknV^3B z{8QD;14+-{Q$1hkl=S{rnx~W2Z&jLy64Z}B|5`e!{jYwL+W)HO`ULxrU;k>QS(UVY zrP5rKpnm-Nm#dl2O?v(z)$_Cj`;VW0iP9`iTE9qXj!RHKe*Oh&=E0=rFI3M166`;I z{^?4yZ_@f{O0#Ez`tkEmRWlDHJ%3O2d?6+2{jW4nC#~PAG!G@HA3y(=6t(}=Z&LeT z^<1A||MBZztu(8W)~{5WixSk2pMSZU`P`)EA5uL}OR)d=`Ijio;-vM9l;*eu_2cJX zpk^LSdj3N7JRrgTCa52O{-vs!2a=w@r+U8NCAR;&$t{)^-DdLb ztkP`ry2MBm|9*H#>F-al|M>N9_GtfaSHICAB;J3$>bWk#{^QrbN@*@nT7Qw!T#%rC z{QT#tna@so{?k;?Qxfbye*KG;=7glzf2`6kOtAm>`HxUDAC~m|^Hk4SZvXnvACcZE z+aD#X>Pbc>p9$U!+0z717jKSu-ipWvk#{3Ckv`t*kuM{=A|FPsmGzEA_QdfwJZT_oLG6WJI!BlBA#cS+g7 z$l^$@_s?EKWVkoL`zS)Y>|GIuDlTK=|08Y4?(dN}fx{5=)9$XgltH1e{1 zAMc%v)XVgX$OZXZ=>1%_vo5kEQY~|DM~+6W_BP13*yjRs8xiXcyslJDexn$xhpGRD z)47wD%OX>r%FUU44<~=Oqo3xK_i^&+PX1m;KhG%_PJV=w{~hbAzro1y7dYi(oqUm# zpW^gC(J3!+@>86AsiPlq%4a+IawlKr=r3@}E1mpuC;yD2U+t9FIr*@YU*za-cgkCw z{C+3D#L;hc%G;d$Stq}m_09Ty<&+yY(v{@WUc2(XH~FwQO+4d67$={QbHI1O=H&S@p5)trX;9BSdTmD6@kJEthVUUyD=bDG8JFis0O zoxtg2P9Nj+Sxy&oTFdD+PWNznl+$)jJ9py#Iql797N^5FE#!0pr;|B-jMHa1UCe1M zr`tH)!|72@+d1ug5%c#_1kTk8;}1Y3I(| 
zKc~Gp&Ej+zr-htO;B+#lk8%1er;9nQ<#ZdTdpJGHX*;K#FXsL^?agTxr^7fczbt2bSq{G}XM9->zbt2bSq{G}XM9->zbt2b*$%&KXMEWX zzielG*$%&KXMEWXzielG*$%&KXM8yhzZ_?LIS#)ZXM8yhzZ_?LIS#)ZXM8yhzZ_?L zxemWvXMDL1zg%a0xemWvXMDL1zg%a0xemWvXMA}MzdUDrc@DokXMA}MzdUDrc@Dok zXMA}MzdUDr`3}E)XMFh%zkFwW`3}E)XMFh%zkFwW`3}E)XM7py>GF&)K19AoAO1*S z&Z5p-qGP#ewf|Rd0dClQn=nzOISSppIV+9ajL*&Uks2{_6%)%ryLtKlh}C1{W}a@I z|I2OMHx-w|*W8{fck|~Htmo?e|C;|oJ~Z|T&IG=c4~a4u`2Tjwf12J)Nv`I51%2b} z`0$k^u3hs@u!p^mm=)F9*K;Gh3^wW5Hs1XS3`zN-+`lyO60<619Zmi>+=R&&JNXBl zd{?`(6Yo2*nq515eeS95oB{RQUH$gvTj+n)R%bcB6z6}(chqt1T9%oqt7?ONc-ubcSE?wu>tSUaQL zRXigNnwi^MiWHT9RD~Z&DoH_Ae&k1flB!hkGbWWvr7GkT@=XXyNJ7AT8!-42Z{BWw zjCv%^sYx_f{_4yL8Ki6Y*)cE^+%WEWs)7sZ>tix)hyaR!wD1)77?Ww%6;83(60<)vN6) z{p!VwpZK-kb3a#&Qg$EZ|nFIFlm;r>gI z|2W+bpPzW4Qu$z*KLz>U5A%QWe5JAx=6@CP570bpzmUkEP2`_`ZhFoiCGsCh4h-87^V+|={v*pT`c{~g6{L#1>NTh3%bvf3%bu!3%bv<3%XC`w-zJUCl_>| z(+j%KJqx;zvY`7s`O(G5!BY#m&$A1 zu?5^`dChDtUo&dUrmmT7CAC%Pw*CZbUqbG(Mmh%Rkfv&+PGvi;^qk#Y+qd1ZGPZX5 zwlekrV#mNU?ld z+4UxFPjOu3t>@1u<9%za>|57uWnhiF`;NP(xYocPrLIzf9Yy#Y*gh4XO3Vl3=Vp8? zz%f1?!b^kXM5HL63uo?EdY*FVO_T$RikGMO_Sk|$rQC|}fd}XLoV+=o@xHC}>|MvT zdm$cW=!1s&W5*szq-7#}w1j@e_UA(8&whpKQP*-^Z>;Rt%=Fm3WyQ2@MfdhV-F%=2 zQqvzTXMVplJAWuXN&B3`r_QU;$nM+Sv0^E**=-R%4=3cu=(MY=y4oSFPNQC5o&CMH z8iwtk^f4(YWN&qtM5X%P;asUhy8-pO_D`&-3}T z!Ttm#J1v@etlT&hcWnE+R#$cr65;bV9G`Zp zYM9Mx3#>GcoPHv0-*QJoaQ%RlmU$E5^S2zI8i3InDhN^9J*JQ@fN=&k2vI5BUWI=$ z5k7y%@zIRhhMIwf-I_RVcfXt-!NO?Ri6VUdp5voyMlK$O^uaSGx8wvI3-Gb}6Px)D z|AFJv&~!atzXs4&f2HZyKXQDUS`Az~IU5-|UF#JhKNJCzpMRR+b5+&z00EyHcK;Xv zDxhC6pU?7q2-JY``FM^yeueS*XO7QSRoCk9-==CpN#DBGX=`mgYHyG&YPCj+4n9;x zb;tgpq`n{#`T1Oe5BLwOEpQ(~^`}F-T%kL)-7(dus;ENSm9!pGnwAglRj36ieM*XU zXN>Ifd!46NRfNyKaD29^EseS8>qIpf9`$P6Hy!bz-8|PV-(h{s)H4r&pevB|j#*=? 
z{IEp$e4gXeCZyA-8rMK-bhXx2>r?2867gwx(76o#$+TlxamMyw6m%@_*tQFBXr~YD z7c1{3!slNTe2mVj3R<;BT8(O?REf7Fa9zbb903ufyYG1;uths|pkdb7@k-j8oOpvU zE5hgBcs`m@1*;qo>eN-Umbg0UdMa?E<62;@CD}yy{5!{| zMjDN34jyQqdV>K}5W&^}pdEQzsUm#7kmhqW2M{^%!P?A959>h<$sI%a`45gyJqHbY z@rcjJ*>jy;Xluyo1`?5<|K#~p+iET%(zBpzC$Kq^NJ{o&itzcb6d&p!%Vfm(PzRap zj70c+afVM`;RU}$q+dPjQ1Nz^f#*W(hK!pCpa16h)EkX!d5Dtwb+*?V3=R+PlM#_KHm=kcC4_cF^N69>kO>EGNkQ$35bcv&zCd!?Oe7?f*$srS5 zn~b^+?csr{4{ftc)#OUh5$iwwD#vF{C)KEdI@|nGhzm#{0DJ_&-+lYs&Ge&Gvo! zh82}r^nh%e2%m58d^Q@5Y~u_J$i6p$o@H8A4ICH3$YgcZi}3j-&xdU0)qX-gvdK!; z^NR5K7RN_h&6OVD1GPVU58U!{dBf6#MEE?#^HJ6OBA)EouIgJHu^e{*we7-Zwr(Vr%pN8dojEOblX}8Xy4b3un zHW5DG<@hu-V||n4$l zh<`(B^%ANdes6}4Y34`|lwVP;ht_~bHuU!NN<;l&J-cib4J6ybqGC#_FmxX2wWM^p&CR*Lv*VQH6sx|FU;^U*7J&Yg*jANC6t-~&oBVtiiW`KT>A#2{OOV*h*?yaBalm^*Yp3gi72N`-Jhk2kScXSlsLs>FDl}}y{QZ|1}^O0=PVd%-krCo!d zVUjDF2%ja6&y{w*`!Mc*fB(etY4obJ6-5y~x6Ja%2Nc<#Sw2wxX?X*13^D(`RsINj z5k9waeCAc36*9Nh99zHgXGHj%;Q0`U?#(knL@p`G|D8nm+{W=S&0IQAKQz6r1GPZY z?oJ>Q@le_*O%Xn~^L#G8du1)Jj3?S^LxB?@fK~2(3=uweWbxU|RI!*(T+T}hN`%kJ zG@raiM$AX@>W`3r_09}FISMxABY8bqM1EeC=95>*$9$6Ie10iW-XkJ>UY+LiXig(O z=JNrMHZtUG#uwm2$4Rnw*C~!qZtOD^1~QF(mb2d?lIQ|_=&6j)Yj{2+M|SA>jL$i5 z_Z$s8lz9{3^IDG2_8dG48I7Um%kq1%2mZk6jrW(OQ#27iujBcs>La-w@DLx)dfep; zSe+4e5k9ARKBN^%keQCp%pM=d{LcrTFHdKx2%oz+KG)RT=y|X|Vf+T`9fS^@aS#M8 z-$Ie;--tbD(l=W`9P7gxWtLBEBd-cgVk}_=ai?#& zj~`F_6YJNH&GJ!KYjv%bWv5ASh|Wh+HEWtSPlQW-OhkTue3noC5U{EIC~}Vg6XEj{ zvwRxnI?VBxHA*Eaze1Y>CD>$01Au7cJPn`kDKOSDh)9Ich44DCj7aIm8jA2yX8GiY z*iG#-Xjl+X=i4v}bwneiN(W(%hKum|$yq*EHc8GLC6WBF`cEiCw%hH)L{e-Sr}s}h zIR^HK@VR%E&t@}6z2f?z1uKFiiZV{m=KN~qIaGkp#bu?&>W4oy%Lk^gy9T3pG72yP zlhz$Vv*KLecK2Xvk_4krn#zMnJFx>Fv-bDDE z<@nHn$+;2~@_|6i{PD|~JbcTcRRE|pu{lV=v=2R4|L|qQRDchi1C-h4`#3(~_|t3w zn$`~ik>d_0El6SJn=<#OipbCXJfCoUN+zJG zekF&cKn$W62Q0`@PZZ&Ej_0!p3t;3aSZJRQ_H8h;K3Eu7u84Lq2W5IR+%oV^!_b}y zT`i;cvKiqyH!i>j7Rp;@_Gg*n1IhtQ$2FRnh(bP0p;$RZM-lEib{`6Tz$AfUh0F@@ zp=UBa@0jINhZ$}%)r*ZU02ImSLnbV9eyMUE65+GL^C8(bpR52M8^*XX{kp*MX_NX*`CK};a|niq 
znxQBd5t?L!VR_|c_Q?wHf!=e6@wv$J+00k3$o^0;zA|y;^>Gy8156p82RJ@k)vFL4 zo^75ZJ}LV=aeDx4t6|RmPG$XEfR7Cw@QlwpIX>4^BaaTV&jar|G{kL~e7@I%x=kEV zmOou>8QUKP_|Tu0@p%`=2SVp_ga^(K>py&0f{Z1+2%mS)@X6J#d3@L${qpoS7vN*V z%I!>kE^&Myek0$77|KuVKaBnJfg2;?KaAQM$H0I1;4Gg`5TKM&3FY0?h(Oe2(NzXua>BzxhZ5kDhO^cLmZ3h<#TxikIxS&mN-+M9?H zpGIeb{R!reqD@n|Cy*+@2PWy8Fg`!e@d5k8LRT_+2Z2vKzF%2*4`2PML1HNl5; zYD6b3wr-pT73oHGJz9H^-Yi-lt>*P1dV0EW9IWlIC)JYGB7ACDeCQGen!y%pO#BKw zAGV4?2npQIPANU9iKkCwMfj{G_!u2^3ucc>T#3sP^*TgCal7RFAi`&z;{$6~ZV+zK z!2|))0LQ#1k(?x+oy!*BfN3y%<03 zoJ;{e(ERE%K3C@PS%Z;aQ?CwLgJECC08b(RGpxhO__XHnF-a?7jbMHo7*lx+wj%Na zugUnB^Z0CN^+W+rHXuyp7~m-+KM|kyTs}lk7;CcO^^bvdF4Vtcxo$B2YjYkSegRZ! zpCyRc}B=-1XfKGk-+HN9)Zr#c?{&JJBaMbe^7gwJ*kAGWah`6(R8 zk+}3fYuE?(J*1r7gw?LbeY+>w489_KuFm6g;U;`cI?T5^dWbu%{2xU4T${(|BKNLo z`JsGz@&OXz^XNQ259IKXO<=g!C{%vYZ?O8$FLHe9RfzuSRO^XSeR^$Dg~dTy5ECW) zCL%wNXYi?!=Cuq4P<{;$j{_hPKK)ESjfR$JScI&A&tL#S-cWeSO{oB%kutEDeho7C z)Ky(=CmI$JpSs<*A+kk!6)V7px+oYQH-k^DYSdIcrD3FIxm~-@_^3U3!&`*fUtR{E z4OMTZ=s^8&!|qEI1(opz;jRMmL(gRLGtA(#y`i?!>ZOov2;g^0A+urB!#iSRkh;6t|5 zR*UPpV5|taZu|Z*=)(}D5T`aFuUN^r$+g$XHo}c z+9AT{mJ}a@bkwVwnQp(U(Ef70v0@E}eKv2OOP95Rz(c?VYsGeLI)c0B!9IGHeZv`* zv^Pi#L5VS8? 
zp(VW-iOA2ZGx=o7P{c=iF%sc(D#gcCbt*qe%?k3wg#MlGSm`q}bVw^bK9eZf;x$G1 zye5+m$WK0>s51PD;Pcu9pP+rZTCHcx&o&qyW`Ll44+H06lvJ={Y?@|?_lU^P>oWOd z%Fnjyj{QU0oZuUwvZA3l$iOA1gH{p}$ z%OHvT%^#w~XNd55{Z05ZTUs=3Ex`7`|3%mF9)nwm zt~X-CiXnYkD`L}T2h-b?(sqmR`Er_1I5MsSGu?2tAXRgNfp9QGz!;pTPPdeAe{(Gp z;qzpg4~!e!1V4R4GfmD-udaG9^(CDGGY}9u(7QhssgXh_t{pf~C35#lO-_W*SJHeU z`JvJC8Wi)92Pl>wD+-`5HHCaR65;dJG#@&usRKP=WTuN*-E>(8JXn|!9qHs#Z?rP9 z_Xgl<9VtB+atDW?i_G&!r6woB=WA&`(0STYYayb9t4ytVK1?tM9Xv3>dVoRMv0>dE zfCRkCu2oZc65;dpG@qdSsu5#N*J=q1t+v7R?0S8$Ikb~76dYwAW+tJN+PN@Wi_@i; zl2)mVf+WJ{8&f_oL8eAW zN=hPpo|?yp1!~5CX8Ck!pr(v~MEHC=&4<>TJ9V?e(*gTqM)hVW%<077BYUjK2uXy` zchY>Kbt_YR);xb;K}|wFNrHRi1%~zDXyd1s#)}kO-e=(|k5Gkf0110sVr7 zs51`8wo8Q1b7?*XM2%&=vTi`oSk?jA_KEO$KFz1DRdtdM?^X1=Q=%Zt|HY3Y!smrF zAG2ye6tvpX*7#@8P0JlArtLes`~lfMiST(bgHIX_BcLAxH0(u0`1~Nv2PR=9X0uw0 z8>XRr=nc>HMzH)2_<*F|MED$~`P9|5>ZU$hJB0GRZtq%?zU;L_G5=5HiMW3-ZeMV- zMolF}Uw!4wA4T7qjZ0Zd^U>N>a6C`BL^)l9@Jy#0eUvv9(XU(5d{#kzV01wWQXoIa z0FnrwTc>;?|NQEv27uOD$=GlzEQB3+NKtQ$ZD?u0TrSMtDxVyQ@HsK%6WJd+eM`Fc z0_-!JzXdM-jw-CQ!`K9!7_v>MDZ=NrG@qc~f;#AFH^x*vp!{OD=H1R$OHR|CyrW#9kN ze=ogobR>Ey{7$8U3>L`X=;-L@Dxd!lmD2~{?=_XXDogNxu)TEj%n{8W9i6H?3$pwJ z__Ado>gJtBL$$i9BT429J9*j6PJ(RPJQ?mC79~y%eUG!*nA|o56cku7>Gj zVY(ltLm)%94~FT7!}K@8lpO?$M@PRCWax#z)6#ly*{jY zLAvG8)EwGfnr`}gcBN|tCj{x1LsNPJP3Z~tjUm*n*d;1a9xfB+*`>lfyWI5Y;D@EZ z>k0S^*h~-2%bf~OrgE_aC*BT!QA*#R=IJ$~6n3$Fw}ty?`ck-$UW%SaQ!uHq%)v*t zJ)g1fxr}|$`Tw8k_hE~)=NZ_3<(5k2&EXC2TDkYcKIG3N9w0iG>fV6ur*B%Rbl`jN z3T$ILO7`DJ-{ABnyZybk*f?+(*9h3SJ~%4`JWi)|;F>;&iD9qJzabc*HQlGrov bdMDtCmN?C%XOHQlvMu>Lg}bO&#@qh@2Wg#f literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/data/test_sas7bdat_1.csv b/pandas/tests/io/sas/data/test_sas7bdat_1.csv new file mode 100644 index 00000000..3eb23e42 --- /dev/null +++ b/pandas/tests/io/sas/data/test_sas7bdat_1.csv @@ -0,0 +1,11 @@ 
+Column1,Column2,Column3,Column4,Column5,Column6,Column7,Column8,Column9,Column10,Column11,Column12,Column13,Column14,Column15,Column16,Column17,Column18,Column19,Column20,Column21,Column22,Column23,Column24,Column25,Column26,Column27,Column28,Column29,Column30,Column31,Column32,Column33,Column34,Column35,Column36,Column37,Column38,Column39,Column40,Column41,Column42,Column43,Column44,Column45,Column46,Column47,Column48,Column49,Column50,Column51,Column52,Column53,Column54,Column55,Column56,Column57,Column58,Column59,Column60,Column61,Column62,Column63,Column64,Column65,Column66,Column67,Column68,Column69,Column70,Column71,Column72,Column73,Column74,Column75,Column76,Column77,Column78,Column79,Column80,Column81,Column82,Column83,Column84,Column85,Column86,Column87,Column88,Column89,Column90,Column91,Column92,Column93,Column94,Column95,Column96,Column97,Column98,Column99,Column100 +0.636,pear,84,2170,0.103,apple,20,,0.621,apple,,9697,0.047,dog,7,2543,0.728,crocodile,55,2615,0.146,crocodile,10,2832,0.644,crocodile,6,9671,,crocodile,28,9126,0.433,crocodile,22,8117,0.318,dog,61,3363,0.938,pear,58,3700,0.844,dog,26,3989,0.132,crocodile,88,8240,0.325,,9,6102,0.032,apple,34,2987,0.651,crocodile,81,8778,,apple,91,9908,0.897,dog,26,3682,0.274,crocodile,75,1629,0.918,apple,9,7935,0.761,crocodile,,2398,0.914,apple,75,9204,0.946,pear,87,5587,0.940,apple,50,1611,0.480,apple,45,3230 +0.283,dog,49,6275,0.398,pear,50,339,0.561,apple,22,8596,0.661,pear,38,4928,0.709,crocodile,73,6011,0.239,crocodile,93,,0.093,crocodile,23,6198,0.757,dog,61,,0.593,pear,12,9571,,dog,6,892,0.883,pear,81,3363,0.166,pear,,1814,0.454,dog,52,1161,0.630,dog,43,159,0.398,apple,17,8194,0.905,dog,29,4752,0.382,pear,44,9302,0.247,apple,45,4645,0.321,apple,56,,0.415,,,5912,0.580,,60,1918,0.821,dog,68,6847,0.542,apple,83,4465,0.169,pear,53,5820,0.942,dog,76,4904 
+0.452,pear,35,8627,0.117,pear,70,5704,0.209,apple,7,5079,0.961,pear,73,,0.414,dog,,9681,,pear,80,4183,0.131,crocodile,53,972,,apple,86,772,0.460,,89,5950,0.293,apple,25,288,0.314,dog,38,106,0.108,pear,49,979,0.481,dog,25,7904,0.270,dog,4,4891,,dog,32,9820,0.517,,64,9053,0.487,dog,78,7238,0.488,apple,54,4349,0.356,pear,73,9991,0.113,,63,5731,0.294,dog,58,3060,,pear,2,,0.004,dog,45,9872,0.024,dog,69,,0.336,pear,9, +0.557,dog,29,5292,0.640,pear,34,2766,0.910,dog,26,2576,0.924,pear,85,3495,0.478,crocodile,43,1586,0.576,apple,79,4403,0.329,crocodile,,3572,0.702,dog,46,1913,0.147,,10,4292,0.368,crocodile,27,7037,0.137,pear,19,4545,0.999,apple,81,,0.095,pear,36,9195,0.494,pear,61,3393,,crocodile,27,8252,,dog,87,7968,0.845,apple,31,3801,0.514,dog,,6542,0.483,dog,58,4688,,pear,71,402,0.275,apple,51,3673,0.642,,82,4443,0.538,apple,10,114,0.155,,17,8170,0.493,pear,89,8566 +0.138,,55,1903,0.583,crocodile,34,4170,0.226,crocodile,11,3985,0.271,pear,,4624,,,43,2539,,dog,50,,0.469,crocodile,72,5999,0.476,apple,71,1184,0.333,crocodile,23,5075,0.120,pear,64,5242,0.561,apple,7,8206,0.103,,45,,0.972,,1,432,0.423,dog,4,1860,0.686,apple,43,9640,0.622,pear,15,91,0.795,apple,33,2149,0.248,dog,,,0.942,dog,85,987,0.492,pear,2,754,0.955,apple,48,1268,0.498,apple,83,6654,0.305,crocodile,81,423,0.013,,32,3217,0.046,,57,894 +0.948,dog,33,8962,0.691,pear,,2276,0.330,crocodile,27,1104,0.668,,73,2883,0.007,,18,3726,0.301,,85,3621,,apple,39,9646,0.305,pear,48,,0.103,dog,71,8216,0.813,dog,41,6387,0.114,apple,24,4428,0.122,dog,41,,0.558,pear,49,6455,0.119,pear,56,9390,0.482,apple,75,9168,0.437,dog,74,4101,0.557,dog,74,1631,0.159,dog,26,2218,0.274,crocodile,,7653,,dog,,9637,0.345,apple,82,440,0.604,apple,89,6848,0.177,pear,31,2597,0.847,dog,77,,0.700,crocodile,60,6088 
+0.162,crocodile,17,8189,0.002,pear,30,5161,0.851,,,7230,0.886,dog,43,5277,0.086,dog,10,6891,0.510,dog,63,2275,0.641,dog,74,9202,0.452,pear,4,6580,0.008,,38,,0.491,apple,11,1469,0.672,dog,89,6182,0.477,apple,4,1877,0.241,dog,61,5083,,apple,78,2526,,,,7547,0.969,dog,22,1578,,dog,86,1211,0.221,apple,1,3252,0.146,,85,9278,0.676,dog,30,1218,0.012,apple,93,3159,0.681,crocodile,45,,0.825,crocodile,0,3749,0.831,,74,7021,0.534,pear,20,6122 +0.148,crocodile,37,1740,0.411,apple,23,8833,0.620,crocodile,5,439,0.372,apple,31,,0.881,dog,84,736,0.347,crocodile,46,224,0.080,crocodile,80,891,0.452,pear,82,3304,0.418,pear,85,2984,0.362,dog,65,,0.167,,65,5265,,apple,89,2101,0.864,apple,92,3636,0.382,crocodile,89,269,0.958,dog,23,1419,0.410,pear,86,8488,0.739,pear,,6576,0.613,dog,37,859,0.426,dog,15,8618,0.554,crocodile,51,4864,0.046,,,4421,0.995,dog,25,622,0.191,pear,84,536,0.405,apple,54,7081,0.575,crocodile,15,2570 +,pear,15,3679,0.102,pear,1,3227,0.157,pear,12,1627,0.955,pear,21,64,0.967,dog,14,2876,0.533,,74,4269,0.881,apple,58,5565,,dog,,6286,0.373,crocodile,46,6525,0.477,crocodile,18,1836,0.067,apple,66,3851,0.224,pear,,7233,0.443,,75,5577,0.392,crocodile,92,4353,0.549,pear,96,4884,0.581,crocodile,10,4058,,crocodile,22,8038,0.411,,94,9236,,pear,40,,0.770,crocodile,36,4591,0.854,dog,32,2902,0.724,apple,57,5499,0.566,pear,19,7577,0.853,,,,,crocodile,84,2709 +0.663,pear,,7735,0.086,apple,80,,,pear,16,9159,0.053,dog,52,3478,0.691,pear,49,9979,0.428,dog,46,5776,0.744,crocodile,3,9549,0.249,dog,90,1192,0.009,dog,68,8678,0.046,apple,62,1873,0.684,,31,7227,0.300,crocodile,83,9881,,,82,8272,0.621,crocodile,40,8171,0.002,crocodile,38,,0.580,pear,31,3094,,,5,5711,0.668,,30,217,0.672,dog,34,3184,0.105,pear,,1521,0.239,dog,28,6896,0.011,apple,,4509,0.691,,97,9247,0.167,pear,74,9824,0.945,crocodile,89, diff --git a/pandas/tests/io/sas/data/test_sas7bdat_2.csv b/pandas/tests/io/sas/data/test_sas7bdat_2.csv new file mode 100644 index 00000000..adc0587a --- /dev/null +++ 
b/pandas/tests/io/sas/data/test_sas7bdat_2.csv @@ -0,0 +1,11 @@ +Column1,Column2,Column3,Column4,Column5,Column6,Column7,Column8,Column9,Column10,Column11,Column12,Column13,Column14,Column15,Column16,Column17,Column18,Column19,Column20,Column21,Column22,Column23,Column24,Column25,Column26,Column27,Column28,Column29,Column30,Column31,Column32,Column33,Column34,Column35,Column36,Column37,Column38,Column39,Column40,Column41,Column42,Column43,Column44,Column45,Column46,Column47,Column48,Column49,Column50,Column51,Column52,Column53,Column54,Column55,Column56,Column57,Column58,Column59,Column60,Column61,Column62,Column63,Column64,Column65,Column66,Column67,Column68,Column69,Column70,Column71,Column72,Column73,Column74,Column75,Column76,Column77,Column78,Column79,Column80,Column81,Column82,Column83,Column84,Column85,Column86,Column87,Column88,Column89,Column90,Column91,Column92,Column93,Column94,Column95,Column96,Column97,Column98,Column99,Column100 +0.636,高雄市,84,2170,0.103,부산,20,,0.621,부산,,9697,0.047,Иркутск,7,2543,0.728,鱷魚,55,2615,0.146,鱷魚,10,2832,0.644,鱷魚,6,9671,,鱷魚,28,9126,0.433,鱷魚,22,8117,0.318,Иркутск,61,3363,0.938,高雄市,58,3700,0.844,Иркутск,26,3989,0.132,鱷魚,88,8240,0.325,,9,6102,0.032,부산,34,2987,0.651,鱷魚,81,8778,,부산,91,9908,0.897,Иркутск,26,3682,0.274,鱷魚,75,1629,0.918,부산,9,7935,0.761,鱷魚,,2398,0.914,부산,75,9204,0.946,高雄市,87,5587,0.940,부산,50,1611,0.480,부산,45,3230 +0.283,Иркутск,49,6275,0.398,高雄市,50,339,0.561,부산,22,8596,0.661,高雄市,38,4928,0.709,鱷魚,73,6011,0.239,鱷魚,93,,0.093,鱷魚,23,6198,0.757,Иркутск,61,,0.593,高雄市,12,9571,,Иркутск,6,892,0.883,高雄市,81,3363,0.166,高雄市,,1814,0.454,Иркутск,52,1161,0.630,Иркутск,43,159,0.398,부산,17,8194,0.905,Иркутск,29,4752,0.382,高雄市,44,9302,0.247,부산,45,4645,0.321,부산,56,,0.415,,,5912,0.580,,60,1918,0.821,Иркутск,68,6847,0.542,부산,83,4465,0.169,高雄市,53,5820,0.942,Иркутск,76,4904 
+0.452,高雄市,35,8627,0.117,高雄市,70,5704,0.209,부산,7,5079,0.961,高雄市,73,,0.414,Иркутск,,9681,,高雄市,80,4183,0.131,鱷魚,53,972,,부산,86,772,0.460,,89,5950,0.293,부산,25,288,0.314,Иркутск,38,106,0.108,高雄市,49,979,0.481,Иркутск,25,7904,0.270,Иркутск,4,4891,,Иркутск,32,9820,0.517,,64,9053,0.487,Иркутск,78,7238,0.488,부산,54,4349,0.356,高雄市,73,9991,0.113,,63,5731,0.294,Иркутск,58,3060,,高雄市,2,,0.004,Иркутск,45,9872,0.024,Иркутск,69,,0.336,高雄市,9, +0.557,Иркутск,29,5292,0.640,高雄市,34,2766,0.910,Иркутск,26,2576,0.924,高雄市,85,3495,0.478,鱷魚,43,1586,0.576,부산,79,4403,0.329,鱷魚,,3572,0.702,Иркутск,46,1913,0.147,,10,4292,0.368,鱷魚,27,7037,0.137,高雄市,19,4545,0.999,부산,81,,0.095,高雄市,36,9195,0.494,高雄市,61,3393,,鱷魚,27,8252,,Иркутск,87,7968,0.845,부산,31,3801,0.514,Иркутск,,6542,0.483,Иркутск,58,4688,,高雄市,71,402,0.275,부산,51,3673,0.642,,82,4443,0.538,부산,10,114,0.155,,17,8170,0.493,高雄市,89,8566 +0.138,,55,1903,0.583,鱷魚,34,4170,0.226,鱷魚,11,3985,0.271,高雄市,,4624,,,43,2539,,Иркутск,50,,0.469,鱷魚,72,5999,0.476,부산,71,1184,0.333,鱷魚,23,5075,0.120,高雄市,64,5242,0.561,부산,7,8206,0.103,,45,,0.972,,1,432,0.423,Иркутск,4,1860,0.686,부산,43,9640,0.622,高雄市,15,91,0.795,부산,33,2149,0.248,Иркутск,,,0.942,Иркутск,85,987,0.492,高雄市,2,754,0.955,부산,48,1268,0.498,부산,83,6654,0.305,鱷魚,81,423,0.013,,32,3217,0.046,,57,894 +0.948,Иркутск,33,8962,0.691,高雄市,,2276,0.330,鱷魚,27,1104,0.668,,73,2883,0.007,,18,3726,0.301,,85,3621,,부산,39,9646,0.305,高雄市,48,,0.103,Иркутск,71,8216,0.813,Иркутск,41,6387,0.114,부산,24,4428,0.122,Иркутск,41,,0.558,高雄市,49,6455,0.119,高雄市,56,9390,0.482,부산,75,9168,0.437,Иркутск,74,4101,0.557,Иркутск,74,1631,0.159,Иркутск,26,2218,0.274,鱷魚,,7653,,Иркутск,,9637,0.345,부산,82,440,0.604,부산,89,6848,0.177,高雄市,31,2597,0.847,Иркутск,77,,0.700,鱷魚,60,6088 
+0.162,鱷魚,17,8189,0.002,高雄市,30,5161,0.851,,,7230,0.886,Иркутск,43,5277,0.086,Иркутск,10,6891,0.510,Иркутск,63,2275,0.641,Иркутск,74,9202,0.452,高雄市,4,6580,0.008,,38,,0.491,부산,11,1469,0.672,Иркутск,89,6182,0.477,부산,4,1877,0.241,Иркутск,61,5083,,부산,78,2526,,,,7547,0.969,Иркутск,22,1578,,Иркутск,86,1211,0.221,부산,1,3252,0.146,,85,9278,0.676,Иркутск,30,1218,0.012,부산,93,3159,0.681,鱷魚,45,,0.825,鱷魚,0,3749,0.831,,74,7021,0.534,高雄市,20,6122 +0.148,鱷魚,37,1740,0.411,부산,23,8833,0.620,鱷魚,5,439,0.372,부산,31,,0.881,Иркутск,84,736,0.347,鱷魚,46,224,0.080,鱷魚,80,891,0.452,高雄市,82,3304,0.418,高雄市,85,2984,0.362,Иркутск,65,,0.167,,65,5265,,부산,89,2101,0.864,부산,92,3636,0.382,鱷魚,89,269,0.958,Иркутск,23,1419,0.410,高雄市,86,8488,0.739,高雄市,,6576,0.613,Иркутск,37,859,0.426,Иркутск,15,8618,0.554,鱷魚,51,4864,0.046,,,4421,0.995,Иркутск,25,622,0.191,高雄市,84,536,0.405,부산,54,7081,0.575,鱷魚,15,2570 +,高雄市,15,3679,0.102,高雄市,1,3227,0.157,高雄市,12,1627,0.955,高雄市,21,64,0.967,Иркутск,14,2876,0.533,,74,4269,0.881,부산,58,5565,,Иркутск,,6286,0.373,鱷魚,46,6525,0.477,鱷魚,18,1836,0.067,부산,66,3851,0.224,高雄市,,7233,0.443,,75,5577,0.392,鱷魚,92,4353,0.549,高雄市,96,4884,0.581,鱷魚,10,4058,,鱷魚,22,8038,0.411,,94,9236,,高雄市,40,,0.770,鱷魚,36,4591,0.854,Иркутск,32,2902,0.724,부산,57,5499,0.566,高雄市,19,7577,0.853,,,,,鱷魚,84,2709 +0.663,高雄市,,7735,0.086,부산,80,,,高雄市,16,9159,0.053,Иркутск,52,3478,0.691,高雄市,49,9979,0.428,Иркутск,46,5776,0.744,鱷魚,3,9549,0.249,Иркутск,90,1192,0.009,Иркутск,68,8678,0.046,부산,62,1873,0.684,,31,7227,0.300,鱷魚,83,9881,,,82,8272,0.621,鱷魚,40,8171,0.002,鱷魚,38,,0.580,高雄市,31,3094,,,5,5711,0.668,,30,217,0.672,Иркутск,34,3184,0.105,高雄市,,1521,0.239,Иркутск,28,6896,0.011,부산,,4509,0.691,,97,9247,0.167,高雄市,74,9824,0.945,鱷魚,89, diff --git a/pandas/tests/io/sas/data/zero_variables.sas7bdat b/pandas/tests/io/sas/data/zero_variables.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..85fec09447ec5055139f23847430227fd4226193 GIT binary patch literal 149504 
zcmeI&&x=)6902h1#xciGP!e*X`LH2`Y?$#4gF*|R%%}rSn)4EEqXq*FSSv#pMFcGZ zT}9BQMT`CeTeOJ=30(GrGO55_i&ib7&>COo+Yj1o8R8d*__*`JQ|4`P?_F zIU#i8`p<8jyS8oX7oWa65;oqLxVnAUmaUE5q0y)fP1HJN>uP=ZM(xQ)*cn27U8iKe zIbT0Kdu-;#nUlxQo@$<)Z5}!{Ghct;m~OV3X;HfM8@s-{IoJ51ncL!VKDf2iX-DJj zOYdC*~T*7GHhsa+OQXzOkvX#=d8!4os!a*h0@#(Y?HX>g?2HH=FJ| z@%$&(@~1mf_730P+(^B?=Nl|CCP07y0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N z0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+ z009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly zK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&=1c4*BH#b(g?AOaxcIc6?K6Y=_ zYeizuy8h~=D!;uy9bSH-tUo=}Ke4yUKbzBKS;+Z;beOCXSMQdcs=OF{`1GBn)G6Eg z-mCA2>+juss%VRXU0)URMSl%n2S>g+n99m_>9fhj@0%b0bfajy`8faU-LkG|>mObD zymlzww`hyHYCiP^0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF z5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk z1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly@XrM9{n2jcD?A!IBY9A?Z--iJUk{J; zWqa4`2phxW@zafq#hMEPSL$8!SS(JTZk#TvPuyqChFIMHY}3Vk4u;|GWn!=QK7ZfO zWU*%N;C|Ze_MY(Lo9ROJ_*ow|#Jcrq7-PAe?vO^caIa(aXIwL#rmHBPZMarEWA1N> zbGg4g_eW~QJ#s#;kC$tY=Rp{)?!Lcjk5+p>7t)?WxZJnv<+%2p&f2(#fjG6eaH)0v zr3=&NFD^{Px-)0y=BDGrACCQ$+n3sAYbK9di?1xCacg#Nrqjl{ls~#y)~2~s-#Wi| zX>RtpBQG2;uNPBgM4B zQeA4}&%E>Q;;@LjE9CC1kjwQCv;8eb{>A+si3IdW}1Ceo+e zX$S`*SlmJpMcNku0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBly@Sh8$GY=`! 
z|NpDlIdHtA9?xhr=UX!;>bWAf=@3QF+H@8o#eo^Vsq-_et~-%||7_4(Pmb>CLM!tTDhbN%Ii0n4paMgRZ+ literal 0 HcmV?d00001 diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py new file mode 100644 index 00000000..5d2643c2 --- /dev/null +++ b/pandas/tests/io/sas/test_sas.py @@ -0,0 +1,26 @@ +from io import StringIO + +import pytest + +from pandas import read_sas +import pandas._testing as tm + + +class TestSas: + def test_sas_buffer_format(self): + # see gh-14947 + b = StringIO("") + + msg = ( + "If this is a buffer object rather than a string " + "name, you must specify a format string" + ) + with pytest.raises(ValueError, match=msg): + read_sas(b) + + def test_sas_read_no_format_or_extension(self): + # see gh-24548 + msg = "unable to infer format of SAS file" + with tm.ensure_clean("test_file_no_extension") as path: + with pytest.raises(ValueError, match=msg): + read_sas(path) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py new file mode 100644 index 00000000..62e9ac69 --- /dev/null +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -0,0 +1,216 @@ +from datetime import datetime +import io +import os +from pathlib import Path + +import numpy as np +import pytest + +from pandas.errors import EmptyDataError +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + + +# https://github.com/cython/cython/issues/1720 +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestSAS7BDAT: + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") + self.data = [] + self.test_ix = [list(range(1, 16)), [16]] + for j in 1, 2: + fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv") + df = pd.read_csv(fname) + epoch = datetime(1960, 1, 1) + t1 = pd.to_timedelta(df["Column4"], unit="d") + df["Column4"] = epoch + t1 + t2 = pd.to_timedelta(df["Column12"], unit="d") + df["Column12"] = epoch + t2 + for k in 
range(df.shape[1]): + col = df.iloc[:, k] + if col.dtype == np.int64: + df.iloc[:, k] = df.iloc[:, k].astype(np.float64) + self.data.append(df) + + def test_from_file(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) + + def test_from_buffer(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + with open(fname, "rb") as f: + byts = f.read() + buf = io.BytesIO(byts) + rdr = pd.read_sas( + buf, format="sas7bdat", iterator=True, encoding="utf-8" + ) + df = rdr.read() + tm.assert_frame_equal(df, df0, check_exact=False) + rdr.close() + + def test_from_iterator(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + rdr = pd.read_sas(fname, iterator=True, encoding="utf-8") + df = rdr.read(2) + tm.assert_frame_equal(df, df0.iloc[0:2, :]) + df = rdr.read(3) + tm.assert_frame_equal(df, df0.iloc[2:5, :]) + rdr.close() + + def test_path_pathlib(self): + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat")) + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) + + @td.skip_if_no("py.path") + def test_path_localpath(self): + from py.path import local as LocalPath + + for j in 0, 1: + df0 = self.data[j] + for k in self.test_ix[j]: + fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat")) + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) + + def test_iterator_loop(self): + # github #13654 + for j in 0, 1: + for k in self.test_ix[j]: + for chunksize in 3, 5, 10, 11: + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + rdr = pd.read_sas(fname, chunksize=10, encoding="utf-8") + y = 0 + for x in rdr: + y += x.shape[0] + assert y == 
rdr.row_count + rdr.close() + + def test_iterator_read_too_much(self): + # github #14734 + k = self.test_ix[0][0] + fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + rdr = pd.read_sas(fname, format="sas7bdat", iterator=True, encoding="utf-8") + d1 = rdr.read(rdr.row_count + 20) + rdr.close() + + rdr = pd.read_sas(fname, iterator=True, encoding="utf-8") + d2 = rdr.read(rdr.row_count + 20) + tm.assert_frame_equal(d1, d2) + rdr.close() + + +def test_encoding_options(datapath): + fname = datapath("io", "sas", "data", "test1.sas7bdat") + df1 = pd.read_sas(fname) + df2 = pd.read_sas(fname, encoding="utf-8") + for col in df1.columns: + try: + df1[col] = df1[col].str.decode("utf-8") + except AttributeError: + pass + tm.assert_frame_equal(df1, df2) + + from pandas.io.sas.sas7bdat import SAS7BDATReader + + rdr = SAS7BDATReader(fname, convert_header_text=False) + df3 = rdr.read() + rdr.close() + for x, y in zip(df1.columns, df3.columns): + assert x == y.decode() + + +def test_productsales(datapath): + fname = datapath("io", "sas", "data", "productsales.sas7bdat") + df = pd.read_sas(fname, encoding="utf-8") + fname = datapath("io", "sas", "data", "productsales.csv") + df0 = pd.read_csv(fname, parse_dates=["MONTH"]) + vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR"] + df0[vn] = df0[vn].astype(np.float64) + tm.assert_frame_equal(df, df0) + + +def test_12659(datapath): + fname = datapath("io", "sas", "data", "test_12659.sas7bdat") + df = pd.read_sas(fname) + fname = datapath("io", "sas", "data", "test_12659.csv") + df0 = pd.read_csv(fname) + df0 = df0.astype(np.float64) + tm.assert_frame_equal(df, df0) + + +def test_airline(datapath): + fname = datapath("io", "sas", "data", "airline.sas7bdat") + df = pd.read_sas(fname) + fname = datapath("io", "sas", "data", "airline.csv") + df0 = pd.read_csv(fname) + df0 = df0.astype(np.float64) + tm.assert_frame_equal(df, df0, check_exact=False) + + +def test_date_time(datapath): + # Support of different SAS date/datetime formats (PR 
#15871) + fname = datapath("io", "sas", "data", "datetime.sas7bdat") + df = pd.read_sas(fname) + fname = datapath("io", "sas", "data", "datetime.csv") + df0 = pd.read_csv( + fname, parse_dates=["Date1", "Date2", "DateTime", "DateTimeHi", "Taiw"] + ) + # GH 19732: Timestamps imported from sas will incur floating point errors + df.iloc[:, 3] = df.iloc[:, 3].dt.round("us") + tm.assert_frame_equal(df, df0) + + +def test_compact_numerical_values(datapath): + # Regression test for #21616 + fname = datapath("io", "sas", "data", "cars.sas7bdat") + df = pd.read_sas(fname, encoding="latin-1") + # The two columns CYL and WGT in cars.sas7bdat have column + # width < 8 and only contain integral values. + # Test that pandas doesn't corrupt the numbers by adding + # decimals. + result = df["WGT"] + expected = df["WGT"].round() + tm.assert_series_equal(result, expected, check_exact=True) + result = df["CYL"] + expected = df["CYL"].round() + tm.assert_series_equal(result, expected, check_exact=True) + + +def test_many_columns(datapath): + # Test for looking for column information in more places (PR #22628) + fname = datapath("io", "sas", "data", "many_columns.sas7bdat") + df = pd.read_sas(fname, encoding="latin-1") + fname = datapath("io", "sas", "data", "many_columns.csv") + df0 = pd.read_csv(fname, encoding="latin-1") + tm.assert_frame_equal(df, df0) + + +def test_inconsistent_number_of_rows(datapath): + # Regression test for issue #16615. 
(PR #22628) + fname = datapath("io", "sas", "data", "load_log.sas7bdat") + df = pd.read_sas(fname, encoding="latin-1") + assert len(df) == 2097 + + +def test_zero_variables(datapath): + # Check if the SAS file has zero variables (PR #18184) + fname = datapath("io", "sas", "data", "zero_variables.sas7bdat") + with pytest.raises(EmptyDataError): + pd.read_sas(fname) diff --git a/pandas/tests/io/sas/test_xport.py b/pandas/tests/io/sas/test_xport.py new file mode 100644 index 00000000..ee97f08e --- /dev/null +++ b/pandas/tests/io/sas/test_xport.py @@ -0,0 +1,141 @@ +import os + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +from pandas.io.sas.sasreader import read_sas + +# CSV versions of test xpt files were obtained using the R foreign library + +# Numbers in a SAS xport file are always float64, so need to convert +# before making comparisons. + + +def numeric_as_float(data): + for v in data.columns: + if data[v].dtype is np.dtype("int64"): + data[v] = data[v].astype(np.float64) + + +class TestXport: + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + self.dirpath = datapath("io", "sas", "data") + self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt") + self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt") + self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt") + self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt") + + def test1_basic(self): + # Tests with DEMO_G.xpt (all numeric file) + + # Compare to this + data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv")) + numeric_as_float(data_csv) + + # Read full file + data = read_sas(self.file01, format="xport") + tm.assert_frame_equal(data, data_csv) + num_rows = data.shape[0] + + # Test reading beyond end of file + reader = read_sas(self.file01, format="xport", iterator=True) + data = reader.read(num_rows + 100) + assert data.shape[0] == num_rows + reader.close() + + # Test incremental read with `read` method. 
+ reader = read_sas(self.file01, format="xport", iterator=True) + data = reader.read(10) + reader.close() + tm.assert_frame_equal(data, data_csv.iloc[0:10, :]) + + # Test incremental read with `get_chunk` method. + reader = read_sas(self.file01, format="xport", chunksize=10) + data = reader.get_chunk() + reader.close() + tm.assert_frame_equal(data, data_csv.iloc[0:10, :]) + + # Test read in loop + m = 0 + reader = read_sas(self.file01, format="xport", chunksize=100) + for x in reader: + m += x.shape[0] + reader.close() + assert m == num_rows + + # Read full file with `read_sas` method + data = read_sas(self.file01) + tm.assert_frame_equal(data, data_csv) + + def test1_index(self): + # Tests with DEMO_G.xpt using index (all numeric file) + + # Compare to this + data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv")) + data_csv = data_csv.set_index("SEQN") + numeric_as_float(data_csv) + + # Read full file + data = read_sas(self.file01, index="SEQN", format="xport") + tm.assert_frame_equal(data, data_csv, check_index_type=False) + + # Test incremental read with `read` method. + reader = read_sas(self.file01, index="SEQN", format="xport", iterator=True) + data = reader.read(10) + reader.close() + tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False) + + # Test incremental read with `get_chunk` method. 
+ reader = read_sas(self.file01, index="SEQN", format="xport", chunksize=10) + data = reader.get_chunk() + reader.close() + tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False) + + def test1_incremental(self): + # Test with DEMO_G.xpt, reading full file incrementally + + data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv")) + data_csv = data_csv.set_index("SEQN") + numeric_as_float(data_csv) + + reader = read_sas(self.file01, index="SEQN", chunksize=1000) + + all_data = list(reader) + data = pd.concat(all_data, axis=0) + + tm.assert_frame_equal(data, data_csv, check_index_type=False) + + def test2(self): + # Test with SSHSV1_A.xpt + + # Compare to this + data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv")) + numeric_as_float(data_csv) + + data = read_sas(self.file02) + tm.assert_frame_equal(data, data_csv) + + def test_multiple_types(self): + # Test with DRXFCD_G.xpt (contains text and numeric variables) + + # Compare to this + data_csv = pd.read_csv(self.file03.replace(".xpt", ".csv")) + + data = read_sas(self.file03, encoding="utf-8") + tm.assert_frame_equal(data, data_csv) + + def test_truncated_float_support(self): + # Test with paxraw_d_short.xpt, a shortened version of: + # http://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAXRAW_D.ZIP + # This file has truncated floats (5 bytes in this case). 
+ + # GH 11713 + + data_csv = pd.read_csv(self.file04.replace(".xpt", ".csv")) + + data = read_sas(self.file04, format="xport") + tm.assert_frame_equal(data.astype("int64"), data_csv) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py new file mode 100644 index 00000000..652cacaf --- /dev/null +++ b/pandas/tests/io/test_clipboard.py @@ -0,0 +1,256 @@ +from textwrap import dedent + +import numpy as np +from numpy.random import randint +import pytest + +import pandas as pd +from pandas import DataFrame, get_option, read_clipboard +import pandas._testing as tm + +from pandas.io.clipboard import clipboard_get, clipboard_set + + +def build_kwargs(sep, excel): + kwargs = {} + if excel != "default": + kwargs["excel"] = excel + if sep != "default": + kwargs["sep"] = sep + return kwargs + + +@pytest.fixture( + params=[ + "delims", + "utf8", + "utf16", + "string", + "long", + "nonascii", + "colwidth", + "mixed", + "float", + "int", + ] +) +def df(request): + data_type = request.param + + if data_type == "delims": + return pd.DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]}) + elif data_type == "utf8": + return pd.DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]}) + elif data_type == "utf16": + return pd.DataFrame( + {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]} + ) + elif data_type == "string": + return tm.makeCustomDataframe( + 5, 3, c_idx_type="s", r_idx_type="i", c_idx_names=[None], r_idx_names=[None] + ) + elif data_type == "long": + max_rows = get_option("display.max_rows") + return tm.makeCustomDataframe( + max_rows + 1, + 3, + data_gen_f=lambda *args: randint(2), + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + elif data_type == "nonascii": + return pd.DataFrame({"en": "in English".split(), "es": "en español".split()}) + elif data_type == "colwidth": + _cw = get_option("display.max_colwidth") + 1 + return tm.makeCustomDataframe( + 5, + 3, + 
data_gen_f=lambda *args: "x" * _cw, + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + elif data_type == "mixed": + return DataFrame( + {"a": np.arange(1.0, 6.0) + 0.01, "b": np.arange(1, 6), "c": list("abcde")} + ) + elif data_type == "float": + return tm.makeCustomDataframe( + 5, + 3, + data_gen_f=lambda r, c: float(r) + 0.01, + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + elif data_type == "int": + return tm.makeCustomDataframe( + 5, + 3, + data_gen_f=lambda *args: randint(2), + c_idx_type="s", + r_idx_type="i", + c_idx_names=[None], + r_idx_names=[None], + ) + else: + raise ValueError + + +@pytest.fixture +def mock_clipboard(monkeypatch, request): + """Fixture mocking clipboard IO. + + This mocks pandas.io.clipboard.clipboard_get and + pandas.io.clipboard.clipboard_set. + + This uses a local dict for storing data. The dictionary + key used is the test ID, available with ``request.node.name``. + + This returns the local dictionary, for direct manipulation by + tests. 
+ """ + + # our local clipboard for tests + _mock_data = {} + + def _mock_set(data): + _mock_data[request.node.name] = data + + def _mock_get(): + return _mock_data[request.node.name] + + monkeypatch.setattr("pandas.io.clipboard.clipboard_set", _mock_set) + monkeypatch.setattr("pandas.io.clipboard.clipboard_get", _mock_get) + + yield _mock_data + + +@pytest.mark.clipboard +def test_mock_clipboard(mock_clipboard): + import pandas.io.clipboard + + pandas.io.clipboard.clipboard_set("abc") + assert "abc" in set(mock_clipboard.values()) + result = pandas.io.clipboard.clipboard_get() + assert result == "abc" + + +@pytest.mark.single +@pytest.mark.clipboard +@pytest.mark.usefixtures("mock_clipboard") +class TestClipboard: + def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None): + data.to_clipboard(excel=excel, sep=sep, encoding=encoding) + result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding) + tm.assert_frame_equal(data, result, check_dtype=False) + + # Test that default arguments copy as tab delimited + def test_round_trip_frame(self, df): + self.check_round_trip_frame(df) + + # Test that explicit delimiters are respected + @pytest.mark.parametrize("sep", ["\t", ",", "|"]) + def test_round_trip_frame_sep(self, df, sep): + self.check_round_trip_frame(df, sep=sep) + + # Test white space separator + def test_round_trip_frame_string(self, df): + df.to_clipboard(excel=False, sep=None) + result = read_clipboard() + assert df.to_string() == result.to_string() + assert df.shape == result.shape + + # Two character separator is not supported in to_clipboard + # Test that multi-character separators are not silently passed + def test_excel_sep_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=True, sep=r"\t") + + # Separator is ignored when excel=False and should produce a warning + def test_copy_delim_warning(self, df): + with tm.assert_produces_warning(): + df.to_clipboard(excel=False, sep="\t") + + # Tests 
that the default behavior of to_clipboard is tab + # delimited and excel="True" + @pytest.mark.parametrize("sep", ["\t", None, "default"]) + @pytest.mark.parametrize("excel", [True, None, "default"]) + def test_clipboard_copy_tabs_default(self, sep, excel, df, request, mock_clipboard): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + assert mock_clipboard[request.node.name] == df.to_csv(sep="\t") + + # Tests reading of white space separated tables + @pytest.mark.parametrize("sep", [None, "default"]) + @pytest.mark.parametrize("excel", [False]) + def test_clipboard_copy_strings(self, sep, excel, df): + kwargs = build_kwargs(sep, excel) + df.to_clipboard(**kwargs) + result = read_clipboard(sep=r"\s+") + assert result.to_string() == df.to_string() + assert df.shape == result.shape + + def test_read_clipboard_infer_excel(self, request, mock_clipboard): + # gh-19010: avoid warnings + clip_kwargs = dict(engine="python") + + text = dedent( + """ + John James Charlie Mingus + 1 2 + 4 Harry Carney + """.strip() + ) + mock_clipboard[request.node.name] = text + df = pd.read_clipboard(**clip_kwargs) + + # excel data is parsed correctly + assert df.iloc[1][1] == "Harry Carney" + + # having diff tab counts doesn't trigger it + text = dedent( + """ + a\t b + 1 2 + 3 4 + """.strip() + ) + mock_clipboard[request.node.name] = text + res = pd.read_clipboard(**clip_kwargs) + + text = dedent( + """ + a b + 1 2 + 3 4 + """.strip() + ) + mock_clipboard[request.node.name] = text + exp = pd.read_clipboard(**clip_kwargs) + + tm.assert_frame_equal(res, exp) + + def test_invalid_encoding(self, df): + # test case for testing invalid encoding + with pytest.raises(ValueError): + df.to_clipboard(encoding="ascii") + with pytest.raises(NotImplementedError): + pd.read_clipboard(encoding="ascii") + + @pytest.mark.parametrize("enc", ["UTF-8", "utf-8", "utf8"]) + def test_round_trip_valid_encodings(self, enc, df): + self.check_round_trip_frame(df, encoding=enc) + + +@pytest.mark.single 
+@pytest.mark.clipboard +@pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."]) +def test_raw_roundtrip(data): + # PR #25040 wide unicode wasn't copied correctly on PY3 on windows + clipboard_set(data) + assert data == clipboard_get() diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py new file mode 100644 index 00000000..aa9294b0 --- /dev/null +++ b/pandas/tests/io/test_common.py @@ -0,0 +1,359 @@ +""" +Tests for the pandas.io.common functionalities +""" +from io import StringIO +import mmap +import os +from pathlib import Path + +import pytest + +from pandas.compat import is_platform_windows +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + +import pandas.io.common as icom + + +class CustomFSPath: + """For testing fspath on unknown objects""" + + def __init__(self, path): + self.path = path + + def __fspath__(self): + return self.path + + +# Functions that consume a string path and return a string or path-like object +path_types = [str, CustomFSPath, Path] + +try: + from py.path import local as LocalPath + + path_types.append(LocalPath) +except ImportError: + pass + +HERE = os.path.abspath(os.path.dirname(__file__)) + + +# https://github.com/cython/cython/issues/1720 +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestCommonIOCapabilities: + data1 = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + def test_expand_user(self): + filename = "~/sometest" + expanded_name = icom._expand_user(filename) + + assert expanded_name != filename + assert os.path.isabs(expanded_name) + assert os.path.expanduser(filename) == expanded_name + + def test_expand_user_normal_path(self): + filename = "/somefolder/sometest" + expanded_name = icom._expand_user(filename) + + assert expanded_name == filename + assert os.path.expanduser(filename) == expanded_name + + def 
test_stringify_path_pathlib(self): + rel_path = icom.stringify_path(Path(".")) + assert rel_path == "." + redundant_path = icom.stringify_path(Path("foo//bar")) + assert redundant_path == os.path.join("foo", "bar") + + @td.skip_if_no("py.path") + def test_stringify_path_localpath(self): + path = os.path.join("foo", "bar") + abs_path = os.path.abspath(path) + lpath = LocalPath(path) + assert icom.stringify_path(lpath) == abs_path + + def test_stringify_path_fspath(self): + p = CustomFSPath("foo/bar.csv") + result = icom.stringify_path(p) + assert result == "foo/bar.csv" + + @pytest.mark.parametrize( + "extension,expected", + [("", None), (".gz", "gzip"), (".bz2", "bz2"), (".zip", "zip"), (".xz", "xz")], + ) + @pytest.mark.parametrize("path_type", path_types) + def test_infer_compression_from_path(self, extension, expected, path_type): + path = path_type("foo/bar.csv" + extension) + compression = icom.infer_compression(path, compression="infer") + assert compression == expected + + def test_get_filepath_or_buffer_with_path(self): + filename = "~/sometest" + filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer(filename) + assert filepath_or_buffer != filename + assert os.path.isabs(filepath_or_buffer) + assert os.path.expanduser(filename) == filepath_or_buffer + assert not should_close + + def test_get_filepath_or_buffer_with_buffer(self): + input_buffer = StringIO() + filepath_or_buffer, _, _, should_close = icom.get_filepath_or_buffer( + input_buffer + ) + assert filepath_or_buffer == input_buffer + assert not should_close + + def test_iterator(self): + reader = pd.read_csv(StringIO(self.data1), chunksize=1) + result = pd.concat(reader, ignore_index=True) + expected = pd.read_csv(StringIO(self.data1)) + tm.assert_frame_equal(result, expected) + + # GH12153 + it = pd.read_csv(StringIO(self.data1), chunksize=1) + first = next(it) + tm.assert_frame_equal(first, expected.iloc[[0]]) + tm.assert_frame_equal(pd.concat(it), expected.iloc[1:]) + + 
@pytest.mark.parametrize( + "reader, module, error_class, fn_ext", + [ + (pd.read_csv, "os", FileNotFoundError, "csv"), + (pd.read_fwf, "os", FileNotFoundError, "txt"), + (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"), + (pd.read_feather, "feather", Exception, "feather"), + (pd.read_hdf, "tables", FileNotFoundError, "h5"), + (pd.read_stata, "os", FileNotFoundError, "dta"), + (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), + (pd.read_json, "os", ValueError, "json"), + (pd.read_pickle, "os", FileNotFoundError, "pickle"), + ], + ) + def test_read_non_existant(self, reader, module, error_class, fn_ext): + pytest.importorskip(module) + + path = os.path.join(HERE, "data", "does_not_exist." + fn_ext) + msg1 = r"File (b')?.+does_not_exist\.{}'? does not exist".format(fn_ext) + msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'" + msg3 = "Expected object or value" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = ( + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" + fr" '.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) + with pytest.raises( + error_class, match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7})" + ): + reader(path) + + @pytest.mark.parametrize( + "reader, module, error_class, fn_ext", + [ + (pd.read_csv, "os", FileNotFoundError, "csv"), + (pd.read_table, "os", FileNotFoundError, "csv"), + (pd.read_fwf, "os", FileNotFoundError, "txt"), + (pd.read_excel, "xlrd", FileNotFoundError, "xlsx"), + (pd.read_feather, "feather", Exception, "feather"), + (pd.read_hdf, "tables", FileNotFoundError, "h5"), + (pd.read_stata, "os", FileNotFoundError, "dta"), + (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), + (pd.read_json, "os", ValueError, "json"), + (pd.read_pickle, "os", FileNotFoundError, "pickle"), + ], + ) + def test_read_expands_user_home_dir( 
+ self, reader, module, error_class, fn_ext, monkeypatch + ): + pytest.importorskip(module) + + path = os.path.join("~", "does_not_exist." + fn_ext) + monkeypatch.setattr(icom, "_expand_user", lambda x: os.path.join("foo", x)) + + msg1 = fr"File (b')?.+does_not_exist\.{fn_ext}'? does not exist" + msg2 = fr"\[Errno 2\] No such file or directory: '.+does_not_exist\.{fn_ext}'" + msg3 = "Unexpected character found when decoding 'false'" + msg4 = "path_or_buf needs to be a string file path or file-like" + msg5 = ( + fr"\[Errno 2\] File .+does_not_exist\.{fn_ext} does not exist:" + fr" '.+does_not_exist\.{fn_ext}'" + ) + msg6 = fr"\[Errno 2\] 没有那个文件或目录: '.+does_not_exist\.{fn_ext}'" + msg7 = ( + fr"\[Errno 2\] File o directory non esistente: '.+does_not_exist\.{fn_ext}'" + ) + + with pytest.raises( + error_class, match=fr"({msg1}|{msg2}|{msg3}|{msg4}|{msg5}|{msg6}|{msg7})" + ): + reader(path) + + @pytest.mark.parametrize( + "reader, module, path", + [ + (pd.read_csv, "os", ("io", "data", "csv", "iris.csv")), + (pd.read_table, "os", ("io", "data", "csv", "iris.csv")), + ( + pd.read_fwf, + "os", + ("io", "data", "fixed_width", "fixed_width_format.txt"), + ), + (pd.read_excel, "xlrd", ("io", "data", "excel", "test1.xlsx")), + ( + pd.read_feather, + "feather", + ("io", "data", "feather", "feather-0_3_1.feather"), + ), + ( + pd.read_hdf, + "tables", + ("io", "data", "legacy_hdf", "datetimetz_object.h5"), + ), + (pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")), + (pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")), + (pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")), + ( + pd.read_pickle, + "os", + ("io", "data", "pickle", "categorical.0.25.0.pickle"), + ), + ], + ) + def test_read_fspath_all(self, reader, module, path, datapath): + pytest.importorskip(module) + path = datapath(*path) + + mypath = CustomFSPath(path) + result = reader(mypath) + expected = reader(path) + + if path.endswith(".pickle"): + # categorical + 
tm.assert_categorical_equal(result, expected) + else: + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "writer_name, writer_kwargs, module", + [ + ("to_csv", {}, "os"), + ("to_excel", {"engine": "xlwt"}, "xlwt"), + ("to_feather", {}, "feather"), + ("to_html", {}, "os"), + ("to_json", {}, "os"), + ("to_latex", {}, "os"), + ("to_pickle", {}, "os"), + ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"), + ], + ) + def test_write_fspath_all(self, writer_name, writer_kwargs, module): + p1 = tm.ensure_clean("string") + p2 = tm.ensure_clean("fspath") + df = pd.DataFrame({"A": [1, 2]}) + + with p1 as string, p2 as fspath: + pytest.importorskip(module) + mypath = CustomFSPath(fspath) + writer = getattr(df, writer_name) + + writer(string, **writer_kwargs) + with open(string, "rb") as f: + expected = f.read() + + writer(mypath, **writer_kwargs) + with open(fspath, "rb") as f: + result = f.read() + + assert result == expected + + def test_write_fspath_hdf5(self): + # Same test as write_fspath_all, except HDF5 files aren't + # necessarily byte-for-byte identical for a given dataframe, so we'll + # have to read and compare equality + pytest.importorskip("tables") + + df = pd.DataFrame({"A": [1, 2]}) + p1 = tm.ensure_clean("string") + p2 = tm.ensure_clean("fspath") + + with p1 as string, p2 as fspath: + mypath = CustomFSPath(fspath) + df.to_hdf(mypath, key="bar") + df.to_hdf(string, key="bar") + + result = pd.read_hdf(fspath, key="bar") + expected = pd.read_hdf(string, key="bar") + + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def mmap_file(datapath): + return datapath("io", "data", "csv", "test_mmap.csv") + + +class TestMMapWrapper: + def test_constructor_bad_file(self, mmap_file): + non_file = StringIO("I am not a file") + non_file.fileno = lambda: -1 + + # the error raised is different on Windows + if is_platform_windows(): + msg = "The parameter is incorrect" + err = OSError + else: + msg = "[Errno 22]" + err = 
mmap.error + + with pytest.raises(err, match=msg): + icom._MMapWrapper(non_file) + + target = open(mmap_file, "r") + target.close() + + msg = "I/O operation on closed file" + with pytest.raises(ValueError, match=msg): + icom._MMapWrapper(target) + + def test_get_attr(self, mmap_file): + with open(mmap_file, "r") as target: + wrapper = icom._MMapWrapper(target) + + attrs = dir(wrapper.mmap) + attrs = [attr for attr in attrs if not attr.startswith("__")] + attrs.append("__next__") + + for attr in attrs: + assert hasattr(wrapper, attr) + + assert not hasattr(wrapper, "foo") + + def test_next(self, mmap_file): + with open(mmap_file, "r") as target: + wrapper = icom._MMapWrapper(target) + lines = target.readlines() + + for line in lines: + next_line = next(wrapper) + assert next_line.strip() == line.strip() + + with pytest.raises(StopIteration, match=r"^$"): + next(wrapper) + + def test_unknown_engine(self): + with tm.ensure_clean() as path: + df = tm.makeDataFrame() + df.to_csv(path) + with pytest.raises(ValueError, match="Unknown engine"): + pd.read_csv(path, engine="pyt") diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py new file mode 100644 index 00000000..fb81e579 --- /dev/null +++ b/pandas/tests/io/test_compression.py @@ -0,0 +1,144 @@ +import os +import subprocess +import sys +import textwrap + +import pytest + +import pandas as pd +import pandas._testing as tm + +import pandas.io.common as icom + + +@pytest.mark.parametrize( + "obj", + [ + pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"), + ], +) +@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"]) +def test_compression_size(obj, method, compression_only): + with tm.ensure_clean() as path: + getattr(obj, method)(path, compression=compression_only) + compressed_size = os.path.getsize(path) + getattr(obj, method)(path, 
compression=None) + uncompressed_size = os.path.getsize(path) + assert uncompressed_size > compressed_size + + +@pytest.mark.parametrize( + "obj", + [ + pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ), + pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"), + ], +) +@pytest.mark.parametrize("method", ["to_csv", "to_json"]) +def test_compression_size_fh(obj, method, compression_only): + with tm.ensure_clean() as path: + f, handles = icom.get_handle(path, "w", compression=compression_only) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed + compressed_size = os.path.getsize(path) + with tm.ensure_clean() as path: + f, handles = icom.get_handle(path, "w", compression=None) + with f: + getattr(obj, method)(f) + assert not f.closed + assert f.closed + uncompressed_size = os.path.getsize(path) + assert uncompressed_size > compressed_size + + +@pytest.mark.parametrize( + "write_method, write_kwargs, read_method", + [ + ("to_csv", {"index": False}, pd.read_csv), + ("to_json", {}, pd.read_json), + ("to_pickle", {}, pd.read_pickle), + ], +) +def test_dataframe_compression_defaults_to_infer( + write_method, write_kwargs, read_method, compression_only +): + # GH22004 + input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"]) + extension = icom._compression_to_extension[compression_only] + with tm.ensure_clean("compressed" + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + output = read_method(path, compression=compression_only) + tm.assert_frame_equal(output, input) + + +@pytest.mark.parametrize( + "write_method,write_kwargs,read_method,read_kwargs", + [ + ("to_csv", {"index": False, "header": True}, pd.read_csv, {"squeeze": True}), + ("to_json", {}, pd.read_json, {"typ": "series"}), + ("to_pickle", {}, pd.read_pickle, {}), + ], +) +def test_series_compression_defaults_to_infer( + write_method, write_kwargs, read_method, read_kwargs, 
compression_only +): + # GH22004 + input = pd.Series([0, 5, -2, 10], name="X") + extension = icom._compression_to_extension[compression_only] + with tm.ensure_clean("compressed" + extension) as path: + getattr(input, write_method)(path, **write_kwargs) + output = read_method(path, compression=compression_only, **read_kwargs) + tm.assert_series_equal(output, input, check_names=False) + + +def test_compression_warning(compression_only): + # Assert that passing a file object to to_csv while explicitly specifying a + # compression protocol triggers a RuntimeWarning, as per GH21227. + df = pd.DataFrame( + 100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]], + columns=["X", "Y", "Z"], + ) + with tm.ensure_clean() as path: + f, handles = icom.get_handle(path, "w", compression=compression_only) + with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): + with f: + df.to_csv(f, compression=compression_only) + + +def test_with_missing_lzma(): + """Tests if import pandas works when lzma is not present.""" + # https://github.com/pandas-dev/pandas/issues/27575 + code = textwrap.dedent( + """\ + import sys + sys.modules['lzma'] = None + import pandas + """ + ) + subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) + + +def test_with_missing_lzma_runtime(): + """Tests if RuntimeError is hit when calling lzma without + having the module available.""" + code = textwrap.dedent( + """ + import sys + import pytest + sys.modules['lzma'] = None + import pandas + df = pandas.DataFrame() + with pytest.raises(RuntimeError, match='lzma module'): + df.to_csv('foo.csv', compression='xz') + """ + ) + subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE) diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py new file mode 100644 index 00000000..cdb8eca0 --- /dev/null +++ b/pandas/tests/io/test_date_converters.py @@ -0,0 +1,40 @@ +from datetime import datetime + +import 
numpy as np + +import pandas._testing as tm + +import pandas.io.date_converters as conv + + +def test_parse_date_time(): + dates = np.array(["2007/1/3", "2008/2/4"], dtype=object) + times = np.array(["05:07:09", "06:08:00"], dtype=object) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) + + result = conv.parse_date_time(dates, times) + tm.assert_numpy_array_equal(result, expected) + + +def test_parse_date_fields(): + days = np.array([3, 4]) + months = np.array([1, 2]) + years = np.array([2007, 2008]) + result = conv.parse_date_fields(years, months, days) + + expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) + tm.assert_numpy_array_equal(result, expected) + + +def test_parse_all_fields(): + hours = np.array([5, 6]) + minutes = np.array([7, 8]) + seconds = np.array([9, 0]) + + days = np.array([3, 4]) + years = np.array([2007, 2008]) + months = np.array([1, 2]) + + result = conv.parse_all_fields(years, months, days, hours, minutes, seconds) + expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py new file mode 100644 index 00000000..23b51e71 --- /dev/null +++ b/pandas/tests/io/test_feather.py @@ -0,0 +1,150 @@ +""" test feather-format compat """ +from distutils.version import LooseVersion + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +from pandas.io.feather_format import read_feather, to_feather # noqa: E402 isort:skip + +pyarrow = pytest.importorskip("pyarrow") + + +pyarrow_version = LooseVersion(pyarrow.__version__) +filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse") + + +@filter_sparse +@pytest.mark.single +class TestFeather: + def check_error_on_write(self, df, exc): + # check that we are raising the exception + # on writing + + with pytest.raises(exc): + with tm.ensure_clean() as path: + 
to_feather(df, path) + + def check_round_trip(self, df, expected=None, **kwargs): + + if expected is None: + expected = df + + with tm.ensure_clean() as path: + to_feather(df, path) + + result = read_feather(path, **kwargs) + tm.assert_frame_equal(result, expected) + + def test_error(self): + + for obj in [ + pd.Series([1, 2, 3]), + 1, + "foo", + pd.Timestamp("20130101"), + np.array([1, 2, 3]), + ]: + self.check_error_on_write(obj, ValueError) + + def test_basic(self): + + df = pd.DataFrame( + { + "string": list("abc"), + "int": list(range(1, 4)), + "uint": np.arange(3, 6).astype("u1"), + "float": np.arange(4.0, 7.0, dtype="float64"), + "float_with_null": [1.0, np.nan, 3], + "bool": [True, False, True], + "bool_with_null": [True, np.nan, False], + "cat": pd.Categorical(list("abc")), + "dt": pd.date_range("20130101", periods=3), + "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"), + "dt_with_null": [ + pd.Timestamp("20130101"), + pd.NaT, + pd.Timestamp("20130103"), + ], + "dtns": pd.date_range("20130101", periods=3, freq="ns"), + } + ) + + assert df.dttz.dtype.tz.zone == "US/Eastern" + self.check_round_trip(df) + + def test_duplicate_columns(self): + + # https://github.com/wesm/feather/issues/53 + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() + self.check_error_on_write(df, ValueError) + + def test_stringify_columns(self): + + df = pd.DataFrame(np.arange(12).reshape(4, 3)).copy() + self.check_error_on_write(df, ValueError) + + def test_read_columns(self): + # GH 24025 + df = pd.DataFrame( + { + "col1": list("abc"), + "col2": list(range(1, 4)), + "col3": list("xyz"), + "col4": list(range(4, 7)), + } + ) + columns = ["col1", "col3"] + self.check_round_trip(df, expected=df[columns], columns=columns) + + def test_unsupported_other(self): + + # mixed python objects + df = pd.DataFrame({"a": ["a", 1, 2.0]}) + # Some versions raise ValueError, others raise ArrowInvalid. 
+ self.check_error_on_write(df, Exception) + + def test_rw_use_threads(self): + df = pd.DataFrame({"A": np.arange(100000)}) + self.check_round_trip(df, use_threads=True) + self.check_round_trip(df, use_threads=False) + + def test_write_with_index(self): + + df = pd.DataFrame({"A": [1, 2, 3]}) + self.check_round_trip(df) + + # non-default index + for index in [ + [2, 3, 4], + pd.date_range("20130101", periods=3), + list("abc"), + [1, 3, 4], + pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]), + ]: + + df.index = index + self.check_error_on_write(df, ValueError) + + # index with meta-data + df.index = [0, 1, 2] + df.index.name = "foo" + self.check_error_on_write(df, ValueError) + + # column multi-index + df.index = [0, 1, 2] + df.columns = pd.MultiIndex.from_tuples([("a", 1)]) + self.check_error_on_write(df, ValueError) + + def test_path_pathlib(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_pathlib(df.to_feather, pd.read_feather) + tm.assert_frame_equal(df, result) + + def test_path_localpath(self): + df = tm.makeDataFrame().reset_index() + result = tm.round_trip_localpath(df.to_feather, pd.read_feather) + tm.assert_frame_equal(df, result) diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py new file mode 100644 index 00000000..7a5eba52 --- /dev/null +++ b/pandas/tests/io/test_gbq.py @@ -0,0 +1,235 @@ +from contextlib import ExitStack as does_not_raise +from datetime import datetime +import os +import platform +import random +import string + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import DataFrame + +api_exceptions = pytest.importorskip("google.api_core.exceptions") +bigquery = pytest.importorskip("google.cloud.bigquery") +service_account = pytest.importorskip("google.oauth2.service_account") +pandas_gbq = pytest.importorskip("pandas_gbq") + +PROJECT_ID = None +PRIVATE_KEY_JSON_PATH = None +PRIVATE_KEY_JSON_CONTENTS = None + +VERSION = platform.python_version() + + +def 
_skip_if_no_project_id(): + if not _get_project_id(): + pytest.skip("Cannot run integration tests without a project id") + + +def _skip_if_no_private_key_path(): + if not _get_private_key_path(): + pytest.skip("Cannot run integration tests without a private key json file path") + + +def _in_travis_environment(): + return "TRAVIS_BUILD_DIR" in os.environ and "GBQ_PROJECT_ID" in os.environ + + +def _get_project_id(): + if _in_travis_environment(): + return os.environ.get("GBQ_PROJECT_ID") + return PROJECT_ID or os.environ.get("GBQ_PROJECT_ID") + + +def _get_private_key_path(): + if _in_travis_environment(): + return os.path.join( + *[os.environ.get("TRAVIS_BUILD_DIR"), "ci", "travis_gbq.json"] + ) + + private_key_path = PRIVATE_KEY_JSON_PATH + if not private_key_path: + private_key_path = os.environ.get("GBQ_GOOGLE_APPLICATION_CREDENTIALS") + return private_key_path + + +def _get_credentials(): + private_key_path = _get_private_key_path() + if private_key_path: + return service_account.Credentials.from_service_account_file(private_key_path) + + +def _get_client(): + project_id = _get_project_id() + credentials = _get_credentials() + return bigquery.Client(project=project_id, credentials=credentials) + + +def generate_rand_str(length: int = 10) -> str: + return "".join(random.choices(string.ascii_lowercase, k=length)) + + +def make_mixed_dataframe_v2(test_size): + # create df to test for all BQ datatypes except RECORD + bools = np.random.randint(2, size=(1, test_size)).astype(bool) + flts = np.random.randn(1, test_size) + ints = np.random.randint(1, 10, size=(1, test_size)) + strs = np.random.randint(1, 10, size=(1, test_size)).astype(str) + times = [datetime.now(pytz.timezone("US/Arizona")) for t in range(test_size)] + return DataFrame( + { + "bools": bools[0], + "flts": flts[0], + "ints": ints[0], + "strs": strs[0], + "times": times[0], + }, + index=range(test_size), + ) + + +def test_read_gbq_without_deprecated_kwargs(monkeypatch): + captured_kwargs = {} + + def 
mock_read_gbq(sql, **kwargs): + captured_kwargs.update(kwargs) + return DataFrame([[1.0]]) + + monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq) + pd.read_gbq("SELECT 1") + + assert "verbose" not in captured_kwargs + assert "private_key" not in captured_kwargs + + +def test_read_gbq_with_new_kwargs(monkeypatch): + captured_kwargs = {} + + def mock_read_gbq(sql, **kwargs): + captured_kwargs.update(kwargs) + return DataFrame([[1.0]]) + + monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq) + pd.read_gbq("SELECT 1", use_bqstorage_api=True) + + assert captured_kwargs["use_bqstorage_api"] + + +def test_read_gbq_without_new_kwargs(monkeypatch): + captured_kwargs = {} + + def mock_read_gbq(sql, **kwargs): + captured_kwargs.update(kwargs) + return DataFrame([[1.0]]) + + monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq) + pd.read_gbq("SELECT 1") + + assert "use_bqstorage_api" not in captured_kwargs + + +@pytest.mark.parametrize("progress_bar", [None, "foo"]) +def test_read_gbq_progress_bar_type_kwarg(monkeypatch, progress_bar): + # GH 29857 + captured_kwargs = {} + + def mock_read_gbq(sql, **kwargs): + captured_kwargs.update(kwargs) + return DataFrame([[1.0]]) + + monkeypatch.setattr("pandas_gbq.read_gbq", mock_read_gbq) + pd.read_gbq("SELECT 1", progress_bar_type=progress_bar) + + if progress_bar: + assert "progress_bar_type" in captured_kwargs + else: + assert "progress_bar_type" not in captured_kwargs + + +@pytest.mark.single +class TestToGBQIntegrationWithServiceAccountKeyPath: + @pytest.fixture() + def gbq_dataset(self): + # Setup Dataset + _skip_if_no_project_id() + _skip_if_no_private_key_path() + + dataset_id = "pydata_pandas_bq_testing_" + generate_rand_str() + + self.client = _get_client() + self.dataset = self.client.dataset(dataset_id) + + # Create the dataset + self.client.create_dataset(bigquery.Dataset(self.dataset)) + + table_name = generate_rand_str() + destination_table = f"{dataset_id}.{table_name}" + yield destination_table + + # 
Teardown Dataset + self.client.delete_dataset(self.dataset, delete_contents=True) + + def test_roundtrip(self, gbq_dataset): + destination_table = gbq_dataset + + test_size = 20001 + df = make_mixed_dataframe_v2(test_size) + + df.to_gbq( + destination_table, + _get_project_id(), + chunksize=None, + credentials=_get_credentials(), + ) + + result = pd.read_gbq( + f"SELECT COUNT(*) AS num_rows FROM {destination_table}", + project_id=_get_project_id(), + credentials=_get_credentials(), + dialect="standard", + ) + assert result["num_rows"][0] == test_size + + @pytest.mark.parametrize( + "if_exists, expected_num_rows, expectation", + [ + ("append", 300, does_not_raise()), + ("fail", 200, pytest.raises(pandas_gbq.gbq.TableCreationError)), + ("replace", 100, does_not_raise()), + ], + ) + def test_gbq_if_exists( + self, if_exists, expected_num_rows, expectation, gbq_dataset + ): + # GH 29598 + destination_table = gbq_dataset + + test_size = 200 + df = make_mixed_dataframe_v2(test_size) + + df.to_gbq( + destination_table, + _get_project_id(), + chunksize=None, + credentials=_get_credentials(), + ) + + with expectation: + df.iloc[:100].to_gbq( + destination_table, + _get_project_id(), + if_exists=if_exists, + chunksize=None, + credentials=_get_credentials(), + ) + + result = pd.read_gbq( + f"SELECT COUNT(*) AS num_rows FROM {destination_table}", + project_id=_get_project_id(), + credentials=_get_credentials(), + dialect="standard", + ) + assert result["num_rows"][0] == expected_num_rows diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py new file mode 100644 index 00000000..cf745fcc --- /dev/null +++ b/pandas/tests/io/test_gcs.py @@ -0,0 +1,101 @@ +from io import StringIO +import os + +import numpy as np +import pytest + +from pandas import DataFrame, date_range, read_csv +import pandas._testing as tm +from pandas.util import _test_decorators as td + +from pandas.io.common import is_gcs_url + + +def test_is_gcs_url(): + assert 
is_gcs_url("gcs://pandas/somethingelse.com") + assert is_gcs_url("gs://pandas/somethingelse.com") + assert not is_gcs_url("s3://pandas/somethingelse.com") + + +@td.skip_if_no("gcsfs") +def test_read_csv_gcs(monkeypatch): + df1 = DataFrame( + { + "int": [1, 3], + "float": [2.0, np.nan], + "str": ["t", "s"], + "dt": date_range("2018-06-18", periods=2), + } + ) + + class MockGCSFileSystem: + def open(*args): + return StringIO(df1.to_csv(index=False)) + + monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) + df2 = read_csv("gs://test/test.csv", parse_dates=["dt"]) + + tm.assert_frame_equal(df1, df2) + + +@td.skip_if_no("gcsfs") +def test_to_csv_gcs(monkeypatch): + df1 = DataFrame( + { + "int": [1, 3], + "float": [2.0, np.nan], + "str": ["t", "s"], + "dt": date_range("2018-06-18", periods=2), + } + ) + s = StringIO() + + class MockGCSFileSystem: + def open(*args): + return s + + monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) + df1.to_csv("gs://test/test.csv", index=True) + + def mock_get_filepath_or_buffer(*args, **kwargs): + return StringIO(df1.to_csv()), None, None, False + + monkeypatch.setattr( + "pandas.io.gcs.get_filepath_or_buffer", mock_get_filepath_or_buffer + ) + + df2 = read_csv("gs://test/test.csv", parse_dates=["dt"], index_col=0) + + tm.assert_frame_equal(df1, df2) + + +@td.skip_if_no("fastparquet") +@td.skip_if_no("gcsfs") +def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): + """Regression test for writing to a not-yet-existent GCS Parquet file.""" + df1 = DataFrame( + { + "int": [1, 3], + "float": [2.0, np.nan], + "str": ["t", "s"], + "dt": date_range("2018-06-18", periods=2), + } + ) + + class MockGCSFileSystem: + def open(self, path, mode="r", *args): + if "w" not in mode: + raise FileNotFoundError + return open(os.path.join(tmpdir, "test.parquet"), mode) + + monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) + df1.to_parquet( + "gs://test/test.csv", index=True, engine="fastparquet", compression=None + ) + + 
+@td.skip_if_installed("gcsfs") +def test_gcs_not_present_exception(): + with pytest.raises(ImportError) as e: + read_csv("gs://test/test.csv") + assert "gcsfs library is required" in str(e.value) diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py new file mode 100644 index 00000000..b649e394 --- /dev/null +++ b/pandas/tests/io/test_html.py @@ -0,0 +1,1214 @@ +from functools import partial +from importlib import reload +from io import BytesIO, StringIO +import os +import re +import threading +from urllib.error import URLError + +import numpy as np +from numpy.random import rand +import pytest + +from pandas.compat import is_platform_windows +from pandas.errors import ParserError +import pandas.util._test_decorators as td + +from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, read_csv +import pandas._testing as tm + +from pandas.io.common import file_path_to_url +import pandas.io.html +from pandas.io.html import read_html + +HERE = os.path.dirname(__file__) + + +@pytest.fixture( + params=[ + "chinese_utf-16.html", + "chinese_utf-32.html", + "chinese_utf-8.html", + "letz_latin1.html", + ] +) +def html_encoding_file(request, datapath): + """Parametrized fixture for HTML encoding test filenames.""" + return datapath("io", "data", "html_encoding", request.param) + + +def assert_framelist_equal(list1, list2, *args, **kwargs): + assert len(list1) == len(list2), ( + "lists are not of equal size " + "len(list1) == {0}, " + "len(list2) == {1}".format(len(list1), len(list2)) + ) + msg = "not all list elements are DataFrames" + both_frames = all( + map( + lambda x, y: isinstance(x, DataFrame) and isinstance(y, DataFrame), + list1, + list2, + ) + ) + assert both_frames, msg + for frame_i, frame_j in zip(list1, list2): + tm.assert_frame_equal(frame_i, frame_j, *args, **kwargs) + assert not frame_i.empty, "frames are both empty" + + +@td.skip_if_no("bs4") +def test_bs4_version_fails(monkeypatch, datapath): + import bs4 + + 
monkeypatch.setattr(bs4, "__version__", "4.2") + with pytest.raises(ImportError, match="Pandas requires version"): + read_html(datapath("io", "data", "html", "spam.html"), flavor="bs4") + + +def test_invalid_flavor(): + url = "google.com" + flavor = "invalid flavor" + msg = r"\{" + flavor + r"\} is not a valid set of flavors" + + with pytest.raises(ValueError, match=msg): + read_html(url, "google", flavor=flavor) + + +@td.skip_if_no("bs4") +@td.skip_if_no("lxml") +def test_same_ordering(datapath): + filename = datapath("io", "data", "html", "valid_markup.html") + dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"]) + dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"]) + assert_framelist_equal(dfs_lxml, dfs_bs4) + + +@pytest.mark.parametrize( + "flavor", + [ + pytest.param("bs4", marks=td.skip_if_no("bs4")), + pytest.param("lxml", marks=td.skip_if_no("lxml")), + ], + scope="class", +) +class TestReadHtml: + @pytest.fixture(autouse=True) + def set_files(self, datapath): + self.spam_data = datapath("io", "data", "html", "spam.html") + self.spam_data_kwargs = {} + self.spam_data_kwargs["encoding"] = "UTF-8" + self.banklist_data = datapath("io", "data", "html", "banklist.html") + + @pytest.fixture(autouse=True, scope="function") + def set_defaults(self, flavor, request): + self.read_html = partial(read_html, flavor=flavor) + yield + + def test_to_html_compat(self): + df = ( + tm.makeCustomDataframe( + 4, + 3, + data_gen_f=lambda *args: rand(), + c_idx_names=False, + r_idx_names=False, + ) + .applymap("{0:.3f}".format) + .astype(float) + ) + out = df.to_html() + res = self.read_html(out, attrs={"class": "dataframe"}, index_col=0)[0] + tm.assert_frame_equal(res, df) + + @tm.network + def test_banklist_url(self): + url = "http://www.fdic.gov/bank/individual/failed/banklist.html" + df1 = self.read_html( + url, "First Federal Bank of Florida", attrs={"id": "table"} + ) + df2 = self.read_html(url, "Metcalf Bank", attrs={"id": "table"}) + + 
assert_framelist_equal(df1, df2) + + @tm.network + def test_spam_url(self): + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/master/" + "pandas/tests/io/data/html/spam.html" + ) + df1 = self.read_html(url, ".*Water.*") + df2 = self.read_html(url, "Unit") + + assert_framelist_equal(df1, df2) + + @pytest.mark.slow + def test_banklist(self): + df1 = self.read_html(self.banklist_data, ".*Florida.*", attrs={"id": "table"}) + df2 = self.read_html(self.banklist_data, "Metcalf Bank", attrs={"id": "table"}) + + assert_framelist_equal(df1, df2) + + def test_spam(self): + df1 = self.read_html(self.spam_data, ".*Water.*") + df2 = self.read_html(self.spam_data, "Unit") + assert_framelist_equal(df1, df2) + + assert df1[0].iloc[0, 0] == "Proximates" + assert df1[0].columns[0] == "Nutrient" + + def test_spam_no_match(self): + dfs = self.read_html(self.spam_data) + for df in dfs: + assert isinstance(df, DataFrame) + + def test_banklist_no_match(self): + dfs = self.read_html(self.banklist_data, attrs={"id": "table"}) + for df in dfs: + assert isinstance(df, DataFrame) + + def test_spam_header(self): + df = self.read_html(self.spam_data, ".*Water.*", header=2)[0] + assert df.columns[0] == "Proximates" + assert not df.empty + + def test_skiprows_int(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=1) + df2 = self.read_html(self.spam_data, "Unit", skiprows=1) + + assert_framelist_equal(df1, df2) + + def test_skiprows_range(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=range(2))[0] + df2 = self.read_html(self.spam_data, "Unit", skiprows=range(2))[0] + tm.assert_frame_equal(df1, df2) + + def test_skiprows_list(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=[1, 2]) + df2 = self.read_html(self.spam_data, "Unit", skiprows=[2, 1]) + + assert_framelist_equal(df1, df2) + + def test_skiprows_set(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows={1, 2}) + df2 = self.read_html(self.spam_data, "Unit", 
skiprows={2, 1}) + + assert_framelist_equal(df1, df2) + + def test_skiprows_slice(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=1) + df2 = self.read_html(self.spam_data, "Unit", skiprows=1) + + assert_framelist_equal(df1, df2) + + def test_skiprows_slice_short(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=slice(2)) + df2 = self.read_html(self.spam_data, "Unit", skiprows=slice(2)) + + assert_framelist_equal(df1, df2) + + def test_skiprows_slice_long(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=slice(2, 5)) + df2 = self.read_html(self.spam_data, "Unit", skiprows=slice(4, 1, -1)) + + assert_framelist_equal(df1, df2) + + def test_skiprows_ndarray(self): + df1 = self.read_html(self.spam_data, ".*Water.*", skiprows=np.arange(2)) + df2 = self.read_html(self.spam_data, "Unit", skiprows=np.arange(2)) + + assert_framelist_equal(df1, df2) + + def test_skiprows_invalid(self): + with pytest.raises(TypeError, match=("is not a valid type for skipping rows")): + self.read_html(self.spam_data, ".*Water.*", skiprows="asdf") + + def test_index(self): + df1 = self.read_html(self.spam_data, ".*Water.*", index_col=0) + df2 = self.read_html(self.spam_data, "Unit", index_col=0) + assert_framelist_equal(df1, df2) + + def test_header_and_index_no_types(self): + df1 = self.read_html(self.spam_data, ".*Water.*", header=1, index_col=0) + df2 = self.read_html(self.spam_data, "Unit", header=1, index_col=0) + assert_framelist_equal(df1, df2) + + def test_header_and_index_with_types(self): + df1 = self.read_html(self.spam_data, ".*Water.*", header=1, index_col=0) + df2 = self.read_html(self.spam_data, "Unit", header=1, index_col=0) + assert_framelist_equal(df1, df2) + + def test_infer_types(self): + + # 10892 infer_types removed + df1 = self.read_html(self.spam_data, ".*Water.*", index_col=0) + df2 = self.read_html(self.spam_data, "Unit", index_col=0) + assert_framelist_equal(df1, df2) + + def test_string_io(self): + with 
open(self.spam_data, **self.spam_data_kwargs) as f: + data1 = StringIO(f.read()) + + with open(self.spam_data, **self.spam_data_kwargs) as f: + data2 = StringIO(f.read()) + + df1 = self.read_html(data1, ".*Water.*") + df2 = self.read_html(data2, "Unit") + assert_framelist_equal(df1, df2) + + def test_string(self): + with open(self.spam_data, **self.spam_data_kwargs) as f: + data = f.read() + + df1 = self.read_html(data, ".*Water.*") + df2 = self.read_html(data, "Unit") + + assert_framelist_equal(df1, df2) + + def test_file_like(self): + with open(self.spam_data, **self.spam_data_kwargs) as f: + df1 = self.read_html(f, ".*Water.*") + + with open(self.spam_data, **self.spam_data_kwargs) as f: + df2 = self.read_html(f, "Unit") + + assert_framelist_equal(df1, df2) + + @tm.network + def test_bad_url_protocol(self): + with pytest.raises(URLError): + self.read_html("git://github.com", match=".*Water.*") + + @tm.network + @pytest.mark.slow + def test_invalid_url(self): + try: + with pytest.raises(URLError): + self.read_html("http://www.a23950sdfa908sd.com", match=".*Water.*") + except ValueError as e: + assert "No tables found" in str(e) + + @pytest.mark.slow + def test_file_url(self): + url = self.banklist_data + dfs = self.read_html( + file_path_to_url(os.path.abspath(url)), "First", attrs={"id": "table"} + ) + assert isinstance(dfs, list) + for df in dfs: + assert isinstance(df, DataFrame) + + @pytest.mark.slow + def test_invalid_table_attrs(self): + url = self.banklist_data + with pytest.raises(ValueError, match="No tables found"): + self.read_html( + url, "First Federal Bank of Florida", attrs={"id": "tasdfable"} + ) + + def _bank_data(self, *args, **kwargs): + return self.read_html( + self.banklist_data, "Metcalf", attrs={"id": "table"}, *args, **kwargs + ) + + @pytest.mark.slow + def test_multiindex_header(self): + df = self._bank_data(header=[0, 1])[0] + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_multiindex_index(self): + df = 
self._bank_data(index_col=[0, 1])[0] + assert isinstance(df.index, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_index(self): + df = self._bank_data(header=[0, 1], index_col=[0, 1])[0] + assert isinstance(df.columns, MultiIndex) + assert isinstance(df.index, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_skiprows_tuples(self): + df = self._bank_data(header=[0, 1], skiprows=1)[0] + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_skiprows(self): + df = self._bank_data(header=[0, 1], skiprows=1)[0] + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_multiindex_header_index_skiprows(self): + df = self._bank_data(header=[0, 1], index_col=[0, 1], skiprows=1)[0] + assert isinstance(df.index, MultiIndex) + assert isinstance(df.columns, MultiIndex) + + @pytest.mark.slow + def test_regex_idempotency(self): + url = self.banklist_data + dfs = self.read_html( + file_path_to_url(os.path.abspath(url)), + match=re.compile(re.compile("Florida")), + attrs={"id": "table"}, + ) + assert isinstance(dfs, list) + for df in dfs: + assert isinstance(df, DataFrame) + + def test_negative_skiprows(self): + msg = r"\(you passed a negative value\)" + with pytest.raises(ValueError, match=msg): + self.read_html(self.spam_data, "Water", skiprows=-1) + + @tm.network + def test_multiple_matches(self): + url = "https://docs.python.org/2/" + dfs = self.read_html(url, match="Python") + assert len(dfs) > 1 + + @tm.network + def test_python_docs_table(self): + url = "https://docs.python.org/2/" + dfs = self.read_html(url, match="Python") + zz = [df.iloc[0, 0][0:4] for df in dfs] + assert sorted(zz) == sorted(["Repo", "What"]) + + def test_empty_tables(self): + """ + Make sure that read_html ignores empty tables. + """ + html = """ + + + + + + + + + + + + + +
    AB
    12
    + + + +
    + """ + result = self.read_html(html) + assert len(result) == 1 + + def test_multiple_tbody(self): + # GH-20690 + # Read all tbody tags within a single table. + result = self.read_html( + """ + + + + + + + + + + + + + + + + + + +
    AB
    12
    34
    """ + )[0] + + expected = DataFrame(data=[[1, 2], [3, 4]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_header_and_one_column(self): + """ + Don't fail with bs4 when there is a header and only one column + as described in issue #9178 + """ + result = self.read_html( + """ + + + + + + + + + + +
    Header
    first
    """ + )[0] + + expected = DataFrame(data={"Header": "first"}, index=[0]) + + tm.assert_frame_equal(result, expected) + + def test_thead_without_tr(self): + """ + Ensure parser adds

  • P2g!QR@$A~F0}rHcilI0m5=}qlsm;7RaS%PVx$Xi!X#G<@{%)A& z+4#x0_`B(a4L&qHH@z_M{?BgmmrMGGfp5KX+^%ci z#60Z%H_^?x{;iqC#f}qdVyA2BwEd4027W`$55-B5P}TqR{{-o;VbXu*?{!?WmB;w_ zK|4NfYQ35dA3a0#;DKHI;mD;PU;Rs?mkGzebosmY!A0NBe=_|)S>mpJGXKXroez;wGZKlQ5~y6BiqybHhK z`XBJ%?q9#UG5Fro9xR4lfR0`NJUxqxU4PhA8(l^5w%6!@^bNaeexZ;HX089pIGjhd ze$=!cMf@q+xQQPA65pw=@`=Ou#6fhi70Ii@#%klg|Cf)iFE>5P-m|^RW%!vL_xCLf zz8h~Ux=s|Uyry3IhV7kj&CbOR#VPTD_1}r#gV&4Ad~83Fi{Cp%<<-9`Hhj>=?dgp3 zeg`kbG|xN9H$OJ0`W_ope;7P}`~9vw>dfF4{N&j+zWgFQJN|^Pm~VQ|IqzP9W&P>R z;^H;V{y@C%c|M6#Pc96ePv#klFT{Z9lc&h@>I`* z=wX9h{9)3TCqA+BS3VpjUH9bazutU4diMQeZx$Ep$B2La`)leo)H>*!`77oZim9FB zX}(wFf1Ir!=(Mu9pbxM9l=_{Z<K^Uv<&;Rn(8s%MBEerymwsQxhExt%|7{H$fe zS|1xabcK6QL(krSd$YJ$|5v#+-T;W#DJZOk+dJbWSf8vqPBDq|x(US>Bf-W?`1Pjb zTR(EK?RB8K#KEe%@WTV~dnfu;e&Q*=n;*R}bgR=Y82QknVR(Md5g(nq8F{w->wM{t zUsZp9QIonDSFH9L9gx1kH*}I?^%SQ@M^*o+1HS2K-x&`c_q~JhbXZ9P3|A;;gA7wM+LO&V>OVy-<892CDjB-%;|cA38pD zd)FB^UfKt2MR*{u%T&+m)p3ZwUq9n?{r|OInUD2jlh@z<0QQEBr}Yb~y-wrw&HPQ( zb%It;aaMHLcpCSLrSE}ik#6@rfQKHK`mGMFA0BAWXA-wOtq0xr>t~$ss}YC1aNoCo z8-8=cPv%{_%t&1rf80-X zXDPN6$0+8CsxMY~?Vb4O8@A0$=gGW7@#UCM>c5VY{+`QpS~3p(K@Viy)W*;L)#6%L zs)ucPE{n^O`omV=8$bVv)gKKLzWmb{<{ePJ|7xG=gVa@1zoJ;}<9)N8JmPymueSBA zN8;F__)2tG|A}i}y(xU_llr|P{uI+Vw)Jgl78ml&?-IY-elZcfDh%6Rwc+1x++v1| z@}Bc$`-q44MKJ$HJpVJ7ei>??QXTKaL*KBg<`)V%VQKx>eE9W^MUVN|_$~3nBMy2e z@zjR@pW6NzuL{GU+Yj5~`r&VcL3ggW@b=qdsfXi#_S+yj)CJHNA*}L?ZQxXxZbq>C*gaSZKpp^ox0cq4;_fDfOT6dSwjwrR_iA@jViJ z(8FVXi48ABtG^=8bX70SPwg;z{Gops^?`%K`UgHdch{}j|Nnbn7MIHJUuqgx6ziYt z|3@mHx|@r(LvcTY3JC``@Cgm?aB3UhAmmSzOFtQ`-sBr%+52UT z`@RA{Xu4Xb`N^X$a0&hG`#-!gWzMm0yiNVK{iCax%k3ZRa~gb^O?ge6zF}9*FBIoS zky8J4p7rrEhV?`JK0)Fj@zl0@T3K8y&b;%)@5J^g!cYBO>@ecBM>gMf!r@_LIQqti zd+l?rUH@Fm;zGX`_y13AURoFb+v2GwkFxfCKJyO6H)Ei(|KlVNKCd4`*BLin;!o>? 
z2bx~9Q_u2L&x2L{@8S=WmcM;uZTj_L;w_K->g4^~pMP6fTsYruSl9cXGp?er+v~JW zem7zMT)6Dtn0F}7i-D^C`{|MgpY6hs`6{n|S|{J~;3=9vUo_w1NuGDYw|QZM@UcPl zho5|Dsri@g-y42$*1pq!z5Oh7Z2!}j#l?<;nrgQ?QLKf>KRJ&yuH%&c{|gxUEDE8_2l%oBt!YX9W=XB~$;#;rnht1yhcao_I``QDn0i;mrYc&bF~ zcc@+NKTlnzr+wqR4f^z(&u`QhiVI?@_1}pSd3=t5Z#wy+dG!}M{g%h`sc8N_;bW)B z{JiVbT~dG8^q0TA;Nb&ig-x&d;Q77VY=Ev^|I@4EfIWKt85`{BtVgW&=V87Fw>op> zHwBqzC@zeSQvX%IKBALf)qnIr8<#p0ga?|g^2|@3lR7-Og#Ivb=I`$Q$o*5oq*e3F zZ~OGa)M?-U_USlaQ|($OiuF%Ee`(*)Pv?gn3i&InQvaQfH%-v`K|L*vJ4NNa@B7c^ zl8=6hR)3F<;}lbSG5ul0d5skg`p~&yWV3JmW&d`T*F3xac}^A=502CG|GD}>##fZ+ zBDR00c}+#-GtW?590Qg8@0C3G-icj8*~;R=e%h}YZ$JO69dFb$&iIPreNX>~E6*wBok(4wxFiOu z`d>Q#nT$*Q=z-qJcxESg_(Axo9=633UGl*yKK^jvI;Wk#!uhRm;MJRrer51^=-Th! zTUlJ_e}0Aj{=HZIpi>)6=EvtI#w|=b>e}6i`>ef1?s`lFA zS||6-M&IJe{6cYQ6shdLzK)m=pC8yh&?OEsZfe74eiqkxSv<9MUZr^AN3ROQxGk@k zwcB4OhH-mNJ?h(gKTI85|G;^F5FP7JZ){h_{CFQt>#A=pJn}(!*rB*A5>4Mp9y~uw zZ1~=ZZAhPHiN7u{r^UbiMCXHkiluR!jAQ=Prk)CaXg2@!_(}KA4?BNlgKJv1%tyzL zf08`A|5YoCOS;|h{7JNZJe2-#Df6PQ*rB*wbkpZm=TQ%$44?4~l}8-(F3F=WRs3A| zX?@tn53|nfSkInw=!;N8kOlh8SS$Y3IJ&OyU```Ef-|6`D&FarpH<^DZ z`hl&vUZ4*zxDq@%ZOC+umvtXHCA@P9Bx+Ki?ET`zz)dis>;>>c0~{|04rG zt-~viIGFme4O>}UjGv25eKt;)etM^Qis)5gW33B*x5?Ve?bKN7vInmne_#9f^B~ok zo*;dr4n<*=*WQVbzG2(Ea*0!4D6WW(QvY?FdjC%Tzx@8|M9(Wyht;R^H9zwv4;xJU z`ep}rB=U~H+q(!WVmwtbm3z_~= zUns7OfvWy<9tyto2|cH}Qw2GWh7VE~nEJ6(gm1`rY5y0~AI6M-?URGI{&HA<*?v2$ za@2$9+VN*D^RRjL#Q*;~$zG2()ulSr zPaMQwh44VO3yv?$&pe&f?ZGAVhcVCJKY!y>pAYMAJ#)&AeaBFr?f?69oH>e3MW?9a ziq&3wCyz}}zqi7!nqMep#Kg+}r{BLYZ&1gjU!YI?*j7((7MGMyyyRCs(^*10>^XDt z?+(0unXt#fuN?l_{QgwyvVN?lpMP6fT-ZLuY9DW=wtfHNB;rB(rWlIvMuLq8 zPci)*QS~~cU!d1~vy*Z0gZ8>op83g3>f(7~9K$aBaP567eCnbddcw?ePnfmvBpi8m z{9nuB!aVa=PW^nel9v}cji|=V`- zasL@J`c|Ya+y19%9pGgB{VC%q3ah^9#k*F;MEij#D2fdDf4n>a-M{ zAaT$qeyc-u;eodOtG@Y_kDlQY`ooyVvjaZx#(80bj~w^YWxm*pp1uFh%Hm@64ASr4 zwJzo%tn!+8eRYX34-j4KP+TM9^$OoRMV%kNtA=lJc#81qzt?>ATOR#VH2-wrW2eY` zyy_XE_dfk$-2XfxBxh=b^xO}`ZJgEl{Ss+-n}?vnb$l_%cz+N+=MO|1W#M<2mZTpyE5`yX`Hj$u2V`H$`YnOBdD 
zXLVz1Jsw;_e`Afy54-e+i3c>+y!+dYPmP{{o*n;8&*Ea=e@)cSUrzmsVwKm_&31^* z`-#~)-%#8TMN0i=e#+zdGM!w;_D)fGT=xhcv~g7ze>Y6?Y&_~?e8p~h!9BXwQwPrZ zSs41;x#z9F{fg*S{{CwXJ^pNEapC-%)#cQm)MNMmO#EqHC~g$pUe!TQk?VMwm+i+| zoqXb;*ZkDB@ipJ#c@`JbcWS5gFR8yKdSQzn?r_wuPaPGu->G-UWzX&Ls%M|Sn!>X> zR@dvVlm}l}|K#)6I;ulnuD0qk?@;`243xHi&Eq&0e%1C*9Q2xRHhomY588Z_xIJI0 zhwhU4!=}@B>OXF`QDKvvX1{jJ;OEe@{`X~ZDV=}I)vqX4c}-n*|8pmtbbi>OxG54U z`=9RrbJYWrepx@A@XS^o_2CEYeFa_3hwrgL^svD${;>V1AKdtjTZe~Uj`7Y{gVJwZrzW&$#ib>wlZ+g1%jU+me9QQStegJo-`6OMicwcj{7C zC~k=X>%UW;&X3P=@Ol0WtNN)rmPfy;_<85Jr99R1AbQwf7k}8bG3oPD_iu$=Ry%yy zRgY|fo}GVgW^rNvVE@0rHSA{r;Ca6opk@dnZ2nhHdj|N}PFz;>XcZ z+5e%PeA9E{$3_oK{n+%M`yClS&*DP7gihn#kUph6^@rX+eD{U1kIo54Uh&dbe)_Gc zjAPq>pN<1I)y_LntcBaVI=mkF~Rx| zpZS5aBhBs~V13j$dR(`P4Oa1qS7Dmx)B3O#t90;(@wdHm|4ujW5VpGQslm^!eF1uv z=YJ-|^Us5RvDCw3_Aj1Nlv*Erh&^_S{Mc-;37K4{}6 z&zDs3Pk#SP{UGC|w)z>b3d6^){>}au|9Qu-5<{YlRmq}PS?#v-_E}^RbMebHQxVon)Rnw;;e0ZOvbPL{Soyk zhT`rhW%{Yk$}$ptvGHo+_sXv;ul}n}zvU(4n*Vy?W2Z=e7{?GjC;5x(4?Dl}${x=h z{CwE;-2M8Ve`-1O?ej15Qp_*c^UsN2Ms2@;nC;||U4KZOx%hegFwaoTih;`h50gCj zrT(ivanQRYk2Z?a{gb(m1ucQz5H(xHuD0hFBCtCBBlO2(dIlLe4al;t)~vol05jJ)uBA|Pg^wK z;>x#tC;XDVnEo*Kfw}p~_dgoO4qf^)yIr`E*F5|Fw^#MSG4cBI6XAiv`X}4}W~xK| zx!N^B<`;^4qNB3^vpV_K_gwtehuLD+FYV+rj`6e}^S2hww>b5KPWYyW4W_)rA2z)H z&58Y%e;|zA^5DNded%Pcal8JlFN+J?o!=?`{{3{eTToc-z3JmTOI`or*L$F4T++uskvKeo%2Us&tY zjBD5bG_$zSXTPM^KWN{W5964f%Q&I9PsZyN+dD<(*C)31gLsO{tIO#;JMU(B=qZ|i zTBkmK&@&GCRrX@~!`4^q{E>Y=enH5W+IaFO@0yOjeg85&i;I2#%l-Fq^@B-05^cX8 z>gy^WUb8cw`Gw-AF;KPtsq6nZ!?*3C)u~S$^iJZbZO?xziBq4}YkAbKdLCRtf7oKL zGq%~`qVI>TuAedC<>Lo%JYmN_)1)rD{&3?b-n{|eZ{dN$YVZGTURqZ@R~?ribvMNy zil4;?mHi(jJnKhI^+4AdH(ugT>w^b+NxVPVe*G4vmZ@>P43$Hqn z@ofJyD~n6%`k$8iK__;)_UbR~|JBcUY{#LvKT1^gKmGoZzJscp&etb?8@DG8SKdG6 z;`c2@r>K074KAU-G34y8|KH!Qo6;Eao1HIv=ckjX%eMciSzN3i8_Nw4y;HxAD^_`J zdP6$-^qcw7uTVS?i8h|=%;SGF;Pd)3R6cRgCw}Sz;en>tl=02KgnedwL)8fv9ryZD z+b!k7x9(l;vp+fIR_bB<=e`f1)lrkWs8g)+n(^%V^R)h^ZsytUACQQKK((@5IuNc7k^{5gKoZR-#1QbtUlnf&AxcZW$1DL4SV06 
zp2fxLL03_t<+XR>qia?<8)v@~8(t2tSQu zTU>RQ)E_3iRUfxnzD(HhCr|Etdb{c4_VD6h+Ui*s^!Wx_1 zedN!3+rK|OIE#z*y`}bmg@3+z&c5sMJQ?b|h1FhDS84mNsb12@P&^z1rT#nNb01^o zRrUFoIOsLsY{pf@58Aw1GOqc_Q#}tZp+6ij_=aoOSbI)5;I6aBe6_iZ^^5+`%HqQJ z7`DA9;|i<1_R6Dg=vU1z6u*cfi`{?KXB_nD7f3v{;j^7tycWx_?|00?zvkZAkP0UaNG%69eEZPwm-4j$Ny$Kd1U=p+xpLZa^Z#Ik@%pp z|LN}^HIF*Hi|OU#$;5Ac*YTMbXxmv+^t|R<9yZ84u)!|=u-DSNPPuEtlf$kry?O0Z zA9>qrp6&npvbfm!AL_}~uP9b~ZFTDS^$8MVexUK3#6$7R_@LB(<}pWjmY<8A_Mg{l zT`uP>KOg8Rn%>k-ef*%+OTN?bQ+qM}VS_;rZ?RX;C&GqXoj16C`yuFe?SD(h1(We5 z`kwb+{oLxEL~Q#auPOde%#IIA{U>j_@T?y-u}iNf<<(E>7k#aWEaHD?gXIQl8mP;~>uqHrT}ghT_+eQ0l+t)xG315AqC^M;!Et-^SJY;DPqM>io>FeDn;L(BBxm%VDP+|Liv# zgCGCQDc9BKQI~!H)tAMk^7u!`6;^v~{m^*5sXA)g{G9liUnm}nj>`V)c_jEYPEGX_ zJ>w;Q<_W?Btv>42eE1$4OnK@@uL>Kh9kSx~4~`aj{kdg9FY4jHPx>uR(t=04#<5=u)Wj#9>tnqD1H+kRQ7+o z@T`wcY!9j)agcawTRpv^V{xsgGLP{<9T$EjCcLo0N8QsaKfgg3`{U&&-!P$l{J&oo z7d{t)`FHgD7p=F{C-iHY?*zGTn^)WR8)`G}P&^S6s&siDfp0p=c*H@*HJdsW@q?z@ zisjhn2YbGp_&taoHrT}Q@x2 zz5S;)eM7%$exZ<`;#Bs3HBq)c>GOZ>8iagcxAbQwf7k@Z-3+l=P%`1|LI#(GDGoHd|>@2u6cFkF)!)`@u%oDA3t@0 z_(Ak4^Hh&GI%aDey($d9-s!xjhhD3{j$620X`c1JmBq!j|6#iSr!R`aYOm9J=$rX# zs_O))FBHFv4(q?h$*WK55` z<I`*OXzQ``pV3F%!hVptTyb;?QiPWzW-G#i;MNWPi^X|sqF;Q zHFblf^AG6sbn1lS|DvO^|NpJ+pZRHh|L*>y7k0RI^_y2%X=K>*&2b06vR2DG>4*LP zq9wNNe^%A+zjE~}iq(Gq^-erseH*pucQQY=<4`;kiKYHK$>(>h)Nk)2xx~E_Bo3y2 zwhIs*Xu2)YBOkumPUFlATM@k~42N#H%`XRTbZ|K2t-EGEmyJiC-ve47R6m!$AL;ds zqyDU7fyaF_(6{|hOA@Is6u*xS8`nwPD{>!i_^A&1#6j{?8$W3AW)>IIcWPJknQ=Wl zMf9pL)Q9}0-}r%Z!=#hH@U6eUIhTAp{-2t~#p+mFe}CuHzqtKhOZns_^P|2{%!$Ox z{wLq#c=SN62R#s8YUAhiZE^Zs%i>~wC$>)!e(LXHhmjLUG#?z;7e@O(UG|BS-r#w( z>z`+5aq)^)N3MQFvEH-)p&rHhn#y;IHN~bNx}kVBI!gUVZ)zvs_7l1IsoyKF{^?FW z;}}nM&F>b?w>Wix$~Qf1Fy$rwFzoTok9zLd>%#D_T=I=wPgvkJUT%R4+`xkxy z3!{&8-!3*BgN|+gy{Zq6jPJiAJvxfQYH$B$YuxTXptkMLYQkiGq4+~|l(v6#hIjH! 
zrzL*sM-TK);;D^4&Et3gf66P_UHY8zsUz(A%=(+XFmU1hCy%^j#BZOtl=1BNhrWZO zlj}cS_D>hn8!a^K(){#!2*vY}Sn5CX>k}R7HyzFQPU0Ll*YE7)TON9f=AX7`zQvO~ z?}Tsj!Uo}EgX(XrGT_DwHref4jeeWox6`MeXy1RE^PhcLT$qpFKED6uO~#?S#Tz3}v%PdA3{a`L0E zT=OQ|($Oid9~lp8jqHJDne|r%=2QMN0j5qFp*p%+(LlpO*ORT5mdk zt3&z3CF{TV`}g7d{q4IioN-GSk?nKHMYp$qfA!@>sPf%v;2d_$*oP#3mhH@z@;k2Pkmbll-# z$QRaG<>o(*@|tJo-+HsSSU(2n{~tBg4>EpD{H(o`M|C}JvAW4qq|Q+MIVRY6n(r04 zj{$t^d!EI`J3-=L>bE+yZg`-*uT!V$S{^p2I*eo3#UCbK@cnQ8@R)}~W9(W_?C;vY ze>o_N3$J&>{<{6wG_ELCd!5!Q@wN z;r^z^BaJ&j`~Few(*D0Gyimv=Hdgk3s_>Wx;~Ao_y!xj)`Igrse)?#Btsgr@_=b#! z9yZv;AKux2{`_w)nBVA`v&BCeQ|*6!?f0jBS|`|4JMToX+WUL6b$t8$rFM1yUyP}x z{yS0Rbx2>T_CN6IJ4qgV(E6wK;1~9P@%?wunRgBU{!^QT!QWo_!M{CGcqbL``!DN5 zulW6_cmi~5c%VcV_Fw0d+Vz@vh?AH8t}GNUMWT)CG!JZvO}}irPxX~o-=&jpd0Hp+ z;1~9PvH#y+Zo~5@-;jrEcHjGy+g`E$+3&xmN=2N{^8Mrge{Ib6WypNA56px4mEM2T ze&v!8ikIVqs{Yp-!lPfM{=3e&@e)6Ef$%_kT~+0&--FB_8|>l_-+5>9()V3IJzVvJ ztv8*&{50yZ^Uu9mT&y3%-~IW~$Tw`>I*zc~YjnKE>6`g;;Sy)wq4;YI*mzEPI*-!z z&ozyM%0mzIs$;hDs0Tj?KaFEsTy>Y!-{?Q+ioIUCa>GXd2Zn$1;v=p`$NJyQ;==wq z|AT)2p7_b5F3D=|kLX*x6H( zg#K{)1uwt)`i8T@nO7aPV8dmvX1lJu{=vly;AYskF8ilT+BeI~B`uv_C|-$ys_j4J zo1RlWuqk$Zq9FYPJ-h^oSE1!q_4|G5h1$`dI{RC%pBHMUk6HD+74AUaj(-xpJl_6o z=NE1Nm}haJ&)$j68k_HJvEi5BfBzO!tNPD<%;*m~hAyVt^@7Gr{9bL#o7S0Ue(iIr zWBI8Lc8baKggX0z)-vzMXLJG2=GhWffEMv^NDSK`k{!w8``+!tG@ZME5=Rr!+`xiw96w)ZxGg~ zU2x-J^N&NvK7UQm;^M&#^!|If`avgly7uZXo&RYHlkwI zUj5uR1KY5b#f3UU@p`mN{ddAEoqy(e?3K7z9(rKvxBlt#1rOwXhS$CMd46>qbWQJl z`oqcV-FC^npA2E@p~u}e?BqG0D}eVABU+MaD zCk*;VeW92i1Ev0>llGrJfvSrh=o7#7QFY;g)Wz$|{K`kqa0&fk$G+E37_i1;Ve%Sx z?(?_vo<`5k|Ddaw9}*Y)odEO5Di-2wx7bzl3q=u?s`}4)kWxRAI^pp$+y^J9b5g$=4dYE`9S(IBDGKDgF@&mHn?tKK%dK_TSVx6>U77uXh>; zm(U-MUheTT_U^qYeBp=N9JTecchDC*{+y=cfK9a-Pf@((wRhsH*96gLzYj9MP`nd~ zmHkhTKdGnHsgEA$oy1c+t;gc%>iozzzUp=JqgRDt+|0ipI&Am@Ve_#&A9u{PGtlR_ z&91xZlluKu@%yh$=o=`}_UA8jK+eNplb6~-4u7&z|FsW%kHCEXW7~gC=i3yt@w5)_ zG_FWIMVnv3#~&I`Ec=%Sw!A1zo_53S>z>s<{@+LRQ$1|*L1CBI#OWKh?XQ#hg$4f; 
zMXLIb-|EO^OnV;aPp|m-9u+=lbtpgeW2czrd8gwuF1BKtpW0!QBcA<9^TG$hW-o7i zt=9SL>FIXxj&}Z+W+5TQy>tP=DJl2$_ zeBz)_{MgpdX<1y1uj3Fme~<7i->LmR{pf`gw)y&J-ySzGocf)|fB3^QE@C|U{Y9VZ zgH5#=S5ds}wO2n_C$C31JfDn*9TqGd38nt4UVTH!r~W1GKXoXg2byjYw|cc652A++ zcJVic{B+EA?>uuXQA%swn5C{+$LLdPlbPaXG4kNP4 zHY~C$7ug~pTxH)QVpv4H2q+?AKtMpis3^!91!YkoC?X)>MFFt`_=Y;?b0$-rW^STi z-(UUF&z$W!r_S{BbocahZ~e(--#mZ4DPjAGLnhvO^0Cxo+n+`j2iu2SPCraVHcawM z)Li}7{YLmp^QcKgC>ED|JtCXGQ+cYO^`oJB{JYxy>c_VGTO59hrmxN0-|R`8cj9mJ zK?d(6J1rS!RPsvb=GBi6b2(*IxN8lUtp6iY^f_17hSxya0i_uCME^{eam z4*p=;NA#UAjq~JV9>|KF_`<3`+I7Vfe{^5p4W@T~M_wN{8|C_1@ zoyv+L&FR0^W&c0IFBcE&^e+@kMMG8p>l)9z`24Wlh7UXF(>RsUgXZ7J;z;?B@i$!1 z{%`$C_hxr*-5+-N)qBsLc|jmet&Uvz6w!A=>!b2^^2eUWd8hfQ4_T48Dh%US8*~0=_SbL6xwaPvEAOlXcZgrW z_wjvTsQJaawWQwq2<4&9#=PZFd?XsI|4#iB>l(*=EPqY?lpj0jHQr?PqdxSY@m9sD z9$t7%#!en&Me>BhUSIP1wRam7PP=K`{onp&>-vX%vp8%$ZKyn0ZouK4%8DYjmR|ML zW%u8s9#DC$o?@9uQrUlv!{7Q;licLB>mTHH_T}s0$e;4{2@m`!G9U67!sjG@p!#sc zr)FOL=-=~j+(%Eid$YY}5@*L>J<10zAFqEKWZT=O*jZXXs7I0KfLr~!M5J;kJ{k?B z{=?IdIQnAiM^pJ+yWRW}y;s@tSMjECT8G7_Jjh^$KXrxGt{U;X)3$y)j9Ps0(;r)6 zQ|e*+pXSM@$LnwV@B_1Cpa<(ZKjTYetEX5tGL`!8BmviP;m`NBq0SdONM4iaOO<~v z`Rx65DiencE}%Xfyvo;hJ@h3P4!Cjab)J07S-<%GeX8bdDsum~n#!72r1H``(bbdt zMp_>lVn+_e$D*Rtf6Y_c|4}`4d)LkfZ>l$cjmHnP_p{l~XK_yS9;6Oru!BC_a#Hi| z$JTlv-2TA5zstv#pMN(ii^KLm8!EeY6sf$VUiyYyHNQ|u!KMCdJ~D89iC2A!*2k$5 zfecz5+8^|tFpaak%44#dcixmg4Eo&qGlx9$QW$jUDPR8f-oGN=e*dir58L(pINkqg zs2)^5(YBV}$!q?%YxS$1&o_02LLOXQ)&J7}FOvGm2OpT~t&T3K4?ob>5496-aZcj? 
zQ+@ZU2VFV%nS~DPcHJjl`02Txzq!BiPfgJ4@t0G*qDbYXS37;P`g8F$S^F4@<)fk0 zf9(V3@k)Jy&j~Mf(DJC=WaY&VO#5$o_4n|A3#boU9sSUdM|YkTwz~L?uD6CgKwWnK zx4tY6JKtOJ{8RC29Y2_y^bP)WetZr>v4Z655eDxP>=W7gk=l{f@4x>15%|-7r*f5F zq7NG$u z)8MoJfAwZ@*!SHU9q#|*#GfVg+VxjzXMM+x917V{Ds6u>j{Atx7mH{9Nk7d`<4x}5 z-;f!=Uy5TMAbiMR2YuLZp|OvR`syFUMvY6)IPHMe{wL=j`m#7|`!iYZe-6K*c(s;Z z^&ov?KJ?q%1rR^4u0=@|c`UWGIG6 zz8;aiQ`CAnj>DD_|Sa2*l;#2cy~cF;S?m&#UWPhxL~%=fy*DZh7W2NzJ^z2XI9p8M@3 zk8}@SXY57S-#P!HCetrP^q}!lr^dq%pCNqsfgSW=^39KQZFb?Pu;qly$1Z=?eeiMp1ND@)KRsC- ztRKW#eMw&1|3P2V_=X^PLLpzURsH9C5r13n;e%g!>sz+-TO59h=sRK6{7mn3UW%Rg z!l##fb>en?FNGua8*}fe>(<~Yoqtxoe9w3R%4PJy))VHVeo{s2d${!fi(EW-|EMz* z!=s_pfB2_M9RA*kTtfA$Unq9`LFNgXKA*S0*`3&l^HuTy^a>On8<@%l}D@Uj16edv)oe8v6u#9Ce&pZ1x& zRr3pl{7`9mop4%QQ)HVTc96cRUwy}Re~ZIU5xsbS$o-enJXNyl;jO~3$)7L#>-#_1 zENrsx`y-b)a&h`%-@nZ)4z?5d@AdxAJ*o$pPcC|v-pMOl|5dhr5~mo7RbxP@|M1A^ zAv?aR;;E}&U2dnCd5xzJX`Jb2#`MOI9)xGW$ow=vaqtCy!Jvh{I4cjKxbNG)JtoJ_ z_5WP=01{{0|3=m}h)0QBA1fMU``*xacthc$W2yf-kGeYIpY{5C$I(CVwj za_s$pA3j6)ocIq^A1-+AiXDFa#`WR62fw@K_;*_0zlSJKZ-Vjk^H$TX4$UiGyY-<*<<<56CmN4G`NgZX-{YNVO52}?=!s+Ap;$c{ zEU(6UMULa}Cq6~&An~b;p7+n}U0EFPn%=4Yzl34@)jNK4{MTLz6aT*UX`3|{_R4G5 zztJbf{3gBsPcw^yx<#7P_nPXRqRzvvKS}h=4-Cb~7+C7R@^BuuvLA^b`w{qqT=kk) zr1Fw}bN+|hZw)=wv#@bJD7k8Fs4E^*v<2!GJ{w0`uRFpcx%Qy!C5--$1{ zGrsiH4jWDlBY(fyFP8c7y2R5LTmPF`9G>gna+N{yC+`RKmY#oI)A*(!b%tWis3`T{ zi62YrcP<)}oyhQ^M+T`2MBfSVH`IJ3IjN(Hcgq*2T{rX}ug=`^<&|r5u58P_###So zW^t7M|DLNJO!_X?*7reuocgPucZobzB_lKG9phl(*!c z3lG~}>=vhd9#kHa2dEF*zVVG+e)aN^VcTPeF1^X57l^m*5A#$0Oh@{ChdxE>rEkcr zm+AaMF(y(}^`GkitRJ~#G(P$;RrK}G2;vV~9qMm-H*bHl!w;&z@gak$pX%WYYo7aq zUw(J`@G$1SQ#ShOT8n$N^ZXHxdqLuCzIoO*td1nFUH^~1CSm3NuN@6l{pWv7RtG$w z6JG3~PxNV>_<{K2XL{;azSPfT>>d0=eW|y{JeBu_$wPns%wkux{(o2X``7aE`0JuH zPgXIgpS~emJoPn%AQbYTs;d6`S>k8?$VImG!Kpk;uzs^3{-AfFH_TP;6T~k?tDm}^ z#-;KA^qkf>@Po_~G`;3QPSLBKIFr$*NL>~Bu=eq9YO-+5gog5&l){KX#CO zCbJ$Wq6h8$#82l@s{cRKhd)1fC4E2s!~S=Vx@Vo|F5GlR_o(~6`1d`Qsd?qI^FO^= z9M=C$2X_6TM9sDR(Y}@XpUf{58%Vw$&Fk8+uJbSTgT5%@SO1yB;}80T*Vc!=dHY)& 
zdD8eQxyoPlVe@UL%vfyFdLbV&=(!VjJemI3`rnXx*iX-|(*F0T9#lToN3qW7zv}DK z*33JN1Bp`%g&ZhXuK#j));j-IQ$A36=@;k|J#~Tjf%bk-pT^@4pJD1ZK>u*=1usvz zZ$cKn|Ke7+9`)=U@Y(jCc`4fOzwj$&6~n9D_J61|mjIiO@`qx>XejkxuZ|0n;lBzn94KhNTzzMT7? zQ=g)EwU+RJ{Qg2+Aa?jeF<$cZ2$y$?I`5Rn;?<4}KL}4Mqfhq(;GwR{xTHSgL*D_z znuC^lHd50HpI?6Lop#LD*ONOcK=cxuH6o=PxRI=ts6g(`Bug$A9mxLPkoql^^ikP zTH(r&kGS%c94> zzP#|^Z#-IuS3dmee-MB47N>mFWBNYvM@|uc_zdB55l;`cEI!uYN(h zKY6SU^*4QE-u`BXAEd5S9-uy4y7-f84_SLgxcqmg{cOE^Ti?Ijf1qFLDgFN2P`#o^ zbGG+2Ua8#)mae}dzDxa74uzaBtLi_$qgg+4jR&2`*g>D@kqz@K4)fP}u%4NodY$-t zZ~^sUv-1wv{*tlVhRxp{zvlfv9}S;v|FtjF;aUHw57hipRcmQ{{Ql^~#`^>>^9zM+ zs4Dv(Bp!d;F6PRsbzld*6T8Xk$L9b&X!TXasosO|A%h+CVcc#<9J0gU@nPc$cRl#} zw_4Xf?4QL!ePBLOxBu`eig#;?zjvamYu`*?6Fqk39g5AOp{oC>zx4?|>T;1L7mhe*tFbq~{AM`*`;VRa^`pArvFo23G7r{M-}UeZwF|?uJb=5m52W1?Yz_(3b|0Ls{jA9``)+^v7cdnHRDmaa9=3*?9c5r#$vj zICr7<)}8p7*7djhWpP+O<W=Pd9y*~u&VhTW^4_mTQSF)0RA_22hJ zbL;$1F0yTh=!5#zPi^-nkJX|6rdK}X6w^5Ggx~bYAoU=Fst=>~`TKQ$Ipg-Q#_f;K zS^t;!YPa*Bv$8n&-PKn-|3$o7>$}OCr}X^sTs*99l|!*bG*tD!bp35jc|a#}sc(ti z@@jo0|6F*|^~2(n&x6Wi@&NVW3s>K~|50c5g)fYMVbiDk`{1|lUw9Pr`{M<;9j`x! 
zhkEFn>GLd(bbg@_LuLQ9QS{5Ui%sRL>ZcRC#i{JU-YgEYH$X zw*M~iZ;H%3L$PI47{3!cJYHq|kqxOs{kZN9f6(~UAAKiG<1DY{OXW^{VcaqQp0fLx zTf+Edp1S(H-yT4`^*_(zV13QM9De|KiS@+LsbAWE)!YAn6F=rdouQBeh06Z-Ngn)( zFQNL?PiyzDiZgvaZ-28pv0FamDooY~kDuzp`uG2N)>l5cUReKJ>$rv;D8}O6^^Ob-Ao9^G@}k+IfFN(H$KuAAX8lM`8WYeB`Bm?4Z|p zlgX=y9^~_0TEB=(>LM?B3_JLDFF$DAbBbMl-o5nUwF{N2ciejSH@8nn9PH?lqc1Pb?V>v`qGCS5Z1Xo|9;Qcj>T^M zZ)9=sd93>Xm*t@j&`Esh_m^DtI|wrGP;4C)rT#nh*YkMzm-4C~cF-q!WW!n(hxt># zqUoLZ`4sUVs9ycTuHnFg-dMRAcD;S#^ckZc=JQ;2{@?oFlf_|uBEOOWJMtsl9_>H5$AMe5&Z_qVvDUeou?+u!U-oOj}H^FapjM+Q|N#@)EZ zJo|&%djQKTG42%Su|uUk&BeI{18rVq3}Ali1a- z)A@h;l%jXShn~7X^dReDYDb38ka=hx^zc<-*y0=a|7FvS_1BiUw$}$`aoG2NQ)L(L zzkRKD$bwZ%@5DmiO7&BBP5eT!U3_7mD~%_vbpI2LtLhi?_R3>2c@@!v#@C4Dw9fYt zM_u3o>cbfuUjFGF_MaZkS@@cNPI#ti^W^^*@GIs=#Pgr0TYa_cLxo@oTjE!xIYmLN4`RdAW}V^Rn%J-p+>| z^iJ$1D<6GC58C^xex@f*`QU}eWb7UM!wDZqyS305tsi@qU~CzV67W2CC=|8kPa>c~a5_d(}t_rHBq?M~3*=!>H1``Z1`gI2Hd zc@REiu!BB?U#xw~S2x%tY_-dacYJk;gW$9M&skX=a*=`VWuX9?`ZZUiF5`N54SqCX=_yKhNUeb4fh&GucJA)_w&tNFHQR z_2KHC=a${<>FdLF-=BST-(y3)+U@-3)GQAE_vy#z^T%`5D~eQJdM7&m2ZWryfAl33 zjTlhszt$^kbGBH$7HRi6JJ<)@2w_YbKA*t@X+oP zp57IH>wiP%1#HIT1po`%de)Wq) zh96{}py`|Q_BVSnPw&JZ9+Op1Tos1(r=IiH;7k7=Y6t#&gVld{7xCPG5*f_5kLMpA zPS@W|2Yx1Ny`|q@oOrN*OMRi(IU1_^U(Y2Df7Wk9*KRkzMDJBLOx6SA)B25v{-^N^ zsPA6ujkkCF-TDW2kJ%+WJZR@Ke^s37;f2R!?92~Ykv!pq8-88ebzCz{ zd1LnD`~I+X{~4}-=*i-+epKB5O1xT2t4H&9-2cfXHWa(W7naY79=>`*Wa_s*)!O~A zgTz-MexUJce$(rGyy^|-Qx9J_efdj2_s`EZ!&L_?{qjb4I`Z22pP5-4oKGPRKTy0{ z`}^rU>kZ|h&YH?j5Z+Mi8WpAft6!-Oc$>oG6Tf<&=-~&``b^(5Z-28>KS;h*9-uy~ zdF#k$Ki75doc~{}d*|gV96&#<|NT;ro&Ra5oVTM$t?yftwXVASt-ba83p{FPKQ9!! z#n7t$^EtA9z|)kB-U(s{Q$4c14|#hW^UHAp!3ETZ$y0ul-|))2VcV--TjGrM zd&y_lpHJ01px65klUGr^%1f_y`i7j&4>=UON0L(ib$(n|Ug}3OfBfptM>>1oEl%qy z)iX={@k^2ZSez68Dt#KC%3-IQE_&~yi%$tVK6AkXul)F3TmQNKxmWo?r}tmYRS)X? 
zMcZ0>C$CHYe{6`J{V3`R#U9a6>c8@E9D+af8uC6&)x0MN;tyKC)Zg^AdHb6kevrCS zd4T$`fA-a%_KkF*Ss!}EYr9SN8fW`IS`YKH=f5ZVOb78MYhCGPEN+@${W3#2ZS>6apU(0CKO)$g=^5I$tEgFfUxJZsf8_8T9z=vjBo+uvUge%t=^ zYaXzna;+UjDld&s{#MjFPR}*IOHp}4@u{dt`)NGOi@)_L*Lcv0Y}=n_)UJAqBd?A!k61`VBMdGY3t+%8np80@@p13Lu z>;L_qd%k}5tgykA`z^ae-<5186stylzR>+}{A0A5mSDw=Ue;dNjc?|O7$9a@={^qCk zU`LOfBKAt8PUZ=sSA;jL^_9~O@SEQe);@2^H;&r!ba?Ijf4|NTY^adDx^P#PV>WutXRc^K8$$q&80S< zc2HRTV>=%D)BPuUjkE9Ho-7XQ$8i1qH&;FAL{67p^|t>>oi*{ad8ix;Iq_lnG#)>$ zBc*=Z?j(HJLFzD>Iuy}^|Ev9(;Sy>$R{kvnl z|JAH?-s06--&2{s(Z_Ut$f0OTz8+!lPLY1~iCpSaQ~jJ`pJ088ApW4$k@TN=;g@0> z=biAI9vP%=WJP#`+xRovowU}pu-bdSU-H;`>-XOSvp8%$Y^c0SeE!|h-l?qoB7I2z z<+fIK9Lo0weYbjw{Uep-B@ft)=GOjCF0%14U-hfYzw*Pq+2Yt=I74N@sC^9Hs<)!hhpgh!{i_E;hP#hQyrT#ndugm43wm!4HFjRi{ zK(Fy8Q%9A5L-N5}ic>y#nWxFv$%Cv2U-#f8u6gx}_eXROzV-U=zw>Gr@pk^9CyT@S zF-h-#0%AbLgGO%$MT=WRSW*^qmlYL(Nx`lRBz+w|ryY#rK$W#*{5*FZSv|Crp_Ik8OW? zWj-9w_?14m{Qs?=F)zcK_@ztZTTOYKV%q;)^r1L7zNqSdUH+ch+Wu%>5&yX@dbCu?{4_oHdlq!5BTKK zyJX|M#_{|$+rBijIQ(+D{b{HS>O8y?JxgSJ{uF+>c(5I(&QKf{4VC@xl{oxu-tdCz z_g}yNHvFKIdaMp)F!f9HVdH0-kA88XH^axT{MUC5dg%^$Dvy6VU4N+jnGWMi*Z+n% z@cz=zP#hi&rT*(Y()*dP-(%>67e3G@dg=o41Nr=<>j(bu8FuKi`Dq>Sba!2M?y#?n z*}QwXRWH4C&L83Nv+aML#bNcVsK>wLQxvcAl05Vc+4vd~!@NTwH;ga!--%y6@wdL$ z#O~D(J}}i=9a=wrpw*YyEl%^mJHLAJbaxHjV(U*|^8N1RuG;g>`yQDMkM+MNi^KXc zdVtqoQJ>aN-{7~tsh#h-V@FYn6##4)7nNR$9?gsv#)uH*(cfvHz@-dGjed9azqQB{X`VrPA+%jLYb-*0jBv7*2DrD*jN zPrg)EeHx$2Vfg3HT>A8T^Dv@$+M^HbG}UVy{kJ~VvN-bB$}NLUX{+j z<)Vk5xTmkqdHb6ke$a`(@gak$U!o6NKeY3`_YQ7^ZJTRsaM@bDUhTI3KO>8ybljP% zUQwhES^vkWzf-I!QeQ4|D2|RVO8s|YuQvp(AMk+chaL2Z-uk8a@dNF1RTZatc#Us> zdh&#G55MHfsrStamtMBl>6i7kp1;HO&&*3PkGFq#Ylh|1`m~s9bLQQ* zy}wHs^1#}gj=BFt`0e+XMivM2%p>6N`>A!AUj4oD87`o{_5bUKXY6_Gi+fO)UH{V~bz47%>F+OE7j=qP zd1-t)0c7G*ITXi563gd=(<|0>p4N|=+QG!H{!_8z4_e-ouW#P|r8v#wLFF-dfco&- z;KPnyd)WuV>4=vpDFZNOQJ7j#q-n1ONTy_!wB#f7orkXWj`~ zziJYJ3^Gp;eJ8}<&z{!6*$1vQ^& zTTA2PegX7}xI_+xTrghMf4*;+m%R_#XY%3q|NQk2Rrg;yKzeA~{eG1j^G4Cmz@K@C;>2jMyc+KnOZOk69|^(-5}(TGnTOfoYi4nn 
z-iho7ittqUhe?Y*H*T?w9t>L!`%T}(m9~eM-(M{6%q$L*;ZuIGwwBgswU^d^>S{_1 zyrDQLDysSqFV6u(P7%KJenIG~{IOSI8t0wVZ+c|KDjxJ<+*T*O{?@C#VZxV=pYfHa z5B3^I|5-o5{Fn_t7~@%A3^guG26b7#)X#&>EDo!uI62Ct{#U$TmOmFB^4a(Aj-rPj zG(Y7kmhby|8b?zg!qbZ#-*o|Kb;VCl%Q7Z?DL9{mJlo-~Uj&KIHqC`+w5M zn(`)cD83L4)_?3^E^+jW^#VlS39I@)zj5A4J;sNu*oiMZx#FEmE%C7#!qWQwwBKL) z#bWzG+<%~ccimteelgVNU<2 zU9)22!Qt@P_P-~KgZliic|ZSJyjn}P2h?AmtUOLJSEO#0L(v-zmd{1e&TDcXOY28X z?V$2u2dSe9@dNohG7r;JFLi+xdg7|Edqm?q-)Y`nbdQ*s|LhZcw5~tl|JO~OA2>Sx z|7%h@5AiB5@uP3ZRr3qQ7b8ij|4z8kGauv0)t>nM*SA0P86;mStDZWmFsyRz*B1T5 zZ99imU-|pF?+jj({&2jN&M!YppMTM#^95@vYaJrZwfz~^ZYK}h59$lWm!czm_O3nd z|7`n{OCIX*PGssdz0NPS<2S$hu<_+T%fCG1?6Aq@w|;!=<5S>az2SFv(CWywK1GpQ zOYh{h@yTPo?D&qpV@D3fX)&ne|*g>D@EwAp3AAK`@MZZFEdL*gr z{}{~~^xg?#2UES(QPqFut#;xqPV@Yy`mobZ*ZlUFNq2``r%zveuLGZezw-Kj zKjMQ2kUN{V)|u&Gy)jwqxBHJdu~+|1p)q~_w6ziPr zzsjZa|7l!9B0_Oyd{No|4aCp-1P`eBu!G)ZahR-r)Q29l@4bW%fA|dHb1_Y8JKI4Y zj(PErJGVJt>u~gyqhHziZ>{TpKCL``2~N`epPK3wMQZiiWGAmm|NqTJZ}U-}P@EMF zmHi(gan`4X@-!8@+U@4&+Vz&V${$`(<1G#uR348EcF>1)ezD7*U7KAP);o3cd*8hB zZQ||vlm09YJN~7fn(7rrDlfeg9oOL_r|TD=_fVW214{kx5}wla59m{zs(Iz-wDR>2 ziNC#1HHkw`(fUuk)4Ec5fcmgOGXRLmO*EsfntndArXQTN1omZ_t zJ)&nxUrpA!O56XM@~NG9h2orOFn*_gigkHBgY}d5%{SZqN`2FK^j>AdJd4BZ)a5>0 zPu+&$n{deM5SH$0| zoFaA*dn%)6y(!t7Ssdo)D&&ej!&imj%>BbXukJVJ`QzVvdidVE+jX_J|Cwt(#r(VR z`iHZr7Zk77@vg~Qm)(ENiN%9Wk*O~f=SM|l|CJAa@*9#5oYBs2acXD%Fn!;={mo9C z6F(0wpgz?8@t185|K5l&?yP$j{pP#<@Y(tQhUNj6i~Ikp+I(}dvt&LdJ9$-F|7)V> zbI$xiaX~bc`mgh=YaRIWc{S90*g@*5Li|AUOZZZ~SNr_x$rBEKY3~Pne)VoRX6>;T z9JbVS_-y|_&*HFt51I4t-#LfCmQVX4Uae(nr*Em=JBhJ+iVLHnvj5w(`&-|6KNCK9 zEw9>5*81@SSugN2z5082!1>g}7bgDl;gx@R?&@K)M(_L2UOJQcu>I$}4@g{^KTC$$ zwJxs1VLs{nkVA1%WUB1{rjp3&VBQI02dO8O(X;<#_6k1pcUc^rWcaEu9CS|KuWHX- z84kN{-K!V7>RS3^*IzX>kD@*Qu}AfwQ@Kl!rSWOsyb}xh6hkpJI+XfPoFen#eKUW} z@70gzzTpq1dC_;mG>$x`*SzRE@r6;N@A~fDH$NOkzqrBhO@FyIarXS{{wxkV4yfz- zhoyZN z6JMx3^76-OBW?`ix83=t_nf*jeAa*FqnOw9{$sT)4z^dsiMF*w&w6Ne=Hi2$Izw@B zG?e=9#GmKL<8SMKQ|#UeVh2+_venaUk27AKxAD*qC;lE>K>d85|7>X9W;=>BXML~n 
zO6^(~eY5$(qju&Sim$}L%Kon|yrq7?qlg{!E{RhaJ^9TazNW;PpPNq^o{G3|%kD!a z-0^sS_}*C)KYPjnc3(F;|4`F>iusq~^>0(D*D%Q=QFCp7v~Qj|@NbGI^A3gFFr(Ce z{52kb+b_$7$EzKz>;LZY2W>r2Ui6(XjU$igll-0d!b-RP?X8z~S}F`%Y@s;+*KIGZbHq3d^f@;_9ok`}27-)V%P4 zUU^KWzAFD*@>K2@DW3=7Lk2tOyN7+D_w+OR?(H6a%P$@qebWq^XVvfTE9vo9EsKMB zi`43SD$_UkZM|)ZCw&RUw5X`+KfgoZZ#=p17=N$Gb-A5{ov*Vv`l4uhCvnIr8b9&s zZ+c`f^-J_&+?xM>_M->)hYc6M=j1<+ebuYI^8QOJ$MX+o*mkFpwT-Ix!_GfYkH%Tu zxzL2-()gmP|6IpNU&v?ZgaNzI(*y^P|=r|4z4C zddf|oduDlfIR5f|SsaEF_5S}g)hmkBTEYX;H{`1Mh2k>F*Q5MSk$KM&+4_;I9ZdY{ z-)r}`IQ$e%-<-F<*^@Z$#NXzH4C0Rrsy=LS#`{k!ebe-iuQBPgpI$n{tKHWBMiz&y z@5}4`H)^T}m5(~aI@kB_s_Ks)aShQkzffEr6{YpxY5eqde|sNuwL3-itIN-61K<8A z5A~$T{0zz0rFoIT4*Ib2mQOv~x8X}+wN*c|*aOp^h1ahC=}}&AXuSXDN%np;#LklK zfXSM_wEd|muiA+V#TC&|)qk$zqA#WXD<5{yJIQAOnN4$H1T;nK;A!e{IMtSk;Y{~rdn{gJ5F_i=rY@=#~4azl{%Lh-ezDD|JX z>Fxei{pa_h`qXxR^01zPrcdfY58`j=guf&YP#?w)+3M0!$LQHy-8)e|2cik#e>fg^9#k5(NOBYQ~zE;{E25jk^1?axGuM|8Tj@` z^T3~?@iPzgH$5_#`X%}>Xz-8&m;Ul6!;0r$vGYBfZA?Gy`?o)f!_NOSRIat7NUinV zWGAl%{`{%0$AGH-*MsoU7wR?CeAq#s=&fJcAN)Z3Ts5;eOt1dliQaGl_1&u-y7)aC z+R-9&xbIE-7JSzK8Jee|=;Q-{AM+4vYiV{TI{Jox)%-$nRSYQgU+2etY^)!- z@Ee~K89V3`z15+-_<_ck*ey=^;9XFC2+I^xp8v|ap??2c*Pi$3QSjOCk3Cr&){jl~ z{HNAMUhyg~vC}u?s`-WD8}0d=a1vLS$HUw91ihj9(?`%JdSt_?Ssdn{%+K^r{CtY| zr}_#RzOdy|`@Z|P8@3LUckcVda)+PKJnZ~KBa4IWteyYqQN5x_bGAP<)jP#5LHgzs zJ^8>;^u+*szf=F2(cJ2PL*&x-U;XOeZue*2-f5iHXL66|@k^094Xbzts1NI0bIdXe zANkX;Uf+_Z4B2N8ymtOg>#=@R{Qq659>Di6b>$L4-(6fu>L2FU)Fhpm_T(OR{>>0$EikN)c9ecyq{_CNcxI7;8Y zPW6f+m6u-a^bLOI1v0--d^3`i`mgigxkmWg=dC8WE#LAY*Pm_ITO55+H2rk(M^4e| zckR3jst@N)*zuwx|JENa*yO40_dNC`ue^N!avTOyPdaa@ro5yse*YpK#LjvgifbZC zW&gF$7T=JZe2%;m89SKjkqwo{{Ne3t=d*g9=q+CTOvc_pAJ%_yw@trrLnCbP%IFDO zUGfU?cKu0z7Ds9Quc=;9qgo_Z-_trF2yOI2jN2oJLto9{SOV<XI593Uym?)mta%mQa`%X4^%&{yTBi$E)c!2e8~T=U6O}O^LHZ*}LQ4~6V7(r52P4{{!x`LNxx^+EGt z2Z>K*^weSYJd4BloyyAZk--Jjhx%&2U177~FNN^t&sUp#^|kQY{y)5mcK^>LZ&opk zkNYnXmrDTiW8R^-J{n5>*Zf>Zfj{euq55G5y_0+tJLuu7!m!;rQ+N2?uA{9`&2=-@h8|{Nw?xe(E7#Di2T}4q9}{ 
zS05PkRQT*&yZ!C0U$m}&*iYxtr`S~F`3qWSsgF+lSW<6Y^~lz*rg$>HP}~?5RsF9g z{V|@J@OUQ(A82{iZnE|bKd{PfdU!zP@yK9>o;+c#C-2$n>pPqt#@@Elu)l8pYxwQ^ zmw74Xzt`Wt;8)C&;Z+aPH~7(;taXM$elRcfU-|eQhx+;cw(X?)VF$^V%9g(;i^J@> z$YysH@_(ogVercj9CUUb!e<`6;TtE}b^W&g+0eXT*SzmPDPFDZy~*0w(*191%Byxh zN1?bW8Y=s*=XI=4PI(gKJkkI2=YO`?{N{anUzisBDF-aF%*c0nzw7(xy!sPdU$_4~ zs#g?gPXFmA=)^)l%&vO+2!`VOF`%;l4dKIoz}ufBpVgst!UuZw2Th;M3%l_paaDTw z?EdGMzw_6(Z~o@>_}lr1gfD+0{{Ch6>A*ir270gYmD?Y+TRp|}$W-dT&YS;H;?L*C zFyS-5v~S3Y_<_ck*wcA=%~z#|uL{GO)34j@3qKkh);i(4y(7Q%C_J|RGc~p&KyM%*3fA%`}ueX-fy}#5A514Nh?|<@B>tC*ZPB2}1)$>0HWcUo# zFBG?AAB@WW_e8VxllRTOf9b3G)%R%kryrJA{Zl=1ifNp8!f$$H#WX&Z!?4fXJnf9- zM~0Q(Ek3`|@r!zmv*WM+ERO&9{PS4lQGR>>=vOZGQ2ZdisOmrWF<>4xZ+Mb^nxE!P zWv_bjnZ1I~{E7PyF&r>r%Gk#yj|vAZv(?+5J@Wzh?fgR{i^JBlVFP>qmqfMphiw1H zDPJ-_`WT8^C0~#F;jhSf4C_;3_bOutQ#~?ynV!Wv~2YHSv^RfQGqyE@I;!Vb15j|+%E6pqp(-Y@J@4*GshrNzIVd6h8+a>J1_mj_l z<*FOuv+GZ0WpUX1&`{YaYF?4bOYcNS->hFX(X-uVexbNM8Y=t0rNrTHeS%Nx!47(t z#Hk3{I3lV3(KQ^;C=#zK)|2wC8 zMUh%duX^gOZ!0!r`}|OMP3)ogVSG{9|Md8a_yjrLs6VH9ouKtM@i%>cyB~Vc>Q_Dw z!iNlY(1*kB{oY@HcuZe7^8WMc-EZ9pzkUDqWN}zOhQ#|H-o*A26tC9tS1Q~6XQ-#4 zyvVu8p|~TyuzXJRR#&I>U;XJ9=o3ABAbueJd=5-cy~<~P3#bnl?sn*@3s33`7u`7e zrtKCwn|dnG|6Cu>KfGgkwT>(q@POPm9@*k+63F~Qac5MN`tQWg&l1F+_rLY|Z1=0n z>1^9ywK%7K^etDpR}jAxt$yO1#-;KA^&3P3`DY|I+`;s4L;KIP!t` zBZD3EVdRlV{rQgFMuat1{>i_WxvTIRXXk&WW^vf}??8`#lf3m!lt=mbT-*Dm{-OAh z7k8Q`A>W9A~&;7(7^obub_A0b`lX^^_@K*7l4@2_}_8Rr_ePQJd*M9cS zefNULj=%b{IPCZL4fXt6u6mGq6vaB%`afFzonk|gIF&P?aPp+^SM zgXlXU{)SF!Daoqu#21DiyY~3iC!ahAxBp7l@?ZBFXXhXCEDp98Rrenwzj(Kn-pOnG z{-xhF(epW{&QSa~8mjuwc~tzpQyx(Hv4i-fGIf`}f2l9wgQtY*SJJyI4$~)mVZu(c z|8?7eqr$|=kB{9UABCTN|K>3~v;M!Q&);;aR}`tW^r~lm^v(3i{NNA8jO>F^+5fep z+4d)S{|sHbJ?*#2^egFy*>mBce$x}Dc|5p)`tIRJee~ax4}PtC_zTMn%Es*qpB;bo zXK|Fye>PPQI+4>Q^>aLmOuv~I7>b`nipu`4AsO(8$I!Lg%`eeg9aa6W2@ih8GoO7X zt_s7XyWZPv%u{)&hby<(@5@cEb=v-aQ|kb`=G=XI#2q`^@s8=$&k3@m{<`)J+16L) zqw@{LPh)7Q|H@n1|Ia1Uf4%<=<<xexEg)s;Fl^$K9 z2|w5S+j`%h#bNs!@GFY7we+etzAeRuY&_K05PK+o7GIS5uYT$NjP*TNekVx3K%eNX 
z4&}uUG+wRC^y&|v;R5Qzw)u)5JoheacmEDw{==VI_rK-$uNhe!{GZwCsHt92r1Fw^ z(l_L)`Gw*>$=9R&F2SiHTOAFxHx;`S)vtbx$oPX+hx(g7pSQo+;Rl`g8y_;5`X%}> za`tvhF0#msu*RJapLgftli{)L|Ew$y&b#;t^WOgyuh#K{$(p~k{b?vK^{E_+pG&?T zwYzq#tDp5FS3BrL#t!;K&pbi=K>K`lh=Ug%lQqut%roH&!)NU9(8GT}f6nt~k6riF zyB=b@Y}fzvXK~neXiI(mT~qa-@{6|hK9Ik@nfg;lBaw5FL-C9FqO|>SVy|y6Xno2> zw$GDmx0_$0r!EjbkUsdTIMri^$7Jlx7g>=!VcZsn^elSpGhxEdKJ~`J8*fIu^}m+I zK|OiA{rO+*JSlm_tF=V$o#^PB>ETto)l=Lb1FHJ(6My6?tdGp%fFHCvv_I%OVH#(7 zJM_7VH>{SQ`1;O!TpU)r||A*pdsQI&GVCT8*rTQBZKz*TjASx>R zUu*Y=zl7??b2s1z(>l<1!ZgnE>ikW1Ssb1C!pFb(-i+&iJv`(?R=9lM=O@B%$G=4IRIrt+vCcF;Sqn@rvg z{_vOLln-9|W-@m2AcLw8BcI;x&aZd7u*UYE=-KJb@x=4}%YBDIt0T|iV7n_)Yiadp z9{Of_C%)5wvRzFn`f6(ek`pBbjN32S`hurU5l8+(nbI{vcle@_;N^@%*W=vjIvuk8PK_|-&T?*Bv4V7wa7{L<|? zpG!mKFZE4%(8C9!2R(V4SsbRPe(c}^>chvcYE1suotK1suftw_;)X8jvGf1%E9O(; z@o&TCOCL3VmJH+5??Gg*d>ltJzfe3J8A|Q z`AmHI z`5c7ek*Ki#YP<*WpOMAE_QFv8m~UFQ>ETgC-wElDq2?>eNgY+ZVU^Rr`me<{c{hyM z>YKZL{+vDGsW-DY*zSYy40!$5I!gC{>k)qX4sR$PjfSfJm!AKaD-T%FPw&K!Jj@eB z58@A+p7%}pu)||A_747G+-^r4vcur!TKoT>&f+Nbd4;9p1Hdew@{4zC zX?7<%`j+aw+L>P{dRnlubdBQC(p0Vb> zFJBh!c>Cbdn}6|8>agFxW+^|oo&Nu#rg}w@%1iRswQt^u4|+q@hhk=QDD_|K<~W@C zEgn1AFL~;6Ii3ApU~!tygLxK*?-j%^MeoFJd8_0B>chn5Q^(zR{bpg}PM2PF_?@?S zjkD*k^<{Ce-{-mhwnz0~m&&;yOX_6*zcimDE)>6tp_TogF3j+FC$b@Y>Q_HQ?D&J! z1)4sex4+q)*opJVRY*OlUG-s;7ask=Yfs%CHr;QD(;xiJ$?)0puNzq$Y^U=-J^8@~ z=qdZY%f-%;?Ul(+UX|{@r+RppUnqVZ11kGp6F&TjH-uOHxbFe}p!HAtfW8x^ah`n2 zW3uWy@dfwnQ(vEW?)hQ$7q|cR7RR0ozny>R$>Okn40`y3@d)pSVVXaY>ywlx(R(L) z>I}u>QDOPi?m>+&9nZn52w(btZS>3&MBfS1Ix1zYw-a9&J89i-U4Ptf!`M&XIqBkM z7KhK)|K2PPwu@da|3At4)1!9gUlW-n^RxXQm-r`fp?D%X7(f1CZ#3KX+}{7R4)v=a z(e6(_yc0e3@wr8hUy9~W-6;<;nEEC9u4+lJM4+3dm<7a9h?o&TAd#ldlm z=lSos>J>$5o$pEIdQJVQr!j9i6u*gvQvWrM-x25w^%^?i#SZ#Jj|`Ufuhl<2KE(gO zh2iY2dirku(abRI*Qf9Ch?@>C&p)Cr(B?xw6*C?5!DJ_|?D}VwJ=o0RfHxGsjdErG z=lA;aKMB8gg5*IqWIof|{qP5wpT#-xpKra!g?+F6-P0q79~7EH#$Wu0dv1l-wm%K! 
z2ig8pucCOBm()f7nFoj+ITTMul2ZSjaM!gC{Ox-;7kkQQej0CbR|kLWRdK5KVB+6N zPaPru>^6^l`Slw^cfQc1;a|&%t33bnbAA3e`4qEc&q8~3#Fy~m2S1p`q3?ug9QBx9>qFm(FRc5{&u;qn-baQFzIE`|Yio89XWRdtEDr0( z0H6ONQLXQT`Y`38Kec(wp?JDIpKJHye|*f#<_)hRcF-$NDx+t9X0PBgf913M$n%Mz z(X;7piw?acG=4OE=T)9r6n@VC@cjektp7b(9Mnx5^@w#&|8*YpE%n0=hT@qI<%P%k zluIVt4mpvrgYbK`8`5W;kNG!*hd9&YU&WK?iL1h}$q}FU$CHm=95z4gdq;fwJJ(Z} z?f>^{9Uyh(st2h{QLG=*e|U&fITXK(p{4iL31i3Yzxt)~^@$#t{*sS4(`#KNz4DcK z2B?S6et#eTk9)5;>Pw%6&-yT9?(S`O|s9yP*2+p zkDTvf`{PH&sz9?K58HE16eQdGku4Cdd*|l!M}T@XI@x*x%c+$UU|r^yX^4bh0Mdw zKlf#E*!j;6&!3`RiJEKsuYJRw&X2sIkU#8}`tM*AG@cIq$B+Jj=t1BvpD=(djGLpWzdP7E~$&>FnaY%w_nJi zm=!6i`k$UpBhFCQw{+g*MNeHIdeHb9v7C%}{7rUh*Szzq565pgd6Q+nykz*&GUK;h z|Iya=hu^>8SIpzzzvi5OCZD2IQC@nDqi+_Mi!Yx8<{gUXqoLG)^7Tp_{(No>o!B{F zSl55H;SX9J%4>R!M@}(~^W>vGWUzxi{QSIsPJHoKdxVF^ExYb}7qyPR_RHe1@85ov zx7GWfG*kwYJYsF_57kGhKYnRkP3)ogV|-E7|MY&^)_0AkKOOr<9f~D?_!?3dcKE#$ zy&-%9)Wg@k{6eoC_?KU8)II3vMQ;9E?K=2u`%fKU`W!FKE7i1?R;SwOo9W?eNDTFa z;!jag>c11e((~6;??KH6ABa7bt)6BUhuL$H)6b`Osvjt7Jr%otD#H+l-PT!EL<`;@TOTHfA@~$20<3uj?siuBi?RN8X?Ru-D%D;lo;*`&W%46~X z_2Jb0*If6KAM}P(yRIHPa>qO1xASk*p_t$G{0BFnuYCSpmJH(?F9u{=|EXK~L-AsC zEUo`e;}xy1x#~gq)UWcm4?jitI-%81yyh`IvSKH`;0_(O;$=hb3adYR%a0x!{-D=5 zJO1j);;?=UiNC);$#&6DJ!}_tB-hZs7dPR|1 zOX>#c8?txeYw}c)L-A64Vfhk2@tX6#HszIV$Ec@E^wzJQcG>jmM}G935Pw6bb-;(L z*oiOfbLcw#zd!!`aNxfe?mhg?BjK$){u&oAf16Y_Z>grWG(N7sqRxi+VCQobikG9I zs{fn^WL~x%%M-qSk?Xe#;tyJ0t;h5|^Y%Bp`dhpc|B^gFec0%lzEPV${7RU((sC>8 z|NBeesl5Lf&tJ|pucAnE*2@~N)b0f9ityt{-=- zH)0uCTA$@pK6n>YA6{Pl?XUmq#OuR9M+`so_~&QAXTSgUYaQSuef~vKpLms*)W!Ko zWY$~y0fyqQk)+grC*0h}4}at961!JF>|m--7D33nB+G-_6iKU&HT|a zXJ6YJ_Waa_UmJhz_QcutXGRu>SF}2u>J>#QFPTSO1Cee0r*6eiyc$VL{l~8%Ijj!! 
zDXJgWec=yU9U6ze6M854Ext-teJ8%K?#V;W+IrtfVeG6_FxEP2Q3eEfy@UP z?4S>O?Q_*#UthHuK6Ut+e#cW@g5SP>`?5GZxW4}XC0D(oNUeUSGWRWq-?kf0Jegl8 z-iV4)|J6^t=j53xuk3xVsjTybkGhdTt0VEpUWI9#ckOviR(%x@`moZF>%VZz4Btoy?;dp4GWVWw{3Fz3=O6kt57e?&!9|Iu4NG#`1rQ@$R-`mTcbgWidr`GTVT-`)Q+Jp7l# zzp>5YS$J^0C#H?MqV@k@&j0soonS-dyd6dQknPXf>ffaZkIM8V6mLcc`s|(hD{>z{ z{H^b`c0cT(*LahuBgt>}hVp?L&-(zX^D77T{I3{1XFQ#22YB!B28AmQINiZ-if}{x1M?XcZ_5H@G25lh2fBQ9y)c%gk8dC zM(wxy1;6a|8fU+MX+74Dwe|dGk|)!_d{Wu||4v*@Jn*N!Q2a9*EFbRdj4Qd`9SrfU!YI)9pA5p@`4&~amZko`Xein zzY4p%&VGORIiLA;ch|Iq-k$l9$zJ2^_^UsQqxAbP`4q*gy!1|V^v&vTh@SVA`aU*>9@8T$rtzs9o_+qzkF0#RtaNjz;IaR| z^eaEuP&sc$k>;`fJF&3e0zdOKxh8(0DB=t2zs7q|{g{Wvcf~L}&gK1JzZZWnjYHoF zt&TKbDr>!+_`>*&54!F08=7IG=f3~w(}T~3-;RHKbY9?az5jWxdXW5zVr{)2b*KLL zrEw~UVs>=2{^Jj-AM@h#O$%724KCHOR-o3}{ z{$dz>%1@q|@VC86{qN7>u@NwLN4B`i(?}XkZd66ry6JHqi{+q)NoAB!}?B9cKKmEMFdgZb6 zZ+R96ea;_?_n&FfXG5L8c(<0`$!q?%j-2)%o>2Td22}N*`&cqB<7rBss(zB!;+*<} z$$G$i;kP*6XX3#5)z>6{*m&f`W4^XMO$8C z=YLzsRr3q-^;`Y>`d(7xdZvOf5#IMrh}zWLOLv5PEq>p4rU z71sOBGH>lLVj6sQ|F>oq2m2R(ls;KMzgq6Gzs|g>b^i0fn7;J=mvjDy z{93jSB_Ib}GKTGd;C5T+FwaeBw z=F<>+n7v4RQPuy_{XdBZo$$~v&?kCiL(QN1(+~4go|2s8FWD>N!nrFgwCAM#?g$qw zviHj4&g&u0j=!2&9Jc*gF+Tt0=mGa%>neT!=E|GY9cC{o`Fhmu+OfWz$kwM^?VuAG zzq;H`#XiRtM_xtK_enf*ik0sNe45{b9rR({o7_!LUwLm>_rdMAd-I{8#8qzp|F54v zGJCP?gHc-lbsl_=&=>n0HDj2aKfw-qC-ziE51PFp{`A}QX+M!cr*UAaR~f#r(N>Fg zpWU}(n7GK^?@hnrP_J>7*B>^NKSkvgYwI|g`tARZ>Sv!HWaSI97mpOB{%c))4yoUG za>?uyzxqz1#~-9F5WRT+4}X7L^NsCCUw;4HVN7%A27_1s67|^grj9E96$Pr-Z}R_Kjz|7p4wqNe3jdumE-d-HpCCil0n_r=^L_bH?I>pssH%R zie@{{R=NK7i{E@-f8N{9Pv1eC7xg&tH$5_#`X&0{ZXEjW)qZeOSZ&9Teg1)Z;ni;U z-)LrWu%6~|_y4UK*1v|>SyD$T*C&VuyY;Opdgd2qFBKJ4{jb;B{f#FVy?27}ftHtk zB#0kq^Gov)r+LU{{PU>~hdp+{(Ab~j z&UU||K70OMYFC+ghuKR@z8=l%6xA=ik88Re^y&{EnCh+0B)|DLG;dST^y&|v;R5Qr z2QR(ep%4GzlJ3FJAKpLm1-;756J(xY_A=42vj5YC2mez4nXmfQ&uaI# zIQ$e%pU>Oh>`9z=;!od99-uzd?tbywU7kH9j6ZRoD<@9hmU!F#z@wP&|H0o+pWm83 zgP9JmeyPm=R;n2f!HfB4LZ{U+`H$!o(=vmgHdxO?-s 
zIjUlhyIHOxf(8hXgpizsKmr69AS^+aKErv~2_v$vLs(V7VMoIfFrXkvP<9Z4AfP~q z>;hs$6j_9z0uj0Juw51r9u|dbM||U{`qpGlrQ@g&l>QGNDv2(FQ@!9ym`su{)6*-S*{Xhp) z9&wO-v#CQ7KWP1LW^o~({+R7zxbnE88$bN`&reU8G^!bXcFH@SKWs)X{j}?^n3rNM zU;j>p2TB$5>A%)T->jZo>uw4%?@&lXD*K<_e+(VaiJo2mFF)t7{RHE9<&|(|78iIa zTK!IV9@OX2?0)*gT0g$`j)j|JLv7;sZd!Hw+2q^%Z!?RFz28=xf1@s8x7YaX`X48; zbo*kSp^y`7rT#O&hUCLHom_RDqVo9M2Op#^5Pvs>Z^(GmhppI6FN_*;%%^7Uzeia4 zqCIzf@bUKf&;2^*!T!g#f1Mxxv>wTBzaHw;MV-0oHwCFP6iY`(RsV%OSGOjQY+Q5| z(c}5S2GczJ-O#&_9yg75(+k74KdI;DbMFkp$3A)VpeN5I-`;NqU1uEI>S!kMTIyhI7?aKE5ljl0;E_S^^U9W#;JVjx(*H)*->6>@LVO}8hg<@bN zSpVUpvySA$PshO~4tmuwJErYS zexX<{I&3_6igkH=XdnM4ai8$&FJy7S&p7GxX8u{i$4=4mobb?HP=8oI%Z?s!)WERe zQ$toi>zMn|v*VwJjsx=lUp4hBid9~FCqC|5h0T5kWS*f|J|>jaq0k_@J$aJOnHevJo5d8e|Fvbv%~DY|NFcX{x*i~+4ldvSzPS; zpY`?gSFV0VvF2|7jz@yj)%E)4T==0F6dzRfza~8EhvxA-dM9z4$LeCk2dN9h-woj# zGG0ZWyXl1$4%+grAKrRNSn;dN4*hxdidVhL^Pdy+{(CK%m-UJ9bMdqGP9AaJc&oc1 z{?rbId{Jh)nh%fb7@3zR4r(35L3n0UZxuhOuaw7lAbQwf7k`*C>(1E=-MV4;`mb+a zbk#jtUi0kx$KEV1w*7CY=YOaVOvaI{_WJ>SqmOpnNS^A1Vu*~_(s7+4JhiPKxyC__ zdqsYC4Z><|VE$ZfCusE)D@I4D|BTn_)rp4f4Y*Kvf^-ruJ- z=k3w2+P*?DET+;w??egYdmQFrbv2czeIO2!pW0SWOY$wwyc0d+I~@;Hp2t>1uL{Ga zfByWM`DtUrX7$P6y<_$^=-cPtzAP^G`wQwy#t~L~jgD6yeZ#iCQMc-bVt5oO^&eg< zhTEUNYGT`d$f>Qo`tF^4`e5~`ZtBNQG0pRgr#fb9p4kJ0VSUUyZSyOS@$q*<_=ZmOOSbyE>4iP6J)`f+ zd&h>|)?WR(HNWwDbSkfZUPh09avc}c@xhVpRsFAf(Pm!u zK7yX>j2kcUQx^yiwEd&jWq#^aJr6FRKaBa(r^bAF_km&6gZ$t}KiQ-nJN}=R#f9r` z{0P1NzomZAsa;b{>fyS5^lg5*@R?sIR*H^N|5cCkNbtRr{1l0U#LcEYMf{+>o@%0R ze)637(Y1MfOn=zr*^^HC!3W!g{id%r>PJtvum9Xb_45Rm>)-i*$(m>XzmDoqzs`$! 
zGQUuaip0wPuOT|{OWVKd69>IZ@~ESVPknhsp6YoJJ#4UxKkR(uQS7LyIy$M zpU!`YesJ7u`l^>-qvwCnSIm+n-5%-Lg4<%MFE7_jlw50C2_n3pG>AiVnPSzNrvOL_DykF<>AYrutLoqrAs`}6S z5PZfrWWF;*zkZq^e2}_8^XK#DTO57R^^>nW4{CnTQUi~D<-Z@=wrAig2i|eWa$h3O z`5(qj(cV8BSzMS0`Qn{t|1Z0B``m;ceZvmL7}0Bq0(v0dBfz(D>4ze5ko?rf&-PS` zqwBJ`*m&4r%2PjjRT#d0>$Sh$=z`hdteL-`bjFgqp-=zqd-us%T>Qv8;|DP6HDr86 zVb8Pwk+$0Y$U9*$9(E|?i%Q%6(S<)%@|ch1D-VA+tjbHbhg63;EuXs5I@O=%r*_!r z62JP%y=Tg-=Gk7k|3*t9){oUc`tuicnV!~B+W)Jc*AaDvVzroRzFNm_rI@(u}^RC$;)?t2ffPU|LyvB|3h>b zzuw5=f^Bgpe&!tt`NFW&f1O|H^H)vs*>2HC4`e*ERj-nttFH1&c~0XXde~qWf6wx} z9=6zn>wKnX`E!o@{wGe`*=wGi|LM!(!f^rHD?CtG?KSfv&T*jmbK$0TD8|Ns^*^n@ zDS6h9Tx_;i{N2#TRekuP_8y#TFTS$h=b&(wmlD%CEwz?*u>4R`rc_A{Fr|9 z!a7Ud_U+9_)x)|Et@upi(Fy3<`L~%mjv{&RKw-7l_^F@1nZF@i;yjO`kP%A#*ZFZA zYjxCA2UH$$ka5jc9^>N&?Rib~;G<`Vp3CCWUw_YtFE6ppqzOCqjCk*?DR=zk8uab_ zTc7HIJH+3gj-y|ou-fZ%{L=65YN|`!YKKC8U}60y?u1|Z9D?Du;6nmC=Zrm>(@0p$Hf{vYk$R$9Z{owcmfM37^$p`U!Cm#A{e)^_y z>I#Jftp6JKikU9y_G!A>AN2U%0voL26R*NFkGjmSb>gqm!5`eU%RG3=Yoo%5Z(Kk8 z&6{WOyj%YpGGXRx_rFT~O4|h(C=UP-kv&h)yJ-h9SXTocUAxS9TW4i=TY;$ z6CL7U>SvxHJkXw3>Qi0#=ozNGe)4+;51n-Rw&D9dga7x=0h5m30)4ywq?N_R`muuk z|7Na!P{$Wh{ZF+K9L%S~OZ3>rp zhhn|xsOxV zKh>X=2zQ09T)tVe)Ph% zZ?AIfwX19uZaeq9Ev9TWi~8*Nzop}VUH<<<*zNN`Uh6CU|8g!&#$&#r7#{wr?hWr~dyV4C}wU`cH2>W>6SEb<<}@?t6__UHko4Zx$E( z9%k!!|3mwo0QC#2z4lHX6X(3K`77oZij87oRsT!4ZU^$F4wht1ztl^74pNoW2|259%IOb#f9iC6sBMy4aH=A+0@YDTcW#7@$aTu=~zh~6KPi%Yq zB3tx~nzh0BpMS6qd^`ScW^u9p57he)*VL~lR(p+(cj7A@|0MO&mr!gR6RP^}XNn$t z_FIPNDzAQNC*ShmDVo1Af4;?&Jnw{`&bPn*u*JJ`c01(Q-mvv1JIucK$cxCU?Eli| z#0wB@zPXIgn(fhSCy(m-{kzqfiys~47Yg~K)T;iMet%k19Z>rNudcr=_fB|>V|6Is z{C%A~{GjEjo(EOO?0)*g$xjTO@XGzu!f7|ZaB=P40p#2JZ!L=p{+#b$oceWKvC3=j z#794@{#^Xb3k=1kF;LZieuoC1=g-iIF1-4T`Hy3DX#J))dH#HhE8p^+@Jn_-{b9Xl zzgWBM#QtAC)@Gi%a>-yk`WK4LVxZssZ>UaF z(JA%{)~5^7KhQhzr{i;+0WU@4^E{+F*kH;_{9(k4&wYK;HlxDGi?2O>?-OoD-_F1F zWpOEe{>s&_DAs)X?dnDP>T2)|nGr1RI_DQx-Y^@pAE@K#>qw*Q}%#fAO4?SC5T&jne-OY5=w zZ%_~DB$N4tLJsgN`#-ajZ{yX(Z{yAqyM9?GpK*+*b(p_7f4;@38+5`qJ!~-LCH|h} 
zmRzX!&9j&4S?<2?-}27lN26os-}+P^Y^a@gqF5iX{p)?(&}Vy3?NHRCRAv7s%ShIb zn%MR{3{YEn^)GkwEf1ce`Q7~a7EkiL@@*d2Abf04{o%@!u3z?pbABDJ8TyC4?wH!% z{~wgah5e!7D)YboP1qmN|Bsh^p(fw4)~z;OZoV_VVdWi z)NOuj#cq0`=fROb-+PTLY;oDHi+y-QU^}woALgSdqs{GeL&pVcL^_eW_#Ha7cfw2U zP{;*trT!~F{XWa`wLWyyd8dAC!#s-%c~!c`cf$Wy?Qs4xH(WINyjR2J`Cb3m>(KV` zC+9zB={Sm3pZ1&QOIUNC7wto8)3;K+$|o-rJu$V^f90jyGvga7k2*odH9Hv>KWKV2 znTh$6JannwY$thDe)M{V9C!LcQ@_1i&k7q1-f_xhH%S$-u^$a5M8(ZdG2_PLtTAEYkO{I&V>El&L)xV}zZrl3HOQl{nCZfN%+@&99}?&D7Vjg>FBDtF)T;j1Cx|k9(@W-09Q0a8YU8IK zi=(G#e$}zOE;e~p7{>kXz{6LWuvJ)powv7JsJSD$cK$Qb&*k(_6CRi)3x2RJn|1s1 zuaktdf1%hq1}gjCkUZwYc44Tx#KCl&)P@K0e84k5&x?-Z!H?-jFPyf}b@Bc8tZ%G$ z;P(5?U_873)zmC5yqeq3e0cN=^r~Yv{Zhma@_O`5(KkPNs^`H9KY3Lc!u1Q?|KNj5 zgr0T(^rPQgJ_S9y{yfpo57*ycC;qHrp&#@OyJ~)+*gh&%_FpH$e0bf^C-jMf-bsO} z4WIEWUK2ibn!lpYrlaF{u!^txVZzIM+;ij$M~Bbve#tvW?a=OjGmDGOuc7ve`u}%M zZPgd6y|#KZUe^hDCmiMlGQUtvhy?m!eAS`P@Gb6QTKoOIDRy1!h7YForGD%by%Sx_ zud>yj=BIWT`^3J-U()lPu*RAHSoD&M+vnf*)^VmLIC}o~-w@VE?EhC%9j8b?6AZ-; zkx=Tt=GFB+9?XO1&rtP^LSPhN_p=ZQSk3D>W@*>z8DKRMhn zYNO4zIH(UkpMO(5Y}?=FS;yjChiUu!^tw&cN#+xZMieOZUwNsH^}QzJQa}2jcM><7 zaTW1{@aez#$x}V!EucSa|Gg;>4mkOSu=A*2{QZ>?OHjWZ|F@)W>j&3AH`Nb1wH3u` zpAVvM=zHa*^9#l2W2%h@Pv^(~_*g$`k>0-kPI^x9KOM_dHB()d)@NsJ=*u*o|wfY-M+eg{*|oun!4y4cGdhsv7?OJ5)I=! zv3VaU^+V^0KSi(k_^}O}SzIhm-%`I5o=*{e;zzFv!}rd8YniQX)~xpa+qVBk78l-! 
z?fxfC^(%_i&No}@;{La39p&x+3o*57`%n4a$w-Fup(*~l9M0PDD&yPps(k#C^&hiVluNSj*e%v?7JBct(ilNvk z5=>w7;nf?GPrn!s#GfMhRaoNdIPi&Mdna`mQfJ~PuL{Gc7l;30=Oc70nP-2!zv%P3 z1P~n?SH}}}du?&8i@sU?$^6(Kg<|I@V*PibiC$fP9%I`xenTCPIzi&8ZS}OWxKzbU zekXhn>bPd(Pch+zJ*Mq@{$G!MBkcRq$V#_O-YXYj_` zTy*wou4nMJFRi`RaqaUDy#My99=J}t|J6a%2?}d&|Fs`xYh9J+pQ$^Sj8NASSz}vnsY3u`w^(?#f;p5jo^(gf0{AXVl z7rXvvbN&5wQ~jV5o3&S)dg&W>32TxWirwOa%Kon`JnK^~Hv2DhiG$t=&usdlh#!Ps zdcP-6^^CWG{&4tuU*2!rqZ@}~@B7qR8?SK#_1pC)y;)q?&OO(^IrZzfVwKmXr+vfr zPPpm(LLoP-t?a-2JxqK1Z>Ua7QR|_Ppm!2CTY1!jAGGIF_03P7>Y!_JGW zcj_C{-w3-Namw7kft*IZegBweaY5hizn%DnRbG?V|KoqT@mW>>*}qvov_HmYzBQ?Z z^WN}5>H_h1L->YH#wpqA@1_^rZwGJvnS&k)qaJ&1^1_3jWq!8*=~aEOC*J?l!86o( z3%k9hF8loJB$no}y@f(<{AB%vulgLvGB4B7etYE;2U9r^4z1fFnNnr_IYH}_VNF|GA{Lj;%y)2q<-Qcar%Z`HNQ~o6;n(7ccSQL30gmD z>IdnA^6Hm&@+}XZqWPQi=UY6<^G^6SFKiG#HmLrd!B1~^#^1j-uxH3F$2|My?5XJ3 z_g|BLi<`PjMGp^y`=RsAph{$0oCeT4ey7wDbno6We2 z_(6NWLtpdJM-L<~#VS7jF!p!*erM)>Q^J}*Jz(zdFkV2<_Wv`pxRi8T;zviU_S!pn zOq}x%=C5_ec|C+;pBO0hU;EVc_b*O$XA0KO6)e?{o+5FcN9+`tkD(L&lHE^#7`y9F zZXbKxZegvRMt}Lm6JPh5XaB#@Qhl)E{;R_Ji0yxv>M-wo{&py2$5++=x|2Nm!t-XB z#EqButzSw1ji1Xn>GNTEs^>xGg$;J`hb@1+#pQ?psTH<&`=7quZTFyW@4tB#7v3N3 z{xgZ6JW#UQ?-TTmIC|;)LeZ4*TB6|H{IS%heEz)j`x?umFN*lPq18`5^`*A@yXl4H ze|gN1Ig36R1}``2l#?GQ-~YU&^8yFy`L~ApK^;%L?X_2bJw5)Mj&E_* zsmyb6n2fmT^wZBcVaHR)PTP3f#loInU3|SI$1H}Poqua)abZ7Uzkg||A9QL{cTH{e z*X3uW?Rt~A)l=*r6RrQ~p_6|9tA4MDPKs&V`lb3MexAjJdE5I-QV+USnChl>xMtT= z4mfH{7YCcWLdUlMrbMicfqMV*T>T(*DN1x6J?)##o4lqlsVfu*M2Gd?Nt`^cV}Xx9 zMdF}W9kY{q@q^ayhKz51@>CC98~aPg{9p)E`17pC(h3}M)-g0W=S45{9TEA4U8=rWZ z=NUh(v!C^Zm4Ef?3zqra&`cMvdA9%QlYpK7Y^a?l=)_Lf)LT0Lj1G0H9g2gZg!Nx_ z(cyEE^&^)NdH?Y6Rj+5I<=1)92Cr zs*j#wg`d1C3nBUaz$ zzDwVp7)Bm<@cOHay^HbcP1R4)>HTLIS5esQwO7CC$c(* z8^-ewU$fV})+em?`>oAO>*9Zq&7Z4oO_2G8;?U?Q_1_7vuH(U{UPI*(2YuqVIOVT8W7v*o$p4r3|9ORO+Sk<9y6kzu2D|XXge{NX=E&D) zhn=?H^H1Zi*b5!&f1lJ$u*dh0ho++hxScKuJH zm;X|)e@N=jlEvyt`j-0B`GrD$uwL2!^!m?S2Day+s-I4H%sZ_EKS*7m`I)!sd9cDy 
zUKNI(X^(DxcYbT=Ir!ba4;MWFJ^TJK&*FkVKP#U9oQ2IigjHU{EB*g_O}O-p`a*GJ z3{>`iu;js~KZgHT{m-?&MlxPSpXq~lng@2_hXY4`;ybrIvRyd#*Q3w5bQ!y^fSrG< zsUE1m|FrQ`Us&Zeyt?)co4&IjqMxBSDvFf)??jvHc;TB)E^+S!iG!)%>d^Y(fu`H& z)U!Mt2i*nrhpj$!_n6wW8DWdtAAS6UecJb5YRb5_-=C`X1ik-~Q@dOLl~6`icgpZvfeD6AS7t|ltJ^q~?{`jeD z!n(^f_WSH-rlM=d|MVRks^9-Mb$l?Hk3`#_$N1fl)miJzXP%)rCI+hd&v6cX+wY*~ zI(5_gby!|fFMi4Xh~K{?d7km`V=JOth2g4AH#+mwA2h>_`&@ngJ3Te@?Dt2lEH2iM zrFHvns$Ws8x&5y3NaI=;|65w>KXo^RABtn+0~^n&j$-Nl!HC*l{PucO9(tfp{8oqd4IW5cm3gX1-1I)C zKWuaKg9q>Y)fr*?OMiRaQmaox&p!V)vbeCF=YNX(pKoG=!up8qe|^=_`Bu#@6kn6^ zTFQ5wdFl0Rrc>+W(MQm$j@i_qh#$1)vmv_XCr|azUC{pb3|;;0$G&sZr9CU2cIk7M zz4`}q?EW*oGQMq(13ICk?b#xHU1V&T}<;oJS{)b{a@e*bHA)UvqvrS zTra}*lM9#E5AzPi2{BOF|MdG8>H<|CJIY5mahM)$@uyI#Qq;*&x?*L(em0m@uk}>^)Nr|P@EVOD*L~Q z=)wQD?SC3F4*jt4D*6o1JIw>T@Owt>@cFRuM)jUiuhc%h?Fa4iZ~LfDUxL(yzM`<& zYp;Il;(HKm`kvaMI4O#h`mgigItuFt^)zFeeZD3RdMEMJ#t&LN7e1dW%&&F8H~hH% zn&^iO*S>a-3nrWuHoo!qd(IrPK6>{1m!|OOQ@&xm|K5)9@C&QFhG+L5cEYjOPg88> z7mAakqtt&VJapijPA+lp1c`&GpSnSKV3jU>^b9|4|CJxMduYSOr{8o#*k;}1fBuuB zc0%7if6bEllzw-Ws~=Q7@wV6A$z%G4emXzq9g4{@q169G-(Ej8(YO7O^3Y4SU-K)E zdhmBc_=Zl$M-N-En_k$bnE3QTOHT>=E;jO^m3N$mt{wk0RUcgDpO=4bIX&&4cfw28 z_)F(M8^UBf`WcE-VxZK2r~Fxh%!@qwYYO|O=Wr~2NB-*5r_!HwDEo{PRe zGPpy&|H=}}j6=_k{~J0ExRTy~gYguF)n2Fb?Em#wr$v#f{`+Y%hV?0_!^Tx!eR3zi zD$o4+{P`Al61Q=%tC0HAxcbA&OKvpj}9qigrSnwG_ddHXfu_wU1O z`^Y8E+B+TznjXKu);RtYLveacEcIXW>T8Iu^&=PC>QNqX&?o+MTzDX_YxZyE@6u1k zG3>$*TWz}LfxT;97q^U#1GPUJFa3J^02`L^oMgMP5HtVcWo7Zu;G5Y zKD%W5{!8?KS{4_pW31XvQO6alyr$o!-yh|g-xOrtq4;KWl={#7rgiddJFSV|JH=^& zb@@15KK?;Zk@4BCuv26{hKz?EHrT}s;ZeZ=;!*X@+*u1O~I48<8TUQ7Jmbz)uT1E28?HC|sz^6HNW!UwI6 zRBzh+`K3H3JP)dl+5Pl~4R*Py_|_^Lh4I7JeRAS)?fw4&SzPS;hd#A8iC+M`VXud# z`We3_Hf!(XkzM}=FBiY{LG4hS83U#5-zi_Q)DLtM9>4p558Akzhrb(o^waTDJE^lu zHw^#IpvzAF`zvAijX%El?$KvZmwo@3XK`Ws%8$A4pFhC9-}=+ixKpeNvi43M+4Y}l zGY-!q^@ZYFF~Rh^@GJK}%_x)ofByQ17uS0I*SC(}Exb0p*!#mfXEPr@|CYA@hRloS z0~GH^?0;6#Jhl^b5<3)UMPjM{PV!6FU)59>R6U+g&}+Wgj9bM=PqCDzdg$8r^fCQC 
zqtBW6Ujsk)LC@&xpPPQ#PhLfb{Xgd&z)~Gre^#+jAALig=NUvd6lX`J%Kq#3nDBXD z;&sG$#KF{WbyW4g5zDaunKK>bqi48){;+s{>ZS&Z4b^k{txZ%zxLPh`x2xs z##0nldF`F}_}<*+jgBJo3&po%LaG0Zmx~^JUVnxOkL&K>gI0&~@pr>C&pWBd{Md@! z^umT;KVpsJ)}9(RZQOZ+TVjCMJUjod{ju$AnSUPsJheF6t)b2%OBT~pU8^$}qS-aE zLvc=gQ0l*vIG-b!myNIaUU|g9)NkWz-S9x0uj-m#`RExgpg$b)t)c5ZI%rHd`yVfT zVcVx#=&}Dv=a;`5&p+7f1b(Iq^D^7XBl1Ht`>(@VH}CV;&wS=~`eXfXW^tiDKU~lMJM}Ay)n0q`o1T|g z>L(6{LT(sV>c94b|IxEP<*Exhv5A8|@iQI>547#B*{KH~Jwu)^Cw%xMR&RCYxjpQy?W%HuYY=)SDqdJH?z3d z_RsYv)Tbz{k2wBZOLd$gb%ClAitk23W&g)WzV)M~dZ6kN2fa)3l$Z85Us0^N{b#%c>+FB7^v+32+4zQ`-z(Bxz4!p62DhFMdLTJxG*2{r}d+=fc~CAo2>WCxd)y-gYP=| z8{gdNQRZXce@)gp!1a^suXKE2mDki?*S>k>+xstd=aL(W^W%fc{!b8|^@Bb&#m{)e zLGL7<+VH8z;?$*B@~fWZ!|$gby|BixF~|Syr_(}h<{PKqxW*%1^X>C*EsG1!OD^p{ z#P&x`iL2bJogZpBbs7?mpf zpML($WnkMsIacQ;+$0Cw@cp5`U_j+M(x+Ywvq#;8|gdA3Qwx)@R;8 z&yIhZSzOq@@(=X?PZPg)k!JgUvD)tkrO&?=^9#jAF}2ix^!r4ad05|b@q4GJy!xGD z!w0R8+Bfr0oj>2=%C~&R!;cMi@rQN8JvUx5d!MlG@SpFxMNj>#vgfOt00cj~`?{hK!3IHrT}_;Kov?X$`^aZEe*<0n z{M(zwh2LFyj(-~JR}^dR_Fv*LY%w;?fu>^lJ*xq%XA>TfK>G;%R_n$#mF{@ZiPmf!y&P12_h2ncrsj~l? 
z2Os?uiG##b+v;h>VeNZ*`0Qs(c~1DotFpB}RlIP(&8yWG+TpD5#TB01<&gWD@a*`L zd_}wdyq3j<=S8gc`>)wf9@XKi?ZJk`nO`U_kB+MT)35sSlE?UlS|@t&ut9V{{M`_~ zA@k62J+@*uy$}vveudq(9U8W}{m1vL`$7Bq^NCqpyy}3t{Qd73whKe;Z%mqJMV+PV zpOsIX{)OU-m|fL>K1VPw8^0-oqDLI`PU5KzzjFVdJfFr_J#-h;AGX?i&!3!i(wMN# z3eAl#e`6*(w*T*yy6ydUQ$7EXt6x#9^4dG`(Kl@C8{17oc%hgUAC&qJuPHq1qvp{+ z?<9_0|9mIk^3YSnFYN!|@#ktAz4pS~KOPKgEWG28CElKF{ohaL(MoW6J^#`C zudA{?q6?~?U4QjE9mfe;9^)#Se_AIGKgfK%6P_V@AJgBn%-MI&dh40XdIqk3)s@R1 zxg`45|E7)uE)}o;`Gsv44T-b%P9C9yZO5O~S%IOrDhgEge>2f9^&`X=yB{{=n&ru9_zu^ESLAAd5x0f}F2@5FDofc~(>Vqd)B&EEII790NU z*VNFVI@vroqXd;WR#!zQityo<2fYji+6|Kn7zp-9~|u|x5L_@Ju)-U-k8n2T-o zpr^d_eg&2XPZ57NWIl#EPRUk(H@z@&owd(fb@@gZ_3&k@Pn@!cr|yYaT2kJ?@)GyTu#WgXt)PL2jD>5&SZi4XYzl?IWU0a^&&^Pl>n?K*; z@oZY?+xkmnpbaTapC;{ z%=gpt&kc=(%-e~dwO6}7OgvU+O>NC*o}sud21@-`eaV{p{4_Nm9ngs%J#?@^t3!41 zcf&N#>v-nJR_vx1_CN8Gq5t^PVqw2uZhiYNj$Hyh`~J5ti%YlTPsSH^dySv#z_IQ4 zN#~CpiXX~&Ez$5ULG?2)^6dG5uRM+;;Dc6&j-UFmQ%v)$KGiW>>qzrcI}BT`xN6RW ze+nb+9{JhD_xTWgyZ_|mEH3swu+qo;{e{+5Pk%SlP+jWI#SX=fWW1KfT_=jweqXWo zk-U?~JedzRh#$n?4dEL)9hZ4vD|XWhOKp7lq1)|#Ojzot*Y5r2x1UGX`cEIg;qm@U zrqFRQN?d`j$6v3 zE)YFzu#3NE@I9MfegC4@_N*{!z#iisTEc6degD{(#pPe#|E#P!)YF*19f})dyq3mY z!c)KX1D)pldG%ey4?SOGUQ- zSwXjd=B+5K_V&)}2(4ib7uJx^FfB9Y*@|APm{oV>|qHFhm>&@a~=RZfqk6%|?e_9%^7~k~L z^KbAI48?RA4_()(&vkUn%bv%CPaLFqOfc`M_{pB~g{DCa2xb~nW zC$F%%S3SG_WM&qZ(s6f9{fc6>*WSrv>Zfnm>HM%mAvf%_=NBEtx}xHxecuyy zd+nV(rf*hfE`GCBFBCIkpsN3+?EoEcfa=Z^tjp!p{r~*ckUXA;6s=yyal$u0Hkk4f zf5@MFaq=_k9v$-QhOIPy<{Mt)w*S|9=zkthf8IA0+mLZ;S;t~}>F2LpXl!T9GZa6L zfl~ifzpnMbx94BSWjx}b^+$D58y<)bJoA(9VwrY)2hZ||!w1!mUKsMiQNJDj&dXuw z!9RR%sUzP)$FBcrW^u94H%sa7e{=OKiq&3wCyz}p{rz>W@tUC3Q`{UKrT#nRw>tS% z{U^Wj_=kQdGCuWUr%2q8y3oT`O!HGaJoDfC4tn~&-NGN~zDitz3A*HWJH;nnXKKYY;Y zNcE=8pI^$;aXhFxX7|${R+_Q@K@-Pa7gm0MncseP^*89Voqw2|#l?<47xeS5-nYc& zovSW&s~w74WxS-H-bHuD@3^fW>L(5o2U9<`)zi%4V*G~K_PTd!Q!h5Sfd25s_luXF z{oGmM(4X$S-49o>{h6Kr?A39>4fOs$x%xrISCnY`^8kJGP9nD5HpOP%p|~wNO548^ 
zUVYh4KJOof%0mzIiQoFJ^}+)!-jMOgH^1urtNyTEzRC659(jM5xZetAzkjvuSM2yR z8OQ#AD)CcSmMmU*^bMY&&X0MA;`XRe*?&Ec1mF6e$FvMx;vjzSI(hivfu>tiewv33 z>htKa!7l!A!1O0hU1`ronDRolP8i<){X6&Hn4HDM_CMp~2K3&kUs0@&*#C3>je2s; z*ZDE;P}~s%rT%MPeLcy8&;HU-dBj2Ql053D;^)FopC4@RG!CMN4R-POj2d<1z1J-@ zwrAAg-+kq+@7?S*&%S@}%i>Zx|D!yw7)kS45$8OB)n5~`*}2%EkPG%J`=9>)uAw?j z#Q}o!9rUi#k8R_(k~q4Grlb0n?!HAY!DuZzZ=3gbe-dRY{hPRVc6Z{ ze>3#NmxCMp=vt5d?P^}f^xxk9n^|1!`rD*`-btLg#G2>*x7+#8nq-9HuK2+E&-SBO z@ACWis`+{+`oyUd#1G;JOMcZu7oOP~C$9>F`_;qmF8AV*nJ%UNwd)yo>bE@VP&EGx;bW&r9bWYe(fgSGo}~xfe8IS#F6~)n!~1vm(o>&7 z-@gCq%i?11zgvIw^*`iGR{MCUK3;XGvoU`=6hD>mT1nigT@$1)>GqGm8&W6OjZYlD zA$2h?Y(?^_FpS;(;_F{H;%hRidHOX`Jl6LmRqBy=>3mOD0`( z<#Gq1Z^xf4)dz?C^Y(A|on!rLXgnEDybJRF`?Hu@>c8sqJF-%rYBD1A69>KKn?0Zl zAH9nCp=XGmj?-U%IBeLyo9_CBtHak%KJNA(ZF>>*+xh=IiwoOleqQ|l!Q<28B4N$# zxAM()@`%2nZ`+s7kG_OrW=tscU*{)k?X~ID#D_k5sh#?(;_y<0Z|FM5_1KDOerktZ z9{A!SueY`fJ8!wiRW}a$J^8l%_hxai?;m^g|9_b;Sk)i(SN{J(Q}n4T6hD{oTH^Pv z6M0{SZ?D6q@)&PvvFm^6%(pz{(MR*o5I%N_@V)EQT~L1*v(?^%ce~({u|FZ7m8m*iOT*v8G-surzU=%#EnNEDi9vXew6*J`N`LD7|-VQG5ujHxA@5W z8)L%Gzj^<|&n@y2I=26BW^thoKPF!PZ{HKc7gl*qyzV51T{XW@$QLG6{bvL}P4!%Y z+8^uJ42ffdR!71oUWI9%XZ(uirAh~X7<%+-<3?_A!d%?$y91W~9{N@1AN&yg|Jj<( z8`SZ|+g_XA3d*NW@|xnedWv60N2&i#ccs+>1WDIZR%xy7T3Bg&N!-XRwPkr^o zvEjsJdc*Y7?nJlF_g}qPTx|RAa{NQRlGQ#As=WT0`RIH@@t};?(s7A9wR;7vPxuw_ zcSGs~;iu07eh|JPZ&~dpNubA^KAd?sE$*F z-w>Pmh2o*;C~g1n^G?2ZVjCvB`sXB$K4|?@KK^c)=9R{EG9LbJdcl8V*)7jH{j3m9 z-1p4AUcQw)`~2HfJw^NfmzMfL=3P@iwiCPm@4pYngi`;}*+BG|S7|?}JmR2F{AnNI zfu^hTFn=;HbWKmk>94pUH_=h*zY`wU5y7{mwqYcgZ20)&*EZrCOnDGv;XI~6(0FDVKVPfJSw^^@q4F8-b}IK+xU6H zn+V#)WS%qP(qdxuExmS%1YhUs7h&y_3@J{RI_$L>edFFqJ{l5qs6jr<6 zsa^X29ql{s>(m#D$6{*b_TLa?`2V);-^n;$(Z*vtNaAd_@C_^Y=!J*hesZ&e-{ES^`R3{eo!IHxtKW|Q z)#m-g=ApWwcsxqbH}b){<})w+DG~>Xn>`?l3*+Mld0q4RB)<~Tb5S-kUKf8je~ka@ zL%$gm&K>douJ^ytN1pY+rF!5%{ry){{h(7@QLK61e@Ar2d7hac^A5#tBeAmodfzwr z_B>Lbj!zu)iJ!iM@Id&~XLgr<8c)YVzseuBUa1&<=BE}7Ti>_HMGw3%1)d%MFrMOO z@$1L0^;-XGSzH)Lto9l|_rb%q{HAc3cPQkBNu~Z%N8ZV|?Nt4yuQuQNpbr{Pb@6w@ 
zG|#&%F6PHp?4}o1{mhByKXTgSu=<5NtTN`C?f)OyOLRGZ1)^u$T|;>3_~LE9ANT+J z-`~g7QvaQfGgGisM@!>Qai(DXyMpjR@5FC)=V~_v;iYJNwg;zqsohV1aEFcE^yr(O z3?rtmzW%d6*#~|5{;N;*!Oivhw}$$`0cz{GV$HMv;r}n-rFm+H;)$4A>c8gI)9X)i z8OdG;%B$>~Q@-+;Z;7wWy@6iF#!v?$fL(insw>|c+vqMk)p)X!G=@GAacK&U0 z78kqzdzhtX!m7npdFub$c=k`4J{Q;pk z^^n=)KIIt4zJKh?;$qjqQCCg<-~h4HHFeb!9@{-}>`=^>@mi|yICDq1@(uoy|m&z3tiO;C!INY;B6P*fR0^%%e)lxyPp5&1rVj< zpR8h`KKh1!IzRdt3b|mXwEb(J>O06lr9S22N1r%|pL)!$>Ob>N;+E%hJWzS2-^EW| zVa>fC{L;7185nB!e&S~fty3f4&i^#CxU`Rh`?vo=2drL*|4$9zNa$Cz5V{ckGD%*Q*iQzQ-&Pi?Dbvgla6CN^>NJN5S$ znP19Re{lcx){~!~@kki?(u}^@VHVq={r+^ajt8!*$3Kalu-a>%#On=-nVEJx0$OGc6pv>R#gy zd1^Ow?D~g1iwoOleoegp_947q8fv}5Zm+$Q$Mg-``jtzZ`a<#F(NXHZ>etuqx`r86F;_Lp2fxZYO_6>pXbF1--8u?@~SXwf5&Za+%nXK?T^oYG~(Jp^u@OST*p!D zdi`@v_^iE?NA~+)we5LpNSyjY@oaQd^`Gyx;9K8w(eX}knqd7@LHHo^1kIn%pKo#c z3_9VXw z_mAe!HNPRq{6Zl=7%KJO36J|~!#BMFs;3CAE?=i*Uh^$aZTz-9Ov~Z|FGb@!;dQmC zE9I*{OgMk_?|*B?)uAzF?OhIiul@fo`^;ZI=lb)8#udey+y9(L8h3*B`B!c0TP|_t z8H(QMDD|ItBz)^zGp1RespJ3i=ikPR+I-7ezEN1?zuujbUC_S& zoN?N3{xIauSHrot-Lm#Mzg&zwJN`#c_4D}mcb~WWq)<;*v6x=cw{%|Ep?D!GmHMB| z557H*O(}qSY}^&a4<9t1>f-N)X&&R4-^qCRyXl2h7asrV2X1YK)#lv){vQu-fB(q$ zzkR9)Hq_1&bYiD#>ay>Dm1lj^`D2IT#VAqgf0D<%&@Z9Znf_k8Di41*ER925%mZ7o zn_kE#)Ry|)!9zpXYtd=jFKz#yX6JwCw_^TSJpOOeFT+|k-^FVkrSD&Ip_5OYp?E0< zs`}6GnBbFd#|JtdanL7zY(uTz_{@7i7MIGtyCjYcl8+6lKYZ@Wk-uH|f?I<>asLAk zI;;Kv<9lauvGZkxN z8&rQ7d&{_I_a3x(SmP75wO4uVcy#Rhmzh~yO80$is9#a6x&5y3NaH$=?f=y-ZU34V zidSM{W&c~EYyHqX#-)DpKkohas2=?kO`mx<$uK{*Vw#`YVe1o4xa6BRycD+kpmnqR zYWw~(6IHj_iMIbw{KEQ(4#lgHNdLT(Jdo!R9oz2nP9AZP{M5$J_F!?I z*F-n1*YeZn!)0+nuL{GrmYTWzPBV53ckgxO#{W2A7W#Johg@`Qo=e61KOEtm`avCE zyzRBuIDNym?KRKhLVcllE!vg+_nmzD#(vDZ&N#ZMAKS3inP>CWd6`byXKc`k4t+FC z{Wal*(Vu+rh!NLs7gim$_pmQqH;lg6`Jbt(2d){ve|Z5OD6IC{J9*6b^)1!L-{_1p z-%z|B1Eu~CQ2u1WQa^};%BvsJ$*;;w{n#loA44blCA**g(DU>aYrl8+@57ddJbdu> z%Z;O-cKkV0^%bedwzsCnnRiWW*51h@yZ#McL;O~EE_NvX6dzRepW`U2L&u|T^oWDr ziN4v?p@<)3dtjdCCr|azWn8m~cj5OeyVU@OVUVA5xxo*MgY=}R# 
zL-FSrsOmrGHJOh+kGbkOL3nk!oGA0^w>+(v@y$O&^08BdkDeiVI!=H6Vca2OSDZ1h z7REjJ{69APY%lutW)_#y=dTI+{Y$QKMX}n)`>Ac`KdG~(`PdDyL-A&OQ0l*vIPc^1 zrKc)IptAq}fBXH{=uJ<0X3`_8^^7_HM@wydNc;Xv z`$~WKzR7UAYWIXCB`Be{H<9|$l*#E*$-!bK& zvEeH(41IE+Efzz^-hUfeT&Tk@w@Lf}X5;F3!fLNAuJfaB=o_j&^9#j0QKZy=C)&J^ zz_+@Z5=WOf=$*t<8$Pcii!*Pn-~4K$XSjg=u*uh-*zTakUD$N?{cDEXUuS!=>p%OX zZhIeYs9ozsvHJ1;>ogCY0iAXz-i?k@|CPt@h)exIN0B&aeNK4z;ek9KX&l=-ts6ua z8|>l_qX&NK?YFm{5=P&;O>3j#B(Hh4|CyS_#om9{)$^Y<^(%_iUR#|yK7BKPuK5~g zzM*(821@;R!sl~@^(m5kFXZU1>i0=oz915A5Ra8T`O`E8lhaoSq?1 zzU{BQ;?L;W=ik08F4q5s+D=jR#VW786CZuE`q61hoOy@hFVRuzzmAjM&&NB-H&h;a zAo0|;dZvnw#dEQVo8PH_K~d}Tnjcm=^eZplxy!Jy(%)~H{?$=4@Z0&fzAP@b|Jh#e zzuHtkh%V?ft~P!1`18&<`@K;7H3q8s&vAhD10B%C^bB5I{?5m)m$N+TR5bro$;VES z=fjZk(8C71_`{f!4tV1B$$NxVw;ZwjW_MhJZsqy6A@TFqLDrv^)XSRvh}k-ReUy06 zhnL!+cs~+N-)Wv=eG9Sai|OQ@JmR2t5;vQ^DB=fAH;G%ls)z2!^)pUbXS0J_iyr-S z829eImwe^a!_lkke=S}Bu_N0nD6?y?(E;h3)tL*|Y$tXoK8O!0`>*#grhe;FF8SUG z5(jOZr0@9Qfwuoo^x!j&A-c-%uRm>ay#v@+>aa@1^zr&*&=(Yd**S8ZY%fmkj#G{6bO0KvnoG zw|a^>GG0sk-YK#j^@o_aK&%aOE@j)^V@`XLm{-58U!beZ_nRh5YjHy-qXFGt;cqN=B71SpQ z!Ut2n`Q7~aY25Oa@2imcrg8O$bzVAn^>O>n{r=?(7w22}vFOj%1^lQ{YHoyCR^S{zgr^n^{`Jbq9Z{>d92f9=qmy6pJB zSLXw6tl$6U>IdnMqD1G}|JRi76dQ`&chQ?s~` zXMUIX)%N{F^r|pye$86%+&unj8D*aR^&aBkcYz=}R-g8de9+=fb+m6@?KF;Vn6q$9 zu>QkW?0WxO<)H`q#E)&*%;J*rZNKXz?msSSe@c1k4@Z13e2Ir=?iG$2y~#o|7C8i- zoqx!)xUj#l>kn({R}`zfrq6tD?wxRKzugdlxRT!pjyx-N&ZTUes|L=SI)BbBJ`F8x%%Hm@E7#Tl*O|`m`{*y0R z^Sqw4Z&ttd9UbZma~AC!&xw*x5I*^a2`~ML;J^Qd|Aq^uoDEPd)L< zHyUBu8Q0dHNAf%?>a8a{+rHFKong*@#cUgoJTQ;p zuE#&TUisZI`XF_I_`4x|L+V=4{y(_F6)#-%=yl=YnJZfBP z8Ri)mtm?nUE3ZGSWpQENVa_L_UFtvc>+R%IZwZxGzqymoI4R%!ljqO3xRbc$V^<+{ zr*ZX%=F3A@s*gG(Ol)rci#6}v4c)5m-}7_z|KD3$2Z+AvN_3v%pN*8C@E8vab3Pe~ z)_>LWNgn4xZNF2CY4*CHUscF>U==U*!#7OovO3kD=BIWzZ-qUc`|k05QvE#pYkvPb zQzG4de?dK|j(6g*_h0(i5I_B+t}th@7_k0pzE`a0SzO?ICw7X&LE@=R-S|Q3O5@mu zjL*EV!3sa)gf;&*{_%5ny&;U7^uV4I{#ZlTj(>WyxbS^my5IIr;^`W{>3Lz8->FS~ zHL=5-PsInN{=@4N9`&=IH*^x``_lUDoqWqfPtp97=g+rzlINZ9nWx$P^oOy#{^a(t 
z$L$tY|KRO2mi_5$^6mY%q59y`@%sPgZTqN6oHccr?c@>v!$REprMk>7%vn4pRQ7+C z=)t$`p(TFPS6=<9PQK+0=;WJUb+A)3eezwWj>iU5Ug8hSUNQB;9WOjFEI;JX<>kiSb<~>Z*!M5wE9UFP^KbW6 zZvU#E>EhL{E23|GYY36~g*i(^N2&ihetl>sKdl3O^g!>DykuVRz^eMtN6!#F9jCwk zu>a8US8ug-2nQYUt+Pfv|9kT6`jciB7yP;0{=Z@M>A1pbudPn{)*~_I2g1V+bC!(6 zQvaRga~~V{|Mvc;eNAnxi+Ol#unRxj{Ke;MyC2}fPk#2=%a2cr(gcG_yHgt zC#zUYuj>3i_2nWE=6ohTsO*1w|D~Gb+4G=!JU^g!!ZVw3tN0DoZC2ztjf3c6gI)YR zBS)Nm_SyHm(KGTd51+Hj^!Dek15|%O0MGv;bMkn14QeJ)cPQK;QM@9U?{tur2AMy1Q zFWu+-JHyDqqjz3()19c_?*BPUDk}Z|LR0;qQ(IB2d7jU7{{z1%Oy(cvEEOH4{%fE3 z+za3KJ2lba`Gi-O+bP<0FqWr!@XbG?vmX2)eD8ET5ItVJip>K# z+^DJ4f99iT{b*=?O+}4+#rg{3hYvDO5Pvs>Z^(EReeR|g22TFk9Um@sQ&@J=DUQQ zxBlt)@Ic;=jHkLKyNe&aDhwMgy2!atocy=2{-krBSa{{5snfpy?aSiA`4rFZPh08- zbsqFXqVw$kb-!WtH#DC-wZoicWxSTgUBa6sHuGUTL-@-3*#AGJa4-RV5U+kW5mPCWI^1<~hu1DSuAvwS30?SJsYxAAk; zcZ&M_ecb#1P#^j!5;tT#oxjIcO!HGa4Bm6wZH^myVOU}N!`}Mra^1K8itDe^dNN(S z)=~QZ7xlBBrLHhX4piwg`Cxt9EH3bQ{tUGb#6j}Sre9t7%&U~=qz-i9nN6H=u>T+C z&O2a^s_Op((gk^d0HI%!5JC*FloXIMcexLQnx#jiEJbRB6_8#YkR}SmMiGS|f=DwW z(v{Gw5)l!75rY&X^-+o-6a78jb3SL;I~g{KzW(0($9(s+?>YC(?Ck99%&a1Oy**EC zyZYfLtk=8L&W)pQS?!PT*!j2qEDqcMH&m{5qDbwfck)zlN;6?`2mfCq$M{rDXZ zf6(&wBz%dCKbXdO@@bsOswb`rdzZO<@QdHS`K8`vu087x|I#V&+xdt7EDqcLeLVhg z?6*Pj8u;9;@k-;guG0N?jZQl}p^y{&rT)W{oX7S~c|i3`=ZhbDWai8D9cEAJGrjU# zT$0y#yXfHy>pb(Zm9AgAKdgK4O3&@P^@GH7{T0VSAaVGCSu)UrcKqYSk$&4ebiSdG z2L_k=uW=GRu%4$VPc1Lp(DnI2iO8(%o=fQ66v+%dz#!N-sP z)f4Y-2aj$4Q?fX0zU%7$ujQ%-oyh6Z`1E%x=RTzAh; zuUa~+KmCYtFaL56c-Y_Ac0Wbt;ny7a_o;zPAlMEKlRCt^{XMMm`ZxN}kcdzSk@Yv> zt*85cjkE9P>YvWrCwgS&JJbr9P~>$i)jif9Y4ls_&kA-WJ=;51$=>&dB1h z^KV@LP*c64NbRL}@|yY4H`CL%9`#c>6st!?RsT5-pufiFln1_kk?Xe!;tzT!dc$1h zK0*9aWIl#Y3B@ zm%Lu%+5e~e`O@_dxkRS(5$%WiFCVL!ceD3r{~ZeXqG0_`{Z(dOXM6I}c=E)s z<;jPB>6pKh{e7pXng4$y#se`)@8DZCSsd6+ z-=&|%>pJ60`kL57?TSqg+Thh`Vf|fiz4x<+{)~Kf|F70%{TLbF0Gp+J!q#4zUF)sq zVl#b1Wwq1CP^=jZmd~jlI7MXZ2YL`b^{aoe)8FFoQ#5@(dw;Vhao&kPeKUCu^$(`*yXQX@>j2XoHw*S zw6Zw-2z~yN`gz4@{LlhDXwRRg&Rm?CAN!$D$c=iI*QuXkUGF2He&P(3A3NwZ-el^i 
z^3St4cwd&{l+T0kA%k7?VZ+03_|0o~_JuJI6!$icTm(M*{B34&aNO;Oz4NaNpx?z0 z6tDIYJxJfuey8&bglZf4NM> z&UaV#U;WbgTVAa@wO7e$oOd!;(<3WZ@t_YAYDY{z>5Q90W8)_udwAM(uW=mzbKVUk z4nMOcLh zb^6fi|rlOzVLx>opItYuW@$# z)5zjr|KOL7r#}Z--*uj8T@v-7;~&2N>U=F;`KdD$@~W!;rQ`ox>jRz0@PSsJ+D+Da z@B?kVCU%R12V`EzU>AK@`|X2%QD6Sausy)sv@psauYoFy@f0 zdzW74XVVtkY;Ab#`rB3(hxKDk{rxM+FJA4XPwYH54}R;H&JQ^hGI7g?9lq(3gZk4v z=)0lSq5SxBoQIsEQ z#;0s28k>J`{3;k@Y|IZ;BuD@YqvqW^vSa7Q`Q9o?xm+P7!}YC;TN@^=W)6 zharFX+J`1l+3#P|m0yu~TOUoeYaDfm_9OQHxyCgG;SGiSP*BzX zdU8C;`Zsi)eDL~2Pn{rspz$?2`S6F&ut)q%b}Dz#hdu85`_zZe9Ut~t=#hn&{Ml{9 z+y1AiJm50A{pYGz6si6GNoCvqsi&s#$WG)?Y!F|R`mc8J8pzXExuJTn zCUUy;PBc6>(&|_LR1U?4F`%ly^)bRnf9$xdDSG&@gWicfmGNhOX3xbRyXkvW?-HC# zJ$zx?1Mj}$vHCe-hZCP1I{oe`yzlJzzhCo!4V7!1DAGr4|N6g0-f0{>Ju2sdp^zH| ztpC^*Oa0e4ju)9%irxu7din~Y2k8Ur*Ywn*eDK0!vf7EO!Z2a89oKq&qg^r`#IyfT z5gyB@^$?#Te)J8wYJQ=R4cz+ggwf(!B3m7~+CloGe*BJ&KWKdFpX!lQOyj)M{NzPe zOyg5IxC8&W;ZK%1G7Q;h=8yXxn*@K=_pkiyc>T##>yOUUCwVg+= zg+ksa#_u}gxQ_{cj~z_oyh6ZtKNQpq@IR&l8^U!D8@xYssGA@U#TDPf=%JC z>z&Q`gO*qQ(Raf%&Qlk8krlh~g%PtJS@WgUCx(%Cz5MN=w_XT;{h%xk<_qROiO=7d z#^;q`l2@W6!HfPrT#O&8Ce|6i~YV~;y2g-KhoF9Pae?fC65z-(<6ha zU!o5~Up%_!6NeUI*xg4Ce`%}1UhQ`L*`LK>=bty#>mORGR}^WW-!;|uC^|vT+gP1> zr=9tQV&iBi_1}pD@tq!ddKhV}^Rh;VKHNLsjhkFK{IO3bz^o9F3 zS@Nb~*FFP}-T#vC+4Vp4S20TlJYM7Jo9S~2z|OowAs3jG`mcGojuij@wf@(%KFw?S zRHkmjdDQo=y6w-NyZ({Yd)?OG9CXCQ_Vxe!X#V~L8!Fcl)cm4tFX=aZL$H)rKWw`F)QfNW^W$Oj z=YMm+p)Y(79`3*9_Yu(Q$YWaj9Ymz|{@!HGU)R2QCl)?W$cG$?O`}6q|It^~VSFvg zGq?9YO4LVOe@H(;ClQtxS&=%cFbv)7#iy^>_r);m)xUiF&-ER=#@X>tBa4In&u{7r#b(h!pS@E*oiD%p;?MiWQ2p_%KcVrew>bKzX!@z* zkDMa&^U7xk-(2d$IvYPUYl(%93+t}-)-HP=u>QFam?-Ur$l|k_V2{Q>PN0X>a4;rW~&EB|Ed11u*r;1 z9dP2c-}cI5`=9-@IQSk658JIF{jP~WOYh_r=Pi*98)Any6nRvX`p@UV105EBF5mu~TO#>QPbEfAa7=UXzpg z!-pR-n8u;+hH0F4Qoreu6}$0;^_Cy>!H{ik47KqKzwzK5)eeNjyAa#N0yCMFDhx#yf|NeYZ_Pen8?rYDQc;-{|(T+c-WO3MiklX0?55J;#m6u-a z^v&vTh_A`H$f1x6OiKON`TfW5f6@B9qV+$|;-Eg$r+r2SmB)i!^dWzH=c`V+c2ua} za@0m|L$L)8eTw7kwzUiZ3erum0ALhVnHP 
zonT%4>JN#GKS*7m>8H%z-|W=u#Lt8Cs1NS*Pffh*)3=4yzrO1&?>zcf>fv{z%Hz+P z=2sM{y}wW8I{wt#=!|E6p^y{ymHn5?)9rU{+fOu=uSXC*(7R4OGV^77Fn{Vx_{?AV zN<4F^54Bql8MDOv<3sI^`5M1_;br(Z|IdCOEY+uVW;%#ZW&8gHCl>3Q&JQ^hTStdd z|22;1*jOF8@_|Xe%unM@CU2F0!pDAjZu?KZDh&I5cCn|AZ5AJ|4~{(Lo9pco z!i5KY;#bd1gs=4bi}K{_B*#Ch&ngD>rhQ{RDTZRZ$W-dTlLYJsh-1HH$Dho%CjMQY zJFWikbwlQ3NL}PbR_w+XR(tiw6EB-}c5ufoeCl6zEZ_gzl6hD^R+Jk6ct03wKJjiZ zsk3zbRW3f*>1!yqj|R)<62IvpGcS)lMfIy+*6D9?^h?q7t=apVJ&E(=MUM=2(TB0~ zJ-lB3rh~#pKU;XhPqzL$e0Kb=_0W&}h4}x!&mbF;re%5M#|HUtf3&jo+ z-%=i@h+k7=>qoA3F!4+G>lTNfB6?x_i0{8ioOcqzJWWRIez~z~^m6ef0_Ik3ET}zEFsxtyk9>$N$LVZ|}dn(+@l7 zo!CvL4n_1J{>;bp#3^6uXEOFK{$Yp1&iuusMK=jMeDdVmXWoAj+wI(*|I8`|_0u{^d~u4ugZBPV&yUZvMK@`R}u z4S6TrIXK*Q`g=e6{a)?suRfE-VSQ++%=0&>Pf@Jx?cZc4ujrdsnfWq5tEbp823TJF zbw2fVqq*JxhRE=+e&GYX@}x5U#G4&n&1-s<;WMnz6IX>{yO-Dg>4TSlC2T+9qDjLq zc^E$Kf2{oeOMQx&4&rTIS{HpYJMn7gI3^T3MW)LB50*sCr?mfZop$p}^l5$gfyS$K znx1-<&x7-*4?ADKsa^# z=Z2p2?BibJ?e~`?kNy8wp2a~wyc0QHqOT7T3$oPk(3Z<_Gc zFBDAs>YeBfbCsKdsXwxJVz<1=RY*OlUG-tYeaEiwz?CnCogRL0$L)snd-b#PpUo@| zw$J?Bc>7n*{*wNS*TDWi=ary$g4~CTZ1p$9&iq2LYgAPAzpi!TPhEyi`0%Sg*2!;i z^d;q+I(z?8oaV84z}HQm*u$RRzWk#3Up^r0b;>QrAN1&JUhTI1P_Y!irvJorGEG;@;$=(QByxq{jh^R(WiaG4`g2WnO^IK&u|{~ zy@Qunchm;!&eyyAdwcx)okLF~AJ?1Me!oA9!}i-wWkr$NORsw4lgp~w-wzt%i=Fv~ zV)tk$ZT~tyu4A`;?o&Ov@XZK%ARQq_8zFD2Q@-_vTUnt}U+fx6X`p@X}w?5@+ zcZxFv>pu{LAGA7>^~F5#OEHb}PWVla40h}PsHa;CFVlZq7Ed!C*ihK;`O{>y*!UGJoV%Ip8>TTT4<+!N^|`kx;Ez>~~B6njLfQvaP~ z;CD>=W&KtjuYTCURG-G-2eLg>#%Uhx#y6Mxu-o)I7eD8FePQ=-#c?Ztz5$dLhtHLm0!j(@Swb@-^*Qrw%`po`M!EeVu{aGCLIlNtb{#g6}XLGfaUs1FJ?*}KZ zI1dlM^^LfO_=RH6_@c7^dL29S@lN6ll@B|JJ(bb3J(TP<;VS?l8WN9F;h#}}cH#8v$_-ln`_ z|7WQAs`yP$zY>4!RcPA}@lNYVWz|>lpbxuz|H0E|o;)h-^qJK*c=hh~{YU#LZ!^JV zACq&DL$P;!QPqF;W7O}R#2Hrf6MHF+zV!)@*>jP- z6Er>ks>g2gNc6;2Vc2ue8?Jo&z*gA%q)py??|akWv+GY9SsZNtep!9~K$1ti%1f_) z^bI*(Z`2nGx$&v8|MGag_WnQ5;;7<-m%Jv^M@96Y@!{t*Pb&YL`Y`T@Ew)+u$VbDN z7iO$^)E^tXk1Nl=HMEWtwJx!?kAv7Q?EY)&ttpSjG2c)$qoLG)_2W8T>k~Yn@?!^m 
zqNgqpKM;SmJJVCI@|oW}>U&o>apU@s9sba};tgNecg2f7Lp@#l&wLfLieY^E+-zj# zmCB)zAKohapGzX^hsvx!_^^ZMs}MiX_?k(+dF-=y!egjB!QFG}TTir>3#)%@@WA7* z_QP(!e@)BcD4ln7s#hM7+Dn_K`qkyO-uC$iU2m)h<{b+8;Hm8Y7|DS@?^i?3haL2Z z-s;o(@dJ%7v0I$-!8@<|-XY)q_4N6EeSh!JD{p%9+gG)p|IF{d{aGA#{Mk@B??jQ> zORGogulyezCo#-B6#GX-W&hLT&s^eddrkTUuXo~SvieacdXViRwIjo42%qMeQ+?QC z%uWmU&6*arIQFI!_CIP8ad!RHj4TfO{#DcG-!@baYJSnSm(*+bpHgQM$9zL^faGf_ zze{kc$kwM^?VwXx{pw#98Gn$vK+|ho$SLA)NWLD;i>#Q&r*arN`p<`sy?(PW?9x>} zdBx!;z-!;Xa^+RbzYw4QaKCMzEw$G=QKa_wfztKo_-ozN6^hTsP~&&Xt9~5E;_tC1 zh#kbwWaVSN=s~sz(DcM5`QRasVHf{!@AkL6a_c3{@W9KbEcp1t7Zb<*kJg7)7Kh(N zuRn)ZQLN=9bH zkF1!+r*iL7N4_-kfcY2cU3QQ6N8EJvkG#fJo_`*t`yb}3nCT$iWSw6ff0fg|QCBDq zilMgNoiKTDhRFD*JgHy($DRHbM;(f$pFDeivnO%%9eaw{tu810=>PBi{&jFHu+)E@ zcU_-j%si{+g&o8%mFYwJKD5vChV;wg=)V&^yv8$^`fy6$v41}E&~f36tG78|@|ErL zpYk!7+2`&I&9_mP-ygL;@os+)tLp^3Qy!g{)va0@qvy&i_X@!Y-(T{p-pFTX!>{Um60aTJsE*EW{nv9K;N|`o-j`thQ+@sz+b@{(U#k4j@sFOjXT92fJBbU$VKKC-|8@1Jep`>Z zWb{rDzxsnB;}0@V(DYMg?{9YMb>ip2dDMq7U;V--7dzs?u+f1Bjr)AF{roZc8LGVh zk;ik|btcSTs%iH-l}qzpilJ3mOszpFn@Tp zuclYo;+@194>H)rf8c*#cDdryr=1iwKI-6}nP*O>4|e`%n(`}J9l7ciMQSgt9?f66 z{!Qb^2X81o7fCAnulq6ly^HRdVbZVE&t&>i<=;@=#BTGd$ZK_ACl4~H`f%}in@$-& zBM+BuwA_22u1|))^84?$d&VyS^bO2(5TD9+{*!td;()WQ3bp)y{ed#Szi5vi}-yb)hnImvMk2p{`>L+VLU>$2^iI`b?J<_}LOTA~*y?N`jdKZz%= z6TQ``e#oHlXdL=(n8ta{XL@ACZhT?Qz1;!3zVb#G@xtBby!!Yg_&EPVJo;RAKO=lI zi-W$g-BX8H+n>j!^AEYkHwDQPiqA(y)%MT-2;*=0bK&t$5IdOak*Sw_sXunpC+oSy zk`N{GID8_w{LE z*e@4*!+q>}cN6vFA9wxzk@|FAQf+xjy*!5snLMc+iZ4Wx zQvd1K49Q`Aqz=%Do%2@pA9ng%9DItV@0-29*^@Z$#NXzD4C0Rrs=jxXLC3uQ{@4q8 zSAAih-&}pd&R*?y{b4JM!}>8ix&K}L#cMX(zvi*$Z%|Jz5v&L17m8z|!SXrv(>Rd^ z^3+tHqWZ(f`vV!Yekm{dZkWb-&1ZV#q>d`yFyU*r9)Ige3xu7%@zCjq|Lz)i?Dv=c zERNFezYW!cHIdV$@#%g8nf*>GhvHbt*HV6$V2{Xr4j4~U{nXBU@k0hJZ{m-=3e!06 zI`f*W`YImuVWltpe$S)7uu52E_^(&~=3|EoXq1AU^055y0o53FC)GcV-M zpY9zTo*s7OUk;qH2=TW4Ps!r2`vB(l`D2>DE`Mv${ytSxUPa~?isL0;OYKe(o|Qz# z-}(ff`e6sX@|diC^bI{o-%~p>e1`C8o;lU`u6XSA=dSR`)4eOsc>le_mVF<7+y76= 
z;;{aQ$Kx=5C-$nP@oC?X)A=EX;)F<5+Ws|9eH+PSeab~78VzMZW|7I2k+o$2Y zo`2BiMq2$fVW@)SUhQuS9)l|DRs}oYcW~9Ev{4*AfQr z6t&)ZL*!CF)K2}_LG&pfdi+52py{b+PLaA2fB3>S3x*?4e(c$>!?@pWFeN{U`t1KV z^DGYfk2F3M zr%OI~oiM_~c@XB~v4cJ8-!E9dRSrdi@K1vCpMu zryq8Z_*6z;I{snaTA#&Huj|ZLS|8ZE_=kz^-7CI$-15-(A0>|8-)%e9e&zA;pTAio)e}cunGW>z^!!gv9Pnp-hT^PfsOo=R>tJ5Q z8EPM}gOMPQOZ?ln)tHevhmOU+CNOiFa20pcT%2XyuDHJ$@1DwEcf8i^KXc zQlCGQ@QGJ@>9K2m`Ubykuhf@IY$(o-FDzfeLmc&c>|nz8fByJq!)Xf-E*j&*xHB%= zsTjT+^RfG{`?5If_{XW7C&;`N#oGQnu=~GUMO-fWP@EHARP{f-p32r|lArYhdMEs; zZ1Jtc-VhnP#gRw(JUEZ~F!`CY9^C5K$>AHj?RxyeJGGxbAh)4p9Pb%Y7kL%4ib4JK z&8xrdzo=VzL-D1^RO&x*c{H~_e~C{~{rKFce;{>%=)0kJlApK|Vpr_O7e4jej;DQR z=-0yL-#d5hbzgX!{b=R>XSv(r890vD44HpT>SIZL{BO9|y3nT>igROVW&fu{bG!eV zkGj1RJHJ=d<#dMm(~mT->2rxkPBD%1PW(-e4AK{5Q1xN={jd7ViO)<6dp>d0XI4II zJUsUNrA8KqZLf2C{zvoFM=KBYQ)Ta(pp*T1REcE$Q;o}oA|hF11p=gIYL zwjT2sR{H&`>VF@YCrJKoh`%B8spx+-_4YI{jIHhOW~NuTS*U3E~GDpXN2a z%J3P^qrP{Ig?cV;JvX^`#NPXzzre;rsK_f03&mB#)w4KeYX?to}|h@h5L6CP#;={@3+)H0wu0`I?GO z5Ig7-Ju-R8Xa0E>2X@m_w^KVh?}7^bOhe+tjVRq4;VHpdaS%qS~%IXFm2m(mE_}F8*A1ggxZ`g^!meG<28;t zL-Dm}sOmreqk%v9Qp67WBoDG-GmFFgb-loAdMAEfIk1^rPU`I&#GPC!Yf!*A0~BNB)}T zRTQb+ZiG3??WPG~&$=Z_sQE_~yOt}-ign=q;N%s3vpRFp^Szb%h2p|!sOmrGb?~qJ{GB5F_0t9M2dxgR*Yx@9 z{mq`t(>w7uK4cJoWKi|JJ*#Xy|59hq?CrU3hvirJTl@Ok&t`F0-}_Wv<&uAmZ?Ro~ zVjbvrjaTZgdFoxR|6xBDii=`kssB#+c#eVfqoH|Pikc5Q=$+V2RzLcN9;6=Z#Fu2{ ztf-s&X&u;^ zZzwK~@y73j&MVeyBEv(yhCQ8j^V4{f)sOt>LAEP+O;4Qid2k-};qq~}eBt*`P7UAP z@20hGxoIl(*zfOo76<+DgW}@{*78pEiXyd_@OZV?8)7qkuCm&hUnnk#hN}Lj`z!im zsC@X@G9R3^}_I&WkV}5&Ljk;|6&!r;zpZBTU>_m}f zv;T2oDeZr9(UXt)hvHk&P}%>f5{Ex^8L}O!-~aFDA2L7hq;5m4GmO}CsSykPaLce( z>x2te9@D=5@BsCn)`>RnhUyhXnoa*TPwDUf8;VOKRjL2*r04&0tpij)>IA(L zKa#q7Mfy|Hn%ny6gV%+5690>5n&m13o+dR?Fg`Ke=4~HW)up zyvj?~f9d=`bu}f1zJ%i2QDObZPm$+d;SWEEK1J_3^WYCw`LiFxjz4^c)Tw#U6IX@d z=f7Lxj++J#&UDac+y5kdxm^BvFn)%bA0Ci;=^L_#w<(^~7mDvhh4mjl#sAv>Pt~L@ z=0|;{`l+v?|K0qz~QjrSCrJo7Gb> zzffEjCtlTm_^clZkMWatwMb{@D=aRlAAL78e&RKc>5&z?@r7r;y7C(%4jK{OT=1C_ 
z@7ynk$F~1w7KinJg?RqwHMR>wtw+4uOYh`$>H6D-=*h?NUMMb)hRXg=mpJBS`D+QE z`qjVJ>2Gm8o&Kh8&EDVaNt}0`JSNYfzIWw^=DY5od(ZA&WykHmuG}V;@__0`zd-NA&t&SY@`tZc z5vP0}gbx|)q7U``uX_27TNem^+l8Lm^kWypXTQI+vN)_C75{%tAH}=9^iE!vp8ueF zwqN=ZiYsD(<-?D8Op9jQpYXo1_h;f)m(zLXPaf(8P2cR~M-Sp}=)}Jy&!IjnyZD}c zNA3P{Snia2emC~b&(IIM{-LS-;4){%KOnJrH`LC&oyaV`%69z^ez|z0b%)}+(NO9? zJZnl^Wj~Z3JLok&mC-XFvr{Li@#a5=GJI7SzPZ#w_fGuvIpJFyZuaosb{$7QD!2cY z^!k%Li-Y}!NCU_Jj#q-nw*OH(c?@gfABrpEi&Fo!4?M?)`LMkhDnEA6Cwj}Py!e5v z5ByADwLa!i-@C?+*Nhmk>6qT(ufMa);KoGq+4Wcbu^sK}5F0AzohZ^r^naxKQztx$ z918isEcM?>e0`jt^+WX@R6gt=_EcuR?7z&OXK|GDPW5w&%n$!W4m-@Z)hZanOZ;l94G^>4R);n%Ysrye{0X=QO(KUUEHU&~dm zDAK@oS>u)3ogjVlDjTXl^A5!|F~IUA{@llef2sdHF}!@dsXP{^e#{p=XuSAQM=JlD z`mobUH+=bT{Rf9#zw!Fdzx$K#QICE9osq@C^VIyH-*`u-QM*?j(Ly`|&NhVn8W zl|%6Z$=6c5OZ@spwm#L=4&twV_1ioBEv`r76iweZdw;Vhao&kPeKUCu^}Rh;PT2Xt zPwdp&bJ(uyUjEqP@Yww?E#(J?T=1_S!0b5@rR`nolc@Iku=`)AzoEP}LFN~VYonsH z{cBwMKc3__R6qDY?>h4^PY^$_iVuJI3^lLGE{mg^f3N%2{s%o@EYiE?@Dmq%;GXvR zAFltIp*-Na`uy#N>J>%$i2aX#Z?bxFji>IK$f39{zOepdcVg!}QmIcGk3L1O@#w7% z{a>N|u5CDUV(MFj*|<&CktVj~(<*UePyr z()m$eD6WqI)_;vBF1?>L-4EgiALy0GWcsCu9<+WpVmbDC0>5EWA9)PPgCD4RJO8u8 zlJ!f+J>A~_G_yE3?gXvAmfAt(6K#7vdM6tD|5Np|`5`NY;)WPd)&Ei*%I{O0mc>z* z%NebIR)3P$^i7FFPSHED|Ht}2`nF#`^76rb;Zuu0arLD$zD-|j`)_GJkmo-%RIeyf zyYH!7|D^gm#U90`AoC2xjZsnRzY{;Nh<|DOPyD#<2!GJ}r*)w3hG`u2m_EthjV}z| z>EuBx9x^VhaP9l=AM(N@@Y(f;%`6U%kA08c|DNh=;+HO|i|clfSwN?G+ zJ`Vg#{jcb!cT$?^GrZ8JtIbn6Ui)w?DgX9=6@!iPs-`(|*^rssA1r#=CyoxED@Gkx zuNCmx|4-#v9M=Cw+ke!=UbXa2Ec6Z8Q0K?ILvd3KsOo?Ee0b_IbTK@`uP(P! 
z&iUtWQV(^3Y+r`ttLlFjeOT>RTVH(RZA*sL&)<3c_QzcBmCrtZ;aAM})!!fUEDoD@ zl1Hoq+rN`nbw#T?7d`ti<`)Y2!^Tqom0zL;c&BK*OHlpbV?7{)R!8EGy$U_~m@kMP zoI~C)==DM8-LvWzVYwAf{pE3Mw|{>*D2s#RJ&?M1zZxcaCHh1C=lLi2fz)4FKka`4*bj-dy-e$&Z>Gd7wUg72O%f$m;>I=m!(O~_@ zU)NLme~unIn5<8D@iUqCRh569#gXnGZToR*2jN2oyXeCYci&{mJI)yqZryOir+1#% z@)~EypOdpV?D>;x>Gl7)>J>#QFTE3;U4N*uy}xs@)0a@l1M^D#*Er5&<4=EBk6!(- zgQ*_bFwf#J|Axq=es}4ocN(V%UloS6C;s!gSDtz|to!PVe%N`Zus^ZS-xOb?ID)G!GU60<0ozLC6+|K=f_xNY^FHTzH?i)si z)$h4-#ZzkM&`10JJ45FKZm<9UUQ@lINCW-WKBcl9f2!TK+e8k9oUpI#f1fZ{_8&g= z<30xbLFxokJ#vcp8#<|{B+sEfjQa70&+RpMv9QjscE9mUuU-w0U4PP4esEr&f0$nX zkSlLPka>pUj;N^Y|MX6O-mixAU;XOxbMl;T|E*4b>H_JHp%ed-Jcs(Q=b20GeC|Dy z!+yQb+`jAI&VZ-#{>NqX{C`vPg5*;aYy16B`u!LGrg$>HP}~_6RsFAPfAKHvf7Fg& z{eez?i&H=5Yx>Ev_cuFnPW(JLkNU9XWB>T-0r}fu=X3t?;H7&U0H0m|*_Xv(_u-9- z$3O4d_Er-+OXicx{BDcAgiaztaaVj{{byc^rSor^pL*z1iluxBZ^<8C^}}vFTCWG; zLslfN3ilMEeG7p!{?{K6q096E5@brw#(?bu(RvsaBcJN|5Dad3Q> zU#a*1wp0%~mEo_cta|R7@G3JeWab@;pJo3VrT)99w(EZIH=ZOP>$kq1#NiLJK0)-| z5Pw7JV&2G#-T1-@lV`cV{%m?Ud&^UHd~0U={x7coX-WOok5y)W{fXCn;@w`NuRQ-u z-P*rU+#Q20pA#Aa;|Gv%i<{Y{omAwQ&%`={CC#8KYa1v zp51=^#5G>|?D)SC(`Hu3GK<9%py_reUgf2CqOc7U-$49drPp0e9s~_tJO!d?S;s+Y9&d2o3PxHVFkIC38 z{NW25-0_3fItPZZ;mPN(w(;(-z+>OP8d)5+{ja3kKm3Z~)n0nl8{Y)6A(t>0dnoRW zFDm;#Nc^bZ`lRvjVh6oT;^>PaexUI(e~pJ9K12BM1H0(Mgh>ZKwan6^!uG$pWK3h# zVenOb{`%$h{)<``2m2wB%1iG=S6^4q>d#fy`7!TM+!qa{{xgp$5{JL>VF%T(-q-10 z6=(W<_WovfVz<1=RY*OlUG<^&cbBjC_Tf*3-f?4}yn5sm`f1l6=2;wU5BYVm|K}is z;x%yGQsb4zIliN%_>J>#A=)dEYATsAI;b*X@2^3HtNLFzD>z9^yx@uyzX6X!bf;b(E!yZDD;3$C~9*vqdC zYutSPt{dO~9`SbnOHK1A=F?PmohVX!>7Bf$F8YR?+L1%?U<|D4f4aWxbE&2IoTApt zc|ZCGS{*&&kG>nGah`n2W3uYI@r9wwp7+Sj%Um4R{PbhrI%)Z9n4fL`{aGCL{PV5i z{^yLU?M}SgOX@9M|I<)j>Q*@v4@th3+Fd7d9)o$6`mcQ0L9aZiOntn+&93#BojQ~s zKEnzx^}<4zcgf*u+27$yl`pnB>3$3vpOt}=inYBzqHo@b#rj5^ z@`mE!=uo-+Hzl6=SbyLF)vvx?r+-x(dhz}*K7VGT7k}~N*Ppp1Z1n0rCm;35aPOpo z(({M4KUM#KWPQ+hVfj%1hl$3k-TGe>J@XI6BT-T6zsB)B4u7xu6tRQYQyD$)XS3&7 z9L&q~tY4>ga31wxx9N8-e$My$!tUXU<5vEB!z-Vif9}^jU_)i~^NONvFR2^k|09vD 
z|J0pJY$zU$FDm=Ll=xXcG>-mxCwBgKy#Bbz=q--Cil(0;{>Uj>{Z9Nm2p=-oMIT1* zwfL%oe)^9vddzvRUVVN0{KLLk9M-3X$a(zvYvBG1l?}M1 znc-KrzmQ$`w}auc{m+an4iAo&H-PmoSGyBTm(*4I{i~*N^tB;!D1IJabn8FQ125H) z^b^0d57Y&w_9_{FLzm10S+R--eOU9o8~*X?+KYuz%WUxb4Yu3cYn=W5rF~&P)AjS0 zeuD4~&*HGUbCLO6uzHGLh+j+W-YF^{*FEE3)ql48`iq@B7Ds)Grk^hU$SGR=)CIza z40h3njkD7>_|%0{!^V@Y`qtHp4uYTS20T29`BBOL&nkabF^q2+aX_YT>HI?RM06}| z|4!n0js$UR7aYHP^}`NYUbW+A`c@W)`8PyP_iN_w)DCv(v&Roq|1iEb^Z5r(-zALS z?W)5r`bYcx1J56roW)_+Ur`tN6veB&^iFj24cYou6Fc(`#q_8s^$W`+T#V=!MW&hLXZ{(6`j{8rY3DS4aJLT2+n4UQ7;5_QX7Gri= zxNp|9Q2%G^mYv2f;FZs=|7m1#SidTsKOkP^CH*d)e{zxs0?1Aq8a z#149mPi6F=+36>!@%YU#R>YGhd~=)YKK0v2XN0eAapbFgueHy=9hk*oeQ2r7?_W)o zLGsi@&yv2>hp4|xkT}Iq{3<$F-qgP-@zihIeJ*Lr`*5>{XO(hsh`c?9*_`G=%V>SMbFO<&`cpm&1Zo_|XpL-AA$sO-P;(=YpcR6Y67 z^SclJAoB##cSHOQb$%sT_1*ZwgU{}K<=LMe5q|F0Sn&CK8t~ZfkNvS7U7x>194JvA z`u;mod3qEZvzJ5h>u9LzKcC~wi#{7V;l&R6L{FU{ejwWw?<3;zH(7b-Ro^@Kn(2So z^7Uc8D?Ikn?lXpO32*8Ab6*yRk3YX}H5EVdinYANkN?5JKb;@*4#m@vq||?1Z}8yH z`^L~o4cI~IFq!Q@5j}`M^_reI&6E0>jJ=D07(Zs$OP1O9*|60I`>b~GiGzr@-(UK& zIPCg|is%1RC-D+Bu>EU(`i6|3$u)@x#c$$^%Kon}ex-gS^TZC4FO|`=otqs#MbkUU zZ*g-g!>e&&_ZxTkpMCFKAnbnH=x44PKMY>ZKk)q&v^w%E4z@dy%1ioNUs?T~B6ZY6 zhBp+?L`7x)m+$oVPGrNRU+`j2W%SI$>{=J^Q`5UF4vRx(KFD*3;lgbmxPO<4@u77A{aZra}IbMKh|I50caw>a?sYu~x`E00_gHu|4Ii|l{mbn4{#hblh&$e$$x zJ${wCQBmfGP@JK$?8`>LiG59#;1KYz49QZn8sN?2kUyj_0?uVP-)?LW`rU^^D6y}viv$t(Uhvs6z*?94M1 z{}UCZ{=+k*)8DJ!Q0s&b#GcAlPcw_d>{?fPTxI@F{5`07O`b!2m^|WdW4GA97A|<= znGdqNrV(%ZpUGJqcK&Dmc=_jMR##2zEWP8EAad#ZS29259g1gTV5$E)KYrK3pY?B; z_|@g(JfjYab1{t_A6Wck(c_omf4u*jpZSlc7Q1n3*t|G=%F>^{j5_W2_omhft`yII zZh#*sUbET$>BkDZ^A8d(|6iKKMZFCaWL)L=W2M3V!5C z<$qHj9$o8{m%o3=!QsKP`d7_>p$~p zNNgyci!UnsAH>i4l#6V9uG4OQiJrPZ{6OQ=`b^bFETer9Wy>(vw2{u%2cA`iF`=6XwYS;Od z&VRzAcH%-YBL6dL6s<-v4a($ale~W`p5xscNMgJ2X?}ULm;H|>oZaed^ z;+j3fkgK1YG4j~&636i;&)opwpBLGe1`DNr9OlQN1pP{^>zv2!h=5Xt7j&{XTSd@eEA*E|LY9U z4E(4ks~Fhpdfh&@ zqj|`e%5$jiUFyhnwmzxvjNYZjp0viQ>#yiFu5|uQ=VR~pJd1-qiZrnQal8^luGdti 
z&c^KJP`nTgrT#nd=YN#w3;7Mz4?E}+ecDI-K;xrMjkh>tka;14UG!n!d#_z|^!hWy z{x9$N%*+E8fWOZ5SN&NWYQj3v*CamF7v5*WgFXGhKf1#MBmK(oS^p<% z9&qK^KmSm?+Wm$Hq;K9SpXSHTd_(b4bSU*-=79|=v}AY@>coh;&0oV z##@k8K?n_I<-2^Y}~e!EeX^GqN~5*igCFi6YIW|4uCQ4Sw7H zm5)A!;^i1n>c7r|{}I67cyh@DKXwqkcb$Id@dMc&K+|g-#*^|PgI)Z?=9fS8r;W$# z8SdMv6 zDNYfrpCX7qXmu#B>GRq9o1Hp9C;rBV45ogGKCHImc_)A6txJOY#bcL;r?!O0et(>j z#o@u#j3_?_52sB!qS{tOd-zW3n|GEWeF zH^kqNy2y*H*o`k7`PwrxFWjv^oOsty*Wdh&QOv`R|65rc^dtXY|NrGbcjWUfiguo? zzJbWhtF--VAFzY)m`uN_{2QHlOL0!^AbiMR7k$`qm#?38%MJ^K9k2Z17e9U1hF;_B z{97xFgZ}&F_5Y`+57aoe3yErf9?&=J_PIuVHHiqtE1mfgUasS^e&iBo>p}gogFewC z8|GOY=FhwoO;5c}{5?32`Y`@8Z~yYB4^9eOjeF?)x3-@IpY4D8H4oU^ia!9@d~>l^ zE$O#C|Imr2#Zz}f^r3h)zOeO;9aKN-N8ZVYz8g{(*v%h1{)SHTQy;P-aa9C|yvW+uP)I4(f2V$mA`R4;t3E~LW4^r4 zkU{DK(RV}q4V~7(JdhQ;@r7j{UiaW%yn9qw?y$|CdS~*+UgK>4)6C+q?>80q|5`m- zheQqZ$;qoy|7)U8`xA=SqoLIQWPZ$xc&ktIc}4C!z#mNOMc)n6IP#c2$={7H3|s!* ze?502j5zVYzBQlu3Ve3{VMZ2*9skek^ACq9k5i=o^tB=SP`nXeRQ11f{Xg*u;#dEp z=5vDd!}_Ihrk~pBhaR*zj}|!(X29!aw`rv+Mt-XK|$W zk=SAeyvg1CbIC`2r8wpDAbiMR7k$`##w$k;etL4K zKXB=&hZeE_b+_aHzAO&={c&CW|1bCz#jCyast4&C^I-+`c)9+B>)yapeOjkf)n3}X zw9dNr4Suud5(952{u&jf{%andqk_M^4_m_Hogj8F)mwh$#SgUYquI%4aZdCeR34M( zP#?ygw8e(24*FRb^P@)}TkqZ#;kV=eW)=s>7kPaA$t4$Cf0B6V+DdzFVeYbvXq zd57YyXfS@Y8=uDEZ+dt^_2axN{$Lu1z8m_q9^?w_#uvs-7`e%XUkYK9U7lF>PY*vy zUHtzA+dT-MZ6A3S2XP{`zek`i{r;u>VZFi|inoQYrMym2`&xSb78@nqkj#M$*%Q?fYh{9Bjn-^9Cp+)w@V z%{%c(=Z8KN??i{v_D|eoiKqUm{SWolkMH!iIQo^WFY3ZCMdo8jUGO1;UG%*@pKmPp z{p(Nf?Kx)E`<~C5@Y?k!eOVm#`8!B|e`%>6Bu_4Smd58qgUmcrITY_k$5Q{5zjXgs zLwTEuPO#LsMDJC${LL&5;%vQYUehyA;=y^;haC>OdCHWxp9wqu{k$(7GUa;uQu+Oh z`I1*Ls~BGG_WVzk)A=EX;%||uvj3Xj`caGS_S`f2i5>Ks*JS#oh#rIotkT1apUIlf z^yEwG2#=3FXX|qoZG;!ko%F(U`;I5xu77A`anK*XqPzj{1I4SoL{I)YJk(Kvp?EKn zRP~?F9qUI;a z{r`{qDDUJ1N5<<<{zxATHJ^C5mtOU@{nPhce5`JjL-BqzRP~?pNX*On&U#RO?4Wm% z)~-XbyjA|W__N&^5A`db2j@{A&fMUV&u+i()bORpr+xb9mnTw}egAD~9U#yDZ>T;O ztXdkMP5{}~8+9vBDE=O)D*NB<`9I3Xb<*{xr84gXOMO&-(@*X6Ll0X0%I87mfed!h 
z+x1sl?eg+%-(0x;`|oG7I5>_l+^oy>=bBHV+s8pX2ixk;m7n@*B8TFG_@c7^D~X@= zBNy4$r|YzvU!u1>@{o(e0Kb! z`O)WB=<|nbSsc_OQhAA;^T5be^9w~Gye;AKPEqIaU;F->V>N_l<+EZ}*?;Z~L-1?D%ucfpMgH#H+ld-}DXH;+aoFVnZ=2zNp;(o%mTl zY9ia`fbwAneHusJF8rU}dOSRK{L{+f zuzn1F{9ohed@dO#c_ivX`ya0V$1hi2WG8Yc{t;hTKGl1$7R{BPqtws(NblD!^-=Sq z?}k=KQn%@;Q?VOgSZC)iKUur|*|6S=4^LZgvvsK7oy(O(uUAf96$+V;&%U$Y2+}9e-Z<%=za(Ffu2f9e*~nIIQ3N z|52`b(21NbjZcqTkiGh+a+oz=q$q9wIuEX6v3}&yz5RQ<@?r;lqPM(N{jVu+L(q8C zA3no*)Q4@pIp5$t-xwUWec+ytz0%qaK0E*HvN+J&{ijXUD~dF*y=xt*Y}cQpbvDG4 z`Gr~YM@3ctOV2;oc-ual@ZncKualoV)CrnCsRuoXzajb3dCj3dEcb_t_dVm(-NN8U zHeG%6?{0wC&j0l5e83G=ZY7xH7wd=iKk4UhL*t24In4T4G*tD!o=aS1Ka>wU=-o%h z;Ro9OtX2`Hd>&*T$Y2+}{r^bqx1U+!jP2+58fVv^XnyL+PKFkk4ILlZ)t&spY0UFeNpH95;F=meS`$P4r>bBJqW-U;GtI z*FP(N+6U$bdW}zI^weYalvm@;e-35%sxUnG!jfPA{YCGEU#y8o8}3#78SmhFqpp+xvs}w%!l=FsD9W%cv9K&=fZDx z=B4v7y~^+z&Z9nTdgG5@`R66e2iW+5;?-WJ zb<&Si&%D4eYl%ow>Ob@AmmK(eCvpkZuYSAO@dvFA{VTb!x8+*;-?dK1FM)R~19Hh@5YpNb}Dl3ZAem`*Cj#oR|J;#mAFUu5{T2hY$Rs*t_yt5B_%RnWw>Hzkl`1yzTij6ZHRIa@8w} z)LwcguZ?efr=0c+In0tjiYfJyuO76sL&3ewrZuAa#MJug%`y>`v^&dE_dj z?$oaOxqbfX=JE9NJJaYFnCbAECzb1}#}20V6@*!XVrXUm8`0eU{+o-;=Z^J7{kZRm z{(U)mzw(%@`fhw-{rmqmXoIg`A2!(kl>JvbcQN?w`je*egDc4iAn#NU zYJSnSmtO65{)akq@!@lYI>W4GqoJz*^(`b0f9pGZuG4OQiJp0a_<_`w+L68T88WZA z)Q45aUGV+0?p-si^81H-?|5fJ`1w4t{Z4-tht-wj5o>$tv1?uQ4SwUH?uNvMS#pA> zs{im>pK21v`w%6t znxD(!NM+*Y7Q^w+9y#&lZHI;9clp}658RW(Yuo>{n9lBRD4l=S^&npD7kFEzt(%< ztKS*5`Ad&D`0e_K8Ce{5{&_RK{xes-qDUXn|FzUVtrJ;!!Yp}EbXEVU!}{cuH^mvE zuiqeuKgc{m)A!BZ-|W;2I`N0cBZH}5q7Qpb*|)jc2jjzj%Rj%@{n^pPSN4AkJ^x(G z;^2KN(ns`vtol1e;;FA8`Y=m&Fjf8MbKLroD<9}Y#t!;KZ~fBz_<^?lH-(RQi&MUC zdibg^O!%L77aaY_%&`5BKk>vf{SU)u`=4eOhwTTK*87ig)r0g=QLOFtVdvlQt7&{w zkov+bxlpdue~qg*I{jJyhOW~Nul0@m3E~G@|JBd*#AzN6&ZQo{FnrXacYpcPYr}|L zuKB?l|G1bs?fUCe>e?rrZ|V47`xs`)iYoPA`^tK^K34P}zyI_9Klt_+ZyU4JhKuy} zZ2a1(uPt+j^`HA6o7y+9;`vwN)!yDyxjs^P=zl(YIn0s^MN9qHIR3}V`VU`%*g@|k zUn-+#e`xj!KJ!<;t};9o{^7y{CoX&V@J6^|>O}*OA)aadKOl>P=PrR(U#@nLJc?p% 
zuMhg>omj||VwfckrT%Li*HKi~p?=swpXiY-e=Cc_{A(ifJ~ch_bZQ6ZQ6Fkoym9eC zhaMSf_g!=LHBaxre%P)*nWA~XjrIN)>QfZ2cE3$_@`}EBmC2LNFU*o1Rb~Iz7e4Dp zQV0Cl!BlU3*S_HgT78M#;+O}he&jLiq7R>cbJbJc|DR#uIx#PF7zn>lnnR$azT9I|(ycdBUtA(NOBY&V&7!^(j{#Q2nrjmPhTWj2~$8YY4yT ziFcY8oJW1wcjRh|Uvk)SVgHfuEpq0f)2Pet|LW5`;CAu(8+M-({^C_$VyAD&Rr3q8 zhDMUg{)@SN{<$VOjn9cpzd)bptq$!QejuL*_?dn#`%GLFhHd+=7`fvME=+iH*yPjh zeUW^2{L_^Aa6I9;{v=mD=tNGJUiGE#Uk&jjAM*{fhDAfE|H_jdPg>tu4~bv>D9Mw~ z-|{B?hcERZsQbT)}-fE?hqrcunZ@+&{k@`J2QuqJ#Pf@%E z_Ww0rX`I$oy8nWBwR<9=^5fBjqF9;&bbz@Qb`n#N} zPYrYWwv*&te&6%QJX2LqJ*Q4hPtQzGPml1dAGz2(e%^^q98CSJ6NCrCPhX$lTij)F zET}(ho-ecIbrZ9&#qDeUag)EyK-c#F=quX)ztA_uSi~SV)K&WbBXk>*#JWQv4X*5e z!sqce)OyeZslzj`^6}r+({utzDlu=r!ML`l5&*WV^J+x5Ojff#2OVt>@-Tj_&X7+Vg7z7Nai^&BiPd^P zTDtxxdDN$NDC7WtssGC3e>6&cYPQc$9Q01&scn7BlXy~}+FReez7Vk>pX2 z6Mvd-w#Lz`!Z2msDXU%b#SKNRwf)wc;z{?f`6=-Ss5$#*7&BDh{(ehN!gQ{cpJo>{%cR#XA?X@iUM=$^JwnGB?_WrY(#bKYnhQ{l^y~uVC z3hN{NcbcbY*Z)lGOx7QYm1AOM|Mh!J`ojE%iJtMa4zsDFil4_a?DI@1PxU;A9yZv) zAGUgJ^eG#ie0`uX(oplb1^Z`#07X3b|L+f7?Ff zG@;gYH_+4h&`Wd`xF!J^dPupkJ1H+i%NA7#e*7thNv+o~g zW^wSj-&g$q9eKiT-Qus;#AkJ;wv%`$R*iQm`+u|WSdZy-iQjDH)lY8cSLK;MADnM- zCvlsn%AQAmSpU6?KKqaSl(69?+ueN3v>UwU+3~kLi-YHPen~w4c?I^@ps)t^KiXHb zwJ)59=AA^~**Y{Y6mqB3e{|&Z#Mbe*hU)bwDvy4F-bvhS{&}9ofxcJ+eXj5m2g&1i6WErYtS=ODp{%O@ zlW*7arVdcoOTU;G8$<`h-wELxI$3GSR(~hGF#NT5*Zcl24-3n#_49*%(es|yJp24L zH;V&(v0B?Xe$e7P?uw=3|Jrx*cpim9zF?{Bf3M8Iy2!Kl2h6KHe#ZkJw0V_pe%G## zA58N~w&qv*lXw_$$cta=y5Pw$>hk^e*z>fBJnt&czgkzXKbcFNwjHx>MTxfF4{;ql z?=-KWNS@lEkUzL9^`AUN>qo9SAbQG6ua|3i@D%ZP!pgp)r}KKnPI_VFQ#U<(@b~Tx zZoQEoytl;txx>P{;wMQvX?BkLa*2 z(<2TlkL?IPnC7K^>=e^HpRNmAG0jiyu+@eWUs%G|!gh;K9s2PJi_jO_|LZ!eA6$Pp z&*GqOX)Ltj$5@BWpSSafgWgHpZ01$O57Gzp%}<`{ zp=;}(Pk)#)@*_9B`Ol5}(X;;dW^veY=;`|ZgVdoYtk$j7qjCCX{+jCM zf~+qT@ffc|h$v9EjU^sexY zU+uogd3)VXJ@)+<`ige_Vf;F8RxwOZ`-WY$zEH>q+*1E_zPcP9YV|*tiP=ukr(dAg ze6#77B7U&+x~}=?qi2X7Jg|d5?ELQXL$=vRBZ`eyYv#LxQ{)*Xso%>2zL`JQeEOhvDAtRCQva1#-$3$8ebRjNiG$uHdCaQ_546X# 
zR*|QA9z+it?BEZh4?Su8;)gCA##}dRgMV+-g&9}V$yOJB{_`R(^#)RPM%tveLcVxX%3b$NP} z_s8fe(l5|EnJ=~RgBGvQgI7ZME{mfSukeRcZuz2r@<-Q(nVXEfsDIS8^v~XZHdGHB zDi>f)^@UZwrN8_Sj^$Brf}xNdOjZB6uCVpV$(*K(eY5!MCkny`sS7lJb8x=JlX|@q zK00R4qd%;E=ik1&**W)zsjuw*+V2j2934CVwomoJhT3^Miq(3*liHQpD0)MBjMG5*v_xYGylxN`4)E)r%#6XLFxeUgX#~{&e|>3rY<`ooGFeR{dK51biR+V!mW9=~k|uX^_ROXsIgetEtAMlx?! zF<57Pm=IV8h%R<0@<^=gziZb)KSlIFpXRBJKdpo9D*b$7^Q8G{|5H2Vs~s-cc;P*! zuAPVP{(PV3uDWFf^f_*2edx*J@MGg2pk8G4)x^&&`DQ!0$L~hH+LqsxIDHC*j}Ds` zz9Q#w!Dqe{iGyDAQyV{M@jQ#ebe-B&IFJ4INq)HcnLF2iX_aQUVZyx|_wPKD`Kn%j z^XK*a+dPYd=c!nO?EkecJOA8i9`%B`_(M^TcS`+Np0EbysmVR>21o0o@~}bH3F7aB zHXn6Y%nz@VURe749nL)Cs6T|Izp~+q+kWCj^6dA&v$HsO9_D}4_n$rOe%3)>B-(mB z?E5OGd0<0q>I}uk(NWcZzDI!1<8P>aAr5+-&useFfnVA$C3)!D`gNX8e(DIjxhX&W zO>YQ${`U9p9KF?D%`v)(>&%ge^O5;y)g9qr~GTnY0HM8s~vRkVHY{{?EPmWi-YYT?~B); zoMqcvu5qWR{THj%r_%2)YRXrn&QNs6KxzAT%D-6 zcqy7bd8rOInDP>T7`pZ1H%{=s3&XbB_qmlm^Ii1q`>#e82X*Cl>+%1l`W3|*=(n!J zY@OeJf8itokNJ2!L$OIDRP~?hxY8H$44H39nWz4&&gTRzPkGF5{@Lw3{2+YqglC9e z;wP^P!=#yu-n#j|Cxyv-tT1!bvP01;z5iGJ{KJdI4SWR*k9x9-LEQHL)LD}Rwp;28 z#ilV(>c8gwkG=n)4n^h%+1{w%{Ny>UA6!6xxS;n3H|@XA9pQp~Zu#(kFudy7@z1$g z93K2%KmT7Y)K}I2^!!-c?sKivDb5nCA16rvK>7-rKOdZLarz88;iKcR!IYQy!`LTQ zJZ93ZkA<<%UGt9%F8wR{cK%y4i-YYne@@T8O8mka*#G3*6ZB57u4r}U;zyVDg<^UP zRQA7D^5B~u>jag@d93ikG|&95T^~P~=6U5;BK4#^^@r8A{`mX*{MRF4^=0loYveD_ zL&uIkH?uhCQ~to{4?h4n>raoyoucZBHSl<-UFZA9ghw0<#pW^9^i#gtmHp89yz>5^ zKmTN{nfrfriRtHrHLu_7segp>`%n4{E*twlhV9%?*PA87tG{&qq5A1N``b`#5fw`P zM@LT2Y90T{Bi){NUU@t|VCuJd_3?rS!iQ)6of4|{bw`EWVZct{m+K_6~$_8@2OpR{&Q29tS=ODf#FjBS$E#fx5r=o-boxk z$NkU;tuLyJzZ0f;HXrpdzhWo7Flw=1Pkd$5VPW*I&)@fly}jt$=ig=)N2mUi=biAm z#pZX5#><6awi7!PTg5w7{TEj2eXivv>u23O4%lFthrbhgCly$Jm973xdSUoQ*WU8j zd7Fe~8`)(GJwM57o*n<3nZ;q}zm3=H|2NbRY94h-bf8b_x6fbjYQlgIhGOd&DD@wm z^!yX%Z>pa1(sld9kIlS%9wDz3*L>q+gI!r19$OK;Dh#XN^2l8eKCoq2b3v>atWV2#ZY`s=0g`gc(dfQ9_lhgPkHr|+WD3T zPtpAO;CzcGdEN=1b(%eo{;o&dS+4(%Z#3 ztG}jkt%H8BPSE^wgpZvfe6Mc10yU49VGc&*7H;BQr}#=-{#eNN_^Ir 
zSLCUl2U!<3Sm9^Bu=U@s`ONzld?I{q!*71`^rwcSXU8A%EDqbghUoW?PW`GcR{7RD z@zFP1cg6Zbv3*P^^*>n$^;_R_naA{<*y;bpTON9f_&cHLldtobA6v1LUKlg|-5vMd zW?UH8yX8t(&sxW8p1uC|WN}zOD*k_sJYlzP@z>>NU3NUe9#7U$lbld|KHjnUocLLv zSOfe2o7?@wLDubO3lAHF2jcI9@C}{TQGuQG!iX=_&OiQ*uY^&b*?7^#Z*3ia*iCht z?bz}EXQ_j}s%__grg3H8n_5=__aHICoxPo2=}P+j<b3LVoJ4H@qxsYqiW!k${fDpm9LIx?zM=AogFf+N8zyy_Ug}pJ z%j;k>e-(yre)pW0pZ(-x;gFvm@ZwspjzqU=`_C(mzgfN7r%VTa&Ram=Q2EpsiXCHS zssApzv%35|$+k~>9BUG1KJ-AZ`DW|9mHdY4sctDx_0UDfY~sv=4XVF;m2J=5?A}*< zx>uR>vn~Giv+L2Z@4xaa4tt-mOgsQ_5Ij&=N8jM{ev|&OzEFHICRFxcPS2|D zzs`pq=o7!yp?!b{!cXh7Jk>K^hdvvRJmrVer!IW&om*WH&i%~KPCRGf8R+x95%Yuj zwebn?UaFTR108t$|8MF?mvx6ic6^onS0a4!4LkJH^5|C;za|T!-cp{^IEWrL*umfJ ze(>Dm*W5F!d+eQyEq~vr_n6O)fA(f^*zuo<@%l3_*?P53VU=&a6JP20PsRE|As_ro z{nvHyKc3bPbU>YtI7l64s~+pc55iC5*cR9M7StbhUh~U~T=r-!?0oh5f7s)SfUX_? z>B-`-evH=h|5>l1u*$d8Mc=T!6K-mU;!81L^TAUrUH`42I!(ncLHY=K*Y3A@tN7^F zB#(T{Q@u`p=C8uAMg7^c*IVQhVe7-r9s0q=t@9uD)OmXoq(!7-nGY#m-y5A;DPpfr}LVhdR5PZ3+N9!-0;-Y z6ZN|dVE*CNx3~H8N!W(T{F2>zJW9vka@8kK?NG?X)_>wo_@(!s zHPun1PbqrM$B&(&#T!{1wm#E!jE1Rb}#kzH9yQxmM8syq*q=0{;^Nz0UK&7 zkL^gj&qL5xag#JKyrU+WF>h4$ilD zlINZ9>4Vwx=nvt+$=^D6we7>EZ>}}^m`67wzw-SL*FUW3yozGA9>3JCk5@kRliv_O z>kGxM(NXHZ(|kqvrT!;8uDb{yw0>zl_&Z^m=b4XnVJmjh3(GBY$c8(euv=K}o?o5( z!jP|_XP>{cKg^ryu>CIcCTKh-_v$r4bgH-i-6FB7|LOTWrT%Lj>AJm>`SDvF9r#wK z=35>%$ojFt4*t-6?1Y8u*Ut*|p>v;}K53@cJUjlGXK}F3JRW}7b8qWUkH%|?PLNx# zHhnAA-w-B!3B^~U!}K+uy!zJdeA^C~zpFitu2&tim4_~V5PxN!>Jd-*o&3xbR$u4Y z4Nre|v#{0^Yn(LXmS@Sc&%fFq+YXkB*Z+AsU3Zp@v>tx9oBGkUdWzknQdR#`zUd|X zgvaMT^uaU_erOr_75gn!eJK=HMA3lBdO^qkK`U&EP4_X~v?Q!(rrI_Y<=2IQB z=g}WVZ+h^?UH_q5&;QTW4>G@^Lg?^>|Ck}d@H?^sce2dq#zBG?|t5EqKgqPUN6Q+M^>NOwCxFKwL z+YYN7ePHYQ=khkB^*?hnvpA}*|0&T{zinMQe_bwXn|UWpo-f9$i9Zy3#XFV#*YDA& z-*hzJJJBHy+B{lMYQqEVaaF$g$=7+9&-CWg-#zZ?Gk^4YeTD7`-LIb3bK>FX*zt$C zSsWf*^WHN*JOhZ=V^CQC;r(a&{bNIQS&!PG*jwi7(YPBNOZ`9x)I2_Szz10;h`$rU zH>AE~Ua$U6dSUfVk9g}JZ}f&WUR>;<2PSrrXTQH|WO1-P|e7 ze`>;{&QR||d2U}^iH4)U%a1dgVbTR 
z@~8(t$aaOU`N`9Dp^J{$#5?dq&nK=Mb;|a~hXa51!B=jakTaj{f0|hw`28|^{o6!e zSmj&lvfrOlPffTy4y-Q}UyXsv{woo_^(j|9CrBLhiJ!Vac%ZGf(XIy{Jww)|^USM1 zY%%TS?_Bo&_F?mXf8(07k8yc0e;X3wKPY+n3z`pci26*gb!s#V{6qYE9| z|2I`1c3Ndub}lKS3ih;ulV^LGklQw zK=V7v!%h*tq4G=iJo>}(_gr}Ix6WNXjQ`P*Qzu{7`u(9Vzs9sZ$b%vrRI!gW5yt-T-?tlONXHDzVd8v=}c&BktdGqViptu-J;_)UON91oi52?KN5cWxGk9e-%3?b=bS)~$DP zALKZ&)sGH!s~rmY!`RC0e@W4`J|*+me9Ei5{zjTd9ka4H&`;4jiI#x>TI{%Y;o07>oL-FAK=@jPFYVZK`(^acj{o##aj+lt z%U2x#*ZG9qx}`4r{Hr|XA&wo2!(wt(|I_PfkY}j+>AHR5w|Oi24<8Xk!!SoY;`hSA9o#ySjtPk|6V|Fqxe$aFqqHBKgR1aO- zuIAGp>R-9~k8k~Wk+AVn`KKQ`V-|Y${!{1Yb=D8jJvN-JfueEjm-=ls-v0DB2PUDKD?;mUJarzR9Z^b}W|9KsP?^VBq z%`A@kwu10M>tk1YT&(}_`%@TXGdGLFj=xp>|E91$vi&FZl&=5QP+fSr z*rDi+cS`+t5>IcfPq{?A6Qqw~mA7Z>qs|Y%6Q+60XMUX@e3q@uM@uq7tkM8eg3|I>;HT#th?Id3;lAJi_o+4|MM&k`jvM$ z{y={vtF=F>>v>b&NyM-zHhl`kvC&c4e?5;8K6M$oc0J=Ie(D6_fmN^P@V)98E@=Pj zr|taC(1WLj`VNck`^IA9(YN0pCG+I*?dRchZC;IM6$4(q)90_62!!Iec&F5VCviVV z(E8L+e^Ze@D6f8LJKyr0@>A>?oNsyPS0Q?7Jn^F!#$EOF+_gXbTo^xo<+rAOu$xz2 z)%Rb%!|#82{0z}sFN=eDZNEXC4T*>1_;{z(f93Hz4%4YgUe)$$JSRM}nO6}%XwRoc zMV{(;5It6@MZP|M$c^bO zBSGw{^@ZYuNG$c=N&b9afBqSrSM#mE^hMG9eZt31(dt({52A++cJPNum;dnxpWa|% zSYelUo0mRu3VL?@2OY)y`}+IS#GfT2t&{J6&0muQ9#_^GiW6g?)PG$^UBAbHPrjiO zUE-im{8oqR!ULFU!|p!ywXoe^cj*1gloimk^B?Gw&X+$KUjPn; z$Glm^AYS?V--aZz?ofO?Ix73Wd^_Lzp45pR=v|VheWyNnpy@U%@>I`*=wX8${Pz3n zon9Zi;~yvNPCd5$H?lZve?3aCe^OJw>WfvrH9hSccGdbqagxl}qx!BLxsFz;Pt>D) z;-F9bR)^LP547#SnZ;p#<$EW7!wNrnRT#eg*Ckee>W#VK^x~GMFLAJv%+O-6`#7=KbFWvsp(LA1C zp^yW{rT#0gu6eA7JmV{mIOv^JmfF{35Av-Xio&PgS>j2k?ufHeS^D~!tdh4BdKs)}AzoBtO))$IXV<7FP)z#tmFX*I5 z9r)1$@q_lfO8NyKJwxT0?Xoz~tHSQlf1mNq_a2cCodX%@+w}+fM8}SQYQL)b@15|u z#V(zHo(qHdSZ^p!ivjDuQ=Ve!{7=@MV7gwf`S@*K&BG7c`f4)2`I$%c&^5jJ^oKPT zTKS3b=e`uyY@C0>al1KmZ2NC!aq#@~tLpJ*>QEF`>lPjF#7E!EpNpTqv%XNA9s{NR z>pb<%B@e#!2|eWz2fa)3lKSC+R%fDTe$_*Fe*Mf7YPUZ(VZTSV4{Mxr!dKVd|7Ymg z@wXnS+xo%pFPiFC6l>u5Q&WGkKKf>LCUL8$I3o(!d`|ViKC$83e9W8h>OX4dTVBQd z{LcZr6if9pA9~ne2Y>gFlb`;{K0o_Z_mVrV{%YR)PxS5lhq+lCcKo^1@qg`uWPRlL 
zn=S->d)=hIhGc}|Oqs7oZ0{266Pv!6PIGYHeE25v@d-3Nl-G1(6Ui0kuzpl5m|F7x1iej~1KT^96U-hkTU8>U%48_cNr*iw3aO?cH zT=PMvw({x+i%tK)w2$Vm4bHcCm+FJA!k$NeaN`yqxcu<`!9CtQ`*(Y;<2BDde=(1u z%r?;HT<#fa9_uom`b*~@=EC4{V11$ZP7IX#PaU%*4}RtTXO8gxpTGXn-Va}~+0-r9 z340wi@|G`5=%s&E{m)m@_n%JI=M}{oWc$}Rzgt9~Jo-mJLvdD=s_K8~_)kM-w)Y46 zIH3nJUuxqAEsn0D`Bfjj;r#k*SsY>GC0^fi$>U~*O+GnwoqLAdfS$en_6g7Wv7Vm) zqxtkxSmj&d^bLKsd*ZAw6lX^fo6m_RJf3gxu?^wpl9!$@XL=|f)K-5d zy)gFHY|TyQ?imi=ZFZ~IJUjk|zGBYxA9}JlN`0z{pIh(bUR}}Z&&AKsdT{!)}+Zdd;tOVW$Yc5}njnisJ`$ey;x?CfEOYNq6`0UgP%pYjze#dK}mK z)08;3rPo0xk@{wWUTv#SZPpoz3nJ0<;e)*~Z2h$Pa`Aho*el3+eCUHVzxKiW^V@&s zQyq^DcJK%H=+zU?J-0uM8$RQnAwRg5e0%+EW^u56+4XOG)DJRGE`DyklY4gj6JAaH zY`?5O6c@%oRsZXn55KDa@am7qJn+HF{?8noU&?dB^C0WO20Qq}>g#@ePX8&lht&_8 zw!?;V-a*g$-^k*iPuBm$FRX$0|26Juo)aux|6h4t1m*lC9pL$Y0^_!o1 z)BFYWhvhGP_4x<>c1~FJXUj~x=%*iJ`?d4mdb2p}_eWFX>+fUUsb5j7)-82=jo0%m z4(}4wx>;W+E)iZ&!Xs~%_~Fwxd%Wm_^6GLr$(eeDBrPJ+BSc7c;=<)uOdUDCIx*KALLQYt- z`83}v3Tt4VT<&?7pz2${a*1PuHgCcwUWI9%ccO28Y{e=a{9)7q_b=YR{vu)YS7&c{ z-Ubh#WA8s}s;`)Tx#IWN%&Y5>tbsnMZRa1t%Y{Ll=Uph|2cxC_s}9G(sox$4^w6Ix z`t|Ds;e$4>>YBf2aK6Q<19ZYSJ!~-LCH^pVgmg<88{HDhz7cXt@I`Nn6iwa;|&?D&5zi-YYm|BC+pv`775!jouge^fgE%4vQ> zkUB##D>_R3?-E`;;aBw^Jq=>n{0K4_)I|`I%4S=!Lc3zu@$NFCN2& zyZv;x+99*ivG+gCEDn2rx3qr#&DF0cR%?HQ4#;`4*d?rqLMX0?cPjh8l<=s()PE;& z`~TUO#rEpAJo=$%{<*@(PSNUCJrAOX4R-K{$&)tU>AJ<++TK&UzP#p9PfhKHAnOdpccY`S|LOPd=zy+W58YDVx`c-x9!MWR^OL9Z zcyK=b=!G?|yf@TtIW(-*bLdWsoqiU2w*SwwIIQ1GU-{t<=qh-iuv)kHSzqb<_nL6i z^@ZZf7%27M3BPpxja>XzpZ0ZE$uAoE~@>JNKQ`NmR+bt-p*8V@W>l0Lm{xk-+LveKsl=|<4&+nL6kF6VB~VVPI& zIpzGzhJ|5=AF;{xpE!!`*8maI9YwA}NYmn{VNyNT?SDSgP52_Q2@5O}5 z{x2(f){mqPulmHn)Nk{4wd)x_7r#9ZoZ8Cw*x&;CyO;diptu-LlTo?|*a6*F4r4ifdz_s{d>U)+eVrAbL&l|BwIv z9epsJ7h91!t1yJsPyEv+y~9IzZmBbWxz`8QuP;meKGDnT@%l?MOZy*qT9;&hWdEbv zE&7(<5GM77LJk;I^}n9*?Riua9rV!yy=$Kb9taPFpT@B*uDbK zyooqiIj`!%19?7z=1=PNu6+|pKs@s#`TJi^A zwjQlVSmj%I^bMPJfvhhS-;W}d{h!~@zi(*$PLO%5j->y*-+-5*<*A+r7w~bh{~rwJ z?e&@8JTn75JN}HW;zsf9|CP_cqn@l{5a)ORHrs8wzEIo{m8$w*`u%lHCbs>MQ+uvp 
zU0zP+{rG>6I}t=(NXHZ z>ejp4`K3CPhaTu%lBYcS1`mV}nx8z?BaV*Q#4Gsdg%Nk(z1@ucuM49-`S-bd)D|Vb z>iDzg`g`aqX34O6G)~{lPkoA3PjORJs_H-a^`&J(UO(*pL86yluZ(%n0r7W2_=Y-P z$xiC1(hZ}Y+WfZhCyWTA@7VIm3#b1H9XtPRZWf0ff7mKs|NnmW8-^Vo2lfNrsa``- z^XOwJZjOOc|241U^#>AO{Y9Ne{q%u#g68iNK6Z-mz3LgVj`{S5^)LSQ74QAm4Pn|n zBUbw6QLX>K;P^u$i-Z2;ckBNTCVpYH_BW|b->m*z80q>#aZ3zX|5=wJ$C1dh$GjS)dg#utpLxR2 z*Vp{{j+@*Uh7BM7>c3~Khn^jOXk>A){rigZKZVu0^-k_nSLyeUx^Ck1F%-AOMBDxo z9rp9oZ*?_wKKN?ar;8sxX!9oh7xw?%@#ipV<9{Bx?pnu&F|`ktxV2}pcT$19{`QH@ z=Mr0Qu6{7-r$j%p{f}3^Q*0=br*wg94x3N=mnFmEP6G7J{JHqO6J*_?_+fNZ z_J6r{KKm`hL=Rn`_^l4DA0Ei_(Y80uw>)f+?Fbv};O`!>%pVq6f92nFkJx*!^HzMu zqi^3o_GNL{_Fr-QUs&Z^(_2S%s3#xX4u$++x2pf8{f|?f6nn&9-(HZug46}#7xw?* z{%55}etX12?;ajjd3Dcc*SPx<@1z1d|F9`G>-Nj&^=ESRgY?yjpIfhX>Hkk_!YKd# z<;O8l>c8sqKic%mwtLk}`)NF_!))qR#1GnbpTyDks%MDaeEPeWn{n#qhx=E$M_l~) zu8;rKvmX2YCC}op=VMp={%c$B)UPO3`IdU=8+N)r>`>ehMN0kGd3YaYeadA{t1sd4 zJt}PY*m`S4Oq^E~q{VE@C2Ik)d~&hSPUdFIqHyH9Uj|KuxK9IV%Hr2hV^p>ajA zTJK-eI!o_AbIqsjn%JSZGv2A{Kj$&h7q%Be)gumiC;DbHZxufmzO6&^OZI=$9}Ye9 zg|TOi&BEbx4m|1Fn@>aE-v4V~Y<)vM{Q7sBH{gL%ZTZ$~ee?~!ZLi7tL-CU+QrZ8t zWDe_NLu}hW>U_jOpZKv2Rmb>R7wazhRnOJ~AHO1bRTw5e`P|r@F8X3v=`&C7J*oB} z^V$BtCyT@SF)F?R8e{#-HLmlE)w=ag?%VG#)V8{75~sdU{4_djz7G7VI&2-2+x3~p z=GVIMcfvH!r}begcG3$|w*L6_&F`!iRvGt$CD!@nHR!S5wD&!ISsZ=?c>~J++ECX) zzOX;?{(qe2!>c8BE_NvHig&8|&v|sLtE&IRLGMK0Z0bwmi5v>%#B zzvxpZv^rG36Q6jR=UptY^Mc%hgb@62=m_3>uA@h+xkydSD>Cw97i z>!0?GxUl~ZKmSfW<>GBlUE(ug%~MYQ-nYJdJ?pUTU;Dy-!Vekz{SS3}$2~!8`~0ix zH9hQH_@THb-YNB8d38OGLBFWi(1{Ll&?o+?{u@6R9eC!aUe)v90{XkhUGm#G3;p)~ z?r}FPy7B!tU&eg@{~iCiH!i%?e_bc9Bi4^R((QRi{lr19`DW8EMf@PoWAw-`*{Zvs z{&3OyPY?b3I-7-yrq2BB*LFXYdF=h?+$;|J{&A|>jRceRNc2Cv{^pubUQO&!+!ybZ z`ma3BV=VOvJtsQEL7(`oU#bfawC87|UC;7V58VazhsIAn`NUqg%npr1$F!Y)*pzv# z?{n4GdTc(eU!vt(c=QdsYJH)&KZ;cLKkY|N>jJeN`UrX_ywtXO@+4lNXMEK&Tj%xo z!PKAB7f#-Bqn~|#y@$h9KfHg!i|Y?(KWh8`zAO%V9bQxa|EsBfMX_4jyLaNDzIsD# z{I&Kt^M>Mq7^v)jr{n+0`mEpT_sox7#m{3I_I|ulC)L9SnFkwGfA^3BuDt)V%b(mm z(w*Q9OtXGk`$xm&r 
ze?{CLzb@5N-B3If9oB#1PI&e81g(#`*gT&2Q}j-F_^l4r#}D#+q)ziE^`L8dIuCyG zsxa(2{rhLFxZ515uC@QLJS={Dv^8FBA_)hxOk{+$-`w zhVbdTq4J1>&HHxZmQUTP8;W1V)KdSI$9Y87r<%;fc7Q&-`dp1WLCa%aMf1;T z=ivv@_o`=z-hBGQu0vkh{lI0KVb{+;{>ok7F8}{{Ba4IWA%9+vKji9H6sz_8N$vVX z<)?Ml#LxOd@kn%3^}lo-E~mO6`m@Dfm&^It@e1a#J}Te*P07Pf(L0G-ew96s{;8p(z+XGBd3OHc%q$N3{m~e`{&`dViee4)U;AmclY1QhDXmBO_(Sn% zOsMKV=MllT=TR>6cqd34O#Rlsr2od(x>%3-m5-j`0{X-7r#}1QH-5E282)B)#hycd zNL}{-YrR<<_WdKjzi{eT6svWM4oKgySr^FqLh)D>sqFs-G6#H~m%K0Xu03wN#1GHv zX}0H47je^Bz&_i2PUb;3j6Cd&>qcEXBDfL5Kl~qT=C$v?m`_o%Tl*Pw@q_4tcK(kO z2J;Zd4#h7cp|bzEuX7{IzoyN9p;!q+WjseMMonZoT@ctDa|ZV5jS2o=`k4ydLGdb}aq>X0CBi z*Gs=Z?}TSIeW~K3*N{B#6v@K|7tkN}yXVlu_j~HKuFo)ZidPu4*JAzb!gr2K=|y} z%&&ZltFGx`gB|!`$JZ9!E}OA**ya2akNwdK0Udk)*^|X#{g@(u!0FLfjHE$sOs^(E zZ12==3bMXX%!v-0&q62J~Q-&*F*T-$!{Kc;9cn0@tlQ!t)wOTtuUKCwejPa(9Mo%meY`Bswe#lJA9i{9@Y8PCV#~0{w;wxx)vH_AfBur{ zHWC~fzkh!*okv)$Tj~bw{ihQS+ZA!P<52u25-R(@s^nWga@7N!*u+7f_-$V8Cp-{7 zb(;Twy8qKpeE-hfPP;2?dfab+e}TV&dF}nb){Q@}>Ggk_Ssc7Ri8b)PRO?G^uA4;M zJ7J{j3&oQ$P}zUo9;_cV)d5wPIOr3<)uHvm1KA!b^Hh(x>CK~`dBQPA?7PGcD?Ssx zGjfw({OZ7m(c}0hj{``a&8PKe$*{QAQ*Q{t{JGj1M>iC|je%1CbsmnR!ngOawZZj_ zm(1hUwtAXb9LCSZCT@PM!#j-$H}e`x4D zin(0=b`IOEVKR?owVn_3%`<;44AzG~6i>xKW&d>^>qkv>Kqq!xd38CQYOmLpr*Y;t z|18PJP7%I$!ZSo~KKn`;CkPB|=?g~5o@jJVF^w|DO-#@P0;r&mZwGGp&I{uc_ z9g3%8eyRUXWb0cB(ihuL%|L9N`zq7zj4<9Vm?-q=YPWO^m9lO%c9=V}==%L@fexV5$;ZOJX%nzp9omA1<-=uc> zy@}~0+g~Vt7gI~yzw+w7eZ6TN)Q>)hzY5`j-pM?m`FY%uxarQPKaBkAF`qwZi%Y}k zrDv@7`j0PUzRK-?N*0IBr}c=pb(_ZNoB5q^y&~%i#j`PB{ZH0gA1Qh8?e(YG?k5g< z*B-~Vdh#UR5Zm-?YA5x0_n-aDUxlH*;^JH1dBvfk(%OFQ_m9l0m>z`=9*#Bla$WQIk`G@2;gvokC@mzG2`tO9t_6WbU|4DfDy5zwJt&iF_ z{GBk(^US9@W~;xGUKn@JFCHGd|88NzfBpWRQyY`fxBX8~7KinNc~Za5N4}x@OaK3# zM_K#4#^V@@Kg58|r}?ILv*fWZPdr8C)lX^XTOR9BG=Dxg-{MK0cfz;&utE6Pp!&la zSKM*sRX^MzOnvn6cW3?ZEw6Dq|Ef2O!@mDoHJ<-t*NOH{;@no*_WKLv+jiTKOx72Q z=cB{=PyL|g!{_+`AAcvbeyMIJKJhfqYdz-2PU<8-#W3>ug^pQtyliyYNA~|a4HzHV zeF-+yPSL4f*CWu9%FP-1~ zx-RS#)4Y=HWWGv&5)Z@I3YW~h|AnyZv%fxKml1EXAF%Hq`&18H 
zCrBN<|0Hgx>&Fho-0Z_q>c8?zpTBC6Zr}TRCw8fCiJ!W_62BojzA8`s#L+REc(Oj? z%oo<*?DEY&f5x?8!`Y+n{LQ2HqhqhX>H6mJ_owK9^bOm#TdmXTDPGJz9M*s8SFEoa z!*)Eu_G5XwA3e~!_PEudy6`}&uUV0&dLE=sY(?~{Fl==BL$9v4mVVniux`z>-+%RG zaoBN)hT6(w9pWwD5~pw2RqG4IOHsu7pUlJmII|v}uZC6qR2|EsUsZh8nZ&Jr>H^Wj z20Qq>mwmhM>y3S`?jEs9ZHbju>i3#w|9>$ni^KXgX6N_=c<l{`oxcIm}hYqpS~samHMCXOs|8VdBUk*KVXRqyYC2R zueI^IpV(zH^zHmBCv~Bl?;Wqd{da6oSgl+9U|su$%{(cF;^in(>c6fh1_y5J^`|DD zs{W(HdckyF{GBk(^G@fd-`I+s^up$6pZxFJd#?(gd#Z8yg9Cp9taOK!-hYl8 z{QSeTF3B3`lh##VNp+|*Pwa-+p?D?Up>N)a->^??)@6PtezTQVzeD`+LFxkWSAAY1 zZu+dliN5($yNaj&FmOGEuXlZI>9E1r$MfHPW;{Cf{wLAPx77c?t7UPp-HFxex7kka z@ju{RZNrAdSzjpr7#*elYrR~@06y~@sy=a$KHI#ihaVnj`^Tg{_@<-tEvUbH`7hnH z-#Nd_yO;mgs!Kk5!QSZF_wO^aIPCmSt;gombqK3`3y;2GSFJA;ugZKqqUD{U&Qn+0 z`jl%NR335AJK>qFJnF#@!lzF2lc##dTR?xs2x9L>2nsp3w_)F)I^8pcYewSA6@_}rw@k7{F3#N_p@VF z$0@?k#iq_s{3$w0{ddAEU4Ofw{vO4ISDz(*^g;R#nqPIWQ-p8Ge5?l>?BEaMrXP9K zr{26MEPu%#&-wl+JFmx%|MW@yb{wLic7oy^=y#2K%IgxWy#Bn#d47fB^_WoVKROHg z{*!rJdtLOg)aRP=nu5gPdnbO7c#2hg{N3X=yyG8lUo*6O`8BTWzwC+}J?sB$nWv=d z#E*_xty}NpK4|Yh=|isZrXc#E_;Ykr^`F-v_&nZ*=+6}0`d8caEf1ce`Q6}rizj(r z`Sii;dGv?w&F_DGv7U9p^fBKUz59Vnqhr@U&$Bp6`# zK(Bmw=4YM6!TI!~*FEHgP4=6*>uTM@u0He8mlqk!yy^a@)9CL?%Dr;QBTGCLh*J?sOJM zcxvMZEsierrl@-4F|XOg$-@TKAEqDFA0E1SN@!g7%$+A~xj*^#{o~v$4(oqIZP$)s zm2atQ-mm}P7e%c9)UR0A`K?bitwRxiiqzqq@>LIiC0}{O;hRpXhpk9n6^50cyKwdg zxBoTM!FFx?pE(jJ?f-SX@W}_glY1b2v--)`_2Cc2Ut*&5Kh;@7bXgCtn}*6K4*JB; zd>}l~^cu;0)T?^vqGLAk4*am)--a!@)hB0#ZC>7Pl^0&!867+R(9GhnG$Lp&_19#+P#eDI+569UE3Cf87uNpQ+jG#X?kh>~qDWQ$@xxDb%+`L^zYyi@`{Gg_eNzU_WLhgPr5$rP`n$ZO8wV7{>RYzl*`QaI*{<{ayY~GgO*1f zistVVJ?s=&k9RsRh#ofB!Efh(R{Z~?y$+~OK1{E_k!NvGk67hf;#?2LJK@l;bbXkx12mbyysf6#3TmpVi7&*&)iAO75SzQ>>9+$@gz&4TEI-ie?3412{7FGcSpZhY)2 z)V}yMuKuv;*1O%X`!^2_TMfJXss~4%1kXPI&d%Z}9e-=6Us0^q=dZLrJO9CHUQMwn z$T~yuujr`k|IBv2ZTGqOZC>To4{7H!kMX+N<2?W2rI_YQ;KwERWuz_aUb^ks3__g@?9`_G2@LDeH)q91wxGfF&F>%kw24IXX%x7`dlV@?5--+%2hcJx2B46r_O+FJ=dhXRj|FCNhy7v8VGLKz< 
zp86ECWZ3qQ^bLKB=aR_!LQzBq{V={09>3#&&-QAlJmMfaW+(OI2km(UPj#(sY%t}` zqrZFD%*U?y;X{vi4}bH)iQO;1ioPBH>B-`-evH!N54rk5=2w(xYkyR^|0hp%LeU>n zZ9XSTUa_7$kL>ezQ+cXKA3^fXX5K1(L-oK~yFPk`s$;gx;-H^Z{xEfwZ}i-L*~h}# zTby^}RKi_S?2glwKR-U-%rjPdA;5E-ae>Jl>_?((Q zr=PzPzpw_j_ndoz-U-$ftSvyG zUTiSsCH}C{g~#r7+$GP1`eSQf)wp4{*SNj^X=ZWIr~IvW{?%}M-ZwO!%p=~9?0?1$ z&a?H@Bp&(~ig!xezv^=xOZsB_ea-hybclng-{w_acp%#m>omXe(KB2?f7s^Ye+|3) z)Yrlee|qe}v)*|PJ=^~`vN)*257Fxn)YPviR{557(l_j?^@aYA$$UMk@7htUfyeb` z_2&t8iEaIw-ENy-dDMfy6T&xiqF=Jr-$^elb?*tQJ#y4uVX60LKlkVUd(g4t|MUS| zPOtx#>%5@OC*IbrSAU)3Z>&2P4)d`chyI0QptSv?GfeVWmsh``>JtZvr#61(w|Jh# zVcVTky9!xXRlLFZ=K28)`o54*iQnVyXYSzIwu^Z-&Z4546WwT1lm}{_^*`4*nDDsnD}0c;K>VE$z9IAJcIL4aJL!cn`+R?& z>xL{E#yhRk16KDHCvj{hWn`c9ppf3cWQ>c8gI72$i; zH%xf-hoW5d_g~~wSMT8b%K6BzLUb$QVTb(}+w=W3b`D=${fN;oZ9Id=)n0!aQV)Ky z2DbBNyPvps;^!8d5C$M&~%eH z`sf*=r}K33Gf(%B3;%fcUTeJ3z2uMsyLb8NHsn=({_jcd|b8x=Jsn@m7hmN05e^~XHUmthHC+CD!zf*jB*OP7~-?smT&I5ME-{1Yx zo}V>|b8B^UX`W)~`m3%zj$Y_rJO-@)sm@Z8SL%oI@TchAyyd0Gmr6R+qk8Dtx)Xnz zpW0!Q!~Isf-l|8b2DV@NwWri;=Y7sqyW;wLc~ZvQ>%2dPU@q91uY^|}q-XAj=Q2pWar@YtQeSA;& z{0IMj;=cE~yv9r4KhDnLpdQ}0^Zb|Sz~firo*?zw>#tLPuGkbrH}rotI!gU_!b|&O zbu=}e@ahfmqYqLSh+o+M;ruuH2{K=5s~_De3}cQv?z|Ugd?Sp#=9R1WIQmD_!~3?< z{-6IJ)OFeVllfVP>8&Up>{6cUg?OcP@#(H@C4J+2|6Tj7=b)yHe9Z{$GlXaqN zdOFX%`or|0-#PYz$CeJ$XI{F_T`%;JXWxI#&f+MY|KQZG^@!EFP3xg==C5gfLy+}_ zetF|q)qjpV!>2zyFHHYt(XF4_u5Wo-H+=Ia_238L8#>W1+4JZRZnqWRytKF>jQ#r} zFONNH1$3KmD59&bR*eh~GQK zKEe9!g6MiU5Ug8fM{G&O1^uq6j4R?6*@FOpsi;lhj@5$n@ zek?nm|9`FXm(KsMsV?=Y9s1=BzRgFyV2|Wg_5(fTu^qt&Stn?Iod-Kb_=eQQyx1Ld z&u=6ILGJ|Z{0Fs} z2ef+nmyQYOdibE$0pGU&W)_F_MS1nR#19`dURQgZ^Niu8nC5vW`R2z4Ro`QS>JO7H z+svJ{$?UM=#6vII;R_$o5BvVNFN?z-hlbjDJBl^Xf5$yR>ayd{8n?b_9oV6NnV4AW zzw+ujKYgKIL*)?%siO+vfowU3=Vk)T?o;L;dhT(@o-*hYpzP zcknY$So5TtKN$75TA230;^&+(;BEdh6OZ>~wwDp zt?d6~nS=VtGt_yBgVbX-{Yv_4@tVv-z2+xR=Rp^q*~B~WyGQ?h#y8)47o?&H6;=y@Z<&I~Y|Ak!r ziek0;@11x|Py3eoiL;*#{maHgn=jST^C;53rn+_38&p$N#mDZ1+}A|A;6u&;3v4VLtT0)Q@eC!^|uW 
z<0t(zzZ0HMG2vUh!jE3q_?lJ!dh5DZg)cS#`q`HcYi<9${?>Fp#XO$={O6ObZq*l7 z>(=5pZTd#MYJH(!PB1U^Kb&SFV0Vu?F7%=zOVd=bumyx``e7$HY|Y zKXJwMcLY3c9RE!Fh3%bGl-ksbPF39e)T4Unnht*DkynLby=xYJJ;L`+F;}Ec#q@VurlWD}&@V!!FQz@NrSpH74^%#SpilhPuVns|56}Fp zOZCu&XEyN)K6+t!x6hK{nsH(LVQ$(^b6z6fzJHmS#lhzjU(@&hHT5fs)w=ag?t}GB z)TW+1v7OkVUoI3{>c7U>KB%AVmhImsJdQiT2aN|`k$4qapIHa_6?T;l{*C`~;^iBj zeCyzfApVo;D!UOSlLimPGDk|CP@1z&H z+@1gY^ZlO;U0XeVL)ST1dd;)*|C?DHt?gfb|B|a8bYiEs)Wvl>v2A;8N(Oa?{_)XK z>c2it=vzIxc3T=Q~@T^5BCOZ-@@-F+cUE`3vYD`2U5CChxbzIbq93x4Qn=F|F@^`Twa|S_im> z{{L65evtVUCHj&6PgK5BY$%eacIX#kRsVTkZ~e$s2Xt!B60G+M!UyRqX#RX~zQw5* zbizl+V}mI#@rQB8AM$9|)X8DoTYLK6-^@YB&VTF6;wT;eZ>V2Utk(Nivz^?lJpYRN zR44Sy2eqpHa~)mlM=tvIeAM}fgFf+N8#c2zjGv3m^UM6y>og87pg(MW&uZJ<@VVYF z{l?eczJAlaOZ}g#^MKkX@6@j-R{7Rz-1@J!^-J|wU+9-VYOCr$e)!Z|!XBxmzP%uP zFy-SH_J274iu$YiOI?mHq85Gjnh)0H?WyhG?RC5+I@FIINPcSL2QAL}s53>E#X%nPn(boP zJ~vkRyO(@n>N3B&?+4vWopj00Bbz6~qi@(eudQxfZ|IjV z7)$-v_19MxJ?m3VYdGv?gCx86d@Pl3p#~(Ir z#wm-m_W!$NaabRE)E*b#{>-%cYT{3Ctxm0rzL{V1%+5t2^sf}}*t|~sUXkOdr9LHk z#6j{?8$W3AJd4BXa~1Y~)89Sv^+g`P?91c3NAG{pF{khIDX)5!??0#M@dxHr%qoW0 zIDNxrKV|D{h(PF{67Q7yuaBd!2A)^Fa?f;fwVj~#ab~-1e$``s{GAZKq0U#blRBz& z!!q|DyyH!m{X7g^_WdWvd~XkQ`21`8omsLDzm8siqoy{f>k)73*7W3St=9f9*L?b* zcIcNKOjZBW<9O(&n6B45nGZjr@{~vSb9WPf^_5CA7K|n%Dfh4d4BqWf)Kmr5^@$6w=NRTq2OC35$ zyC6-9UPQ_RiUCoIfI)Zw0U;B=FFMXM)is!wYK+ZJ$C;Kb>$kb$oxVfud4dbb`O8=lqW^4m+Kz!2d%yV z5|6$Urg5IU%44$XJMo3JfAEd-PwBckjNS0D1M{nIC*F>K>O7cter){xJKMfL8(AFK zMQSa*lh<{*tg5yCs~#Te424{%Tk1bN$@8?yYgo}wukqwb`-mPizN$FY!)tsCs3%W& z^3m1iK2c^$c;U0FEc(~Q?Yyp?f9%cT@ZbcM^L7-ewX}M)AN&uO>7B-_o%x2M77dmC z*X|9)+=ZsupG^A_*c()d)+E>xGmd3Y{`d9Vei9Hk> z#22OhYaGu}FZD^|$%h^EYENbKy#HoT=4E>F{D&~Ce%O&O+_v*7Vf7Dwc;gq=m`OeS z{^h(QSXD=s4EkVv$$D!%$@>wC4I@*j|4tIrwJ#Q56TNMRln*=THQr?MR{3MEic>wj z>3lossUzIC{O(gfzGx%7JY>QheYf65oNa%SJozu<`FCgew9YIUX4iQ7X8MZxg+e|k zmHpT6aq73e*Q5aWv4dXgFq!;{=t1LcD6huD51%1?_<OwC%`6V~Q~c_2 z`~Q^XQ-1MkEmJ#vGkq?;-U(Vgg*+&ts{ecs;ZM9F^X(B{)+78ut3&-$J#vbrJWleM 
z{v*m^y%EE&yYl-(!unee2&1?8Cp>okv6;nT{g>Ml?LJ3aPndV|elX9vTvnLDZ=Y}C zYLX`un@GNHk0U>^Phaf!7kvN+-4+i(+xyW>>Zys$k~-^hSwpM;)RkZ;HjSaB_20oL=v9V4 zMeCQ=gA7*rW3R$Au2ctgDOT~I533w^)pz!-9Uq22dfvq=?DGfs?fDzcEDr1U&>Q0e zhZnw-4v=t28jH)1*E z^+x&NwRwC*ec1A(wMTAt&DapWwDuq0{)^?Y>(9xrXwU!5vpA?zq}DR6r!Kqo_Iudg zH|1sCp^zKJO8p-o-29HipZtdEhaL1z;!_zt@4wk=;?F!xulyE=47&C>;!}V4!WLiK z{rSb*`(dk(KXmDxFE4=~`~NntTzK=D@$k=#R4>-@lKgdjZm@UaQ{Df`(V?>cS~&F^ zPes4XPxG2gzZB7fydTtWdg7P|xRCm;A*-!*&1bJaylcqo$6fog)t4rp-GA2?+mYGv zhla|vb`)to`=44@T_*s4IzQx4d^%E<`tKwI`%%^p)%ygogV<9UJ^9V9bz?Vuk_WrZ zE43#)_`_%Se{H+SkEZQ2^eXIj{-ryM!}`JfFS+U!MQSb6dg|;qrS&u<#_B1A(DFI4 zdquv-@h`3a>W3X9K9$i^ui5h~4&Im4&tw9@t_5@*{(r*cg& zeZMS@yo%^MA@7qR`QSrV?8FyFUjC*1PrB~XF#5#Pf4}^aZ^CQ)|8ry>>2n#a{|&X1 zUs1HJ?NRCYb4}wko;pKOkA|xLm!7|;@#e4j@vHw%^Eg5Bc$G`o*Y1ZuNPkLkn)jpX z!;rr&w#p^%-X2!kYoB+fH#dNv{U3e@gH}gV>SVuEq*lLE+4g^&#(~sb6MZN)k1wpB ziC^jdFOB1K;nklyLFQG3_<_c!^_iY}sS8|4ec1ViCnlWHHzn*j{lxeC{1oc5=Pxy~ zIBb0%aN@Ib?*KT-KJU5MS$fASLE{_IE~oQD4n>HLRsHXH{K09S9>I@%{6XXCcZ$r% zQ2k5t0_wwv|9$+50|(t2R^R#Wu7AhY{^xE|zxAP8tLeM#2?`hXqu8gDZ7DxwFCw~@s`Jp3lRXkO^=-*8m-)-TTb##7BbeY#9xu{yXun>vJseH$LW>^waz_K9#-d$!GRFi=)Jk|H2}5 zYJ52DmTT_Yr7pu2_4&h%SYF2ZE7IKW-x{yfuKDc$7o6%r z>d8ePiYf7h<--r`7C-9~@hSF*ANz6mgUl05^~foDC%nX&j6T6MK9$4d-9~P<|EDew z`Gw79vE@bZ+4-+p76*OJcZ}b^CtH8CAH?A&QLX(UyZ_yZ#p+M`7K*9S!T1yZdPCxw z2l))uA3JFI5#0l3Qzwz67IOfNHd*_NZ$HLG4=E7co z<;kze|KPxH{h~hYYbdtLJ{+a~!y~&#)yF^8kNJT<(IZaVcmPav-%0ww~qfbvpB2|TAv>jpZ~L&tsmMq@t)f^C$9{(-n#78RNChlEEPQUmah78hI5Pc`a-_U7(&FhgBJMo3V`z^P_V*m4KSZS?M zkL=g82=SH2KP&$Ki#Y1abRgH$>(8AyuwO-;q1ZYaD*LZ-_}hBWR6ezPMb5k74^kJ1 zz7yhaNWO*b|G?=-FZOGmhepEetVe}Sj?fumE zZwPB$F>vAyL*9qS)_>-u_>rG~rSqe2@Y{N;yjD-KZJc-IBywqfxdetnF7Pk)pMK2}zfzwx-aE1Ly!QGxMaCbrI+WM+&H4MAojO1#{>Fz4 zrhbV&jDGyotC#88H;nnt8$bKh@~!KiIsVX_#bNtF>+TURz_s* zX?UhnmHxT-+j^Gxd#82~J2KcoADY_^{nDW;j}5z@cgUi@ z{oan$W50iAWN~oZ#gEbJ-x{h{6sfhoo9yJ3{r;jJ;#H+jq)c?e8 zeuo{RU^y&OUu|qUe^`CmIPs|(CeE8LGlsx!@)<=yqefRwR%?^K> 
zFO{qO&?|=RZv6TO%Y6KVu;U+hUTU+gp7*M^=MOZpIQSgq*T*m5v*?>4`D^0Ol6l(q zuM>-{U&Q6255>aoN@b!p){SZO=2T~VkdN+T6v%?RnKRg~8O#Kplc{K#149mPi6GfZ}vQk z!}y)byuZj`g&w{t4D}IDtT1WV%VEkDH$Qg!2d(3u2WY-rQS;-M~FBH2(L#hAFuUF#m=ks9b#BSUFo7?>@4n9TGchBG7>`9z=;!ht;UO;`A z^7w^Q2R$$)OkHoCH?BMXW#VoBbGq__gZ2MswNC33b&)qg?whkZYs%XYWPYL8H7blZ z<&n!nTgM+X-qwHhhY$3L9@*;Y$>K2oTx8x?(^t)NA@yOs+95x?V5_Uc`mek)YCqq4 z{v`MR_GleoJzjphqSdW=#jCY6yAxgM{BJJ$bbg`uY&4YmuX#9+WBq6-PgBteVh4Sq zN47efSsdnH6B)bdwGQvv?f5OQUirhgQ6n$^+m5e?bvL-|>UAF5o_^T%Z%xeua{en< zy`o6-`Tga@QriE~IIEj_3i;t(>OVZAgwOgxpTGg_cJtGClhu#>=s~ak^uuK0u!9S! z51;$oi{{NLhSzxdVfE({z`Vdv>=6y6{wI0t_qg)=1mVLE z8MHc-7kwv8<1BAQ|2y%86;|A(@1Vu+pNmKQ^N^#@W_`5nPj?oF^<%`vAC61#Gfes; zQLW<>`A>|^BEs9 z*ug)H-Q_2@jy-i&OC^@<|3`fc@S{<_>&(K`N+ zYdrNeL=J^K_`a(DrQg3!c~a~VJ?F9MAL#KXe@*<+LLm>HEA`)re|kPK ztph&l1ii+aOkWhygVX~b@g-S#7gpajU{d$Yx&QmvHDKz*smE>b5_#z(KS z$LSyF)KBLlQtR_jI{uKi`!nBAG^3%^f90hP>yuL+F!AF!F8-kPOZ`(la*Ao3PxB&! z)PoGFK1_V|TbFD$?yNBJFH0@D`{T>QZ~d=jaj>4*_5ac@RI_=C(7MBfSVH%#*6aRqfb(?QZ z>{UzSt9t&1#xw6w>=&u3`d_*aB$v$AFXr1T`ueqk@PpI^n!bDf{$__Cbm9+>M+Q^B zL?1pgX#e}JfBfmN)vvO_XD%M#vFG1T&*HFt4Atk)bgMpBbc&i!qFLG|Idafde^7}gg~9(2h4 z!&hvbf8qRlGmFEHZ>=={^`H90yS4OAUeh=HO7qLbFBAvF7nS`VEq>M~C$a~t`lN3i-UgUSI6@&2iyD6RJ+bw zq}F=xooH{wm+4iu zcqehjgA8`?52HV~HRI5pVau5U-dI{)pE< zXq=t@g0GRq!TZ4cLLn#2Oa0gRa2^ML>r<|L%8woN8gDXnRQcED&$qCBCcmNP4SPIt z&D<;6W`})mTxY|X-@S^w#i`g0uH z`beJ?(WmG&9=+vNe)J&gh53<3{oymL(8E`S-2a~GAYaGrPeTH%P6vbPsYj9XR`6RK ze##e$LnDFpUwMfuZGSW_^-t&Pl?Oe1AbODXfjUi3z0?IRq&{49{JV>6xrhtjnX&b2 ziw?PsdTjf{{J<4-|G$>SL7gJCmhgBdI@|xD&Rq27r+lH13pUe!dMEy+`>){x)sKFG z#HTWP=3#dDnpqs?=l-*DxNzFcXTN;l3E>-)jz9hrm+wkHZ2iwQpJFbzpEg)844wLE z-6FNVkLjCF{Me6W-k~@=hF11p9*^GI|EWnP`UgLJV5(=HAbz0DOXp*H-WR8N!G+X! 
z4Vm7hkpR@j;$ZsH{!rdhu1o|Zvwl;Q@8Sj;>Z|T>c9Hc3@!2~Ea*JotlFhx(^_%-_FC(tl-g=w7Ee5OZMtl~i*R#^1rc94h{aE~M-fBn24x+C@{6N+t z(DdYST0gju`Y`dLcSdeCY5$OCXI!-Nl_!(0^!r!&^I`G!uQApi)k`(4^*@~_eKUPc z>sMrcp*SWQjGy^|GbIjx-aqzZed1TwBU|``=7)ct#liZ7Uy4@0&f7bUOXUUBhanq= zOQ+BNV_4~Lw+#O0aZ6bLIsV_P{NP6M_`@00X-IxWiJE8oKT3I=VndNQl|ylCG?ecjdue`d;hz9eq?N6S? zL7(mTf1($!x&78UOm_0h_WxD(U`_1IFBHc`MP>if>;H3!r#|mQh7U~j)CuATvL5oe zGW|mKnf!*z6T(e5Uo`k9dxR}6-R7nG#ZO_k-(TcY{B(T$$%(yIUoDG+d5F|nqAwl) zB(5pG%r6wjM@41- zUjts-{x>yGLvi(Z{l6V2C9hapOYh`0_PWM14-h}(P@E70EBmkS8~*giaDe2+4yJji z3&an!&r`yOKYWJjXR^!U=;Yruu>OffcE00(y9Q2Jx+~wp<8RO3?ol3a6?p)Mtsf1w zC;3G?pY1<>Xptb(P2dW=^1esSA;s;i}pBArqJecHPP<`kg zv*f-fy*Dfzv*}ikeRH$&^H*nNac~{6-&pVet*Ks7r1H``(a|@npLngCd57Y}7*OiJ z`tdsifA~w-8)>roOhNoXt0T#uUSEeln8sON>T{BBLG@ws@guK(f7F}7pTA4}>38-g z&aQuMDjzs7_WvmReCBFT@{4w!^?wcZbBZ;=^t#eeoD>b#|AaR^o@VQ*^3kUhy_0|gH?ek53<+nKHgSSc#Uln!@?!E0}-`(dfsjjvEV&A{LVqv}Wlj8k%4_5YH>n-uo zf98`bFLFcVP@EiJSpS{+fwM$rJ~nT7K=rGi*Y0m|PV^qEsjPM2m!kQThxjV2@>6}7 zvcsp|`RY0+gso0G?XBt8-vYlK|I~U|AME*`c@~Gg59Co4>%Un4iEoN0^9;o)QBmr@ z@^Tylf6EV_`e6sXi?o&B8~B0v|JV0Fx1R8)AHH(+`(f+pT{~^}vm3~3-@gs1+xlN| z|GRk2?RU;AjdOx_{zd!aRiDl;6km)1rT(iw&oQ<><K)?j{9|_(hxKFhLjM2w2FgR7c_KTJL-D2f z!t$x!E0+F$1il36Gf2G2o&0NA9Ho8}r}e;F^26T2Kisg(w}0`=pA86it~Tk9H(c3- z&yIg~XK`3RR*U7Bf$Z}?d}_0_~L6g}~U<#VF9xH%$QA9J;XPG$A0 ze^+GuLF3c>rtg`*zuDE_;+^=H$U4ooa!6xcD7rgI5iro`d`;L>xbqeFZE*w(R-KV)%x%QnHOk! 
z^*5fB4;ifRhcA5R+g%&n|Kh>n`uEq~Y2e~h;I-{fZx)B`|1?yFS5dUBrOj9E^v(3} zC|W(mmnC1f=5dPf3=$cC>wB%;4?E~x5~qCBgCEHH@Zah`dAnBn^10vr)XTr_T6x*a z_WkyuYooXAPcC(%xBFiby?B+E#PQs6Wab4j?@*i;NlNR#6Yli)5%GMF8-I`Zf8_g* zG!ONpX#Bi?>Th~vF!f9HVaTcX?Q}-iKCE=fo{#qaX)&*Md;U*f7KfdOZ>XH0Xj}W6 zR*&Yd>vbdECGl#fkD>TVbSU*7p0N^7UrOu0@?!^mqEG9?4@`MYPrb?qukn3EefYvQ z!)G7+;>2)L*KZ#@{Yy3Y?D%sdi-Z37;rjcRJX()mY(6&FdSWfwOgYv;kUX!tt2U(Fk;r=^@9sI#c7lr2rp8Dd* z{aWXL56t4Q&vjGfHTD1R;8hfBYnj$lZ)iMmxym&`tEV_4DysTVU+}l}qN#jNF&E^x z6aJvpr}d!kglSw!CjSERhLwj7zW#U16k+8Rm$+!}kN=hR$@YKd=zJ8Z4?jcAC*G~4 zck-I&_9K_-u8ChL&Wtar`Y&Fs-+7i_w9!f*F~P0!-6=O1c4_<`b8USh91u_0H@FBIoQ z$IAY1Aw0~-cxuY;+U@3-_6J!JKhP@=^_reI?BGY#!`C(7g)6uH{?eOuEjMS)bxxl8 z0r~9r*Q_iKo@Y&*cdAzuskQW~H$Lqfa%!iKq4;VfsqFt+(cIb}%tf~CkZZT6^_on- zI`zNV&R42m^DV4CO#AI3J%1THI_!Mbpc_B&!nI!cZ2upA#eD1epMS}8c=a<`=U4jw zLG@$*mcE4IYcaIcfA!}+O8l+wc?`?E`e6rCz4fcA|25@H>=vha;9XdK7~{{lwfE7< zVeFAteDaNpAB4~Lf4a2}aE*BV`Fr?*;?-KB2kD#Dnar=X{+}C3O8wV9eB}L~homOz zu=hjxsK@j(#2-0D{NXc%&q@4(>O<|ui(k5C^Q*&pqp!N>%j@!@5KKj_kWJ&(RlJ$AGIFS&lG>;6fI8qJg7V- zFQ7h*JnOj!4nE?GVdRXj4chUEVeoVQk@Mpq^;mzpvp9%jJr-^2cR+pO{QWF07kenC z#}~%$M2}xx=V^6d2i4B|RDV$7@drKrV4lTMb-lF3DZh8>4=$uWZ1%~a)35pR^f2k5 zAMSnRqYhr~Td@6}>6&jY(_6XvDyZN6JHp9>5*?NyYDf<9k}j{Cr>)rYn)wwI4g^zwEx+ydeEty3$moH`i5$E3F`b= zk3;c|=uqlEadRY|dGWp(CVutH+Wjq#yo#ppp1;4@lQ>U4^vGZbeb zi~jTcC8u$qzCY|wh2ophp|byDB;NYiP`;+36T}YsM2~D(%i=J9c&X2cz4U&lp12gN z{LzQ?zjx0c{!#o7FOT2;)~bbHS&eSy9S-L)h(z0 z>38I@^}jcZ!-Jd0{hy_6zBRG4qz;pvyyChw?4|iBPbe;o0hRqd$&oio*`VhYWVmhe=lqzv9fRt_qWHp1JQ8Kf2Fr9OtdcZ@9AFf2aMzKhwed zO?L9C^#4<;hnM+<;-VN(*?--R;cxF_E_u8Y#15u<>ICrvjaU0;`VRdhk6{P@u+{CK zUVh0pX3hQoh0$AGa6+s9d&@lRy7L(-k6a}_0mAn8nyOFoh;}~PAN=wpPUTQs91W%Z zYaG{6SU+mY1F9c(&^z%nS^YZpzflpVd>(`k8SJ3%8h+HhfBx0uJ9n+}=+&FwJg3!v zc?@}0>iIlO==s@!S|>TfdU|h2oOPQ0l+V2YRckCK{_p=WBhQDRyMg>PY;t zS793Go#s~_lT}~EgFXyD?~<2iP1!B1(){%;8;shXc)R|#nZ;r2`vATFw4r*?iJUII z>iHjDtG_0m)B%R#+tEo*AE4|*qhL+a}mJ$@-NA47PU2Qt_}AI9AC_~p~SJ~^y)!9KIj 
z46Xkk-%aYUdF3MK7sMMNPq6i^DSDP{2TXSIiu-oG%C>$r#LhfJAwO8H|I`oG+WpA~ z!qW+@f6Cj*AA1^S^PxWGr$}5ChUfo&=&pxdJ0sKKoxHH?-+D#1{m*gw{MCl)L8r3T zA<{hGza!i2^vmXxi#-&V#~0RrCwlz2j*Izt?4ahy4&s-}=vg01_GT7GN$*7OLCtG2 z`V@2;?#*>Q$J@ZUu z)00;beJ8X&6Yn%{DyzN|Us$ew^^c1sZVJn9bIRu_!^`H68lsNoL{ZIVrUu*ZbIQ$e%pU>al>`9z=;&1N@GKfDisQR$R z1AqI@%Fq2SjNI>khRmGvrdPXt|2DEX*gweQ?$2vyRnA*^C2F4ip9S9jTqXIsHLq*O z`lcdVKd7(S?uQ-pPVA{{eQPH6Tx9GPr}Ohp?chS{!-nt7y79M{-WfLf?sgBq@`u*{ zzwfF1eF-w(6g9tixB5=~rSp%u@@gFKXDI$R8miWRu4A!&IOPGA4?F19j+}m!TEJC0v*LKyw@)!$w6x~qt@@8A8hIJh3s>LZ_`c(s;Rr`qWo z^QoF&D6WnqrT#nNWQ*@v--`}a z{pWLI{m7LMbSl>bx$g#lkh(zhoe+OR@-Yu&#ZG*|AHM3}zVY~ku=((#?>g?g{a1d@W8zOd>o57RgQ*@lMe}cD zalmi-iawX#52x`R{KJObkDm0Kuig?i8gs+Y`_Entp3?S*dbt0w+0HLgd1-tLy#2X8 zl2rD8ZOLK%$VImG!?oMZFVRyMh#zQt+CS5G=%?2_h8_IF_?aI*(c!%?{`5CLz00`g ziDQ4KwEfvu?|wg;J7YaFGZ@e1sLCwQ?NVZ3Y>PNr0?+F>C z?;!e4h`(V{m(`inQNCNKs;F@~yY? zxGD^r-t_1$_pkVau-X21HkR4zTzD$aKXUvt*Sw(SBVMfY9RK0>FLe`F6Hn?3#f?#6 z{d8i7x2`h&=nWIQ`6YUzDfY>^yAwxl@jQW^?;rP22u-XK`3R2I~E%P1P%k z)LMEcuR;2TY<*)s4e<}f%=p6cInmqvW{AvujHju7PH~1{{UkyBLF3c>rqAc^Z+7Yh zo%kCcGMM@$`moAH>)r5X?>oUAdD*O=T)ZMY+<#H}{%vT!yd6bqZEsSU{c!KZVtu1N zkH41obtxlJ6q3R zIzZ>um!S5^_BV9i;?-JuC$E`beY2eh?i?{U0Fl|F!eKx$x5$>ZUH!JMr`X zAq)>cboHk`-<%SDHT#iY?6X1+uYLd0Z?NM2cd4qpWFGu)3;gK$95BC7{4kP~`tO7r zJ^tx>VEoj8FY@J3TlsHWynE&oUgPZk$7U7>`v-RZrCaqN`E${;^iE#Y zhYQ-gYbrMcnO`V=6cttd=RQ9C?R|vLwey)@qPIGYBtQ4Pr@(Wwv@@&4t>essJUfisH`qdOY^A5$WQBmr@ z6F=4i{O$ADRD0sbb64;OJ$^~wgysKk`xDk2chv<;{nUjqPmEsc`0qYUUDp3zsmOl+ zZmPe3bJc^)Ls6{r?Ek0df77^zM1H>4ohvK&QqO$**kNWL%R!ex*ubzt@e~@{C=sO|)hEDiPvg$kWb*=c|lb=6k z!|`1MkGbf?pB&jb{<(+p_9dwEO1D2=^NDwBiQf7@Ky0?%NaRrbBsx^}zrLl!GcWiJ zl@B|J-DL7s`PU>5?{6v2sU3t58LZI57moO!jUVc6-Vshb=c*;#-mT{k9H{)g3053` z6R+0#pRNz|&GZ%X3&rh`sJOh`g&w{t44*lD zo3Eex@>St8?;iK;x2L!Me__8Y4x5+Okw2@)ADr41MQUyDO;&qd<85Blt9t4S#ZO~s zRsX&6;a~MR#E<_S!yioRK;H?|IP#cY>qFm(FAVwSv}0b}{nfDYgO}}i;d-Bh$BsXA 
zXK`3R=v%5MFLqG%rQ`p3q_y+Vyw9PK9d^r?^7cv`^Rn$&Q}oo&_f=g^r_=)pA3JMlL?GMMziC;Bk#q-(zT?)MG|t89Jq#@F0!*EQJw&-5$~dk)0-`2WjK z(JxTETI;{nqxtK(czBl}^)*Bvil4<7rT%MNJ&$JVXKDRcKKccEm&B2`%D*Q5%)@wy zQ$7zaq&`eqa{Y1nb+3l}d(Vy7?}MkvXV-uBX&qogW%4SDwzc%C2kUtj2mHKmsT_(s z#jjiaT|0LA|97wc^b1V&mUn>g;|JRNs&?WnPV@Y``mW{I+hg&WC;YZ+`4^Udc&~#l zfX|Nq&&cAi``y-w=ijGWeYx0KddDk4Wcp_H=W1tuDu?2(Xeh1!_<_9|UrW%5Y<=u& zmrW0kBKl5f^%Jl8OpmPCi7%Xg`QP5!>Fq|i?3NqvJn`?{@Ywm^-YgEbr#_F@U!9!x zpEyvWTHB-gXytK=xgzsuh#ZPp@rCstyAwP7cK#Q;VyF3HhX+L834KzB$>87rvEXM|6MZP|jxVhL_$$(P=H(MR zeAq$!QW-t-Gkc!JVc+jgW!`UOa3S^K^>K$E_UYLZ!(013`11aXEa8=p>pzh}d;Yxk zi|u$;G3W>Pt-!;4u_K4#=aH$@f1O`_D@kO1YKY8nA^5R_sUF$#H?ug*pMENq^iKRO z9{)s7Tor~gN^lx4Nh^nLpP(;t!^Epznlf9QByKqRuMbF!SmQ z=011+{c!6>TTC8uNbCCBeX}??-()x?9{+r&wBBi6iE1sq>e&uhow@j=awzVJhRXhT zOC0mD_@wW|vmM1BOyg2La*Ao3cT$h(krmVUR1WK3{`A^kS$;~`;Mnsw?|S4-cx?Z( zPx--y%C&YBskOZ^*~u%O8wS6L+wuRGYxsfU)mqYb?4|RMPJH2|u29?`4c34B6!{$C&--WlpV|lPpx5|R zwmQ4BILzJ<+4wcy{Eyg0@`j8L3@YxY>9vtdZFF0+dvri!2?tkyg;;`*Mb-}MF zUgf1%dwor@A=|u^k9mjUf#_J(|MdMrHe|jv$wyxJLF=RXqwj=ioXv;4PV%Df#Mial z^wEQVvfG%h753fp_j`YLP3B?8KYNr9Ts5x$V!hT4-nqMiGF?Bo@FgWujiji)c6crXT3_J6GK;cs;`Mem&; zb}-c=TRqM8IO9#`W&Te5J(&8bp8Qo9K7IJdCjao__d+=1ufMqEy&>?k-)7sLKFz27 zviejOYpdTTYh4Te|7#D$&{F^5`M-MpWHL|sM}NGN85(MRCS&iQ?^^kq-!6N>eJ6Ge z{nAV8EcMCvh_~%eZx)9g|C}68Kt9Lk0Ti#+(mQ!gz4a|bW*#7R zjQ5{DWcBI1#cMwM|C+z9ePg~B-;_w^7m7zDU$^MJQ-mj7|FMJW2OsFwp33M!v)8gX zsMqvP_5UFZXZ`5GSBGp=3+K+c`}_eDn_hYC{BJXhgZ04c?;qQK=W5q^Q?F=S?+5oS zdZ+%${CGb?F(*2d)_;wY=()VPydqzU)~}}cA%j)^*sCy&^G^6pkE~e5gFdXbWpl(Q ze*Bd%;@nv$?R?uYUgPZg=bkJM53a7~e{c7VEJx={LZ+I24 zgWg5j)_LPj{<-+WZ#={)p9dFGAAUM%!}CUr91vz5c=_6ooHv7fcKp9Li=(yukM}AryuU?lp{6X{6yy!b&8t17?c}!M)C%#boz%O&sD`$ia2R{4t;geg}pYNZ= zVe5Ub${qgyk^01Yp6x%!|DEz^fAIIJ55;eyLskFlQzf2x@wqWn9_*k`^p>}x|M;_> z7*9o?jZgDfe1$)JVf|Y^-_teW&amN1qs~45@!jCD@89k$4(rFpdi@)`isIE;!sDIj z=v%5!=NF2{V}Rw;c(0gV&qlnV`Y}I{c$1U*(Sx?X*UaJ|zEr zX=EOr9C67rXD)dH{Pz1RZ|Bdu;`6tiPx!^Fyu^>b!Jp2Ld57YONK)#*&ZEA9O(|R3?94xs3!S&U+n)+ 
zsY6k`T1$ASgT5hG^(z!lMUqngH4ob{>Q6sM@M8y4J+jr=9n-en*M`X0jYsP!#eYOS zd|~ZPPTTaFWB(b(9k&1HHvPjt$;bYVUH3U%>+%2k`e*TKt@p;K-Ux-8(_-V5&!^KJuA=E<7o(`8&0PY5x~cA2wZk!&43#|9Y78k9*g9 zwD)=V?fbW>c|e{&P*Xjq`9<4WdMB^x8?vqcO|diYQ2Z_`=!f|;zec-1^_EcmIPU{L znEIpdgr$0@3v^N+{GIqh*Zr&Az2~?rY;ox>OB6Q*wllW zPWag;HW%mFWK70b`_V|I~)!N@T*~zQ=1VQUlu5we5d57ZZsHp7! zdhP!9Im$&3KYXBfVmG-{|J6U`Lx#@~KH|U*`Y>ga6Q7)R&eCD4nPY!*@zj^$v+K|E zEDq+GzZ^dR@6&fsyvj@OL|50omFiFG3&kH|fb~DI!)JZWg`fJ+cS5T}`8)YzPvfY^ z^iJzhB(4g>>b0?JzkJ6aiE6FCw*S+od|*T6T7m;$Xh-VeIw)k`H$LCg7m8;hf%V@B zUmAzM@lcP}fu8Rj{6XVWfApO&jq_=J$cml#x&}||yZ3?pH|rX@!Si3=^qS+~xAni7 z#X+BZ#q&Rj10`yn?Z0lfvD^1YE}qmGia$!eZqa*}V2{Z7v)&uhfAwQM!XLCclK!K| zFGcKzPU4$j5+w4*G~TFBzQRfW_@pJo`HJ*drf6Uky`yWS?9<1 zCj8cajc1;r=#8PJ_1_6&eRDzlS5rk)yzn7|9rR)3o(~;xMK~*rnl<`c zJHPpNcslOsis(Bb{V}92_>dJl@rA)l9lY*oCv6pmOndOyD_7pyYn)ww*qy~;{TQOx z|1?w&YMfUT>)igUY{&npGZzomQ|b)GOVMEYG@ki#90Pyi4b>kz=o3ABAby~&U$ra_ z(^J3ld2k{1Vb3S7U-YIQzZ~`&`{EKOUmd8&jz7=H;;`dC4VB5KDBAMU_|oG)@YE!R z`Gw-;s3`T{i68%?gunHvTkKx_u!E_dxAUCeZ}ue4 zyLKLv7f>ILyi?!P!t`MVSR@M-n${(QP^Z);*_=^d{Gk@?+Z^*7Y6@ys(6uSG*u|M}eDpVmP> z{Qm3bU!2ZkfFiOYbyi{6eUA&aeC#))!#;yg{e(Mxrq?(-{?MJpVf|R~$`3C*A=d97~!kMqt zHvi3`qrJv){KxvxoyFnD>;FH(t0>m;(mT;rZhsnLXWpTBGb${v#xrk@qvCJecwahQKDGW@1z-cIdcsz(O#163ck`|~=VU;3aSVW;ixd-ZGAykqm^|L?jr z4>&6R{94h&qbOeGrB}N>|4?NOXOFqS?MrJ$g{*<<+nLzwQ2|KB~Xz zyXWt3cKE?GFEW_=CHk=4>qm9nd-h>r`?t6G#m_!_fmgd-f8LwLVb_8F*PnmbC;7TH zuT#`{@H^W2(NKHhj~(>Nlgj8>A4+!3YkHL}PI*~x7gP^lIB43m6ZZaEEgXBv!4EF_ zh1UPS?3TsB^Y%gF(mdi_UQ!o*OXINfIS9pHB1vie*Ljzo|4ctXCpAzfNZwRN&-X{k z-fZVHeH!VeW7?ODoXu#;>UCB@u%Jtv4dXYP1d@ILl2?{OM2yl7eAA+lLr}8eb{Hz zY2RGxo6T^*veVv~-kiYa(Dr}2vpB3DYsUM3CwO=iqtiT#v>y6qdg@9r6mLfs%cuNa zk>AnQk4ALQY=2n!u!F>#oa99h@_Dp+HGfI|H}zrSZ|lGN-a7k+P5u!MdtltH)M@?i z%i>t*;}2RNeY5)M8+}kY6z@pBZq2LlUa|E24S1Sa9Prae&^xiGGWs-*?UB6?8jqjF zEv&wl#S!Wwo>*bhu$M!4=a(b4xZ)~!ZU1wY`1u~iZ5HzS^MMl2{6Xf29Ex|NV`=># zAo2Af?L2&54b=}m(7Pl~{pcrtp!GlD!yi6F_%zRg>ch^zz44jXewc?{$4`87>btG` 
zuQ~qE$l@SxzCpbH;pg-%trI_qYJDHjH|!Rt{LDKP?@7LH&Fd6(e(>OLeXq6qVFxX* z@}x3;pw*}TrYD}dz=hO@)5l+Yz&88L3}<$I^(V)l-g^GzUOJyff`jK?eK_|JoF}iM zc$JsTlfEIR^Ft2BUn5Cr{ntEo{U0~$M@@3t`=EZZditPsi1$3}zxK`Q&y}w!$h<=# zkHW0#Kk?iLV|{|pwe#TzKL`(qz7yhasQF5AQb!eU82KM7` zHJhhj`^^mp{wMy{C*^}DMa%0n z?|^nJ`R8hPg4jzuYOm50SB0T*+S5C%^yRZ8%iR8HU3UJrTP)U($$I~LQa|w^JYc<{ zJjCTHJ3;CT#XqCM@~Pc}Go!h6{4E#R)(802uYP*FKY6GNG`;d6r-;9y)BNN`20Q4( zxT%}2eD~G|hIQVX`TiO}yIa zUno9^ftCGt!b@MsXQ+0s$a4?y2dxgxkG>P8apW<5lD`vQxN-a5%cf522{SL-YW0n0 zy$p~2{_V};u;ZUo_4&6o)q^?@(YBVxr_YT+rr#zvBqkI^d{No|IpW8>EMD__Cw4pj zd|A7HRh;R2=I?KICw9wQB`=^pY&q+XQ;uBh(y;B1v+cip+HbtZ+4Y~zEDqb>%{ z^@<|RXZx>xvEvV}JuaO`DEgyAW&fKJZ~f5st>mwM^=;ezEspm~5xsc-A6|dB&9zVL zb@3}(hi#rd==Wb5Jdu7>K7aD_di*VyN^L!MD(ieiYP}zJ{~hzLwZ}96Annge{nR+F zqrjhfQ^XE>C;3v@;(8K0^(mI(l+S}&pUJ8xt_s7%i{2Tz)ujDHo}F>g&R3qycGR{% z@G53H?DNn>BS^f>OXKL9cZpw3WaY&Ehwi>}uGsU36T?v_o_WJ|Uu+%!-#3dRouBRh=W16JskOeRa@~o> z>Y-mv(KFA`zerS+`k%}Xf8)arI^q3~U;nARmKV7Ksj~`0ZNHi4opR`bVciQa8dV&1 zHoTSF{|@(m$*fyTuXWk=pDOdai+o_{Uo-}n`k(MHAL3HP4tkAGW%PWW&7Nm*7_Y04 z|4n^3>c%_%KBYD+9C6vn6J|bpi&s9o{&q$dhh6`Qh59q4%y`PS2ZKUi?6AX6F%})VHkAfCTmaq?2BQ@)NNn- z;Zwcz!>+&Tm5OZtljqOWRIez~zu5kaRDY+~P}I7qFZ3@S4W<68UwS=1^`;0P=$+(C zW%RuNC3{kzOt}-inaBAbUgp=BqH>GJie&x|0?2VeQJno>%Zp54tkfwsmy%Y9-4nH{@6{=Jc$QW zJu-+NsQR$oVjEv{<81>%W9HL8-s|cfuXa2B)|bU$=N~tV$DdcBUQoQ|_B+j|a_RXq zn#cU9r~ea@uUi;>g`d^aP(S>Yhkn%)8GV`;eJ4!gO7&5%Vkf?E@MTAgANXVzj-I~d zQkVU_b^XuT-l2cVsHp7!)OLTi+lD${_(1QHIQ63*{J<(c{NXc%PxExrlP84w zlfOOlo(Z8VEPlaTpKJa9?7mqXwq0$iypo>((!Q~Nh_$u!PF_SHu}6{b#?8`GV91qVI(G8#=9{qW_)v!g9Opao3XHygn?y-{=4F?Kjr* z%4grd4dqv~|Nl+H<-&9nW{?@jF1ucmlXXXsxlDoXv=`K8A*y_5J9;RCU!GJ5JU zdrf&Yuj!rY|3etg*=d83TYU1`aKVPhKY#YFt?U2y(mc}>9IfZSa@B)QgO)exFZL=-<7__4W3tv!#e+Vq^4X^^ zy!yl)g1dLK_a1ntb^XZ!SseB`?@@VJJpK5deLpdOCw7)rr|K8@_-Fqzk)qUpc=Bkj zKL4VA%n$U5-s-F9Kkr{uS?{w~F8P%(?u?foyngaCyzln=7kI%~?)5M+L#f7z(0 z>Oc3@;ct9NU+}B{M)SD#JZYTq&20BW4_ch^d64|bUR)fP`?J0nx^_Ff zrM{705kJtjuZg|Hqxq`z@Ks^B@RPTE?61?72><)j*{>b+^RvA2+40ZmG9P;m=%?fP 
z-!t$7#jCYMPhIp4Ih`MJ=$9KuEBil1axfp_Z2Oq>%luptr?SN%SV#R{_Au6sGm4P7sKuQ+58ecb%FSS_P!?XC;ndf4Ab`odk1~jY6sVkdVj|B zt~H+9<=j6Hy#ao^{;fBQ!+r;I{cWy#MUno+_Fwx3e>y+p&@T_zuI&GC$z*-veFR;* z-Tdf71>y(V`qpgcv-&lU2jN8qJLtOx4;=sc4Zl9UYe?C58acO-qn zk9x$byu{A$CgiI5h5i*KU$=01r^q}M>&P{=gX&Ko`5s0Ftq$cu-wD$=ulY=moYYap z8-97(b=UoRP9C1xdgPEj-fZpv@14bA+x_V(uciM#m8%Rok<+DDJ=ZN;ow<0Xa_E;E z1xo8bJd-7k`PlmipZZ}3eWJI#75&Gb?V9nZzuA@7_>jR4{$b5IwcSrHb`4_=+h^T( zZr>gryZ)^wi^KMR>U#bQUPbY0E#dJ_bawuoIMt`~3;ly)K&k)4%@jWCM@?jSyi-~I z>Su_IKWOzSujw1}_cy!xTRi#DBZD3EVW&?#@tb$2PYS#3aPVV;54jdzyZ=S&vF(nu z|0J7lP3mRoHE$|&-U@!>(|G0=`sD$KrT!~#dOf-EG$o@?{OWQ$afY5e$R(VU#erXn zR=*QJ4-QD=1=NS(SDiC$)P6p!a{oUbKWoqR=&Swy%9U3!pBbP3FvI5E5IalncqK@^ zcKxl|ZM#9;ilJX_Fe>$5?fj1$eKDTAJs)^N)G1_56VYbw0fbHdH2`qG-!Y=1bp@tL7K_SBfO1{yX8WYd+><>p{2J zed1UDrRebot&Ws$`uzP%ahk`2%46~Z>cfz?c0P2U<6jLcO?azs@QyR7%bvg2m&L(x z7ti&dx#|@~`WO43{QnVkHZ(qwL%%$zzOw)6`QMuIfXYX|K<~uQWb#(|Cw#=yACr}L zVfEpJ?SH!4et)|voV4gtzxeVMt@B^|OTG5HvZ?a&asTI8TkjgGXFg73mR@E4x6bNJ z=a)-F=$8|frT)XOan?t8z{HR9{`iAdhx()MglQb}Gri6ieJ8$f=u^vH@g+YxeBp*; z)_eG-HQ=%R|7I44y$>I?|2n^p{n!45{$Y}@Tl2bhtZyhX^P=7qv4dWDQW-tlL$hn$ z%-i(JV{soAsYwJwgIvnuXA71>zkkRL<&wYF?G>?GTw<@%6IX>{qZ0=1vdiPw zWjeg_+VjtAVrcdM;t$6qmQUxc;RdOlzL{S0ctz?9{j!0#{^LjgM`v;1Z*@^;!e@RO zZ}}4$KhP@={H7-kJGhYg@VTc~ynLrw{|w#ht@?|Xwrc(V(E(W;te=J}=<}CywJVDB zFTQ_y{#aUPQzDsP=wCG|O8s}@haUf`e&Sc(rJcVjF4ZHaSjtCT>AX~*#;0b^$?)6x_a3R=e*ga0uYZ%m>7N&|bX^zou>F8;tuyHZ-#hq&%o9Z4 z3Gp|qsE@ik@%{gI{gr%BD*IoPJ~J=AHx22B`tiF1f6)4;?-%+`n8wj()01DZ6JHp* z+PK}9@W|YIQ3WLdx1Jb z|7y{p)PMYwep%o17-pXnj(h%p|NPH&Xa04I6_&g&tas&?mkYmo&O4chZGV~~v;EIM z(EIP`yP|l_?RSk=8s`Mr54ZP8{ZcvfuO0(R{U4xw-GaPd-l;xC^{ekBcKkuhtGwvN z`y*U`PG0ImR)n_-!@A3@dB--Z-51t<>H7a&>)su`#@YJco5f-0U-c@`1cWMXWMFu!29<-8q5Dle&*z9HN8i~5u|^pA`rrT#nN<~}~_ z2laG|-K!sVFx6Wfnjb&VwrlV!uf-vQ%0nK*4*Ia^m5Z)3=o_=b#9NjLXWg?j{C51m znZ;q-pSb&f@kQ33ZnZl_@`0+acO~|k%4%ocp?_2~7{A)RV!aW~c7E4BNAPK0_(0-K zRzK#89<=^zUegn&dU)Y68GD64eBr)VPT2P79i9qLfAWk`L*G6D9{c^($l|c|bydCp 
z9bQH8YAxaMPIUCm^ttH06SR8z*Nlo%|20qP|7U9wZ|_6G2d_``R)_N92O3{uw>ag4 z_oM2`6V{nJe#z~>zH->`%)MXU;F(@{?E5#*;;{8$`S|?puUlRx_H=3Xq;K%2^Ft2( z@&mWB|MK=!um4&P^8=~JWUY&O(1X;&dS`m#G!MM+n2f!Hf7i;_{C3$3?mMw-=v!;- zxX-P>A>Piv^ks2SPyToP|EX@(E5AssrPZT${{NAAfa&}~zdYc%s{iOM-f116=EINu z2>ijcKGQ1?a*969Qzducfj4aY{KG$dSI3UpJ#EH-Brl{ zral}u?(oI~!}`L>gATcW_=?YalHbS{=%Zz7wW#p1jIqvg$kWg&`;YbN3-To)dcm47b&-U*iCRFB>G7Eljg*K#-AeD{4L4(nQe zlZ#J&_np@EhwXp2>b$6rzFD2Q@;X843H|cQ@~Pb`avcl)d~R$zS=CRC zx449tc(YSKh<_?~(8E`SVZChf%w<2gDoi-*yc=g<)w=(J{x`BXm}efpeqTEa*)XX~ zqULTVoV=oM$o4)YaiM?R7*OiJ`td!^eAvDisvmZcye5;k%D*Ocvz;x)IkkiEA%h+C zVM_PM7Wu+1i-n#3I^dVf4Z7BA9NQoA@_QKB`qRwf@J^9@pz7`U=jdzd-w=d1^ozmx zHJSh}J^9R@XK}E8nck`XKZIee3s0K8)`3rlwYON~2kr~QY@U10 zKTm$dg8l!6PwJ~{pG`lYouB!Iez{PZex&~OZ?sVd$|L5gU6mf z)|C4Be##$-=YN~XhSZ@bQFHsG{pCC$GTT*?bBPH3(;?2P`YT?tD{BiHGx7LZb{==ha{U@JdRx!NB z(Ko9y|j{6Z#a@k9pt^S{)jf>XB1S;QRMo6;z8zv9Qx$~H{)l1^mCHr!`~-%Wb9z7MKJ@uSAH9b8&Jv}{h>kk{u`0&By9{)xd|Lx;fyJfyR z7|-s%G+Fh*731$8C)m8TPGNQ4Z_xqm|970~X&!O(LNPuPO8wV7?qhHLsHq;{;J7H>?gbvn?8c6-^Nv4cp&w#U6`Nx zbsTg}Z!Z1es0o*hJn5iE!oZd{ZSkvH4o1hme@y2mKTovdO88l_;0Ng&cGdhsv0+py z^zA1VV`KN@1E@4v)BulcEM^{9@;bJay(^No)U_9-75?BNfa95rFZw-#MAgf*5u z^`Wyi@|wr*zc}x}xcT^a{4>$EH`UK77Sq$dp;O)ePe-Lv|8*SRN32gy24)=eiG##b z8$aV&JkR37^J;#l{uHbFobnPotoXfgN1nRxvtgwZci#O=-=F9;ZpWX4SzPA!_rLVP z^yIK6!{&9(^T_-{AqP10!}ueFYT`$SanS?46Q0@hOA$Y4+Xp<&M;|@IlsAX` z{>2YF;f9;GyS{(%+g`ZziyQnAJ+WQzuM>-R`|)Q!u}CQEmCZD;A{OO zUR-!cw)Rfw+3#OwW^wUgL+x5Oid9}SKl+B9&JQ~j6QW3||4y{MBJ;85G4GDUORv|m zJo=@GzZY8lnH9*h z&%ZOXxY+w|t>XTNI=-+wYw9h1{!Qk`{wNfi%6J2!;axZKJ%aTSou=}%4&tD95>IXF zvKbXbEJ_k@&P5p{u{hRYYa$2o(-U6LI)loYX za-n*u|H>=f|HNrNnDFX<>5iBBsC@Ge3LiVg%04rGimLA`{2^TY($2sB<ZB+UCD{vUB{ouT;W(4VQ}lzx)8t2T0xC z@ko$)mhS)A(6~Oo%s&*H$3Rv8>+LeDnT4VYTqXp>fHZiAJuClI3nKv=bKiCjw`J48XkSau9{yc7=rq-$Tr1SMo;`p(xo`ED@6Pvi@seUQH!cSfmhSgsB%*n@YxPMsfzVG@| zzOe+lcK&U;)RDf9)BXsnv-VCNQ-59iX8vS;JRhNu8~msG7%250-VDjJ zKGjqQbZXBOtba!kK4^8QuK6d;o^NsGTfP&1$(}=h|MF|Uy4OlqKiR+hX6xO$z(Mb# 
zQ+fWgq2uMM3#yfwxZ?DevhZX1F@_Tyx{#xsi>N?LyU2bdJ zIsVLrk=7N8tz)37|E22`YNEsbi+;i5f6UTxjHi7gjvqTk;)YJgujqe?m+(XLOMm(G z60aT}4w$+AONSqDCVV^oZ>YXv&;Hkh&)W3Hs7_jEQ~b;`6x&2cssB!R_~F}jpKBcb z0kJuc4j-g0F!f`n2;a~p<6f91N5?TAk0`|lQd{b4hU3)`<)t?fPMk)U^i_W7538X8aRP)MNEe{>d?JnKhI zb-;vIe^le@w>-vGH2*Z=W2Z=eyy}4|54(zwzkm4Xqketelr#H>|81e~eD=gg@pJwe zofO$0fl_s6f0NoAx1evYpJe`_*gmF~`cJ=-{_uPoCcOH&Qc0Kd-LHgqHr>MO8h24D1)4HhF{EgZ3EuQ3g#>I~fR`}5ipB{Jj_dmF&7B;@)rr&;N z$I)K%Z2ymrqFw)!XK`V>6RXw#8jm#Y1bN@|YO|e@Z}k*A#Dr4+nMWSO_C7-YN~k=3 z_k=!}^6~e=G|xM!$Nboez4U@Vd#TOOKkNM9ub#fhKhIy#Yn~l{&dlP%`<|Z=@Bc8F zz8TUVMak;C9_;*&Q$2m2tnP;RL$PCgQ0hNc^Pk5w+yB9v>-XO&qJw^lR=3mfz#e_}@Id;%+Ggv#RvX+mta|6@gD%-` z2Ku)DpPt3VD^id3XF%gMMJLGGt6i6$wOD_wZw+Ch8;YHy!}K-Z>gsjaG|LW%V%+tEyrRW)lda$doil_dt$(5%c zbjDRrhx#KMe6uliI{Nnhn`d!h`>@}i4ya#Itj_+P?G?1+AE$ZLm5a?hL$PagRBr!z z9T$9_H$&AW4pK)I!UL=HEZ<4p9%Me)U=M#deAIo#bI0_B&t3Mz%h&t#*68uRWp&iD zxa8-?-(TdZpSpzAS>xxvG1!)$)EA1~Vscgg$t(3q>+?>=hR5>`AGA7jeEhvI&9m{6 zdEoD**T4AgPri5j4x9E5+3E2&cKP$s!lw2pw*{5^OL9Jpo@;##C!1j$3AlT3G<({aQ{l<4!HI4 zCp+hV`2Fc%7MIfXSGoEX#p)dYq_&-Zpq_^2Cw3@g$5+{Z&9i>gR0nio69;|bw|?pP z@Id(R%&&Zlt1i4b^`jR~z4oEYZ=Bi}CND7Y)40?9_Ihe(zwEd&6D6I0@YrORNH<=&LM<|+6#QIPDAbP_ht@C+W z+m-S!s(Jx*9nM*#;U&^b$q;b_@9C)DlX9yoVMfhIz44Kbd`olWUp0elD z@2(ow`}um`xqRbm(62iF@k3_+{+m@S)K}L&qi_8x^?%=}RO-Kz0odQr7uznH8c%rt z*FS%SB|r1YMXrBlFf4h^Ml-&&%pm=-{r{lO3v8&JC)lU9qF9~xWB!N2JLRE6p4y?< zFA^&IKV9;z|25SE(NkXiYu$XygQsZz=Ir?vPx8DIK7BKL4*lV))4#Fr&g&0^t8dux zwFmCmm3)4G&GrW7-2c3$<0^{P`MhPe)?3&6Hd~#!<|{J4P{@t%tNPD*W%#A-pYim2 z;>z>SPR9o|kNdi!pQ6=I9!Nem*ux)Iyz0#d7W{B(Sm~1=?ElazZ=uim2m2kK>g4h8 zXZu&}^RW6eU3i|%);c);EX}7WO#GoZAUdl0&wa<>+kPw;9qNZyKc-vX^5~1A`D?T1 zTRh40PWU!1HV7XZRDW3EwGl_WvziMl9l7&O2S3*7zift?^|_hF#Se|=-!@L?Ez#M0 z{&fHOIKy;B}YU)2u9L6KnnPkDneT>bs8^6QLye>a~#c;%IF z^6dGgyrk}uj_Q+Fg<-o}F8W}R1x^gxzI^EtV}`AZj_rTsa=@%~{drC5EbVW!E{V?a ze6Ff|mta$D>I=oe(NXHZ6JA|@9?n*>|DOu0v*vkWe)Y{%$0_ED)U9?X z4vB%P{+GT7ZKzID(J4+BtY0d~d_nKTZ}rJxSH`&TQiN}q=$P&t`okIz9k=(A=RFo` 
zr|x$DZ+>|ex_15B%q%YUJ-`Zj{Lk|N(nrPx)o=elmG~JC48@@`9=a*N8N;3JpK()E zUR^F{F+TldJTUcRr|6xGPhO?1yfiYx)oJ|%4XYGYv2z1~O9w*N_4T9Q9j(Lioy1KCTammf z44=Jb$Rl5xHW>E$+;y*SbJ3IN@cEbHe-NE?yCYAk>8$b7H?MV7%`X&3#N^8UYrfT? z{jvAoByPOK@6|TUv$&*ubj|F;jaSv#9XubA4 zXk9Xk&f4@wiVxdR`-g5Qj*P@o|6O!vkp}?7>Ef~+ds!q){k8D?R~(BO&s)z z-|FkD;A=g`)BKYCANoW1ucJ^L9aBsF*FM#^6=mxyeF9x~+<2N#Jt@KisRy3<(a~`{IG29(!p1M2_4@Yr zUlQ_-zx9{v7JrHH?EGh*#l^O-CFKSX-l<( zg-89?C(UO(;$Z5xI&^$^pw*}2nxA^rkFM#>p&z|)bXqMbE<@5D#n%%6+jJ3*_b_*`_9`mgimd4zBK{{hiKpE&59#LcFU ziugg(ZB*o`o(Ivx1}prG6UIOM$nb~H9UV4mO#1M-%V)4XS^v>d%%6*#ArGC?CN&)B-2fdScYTGz@64$=d56esYi4E#F)M1$T(F<#T zZ?E~@{@7Vzy_XsrulGvl_)|{9+j%$oN1nYdbX=*vyk>m9e<2^Ff7qcoE{a%Rn3rPR z%NX#<=luzPiryu8YEvKcA8`Cx^gyebT9jd}m9Kb(G>R5#1#U%3p#eow@9 zVAntAl0n^RzIPf=`eyz{cO3mt93KPLf2Tafy5=z->r+#CIv#P*Ykq25|0hen#dEQ} z6Ewf-<2UT(PvYo>EkCyBaUcKd(_yO%ZXMpdQ z=MCHHDNcw9rT*(Ub-6us=lDO5bo4xyq(6udGv=Z{a5Z< z`qU|5%gc)u-aBt=uX>zsu+LqCI$pot{~=dfQLOTs`stf@!m<8ScSHQ4I59q`?7#9$ z{Xj>t4*o=R;e*r#nt$@_`4(p!Cp-_%qd)9Ac=$2b?)FI7?ZEfm`{wb7vmdbSe?Z3p zhtB@<7xSPV$(m(eYJSfwp}#y7eqi z^*pFLX3wENoc!rMKC|Xy?}zgyj``dw4{Slc9slH6T&$j<@%rZjtqvVmSmiZ5`~M5- z%!SMTn|X)g$MqUfEX=5ZY(eCvCy zn@1c($86Q3KKvkl=2`Mn7l#u(B z!4+Qf?EJ$d;o1CF*Z;p(9(4<=y!OhYZ`kSl=u0TR5JgJ;*Ljq#zhZs~!mIx(%4Lkp zIMfZAf4cCoQ$(kPjPE4h;ywIfi(gE<_uiq6@R{b=wXRrs67_K3DC>Jub?5c>M{6f_ z)zr=fnP(_YjgHFx59#LrkL`c7F18cuqd(qB9fm#nY&`Om-@nZ17k*>Wmmcq5cCoAX z{n}xxp>Nke(TVg1NuOw-ePh=bk<&uryU z4}Q>|*F+CKdWPufICJU`TORn$)oN=V8+IMK_}#y_a6$6y{@cwgF1!x%4|?4HttNcd z-pM1*+gtt0OYKlhiV3CuqqC&wS)bJJ6XZJe`d@`lyfQCW-V{N2DO$Zw`Mt2m^AgrN z=h~IueRJC|e#t}rcHg3j9lveS0+CEco;SA zn4`!3ej)U%|A}6{kN*Ecp2fxHpNucj&ht@s%7>S$t$m`-P@EnErT#nNa~>N$&p)qE z`a>M_nxESE+0HDkbrCl|^{XD^8R7>k_~?c87kzBv{NMdb*x+vu4jo+JLUio?cQA{K zeGd3^JpI3}&0FgfR;SOXnt z>F+B%{sfsX_kG0%nJ0+97s5AWT;_qT*h??Cq3&OcpK(_hJ^bC{4!`&YbnX1-q%1D> zI;gn*!8_rxMh8FF;b52EH*(1g#aZz|W&fMPV_x<=s^7*{yDqo$v-8!KN52&Di~9fK z{r}y*tGssg2UEkSH|KwG)E7GU|7ZU*C>7cMzoB;Cjbe57|7Pp_=6wHuc1*48KYBxT 
z{91wg?@Dd7{Z!Zgo29ba|KR)3Wa{xE)#wWglC!HQwS!xuPs>%q?N-w#sV zW`c|B|1UJu4>EoQmVWZSQ+6Ys$fo87SfEf+4gf7s%ZsXN@i#k$mCpMR%kaiI=BTF*Zx`ob!&O;7KeiU`#6fh-Rvz`>2U9)r;hXKUxK#PktHQ9$=o1#5{G(IC#NST% z_Q~U3Mc4NK4b@l7FK_?+<*5a2`y9}CO)y=1C!Wgxzsn^~eWAD@I!xb5+$-`sGWa&{ zKIJJA2gx_P7oWI}Tgp>Ex>m;=`q2xA{9)!o&(D8JIOfr1o;c-G(ktWSA~^VdSuamtb0CscKv^z#f5q1a`{7({kEY~p3XqDvg~u6rD8!+|7@US8pM;`bnYY_Nwv?D)||Pq_Wk2g43` z{`}1G3!aIdZU4UZmOu75i)i;I2k9aMYE*{iw)2ZMyE_{%>K>WQBz9IE74{XI=dSRzKUb&@p z0e{&)5x}wfdd?f}-+rLwOE(qU_%W4{T zin(BYLqYhUcj8Zxcoi}qLv-e~|5r`=)^5Y5EEujE`Qf48zb!}C_Wx6~PH?4L;tjBD zyK89NDOUBL>)+s!N8M_N;^HV&)&F`P!=2BgHL>k^aNTkG=o3FS^JP5aQgb zKk<`Sg<l=Bh6AHOu1KXpAuROljqAwPQ zrwFh9>ntu_{g%hLisqjteC!mN4|;~^={R%h4->w={UW=6_Og(Fbe(MvxVN+Y@1DiQ z`qU6Rm#@EOq<)FEjyG#O67){cu76P5=ApXGGZdG_gi`;}87+G7z4}um4iZmo{ONHR z^RPNA#;rf<>*2i3It3!Fj!BQWUhaVnjda7@J@|=zf&ZQr{Flw#8esAUF zp9!O$-fOk-U-_F?J^TJK&*EbBEUmwPWIRP-b=J%aq;J@!mrIV-Q(O@p)_=xv!sBx^ z{3;#dpx6A=#_yY1Tr8f8?VX_clX`4E6@K!nFx1AsR=jV2o;>ws{{nt2o{Er5FdmYtcn$=GnB;RcMrHCK2{j2Jl zpS+~5biRz&%OAeD%$s*zI&o$=`$umNe)O~NkZ0%rXJ&D+`~T$e_m8Jo|8k8}SBg&J zrTd>ZG%puqexdkkbX4{~-T$N06dmS^9_XFKQyV_>u{gTSNAr!34W_(a{#q87u+drT z9`>=dt_b;UR}Q}TOy~MTet$P8JUjkrsIB>If8y=De$00AsMqli^A3gFu(PWFr9S1N zkKg*E{`&e7M;|m^-|XY?QcUx_lYH}IE2jCW9o!|qyYXvVUK2*|yTj2tW@~%Rv+J)0 zvbb13hQ$M*NA305(75J#Me(+t54AZDV|C`j;Q6DjP+S!QHlF5_$NxCMw>p}tSJ6+e zI+mw4e$e8178mr*uln!}=g}WNy~nFx+B%#PCj4&io2Or~Fnad=%Rnr{jw?Oiztq&P z`@f%X7^6`^b zg<<^}rVxp@BVXse^5|7pDP_($udzEE6~wHNEZ#ywbT1zQVz z+wbJjZLc?8SKN06KA6^rzZa%?-s!mW+W*m~E%eqqgY_`>>dl_o{=7xex9@+as6IGK zzkeT4Kj_rf{-ER4&v{7mHsr$~i@R;|}x8u)2)d$zyBYpt0{tQT*HTi~G2iF0n z{&aq!m=dK-U-QYUZydwb+rRc9)%B`_A3YF1$m=JKW20xN&tLejTgkW2Uz4)9*!732>iM6V`W40MtZiJai@uqke2udo3B}iCyaCmB2~H8a z)TdNWd2{{$M|rnC`7m9C0N7eT4T{oZS%~0d;_}we|pw+AW!QTth zJjOA9GJY?;u=M(m*B^NJ?_t=u=J02>Ulu)k|DBP=#Vexco9YLh+KOV$a{Q^+jg;za z3KQK>To)as{yX7uA1(N{9W*8Gogi^A^-)9QXKL7T|_g`oGN7WHlr{Aef-@KDZIzRdpif=|jssGF)mwfot zTSDdipa1{mwKtpa^7pRXE3A9|0;|3_Z5Z>g^ZzqszNPJdK>c7%?LI}5HYBXN@G%d){HGr=9{U^i`=HfP 
z%i=;G#pWBP_|*pxW)3&l;*QQ80ht@|%Eq%M1YU4{K0 z`umsKe(Lh0RzIPC=)Z3G>^tB8p;tXS{vXtF!ARb%)|FTPh1l>xc!oOu|N8%bt@6`fU$gy16T+&O>@)rBFpj?1 z@lP)GGtc}Iz5i-c{fc6>_BT2Yvz0j*mr7e~S>s2qM4D$@5q?Xf82litmBn*Yu)2py-8j3>ThlF7mpZq z&0S&KMSESk)=mqfW5+*JbbN4Yz5b_Sd|`LiUU~El+xn*SW8R_oZcMi4SMx1zW(;@E zKhqx&zVhlKXMDzCo}l?Bb?f5?;TtkupX7OLu!p~Y*zF4~GxY4$`j_2r*GYapyMDv^ z-^}7-ukT^{|I4}h6~+2D=YLjHKK0PAhWMFZD5gb6W&fvl^Q{l)I*D_emY)iSMd^m$e-AM+-gs~88#d8xApFL?GE)u)Zw?7y`{dWD&U=|m<{$%s``@4nH`h?Y4qXVY*&9-?` zUqf<3ahr@cAhvgk%Huf7@@pCgl}8*z$86ZJ^2&; z@AZ%U%0g>SdFmAM?E23^)dL%9yKWS#v-VCNQ!jnPPUnXmirZsiY5PY{(fX7}diCeu z|A8zn){lzoKZP}$^AB2Q>H6nfb(x3Sp}0E+ zY&@qtMSe%ge5~(zH;*{zH9xhjZ%xU!crLcZU4{K0`ok9Ax#PZ1Tsk%M5B=WcZ=XJy z@$CH1OdS{G`m2WeLHg*#&)W2sS3bNnFPC^Iz8@b{^&h|W37w|oGakJ9ZsLazdYABP zVzY*qqUke_R0mrz%}?#H+7}mF`@AF03ah{J{z2oHI~9G~|4hl^;=z&a+ixtiG4nIj z`n`*ETbbV4;=;E55A$e9JQP2O4@}o3c>`iIkJ9#!p7J=3h7VdD%1`~+DW-Wotq)r< z%}?#H<~@&prgh*MYD16t#Ffw9g`S;%o07%F`mf)=TK^gvXWkW9y8lBh$y1$B{7}Xl z(75a7iPhTAHPx?qX7`DodGb7BgJ~b}_rf&KJFP>pOMx=?C)!Q$Kc!@C_M{`mhz#{L~Jke!0qNE8X@;7=7^Y`G$ReU9Wle{+nlUVV+g@ zpGoVHtj_tk%I&`)OzH~7Ju;s5**l4&pWaWvp2wQdQRDIb3?EY;&=9P=l3FTJqAKlYrjZ>PJ%`X{|H_V%R*(c$wKeFxFA z{tW2&JRdr~cxT!E^?O_N4V%J5Hx&0phw1l8+&jVad9?STro_=n(Yx+AwxQ00Jkw3{ zsHX~*&-0636^2!}`|yby_h@Fi@Orc3&s+j*cX_=1@GIxQ1IY&&-#hVe9VWIR^H5$W zrf2QN`tPD?-%Bta^YO$%^lFmFeRtu5r990iUWJy&IGShv#IDl8AJ(1z@nP>bwg}_T zynN#OEAEVr{r+Mgi;MN6;`}%BJWyDjHGb+Zo&V2;OPu{%C?1Fb8_y|Ek=K#+ zBiA^nJmR2F{5Eb?|7%%XjHme}`#+{P`qqyu@#HXx;EY(`%@{=35>%s5%}S z?BNfWe*E%FH-34GaQPu)k38hi&i&61$l_w-sZPG0p8v0DTv4pf@lI;jo${$CSGys| zyhHJm=&03KqDM_ z>NOu;Jgjs7w>?#VFhS10X}$DGSe^IVw4OQt{o_xg6uRDtGRW`vtWUYdK_@nG&?kPY zPx}cEwC%qtPyOhc-dy^_*clsqar;Hahm|HBaqU9GUqHw9KU1=}*!iD^+FCb!@pjhu zy%QgOGk-3A?*y55C?1NAQvY?l`a0cw>r*a%^w9(HS0Oymj{lSA$MT(y2htC0u!leF zFsA?UFYhun>~QRu7ykV6ndsa3w?Q2T?DhXUHQ}>1J?$HIIzQ}C{4Azc^`Gs)`jpE^ z_B=SXrwIO^|NZHh5sO^(_HGA{6g{bsAT<5`c&Um2C_b)qliC6 z8&~TwTlGu)hKvJ`IJQRzRGrjMUKRE)Ir!%duKV3anJ(zt`RBG?EXH(=E~z2+0gX1=`6Ew1&Xy2e+%l1}1BuL}E@8~MPt 
zGmp3=(*=FI|IeTV>~jEhCF7*|W-E`rd9~T!r}GQNBQe$b55F(E?R-Y*_h2^{a*b2#plf??v$q})-3&BQRCFvNbFqf zP&^hNl(v5-@$~;Rtxvhcy%VHgVCrX{AUx1?wO;d6zm9`0I%X5E;G<{1e_Z82zszGF zxASUt{rNx^7wb3oU#h8J#}TWu_UflDyZ>QY4|XoOq4;HdP}P6aTSEA@A8V@Z1eqV7 z`>;WDK>WQBzM+m+vXeThbi?wmJ-)~-6All{|7ndEzwp4x=-Bo@Q|4p)+s*a&U-Tc; zJ_@_D_D&x2J5X%v8~twxFBCK4gHr#Mm!8kJ{Z38t(NE`_`mvcWI^_LNZ~u%NHu9V8 zdDts21i$C;2mSQ!yYbuiF9SL+aFibZGheVzc^{M`HR0=vUC00eyX2$ zV}5YF{qURCp8<)p_Krt_*!KG)=3UcxLy&of;)&=meWyG{uA_#}^Jb_#;-F9b)-SCG z9tgkmI;DQqGhT%sy($blKfcU**;gJ310Q|uzuvubfAnnsgRWxE{bwr1&yvOThDgBX z?bL1vQeP;36&;oRPk;ZCs}87jq6c~>JhPK|;Ro3tS^qV^RDTaYdQ}*j+YbM)!R>m2W-HuphA-&40D^@ZZem|EF?AH$vNf2gl1 ze(wa)15-b?VXZsQ>g=oVJMpJHKc{{jFO1vvd!P8$(Su>!JC`1O#tEJK|FHjQ>Nw!y z@%~H8+wl;7r#!9KC$@e6g1@2sT#!D6;@8np>OZ=ZyZLMvCBE|NM|ATUC+(a0-R$`m zrw&m0riTrtyu=^IzVrF>@1A*9SozKU{{D|k=l2?~{QOmM{xcmv(*-}*|1%GeIDHJo zZ(?StpUk5s!j`YL?Ju0fvFj^!^DPfOMf|Ls7?Qd|AK^!k^&`Jg8!^exc8bmNVc z`oim8^D6t_$l}8M40U{ocGlEu$3N7W3y0Scb%x@p7%27MDPPBjkA4Z2$9X6CV9Lke z3)4JLUCawxv6o(0V~6vesD1z0u-1z|oBpv0YoP!C@BU+Qz*y=(^PAiL=UH6RJ}^%( zjaS+54YkiDTm4l!_(T4uD?akz`y)dAjZ^OVV8t3bcK&l_EI+f)Uu(zzA3Tu#g(3aX zaV4wsePrqUPp-P;sU3>n#z0m7=lT2(<5VF10MpmC`N?-$KUm=>uL}D|9JNTX*@3V1 zFF$3|6VKXhE3bN7H*a;+WIQ|npNq|Lw^*Hio9*OLUGsTQuF z{s&|nue@}=so(0*@k@NIhxw6jc{JOXzW4;5gZ=4hIF}vKp*!la@7u)|gvbZo$ zyZ)K^DhjLB|C~pH-U;$PX!R%aWB#G|T}&wTzfX0R6ogN`hOV1e|BKjO{g$VC)MNfB z!pBY#K6-}eImw?>f7oi9SDqcZ)T?3Z3FEInmzw$&#VW6&Enco~aJ{ z=z-{%oz#yXwCA(YJwE#A8KURrjvpRB{r(N#-g-uO^7`eLJL!VX?_c&+9mZFE@wWCe zHH{~J;vn_bT{oZo8OXdt@mwTUZU3gH<658cZhiFlA2w_d9T0yngm37&$Mx8Xz4XEg z&;9wWk-xqrj4jq3{liypB;Srd^DHjRGnd<+H?R$z@X|GP)|2Neeg3dR@dp`iK$N`e zMm|R~FZ5F+4x*FV_|xsc{5;Pti;MaHo9uALUoO3I;ad(4XOH>i@q0f#4Snl>GmDG$ zxlix^gWpMC_F+rK_9@J)w4fy%4z+^uhUPW(PW@&23N zU&Ch{>1vd00P|()$lIv$*j2)%s8qKWqGU z{O=^%hlgkgXAF$6~gIWjJP`lQRV$J6GzfU6c%{#G~ugyOfe<)sv4=Ve=w(v^* z&~eZs4tm#}$M>tmQ-n{Q#LcgIW;6br`jr<>JYoIT&rX}+_}gw<=^uA>e*ez(Z+R9M z_A9yE{&^U6fx?;9*qenqi5 z$30yH@FY$*JmVEtl55Dn(u;Nlb{bK7v 
zAav~d^8wWdhwA+w8tPXRtF`?*^@Dw4+x_S9gBtg0hvLs!dns-I8c**d$LrNldGrhP ziJy6b@Id%!9NXfmJFosQ^|7<>+;T0MhY=)mf); z`i8#6>A#K>3OQj@>OZ^z8N>R7KSkwL9)BkMR1Z7FQr%AKDa}j$X?|*lPrdlq?XNw3 zSJ?1VFOGWtiDBqh9)Awg>)#qWuA*4~X8)fa|7(3#cP5Dt*gy%9}^D|G&QUsY{o8Zi8LXwe!!p>MPpsPpMB)Sgro|@ksO3 zw)^^>o0USFa3Yb2EVkB;VBpU6*;IOu!i!2_v>`Iw(^R1aNvW)tth4?o;!{V$Ii z+ZP`A$PXSr><*l<)ddfmwxoZcfP*he!JHX4)+{;*Tt_-zJc-V z{dZ96<2r6TZgJ{Y6sxoL>Idl?`ldtO4ap6~EAc^9|2Yn$etZ6N$@flireIxer*5AY zERS&&&EJ$f>=dnj@}2O_j}4~0#2+>t^^an?(*{FW_Rqh)>fV>U#_jtT_=aWtl zAC^1lwskjs^02V{B2P_N;lq2Vi~SFu(?RlV`)zdBBUWc^dYb5+#M1pMb|_wpgi`-C zuXO+CwExPZPS7WQY{Oa>7vtw*Q@?i=b}v8i3JhQU=wkOhHGIKv-Nj$M_t14t^vbjM z-$_|q?E9D1=JNVy?N8l;-F9b*w)`h7MGMy z-2Bw5dL^C2PhJ&<33oiYY3l}LVZv!wK0fZ|=g_nB56vtto%4$ozrUy{PwNw_vp*`G ze{kJ=uXZT@9s{NApLx%aJmzDs!>0JXQ&e94da>by#)FT3bM}18gI9&A9=r;F>-QJi zocdb-%VCXU4;$Qh!VIrG_W$g6K&xXQi%V($pNpS0+mBuU>?C5p=S%7f#b6|q`mcHD zSRJ|QfvQIw^iFtYGj0{XA$;m5zhtZKy!yk`*B5_zv)y+McT5_-`CTWx!#Jh?e;?3# zR?+isjH@Wo^4e>hzFD2={A!XLihslhrT+IxJpDeA_YK2@SASIUyc6uz|El-vQl9Fe zYx9~*e>iBjWmdlW=J&%vi)?Y@{y*;Ye_yHB=B53~_vvx|GnYKp-pM0Aw_2UK_}PCj zzfim$1Eu~u;qyN-@Ok|iYX6CYUh~aX9`)b{;e+NUPxXkSV>WU4p!(4ZtB<^W>s!Bc za;S~@-g>K^Ii7sG{}nolcKsE4idnLx_0YG}?^W08Dc*=mRsB!@AIkck$29x==%Yz7u}Qo{JxjMnAP6C`?mcq@tRk8{mF<&+RLMC zx1dzfdcUplNYFb$`~H{um1lLU9f~(&LaG0n$9rJM$;Sp0e^_Pz6;55?vJqj`gTF9ws}DNY-|m&g#rmN2`K9&$Pa7In6sxmN>nZ*I znmQ5;#Xq~pb)xANIgef059JXD$u~P07eB~$Rhg&b5Krejr~a_o(+^BtZk1=lxS#L5 zZ1c&T(Xsbm?XT6t?@yEZvt&VsI_R7E(RGr@yhHI;blCH*am&lQ`IY-0<)!y)WgOGf zdEoDbX`XjdkNL3`d+CKWvztEm-G9v=*7?=@2mkK8sb2H!{7*BBiyi;7{pacjE1nPU zgvaMrtCKu^9;h=EZ^wkn{!bG<_@(|kiSs^Dzp$Hcc}czI&u7oKc#`Lx@NHh$isV&c z7`gVz_pkc&@G$26bN}n8tzJTh<9+MHfQ~b-=l}J(W%PNUNbOL(69=+!nUBs}s_3k% z`p8C^2q-G5ne<5=u%%O-i?7$|5b;1^ZQ=gFAk`l^3ba%*GuC^2gKhC;TtA( zdF-T)D&4U2MJIm#wePJQcH8pOy_Wvt*63D#|M)pQ|C48NvF%UCBVVGk^nY39)Bl>< zxghn0;=Sl7^@2+a3GI zT=@LQUpPId9y|Zmkh*O<>zl{#UzSjvq)z4qhT>n5Sk-^_>+q?!gwtfa`Z4?xOu$)(wVk>p{m6A3(1{JNK2vP(1TBxgD4KspHxEBZf6y~T&q@BA`on7b 
zU2ws2Tu`yFJyj(PV`H*`g`exaVz#8w9YHj!|LyDvCaE`9LoNb?_YR- z1oOWQX`g^DU4Q74I;5&u_WxtWV|8lZ&8|s26d%S1rT#mK*VF6Ya`C7APv`BO@bKHX z75z6o>ePJ8!vaQi}#I`<9>$c6`r+JF_dm-~N)bUET`g`dG zci(!8J+xdijQZxSCkC1rlz3WEq z;|ia8QzQ-{`zy6mr~j9+GJ^6mXM&*H-C zF~2Fk|DJ;l3ahjBP9B4$@87j9<^{I;A0wsxkMj8(0iWmHP{$<>Qis{}rJ@egYsfrG zc{&cd_B_s|KYadouU_tFoEc8P`nU;i-T5>+_WQ@0j88rJ4_bTwAt%qo28C5#dndlq z?;msVGafpv{>O|`|D(6H-y^_Bk$;3f3mOEHzmQ%{{8iz)FoN7y#KDEI@FV^oy@n@|L7_8U-S4L zpT5}pNV8j?IOv_k&87}T{GjPJD)Lm%gXm#{J^cUI{{Onw|2%6gRsEmq?f-Lawes(| z*iJy}yNmSF^;#CEpP=&5@z`L>OZ@#yjQ!E4_L;P8|B^K~^uX_&?lo@jzmv1LFmJz# z-v2FEzoJ;3?cHo0zrLRG(|WKQ5^wcCJ}UL!Y5rtE>qoAB5IyD9kL>1K9y~?!yV>(C zp5%Gu&ujm~n!6shz$b6}QK(%tb+5f^o&C=~Qit_nK<(l2{r8LM>oXl+*sbS7>#e7C z=Bmeh)Nb{^*^bz_u5ER6ylY#X?V0xtLl?uDSFDc^oBn~+1>*08@C|jGlCAz;df}4I z$F{CNc`00b$}7{}y1DcJBYgigDD{`F|E#GW)IN&0v!*V)|D^Kl`Ah6p{|DOHRsF9o zE!xb>`d$-1;}HkFOY#_35gusuH7fE{&x7b;gFXEH%O2NwrvJxTJNq4ek2?p>M~Z=qlz*x2`^KodUJ_=2_Q5 zK2GoC5x-li?9s zr__Jf_O`bFp@Q(!?Ern^pm#|geNlu5nr=E@@>CC9#xA48 z&Kc$VUp2G1upRgz@&2oC+4feGc)CvOsWT7i(LCnKyj%UB*4C=*|0XgL{Im|nB@TL* z5B~pn|8;&g9`knXDqD5B_PT}h=i>-P zqf-Bs$Lq-YQIkBY$B9iG^ogIkKzLx49(?o+nTL+k%g;EWHs4AYZguv(VVy6|%pTsM zv;W^Ci;L~28*0z(^*>49tbY3DM3;J6{h!(PRQ5kT|L@xD@^*+G=v|vN%jX9Ar3er7 zuFY=gHY3>QVXNPX--GC3gFXD=oI4+RdDsTEaM7QCKk?d`3!-P&|IEzdV%tCef0wHt zWE@3_cKX6`OR4^*FqwC&|ATE$ssB!Rev%-3+fHj5cZ!n*>xT=%2dxgR*Ziu3ono5j zo#>k%8&rLd4XQu9GS*MM=_kX&yFWaqw&$fY;o0}^Gqbo@|5w%TUz+L%8At1vXseIv zue|=5yj(K+`#usMRQ7*{@aRja|L7^N^7j|;6isiSTOU8jd<>oNOZFW4!{mn_`u*8! z*1}~cpY-m~tn>Xl`~QaOE9UpKkN!6vNcT&^YW2IuBhBj*EPeiU8gB|RzgGWaMpgey z_y0sELG(bc@zll-mg0F97vgh^=v86(!k(Da zfAc}FHqX06J8S&#>J5pdb)^4m-s-<=GfVxC&r0jKpx5zF%|f$-U$ z%&&ZltFGx`gFX0R(v#nNWZ2O!g>$Yx3 z`u(r#&S!qD{zt`9|CP`8Xz=Nep^i@+^qOyWQZIhcj{h54r{>RVpBdj!$G7_*KJ&~? 
zqxOEJbN~5t|D)sQT>nU^UAzi{mf-N(X`XlO(K_3icWtXvM=!mw(tFQ;^SlFA2&=4f>O(L8=||qRvsf#k%Nm{Z)$SSY5TOYhnAqiOqGJ=;QZI zVWQjWfAm!Le}!&-X*=laj-%^cx8KIE;^)GrKJV04zS;kwKV1Lrh#^m{bVa!NQ{$Q+ zZhi%I+3$~L>bR2?8;VX*>k+HGHof%v6Y`sqL!VmxA830@{nv3y$Nx_9eTtLB&+kak z2dN7*e@*hRQ}j;aJr zp7o3KKa;Y!@VfV-_4<=s{fc6>`j(92okZxH#cSdxPCc#uPiaRg`#+%_ou&VIG;MqL zPHgnR)NgfYz3@O@KfKSGzhZu-r{gnTFMk-j^&iGBeAmyyif@nq_z&+|fjqnZvzf)k z-iMZopT8ck?L+%0tXcY>)LHuf_eg3Pbg|7mScssF0S@5tb%b$Hj#?mR!nbJ1Ns zkEXitK>7}vU-@434Cm1wuH56@wcdZC5x$;}Tj*00I-kEjEA`rTpyTD|$KU_XuzBma z!Y;49=JCCiZ4V9M^1L#?R{y8BBc=YkwzqX$-0S$qMVj>)U7z@|t)7AIJezke{u1B) z_xQs$%Rc@{-=c?ytuOfLf)^aI5q-7af7Nsx#e6T_|Ij}$sYkL}eN;bv!%pW%pIZH& z(atUPzpw3W^?$e^eCjn+9&ymSBu{ztA0B9W&5AtL^B{WIU=M%)vL{SEcbSKO(?9&q z`@eY3lx?WTuD=@0;=*?7S4+-6sJ^huYtz%dVOPzs)&H4ol~Vs*+uQ2@NWoH{YT`$q zIOtuHM_&}-f$VqCBfn&;?!5Z@hdkSV=>tn$(!bP=JMVM&J`?GS^?#DoZSTL!$KT(r zlWxD%C+yA|KYg=08^SR=7rWK}!SodV{-vH5hI_d;F27?rYbT+x1t? zEG}#Z`Lp`}?*r-w89x_4YwzR{eZ#KmTdV(3sj~k%4)ZZR9S6Ivy!ubFxWEU~@y$PZ z_I!&wiTf0Fyt(y<#!r8F!y0El873aE)GvQO`b_Gw>wg+Lo?_nq@}>PfLEE}Z>VIUE`mcQY3ZL!PQ1#)}f7Y#Ud5o`U{wcHPTbw){$Ak0e59fX2 z^WmtkT@fz0{fQrMFw6NLjz1exkM*15f5uf5R;%AyhuIphy#ApkOy<|>fAp04kIrYh z`Bqm`{N4$o2c~{(!=w)8LELn7e)x?ymwxoZx@R45?`m7_9yWODcfWb}_|E^o@0rEL zwkxgA{(ntv;==B%@p~t}y7mp*>ee{*wECZCt);U6+qI*e&sU}XYah@9y=!O9lINmn z+l!wpw(%PpS2Vxsd#7=5F8%0*2?Ng`dfr7t!X{&ue_@S1U!X5`{m*0_2i#VV{~1qF zSe-RG-ifcSeZ#iCH6%_wt^P+(ssB0-*O9`nIu0WaGOpRGN1gaV)2(&a2Om8{^qlbL z)E{=f^e=DR{K5)hmtVgy>CiV1N8heL8Bjg&Q+oe5##0nld2M>*RENA=?V2F-ZuP&} z_Ehy>*sZwcmCy+TJ)UQ5kh(zpy%4^kj#sjiI;wQTsC(`?bKo^+dG@{56mw?^g!?0*`4Qwxl>kKzyF=0<1`gli09uP@lI`>w^-%1cjB9i z?SFDRqtt&LC;dLf`jpE!-U-r2F!f_wKbzformN$cj@p**B+qoP!5;ju;e%)Py}R`} zVZ-Sczk1?+_p(2=^KXNy4>r`Ub)#72wRhqx{r^`keydyU*7iT89VzwSwY@EWpCI=) z!A_BWrN5WKU&SY0h2F{dmXBS5RXX^?8eh2Mvac@mbf}&9*ar_k_>5N_+x{nIak2A1 zBjfeY7uo)Y=R^DAo$y&xXV1^Sxn#8ZKdt?!vi~*VQNPvE6u)8q-C5%={rq|=f8TsDVEQiMO%vPt zk!u`uYAdgPme}w?8&~Twe`EH1i!0yq84o`;*ux)2opJcs`)*z}j2XVq8+Y}816{87 
z@y#qQhNI*CXI}6wy6t^Utj_zN*~;U-Vd&FuwtME+>VK@b)PHo=5@q=0TYbtS4*JB8 zZS@RfaWQ^FY~tqc(NEKL-T5VdP2ypv2j}1EtgvI)`NY?68@ALGc((tUBs_cn9jm{; z$kneXR%icXw${t<-@TKF^^N&7#NX_IM`v46F7-+C(IXBrp4qBb$(m)A$m^pzzRQggf~Ars{Om8{=T&@ zoG|Q=Gtjr+e>GJPMH$9>~h`n$K3RHuW{@D zlq@bD9IxO1*3_>k)@=4aP9paEJGHI_%C(*MK6bw6!m3}pX|V-X&)JS_|36devVQe#|4Qo)Kof1g zT8FSYYp;IhSFcGd^%DnM{cpCJrT)9Nw{`wg|3{ViSY3Gx+xa2lpm!2Cn|>+c2kC=7 zznV{-=oz915A5L&TOKy%kmG;k!cOn(ebN_K8iAgD{+*n~#g0Emjf`J_SzQf@v!=e( z=KLG;1JS{5^?#t9Th;%R?_HbSdEI%}wpw;GLRQ~G5Pgt3LHxZCzM&KSlCAz;dSTr4 zd!Kg3yCcKun=JM1KQ4Hp*E~D^A5?u%`(3r&QHNx;`p_qjO5eZds;hDITK%8Yj#Tx( zuH!Q=+fHk$=LCs^KJil*2oHo0nxA@Ak2pGJ6R+T-7Y_L9I+OQ5a9TL-_l-}CSnyc# z`P^Qb-;#R&e{>bIWJ&9xZ>gX0sHfHc$!)31{;wkg!6)BP>md#jPi_3A*Ht5n3-wx_ z>RX=c=9T=!D==*P#E=cH``t!i`%fP^dc?V>kYD=!(abC^p6`EiwH3wcZ11Vfc{A@M z!hD$@cB}tW+KkHnZzTEFk6dhCKi-K=98CQ-Zq@Tu6CHF-XKwr7gCAC!QTyyEAMP4f zIeecr-~7Qu>ax$jd3PP2?;p`s6jo=Aj(6g#Yv0UY6Ms6tR{y89Bi4V$al)_H1+5?I z_lo#aOyk&wc@`J)sEc~d?}X=5OzI>~UKNIw{?&KyBCoA1s;zmeuD$;@CBple9}>TR zeAMd8C7!Om6Ax(TfAHrTr*5@d{h!{BSpQwynx(G)t?j>|<7$2MkLSZXjf2bs8|>i^ zYn-^)#Y4jGp?2PSi!5;BboA}|lR+H^Y^aUCqIf%NuYS|pSYp`hZ&JI}{~2v&W&ibi z73)W?x?rE!d{0yVv&NmE8UiZ@jW|Eo7(ivJBb)l zcTN1Q{tvbvS^u5*y&~uFm`}PNghw1q{nQD<15H=!Ge7leJ?NriHt`<(u;$)twf}b{ ztaavloBv{9D%(@#@&Bg!|3}R%F802lTwCTVRtKM=`G*&uZ}Ftg zJIQCBW{=Y!=5GGj@JSn99k#e>`HeRI-duQW|1+rk;Pl1+|CD5v+r8P^Z)N|JDKGO; zJL>Q9NS*#{*QSTo+4E3T$MZy!bC&A<-s)Ttw*4)yd(NAZmm>Q3w4Qk^p+9W$s^iva ze}7hJ{`QDB9dP0w;I-rbW}{dbH`_D$|2rL5vZ`z9(9Ju^L9cXvQU7AZivC*1eJrW7 zANX*)W3*S^w+%B8j?r<75z1jIOttc$GCO*ndGB~@ldCH z9$ZR)So`*`z4+3{8)4R8UYHNr#`ga>~sp#)=Z``jp1x=@p*mfM|dgGRt z_`TYO%47NP4m66z{AydBleo=k8P-R)Hig&uKfPGDGAimXWagW z@<>I0)hW&QF3*nnE?+N#+V_9_{%@`r#`dcoy9Sw89fs{bbVqn%`{`ka6F0qd$}Z*i zU;O?tYZMFHyZ!%{&JVwLlE)ewDsF84;i=G*a9Q~jV58$R-| zL3lv?{g8Y^9j{_1ebn)W!>-!=yyqXE9gh9oI=ek}&Oz|l`JX|VkLj^SvHxGwbk8)d z#x*iJ4@@L{tNPo zivG+)$F)APJiC0}jZgDh$@MB$2cIJTerWvEyJC3zeO|x%G#=L6;`5KM@TU{Q>(77t zma9w)@Z0hK0^#?H)SEto8gD8(LDpVvyZ)cPGs)ojgE#8GFpn6&(>z7aV~}spV=_K* 
z&}+Tf^r46!G~OhRKJXdBr{j#*-(KSnpPaDorBm87&RzBG+h2PLb$0#LSff}tFM?jy zUsK|&tv~uv$3aNR5Z zy4P|IU8u9|-$?*}_T%FGTi5#5ek7}0r*U+H$KpcV0AYn*(^j?Yh19#v5D z;p6p+4Z;WF?}y|YCjEKrq>nn@u=Zh}zWdC(=Z9JM9K7d~3qK35UH{W<6pJ12@cgHK z)kCt&wRiHE|E+_c?H*qGi~1KM)al>S|4}91crqCWe&Qg0?|Sp_lLzv=rEzTIN%^or z^uh+!AKtvPhaL4VI;`kl zcq5;p>6G}1^Sa=;mweFr(EjlEL*v&vvupb3#}{Tjy3U+0U-3d%@6Wef@Z#x@!)N=S z;YP98^`Eo#`sYmjpc6Y?d-dD>&(%gB)1fJqQUBsmMSq=d<$D$N8;{!Qyy*jf-~6JB z_@WW>aA9-LO ze|zse^@o`oTypV?6R!?$IQ`CfYy9yZ zcx?M0Z4`?K3w6r~e%Mj}ZcbLGKYr6G%d=yCR)D?Vkk{Hz{N#hy zM~%MZrI^-vmyC<8nAWFu*k;-VpZM;i5Vn2&z(?QwOnLqJk;*%kV88FbbX=*5y6F6D z|F1TB6Gy+O|6pE=e%|F)v|YYl%>Jh%{xmQ@x2)Z1()kCtHnGn7}PdU}_8qkq1j z>6D4h^GE;WwLaRLZ*>D&r)d7>;`1$@)Opv-WA-@x|9$^I{_5d;Miu>a-h7TQJvw=I z%-{(Gfpr+r9FepE~7(*Piz!^oOkud*$AZ(|#7VJ@CMXU%g;1`vv>{ zH{l!a|DQH=p)Yj9uA5)n|BU3Nb^5bin;u!79rLpSOn>rPXNsSE(E3n4&96MzDW-Ma z3BUQV!9LGdSaYv)PW!|&7YBFWk3JWER-XSkTzRuz%#MC8{{w($O`k!HyWTvpqAu#M zod3xr1KpTs)PH^+F@C3cI**nj`P3U~K6$NMq@H}x^h)`L7oT6L({VhgJZ6v6AGSYi z?k^|)ZlkdM180mp``|Uy+wZU8QOw??_aDm|#lpPBD)ly7`{I8CZQf2Y*l#eusQ-dI zQmg-#Qb)e=CG#f^R`SSKBoAa>-qnn!e&WWzg#K{&+Nb_`*@w3bhkoO=Ke*!25%~DN z1$_+HUdsDVs&3Y2)+iRnW8P8!g?XT&zstSx_-BqF`L-YHir+gy;$Z5>HcXy}T0hU5 zYZQykr^e6tbr|M6a`w+JyLfikeD?Vl?tS5@w*Nm+`8x>?=e>Q@x2x~4zAH}Ot# z@EfW>>OY!iRP<+lSsq>F{4;*g6=8cnXl)~QI)58i>(Y3goz{8AOUGA#9S{Dn!@m3N zG4t|U!**ZWWA`_G=PCGW?|;j9gNO1e=L7gr$6_x zq<`bldhcR!T!@3-<=)b7edze)fyUb{%8bwIocKM+Jg~t&{t#At@#IH7y+LTbXTq`X zdh7`JYWM%A>HSaOS8UWQ^owrTb@OWvEL$*D^w)8CA2FTO@4-4fHEwn2QkUOp6bt(U zul3eHHV7X!*vB6}y64Ml9KYT`_}Hh8S$?f|mjD0D=ik9bv6vpKFaG~8B&%F|Cy(iu z`!-{n{+Z;VOVq#U*v3nFw(89{eVXEjA3o44kJ;#=h#$26I>~s{DIdIyYc}yd`QenG zzWm|K9-16JwfrZ~+jHg+JhuO#UU7%w^8YIyA`fiTEW}%?8+P6NqW(qa75#NSoJXqY zlt}|#^N53~-}=ybkq6rQdctRQ>W6n}{oxH4KlbY%Jac|n`!~N`|Cq-fV0+^F6FcuP z)+iS1tJ7}X=qbhj|7xA>|FcH1FhBeLH}P9vI-XQTU7S4P zxY6>O;)j=cNBy%rQmcP@{8?oGAL#G$E+VhahyJj=%cJG|mewEMc*-#wuQT_BuwtlpKwtwaUTAq{IsDCj+MSpl&l4tdq*gTKkiA@}| 
zag-;u$pekA*~@2jI*te7#RmKM!_)^}_sB{2Zx^P1{G?sK`1W%De|V!pgyAoGs;cXN-8>vF5y4lCDzbu>Px=oA+Swk{TgA58l*zw%(G znAUkG{N~372Q-cisy}}3K5g&!yZyrfZ(Vk`?;TS;|JKoZMf?AeLG>$&6?MvZq;V%` z_rDrYe^W6NWS&v~g3`Fziconl(&o%CaVY{j%bwcD#6 z(7Yzxpm#|ww==u`c})4ij@nr-iWTb}j|8#p`cIuN&y)4tl>DgwQ2xmDcez#a@;fr- zVLVxpR@q-$o|CxQ^r46!w7%-<)bBy~u)#k5u*r`PaR{z)c=BL}gcfE0VQ-9hAd0-u{)#*6! zF0H>kX_JFqJ^$o~+mjFY_@38pG7Ubv{si3=vqyFRpEZhw?N6+7J5BA%@sE=X=E=OH z{v&y$R{!++t7b2ccVfc_rhaVGZLqh_jyng$U&(j>8Gn1q2VcDVg;m#SPkDacr#5=^ zy^Le}kLoy}J`d;yN>+8v{5WrbU74TqM*ZjKEVcU2k}*uzrr7l1U2oj-5Iaw7-=4DlqnAFt>yO*3z3!0ZSKPfk|8}VIk0sbq8_d*Zd`0n=uZLEm zo6VCt<%{|+$ZKo$Pk(=r<=HVmE5P1=?DrS{?f3t;`_DZ5%^BV8_V-TaQM>=&I=TOI zqPJ9+YwzSSdf0Z<6hF@c81-M6M=JX3IIT`^oo)BZ=Us0cUK>|=Qky(5eSMmrdL4)H zjBlKN__+Q#eDS8+pFHZ;^8Uko{u-3|m>#R?|Bq+tR}`yUn=aZf$1T=pQ|r5e^b_?T z%{?}r%dKcL@1eph&j-T;CV7011V3nenxFcyQ%vi;lYY#P4bl%bsQ$3a7v9@idE~C} z#vkAF*;o8}HaxcfA8Zth=`mUFztL4c$T&^$vo=26|6r%jk9SfL^&iV0*?8*riu{fq zU5rP^g`YS`z1j4uh#xdw^5`SA|4o0m_g8Oz@}6V&508EJxGDGTJxc$kf2UC_j zSpB=--Dm$}uL{$DI&|d&@7@7kyZ>a?C>ERNlHUI?(T(}m%`fU-RMc+&YfC?-N7|q3 zjay#gw?4E#@<7|(I>P6*-s-SH<*~eR`omG%{^rrub~rX1v;5Y3-SX+{sAs?FM;par z==J-rrur4diu(=6BSGwz>Sp~jFGc1Z^)DD|_5V-(|LK7Cr^xuAZEr<4i}~JC{T_r5 z8?5m&PT1qj^MCZ#YtIOKZU6mKe|h&H+a=dObKeJ$I@3}48_8mE?Zf{6lzuWP@EV6V z>fg*G75%B}=Fvs;=!jj(?~1>*mmvJ0^{2e}CA;Lu52khANrm~b6{)MkFyom&Eq~6= zr-s)b_QlJ#oWC19cKzF!@`KDbLCK1`HF>0U1A>+FKh$fS*L&1I%PUMz;x6|_KYV(B zE5-xs=9&880n>P$O}?Qk+OobpwqhL*{?Pr}m!DnfK>c^{Mdo*K!mIPP^FMfgz0O@HzfxsM0^Q)j4r#6hq1W;1S|eEO%pVk_^``ornJea~rE z?|D}^ioTv{HT97e^k+* zypcRw?jIQ6cMG?@|L{BE)`!K9?x6LdyyovLKHuW>QHM_ZNaOfHjfZKsUh{k85t_W!4e zKIjWtf6cs`MV@c!T;VRC@1^ty|MunnJY}RA{CS7naNs3FUU}{Jrweo(#k&3^tGf11 ze0Kb=wr#f^iNhQ9AId#-`g@m07turYF})HUxbF)5AkR05zaNrs=!$lhe*gcCH7dyfC>`3`=&{$#jOEcW}~J@ogdjF;#w+2z_h zd5muG*Uc~LKR>Ui)qiKu>t$zz zIX~O=C$snM!ehVxO8Bzx>gQkjP;4X%JYMU$4;yxh^b_@8(1=T|{=4SUf3g2}MY#X= zuX)yIx3|s{SEN77PvT4JhcBEn?fTO`bK`Kh@tZ4u?xSPX?LQeOThJ&L8*jY&t8U(D zeS-88^dE&`>=cQE#8VqTuSbhFl^1ks*P-g*>*6*3@bQD!{p!8@EEmq- 
z?zm$fTxpPcJO9J@idp>guXqPCXYae}ZzPL7KRPbof5D&DGw-PXXd^DQ`mdQs7uo)^ z!Yudy17gDmde`f>{_65|UOX?PHlO~;OKm58zzdJr#Hq&y z)on&_>2FUw`@tLE@x2M{Ne68DST^(`Jf{Dc z@`D|> z%wq}tVY>;d4_|ruaM<>!YY#c@Ck{S4{u$PBz%?gqmmdM8`z6VW`Zsx`bxx4;|CRY@ zKK`hGQBl!fdEh7C;|B*C#X=sx`yd}Q9W@_+KTPXRkAL7_i|AMKaKXpUG!@TUcKTG&E zk9m*}+PKP_`ms|?>%2?G#Rlmc8&rRoef8>lzV?hg!n(5_-~W~OPWBqN^Pk;DvDo&$ zir)XWqkfQan&M~eojj^N|DgHIGwPq^6&3wm?v4JT0_^yn{g&m^hvxA)5`Hk%G4*4o znAUkG{g@vcq#tZh{b9pZu6frM+cd()?OBI@=Bm5l;rlPnw-|OB#gcA!;*I(^9tj%X zx@v3ObfZ7zjrtdlOi%JPueGu8pbM`XL(L}+`ovFPAbFt8ubGUeeZXh9l>YYGdwqHK z?Cmyh&$@r?Pba+m6Z*09pQAE9+nwJj-hJS?TZhweC9AqNKGhAoZhle!Zq8DtKgTtu z6Z1}SG-rvw*S<=Se9-#PI`a=MKHuU_;#Oa0kJBH*z-I5i>8wTwgCE*GTkAS_?f!cm z9ZxYU4*#D#%eG(jOIFma$s?_Ef_DAC+T<}FcGQ0`uc+uxoucWH#pJly^XUrHUWfnn z=bzV@{L14#_oEfV8b4d-e^=l7Y4ot)->JT)H~ar&p47?g%eB`$bihV$c+oTJKh%gz zo&K#=^XMXHdx?Vs@!C9AsjPJHNQ{b%A|jQ+!Uq@q9LIjJ+9nqt$R@oQcy z$+tTA6!G^%(jp%wPLy zBnx$|=@P*9PVKHBeZ}p6B==PGceyw2&pU$T+jg3XAAaJXcM>-n{S@(o@YL2RAMrH5 zpPzBUj%R*gr7vFJ3_DGqw8p?U?e%N>|G`GFm>$#h{u`=;&0pss*-<|ykLZiz7HoU| zl!tzz{`2!n8!zQ)2`~A^r{iN22l3Y-d0;v(^D_>8flKKR8#YJxTkws|!$y-1Sm{s8 z+(&<=|A@?&>k0f$j`}aiA64{sxi@bAZxA#+nqu4Y>3ZXq zm-v|{NFL}lpLnfZmp4v-d*XhVzvre&!|h3Xe(aM|XC1|Ow*MJ!6pMZTweH3Fa*1sp zna0soQM~2r0NvpCuD6bPNBtM(kvjbumwe-6e0Z~*B|cx93z83}b@=?=MWeHEeeC@^3ize}>qO?ECjY8PD`sx%mI1YkXJzpi^5>tf-^v!hM*%#_^{Z z^&ib^ZM-DE_4+)z$nj58Z2I<2>~!9#KOMJ{4{x`Z&+44`J*e|Edz}8T$;CT<=!QL( z4V!=UrH$_X^OxW^{kx4~F})}1`G?d`zuJdXEwcTut~~UasofQ1{!#z2+*7N6yEmWX zr!*fv(7U8gZRSh8KH9hRO!X<0F-Ci*L50d`vyw8x@ zQ}y_>t2T%ZP4Tn#P9E9!zvOkqpXwF$FDmNv=RQKFzm7-WzNjwWA1p8N+ql{fd7y0{ z%5Q$nXB@+&^tazI?c9ea&wEpQ=8J2dy!T(9r7!#beWXz=cKq2 zZhle!W?oUzU&rA(F4LnU{PzB&dBj2QBrmnCpYFeoTb=TIC;5h{9~-R64>RsuF#U=z zTpwn9WtTgbJF2|@C)@vsjx(g_^#2zI)y@e9$}b8)j+`36hthcM`Yf4Z9BMCylE=?A-dF8T)*Bdf55OD_?*6Nsq&8 zpTCm2e%GIP&2Q*}T{-@2NeeLN}!%yezK7aDYUHI+zXQ)vu_CD~2#ozydJ|w$b<3~66EAz{w zChFhKAJys4=RoGe^Jj=&!@~Dp|NoEcy#4F1eB(;f!|X52dFef`x!pVM`$$3CuXfd5 
zSuVh`{dY(Fti9urAa>>c51Gc%OYNxtU>>Q{zqN(bnf^_&?fG=Qam!2mwjJpBF@mhy&R;T`S zeo_CSMqFz3PtGS$Z>YTJ1^UEqebninHHw8i<5^0by;D9=dDi_s(SJbVmFs^p@f)A(&1?Np z*t3eeV6$y!NyWKXLzUU;Q-w)V}|6|EobAA7uPY{H%@7i3i(s zqpm6OsQ>)@QAK|zadb63I^wtOL;1*S-6?VMLGQFK#S!t7m!kEb*6Fx@3H`y{nVqxR z{uhLqoBZzz+wJx{`0e_W1&w0yiu7ap3~D?bKe1aIYMxVUCUt5@{TJjB<9E4LzOMXe zVV2kbzymsolefgzKMeKqGhYyW3>j}g$HfNw_`}h6eSVj@Z`v{(b@Y^5-hcL7cy0Ud zDla%$&p%}9R}?Giq&k@GVzuC)=A0)pPozxSjt`5V@r=Q&KyicwuRdL=rPuu@@lut4Hy#D{lp!yZ9K2!fd zFSfQ7JMDw<@V5sh6p5z4Qyy?gZ1QbCmWkgx#Ua7gI|RuG85cCaTYSF7lX-c~hsW%3 z`orpPUhB7?yX=Xu#=Bnq_Roj2skif=okp>+on?0x-#^aBHe@_SsVesimFsVtl0iN5 zi~1Kctmv=vZYh#)`+fLyeDYd9lyS%hO|O)1WbyfxIvvM@%47C8{o&}-Uj4Rfw>mW( z`~Byp9`xs_^ktuayNzPu_3c+K{{Q-*bi0$RQt#B}dt3OulMHy7U(~;d)alQ8DDu-j zjCV-*|Ld=RUVq=8KR4&uwZcX#Y?Dpj^H%h+&tHQw57XoIdi?|2o1$cu`>)ilT>n6Q zS2EEv>R&``JSTDMYt!ER%Jx6d8;94sUO#*wd0@(Ge&vVHa4G%mwYS>j=v}^dLwoIu zS6cp}h2{URasQdF^v!naIseIcijq}bGryMVhFv$msDGBTRP=YbH-4_2BxrhMV%zIL z`ysFOON~20t4sPf|NP!O{Gj!(d>(`k8|>o`TkQMwzkhtz)?uqP=R9!OdvAx|?*F#1 zQ7o0~zB}p%bv*HwYp?#=^FLb8{G$Gar&j-o!cD&Eo{8T(!ItK=?(Owk-GKTP%|9Z3 z>=e<*JIOPIZwdWj!|xyV&O2WJ?XcmjWq1D3;8)?Z&tJnj4miE||H0?&_0ZKg<131{ zd_QQ_{r_51GMQ)8zhJD@U$3Jh-}FevCk}eoJ5E|h9%#>NU7h+p$oSY`AAeZyugh$G z|2eb52Dh&C&Hrtd_y6bn$H7Lim>yG#?_VC|xCNA~Qg54|_EEY1Fp~rq(QUBq* z7M;Dzt&+F$|L@c%$nyhwm$NMLxw&6H@vJB-&-dYX;`iWE`olIy?fJLIzC9sqf7F}b zas8L-z2FI0YiO1~YcL8qwyNbafApa1nGzjphdFTAZ! z36c+{{h2>oe7?o$8+4LyeAr-`m-xfM%f9!AlfQ9vIQaF?h4;QZ86JE8?KX;qd1mh} z{{L>Sz5iw!PsR~%)KUGlzrRT8qW<&q+KT@0yiu4-#@TcDcVWKGh97oga48 ze?iVs(ck6Xc>LcHq<`uRRR`jrcS#+3^~tZTQ$FH0uW|b6D|~F(Yya`s>z4~7Gj{yp zEB`nMkDY&LHj2gO$9+$e{w1qi!{eR!&@J^7r=O_*!n~qZ|Md7j?a$@a@p4J@tkME*m}RukhRTx1)_>vG?C? 
zi|=3V;e8jBta7_ITl?bkFY^EyZ;|)k(Y$ut`e%7|%+CsFcqeH3|D@Mn8TbEZ{lkX) z|KS5G{q3T#(S84T>b6Ixp@*IS9Mb;5b@lzXseVPV7NftDNbUDu8b_D7{g34p75!Bw zc*y5@v-hKf&+-z#SDX1Vp5-$?9oPJ7TYbud4fe?o>%ZfYeV*KEB&_dG3=3Djn*9vt z-?;BKXnk}V#lrl>T8#cWKDv1)nT%ueZ;HP?utMR-&HR{!MtM0*~a%I5@`9|*tM=%t7sw7!(L;@5HDwfu4V;R~DpbJO_o zYf{+iZ|~k@-WIFEWBdQ4&mCmtCm5!MX&WzqBv< zL@#Wwe1_;WPCtC%@2|V)F9)AG+#b04sH2a0(-+~l%5bC^J9a` z@3BGkhd-V7w>@rs)mV6Li=V#v{wK=g&%+zVV!Ds1-BHYXQLM%6|D8na_eb;tPhv;? zv%I3BzstSxxU%2%pUPL!%@yU<^9XhMP02@B_^`e588VM0^oK9)aOyACS!N`B@qr`0 z_4!-7@Z0q#-A1w4@kO8O4<)-?8{gK-L!Zg~cs)e@J9%bBf9ghr$Mnd=w&TwvuXSo~ zzSWVZXny6xPLcT-IvpQAY_N|%Y;f(|U%zeYgs|}z`+W5M*RBO`-|t_VI1(#phdG=SQ7)l3%gM=?`nI zFlAzP$y#C7a^0;y_V(T2GyTVuAKYE9KgrY&CVfe?d_L^{hfeD{iqxqc^&iY575z1@ z6{ODe&^mj4Y94XWyQEHS{Jj3sd~`Lx@>?A|OX_bniY2Un_NQLG&#b9o<4wNu$Dchn z46oh)Et5Q6-**2Qt*39vD)pvs<6BpGT!Km8QU9SlqfYZy8^* z%C&d$xN`ne^`VaEH|jr}S5)*@9{OQkR*xQFH)o0O*Lw?+52khaCA&OM#Sf-+-bsb| zu@$MS!?5Mu|GUb>-@FvII{cnFJKS}w*E&1@Kh!7|`~GWHz5mIe`W3~Bx@A1lxD(|5 zpVnuh6a7W~NAij~{ke}B`4v8wS6B7(F87xHv<^PdyF5Gg(Ji2SebKl3&-lZ+C!PD} zvwpK>IPZwJ{^f>qomXD2TS8Czl~H3~nwR>CgT!0xf71TDlF7WI{_}H>=}*35YuDa< z_J1HeDbj~`Qg62MR`RpnJmOXd4@jTbiqzF%*rt2Tm9uu!yQ;-`Gta{s#bWwrYOk#K zzjbOWidC+?`oYTQ-=^d;9^*v)7vzzO{x0`Me^$V_kK^m+ZR6JHKi)d;q@OyhaSslO zO}_O3A9=%)*E+U0-|7a$Pk-judh8U*H`M%!Jx+gn;#*GK>5I5D~YQ^%EPb&Vda zRkYqIW{Mp_<`?xJ%RLqSo#gSnkq>_bwO`(M$OqGW{QWSk^Yo=Un63VPe0KlsthxPe zTW)$X^Rwfh!A7x|9;+7jzx@>3g<&$jR7D-tU-|t@Q+cV&#BLABPZ4Z9r+JEEmAt$k z7-}B#_K9tN##O}M51Egl@>lGdaZ~;<@%L}NX|MP16jr{z^WbMD97Vsj{b!A0VSd@e z#rA&@wxP~LvKP_EiHFZY)@LSuo^Se!`WH+U{n2Y!>d2?wFv)8j-GO${ZHbTtf+g&BSHGA zZ2y`^oN=T6F0Zul3M<-*oJTS}vPQ8mE_}p6?T3JGOpRg`{aim zHe3J02i!b2WVz?dSzcR(0*2_|VPz?1PsuG$&^nqlgn?HLw8WJ%KeVnP9F9B{#Wy&{#jm8r@wc3wElCfcX@P?*Nfiw zuU1FZEsfXNrhlq;YP+Hh`U(ysLbj*Zuy2`)_B$Z?AhD zPrT)tez|TLo98XHqyC+|+VoH61rPJF{X|#B#Gj&fy>aXmsWV>2Z%V!O>(mCx!&Zc^ z4#Oq~y}a^e=RYrOao#$Poqt;(k;?ae^rJ|9ir$F_-LNb3>quVIznecY{atRA+cWz? 
z=3~!)R;1bXLtg7@@skgljsv}M&U2BMVp`{&)SDk0RDO>Qsy}Q#al8HhcNdR|L-=6#q?fZ&;Kxy49V{xqK0@P%xP_ucr-sgpza@!$76^r-6hkIWC8^hogq z82c$ux+&M5@f2G#C1!s36zM1GKb&V)^mn;8p3mP*kbK5B9O#W(p6m6SUUm6R$tTZv zG~X+q;Zpj;hhF^RL1%6|HJtIA=eGLxS%dJ|_2;9FVqt&kH`D76GxaNqRbA7+eg8OYdRRP@(6e#dHhzyrFxy4=rLp4NMpMaPx*A9lO?(HGzKbou{-{Qf1=ahihJpNks+O||}Xe2G@q^p9@XR!`rV)JFa1=Z|Xj z-&yiZ5A7@EBMy3()G@9id7$kdleqP-eDE%*pK-#YZ`)_WZC~z&v8!JDo>!h%UVnI) z^vnB_VN-wqn2ayk<=PY1{@HI^pPA&E-4r|OzaW29tAG0adzNR%{Hy@mu9+Wb<7hvr zO&(}`+L!sMcRC)pl>V^J3CF*8pEa%v>-=uc-@ki`qc6Mv=dg|gcGPA(Me&wvctEaO zz^x*gMOSL*7@P9!*KW`*W9uB{ENcTM~^=Aqt`5i$G(3r)QH<3*l84t^_Pjwn)=j6H{w=5 zAeq?hf!7ue)1SDLyq1p7eC&B_N*sTRUhDB&AIgs(WV!Yc0u&DkQ$DbO9 z4}=e7dr0Hh%*zm7r~QC^{Oy5XUH+B(&Yatx=x+Yvyr(bsTGu+NQ7pFIkEy+de*PU) zTT!fXKkc2y6z(GEYfc#yv!H=6yf*O zgUkyX?Bj2*w(V{gowJ^6ulB~@ZL-UIkA~l_e?}+e$)>3OS)*9U7puA^-b#NrnMp3M zbLJWKFGj5B?=-(FNWN{SnZ})BS1^6uS{=G5;+LxG>xVe=F?3QYwGc75z1@RYXgD z>hw?N>s_N*@LM0siyt&z?bH19t9%|@N`IK~=+w8bx&6n&jE{6~dCz_)!DsKkokp>+ zoqF#7sePfFWG(V~rXR&tCK)zwY~^8oQU6Y!S<&C+-tze*-@Dvf`n}7o(&oA&@ubIFjAP?# zzWH@N*eRxU-U+|?u|fL52G!qQdGGIj?t`EBa(m@4e8+pPoCS~V|Hm4|!uyiF4m#>r z6e~XOO&)3736B5wN3R{sE9&&G{QhV_`03j_u`9YIe)6oY7%|g7ze4Hf0XQ=VG{SW0C zwfgIIT&71y`MQd#198whiKjMx`mlH=`D}0ISAMH=nzy9>rqqQkH@Wh_3-9x-EZ#IgBezG^| z{T~L^4<_SCH0tQ&5%+(n^q)zbc}M+6a!;-PoA%}#ALA>+2YQ#(sg2)vlQ?{x#P7uR zDUv^4zvhL(HP%@$GH)cD_SFyHbp8cvp|f3oxUf+ymHQuN>Q@x2)H}6JehWqEXw{WNh!^iI*cylRm;SD3baaJ)&K^{08}PwSF6dE>>f!{{GRoch49 zbQ9;@lt<`a$HRWZkiHT`LPw@t;4Y9^tXI#*>nFWEO8#%kA45xl?Z+LRrLIGrhY}SV!h*$ zAa+ar*gVhJ^cVGC*ocei?{cf;<$oN=H=bs19&yk+iJPtSWWM-8y2Ap;`eH&nC8>3`O|q~gYaU5slVAMmiYU}Gk^7|^X7*+ z*ZpJj>+UT7|M6`SH=Q&;`%-cH!=qEbWS47;JMp!46ZC2`FKp%)^&iVKEBZ5!`FV7a z{g3vQj>qq`TOaAoXB-<}dClKhe7?ms-|C&@S8VlDSBK#zmrnT0k)z9nUte?Hy~l1` z?tl3IsZr$z*IE4ePwzA@6J$+a_Wc+A>pY3`KGGgosVJ?}pYL&)kJZDgNF4M|Ix@Ro ze$rp1PWj-qK9|p`r89%zWbQ$ zbvLyqKDE{fGtd1y^RVkb$CMB3sNGC(Kj{M-cVfyLs{_!Du1o%w< zW}{dbSFET5^JZMoks(N(o&V8zrC#}>{>{9yPJgbWKo@%*WqDPsbNRY_ek{)wZaY4u zFOWQt?aDJW)n|zvAFt!J8|UMjUS&CWb7`;t)a!Pv&qC#@ 
zUQz$75tmy1`~3bIeslzxm)Yo|h#xfllQ{hF8N%lp#nR8uIN@(op7`S>+cnz*tKGB9 zGjDhw{C57Wt9;-L{r#O&zoJ;>+Nx_Tfi91i!uic8g)k>&v+R zgH!*2VBOj~iL}!1znGUI^N#v=^9s{n`Mt}%@q3B%_`fNB^nxcv?<5aD^91pOwtviO z>XgreNuT)PtHbcA!%m*J-2YB(=t4jC{d+RM>_t8Q%y^2_gYbAK9&|H*NBq1F=r8I& zm`6;1r+LYE{2m0|4K)uw@~}bcBgrRThiRQp`@>eO_I{(tHk+devtVxbNcZ>-b#RepcbOzPB*`VZxiivBM5#_c~n{|OK1 ziZpwlWu89q(-%k{$aYa%r+mcWF`M{6qgd+v;iC0s-ujNqP7arCdEu)cep|W!;qzBV z>%nn;|0`8dM<&Jc5+N_0U(|noo?+v;+**WZ3$aa) zj@Xv3;}Hkx!#m9fHJ|f~mY-=}!q=AvUJ!n4Q2inM{{5|Mmw6$yRyyMA)4zQg{C4~~ z)F>9a4tis~{|r2el2xwZ@fxq3e``vvxiz0rI*8}@E^HZmM@G`F1#B1{53wvDg*yn!nlb6C?S6;ZrUk@$+|AqfQ>Nbjn z?bJ`!-(NJ*K6v3Vn|Po6aKOb+Jh5QrW#Pbc9{TJrUidaVwf7&`Lbv~{Q7krZ9Z$UF+W2OQ z4?CS7cGQ0~&#cv7kCZEVFi%C|pm#azBK!Yw=Cj=y4|T4fv)84~YYF|~{3*xfzxxUo zOrNvryvDHCdOQB_Hj0Hl{F-|Fsd-+pNLtK_zFMjqHg)hazo`FMPE*m}<=(je*+h_h zd;T-=dnZU7O#RkJ-SgEH9`-}V^Pkp#m+d!t`{+hr4!b_TJdFk~kR!5#9{(flvQ}1;A)K-5# zzOd@ACjDaLBi9V8ZT7-P?z`zmuXT3*O>O+y(Z&A%S+)y9&1)nJbzBc_{cC>OU(~;t zNkxCn=e#m?rhC>X7Vi`n2)2d=$p@_u)y@3f#phcbok1u0#)l22d5J%)_VQ{g?7ib9 zVf9--8ZSef<~45H|5&3~?D&5V{r+)K{UGCH;%DuhJmPoL)@MijsU7t%Dk}QJKVRxh zk4$+$r?%#`&Jmk@(8g6>^LG}XZ*k4Hdd9<#4fgSexyLFN!tT_C?;vwEI+5RLe?thw^pNw1i|HmZG{G$E^Lq&h5yo%&oAA`MduX*X` zWvgR+Mg0BHbf;d&F+a9qKfdt#4{zR@eetPbX7|cJ`EMNvPwn*&>+1dYvPQ9>KXsr) z%lCuI^$*lD54EHI#iLsN2Zfn=+4J2Lzjun7$9*iw2k8?`{n#mzZ^(Ge16whzPi_1E zyTgBR_a!FoO?XDnwMg0rrI{o<`nS9&+ zmB+hYKFdq|60_H*~S_oj6)M2{J#B@vx)*-8{2a|MdPFS)Lv9vjQ643Bm`ae#QmK z1C3AhGk+gF8ON|se%OEQFD`r3-&PE7dFf*xd*G}s>BsgzV=^Cm|Lv&V?M1Q5=K&t? 
zBvN_*&BRY0^Nacq=8=m2I*;`CEOq-I_(1P+)+PA-#XJ>_NBi(DXN!Hhe7(rLU15&f z-=6Wl?6`I2zR-3PPQK=ZpHD&$yZ_;c@`2mR4*=46OIEq|P97WI_Pw^fewdHWGwMH- zXIAugxi^0P+Dg#$$i%ko)%C_LFY(hCNFHc>&0aq8z493{k0tbn@W5L8|M{Acu;Y0< zzr6p}Z-&o~{|6hzVtTBr|9_E;BU#n8*F1Ey{xiwtd1T&E|KU7h<2lXK`H2fGQ&yCois(n=s?FRcF9KYud!$xFiYL+{;ern?zEZ2vQ+{GiUu-k&m!>pbXB zyyf#z`Th$Y>eP<&Y{h*lx1ZeWsmi>QyoN5uS9kuoBZ>4G^&ibW75!cAjs7Ix zyWAW7E?+N#n#X+?;Rj7W?HhkTOzXVMM~&kuk9WCM_R)_oZ1BZHKJ?+g%?KM!T<*pP zAH3DOSR5Do{@oR(%`ZD`@&EtpHBYk3HGcd3HGO81L*Min^&iV475%l2?-9u7`AGAL zgWlz=OK|+373tOIoBo}~!KL(v-FH~+kY#RvD7xaITw>=<^R8Q&{!H&DAPtbJlN(D9uABevnl5gmQzhbMuA75B~zvhK!Jau?j zebTFLx$yT#*#4jIe>3G*%;xF)@1XiYr?#S4b#TuI{o|k!{K+X`-y`tnH!Eg=gtk9%U=hdeg5hyKgj%0?w^J9bbgAJ-bY<1UR@7wG> zuL@h=_0JElfAIvbal8Ji(f%roj=MC$Z! zZP}Y|?;}N3IUanVcS)W0Nk8O)%$L`d`KePrc;PXdc%S@m0r(#c(<2kVJr9~k9Q27F+xjU+ zUF7&EYZME88V`Cnje|?+54%r!^Xukzjt+0!?}~>nI;TATIZ*kB5-c|Vo$Y%X`j@Qg z+B@-~8@B1ylsNN?`ge0rt^Vo#S6!YR^Ig7Pg6-cG>1AD4Z2CZV=I5QnD>nW*?DM>Y zFP-+t1^)S6!k3>pdzqb=ab9_B|C2R}1--I!i_c$=^SU(DI{KEXQWrb^Px6=#^N#ut z=Cu|5bsqeW82R=-)Xl5@4gFnFZr80dU-)gjK6-lPG3=8cw*A?RRUf-zQrP~fZML3# z-|f`d`Jb^yvDo)7I~TwIU6H;($tvfMuExjjuj!|wyiSn$Mg50zPep&1d*kQA?F7l^ z`8HI3;-Gg)9evd0(_a!#=LPT5`rB)t_=RgWef0G9tW7WadS|ur_wOA4k4fLAKYcaT zuP9cz_D&vy=w^MwGoX2DNBxI$Po4hh`Fz_>Gv#rL1xtLs-X%8A189Bd_~vgeKHuV+ zZ}p6a9~Gd%qyF>ri1AYgj^)us^oIv@McD3_ z-1@HA>HKZnf!;XZ+obu}X`OdcZ+>i0`8_tM{%}F#?ysNygU7=a=X~p&)8{&P?DOwn zqgYIjn(tpEE9%zdk=E%rTsIECcfE0*x2XSuJYwUy+=@0l+lXy?=y;6lU2oj-5>2<4F8&3c zcj{LZtGYHm)eSqHAMfK)|AjeAMSmyUmFqui^e3ynzQxfQbdqm; z*kGEM_`~jl_dK@Kk1h(G3*LLjn^!FVf8_1T+f8s~)jw0eqF7Nk$0I@P%Kc9?kNq0+ zi~5h|l@0NG>I`rcU*$eOa$EJ6z z6?Rn6Co<5`gV|hlM{#?gS zKK&XhFLBVjoVEUWBM;>D$b8IC->#su?TtLYg#Pxl_kQFR!=K#0J?)a0K78O^S5R-? 
zzmGPG#r8j~;`{eun{QL%b!+-UH|%tN?2p<56J@+XVf0Q>=iTyRn;x0QLCqr$+PE4| zZSp{yU&3c~PR9k|#RmKM!}Lq9oAl$?eI~4NR)*3wtuOLua6Du^aXuamy_TLpheMbFV?y2Zs zc%xsDeA`ZwKFDh&`Bq0Ciun5>^D$KZid{2q${((ozV~JuUH?K@u-@yA+I`g~JhuNC zY7~oYZ*2d}7u0!3cDXjbHIzs5*xyv&e$eqEjUJB!b^ zIQ#=TPMtkYf0(nzv-@@r{zKU8ZSh|?U)soPogIH>jbcHstT_FA@kQunsC8bkD2gj| zvHxGwxCgt9Vqt!9`_FPmMSq=d`g^YQb;Nk^f!^h;|9$VjVe5@e!od4+-rrqDvucGJwo9YKuAMuuJ@8mK20rWB5v<^G!-^nZL^yj$Vbfq5D z@yKg^OvWc4^yE?3lsx(%FGcI0x|9bSO!L$aZ9Jb8`sc|r*W~p8N>;glHe1JMyY)^Yw%?||O#D&*ZvM!|bGa35^5%<8|BP># z@{xJpA0sN#q^j` zy#GGPc4DY?^dVKHZml`WLqDCwZi*fCFGjTS$afO&cl?P?DSEBPPoE%ukbJfi^C$gS zUX7o+It=S<^6m|9o&U#%F7Vm;pQZ%pBYUQ}|4EZPP_oK3^CFIJsUKeYi~0}cET+H9 zt@3%~{f2z%4PB9L`^(neVv`SAAF3OE$u8d~@PlcccT!<~Y(?toFdX#k-v`cm?NB)M zm2>^Y{|xZh{y$TG#q6@;^v`VD?mHS!o`3O{x>e4<@w|2=lm4Rq!?~xTzmq(EM@+uy z4quYjdQ$4V`mIiF=4|>b(mm&GdGOHp+bi$C(zz?Ow}an~ zKeeAF{r*L|Sww%mZUKIqFa33-BI-YqKdR`jc`f;PZ0Grnr<+&BI+w5G@q^~Iek!(i zg6L^|D6jeF_vYaTtxowos61wm(;ueYzR6KfoqR-?K6BxMuf6i;)Z6FZu|~0sSAY7G zszvnQT6vseN0GkOj{48fBX#=od9b1fJm9eKw~iGgAEZyv{BH62X`DK*`L#$tX`cGS z`WMc+_cdoW!v;Hlck?eaUV_)YfA2Pmh5fD{D1QHSirE^Mtf*VYBSGVvskTdy{uHDB z3-XLQ{nP(btK0t33-m5$E%)o*R`=2w8V)k9V{+aO~< zXt{m0W@)}tgoiq{qyD3LMn!+kYwaNQ%%|@CmpJHMQpdQ8JQh?yX7Ar`}w!Sjr$({#u>lO z=->AL)GKChF82SYrsGOC)wS`dZrE0zNe%OE4@@c?wfd*O|7}XWy$)P2AG|*CTOT?v z@<8@qp!v~5`QSCaCG@vfJ7MQnKGNv6S3Bk{pLj)gZ}{x<*GQvSJlIj2J`}}UUDH?P z_rLHcGQX&Q!C2AX>9|9J707du+VIiZ=5b%cJ%C z5%OA^-&#`4<~He{^sqkT`?TL+EEklxPO$_ zv3>qssQilbWBO$hXYC!21hMV@w;H#3bR-YnsDCr})al<^uQ%U#2E>mIALw0Dr#60$ zpDaI<{EDA(()uO!YrWlnb@+vy?%QeCVXuDM|Bp3_#lHXTs1544;*GjBd8Bz7M>p@( zpDA_)nRnE`h*b1p)9owj-^p3(^rt_oZ_1eX z84sK1oqW*pR8RcaSe?lSvNavAsq8yLqIde=%5b3%#W?u zk1uR{HyS)Zy; zYDfJC^UR9=@T@F6%*)2l#19|mNn4Mr-w9gXKySYJ7xw1i2jTZl>p}Ri!9M=beCDxT zkNMh+u<0|O|KmUZ@RHX$JO4(#VkSR7J;jcDI*npU`x0-|(a9t9u)L=DnHL!KAIc-O z`s;Y)SM+yzb$NVbd9HBVagg<)^CAyqJM?|(h#Q~gSNu8-e#QygJ$2?^?>Om_u>Jr1 z^aC5b1jEbOPf)BB%0^(%^1uD$xfmg;7GHYJxh{Y3qT^GKckR=-f|2NsX; 
zao!diG#;(T-w(Y@#>1|`etco0)2_SVk_iumO^$ud?04T>J^smbJjLuwdjHRk`azwC zc+0hS^0<|rf7X1qU;2#tkK`2<{mJj<(Q^By-q01{^82qhi48wkr@v&E{N#gaop(}U zer!eR>M*SH+}uB(Gw^(v{iQEA*Uz4Z$G-m^Q+{xlV)x&%*F#t0tkJ=2)vfaXBbj6{ z9(7Uw`FW&H|CXGdDEA{B<>@LqLGoG;_xjPn#?}7JUyMEu{dF7jl=zsLPIIByJEpUhjap$p@it$msPP9ltBIyA)}^Z~R`X zPX9rPdnZU<>!Du1^{;&~zWL`%K6Z-ad)LdmwEpnu*hw=V-eojAzQwN}+I>!W|H;FJ zAKgLvG5u6G$*!*Hi{o%?ws&4f@J9U?<}4NcRj*b{#vtE#GV#Mt9Q01&W-DK9ekS?M zvr?yg9)u4Y?BfsX-*fVM`%WGW8?3eAPQ$;RKtFc;Gu9{;evf2*bj0tSV7jJn;&%Rl zeln?8jQ*o}q*nj*{J+byW4_DRCDJFzyy}oV(4JTA*ZlPFis8y}m(m~B-)w`+C#~ed z`YY~w)95N6pg%kRIjG}+>*@Vhlkp|1T*Cua&i`kUOPu+|?SCwf)aj2dI1%45Q&Us-9r<965^emnm*tmA;Qw#&Z&-k!cd$y#JPU|dDELr-5FS73+BLhc_I({)$&`Ixpf|?*HcRFQH#M|39ky;2ZV#FU%htki2wFzpeE8|Bmu% z9nWjjzhJD>pYOHkAAUopyu*V3@!!A0Pv3R+IQ^md-Gx&>ed5hw&R?$i=)5)l=(WzS zf9^Dj1%0xw6!%{`%Xife(oa+TtkK2pzvLvs^GuxmZPY)@GwSqD?`P;;o?YD^@ce+@ zxwd(!9(MhMlN{S_H4abIzfhv0KY2~5 zuhk>TYpvg#Z*}A;;_rvv37^dmy9WF5h0U+qaM#0jy*zAo?+Jgn;PQE1c}#zuZ@>Ky z{WOxr_|}vFHheq}^cnRZ$}>&>L??b%L_YQ8QDH@d3(?Xc_WPt3gQ zL%)OH?!T8ciiPdMPhQIJUsN~v)A?aX{YUZvYxURf5$NCen17;|<++@-+#b_9@<93_ z&-{$z3Od`)$@5F-4~NY-dBY#?v{pD`#hss@_mR68*N*=?Iu1CsIR1G)9Y?atwe_R@ z+V?N?-&9^~CwA0-e*UPUzs6gWC9k3fJc`6Yuk~j4%Wuj!wtv%ln-?|+A2!&>AMSYc zb)Wv#g)f9(-hbzJMh~^mm8O5zC>FMV`~EBOdsn1YR`}7)`cLMEE>ZslIZ3Vl`ZF_VI_$O#b8ACvLbfeEzVn-m%f{Q@z%i z{$q_|sa*f5dFUxw)wS2W+RwjTsX>>h|H9l;r$6Tb$>;TI=!B2F)>p+QAEa;4{6mY+ zw>Wx&PV(XL*kGEM_}f!%x^>!z=Uv~Pdcy;kjh^&ZcBgm=&$qOKCY%mM|lSoogi`0Cw}a7e9LbN4{`HrA6BotZ_53IODN0s#O<(9{{ZT)l{Ax%2$8t}d z{(K%ZJ>UVI@(l^L-YG~vNME4&-Qx2tJ|J=GyyjQzar$e2e|m_H(@Buue`V@d6zgAX z|F744`bV$CZV$*CY@Pm`M>Rcke0x1P8HYIN6F;`;mNkmS^3^78e)@Hi@4=DxPrv{o0y=5@w?hxtYQ ziZ=`-#@<*4*m19gBSXR@Y(UtSfg0%{6kyMe`e|jo!IG`zU=;c=-!b`;%wJZ|16Kx z=}#W>N%w;(U;2Jwb@ZW#zaJVu^)BzW+|Inqt!TS^UEQyx{9*l%{BQTRPi-DH`k$u{ zSz-3x-sM&CJa4xE`~M&2-S_(!r*-tFSh@d0mgK1&_3z}7ivI8o<`1j@A%CJuKGcj=hyTfQ+}|ccGinxl{%QM z<8U3mcM`Gvg~r*AqyF7IqfYKa)eXCDeo_CyoT{R~lL2@i zF+DosxAO$r4{^|IeQM(eEe8 
zen`Hdj#sghKI(YG+|HgGowD60!{)o4x5d5pd>NkF}HaonFJ+5g9@mC2UI`>Jda5Pkz4^81Uw2{nZ!7tpaCPw zpdbX9ao`AEA6F5N;t)jP?W(mttIpY#vP0DO?(bKB)VJ4K-?jGI->$B%uCA`8o*{8p zrY-&Z?2pp=nW@O$F9HaYJJ80v2y=^5!)*$tf+&Io7%i@QXd{je^LMWMQ)S+{Eq{C z+wZiBte7_-NF4NTydNG2547h~>zZHdQ_pZ2{o(wLPS|>_cV^+jHU4<>X{+1+8rkQs zp7hQ3nLsw+iw_ooHZ{@c> zv@YX=Y*)R`6!_2Y->8~t#{ zue^70n6bjg&ROBR)%8E`%*w@Hhq>Al^#8}5+KOUT>vUXxkCOVC7Z~*)DXKK-Z}|%g zR^)XRdy1+9_0sFL%#U6PpLi46_({I`-IDugsuxx|bCYSSZSuD;;nL3z{`M{J@|w@_ zryU2WKj;5126dcVv6W!bAJ*?@2!Mv3XO^Nu2#gJGow>DG$G7 zSNkdaVB>h{_}T|~O&E4r>);J0JdlT79(mLI*Z*$y`HS=az47z$6ZQYETj~cHUs0k_ zM<sEpr(IP-}Z~CUv28a!v<3w@%KaTv~R}6Zoq!^!ur=9etoa= z&9K2I&i(!S*0{rK9XtOpl$8tLJNb$F|C@vA2c6pVm0)|0#{2N1{$*xOf6e1O2J_=cdP%F4x_C)Z&2@l&q}!;Xh;_RMzI%?bf^)D+l>Cf|yURIAfU@qfx-4T2+&BNah(>(8F9P?u<_Ny1RJ!HXEJ5M_! z_}8qsbzIPf2Q-4Dr)B(e^$KHJN zWgQo$pZb=o9=`Qf>T>*VIt=P~YnJ+N$9^7^XtlO^X&)T7P@lYXeo_B&BpcUBP4e1m z(|o?*&cN`Cmt{;O_r(*vX7tJnQ$jW5ifk1nSFXjU#9 zN23=!Lmgk()tY{5=Rdnzm%i1G`VSS6M*UZpye1tPk2vVv-_L^w+Ww3_HJ|#_Gh`m{ zz&`$P{~7I@p8s+y{4Sh&)XhGlo*n<^S-Dt0tCq)~r&}LdUs(0EcjB}EzowsD{5+q` zJL*5Lh}85a4_u`Ad4f)C>ix^lKj?U-2X@j2{iGNscjx>ud~0(2AKKM6+4O+tyyn^c zmy&vUr~LmHXWM-%D;LHWt6F;}kNMp?Jj?G%oW7#|!-c1&zbm}aKVJ~OcVgF2dF@X~ z96o4$42+Mnoxw{n&9iZ}j@eG?8Xp@}ewg{X!_I#5;=hIs9$s+tHaouPHE!oW7iQ(+ z73qgQL19IG9FGLCYxm#IHBR4ZNB!p)k(&O>OTSNHd_z}cSI3#wuPfcv_mXOxK7&cT zB{p@fj`rcn2VbV2dSS}ceZTqqyS@_Em^bH`>sPshe(d*0)K|>QpTAu3C_FGr7W^Q( zd97pHe=dpi6ZKzMc$)P0u83Cpvj5`vh4HA*`#$wSD6Kf3sW zBdBlZo%5_*cpdtsJ^$}CPm$};sGn}P%rELcQe@Wj*E(JizSS9wNpZ3It~4{QyMPZG zuR(u!DW-W|$1y*)Vw#`YVXZ%W_}w4=%}>HQAD_MIO6Q+Q9sB;bmz4|KfuHd6@&u^O zyQ}f!`4?~0!^tE2|223m@iQLJZ`6OZh}87gJjoiPGxb0x4Cd(*+x+kq@%KaOS>IaU z{E6MKUU=hkH(ho85BCYHZMW6sPoGmi{^@1qV)}FaRagC>Q(NaF))@WubE}63_Of!J z&(6RbOS`7O6JG83Cs#kHzIs=@{PR6w-Kn>&IQuhCdnfa;?SDjUJO8|u zp8wC)uPD|Mwo4}wyZ;aR@VsGbov455sp+pgUPnA%Uh5kwk2px&Y{qTk=UKV%I=4Kh z{uHVAGW}uRqi5W))A94drv{I0zwOx`uPeL%e^18)`Txh3`W3~h^f%kdBfI~b+V*~# zOPu*d{YyrZ{^Y^;CA)f^(LX%yy9ggNUW5MdQcUw~{09Bg{L~I}Hn`}W-#mXoXkYs7 zcYQ9LNj>}gJCc=)ZU38L_X 
ziN|ppicNhm)e(O`O!GYBF@J2ue)Yn%SKKoFmCxQ4)>-*@|MNGGP~VRKhqH3A?_W03 z|9{Wb4?3~awO7CW{)j%i!el)3i~8q9q^7?syirfhV_t0MhFYIE=v|RL=5qsmH1UbM zGOhaDmiWEKZ64TQ1E0RaJ4gTcsZS5C76xxQ;MbR|dL(t4u7A#dqW}L$eZ?$UsN*$` zZst$=qMxXLw1c=v~1Yv)@u4oi|>;+t2n}_&55o>u*QLkDJTSFRrxxqmC=AYE2!G->qPKC)_3M zf94gDn*Pjhk>r_9_){DaUi<3t{H8qf_m<4Jcsh;~er;Y$=?@3K{qk>KHnkVt_MS8R z9w%4d{~n}uhZB60{{A9YzoJ-CH^(DE?DlN6>AyQZ&ivx`KU_qb^zVE9tt+yt?VP&a z72eA4U13%Ivb`HWFT?hJW`6p28V8rrA6A|7{1*p~ek!bb)EVcmvgT*J*0b|(y{uep z|G$#{{vG`kg;lL>d(ghn%{$?wuOIBF|NMec)1SPBMRbggd1=;;f9(DDJc*+pnD%G> z-jewi*ZEk!6MoHJN`D9wrk?i9JB|#U*Pr|P$#4D|bsDd~lEcqGvEKlti>OwkjOZ$B>RouK6f8Ar?Z?jld;R{gtK%q&RlS~5yLSB#`2$)f7mVBgqT*3a zf92Kwe~mgl$)|tx0=<*C*^H}*A7p!A`!YXyT93MnYc}yd{IJ2L2i~ybeRqXf_jLdL z%pc4l-#-76ub8i@`~SqBH7xXnZrDxpi~5fgrE2=S!W%#T%IVSS^JreWYx^Jc0=*NS z*-8KSL3=%QwSK~@w|zsb{@*xvCM?__1jM-&yN3lS-IHzFZcgR{i-8ghQM*W>UYQIU)`tB~-j{27sYWizken-hX*gsd>htwqw zdNfqOU#0~L-4Sec_?FO&CY_G4q7`EHu(c^Y~VzJjeJN_BX%EjJ) zca=ZDGkpd%o{S^jG24H7{ePb1wZxA4yW)}6FRki%#83Y|jnjwr)!siAiwk_v^iqE6 z$4)WL^RB2~&C|QWs`^dyQ#)+^xkpF#efR6b_BS2&wbLGXk9S3Oe4pd`+flJi|Aza| z2&)>`Y@MHd{&iZftEh3dn& z(mvsl*M#Bi!{=W7;9h@~DwTfh|M?sSQpff?IhlgqCARj-{eQ>@se>K$&x;D^;$2}y zn>s6r4WE2NC-JuO+7FBE)o*#|r)d89!pBaLKB#9%JsoFh{bA>W9-p?=!e_$Hr+%ch z=5-6GZ`U98vU0KgE5|=A^(%^1>14K(NA~%PK6CN2-80Xqf47J<>OW2L;FE9tx$$x1 zC4R5Aq0YnjWf#?a%uoMm{xbT*uIFt(^SvvcA9lNY_iIm?I>YooFe?}P-fLLxZOi{Z z`mh}zYv1Gvt6Jl4t8U&&%ygsguK1(=z2Z@${^{>uH18kQKQBhD-Y3%j{$=`aw$_VR zu5{aQ*m94ppV;U==Uvf7-2cB*(DwVIYBS$lZE!$sMX{>whwA{nQyz85Q# z|G-6zYsa76tXyn+m{gvCKE~=hiKlCeYhUeL7}kGl$#&F#sEE|`r;eW2p?~YES7eQO zzB&K%6ZJbm%TwEfTA%F=UW(Q~c_8`NU>|>2_b1EWcftubg&D`}{M3R6-sUxr?|*sU z1?k7$@3el_u+T5(&8Sa4^}wkAyrNW1e^+?p>vCm5_|!3U5{K9Rt=L}umZvuTS^lW- z;iYJN^1(hjdw8Jo!>oq~|83>b6T+;oueQ~PF0cN7t(TRH=`g7FH9TG3m=EdExlapZ|-;UwcpZG7oI9!B5?=?(4!`KYYhMVcqUiuf6l7pLxw=f6qJ&rp%O`O?R6c_BiLfKTO;E8S?G(@9tT-)IOj0#E%|gRokK2P9E|7OWFt9 zE&izgNRiQ`KfmLEZ~FI&tm^qD4tiHaD_y-StjKr8I=-*#JdNKKzv=B7?0?W7wwu4p 
zm;Z9}VA%fqTl{&wmEXg7cKz)_9aqu%V_ZdH)z?;UZHalOn9fh_sQ+leXw-if$+teJ z(-S}A5eL1KcxvP4d9?U|@QFA18PCv-&#U>hF7?88_rLyvjrUt2Y=7(e8{M&>+W&L? zL#am81MF)3)_4@FTBqYS{{Pf~*3r71fz?V!qyFjV-=q)kq#pAFQ@{16vetzmTVfgUj)As4Uc2@XE{)(M;dC&FKx9`95tX$~F{(q;Ze$c6{DAt(QH}Ajb zoh!d5NPkiPlF_LD`s4X*w<(``pm#|gee~16C3)ndkJ(P+)WZh*_`_~5-R$pL>&UR{ zFWNWVId?be+x3TCtq-ocX}NgnD+(*>t>c-karXbjc|Lg`MyIHMnUU&gdBai}zSYUa z@15eXVEaRY@IkAm{hHq`nQw9BTfXvZ_EP%8+Lvv6=bKOcM3}bKk51j-;@5f2v+bWc zig{bFKW}B_f_`FE+k0yBJ*@Rfepi^xFX~@Ln)K&BTJRa)(2cKWyu@#P3^eeyFX~zy z<$JAXxQzbJS~uQ0Vbybw=&b#p4;_8w+GkPEzW*A@%Ef+vL0^ogD6DFo&JW$pPo0+J zFu$mOUU+KyyTZE^=O45mK5w_5l|1hR8OQoizWGPT^YDX~r}aFjb z05xp=fvKAr>O+I>9|G99P7Z~;L7Ll6%I!;@@p11Q0 z|FZpmOZuZe{aOD`1{{9=Q^@=Jr{axXW{*e+Y{d2MHeSrBYuYK)!eoaT^n}2Z0e2Y^bbi!wz z9ve(~i9c-ghVLAG*5WsYjb`1Gy>Rs@UgP%u+seuXz4H^w`@ePVxU45}*63iij&JAx z=_eP4^{sZ?{s)Ullm2Zld8S9wpLgTq#!LJ*uJ!{DWFGL$PybragUjgeOx}3?%|CnJ zoX+G)mtOMScXg;|-#-pzzS?2D?UOk3jrtE2%trm!E23jQ$F{^~ z`|wU|^a4{qx`OaP`XNq!&DQaj)gL-PdGg!)ON-?`7p;+xuj_ z|8_EOVMX0kZ?l~|;`?{vwfSjY)PG))QPW@J?YxNo9sQMuj-ZXJ@zjO~+Vh#zvpiR} zk$I51*kB)j_{18^{qDQ#9uO`&;&Y>u&lqIf|99Vitu~y+rKZ2mr`;{0|A79kOt zJ<%OR2d{kd_t6tRNL~|$|96k>Z@W_8UY|)l>iWcwZT;j~xupDm)$tGGg!L{v zxhiU^*@iO-w84fJkb2}$Mf)m@V(YE)N#!w-p3zyYCp8*K37i(JD$Gv6=$DS zyZ{b>Cb&NtdEw~0h#Z7@wd+vgb&)d z$~S*^$$X1bA9TXEde~sfOZ?&CV}5bQ6Q|4%BP%|3$<+G~@EW)G-_fjGmiGK7eMr`r z?Qu)3LqFXm+fo0KBGRaTOY%$)wRs(RCvoid%<+87qYp*={gC+?G9LA?75mleOgQGH z$L~1nyw1c+pPhN|X?IiC_CF(8x$wEi&nfT!^Ca^#)bWK~t*zcPt&{eN-IaLMf3$ei zq(9eDFt2n!=v|Q=&r@*y$rECGCun($t7!gm^y>W`KgfKjXGlFAXKDSR_50ub?B4wc z!p8q~(Kd4~86?m2AIi$bj{jHJ`;Yb14>G=@M8|CZ>F*y~nonL=?9RZX@h+@%l2q6Q2JkVVM4hd$-(SyCcKQ!@u~`-H!Vab#41!n3YTI_=o-!g;lM+lgA*s zVWW3Czo>tiS<`<&>an&u>bK`nZPP243~aE8PrM0h<2tEt{={yogFj3=VuKZ~-Q?(q z``!PQ|GETS?DJQy^%e7{9w{!sdK8{vqK{<7*MpNsZTVW!Y;-prTEdI^myc@tQ)d<7 z(ZAKn#ZNurpm!2CTl;O~=fbzwspiA?*dX<=!9M=5Z~FAgupR#s z2fdScYU2kju6((fVh!i&KIA3bY6A-f39?5$Ni&fcNOylnYW@u$87&g{rkH~y&|`! 
zzbm}a-x6dVrb9A5Jf3&>V48=&AEtTUNk8VtR_s?Vym{Kh&mZ#bEyL~~n|$Y|?w?_O zr}_`+IP2^Ee{!`|4;w$#Uv2JNXgsI+Jwf`6+y7wUsp+qEyep#B@u9tr^3rYlAMeEG zcW2ZGtq-k>zaOS~jAMQs7k|HcVd^e_e(Mo?tsUH)&+d81;p>ps`2EXS`u`VsRxUPw zr?$>VtTEfa9M)I*e%mp^YMN7)ekEdc%ZQA zYvSmJZO2bqpZP`o=NDCK`n$p#{i-*7tJ5pP)&5O+eD4e&v_1wT4}U*Q^J=ym-$%cC zVd7Dry7Zdg?j7Fv?W4}hK6|LweEa;>)q09{|NlYtg9B=7e%e><{!8$|mgGnM7Z#6d z`orrAk9o0u8YVoxcZLt9{o(J2X`Xk|kNL3``_&86wp{VNum0yJ!aCPqwD?VXt?Mc6N+HT|_7^J}+?X!HAT(|o-vvYPXJuMAh$ zwK%aEm%1K*iu9TI$!o%Jme0QRxea=mF5bz5ru|P|o_=`bHS7mKO}E#-+0@epyxQb3 zKm1Yukt{BzzbmY0>-^fTG%KBQvD588ov(Mo!%saBKWN)kw;@mKd9Xo$>KiZNhXcR# zw$qmX>TsC*k>QmOdwmzaZT}-#xv+isHOl{gxAR2aXrPmo@$C0UBU!oF_b90KQ>tK2~tqUf+_JiZ&{qmaruc|RG z8P{VkC5D5p{PAPcro1!s9=T}tbALOZeEa{GUREx){i9ctza@Ou=v}-2fbxj*d`A7t z5o-E7saQMzMn4In7w9!Vwehn()Z$$k*ZR~tmeY7fu*tY*Kv#a-6 z<0bxdTzDY!g=c>4(`!A$W%P&j7PSsM@x=qejJvL$`ORBzU_9IZEX>NK_WcXvDGIAv zQ^z~;+3_cNUGc+Xeo_Bc5oy$4^WfX_U1r5RS7iUs*Ps5wF$WxT@3)^>C4BVNU;W7w z?>>|H*!kzYoKE%o+L!zPM+1`IR^8A$-M+A+{^f`@{i!ou>d`;_8oKdu<0XFUL+io= z?e&zzEl=xtPd@?X()W~ zxogf|uvF*N9WL3{M zy!IEwkA9%_p}LuWaLIg&E8p^+@N4!``oqT_zv9r--g;3u-yL}Do>$+*d~E-t{g@v7 z|5W0aF5>)i9tqNy{r;5xl}DU$qyA-O8_$iegRYHwP=Ar+wLdEeAGC3mZ~oqr`4)E) zxBPxK{lEj&AG+ONKJ<}07l*yh-sqa%*B5(@H=h6hpdSBsvvT3}nf4>mYJ05hf9MO; zaiji2MQ%-hjTf8w$FUu^8ODe+UJ3u#+_ig_Udmd*7`($9VhBPuc*;a|AJF2^kt~^ zn76R1{TqJkBjFQo!Zgo28NvM6icNL!ht5&c&yC-Jh4vGh?7rL0&v?z_`cIBSLGnzW z!K_?te4S5f*M5K3l052xQUBp0V)b2NMcWnY`2ClDj)q^`{u}i4uE?&QhqOQHf%bg5 zGOc<&F+cTta2frfz2nVWt@ZH*AwTZWb# zpE&c5`p+*)HR``@5gnsrS8RJdst(ixy%Sz)r~O#Gp+DnmJ#6w)B+mF|YyGgsg(n<; z&%!Okn&0|y_vFhifM@zI%*v%!cToJ)6RTQ#Cy$BS{kQQmzX9c`9ra&Wcxw8?TO@q? 
zscrvG;=E6{uN=>}JnAW$e{jisizj*B37>hIy_EjG_y5#>OpnQO0y>?ybP;uH@kr1+ z!P@>m7eDXc%rEM{sEF8jPI=6`CpLWV#5PQL>F*XS51wM`$4;>}4&ze~TQSW~?XcFj zPJQv@sSCqePj&V>=E+N`-+2CCZa?2cH!#zMe6zJLj(^heZM#uk)PJPNZPK6j5$dGd zfp2pEsU|AGCh8o(HLi4fgQ|KVkjuudf{m?R~$s`kQ|| zOg%gPr;eh1|DI>%!u-XGy6JeS-PY?iiSs()`D1=j|Is41roWSl>G!!_{V7rpB%a#V z&tO(A7SF}DxNETgL4P=F(ktFN=ir;d+3t@A-1Y3J*LrsTVJIsXyZ+zl{}1Ns2YH^G z^q-}C*N~@mIs=nSLX-aVVLDL{q>rJpOqL$ESspya)Q_E_Js;$!<6||Jv_QbFBxK+12qWb$#MD zy^`^b-^$8`?Zo_j^kf{vKK!uPReQa1`Uhr(J#Sg%{?l%&et&eJjz5y%=KA}iuKGb8 zN4(YAJ9*6Y56lBZ2kfYStH@~7e@&?e-`+>cteB^L5C^?W@|4%e?+QPSV^hzNdQS7e zKK{g+$w_000)eW2NDz&5j zd68Yy-xc0UU%m&neCDZ$?)(l98%*=?_ro;LyP~VAFYgK~+OAmFUx#V^Fl+I5Z+L0$ zkzvF0E*gFQM+@1W?DJQzER)&uFnP)MAEPf}S8MO&apUn%E^+#b`gaRYqyA;Init1k zhED47{PMjU^+Eat@%KachK$GakFD6RURY>$|1-8mG^wf3JwxcpCSL?d@YQF7Vl23|$$vxbYId^`ZU3 z1MT@t;+CiNJjlGU!9M=Zls|6#+E@K*uruW+Q=Yr^nl-6!$N$4wxp;6V{r#6yzoJ;x z+N3JJ$wokKj$&LCC7LOYB@3;SVQU@JDpZKwD{9aZr#_x)4&x2F@d`;+{{>uf2P9E}F-?o4H09V%UzgpT4I3PA_n-}%aUu}!$lEJ*A{zHYQroYzb zybgTRCoi&MUczf%C4Tsz@sw}=!6ox8PJPe`-|As2lGlV`_5;@z|9cl^KYRG@%O6?& z{^(Gx+ZvB~`SbUm@9eccD6DG!W-G6~u6V4^T=_&k`aJ)o%msQ+-`Y0|&FRT2Haxc}Grp&#?{uIOUS^PSAYyCNFnef*tiA2@y0 zd-wiXXWF~pfA3KftLuN*{(EJa>iB2F^8NQVtE=^eRbRtHH*DK)B=d{<&o8Rf^mkI* zE1Djf-^6dr>5TgOvGz?r)a!@v4e5(|*h!yFb;G0$=05q!hkh3(UA)!QPaXMD>e})D zNLDWP`FD=~{vuaD=)_LfR&PCtVB715zH^C3{TCLGYI-V<{TTDHWctVc_rjyex9@+uS-F^A4e!6gs$Oqq zJ9$*Q|8PtE%+D(?>c6OnH0l4c&%Yg3RIBz^H62wS^ShE>d%aj(`Dx!ZTkG`mhs_WF z;Ql{b@vyMj4PX3}dud;R#uZ^oi##4({H&KN}w0kvg0=w|V7Rx#UNb z=YRfS`V5NSJH=d(wRiHUy@4S0)4KH28JJQMt-j`a#dfM!x1?9c2MMqJJ&98vv_8^$ z^Onr7<>@#c)H-G_r9W)&*lkb#dP)czp8e?WcE9n{kbcrQw)LfTm(|}{Wt-Q(@XOwm&cxOK!=;|Zxx=J{!V!9H3Vxq4T_)siG$uHd1}Mw z^<#YP$DW6jj|~oF<>Ijwsn>+zTZdluFI06 zZ(zht4bCTJL|DYh3+e z)z|dZo;aRwb(8r;{Yy`i{#I{-)^iDJKj_GHDA*u%K>YpCGoFrPHvRY>+%K^{Y+2utWEY3tn->U_-buue{#xRX`VSV5YWh2ga~}`uL-TAq(R#!|ulZ&xFV)xL zxr}Geuja$|*dX<=!9M=Zlou~s{-jk;?X2;+ZQkM zue}o=x>^6Z_?aK`j`|N3kw*QulRWsgAIrt>ogi^A^;;j>KRgf};F(|f)H7U0f7tM= 
zN1w6c-Is)ox7vQn_uqLa5L2fa)3l*fGGfu_IKH9vV;kGj+`n|L37XPtTbU-pwVKirwV%kB5wxXWLt zWADF1S-IHfueEjmpQ~R{tZM(`op``@S8e)fjgK?GsQ+*gsp+qI{Erg+|Ics#Yd$gI zw4LueDNLR6i_^~E;}P$Q*~jCbcL*}i+UKu9iJMNUk9h0PLpweH<}_dP*xsW4^NUDY z*So?S`|1kP7kME56unFG7+0~zZwbFKPwNq9yER+mY2DNg`~G#~8@_ktYcgG^W5+*Q z$1CO+mf!!r1|F#C2KdQCH}f~lFY3RrC}R4%!iu&l))D6kG7oe&R6XIfj~dUnJa~%c zcT475JjwIQw|QZM@UcPlhdn1ewDPf6d?M`e>g6VGbN>-u;tG+~b}4 z6~&7BXkD{4&VAU3+kT@ZOy(E$UsQOS^yj)O_*N&EI@E{PzF+-L(DJlT_~suT&%+PG z_fB|*)O(r!u-WW?i+?-#V%YN3E3P?ex9a-Ow`=`*39eOq{5^e5j$1%sEkS?ntG$IV zmo`Yux7oB4C$E|$7y#Y&vbHPdr*1A zLE@=x{mhd(7SF|I-sX4ePf^G9OY2u&SZBc=YkXn;*TVF@pWABwI)5U+asM;9JpXe# zJTOZZ{LByC%%4j_I={|aYB8&=bdg zbIT8&9}Yff_5Z%-8xK;~jz33roMA=!a%yWGv8r|2C%Snj9Jc#(eo_CjVoiS?r*{0= z(t4oQ=lKDB;`9!uQU6xqvHDKpJa62`13u52q0Wmq=(Ud7%A+oRkmmt3KY8>8E~7uZ^O2u-e=ub< z9KK+|0ngqrOrQ4uYk5{K`0f6mPW?KrSoJmYLpN;Gt1CIoFY2Eco+kahE27Qk@vXiq zX<2)cAoW4xY5(~9q1U>WkKKU%>V-|8zkS>3yKWn1UpQgGRa?wuJGT9QFDn;2{vXia zU#MniGBR<7x<_0!)|P(MX@?H_05LL5CzzeIn&XM&fa<+<^7 zm(?G(eAn||f8TkVhi%?|(1$LW`Xlz{i%4!H8%@&}N%{pJ#HT6-sv_BMjp)^AVz z%roj=dX`%MmgKYDcqg{$|G380&p0;zKS_V;rfB2n_#V`8!zX`r@C<+W)#3Ubc5Z)Q zwMXIG{zrNEbFTmCW#z)_LaeyoP=0DRp8x3zlfI+=gN3I_f6gnwH{DxO$2-M&g6;PU z!UyRSG{4rtP7%H#;|=Jz*kB)jIOx^K7ymmR=HBt6;~x7+_5JU`T5mAH33~psr+$!e zTHSKzpX2XL zzvsf^m;2BPo%LqD>&y2)_yOwIzJD3f@w$qtvv*>rYy4n)7hzD}_Fr0uc}M-{70f36 zrAmzV#m! 
zE27Q&aqo&~%y-2)eqS?Lkoq9=1o8Jn_=ZmE*KGCos~0x7XyYo?UG5epqH0%3+_5Y9G{%5Z9^FwxD{FSib0e`sa!zUc+ zoy^Df|2?t2V!pWe_Whqhol8d4zkCL3`t!Uk6dunP`)$L7 z*Z$0SzU9GFG=Fc&e2XV}-U;94fepgP2Gt+tJht|_|FzR<`?zPi&{1PUEz)Xd4lk*UOG?ZwT~XpZ^|>j&KEmH z@1&lM+hq6gQ#Wk$q2K<;;KQ@Rwk!Vds+BGs@tSA%9~;cd#q^kRMR5a^D{XtrHO~AM z#T)f<@~Ew7edpp&?WljZh}d{;Jdf|u=--aN=u`Q`L7(`sZG6o)e%VFrV<0OR^Q&$7 zPU7%hu0OovgXbOfu7hTTLw|qSjo*0nJnGo>52IQi?5gd?qgeH|cj9aO|3T)Xb)){h zBBQ3iE4=aZ>@-2sBNyAY2RA-$JjT&D^9136_PjRbQ3p(UOX&~Cta<+1_x{e2VQ|kc zOkVJZr|HLzKNn`@QakUWdNuVaS!1?;)s6bKyj(J({)5G%n*PcBYC82K-|9QDx$X`= zXgsZpzaOS~UdJ&%wqn0}VZF0=dGlK@`OmQa`)_&n#XbK@9lQQ&I4c*vm-DTL{f~|( z>}qZGYNR`HcDx71=fYUEz)X`GU-g`ZZKu`|R=jrabd&AJ{3@#&J^L{Mg`t z#<4;5hwXm;{?p@*Cv1P>Wvxp;`XcX#_Wkdu=7XE){Wk{HuP9d3C+CsIogn)SuQua= z%rojgugIwBKTvq1f2f4&cS5FW1i@6fuv1Sgf(e@;a=P*_nP$0I@Pc6$Cn=UM;$ea+z_ zvq^vMBgcFg-_RA=)#n4_xzb%dZ|J88546{(_Gy0dw4Mi-(cc+ZamVXFKJy!$flU|P z{^1v|q@JCB9?iM9DWzV=Rh=!R|Em*z9?sQUq~- z|AYR{s^@Ndf9r`$JF6`>=j*FqG0J$h{~ytD!8!W-U*-$yyoFt@t==kHM|rltPVA`v zqJmk|UwP^ERR41QyVAEkkM!%r@4;pCcUC|C`m-i||KZN$L(cp9X=nf1^y2*6kd6cP z`Tdu$tF_hJM(faLp4d9isQ*Y2Y0|%T|3m7fxJc^%zrOz=^Y>2rG1Pv;v72oWb^F+yAiUDPOFpPj|fEyTn#Le*OpC`e`khAN3zCA~pS;@YC~Y)<;j` z)MtAIy_2}v^rwg)w7T7fJgw(J>R~HVuL;Ahzg^>)x$l@C-n{QQKR@cIh4gRw=UKVf z{$h&SjHf8x`r143p&Rv^=GPgJ1Ere&I?g}s{}+<}Q+;f{J?Wpg^{?~tAoZ}pKK^k0 z=gPKaOPOV!!|0qP+j}Z0j%AIO8aaH*UvH9@+T^wN3w? z#F<~zzpPNxU-LMR3ZK`jq4J1>KJlB5+Alm1zVS4_X8(i!@Yl}96W_SfO`UJm zaqGA2vf93u)gO*{;>yeacE;7=*tJJTcKCdC|A#|$UV{k^l>fi*5z|NG!ixRmJkmTT z$bDn1Pv+N^IJ!jr%ZSz2eDZ`H$I(3eDN>K)W^AyDPrM1!JfHMuw)&gu;14Sub@?t| z{>#l_!s$cL&wTzG^11(&o%b2e%EhlP7vP#cgX$kp%mrD~FQ0!I7bMQ}7WFSHSbZmz zykdJtu`R!Rw&L>vY9GWw^3A5-CVnpC&|fXjX&j^;HrU{&UU+V~i+8?z*U|98zU`;h zIA#XpvH#)w4#VyB{!7$X%#y{c-|Fou1Z?z9?WljZU^eN``w;zGom}fX#Sy{(@&Es_ z;kOTc?;UI16j~Q;vBg>sZHpfE{qL~O2i&&&{&8>H-dYl8O@3;(m4|L1d~}TZ_ln#` z{aq2Q&hJ=$!*<4bKgI*oJpBF8yJGa%UpiiDyOL&~x0~vQb)Q;q)2pvo9A->?dWXqB zJH=}r+dt>WK(eQPW@J?K}qKVtTa1w&zpp5eK~! 
zp4p7s#HVf&XFk+3q@IqmwEoVFGw=P&=5xQ+nfakR58dS4YWsgzRxW(+4mR~K>}s9% z+0G?q{)YKQ{TCL@n*Pjhq==5uzbm%yDZ(2SyZtpm`Ub5J<(q$K$$X1bA9TXEde~sf zOZ;J@-z2FGe_NQ9OHKc-`W3~Bx;Y*R(pURU<88(vjve)1 zR4|+LPw!(xo}tbcy+H4Zs$-rvGe3#W9_W#xCSH0__H-))Jo&AmX ze|9!??D#(~hq3bt`NHuw$1&oK{ph^RcJhehf8t*KX19bF^&cr7)%16Tx4u|@gPvrd zE12ft?}urgcQS(cu@(E(3+t{oagQI}{i`rz{;Um`|NWUf@3#F9X?;-ZqPwB`gkKZI}S%HeFg^Vo|0>g_Oc)$lR5 zZ@=XZ&u)In!FT?0KKY#QwEb!?D;K|#{{LN9Z4e!u_*r|kYu7)kpYv$+*%^>Plm7Gr zpM3nLOD`*z_RfOvLE~xP_=R0PfB3;P&pXL5Kei%yO&DgJcj}>jyPLuWr~U3dTUjl2OxN^QZ&AX*(be(udiMuZSPCx;n1;$x}add7jKB-oU3`Snq%*@7?T_i^7a= zykpL{{&p<+cKsoB6!SaF&%g6+JguK4i^Vk$-OQg$g4a0xMEzSuq^7@)!|Mn>&!3_4 zh=b`k)<+Y+TXYfoNaEPu718STy{!Il+;h`^^XeBDhvTk%=IM9?4)2Pp@jMI1|9NRv z$3NdJuRj^F{ch5ycfw~)U+9Ky`8q%BsDEDM*7SFUH}<9fBV)R@#Q#s%KTqm9wdofd zTtCZgr&y~a4{>Wp4DQ_wL zomJ;N^_k!9a$je)3*GepzWf5y|B$R)IFDsWU%AFX##fZ+nC)M6qdxl^vs;o8^&c!A z)%2%+EF)wb7D!sCA@;e$4=^6~e>G|xNf$Nboe{py7&=U(%jD?faBSaYA{W=?qi z6<+h~_b+)?F3c}4|NidWp6S=sxKq^mh!u5IyLSE`eovV68TB73JT?7Y;f?$MtpwrI zm!TUUH(uhWFAyG>zOKx#^YL2Ga2fsKz%_2k*ZJzU;jN!NZ|Ir7RKGvv{dYJk7yBN* zt2W~)inqS@PJHObe4FMM^`BQ{H0s}ydZtG%_CH;Jr**r6HlEJc69<>kAKw1GRaUxf zc%5+2zVF+2{(&b_*FOJJUopS0y#JfMPoQVkuz1aDt8Vb|^EzUFQUBpAE{*!@_xSMB z=aIU^!PIYksBZ8;dp?u6<+<2oT)ZpR5nrbMj1ykCZ~rj)riJ0fkKX?Doc00KvF{&; z%JTNR6Ti9s{>7&dE;apie(i0G=$Q9kSDKX$YBN93Cw^?^ z%k>?`&$DvjyoUK-rp{L1X+2Qugbn`veY?E+Ro$?`lbc-o$*;YW?aA~X)jV*g^7@n0 zZ2QP1&bn#5_MTdY{xy&Bm|xU?VZp5F?+S1950y}zUvpyn6oc>to z&9iZtSK3D(f7oW$O*jATmR8vAYu7z?&Aw|>&-5S4%7t;oiaN9!{0@f3)#i7*UTu5+ zy24|gQU67Sr>4KogX>7)TOUb3#6j5m^i{TjL=yLz3p zd!=cw7t3qV-@7IAoATh>d^8_Fc}*DRe))gjzu8I$gkz5V^7EsU&!moh{$;yT%r7l( zzdED#yf7afKhwqPtt$>}@6^r(=`ZR(T6oaeyTXdL))%W<<4=)#-U$~QY~mAd!ZeTi z=GXD?H`T!(R_?yE)17mk4XYfs$HjkHseb+EBI&>OyOXZ^LB>sVqOUgb6qWDQ-x-h( zYBl}IYh~p^|J1Lc^3vaY3Ci{n@J(^g-3{O=)r(8g6A@%O_t z&&ErikEGA0x?$>X?qBhSIY)-IZ~eo&ADO)o_3i$9BU!m{-K*E{lWc#R=qT)JZT0y5 zJAHPwZemCMT@h*0zpZ)9r`AWAUA@m5FY(hC2oLni2hC5vNt`3m?1u{;9oH zFAC?MdCada*{-_(Fu%VW&B~>A{->pWMX{>2ck-CNIBv(b?Uw$uPSn4A)TBRt`sa0H 
z$hd-}uctKX60XaKDEy|Ha(R$+zua z`(ZoFKUjYM`#saIn~huG;iTW%`F|%2;^amB^CE)I-W68${@dP7Z02Eo^u$j+;-GgD zPi_2Z9{tq(4LZZKdOBW{pFYCmwcZwY`@afnZ1BL~PV;VpZ`XgSzRWZK&+_x{5w^eW zWlLPBwmPqSENpgX&l@79L&mvJd5WIekXq42dUSD z;gB8Q^Sb{<5QhK=`2flkI`H)pJ?7EUiDh>64$C^~im_u>Bnu?ReG0w^GNh z|J3cm^jM|*|GOWu-5RESB{sTI-}-Z9yY~4g>R*m%<7pf|-{Y7bx#ZjPpnT#W`DW9f zB7X2+z5hXnu+dxIci)5GeqGq~zkmF`bC+8Mf201}U0f`ezyn2vYK@LN(4)#& zPsii@b5lL|2t{@rtK^0Kd;C%{nI*KsY_qfPmwt2wN7d?PVIT6 z&!#-}Q`g2%{S7vK1oyq6iyl0FL0IekPwn@iFICq+%+1P$>uy2v=o=Jv_567!9cQtY__p84i>zwzcKy$&jst2xHt!ah%5#Z5AU3*LpPI+}2mM9;=NB2Kzmt6O()($X zZ^vUg9`!)4byAzUXCE3LZ_Hyp z^MwyDMH|PBugg3e<0)V5aKLxw>^S3y41l_ZGH&sZRF)%Y#|Dm`(#~ zw**ymemYXP(pnm(d^I z^7yy6Ugh5Jh5i5h`z_Xe^iN_CojHp!S}+u+t(g)*)?}j&%S@jvvRR{uCCv|Q&&+~^)-5+8+OzD zIsVB0_a zgF})B-?oc9M-tq&JpXV{ zI&WcBdYi5DtNs2i*ShGccGTY$5!1i4s=AHWiR49A9(|XJo9*94EeVSkEQFmGW;S+ZR|CT3@2>)^U z;8xc?w^WAj&AC&o+PRvL9l4!Nvwl`6J!sB(syrcek!L+)ru*TF$&;M&* z_B^l44}rTxGIt)A95f7-v+wR+fK1D|@Gm7dt~y|WM5xU=$ic3jYU;NQIF z+4VnzS-F@Vo9O+QsH-TfYMu7O_a^3VX>#+aQTl;y<`z_ z+y3)1t@{1fXnFttVbiZAan|%_wv$J;|5qCxuOH?YxBsCcqo#i{5BTI8QeUyH2p_cZ zwJ!dCnC5xLOZvm#uU>fLn%f*a>+3$e@$LDvk{=cO*$oPs9 z9rJohuRnpG3zI&h{__e?lm1-C2tVBp=$|-_G#+d`L|Y9F3dA8{(Wk%U)yzx@P$=h6Gu1frujwvhl?sT{axXW z-~Ub$G@Y1Vg2X}Z{(c@jFzwI!W*n_YU5h7v>NR1wR ztqbqv`oqwJ-?(SBW1bHGE6h3Unp1YB4%aQ&yjoeg^!xuWVMYDuQxVv;6@$m{0VxY~!XsS2R@{Nb3rkNx1Y7hfGd`k~|A^VfT)P~Xl! 
zbhC1?KGuGj=RdWN+V-E1uZu2G|B)h6(_iON+yB#FugI!ichm#DE23jQx4F`^^Igo_ z_|#Q2zxLr9@|W7*lJUcq8-L+#pWfpWVcUEDJaqiWuO#2jKMV=aUf;9y{DV`!_9Iq( z&AjaXhvezJcwU)b)PJ<7Qqy1a()-Dz`$w-l>Vc^roBnux8=wAke&$!(@^w6`gAF$D zsTW=t{MAFNt~sMKaPl{vpZ)G?|HJ36p{!gyI6KkbNjzP9_0yN_fAE9!*UieMGcc{h zM*UY89{p24MdF}$e?Jc%XwR$mMZS%r_5M+RIP+~QedE6WIy0R8hi{$m?2IF*Z`XgK zkK*>_?ax1K_o<;znO%Kt^_tGVwIqjmNBzr@n)I*z|5{h_YyA1*Z=W%~KI70AX#U=k z`4%s`tM+r=37LFO0rcZJ95msT}j&LhERJVWIZ2W{MeqI#_7igi_&c^hBnOP=}D zd{>sKj`y$?$!o$er*-A;K6lZPVa`L-uHEUdI~b4Ot(gvkI?gn`|7}ZcQ2P>ZwYGX| zDBme|6*Z6dfvA63(Z+=jZ&dP_4|(?fq`Wq`a6I4gsGH)vCG#y$>oPA-K7MSlk3Y%XjU$E9mKAE?*FgjN>;Uf+4qkcx9yewT9Og<&x=Pj z{axXW$N$PVJ@V3R=cl|A8(#ZvvEhTJUs`W?$^2TL6P^c|2R7KpAGrVghEM$QvTx2i zi+c9?E6>UWz4G$(=OgwzRbIEks`NJ7$)nouFIwU!pZP`oyG5i?fBhdZ_~aYve2IhJ z6;;O^f1*nhza@NoT{*SM!v>epAHMyj*S~)1NrRn%1AlkNi+{e&YdyRFCv_F`t@Z!c z7#GxdmMmW5cK>1OwS>t0&?)NQDrntajUtv+sOvm(Ha9&%65k zIRo@#pMQHQoo{D)!I9GZ1wzjn|Tq(j@$oG!K~@;3UA#1>v6226cQe8DFBYe>e&8&RO^G==>M;E)ekc6 zfcRNky|tCEakh8j*irxCf?3nw72fLkWIo;%-pcP?VO2KwJ%$h3xY`f?ewgNYSBw_N zV_vQ_ZM*ANFRby*l-sYK`1UaMzJH6if!p6}zJ33-Nb4z5&-O<>jcXo#hHjb1 zn%5JgZq$E%;i>7b^J^=@Xa8X6q#nHX?P9|R=@T^n@RIo!*YUj*p5Ze3!|WqwtucR< zR@iKTTmIm0?M*-S`D;YS0jKHz{|>5OQLIXDv$e0<=U=D!^xYCW>c6mf)TsYv!ZSU( zV%zqh^@xMsC3$KyU-FGlUr9aVr}^;lgQ*|8m6c1_Xq(^-_ z|DOww?aRJ@N&Ld9ue}prTXn-`oD`$}i;9d!{aq2Q_RqEbzs?UGLGOyJF?s0Fhu`?T zxUyX4LF!?Hef(kD@1Hk$^IK+x9bS3z0pGi2HhS3pzn7H@ec1iq62GvbZaI$xy%Xg4 z6For2gBSH5DKZ-M|Nq(VFFKR%xY|vce_dzNsk=R%&D{|_Z2v!6s1fx5yK1}fC|0%h zP99r5)eW2XTWp?>sDC+9O@FP&@95$4d>bl{IOr2Uo1+}Kvpi)Gh9Z0IPA_J ze(|ms&J2gXYK83a)vNRWhv+z^8cWc>r*TEGB9A^mClTBK;LpVmpLur%)+qy~KYS?fpNbs#rOL@vC4*JATJrEvf&#TVM z{AyFru)$AW6LuzDH}|C*&zaa+{f<9g|FcPdr61e>k7ngk+yCV12N_>cqSfmG-Kd|= zkJo$D-xawv{Y!8B{Hxz1FrS+KIzDmGYrfe@zxYAxuN#|*i+9DknvZvd^^g0*K3hL9 zW#Ey8Vc!X@e?RjFxpzg?IL|``)A{9x>{NUKurqpr!jApqJkoq8h;B9gb$-k{>fb6d z8ui~s>cLO@^KN|Hc!}To(B}aj$h_d0U-^t}=dS`mqf!TF7{I z{(m?t7Y}Z!_ung3iaIF@t6EbB6z~d*7BUjLF!?Hef;5lC%^aZDc^r*7@T>)RkxhKo7X%$|I^LN1)cmv 
z{r?^1dBu`6X2ra~#`~{oKJ$zEmm}8nS6*A^1E1&LFyZ}A|NPq-c-`99p75nPoq_E? z_1ID8c0K(coRy31_ea&{`B0u$lxVg6nXU2mZ1M1X*z?&EKYB*}dxfW_zv|BG5WdwT z4h|Pp;(qjGLHM9|;;&&Yet0PwpK+Y<&7ay$JoSgoR=nis3(x6>Ee?C{xo7RRCiCI^ zGoJ%M`Z0Y5lkr<(v-VCN@juAei~~|P>R(p0`kD{Vk4QcEw%zNvDX;yd@qEi05I=nL z4=tH*@g&bX;Zw)#rSylVFJArM|MnFn>t*$>gOzRFMI7xqhSE^KPr^1SM z&Lhopf{pLL%qJIK)PJaWRMX!T-so=$njX2>jO(4+%4@GHHhj?Z((&>4!!*yvV;+pJ z*soq#>s1@Pb;s{+64rkBmeU^CaxwMT-u5gm zMWm+xfbeVYztjPp@Tdp+#GmR252P>9{MrZg442X0nKpl)4PN_>w{)g`^|LE{_499| zo*n;;X60h<16{RSicT973S^;bGB`z7k6^G^LX?m!$YE;W8$y6yPWJGGT>_CM$k zAOFgyKc3z0!Y600@a;bwza@1WuRr-gdHs2A^JV_pmvmXJnP=_$$6Pq+@paUHVG(K6 zf4bzM7xhyl4tkg5scm`PB#zD?{3dMTHTktpxM0`owt4IaBjLP1o%qYG=Q^)``~A^i zRxYN;K>7Q-E6paJj3Zf5M<zq+f39`WUoq-GQbZc{-$C+fI_2W0K5@`HiJP5_iy!2DH?4KIcuD?Zr}p0DM+?Za{r|kIT(~~odp-Z-)UPO3wf5=<+p3%O-xaRe zEwQ8iqs5~}{kIdI>6D9Y+p8NNH(uhmaR(asx%k=6td8=%(>S<{{;=Wv=UbzPJ{UH- z*R8qJgkMnC?*BZfwUZ_?lLM>O9p8BMQI ziDQFSM|H#B57Ru)_*%zo_5UB{&OAVls@nf0Zh)vm!V(}Pmk=O8LK?%q*6rM3k;MiP zcnaDqvWYsfiHI<`qJj-hX@?>luKKIr+k zPF61I^OXK1T5XTo{g-mBr*Gwp`j?(2{rMd|^J4oMI^iV_`ovG4AbFt8t2LgFeE1BR zhmJE*fB%}-4exQ#txNjXykwnxg(<&;-#&jW%*w?pT7QhEC|T7S9`D45ZszZZAKl=M z`j-_=`b$-{U8y(Je&Ay~Y!E+)zZa5kSav(6A8f^5e4+cv$p^gdxK=p$kDvc|+zwvr z?DN-fRxX_Pvj6{TsUOsNi?>>PCy(2T)^|tzsU7t%8JqOyI(Ft|y3>b_Lmc#p-^SIx z$ph{6MB~(3o$~#w{{BfX%xxWf-p~6dud?3YyhHYf-(G*aS-J2&J^$-I#V_DHrq6)J zouc+DR<&Q${(m;fXWmi&yr?z)B(L`Ud#lKf`JgM!`sb;{&pbiK2TlLZcs}yI@)_#9 z%_iQ%A5Pfh&yRoX!0W@i-?#VvAL~@-pWmF73%WDETz>vC1)K3CyS}Dhbi;0%U(~;x zaijjTWDL{EiS0q1FLBVuP4&PY5FX2KDQ`FNlkYU%3!C!QA1=7*$Z1plcvHCkT|3U+ z=It%7e0Ka9$;!p_*jcZCqd(=5tZIKxZTtS$NyN5aS8V1T^)Ed(o|Cxgmy1ol`O|r7 zUVEGI`Av2Bdtu{v>G*ExOWX`8uedie7-%8l*haAad=Zdc8ZptXXS!U=GXb9b>uCpzkkxTU;X}nfBJ#`DJwsH z$Gng4#(eDm7lyPCaDw-LkgDo(c!Iy+H4TKebJ_ zZW7PMX1}vK`gIxym(w5S9(mol8?V_3TfAZMBY(TS`v09bD*tGL%r`;Fih4U92{J!B z|F3c8MI1ZoKU`GS^mm0fexEy2(Da~>uK3|24tgi?)Fz+fjm6>ZQ(mpNd~7hyo1j1B z-T3J!ech^9{FLv`TLyqqZ|Jk-a38KpZ7Xu z3OsiHb08}h(_?D+`=jdrcXEyEIAT@L1LK=JKCfwhQU8TSrj6$ctK#E6>ZV6thV6T2 
z#v=}TCwXS0iz0rosXzGPGlWmanW#T(cI0c{H+i2m!sgHYb%%}L`y6%r{(}2vfYu-5 zDQ3x%_K9xhZ%IMAe?|Ql6_J|$I!=36sk1)d0p0kx5Dw`Yd(IjyfwRr zAHM#+Uth8Jmsi=bf7OrP`oS&#_<4Bj`ooc|T&pLsKgc`vD~eUEz50z$b;C~Q zhaL4FDOeiyPwzjR7uhjCFX4aw`CthY zf8VP7T}9=y-~YDMul!=w*YsPv{vp@;jv(`k`i~Z#n*L7myds|iunn~zZ1Ug-=?lc) z3&}UsermS*d+~)0KEB35JO1J1u;D5P9TZ3vq^7@b!}39+{<-8aFRMp)Q29CUN5@w!Vg*>+An^|uK4kT zX`OdcVSa2y>Y6aT;rueo8>!j@;e zu7CF%W{0gOUG&J^mk+^b_a9>%#rz}X3Frkr@n;Q-@ul})%B2AP=qKu5Mr!)&{1^tk z?0E#A=D`OKHb`F}{$5DFp_7hkw)%VVg>_ec{mCCa?XzL}`k(p9gNr`~kA43;sQlnM z@&hn*Gt}`UyIOno*Uta9B!_zXjQV$q2)cP!SYuvCCW%dcO@AkGyZ`xPVte&loo*-Q zYyL%&kDVg<@EO9V<4n{a!X2BOcHZXugRnemHgZ{ntoVF82OEFK@tiYC3Pp zs&q2j$s;@ePoEv}+kDiH`ge;+O@CK-W441KlGJ(t3Q~9@DsoJywk3yAJe~^m5V*!*N_*0bl#Fx_3NE@ zKy-s2KQ{eD{RfK5M*Uw~M91iumuB_(%ZUvi=o3FS_4liV`M>`3lXK=i4v+o*G~vrH*6U9af0iupc+Io_-%*>_-K89V2D7*{>c3qP z{U6XD9TihuZ2xlQaQ1z2IxnyB<@JYo8ys}oORnB5Y<}y@x7>5mEO_kwPq{aa(W6iA zzm)o0(kJy|RnG^02SMEC*Og4>ANT*E!c)_~Px9J(kI%R5$oOu299|n&<7O)_d7$-| z@L3%^pz?TZu))tbVf)=a_u4mn;+nA2!fD_9uOHn8za4+_tX!<0RhRSkcY7-j{lM$Q zPd`!r;lk6Xe|r3BNxf|cCpLT_^YR)uTl*mov^>o>KmEf4E~6j5{=Q4ET49r~y{Ugv z|3~KiaP!6VW%{S{oAk-@1RQx_mMr+ew(5rM>07btXTt>rRZV}*Yfmns|K#|iI*`|X zLh`*6WE|^5^Uc3#d>($#>Xgre%47Bf{bAEx<~zkkdnV=2e~g+-)Ef3AnLKJp?v=I143-YJd>w*OO*e2{s9<{w&m zzQy~LA9M}&1pU?hKR0~Yu0I<1lGi%>{Y5t`7vBHmkLc@f;+L#S2eX|#YAZ5d)2l0S z<`<7YiwaLof1Pi;Gd|yrJ1y~hH$D!pjjKGVO&(~xns0vUbsWYsz6tu_3pYOZsdImR z+&1C%cO3STpMRqRkLf=k^D#X(E&u=J!#1vtCt20n;@U5|!Jp0#JL*4Du-JI6u%gYp z7l=*&Jb&0f(!BP^$LCuee2V7pEIr@iNu76+Z{uQvep?aqNeUmP~P=7Ihj zzIPHj*!5SVS-J4MxB2Gk2N_3EqLps;`O9ftN0B2A)n`~q2>_>ZCs6~HhG}MT{0f^>i6I>`WYu|_kqVx z+IZg;!_McvYVS2(S^fU&AQ{)@)e<{@t~~!Rl8!6UYHj^%UtIsjxFGu%cGTY$EH(X0 zZ`6N>B3d0Eyeq8w^A0}HyCQ2Tb?DYf;%YOGl*c>C|5q_=eA3L<@A2R}!ls+M+vmS+ z2tGUiFskE#^ySpAC|0%h>ZgBnGe10iQp0wL`nL*CO@HzhO1|lVKSj-Je|mg=Q=R#{ zOV78slQ{G8*iA@ZXi7+ z)Gg3UBmxOQ_DrHM&x{@o(8roSt^(eH}&nB#a}n)bZ&PHn~Xe)Xm=eJJAZh0Mp0 z@!-Q&?8O(RUcb{>N6&p{aFajxwUf_Y;I+=q|0I0*Hs$Y+_O<7Go|TLBqkV|C(yO-r 
zJFUwF=`-p-Pa&^w8njb4iQLE|M)>*0sbFwL7_ewcInDer&P zp%;d^i@&<&ZtKi|-#-6#vT~uH{APXs(^WsHIOP2V4>5zi0dcIg4dCB zWt$Ccm3xhI{io?Lkd=$yQ2&2N^Sojij4S#wTjSgZp13`KT9U~;qy9sM$Hvuq>i8T- zzUi(!Uh|2AsUJR&Jdph@jbp=S*q4=y*-q^q{{FSD|LZzWzT)!!wO2ai+FyVBOzQ3T zN2AIEcGPZ-N3kmXtX;}3#||3?UheF zt@BRCH9xi@bxjx!oqWiW$LFsiER}wC{9mN}itwP9q0R#y&^vilyZ%u9yibHT>c60< zF#X8`yD=CS^6hz)mu`D~hPT}jKlz|{ny-B`5Ajy~rUfOVW`TqZubl#F3b#putq+h%LfX40lol745Mg12Ro+ka- zj^v}8q3S{$^sZp7bfqtlJka=b`*~MXSNo@Tg;mWL9`6e8#Qovk_kU-*zwEgv+<(=& z>%Qmg>iol@%F`N;_WGOnC9A%sug33RR4?Wm^rP!9KKJukPaJ!l?Z5W^AAZHWzxez254m*z_vC3GnJ)O- z#8cGyp;OdANeaniG1>+SkCod?GYyZ$rrQ%4`tMfE&veE-vxO!|!aj~1Sq{>*RD_ zhEC$#ud#jk_G(64l?&Sg6mQh6#Ussgg8UBM`qVt~nO}dO++d(df7lt9dPAKre7-9c*r4^1H$S#w6A%7y(vmx$-Dulc;q?D_$rW?M5c9C>|3{S{?5Le5=)_Lf^i{k52cACV zQ9J5idYbfa&z5}px8qK(JjzEL^iJZbjh{YiaqY|e%5QbbOZ~F?!vTw)o%`N37lk+O zwDntdeBUxQJ{4)B(tm}6<_rm=T44drnz@G67Nb2qL??_fI9&G9L zZyoi6^p&EMJbV91yruPBLFO0rFC#Vmo#g%7&i~{xo*n0O{F=@>ANo(RmY?v1_QtQ+ z`c>F<8e zPgi=HfqcdM_@&?fB-4fQQoFsic)U|g`)`Rq>fb3IHR``W@=On{qi^pd?lrId zvGMs<*Qa@k=I<;$-{MJ~cjI}?o}fQGvCctv9dOW!;kR%7;J&|^GmL(A{mF>(gT1c* z*KwsP>Ztwp{QRZ!j{0|t+M52%dq9{?kCxaq{WY(B!1#QtgHI8^WLKY)@q^6A(CPT_ zVJlMCgyE2H-TJGC-@8h9>slw>)_rOaUOWHOQhvo;Zohh^J?}dbXN?YKYhQN%f3?YD zJdQ(A|AC^mroR(LUWb?ub%se^`^!>CK4{}=Kjtq-|2OKt_0IFYcI~4Vgst|U{L<%s zcbnJ#Id6^)+VMZn%B41MosUGLj!qu&|93WT^|KwBf7E}lsHo|$yqxzW-^S02s#urk zX?d;;SLf|CuQ5ND{Ioyvz0)`dA2!&-9}d4~!6q*}d{#JYyO%uRUtRtG+TqGSnqWum zd_0O(U(;{x{SQ3!r*_nTs9>q-?}XbcnyxMNgMH)k_}v-#Abo-Odm;IT+E2|+`fTD2 z2VK{HeBTB4hl78++F!n~)=}`-`G4l6X!pNL{M5)lT@uTdkQN$8GGU z`9=K~7S&Dq<2RjLkzH+f;~x-z`#?eRLGQ$G`D%-|+D_zyUh~pA^AlI3t_j0tD{OV) zIel*qo3FU;_V4>({ra~~Rxa%4`7eso&(m(Q<43M>wu7R0qb_RO^$$+-;iax6{;2<= z;t@J~S6E|s^gd3^2Yy3WrrZ8%d5PbvZK(aE`S6;be$)Ep^oRFeu*Yxi-Lfy7e%D`K z=*&2fe(d(X{iXcrZ9c2YuqFFOWQt{4|bjaphfJe|XDI|Cqe> z?`DOAuDEWsB_Fs89y|Y|da_+=*FWp{S+c+bws(;PZ10wC_xEjF25S2ENq$?mlj)T7 z2S0HTzjwl8w(^n(+ICIiRtFELc^(^V@H0*rT=ka=c5;it$4*-F<{!1I?|-@f^H5eU 
z_Wk<~<@JYmdH5B}q%kY6e)>f>>~wzEQU9{zn*Pd{UQbA!q2@6^kbY7dKYdv|&&maV znpd-X=nQX)VZSq1xbEw3yFcu6_u5}t?@iVBU)=wyTee}>9sAYw|EF5&R}`x{KBsou zX+D^%P2Xxq{mYC-{g?Ou-%Hl&eU0g=oXU>*)6f7{$->|fBwhK^neG{@ri@pW#wXa(l36HeB0l& z-ulG`;loyhuL;9h58Srv%7a7UUi`CdX1+x{+iAWe3nnV7ivGXI`xM=ANl?Z?|*k5 z_V$meg18!Us0@TjqV`Vp?N1chDm=>|G^@orayVZMRd%5j6cQU zGA(PLA&BlEeSzlBm!5C&bQ~x7@OW&Hd~8ttVe|FYz4nSPPYJKtvoFlr>u+A;cKyRZ zRxYN;RQ>Xk-RnLF3bsYQu9VZcn|IZ|(Ww8S<(%_k1}#Bbv& zFL|K7uS()pr+n}(ufKnTeXf}`YsannH+<@u{ibvdgvY*r9L&mv-<|mNi+_K-;vwrV zmpE(cQ=9Yl#H~J=U(|oNsBP4LM`1RdI%0GDfS)+%T~enu`HW}z+86PfKdmQ!qW+fj z6IR>joP9qx^3t&89hZJ*lZV#vTF?0heh1%p{8t{^|FG#t(f)t1sXwiY`Y$LtLl2+i zw^uEqV~#%^vD19=+CR~_6Rhc|{h7ZUz2e6YvOT==87hz2#C!O|me;?2hk+lQAGW;b zj2qv!ef9qrhh*hqpSwoY?kIN0qgYWl#si&1`2D-ZbMa#{zo`GhB9iLqUEz(tds$DA ze0v_{;!pW3&n0#Ar$`>i`)c^S*4ONR(Qohnhpx`f-emd^{C55GNLDUfci^|y`_JU+ z2c6jI+W2Nj1iLoBj>Mz>i;72$`cIWS)2Su4J^$SJxaFzNHhyB02U>sdE3egKgYaR4 zJ^cOaoVR4)vRgLoU*~}}|MZKazYM><{~yW9#q{r}4X>hj>uc}C*H+!&Pv^(Hqy8gB zMNNMlhu=|>zuf!3D@GafeARgwuiEr$e(l4%qB_>QVm*e(mF9%~VfqG-&iMLA=Z5Kr z`#(N(Onv_!Qa*4q{rzK0{fc5$Yx)M!%{$34{rl9;1*86>#iK_3JCbL5rhomJr{8^sho|xV=eBzPb97V8k_8^H zt-4_wIw_=|{=Q90N2C5*j?cH_PD}jo!UuYn)Fu6s2a=!iVOv~zm)9T8I%#CH7hSYs zIB%o1&VJ9LYv8f#9|p5>vG<=F7vKKw^LyLhbl#FxUmKt5hV7l?nyoxh|5BMI{dqrN zI_1g(s;+Htp~T4tZCuSW|KQT|Ew1@i?ykdCfj(0s7w!VCqqjuQ%RCpR+ ze>*x}YdnfI<~T^7+6TXzfZw*C@-e@te_2t}AD;C5f1Z_#@o64>pilhPhxSPxNIrSy zPv({8nXThd-{cSLfA%-Oxc$%PhK(Nm%w?zhSv;RP|AP(Y-`D%kx3Y4K78AIZ42`>$%9cT&kb zqyBl}sp${jU=gi8AJxW7^4iCY&u5&p&iroa`DxthHQzTO{WQdbf7@9XKJv+%Li>bs zo}G91;qch&Z=RJ4&+q)6^7gk~Y(vITl&b3az0kNMJhO?D52_!&@b3Li{?pLCCxla8_~Id-J$V(zv-58QS-F@V>+1W@ zmiiUNs@C4gW8<4Y-o|gJ`BDFJL>n*VNw0rP`hbu5fnIsc?#s%hS3dhitxowo2p=}s z!yg9DUgD-K)vQ(@DC zcKGD^zpAc3I8B#I9X`NM8v?{fCOmCjI#xAN|+# zP(I=yeWW&i#xfC7cZGM1-<4*SpV(nw-HjFwzhgMO?@J%}*16}; z^cuJ0|A5wmGu3X5N3p7L&35t#-MrehzI7f^|KXwnJ-jQdXuDz^kN=wrnjRgo(|qEf zcS)U&M<3*Y9Cz$>h51saeDK0!Ht`<$;UmW^x$LEvUK1`{@SQ8a^yljO2hKltvvTnu 
z-@iEZE5BIvwRhq}H|sMOKl5YWQU3)+B-PWq!W;G1MDnRObY+?ypDZu&TOZm#d7#&P z^32~uPpeDnoBaLj-9CHQfBfRO{u$4=9+*Gt1jcK8{oN#f`}N?;JEeSC!(x2;ziHT3 zpUfxfzp$XH>92YGuY&2(Dzalf=t?u=_coTNezO@@(fp%QkDX$zf8||XfB4zwcH8v$ zJv!m1fBEiEybuT;zW3sF%y72;{+;HMPpi;CKs{!SR%+Y3@> zuR~q&!%rOaPU5LeKHJaYIuGJCzw!|`O#IX}VVHfxg+IIUCy$3YA6fC1D}VP3ul0QY zNMB(7_44=EEo?ArSm>***NwoBKgFp3NI_N8U-jU1ocXXF4P8-PeLl84SGw)El#WXt z=rx}_^Rr!ugUjg;JOBNbC-3a;ALd`PTm1X$JsH>b|ADMrOpmqo{C}=~MX{o8jz@y@ zU;F;CrEx{(7xf=4A~v4WJa9;C^4b1|eVVU%?UTmmTOE0d=5H-M-{MJ~caqOM&7Pn? zT)X1j^Y*?j4`1Hql~*tLdiDLwLCQOv;Cg!fb65Rf(w9W5?a}7{AIZzL9@~lC-zRTy zn)GjXB+v9{DG#VT#6h3j5i$MMZ#?ZHI!6DF*!KBE`BMG8lRW&`@KSGa`bzjLU-`@?e}aC- z2~!?8`R09wUl!JW&BDtrzT}PcW$%AFS-G&k=H=UXe`oSLKw^cXzYkjwbFY2Ec zkBr|H)|l<6*HO?v`-P$QK^*k1U>URB=(j1qBl*nRc&Kw42ba^|KXubT-SerhUev$t zhc5c%#fyK6F82QmBRbBY;x6U?-`Vek$(Lw-O}zI1@452nJegnAzw|WfzpK=dZ@RaX z&y9~;UaAkaB6%RX)4%zt({VhwjDGmS5AJ`@z$y>H6odQJIf@|J6~O zaTUc|t-boec90nJG2Q4}d87W_BD1EyE4=aiLtd=M?8mM&GoK*y1Bs`$_0!GD#o{fo zd0jWZQ~yMf`H`R4VasEm|I?vg*)weOOW9Cs_Cyhk`?2*JW9v$PW+}dFC^bk$E(>% zA5FYrwI6Nrh1-D$yN}YFt=#Y+U{srUn@Pk&Td>&-}*kBKT*zEf!zGd>q?+&dO zPdol~?|&Blp1*(WX63?m5i9D};*rL6e9jxd&+{g=qy9rhWuyLU39sqV(Q&$pIv#P* zJBgdEd5n)AYa;~`F8)q-9Pi`^`8A<{rB%9S-IHtpIhkfuXFW-PV96| zU$yH$siQx&qyEDMM@@gt<9C#%Q>)0Xwmahy2fZt@#=LGYt|ED0x__9TI#<%wkGIR{ z?_c|UgCG3R@Gbr8?En15ckH|x{oDJ`u8sq)rPseT%v-XnwRiFu-QZ8>haL4_P*l|P zhi5(EGd((D^E~vLM;uK3)`#jz9!Niyr@S@$U-XAvwww8Z@V&vX>jz(X)B1NF1iu~s znV+H^|MRR|?0Kqvh__lBpZu+(y&hPd&Y$^4{TCM1HT}tJ3AgElKSj;szQ^Q)^abMY zh2$GL>8EC^zZYNF=VJ$+^rp{`hJ8MN_Kxkr1suO^|6i#5itw3!xyGGfx~8w%{m)e& z;_yZN7Zn^$`g0#k>%+;29@OnX93-CFHhwoNm!^2l?8`yWMK91h$upaA74d`QgXX7B`G_-5vx)b}57Ykhvj-e?@#SH?H{LpO^dYxV zZ|C2-S-IHpbwmCB5ne^fs@7>g=$87ulN#n1^&c%fHT`v*c4vG($1Us6jgQ0Y6TkJL zyySt#+f=81c#Us@e#Qyge);;Vwm#?+VW*dzvfu*=4FHo}Le(QK7Xnd+0aq2+k7xgb^f*zLd zgpc#srazuPmY-+klJ;kHPUGOR>aTTS*Q-7^>G2i!3$Ojbk@Nkf)$#w0 z%HK(Fvg+SbzoJ<6wRhq}H*DqwGViE=S<$3F&qMM}KjsgP>bTbnk`JbJ<{w&mzQy4O z%WmU-m)aBb2Y>czFFIo+5Az>=;P@xMew5cbyZ-<@iuV7H16jGClUT8j7LPRU1Z(Gi 
z=(i(r<`?xZ8Q}F=55hw}+s{yWiG$Re&G=3Ec~&kwFQ{K$ovq%Lw6?v%k%#xui1bgv?Ew(!R8=t+l8@9djXNRBlu1~>l*Wcz@xv+mVUjOeTFI_WF`~0hU9!%yD z_3soMHT^Y@^GM9g-luens%rZa2fZtzW6nRg(zMqr#!K_*-~36x9-an2bxjy{`N~5- zpS$*O*yZh~9Q)XT)$fmZ|1+$8g1hMbCmB~!vg&L4M>ntehT1p%Mg6;3T+k!UYs=pw zRoC&fitLyVy3({bygu>M7f2pRKHJy)%cwK_#;1MYxBG8w^!Tec`N+ZhcWH#!*i}277&r8FE&+=SShmJk+eN&zKiQBv;=%=r+;VIW% zebLR2gju&feBZSPb>Xw?pA)`(VOB0Sp7t-^YMsXU-HQ30Lq){&cbccj_sI0m^N-hY_=tmE>r)#)Xz@-~ zF2?WF)_Hnta5?>9_jmn0e1F00(D}kc-#l~AW8t;yPX@DcvH5ZS!Kq)z6|25RkJ{^R zGC$@W^&c)+8uedY#vtFee@py~M;!DnsY5SC@<5(vrhmc@pCNoY&P4s;r+c1p@Q!!h z6n?q?q7T3HD93#4^KYJ&3;w*k{Ym`)&5rstE>?Z*o%q`6_2;?x(aSrrqy7tuM>YL* zezp70Fz*D>5%jKLt&UIL6;>R_73+9jZ!ba1Z{YJz@+OM#HDOq^^@4AI`ND_7-B%s< zx8I#y-G7+-Pv$zFqW%6l@k>^-8tO`W3~hwo7WG8~nBTDNodYq^NDwU-L{SS7cY?!cQFZ zuE<)-@h8zO&F45<^ShGHj$eK`?Xc1FeYd>wsaav;&u{ zAFp}n1*U#%!%kK%majJZiTR!6`4p4!YwiFWV*51kEw(5q>^E#bhf8U&vsir^k8x=m&BNrPUQw`SY)NTbMor zI$kzDqy0qv^P-}rzbm}apO;V_|2txv z{!Z;&ko&GM9%$pLKKOfKTIZdNV}5MKUVLHlYhUros~#Q=tG#gjmLL1uZt&RopJC+( zcPRgV`4nC!40RroU9F98edTeA=%#fXN22~^q^7^-aUBcuvhiEv`H6#G$4zbgjAwE9 z%5KIyZ?q4qQ(oq|tp5I4w@%(>kKx7r8=vxl?_d3c>i55#{~yiDg?Z*r98oMs(J$jl zR<*_t^7_mCKy=5B`gaSKl;69;8_yG@dbGq(fAFN}o#f%CFAzTnUmC}T&#+;;z}JM~ zM}PXs&POhJNV=98`nuZU0YoHLO1Il-RN8GsQ*C0W%|3q8q=2- zoB5bd2_JFLyQGeB70CnHFF^BCr+o0*yq3}5zt*=d{J~Xk9qeDbZ|xcHeRduA?EU9R zRxVt3KtDE~_9I!!lnI&_xkG(7U2)Or0yu z%!31BTYdwd%XOTSCIL| z{eQUd)bw|SH}3!G?;rEhZSRZVhY$2l^2|mrMf@Q1fRFl`t-Qs1pWW zy7kxhy(YlJ_rG@DZAkj}y#MT|4JLg^bj)#Eb%Vb)zm`-){TCFEYWnxVC}@3Yo$)z| zn~o)0eSfw-l$ZQonAUk6&-~bmz4*fX*}vL!(-*D}^MAYOir?J31$=h>$w*c%cKqK& z@4wMiKdAFS2Z@f+zt{Euxl}~`7Z#72^yfY*%!~01T~WOhyCnhBGRb; zWT`Vfl0LkXIQJWB|5|MPR@bL~Me~=V*SDX=sdM9bZ63?$?_YiR@XqPizNCMRD?Tx} z`w{Pj=YL%P^!vxw`1!N{M*T;MNTdFHN}cJ^5!>D$yYX?$OZ;AKL+!`%;qA-H#r$ep z9X9ABA04S{!Z5f0eLJl7fe>DE^~a}XeRoi2=l>TepCUXqzK$nZ)!O3PS6g*6e@Ai| zkJt04|7a0u)L-kYkCyU)PHf_!Py9A+-#^bM&v-N+KEq}7!xy%B(diE#cl9k{hqWGh z^wFE{WITKQWnPN;z2)!k7TdTkTk4X=+Z{=RpZ(ozC-MG1>8z%|#<`CN{j=Scd;do_ 
z?az2~>Bsuh`J1i%cn}_Ju!ld)9&)FgefUFR^X#tcK6=Vg@Y(&(wJ)3Js{j1{xAd3k zVtm|x$w`3YJ^AoQ{mYJ<^sjvn1Ye4aWSsV8g6Im`xT=r&b-vgsrgffimB;J}`omUP zc0%9rtA%YITzk{APP~nJ(|<547d!vCtmmJd)bqck*1yh^?Z7;v{$)i?f999yV|>JQ zJf08j-DG_D!L$y4FZA%I&!3XaMvww8aiR-PlCw-azc~&myl%M$B z;sz+UnLYy=cZw}R*51jZ_6$L6#z80gjQW=h#;^6%)$S{qXXQeF845$ZXU3%`)*n6O&cn~l!v@dq_=)*5+tl0jKM7B! z3wqHH{uI3v4|k5g3-WAd6bCq|6RpqDsSfAZ8 zt;X+N;&0MF%}eZX96_xj~MyrW~h0@L9gSQ&G=3Ex%9>SYIVxzLHMx29{w=> zV_#l%o2{-1(|>)%K3}@<@9^3A=Y?6h^t$dzvZ^)P0Yo=!)2kyj%rELcSa@psyTTjK zfA$NKk3PJP!%rOaE~!%+KWO>rrdab+pX9+qABIUjeBreZp8l%ut$KLa>*l|`?ChJq zO+NZ_T`)+U>6d3qUAz;w@#%e=&9Ax>N5`oDP*K^afATuQ^VQH5)zxvv`gNsy;^*Hq zpZ<&oy@`X%>F?jm$i0-h^qLcRIfEn63UM9{gdQ-~9c) zEq-@ln0m;Mx18a=jV?XUzvVhEsN;#ZTGMZPeeq$lzo&N8e?gJiq<`)D|6F;&20gu# zJmziVHsvRL)LWhM{j2_P`mq;3F#U`x!x<;WpZ^{@44-}fK9ZG-o&Vpm{QULZrsEIw zQZ;6O-9UNRPx7VPQU8TSq^3W0WwbiJqCb*aUW&#~Kdz*=_Z=P^ zO!E?d*y6vg{m9zeuNJoW!;yQv^wdZ&+hslP%5+ zyU)I5pH0rQ?b!JH_dUz&&zD$#+P8FH?eEs7_K$AXuhvAi=o3HVn_v6DPLX^=H=f61gN%a>sz1zF`IFoJIln*5xa`|6zw3Qdy~geO z&sJ70?C1H(`xGajJGQ@dCC(Z>%+_&g|38&WhRs9msQ+jYY0{tHaifdv2i>A7*12L` zZ6C{XrMr6nqqdFTP2wG~i5rj3$2*OK%jpl>KlRJ)elepP@;R$rzvd4ebn{MZ(@o>p{e5zRr=~x3xzw8;^f92k@M+%v`RhOb&%OVdTPj%7 zUv*8d&ZxvNBPjw&; zdRIiN;}Urwc_4kIacu7jwyLk?_4jXd<$l+79)7%k)@#?_E8A?QcLi(woPUI%U1u|- zw%6~!oZ5p%9RInViN~_;2zkUDG&B}%MJNX~V>#wGun_)7JR8_}=#@|0umrF&|zf(M_>92Wg zM|7zjf1L0T2YuqV{Xlui1C6)rW+}%l<%8GwmeJq8_AB4g+Hi#*^iMl))ze4&Plm_d z{|{v4VtVYLzrWMI$(OA9nmD>)H_b2V-z`{dJXcs_wxeFhQq!rWc}|cx=o7#7q4OXQ zO!q7EGjHXC*Z3yrhcDc7NgjWGk%vdlc;Abk9M}y$JO8sVE0@~&XQzHev8uIKKYiKt zKlI;`T;k+M{RfIjO@CK-

    + + + + + + + + + + + + + + +
    CountryMunicipalityYear
    UkraineOdessa1944
    """ + )[0] + + expected = DataFrame( + data=[["Ukraine", "Odessa", 1944]], + columns=["Country", "Municipality", "Year"], + ) + + tm.assert_frame_equal(result, expected) + + def test_tfoot_read(self): + """ + Make sure that read_html reads tfoot, containing td or th. + Ignores empty tfoot + """ + data_template = """ + + + + + + + + + + + + + + {footer} + +
    AB
    bodyAbodyB
    """ + + expected1 = DataFrame(data=[["bodyA", "bodyB"]], columns=["A", "B"]) + + expected2 = DataFrame( + data=[["bodyA", "bodyB"], ["footA", "footB"]], columns=["A", "B"] + ) + + data1 = data_template.format(footer="") + data2 = data_template.format(footer="
    footAfootB
    + + + + + + + + +
    SI
    text1944
    + """, + header=0, + )[0] + + expected = DataFrame([["text", 1944]], columns=("S", "I")) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.slow + def test_banklist_header(self, datapath): + from pandas.io.html import _remove_whitespace + + def try_remove_ws(x): + try: + return _remove_whitespace(x) + except AttributeError: + return x + + df = self.read_html(self.banklist_data, "Metcalf", attrs={"id": "table"})[0] + ground_truth = read_csv( + datapath("io", "data", "csv", "banklist.csv"), + converters={"Updated Date": Timestamp, "Closing Date": Timestamp}, + ) + assert df.shape == ground_truth.shape + old = [ + "First Vietnamese American BankIn Vietnamese", + "Westernbank Puerto RicoEn Espanol", + "R-G Premier Bank of Puerto RicoEn Espanol", + "EurobankEn Espanol", + "Sanderson State BankEn Espanol", + "Washington Mutual Bank(Including its subsidiary Washington " + "Mutual Bank FSB)", + "Silver State BankEn Espanol", + "AmTrade International BankEn Espanol", + "Hamilton Bank, NAEn Espanol", + "The Citizens Savings BankPioneer Community Bank, Inc.", + ] + new = [ + "First Vietnamese American Bank", + "Westernbank Puerto Rico", + "R-G Premier Bank of Puerto Rico", + "Eurobank", + "Sanderson State Bank", + "Washington Mutual Bank", + "Silver State Bank", + "AmTrade International Bank", + "Hamilton Bank, NA", + "The Citizens Savings Bank", + ] + dfnew = df.applymap(try_remove_ws).replace(old, new) + gtnew = ground_truth.applymap(try_remove_ws) + converted = dfnew._convert(datetime=True, numeric=True) + date_cols = ["Closing Date", "Updated Date"] + converted[date_cols] = converted[date_cols]._convert(datetime=True, coerce=True) + tm.assert_frame_equal(converted, gtnew) + + @pytest.mark.slow + def test_gold_canyon(self): + gc = "Gold Canyon" + with open(self.banklist_data, "r") as f: + raw_text = f.read() + + assert gc in raw_text + df = self.read_html(self.banklist_data, "Gold Canyon", attrs={"id": "table"})[0] + assert gc in df.to_string() + + def 
test_different_number_of_cols(self): + expected = self.read_html( + """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    C_l0_g0C_l0_g1C_l0_g2C_l0_g3C_l0_g4
    R_l0_g0 0.763 0.233 nan nan nan
    R_l0_g1 0.244 0.285 0.392 0.137 0.222
    """, + index_col=0, + )[0] + + result = self.read_html( + """ + + + + + + + + + + + + + + + + + + + + + + + + + +
    C_l0_g0C_l0_g1C_l0_g2C_l0_g3C_l0_g4
    R_l0_g0 0.763 0.233
    R_l0_g1 0.244 0.285 0.392 0.137 0.222
    """, + index_col=0, + )[0] + + tm.assert_frame_equal(result, expected) + + def test_colspan_rowspan_1(self): + # GH17054 + result = self.read_html( + """ + + + + + + + + + + + +
    ABC
    abc
    + """ + )[0] + + expected = DataFrame([["a", "b", "c"]], columns=["A", "B", "C"]) + + tm.assert_frame_equal(result, expected) + + def test_colspan_rowspan_copy_values(self): + # GH17054 + + # In ASCII, with lowercase letters being copies: + # + # X x Y Z W + # A B b z C + + result = self.read_html( + """ + + + + + + + + + + + + +
    XYZW
    ABC
    + """, + header=0, + )[0] + + expected = DataFrame( + data=[["A", "B", "B", "Z", "C"]], columns=["X", "X.1", "Y", "Z", "W"] + ) + + tm.assert_frame_equal(result, expected) + + def test_colspan_rowspan_both_not_1(self): + # GH17054 + + # In ASCII, with lowercase letters being copies: + # + # A B b b C + # a b b b D + + result = self.read_html( + """ + + + + + + + + + +
    ABC
    D
    + """, + header=0, + )[0] + + expected = DataFrame( + data=[["A", "B", "B", "B", "D"]], columns=["A", "B", "B.1", "B.2", "C"] + ) + + tm.assert_frame_equal(result, expected) + + def test_rowspan_at_end_of_row(self): + # GH17054 + + # In ASCII, with lowercase letters being copies: + # + # A B + # C b + + result = self.read_html( + """ + + + + + + + + +
    AB
    C
    + """, + header=0, + )[0] + + expected = DataFrame(data=[["C", "B"]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_rowspan_only_rows(self): + # GH17054 + + result = self.read_html( + """ + + + + + +
    AB
    + """, + header=0, + )[0] + + expected = DataFrame(data=[["A", "B"], ["A", "B"]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_header_inferred_from_rows_with_only_th(self): + # GH17054 + result = self.read_html( + """ + + + + + + + + + + + + + +
    AB
    ab
    12
    + """ + )[0] + + columns = MultiIndex(levels=[["A", "B"], ["a", "b"]], codes=[[0, 1], [0, 1]]) + expected = DataFrame(data=[[1, 2]], columns=columns) + + tm.assert_frame_equal(result, expected) + + def test_parse_dates_list(self): + df = DataFrame({"date": date_range("1/1/2001", periods=10)}) + expected = df.to_html() + res = self.read_html(expected, parse_dates=[1], index_col=0) + tm.assert_frame_equal(df, res[0]) + res = self.read_html(expected, parse_dates=["date"], index_col=0) + tm.assert_frame_equal(df, res[0]) + + def test_parse_dates_combine(self): + raw_dates = Series(date_range("1/1/2001", periods=10)) + df = DataFrame( + { + "date": raw_dates.map(lambda x: str(x.date())), + "time": raw_dates.map(lambda x: str(x.time())), + } + ) + res = self.read_html( + df.to_html(), parse_dates={"datetime": [1, 2]}, index_col=1 + ) + newdf = DataFrame({"datetime": raw_dates}) + tm.assert_frame_equal(newdf, res[0]) + + def test_wikipedia_states_table(self, datapath): + data = datapath("io", "data", "html", "wikipedia_states.html") + assert os.path.isfile(data), f"{repr(data)} is not a file" + assert os.path.getsize(data), f"{repr(data)} is an empty file" + result = self.read_html(data, "Arizona", header=1)[0] + assert result.shape == (60, 12) + assert "Unnamed" in result.columns[-1] + assert result["sq mi"].dtype == np.dtype("float64") + assert np.allclose(result.loc[0, "sq mi"], 665384.04) + + def test_wikipedia_states_multiindex(self, datapath): + data = datapath("io", "data", "html", "wikipedia_states.html") + result = self.read_html(data, "Arizona", index_col=0)[0] + assert result.shape == (60, 11) + assert "Unnamed" in result.columns[-1][1] + assert result.columns.nlevels == 2 + assert np.allclose(result.loc["Alaska", ("Total area[2]", "sq mi")], 665384.04) + + def test_parser_error_on_empty_header_row(self): + msg = ( + r"Passed header=\[0,1\] are too many " + r"rows for this multi_index of columns" + ) + with pytest.raises(ParserError, match=msg): + 
self.read_html( + """ + + + + + + + + +
    AB
    ab
    + """, + header=[0, 1], + ) + + def test_decimal_rows(self): + # GH 12907 + result = self.read_html( + """ + + + + + + + + + + + + +
    Header
    1100#101
    + + """, + decimal="#", + )[0] + + expected = DataFrame(data={"Header": 1100.101}, index=[0]) + + assert result["Header"].dtype == np.dtype("float64") + tm.assert_frame_equal(result, expected) + + def test_bool_header_arg(self): + # GH 6114 + for arg in [True, False]: + with pytest.raises(TypeError): + self.read_html(self.spam_data, header=arg) + + def test_converters(self): + # GH 13461 + result = self.read_html( + """ + + + + + + + + + + + + + +
    a
    0.763
    0.244
    """, + converters={"a": str}, + )[0] + + expected = DataFrame({"a": ["0.763", "0.244"]}) + + tm.assert_frame_equal(result, expected) + + def test_na_values(self): + # GH 13461 + result = self.read_html( + """ + + + + + + + + + + + + + +
    a
    0.763
    0.244
    """, + na_values=[0.244], + )[0] + + expected = DataFrame({"a": [0.763, np.nan]}) + + tm.assert_frame_equal(result, expected) + + def test_keep_default_na(self): + html_data = """ + + + + + + + + + + + + + +
    a
    N/A
    NA
    """ + + expected_df = DataFrame({"a": ["N/A", "NA"]}) + html_df = self.read_html(html_data, keep_default_na=False)[0] + tm.assert_frame_equal(expected_df, html_df) + + expected_df = DataFrame({"a": [np.nan, np.nan]}) + html_df = self.read_html(html_data, keep_default_na=True)[0] + tm.assert_frame_equal(expected_df, html_df) + + def test_preserve_empty_rows(self): + result = self.read_html( + """ + + + + + + + + + + + + + +
    AB
    ab
    + """ + )[0] + + expected = DataFrame(data=[["a", "b"], [np.nan, np.nan]], columns=["A", "B"]) + + tm.assert_frame_equal(result, expected) + + def test_ignore_empty_rows_when_inferring_header(self): + result = self.read_html( + """ + + + + + + + + + +
    AB
    ab
    12
    + """ + )[0] + + columns = MultiIndex(levels=[["A", "B"], ["a", "b"]], codes=[[0, 1], [0, 1]]) + expected = DataFrame(data=[[1, 2]], columns=columns) + + tm.assert_frame_equal(result, expected) + + def test_multiple_header_rows(self): + # Issue #13434 + expected_df = DataFrame( + data=[("Hillary", 68, "D"), ("Bernie", 74, "D"), ("Donald", 69, "R")] + ) + expected_df.columns = [ + ["Unnamed: 0_level_0", "Age", "Party"], + ["Name", "Unnamed: 1_level_1", "Unnamed: 2_level_1"], + ] + html = expected_df.to_html(index=False) + html_df = self.read_html(html)[0] + tm.assert_frame_equal(expected_df, html_df) + + def test_works_on_valid_markup(self, datapath): + filename = datapath("io", "data", "html", "valid_markup.html") + dfs = self.read_html(filename, index_col=0) + assert isinstance(dfs, list) + assert isinstance(dfs[0], DataFrame) + + @pytest.mark.slow + def test_fallback_success(self, datapath): + banklist_data = datapath("io", "data", "html", "banklist.html") + self.read_html(banklist_data, ".*Water.*", flavor=["lxml", "html5lib"]) + + def test_to_html_timestamp(self): + rng = date_range("2000-01-01", periods=10) + df = DataFrame(np.random.randn(10, 4), index=rng) + + result = df.to_html() + assert "2000-01-01" in result + + @pytest.mark.parametrize( + "displayed_only,exp0,exp1", + [ + (True, DataFrame(["foo"]), None), + (False, DataFrame(["foo bar baz qux"]), DataFrame(["foo"])), + ], + ) + def test_displayed_only(self, displayed_only, exp0, exp1): + # GH 20027 + data = StringIO( + """ + + + + + +
    + foo + bar + baz + qux +
    + + + + +
    foo
    + + """ + ) + + dfs = self.read_html(data, displayed_only=displayed_only) + tm.assert_frame_equal(dfs[0], exp0) + + if exp1 is not None: + tm.assert_frame_equal(dfs[1], exp1) + else: + assert len(dfs) == 1 # Should not parse hidden table + + def test_encode(self, html_encoding_file): + base_path = os.path.basename(html_encoding_file) + root = os.path.splitext(base_path)[0] + _, encoding = root.split("_") + + try: + with open(html_encoding_file, "rb") as fobj: + from_string = self.read_html( + fobj.read(), encoding=encoding, index_col=0 + ).pop() + + with open(html_encoding_file, "rb") as fobj: + from_file_like = self.read_html( + BytesIO(fobj.read()), encoding=encoding, index_col=0 + ).pop() + + from_filename = self.read_html( + html_encoding_file, encoding=encoding, index_col=0 + ).pop() + tm.assert_frame_equal(from_string, from_file_like) + tm.assert_frame_equal(from_string, from_filename) + except Exception: + # seems utf-16/32 fail on windows + if is_platform_windows(): + if "16" in encoding or "32" in encoding: + pytest.skip() + raise + + def test_parse_failure_unseekable(self): + # Issue #17975 + + if self.read_html.keywords.get("flavor") == "lxml": + pytest.skip("Not applicable for lxml") + + class UnseekableStringIO(StringIO): + def seekable(self): + return False + + bad = UnseekableStringIO( + """ +
    spameggs
    """ + ) + + assert self.read_html(bad) + + with pytest.raises(ValueError, match="passed a non-rewindable file object"): + self.read_html(bad) + + def test_parse_failure_rewinds(self): + # Issue #17975 + + class MockFile: + def __init__(self, data): + self.data = data + self.at_end = False + + def read(self, size=None): + data = "" if self.at_end else self.data + self.at_end = True + return data + + def seek(self, offset): + self.at_end = False + + def seekable(self): + return True + + good = MockFile("
    spam
    eggs
    ") + bad = MockFile("
    spameggs
    ") + + assert self.read_html(good) + assert self.read_html(bad) + + @pytest.mark.slow + def test_importcheck_thread_safety(self, datapath): + # see gh-16928 + + class ErrorThread(threading.Thread): + def run(self): + try: + super().run() + except Exception as err: + self.err = err + else: + self.err = None + + # force import check by reinitalising global vars in html.py + reload(pandas.io.html) + + filename = datapath("io", "data", "html", "valid_markup.html") + helper_thread1 = ErrorThread(target=self.read_html, args=(filename,)) + helper_thread2 = ErrorThread(target=self.read_html, args=(filename,)) + + helper_thread1.start() + helper_thread2.start() + + while helper_thread1.is_alive() or helper_thread2.is_alive(): + pass + assert None is helper_thread1.err is helper_thread2.err diff --git a/pandas/tests/io/test_orc.py b/pandas/tests/io/test_orc.py new file mode 100644 index 00000000..a1f9c6f6 --- /dev/null +++ b/pandas/tests/io/test_orc.py @@ -0,0 +1,227 @@ +""" test orc compat """ +import datetime +import os + +import numpy as np +import pytest + +import pandas as pd +from pandas import read_orc +import pandas._testing as tm + +pytest.importorskip("pyarrow", minversion="0.13.0") +pytest.importorskip("pyarrow.orc") + +pytestmark = pytest.mark.filterwarnings( + "ignore:RangeIndex.* is deprecated:DeprecationWarning" +) + + +@pytest.fixture +def dirpath(datapath): + return datapath("io", "data", "orc") + + +def test_orc_reader_empty(dirpath): + columns = [ + "boolean1", + "byte1", + "short1", + "int1", + "long1", + "float1", + "double1", + "bytes1", + "string1", + ] + dtypes = [ + "bool", + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "object", + "object", + ] + expected = pd.DataFrame(index=pd.RangeIndex(0)) + for colname, dtype in zip(columns, dtypes): + expected[colname] = pd.Series(dtype=dtype) + + inputfile = os.path.join(dirpath, "TestOrcFile.emptyFile.orc") + got = read_orc(inputfile, columns=columns) + + tm.assert_equal(expected, 
got) + + +def test_orc_reader_basic(dirpath): + data = { + "boolean1": np.array([False, True], dtype="bool"), + "byte1": np.array([1, 100], dtype="int8"), + "short1": np.array([1024, 2048], dtype="int16"), + "int1": np.array([65536, 65536], dtype="int32"), + "long1": np.array([9223372036854775807, 9223372036854775807], dtype="int64"), + "float1": np.array([1.0, 2.0], dtype="float32"), + "double1": np.array([-15.0, -5.0], dtype="float64"), + "bytes1": np.array([b"\x00\x01\x02\x03\x04", b""], dtype="object"), + "string1": np.array(["hi", "bye"], dtype="object"), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.test1.orc") + got = read_orc(inputfile, columns=data.keys()) + + tm.assert_equal(expected, got) + + +def test_orc_reader_decimal(dirpath): + from decimal import Decimal + + # Only testing the first 10 rows of data + data = { + "_col0": np.array( + [ + Decimal("-1000.50000"), + Decimal("-999.60000"), + Decimal("-998.70000"), + Decimal("-997.80000"), + Decimal("-996.90000"), + Decimal("-995.10000"), + Decimal("-994.11000"), + Decimal("-993.12000"), + Decimal("-992.13000"), + Decimal("-991.14000"), + ], + dtype="object", + ) + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.decimal.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) + + +def test_orc_reader_date_low(dirpath): + data = { + "time": np.array( + [ + "1900-05-05 12:34:56.100000", + "1900-05-05 12:34:56.100100", + "1900-05-05 12:34:56.100200", + "1900-05-05 12:34:56.100300", + "1900-05-05 12:34:56.100400", + "1900-05-05 12:34:56.100500", + "1900-05-05 12:34:56.100600", + "1900-05-05 12:34:56.100700", + "1900-05-05 12:34:56.100800", + "1900-05-05 12:34:56.100900", + ], + dtype="datetime64[ns]", + ), + "date": np.array( + [ + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + 
datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + datetime.date(1900, 12, 25), + ], + dtype="object", + ), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.testDate1900.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) + + +def test_orc_reader_date_high(dirpath): + data = { + "time": np.array( + [ + "2038-05-05 12:34:56.100000", + "2038-05-05 12:34:56.100100", + "2038-05-05 12:34:56.100200", + "2038-05-05 12:34:56.100300", + "2038-05-05 12:34:56.100400", + "2038-05-05 12:34:56.100500", + "2038-05-05 12:34:56.100600", + "2038-05-05 12:34:56.100700", + "2038-05-05 12:34:56.100800", + "2038-05-05 12:34:56.100900", + ], + dtype="datetime64[ns]", + ), + "date": np.array( + [ + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + datetime.date(2038, 12, 25), + ], + dtype="object", + ), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.testDate2038.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) + + +def test_orc_reader_snappy_compressed(dirpath): + data = { + "int1": np.array( + [ + -1160101563, + 1181413113, + 2065821249, + -267157795, + 172111193, + 1752363137, + 1406072123, + 1911809390, + -1308542224, + -467100286, + ], + dtype="int32", + ), + "string1": np.array( + [ + "f50dcb8", + "382fdaaa", + "90758c6", + "9e8caf3f", + "ee97332b", + "d634da1", + "2bea4396", + "d67d89e8", + "ad71007e", + "e8c82066", + ], + dtype="object", + ), + } + expected = pd.DataFrame.from_dict(data) + + inputfile = os.path.join(dirpath, "TestOrcFile.testSnappy.orc") + got = read_orc(inputfile).iloc[:10] + + tm.assert_equal(expected, got) diff 
--git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py new file mode 100644 index 00000000..0b883e2b --- /dev/null +++ b/pandas/tests/io/test_parquet.py @@ -0,0 +1,723 @@ +""" test parquet compat """ +import datetime +from distutils.version import LooseVersion +from io import BytesIO +import locale +import os +from warnings import catch_warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +import pandas._testing as tm + +from pandas.io.parquet import ( + FastParquetImpl, + PyArrowImpl, + get_engine, + read_parquet, + to_parquet, +) + +try: + import pyarrow # noqa + + _HAVE_PYARROW = True +except ImportError: + _HAVE_PYARROW = False + +try: + import fastparquet # noqa + + _HAVE_FASTPARQUET = True +except ImportError: + _HAVE_FASTPARQUET = False + +pytestmark = pytest.mark.filterwarnings( + "ignore:RangeIndex.* is deprecated:DeprecationWarning" +) + + +# setup engines & skips +@pytest.fixture( + params=[ + pytest.param( + "fastparquet", + marks=pytest.mark.skipif( + not _HAVE_FASTPARQUET, reason="fastparquet is not installed" + ), + ), + pytest.param( + "pyarrow", + marks=pytest.mark.skipif( + not _HAVE_PYARROW, reason="pyarrow is not installed" + ), + ), + ] +) +def engine(request): + return request.param + + +@pytest.fixture +def pa(): + if not _HAVE_PYARROW: + pytest.skip("pyarrow is not installed") + return "pyarrow" + + +@pytest.fixture +def fp(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + return "fastparquet" + + +@pytest.fixture +def df_compat(): + return pd.DataFrame({"A": [1, 2, 3], "B": "foo"}) + + +@pytest.fixture +def df_cross_compat(): + df = pd.DataFrame( + { + "a": list("abc"), + "b": list(range(1, 4)), + # 'c': np.arange(3, 6).astype('u1'), + "d": np.arange(4.0, 7.0, dtype="float64"), + "e": [True, False, True], + "f": pd.date_range("20130101", periods=3), + # 'g': pd.date_range('20130101', periods=3, + # tz='US/Eastern'), + # 'h': 
pd.date_range('20130101', periods=3, freq='ns') + } + ) + return df + + +@pytest.fixture +def df_full(): + return pd.DataFrame( + { + "string": list("abc"), + "string_with_nan": ["a", np.nan, "c"], + "string_with_none": ["a", None, "c"], + "bytes": [b"foo", b"bar", b"baz"], + "unicode": ["foo", "bar", "baz"], + "int": list(range(1, 4)), + "uint": np.arange(3, 6).astype("u1"), + "float": np.arange(4.0, 7.0, dtype="float64"), + "float_with_nan": [2.0, np.nan, 3.0], + "bool": [True, False, True], + "datetime": pd.date_range("20130101", periods=3), + "datetime_with_nat": [ + pd.Timestamp("20130101"), + pd.NaT, + pd.Timestamp("20130103"), + ], + } + ) + + +def check_round_trip( + df, + engine=None, + path=None, + write_kwargs=None, + read_kwargs=None, + expected=None, + check_names=True, + repeat=2, +): + """Verify parquet serializer and deserializer produce the same results. + + Performs a pandas to disk and disk to pandas round trip, + then compares the 2 resulting DataFrames to verify equality. 
+ + Parameters + ---------- + df: Dataframe + engine: str, optional + 'pyarrow' or 'fastparquet' + path: str, optional + write_kwargs: dict of str:str, optional + read_kwargs: dict of str:str, optional + expected: DataFrame, optional + Expected deserialization result, otherwise will be equal to `df` + check_names: list of str, optional + Closed set of column names to be compared + repeat: int, optional + How many times to repeat the test + """ + + write_kwargs = write_kwargs or {"compression": None} + read_kwargs = read_kwargs or {} + + if expected is None: + expected = df + + if engine: + write_kwargs["engine"] = engine + read_kwargs["engine"] = engine + + def compare(repeat): + for _ in range(repeat): + df.to_parquet(path, **write_kwargs) + with catch_warnings(record=True): + actual = read_parquet(path, **read_kwargs) + + tm.assert_frame_equal(expected, actual, check_names=check_names) + + if path is None: + with tm.ensure_clean() as path: + compare(repeat) + else: + compare(repeat) + + +def test_invalid_engine(df_compat): + with pytest.raises(ValueError): + check_round_trip(df_compat, "foo", "bar") + + +def test_options_py(df_compat, pa): + # use the set option + + with pd.option_context("io.parquet.engine", "pyarrow"): + check_round_trip(df_compat) + + +def test_options_fp(df_compat, fp): + # use the set option + + with pd.option_context("io.parquet.engine", "fastparquet"): + check_round_trip(df_compat) + + +def test_options_auto(df_compat, fp, pa): + # use the set option + + with pd.option_context("io.parquet.engine", "auto"): + check_round_trip(df_compat) + + +def test_options_get_engine(fp, pa): + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + with pd.option_context("io.parquet.engine", "pyarrow"): + assert isinstance(get_engine("auto"), PyArrowImpl) + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + with 
pd.option_context("io.parquet.engine", "fastparquet"): + assert isinstance(get_engine("auto"), FastParquetImpl) + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + with pd.option_context("io.parquet.engine", "auto"): + assert isinstance(get_engine("auto"), PyArrowImpl) + assert isinstance(get_engine("pyarrow"), PyArrowImpl) + assert isinstance(get_engine("fastparquet"), FastParquetImpl) + + +def test_cross_engine_pa_fp(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=pa, compression=None) + + result = read_parquet(path, engine=fp) + tm.assert_frame_equal(result, df) + + result = read_parquet(path, engine=fp, columns=["a", "d"]) + tm.assert_frame_equal(result, df[["a", "d"]]) + + +def test_cross_engine_fp_pa(df_cross_compat, pa, fp): + # cross-compat with differing reading/writing engines + + if ( + LooseVersion(pyarrow.__version__) < "0.15" + and LooseVersion(pyarrow.__version__) >= "0.13" + ): + pytest.xfail( + "Reading fastparquet with pyarrow in 0.14 fails: " + "https://issues.apache.org/jira/browse/ARROW-6492" + ) + + df = df_cross_compat + with tm.ensure_clean() as path: + df.to_parquet(path, engine=fp, compression=None) + + with catch_warnings(record=True): + result = read_parquet(path, engine=pa) + tm.assert_frame_equal(result, df) + + result = read_parquet(path, engine=pa, columns=["a", "d"]) + tm.assert_frame_equal(result, df[["a", "d"]]) + + +class Base: + def check_error_on_write(self, df, engine, exc): + # check that we are raising the exception on writing + with tm.ensure_clean() as path: + with pytest.raises(exc): + to_parquet(df, path, engine, compression=None) + + +class TestBasic(Base): + def test_error(self, engine): + for obj in [ + pd.Series([1, 2, 3]), + 1, + "foo", + pd.Timestamp("20130101"), + np.array([1, 2, 3]), + ]: + self.check_error_on_write(obj, 
engine, ValueError) + + def test_columns_dtypes(self, engine): + df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) + + # unicode + df.columns = ["foo", "bar"] + check_round_trip(df, engine) + + def test_columns_dtypes_invalid(self, engine): + df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) + + # numeric + df.columns = [0, 1] + self.check_error_on_write(df, engine, ValueError) + + # bytes + df.columns = [b"foo", b"bar"] + self.check_error_on_write(df, engine, ValueError) + + # python object + df.columns = [ + datetime.datetime(2011, 1, 1, 0, 0), + datetime.datetime(2011, 1, 1, 1, 1), + ] + self.check_error_on_write(df, engine, ValueError) + + @pytest.mark.parametrize("compression", [None, "gzip", "snappy", "brotli"]) + def test_compression(self, engine, compression): + + if compression == "snappy": + pytest.importorskip("snappy") + + elif compression == "brotli": + pytest.importorskip("brotli") + + df = pd.DataFrame({"A": [1, 2, 3]}) + check_round_trip(df, engine, write_kwargs={"compression": compression}) + + def test_read_columns(self, engine): + # GH18154 + df = pd.DataFrame({"string": list("abc"), "int": list(range(1, 4))}) + + expected = pd.DataFrame({"string": list("abc")}) + check_round_trip( + df, engine, expected=expected, read_kwargs={"columns": ["string"]} + ) + + def test_write_index(self, engine): + check_names = engine != "fastparquet" + + df = pd.DataFrame({"A": [1, 2, 3]}) + check_round_trip(df, engine) + + indexes = [ + [2, 3, 4], + pd.date_range("20130101", periods=3), + list("abc"), + [1, 3, 4], + ] + # non-default index + for index in indexes: + df.index = index + check_round_trip(df, engine, check_names=check_names) + + # index with meta-data + df.index = [0, 1, 2] + df.index.name = "foo" + check_round_trip(df, engine) + + def test_write_multiindex(self, pa): + # Not supported in fastparquet as of 0.1.3 or older pyarrow version + engine = pa + + df = pd.DataFrame({"A": [1, 2, 3]}) + index = 
pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) + df.index = index + check_round_trip(df, engine) + + def test_write_column_multiindex(self, engine): + # column multi-index + mi_columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]) + df = pd.DataFrame(np.random.randn(4, 3), columns=mi_columns) + self.check_error_on_write(df, engine, ValueError) + + def test_multiindex_with_columns(self, pa): + engine = pa + dates = pd.date_range("01-Jan-2018", "01-Dec-2018", freq="MS") + df = pd.DataFrame(np.random.randn(2 * len(dates), 3), columns=list("ABC")) + index1 = pd.MultiIndex.from_product( + [["Level1", "Level2"], dates], names=["level", "date"] + ) + index2 = index1.copy(names=None) + for index in [index1, index2]: + df.index = index + + check_round_trip(df, engine) + check_round_trip( + df, engine, read_kwargs={"columns": ["A", "B"]}, expected=df[["A", "B"]] + ) + + def test_write_ignoring_index(self, engine): + # ENH 20768 + # Ensure index=False omits the index from the written Parquet file. + df = pd.DataFrame({"a": [1, 2, 3], "b": ["q", "r", "s"]}) + + write_kwargs = {"compression": None, "index": False} + + # Because we're dropping the index, we expect the loaded dataframe to + # have the default integer index. + expected = df.reset_index(drop=True) + + check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + + # Ignore custom index + df = pd.DataFrame( + {"a": [1, 2, 3], "b": ["q", "r", "s"]}, index=["zyx", "wvu", "tsr"] + ) + + check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + + # Ignore multi-indexes as well. 
+ arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + df = pd.DataFrame( + {"one": list(range(8)), "two": [-i for i in range(8)]}, index=arrays + ) + + expected = df.reset_index(drop=True) + check_round_trip(df, engine, write_kwargs=write_kwargs, expected=expected) + + +class TestParquetPyArrow(Base): + def test_basic(self, pa, df_full): + + df = df_full + + # additional supported types for pyarrow + df["datetime_tz"] = pd.date_range("20130101", periods=3, tz="Europe/Brussels") + df["bool_with_none"] = [True, None, True] + + check_round_trip(df, pa) + + def test_basic_subset_columns(self, pa, df_full): + # GH18628 + + df = df_full + # additional supported types for pyarrow + df["datetime_tz"] = pd.date_range("20130101", periods=3, tz="Europe/Brussels") + + check_round_trip( + df, + pa, + expected=df[["string", "int"]], + read_kwargs={"columns": ["string", "int"]}, + ) + + def test_duplicate_columns(self, pa): + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() + self.check_error_on_write(df, pa, ValueError) + + def test_unsupported(self, pa): + if LooseVersion(pyarrow.__version__) < LooseVersion("0.15.1.dev"): + # period - will be supported using an extension type with pyarrow 1.0 + df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)}) + # pyarrow 0.11 raises ArrowTypeError + # older pyarrows raise ArrowInvalid + self.check_error_on_write(df, pa, Exception) + + # timedelta + df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)}) + self.check_error_on_write(df, pa, NotImplementedError) + + # mixed python objects + df = pd.DataFrame({"a": ["a", 1, 2.0]}) + # pyarrow 0.11 raises ArrowTypeError + # older pyarrows raise ArrowInvalid + self.check_error_on_write(df, pa, Exception) + + def test_categorical(self, pa): + + # supported in >= 0.7.0 + df = pd.DataFrame() + df["a"] = 
pd.Categorical(list("abcdef")) + + # test for null, out-of-order values, and unobserved category + df["b"] = pd.Categorical( + ["bar", "foo", "foo", "bar", None, "bar"], + dtype=pd.CategoricalDtype(["foo", "bar", "baz"]), + ) + + # test for ordered flag + df["c"] = pd.Categorical( + ["a", "b", "c", "a", "c", "b"], categories=["b", "c", "d"], ordered=True + ) + + if LooseVersion(pyarrow.__version__) >= LooseVersion("0.15.0"): + check_round_trip(df, pa) + else: + # de-serialized as object for pyarrow < 0.15 + expected = df.astype(object) + check_round_trip(df, pa, expected=expected) + + # GH#33077 2020-03-27 + @pytest.mark.xfail( + locale.getlocale()[0] in ["zh_CN", "it_IT"], + reason="dateutil cannot parse e.g. '五, 27 3月 2020 21:45:38 GMT'", + ) + def test_s3_roundtrip(self, df_compat, s3_resource, pa): + # GH #19134 + check_round_trip(df_compat, pa, path="s3://pandas-test/pyarrow.parquet") + + @tm.network + @td.skip_if_no("pyarrow") + def test_parquet_read_from_url(self, df_compat): + url = ( + "https://raw.githubusercontent.com/pandas-dev/pandas/" + "master/pandas/tests/io/data/parquet/simple.parquet" + ) + df = pd.read_parquet(url) + tm.assert_frame_equal(df, df_compat) + + @td.skip_if_no("pyarrow") + def test_read_file_like_obj_support(self, df_compat): + buffer = BytesIO() + df_compat.to_parquet(buffer) + df_from_buf = pd.read_parquet(buffer) + tm.assert_frame_equal(df_compat, df_from_buf) + + def test_partition_cols_supported(self, pa, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet(path, partition_cols=partition_cols, compression=None) + import pyarrow.parquet as pq + + dataset = pq.ParquetDataset(path, validate_schema=False) + assert len(dataset.partitions.partition_names) == 2 + assert dataset.partitions.partition_names == set(partition_cols) + + def test_partition_cols_string(self, pa, df_full): + # GH #27117 + partition_cols = "bool" + partition_cols_list = [partition_cols] + 
df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet(path, partition_cols=partition_cols, compression=None) + import pyarrow.parquet as pq + + dataset = pq.ParquetDataset(path, validate_schema=False) + assert len(dataset.partitions.partition_names) == 1 + assert dataset.partitions.partition_names == set(partition_cols_list) + + def test_empty_dataframe(self, pa): + # GH #27339 + df = pd.DataFrame() + check_round_trip(df, pa) + + def test_write_with_schema(self, pa): + import pyarrow + + df = pd.DataFrame({"x": [0, 1]}) + schema = pyarrow.schema([pyarrow.field("x", type=pyarrow.bool_())]) + out_df = df.astype(bool) + check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df) + + @td.skip_if_no("pyarrow", min_version="0.15.0") + def test_additional_extension_arrays(self, pa): + # test additional ExtensionArrays that are supported through the + # __arrow_array__ protocol + df = pd.DataFrame( + { + "a": pd.Series([1, 2, 3], dtype="Int64"), + "b": pd.Series([1, 2, 3], dtype="UInt32"), + "c": pd.Series(["a", None, "c"], dtype="string"), + } + ) + if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"): + expected = df + else: + # de-serialized as plain int / object + expected = df.assign( + a=df.a.astype("int64"), b=df.b.astype("int64"), c=df.c.astype("object") + ) + check_round_trip(df, pa, expected=expected) + + df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")}) + if LooseVersion(pyarrow.__version__) >= LooseVersion("0.16.0"): + expected = df + else: + # if missing values in integer, currently de-serialized as float + expected = df.assign(a=df.a.astype("float64")) + check_round_trip(df, pa, expected=expected) + + @td.skip_if_no("pyarrow", min_version="0.16.0") + def test_additional_extension_types(self, pa): + # test additional ExtensionArrays that are supported through the + # __arrow_array__ protocol + by defining a custom ExtensionType + df = pd.DataFrame( + { + # Arrow does not yet support struct in writing to 
Parquet (ARROW-1644) + # "c": pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (3, 4)]), + "d": pd.period_range("2012-01-01", periods=3, freq="D"), + } + ) + check_round_trip(df, pa) + + @td.skip_if_no("pyarrow", min_version="0.17") + def test_filter_row_groups(self, pa): + # https://github.com/pandas-dev/pandas/issues/26551 + df = pd.DataFrame({"a": list(range(0, 3))}) + with tm.ensure_clean() as path: + df.to_parquet(path, pa) + result = read_parquet( + path, pa, filters=[("a", "==", 0)], use_legacy_dataset=False + ) + assert len(result) == 1 + + +class TestParquetFastParquet(Base): + @td.skip_if_no("fastparquet", min_version="0.3.2") + def test_basic(self, fp, df_full): + df = df_full + + df["datetime_tz"] = pd.date_range("20130101", periods=3, tz="US/Eastern") + df["timedelta"] = pd.timedelta_range("1 day", periods=3) + check_round_trip(df, fp) + + @pytest.mark.skip(reason="not supported") + def test_duplicate_columns(self, fp): + + # not currently able to handle duplicate columns + df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy() + self.check_error_on_write(df, fp, ValueError) + + def test_bool_with_none(self, fp): + df = pd.DataFrame({"a": [True, None, False]}) + expected = pd.DataFrame({"a": [1.0, np.nan, 0.0]}, dtype="float16") + check_round_trip(df, fp, expected=expected) + + def test_unsupported(self, fp): + + # period + df = pd.DataFrame({"a": pd.period_range("2013", freq="M", periods=3)}) + self.check_error_on_write(df, fp, ValueError) + + # mixed + df = pd.DataFrame({"a": ["a", 1, 2.0]}) + self.check_error_on_write(df, fp, ValueError) + + def test_categorical(self, fp): + df = pd.DataFrame({"a": pd.Categorical(list("abc"))}) + check_round_trip(df, fp) + + def test_filter_row_groups(self, fp): + d = {"a": list(range(0, 3))} + df = pd.DataFrame(d) + with tm.ensure_clean() as path: + df.to_parquet(path, fp, compression=None, row_group_offsets=1) + result = read_parquet(path, fp, filters=[("a", "==", 0)]) + assert len(result) 
== 1 + + def test_s3_roundtrip(self, df_compat, s3_resource, fp): + # GH #19134 + check_round_trip(df_compat, fp, path="s3://pandas-test/fastparquet.parquet") + + def test_partition_cols_supported(self, fp, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + partition_cols=partition_cols, + compression=None, + ) + assert os.path.exists(path) + import fastparquet # noqa: F811 + + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 2 + + def test_partition_cols_string(self, fp, df_full): + # GH #27117 + partition_cols = "bool" + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + partition_cols=partition_cols, + compression=None, + ) + assert os.path.exists(path) + import fastparquet # noqa: F811 + + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 1 + + def test_partition_on_supported(self, fp, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + compression=None, + partition_on=partition_cols, + ) + assert os.path.exists(path) + import fastparquet # noqa: F811 + + actual_partition_cols = fastparquet.ParquetFile(path, False).cats + assert len(actual_partition_cols) == 2 + + def test_error_on_using_partition_cols_and_partition_on(self, fp, df_full): + # GH #23283 + partition_cols = ["bool", "int"] + df = df_full + with pytest.raises(ValueError): + with tm.ensure_clean_dir() as path: + df.to_parquet( + path, + engine="fastparquet", + compression=None, + partition_on=partition_cols, + partition_cols=partition_cols, + ) + + def test_empty_dataframe(self, fp): + # GH #27339 + df = pd.DataFrame() + expected = df.copy() + expected.index.name = "index" + check_round_trip(df, fp, expected=expected) 
diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py new file mode 100644 index 00000000..7605fae9 --- /dev/null +++ b/pandas/tests/io/test_pickle.py @@ -0,0 +1,498 @@ +""" +manage legacy pickle tests + +How to add pickle tests: + +1. Install pandas version intended to output the pickle. + +2. Execute "generate_legacy_storage_files.py" to create the pickle. +$ python generate_legacy_storage_files.py pickle + +3. Move the created pickle to "data/legacy_pickle/" directory. +""" +import bz2 +import glob +import gzip +import os +import pickle +import shutil +from warnings import catch_warnings, simplefilter +import zipfile + +import pytest + +from pandas.compat import _get_lzma_file, _import_lzma, is_platform_little_endian +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import Index +import pandas._testing as tm + +from pandas.tseries.offsets import Day, MonthEnd + +lzma = _import_lzma() + + +@pytest.fixture(scope="module") +def current_pickle_data(): + # our current version pickle data + from pandas.tests.io.generate_legacy_storage_files import create_pickle_data + + return create_pickle_data() + + +# --------------------- +# comparison functions +# --------------------- +def compare_element(result, expected, typ, version=None): + if isinstance(expected, Index): + tm.assert_index_equal(expected, result) + return + + if typ.startswith("sp_"): + comparator = tm.assert_equal + comparator(result, expected) + elif typ == "timestamp": + if expected is pd.NaT: + assert result is pd.NaT + else: + assert result == expected + assert result.freq == expected.freq + else: + comparator = getattr( + tm, "assert_{typ}_equal".format(typ=typ), tm.assert_almost_equal + ) + comparator(result, expected) + + +def compare(data, vf, version): + + data = pd.read_pickle(vf) + + m = globals() + for typ, dv in data.items(): + for dt, result in dv.items(): + expected = data[typ][dt] + + # use a specific comparator + # if available + 
comparator = "compare_{typ}_{dt}".format(typ=typ, dt=dt) + + comparator = m.get(comparator, m["compare_element"]) + comparator(result, expected, typ, version) + return data + + +def compare_series_ts(result, expected, typ, version): + # GH 7748 + tm.assert_series_equal(result, expected) + assert result.index.freq == expected.index.freq + assert not result.index.freq.normalize + tm.assert_series_equal(result > 0, expected > 0) + + # GH 9291 + freq = result.index.freq + assert freq + Day(1) == Day(2) + + res = freq + pd.Timedelta(hours=1) + assert isinstance(res, pd.Timedelta) + assert res == pd.Timedelta(days=1, hours=1) + + res = freq + pd.Timedelta(nanoseconds=1) + assert isinstance(res, pd.Timedelta) + assert res == pd.Timedelta(days=1, nanoseconds=1) + + +def compare_series_dt_tz(result, expected, typ, version): + tm.assert_series_equal(result, expected) + + +def compare_series_cat(result, expected, typ, version): + tm.assert_series_equal(result, expected) + + +def compare_frame_dt_mixed_tzs(result, expected, typ, version): + tm.assert_frame_equal(result, expected) + + +def compare_frame_cat_onecol(result, expected, typ, version): + tm.assert_frame_equal(result, expected) + + +def compare_frame_cat_and_float(result, expected, typ, version): + compare_frame_cat_onecol(result, expected, typ, version) + + +def compare_index_period(result, expected, typ, version): + tm.assert_index_equal(result, expected) + assert isinstance(result.freq, MonthEnd) + assert result.freq == MonthEnd() + assert result.freqstr == "M" + tm.assert_index_equal(result.shift(2), expected.shift(2)) + + +files = glob.glob( + os.path.join(os.path.dirname(__file__), "data", "legacy_pickle", "*", "*.pickle") +) + + +@pytest.fixture(params=files) +def legacy_pickle(request, datapath): + return datapath(request.param) + + +# --------------------- +# tests +# --------------------- +def test_pickles(current_pickle_data, legacy_pickle): + if not is_platform_little_endian(): + pytest.skip("known failure 
on non-little endian") + + version = os.path.basename(os.path.dirname(legacy_pickle)) + with catch_warnings(record=True): + simplefilter("ignore") + compare(current_pickle_data, legacy_pickle, version) + + +def test_round_trip_current(current_pickle_data): + def python_pickler(obj, path): + with open(path, "wb") as fh: + pickle.dump(obj, fh, protocol=-1) + + def python_unpickler(path): + with open(path, "rb") as fh: + fh.seek(0) + return pickle.load(fh) + + data = current_pickle_data + for typ, dv in data.items(): + for dt, expected in dv.items(): + + for writer in [pd.to_pickle, python_pickler]: + if writer is None: + continue + + with tm.ensure_clean() as path: + + # test writing with each pickler + writer(expected, path) + + # test reading with each unpickler + result = pd.read_pickle(path) + compare_element(result, expected, typ) + + result = python_unpickler(path) + compare_element(result, expected, typ) + + +def test_pickle_path_pathlib(): + df = tm.makeDataFrame() + result = tm.round_trip_pathlib(df.to_pickle, pd.read_pickle) + tm.assert_frame_equal(df, result) + + +def test_pickle_path_localpath(): + df = tm.makeDataFrame() + result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle) + tm.assert_frame_equal(df, result) + + +def test_legacy_sparse_warning(datapath): + """ + + Generated with + + >>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [0, 0, 1, 1]}).to_sparse() + >>> df.to_pickle("pandas/tests/io/data/pickle/sparseframe-0.20.3.pickle.gz", + ... compression="gzip") + + >>> s = df['B'] + >>> s.to_pickle("pandas/tests/io/data/pickle/sparseseries-0.20.3.pickle.gz", + ... 
compression="gzip") + """ + with tm.assert_produces_warning(FutureWarning): + simplefilter("ignore", DeprecationWarning) # from boto + pd.read_pickle( + datapath("io", "data", "pickle", "sparseseries-0.20.3.pickle.gz"), + compression="gzip", + ) + + with tm.assert_produces_warning(FutureWarning): + simplefilter("ignore", DeprecationWarning) # from boto + pd.read_pickle( + datapath("io", "data", "pickle", "sparseframe-0.20.3.pickle.gz"), + compression="gzip", + ) + + +# --------------------- +# test pickle compression +# --------------------- + + +@pytest.fixture +def get_random_path(): + return "__{}__.pickle".format(tm.rands(10)) + + +class TestCompression: + + _compression_to_extension = { + None: ".none", + "gzip": ".gz", + "bz2": ".bz2", + "zip": ".zip", + "xz": ".xz", + } + + def compress_file(self, src_path, dest_path, compression): + if compression is None: + shutil.copyfile(src_path, dest_path) + return + + if compression == "gzip": + f = gzip.open(dest_path, "w") + elif compression == "bz2": + f = bz2.BZ2File(dest_path, "w") + elif compression == "zip": + with zipfile.ZipFile(dest_path, "w", compression=zipfile.ZIP_DEFLATED) as f: + f.write(src_path, os.path.basename(src_path)) + elif compression == "xz": + f = _get_lzma_file(lzma)(dest_path, "w") + else: + msg = "Unrecognized compression type: {}".format(compression) + raise ValueError(msg) + + if compression != "zip": + with open(src_path, "rb") as fh, f: + f.write(fh.read()) + + def test_write_explicit(self, compression, get_random_path): + base = get_random_path + path1 = base + ".compressed" + path2 = base + ".raw" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file + df.to_pickle(p1, compression=compression) + + # decompress + with tm.decompress_file(p1, compression=compression) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + 
tm.assert_frame_equal(df, df2) + + @pytest.mark.parametrize("compression", ["", "None", "bad", "7z"]) + def test_write_explicit_bad(self, compression, get_random_path): + with pytest.raises(ValueError, match="Unrecognized compression type"): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, compression=compression) + + @pytest.mark.parametrize("ext", ["", ".gz", ".bz2", ".no_compress", ".xz"]) + def test_write_infer(self, ext, get_random_path): + base = get_random_path + path1 = base + ext + path2 = base + ".raw" + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to compressed file by inferred compression method + df.to_pickle(p1) + + # decompress + with tm.decompress_file(p1, compression=compression) as f: + with open(p2, "wb") as fh: + fh.write(f.read()) + + # read decompressed file + df2 = pd.read_pickle(p2, compression=None) + + tm.assert_frame_equal(df, df2) + + def test_read_explicit(self, compression, get_random_path): + base = get_random_path + path1 = base + ".raw" + path2 = base + ".compressed" + + with tm.ensure_clean(path1) as p1, tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + self.compress_file(p1, p2, compression=compression) + + # read compressed file + df2 = pd.read_pickle(p2, compression=compression) + + tm.assert_frame_equal(df, df2) + + @pytest.mark.parametrize("ext", ["", ".gz", ".bz2", ".zip", ".no_compress", ".xz"]) + def test_read_infer(self, ext, get_random_path): + base = get_random_path + path1 = base + ".raw" + path2 = base + ext + compression = None + for c in self._compression_to_extension: + if self._compression_to_extension[c] == ext: + compression = c + break + + with tm.ensure_clean(path1) as p1, 
tm.ensure_clean(path2) as p2: + df = tm.makeDataFrame() + + # write to uncompressed file + df.to_pickle(p1, compression=None) + + # compress + self.compress_file(p1, p2, compression=compression) + + # read compressed file by inferred compression method + df2 = pd.read_pickle(p2) + + tm.assert_frame_equal(df, df2) + + +# --------------------- +# test pickle compression +# --------------------- + + +class TestProtocol: + @pytest.mark.parametrize("protocol", [-1, 0, 1, 2]) + def test_read(self, protocol, get_random_path): + with tm.ensure_clean(get_random_path) as path: + df = tm.makeDataFrame() + df.to_pickle(path, protocol=protocol) + df2 = pd.read_pickle(path) + tm.assert_frame_equal(df, df2) + + +@pytest.mark.parametrize( + ["pickle_file", "excols"], + [ + ("test_py27.pkl", pd.Index(["a", "b", "c"])), + ( + "test_mi_py27.pkl", + pd.MultiIndex.from_arrays([["a", "b", "c"], ["A", "B", "C"]]), + ), + ], +) +def test_unicode_decode_error(datapath, pickle_file, excols): + # pickle file written with py27, should be readable without raising + # UnicodeDecodeError, see GH#28645 and GH#31988 + path = datapath("io", "data", "pickle", pickle_file) + df = pd.read_pickle(path) + + # just test the columns are correct since the values are random + tm.assert_index_equal(df.columns, excols) + + +# --------------------- +# tests for buffer I/O +# --------------------- + + +def test_pickle_buffer_roundtrip(): + with tm.ensure_clean() as path: + df = tm.makeDataFrame() + with open(path, "wb") as fh: + df.to_pickle(fh) + with open(path, "rb") as fh: + result = pd.read_pickle(fh) + tm.assert_frame_equal(df, result) + + +# --------------------- +# tests for URL I/O +# --------------------- + + +@pytest.mark.parametrize( + "mockurl", ["http://url.com", "ftp://test.com", "http://gzip.com"] +) +def test_pickle_generalurl_read(monkeypatch, mockurl): + def python_pickler(obj, path): + with open(path, "wb") as fh: + pickle.dump(obj, fh, protocol=-1) + + class MockReadResponse: + def 
__init__(self, path): + self.file = open(path, "rb") + if "gzip" in path: + self.headers = {"Content-Encoding": "gzip"} + else: + self.headers = {"Content-Encoding": None} + + def read(self): + return self.file.read() + + def close(self): + return self.file.close() + + with tm.ensure_clean() as path: + + def mock_urlopen_read(*args, **kwargs): + return MockReadResponse(path) + + df = tm.makeDataFrame() + python_pickler(df, path) + monkeypatch.setattr("urllib.request.urlopen", mock_urlopen_read) + result = pd.read_pickle(mockurl) + tm.assert_frame_equal(df, result) + + +@td.skip_if_no("gcsfs") +@pytest.mark.parametrize("mockurl", ["gs://gcs.com", "gcs://gcs.com"]) +def test_pickle_gcsurl_roundtrip(monkeypatch, mockurl): + with tm.ensure_clean() as path: + + class MockGCSFileSystem: + def __init__(self, *args, **kwargs): + pass + + def open(self, *args): + mode = args[1] or None + f = open(path, mode) + return f + + monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) + df = tm.makeDataFrame() + df.to_pickle(mockurl) + result = pd.read_pickle(mockurl) + tm.assert_frame_equal(df, result) + + +@td.skip_if_no("s3fs") +@pytest.mark.parametrize("mockurl", ["s3://s3.com", "s3n://s3.com", "s3a://s3.com"]) +def test_pickle_s3url_roundtrip(monkeypatch, mockurl): + with tm.ensure_clean() as path: + + class MockS3FileSystem: + def __init__(self, *args, **kwargs): + pass + + def open(self, *args): + mode = args[1] or None + f = open(path, mode) + return f + + monkeypatch.setattr("s3fs.S3FileSystem", MockS3FileSystem) + df = tm.makeDataFrame() + df.to_pickle(mockurl) + result = pd.read_pickle(mockurl) + tm.assert_frame_equal(df, result) diff --git a/pandas/tests/io/test_s3.py b/pandas/tests/io/test_s3.py new file mode 100644 index 00000000..04c69795 --- /dev/null +++ b/pandas/tests/io/test_s3.py @@ -0,0 +1,25 @@ +from io import BytesIO + +import pytest + +from pandas import read_csv + +from pandas.io.common import is_s3_url + + +class TestS3URL: + def 
test_is_s3_url(self):
+        # only the "s3" scheme is accepted; a near-miss scheme is rejected
+        assert is_s3_url("s3://pandas/somethingelse.com")
+        assert not is_s3_url("s4://pandas/somethingelse.com")
+
+
+def test_streaming_s3_objects():
+    # GH17135
+    # botocore gained iteration support in 1.10.47, can now be used in read_*
+    pytest.importorskip("botocore", minversion="1.10.47")
+    from botocore.response import StreamingBody
+
+    # smoke test only: read_csv must accept the StreamingBody without raising;
+    # the parsed frames are not inspected
+    data = [b"foo,bar,baz\n1,2,3\n4,5,6\n", b"just,the,header\n"]
+    for el in data:
+        body = StreamingBody(BytesIO(el), content_length=len(el))
+        read_csv(body)
diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py
new file mode 100644
index 00000000..013f56f8
--- /dev/null
+++ b/pandas/tests/io/test_spss.py
@@ -0,0 +1,73 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+# the whole module is skipped when pyreadstat is not importable
+pyreadstat = pytest.importorskip("pyreadstat")
+
+
+def test_spss_labelled_num(datapath):
+    # test file from the Haven project (https://haven.tidyverse.org/)
+    fname = datapath("io", "data", "spss", "labelled-num.sav")
+
+    # with convert_categoricals=True the value label is expected ...
+    df = pd.read_spss(fname, convert_categoricals=True)
+    expected = pd.DataFrame({"VAR00002": "This is one"}, index=[0])
+    expected["VAR00002"] = pd.Categorical(expected["VAR00002"])
+    tm.assert_frame_equal(df, expected)
+
+    # ... and with convert_categoricals=False the raw numeric code
+    df = pd.read_spss(fname, convert_categoricals=False)
+    expected = pd.DataFrame({"VAR00002": 1.0}, index=[0])
+    tm.assert_frame_equal(df, expected)
+
+
+def test_spss_labelled_num_na(datapath):
+    # test file from the Haven project (https://haven.tidyverse.org/)
+    fname = datapath("io", "data", "spss", "labelled-num-na.sav")
+
+    # the missing value is expected as None (labels) / NaN (raw codes)
+    df = pd.read_spss(fname, convert_categoricals=True)
+    expected = pd.DataFrame({"VAR00002": ["This is one", None]})
+    expected["VAR00002"] = pd.Categorical(expected["VAR00002"])
+    tm.assert_frame_equal(df, expected)
+
+    df = pd.read_spss(fname, convert_categoricals=False)
+    expected = pd.DataFrame({"VAR00002": [1.0, np.nan]})
+    tm.assert_frame_equal(df, expected)
+
+
+def test_spss_labelled_str(datapath):
+    # test file from the 
Haven project (https://haven.tidyverse.org/)
+    fname = datapath("io", "data", "spss", "labelled-str.sav")
+
+    # labels ("Male"/"Female") when converting, raw codes ("M"/"F") otherwise
+    df = pd.read_spss(fname, convert_categoricals=True)
+    expected = pd.DataFrame({"gender": ["Male", "Female"]})
+    expected["gender"] = pd.Categorical(expected["gender"])
+    tm.assert_frame_equal(df, expected)
+
+    df = pd.read_spss(fname, convert_categoricals=False)
+    expected = pd.DataFrame({"gender": ["M", "F"]})
+    tm.assert_frame_equal(df, expected)
+
+
+def test_spss_umlauts(datapath):
+    # test file from the Haven project (https://haven.tidyverse.org/)
+    fname = datapath("io", "data", "spss", "umlauts.sav")
+
+    # value labels containing non-ASCII characters must come back intact
+    df = pd.read_spss(fname, convert_categoricals=True)
+    expected = pd.DataFrame(
+        {"var1": ["the ä umlaut", "the ü umlaut", "the ä umlaut", "the ö umlaut"]}
+    )
+    expected["var1"] = pd.Categorical(expected["var1"])
+    tm.assert_frame_equal(df, expected)
+
+    df = pd.read_spss(fname, convert_categoricals=False)
+    expected = pd.DataFrame({"var1": [1.0, 2.0, 1.0, 3.0]})
+    tm.assert_frame_equal(df, expected)
+
+
+def test_spss_usecols(datapath):
+    # usecols must be list-like; a bare string is expected to raise TypeError
+    fname = datapath("io", "data", "spss", "labelled-num.sav")
+
+    with pytest.raises(TypeError, match="usecols must be list-like."):
+        pd.read_spss(fname, usecols="VAR00002")
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
new file mode 100644
index 00000000..d0569cd1
--- /dev/null
+++ b/pandas/tests/io/test_sql.py
@@ -0,0 +1,2851 @@
+"""SQL io tests
+
+The SQL tests are broken down in different classes:
+
+- `PandasSQLTest`: base class with common methods for all test classes
+- Tests for the public API (only tests with sqlite3)
+    - `_TestSQLApi` base class
+    - `TestSQLApi`: test the public API with sqlalchemy engine
+    - `TestSQLiteFallbackApi`: test the public API with a sqlite DBAPI
+      connection
+- Tests for the different SQL flavors (flavor specific type conversions)
+    - Tests for the sqlalchemy mode: `_TestSQLAlchemy` is the base class with
+      common methods, 
`_TestSQLAlchemyConn` tests the API with a SQLAlchemy
+      Connection object. The different tested flavors (sqlite3, MySQL,
+      PostgreSQL) derive from the base class
+    - Tests for the fallback mode (`TestSQLiteFallback`)
+
+"""
+
+import csv
+from datetime import date, datetime, time
+from io import StringIO
+import sqlite3
+import warnings
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.common import is_datetime64_dtype, is_datetime64tz_dtype
+
+import pandas as pd
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    concat,
+    date_range,
+    isna,
+    to_datetime,
+    to_timedelta,
+)
+import pandas._testing as tm
+
+import pandas.io.sql as sql
+from pandas.io.sql import read_sql_query, read_sql_table
+
+# sqlalchemy is optional: the flag below records whether it could be imported
+# and is used in skipif markers on the classes that need it.
+try:
+    import sqlalchemy
+    import sqlalchemy.schema
+    import sqlalchemy.sql.sqltypes as sqltypes
+    from sqlalchemy.ext import declarative
+    from sqlalchemy.orm import session as sa_session
+
+    SQLALCHEMY_INSTALLED = True
+except ImportError:
+    SQLALCHEMY_INSTALLED = False
+
+# Raw SQL used by the fixtures and helpers below. The outer key names the
+# purpose of the statement, the inner key the flavor ("sqlite", "mysql",
+# "postgresql"); helpers look statements up via SQL_STRINGS[...][self.flavor].
+SQL_STRINGS = {
+    "create_iris": {
+        "sqlite": """CREATE TABLE iris (
+                "SepalLength" REAL,
+                "SepalWidth" REAL,
+                "PetalLength" REAL,
+                "PetalWidth" REAL,
+                "Name" TEXT
+            )""",
+        "mysql": """CREATE TABLE iris (
+                `SepalLength` DOUBLE,
+                `SepalWidth` DOUBLE,
+                `PetalLength` DOUBLE,
+                `PetalWidth` DOUBLE,
+                `Name` VARCHAR(200)
+            )""",
+        "postgresql": """CREATE TABLE iris (
+                "SepalLength" DOUBLE PRECISION,
+                "SepalWidth" DOUBLE PRECISION,
+                "PetalLength" DOUBLE PRECISION,
+                "PetalWidth" DOUBLE PRECISION,
+                "Name" VARCHAR(200)
+            )""",
+    },
+    # one placeholder per iris column; placeholder syntax differs per driver
+    "insert_iris": {
+        "sqlite": """INSERT INTO iris VALUES(?, ?, ?, ?, ?)""",
+        "mysql": """INSERT INTO iris VALUES(%s, %s, %s, %s, "%s");""",
+        "postgresql": """INSERT INTO iris VALUES(%s, %s, %s, %s, %s);""",
+    },
+    "create_test_types": {
+        "sqlite": """CREATE TABLE types_test_data (
+                "TextCol" TEXT,
+                "DateCol" TEXT,
+                "IntDateCol" INTEGER,
+                "IntDateOnlyCol" INTEGER,
+                "FloatCol" REAL,
+                "IntCol" INTEGER,
+                "BoolCol" 
INTEGER,
+                "IntColWithNull" INTEGER,
+                "BoolColWithNull" INTEGER
+            )""",
+        "mysql": """CREATE TABLE types_test_data (
+                `TextCol` TEXT,
+                `DateCol` DATETIME,
+                `IntDateCol` INTEGER,
+                `IntDateOnlyCol` INTEGER,
+                `FloatCol` DOUBLE,
+                `IntCol` INTEGER,
+                `BoolCol` BOOLEAN,
+                `IntColWithNull` INTEGER,
+                `BoolColWithNull` BOOLEAN
+            )""",
+        "postgresql": """CREATE TABLE types_test_data (
+                "TextCol" TEXT,
+                "DateCol" TIMESTAMP,
+                "DateColWithTz" TIMESTAMP WITH TIME ZONE,
+                "IntDateCol" INTEGER,
+                "IntDateOnlyCol" INTEGER,
+                "FloatCol" DOUBLE PRECISION,
+                "IntCol" INTEGER,
+                "BoolCol" BOOLEAN,
+                "IntColWithNull" INTEGER,
+                "BoolColWithNull" BOOLEAN
+            )""",
+    },
+    # "fields" lists the row-dict keys in the order they are bound to the
+    # placeholders of "query"; only postgresql has the extra "DateColWithTz"
+    # column, hence its tenth placeholder.
+    "insert_test_types": {
+        "sqlite": {
+            "query": """
+                INSERT INTO types_test_data
+                VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+            "fields": (
+                "TextCol",
+                "DateCol",
+                "IntDateCol",
+                "IntDateOnlyCol",
+                "FloatCol",
+                "IntCol",
+                "BoolCol",
+                "IntColWithNull",
+                "BoolColWithNull",
+            ),
+        },
+        "mysql": {
+            "query": """
+                INSERT INTO types_test_data
+                VALUES("%s", %s, %s, %s, %s, %s, %s, %s, %s)
+                """,
+            "fields": (
+                "TextCol",
+                "DateCol",
+                "IntDateCol",
+                "IntDateOnlyCol",
+                "FloatCol",
+                "IntCol",
+                "BoolCol",
+                "IntColWithNull",
+                "BoolColWithNull",
+            ),
+        },
+        "postgresql": {
+            "query": """
+                INSERT INTO types_test_data
+                VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                """,
+            "fields": (
+                "TextCol",
+                "DateCol",
+                "DateColWithTz",
+                "IntDateCol",
+                "IntDateOnlyCol",
+                "FloatCol",
+                "IntCol",
+                "BoolCol",
+                "IntColWithNull",
+                "BoolColWithNull",
+            ),
+        },
+    },
+    "read_parameters": {
+        "sqlite": "SELECT * FROM iris WHERE Name=? 
AND SepalLength=?",
+        "mysql": 'SELECT * FROM iris WHERE `Name`="%s" AND `SepalLength`=%s',
+        "postgresql": 'SELECT * FROM iris WHERE "Name"=%s AND "SepalLength"=%s',
+    },
+    "read_named_parameters": {
+        "sqlite": """
+                SELECT * FROM iris WHERE Name=:name AND SepalLength=:length
+                """,
+        "mysql": """
+                SELECT * FROM iris WHERE
+                `Name`="%(name)s" AND `SepalLength`=%(length)s
+                """,
+        "postgresql": """
+                SELECT * FROM iris WHERE
+                "Name"=%(name)s AND "SepalLength"=%(length)s
+                """,
+    },
+    # only a sqlite variant of the view is defined
+    "create_view": {
+        "sqlite": """
+                CREATE VIEW iris_view AS
+                SELECT * FROM iris
+                """
+    },
+}
+
+
+class MixInBase:
+    # Shared teardown: drops every table reported by the subclass's
+    # _get_all_tables() and then closes the connection. Subclasses supply
+    # drop_table, _get_all_tables and _close_conn.
+    def teardown_method(self, method):
+        # if setup fails, there may not be a connection to close.
+        if hasattr(self, "conn"):
+            for tbl in self._get_all_tables():
+                self.drop_table(tbl)
+            self._close_conn()
+
+
+class MySQLMixIn(MixInBase):
+    # Teardown helpers for a DBAPI-style connection (cursor()/commit()).
+    def drop_table(self, table_name):
+        cur = self.conn.cursor()
+        cur.execute(f"DROP TABLE IF EXISTS {sql._get_valid_mysql_name(table_name)}")
+        self.conn.commit()
+
+    def _get_all_tables(self):
+        cur = self.conn.cursor()
+        cur.execute("SHOW TABLES")
+        return [table[0] for table in cur.fetchall()]
+
+    def _close_conn(self):
+        from pymysql.err import Error
+
+        # best effort: a pymysql error while closing is deliberately ignored
+        try:
+            self.conn.close()
+        except Error:
+            pass
+
+
+class SQLiteMixIn(MixInBase):
+    # Teardown helpers that call execute() directly on the connection.
+    def drop_table(self, table_name):
+        self.conn.execute(
+            f"DROP TABLE IF EXISTS {sql._get_valid_sqlite_name(table_name)}"
+        )
+        self.conn.commit()
+
+    def _get_all_tables(self):
+        c = self.conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
+        return [table[0] for table in c.fetchall()]
+
+    def _close_conn(self):
+        self.conn.close()
+
+
+class SQLAlchemyMixIn(MixInBase):
+    # Teardown helpers that go through sql.SQLDatabase / sqlalchemy MetaData.
+    def drop_table(self, table_name):
+        sql.SQLDatabase(self.conn).drop_table(table_name)
+
+    def _get_all_tables(self):
+        meta = sqlalchemy.schema.MetaData(bind=self.conn)
+        meta.reflect()
+        table_list = meta.tables.keys()
+        return table_list
+
+    def _close_conn(self):
+        # intentionally a no-op in this mixin
+        pass
+
+
+class PandasSQLTest:
+    """
+    Base 
class with common private methods for SQLAlchemy and fallback cases.
+
+    """
+
+    def _get_exec(self):
+        # return the object to call execute() on: the connection itself when
+        # it has an execute attribute, otherwise a fresh cursor from it
+        if hasattr(self.conn, "execute"):
+            return self.conn
+        else:
+            return self.conn.cursor()
+
+    @pytest.fixture(params=[("io", "data", "csv", "iris.csv")])
+    def load_iris_data(self, datapath, request):
+        # (re)creates the "iris" table and fills it row by row from iris.csv;
+        # connects first via setup_connect() if no connection exists yet
+        import io
+
+        iris_csv_file = datapath(*request.param)
+
+        if not hasattr(self, "conn"):
+            self.setup_connect()
+
+        self.drop_table("iris")
+        self._get_exec().execute(SQL_STRINGS["create_iris"][self.flavor])
+
+        with io.open(iris_csv_file, mode="r", newline=None) as iris_csv:
+            r = csv.reader(iris_csv)
+            next(r)  # skip header row
+            ins = SQL_STRINGS["insert_iris"][self.flavor]
+
+            for row in r:
+                self._get_exec().execute(ins, row)
+
+    def _load_iris_view(self):
+        self.drop_table("iris_view")
+        self._get_exec().execute(SQL_STRINGS["create_view"][self.flavor])
+
+    def _check_iris_loaded_frame(self, iris_frame):
+        # checks the dtype of the first column and looks at the first row
+        pytype = iris_frame.dtypes[0].type
+        row = iris_frame.iloc[0]
+
+        assert issubclass(pytype, np.floating)
+        # NOTE(review): the result of tm.equalContents is not asserted here;
+        # confirm whether it raises on mismatch or only returns a bool
+        tm.equalContents(row.values, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"])
+
+    def _load_test1_data(self):
+        # test_frame1: a string "index" column plus four float columns
+        columns = ["index", "A", "B", "C", "D"]
+        data = [
+            (
+                "2000-01-03 00:00:00",
+                0.980268513777,
+                3.68573087906,
+                -0.364216805298,
+                -1.15973806169,
+            ),
+            (
+                "2000-01-04 00:00:00",
+                1.04791624281,
+                -0.0412318367011,
+                -0.16181208307,
+                0.212549316967,
+            ),
+            (
+                "2000-01-05 00:00:00",
+                0.498580885705,
+                0.731167677815,
+                -0.537677223318,
+                1.34627041952,
+            ),
+            (
+                "2000-01-06 00:00:00",
+                1.12020151869,
+                1.56762092543,
+                0.00364077397681,
+                0.67525259227,
+            ),
+        ]
+
+        self.test_frame1 = DataFrame(data, columns=columns)
+
+    def _load_test2_data(self):
+        # test_frame2: int, str, float, bool columns and a datetime column "E"
+        df = DataFrame(
+            dict(
+                A=[4, 1, 3, 6],
+                B=["asd", "gsq", "ylt", "jkl"],
+                C=[1.1, 3.1, 6.9, 5.3],
+                D=[False, True, True, False],
+                E=["1990-11-22", "1991-10-26", "1993-11-26", "1995-12-12"],
+            )
+        )
+        df["E"] = to_datetime(df["E"])
+
+        self.test_frame2 = df
+
+    def _load_test3_data(self):
+
columns = ["index", "A", "B"]
+        # column "A" includes 2 ** 31 - 1 and negative values
+        data = [
+            ("2000-01-03 00:00:00", 2 ** 31 - 1, -1.987670),
+            ("2000-01-04 00:00:00", -29, -0.0412318367011),
+            ("2000-01-05 00:00:00", 20000, 0.731167677815),
+            ("2000-01-06 00:00:00", -290867, 1.56762092543),
+        ]
+
+        self.test_frame3 = DataFrame(data, columns=columns)
+
+    def _load_raw_sql(self):
+        # (re)creates "types_test_data" and inserts the two rows below using
+        # the flavor-specific statement and field order from SQL_STRINGS
+        self.drop_table("types_test_data")
+        self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor])
+        ins = SQL_STRINGS["insert_test_types"][self.flavor]
+        data = [
+            {
+                "TextCol": "first",
+                "DateCol": "2000-01-03 00:00:00",
+                "DateColWithTz": "2000-01-01 00:00:00-08:00",
+                "IntDateCol": 535852800,
+                "IntDateOnlyCol": 20101010,
+                "FloatCol": 10.10,
+                "IntCol": 1,
+                "BoolCol": False,
+                "IntColWithNull": 1,
+                "BoolColWithNull": False,
+            },
+            {
+                "TextCol": "first",
+                "DateCol": "2000-01-04 00:00:00",
+                "DateColWithTz": "2000-06-01 00:00:00-07:00",
+                "IntDateCol": 1356998400,
+                "IntDateOnlyCol": 20101212,
+                "FloatCol": 10.10,
+                "IntCol": 1,
+                "BoolCol": False,
+                "IntColWithNull": None,
+                "BoolColWithNull": None,
+            },
+        ]
+
+        for d in data:
+            # only the keys named in ins["fields"] are bound, in that order
+            self._get_exec().execute(
+                ins["query"], [d[field] for field in ins["fields"]]
+            )
+
+    def _count_rows(self, table_name):
+        # table_name is interpolated into the SQL text as-is
+        result = (
+            self._get_exec()
+            .execute(f"SELECT count(*) AS count_1 FROM {table_name}")
+            .fetchone()
+        )
+        return result[0]
+
+    def _read_sql_iris(self):
+        iris_frame = self.pandasSQL.read_query("SELECT * FROM iris")
+        self._check_iris_loaded_frame(iris_frame)
+
+    def _read_sql_iris_parameter(self):
+        # positional parameters
+        query = SQL_STRINGS["read_parameters"][self.flavor]
+        params = ["Iris-setosa", 5.1]
+        iris_frame = self.pandasSQL.read_query(query, params=params)
+        self._check_iris_loaded_frame(iris_frame)
+
+    def _read_sql_iris_named_parameter(self):
+        # named parameters
+        query = SQL_STRINGS["read_named_parameters"][self.flavor]
+        params = {"name": "Iris-setosa", "length": 5.1}
+        iris_frame = self.pandasSQL.read_query(query, params=params)
+        self._check_iris_loaded_frame(iris_frame)
+
+    def 
_to_sql(self, method=None):
+        # write test_frame1 and check the table exists with one row per record
+        self.drop_table("test_frame1")
+
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame1", method=method)
+        assert self.pandasSQL.has_table("test_frame1")
+
+        num_entries = len(self.test_frame1)
+        num_rows = self._count_rows("test_frame1")
+        assert num_rows == num_entries
+
+        # Nuke table
+        self.drop_table("test_frame1")
+
+    def _to_sql_empty(self):
+        # writing a zero-row slice must not raise; nothing else is checked
+        self.drop_table("test_frame1")
+        self.pandasSQL.to_sql(self.test_frame1.iloc[:0], "test_frame1")
+
+    def _to_sql_fail(self):
+        # the second write with if_exists="fail" must raise ValueError
+        self.drop_table("test_frame1")
+
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="fail")
+        assert self.pandasSQL.has_table("test_frame1")
+
+        msg = "Table 'test_frame1' already exists"
+        with pytest.raises(ValueError, match=msg):
+            self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="fail")
+
+        self.drop_table("test_frame1")
+
+    def _to_sql_replace(self):
+        # after "replace" the row count equals a single copy of the frame
+        self.drop_table("test_frame1")
+
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="fail")
+        # Add to table again
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="replace")
+        assert self.pandasSQL.has_table("test_frame1")
+
+        num_entries = len(self.test_frame1)
+        num_rows = self._count_rows("test_frame1")
+
+        assert num_rows == num_entries
+        self.drop_table("test_frame1")
+
+    def _to_sql_append(self):
+        # after "append" the row count equals two copies of the frame
+        # Nuke table just in case
+        self.drop_table("test_frame1")
+
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="fail")
+
+        # Add to table again
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame1", if_exists="append")
+        assert self.pandasSQL.has_table("test_frame1")
+
+        num_entries = 2 * len(self.test_frame1)
+        num_rows = self._count_rows("test_frame1")
+
+        assert num_rows == num_entries
+        self.drop_table("test_frame1")
+
+    def _to_sql_method_callable(self):
+        check = []  # used to double check function below is really being used
+
+        def sample(pd_table, conn, keys, data_iter):
+            check.append(1)
+            data = [dict(zip(keys, row)) 
for row in data_iter]
+            conn.execute(pd_table.table.insert(), data)
+
+        self.drop_table("test_frame1")
+
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame1", method=sample)
+        assert self.pandasSQL.has_table("test_frame1")
+
+        # the custom insert method must have been called exactly once
+        assert check == [1]
+        num_entries = len(self.test_frame1)
+        num_rows = self._count_rows("test_frame1")
+        assert num_rows == num_entries
+        # Nuke table
+        self.drop_table("test_frame1")
+
+    def _roundtrip(self):
+        self.drop_table("test_frame_roundtrip")
+        self.pandasSQL.to_sql(self.test_frame1, "test_frame_roundtrip")
+        result = self.pandasSQL.read_query("SELECT * FROM test_frame_roundtrip")
+
+        # the written index comes back as the "level_0" column; restore it as
+        # the index before comparing
+        result.set_index("level_0", inplace=True)
+        # result.index.astype(int)
+
+        result.index.name = None
+
+        tm.assert_frame_equal(result, self.test_frame1)
+
+    def _execute_sql(self):
+        # drop_sql = "DROP TABLE IF EXISTS test"  # should already be done
+        iris_results = self.pandasSQL.execute("SELECT * FROM iris")
+        row = iris_results.fetchone()
+        # NOTE(review): the result of tm.equalContents is not asserted here
+        tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"])
+
+    def _to_sql_save_index(self):
+        # relies on a subclass-provided _get_index_columns(table_name)
+        df = DataFrame.from_records(
+            [(1, 2.1, "line1"), (2, 1.5, "line2")], columns=["A", "B", "C"], index=["A"]
+        )
+        self.pandasSQL.to_sql(df, "test_to_sql_saves_index")
+        ix_cols = self._get_index_columns("test_to_sql_saves_index")
+        assert ix_cols == [["A"]]
+
+    def _transaction_test(self):
+        with self.pandasSQL.run_transaction() as trans:
+            trans.execute("CREATE TABLE test_trans (A INT, B TEXT)")
+
+        # local exception type so only the deliberate failure is caught below
+        class DummyException(Exception):
+            pass
+
+        # Make sure when transaction is rolled back, no rows get inserted
+        ins_sql = "INSERT INTO test_trans (A,B) VALUES (1, 'blah')"
+        try:
+            with self.pandasSQL.run_transaction() as trans:
+                trans.execute(ins_sql)
+                raise DummyException("error")
+        except DummyException:
+            # ignore raised exception
+            pass
+        res = self.pandasSQL.read_query("SELECT * FROM test_trans")
+        assert len(res) == 0
+
+        # Make sure when transaction is committed, rows do get inserted
+        with 
self.pandasSQL.run_transaction() as trans:
+            trans.execute(ins_sql)
+        res2 = self.pandasSQL.read_query("SELECT * FROM test_trans")
+        assert len(res2) == 1
+
+
+# -----------------------------------------------------------------------------
+# -- Testing the public API
+
+
+class _TestSQLApi(PandasSQLTest):
+    """
+    Base class to test the public API.
+
+    From this two classes are derived to run these tests for both the
+    sqlalchemy mode (`TestSQLApi`) and the fallback mode
+    (`TestSQLiteFallbackApi`). These tests are run with sqlite3. Specific
+    tests for the different sql flavours are included in `_TestSQLAlchemy`.
+
+    Notes:
+    flavor can always be passed even in SQLAlchemy mode,
+    should be correctly ignored.
+
+    we don't use drop_table because that isn't part of the public api
+
+    """
+
+    flavor = "sqlite"
+    # declared here, assigned by the concrete subclasses
+    mode: str
+
+    def setup_connect(self):
+        # connect() is provided by the concrete subclass
+        self.conn = self.connect()
+
+    @pytest.fixture(autouse=True)
+    def setup_method(self, load_iris_data):
+        # requesting load_iris_data makes the iris table exist before the
+        # remaining fixtures are loaded
+        self.load_test_data_and_sql()
+
+    def load_test_data_and_sql(self):
+        self._load_iris_view()
+        self._load_test1_data()
+        self._load_test2_data()
+        self._load_test3_data()
+        self._load_raw_sql()
+
+    def test_read_sql_iris(self):
+        iris_frame = sql.read_sql_query("SELECT * FROM iris", self.conn)
+        self._check_iris_loaded_frame(iris_frame)
+
+    def test_read_sql_view(self):
+        iris_frame = sql.read_sql_query("SELECT * FROM iris_view", self.conn)
+        self._check_iris_loaded_frame(iris_frame)
+
+    def test_to_sql(self):
+        sql.to_sql(self.test_frame1, "test_frame1", self.conn)
+        assert sql.has_table("test_frame1", self.conn)
+
+    def test_to_sql_fail(self):
+        sql.to_sql(self.test_frame1, "test_frame2", self.conn, if_exists="fail")
+        assert sql.has_table("test_frame2", self.conn)
+
+        # a second write with if_exists="fail" must raise
+        msg = "Table 'test_frame2' already exists"
+        with pytest.raises(ValueError, match=msg):
+            sql.to_sql(self.test_frame1, "test_frame2", self.conn, if_exists="fail")
+
+    def test_to_sql_replace(self):
+        sql.to_sql(self.test_frame1, "test_frame3", 
self.conn, if_exists="fail")
+        # Add to table again
+        sql.to_sql(self.test_frame1, "test_frame3", self.conn, if_exists="replace")
+        assert sql.has_table("test_frame3", self.conn)
+
+        # "replace" leaves exactly one copy of the frame in the table
+        num_entries = len(self.test_frame1)
+        num_rows = self._count_rows("test_frame3")
+
+        assert num_rows == num_entries
+
+    def test_to_sql_append(self):
+        sql.to_sql(self.test_frame1, "test_frame4", self.conn, if_exists="fail")
+
+        # Add to table again
+        sql.to_sql(self.test_frame1, "test_frame4", self.conn, if_exists="append")
+        assert sql.has_table("test_frame4", self.conn)
+
+        # "append" leaves two copies of the frame in the table
+        num_entries = 2 * len(self.test_frame1)
+        num_rows = self._count_rows("test_frame4")
+
+        assert num_rows == num_entries
+
+    def test_to_sql_type_mapping(self):
+        # written without the index, the frame must read back equal
+        sql.to_sql(self.test_frame3, "test_frame5", self.conn, index=False)
+        result = sql.read_sql("SELECT * FROM test_frame5", self.conn)
+
+        tm.assert_frame_equal(self.test_frame3, result)
+
+    def test_to_sql_series(self):
+        # a Series is written like a one-column frame
+        s = Series(np.arange(5, dtype="int64"), name="series")
+        sql.to_sql(s, "test_series", self.conn, index=False)
+        s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn)
+        tm.assert_frame_equal(s.to_frame(), s2)
+
+    def test_roundtrip(self):
+        sql.to_sql(self.test_frame1, "test_frame_roundtrip", con=self.conn)
+        result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=self.conn)
+
+        # HACK! 
+        result.index = self.test_frame1.index
+        result.set_index("level_0", inplace=True)
+        # Index.astype returns a new Index, so the cast only takes effect when
+        # its result is assigned back; the bare call was a no-op.
+        result.index = result.index.astype(int)
+        result.index.name = None
+        tm.assert_frame_equal(result, self.test_frame1)
+
+    def test_roundtrip_chunksize(self):
+        # writing in chunks of 2 rows must give the same table contents
+        sql.to_sql(
+            self.test_frame1,
+            "test_frame_roundtrip",
+            con=self.conn,
+            index=False,
+            chunksize=2,
+        )
+        result = sql.read_sql_query("SELECT * FROM test_frame_roundtrip", con=self.conn)
+        tm.assert_frame_equal(result, self.test_frame1)
+
+    def test_execute_sql(self):
+        # drop_sql = "DROP TABLE IF EXISTS test"  # should already be done
+        iris_results = sql.execute("SELECT * FROM iris", con=self.conn)
+        row = iris_results.fetchone()
+        # NOTE(review): the result of tm.equalContents is not asserted here
+        tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, "Iris-setosa"])
+
+    def test_date_parsing(self):
+        # Test date parsing in read_sql
+        # No Parsing
+        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn)
+        assert not issubclass(df.DateCol.dtype.type, np.datetime64)
+
+        # parse_dates as a list of column names
+        df = sql.read_sql_query(
+            "SELECT * FROM types_test_data", self.conn, parse_dates=["DateCol"]
+        )
+        assert issubclass(df.DateCol.dtype.type, np.datetime64)
+        assert df.DateCol.tolist() == [
+            pd.Timestamp(2000, 1, 3, 0, 0, 0),
+            pd.Timestamp(2000, 1, 4, 0, 0, 0),
+        ]
+
+        # parse_dates as {column: strftime format}
+        df = sql.read_sql_query(
+            "SELECT * FROM types_test_data",
+            self.conn,
+            parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"},
+        )
+        assert issubclass(df.DateCol.dtype.type, np.datetime64)
+        assert df.DateCol.tolist() == [
+            pd.Timestamp(2000, 1, 3, 0, 0, 0),
+            pd.Timestamp(2000, 1, 4, 0, 0, 0),
+        ]
+
+        # integer column listed in parse_dates
+        df = sql.read_sql_query(
+            "SELECT * FROM types_test_data", self.conn, parse_dates=["IntDateCol"]
+        )
+        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+        assert df.IntDateCol.tolist() == [
+            pd.Timestamp(1986, 12, 25, 0, 0, 0),
+            pd.Timestamp(2013, 1, 1, 0, 0, 0),
+        ]
+
+        # integer column with an explicit unit ("s")
+        df = sql.read_sql_query(
+            "SELECT * FROM types_test_data", self.conn, parse_dates={"IntDateCol": "s"}
+        )
+        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+        assert 
df.IntDateCol.tolist() == [
+            pd.Timestamp(1986, 12, 25, 0, 0, 0),
+            pd.Timestamp(2013, 1, 1, 0, 0, 0),
+        ]
+
+        # integer column holding YYYYMMDD values, parsed with a format string
+        df = sql.read_sql_query(
+            "SELECT * FROM types_test_data",
+            self.conn,
+            parse_dates={"IntDateOnlyCol": "%Y%m%d"},
+        )
+        assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64)
+        assert df.IntDateOnlyCol.tolist() == [
+            pd.Timestamp("2010-10-10"),
+            pd.Timestamp("2010-12-12"),
+        ]
+
+    def test_date_and_index(self):
+        # Test case where same column appears in parse_dates and index_col
+
+        df = sql.read_sql_query(
+            "SELECT * FROM types_test_data",
+            self.conn,
+            index_col="DateCol",
+            parse_dates=["DateCol", "IntDateCol"],
+        )
+
+        assert issubclass(df.index.dtype.type, np.datetime64)
+        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+
+    def test_timedelta(self):
+
+        # see #6921
+        # writing a timedelta column is expected to emit a UserWarning and to
+        # read back as the int64 representation of the values
+        df = to_timedelta(Series(["00:00:01", "00:00:03"], name="foo")).to_frame()
+        with tm.assert_produces_warning(UserWarning):
+            df.to_sql("test_timedelta", self.conn)
+        result = sql.read_sql_query("SELECT * FROM test_timedelta", self.conn)
+        tm.assert_series_equal(result["foo"], df["foo"].astype("int64"))
+
+    def test_complex_raises(self):
+        # complex values cannot be written; to_sql must raise ValueError
+        df = DataFrame({"a": [1 + 1j, 2j]})
+        msg = "Complex datatypes not supported"
+        with pytest.raises(ValueError, match=msg):
+            df.to_sql("test_complex", self.conn)
+
+    @pytest.mark.parametrize(
+        "index_name,index_label,expected",
+        [
+            # no index name, defaults to 'index'
+            (None, None, "index"),
+            # specifying index_label
+            (None, "other_label", "other_label"),
+            # using the index name
+            ("index_name", None, "index_name"),
+            # has index name, but specifying index_label
+            ("index_name", "other_label", "other_label"),
+            # index name is integer
+            (0, None, "0"),
+            # index name is None but index label is integer
+            (None, 0, "0"),
+        ],
+    )
+    def test_to_sql_index_label(self, index_name, index_label, expected):
+        # "expected" is the name of the first column read back from the table
+        temp_frame = DataFrame({"col1": range(4)})
+        temp_frame.index.name = index_name
+        query = "SELECT * FROM test_index_label"
+        sql.to_sql(temp_frame, "test_index_label", self.conn, index_label=index_label)
+        frame = sql.read_sql_query(query, self.conn)
+        assert frame.columns[0] == expected
+
+    def test_to_sql_index_label_multiindex(self):
+        # same precedence rules as the single-index test, for a 2-level index;
+        # every step after the first rewrites the table with "replace"
+        temp_frame = DataFrame(
+            {"col1": range(4)},
+            index=MultiIndex.from_product([("A0", "A1"), ("B0", "B1")]),
+        )
+
+        # no index name, defaults to 'level_0' and 'level_1'
+        sql.to_sql(temp_frame, "test_index_label", self.conn)
+        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
+        assert frame.columns[0] == "level_0"
+        assert frame.columns[1] == "level_1"
+
+        # specifying index_label
+        sql.to_sql(
+            temp_frame,
+            "test_index_label",
+            self.conn,
+            if_exists="replace",
+            index_label=["A", "B"],
+        )
+        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
+        assert frame.columns[:2].tolist() == ["A", "B"]
+
+        # using the index name
+        temp_frame.index.names = ["A", "B"]
+        sql.to_sql(temp_frame, "test_index_label", self.conn, if_exists="replace")
+        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
+        assert frame.columns[:2].tolist() == ["A", "B"]
+
+        # has index name, but specifying index_label
+        sql.to_sql(
+            temp_frame,
+            "test_index_label",
+            self.conn,
+            if_exists="replace",
+            index_label=["C", "D"],
+        )
+        frame = sql.read_sql_query("SELECT * FROM test_index_label", self.conn)
+        assert frame.columns[:2].tolist() == ["C", "D"]
+
+        # a single label for a two-level index must be rejected
+        msg = "Length of 'index_label' should match number of levels, which is 2"
+        with pytest.raises(ValueError, match=msg):
+            sql.to_sql(
+                temp_frame,
+                "test_index_label",
+                self.conn,
+                if_exists="replace",
+                index_label="C",
+            )
+
+    def test_multiindex_roundtrip(self):
+        # columns "A" and "B" form the index, are written, then read back as
+        # index_col
+        df = DataFrame.from_records(
+            [(1, 2.1, "line1"), (2, 1.5, "line2")],
+            columns=["A", "B", "C"],
+            index=["A", "B"],
+        )
+
+        df.to_sql("test_multiindex_roundtrip", self.conn)
+        result = sql.read_sql_query(
+            "SELECT * FROM test_multiindex_roundtrip", self.conn, index_col=["A", "B"]
+        )
+
tm.assert_frame_equal(df, result, check_index_type=True)
+
+    def test_integer_col_names(self):
+        # integer column labels must be writable; nothing is read back
+        df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
+        sql.to_sql(df, "test_frame_integer_col_names", self.conn, if_exists="replace")
+
+    def test_get_schema(self):
+        create_sql = sql.get_schema(self.test_frame1, "test", con=self.conn)
+        assert "CREATE" in create_sql
+
+    def test_get_schema_dtypes(self):
+        # the dtype override is a sqlalchemy type in sqlalchemy mode and a
+        # plain SQL type string otherwise
+        float_frame = DataFrame({"a": [1.1, 1.2], "b": [2.1, 2.2]})
+        dtype = sqlalchemy.Integer if self.mode == "sqlalchemy" else "INTEGER"
+        create_sql = sql.get_schema(
+            float_frame, "test", con=self.conn, dtype={"b": dtype}
+        )
+        assert "CREATE" in create_sql
+        assert "INTEGER" in create_sql
+
+    def test_get_schema_keys(self):
+        frame = DataFrame({"Col1": [1.1, 1.2], "Col2": [2.1, 2.2]})
+        create_sql = sql.get_schema(frame, "test", con=self.conn, keys="Col1")
+        constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("Col1")'
+        assert constraint_sentence in create_sql
+
+        # multiple columns as key (GH10385)
+        create_sql = sql.get_schema(
+            self.test_frame1, "test", con=self.conn, keys=["A", "B"]
+        )
+        constraint_sentence = 'CONSTRAINT test_pk PRIMARY KEY ("A", "B")'
+        assert constraint_sentence in create_sql
+
+    def test_chunksize_read(self):
+        # 22 rows read in chunks of 5 -> four full chunks and one of 2 rows
+        df = DataFrame(np.random.randn(22, 5), columns=list("abcde"))
+        df.to_sql("test_chunksize", self.conn, index=False)
+
+        # reading the query in one time
+        res1 = sql.read_sql_query("select * from test_chunksize", self.conn)
+
+        # reading the query in chunks with read_sql_query
+        res2 = DataFrame()
+        i = 0
+        sizes = [5, 5, 5, 5, 2]
+
+        for chunk in sql.read_sql_query(
+            "select * from test_chunksize", self.conn, chunksize=5
+        ):
+            res2 = concat([res2, chunk], ignore_index=True)
+            assert len(chunk) == sizes[i]
+            i += 1
+
+        tm.assert_frame_equal(res1, res2)
+
+        # reading the table in chunks with read_sql_table (sqlalchemy only)
+        if self.mode == "sqlalchemy":
+            res3 = DataFrame()
+            i = 0
+            sizes = [5, 5, 5, 5, 2]
+
+            for chunk in 
sql.read_sql_table("test_chunksize", self.conn, chunksize=5):
+                res3 = concat([res3, chunk], ignore_index=True)
+                assert len(chunk) == sizes[i]
+                i += 1
+
+            tm.assert_frame_equal(res1, res3)
+
+    def test_categorical(self):
+        # GH8624
+        # test that categorical gets written correctly as dense column
+        df = DataFrame(
+            {
+                "person_id": [1, 2, 3],
+                "person_name": ["John P. Doe", "Jane Dove", "John P. Doe"],
+            }
+        )
+        # df2 is the categorical copy that gets written; the read-back result
+        # is compared against the plain (non-categorical) df
+        df2 = df.copy()
+        df2["person_name"] = df2["person_name"].astype("category")
+
+        df2.to_sql("test_categorical", self.conn, index=False)
+        res = sql.read_sql_query("SELECT * FROM test_categorical", self.conn)
+
+        tm.assert_frame_equal(res, df)
+
+    def test_unicode_column_name(self):
+        # GH 11431
+        # a non-ASCII column name must be writable; nothing is read back
+        df = DataFrame([[1, 2], [3, 4]], columns=["\xe9", "b"])
+        df.to_sql("test_unicode", self.conn, index=False)
+
+    def test_escaped_table_name(self):
+        # GH 13206
+        # the table name contains dashes, so the query quotes it with backticks
+        df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]})
+        df.to_sql("d1187b08-4943-4c8d-a7f6", self.conn, index=False)
+
+        res = sql.read_sql_query("SELECT * FROM `d1187b08-4943-4c8d-a7f6`", self.conn)
+
+        tm.assert_frame_equal(res, df)
+
+
+@pytest.mark.single
+@pytest.mark.skipif(not SQLALCHEMY_INSTALLED, reason="SQLAlchemy not installed")
+class TestSQLApi(SQLAlchemyMixIn, _TestSQLApi):
+    """
+    Test the public API as it would be used directly
+
+    Tests for `read_sql_table` are included here, as this is specific for the
+    sqlalchemy mode. 
+
+    """
+
+    flavor = "sqlite"
+    mode = "sqlalchemy"
+
+    def connect(self):
+        # in-memory sqlite database accessed through a sqlalchemy engine
+        return sqlalchemy.create_engine("sqlite:///:memory:")
+
+    def test_read_table_columns(self):
+        # test columns argument in read_table
+        sql.to_sql(self.test_frame1, "test_frame", self.conn)
+
+        cols = ["A", "B"]
+        result = sql.read_sql_table("test_frame", self.conn, columns=cols)
+        assert result.columns.tolist() == cols
+
+    def test_read_table_index_col(self):
+        # test index_col argument in read_table (single name, list of names,
+        # and combined with columns)
+        sql.to_sql(self.test_frame1, "test_frame", self.conn)
+
+        result = sql.read_sql_table("test_frame", self.conn, index_col="index")
+        assert result.index.names == ["index"]
+
+        result = sql.read_sql_table("test_frame", self.conn, index_col=["A", "B"])
+        assert result.index.names == ["A", "B"]
+
+        result = sql.read_sql_table(
+            "test_frame", self.conn, index_col=["A", "B"], columns=["C", "D"]
+        )
+        assert result.index.names == ["A", "B"]
+        assert result.columns.tolist() == ["C", "D"]
+
+    def test_read_sql_delegate(self):
+        # read_sql given a query must match read_sql_query ...
+        iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn)
+        iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn)
+        tm.assert_frame_equal(iris_frame1, iris_frame2)
+
+        # ... and given a bare table name must match read_sql_table
+        iris_frame1 = sql.read_sql_table("iris", self.conn)
+        iris_frame2 = sql.read_sql("iris", self.conn)
+        tm.assert_frame_equal(iris_frame1, iris_frame2)
+
+    def test_not_reflect_all_tables(self):
+        # create invalid table
+        qry = """CREATE TABLE invalid (x INTEGER, y UNKNOWN);"""
+        self.conn.execute(qry)
+        qry = """CREATE TABLE other_table (x INTEGER, y INTEGER);"""
+        self.conn.execute(qry)
+
+        # reading "other_table" must emit no warnings even though the
+        # unrelated "invalid" table exists in the same database
+        with warnings.catch_warnings(record=True) as w:
+            # Cause all warnings to always be triggered.
+            warnings.simplefilter("always")
+            # Trigger a warning. 
+ sql.read_sql_table("other_table", self.conn) + sql.read_sql_query("SELECT * FROM other_table", self.conn) + # Verify some things + assert len(w) == 0 + + def test_warning_case_insensitive_table_name(self): + # see gh-7815 + # + # We can't test that this warning is triggered, a the database + # configuration would have to be altered. But here we test that + # the warning is certainly NOT triggered in a normal case. + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + # This should not trigger a Warning + self.test_frame1.to_sql("CaseSensitive", self.conn) + # Verify some things + assert len(w) == 0 + + def _get_index_columns(self, tbl_name): + from sqlalchemy.engine import reflection + + insp = reflection.Inspector.from_engine(self.conn) + ixs = insp.get_indexes("test_index_saved") + ixs = [i["column_names"] for i in ixs] + return ixs + + def test_sqlalchemy_type_mapping(self): + + # Test Timestamp objects (no datetime64 because of timezone) (GH9085) + df = DataFrame( + {"time": to_datetime(["201412120154", "201412110254"], utc=True)} + ) + db = sql.SQLDatabase(self.conn) + table = sql.SQLTable("test_type", db, frame=df) + # GH 9086: TIMESTAMP is the suggested type for datetimes with timezones + assert isinstance(table.table.c["time"].type, sqltypes.TIMESTAMP) + + def test_database_uri_string(self): + + # Test read_sql and .to_sql method with a database URI (GH10654) + test_frame1 = self.test_frame1 + # db_uri = 'sqlite:///:memory:' # raises + # sqlalchemy.exc.OperationalError: (sqlite3.OperationalError) near + # "iris": syntax error [SQL: 'iris'] + with tm.ensure_clean() as name: + db_uri = "sqlite:///" + name + table = "iris" + test_frame1.to_sql(table, db_uri, if_exists="replace", index=False) + test_frame2 = sql.read_sql(table, db_uri) + test_frame3 = sql.read_sql_table(table, db_uri) + query = "SELECT * FROM iris" + test_frame4 = sql.read_sql_query(query, db_uri) + 
tm.assert_frame_equal(test_frame1, test_frame2) + tm.assert_frame_equal(test_frame1, test_frame3) + tm.assert_frame_equal(test_frame1, test_frame4) + + # using driver that will not be installed on Travis to trigger error + # in sqlalchemy.create_engine -> test passing of this error to user + try: + # the rest of this test depends on pg8000's being absent + import pg8000 # noqa + + pytest.skip("pg8000 is installed") + except ImportError: + pass + + db_uri = "postgresql+pg8000://user:pass@host/dbname" + with pytest.raises(ImportError, match="pg8000"): + sql.read_sql("select * from table", db_uri) + + def _make_iris_table_metadata(self): + sa = sqlalchemy + metadata = sa.MetaData() + iris = sa.Table( + "iris", + metadata, + sa.Column("SepalLength", sa.REAL), + sa.Column("SepalWidth", sa.REAL), + sa.Column("PetalLength", sa.REAL), + sa.Column("PetalWidth", sa.REAL), + sa.Column("Name", sa.TEXT), + ) + + return iris + + def test_query_by_text_obj(self): + # WIP : GH10846 + name_text = sqlalchemy.text("select * from iris where name=:name") + iris_df = sql.read_sql(name_text, self.conn, params={"name": "Iris-versicolor"}) + all_names = set(iris_df["Name"]) + assert all_names == {"Iris-versicolor"} + + def test_query_by_select_obj(self): + # WIP : GH10846 + iris = self._make_iris_table_metadata() + + name_select = sqlalchemy.select([iris]).where( + iris.c.Name == sqlalchemy.bindparam("name") + ) + iris_df = sql.read_sql(name_select, self.conn, params={"name": "Iris-setosa"}) + all_names = set(iris_df["Name"]) + assert all_names == {"Iris-setosa"} + + +class _EngineToConnMixin: + """ + A mixin that causes setup_connect to create a conn rather than an engine. 
+ """ + + @pytest.fixture(autouse=True) + def setup_method(self, load_iris_data): + super().load_test_data_and_sql() + engine = self.conn + conn = engine.connect() + self.__tx = conn.begin() + self.pandasSQL = sql.SQLDatabase(conn) + self.__engine = engine + self.conn = conn + + yield + + self.__tx.rollback() + self.conn.close() + self.conn = self.__engine + self.pandasSQL = sql.SQLDatabase(self.__engine) + # XXX: + # super().teardown_method(method) + + +@pytest.mark.single +class TestSQLApiConn(_EngineToConnMixin, TestSQLApi): + pass + + +@pytest.mark.single +class TestSQLiteFallbackApi(SQLiteMixIn, _TestSQLApi): + """ + Test the public sqlite connection fallback API + + """ + + flavor = "sqlite" + mode = "fallback" + + def connect(self, database=":memory:"): + return sqlite3.connect(database) + + def test_sql_open_close(self): + # Test if the IO in the database still work if the connection closed + # between the writing and reading (as in many real situations). + + with tm.ensure_clean() as name: + + conn = self.connect(name) + sql.to_sql(self.test_frame3, "test_frame3_legacy", conn, index=False) + conn.close() + + conn = self.connect(name) + result = sql.read_sql_query("SELECT * FROM test_frame3_legacy;", conn) + conn.close() + + tm.assert_frame_equal(self.test_frame3, result) + + @pytest.mark.skipif(SQLALCHEMY_INSTALLED, reason="SQLAlchemy is installed") + def test_con_string_import_error(self): + conn = "mysql://root@localhost/pandas_nosetest" + msg = "Using URI string without sqlalchemy installed" + with pytest.raises(ImportError, match=msg): + sql.read_sql("SELECT * FROM iris", conn) + + def test_read_sql_delegate(self): + iris_frame1 = sql.read_sql_query("SELECT * FROM iris", self.conn) + iris_frame2 = sql.read_sql("SELECT * FROM iris", self.conn) + tm.assert_frame_equal(iris_frame1, iris_frame2) + + msg = "Execution failed on sql 'iris': near \"iris\": syntax error" + with pytest.raises(sql.DatabaseError, match=msg): + sql.read_sql("iris", self.conn) + + 
def test_safe_names_warning(self): + # GH 6798 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b "]) # has a space + # warns on create table with spaces in names + with tm.assert_produces_warning(): + sql.to_sql(df, "test_frame3_legacy", self.conn, index=False) + + def test_get_schema2(self): + # without providing a connection object (available for backwards comp) + create_sql = sql.get_schema(self.test_frame1, "test") + assert "CREATE" in create_sql + + def _get_sqlite_column_type(self, schema, column): + + for col in schema.split("\n"): + if col.split()[0].strip('""') == column: + return col.split()[1] + raise ValueError(f"Column {column} not found") + + def test_sqlite_type_mapping(self): + + # Test Timestamp objects (no datetime64 because of timezone) (GH9085) + df = DataFrame( + {"time": to_datetime(["201412120154", "201412110254"], utc=True)} + ) + db = sql.SQLiteDatabase(self.conn) + table = sql.SQLiteTable("test_type", db, frame=df) + schema = table.sql_schema() + assert self._get_sqlite_column_type(schema, "time") == "TIMESTAMP" + + +# ----------------------------------------------------------------------------- +# -- Database flavor specific tests + + +class _TestSQLAlchemy(SQLAlchemyMixIn, PandasSQLTest): + """ + Base class for testing the sqlalchemy backend. + + Subclasses for specific database types are created below. Tests that + deviate for each flavor are overwritten there. 
+ + """ + + flavor: str + + @pytest.fixture(autouse=True, scope="class") + def setup_class(cls): + cls.setup_import() + cls.setup_driver() + conn = cls.connect() + conn.connect() + + def load_test_data_and_sql(self): + self._load_raw_sql() + self._load_test1_data() + + @pytest.fixture(autouse=True) + def setup_method(self, load_iris_data): + self.load_test_data_and_sql() + + @classmethod + def setup_import(cls): + # Skip this test if SQLAlchemy not available + if not SQLALCHEMY_INSTALLED: + pytest.skip("SQLAlchemy not installed") + + @classmethod + def setup_driver(cls): + raise NotImplementedError() + + @classmethod + def connect(cls): + raise NotImplementedError() + + def setup_connect(self): + try: + self.conn = self.connect() + self.pandasSQL = sql.SQLDatabase(self.conn) + # to test if connection can be made: + self.conn.connect() + except sqlalchemy.exc.OperationalError: + pytest.skip(f"Can't connect to {self.flavor} server") + + def test_read_sql(self): + self._read_sql_iris() + + def test_read_sql_parameter(self): + self._read_sql_iris_parameter() + + def test_read_sql_named_parameter(self): + self._read_sql_iris_named_parameter() + + def test_to_sql(self): + self._to_sql() + + def test_to_sql_empty(self): + self._to_sql_empty() + + def test_to_sql_fail(self): + self._to_sql_fail() + + def test_to_sql_replace(self): + self._to_sql_replace() + + def test_to_sql_append(self): + self._to_sql_append() + + def test_to_sql_method_multi(self): + self._to_sql(method="multi") + + def test_to_sql_method_callable(self): + self._to_sql_method_callable() + + def test_create_table(self): + temp_conn = self.connect() + temp_frame = DataFrame( + {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 2.0, 1.0]} + ) + + pandasSQL = sql.SQLDatabase(temp_conn) + pandasSQL.to_sql(temp_frame, "temp_frame") + + assert temp_conn.has_table("temp_frame") + + def test_drop_table(self): + temp_conn = self.connect() + + temp_frame = DataFrame( + {"one": [1.0, 2.0, 3.0, 4.0], "two": [4.0, 3.0, 
2.0, 1.0]} + ) + + pandasSQL = sql.SQLDatabase(temp_conn) + pandasSQL.to_sql(temp_frame, "temp_frame") + + assert temp_conn.has_table("temp_frame") + + pandasSQL.drop_table("temp_frame") + + assert not temp_conn.has_table("temp_frame") + + def test_roundtrip(self): + self._roundtrip() + + def test_execute_sql(self): + self._execute_sql() + + def test_read_table(self): + iris_frame = sql.read_sql_table("iris", con=self.conn) + self._check_iris_loaded_frame(iris_frame) + + def test_read_table_columns(self): + iris_frame = sql.read_sql_table( + "iris", con=self.conn, columns=["SepalLength", "SepalLength"] + ) + tm.equalContents(iris_frame.columns.values, ["SepalLength", "SepalLength"]) + + def test_read_table_absent_raises(self): + msg = "Table this_doesnt_exist not found" + with pytest.raises(ValueError, match=msg): + sql.read_sql_table("this_doesnt_exist", con=self.conn) + + def test_default_type_conversion(self): + df = sql.read_sql_table("types_test_data", self.conn) + + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + assert issubclass(df.BoolCol.dtype.type, np.bool_) + + # Int column with NA values stays as float + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + # Bool column with NA values becomes object + assert issubclass(df.BoolColWithNull.dtype.type, np.object) + + def test_bigint(self): + # int64 should be converted to BigInteger, GH7433 + df = DataFrame(data={"i64": [2 ** 62]}) + df.to_sql("test_bigint", self.conn, index=False) + result = sql.read_sql_table("test_bigint", self.conn) + + tm.assert_frame_equal(df, result) + + def test_default_date_load(self): + df = sql.read_sql_table("types_test_data", self.conn) + + # IMPORTANT - sqlite has no native date type, so shouldn't parse, but + # MySQL SHOULD be converted. 
+ assert issubclass(df.DateCol.dtype.type, np.datetime64) + + def test_datetime_with_timezone(self): + # edge case that converts postgresql datetime with time zone types + # to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok + # but should be more natural, so coerce to datetime64[ns] for now + + def check(col): + # check that a column is either datetime64[ns] + # or datetime64[ns, UTC] + if is_datetime64_dtype(col.dtype): + + # "2000-01-01 00:00:00-08:00" should convert to + # "2000-01-01 08:00:00" + assert col[0] == Timestamp("2000-01-01 08:00:00") + + # "2000-06-01 00:00:00-07:00" should convert to + # "2000-06-01 07:00:00" + assert col[1] == Timestamp("2000-06-01 07:00:00") + + elif is_datetime64tz_dtype(col.dtype): + assert str(col.dt.tz) == "UTC" + + # "2000-01-01 00:00:00-08:00" should convert to + # "2000-01-01 08:00:00" + # "2000-06-01 00:00:00-07:00" should convert to + # "2000-06-01 07:00:00" + # GH 6415 + expected_data = [ + Timestamp("2000-01-01 08:00:00", tz="UTC"), + Timestamp("2000-06-01 07:00:00", tz="UTC"), + ] + expected = Series(expected_data, name=col.name) + tm.assert_series_equal(col, expected) + + else: + raise AssertionError( + f"DateCol loaded with incorrect type -> {col.dtype}" + ) + + # GH11216 + df = pd.read_sql_query("select * from types_test_data", self.conn) + if not hasattr(df, "DateColWithTz"): + pytest.skip("no column with datetime with time zone") + + # this is parsed on Travis (linux), but not on macosx for some reason + # even with the same versions of psycopg2 & sqlalchemy, possibly a + # Postgresql server version difference + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + + df = pd.read_sql_query( + "select * from types_test_data", self.conn, parse_dates=["DateColWithTz"] + ) + if not hasattr(df, "DateColWithTz"): + pytest.skip("no column with datetime with time zone") + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == "UTC" + check(df.DateColWithTz) 
+ + df = pd.concat( + list( + pd.read_sql_query( + "select * from types_test_data", self.conn, chunksize=1 + ) + ), + ignore_index=True, + ) + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == "UTC" + expected = sql.read_sql_table("types_test_data", self.conn) + col = expected.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) + + # xref #7139 + # this might or might not be converted depending on the postgres driver + df = sql.read_sql_table("types_test_data", self.conn) + check(df.DateColWithTz) + + def test_datetime_with_timezone_roundtrip(self): + # GH 9086 + # Write datetimetz data to a db and read it back + # For dbs that support timestamps with timezones, should get back UTC + # otherwise naive data should be returned + expected = DataFrame( + {"A": date_range("2013-01-01 09:00:00", periods=3, tz="US/Pacific")} + ) + expected.to_sql("test_datetime_tz", self.conn, index=False) + + if self.flavor == "postgresql": + # SQLAlchemy "timezones" (i.e. 
offsets) are coerced to UTC + expected["A"] = expected["A"].dt.tz_convert("UTC") + else: + # Otherwise, timestamps are returned as local, naive + expected["A"] = expected["A"].dt.tz_localize(None) + + result = sql.read_sql_table("test_datetime_tz", self.conn) + tm.assert_frame_equal(result, expected) + + result = sql.read_sql_query("SELECT * FROM test_datetime_tz", self.conn) + if self.flavor == "sqlite": + # read_sql_query does not return datetime type like read_sql_table + assert isinstance(result.loc[0, "A"], str) + result["A"] = to_datetime(result["A"]) + tm.assert_frame_equal(result, expected) + + def test_naive_datetimeindex_roundtrip(self): + # GH 23510 + # Ensure that a naive DatetimeIndex isn't converted to UTC + dates = date_range("2018-01-01", periods=5, freq="6H") + expected = DataFrame({"nums": range(5)}, index=dates) + expected.to_sql("foo_table", self.conn, index_label="info_date") + result = sql.read_sql_table("foo_table", self.conn, index_col="info_date") + # result index will gain a name from a set_index operation; expected has none + tm.assert_frame_equal(result, expected, check_names=False) + + def test_date_parsing(self): + # No Parsing + df = sql.read_sql_table("types_test_data", self.conn) + expected_type = object if self.flavor == "sqlite" else np.datetime64 + assert issubclass(df.DateCol.dtype.type, expected_type) + + df = sql.read_sql_table("types_test_data", self.conn, parse_dates=["DateCol"]) + assert issubclass(df.DateCol.dtype.type, np.datetime64) + + df = sql.read_sql_table( + "types_test_data", self.conn, parse_dates={"DateCol": "%Y-%m-%d %H:%M:%S"} + ) + assert issubclass(df.DateCol.dtype.type, np.datetime64) + + df = sql.read_sql_table( + "types_test_data", + self.conn, + parse_dates={"DateCol": {"format": "%Y-%m-%d %H:%M:%S"}}, + ) + assert issubclass(df.DateCol.dtype.type, np.datetime64) + + df = sql.read_sql_table( + "types_test_data", self.conn, parse_dates=["IntDateCol"] + ) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+ + df = sql.read_sql_table( + "types_test_data", self.conn, parse_dates={"IntDateCol": "s"} + ) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + + df = sql.read_sql_table( + "types_test_data", self.conn, parse_dates={"IntDateCol": {"unit": "s"}} + ) + assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + + def test_datetime(self): + df = DataFrame( + {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} + ) + df.to_sql("test_datetime", self.conn) + + # with read_table -> type information from schema used + result = sql.read_sql_table("test_datetime", self.conn) + result = result.drop("index", axis=1) + tm.assert_frame_equal(result, df) + + # with read_sql -> no type information -> sqlite has no native + result = sql.read_sql_query("SELECT * FROM test_datetime", self.conn) + result = result.drop("index", axis=1) + if self.flavor == "sqlite": + assert isinstance(result.loc[0, "A"], str) + result["A"] = to_datetime(result["A"]) + tm.assert_frame_equal(result, df) + else: + tm.assert_frame_equal(result, df) + + def test_datetime_NaT(self): + df = DataFrame( + {"A": date_range("2013-01-01 09:00:00", periods=3), "B": np.arange(3.0)} + ) + df.loc[1, "A"] = np.nan + df.to_sql("test_datetime", self.conn, index=False) + + # with read_table -> type information from schema used + result = sql.read_sql_table("test_datetime", self.conn) + tm.assert_frame_equal(result, df) + + # with read_sql -> no type information -> sqlite has no native + result = sql.read_sql_query("SELECT * FROM test_datetime", self.conn) + if self.flavor == "sqlite": + assert isinstance(result.loc[0, "A"], str) + result["A"] = to_datetime(result["A"], errors="coerce") + tm.assert_frame_equal(result, df) + else: + tm.assert_frame_equal(result, df) + + def test_datetime_date(self): + # test support for datetime.date + df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) + df.to_sql("test_date", self.conn, index=False) + res = read_sql_table("test_date", 
self.conn) + result = res["a"] + expected = to_datetime(df["a"]) + # comes back as datetime64 + tm.assert_series_equal(result, expected) + + def test_datetime_time(self): + # test support for datetime.time + df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"]) + df.to_sql("test_time", self.conn, index=False) + res = read_sql_table("test_time", self.conn) + tm.assert_frame_equal(res, df) + + # GH8341 + # first, use the fallback to have the sqlite adapter put in place + sqlite_conn = TestSQLiteFallback.connect() + sql.to_sql(df, "test_time2", sqlite_conn, index=False) + res = sql.read_sql_query("SELECT * FROM test_time2", sqlite_conn) + ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f")) + tm.assert_frame_equal(ref, res) # check if adapter is in place + # then test if sqlalchemy is unaffected by the sqlite adapter + sql.to_sql(df, "test_time3", self.conn, index=False) + if self.flavor == "sqlite": + res = sql.read_sql_query("SELECT * FROM test_time3", self.conn) + ref = df.applymap(lambda _: _.strftime("%H:%M:%S.%f")) + tm.assert_frame_equal(ref, res) + res = sql.read_sql_table("test_time3", self.conn) + tm.assert_frame_equal(df, res) + + def test_mixed_dtype_insert(self): + # see GH6509 + s1 = Series(2 ** 25 + 1, dtype=np.int32) + s2 = Series(0.0, dtype=np.float32) + df = DataFrame({"s1": s1, "s2": s2}) + + # write and read again + df.to_sql("test_read_write", self.conn, index=False) + df2 = sql.read_sql_table("test_read_write", self.conn) + + tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True) + + def test_nan_numeric(self): + # NaNs in numeric float column + df = DataFrame({"A": [0, 1, 2], "B": [0.2, np.nan, 5.6]}) + df.to_sql("test_nan", self.conn, index=False) + + # with read_table + result = sql.read_sql_table("test_nan", self.conn) + tm.assert_frame_equal(result, df) + + # with read_sql + result = sql.read_sql_query("SELECT * FROM test_nan", self.conn) + tm.assert_frame_equal(result, df) + + def test_nan_fullcolumn(self): + # full 
NaN column (numeric float column) + df = DataFrame({"A": [0, 1, 2], "B": [np.nan, np.nan, np.nan]}) + df.to_sql("test_nan", self.conn, index=False) + + # with read_table + result = sql.read_sql_table("test_nan", self.conn) + tm.assert_frame_equal(result, df) + + # with read_sql -> no type info from table -> stays None + df["B"] = df["B"].astype("object") + df["B"] = None + result = sql.read_sql_query("SELECT * FROM test_nan", self.conn) + tm.assert_frame_equal(result, df) + + def test_nan_string(self): + # NaNs in string column + df = DataFrame({"A": [0, 1, 2], "B": ["a", "b", np.nan]}) + df.to_sql("test_nan", self.conn, index=False) + + # NaNs are coming back as None + df.loc[2, "B"] = None + + # with read_table + result = sql.read_sql_table("test_nan", self.conn) + tm.assert_frame_equal(result, df) + + # with read_sql + result = sql.read_sql_query("SELECT * FROM test_nan", self.conn) + tm.assert_frame_equal(result, df) + + def _get_index_columns(self, tbl_name): + from sqlalchemy.engine import reflection + + insp = reflection.Inspector.from_engine(self.conn) + ixs = insp.get_indexes(tbl_name) + ixs = [i["column_names"] for i in ixs] + return ixs + + def test_to_sql_save_index(self): + self._to_sql_save_index() + + def test_transactions(self): + self._transaction_test() + + def test_get_schema_create_table(self): + # Use a dataframe without a bool column, since MySQL converts bool to + # TINYINT (which read_sql_table returns as an int and causes a dtype + # mismatch) + + self._load_test3_data() + tbl = "test_get_schema_create_table" + create_sql = sql.get_schema(self.test_frame3, tbl, con=self.conn) + blank_test_df = self.test_frame3.iloc[:0] + + self.drop_table(tbl) + self.conn.execute(create_sql) + returned_df = sql.read_sql_table(tbl, self.conn) + tm.assert_frame_equal(returned_df, blank_test_df, check_index_type=False) + self.drop_table(tbl) + + def test_dtype(self): + cols = ["A", "B"] + data = [(0.8, True), (0.9, None)] + df = DataFrame(data, columns=cols)
+ df.to_sql("dtype_test", self.conn) + df.to_sql("dtype_test2", self.conn, dtype={"B": sqlalchemy.TEXT}) + meta = sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + sqltype = meta.tables["dtype_test2"].columns["B"].type + assert isinstance(sqltype, sqlalchemy.TEXT) + msg = "The type of B is not a SQLAlchemy type" + with pytest.raises(ValueError, match=msg): + df.to_sql("error", self.conn, dtype={"B": str}) + + # GH9083 + df.to_sql("dtype_test3", self.conn, dtype={"B": sqlalchemy.String(10)}) + meta.reflect() + sqltype = meta.tables["dtype_test3"].columns["B"].type + assert isinstance(sqltype, sqlalchemy.String) + assert sqltype.length == 10 + + # single dtype + df.to_sql("single_dtype_test", self.conn, dtype=sqlalchemy.TEXT) + meta = sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + sqltypea = meta.tables["single_dtype_test"].columns["A"].type + sqltypeb = meta.tables["single_dtype_test"].columns["B"].type + assert isinstance(sqltypea, sqlalchemy.TEXT) + assert isinstance(sqltypeb, sqlalchemy.TEXT) + + def test_notna_dtype(self): + cols = { + "Bool": Series([True, None]), + "Date": Series([datetime(2012, 5, 1), None]), + "Int": Series([1, None], dtype="object"), + "Float": Series([1.1, None]), + } + df = DataFrame(cols) + + tbl = "notna_dtype_test" + df.to_sql(tbl, self.conn) + returned_df = sql.read_sql_table(tbl, self.conn) # noqa + meta = sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + if self.flavor == "mysql": + my_type = sqltypes.Integer + else: + my_type = sqltypes.Boolean + + col_dict = meta.tables[tbl].columns + + assert isinstance(col_dict["Bool"].type, my_type) + assert isinstance(col_dict["Date"].type, sqltypes.DateTime) + assert isinstance(col_dict["Int"].type, sqltypes.Integer) + assert isinstance(col_dict["Float"].type, sqltypes.Float) + + def test_double_precision(self): + V = 1.23456789101112131415 + + df = DataFrame( + { + "f32": Series([V], dtype="float32"), + "f64": Series([V], dtype="float64"), + 
"f64_as_f32": Series([V], dtype="float64"), + "i32": Series([5], dtype="int32"), + "i64": Series([5], dtype="int64"), + } + ) + + df.to_sql( + "test_dtypes", + self.conn, + index=False, + if_exists="replace", + dtype={"f64_as_f32": sqlalchemy.Float(precision=23)}, + ) + res = sql.read_sql_table("test_dtypes", self.conn) + + # check precision of float64 + assert np.round(df["f64"].iloc[0], 14) == np.round(res["f64"].iloc[0], 14) + + # check sql types + meta = sqlalchemy.schema.MetaData(bind=self.conn) + meta.reflect() + col_dict = meta.tables["test_dtypes"].columns + assert str(col_dict["f32"].type) == str(col_dict["f64_as_f32"].type) + assert isinstance(col_dict["f32"].type, sqltypes.Float) + assert isinstance(col_dict["f64"].type, sqltypes.Float) + assert isinstance(col_dict["i32"].type, sqltypes.Integer) + assert isinstance(col_dict["i64"].type, sqltypes.BigInteger) + + def test_connectable_issue_example(self): + # This tests the example raised in issue + # https://github.com/pandas-dev/pandas/issues/10104 + + def foo(connection): + query = "SELECT test_foo_data FROM test_foo_data" + return sql.read_sql_query(query, con=connection) + + def bar(connection, data): + data.to_sql(name="test_foo_data", con=connection, if_exists="append") + + def main(connectable): + with connectable.connect() as conn: + with conn.begin(): + foo_data = conn.run_callable(foo) + conn.run_callable(bar, foo_data) + + DataFrame({"test_foo_data": [0, 1, 2]}).to_sql("test_foo_data", self.conn) + main(self.conn) + + def test_temporary_table(self): + test_data = "Hello, World!" 
+ expected = DataFrame({"spam": [test_data]}) + Base = declarative.declarative_base() + + class Temporary(Base): + __tablename__ = "temp_test" + __table_args__ = {"prefixes": ["TEMPORARY"]} + id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) + spam = sqlalchemy.Column(sqlalchemy.Unicode(30), nullable=False) + + Session = sa_session.sessionmaker(bind=self.conn) + session = Session() + with session.transaction: + conn = session.connection() + Temporary.__table__.create(conn) + session.add(Temporary(spam=test_data)) + session.flush() + df = sql.read_sql_query(sql=sqlalchemy.select([Temporary.spam]), con=conn) + + tm.assert_frame_equal(df, expected) + + +class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy): + def test_transactions(self): + pytest.skip("Nested transactions rollbacks don't work with Pandas") + + +class _TestSQLiteAlchemy: + """ + Test the sqlalchemy backend against an in-memory sqlite database. + + """ + + flavor = "sqlite" + + @classmethod + def connect(cls): + return sqlalchemy.create_engine("sqlite:///:memory:") + + @classmethod + def setup_driver(cls): + # sqlite3 is built-in + cls.driver = None + + def test_default_type_conversion(self): + df = sql.read_sql_table("types_test_data", self.conn) + + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + + # sqlite has no boolean type, so integer type is returned + assert issubclass(df.BoolCol.dtype.type, np.integer) + + # Int column with NA values stays as float + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + + # Non-native Bool column with NA values stays as float + assert issubclass(df.BoolColWithNull.dtype.type, np.floating) + + def test_default_date_load(self): + df = sql.read_sql_table("types_test_data", self.conn) + + # IMPORTANT - sqlite has no native date type, so shouldn't parse, but + assert not issubclass(df.DateCol.dtype.type, np.datetime64) + + def test_bigint_warning(self): + # test no warning 
for BIGINT (to support int64) is raised (GH7433) + df = DataFrame({"a": [1, 2]}, dtype="int64") + df.to_sql("test_bigintwarning", self.conn, index=False) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + sql.read_sql_table("test_bigintwarning", self.conn) + assert len(w) == 0 + + +class _TestMySQLAlchemy: + """ + Test the sqlalchemy backend against an MySQL database. + + """ + + flavor = "mysql" + + @classmethod + def connect(cls): + url = "mysql+{driver}://root@localhost/pandas_nosetest" + return sqlalchemy.create_engine( + url.format(driver=cls.driver), connect_args=cls.connect_args + ) + + @classmethod + def setup_driver(cls): + pymysql = pytest.importorskip("pymysql") + cls.driver = "pymysql" + cls.connect_args = {"client_flag": pymysql.constants.CLIENT.MULTI_STATEMENTS} + + def test_default_type_conversion(self): + df = sql.read_sql_table("types_test_data", self.conn) + + assert issubclass(df.FloatCol.dtype.type, np.floating) + assert issubclass(df.IntCol.dtype.type, np.integer) + + # MySQL has no real BOOL type (it's an alias for TINYINT) + assert issubclass(df.BoolCol.dtype.type, np.integer) + + # Int column with NA values stays as float + assert issubclass(df.IntColWithNull.dtype.type, np.floating) + + # Bool column with NA = int column with NA values => becomes float + assert issubclass(df.BoolColWithNull.dtype.type, np.floating) + + def test_read_procedure(self): + import pymysql + + # see GH7324. 
Although it is more an api test, it is added to the + # mysql tests as sqlite does not have stored procedures + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + df.to_sql("test_procedure", self.conn, index=False) + + proc = """DROP PROCEDURE IF EXISTS get_testdb; + + CREATE PROCEDURE get_testdb () + + BEGIN + SELECT * FROM test_procedure; + END""" + + connection = self.conn.connect() + trans = connection.begin() + try: + r1 = connection.execute(proc) # noqa + trans.commit() + except pymysql.Error: + trans.rollback() + raise + + res1 = sql.read_sql_query("CALL get_testdb();", self.conn) + tm.assert_frame_equal(df, res1) + + # test delegation to read_sql_query + res2 = sql.read_sql("CALL get_testdb();", self.conn) + tm.assert_frame_equal(df, res2) + + +class _TestPostgreSQLAlchemy: + """ + Test the sqlalchemy backend against an PostgreSQL database. + + """ + + flavor = "postgresql" + + @classmethod + def connect(cls): + url = "postgresql+{driver}://postgres@localhost/pandas_nosetest" + return sqlalchemy.create_engine(url.format(driver=cls.driver)) + + @classmethod + def setup_driver(cls): + pytest.importorskip("psycopg2") + cls.driver = "psycopg2" + + def test_schema_support(self): + # only test this for postgresql (schema's not supported in + # mysql/sqlite) + df = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]}) + + # create a schema + self.conn.execute("DROP SCHEMA IF EXISTS other CASCADE;") + self.conn.execute("CREATE SCHEMA other;") + + # write dataframe to different schema's + df.to_sql("test_schema_public", self.conn, index=False) + df.to_sql( + "test_schema_public_explicit", self.conn, index=False, schema="public" + ) + df.to_sql("test_schema_other", self.conn, index=False, schema="other") + + # read dataframes back in + res1 = sql.read_sql_table("test_schema_public", self.conn) + tm.assert_frame_equal(df, res1) + res2 = sql.read_sql_table("test_schema_public_explicit", self.conn) + tm.assert_frame_equal(df, res2) + res3 = 
def test_copy_from_callable_insertion_method(self):
    """
    Round-trip a frame through ``to_sql`` using a callable ``method``.

    The callable streams the rows as CSV into a ``COPY ... FROM STDIN``
    statement and the table is then read back and compared.
    """
    # GH 8953
    # Example in io.rst found under _io.sql.method
    # not available in sqlite, mysql
    def psql_insert_copy(table, conn, keys, data_iter):
        """Insert ``data_iter`` rows into ``table`` with a CSV ``COPY``."""
        columns = ", ".join(f'"{k}"' for k in keys)
        # Qualify the table with its schema only when one is set.
        table_name = f"{table.schema}.{table.name}" if table.schema else table.name
        sql_query = f"COPY {table_name} ({columns}) FROM STDIN WITH CSV"

        # gets a DBAPI connection that can provide a cursor
        raw_conn = conn.connection
        with raw_conn.cursor() as cur:
            csv_buffer = StringIO()
            csv.writer(csv_buffer).writerows(data_iter)
            # Rewind so the COPY reads the buffer from its start.
            csv_buffer.seek(0)
            cur.copy_expert(sql=sql_query, file=csv_buffer)

    expected = DataFrame({"col1": [1, 2], "col2": [0.1, 0.2], "col3": ["a", "n"]})
    expected.to_sql(
        "test_copy_insert", self.conn, index=False, method=psql_insert_copy
    )
    roundtripped = sql.read_sql_table("test_copy_insert", self.conn)
    tm.assert_frame_equal(roundtripped, expected)
def test_datetime_date(self):
    """
    Write a column of ``datetime.date`` values and read it back.

    For the ``sqlite`` flavor the values are expected back as strings; for
    ``mysql`` they are expected unchanged. Any other flavor is not checked.
    """
    # test support for datetime.date
    dates = [date(2014, 1, 1), date(2014, 1, 2)]
    frame = DataFrame(dates, columns=["a"])
    frame.to_sql("test_date", self.conn, index=False)
    roundtripped = read_sql_query("SELECT * FROM test_date", self.conn)

    flavor = self.flavor
    if flavor == "sqlite":
        # comes back as strings
        expected = frame.astype(str)
    elif flavor == "mysql":
        expected = frame
    else:
        return
    tm.assert_frame_equal(roundtripped, expected)
def _get_sqlite_column_type(self, table, column):
    """
    Return the declared SQLite type of ``column`` in ``table``.

    The lookup runs ``PRAGMA table_info`` on ``self.conn`` and returns the
    type of the first row whose name equals ``column``.

    Raises
    ------
    ValueError
        If no row of the pragma output is named ``column``.
    """
    table_info = self.conn.execute(f"PRAGMA table_info({table})")
    # Every row is unpacked into six fields; only name and type are used.
    matching_types = (
        col_type
        for _cid, col_name, col_type, _not_null, _default, _pk in table_info
        if col_name == column
    )
    not_found = object()
    declared = next(matching_types, not_found)
    if declared is not_found:
        raise ValueError(f"Table {table}, column {column} not found")
    return declared
def date_format(dt):
    """Returns date in YYYYMMDD format."""
    return dt.strftime("%Y%m%d")


# Maps a Python/NumPy type to the callable that renders a value of that type
# as a SQL literal.
_formatters = {
    datetime: "'{}'".format,
    str: "'{}'".format,
    np.str_: "'{}'".format,
    bytes: "'{}'".format,
    float: "{:.8f}".format,
    int: "{:d}".format,
    type(None): lambda x: "NULL",
    np.float64: "{:.10f}".format,
    bool: "'{!s}'".format,
}


def format_query(sql, *args):
    """
    Interpolate ``args`` into a ``%``-style SQL template as literals.

    Each argument is rendered with the formatter registered in
    ``_formatters`` for its type. The exact type is tried first, then its
    base classes in MRO order, so subclasses of a supported type are
    formatted like that type. Float NaN values are rendered as ``NULL``.

    This is plain string interpolation with no escaping; it is only meant
    for building statements from trusted test data.

    Parameters
    ----------
    sql : str
        Template containing one ``%s`` placeholder per argument.
    *args
        Values to render into the template, in order.

    Returns
    -------
    str
        The template with every placeholder replaced.

    Raises
    ------
    KeyError
        If neither the type of an argument nor any of its base classes has
        a registered formatter.
    """
    processed_args = []
    for arg in args:
        if isinstance(arg, float) and isna(arg):
            arg = None

        arg_type = type(arg)
        # The MRO starts with the type itself, so exact matches still win.
        formatter = next(
            (_formatters[base] for base in arg_type.__mro__ if base in _formatters),
            None,
        )
        if formatter is None:
            raise KeyError(arg_type)
        processed_args.append(formatter(arg))

    return sql % tuple(processed_args)
@pytest.fixture(autouse=True)
def setup_method(self, request, datapath):
    """
    Provide each test with an in-memory SQLite connection.

    Before the test runs, the requesting test function is stored on
    ``self.method`` and a new ``:memory:`` connection on ``self.conn``.
    After the test, both attributes are assigned again, so ``self.conn``
    is a usable connection even if the test closed its own.

    ``datapath`` is requested as a fixture but not used in the body.
    """
    self.method = request.function
    self.conn = sqlite3.connect(":memory:")

    # In some test cases we may close db connection
    # Re-open conn here so we can perform cleanup in teardown
    yield
    # NOTE(review): the connection created before the yield is replaced
    # here without being closed -- confirm that is intentional.
    self.method = request.function
    self.conn = sqlite3.connect(":memory:")
def test_na_roundtrip(self):
    # Placeholder: the body is empty, so no NA round-trip behaviour is
    # actually exercised by this test.
    pass
def test_onecolumn_of_integer(self):
    """
    Check that a frame with a single integer column transfers to SQL.

    The column is summed through a raw query on the connection, then the
    table is read back and compared with the original frame.
    """
    # GH 3628
    # a column_of_integers dataframe should transfer well to sql
    mono_df = DataFrame([1, 2], columns=["c0"])
    sql.to_sql(mono_df, con=self.conn, name="mono_df", index=False)

    # computing the sum via sql
    con_x = self.conn
    the_sum = 0
    for record in con_x.execute("select * from mono_df"):
        the_sum += record[0]
    # it should not fail, and gives 3 ( Issue #3628 )
    assert the_sum == 3

    roundtripped = sql.read_sql("select * from mono_df", con_x)
    tm.assert_frame_equal(roundtripped, mono_df)
'table_if_exists' already exists" + with pytest.raises(ValueError, match=msg): + sql.to_sql( + frame=df_if_exists_1, con=self.conn, name=table_name, if_exists="fail" + ) + # test if_exists='replace' + sql.to_sql( + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists="replace", + index=False, + ) + assert tquery(sql_select, con=self.conn) == [(1, "A"), (2, "B")] + sql.to_sql( + frame=df_if_exists_2, + con=self.conn, + name=table_name, + if_exists="replace", + index=False, + ) + assert tquery(sql_select, con=self.conn) == [(3, "C"), (4, "D"), (5, "E")] + clean_up(table_name) + + # test if_exists='append' + sql.to_sql( + frame=df_if_exists_1, + con=self.conn, + name=table_name, + if_exists="fail", + index=False, + ) + assert tquery(sql_select, con=self.conn) == [(1, "A"), (2, "B")] + sql.to_sql( + frame=df_if_exists_2, + con=self.conn, + name=table_name, + if_exists="append", + index=False, + ) + assert tquery(sql_select, con=self.conn) == [ + (1, "A"), + (2, "B"), + (3, "C"), + (4, "D"), + (5, "E"), + ] + clean_up(table_name) + + +@pytest.mark.single +@pytest.mark.db +@pytest.mark.skip( + reason="gh-13611: there is no support for MySQL if SQLAlchemy is not installed" +) +class TestXMySQL(MySQLMixIn): + @pytest.fixture(autouse=True, scope="class") + def setup_class(cls): + pymysql = pytest.importorskip("pymysql") + pymysql.connect(host="localhost", user="root", passwd="", db="pandas_nosetest") + try: + pymysql.connect(read_default_group="pandas") + except pymysql.ProgrammingError: + raise RuntimeError( + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf." + ) + except pymysql.Error: + raise RuntimeError( + "Cannot connect to database. " + "Create a group of connection parameters under the heading " + "[pandas] in your system's mysql default file, " + "typically located at ~/.my.cnf or /etc/.my.cnf." 
@pytest.fixture(autouse=True)
def setup_method(self, request, datapath):
    """
    Open the MySQL connection used by each test in this class.

    The connection to the local ``pandas_nosetest`` database is stored on
    ``self.conn``, which the tests in this class operate on. A second
    connection through the ``[pandas]`` option group is opened only to
    check that the group is configured, and is closed straight away.

    ``datapath`` is requested as a fixture but not used in the body.

    Raises
    ------
    RuntimeError
        If connecting through the ``[pandas]`` option group fails.
    """
    pymysql = pytest.importorskip("pymysql")
    # Keep this connection: previously the result was discarded, so
    # ``self.conn`` was never set and the socket was leaked.
    self.conn = pymysql.connect(
        host="localhost", user="root", passwd="", db="pandas_nosetest"
    )
    try:
        # Probe only; close it so the connection is not leaked.
        pymysql.connect(read_default_group="pandas").close()
    except pymysql.ProgrammingError as err:
        self.conn.close()
        raise RuntimeError(
            "Create a group of connection parameters under the heading "
            "[pandas] in your system's mysql default file, "
            "typically located at ~/.my.cnf or /etc/.my.cnf."
        ) from err
    except pymysql.Error as err:
        self.conn.close()
        raise RuntimeError(
            "Cannot connect to database. "
            "Create a group of connection parameters under the heading "
            "[pandas] in your system's mysql default file, "
            "typically located at ~/.my.cnf or /etc/.my.cnf."
        ) from err

    self.method = request.function
def test_schema(self):
    """
    Check the DDL produced by ``sql.get_schema`` for a time-series frame.

    First every two-token schema line whose first token is ``A`` must
    declare ``DATETIME``. Then a schema built with ``keys=["A", "B"]``
    must contain the composite primary key and must execute on
    ``self.conn`` after dropping any existing ``test`` table.
    """
    frame = tm.makeTimeDataFrame()
    create_sql = sql.get_schema(frame, "test")
    for line in create_sql.splitlines():
        tokens = line.split(" ")
        if len(tokens) == 2 and tokens[0] == "A":
            assert tokens[1] == "DATETIME"

    frame = tm.makeTimeDataFrame()
    drop_sql = "DROP TABLE IF EXISTS test"
    create_sql = sql.get_schema(frame, "test", keys=["A", "B"])
    assert "PRIMARY KEY (`A`, `B`)" in create_sql
    cur = self.conn.cursor()
    cur.execute(drop_sql)
    cur.execute(create_sql)
def test_na_roundtrip(self):
    # Placeholder: the body is empty, so no NA round-trip behaviour is
    # actually exercised by this test.
    pass
def test_if_exists(self):
    """
    Exercise every ``if_exists`` mode of ``sql.to_sql``.

    Covers an invalid value, ``"fail"`` on an existing table,
    ``"replace"`` and ``"append"``. The table is dropped between sections
    so they do not depend on each other.
    """
    df_if_exists_1 = DataFrame({"col1": [1, 2], "col2": ["A", "B"]})
    df_if_exists_2 = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"]})
    table_name = "table_if_exists"
    sql_select = f"SELECT * FROM {table_name}"

    def clean_up(test_table_to_drop):
        """
        Drops tables created from individual tests
        so no dependencies arise from sequential tests
        """
        self.drop_table(test_table_to_drop)

    # test if invalid value for if_exists raises appropriate error
    # (an empty ``match`` pattern accepts any message, so pin the real one)
    msg = "'notvalidvalue' is not valid for if_exists"
    with pytest.raises(ValueError, match=msg):
        sql.to_sql(
            frame=df_if_exists_1,
            con=self.conn,
            name=table_name,
            if_exists="notvalidvalue",
        )
    clean_up(table_name)

    # test if_exists='fail'
    sql.to_sql(
        frame=df_if_exists_1,
        con=self.conn,
        name=table_name,
        if_exists="fail",
        index=False,
    )
    msg = "Table 'table_if_exists' already exists"
    with pytest.raises(ValueError, match=msg):
        sql.to_sql(
            frame=df_if_exists_1, con=self.conn, name=table_name, if_exists="fail"
        )

    # test if_exists='replace'
    sql.to_sql(
        frame=df_if_exists_1,
        con=self.conn,
        name=table_name,
        if_exists="replace",
        index=False,
    )
    assert tquery(sql_select, con=self.conn) == [(1, "A"), (2, "B")]
    sql.to_sql(
        frame=df_if_exists_2,
        con=self.conn,
        name=table_name,
        if_exists="replace",
        index=False,
    )
    assert tquery(sql_select, con=self.conn) == [(3, "C"), (4, "D"), (5, "E")]
    clean_up(table_name)

    # test if_exists='append'
    sql.to_sql(
        frame=df_if_exists_1,
        con=self.conn,
        name=table_name,
        if_exists="fail",
        index=False,
    )
    assert tquery(sql_select, con=self.conn) == [(1, "A"), (2, "B")]
    sql.to_sql(
        frame=df_if_exists_2,
        con=self.conn,
        name=table_name,
        if_exists="append",
        index=False,
    )
    assert tquery(sql_select, con=self.conn) == [
        (1, "A"),
        (2, "B"),
        (3, "C"),
        (4, "D"),
        (5, "E"),
    ]
    clean_up(table_name)
os.path.join(self.dirpath, "stata3_115.dta") + self.dta3_117 = os.path.join(self.dirpath, "stata3_117.dta") + self.csv3 = os.path.join(self.dirpath, "stata3.csv") + + self.dta4_113 = os.path.join(self.dirpath, "stata4_113.dta") + self.dta4_114 = os.path.join(self.dirpath, "stata4_114.dta") + self.dta4_115 = os.path.join(self.dirpath, "stata4_115.dta") + self.dta4_117 = os.path.join(self.dirpath, "stata4_117.dta") + + self.dta_encoding = os.path.join(self.dirpath, "stata1_encoding.dta") + self.dta_encoding_118 = os.path.join(self.dirpath, "stata1_encoding_118.dta") + + self.csv14 = os.path.join(self.dirpath, "stata5.csv") + self.dta14_113 = os.path.join(self.dirpath, "stata5_113.dta") + self.dta14_114 = os.path.join(self.dirpath, "stata5_114.dta") + self.dta14_115 = os.path.join(self.dirpath, "stata5_115.dta") + self.dta14_117 = os.path.join(self.dirpath, "stata5_117.dta") + + self.csv15 = os.path.join(self.dirpath, "stata6.csv") + self.dta15_113 = os.path.join(self.dirpath, "stata6_113.dta") + self.dta15_114 = os.path.join(self.dirpath, "stata6_114.dta") + self.dta15_115 = os.path.join(self.dirpath, "stata6_115.dta") + self.dta15_117 = os.path.join(self.dirpath, "stata6_117.dta") + + self.dta16_115 = os.path.join(self.dirpath, "stata7_115.dta") + self.dta16_117 = os.path.join(self.dirpath, "stata7_117.dta") + + self.dta17_113 = os.path.join(self.dirpath, "stata8_113.dta") + self.dta17_115 = os.path.join(self.dirpath, "stata8_115.dta") + self.dta17_117 = os.path.join(self.dirpath, "stata8_117.dta") + + self.dta18_115 = os.path.join(self.dirpath, "stata9_115.dta") + self.dta18_117 = os.path.join(self.dirpath, "stata9_117.dta") + + self.dta19_115 = os.path.join(self.dirpath, "stata10_115.dta") + self.dta19_117 = os.path.join(self.dirpath, "stata10_117.dta") + + self.dta20_115 = os.path.join(self.dirpath, "stata11_115.dta") + self.dta20_117 = os.path.join(self.dirpath, "stata11_117.dta") + + self.dta21_117 = os.path.join(self.dirpath, "stata12_117.dta") + + 
def read_dta(self, file):
    """
    Read a Stata file with the reader settings these tests rely on.

    ``file`` is passed straight to ``read_stata`` with date conversion
    enabled; the resulting frame is returned.
    """
    # Legacy default reader configuration
    reader_options = {"convert_dates": True}
    return read_stata(file, **reader_options)
+ expected = DataFrame( + [(np.nan, np.nan, np.nan, np.nan, np.nan)], + columns=["float_miss", "double_miss", "byte_miss", "int_miss", "long_miss"], + ) + + # this is an oddity as really the nan should be float64, but + # the casting doesn't fail so need to match stata here + expected["float_miss"] = expected["float_miss"].astype(np.float32) + + tm.assert_frame_equal(parsed, expected) + + def test_read_dta2(self): + + expected = DataFrame.from_records( + [ + ( + datetime(2006, 11, 19, 23, 13, 20), + 1479596223000, + datetime(2010, 1, 20), + datetime(2010, 1, 8), + datetime(2010, 1, 1), + datetime(1974, 7, 1), + datetime(2010, 1, 1), + datetime(2010, 1, 1), + ), + ( + datetime(1959, 12, 31, 20, 3, 20), + -1479590, + datetime(1953, 10, 2), + datetime(1948, 6, 10), + datetime(1955, 1, 1), + datetime(1955, 7, 1), + datetime(1955, 1, 1), + datetime(2, 1, 1), + ), + (pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT, pd.NaT), + ], + columns=[ + "datetime_c", + "datetime_big_c", + "date", + "weekly_date", + "monthly_date", + "quarterly_date", + "half_yearly_date", + "yearly_date", + ], + ) + expected["yearly_date"] = expected["yearly_date"].astype("O") + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + parsed_114 = self.read_dta(self.dta2_114) + parsed_115 = self.read_dta(self.dta2_115) + parsed_117 = self.read_dta(self.dta2_117) + # 113 is buggy due to limits of date format support in Stata + # parsed_113 = self.read_dta(self.dta2_113) + + # Remove resource warnings + w = [x for x in w if x.category is UserWarning] + + # should get warning for each call to read_dta + assert len(w) == 3 + + # buggy test because of the NaT comparison on certain platforms + # Format 113 test fails since it does not support tc and tC formats + # tm.assert_frame_equal(parsed_113, expected) + tm.assert_frame_equal(parsed_114, expected, check_datetimelike_compat=True) + tm.assert_frame_equal(parsed_115, expected, check_datetimelike_compat=True) + 
@pytest.mark.parametrize("file", ["dta3_113", "dta3_114", "dta3_115", "dta3_117"])
def test_read_dta3(self, file):
    """
    Compare each ``stata3`` fixture file with its CSV counterpart.

    ``file`` names the attribute on ``self`` that holds the ``.dta`` path.
    The CSV data is cast to the dtypes the Stata reader produces before
    the frames are compared.
    """
    dta_path = getattr(self, file)
    parsed = self.read_dta(dta_path)

    # match stata here
    expected = self.read_csv(self.csv3).astype(np.float32)
    for column, dtype in (("year", np.int16), ("quarter", np.int8)):
        expected[column] = expected[column].astype(dtype)

    tm.assert_frame_equal(parsed, expected)
def test_write_dta6(self):
    """
    Write the ``stata3`` CSV data to a Stata file and read it back.

    The index and the ``year`` and ``quarter`` columns are cast to int32
    before writing. The comparison ignores the index type.
    """
    original = self.read_csv(self.csv3)
    original.index.name = "index"
    original.index = original.index.astype(np.int32)
    for column in ("year", "quarter"):
        original[column] = original[column].astype(np.int32)

    with tm.ensure_clean() as path:
        original.to_stata(path, None)
        roundtripped = self.read_dta(path)
        reindexed = roundtripped.set_index("index")
        tm.assert_frame_equal(reindexed, original, check_index_type=False)
test_read_write_dta10(self, version): + original = DataFrame( + data=[["string", "object", 1, 1.1, np.datetime64("2003-12-25")]], + columns=["string", "object", "integer", "floating", "datetime"], + ) + original["object"] = Series(original["object"], dtype=object) + original.index.name = "index" + original.index = original.index.astype(np.int32) + original["integer"] = original["integer"].astype(np.int32) + + with tm.ensure_clean() as path: + original.to_stata(path, {"datetime": "tc"}, version=version) + written_and_read_again = self.read_dta(path) + # original.index is np.int32, read index is np.int64 + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + original, + check_index_type=False, + ) + + def test_stata_doc_examples(self): + with tm.ensure_clean() as path: + df = DataFrame(np.random.randn(10, 2), columns=list("AB")) + df.to_stata(path) + + def test_write_preserves_original(self): + # 9795 + np.random.seed(423) + df = pd.DataFrame(np.random.randn(5, 4), columns=list("abcd")) + df.loc[2, "a":"c"] = np.nan + df_copy = df.copy() + with tm.ensure_clean() as path: + df.to_stata(path, write_index=False) + tm.assert_frame_equal(df, df_copy) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_encoding(self, version): + + # GH 4626, proper encoding handling + raw = read_stata(self.dta_encoding) + encoded = read_stata(self.dta_encoding) + result = encoded.kreis1849[0] + + expected = raw.kreis1849[0] + assert result == expected + assert isinstance(result, str) + + with tm.ensure_clean() as path: + encoded.to_stata(path, write_index=False, version=version) + reread_encoded = read_stata(path) + tm.assert_frame_equal(encoded, reread_encoded) + + def test_read_write_dta11(self): + original = DataFrame( + [(1, 2, 3, 4)], + columns=[ + "good", + "b\u00E4d", + "8number", + "astringwithmorethan32characters______", + ], + ) + formatted = DataFrame( + [(1, 2, 3, 4)], + columns=["good", "b_d", "_8number", 
"astringwithmorethan32characters_"], + ) + formatted.index.name = "index" + formatted = formatted.astype(np.int32) + + with tm.ensure_clean() as path: + with tm.assert_produces_warning(pd.io.stata.InvalidColumnName): + original.to_stata(path, None) + + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_read_write_dta12(self, version): + original = DataFrame( + [(1, 2, 3, 4, 5, 6)], + columns=[ + "astringwithmorethan32characters_1", + "astringwithmorethan32characters_2", + "+", + "-", + "short", + "delete", + ], + ) + formatted = DataFrame( + [(1, 2, 3, 4, 5, 6)], + columns=[ + "astringwithmorethan32characters_", + "_0astringwithmorethan32character", + "_", + "_1_", + "_short", + "_delete", + ], + ) + formatted.index.name = "index" + formatted = formatted.astype(np.int32) + + with tm.ensure_clean() as path: + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always", InvalidColumnName) + original.to_stata(path, None, version=version) + # should get a warning for that format. 
+ assert len(w) == 1 + + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + def test_read_write_dta13(self): + s1 = Series(2 ** 9, dtype=np.int16) + s2 = Series(2 ** 17, dtype=np.int32) + s3 = Series(2 ** 33, dtype=np.int64) + original = DataFrame({"int16": s1, "int32": s2, "int64": s3}) + original.index.name = "index" + + formatted = original + formatted["int64"] = formatted["int64"].astype(np.float64) + + with tm.ensure_clean() as path: + original.to_stata(path) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + @pytest.mark.parametrize( + "file", ["dta14_113", "dta14_114", "dta14_115", "dta14_117"] + ) + def test_read_write_reread_dta14(self, file, parsed_114, version): + file = getattr(self, file) + parsed = self.read_dta(file) + parsed.index.name = "index" + + expected = self.read_csv(self.csv14) + cols = ["byte_", "int_", "long_", "float_", "double_"] + for col in cols: + expected[col] = expected[col]._convert(datetime=True, numeric=True) + expected["float_"] = expected["float_"].astype(np.float32) + expected["date_td"] = pd.to_datetime(expected["date_td"], errors="coerce") + + tm.assert_frame_equal(parsed_114, parsed) + + with tm.ensure_clean() as path: + parsed_114.to_stata(path, {"date_td": "td"}, version=version) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), parsed_114) + + @pytest.mark.parametrize( + "file", ["dta15_113", "dta15_114", "dta15_115", "dta15_117"] + ) + def test_read_write_reread_dta15(self, file): + + expected = self.read_csv(self.csv15) + expected["byte_"] = expected["byte_"].astype(np.int8) + expected["int_"] = expected["int_"].astype(np.int16) + expected["long_"] = expected["long_"].astype(np.int32) + expected["float_"] = 
expected["float_"].astype(np.float32) + expected["double_"] = expected["double_"].astype(np.float64) + expected["date_td"] = expected["date_td"].apply( + datetime.strptime, args=("%Y-%m-%d",) + ) + + file = getattr(self, file) + parsed = self.read_dta(file) + + tm.assert_frame_equal(expected, parsed) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_timestamp_and_label(self, version): + original = DataFrame([(1,)], columns=["variable"]) + time_stamp = datetime(2000, 2, 29, 14, 21) + data_label = "This is a data file." + with tm.ensure_clean() as path: + original.to_stata( + path, time_stamp=time_stamp, data_label=data_label, version=version + ) + + with StataReader(path) as reader: + assert reader.time_stamp == "29 Feb 2000 14:21" + assert reader.data_label == data_label + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_invalid_timestamp(self, version): + original = DataFrame([(1,)], columns=["variable"]) + time_stamp = "01 Jan 2000, 00:00:00" + with tm.ensure_clean() as path: + msg = "time_stamp should be datetime type" + with pytest.raises(ValueError, match=msg): + original.to_stata(path, time_stamp=time_stamp, version=version) + + def test_numeric_column_names(self): + original = DataFrame(np.reshape(np.arange(25.0), (5, 5))) + original.index.name = "index" + with tm.ensure_clean() as path: + # should get a warning for that format. 
+ with tm.assert_produces_warning(InvalidColumnName): + original.to_stata(path) + + written_and_read_again = self.read_dta(path) + written_and_read_again = written_and_read_again.set_index("index") + columns = list(written_and_read_again.columns) + convert_col_name = lambda x: int(x[1]) + written_and_read_again.columns = map(convert_col_name, columns) + tm.assert_frame_equal(original, written_and_read_again) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + def test_nan_to_missing_value(self, version): + s1 = Series(np.arange(4.0), dtype=np.float32) + s2 = Series(np.arange(4.0), dtype=np.float64) + s1[::2] = np.nan + s2[1::2] = np.nan + original = DataFrame({"s1": s1, "s2": s2}) + original.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path, version=version) + written_and_read_again = self.read_dta(path) + written_and_read_again = written_and_read_again.set_index("index") + tm.assert_frame_equal(written_and_read_again, original) + + def test_no_index(self): + columns = ["x", "y"] + original = DataFrame(np.reshape(np.arange(10.0), (5, 2)), columns=columns) + original.index.name = "index_not_written" + with tm.ensure_clean() as path: + original.to_stata(path, write_index=False) + written_and_read_again = self.read_dta(path) + with pytest.raises(KeyError, match=original.index.name): + written_and_read_again["index_not_written"] + + def test_string_no_dates(self): + s1 = Series(["a", "A longer string"]) + s2 = Series([1.0, 2.0], dtype=np.float64) + original = DataFrame({"s1": s1, "s2": s2}) + original.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), original) + + def test_large_value_conversion(self): + s0 = Series([1, 99], dtype=np.int8) + s1 = Series([1, 127], dtype=np.int8) + s2 = Series([1, 2 ** 15 - 1], dtype=np.int16) + s3 = Series([1, 2 ** 63 - 1], dtype=np.int64) + 
original = DataFrame({"s0": s0, "s1": s1, "s2": s2, "s3": s3}) + original.index.name = "index" + with tm.ensure_clean() as path: + with tm.assert_produces_warning(PossiblePrecisionLoss): + original.to_stata(path) + + written_and_read_again = self.read_dta(path) + modified = original.copy() + modified["s1"] = Series(modified["s1"], dtype=np.int16) + modified["s2"] = Series(modified["s2"], dtype=np.int32) + modified["s3"] = Series(modified["s3"], dtype=np.float64) + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + def test_dates_invalid_column(self): + original = DataFrame([datetime(2006, 11, 19, 23, 13, 20)]) + original.index.name = "index" + with tm.ensure_clean() as path: + with tm.assert_produces_warning(InvalidColumnName): + original.to_stata(path, {0: "tc"}) + + written_and_read_again = self.read_dta(path) + modified = original.copy() + modified.columns = ["_0"] + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + def test_105(self): + # Data obtained from: + # http://go.worldbank.org/ZXY29PVJ21 + dpath = os.path.join(self.dirpath, "S4_EDUC1.dta") + df = pd.read_stata(dpath) + df0 = [[1, 1, 3, -2], [2, 1, 2, -2], [4, 1, 1, -2]] + df0 = pd.DataFrame(df0) + df0.columns = ["clustnum", "pri_schl", "psch_num", "psch_dis"] + df0["clustnum"] = df0["clustnum"].astype(np.int16) + df0["pri_schl"] = df0["pri_schl"].astype(np.int8) + df0["psch_num"] = df0["psch_num"].astype(np.int8) + df0["psch_dis"] = df0["psch_dis"].astype(np.float32) + tm.assert_frame_equal(df.head(3), df0) + + def test_value_labels_old_format(self): + # GH 19417 + # + # Test that value_labels() returns an empty dict if the file format + # predates supporting value labels. 
+ dpath = os.path.join(self.dirpath, "S4_EDUC1.dta") + reader = StataReader(dpath) + assert reader.value_labels() == {} + reader.close() + + def test_date_export_formats(self): + columns = ["tc", "td", "tw", "tm", "tq", "th", "ty"] + conversions = {c: c for c in columns} + data = [datetime(2006, 11, 20, 23, 13, 20)] * len(columns) + original = DataFrame([data], columns=columns) + original.index.name = "index" + expected_values = [ + datetime(2006, 11, 20, 23, 13, 20), # Time + datetime(2006, 11, 20), # Day + datetime(2006, 11, 19), # Week + datetime(2006, 11, 1), # Month + datetime(2006, 10, 1), # Quarter year + datetime(2006, 7, 1), # Half year + datetime(2006, 1, 1), + ] # Year + + expected = DataFrame([expected_values], columns=columns) + expected.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path, conversions) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + def test_write_missing_strings(self): + original = DataFrame([["1"], [None]], columns=["foo"]) + expected = DataFrame([["1"], [""]], columns=["foo"]) + expected.index.name = "index" + with tm.ensure_clean() as path: + original.to_stata(path) + written_and_read_again = self.read_dta(path) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) + @pytest.mark.parametrize("byteorder", [">", "<"]) + def test_bool_uint(self, byteorder, version): + s0 = Series([0, 1, True], dtype=np.bool) + s1 = Series([0, 1, 100], dtype=np.uint8) + s2 = Series([0, 1, 255], dtype=np.uint8) + s3 = Series([0, 1, 2 ** 15 - 100], dtype=np.uint16) + s4 = Series([0, 1, 2 ** 16 - 1], dtype=np.uint16) + s5 = Series([0, 1, 2 ** 31 - 100], dtype=np.uint32) + s6 = Series([0, 1, 2 ** 32 - 1], dtype=np.uint32) + + original = DataFrame( + {"s0": s0, "s1": s1, "s2": s2, "s3": s3, "s4": s4, "s5": s5, "s6": s6} + ) + original.index.name = "index" + 
expected = original.copy() + expected_types = ( + np.int8, + np.int8, + np.int16, + np.int16, + np.int32, + np.int32, + np.float64, + ) + for c, t in zip(expected.columns, expected_types): + expected[c] = expected[c].astype(t) + + with tm.ensure_clean() as path: + original.to_stata(path, byteorder=byteorder, version=version) + written_and_read_again = self.read_dta(path) + written_and_read_again = written_and_read_again.set_index("index") + tm.assert_frame_equal(written_and_read_again, expected) + + def test_variable_labels(self): + with StataReader(self.dta16_115) as rdr: + sr_115 = rdr.variable_labels() + with StataReader(self.dta16_117) as rdr: + sr_117 = rdr.variable_labels() + keys = ("var1", "var2", "var3") + labels = ("label1", "label2", "label3") + for k, v in sr_115.items(): + assert k in sr_117 + assert v == sr_117[k] + assert k in keys + assert v in labels + + def test_minimal_size_col(self): + str_lens = (1, 100, 244) + s = {} + for str_len in str_lens: + s["s" + str(str_len)] = Series( + ["a" * str_len, "b" * str_len, "c" * str_len] + ) + original = DataFrame(s) + with tm.ensure_clean() as path: + original.to_stata(path, write_index=False) + + with StataReader(path) as sr: + typlist = sr.typlist + variables = sr.varlist + formats = sr.fmtlist + for variable, fmt, typ in zip(variables, formats, typlist): + assert int(variable[1:]) == int(fmt[1:-1]) + assert int(variable[1:]) == typ + + def test_excessively_long_string(self): + str_lens = (1, 244, 500) + s = {} + for str_len in str_lens: + s["s" + str(str_len)] = Series( + ["a" * str_len, "b" * str_len, "c" * str_len] + ) + original = DataFrame(s) + msg = ( + r"Fixed width strings in Stata \.dta files are limited to 244" + r" \(or fewer\)\ncharacters\. Column 's500' does not satisfy" + r" this restriction\. Use the\n'version=117' parameter to write" + r" the newer \(Stata 13 and later\) format\." 
+ ) + with pytest.raises(ValueError, match=msg): + with tm.ensure_clean() as path: + original.to_stata(path) + + def test_missing_value_generator(self): + types = ("b", "h", "l") + df = DataFrame([[0.0]], columns=["float_"]) + with tm.ensure_clean() as path: + df.to_stata(path) + with StataReader(path) as rdr: + valid_range = rdr.VALID_RANGE + expected_values = ["." + chr(97 + i) for i in range(26)] + expected_values.insert(0, ".") + for t in types: + offset = valid_range[t][1] + for i in range(0, 27): + val = StataMissingValue(offset + 1 + i) + assert val.string == expected_values[i] + + # Test extremes for floats + val = StataMissingValue(struct.unpack(" 0 + + if layout is not None: + result = self._get_axes_layout(_flatten(axes)) + assert result == layout + + tm.assert_numpy_array_equal( + visible_axes[0].figure.get_size_inches(), + np.array(figsize, dtype=np.float64), + ) + + def _get_axes_layout(self, axes): + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + def _flatten_visible(self, axes): + """ + Flatten axes, and filter only visible + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + + """ + from pandas.plotting._matplotlib.tools import _flatten + + axes = _flatten(axes) + axes = [ax for ax in axes if ax.get_visible()] + return axes + + def _check_has_errorbars(self, axes, xerr=0, yerr=0): + """ + Check axes has expected number of errorbars + + Parameters + ---------- + axes : matplotlib Axes object, or its list-like + xerr : number + expected number of x errorbar + yerr : number + expected number of y errorbar + """ + axes = self._flatten_visible(axes) + for ax in axes: + containers = ax.containers + xerr_count = 0 + yerr_count = 0 + for c in containers: + has_xerr = getattr(c, "has_xerr", False) + has_yerr = getattr(c, "has_yerr", False) + if has_xerr: 
+ xerr_count += 1 + if has_yerr: + yerr_count += 1 + assert xerr == xerr_count + assert yerr == yerr_count + + def _check_box_return_type( + self, returned, return_type, expected_keys=None, check_ax_title=True + ): + """ + Check box returned type is correct + + Parameters + ---------- + returned : object to be tested, returned from boxplot + return_type : str + return_type passed to boxplot + expected_keys : list-like, optional + group labels in subplot case. If not passed, + the function checks assuming boxplot uses single ax + check_ax_title : bool + Whether to check the ax.title is the same as expected_key + Intended to be checked by calling from ``boxplot``. + Normal ``plot`` doesn't attach ``ax.title``, it must be disabled. + """ + from matplotlib.axes import Axes + + types = {"dict": dict, "axes": Axes, "both": tuple} + if expected_keys is None: + # should be fixed when the returning default is changed + if return_type is None: + return_type = "dict" + + assert isinstance(returned, types[return_type]) + if return_type == "both": + assert isinstance(returned.ax, Axes) + assert isinstance(returned.lines, dict) + else: + # should be fixed when the returning default is changed + if return_type is None: + for r in self._flatten_visible(returned): + assert isinstance(r, Axes) + return + + assert isinstance(returned, Series) + + assert sorted(returned.keys()) == sorted(expected_keys) + for key, value in returned.items(): + assert isinstance(value, types[return_type]) + # check returned dict has correct mapping + if return_type == "axes": + if check_ax_title: + assert value.get_title() == key + elif return_type == "both": + if check_ax_title: + assert value.ax.get_title() == key + assert isinstance(value.ax, Axes) + assert isinstance(value.lines, dict) + elif return_type == "dict": + line = value["medians"][0] + axes = line.axes + if check_ax_title: + assert axes.get_title() == key + else: + raise AssertionError + + def _check_grid_settings(self, obj, kinds, kws={}): 
+ # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 + + import matplotlib as mpl + + def is_grid_on(): + xticks = self.plt.gca().xaxis.get_major_ticks() + yticks = self.plt.gca().yaxis.get_major_ticks() + # for mpl 2.2.2, gridOn and gridline.get_visible disagree. + # for new MPL, they are the same. + + if self.mpl_ge_3_1_0: + xoff = all(not g.gridline.get_visible() for g in xticks) + yoff = all(not g.gridline.get_visible() for g in yticks) + else: + xoff = all(not g.gridOn for g in xticks) + yoff = all(not g.gridOn for g in yticks) + + return not (xoff and yoff) + + spndx = 1 + for kind in kinds: + + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=False) + obj.plot(kind=kind, **kws) + assert not is_grid_on() + + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=True) + obj.plot(kind=kind, grid=False, **kws) + assert not is_grid_on() + + if kind != "pie": + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=True) + obj.plot(kind=kind, **kws) + assert is_grid_on() + + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc("axes", grid=False) + obj.plot(kind=kind, grid=True, **kws) + assert is_grid_on() + + def _unpack_cycler(self, rcParams, field="color"): + """ + Auxiliary function for correctly unpacking cycler after MPL >= 1.5 + """ + return [v[field] for v in rcParams["axes.prop_cycle"]] + + +def _check_plot_works(f, filterwarnings="always", **kwargs): + import matplotlib.pyplot as plt + + ret = None + with warnings.catch_warnings(): + warnings.simplefilter(filterwarnings) + try: + try: + fig = kwargs["figure"] + except KeyError: + fig = plt.gcf() + + plt.clf() + + kwargs.get("ax", fig.add_subplot(211)) + ret = f(**kwargs) + + tm.assert_is_valid_plot_return_object(ret) + + if f is pd.plotting.bootstrap_plot: + assert "ax" not in kwargs + else: + kwargs["ax"] = fig.add_subplot(212) + + ret = f(**kwargs) + tm.assert_is_valid_plot_return_object(ret) + + 
with tm.ensure_clean(return_filelike=True) as path: + plt.savefig(path) + finally: + tm.close(fig) + + return ret + + +def curpath(): + pth, _ = os.path.split(os.path.abspath(__file__)) + return pth diff --git a/pandas/tests/plotting/test_backend.py b/pandas/tests/plotting/test_backend.py new file mode 100644 index 00000000..9025f8c3 --- /dev/null +++ b/pandas/tests/plotting/test_backend.py @@ -0,0 +1,104 @@ +import sys +import types + +import pkg_resources +import pytest + +import pandas.util._test_decorators as td + +import pandas + +dummy_backend = types.ModuleType("pandas_dummy_backend") +setattr(dummy_backend, "plot", lambda *args, **kwargs: "used_dummy") + + +@pytest.fixture +def restore_backend(): + """Restore the plotting backend to matplotlib""" + pandas.set_option("plotting.backend", "matplotlib") + yield + pandas.set_option("plotting.backend", "matplotlib") + + +def test_backend_is_not_module(): + msg = "Could not find plotting backend 'not_an_existing_module'." + with pytest.raises(ValueError, match=msg): + pandas.set_option("plotting.backend", "not_an_existing_module") + + assert pandas.options.plotting.backend == "matplotlib" + + +def test_backend_is_correct(monkeypatch, restore_backend): + monkeypatch.setitem(sys.modules, "pandas_dummy_backend", dummy_backend) + + pandas.set_option("plotting.backend", "pandas_dummy_backend") + assert pandas.get_option("plotting.backend") == "pandas_dummy_backend" + assert ( + pandas.plotting._core._get_plot_backend("pandas_dummy_backend") is dummy_backend + ) + + +def test_backend_can_be_set_in_plot_call(monkeypatch, restore_backend): + monkeypatch.setitem(sys.modules, "pandas_dummy_backend", dummy_backend) + df = pandas.DataFrame([1, 2, 3]) + + assert pandas.get_option("plotting.backend") == "matplotlib" + assert df.plot(backend="pandas_dummy_backend") == "used_dummy" + + +@td.skip_if_no_mpl +def test_register_entrypoint(restore_backend): + + dist = pkg_resources.get_distribution("pandas") + if dist.module_path not 
in pandas.__file__: + # We are running from a non-installed pandas, and this test is invalid + pytest.skip("Testing a non-installed pandas") + + mod = types.ModuleType("my_backend") + mod.plot = lambda *args, **kwargs: 1 + + backends = pkg_resources.get_entry_map("pandas") + my_entrypoint = pkg_resources.EntryPoint( + "pandas_plotting_backend", mod.__name__, dist=dist + ) + backends["pandas_plotting_backends"]["my_backend"] = my_entrypoint + # TODO: the docs recommend importlib.util.module_from_spec. But this works for now. + sys.modules["my_backend"] = mod + + result = pandas.plotting._core._get_plot_backend("my_backend") + assert result is mod + + # TODO: https://github.com/pandas-dev/pandas/issues/27517 + # Remove the td.skip_if_no_mpl + with pandas.option_context("plotting.backend", "my_backend"): + result = pandas.plotting._core._get_plot_backend() + + assert result is mod + + +def test_setting_backend_without_plot_raises(): + # GH-28163 + module = types.ModuleType("pandas_plot_backend") + sys.modules["pandas_plot_backend"] = module + + assert pandas.options.plotting.backend == "matplotlib" + with pytest.raises( + ValueError, match="Could not find plotting backend 'pandas_plot_backend'." 
+ ): + pandas.set_option("plotting.backend", "pandas_plot_backend") + + assert pandas.options.plotting.backend == "matplotlib" + + +@td.skip_if_mpl +def test_no_matplotlib_ok(): + with pytest.raises(ImportError): + pandas.plotting._core._get_plot_backend("matplotlib") + + +def test_extra_kinds_ok(monkeypatch, restore_backend): + # https://github.com/pandas-dev/pandas/pull/28647 + monkeypatch.setitem(sys.modules, "pandas_dummy_backend", dummy_backend) + pandas.set_option("plotting.backend", "pandas_dummy_backend") + df = pandas.DataFrame({"A": [1, 2, 3]}) + df.plot(kind="not a real kind") diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py new file mode 100644 index 00000000..8ee279f0 --- /dev/null +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -0,0 +1,442 @@ +# coding: utf-8 + +import itertools +import string + +import numpy as np +from numpy import random +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, MultiIndex, Series, date_range, timedelta_range +import pandas._testing as tm +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +import pandas.plotting as plotting + +""" Test cases for .boxplot method """ + + +@td.skip_if_no_mpl +class TestDataFramePlots(TestPlotBase): + @pytest.mark.slow + def test_boxplot_legacy1(self): + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + df["indic"] = ["foo", "bar"] * 3 + df["indic2"] = ["foo", "bar", "foo"] * 2 + + _check_plot_works(df.boxplot, return_type="dict") + _check_plot_works(df.boxplot, column=["one", "two"], return_type="dict") + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, column=["one", "two"], by="indic") + _check_plot_works(df.boxplot, column="one", by=["indic", "indic2"]) + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by="indic") + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by=["indic", "indic2"]) + _check_plot_works(plotting._core.boxplot, data=df["one"], return_type="dict") + _check_plot_works(df.boxplot, notch=1, return_type="dict") + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by="indic", notch=1) + + @pytest.mark.slow + def test_boxplot_legacy2(self): + df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"]) + df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"]) + df["Y"] = Series(["A"] * 10) + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.boxplot, by="X") + + # When ax is supplied and required number of axes is 1, + # passed ax should be used: + fig, ax = self.plt.subplots() + axes = df.boxplot("Col1", by="X", ax=ax) + ax_axes = ax.axes + assert ax_axes is axes + + fig, ax = self.plt.subplots() + axes = df.groupby("Y").boxplot(ax=ax, return_type="axes") + ax_axes = ax.axes + assert ax_axes is axes["A"] + + # Multiple columns with an ax argument should use same figure + fig, ax = self.plt.subplots() + with tm.assert_produces_warning(UserWarning): + axes = df.boxplot( + column=["Col1", "Col2"], by="X", ax=ax, return_type="axes" + ) + assert axes["Col1"].get_figure() is fig + + # When by is None, check that all relevant lines are present in the + # dict + fig, ax = self.plt.subplots() + d = df.boxplot(ax=ax, return_type="dict") + lines = list(itertools.chain.from_iterable(d.values())) + assert len(ax.get_lines()) == len(lines) + + @pytest.mark.slow + def test_boxplot_return_type_none(self): + # GH 12216; return_type=None & by=None -> axes + result = self.hist_df.boxplot() + assert 
isinstance(result, self.plt.Axes) + + @pytest.mark.slow + def test_boxplot_return_type_legacy(self): + # API change in https://github.com/pandas-dev/pandas/pull/7096 + import matplotlib as mpl # noqa + + df = DataFrame( + np.random.randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + with pytest.raises(ValueError): + df.boxplot(return_type="NOTATYPE") + + result = df.boxplot() + self._check_box_return_type(result, "axes") + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type="dict") + self._check_box_return_type(result, "dict") + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type="axes") + self._check_box_return_type(result, "axes") + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type="both") + self._check_box_return_type(result, "both") + + @pytest.mark.slow + def test_boxplot_axis_limits(self): + def _check_ax_limits(col, ax): + y_min, y_max = ax.get_ylim() + assert y_min <= col.min() + assert y_max >= col.max() + + df = self.hist_df.copy() + df["age"] = np.random.randint(1, 20, df.shape[0]) + # One full row + height_ax, weight_ax = df.boxplot(["height", "weight"], by="category") + _check_ax_limits(df["height"], height_ax) + _check_ax_limits(df["weight"], weight_ax) + assert weight_ax._sharey == height_ax + + # Two rows, one partial + p = df.boxplot(["height", "weight", "age"], by="category") + height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0] + dummy_ax = p[1, 1] + + _check_ax_limits(df["height"], height_ax) + _check_ax_limits(df["weight"], weight_ax) + _check_ax_limits(df["age"], age_ax) + assert weight_ax._sharey == height_ax + assert age_ax._sharey == height_ax + assert dummy_ax._sharey is None + + @pytest.mark.slow + def test_boxplot_empty_column(self): + df = DataFrame(np.random.randn(20, 4)) + df.loc[:, 0] = np.nan + _check_plot_works(df.boxplot, return_type="axes") + + @pytest.mark.slow + def test_figsize(self): + df = 
DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"]) + result = df.boxplot(return_type="axes", figsize=(12, 8)) + assert result.figure.bbox_inches.width == 12 + assert result.figure.bbox_inches.height == 8 + + def test_fontsize(self): + df = DataFrame({"a": [1, 2, 3, 4, 5, 6]}) + self._check_ticks_props( + df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16 + ) + + def test_boxplot_numeric_data(self): + # GH 22799 + df = DataFrame( + { + "a": date_range("2012-01-01", periods=100), + "b": np.random.randn(100), + "c": np.random.randn(100) + 2, + "d": date_range("2012-01-01", periods=100).astype(str), + "e": date_range("2012-01-01", periods=100, tz="UTC"), + "f": timedelta_range("1 days", periods=100), + } + ) + ax = df.plot(kind="box") + assert [x.get_text() for x in ax.get_xticklabels()] == ["b", "c"] + + @pytest.mark.parametrize( + "colors_kwd, expected", + [ + ( + dict(boxes="r", whiskers="b", medians="g", caps="c"), + dict(boxes="r", whiskers="b", medians="g", caps="c"), + ), + (dict(boxes="r"), dict(boxes="r")), + ("r", dict(boxes="r", whiskers="r", medians="r", caps="r")), + ], + ) + def test_color_kwd(self, colors_kwd, expected): + # GH: 26214 + df = DataFrame(random.rand(10, 2)) + result = df.boxplot(color=colors_kwd, return_type="dict") + for k, v in expected.items(): + assert result[k][0].get_color() == v + + @pytest.mark.parametrize( + "dict_colors, msg", + [(dict(boxes="r", invalid_key="r"), "invalid key 'invalid_key'")], + ) + def test_color_kwd_errors(self, dict_colors, msg): + # GH: 26214 + df = DataFrame(random.rand(10, 2)) + with pytest.raises(ValueError, match=msg): + df.boxplot(color=dict_colors, return_type="dict") + + +@td.skip_if_no_mpl +class TestDataFrameGroupByPlots(TestPlotBase): + @pytest.mark.slow + def test_boxplot_legacy1(self): + grouped = self.hist_df.groupby(by="gender") + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(grouped.boxplot, return_type="axes") + 
self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2)) + axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + @pytest.mark.slow + def test_boxplot_legacy2(self): + tuples = zip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + grouped = df.groupby(level=1) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(grouped.boxplot, return_type="axes") + self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3)) + + axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + @pytest.mark.slow + def test_boxplot_legacy3(self): + tuples = zip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + grouped = df.unstack(level=1).groupby(level=0, axis=1) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(grouped.boxplot, return_type="axes") + self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2)) + axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + @pytest.mark.slow + def test_grouped_plot_fignums(self): + n = 10 + weight = Series(np.random.normal(166, 20, size=n)) + height = Series(np.random.normal(60, 10, size=n)) + with tm.RNGContext(42): + gender = np.random.choice(["male", "female"], size=n) + df = DataFrame({"height": height, "weight": weight, "gender": gender}) + gb = df.groupby("gender") + + res = gb.plot() + assert len(self.plt.get_fignums()) == 2 + assert len(res) == 2 + tm.close() + + res = gb.boxplot(return_type="axes") + assert len(self.plt.get_fignums()) == 1 + assert len(res) == 2 + tm.close() + + # now works with GH 5610 as gender is excluded + res = df.groupby("gender").hist() + tm.close() + + 
@pytest.mark.slow + def test_grouped_box_return_type(self): + df = self.hist_df + + # old style: return_type=None + result = df.boxplot(by="gender") + assert isinstance(result, np.ndarray) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + # now for groupby + result = df.groupby("gender").boxplot(return_type="dict") + self._check_box_return_type(result, "dict", expected_keys=["Male", "Female"]) + + columns2 = "X B C D A G Y N Q O".split() + df2 = DataFrame(random.randn(50, 10), columns=columns2) + categories2 = "A B C D E F G H I J".split() + df2["category"] = categories2 * 5 + + for t in ["dict", "axes", "both"]: + returned = df.groupby("classroom").boxplot(return_type=t) + self._check_box_return_type(returned, t, expected_keys=["A", "B", "C"]) + + returned = df.boxplot(by="classroom", return_type=t) + self._check_box_return_type( + returned, t, expected_keys=["height", "weight", "category"] + ) + + returned = df2.groupby("category").boxplot(return_type=t) + self._check_box_return_type(returned, t, expected_keys=categories2) + + returned = df2.boxplot(by="category", return_type=t) + self._check_box_return_type(returned, t, expected_keys=columns2) + + @pytest.mark.slow + def test_grouped_box_layout(self): + df = self.hist_df + + msg = "Layout of 1x1 must be larger than required size 2" + with pytest.raises(ValueError, match=msg): + df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1)) + + msg = "The 'layout' keyword is not supported when 'by' is None" + with pytest.raises(ValueError, match=msg): + df.boxplot( + column=["height", "weight", "category"], + layout=(2, 1), + return_type="dict", + ) + + msg = "At least one dimension of layout must be positive" + with pytest.raises(ValueError, match=msg): + df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1)) + + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("gender").boxplot, column="height", return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2)) + + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("category").boxplot, column="height", return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) + + # GH 6769 + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("classroom").boxplot, column="height", return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) + + # GH 5897 + axes = df.boxplot( + column=["height", "weight", "category"], by="gender", return_type="axes" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) + for ax in [axes["height"]]: + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible([ax.xaxis.get_label()], visible=False) + for ax in [axes["weight"], axes["category"]]: + self._check_visible(ax.get_xticklabels()) + self._check_visible([ax.xaxis.get_label()]) + + box = df.groupby("classroom").boxplot( + column=["height", "weight", "category"], return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) + + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("category").boxplot, + column="height", + layout=(3, 2), + return_type="dict", + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) + with tm.assert_produces_warning(UserWarning): + box = _check_plot_works( + df.groupby("category").boxplot, + column="height", + layout=(3, -1), + return_type="dict", + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) + + box = df.boxplot( + column=["height", "weight", "category"], by="gender", layout=(4, 1) + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, 
layout=(4, 1)) + + box = df.boxplot( + column=["height", "weight", "category"], by="gender", layout=(-1, 1) + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1)) + + box = df.groupby("classroom").boxplot( + column=["height", "weight", "category"], layout=(1, 4), return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4)) + + box = df.groupby("classroom").boxplot( # noqa + column=["height", "weight", "category"], layout=(1, -1), return_type="dict" + ) + self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3)) + + @pytest.mark.slow + def test_grouped_box_multiple_axes(self): + # GH 6970, GH 7069 + df = self.hist_df + + # check warning to ignore sharex / sharey + # this check should be done in the first function which + # passes multiple axes to plot, hist or boxplot + # location should be changed if other test is added + # which has earlier alphabetical order + with tm.assert_produces_warning(UserWarning): + fig, axes = self.plt.subplots(2, 2) + df.groupby("category").boxplot(column="height", return_type="axes", ax=axes) + self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) + + fig, axes = self.plt.subplots(2, 3) + with tm.assert_produces_warning(UserWarning): + returned = df.boxplot( + column=["height", "weight", "category"], + by="gender", + return_type="axes", + ax=axes[0], + ) + returned = np.array(list(returned.values)) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[0]) + assert returned[0].figure is fig + + # draw on second row + with tm.assert_produces_warning(UserWarning): + returned = df.groupby("classroom").boxplot( + column=["height", "weight", "category"], return_type="axes", ax=axes[1] + ) + returned = np.array(list(returned.values)) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[1]) + assert returned[0].figure is fig + + with 
pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + with tm.assert_produces_warning(UserWarning): + axes = df.groupby("classroom").boxplot(ax=axes) + + def test_fontsize(self): + df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]}) + self._check_ticks_props( + df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16 + ) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py new file mode 100644 index 00000000..e54f4784 --- /dev/null +++ b/pandas/tests/plotting/test_converter.py @@ -0,0 +1,365 @@ +from datetime import date, datetime +import subprocess +import sys + +import numpy as np +import pytest + +import pandas._config.config as cf + +from pandas.compat.numpy import np_datetime64_compat +import pandas.util._test_decorators as td + +from pandas import Index, Period, Series, Timestamp, date_range +import pandas._testing as tm + +from pandas.plotting import ( + deregister_matplotlib_converters, + register_matplotlib_converters, +) +from pandas.tseries.offsets import Day, Micro, Milli, Second + +try: + from pandas.plotting._matplotlib import converter +except ImportError: + # try / except, rather than skip, to avoid internal refactoring + # causing an improper skip + pass + +pytest.importorskip("matplotlib.pyplot") + + +def test_registry_mpl_resets(): + # Check that Matplotlib converters are properly reset (see issue #27481) + code = ( + "import matplotlib.units as units; " + "import matplotlib.dates as mdates; " + "n_conv = len(units.registry); " + "import pandas as pd; " + "pd.plotting.register_matplotlib_converters(); " + "pd.plotting.deregister_matplotlib_converters(); " + "assert len(units.registry) == n_conv" + ) + call = [sys.executable, "-c", code] + subprocess.check_output(call) + + +def test_timtetonum_accepts_unicode(): + assert converter.time2num("00:01") == converter.time2num("00:01") + + +class TestRegistration: + def 
test_register_by_default(self): + # Run in subprocess to ensure a clean state + code = ( + "'import matplotlib.units; " + "import pandas as pd; " + "units = dict(matplotlib.units.registry); " + "assert pd.Timestamp in units)'" + ) + call = [sys.executable, "-c", code] + assert subprocess.check_call(call) == 0 + + @td.skip_if_no("matplotlib", min_version="3.1.3") + def test_registering_no_warning(self): + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range("2017", periods=12)) + _, ax = plt.subplots() + + # Set to the "warn" state, in case this isn't the first test run + register_matplotlib_converters() + ax.plot(s.index, s.values) + + def test_pandas_plots_register(self): + pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range("2017", periods=12)) + # Set to the "warn" state, in case this isn't the first test run + with tm.assert_produces_warning(None) as w: + s.plot() + + assert len(w) == 0 + + def test_matplotlib_formatters(self): + units = pytest.importorskip("matplotlib.units") + + # Can't make any assertion about the start state. + # We check that toggling converters off removes it, and toggling it + # on restores it. 
+ + with cf.option_context("plotting.matplotlib.register_converters", True): + with cf.option_context("plotting.matplotlib.register_converters", False): + assert Timestamp not in units.registry + assert Timestamp in units.registry + + @td.skip_if_no("matplotlib", min_version="3.1.3") + def test_option_no_warning(self): + pytest.importorskip("matplotlib.pyplot") + ctx = cf.option_context("plotting.matplotlib.register_converters", False) + plt = pytest.importorskip("matplotlib.pyplot") + s = Series(range(12), index=date_range("2017", periods=12)) + _, ax = plt.subplots() + + # Test without registering first, no warning + with ctx: + ax.plot(s.index, s.values) + + # Now test with registering + register_matplotlib_converters() + with ctx: + ax.plot(s.index, s.values) + + def test_registry_resets(self): + units = pytest.importorskip("matplotlib.units") + dates = pytest.importorskip("matplotlib.dates") + + # make a copy, to reset to + original = dict(units.registry) + + try: + # get to a known state + units.registry.clear() + date_converter = dates.DateConverter() + units.registry[datetime] = date_converter + units.registry[date] = date_converter + + register_matplotlib_converters() + assert units.registry[date] is not date_converter + deregister_matplotlib_converters() + assert units.registry[date] is date_converter + + finally: + # restore original state + units.registry.clear() + for k, v in original.items(): + units.registry[k] = v + + +class TestDateTimeConverter: + def setup_method(self, method): + self.dtc = converter.DatetimeConverter() + self.tc = converter.TimeFormatter(None) + + def test_convert_accepts_unicode(self): + r1 = self.dtc.convert("12:22", None, None) + r2 = self.dtc.convert("12:22", None, None) + assert r1 == r2, "DatetimeConverter.convert should accept unicode" + + def test_conversion(self): + rs = self.dtc.convert(["2012-1-1"], None, None)[0] + xp = datetime(2012, 1, 1).toordinal() + assert rs == xp + + rs = self.dtc.convert("2012-1-1", None, 
None) + assert rs == xp + + rs = self.dtc.convert(date(2012, 1, 1), None, None) + assert rs == xp + + rs = self.dtc.convert(datetime(2012, 1, 1).toordinal(), None, None) + assert rs == xp + + rs = self.dtc.convert("2012-1-1", None, None) + assert rs == xp + + rs = self.dtc.convert(Timestamp("2012-1-1"), None, None) + assert rs == xp + + # also testing datetime64 dtype (GH8614) + rs = self.dtc.convert(np_datetime64_compat("2012-01-01"), None, None) + assert rs == xp + + rs = self.dtc.convert( + np_datetime64_compat("2012-01-01 00:00:00+0000"), None, None + ) + assert rs == xp + + rs = self.dtc.convert( + np.array( + [ + np_datetime64_compat("2012-01-01 00:00:00+0000"), + np_datetime64_compat("2012-01-02 00:00:00+0000"), + ] + ), + None, + None, + ) + assert rs[0] == xp + + # we have a tz-aware date (constructed so that when we turn to utc it + # is the same as our sample) + ts = Timestamp("2012-01-01").tz_localize("UTC").tz_convert("US/Eastern") + rs = self.dtc.convert(ts, None, None) + assert rs == xp + + rs = self.dtc.convert(ts.to_pydatetime(), None, None) + assert rs == xp + + rs = self.dtc.convert(Index([ts - Day(1), ts]), None, None) + assert rs[1] == xp + + rs = self.dtc.convert(Index([ts - Day(1), ts]).to_pydatetime(), None, None) + assert rs[1] == xp + + def test_conversion_float(self): + decimals = 9 + + rs = self.dtc.convert(Timestamp("2012-1-1 01:02:03", tz="UTC"), None, None) + xp = converter.dates.date2num(Timestamp("2012-1-1 01:02:03", tz="UTC")) + tm.assert_almost_equal(rs, xp, decimals) + + rs = self.dtc.convert( + Timestamp("2012-1-1 09:02:03", tz="Asia/Hong_Kong"), None, None + ) + tm.assert_almost_equal(rs, xp, decimals) + + rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None) + tm.assert_almost_equal(rs, xp, decimals) + + def test_conversion_outofbounds_datetime(self): + # 2579 + values = [date(1677, 1, 1), date(1677, 1, 2)] + rs = self.dtc.convert(values, None, None) + xp = converter.dates.date2num(values) + 
tm.assert_numpy_array_equal(rs, xp) + rs = self.dtc.convert(values[0], None, None) + xp = converter.dates.date2num(values[0]) + assert rs == xp + + values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] + rs = self.dtc.convert(values, None, None) + xp = converter.dates.date2num(values) + tm.assert_numpy_array_equal(rs, xp) + rs = self.dtc.convert(values[0], None, None) + xp = converter.dates.date2num(values[0]) + assert rs == xp + + @pytest.mark.parametrize( + "time,format_expected", + [ + (0, "00:00"), # time2num(datetime.time.min) + (86399.999999, "23:59:59.999999"), # time2num(datetime.time.max) + (90000, "01:00"), + (3723, "01:02:03"), + (39723.2, "11:02:03.200"), + ], + ) + def test_time_formatter(self, time, format_expected): + # issue 18478 + result = self.tc(time) + assert result == format_expected + + def test_dateindex_conversion(self): + decimals = 9 + + for freq in ("B", "L", "S"): + dateindex = tm.makeDateIndex(k=10, freq=freq) + rs = self.dtc.convert(dateindex, None, None) + xp = converter.dates.date2num(dateindex._mpl_repr()) + tm.assert_almost_equal(rs, xp, decimals) + + def test_resolution(self): + def _assert_less(ts1, ts2): + val1 = self.dtc.convert(ts1, None, None) + val2 = self.dtc.convert(ts2, None, None) + if not val1 < val2: + raise AssertionError(f"{val1} is not less than {val2}.") + + # Matplotlib's time representation using floats cannot distinguish + # intervals smaller than ~10 microsecond in the common range of years. 
+ ts = Timestamp("2012-1-1") + _assert_less(ts, ts + Second()) + _assert_less(ts, ts + Milli()) + _assert_less(ts, ts + Micro(50)) + + def test_convert_nested(self): + inner = [Timestamp("2017-01-01"), Timestamp("2017-01-02")] + data = [inner, inner] + result = self.dtc.convert(data, None, None) + expected = [self.dtc.convert(x, None, None) for x in data] + assert (np.array(result) == expected).all() + + +class TestPeriodConverter: + def setup_method(self, method): + self.pc = converter.PeriodConverter() + + class Axis: + pass + + self.axis = Axis() + self.axis.freq = "D" + + def test_convert_accepts_unicode(self): + r1 = self.pc.convert("2012-1-1", None, self.axis) + r2 = self.pc.convert("2012-1-1", None, self.axis) + assert r1 == r2 + + def test_conversion(self): + rs = self.pc.convert(["2012-1-1"], None, self.axis)[0] + xp = Period("2012-1-1").ordinal + assert rs == xp + + rs = self.pc.convert("2012-1-1", None, self.axis) + assert rs == xp + + rs = self.pc.convert([date(2012, 1, 1)], None, self.axis)[0] + assert rs == xp + + rs = self.pc.convert(date(2012, 1, 1), None, self.axis) + assert rs == xp + + rs = self.pc.convert([Timestamp("2012-1-1")], None, self.axis)[0] + assert rs == xp + + rs = self.pc.convert(Timestamp("2012-1-1"), None, self.axis) + assert rs == xp + + rs = self.pc.convert(np_datetime64_compat("2012-01-01"), None, self.axis) + assert rs == xp + + rs = self.pc.convert( + np_datetime64_compat("2012-01-01 00:00:00+0000"), None, self.axis + ) + assert rs == xp + + rs = self.pc.convert( + np.array( + [ + np_datetime64_compat("2012-01-01 00:00:00+0000"), + np_datetime64_compat("2012-01-02 00:00:00+0000"), + ] + ), + None, + self.axis, + ) + assert rs[0] == xp + + def test_integer_passthrough(self): + # GH9012 + rs = self.pc.convert([0, 1], None, self.axis) + xp = [0, 1] + assert rs == xp + + def test_convert_nested(self): + data = ["2012-1-1", "2012-1-2"] + r1 = self.pc.convert([data, data], None, self.axis) + r2 = [self.pc.convert(data, None, 
self.axis) for _ in range(2)] + assert r1 == r2 + + +class TestTimeDeltaConverter: + """Test timedelta converter""" + + @pytest.mark.parametrize( + "x, decimal, format_expected", + [ + (0.0, 0, "00:00:00"), + (3972320000000, 1, "01:06:12.3"), + (713233432000000, 2, "8 days 06:07:13.43"), + (32423432000000, 4, "09:00:23.4320"), + ], + ) + def test_format_timedelta_ticks(self, x, decimal, format_expected): + tdc = converter.TimeSeries_TimedeltaFormatter + result = tdc.format_timedelta_ticks(x, pos=None, n_decimals=decimal) + assert result == format_expected diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py new file mode 100644 index 00000000..bd5781cb --- /dev/null +++ b/pandas/tests/plotting/test_datetimelike.py @@ -0,0 +1,1518 @@ +""" Test cases for time series specific (freq conversion, etc) """ +from datetime import date, datetime, time, timedelta +import pickle +import sys + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Index, NaT, Series, isna +import pandas._testing as tm +from pandas.core.indexes.datetimes import bdate_range, date_range +from pandas.core.indexes.period import Period, PeriodIndex, period_range +from pandas.core.indexes.timedeltas import timedelta_range +from pandas.core.resample import DatetimeIndex +from pandas.tests.plotting.common import TestPlotBase + +from pandas.tseries.offsets import DateOffset + + +@td.skip_if_no_mpl +class TestTSPlot(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + + self.freq = ["S", "T", "H", "D", "W", "M", "Q", "A"] + idx = [period_range("12/31/1999", freq=x, periods=100) for x in self.freq] + self.period_ser = [Series(np.random.randn(len(x)), x) for x in idx] + self.period_df = [ + DataFrame(np.random.randn(len(x), 3), index=x, columns=["A", "B", "C"]) + for x in idx + ] + + freq = ["S", "T", "H", "D", "W", "M", "Q-DEC", "A", "1B30Min"] + idx = 
[date_range("12/31/1999", freq=x, periods=100) for x in freq] + self.datetime_ser = [Series(np.random.randn(len(x)), x) for x in idx] + self.datetime_df = [ + DataFrame(np.random.randn(len(x), 3), index=x, columns=["A", "B", "C"]) + for x in idx + ] + + def teardown_method(self, method): + tm.close() + + @pytest.mark.slow + def test_ts_plot_with_tz(self, tz_aware_fixture): + # GH2877, GH17173, GH31205 + tz = tz_aware_fixture + index = date_range("1/1/2011", periods=2, freq="H", tz=tz) + ts = Series([188.5, 328.25], index=index) + with tm.assert_produces_warning(None): + _check_plot_works(ts.plot) + + def test_fontsize_set_correctly(self): + # For issue #8765 + df = DataFrame(np.random.randn(10, 9), index=range(10)) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) + for label in ax.get_xticklabels() + ax.get_yticklabels(): + assert label.get_fontsize() == 2 + + @pytest.mark.slow + def test_frame_inferred(self): + # inferred freq + idx = date_range("1/1/1987", freq="MS", periods=100) + idx = DatetimeIndex(idx.values, freq=None) + + df = DataFrame(np.random.randn(len(idx), 3), index=idx) + _check_plot_works(df.plot) + + # axes freq + idx = idx[0:40].union(idx[45:99]) + df2 = DataFrame(np.random.randn(len(idx), 3), index=idx) + _check_plot_works(df2.plot) + + # N > 1 + idx = date_range("2008-1-1 00:15:00", freq="15T", periods=10) + idx = DatetimeIndex(idx.values, freq=None) + df = DataFrame(np.random.randn(len(idx), 3), index=idx) + _check_plot_works(df.plot) + + def test_is_error_nozeroindex(self): + # GH11858 + i = np.array([1, 2, 3]) + a = DataFrame(i, index=i) + _check_plot_works(a.plot, xerr=a) + _check_plot_works(a.plot, yerr=a) + + def test_nonnumeric_exclude(self): + idx = date_range("1/1/1987", freq="A", periods=3) + df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}, idx) + + fig, ax = self.plt.subplots() + df.plot(ax=ax) # it works + assert len(ax.get_lines()) == 1 # B was plotted + self.plt.close(fig) + + msg = "no numeric data to plot" + 
with pytest.raises(TypeError, match=msg): + df["A"].plot() + + @pytest.mark.slow + def test_tsplot(self): + + _, ax = self.plt.subplots() + ts = tm.makeTimeSeries() + + for s in self.period_ser: + _check_plot_works(s.plot, ax=ax) + + for s in self.datetime_ser: + _check_plot_works(s.plot, ax=ax) + + _, ax = self.plt.subplots() + ts.plot(style="k", ax=ax) + color = (0.0, 0.0, 0.0, 1) + assert color == ax.get_lines()[0].get_color() + + def test_both_style_and_color(self): + + ts = tm.makeTimeSeries() + msg = ( + "Cannot pass 'style' string with a color symbol and 'color' " + "keyword argument. Please use one or the other or pass 'style'" + " without a color symbol" + ) + with pytest.raises(ValueError, match=msg): + ts.plot(style="b-", color="#000099") + + s = ts.reset_index(drop=True) + with pytest.raises(ValueError, match=msg): + s.plot(style="b-", color="#000099") + + @pytest.mark.slow + def test_high_freq(self): + freaks = ["ms", "us"] + for freq in freaks: + _, ax = self.plt.subplots() + rng = date_range("1/1/2012", periods=100, freq=freq) + ser = Series(np.random.randn(len(rng)), rng) + _check_plot_works(ser.plot, ax=ax) + + def test_get_datevalue(self): + from pandas.plotting._matplotlib.converter import get_datevalue + + assert get_datevalue(None, "D") is None + assert get_datevalue(1987, "A") == 1987 + assert get_datevalue(Period(1987, "A"), "M") == Period("1987-12", "M").ordinal + assert get_datevalue("1/1/1987", "D") == Period("1987-1-1", "D").ordinal + + @pytest.mark.slow + def test_ts_plot_format_coord(self): + def check_format_of_first_point(ax, expected_string): + first_line = ax.get_lines()[0] + first_x = first_line.get_xdata()[0].ordinal + first_y = first_line.get_ydata()[0] + try: + assert expected_string == ax.format_coord(first_x, first_y) + except (ValueError): + pytest.skip( + "skipping test because issue forming test comparison GH7664" + ) + + annual = Series(1, index=date_range("2014-01-01", periods=3, freq="A-DEC")) + _, ax = 
self.plt.subplots() + annual.plot(ax=ax) + check_format_of_first_point(ax, "t = 2014 y = 1.000000") + + # note this is added to the annual plot already in existence, and + # changes its freq field + daily = Series(1, index=date_range("2014-01-01", periods=3, freq="D")) + daily.plot(ax=ax) + check_format_of_first_point(ax, "t = 2014-01-01 y = 1.000000") + tm.close() + + @pytest.mark.slow + def test_line_plot_period_series(self): + for s in self.period_ser: + _check_plot_works(s.plot, s.index.freq) + + @pytest.mark.slow + @pytest.mark.parametrize( + "frqncy", ["1S", "3S", "5T", "7H", "4D", "8W", "11M", "3A"] + ) + def test_line_plot_period_mlt_series(self, frqncy): + # test period index line plot for series with multiples (`mlt`) of the + # frequency (`frqncy`) rule code. tests resolution of issue #14763 + idx = period_range("12/31/1999", freq=frqncy, periods=100) + s = Series(np.random.randn(len(idx)), idx) + _check_plot_works(s.plot, s.index.freq.rule_code) + + @pytest.mark.slow + def test_line_plot_datetime_series(self): + for s in self.datetime_ser: + _check_plot_works(s.plot, s.index.freq.rule_code) + + @pytest.mark.slow + def test_line_plot_period_frame(self): + for df in self.period_df: + _check_plot_works(df.plot, df.index.freq) + + @pytest.mark.slow + @pytest.mark.parametrize( + "frqncy", ["1S", "3S", "5T", "7H", "4D", "8W", "11M", "3A"] + ) + def test_line_plot_period_mlt_frame(self, frqncy): + # test period index line plot for DataFrames with multiples (`mlt`) + # of the frequency (`frqncy`) rule code. 
tests resolution of issue + # #14763 + idx = period_range("12/31/1999", freq=frqncy, periods=100) + df = DataFrame(np.random.randn(len(idx), 3), index=idx, columns=["A", "B", "C"]) + freq = df.index.asfreq(df.index.freq.rule_code).freq + _check_plot_works(df.plot, freq) + + @pytest.mark.slow + def test_line_plot_datetime_frame(self): + for df in self.datetime_df: + freq = df.index.to_period(df.index.freq.rule_code).freq + _check_plot_works(df.plot, freq) + + @pytest.mark.slow + def test_line_plot_inferred_freq(self): + for ser in self.datetime_ser: + ser = Series(ser.values, Index(np.asarray(ser.index))) + _check_plot_works(ser.plot, ser.index.inferred_freq) + + ser = ser[[0, 3, 5, 6]] + _check_plot_works(ser.plot) + + def test_fake_inferred_business(self): + _, ax = self.plt.subplots() + rng = date_range("2001-1-1", "2001-1-10") + ts = Series(range(len(rng)), index=rng) + ts = ts[:3].append(ts[5:]) + ts.plot(ax=ax) + assert not hasattr(ax, "freq") + + @pytest.mark.slow + def test_plot_offset_freq(self): + ser = tm.makeTimeSeries() + _check_plot_works(ser.plot) + + dr = date_range(ser.index[0], freq="BQS", periods=10) + ser = Series(np.random.randn(len(dr)), index=dr) + _check_plot_works(ser.plot) + + @pytest.mark.slow + def test_plot_multiple_inferred_freq(self): + dr = Index([datetime(2000, 1, 1), datetime(2000, 1, 6), datetime(2000, 1, 11)]) + ser = Series(np.random.randn(len(dr)), index=dr) + _check_plot_works(ser.plot) + + @pytest.mark.slow + def test_uhf(self): + import pandas.plotting._matplotlib.converter as conv + + idx = date_range("2012-6-22 21:59:51.960928", freq="L", periods=500) + df = DataFrame(np.random.randn(len(idx), 2), index=idx) + + _, ax = self.plt.subplots() + df.plot(ax=ax) + axis = ax.get_xaxis() + + tlocs = axis.get_ticklocs() + tlabels = axis.get_ticklabels() + for loc, label in zip(tlocs, tlabels): + xp = conv._from_ordinal(loc).strftime("%H:%M:%S.%f") + rs = str(label.get_text()) + if len(rs): + assert xp == rs + + @pytest.mark.slow + 
def test_irreg_hf(self): + idx = date_range("2012-6-22 21:59:51", freq="S", periods=100) + df = DataFrame(np.random.randn(len(idx), 2), index=idx) + + irreg = df.iloc[[0, 1, 3, 4]] + _, ax = self.plt.subplots() + irreg.plot(ax=ax) + diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() + + sec = 1.0 / 24 / 60 / 60 + assert (np.fabs(diffs[1:] - [sec, sec * 2, sec]) < 1e-8).all() + + _, ax = self.plt.subplots() + df2 = df.copy() + df2.index = df.index.astype(object) + df2.plot(ax=ax) + diffs = Series(ax.get_lines()[0].get_xydata()[:, 0]).diff() + assert (np.fabs(diffs[1:] - sec) < 1e-8).all() + + def test_irregular_datetime64_repr_bug(self): + ser = tm.makeTimeSeries() + ser = ser[[0, 1, 2, 7]] + + _, ax = self.plt.subplots() + + ret = ser.plot(ax=ax) + assert ret is not None + + for rs, xp in zip(ax.get_lines()[0].get_xdata(), ser.index): + assert rs == xp + + def test_business_freq(self): + bts = tm.makePeriodSeries() + _, ax = self.plt.subplots() + bts.plot(ax=ax) + assert ax.get_lines()[0].get_xydata()[0, 0] == bts.index[0].ordinal + idx = ax.get_lines()[0].get_xdata() + assert PeriodIndex(data=idx).freqstr == "B" + + @pytest.mark.slow + def test_business_freq_convert(self): + bts = tm.makeTimeSeries(300).asfreq("BM") + ts = bts.to_period("M") + _, ax = self.plt.subplots() + bts.plot(ax=ax) + assert ax.get_lines()[0].get_xydata()[0, 0] == ts.index[0].ordinal + idx = ax.get_lines()[0].get_xdata() + assert PeriodIndex(data=idx).freqstr == "M" + + def test_nonzero_base(self): + # GH2571 + idx = date_range("2012-12-20", periods=24, freq="H") + timedelta(minutes=30) + df = DataFrame(np.arange(24), index=idx) + _, ax = self.plt.subplots() + df.plot(ax=ax) + rs = ax.get_lines()[0].get_xdata() + assert not Index(rs).is_normalized + + def test_dataframe(self): + bts = DataFrame({"a": tm.makeTimeSeries()}) + _, ax = self.plt.subplots() + bts.plot(ax=ax) + idx = ax.get_lines()[0].get_xdata() + tm.assert_index_equal(bts.index.to_period(), PeriodIndex(idx)) + + 
@pytest.mark.slow + def test_axis_limits(self): + def _test(ax): + xlim = ax.get_xlim() + ax.set_xlim(xlim[0] - 5, xlim[1] + 10) + result = ax.get_xlim() + assert result[0] == xlim[0] - 5 + assert result[1] == xlim[1] + 10 + + # string + expected = (Period("1/1/2000", ax.freq), Period("4/1/2000", ax.freq)) + ax.set_xlim("1/1/2000", "4/1/2000") + result = ax.get_xlim() + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal + + # datetime + expected = (Period("1/1/2000", ax.freq), Period("4/1/2000", ax.freq)) + ax.set_xlim(datetime(2000, 1, 1), datetime(2000, 4, 1)) + result = ax.get_xlim() + assert int(result[0]) == expected[0].ordinal + assert int(result[1]) == expected[1].ordinal + fig = ax.get_figure() + self.plt.close(fig) + + ser = tm.makeTimeSeries() + _, ax = self.plt.subplots() + ser.plot(ax=ax) + _test(ax) + + _, ax = self.plt.subplots() + df = DataFrame({"a": ser, "b": ser + 1}) + df.plot(ax=ax) + _test(ax) + + df = DataFrame({"a": ser, "b": ser + 1}) + axes = df.plot(subplots=True) + + for ax in axes: + _test(ax) + + def test_get_finder(self): + import pandas.plotting._matplotlib.converter as conv + + assert conv.get_finder("B") == conv._daily_finder + assert conv.get_finder("D") == conv._daily_finder + assert conv.get_finder("M") == conv._monthly_finder + assert conv.get_finder("Q") == conv._quarterly_finder + assert conv.get_finder("A") == conv._annual_finder + assert conv.get_finder("W") == conv._daily_finder + + @pytest.mark.slow + def test_finder_daily(self): + day_lst = [10, 40, 252, 400, 950, 2750, 10000] + + xpl1 = xpl2 = [Period("1999-1-1", freq="B").ordinal] * len(day_lst) + rs1 = [] + rs2 = [] + for i, n in enumerate(day_lst): + rng = bdate_range("1999-1-1", periods=n) + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs1.append(xaxis.get_majorticklocs()[0]) + + vmin, vmax = ax.get_xlim() + ax.set_xlim(vmin + 0.9, vmax) + 
rs2.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs1 == xpl1 + assert rs2 == xpl2 + + @pytest.mark.slow + def test_finder_quarterly(self): + yrs = [3.5, 11] + + xpl1 = xpl2 = [Period("1988Q1").ordinal] * len(yrs) + rs1 = [] + rs2 = [] + for i, n in enumerate(yrs): + rng = period_range("1987Q2", periods=int(n * 4), freq="Q") + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs1.append(xaxis.get_majorticklocs()[0]) + + (vmin, vmax) = ax.get_xlim() + ax.set_xlim(vmin + 0.9, vmax) + rs2.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs1 == xpl1 + assert rs2 == xpl2 + + @pytest.mark.slow + def test_finder_monthly(self): + yrs = [1.15, 2.5, 4, 11] + + xpl1 = xpl2 = [Period("Jan 1988").ordinal] * len(yrs) + rs1 = [] + rs2 = [] + for i, n in enumerate(yrs): + rng = period_range("1987Q2", periods=int(n * 12), freq="M") + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs1.append(xaxis.get_majorticklocs()[0]) + + vmin, vmax = ax.get_xlim() + ax.set_xlim(vmin + 0.9, vmax) + rs2.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs1 == xpl1 + assert rs2 == xpl2 + + def test_finder_monthly_long(self): + rng = period_range("1988Q1", periods=24 * 12, freq="M") + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs = xaxis.get_majorticklocs()[0] + xp = Period("1989Q1", "M").ordinal + assert rs == xp + + @pytest.mark.slow + def test_finder_annual(self): + xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] + xp = [Period(x, freq="A").ordinal for x in xp] + rs = [] + for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): + rng = period_range("1987", periods=nyears, freq="A") + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + 
ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs.append(xaxis.get_majorticklocs()[0]) + self.plt.close(ax.get_figure()) + + assert rs == xp + + @pytest.mark.slow + def test_finder_minutely(self): + nminutes = 50 * 24 * 60 + rng = date_range("1/1/1999", freq="Min", periods=nminutes) + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs = xaxis.get_majorticklocs()[0] + xp = Period("1/1/1999", freq="Min").ordinal + + assert rs == xp + + def test_finder_hourly(self): + nhours = 23 + rng = date_range("1/1/1999", freq="H", periods=nhours) + ser = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + ser.plot(ax=ax) + xaxis = ax.get_xaxis() + rs = xaxis.get_majorticklocs()[0] + xp = Period("1/1/1999", freq="H").ordinal + + assert rs == xp + + @pytest.mark.slow + def test_gaps(self): + ts = tm.makeTimeSeries() + ts[5:25] = np.nan + _, ax = self.plt.subplots() + ts.plot(ax=ax) + lines = ax.get_lines() + assert len(lines) == 1 + line = lines[0] + data = line.get_xydata() + + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[5:25, 1].all() + self.plt.close(ax.get_figure()) + + # irregular + ts = tm.makeTimeSeries() + ts = ts[[0, 1, 2, 5, 7, 9, 12, 15, 20]] + ts[2:5] = np.nan + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) + lines = ax.get_lines() + assert len(lines) == 1 + line = lines[0] + data = line.get_xydata() + + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[2:5, 1].all() + self.plt.close(ax.get_figure()) + + # non-ts + idx = [0, 1, 2, 5, 7, 9, 12, 15, 20] + ser = Series(np.random.randn(len(idx)), idx) + ser[2:5] = np.nan + _, ax = self.plt.subplots() + ser.plot(ax=ax) + lines = 
ax.get_lines() + assert len(lines) == 1 + line = lines[0] + data = line.get_xydata() + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[2:5, 1].all() + + @pytest.mark.slow + def test_gap_upsample(self): + low = tm.makeTimeSeries() + low[5:25] = np.nan + _, ax = self.plt.subplots() + low.plot(ax=ax) + + idxh = date_range(low.index[0], low.index[-1], freq="12h") + s = Series(np.random.randn(len(idxh)), idxh) + s.plot(secondary_y=True) + lines = ax.get_lines() + assert len(lines) == 1 + assert len(ax.right_ax.get_lines()) == 1 + + line = lines[0] + data = line.get_xydata() + if self.mpl_ge_3_0_0 or not self.mpl_ge_2_2_3: + data = np.ma.MaskedArray(data, mask=isna(data), fill_value=np.nan) + + assert isinstance(data, np.ma.core.MaskedArray) + mask = data.mask + assert mask[5:25, 1].all() + + @pytest.mark.slow + def test_secondary_y(self): + ser = Series(np.random.randn(10)) + ser2 = Series(np.random.randn(10)) + fig, _ = self.plt.subplots() + ax = ser.plot(secondary_y=True) + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + axes = fig.get_axes() + line = ax.get_lines()[0] + xp = Series(line.get_ydata(), line.get_xdata()) + tm.assert_series_equal(ser, xp) + assert ax.get_yaxis().get_ticks_position() == "right" + assert not axes[0].get_yaxis().get_visible() + self.plt.close(fig) + + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) + assert ax2.get_yaxis().get_ticks_position() == self.default_tick_position + self.plt.close(ax2.get_figure()) + + ax = ser2.plot() + ax2 = ser.plot(secondary_y=True) + assert ax.get_yaxis().get_visible() + assert not hasattr(ax, "left_ax") + assert hasattr(ax, "right_ax") + assert hasattr(ax2, "left_ax") + assert not hasattr(ax2, "right_ax") + + @pytest.mark.slow + def test_secondary_y_ts(self): + idx = date_range("1/1/2000", periods=10) + ser = Series(np.random.randn(10), 
idx) + ser2 = Series(np.random.randn(10), idx) + fig, _ = self.plt.subplots() + ax = ser.plot(secondary_y=True) + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + axes = fig.get_axes() + line = ax.get_lines()[0] + xp = Series(line.get_ydata(), line.get_xdata()).to_timestamp() + tm.assert_series_equal(ser, xp) + assert ax.get_yaxis().get_ticks_position() == "right" + assert not axes[0].get_yaxis().get_visible() + self.plt.close(fig) + + _, ax2 = self.plt.subplots() + ser2.plot(ax=ax2) + assert ax2.get_yaxis().get_ticks_position() == self.default_tick_position + self.plt.close(ax2.get_figure()) + + ax = ser2.plot() + ax2 = ser.plot(secondary_y=True) + assert ax.get_yaxis().get_visible() + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_secondary_kde(self): + + ser = Series(np.random.randn(10)) + fig, ax = self.plt.subplots() + ax = ser.plot(secondary_y=True, kind="density", ax=ax) + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + axes = fig.get_axes() + assert axes[1].get_yaxis().get_ticks_position() == "right" + + @pytest.mark.slow + def test_secondary_bar(self): + ser = Series(np.random.randn(10)) + fig, ax = self.plt.subplots() + ser.plot(secondary_y=True, kind="bar", ax=ax) + axes = fig.get_axes() + assert axes[1].get_yaxis().get_ticks_position() == "right" + + @pytest.mark.slow + def test_secondary_frame(self): + df = DataFrame(np.random.randn(5, 3), columns=["a", "b", "c"]) + axes = df.plot(secondary_y=["a", "c"], subplots=True) + assert axes[0].get_yaxis().get_ticks_position() == "right" + assert axes[1].get_yaxis().get_ticks_position() == self.default_tick_position + assert axes[2].get_yaxis().get_ticks_position() == "right" + + @pytest.mark.slow + def test_secondary_bar_frame(self): + df = DataFrame(np.random.randn(5, 3), columns=["a", "b", "c"]) + axes = df.plot(kind="bar", secondary_y=["a", "c"], subplots=True) + assert axes[0].get_yaxis().get_ticks_position() == "right" + assert 
axes[1].get_yaxis().get_ticks_position() == self.default_tick_position + assert axes[2].get_yaxis().get_ticks_position() == "right" + + def test_mixed_freq_regular_first(self): + # TODO + s1 = tm.makeTimeSeries() + s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] + + # it works! + _, ax = self.plt.subplots() + s1.plot(ax=ax) + + ax2 = s2.plot(style="g", ax=ax) + lines = ax2.get_lines() + idx1 = PeriodIndex(lines[0].get_xdata()) + idx2 = PeriodIndex(lines[1].get_xdata()) + + tm.assert_index_equal(idx1, s1.index.to_period("B")) + tm.assert_index_equal(idx2, s2.index.to_period("B")) + + left, right = ax2.get_xlim() + pidx = s1.index.to_period() + assert left <= pidx[0].ordinal + assert right >= pidx[-1].ordinal + + @pytest.mark.slow + def test_mixed_freq_irregular_first(self): + s1 = tm.makeTimeSeries() + s2 = s1[[0, 5, 10, 11, 12, 13, 14, 15]] + _, ax = self.plt.subplots() + s2.plot(style="g", ax=ax) + s1.plot(ax=ax) + assert not hasattr(ax, "freq") + lines = ax.get_lines() + x1 = lines[0].get_xdata() + tm.assert_numpy_array_equal(x1, s2.index.astype(object).values) + x2 = lines[1].get_xdata() + tm.assert_numpy_array_equal(x2, s1.index.astype(object).values) + + def test_mixed_freq_regular_first_df(self): + # GH 9852 + s1 = tm.makeTimeSeries().to_frame() + s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] + _, ax = self.plt.subplots() + s1.plot(ax=ax) + ax2 = s2.plot(style="g", ax=ax) + lines = ax2.get_lines() + idx1 = PeriodIndex(lines[0].get_xdata()) + idx2 = PeriodIndex(lines[1].get_xdata()) + assert idx1.equals(s1.index.to_period("B")) + assert idx2.equals(s2.index.to_period("B")) + left, right = ax2.get_xlim() + pidx = s1.index.to_period() + assert left <= pidx[0].ordinal + assert right >= pidx[-1].ordinal + + @pytest.mark.slow + def test_mixed_freq_irregular_first_df(self): + # GH 9852 + s1 = tm.makeTimeSeries().to_frame() + s2 = s1.iloc[[0, 5, 10, 11, 12, 13, 14, 15], :] + _, ax = self.plt.subplots() + s2.plot(style="g", ax=ax) + s1.plot(ax=ax) + assert not hasattr(ax, 
"freq") + lines = ax.get_lines() + x1 = lines[0].get_xdata() + tm.assert_numpy_array_equal(x1, s2.index.astype(object).values) + x2 = lines[1].get_xdata() + tm.assert_numpy_array_equal(x2, s1.index.astype(object).values) + + def test_mixed_freq_hf_first(self): + idxh = date_range("1/1/1999", periods=365, freq="D") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) + for l in ax.get_lines(): + assert PeriodIndex(data=l.get_xdata()).freq == "D" + + @pytest.mark.slow + def test_mixed_freq_alignment(self): + ts_ind = date_range("2012-01-01 13:00", "2012-01-02", freq="H") + ts_data = np.random.randn(12) + + ts = Series(ts_data, index=ts_ind) + ts2 = ts.asfreq("T").interpolate() + + _, ax = self.plt.subplots() + ax = ts.plot(ax=ax) + ts2.plot(style="r", ax=ax) + + assert ax.lines[0].get_xdata()[0] == ax.lines[1].get_xdata()[0] + + @pytest.mark.slow + def test_mixed_freq_lf_first(self): + + idxh = date_range("1/1/1999", periods=365, freq="D") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(legend=True, ax=ax) + high.plot(legend=True, ax=ax) + for l in ax.get_lines(): + assert PeriodIndex(data=l.get_xdata()).freq == "D" + leg = ax.get_legend() + assert len(leg.texts) == 2 + self.plt.close(ax.get_figure()) + + idxh = date_range("1/1/1999", periods=240, freq="T") + idxl = date_range("1/1/1999", periods=4, freq="H") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) + for l in ax.get_lines(): + assert PeriodIndex(data=l.get_xdata()).freq == "T" + + def test_mixed_freq_irreg_period(self): + ts = tm.makeTimeSeries() + irreg = ts[[0, 1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 15, 16, 17, 18, 29]] + rng = period_range("1/3/2000", periods=30, freq="B") + ps = Series(np.random.randn(len(rng)), rng) + _, ax = self.plt.subplots() + irreg.plot(ax=ax) + ps.plot(ax=ax) + + def test_mixed_freq_shared_ax(self): + + # GH13341, using sharex=True + idx1 = date_range("2015-01-01", periods=3, freq="M") + idx2 = idx1[:1].union(idx1[2:]) + s1 = Series(range(len(idx1)), idx1) + s2 = Series(range(len(idx2)), idx2) + + fig, (ax1, ax2) = self.plt.subplots(nrows=2, sharex=True) + s1.plot(ax=ax1) + s2.plot(ax=ax2) + + assert ax1.freq == "M" + assert ax2.freq == "M" + assert ax1.lines[0].get_xydata()[0, 0] == ax2.lines[0].get_xydata()[0, 0] + + # using twinx + fig, ax1 = self.plt.subplots() + ax2 = ax1.twinx() + s1.plot(ax=ax1) + s2.plot(ax=ax2) + + assert ax1.lines[0].get_xydata()[0, 0] == ax2.lines[0].get_xydata()[0, 0] + + # TODO (GH14330, GH14322) + # plotting the irregular first does not yet work + # fig, ax1 = plt.subplots() + # ax2 = ax1.twinx() + # s2.plot(ax=ax1) + # s1.plot(ax=ax2) + # assert (ax1.lines[0].get_xydata()[0, 0] == + # ax2.lines[0].get_xydata()[0, 0]) + + def test_nat_handling(self): + + _, ax = self.plt.subplots() + + dti = DatetimeIndex(["2015-01-01", NaT, "2015-01-03"]) + s = Series(range(len(dti)), dti) + s.plot(ax=ax) + xdata = ax.get_lines()[0].get_xdata() + # plot x data is bounded by index values + assert s.index.min() <= Series(xdata).min() + assert Series(xdata).max() <= s.index.max() + + @pytest.mark.slow + def test_to_weekly_resampling(self): + idxh = date_range("1/1/1999", periods=52, freq="W") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) + for l in ax.get_lines(): + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq + + @pytest.mark.slow + def test_from_weekly_resampling(self): + idxh = date_range("1/1/1999", periods=52, freq="W") + idxl = 
date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) + + expected_h = idxh.to_period().asi8.astype(np.float64) + expected_l = np.array( + [1514, 1519, 1523, 1527, 1531, 1536, 1540, 1544, 1549, 1553, 1558, 1562], + dtype=np.float64, + ) + for l in ax.get_lines(): + assert PeriodIndex(data=l.get_xdata()).freq == idxh.freq + xdata = l.get_xdata(orig=False) + if len(xdata) == 12: # idxl lines + tm.assert_numpy_array_equal(xdata, expected_l) + else: + tm.assert_numpy_array_equal(xdata, expected_h) + tm.close() + + @pytest.mark.slow + def test_from_resampling_area_line_mixed(self): + idxh = date_range("1/1/1999", periods=52, freq="W") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = DataFrame(np.random.rand(len(idxh), 3), index=idxh, columns=[0, 1, 2]) + low = DataFrame(np.random.rand(len(idxl), 3), index=idxl, columns=[0, 1, 2]) + + # low to high + for kind1, kind2 in [("line", "area"), ("area", "line")]: + _, ax = self.plt.subplots() + low.plot(kind=kind1, stacked=True, ax=ax) + high.plot(kind=kind2, stacked=True, ax=ax) + + # check low dataframe result + expected_x = np.array( + [ + 1514, + 1519, + 1523, + 1527, + 1531, + 1536, + 1540, + 1544, + 1549, + 1553, + 1558, + 1562, + ], + dtype=np.float64, + ) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + line = ax.lines[i] + assert PeriodIndex(line.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x) + # check stacked values are correct + expected_y += low[i].values + tm.assert_numpy_array_equal(line.get_ydata(orig=False), expected_y) + + # check high dataframe result + expected_x = idxh.to_period().asi8.astype(np.float64) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + line = ax.lines[3 + i] + assert 
PeriodIndex(data=line.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x) + expected_y += high[i].values + tm.assert_numpy_array_equal(line.get_ydata(orig=False), expected_y) + + # high to low + for kind1, kind2 in [("line", "area"), ("area", "line")]: + _, ax = self.plt.subplots() + high.plot(kind=kind1, stacked=True, ax=ax) + low.plot(kind=kind2, stacked=True, ax=ax) + + # check high dataframe result + expected_x = idxh.to_period().asi8.astype(np.float64) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + line = ax.lines[i] + assert PeriodIndex(data=line.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(line.get_xdata(orig=False), expected_x) + expected_y += high[i].values + tm.assert_numpy_array_equal(line.get_ydata(orig=False), expected_y) + + # check low dataframe result + expected_x = np.array( + [ + 1514, + 1519, + 1523, + 1527, + 1531, + 1536, + 1540, + 1544, + 1549, + 1553, + 1558, + 1562, + ], + dtype=np.float64, + ) + expected_y = np.zeros(len(expected_x), dtype=np.float64) + for i in range(3): + lines = ax.lines[3 + i] + assert PeriodIndex(data=lines.get_xdata()).freq == idxh.freq + tm.assert_numpy_array_equal(lines.get_xdata(orig=False), expected_x) + expected_y += low[i].values + tm.assert_numpy_array_equal(lines.get_ydata(orig=False), expected_y) + + @pytest.mark.slow + def test_mixed_freq_second_millisecond(self): + # GH 7772, GH 7760 + idxh = date_range("2014-07-01 09:00", freq="S", periods=50) + idxl = date_range("2014-07-01 09:00", freq="100L", periods=500) + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + # high to low + _, ax = self.plt.subplots() + high.plot(ax=ax) + low.plot(ax=ax) + assert len(ax.get_lines()) == 2 + for l in ax.get_lines(): + assert PeriodIndex(data=l.get_xdata()).freq == "L" + tm.close() + + # low to high + _, ax = self.plt.subplots() + low.plot(ax=ax) + high.plot(ax=ax) + assert 
len(ax.get_lines()) == 2 + for l in ax.get_lines(): + assert PeriodIndex(data=l.get_xdata()).freq == "L" + + @pytest.mark.slow + def test_irreg_dtypes(self): + # date + idx = [date(2000, 1, 1), date(2000, 1, 5), date(2000, 1, 20)] + df = DataFrame(np.random.randn(len(idx), 3), Index(idx, dtype=object)) + _check_plot_works(df.plot) + + # np.datetime64 + idx = date_range("1/1/2000", periods=10) + idx = idx[[0, 2, 5, 9]].astype(object) + df = DataFrame(np.random.randn(len(idx), 3), idx) + _, ax = self.plt.subplots() + _check_plot_works(df.plot, ax=ax) + + @pytest.mark.slow + def test_time(self): + t = datetime(1, 1, 1, 3, 30, 0) + deltas = np.random.randint(1, 20, 3).cumsum() + ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + df = DataFrame( + {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + ) + fig, ax = self.plt.subplots() + df.plot(ax=ax) + + # verify tick labels + ticks = ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = time(h, m, s).strftime("%H:%M:%S") + else: + xp = time(h, m, s).strftime("%H:%M") + assert xp == rs + + @pytest.mark.slow + def test_time_change_xlim(self): + t = datetime(1, 1, 1, 3, 30, 0) + deltas = np.random.randint(1, 20, 3).cumsum() + ts = np.array([(t + timedelta(minutes=int(x))).time() for x in deltas]) + df = DataFrame( + {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + ) + fig, ax = self.plt.subplots() + df.plot(ax=ax) + + # verify tick labels + ticks = ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = time(h, m, s).strftime("%H:%M:%S") + else: + xp = time(h, m, s).strftime("%H:%M") + assert xp == rs + + # change xlim + ax.set_xlim("1:30", "5:00") + + # check tick labels again + ticks 
= ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if s != 0: + xp = time(h, m, s).strftime("%H:%M:%S") + else: + xp = time(h, m, s).strftime("%H:%M") + assert xp == rs + + @pytest.mark.slow + def test_time_musec(self): + t = datetime(1, 1, 1, 3, 30, 0) + deltas = np.random.randint(1, 20, 3).cumsum() + ts = np.array([(t + timedelta(microseconds=int(x))).time() for x in deltas]) + df = DataFrame( + {"a": np.random.randn(len(ts)), "b": np.random.randn(len(ts))}, index=ts + ) + fig, ax = self.plt.subplots() + ax = df.plot(ax=ax) + + # verify tick labels + ticks = ax.get_xticks() + labels = ax.get_xticklabels() + for t, l in zip(ticks, labels): + m, s = divmod(int(t), 60) + + us = int(round((t - int(t)) * 1e6)) + + h, m = divmod(m, 60) + rs = l.get_text() + if len(rs) > 0: + if (us % 1000) != 0: + xp = time(h, m, s, us).strftime("%H:%M:%S.%f") + elif (us // 1000) != 0: + xp = time(h, m, s, us).strftime("%H:%M:%S.%f")[:-3] + elif s != 0: + xp = time(h, m, s, us).strftime("%H:%M:%S") + else: + xp = time(h, m, s, us).strftime("%H:%M") + assert xp == rs + + @pytest.mark.slow + def test_secondary_upsample(self): + idxh = date_range("1/1/1999", periods=365, freq="D") + idxl = date_range("1/1/1999", periods=12, freq="M") + high = Series(np.random.randn(len(idxh)), idxh) + low = Series(np.random.randn(len(idxl)), idxl) + _, ax = self.plt.subplots() + low.plot(ax=ax) + ax = high.plot(secondary_y=True, ax=ax) + for l in ax.get_lines(): + assert PeriodIndex(l.get_xdata()).freq == "D" + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + for l in ax.left_ax.get_lines(): + assert PeriodIndex(l.get_xdata()).freq == "D" + + @pytest.mark.slow + def test_secondary_legend(self): + fig = self.plt.figure() + ax = fig.add_subplot(211) + + # ts + df = tm.makeTimeDataFrame() + df.plot(secondary_y=["A", "B"], ax=ax) + leg = ax.get_legend() + assert 
len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == "A (right)" + assert leg.get_texts()[1].get_text() == "B (right)" + assert leg.get_texts()[2].get_text() == "C" + assert leg.get_texts()[3].get_text() == "D" + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: color cycle problems + assert len(colors) == 4 + self.plt.close(fig) + + fig = self.plt.figure() + ax = fig.add_subplot(211) + df.plot(secondary_y=["A", "C"], mark_right=False, ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert leg.get_texts()[0].get_text() == "A" + assert leg.get_texts()[1].get_text() == "B" + assert leg.get_texts()[2].get_text() == "C" + assert leg.get_texts()[3].get_text() == "D" + self.plt.close(fig) + + fig, ax = self.plt.subplots() + df.plot(kind="bar", secondary_y=["A"], ax=ax) + leg = ax.get_legend() + assert leg.get_texts()[0].get_text() == "A (right)" + assert leg.get_texts()[1].get_text() == "B" + self.plt.close(fig) + + fig, ax = self.plt.subplots() + df.plot(kind="bar", secondary_y=["A"], mark_right=False, ax=ax) + leg = ax.get_legend() + assert leg.get_texts()[0].get_text() == "A" + assert leg.get_texts()[1].get_text() == "B" + self.plt.close(fig) + + fig = self.plt.figure() + ax = fig.add_subplot(211) + df = tm.makeTimeDataFrame() + ax = df.plot(secondary_y=["C", "D"], ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: color cycle problems + assert len(colors) == 4 + self.plt.close(fig) + + # non-ts + df = tm.makeDataFrame() + fig = self.plt.figure() + ax = fig.add_subplot(211) + ax = df.plot(secondary_y=["A", "B"], ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: 
color cycle problems + assert len(colors) == 4 + self.plt.close() + + fig = self.plt.figure() + ax = fig.add_subplot(211) + ax = df.plot(secondary_y=["C", "D"], ax=ax) + leg = ax.get_legend() + assert len(leg.get_lines()) == 4 + assert ax.right_ax.get_legend() is None + colors = set() + for line in leg.get_lines(): + colors.add(line.get_color()) + + # TODO: color cycle problems + assert len(colors) == 4 + + def test_format_date_axis(self): + rng = date_range("1/1/2012", periods=12, freq="M") + df = DataFrame(np.random.randn(len(rng), 3), rng) + _, ax = self.plt.subplots() + ax = df.plot(ax=ax) + xaxis = ax.get_xaxis() + for l in xaxis.get_ticklabels(): + if len(l.get_text()) > 0: + assert l.get_rotation() == 30 + + @pytest.mark.slow + def test_ax_plot(self): + x = date_range(start="2012-01-02", periods=10, freq="D") + y = list(range(len(x))) + _, ax = self.plt.subplots() + lines = ax.plot(x, y, label="Y") + tm.assert_index_equal(DatetimeIndex(lines[0].get_xdata()), x) + + @pytest.mark.slow + def test_mpl_nopandas(self): + dates = [date(2008, 12, 31), date(2009, 1, 31)] + values1 = np.arange(10.0, 11.0, 0.5) + values2 = np.arange(11.0, 12.0, 0.5) + + kw = dict(fmt="-", lw=4) + + _, ax = self.plt.subplots() + ax.plot_date([x.toordinal() for x in dates], values1, **kw) + ax.plot_date([x.toordinal() for x in dates], values2, **kw) + + line1, line2 = ax.get_lines() + + exp = np.array([x.toordinal() for x in dates], dtype=np.float64) + tm.assert_numpy_array_equal(line1.get_xydata()[:, 0], exp) + exp = np.array([x.toordinal() for x in dates], dtype=np.float64) + tm.assert_numpy_array_equal(line2.get_xydata()[:, 0], exp) + + @pytest.mark.slow + def test_irregular_ts_shared_ax_xlim(self): + # GH 2960 + ts = tm.makeTimeSeries()[:20] + ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] + + # plot the left section of the irregular series, then the right section + _, ax = self.plt.subplots() + ts_irregular[:5].plot(ax=ax) + ts_irregular[5:].plot(ax=ax) + + # 
check that axis limits are correct + left, right = ax.get_xlim() + assert left <= ts_irregular.index.min().toordinal() + assert right >= ts_irregular.index.max().toordinal() + + @pytest.mark.slow + def test_secondary_y_non_ts_xlim(self): + # GH 3490 - non-timeseries with secondary y + index_1 = [1, 2, 3, 4] + index_2 = [5, 6, 7, 8] + s1 = Series(1, index=index_1) + s2 = Series(2, index=index_2) + + _, ax = self.plt.subplots() + s1.plot(ax=ax) + left_before, right_before = ax.get_xlim() + s2.plot(secondary_y=True, ax=ax) + left_after, right_after = ax.get_xlim() + + assert left_before >= left_after + assert right_before < right_after + + @pytest.mark.slow + def test_secondary_y_regular_ts_xlim(self): + # GH 3490 - regular-timeseries with secondary y + index_1 = date_range(start="2000-01-01", periods=4, freq="D") + index_2 = date_range(start="2000-01-05", periods=4, freq="D") + s1 = Series(1, index=index_1) + s2 = Series(2, index=index_2) + + _, ax = self.plt.subplots() + s1.plot(ax=ax) + left_before, right_before = ax.get_xlim() + s2.plot(secondary_y=True, ax=ax) + left_after, right_after = ax.get_xlim() + + assert left_before >= left_after + assert right_before < right_after + + @pytest.mark.slow + def test_secondary_y_mixed_freq_ts_xlim(self): + # GH 3490 - mixed frequency timeseries with secondary y + rng = date_range("2000-01-01", periods=10000, freq="min") + ts = Series(1, index=rng) + + _, ax = self.plt.subplots() + ts.plot(ax=ax) + left_before, right_before = ax.get_xlim() + ts.resample("D").mean().plot(secondary_y=True, ax=ax) + left_after, right_after = ax.get_xlim() + + # a downsample should not have changed either limit + assert left_before == left_after + assert right_before == right_after + + @pytest.mark.slow + def test_secondary_y_irregular_ts_xlim(self): + # GH 3490 - irregular-timeseries with secondary y + ts = tm.makeTimeSeries()[:20] + ts_irregular = ts[[1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, 17, 18]] + + _, ax = self.plt.subplots() + 
ts_irregular[:5].plot(ax=ax) + # plot higher-x values on secondary axis + ts_irregular[5:].plot(secondary_y=True, ax=ax) + # ensure secondary limits aren't overwritten by plot on primary + ts_irregular[:5].plot(ax=ax) + + left, right = ax.get_xlim() + assert left <= ts_irregular.index.min().toordinal() + assert right >= ts_irregular.index.max().toordinal() + + def test_plot_outofbounds_datetime(self): + # 2579 - checking this does not raise + values = [date(1677, 1, 1), date(1677, 1, 2)] + _, ax = self.plt.subplots() + ax.plot(values) + + values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] + ax.plot(values) + + def test_format_timedelta_ticks_narrow(self): + + expected_labels = [f"00:00:00.0000000{i:0>2d}" for i in np.arange(10)] + + rng = timedelta_range("0", periods=10, freq="ns") + df = DataFrame(np.random.randn(len(rng), 3), rng) + fig, ax = self.plt.subplots() + df.plot(fontsize=2, ax=ax) + self.plt.draw() + labels = ax.get_xticklabels() + + result_labels = [x.get_text() for x in labels] + assert len(result_labels) == len(expected_labels) + assert result_labels == expected_labels + + def test_format_timedelta_ticks_wide(self): + expected_labels = [ + "00:00:00", + "1 days 03:46:40", + "2 days 07:33:20", + "3 days 11:20:00", + "4 days 15:06:40", + "5 days 18:53:20", + "6 days 22:40:00", + "8 days 02:26:40", + "9 days 06:13:20", + ] + + rng = timedelta_range("0", periods=10, freq="1 d") + df = DataFrame(np.random.randn(len(rng), 3), rng) + fig, ax = self.plt.subplots() + ax = df.plot(fontsize=2, ax=ax) + self.plt.draw() + labels = ax.get_xticklabels() + + result_labels = [x.get_text() for x in labels] + assert len(result_labels) == len(expected_labels) + assert result_labels == expected_labels + + def test_timedelta_plot(self): + # test issue #8711 + s = Series(range(5), timedelta_range("1day", periods=5)) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) + + # test long period + index = timedelta_range("1 day 2 hr 30 min 10 s", 
periods=10, freq="1 d") + s = Series(np.random.randn(len(index)), index) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) + + # test short period + index = timedelta_range("1 day 2 hr 30 min 10 s", periods=10, freq="1 ns") + s = Series(np.random.randn(len(index)), index) + _, ax = self.plt.subplots() + _check_plot_works(s.plot, ax=ax) + + def test_hist(self): + # https://github.com/matplotlib/matplotlib/issues/8459 + rng = date_range("1/1/2011", periods=10, freq="H") + x = rng + w1 = np.arange(0, 1, 0.1) + w2 = np.arange(0, 1, 0.1)[::-1] + _, ax = self.plt.subplots() + ax.hist([x, x], weights=[w1, w2]) + + @pytest.mark.slow + def test_overlapping_datetime(self): + # GB 6608 + s1 = Series( + [1, 2, 3], + index=[ + datetime(1995, 12, 31), + datetime(2000, 12, 31), + datetime(2005, 12, 31), + ], + ) + s2 = Series( + [1, 2, 3], + index=[ + datetime(1997, 12, 31), + datetime(2003, 12, 31), + datetime(2008, 12, 31), + ], + ) + + # plot first series, then add the second series to those axes, + # then try adding the first series again + _, ax = self.plt.subplots() + s1.plot(ax=ax) + s2.plot(ax=ax) + s1.plot(ax=ax) + + @pytest.mark.xfail(reason="GH9053 matplotlib does not use ax.xaxis.converter") + def test_add_matplotlib_datetime64(self): + # GH9053 - ensure that a plot with PeriodConverter still understands + # datetime64 data. 
This still fails because matplotlib overrides the + # ax.xaxis.converter with a DatetimeConverter + s = Series(np.random.randn(10), index=date_range("1970-01-02", periods=10)) + ax = s.plot() + ax.plot(s.index, s.values, color="g") + l1, l2 = ax.lines + tm.assert_numpy_array_equal(l1.get_xydata(), l2.get_xydata()) + + def test_matplotlib_scatter_datetime64(self): + # https://github.com/matplotlib/matplotlib/issues/11391 + df = DataFrame(np.random.RandomState(0).rand(10, 2), columns=["x", "y"]) + df["time"] = date_range("2018-01-01", periods=10, freq="D") + fig, ax = self.plt.subplots() + ax.scatter(x="time", y="y", data=df) + self.plt.draw() + label = ax.get_xticklabels()[0] + if self.mpl_ge_3_2_0: + expected = "2018-01-01" + elif self.mpl_ge_3_0_0: + expected = "2017-12-08" + else: + expected = "2017-12-12" + assert label.get_text() == expected + + +def _check_plot_works(f, freq=None, series=None, *args, **kwargs): + import matplotlib.pyplot as plt + + fig = plt.gcf() + + try: + plt.clf() + ax = fig.add_subplot(211) + orig_ax = kwargs.pop("ax", plt.gca()) + orig_axfreq = getattr(orig_ax, "freq", None) + + ret = f(*args, **kwargs) + assert ret is not None # do something more intelligent + + ax = kwargs.pop("ax", plt.gca()) + if series is not None: + dfreq = series.index.freq + if isinstance(dfreq, DateOffset): + dfreq = dfreq.rule_code + if orig_axfreq is None: + assert ax.freq == dfreq + + if freq is not None and orig_axfreq is None: + assert ax.freq == freq + + ax = fig.add_subplot(212) + kwargs["ax"] = ax + ret = f(*args, **kwargs) + assert ret is not None # TODO: do something more intelligent + + with tm.ensure_clean(return_filelike=True) as path: + plt.savefig(path) + + # GH18439 + # this is supported only in Python 3 pickle since + # pickle in Python2 doesn't support instancemethod pickling + # TODO(statsmodels 0.10.0): Remove the statsmodels check + # https://github.com/pandas-dev/pandas/issues/24088 + # https://github.com/statsmodels/statsmodels/issues/4772 
+ if "statsmodels" not in sys.modules: + with tm.ensure_clean(return_filelike=True) as path: + pickle.dump(fig, path) + finally: + plt.close(fig) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py new file mode 100644 index 00000000..1c429baf --- /dev/null +++ b/pandas/tests/plotting/test_frame.py @@ -0,0 +1,3314 @@ +# coding: utf-8 + +""" Test cases for DataFrame.plot """ + +from datetime import date, datetime +import itertools +import string +import warnings + +import numpy as np +from numpy.random import rand, randn +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.api import is_list_like + +import pandas as pd +from pandas import DataFrame, MultiIndex, PeriodIndex, Series, bdate_range, date_range +import pandas._testing as tm +from pandas.core.arrays import integer_array +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +from pandas.io.formats.printing import pprint_thing +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestDataFramePlots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.tdf = tm.makeTimeDataFrame() + self.hexbin_df = DataFrame( + { + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20), + } + ) + + def _assert_ytickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_yticklabels(), visible=exp) + + def _assert_xtickslabels_visibility(self, axes, expected): + for ax, exp in zip(axes, expected): + self._check_visible(ax.get_xticklabels(), visible=exp) + + @pytest.mark.slow + def test_plot(self): + from pandas.plotting._matplotlib.compat import _mpl_ge_3_1_0 + + df = self.tdf + _check_plot_works(df.plot, grid=False) + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, subplots=True) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, subplots=True, use_index=False) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + if _mpl_ge_3_1_0(): + msg = "'Line2D' object has no property 'blarg'" + else: + msg = "Unknown property blarg" + with pytest.raises(AttributeError, match=msg): + df.plot.line(blarg=True) + + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + _check_plot_works(df.plot, use_index=True) + _check_plot_works(df.plot, sort_columns=False) + _check_plot_works(df.plot, yticks=[1, 5, 10]) + _check_plot_works(df.plot, xticks=[1, 5, 10]) + _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100)) + + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.plot, subplots=True, title="blah") + + # We have to redo it here because _check_plot_works does two plots, + # once without an ax kwarg and once with an ax kwarg and the new sharex + # behaviour does not remove the visibility of the latter axis (as ax is + # present). 
see: https://github.com/pandas-dev/pandas/issues/9737 + + axes = df.plot(subplots=True, title="blah") + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + # axes[0].figure.savefig("test.png") + for ax in axes[:2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible([ax.xaxis.get_label()], visible=False) + for ax in [axes[2]]: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible([ax.xaxis.get_label()]) + self._check_ticks_props(ax, xrot=0) + + _check_plot_works(df.plot, title="blah") + + tuples = zip(string.ascii_letters[:10], range(10)) + df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) + _check_plot_works(df.plot, use_index=True) + + # unicode + index = MultiIndex.from_tuples( + [ + ("\u03b1", 0), + ("\u03b1", 1), + ("\u03b2", 2), + ("\u03b2", 3), + ("\u03b3", 4), + ("\u03b3", 5), + ("\u03b4", 6), + ("\u03b4", 7), + ], + names=["i0", "i1"], + ) + columns = MultiIndex.from_tuples( + [("bar", "\u0394"), ("bar", "\u0395")], names=["c0", "c1"] + ) + df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) + _check_plot_works(df.plot, title="\u03A3") + + # GH 6951 + # Test with single column + df = DataFrame({"x": np.random.rand(10)}) + axes = _check_plot_works(df.plot.bar, subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + axes = _check_plot_works(df.plot.bar, subplots=True, layout=(-1, 1)) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + # When ax is supplied and required number of axes is 1, + # passed ax should be used: + fig, ax = self.plt.subplots() + axes = df.plot.bar(subplots=True, ax=ax) + assert len(axes) == 1 + result = ax.axes + assert result is axes[0] + + def test_integer_array_plot(self): + # GH 25587 + arr = integer_array([1, 2, 3, 4], dtype="UInt32") + + s 
= Series(arr) + _check_plot_works(s.plot.line) + _check_plot_works(s.plot.bar) + _check_plot_works(s.plot.hist) + _check_plot_works(s.plot.pie) + + df = DataFrame({"x": arr, "y": arr}) + _check_plot_works(df.plot.line) + _check_plot_works(df.plot.bar) + _check_plot_works(df.plot.hist) + _check_plot_works(df.plot.pie, y="y") + _check_plot_works(df.plot.scatter, x="x", y="y") + _check_plot_works(df.plot.hexbin, x="x", y="y") + + def test_mpl2_color_cycle_str(self): + # GH 15516 + colors = ["C" + str(x) for x in range(10)] + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + for c in colors: + _check_plot_works(df.plot, color=c) + + def test_color_single_series_list(self): + # GH 3486 + df = DataFrame({"A": [1, 2, 3]}) + _check_plot_works(df.plot, color=["red"]) + + def test_rgb_tuple_color(self): + # GH 16695 + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0)) + _check_plot_works(df.plot, x="x", y="y", color=(1, 0, 0, 0.5)) + + def test_color_empty_string(self): + df = DataFrame(randn(10, 2)) + with pytest.raises(ValueError): + df.plot(color="") + + def test_color_and_style_arguments(self): + df = DataFrame({"x": [1, 2], "y": [3, 4]}) + # passing both 'color' and 'style' arguments should be allowed + # if there is no color symbol in the style strings: + ax = df.plot(color=["red", "black"], style=["-", "--"]) + # check that the linestyles are correctly set: + linestyle = [line.get_linestyle() for line in ax.lines] + assert linestyle == ["-", "--"] + # check that the colors are correctly set: + color = [line.get_color() for line in ax.lines] + assert color == ["red", "black"] + # passing both 'color' and 'style' arguments should not be allowed + # if there is a color symbol in the style strings: + with pytest.raises(ValueError): + df.plot(color=["red", "black"], style=["k-", "r--"]) + + def test_nonnumeric_exclude(self): + df = DataFrame({"A": ["x", "y", "z"], "B": [1, 2, 3]}) + ax = df.plot() + assert 
len(ax.get_lines()) == 1 # B was plotted + + @pytest.mark.slow + def test_implicit_label(self): + df = DataFrame(randn(10, 3), columns=["a", "b", "c"]) + ax = df.plot(x="a", y="b") + self._check_text_labels(ax.xaxis.get_label(), "a") + + @pytest.mark.slow + def test_donot_overwrite_index_name(self): + # GH 8494 + df = DataFrame(randn(2, 2), columns=["a", "b"]) + df.index.name = "NAME" + df.plot(y="b", label="LABEL") + assert df.index.name == "NAME" + + @pytest.mark.slow + def test_plot_xy(self): + # columns.inferred_type == 'string' + df = self.tdf + self._check_data(df.plot(x=0, y=1), df.set_index("A")["B"].plot()) + self._check_data(df.plot(x=0), df.set_index("A").plot()) + self._check_data(df.plot(y=0), df.B.plot()) + self._check_data(df.plot(x="A", y="B"), df.set_index("A").B.plot()) + self._check_data(df.plot(x="A"), df.set_index("A").plot()) + self._check_data(df.plot(y="B"), df.B.plot()) + + # columns.inferred_type == 'integer' + df.columns = np.arange(1, len(df.columns) + 1) + self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot()) + self._check_data(df.plot(x=1), df.set_index(1).plot()) + self._check_data(df.plot(y=1), df[1].plot()) + + # figsize and title + ax = df.plot(x=1, y=2, title="Test", figsize=(16, 8)) + self._check_text_labels(ax.title, "Test") + self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16.0, 8.0)) + + # columns.inferred_type == 'mixed' + # TODO add MultiIndex test + + @pytest.mark.slow + @pytest.mark.parametrize( + "input_log, expected_log", [(True, "log"), ("sym", "symlog")] + ) + def test_logscales(self, input_log, expected_log): + df = DataFrame({"a": np.arange(100)}, index=np.arange(100)) + + ax = df.plot(logy=input_log) + self._check_ax_scales(ax, yaxis=expected_log) + assert ax.get_yscale() == expected_log + + ax = df.plot(logx=input_log) + self._check_ax_scales(ax, xaxis=expected_log) + assert ax.get_xscale() == expected_log + + ax = df.plot(loglog=input_log) + self._check_ax_scales(ax, xaxis=expected_log, 
yaxis=expected_log) + assert ax.get_xscale() == expected_log + assert ax.get_yscale() == expected_log + + @pytest.mark.parametrize("input_param", ["logx", "logy", "loglog"]) + def test_invalid_logscale(self, input_param): + # GH: 24867 + df = DataFrame({"a": np.arange(100)}, index=np.arange(100)) + + msg = "Boolean, None and 'sym' are valid options, 'sm' is given." + with pytest.raises(ValueError, match=msg): + df.plot(**{input_param: "sm"}) + + @pytest.mark.slow + def test_xcompat(self): + import pandas as pd + + df = self.tdf + ax = df.plot(x_compat=True) + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + + tm.close() + pd.plotting.plot_params["xaxis.compat"] = True + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + + tm.close() + pd.plotting.plot_params["x_compat"] = False + + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) + + tm.close() + # useful if you're plotting a bunch together + with pd.plotting.plot_params.use("x_compat", True): + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + + tm.close() + ax = df.plot() + lines = ax.get_lines() + assert not isinstance(lines[0].get_xdata(), PeriodIndex) + assert isinstance(PeriodIndex(lines[0].get_xdata()), PeriodIndex) + + def test_period_compat(self): + # GH 9012 + # period-array conversions + df = DataFrame( + np.random.rand(21, 2), + index=bdate_range(datetime(2000, 1, 1), datetime(2000, 1, 31)), + columns=["a", "b"], + ) + + df.plot() + self.plt.axhline(y=0) + tm.close() + + def test_unsorted_index(self): + df = DataFrame( + {"y": np.arange(100)}, index=np.arange(99, -1, -1), dtype=np.int64 + ) + ax = df.plot() + lines = ax.get_lines()[0] + rs = lines.get_xydata() + rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y") + tm.assert_series_equal(rs, df.y, 
check_index_type=False) + tm.close() + + df.index = pd.Index(np.arange(99, -1, -1), dtype=np.float64) + ax = df.plot() + lines = ax.get_lines()[0] + rs = lines.get_xydata() + rs = Series(rs[:, 1], rs[:, 0], dtype=np.int64, name="y") + tm.assert_series_equal(rs, df.y) + + def test_unsorted_index_lims(self): + df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0]}, index=[1.0, 0.0, 3.0, 2.0]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame( + {"y": [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0]}, + index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0], + ) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + df = DataFrame({"y": [0.0, 1.0, 2.0, 3.0], "z": [91.0, 90.0, 93.0, 92.0]}) + ax = df.plot(x="z", y="y") + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= np.nanmin(lines[0].get_data()[0]) + assert xmax >= np.nanmax(lines[0].get_data()[0]) + + @pytest.mark.slow + def test_subplots(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + for kind in ["bar", "barh", "line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True, legend=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + assert axes.shape == (3,) + + for ax, column in zip(axes, df.columns): + self._check_legend_labels(ax, labels=[pprint_thing(column)]) + + for ax in axes[:-2]: + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + if not (kind == "bar" and self.mpl_ge_3_1_0): + # change https://github.com/pandas-dev/pandas/issues/26714 + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + 
self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, sharex=False) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + + axes = df.plot(kind=kind, subplots=True, legend=False) + for ax in axes: + assert ax.get_legend() is None + + def test_groupby_boxplot_sharey(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharey can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # set sharey=True should be identical + axes = df.groupby("c").boxplot(sharey=True) + expected = [True, False, True, False] + self._assert_ytickslabels_visibility(axes, expected) + + # sharey=False, all yticklabels should be visible + axes = df.groupby("c").boxplot(sharey=False) + expected = [True, True, True, True] + self._assert_ytickslabels_visibility(axes, expected) + + def test_groupby_boxplot_sharex(self): + # https://github.com/pandas-dev/pandas/issues/20968 + # sharex can now be switched check whether the right + # pair of axes is turned on or off + + df = DataFrame( + { + "a": [-1.43, -0.15, -3.70, -1.43, -0.14], + "b": [0.56, 0.84, 0.29, 0.56, 0.85], + "c": [0, 1, 2, 3, 1], + }, + index=[0, 1, 2, 3, 4], + ) + + # behavior without keyword + axes = df.groupby("c").boxplot() + expected = [True, True, True, 
True] + self._assert_xtickslabels_visibility(axes, expected) + + # set sharex=False should be identical + axes = df.groupby("c").boxplot(sharex=False) + expected = [True, True, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + # sharex=True, yticklabels should be visible + # only for bottom plots + axes = df.groupby("c").boxplot(sharex=True) + expected = [False, False, True, True] + self._assert_xtickslabels_visibility(axes, expected) + + @pytest.mark.slow + def test_subplots_timeseries(self): + idx = date_range(start="2014-07-01", freq="M", periods=10) + df = DataFrame(np.random.rand(10, 3), index=idx) + + for kind in ["line", "area"]: + axes = df.plot(kind=kind, subplots=True, sharex=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + + for ax in axes[:-2]: + # GH 7801 + self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible(ax.xaxis.get_label(), visible=False) + self._check_visible(ax.get_yticklabels()) + + self._check_visible(axes[-1].xaxis) + self._check_visible(axes[-1].get_xticklabels()) + self._check_visible(axes[-1].get_xticklabels(minor=True)) + self._check_visible(axes[-1].xaxis.get_label()) + self._check_visible(axes[-1].get_yticklabels()) + self._check_ticks_props(axes, xrot=0) + + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + for ax in axes: + self._check_visible(ax.xaxis) + self._check_visible(ax.get_xticklabels()) + self._check_visible(ax.get_xticklabels(minor=True)) + self._check_visible(ax.xaxis.get_label()) + self._check_visible(ax.get_yticklabels()) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = { + "numeric": np.array([1, 2, 5]), + "timedelta": [ + pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h"), + 
], + "datetime_no_tz": [ + pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + "datetime_all_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00", utc=True), + pd.to_datetime("2017-08-02 00:00:00", utc=True), + ], + "text": ["This", "should", "fail"], + } + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert ( + ax_numeric.get_lines()[0].get_data()[1] == testdata["numeric"].values + ).all() + ax_timedelta = testdata.plot(y="timedelta") + assert ( + ax_timedelta.get_lines()[0].get_data()[1] == testdata["timedelta"].values + ).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert ( + ax_datetime_no_tz.get_lines()[0].get_data()[1] + == testdata["datetime_no_tz"].values + ).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert ( + ax_datetime_all_tz.get_lines()[0].get_data()[1] + == testdata["datetime_all_tz"].values + ).all() + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + testdata.plot(y="text") + + @pytest.mark.xfail(reason="not support for period, categorical, datetime_mixed_tz") + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = { + "numeric": np.array([1, 2, 5]), + "period": [ + pd.Period("2017-08-01 00:00:00", freq="H"), + pd.Period("2017-08-01 02:00", freq="H"), + pd.Period("2017-08-02 
00:00:00", freq="H"), + ], + "categorical": pd.Categorical( + ["c", "b", "a"], categories=["a", "b", "c"], ordered=False + ), + "datetime_mixed_tz": [ + pd.to_datetime("2017-08-01 00:00:00", utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00"), + ], + } + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert ( + ax_period.get_lines()[0].get_data()[1] == testdata["period"].values + ).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert ( + ax_categorical.get_lines()[0].get_data()[1] + == testdata["categorical"].values + ).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", y="datetime_mixed_tz") + assert ( + ax_datetime_mixed_tz.get_lines()[0].get_data()[1] + == testdata["datetime_mixed_tz"].values + ).all() + + @pytest.mark.slow + def test_subplots_layout(self): + # GH 6667 + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(-1, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert axes.shape == (2, 2) + + axes = df.plot(subplots=True, layout=(1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=3, layout=(1, 4)) + assert axes.shape == (1, 4) + + axes = df.plot(subplots=True, layout=(4, -1)) + self._check_axes_shape(axes, axes_num=3, layout=(4, 1)) + assert axes.shape == (4, 1) + + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(1, 1)) + with pytest.raises(ValueError): + df.plot(subplots=True, layout=(-1, -1)) + + # single column + df = 
DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + axes = df.plot(subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + axes = df.plot(subplots=True, layout=(3, 3)) + self._check_axes_shape(axes, axes_num=1, layout=(3, 3)) + assert axes.shape == (3, 3) + + @pytest.mark.slow + def test_subplots_warnings(self): + # GH 9464 + with tm.assert_produces_warning(None): + df = DataFrame(np.random.randn(100, 4)) + df.plot(subplots=True, layout=(3, 2)) + + df = DataFrame( + np.random.randn(100, 4), index=date_range("1/1/2000", periods=100) + ) + df.plot(subplots=True, layout=(3, 2)) + + @pytest.mark.slow + def test_subplots_multiple_axes(self): + # GH 5353, 6970, GH 7069 + fig, axes = self.plt.subplots(2, 3) + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + + returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + # draw on second row + returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + assert returned.shape == (3,) + assert returned[0].figure is fig + self._check_axes_shape(axes, axes_num=6, layout=(2, 3)) + tm.close() + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + df.plot(subplots=True, ax=axes) + + # pass 2-dim axes and invalid layout + # invalid lauout should not affect to input and return value + # (show warning is tested in + # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes + fig, axes = self.plt.subplots(2, 2) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", UserWarning) + df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, 1), sharex=False, 
sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(2, -1), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + returned = df.plot( + subplots=True, ax=axes, layout=(-1, 2), sharex=False, sharey=False + ) + self._check_axes_shape(returned, axes_num=4, layout=(2, 2)) + assert returned.shape == (4,) + + # single column + fig, axes = self.plt.subplots(1, 1) + df = DataFrame(np.random.rand(10, 1), index=list(string.ascii_letters[:10])) + + axes = df.plot(subplots=True, ax=[axes], sharex=False, sharey=False) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + assert axes.shape == (1,) + + def test_subplots_ts_share_axes(self): + # GH 3964 + fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) + self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start="2014-07-01", freq="M", periods=10), + ) + for i, ax in enumerate(axes.ravel()): + df[i].plot(ax=ax, fontsize=5) + + # Rows other than bottom should not be visible + for ax in axes[0:-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=False) + + # Bottom row should be visible + for ax in axes[-1].ravel(): + self._check_visible(ax.get_xticklabels(), visible=True) + + # First column should be visible + for ax in axes[[0, 1, 2], [0]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + # Other columns should not be visible + for ax in axes[[0, 1, 2], [1]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in axes[[0, 1, 2], [2]].ravel(): + self._check_visible(ax.get_yticklabels(), visible=False) + + def test_subplots_sharex_axes_existing_axes(self): + # GH 9158 + d = {"A": [1.0, 2.0, 3.0, 4.0], "B": [4.0, 3.0, 2.0, 1.0], "C": [5, 1, 3, 4]} + df = DataFrame(d, index=date_range("2014 
10 11", "2014 10 14")) + + axes = df[["A", "B"]].plot(subplots=True) + df["C"].plot(ax=axes[0], secondary_y=True) + + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + for ax in axes.ravel(): + self._check_visible(ax.get_yticklabels(), visible=True) + + @pytest.mark.slow + def test_subplots_dup_columns(self): + # GH 10962 + df = DataFrame(np.random.rand(5, 5), columns=list("aaaaa")) + axes = df.plot(subplots=True) + for ax in axes: + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + axes = df.plot(subplots=True, secondary_y="a") + for ax in axes: + # (right) is only attached when subplots=False + self._check_legend_labels(ax, labels=["a"]) + assert len(ax.lines) == 1 + tm.close() + + ax = df.plot(secondary_y="a") + self._check_legend_labels(ax, labels=["a (right)"] * 5) + assert len(ax.lines) == 0 + assert len(ax.right_ax.lines) == 5 + + def test_negative_log(self): + df = -DataFrame( + rand(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + + with pytest.raises(ValueError): + df.plot.area(logy=True) + with pytest.raises(ValueError): + df.plot.area(loglog=True) + + def _compare_stacked_y_cood(self, normal_lines, stacked_lines): + base = np.zeros(len(normal_lines[0].get_data()[1])) + for nl, sl in zip(normal_lines, stacked_lines): + base += nl.get_data()[1] # get y coordinates + sy = sl.get_data()[1] + tm.assert_numpy_array_equal(base, sy) + + def test_line_area_stacked(self): + with tm.RNGContext(42): + df = DataFrame(rand(6, 4), columns=["w", "x", "y", "z"]) + neg_df = -df + # each column has either positive or negative value + sep_df = DataFrame( + {"w": rand(6), "x": rand(6), "y": -rand(6), "z": -rand(6)} + ) + # each column has positive-negative mixed value + mixed_df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["w", "x", "y", "z"], + ) + + for kind in ["line", "area"]: + ax1 = 
_check_plot_works(df.plot, kind=kind, stacked=False) + ax2 = _check_plot_works(df.plot, kind=kind, stacked=True) + self._compare_stacked_y_cood(ax1.lines, ax2.lines) + + ax1 = _check_plot_works(neg_df.plot, kind=kind, stacked=False) + ax2 = _check_plot_works(neg_df.plot, kind=kind, stacked=True) + self._compare_stacked_y_cood(ax1.lines, ax2.lines) + + ax1 = _check_plot_works(sep_df.plot, kind=kind, stacked=False) + ax2 = _check_plot_works(sep_df.plot, kind=kind, stacked=True) + self._compare_stacked_y_cood(ax1.lines[:2], ax2.lines[:2]) + self._compare_stacked_y_cood(ax1.lines[2:], ax2.lines[2:]) + + _check_plot_works(mixed_df.plot, stacked=False) + with pytest.raises(ValueError): + mixed_df.plot(stacked=True) + + # Use an index with strictly positive values, preventing + # matplotlib from warning about ignoring xlim + df2 = df.set_index(df.index + 1) + _check_plot_works(df2.plot, kind=kind, logx=True, stacked=True) + + def test_line_area_nan_df(self): + values1 = [1, 2, np.nan, 3] + values2 = [3, np.nan, 2, 1] + df = DataFrame({"a": values1, "b": values2}) + tdf = DataFrame({"a": values1, "b": values2}, index=tm.makeDateIndex(k=4)) + + for d in [df, tdf]: + ax = _check_plot_works(d.plot) + masked1 = ax.lines[0].get_ydata() + masked2 = ax.lines[1].get_ydata() + # remove nan for comparison purpose + + exp = np.array([1, 2, 3], dtype=np.float64) + tm.assert_numpy_array_equal(np.delete(masked1.data, 2), exp) + + exp = np.array([3, 2, 1], dtype=np.float64) + tm.assert_numpy_array_equal(np.delete(masked2.data, 1), exp) + tm.assert_numpy_array_equal( + masked1.mask, np.array([False, False, True, False]) + ) + tm.assert_numpy_array_equal( + masked2.mask, np.array([False, True, False, False]) + ) + + expected1 = np.array([1, 2, 0, 3], dtype=np.float64) + expected2 = np.array([3, 0, 2, 1], dtype=np.float64) + + ax = _check_plot_works(d.plot, stacked=True) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), 
expected1 + expected2) + + ax = _check_plot_works(d.plot.area) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2) + + ax = _check_plot_works(d.plot.area, stacked=False) + tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) + tm.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected2) + + def test_line_lim(self): + df = DataFrame(rand(6, 3), columns=["x", "y", "z"]) + ax = df.plot() + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + + ax = df.plot(secondary_y=True) + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + + axes = df.plot(secondary_y=True, subplots=True) + self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) + for ax in axes: + assert hasattr(ax, "left_ax") + assert not hasattr(ax, "right_ax") + xmin, xmax = ax.get_xlim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + + def test_area_lim(self): + df = DataFrame(rand(6, 4), columns=["x", "y", "z", "four"]) + + neg_df = -df + for stacked in [True, False]: + ax = _check_plot_works(df.plot.area, stacked=stacked) + xmin, xmax = ax.get_xlim() + ymin, ymax = ax.get_ylim() + lines = ax.get_lines() + assert xmin <= lines[0].get_data()[0][0] + assert xmax >= lines[0].get_data()[0][-1] + assert ymin == 0 + + ax = _check_plot_works(neg_df.plot.area, stacked=stacked) + ymin, ymax = ax.get_ylim() + assert ymax == 0 + + @pytest.mark.slow + def test_bar_colors(self): + import matplotlib.pyplot as plt + + default_colors = self._unpack_cycler(plt.rcParams) + + df = DataFrame(randn(5, 5)) + ax = df.plot.bar() + self._check_colors(ax.patches[::5], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.bar(color=custom_colors) + 
self._check_colors(ax.patches[::5], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.bar(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.bar(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::5], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.bar(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + tm.close() + + ax = df.plot(kind="bar", color="green") + self._check_colors(ax.patches[::5], facecolors=["green"] * 5) + tm.close() + + def test_bar_user_colors(self): + df = pd.DataFrame( + {"A": range(4), "B": range(1, 5), "color": ["red", "blue", "blue", "red"]} + ) + # This should *only* work when `y` is specified, else + # we use one color per column + ax = df.plot.bar(y="A", color=df["color"]) + result = [p.get_facecolor() for p in ax.patches] + expected = [ + (1.0, 0.0, 0.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (0.0, 0.0, 1.0, 1.0), + (1.0, 0.0, 0.0, 1.0), + ] + assert result == expected + + @pytest.mark.slow + def test_bar_linewidth(self): + df = DataFrame(randn(5, 5)) + + # regular + ax = df.plot.bar(linewidth=2) + for r in ax.patches: + assert r.get_linewidth() == 2 + + # stacked + ax = df.plot.bar(stacked=True, linewidth=2) + for r in ax.patches: + assert r.get_linewidth() == 2 + + # subplots + axes = df.plot.bar(linewidth=2, subplots=True) + self._check_axes_shape(axes, axes_num=5, layout=(5, 1)) + for ax in axes: + for r in ax.patches: + assert r.get_linewidth() == 2 + + @pytest.mark.slow + def test_bar_barwidth(self): + df = DataFrame(randn(5, 5)) + + width = 0.9 + + # regular + ax = df.plot.bar(width=width) + for r in ax.patches: + assert r.get_width() == width / len(df.columns) + + # stacked + ax = df.plot.bar(stacked=True, 
width=width) + for r in ax.patches: + assert r.get_width() == width + + # horizontal regular + ax = df.plot.barh(width=width) + for r in ax.patches: + assert r.get_height() == width / len(df.columns) + + # horizontal stacked + ax = df.plot.barh(stacked=True, width=width) + for r in ax.patches: + assert r.get_height() == width + + # subplots + axes = df.plot.bar(width=width, subplots=True) + for ax in axes: + for r in ax.patches: + assert r.get_width() == width + + # horizontal subplots + axes = df.plot.barh(width=width, subplots=True) + for ax in axes: + for r in ax.patches: + assert r.get_height() == width + + @pytest.mark.slow + def test_bar_barwidth_position(self): + df = DataFrame(randn(5, 5)) + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, position=0.2) + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, position=0.2 + ) + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, position=0.2 + ) + + @pytest.mark.slow + def test_bar_barwidth_position_int(self): + # GH 12979 + df = DataFrame(randn(5, 5)) + + for w in [1, 1.0]: + ax = df.plot.bar(stacked=True, width=w) + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4])) + assert ax.get_xlim() == (-0.75, 4.75) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.5 + assert ax.patches[-1].get_x() == 3.5 + + self._check_bar_alignment(df, kind="bar", stacked=True, width=1) + self._check_bar_alignment(df, kind="barh", stacked=False, width=1) + self._check_bar_alignment(df, kind="barh", stacked=True, width=1) + self._check_bar_alignment(df, kind="bar", subplots=True, width=1) + self._check_bar_alignment(df, kind="barh", subplots=True, 
width=1) + + @pytest.mark.slow + def test_bar_bottom_left(self): + df = DataFrame(rand(5, 5)) + ax = df.plot.bar(stacked=False, bottom=1) + result = [p.get_y() for p in ax.patches] + assert result == [1] * 25 + + ax = df.plot.bar(stacked=True, bottom=[-1, -2, -3, -4, -5]) + result = [p.get_y() for p in ax.patches[:5]] + assert result == [-1, -2, -3, -4, -5] + + ax = df.plot.barh(stacked=False, left=np.array([1, 1, 1, 1, 1])) + result = [p.get_x() for p in ax.patches] + assert result == [1] * 25 + + ax = df.plot.barh(stacked=True, left=[1, 2, 3, 4, 5]) + result = [p.get_x() for p in ax.patches[:5]] + assert result == [1, 2, 3, 4, 5] + + axes = df.plot.bar(subplots=True, bottom=-1) + for ax in axes: + result = [p.get_y() for p in ax.patches] + assert result == [-1] * 5 + + axes = df.plot.barh(subplots=True, left=np.array([1, 1, 1, 1, 1])) + for ax in axes: + result = [p.get_x() for p in ax.patches] + assert result == [1] * 5 + + @pytest.mark.slow + def test_bar_nan(self): + df = DataFrame({"A": [10, np.nan, 20], "B": [5, 10, 20], "C": [1, 2, 3]}) + ax = df.plot.bar() + expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] + result = [p.get_height() for p in ax.patches] + assert result == expected + + ax = df.plot.bar(stacked=True) + result = [p.get_height() for p in ax.patches] + assert result == expected + + result = [p.get_y() for p in ax.patches] + expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0] + assert result == expected + + @pytest.mark.slow + def test_bar_categorical(self): + # GH 13019 + df1 = pd.DataFrame( + np.random.randn(6, 5), + index=pd.Index(list("ABCDEF")), + columns=pd.Index(list("abcde")), + ) + # categorical index must behave the same + df2 = pd.DataFrame( + np.random.randn(6, 5), + index=pd.CategoricalIndex(list("ABCDEF")), + columns=pd.CategoricalIndex(list("abcde")), + ) + + for df in [df1, df2]: + ax = df.plot.bar() + ticks = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) + assert ax.get_xlim() == 
(-0.5, 5.5) + # check left-edge of bars + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 5.15 + + ax = df.plot.bar(stacked=True) + tm.assert_numpy_array_equal(ticks, np.array([0, 1, 2, 3, 4, 5])) + assert ax.get_xlim() == (-0.5, 5.5) + assert ax.patches[0].get_x() == -0.25 + assert ax.patches[-1].get_x() == 4.75 + + @pytest.mark.slow + def test_plot_scatter(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + + _check_plot_works(df.plot.scatter, x="x", y="y") + _check_plot_works(df.plot.scatter, x=1, y=2) + + with pytest.raises(TypeError): + df.plot.scatter(x="x") + with pytest.raises(TypeError): + df.plot.scatter(y="y") + + # GH 6951 + axes = df.plot(x="x", y="y", kind="scatter", subplots=True) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + def test_raise_error_on_datetime_time_data(self): + # GH 8113, datetime.time type is not supported by matplotlib in scatter + df = pd.DataFrame(np.random.randn(10), columns=["a"]) + df["dtime"] = pd.date_range(start="2014-01-01", freq="h", periods=10).time + msg = "must be a string or a number, not 'datetime.time'" + + with pytest.raises(TypeError, match=msg): + df.plot(kind="scatter", x="dtime", y="a") + + def test_scatterplot_datetime_data(self): + # GH 30391 + dates = pd.date_range(start=date(2019, 1, 1), periods=12, freq="W") + vals = np.random.normal(0, 1, len(dates)) + df = pd.DataFrame({"dates": dates, "vals": vals}) + + _check_plot_works(df.plot.scatter, x="dates", y="vals") + _check_plot_works(df.plot.scatter, x=0, y=1) + + def test_scatterplot_object_data(self): + # GH 18755 + df = pd.DataFrame(dict(a=["A", "B", "C"], b=[2, 3, 4])) + + _check_plot_works(df.plot.scatter, x="a", y="b") + _check_plot_works(df.plot.scatter, x=0, y=1) + + df = pd.DataFrame(dict(a=["A", "B", "C"], b=["a", "b", "c"])) + + _check_plot_works(df.plot.scatter, x="a", y="b") + _check_plot_works(df.plot.scatter, x=0, y=1) + + 
@pytest.mark.slow + def test_if_scatterplot_colorbar_affects_xaxis_visibility(self): + # addressing issue #10611, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. + random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax1 = df.plot.scatter(x="A label", y="B label") + ax2 = df.plot.scatter(x="A label", y="B label", c="C label") + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_minorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_minorticklabels()] + assert vis1 == vis2 + + vis1 = [vis.get_visible() for vis in ax1.xaxis.get_majorticklabels()] + vis2 = [vis.get_visible() for vis in ax2.xaxis.get_majorticklabels()] + assert vis1 == vis2 + + assert ( + ax1.xaxis.get_label().get_visible() == ax2.xaxis.get_label().get_visible() + ) + + @pytest.mark.slow + def test_if_hexbin_xaxis_label_is_visible(self): + # addressing issue #10678, to ensure colobar does not + # interfere with x-axis label and ticklabels with + # ipython inline backend. 
+ random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + + ax = df.plot.hexbin("A label", "B label", gridsize=12) + assert all(vis.get_visible() for vis in ax.xaxis.get_minorticklabels()) + assert all(vis.get_visible() for vis in ax.xaxis.get_majorticklabels()) + assert ax.xaxis.get_label().get_visible() + + @pytest.mark.slow + def test_if_scatterplot_colorbars_are_next_to_parent_axes(self): + import matplotlib.pyplot as plt + + random_array = np.random.random((1000, 3)) + df = pd.DataFrame(random_array, columns=["A label", "B label", "C label"]) + + fig, axes = plt.subplots(1, 2) + df.plot.scatter("A label", "B label", c="C label", ax=axes[0]) + df.plot.scatter("A label", "B label", c="C label", ax=axes[1]) + plt.tight_layout() + + points = np.array([ax.get_position().get_points() for ax in fig.axes]) + axes_x_coords = points[:, :, 0] + parent_distance = axes_x_coords[1, :] - axes_x_coords[0, :] + colorbar_distance = axes_x_coords[3, :] - axes_x_coords[2, :] + assert np.isclose(parent_distance, colorbar_distance, atol=1e-7).all() + + @pytest.mark.parametrize("x, y", [("x", "y"), ("y", "x"), ("y", "y")]) + @pytest.mark.slow + def test_plot_scatter_with_categorical_data(self, x, y): + # after fixing GH 18755, should be able to plot categorical data + df = pd.DataFrame( + {"x": [1, 2, 3, 4], "y": pd.Categorical(["a", "b", "a", "c"])} + ) + + _check_plot_works(df.plot.scatter, x=x, y=y) + + @pytest.mark.slow + def test_plot_scatter_with_c(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["x", "y", "z", "four"], + ) + + axes = [df.plot.scatter(x="x", y="y", c="z"), df.plot.scatter(x=0, y=1, c=2)] + for ax in axes: + # default to Greys + assert ax.collections[0].cmap.name == "Greys" + + # n.b. 
there appears to be no public method + # to get the colorbar label + assert ax.collections[0].colorbar._label == "z" + + cm = "cubehelix" + ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm) + assert ax.collections[0].cmap.name == cm + + # verify turning off colorbar works + ax = df.plot.scatter(x="x", y="y", c="z", colorbar=False) + assert ax.collections[0].colorbar is None + + # verify that we can still plot a solid color + ax = df.plot.scatter(x=0, y=1, c="red") + assert ax.collections[0].colorbar is None + self._check_colors(ax.collections, facecolors=["r"]) + + # Ensure that we can pass an np.array straight through to matplotlib, + # this functionality was accidentally removed previously. + # See https://github.com/pandas-dev/pandas/issues/8852 for bug report + # + # Exercise colormap path and non-colormap path as they are independent + # + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + red_rgba = [1.0, 0.0, 0.0, 1.0] + green_rgba = [0.0, 1.0, 0.0, 1.0] + rgba_array = np.array([red_rgba, green_rgba]) + ax = df.plot.scatter(x="A", y="B", c=rgba_array) + # expect the face colors of the points in the non-colormap path to be + # identical to the values we supplied, normally we'd be on shaky ground + # comparing floats for equality but here we expect them to be + # identical. + tm.assert_numpy_array_equal(ax.collections[0].get_facecolor(), rgba_array) + # we don't test the colors of the faces in this next plot because they + # are dependent on the spring colormap, which may change its colors + # later. 
+ float_array = np.array([0.0, 1.0]) + df.plot.scatter(x="A", y="B", c=float_array, cmap="spring") + + def test_scatter_colors(self): + df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3]}) + with pytest.raises(TypeError): + df.plot.scatter(x="a", y="b", c="c", color="green") + + default_colors = self._unpack_cycler(self.plt.rcParams) + + ax = df.plot.scatter(x="a", y="b", c="c") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array(self.colorconverter.to_rgba(default_colors[0])), + ) + + ax = df.plot.scatter(x="a", y="b", color="white") + tm.assert_numpy_array_equal( + ax.collections[0].get_facecolor()[0], + np.array([1, 1, 1, 1], dtype=np.float64), + ) + + @pytest.mark.slow + def test_plot_bar(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + + _check_plot_works(df.plot.bar) + _check_plot_works(df.plot.bar, legend=False) + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.plot.bar, subplots=True) + _check_plot_works(df.plot.bar, stacked=True) + + df = DataFrame( + randn(10, 15), index=list(string.ascii_letters[:10]), columns=range(15) + ) + _check_plot_works(df.plot.bar) + + df = DataFrame({"a": [0, 1], "b": [1, 0]}) + ax = _check_plot_works(df.plot.bar) + self._check_ticks_props(ax, xrot=90) + + ax = df.plot.bar(rot=35, fontsize=10) + self._check_ticks_props(ax, xrot=35, xlabelsize=10, ylabelsize=10) + + ax = _check_plot_works(df.plot.barh) + self._check_ticks_props(ax, yrot=0) + + ax = df.plot.barh(rot=55, fontsize=11) + self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) + + def _check_bar_alignment( + self, + df, + kind="bar", + stacked=False, + subplots=False, + align="center", + width=0.5, + position=0.5, + ): + + axes = df.plot( + kind=kind, + stacked=stacked, + subplots=subplots, + align=align, + width=width, + position=position, + grid=True, + ) + + axes = self._flatten_visible(axes) + + for ax in axes: + if kind == "bar": + axis = ax.xaxis + ax_min, ax_max = ax.get_xlim() + min_edge = min(p.get_x() for p in ax.patches) + max_edge = max(p.get_x() + p.get_width() for p in ax.patches) + elif kind == "barh": + axis = ax.yaxis + ax_min, ax_max = ax.get_ylim() + min_edge = min(p.get_y() for p in ax.patches) + max_edge = max(p.get_y() + p.get_height() for p in ax.patches) + else: + raise ValueError + + # GH 7498 + # compare margins between lim and bar edges + tm.assert_almost_equal(ax_min, min_edge - 0.25) + tm.assert_almost_equal(ax_max, max_edge + 0.25) + + p = ax.patches[0] + if kind == "bar" and (stacked is True or subplots is True): + edge = p.get_x() + center = edge + p.get_width() * position + elif kind == "bar" and stacked is False: + center = p.get_x() + p.get_width() * len(df.columns) * position + edge = p.get_x() + elif kind == "barh" and (stacked is True or subplots is True): + center = p.get_y() + p.get_height() * 
position + edge = p.get_y() + elif kind == "barh" and stacked is False: + center = p.get_y() + p.get_height() * len(df.columns) * position + edge = p.get_y() + else: + raise ValueError + + # Check the ticks locates on integer + assert (axis.get_ticklocs() == np.arange(len(df))).all() + + if align == "center": + # Check whether the bar locates on center + tm.assert_almost_equal(axis.get_ticklocs()[0], center) + elif align == "edge": + # Check whether the bar's edge starts from the tick + tm.assert_almost_equal(axis.get_ticklocs()[0], edge) + else: + raise ValueError + + return axes + + @pytest.mark.slow + def test_bar_stacked_center(self): + # GH2157 + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="barh", stacked=True, width=0.9) + + @pytest.mark.slow + def test_bar_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=False, width=0.9) + self._check_bar_alignment(df, kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=False, width=0.9) + + @pytest.mark.slow + def test_bar_subplots_center(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="bar", subplots=True, width=0.9) + self._check_bar_alignment(df, kind="barh", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True, width=0.9) + + @pytest.mark.slow + def test_bar_align_single_column(self): + df = DataFrame(randn(5)) + self._check_bar_alignment(df, kind="bar", stacked=False) + self._check_bar_alignment(df, kind="bar", stacked=True) + self._check_bar_alignment(df, 
kind="barh", stacked=False) + self._check_bar_alignment(df, kind="barh", stacked=True) + self._check_bar_alignment(df, kind="bar", subplots=True) + self._check_bar_alignment(df, kind="barh", subplots=True) + + @pytest.mark.slow + def test_bar_edge(self): + df = DataFrame({"A": [3] * 5, "B": list(range(5))}, index=range(5)) + + self._check_bar_alignment(df, kind="bar", stacked=True, align="edge") + self._check_bar_alignment(df, kind="bar", stacked=True, width=0.9, align="edge") + self._check_bar_alignment(df, kind="barh", stacked=True, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=True, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="bar", stacked=False, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", stacked=False, align="edge") + self._check_bar_alignment( + df, kind="barh", stacked=False, width=0.9, align="edge" + ) + + self._check_bar_alignment(df, kind="bar", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="bar", subplots=True, width=0.9, align="edge" + ) + self._check_bar_alignment(df, kind="barh", subplots=True, align="edge") + self._check_bar_alignment( + df, kind="barh", subplots=True, width=0.9, align="edge" + ) + + @pytest.mark.slow + def test_bar_log_no_subplots(self): + # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 + # regressions in 1.2.1 + expected = np.array([0.1, 1.0, 10.0, 100]) + + # no subplots + df = DataFrame({"A": [3] * 5, "B": list(range(1, 6))}, index=range(5)) + ax = df.plot.bar(grid=True, log=True) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_bar_log_subplots(self): + expected = np.array([0.1, 1.0, 10.0, 100.0, 1000.0, 1e4]) + + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True + ) + + tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) + 
tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) + + @pytest.mark.slow + def test_boxplot(self): + df = self.hist_df + series = df["height"] + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + + ax = _check_plot_works(df.plot.box) + self._check_text_labels(ax.get_xticklabels(), labels) + tm.assert_numpy_array_equal( + ax.xaxis.get_ticklocs(), np.arange(1, len(numeric_cols) + 1) + ) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + axes = series.plot.box(rot=40) + self._check_ticks_props(axes, xrot=40, yrot=0) + tm.close() + + ax = _check_plot_works(series.plot.box) + + positions = np.array([1, 6, 7]) + ax = df.plot.box(positions=positions) + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + self._check_text_labels(ax.get_xticklabels(), labels) + tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), positions) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + @pytest.mark.slow + def test_boxplot_vertical(self): + df = self.hist_df + numeric_cols = df._get_numeric_data().columns + labels = [pprint_thing(c) for c in numeric_cols] + + # if horizontal, yticklabels are rotated + ax = df.plot.box(rot=50, fontsize=8, vert=False) + self._check_ticks_props(ax, xrot=0, yrot=50, ylabelsize=8) + self._check_text_labels(ax.get_yticklabels(), labels) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot.box, subplots=True, vert=False, logx=True) + self._check_axes_shape(axes, axes_num=3, layout=(1, 3)) + self._check_ax_scales(axes, xaxis="log") + for ax, label in zip(axes, labels): + self._check_text_labels(ax.get_yticklabels(), [label]) + assert len(ax.lines) == self.bp_n_objects + + positions = np.array([3, 2, 8]) + ax = df.plot.box(positions=positions, vert=False) + self._check_text_labels(ax.get_yticklabels(), labels) + tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), positions) + assert len(ax.lines) == self.bp_n_objects * len(numeric_cols) + + @pytest.mark.slow + def test_boxplot_return_type(self): + df = DataFrame( + randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=["one", "two", "three", "four"], + ) + with pytest.raises(ValueError): + df.plot.box(return_type="NOTATYPE") + + result = df.plot.box(return_type="dict") + self._check_box_return_type(result, "dict") + + result = df.plot.box(return_type="axes") + self._check_box_return_type(result, "axes") + + result = df.plot.box() # default axes + self._check_box_return_type(result, "axes") + + result = df.plot.box(return_type="both") + self._check_box_return_type(result, "both") + + @pytest.mark.slow + def test_boxplot_subplots_return_type(self): + df = self.hist_df + + # normal style: return_type=None + result = df.plot.box(subplots=True) + assert isinstance(result, Series) + self._check_box_return_type( + result, None, expected_keys=["height", "weight", "category"] + ) + + for t in ["dict", "axes", "both"]: + returned = df.plot.box(return_type=t, subplots=True) + self._check_box_return_type( + returned, + t, + expected_keys=["height", "weight", "category"], + check_ax_title=False, + ) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_df(self): + df = DataFrame(randn(100, 4)) + ax = _check_plot_works(df.plot, kind="kde") + expected = [pprint_thing(c) for c in df.columns] + 
self._check_legend_labels(ax, labels=expected) + self._check_ticks_props(ax, xrot=0) + + ax = df.plot(kind="kde", rot=20, fontsize=5) + self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot, kind="kde", subplots=True) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = df.plot(kind="kde", logy=True, subplots=True) + self._check_ax_scales(axes, yaxis="log") + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_missing_vals(self): + df = DataFrame(np.random.uniform(size=(100, 4))) + df.loc[0, 0] = np.nan + _check_plot_works(df.plot, kind="kde") + + @pytest.mark.slow + def test_hist_df(self): + from matplotlib.patches import Rectangle + + df = DataFrame(randn(100, 4)) + series = df[0] + + ax = _check_plot_works(df.plot.hist) + expected = [pprint_thing(c) for c in df.columns] + self._check_legend_labels(ax, labels=expected) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot.hist, subplots=True, logy=True) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + self._check_ax_scales(axes, yaxis="log") + + axes = series.plot.hist(rot=40) + self._check_ticks_props(axes, xrot=40, yrot=0) + tm.close() + + ax = series.plot.hist(cumulative=True, bins=4, density=True) + # height of last bin (index 5) must be 1.0 + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + tm.assert_almost_equal(rects[-1].get_height(), 1.0) + tm.close() + + ax = series.plot.hist(cumulative=True, bins=4) + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + + tm.assert_almost_equal(rects[-2].get_height(), 100.0) + tm.close() + + # if horizontal, yticklabels are rotated + axes = df.plot.hist(rot=50, fontsize=8, orientation="horizontal") + self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8) + + def _check_box_coord( + self, + patches, + expected_y=None, + expected_h=None, + expected_x=None, + 
expected_w=None, + ): + result_y = np.array([p.get_y() for p in patches]) + result_height = np.array([p.get_height() for p in patches]) + result_x = np.array([p.get_x() for p in patches]) + result_width = np.array([p.get_width() for p in patches]) + # dtype is depending on above values, no need to check + + if expected_y is not None: + tm.assert_numpy_array_equal(result_y, expected_y, check_dtype=False) + if expected_h is not None: + tm.assert_numpy_array_equal(result_height, expected_h, check_dtype=False) + if expected_x is not None: + tm.assert_numpy_array_equal(result_x, expected_x, check_dtype=False) + if expected_w is not None: + tm.assert_numpy_array_equal(result_width, expected_w, check_dtype=False) + + @pytest.mark.slow + def test_hist_df_coord(self): + normal_df = DataFrame( + { + "A": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([10, 9, 8, 7, 6])), + "B": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([8, 8, 8, 8, 8])), + "C": np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10])), + }, + columns=["A", "B", "C"], + ) + + nan_df = DataFrame( + { + "A": np.repeat( + np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6]) + ), + "B": np.repeat( + np.array([1, np.nan, 2, 3, 4, 5]), np.array([8, 3, 8, 8, 8, 8]) + ), + "C": np.repeat( + np.array([1, 2, 3, np.nan, 4, 5]), np.array([6, 7, 8, 3, 9, 10]) + ), + }, + columns=["A", "B", "C"], + ) + + for df in [normal_df, nan_df]: + ax = df.plot.hist(bins=5) + self._check_box_coord( + ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + ax = df.plot.hist(bins=5, stacked=True) + self._check_box_coord( + ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 
6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_y=np.array([10, 9, 8, 7, 6]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_y=np.array([18, 17, 16, 15, 14]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + axes = df.plot.hist(bins=5, stacked=True, subplots=True) + self._check_box_coord( + axes[0].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + axes[1].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + axes[2].patches, + expected_y=np.array([0, 0, 0, 0, 0]), + expected_h=np.array([6, 7, 8, 9, 10]), + ) + + # horizontal + ax = df.plot.hist(bins=5, orientation="horizontal") + self._check_box_coord( + ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + ax = df.plot.hist(bins=5, stacked=True, orientation="horizontal") + self._check_box_coord( + ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + ax.patches[5:10], + expected_x=np.array([10, 9, 8, 7, 6]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + self._check_box_coord( + ax.patches[10:], + expected_x=np.array([18, 17, 16, 15, 14]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + axes = df.plot.hist( + bins=5, stacked=True, subplots=True, orientation="horizontal" + ) + self._check_box_coord( + axes[0].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([10, 9, 8, 7, 6]), + ) + self._check_box_coord( + axes[1].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([8, 8, 8, 8, 8]), + ) + 
self._check_box_coord( + axes[2].patches, + expected_x=np.array([0, 0, 0, 0, 0]), + expected_w=np.array([6, 7, 8, 9, 10]), + ) + + @pytest.mark.slow + def test_plot_int_columns(self): + df = DataFrame(randn(100, 4)).cumsum() + _check_plot_works(df.plot, legend=True) + + @pytest.mark.slow + def test_df_legend_labels(self): + kinds = ["line", "bar", "barh", "kde", "area", "hist"] + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + df2 = DataFrame(rand(3, 3), columns=["d", "e", "f"]) + df3 = DataFrame(rand(3, 3), columns=["g", "h", "i"]) + df4 = DataFrame(rand(3, 3), columns=["j", "k", "l"]) + + for kind in kinds: + + ax = df.plot(kind=kind, legend=True) + self._check_legend_labels(ax, labels=df.columns) + + ax = df2.plot(kind=kind, legend=False, ax=ax) + self._check_legend_labels(ax, labels=df.columns) + + ax = df3.plot(kind=kind, legend=True, ax=ax) + self._check_legend_labels(ax, labels=df.columns.union(df3.columns)) + + ax = df4.plot(kind=kind, legend="reverse", ax=ax) + expected = list(df.columns.union(df3.columns)) + list(reversed(df4.columns)) + self._check_legend_labels(ax, labels=expected) + + # Secondary Y + ax = df.plot(legend=True, secondary_y="b") + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df2.plot(legend=False, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df3.plot(kind="bar", legend=True, secondary_y="h", ax=ax) + self._check_legend_labels( + ax, labels=["a", "b (right)", "c", "g", "h (right)", "i"] + ) + + # Time Series + ind = date_range("1/1/2014", periods=3) + df = DataFrame(randn(3, 3), columns=["a", "b", "c"], index=ind) + df2 = DataFrame(randn(3, 3), columns=["d", "e", "f"], index=ind) + df3 = DataFrame(randn(3, 3), columns=["g", "h", "i"], index=ind) + ax = df.plot(legend=True, secondary_y="b") + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = df2.plot(legend=False, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c"]) + ax = 
df3.plot(legend=True, ax=ax) + self._check_legend_labels(ax, labels=["a", "b (right)", "c", "g", "h", "i"]) + + # scatter + ax = df.plot.scatter(x="a", y="b", label="data1") + self._check_legend_labels(ax, labels=["data1"]) + ax = df2.plot.scatter(x="d", y="e", legend=False, label="data2", ax=ax) + self._check_legend_labels(ax, labels=["data1"]) + ax = df3.plot.scatter(x="g", y="h", label="data3", ax=ax) + self._check_legend_labels(ax, labels=["data1", "data3"]) + + # ensure label args pass through and + # index name does not mutate + # column names don't mutate + df5 = df.set_index("a") + ax = df5.plot(y="b") + self._check_legend_labels(ax, labels=["b"]) + ax = df5.plot(y="b", label="LABEL_b") + self._check_legend_labels(ax, labels=["LABEL_b"]) + self._check_text_labels(ax.xaxis.get_label(), "a") + ax = df5.plot(y="c", label="LABEL_c", ax=ax) + self._check_legend_labels(ax, labels=["LABEL_b", "LABEL_c"]) + assert df5.columns.tolist() == ["b", "c"] + + def test_missing_marker_multi_plots_on_same_ax(self): + # GH 18222 + df = pd.DataFrame( + data=[[1, 1, 1, 1], [2, 2, 4, 8]], columns=["x", "r", "g", "b"] + ) + fig, ax = self.plt.subplots(nrows=1, ncols=3) + # Left plot + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[0]) + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[0]) + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[0]) + self._check_legend_labels(ax[0], labels=["r", "g", "b"]) + self._check_legend_marker(ax[0], expected_markers=["o", "x", "o"]) + # Center plot + df.plot(x="x", y="b", linewidth=1, marker="o", color="b", ax=ax[1]) + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[1]) + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[1]) + self._check_legend_labels(ax[1], labels=["b", "r", "g"]) + self._check_legend_marker(ax[1], expected_markers=["o", "o", "x"]) + # Right plot + df.plot(x="x", y="g", linewidth=1, marker="x", color="g", ax=ax[2]) + df.plot(x="x", y="b", 
linewidth=1, marker="o", color="b", ax=ax[2]) + df.plot(x="x", y="r", linewidth=0, marker="o", color="r", ax=ax[2]) + self._check_legend_labels(ax[2], labels=["g", "b", "r"]) + self._check_legend_marker(ax[2], expected_markers=["x", "o", "o"]) + + def test_legend_name(self): + multi = DataFrame( + randn(4, 4), + columns=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], + ) + multi.columns.names = ["group", "individual"] + + ax = multi.plot() + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + df = DataFrame(randn(5, 5)) + ax = df.plot(legend=True, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + df.columns.name = "new" + ax = df.plot(legend=False, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "group,individual") + + ax = df.plot(legend=True, ax=ax) + leg_title = ax.legend_.get_title() + self._check_text_labels(leg_title, "new") + + @pytest.mark.slow + def test_no_legend(self): + kinds = ["line", "bar", "barh", "kde", "area", "hist"] + df = DataFrame(rand(3, 3), columns=["a", "b", "c"]) + + for kind in kinds: + + ax = df.plot(kind=kind, legend=False) + self._check_legend_labels(ax, visible=False) + + @pytest.mark.slow + def test_style_by_column(self): + import matplotlib.pyplot as plt + + fig = plt.gcf() + + df = DataFrame(randn(100, 3)) + for markers in [ + {0: "^", 1: "+", 2: "o"}, + {0: "^", 1: "+"}, + ["^", "+", "o"], + ["^", "+"], + ]: + fig.clf() + fig.add_subplot(111) + ax = df.plot(style=markers) + for i, l in enumerate(ax.get_lines()[: len(markers)]): + assert l.get_marker() == markers[i] + + @pytest.mark.slow + def test_line_label_none(self): + s = Series([1, 2]) + ax = s.plot() + assert ax.get_legend() is None + + ax = s.plot(legend=True) + assert ax.get_legend().get_texts()[0].get_text() == "None" + + @pytest.mark.slow + def test_line_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" 
+ df = DataFrame(randn(5, 5)) + + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + + tm.close() + + ax2 = df.plot(color=custom_colors) + lines2 = ax2.get_lines() + + for l1, l2 in zip(ax.get_lines(), lines2): + assert l1.get_color() == l2.get_color() + + tm.close() + + ax = df.plot(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + ax = df.loc[:, [0]].plot(color="DodgerBlue") + self._check_colors(ax.lines, linecolors=["DodgerBlue"]) + + ax = df.plot(color="red") + self._check_colors(ax.get_lines(), linecolors=["red"] * 5) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + ax = df.plot(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + with pytest.raises(ValueError): + # Color contains shorthand hex value results in ValueError + custom_colors = ["#F00", "#00F", "#FF0", "#000", "#FFF"] + # Forced show plot + _check_plot_works(df.plot, color=custom_colors) + + @pytest.mark.slow + def test_dont_modify_colors(self): + colors = ["r", "g", "b"] + pd.DataFrame(np.random.rand(10, 2)).plot(color=colors) + assert len(colors) == 3 + + @pytest.mark.slow + def test_line_colors_and_styles_subplots(self): + # GH 9894 + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + + axes = df.plot(subplots=True) + for ax, c in zip(axes, list(default_colors)): + c = [c] + self._check_colors(ax.get_lines(), linecolors=c) + tm.close() + + # single color char + axes = df.plot(subplots=True, color="k") + 
for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(subplots=True, color="green") + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["green"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + axes = df.plot(color=list(custom_colors), subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # GH 10299 + custom_colors = ["#FF0000", "#0000FF", "#FFFF00", "#000000", "#FFFFFF"] + axes = df.plot(color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + with pytest.raises(ValueError): + # Color contains shorthand hex value results in ValueError + custom_colors = ["#F00", "#00F", "#FF0", "#000", "#FFF"] + # Forced show plot + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.plot, color=custom_colors, subplots=True) + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(style=styles, subplots=True) + for ax, c in zip(axes, styles): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_area_colors(self): + from matplotlib import cm + from matplotlib.collections import PolyCollection + + custom_colors = "rgcby" + df = DataFrame(rand(5, 5)) + + ax = df.plot.area(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=custom_colors) + + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=custom_colors) + + for h in handles: + assert h.get_alpha() is None + tm.close() + + ax = df.plot.area(colormap="jet") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + self._check_colors(poly, facecolors=jet_colors) + + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=jet_colors) + for h in handles: + assert h.get_alpha() is None + 
tm.close() + + # When stacked=False, alpha is set to 0.5 + ax = df.plot.area(colormap=cm.jet, stacked=False) + self._check_colors(ax.get_lines(), linecolors=jet_colors) + poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)] + jet_with_alpha = [(c[0], c[1], c[2], 0.5) for c in jet_colors] + self._check_colors(poly, facecolors=jet_with_alpha) + + handles, labels = ax.get_legend_handles_labels() + linecolors = jet_with_alpha + self._check_colors(handles[: len(jet_colors)], linecolors=linecolors) + for h in handles: + assert h.get_alpha() == 0.5 + + @pytest.mark.slow + def test_hist_colors(self): + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + ax = df.plot.hist() + self._check_colors(ax.patches[::10], facecolors=default_colors[:5]) + tm.close() + + custom_colors = "rgcby" + ax = df.plot.hist(color=custom_colors) + self._check_colors(ax.patches[::10], facecolors=custom_colors) + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + ax = df.plot.hist(colormap="jet") + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + # Test colormap functionality + ax = df.plot.hist(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, 5)] + self._check_colors(ax.patches[::10], facecolors=rgba_colors) + tm.close() + + ax = df.loc[:, [0]].plot.hist(color="DodgerBlue") + self._check_colors([ax.patches[0]], facecolors=["DodgerBlue"]) + + ax = df.plot(kind="hist", color="green") + self._check_colors(ax.patches[::10], facecolors=["green"] * 5) + tm.close() + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors(self): + from matplotlib import cm + + custom_colors = "rgcby" + df = DataFrame(rand(5, 5)) + + ax = df.plot.kde(color=custom_colors) + self._check_colors(ax.get_lines(), linecolors=custom_colors) + tm.close() + + ax = df.plot.kde(colormap="jet") + rgba_colors = [cm.jet(n) for n in 
np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + tm.close() + + ax = df.plot.kde(colormap=cm.jet) + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + self._check_colors(ax.get_lines(), linecolors=rgba_colors) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_colors_and_styles_subplots(self): + from matplotlib import cm + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + + axes = df.plot(kind="kde", subplots=True) + for ax, c in zip(axes, list(default_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # single color char + axes = df.plot(kind="kde", color="k", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["k"]) + tm.close() + + # single color str + axes = df.plot(kind="kde", color="red", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["red"]) + tm.close() + + custom_colors = "rgcby" + axes = df.plot(kind="kde", color=custom_colors, subplots=True) + for ax, c in zip(axes, list(custom_colors)): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + rgba_colors = [cm.jet(n) for n in np.linspace(0, 1, len(df))] + for cmap in ["jet", cm.jet]: + axes = df.plot(kind="kde", colormap=cmap, subplots=True) + for ax, c in zip(axes, rgba_colors): + self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + # make color a list if plotting one column frame + # handles cases like df.plot(color='DodgerBlue') + axes = df.loc[:, [0]].plot(kind="kde", color="DodgerBlue", subplots=True) + self._check_colors(axes[0].lines, linecolors=["DodgerBlue"]) + + # single character style + axes = df.plot(kind="kde", style="r", subplots=True) + for ax in axes: + self._check_colors(ax.get_lines(), linecolors=["r"]) + tm.close() + + # list of styles + styles = list("rgcby") + axes = df.plot(kind="kde", style=styles, subplots=True) + for ax, c in zip(axes, styles): + 
self._check_colors(ax.get_lines(), linecolors=[c]) + tm.close() + + @pytest.mark.slow + def test_boxplot_colors(self): + def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c="k", fliers_c=None): + # TODO: outside this func? + if fliers_c is None: + fliers_c = "k" + self._check_colors(bp["boxes"], linecolors=[box_c] * len(bp["boxes"])) + self._check_colors( + bp["whiskers"], linecolors=[whiskers_c] * len(bp["whiskers"]) + ) + self._check_colors( + bp["medians"], linecolors=[medians_c] * len(bp["medians"]) + ) + self._check_colors(bp["fliers"], linecolors=[fliers_c] * len(bp["fliers"])) + self._check_colors(bp["caps"], linecolors=[caps_c] * len(bp["caps"])) + + default_colors = self._unpack_cycler(self.plt.rcParams) + + df = DataFrame(randn(5, 5)) + bp = df.plot.box(return_type="dict") + _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) + tm.close() + + dict_colors = dict( + boxes="#572923", whiskers="#982042", medians="#804823", caps="#123456" + ) + bp = df.plot.box(color=dict_colors, sym="r+", return_type="dict") + _check_colors( + bp, + dict_colors["boxes"], + dict_colors["whiskers"], + dict_colors["medians"], + dict_colors["caps"], + "r", + ) + tm.close() + + # partial colors + dict_colors = dict(whiskers="c", medians="m") + bp = df.plot.box(color=dict_colors, return_type="dict") + _check_colors(bp, default_colors[0], "c", "m") + tm.close() + + from matplotlib import cm + + # Test str -> colormap functionality + bp = df.plot.box(colormap="jet", return_type="dict") + jet_colors = [cm.jet(n) for n in np.linspace(0, 1, 3)] + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) + tm.close() + + # Test colormap functionality + bp = df.plot.box(colormap=cm.jet, return_type="dict") + _check_colors(bp, jet_colors[0], jet_colors[0], jet_colors[2]) + tm.close() + + # string color is applied to all artists except fliers + bp = df.plot.box(color="DodgerBlue", return_type="dict") + _check_colors(bp, "DodgerBlue", "DodgerBlue", 
"DodgerBlue", "DodgerBlue") + + # tuple is also applied to all artists except fliers + bp = df.plot.box(color=(0, 1, 0), sym="#123456", return_type="dict") + _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), "#123456") + + with pytest.raises(ValueError): + # Color contains invalid key results in ValueError + df.plot.box(color=dict(boxes="red", xxxx="blue")) + + def test_default_color_cycle(self): + import matplotlib.pyplot as plt + import cycler + + colors = list("rgbk") + plt.rcParams["axes.prop_cycle"] = cycler.cycler("color", colors) + + df = DataFrame(randn(5, 3)) + ax = df.plot() + + expected = self._unpack_cycler(plt.rcParams)[:3] + self._check_colors(ax.get_lines(), linecolors=expected) + + def test_unordered_ts(self): + df = DataFrame( + np.array([3.0, 2.0, 1.0]), + index=[date(2012, 10, 1), date(2012, 9, 1), date(2012, 8, 1)], + columns=["test"], + ) + ax = df.plot() + xticks = ax.lines[0].get_xdata() + assert xticks[0] < xticks[1] + ydata = ax.lines[0].get_ydata() + tm.assert_numpy_array_equal(ydata, np.array([1.0, 2.0, 3.0])) + + @td.skip_if_no_scipy + def test_kind_both_ways(self): + df = DataFrame({"x": [1, 2, 3]}) + for kind in plotting.PlotAccessor._common_kinds: + + df.plot(kind=kind) + getattr(df.plot, kind)() + for kind in ["scatter", "hexbin"]: + df.plot("x", "x", kind=kind) + getattr(df.plot, kind)("x", "x") + + def test_all_invalid_plot_data(self): + df = DataFrame(list("abcd")) + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + df.plot(kind=kind) + + @pytest.mark.slow + def test_partially_invalid_plot_data(self): + with tm.RNGContext(42): + df = DataFrame(randn(10, 2), dtype=object) + df[np.random.rand(df.shape[0]) > 0.5] = "a" + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + df.plot(kind=kind) + + with tm.RNGContext(42): + # area plot doesn't support positive/negative 
mixed data + kinds = ["area"] + df = DataFrame(rand(10, 2), dtype=object) + df[np.random.rand(df.shape[0]) > 0.5] = "a" + for kind in kinds: + with pytest.raises(TypeError): + df.plot(kind=kind) + + def test_invalid_kind(self): + df = DataFrame(randn(10, 2)) + with pytest.raises(ValueError): + df.plot(kind="aasdf") + + @pytest.mark.parametrize( + "x,y,lbl", + [ + (["B", "C"], "A", "a"), + (["A"], ["B", "C"], ["b", "c"]), + ("A", ["B", "C"], "badlabel"), + ], + ) + def test_invalid_xy_args(self, x, y, lbl): + # GH 18671, 19699 allows y to be list-like but not x + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + with pytest.raises(ValueError): + df.plot(x=x, y=y, label=lbl) + + @pytest.mark.parametrize("x,y", [("A", "B"), (["A"], "B")]) + def test_invalid_xy_args_dup_cols(self, x, y): + # GH 18671, 19699 allows y to be list-like but not x + df = DataFrame([[1, 3, 5], [2, 4, 6]], columns=list("AAB")) + with pytest.raises(ValueError): + df.plot(x=x, y=y) + + @pytest.mark.parametrize( + "x,y,lbl,colors", + [ + ("A", ["B"], ["b"], ["red"]), + ("A", ["B", "C"], ["b", "c"], ["red", "blue"]), + (0, [1, 2], ["bokeh", "cython"], ["green", "yellow"]), + ], + ) + def test_y_listlike(self, x, y, lbl, colors): + # GH 19699: tests list-like y and verifies lbls & colors + df = DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]}) + _check_plot_works(df.plot, x="A", y=y, label=lbl) + + ax = df.plot(x=x, y=y, label=lbl, color=colors) + assert len(ax.lines) == len(y) + self._check_colors(ax.get_lines(), linecolors=colors) + + @pytest.mark.parametrize("x,y,colnames", [(0, 1, ["A", "B"]), (1, 0, [0, 1])]) + def test_xy_args_integer(self, x, y, colnames): + # GH 20056: tests integer args for xy and checks col names + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + df.columns = colnames + _check_plot_works(df.plot, x=x, y=y) + + @pytest.mark.slow + def test_hexbin_basic(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", gridsize=10) + # TODO: need better way to test. 
This just does existence. + assert len(ax.collections) == 1 + + # GH 6951 + axes = df.plot.hexbin(x="A", y="B", subplots=True) + # hexbin should have 2 axes in the figure, 1 for plotting and another + # is colorbar + assert len(axes[0].figure.axes) == 2 + # return value is single axes + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + + @pytest.mark.slow + def test_hexbin_with_c(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", C="C") + assert len(ax.collections) == 1 + + ax = df.plot.hexbin(x="A", y="B", C="C", reduce_C_function=np.std) + assert len(ax.collections) == 1 + + @pytest.mark.slow + def test_hexbin_cmap(self): + df = self.hexbin_df + + # Default to BuGn + ax = df.plot.hexbin(x="A", y="B") + assert ax.collections[0].cmap.name == "BuGn" + + cm = "cubehelix" + ax = df.plot.hexbin(x="A", y="B", colormap=cm) + assert ax.collections[0].cmap.name == cm + + @pytest.mark.slow + def test_no_color_bar(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", colorbar=None) + assert ax.collections[0].colorbar is None + + @pytest.mark.slow + def test_allow_cmap(self): + df = self.hexbin_df + + ax = df.plot.hexbin(x="A", y="B", cmap="YlGn") + assert ax.collections[0].cmap.name == "YlGn" + + with pytest.raises(TypeError): + df.plot.hexbin(x="A", y="B", cmap="YlGn", colormap="BuGn") + + @pytest.mark.slow + def test_pie_df(self): + df = DataFrame( + np.random.rand(5, 3), + columns=["X", "Y", "Z"], + index=["a", "b", "c", "d", "e"], + ) + with pytest.raises(ValueError): + df.plot.pie() + + ax = _check_plot_works(df.plot.pie, y="Y") + self._check_text_labels(ax.texts, df.index) + + ax = _check_plot_works(df.plot.pie, y=2) + self._check_text_labels(ax.texts, df.index) + + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.plot.pie, subplots=True) + assert len(axes) == len(df.columns) + for ax in axes: + self._check_text_labels(ax.texts, df.index) + for ax, ylabel in zip(axes, df.columns): + assert ax.get_ylabel() == ylabel + + labels = ["A", "B", "C", "D", "E"] + color_args = ["r", "g", "b", "c", "m"] + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.plot.pie, subplots=True, labels=labels, colors=color_args + ) + assert len(axes) == len(df.columns) + + for ax in axes: + self._check_text_labels(ax.texts, labels) + self._check_colors(ax.patches, facecolors=color_args) + + def test_pie_df_nan(self): + df = DataFrame(np.random.rand(4, 4)) + for i in range(4): + df.iloc[i, i] = np.nan + fig, axes = self.plt.subplots(ncols=4) + df.plot.pie(subplots=True, ax=axes, legend=True) + + base_expected = ["0", "1", "2", "3"] + for i, ax in enumerate(axes): + expected = list(base_expected) # force copy + expected[i] = "" + result = [x.get_text() for x in ax.texts] + assert result == expected + # legend labels + # NaN's not included in legend with subplots + # see https://github.com/pandas-dev/pandas/issues/8390 + assert [x.get_text() for x in ax.get_legend().get_texts()] == base_expected[ + :i + ] + base_expected[i + 1 :] + + @pytest.mark.slow + def test_errorbar_plot(self): + with warnings.catch_warnings(): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + + # check line plots + ax = _check_plot_works(df.plot, yerr=df_err, logy=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + kinds = ["line", "bar", "barh"] + for kind in kinds: + ax = 
_check_plot_works(df.plot, yerr=df_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works( + df.plot, yerr=df_err["x"], xerr=df_err["x"], kind=kind + ) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # _check_plot_works adds an ax so catch warning. see GH #13188 + axes = _check_plot_works( + df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind + ) + self._check_has_errorbars(axes, xerr=1, yerr=1) + + ax = _check_plot_works( + (df + 1).plot, yerr=df_err, xerr=df_err, kind="bar", log=True + ) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # yerr is raw error values + ax = _check_plot_works(df["y"].plot, yerr=np.ones(12) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # yerr is column name + for yerr in ["yerr", "誤差"]: + s_df = df.copy() + s_df[yerr] = np.ones(12) * 0.2 + ax = _check_plot_works(s_df.plot, yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(s_df.plot, y="y", x="x", yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + with pytest.raises(ValueError): + df.plot(yerr=np.random.randn(11)) + + df_err = DataFrame({"x": ["zzz"] * 12, "y": ["zzz"] * 12}) + with pytest.raises((ValueError, TypeError)): + df.plot(yerr=df_err) + + @pytest.mark.xfail(reason="Iterator is consumed", raises=ValueError) + @pytest.mark.slow + def test_errorbar_plot_iterator(self): + with warnings.catch_warnings(): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + + # yerr is iterator + ax = _check_plot_works(df.plot, 
yerr=itertools.repeat(0.1, len(df))) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + @pytest.mark.slow + def test_errorbar_with_integer_column_names(self): + # test with integer column names + df = DataFrame(np.random.randn(10, 2)) + df_err = DataFrame(np.random.randn(10, 2)) + ax = _check_plot_works(df.plot, yerr=df_err) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, y=0, yerr=1) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + @pytest.mark.slow + def test_errorbar_with_partial_columns(self): + df = DataFrame(np.random.randn(10, 3)) + df_err = DataFrame(np.random.randn(10, 2), columns=[0, 2]) + kinds = ["line", "bar"] + for kind in kinds: + ax = _check_plot_works(df.plot, yerr=df_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + ix = date_range("1/1/2000", periods=10, freq="M") + df.set_index(ix, inplace=True) + df_err.set_index(ix, inplace=True) + ax = _check_plot_works(df.plot, yerr=df_err, kind="line") + self._check_has_errorbars(ax, xerr=0, yerr=2) + + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {"x": np.ones(12) * 0.2, "z": np.ones(12) * 0.4} + df_err = DataFrame(d_err) + for err in [d_err, df_err]: + ax = _check_plot_works(df.plot, yerr=err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + + @pytest.mark.slow + def test_errorbar_timeseries(self): + + with warnings.catch_warnings(): + d = {"x": np.arange(12), "y": np.arange(12, 0, -1)} + d_err = {"x": np.ones(12) * 0.2, "y": np.ones(12) * 0.4} + + # check time-series plots + ix = date_range("1/1/2000", "1/1/2001", freq="M") + tdf = DataFrame(d, index=ix) + tdf_err = DataFrame(d_err, index=ix) + + kinds = ["line", "bar", "barh"] + for kind in kinds: + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, y="y", 
yerr=tdf_err["x"], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, y="y", yerr="x", kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # _check_plot_works adds an ax so catch warning. see GH #13188 + axes = _check_plot_works( + tdf.plot, kind=kind, yerr=tdf_err, subplots=True + ) + self._check_has_errorbars(axes, xerr=0, yerr=1) + + def test_errorbar_asymmetrical(self): + + np.random.seed(0) + err = np.random.rand(3, 2, 5) + + # each column is [0, 1, 2, 3, 4], [3, 4, 5, 6, 7]... + df = DataFrame(np.arange(15).reshape(3, 5)).T + + ax = df.plot(yerr=err, xerr=err / 2) + + yerr_0_0 = ax.collections[1].get_paths()[0].vertices[:, 1] + expected_0_0 = err[0, :, 0] * np.array([-1, 1]) + tm.assert_almost_equal(yerr_0_0, expected_0_0) + + with pytest.raises(ValueError): + df.plot(yerr=err.T) + + tm.close() + + def test_table(self): + df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) + _check_plot_works(df.plot, table=True) + _check_plot_works(df.plot, table=df) + + ax = df.plot() + assert len(ax.tables) == 0 + plotting.table(ax, df.T) + assert len(ax.tables) == 1 + + def test_errorbar_scatter(self): + df = DataFrame(np.random.randn(5, 2), index=range(5), columns=["x", "y"]) + df_err = DataFrame( + np.random.randn(5, 2) / 5, index=range(5), columns=["x", "y"] + ) + + ax = _check_plot_works(df.plot.scatter, x="x", y="y") + self._check_has_errorbars(ax, xerr=0, yerr=0) + ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err) + self._check_has_errorbars(ax, xerr=1, yerr=0) + + ax = _check_plot_works(df.plot.scatter, x="x", y="y", yerr=df_err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot.scatter, x="x", y="y", xerr=df_err, yerr=df_err) + self._check_has_errorbars(ax, xerr=1, yerr=1) + + def _check_errorbar_color(containers, expected, 
has_err="has_xerr"): + lines = [] + errs = [c.lines for c in ax.containers if getattr(c, has_err, False)][0] + for el in errs: + if is_list_like(el): + lines.extend(el) + else: + lines.append(el) + err_lines = [x for x in lines if x in ax.collections] + self._check_colors( + err_lines, linecolors=np.array([expected] * len(err_lines)) + ) + + # GH 8081 + df = DataFrame(np.random.randn(10, 5), columns=["a", "b", "c", "d", "e"]) + ax = df.plot.scatter(x="a", y="b", xerr="d", yerr="e", c="red") + self._check_has_errorbars(ax, xerr=1, yerr=1) + _check_errorbar_color(ax.containers, "red", has_err="has_xerr") + _check_errorbar_color(ax.containers, "red", has_err="has_yerr") + + ax = df.plot.scatter(x="a", y="b", yerr="e", color="green") + self._check_has_errorbars(ax, xerr=0, yerr=1) + _check_errorbar_color(ax.containers, "green", has_err="has_yerr") + + @pytest.mark.slow + def test_sharex_and_ax(self): + # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, + # the axis in fig.get_axis() are sorted differently than pandas + # expected them, so make sure that only the right ones are removed + import matplotlib.pyplot as plt + + plt.close("all") + gs, axes = _generate_4_axes_via_gridspec() + + df = DataFrame( + { + "a": [1, 2, 3, 4, 5, 6], + "b": [1, 2, 3, 4, 5, 6], + "c": [1, 2, 3, 4, 5, 6], + "d": [1, 2, 3, 4, 5, 6], + } + ) + + def _check(axes): + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[0], axes[2]]: + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + for ax in [axes[1], axes[3]]: + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax, sharex=True) + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + with 
tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + # without sharex, no labels should be touched! + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax) + + gs.tight_layout(plt.gcf()) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + @pytest.mark.slow + def test_sharey_and_ax(self): + # https://github.com/pandas-dev/pandas/issues/9737 using gridspec, + # the axis in fig.get_axis() are sorted differently than pandas + # expected them, so make sure that only the right ones are removed + import matplotlib.pyplot as plt + + gs, axes = _generate_4_axes_via_gridspec() + + df = DataFrame( + { + "a": [1, 2, 3, 4, 5, 6], + "b": [1, 2, 3, 4, 5, 6], + "c": [1, 2, 3, 4, 5, 6], + "d": [1, 2, 3, 4, 5, 6], + } + ) + + def _check(axes): + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + for ax in [axes[0], axes[1]]: + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[2], axes[3]]: + self._check_visible(ax.get_yticklabels(), visible=False) + + for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax, sharey=True) + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharey=True) + + gs.tight_layout(plt.gcf()) + _check(axes) + tm.close() + + gs, axes = _generate_4_axes_via_gridspec() + # without sharex, no labels should be touched! 
+ for ax in axes: + df.plot(x="a", y="b", title="title", ax=ax) + + gs.tight_layout(plt.gcf()) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + + @td.skip_if_no_scipy + def test_memory_leak(self): + """ Check that every plot type gets properly collected. """ + import weakref + import gc + + results = {} + for kind in plotting.PlotAccessor._all_kinds: + + args = {} + if kind in ["hexbin", "scatter", "pie"]: + df = self.hexbin_df + args = {"x": "A", "y": "B"} + elif kind == "area": + df = self.tdf.abs() + else: + df = self.tdf + + # Use a weakref so we can see if the object gets collected without + # also preventing it from being collected + results[kind] = weakref.proxy(df.plot(kind=kind, **args)) + + # have matplotlib delete all the figures + tm.close() + # force a garbage collection + gc.collect() + for key in results: + # check that every plot was collected + with pytest.raises(ReferenceError): + # need to actually access something to get an error + results[key].lines + + @pytest.mark.slow + def test_df_subplots_patterns_minorticks(self): + # GH 10657 + import matplotlib.pyplot as plt + + df = DataFrame( + np.random.randn(10, 2), + index=date_range("1/1/2000", periods=10), + columns=list("AB"), + ) + + # shared subplots + fig, axes = plt.subplots(2, 1, sharex=True) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + fig, axes = plt.subplots(2, 1) + with 
tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of 1st ax must be hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # not shared + fig, axes = plt.subplots(2, 1) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + @pytest.mark.slow + def test_df_gridspec_patterns(self): + # GH 10819 + import matplotlib.pyplot as plt + import matplotlib.gridspec as gridspec + + ts = Series(np.random.randn(10), index=date_range("1/1/2000", periods=10)) + + df = DataFrame(np.random.randn(10, 2), index=ts.index, columns=list("AB")) + + def _get_vertical_grid(): + gs = gridspec.GridSpec(3, 1) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:2, :]) + ax2 = fig.add_subplot(gs[2, :]) + return ax1, ax2 + + def _get_horizontal_grid(): + gs = gridspec.GridSpec(1, 3) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:, :2]) + ax2 = fig.add_subplot(gs[:, 2]) + return ax1, ax2 + + for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: + ax1 = ts.plot(ax=ax1) + assert len(ax1.lines) == 1 + ax2 = df.plot(ax=ax2) + assert len(ax2.lines) == 2 + for ax in [ax1, ax2]: + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # subplots=True + for ax1, ax2 in [_get_vertical_grid(), _get_horizontal_grid()]: + axes = 
df.plot(subplots=True, ax=[ax1, ax2]) + assert len(ax1.lines) == 1 + assert len(ax2.lines) == 1 + for ax in axes: + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # vertical / subplots / sharex=True / sharey=True + ax1, ax2 = _get_vertical_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 + for ax in [ax1, ax2]: + # yaxis are visible because there is only one column + self._check_visible(ax.get_yticklabels(), visible=True) + # xaxis of axes0 (top) are hidden + self._check_visible(axes[0].get_xticklabels(), visible=False) + self._check_visible(axes[0].get_xticklabels(minor=True), visible=False) + self._check_visible(axes[1].get_xticklabels(), visible=True) + self._check_visible(axes[1].get_xticklabels(minor=True), visible=True) + tm.close() + + # horizontal / subplots / sharex=True / sharey=True + ax1, ax2 = _get_horizontal_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, sharey=True) + assert len(axes[0].lines) == 1 + assert len(axes[1].lines) == 1 + self._check_visible(axes[0].get_yticklabels(), visible=True) + # yaxis of axes1 (right) are hidden + self._check_visible(axes[1].get_yticklabels(), visible=False) + for ax in [ax1, ax2]: + # xaxis are visible because there is only one column + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # boxed + def _get_boxed_grid(): + gs = gridspec.GridSpec(3, 3) + fig = plt.figure() + ax1 = fig.add_subplot(gs[:2, :2]) + ax2 = fig.add_subplot(gs[:2, 2]) + ax3 = fig.add_subplot(gs[2, :2]) + ax4 = fig.add_subplot(gs[2, 2]) + return ax1, ax2, ax3, ax4 + + axes = _get_boxed_grid() + df 
= DataFrame(np.random.randn(10, 4), index=ts.index, columns=list("ABCD")) + axes = df.plot(subplots=True, ax=axes) + for ax in axes: + assert len(ax.lines) == 1 + # axis are visible because these are not shared + self._check_visible(ax.get_yticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + # subplots / sharex=True / sharey=True + axes = _get_boxed_grid() + with tm.assert_produces_warning(UserWarning): + axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True) + for ax in axes: + assert len(ax.lines) == 1 + for ax in [axes[0], axes[2]]: # left column + self._check_visible(ax.get_yticklabels(), visible=True) + for ax in [axes[1], axes[3]]: # right column + self._check_visible(ax.get_yticklabels(), visible=False) + for ax in [axes[0], axes[1]]: # top row + self._check_visible(ax.get_xticklabels(), visible=False) + self._check_visible(ax.get_xticklabels(minor=True), visible=False) + for ax in [axes[2], axes[3]]: # bottom row + self._check_visible(ax.get_xticklabels(), visible=True) + self._check_visible(ax.get_xticklabels(minor=True), visible=True) + tm.close() + + @pytest.mark.slow + def test_df_grid_settings(self): + # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 + self._check_grid_settings( + DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}), + plotting.PlotAccessor._dataframe_kinds, + kws={"x": "a", "y": "b"}, + ) + + def test_invalid_colormap(self): + df = DataFrame(randn(3, 2), columns=["A", "B"]) + + with pytest.raises(ValueError): + df.plot(colormap="invalid_colormap") + + def test_plain_axes(self): + + # supplied ax itself is a SubplotAxes, but figure contains also + # a plain Axes object (GH11556) + fig, ax = self.plt.subplots() + fig.add_axes([0.2, 0.2, 0.2, 0.2]) + Series(rand(10)).plot(ax=ax) + + # supplied ax itself is a plain Axes, but because the cmap keyword + # a new ax is created for the colorbar -> also 
multiples axes (GH11520) + df = DataFrame({"a": randn(8), "b": randn(8)}) + fig = self.plt.figure() + ax = fig.add_axes((0, 0, 1, 1)) + df.plot(kind="scatter", ax=ax, x="a", y="b", c="a", cmap="hsv") + + # other examples + fig, ax = self.plt.subplots() + from mpl_toolkits.axes_grid1 import make_axes_locatable + + divider = make_axes_locatable(ax) + cax = divider.append_axes("right", size="5%", pad=0.05) + Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=cax) + + fig, ax = self.plt.subplots() + from mpl_toolkits.axes_grid1.inset_locator import inset_axes + + iax = inset_axes(ax, width="30%", height=1.0, loc=3) + Series(rand(10)).plot(ax=ax) + Series(rand(10)).plot(ax=iax) + + def test_passed_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + colormap = mpl.colors.ListedColormap(color_tuples) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + def test_rcParams_bar_colors(self): + import matplotlib as mpl + + color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] + with mpl.rc_context(rc={"axes.prop_cycle": mpl.cycler("color", color_tuples)}): + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + assert color_tuples == [c.get_facecolor() for c in barplot.patches] + + @pytest.mark.parametrize("method", ["line", "barh", "bar"]) + def test_secondary_axis_font_size(self, method): + # GH: 12565 + df = ( + pd.DataFrame(np.random.randn(15, 2), columns=list("AB")) + .assign(C=lambda df: df.B.cumsum()) + .assign(D=lambda df: df.C * 1.1) + ) + + fontsize = 20 + sy = ["C", "D"] + + kwargs = dict(secondary_y=sy, fontsize=fontsize, mark_right=True) + ax = getattr(df.plot, method)(**kwargs) + self._check_ticks_props(axes=ax.right_ax, ylabelsize=fontsize) + + @pytest.mark.slow + def test_x_string_values_ticks(self): + # Test if string plot index have a fixed xtick position + # GH: 7612, GH: 22334 + df = 
pd.DataFrame( + { + "sales": [3, 2, 3], + "visits": [20, 42, 28], + "day": ["Monday", "Tuesday", "Wednesday"], + } + ) + ax = df.plot.area(x="day") + ax.set_xlim(-1, 3) + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + labels_position = dict(zip(xticklabels, ax.get_xticks())) + # Testing if the label stayed at the right position + assert labels_position["Monday"] == 0.0 + assert labels_position["Tuesday"] == 1.0 + assert labels_position["Wednesday"] == 2.0 + + @pytest.mark.slow + def test_x_multiindex_values_ticks(self): + # Test if multiindex plot index have a fixed xtick position + # GH: 15912 + index = pd.MultiIndex.from_product([[2012, 2013], [1, 2]]) + df = pd.DataFrame(np.random.randn(4, 2), columns=["A", "B"], index=index) + ax = df.plot() + ax.set_xlim(-1, 4) + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + labels_position = dict(zip(xticklabels, ax.get_xticks())) + # Testing if the label stayed at the right position + assert labels_position["(2012, 1)"] == 0.0 + assert labels_position["(2012, 2)"] == 1.0 + assert labels_position["(2013, 1)"] == 2.0 + assert labels_position["(2013, 2)"] == 3.0 + + @pytest.mark.parametrize("kind", ["line", "area"]) + def test_xlim_plot_line(self, kind): + # test if xlim is set correctly in plot.line and plot.area + # GH 27686 + df = pd.DataFrame([2, 4], index=[1, 2]) + ax = df.plot(kind=kind) + xlims = ax.get_xlim() + assert xlims[0] < 1 + assert xlims[1] > 2 + + def test_xlim_plot_line_correctly_in_mixed_plot_type(self): + # test if xlim is set correctly when ax contains multiple different kinds + # of plots, GH 27686 + fig, ax = self.plt.subplots() + + indexes = ["k1", "k2", "k3", "k4"] + df = pd.DataFrame( + { + "s1": [1000, 2000, 1500, 2000], + "s2": [900, 1400, 2000, 3000], + "s3": [1500, 1500, 1600, 1200], + "secondary_y": [1, 3, 4, 3], + }, + index=indexes, + ) + df[["s1", "s2", "s3"]].plot.bar(ax=ax, stacked=False) + df[["secondary_y"]].plot(ax=ax, secondary_y=True) + + xlims = 
ax.get_xlim() + assert xlims[0] < 0 + assert xlims[1] > 3 + + # make sure axis labels are plotted correctly as well + xticklabels = [t.get_text() for t in ax.get_xticklabels()] + assert xticklabels == indexes + + def test_subplots_sharex_false(self): + # test when sharex is set to False, two plots should have different + # labels, GH 25160 + df = pd.DataFrame(np.random.rand(10, 2)) + df.iloc[5:, 1] = np.nan + df.iloc[:5, 0] = np.nan + + figs, axs = self.plt.subplots(2, 1) + df.plot.line(ax=axs, subplots=True, sharex=False) + + expected_ax1 = np.arange(4.5, 10, 0.5) + expected_ax2 = np.arange(-0.5, 5, 0.5) + + tm.assert_numpy_array_equal(axs[0].get_xticks(), expected_ax1) + tm.assert_numpy_array_equal(axs[1].get_xticks(), expected_ax2) + + def test_plot_no_rows(self): + # GH 27758 + df = pd.DataFrame(columns=["foo"], dtype=int) + assert df.empty + ax = df.plot() + assert len(ax.get_lines()) == 1 + line = ax.get_lines()[0] + assert len(line.get_xdata()) == 0 + assert len(line.get_ydata()) == 0 + + def test_plot_no_numeric_data(self): + df = pd.DataFrame(["a", "b", "c"]) + with pytest.raises(TypeError): + df.plot() + + def test_missing_markers_legend(self): + # 14958 + df = pd.DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"]) + ax = df.plot(y=["A"], marker="x", linestyle="solid") + df.plot(y=["B"], marker="o", linestyle="dotted", ax=ax) + df.plot(y=["C"], marker="<", linestyle="dotted", ax=ax) + + self._check_legend_labels(ax, labels=["A", "B", "C"]) + self._check_legend_marker(ax, expected_markers=["x", "o", "<"]) + + def test_missing_markers_legend_using_style(self): + # 14563 + df = pd.DataFrame( + { + "A": [1, 2, 3, 4, 5, 6], + "B": [2, 4, 1, 3, 2, 4], + "C": [3, 3, 2, 6, 4, 2], + "X": [1, 2, 3, 4, 5, 6], + } + ) + + fig, ax = self.plt.subplots() + for kind in "ABC": + df.plot("X", kind, label=kind, ax=ax, style=".") + + self._check_legend_labels(ax, labels=["A", "B", "C"]) + self._check_legend_marker(ax, expected_markers=[".", ".", "."]) + + +def 
_generate_4_axes_via_gridspec(): + import matplotlib.pyplot as plt + import matplotlib as mpl + import matplotlib.gridspec # noqa + + gs = mpl.gridspec.GridSpec(2, 2) + ax_tl = plt.subplot(gs[0, 0]) + ax_ll = plt.subplot(gs[1, 0]) + ax_tr = plt.subplot(gs[0, 1]) + ax_lr = plt.subplot(gs[1, 1]) + + return gs, [ax_tl, ax_ll, ax_tr, ax_lr] diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/tests/plotting/test_groupby.py new file mode 100644 index 00000000..8fec4bb1 --- /dev/null +++ b/pandas/tests/plotting/test_groupby.py @@ -0,0 +1,69 @@ +# coding: utf-8 + +""" Test cases for GroupBy.plot """ + + +import numpy as np + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.tests.plotting.common import TestPlotBase + + +@td.skip_if_no_mpl +class TestDataFrameGroupByPlots(TestPlotBase): + def test_series_groupby_plotting_nominally_works(self): + n = 10 + weight = Series(np.random.normal(166, 20, size=n)) + height = Series(np.random.normal(60, 10, size=n)) + with tm.RNGContext(42): + gender = np.random.choice(["male", "female"], size=n) + + weight.groupby(gender).plot() + tm.close() + height.groupby(gender).hist() + tm.close() + # Regression test for GH8733 + height.groupby(gender).plot(alpha=0.5) + tm.close() + + def test_plotting_with_float_index_works(self): + # GH 7025 + df = DataFrame( + {"def": [1, 1, 1, 2, 2, 2, 3, 3, 3], "val": np.random.randn(9)}, + index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0], + ) + + df.groupby("def")["val"].plot() + tm.close() + df.groupby("def")["val"].apply(lambda x: x.plot()) + tm.close() + + def test_hist_single_row(self): + # GH10214 + bins = np.arange(80, 100 + 2, 1) + df = DataFrame({"Name": ["AAA", "BBB"], "ByCol": [1, 2], "Mark": [85, 89]}) + df["Mark"].hist(by=df["ByCol"], bins=bins) + df = DataFrame({"Name": ["AAA"], "ByCol": [1], "Mark": [85]}) + df["Mark"].hist(by=df["ByCol"], bins=bins) + + def test_plot_submethod_works(self): + df = 
DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")}) + df.groupby("z").plot.scatter("x", "y") + tm.close() + df.groupby("z")["x"].plot.line() + tm.close() + + def test_plot_kwargs(self): + + df = DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")}) + + res = df.groupby("z").plot(kind="scatter", x="x", y="y") + # check that a scatter plot is effectively plotted: the axes should + # contain a PathCollection from the scatter plot (GH11805) + assert len(res["a"].collections) == 1 + + res = df.groupby("z").plot.scatter(x="x", y="y") + assert len(res["a"].collections) == 1 diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py new file mode 100644 index 00000000..50ebbc22 --- /dev/null +++ b/pandas/tests/plotting/test_hist_method.py @@ -0,0 +1,464 @@ +# coding: utf-8 + +""" Test cases for .hist method """ + +import numpy as np +from numpy.random import randn +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + + +@td.skip_if_no_mpl +class TestSeriesPlots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.ts = tm.makeTimeSeries() + self.ts.name = "ts" + + @pytest.mark.slow + def test_hist_legacy(self): + _check_plot_works(self.ts.hist) + _check_plot_works(self.ts.hist, grid=False) + _check_plot_works(self.ts.hist, figsize=(8, 10)) + # _check_plot_works adds an ax so catch warning. 
see GH #13188 + with tm.assert_produces_warning(UserWarning): + _check_plot_works(self.ts.hist, by=self.ts.index.month) + with tm.assert_produces_warning(UserWarning): + _check_plot_works(self.ts.hist, by=self.ts.index.month, bins=5) + + fig, ax = self.plt.subplots(1, 1) + _check_plot_works(self.ts.hist, ax=ax) + _check_plot_works(self.ts.hist, ax=ax, figure=fig) + _check_plot_works(self.ts.hist, figure=fig) + tm.close() + + fig, (ax1, ax2) = self.plt.subplots(1, 2) + _check_plot_works(self.ts.hist, figure=fig, ax=ax1) + _check_plot_works(self.ts.hist, figure=fig, ax=ax2) + + with pytest.raises(ValueError): + self.ts.hist(by=self.ts.index, figure=fig) + + @pytest.mark.slow + def test_hist_bins_legacy(self): + df = DataFrame(np.random.randn(10, 2)) + ax = df.hist(bins=2)[0][0] + assert len(ax.patches) == 2 + + @pytest.mark.slow + def test_hist_layout(self): + df = self.hist_df + with pytest.raises(ValueError): + df.height.hist(layout=(1, 1)) + + with pytest.raises(ValueError): + df.height.hist(layout=[1, 1]) + + @pytest.mark.slow + def test_hist_layout_with_by(self): + df = self.hist_df + + # _check_plot_works adds an `ax` kwarg to the method call + # so we get a warning about an axis being cleared, even + # though we don't explicing pass one, see GH #13188 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1)) + self._check_axes_shape(axes, axes_num=2, layout=(2, 1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.height.hist, by=df.gender, layout=(3, -1)) + self._check_axes_shape(axes, axes_num=2, layout=(3, 1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.height.hist, by=df.category, layout=(4, 1)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.height.hist, by=df.category, layout=(2, -1)) + self._check_axes_shape(axes, axes_num=4, layout=(2, 
2)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.height.hist, by=df.category, layout=(3, -1)) + self._check_axes_shape(axes, axes_num=4, layout=(3, 2)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.height.hist, by=df.category, layout=(-1, 4)) + self._check_axes_shape(axes, axes_num=4, layout=(1, 4)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.height.hist, by=df.classroom, layout=(2, 2)) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + + axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) + + @pytest.mark.slow + def test_hist_no_overlap(self): + from matplotlib.pyplot import subplot, gcf + + x = Series(randn(2)) + y = Series(randn(2)) + subplot(121) + x.hist() + subplot(122) + y.hist() + fig = gcf() + axes = fig.axes + assert len(axes) == 2 + + @pytest.mark.slow + def test_hist_by_no_extra_plots(self): + df = self.hist_df + axes = df.height.hist(by=df.gender) # noqa + assert len(self.plt.get_fignums()) == 1 + + @pytest.mark.slow + def test_plot_fails_when_ax_differs_from_figure(self): + from pylab import figure + + fig1 = figure() + fig2 = figure() + ax1 = fig1.add_subplot(111) + with pytest.raises(AssertionError): + self.ts.hist(ax=ax1, figure=fig2) + + +@td.skip_if_no_mpl +class TestDataFramePlots(TestPlotBase): + @pytest.mark.slow + def test_hist_df_legacy(self): + from matplotlib.patches import Rectangle + + with tm.assert_produces_warning(UserWarning): + _check_plot_works(self.hist_df.hist) + + # make sure layout is handled + df = DataFrame(randn(100, 3)) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.hist, grid=False) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + assert not axes[1, 1].get_visible() + + df = DataFrame(randn(100, 1)) + _check_plot_works(df.hist) + + # make sure layout is handled + df = 
DataFrame(randn(100, 6)) + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.hist, layout=(4, 2)) + self._check_axes_shape(axes, axes_num=6, layout=(4, 2)) + + # make sure sharex, sharey is handled + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.hist, sharex=True, sharey=True) + + # handle figsize arg + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.hist, figsize=(8, 10)) + + # check bins argument + with tm.assert_produces_warning(UserWarning): + _check_plot_works(df.hist, bins=5) + + # make sure xlabelsize and xrot are handled + ser = df[0] + xf, yf = 20, 18 + xrot, yrot = 30, 40 + axes = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot) + self._check_ticks_props( + axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot + ) + + xf, yf = 20, 18 + xrot, yrot = 30, 40 + axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot) + self._check_ticks_props( + axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot + ) + + tm.close() + + ax = ser.hist(cumulative=True, bins=4, density=True) + # height of last bin (index 5) must be 1.0 + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + tm.assert_almost_equal(rects[-1].get_height(), 1.0) + + tm.close() + ax = ser.hist(log=True) + # scale of y must be 'log' + self._check_ax_scales(ax, yaxis="log") + + tm.close() + + # propagate attr exception from matplotlib.Axes.hist + with pytest.raises(AttributeError): + ser.hist(foo="bar") + + @pytest.mark.slow + def test_hist_non_numerical_raises(self): + # gh-10444 + df = DataFrame(np.random.rand(10, 2)) + df_o = df.astype(np.object) + + msg = "hist method requires numerical columns, nothing to plot." 
+ with pytest.raises(ValueError, match=msg): + df_o.hist() + + @pytest.mark.slow + def test_hist_layout(self): + df = DataFrame(randn(100, 3)) + + layout_to_expected_size = ( + {"layout": None, "expected_size": (2, 2)}, # default is 2x2 + {"layout": (2, 2), "expected_size": (2, 2)}, + {"layout": (4, 1), "expected_size": (4, 1)}, + {"layout": (1, 4), "expected_size": (1, 4)}, + {"layout": (3, 3), "expected_size": (3, 3)}, + {"layout": (-1, 4), "expected_size": (1, 4)}, + {"layout": (4, -1), "expected_size": (4, 1)}, + {"layout": (-1, 2), "expected_size": (2, 2)}, + {"layout": (2, -1), "expected_size": (2, 2)}, + ) + + for layout_test in layout_to_expected_size: + axes = df.hist(layout=layout_test["layout"]) + expected = layout_test["expected_size"] + self._check_axes_shape(axes, axes_num=3, layout=expected) + + # layout too small for all 4 plots + with pytest.raises(ValueError): + df.hist(layout=(1, 1)) + + # invalid format for layout + with pytest.raises(ValueError): + df.hist(layout=(1,)) + with pytest.raises(ValueError): + df.hist(layout=(-1, -1)) + + @pytest.mark.slow + # GH 9351 + def test_tight_layout(self): + df = DataFrame(randn(100, 3)) + _check_plot_works(df.hist) + self.plt.tight_layout() + + tm.close() + + def test_hist_subplot_xrot(self): + # GH 30288 + df = DataFrame( + { + "length": [1.5, 0.5, 1.2, 0.9, 3], + "animal": ["pig", "rabbit", "pig", "pig", "rabbit"], + } + ) + axes = _check_plot_works( + df.hist, + filterwarnings="always", + column="length", + by="animal", + bins=5, + xrot=0, + ) + self._check_ticks_props(axes, xrot=0) + + +@td.skip_if_no_mpl +class TestDataFrameGroupByPlots(TestPlotBase): + @pytest.mark.slow + def test_grouped_hist_legacy(self): + from matplotlib.patches import Rectangle + from pandas.plotting._matplotlib.hist import _grouped_hist + + df = DataFrame(randn(500, 2), columns=["A", "B"]) + df["C"] = np.random.randint(0, 4, 500) + df["D"] = ["X"] * 500 + + axes = _grouped_hist(df.A, by=df.C) + self._check_axes_shape(axes, 
axes_num=4, layout=(2, 2)) + + tm.close() + axes = df.hist(by=df.C) + self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) + + tm.close() + # group by a key with single value + axes = df.hist(by="D", rot=30) + self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) + self._check_ticks_props(axes, xrot=30) + + tm.close() + # make sure kwargs to hist are handled + xf, yf = 20, 18 + xrot, yrot = 30, 40 + + axes = _grouped_hist( + df.A, + by=df.C, + cumulative=True, + bins=4, + xlabelsize=xf, + xrot=xrot, + ylabelsize=yf, + yrot=yrot, + density=True, + ) + # height of last bin (index 5) must be 1.0 + for ax in axes.ravel(): + rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] + height = rects[-1].get_height() + tm.assert_almost_equal(height, 1.0) + self._check_ticks_props( + axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot + ) + + tm.close() + axes = _grouped_hist(df.A, by=df.C, log=True) + # scale of y must be 'log' + self._check_ax_scales(axes, yaxis="log") + + tm.close() + # propagate attr exception from matplotlib.Axes.hist + with pytest.raises(AttributeError): + _grouped_hist(df.A, by=df.C, foo="bar") + + msg = "Specify figure size by tuple instead" + with pytest.raises(ValueError, match=msg): + df.hist(by="C", figsize="default") + + @pytest.mark.slow + def test_grouped_hist_legacy2(self): + n = 10 + weight = Series(np.random.normal(166, 20, size=n)) + height = Series(np.random.normal(60, 10, size=n)) + with tm.RNGContext(42): + gender_int = np.random.choice([0, 1], size=n) + df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int}) + gb = df_int.groupby("gender") + axes = gb.hist() + assert len(axes) == 2 + assert len(self.plt.get_fignums()) == 2 + tm.close() + + @pytest.mark.slow + def test_grouped_hist_layout(self): + df = self.hist_df + msg = "Layout of 1x1 must be larger than required size 2" + with pytest.raises(ValueError, match=msg): + df.hist(column="weight", by=df.gender, layout=(1, 1)) + + msg = "Layout of 
1x3 must be larger than required size 4" + with pytest.raises(ValueError, match=msg): + df.hist(column="height", by=df.category, layout=(1, 3)) + + msg = "At least one dimension of layout must be positive" + with pytest.raises(ValueError, match=msg): + df.hist(column="height", by=df.category, layout=(-1, -1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.hist, column="height", by=df.gender, layout=(2, 1) + ) + self._check_axes_shape(axes, axes_num=2, layout=(2, 1)) + + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.hist, column="height", by=df.gender, layout=(2, -1) + ) + self._check_axes_shape(axes, axes_num=2, layout=(2, 1)) + + axes = df.hist(column="height", by=df.category, layout=(4, 1)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = df.hist(column="height", by=df.category, layout=(-1, 1)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) + + axes = df.hist(column="height", by=df.category, layout=(4, 2), figsize=(12, 8)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 8)) + tm.close() + + # GH 6769 + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works( + df.hist, column="height", by="classroom", layout=(2, 2) + ) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + + # without column + with tm.assert_produces_warning(UserWarning): + axes = _check_plot_works(df.hist, by="classroom") + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + + axes = df.hist(by="gender", layout=(3, 5)) + self._check_axes_shape(axes, axes_num=2, layout=(3, 5)) + + axes = df.hist(column=["height", "weight", "category"]) + self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) + + @pytest.mark.slow + def test_grouped_hist_multiple_axes(self): + # GH 6970, GH 7069 + df = self.hist_df + + fig, axes = self.plt.subplots(2, 3) + returned = df.hist(column=["height", "weight", "category"], ax=axes[0]) + self._check_axes_shape(returned, 
axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[0]) + assert returned[0].figure is fig + returned = df.hist(by="classroom", ax=axes[1]) + self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) + tm.assert_numpy_array_equal(returned, axes[1]) + assert returned[0].figure is fig + + with pytest.raises(ValueError): + fig, axes = self.plt.subplots(2, 3) + # pass different number of axes from required + axes = df.hist(column="height", ax=axes) + + @pytest.mark.slow + def test_axis_share_x(self): + df = self.hist_df + # GH4089 + ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True) + + # share x + assert ax1._shared_x_axes.joined(ax1, ax2) + assert ax2._shared_x_axes.joined(ax1, ax2) + + # don't share y + assert not ax1._shared_y_axes.joined(ax1, ax2) + assert not ax2._shared_y_axes.joined(ax1, ax2) + + @pytest.mark.slow + def test_axis_share_y(self): + df = self.hist_df + ax1, ax2 = df.hist(column="height", by=df.gender, sharey=True) + + # share y + assert ax1._shared_y_axes.joined(ax1, ax2) + assert ax2._shared_y_axes.joined(ax1, ax2) + + # don't share x + assert not ax1._shared_x_axes.joined(ax1, ax2) + assert not ax2._shared_x_axes.joined(ax1, ax2) + + @pytest.mark.slow + def test_axis_share_xy(self): + df = self.hist_df + ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True, sharey=True) + + # share both x and y + assert ax1._shared_x_axes.joined(ax1, ax2) + assert ax2._shared_x_axes.joined(ax1, ax2) + + assert ax1._shared_y_axes.joined(ax1, ax2) + assert ax2._shared_y_axes.joined(ax1, ax2) diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py new file mode 100644 index 00000000..60788aac --- /dev/null +++ b/pandas/tests/plotting/test_misc.py @@ -0,0 +1,412 @@ +# coding: utf-8 + +""" Test cases for misc plot functions """ + +import numpy as np +from numpy import random +from numpy.random import randn +import pytest + +import pandas.util._test_decorators as td + +from pandas import 
DataFrame, Series
import pandas._testing as tm
from pandas.tests.plotting.common import TestPlotBase, _check_plot_works

import pandas.plotting as plotting


@td.skip_if_mpl
def test_import_error_message():
    """``DataFrame.plot`` raises ImportError naming matplotlib as required."""
    # GH-19810
    df = DataFrame({"A": [1, 2]})

    with pytest.raises(ImportError, match="matplotlib is required for plotting"):
        df.plot()


def test_get_accessor_args():
    """Check how ``PlotAccessor._get_call_args`` splits x, y, kind and kwargs."""
    func = plotting._core.PlotAccessor._get_call_args

    # Data that is neither a Series nor a DataFrame is rejected.
    msg = "Called plot accessor for type list, expected Series or DataFrame"
    with pytest.raises(TypeError, match=msg):
        func(backend_name="", data=[], args=[], kwargs={})

    # Positional arguments are rejected for Series data.
    msg = "should not be called with positional arguments"
    with pytest.raises(TypeError, match=msg):
        func(backend_name="", data=Series(dtype=object), args=["line", None], kwargs={})

    # For a DataFrame the positional argument becomes x; y and kind are
    # pulled out of kwargs and the remainder is passed through.
    x, y, kind, kwargs = func(
        backend_name="",
        data=DataFrame(),
        args=["x"],
        kwargs={"y": "y", "kind": "bar", "grid": False},
    )
    assert x == "x"
    assert y == "y"
    assert kind == "bar"
    assert kwargs == {"grid": False}

    # With the matplotlib backend and no arguments, kind is "line" and the
    # returned kwargs hold 22 entries.
    x, y, kind, kwargs = func(
        backend_name="pandas.plotting._matplotlib",
        data=Series(dtype=object),
        args=[],
        kwargs={},
    )
    assert x is None
    assert y is None
    assert kind == "line"
    assert len(kwargs) == 22


@td.skip_if_no_mpl
class TestSeriesPlots(TestPlotBase):
    """Smoke tests for the Series-oriented helpers in ``pandas.plotting``."""

    def setup_method(self, method):
        """Reset matplotlib rc defaults and build the shared time series."""
        TestPlotBase.setup_method(self, method)
        import matplotlib as mpl

        mpl.rcdefaults()

        self.ts = tm.makeTimeSeries()
        self.ts.name = "ts"

    @pytest.mark.slow
    def test_autocorrelation_plot(self):
        """autocorrelation_plot accepts a Series or its values and a label."""
        from pandas.plotting import autocorrelation_plot

        _check_plot_works(autocorrelation_plot, series=self.ts)
        _check_plot_works(autocorrelation_plot, series=self.ts.values)

        ax = autocorrelation_plot(self.ts, label="Test")
        self._check_legend_labels(ax, labels=["Test"])

    @pytest.mark.slow
    def test_lag_plot(self):
        """lag_plot works with the default lag and with ``lag=5``."""
        from pandas.plotting import lag_plot

        _check_plot_works(lag_plot, series=self.ts)
        _check_plot_works(lag_plot, series=self.ts, lag=5)

    @pytest.mark.slow
    def test_bootstrap_plot(self):
        """bootstrap_plot works with ``size=10``."""
        from pandas.plotting import bootstrap_plot

        _check_plot_works(bootstrap_plot, series=self.ts, size=10)


@td.skip_if_no_mpl
class TestDataFramePlots(TestPlotBase):
    """Tests for the DataFrame-oriented helpers in ``pandas.plotting``."""

    @td.skip_if_no_scipy
    def test_scatter_matrix_axis(self):
        """scatter_matrix produces the expected y tick labels and tick props."""
        from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0

        scatter_matrix = plotting.scatter_matrix

        # Fixed seed so the expected tick labels below are reproducible.
        with tm.RNGContext(42):
            df = DataFrame(randn(100, 3))

        # we are plotting multiples on a sub-plot
        with tm.assert_produces_warning(
            UserWarning, raise_on_extra_warnings=_mpl_ge_3_0_0()
        ):
            axes = _check_plot_works(
                scatter_matrix, filterwarnings="always", frame=df, range_padding=0.1
            )
        axes0_labels = axes[0][0].yaxis.get_majorticklabels()

        # GH 5662
        expected = ["-2", "0", "2"]
        self._check_text_labels(axes0_labels, expected)
        self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)

        # Rescale the first column and check that the labels follow.
        df[0] = (df[0] - 2) / 3

        # we are plotting multiples on a sub-plot
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(
                scatter_matrix, filterwarnings="always", frame=df, range_padding=0.1
            )
        axes0_labels = axes[0][0].yaxis.get_majorticklabels()
        expected = ["-1.0", "-0.5", "0.0"]
        self._check_text_labels(axes0_labels, expected)
        self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)

    @pytest.mark.slow
    def test_andrews_curves(self, iris):
        """andrews_curves honours hex colors, named colors and colormaps."""
        from pandas.plotting import andrews_curves
        from matplotlib import cm

        df = iris

        _check_plot_works(andrews_curves, frame=df, class_column="Name")

        rgba = ("#556270", "#4ECDC4", "#C7F464")
        ax = _check_plot_works(
            andrews_curves, frame=df, class_column="Name", color=rgba
        )
        self._check_colors(
            ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10]
        )

        cnames = ["dodgerblue", "aquamarine", "seagreen"]
        ax = _check_plot_works(
            andrews_curves, frame=df, class_column="Name", color=cnames
        )
        self._check_colors(
            ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10]
        )

        ax = _check_plot_works(
            andrews_curves, frame=df, class_column="Name", colormap=cm.jet
        )
        cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())]
        self._check_colors(
            ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10]
        )

        # Repeat the same checks on a random frame with a single class.
        length = 10
        df = DataFrame(
            {
                "A": random.rand(length),
                "B": random.rand(length),
                "C": random.rand(length),
                "Name": ["A"] * length,
            }
        )

        _check_plot_works(andrews_curves, frame=df, class_column="Name")

        rgba = ("#556270", "#4ECDC4", "#C7F464")
        ax = _check_plot_works(
            andrews_curves, frame=df, class_column="Name", color=rgba
        )
        self._check_colors(
            ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10]
        )

        cnames = ["dodgerblue", "aquamarine", "seagreen"]
        ax = _check_plot_works(
            andrews_curves, frame=df, class_column="Name", color=cnames
        )
        self._check_colors(
            ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10]
        )

        ax = _check_plot_works(
            andrews_curves, frame=df, class_column="Name", colormap=cm.jet
        )
        cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())]
        self._check_colors(
            ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10]
        )

        # Legend handles must carry the explicitly requested colors.
        colors = ["b", "g", "r"]
        df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors})
        ax = andrews_curves(df, "Name", color=colors)
        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles, linecolors=colors)

    @pytest.mark.slow
    def test_parallel_coordinates(self, iris):
        """parallel_coordinates honours colors, colormaps and ``axvlines``."""
        from pandas.plotting import parallel_coordinates
        from matplotlib import cm

        df = iris

        ax = _check_plot_works(parallel_coordinates, frame=df, class_column="Name")
        # Baseline counts, used by the axvlines=False check further down.
        nlines = len(ax.get_lines())
        nxticks = len(ax.xaxis.get_ticklabels())

        rgba = ("#556270", "#4ECDC4", "#C7F464")
        ax = _check_plot_works(
            parallel_coordinates, frame=df, class_column="Name", color=rgba
        )
        self._check_colors(
            ax.get_lines()[:10], linecolors=rgba, mapping=df["Name"][:10]
        )

        cnames = ["dodgerblue", "aquamarine", "seagreen"]
        ax = _check_plot_works(
            parallel_coordinates, frame=df, class_column="Name", color=cnames
        )
        self._check_colors(
            ax.get_lines()[:10], linecolors=cnames, mapping=df["Name"][:10]
        )

        ax = _check_plot_works(
            parallel_coordinates, frame=df, class_column="Name", colormap=cm.jet
        )
        cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())]
        self._check_colors(
            ax.get_lines()[:10], linecolors=cmaps, mapping=df["Name"][:10]
        )

        # Without the vertical axis lines there is one line fewer per x tick.
        ax = _check_plot_works(
            parallel_coordinates, frame=df, class_column="Name", axvlines=False
        )
        assert len(ax.get_lines()) == (nlines - nxticks)

        # Legend handles must carry the explicitly requested colors.
        colors = ["b", "g", "r"]
        df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3], "C": [1, 2, 3], "Name": colors})
        ax = parallel_coordinates(df, "Name", color=colors)
        handles, labels = ax.get_legend_handles_labels()
        self._check_colors(handles, linecolors=colors)

    # not sure if this is indicative of a problem
    @pytest.mark.filterwarnings("ignore:Attempting to set:UserWarning")
    def test_parallel_coordinates_with_sorted_labels(self):
        """With ``sort_labels=True`` legend labels and colors sort together.

        For #15908.
        """
        from pandas.plotting import parallel_coordinates

        df = DataFrame(
            {
                "feat": list(range(30)),
                "class": [2 for _ in range(10)]
                + [3 for _ in range(10)]
                + [1 for _ in range(10)],
            }
        )
        ax = parallel_coordinates(df, "class", sort_labels=True)
        polylines, labels = ax.get_legend_handles_labels()
        color_label_tuples = zip(
            [polyline.get_color() for polyline in polylines], labels
        )
        # Sort the (color, label) pairs by label, then walk adjacent pairs.
        ordered_color_label_tuples = sorted(color_label_tuples, key=lambda x: x[1])
        prev_next_tupels = zip(
            list(ordered_color_label_tuples[0:-1]), list(ordered_color_label_tuples[1:])
        )
        for prev, nxt in prev_next_tupels:
            # labels and colors are ordered strictly increasing
            assert prev[1] < nxt[1] and prev[0] < nxt[0]

@pytest.mark.slow + def test_radviz(self, iris): + from pandas.plotting import radviz + from matplotlib import cm + + df = iris + _check_plot_works(radviz, frame=df, class_column="Name") + + rgba = ("#556270", "#4ECDC4", "#C7F464") + ax = _check_plot_works(radviz, frame=df, class_column="Name", color=rgba) + # skip Circle drawn as ticks + patches = [p for p in ax.patches[:20] if p.get_label() != ""] + self._check_colors(patches[:10], facecolors=rgba, mapping=df["Name"][:10]) + + cnames = ["dodgerblue", "aquamarine", "seagreen"] + _check_plot_works(radviz, frame=df, class_column="Name", color=cnames) + patches = [p for p in ax.patches[:20] if p.get_label() != ""] + self._check_colors(patches, facecolors=cnames, mapping=df["Name"][:10]) + + _check_plot_works(radviz, frame=df, class_column="Name", colormap=cm.jet) + cmaps = [cm.jet(n) for n in np.linspace(0, 1, df["Name"].nunique())] + patches = [p for p in ax.patches[:20] if p.get_label() != ""] + self._check_colors(patches, facecolors=cmaps, mapping=df["Name"][:10]) + + colors = [[0.0, 0.0, 1.0, 1.0], [0.0, 0.5, 1.0, 1.0], [1.0, 0.0, 0.0, 1.0]] + df = DataFrame( + {"A": [1, 2, 3], "B": [2, 1, 3], "C": [3, 2, 1], "Name": ["b", "g", "r"]} + ) + ax = radviz(df, "Name", color=colors) + handles, labels = ax.get_legend_handles_labels() + self._check_colors(handles, facecolors=colors) + + @pytest.mark.slow + def test_subplot_titles(self, iris): + df = iris.drop("Name", axis=1).head() + # Use the column names as the subplot titles + title = list(df.columns) + + # Case len(title) == len(df) + plot = df.plot(subplots=True, title=title) + assert [p.get_title() for p in plot] == title + + # Case len(title) > len(df) + msg = ( + "The length of `title` must equal the number of columns if" + " using `title` of type `list` and `subplots=True`" + ) + with pytest.raises(ValueError, match=msg): + df.plot(subplots=True, title=title + ["kittens > puppies"]) + + # Case len(title) < len(df) + with pytest.raises(ValueError, match=msg): + 
df.plot(subplots=True, title=title[:2]) + + # Case subplots=False and title is of type list + msg = ( + "Using `title` of type `list` is not supported unless" + " `subplots=True` is passed" + ) + with pytest.raises(ValueError, match=msg): + df.plot(subplots=False, title=title) + + # Case df with 3 numeric columns but layout of (2,2) + plot = df.drop("SepalWidth", axis=1).plot( + subplots=True, layout=(2, 2), title=title[:-1] + ) + title_list = [ax.get_title() for sublist in plot for ax in sublist] + assert title_list == title[:3] + [""] + + def test_get_standard_colors_random_seed(self): + # GH17525 + df = DataFrame(np.zeros((10, 10))) + + # Make sure that the random seed isn't reset by _get_standard_colors + plotting.parallel_coordinates(df, 0) + rand1 = random.random() + plotting.parallel_coordinates(df, 0) + rand2 = random.random() + assert rand1 != rand2 + + # Make sure it produces the same colors every time it's called + from pandas.plotting._matplotlib.style import _get_standard_colors + + color1 = _get_standard_colors(1, color_type="random") + color2 = _get_standard_colors(1, color_type="random") + assert color1 == color2 + + def test_get_standard_colors_default_num_colors(self): + from pandas.plotting._matplotlib.style import _get_standard_colors + + # Make sure the default color_types returns the specified amount + color1 = _get_standard_colors(1, color_type="default") + color2 = _get_standard_colors(9, color_type="default") + color3 = _get_standard_colors(20, color_type="default") + assert len(color1) == 1 + assert len(color2) == 9 + assert len(color3) == 20 + + def test_plot_single_color(self): + # Example from #20585. 
All 3 bars should have the same color + df = DataFrame( + { + "account-start": ["2017-02-03", "2017-03-03", "2017-01-01"], + "client": ["Alice Anders", "Bob Baker", "Charlie Chaplin"], + "balance": [-1432.32, 10.43, 30000.00], + "db-id": [1234, 2424, 251], + "proxy-id": [525, 1525, 2542], + "rank": [52, 525, 32], + } + ) + ax = df.client.value_counts().plot.bar() + colors = [rect.get_facecolor() for rect in ax.get_children()[0:3]] + assert all(color == colors[0] for color in colors) + + def test_get_standard_colors_no_appending(self): + # GH20726 + + # Make sure not to add more colors so that matplotlib can cycle + # correctly. + from matplotlib import cm + from pandas.plotting._matplotlib.style import _get_standard_colors + + color_before = cm.gnuplot(range(5)) + color_after = _get_standard_colors(1, color=color_before) + assert len(color_after) == len(color_before) + + df = DataFrame(np.random.randn(48, 4), columns=list("ABCD")) + + color_list = cm.gnuplot(np.linspace(0, 1, 16)) + p = df.A.plot.bar(figsize=(16, 7), color=color_list) + assert p.patches[1].get_facecolor() == p.patches[17].get_facecolor() diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py new file mode 100644 index 00000000..8463f30b --- /dev/null +++ b/pandas/tests/plotting/test_series.py @@ -0,0 +1,938 @@ +# coding: utf-8 + +""" Test cases for Series.plot """ + + +from datetime import datetime +from itertools import chain + +import numpy as np +from numpy.random import randn +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Series, date_range +import pandas._testing as tm +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works + +import pandas.plotting as plotting + + +@td.skip_if_no_mpl +class TestSeriesPlots(TestPlotBase): + def setup_method(self, method): + TestPlotBase.setup_method(self, method) + import matplotlib as mpl + + mpl.rcdefaults() + + self.ts = tm.makeTimeSeries() 
        self.ts.name = "ts"

        self.series = tm.makeStringSeries()
        self.series.name = "series"

        self.iseries = tm.makePeriodSeries()
        self.iseries.name = "iseries"

    @pytest.mark.slow
    def test_plot(self):
        """Smoke-test Series.plot across kinds, log scales and layouts."""
        _check_plot_works(self.ts.plot, label="foo")
        _check_plot_works(self.ts.plot, use_index=False)
        axes = _check_plot_works(self.ts.plot, rot=0)
        self._check_ticks_props(axes, xrot=0)

        ax = _check_plot_works(self.ts.plot, style=".", logy=True)
        self._check_ax_scales(ax, yaxis="log")

        ax = _check_plot_works(self.ts.plot, style=".", logx=True)
        self._check_ax_scales(ax, xaxis="log")

        ax = _check_plot_works(self.ts.plot, style=".", loglog=True)
        self._check_ax_scales(ax, xaxis="log", yaxis="log")

        _check_plot_works(self.ts[:10].plot.bar)
        _check_plot_works(self.ts.plot.area, stacked=False)
        _check_plot_works(self.iseries.plot)

        for kind in ["line", "bar", "barh", "kde", "hist", "box"]:
            _check_plot_works(self.series[:5].plot, kind=kind)

        _check_plot_works(self.series[:10].plot.barh)
        ax = _check_plot_works(Series(randn(10)).plot.bar, color="black")
        self._check_colors([ax.patches[0]], facecolors=["black"])

        # GH 6951
        ax = _check_plot_works(self.ts.plot, subplots=True)
        self._check_axes_shape(ax, axes_num=1, layout=(1, 1))

        # A -1 in the layout is expected to resolve to a 1x1 grid here.
        ax = _check_plot_works(self.ts.plot, subplots=True, layout=(-1, 1))
        self._check_axes_shape(ax, axes_num=1, layout=(1, 1))
        ax = _check_plot_works(self.ts.plot, subplots=True, layout=(1, -1))
        self._check_axes_shape(ax, axes_num=1, layout=(1, 1))

    @pytest.mark.slow
    def test_plot_figsize_and_title(self):
        """``title`` and ``figsize`` are applied to the plotted axes."""
        # figsize and title
        _, ax = self.plt.subplots()
        ax = self.series.plot(title="Test", figsize=(16, 8), ax=ax)
        self._check_text_labels(ax.title, "Test")
        self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16, 8))

    def test_dont_modify_rcParams(self):
        """Plotting leaves ``rcParams["axes.prop_cycle"]`` unchanged."""
        # GH 8242
        key = "axes.prop_cycle"
        colors = self.plt.rcParams[key]
        _, ax = self.plt.subplots()
        Series([1, 2, 3]).plot(ax=ax)
        assert colors == self.plt.rcParams[key]

    def test_ts_line_lim(self):
        """x limits enclose the line data, also with ``secondary_y``."""
        fig, ax = self.plt.subplots()
        ax = self.ts.plot(ax=ax)
        xmin, xmax = ax.get_xlim()
        lines = ax.get_lines()
        assert xmin <= lines[0].get_data(orig=False)[0][0]
        assert xmax >= lines[0].get_data(orig=False)[0][-1]
        tm.close()

        ax = self.ts.plot(secondary_y=True, ax=ax)
        xmin, xmax = ax.get_xlim()
        lines = ax.get_lines()
        assert xmin <= lines[0].get_data(orig=False)[0][0]
        assert xmax >= lines[0].get_data(orig=False)[0][-1]

    def test_ts_area_lim(self):
        """x limits enclose area-plot data for naive and tz-aware indexes."""
        _, ax = self.plt.subplots()
        ax = self.ts.plot.area(stacked=False, ax=ax)
        xmin, xmax = ax.get_xlim()
        line = ax.get_lines()[0].get_data(orig=False)[0]
        assert xmin <= line[0]
        assert xmax >= line[-1]
        tm.close()

        # GH 7471
        _, ax = self.plt.subplots()
        ax = self.ts.plot.area(stacked=False, x_compat=True, ax=ax)
        xmin, xmax = ax.get_xlim()
        line = ax.get_lines()[0].get_data(orig=False)[0]
        assert xmin <= line[0]
        assert xmax >= line[-1]
        tm.close()

        # Same checks with a timezone-aware index.
        tz_ts = self.ts.copy()
        tz_ts.index = tz_ts.tz_localize("GMT").tz_convert("CET")
        _, ax = self.plt.subplots()
        ax = tz_ts.plot.area(stacked=False, x_compat=True, ax=ax)
        xmin, xmax = ax.get_xlim()
        line = ax.get_lines()[0].get_data(orig=False)[0]
        assert xmin <= line[0]
        assert xmax >= line[-1]
        tm.close()

        _, ax = self.plt.subplots()
        ax = tz_ts.plot.area(stacked=False, secondary_y=True, ax=ax)
        xmin, xmax = ax.get_xlim()
        line = ax.get_lines()[0].get_data(orig=False)[0]
        assert xmin <= line[0]
        assert xmax >= line[-1]

    def test_label(self):
        """Legend label comes from ``label=``, else from the Series name."""
        s = Series([1, 2])
        _, ax = self.plt.subplots()
        ax = s.plot(label="LABEL", legend=True, ax=ax)
        self._check_legend_labels(ax, labels=["LABEL"])
        self.plt.close()
        # unnamed Series and no label: the legend entry is "None"
        _, ax = self.plt.subplots()
        ax = s.plot(legend=True, ax=ax)
        self._check_legend_labels(ax, labels=["None"])
        self.plt.close()
        # label falls back to the Series name
        s.name = "NAME"
        _, ax = self.plt.subplots()
        ax = s.plot(legend=True, ax=ax)
        self._check_legend_labels(ax, labels=["NAME"])
        self.plt.close()
        # override the default
        _, ax = self.plt.subplots()
        ax = s.plot(legend=True, label="LABEL", ax=ax)
        self._check_legend_labels(ax, labels=["LABEL"])
        self.plt.close()
        # Add label info, but don't draw
        _, ax = self.plt.subplots()
        ax = s.plot(legend=False, label="LABEL", ax=ax)
        assert ax.get_legend() is None  # Hasn't been drawn
        ax.legend()  # draw it
        self._check_legend_labels(ax, labels=["LABEL"])

    def test_boolean(self):
        """Boolean data plots only when ``include_bool=True`` is passed."""
        # GH 23719
        s = Series([False, False, True])
        _check_plot_works(s.plot, include_bool=True)

        msg = "no numeric data to plot"
        with pytest.raises(TypeError, match=msg):
            _check_plot_works(s.plot)

    def test_line_area_nan_series(self):
        """NaN is masked in line plots and drawn as 0 in stacked/area plots."""
        values = [1, 2, np.nan, 3]
        s = Series(values)
        ts = Series(values, index=tm.makeDateIndex(k=4))

        for d in [s, ts]:
            ax = _check_plot_works(d.plot)
            masked = ax.lines[0].get_ydata()
            # remove nan for comparison purpose
            exp = np.array([1, 2, 3], dtype=np.float64)
            tm.assert_numpy_array_equal(np.delete(masked.data, 2), exp)
            tm.assert_numpy_array_equal(
                masked.mask, np.array([False, False, True, False])
            )

            expected = np.array([1, 2, 0, 3], dtype=np.float64)
            ax = _check_plot_works(d.plot, stacked=True)
            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
            ax = _check_plot_works(d.plot.area)
            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)
            ax = _check_plot_works(d.plot.area, stacked=False)
            tm.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected)

    def test_line_use_index_false(self):
        """With ``use_index=False`` the index name is not used as x label."""
        s = Series([1, 2, 3], index=["a", "b", "c"])
        s.index.name = "The Index"
        _, ax = self.plt.subplots()
        ax = s.plot(use_index=False, ax=ax)
        label = ax.get_xlabel()
        assert label == ""
        _, ax = self.plt.subplots()
        ax2 = s.plot.bar(use_index=False, ax=ax)
        label2 = ax2.get_xlabel()
        assert label2 == ""

    @pytest.mark.slow
    def test_bar_log(self):
        """Bar plots with ``log=True`` get the expected tick locations."""
        expected = np.array([1e-1, 1e0, 1e1, 1e2, 1e3, 1e4])

        _, ax = self.plt.subplots()
        ax = Series([200, 500]).plot.bar(log=True, ax=ax)
        tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
        tm.close()

        _, ax = self.plt.subplots()
        ax = Series([200, 500]).plot.barh(log=True, ax=ax)
        tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected)
        tm.close()

        # GH 9905
        expected = np.array([1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1])

        _, ax = self.plt.subplots()
        ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind="bar", ax=ax)
        ymin = 0.0007943282347242822
        ymax = 0.12589254117941673
        res = ax.get_ylim()
        tm.assert_almost_equal(res[0], ymin)
        tm.assert_almost_equal(res[1], ymax)
        tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected)
        tm.close()

        # Horizontal bars: the same limits are expected on the x axis.
        _, ax = self.plt.subplots()
        ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind="barh", ax=ax)
        res = ax.get_xlim()
        tm.assert_almost_equal(res[0], ymin)
        tm.assert_almost_equal(res[1], ymax)
        tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected)

    @pytest.mark.slow
    def test_bar_ignore_index(self):
        """With ``use_index=False`` bar tick labels are positions 0..3."""
        df = Series([1, 2, 3, 4], index=["a", "b", "c", "d"])
        _, ax = self.plt.subplots()
        ax = df.plot.bar(use_index=False, ax=ax)
        self._check_text_labels(ax.get_xticklabels(), ["0", "1", "2", "3"])

    def test_bar_user_colors(self):
        """A per-bar color list is applied to the bars in order."""
        s = Series([1, 2, 3, 4])
        ax = s.plot.bar(color=["red", "blue", "blue", "red"])
        result = [p.get_facecolor() for p in ax.patches]
        expected = [
            (1.0, 0.0, 0.0, 1.0),
            (0.0, 0.0, 1.0, 1.0),
            (0.0, 0.0, 1.0, 1.0),
            (1.0, 0.0, 0.0, 1.0),
        ]
        assert result == expected

    def test_rotation(self):
        """x tick rotation is 0 by default and follows ``rot`` when given."""
        df = DataFrame(randn(5, 5))
        # Default rot 0
        _, ax = self.plt.subplots()
        axes = df.plot(ax=ax)
        self._check_ticks_props(axes, xrot=0)

        _, ax = self.plt.subplots()
        axes = df.plot(rot=30, ax=ax)
        self._check_ticks_props(axes, xrot=30)

    def test_irregular_datetime(self):
        """String x limits on an irregular datetime index become ordinals."""
        rng = date_range("1/1/2000", "3/1/2000")
        rng = rng[[0, 1, 2, 3, 5, 9, 10, 11, 12]]
        ser = Series(randn(len(rng)), rng)
        _, ax = self.plt.subplots()
        ax = ser.plot(ax=ax)
        xp = datetime(1999, 1, 1).toordinal()
        ax.set_xlim("1/1/1999", "1/1/2001")
        assert xp == ax.get_xlim()[0]

    def test_unsorted_index_xlim(self):
        """x limits enclose the data for an unsorted index containing NaN."""
        ser = Series(
            [0.0, 1.0, np.nan, 3.0, 4.0, 5.0, 6.0],
            index=[1.0, 0.0, 3.0, 2.0, np.nan, 3.0, 2.0],
        )
        _, ax = self.plt.subplots()
        ax = ser.plot(ax=ax)
        xmin, xmax = ax.get_xlim()
        lines = ax.get_lines()
        assert xmin <= np.nanmin(lines[0].get_data(orig=False)[0])
        assert xmax >= np.nanmax(lines[0].get_data(orig=False)[0])

    @pytest.mark.slow
    def test_pie_series(self):
        """Pie plots: wedge labels, color cycling, autopct and font size."""
        # if the sum of values is less than 1.0, pie handles them as rates and
        # draws a semicircle.
        # NOTE(review): np.random.randint(1, 5) is called without ``size``, so
        # it returns one int that is broadcast over the five index labels.
        series = Series(
            np.random.randint(1, 5), index=["a", "b", "c", "d", "e"], name="YLABEL"
        )
        ax = _check_plot_works(series.plot.pie)
        self._check_text_labels(ax.texts, series.index)
        assert ax.get_ylabel() == "YLABEL"

        # without wedge labels
        ax = _check_plot_works(series.plot.pie, labels=None)
        self._check_text_labels(ax.texts, [""] * 5)

        # with less colors than elements
        color_args = ["r", "g", "b"]
        ax = _check_plot_works(series.plot.pie, colors=color_args)

        color_expected = ["r", "g", "b", "r", "g"]
        self._check_colors(ax.patches, facecolors=color_expected)

        # with labels and colors
        labels = ["A", "B", "C", "D", "E"]
        color_args = ["r", "g", "b", "c", "m"]
        ax = _check_plot_works(series.plot.pie, labels=labels, colors=color_args)
        self._check_text_labels(ax.texts, labels)
        self._check_colors(ax.patches, facecolors=color_args)

        # with autopct and fontsize
        ax = _check_plot_works(
            series.plot.pie, colors=color_args, autopct="%.2f", fontsize=7
        )
        pcts = [f"{s*100:.2f}" for s in series.values / float(series.sum())]
        # Texts alternate: wedge label, then its percentage.
        expected_texts = list(chain.from_iterable(zip(series.index, pcts)))
        self._check_text_labels(ax.texts, expected_texts)
        for t in ax.texts:
            assert t.get_fontsize() == 7

        # includes negative value
        with pytest.raises(ValueError):
            series = Series([1, 2, 0, 4, -1], index=["a", "b", "c", "d", "e"])
            series.plot.pie()

        # includes nan
        series = Series([1, 2, np.nan, 4], index=["a", "b", "c", "d"], name="YLABEL")
        ax = _check_plot_works(series.plot.pie)
        self._check_text_labels(ax.texts, ["a", "b", "", "d"])

    def test_pie_nan(self):
        """The wedge for a NaN value gets an empty label."""
        s = Series([1, np.nan, 1, 1])
        _, ax = self.plt.subplots()
        ax = s.plot.pie(legend=True, ax=ax)
        expected = ["0", "", "2", "3"]
        result = [x.get_text() for x in ax.texts]
        assert result == expected

    @pytest.mark.slow
    def test_hist_df_kwargs(self):
        """``bins=5`` on a two-column frame yields 10 patches."""
        df = DataFrame(np.random.randn(10, 2))
        _, ax = self.plt.subplots()
        ax = df.plot.hist(bins=5, ax=ax)
        assert len(ax.patches) == 10

    @pytest.mark.slow
    def test_hist_df_with_nonnumerics(self):
        """A non-numeric column adds no patches to ``DataFrame.plot.hist``."""
        # GH 9853
        with tm.RNGContext(1):
            df = DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])
        df["E"] = ["x", "y"] * 5
        _, ax = self.plt.subplots()
        ax = df.plot.hist(bins=5, ax=ax)
        assert len(ax.patches) == 20

        _, ax = self.plt.subplots()
        ax = df.plot.hist(ax=ax)  # bins=10
        assert len(ax.patches) == 40

    @pytest.mark.slow
    def test_hist_legacy(self):
        """``Series.hist`` works with grid, figsize, by, ax and figure."""
        _check_plot_works(self.ts.hist)
        _check_plot_works(self.ts.hist, grid=False)
        _check_plot_works(self.ts.hist, figsize=(8, 10))
        # _check_plot_works adds an ax so catch warning. see GH #13188
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(self.ts.hist, by=self.ts.index.month)
        with tm.assert_produces_warning(UserWarning):
            _check_plot_works(self.ts.hist, by=self.ts.index.month, bins=5)

        fig, ax = self.plt.subplots(1, 1)
        _check_plot_works(self.ts.hist, ax=ax)
        _check_plot_works(self.ts.hist, ax=ax, figure=fig)
        _check_plot_works(self.ts.hist, figure=fig)
        tm.close()

        fig, (ax1, ax2) = self.plt.subplots(1, 2)
        _check_plot_works(self.ts.hist, figure=fig, ax=ax1)
        _check_plot_works(self.ts.hist, figure=fig, ax=ax2)

        # Combining ``by`` with ``figure`` is rejected.
        with pytest.raises(ValueError):
            self.ts.hist(by=self.ts.index, figure=fig)

    @pytest.mark.slow
    def test_hist_bins_legacy(self):
        """``DataFrame.hist(bins=2)`` draws two patches per axes."""
        df = DataFrame(np.random.randn(10, 2))
        ax = df.hist(bins=2)[0][0]
        assert len(ax.patches) == 2

    @pytest.mark.slow
    def test_hist_layout(self):
        """``layout`` without ``by`` raises ValueError for ``Series.hist``."""
        df = self.hist_df
        with pytest.raises(ValueError):
            df.height.hist(layout=(1, 1))

        with pytest.raises(ValueError):
            df.height.hist(layout=[1, 1])

    @pytest.mark.slow
    def test_hist_layout_with_by(self):
        """``layout`` combined with ``by`` yields the expected axes grid."""
        df = self.hist_df

        # _check_plot_works adds an ax so catch warning. see GH #13188
        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1))
        self._check_axes_shape(axes, axes_num=2, layout=(2, 1))

        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.height.hist, by=df.gender, layout=(3, -1))
        self._check_axes_shape(axes, axes_num=2, layout=(3, 1))

        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.height.hist, by=df.category, layout=(4, 1))
        self._check_axes_shape(axes, axes_num=4, layout=(4, 1))

        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.height.hist, by=df.category, layout=(2, -1))
        self._check_axes_shape(axes, axes_num=4, layout=(2, 2))

        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.height.hist, by=df.category, layout=(3, -1))
        self._check_axes_shape(axes, axes_num=4, layout=(3, 2))

        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.height.hist, by=df.category, layout=(-1, 4))
        self._check_axes_shape(axes, axes_num=4, layout=(1, 4))

        with tm.assert_produces_warning(UserWarning):
            axes = _check_plot_works(df.height.hist, by=df.classroom, layout=(2, 2))
        self._check_axes_shape(axes, axes_num=3, layout=(2, 2))

        axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7))
        self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7))

    @pytest.mark.slow
    def test_hist_no_overlap(self):
        """Histograms on two subplots leave exactly two axes on the figure."""
        from matplotlib.pyplot import subplot, gcf

        x = Series(randn(2))
        y = Series(randn(2))
        subplot(121)
        x.hist()
        subplot(122)
        y.hist()
        fig = gcf()
        axes = fig.axes
        assert len(axes) == 2

    @pytest.mark.slow
    def test_hist_secondary_legend(self):
        """Legend labels and axis visibility for hist with ``secondary_y``."""
        # GH 9610
        df = DataFrame(np.random.randn(30, 4), columns=list("abcd"))

        # primary -> secondary
        _, ax = self.plt.subplots()
        ax = df["a"].plot.hist(legend=True, ax=ax)
        df["b"].plot.hist(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=["a", "b (right)"])
        assert ax.get_yaxis().get_visible()
        assert ax.right_ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> secondary
        _, ax = self.plt.subplots()
        ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax)
        df["b"].plot.hist(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible, right axis must be visible
        self._check_legend_labels(ax.left_ax, labels=["a (right)", "b (right)"])
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> primary
        _, ax = self.plt.subplots()
        ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax)
        # right axes is returned
        df["b"].plot.hist(ax=ax, legend=True)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax.left_ax, labels=["a (right)", "b"])
        assert ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

    @pytest.mark.slow
    def test_df_series_secondary_legend(self):
        """Legend labels when a Series joins a DataFrame plot on ``secondary_y``."""
        # GH 9779
        df = DataFrame(np.random.randn(30, 3), columns=list("abc"))
        s = Series(np.random.randn(30), name="x")

        # primary -> secondary (without passing ax)
        _, ax = self.plt.subplots()
        ax = df.plot(ax=ax)
        s.plot(legend=True, secondary_y=True, ax=ax)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
        assert ax.get_yaxis().get_visible()
        assert ax.right_ax.get_yaxis().get_visible()
        tm.close()

        # primary -> secondary (with passing ax)
        _, ax = self.plt.subplots()
        ax = df.plot(ax=ax)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left and right axis must be visible
        self._check_legend_labels(ax, labels=["a", "b", "c", "x (right)"])
        assert ax.get_yaxis().get_visible()
        assert ax.right_ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> secondary (without passing ax)
        _, ax = self.plt.subplots()
        ax = df.plot(secondary_y=True, ax=ax)
        s.plot(legend=True, secondary_y=True, ax=ax)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
        self._check_legend_labels(ax.left_ax, labels=expected)
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> secondary (with passing ax)
        _, ax = self.plt.subplots()
        ax = df.plot(secondary_y=True, ax=ax)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ["a (right)", "b (right)", "c (right)", "x (right)"]
        self._check_legend_labels(ax.left_ax, expected)
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

        # secondary -> secondary (with passing ax, mark_right=False)
        _, ax = self.plt.subplots()
        ax = df.plot(secondary_y=True, mark_right=False, ax=ax)
        s.plot(ax=ax, legend=True, secondary_y=True)
        # both legends are drawn on left ax
        # left axis must be invisible and right axis must be visible
        expected = ["a", "b", "c", "x (right)"]
        self._check_legend_labels(ax.left_ax, expected)
        assert not ax.left_ax.get_yaxis().get_visible()
        assert ax.get_yaxis().get_visible()
        tm.close()

    @pytest.mark.slow
    @pytest.mark.parametrize(
        "input_logy, expected_scale", [(True, "log"), ("sym", "symlog")]
    )
    def test_secondary_logy(self, input_logy, expected_scale):
        """``logy`` sets the y scale on both the primary and secondary axes."""
        # GH 25545
        s1 = Series(np.random.randn(30))
        s2 = Series(np.random.randn(30))

        # GH 24980
        ax1 = s1.plot(logy=input_logy)
        ax2 = s2.plot(secondary_y=True, logy=input_logy)

        assert ax1.get_yscale() == expected_scale
        assert ax2.get_yscale() == expected_scale

@pytest.mark.slow + def test_plot_fails_with_dupe_color_and_style(self): + x = Series(randn(2)) + with pytest.raises(ValueError): + _, ax = self.plt.subplots() + x.plot(style="k--", color="k", ax=ax) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_hist_kde(self): + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, ax=ax) + self._check_ax_scales(ax, yaxis="log") + xlabels = ax.get_xticklabels() + # ticks are values, thus ticklabels are blank + self._check_text_labels(xlabels, [""] * len(xlabels)) + ylabels = ax.get_yticklabels() + self._check_text_labels(ylabels, [""] * len(ylabels)) + + _check_plot_works(self.ts.plot.kde) + _check_plot_works(self.ts.plot.density) + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, ax=ax) + self._check_ax_scales(ax, yaxis="log") + xlabels = ax.get_xticklabels() + self._check_text_labels(xlabels, [""] * len(xlabels)) + ylabels = ax.get_yticklabels() + self._check_text_labels(ylabels, [""] * len(ylabels)) + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_kwargs(self): + sample_points = np.linspace(-100, 100, 20) + _check_plot_works(self.ts.plot.kde, bw_method="scott", ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=20) + _check_plot_works(self.ts.plot.kde, bw_method=None, ind=np.int(20)) + _check_plot_works(self.ts.plot.kde, bw_method=0.5, ind=sample_points) + _check_plot_works(self.ts.plot.density, bw_method=0.5, ind=sample_points) + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, bw_method=0.5, ind=sample_points, ax=ax) + self._check_ax_scales(ax, yaxis="log") + self._check_text_labels(ax.yaxis.get_label(), "Density") + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_kde_missing_vals(self): + s = Series(np.random.uniform(size=50)) + s[0] = np.nan + axes = _check_plot_works(s.plot.kde) + + # gh-14821: check if the values have any missing values + assert any(~np.isnan(axes.lines[0].get_xdata())) + + @pytest.mark.slow + def test_hist_kwargs(self): + 
_, ax = self.plt.subplots() + ax = self.ts.plot.hist(bins=5, ax=ax) + assert len(ax.patches) == 5 + self._check_text_labels(ax.yaxis.get_label(), "Frequency") + tm.close() + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(orientation="horizontal", ax=ax) + self._check_text_labels(ax.xaxis.get_label(), "Frequency") + tm.close() + + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(align="left", stacked=True, ax=ax) + tm.close() + + @pytest.mark.slow + @td.skip_if_no_scipy + def test_hist_kde_color(self): + _, ax = self.plt.subplots() + ax = self.ts.plot.hist(logy=True, bins=10, color="b", ax=ax) + self._check_ax_scales(ax, yaxis="log") + assert len(ax.patches) == 10 + self._check_colors(ax.patches, facecolors=["b"] * 10) + + _, ax = self.plt.subplots() + ax = self.ts.plot.kde(logy=True, color="r", ax=ax) + self._check_ax_scales(ax, yaxis="log") + lines = ax.get_lines() + assert len(lines) == 1 + self._check_colors(lines, ["r"]) + + @pytest.mark.slow + def test_boxplot_series(self): + _, ax = self.plt.subplots() + ax = self.ts.plot.box(logy=True, ax=ax) + self._check_ax_scales(ax, yaxis="log") + xlabels = ax.get_xticklabels() + self._check_text_labels(xlabels, [self.ts.name]) + ylabels = ax.get_yticklabels() + self._check_text_labels(ylabels, [""] * len(ylabels)) + + @pytest.mark.slow + def test_kind_both_ways(self): + s = Series(range(3)) + kinds = ( + plotting.PlotAccessor._common_kinds + plotting.PlotAccessor._series_kinds + ) + _, ax = self.plt.subplots() + for kind in kinds: + + s.plot(kind=kind, ax=ax) + getattr(s.plot, kind)() + + @pytest.mark.slow + def test_invalid_plot_data(self): + s = Series(list("abcd")) + _, ax = self.plt.subplots() + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + s.plot(kind=kind, ax=ax) + + @pytest.mark.slow + def test_valid_object_plot(self): + s = Series(range(10), dtype=object) + for kind in plotting.PlotAccessor._common_kinds: + 
_check_plot_works(s.plot, kind=kind) + + def test_partially_invalid_plot_data(self): + s = Series(["a", "b", 1.0, 2]) + _, ax = self.plt.subplots() + for kind in plotting.PlotAccessor._common_kinds: + + msg = "no numeric data to plot" + with pytest.raises(TypeError, match=msg): + s.plot(kind=kind, ax=ax) + + def test_invalid_kind(self): + s = Series([1, 2]) + with pytest.raises(ValueError): + s.plot(kind="aasdf") + + @pytest.mark.slow + def test_dup_datetime_index_plot(self): + dr1 = date_range("1/1/2009", periods=4) + dr2 = date_range("1/2/2009", periods=4) + index = dr1.append(dr2) + values = randn(index.size) + s = Series(values, index=index) + _check_plot_works(s.plot) + + @pytest.mark.slow + def test_errorbar_plot(self): + + s = Series(np.arange(10), name="x") + s_err = np.random.randn(10) + d_err = DataFrame(randn(10, 2), index=s.index, columns=["x", "y"]) + # test line and bar plots + kinds = ["line", "bar"] + for kind in kinds: + ax = _check_plot_works(s.plot, yerr=Series(s_err), kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, yerr=s_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, yerr=s_err.tolist(), kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(s.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=1, yerr=1) + + ax = _check_plot_works(s.plot, xerr=s_err) + self._check_has_errorbars(ax, xerr=1, yerr=0) + + # test time series plotting + ix = date_range("1/1/2000", "1/1/2001", freq="M") + ts = Series(np.arange(12), index=ix, name="x") + ts_err = Series(np.random.randn(12), index=ix) + td_err = DataFrame(randn(12, 2), index=ix, columns=["x", "y"]) + + ax = _check_plot_works(ts.plot, yerr=ts_err) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(ts.plot, yerr=td_err) + 
self._check_has_errorbars(ax, xerr=0, yerr=1) + + # check incorrect lengths and types + with pytest.raises(ValueError): + s.plot(yerr=np.arange(11)) + + s_err = ["zzz"] * 10 + with pytest.raises(TypeError): + s.plot(yerr=s_err) + + def test_table(self): + _check_plot_works(self.series.plot, table=True) + _check_plot_works(self.series.plot, table=self.series) + + @pytest.mark.slow + def test_series_grid_settings(self): + # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 + self._check_grid_settings( + Series([1, 2, 3]), + plotting.PlotAccessor._series_kinds + plotting.PlotAccessor._common_kinds, + ) + + @pytest.mark.slow + def test_standard_colors(self): + from pandas.plotting._matplotlib.style import _get_standard_colors + + for c in ["r", "red", "green", "#FF0000"]: + result = _get_standard_colors(1, color=c) + assert result == [c] + + result = _get_standard_colors(1, color=[c]) + assert result == [c] + + result = _get_standard_colors(3, color=c) + assert result == [c] * 3 + + result = _get_standard_colors(3, color=[c]) + assert result == [c] * 3 + + @pytest.mark.slow + def test_standard_colors_all(self): + import matplotlib.colors as colors + from pandas.plotting._matplotlib.style import _get_standard_colors + + # multiple colors like mediumaquamarine + for c in colors.cnames: + result = _get_standard_colors(num_colors=1, color=c) + assert result == [c] + + result = _get_standard_colors(num_colors=1, color=[c]) + assert result == [c] + + result = _get_standard_colors(num_colors=3, color=c) + assert result == [c] * 3 + + result = _get_standard_colors(num_colors=3, color=[c]) + assert result == [c] * 3 + + # single letter colors like k + for c in colors.ColorConverter.colors: + result = _get_standard_colors(num_colors=1, color=c) + assert result == [c] + + result = _get_standard_colors(num_colors=1, color=[c]) + assert result == [c] + + result = _get_standard_colors(num_colors=3, color=c) + assert result == [c] * 3 + + result = 
_get_standard_colors(num_colors=3, color=[c]) + assert result == [c] * 3 + + def test_series_plot_color_kwargs(self): + # GH1890 + _, ax = self.plt.subplots() + ax = Series(np.arange(12) + 1).plot(color="green", ax=ax) + self._check_colors(ax.get_lines(), linecolors=["green"]) + + def test_time_series_plot_color_kwargs(self): + # #1890 + _, ax = self.plt.subplots() + ax = Series(np.arange(12) + 1, index=date_range("1/1/2000", periods=12)).plot( + color="green", ax=ax + ) + self._check_colors(ax.get_lines(), linecolors=["green"]) + + def test_time_series_plot_color_with_empty_kwargs(self): + import matplotlib as mpl + + def_colors = self._unpack_cycler(mpl.rcParams) + index = date_range("1/1/2000", periods=12) + s = Series(np.arange(1, 13), index=index) + + ncolors = 3 + + _, ax = self.plt.subplots() + for i in range(ncolors): + ax = s.plot(ax=ax) + self._check_colors(ax.get_lines(), linecolors=def_colors[:ncolors]) + + def test_xticklabels(self): + # GH11529 + s = Series(np.arange(10), index=[f"P{i:02d}" for i in range(10)]) + _, ax = self.plt.subplots() + ax = s.plot(xticks=[0, 3, 5, 9], ax=ax) + exp = [f"P{i:02d}" for i in [0, 3, 5, 9]] + self._check_text_labels(ax.get_xticklabels(), exp) + + def test_xtick_barPlot(self): + # GH28172 + s = pd.Series(range(10), index=[f"P{i:02d}" for i in range(10)]) + ax = s.plot.bar(xticks=range(0, 11, 2)) + exp = np.array(list(range(0, 11, 2))) + tm.assert_numpy_array_equal(exp, ax.get_xticks()) + + def test_custom_business_day_freq(self): + # GH7222 + from pandas.tseries.offsets import CustomBusinessDay + + s = Series( + range(100, 121), + index=pd.bdate_range( + start="2014-05-01", + end="2014-06-01", + freq=CustomBusinessDay(holidays=["2014-05-26"]), + ), + ) + + _check_plot_works(s.plot) + + @pytest.mark.xfail + def test_plot_accessor_updates_on_inplace(self): + s = Series([1, 2, 3, 4]) + _, ax = self.plt.subplots() + ax = s.plot(ax=ax) + before = ax.xaxis.get_ticklocs() + + s.drop([0, 1], inplace=True) + _, ax = 
self.plt.subplots() + after = ax.xaxis.get_ticklocs() + tm.assert_numpy_array_equal(before, after) + + @pytest.mark.parametrize("kind", ["line", "area"]) + def test_plot_xlim_for_series(self, kind): + # test if xlim is also correctly plotted in Series for line and area + # GH 27686 + s = Series([2, 3]) + _, ax = self.plt.subplots() + s.plot(kind=kind, ax=ax) + xlims = ax.get_xlim() + + assert xlims[0] < 0 + assert xlims[1] > 1 + + def test_plot_no_rows(self): + # GH 27758 + df = pd.Series(dtype=int) + assert df.empty + ax = df.plot() + assert len(ax.get_lines()) == 1 + line = ax.get_lines()[0] + assert len(line.get_xdata()) == 0 + assert len(line.get_ydata()) == 0 + + def test_plot_no_numeric_data(self): + df = pd.Series(["a", "b", "c"]) + with pytest.raises(TypeError): + df.plot() + + def test_style_single_ok(self): + s = pd.Series([1, 2]) + ax = s.plot(style="s", color="C3") + assert ax.lines[0].get_color() == ["C3"] diff --git a/pandas/tests/reductions/__init__.py b/pandas/tests/reductions/__init__.py new file mode 100644 index 00000000..e3851753 --- /dev/null +++ b/pandas/tests/reductions/__init__.py @@ -0,0 +1,4 @@ +""" +Tests for reductions where we want to test for matching behavior across +Array, Index, Series, and DataFrame methods. 
+""" diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py new file mode 100644 index 00000000..7400b049 --- /dev/null +++ b/pandas/tests/reductions/test_reductions.py @@ -0,0 +1,1278 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + NaT, + Period, + PeriodIndex, + RangeIndex, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + isna, + timedelta_range, + to_timedelta, +) +import pandas._testing as tm +from pandas.core import nanops + + +def get_objs(): + indexes = [ + tm.makeBoolIndex(10, name="a"), + tm.makeIntIndex(10, name="a"), + tm.makeFloatIndex(10, name="a"), + tm.makeDateIndex(10, name="a"), + tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"), + tm.makePeriodIndex(10, name="a"), + tm.makeStringIndex(10, name="a"), + tm.makeUnicodeIndex(10, name="a"), + ] + + arr = np.random.randn(10) + series = [Series(arr, index=idx, name="a") for idx in indexes] + + objs = indexes + series + return objs + + +objs = get_objs() + + +class TestReductions: + @pytest.mark.parametrize("opname", ["max", "min"]) + @pytest.mark.parametrize("obj", objs) + def test_ops(self, opname, obj): + result = getattr(obj, opname)() + if not isinstance(obj, PeriodIndex): + expected = getattr(obj.values, opname)() + else: + expected = pd.Period( + ordinal=getattr(obj._ndarray_values, opname)(), freq=obj.freq + ) + try: + assert result == expected + except TypeError: + # comparing tz-aware series with np.array results in + # TypeError + expected = expected.astype("M8[ns]").astype("int64") + assert result.value == expected + + def test_nanops(self): + # GH#7261 + for opname in ["max", "min"]: + for klass in [Index, Series]: + arg_op = "arg" + opname if klass is Index else "idx" + opname + + obj = klass([np.nan, 2.0]) + assert getattr(obj, opname)() == 2.0 + + obj = klass([np.nan]) + assert 
pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([], dtype=object) + assert pd.isna(getattr(obj, opname)()) + assert pd.isna(getattr(obj, opname)(skipna=False)) + + obj = klass([pd.NaT, datetime(2011, 11, 1)]) + # check DatetimeIndex monotonic path + assert getattr(obj, opname)() == datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT]) + # check DatetimeIndex non-monotonic path + assert getattr(obj, opname)(), datetime(2011, 11, 1) + assert getattr(obj, opname)(skipna=False) is pd.NaT + + assert getattr(obj, arg_op)() == 1 + result = getattr(obj, arg_op)(skipna=False) + if klass is Series: + assert np.isnan(result) + else: + assert result == -1 + + for dtype in ["M8[ns]", "datetime64[ns, UTC]"]: + # cases with empty Series/DatetimeIndex + obj = klass([], dtype=dtype) + + assert getattr(obj, opname)() is pd.NaT + assert getattr(obj, opname)(skipna=False) is pd.NaT + + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)() + with pytest.raises(ValueError, match="empty sequence"): + getattr(obj, arg_op)(skipna=False) + + # argmin/max + obj = Index(np.arange(5, dtype="int64")) + assert obj.argmin() == 0 + assert obj.argmax() == 4 + + obj = Index([np.nan, 1, np.nan, 2]) + assert obj.argmin() == 1 + assert obj.argmax() == 3 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 + + obj = Index([np.nan]) + assert obj.argmin() == -1 + assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 + + obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), pd.NaT]) + assert obj.argmin() == 1 + assert obj.argmax() == 2 + assert obj.argmin(skipna=False) == -1 + assert 
obj.argmax(skipna=False) == -1 + + obj = Index([pd.NaT]) + assert obj.argmin() == -1 + assert obj.argmax() == -1 + assert obj.argmin(skipna=False) == -1 + assert obj.argmax(skipna=False) == -1 + + @pytest.mark.parametrize("op, expected_col", [["max", "a"], ["min", "b"]]) + def test_same_tz_min_max_axis_1(self, op, expected_col): + # GH 10390 + df = DataFrame( + pd.date_range("2016-01-01 00:00:00", periods=3, tz="UTC"), columns=["a"] + ) + df["b"] = df.a.subtract(pd.Timedelta(seconds=3600)) + result = getattr(df, op)(axis=1) + expected = df[expected_col].rename(None) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", ["maximum", "minimum"]) + def test_numpy_reduction_with_tz_aware_dtype(self, tz_aware_fixture, func): + # GH 15552 + tz = tz_aware_fixture + arg = pd.to_datetime(["2019"]).tz_localize(tz) + expected = Series(arg) + result = getattr(np, func)(expected, expected) + tm.assert_series_equal(result, expected) + + +class TestIndexReductions: + # Note: the name TestIndexReductions indicates these tests + # were moved from a Index-specific test file, _not_ that these tests are + # intended long-term to be Index-specific + + @pytest.mark.parametrize( + "start,stop,step", + [ + (0, 400, 3), + (500, 0, -6), + (-(10 ** 6), 10 ** 6, 4), + (10 ** 6, -(10 ** 6), -4), + (0, 10, 20), + ], + ) + def test_max_min_range(self, start, stop, step): + # GH#17607 + idx = RangeIndex(start, stop, step) + expected = idx._int64index.max() + result = idx.max() + assert result == expected + + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.max(skipna=False) + assert result2 == expected + + expected = idx._int64index.min() + result = idx.min() + assert result == expected + + # skipna should be irrelevant since RangeIndex should never have NAs + result2 = idx.min(skipna=False) + assert result2 == expected + + # empty + idx = RangeIndex(start, stop, -step) + assert isna(idx.max()) + assert isna(idx.min()) + + def 
test_minmax_timedelta64(self): + + # monotonic + idx1 = TimedeltaIndex(["1 days", "2 days", "3 days"]) + assert idx1.is_monotonic + + # non-monotonic + idx2 = TimedeltaIndex(["1 days", np.nan, "3 days", "NaT"]) + assert not idx2.is_monotonic + + for idx in [idx1, idx2]: + assert idx.min() == Timedelta("1 days") + assert idx.max() == Timedelta("3 days") + assert idx.argmin() == 0 + assert idx.argmax() == 2 + + for op in ["min", "max"]: + # Return NaT + obj = TimedeltaIndex([]) + assert pd.isna(getattr(obj, op)()) + + obj = TimedeltaIndex([pd.NaT]) + assert pd.isna(getattr(obj, op)()) + + obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) + assert pd.isna(getattr(obj, op)()) + + def test_numpy_minmax_timedelta64(self): + td = timedelta_range("16815 days", "16820 days", freq="D") + + assert np.min(td) == Timedelta("16815 days") + assert np.max(td) == Timedelta("16820 days") + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(td, out=0) + + assert np.argmin(td) == 0 + assert np.argmax(td) == 5 + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(td, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(td, out=0) + + def test_timedelta_ops(self): + # GH#4984 + # make sure ops return Timedelta + s = Series( + [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)] + ) + td = s.diff() + + result = td.mean() + expected = to_timedelta(timedelta(seconds=9)) + assert result == expected + + result = td.to_frame().mean() + assert result[0] == expected + + result = td.quantile(0.1) + expected = Timedelta(np.timedelta64(2600, "ms")) + assert result == expected + + result = td.median() + expected = to_timedelta("00:00:09") + assert result == expected + + result = td.to_frame().median() + assert result[0] == expected + + # GH#6462 + # consistency in returned values for sum + 
result = td.sum() + expected = to_timedelta("00:01:21") + assert result == expected + + result = td.to_frame().sum() + assert result[0] == expected + + # std + result = td.std() + expected = to_timedelta(Series(td.dropna().values).std()) + assert result == expected + + result = td.to_frame().std() + assert result[0] == expected + + # GH#10040 + # make sure NaT is properly handled by median() + s = Series([Timestamp("2015-02-03"), Timestamp("2015-02-07")]) + assert s.diff().median() == timedelta(days=4) + + s = Series( + [Timestamp("2015-02-03"), Timestamp("2015-02-07"), Timestamp("2015-02-15")] + ) + assert s.diff().median() == timedelta(days=6) + + @pytest.mark.parametrize("opname", ["skew", "kurt", "sem", "prod", "var"]) + def test_invalid_td64_reductions(self, opname): + s = Series( + [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)] + ) + td = s.diff() + + msg = "reduction operation '{op}' not allowed for this dtype" + msg = msg.format(op=opname) + + with pytest.raises(TypeError, match=msg): + getattr(td, opname)() + + with pytest.raises(TypeError, match=msg): + getattr(td.to_frame(), opname)(numeric_only=False) + + def test_minmax_tz(self, tz_naive_fixture): + tz = tz_naive_fixture + # monotonic + idx1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz=tz) + assert idx1.is_monotonic + + # non-monotonic + idx2 = pd.DatetimeIndex( + ["2011-01-01", pd.NaT, "2011-01-03", "2011-01-02", pd.NaT], tz=tz + ) + assert not idx2.is_monotonic + + for idx in [idx1, idx2]: + assert idx.min() == Timestamp("2011-01-01", tz=tz) + assert idx.max() == Timestamp("2011-01-03", tz=tz) + assert idx.argmin() == 0 + assert idx.argmax() == 2 + + @pytest.mark.parametrize("op", ["min", "max"]) + def test_minmax_nat_datetime64(self, op): + # Return NaT + obj = DatetimeIndex([]) + assert pd.isna(getattr(obj, op)()) + + obj = DatetimeIndex([pd.NaT]) + assert pd.isna(getattr(obj, op)()) + + obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) + assert 
pd.isna(getattr(obj, op)()) + + def test_numpy_minmax_integer(self): + # GH#26125 + idx = Index([1, 2, 3]) + + expected = idx.values.max() + result = np.max(idx) + assert result == expected + + expected = idx.values.min() + result = np.min(idx) + assert result == expected + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(idx, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(idx, out=0) + + expected = idx.values.argmax() + result = np.argmax(idx) + assert result == expected + + expected = idx.values.argmin() + result = np.argmin(idx) + assert result == expected + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(idx, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(idx, out=0) + + def test_numpy_minmax_range(self): + # GH#26125 + idx = RangeIndex(0, 10, 3) + + expected = idx._int64index.max() + result = np.max(idx) + assert result == expected + + expected = idx._int64index.min() + result = np.min(idx) + assert result == expected + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(idx, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(idx, out=0) + + # No need to test again argmax/argmin compat since the implementation + # is the same as basic integer index + + def test_numpy_minmax_datetime64(self): + dr = pd.date_range(start="2016-01-15", end="2016-01-20") + + assert np.min(dr) == Timestamp("2016-01-15 00:00:00", freq="D") + assert np.max(dr) == Timestamp("2016-01-20 00:00:00", freq="D") + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(dr, out=0) + + with pytest.raises(ValueError, match=errmsg): + np.max(dr, out=0) + + assert np.argmin(dr) == 0 + assert np.argmax(dr) == 5 + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(dr, 
out=0) + + with pytest.raises(ValueError, match=errmsg): + np.argmax(dr, out=0) + + def test_minmax_period(self): + + # monotonic + idx1 = pd.PeriodIndex([NaT, "2011-01-01", "2011-01-02", "2011-01-03"], freq="D") + assert idx1.is_monotonic + + # non-monotonic + idx2 = pd.PeriodIndex( + ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], freq="D" + ) + assert not idx2.is_monotonic + + for idx in [idx1, idx2]: + assert idx.min() == pd.Period("2011-01-01", freq="D") + assert idx.max() == pd.Period("2011-01-03", freq="D") + assert idx1.argmin() == 1 + assert idx2.argmin() == 0 + assert idx1.argmax() == 3 + assert idx2.argmax() == 2 + + for op in ["min", "max"]: + # Return NaT + obj = PeriodIndex([], freq="M") + result = getattr(obj, op)() + assert result is NaT + + obj = PeriodIndex([NaT], freq="M") + result = getattr(obj, op)() + assert result is NaT + + obj = PeriodIndex([NaT, NaT, NaT], freq="M") + result = getattr(obj, op)() + assert result is NaT + + def test_numpy_minmax_period(self): + pr = pd.period_range(start="2016-01-15", end="2016-01-20") + + assert np.min(pr) == Period("2016-01-15", freq="D") + assert np.max(pr) == Period("2016-01-20", freq="D") + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.min(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.max(pr, out=0) + + assert np.argmin(pr) == 0 + assert np.argmax(pr) == 5 + + errmsg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=errmsg): + np.argmin(pr, out=0) + with pytest.raises(ValueError, match=errmsg): + np.argmax(pr, out=0) + + def test_min_max_categorical(self): + + ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + with pytest.raises(TypeError): + ci.min() + with pytest.raises(TypeError): + ci.max() + + ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=True) + assert ci.min() == "c" + assert ci.max() == "b" + + +class TestSeriesReductions: + # 
Note: the name TestSeriesReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + def test_sum_inf(self): + s = Series(np.random.randn(10)) + s2 = s.copy() + + s[5:8] = np.inf + s2[5:8] = np.nan + + assert np.isinf(s.sum()) + + arr = np.random.randn(100, 100).astype("f4") + arr[:, 2] = np.inf + + with pd.option_context("mode.use_inf_as_na", True): + tm.assert_almost_equal(s.sum(), s2.sum()) + + res = nanops.nansum(arr, axis=1) + assert np.isinf(res).all() + + @pytest.mark.parametrize("use_bottleneck", [True, False]) + @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)]) + def test_empty(self, method, unit, use_bottleneck): + with pd.option_context("use_bottleneck", use_bottleneck): + # GH#9422 / GH#18921 + # Entirely empty + s = Series([], dtype=object) + # NA by default + result = getattr(s, method)() + assert result == unit + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == unit + + result = getattr(s, method)(min_count=1) + assert pd.isna(result) + + # Skipna, default + result = getattr(s, method)(skipna=True) + result == unit + + # Skipna, explicit + result = getattr(s, method)(skipna=True, min_count=0) + assert result == unit + + result = getattr(s, method)(skipna=True, min_count=1) + assert pd.isna(result) + + # All-NA + s = Series([np.nan]) + # NA by default + result = getattr(s, method)() + assert result == unit + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == unit + + result = getattr(s, method)(min_count=1) + assert pd.isna(result) + + # Skipna, default + result = getattr(s, method)(skipna=True) + result == unit + + # skipna, explicit + result = getattr(s, method)(skipna=True, min_count=0) + assert result == unit + + result = getattr(s, method)(skipna=True, min_count=1) + assert pd.isna(result) + + # Mix of valid, empty + s = Series([np.nan, 1]) + # Default + result = getattr(s, 
method)() + assert result == 1.0 + + # Explicit + result = getattr(s, method)(min_count=0) + assert result == 1.0 + + result = getattr(s, method)(min_count=1) + assert result == 1.0 + + # Skipna + result = getattr(s, method)(skipna=True) + assert result == 1.0 + + result = getattr(s, method)(skipna=True, min_count=0) + assert result == 1.0 + + result = getattr(s, method)(skipna=True, min_count=1) + assert result == 1.0 + + # GH#844 (changed in GH#9422) + df = DataFrame(np.empty((10, 0))) + assert (getattr(df, method)(1) == unit).all() + + s = pd.Series([1]) + result = getattr(s, method)(min_count=2) + assert pd.isna(result) + + s = pd.Series([np.nan]) + result = getattr(s, method)(min_count=2) + assert pd.isna(result) + + s = pd.Series([np.nan, 1]) + result = getattr(s, method)(min_count=2) + assert pd.isna(result) + + @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)]) + def test_empty_multi(self, method, unit): + s = pd.Series( + [1, np.nan, np.nan, np.nan], + index=pd.MultiIndex.from_product([("a", "b"), (0, 1)]), + ) + # 1 / 0 by default + result = getattr(s, method)(level=0) + expected = pd.Series([1, unit], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + # min_count=0 + result = getattr(s, method)(level=0, min_count=0) + expected = pd.Series([1, unit], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + # min_count=1 + result = getattr(s, method)(level=0, min_count=1) + expected = pd.Series([1, np.nan], index=["a", "b"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("method", ["mean", "median", "std", "var"]) + def test_ops_consistency_on_empty(self, method): + + # GH#7869 + # consistency on empty + + # float + result = getattr(Series(dtype=float), method)() + assert pd.isna(result) + + # timedelta64[ns] + tdser = Series([], dtype="m8[ns]") + if method == "var": + with pytest.raises(TypeError, match="operation 'var' not allowed"): + getattr(tdser, method)() + else: + result = 
getattr(tdser, method)() + assert result is pd.NaT + + def test_nansum_buglet(self): + ser = Series([1.0, np.nan], index=[0, 1]) + result = np.nansum(ser) + tm.assert_almost_equal(result, 1) + + @pytest.mark.parametrize("use_bottleneck", [True, False]) + def test_sum_overflow(self, use_bottleneck): + + with pd.option_context("use_bottleneck", use_bottleneck): + # GH#6915 + # overflowing on the smaller int dtypes + for dtype in ["int32", "int64"]: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert int(result) == v.sum(dtype="int64") + result = s.min(skipna=False) + assert int(result) == 0 + result = s.max(skipna=False) + assert int(result) == v[-1] + + for dtype in ["float32", "float64"]: + v = np.arange(5000000, dtype=dtype) + s = Series(v) + + result = s.sum(skipna=False) + assert result == v.sum(dtype=dtype) + result = s.min(skipna=False) + assert np.allclose(float(result), 0.0) + result = s.max(skipna=False) + assert np.allclose(float(result), v[-1]) + + def test_empty_timeseries_reductions_return_nat(self): + # covers GH#11245 + for dtype in ("m8[ns]", "m8[ns]", "M8[ns]", "M8[ns, UTC]"): + assert Series([], dtype=dtype).min() is pd.NaT + assert Series([], dtype=dtype).max() is pd.NaT + assert Series([], dtype=dtype).min(skipna=False) is pd.NaT + assert Series([], dtype=dtype).max(skipna=False) is pd.NaT + + def test_numpy_argmin(self): + # See GH#16830 + data = np.arange(1, 11) + + s = Series(data, index=data) + result = np.argmin(s) + + expected = np.argmin(data) + assert result == expected + + result = s.argmin() + + assert result == expected + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argmin(s, out=data) + + def test_numpy_argmax(self): + # See GH#16830 + data = np.arange(1, 11) + + s = Series(data, index=data) + result = np.argmax(s) + expected = np.argmax(data) + assert result == expected + + result = s.argmax() + + assert result == expected + + msg = "the 
'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.argmax(s, out=data) + + def test_idxmin(self): + # test idxmin + # _check_stat_op approach can not be used here because of isna check. + string_series = tm.makeStringSeries().rename("series") + + # add some NaNs + string_series[5:15] = np.NaN + + # skipna or no + assert string_series[string_series.idxmin()] == string_series.min() + assert pd.isna(string_series.idxmin(skipna=False)) + + # no NaNs + nona = string_series.dropna() + assert nona[nona.idxmin()] == nona.min() + assert nona.index.values.tolist().index(nona.idxmin()) == nona.values.argmin() + + # all NaNs + allna = string_series * np.nan + assert pd.isna(allna.idxmin()) + + # datetime64[ns] + s = Series(pd.date_range("20130102", periods=6)) + result = s.idxmin() + assert result == 0 + + s[0] = np.nan + result = s.idxmin() + assert result == 1 + + def test_idxmax(self): + # test idxmax + # _check_stat_op approach can not be used here because of isna check. 
+ string_series = tm.makeStringSeries().rename("series") + + # add some NaNs + string_series[5:15] = np.NaN + + # skipna or no + assert string_series[string_series.idxmax()] == string_series.max() + assert pd.isna(string_series.idxmax(skipna=False)) + + # no NaNs + nona = string_series.dropna() + assert nona[nona.idxmax()] == nona.max() + assert nona.index.values.tolist().index(nona.idxmax()) == nona.values.argmax() + + # all NaNs + allna = string_series * np.nan + assert pd.isna(allna.idxmax()) + + from pandas import date_range + + s = Series(date_range("20130102", periods=6)) + result = s.idxmax() + assert result == 5 + + s[5] = np.nan + result = s.idxmax() + assert result == 4 + + # Float64Index + # GH#5914 + s = pd.Series([1, 2, 3], [1.1, 2.1, 3.1]) + result = s.idxmax() + assert result == 3.1 + result = s.idxmin() + assert result == 1.1 + + s = pd.Series(s.index, s.index) + result = s.idxmax() + assert result == 3.1 + result = s.idxmin() + assert result == 1.1 + + def test_all_any(self): + ts = tm.makeTimeSeries() + bool_series = ts > 0 + assert not bool_series.all() + assert bool_series.any() + + # Alternative types, with implicit 'object' dtype. + s = Series(["abc", True]) + assert "abc" == s.any() # 'abc' || True => 'abc' + + def test_all_any_params(self): + # Check skipna, with implicit 'object' dtype. + s1 = Series([np.nan, True]) + s2 = Series([np.nan, False]) + assert s1.all(skipna=False) # nan && True => True + assert s1.all(skipna=True) + assert np.isnan(s2.any(skipna=False)) # nan || False => nan + assert not s2.any(skipna=True) + + # Check level. + s = pd.Series([False, False, True, True, False, True], index=[0, 0, 1, 1, 2, 2]) + tm.assert_series_equal(s.all(level=0), Series([False, True, False])) + tm.assert_series_equal(s.any(level=0), Series([False, True, True])) + + # bool_only is not implemented with level option. 
+ with pytest.raises(NotImplementedError): + s.any(bool_only=True, level=0) + with pytest.raises(NotImplementedError): + s.all(bool_only=True, level=0) + + # bool_only is not implemented alone. + with pytest.raises(NotImplementedError): + s.any(bool_only=True) + with pytest.raises(NotImplementedError): + s.all(bool_only=True) + + def test_timedelta64_analytics(self): + + # index min/max + dti = pd.date_range("2012-1-1", periods=3, freq="D") + td = Series(dti) - pd.Timestamp("20120101") + + result = td.idxmin() + assert result == 0 + + result = td.idxmax() + assert result == 2 + + # GH#2982 + # with NaT + td[0] = np.nan + + result = td.idxmin() + assert result == 1 + + result = td.idxmax() + assert result == 2 + + # abs + s1 = Series(pd.date_range("20120101", periods=3)) + s2 = Series(pd.date_range("20120102", periods=3)) + expected = Series(s2 - s1) + + # FIXME: don't leave commented-out code + # this fails as numpy returns timedelta64[us] + # result = np.abs(s1-s2) + # assert_frame_equal(result,expected) + + result = (s1 - s2).abs() + tm.assert_series_equal(result, expected) + + # max/min + result = td.max() + expected = pd.Timedelta("2 days") + assert result == expected + + result = td.min() + expected = pd.Timedelta("1 days") + assert result == expected + + @pytest.mark.parametrize( + "test_input,error_type", + [ + (pd.Series([], dtype="float64"), ValueError), + # For strings, or any Series with dtype 'O' + (pd.Series(["foo", "bar", "baz"]), TypeError), + (pd.Series([(1,), (2,)]), TypeError), + # For mixed data types + (pd.Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"]), TypeError), + ], + ) + def test_assert_idxminmax_raises(self, test_input, error_type): + """ + Cases where ``Series.argmax`` and related should raise an exception + """ + with pytest.raises(error_type): + test_input.idxmin() + with pytest.raises(error_type): + test_input.idxmin(skipna=False) + with pytest.raises(error_type): + test_input.idxmax() + with pytest.raises(error_type): + 
test_input.idxmax(skipna=False) + + def test_idxminmax_with_inf(self): + # For numeric data with NA and Inf (GH #13595) + s = pd.Series([0, -np.inf, np.inf, np.nan]) + + assert s.idxmin() == 1 + assert np.isnan(s.idxmin(skipna=False)) + + assert s.idxmax() == 2 + assert np.isnan(s.idxmax(skipna=False)) + + # Using old-style behavior that treats floating point nan, -inf, and + # +inf as missing + with pd.option_context("mode.use_inf_as_na", True): + assert s.idxmin() == 0 + assert np.isnan(s.idxmin(skipna=False)) + assert s.idxmax() == 0 + np.isnan(s.idxmax(skipna=False)) + + +class TestDatetime64SeriesReductions: + # Note: the name TestDatetime64SeriesReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + @pytest.mark.parametrize( + "nat_ser", + [ + Series([pd.NaT, pd.NaT]), + Series([pd.NaT, pd.Timedelta("nat")]), + Series([pd.Timedelta("nat"), pd.Timedelta("nat")]), + ], + ) + def test_minmax_nat_series(self, nat_ser): + # GH#23282 + assert nat_ser.min() is pd.NaT + assert nat_ser.max() is pd.NaT + assert nat_ser.min(skipna=False) is pd.NaT + assert nat_ser.max(skipna=False) is pd.NaT + + @pytest.mark.parametrize( + "nat_df", + [ + pd.DataFrame([pd.NaT, pd.NaT]), + pd.DataFrame([pd.NaT, pd.Timedelta("nat")]), + pd.DataFrame([pd.Timedelta("nat"), pd.Timedelta("nat")]), + ], + ) + def test_minmax_nat_dataframe(self, nat_df): + # GH#23282 + assert nat_df.min()[0] is pd.NaT + assert nat_df.max()[0] is pd.NaT + assert nat_df.min(skipna=False)[0] is pd.NaT + assert nat_df.max(skipna=False)[0] is pd.NaT + + def test_min_max(self): + rng = pd.date_range("1/1/2000", "12/31/2000") + rng2 = rng.take(np.random.permutation(len(rng))) + + the_min = rng2.min() + the_max = rng2.max() + assert isinstance(the_min, pd.Timestamp) + assert isinstance(the_max, pd.Timestamp) + assert the_min == rng[0] + assert the_max == rng[-1] + + assert rng.min() == rng[0] + assert rng.max() == 
rng[-1] + + def test_min_max_series(self): + rng = pd.date_range("1/1/2000", periods=10, freq="4h") + lvls = ["A", "A", "A", "B", "B", "B", "C", "C", "C", "C"] + df = DataFrame({"TS": rng, "V": np.random.randn(len(rng)), "L": lvls}) + + result = df.TS.max() + exp = pd.Timestamp(df.TS.iat[-1]) + assert isinstance(result, pd.Timestamp) + assert result == exp + + result = df.TS.min() + exp = pd.Timestamp(df.TS.iat[0]) + assert isinstance(result, pd.Timestamp) + assert result == exp + + +class TestCategoricalSeriesReductions: + # Note: the name TestCategoricalSeriesReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + def test_min_max(self): + # unordered cats have no min/max + cat = Series(Categorical(["a", "b", "c", "d"], ordered=False)) + with pytest.raises(TypeError): + cat.min() + with pytest.raises(TypeError): + cat.max() + + cat = Series(Categorical(["a", "b", "c", "d"], ordered=True)) + _min = cat.min() + _max = cat.max() + assert _min == "a" + assert _max == "d" + + cat = Series( + Categorical( + ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True + ) + ) + _min = cat.min() + _max = cat.max() + assert _min == "d" + assert _max == "a" + + cat = Series( + Categorical( + [np.nan, "b", "c", np.nan], + categories=["d", "c", "b", "a"], + ordered=True, + ) + ) + _min = cat.min() + _max = cat.max() + assert _min == "c" + assert _max == "b" + + cat = Series( + Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + ) + _min = cat.min() + _max = cat.max() + assert _min == 2 + assert _max == 1 + + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_skipna(self, skipna): + # GH 25303 + cat = Series( + Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True) + ) + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) + + if skipna is True: + assert _min == "b" + assert _max == "a" + 
else: + assert np.isnan(_min) + assert np.isnan(_max) + + +class TestSeriesMode: + # Note: the name TestSeriesMode indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + @pytest.mark.parametrize( + "dropna, expected", + [(True, Series([], dtype=np.float64)), (False, Series([], dtype=np.float64))], + ) + def test_mode_empty(self, dropna, expected): + s = Series([], dtype=np.float64) + result = s.mode(dropna) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dropna, data, expected", + [ + (True, [1, 1, 1, 2], [1]), + (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]), + (False, [1, 1, 1, 2], [1]), + (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]), + ], + ) + @pytest.mark.parametrize( + "dt", list(np.typecodes["AllInteger"] + np.typecodes["Float"]) + ) + def test_mode_numerical(self, dropna, data, expected, dt): + s = Series(data, dtype=dt) + result = s.mode(dropna) + expected = Series(expected, dtype=dt) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dropna, expected", [(True, [1.0]), (False, [1, np.nan])]) + def test_mode_numerical_nan(self, dropna, expected): + s = Series([1, 1, 2, np.nan, np.nan]) + result = s.mode(dropna) + expected = Series(expected) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dropna, expected1, expected2, expected3", + [(True, ["b"], ["bar"], ["nan"]), (False, ["b"], [np.nan], ["nan"])], + ) + def test_mode_str_obj(self, dropna, expected1, expected2, expected3): + # Test string and object types. 
+ data = ["a"] * 2 + ["b"] * 3 + + s = Series(data, dtype="c") + result = s.mode(dropna) + expected1 = Series(expected1, dtype="c") + tm.assert_series_equal(result, expected1) + + data = ["foo", "bar", "bar", np.nan, np.nan, np.nan] + + s = Series(data, dtype=object) + result = s.mode(dropna) + expected2 = Series(expected2, dtype=object) + tm.assert_series_equal(result, expected2) + + data = ["foo", "bar", "bar", np.nan, np.nan, np.nan] + + s = Series(data, dtype=object).astype(str) + result = s.mode(dropna) + expected3 = Series(expected3, dtype=str) + tm.assert_series_equal(result, expected3) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [(True, ["foo"], ["foo"]), (False, ["foo"], [np.nan])], + ) + def test_mode_mixeddtype(self, dropna, expected1, expected2): + s = Series([1, "foo", "foo"]) + result = s.mode(dropna) + expected = Series(expected1) + tm.assert_series_equal(result, expected) + + s = Series([1, "foo", "foo", np.nan, np.nan, np.nan]) + result = s.mode(dropna) + expected = Series(expected2, dtype=object) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [ + ( + True, + ["1900-05-03", "2011-01-03", "2013-01-02"], + ["2011-01-03", "2013-01-02"], + ), + (False, [np.nan], [np.nan, "2011-01-03", "2013-01-02"]), + ], + ) + def test_mode_datetime(self, dropna, expected1, expected2): + s = Series( + ["2011-01-03", "2013-01-02", "1900-05-03", "nan", "nan"], dtype="M8[ns]" + ) + result = s.mode(dropna) + expected1 = Series(expected1, dtype="M8[ns]") + tm.assert_series_equal(result, expected1) + + s = Series( + [ + "2011-01-03", + "2013-01-02", + "1900-05-03", + "2011-01-03", + "2013-01-02", + "nan", + "nan", + ], + dtype="M8[ns]", + ) + result = s.mode(dropna) + expected2 = Series(expected2, dtype="M8[ns]") + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [ + (True, ["-1 days", "0 days", "1 days"], ["2 min", "1 day"]), + 
(False, [np.nan], [np.nan, "2 min", "1 day"]), + ], + ) + def test_mode_timedelta(self, dropna, expected1, expected2): + # gh-5986: Test timedelta types. + + s = Series( + ["1 days", "-1 days", "0 days", "nan", "nan"], dtype="timedelta64[ns]" + ) + result = s.mode(dropna) + expected1 = Series(expected1, dtype="timedelta64[ns]") + tm.assert_series_equal(result, expected1) + + s = Series( + [ + "1 day", + "1 day", + "-1 day", + "-1 day 2 min", + "2 min", + "2 min", + "nan", + "nan", + ], + dtype="timedelta64[ns]", + ) + result = s.mode(dropna) + expected2 = Series(expected2, dtype="timedelta64[ns]") + tm.assert_series_equal(result, expected2) + + @pytest.mark.parametrize( + "dropna, expected1, expected2, expected3", + [ + ( + True, + Categorical([1, 2], categories=[1, 2]), + Categorical(["a"], categories=[1, "a"]), + Categorical([3, 1], categories=[3, 2, 1], ordered=True), + ), + ( + False, + Categorical([np.nan], categories=[1, 2]), + Categorical([np.nan, "a"], categories=[1, "a"]), + Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True), + ), + ], + ) + def test_mode_category(self, dropna, expected1, expected2, expected3): + s = Series(Categorical([1, 2, np.nan, np.nan])) + result = s.mode(dropna) + expected1 = Series(expected1, dtype="category") + tm.assert_series_equal(result, expected1) + + s = Series(Categorical([1, "a", "a", np.nan, np.nan])) + result = s.mode(dropna) + expected2 = Series(expected2, dtype="category") + tm.assert_series_equal(result, expected2) + + s = Series( + Categorical( + [1, 1, 2, 3, 3, np.nan, np.nan], categories=[3, 2, 1], ordered=True + ) + ) + result = s.mode(dropna) + expected3 = Series(expected3, dtype="category") + tm.assert_series_equal(result, expected3) + + @pytest.mark.parametrize( + "dropna, expected1, expected2", + [(True, [2 ** 63], [1, 2 ** 63]), (False, [2 ** 63], [1, 2 ** 63])], + ) + def test_mode_intoverflow(self, dropna, expected1, expected2): + # Test for uint64 overflow. 
+ s = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64) + result = s.mode(dropna) + expected1 = Series(expected1, dtype=np.uint64) + tm.assert_series_equal(result, expected1) + + s = Series([1, 2 ** 63], dtype=np.uint64) + result = s.mode(dropna) + expected2 = Series(expected2, dtype=np.uint64) + tm.assert_series_equal(result, expected2) + + def test_mode_sortwarning(self): + # Check for the warning that is raised when the mode + # results cannot be sorted + + expected = Series(["foo", np.nan]) + s = Series([1, "foo", "foo", np.nan, np.nan]) + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + result = s.mode(dropna=False) + result = result.sort_values().reset_index(drop=True) + + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py new file mode 100644 index 00000000..59dbcb9a --- /dev/null +++ b/pandas/tests/reductions/test_stat_reductions.py @@ -0,0 +1,270 @@ +""" +Tests for statistical reductions of 2nd moment or higher: var, skew, kurt, ... 
+""" +import inspect + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray + + +class TestDatetimeLikeStatReductions: + @pytest.mark.parametrize("box", [Series, pd.Index, DatetimeArray]) + def test_dt64_mean(self, tz_naive_fixture, box): + tz = tz_naive_fixture + + dti = pd.date_range("2001-01-01", periods=11, tz=tz) + # shuffle so that we are not just working with monotone-increasing + dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6]) + dtarr = dti._data + + obj = box(dtarr) + assert obj.mean() == pd.Timestamp("2001-01-06", tz=tz) + assert obj.mean(skipna=False) == pd.Timestamp("2001-01-06", tz=tz) + + # dtarr[-2] will be the first date 2001-01-1 + dtarr[-2] = pd.NaT + + obj = box(dtarr) + assert obj.mean() == pd.Timestamp("2001-01-06 07:12:00", tz=tz) + assert obj.mean(skipna=False) is pd.NaT + + @pytest.mark.parametrize("box", [Series, pd.Index, PeriodArray]) + def test_period_mean(self, box): + # GH#24757 + dti = pd.date_range("2001-01-01", periods=11) + # shuffle so that we are not just working with monotone-increasing + dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6]) + + # use hourly frequency to avoid rounding errors in expected results + # TODO: flesh this out with different frequencies + parr = dti._data.to_period("H") + obj = box(parr) + with pytest.raises(TypeError, match="ambiguous"): + obj.mean() + with pytest.raises(TypeError, match="ambiguous"): + obj.mean(skipna=True) + + # parr[-2] will be the first date 2001-01-1 + parr[-2] = pd.NaT + + with pytest.raises(TypeError, match="ambiguous"): + obj.mean() + with pytest.raises(TypeError, match="ambiguous"): + obj.mean(skipna=True) + + @pytest.mark.parametrize("box", [Series, pd.Index, TimedeltaArray]) + def test_td64_mean(self, box): + tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4], unit="D") + + 
tdarr = tdi._data + obj = box(tdarr) + + result = obj.mean() + expected = np.array(tdarr).mean() + assert result == expected + + tdarr[0] = pd.NaT + assert obj.mean(skipna=False) is pd.NaT + + result2 = obj.mean(skipna=True) + assert result2 == tdi[1:].mean() + + # exact equality fails by 1 nanosecond + assert result2.round("us") == (result * 11.0 / 10).round("us") + + +class TestSeriesStatReductions: + # Note: the name TestSeriesStatReductions indicates these tests + # were moved from a series-specific test file, _not_ that these tests are + # intended long-term to be series-specific + + def _check_stat_op( + self, name, alternate, string_series_, check_objects=False, check_allna=False + ): + + with pd.option_context("use_bottleneck", False): + f = getattr(Series, name) + + # add some NaNs + string_series_[5:15] = np.NaN + + # mean, idxmax, idxmin, min, and max are valid for dates + if name not in ["max", "min", "mean"]: + ds = Series(pd.date_range("1/1/2001", periods=10)) + with pytest.raises(TypeError): + f(ds) + + # skipna or no + assert pd.notna(f(string_series_)) + assert pd.isna(f(string_series_, skipna=False)) + + # check the result is correct + nona = string_series_.dropna() + tm.assert_almost_equal(f(nona), alternate(nona.values)) + tm.assert_almost_equal(f(string_series_), alternate(nona.values)) + + allna = string_series_ * np.nan + + if check_allna: + assert np.isnan(f(allna)) + + # dtype=object with None, it works! + s = Series([1, 2, 3, None, 5]) + f(s) + + # GH#2888 + items = [0] + items.extend(range(2 ** 40, 2 ** 40 + 1000)) + s = Series(items, dtype="int64") + tm.assert_almost_equal(float(f(s)), float(alternate(s.values))) + + # check date range + if check_objects: + s = Series(pd.bdate_range("1/1/2000", periods=10)) + res = f(s) + exp = alternate(s) + assert res == exp + + # check on string data + if name not in ["sum", "min", "max"]: + with pytest.raises(TypeError): + f(Series(list("abc"))) + + # Invalid axis. 
+ with pytest.raises(ValueError): + f(string_series_, axis=1) + + # Unimplemented numeric_only parameter. + if "numeric_only" in inspect.getfullargspec(f).args: + with pytest.raises(NotImplementedError, match=name): + f(string_series_, numeric_only=True) + + def test_sum(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("sum", np.sum, string_series, check_allna=False) + + def test_mean(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("mean", np.mean, string_series) + + def test_median(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("median", np.median, string_series) + + # test with integers, test failure + int_ts = Series(np.ones(10, dtype=int), index=range(10)) + tm.assert_almost_equal(np.median(int_ts), int_ts.median()) + + def test_prod(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("prod", np.prod, string_series) + + def test_min(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("min", np.min, string_series, check_objects=True) + + def test_max(self): + string_series = tm.makeStringSeries().rename("series") + self._check_stat_op("max", np.max, string_series, check_objects=True) + + def test_var_std(self): + string_series = tm.makeStringSeries().rename("series") + datetime_series = tm.makeTimeSeries().rename("ts") + + alt = lambda x: np.std(x, ddof=1) + self._check_stat_op("std", alt, string_series) + + alt = lambda x: np.var(x, ddof=1) + self._check_stat_op("var", alt, string_series) + + result = datetime_series.std(ddof=4) + expected = np.std(datetime_series.values, ddof=4) + tm.assert_almost_equal(result, expected) + + result = datetime_series.var(ddof=4) + expected = np.var(datetime_series.values, ddof=4) + tm.assert_almost_equal(result, expected) + + # 1 - element series with ddof=1 + s = datetime_series.iloc[[0]] + result = s.var(ddof=1) + assert pd.isna(result) + + result 
= s.std(ddof=1) + assert pd.isna(result) + + def test_sem(self): + string_series = tm.makeStringSeries().rename("series") + datetime_series = tm.makeTimeSeries().rename("ts") + + alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) + self._check_stat_op("sem", alt, string_series) + + result = datetime_series.sem(ddof=4) + expected = np.std(datetime_series.values, ddof=4) / np.sqrt( + len(datetime_series.values) + ) + tm.assert_almost_equal(result, expected) + + # 1 - element series with ddof=1 + s = datetime_series.iloc[[0]] + result = s.sem(ddof=1) + assert pd.isna(result) + + @td.skip_if_no_scipy + def test_skew(self): + from scipy.stats import skew + + string_series = tm.makeStringSeries().rename("series") + + alt = lambda x: skew(x, bias=False) + self._check_stat_op("skew", alt, string_series) + + # test corner cases, skew() returns NaN unless there's at least 3 + # values + min_N = 3 + for i in range(1, min_N + 1): + s = Series(np.ones(i)) + df = DataFrame(np.ones((i, i))) + if i < min_N: + assert np.isnan(s.skew()) + assert np.isnan(df.skew()).all() + else: + assert 0 == s.skew() + assert (df.skew() == 0).all() + + @td.skip_if_no_scipy + def test_kurt(self): + from scipy.stats import kurtosis + + string_series = tm.makeStringSeries().rename("series") + + alt = lambda x: kurtosis(x, bias=False) + self._check_stat_op("kurt", alt, string_series) + + index = pd.MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + tm.assert_almost_equal(s.kurt(), s.kurt(level=0)["bar"]) + + # test corner cases, kurt() returns NaN unless there's at least 4 + # values + min_N = 4 + for i in range(1, min_N + 1): + s = Series(np.ones(i)) + df = DataFrame(np.ones((i, i))) + if i < min_N: + assert np.isnan(s.kurt()) + assert np.isnan(df.kurt()).all() + else: + assert 0 == s.kurt() + assert (df.kurt() == 0).all() diff --git 
a/pandas/tests/resample/__init__.py b/pandas/tests/resample/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/resample/conftest.py b/pandas/tests/resample/conftest.py new file mode 100644 index 00000000..bb4f7ced --- /dev/null +++ b/pandas/tests/resample/conftest.py @@ -0,0 +1,158 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import DataFrame, Series +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import period_range + +# The various methods we support +downsample_methods = [ + "min", + "max", + "first", + "last", + "sum", + "mean", + "sem", + "median", + "prod", + "var", + "std", + "ohlc", + "quantile", +] +upsample_methods = ["count", "size"] +series_methods = ["nunique"] +resample_methods = downsample_methods + upsample_methods + series_methods + + +@pytest.fixture(params=downsample_methods) +def downsample_method(request): + """Fixture for parametrization of Grouper downsample methods.""" + return request.param + + +@pytest.fixture(params=upsample_methods) +def upsample_method(request): + """Fixture for parametrization of Grouper upsample methods.""" + return request.param + + +@pytest.fixture(params=resample_methods) +def resample_method(request): + """Fixture for parametrization of Grouper resample methods.""" + return request.param + + +@pytest.fixture +def simple_date_range_series(): + """ + Series with date range index and random data for test purposes. + """ + + def _simple_date_range_series(start, end, freq="D"): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + return _simple_date_range_series + + +@pytest.fixture +def simple_period_range_series(): + """ + Series with period range index and random data for test purposes. 
+ """ + + def _simple_period_range_series(start, end, freq="D"): + rng = period_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + return _simple_period_range_series + + +@pytest.fixture +def _index_start(): + """Fixture for parametrization of index, series and frame.""" + return datetime(2005, 1, 1) + + +@pytest.fixture +def _index_end(): + """Fixture for parametrization of index, series and frame.""" + return datetime(2005, 1, 10) + + +@pytest.fixture +def _index_freq(): + """Fixture for parametrization of index, series and frame.""" + return "D" + + +@pytest.fixture +def _index_name(): + """Fixture for parametrization of index, series and frame.""" + return None + + +@pytest.fixture +def index(_index_factory, _index_start, _index_end, _index_freq, _index_name): + """Fixture for parametrization of date_range, period_range and + timedelta_range indexes""" + return _index_factory(_index_start, _index_end, freq=_index_freq, name=_index_name) + + +@pytest.fixture +def _static_values(index): + """Fixture for parametrization of values used in parametrization of + Series and DataFrames with date_range, period_range and + timedelta_range indexes""" + return np.arange(len(index)) + + +@pytest.fixture +def _series_name(): + """Fixture for parametrization of Series name for Series used with + date_range, period_range and timedelta_range indexes""" + return None + + +@pytest.fixture +def series(index, _series_name, _static_values): + """Fixture for parametrization of Series with date_range, period_range and + timedelta_range indexes""" + return Series(_static_values, index=index, name=_series_name) + + +@pytest.fixture +def empty_series(series): + """Fixture for parametrization of empty Series with date_range, + period_range and timedelta_range indexes""" + return series[:0] + + +@pytest.fixture +def frame(index, _series_name, _static_values): + """Fixture for parametrization of DataFrame with date_range, period_range + and timedelta_range 
indexes""" + # _series_name is intentionally unused + return DataFrame({"value": _static_values}, index=index) + + +@pytest.fixture +def empty_frame(series): + """Fixture for parametrization of empty DataFrame with date_range, + period_range and timedelta_range indexes""" + index = series.index[:0] + return DataFrame(index=index) + + +@pytest.fixture(params=[Series, DataFrame]) +def series_and_frame(request, series, frame): + """Fixture for parametrization of Series and DataFrame with date_range, + period_range and timedelta_range indexes""" + if request.param == Series: + return series + if request.param == DataFrame: + return frame diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py new file mode 100644 index 00000000..f8a1810e --- /dev/null +++ b/pandas/tests/resample/test_base.py @@ -0,0 +1,269 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.groupby.groupby import DataError +from pandas.core.groupby.grouper import Grouper +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import PeriodIndex, period_range +from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range + +# a fixture value can be overridden by the test parameter value. Note that the +# value of the fixture can be overridden this way even if the test doesn't use +# it directly (doesn't mention it in the function prototype). 
+# see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa +# in this module we override the fixture values defined in conftest.py +# tuples of '_index_factory,_series_name,_index_start,_index_end' +DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) +PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) +TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") + +all_ts = pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", + [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], +) + + +@pytest.fixture +def create_index(_index_factory): + def _create_index(*args, **kwargs): + """ return the _index_factory created using the args, kwargs """ + return _index_factory(*args, **kwargs) + + return _create_index + + +@pytest.mark.parametrize("freq", ["2D", "1H"]) +@pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE] +) +def test_asfreq(series_and_frame, freq, create_index): + obj = series_and_frame + + result = obj.resample(freq).asfreq() + new_index = create_index(obj.index[0], obj.index[-1], freq=freq) + expected = obj.reindex(new_index) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize( + "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE] +) +def test_asfreq_fill_value(series, create_index): + # test for fill value during resampling, issue 3715 + + s = series + + result = s.resample("1H").asfreq() + new_index = create_index(s.index[0], s.index[-1], freq="1H") + expected = s.reindex(new_index) + tm.assert_series_equal(result, expected) + + frame = s.to_frame("value") + frame.iloc[1] = None + result = frame.resample("1H").asfreq(fill_value=4.0) + new_index = create_index(frame.index[0], frame.index[-1], freq="1H") + expected = frame.reindex(new_index, fill_value=4.0) + tm.assert_frame_equal(result, expected) + + +@all_ts +def 
test_resample_interpolate(frame): + # # 12925 + df = frame + tm.assert_frame_equal( + df.resample("1T").asfreq().interpolate(), df.resample("1T").interpolate() + ) + + +def test_raises_on_non_datetimelike_index(): + # this is a non datetimelike index + xp = DataFrame() + msg = ( + "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " + "but got an instance of 'Index'" + ) + with pytest.raises(TypeError, match=msg): + xp.resample("A").mean() + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_empty_series(freq, empty_series, resample_method): + # GH12771 & GH12868 + + if resample_method == "ohlc": + pytest.skip("need to test for ohlc from GH13083") + + s = empty_series + result = getattr(s.resample(freq), resample_method)() + + expected = s.copy() + if isinstance(s.index, PeriodIndex): + expected.index = s.index.asfreq(freq=freq) + else: + expected.index = s.index._shallow_copy(freq=freq) + tm.assert_index_equal(result.index, expected.index) + assert result.index.freq == expected.index.freq + tm.assert_series_equal(result, expected, check_dtype=False) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +@pytest.mark.parametrize("resample_method", ["count", "size"]) +def test_resample_count_empty_series(freq, empty_series, resample_method): + # GH28427 + result = getattr(empty_series.resample(freq), resample_method)() + + if isinstance(empty_series.index, PeriodIndex): + index = empty_series.index.asfreq(freq=freq) + else: + index = empty_series.index._shallow_copy(freq=freq) + expected = pd.Series([], dtype="int64", index=index, name=empty_series.name) + + tm.assert_series_equal(result, expected) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_empty_dataframe(empty_frame, freq, resample_method): + # GH13212 + df = empty_frame + # count retains dimensions too + result = getattr(df.resample(freq), resample_method)() + if resample_method != "size": + expected = df.copy() + else: + 
# GH14962 + expected = Series([], dtype=object) + + if isinstance(df.index, PeriodIndex): + expected.index = df.index.asfreq(freq=freq) + else: + expected.index = df.index._shallow_copy(freq=freq) + tm.assert_index_equal(result.index, expected.index) + assert result.index.freq == expected.index.freq + tm.assert_almost_equal(result, expected, check_dtype=False) + + # test size for GH13212 (currently stays as df) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_count_empty_dataframe(freq, empty_frame): + # GH28427 + + empty_frame = empty_frame.copy() + empty_frame["a"] = [] + + result = empty_frame.resample(freq).count() + + if isinstance(empty_frame.index, PeriodIndex): + index = empty_frame.index.asfreq(freq=freq) + else: + index = empty_frame.index._shallow_copy(freq=freq) + expected = pd.DataFrame({"a": []}, dtype="int64", index=index) + + tm.assert_frame_equal(result, expected) + + +@all_ts +@pytest.mark.parametrize("freq", ["M", "D", "H"]) +def test_resample_size_empty_dataframe(freq, empty_frame): + # GH28427 + + empty_frame = empty_frame.copy() + empty_frame["a"] = [] + + result = empty_frame.resample(freq).size() + + if isinstance(empty_frame.index, PeriodIndex): + index = empty_frame.index.asfreq(freq=freq) + else: + index = empty_frame.index._shallow_copy(freq=freq) + expected = pd.Series([], dtype="int64", index=index) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0)) +@pytest.mark.parametrize("dtype", [np.float, np.int, np.object, "datetime64[ns]"]) +def test_resample_empty_dtypes(index, dtype, resample_method): + + # Empty series were sometimes causing a segfault (for the functions + # with Cython bounds-checking disabled) or an IndexError. We just run + # them to ensure they no longer do. 
(GH #10228) + empty_series = Series([], index, dtype) + try: + getattr(empty_series.resample("d"), resample_method)() + except DataError: + # Ignore these since some combinations are invalid + # (ex: doing mean with dtype of np.object) + pass + + +@all_ts +@pytest.mark.parametrize("arg", ["mean", {"value": "mean"}, ["mean"]]) +def test_resample_loffset_arg_type(frame, create_index, arg): + # GH 13218, 15002 + df = frame + expected_means = [df.values[i : i + 2].mean() for i in range(0, len(df.values), 2)] + expected_index = create_index(df.index[0], periods=len(df.index) / 2, freq="2D") + + # loffset coerces PeriodIndex to DateTimeIndex + if isinstance(expected_index, PeriodIndex): + expected_index = expected_index.to_timestamp() + + expected_index += timedelta(hours=2) + expected = DataFrame({"value": expected_means}, index=expected_index) + + result_agg = df.resample("2D", loffset="2H").agg(arg) + + if isinstance(arg, list): + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + + # GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex + if isinstance(expected.index, TimedeltaIndex): + msg = "DataFrame are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(result_agg, expected) + else: + tm.assert_frame_equal(result_agg, expected) + + +@all_ts +def test_apply_to_empty_series(empty_series): + # GH 14313 + s = empty_series + for freq in ["M", "D", "H"]: + result = s.resample(freq).apply(lambda x: 1) + expected = s.resample(freq).apply(np.sum) + + tm.assert_series_equal(result, expected, check_dtype=False) + + +@all_ts +def test_resampler_is_iterable(series): + # GH 15314 + freq = "H" + tg = Grouper(freq=freq, convention="start") + grouped = series.groupby(tg) + resampled = series.resample(freq) + for (rk, rv), (gk, gv) in zip(resampled, grouped): + assert rk == gk + tm.assert_series_equal(rv, gv) + + +@all_ts +def test_resample_quantile(series): + # GH 15023 + s = series + q = 0.75 + freq = "H" + result = 
s.resample(freq).quantile(q) + expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py new file mode 100644 index 00000000..ab6985b1 --- /dev/null +++ b/pandas/tests/resample/test_datetime_index.py @@ -0,0 +1,1583 @@ +from datetime import datetime, timedelta +from functools import partial +from io import StringIO + +import numpy as np +import pytest +import pytz + +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import DataFrame, Series, Timedelta, Timestamp, isna, notna +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouper +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import Period, period_range +from pandas.core.resample import DatetimeIndex, _get_timestamp_range_edges + +import pandas.tseries.offsets as offsets +from pandas.tseries.offsets import BDay, Minute + + +@pytest.fixture() +def _index_factory(): + return date_range + + +@pytest.fixture +def _index_freq(): + return "Min" + + +@pytest.fixture +def _static_values(index): + return np.random.rand(len(index)) + + +def test_custom_grouper(index): + + dti = index + s = Series(np.array([1] * len(dti)), index=dti, dtype="int64") + + b = Grouper(freq=Minute(5)) + g = s.groupby(b) + + # check all cython functions work + funcs = ["add", "mean", "prod", "ohlc", "min", "max", "var"] + for f in funcs: + g._cython_agg_general(f) + + b = Grouper(freq=Minute(5), closed="right", label="right") + g = s.groupby(b) + # check all cython functions work + funcs = ["add", "mean", "prod", "ohlc", "min", "max", "var"] + for f in funcs: + g._cython_agg_general(f) + + assert g.ngroups == 2593 + assert notna(g.mean()).all() + + # construct expected val + arr = [1] + [5] * 2592 + idx = dti[0:-1:5] + idx = idx.append(dti[-1:]) + expect = Series(arr, index=idx) + + # GH2763 
- return in put dtype if we can + result = g.agg(np.sum) + tm.assert_series_equal(result, expect) + + df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64") + r = df.groupby(b).agg(np.sum) + + assert len(r.columns) == 10 + assert len(r.index) == 2593 + + +@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +@pytest.mark.parametrize( + "closed, expected", + [ + ( + "right", + lambda s: Series( + [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], + index=date_range("1/1/2000", periods=4, freq="5min", name="index"), + ), + ), + ( + "left", + lambda s: Series( + [s[:5].mean(), s[5:10].mean(), s[10:].mean()], + index=date_range( + "1/1/2000 00:05", periods=3, freq="5min", name="index" + ), + ), + ), + ], +) +def test_resample_basic(series, closed, expected): + s = series + expected = expected(s) + result = s.resample("5min", closed=closed, label="right").mean() + tm.assert_series_equal(result, expected) + + +def test_resample_integerarray(): + # GH 25580, resample on IntegerArray + ts = pd.Series( + range(9), index=pd.date_range("1/1/2000", periods=9, freq="T"), dtype="Int64" + ) + result = ts.resample("3T").sum() + expected = Series( + [3, 12, 21], + index=pd.date_range("1/1/2000", periods=3, freq="3T"), + dtype="Int64", + ) + tm.assert_series_equal(result, expected) + + result = ts.resample("3T").mean() + expected = Series( + [1, 4, 7], index=pd.date_range("1/1/2000", periods=3, freq="3T"), dtype="Int64", + ) + tm.assert_series_equal(result, expected) + + +def test_resample_basic_grouper(series): + s = series + result = s.resample("5Min").last() + grouper = Grouper(freq=Minute(5), closed="left", label="left") + expected = s.groupby(grouper).agg(lambda x: x[-1]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +@pytest.mark.parametrize( + 
"keyword,value", + [("label", "righttt"), ("closed", "righttt"), ("convention", "starttt")], +) +def test_resample_string_kwargs(series, keyword, value): + # see gh-19303 + # Check that wrong keyword argument strings raise an error + msg = f"Unsupported value {value} for `{keyword}`" + with pytest.raises(ValueError, match=msg): + series.resample("5min", **({keyword: value})) + + +@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +def test_resample_how(series, downsample_method): + if downsample_method == "ohlc": + pytest.skip("covered by test_resample_how_ohlc") + + s = series + grouplist = np.ones_like(s) + grouplist[0] = 0 + grouplist[1:6] = 1 + grouplist[6:11] = 2 + grouplist[11:] = 3 + expected = s.groupby(grouplist).agg(downsample_method) + expected.index = date_range("1/1/2000", periods=4, freq="5min", name="index") + + result = getattr( + s.resample("5min", closed="right", label="right"), downsample_method + )() + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "_index_start,_index_end,_index_name", + [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")], +) +def test_resample_how_ohlc(series): + s = series + grouplist = np.ones_like(s) + grouplist[0] = 0 + grouplist[1:6] = 1 + grouplist[6:11] = 2 + grouplist[11:] = 3 + + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + expected = DataFrame( + s.groupby(grouplist).agg(_ohlc).values.tolist(), + index=date_range("1/1/2000", periods=4, freq="5min", name="index"), + columns=["open", "high", "low", "close"], + ) + + result = s.resample("5min", closed="right", label="right").ohlc() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max", "sum", "prod", "mean", "var", "std"]) +def test_numpy_compat(func): + # see gh-12811 + s = Series([1, 2, 3, 4, 5], index=date_range("20130101", periods=5, 
freq="s")) + r = s.resample("2s") + + msg = "numpy operations are not valid with resample" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(func, 1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, func)(axis=1) + + +def test_resample_how_callables(): + # GH#7929 + data = np.arange(5, dtype=np.int64) + ind = date_range(start="2014-01-01", periods=len(data), freq="d") + df = DataFrame({"A": data, "B": data}, index=ind) + + def fn(x, a=1): + return str(type(x)) + + class FnClass: + def __call__(self, x): + return str(type(x)) + + df_standard = df.resample("M").apply(fn) + df_lambda = df.resample("M").apply(lambda x: str(type(x))) + df_partial = df.resample("M").apply(partial(fn)) + df_partial2 = df.resample("M").apply(partial(fn, a=2)) + df_class = df.resample("M").apply(FnClass()) + + tm.assert_frame_equal(df_standard, df_lambda) + tm.assert_frame_equal(df_standard, df_partial) + tm.assert_frame_equal(df_standard, df_partial2) + tm.assert_frame_equal(df_standard, df_class) + + +def test_resample_rounding(): + # GH 8371 + # odd results when rounding is needed + + data = """date,time,value +11-08-2014,00:00:01.093,1 +11-08-2014,00:00:02.159,1 +11-08-2014,00:00:02.667,1 +11-08-2014,00:00:03.175,1 +11-08-2014,00:00:07.058,1 +11-08-2014,00:00:07.362,1 +11-08-2014,00:00:08.324,1 +11-08-2014,00:00:08.830,1 +11-08-2014,00:00:08.982,1 +11-08-2014,00:00:09.815,1 +11-08-2014,00:00:10.540,1 +11-08-2014,00:00:11.061,1 +11-08-2014,00:00:11.617,1 +11-08-2014,00:00:13.607,1 +11-08-2014,00:00:14.535,1 +11-08-2014,00:00:15.525,1 +11-08-2014,00:00:17.960,1 +11-08-2014,00:00:20.674,1 +11-08-2014,00:00:21.191,1""" + + df = pd.read_csv( + StringIO(data), + parse_dates={"timestamp": ["date", "time"]}, + index_col="timestamp", + ) + df.index.name = None + result = df.resample("6s").sum() + expected = DataFrame( + {"value": [4, 9, 4, 2]}, index=date_range("2014-11-08", freq="6s", periods=4) + ) + tm.assert_frame_equal(result, 
expected) + + result = df.resample("7s").sum() + expected = DataFrame( + {"value": [4, 10, 4, 1]}, index=date_range("2014-11-08", freq="7s", periods=4) + ) + tm.assert_frame_equal(result, expected) + + result = df.resample("11s").sum() + expected = DataFrame( + {"value": [11, 8]}, index=date_range("2014-11-08", freq="11s", periods=2) + ) + tm.assert_frame_equal(result, expected) + + result = df.resample("13s").sum() + expected = DataFrame( + {"value": [13, 6]}, index=date_range("2014-11-08", freq="13s", periods=2) + ) + tm.assert_frame_equal(result, expected) + + result = df.resample("17s").sum() + expected = DataFrame( + {"value": [16, 3]}, index=date_range("2014-11-08", freq="17s", periods=2) + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_basic_from_daily(): + # from daily + dti = date_range( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" + ) + + s = Series(np.random.rand(len(dti)), dti) + + # to weekly + result = s.resample("w-sun").last() + + assert len(result) == 3 + assert (result.index.dayofweek == [6, 6, 6]).all() + assert result.iloc[0] == s["1/2/2005"] + assert result.iloc[1] == s["1/9/2005"] + assert result.iloc[2] == s.iloc[-1] + + result = s.resample("W-MON").last() + assert len(result) == 2 + assert (result.index.dayofweek == [0, 0]).all() + assert result.iloc[0] == s["1/3/2005"] + assert result.iloc[1] == s["1/10/2005"] + + result = s.resample("W-TUE").last() + assert len(result) == 2 + assert (result.index.dayofweek == [1, 1]).all() + assert result.iloc[0] == s["1/4/2005"] + assert result.iloc[1] == s["1/10/2005"] + + result = s.resample("W-WED").last() + assert len(result) == 2 + assert (result.index.dayofweek == [2, 2]).all() + assert result.iloc[0] == s["1/5/2005"] + assert result.iloc[1] == s["1/10/2005"] + + result = s.resample("W-THU").last() + assert len(result) == 2 + assert (result.index.dayofweek == [3, 3]).all() + assert result.iloc[0] == s["1/6/2005"] + assert result.iloc[1] == 
s["1/10/2005"] + + result = s.resample("W-FRI").last() + assert len(result) == 2 + assert (result.index.dayofweek == [4, 4]).all() + assert result.iloc[0] == s["1/7/2005"] + assert result.iloc[1] == s["1/10/2005"] + + # to biz day + result = s.resample("B").last() + assert len(result) == 7 + assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all() + + assert result.iloc[0] == s["1/2/2005"] + assert result.iloc[1] == s["1/3/2005"] + assert result.iloc[5] == s["1/9/2005"] + assert result.index.name == "index" + + +def test_resample_upsampling_picked_but_not_correct(): + + # Test for issue #3020 + dates = date_range("01-Jan-2014", "05-Jan-2014", freq="D") + series = Series(1, index=dates) + + result = series.resample("D").mean() + assert result.index[0] == dates[0] + + # GH 5955 + # incorrect deciding to upsample when the axis frequency matches the + # resample frequency + + s = Series( + np.arange(1.0, 6), index=[datetime(1975, 1, i, 12, 0) for i in range(1, 6)] + ) + expected = Series( + np.arange(1.0, 6), index=date_range("19750101", periods=5, freq="D") + ) + + result = s.resample("D").count() + tm.assert_series_equal(result, Series(1, index=expected.index)) + + result1 = s.resample("D").sum() + result2 = s.resample("D").mean() + tm.assert_series_equal(result1, expected) + tm.assert_series_equal(result2, expected) + + +def test_resample_frame_basic(): + df = tm.makeTimeDataFrame() + + b = Grouper(freq="M") + g = df.groupby(b) + + # check all cython functions work + funcs = ["add", "mean", "prod", "min", "max", "var"] + for f in funcs: + g._cython_agg_general(f) + + result = df.resample("A").mean() + tm.assert_series_equal(result["A"], df["A"].resample("A").mean()) + + result = df.resample("M").mean() + tm.assert_series_equal(result["A"], df["A"].resample("M").mean()) + + df.resample("M", kind="period").mean() + df.resample("W-WED", kind="period").mean() + + +@pytest.mark.parametrize( + "loffset", [timedelta(minutes=1), "1min", Minute(1), np.timedelta64(1, 
"m")] +) +def test_resample_loffset(loffset): + # GH 7687 + rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") + s = Series(np.random.randn(14), index=rng) + + result = s.resample("5min", closed="right", label="right", loffset=loffset).mean() + idx = date_range("1/1/2000", periods=4, freq="5min") + expected = Series( + [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], + index=idx + timedelta(minutes=1), + ) + tm.assert_series_equal(result, expected) + assert result.index.freq == Minute(5) + + # from daily + dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D") + ser = Series(np.random.rand(len(dti)), dti) + + # to weekly + result = ser.resample("w-sun").last() + business_day_offset = BDay() + expected = ser.resample("w-sun", loffset=-business_day_offset).last() + assert result.index[0] - business_day_offset == expected.index[0] + + +def test_resample_loffset_upsample(): + # GH 20744 + rng = date_range("1/1/2000 00:00:00", "1/1/2000 00:13:00", freq="min") + s = Series(np.random.randn(14), index=rng) + + result = s.resample( + "5min", closed="right", label="right", loffset=timedelta(minutes=1) + ).ffill() + idx = date_range("1/1/2000", periods=4, freq="5min") + expected = Series([s[0], s[5], s[10], s[-1]], index=idx + timedelta(minutes=1)) + + tm.assert_series_equal(result, expected) + + +def test_resample_loffset_count(): + # GH 12725 + start_time = "1/1/2000 00:00:00" + rng = date_range(start_time, periods=100, freq="S") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("10S", loffset="1s").count() + + expected_index = date_range(start_time, periods=10, freq="10S") + timedelta( + seconds=1 + ) + expected = Series(10, index=expected_index) + + tm.assert_series_equal(result, expected) + + # Same issue should apply to .size() since it goes through + # same code path + result = ts.resample("10S", loffset="1s").size() + + tm.assert_series_equal(result, expected) + + +def 
test_resample_upsample(): + # from daily + dti = date_range( + start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index" + ) + + s = Series(np.random.rand(len(dti)), dti) + + # to minutely, by padding + result = s.resample("Min").pad() + assert len(result) == 12961 + assert result[0] == s[0] + assert result[-1] == s[-1] + + assert result.index.name == "index" + + +def test_resample_how_method(): + # GH9915 + s = Series( + [11, 22], + index=[ + Timestamp("2015-03-31 21:48:52.672000"), + Timestamp("2015-03-31 21:49:52.739000"), + ], + ) + expected = Series( + [11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22], + index=[ + Timestamp("2015-03-31 21:48:50"), + Timestamp("2015-03-31 21:49:00"), + Timestamp("2015-03-31 21:49:10"), + Timestamp("2015-03-31 21:49:20"), + Timestamp("2015-03-31 21:49:30"), + Timestamp("2015-03-31 21:49:40"), + Timestamp("2015-03-31 21:49:50"), + ], + ) + tm.assert_series_equal(s.resample("10S").mean(), expected) + + +def test_resample_extra_index_point(): + # GH#9756 + index = date_range(start="20150101", end="20150331", freq="BM") + expected = DataFrame({"A": Series([21, 41, 63], index=index)}) + + index = date_range(start="20150101", end="20150331", freq="B") + df = DataFrame({"A": Series(range(len(index)), index=index)}, dtype="int64") + result = df.resample("BM").last() + tm.assert_frame_equal(result, expected) + + +def test_upsample_with_limit(): + rng = date_range("1/1/2000", periods=3, freq="5t") + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample("t").ffill(limit=2) + expected = ts.reindex(result.index, method="ffill", limit=2) + tm.assert_series_equal(result, expected) + + +def test_nearest_upsample_with_limit(): + rng = date_range("1/1/2000", periods=3, freq="5t") + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample("t").nearest(limit=2) + expected = ts.reindex(result.index, method="nearest", limit=2) + tm.assert_series_equal(result, expected) + + +def test_resample_ohlc(series): 
+ s = series + + grouper = Grouper(freq=Minute(5)) + expect = s.groupby(grouper).agg(lambda x: x[-1]) + result = s.resample("5Min").ohlc() + + assert len(result) == len(expect) + assert len(result.columns) == 4 + + xs = result.iloc[-2] + assert xs["open"] == s[-6] + assert xs["high"] == s[-6:-1].max() + assert xs["low"] == s[-6:-1].min() + assert xs["close"] == s[-2] + + xs = result.iloc[0] + assert xs["open"] == s[0] + assert xs["high"] == s[:5].max() + assert xs["low"] == s[:5].min() + assert xs["close"] == s[4] + + +def test_resample_ohlc_result(): + + # GH 12332 + index = pd.date_range("1-1-2000", "2-15-2000", freq="h") + index = index.union(pd.date_range("4-15-2000", "5-15-2000", freq="h")) + s = Series(range(len(index)), index=index) + + a = s.loc[:"4-15-2000"].resample("30T").ohlc() + assert isinstance(a, DataFrame) + + b = s.loc[:"4-14-2000"].resample("30T").ohlc() + assert isinstance(b, DataFrame) + + # GH12348 + # raising on odd period + rng = date_range("2013-12-30", "2014-01-07") + index = rng.drop( + [ + Timestamp("2014-01-01"), + Timestamp("2013-12-31"), + Timestamp("2014-01-04"), + Timestamp("2014-01-05"), + ] + ) + df = DataFrame(data=np.arange(len(index)), index=index) + result = df.resample("B").mean() + expected = df.reindex(index=date_range(rng[0], rng[-1], freq="B")) + tm.assert_frame_equal(result, expected) + + +def test_resample_ohlc_dataframe(): + df = ( + DataFrame( + { + "PRICE": { + Timestamp("2011-01-06 10:59:05", tz=None): 24990, + Timestamp("2011-01-06 12:43:33", tz=None): 25499, + Timestamp("2011-01-06 12:54:09", tz=None): 25499, + }, + "VOLUME": { + Timestamp("2011-01-06 10:59:05", tz=None): 1500000000, + Timestamp("2011-01-06 12:43:33", tz=None): 5000000000, + Timestamp("2011-01-06 12:54:09", tz=None): 100000000, + }, + } + ) + ).reindex(["VOLUME", "PRICE"], axis=1) + res = df.resample("H").ohlc() + exp = pd.concat( + [df["VOLUME"].resample("H").ohlc(), df["PRICE"].resample("H").ohlc()], + axis=1, + keys=["VOLUME", "PRICE"], + ) + 
tm.assert_frame_equal(exp, res) + + df.columns = [["a", "b"], ["c", "d"]] + res = df.resample("H").ohlc() + exp.columns = pd.MultiIndex.from_tuples( + [ + ("a", "c", "open"), + ("a", "c", "high"), + ("a", "c", "low"), + ("a", "c", "close"), + ("b", "d", "open"), + ("b", "d", "high"), + ("b", "d", "low"), + ("b", "d", "close"), + ] + ) + tm.assert_frame_equal(exp, res) + + # dupe columns fail atm + # df.columns = ['PRICE', 'PRICE'] + + +def test_resample_dup_index(): + + # GH 4812 + # dup columns with resample raising + df = DataFrame( + np.random.randn(4, 12), + index=[2000, 2000, 2000, 2000], + columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)], + ) + df.iloc[3, :] = np.nan + result = df.resample("Q", axis=1).mean() + expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean() + expected.columns = [Period(year=2000, quarter=i + 1, freq="Q") for i in range(4)] + tm.assert_frame_equal(result, expected) + + +def test_resample_reresample(): + dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D") + s = Series(np.random.rand(len(dti)), dti) + bs = s.resample("B", closed="right", label="right").mean() + result = bs.resample("8H").mean() + assert len(result) == 22 + assert isinstance(result.index.freq, offsets.DateOffset) + assert result.index.freq == offsets.Hour(8) + + +def test_resample_timestamp_to_period(simple_date_range_series): + ts = simple_date_range_series("1/1/1990", "1/1/2000") + + result = ts.resample("A-DEC", kind="period").mean() + expected = ts.resample("A-DEC").mean() + expected.index = period_range("1990", "2000", freq="a-dec") + tm.assert_series_equal(result, expected) + + result = ts.resample("A-JUN", kind="period").mean() + expected = ts.resample("A-JUN").mean() + expected.index = period_range("1990", "2000", freq="a-jun") + tm.assert_series_equal(result, expected) + + result = ts.resample("M", kind="period").mean() + expected = ts.resample("M").mean() + expected.index = 
period_range("1990-01", "2000-01", freq="M") + tm.assert_series_equal(result, expected) + + result = ts.resample("M", kind="period").mean() + expected = ts.resample("M").mean() + expected.index = period_range("1990-01", "2000-01", freq="M") + tm.assert_series_equal(result, expected) + + +def test_ohlc_5min(): + def _ohlc(group): + if isna(group).all(): + return np.repeat(np.nan, 4) + return [group[0], group.max(), group.min(), group[-1]] + + rng = date_range("1/1/2000 00:00:00", "1/1/2000 5:59:50", freq="10s") + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample("5min", closed="right", label="right").ohlc() + + assert (resampled.loc["1/1/2000 00:00"] == ts[0]).all() + + exp = _ohlc(ts[1:31]) + assert (resampled.loc["1/1/2000 00:05"] == exp).all() + + exp = _ohlc(ts["1/1/2000 5:55:01":]) + assert (resampled.loc["1/1/2000 6:00:00"] == exp).all() + + +def test_downsample_non_unique(): + rng = date_range("1/1/2000", "2/29/2000") + rng2 = rng.repeat(5).values + ts = Series(np.random.randn(len(rng2)), index=rng2) + + result = ts.resample("M").mean() + + expected = ts.groupby(lambda x: x.month).mean() + assert len(result) == 2 + tm.assert_almost_equal(result[0], expected[1]) + tm.assert_almost_equal(result[1], expected[2]) + + +def test_asfreq_non_unique(): + # GH #1077 + rng = date_range("1/1/2000", "2/29/2000") + rng2 = rng.repeat(2).values + ts = Series(np.random.randn(len(rng2)), index=rng2) + + msg = "cannot reindex from a duplicate axis" + with pytest.raises(ValueError, match=msg): + ts.asfreq("B") + + +def test_resample_axis1(): + rng = date_range("1/1/2000", "2/29/2000") + df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"]) + + result = df.resample("M", axis=1).mean() + expected = df.T.resample("M").mean().T + tm.assert_frame_equal(result, expected) + + +def test_resample_anchored_ticks(): + # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should + # "anchor" the origin at midnight so we get 
regular intervals rather + # than starting from the first timestamp which might start in the + # middle of a desired interval + + rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + freqs = ["t", "5t", "15t", "30t", "4h", "12h"] + for freq in freqs: + result = ts[2:].resample(freq, closed="left", label="left").mean() + expected = ts.resample(freq, closed="left", label="left").mean() + tm.assert_series_equal(result, expected) + + +def test_resample_single_group(): + mysum = lambda x: x.sum() + + rng = date_range("2000-1-1", "2000-2-10", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum)) + + rng = date_range("2000-1-1", "2000-1-10", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum)) + + # GH 3849 + s = Series( + [30.1, 31.6], + index=[Timestamp("20070915 15:30:00"), Timestamp("20070915 15:40:00")], + ) + expected = Series([0.75], index=[Timestamp("20070915")]) + result = s.resample("D").apply(lambda x: np.std(x)) + tm.assert_series_equal(result, expected) + + +def test_resample_base(): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample("5min", base=2).mean() + exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min") + tm.assert_index_equal(resampled.index, exp_rng) + + +def test_resample_float_base(): + # GH25161 + dt = pd.to_datetime( + ["2018-11-26 16:17:43.51", "2018-11-26 16:17:44.51", "2018-11-26 16:17:45.51"] + ) + s = Series(np.arange(3), index=dt) + + base = 17 + 43.51 / 60 + result = s.resample("3min", base=base).size() + expected = Series(3, index=pd.DatetimeIndex(["2018-11-26 16:17:43.51"])) + tm.assert_series_equal(result, expected) + + +def 
test_resample_daily_anchored(): + rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T") + ts = Series(np.random.randn(len(rng)), index=rng) + ts[:2] = np.nan # so results are the same + + result = ts[2:].resample("D", closed="left", label="left").mean() + expected = ts.resample("D", closed="left", label="left").mean() + tm.assert_series_equal(result, expected) + + +def test_resample_to_period_monthly_buglet(): + # GH #1259 + + rng = date_range("1/1/2000", "12/31/2000") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("M", kind="period").mean() + exp_index = period_range("Jan-2000", "Dec-2000", freq="M") + tm.assert_index_equal(result.index, exp_index) + + +def test_period_with_agg(): + + # aggregate a period resampler with a lambda + s2 = Series( + np.random.randint(0, 5, 50), + index=pd.period_range("2012-01-01", freq="H", periods=50), + dtype="float64", + ) + + expected = s2.to_timestamp().resample("D").mean().to_period() + result = s2.resample("D").agg(lambda x: x.mean()) + tm.assert_series_equal(result, expected) + + +def test_resample_segfault(): + # GH 8573 + # segfaulting in older versions + all_wins_and_wagers = [ + (1, datetime(2013, 10, 1, 16, 20), 1, 0), + (2, datetime(2013, 10, 1, 16, 10), 1, 0), + (2, datetime(2013, 10, 1, 18, 15), 1, 0), + (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0), + ] + + df = DataFrame.from_records( + all_wins_and_wagers, columns=("ID", "timestamp", "A", "B") + ).set_index("timestamp") + result = df.groupby("ID").resample("5min").sum() + expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum()) + tm.assert_frame_equal(result, expected) + + +def test_resample_dtype_preservation(): + + # GH 12202 + # validation tests for dtype preservation + + df = DataFrame( + { + "date": pd.date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": Series([5, 6, 7, 8], dtype="int32"), + } + ).set_index("date") + + result = df.resample("1D").ffill() + assert 
result.val.dtype == np.int32 + + result = df.groupby("group").resample("1D").ffill() + assert result.val.dtype == np.int32 + + +def test_resample_dtype_coercion(): + + pytest.importorskip("scipy.interpolate") + + # GH 16361 + df = {"a": [1, 3, 1, 4]} + df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04")) + + expected = df.astype("float64").resample("H").mean()["a"].interpolate("cubic") + + result = df.resample("H")["a"].mean().interpolate("cubic") + tm.assert_series_equal(result, expected) + + result = df.resample("H").mean()["a"].interpolate("cubic") + tm.assert_series_equal(result, expected) + + +def test_weekly_resample_buglet(): + # #1327 + rng = date_range("1/1/2000", freq="B", periods=20) + ts = Series(np.random.randn(len(rng)), index=rng) + + resampled = ts.resample("W").mean() + expected = ts.resample("W-SUN").mean() + tm.assert_series_equal(resampled, expected) + + +def test_monthly_resample_error(): + # #1451 + dates = date_range("4/16/2012 20:00", periods=5000, freq="h") + ts = Series(np.random.randn(len(dates)), index=dates) + # it works! 
+ ts.resample("M") + + +def test_nanosecond_resample_error(): + # GH 12307 - Values falls after last bin when + # Resampling using pd.tseries.offsets.Nano as period + start = 1443707890427 + exp_start = 1443707890400 + indx = pd.date_range(start=pd.to_datetime(start), periods=10, freq="100n") + ts = Series(range(len(indx)), index=indx) + r = ts.resample(pd.tseries.offsets.Nano(100)) + result = r.agg("mean") + + exp_indx = pd.date_range(start=pd.to_datetime(exp_start), periods=10, freq="100n") + exp = Series(range(len(exp_indx)), index=exp_indx) + + tm.assert_series_equal(result, exp) + + +def test_resample_anchored_intraday(simple_date_range_series): + # #1471, #1458 + + rng = date_range("1/1/2012", "4/1/2012", freq="100min") + df = DataFrame(rng.month, index=rng) + + result = df.resample("M").mean() + expected = df.resample("M", kind="period").mean().to_timestamp(how="end") + expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + tm.assert_frame_equal(result, expected) + + result = df.resample("M", closed="left").mean() + exp = df.tshift(1, freq="D").resample("M", kind="period").mean() + exp = exp.to_timestamp(how="end") + + exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D") + tm.assert_frame_equal(result, exp) + + rng = date_range("1/1/2012", "4/1/2012", freq="100min") + df = DataFrame(rng.month, index=rng) + + result = df.resample("Q").mean() + expected = df.resample("Q", kind="period").mean().to_timestamp(how="end") + expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + tm.assert_frame_equal(result, expected) + + result = df.resample("Q", closed="left").mean() + expected = df.tshift(1, freq="D").resample("Q", kind="period", closed="left").mean() + expected = expected.to_timestamp(how="end") + expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + tm.assert_frame_equal(result, expected) + + ts = simple_date_range_series("2012-04-29 23:00", "2012-04-30 5:00", freq="h") + resampled = ts.resample("M").mean() + assert len(resampled) == 1 + 
+ +def test_resample_anchored_monthstart(simple_date_range_series): + ts = simple_date_range_series("1/1/2000", "12/31/2002") + + freqs = ["MS", "BMS", "QS-MAR", "AS-DEC", "AS-JUN"] + + for freq in freqs: + ts.resample(freq).mean() + + +def test_resample_anchored_multiday(): + # When resampling a range spanning multiple days, ensure that the + # start date gets used to determine the offset. Fixes issue where + # a one day period is not a multiple of the frequency. + # + # See: https://github.com/pandas-dev/pandas/issues/8683 + + index = pd.date_range( + "2014-10-14 23:06:23.206", periods=3, freq="400L" + ) | pd.date_range("2014-10-15 23:00:00", periods=2, freq="2200L") + + s = Series(np.random.randn(5), index=index) + + # Ensure left closing works + result = s.resample("2200L").mean() + assert result.index[-1] == Timestamp("2014-10-15 23:00:02.000") + + # Ensure right closing works + result = s.resample("2200L", label="right").mean() + assert result.index[-1] == Timestamp("2014-10-15 23:00:04.200") + + +def test_corner_cases(simple_period_range_series, simple_date_range_series): + # miscellaneous test coverage + + rng = date_range("1/1/2000", periods=12, freq="t") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("5t", closed="right", label="left").mean() + ex_index = date_range("1999-12-31 23:55", periods=4, freq="5t") + tm.assert_index_equal(result.index, ex_index) + + len0pts = simple_period_range_series("2007-01", "2010-05", freq="M")[:0] + # it works + result = len0pts.resample("A-DEC").mean() + assert len(result) == 0 + + # resample to periods + ts = simple_date_range_series("2000-04-28", "2000-04-30 11:00", freq="h") + result = ts.resample("M", kind="period").mean() + assert len(result) == 1 + assert result.index[0] == Period("2000-04", freq="M") + + +def test_anchored_lowercase_buglet(): + dates = date_range("4/16/2012 20:00", periods=50000, freq="s") + ts = Series(np.random.randn(len(dates)), index=dates) + # it works! 
+ ts.resample("d").mean() + + +def test_upsample_apply_functions(): + # #1596 + rng = pd.date_range("2012-06-12", periods=4, freq="h") + + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("20min").aggregate(["mean", "sum"]) + assert isinstance(result, DataFrame) + + +def test_resample_not_monotonic(): + rng = pd.date_range("2012-06-12", periods=200, freq="h") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts = ts.take(np.random.permutation(len(ts))) + + result = ts.resample("D").sum() + exp = ts.sort_index().resample("D").sum() + tm.assert_series_equal(result, exp) + + +def test_resample_median_bug_1688(): + + for dtype in ["int64", "int32", "float64", "float32"]: + df = DataFrame( + [1, 2], + index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)], + dtype=dtype, + ) + + result = df.resample("T").apply(lambda x: x.mean()) + exp = df.asfreq("T") + tm.assert_frame_equal(result, exp) + + result = df.resample("T").median() + exp = df.asfreq("T") + tm.assert_frame_equal(result, exp) + + +def test_how_lambda_functions(simple_date_range_series): + + ts = simple_date_range_series("1/1/2000", "4/1/2000") + + result = ts.resample("M").apply(lambda x: x.mean()) + exp = ts.resample("M").mean() + tm.assert_series_equal(result, exp) + + foo_exp = ts.resample("M").mean() + foo_exp.name = "foo" + bar_exp = ts.resample("M").std() + bar_exp.name = "bar" + + result = ts.resample("M").apply([lambda x: x.mean(), lambda x: x.std(ddof=1)]) + result.columns = ["foo", "bar"] + tm.assert_series_equal(result["foo"], foo_exp) + tm.assert_series_equal(result["bar"], bar_exp) + + # this is a MI Series, so comparing the names of the results + # doesn't make sense + result = ts.resample("M").aggregate( + {"foo": lambda x: x.mean(), "bar": lambda x: x.std(ddof=1)} + ) + tm.assert_series_equal(result["foo"], foo_exp, check_names=False) + tm.assert_series_equal(result["bar"], bar_exp, check_names=False) + + +def test_resample_unequal_times(): + # 
#1772 + start = datetime(1999, 3, 1, 5) + # end hour is less than start + end = datetime(2012, 7, 31, 4) + bad_ind = date_range(start, end, freq="30min") + df = DataFrame({"close": 1}, index=bad_ind) + + # it works! + df.resample("AS").sum() + + +def test_resample_consistency(): + + # GH 6418 + # resample with bfill / limit / reindex consistency + + i30 = pd.date_range("2002-02-02", periods=4, freq="30T") + s = Series(np.arange(4.0), index=i30) + s[2] = np.NaN + + # Upsample by factor 3 with reindex() and resample() methods: + i10 = pd.date_range(i30[0], i30[-1], freq="10T") + + s10 = s.reindex(index=i10, method="bfill") + s10_2 = s.reindex(index=i10, method="bfill", limit=2) + rl = s.reindex_like(s10, method="bfill", limit=2) + r10_2 = s.resample("10Min").bfill(limit=2) + r10 = s.resample("10Min").bfill() + + # s10_2, r10, r10_2, rl should all be equal + tm.assert_series_equal(s10_2, r10) + tm.assert_series_equal(s10_2, r10_2) + tm.assert_series_equal(s10_2, rl) + + +def test_resample_timegrouper(): + # GH 7227 + dates1 = [ + datetime(2014, 10, 1), + datetime(2014, 9, 3), + datetime(2014, 11, 5), + datetime(2014, 9, 5), + datetime(2014, 10, 8), + datetime(2014, 7, 15), + ] + + dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:] + dates3 = [pd.NaT] + dates1 + [pd.NaT] + + for dates in [dates1, dates2, dates3]: + df = DataFrame(dict(A=dates, B=np.arange(len(dates)))) + result = df.set_index("A").resample("M").count() + exp_idx = pd.DatetimeIndex( + ["2014-07-31", "2014-08-31", "2014-09-30", "2014-10-31", "2014-11-30"], + freq="M", + name="A", + ) + expected = DataFrame({"B": [1, 0, 2, 2, 1]}, index=exp_idx) + tm.assert_frame_equal(result, expected) + + result = df.groupby(pd.Grouper(freq="M", key="A")).count() + tm.assert_frame_equal(result, expected) + + df = DataFrame(dict(A=dates, B=np.arange(len(dates)), C=np.arange(len(dates)))) + result = df.set_index("A").resample("M").count() + expected = DataFrame( + {"B": [1, 0, 2, 2, 1], "C": [1, 0, 2, 2, 
1]}, + index=exp_idx, + columns=["B", "C"], + ) + tm.assert_frame_equal(result, expected) + + result = df.groupby(pd.Grouper(freq="M", key="A")).count() + tm.assert_frame_equal(result, expected) + + +def test_resample_nunique(): + + # GH 12352 + df = DataFrame( + { + "ID": { + Timestamp("2015-06-05 00:00:00"): "0010100903", + Timestamp("2015-06-08 00:00:00"): "0010150847", + }, + "DATE": { + Timestamp("2015-06-05 00:00:00"): "2015-06-05", + Timestamp("2015-06-08 00:00:00"): "2015-06-08", + }, + } + ) + r = df.resample("D") + g = df.groupby(pd.Grouper(freq="D")) + expected = df.groupby(pd.Grouper(freq="D")).ID.apply(lambda x: x.nunique()) + assert expected.name == "ID" + + for t in [r, g]: + result = r.ID.nunique() + tm.assert_series_equal(result, expected) + + result = df.ID.resample("D").nunique() + tm.assert_series_equal(result, expected) + + result = df.ID.groupby(pd.Grouper(freq="D")).nunique() + tm.assert_series_equal(result, expected) + + +def test_resample_nunique_preserves_column_level_names(): + # see gh-23222 + df = tm.makeTimeDataFrame(freq="1D").abs() + df.columns = pd.MultiIndex.from_arrays( + [df.columns.tolist()] * 2, names=["lev0", "lev1"] + ) + result = df.resample("1h").nunique() + tm.assert_index_equal(df.columns, result.columns) + + +def test_resample_nunique_with_date_gap(): + # GH 13453 + index = pd.date_range("1-1-2000", "2-15-2000", freq="h") + index2 = pd.date_range("4-15-2000", "5-15-2000", freq="h") + index3 = index.append(index2) + s = Series(range(len(index3)), index=index3, dtype="int64") + r = s.resample("M") + + # Since all elements are unique, these should all be the same + results = [r.count(), r.nunique(), r.agg(Series.nunique), r.agg("nunique")] + + tm.assert_series_equal(results[0], results[1]) + tm.assert_series_equal(results[0], results[2]) + tm.assert_series_equal(results[0], results[3]) + + +@pytest.mark.parametrize("n", [10000, 100000]) +@pytest.mark.parametrize("k", [10, 100, 1000]) +def test_resample_group_info(n, k): + # 
GH10914 + + # use a fixed seed to always have the same uniques + prng = np.random.RandomState(1234) + + dr = date_range(start="2015-08-27", periods=n // 10, freq="T") + ts = Series(prng.randint(0, n // k, n).astype("int64"), index=prng.choice(dr, n)) + + left = ts.resample("30T").nunique() + ix = date_range(start=ts.index.min(), end=ts.index.max(), freq="30T") + + vals = ts.values + bins = np.searchsorted(ix.values, ts.index, side="right") + + sorter = np.lexsort((vals, bins)) + vals, bins = vals[sorter], bins[sorter] + + mask = np.r_[True, vals[1:] != vals[:-1]] + mask |= np.r_[True, bins[1:] != bins[:-1]] + + arr = np.bincount(bins[mask] - 1, minlength=len(ix)).astype("int64", copy=False) + right = Series(arr, index=ix) + + tm.assert_series_equal(left, right) + + +def test_resample_size(): + n = 10000 + dr = date_range("2015-09-19", periods=n, freq="T") + ts = Series(np.random.randn(n), index=np.random.choice(dr, n)) + + left = ts.resample("7T").size() + ix = date_range(start=left.index.min(), end=ts.index.max(), freq="7T") + + bins = np.searchsorted(ix.values, ts.index.values, side="right") + val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype("int64", copy=False) + + right = Series(val, index=ix) + tm.assert_series_equal(left, right) + + +def test_resample_across_dst(): + # The test resamples a DatetimeIndex with values before and after a + # DST change + # Issue: 14682 + + # The DatetimeIndex we will start with + # (note that DST happens at 03:00+02:00 -> 02:00+01:00) + # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 + df1 = DataFrame([1477786980, 1477790580], columns=["ts"]) + dti1 = DatetimeIndex( + pd.to_datetime(df1.ts, unit="s") + .dt.tz_localize("UTC") + .dt.tz_convert("Europe/Madrid") + ) + + # The expected DatetimeIndex after resampling. 
+ # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 + df2 = DataFrame([1477785600, 1477789200], columns=["ts"]) + dti2 = DatetimeIndex( + pd.to_datetime(df2.ts, unit="s") + .dt.tz_localize("UTC") + .dt.tz_convert("Europe/Madrid") + ) + df = DataFrame([5, 5], index=dti1) + + result = df.resample(rule="H").sum() + expected = DataFrame([5, 5], index=dti2) + + tm.assert_frame_equal(result, expected) + + +def test_groupby_with_dst_time_change(): + # GH 24972 + index = pd.DatetimeIndex( + [1478064900001000000, 1480037118776792000], tz="UTC" + ).tz_convert("America/Chicago") + + df = pd.DataFrame([1, 2], index=index) + result = df.groupby(pd.Grouper(freq="1d")).last() + expected_index_values = pd.date_range( + "2016-11-02", "2016-11-24", freq="d", tz="America/Chicago" + ) + + index = pd.DatetimeIndex(expected_index_values) + expected = pd.DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index) + tm.assert_frame_equal(result, expected) + + +def test_resample_dst_anchor(): + # 5172 + dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz="US/Eastern") + df = DataFrame([5], index=dti) + tm.assert_frame_equal( + df.resample(rule="D").sum(), DataFrame([5], index=df.index.normalize()) + ) + df.resample(rule="MS").sum() + tm.assert_frame_equal( + df.resample(rule="MS").sum(), + DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)], tz="US/Eastern")), + ) + + dti = date_range("2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris") + values = range(dti.size) + df = DataFrame({"a": values, "b": values, "c": values}, index=dti, dtype="int64") + how = {"a": "min", "b": "max", "c": "count"} + + tm.assert_frame_equal( + df.resample("W-MON").agg(how)[["a", "b", "c"]], + DataFrame( + { + "a": [0, 48, 384, 720, 1056, 1394], + "b": [47, 383, 719, 1055, 1393, 1586], + "c": [48, 336, 336, 336, 338, 193], + }, + index=date_range("9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris"), + ), + "W-MON Frequency", + ) + + tm.assert_frame_equal( + 
df.resample("2W-MON").agg(how)[["a", "b", "c"]], + DataFrame( + { + "a": [0, 48, 720, 1394], + "b": [47, 719, 1393, 1586], + "c": [48, 672, 674, 193], + }, + index=date_range( + "9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris" + ), + ), + "2W-MON Frequency", + ) + + tm.assert_frame_equal( + df.resample("MS").agg(how)[["a", "b", "c"]], + DataFrame( + {"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]}, + index=date_range("9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris"), + ), + "MS Frequency", + ) + + tm.assert_frame_equal( + df.resample("2MS").agg(how)[["a", "b", "c"]], + DataFrame( + {"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]}, + index=date_range("9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris"), + ), + "2MS Frequency", + ) + + df_daily = df["10/26/2013":"10/29/2013"] + tm.assert_frame_equal( + df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})[ + ["a", "b", "c"] + ], + DataFrame( + { + "a": [1248, 1296, 1346, 1394], + "b": [1295, 1345, 1393, 1441], + "c": [48, 50, 48, 48], + }, + index=date_range("10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris"), + ), + "D Frequency", + ) + + +def test_downsample_across_dst(): + # GH 8531 + tz = pytz.timezone("Europe/Berlin") + dt = datetime(2014, 10, 26) + dates = date_range(tz.localize(dt), periods=4, freq="2H") + result = Series(5, index=dates).resample("H").mean() + expected = Series( + [5.0, np.nan] * 3 + [5.0], + index=date_range(tz.localize(dt), periods=7, freq="H"), + ) + tm.assert_series_equal(result, expected) + + +def test_downsample_across_dst_weekly(): + # GH 9119, GH 21459 + df = DataFrame( + index=DatetimeIndex( + ["2017-03-25", "2017-03-26", "2017-03-27", "2017-03-28", "2017-03-29"], + tz="Europe/Amsterdam", + ), + data=[11, 12, 13, 14, 15], + ) + result = df.resample("1W").sum() + expected = DataFrame( + [23, 42], + index=pd.DatetimeIndex(["2017-03-26", "2017-04-02"], tz="Europe/Amsterdam"), + ) + tm.assert_frame_equal(result, expected) + + idx = 
pd.date_range("2013-04-01", "2013-05-01", tz="Europe/London", freq="H") + s = Series(index=idx, dtype=np.float64) + result = s.resample("W").mean() + expected = Series( + index=pd.date_range("2013-04-07", freq="W", periods=5, tz="Europe/London"), + dtype=np.float64, + ) + tm.assert_series_equal(result, expected) + + +def test_resample_with_nat(): + # GH 13020 + index = DatetimeIndex( + [ + pd.NaT, + "1970-01-01 00:00:00", + pd.NaT, + "1970-01-01 00:00:01", + "1970-01-01 00:00:02", + ] + ) + frame = DataFrame([2, 3, 5, 7, 11], index=index) + + index_1s = DatetimeIndex( + ["1970-01-01 00:00:00", "1970-01-01 00:00:01", "1970-01-01 00:00:02"] + ) + frame_1s = DataFrame([3, 7, 11], index=index_1s) + tm.assert_frame_equal(frame.resample("1s").mean(), frame_1s) + + index_2s = DatetimeIndex(["1970-01-01 00:00:00", "1970-01-01 00:00:02"]) + frame_2s = DataFrame([5, 11], index=index_2s) + tm.assert_frame_equal(frame.resample("2s").mean(), frame_2s) + + index_3s = DatetimeIndex(["1970-01-01 00:00:00"]) + frame_3s = DataFrame([7], index=index_3s) + tm.assert_frame_equal(frame.resample("3s").mean(), frame_3s) + + tm.assert_frame_equal(frame.resample("60s").mean(), frame_3s) + + +def test_resample_datetime_values(): + # GH 13119 + # check that datetime dtype is preserved when NaT values are + # introduced by the resampling + + dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)] + df = DataFrame({"timestamp": dates}, index=dates) + + exp = Series( + [datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)], + index=date_range("2016-01-15", periods=3, freq="2D"), + name="timestamp", + ) + + res = df.resample("2D").first()["timestamp"] + tm.assert_series_equal(res, exp) + res = df["timestamp"].resample("2D").first() + tm.assert_series_equal(res, exp) + + +def test_resample_apply_with_additional_args(series): + # GH 14615 + def f(data, add_arg): + return np.mean(data) * add_arg + + multiplier = 10 + result = series.resample("D").apply(f, multiplier) + expected = 
series.resample("D").mean().multiply(multiplier) + tm.assert_series_equal(result, expected) + + # Testing as kwarg + result = series.resample("D").apply(f, add_arg=multiplier) + expected = series.resample("D").mean().multiply(multiplier) + tm.assert_series_equal(result, expected) + + # Testing dataframe + df = pd.DataFrame({"A": 1, "B": 2}, index=pd.date_range("2017", periods=10)) + result = df.groupby("A").resample("D").agg(f, multiplier) + expected = df.groupby("A").resample("D").mean().multiply(multiplier) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("k", [1, 2, 3]) +@pytest.mark.parametrize( + "n1, freq1, n2, freq2", + [ + (30, "S", 0.5, "Min"), + (60, "S", 1, "Min"), + (3600, "S", 1, "H"), + (60, "Min", 1, "H"), + (21600, "S", 0.25, "D"), + (86400, "S", 1, "D"), + (43200, "S", 0.5, "D"), + (1440, "Min", 1, "D"), + (12, "H", 0.5, "D"), + (24, "H", 1, "D"), + ], +) +def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k): + # GH 24127 + n1_ = n1 * k + n2_ = n2 * k + s = pd.Series( + 0, index=pd.date_range("19910905 13:00", "19911005 07:00", freq=freq1) + ) + s = s + range(len(s)) + + result1 = s.resample(str(n1_) + freq1).mean() + result2 = s.resample(str(n2_) + freq2).mean() + tm.assert_series_equal(result1, result2) + + +@pytest.mark.parametrize( + "first,last,offset,exp_first,exp_last", + [ + ("19910905", "19920406", "D", "19910905", "19920407"), + ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920407"), + ("19910905 06:00", "19920406 06:00", "H", "19910905 06:00", "19920406 07:00"), + ("19910906", "19920406", "M", "19910831", "19920430"), + ("19910831", "19920430", "M", "19910831", "19920531"), + ("1991-08", "1992-04", "M", "19910831", "19920531"), + ], +) +def test_get_timestamp_range_edges(first, last, offset, exp_first, exp_last): + first = pd.Period(first) + first = first.to_timestamp(first.freq) + last = pd.Period(last) + last = last.to_timestamp(last.freq) + + exp_first = pd.Timestamp(exp_first, freq=offset) + 
exp_last = pd.Timestamp(exp_last, freq=offset) + + offset = pd.tseries.frequencies.to_offset(offset) + result = _get_timestamp_range_edges(first, last, offset) + expected = (exp_first, exp_last) + assert result == expected + + +def test_resample_apply_product(): + # GH 5586 + index = date_range(start="2012-01-31", freq="M", periods=12) + + ts = Series(range(12), index=index) + df = DataFrame(dict(A=ts, B=ts + 2)) + result = df.resample("Q").apply(np.product) + expected = DataFrame( + np.array([[0, 24], [60, 210], [336, 720], [990, 1716]], dtype=np.int64), + index=DatetimeIndex( + ["2012-03-31", "2012-06-30", "2012-09-30", "2012-12-31"], freq="Q-DEC" + ), + columns=["A", "B"], + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py new file mode 100644 index 00000000..955f8c74 --- /dev/null +++ b/pandas/tests/resample/test_period_index.py @@ -0,0 +1,872 @@ +from datetime import datetime, timedelta + +import dateutil +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.period import IncompatibleFrequency + +import pandas as pd +from pandas import DataFrame, Series, Timestamp +import pandas._testing as tm +from pandas.core.indexes.base import InvalidIndexError +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.period import Period, PeriodIndex, period_range +from pandas.core.resample import _get_period_range_edges + +import pandas.tseries.offsets as offsets + + +@pytest.fixture() +def _index_factory(): + return period_range + + +@pytest.fixture +def _series_name(): + return "pi" + + +class TestPeriodIndex: + @pytest.mark.parametrize("freq", ["2D", "1H", "2H"]) + @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) + def test_asfreq(self, series_and_frame, freq, kind): + # GH 12884, 15944 + # make sure .asfreq() returns PeriodIndex (except kind='timestamp') + + 
obj = series_and_frame + if kind == "timestamp": + expected = obj.to_timestamp().resample(freq).asfreq() + else: + start = obj.index[0].to_timestamp(how="start") + end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start") + new_index = date_range(start=start, end=end, freq=freq, closed="left") + expected = obj.to_timestamp().reindex(new_index).to_period(freq) + result = obj.resample(freq, kind=kind).asfreq() + tm.assert_almost_equal(result, expected) + + def test_asfreq_fill_value(self, series): + # test for fill value during resampling, issue 3715 + + s = series + new_index = date_range( + s.index[0].to_timestamp(how="start"), + (s.index[-1]).to_timestamp(how="start"), + freq="1H", + ) + expected = s.to_timestamp().reindex(new_index, fill_value=4.0) + result = s.resample("1H", kind="timestamp").asfreq(fill_value=4.0) + tm.assert_series_equal(result, expected) + + frame = s.to_frame("value") + new_index = date_range( + frame.index[0].to_timestamp(how="start"), + (frame.index[-1]).to_timestamp(how="start"), + freq="1H", + ) + expected = frame.to_timestamp().reindex(new_index, fill_value=3.0) + result = frame.resample("1H", kind="timestamp").asfreq(fill_value=3.0) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("freq", ["H", "12H", "2D", "W"]) + @pytest.mark.parametrize("kind", [None, "period", "timestamp"]) + @pytest.mark.parametrize("kwargs", [dict(on="date"), dict(level="d")]) + def test_selection(self, index, freq, kind, kwargs): + # This is a bug, these should be implemented + # GH 14008 + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame( + {"date": index, "a": rng}, + index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]), + ) + msg = ( + "Resampling from level= or on= selection with a PeriodIndex is " + r"not currently supported, use \.set_index\(\.\.\.\) to " + "explicitly set index" + ) + with pytest.raises(NotImplementedError, match=msg): + df.resample(freq, kind=kind, **kwargs) + + 
@pytest.mark.parametrize("month", MONTHS) + @pytest.mark.parametrize("meth", ["ffill", "bfill"]) + @pytest.mark.parametrize("conv", ["start", "end"]) + @pytest.mark.parametrize("targ", ["D", "B", "M"]) + def test_annual_upsample_cases( + self, targ, conv, meth, month, simple_period_range_series + ): + ts = simple_period_range_series( + "1/1/1990", "12/31/1991", freq="A-{month}".format(month=month) + ) + + result = getattr(ts.resample(targ, convention=conv), meth)() + expected = result.to_timestamp(targ, how=conv) + expected = expected.asfreq(targ, meth).to_period() + tm.assert_series_equal(result, expected) + + def test_basic_downsample(self, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") + result = ts.resample("a-dec").mean() + + expected = ts.groupby(ts.index.year).mean() + expected.index = period_range("1/1/1990", "6/30/1995", freq="a-dec") + tm.assert_series_equal(result, expected) + + # this is ok + tm.assert_series_equal(ts.resample("a-dec").mean(), result) + tm.assert_series_equal(ts.resample("a").mean(), result) + + @pytest.mark.parametrize( + "rule,expected_error_msg", + [ + ("a-dec", ""), + ("q-mar", ""), + ("M", ""), + ("w-thu", ""), + ], + ) + def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg): + # These are incompatible period rules for resampling + ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed") + msg = ( + "Frequency cannot be resampled to {}, as they " + "are not sub or super periods" + ).format(expected_error_msg) + with pytest.raises(IncompatibleFrequency, match=msg): + ts.resample(rule).mean() + + @pytest.mark.parametrize("freq", ["D", "2D"]) + def test_basic_upsample(self, freq, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M") + result = ts.resample("a-dec").mean() + + resampled = result.resample(freq, convention="end").ffill() + expected = result.to_timestamp(freq, how="end") + expected = 
expected.asfreq(freq, "ffill").to_period(freq) + tm.assert_series_equal(resampled, expected) + + def test_upsample_with_limit(self): + rng = period_range("1/1/2000", periods=5, freq="A") + ts = Series(np.random.randn(len(rng)), rng) + + result = ts.resample("M", convention="end").ffill(limit=2) + expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2) + tm.assert_series_equal(result, expected) + + def test_annual_upsample(self, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC") + df = DataFrame({"a": ts}) + rdf = df.resample("D").ffill() + exp = df["a"].resample("D").ffill() + tm.assert_series_equal(rdf["a"], exp) + + rng = period_range("2000", "2003", freq="A-DEC") + ts = Series([1, 2, 3, 4], index=rng) + + result = ts.resample("M").ffill() + ex_index = period_range("2000-01", "2003-12", freq="M") + + expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("month", MONTHS) + @pytest.mark.parametrize("target", ["D", "B", "M"]) + @pytest.mark.parametrize("convention", ["start", "end"]) + def test_quarterly_upsample( + self, month, target, convention, simple_period_range_series + ): + freq = "Q-{month}".format(month=month) + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, "ffill").to_period() + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("target", ["D", "B"]) + @pytest.mark.parametrize("convention", ["start", "end"]) + def test_monthly_upsample(self, target, convention, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M") + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = 
expected.asfreq(target, "ffill").to_period() + tm.assert_series_equal(result, expected) + + def test_resample_basic(self): + # GH3609 + s = Series( + range(100), + index=date_range("20130101", freq="s", periods=100, name="idx"), + dtype="float", + ) + s[10:30] = np.nan + index = PeriodIndex( + [Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")], + name="idx", + ) + expected = Series([34.5, 79.5], index=index) + result = s.to_period().resample("T", kind="period").mean() + tm.assert_series_equal(result, expected) + result2 = s.resample("T", kind="period").mean() + tm.assert_series_equal(result2, expected) + + @pytest.mark.parametrize( + "freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])] + ) + def test_resample_count(self, freq, expected_vals): + # GH12774 + series = Series(1, index=pd.period_range(start="2000", periods=100)) + result = series.resample(freq).count() + expected_index = pd.period_range( + start="2000", freq=freq, periods=len(expected_vals) + ) + expected = Series(expected_vals, index=expected_index) + tm.assert_series_equal(result, expected) + + def test_resample_same_freq(self, resample_method): + + # GH12770 + series = Series( + range(3), index=pd.period_range(start="2000", periods=3, freq="M") + ) + expected = series + + result = getattr(series.resample("M"), resample_method)() + tm.assert_series_equal(result, expected) + + def test_resample_incompat_freq(self): + msg = ( + "Frequency cannot be resampled to , " + "as they are not sub or super periods" + ) + with pytest.raises(IncompatibleFrequency, match=msg): + Series( + range(3), index=pd.period_range(start="2000", periods=3, freq="M") + ).resample("W").mean() + + def test_with_local_timezone_pytz(self): + # see gh-5430 + local_timezone = pytz.timezone("America/Los_Angeles") + + start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc) + # 1 day later + end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc) + + index 
= pd.date_range(start, end, freq="H") + + series = Series(1, index=index) + series = series.tz_convert(local_timezone) + result = series.resample("D", kind="period").mean() + + # Create the expected series + # Index is moved back a day with the timezone conversion from UTC to + # Pacific + expected_index = pd.period_range(start=start, end=end, freq="D") - offsets.Day() + expected = Series(1, index=expected_index) + tm.assert_series_equal(result, expected) + + def test_resample_with_pytz(self): + # GH 13238 + s = Series( + 2, index=pd.date_range("2017-01-01", periods=48, freq="H", tz="US/Eastern") + ) + result = s.resample("D").mean() + expected = Series( + 2, index=pd.DatetimeIndex(["2017-01-01", "2017-01-02"], tz="US/Eastern") + ) + tm.assert_series_equal(result, expected) + # Especially assert that the timezone is LMT for pytz + assert result.index.tz == pytz.timezone("US/Eastern") + + def test_with_local_timezone_dateutil(self): + # see gh-5430 + local_timezone = "dateutil/America/Los_Angeles" + + start = datetime( + year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc() + ) + # 1 day later + end = datetime( + year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc() + ) + + index = pd.date_range(start, end, freq="H", name="idx") + + series = Series(1, index=index) + series = series.tz_convert(local_timezone) + result = series.resample("D", kind="period").mean() + + # Create the expected series + # Index is moved back a day with the timezone conversion from UTC to + # Pacific + expected_index = ( + pd.period_range(start=start, end=end, freq="D", name="idx") - offsets.Day() + ) + expected = Series(1, index=expected_index) + tm.assert_series_equal(result, expected) + + def test_resample_nonexistent_time_bin_edge(self): + # GH 19375 + index = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15T") + s = Series(np.zeros(len(index)), index=index) + expected = s.tz_localize("US/Pacific") + result = expected.resample("900S").mean() + 
tm.assert_series_equal(result, expected) + + # GH 23742 + index = date_range(start="2017-10-10", end="2017-10-20", freq="1H") + index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo") + df = DataFrame(data=list(range(len(index))), index=index) + result = df.groupby(pd.Grouper(freq="1D")).count() + expected = date_range( + start="2017-10-09", + end="2017-10-20", + freq="D", + tz="America/Sao_Paulo", + nonexistent="shift_forward", + closed="left", + ) + tm.assert_index_equal(result.index, expected) + + def test_resample_ambiguous_time_bin_edge(self): + # GH 10117 + idx = pd.date_range( + "2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London" + ) + expected = Series(np.zeros(len(idx)), index=idx) + result = expected.resample("30T").mean() + tm.assert_series_equal(result, expected) + + def test_fill_method_and_how_upsample(self): + # GH2073 + s = Series( + np.arange(9, dtype="int64"), + index=date_range("2010-01-01", periods=9, freq="Q"), + ) + last = s.resample("M").ffill() + both = s.resample("M").ffill().resample("M").last().astype("int64") + tm.assert_series_equal(last, both) + + @pytest.mark.parametrize("day", DAYS) + @pytest.mark.parametrize("target", ["D", "B"]) + @pytest.mark.parametrize("convention", ["start", "end"]) + def test_weekly_upsample(self, day, target, convention, simple_period_range_series): + freq = "W-{day}".format(day=day) + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq) + result = ts.resample(target, convention=convention).ffill() + expected = result.to_timestamp(target, how=convention) + expected = expected.asfreq(target, "ffill").to_period() + tm.assert_series_equal(result, expected) + + def test_resample_to_timestamps(self, simple_period_range_series): + ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M") + + result = ts.resample("A-DEC", kind="timestamp").mean() + expected = ts.to_timestamp(how="start").resample("A-DEC").mean() + tm.assert_series_equal(result, expected) + 
+ def test_resample_to_quarterly(self, simple_period_range_series): + for month in MONTHS: + ts = simple_period_range_series( + "1990", "1992", freq="A-{month}".format(month=month) + ) + quar_ts = ts.resample("Q-{month}".format(month=month)).ffill() + + stamps = ts.to_timestamp("D", how="start") + qdates = period_range( + ts.index[0].asfreq("D", "start"), + ts.index[-1].asfreq("D", "end"), + freq="Q-{month}".format(month=month), + ) + + expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill") + expected.index = qdates + + tm.assert_series_equal(quar_ts, expected) + + # conforms, but different month + ts = simple_period_range_series("1990", "1992", freq="A-JUN") + + for how in ["start", "end"]: + result = ts.resample("Q-MAR", convention=how).ffill() + expected = ts.asfreq("Q-MAR", how=how) + expected = expected.reindex(result.index, method="ffill") + + # .to_timestamp('D') + # expected = expected.resample('Q-MAR').ffill() + + tm.assert_series_equal(result, expected) + + def test_resample_fill_missing(self): + rng = PeriodIndex([2000, 2005, 2007, 2009], freq="A") + + s = Series(np.random.randn(4), index=rng) + + stamps = s.to_timestamp() + filled = s.resample("A").ffill() + expected = stamps.resample("A").ffill().to_period("A") + tm.assert_series_equal(filled, expected) + + def test_cant_fill_missing_dups(self): + rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A") + s = Series(np.random.randn(5), index=rng) + msg = "Reindexing only valid with uniquely valued Index objects" + with pytest.raises(InvalidIndexError, match=msg): + s.resample("A").ffill() + + @pytest.mark.parametrize("freq", ["5min"]) + @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) + def test_resample_5minute(self, freq, kind): + rng = period_range("1/1/2000", "1/5/2000", freq="T") + ts = Series(np.random.randn(len(rng)), index=rng) + expected = ts.to_timestamp().resample(freq).mean() + if kind != "timestamp": + expected = expected.to_period(freq) + result = 
ts.resample(freq, kind=kind).mean() + tm.assert_series_equal(result, expected) + + def test_upsample_daily_business_daily(self, simple_period_range_series): + ts = simple_period_range_series("1/1/2000", "2/1/2000", freq="B") + + result = ts.resample("D").asfreq() + expected = ts.asfreq("D").reindex(period_range("1/3/2000", "2/1/2000")) + tm.assert_series_equal(result, expected) + + ts = simple_period_range_series("1/1/2000", "2/1/2000") + result = ts.resample("H", convention="s").asfreq() + exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="H") + expected = ts.asfreq("H", how="s").reindex(exp_rng) + tm.assert_series_equal(result, expected) + + def test_resample_irregular_sparse(self): + dr = date_range(start="1/1/2012", freq="5min", periods=1000) + s = Series(np.array(100), index=dr) + # subset the data. + subset = s[:"2012-01-04 06:55"] + + result = subset.resample("10min").apply(len) + expected = s.resample("10min").apply(len).loc[result.index] + tm.assert_series_equal(result, expected) + + def test_resample_weekly_all_na(self): + rng = date_range("1/1/2000", periods=10, freq="W-WED") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample("W-THU").asfreq() + + assert result.isna().all() + + result = ts.resample("W-THU").asfreq().ffill()[:-1] + expected = ts.asfreq("W-THU").ffill() + tm.assert_series_equal(result, expected) + + def test_resample_tz_localized(self): + dr = date_range(start="2012-4-13", end="2012-5-1") + ts = Series(range(len(dr)), index=dr) + + ts_utc = ts.tz_localize("UTC") + ts_local = ts_utc.tz_convert("America/Los_Angeles") + + result = ts_local.resample("W").mean() + + ts_local_naive = ts_local.copy() + ts_local_naive.index = [ + x.replace(tzinfo=None) for x in ts_local_naive.index.to_pydatetime() + ] + + exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles") + + tm.assert_series_equal(result, exp) + + # it works + result = ts_local.resample("D").mean() + + # #2245 + idx = date_range( + 
"2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney" + ) + s = Series([1, 2], index=idx) + + result = s.resample("D", closed="right", label="right").mean() + ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney") + expected = Series([1.5], index=ex_index) + + tm.assert_series_equal(result, expected) + + # for good measure + result = s.resample("D", kind="period").mean() + ex_index = period_range("2001-09-20", periods=1, freq="D") + expected = Series([1.5], index=ex_index) + tm.assert_series_equal(result, expected) + + # GH 6397 + # comparing an offset that doesn't propagate tz's + rng = date_range("1/1/2011", periods=20000, freq="H") + rng = rng.tz_localize("EST") + ts = DataFrame(index=rng) + ts["first"] = np.random.randn(len(rng)) + ts["second"] = np.cumsum(np.random.randn(len(rng))) + expected = DataFrame( + { + "first": ts.resample("A").sum()["first"], + "second": ts.resample("A").mean()["second"], + }, + columns=["first", "second"], + ) + result = ( + ts.resample("A") + .agg({"first": np.sum, "second": np.mean}) + .reindex(columns=["first", "second"]) + ) + tm.assert_frame_equal(result, expected) + + def test_closed_left_corner(self): + # #1465 + s = Series( + np.random.randn(21), + index=date_range(start="1/1/2012 9:30", freq="1min", periods=21), + ) + s[0] = np.nan + + result = s.resample("10min", closed="left", label="right").mean() + exp = s[1:].resample("10min", closed="left", label="right").mean() + tm.assert_series_equal(result, exp) + + result = s.resample("10min", closed="left", label="left").mean() + exp = s[1:].resample("10min", closed="left", label="left").mean() + + ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3) + + tm.assert_index_equal(result.index, ex_index) + tm.assert_series_equal(result, exp) + + def test_quarterly_resampling(self): + rng = period_range("2000Q1", periods=10, freq="Q-DEC") + ts = Series(np.arange(10), index=rng) + + result = ts.resample("A").mean() + exp = 
ts.to_timestamp().resample("A").mean().to_period() + tm.assert_series_equal(result, exp) + + def test_resample_weekly_bug_1726(self): + # 8/6/12 is a Monday + ind = date_range(start="8/6/2012", end="8/26/2012", freq="D") + n = len(ind) + data = [[x] * 5 for x in range(n)] + df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind) + + # it works! + df.resample("W-MON", closed="left", label="left").first() + + def test_resample_with_dst_time_change(self): + # GH 15549 + index = ( + pd.DatetimeIndex([1457537600000000000, 1458059600000000000]) + .tz_localize("UTC") + .tz_convert("America/Chicago") + ) + df = pd.DataFrame([1, 2], index=index) + result = df.resample("12h", closed="right", label="right").last().ffill() + + expected_index_values = [ + "2016-03-09 12:00:00-06:00", + "2016-03-10 00:00:00-06:00", + "2016-03-10 12:00:00-06:00", + "2016-03-11 00:00:00-06:00", + "2016-03-11 12:00:00-06:00", + "2016-03-12 00:00:00-06:00", + "2016-03-12 12:00:00-06:00", + "2016-03-13 00:00:00-06:00", + "2016-03-13 13:00:00-05:00", + "2016-03-14 01:00:00-05:00", + "2016-03-14 13:00:00-05:00", + "2016-03-15 01:00:00-05:00", + "2016-03-15 13:00:00-05:00", + ] + index = pd.to_datetime(expected_index_values, utc=True).tz_convert( + "America/Chicago" + ) + expected = pd.DataFrame( + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], + index=index, + ) + tm.assert_frame_equal(result, expected) + + def test_resample_bms_2752(self): + # GH2753 + foo = Series(index=pd.bdate_range("20000101", "20000201"), dtype=np.float64) + res1 = foo.resample("BMS").mean() + res2 = foo.resample("BMS").mean().resample("B").mean() + assert res1.index[0] == Timestamp("20000103") + assert res1.index[0] == res2.index[0] + + # def test_monthly_convention_span(self): + # rng = period_range('2000-01', periods=3, freq='M') + # ts = Series(np.arange(3), index=rng) + + # # hacky way to get same thing + # exp_index = period_range('2000-01-01', '2000-03-31', freq='D') + # 
expected = ts.asfreq('D', how='end').reindex(exp_index) + # expected = expected.fillna(method='bfill') + + # result = ts.resample('D', convention='span').mean() + + # tm.assert_series_equal(result, expected) + + def test_default_right_closed_label(self): + end_freq = ["D", "Q", "M", "D"] + end_types = ["M", "A", "Q", "W"] + + for from_freq, to_freq in zip(end_freq, end_types): + idx = date_range(start="8/15/2012", periods=100, freq=from_freq) + df = DataFrame(np.random.randn(len(idx), 2), idx) + + resampled = df.resample(to_freq).mean() + tm.assert_frame_equal( + resampled, df.resample(to_freq, closed="right", label="right").mean() + ) + + def test_default_left_closed_label(self): + others = ["MS", "AS", "QS", "D", "H"] + others_freq = ["D", "Q", "M", "H", "T"] + + for from_freq, to_freq in zip(others_freq, others): + idx = date_range(start="8/15/2012", periods=100, freq=from_freq) + df = DataFrame(np.random.randn(len(idx), 2), idx) + + resampled = df.resample(to_freq).mean() + tm.assert_frame_equal( + resampled, df.resample(to_freq, closed="left", label="left").mean() + ) + + def test_all_values_single_bin(self): + # 2070 + index = period_range(start="2012-01-01", end="2012-12-31", freq="M") + s = Series(np.random.randn(len(index)), index=index) + + result = s.resample("A").mean() + tm.assert_almost_equal(result[0], s.mean()) + + def test_evenly_divisible_with_no_extra_bins(self): + # 4076 + # when the frequency is evenly divisible, sometimes extra bins + + df = DataFrame(np.random.randn(9, 3), index=date_range("2000-1-1", periods=9)) + result = df.resample("5D").mean() + expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T + expected.index = [Timestamp("2000-1-1"), Timestamp("2000-1-6")] + tm.assert_frame_equal(result, expected) + + index = date_range(start="2001-5-4", periods=28) + df = DataFrame( + [ + { + "REST_KEY": 1, + "DLY_TRN_QT": 80, + "DLY_SLS_AMT": 90, + "COOP_DLY_TRN_QT": 30, + "COOP_DLY_SLS_AMT": 20, + } + ] + * 28 + + [ + { + 
"REST_KEY": 2, + "DLY_TRN_QT": 70, + "DLY_SLS_AMT": 10, + "COOP_DLY_TRN_QT": 50, + "COOP_DLY_SLS_AMT": 20, + } + ] + * 28, + index=index.append(index), + ).sort_index() + + index = date_range("2001-5-4", periods=4, freq="7D") + expected = DataFrame( + [ + { + "REST_KEY": 14, + "DLY_TRN_QT": 14, + "DLY_SLS_AMT": 14, + "COOP_DLY_TRN_QT": 14, + "COOP_DLY_SLS_AMT": 14, + } + ] + * 4, + index=index, + ) + result = df.resample("7D").count() + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + [ + { + "REST_KEY": 21, + "DLY_TRN_QT": 1050, + "DLY_SLS_AMT": 700, + "COOP_DLY_TRN_QT": 560, + "COOP_DLY_SLS_AMT": 280, + } + ] + * 4, + index=index, + ) + result = df.resample("7D").sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("kind", ["period", None, "timestamp"]) + @pytest.mark.parametrize("agg_arg", ["mean", {"value": "mean"}, ["mean"]]) + def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg): + # make sure passing loffset returns DatetimeIndex in all cases + # basic method taken from Base.test_resample_loffset_arg_type() + df = frame + expected_means = [ + df.values[i : i + 2].mean() for i in range(0, len(df.values), 2) + ] + expected_index = period_range(df.index[0], periods=len(df.index) / 2, freq="2D") + + # loffset coerces PeriodIndex to DateTimeIndex + expected_index = expected_index.to_timestamp() + expected_index += timedelta(hours=2) + expected = DataFrame({"value": expected_means}, index=expected_index) + + result_agg = df.resample("2D", loffset="2H", kind=kind).agg(agg_arg) + if isinstance(agg_arg, list): + expected.columns = pd.MultiIndex.from_tuples([("value", "mean")]) + tm.assert_frame_equal(result_agg, expected) + + @pytest.mark.parametrize("freq, period_mult", [("H", 24), ("12H", 2)]) + @pytest.mark.parametrize("kind", [None, "period"]) + def test_upsampling_ohlc(self, freq, period_mult, kind): + # GH 13083 + pi = period_range(start="2000", freq="D", periods=10) + s = Series(range(len(pi)), index=pi) 
+ expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) + + # timestamp-based resampling doesn't include all sub-periods + # of the last original period, so extend accordingly: + new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi)) + expected = expected.reindex(new_index) + result = s.resample(freq, kind=kind).ohlc() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "periods, values", + [ + ( + [ + pd.NaT, + "1970-01-01 00:00:00", + pd.NaT, + "1970-01-01 00:00:02", + "1970-01-01 00:00:03", + ], + [2, 3, 5, 7, 11], + ), + ( + [ + pd.NaT, + pd.NaT, + "1970-01-01 00:00:00", + pd.NaT, + pd.NaT, + pd.NaT, + "1970-01-01 00:00:02", + "1970-01-01 00:00:03", + pd.NaT, + pd.NaT, + ], + [1, 2, 3, 5, 6, 8, 7, 11, 12, 13], + ), + ], + ) + @pytest.mark.parametrize( + "freq, expected_values", + [ + ("1s", [3, np.NaN, 7, 11]), + ("2s", [3, int((7 + 11) / 2)]), + ("3s", [int((3 + 7) / 2), 11]), + ], + ) + def test_resample_with_nat(self, periods, values, freq, expected_values): + # GH 13224 + index = PeriodIndex(periods, freq="S") + frame = DataFrame(values, index=index) + + expected_index = period_range( + "1970-01-01 00:00:00", periods=len(expected_values), freq=freq + ) + expected = DataFrame(expected_values, index=expected_index) + result = frame.resample(freq).mean() + tm.assert_frame_equal(result, expected) + + def test_resample_with_only_nat(self): + # GH 13224 + pi = PeriodIndex([pd.NaT] * 3, freq="S") + frame = DataFrame([2, 3, 5], index=pi) + expected_index = PeriodIndex(data=[], freq=pi.freq) + expected = DataFrame(index=expected_index) + result = frame.resample("1s").mean() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "start,end,start_freq,end_freq,base", + [ + ("19910905", "19910909 03:00", "H", "24H", 10), + ("19910905", "19910909 12:00", "H", "24H", 10), + ("19910905", "19910909 23:00", "H", "24H", 10), + ("19910905 10:00", "19910909", "H", "24H", 10), + ("19910905 10:00", 
"19910909 10:00", "H", "24H", 10), + ("19910905", "19910909 10:00", "H", "24H", 10), + ("19910905 12:00", "19910909", "H", "24H", 10), + ("19910905 12:00", "19910909 03:00", "H", "24H", 10), + ("19910905 12:00", "19910909 12:00", "H", "24H", 10), + ("19910905 12:00", "19910909 12:00", "H", "24H", 34), + ("19910905 12:00", "19910909 12:00", "H", "17H", 10), + ("19910905 12:00", "19910909 12:00", "H", "17H", 3), + ("19910905 12:00", "19910909 1:00", "H", "M", 3), + ("19910905", "19910913 06:00", "2H", "24H", 10), + ("19910905", "19910905 01:39", "Min", "5Min", 3), + ("19910905", "19910905 03:18", "2Min", "5Min", 3), + ], + ) + def test_resample_with_non_zero_base(self, start, end, start_freq, end_freq, base): + # GH 23882 + s = pd.Series(0, index=pd.period_range(start, end, freq=start_freq)) + s = s + np.arange(len(s)) + result = s.resample(end_freq, base=base).mean() + result = result.to_timestamp(end_freq) + # to_timestamp casts 24H -> D + result = result.asfreq(end_freq) if end_freq == "24H" else result + expected = s.to_timestamp().resample(end_freq, base=base).mean() + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "first,last,offset,exp_first,exp_last", + [ + ("19910905", "19920406", "D", "19910905", "19920406"), + ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"), + ( + "19910905 06:00", + "19920406 06:00", + "H", + "19910905 06:00", + "19920406 06:00", + ), + ("19910906", "19920406", "M", "1991-09", "1992-04"), + ("19910831", "19920430", "M", "1991-08", "1992-04"), + ("1991-08", "1992-04", "M", "1991-08", "1992-04"), + ], + ) + def test_get_period_range_edges(self, first, last, offset, exp_first, exp_last): + first = pd.Period(first) + last = pd.Period(last) + + exp_first = pd.Period(exp_first, freq=offset) + exp_last = pd.Period(exp_last, freq=offset) + + offset = pd.tseries.frequencies.to_offset(offset) + result = _get_period_range_edges(first, last, offset) + expected = (exp_first, exp_last) + assert result == 
expected diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py new file mode 100644 index 00000000..ee3b5364 --- /dev/null +++ b/pandas/tests/resample/test_resample_api.py @@ -0,0 +1,606 @@ +from collections import OrderedDict +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range + +dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min") + +test_series = Series(np.random.rand(len(dti)), dti) +_test_frame = DataFrame({"A": test_series, "B": test_series, "C": np.arange(len(dti))}) + + +@pytest.fixture +def test_frame(): + return _test_frame.copy() + + +def test_str(): + + r = test_series.resample("H") + assert ( + "DatetimeIndexResampler [freq=, axis=0, closed=left, " + "label=left, convention=start, base=0]" in str(r) + ) + + +def test_api(): + + r = test_series.resample("H") + result = r.mean() + assert isinstance(result, Series) + assert len(result) == 217 + + r = test_series.to_frame().resample("H") + result = r.mean() + assert isinstance(result, DataFrame) + assert len(result) == 217 + + +def test_groupby_resample_api(): + + # GH 12448 + # .groupby(...).resample(...) 
hitting warnings + # when appropriate + df = DataFrame( + { + "date": pd.date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": [5, 6, 7, 8], + } + ).set_index("date") + + # replication step + i = ( + pd.date_range("2016-01-03", periods=8).tolist() + + pd.date_range("2016-01-17", periods=8).tolist() + ) + index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], names=["group", "date"]) + expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index) + result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]] + tm.assert_frame_equal(result, expected) + + +def test_groupby_resample_on_api(): + + # GH 15021 + # .groupby(...).resample(on=...) results in an unexpected + # keyword warning. + df = DataFrame( + { + "key": ["A", "B"] * 5, + "dates": pd.date_range("2016-01-01", periods=10), + "values": np.random.randn(10), + } + ) + + expected = df.set_index("dates").groupby("key").resample("D").mean() + + result = df.groupby("key").resample("D", on="dates").mean() + tm.assert_frame_equal(result, expected) + + +def test_pipe(test_frame): + # GH17905 + + # series + r = test_series.resample("H") + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_series_equal(result, expected) + + # dataframe + r = test_frame.resample("H") + expected = r.max() - r.mean() + result = r.pipe(lambda x: x.max() - x.mean()) + tm.assert_frame_equal(result, expected) + + +def test_getitem(test_frame): + + r = test_frame.resample("H") + tm.assert_index_equal(r._selected_obj.columns, test_frame.columns) + + r = test_frame.resample("H")["B"] + assert r._selected_obj.name == test_frame.columns[1] + + # technically this is allowed + r = test_frame.resample("H")["A", "B"] + tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]]) + + r = test_frame.resample("H")["A", "B"] + tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]]) + + +@pytest.mark.parametrize("key", 
[["D"], ["A", "D"]]) +def test_select_bad_cols(key, test_frame): + g = test_frame.resample("H") + # 'A' should not be referenced as a bad column... + # will have to rethink regex if you change message! + msg = r"^\"Columns not found: 'D'\"$" + with pytest.raises(KeyError, match=msg): + g[key] + + +def test_attribute_access(test_frame): + + r = test_frame.resample("H") + tm.assert_series_equal(r.A.sum(), r["A"].sum()) + + +def test_api_compat_before_use(): + + # make sure that we are setting the binner + # on these attributes + for attr in ["groups", "ngroups", "indices"]: + rng = pd.date_range("1/1/2012", periods=100, freq="S") + ts = Series(np.arange(len(rng)), index=rng) + rs = ts.resample("30s") + + # before use + getattr(rs, attr) + + # after grouper is initialized is ok + rs.mean() + getattr(rs, attr) + + +def tests_skip_nuisance(test_frame): + + df = test_frame + df["D"] = "foo" + r = df.resample("H") + result = r[["A", "B"]].sum() + expected = pd.concat([r.A.sum(), r.B.sum()], axis=1) + tm.assert_frame_equal(result, expected) + + expected = r[["A", "B", "C"]].sum() + result = r.sum() + tm.assert_frame_equal(result, expected) + + +def test_downsample_but_actually_upsampling(): + + # this is reindex / asfreq + rng = pd.date_range("1/1/2012", periods=100, freq="S") + ts = Series(np.arange(len(rng), dtype="int64"), index=rng) + result = ts.resample("20s").asfreq() + expected = Series( + [0, 20, 40, 60, 80], + index=pd.date_range("2012-01-01 00:00:00", freq="20s", periods=5), + ) + tm.assert_series_equal(result, expected) + + +def test_combined_up_downsampling_of_irregular(): + + # since we are really doing an operation like this + # ts2.resample('2s').mean().ffill() + # preserve these semantics + + rng = pd.date_range("1/1/2012", periods=100, freq="S") + ts = Series(np.arange(len(rng)), index=rng) + ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]] + + result = ts2.resample("2s").mean().ffill() + expected = Series( + [ + 0.5, + 2.5, + 5.0, + 7.0, + 7.0, + 
11.0, + 11.0, + 15.0, + 16.0, + 16.0, + 16.0, + 16.0, + 25.0, + 25.0, + 25.0, + 30.0, + ], + index=pd.DatetimeIndex( + [ + "2012-01-01 00:00:00", + "2012-01-01 00:00:02", + "2012-01-01 00:00:04", + "2012-01-01 00:00:06", + "2012-01-01 00:00:08", + "2012-01-01 00:00:10", + "2012-01-01 00:00:12", + "2012-01-01 00:00:14", + "2012-01-01 00:00:16", + "2012-01-01 00:00:18", + "2012-01-01 00:00:20", + "2012-01-01 00:00:22", + "2012-01-01 00:00:24", + "2012-01-01 00:00:26", + "2012-01-01 00:00:28", + "2012-01-01 00:00:30", + ], + dtype="datetime64[ns]", + freq="2S", + ), + ) + tm.assert_series_equal(result, expected) + + +def test_transform(): + + r = test_series.resample("20min") + expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean") + result = r.transform("mean") + tm.assert_series_equal(result, expected) + + +def test_fillna(): + + # need to upsample here + rng = pd.date_range("1/1/2012", periods=10, freq="2S") + ts = Series(np.arange(len(rng), dtype="int64"), index=rng) + r = ts.resample("s") + + expected = r.ffill() + result = r.fillna(method="ffill") + tm.assert_series_equal(result, expected) + + expected = r.bfill() + result = r.fillna(method="bfill") + tm.assert_series_equal(result, expected) + + msg = ( + r"Invalid fill method\. Expecting pad \(ffill\), backfill" + r" \(bfill\) or nearest\. 
Got 0" + ) + with pytest.raises(ValueError, match=msg): + r.fillna(0) + + +def test_apply_without_aggregation(): + + # both resample and groupby should work w/o aggregation + r = test_series.resample("20min") + g = test_series.groupby(pd.Grouper(freq="20min")) + + for t in [g, r]: + result = t.apply(lambda x: x) + tm.assert_series_equal(result, test_series) + + +def test_agg_consistency(): + + # make sure that we are consistent across + # similar aggregations with and w/o selection list + df = DataFrame( + np.random.randn(1000, 3), + index=pd.date_range("1/1/2012", freq="S", periods=1000), + columns=["A", "B", "C"], + ) + + r = df.resample("3T") + + msg = "nested renamer is not supported" + with pytest.raises(pd.core.base.SpecificationError, match=msg): + r.agg({"r1": "mean", "r2": "sum"}) + + +# TODO: once GH 14008 is fixed, move these tests into +# `Base` test class + + +def test_agg(): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + index.name = "date" + df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays( + [range(10), df.index], names=["index", "date"] + ) + r = df.resample("2D") + cases = [ + r, + df_col.resample("2D", on="date"), + df_mult.resample("2D", level="date"), + df.groupby(pd.Grouper(freq="2D")), + ] + + a_mean = r["A"].mean() + a_std = r["A"].std() + a_sum = r["A"].sum() + b_mean = r["B"].mean() + b_std = r["B"].std() + b_sum = r["B"].sum() + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + for t in cases: + result = t.aggregate([np.mean, np.std]) + tm.assert_frame_equal(result, expected) + + expected = pd.concat([a_mean, b_std], axis=1) + for t in cases: + result = t.aggregate({"A": np.mean, "B": np.std}) + 
tm.assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) + for t in cases: + result = t.aggregate({"A": ["mean", "std"]}) + tm.assert_frame_equal(result, expected) + + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = ["mean", "sum"] + for t in cases: + result = t["A"].aggregate(["mean", "sum"]) + tm.assert_frame_equal(result, expected) + + msg = "nested renamer is not supported" + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.aggregate({"A": {"mean": "mean", "sum": "sum"}}) + + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples( + [("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")] + ) + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.aggregate( + { + "A": {"mean": "mean", "sum": "sum"}, + "B": {"mean2": "mean", "sum2": "sum"}, + } + ) + + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples( + [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] + ) + for t in cases: + result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) + tm.assert_frame_equal(result, expected, check_like=True) + + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples( + [ + ("r1", "A", "mean"), + ("r1", "A", "sum"), + ("r2", "B", "mean"), + ("r2", "B", "sum"), + ] + ) + + +def test_agg_misc(): + # test with all three Resampler apis and TimeGrouper + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + index.name = "date" + df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays( + [range(10), df.index], names=["index", "date"] 
+ ) + + r = df.resample("2D") + cases = [ + r, + df_col.resample("2D", on="date"), + df_mult.resample("2D", level="date"), + df.groupby(pd.Grouper(freq="2D")), + ] + + # passed lambda + for t in cases: + result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + rcustom = t["B"].apply(lambda x: np.std(x, ddof=1)) + expected = pd.concat([r["A"].sum(), rcustom], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + # agg with renamers + expected = pd.concat( + [t["A"].sum(), t["B"].sum(), t["A"].mean(), t["B"].mean()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")] + ) + + msg = "nested renamer is not supported" + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t[["A", "B"]].agg(OrderedDict([("result1", np.sum), ("result2", np.mean)])) + + # agg with different hows + expected = pd.concat( + [t["A"].sum(), t["A"].std(), t["B"].mean(), t["B"].std()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")] + ) + for t in cases: + result = t.agg(OrderedDict([("A", ["sum", "std"]), ("B", ["mean", "std"])])) + tm.assert_frame_equal(result, expected, check_like=True) + + # equivalent of using a selection list / or not + for t in cases: + result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + tm.assert_frame_equal(result, expected, check_like=True) + + # series like aggs + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t["A"].agg({"A": ["sum", "std"]}) + + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + + # errors + # invalid names in the agg specification + msg = "\"Column 'B' does not exist!\"" + for t in cases: + with pytest.raises(KeyError, match=msg): + t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) + + +def 
test_agg_nested_dicts(): + + np.random.seed(1234) + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + index.name = "date" + df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index) + df_col = df.reset_index() + df_mult = df_col.copy() + df_mult.index = pd.MultiIndex.from_arrays( + [range(10), df.index], names=["index", "date"] + ) + r = df.resample("2D") + cases = [ + r, + df_col.resample("2D", on="date"), + df_mult.resample("2D", level="date"), + df.groupby(pd.Grouper(freq="2D")), + ] + + msg = "nested renamer is not supported" + for t in cases: + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) + + for t in cases: + + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t[["A", "B"]].agg( + {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}} + ) + + with pytest.raises(pd.core.base.SpecificationError, match=msg): + t.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + +def test_try_aggregate_non_existing_column(): + # GH 16766 + data = [ + {"dt": datetime(2017, 6, 1, 0), "x": 1.0, "y": 2.0}, + {"dt": datetime(2017, 6, 1, 1), "x": 2.0, "y": 2.0}, + {"dt": datetime(2017, 6, 1, 2), "x": 3.0, "y": 1.5}, + ] + df = DataFrame(data).set_index("dt") + + # Error as we don't have 'z' column + msg = "\"Column 'z' does not exist!\"" + with pytest.raises(KeyError, match=msg): + df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]}) + + +def test_selection_api_validation(): + # GH 13500 + index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") + + rng = np.arange(len(index), dtype=np.int64) + df = DataFrame( + {"date": index, "a": rng}, + index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]), + ) + df_exp = DataFrame({"a": rng}, index=index) + + # non DatetimeIndex + msg = ( + "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " + "but got an instance of 
'Int64Index'" + ) + with pytest.raises(TypeError, match=msg): + df.resample("2D", level="v") + + msg = "The Grouper cannot specify both a key and a level!" + with pytest.raises(ValueError, match=msg): + df.resample("2D", on="date", level="d") + + msg = "unhashable type: 'list'" + with pytest.raises(TypeError, match=msg): + df.resample("2D", on=["a", "date"]) + + msg = r"\"Level \['a', 'date'\] not found\"" + with pytest.raises(KeyError, match=msg): + df.resample("2D", level=["a", "date"]) + + # upsampling not allowed + msg = ( + "Upsampling from level= or on= selection is not supported, use " + r"\.set_index\(\.\.\.\) to explicitly set index to datetime-like" + ) + with pytest.raises(ValueError, match=msg): + df.resample("2D", level="d").asfreq() + with pytest.raises(ValueError, match=msg): + df.resample("2D", on="date").asfreq() + + exp = df_exp.resample("2D").sum() + exp.index.name = "date" + tm.assert_frame_equal(exp, df.resample("2D", on="date").sum()) + + exp.index.name = "d" + tm.assert_frame_equal(exp, df.resample("2D", level="d").sum()) + + +@pytest.mark.parametrize( + "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"] +) +def test_agg_with_datetime_index_list_agg_func(col_name): + # GH 22660 + # The parametrized column names would get converted to dates by our + # date parser. Some would result in OutOfBoundsError (ValueError) while + # others would result in OverflowError when passed into Timestamp. + # We catch these errors and move on to the correct branch. 
+ df = pd.DataFrame( + list(range(200)), + index=pd.date_range( + start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin" + ), + columns=[col_name], + ) + result = df.resample("1d").aggregate(["mean"]) + expected = pd.DataFrame( + [47.5, 143.5, 195.5], + index=pd.date_range( + start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin" + ), + columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]), + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_agg_readonly(): + # GH#31710 cython needs to allow readonly data + index = pd.date_range("2020-01-01", "2020-01-02", freq="1h") + arr = np.zeros_like(index) + arr.setflags(write=False) + + ser = pd.Series(arr, index=index) + rs = ser.resample("1D") + + expected = pd.Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24]) + + result = rs.agg("last") + tm.assert_series_equal(result, expected) + + result = rs.agg("first") + tm.assert_series_equal(result, expected) + + result = rs.agg("max") + tm.assert_series_equal(result, expected) + + result = rs.agg("min") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py new file mode 100644 index 00000000..19afe257 --- /dev/null +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -0,0 +1,288 @@ +from textwrap import dedent + +import numpy as np + +from pandas.util._test_decorators import async_mark + +import pandas as pd +from pandas import DataFrame, Series, Timestamp +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range + +test_frame = DataFrame( + {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}, + index=date_range("1/1/2000", freq="s", periods=40), +) + + +@async_mark() +async def test_tab_complete_ipython6_warning(ip): + from IPython.core.completer import provisionalcompleter + + code = dedent( + """\ + import pandas._testing as tm + s = tm.makeTimeSeries() + rs = s.resample("D") + """ + 
) + await ip.run_code(code) + + # TODO: remove it when Ipython updates + # GH 33567, jedi version raises Deprecation warning in Ipython + import jedi + + if jedi.__version__ < "0.17.0": + warning = tm.assert_produces_warning(None) + else: + warning = tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False) + with warning: + with provisionalcompleter("ignore"): + list(ip.Completer.completions("rs.", 1)) + + +def test_deferred_with_groupby(): + + # GH 12486 + # support deferred resample ops with groupby + data = [ + ["2010-01-01", "A", 2], + ["2010-01-02", "A", 3], + ["2010-01-05", "A", 8], + ["2010-01-10", "A", 7], + ["2010-01-13", "A", 3], + ["2010-01-01", "B", 5], + ["2010-01-03", "B", 2], + ["2010-01-04", "B", 1], + ["2010-01-11", "B", 7], + ["2010-01-14", "B", 3], + ] + + df = DataFrame(data, columns=["date", "id", "score"]) + df.date = pd.to_datetime(df.date) + + def f(x): + return x.set_index("date").resample("D").asfreq() + + expected = df.groupby("id").apply(f) + result = df.set_index("date").groupby("id").resample("D").asfreq() + tm.assert_frame_equal(result, expected) + + df = DataFrame( + { + "date": pd.date_range(start="2016-01-01", periods=4, freq="W"), + "group": [1, 1, 2, 2], + "val": [5, 6, 7, 8], + } + ).set_index("date") + + def f(x): + return x.resample("1D").ffill() + + expected = df.groupby("group").apply(f) + result = df.groupby("group").resample("1D").ffill() + tm.assert_frame_equal(result, expected) + + +def test_getitem(): + g = test_frame.groupby("A") + + expected = g.B.apply(lambda x: x.resample("2s").mean()) + + result = g.resample("2s").B.mean() + tm.assert_series_equal(result, expected) + + result = g.B.resample("2s").mean() + tm.assert_series_equal(result, expected) + + result = g.resample("2s").mean().B + tm.assert_series_equal(result, expected) + + +def test_getitem_multiple(): + + # GH 13174 + # multiple calls after selection causing an issue with aliasing + data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}] + 
df = DataFrame(data, index=pd.date_range("2016-01-01", periods=2)) + r = df.groupby("id").resample("1D") + result = r["buyer"].count() + expected = Series( + [1, 1], + index=pd.MultiIndex.from_tuples( + [(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))], + names=["id", None], + ), + name="buyer", + ) + tm.assert_series_equal(result, expected) + + result = r["buyer"].count() + tm.assert_series_equal(result, expected) + + +def test_groupby_resample_on_api_with_getitem(): + # GH 17813 + df = pd.DataFrame( + {"id": list("aabbb"), "date": pd.date_range("1-1-2016", periods=5), "data": 1} + ) + exp = df.set_index("date").groupby("id").resample("2D")["data"].sum() + result = df.groupby("id").resample("2D", on="date")["data"].sum() + tm.assert_series_equal(result, exp) + + +def test_nearest(): + + # GH 17496 + # Resample nearest + index = pd.date_range("1/1/2000", periods=3, freq="T") + result = Series(range(3), index=index).resample("20s").nearest() + + expected = Series( + [0, 0, 1, 1, 1, 2, 2], + index=pd.DatetimeIndex( + [ + "2000-01-01 00:00:00", + "2000-01-01 00:00:20", + "2000-01-01 00:00:40", + "2000-01-01 00:01:00", + "2000-01-01 00:01:20", + "2000-01-01 00:01:40", + "2000-01-01 00:02:00", + ], + dtype="datetime64[ns]", + freq="20S", + ), + ) + tm.assert_series_equal(result, expected) + + +def test_methods(): + g = test_frame.groupby("A") + r = g.resample("2s") + + for f in ["first", "last", "median", "sem", "sum", "mean", "min", "max"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_frame_equal(result, expected) + + for f in ["size"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_series_equal(result, expected) + + for f in ["count"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_frame_equal(result, expected) + + # series only + for f in ["nunique"]: + result = getattr(r.B, f)() + expected = 
g.B.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_series_equal(result, expected) + + for f in ["nearest", "backfill", "ffill", "asfreq"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.resample("2s"), f)()) + tm.assert_frame_equal(result, expected) + + result = r.ohlc() + expected = g.apply(lambda x: x.resample("2s").ohlc()) + tm.assert_frame_equal(result, expected) + + for f in ["std", "var"]: + result = getattr(r, f)(ddof=1) + expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1)) + tm.assert_frame_equal(result, expected) + + +def test_apply(): + + g = test_frame.groupby("A") + r = g.resample("2s") + + # reduction + expected = g.resample("2s").sum() + + def f(x): + return x.resample("2s").sum() + + result = r.apply(f) + tm.assert_frame_equal(result, expected) + + def f(x): + return x.resample("2s").apply(lambda y: y.sum()) + + result = g.apply(f) + tm.assert_frame_equal(result, expected) + + +def test_apply_with_mutated_index(): + # GH 15169 + index = pd.date_range("1-1-2015", "12-31-15", freq="D") + df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index) + + def f(x): + s = Series([1, 2], index=["a", "b"]) + return s + + expected = df.groupby(pd.Grouper(freq="M")).apply(f) + + result = df.resample("M").apply(f) + tm.assert_frame_equal(result, expected) + + # A case for series + expected = df["col1"].groupby(pd.Grouper(freq="M")).apply(f) + result = df["col1"].resample("M").apply(f) + tm.assert_series_equal(result, expected) + + +def test_resample_groupby_with_label(): + # GH 13235 + index = date_range("2000-01-01", freq="2D", periods=5) + df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]}) + result = df.groupby("col0").resample("1W", label="left").sum() + + mi = [ + np.array([0, 0, 1, 2]), + pd.to_datetime( + np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"]) + ), + ] + mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None]) + expected = DataFrame( 
+ data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex + ) + + tm.assert_frame_equal(result, expected) + + +def test_consistency_with_window(): + + # consistent return values with window + df = test_frame + expected = pd.Int64Index([1, 2, 3], name="A") + result = df.groupby("A").resample("2s").mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + result = df.groupby("A").rolling(20).mean() + assert result.index.nlevels == 2 + tm.assert_index_equal(result.index.levels[0], expected) + + +def test_median_duplicate_columns(): + # GH 14233 + + df = DataFrame( + np.random.randn(20, 3), + columns=list("aaa"), + index=pd.date_range("2012-01-01", periods=20, freq="s"), + ) + df2 = df.copy() + df2.columns = ["a", "b", "c"] + expected = df2.resample("5s").median() + result = df.resample("5s").median() + expected.columns = result.columns + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py new file mode 100644 index 00000000..3aa77659 --- /dev/null +++ b/pandas/tests/resample/test_time_grouper.py @@ -0,0 +1,278 @@ +from datetime import datetime +from operator import methodcaller + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.groupby.grouper import Grouper +from pandas.core.indexes.datetimes import date_range + +test_series = Series(np.random.randn(1000), index=date_range("1/1/2000", periods=1000)) + + +def test_apply(): + grouper = Grouper(freq="A", label="right", closed="right") + + grouped = test_series.groupby(grouper) + + def f(x): + return x.sort_values()[-3:] + + applied = grouped.apply(f) + expected = test_series.groupby(lambda x: x.year).apply(f) + + applied.index = applied.index.droplevel(0) + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(applied, expected) + + +def test_count(): + test_series[::3] 
= np.nan + + expected = test_series.groupby(lambda x: x.year).count() + + grouper = Grouper(freq="A", label="right", closed="right") + result = test_series.groupby(grouper).count() + expected.index = result.index + tm.assert_series_equal(result, expected) + + result = test_series.resample("A").count() + expected.index = result.index + tm.assert_series_equal(result, expected) + + +def test_numpy_reduction(): + result = test_series.resample("A", closed="right").prod() + + expected = test_series.groupby(lambda x: x.year).agg(np.prod) + expected.index = result.index + + tm.assert_series_equal(result, expected) + + +def test_apply_iteration(): + # #2300 + N = 1000 + ind = pd.date_range(start="2000-01-01", freq="D", periods=N) + df = DataFrame({"open": 1, "close": 2}, index=ind) + tg = Grouper(freq="M") + + _, grouper, _ = tg._get_grouper(df) + + # Errors + grouped = df.groupby(grouper, group_keys=False) + + def f(df): + return df["close"] / df["open"] + + # it works! + result = grouped.apply(f) + tm.assert_index_equal(result.index, df.index) + + +@pytest.mark.parametrize( + "name, func", + [ + ("Int64Index", tm.makeIntIndex), + ("Index", tm.makeUnicodeIndex), + ("Float64Index", tm.makeFloatIndex), + ("MultiIndex", lambda m: tm.makeCustomIndex(m, 2)), + ], +) +def test_fails_on_no_datetime_index(name, func): + n = 2 + index = func(n) + df = DataFrame({"a": np.random.randn(n)}, index=index) + + msg = ( + "Only valid with DatetimeIndex, TimedeltaIndex " + f"or PeriodIndex, but got an instance of '{name}'" + ) + with pytest.raises(TypeError, match=msg): + df.groupby(Grouper(freq="D")) + + +def test_aaa_group_order(): + # GH 12840 + # check TimeGrouper perform stable sorts + n = 20 + data = np.random.randn(n, 4) + df = DataFrame(data, columns=["A", "B", "C", "D"]) + df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + grouped = df.groupby(Grouper(key="key", freq="D")) + + 
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5]) + tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5]) + + +def test_aggregate_normal(resample_method): + """Check TimeGrouper's aggregation is identical as normal groupby.""" + + if resample_method == "ohlc": + pytest.xfail(reason="DataError: No numeric types to aggregate") + + data = np.random.randn(20, 4) + normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) + normal_df["key"] = [1, 2, 3, 4, 5] * 4 + + dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) + dt_df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + datetime(2013, 1, 3), + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + + normal_grouped = normal_df.groupby("key") + dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) + + expected = getattr(normal_grouped, resample_method)() + dt_result = getattr(dt_grouped, resample_method)() + expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key") + tm.assert_equal(expected, dt_result) + + # if TimeGrouper is used included, 'nth' doesn't work yet + + """ + for func in ['nth']: + expected = getattr(normal_grouped, func)(3) + expected.index = date_range(start='2013-01-01', + freq='D', periods=5, name='key') + dt_result = getattr(dt_grouped, func)(3) + tm.assert_frame_equal(expected, dt_result) + """ + + +@pytest.mark.parametrize( + "method, method_args, unit", + [ + ("sum", dict(), 0), + ("sum", dict(min_count=0), 0), + ("sum", dict(min_count=1), np.nan), + ("prod", dict(), 1), + ("prod", dict(min_count=0), 1), + ("prod", dict(min_count=1), np.nan), + ], +) +def test_resample_entirly_nat_window(method, method_args, unit): + s = pd.Series([0] * 2 + [np.nan] * 2, index=pd.date_range("2017", periods=4)) + 
result = methodcaller(method, **method_args)(s.resample("2d")) + expected = pd.Series( + [0.0, unit], index=pd.to_datetime(["2017-01-01", "2017-01-03"]) + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "func, fill_value", + [("min", np.nan), ("max", np.nan), ("sum", 0), ("prod", 1), ("count", 0)], +) +def test_aggregate_with_nat(func, fill_value): + # check TimeGrouper's aggregation is identical as normal groupby + # if NaT is included, 'var', 'std', 'mean', 'first','last' + # and 'nth' doesn't work yet + + n = 20 + data = np.random.randn(n, 4).astype("int64") + normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) + normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) + dt_df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + pd.NaT, + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + + normal_grouped = normal_df.groupby("key") + dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) + + normal_result = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + + pad = DataFrame([[fill_value] * 4], index=[3], columns=["A", "B", "C", "D"]) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key") + tm.assert_frame_equal(expected, dt_result) + assert dt_result.index.name == "key" + + +def test_aggregate_with_nat_size(): + # GH 9925 + n = 20 + data = np.random.randn(n, 4).astype("int64") + normal_df = DataFrame(data, columns=["A", "B", "C", "D"]) + normal_df["key"] = [1, 2, np.nan, 4, 5] * 4 + + dt_df = DataFrame(data, columns=["A", "B", "C", "D"]) + dt_df["key"] = [ + datetime(2013, 1, 1), + datetime(2013, 1, 2), + pd.NaT, + datetime(2013, 1, 4), + datetime(2013, 1, 5), + ] * 4 + + normal_grouped = normal_df.groupby("key") + dt_grouped = dt_df.groupby(Grouper(key="key", freq="D")) + + normal_result = normal_grouped.size() + dt_result = 
dt_grouped.size() + + pad = Series([0], index=[3]) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start="2013-01-01", freq="D", periods=5, name="key") + tm.assert_series_equal(expected, dt_result) + assert dt_result.index.name == "key" + + +def test_repr(): + # GH18203 + result = repr(Grouper(key="A", freq="H")) + expected = ( + "TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, " + "closed='left', label='left', how='mean', " + "convention='e', base=0)" + ) + assert result == expected + + +@pytest.mark.parametrize( + "method, method_args, expected_values", + [ + ("sum", dict(), [1, 0, 1]), + ("sum", dict(min_count=0), [1, 0, 1]), + ("sum", dict(min_count=1), [1, np.nan, 1]), + ("sum", dict(min_count=2), [np.nan, np.nan, np.nan]), + ("prod", dict(), [1, 1, 1]), + ("prod", dict(min_count=0), [1, 1, 1]), + ("prod", dict(min_count=1), [1, np.nan, 1]), + ("prod", dict(min_count=2), [np.nan, np.nan, np.nan]), + ], +) +def test_upsample_sum(method, method_args, expected_values): + s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H")) + resampled = s.resample("30T") + index = pd.to_datetime( + ["2017-01-01T00:00:00", "2017-01-01T00:30:00", "2017-01-01T01:00:00"] + ) + result = methodcaller(method, **method_args)(resampled) + expected = pd.Series(expected_values, index=index) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py new file mode 100644 index 00000000..a4d14f12 --- /dev/null +++ b/pandas/tests/resample/test_timedelta.py @@ -0,0 +1,127 @@ +from datetime import timedelta + +import numpy as np + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.indexes.timedeltas import timedelta_range + + +def test_asfreq_bug(): + df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)]) + result = df.resample("1T").asfreq() + expected = DataFrame( + data=[1, 
np.nan, np.nan, 3], + index=timedelta_range("0 day", periods=4, freq="1T"), + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_with_nat(): + # GH 13223 + index = pd.to_timedelta(["0s", pd.NaT, "2s"]) + result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean() + expected = DataFrame( + {"value": [2.5, np.nan, 5.0]}, + index=timedelta_range("0 day", periods=3, freq="1S"), + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_as_freq_with_subperiod(): + # GH 13022 + index = timedelta_range("00:00:00", "00:10:00", freq="5T") + df = DataFrame(data={"value": [1, 5, 10]}, index=index) + result = df.resample("2T").asfreq() + expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]} + expected = DataFrame( + data=expected_data, index=timedelta_range("00:00:00", "00:10:00", freq="2T") + ) + tm.assert_frame_equal(result, expected) + + +def test_resample_with_timedeltas(): + + expected = DataFrame({"A": np.arange(1480)}) + expected = expected.groupby(expected.index // 30).sum() + expected.index = pd.timedelta_range("0 days", freq="30T", periods=50) + + df = DataFrame( + {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="T") + ) + result = df.resample("30T").sum() + + tm.assert_frame_equal(result, expected) + + s = df["A"] + result = s.resample("30T").sum() + tm.assert_series_equal(result, expected["A"]) + + +def test_resample_single_period_timedelta(): + + s = Series(list(range(5)), index=pd.timedelta_range("1 day", freq="s", periods=5)) + result = s.resample("2s").sum() + expected = Series( + [1, 5, 4], index=pd.timedelta_range("1 day", freq="2s", periods=3) + ) + tm.assert_series_equal(result, expected) + + +def test_resample_timedelta_idempotency(): + + # GH 12072 + index = pd.timedelta_range("0", periods=9, freq="10L") + series = Series(range(9), index=index) + result = series.resample("10L").mean() + expected = series + tm.assert_series_equal(result, expected) + + +def 
test_resample_base_with_timedeltaindex(): + + # GH 10530 + rng = timedelta_range(start="0s", periods=25, freq="s") + ts = Series(np.random.randn(len(rng)), index=rng) + + with_base = ts.resample("2s", base=5).mean() + without_base = ts.resample("2s").mean() + + exp_without_base = timedelta_range(start="0s", end="25s", freq="2s") + exp_with_base = timedelta_range(start="5s", end="29s", freq="2s") + + tm.assert_index_equal(without_base.index, exp_without_base) + tm.assert_index_equal(with_base.index, exp_with_base) + + +def test_resample_categorical_data_with_timedeltaindex(): + # GH #12169 + df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s")) + df["Group"] = df["Group_obj"].astype("category") + result = df.resample("10s").agg(lambda x: (x.value_counts().index[0])) + expected = DataFrame( + {"Group_obj": ["A", "A"], "Group": ["A", "A"]}, + index=pd.to_timedelta([0, 10], unit="s"), + ) + expected = expected.reindex(["Group_obj", "Group"], axis=1) + expected["Group"] = expected["Group_obj"] + tm.assert_frame_equal(result, expected) + + +def test_resample_timedelta_values(): + # GH 13119 + # check that timedelta dtype is preserved when NaT values are + # introduced by the resampling + + times = timedelta_range("1 day", "4 day", freq="4D") + df = DataFrame({"time": times}, index=times) + + times2 = timedelta_range("1 day", "4 day", freq="2D") + exp = Series(times2, index=times2, name="time") + exp.iloc[1] = pd.NaT + + res = df.resample("2D").first()["time"] + tm.assert_series_equal(res, exp) + res = df["time"].resample("2D").first() + tm.assert_series_equal(res, exp) diff --git a/pandas/tests/reshape/__init__.py b/pandas/tests/reshape/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/reshape/data/cut_data.csv b/pandas/tests/reshape/data/cut_data.csv new file mode 100644 index 00000000..c198ec77 --- /dev/null +++ b/pandas/tests/reshape/data/cut_data.csv @@ -0,0 +1 @@ +1.001 0.994 0.9951 0.9956 0.9956 
0.9951 0.9949 1.001 0.994 0.9938 0.9908 0.9947 0.992 0.9912 1.0002 0.9914 0.9928 0.9892 0.9917 0.9955 0.9892 0.9912 0.993 0.9937 0.9951 0.9955 0.993 0.9961 0.9914 0.9906 0.9974 0.9934 0.992 0.9939 0.9962 0.9905 0.9934 0.9906 0.9999 0.9999 0.9937 0.9937 0.9954 0.9934 0.9934 0.9931 0.994 0.9939 0.9954 0.995 0.9917 0.9914 0.991 0.9911 0.993 0.9908 0.9962 0.9972 0.9931 0.9926 0.9951 0.9972 0.991 0.9931 0.9927 0.9934 0.9903 0.992 0.9926 0.9962 0.9956 0.9958 0.9964 0.9941 0.9926 0.9962 0.9898 0.9912 0.9961 0.9949 0.9929 0.9985 0.9946 0.9966 0.9974 0.9975 0.9974 0.9972 0.9974 0.9975 0.9974 0.9957 0.99 0.9899 0.9916 0.9969 0.9979 0.9913 0.9956 0.9979 0.9975 0.9962 0.997 1 0.9975 0.9974 0.9962 0.999 0.999 0.9927 0.9959 1 0.9982 0.9968 0.9968 0.994 0.9914 0.9911 0.9982 0.9982 0.9934 0.9984 0.9952 0.9952 0.9928 0.9912 0.994 0.9958 0.9924 0.9924 0.994 0.9958 0.9979 0.9982 0.9961 0.9979 0.992 0.9975 0.9917 0.9923 0.9927 0.9975 0.992 0.9947 0.9921 0.9905 0.9918 0.9951 0.9917 0.994 0.9934 0.9968 0.994 0.9919 0.9966 0.9979 0.9979 0.9898 0.9894 0.9894 0.9898 0.998 0.9932 0.9979 0.997 0.9972 0.9974 0.9896 0.9968 0.9958 0.9906 0.9917 0.9902 0.9918 0.999 0.9927 0.991 0.9972 0.9931 0.995 0.9951 0.9936 1.001 0.9979 0.997 0.9972 0.9954 0.9924 0.9906 0.9962 0.9962 1.001 0.9928 0.9942 0.9942 0.9942 0.9942 0.9961 0.998 0.9961 0.9984 0.998 0.9973 0.9949 0.9924 0.9972 0.9958 0.9968 0.9938 0.993 0.994 0.9918 0.9958 0.9944 0.9912 0.9961 0.9939 0.9961 0.9989 0.9938 0.9939 0.9971 0.9912 0.9936 0.9929 0.9998 0.9938 0.9969 0.9938 0.9998 0.9972 0.9976 0.9976 0.9979 0.9979 0.9979 0.9979 0.9972 0.9918 0.9982 0.9985 0.9944 0.9903 0.9934 0.9975 0.9923 0.99 0.9905 0.9905 0.996 0.9964 0.998 0.9975 0.9913 0.9932 0.9935 0.9927 0.9927 0.9912 0.9904 0.9939 0.9996 0.9944 0.9977 0.9912 0.9996 0.9965 0.9944 0.9945 0.9944 0.9965 0.9944 0.9972 0.9949 0.9966 0.9954 0.9954 0.9915 0.9919 0.9916 0.99 0.9909 0.9938 0.9982 0.9988 0.9961 0.9978 0.9979 0.9979 0.9979 0.9979 0.9945 1 0.9957 0.9968 0.9934 0.9976 0.9932 0.997 
0.9923 0.9914 0.992 0.9914 0.9914 0.9949 0.9949 0.995 0.995 0.9927 0.9928 0.9917 0.9918 0.9954 0.9941 0.9941 0.9934 0.9927 0.9938 0.9933 0.9934 0.9927 0.9938 0.9927 0.9946 0.993 0.9946 0.9976 0.9944 0.9978 0.992 0.9912 0.9927 0.9906 0.9954 0.9923 0.9906 0.991 0.9972 0.9945 0.9934 0.9964 0.9948 0.9962 0.9931 0.993 0.9942 0.9906 0.9995 0.998 0.997 0.9914 0.992 0.9924 0.992 0.9937 0.9978 0.9978 0.9927 0.994 0.9935 0.9968 0.9941 0.9942 0.9978 0.9923 0.9912 0.9923 0.9927 0.9931 0.9941 0.9927 0.9931 0.9934 0.9936 0.9893 0.9893 0.9919 0.9924 0.9927 0.9919 0.9924 0.9975 0.9969 0.9936 0.991 0.9893 0.9906 0.9941 0.995 0.9983 0.9983 0.9916 0.9957 0.99 0.9976 0.992 0.9917 0.9917 0.9993 0.9908 0.9917 0.9976 0.9934 1 0.9918 0.992 0.9896 0.9932 0.992 0.9917 0.9999 0.998 0.9918 0.9918 0.9999 0.998 0.9927 0.9959 0.9927 0.9929 0.9898 0.9954 0.9954 0.9954 0.9954 0.9954 0.9954 0.9974 0.9936 0.9978 0.9974 0.9927 0.9934 0.9938 0.9922 0.992 0.9935 0.9906 0.9934 0.9934 0.9913 0.9938 0.9898 0.9975 0.9975 0.9937 0.9914 0.9982 0.9982 0.9929 0.9971 0.9921 0.9931 0.9924 0.9929 0.9982 0.9892 0.9956 0.9924 0.9971 0.9956 0.9982 0.9973 0.9932 0.9976 0.9962 0.9956 0.9932 0.9976 0.9992 0.9983 0.9937 0.99 0.9944 0.9938 0.9965 0.9893 0.9927 0.994 0.9928 0.9964 0.9917 0.9972 0.9964 0.9954 0.993 0.9928 0.9916 0.9936 0.9962 0.9899 0.9898 0.996 0.9907 0.994 0.9913 0.9976 0.9904 0.992 0.9976 0.999 0.9975 0.9937 0.9937 0.998 0.998 0.9944 0.9938 0.9907 0.9938 0.9921 0.9908 0.9931 0.9915 0.9952 0.9926 0.9934 0.992 0.9918 0.9942 0.9942 0.9942 0.9901 0.9898 0.9902 0.9934 0.9906 0.9898 0.9896 0.9922 0.9947 0.9945 0.9976 0.9976 0.9976 0.9987 0.9987 0.9976 0.992 0.9955 0.9953 0.9976 0.992 0.9952 0.9983 0.9933 0.9958 0.9922 0.9928 0.9976 0.9976 0.9916 0.9901 0.9976 0.9901 0.9916 0.9982 0.993 0.9969 0.991 0.9953 0.9924 0.9969 0.9928 0.9945 0.9967 0.9944 0.9928 0.9929 0.9948 0.9976 0.9912 0.9987 0.99 0.991 0.9933 0.9933 0.9899 0.9912 0.9912 0.9976 0.994 0.9947 0.9954 0.993 0.9954 0.9963 0.992 0.9926 0.995 0.9983 
0.992 0.9968 0.9905 0.9904 0.9926 0.9968 0.9928 0.9949 0.9909 0.9937 0.9914 0.9905 0.9904 0.9924 0.9924 0.9965 0.9965 0.9993 0.9965 0.9908 0.992 0.9978 0.9978 0.9978 0.9978 0.9912 0.9928 0.9928 0.993 0.9993 0.9965 0.9937 0.9913 0.9934 0.9952 0.9983 0.9957 0.9957 0.9916 0.9999 0.9999 0.9936 0.9972 0.9933 0.9934 0.9931 0.9976 0.9937 0.9937 0.991 0.9979 0.9971 0.9969 0.9968 0.9961 0.993 0.9973 0.9944 0.9986 0.9986 0.9986 0.9986 0.9972 0.9917 0.992 0.9932 0.9936 0.9915 0.9922 0.9934 0.9952 0.9972 0.9934 0.9958 0.9944 0.9908 0.9958 0.9925 0.9966 0.9972 0.9912 0.995 0.9928 0.9968 0.9955 0.9981 0.991 0.991 0.991 0.992 0.9931 0.997 0.9948 0.9923 0.9976 0.9938 0.9984 0.9972 0.9922 0.9935 0.9944 0.9942 0.9944 0.9997 0.9977 0.9912 0.9982 0.9982 0.9983 0.998 0.9894 0.9927 0.9917 0.9904 0.993 0.9941 0.9943 0.99855 0.99345 0.998 0.9916 0.9916 0.99475 0.99325 0.9933 0.9969 1.0002 0.9933 0.9937 0.99685 0.99455 0.9917 0.99035 0.9914 0.99225 0.99155 0.9954 0.99455 0.9924 0.99695 0.99655 0.9934 0.998 0.9971 0.9948 0.998 0.9971 0.99215 0.9948 0.9915 0.99115 0.9932 0.9977 0.99535 0.99165 0.9953 0.9928 0.9958 0.9928 0.9928 0.9964 0.9987 0.9953 0.9932 0.9907 0.99755 0.99935 0.9932 0.9932 0.9958 0.99585 1.00055 0.9985 0.99505 0.992 0.9988 0.99175 0.9962 0.9962 0.9942 0.9927 0.9927 0.99985 0.997 0.9918 0.99215 0.99865 0.9992 1.0006 0.99135 0.99715 0.9992 1.0006 0.99865 0.99815 0.99815 0.99815 0.9949 0.99815 0.99815 0.99225 0.99445 0.99225 0.99335 0.99625 0.9971 0.9983 0.99445 0.99085 0.9977 0.9953 0.99775 0.99795 0.99505 0.9977 0.9975 0.99745 0.9976 0.99775 0.9953 0.9932 0.99405 1 0.99785 0.9939 0.9939 0.99675 0.9939 0.99675 0.98965 0.9971 0.99445 0.9945 0.9939 0.9958 0.9956 0.99055 0.9959 0.9925 0.9963 0.9935 0.99105 0.99045 0.9963 0.99155 0.99085 0.99085 0.99085 0.9924 0.9924 0.99975 0.99975 0.99315 0.9917 0.9917 0.99845 0.9921 0.99975 0.9909 0.99315 0.99855 0.9934 0.9978 0.9934 0.9949 0.99855 0.9986 0.99725 0.9946 0.99255 0.9996 0.9939 0.99 0.9937 0.9886 0.9934 1 0.9994 0.9926 0.9956 
0.9978 0.9915 0.9939 0.9932 0.993 0.9898 0.9921 0.9932 0.9919 0.993 0.9953 0.9928 0.9928 0.9976 0.9906 0.9918 0.99185 0.9918 0.99185 0.994 0.9908 0.9928 0.9896 0.9908 0.9918 0.9952 0.9923 0.9915 0.9952 0.9947 0.9983 0.9975 0.995 0.9944 0.994 0.9944 0.9908 0.99795 0.9985 0.99425 0.99425 0.9943 0.9924 0.9946 0.9924 0.995 0.9919 0.99 0.9923 0.9956 0.9978 0.9978 0.9967 0.9934 0.9936 0.9932 0.9934 0.998 0.9978 0.9929 0.9974 0.99685 0.99495 0.99745 0.99505 0.992 0.9978 0.9956 0.9982 0.99485 0.9971 0.99265 0.9904 0.9965 0.9946 0.99965 0.9935 0.996 0.9942 0.9936 0.9965 0.9928 0.9928 0.9965 0.9936 0.9938 0.9926 0.9926 0.9983 0.9983 0.992 0.9983 0.9923 0.9972 0.9928 0.9928 0.9994 0.991 0.9906 0.9894 0.9898 0.9994 0.991 0.9925 0.9956 0.9946 0.9966 0.9951 0.9927 0.9927 0.9951 0.9894 0.9907 0.9925 0.9928 0.9941 0.9941 0.9925 0.9935 0.9932 0.9944 0.9972 0.994 0.9956 0.9927 0.9924 0.9966 0.9997 0.9936 0.9936 0.9952 0.9952 0.9928 0.9911 0.993 0.9911 0.9932 0.993 0.993 0.9932 0.9932 0.9943 0.9968 0.9994 0.9926 0.9968 0.9932 0.9916 0.9946 0.9925 0.9925 0.9935 0.9962 0.9928 0.993 0.993 0.9956 0.9941 0.9972 0.9948 0.9955 0.9972 0.9972 0.9983 0.9942 0.9936 0.9956 0.9953 0.9918 0.995 0.992 0.9952 1.001 0.9924 0.9932 0.9937 0.9918 0.9934 0.991 0.9962 0.9932 0.9908 0.9962 0.9918 0.9941 0.9931 0.9981 0.9931 0.9944 0.992 0.9966 0.9956 0.9956 0.9949 1.0002 0.9942 0.9923 0.9917 0.9931 0.992 1.0002 0.9953 0.9951 0.9974 0.9904 0.9974 0.9944 1.0004 0.9952 0.9956 0.995 0.995 0.9995 0.9942 0.9977 0.992 0.992 0.9995 0.9934 1.0006 0.9982 0.9928 0.9945 0.9963 0.9906 0.9956 0.9942 0.9962 0.9894 0.995 0.9908 0.9914 0.9938 0.9977 0.9922 0.992 0.9903 0.9893 0.9952 0.9903 0.9912 0.9983 0.9937 0.9932 0.9928 0.9922 0.9976 0.9922 0.9974 0.998 0.9931 0.9911 0.9944 0.9937 0.9974 0.989 0.992 0.9928 0.9918 0.9936 0.9944 0.9988 0.994 0.9953 0.9986 0.9914 0.9934 0.996 0.9937 0.9921 0.998 0.996 0.9933 0.9933 0.9959 0.9936 0.9953 0.9938 0.9952 0.9959 0.9959 0.9937 0.992 0.9967 0.9944 0.9998 0.9998 0.9942 0.9998 
0.9945 0.9998 0.9946 0.9942 0.9928 0.9946 0.9927 0.9938 0.9918 0.9945 0.9966 0.9954 0.9913 0.9931 0.9986 0.9965 0.9984 0.9952 0.9956 0.9949 0.9954 0.996 0.9931 0.992 0.9912 0.9978 0.9938 0.9914 0.9932 0.9944 0.9913 0.9948 0.998 0.9998 0.9964 0.9992 0.9948 0.9998 0.998 0.9939 0.992 0.9922 0.9955 0.9917 0.9917 0.9954 0.9986 0.9955 0.9917 0.9907 0.9922 0.9958 0.993 0.9917 0.9926 0.9959 0.9906 0.9993 0.993 0.9906 0.992 0.992 0.994 0.9959 0.9908 0.9902 0.9908 0.9943 0.9921 0.9911 0.9986 0.992 0.992 0.9943 0.9937 0.993 0.9902 0.9928 0.9896 0.998 0.9954 0.9938 0.9918 0.9896 0.9944 0.9999 0.9953 0.992 0.9925 0.9981 0.9952 0.9927 0.9927 0.9911 0.9936 0.9959 0.9946 0.9948 0.9955 0.9951 0.9952 0.9946 0.9946 0.9944 0.9938 0.9963 0.991 1.0003 0.9966 0.9993 1.0003 0.9938 0.9965 0.9938 0.9993 0.9938 1.0003 0.9966 0.9942 0.9928 0.991 0.9911 0.9977 0.9927 0.9911 0.991 0.9912 0.9907 0.9902 0.992 0.994 0.9966 0.993 0.993 0.993 0.9966 0.9942 0.9925 0.9925 0.9928 0.995 0.9939 0.9958 0.9952 1 0.9948 0.99 0.9958 0.9948 0.9949 0.997 0.9927 0.9938 0.9949 0.9953 0.997 0.9932 0.9927 0.9932 0.9955 0.9914 0.991 0.992 0.9924 0.9927 0.9911 0.9958 0.9928 0.9902 0.994 0.994 0.9972 1.0004 0.991 0.9918 0.995 0.9941 0.9956 0.9956 0.9959 0.9922 0.9931 0.9959 0.9984 0.9908 0.991 0.9928 0.9936 0.9941 0.9924 0.9917 0.9906 0.995 0.9956 0.9955 0.9907 1 0.9953 0.9911 0.9922 0.9951 0.9948 0.9906 0.994 0.9907 0.9927 0.9914 0.9958 1 0.9984 0.9941 0.9944 0.998 0.998 0.9902 0.9911 0.9929 0.993 0.9918 0.992 0.9932 0.992 0.994 0.9923 0.993 0.9956 0.9907 0.99 0.9918 0.9926 0.995 0.99 0.99 0.9946 0.9907 0.9898 0.9918 0.9986 0.9986 0.9928 0.9986 0.9979 0.994 0.9937 0.9938 0.9942 0.9944 0.993 0.9986 0.9932 0.9934 0.9928 0.9925 0.9944 0.9909 0.9932 0.9934 1.0001 0.992 0.9916 0.998 0.9919 0.9925 0.9977 0.9944 0.991 0.99 0.9917 0.9923 0.9928 0.9923 0.9928 0.9902 0.9893 0.9917 0.9982 1.0005 0.9923 0.9951 0.9956 0.998 0.9928 0.9938 0.9914 0.9955 0.9924 0.9911 0.9917 0.9917 0.9932 0.9955 0.9929 0.9955 0.9958 1.0012 0.9968 
0.9911 0.9924 0.991 0.9946 0.9928 0.9946 0.9917 0.9918 0.9926 0.9931 0.9932 0.9903 0.9928 0.9929 0.9958 0.9955 0.9911 0.9938 0.9942 0.9945 0.9962 0.992 0.9927 0.9948 0.9945 0.9942 0.9952 0.9942 0.9958 0.9918 0.9932 1.0004 0.9972 0.9998 0.9918 0.9918 0.9964 0.9936 0.9931 0.9938 0.9934 0.99 0.9914 0.9904 0.994 0.9938 0.9933 0.9909 0.9942 0.9945 0.9954 0.996 0.9991 0.993 0.9942 0.9934 0.9939 0.9937 0.994 0.9926 0.9951 0.9952 0.9935 0.9938 0.9939 0.9933 0.9927 0.998 0.9997 0.9981 0.992 0.9954 0.992 0.9997 0.9981 0.9943 0.9941 0.9936 0.9996 0.9932 0.9926 0.9936 0.992 0.9936 0.9996 0.993 0.9924 0.9928 0.9926 0.9952 0.9945 0.9945 0.9903 0.9932 0.9953 0.9936 0.9912 0.9962 0.9965 0.9932 0.9967 0.9953 0.9963 0.992 0.991 0.9958 0.99 0.991 0.9958 0.9938 0.9996 0.9946 0.9974 0.9945 0.9946 0.9974 0.9957 0.9931 0.9947 0.9953 0.9931 0.9946 0.9978 0.9989 1.0004 0.9938 0.9934 0.9978 0.9956 0.9982 0.9948 0.9956 0.9982 0.9926 0.991 0.9945 0.9916 0.9953 0.9938 0.9956 0.9906 0.9956 0.9932 0.9914 0.9938 0.996 0.9906 0.98815 0.9942 0.9903 0.9906 0.9935 1.0024 0.9968 0.9906 0.9941 0.9919 0.9928 0.9958 0.9932 0.9957 0.9937 0.9982 0.9928 0.9919 0.9956 0.9957 0.9954 0.993 0.9954 0.9987 0.9956 0.9928 0.9951 0.993 0.9928 0.9926 0.9938 1.0001 0.9933 0.9952 0.9934 0.9988 0.993 0.9952 0.9948 0.9998 0.9971 0.9998 0.9962 0.9948 0.99 0.9942 0.9965 0.9912 0.9978 0.9928 1.0103 0.9956 0.9936 0.9929 0.9966 0.9964 0.996 0.9959 0.9954 0.9914 1.0103 1.0004 0.9911 0.9938 0.9927 0.9922 0.9924 0.9963 0.9936 0.9951 0.9951 0.9955 0.9961 0.9936 0.992 0.9944 0.9944 1.0008 0.9962 0.9986 0.9986 1 0.9986 0.9982 1 0.9949 0.9915 0.9951 0.9986 0.9927 0.9955 0.9952 0.9928 0.9982 0.9914 0.9927 0.9918 0.9944 0.9969 0.9955 0.9954 0.9955 0.9921 0.9934 0.9998 0.9946 0.9984 0.9924 0.9939 0.995 0.9957 0.9953 0.9912 0.9939 0.9921 0.9954 0.9933 0.9941 0.995 0.9977 0.9912 0.9945 0.9952 0.9924 0.9986 0.9953 0.9939 0.9929 0.9988 0.9906 0.9914 0.9978 0.9928 0.9948 0.9978 0.9946 0.9908 0.9954 0.9906 0.99705 0.9982 0.9932 0.9977 0.994 
0.9982 0.9929 0.9924 0.9966 0.9921 0.9967 0.9934 0.9914 0.99705 0.9961 0.9967 0.9926 0.99605 0.99435 0.9948 0.9916 0.997 0.9961 0.9967 0.9961 0.9955 0.9922 0.9918 0.9955 0.9941 0.9955 0.9955 0.9924 0.9973 0.999 0.9941 0.9922 0.9922 0.9953 0.9945 0.9945 0.9957 0.9932 0.9945 0.9913 0.9909 0.9939 0.991 0.9954 0.9943 0.993 1.0002 0.9946 0.9953 0.9918 0.9936 0.9984 0.9956 0.9966 0.9942 0.9984 0.9956 0.9966 0.9974 0.9944 1.0008 0.9974 1.0008 0.9928 0.9944 0.9908 0.9917 0.9911 0.9912 0.9953 0.9932 0.9896 0.9889 0.9912 0.9926 0.9911 0.9964 0.9974 0.9944 0.9974 0.9964 0.9963 0.9948 0.9948 0.9953 0.9948 0.9953 0.9949 0.9988 0.9954 0.992 0.9984 0.9954 0.9926 0.992 0.9976 0.9972 0.991 0.998 0.9966 0.998 1.0007 0.992 0.9925 0.991 0.9934 0.9955 0.9944 0.9981 0.9968 0.9946 0.9946 0.9981 0.9946 0.997 0.9924 0.9958 0.994 0.9958 0.9984 0.9948 0.9932 0.9952 0.9924 0.9945 0.9976 0.9976 0.9938 0.9997 0.994 0.9921 0.9986 0.9987 0.9991 0.9987 0.9991 0.9991 0.9948 0.9987 0.993 0.9988 1 0.9932 0.9991 0.9989 1 1 0.9952 0.9969 0.9966 0.9966 0.9976 0.99 0.9988 0.9942 0.9984 0.9932 0.9969 0.9966 0.9933 0.9916 0.9914 0.9966 0.9958 0.9926 0.9939 0.9953 0.9906 0.9914 0.9958 0.9926 0.9991 0.9994 0.9976 0.9966 0.9953 0.9923 0.993 0.9931 0.9932 0.9926 0.9938 0.9966 0.9974 0.9924 0.9948 0.9964 0.9924 0.9966 0.9974 0.9938 0.9928 0.9959 1.0001 0.9959 1.0001 0.9968 0.9932 0.9954 0.9992 0.9932 0.9939 0.9952 0.9996 0.9966 0.9925 0.996 0.9996 0.9973 0.9937 0.9966 1.0017 0.993 0.993 0.9959 0.9958 1.0017 0.9958 0.9979 0.9941 0.997 0.9934 0.9927 0.9944 0.9927 0.9963 1.0011 1.0011 0.9959 0.9973 0.9966 0.9932 0.9984 0.999 0.999 0.999 0.999 0.999 1.0006 0.9937 0.9954 0.997 0.9912 0.9939 0.999 0.9957 0.9926 0.9994 1.0004 0.9994 1.0004 1.0004 1.0002 0.9922 0.9922 0.9934 0.9926 0.9941 0.9994 1.0004 0.9924 0.9948 0.9935 0.9918 0.9948 0.9924 0.9979 0.993 0.994 0.991 0.993 0.9922 0.9979 0.9937 0.9928 0.9965 0.9928 0.9991 0.9948 0.9925 0.9958 0.9962 0.9965 0.9951 0.9944 0.9916 0.9987 0.9928 0.9926 0.9934 0.9944 0.9949 
0.9926 0.997 0.9949 0.9948 0.992 0.9964 0.9926 0.9982 0.9955 0.9955 0.9958 0.9997 1.0001 1.0001 0.9918 0.9918 0.9931 1.0001 0.9926 0.9966 0.9932 0.9969 0.9925 0.9914 0.996 0.9952 0.9934 0.9939 0.9939 0.9906 0.9901 0.9948 0.995 0.9953 0.9953 0.9952 0.996 0.9948 0.9951 0.9931 0.9962 0.9948 0.9959 0.9962 0.9958 0.9948 0.9948 0.994 0.9942 0.9942 0.9948 0.9964 0.9958 0.9932 0.9986 0.9986 0.9988 0.9953 0.9983 1 0.9951 0.9983 0.9906 0.9981 0.9936 0.9951 0.9953 1.0005 0.9972 1 0.9969 1.0001 1.0001 1.0001 0.9934 0.9969 1.0001 0.9902 0.993 0.9914 0.9941 0.9967 0.9918 0.998 0.9967 0.9918 0.9957 0.9986 0.9958 0.9948 0.9918 0.9923 0.9998 0.9998 0.9914 0.9939 0.9966 0.995 0.9966 0.994 0.9972 0.9998 0.9998 0.9982 0.9924 0.9972 0.997 0.9954 0.9962 0.9972 0.9921 0.9905 0.9998 0.993 0.9941 0.9994 0.9962 0.992 0.9922 0.994 0.9897 0.9954 0.99 0.9948 0.9922 0.998 0.9944 0.9944 0.9986 0.9986 0.9986 0.9986 0.9986 0.996 0.9999 0.9986 0.9986 0.996 0.9951 0.9999 0.993 0.9982 0.992 0.9963 0.995 0.9956 0.997 0.9936 0.9935 0.9963 0.9967 0.9912 0.9981 0.9966 0.9967 0.9963 0.9935 0.9902 0.99 0.996 0.9966 0.9962 0.994 0.996 0.994 0.9944 0.9974 0.996 0.9922 0.9917 0.9918 0.9936 0.9938 0.9918 0.9939 0.9917 0.9981 0.9941 0.9928 0.9952 0.9898 0.9914 0.9981 0.9957 0.998 0.9957 0.9986 0.9983 0.9982 0.997 0.9947 0.997 0.9947 0.99416 0.99516 0.99496 0.9974 0.99579 0.9983 0.99471 0.9974 0.99644 0.99579 0.99699 0.99758 0.9977 0.99397 0.9983 0.99471 0.99243 0.9962 1.00182 0.99384 0.99582 0.9962 0.9924 0.99466 0.99212 0.99449 0.99748 0.99449 0.99748 0.99475 0.99189 0.99827 0.99752 0.99827 0.99479 0.99752 0.99642 1.00047 0.99382 0.99784 0.99486 0.99537 0.99382 0.99838 0.99566 0.99268 0.99566 0.99468 0.9933 0.99307 0.99907 0.99907 0.99907 0.99907 0.99471 0.99471 0.99907 0.99148 0.99383 0.99365 0.99272 0.99148 0.99235 0.99508 0.9946 0.99674 0.99018 0.99235 0.99084 0.99856 0.99591 0.9975 0.9944 0.99173 0.99378 0.99805 0.99534 0.99232 0.99805 0.99078 0.99534 0.99061 0.99182 0.9966 0.9912 0.99779 0.99814 0.99096 
0.99379 0.99426 0.99228 0.99335 0.99595 0.99297 0.99687 0.99297 0.99687 0.99445 0.9986 0.99154 0.9981 0.98993 1.00241 0.99716 0.99437 0.9972 0.99756 0.99509 0.99572 0.99756 0.99175 0.99254 0.99509 0.99676 0.9979 0.99194 0.99077 0.99782 0.99942 0.99708 0.99353 0.99256 0.99199 0.9918 0.99354 0.99244 0.99831 0.99396 0.99724 0.99524 0.9927 0.99802 0.99512 0.99438 0.99679 0.99652 0.99698 0.99474 0.99511 0.99582 0.99125 0.99256 0.9911 0.99168 0.9911 0.99556 1.00098 0.99516 0.99516 0.99518 0.99347 0.9929 0.99347 0.99841 0.99362 0.99361 0.9914 0.99114 0.9925 0.99453 0.9938 0.9938 0.99806 0.9961 1.00016 0.9916 0.99116 0.99319 0.99517 0.99514 0.99566 0.99166 0.99587 0.99558 0.99117 0.99399 0.99741 0.99405 0.99622 1.00051 0.99803 0.99405 0.99773 0.99397 0.99622 0.99713 0.99274 1.00118 0.99176 0.9969 0.99771 0.99411 0.99771 0.99411 0.99194 0.99558 0.99194 0.99558 0.99577 0.99564 0.99578 0.99888 1.00014 0.99441 0.99594 0.99437 0.99594 0.9979 0.99434 0.99203 0.998 0.99316 0.998 0.99314 0.99316 0.99612 0.99295 0.99394 0.99642 0.99642 0.99248 0.99268 0.99954 0.99692 0.99592 0.99592 0.99692 0.99822 0.99822 0.99402 0.99404 0.99787 0.99347 0.99838 0.99839 0.99375 0.99155 0.9936 0.99434 0.9922 0.99571 0.99658 0.99076 0.99496 0.9937 0.99076 0.99542 0.99825 0.99289 0.99432 0.99523 0.99542 0.9959 0.99543 0.99662 0.99088 0.99088 0.99922 0.9966 0.99466 0.99922 0.99836 0.99836 0.99238 0.99645 1 1 0.99376 1 0.99513 0.99556 0.99556 0.99543 0.99886 0.99526 0.99166 0.99691 0.99732 0.99573 0.99656 0.99112 0.99214 0.99165 0.99004 0.99463 0.99683 0.99004 0.99596 0.99898 0.99114 0.99508 0.99306 0.99898 0.99508 0.99114 0.99342 0.99345 0.99772 0.99239 0.99502 0.99502 0.99479 0.99207 0.99497 0.99828 0.99542 0.99542 0.99228 0.99706 0.99497 0.99669 0.99828 0.99269 0.99196 0.99662 0.99475 0.99544 0.99944 0.99475 0.99544 0.9966 0.99066 0.9907 0.99066 0.998 0.9907 0.99066 0.99307 0.99106 0.99696 0.99106 0.99307 0.99167 0.99902 0.98992 0.99182 0.99556 0.99582 0.99182 0.98972 0.99352 0.9946 0.99273 0.99628 
0.99582 0.99553 0.98914 0.99354 0.99976 0.99808 0.99808 0.99808 0.99808 0.99808 0.99808 0.9919 0.99808 0.99499 0.99655 0.99615 0.99296 0.99482 0.99079 0.99366 0.99434 0.98958 0.99434 0.99938 0.99059 0.99835 0.98958 0.99159 0.99159 0.98931 0.9938 0.99558 0.99563 0.98931 0.99691 0.9959 0.99159 0.99628 0.99076 0.99678 0.99678 0.99678 0.99089 0.99537 1.0002 0.99628 0.99089 0.99678 0.99076 0.99332 0.99316 0.99272 0.99636 0.99202 0.99148 0.99064 0.99884 0.99773 1.00013 0.98974 0.99773 1.00013 0.99112 0.99136 0.99132 0.99642 0.99488 0.99527 0.99578 0.99352 0.99199 0.99198 0.99756 0.99578 0.99561 0.99347 0.98936 0.99786 0.99705 0.9942 0.9948 0.99116 0.99688 0.98974 0.99542 0.99154 0.99118 0.99044 0.9914 0.9979 0.98892 0.99114 0.99188 0.99583 0.98892 0.98892 0.99704 0.9911 0.99334 0.99334 0.99094 0.99014 0.99304 0.99652 0.98944 0.99772 0.99367 0.99304 0.99183 0.99126 0.98944 0.99577 0.99772 0.99652 0.99428 0.99388 0.99208 0.99256 0.99388 0.9925 0.99904 0.99216 0.99208 0.99428 0.99165 0.99924 0.99924 0.99924 0.9956 0.99562 0.9972 0.99924 0.9958 0.99976 0.99976 0.99296 0.9957 0.9958 0.99579 0.99541 0.99976 0.99518 0.99168 0.99276 0.99085 0.99873 0.99172 0.99312 0.99276 0.9972 0.99278 0.99092 0.9962 0.99053 0.99858 0.9984 0.99335 0.99053 0.9949 0.9962 0.99092 0.99532 0.99727 0.99026 0.99668 0.99727 0.9952 0.99144 0.99144 0.99015 0.9914 0.99693 0.99035 0.99693 0.99035 0.99006 0.99126 0.98994 0.98985 0.9971 0.99882 0.99477 0.99478 0.99576 0.99578 0.99354 0.99244 0.99084 0.99612 0.99356 0.98952 0.99612 0.99084 0.99244 0.99955 0.99374 0.9892 0.99144 0.99352 0.99352 0.9935 0.99237 0.99144 0.99022 0.99032 1.03898 0.99587 0.99587 0.99587 0.99976 0.99354 0.99976 0.99552 0.99552 0.99587 0.99604 0.99584 0.98894 0.9963 0.993 0.98894 0.9963 0.99068 0.98964 0.99604 0.99584 0.9923 0.99437 0.993 0.99238 0.99801 0.99802 0.99566 0.99067 0.99066 0.9929 0.9934 0.99067 0.98912 0.99066 0.99228 0.98912 0.9958 0.99052 0.99312 0.9968 0.99502 0.99084 0.99573 0.99256 0.9959 0.99084 0.99084 0.99644 
0.99526 0.9954 0.99095 0.99188 0.9909 0.99256 0.9959 0.99581 0.99132 0.98936 0.99136 0.99142 0.99232 0.99232 0.993 0.99311 0.99132 0.98993 0.99208 0.99776 0.99839 0.99574 0.99093 0.99156 0.99278 0.9924 0.98984 0.99035 0.9924 0.99165 0.9923 0.99278 0.99008 0.98964 0.99156 0.9909 0.98984 0.9889 0.99178 0.99076 0.9889 0.99046 0.98999 0.98946 0.98976 0.99046 0.99672 0.99482 0.98945 0.98883 0.99362 0.99075 0.99436 0.98988 0.99158 0.99265 0.99195 0.99168 0.9918 0.99313 0.9895 0.9932 0.99848 0.9909 0.99014 0.9952 0.99652 0.99848 0.99104 0.99772 0.9922 0.99076 0.99622 0.9902 0.99114 0.9938 0.99594 0.9902 0.99035 0.99032 0.99558 0.99622 0.99076 0.99413 0.99043 0.99043 0.98982 0.98934 0.9902 0.99449 0.99629 0.9948 0.98984 0.99326 0.99834 0.99555 0.98975 0.99216 0.99216 0.99834 0.9901 0.98975 0.99573 0.99326 0.99215 0.98993 0.99218 0.99555 0.99564 0.99564 0.99397 0.99576 0.99601 0.99564 0.99397 0.98713 0.99308 0.99308 0.99582 0.99494 0.9929 0.99471 0.9929 0.9929 0.99037 0.99304 0.99026 0.98986 0.99471 0.98951 0.99634 0.99368 0.99792 0.99026 0.99362 0.98919 0.99835 0.99835 0.99038 0.99104 0.99038 0.99286 0.99296 0.99835 0.9954 0.9914 0.99286 0.99604 0.99604 0.99119 0.99007 0.99507 0.99596 0.99011 0.99184 0.99469 0.99469 0.99406 0.99305 0.99096 0.98956 0.9921 0.99496 0.99406 0.99406 0.9888 0.98942 0.99082 0.98802 17.3 1.4 1.3 1.6 5.25 2.4 14.6 11.8 1.5 1.8 7.7 2 1.8 1.4 16.7 8.1 8 4.7 8.1 2.1 16.7 6.4 1.5 7.6 1.5 12.4 1.3 1.7 8.1 7.1 7.6 2.3 6.5 1.4 12.7 1.6 1.1 1.2 6.5 4.6 0.6 10.6 4.6 4.8 2.7 12.6 0.6 9.2 6.6 7 8.45 11.1 18.15 18.15 4.1 4.1 4.6 18.15 4.9 8.3 1.4 11.5 1.8 1.6 2.4 4.9 1.8 4.3 4.4 1.4 1.6 1.3 5.2 5.6 5.3 4.9 2.4 1.6 2.1 1.4 7.1 1.6 10.7 11.1 10.7 1.6 1.6 1.5 1.5 1.6 1.6 8 7.7 2.7 15.1 15.1 8.9 6 12.3 13.1 6.7 12.3 2.3 11.1 1.5 6.7 6 15.2 10.2 13.1 10.7 17.1 17.1 17.1 1.9 10.7 17.1 1.2 1.2 3.1 1.5 10.7 4.9 12.6 10.7 4.9 12.15 12 1.7 2.6 1.4 1.9 16.9 16.9 2.1 7 7.1 5.9 7.1 8.7 13.2 15.3 15.3 13.2 2.7 10.65 10 6.8 15.6 13.2 5.1 3 15.3 2.1 1.9 8.6 8.75 3.6 4.7 1.3 
1.8 9.7 4 2.4 4.7 18.8 1.8 1.8 12.8 12.8 12.8 12.8 12.8 7.8 16.75 12.8 12.8 7.8 5.4 16.75 1.3 10.1 3.8 10.9 6.6 9.8 11.7 1.2 1.4 9.6 12.2 2.6 10.7 4.9 12.2 9.6 1.4 1.1 1 8.2 11.3 7.3 2.3 8.2 2.1 2 10 15.75 3.9 2 1.5 1.6 1.4 1.5 1.4 2 13.8 1.3 3.8 6.9 2.2 1.6 13.8 10.8 12.8 10.8 15.3 12.1 12 11.6 9.2 11.6 9.2 2.8 1.6 6.1 8.5 7.8 14.9 6.2 8.5 8.2 7.8 10.6 11.2 11.6 7.1 14.9 6.2 1.7 7.7 17.3 1.4 7.7 7.7 3.4 1.6 1.4 1.4 10.4 1.4 10.4 4.1 2.8 15.7 10.9 15.7 6.5 10.9 5.9 17.3 1.4 13.5 8.5 6.2 1.4 14.95 7.7 1.3 7.7 1.3 1.3 1.3 15.6 15.6 15.6 15.6 4.9 5 15.6 6.5 1.4 2.7 1.2 6.5 6.4 6.9 7.2 10.6 3.5 6.4 2.3 12.05 7 11.8 1.4 5 2.2 14.6 1.6 1.3 14.6 2.8 1.6 3.3 6.3 8.1 1.6 10.6 11.8 1.7 8.1 1.4 1.3 1.8 7.2 1.1 11.95 1.1 11.95 2.2 12.7 1.4 10.6 1.9 17.8 10.2 4.8 9.8 8.4 7.2 4.8 8.4 4.5 1.4 7.2 11 11.1 2.6 2 10.1 13.3 11.4 1.3 1.4 1.4 7 2 1.2 12.9 5 10.1 3.75 1.7 12.6 1.3 1.6 7.6 8.1 14.9 6 6 7.2 3 1.2 2 4.9 2 8.9 16.45 2 1.9 5.1 4.4 5.8 4.4 12.9 1.3 1.3 1.2 2.7 1.7 8.2 1.5 1.5 12.9 3.9 17.75 4.9 1.6 1.4 2 2 8.2 2.1 1.8 8.5 4.45 5.8 13 2.7 7.3 19.1 8.8 2.7 7.4 2.3 6.85 11.4 0.9 19.35 7.9 11.75 7.7 3 7.7 3 1.5 7.5 1.5 7.5 8.3 7.05 8.4 13.9 17.5 5.6 9.4 4.8 9.4 9.7 6.3 1.6 14.6 2.5 14.6 2.6 2.5 8.2 1.5 2.3 10 10 1.6 1.6 16 10.4 7.4 7.4 10.4 16.05 16.05 2.6 2.5 10.8 1.2 12.1 11.95 1.7 0.8 1.4 1.3 6.3 10.3 15.55 1.5 1.5 1.4 1.5 7.9 13 1 4.85 7.1 7.9 7.5 7.6 10.3 1.7 1.7 19.95 7.7 5.3 19.95 12.7 12.7 1.5 11.3 18.1 18.1 7 18.1 6.4 1.4 1.4 3.1 14.1 7.7 5.2 11.6 10.4 7.5 11.2 0.8 1.4 4.7 3.1 4 11.3 3.1 8.1 14.8 1.4 8.1 3.5 14.8 8.1 1.4 1.5 1.5 12.8 1.6 7.1 7.1 11.2 1.7 6.7 17.3 8.6 8.6 1.5 12.1 6.7 10.7 17.3 1.8 1.4 7.5 4.8 7.1 16.9 4.8 7.1 11.3 1.1 1.2 1.1 12.9 1.2 1.1 1.2 2.3 10 2.3 1.2 1.4 14.9 1.8 1.8 7 8.6 1.8 1.1 1.3 4.9 1.9 10.4 10 8.6 1.7 1.7 18.95 12.8 12.8 12.8 12.8 12.8 12.8 0.7 12.8 1.4 13.3 8.5 1.5 11.7 5 1.2 2.1 1.4 2.1 16 1.1 15.3 1.4 2.8 2.8 0.9 2.5 8.1 8.2 0.9 11.1 7.8 2.8 10.1 3.2 14.2 14.2 14.2 2.9 6 20.4 10.1 2.9 14.2 3.2 0.95 1.7 1.7 9 1.3 1.4 2.4 16 11.4 14.35 2.1 
11.4 14.35 1.1 1.1 1.2 15.8 5.2 5.2 9.6 5.2 1.2 0.8 14.45 9.6 6.9 3.4 2.3 11 5.95 5.1 5.4 1.2 12.6 1 6.6 1.5 1 1.1 6.6 8.2 2 1.4 2 7.5 2 2 13.3 2.85 5.6 5.6 1 3.2 1 7.1 2.4 11.2 9.5 1 1.8 2.6 2.4 8 11.2 7.1 3.3 10.3 1.2 1.6 10.3 9.65 16.4 1.5 1.2 3.3 5 16.3 16.3 16.3 6.5 6.4 10.2 16.3 7.4 13.7 13.7 1.3 7.4 7.4 7.45 7.2 13.7 10.4 1.1 6.5 4.6 13.9 5.2 1.7 6.5 16.4 3.6 1.5 12.4 1.7 6.2 6.2 2.6 1.7 9.3 12.4 1.5 9.1 12 4.8 12.3 12 2.7 3.6 3.6 4.3 1.8 11.8 1.8 11.8 1.8 1.4 6.6 1.55 0.7 6.4 11.8 4.3 5.1 5.8 5.9 1.3 1.4 1.2 7.4 10.8 1.8 7.4 1.2 1.4 14.4 1.7 3.6 3.6 10.05 10.05 10.5 1.9 3.6 1.65 1.9 65.8 6.85 7.4 7.4 20.2 11 20.2 6.2 6.2 6.85 8 8.2 2.2 10.1 7.2 2.2 10.1 1.6 1.3 8 8.2 5.3 14 7.2 1.6 11.8 9.6 6.1 2.7 3.6 1.7 1.6 2.7 1 0.9 1.6 1 10.6 2 1.2 6.2 9.2 5 6.3 3.3 8 1.2 1.2 16.2 11.6 7.2 1.1 3.4 1.4 3.3 8 9.3 2.3 0.9 3.5 1.7 1.3 1.3 5.6 7.4 2.3 1 1.5 10 14.9 9.3 1 1 5.9 5 1.25 3.9 5 0.8 1 5.9 1.6 1.3 1 1.1 1.25 1.4 1.2 5 1.4 1.7 1.8 1.6 1.5 1.7 13.9 5.9 2.1 1.1 6.7 2.7 6.7 3.95 7.75 10.6 1.6 2.5 0.7 11.1 5.15 4.7 9.7 1.7 1.4 2 7.5 9.7 0.8 13.1 1.1 2.2 8.9 1.1 0.9 1.7 6.9 1.1 1 1 7.6 8.9 2.2 1.2 1 1 3.1 1.95 2.2 8.75 11.9 2.7 5.45 6.3 14.4 7.8 1.6 9.1 9.1 14.4 1.3 1.6 11.3 6.3 0.7 1.25 0.7 7.8 10.3 10.3 7.8 8.7 8.3 10.3 7.8 1.2 8.3 8.3 6.2 5 1.8 1.6 1.8 1.8 2.9 6 0.9 1.1 1.6 5.45 14.05 8 13.1 4.9 1.3 2.2 14.9 14.9 0.95 1.4 0.95 1.7 5.6 14.9 7.1 1.2 9.6 11.4 11.4 7.9 5 11.1 8 3.8 10.55 10.2 10.2 9.8 6.3 1.1 4.5 6.3 10.9 9.8 9.8 0.8 0.8 1.2 1.3 9.8 10.2 10.9 6.3 6.3 1.2 0.9 1.1 4.5 3.7 18.1 1.35 5.5 3.1 12.85 19.8 8.25 12.85 3.8 6.9 8.25 11.7 4.6 4 19.8 12.85 1.2 8.9 11.7 6.2 14.8 14.8 10.8 1.6 8.3 8.4 2.5 3.5 17.2 2.1 12.2 11.8 16.8 17.2 1.1 14.7 5.5 6.1 1.2 1.3 8.7 1.7 8.7 10.2 4.5 5.9 1.7 1.4 5.4 7.9 1.1 7 7 7.6 7 12.3 15.3 12.3 1.2 2.3 6.1 7.6 10.2 4.1 2.9 8.5 1.5 3.1 7.9 3.5 4.9 1.1 7 1.2 4.5 2.6 9.9 4.5 9.5 1.5 3.2 2.6 11.2 3.2 2.3 4.9 4.9 1.4 1.5 6.7 2.1 4.3 10.9 7 2.3 2.5 2.6 3.2 2.5 14.7 4.5 2.2 1.9 1.6 17.3 4.2 4.2 2.5 1.9 1.4 0.8 8 1.6 1.7 5.5 17.3 8.6 6.9 
2.1 2.2 1.5 2.5 17.6 4.2 2.9 4.8 11.9 0.9 1.3 6.4 4.3 11.9 8.1 1.3 0.9 17.2 17.2 17.2 8.7 17.2 8.7 7.5 17.2 4.6 3.7 2.2 7.4 15.1 7.4 4.8 7.9 1 15.1 7.4 4.8 4.6 1.4 6.2 6.1 5.1 6.3 0.9 2.3 6.6 7.5 8.6 11.9 2.3 7.1 4.3 1.1 1 7.9 1 1 1 7.3 1.7 1.3 6.4 1.8 1.5 3.8 7.9 1 1.2 5.3 9.1 6.5 9.1 6.3 5.1 6.5 2.4 9.1 7.5 5 6.75 1.2 1.6 16.05 5 12.4 0.95 4.6 1.7 1 1.3 5 2.5 2.6 2.1 12.75 1.1 12.4 3.7 2.65 2.5 8.2 7.3 1.1 6.6 7 14.5 11.8 3 3.7 6 4.6 2.5 3.3 1 1.1 1.4 3.3 8.55 2.5 6.7 3.8 4.5 4.6 4.2 11.3 5.5 4.2 2.2 14.5 14.5 14.5 14.5 14.5 14.5 1.5 18.75 3.6 1.4 5.1 10.5 2 2.6 9.2 1.8 5.7 2.4 1.9 1.4 0.9 4.6 1.4 9.2 1.4 1.8 2.3 2.3 4.4 6.4 2.9 2.8 2.9 4.4 8.2 1 2.9 7 1.8 1.5 7 8.2 7.6 2.3 8.7 1 2.9 6.7 5 1.9 2 1.9 8.5 12.6 5.2 2.1 1.1 1.3 1.1 9.2 1.2 1.1 8.3 1.8 1.4 15.7 4.35 1.8 1.6 2 5 1.8 1.3 1 1.4 8.1 8.6 3.7 5.7 2.35 13.65 13.65 13.65 15.2 4.6 1.2 4.6 6.65 13.55 13.65 9.8 10.3 6.7 15.2 9.9 7.2 1.1 8.3 11.25 12.8 9.65 12.6 12.2 8.3 11.25 1.3 9.9 7.2 1.1 1.1 4.8 1.1 1.4 1.7 10.6 1.4 1.1 5.55 2.1 1.7 9 1.7 1.8 4.7 11.3 3.6 6.9 3.6 4.9 6.95 1.9 4.7 11.3 1.8 11.3 8.2 8.3 9.55 8.4 7.8 7.8 10.2 5.5 7.8 7.4 3.3 5 3.3 5 1.3 1.2 7.4 7.8 9.9 0.7 4.6 5.6 9.5 14.8 4.6 2.1 11.6 1.2 11.6 2.1 20.15 4.7 4.3 14.5 4.9 14.55 14.55 10.05 4.9 14.5 14.55 15.25 3.15 1.3 5.2 1.1 7.1 8.8 18.5 8.8 1.4 1.2 5 1.6 18.75 6 9.4 9.7 4.75 6 5.35 5.35 6.8 6.9 1.4 0.9 1.2 1.3 2.6 12 9.85 3.85 2 1.6 7.8 1.9 2 10.3 1.1 12 3.85 9.85 2 4 1.1 10.4 6.1 1.8 10.4 4.7 4 1.1 6.4 8.15 6.1 4.8 1.2 1.1 1.4 7.4 1.8 1 15.5 15.5 8.4 2.4 3.95 19.95 2 3 15.5 8.4 14.3 4.2 1.4 3 4.9 2.4 14.3 10.7 11 1.4 1.2 12.9 10.8 1.3 2 1.8 1.2 7.5 9.7 3.8 7.2 9.7 6.3 6.3 0.8 8.6 6.3 3.1 7.2 7.1 6.4 14.7 7.2 7.1 1.9 1.2 4.8 1.2 3.4 4.3 8.5 1.8 1.8 19.5 8.5 19.9 8.3 1.8 1.1 16.65 16.65 16.65 0.9 6.1 10.2 0.9 16.65 3.85 4.4 4.5 3.2 4.5 4.4 9.7 4.2 4.2 1.1 9.7 4.2 5.6 4.2 1.6 1.6 1.1 14.6 2.6 1.2 7.25 6.55 7 1.5 1.4 7.25 1 4.2 17.5 17.5 17.5 1.5 1.3 3.9 4.2 7.6 1 1.1 11.8 1.4 9.7 12.9 1.6 7.2 7.1 1.9 8.8 7.2 1.4 14.3 14.3 8.8 1.4 1.8 14.3 7.2 
1.2 11.8 0.9 12.6 26.05 4.7 12.6 1.2 26.05 6.1 11.8 0.9 5.6 5.3 5.7 8 8 17.6 8 8.8 1.5 1.4 4.8 2.4 3.7 4.9 5.7 5.7 4.9 2 5.1 4.5 3.2 6.65 1.6 4 17.75 1.4 17.75 7.2 5.7 8.5 11.4 5.4 2.7 4.3 1.2 1.8 1.3 5.7 2.7 11.7 4.3 11 1.6 11.6 6.2 1.8 1.2 1 2.4 1.2 8.2 18.8 9.6 12.9 9.2 1.2 12.9 8 12.9 1.6 12 2.5 9.2 4.4 8.8 9.6 8 18.8 1.3 1.2 12.9 1.2 1.6 1.5 18.15 13.1 13.1 13.1 13.1 1 1.6 11.8 1.4 1 13.1 10.6 10.4 1.1 7.4 1.2 3.4 18.15 8 2.5 2 2 6.9 1.2 9.4 2.9 6.9 5.4 1.3 20.8 10.3 1.3 1.6 13.1 1.8 8 1.6 1.4 14.7 14.7 14.7 14.7 14.7 14.7 14.7 1.8 10.6 12.5 6.8 14.7 2.9 1.4 1.4 2.1 7.4 2.9 1.4 1.4 7.4 5 2.5 6.1 2.7 2.1 12.9 12.9 12.9 13.7 12.9 2.4 9.8 13.7 1.3 12.1 6.1 7.7 6.1 1.4 7.7 12.1 6.8 9.2 8.3 17.4 2.7 12.8 8.2 8.1 8.2 8.3 8 11.8 12 1.7 17.4 13.9 10.7 2 2.2 1.3 1.1 2 6.4 1.3 1.1 10.7 6.4 6.3 6.4 15.1 2 2 2.2 12.1 8.8 8.8 5.1 6.8 6.8 3.7 12.2 5.7 8.1 2.5 4 6.8 1 5.1 5.8 10.6 3.5 3.5 16.4 4.8 3.3 1.2 1.2 4.8 3.3 2.5 8.7 1.6 4 2.5 16.2 9 16.2 1.4 7 9 3.1 1.5 4.6 4.8 4.6 1.5 2.7 6.3 7.2 7.2 12.4 6.6 6.6 4 4.8 1.3 7.2 11.1 12.4 9.8 6.6 13.3 11.7 8 1.6 16.55 1.5 10.2 6.6 17.8 17.8 1.5 7.4 17.8 2 7.4 2 17.8 12.1 8.2 1.5 8.7 3.5 6.4 2.1 7.7 12.3 1.3 8.7 3.5 1.1 2.8 3.5 1.9 3.8 3.8 2.4 4.8 4.8 6.2 1.3 3.8 1.5 4.8 1.9 6.2 7.9 1.6 1.4 2.6 14.8 2.4 0.9 0.9 1.2 9.9 3.9 15.6 15.6 1.5 1.6 7.8 5.6 1.3 16.7 7.95 6.7 1.1 6.3 8.9 1 1.5 6.6 6.2 6.3 2.1 2.2 5.4 8.9 1 17.9 2.6 1.3 17.9 2.6 2.3 4.3 7.1 7.1 11.9 11.7 5.8 3.8 12.4 6.5 7.1 7.6 7.9 2.8 10.6 2.8 1.5 7.6 7.9 1.7 7.6 7.5 1.7 1.7 12.1 4.5 1.7 8 7.6 8.6 8.6 14.6 1.6 8.6 14.6 1.1 3.7 8.9 8.9 4.7 8.9 3.1 5.8 5.8 5.8 1 15.8 1.5 5.2 1.5 2.5 1 15.8 5.9 3.1 3.1 5.8 11.5 18 4.8 8.5 1.6 18 4.8 5.9 1.1 8.5 13.1 4.1 2.9 13.1 1.1 1.5 7.75 1.15 1 17.8 5.7 17.8 7.4 1.4 1.4 1 4.4 1.6 7.9 15.5 15.5 15.5 15.5 17.55 13.5 13.5 1.3 15.5 11.6 7.9 15.5 17.55 11.6 13.15 1.9 13.5 1.3 6.1 6.1 1.9 1.9 1.6 11.3 8.4 8.3 8.4 12.2 8 1.3 12.7 1.3 10.5 12.5 9.6 1.5 1.5 7.8 10.8 12.5 8.6 1.2 14.5 3.7 1.1 1.1 3.8 4.6 10.2 7.9 2.4 10.7 4.9 10.7 1.1 7.9 5.6 2.4 14.2 
9.5 9.5 4.1 4.7 1.4 0.9 20.3 3.5 2.7 1.2 1.2 2 1.1 1.5 1.2 18.1 18.1 3.6 3.5 12.1 17.45 12.1 3 1.6 5.7 5.6 6.8 15.6 6 1.8 8.6 8.6 11.5 7.8 2.4 5 8.6 1.5 5.4 11.9 11.9 9 10 11.9 11.9 15.5 5.4 15 1.4 9.4 3.7 15 1.4 6.5 1.4 6.3 13.7 13.7 13.7 13.7 13.7 13.7 1.5 1.6 1.4 3.5 1 1.4 1.5 13.7 1.6 5.2 1.4 11.9 2.4 3.2 1.7 4.2 15.4 13 5.6 9.7 2.5 4 15.4 1.2 2 1.2 5.1 1.4 1.2 6.5 1.3 6.5 2.7 1.3 7.4 12.9 1.3 1.2 2.6 2.3 1.3 10.5 2.6 14.4 1.2 3.1 1.7 6 11.8 6.2 1.4 12.1 12.1 12.1 3.9 4.6 12.1 1.2 8.1 3.9 1.1 6.5 10.1 10.7 3.2 12.4 5.2 5 2.5 9.2 6.9 2 15 15 1.2 15 1.8 10.8 3.9 4.2 2 13.5 13.3 2.2 1.4 1.6 2.2 14.8 1.8 14.8 1.3 9.9 5.1 5.1 1.5 1.5 11.1 5.25 2.3 7.9 8 1.4 5.25 2.3 2.3 3.5 13.7 9.9 15.4 16 16 16 16 2.4 5.5 2.3 16.8 16 17.8 17.8 6.8 6.8 6.8 6.8 1.6 4.7 11.8 17.8 15.7 5.8 15.7 9 15.7 5.8 8.8 10.2 6.6 6.5 8.9 11.1 4.2 1.6 7.4 11.5 1.6 2 4.8 9.8 1.9 4.2 1.6 7.3 5.4 10.4 1.9 7.3 5.4 7.7 11.5 1.2 2.2 1 8.2 8.3 8.2 9.3 8.1 8.2 8.3 13.9 13.9 13.9 13.9 13.9 13.9 13.9 2 13.9 15.7 1.2 1.5 1.2 3.2 1.2 2.6 13.2 10.4 5.7 2.5 1.6 1.4 7.4 2.5 5.6 3.6 7.5 5.8 1.6 1.5 2.9 11.2 9.65 10.1 3.2 11.2 11.45 9.65 4.5 2.7 3.5 1.7 2.1 4.8 5 2.6 6.6 5 7.3 5 1.7 2.6 8.2 8.2 5 1.2 7.1 9.5 15.8 15.5 15.8 17.05 12.7 12.3 11.8 11.8 11.8 12.3 11.8 13.6 5.2 6.2 7.9 7.9 3.3 2.8 7.9 3.3 6.3 4.9 10.4 4.9 10.4 16 6.3 2.2 17.3 17.3 17.3 17.3 2.2 2.2 17.3 6.6 6.5 12.3 5 2.8 13.6 2.8 5.4 10.9 1.7 9.15 4.5 9.15 1.4 5.9 16.4 1.2 16.4 5.9 7.8 7.8 2.8 2.9 2.5 12.8 12.2 7.7 2.8 2.9 17.3 19.3 19.3 19.3 2.7 6.4 17.3 2.4 2.8 1.7 15.4 15.4 4.1 6.6 1.2 2.1 1 1.1 1.4 1.6 9.8 1.9 1.3 7.9 7.9 4.5 22.6 7.9 3.5 1.2 4.5 2 7.8 0.9 2.9 2.9 3.5 4.2 9.7 10.5 1.1 16.1 1.1 8.1 6.2 7.7 2.4 16.3 2.3 8.4 8.5 6 1.1 1.75 2.6 1.3 2.1 1.1 1.1 2.8 9 2.8 2.2 5.1 3.5 12.7 7.5 2 3.5 14.3 9.8 12.7 12.7 5.1 3.5 12.7 12.9 12.9 1.3 10.5 1.5 12.7 12.9 1.2 6.2 8.8 3.9 1.3 9.1 9.1 3.9 1.8 2.1 1.4 14.7 9.1 1.9 1.8 9.6 3.9 1.3 11.8 1.9 12 7.9 9.3 4.6 2.2 10.2 10.6 1.4 9.1 11.1 9.1 4.4 2.8 1.1 1.3 1.2 3.3 9.7 2.3 1.1 11.4 1.2 14.7 13.8 1.3 6.3 7.9 
2 11.8 1.2 10 5.2 1.2 7.2 9.9 5.3 13.55 2.2 9.9 4.3 13 13.55 1 1.1 6.9 13.4 4.6 9.9 3 5.8 12.9 3.2 0.8 2.5 2.4 7.2 7.3 6.3 4.25 1.2 2 4.25 4.7 4.5 1.4 4.1 5.3 4.2 6.65 8.2 2.6 2.6 2 12.2 2.3 8.2 5 10.7 10.8 1.7 1.3 1.7 12.7 1.3 1.2 1.3 5.7 3.4 1.1 1 1 1.65 6.8 6.8 4.9 1.4 2.5 10.8 10.8 10.8 10.8 2.8 1.3 2 1.1 8.2 6 6.1 8.2 8.8 6.1 6 1.2 11.4 1.3 1.3 6.2 3.2 4.5 9.9 6.2 11.4 1.3 1.3 0.9 0.7 1 1 10.4 1.3 12.5 12.5 12.5 12.5 19.25 1.1 12.5 19.25 9 1.2 9 1.3 12.8 12.8 7.6 7.6 1.4 8.3 9 1.85 12.55 1.4 1.8 4 12.55 9 3 1.85 7.9 2.6 1.2 7.1 7.9 1.3 10.7 7.7 8.4 10.7 12.7 1.8 7.7 10.5 1.6 1.85 10.5 10.5 1 1.2 1.7 1.6 9 1.9 1.2 1.5 3.9 3.6 1.2 5 2.9 10.4 11.4 18.35 18.4 1.2 7.1 1.3 1.5 10.2 2.2 3.5 3.5 3.9 7.4 7.4 11 1.5 3.9 5.4 1.5 5 1.2 13 13 13 13 8.6 1.7 1.2 1.2 1.2 2 19.4 0.8 6.3 6.4 12.1 12.1 12.9 2.4 4.3 4.2 12.9 1.7 2.2 12.1 3.4 7.4 7.3 1.1 1.1 1.4 14.5 8 1.1 1.1 2.2 5.8 0.9 6.4 10.9 7.3 8.3 1.3 3.3 1 1.1 1 5.1 3.2 12.6 3.7 1.7 5.1 1 1.3 1.5 4.6 10.3 6.1 6.1 1.2 10.3 9.9 1.6 1.1 1.5 1.2 1.5 1.1 11.5 7.8 7.4 1.45 8.9 1.1 1 2.5 1.1 2.4 2.3 5.1 2.5 8.9 2.5 8.9 1.6 1.4 3.9 13.7 13.7 9.2 7.8 7.6 7.7 3 1.3 4 1.1 2 1.9 1.4 4.5 10.1 6.6 1.9 12.4 1.6 2.5 1.2 2.5 0.8 0.9 8.1 8.1 11.75 1.3 1.9 8.3 8.1 5.7 1.9 1.2 11.75 2.2 0.9 1.3 1.6 8 1.2 1.1 0.8 \ No newline at end of file diff --git a/pandas/tests/reshape/merge/__init__.py b/pandas/tests/reshape/merge/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/reshape/merge/data/allow_exact_matches.csv b/pandas/tests/reshape/merge/data/allow_exact_matches.csv new file mode 100644 index 00000000..0446fb74 --- /dev/null +++ b/pandas/tests/reshape/merge/data/allow_exact_matches.csv @@ -0,0 +1,28 @@ +time,ticker,price,quantity,marketCenter,bid,ask +20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,, +20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 +20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 +20160525 
13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 +20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, +20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,, +20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,, +20160525 13:30:00.075,AAPL,98.55,6,ARCA,, +20160525 13:30:00.075,AAPL,98.55,6,ARCA,, +20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 diff --git a/pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv b/pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv new file mode 100644 index 00000000..0446fb74 --- /dev/null +++ b/pandas/tests/reshape/merge/data/allow_exact_matches_and_tolerance.csv @@ -0,0 +1,28 @@ +time,ticker,price,quantity,marketCenter,bid,ask +20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,, +20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 +20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 +20160525 
13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 +20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, +20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,, +20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,, +20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,, +20160525 13:30:00.075,AAPL,98.55,6,ARCA,, +20160525 13:30:00.075,AAPL,98.55,6,ARCA,, +20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.95,51.95 diff --git a/pandas/tests/reshape/merge/data/asof.csv b/pandas/tests/reshape/merge/data/asof.csv new file mode 100644 index 00000000..d7d061bc --- /dev/null +++ b/pandas/tests/reshape/merge/data/asof.csv @@ -0,0 +1,28 @@ +time,ticker,price,quantity,marketCenter,bid,ask +20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,51.95,51.95 +20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 +20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 +20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, +20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,98.55,98.56 +20160525 
13:30:00.075,AAPL,98.65,75,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 +20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.92,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 diff --git a/pandas/tests/reshape/merge/data/asof2.csv b/pandas/tests/reshape/merge/data/asof2.csv new file mode 100644 index 00000000..2c9c0392 --- /dev/null +++ b/pandas/tests/reshape/merge/data/asof2.csv @@ -0,0 +1,78 @@ +time,ticker,price,quantity,marketCenter,bid,ask +20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,51.95,51.95 +20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 +20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 +20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, +20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,35,NASDAQ,98.55,98.56 +20160525 
13:30:00.075,AAPL,98.65,10,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 +20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.92,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 +20160525 13:30:00.084,AAPL,98.64,40,NASDAQ,98.55,98.56 +20160525 13:30:00.084,AAPL,98.55,149,EDGX,98.55,98.56 +20160525 13:30:00.086,AAPL,98.56,500,ARCA,98.55,98.63 +20160525 13:30:00.104,AAPL,98.63,647,EDGX,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,300,EDGX,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,50,NASDAQ,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,50,NASDAQ,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,70,NASDAQ,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,70,NASDAQ,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,1,NASDAQ,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,62,NASDAQ,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,10,NASDAQ,98.62,98.63 +20160525 13:30:00.104,AAPL,98.63,100,ARCA,98.62,98.63 +20160525 13:30:00.105,AAPL,98.63,100,ARCA,98.62,98.63 +20160525 13:30:00.105,AAPL,98.63,700,ARCA,98.62,98.63 +20160525 13:30:00.106,AAPL,98.63,61,EDGX,98.62,98.63 +20160525 13:30:00.107,AAPL,98.63,100,ARCA,98.62,98.63 +20160525 13:30:00.107,AAPL,98.63,53,ARCA,98.62,98.63 +20160525 13:30:00.108,AAPL,98.63,100,ARCA,98.62,98.63 +20160525 13:30:00.108,AAPL,98.63,839,ARCA,98.62,98.63 +20160525 13:30:00.115,AAPL,98.63,5,EDGX,98.62,98.63 +20160525 13:30:00.118,AAPL,98.63,295,EDGX,98.62,98.63 +20160525 13:30:00.118,AAPL,98.63,5,EDGX,98.62,98.63 +20160525 13:30:00.128,AAPL,98.63,100,NASDAQ,98.62,98.63 +20160525 
13:30:00.128,AAPL,98.63,100,NASDAQ,98.62,98.63 +20160525 13:30:00.128,MSFT,51.92,100,ARCA,51.92,51.95 +20160525 13:30:00.129,AAPL,98.62,100,NASDAQ,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,10,NASDAQ,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,59,NASDAQ,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,31,NASDAQ,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,69,NASDAQ,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,12,NASDAQ,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,12,EDGX,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,100,ARCA,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,100,ARCA,98.61,98.63 +20160525 13:30:00.130,MSFT,51.95,317,ARCA,51.93,51.95 +20160525 13:30:00.130,MSFT,51.95,283,ARCA,51.93,51.95 +20160525 13:30:00.135,MSFT,51.93,100,EDGX,51.92,51.95 +20160525 13:30:00.135,AAPL,98.62,100,ARCA,98.61,98.62 +20160525 13:30:00.144,AAPL,98.62,12,NASDAQ,98.61,98.62 +20160525 13:30:00.144,AAPL,98.62,88,NASDAQ,98.61,98.62 +20160525 13:30:00.144,AAPL,98.62,162,NASDAQ,98.61,98.62 +20160525 13:30:00.144,AAPL,98.61,100,BATS,98.61,98.62 +20160525 13:30:00.144,AAPL,98.62,61,ARCA,98.61,98.62 +20160525 13:30:00.144,AAPL,98.62,25,ARCA,98.61,98.62 +20160525 13:30:00.144,AAPL,98.62,14,ARCA,98.61,98.62 +20160525 13:30:00.145,AAPL,98.62,12,ARCA,98.6,98.63 +20160525 13:30:00.145,AAPL,98.62,100,ARCA,98.6,98.63 +20160525 13:30:00.145,AAPL,98.63,100,NASDAQ,98.6,98.63 +20160525 13:30:00.145,AAPL,98.63,100,NASDAQ,98.6,98.63 diff --git a/pandas/tests/reshape/merge/data/quotes.csv b/pandas/tests/reshape/merge/data/quotes.csv new file mode 100644 index 00000000..3f31d2cf --- /dev/null +++ b/pandas/tests/reshape/merge/data/quotes.csv @@ -0,0 +1,17 @@ +time,ticker,bid,ask +20160525 13:30:00.023,GOOG,720.50,720.93 +20160525 13:30:00.023,MSFT,51.95,51.95 +20160525 13:30:00.041,MSFT,51.95,51.95 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 
13:30:00.072,GOOG,720.50,720.88 +20160525 13:30:00.075,AAPL,98.55,98.56 +20160525 13:30:00.076,AAPL,98.55,98.56 +20160525 13:30:00.076,AAPL,98.55,98.56 +20160525 13:30:00.076,AAPL,98.55,98.56 +20160525 13:30:00.078,MSFT,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,51.95 +20160525 13:30:00.078,MSFT,51.92,51.95 diff --git a/pandas/tests/reshape/merge/data/quotes2.csv b/pandas/tests/reshape/merge/data/quotes2.csv new file mode 100644 index 00000000..7ade1e7f --- /dev/null +++ b/pandas/tests/reshape/merge/data/quotes2.csv @@ -0,0 +1,57 @@ +time,ticker,bid,ask +20160525 13:30:00.023,GOOG,720.50,720.93 +20160525 13:30:00.023,MSFT,51.95,51.95 +20160525 13:30:00.041,MSFT,51.95,51.95 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 13:30:00.048,GOOG,720.50,720.93 +20160525 13:30:00.072,GOOG,720.50,720.88 +20160525 13:30:00.075,AAPL,98.55,98.56 +20160525 13:30:00.076,AAPL,98.55,98.56 +20160525 13:30:00.076,AAPL,98.55,98.56 +20160525 13:30:00.076,AAPL,98.55,98.56 +20160525 13:30:00.078,MSFT,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,51.95 +20160525 13:30:00.078,MSFT,51.95,51.95 +20160525 13:30:00.078,MSFT,51.92,51.95 +20160525 13:30:00.079,MSFT,51.92,51.95 +20160525 13:30:00.080,AAPL,98.55,98.56 +20160525 13:30:00.084,AAPL,98.55,98.56 +20160525 13:30:00.086,AAPL,98.55,98.63 +20160525 13:30:00.088,AAPL,98.65,98.63 +20160525 13:30:00.089,AAPL,98.63,98.63 +20160525 13:30:00.104,AAPL,98.63,98.63 +20160525 13:30:00.104,AAPL,98.63,98.63 +20160525 13:30:00.104,AAPL,98.63,98.63 +20160525 13:30:00.104,AAPL,98.63,98.63 +20160525 13:30:00.104,AAPL,98.62,98.63 +20160525 13:30:00.105,AAPL,98.62,98.63 +20160525 13:30:00.107,AAPL,98.62,98.63 +20160525 13:30:00.115,AAPL,98.62,98.63 +20160525 13:30:00.115,AAPL,98.62,98.63 +20160525 13:30:00.118,AAPL,98.62,98.63 +20160525 13:30:00.128,AAPL,98.62,98.63 +20160525 13:30:00.128,AAPL,98.62,98.63 +20160525 
13:30:00.129,AAPL,98.62,98.63 +20160525 13:30:00.129,AAPL,98.61,98.63 +20160525 13:30:00.129,AAPL,98.62,98.63 +20160525 13:30:00.129,AAPL,98.62,98.63 +20160525 13:30:00.129,AAPL,98.61,98.63 +20160525 13:30:00.130,MSFT,51.93,51.95 +20160525 13:30:00.130,MSFT,51.93,51.95 +20160525 13:30:00.130,AAPL,98.61,98.63 +20160525 13:30:00.131,AAPL,98.61,98.62 +20160525 13:30:00.131,AAPL,98.61,98.62 +20160525 13:30:00.135,MSFT,51.92,51.95 +20160525 13:30:00.135,AAPL,98.61,98.62 +20160525 13:30:00.136,AAPL,98.61,98.62 +20160525 13:30:00.136,AAPL,98.61,98.62 +20160525 13:30:00.144,AAPL,98.61,98.62 +20160525 13:30:00.144,AAPL,98.61,98.62 +20160525 13:30:00.145,AAPL,98.61,98.62 +20160525 13:30:00.145,AAPL,98.61,98.63 +20160525 13:30:00.145,AAPL,98.61,98.63 +20160525 13:30:00.145,AAPL,98.60,98.63 +20160525 13:30:00.145,AAPL,98.61,98.63 +20160525 13:30:00.145,AAPL,98.60,98.63 diff --git a/pandas/tests/reshape/merge/data/tolerance.csv b/pandas/tests/reshape/merge/data/tolerance.csv new file mode 100644 index 00000000..d7d061bc --- /dev/null +++ b/pandas/tests/reshape/merge/data/tolerance.csv @@ -0,0 +1,28 @@ +time,ticker,price,quantity,marketCenter,bid,ask +20160525 13:30:00.023,MSFT,51.95,75,NASDAQ,51.95,51.95 +20160525 13:30:00.038,MSFT,51.95,155,NASDAQ,51.95,51.95 +20160525 13:30:00.048,GOOG,720.77,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.92,100,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,200,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,300,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,600,NASDAQ,720.5,720.93 +20160525 13:30:00.048,GOOG,720.93,44,NASDAQ,720.5,720.93 +20160525 13:30:00.074,AAPL,98.67,478343,NASDAQ,, +20160525 13:30:00.075,AAPL,98.67,478343,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.66,6,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,30,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,75,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,20,NASDAQ,98.55,98.56 +20160525 
13:30:00.075,AAPL,98.65,35,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.65,10,NASDAQ,98.55,98.56 +20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 +20160525 13:30:00.075,AAPL,98.55,6,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,1000,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,300,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,400,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,600,ARCA,98.55,98.56 +20160525 13:30:00.076,AAPL,98.56,200,ARCA,98.55,98.56 +20160525 13:30:00.078,MSFT,51.95,783,NASDAQ,51.92,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 +20160525 13:30:00.078,MSFT,51.95,100,NASDAQ,51.92,51.95 diff --git a/pandas/tests/reshape/merge/data/trades.csv b/pandas/tests/reshape/merge/data/trades.csv new file mode 100644 index 00000000..b26a4ce7 --- /dev/null +++ b/pandas/tests/reshape/merge/data/trades.csv @@ -0,0 +1,28 @@ +time,ticker,price,quantity,marketCenter +20160525 13:30:00.023,MSFT,51.9500,75,NASDAQ +20160525 13:30:00.038,MSFT,51.9500,155,NASDAQ +20160525 13:30:00.048,GOOG,720.7700,100,NASDAQ +20160525 13:30:00.048,GOOG,720.9200,100,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,200,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,300,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,600,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,44,NASDAQ +20160525 13:30:00.074,AAPL,98.6700,478343,NASDAQ +20160525 13:30:00.075,AAPL,98.6700,478343,NASDAQ +20160525 13:30:00.075,AAPL,98.6600,6,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,30,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,75,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,20,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,35,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,10,NASDAQ +20160525 13:30:00.075,AAPL,98.5500,6,ARCA +20160525 13:30:00.075,AAPL,98.5500,6,ARCA +20160525 13:30:00.076,AAPL,98.5600,1000,ARCA +20160525 13:30:00.076,AAPL,98.5600,200,ARCA +20160525 13:30:00.076,AAPL,98.5600,300,ARCA +20160525 13:30:00.076,AAPL,98.5600,400,ARCA 
+20160525 13:30:00.076,AAPL,98.5600,600,ARCA +20160525 13:30:00.076,AAPL,98.5600,200,ARCA +20160525 13:30:00.078,MSFT,51.9500,783,NASDAQ +20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ +20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ diff --git a/pandas/tests/reshape/merge/data/trades2.csv b/pandas/tests/reshape/merge/data/trades2.csv new file mode 100644 index 00000000..64021faa --- /dev/null +++ b/pandas/tests/reshape/merge/data/trades2.csv @@ -0,0 +1,78 @@ +time,ticker,price,quantity,marketCenter +20160525 13:30:00.023,MSFT,51.9500,75,NASDAQ +20160525 13:30:00.038,MSFT,51.9500,155,NASDAQ +20160525 13:30:00.048,GOOG,720.7700,100,NASDAQ +20160525 13:30:00.048,GOOG,720.9200,100,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,200,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,300,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,600,NASDAQ +20160525 13:30:00.048,GOOG,720.9300,44,NASDAQ +20160525 13:30:00.074,AAPL,98.6700,478343,NASDAQ +20160525 13:30:00.075,AAPL,98.6700,478343,NASDAQ +20160525 13:30:00.075,AAPL,98.6600,6,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,30,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,75,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,20,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,35,NASDAQ +20160525 13:30:00.075,AAPL,98.6500,10,NASDAQ +20160525 13:30:00.075,AAPL,98.5500,6,ARCA +20160525 13:30:00.075,AAPL,98.5500,6,ARCA +20160525 13:30:00.076,AAPL,98.5600,1000,ARCA +20160525 13:30:00.076,AAPL,98.5600,200,ARCA +20160525 13:30:00.076,AAPL,98.5600,300,ARCA +20160525 13:30:00.076,AAPL,98.5600,400,ARCA +20160525 13:30:00.076,AAPL,98.5600,600,ARCA +20160525 13:30:00.076,AAPL,98.5600,200,ARCA +20160525 13:30:00.078,MSFT,51.9500,783,NASDAQ +20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ +20160525 13:30:00.078,MSFT,51.9500,100,NASDAQ +20160525 13:30:00.084,AAPL,98.6400,40,NASDAQ +20160525 13:30:00.084,AAPL,98.5500,149,EDGX +20160525 13:30:00.086,AAPL,98.5600,500,ARCA +20160525 13:30:00.104,AAPL,98.6300,647,EDGX +20160525 13:30:00.104,AAPL,98.6300,300,EDGX +20160525 
13:30:00.104,AAPL,98.6300,50,NASDAQ +20160525 13:30:00.104,AAPL,98.6300,50,NASDAQ +20160525 13:30:00.104,AAPL,98.6300,70,NASDAQ +20160525 13:30:00.104,AAPL,98.6300,70,NASDAQ +20160525 13:30:00.104,AAPL,98.6300,1,NASDAQ +20160525 13:30:00.104,AAPL,98.6300,62,NASDAQ +20160525 13:30:00.104,AAPL,98.6300,10,NASDAQ +20160525 13:30:00.104,AAPL,98.6300,100,ARCA +20160525 13:30:00.105,AAPL,98.6300,100,ARCA +20160525 13:30:00.105,AAPL,98.6300,700,ARCA +20160525 13:30:00.106,AAPL,98.6300,61,EDGX +20160525 13:30:00.107,AAPL,98.6300,100,ARCA +20160525 13:30:00.107,AAPL,98.6300,53,ARCA +20160525 13:30:00.108,AAPL,98.6300,100,ARCA +20160525 13:30:00.108,AAPL,98.6300,839,ARCA +20160525 13:30:00.115,AAPL,98.6300,5,EDGX +20160525 13:30:00.118,AAPL,98.6300,295,EDGX +20160525 13:30:00.118,AAPL,98.6300,5,EDGX +20160525 13:30:00.128,AAPL,98.6300,100,NASDAQ +20160525 13:30:00.128,AAPL,98.6300,100,NASDAQ +20160525 13:30:00.128,MSFT,51.9200,100,ARCA +20160525 13:30:00.129,AAPL,98.6200,100,NASDAQ +20160525 13:30:00.129,AAPL,98.6200,10,NASDAQ +20160525 13:30:00.129,AAPL,98.6200,59,NASDAQ +20160525 13:30:00.129,AAPL,98.6200,31,NASDAQ +20160525 13:30:00.129,AAPL,98.6200,69,NASDAQ +20160525 13:30:00.129,AAPL,98.6200,12,NASDAQ +20160525 13:30:00.129,AAPL,98.6200,12,EDGX +20160525 13:30:00.129,AAPL,98.6200,100,ARCA +20160525 13:30:00.129,AAPL,98.6200,100,ARCA +20160525 13:30:00.130,MSFT,51.9500,317,ARCA +20160525 13:30:00.130,MSFT,51.9500,283,ARCA +20160525 13:30:00.135,MSFT,51.9300,100,EDGX +20160525 13:30:00.135,AAPL,98.6200,100,ARCA +20160525 13:30:00.144,AAPL,98.6200,12,NASDAQ +20160525 13:30:00.144,AAPL,98.6200,88,NASDAQ +20160525 13:30:00.144,AAPL,98.6200,162,NASDAQ +20160525 13:30:00.144,AAPL,98.6100,100,BATS +20160525 13:30:00.144,AAPL,98.6200,61,ARCA +20160525 13:30:00.144,AAPL,98.6200,25,ARCA +20160525 13:30:00.144,AAPL,98.6200,14,ARCA +20160525 13:30:00.145,AAPL,98.6200,12,ARCA +20160525 13:30:00.145,AAPL,98.6200,100,ARCA +20160525 13:30:00.145,AAPL,98.6300,100,NASDAQ +20160525 
13:30:00.145,AAPL,98.6300,100,NASDAQ diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py new file mode 100644 index 00000000..a660acb1 --- /dev/null +++ b/pandas/tests/reshape/merge/test_join.py @@ -0,0 +1,884 @@ +import numpy as np +from numpy.random import randn +import pytest + +from pandas._libs import join as libjoin + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, concat, merge +import pandas._testing as tm +from pandas.tests.reshape.merge.test_merge import NGROUPS, N, get_test_data + +a_ = np.array + + +class TestJoin: + def setup_method(self, method): + # aggregate multiple columns + self.df = DataFrame( + { + "key1": get_test_data(), + "key2": get_test_data(), + "data1": np.random.randn(N), + "data2": np.random.randn(N), + } + ) + + # exclude a couple keys for fun + self.df = self.df[self.df["key2"] > 1] + + self.df2 = DataFrame( + { + "key1": get_test_data(n=N // 5), + "key2": get_test_data(ngroups=NGROUPS // 2, n=N // 5), + "value": np.random.randn(N // 5), + } + ) + + index, data = tm.getMixedTypeDict() + self.target = DataFrame(data, index=index) + + # Join on string value + self.source = DataFrame( + {"MergedA": data["A"], "MergedD": data["D"]}, index=data["C"] + ) + + def test_cython_left_outer_join(self): + left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) + right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) + max_group = 5 + + ls, rs = libjoin.left_outer_join(left, right, max_group) + + exp_ls = left.argsort(kind="mergesort") + exp_rs = right.argsort(kind="mergesort") + + exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10]) + exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5, -1, -1]) + + exp_ls = exp_ls.take(exp_li) + exp_ls[exp_li == -1] = -1 + + exp_rs = exp_rs.take(exp_ri) + exp_rs[exp_ri == -1] = -1 + + tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) + 
+ def test_cython_right_outer_join(self): + left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) + right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) + max_group = 5 + + rs, ls = libjoin.left_outer_join(right, left, max_group) + + exp_ls = left.argsort(kind="mergesort") + exp_rs = right.argsort(kind="mergesort") + + # 0 1 1 1 + exp_li = a_( + [ + 0, + 1, + 2, + 3, + 4, + 5, + 3, + 4, + 5, + 3, + 4, + 5, + # 2 2 4 + 6, + 7, + 8, + 6, + 7, + 8, + -1, + ] + ) + exp_ri = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6]) + + exp_ls = exp_ls.take(exp_li) + exp_ls[exp_li == -1] = -1 + + exp_rs = exp_rs.take(exp_ri) + exp_rs[exp_ri == -1] = -1 + + tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) + + def test_cython_inner_join(self): + left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) + right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64) + max_group = 5 + + ls, rs = libjoin.inner_join(left, right, max_group) + + exp_ls = left.argsort(kind="mergesort") + exp_rs = right.argsort(kind="mergesort") + + exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8]) + exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5]) + + exp_ls = exp_ls.take(exp_li) + exp_ls[exp_li == -1] = -1 + + exp_rs = exp_rs.take(exp_ri) + exp_rs[exp_ri == -1] = -1 + + tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) + + def test_left_outer_join(self): + joined_key2 = merge(self.df, self.df2, on="key2") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="left") + + joined_both = merge(self.df, self.df2) + _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="left") + + def test_right_outer_join(self): + joined_key2 = merge(self.df, self.df2, on="key2", how="right") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="right") + + joined_both = merge(self.df, self.df2, how="right") + 
_check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="right") + + def test_full_outer_join(self): + joined_key2 = merge(self.df, self.df2, on="key2", how="outer") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="outer") + + joined_both = merge(self.df, self.df2, how="outer") + _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="outer") + + def test_inner_join(self): + joined_key2 = merge(self.df, self.df2, on="key2", how="inner") + _check_join(self.df, self.df2, joined_key2, ["key2"], how="inner") + + joined_both = merge(self.df, self.df2, how="inner") + _check_join(self.df, self.df2, joined_both, ["key1", "key2"], how="inner") + + def test_handle_overlap(self): + joined = merge(self.df, self.df2, on="key2", suffixes=[".foo", ".bar"]) + + assert "key1.foo" in joined + assert "key1.bar" in joined + + def test_handle_overlap_arbitrary_key(self): + joined = merge( + self.df, + self.df2, + left_on="key2", + right_on="key1", + suffixes=[".foo", ".bar"], + ) + assert "key1.foo" in joined + assert "key2.bar" in joined + + def test_join_on(self): + target = self.target + source = self.source + + merged = target.join(source, on="C") + tm.assert_series_equal(merged["MergedA"], target["A"], check_names=False) + tm.assert_series_equal(merged["MergedD"], target["D"], check_names=False) + + # join with duplicates (fix regression from DataFrame/Matrix merge) + df = DataFrame({"key": ["a", "a", "b", "b", "c"]}) + df2 = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"]) + joined = df.join(df2, on="key") + expected = DataFrame( + {"key": ["a", "a", "b", "b", "c"], "value": [0, 0, 1, 1, 2]} + ) + tm.assert_frame_equal(joined, expected) + + # Test when some are missing + df_a = DataFrame([[1], [2], [3]], index=["a", "b", "c"], columns=["one"]) + df_b = DataFrame([["foo"], ["bar"]], index=[1, 2], columns=["two"]) + df_c = DataFrame([[1], [2]], index=[1, 2], columns=["three"]) + joined = df_a.join(df_b, on="one") + joined = joined.join(df_c, 
on="one") + assert np.isnan(joined["two"]["c"]) + assert np.isnan(joined["three"]["c"]) + + # merge column not present + with pytest.raises(KeyError, match="^'E'$"): + target.join(source, on="E") + + # overlap + source_copy = source.copy() + source_copy["A"] = 0 + msg = ( + "You are trying to merge on float64 and object columns. If" + " you wish to proceed you should use pd.concat" + ) + with pytest.raises(ValueError, match=msg): + target.join(source_copy, on="A") + + def test_join_on_fails_with_different_right_index(self): + df = DataFrame( + {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)} + ) + df2 = DataFrame( + {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}, + index=tm.makeCustomIndex(10, 2), + ) + msg = r'len\(left_on\) must equal the number of levels in the index of "right"' + with pytest.raises(ValueError, match=msg): + merge(df, df2, left_on="a", right_index=True) + + def test_join_on_fails_with_different_left_index(self): + df = DataFrame( + {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)}, + index=tm.makeCustomIndex(3, 2), + ) + df2 = DataFrame( + {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)} + ) + msg = r'len\(right_on\) must equal the number of levels in the index of "left"' + with pytest.raises(ValueError, match=msg): + merge(df, df2, right_on="b", left_index=True) + + def test_join_on_fails_with_different_column_counts(self): + df = DataFrame( + {"a": np.random.choice(["m", "f"], size=3), "b": np.random.randn(3)} + ) + df2 = DataFrame( + {"a": np.random.choice(["m", "f"], size=10), "b": np.random.randn(10)}, + index=tm.makeCustomIndex(10, 2), + ) + msg = r"len\(right_on\) must equal len\(left_on\)" + with pytest.raises(ValueError, match=msg): + merge(df, df2, right_on="a", left_on=["a", "b"]) + + @pytest.mark.parametrize("wrong_type", [2, "str", None, np.array([0, 1])]) + def test_join_on_fails_with_wrong_object_type(self, wrong_type): + # GH12081 - original
issue + + # GH21220 - merging of Series and DataFrame is now allowed + # Edited test to remove the Series object from test parameters + + df = DataFrame({"a": [1, 1]}) + msg = "Can only merge Series or DataFrame objects, a {} was passed".format( + str(type(wrong_type)) + ) + with pytest.raises(TypeError, match=msg): + merge(wrong_type, df, left_on="a", right_on="a") + with pytest.raises(TypeError, match=msg): + merge(df, wrong_type, left_on="a", right_on="a") + + def test_join_on_pass_vector(self): + expected = self.target.join(self.source, on="C") + del expected["C"] + + join_col = self.target.pop("C") + result = self.target.join(self.source, on=join_col) + tm.assert_frame_equal(result, expected) + + def test_join_with_len0(self): + # nothing to merge + merged = self.target.join(self.source.reindex([]), on="C") + for col in self.source: + assert col in merged + assert merged[col].isna().all() + + merged2 = self.target.join(self.source.reindex([]), on="C", how="inner") + tm.assert_index_equal(merged2.columns, merged.columns) + assert len(merged2) == 0 + + def test_join_on_inner(self): + df = DataFrame({"key": ["a", "a", "d", "b", "b", "c"]}) + df2 = DataFrame({"value": [0, 1]}, index=["a", "b"]) + + joined = df.join(df2, on="key", how="inner") + + expected = df.join(df2, on="key") + expected = expected[expected["value"].notna()] + tm.assert_series_equal(joined["key"], expected["key"], check_dtype=False) + tm.assert_series_equal(joined["value"], expected["value"], check_dtype=False) + tm.assert_index_equal(joined.index, expected.index) + + def test_join_on_singlekey_list(self): + df = DataFrame({"key": ["a", "a", "b", "b", "c"]}) + df2 = DataFrame({"value": [0, 1, 2]}, index=["a", "b", "c"]) + + # corner cases + joined = df.join(df2, on=["key"]) + expected = df.join(df2, on="key") + + tm.assert_frame_equal(joined, expected) + + def test_join_on_series(self): + result = self.target.join(self.source["MergedA"], on="C") + expected = 
self.target.join(self.source[["MergedA"]], on="C") + tm.assert_frame_equal(result, expected) + + def test_join_on_series_buglet(self): + # GH #638 + df = DataFrame({"a": [1, 1]}) + ds = Series([2], index=[1], name="b") + result = df.join(ds, on="a") + expected = DataFrame({"a": [1, 1], "b": [2, 2]}, index=df.index) + tm.assert_frame_equal(result, expected) + + def test_join_index_mixed(self, join_type): + # no overlapping blocks + df1 = DataFrame(index=np.arange(10)) + df1["bool"] = True + df1["string"] = "foo" + + df2 = DataFrame(index=np.arange(5, 15)) + df2["int"] = 1 + df2["float"] = 1.0 + + joined = df1.join(df2, how=join_type) + expected = _join_by_hand(df1, df2, how=join_type) + tm.assert_frame_equal(joined, expected) + + joined = df2.join(df1, how=join_type) + expected = _join_by_hand(df2, df1, how=join_type) + tm.assert_frame_equal(joined, expected) + + def test_join_index_mixed_overlap(self): + df1 = DataFrame( + {"A": 1.0, "B": 2, "C": "foo", "D": True}, + index=np.arange(10), + columns=["A", "B", "C", "D"], + ) + assert df1["B"].dtype == np.int64 + assert df1["D"].dtype == np.bool_ + + df2 = DataFrame( + {"A": 1.0, "B": 2, "C": "foo", "D": True}, + index=np.arange(0, 10, 2), + columns=["A", "B", "C", "D"], + ) + + # overlap + joined = df1.join(df2, lsuffix="_one", rsuffix="_two") + expected_columns = [ + "A_one", + "B_one", + "C_one", + "D_one", + "A_two", + "B_two", + "C_two", + "D_two", + ] + df1.columns = expected_columns[:4] + df2.columns = expected_columns[4:] + expected = _join_by_hand(df1, df2) + tm.assert_frame_equal(joined, expected) + + def test_join_empty_bug(self): + # generated an exception in 0.4.3 + x = DataFrame() + x.join(DataFrame([3], index=[0], columns=["A"]), how="outer") + + def test_join_unconsolidated(self): + # GH #331 + a = DataFrame(randn(30, 2), columns=["a", "b"]) + c = Series(randn(30)) + a["c"] = c + d = DataFrame(randn(30, 1), columns=["q"]) + + # it works! 
+ a.join(d) + d.join(a) + + def test_join_multiindex(self): + index1 = MultiIndex.from_arrays( + [["a", "a", "a", "b", "b", "b"], [1, 2, 3, 1, 2, 3]], + names=["first", "second"], + ) + + index2 = MultiIndex.from_arrays( + [["b", "b", "b", "c", "c", "c"], [1, 2, 3, 1, 2, 3]], + names=["first", "second"], + ) + + df1 = DataFrame(data=np.random.randn(6), index=index1, columns=["var X"]) + df2 = DataFrame(data=np.random.randn(6), index=index2, columns=["var Y"]) + + df1 = df1.sort_index(level=0) + df2 = df2.sort_index(level=0) + + joined = df1.join(df2, how="outer") + ex_index = Index(index1.values).union(Index(index2.values)) + expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) + expected.index.names = index1.names + tm.assert_frame_equal(joined, expected) + assert joined.index.names == index1.names + + df1 = df1.sort_index(level=1) + df2 = df2.sort_index(level=1) + + joined = df1.join(df2, how="outer").sort_index(level=0) + ex_index = Index(index1.values).union(Index(index2.values)) + expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) + expected.index.names = index1.names + + tm.assert_frame_equal(joined, expected) + assert joined.index.names == index1.names + + def test_join_inner_multiindex(self): + key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"] + key2 = [ + "two", + "one", + "three", + "one", + "two", + "one", + "two", + "two", + "three", + "one", + ] + + data = np.random.randn(len(key1)) + data = DataFrame({"key1": key1, "key2": key2, "data": data}) + + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + to_join = DataFrame( + np.random.randn(10, 3), index=index, columns=["j_one", "j_two", "j_three"] + ) + + joined = data.join(to_join, on=["key1", "key2"], how="inner") + expected = merge( + data, + to_join.reset_index(), + left_on=["key1", "key2"], + right_on=["first", 
"second"], + how="inner", + sort=False, + ) + + expected2 = merge( + to_join, + data, + right_on=["key1", "key2"], + left_index=True, + how="inner", + sort=False, + ) + tm.assert_frame_equal(joined, expected2.reindex_like(joined)) + + expected2 = merge( + to_join, + data, + right_on=["key1", "key2"], + left_index=True, + how="inner", + sort=False, + ) + + expected = expected.drop(["first", "second"], axis=1) + expected.index = joined.index + + assert joined.index.is_monotonic + tm.assert_frame_equal(joined, expected) + + # _assert_same_contents(expected, expected2.loc[:, expected.columns]) + + def test_join_hierarchical_mixed(self): + # GH 2024 + df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"]) + new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]}) + other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"]) + other_df.set_index("a", inplace=True) + # GH 9455, 12219 + with tm.assert_produces_warning(UserWarning): + result = merge(new_df, other_df, left_index=True, right_index=True) + assert ("b", "mean") in result + assert "b" in result + + def test_join_float64_float32(self): + + a = DataFrame(randn(10, 2), columns=["a", "b"], dtype=np.float64) + b = DataFrame(randn(10, 1), columns=["c"], dtype=np.float32) + joined = a.join(b) + assert joined.dtypes["a"] == "float64" + assert joined.dtypes["b"] == "float64" + assert joined.dtypes["c"] == "float32" + + a = np.random.randint(0, 5, 100).astype("int64") + b = np.random.random(100).astype("float64") + c = np.random.random(100).astype("float32") + df = DataFrame({"a": a, "b": b, "c": c}) + xpdf = DataFrame({"a": a, "b": b, "c": c}) + s = DataFrame(np.random.random(5).astype("float32"), columns=["md"]) + rs = df.merge(s, left_on="a", right_index=True) + assert rs.dtypes["a"] == "int64" + assert rs.dtypes["b"] == "float64" + assert rs.dtypes["c"] == "float32" + assert rs.dtypes["md"] == "float32" + + xp = xpdf.merge(s, left_on="a", right_index=True) + tm.assert_frame_equal(rs, xp) + + def 
test_join_many_non_unique_index(self): + df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]}) + df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]}) + df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]}) + idf1 = df1.set_index(["a", "b"]) + idf2 = df2.set_index(["a", "b"]) + idf3 = df3.set_index(["a", "b"]) + + result = idf1.join([idf2, idf3], how="outer") + + df_partially_merged = merge(df1, df2, on=["a", "b"], how="outer") + expected = merge(df_partially_merged, df3, on=["a", "b"], how="outer") + + result = result.reset_index() + expected = expected[result.columns] + expected["a"] = expected.a.astype("int64") + expected["b"] = expected.b.astype("int64") + tm.assert_frame_equal(result, expected) + + df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]}) + df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]}) + df3 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]}) + idf1 = df1.set_index(["a", "b"]) + idf2 = df2.set_index(["a", "b"]) + idf3 = df3.set_index(["a", "b"]) + result = idf1.join([idf2, idf3], how="inner") + + df_partially_merged = merge(df1, df2, on=["a", "b"], how="inner") + expected = merge(df_partially_merged, df3, on=["a", "b"], how="inner") + + result = result.reset_index() + + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + # GH 11519 + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + } + ) + s = Series( + np.repeat(np.arange(8), 2), index=np.repeat(np.arange(8), 2), name="TEST" + ) + inner = df.join(s, how="inner") + outer = df.join(s, how="outer") + left = df.join(s, how="left") + right = df.join(s, how="right") + tm.assert_frame_equal(inner, outer) + tm.assert_frame_equal(inner, left) + tm.assert_frame_equal(inner, right) + + def test_join_sort(self): + left = DataFrame({"key": ["foo", "bar", 
"baz", "foo"], "value": [1, 2, 3, 4]}) + right = DataFrame({"value2": ["a", "b", "c"]}, index=["bar", "baz", "foo"]) + + joined = left.join(right, on="key", sort=True) + expected = DataFrame( + { + "key": ["bar", "baz", "foo", "foo"], + "value": [2, 3, 1, 4], + "value2": ["a", "b", "c", "c"], + }, + index=[1, 2, 0, 3], + ) + tm.assert_frame_equal(joined, expected) + + # smoke test + joined = left.join(right, on="key", sort=False) + tm.assert_index_equal(joined.index, pd.Index(list(range(4)))) + + def test_join_mixed_non_unique_index(self): + # GH 12814, unorderable types in py3 with a non-unique index + df1 = DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 3, "a"]) + df2 = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 3, 3, 4]) + result = df1.join(df2) + expected = DataFrame( + {"a": [1, 2, 3, 3, 4], "b": [5, np.nan, 6, 7, np.nan]}, + index=[1, 2, 3, 3, "a"], + ) + tm.assert_frame_equal(result, expected) + + df3 = DataFrame({"a": [1, 2, 3, 4]}, index=[1, 2, 2, "a"]) + df4 = DataFrame({"b": [5, 6, 7, 8]}, index=[1, 2, 3, 4]) + result = df3.join(df4) + expected = DataFrame( + {"a": [1, 2, 3, 4], "b": [5, 6, 6, np.nan]}, index=[1, 2, 2, "a"] + ) + tm.assert_frame_equal(result, expected) + + def test_join_non_unique_period_index(self): + # GH #16871 + index = pd.period_range("2016-01-01", periods=16, freq="M") + df = DataFrame(list(range(len(index))), index=index, columns=["pnum"]) + df2 = concat([df, df]) + result = df.join(df2, how="inner", rsuffix="_df2") + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=["pnum", "pnum_df2"], + index=df2.sort_index().index, + ) + tm.assert_frame_equal(result, expected) + + def test_mixed_type_join_with_suffix(self): + # GH #916 + df = DataFrame(np.random.randn(20, 6), columns=["a", "b", "c", "d", "e", "f"]) + df.insert(0, "id", 0) + df.insert(5, "dt", "foo") + + grouped = df.groupby("id") + mn = grouped.mean() + cn = grouped.count() + + # it works! 
+ mn.join(cn, rsuffix="_right") + + def test_join_many(self): + df = DataFrame(np.random.randn(10, 6), columns=list("abcdef")) + df_list = [df[["a", "b"]], df[["c", "d"]], df[["e", "f"]]] + + joined = df_list[0].join(df_list[1:]) + tm.assert_frame_equal(joined, df) + + df_list = [df[["a", "b"]][:-2], df[["c", "d"]][2:], df[["e", "f"]][1:9]] + + def _check_diff_index(df_list, result, exp_index): + reindexed = [x.reindex(exp_index) for x in df_list] + expected = reindexed[0].join(reindexed[1:]) + tm.assert_frame_equal(result, expected) + + # different join types + joined = df_list[0].join(df_list[1:], how="outer") + _check_diff_index(df_list, joined, df.index) + + joined = df_list[0].join(df_list[1:]) + _check_diff_index(df_list, joined, df_list[0].index) + + joined = df_list[0].join(df_list[1:], how="inner") + _check_diff_index(df_list, joined, df.index[2:8]) + + msg = "Joining multiple DataFrames only supported for joining on index" + with pytest.raises(ValueError, match=msg): + df_list[0].join(df_list[1:], on="a") + + def test_join_many_mixed(self): + df = DataFrame(np.random.randn(8, 4), columns=["A", "B", "C", "D"]) + df["key"] = ["foo", "bar"] * 4 + df1 = df.loc[:, ["A", "B"]] + df2 = df.loc[:, ["C", "D"]] + df3 = df.loc[:, ["key"]] + + result = df1.join([df2, df3]) + tm.assert_frame_equal(result, df) + + def test_join_dups(self): + + # joining dups + df = concat( + [ + DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), + DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ), + ], + axis=1, + ) + + expected = concat([df, df], axis=1) + result = df.join(df, rsuffix="_2") + result.columns = expected.columns + tm.assert_frame_equal(result, expected) + + # GH 4975, invalid join on dups + w = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + x = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + y = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + z = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + 
+ dta = x.merge(y, left_index=True, right_index=True).merge( + z, left_index=True, right_index=True, how="outer" + ) + dta = dta.merge(w, left_index=True, right_index=True) + expected = concat([x, y, z, w], axis=1) + expected.columns = ["x_x", "y_x", "x_y", "y_y", "x_x", "y_x", "x_y", "y_y"] + tm.assert_frame_equal(dta, expected) + + def test_join_multi_to_multi(self, join_type): + # GH 20475 + leftindex = MultiIndex.from_product( + [list("abc"), list("xy"), [1, 2]], names=["abc", "xy", "num"] + ) + left = DataFrame({"v1": range(12)}, index=leftindex) + + rightindex = MultiIndex.from_product( + [list("abc"), list("xy")], names=["abc", "xy"] + ) + right = DataFrame({"v2": [100 * i for i in range(1, 7)]}, index=rightindex) + + result = left.join(right, on=["abc", "xy"], how=join_type) + expected = ( + left.reset_index() + .merge(right.reset_index(), on=["abc", "xy"], how=join_type) + .set_index(["abc", "xy", "num"]) + ) + tm.assert_frame_equal(expected, result) + + msg = r'len\(left_on\) must equal the number of levels in the index of "right"' + with pytest.raises(ValueError, match=msg): + left.join(right, on="xy", how=join_type) + + with pytest.raises(ValueError, match=msg): + right.join(left, on=["abc", "xy"], how=join_type) + + def test_join_on_tz_aware_datetimeindex(self): + # GH 23931, 26335 + df1 = pd.DataFrame( + { + "date": pd.date_range( + start="2018-01-01", periods=5, tz="America/Chicago" + ), + "vals": list("abcde"), + } + ) + + df2 = pd.DataFrame( + { + "date": pd.date_range( + start="2018-01-03", periods=5, tz="America/Chicago" + ), + "vals_2": list("tuvwx"), + } + ) + result = df1.join(df2.set_index("date"), on="date") + expected = df1.copy() + expected["vals_2"] = pd.Series([np.nan] * 2 + list("tuv"), dtype=object) + tm.assert_frame_equal(result, expected) + + def test_join_datetime_string(self): + # GH 5647 + dfa = DataFrame( + [ + ["2012-08-02", "L", 10], + ["2012-08-02", "J", 15], + ["2013-04-06", "L", 20], + ["2013-04-06", "J", 25], + ], + 
columns=["x", "y", "a"], + ) + dfa["x"] = pd.to_datetime(dfa["x"]) + dfb = DataFrame( + [["2012-08-02", "J", 1], ["2013-04-06", "L", 2]], + columns=["x", "y", "z"], + index=[2, 4], + ) + dfb["x"] = pd.to_datetime(dfb["x"]) + result = dfb.join(dfa.set_index(["x", "y"]), on=["x", "y"]) + expected = DataFrame( + [ + [pd.Timestamp("2012-08-02 00:00:00"), "J", 1, 15], + [pd.Timestamp("2013-04-06 00:00:00"), "L", 2, 20], + ], + index=[2, 4], + columns=["x", "y", "z", "a"], + ) + tm.assert_frame_equal(result, expected) + + +def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix="_y"): + + # some smoke tests + for c in join_col: + assert result[c].notna().all() + + left_grouped = left.groupby(join_col) + right_grouped = right.groupby(join_col) + + for group_key, group in result.groupby(join_col): + l_joined = _restrict_to_columns(group, left.columns, lsuffix) + r_joined = _restrict_to_columns(group, right.columns, rsuffix) + + try: + lgroup = left_grouped.get_group(group_key) + except KeyError: + if how in ("left", "inner"): + raise AssertionError( + "key {group_key!s} should not have been in the join".format( + group_key=group_key + ) + ) + + _assert_all_na(l_joined, left.columns, join_col) + else: + _assert_same_contents(l_joined, lgroup) + + try: + rgroup = right_grouped.get_group(group_key) + except KeyError: + if how in ("right", "inner"): + raise AssertionError( + "key {group_key!s} should not have been in the join".format( + group_key=group_key + ) + ) + + _assert_all_na(r_joined, right.columns, join_col) + else: + _assert_same_contents(r_joined, rgroup) + + +def _restrict_to_columns(group, columns, suffix): + found = [ + c for c in group.columns if c in columns or c.replace(suffix, "") in columns + ] + + # filter + group = group.loc[:, found] + + # get rid of suffixes, if any + group = group.rename(columns=lambda x: x.replace(suffix, "")) + + # put in the right order... 
+ group = group.loc[:, columns] + + return group + + +def _assert_same_contents(join_chunk, source): + NA_SENTINEL = -1234567 # drop_duplicates not so NA-friendly... + + jvalues = join_chunk.fillna(NA_SENTINEL).drop_duplicates().values + svalues = source.fillna(NA_SENTINEL).drop_duplicates().values + + rows = {tuple(row) for row in jvalues} + assert len(rows) == len(source) + assert all(tuple(row) in rows for row in svalues) + + +def _assert_all_na(join_chunk, source_columns, join_col): + for c in source_columns: + if c in join_col: + continue + assert join_chunk[c].isna().all() + + +def _join_by_hand(a, b, how="left"): + join_index = a.index.join(b.index, how=how) + + a_re = a.reindex(join_index) + b_re = b.reindex(join_index) + + result_columns = a.columns.append(b.columns) + + for col, s in b_re.items(): + a_re[col] = s + return a_re.reindex(columns=result_columns) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py new file mode 100644 index 00000000..8465e2ca --- /dev/null +++ b/pandas/tests/reshape/merge/test_merge.py @@ -0,0 +1,2172 @@ +from collections import OrderedDict +from datetime import date, datetime, timedelta +import random +import re + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DataFrame, + DatetimeIndex, + Float64Index, + Int64Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, + UInt64Index, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +from pandas.core.reshape.concat import concat +from pandas.core.reshape.merge import MergeError, merge + +N = 50 +NGROUPS = 8 + + +def get_test_data(ngroups=NGROUPS, n=N): + unique_groups = list(range(ngroups)) + arr = np.asarray(np.tile(unique_groups, n // ngroups)) + + if 
len(arr) < n: + arr = np.asarray(list(arr) + unique_groups[: n - len(arr)]) + + random.shuffle(arr) + return arr + + +def get_series(): + return [ + pd.Series([1], dtype="int64"), + pd.Series([1], dtype="Int64"), + pd.Series([1.23]), + pd.Series(["foo"]), + pd.Series([True]), + pd.Series([pd.Timestamp("2018-01-01")]), + pd.Series([pd.Timestamp("2018-01-01", tz="US/Eastern")]), + ] + + +def get_series_na(): + return [ + pd.Series([np.nan], dtype="Int64"), + pd.Series([np.nan], dtype="float"), + pd.Series([np.nan], dtype="object"), + pd.Series([pd.NaT]), + ] + + +@pytest.fixture(params=get_series(), ids=lambda x: x.dtype.name) +def series_of_dtype(request): + """ + A parametrized fixture returning a variety of Series of different + dtypes + """ + return request.param + + +@pytest.fixture(params=get_series(), ids=lambda x: x.dtype.name) +def series_of_dtype2(request): + """ + A duplicate of the series_of_dtype fixture, so that it can be used + twice by a single function + """ + return request.param + + +@pytest.fixture(params=get_series_na(), ids=lambda x: x.dtype.name) +def series_of_dtype_all_na(request): + """ + A parametrized fixture returning a variety of Series with all NA + values + """ + return request.param + + +class TestMerge: + def setup_method(self, method): + # aggregate multiple columns + self.df = DataFrame( + { + "key1": get_test_data(), + "key2": get_test_data(), + "data1": np.random.randn(N), + "data2": np.random.randn(N), + } + ) + + # exclude a couple keys for fun + self.df = self.df[self.df["key2"] > 1] + + self.df2 = DataFrame( + { + "key1": get_test_data(n=N // 5), + "key2": get_test_data(ngroups=NGROUPS // 2, n=N // 5), + "value": np.random.randn(N // 5), + } + ) + + self.left = DataFrame( + {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + ) + self.right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) + + def test_merge_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = 
pd.DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") + result = pd.merge(df_empty, df_a, left_index=True, right_index=True) + expected = pd.DataFrame({"a": []}, index=[], dtype="int64") + tm.assert_frame_equal(result, expected) + + def test_merge_common(self): + joined = merge(self.df, self.df2) + exp = merge(self.df, self.df2, on=["key1", "key2"]) + tm.assert_frame_equal(joined, exp) + + def test_merge_non_string_columns(self): + # https://github.com/pandas-dev/pandas/issues/17962 + # Checks that method runs for non string column names + left = pd.DataFrame( + {0: [1, 0, 1, 0], 1: [0, 1, 0, 0], 2: [0, 0, 2, 0], 3: [1, 0, 0, 3]} + ) + + right = left.astype(float) + expected = left + result = pd.merge(left, right) + tm.assert_frame_equal(expected, result) + + def test_merge_index_as_on_arg(self): + # GH14355 + + left = self.df.set_index("key1") + right = self.df2.set_index("key1") + result = merge(left, right, on="key1") + expected = merge(self.df, self.df2, on="key1").set_index("key1") + tm.assert_frame_equal(result, expected) + + def test_merge_index_singlekey_right_vs_left(self): + left = DataFrame( + {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + ) + right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", "c", "a"]) + + merged1 = merge( + left, right, left_on="key", right_index=True, how="left", sort=False + ) + merged2 = merge( + right, left, right_on="key", left_index=True, how="right", sort=False + ) + tm.assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) + + merged1 = merge( + left, right, left_on="key", right_index=True, how="left", sort=True + ) + merged2 = merge( + right, left, right_on="key", left_index=True, how="right", sort=True + ) + tm.assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) + + def test_merge_index_singlekey_inner(self): + left = DataFrame( + {"key": ["a", "b", "c", "d", "e", "e", "a"], "v1": np.random.randn(7)} + ) + right = DataFrame({"v2": np.random.randn(4)}, index=["d", "b", 
"c", "a"]) + + # inner join + result = merge(left, right, left_on="key", right_index=True, how="inner") + expected = left.join(right, on="key").loc[result.index] + tm.assert_frame_equal(result, expected) + + result = merge(right, left, right_on="key", left_index=True, how="inner") + expected = left.join(right, on="key").loc[result.index] + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + def test_merge_misspecified(self): + msg = "Must pass right_on or right_index=True" + with pytest.raises(pd.errors.MergeError, match=msg): + merge(self.left, self.right, left_index=True) + msg = "Must pass left_on or left_index=True" + with pytest.raises(pd.errors.MergeError, match=msg): + merge(self.left, self.right, right_index=True) + + msg = ( + 'Can only pass argument "on" OR "left_on" and "right_on", not' + " a combination of both" + ) + with pytest.raises(pd.errors.MergeError, match=msg): + merge(self.left, self.left, left_on="key", on="key") + + msg = r"len\(right_on\) must equal len\(left_on\)" + with pytest.raises(ValueError, match=msg): + merge(self.df, self.df2, left_on=["key1"], right_on=["key1", "key2"]) + + def test_index_and_on_parameters_confusion(self): + msg = "right_index parameter must be of type bool, not " + with pytest.raises(ValueError, match=msg): + merge( + self.df, + self.df2, + how="left", + left_index=False, + right_index=["key1", "key2"], + ) + msg = "left_index parameter must be of type bool, not " + with pytest.raises(ValueError, match=msg): + merge( + self.df, + self.df2, + how="left", + left_index=["key1", "key2"], + right_index=False, + ) + with pytest.raises(ValueError, match=msg): + merge( + self.df, + self.df2, + how="left", + left_index=["key1", "key2"], + right_index=["key1", "key2"], + ) + + def test_merge_overlap(self): + merged = merge(self.left, self.left, on="key") + exp_len = (self.left["key"].value_counts() ** 2).sum() + assert len(merged) == exp_len + assert "v1_x" in merged + assert "v1_y" in merged + + def 
test_merge_different_column_key_names(self): + left = DataFrame({"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 4]}) + right = DataFrame({"rkey": ["foo", "bar", "qux", "foo"], "value": [5, 6, 7, 8]}) + + merged = left.merge( + right, left_on="lkey", right_on="rkey", how="outer", sort=True + ) + + exp = pd.Series(["bar", "baz", "foo", "foo", "foo", "foo", np.nan], name="lkey") + tm.assert_series_equal(merged["lkey"], exp) + + exp = pd.Series(["bar", np.nan, "foo", "foo", "foo", "foo", "qux"], name="rkey") + tm.assert_series_equal(merged["rkey"], exp) + + exp = pd.Series([2, 3, 1, 1, 4, 4, np.nan], name="value_x") + tm.assert_series_equal(merged["value_x"], exp) + + exp = pd.Series([6, np.nan, 5, 8, 5, 8, 7], name="value_y") + tm.assert_series_equal(merged["value_y"], exp) + + def test_merge_copy(self): + left = DataFrame({"a": 0, "b": 1}, index=range(10)) + right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) + + merged = merge(left, right, left_index=True, right_index=True, copy=True) + + merged["a"] = 6 + assert (left["a"] == 0).all() + + merged["d"] = "peekaboo" + assert (right["d"] == "bar").all() + + def test_merge_nocopy(self): + left = DataFrame({"a": 0, "b": 1}, index=range(10)) + right = DataFrame({"c": "foo", "d": "bar"}, index=range(10)) + + merged = merge(left, right, left_index=True, right_index=True, copy=False) + + merged["a"] = 6 + assert (left["a"] == 6).all() + + merged["d"] = "peekaboo" + assert (right["d"] == "peekaboo").all() + + def test_intelligently_handle_join_key(self): + # #733, be a bit more 1337 about not returning unconsolidated DataFrame + + left = DataFrame( + {"key": [1, 1, 2, 2, 3], "value": list(range(5))}, columns=["value", "key"] + ) + right = DataFrame({"key": [1, 1, 2, 3, 4, 5], "rvalue": list(range(6))}) + + joined = merge(left, right, on="key", how="outer") + expected = DataFrame( + { + "key": [1, 1, 1, 1, 2, 2, 3, 4, 5], + "value": np.array([0, 0, 1, 1, 2, 3, 4, np.nan, np.nan]), + "rvalue": [0, 1, 0, 1, 2, 
2, 3, 4, 5], + }, + columns=["value", "key", "rvalue"], + ) + tm.assert_frame_equal(joined, expected) + + def test_merge_join_key_dtype_cast(self): + # #8596 + + df1 = DataFrame({"key": [1], "v1": [10]}) + df2 = DataFrame({"key": [2], "v1": [20]}) + df = merge(df1, df2, how="outer") + assert df["key"].dtype == "int64" + + df1 = DataFrame({"key": [True], "v1": [1]}) + df2 = DataFrame({"key": [False], "v1": [0]}) + df = merge(df1, df2, how="outer") + + # GH13169 + # this really should be bool + assert df["key"].dtype == "object" + + df1 = DataFrame({"val": [1]}) + df2 = DataFrame({"val": [2]}) + lkey = np.array([1]) + rkey = np.array([2]) + df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") + assert df["key_0"].dtype == "int64" + + def test_handle_join_key_pass_array(self): + left = DataFrame( + {"key": [1, 1, 2, 2, 3], "value": np.arange(5)}, columns=["value", "key"] + ) + right = DataFrame({"rvalue": np.arange(6)}) + key = np.array([1, 1, 2, 3, 4, 5]) + + merged = merge(left, right, left_on="key", right_on=key, how="outer") + merged2 = merge(right, left, left_on=key, right_on="key", how="outer") + + tm.assert_series_equal(merged["key"], merged2["key"]) + assert merged["key"].notna().all() + assert merged2["key"].notna().all() + + left = DataFrame({"value": np.arange(5)}, columns=["value"]) + right = DataFrame({"rvalue": np.arange(6)}) + lkey = np.array([1, 1, 2, 2, 3]) + rkey = np.array([1, 1, 2, 3, 4, 5]) + + merged = merge(left, right, left_on=lkey, right_on=rkey, how="outer") + tm.assert_series_equal( + merged["key_0"], Series([1, 1, 1, 1, 2, 2, 3, 4, 5], name="key_0") + ) + + left = DataFrame({"value": np.arange(3)}) + right = DataFrame({"rvalue": np.arange(6)}) + + key = np.array([0, 1, 1, 2, 2, 3], dtype=np.int64) + merged = merge(left, right, left_index=True, right_on=key, how="outer") + tm.assert_series_equal(merged["key_0"], Series(key, name="key_0")) + + def test_no_overlap_more_informative_error(self): + dt = datetime.now() + df1 = 
DataFrame({"x": ["a"]}, index=[dt]) + + df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) + + msg = ( + "No common columns to perform merge on. " + "Merge options: left_on={lon}, right_on={ron}, " + "left_index={lidx}, right_index={ridx}".format( + lon=None, ron=None, lidx=False, ridx=False + ) + ) + + with pytest.raises(MergeError, match=msg): + merge(df1, df2) + + def test_merge_non_unique_indexes(self): + + dt = datetime(2012, 5, 1) + dt2 = datetime(2012, 5, 2) + dt3 = datetime(2012, 5, 3) + dt4 = datetime(2012, 5, 4) + + df1 = DataFrame({"x": ["a"]}, index=[dt]) + df2 = DataFrame({"y": ["b", "c"]}, index=[dt, dt]) + _check_merge(df1, df2) + + # Not monotonic + df1 = DataFrame({"x": ["a", "b", "q"]}, index=[dt2, dt, dt4]) + df2 = DataFrame( + {"y": ["c", "d", "e", "f", "g", "h"]}, index=[dt3, dt3, dt2, dt2, dt, dt] + ) + _check_merge(df1, df2) + + df1 = DataFrame({"x": ["a", "b"]}, index=[dt, dt]) + df2 = DataFrame({"y": ["c", "d"]}, index=[dt, dt]) + _check_merge(df1, df2) + + def test_merge_non_unique_index_many_to_many(self): + dt = datetime(2012, 5, 1) + dt2 = datetime(2012, 5, 2) + dt3 = datetime(2012, 5, 3) + df1 = DataFrame({"x": ["a", "b", "c", "d"]}, index=[dt2, dt2, dt, dt]) + df2 = DataFrame( + {"y": ["e", "f", "g", " h", "i"]}, index=[dt2, dt2, dt3, dt, dt] + ) + _check_merge(df1, df2) + + def test_left_merge_empty_dataframe(self): + left = DataFrame({"key": [1], "value": [2]}) + right = DataFrame({"key": []}) + + result = merge(left, right, on="key", how="left") + tm.assert_frame_equal(result, left) + + result = merge(right, left, on="key", how="right") + tm.assert_frame_equal(result, left) + + @pytest.mark.parametrize( + "kwarg", + [ + dict(left_index=True, right_index=True), + dict(left_index=True, right_on="x"), + dict(left_on="a", right_index=True), + dict(left_on="a", right_on="x"), + ], + ) + def test_merge_left_empty_right_empty(self, join_type, kwarg): + # GH 10824 + left = pd.DataFrame(columns=["a", "b", "c"]) + right = 
pd.DataFrame(columns=["x", "y", "z"]) + + exp_in = pd.DataFrame( + columns=["a", "b", "c", "x", "y", "z"], + index=pd.Index([], dtype=object), + dtype=object, + ) + + result = pd.merge(left, right, how=join_type, **kwarg) + tm.assert_frame_equal(result, exp_in) + + def test_merge_left_empty_right_notempty(self): + # GH 10824 + left = pd.DataFrame(columns=["a", "b", "c"]) + right = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["x", "y", "z"]) + + exp_out = pd.DataFrame( + { + "a": np.array([np.nan] * 3, dtype=object), + "b": np.array([np.nan] * 3, dtype=object), + "c": np.array([np.nan] * 3, dtype=object), + "x": [1, 4, 7], + "y": [2, 5, 8], + "z": [3, 6, 9], + }, + columns=["a", "b", "c", "x", "y", "z"], + ) + exp_in = exp_out[0:0] # make empty DataFrame keeping dtype + # result will have object dtype + exp_in.index = exp_in.index.astype(object) + + def check1(exp, kwarg): + result = pd.merge(left, right, how="inner", **kwarg) + tm.assert_frame_equal(result, exp) + result = pd.merge(left, right, how="left", **kwarg) + tm.assert_frame_equal(result, exp) + + def check2(exp, kwarg): + result = pd.merge(left, right, how="right", **kwarg) + tm.assert_frame_equal(result, exp) + result = pd.merge(left, right, how="outer", **kwarg) + tm.assert_frame_equal(result, exp) + + for kwarg in [ + dict(left_index=True, right_index=True), + dict(left_index=True, right_on="x"), + ]: + check1(exp_in, kwarg) + check2(exp_out, kwarg) + + kwarg = dict(left_on="a", right_index=True) + check1(exp_in, kwarg) + exp_out["a"] = [0, 1, 2] + check2(exp_out, kwarg) + + kwarg = dict(left_on="a", right_on="x") + check1(exp_in, kwarg) + exp_out["a"] = np.array([np.nan] * 3, dtype=object) + check2(exp_out, kwarg) + + def test_merge_left_notempty_right_empty(self): + # GH 10824 + left = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"]) + right = pd.DataFrame(columns=["x", "y", "z"]) + + exp_out = pd.DataFrame( + { + "a": [1, 4, 7], + "b": [2, 5, 8], + "c": [3, 6, 9], 
+ "x": np.array([np.nan] * 3, dtype=object), + "y": np.array([np.nan] * 3, dtype=object), + "z": np.array([np.nan] * 3, dtype=object), + }, + columns=["a", "b", "c", "x", "y", "z"], + ) + exp_in = exp_out[0:0] # make empty DataFrame keeping dtype + # result will have object dtype + exp_in.index = exp_in.index.astype(object) + + def check1(exp, kwarg): + result = pd.merge(left, right, how="inner", **kwarg) + tm.assert_frame_equal(result, exp) + result = pd.merge(left, right, how="right", **kwarg) + tm.assert_frame_equal(result, exp) + + def check2(exp, kwarg): + result = pd.merge(left, right, how="left", **kwarg) + tm.assert_frame_equal(result, exp) + result = pd.merge(left, right, how="outer", **kwarg) + tm.assert_frame_equal(result, exp) + + for kwarg in [ + dict(left_index=True, right_index=True), + dict(left_index=True, right_on="x"), + dict(left_on="a", right_index=True), + dict(left_on="a", right_on="x"), + ]: + check1(exp_in, kwarg) + check2(exp_out, kwarg) + + def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2): + # GH 25183 + df = pd.DataFrame( + {"key": series_of_dtype, "value": series_of_dtype2}, + columns=["key", "value"], + ) + df_empty = df[:0] + expected = pd.DataFrame( + { + "value_x": pd.Series(dtype=df.dtypes["value"]), + "key": pd.Series(dtype=df.dtypes["key"]), + "value_y": pd.Series(dtype=df.dtypes["value"]), + }, + columns=["value_x", "key", "value_y"], + ) + actual = df_empty.merge(df, on="key") + tm.assert_frame_equal(actual, expected) + + def test_merge_all_na_column(self, series_of_dtype, series_of_dtype_all_na): + # GH 25183 + df_left = pd.DataFrame( + {"key": series_of_dtype, "value": series_of_dtype_all_na}, + columns=["key", "value"], + ) + df_right = pd.DataFrame( + {"key": series_of_dtype, "value": series_of_dtype_all_na}, + columns=["key", "value"], + ) + expected = pd.DataFrame( + { + "key": series_of_dtype, + "value_x": series_of_dtype_all_na, + "value_y": series_of_dtype_all_na, + }, + columns=["key", "value_x", 
"value_y"], + ) + actual = df_left.merge(df_right, on="key") + tm.assert_frame_equal(actual, expected) + + def test_merge_nosort(self): + # GH#2098, TODO: anything to do? + + d = { + "var1": np.random.randint(0, 10, size=10), + "var2": np.random.randint(0, 10, size=10), + "var3": [ + datetime(2012, 1, 12), + datetime(2011, 2, 4), + datetime(2010, 2, 3), + datetime(2012, 1, 12), + datetime(2011, 2, 4), + datetime(2012, 4, 3), + datetime(2012, 3, 4), + datetime(2008, 5, 1), + datetime(2010, 2, 3), + datetime(2012, 2, 3), + ], + } + df = DataFrame.from_dict(d) + var3 = df.var3.unique() + var3.sort() + new = DataFrame.from_dict({"var3": var3, "var8": np.random.random(7)}) + + result = df.merge(new, on="var3", sort=False) + exp = merge(df, new, on="var3", sort=False) + tm.assert_frame_equal(result, exp) + + assert (df.var3.unique() == result.var3.unique()).all() + + def test_merge_nan_right(self): + df1 = DataFrame({"i1": [0, 1], "i2": [0, 1]}) + df2 = DataFrame({"i1": [0], "i3": [0]}) + result = df1.join(df2, on="i1", rsuffix="_") + expected = ( + DataFrame( + { + "i1": {0: 0.0, 1: 1}, + "i2": {0: 0, 1: 1}, + "i1_": {0: 0, 1: np.nan}, + "i3": {0: 0.0, 1: np.nan}, + None: {0: 0, 1: 0}, + } + ) + .set_index(None) + .reset_index()[["i1", "i2", "i1_", "i3"]] + ) + tm.assert_frame_equal(result, expected, check_dtype=False) + + df1 = DataFrame({"i1": [0, 1], "i2": [0.5, 1.5]}) + df2 = DataFrame({"i1": [0], "i3": [0.7]}) + result = df1.join(df2, rsuffix="_", on="i1") + expected = DataFrame( + { + "i1": {0: 0, 1: 1}, + "i1_": {0: 0.0, 1: np.nan}, + "i2": {0: 0.5, 1: 1.5}, + "i3": {0: 0.69999999999999996, 1: np.nan}, + } + )[["i1", "i2", "i1_", "i3"]] + tm.assert_frame_equal(result, expected) + + def test_merge_type(self): + class NotADataFrame(DataFrame): + @property + def _constructor(self): + return NotADataFrame + + nad = NotADataFrame(self.df) + result = nad.merge(self.df2, on="key1") + + assert isinstance(result, NotADataFrame) + + def test_join_append_timedeltas(self): + 
# timedelta64 issues with join/merge + # GH 5695 + + d = {"d": datetime(2013, 11, 5, 5, 56), "t": timedelta(0, 22500)} + df = DataFrame(columns=list("dt")) + df = df.append(d, ignore_index=True) + result = df.append(d, ignore_index=True) + expected = DataFrame( + { + "d": [datetime(2013, 11, 5, 5, 56), datetime(2013, 11, 5, 5, 56)], + "t": [timedelta(0, 22500), timedelta(0, 22500)], + } + ) + tm.assert_frame_equal(result, expected) + + td = np.timedelta64(300000000) + lhs = DataFrame(Series([td, td], index=["A", "B"])) + rhs = DataFrame(Series([td], index=["A"])) + + result = lhs.join(rhs, rsuffix="r", how="left") + expected = DataFrame( + { + "0": Series([td, td], index=list("AB")), + "0r": Series([td, pd.NaT], index=list("AB")), + } + ) + tm.assert_frame_equal(result, expected) + + def test_other_datetime_unit(self): + # GH 13389 + df1 = pd.DataFrame({"entity_id": [101, 102]}) + s = pd.Series([None, None], index=[101, 102], name="days") + + for dtype in [ + "datetime64[D]", + "datetime64[h]", + "datetime64[m]", + "datetime64[s]", + "datetime64[ms]", + "datetime64[us]", + "datetime64[ns]", + ]: + + df2 = s.astype(dtype).to_frame("days") + # coerces to datetime64[ns], thus should not be affected + assert df2["days"].dtype == "datetime64[ns]" + + result = df1.merge(df2, left_on="entity_id", right_index=True) + + exp = pd.DataFrame( + { + "entity_id": [101, 102], + "days": np.array(["nat", "nat"], dtype="datetime64[ns]"), + }, + columns=["entity_id", "days"], + ) + tm.assert_frame_equal(result, exp) + + @pytest.mark.parametrize("unit", ["D", "h", "m", "s", "ms", "us", "ns"]) + def test_other_timedelta_unit(self, unit): + # GH 13389 + df1 = pd.DataFrame({"entity_id": [101, 102]}) + s = pd.Series([None, None], index=[101, 102], name="days") + + dtype = "m8[{}]".format(unit) + df2 = s.astype(dtype).to_frame("days") + assert df2["days"].dtype == "m8[ns]" + + result = df1.merge(df2, left_on="entity_id", right_index=True) + + exp = pd.DataFrame( + {"entity_id": [101, 102], 
"days": np.array(["nat", "nat"], dtype=dtype)}, + columns=["entity_id", "days"], + ) + tm.assert_frame_equal(result, exp) + + def test_overlapping_columns_error_message(self): + df = DataFrame({"key": [1, 2, 3], "v1": [4, 5, 6], "v2": [7, 8, 9]}) + df2 = DataFrame({"key": [1, 2, 3], "v1": [4, 5, 6], "v2": [7, 8, 9]}) + + df.columns = ["key", "foo", "foo"] + df2.columns = ["key", "bar", "bar"] + expected = DataFrame( + { + "key": [1, 2, 3], + "v1": [4, 5, 6], + "v2": [7, 8, 9], + "v3": [4, 5, 6], + "v4": [7, 8, 9], + } + ) + expected.columns = ["key", "foo", "foo", "bar", "bar"] + tm.assert_frame_equal(merge(df, df2), expected) + + # #2649, #10639 + df2.columns = ["key1", "foo", "foo"] + msg = r"Data columns not unique: Index\(\['foo', 'foo'\], dtype='object'\)" + with pytest.raises(MergeError, match=msg): + merge(df, df2) + + def test_merge_on_datetime64tz(self): + + # GH11405 + left = pd.DataFrame( + { + "key": pd.date_range("20151010", periods=2, tz="US/Eastern"), + "value": [1, 2], + } + ) + right = pd.DataFrame( + { + "key": pd.date_range("20151011", periods=3, tz="US/Eastern"), + "value": [1, 2, 3], + } + ) + + expected = DataFrame( + { + "key": pd.date_range("20151010", periods=4, tz="US/Eastern"), + "value_x": [1, 2, np.nan, np.nan], + "value_y": [np.nan, 1, 2, 3], + } + ) + result = pd.merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + + left = pd.DataFrame( + { + "key": [1, 2], + "value": pd.date_range("20151010", periods=2, tz="US/Eastern"), + } + ) + right = pd.DataFrame( + { + "key": [2, 3], + "value": pd.date_range("20151011", periods=2, tz="US/Eastern"), + } + ) + expected = DataFrame( + { + "key": [1, 2, 3], + "value_x": list(pd.date_range("20151010", periods=2, tz="US/Eastern")) + + [pd.NaT], + "value_y": [pd.NaT] + + list(pd.date_range("20151011", periods=2, tz="US/Eastern")), + } + ) + result = pd.merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + assert result["value_x"].dtype 
== "datetime64[ns, US/Eastern]" + assert result["value_y"].dtype == "datetime64[ns, US/Eastern]" + + def test_merge_on_datetime64tz_empty(self): + # https://github.com/pandas-dev/pandas/issues/25014 + dtz = pd.DatetimeTZDtype(tz="UTC") + right = pd.DataFrame( + { + "date": [pd.Timestamp("2018", tz=dtz.tz)], + "value": [4.0], + "date2": [pd.Timestamp("2019", tz=dtz.tz)], + }, + columns=["date", "value", "date2"], + ) + left = right[:0] + result = left.merge(right, on="date") + expected = pd.DataFrame( + { + "value_x": pd.Series(dtype=float), + "date2_x": pd.Series(dtype=dtz), + "date": pd.Series(dtype=dtz), + "value_y": pd.Series(dtype=float), + "date2_y": pd.Series(dtype=dtz), + }, + columns=["value_x", "date2_x", "date", "value_y", "date2_y"], + ) + tm.assert_frame_equal(result, expected) + + def test_merge_datetime64tz_with_dst_transition(self): + # GH 18885 + df1 = pd.DataFrame( + pd.date_range("2017-10-29 01:00", periods=4, freq="H", tz="Europe/Madrid"), + columns=["date"], + ) + df1["value"] = 1 + df2 = pd.DataFrame( + { + "date": pd.to_datetime( + [ + "2017-10-29 03:00:00", + "2017-10-29 04:00:00", + "2017-10-29 05:00:00", + ] + ), + "value": 2, + } + ) + df2["date"] = df2["date"].dt.tz_localize("UTC").dt.tz_convert("Europe/Madrid") + result = pd.merge(df1, df2, how="outer", on="date") + expected = pd.DataFrame( + { + "date": pd.date_range( + "2017-10-29 01:00", periods=7, freq="H", tz="Europe/Madrid" + ), + "value_x": [1] * 4 + [np.nan] * 3, + "value_y": [np.nan] * 4 + [2] * 3, + } + ) + tm.assert_frame_equal(result, expected) + + def test_merge_non_unique_period_index(self): + # GH #16871 + index = pd.period_range("2016-01-01", periods=16, freq="M") + df = DataFrame(list(range(len(index))), index=index, columns=["pnum"]) + df2 = concat([df, df]) + result = df.merge(df2, left_index=True, right_index=True, how="inner") + expected = DataFrame( + np.tile(np.arange(16, dtype=np.int64).repeat(2).reshape(-1, 1), 2), + columns=["pnum_x", "pnum_y"], + 
index=df2.sort_index().index, + ) + tm.assert_frame_equal(result, expected) + + def test_merge_on_periods(self): + left = pd.DataFrame( + {"key": pd.period_range("20151010", periods=2, freq="D"), "value": [1, 2]} + ) + right = pd.DataFrame( + { + "key": pd.period_range("20151011", periods=3, freq="D"), + "value": [1, 2, 3], + } + ) + + expected = DataFrame( + { + "key": pd.period_range("20151010", periods=4, freq="D"), + "value_x": [1, 2, np.nan, np.nan], + "value_y": [np.nan, 1, 2, 3], + } + ) + result = pd.merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + + left = pd.DataFrame( + {"key": [1, 2], "value": pd.period_range("20151010", periods=2, freq="D")} + ) + right = pd.DataFrame( + {"key": [2, 3], "value": pd.period_range("20151011", periods=2, freq="D")} + ) + + exp_x = pd.period_range("20151010", periods=2, freq="D") + exp_y = pd.period_range("20151011", periods=2, freq="D") + expected = DataFrame( + { + "key": [1, 2, 3], + "value_x": list(exp_x) + [pd.NaT], + "value_y": [pd.NaT] + list(exp_y), + } + ) + result = pd.merge(left, right, on="key", how="outer") + tm.assert_frame_equal(result, expected) + assert result["value_x"].dtype == "Period[D]" + assert result["value_y"].dtype == "Period[D]" + + def test_indicator(self): + # PR #10054. xref #7412 and closes #8790. 
+ df1 = DataFrame( + {"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]} + ) + df1_copy = df1.copy() + + df2 = DataFrame( + { + "col1": [1, 2, 3, 4, 5], + "col_conflict": [1, 2, 3, 4, 5], + "col_right": [2, 2, 2, 2, 2], + } + ) + df2_copy = df2.copy() + + df_result = DataFrame( + { + "col1": [0, 1, 2, 3, 4, 5], + "col_conflict_x": [1, 2, np.nan, np.nan, np.nan, np.nan], + "col_left": ["a", "b", np.nan, np.nan, np.nan, np.nan], + "col_conflict_y": [np.nan, 1, 2, 3, 4, 5], + "col_right": [np.nan, 2, 2, 2, 2, 2], + } + ) + df_result["_merge"] = Categorical( + [ + "left_only", + "both", + "right_only", + "right_only", + "right_only", + "right_only", + ], + categories=["left_only", "right_only", "both"], + ) + + df_result = df_result[ + [ + "col1", + "col_conflict_x", + "col_left", + "col_conflict_y", + "col_right", + "_merge", + ] + ] + + test = merge(df1, df2, on="col1", how="outer", indicator=True) + tm.assert_frame_equal(test, df_result) + test = df1.merge(df2, on="col1", how="outer", indicator=True) + tm.assert_frame_equal(test, df_result) + + # No side effects + tm.assert_frame_equal(df1, df1_copy) + tm.assert_frame_equal(df2, df2_copy) + + # Check with custom name + df_result_custom_name = df_result + df_result_custom_name = df_result_custom_name.rename( + columns={"_merge": "custom_name"} + ) + + test_custom_name = merge( + df1, df2, on="col1", how="outer", indicator="custom_name" + ) + tm.assert_frame_equal(test_custom_name, df_result_custom_name) + test_custom_name = df1.merge( + df2, on="col1", how="outer", indicator="custom_name" + ) + tm.assert_frame_equal(test_custom_name, df_result_custom_name) + + # Check only accepts strings and booleans + msg = "indicator option can only accept boolean or string arguments" + with pytest.raises(ValueError, match=msg): + merge(df1, df2, on="col1", how="outer", indicator=5) + with pytest.raises(ValueError, match=msg): + df1.merge(df2, on="col1", how="outer", indicator=5) + + # Check result integrity + + test2 
= merge(df1, df2, on="col1", how="left", indicator=True) + assert (test2._merge != "right_only").all() + test2 = df1.merge(df2, on="col1", how="left", indicator=True) + assert (test2._merge != "right_only").all() + + test3 = merge(df1, df2, on="col1", how="right", indicator=True) + assert (test3._merge != "left_only").all() + test3 = df1.merge(df2, on="col1", how="right", indicator=True) + assert (test3._merge != "left_only").all() + + test4 = merge(df1, df2, on="col1", how="inner", indicator=True) + assert (test4._merge == "both").all() + test4 = df1.merge(df2, on="col1", how="inner", indicator=True) + assert (test4._merge == "both").all() + + # Check if working name in df + for i in ["_right_indicator", "_left_indicator", "_merge"]: + df_badcolumn = DataFrame({"col1": [1, 2], i: [2, 2]}) + + msg = ( + "Cannot use `indicator=True` option when data contains a" + " column named {}|" + "Cannot use name of an existing column for indicator" + " column" + ).format(i) + with pytest.raises(ValueError, match=msg): + merge(df1, df_badcolumn, on="col1", how="outer", indicator=True) + with pytest.raises(ValueError, match=msg): + df1.merge(df_badcolumn, on="col1", how="outer", indicator=True) + + # Check for name conflict with custom name + df_badcolumn = DataFrame({"col1": [1, 2], "custom_column_name": [2, 2]}) + + msg = "Cannot use name of an existing column for indicator column" + with pytest.raises(ValueError, match=msg): + merge( + df1, + df_badcolumn, + on="col1", + how="outer", + indicator="custom_column_name", + ) + with pytest.raises(ValueError, match=msg): + df1.merge( + df_badcolumn, on="col1", how="outer", indicator="custom_column_name" + ) + + # Merge on multiple columns + df3 = DataFrame({"col1": [0, 1], "col2": ["a", "b"]}) + + df4 = DataFrame({"col1": [1, 1, 3], "col2": ["b", "x", "y"]}) + + hand_coded_result = DataFrame( + {"col1": [0, 1, 1, 3], "col2": ["a", "b", "x", "y"]} + ) + hand_coded_result["_merge"] = Categorical( + ["left_only", "both", "right_only", 
"right_only"], + categories=["left_only", "right_only", "both"], + ) + + test5 = merge(df3, df4, on=["col1", "col2"], how="outer", indicator=True) + tm.assert_frame_equal(test5, hand_coded_result) + test5 = df3.merge(df4, on=["col1", "col2"], how="outer", indicator=True) + tm.assert_frame_equal(test5, hand_coded_result) + + def test_validation(self): + left = DataFrame( + {"a": ["a", "b", "c", "d"], "b": ["cat", "dog", "weasel", "horse"]}, + index=range(4), + ) + + right = DataFrame( + { + "a": ["a", "b", "c", "d", "e"], + "c": ["meow", "bark", "um... weasel noise?", "nay", "chirp"], + }, + index=range(5), + ) + + # Make sure no side effects. + left_copy = left.copy() + right_copy = right.copy() + + result = merge(left, right, left_index=True, right_index=True, validate="1:1") + tm.assert_frame_equal(left, left_copy) + tm.assert_frame_equal(right, right_copy) + + # make sure merge still correct + expected = DataFrame( + { + "a_x": ["a", "b", "c", "d"], + "b": ["cat", "dog", "weasel", "horse"], + "a_y": ["a", "b", "c", "d"], + "c": ["meow", "bark", "um... weasel noise?", "nay"], + }, + index=range(4), + columns=["a_x", "b", "a_y", "c"], + ) + + result = merge( + left, right, left_index=True, right_index=True, validate="one_to_one" + ) + tm.assert_frame_equal(result, expected) + + expected_2 = DataFrame( + { + "a": ["a", "b", "c", "d"], + "b": ["cat", "dog", "weasel", "horse"], + "c": ["meow", "bark", "um... weasel noise?", "nay"], + }, + index=range(4), + ) + + result = merge(left, right, on="a", validate="1:1") + tm.assert_frame_equal(left, left_copy) + tm.assert_frame_equal(right, right_copy) + tm.assert_frame_equal(result, expected_2) + + result = merge(left, right, on="a", validate="one_to_one") + tm.assert_frame_equal(result, expected_2) + + # One index, one column + expected_3 = DataFrame( + { + "b": ["cat", "dog", "weasel", "horse"], + "a": ["a", "b", "c", "d"], + "c": ["meow", "bark", "um... 
weasel noise?", "nay"], + }, + columns=["b", "a", "c"], + index=range(4), + ) + + left_index_reset = left.set_index("a") + result = merge( + left_index_reset, + right, + left_index=True, + right_on="a", + validate="one_to_one", + ) + tm.assert_frame_equal(result, expected_3) + + # Dups on right + right_w_dups = right.append(pd.DataFrame({"a": ["e"], "c": ["moo"]}, index=[4])) + merge( + left, + right_w_dups, + left_index=True, + right_index=True, + validate="one_to_many", + ) + + msg = "Merge keys are not unique in right dataset; not a one-to-one merge" + with pytest.raises(MergeError, match=msg): + merge( + left, + right_w_dups, + left_index=True, + right_index=True, + validate="one_to_one", + ) + + with pytest.raises(MergeError, match=msg): + merge(left, right_w_dups, on="a", validate="one_to_one") + + # Dups on left + left_w_dups = left.append( + pd.DataFrame({"a": ["a"], "c": ["cow"]}, index=[3]), sort=True + ) + merge( + left_w_dups, + right, + left_index=True, + right_index=True, + validate="many_to_one", + ) + + msg = "Merge keys are not unique in left dataset; not a one-to-one merge" + with pytest.raises(MergeError, match=msg): + merge( + left_w_dups, + right, + left_index=True, + right_index=True, + validate="one_to_one", + ) + + with pytest.raises(MergeError, match=msg): + merge(left_w_dups, right, on="a", validate="one_to_one") + + # Dups on both + merge(left_w_dups, right_w_dups, on="a", validate="many_to_many") + + msg = "Merge keys are not unique in right dataset; not a many-to-one merge" + with pytest.raises(MergeError, match=msg): + merge( + left_w_dups, + right_w_dups, + left_index=True, + right_index=True, + validate="many_to_one", + ) + + msg = "Merge keys are not unique in left dataset; not a one-to-many merge" + with pytest.raises(MergeError, match=msg): + merge(left_w_dups, right_w_dups, on="a", validate="one_to_many") + + # Check invalid arguments + msg = "Not a valid argument for validate" + with pytest.raises(ValueError, match=msg): + 
merge(left, right, on="a", validate="jibberish") + + # Two column merge, dups in both, but jointly no dups. + left = DataFrame( + { + "a": ["a", "a", "b", "b"], + "b": [0, 1, 0, 1], + "c": ["cat", "dog", "weasel", "horse"], + }, + index=range(4), + ) + + right = DataFrame( + { + "a": ["a", "a", "b"], + "b": [0, 1, 0], + "d": ["meow", "bark", "um... weasel noise?"], + }, + index=range(3), + ) + + expected_multi = DataFrame( + { + "a": ["a", "a", "b"], + "b": [0, 1, 0], + "c": ["cat", "dog", "weasel"], + "d": ["meow", "bark", "um... weasel noise?"], + }, + index=range(3), + ) + + msg = ( + "Merge keys are not unique in either left or right dataset;" + " not a one-to-one merge" + ) + with pytest.raises(MergeError, match=msg): + merge(left, right, on="a", validate="1:1") + + result = merge(left, right, on=["a", "b"], validate="1:1") + tm.assert_frame_equal(result, expected_multi) + + def test_merge_two_empty_df_no_division_error(self): + # GH17776, PR #17846 + a = pd.DataFrame({"a": [], "b": [], "c": []}) + with np.errstate(divide="raise"): + merge(a, a, on=("a", "b")) + + @pytest.mark.parametrize("how", ["right", "outer"]) + @pytest.mark.parametrize( + "index,expected_index", + [ + ( + CategoricalIndex([1, 2, 4]), + CategoricalIndex([1, 2, 4, None, None, None]), + ), + ( + DatetimeIndex(["2001-01-01", "2002-02-02", "2003-03-03"]), + DatetimeIndex( + ["2001-01-01", "2002-02-02", "2003-03-03", pd.NaT, pd.NaT, pd.NaT] + ), + ), + (Float64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), + (Int64Index([1, 2, 3]), Float64Index([1, 2, 3, None, None, None])), + ( + IntervalIndex.from_tuples([(1, 2), (2, 3), (3, 4)]), + IntervalIndex.from_tuples( + [(1, 2), (2, 3), (3, 4), np.nan, np.nan, np.nan] + ), + ), + ( + PeriodIndex(["2001-01-01", "2001-01-02", "2001-01-03"], freq="D"), + PeriodIndex( + ["2001-01-01", "2001-01-02", "2001-01-03", pd.NaT, pd.NaT, pd.NaT], + freq="D", + ), + ), + ( + TimedeltaIndex(["1d", "2d", "3d"]), + TimedeltaIndex(["1d", "2d", "3d", 
pd.NaT, pd.NaT, pd.NaT]), + ), + ], + ) + def test_merge_on_index_with_more_values(self, how, index, expected_index): + # GH 24212 + # pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that + # -1 is interpreted as a missing value instead of the last element + df1 = pd.DataFrame({"a": [1, 2, 3], "key": [0, 2, 2]}, index=index) + df2 = pd.DataFrame({"b": [1, 2, 3, 4, 5]}) + result = df1.merge(df2, left_on="key", right_index=True, how=how) + expected = pd.DataFrame( + [ + [1.0, 0, 1], + [2.0, 2, 3], + [3.0, 2, 3], + [np.nan, 1, 2], + [np.nan, 3, 4], + [np.nan, 4, 5], + ], + columns=["a", "key", "b"], + ) + expected.set_index(expected_index, inplace=True) + tm.assert_frame_equal(result, expected) + + def test_merge_right_index_right(self): + # Note: the expected output here is probably incorrect. + # See https://github.com/pandas-dev/pandas/issues/17257 for more. + # We include this as a regression test for GH-24897. + left = pd.DataFrame({"a": [1, 2, 3], "key": [0, 1, 1]}) + right = pd.DataFrame({"b": [1, 2, 3]}) + + expected = pd.DataFrame( + {"a": [1, 2, 3, None], "key": [0, 1, 1, 2], "b": [1, 2, 2, 3]}, + columns=["a", "key", "b"], + index=[0, 1, 2, np.nan], + ) + result = left.merge(right, left_on="key", right_index=True, how="right") + tm.assert_frame_equal(result, expected) + + def test_merge_take_missing_values_from_index_of_other_dtype(self): + # GH 24212 + left = pd.DataFrame( + { + "a": [1, 2, 3], + "key": pd.Categorical(["a", "a", "b"], categories=list("abc")), + } + ) + right = pd.DataFrame( + {"b": [1, 2, 3]}, index=pd.CategoricalIndex(["a", "b", "c"]) + ) + result = left.merge(right, left_on="key", right_index=True, how="right") + expected = pd.DataFrame( + { + "a": [1, 2, 3, None], + "key": pd.Categorical(["a", "a", "b", "c"]), + "b": [1, 1, 2, 3], + }, + index=[0, 1, 2, np.nan], + ) + expected = expected.reindex(columns=["a", "key", "b"]) + tm.assert_frame_equal(result, expected) + + def test_merge_readonly(self): + # 
https://github.com/pandas-dev/pandas/issues/27943 + data1 = pd.DataFrame( + np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"] + ) + data2 = pd.DataFrame( + np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"] + ) + + data1._data.blocks[0].values.flags.writeable = False + data1.merge(data2) # no error + + +def _check_merge(x, y): + for how in ["inner", "left", "outer"]: + result = x.join(y, how=how) + + expected = merge(x.reset_index(), y.reset_index(), how=how, sort=True) + expected = expected.set_index("index") + + # TODO check_names on merge? + tm.assert_frame_equal(result, expected, check_names=False) + + +class TestMergeDtypes: + @pytest.mark.parametrize( + "right_vals", [["foo", "bar"], Series(["foo", "bar"]).astype("category")] + ) + def test_different(self, right_vals): + + left = DataFrame( + { + "A": ["foo", "bar"], + "B": Series(["foo", "bar"]).astype("category"), + "C": [1, 2], + "D": [1.0, 2.0], + "E": Series([1, 2], dtype="uint64"), + "F": Series([1, 2], dtype="int32"), + } + ) + right = DataFrame({"A": right_vals}) + + # GH 9780 + # We allow merging on object and categorical cols and cast + # categorical cols to object + result = pd.merge(left, right, on="A") + assert is_object_dtype(result.A.dtype) + + @pytest.mark.parametrize("d1", [np.int64, np.int32, np.int16, np.int8, np.uint8]) + @pytest.mark.parametrize("d2", [np.int64, np.float64, np.float32, np.float16]) + def test_join_multi_dtypes(self, d1, d2): + + dtype1 = np.dtype(d1) + dtype2 = np.dtype(d2) + + left = DataFrame( + { + "k1": np.array([0, 1, 2] * 8, dtype=dtype1), + "k2": ["foo", "bar"] * 12, + "v": np.array(np.arange(24), dtype=np.int64), + } + ) + + index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")]) + right = DataFrame({"v2": np.array([5, 7], dtype=dtype2)}, index=index) + + result = left.join(right, on=["k1", "k2"]) + + expected = left.copy() + + if dtype2.kind == "i": + dtype2 = np.dtype("float64") + expected["v2"] = np.array(np.nan, dtype=dtype2) + 
expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7 + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=["k1", "k2"], sort=True) + expected.sort_values(["k1", "k2"], kind="mergesort", inplace=True) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "int_vals, float_vals, exp_vals", + [ + ([1, 2, 3], [1.0, 2.0, 3.0], {"X": [1, 2, 3], "Y": [1.0, 2.0, 3.0]}), + ([1, 2, 3], [1.0, 3.0], {"X": [1, 3], "Y": [1.0, 3.0]}), + ([1, 2], [1.0, 2.0, 3.0], {"X": [1, 2], "Y": [1.0, 2.0]}), + ], + ) + def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals): + # GH 16572 + # Check that float column is not cast to object if + # merging on float and int columns + A = DataFrame({"X": int_vals}) + B = DataFrame({"Y": float_vals}) + expected = DataFrame(exp_vals) + + result = A.merge(B, left_on="X", right_on="Y") + tm.assert_frame_equal(result, expected) + + result = B.merge(A, left_on="Y", right_on="X") + tm.assert_frame_equal(result, expected[["Y", "X"]]) + + def test_merge_key_dtype_cast(self): + # GH 17044 + df1 = DataFrame({"key": [1.0, 2.0], "v1": [10, 20]}, columns=["key", "v1"]) + df2 = DataFrame({"key": [2], "v2": [200]}, columns=["key", "v2"]) + result = df1.merge(df2, on="key", how="left") + expected = DataFrame( + {"key": [1.0, 2.0], "v1": [10, 20], "v2": [np.nan, 200.0]}, + columns=["key", "v1", "v2"], + ) + tm.assert_frame_equal(result, expected) + + def test_merge_on_ints_floats_warning(self): + # GH 16572 + # merge will produce a warning when merging on int and + # float columns where the float values are not exactly + # equal to their int representation + A = DataFrame({"X": [1, 2, 3]}) + B = DataFrame({"Y": [1.1, 2.5, 3.0]}) + expected = DataFrame({"X": [3], "Y": [3.0]}) + + with tm.assert_produces_warning(UserWarning): + result = A.merge(B, left_on="X", right_on="Y") + tm.assert_frame_equal(result, expected) + + with 
tm.assert_produces_warning(UserWarning): + result = B.merge(A, left_on="Y", right_on="X") + tm.assert_frame_equal(result, expected[["Y", "X"]]) + + # test no warning if float has NaNs + B = DataFrame({"Y": [np.nan, np.nan, 3.0]}) + + with tm.assert_produces_warning(None): + result = B.merge(A, left_on="Y", right_on="X") + tm.assert_frame_equal(result, expected[["Y", "X"]]) + + def test_merge_incompat_infer_boolean_object(self): + # GH21119: bool + object bool merge OK + df1 = DataFrame({"key": Series([True, False], dtype=object)}) + df2 = DataFrame({"key": [True, False]}) + + expected = DataFrame({"key": [True, False]}, dtype=object) + result = pd.merge(df1, df2, on="key") + tm.assert_frame_equal(result, expected) + result = pd.merge(df2, df1, on="key") + tm.assert_frame_equal(result, expected) + + # with missing value + df1 = DataFrame({"key": Series([True, False, np.nan], dtype=object)}) + df2 = DataFrame({"key": [True, False]}) + + expected = DataFrame({"key": [True, False]}, dtype=object) + result = pd.merge(df1, df2, on="key") + tm.assert_frame_equal(result, expected) + result = pd.merge(df2, df1, on="key") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df1_vals, df2_vals", + [ + # merge on category coerces to object + ([0, 1, 2], Series(["a", "b", "a"]).astype("category")), + ([0.0, 1.0, 2.0], Series(["a", "b", "a"]).astype("category")), + # do not infer + ([0, 1], pd.Series([False, True], dtype=object)), + ([0, 1], pd.Series([False, True], dtype=bool)), + ], + ) + def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals): + # these are explicitly allowed incompat merges, that pass thru + # the result type is dependent on if the values on the rhs are + # inferred, otherwise these will be coerced to object + + df1 = DataFrame({"A": df1_vals}) + df2 = DataFrame({"A": df2_vals}) + + result = pd.merge(df1, df2, on=["A"]) + assert is_object_dtype(result.A.dtype) + result = pd.merge(df2, df1, on=["A"]) + assert 
is_object_dtype(result.A.dtype) + + @pytest.mark.parametrize( + "df1_vals, df2_vals", + [ + # do not infer to numeric + (Series([1, 2], dtype="uint64"), ["a", "b", "c"]), + (Series([1, 2], dtype="int32"), ["a", "b", "c"]), + ([0, 1, 2], ["0", "1", "2"]), + ([0.0, 1.0, 2.0], ["0", "1", "2"]), + ([0, 1, 2], ["0", "1", "2"]), + ( + pd.date_range("1/1/2011", periods=2, freq="D"), + ["2011-01-01", "2011-01-02"], + ), + (pd.date_range("1/1/2011", periods=2, freq="D"), [0, 1]), + (pd.date_range("1/1/2011", periods=2, freq="D"), [0.0, 1.0]), + ( + pd.date_range("20130101", periods=3), + pd.date_range("20130101", periods=3, tz="US/Eastern"), + ), + ], + ) + def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): + # GH 9780, GH 15800 + # Raise a ValueError when a user tries to merge on + # dtypes that are incompatible (e.g., obj and int/float) + + df1 = DataFrame({"A": df1_vals}) + df2 = DataFrame({"A": df2_vals}) + + msg = ( + "You are trying to merge on {lk_dtype} and " + "{rk_dtype} columns. If you wish to proceed " + "you should use pd.concat".format( + lk_dtype=df1["A"].dtype, rk_dtype=df2["A"].dtype + ) + ) + msg = re.escape(msg) + with pytest.raises(ValueError, match=msg): + pd.merge(df1, df2, on=["A"]) + + # Check that error still raised when swapping order of dataframes + msg = ( + "You are trying to merge on {lk_dtype} and " + "{rk_dtype} columns. 
If you wish to proceed " + "you should use pd.concat".format( + lk_dtype=df2["A"].dtype, rk_dtype=df1["A"].dtype + ) + ) + msg = re.escape(msg) + with pytest.raises(ValueError, match=msg): + pd.merge(df2, df1, on=["A"]) + + +@pytest.fixture +def left(): + np.random.seed(1234) + return DataFrame( + { + "X": Series(np.random.choice(["foo", "bar"], size=(10,))).astype( + CDT(["foo", "bar"]) + ), + "Y": np.random.choice(["one", "two", "three"], size=(10,)), + } + ) + + +@pytest.fixture +def right(): + np.random.seed(1234) + return DataFrame( + {"X": Series(["foo", "bar"]).astype(CDT(["foo", "bar"])), "Z": [1, 2]} + ) + + +class TestMergeCategorical: + def test_identical(self, left): + # merging on the same, should preserve dtypes + merged = pd.merge(left, left, on="X") + result = merged.dtypes.sort_index() + expected = Series( + [CategoricalDtype(), np.dtype("O"), np.dtype("O")], + index=["X", "Y_x", "Y_y"], + ) + tm.assert_series_equal(result, expected) + + def test_basic(self, left, right): + # we have matching Categorical dtypes in X + # so should preserve the merged column + merged = pd.merge(left, right, on="X") + result = merged.dtypes.sort_index() + expected = Series( + [CategoricalDtype(), np.dtype("O"), np.dtype("int64")], + index=["X", "Y", "Z"], + ) + tm.assert_series_equal(result, expected) + + def test_merge_categorical(self): + # GH 9426 + + right = DataFrame( + { + "c": {0: "a", 1: "b", 2: "c", 3: "d", 4: "e"}, + "d": {0: "null", 1: "null", 2: "null", 3: "null", 4: "null"}, + } + ) + left = DataFrame( + { + "a": {0: "f", 1: "f", 2: "f", 3: "f", 4: "f"}, + "b": {0: "g", 1: "g", 2: "g", 3: "g", 4: "g"}, + } + ) + df = pd.merge(left, right, how="left", left_on="b", right_on="c") + + # object-object + expected = df.copy() + + # object-cat + # note that we propagate the category + # because we don't have any matching rows + cright = right.copy() + cright["d"] = cright["d"].astype("category") + result = pd.merge(left, cright, how="left", left_on="b", 
right_on="c") + expected["d"] = expected["d"].astype(CategoricalDtype(["null"])) + tm.assert_frame_equal(result, expected) + + # cat-object + cleft = left.copy() + cleft["b"] = cleft["b"].astype("category") + result = pd.merge(cleft, cright, how="left", left_on="b", right_on="c") + tm.assert_frame_equal(result, expected) + + # cat-cat + cright = right.copy() + cright["d"] = cright["d"].astype("category") + cleft = left.copy() + cleft["b"] = cleft["b"].astype("category") + result = pd.merge(cleft, cright, how="left", left_on="b", right_on="c") + tm.assert_frame_equal(result, expected) + + def tests_merge_categorical_unordered_equal(self): + # GH-19551 + df1 = DataFrame( + { + "Foo": Categorical(["A", "B", "C"], categories=["A", "B", "C"]), + "Left": ["A0", "B0", "C0"], + } + ) + + df2 = DataFrame( + { + "Foo": Categorical(["C", "B", "A"], categories=["C", "B", "A"]), + "Right": ["C1", "B1", "A1"], + } + ) + result = pd.merge(df1, df2, on=["Foo"]) + expected = DataFrame( + { + "Foo": pd.Categorical(["A", "B", "C"]), + "Left": ["A0", "B0", "C0"], + "Right": ["A1", "B1", "C1"], + } + ) + tm.assert_frame_equal(result, expected) + + def test_other_columns(self, left, right): + # non-merge columns should preserve if possible + right = right.assign(Z=right.Z.astype("category")) + + merged = pd.merge(left, right, on="X") + result = merged.dtypes.sort_index() + expected = Series( + [CategoricalDtype(), np.dtype("O"), CategoricalDtype()], + index=["X", "Y", "Z"], + ) + tm.assert_series_equal(result, expected) + + # categories are preserved + assert left.X.values.is_dtype_equal(merged.X.values) + assert right.Z.values.is_dtype_equal(merged.Z.values) + + @pytest.mark.parametrize( + "change", + [ + lambda x: x, + lambda x: x.astype(CDT(["foo", "bar", "bah"])), + lambda x: x.astype(CDT(ordered=True)), + ], + ) + def test_dtype_on_merged_different(self, change, join_type, left, right): + # our merging columns, X now has 2 different dtypes + # so we must be object as a result + + X 
= change(right.X.astype("object")) + right = right.assign(X=X) + assert is_categorical_dtype(left.X.values) + # assert not left.X.values.is_dtype_equal(right.X.values) + + merged = pd.merge(left, right, on="X", how=join_type) + + result = merged.dtypes.sort_index() + expected = Series( + [np.dtype("O"), np.dtype("O"), np.dtype("int64")], index=["X", "Y", "Z"] + ) + tm.assert_series_equal(result, expected) + + def test_self_join_multiple_categories(self): + # GH 16767 + # non-duplicates should work with multiple categories + m = 5 + df = pd.DataFrame( + { + "a": ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] * m, + "b": ["t", "w", "x", "y", "z"] * 2 * m, + "c": [ + letter + for each in ["m", "n", "u", "p", "o"] + for letter in [each] * 2 * m + ], + "d": [ + letter + for each in [ + "aa", + "bb", + "cc", + "dd", + "ee", + "ff", + "gg", + "hh", + "ii", + "jj", + ] + for letter in [each] * m + ], + } + ) + + # change them all to categorical variables + df = df.apply(lambda x: x.astype("category")) + + # self-join should equal ourselves + result = pd.merge(df, df, on=list(df.columns)) + + tm.assert_frame_equal(result, df) + + def test_dtype_on_categorical_dates(self): + # GH 16900 + # dates should not be coerced to ints + + df = pd.DataFrame( + [[date(2001, 1, 1), 1.1], [date(2001, 1, 2), 1.3]], columns=["date", "num2"] + ) + df["date"] = df["date"].astype("category") + + df2 = pd.DataFrame( + [[date(2001, 1, 1), 1.3], [date(2001, 1, 3), 1.4]], columns=["date", "num4"] + ) + df2["date"] = df2["date"].astype("category") + + expected_outer = pd.DataFrame( + [ + [pd.Timestamp("2001-01-01"), 1.1, 1.3], + [pd.Timestamp("2001-01-02"), 1.3, np.nan], + [pd.Timestamp("2001-01-03"), np.nan, 1.4], + ], + columns=["date", "num2", "num4"], + ) + result_outer = pd.merge(df, df2, how="outer", on=["date"]) + tm.assert_frame_equal(result_outer, expected_outer) + + expected_inner = pd.DataFrame( + [[pd.Timestamp("2001-01-01"), 1.1, 1.3]], columns=["date", "num2", "num4"] + ) + 
result_inner = pd.merge(df, df2, how="inner", on=["date"]) + tm.assert_frame_equal(result_inner, expected_inner) + + @pytest.mark.parametrize("ordered", [True, False]) + @pytest.mark.parametrize( + "category_column,categories,expected_categories", + [ + ([False, True, True, False], [True, False], [True, False]), + ([2, 1, 1, 2], [1, 2], [1, 2]), + (["False", "True", "True", "False"], ["True", "False"], ["True", "False"]), + ], + ) + def test_merging_with_bool_or_int_cateorical_column( + self, category_column, categories, expected_categories, ordered + ): + # GH 17187 + # merging with a boolean/int categorical column + df1 = pd.DataFrame({"id": [1, 2, 3, 4], "cat": category_column}) + df1["cat"] = df1["cat"].astype(CDT(categories, ordered=ordered)) + df2 = pd.DataFrame({"id": [2, 4], "num": [1, 9]}) + result = df1.merge(df2) + expected = pd.DataFrame( + {"id": [2, 4], "cat": expected_categories, "num": [1, 9]} + ) + expected["cat"] = expected["cat"].astype(CDT(categories, ordered=ordered)) + tm.assert_frame_equal(expected, result) + + def test_merge_on_int_array(self): + # GH 23020 + df = pd.DataFrame({"A": pd.Series([1, 2, np.nan], dtype="Int64"), "B": 1}) + result = pd.merge(df, df, on="A") + expected = pd.DataFrame( + {"A": pd.Series([1, 2, np.nan], dtype="Int64"), "B_x": 1, "B_y": 1} + ) + tm.assert_frame_equal(result, expected) + + +@pytest.fixture +def left_df(): + return DataFrame({"a": [20, 10, 0]}, index=[2, 1, 0]) + + +@pytest.fixture +def right_df(): + return DataFrame({"b": [300, 100, 200]}, index=[3, 1, 2]) + + +class TestMergeOnIndexes: + @pytest.mark.parametrize( + "how, sort, expected", + [ + ("inner", False, DataFrame({"a": [20, 10], "b": [200, 100]}, index=[2, 1])), + ("inner", True, DataFrame({"a": [10, 20], "b": [100, 200]}, index=[1, 2])), + ( + "left", + False, + DataFrame({"a": [20, 10, 0], "b": [200, 100, np.nan]}, index=[2, 1, 0]), + ), + ( + "left", + True, + DataFrame({"a": [0, 10, 20], "b": [np.nan, 100, 200]}, index=[0, 1, 2]), + ), + ( 
+ "right", + False, + DataFrame( + {"a": [np.nan, 10, 20], "b": [300, 100, 200]}, index=[3, 1, 2] + ), + ), + ( + "right", + True, + DataFrame( + {"a": [10, 20, np.nan], "b": [100, 200, 300]}, index=[1, 2, 3] + ), + ), + ( + "outer", + False, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ( + "outer", + True, + DataFrame( + {"a": [0, 10, 20, np.nan], "b": [np.nan, 100, 200, 300]}, + index=[0, 1, 2, 3], + ), + ), + ], + ) + def test_merge_on_indexes(self, left_df, right_df, how, sort, expected): + result = pd.merge( + left_df, right_df, left_index=True, right_index=True, how=how, sort=sort + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "index", + [ + CategoricalIndex(["A", "B"], categories=["A", "B"], name="index_col"), + Float64Index([1.0, 2.0], name="index_col"), + Int64Index([1, 2], name="index_col"), + UInt64Index([1, 2], name="index_col"), + RangeIndex(start=0, stop=2, name="index_col"), + DatetimeIndex(["2018-01-01", "2018-01-02"], name="index_col"), + ], + ids=lambda x: type(x).__name__, +) +def test_merge_index_types(index): + # gh-20777 + # assert key access is consistent across index types + left = DataFrame({"left_data": [1, 2]}, index=index) + right = DataFrame({"right_data": [1.0, 2.0]}, index=index) + + result = left.merge(right, on=["index_col"]) + + expected = DataFrame( + OrderedDict([("left_data", [1, 2]), ("right_data", [1.0, 2.0])]), index=index + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "on,left_on,right_on,left_index,right_index,nm", + [ + (["outer", "inner"], None, None, False, False, "B"), + (None, None, None, True, True, "B"), + (None, ["outer", "inner"], None, False, True, "B"), + (None, None, ["outer", "inner"], True, False, "B"), + (["outer", "inner"], None, None, False, False, None), + (None, None, None, True, True, None), + (None, ["outer", "inner"], None, False, True, None), + (None, None, ["outer", "inner"], 
True, False, None), + ], +) +def test_merge_series(on, left_on, right_on, left_index, right_index, nm): + # GH 21220 + a = pd.DataFrame( + {"A": [1, 2, 3, 4]}, + index=pd.MultiIndex.from_product( + [["a", "b"], [0, 1]], names=["outer", "inner"] + ), + ) + b = pd.Series( + [1, 2, 3, 4], + index=pd.MultiIndex.from_product( + [["a", "b"], [1, 2]], names=["outer", "inner"] + ), + name=nm, + ) + expected = pd.DataFrame( + {"A": [2, 4], "B": [1, 3]}, + index=pd.MultiIndex.from_product([["a", "b"], [1]], names=["outer", "inner"]), + ) + if nm is not None: + result = pd.merge( + a, + b, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + ) + tm.assert_frame_equal(result, expected) + else: + msg = "Cannot merge a Series without a name" + with pytest.raises(ValueError, match=msg): + result = pd.merge( + a, + b, + on=on, + left_on=left_on, + right_on=right_on, + left_index=left_index, + right_index=right_index, + ) + + +@pytest.mark.parametrize( + "col1, col2, kwargs, expected_cols", + [ + (0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]), + (0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]), + (0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]), + ("a", 0, dict(suffixes=(None, "_y")), ["a", 0]), + (0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]), + ("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]), + ("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]), + ("a", "b", dict(suffixes=("_x", None)), ["a", "b"]), + ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), + (0, 0, dict(suffixes=["_a", None]), ["0_a", 0]), + ("a", "a", dict(), ["a_x", "a_y"]), + (0, 0, dict(), ["0_x", "0_y"]), + ], +) +def test_merge_suffix(col1, col2, kwargs, expected_cols): + # issue: 24782 + a = pd.DataFrame({col1: [1, 2, 3]}) + b = pd.DataFrame({col2: [4, 5, 6]}) + + expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]], columns=expected_cols) + + result = a.merge(b, left_index=True, right_index=True, **kwargs) + 
tm.assert_frame_equal(result, expected) + + result = pd.merge(a, b, left_index=True, right_index=True, **kwargs) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "col1, col2, suffixes", + [ + ("a", "a", [None, None]), + ("a", "a", (None, None)), + ("a", "a", ("", None)), + (0, 0, [None, None]), + (0, 0, (None, "")), + ], +) +def test_merge_suffix_error(col1, col2, suffixes): + # issue: 24782 + a = pd.DataFrame({col1: [1, 2, 3]}) + b = pd.DataFrame({col2: [3, 4, 5]}) + + # TODO: might reconsider current raise behaviour, see issue 24782 + msg = "columns overlap but no suffix specified" + with pytest.raises(ValueError, match=msg): + pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) + + +@pytest.mark.parametrize("col1, col2, suffixes", [("a", "a", None), (0, 0, None)]) +def test_merge_suffix_none_error(col1, col2, suffixes): + # issue: 24782 + a = pd.DataFrame({col1: [1, 2, 3]}) + b = pd.DataFrame({col2: [3, 4, 5]}) + + # TODO: might reconsider current raise behaviour, see GH24782 + msg = "iterable" + with pytest.raises(TypeError, match=msg): + pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) + + +@pytest.mark.parametrize("cat_dtype", ["one", "two"]) +@pytest.mark.parametrize("reverse", [True, False]) +def test_merge_equal_cat_dtypes(cat_dtype, reverse): + # see gh-22501 + cat_dtypes = { + "one": CategoricalDtype(categories=["a", "b", "c"], ordered=False), + "two": CategoricalDtype(categories=["a", "b", "c"], ordered=False), + } + + df1 = DataFrame( + {"foo": Series(["a", "b", "c"]).astype(cat_dtypes["one"]), "left": [1, 2, 3]} + ).set_index("foo") + + data_foo = ["a", "b", "c"] + data_right = [1, 2, 3] + + if reverse: + data_foo.reverse() + data_right.reverse() + + df2 = DataFrame( + {"foo": Series(data_foo).astype(cat_dtypes[cat_dtype]), "right": data_right} + ).set_index("foo") + + result = df1.merge(df2, left_index=True, right_index=True) + + expected = DataFrame( + { + "left": [1, 2, 3], + "right": 
[1, 2, 3], + "foo": Series(["a", "b", "c"]).astype(cat_dtypes["one"]), + } + ).set_index("foo") + + # Categorical is unordered, so don't check ordering. + tm.assert_frame_equal(result, expected, check_categorical=False) + + +def test_merge_equal_cat_dtypes2(): + # see gh-22501 + cat_dtype = CategoricalDtype(categories=["a", "b", "c"], ordered=False) + + # Test Data + df1 = DataFrame( + {"foo": Series(["a", "b"]).astype(cat_dtype), "left": [1, 2]} + ).set_index("foo") + + df2 = DataFrame( + {"foo": Series(["a", "b", "c"]).astype(cat_dtype), "right": [3, 2, 1]} + ).set_index("foo") + + result = df1.merge(df2, left_index=True, right_index=True) + + expected = DataFrame( + {"left": [1, 2], "right": [3, 2], "foo": Series(["a", "b"]).astype(cat_dtype)} + ).set_index("foo") + + # Categorical is unordered, so don't check ordering. + tm.assert_frame_equal(result, expected, check_categorical=False) + + +def test_merge_on_cat_and_ext_array(): + # GH 28668 + right = DataFrame( + {"a": Series([pd.Interval(0, 1), pd.Interval(1, 2)], dtype="interval")} + ) + left = right.copy() + left["a"] = left["a"].astype("category") + + result = pd.merge(left, right, how="inner", on="a") + expected = right.copy() + + tm.assert_frame_equal(result, expected) + + +def test_merge_multiindex_columns(): + # Issue #28518 + # Verify that merging two dataframes gives the expected labels + # The original cause of this issue comes from a bug in lexsort_depth and is tested + # in test_lexsort_depth + + letters = ["a", "b", "c", "d"] + numbers = ["1", "2", "3"] + index = pd.MultiIndex.from_product((letters, numbers), names=["outer", "inner"]) + + frame_x = pd.DataFrame(columns=index) + frame_x["id"] = "" + frame_y = pd.DataFrame(columns=index) + frame_y["id"] = "" + + l_suf = "_x" + r_suf = "_y" + result = frame_x.merge(frame_y, on="id", suffixes=((l_suf, r_suf))) + + # Constructing the expected results + expected_labels = [l + l_suf for l in letters] + [l + r_suf for l in letters] + expected_index = 
pd.MultiIndex.from_product( + [expected_labels, numbers], names=["outer", "inner"] + ) + expected = pd.DataFrame(columns=expected_index) + expected["id"] = "" + + tm.assert_frame_equal(result, expected) + + +def test_merge_datetime_upcast_dtype(): + # https://github.com/pandas-dev/pandas/issues/31208 + df1 = pd.DataFrame({"x": ["a", "b", "c"], "y": ["1", "2", "4"]}) + df2 = pd.DataFrame( + {"y": ["1", "2", "3"], "z": pd.to_datetime(["2000", "2001", "2002"])} + ) + result = pd.merge(df1, df2, how="left", on="y") + expected = pd.DataFrame( + { + "x": ["a", "b", "c"], + "y": ["1", "2", "4"], + "z": pd.to_datetime(["2000", "2001", "NaT"]), + } + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py new file mode 100644 index 00000000..8037095a --- /dev/null +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -0,0 +1,1343 @@ +import datetime + +import numpy as np +import pytest +import pytz + +import pandas as pd +from pandas import Timedelta, merge_asof, read_csv, to_datetime +import pandas._testing as tm +from pandas.core.reshape.merge import MergeError + + +class TestAsOfMerge: + def read_data(self, datapath, name, dedupe=False): + path = datapath("reshape", "merge", "data", name) + x = read_csv(path) + if dedupe: + x = x.drop_duplicates(["time", "ticker"], keep="last").reset_index( + drop=True + ) + x.time = to_datetime(x.time) + return x + + @pytest.fixture(autouse=True) + def setup_method(self, datapath): + + self.trades = self.read_data(datapath, "trades.csv") + self.quotes = self.read_data(datapath, "quotes.csv", dedupe=True) + self.asof = self.read_data(datapath, "asof.csv") + self.tolerance = self.read_data(datapath, "tolerance.csv") + self.allow_exact_matches = self.read_data(datapath, "allow_exact_matches.csv") + self.allow_exact_matches_and_tolerance = self.read_data( + datapath, "allow_exact_matches_and_tolerance.csv" + ) + + def test_examples1(self): + """ 
doc-string examples """ + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 3, 7]} + ) + + result = pd.merge_asof(left, right, on="a") + tm.assert_frame_equal(result, expected) + + def test_examples2(self): + """ doc-string examples """ + + trades = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.038", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.048", + "20160525 13:30:00.049", + "20160525 13:30:00.072", + "20160525 13:30:00.075", + ] + ), + "ticker": [ + "GOOG", + "MSFT", + "MSFT", + "MSFT", + "GOOG", + "AAPL", + "GOOG", + "MSFT", + ], + "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], + "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], + }, + columns=["time", "ticker", "bid", "ask"], + ) + + pd.merge_asof(trades, quotes, on="time", by="ticker") + + pd.merge_asof( + trades, quotes, on="time", by="ticker", tolerance=pd.Timedelta("2ms") + ) + + expected = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.038", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + "20160525 13:30:00.048", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + "bid": [np.nan, 51.97, np.nan, np.nan, np.nan], + "ask": [np.nan, 51.98, 
np.nan, np.nan, np.nan], + }, + columns=["time", "ticker", "price", "quantity", "bid", "ask"], + ) + + result = pd.merge_asof( + trades, + quotes, + on="time", + by="ticker", + tolerance=pd.Timedelta("10ms"), + allow_exact_matches=False, + ) + tm.assert_frame_equal(result, expected) + + def test_examples3(self): + """ doc-string examples """ + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, np.nan]} + ) + + result = pd.merge_asof(left, right, on="a", direction="forward") + tm.assert_frame_equal(result, expected) + + def test_examples4(self): + """ doc-string examples """ + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, 6, 7]} + ) + + result = pd.merge_asof(left, right, on="a", direction="nearest") + tm.assert_frame_equal(result, expected) + + def test_basic(self): + + expected = self.asof + trades = self.trades + quotes = self.quotes + + result = merge_asof(trades, quotes, on="time", by="ticker") + tm.assert_frame_equal(result, expected) + + def test_basic_categorical(self): + + expected = self.asof + trades = self.trades.copy() + trades.ticker = trades.ticker.astype("category") + quotes = self.quotes.copy() + quotes.ticker = quotes.ticker.astype("category") + expected.ticker = expected.ticker.astype("category") + + result = merge_asof(trades, quotes, on="time", by="ticker") + tm.assert_frame_equal(result, expected) + + def test_basic_left_index(self): + + # GH14253 + expected = self.asof + trades = self.trades.set_index("time") + quotes = self.quotes + + result = merge_asof( + trades, quotes, left_index=True, right_on="time", by="ticker" + ) + # left-only 
index uses right's index, oddly + expected.index = result.index + # time column appears after left's columns + expected = expected[result.columns] + tm.assert_frame_equal(result, expected) + + def test_basic_right_index(self): + + expected = self.asof + trades = self.trades + quotes = self.quotes.set_index("time") + + result = merge_asof( + trades, quotes, left_on="time", right_index=True, by="ticker" + ) + tm.assert_frame_equal(result, expected) + + def test_basic_left_index_right_index(self): + + expected = self.asof.set_index("time") + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + + result = merge_asof( + trades, quotes, left_index=True, right_index=True, by="ticker" + ) + tm.assert_frame_equal(result, expected) + + def test_multi_index(self): + + # MultiIndex is prohibited + trades = self.trades.set_index(["time", "price"]) + quotes = self.quotes.set_index("time") + with pytest.raises(MergeError): + merge_asof(trades, quotes, left_index=True, right_index=True) + + trades = self.trades.set_index("time") + quotes = self.quotes.set_index(["time", "bid"]) + with pytest.raises(MergeError): + merge_asof(trades, quotes, left_index=True, right_index=True) + + def test_on_and_index(self): + + # "on" parameter and index together is prohibited + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + with pytest.raises(MergeError): + merge_asof( + trades, quotes, left_on="price", left_index=True, right_index=True + ) + + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + with pytest.raises(MergeError): + merge_asof( + trades, quotes, right_on="bid", left_index=True, right_index=True + ) + + def test_basic_left_by_right_by(self): + + # GH14253 + expected = self.asof + trades = self.trades + quotes = self.quotes + + result = merge_asof( + trades, quotes, on="time", left_by="ticker", right_by="ticker" + ) + tm.assert_frame_equal(result, expected) + + def test_missing_right_by(self): 
+ + expected = self.asof + trades = self.trades + quotes = self.quotes + + q = quotes[quotes.ticker != "MSFT"] + result = merge_asof(trades, q, on="time", by="ticker") + expected.loc[expected.ticker == "MSFT", ["bid", "ask"]] = np.nan + tm.assert_frame_equal(result, expected) + + def test_multiby(self): + # GH13936 + trades = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "exch", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.045", + "20160525 13:30:00.049", + ] + ), + "ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL"], + "exch": ["BATS", "NSDQ", "ARCA", "ARCA", "NSDQ", "ARCA"], + "bid": [720.51, 51.95, 51.97, 51.99, 720.50, 97.99], + "ask": [720.92, 51.96, 51.98, 52.00, 720.93, 98.01], + }, + columns=["time", "ticker", "exch", "bid", "ask"], + ) + + expected = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], + "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + "bid": [np.nan, 51.95, 720.50, 720.51, np.nan], + "ask": [np.nan, 51.96, 720.93, 720.92, np.nan], + }, + columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"], + ) + + result = pd.merge_asof(trades, quotes, on="time", by=["ticker", "exch"]) + 
tm.assert_frame_equal(result, expected) + + def test_multiby_heterogeneous_types(self): + # GH13936 + trades = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": [0, 0, 1, 1, 2], + "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + }, + columns=["time", "ticker", "exch", "price", "quantity"], + ) + + quotes = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.030", + "20160525 13:30:00.041", + "20160525 13:30:00.045", + "20160525 13:30:00.049", + ] + ), + "ticker": [1, 0, 0, 0, 1, 2], + "exch": ["BATS", "NSDQ", "ARCA", "ARCA", "NSDQ", "ARCA"], + "bid": [720.51, 51.95, 51.97, 51.99, 720.50, 97.99], + "ask": [720.92, 51.96, 51.98, 52.00, 720.93, 98.01], + }, + columns=["time", "ticker", "exch", "bid", "ask"], + ) + + expected = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.023", + "20160525 13:30:00.023", + "20160525 13:30:00.046", + "20160525 13:30:00.048", + "20160525 13:30:00.050", + ] + ), + "ticker": [0, 0, 1, 1, 2], + "exch": ["ARCA", "NSDQ", "NSDQ", "BATS", "NSDQ"], + "price": [51.95, 51.95, 720.77, 720.92, 98.00], + "quantity": [75, 155, 100, 100, 100], + "bid": [np.nan, 51.95, 720.50, 720.51, np.nan], + "ask": [np.nan, 51.96, 720.93, 720.92, np.nan], + }, + columns=["time", "ticker", "exch", "price", "quantity", "bid", "ask"], + ) + + result = pd.merge_asof(trades, quotes, on="time", by=["ticker", "exch"]) + tm.assert_frame_equal(result, expected) + + def test_multiby_indexed(self): + # GH15676 + left = pd.DataFrame( + [ + [pd.to_datetime("20160602"), 1, "a"], + [pd.to_datetime("20160602"), 2, "a"], + [pd.to_datetime("20160603"), 1, "b"], + [pd.to_datetime("20160603"), 2, "b"], + ], + columns=["time", "k1", "k2"], + 
).set_index("time") + + right = pd.DataFrame( + [ + [pd.to_datetime("20160502"), 1, "a", 1.0], + [pd.to_datetime("20160502"), 2, "a", 2.0], + [pd.to_datetime("20160503"), 1, "b", 3.0], + [pd.to_datetime("20160503"), 2, "b", 4.0], + ], + columns=["time", "k1", "k2", "value"], + ).set_index("time") + + expected = pd.DataFrame( + [ + [pd.to_datetime("20160602"), 1, "a", 1.0], + [pd.to_datetime("20160602"), 2, "a", 2.0], + [pd.to_datetime("20160603"), 1, "b", 3.0], + [pd.to_datetime("20160603"), 2, "b", 4.0], + ], + columns=["time", "k1", "k2", "value"], + ).set_index("time") + + result = pd.merge_asof( + left, right, left_index=True, right_index=True, by=["k1", "k2"] + ) + + tm.assert_frame_equal(expected, result) + + with pytest.raises(MergeError): + pd.merge_asof( + left, + right, + left_index=True, + right_index=True, + left_by=["k1", "k2"], + right_by=["k1"], + ) + + def test_basic2(self, datapath): + + expected = self.read_data(datapath, "asof2.csv") + trades = self.read_data(datapath, "trades2.csv") + quotes = self.read_data(datapath, "quotes2.csv", dedupe=True) + + result = merge_asof(trades, quotes, on="time", by="ticker") + tm.assert_frame_equal(result, expected) + + def test_basic_no_by(self): + f = ( + lambda x: x[x.ticker == "MSFT"] + .drop("ticker", axis=1) + .reset_index(drop=True) + ) + + # just use a single ticker + expected = f(self.asof) + trades = f(self.trades) + quotes = f(self.quotes) + + result = merge_asof(trades, quotes, on="time") + tm.assert_frame_equal(result, expected) + + def test_valid_join_keys(self): + + trades = self.trades + quotes = self.quotes + + with pytest.raises(MergeError): + merge_asof(trades, quotes, left_on="time", right_on="bid", by="ticker") + + with pytest.raises(MergeError): + merge_asof(trades, quotes, on=["time", "ticker"], by="ticker") + + with pytest.raises(MergeError): + merge_asof(trades, quotes, by="ticker") + + def test_with_duplicates(self, datapath): + + q = ( + pd.concat([self.quotes, self.quotes]) + 
.sort_values(["time", "ticker"]) + .reset_index(drop=True) + ) + result = merge_asof(self.trades, q, on="time", by="ticker") + expected = self.read_data(datapath, "asof.csv") + tm.assert_frame_equal(result, expected) + + def test_with_duplicates_no_on(self): + + df1 = pd.DataFrame({"key": [1, 1, 3], "left_val": [1, 2, 3]}) + df2 = pd.DataFrame({"key": [1, 2, 2], "right_val": [1, 2, 3]}) + result = merge_asof(df1, df2, on="key") + expected = pd.DataFrame( + {"key": [1, 1, 3], "left_val": [1, 2, 3], "right_val": [1, 1, 3]} + ) + tm.assert_frame_equal(result, expected) + + def test_valid_allow_exact_matches(self): + + trades = self.trades + quotes = self.quotes + + with pytest.raises(MergeError): + merge_asof( + trades, quotes, on="time", by="ticker", allow_exact_matches="foo" + ) + + def test_valid_tolerance(self): + + trades = self.trades + quotes = self.quotes + + # dti + merge_asof(trades, quotes, on="time", by="ticker", tolerance=Timedelta("1s")) + + # integer + merge_asof( + trades.reset_index(), + quotes.reset_index(), + on="index", + by="ticker", + tolerance=1, + ) + + # incompat + with pytest.raises(MergeError): + merge_asof(trades, quotes, on="time", by="ticker", tolerance=1) + + # invalid + with pytest.raises(MergeError): + merge_asof( + trades.reset_index(), + quotes.reset_index(), + on="index", + by="ticker", + tolerance=1.0, + ) + + # invalid negative + with pytest.raises(MergeError): + merge_asof( + trades, quotes, on="time", by="ticker", tolerance=-Timedelta("1s") + ) + + with pytest.raises(MergeError): + merge_asof( + trades.reset_index(), + quotes.reset_index(), + on="index", + by="ticker", + tolerance=-1, + ) + + def test_non_sorted(self): + + trades = self.trades.sort_values("time", ascending=False) + quotes = self.quotes.sort_values("time", ascending=False) + + # we require that we are already sorted on time & quotes + assert not trades.time.is_monotonic + assert not quotes.time.is_monotonic + with pytest.raises(ValueError): + merge_asof(trades, 
quotes, on="time", by="ticker") + + trades = self.trades.sort_values("time") + assert trades.time.is_monotonic + assert not quotes.time.is_monotonic + with pytest.raises(ValueError): + merge_asof(trades, quotes, on="time", by="ticker") + + quotes = self.quotes.sort_values("time") + assert trades.time.is_monotonic + assert quotes.time.is_monotonic + + # ok, though has dupes + merge_asof(trades, self.quotes, on="time", by="ticker") + + @pytest.mark.parametrize( + "tolerance", + [Timedelta("1day"), datetime.timedelta(days=1)], + ids=["pd.Timedelta", "datetime.timedelta"], + ) + def test_tolerance(self, tolerance): + + trades = self.trades + quotes = self.quotes + + result = merge_asof(trades, quotes, on="time", by="ticker", tolerance=tolerance) + expected = self.tolerance + tm.assert_frame_equal(result, expected) + + def test_tolerance_forward(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]} + ) + + result = pd.merge_asof(left, right, on="a", direction="forward", tolerance=1) + tm.assert_frame_equal(result, expected) + + def test_tolerance_nearest(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [1, np.nan, 11]} + ) + + result = pd.merge_asof(left, right, on="a", direction="nearest", tolerance=1) + tm.assert_frame_equal(result, expected) + + def test_tolerance_tz(self): + # GH 14844 + left = pd.DataFrame( + { + "date": pd.date_range( + start=pd.to_datetime("2016-01-02"), + freq="D", + periods=5, + tz=pytz.timezone("UTC"), + ), + "value1": np.arange(5), + } + ) + right = pd.DataFrame( + { + "date": pd.date_range( + 
start=pd.to_datetime("2016-01-01"), + freq="D", + periods=5, + tz=pytz.timezone("UTC"), + ), + "value2": list("ABCDE"), + } + ) + result = pd.merge_asof(left, right, on="date", tolerance=pd.Timedelta("1 day")) + + expected = pd.DataFrame( + { + "date": pd.date_range( + start=pd.to_datetime("2016-01-02"), + freq="D", + periods=5, + tz=pytz.timezone("UTC"), + ), + "value1": np.arange(5), + "value2": list("BCDEE"), + } + ) + tm.assert_frame_equal(result, expected) + + def test_tolerance_float(self): + # GH22981 + left = pd.DataFrame({"a": [1.1, 3.5, 10.9], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame( + {"a": [1.0, 2.5, 3.3, 7.5, 11.5], "right_val": [1.0, 2.5, 3.3, 7.5, 11.5]} + ) + + expected = pd.DataFrame( + { + "a": [1.1, 3.5, 10.9], + "left_val": ["a", "b", "c"], + "right_val": [1, 3.3, np.nan], + } + ) + + result = pd.merge_asof(left, right, on="a", direction="nearest", tolerance=0.5) + tm.assert_frame_equal(result, expected) + + def test_index_tolerance(self): + # GH 15135 + expected = self.tolerance.set_index("time") + trades = self.trades.set_index("time") + quotes = self.quotes.set_index("time") + + result = pd.merge_asof( + trades, + quotes, + left_index=True, + right_index=True, + by="ticker", + tolerance=pd.Timedelta("1day"), + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches(self): + + result = merge_asof( + self.trades, self.quotes, on="time", by="ticker", allow_exact_matches=False + ) + expected = self.allow_exact_matches + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_forward(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 7, 11]} + ) + + result = pd.merge_asof( + left, right, on="a", direction="forward", allow_exact_matches=False + ) + tm.assert_frame_equal(result, expected) 
+ + def test_allow_exact_matches_nearest(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 2, 3, 7, 11], "right_val": [1, 2, 3, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [2, 3, 11]} + ) + + result = pd.merge_asof( + left, right, on="a", direction="nearest", allow_exact_matches=False + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance(self): + + result = merge_asof( + self.trades, + self.quotes, + on="time", + by="ticker", + tolerance=Timedelta("100ms"), + allow_exact_matches=False, + ) + expected = self.allow_exact_matches_and_tolerance + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance2(self): + # GH 13695 + df1 = pd.DataFrame( + {"time": pd.to_datetime(["2016-07-15 13:30:00.030"]), "username": ["bob"]} + ) + df2 = pd.DataFrame( + { + "time": pd.to_datetime( + ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"] + ), + "version": [1, 2], + } + ) + + result = pd.merge_asof(df1, df2, on="time") + expected = pd.DataFrame( + { + "time": pd.to_datetime(["2016-07-15 13:30:00.030"]), + "username": ["bob"], + "version": [2], + } + ) + tm.assert_frame_equal(result, expected) + + result = pd.merge_asof(df1, df2, on="time", allow_exact_matches=False) + expected = pd.DataFrame( + { + "time": pd.to_datetime(["2016-07-15 13:30:00.030"]), + "username": ["bob"], + "version": [1], + } + ) + tm.assert_frame_equal(result, expected) + + result = pd.merge_asof( + df1, + df2, + on="time", + allow_exact_matches=False, + tolerance=pd.Timedelta("10ms"), + ) + expected = pd.DataFrame( + { + "time": pd.to_datetime(["2016-07-15 13:30:00.030"]), + "username": ["bob"], + "version": [np.nan], + } + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance3(self): + # GH 13709 + df1 = pd.DataFrame( + { + "time": pd.to_datetime( + ["2016-07-15 
13:30:00.030", "2016-07-15 13:30:00.030"] + ), + "username": ["bob", "charlie"], + } + ) + df2 = pd.DataFrame( + { + "time": pd.to_datetime( + ["2016-07-15 13:30:00.000", "2016-07-15 13:30:00.030"] + ), + "version": [1, 2], + } + ) + + result = pd.merge_asof( + df1, + df2, + on="time", + allow_exact_matches=False, + tolerance=pd.Timedelta("10ms"), + ) + expected = pd.DataFrame( + { + "time": pd.to_datetime( + ["2016-07-15 13:30:00.030", "2016-07-15 13:30:00.030"] + ), + "username": ["bob", "charlie"], + "version": [np.nan, np.nan], + } + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance_forward(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 3, 4, 6, 11], "right_val": [1, 3, 4, 6, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 6, 11]} + ) + + result = pd.merge_asof( + left, + right, + on="a", + direction="forward", + allow_exact_matches=False, + tolerance=1, + ) + tm.assert_frame_equal(result, expected) + + def test_allow_exact_matches_and_tolerance_nearest(self): + # GH14887 + + left = pd.DataFrame({"a": [1, 5, 10], "left_val": ["a", "b", "c"]}) + right = pd.DataFrame({"a": [1, 3, 4, 6, 11], "right_val": [1, 3, 4, 7, 11]}) + + expected = pd.DataFrame( + {"a": [1, 5, 10], "left_val": ["a", "b", "c"], "right_val": [np.nan, 4, 11]} + ) + + result = pd.merge_asof( + left, + right, + on="a", + direction="nearest", + allow_exact_matches=False, + tolerance=1, + ) + tm.assert_frame_equal(result, expected) + + def test_forward_by(self): + # GH14887 + + left = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Y", "Z", "Y"], + "left_val": ["a", "b", "c", "d", "e"], + } + ) + right = pd.DataFrame( + { + "a": [1, 6, 11, 15, 16], + "b": ["X", "Z", "Y", "Z", "Y"], + "right_val": [1, 6, 11, 15, 16], + } + ) + + expected = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Y", "Z", 
"Y"], + "left_val": ["a", "b", "c", "d", "e"], + "right_val": [1, np.nan, 11, 15, 16], + } + ) + + result = pd.merge_asof(left, right, on="a", by="b", direction="forward") + tm.assert_frame_equal(result, expected) + + def test_nearest_by(self): + # GH14887 + + left = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Z", "Z", "Y"], + "left_val": ["a", "b", "c", "d", "e"], + } + ) + right = pd.DataFrame( + { + "a": [1, 6, 11, 15, 16], + "b": ["X", "Z", "Z", "Z", "Y"], + "right_val": [1, 6, 11, 15, 16], + } + ) + + expected = pd.DataFrame( + { + "a": [1, 5, 10, 12, 15], + "b": ["X", "X", "Z", "Z", "Y"], + "left_val": ["a", "b", "c", "d", "e"], + "right_val": [1, 1, 11, 11, 16], + } + ) + + result = pd.merge_asof(left, right, on="a", by="b", direction="nearest") + tm.assert_frame_equal(result, expected) + + def test_by_int(self): + # we specialize by type, so test that this is correct + df1 = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.020", + "20160525 13:30:00.030", + "20160525 13:30:00.040", + "20160525 13:30:00.050", + "20160525 13:30:00.060", + ] + ), + "key": [1, 2, 1, 3, 2], + "value1": [1.1, 1.2, 1.3, 1.4, 1.5], + }, + columns=["time", "key", "value1"], + ) + + df2 = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.015", + "20160525 13:30:00.020", + "20160525 13:30:00.025", + "20160525 13:30:00.035", + "20160525 13:30:00.040", + "20160525 13:30:00.055", + "20160525 13:30:00.060", + "20160525 13:30:00.065", + ] + ), + "key": [2, 1, 1, 3, 2, 1, 2, 3], + "value2": [2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8], + }, + columns=["time", "key", "value2"], + ) + + result = pd.merge_asof(df1, df2, on="time", by="key") + + expected = pd.DataFrame( + { + "time": pd.to_datetime( + [ + "20160525 13:30:00.020", + "20160525 13:30:00.030", + "20160525 13:30:00.040", + "20160525 13:30:00.050", + "20160525 13:30:00.060", + ] + ), + "key": [1, 2, 1, 3, 2], + "value1": [1.1, 1.2, 1.3, 1.4, 1.5], + "value2": [2.2, 2.1, 2.3, 2.4, 
2.7], + }, + columns=["time", "key", "value1", "value2"], + ) + + tm.assert_frame_equal(result, expected) + + def test_on_float(self): + # mimics how to determine the minimum-price variation + df1 = pd.DataFrame( + { + "price": [5.01, 0.0023, 25.13, 340.05, 30.78, 1040.90, 0.0078], + "symbol": list("ABCDEFG"), + }, + columns=["symbol", "price"], + ) + + df2 = pd.DataFrame( + {"price": [0.0, 1.0, 100.0], "mpv": [0.0001, 0.01, 0.05]}, + columns=["price", "mpv"], + ) + + df1 = df1.sort_values("price").reset_index(drop=True) + + result = pd.merge_asof(df1, df2, on="price") + + expected = pd.DataFrame( + { + "symbol": list("BGACEDF"), + "price": [0.0023, 0.0078, 5.01, 25.13, 30.78, 340.05, 1040.90], + "mpv": [0.0001, 0.0001, 0.01, 0.01, 0.01, 0.05, 0.05], + }, + columns=["symbol", "price", "mpv"], + ) + + tm.assert_frame_equal(result, expected) + + def test_on_specialized_type(self, any_real_dtype): + # see gh-13936 + dtype = np.dtype(any_real_dtype).type + + df1 = pd.DataFrame( + {"value": [5, 2, 25, 100, 78, 120, 79], "symbol": list("ABCDEFG")}, + columns=["symbol", "value"], + ) + df1.value = dtype(df1.value) + + df2 = pd.DataFrame( + {"value": [0, 80, 120, 125], "result": list("xyzw")}, + columns=["value", "result"], + ) + df2.value = dtype(df2.value) + + df1 = df1.sort_values("value").reset_index(drop=True) + result = pd.merge_asof(df1, df2, on="value") + + expected = pd.DataFrame( + { + "symbol": list("BACEGDF"), + "value": [2, 5, 25, 78, 79, 100, 120], + "result": list("xxxxxyz"), + }, + columns=["symbol", "value", "result"], + ) + expected.value = dtype(expected.value) + + tm.assert_frame_equal(result, expected) + + def test_on_specialized_type_by_int(self, any_real_dtype): + # see gh-13936 + dtype = np.dtype(any_real_dtype).type + + df1 = pd.DataFrame( + { + "value": [5, 2, 25, 100, 78, 120, 79], + "key": [1, 2, 3, 2, 3, 1, 2], + "symbol": list("ABCDEFG"), + }, + columns=["symbol", "key", "value"], + ) + df1.value = dtype(df1.value) + + df2 = pd.DataFrame( + 
{"value": [0, 80, 120, 125], "key": [1, 2, 2, 3], "result": list("xyzw")}, + columns=["value", "key", "result"], + ) + df2.value = dtype(df2.value) + + df1 = df1.sort_values("value").reset_index(drop=True) + result = pd.merge_asof(df1, df2, on="value", by="key") + + expected = pd.DataFrame( + { + "symbol": list("BACEGDF"), + "key": [2, 1, 3, 3, 2, 2, 1], + "value": [2, 5, 25, 78, 79, 100, 120], + "result": [np.nan, "x", np.nan, np.nan, np.nan, "y", "x"], + }, + columns=["symbol", "key", "value", "result"], + ) + expected.value = dtype(expected.value) + + tm.assert_frame_equal(result, expected) + + def test_on_float_by_int(self): + # type specialize both "by" and "on" parameters + df1 = pd.DataFrame( + { + "symbol": list("AAABBBCCC"), + "exch": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "price": [ + 3.26, + 3.2599, + 3.2598, + 12.58, + 12.59, + 12.5, + 378.15, + 378.2, + 378.25, + ], + }, + columns=["symbol", "exch", "price"], + ) + + df2 = pd.DataFrame( + { + "exch": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "price": [0.0, 1.0, 100.0, 0.0, 5.0, 100.0, 0.0, 5.0, 1000.0], + "mpv": [0.0001, 0.01, 0.05, 0.0001, 0.01, 0.1, 0.0001, 0.25, 1.0], + }, + columns=["exch", "price", "mpv"], + ) + + df1 = df1.sort_values("price").reset_index(drop=True) + df2 = df2.sort_values("price").reset_index(drop=True) + + result = pd.merge_asof(df1, df2, on="price", by="exch") + + expected = pd.DataFrame( + { + "symbol": list("AAABBBCCC"), + "exch": [3, 2, 1, 3, 1, 2, 1, 2, 3], + "price": [ + 3.2598, + 3.2599, + 3.26, + 12.5, + 12.58, + 12.59, + 378.15, + 378.2, + 378.25, + ], + "mpv": [0.0001, 0.0001, 0.01, 0.25, 0.01, 0.01, 0.05, 0.1, 0.25], + }, + columns=["symbol", "exch", "price", "mpv"], + ) + + tm.assert_frame_equal(result, expected) + + def test_merge_datatype_error_raises(self): + msg = r"incompatible merge keys \[0\] .*, must be the same type" + + left = pd.DataFrame({"left_val": [1, 5, 10], "a": ["a", "b", "c"]}) + right = pd.DataFrame({"right_val": [1, 2, 3, 6, 7], "a": [1, 2, 3, 6, 7]}) + + with 
pytest.raises(MergeError, match=msg): + merge_asof(left, right, on="a") + + def test_merge_datatype_categorical_error_raises(self): + msg = ( + r"incompatible merge keys \[0\] .* both sides category, " + "but not equal ones" + ) + + left = pd.DataFrame( + {"left_val": [1, 5, 10], "a": pd.Categorical(["a", "b", "c"])} + ) + right = pd.DataFrame( + { + "right_val": [1, 2, 3, 6, 7], + "a": pd.Categorical(["a", "X", "c", "X", "b"]), + } + ) + + with pytest.raises(MergeError, match=msg): + merge_asof(left, right, on="a") + + def test_merge_groupby_multiple_column_with_categorical_column(self): + # GH 16454 + df = pd.DataFrame({"x": [0], "y": [0], "z": pd.Categorical([0])}) + result = merge_asof(df, df, on="x", by=["y", "z"]) + expected = pd.DataFrame({"x": [0], "y": [0], "z": pd.Categorical([0])}) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "func", [lambda x: x, lambda x: to_datetime(x)], ids=["numeric", "datetime"] + ) + @pytest.mark.parametrize("side", ["left", "right"]) + def test_merge_on_nans(self, func, side): + # GH 23189 + msg = "Merge keys contain null values on {} side".format(side) + nulls = func([1.0, 5.0, np.nan]) + non_nulls = func([1.0, 5.0, 10.0]) + df_null = pd.DataFrame({"a": nulls, "left_val": ["a", "b", "c"]}) + df = pd.DataFrame({"a": non_nulls, "right_val": [1, 6, 11]}) + + with pytest.raises(ValueError, match=msg): + if side == "left": + merge_asof(df_null, df, on="a") + else: + merge_asof(df, df_null, on="a") + + def test_merge_by_col_tz_aware(self): + # GH 21184 + left = pd.DataFrame( + { + "by_col": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "on_col": [2], + "values": ["a"], + } + ) + right = pd.DataFrame( + { + "by_col": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "on_col": [1], + "values": ["b"], + } + ) + result = pd.merge_asof(left, right, by="by_col", on="on_col") + expected = pd.DataFrame( + [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]], + columns=["by_col", "on_col", 
"values_x", "values_y"], + ) + tm.assert_frame_equal(result, expected) + + def test_by_mixed_tz_aware(self): + # GH 26649 + left = pd.DataFrame( + { + "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "by_col2": ["HELLO"], + "on_col": [2], + "value": ["a"], + } + ) + right = pd.DataFrame( + { + "by_col1": pd.DatetimeIndex(["2018-01-01"]).tz_localize("UTC"), + "by_col2": ["WORLD"], + "on_col": [1], + "value": ["b"], + } + ) + result = pd.merge_asof(left, right, by=["by_col1", "by_col2"], on="on_col") + expected = pd.DataFrame( + [[pd.Timestamp("2018-01-01", tz="UTC"), "HELLO", 2, "a"]], + columns=["by_col1", "by_col2", "on_col", "value_x"], + ) + expected["value_y"] = np.array([np.nan], dtype=object) + tm.assert_frame_equal(result, expected) + + def test_timedelta_tolerance_nearest(self): + # GH 27642 + + left = pd.DataFrame( + list(zip([0, 5, 10, 15, 20, 25], [0, 1, 2, 3, 4, 5])), + columns=["time", "left"], + ) + + left["time"] = pd.to_timedelta(left["time"], "ms") + + right = pd.DataFrame( + list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])), + columns=["time", "right"], + ) + + right["time"] = pd.to_timedelta(right["time"], "ms") + + expected = pd.DataFrame( + list( + zip( + [0, 5, 10, 15, 20, 25], + [0, 1, 2, 3, 4, 5], + [0, np.nan, 2, 4, np.nan, np.nan], + ) + ), + columns=["time", "left", "right"], + ) + + expected["time"] = pd.to_timedelta(expected["time"], "ms") + + result = pd.merge_asof( + left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest" + ) + + tm.assert_frame_equal(result, expected) + + def test_int_type_tolerance(self, any_int_dtype): + # GH #28870 + + left = pd.DataFrame({"a": [0, 10, 20], "left_val": [1, 2, 3]}) + right = pd.DataFrame({"a": [5, 15, 25], "right_val": [1, 2, 3]}) + left["a"] = left["a"].astype(any_int_dtype) + right["a"] = right["a"].astype(any_int_dtype) + + expected = pd.DataFrame( + {"a": [0, 10, 20], "left_val": [1, 2, 3], "right_val": [np.nan, 1.0, 2.0]} + ) + expected["a"] = 
expected["a"].astype(any_int_dtype) + + result = pd.merge_asof(left, right, on="a", tolerance=10) + tm.assert_frame_equal(result, expected) + + def test_merge_index_column_tz(self): + # GH 29864 + index = pd.date_range("2019-10-01", freq="30min", periods=5, tz="UTC") + left = pd.DataFrame([0.9, 0.8, 0.7, 0.6], columns=["xyz"], index=index[1:]) + right = pd.DataFrame({"from_date": index, "abc": [2.46] * 4 + [2.19]}) + result = pd.merge_asof( + left=left, right=right, left_index=True, right_on=["from_date"] + ) + expected = pd.DataFrame( + { + "xyz": [0.9, 0.8, 0.7, 0.6], + "from_date": index[1:], + "abc": [2.46] * 3 + [2.19], + }, + index=pd.Index([1, 2, 3, 4]), + ) + tm.assert_frame_equal(result, expected) + + result = pd.merge_asof( + left=right, right=left, right_index=True, left_on=["from_date"] + ) + expected = pd.DataFrame( + { + "from_date": index, + "abc": [2.46] * 4 + [2.19], + "xyz": [np.nan, 0.9, 0.8, 0.7, 0.6], + }, + index=pd.Index([0, 1, 2, 3, 4]), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py new file mode 100644 index 00000000..691f2549 --- /dev/null +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -0,0 +1,188 @@ +import numpy as np +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.fixture +def df1(): + return DataFrame( + dict( + outer=[1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4], + inner=[1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2], + v1=np.linspace(0, 1, 11), + ) + ) + + +@pytest.fixture +def df2(): + return DataFrame( + dict( + outer=[1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3], + inner=[1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3], + v2=np.linspace(10, 11, 12), + ) + ) + + +@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]]) +def left_df(request, df1): + """ Construct left test DataFrame with specified levels + (any of 'outer', 'inner', and 'v1')""" + levels = request.param + if levels: + df1 
= df1.set_index(levels) + + return df1 + + +@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]]) +def right_df(request, df2): + """ Construct right test DataFrame with specified levels + (any of 'outer', 'inner', and 'v2')""" + levels = request.param + + if levels: + df2 = df2.set_index(levels) + + return df2 + + +def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, how=None): + """ + Compute the expected merge result for the test case. + + This method computes the expected result of merging two DataFrames on + a combination of their columns and index levels. It does so by + explicitly dropping/resetting their named index levels, performing a + merge on their columns, and then finally restoring the appropriate + index in the result. + + Parameters + ---------- + df_left : DataFrame + The left DataFrame (may have zero or more named index levels) + df_right : DataFrame + The right DataFrame (may have zero or more named index levels) + on : list of str + The on parameter to the merge operation + left_on : list of str + The left_on parameter to the merge operation + right_on : list of str + The right_on parameter to the merge operation + how : str + The how parameter to the merge operation + + Returns + ------- + DataFrame + The expected merge result + """ + + # Handle on param if specified + if on is not None: + left_on, right_on = on, on + + # Compute input named index levels + left_levels = [n for n in df_left.index.names if n is not None] + right_levels = [n for n in df_right.index.names if n is not None] + + # Compute output named index levels + output_levels = [i for i in left_on if i in right_levels and i in left_levels] + + # Drop index levels that aren't involved in the merge + drop_left = [n for n in left_levels if n not in left_on] + if drop_left: + df_left = df_left.reset_index(drop_left, drop=True) + + drop_right = [n for n in right_levels if n not in right_on] + if drop_right: + df_right = df_right.reset_index(drop_right, 
drop=True) + + # Convert remaining index levels to columns + reset_left = [n for n in left_levels if n in left_on] + if reset_left: + df_left = df_left.reset_index(level=reset_left) + + reset_right = [n for n in right_levels if n in right_on] + if reset_right: + df_right = df_right.reset_index(level=reset_right) + + # Perform merge + expected = df_left.merge(df_right, left_on=left_on, right_on=right_on, how=how) + + # Restore index levels + if output_levels: + expected = expected.set_index(output_levels) + + return expected + + +@pytest.mark.parametrize( + "on,how", + [ + (["outer"], "inner"), + (["inner"], "left"), + (["outer", "inner"], "right"), + (["inner", "outer"], "outer"), + ], +) +def test_merge_indexes_and_columns_on(left_df, right_df, on, how): + + # Construct expected result + expected = compute_expected(left_df, right_df, on=on, how=how) + + # Perform merge + result = left_df.merge(right_df, on=on, how=how) + tm.assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize( + "left_on,right_on,how", + [ + (["outer"], ["outer"], "inner"), + (["inner"], ["inner"], "right"), + (["outer", "inner"], ["outer", "inner"], "left"), + (["inner", "outer"], ["inner", "outer"], "outer"), + ], +) +def test_merge_indexes_and_columns_lefton_righton( + left_df, right_df, left_on, right_on, how +): + + # Construct expected result + expected = compute_expected( + left_df, right_df, left_on=left_on, right_on=right_on, how=how + ) + + # Perform merge + result = left_df.merge(right_df, left_on=left_on, right_on=right_on, how=how) + tm.assert_frame_equal(result, expected, check_like=True) + + +@pytest.mark.parametrize("left_index", ["inner", ["inner", "outer"]]) +def test_join_indexes_and_columns_on(df1, df2, left_index, join_type): + + # Construct left_df + left_df = df1.set_index(left_index) + + # Construct right_df + right_df = df2.set_index(["outer", "inner"]) + + # Result + expected = ( + left_df.reset_index() + .join( + right_df, on=["outer", 
"inner"], how=join_type, lsuffix="_x", rsuffix="_y" + ) + .set_index(left_index) + ) + + # Perform join + result = left_df.join( + right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y" + ) + + tm.assert_frame_equal(result, expected, check_like=True) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py new file mode 100644 index 00000000..e0063925 --- /dev/null +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -0,0 +1,117 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, merge_ordered +import pandas._testing as tm + + +class TestMergeOrdered: + def setup_method(self, method): + self.left = DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2.0, 3]}) + + self.right = DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [1, 2, 3.0, 4]}) + + def test_basic(self): + result = merge_ordered(self.left, self.right, on="key") + expected = DataFrame( + { + "key": ["a", "b", "c", "d", "e", "f"], + "lvalue": [1, np.nan, 2, np.nan, 3, np.nan], + "rvalue": [np.nan, 1, 2, 3, np.nan, 4], + } + ) + + tm.assert_frame_equal(result, expected) + + def test_ffill(self): + result = merge_ordered(self.left, self.right, on="key", fill_method="ffill") + expected = DataFrame( + { + "key": ["a", "b", "c", "d", "e", "f"], + "lvalue": [1.0, 1, 2, 2, 3, 3.0], + "rvalue": [np.nan, 1, 2, 3, 3, 4], + } + ) + tm.assert_frame_equal(result, expected) + + def test_multigroup(self): + left = pd.concat([self.left, self.left], ignore_index=True) + + left["group"] = ["a"] * 3 + ["b"] * 3 + + result = merge_ordered( + left, self.right, on="key", left_by="group", fill_method="ffill" + ) + expected = DataFrame( + { + "key": ["a", "b", "c", "d", "e", "f"] * 2, + "lvalue": [1.0, 1, 2, 2, 3, 3.0] * 2, + "rvalue": [np.nan, 1, 2, 3, 3, 4] * 2, + } + ) + expected["group"] = ["a"] * 6 + ["b"] * 6 + + tm.assert_frame_equal(result, expected.loc[:, result.columns]) + + result2 = merge_ordered( + 
self.right, left, on="key", right_by="group", fill_method="ffill" + ) + tm.assert_frame_equal(result, result2.loc[:, result.columns]) + + result = merge_ordered(left, self.right, on="key", left_by="group") + assert result["group"].notna().all() + + def test_merge_type(self): + class NotADataFrame(DataFrame): + @property + def _constructor(self): + return NotADataFrame + + nad = NotADataFrame(self.left) + result = nad.merge(self.right, on="key") + + assert isinstance(result, NotADataFrame) + + def test_empty_sequence_concat(self): + # GH 9157 + empty_pat = "[Nn]o objects" + none_pat = "objects.*None" + test_cases = [ + ((), empty_pat), + ([], empty_pat), + ({}, empty_pat), + ([None], none_pat), + ([None, None], none_pat), + ] + for df_seq, pattern in test_cases: + with pytest.raises(ValueError, match=pattern): + pd.concat(df_seq) + + pd.concat([pd.DataFrame()]) + pd.concat([None, pd.DataFrame()]) + pd.concat([pd.DataFrame(), None]) + + def test_doc_example(self): + left = DataFrame( + { + "group": list("aaabbb"), + "key": ["a", "c", "e", "a", "c", "e"], + "lvalue": [1, 2, 3] * 2, + } + ) + + right = DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]}) + + result = merge_ordered(left, right, fill_method="ffill", left_by="group") + + expected = DataFrame( + { + "group": list("aaaaabbbbb"), + "key": ["a", "b", "c", "d", "e"] * 2, + "lvalue": [1, 1, 2, 2, 3] * 2, + "rvalue": [np.nan, 1, 2, 3, 3] * 2, + } + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py new file mode 100644 index 00000000..1f78c190 --- /dev/null +++ b/pandas/tests/reshape/merge/test_multi.py @@ -0,0 +1,839 @@ +import numpy as np +from numpy.random import randn +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm +from pandas.core.reshape.concat import concat +from pandas.core.reshape.merge import merge + + +@pytest.fixture +def left(): 
+ """left dataframe (not multi-indexed) for multi-index join tests""" + # a little relevant example with NAs + key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"] + key2 = ["two", "one", "three", "one", "two", "one", "two", "two", "three", "one"] + + data = np.random.randn(len(key1)) + return DataFrame({"key1": key1, "key2": key2, "data": data}) + + +@pytest.fixture +def right(): + """right dataframe (multi-indexed) for multi-index join tests""" + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["key1", "key2"], + ) + + return DataFrame( + np.random.randn(10, 3), index=index, columns=["j_one", "j_two", "j_three"] + ) + + +@pytest.fixture +def left_multi(): + return DataFrame( + dict( + Origin=["A", "A", "B", "B", "C"], + Destination=["A", "B", "A", "C", "A"], + Period=["AM", "AM", "IP", "AM", "OP"], + TripPurp=["hbw", "nhb", "hbo", "nhb", "hbw"], + Trips=[1987, 3647, 2470, 4296, 4444], + ), + columns=["Origin", "Destination", "Period", "TripPurp", "Trips"], + ).set_index(["Origin", "Destination", "Period", "TripPurp"]) + + +@pytest.fixture +def right_multi(): + return DataFrame( + dict( + Origin=["A", "A", "B", "B", "C", "C", "E"], + Destination=["A", "B", "A", "B", "A", "B", "F"], + Period=["AM", "AM", "IP", "AM", "OP", "IP", "AM"], + LinkType=["a", "b", "c", "b", "a", "b", "a"], + Distance=[100, 80, 90, 80, 75, 35, 55], + ), + columns=["Origin", "Destination", "Period", "LinkType", "Distance"], + ).set_index(["Origin", "Destination", "Period", "LinkType"]) + + +@pytest.fixture +def on_cols_multi(): + return ["Origin", "Destination", "Period"] + + +@pytest.fixture +def idx_cols_multi(): + return ["Origin", "Destination", "Period", "TripPurp", "LinkType"] + + +class TestMergeMulti: + def setup_method(self): + self.index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 
1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + self.to_join = DataFrame( + np.random.randn(10, 3), + index=self.index, + columns=["j_one", "j_two", "j_three"], + ) + + # a little relevant example with NAs + key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"] + key2 = [ + "two", + "one", + "three", + "one", + "two", + "one", + "two", + "two", + "three", + "one", + ] + + data = np.random.randn(len(key1)) + self.data = DataFrame({"key1": key1, "key2": key2, "data": data}) + + def test_merge_on_multikey(self, left, right, join_type): + on_cols = ["key1", "key2"] + result = left.join(right, on=on_cols, how=join_type).reset_index(drop=True) + + expected = pd.merge(left, right.reset_index(), on=on_cols, how=join_type) + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=on_cols, how=join_type, sort=True).reset_index( + drop=True + ) + + expected = pd.merge( + left, right.reset_index(), on=on_cols, how=join_type, sort=True + ) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("sort", [False, True]) + def test_left_join_multi_index(self, left, right, sort): + icols = ["1st", "2nd", "3rd"] + + def bind_cols(df): + iord = lambda a: 0 if a != a else ord(a) + f = lambda ts: ts.map(iord) - ord("a") + return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 1e4 + + def run_asserts(left, right, sort): + res = left.join(right, on=icols, how="left", sort=sort) + + assert len(left) < len(res) + 1 + assert not res["4th"].isna().any() + assert not res["5th"].isna().any() + + tm.assert_series_equal(res["4th"], -res["5th"], check_names=False) + result = bind_cols(res.iloc[:, :-2]) + tm.assert_series_equal(res["4th"], result, check_names=False) + assert result.name is None + + if sort: + tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort")) + + out = merge(left, right.reset_index(), on=icols, sort=sort, how="left") + + res.index = np.arange(len(res)) + 
tm.assert_frame_equal(out, res) + + lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) + left = DataFrame(np.random.choice(lc, (5000, 2)), columns=["1st", "3rd"]) + left.insert(1, "2nd", np.random.randint(0, 1000, len(left))) + + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + + left["4th"] = bind_cols(left) + right["5th"] = -bind_cols(right) + right.set_index(icols, inplace=True) + + run_asserts(left, right, sort) + + # inject some nulls + left.loc[1::23, "1st"] = np.nan + left.loc[2::37, "2nd"] = np.nan + left.loc[3::43, "3rd"] = np.nan + left["4th"] = bind_cols(left) + + i = np.random.permutation(len(left)) + right = left.iloc[i, :-1] + right["5th"] = -bind_cols(right) + right.set_index(icols, inplace=True) + + run_asserts(left, right, sort) + + @pytest.mark.parametrize("sort", [False, True]) + def test_merge_right_vs_left(self, left, right, sort): + # compare left vs right merge with multikey + on_cols = ["key1", "key2"] + merged_left_right = left.merge( + right, left_on=on_cols, right_index=True, how="left", sort=sort + ) + + merge_right_left = right.merge( + left, right_on=on_cols, left_index=True, how="right", sort=sort + ) + + # Reorder columns + merge_right_left = merge_right_left[merged_left_right.columns] + + tm.assert_frame_equal(merged_left_right, merge_right_left) + + def test_merge_multiple_cols_with_mixed_cols_index(self): + # GH29522 + s = pd.Series( + range(6), + pd.MultiIndex.from_product([["A", "B"], [1, 2, 3]], names=["lev1", "lev2"]), + name="Amount", + ) + df = pd.DataFrame( + {"lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], "col": 0} + ) + result = pd.merge(df, s.reset_index(), on=["lev1", "lev2"]) + expected = pd.DataFrame( + { + "lev1": list("AAABBB"), + "lev2": [1, 2, 3, 1, 2, 3], + "col": [0] * 6, + "Amount": range(6), + } + ) + tm.assert_frame_equal(result, expected) + + def test_compress_group_combinations(self): + + # ~ 40000000 possible unique groups + key1 = tm.rands_array(10, 10000) + key1 = 
np.tile(key1, 2) + key2 = key1[::-1] + + df = DataFrame({"key1": key1, "key2": key2, "value1": np.random.randn(20000)}) + + df2 = DataFrame( + {"key1": key1[::2], "key2": key2[::2], "value2": np.random.randn(10000)} + ) + + # just to hit the label compression code path + merge(df, df2, how="outer") + + def test_left_join_index_preserve_order(self): + + on_cols = ["k1", "k2"] + left = DataFrame( + { + "k1": [0, 1, 2] * 8, + "k2": ["foo", "bar"] * 12, + "v": np.array(np.arange(24), dtype=np.int64), + } + ) + + index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")]) + right = DataFrame({"v2": [5, 7]}, index=index) + + result = left.join(right, on=on_cols) + + expected = left.copy() + expected["v2"] = np.nan + expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7 + + tm.assert_frame_equal(result, expected) + + result.sort_values(on_cols, kind="mergesort", inplace=True) + expected = left.join(right, on=on_cols, sort=True) + + tm.assert_frame_equal(result, expected) + + # test join with multi dtypes blocks + left = DataFrame( + { + "k1": [0, 1, 2] * 8, + "k2": ["foo", "bar"] * 12, + "k3": np.array([0, 1, 2] * 8, dtype=np.float32), + "v": np.array(np.arange(24), dtype=np.int32), + } + ) + + index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")]) + right = DataFrame({"v2": [5, 7]}, index=index) + + result = left.join(right, on=on_cols) + + expected = left.copy() + expected["v2"] = np.nan + expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5 + expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7 + + tm.assert_frame_equal(result, expected) + + result = result.sort_values(on_cols, kind="mergesort") + expected = left.join(right, on=on_cols, sort=True) + + tm.assert_frame_equal(result, expected) + + def test_left_join_index_multi_match_multiindex(self): + left = DataFrame( + [ + ["X", "Y", "C", "a"], + ["W", "Y", "C", "e"], + ["V", "Q", "A", "h"], + ["V", "R", 
"D", "i"], + ["X", "Y", "D", "b"], + ["X", "Y", "A", "c"], + ["W", "Q", "B", "f"], + ["W", "R", "C", "g"], + ["V", "Y", "C", "j"], + ["X", "Y", "B", "d"], + ], + columns=["cola", "colb", "colc", "tag"], + index=[3, 2, 0, 1, 7, 6, 4, 5, 9, 8], + ) + + right = DataFrame( + [ + ["W", "R", "C", 0], + ["W", "Q", "B", 3], + ["W", "Q", "B", 8], + ["X", "Y", "A", 1], + ["X", "Y", "A", 4], + ["X", "Y", "B", 5], + ["X", "Y", "C", 6], + ["X", "Y", "C", 9], + ["X", "Q", "C", -6], + ["X", "R", "C", -9], + ["V", "Y", "C", 7], + ["V", "R", "D", 2], + ["V", "R", "D", -1], + ["V", "Q", "A", -3], + ], + columns=["col1", "col2", "col3", "val"], + ).set_index(["col1", "col2", "col3"]) + + result = left.join(right, on=["cola", "colb", "colc"], how="left") + + expected = DataFrame( + [ + ["X", "Y", "C", "a", 6], + ["X", "Y", "C", "a", 9], + ["W", "Y", "C", "e", np.nan], + ["V", "Q", "A", "h", -3], + ["V", "R", "D", "i", 2], + ["V", "R", "D", "i", -1], + ["X", "Y", "D", "b", np.nan], + ["X", "Y", "A", "c", 1], + ["X", "Y", "A", "c", 4], + ["W", "Q", "B", "f", 3], + ["W", "Q", "B", "f", 8], + ["W", "R", "C", "g", 0], + ["V", "Y", "C", "j", 7], + ["X", "Y", "B", "d", 5], + ], + columns=["cola", "colb", "colc", "tag", "val"], + index=[3, 3, 2, 0, 1, 1, 7, 6, 6, 4, 4, 5, 9, 8], + ) + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on=["cola", "colb", "colc"], how="left", sort=True) + + expected = expected.sort_values(["cola", "colb", "colc"], kind="mergesort") + + tm.assert_frame_equal(result, expected) + + def test_left_join_index_multi_match(self): + left = DataFrame( + [["c", 0], ["b", 1], ["a", 2], ["b", 3]], + columns=["tag", "val"], + index=[2, 0, 1, 3], + ) + + right = DataFrame( + [ + ["a", "v"], + ["c", "w"], + ["c", "x"], + ["d", "y"], + ["a", "z"], + ["c", "r"], + ["e", "q"], + ["c", "s"], + ], + columns=["tag", "char"], + ).set_index("tag") + + result = left.join(right, on="tag", how="left") + + expected = DataFrame( + [ + ["c", 0, "w"], + ["c", 0, "x"], + 
["c", 0, "r"], + ["c", 0, "s"], + ["b", 1, np.nan], + ["a", 2, "v"], + ["a", 2, "z"], + ["b", 3, np.nan], + ], + columns=["tag", "val", "char"], + index=[2, 2, 2, 2, 0, 1, 1, 3], + ) + + tm.assert_frame_equal(result, expected) + + result = left.join(right, on="tag", how="left", sort=True) + expected2 = expected.sort_values("tag", kind="mergesort") + + tm.assert_frame_equal(result, expected2) + + # GH7331 - maintain left frame order in left merge + result = merge(left, right.reset_index(), how="left", on="tag") + expected.index = np.arange(len(expected)) + tm.assert_frame_equal(result, expected) + + def test_left_merge_na_buglet(self): + left = DataFrame( + { + "id": list("abcde"), + "v1": randn(5), + "v2": randn(5), + "dummy": list("abcde"), + "v3": randn(5), + }, + columns=["id", "v1", "v2", "dummy", "v3"], + ) + right = DataFrame( + { + "id": ["a", "b", np.nan, np.nan, np.nan], + "sv3": [1.234, 5.678, np.nan, np.nan, np.nan], + } + ) + + result = merge(left, right, on="id", how="left") + + rdf = right.drop(["id"], axis=1) + expected = left.join(rdf) + tm.assert_frame_equal(result, expected) + + def test_merge_na_keys(self): + data = [ + [1950, "A", 1.5], + [1950, "B", 1.5], + [1955, "B", 1.5], + [1960, "B", np.nan], + [1970, "B", 4.0], + [1950, "C", 4.0], + [1960, "C", np.nan], + [1965, "C", 3.0], + [1970, "C", 4.0], + ] + + frame = DataFrame(data, columns=["year", "panel", "data"]) + + other_data = [ + [1960, "A", np.nan], + [1970, "A", np.nan], + [1955, "A", np.nan], + [1965, "A", np.nan], + [1965, "B", np.nan], + [1955, "C", np.nan], + ] + other = DataFrame(other_data, columns=["year", "panel", "data"]) + + result = frame.merge(other, how="outer") + + expected = frame.fillna(-999).merge(other.fillna(-999), how="outer") + expected = expected.replace(-999, np.nan) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index]) + def test_merge_datetime_index(self, klass): + # see gh-19038 + df = DataFrame( + 
[1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"] + ) + df.index = pd.to_datetime(df.index) + on_vector = df.index.year + + if klass is not None: + on_vector = klass(on_vector) + + expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + + result = df.merge(df, on=["a", on_vector], how="inner") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} + ) + + result = df.merge(df, on=[df.index.year], how="inner") + tm.assert_frame_equal(result, expected) + + def test_join_multi_levels(self): + + # GH 3662 + # merge multi-levels + household = DataFrame( + dict( + household_id=[1, 2, 3], + male=[0, 1, 0], + wealth=[196087.3, 316478.7, 294750], + ), + columns=["household_id", "male", "wealth"], + ).set_index("household_id") + portfolio = DataFrame( + dict( + household_id=[1, 2, 2, 3, 3, 3, 4], + asset_id=[ + "nl0000301109", + "nl0000289783", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + np.nan, + ], + name=[ + "ABN Amro", + "Robeco", + "Royal Dutch Shell", + "Royal Dutch Shell", + "AAB Eastern Europe Equity Fund", + "Postbank BioTech Fonds", + np.nan, + ], + share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0], + ), + columns=["household_id", "asset_id", "name", "share"], + ).set_index(["household_id", "asset_id"]) + result = household.join(portfolio, how="inner") + expected = ( + DataFrame( + dict( + male=[0, 1, 1, 0, 0, 0], + wealth=[196087.3, 316478.7, 316478.7, 294750.0, 294750.0, 294750.0], + name=[ + "ABN Amro", + "Robeco", + "Royal Dutch Shell", + "Royal Dutch Shell", + "AAB Eastern Europe Equity Fund", + "Postbank BioTech Fonds", + ], + share=[1.00, 0.40, 0.60, 0.15, 0.60, 0.25], + household_id=[1, 2, 2, 3, 3, 3], + asset_id=[ + "nl0000301109", + "nl0000289783", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + ], + ) + ) + .set_index(["household_id", "asset_id"]) + .reindex(columns=["male", "wealth", "name", 
"share"]) + ) + tm.assert_frame_equal(result, expected) + + # equivalency + result = merge( + household.reset_index(), + portfolio.reset_index(), + on=["household_id"], + how="inner", + ).set_index(["household_id", "asset_id"]) + tm.assert_frame_equal(result, expected) + + result = household.join(portfolio, how="outer") + expected = concat( + [ + expected, + ( + DataFrame( + dict(share=[1.00]), + index=MultiIndex.from_tuples( + [(4, np.nan)], names=["household_id", "asset_id"] + ), + ) + ), + ], + axis=0, + sort=True, + ).reindex(columns=expected.columns) + tm.assert_frame_equal(result, expected) + + # invalid cases + household.index.name = "foo" + + with pytest.raises(ValueError): + household.join(portfolio, how="inner") + + portfolio2 = portfolio.copy() + portfolio2.index.set_names(["household_id", "foo"]) + + with pytest.raises(ValueError): + portfolio2.join(portfolio, how="inner") + + def test_join_multi_levels2(self): + + # some more advanced merges + # GH6360 + household = DataFrame( + dict( + household_id=[1, 2, 2, 3, 3, 3, 4], + asset_id=[ + "nl0000301109", + "nl0000301109", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "nl0000289965", + np.nan, + ], + share=[1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0], + ), + columns=["household_id", "asset_id", "share"], + ).set_index(["household_id", "asset_id"]) + + log_return = DataFrame( + dict( + asset_id=[ + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "lu0197800237", + ], + t=[233, 234, 235, 180, 181], + log_return=[0.09604978, -0.06524096, 0.03532373, 0.03025441, 0.036997], + ) + ).set_index(["asset_id", "t"]) + + expected = ( + DataFrame( + dict( + household_id=[2, 2, 2, 3, 3, 3, 3, 3], + asset_id=[ + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "lu0197800237", + ], + t=[233, 234, 235, 233, 234, 235, 180, 181], + share=[0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6], + log_return=[ + 0.09604978, + 
-0.06524096, + 0.03532373, + 0.09604978, + -0.06524096, + 0.03532373, + 0.03025441, + 0.036997, + ], + ) + ) + .set_index(["household_id", "asset_id", "t"]) + .reindex(columns=["share", "log_return"]) + ) + + # this is the equivalency + result = merge( + household.reset_index(), + log_return.reset_index(), + on=["asset_id"], + how="inner", + ).set_index(["household_id", "asset_id", "t"]) + tm.assert_frame_equal(result, expected) + + expected = ( + DataFrame( + dict( + household_id=[1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4], + asset_id=[ + "nl0000301109", + "nl0000301109", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "gb00b03mlx29", + "lu0197800237", + "lu0197800237", + "nl0000289965", + None, + ], + t=[None, None, 233, 234, 235, 233, 234, 235, 180, 181, None, None], + share=[ + 1.0, + 0.4, + 0.6, + 0.6, + 0.6, + 0.15, + 0.15, + 0.15, + 0.6, + 0.6, + 0.25, + 1.0, + ], + log_return=[ + None, + None, + 0.09604978, + -0.06524096, + 0.03532373, + 0.09604978, + -0.06524096, + 0.03532373, + 0.03025441, + 0.036997, + None, + None, + ], + ) + ) + .set_index(["household_id", "asset_id", "t"]) + .reindex(columns=["share", "log_return"]) + ) + + result = merge( + household.reset_index(), + log_return.reset_index(), + on=["asset_id"], + how="outer", + ).set_index(["household_id", "asset_id", "t"]) + + tm.assert_frame_equal(result, expected) + + +class TestJoinMultiMulti: + def test_join_multi_multi( + self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi + ): + # Multi-index join tests + expected = ( + pd.merge( + left_multi.reset_index(), + right_multi.reset_index(), + how=join_type, + on=on_cols_multi, + ) + .set_index(idx_cols_multi) + .sort_index() + ) + + result = left_multi.join(right_multi, how=join_type).sort_index() + tm.assert_frame_equal(result, expected) + + def test_join_multi_empty_frames( + self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi + ): + + left_multi = 
left_multi.drop(columns=left_multi.columns) + right_multi = right_multi.drop(columns=right_multi.columns) + + expected = ( + pd.merge( + left_multi.reset_index(), + right_multi.reset_index(), + how=join_type, + on=on_cols_multi, + ) + .set_index(idx_cols_multi) + .sort_index() + ) + + result = left_multi.join(right_multi, how=join_type).sort_index() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [None, np.asarray, Series, Index]) + def test_merge_datetime_index(self, box): + # see gh-19038 + df = DataFrame( + [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"] + ) + df.index = pd.to_datetime(df.index) + on_vector = df.index.year + + if box is not None: + on_vector = box(on_vector) + + expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + + result = df.merge(df, on=["a", on_vector], how="inner") + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} + ) + + result = df.merge(df, on=[df.index.year], how="inner") + tm.assert_frame_equal(result, expected) + + def test_single_common_level(self): + index_left = pd.MultiIndex.from_tuples( + [("K0", "X0"), ("K0", "X1"), ("K1", "X2")], names=["key", "X"] + ) + + left = pd.DataFrame( + {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=index_left + ) + + index_right = pd.MultiIndex.from_tuples( + [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], names=["key", "Y"] + ) + + right = pd.DataFrame( + {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]}, + index=index_right, + ) + + result = left.join(right) + expected = pd.merge( + left.reset_index(), right.reset_index(), on=["key"], how="inner" + ).set_index(["key", "X", "Y"]) + + tm.assert_frame_equal(result, expected) + + def test_join_multi_wrong_order(self): + # GH 25760 + # GH 28956 + + midx1 = pd.MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"]) + midx3 = pd.MultiIndex.from_tuples([(4, 1), (3, 
2), (3, 1)], names=["b", "a"]) + + left = pd.DataFrame(index=midx1, data={"x": [10, 20, 30, 40]}) + right = pd.DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]}) + + result = left.join(right) + + expected = pd.DataFrame( + index=midx1, + data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]}, + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_pivot_old.py b/pandas/tests/reshape/merge/test_pivot_old.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py new file mode 100644 index 00000000..990669f1 --- /dev/null +++ b/pandas/tests/reshape/test_concat.py @@ -0,0 +1,2752 @@ +from collections import OrderedDict, abc, deque +import datetime as dt +from datetime import datetime +from decimal import Decimal +from io import StringIO +from itertools import combinations +from warnings import catch_warnings + +import dateutil +import numpy as np +from numpy.random import randn +import pytest + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, + Timestamp, + concat, + date_range, + isna, + read_csv, +) +import pandas._testing as tm +from pandas.core.arrays import SparseArray +from pandas.core.construction import create_series_with_explicit_dtype +from pandas.tests.extension.decimal import to_decimal + + +@pytest.fixture(params=[True, False]) +def sort(request): + """Boolean sort keyword for concat and DataFrame.append.""" + return request.param + + +class TestConcatAppendCommon: + """ + Test common dtype coercion rules between concat and append. 
+ """ + + def setup_method(self, method): + + dt_data = [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + ] + tz_data = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), + ] + + td_data = [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + ] + + period_data = [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + ] + + self.data = { + "bool": [True, False, True], + "int64": [1, 2, 3], + "float64": [1.1, np.nan, 3.3], + "category": pd.Categorical(["X", "Y", "Z"]), + "object": ["a", "b", "c"], + "datetime64[ns]": dt_data, + "datetime64[ns, US/Eastern]": tz_data, + "timedelta64[ns]": td_data, + "period[M]": period_data, + } + + def _check_expected_dtype(self, obj, label): + """ + Check whether obj has expected dtype depending on label + considering not-supported dtypes + """ + if isinstance(obj, pd.Index): + if label == "bool": + assert obj.dtype == "object" + else: + assert obj.dtype == label + elif isinstance(obj, pd.Series): + if label.startswith("period"): + assert obj.dtype == "Period[M]" + else: + assert obj.dtype == label + else: + raise ValueError + + def test_dtypes(self): + # to confirm test case covers intended dtypes + for typ, vals in self.data.items(): + self._check_expected_dtype(pd.Index(vals), typ) + self._check_expected_dtype(pd.Series(vals), typ) + + def test_concatlike_same_dtypes(self): + # GH 13660 + for typ1, vals1 in self.data.items(): + + vals2 = vals1 + vals3 = vals1 + + if typ1 == "category": + exp_data = pd.Categorical(list(vals1) + list(vals2)) + exp_data3 = pd.Categorical(list(vals1) + list(vals2) + list(vals3)) + else: + exp_data = vals1 + vals2 + exp_data3 = vals1 + vals2 + vals3 + + # ----- Index ----- # + + # index.append + res = pd.Index(vals1).append(pd.Index(vals2)) + exp = pd.Index(exp_data) + 
tm.assert_index_equal(res, exp) + + # 3 elements + res = pd.Index(vals1).append([pd.Index(vals2), pd.Index(vals3)]) + exp = pd.Index(exp_data3) + tm.assert_index_equal(res, exp) + + # index.append name mismatch + i1 = pd.Index(vals1, name="x") + i2 = pd.Index(vals2, name="y") + res = i1.append(i2) + exp = pd.Index(exp_data) + tm.assert_index_equal(res, exp) + + # index.append name match + i1 = pd.Index(vals1, name="x") + i2 = pd.Index(vals2, name="x") + res = i1.append(i2) + exp = pd.Index(exp_data, name="x") + tm.assert_index_equal(res, exp) + + # cannot append non-index + with pytest.raises(TypeError, match="all inputs must be Index"): + pd.Index(vals1).append(vals2) + + with pytest.raises(TypeError, match="all inputs must be Index"): + pd.Index(vals1).append([pd.Index(vals2), vals3]) + + # ----- Series ----- # + + # series.append + res = pd.Series(vals1).append(pd.Series(vals2), ignore_index=True) + exp = pd.Series(exp_data) + tm.assert_series_equal(res, exp, check_index_type=True) + + # concat + res = pd.concat([pd.Series(vals1), pd.Series(vals2)], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # 3 elements + res = pd.Series(vals1).append( + [pd.Series(vals2), pd.Series(vals3)], ignore_index=True + ) + exp = pd.Series(exp_data3) + tm.assert_series_equal(res, exp) + + res = pd.concat( + [pd.Series(vals1), pd.Series(vals2), pd.Series(vals3)], + ignore_index=True, + ) + tm.assert_series_equal(res, exp) + + # name mismatch + s1 = pd.Series(vals1, name="x") + s2 = pd.Series(vals2, name="y") + res = s1.append(s2, ignore_index=True) + exp = pd.Series(exp_data) + tm.assert_series_equal(res, exp, check_index_type=True) + + res = pd.concat([s1, s2], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # name match + s1 = pd.Series(vals1, name="x") + s2 = pd.Series(vals2, name="x") + res = s1.append(s2, ignore_index=True) + exp = pd.Series(exp_data, name="x") + tm.assert_series_equal(res, exp, 
check_index_type=True) + + res = pd.concat([s1, s2], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # cannot append non-index + msg = ( + r"cannot concatenate object of type '.+';" + " only Series and DataFrame objs are valid" + ) + with pytest.raises(TypeError, match=msg): + pd.Series(vals1).append(vals2) + + with pytest.raises(TypeError, match=msg): + pd.Series(vals1).append([pd.Series(vals2), vals3]) + + with pytest.raises(TypeError, match=msg): + pd.concat([pd.Series(vals1), vals2]) + + with pytest.raises(TypeError, match=msg): + pd.concat([pd.Series(vals1), pd.Series(vals2), vals3]) + + def test_concatlike_dtypes_coercion(self): + # GH 13660 + for typ1, vals1 in self.data.items(): + for typ2, vals2 in self.data.items(): + + vals3 = vals2 + + # basically infer + exp_index_dtype = None + exp_series_dtype = None + + if typ1 == typ2: + # same dtype is tested in test_concatlike_same_dtypes + continue + elif typ1 == "category" or typ2 == "category": + # ToDo: suspicious + continue + + # specify expected dtype + if typ1 == "bool" and typ2 in ("int64", "float64"): + # series coerces to numeric based on numpy rule + # index doesn't because bool is object dtype + exp_series_dtype = typ2 + elif typ2 == "bool" and typ1 in ("int64", "float64"): + exp_series_dtype = typ1 + elif ( + typ1 == "datetime64[ns, US/Eastern]" + or typ2 == "datetime64[ns, US/Eastern]" + or typ1 == "timedelta64[ns]" + or typ2 == "timedelta64[ns]" + ): + exp_index_dtype = object + exp_series_dtype = object + + exp_data = vals1 + vals2 + exp_data3 = vals1 + vals2 + vals3 + + # ----- Index ----- # + + # index.append + res = pd.Index(vals1).append(pd.Index(vals2)) + exp = pd.Index(exp_data, dtype=exp_index_dtype) + tm.assert_index_equal(res, exp) + + # 3 elements + res = pd.Index(vals1).append([pd.Index(vals2), pd.Index(vals3)]) + exp = pd.Index(exp_data3, dtype=exp_index_dtype) + tm.assert_index_equal(res, exp) + + # ----- Series ----- # + + # series.append + res = 
pd.Series(vals1).append(pd.Series(vals2), ignore_index=True) + exp = pd.Series(exp_data, dtype=exp_series_dtype) + tm.assert_series_equal(res, exp, check_index_type=True) + + # concat + res = pd.concat([pd.Series(vals1), pd.Series(vals2)], ignore_index=True) + tm.assert_series_equal(res, exp, check_index_type=True) + + # 3 elements + res = pd.Series(vals1).append( + [pd.Series(vals2), pd.Series(vals3)], ignore_index=True + ) + exp = pd.Series(exp_data3, dtype=exp_series_dtype) + tm.assert_series_equal(res, exp) + + res = pd.concat( + [pd.Series(vals1), pd.Series(vals2), pd.Series(vals3)], + ignore_index=True, + ) + tm.assert_series_equal(res, exp) + + def test_concatlike_common_coerce_to_pandas_object(self): + # GH 13626 + # result must be Timestamp/Timedelta, not datetime.datetime/timedelta + dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"]) + tdi = pd.TimedeltaIndex(["1 days", "2 days"]) + + exp = pd.Index( + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + ] + ) + + res = dti.append(tdi) + tm.assert_index_equal(res, exp) + assert isinstance(res[0], pd.Timestamp) + assert isinstance(res[-1], pd.Timedelta) + + dts = pd.Series(dti) + tds = pd.Series(tdi) + res = dts.append(tds) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + assert isinstance(res.iloc[0], pd.Timestamp) + assert isinstance(res.iloc[-1], pd.Timedelta) + + res = pd.concat([dts, tds]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + assert isinstance(res.iloc[0], pd.Timestamp) + assert isinstance(res.iloc[-1], pd.Timedelta) + + def test_concatlike_datetimetz(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH 7795 + dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz) + + exp = pd.DatetimeIndex( + ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz + ) + + res = dti1.append(dti2) + tm.assert_index_equal(res, 
exp) + + dts1 = pd.Series(dti1) + dts2 = pd.Series(dti2) + res = dts1.append(dts2) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"]) + def test_concatlike_datetimetz_short(self, tz): + # GH#7795 + ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz) + ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz) + df1 = pd.DataFrame(0, index=ix1, columns=["A", "B"]) + df2 = pd.DataFrame(0, index=ix2, columns=["A", "B"]) + + exp_idx = pd.DatetimeIndex( + ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"], + tz=tz, + ) + exp = pd.DataFrame(0, index=exp_idx, columns=["A", "B"]) + + tm.assert_frame_equal(df1.append(df2), exp) + tm.assert_frame_equal(pd.concat([df1, df2]), exp) + + def test_concatlike_datetimetz_to_object(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH 13660 + + # different tz coerces to object + dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"]) + + exp = pd.Index( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-02", tz=tz), + pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-02"), + ], + dtype=object, + ) + + res = dti1.append(dti2) + tm.assert_index_equal(res, exp) + + dts1 = pd.Series(dti1) + dts2 = pd.Series(dti2) + res = dts1.append(dts2) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + # different tz + dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific") + + exp = pd.Index( + [ + pd.Timestamp("2011-01-01", tz=tz), + pd.Timestamp("2011-01-02", tz=tz), + pd.Timestamp("2012-01-01", tz="US/Pacific"), + pd.Timestamp("2012-01-02", tz="US/Pacific"), + ], + dtype=object, + ) + + res = 
dti1.append(dti3) + # tm.assert_index_equal(res, exp) + + dts1 = pd.Series(dti1) + dts3 = pd.Series(dti3) + res = dts1.append(dts3) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([dts1, dts3]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period(self): + # GH 13660 + pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") + pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M") + + exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M") + + res = pi1.append(pi2) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + ps2 = pd.Series(pi2) + res = ps1.append(ps2) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, ps2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period_diff_freq_to_object(self): + # GH 13221 + pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") + pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D") + + exp = pd.Index( + [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2012-01-01", freq="D"), + pd.Period("2012-02-01", freq="D"), + ], + dtype=object, + ) + + res = pi1.append(pi2) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + ps2 = pd.Series(pi2) + res = ps1.append(ps2) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, ps2]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def test_concatlike_common_period_mixed_dt_to_object(self): + # GH 13221 + # different datetimelike + pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M") + tdi = pd.TimedeltaIndex(["1 days", "2 days"]) + exp = pd.Index( + [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + ], + dtype=object, + ) + + res = pi1.append(tdi) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + tds 
= pd.Series(tdi) + res = ps1.append(tds) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([ps1, tds]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + # inverse + exp = pd.Index( + [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + ], + dtype=object, + ) + + res = tdi.append(pi1) + tm.assert_index_equal(res, exp) + + ps1 = pd.Series(pi1) + tds = pd.Series(tdi) + res = tds.append(ps1) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + res = pd.concat([tds, ps1]) + tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + + def test_concat_categorical(self): + # GH 13524 + + # same categories -> category + s1 = pd.Series([1, 2, np.nan], dtype="category") + s2 = pd.Series([2, 1, 2], dtype="category") + + exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype="category") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + # partially different categories => not-category + s1 = pd.Series([3, 2], dtype="category") + s2 = pd.Series([2, 1], dtype="category") + + exp = pd.Series([3, 2, 2, 1]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + # completely different categories (same dtype) => not-category + s1 = pd.Series([10, 11, np.nan], dtype="category") + s2 = pd.Series([np.nan, 1, 3, 2], dtype="category") + + exp = pd.Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + def test_union_categorical_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19096 + a = pd.Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"])) + b = pd.Series(Categorical(["a", "b", "c"], 
categories=["b", "a", "c"])) + result = pd.concat([a, b], ignore_index=True) + expected = pd.Series( + Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"]) + ) + tm.assert_series_equal(result, expected) + + def test_concat_categorical_coercion(self): + # GH 13524 + + # category + not-category => not-category + s1 = pd.Series([1, 2, np.nan], dtype="category") + s2 = pd.Series([2, 1, 2]) + + exp = pd.Series([1, 2, np.nan, 2, 1, 2], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + # result shouldn't be affected by 1st elem dtype + exp = pd.Series([2, 1, 2, 1, 2, np.nan], dtype="object") + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + + # all values are not in category => not-category + s1 = pd.Series([3, 2], dtype="category") + s2 = pd.Series([2, 1]) + + exp = pd.Series([3, 2, 2, 1]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + exp = pd.Series([2, 1, 3, 2]) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + + # completely different categories => not-category + s1 = pd.Series([10, 11, np.nan], dtype="category") + s2 = pd.Series([1, 3, 2]) + + exp = pd.Series([10, 11, np.nan, 1, 3, 2], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + exp = pd.Series([1, 3, 2, 10, 11, np.nan], dtype="object") + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + + # different dtype => not-category + s1 = pd.Series([10, 11, np.nan], dtype="category") + s2 = pd.Series(["a", "b", "c"]) + + exp = pd.Series([10, 11, np.nan, 
"a", "b", "c"]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + exp = pd.Series(["a", "b", "c", 10, 11, np.nan]) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + + # if normal series only contains NaN-likes => not-category + s1 = pd.Series([10, 11], dtype="category") + s2 = pd.Series([np.nan, np.nan, np.nan]) + + exp = pd.Series([10, 11, np.nan, np.nan, np.nan]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + exp = pd.Series([np.nan, np.nan, np.nan, 10, 11]) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + + def test_concat_categorical_3elem_coercion(self): + # GH 13524 + + # mixed dtypes => not-category + s1 = pd.Series([1, 2, np.nan], dtype="category") + s2 = pd.Series([2, 1, 2], dtype="category") + s3 = pd.Series([1, 2, 1, 2, np.nan]) + + exp = pd.Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp) + + exp = pd.Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="object") + tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp) + + # values are all in either category => not-category + s1 = pd.Series([4, 5, 6], dtype="category") + s2 = pd.Series([1, 2, 3], dtype="category") + s3 = pd.Series([1, 3, 4]) + + exp = pd.Series([4, 5, 6, 1, 2, 3, 1, 3, 4]) + tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp) + + exp = pd.Series([1, 3, 4, 4, 5, 6, 1, 2, 3]) + 
tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp) + + # values are all in either category => not-category + s1 = pd.Series([4, 5, 6], dtype="category") + s2 = pd.Series([1, 2, 3], dtype="category") + s3 = pd.Series([10, 11, 12]) + + exp = pd.Series([4, 5, 6, 1, 2, 3, 10, 11, 12]) + tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp) + tm.assert_series_equal(s1.append([s2, s3], ignore_index=True), exp) + + exp = pd.Series([10, 11, 12, 4, 5, 6, 1, 2, 3]) + tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s3.append([s1, s2], ignore_index=True), exp) + + def test_concat_categorical_multi_coercion(self): + # GH 13524 + + s1 = pd.Series([1, 3], dtype="category") + s2 = pd.Series([3, 4], dtype="category") + s3 = pd.Series([2, 3]) + s4 = pd.Series([2, 2], dtype="category") + s5 = pd.Series([1, np.nan]) + s6 = pd.Series([1, 3, 2], dtype="category") + + # mixed dtype, values are all in categories => not-category + exp = pd.Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2]) + res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True) + tm.assert_series_equal(res, exp) + res = s1.append([s2, s3, s4, s5, s6], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = pd.Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3]) + res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True) + tm.assert_series_equal(res, exp) + res = s6.append([s5, s4, s3, s2, s1], ignore_index=True) + tm.assert_series_equal(res, exp) + + def test_concat_categorical_ordered(self): + # GH 13524 + + s1 = pd.Series(pd.Categorical([1, 2, np.nan], ordered=True)) + s2 = pd.Series(pd.Categorical([2, 1, 2], ordered=True)) + + exp = pd.Series(pd.Categorical([1, 2, np.nan, 2, 1, 2], ordered=True)) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + exp = 
pd.Series( + pd.Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True) + ) + tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s1.append([s2, s1], ignore_index=True), exp) + + def test_concat_categorical_coercion_nan(self): + # GH 13524 + + # some edge cases + # category + not-category => not category + s1 = pd.Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category") + s2 = pd.Series([np.nan, 1]) + + exp = pd.Series([np.nan, np.nan, np.nan, 1]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + s1 = pd.Series([1, np.nan], dtype="category") + s2 = pd.Series([np.nan, np.nan]) + + exp = pd.Series([1, np.nan, np.nan, np.nan], dtype="object") + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + # mixed dtype, all nan-likes => not-category + s1 = pd.Series([np.nan, np.nan], dtype="category") + s2 = pd.Series([np.nan, np.nan]) + + exp = pd.Series([np.nan, np.nan, np.nan, np.nan]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + + # all category nan-likes => category + s1 = pd.Series([np.nan, np.nan], dtype="category") + s2 = pd.Series([np.nan, np.nan], dtype="category") + + exp = pd.Series([np.nan, np.nan, np.nan, np.nan], dtype="category") + + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + def test_concat_categorical_empty(self): + # GH 13524 + + s1 = pd.Series([], dtype="category") + s2 = pd.Series([1, 2], dtype="category") + + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) + 
tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) + + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) + tm.assert_series_equal(s2.append(s1, ignore_index=True), s2) + + s1 = pd.Series([], dtype="category") + s2 = pd.Series([], dtype="category") + + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) + tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) + + s1 = pd.Series([], dtype="category") + s2 = pd.Series([], dtype="object") + + # different dtype => not-category + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2) + tm.assert_series_equal(s1.append(s2, ignore_index=True), s2) + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2) + tm.assert_series_equal(s2.append(s1, ignore_index=True), s2) + + s1 = pd.Series([], dtype="category") + s2 = pd.Series([np.nan, np.nan]) + + # empty Series is ignored + exp = pd.Series([np.nan, np.nan]) + tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp) + tm.assert_series_equal(s1.append(s2, ignore_index=True), exp) + + tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp) + tm.assert_series_equal(s2.append(s1, ignore_index=True), exp) + + +class TestAppend: + def test_append(self, sort, float_frame): + mixed_frame = float_frame.copy() + mixed_frame["foo"] = "bar" + + begin_index = float_frame.index[:5] + end_index = float_frame.index[5:] + + begin_frame = float_frame.reindex(begin_index) + end_frame = float_frame.reindex(end_index) + + appended = begin_frame.append(end_frame) + tm.assert_almost_equal(appended["A"], float_frame["A"]) + + del end_frame["A"] + partial_appended = begin_frame.append(end_frame, sort=sort) + assert "A" in partial_appended + + partial_appended = end_frame.append(begin_frame, sort=sort) + assert "A" in partial_appended + + # mixed type handling + appended = mixed_frame[:5].append(mixed_frame[5:]) + tm.assert_frame_equal(appended, mixed_frame) + + # what to test here + mixed_appended = 
mixed_frame[:5].append(float_frame[5:], sort=sort) + mixed_appended2 = float_frame[:5].append(mixed_frame[5:], sort=sort) + + # all equal except 'foo' column + tm.assert_frame_equal( + mixed_appended.reindex(columns=["A", "B", "C", "D"]), + mixed_appended2.reindex(columns=["A", "B", "C", "D"]), + ) + + def test_append_empty(self, float_frame): + empty = DataFrame() + + appended = float_frame.append(empty) + tm.assert_frame_equal(float_frame, appended) + assert appended is not float_frame + + appended = empty.append(float_frame) + tm.assert_frame_equal(float_frame, appended) + assert appended is not float_frame + + def test_append_overlap_raises(self, float_frame): + msg = "Indexes have overlapping values" + with pytest.raises(ValueError, match=msg): + float_frame.append(float_frame, verify_integrity=True) + + def test_append_new_columns(self): + # see gh-6129: new columns + df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}}) + row = Series([5, 6, 7], index=["a", "b", "c"], name="z") + expected = DataFrame( + { + "a": {"x": 1, "y": 2, "z": 5}, + "b": {"x": 3, "y": 4, "z": 6}, + "c": {"z": 7}, + } + ) + result = df.append(row) + tm.assert_frame_equal(result, expected) + + def test_append_length0_frame(self, sort): + df = DataFrame(columns=["A", "B", "C"]) + df3 = DataFrame(index=[0, 1], columns=["A", "B"]) + df5 = df.append(df3, sort=sort) + + expected = DataFrame(index=[0, 1], columns=["A", "B", "C"]) + tm.assert_frame_equal(df5, expected) + + def test_append_records(self): + arr1 = np.zeros((2,), dtype=("i4,f4,a10")) + arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")] + + arr2 = np.zeros((3,), dtype=("i4,f4,a10")) + arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")] + + df1 = DataFrame(arr1) + df2 = DataFrame(arr2) + + result = df1.append(df2, ignore_index=True) + expected = DataFrame(np.concatenate((arr1, arr2))) + tm.assert_frame_equal(result, expected) + + # rewrite sort fixture, since we also want to test default of None + def 
test_append_sorts(self, sort): + df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) + df2 = pd.DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3]) + + with tm.assert_produces_warning(None): + result = df1.append(df2, sort=sort) + + # for None / True + expected = pd.DataFrame( + {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]}, + columns=["a", "b", "c"], + ) + if sort is False: + expected = expected[["b", "a", "c"]] + tm.assert_frame_equal(result, expected) + + def test_append_different_columns(self, sort): + df = DataFrame( + { + "bools": np.random.randn(10) > 0, + "ints": np.random.randint(0, 10, 10), + "floats": np.random.randn(10), + "strings": ["foo", "bar"] * 5, + } + ) + + a = df[:5].loc[:, ["bools", "ints", "floats"]] + b = df[5:].loc[:, ["strings", "ints", "floats"]] + + appended = a.append(b, sort=sort) + assert isna(appended["strings"][0:4]).all() + assert isna(appended["bools"][5:]).all() + + def test_append_many(self, sort, float_frame): + chunks = [ + float_frame[:5], + float_frame[5:10], + float_frame[10:15], + float_frame[15:], + ] + + result = chunks[0].append(chunks[1:]) + tm.assert_frame_equal(result, float_frame) + + chunks[-1] = chunks[-1].copy() + chunks[-1]["foo"] = "bar" + result = chunks[0].append(chunks[1:], sort=sort) + tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame) + assert (result["foo"][15:] == "bar").all() + assert result["foo"][:15].isna().all() + + def test_append_preserve_index_name(self): + # #980 + df1 = DataFrame(columns=["A", "B", "C"]) + df1 = df1.set_index(["A"]) + df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"]) + df2 = df2.set_index(["A"]) + + result = df1.append(df2) + assert result.index.name == "A" + + indexes_can_append = [ + pd.RangeIndex(3), + pd.Index([4, 5, 6]), + pd.Index([4.5, 5.5, 6.5]), + pd.Index(list("abc")), + pd.CategoricalIndex("A B C".split()), + pd.CategoricalIndex("D E F".split(), ordered=True), + 
pd.IntervalIndex.from_breaks([7, 8, 9, 10]), + pd.DatetimeIndex( + [ + dt.datetime(2013, 1, 3, 0, 0), + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 3, 7, 12), + ] + ), + ] + + indexes_cannot_append_with_other = [ + pd.MultiIndex.from_arrays(["A B C".split(), "D E F".split()]) + ] + + all_indexes = indexes_can_append + indexes_cannot_append_with_other + + @pytest.mark.parametrize("index", all_indexes, ids=lambda x: type(x).__name__) + def test_append_same_columns_type(self, index): + # GH18359 + + # df wider than ser + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index) + ser_index = index[:2] + ser = pd.Series([7, 8], index=ser_index, name=2) + result = df.append(ser) + expected = pd.DataFrame( + [[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index + ) + tm.assert_frame_equal(result, expected) + + # ser wider than df + ser_index = index + index = index[:2] + df = pd.DataFrame([[1, 2], [4, 5]], columns=index) + ser = pd.Series([7, 8, 9], index=ser_index, name=2) + result = df.append(ser) + expected = pd.DataFrame( + [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]], + index=[0, 1, 2], + columns=ser_index, + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "df_columns, series_index", + combinations(indexes_can_append, r=2), + ids=lambda x: type(x).__name__, + ) + def test_append_different_columns_types(self, df_columns, series_index): + # GH18359 + # See also test 'test_append_different_columns_types_raises' below + # for errors raised when appending + + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns) + ser = pd.Series([7, 8, 9], index=series_index, name=2) + + result = df.append(ser) + idx_diff = ser.index.difference(df_columns) + combined_columns = Index(df_columns.tolist()).append(idx_diff) + expected = pd.DataFrame( + [ + [1.0, 2.0, 3.0, np.nan, np.nan, np.nan], + [4, 5, 6, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, 7, 8, 9], + ], + index=[0, 1, 2], + columns=combined_columns, + ) 
+ tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "index_can_append", indexes_can_append, ids=lambda x: type(x).__name__ + ) + @pytest.mark.parametrize( + "index_cannot_append_with_other", + indexes_cannot_append_with_other, + ids=lambda x: type(x).__name__, + ) + def test_append_different_columns_types_raises( + self, index_can_append, index_cannot_append_with_other + ): + # GH18359 + # Dataframe.append will raise if MultiIndex appends + # or is appended to a different index type + # + # See also test 'test_append_different_columns_types' above for + # appending without raising. + + df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append) + ser = pd.Series([7, 8, 9], index=index_cannot_append_with_other, name=2) + msg = ( + r"Expected tuple, got (int|long|float|str|" + r"pandas._libs.interval.Interval)|" + r"object of type '(int|float|Timestamp|" + r"pandas._libs.interval.Interval)' has no len\(\)|" + ) + with pytest.raises(TypeError, match=msg): + df.append(ser) + + df = pd.DataFrame( + [[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other + ) + ser = pd.Series([7, 8, 9], index=index_can_append, name=2) + + with pytest.raises(TypeError, match=msg): + df.append(ser) + + def test_append_dtype_coerce(self, sort): + + # GH 4993 + # appending with datetime will incorrectly convert datetime64 + + df1 = DataFrame( + index=[1, 2], + data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)], + columns=["start_time"], + ) + df2 = DataFrame( + index=[4, 5], + data=[ + [dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)], + [dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)], + ], + columns=["start_time", "end_time"], + ) + + expected = concat( + [ + Series( + [ + pd.NaT, + pd.NaT, + dt.datetime(2013, 1, 3, 6, 10), + dt.datetime(2013, 1, 4, 7, 10), + ], + name="end_time", + ), + Series( + [ + dt.datetime(2013, 1, 1, 0, 0), + dt.datetime(2013, 1, 2, 0, 0), + dt.datetime(2013, 1, 3, 0, 0), + 
dt.datetime(2013, 1, 4, 0, 0), + ], + name="start_time", + ), + ], + axis=1, + sort=sort, + ) + result = df1.append(df2, ignore_index=True, sort=sort) + if sort: + expected = expected[["end_time", "start_time"]] + else: + expected = expected[["start_time", "end_time"]] + + tm.assert_frame_equal(result, expected) + + def test_append_missing_column_proper_upcast(self, sort): + df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")}) + df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)}) + + appended = df1.append(df2, ignore_index=True, sort=sort) + assert appended["A"].dtype == "f8" + assert appended["B"].dtype == "O" + + def test_append_empty_frame_to_series_with_dateutil_tz(self): + # GH 23682 + date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc()) + s = Series({"date": date, "a": 1.0, "b": 2.0}) + df = DataFrame(columns=["c", "d"]) + result = df.append(s, ignore_index=True) + # n.b. it's not clear to me that expected is correct here. + # It's possible that the `date` column should have + # datetime64[ns, tz] dtype for both result and expected. + # that would be more consistent with new columns having + # their own dtype (float for a and b, datetime64ns, tz for date). + expected = DataFrame( + [[np.nan, np.nan, 1.0, 2.0, date]], + columns=["c", "d", "a", "b", "date"], + dtype=object, + ) + # These columns get cast to object after append + expected["a"] = expected["a"].astype(float) + expected["b"] = expected["b"].astype(float) + tm.assert_frame_equal(result, expected) + + +class TestConcatenate: + def test_concat_copy(self): + df = DataFrame(np.random.randn(4, 3)) + df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1)) + df3 = DataFrame({5: "foo"}, index=range(4)) + + # These are actual copies. + result = concat([df, df2, df3], axis=1, copy=True) + + for b in result._data.blocks: + assert b.values.base is None + + # These are the same. 
+ result = concat([df, df2, df3], axis=1, copy=False) + + for b in result._data.blocks: + if b.is_float: + assert b.values.base is df._data.blocks[0].values.base + elif b.is_integer: + assert b.values.base is df2._data.blocks[0].values.base + elif b.is_object: + assert b.values.base is not None + + # Float block was consolidated. + df4 = DataFrame(np.random.randn(4, 1)) + result = concat([df, df2, df3, df4], axis=1, copy=False) + for b in result._data.blocks: + if b.is_float: + assert b.values.base is None + elif b.is_integer: + assert b.values.base is df2._data.blocks[0].values.base + elif b.is_object: + assert b.values.base is not None + + def test_concat_with_group_keys(self): + df = DataFrame(np.random.randn(4, 3)) + df2 = DataFrame(np.random.randn(4, 4)) + + # axis=0 + df = DataFrame(np.random.randn(3, 4)) + df2 = DataFrame(np.random.randn(4, 4)) + + result = concat([df, df2], keys=[0, 1]) + exp_index = MultiIndex.from_arrays( + [[0, 0, 0, 1, 1, 1, 1], [0, 1, 2, 0, 1, 2, 3]] + ) + expected = DataFrame(np.r_[df.values, df2.values], index=exp_index) + tm.assert_frame_equal(result, expected) + + result = concat([df, df], keys=[0, 1]) + exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) + expected = DataFrame(np.r_[df.values, df.values], index=exp_index2) + tm.assert_frame_equal(result, expected) + + # axis=1 + df = DataFrame(np.random.randn(4, 3)) + df2 = DataFrame(np.random.randn(4, 4)) + + result = concat([df, df2], keys=[0, 1], axis=1) + expected = DataFrame(np.c_[df.values, df2.values], columns=exp_index) + tm.assert_frame_equal(result, expected) + + result = concat([df, df], keys=[0, 1], axis=1) + expected = DataFrame(np.c_[df.values, df.values], columns=exp_index2) + tm.assert_frame_equal(result, expected) + + def test_concat_keys_specific_levels(self): + df = DataFrame(np.random.randn(10, 4)) + pieces = [df.iloc[:, [0, 1]], df.iloc[:, [2]], df.iloc[:, [3]]] + level = ["three", "two", "one", "zero"] + result = concat( + pieces, + 
axis=1, + keys=["one", "two", "three"], + levels=[level], + names=["group_key"], + ) + + tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key")) + tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3])) + + assert result.columns.names == ["group_key", None] + + def test_concat_dataframe_keys_bug(self, sort): + t1 = DataFrame( + {"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))} + ) + t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))}) + + # it works + result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort) + assert list(result.columns) == [("t1", "value"), ("t2", "value")] + + def test_concat_series_partial_columns_names(self): + # GH10698 + foo = Series([1, 2], name="foo") + bar = Series([1, 2]) + baz = Series([4, 5]) + + result = concat([foo, bar, baz], axis=1) + expected = DataFrame( + {"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1] + ) + tm.assert_frame_equal(result, expected) + + result = concat([foo, bar, baz], axis=1, keys=["red", "blue", "yellow"]) + expected = DataFrame( + {"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]}, + columns=["red", "blue", "yellow"], + ) + tm.assert_frame_equal(result, expected) + + result = concat([foo, bar, baz], axis=1, ignore_index=True) + expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]}) + tm.assert_frame_equal(result, expected) + + def test_concat_dict(self): + frames = { + "foo": DataFrame(np.random.randn(4, 3)), + "bar": DataFrame(np.random.randn(4, 3)), + "baz": DataFrame(np.random.randn(4, 3)), + "qux": DataFrame(np.random.randn(4, 3)), + } + + sorted_keys = list(frames.keys()) + + result = concat(frames) + expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys) + tm.assert_frame_equal(result, expected) + + result = concat(frames, axis=1) + expected = concat([frames[k] for k in sorted_keys], keys=sorted_keys, axis=1) + tm.assert_frame_equal(result, expected) + + keys = ["baz", "foo", "bar"] + result 
= concat(frames, keys=keys) + expected = concat([frames[k] for k in keys], keys=keys) + tm.assert_frame_equal(result, expected) + + def test_concat_ignore_index(self, sort): + frame1 = DataFrame( + {"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]} + ) + frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]}) + frame1.index = Index(["x", "y", "z"]) + frame2.index = Index(["x", "y", "q"]) + + v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort) + + nan = np.nan + expected = DataFrame( + [ + [nan, nan, nan, 4.3], + ["a", 1, 4.5, 5.2], + ["b", 2, 3.2, 2.2], + ["c", 3, 1.2, nan], + ], + index=Index(["q", "x", "y", "z"]), + ) + if not sort: + expected = expected.loc[["x", "y", "z", "q"]] + + tm.assert_frame_equal(v1, expected) + + def test_concat_multiindex_with_keys(self): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + frame = DataFrame( + np.random.randn(10, 3), + index=index, + columns=Index(["A", "B", "C"], name="exp"), + ) + result = concat([frame, frame], keys=[0, 1], names=["iteration"]) + + assert result.index.names == ("iteration",) + index.names + tm.assert_frame_equal(result.loc[0], frame) + tm.assert_frame_equal(result.loc[1], frame) + assert result.index.nlevels == 3 + + def test_concat_multiindex_with_tz(self): + # GH 6606 + df = DataFrame( + { + "dt": [ + datetime(2014, 1, 1), + datetime(2014, 1, 2), + datetime(2014, 1, 3), + ], + "b": ["A", "B", "C"], + "c": [1, 2, 3], + "d": [4, 5, 6], + } + ) + df["dt"] = df["dt"].apply(lambda d: Timestamp(d, tz="US/Pacific")) + df = df.set_index(["dt", "b"]) + + exp_idx1 = DatetimeIndex( + ["2014-01-01", "2014-01-02", "2014-01-03"] * 2, tz="US/Pacific", name="dt" + ) + exp_idx2 = Index(["A", "B", "C"] * 2, name="b") + exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) + expected = DataFrame( + {"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, 
index=exp_idx, columns=["c", "d"] + ) + + result = concat([df, df]) + tm.assert_frame_equal(result, expected) + + def test_concat_multiindex_with_none_in_index_names(self): + # GH 15787 + index = pd.MultiIndex.from_product([[1], range(5)], names=["level1", None]) + df = pd.DataFrame({"col": range(5)}, index=index, dtype=np.int32) + + result = concat([df, df], keys=[1, 2], names=["level2"]) + index = pd.MultiIndex.from_product( + [[1, 2], [1], range(5)], names=["level2", "level1", None] + ) + expected = pd.DataFrame( + {"col": list(range(5)) * 2}, index=index, dtype=np.int32 + ) + tm.assert_frame_equal(result, expected) + + result = concat([df, df[:2]], keys=[1, 2], names=["level2"]) + level2 = [1] * 5 + [2] * 2 + level1 = [1] * 7 + no_name = list(range(5)) + list(range(2)) + tuples = list(zip(level2, level1, no_name)) + index = pd.MultiIndex.from_tuples(tuples, names=["level2", "level1", None]) + expected = pd.DataFrame({"col": no_name}, index=index, dtype=np.int32) + tm.assert_frame_equal(result, expected) + + def test_concat_keys_and_levels(self): + df = DataFrame(np.random.randn(1, 3)) + df2 = DataFrame(np.random.randn(1, 4)) + + levels = [["foo", "baz"], ["one", "two"]] + names = ["first", "second"] + result = concat( + [df, df2, df, df2], + keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], + levels=levels, + names=names, + ) + expected = concat([df, df2, df, df2]) + exp_index = MultiIndex( + levels=levels + [[0]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1], [0, 0, 0, 0]], + names=names + [None], + ) + expected.index = exp_index + + tm.assert_frame_equal(result, expected) + + # no names + result = concat( + [df, df2, df, df2], + keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], + levels=levels, + ) + assert result.index.names == (None,) * 3 + + # no levels + result = concat( + [df, df2, df, df2], + keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")], + names=["first", "second"], + ) + assert 
result.index.names == ("first", "second", None) + tm.assert_index_equal( + result.index.levels[0], Index(["baz", "foo"], name="first") + ) + + def test_concat_keys_levels_no_overlap(self): + # GH #1406 + df = DataFrame(np.random.randn(1, 3), index=["a"]) + df2 = DataFrame(np.random.randn(1, 4), index=["b"]) + + msg = "Values not found in passed level" + with pytest.raises(ValueError, match=msg): + concat([df, df], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) + + msg = "Key one not in level" + with pytest.raises(ValueError, match=msg): + concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]]) + + def test_concat_rename_index(self): + a = DataFrame( + np.random.rand(3, 3), + columns=list("ABC"), + index=Index(list("abc"), name="index_a"), + ) + b = DataFrame( + np.random.rand(3, 3), + columns=list("ABC"), + index=Index(list("abc"), name="index_b"), + ) + + result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"]) + + exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"]) + names = list(exp.index.names) + names[1] = "lvl1" + exp.index.set_names(names, inplace=True) + + tm.assert_frame_equal(result, exp) + assert result.index.names == exp.index.names + + def test_crossed_dtypes_weird_corner(self): + columns = ["A", "B", "C", "D"] + df1 = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="f8"), + "B": np.array([1, 2, 3, 4], dtype="i8"), + "C": np.array([1, 2, 3, 4], dtype="f8"), + "D": np.array([1, 2, 3, 4], dtype="i8"), + }, + columns=columns, + ) + + df2 = DataFrame( + { + "A": np.array([1, 2, 3, 4], dtype="i8"), + "B": np.array([1, 2, 3, 4], dtype="f8"), + "C": np.array([1, 2, 3, 4], dtype="i8"), + "D": np.array([1, 2, 3, 4], dtype="f8"), + }, + columns=columns, + ) + + appended = df1.append(df2, ignore_index=True) + expected = DataFrame( + np.concatenate([df1.values, df2.values], axis=0), columns=columns + ) + tm.assert_frame_equal(appended, expected) + + df = DataFrame(np.random.randn(1, 3), index=["a"]) + df2 = 
DataFrame(np.random.randn(1, 4), index=["b"]) + result = concat([df, df2], keys=["one", "two"], names=["first", "second"]) + assert result.index.names == ("first", "second") + + def test_dups_index(self): + # GH 4771 + + # single dtypes + df = DataFrame( + np.random.randint(0, 10, size=40).reshape(10, 4), + columns=["A", "A", "C", "C"], + ) + + result = concat([df, df], axis=1) + tm.assert_frame_equal(result.iloc[:, :4], df) + tm.assert_frame_equal(result.iloc[:, 4:], df) + + result = concat([df, df], axis=0) + tm.assert_frame_equal(result.iloc[:10], df) + tm.assert_frame_equal(result.iloc[10:], df) + + # multi dtypes + df = concat( + [ + DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]), + DataFrame( + np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] + ), + ], + axis=1, + ) + + result = concat([df, df], axis=1) + tm.assert_frame_equal(result.iloc[:, :6], df) + tm.assert_frame_equal(result.iloc[:, 6:], df) + + result = concat([df, df], axis=0) + tm.assert_frame_equal(result.iloc[:10], df) + tm.assert_frame_equal(result.iloc[10:], df) + + # append + result = df.iloc[0:8, :].append(df.iloc[8:]) + tm.assert_frame_equal(result, df) + + result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10]) + tm.assert_frame_equal(result, df) + + expected = concat([df, df], axis=0) + result = df.append(df) + tm.assert_frame_equal(result, expected) + + def test_with_mixed_tuples(self, sort): + # 10697 + # columns have mixed tuples, so handle properly + df1 = DataFrame({"A": "foo", ("B", 1): "bar"}, index=range(2)) + df2 = DataFrame({"B": "foo", ("B", 1): "bar"}, index=range(2)) + + # it works + concat([df1, df2], sort=sort) + + def test_handle_empty_objects(self, sort): + df = DataFrame(np.random.randn(10, 4), columns=list("abcd")) + + baz = df[:5].copy() + baz["foo"] = "bar" + empty = df[5:5] + + frames = [baz, empty, empty, df[5:]] + concatted = concat(frames, axis=0, sort=sort) + + expected = df.reindex(columns=["a", "b", "c", "d", 
"foo"]) + expected["foo"] = expected["foo"].astype("O") + expected.loc[0:4, "foo"] = "bar" + + tm.assert_frame_equal(concatted, expected) + + # empty as first element with time series + # GH3259 + df = DataFrame( + dict(A=range(10000)), index=date_range("20130101", periods=10000, freq="s") + ) + empty = DataFrame() + result = concat([df, empty], axis=1) + tm.assert_frame_equal(result, df) + result = concat([empty, df], axis=1) + tm.assert_frame_equal(result, df) + + result = concat([df, empty]) + tm.assert_frame_equal(result, df) + result = concat([empty, df]) + tm.assert_frame_equal(result, df) + + def test_concat_mixed_objs(self): + + # concat mixed series/frames + # G2385 + + # axis 1 + index = date_range("01-Jan-2013", periods=10, freq="H") + arr = np.arange(10, dtype="int64") + s1 = Series(arr, index=index) + s2 = Series(arr, index=index) + df = DataFrame(arr.reshape(-1, 1), index=index) + + expected = DataFrame( + np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 0] + ) + result = concat([df, df], axis=1) + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 1] + ) + result = concat([s1, s2], axis=1) + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] + ) + result = concat([s1, s2, s1], axis=1) + tm.assert_frame_equal(result, expected) + + expected = DataFrame( + np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3] + ) + result = concat([s1, df, s2, s2, s1], axis=1) + tm.assert_frame_equal(result, expected) + + # with names + s1.name = "foo" + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, 0] + ) + result = concat([s1, df, s2], axis=1) + tm.assert_frame_equal(result, expected) + + s2.name = "bar" + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, "bar"] + ) + result = concat([s1, df, s2], 
axis=1) + tm.assert_frame_equal(result, expected) + + # ignore index + expected = DataFrame( + np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] + ) + result = concat([s1, df, s2], axis=1, ignore_index=True) + tm.assert_frame_equal(result, expected) + + # axis 0 + expected = DataFrame( + np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] + ) + result = concat([s1, df, s2]) + tm.assert_frame_equal(result, expected) + + expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) + result = concat([s1, df, s2], ignore_index=True) + tm.assert_frame_equal(result, expected) + + def test_empty_dtype_coerce(self): + + # xref to #12411 + # xref to #12045 + # xref to #11594 + # see below + + # 10571 + df1 = DataFrame(data=[[1, None], [2, None]], columns=["a", "b"]) + df2 = DataFrame(data=[[3, None], [4, None]], columns=["a", "b"]) + result = concat([df1, df2]) + expected = df1.dtypes + tm.assert_series_equal(result.dtypes, expected) + + def test_dtype_coerceion(self): + + # 12411 + df = DataFrame({"date": [pd.Timestamp("20130101").tz_localize("UTC"), pd.NaT]}) + + result = concat([df.iloc[[0]], df.iloc[[1]]]) + tm.assert_series_equal(result.dtypes, df.dtypes) + + # 12045 + import datetime + + df = DataFrame( + {"date": [datetime.datetime(2012, 1, 1), datetime.datetime(1012, 1, 2)]} + ) + result = concat([df.iloc[[0]], df.iloc[[1]]]) + tm.assert_series_equal(result.dtypes, df.dtypes) + + # 11594 + df = DataFrame({"text": ["some words"] + [None] * 9}) + result = concat([df.iloc[[0]], df.iloc[[1]]]) + tm.assert_series_equal(result.dtypes, df.dtypes) + + def test_concat_series(self): + + ts = tm.makeTimeSeries() + ts.name = "foo" + + pieces = [ts[:5], ts[5:15], ts[15:]] + + result = concat(pieces) + tm.assert_series_equal(result, ts) + assert result.name == ts.name + + result = concat(pieces, keys=[0, 1, 2]) + expected = ts.copy() + + ts.index = DatetimeIndex(np.array(ts.index.values, dtype="M8[ns]")) + + exp_codes = [np.repeat([0, 1, 2], 
[len(x) for x in pieces]), np.arange(len(ts))] + exp_index = MultiIndex(levels=[[0, 1, 2], ts.index], codes=exp_codes) + expected.index = exp_index + tm.assert_series_equal(result, expected) + + def test_concat_series_axis1(self, sort=sort): + ts = tm.makeTimeSeries() + + pieces = [ts[:-2], ts[2:], ts[2:-2]] + + result = concat(pieces, axis=1) + expected = DataFrame(pieces).T + tm.assert_frame_equal(result, expected) + + result = concat(pieces, keys=["A", "B", "C"], axis=1) + expected = DataFrame(pieces, index=["A", "B", "C"]).T + tm.assert_frame_equal(result, expected) + + # preserve series names, #2489 + s = Series(randn(5), name="A") + s2 = Series(randn(5), name="B") + + result = concat([s, s2], axis=1) + expected = DataFrame({"A": s, "B": s2}) + tm.assert_frame_equal(result, expected) + + s2.name = None + result = concat([s, s2], axis=1) + tm.assert_index_equal(result.columns, Index(["A", 0], dtype="object")) + + # must reindex, #2603 + s = Series(randn(3), index=["c", "a", "b"], name="A") + s2 = Series(randn(4), index=["d", "a", "b", "c"], name="B") + result = concat([s, s2], axis=1, sort=sort) + expected = DataFrame({"A": s, "B": s2}) + tm.assert_frame_equal(result, expected) + + def test_concat_series_axis1_names_applied(self): + # ensure names argument is not ignored on axis=1, #23490 + s = Series([1, 2, 3]) + s2 = Series([4, 5, 6]) + result = concat([s, s2], axis=1, keys=["a", "b"], names=["A"]) + expected = DataFrame( + [[1, 4], [2, 5], [3, 6]], columns=pd.Index(["a", "b"], name="A") + ) + tm.assert_frame_equal(result, expected) + + result = concat([s, s2], axis=1, keys=[("a", 1), ("b", 2)], names=["A", "B"]) + expected = DataFrame( + [[1, 4], [2, 5], [3, 6]], + columns=MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["A", "B"]), + ) + tm.assert_frame_equal(result, expected) + + def test_concat_single_with_key(self): + df = DataFrame(np.random.randn(10, 4)) + + result = concat([df], keys=["foo"]) + expected = concat([df, df], keys=["foo", "bar"]) + 
tm.assert_frame_equal(result, expected[:10]) + + def test_concat_exclude_none(self): + df = DataFrame(np.random.randn(10, 4)) + + pieces = [df[:5], None, None, df[5:]] + result = concat(pieces) + tm.assert_frame_equal(result, df) + with pytest.raises(ValueError, match="All objects passed were None"): + concat([None, None]) + + def test_concat_datetime64_block(self): + from pandas.core.indexes.datetimes import date_range + + rng = date_range("1/1/2000", periods=10) + + df = DataFrame({"time": rng}) + + result = concat([df, df]) + assert (result.iloc[:10]["time"] == rng).all() + assert (result.iloc[10:]["time"] == rng).all() + + def test_concat_timedelta64_block(self): + from pandas import to_timedelta + + rng = to_timedelta(np.arange(10), unit="s") + + df = DataFrame({"time": rng}) + + result = concat([df, df]) + assert (result.iloc[:10]["time"] == rng).all() + assert (result.iloc[10:]["time"] == rng).all() + + def test_concat_keys_with_none(self): + # #1649 + df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]]) + + result = concat(dict(a=None, b=df0, c=df0[:2], d=df0[:1], e=df0)) + expected = concat(dict(b=df0, c=df0[:2], d=df0[:1], e=df0)) + tm.assert_frame_equal(result, expected) + + result = concat( + [None, df0, df0[:2], df0[:1], df0], keys=["a", "b", "c", "d", "e"] + ) + expected = concat([df0, df0[:2], df0[:1], df0], keys=["b", "c", "d", "e"]) + tm.assert_frame_equal(result, expected) + + def test_concat_bug_1719(self): + ts1 = tm.makeTimeSeries() + ts2 = tm.makeTimeSeries()[::2] + + # to join with union + # these two are of different length! 
+ left = concat([ts1, ts2], join="outer", axis=1) + right = concat([ts2, ts1], join="outer", axis=1) + + assert len(left) == len(right) + + def test_concat_bug_2972(self): + ts0 = Series(np.zeros(5)) + ts1 = Series(np.ones(5)) + ts0.name = ts1.name = "same name" + result = concat([ts0, ts1], axis=1) + + expected = DataFrame({0: ts0, 1: ts1}) + expected.columns = ["same name", "same name"] + tm.assert_frame_equal(result, expected) + + def test_concat_bug_3602(self): + + # GH 3602, duplicate columns + df1 = DataFrame( + { + "firmNo": [0, 0, 0, 0], + "prc": [6, 6, 6, 6], + "stringvar": ["rrr", "rrr", "rrr", "rrr"], + } + ) + df2 = DataFrame( + {"C": [9, 10, 11, 12], "misc": [1, 2, 3, 4], "prc": [6, 6, 6, 6]} + ) + expected = DataFrame( + [ + [0, 6, "rrr", 9, 1, 6], + [0, 6, "rrr", 10, 2, 6], + [0, 6, "rrr", 11, 3, 6], + [0, 6, "rrr", 12, 4, 6], + ] + ) + expected.columns = ["firmNo", "prc", "stringvar", "C", "misc", "prc"] + + result = concat([df1, df2], axis=1) + tm.assert_frame_equal(result, expected) + + def test_concat_inner_join_empty(self): + # GH 15328 + df_empty = pd.DataFrame() + df_a = pd.DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") + df_expected = pd.DataFrame({"a": []}, index=[], dtype="int64") + + for how, expected in [("inner", df_expected), ("outer", df_a)]: + result = pd.concat([df_a, df_empty], axis=1, join=how) + tm.assert_frame_equal(result, expected) + + def test_concat_series_axis1_same_names_ignore_index(self): + dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1] + s1 = Series(randn(len(dates)), index=dates, name="value") + s2 = Series(randn(len(dates)), index=dates, name="value") + + result = concat([s1, s2], axis=1, ignore_index=True) + expected = Index([0, 1]) + + tm.assert_index_equal(result.columns, expected) + + def test_concat_iterables(self): + # GH8645 check concat works with tuples, list, generators, and weird + # stuff like deque and custom iterables + df1 = DataFrame([1, 2, 3]) + df2 = DataFrame([4, 5, 6]) + 
expected = DataFrame([1, 2, 3, 4, 5, 6]) + tm.assert_frame_equal(concat((df1, df2), ignore_index=True), expected) + tm.assert_frame_equal(concat([df1, df2], ignore_index=True), expected) + tm.assert_frame_equal( + concat((df for df in (df1, df2)), ignore_index=True), expected + ) + tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected) + + class CustomIterator1: + def __len__(self) -> int: + return 2 + + def __getitem__(self, index): + try: + return {0: df1, 1: df2}[index] + except KeyError: + raise IndexError + + tm.assert_frame_equal(pd.concat(CustomIterator1(), ignore_index=True), expected) + + class CustomIterator2(abc.Iterable): + def __iter__(self): + yield df1 + yield df2 + + tm.assert_frame_equal(pd.concat(CustomIterator2(), ignore_index=True), expected) + + def test_concat_invalid(self): + + # trying to concat a ndframe with a non-ndframe + df1 = tm.makeCustomDataframe(10, 2) + msg = ( + "cannot concatenate object of type '{}';" + " only Series and DataFrame objs are valid" + ) + for obj in [1, dict(), [1, 2], (1, 2)]: + with pytest.raises(TypeError, match=msg.format(type(obj))): + concat([df1, obj]) + + def test_concat_invalid_first_argument(self): + df1 = tm.makeCustomDataframe(10, 2) + df2 = tm.makeCustomDataframe(10, 2) + msg = ( + "first argument must be an iterable of pandas " + 'objects, you passed an object of type "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + concat(df1, df2) + + # generator ok though + concat(DataFrame(np.random.rand(5, 5)) for _ in range(3)) + + # text reader ok + # GH6583 + data = """index,A,B,C,D +foo,2,3,4,5 +bar,7,8,9,10 +baz,12,13,14,15 +qux,12,13,14,15 +foo2,12,13,14,15 +bar2,12,13,14,15 +""" + + reader = read_csv(StringIO(data), chunksize=1) + result = concat(reader, ignore_index=True) + expected = read_csv(StringIO(data)) + tm.assert_frame_equal(result, expected) + + def test_concat_NaT_series(self): + # GH 11693 + # test for merging NaT series with datetime series. 
+ x = Series( + date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern") + ) + y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") + expected = Series([x[0], x[1], pd.NaT, pd.NaT]) + + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # all NaT with tz + expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]") + result = pd.concat([y, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # without tz + x = pd.Series(pd.date_range("20151124 08:00", "20151124 09:00", freq="1h")) + y = pd.Series(pd.date_range("20151124 10:00", "20151124 11:00", freq="1h")) + y[:] = pd.NaT + expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT]) + result = pd.concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # all NaT without tz + x[:] = pd.NaT + expected = pd.Series(pd.NaT, index=range(4), dtype="datetime64[ns]") + result = pd.concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + def test_concat_tz_frame(self): + df2 = DataFrame( + dict( + A=pd.Timestamp("20130102", tz="US/Eastern"), + B=pd.Timestamp("20130603", tz="CET"), + ), + index=range(5), + ) + + # concat + df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) + tm.assert_frame_equal(df2, df3) + + def test_concat_tz_series(self): + # gh-11755: tz and no tz + x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) + y = Series(date_range("2012-01-01", "2012-01-02")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # gh-11887: concat tz and object + x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC")) + y = Series(["a", "b"]) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + # see gh-12217 and gh-12306 + 
# Concatenating two UTC times + first = pd.DataFrame([[datetime(2016, 1, 1)]]) + first[0] = first[0].dt.tz_localize("UTC") + + second = pd.DataFrame([[datetime(2016, 1, 2)]]) + second[0] = second[0].dt.tz_localize("UTC") + + result = pd.concat([first, second]) + assert result[0].dtype == "datetime64[ns, UTC]" + + # Concatenating two London times + first = pd.DataFrame([[datetime(2016, 1, 1)]]) + first[0] = first[0].dt.tz_localize("Europe/London") + + second = pd.DataFrame([[datetime(2016, 1, 2)]]) + second[0] = second[0].dt.tz_localize("Europe/London") + + result = pd.concat([first, second]) + assert result[0].dtype == "datetime64[ns, Europe/London]" + + # Concatenating 2+1 London times + first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]]) + first[0] = first[0].dt.tz_localize("Europe/London") + + second = pd.DataFrame([[datetime(2016, 1, 3)]]) + second[0] = second[0].dt.tz_localize("Europe/London") + + result = pd.concat([first, second]) + assert result[0].dtype == "datetime64[ns, Europe/London]" + + # Concat'ing 1+2 London times + first = pd.DataFrame([[datetime(2016, 1, 1)]]) + first[0] = first[0].dt.tz_localize("Europe/London") + + second = pd.DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]]) + second[0] = second[0].dt.tz_localize("Europe/London") + + result = pd.concat([first, second]) + assert result[0].dtype == "datetime64[ns, Europe/London]" + + def test_concat_tz_series_with_datetimelike(self): + # see gh-12620: tz and timedelta + x = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-02-01", tz="US/Eastern"), + ] + y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")] + result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) + tm.assert_series_equal(result, pd.Series(x + y, dtype="object")) + + # tz and period + y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")] + result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) + tm.assert_series_equal(result, pd.Series(x + y, 
dtype="object")) + + def test_concat_tz_series_tzlocal(self): + # see gh-13583 + x = [ + pd.Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()), + pd.Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()), + ] + y = [ + pd.Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()), + pd.Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()), + ] + + result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) + tm.assert_series_equal(result, pd.Series(x + y)) + assert result.dtype == "datetime64[ns, tzlocal()]" + + @pytest.mark.parametrize("tz1", [None, "UTC"]) + @pytest.mark.parametrize("tz2", [None, "UTC"]) + @pytest.mark.parametrize("s", [pd.NaT, pd.Timestamp("20150101")]) + def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s): + # GH 12396 + + # tz-naive + first = pd.DataFrame([[pd.NaT], [pd.NaT]]).apply( + lambda x: x.dt.tz_localize(tz1) + ) + second = pd.DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2)) + + result = pd.concat([first, second], axis=0) + expected = pd.DataFrame(pd.Series([pd.NaT, pd.NaT, s], index=[0, 1, 0])) + expected = expected.apply(lambda x: x.dt.tz_localize(tz2)) + if tz1 != tz2: + expected = expected.astype(object) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz1", [None, "UTC"]) + @pytest.mark.parametrize("tz2", [None, "UTC"]) + def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2): + # GH 12396 + + first = pd.DataFrame(pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)) + second = pd.DataFrame(pd.Series([pd.NaT]).dt.tz_localize(tz2), columns=[1]) + expected = pd.DataFrame( + { + 0: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1), + 1: pd.Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2), + } + ) + result = pd.concat([first, second], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz1", [None, "UTC"]) + @pytest.mark.parametrize("tz2", [None, "UTC"]) + def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): + # GH 12396 + + # tz-naive + first = pd.Series([pd.NaT, 
pd.NaT]).dt.tz_localize(tz1) + second = pd.DataFrame( + [ + [pd.Timestamp("2015/01/01", tz=tz2)], + [pd.Timestamp("2016/01/01", tz=tz2)], + ], + index=[2, 3], + ) + + expected = pd.DataFrame( + [ + pd.NaT, + pd.NaT, + pd.Timestamp("2015/01/01", tz=tz2), + pd.Timestamp("2016/01/01", tz=tz2), + ] + ) + if tz1 != tz2: + expected = expected.astype(object) + + result = pd.concat([first, second]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_concat_NaT_dataframes(self, tz): + # GH 12396 + + first = pd.DataFrame([[pd.NaT], [pd.NaT]]) + first = first.apply(lambda x: x.dt.tz_localize(tz)) + second = pd.DataFrame( + [[pd.Timestamp("2015/01/01", tz=tz)], [pd.Timestamp("2016/01/01", tz=tz)]], + index=[2, 3], + ) + expected = pd.DataFrame( + [ + pd.NaT, + pd.NaT, + pd.Timestamp("2015/01/01", tz=tz), + pd.Timestamp("2016/01/01", tz=tz), + ] + ) + + result = pd.concat([first, second], axis=0) + tm.assert_frame_equal(result, expected) + + def test_concat_period_series(self): + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="Period[D]") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + + def test_concat_period_multiple_freq_series(self): + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + def test_concat_period_other_series(self): + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M")) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + 
tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + # non-period + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(pd.DatetimeIndex(["2015-11-01", "2015-12-01"])) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) + y = Series(["A", "B"]) + expected = Series([x[0], x[1], y[0], y[1]], dtype="object") + result = concat([x, y], ignore_index=True) + tm.assert_series_equal(result, expected) + assert result.dtype == "object" + + def test_concat_empty_series(self): + # GH 11082 + s1 = pd.Series([1, 2, 3], name="x") + s2 = pd.Series(name="y", dtype="float64") + res = pd.concat([s1, s2], axis=1) + exp = pd.DataFrame( + {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, + index=pd.Index([0, 1, 2], dtype="O"), + ) + tm.assert_frame_equal(res, exp) + + s1 = pd.Series([1, 2, 3], name="x") + s2 = pd.Series(name="y", dtype="float64") + res = pd.concat([s1, s2], axis=0) + # name will be reset + exp = pd.Series([1, 2, 3]) + tm.assert_series_equal(res, exp) + + # empty Series with no name + s1 = pd.Series([1, 2, 3], name="x") + s2 = pd.Series(name=None, dtype="float64") + res = pd.concat([s1, s2], axis=1) + exp = pd.DataFrame( + {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, + columns=["x", 0], + index=pd.Index([0, 1, 2], dtype="O"), + ) + tm.assert_frame_equal(res, exp) + + @pytest.mark.parametrize("tz", [None, "UTC"]) + @pytest.mark.parametrize("values", [[], [1, 2, 3]]) + def test_concat_empty_series_timelike(self, tz, values): + # GH 18447 + + first = Series([], dtype="M8[ns]").dt.tz_localize(tz) + dtype = None if values else np.float64 + second = Series(values, dtype=dtype) + + expected = DataFrame( + { + 0: pd.Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz), + 1: values, + } + ) + result = 
concat([first, second], axis=1) + tm.assert_frame_equal(result, expected) + + def test_default_index(self): + # is_series and ignore_index + s1 = pd.Series([1, 2, 3], name="x") + s2 = pd.Series([4, 5, 6], name="y") + res = pd.concat([s1, s2], axis=1, ignore_index=True) + assert isinstance(res.columns, pd.RangeIndex) + exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) + # use check_index_type=True to check the result have + # RangeIndex (default index) + tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + + # is_series and all inputs have no names + s1 = pd.Series([1, 2, 3]) + s2 = pd.Series([4, 5, 6]) + res = pd.concat([s1, s2], axis=1, ignore_index=False) + assert isinstance(res.columns, pd.RangeIndex) + exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) + exp.columns = pd.RangeIndex(2) + tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + + # is_dataframe and ignore_index + df1 = pd.DataFrame({"A": [1, 2], "B": [5, 6]}) + df2 = pd.DataFrame({"A": [3, 4], "B": [7, 8]}) + + res = pd.concat([df1, df2], axis=0, ignore_index=True) + exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"]) + tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + + res = pd.concat([df1, df2], axis=1, ignore_index=True) + exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) + tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True) + + def test_concat_multiindex_rangeindex(self): + # GH13542 + # when multi-index levels are RangeIndex objects + # there is a bug in concat with objects of len 1 + + df = DataFrame(np.random.randn(9, 2)) + df.index = MultiIndex( + levels=[pd.RangeIndex(3), pd.RangeIndex(3)], + codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)], + ) + + res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]]) + exp = df.iloc[[2, 3, 4, 5], :] + tm.assert_frame_equal(res, exp) + + def test_concat_multiindex_dfs_with_deepcopy(self): + # GH 9967 + from copy import 
deepcopy + + example_multiindex1 = pd.MultiIndex.from_product([["a"], ["b"]]) + example_dataframe1 = pd.DataFrame([0], index=example_multiindex1) + + example_multiindex2 = pd.MultiIndex.from_product([["a"], ["c"]]) + example_dataframe2 = pd.DataFrame([1], index=example_multiindex2) + + example_dict = {"s1": example_dataframe1, "s2": example_dataframe2} + expected_index = pd.MultiIndex( + levels=[["s1", "s2"], ["a"], ["b", "c"]], + codes=[[0, 1], [0, 0], [0, 1]], + names=["testname", None, None], + ) + expected = pd.DataFrame([[0], [1]], index=expected_index) + result_copy = pd.concat(deepcopy(example_dict), names=["testname"]) + tm.assert_frame_equal(result_copy, expected) + result_no_copy = pd.concat(example_dict, names=["testname"]) + tm.assert_frame_equal(result_no_copy, expected) + + def test_categorical_concat_append(self): + cat = Categorical(["a", "b"], categories=["a", "b"]) + vals = [1, 2] + df = DataFrame({"cats": cat, "vals": vals}) + cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"]) + vals2 = [1, 2, 1, 2] + exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1])) + + tm.assert_frame_equal(pd.concat([df, df]), exp) + tm.assert_frame_equal(df.append(df), exp) + + # GH 13524 can concat different categories + cat3 = Categorical(["a", "b"], categories=["a", "b", "c"]) + vals3 = [1, 2] + df_different_categories = DataFrame({"cats": cat3, "vals": vals3}) + + res = pd.concat([df, df_different_categories], ignore_index=True) + exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]}) + tm.assert_frame_equal(res, exp) + + res = df.append(df_different_categories, ignore_index=True) + tm.assert_frame_equal(res, exp) + + def test_categorical_concat_dtypes(self): + + # GH8143 + index = ["cat", "obj", "num"] + cat = Categorical(["a", "b", "c"]) + obj = Series(["a", "b", "c"]) + num = Series([1, 2, 3]) + df = pd.concat([Series(cat), obj, num], axis=1, keys=index) + + result = df.dtypes == "object" + expected = Series([False, True, 
False], index=index) + tm.assert_series_equal(result, expected) + + result = df.dtypes == "int64" + expected = Series([False, False, True], index=index) + tm.assert_series_equal(result, expected) + + result = df.dtypes == "category" + expected = Series([True, False, False], index=index) + tm.assert_series_equal(result, expected) + + def test_categorical_concat(self, sort): + # See GH 10177 + df1 = DataFrame( + np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"] + ) + + df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"]) + + cat_values = ["one", "one", "two", "one", "two", "two", "one"] + df2["h"] = Series(Categorical(cat_values)) + + res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort) + exp = DataFrame( + { + "a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], + "b": [ + 1, + 4, + 7, + 10, + 13, + 16, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ], + "c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13], + "h": [None] * 6 + cat_values, + } + ) + tm.assert_frame_equal(res, exp) + + def test_categorical_concat_gh7864(self): + # GH 7864 + # make sure ordering is preserved + df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")}) + df["grade"] = Categorical(df["raw_grade"]) + df["grade"].cat.set_categories(["e", "a", "b"]) + + df1 = df[0:3] + df2 = df[3:] + + tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories) + tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories) + + dfx = pd.concat([df1, df2]) + tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories) + + dfa = df1.append(df2) + tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories) + + def test_categorical_concat_preserve(self): + + # GH 8641 series concat not preserving category dtype + # GH 13524 can concat different categories + s = Series(list("abc"), dtype="category") + s2 = Series(list("abd"), dtype="category") + + exp 
= Series(list("abcabd")) + res = pd.concat([s, s2], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = Series(list("abcabc"), dtype="category") + res = pd.concat([s, s], ignore_index=True) + tm.assert_series_equal(res, exp) + + exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category") + res = pd.concat([s, s]) + tm.assert_series_equal(res, exp) + + a = Series(np.arange(6, dtype="int64")) + b = Series(list("aabbca")) + + df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))}) + res = pd.concat([df2, df2]) + exp = DataFrame( + { + "A": pd.concat([a, a]), + "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))), + } + ) + tm.assert_frame_equal(res, exp) + + def test_categorical_index_preserver(self): + + a = Series(np.arange(6, dtype="int64")) + b = Series(list("aabbca")) + + df2 = DataFrame( + {"A": a, "B": b.astype(CategoricalDtype(list("cab")))} + ).set_index("B") + result = pd.concat([df2, df2]) + expected = DataFrame( + { + "A": pd.concat([a, a]), + "B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))), + } + ).set_index("B") + tm.assert_frame_equal(result, expected) + + # wrong categories + df3 = DataFrame( + {"A": a, "B": Categorical(b, categories=list("abe"))} + ).set_index("B") + msg = "categories must match existing categories when appending" + with pytest.raises(TypeError, match=msg): + pd.concat([df2, df3]) + + def test_concat_categoricalindex(self): + # GH 16111, categories that aren't lexsorted + categories = [9, 0, 1, 2, 3] + + a = pd.Series(1, index=pd.CategoricalIndex([9, 0], categories=categories)) + b = pd.Series(2, index=pd.CategoricalIndex([0, 1], categories=categories)) + c = pd.Series(3, index=pd.CategoricalIndex([1, 2], categories=categories)) + + result = pd.concat([a, b, c], axis=1) + + exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories) + exp = pd.DataFrame( + { + 0: [1, 1, np.nan, np.nan], + 1: [np.nan, 2, 2, np.nan], + 2: [np.nan, np.nan, 3, 3], + }, + columns=[0, 1, 
2], + index=exp_idx, + ) + tm.assert_frame_equal(result, exp) + + def test_concat_order(self): + # GH 17344 + dfs = [pd.DataFrame(index=range(3), columns=["a", 1, None])] + dfs += [ + pd.DataFrame(index=range(3), columns=[None, 1, "a"]) for i in range(100) + ] + + result = pd.concat(dfs, sort=True).columns + expected = dfs[0].columns + tm.assert_index_equal(result, expected) + + def test_concat_datetime_timezone(self): + # GH 18523 + idx1 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris") + idx2 = pd.date_range(start=idx1[0], end=idx1[-1], freq="H") + df1 = pd.DataFrame({"a": [1, 2, 3]}, index=idx1) + df2 = pd.DataFrame({"b": [1, 2, 3]}, index=idx2) + result = pd.concat([df1, df2], axis=1) + + exp_idx = ( + DatetimeIndex( + [ + "2011-01-01 00:00:00+01:00", + "2011-01-01 01:00:00+01:00", + "2011-01-01 02:00:00+01:00", + ], + freq="H", + ) + .tz_convert("UTC") + .tz_convert("Europe/Paris") + ) + + expected = pd.DataFrame( + [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"] + ) + + tm.assert_frame_equal(result, expected) + + idx3 = pd.date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo") + df3 = pd.DataFrame({"b": [1, 2, 3]}, index=idx3) + result = pd.concat([df1, df3], axis=1) + + exp_idx = DatetimeIndex( + [ + "2010-12-31 15:00:00+00:00", + "2010-12-31 16:00:00+00:00", + "2010-12-31 17:00:00+00:00", + "2010-12-31 23:00:00+00:00", + "2011-01-01 00:00:00+00:00", + "2011-01-01 01:00:00+00:00", + ] + ) + + expected = pd.DataFrame( + [ + [np.nan, 1], + [np.nan, 2], + [np.nan, 3], + [1, np.nan], + [2, np.nan], + [3, np.nan], + ], + index=exp_idx, + columns=["a", "b"], + ) + + tm.assert_frame_equal(result, expected) + + # GH 13783: Concat after resample + result = pd.concat( + [df1.resample("H").mean(), df2.resample("H").mean()], sort=True + ) + expected = pd.DataFrame( + {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]}, + index=idx1.append(idx1), + ) + tm.assert_frame_equal(result, expected) + + def 
test_concat_different_extension_dtypes_upcasts(self): + a = pd.Series(pd.core.arrays.integer_array([1, 2])) + b = pd.Series(to_decimal([1, 2])) + + result = pd.concat([a, b], ignore_index=True) + expected = pd.Series([1, 2, Decimal(1), Decimal(2)], dtype=object) + tm.assert_series_equal(result, expected) + + def test_concat_odered_dict(self): + # GH 21510 + expected = pd.concat( + [pd.Series(range(3)), pd.Series(range(4))], keys=["First", "Another"] + ) + result = pd.concat( + OrderedDict( + [("First", pd.Series(range(3))), ("Another", pd.Series(range(4)))] + ) + ) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("pdt", [pd.Series, pd.DataFrame]) +@pytest.mark.parametrize("dt", np.sctypes["float"]) +def test_concat_no_unnecessary_upcast(dt, pdt): + # GH 13247 + dims = pdt(dtype=object).ndim + + dfs = [ + pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], dtype=dt, ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims)), + ] + x = pd.concat(dfs) + assert x.values.dtype == dt + + +@pytest.mark.parametrize("pdt", [create_series_with_explicit_dtype, pd.DataFrame]) +@pytest.mark.parametrize("dt", np.sctypes["int"]) +def test_concat_will_upcast(dt, pdt): + with catch_warnings(record=True): + dims = pdt().ndim + dfs = [ + pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims)), + ] + x = pd.concat(dfs) + assert x.values.dtype == "float64" + + +def test_concat_empty_and_non_empty_frame_regression(): + # GH 18178 regression test + df1 = pd.DataFrame({"foo": [1]}) + df2 = pd.DataFrame({"foo": []}) + expected = pd.DataFrame({"foo": [1.0]}) + result = pd.concat([df1, df2]) + tm.assert_frame_equal(result, expected) + + +def test_concat_empty_and_non_empty_series_regression(): + # GH 18187 regression test + s1 = pd.Series([1]) + s2 = pd.Series([], dtype=object) + + expected = s1 + result = pd.concat([s1, s2]) + tm.assert_series_equal(result, expected) + + +def 
test_concat_sorts_columns(sort): + # GH-4588 + df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"]) + df2 = pd.DataFrame({"a": [3, 4], "c": [5, 6]}) + + # for sort=True/None + expected = pd.DataFrame( + {"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]}, + columns=["a", "b", "c"], + ) + + if sort is False: + expected = expected[["b", "a", "c"]] + + # default + with tm.assert_produces_warning(None): + result = pd.concat([df1, df2], ignore_index=True, sort=sort) + tm.assert_frame_equal(result, expected) + + +def test_concat_sorts_index(sort): + df1 = pd.DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"]) + df2 = pd.DataFrame({"b": [1, 2]}, index=["a", "b"]) + + # For True/None + expected = pd.DataFrame( + {"a": [2, 3, 1], "b": [1, 2, None]}, index=["a", "b", "c"], columns=["a", "b"] + ) + if sort is False: + expected = expected.loc[["c", "a", "b"]] + + # Warn and sort by default + with tm.assert_produces_warning(None): + result = pd.concat([df1, df2], axis=1, sort=sort) + tm.assert_frame_equal(result, expected) + + +def test_concat_inner_sort(sort): + # https://github.com/pandas-dev/pandas/pull/20613 + df1 = pd.DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"]) + df2 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4]) + + with tm.assert_produces_warning(None): + # unset sort should *not* warn for inner join + # since that never sorted + result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True) + + expected = pd.DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"]) + if sort is True: + expected = expected[["a", "b"]] + tm.assert_frame_equal(result, expected) + + +def test_concat_aligned_sort(): + # GH-4588 + df = pd.DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"]) + result = pd.concat([df, df], sort=True, ignore_index=True) + expected = pd.DataFrame( + {"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]}, + columns=["a", "b", "c"], + ) + 
tm.assert_frame_equal(result, expected) + + result = pd.concat([df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True) + expected = expected[["b", "c"]] + tm.assert_frame_equal(result, expected) + + +def test_concat_aligned_sort_does_not_raise(): + # GH-4588 + # We catch TypeErrors from sorting internally and do not re-raise. + df = pd.DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"]) + expected = pd.DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"]) + result = pd.concat([df, df], ignore_index=True, sort=True) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("s1name,s2name", [(np.int64(190), (43, 0)), (190, (43, 0))]) +def test_concat_series_name_npscalar_tuple(s1name, s2name): + # GH21015 + s1 = pd.Series({"a": 1, "b": 2}, name=s1name) + s2 = pd.Series({"c": 5, "d": 6}, name=s2name) + result = pd.concat([s1, s2]) + expected = pd.Series({"a": 1, "b": 2, "c": 5, "d": 6}) + tm.assert_series_equal(result, expected) + + +def test_concat_categorical_tz(): + # GH-23816 + a = pd.Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific")) + b = pd.Series(["a", "b"], dtype="category") + result = pd.concat([a, b], ignore_index=True) + expected = pd.Series( + [ + pd.Timestamp("2017-01-01", tz="US/Pacific"), + pd.Timestamp("2017-01-02", tz="US/Pacific"), + "a", + "b", + ] + ) + tm.assert_series_equal(result, expected) + + +def test_concat_categorical_unchanged(): + # GH-12007 + # test fix for when concat on categorical and float + # coerces dtype categorical -> float + df = pd.DataFrame(pd.Series(["a", "b", "c"], dtype="category", name="A")) + ser = pd.Series([0, 1, 2], index=[0, 1, 3], name="B") + result = pd.concat([df, ser], axis=1) + expected = pd.DataFrame( + { + "A": pd.Series(["a", "b", "c", np.nan], dtype="category"), + "B": pd.Series([0, 1, np.nan, 2], dtype="float"), + } + ) + tm.assert_equal(result, expected) + + +def test_concat_datetimeindex_freq(): + # GH 3232 + # Monotonic index result + dr = 
pd.date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC") + data = list(range(100)) + expected = pd.DataFrame(data, index=dr) + result = pd.concat([expected[:50], expected[50:]]) + tm.assert_frame_equal(result, expected) + + # Non-monotonic index result + result = pd.concat([expected[50:], expected[:50]]) + expected = pd.DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50])) + expected.index._data.freq = None + tm.assert_frame_equal(result, expected) + + +def test_concat_empty_df_object_dtype(): + # GH 9149 + df_1 = pd.DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]}) + df_2 = pd.DataFrame(columns=df_1.columns) + result = pd.concat([df_1, df_2], axis=0) + expected = df_1.astype(object) + tm.assert_frame_equal(result, expected) + + +def test_concat_sparse(): + # GH 23557 + a = pd.Series(SparseArray([0, 1, 2])) + expected = pd.DataFrame(data=[[0, 0], [1, 1], [2, 2]]).astype( + pd.SparseDtype(np.int64, 0) + ) + result = pd.concat([a, a], axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py new file mode 100644 index 00000000..830e786f --- /dev/null +++ b/pandas/tests/reshape/test_cut.py @@ -0,0 +1,627 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Interval, + IntervalIndex, + Series, + TimedeltaIndex, + Timestamp, + cut, + date_range, + isna, + qcut, + timedelta_range, + to_datetime, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +import pandas.core.reshape.tile as tmod + + +def test_simple(): + data = np.ones(5, dtype="int64") + result = cut(data, 4, labels=False) + + expected = np.array([1, 1, 1, 1, 1]) + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + +def test_bins(): + data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1]) + result, bins = cut(data, 3, retbins=True) + + intervals = 
IntervalIndex.from_breaks(bins.round(3)) + intervals = intervals.take([0, 0, 0, 1, 2, 0]) + expected = Categorical(intervals, ordered=True) + + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7])) + + +def test_right(): + data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) + result, bins = cut(data, 4, right=True, retbins=True) + + intervals = IntervalIndex.from_breaks(bins.round(3)) + expected = Categorical(intervals, ordered=True) + expected = expected.take([0, 0, 0, 2, 3, 0, 0]) + + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95, 7.325, 9.7])) + + +def test_no_right(): + data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575]) + result, bins = cut(data, 4, right=False, retbins=True) + + intervals = IntervalIndex.from_breaks(bins.round(3), closed="left") + intervals = intervals.take([0, 0, 0, 2, 3, 0, 1]) + expected = Categorical(intervals, ordered=True) + + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, 7.325, 9.7095])) + + +def test_array_like(): + data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1] + result, bins = cut(data, 3, retbins=True) + + intervals = IntervalIndex.from_breaks(bins.round(3)) + intervals = intervals.take([0, 0, 0, 1, 2, 0]) + expected = Categorical(intervals, ordered=True) + + tm.assert_categorical_equal(result, expected) + tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7])) + + +def test_bins_from_interval_index(): + c = cut(range(5), 3) + expected = c + result = cut(range(5), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + expected = Categorical.from_codes( + np.append(c.codes, -1), categories=c.categories, ordered=True + ) + result = cut(range(6), bins=expected.categories) + tm.assert_categorical_equal(result, expected) + + +def test_bins_from_interval_index_doc_example(): + # Make sure we preserve the 
bins. + ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60]) + c = cut(ages, bins=[0, 18, 35, 70]) + expected = IntervalIndex.from_tuples([(0, 18), (18, 35), (35, 70)]) + tm.assert_index_equal(c.categories, expected) + + result = cut([25, 20, 50], bins=c.categories) + tm.assert_index_equal(result.categories, expected) + tm.assert_numpy_array_equal(result.codes, np.array([1, 1, 2], dtype="int8")) + + +def test_bins_not_overlapping_from_interval_index(): + # see gh-23980 + msg = "Overlapping IntervalIndex is not accepted" + ii = IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)]) + + with pytest.raises(ValueError, match=msg): + cut([5, 6], bins=ii) + + +def test_bins_not_monotonic(): + msg = "bins must increase monotonically" + data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1] + + with pytest.raises(ValueError, match=msg): + cut(data, [0.1, 1.5, 1, 10]) + + +@pytest.mark.parametrize( + "x, bins, expected", + [ + ( + date_range("2017-12-31", periods=3), + [Timestamp.min, Timestamp("2018-01-01"), Timestamp.max], + IntervalIndex.from_tuples( + [ + (Timestamp.min, Timestamp("2018-01-01")), + (Timestamp("2018-01-01"), Timestamp.max), + ] + ), + ), + ( + [-1, 0, 1], + np.array( + [np.iinfo(np.int64).min, 0, np.iinfo(np.int64).max], dtype="int64" + ), + IntervalIndex.from_tuples( + [(np.iinfo(np.int64).min, 0), (0, np.iinfo(np.int64).max)] + ), + ), + ( + [np.timedelta64(-1), np.timedelta64(0), np.timedelta64(1)], + np.array( + [ + np.timedelta64(-np.iinfo(np.int64).max), + np.timedelta64(0), + np.timedelta64(np.iinfo(np.int64).max), + ] + ), + IntervalIndex.from_tuples( + [ + (np.timedelta64(-np.iinfo(np.int64).max), np.timedelta64(0)), + (np.timedelta64(0), np.timedelta64(np.iinfo(np.int64).max)), + ] + ), + ), + ], +) +def test_bins_monotonic_not_overflowing(x, bins, expected): + # GH 26045 + result = cut(x, bins) + tm.assert_index_equal(result.categories, expected) + + +def test_wrong_num_labels(): + msg = "Bin labels must be one fewer than the number of bin edges" + data = 
[0.2, 1.4, 2.5, 6.2, 9.7, 2.1] + + with pytest.raises(ValueError, match=msg): + cut(data, [0, 1, 10], labels=["foo", "bar", "baz"]) + + +@pytest.mark.parametrize( + "x,bins,msg", + [ + ([], 2, "Cannot cut empty array"), + ([1, 2, 3], 0.5, "`bins` should be a positive integer"), + ], +) +def test_cut_corner(x, bins, msg): + with pytest.raises(ValueError, match=msg): + cut(x, bins) + + +@pytest.mark.parametrize("arg", [2, np.eye(2), DataFrame(np.eye(2))]) +@pytest.mark.parametrize("cut_func", [cut, qcut]) +def test_cut_not_1d_arg(arg, cut_func): + msg = "Input array must be 1 dimensional" + with pytest.raises(ValueError, match=msg): + cut_func(arg, 2) + + +@pytest.mark.parametrize( + "data", + [ + [0, 1, 2, 3, 4, np.inf], + [-np.inf, 0, 1, 2, 3, 4], + [-np.inf, 0, 1, 2, 3, 4, np.inf], + ], +) +def test_int_bins_with_inf(data): + # GH 24314 + msg = "cannot specify integer `bins` when input data contains infinity" + with pytest.raises(ValueError, match=msg): + cut(data, bins=3) + + +def test_cut_out_of_range_more(): + # see gh-1511 + name = "x" + + ser = Series([0, -1, 0, 1, -3], name=name) + ind = cut(ser, [0, 1], labels=False) + + exp = Series([np.nan, np.nan, np.nan, 0, np.nan], name=name) + tm.assert_series_equal(ind, exp) + + +@pytest.mark.parametrize( + "right,breaks,closed", + [ + (True, [-1e-3, 0.25, 0.5, 0.75, 1], "right"), + (False, [0, 0.25, 0.5, 0.75, 1 + 1e-3], "left"), + ], +) +def test_labels(right, breaks, closed): + arr = np.tile(np.arange(0, 1.01, 0.1), 4) + + result, bins = cut(arr, 4, retbins=True, right=right) + ex_levels = IntervalIndex.from_breaks(breaks, closed=closed) + tm.assert_index_equal(result.categories, ex_levels) + + +def test_cut_pass_series_name_to_factor(): + name = "foo" + ser = Series(np.random.randn(100), name=name) + + factor = cut(ser, 4) + assert factor.name == name + + +def test_label_precision(): + arr = np.arange(0, 0.73, 0.01) + result = cut(arr, 4, precision=2) + + ex_levels = IntervalIndex.from_breaks([-0.00072, 0.18, 
0.36, 0.54, 0.72]) + tm.assert_index_equal(result.categories, ex_levels) + + +@pytest.mark.parametrize("labels", [None, False]) +def test_na_handling(labels): + arr = np.arange(0, 0.75, 0.01) + arr[::3] = np.nan + + result = cut(arr, 4, labels=labels) + result = np.asarray(result) + + expected = np.where(isna(arr), np.nan, result) + tm.assert_almost_equal(result, expected) + + +def test_inf_handling(): + data = np.arange(6) + data_ser = Series(data, dtype="int64") + + bins = [-np.inf, 2, 4, np.inf] + result = cut(data, bins) + result_ser = cut(data_ser, bins) + + ex_uniques = IntervalIndex.from_breaks(bins) + tm.assert_index_equal(result.categories, ex_uniques) + + assert result[5] == Interval(4, np.inf) + assert result[0] == Interval(-np.inf, 2) + assert result_ser[5] == Interval(4, np.inf) + assert result_ser[0] == Interval(-np.inf, 2) + + +def test_cut_out_of_bounds(): + arr = np.random.randn(100) + result = cut(arr, [-1, 0, 1]) + + mask = isna(result) + ex_mask = (arr < -1) | (arr > 1) + tm.assert_numpy_array_equal(mask, ex_mask) + + +@pytest.mark.parametrize( + "get_labels,get_expected", + [ + ( + lambda labels: labels, + lambda labels: Categorical( + ["Medium"] + 4 * ["Small"] + ["Medium", "Large"], + categories=labels, + ordered=True, + ), + ), + ( + lambda labels: Categorical.from_codes([0, 1, 2], labels), + lambda labels: Categorical.from_codes([1] + 4 * [0] + [1, 2], labels), + ), + ], +) +def test_cut_pass_labels(get_labels, get_expected): + bins = [0, 25, 50, 100] + arr = [50, 5, 10, 15, 20, 30, 70] + labels = ["Small", "Medium", "Large"] + + result = cut(arr, bins, labels=get_labels(labels)) + tm.assert_categorical_equal(result, get_expected(labels)) + + +def test_cut_pass_labels_compat(): + # see gh-16459 + arr = [50, 5, 10, 15, 20, 30, 70] + labels = ["Good", "Medium", "Bad"] + + result = cut(arr, 3, labels=labels) + exp = cut(arr, 3, labels=Categorical(labels, categories=labels, ordered=True)) + tm.assert_categorical_equal(result, exp) + + 
+@pytest.mark.parametrize("x", [np.arange(11.0), np.arange(11.0) / 1e10]) +def test_round_frac_just_works(x): + # It works. + cut(x, 2) + + +@pytest.mark.parametrize( + "val,precision,expected", + [ + (-117.9998, 3, -118), + (117.9998, 3, 118), + (117.9998, 2, 118), + (0.000123456, 2, 0.00012), + ], +) +def test_round_frac(val, precision, expected): + # see gh-1979 + result = tmod._round_frac(val, precision=precision) + assert result == expected + + +def test_cut_return_intervals(): + ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) + result = cut(ser, 3) + + exp_bins = np.linspace(0, 8, num=4).round(3) + exp_bins[0] -= 0.008 + + expected = Series( + IntervalIndex.from_breaks(exp_bins, closed="right").take( + [0, 0, 0, 1, 1, 1, 2, 2, 2] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +def test_series_ret_bins(): + # see gh-8589 + ser = Series(np.arange(4)) + result, bins = cut(ser, 2, retbins=True) + + expected = Series( + IntervalIndex.from_breaks([-0.003, 1.5, 3], closed="right").repeat(2) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + (dict(duplicates="drop"), None), + (dict(), "Bin edges must be unique"), + (dict(duplicates="raise"), "Bin edges must be unique"), + (dict(duplicates="foo"), "invalid value for 'duplicates' parameter"), + ], +) +def test_cut_duplicates_bin(kwargs, msg): + # see gh-20947 + bins = [0, 2, 4, 6, 10, 10] + values = Series(np.array([1, 3, 5, 7, 9]), index=["a", "b", "c", "d", "e"]) + + if msg is not None: + with pytest.raises(ValueError, match=msg): + cut(values, bins, **kwargs) + else: + result = cut(values, bins, **kwargs) + expected = cut(values, pd.unique(bins)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("data", [9.0, -9.0, 0.0]) +@pytest.mark.parametrize("length", [1, 2]) +def test_single_bin(data, length): + # see gh-14652, gh-15428 + ser = Series([data] * length) + result = cut(ser, 1, 
labels=False) + + expected = Series([0] * length) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "array_1_writeable,array_2_writeable", [(True, True), (True, False), (False, False)] +) +def test_cut_read_only(array_1_writeable, array_2_writeable): + # issue 18773 + array_1 = np.arange(0, 100, 10) + array_1.flags.writeable = array_1_writeable + + array_2 = np.arange(0, 100, 10) + array_2.flags.writeable = array_2_writeable + + hundred_elements = np.arange(100) + tm.assert_categorical_equal( + cut(hundred_elements, array_1), cut(hundred_elements, array_2) + ) + + +@pytest.mark.parametrize( + "conv", + [ + lambda v: Timestamp(v), + lambda v: to_datetime(v), + lambda v: np.datetime64(v), + lambda v: Timestamp(v).to_pydatetime(), + ], +) +def test_datetime_bin(conv): + data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")] + bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"] + + expected = Series( + IntervalIndex( + [ + Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), + Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])), + ] + ) + ).astype(CDT(ordered=True)) + + bins = [conv(v) for v in bin_data] + result = Series(cut(data, bins=bins)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data", + [ + to_datetime(Series(["2013-01-01", "2013-01-02", "2013-01-03"])), + [ + np.datetime64("2013-01-01"), + np.datetime64("2013-01-02"), + np.datetime64("2013-01-03"), + ], + np.array( + [ + np.datetime64("2013-01-01"), + np.datetime64("2013-01-02"), + np.datetime64("2013-01-03"), + ] + ), + DatetimeIndex(["2013-01-01", "2013-01-02", "2013-01-03"]), + ], +) +def test_datetime_cut(data): + # see gh-14714 + # + # Testing time data when it comes in various collection types. 
+ result, _ = cut(data, 3, retbins=True) + expected = Series( + IntervalIndex( + [ + Interval( + Timestamp("2012-12-31 23:57:07.200000"), + Timestamp("2013-01-01 16:00:00"), + ), + Interval( + Timestamp("2013-01-01 16:00:00"), Timestamp("2013-01-02 08:00:00") + ), + Interval( + Timestamp("2013-01-02 08:00:00"), Timestamp("2013-01-03 00:00:00") + ), + ] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(Series(result), expected) + + +@pytest.mark.parametrize( + "bins", + [ + 3, + [ + Timestamp("2013-01-01 04:57:07.200000"), + Timestamp("2013-01-01 21:00:00"), + Timestamp("2013-01-02 13:00:00"), + Timestamp("2013-01-03 05:00:00"), + ], + ], +) +@pytest.mark.parametrize("box", [list, np.array, Index, Series]) +def test_datetime_tz_cut(bins, box): + # see gh-19872 + tz = "US/Eastern" + s = Series(date_range("20130101", periods=3, tz=tz)) + + if not isinstance(bins, int): + bins = box(bins) + + result = cut(s, bins) + expected = Series( + IntervalIndex( + [ + Interval( + Timestamp("2012-12-31 23:57:07.200000", tz=tz), + Timestamp("2013-01-01 16:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-01 16:00:00", tz=tz), + Timestamp("2013-01-02 08:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-02 08:00:00", tz=tz), + Timestamp("2013-01-03 00:00:00", tz=tz), + ), + ] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +def test_datetime_nan_error(): + msg = "bins must be of datetime64 dtype" + + with pytest.raises(ValueError, match=msg): + cut(date_range("20130101", periods=3), bins=[0, 2, 4]) + + +def test_datetime_nan_mask(): + result = cut( + date_range("20130102", periods=5), bins=date_range("20130101", periods=2) + ) + + mask = result.categories.isna() + tm.assert_numpy_array_equal(mask, np.array([False])) + + mask = result.isna() + tm.assert_numpy_array_equal(mask, np.array([False, True, True, True, True])) + + +@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"]) +def test_datetime_cut_roundtrip(tz): + # see 
gh-19891 + ser = Series(date_range("20180101", periods=3, tz=tz)) + result, result_bins = cut(ser, 2, retbins=True) + + expected = cut(ser, result_bins) + tm.assert_series_equal(result, expected) + + expected_bins = DatetimeIndex( + ["2017-12-31 23:57:07.200000", "2018-01-02 00:00:00", "2018-01-03 00:00:00"] + ) + expected_bins = expected_bins.tz_localize(tz) + tm.assert_index_equal(result_bins, expected_bins) + + +def test_timedelta_cut_roundtrip(): + # see gh-19891 + ser = Series(timedelta_range("1day", periods=3)) + result, result_bins = cut(ser, 2, retbins=True) + + expected = cut(ser, result_bins) + tm.assert_series_equal(result, expected) + + expected_bins = TimedeltaIndex( + ["0 days 23:57:07.200000", "2 days 00:00:00", "3 days 00:00:00"] + ) + tm.assert_index_equal(result_bins, expected_bins) + + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize( + "box, compare", + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], +) +def test_cut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = cut(data_expected, bins, duplicates="drop") + result = cut(data_result, bins, duplicates="drop") + compare(result, expected) + + +@pytest.mark.parametrize("labels", ["foo", 1, True]) +def test_cut_incorrect_labels(labels): + # GH 13318 + values = range(5) + msg = "Bin labels must either be False, None or passed in as a list-like argument" + with pytest.raises(ValueError, match=msg): + cut(values, 4, labels=labels) + + +@pytest.mark.parametrize("bins", [3, [0, 5, 15]]) +@pytest.mark.parametrize("right", [True, False]) +@pytest.mark.parametrize("include_lowest", [True, False]) +def test_cut_nullable_integer(bins, right, include_lowest): + a = np.random.randint(0, 10, size=50).astype(float) + a[::2] = np.nan + result = cut( + pd.array(a, dtype="Int64"), bins, right=right, 
include_lowest=include_lowest + ) + expected = cut(a, bins, right=right, include_lowest=include_lowest) + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py new file mode 100644 index 00000000..81432584 --- /dev/null +++ b/pandas/tests/reshape/test_melt.py @@ -0,0 +1,992 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, lreshape, melt, wide_to_long +import pandas._testing as tm + + +class TestMelt: + def setup_method(self, method): + self.df = tm.makeTimeDataFrame()[:10] + self.df["id1"] = (self.df["A"] > 0).astype(np.int64) + self.df["id2"] = (self.df["B"] > 0).astype(np.int64) + + self.var_name = "var" + self.value_name = "val" + + self.df1 = pd.DataFrame( + [ + [1.067683, -1.110463, 0.20867], + [-1.321405, 0.368915, -1.055342], + [-0.807333, 0.08298, -0.873361], + ] + ) + self.df1.columns = [list("ABC"), list("abc")] + self.df1.columns.names = ["CAP", "low"] + + def test_top_level_method(self): + result = melt(self.df) + assert result.columns.tolist() == ["variable", "value"] + + def test_method_signatures(self): + tm.assert_frame_equal(self.df.melt(), melt(self.df)) + + tm.assert_frame_equal( + self.df.melt(id_vars=["id1", "id2"], value_vars=["A", "B"]), + melt(self.df, id_vars=["id1", "id2"], value_vars=["A", "B"]), + ) + + tm.assert_frame_equal( + self.df.melt(var_name=self.var_name, value_name=self.value_name), + melt(self.df, var_name=self.var_name, value_name=self.value_name), + ) + + tm.assert_frame_equal(self.df1.melt(col_level=0), melt(self.df1, col_level=0)) + + def test_default_col_names(self): + result = self.df.melt() + assert result.columns.tolist() == ["variable", "value"] + + result1 = self.df.melt(id_vars=["id1"]) + assert result1.columns.tolist() == ["id1", "variable", "value"] + + result2 = self.df.melt(id_vars=["id1", "id2"]) + assert result2.columns.tolist() == ["id1", "id2", "variable", "value"] + + def 
test_value_vars(self): + result3 = self.df.melt(id_vars=["id1", "id2"], value_vars="A") + assert len(result3) == 10 + + result4 = self.df.melt(id_vars=["id1", "id2"], value_vars=["A", "B"]) + expected4 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + "variable": ["A"] * 10 + ["B"] * 10, + "value": (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", "variable", "value"], + ) + tm.assert_frame_equal(result4, expected4) + + def test_value_vars_types(self): + # GH 15348 + expected = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + "variable": ["A"] * 10 + ["B"] * 10, + "value": (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", "variable", "value"], + ) + + for type_ in (tuple, list, np.array): + result = self.df.melt(id_vars=["id1", "id2"], value_vars=type_(("A", "B"))) + tm.assert_frame_equal(result, expected) + + def test_vars_work_with_multiindex(self): + expected = DataFrame( + { + ("A", "a"): self.df1[("A", "a")], + "CAP": ["B"] * len(self.df1), + "low": ["b"] * len(self.df1), + "value": self.df1[("B", "b")], + }, + columns=[("A", "a"), "CAP", "low", "value"], + ) + + result = self.df1.melt(id_vars=[("A", "a")], value_vars=[("B", "b")]) + tm.assert_frame_equal(result, expected) + + def test_single_vars_work_with_multiindex(self): + expected = DataFrame( + { + "A": {0: 1.067683, 1: -1.321405, 2: -0.807333}, + "CAP": {0: "B", 1: "B", 2: "B"}, + "value": {0: -1.110463, 1: 0.368915, 2: 0.08298}, + } + ) + result = self.df1.melt(["A"], ["B"], col_level=0) + tm.assert_frame_equal(result, expected) + + def test_tuple_vars_fail_with_multiindex(self): + # melt should fail with an informative error message if + # the columns have a MultiIndex and a tuple is passed + # for id_vars or value_vars. 
+ tuple_a = ("A", "a") + list_a = [tuple_a] + tuple_b = ("B", "b") + list_b = [tuple_b] + + msg = r"(id|value)_vars must be a list of tuples when columns are a MultiIndex" + for id_vars, value_vars in ( + (tuple_a, list_b), + (list_a, tuple_b), + (tuple_a, tuple_b), + ): + with pytest.raises(ValueError, match=msg): + self.df1.melt(id_vars=id_vars, value_vars=value_vars) + + def test_custom_var_name(self): + result5 = self.df.melt(var_name=self.var_name) + assert result5.columns.tolist() == ["var", "value"] + + result6 = self.df.melt(id_vars=["id1"], var_name=self.var_name) + assert result6.columns.tolist() == ["id1", "var", "value"] + + result7 = self.df.melt(id_vars=["id1", "id2"], var_name=self.var_name) + assert result7.columns.tolist() == ["id1", "id2", "var", "value"] + + result8 = self.df.melt( + id_vars=["id1", "id2"], value_vars="A", var_name=self.var_name + ) + assert result8.columns.tolist() == ["id1", "id2", "var", "value"] + + result9 = self.df.melt( + id_vars=["id1", "id2"], value_vars=["A", "B"], var_name=self.var_name + ) + expected9 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + self.var_name: ["A"] * 10 + ["B"] * 10, + "value": (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", self.var_name, "value"], + ) + tm.assert_frame_equal(result9, expected9) + + def test_custom_value_name(self): + result10 = self.df.melt(value_name=self.value_name) + assert result10.columns.tolist() == ["variable", "val"] + + result11 = self.df.melt(id_vars=["id1"], value_name=self.value_name) + assert result11.columns.tolist() == ["id1", "variable", "val"] + + result12 = self.df.melt(id_vars=["id1", "id2"], value_name=self.value_name) + assert result12.columns.tolist() == ["id1", "id2", "variable", "val"] + + result13 = self.df.melt( + id_vars=["id1", "id2"], value_vars="A", value_name=self.value_name + ) + assert result13.columns.tolist() == ["id1", "id2", "variable", "val"] + + result14 = 
self.df.melt( + id_vars=["id1", "id2"], value_vars=["A", "B"], value_name=self.value_name + ) + expected14 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + "variable": ["A"] * 10 + ["B"] * 10, + self.value_name: (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", "variable", self.value_name], + ) + tm.assert_frame_equal(result14, expected14) + + def test_custom_var_and_value_name(self): + + result15 = self.df.melt(var_name=self.var_name, value_name=self.value_name) + assert result15.columns.tolist() == ["var", "val"] + + result16 = self.df.melt( + id_vars=["id1"], var_name=self.var_name, value_name=self.value_name + ) + assert result16.columns.tolist() == ["id1", "var", "val"] + + result17 = self.df.melt( + id_vars=["id1", "id2"], var_name=self.var_name, value_name=self.value_name + ) + assert result17.columns.tolist() == ["id1", "id2", "var", "val"] + + result18 = self.df.melt( + id_vars=["id1", "id2"], + value_vars="A", + var_name=self.var_name, + value_name=self.value_name, + ) + assert result18.columns.tolist() == ["id1", "id2", "var", "val"] + + result19 = self.df.melt( + id_vars=["id1", "id2"], + value_vars=["A", "B"], + var_name=self.var_name, + value_name=self.value_name, + ) + expected19 = DataFrame( + { + "id1": self.df["id1"].tolist() * 2, + "id2": self.df["id2"].tolist() * 2, + self.var_name: ["A"] * 10 + ["B"] * 10, + self.value_name: (self.df["A"].tolist() + self.df["B"].tolist()), + }, + columns=["id1", "id2", self.var_name, self.value_name], + ) + tm.assert_frame_equal(result19, expected19) + + df20 = self.df.copy() + df20.columns.name = "foo" + result20 = df20.melt() + assert result20.columns.tolist() == ["foo", "value"] + + def test_col_level(self): + res1 = self.df1.melt(col_level=0) + res2 = self.df1.melt(col_level="CAP") + assert res1.columns.tolist() == ["CAP", "value"] + assert res2.columns.tolist() == ["CAP", "value"] + + def test_multiindex(self): + res = 
self.df1.melt() + assert res.columns.tolist() == ["CAP", "low", "value"] + + @pytest.mark.parametrize( + "col", + [ + pd.Series(pd.date_range("2010", periods=5, tz="US/Pacific")), + pd.Series(["a", "b", "c", "a", "d"], dtype="category"), + pd.Series([0, 1, 0, 0, 0]), + ], + ) + def test_pandas_dtypes(self, col): + # GH 15785 + df = DataFrame( + {"klass": range(5), "col": col, "attr1": [1, 0, 0, 0, 0], "attr2": col} + ) + expected_value = pd.concat([pd.Series([1, 0, 0, 0, 0]), col], ignore_index=True) + result = melt( + df, id_vars=["klass", "col"], var_name="attribute", value_name="value" + ) + expected = DataFrame( + { + 0: list(range(5)) * 2, + 1: pd.concat([col] * 2, ignore_index=True), + 2: ["attr1"] * 5 + ["attr2"] * 5, + 3: expected_value, + } + ) + expected.columns = ["klass", "col", "attribute", "value"] + tm.assert_frame_equal(result, expected) + + def test_preserve_category(self): + # GH 15853 + data = DataFrame({"A": [1, 2], "B": pd.Categorical(["X", "Y"])}) + result = pd.melt(data, ["B"], ["A"]) + expected = DataFrame( + {"B": pd.Categorical(["X", "Y"]), "variable": ["A", "A"], "value": [1, 2]} + ) + + tm.assert_frame_equal(result, expected) + + def test_melt_missing_columns_raises(self): + # GH-23575 + # This test is to ensure that pandas raises an error if melting is + # attempted with column names absent from the dataframe + + # Generate data + df = pd.DataFrame(np.random.randn(5, 4), columns=list("abcd")) + + # Try to melt with missing `value_vars` column name + msg = "The following '{Var}' are not present in the DataFrame: {Col}" + with pytest.raises( + KeyError, match=msg.format(Var="value_vars", Col="\\['C'\\]") + ): + df.melt(["a", "b"], ["C", "d"]) + + # Try to melt with missing `id_vars` column name + with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['A'\\]")): + df.melt(["A", "b"], ["c", "d"]) + + # Multiple missing + with pytest.raises( + KeyError, + match=msg.format(Var="id_vars", Col="\\['not_here', 'or_there'\\]"), + ): 
+ df.melt(["a", "b", "not_here", "or_there"], ["c", "d"]) + + # Multiindex melt fails if column is missing from multilevel melt + multi = df.copy() + multi.columns = [list("ABCD"), list("abcd")] + with pytest.raises(KeyError, match=msg.format(Var="id_vars", Col="\\['E'\\]")): + multi.melt([("E", "a")], [("B", "b")]) + # Multiindex fails if column is missing from single level melt + with pytest.raises( + KeyError, match=msg.format(Var="value_vars", Col="\\['F'\\]") + ): + multi.melt(["A"], ["F"], col_level=0) + + def test_melt_mixed_int_str_id_vars(self): + # GH 29718 + df = DataFrame({0: ["foo"], "a": ["bar"], "b": [1], "d": [2]}) + result = melt(df, id_vars=[0, "a"], value_vars=["b", "d"]) + expected = DataFrame( + {0: ["foo"] * 2, "a": ["bar"] * 2, "variable": list("bd"), "value": [1, 2]} + ) + tm.assert_frame_equal(result, expected) + + def test_melt_mixed_int_str_value_vars(self): + # GH 29718 + df = DataFrame({0: ["foo"], "a": ["bar"]}) + result = melt(df, value_vars=[0, "a"]) + expected = DataFrame({"variable": [0, "a"], "value": ["foo", "bar"]}) + tm.assert_frame_equal(result, expected) + + +class TestLreshape: + def test_pairs(self): + data = { + "birthdt": [ + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + ], + "birthwt": [1766, 3301, 1454, 3139, 4133], + "id": [101, 102, 103, 104, 105], + "sex": ["Male", "Female", "Female", "Female", "Female"], + "visitdt1": [ + "11jan2009", + "22dec2008", + "04jan2009", + "29dec2008", + "20jan2009", + ], + "visitdt2": ["21jan2009", np.nan, "22jan2009", "31dec2008", "03feb2009"], + "visitdt3": ["05feb2009", np.nan, np.nan, "02jan2009", "15feb2009"], + "wt1": [1823, 3338, 1549, 3298, 4306], + "wt2": [2011.0, np.nan, 1892.0, 3338.0, 4575.0], + "wt3": [2293.0, np.nan, np.nan, 3377.0, 4805.0], + } + + df = DataFrame(data) + + spec = { + "visitdt": ["visitdt{i:d}".format(i=i) for i in range(1, 4)], + "wt": ["wt{i:d}".format(i=i) for i in range(1, 4)], + } + result = lreshape(df, spec) + + exp_data = 
{ + "birthdt": [ + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "21dec2008", + "11jan2009", + ], + "birthwt": [ + 1766, + 3301, + 1454, + 3139, + 4133, + 1766, + 1454, + 3139, + 4133, + 1766, + 3139, + 4133, + ], + "id": [101, 102, 103, 104, 105, 101, 103, 104, 105, 101, 104, 105], + "sex": [ + "Male", + "Female", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + ], + "visitdt": [ + "11jan2009", + "22dec2008", + "04jan2009", + "29dec2008", + "20jan2009", + "21jan2009", + "22jan2009", + "31dec2008", + "03feb2009", + "05feb2009", + "02jan2009", + "15feb2009", + ], + "wt": [ + 1823.0, + 3338.0, + 1549.0, + 3298.0, + 4306.0, + 2011.0, + 1892.0, + 3338.0, + 4575.0, + 2293.0, + 3377.0, + 4805.0, + ], + } + exp = DataFrame(exp_data, columns=result.columns) + tm.assert_frame_equal(result, exp) + + result = lreshape(df, spec, dropna=False) + exp_data = { + "birthdt": [ + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + "08jan2009", + "20dec2008", + "30dec2008", + "21dec2008", + "11jan2009", + ], + "birthwt": [ + 1766, + 3301, + 1454, + 3139, + 4133, + 1766, + 3301, + 1454, + 3139, + 4133, + 1766, + 3301, + 1454, + 3139, + 4133, + ], + "id": [ + 101, + 102, + 103, + 104, + 105, + 101, + 102, + 103, + 104, + 105, + 101, + 102, + 103, + 104, + 105, + ], + "sex": [ + "Male", + "Female", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + "Female", + "Female", + "Male", + "Female", + "Female", + "Female", + "Female", + ], + "visitdt": [ + "11jan2009", + "22dec2008", + "04jan2009", + "29dec2008", + "20jan2009", + "21jan2009", + np.nan, + "22jan2009", + "31dec2008", + "03feb2009", + "05feb2009", + np.nan, + np.nan, + "02jan2009", + "15feb2009", + ], + "wt": [ + 1823.0, + 3338.0, + 1549.0, + 
3298.0, + 4306.0, + 2011.0, + np.nan, + 1892.0, + 3338.0, + 4575.0, + 2293.0, + np.nan, + np.nan, + 3377.0, + 4805.0, + ], + } + exp = DataFrame(exp_data, columns=result.columns) + tm.assert_frame_equal(result, exp) + + with tm.assert_produces_warning(FutureWarning): + result = lreshape(df, spec, dropna=False, label="foo") + + spec = { + "visitdt": ["visitdt{i:d}".format(i=i) for i in range(1, 3)], + "wt": ["wt{i:d}".format(i=i) for i in range(1, 4)], + } + msg = "All column lists must be same length" + with pytest.raises(ValueError, match=msg): + lreshape(df, spec) + + +class TestWideToLong: + def test_simple(self): + np.random.seed(123) + x = np.random.randn(3) + df = pd.DataFrame( + { + "A1970": {0: "a", 1: "b", 2: "c"}, + "A1980": {0: "d", 1: "e", 2: "f"}, + "B1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A": ["a", "b", "c", "d", "e", "f"], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_stubs(self): + # GH9204 + df = pd.DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) + df.columns = ["id", "inc1", "inc2", "edu1", "edu2"] + stubs = ["inc", "edu"] + + # TODO: unused? 
+ df_long = pd.wide_to_long(df, stubs, i="id", j="age") # noqa + + assert stubs == ["inc", "edu"] + + def test_separating_character(self): + # GH14779 + np.random.seed(123) + x = np.random.randn(3) + df = pd.DataFrame( + { + "A.1970": {0: "a", 1: "b", 2: "c"}, + "A.1980": {0: "d", 1: "e", 2: "f"}, + "B.1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B.1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A": ["a", "b", "c", "d", "e", "f"], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year", sep=".") + tm.assert_frame_equal(result, expected) + + def test_escapable_characters(self): + np.random.seed(123) + x = np.random.randn(3) + df = pd.DataFrame( + { + "A(quarterly)1970": {0: "a", 1: "b", 2: "c"}, + "A(quarterly)1980": {0: "d", 1: "e", 2: "f"}, + "B(quarterly)1970": {0: 2.5, 1: 1.2, 2: 0.7}, + "B(quarterly)1980": {0: 3.2, 1: 1.3, 2: 0.1}, + "X": dict(zip(range(3), x)), + } + ) + df["id"] = df.index + exp_data = { + "X": x.tolist() + x.tolist(), + "A(quarterly)": ["a", "b", "c", "d", "e", "f"], + "B(quarterly)": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2], + } + expected = DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[ + ["X", "A(quarterly)", "B(quarterly)"] + ] + result = wide_to_long(df, ["A(quarterly)", "B(quarterly)"], i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_unbalanced(self): + # test that we can have a varying amount of time variables + df = pd.DataFrame( + { + "A2010": [1.0, 2.0], + "A2011": [3.0, 4.0], + "B2010": [5.0, 6.0], + "X": ["X1", "X2"], + } + ) + df["id"] = df.index + exp_data = { + "X": ["X1", "X1", "X2", "X2"], + "A": [1.0, 3.0, 2.0, 
4.0], + "B": [5.0, np.nan, 6.0, np.nan], + "id": [0, 0, 1, 1], + "year": [2010, 2011, 2010, 2011], + } + expected = pd.DataFrame(exp_data) + expected = expected.set_index(["id", "year"])[["X", "A", "B"]] + result = wide_to_long(df, ["A", "B"], i="id", j="year") + tm.assert_frame_equal(result, expected) + + def test_character_overlap(self): + # Test we handle overlapping characters in both id_vars and value_vars + df = pd.DataFrame( + { + "A11": ["a11", "a22", "a33"], + "A12": ["a21", "a22", "a23"], + "B11": ["b11", "b12", "b13"], + "B12": ["b21", "b22", "b23"], + "BB11": [1, 2, 3], + "BB12": [4, 5, 6], + "BBBX": [91, 92, 93], + "BBBZ": [91, 92, 93], + } + ) + df["id"] = df.index + expected = pd.DataFrame( + { + "BBBX": [91, 92, 93, 91, 92, 93], + "BBBZ": [91, 92, 93, 91, 92, 93], + "A": ["a11", "a22", "a33", "a21", "a22", "a23"], + "B": ["b11", "b12", "b13", "b21", "b22", "b23"], + "BB": [1, 2, 3, 4, 5, 6], + "id": [0, 1, 2, 0, 1, 2], + "year": [11, 11, 11, 12, 12, 12], + } + ) + expected = expected.set_index(["id", "year"])[["BBBX", "BBBZ", "A", "B", "BB"]] + result = wide_to_long(df, ["A", "B", "BB"], i="id", j="year") + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_invalid_separator(self): + # if an invalid separator is supplied a empty data frame is returned + sep = "nope!" 
+ df = pd.DataFrame( + { + "A2010": [1.0, 2.0], + "A2011": [3.0, 4.0], + "B2010": [5.0, 6.0], + "X": ["X1", "X2"], + } + ) + df["id"] = df.index + exp_data = { + "X": "", + "A2010": [], + "A2011": [], + "B2010": [], + "id": [], + "year": [], + "A": [], + "B": [], + } + expected = pd.DataFrame(exp_data).astype({"year": "int"}) + expected = expected.set_index(["id", "year"])[ + ["X", "A2010", "A2011", "B2010", "A", "B"] + ] + expected.index.set_levels([0, 1], level=0, inplace=True) + result = wide_to_long(df, ["A", "B"], i="id", j="year", sep=sep) + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_num_string_disambiguation(self): + # Test that we can disambiguate number value_vars from + # string value_vars + df = pd.DataFrame( + { + "A11": ["a11", "a22", "a33"], + "A12": ["a21", "a22", "a23"], + "B11": ["b11", "b12", "b13"], + "B12": ["b21", "b22", "b23"], + "BB11": [1, 2, 3], + "BB12": [4, 5, 6], + "Arating": [91, 92, 93], + "Arating_old": [91, 92, 93], + } + ) + df["id"] = df.index + expected = pd.DataFrame( + { + "Arating": [91, 92, 93, 91, 92, 93], + "Arating_old": [91, 92, 93, 91, 92, 93], + "A": ["a11", "a22", "a33", "a21", "a22", "a23"], + "B": ["b11", "b12", "b13", "b21", "b22", "b23"], + "BB": [1, 2, 3, 4, 5, 6], + "id": [0, 1, 2, 0, 1, 2], + "year": [11, 11, 11, 12, 12, 12], + } + ) + expected = expected.set_index(["id", "year"])[ + ["Arating", "Arating_old", "A", "B", "BB"] + ] + result = wide_to_long(df, ["A", "B", "BB"], i="id", j="year") + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_invalid_suffixtype(self): + # If all stubs names end with a string, but a numeric suffix is + # assumed, an empty data frame is returned + df = pd.DataFrame( + { + "Aone": [1.0, 2.0], + "Atwo": [3.0, 4.0], + "Bone": [5.0, 6.0], + "X": ["X1", "X2"], + } + ) + df["id"] = df.index + exp_data = { + "X": "", + "Aone": [], + "Atwo": [], + "Bone": [], + "id": [], + "year": [], + "A": [], + 
"B": [], + } + expected = pd.DataFrame(exp_data).astype({"year": "int"}) + + expected = expected.set_index(["id", "year"]) + expected.index.set_levels([0, 1], level=0, inplace=True) + result = wide_to_long(df, ["A", "B"], i="id", j="year") + tm.assert_frame_equal(result.sort_index(axis=1), expected.sort_index(axis=1)) + + def test_multiple_id_columns(self): + # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm + df = pd.DataFrame( + { + "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9], + } + ) + expected = pd.DataFrame( + { + "ht": [ + 2.8, + 3.4, + 2.9, + 3.8, + 2.2, + 2.9, + 2.0, + 3.2, + 1.8, + 2.8, + 1.9, + 2.4, + 2.2, + 3.3, + 2.3, + 3.4, + 2.1, + 2.9, + ], + "famid": [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3], + "birth": [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3], + "age": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2], + } + ) + expected = expected.set_index(["famid", "birth", "age"])[["ht"]] + result = wide_to_long(df, "ht", i=["famid", "birth"], j="age") + tm.assert_frame_equal(result, expected) + + def test_non_unique_idvars(self): + # GH16382 + # Raise an error message if non unique id vars (i) are passed + df = pd.DataFrame( + {"A_A1": [1, 2, 3, 4, 5], "B_B1": [1, 2, 3, 4, 5], "x": [1, 1, 1, 1, 1]} + ) + msg = "the id variables need to uniquely identify each row" + with pytest.raises(ValueError, match=msg): + wide_to_long(df, ["A_A", "B_B"], i="x", j="colname") + + def test_cast_j_int(self): + df = pd.DataFrame( + { + "actor_1": ["CCH Pounder", "Johnny Depp", "Christoph Waltz"], + "actor_2": ["Joel David Moore", "Orlando Bloom", "Rory Kinnear"], + "actor_fb_likes_1": [1000.0, 40000.0, 11000.0], + "actor_fb_likes_2": [936.0, 5000.0, 393.0], + "title": ["Avatar", "Pirates of the Caribbean", "Spectre"], + } + ) + + expected = pd.DataFrame( + { + "actor": [ + "CCH Pounder", 
+ "Johnny Depp", + "Christoph Waltz", + "Joel David Moore", + "Orlando Bloom", + "Rory Kinnear", + ], + "actor_fb_likes": [1000.0, 40000.0, 11000.0, 936.0, 5000.0, 393.0], + "num": [1, 1, 1, 2, 2, 2], + "title": [ + "Avatar", + "Pirates of the Caribbean", + "Spectre", + "Avatar", + "Pirates of the Caribbean", + "Spectre", + ], + } + ).set_index(["title", "num"]) + result = wide_to_long( + df, ["actor", "actor_fb_likes"], i="title", j="num", sep="_" + ) + + tm.assert_frame_equal(result, expected) + + def test_identical_stubnames(self): + df = pd.DataFrame( + { + "A2010": [1.0, 2.0], + "A2011": [3.0, 4.0], + "B2010": [5.0, 6.0], + "A": ["X1", "X2"], + } + ) + msg = "stubname can't be identical to a column name" + with pytest.raises(ValueError, match=msg): + wide_to_long(df, ["A", "B"], i="A", j="colname") + + def test_nonnumeric_suffix(self): + df = pd.DataFrame( + { + "treatment_placebo": [1.0, 2.0], + "treatment_test": [3.0, 4.0], + "result_placebo": [5.0, 6.0], + "A": ["X1", "X2"], + } + ) + expected = pd.DataFrame( + { + "A": ["X1", "X1", "X2", "X2"], + "colname": ["placebo", "test", "placebo", "test"], + "result": [5.0, np.nan, 6.0, np.nan], + "treatment": [1.0, 3.0, 2.0, 4.0], + } + ) + expected = expected.set_index(["A", "colname"]) + result = wide_to_long( + df, ["result", "treatment"], i="A", j="colname", suffix="[a-z]+", sep="_" + ) + tm.assert_frame_equal(result, expected) + + def test_mixed_type_suffix(self): + df = pd.DataFrame( + { + "A": ["X1", "X2"], + "result_1": [0, 9], + "result_foo": [5.0, 6.0], + "treatment_1": [1.0, 2.0], + "treatment_foo": [3.0, 4.0], + } + ) + expected = pd.DataFrame( + { + "A": ["X1", "X2", "X1", "X2"], + "colname": ["1", "1", "foo", "foo"], + "result": [0.0, 9.0, 5.0, 6.0], + "treatment": [1.0, 2.0, 3.0, 4.0], + } + ).set_index(["A", "colname"]) + result = wide_to_long( + df, ["result", "treatment"], i="A", j="colname", suffix=".+", sep="_" + ) + tm.assert_frame_equal(result, expected) + + def test_float_suffix(self): + df = 
pd.DataFrame( + { + "treatment_1.1": [1.0, 2.0], + "treatment_2.1": [3.0, 4.0], + "result_1.2": [5.0, 6.0], + "result_1": [0, 9], + "A": ["X1", "X2"], + } + ) + expected = pd.DataFrame( + { + "A": ["X1", "X1", "X1", "X1", "X2", "X2", "X2", "X2"], + "colname": [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1], + "result": [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan], + "treatment": [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0], + } + ) + expected = expected.set_index(["A", "colname"]) + result = wide_to_long( + df, ["result", "treatment"], i="A", j="colname", suffix="[0-9.]+", sep="_" + ) + tm.assert_frame_equal(result, expected) + + def test_col_substring_of_stubname(self): + # GH22468 + # Don't raise ValueError when a column name is a substring + # of a stubname that's been passed as a string + wide_data = { + "node_id": {0: 0, 1: 1, 2: 2, 3: 3, 4: 4}, + "A": {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81}, + "PA0": {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6}, + "PA1": {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67}, + "PA3": {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}, + } + wide_df = pd.DataFrame.from_dict(wide_data) + expected = pd.wide_to_long( + wide_df, stubnames=["PA"], i=["node_id", "A"], j="time" + ) + result = pd.wide_to_long(wide_df, stubnames="PA", i=["node_id", "A"], j="time") + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py new file mode 100644 index 00000000..743fc50c --- /dev/null +++ b/pandas/tests/reshape/test_pivot.py @@ -0,0 +1,2643 @@ +from datetime import date, datetime, timedelta +from itertools import product + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Grouper, + Index, + MultiIndex, + Series, + concat, + date_range, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +from pandas.core.reshape.pivot import crosstab, pivot_table + + +@pytest.fixture(params=[True, 
False]) +def dropna(request): + return request.param + + +@pytest.fixture(params=[([0] * 4, [1] * 4), (range(0, 3), range(1, 4))]) +def interval_values(request, closed): + left, right = request.param + return Categorical(pd.IntervalIndex.from_arrays(left, right, closed)) + + +class TestPivotTable: + def setup_method(self, method): + self.data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + def test_pivot_table(self, observed): + index = ["A", "B"] + columns = "C" + table = pivot_table( + self.data, values="D", index=index, columns=columns, observed=observed + ) + + table2 = self.data.pivot_table( + values="D", index=index, columns=columns, observed=observed + ) + tm.assert_frame_equal(table, table2) + + # this works + pivot_table(self.data, values="D", index=index, observed=observed) + + if len(index) > 1: + assert table.index.names == tuple(index) + else: + assert table.index.name == index[0] + + if len(columns) > 1: + assert table.columns.names == columns + else: + assert table.columns.name == columns[0] + + expected = self.data.groupby(index + [columns])["D"].agg(np.mean).unstack() + tm.assert_frame_equal(table, expected) + + def test_pivot_table_categorical_observed_equal(self, observed): + # issue #24923 + df = pd.DataFrame( + {"col1": list("abcde"), "col2": list("fghij"), "col3": [1, 2, 3, 4, 5]} + ) + + expected = df.pivot_table( + index="col1", values="col3", columns="col2", aggfunc=np.sum, fill_value=0 + ) + + expected.index = expected.index.astype("category") + expected.columns = expected.columns.astype("category") + + df.col1 = 
df.col1.astype("category") + df.col2 = df.col2.astype("category") + + result = df.pivot_table( + index="col1", + values="col3", + columns="col2", + aggfunc=np.sum, + fill_value=0, + observed=observed, + ) + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_nocols(self): + df = DataFrame( + {"rows": ["a", "b", "c"], "cols": ["x", "y", "z"], "values": [1, 2, 3]} + ) + rs = df.pivot_table(columns="cols", aggfunc=np.sum) + xp = df.pivot_table(index="cols", aggfunc=np.sum).T + tm.assert_frame_equal(rs, xp) + + rs = df.pivot_table(columns="cols", aggfunc={"values": "mean"}) + xp = df.pivot_table(index="cols", aggfunc={"values": "mean"}).T + tm.assert_frame_equal(rs, xp) + + def test_pivot_table_dropna(self): + df = DataFrame( + { + "amount": {0: 60000, 1: 100000, 2: 50000, 3: 30000}, + "customer": {0: "A", 1: "A", 2: "B", 3: "C"}, + "month": {0: 201307, 1: 201309, 2: 201308, 3: 201310}, + "product": {0: "a", 1: "b", 2: "c", 3: "d"}, + "quantity": {0: 2000000, 1: 500000, 2: 1000000, 3: 1000000}, + } + ) + pv_col = df.pivot_table( + "quantity", "month", ["customer", "product"], dropna=False + ) + pv_ind = df.pivot_table( + "quantity", ["customer", "product"], "month", dropna=False + ) + + m = MultiIndex.from_tuples( + [ + ("A", "a"), + ("A", "b"), + ("A", "c"), + ("A", "d"), + ("B", "a"), + ("B", "b"), + ("B", "c"), + ("B", "d"), + ("C", "a"), + ("C", "b"), + ("C", "c"), + ("C", "d"), + ], + names=["customer", "product"], + ) + tm.assert_index_equal(pv_col.columns, m) + tm.assert_index_equal(pv_ind.index, m) + + def test_pivot_table_categorical(self): + + cat1 = Categorical( + ["a", "a", "b", "b"], categories=["a", "b", "z"], ordered=True + ) + cat2 = Categorical( + ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True + ) + df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) + result = pd.pivot_table(df, values="values", index=["A", "B"], dropna=True) + + exp_index = pd.MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) + expected 
= DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) + tm.assert_frame_equal(result, expected) + + def test_pivot_table_dropna_categoricals(self, dropna): + # GH 15193 + categories = ["a", "b", "c", "d"] + + df = DataFrame( + { + "A": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "B": [1, 2, 3, 1, 2, 3, 1, 2, 3], + "C": range(0, 9), + } + ) + + df["A"] = df["A"].astype(CDT(categories, ordered=False)) + result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna) + expected_columns = Series(["a", "b", "c"], name="A") + expected_columns = expected_columns.astype(CDT(categories, ordered=False)) + expected_index = Series([1, 2, 3], name="B") + expected = DataFrame( + [[0, 3, 6], [1, 4, 7], [2, 5, 8]], + index=expected_index, + columns=expected_columns, + ) + if not dropna: + # add back the non observed to compare + expected = expected.reindex(columns=Categorical(categories)).astype("float") + + tm.assert_frame_equal(result, expected) + + def test_pivot_with_non_observable_dropna(self, dropna): + # gh-21133 + df = pd.DataFrame( + { + "A": pd.Categorical( + [np.nan, "low", "high", "low", "high"], + categories=["low", "high"], + ordered=True, + ), + "B": range(5), + } + ) + + result = df.pivot_table(index="A", values="B", dropna=dropna) + expected = pd.DataFrame( + {"B": [2, 3]}, + index=pd.Index( + pd.Categorical.from_codes( + [0, 1], categories=["low", "high"], ordered=True + ), + name="A", + ), + ) + + tm.assert_frame_equal(result, expected) + + # gh-21378 + df = pd.DataFrame( + { + "A": pd.Categorical( + ["left", "low", "high", "low", "high"], + categories=["low", "high", "left"], + ordered=True, + ), + "B": range(5), + } + ) + + result = df.pivot_table(index="A", values="B", dropna=dropna) + expected = pd.DataFrame( + {"B": [2, 3, 0]}, + index=pd.Index( + pd.Categorical.from_codes( + [0, 1, 2], categories=["low", "high", "left"], ordered=True + ), + name="A", + ), + ) + + tm.assert_frame_equal(result, expected) + + def 
test_pivot_with_interval_index(self, interval_values, dropna): + # GH 25814 + df = DataFrame({"A": interval_values, "B": 1}) + result = df.pivot_table(index="A", values="B", dropna=dropna) + expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A")) + tm.assert_frame_equal(result, expected) + + def test_pivot_with_interval_index_margins(self): + # GH 25815 + ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2]) + df = DataFrame( + { + "A": np.arange(4, 0, -1, dtype=np.intp), + "B": ["a", "b", "a", "b"], + "C": pd.Categorical(ordered_cat, ordered=True).sort_values( + ascending=False + ), + } + ) + + pivot_tab = pd.pivot_table( + df, index="C", columns="B", values="A", aggfunc="sum", margins=True + ) + + result = pivot_tab["All"] + expected = Series( + [3, 7, 10], + index=Index([pd.Interval(0, 1), pd.Interval(1, 2), "All"], name="C"), + name="All", + dtype=np.intp, + ) + tm.assert_series_equal(result, expected) + + def test_pass_array(self): + result = self.data.pivot_table("D", index=self.data.A, columns=self.data.C) + expected = self.data.pivot_table("D", index="A", columns="C") + tm.assert_frame_equal(result, expected) + + def test_pass_function(self): + result = self.data.pivot_table("D", index=lambda x: x // 5, columns=self.data.C) + expected = self.data.pivot_table("D", index=self.data.index // 5, columns="C") + tm.assert_frame_equal(result, expected) + + def test_pivot_table_multiple(self): + index = ["A", "B"] + columns = "C" + table = pivot_table(self.data, index=index, columns=columns) + expected = self.data.groupby(index + [columns]).agg(np.mean).unstack() + tm.assert_frame_equal(table, expected) + + def test_pivot_dtypes(self): + + # can convert dtypes + f = DataFrame( + { + "a": ["cat", "bat", "cat", "bat"], + "v": [1, 2, 3, 4], + "i": ["a", "b", "a", "b"], + } + ) + assert f.dtypes["v"] == "int64" + + z = pivot_table( + f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.sum + ) + result = z.dtypes + 
expected = Series([np.dtype("int64")] * 2, index=Index(list("ab"), name="i")) + tm.assert_series_equal(result, expected) + + # cannot convert dtypes + f = DataFrame( + { + "a": ["cat", "bat", "cat", "bat"], + "v": [1.5, 2.5, 3.5, 4.5], + "i": ["a", "b", "a", "b"], + } + ) + assert f.dtypes["v"] == "float64" + + z = pivot_table( + f, values="v", index=["a"], columns=["i"], fill_value=0, aggfunc=np.mean + ) + result = z.dtypes + expected = Series([np.dtype("float64")] * 2, index=Index(list("ab"), name="i")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "columns,values", + [ + ("bool1", ["float1", "float2"]), + ("bool1", ["float1", "float2", "bool1"]), + ("bool2", ["float1", "float2", "bool1"]), + ], + ) + def test_pivot_preserve_dtypes(self, columns, values): + # GH 7142 regression test + v = np.arange(5, dtype=np.float64) + df = DataFrame( + {"float1": v, "float2": v + 2.0, "bool1": v <= 2, "bool2": v <= 3} + ) + + df_res = df.reset_index().pivot_table( + index="index", columns=columns, values=values + ) + + result = dict(df_res.dtypes) + expected = { + col: np.dtype("O") if col[0].startswith("b") else np.dtype("float64") + for col in df_res + } + assert result == expected + + def test_pivot_no_values(self): + # GH 14380 + idx = pd.DatetimeIndex( + ["2011-01-01", "2011-02-01", "2011-01-02", "2011-01-01", "2011-01-02"] + ) + df = pd.DataFrame({"A": [1, 2, 3, 4, 5]}, index=idx) + res = df.pivot_table(index=df.index.month, columns=df.index.day) + + exp_columns = pd.MultiIndex.from_tuples([("A", 1), ("A", 2)]) + exp = pd.DataFrame( + [[2.5, 4.0], [2.0, np.nan]], index=[1, 2], columns=exp_columns + ) + tm.assert_frame_equal(res, exp) + + df = pd.DataFrame( + { + "A": [1, 2, 3, 4, 5], + "dt": pd.date_range("2011-01-01", freq="D", periods=5), + }, + index=idx, + ) + res = df.pivot_table( + index=df.index.month, columns=pd.Grouper(key="dt", freq="M") + ) + exp_columns = pd.MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))]) + 
exp_columns.names = [None, "dt"] + exp = pd.DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns) + tm.assert_frame_equal(res, exp) + + res = df.pivot_table( + index=pd.Grouper(freq="A"), columns=pd.Grouper(key="dt", freq="M") + ) + exp = pd.DataFrame( + [3], index=pd.DatetimeIndex(["2011-12-31"]), columns=exp_columns + ) + tm.assert_frame_equal(res, exp) + + def test_pivot_multi_values(self): + result = pivot_table( + self.data, values=["D", "E"], index="A", columns=["B", "C"], fill_value=0 + ) + expected = pivot_table( + self.data.drop(["F"], axis=1), index="A", columns=["B", "C"], fill_value=0 + ) + tm.assert_frame_equal(result, expected) + + def test_pivot_multi_functions(self): + f = lambda func: pivot_table( + self.data, values=["D", "E"], index=["A", "B"], columns="C", aggfunc=func + ) + result = f([np.mean, np.std]) + means = f(np.mean) + stds = f(np.std) + expected = concat([means, stds], keys=["mean", "std"], axis=1) + tm.assert_frame_equal(result, expected) + + # margins not supported?? 
+ f = lambda func: pivot_table( + self.data, + values=["D", "E"], + index=["A", "B"], + columns="C", + aggfunc=func, + margins=True, + ) + result = f([np.mean, np.std]) + means = f(np.mean) + stds = f(np.std) + expected = concat([means, stds], keys=["mean", "std"], axis=1) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_index_with_nan(self, method): + # GH 3588 + nan = np.nan + df = DataFrame( + { + "a": ["R1", "R2", nan, "R4"], + "b": ["C1", "C2", "C3", "C4"], + "c": [10, 15, 17, 20], + } + ) + if method: + result = df.pivot("a", "b", "c") + else: + result = pd.pivot(df, "a", "b", "c") + expected = DataFrame( + [ + [nan, nan, 17, nan], + [10, nan, nan, nan], + [nan, 15, nan, nan], + [nan, nan, nan, 20], + ], + index=Index([nan, "R1", "R2", "R4"], name="a"), + columns=Index(["C1", "C2", "C3", "C4"], name="b"), + ) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df.pivot("b", "a", "c"), expected.T) + + # GH9491 + df = DataFrame( + { + "a": pd.date_range("2014-02-01", periods=6, freq="D"), + "c": 100 + np.arange(6), + } + ) + df["b"] = df["a"] - pd.Timestamp("2014-02-02") + df.loc[1, "a"] = df.loc[3, "a"] = nan + df.loc[1, "b"] = df.loc[4, "b"] = nan + + if method: + pv = df.pivot("a", "b", "c") + else: + pv = pd.pivot(df, "a", "b", "c") + assert pv.notna().values.sum() == len(df) + + for _, row in df.iterrows(): + assert pv.loc[row["a"], row["b"]] == row["c"] + + if method: + result = df.pivot("b", "a", "c") + else: + result = pd.pivot(df, "b", "a", "c") + tm.assert_frame_equal(result, pv.T) + + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_tz(self, method): + # GH 5878 + df = DataFrame( + { + "dt1": [ + datetime(2013, 1, 1, 9, 0), + datetime(2013, 1, 2, 9, 0), + datetime(2013, 1, 1, 9, 0), + datetime(2013, 1, 2, 9, 0), + ], + "dt2": [ + datetime(2014, 1, 1, 9, 0), + datetime(2014, 1, 1, 9, 0), + datetime(2014, 1, 2, 9, 0), + datetime(2014, 1, 2, 9, 0), + ], 
+ "data1": np.arange(4, dtype="int64"), + "data2": np.arange(4, dtype="int64"), + } + ) + + df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific")) + df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo")) + + exp_col1 = Index(["data1", "data1", "data2", "data2"]) + exp_col2 = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"] * 2, name="dt2", tz="Asia/Tokyo" + ) + exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2]) + expected = DataFrame( + [[0, 2, 0, 2], [1, 3, 1, 3]], + index=pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ), + columns=exp_col, + ) + + if method: + pv = df.pivot(index="dt1", columns="dt2") + else: + pv = pd.pivot(df, index="dt1", columns="dt2") + tm.assert_frame_equal(pv, expected) + + expected = DataFrame( + [[0, 2], [1, 3]], + index=pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ), + columns=pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ), + ) + + if method: + pv = df.pivot(index="dt1", columns="dt2", values="data1") + else: + pv = pd.pivot(df, index="dt1", columns="dt2", values="data1") + tm.assert_frame_equal(pv, expected) + + def test_pivot_tz_in_values(self): + # GH 14948 + df = pd.DataFrame( + [ + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-12 13:00:00-0700", tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-12 14:00:00-0700", tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"), + }, + { + "uid": "aa", + "ts": pd.Timestamp("2016-08-25 13:00:00-0700", tz="US/Pacific"), + }, + ] + ) + + df = df.set_index("ts").reset_index() + mins = df.ts.map(lambda x: x.replace(hour=0, minute=0, second=0, microsecond=0)) + + result = pd.pivot_table( + df.set_index("ts").reset_index(), + values="ts", + 
index=["uid"], + columns=[mins], + aggfunc=np.min, + ) + expected = pd.DataFrame( + [ + [ + pd.Timestamp("2016-08-12 08:00:00-0700", tz="US/Pacific"), + pd.Timestamp("2016-08-25 11:00:00-0700", tz="US/Pacific"), + ] + ], + index=pd.Index(["aa"], name="uid"), + columns=pd.DatetimeIndex( + [ + pd.Timestamp("2016-08-12 00:00:00", tz="US/Pacific"), + pd.Timestamp("2016-08-25 00:00:00", tz="US/Pacific"), + ], + name="ts", + ), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_periods(self, method): + df = DataFrame( + { + "p1": [ + pd.Period("2013-01-01", "D"), + pd.Period("2013-01-02", "D"), + pd.Period("2013-01-01", "D"), + pd.Period("2013-01-02", "D"), + ], + "p2": [ + pd.Period("2013-01", "M"), + pd.Period("2013-01", "M"), + pd.Period("2013-02", "M"), + pd.Period("2013-02", "M"), + ], + "data1": np.arange(4, dtype="int64"), + "data2": np.arange(4, dtype="int64"), + } + ) + + exp_col1 = Index(["data1", "data1", "data2", "data2"]) + exp_col2 = pd.PeriodIndex(["2013-01", "2013-02"] * 2, name="p2", freq="M") + exp_col = pd.MultiIndex.from_arrays([exp_col1, exp_col2]) + expected = DataFrame( + [[0, 2, 0, 2], [1, 3, 1, 3]], + index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"), + columns=exp_col, + ) + if method: + pv = df.pivot(index="p1", columns="p2") + else: + pv = pd.pivot(df, index="p1", columns="p2") + tm.assert_frame_equal(pv, expected) + + expected = DataFrame( + [[0, 2], [1, 3]], + index=pd.PeriodIndex(["2013-01-01", "2013-01-02"], name="p1", freq="D"), + columns=pd.PeriodIndex(["2013-01", "2013-02"], name="p2", freq="M"), + ) + if method: + pv = df.pivot(index="p1", columns="p2", values="data1") + else: + pv = pd.pivot(df, index="p1", columns="p2", values="data1") + tm.assert_frame_equal(pv, expected) + + def test_pivot_periods_with_margins(self): + # GH 28323 + df = DataFrame( + { + "a": [1, 1, 2, 2], + "b": [ + pd.Period("2019Q1"), + pd.Period("2019Q2"), + 
pd.Period("2019Q1"), + pd.Period("2019Q2"), + ], + "x": 1.0, + } + ) + + expected = DataFrame( + data=1.0, + index=pd.Index([1, 2, "All"], name="a"), + columns=pd.Index( + [pd.Period("2019Q1"), pd.Period("2019Q2"), "All"], name="b" + ), + ) + + result = df.pivot_table(index="a", columns="b", values="x", margins=True) + tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "values", + [ + ["baz", "zoo"], + np.array(["baz", "zoo"]), + pd.Series(["baz", "zoo"]), + pd.Index(["baz", "zoo"]), + ], + ) + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_list_like_values(self, values, method): + # issue #17160 + df = pd.DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": ["A", "B", "C", "A", "B", "C"], + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + + if method: + result = df.pivot(index="foo", columns="bar", values=values) + else: + result = pd.pivot(df, index="foo", columns="bar", values=values) + + data = [[1, 2, 3, "x", "y", "z"], [4, 5, 6, "q", "w", "t"]] + index = Index(data=["one", "two"], name="foo") + columns = MultiIndex( + levels=[["baz", "zoo"], ["A", "B", "C"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + names=[None, "bar"], + ) + expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "values", + [ + ["bar", "baz"], + np.array(["bar", "baz"]), + pd.Series(["bar", "baz"]), + pd.Index(["bar", "baz"]), + ], + ) + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_list_like_values_nans(self, values, method): + # issue #17160 + df = pd.DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": ["A", "B", "C", "A", "B", "C"], + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + + if method: + result = df.pivot(index="zoo", columns="foo", values=values) + else: + result = pd.pivot(df, index="zoo", 
columns="foo", values=values) + + data = [ + [np.nan, "A", np.nan, 4], + [np.nan, "C", np.nan, 6], + [np.nan, "B", np.nan, 5], + ["A", np.nan, 1, np.nan], + ["B", np.nan, 2, np.nan], + ["C", np.nan, 3, np.nan], + ] + index = Index(data=["q", "t", "w", "x", "y", "z"], name="zoo") + columns = MultiIndex( + levels=[["bar", "baz"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[None, "foo"], + ) + expected = DataFrame(data=data, index=index, columns=columns, dtype="object") + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail( + reason="MultiIndexed unstack with tuple names fails with KeyError GH#19966" + ) + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_multiindex(self, method): + # issue #17160 + index = Index(data=[0, 1, 2, 3, 4, 5]) + data = [ + ["one", "A", 1, "x"], + ["one", "B", 2, "y"], + ["one", "C", 3, "z"], + ["two", "A", 4, "q"], + ["two", "B", 5, "w"], + ["two", "C", 6, "t"], + ] + columns = MultiIndex( + levels=[["bar", "baz"], ["first", "second"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + ) + df = DataFrame(data=data, index=index, columns=columns, dtype="object") + if method: + result = df.pivot( + index=("bar", "first"), + columns=("bar", "second"), + values=("baz", "first"), + ) + else: + result = pd.pivot( + df, + index=("bar", "first"), + columns=("bar", "second"), + values=("baz", "first"), + ) + + data = { + "A": Series([1, 4], index=["one", "two"]), + "B": Series([2, 5], index=["one", "two"]), + "C": Series([3, 6], index=["one", "two"]), + } + expected = DataFrame(data) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("method", [True, False]) + def test_pivot_with_tuple_of_values(self, method): + # issue #17160 + df = pd.DataFrame( + { + "foo": ["one", "one", "one", "two", "two", "two"], + "bar": ["A", "B", "C", "A", "B", "C"], + "baz": [1, 2, 3, 4, 5, 6], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + with pytest.raises(KeyError, match=r"^\('bar', 'baz'\)$"): + # 
tuple is seen as a single column name + if method: + df.pivot(index="zoo", columns="foo", values=("bar", "baz")) + else: + pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz")) + + def test_margins(self): + def _check_output( + result, values_col, index=["A", "B"], columns=["C"], margins_col="All" + ): + col_margins = result.loc[result.index[:-1], margins_col] + expected_col_margins = self.data.groupby(index)[values_col].mean() + tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) + assert col_margins.name == margins_col + + result = result.sort_index() + index_margins = result.loc[(margins_col, "")].iloc[:-1] + + expected_ix_margins = self.data.groupby(columns)[values_col].mean() + tm.assert_series_equal( + index_margins, expected_ix_margins, check_names=False + ) + assert index_margins.name == (margins_col, "") + + grand_total_margins = result.loc[(margins_col, ""), margins_col] + expected_total_margins = self.data[values_col].mean() + assert grand_total_margins == expected_total_margins + + # column specified + result = self.data.pivot_table( + values="D", index=["A", "B"], columns="C", margins=True, aggfunc=np.mean + ) + _check_output(result, "D") + + # Set a different margins_name (not 'All') + result = self.data.pivot_table( + values="D", + index=["A", "B"], + columns="C", + margins=True, + aggfunc=np.mean, + margins_name="Totals", + ) + _check_output(result, "D", margins_col="Totals") + + # no column specified + table = self.data.pivot_table( + index=["A", "B"], columns="C", margins=True, aggfunc=np.mean + ) + for value_col in table.columns.levels[0]: + _check_output(table[value_col], value_col) + + # no col + + # to help with a buglet + self.data.columns = [k * 2 for k in self.data.columns] + table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc=np.mean) + for value_col in table.columns: + totals = table.loc[("All", ""), value_col] + assert totals == self.data[value_col].mean() + + # no rows + rtable = 
self.data.pivot_table( + columns=["AA", "BB"], margins=True, aggfunc=np.mean + ) + assert isinstance(rtable, Series) + + table = self.data.pivot_table(index=["AA", "BB"], margins=True, aggfunc="mean") + for item in ["DD", "EE", "FF"]: + totals = table.loc[("All", ""), item] + assert totals == self.data[item].mean() + + def test_margins_dtype(self): + # GH 17013 + + df = self.data.copy() + df[["D", "E", "F"]] = np.arange(len(df) * 3).reshape(len(df), 3) + + mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")] + mi = MultiIndex.from_tuples(mi_val, names=("A", "B")) + expected = DataFrame( + {"dull": [12, 21, 3, 9, 45], "shiny": [33, 0, 36, 51, 120]}, index=mi + ).rename_axis("C", axis=1) + expected["All"] = expected["dull"] + expected["shiny"] + + result = df.pivot_table( + values="D", + index=["A", "B"], + columns="C", + margins=True, + aggfunc=np.sum, + fill_value=0, + ) + + tm.assert_frame_equal(expected, result) + + @pytest.mark.xfail(reason="GH#17035 (len of floats is casted back to floats)") + def test_margins_dtype_len(self): + mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")] + mi = MultiIndex.from_tuples(mi_val, names=("A", "B")) + expected = DataFrame( + {"dull": [1, 1, 2, 1, 5], "shiny": [2, 0, 2, 2, 6]}, index=mi + ).rename_axis("C", axis=1) + expected["All"] = expected["dull"] + expected["shiny"] + + result = self.data.pivot_table( + values="D", + index=["A", "B"], + columns="C", + margins=True, + aggfunc=len, + fill_value=0, + ) + + tm.assert_frame_equal(expected, result) + + def test_pivot_integer_columns(self): + # caused by upstream bug in unstack + + d = date.min + data = list( + product( + ["foo", "bar"], + ["A", "B", "C"], + ["x1", "x2"], + [d + timedelta(i) for i in range(20)], + [1.0], + ) + ) + df = DataFrame(data) + table = df.pivot_table(values=4, index=[0, 1, 3], columns=[2]) + + df2 = df.rename(columns=str) + table2 = df2.pivot_table(values="4", index=["0", "1", "3"], columns=["2"]) + + 
tm.assert_frame_equal(table, table2, check_names=False) + + def test_pivot_no_level_overlap(self): + # GH #1181 + + data = DataFrame( + { + "a": ["a", "a", "a", "a", "b", "b", "b", "b"] * 2, + "b": [0, 0, 0, 0, 1, 1, 1, 1] * 2, + "c": (["foo"] * 4 + ["bar"] * 4) * 2, + "value": np.random.randn(16), + } + ) + + table = data.pivot_table("value", index="a", columns=["b", "c"]) + + grouped = data.groupby(["a", "b", "c"])["value"].mean() + expected = grouped.unstack("b").unstack("c").dropna(axis=1, how="all") + tm.assert_frame_equal(table, expected) + + def test_pivot_columns_lexsorted(self): + + n = 10000 + + dtype = np.dtype( + [ + ("Index", object), + ("Symbol", object), + ("Year", int), + ("Month", int), + ("Day", int), + ("Quantity", int), + ("Price", float), + ] + ) + + products = np.array( + [ + ("SP500", "ADBE"), + ("SP500", "NVDA"), + ("SP500", "ORCL"), + ("NDQ100", "AAPL"), + ("NDQ100", "MSFT"), + ("NDQ100", "GOOG"), + ("FTSE", "DGE.L"), + ("FTSE", "TSCO.L"), + ("FTSE", "GSK.L"), + ], + dtype=[("Index", object), ("Symbol", object)], + ) + items = np.empty(n, dtype=dtype) + iproduct = np.random.randint(0, len(products), n) + items["Index"] = products["Index"][iproduct] + items["Symbol"] = products["Symbol"][iproduct] + dr = pd.date_range(date(2000, 1, 1), date(2010, 12, 31)) + dates = dr[np.random.randint(0, len(dr), n)] + items["Year"] = dates.year + items["Month"] = dates.month + items["Day"] = dates.day + items["Price"] = np.random.lognormal(4.0, 2.0, n) + + df = DataFrame(items) + + pivoted = df.pivot_table( + "Price", + index=["Month", "Day"], + columns=["Index", "Symbol", "Year"], + aggfunc="mean", + ) + + assert pivoted.columns.is_monotonic + + def test_pivot_complex_aggfunc(self): + f = {"D": ["std"], "E": ["sum"]} + expected = self.data.groupby(["A", "B"]).agg(f).unstack("B") + result = self.data.pivot_table(index="A", columns="B", aggfunc=f) + + tm.assert_frame_equal(result, expected) + + def test_margins_no_values_no_cols(self): + # Regression test 
on pivot table: no values or cols passed. + result = self.data[["A", "B"]].pivot_table( + index=["A", "B"], aggfunc=len, margins=True + ) + result_list = result.tolist() + assert sum(result_list[:-1]) == result_list[-1] + + def test_margins_no_values_two_rows(self): + # Regression test on pivot table: no values passed but rows are a + # multi-index + result = self.data[["A", "B", "C"]].pivot_table( + index=["A", "B"], columns="C", aggfunc=len, margins=True + ) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] + + def test_margins_no_values_one_row_one_col(self): + # Regression test on pivot table: no values passed but row and col + # defined + result = self.data[["A", "B"]].pivot_table( + index="A", columns="B", aggfunc=len, margins=True + ) + assert result.All.tolist() == [4.0, 7.0, 11.0] + + def test_margins_no_values_two_row_two_cols(self): + # Regression test on pivot table: no values passed but rows and cols + # are multi-indexed + self.data["D"] = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"] + result = self.data[["A", "B", "C", "D"]].pivot_table( + index=["A", "B"], columns=["C", "D"], aggfunc=len, margins=True + ) + assert result.All.tolist() == [3.0, 1.0, 4.0, 3.0, 11.0] + + @pytest.mark.parametrize("margin_name", ["foo", "one", 666, None, ["a", "b"]]) + def test_pivot_table_with_margins_set_margin_name(self, margin_name): + # see gh-3335 + msg = ( + r'Conflicting name "{}" in margins|' + "margins_name argument must be a string" + ).format(margin_name) + with pytest.raises(ValueError, match=msg): + # multi-index index + pivot_table( + self.data, + values="D", + index=["A", "B"], + columns=["C"], + margins=True, + margins_name=margin_name, + ) + with pytest.raises(ValueError, match=msg): + # multi-index column + pivot_table( + self.data, + values="D", + index=["C"], + columns=["A", "B"], + margins=True, + margins_name=margin_name, + ) + with pytest.raises(ValueError, match=msg): + # non-multi-index index/column + pivot_table( + self.data, 
+ values="D", + index=["A"], + columns=["B"], + margins=True, + margins_name=margin_name, + ) + + def test_pivot_timegrouper(self): + df = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 1, 1), + datetime(2013, 1, 1), + datetime(2013, 10, 1), + datetime(2013, 10, 2), + datetime(2013, 10, 1), + datetime(2013, 10, 2), + datetime(2013, 12, 2), + datetime(2013, 12, 2), + ], + } + ).set_index("Date") + + expected = DataFrame( + np.array([10, 18, 3], dtype="int64").reshape(1, 3), + index=[datetime(2013, 12, 31)], + columns="Carl Joe Mark".split(), + ) + expected.index.name = "Date" + expected.columns.name = "Buyer" + + result = pivot_table( + df, + index=Grouper(freq="A"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="A"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + expected = DataFrame( + np.array([1, np.nan, 3, 9, 18, np.nan]).reshape(2, 3), + index=[datetime(2013, 1, 1), datetime(2013, 7, 1)], + columns="Carl Joe Mark".split(), + ) + expected.index.name = "Date" + expected.columns.name = "Buyer" + + result = pivot_table( + df, + index=Grouper(freq="6MS"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + # passing the name + df = df.reset_index() + result = pivot_table( + df, + index=Grouper(freq="6MS", key="Date"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", key="Date"), + values="Quantity", + 
aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + msg = "'The grouper name foo is not found'" + with pytest.raises(KeyError, match=msg): + pivot_table( + df, + index=Grouper(freq="6MS", key="foo"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + with pytest.raises(KeyError, match=msg): + pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", key="foo"), + values="Quantity", + aggfunc=np.sum, + ) + + # passing the level + df = df.set_index("Date") + result = pivot_table( + df, + index=Grouper(freq="6MS", level="Date"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", level="Date"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + msg = "The level foo is not valid" + with pytest.raises(ValueError, match=msg): + pivot_table( + df, + index=Grouper(freq="6MS", level="foo"), + columns="Buyer", + values="Quantity", + aggfunc=np.sum, + ) + with pytest.raises(ValueError, match=msg): + pivot_table( + df, + index="Buyer", + columns=Grouper(freq="6MS", level="foo"), + values="Quantity", + aggfunc=np.sum, + ) + + # double grouper + df = DataFrame( + { + "Branch": "A A A A A A A B".split(), + "Buyer": "Carl Mark Carl Carl Joe Joe Joe Carl".split(), + "Quantity": [1, 3, 5, 1, 8, 1, 9, 3], + "Date": [ + datetime(2013, 11, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 11, 1, 20, 0), + datetime(2013, 10, 2, 10, 0), + datetime(2013, 10, 2, 12, 0), + datetime(2013, 12, 5, 14, 0), + ], + "PayDay": [ + datetime(2013, 10, 4, 0, 0), + datetime(2013, 10, 15, 13, 5), + datetime(2013, 9, 5, 20, 0), + datetime(2013, 11, 2, 10, 0), + datetime(2013, 10, 7, 20, 0), + datetime(2013, 9, 5, 10, 0), + datetime(2013, 12, 30, 12, 0), + datetime(2013, 11, 20, 14, 0), + ], + } + ) + + result = pivot_table( + 
df, + index=Grouper(freq="M", key="Date"), + columns=Grouper(freq="M", key="PayDay"), + values="Quantity", + aggfunc=np.sum, + ) + expected = DataFrame( + np.array( + [ + np.nan, + 3, + np.nan, + np.nan, + 6, + np.nan, + 1, + 9, + np.nan, + 9, + np.nan, + np.nan, + np.nan, + np.nan, + 3, + np.nan, + ] + ).reshape(4, 4), + index=[ + datetime(2013, 9, 30), + datetime(2013, 10, 31), + datetime(2013, 11, 30), + datetime(2013, 12, 31), + ], + columns=[ + datetime(2013, 9, 30), + datetime(2013, 10, 31), + datetime(2013, 11, 30), + datetime(2013, 12, 31), + ], + ) + expected.index.name = "Date" + expected.columns.name = "PayDay" + + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=Grouper(freq="M", key="PayDay"), + columns=Grouper(freq="M", key="Date"), + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + tuples = [ + (datetime(2013, 9, 30), datetime(2013, 10, 31)), + (datetime(2013, 10, 31), datetime(2013, 9, 30)), + (datetime(2013, 10, 31), datetime(2013, 11, 30)), + (datetime(2013, 10, 31), datetime(2013, 12, 31)), + (datetime(2013, 11, 30), datetime(2013, 10, 31)), + (datetime(2013, 12, 31), datetime(2013, 11, 30)), + ] + idx = MultiIndex.from_tuples(tuples, names=["Date", "PayDay"]) + expected = DataFrame( + np.array( + [3, np.nan, 6, np.nan, 1, np.nan, 9, np.nan, 9, np.nan, np.nan, 3] + ).reshape(6, 2), + index=idx, + columns=["A", "B"], + ) + expected.columns.name = "Branch" + + result = pivot_table( + df, + index=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], + columns=["Branch"], + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=["Branch"], + columns=[Grouper(freq="M", key="Date"), Grouper(freq="M", key="PayDay")], + values="Quantity", + aggfunc=np.sum, + ) + tm.assert_frame_equal(result, expected.T) + + def test_pivot_datetime_tz(self): + dates1 = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + 
"2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + dates2 = [ + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "dt1": dates1, + "dt2": dates2, + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d, tz="US/Pacific")) + df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d, tz="Asia/Tokyo")) + + exp_idx = pd.DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="US/Pacific", + name="dt1", + ) + exp_col1 = Index(["value1", "value1"]) + exp_col2 = Index(["a", "b"], name="label") + exp_col = MultiIndex.from_arrays([exp_col1, exp_col2]) + expected = DataFrame([[0, 3], [1, 4], [2, 5]], index=exp_idx, columns=exp_col) + result = pivot_table(df, index=["dt1"], columns=["label"], values=["value1"]) + tm.assert_frame_equal(result, expected) + + exp_col1 = Index(["sum", "sum", "sum", "sum", "mean", "mean", "mean", "mean"]) + exp_col2 = Index(["value1", "value1", "value2", "value2"] * 2) + exp_col3 = pd.DatetimeIndex( + ["2013-01-01 15:00:00", "2013-02-01 15:00:00"] * 4, + tz="Asia/Tokyo", + name="dt2", + ) + exp_col = MultiIndex.from_arrays([exp_col1, exp_col2, exp_col3]) + expected = DataFrame( + np.array( + [ + [0, 3, 1, 2, 0, 3, 1, 2], + [1, 4, 2, 1, 1, 4, 2, 1], + [2, 5, 1, 2, 2, 5, 1, 2], + ], + dtype="int64", + ), + index=exp_idx, + columns=exp_col, + ) + + result = pivot_table( + df, + index=["dt1"], + columns=["dt2"], + values=["value1", "value2"], + aggfunc=[np.sum, np.mean], + ) + tm.assert_frame_equal(result, expected) + + def test_pivot_dtaccessor(self): + # GH 8103 + dates1 = [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ] + 
dates2 = [ + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-01-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + "2013-02-01 15:00:00", + ] + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "dt1": dates1, + "dt2": dates2, + "value1": np.arange(6, dtype="int64"), + "value2": [1, 2] * 3, + } + ) + df["dt1"] = df["dt1"].apply(lambda d: pd.Timestamp(d)) + df["dt2"] = df["dt2"].apply(lambda d: pd.Timestamp(d)) + + result = pivot_table( + df, index="label", columns=df["dt1"].dt.hour, values="value1" + ) + + exp_idx = Index(["a", "b"], name="label") + expected = DataFrame( + {7: [0, 3], 8: [1, 4], 9: [2, 5]}, + index=exp_idx, + columns=Index([7, 8, 9], name="dt1"), + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, index=df["dt2"].dt.month, columns=df["dt1"].dt.hour, values="value1" + ) + + expected = DataFrame( + {7: [0, 3], 8: [1, 4], 9: [2, 5]}, + index=Index([1, 2], name="dt2"), + columns=Index([7, 8, 9], name="dt1"), + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=df["dt2"].dt.year.values, + columns=[df["dt1"].dt.hour, df["dt2"].dt.month], + values="value1", + ) + + exp_col = MultiIndex.from_arrays( + [[7, 7, 8, 8, 9, 9], [1, 2] * 3], names=["dt1", "dt2"] + ) + expected = DataFrame( + np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), index=[2013], columns=exp_col + ) + tm.assert_frame_equal(result, expected) + + result = pivot_table( + df, + index=np.array(["X", "X", "X", "X", "Y", "Y"]), + columns=[df["dt1"].dt.hour, df["dt2"].dt.month], + values="value1", + ) + expected = DataFrame( + np.array( + [[0, 3, 1, np.nan, 2, np.nan], [np.nan, np.nan, np.nan, 4, np.nan, 5]] + ), + index=["X", "Y"], + columns=exp_col, + ) + tm.assert_frame_equal(result, expected) + + def test_daily(self): + rng = date_range("1/1/2000", "12/31/2004", freq="D") + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = pivot_table( + DataFrame(ts), index=ts.index.year, 
columns=ts.index.dayofyear + ) + annual.columns = annual.columns.droplevel(0) + + doy = np.asarray(ts.index.dayofyear) + + for i in range(1, 367): + subset = ts[doy == i] + subset.index = subset.index.year + + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i + + def test_monthly(self): + rng = date_range("1/1/2000", "12/31/2004", freq="M") + ts = Series(np.random.randn(len(rng)), index=rng) + + annual = pivot_table( + pd.DataFrame(ts), index=ts.index.year, columns=ts.index.month + ) + annual.columns = annual.columns.droplevel(0) + + month = ts.index.month + for i in range(1, 13): + subset = ts[month == i] + subset.index = subset.index.year + result = annual[i].dropna() + tm.assert_series_equal(result, subset, check_names=False) + assert result.name == i + + def test_pivot_table_with_iterator_values(self): + # GH 12017 + aggs = {"D": "sum", "E": "mean"} + + pivot_values_list = pd.pivot_table( + self.data, index=["A"], values=list(aggs.keys()), aggfunc=aggs + ) + + pivot_values_keys = pd.pivot_table( + self.data, index=["A"], values=aggs.keys(), aggfunc=aggs + ) + tm.assert_frame_equal(pivot_values_keys, pivot_values_list) + + agg_values_gen = (value for value in aggs.keys()) + pivot_values_gen = pd.pivot_table( + self.data, index=["A"], values=agg_values_gen, aggfunc=aggs + ) + tm.assert_frame_equal(pivot_values_gen, pivot_values_list) + + def test_pivot_table_margins_name_with_aggfunc_list(self): + # GH 13354 + margins_name = "Weekly" + costs = pd.DataFrame( + { + "item": ["bacon", "cheese", "bacon", "cheese"], + "cost": [2.5, 4.5, 3.2, 3.3], + "day": ["M", "M", "T", "T"], + } + ) + table = costs.pivot_table( + index="item", + columns="day", + margins=True, + margins_name=margins_name, + aggfunc=[np.mean, max], + ) + ix = pd.Index(["bacon", "cheese", margins_name], dtype="object", name="item") + tups = [ + ("mean", "cost", "M"), + ("mean", "cost", "T"), + ("mean", "cost", margins_name), + ("max", 
"cost", "M"), + ("max", "cost", "T"), + ("max", "cost", margins_name), + ] + cols = pd.MultiIndex.from_tuples(tups, names=[None, None, "day"]) + expected = pd.DataFrame(table.values, index=ix, columns=cols) + tm.assert_frame_equal(table, expected) + + @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") + def test_categorical_margins(self, observed): + # GH 10989 + df = pd.DataFrame( + {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} + ) + + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, "All"], name="y") + expected.columns = Index([0, 1, "All"], name="z") + + table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) + tm.assert_frame_equal(table, expected) + + @pytest.mark.xfail(reason="GH#17035 (np.mean of ints is casted back to ints)") + def test_categorical_margins_category(self, observed): + df = pd.DataFrame( + {"x": np.arange(8), "y": np.arange(8) // 4, "z": np.arange(8) % 2} + ) + + expected = pd.DataFrame([[1.0, 2.0, 1.5], [5, 6, 5.5], [3, 4, 3.5]]) + expected.index = Index([0, 1, "All"], name="y") + expected.columns = Index([0, 1, "All"], name="z") + + df.y = df.y.astype("category") + df.z = df.z.astype("category") + table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) + tm.assert_frame_equal(table, expected) + + def test_margins_casted_to_float(self, observed): + # GH 24893 + df = pd.DataFrame( + { + "A": [2, 4, 6, 8], + "B": [1, 4, 5, 8], + "C": [1, 3, 4, 6], + "D": ["X", "X", "Y", "Y"], + } + ) + + result = pd.pivot_table(df, index="D", margins=True) + expected = pd.DataFrame( + {"A": [3, 7, 5], "B": [2.5, 6.5, 4.5], "C": [2, 5, 3.5]}, + index=pd.Index(["X", "Y", "All"], name="D"), + ) + tm.assert_frame_equal(result, expected) + + def test_pivot_with_categorical(self, observed, ordered_fixture): + # gh-21370 + idx = [np.nan, "low", "high", "low", np.nan] + col = [np.nan, "A", "B", np.nan, "A"] + df = pd.DataFrame( + { + "In": 
pd.Categorical( + idx, categories=["low", "high"], ordered=ordered_fixture + ), + "Col": pd.Categorical( + col, categories=["A", "B"], ordered=ordered_fixture + ), + "Val": range(1, 6), + } + ) + # case with index/columns/value + result = df.pivot_table( + index="In", columns="Col", values="Val", observed=observed + ) + + expected_cols = pd.CategoricalIndex( + ["A", "B"], ordered=ordered_fixture, name="Col" + ) + + expected = pd.DataFrame( + data=[[2.0, np.nan], [np.nan, 3.0]], columns=expected_cols + ) + expected.index = Index( + pd.Categorical( + ["low", "high"], categories=["low", "high"], ordered=ordered_fixture + ), + name="In", + ) + + tm.assert_frame_equal(result, expected) + + # case with columns/value + result = df.pivot_table(columns="Col", values="Val", observed=observed) + + expected = pd.DataFrame( + data=[[3.5, 3.0]], columns=expected_cols, index=Index(["Val"]) + ) + + tm.assert_frame_equal(result, expected) + + def test_categorical_aggfunc(self, observed): + # GH 9534 + df = pd.DataFrame( + {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]} + ) + df["C1"] = df["C1"].astype("category") + result = df.pivot_table( + "V", index="C1", columns="C2", dropna=observed, aggfunc="count" + ) + + expected_index = pd.CategoricalIndex( + ["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1" + ) + expected_columns = pd.Index(["a", "b"], name="C2") + expected_data = np.array([[1.0, np.nan], [1.0, np.nan], [np.nan, 2.0]]) + expected = pd.DataFrame( + expected_data, index=expected_index, columns=expected_columns + ) + tm.assert_frame_equal(result, expected) + + def test_categorical_pivot_index_ordering(self, observed): + # GH 8731 + df = pd.DataFrame( + { + "Sales": [100, 120, 220], + "Month": ["January", "January", "January"], + "Year": [2013, 2014, 2013], + } + ) + months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + ] + 
df["Month"] = df["Month"].astype("category").cat.set_categories(months) + result = df.pivot_table( + values="Sales", + index="Month", + columns="Year", + dropna=observed, + aggfunc="sum", + ) + expected_columns = pd.Int64Index([2013, 2014], name="Year") + expected_index = pd.CategoricalIndex( + ["January"], categories=months, ordered=False, name="Month" + ) + expected = pd.DataFrame( + [[320, 120]], index=expected_index, columns=expected_columns + ) + if not observed: + result = result.dropna().astype(np.int64) + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_not_series(self): + # GH 4386 + # pivot_table always returns a DataFrame + # when values is not list like and columns is None + # and aggfunc is not instance of list + df = DataFrame({"col1": [3, 4, 5], "col2": ["C", "D", "E"], "col3": [1, 3, 9]}) + + result = df.pivot_table("col1", index=["col3", "col2"], aggfunc=np.sum) + m = MultiIndex.from_arrays([[1, 3, 9], ["C", "D", "E"]], names=["col3", "col2"]) + expected = DataFrame([3, 4, 5], index=m, columns=["col1"]) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table("col1", index="col3", columns="col2", aggfunc=np.sum) + expected = DataFrame( + [[3, np.NaN, np.NaN], [np.NaN, 4, np.NaN], [np.NaN, np.NaN, 5]], + index=Index([1, 3, 9], name="col3"), + columns=Index(["C", "D", "E"], name="col2"), + ) + + tm.assert_frame_equal(result, expected) + + result = df.pivot_table("col1", index="col3", aggfunc=[np.sum]) + m = MultiIndex.from_arrays([["sum"], ["col1"]]) + expected = DataFrame([3, 4, 5], index=Index([1, 3, 9], name="col3"), columns=m) + + tm.assert_frame_equal(result, expected) + + def test_pivot_margins_name_unicode(self): + # issue #13292 + greek = "\u0394\u03bf\u03ba\u03b9\u03bc\u03ae" + frame = pd.DataFrame({"foo": [1, 2, 3]}) + table = pd.pivot_table( + frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek + ) + index = pd.Index([1, 2, 3, greek], dtype="object", name="foo") + expected = 
pd.DataFrame(index=index) + tm.assert_frame_equal(table, expected) + + def test_pivot_string_as_func(self): + # GH #18713 + # for correctness purposes + data = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": range(11), + } + ) + + result = pivot_table(data, index="A", columns="B", aggfunc="sum") + mi = MultiIndex( + levels=[["C"], ["one", "two"]], codes=[[0, 0], [0, 1]], names=[None, "B"] + ) + expected = DataFrame( + {("C", "one"): {"bar": 15, "foo": 13}, ("C", "two"): {"bar": 7, "foo": 20}}, + columns=mi, + ).rename_axis("A") + tm.assert_frame_equal(result, expected) + + result = pivot_table(data, index="A", columns="B", aggfunc=["sum", "mean"]) + mi = MultiIndex( + levels=[["sum", "mean"], ["C"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]], + names=[None, None, "B"], + ) + expected = DataFrame( + { + ("mean", "C", "one"): {"bar": 5.0, "foo": 3.25}, + ("mean", "C", "two"): {"bar": 7.0, "foo": 6.666666666666667}, + ("sum", "C", "one"): {"bar": 15, "foo": 13}, + ("sum", "C", "two"): {"bar": 7, "foo": 20}, + }, + columns=mi, + ).rename_axis("A") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "f, f_numpy", + [ + ("sum", np.sum), + ("mean", np.mean), + ("std", np.std), + (["sum", "mean"], [np.sum, np.mean]), + (["sum", "std"], [np.sum, np.std]), + (["std", "mean"], [np.std, np.mean]), + ], + ) + def test_pivot_string_func_vs_func(self, f, f_numpy): + # GH #18713 + # for consistency purposes + result = pivot_table(self.data, index="A", columns="B", aggfunc=f) + expected = pivot_table(self.data, index="A", columns="B", aggfunc=f_numpy) + tm.assert_frame_equal(result, expected) + + @pytest.mark.slow + def test_pivot_number_of_levels_larger_than_int32(self): + # GH 20601 + df = DataFrame( + {"ind1": np.arange(2 ** 16), 
"ind2": np.arange(2 ** 16), "count": 0} + ) + + msg = "Unstacked DataFrame is too big, causing int32 overflow" + with pytest.raises(ValueError, match=msg): + df.pivot_table( + index="ind1", columns="ind2", values="count", aggfunc="count" + ) + + def test_pivot_table_aggfunc_dropna(self, dropna): + # GH 22159 + df = pd.DataFrame( + { + "fruit": ["apple", "peach", "apple"], + "size": [1, 1, 2], + "taste": [7, 6, 6], + } + ) + + def ret_one(x): + return 1 + + def ret_sum(x): + return sum(x) + + def ret_none(x): + return np.nan + + result = pd.pivot_table( + df, columns="fruit", aggfunc=[ret_sum, ret_none, ret_one], dropna=dropna + ) + + data = [[3, 1, np.nan, np.nan, 1, 1], [13, 6, np.nan, np.nan, 1, 1]] + col = pd.MultiIndex.from_product( + [["ret_sum", "ret_none", "ret_one"], ["apple", "peach"]], + names=[None, "fruit"], + ) + expected = pd.DataFrame(data, index=["size", "taste"], columns=col) + + if dropna: + expected = expected.dropna(axis="columns") + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_aggfunc_scalar_dropna(self, dropna): + # GH 22159 + df = pd.DataFrame( + {"A": ["one", "two", "one"], "x": [3, np.nan, 2], "y": [1, np.nan, np.nan]} + ) + + result = pd.pivot_table(df, columns="A", aggfunc=np.mean, dropna=dropna) + + data = [[2.5, np.nan], [1, np.nan]] + col = pd.Index(["one", "two"], name="A") + expected = pd.DataFrame(data, index=["x", "y"], columns=col) + + if dropna: + expected = expected.dropna(axis="columns") + + tm.assert_frame_equal(result, expected) + + def test_pivot_table_empty_aggfunc(self): + # GH 9186 + df = pd.DataFrame( + { + "A": [2, 2, 3, 3, 2], + "id": [5, 6, 7, 8, 9], + "C": ["p", "q", "q", "p", "q"], + "D": [None, None, None, None, None], + } + ) + result = df.pivot_table(index="A", columns="D", values="id", aggfunc=np.size) + expected = pd.DataFrame() + tm.assert_frame_equal(result, expected) + + def test_pivot_table_no_column_raises(self): + # GH 10326 + def agg(l): + return np.mean(l) + + foo = pd.DataFrame( + 
{"X": [0, 0, 1, 1], "Y": [0, 1, 0, 1], "Z": [10, 20, 30, 40]} + ) + with pytest.raises(KeyError, match="notpresent"): + foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) + + +class TestCrosstab: + def setup_method(self, method): + df = DataFrame( + { + "A": [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + "B": [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + "C": [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + "D": np.random.randn(11), + "E": np.random.randn(11), + "F": np.random.randn(11), + } + ) + + self.df = df.append(df, ignore_index=True) + + def test_crosstab_single(self): + df = self.df + result = crosstab(df["A"], df["C"]) + expected = df.groupby(["A", "C"]).size().unstack() + tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64)) + + def test_crosstab_multiple(self): + df = self.df + + result = crosstab(df["A"], [df["B"], df["C"]]) + expected = df.groupby(["A", "B", "C"]).size() + expected = expected.unstack("B").unstack("C").fillna(0).astype(np.int64) + tm.assert_frame_equal(result, expected) + + result = crosstab([df["B"], df["C"]], df["A"]) + expected = df.groupby(["B", "C", "A"]).size() + expected = expected.unstack("A").fillna(0).astype(np.int64) + tm.assert_frame_equal(result, expected) + + def test_crosstab_ndarray(self): + a = np.random.randint(0, 5, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 10, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c")) + expected = crosstab(df["a"], [df["b"], df["c"]]) + tm.assert_frame_equal(result, expected) + + result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c")) + expected = crosstab([df["b"], df["c"]], df["a"]) + tm.assert_frame_equal(result, expected) + + # assign arbitrary names + result 
= crosstab(self.df["A"].values, self.df["C"].values) + assert result.index.name == "row_0" + assert result.columns.name == "col_0" + + def test_crosstab_non_aligned(self): + # GH 17005 + a = pd.Series([0, 1, 1], index=["a", "b", "c"]) + b = pd.Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"]) + c = np.array([3, 4, 3]) + + expected = pd.DataFrame( + [[1, 0], [1, 1]], + index=Index([0, 1], name="row_0"), + columns=Index([3, 4], name="col_0"), + ) + + result = crosstab(a, b) + tm.assert_frame_equal(result, expected) + + result = crosstab(a, c) + tm.assert_frame_equal(result, expected) + + def test_crosstab_margins(self): + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c"), margins=True) + + assert result.index.names == ("a",) + assert result.columns.names == ["b", "c"] + + all_cols = result["All", ""] + exp_cols = df.groupby(["a"]).size().astype("i8") + # to keep index.name + exp_margin = Series([len(df)], index=Index(["All"], name="a")) + exp_cols = exp_cols.append(exp_margin) + exp_cols.name = ("All", "") + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc["All"] + exp_rows = df.groupby(["b", "c"]).size().astype("i8") + exp_rows = exp_rows.append(Series([len(df)], index=[("All", "")])) + exp_rows.name = "All" + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + def test_crosstab_margins_set_margin_name(self): + # GH 15972 + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + + df = DataFrame({"a": a, "b": b, "c": c}) + + result = crosstab( + a, + [b, c], + rownames=["a"], + colnames=("b", "c"), + margins=True, + margins_name="TOTAL", + ) + + assert result.index.names == ("a",) + assert 
result.columns.names == ["b", "c"] + + all_cols = result["TOTAL", ""] + exp_cols = df.groupby(["a"]).size().astype("i8") + # to keep index.name + exp_margin = Series([len(df)], index=Index(["TOTAL"], name="a")) + exp_cols = exp_cols.append(exp_margin) + exp_cols.name = ("TOTAL", "") + + tm.assert_series_equal(all_cols, exp_cols) + + all_rows = result.loc["TOTAL"] + exp_rows = df.groupby(["b", "c"]).size().astype("i8") + exp_rows = exp_rows.append(Series([len(df)], index=[("TOTAL", "")])) + exp_rows.name = "TOTAL" + + exp_rows = exp_rows.reindex(all_rows.index) + exp_rows = exp_rows.fillna(0).astype(np.int64) + tm.assert_series_equal(all_rows, exp_rows) + + msg = "margins_name argument must be a string" + for margins_name in [666, None, ["a", "b"]]: + with pytest.raises(ValueError, match=msg): + crosstab( + a, + [b, c], + rownames=["a"], + colnames=("b", "c"), + margins=True, + margins_name=margins_name, + ) + + def test_crosstab_pass_values(self): + a = np.random.randint(0, 7, size=100) + b = np.random.randint(0, 3, size=100) + c = np.random.randint(0, 5, size=100) + values = np.random.randn(100) + + table = crosstab( + [a, b], c, values, aggfunc=np.sum, rownames=["foo", "bar"], colnames=["baz"] + ) + + df = DataFrame({"foo": a, "bar": b, "baz": c, "values": values}) + + expected = df.pivot_table( + "values", index=["foo", "bar"], columns="baz", aggfunc=np.sum + ) + tm.assert_frame_equal(table, expected) + + def test_crosstab_dropna(self): + # GH 3820 + a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) + b = np.array(["one", "one", "two", "one", "two", "two", "two"], dtype=object) + c = np.array( + ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object + ) + res = pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=False) + m = MultiIndex.from_tuples( + [("one", "dull"), ("one", "shiny"), ("two", "dull"), ("two", "shiny")], + names=["b", "c"], + ) + tm.assert_index_equal(res.columns, m) + + def 
test_crosstab_no_overlap(self): + # GS 10291 + + s1 = pd.Series([1, 2, 3], index=[1, 2, 3]) + s2 = pd.Series([4, 5, 6], index=[4, 5, 6]) + + actual = crosstab(s1, s2) + expected = pd.DataFrame() + + tm.assert_frame_equal(actual, expected) + + def test_margin_dropna(self): + # GH 12577 + # pivot_table counts null into margin ('All') + # when margins=true and dropna=true + + df = pd.DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) + expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 3, 5]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} + ) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3.0, 4.0, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]} + ) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=True) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 1, 2]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + # GH 12642 + # _add_margins raises KeyError: Level None not found + # when margins=True and dropna=False + df = pd.DataFrame({"a": [1, 2, 2, 2, 2, np.nan], "b": [3, 3, 4, 4, 4, 4]}) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [1, 3, 4], [2, 4, 6]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3, 4, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + df = DataFrame( + {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": 
[3, np.nan, 4, 4, 4, 4]} + ) + actual = pd.crosstab(df.a, df.b, margins=True, dropna=False) + expected = pd.DataFrame([[1, 0, 1], [0, 1, 1], [1, 4, 6]]) + expected.index = Index([1.0, 2.0, "All"], name="a") + expected.columns = Index([3.0, 4.0, "All"], name="b") + tm.assert_frame_equal(actual, expected) + + a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) + b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object) + c = np.array( + ["dull", "dull", "dull", "dull", "dull", "shiny", "shiny"], dtype=object + ) + + actual = pd.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], margins=True, dropna=False + ) + m = MultiIndex.from_arrays( + [ + ["one", "one", "two", "two", "All"], + ["dull", "shiny", "dull", "shiny", ""], + ], + names=["b", "c"], + ) + expected = DataFrame( + [[1, 0, 1, 0, 2], [2, 0, 1, 1, 5], [3, 0, 2, 1, 7]], columns=m + ) + expected.index = Index(["bar", "foo", "All"], name="a") + tm.assert_frame_equal(actual, expected) + + actual = pd.crosstab( + [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=False + ) + m = MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], + names=["a", "b"], + ) + expected = DataFrame( + [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 2, 7]], index=m + ) + expected.columns = Index(["dull", "shiny", "All"], name="c") + tm.assert_frame_equal(actual, expected) + + actual = pd.crosstab( + [a, b], c, rownames=["a", "b"], colnames=["c"], margins=True, dropna=True + ) + m = MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo", "All"], ["one", "two", "one", "two", ""]], + names=["a", "b"], + ) + expected = DataFrame( + [[1, 0, 1], [1, 0, 1], [2, 0, 2], [1, 1, 2], [5, 1, 6]], index=m + ) + expected.columns = Index(["dull", "shiny", "All"], name="c") + tm.assert_frame_equal(actual, expected) + + def test_crosstab_normalize(self): + # Issue 12578 + df = pd.DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 
4], "c": [1, 1, np.nan, 1, 1]} + ) + + rindex = pd.Index([1, 2], name="a") + cindex = pd.Index([3, 4], name="b") + full_normal = pd.DataFrame([[0.2, 0], [0.2, 0.6]], index=rindex, columns=cindex) + row_normal = pd.DataFrame( + [[1.0, 0], [0.25, 0.75]], index=rindex, columns=cindex + ) + col_normal = pd.DataFrame([[0.5, 0], [0.5, 1.0]], index=rindex, columns=cindex) + + # Check all normalize args + tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="all"), full_normal) + tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=True), full_normal) + tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="index"), row_normal) + tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize="columns"), col_normal) + tm.assert_frame_equal( + pd.crosstab(df.a, df.b, normalize=1), + pd.crosstab(df.a, df.b, normalize="columns"), + ) + tm.assert_frame_equal( + pd.crosstab(df.a, df.b, normalize=0), + pd.crosstab(df.a, df.b, normalize="index"), + ) + + row_normal_margins = pd.DataFrame( + [[1.0, 0], [0.25, 0.75], [0.4, 0.6]], + index=pd.Index([1, 2, "All"], name="a", dtype="object"), + columns=pd.Index([3, 4], name="b", dtype="object"), + ) + col_normal_margins = pd.DataFrame( + [[0.5, 0, 0.2], [0.5, 1.0, 0.8]], + index=pd.Index([1, 2], name="a", dtype="object"), + columns=pd.Index([3, 4, "All"], name="b", dtype="object"), + ) + + all_normal_margins = pd.DataFrame( + [[0.2, 0, 0.2], [0.2, 0.6, 0.8], [0.4, 0.6, 1]], + index=pd.Index([1, 2, "All"], name="a", dtype="object"), + columns=pd.Index([3, 4, "All"], name="b", dtype="object"), + ) + tm.assert_frame_equal( + pd.crosstab(df.a, df.b, normalize="index", margins=True), row_normal_margins + ) + tm.assert_frame_equal( + pd.crosstab(df.a, df.b, normalize="columns", margins=True), + col_normal_margins, + ) + tm.assert_frame_equal( + pd.crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins + ) + + # Test arrays + pd.crosstab( + [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2]) + ) + + # Test 
with aggfunc + norm_counts = pd.DataFrame( + [[0.25, 0, 0.25], [0.25, 0.5, 0.75], [0.5, 0.5, 1]], + index=pd.Index([1, 2, "All"], name="a", dtype="object"), + columns=pd.Index([3, 4, "All"], name="b"), + ) + test_case = pd.crosstab( + df.a, df.b, df.c, aggfunc="count", normalize="all", margins=True + ) + tm.assert_frame_equal(test_case, norm_counts) + + df = pd.DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [0, 4, np.nan, 3, 3]} + ) + + norm_sum = pd.DataFrame( + [[0, 0, 0.0], [0.4, 0.6, 1], [0.4, 0.6, 1]], + index=pd.Index([1, 2, "All"], name="a", dtype="object"), + columns=pd.Index([3, 4, "All"], name="b", dtype="object"), + ) + test_case = pd.crosstab( + df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True + ) + tm.assert_frame_equal(test_case, norm_sum) + + def test_crosstab_with_empties(self): + # Check handling of empties + df = pd.DataFrame( + { + "a": [1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4], + "c": [np.nan, np.nan, np.nan, np.nan, np.nan], + } + ) + + empty = pd.DataFrame( + [[0.0, 0.0], [0.0, 0.0]], + index=pd.Index([1, 2], name="a", dtype="int64"), + columns=pd.Index([3, 4], name="b"), + ) + + for i in [True, "index", "columns"]: + calculated = pd.crosstab( + df.a, df.b, values=df.c, aggfunc="count", normalize=i + ) + tm.assert_frame_equal(empty, calculated) + + nans = pd.DataFrame( + [[0.0, np.nan], [0.0, 0.0]], + index=pd.Index([1, 2], name="a", dtype="int64"), + columns=pd.Index([3, 4], name="b"), + ) + + calculated = pd.crosstab( + df.a, df.b, values=df.c, aggfunc="count", normalize=False + ) + tm.assert_frame_equal(nans, calculated) + + def test_crosstab_errors(self): + # Issue 12578 + + df = pd.DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} + ) + + error = "values cannot be used without an aggfunc." 
+ with pytest.raises(ValueError, match=error): + pd.crosstab(df.a, df.b, values=df.c) + + error = "aggfunc cannot be used without values" + with pytest.raises(ValueError, match=error): + pd.crosstab(df.a, df.b, aggfunc=np.mean) + + error = "Not a valid normalize argument" + with pytest.raises(ValueError, match=error): + pd.crosstab(df.a, df.b, normalize="42") + + with pytest.raises(ValueError, match=error): + pd.crosstab(df.a, df.b, normalize=42) + + error = "Not a valid margins argument" + with pytest.raises(ValueError, match=error): + pd.crosstab(df.a, df.b, normalize="all", margins=42) + + def test_crosstab_with_categorial_columns(self): + # GH 8860 + df = pd.DataFrame( + { + "MAKE": ["Honda", "Acura", "Tesla", "Honda", "Honda", "Acura"], + "MODEL": ["Sedan", "Sedan", "Electric", "Pickup", "Sedan", "Sedan"], + } + ) + categories = ["Sedan", "Electric", "Pickup"] + df["MODEL"] = df["MODEL"].astype("category").cat.set_categories(categories) + result = pd.crosstab(df["MAKE"], df["MODEL"]) + + expected_index = pd.Index(["Acura", "Honda", "Tesla"], name="MAKE") + expected_columns = pd.CategoricalIndex( + categories, categories=categories, ordered=False, name="MODEL" + ) + expected_data = [[2, 0, 0], [2, 0, 1], [0, 1, 0]] + expected = pd.DataFrame( + expected_data, index=expected_index, columns=expected_columns + ) + tm.assert_frame_equal(result, expected) + + def test_crosstab_with_numpy_size(self): + # GH 4003 + df = pd.DataFrame( + { + "A": ["one", "one", "two", "three"] * 6, + "B": ["A", "B", "C"] * 8, + "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4, + "D": np.random.randn(24), + "E": np.random.randn(24), + } + ) + result = pd.crosstab( + index=[df["A"], df["B"]], + columns=[df["C"]], + margins=True, + aggfunc=np.size, + values=df["D"], + ) + expected_index = pd.MultiIndex( + levels=[["All", "one", "three", "two"], ["", "A", "B", "C"]], + codes=[[1, 1, 1, 2, 2, 2, 3, 3, 3, 0], [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]], + names=["A", "B"], + ) + expected_column = 
pd.Index(["bar", "foo", "All"], dtype="object", name="C") + expected_data = np.array( + [ + [2.0, 2.0, 4.0], + [2.0, 2.0, 4.0], + [2.0, 2.0, 4.0], + [2.0, np.nan, 2.0], + [np.nan, 2.0, 2.0], + [2.0, np.nan, 2.0], + [np.nan, 2.0, 2.0], + [2.0, np.nan, 2.0], + [np.nan, 2.0, 2.0], + [12.0, 12.0, 24.0], + ] + ) + expected = pd.DataFrame( + expected_data, index=expected_index, columns=expected_column + ) + tm.assert_frame_equal(result, expected) + + def test_crosstab_dup_index_names(self): + # GH 13279 + s = pd.Series(range(3), name="foo") + + result = pd.crosstab(s, s) + expected_index = pd.Index(range(3), name="foo") + expected = pd.DataFrame( + np.eye(3, dtype=np.int64), index=expected_index, columns=expected_index + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("names", [["a", ("b", "c")], [("a", "b"), "c"]]) + def test_crosstab_tuple_name(self, names): + s1 = pd.Series(range(3), name=names[0]) + s2 = pd.Series(range(1, 4), name=names[1]) + + mi = pd.MultiIndex.from_arrays([range(3), range(1, 4)], names=names) + expected = pd.Series(1, index=mi).unstack(1, fill_value=0) + + result = pd.crosstab(s1, s2) + tm.assert_frame_equal(result, expected) + + def test_crosstab_unsorted_order(self): + df = pd.DataFrame({"b": [3, 1, 2], "a": [5, 4, 6]}, index=["C", "A", "B"]) + result = pd.crosstab(df.index, [df.b, df.a]) + e_idx = pd.Index(["A", "B", "C"], name="row_0") + e_columns = pd.MultiIndex.from_tuples( + [(1, 4), (2, 6), (3, 5)], names=["b", "a"] + ) + expected = pd.DataFrame( + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], index=e_idx, columns=e_columns + ) + tm.assert_frame_equal(result, expected) + + def test_margin_normalize(self): + # GH 27500 + df = pd.DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 
6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + } + ) + # normalize on index + result = pd.crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=0 + ) + expected = pd.DataFrame( + [[0.5, 0.5], [0.5, 0.5], [0.666667, 0.333333], [0, 1], [0.444444, 0.555556]] + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + expected.columns = Index(["large", "small"], dtype="object", name="C") + tm.assert_frame_equal(result, expected) + + # normalize on columns + result = pd.crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=1 + ) + expected = pd.DataFrame( + [ + [0.25, 0.2, 0.222222], + [0.25, 0.2, 0.222222], + [0.5, 0.2, 0.333333], + [0, 0.4, 0.222222], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["bar", "foo"], ["one", "two"]], + codes=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=["A", "B"], + ) + tm.assert_frame_equal(result, expected) + + # normalize on both index and column + result = pd.crosstab( + [df.A, df.B], df.C, margins=True, margins_name="Sub-Total", normalize=True + ) + expected = pd.DataFrame( + [ + [0.111111, 0.111111, 0.222222], + [0.111111, 0.111111, 0.222222], + [0.222222, 0.111111, 0.333333], + [0.000000, 0.222222, 0.222222], + [0.444444, 0.555555, 1], + ] + ) + expected.columns = Index( + ["large", "small", "Sub-Total"], dtype="object", name="C" + ) + expected.index = MultiIndex( + levels=[["Sub-Total", "bar", "foo"], ["", "one", "two"]], + codes=[[1, 1, 2, 2, 0], [1, 2, 1, 2, 0]], + names=["A", "B"], + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py new file mode 100644 index 00000000..c436ab5d --- /dev/null +++ b/pandas/tests/reshape/test_qcut.py @@ -0,0 +1,300 @@ +import os + +import numpy as np +import pytest + +import 
pandas as pd +from pandas import ( + Categorical, + DatetimeIndex, + Interval, + IntervalIndex, + NaT, + Series, + TimedeltaIndex, + Timestamp, + cut, + date_range, + isna, + qcut, + timedelta_range, +) +import pandas._testing as tm +from pandas.api.types import CategoricalDtype as CDT +from pandas.core.algorithms import quantile + +from pandas.tseries.offsets import Day, Nano + + +def test_qcut(): + arr = np.random.randn(1000) + + # We store the bins as Index that have been + # rounded to comparisons are a bit tricky. + labels, bins = qcut(arr, 4, retbins=True) + ex_bins = quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + result = labels.categories.left.values + assert np.allclose(result, ex_bins[:-1], atol=1e-2) + + result = labels.categories.right.values + assert np.allclose(result, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True) + tm.assert_categorical_equal(labels, ex_levels) + + +def test_qcut_bounds(): + arr = np.random.randn(1000) + + factor = qcut(arr, 10, labels=False) + assert len(np.unique(factor)) == 10 + + +def test_qcut_specify_quantiles(): + arr = np.random.randn(100) + factor = qcut(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + expected = qcut(arr, 4) + tm.assert_categorical_equal(factor, expected) + + +def test_qcut_all_bins_same(): + with pytest.raises(ValueError, match="edges.*unique"): + qcut([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3) + + +def test_qcut_include_lowest(): + values = np.arange(10) + ii = qcut(values, 4) + + ex_levels = IntervalIndex( + [ + Interval(-0.001, 2.25), + Interval(2.25, 4.5), + Interval(4.5, 6.75), + Interval(6.75, 9), + ] + ) + tm.assert_index_equal(ii.categories, ex_levels) + + +def test_qcut_nas(): + arr = np.random.randn(100) + arr[:20] = np.nan + + result = qcut(arr, 4) + assert isna(result[:20]).all() + + +def test_qcut_index(): + result = qcut([0, 2], 2) + intervals = [Interval(-0.001, 1), Interval(1, 2)] + + expected = Categorical(intervals, ordered=True) + tm.assert_categorical_equal(result, expected) + 
+ +def test_qcut_binning_issues(datapath): + # see gh-1978, gh-1979 + cut_file = datapath(os.path.join("reshape", "data", "cut_data.csv")) + arr = np.loadtxt(cut_file) + result = qcut(arr, 20) + + starts = [] + ends = [] + + for lev in np.unique(result): + s = lev.left + e = lev.right + assert s != e + + starts.append(float(s)) + ends.append(float(e)) + + for (sp, sn), (ep, en) in zip( + zip(starts[:-1], starts[1:]), zip(ends[:-1], ends[1:]) + ): + assert sp < sn + assert ep < en + assert ep <= sn + + +def test_qcut_return_intervals(): + ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8]) + res = qcut(ser, [0, 0.333, 0.666, 1]) + + exp_levels = np.array( + [Interval(-0.001, 2.664), Interval(2.664, 5.328), Interval(5.328, 8)] + ) + exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True)) + tm.assert_series_equal(res, exp) + + +@pytest.mark.parametrize("labels", ["foo", 1, True]) +def test_qcut_incorrect_labels(labels): + # GH 13318 + values = range(5) + msg = "Bin labels must either be False, None or passed in as a list-like argument" + with pytest.raises(ValueError, match=msg): + qcut(values, 4, labels=labels) + + +@pytest.mark.parametrize("labels", [["a", "b", "c"], list(range(3))]) +def test_qcut_wrong_length_labels(labels): + # GH 13318 + values = range(10) + msg = "Bin labels must be one fewer than the number of bin edges" + with pytest.raises(ValueError, match=msg): + qcut(values, 4, labels=labels) + + +@pytest.mark.parametrize( + "labels, expected", + [ + (["a", "b", "c"], Categorical(["a", "b", "c"], ordered=True)), + (list(range(3)), Categorical([0, 1, 2], ordered=True)), + ], +) +def test_qcut_list_like_labels(labels, expected): + # GH 13318 + values = range(3) + result = qcut(values, 3, labels=labels) + tm.assert_categorical_equal(result, expected) + + +@pytest.mark.parametrize( + "kwargs,msg", + [ + (dict(duplicates="drop"), None), + (dict(), "Bin edges must be unique"), + (dict(duplicates="raise"), "Bin edges must be unique"), + 
(dict(duplicates="foo"), "invalid value for 'duplicates' parameter"), + ], +) +def test_qcut_duplicates_bin(kwargs, msg): + # see gh-7751 + values = [0, 0, 0, 0, 1, 2, 3] + + if msg is not None: + with pytest.raises(ValueError, match=msg): + qcut(values, 3, **kwargs) + else: + result = qcut(values, 3, **kwargs) + expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)]) + tm.assert_index_equal(result.categories, expected) + + +@pytest.mark.parametrize( + "data,start,end", [(9.0, 8.999, 9.0), (0.0, -0.001, 0.0), (-9.0, -9.001, -9.0)] +) +@pytest.mark.parametrize("length", [1, 2]) +@pytest.mark.parametrize("labels", [None, False]) +def test_single_quantile(data, start, end, length, labels): + # see gh-15431 + ser = Series([data] * length) + result = qcut(ser, 1, labels=labels) + + if labels is None: + intervals = IntervalIndex([Interval(start, end)] * length, closed="right") + expected = Series(intervals).astype(CDT(ordered=True)) + else: + expected = Series([0] * length) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ser", + [ + Series(DatetimeIndex(["20180101", NaT, "20180103"])), + Series(TimedeltaIndex(["0 days", NaT, "2 days"])), + ], + ids=lambda x: str(x.dtype), +) +def test_qcut_nat(ser): + # see gh-19768 + intervals = IntervalIndex.from_tuples( + [(ser[0] - Nano(), ser[2] - Day()), np.nan, (ser[2] - Day(), ser[2])] + ) + expected = Series(Categorical(intervals, ordered=True)) + + result = qcut(ser, 2) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("bins", [3, np.linspace(0, 1, 4)]) +def test_datetime_tz_qcut(bins): + # see gh-19872 + tz = "US/Eastern" + ser = Series(date_range("20130101", periods=3, tz=tz)) + + result = qcut(ser, bins) + expected = Series( + IntervalIndex( + [ + Interval( + Timestamp("2012-12-31 23:59:59.999999999", tz=tz), + Timestamp("2013-01-01 16:00:00", tz=tz), + ), + Interval( + Timestamp("2013-01-01 16:00:00", tz=tz), + Timestamp("2013-01-02 08:00:00", tz=tz), + ), + 
Interval( + Timestamp("2013-01-02 08:00:00", tz=tz), + Timestamp("2013-01-03 00:00:00", tz=tz), + ), + ] + ) + ).astype(CDT(ordered=True)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "arg,expected_bins", + [ + [ + timedelta_range("1day", periods=3), + TimedeltaIndex(["1 days", "2 days", "3 days"]), + ], + [ + date_range("20180101", periods=3), + DatetimeIndex(["2018-01-01", "2018-01-02", "2018-01-03"]), + ], + ], +) +def test_date_like_qcut_bins(arg, expected_bins): + # see gh-19891 + ser = Series(arg) + result, result_bins = qcut(ser, 2, retbins=True) + tm.assert_index_equal(result_bins, expected_bins) + + +@pytest.mark.parametrize("bins", [6, 7]) +@pytest.mark.parametrize( + "box, compare", + [ + (Series, tm.assert_series_equal), + (np.array, tm.assert_categorical_equal), + (list, tm.assert_equal), + ], +) +def test_qcut_bool_coercion_to_int(bins, box, compare): + # issue 20303 + data_expected = box([0, 1, 1, 0, 1] * 10) + data_result = box([False, True, True, False, True] * 10) + expected = qcut(data_expected, bins, duplicates="drop") + result = qcut(data_result, bins, duplicates="drop") + compare(result, expected) + + +@pytest.mark.parametrize("q", [2, 5, 10]) +def test_qcut_nullable_integer(q, any_nullable_int_dtype): + arr = pd.array(np.arange(100), dtype=any_nullable_int_dtype) + arr[::2] = pd.NA + + result = qcut(arr, q) + expected = qcut(arr.astype(float), q) + + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py new file mode 100644 index 00000000..f25291f4 --- /dev/null +++ b/pandas/tests/reshape/test_reshape.py @@ -0,0 +1,647 @@ +from collections import OrderedDict + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import Categorical, DataFrame, Index, Series, get_dummies +import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray, 
SparseDtype + + +class TestGetDummies: + @pytest.fixture + def df(self): + return DataFrame({"A": ["a", "b", "a"], "B": ["b", "b", "c"], "C": [1, 2, 3]}) + + @pytest.fixture(params=["uint8", "i8", np.float64, bool, None]) + def dtype(self, request): + return np.dtype(request.param) + + @pytest.fixture(params=["dense", "sparse"]) + def sparse(self, request): + # params are strings to simplify reading test results, + # e.g. TestGetDummies::test_basic[uint8-sparse] instead of [uint8-True] + return request.param == "sparse" + + def effective_dtype(self, dtype): + if dtype is None: + return np.uint8 + return dtype + + def test_raises_on_dtype_object(self, df): + with pytest.raises(ValueError): + get_dummies(df, dtype="object") + + def test_basic(self, sparse, dtype): + s_list = list("abc") + s_series = Series(s_list) + s_series_index = Series(s_list, list("ABC")) + + expected = DataFrame( + {"a": [1, 0, 0], "b": [0, 1, 0], "c": [0, 0, 1]}, + dtype=self.effective_dtype(dtype), + ) + if sparse: + expected = expected.apply(SparseArray, fill_value=0.0) + result = get_dummies(s_list, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + expected.index = list("ABC") + result = get_dummies(s_series_index, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_basic_types(self, sparse, dtype): + # GH 10531 + s_list = list("abc") + s_series = Series(s_list) + s_df = DataFrame( + {"a": [0, 1, 0, 1, 2], "b": ["A", "A", "B", "C", "C"], "c": [2, 3, 3, 3, 2]} + ) + + expected = DataFrame( + {"a": [1, 0, 0], "b": [0, 1, 0], "c": [0, 0, 1]}, + dtype=self.effective_dtype(dtype), + columns=list("abc"), + ) + if sparse: + if is_integer_dtype(dtype): + fill_value = 0 + elif dtype == bool: + fill_value = False + else: + fill_value = 0.0 + + expected = expected.apply(SparseArray, fill_value=fill_value) + result = get_dummies(s_list, 
sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, sparse=sparse, dtype=dtype) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_df, columns=s_df.columns, sparse=sparse, dtype=dtype) + if sparse: + dtype_name = "Sparse[{}, {}]".format( + self.effective_dtype(dtype).name, fill_value + ) + else: + dtype_name = self.effective_dtype(dtype).name + + expected = Series({dtype_name: 8}) + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + tm.assert_series_equal(result, expected) + + result = get_dummies(s_df, columns=["a"], sparse=sparse, dtype=dtype) + + expected_counts = {"int64": 1, "object": 1} + expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) + + expected = Series(expected_counts).sort_index() + result = result.dtypes.value_counts() + result.index = [str(i) for i in result.index] + result = result.sort_index() + tm.assert_series_equal(result, expected) + + def test_just_na(self, sparse): + just_na_list = [np.nan] + just_na_series = Series(just_na_list) + just_na_series_index = Series(just_na_list, index=["A"]) + + res_list = get_dummies(just_na_list, sparse=sparse) + res_series = get_dummies(just_na_series, sparse=sparse) + res_series_index = get_dummies(just_na_series_index, sparse=sparse) + + assert res_list.empty + assert res_series.empty + assert res_series_index.empty + + assert res_list.index.tolist() == [0] + assert res_series.index.tolist() == [0] + assert res_series_index.index.tolist() == ["A"] + + def test_include_na(self, sparse, dtype): + s = ["a", "b", np.nan] + res = get_dummies(s, sparse=sparse, dtype=dtype) + exp = DataFrame( + {"a": [1, 0, 0], "b": [0, 1, 0]}, dtype=self.effective_dtype(dtype) + ) + if sparse: + exp = exp.apply(SparseArray, fill_value=0.0) + tm.assert_frame_equal(res, exp) + + # Sparse dataframes do not allow nan labelled columns, see #GH8822 + res_na = get_dummies(s, dummy_na=True, sparse=sparse, 
dtype=dtype) + exp_na = DataFrame( + {np.nan: [0, 0, 1], "a": [1, 0, 0], "b": [0, 1, 0]}, + dtype=self.effective_dtype(dtype), + ) + exp_na = exp_na.reindex(["a", "b", np.nan], axis=1) + # hack (NaN handling in assert_index_equal) + exp_na.columns = res_na.columns + if sparse: + exp_na = exp_na.apply(SparseArray, fill_value=0.0) + tm.assert_frame_equal(res_na, exp_na) + + res_just_na = get_dummies([np.nan], dummy_na=True, sparse=sparse, dtype=dtype) + exp_just_na = DataFrame( + Series(1, index=[0]), columns=[np.nan], dtype=self.effective_dtype(dtype) + ) + tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values) + + def test_unicode(self, sparse): + # See GH 6885 - get_dummies chokes on unicode values + import unicodedata + + e = "e" + eacute = unicodedata.lookup("LATIN SMALL LETTER E WITH ACUTE") + s = [e, eacute, eacute] + res = get_dummies(s, prefix="letter", sparse=sparse) + exp = DataFrame( + {"letter_e": [1, 0, 0], "letter_{eacute}".format(eacute=eacute): [0, 1, 1]}, + dtype=np.uint8, + ) + if sparse: + exp = exp.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(res, exp) + + def test_dataframe_dummies_all_obj(self, df, sparse): + df = df[["A", "B"]] + result = get_dummies(df, sparse=sparse) + expected = DataFrame( + {"A_a": [1, 0, 1], "A_b": [0, 1, 0], "B_b": [1, 1, 0], "B_c": [0, 0, 1]}, + dtype=np.uint8, + ) + if sparse: + expected = pd.DataFrame( + { + "A_a": SparseArray([1, 0, 1], dtype="uint8"), + "A_b": SparseArray([0, 1, 0], dtype="uint8"), + "B_b": SparseArray([1, 1, 0], dtype="uint8"), + "B_c": SparseArray([0, 0, 1], dtype="uint8"), + } + ) + + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_mix_default(self, df, sparse, dtype): + result = get_dummies(df, sparse=sparse, dtype=dtype) + if sparse: + arr = SparseArray + typ = SparseDtype(dtype, 0) + else: + arr = np.array + typ = dtype + expected = DataFrame( + { + "C": [1, 2, 3], + "A_a": arr([1, 0, 1], dtype=typ), + "A_b": arr([0, 1, 0], dtype=typ), + "B_b": 
arr([1, 1, 0], dtype=typ), + "B_c": arr([0, 0, 1], dtype=typ), + } + ) + expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]] + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_list(self, df, sparse): + prefixes = ["from_A", "from_B"] + result = get_dummies(df, prefix=prefixes, sparse=sparse) + expected = DataFrame( + { + "C": [1, 2, 3], + "from_A_a": [1, 0, 1], + "from_A_b": [0, 1, 0], + "from_B_b": [1, 1, 0], + "from_B_c": [0, 0, 1], + }, + dtype=np.uint8, + ) + expected[["C"]] = df[["C"]] + cols = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"] + expected = expected[["C"] + cols] + + typ = SparseArray if sparse else pd.Series + expected[cols] = expected[cols].apply(lambda x: typ(x)) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_str(self, df, sparse): + # not that you should do this... + result = get_dummies(df, prefix="bad", sparse=sparse) + bad_columns = ["bad_a", "bad_b", "bad_b", "bad_c"] + expected = DataFrame( + [[1, 1, 0, 1, 0], [2, 0, 1, 1, 0], [3, 1, 0, 0, 1]], + columns=["C"] + bad_columns, + dtype=np.uint8, + ) + expected = expected.astype({"C": np.int64}) + if sparse: + # work around astyping & assigning with duplicate columns + # https://github.com/pandas-dev/pandas/issues/14427 + expected = pd.concat( + [ + pd.Series([1, 2, 3], name="C"), + pd.Series([1, 0, 1], name="bad_a", dtype="Sparse[uint8]"), + pd.Series([0, 1, 0], name="bad_b", dtype="Sparse[uint8]"), + pd.Series([1, 1, 0], name="bad_b", dtype="Sparse[uint8]"), + pd.Series([0, 0, 1], name="bad_c", dtype="Sparse[uint8]"), + ], + axis=1, + ) + + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_subset(self, df, sparse): + result = get_dummies(df, prefix=["from_A"], columns=["A"], sparse=sparse) + expected = DataFrame( + { + "B": ["b", "b", "c"], + "C": [1, 2, 3], + "from_A_a": [1, 0, 1], + "from_A_b": [0, 1, 0], + }, + dtype=np.uint8, + ) + expected[["C"]] = df[["C"]] + if sparse: + cols = ["from_A_a", 
"from_A_b"] + expected[cols] = expected[cols].astype(pd.SparseDtype("uint8", 0)) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_sep(self, df, sparse): + result = get_dummies(df, prefix_sep="..", sparse=sparse) + expected = DataFrame( + { + "C": [1, 2, 3], + "A..a": [1, 0, 1], + "A..b": [0, 1, 0], + "B..b": [1, 1, 0], + "B..c": [0, 0, 1], + }, + dtype=np.uint8, + ) + expected[["C"]] = df[["C"]] + expected = expected[["C", "A..a", "A..b", "B..b", "B..c"]] + if sparse: + cols = ["A..a", "A..b", "B..b", "B..c"] + expected[cols] = expected[cols].astype(pd.SparseDtype("uint8", 0)) + + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, prefix_sep=["..", "__"], sparse=sparse) + expected = expected.rename(columns={"B..b": "B__b", "B..c": "B__c"}) + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, prefix_sep={"A": "..", "B": "__"}, sparse=sparse) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_prefix_bad_length(self, df, sparse): + with pytest.raises(ValueError): + get_dummies(df, prefix=["too few"], sparse=sparse) + + def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse): + with pytest.raises(ValueError): + get_dummies(df, prefix_sep=["bad"], sparse=sparse) + + def test_dataframe_dummies_prefix_dict(self, sparse): + prefixes = {"A": "from_A", "B": "from_B"} + df = DataFrame({"C": [1, 2, 3], "A": ["a", "b", "a"], "B": ["b", "b", "c"]}) + result = get_dummies(df, prefix=prefixes, sparse=sparse) + + expected = DataFrame( + { + "C": [1, 2, 3], + "from_A_a": [1, 0, 1], + "from_A_b": [0, 1, 0], + "from_B_b": [1, 1, 0], + "from_B_c": [0, 0, 1], + } + ) + + columns = ["from_A_a", "from_A_b", "from_B_b", "from_B_c"] + expected[columns] = expected[columns].astype(np.uint8) + if sparse: + expected[columns] = expected[columns].astype(pd.SparseDtype("uint8", 0)) + + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_with_na(self, df, sparse, dtype): + 
df.loc[3, :] = [np.nan, np.nan, np.nan] + result = get_dummies(df, dummy_na=True, sparse=sparse, dtype=dtype).sort_index( + axis=1 + ) + + if sparse: + arr = SparseArray + typ = SparseDtype(dtype, 0) + else: + arr = np.array + typ = dtype + + expected = DataFrame( + { + "C": [1, 2, 3, np.nan], + "A_a": arr([1, 0, 1, 0], dtype=typ), + "A_b": arr([0, 1, 0, 0], dtype=typ), + "A_nan": arr([0, 0, 0, 1], dtype=typ), + "B_b": arr([1, 1, 0, 0], dtype=typ), + "B_c": arr([0, 0, 1, 0], dtype=typ), + "B_nan": arr([0, 0, 0, 1], dtype=typ), + } + ).sort_index(axis=1) + + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype) + expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]] + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_with_categorical(self, df, sparse, dtype): + df["cat"] = pd.Categorical(["x", "y", "y"]) + result = get_dummies(df, sparse=sparse, dtype=dtype).sort_index(axis=1) + if sparse: + arr = SparseArray + typ = SparseDtype(dtype, 0) + else: + arr = np.array + typ = dtype + + expected = DataFrame( + { + "C": [1, 2, 3], + "A_a": arr([1, 0, 1], dtype=typ), + "A_b": arr([0, 1, 0], dtype=typ), + "B_b": arr([1, 1, 0], dtype=typ), + "B_c": arr([0, 0, 1], dtype=typ), + "cat_x": arr([1, 0, 0], dtype=typ), + "cat_y": arr([0, 1, 1], dtype=typ), + } + ).sort_index(axis=1) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "get_dummies_kwargs,expected", + [ + ( + {"data": pd.DataFrame(({"ä": ["a"]}))}, + pd.DataFrame({"ä_a": [1]}, dtype=np.uint8), + ), + ( + {"data": pd.DataFrame({"x": ["ä"]})}, + pd.DataFrame({"x_ä": [1]}, dtype=np.uint8), + ), + ( + {"data": pd.DataFrame({"x": ["a"]}), "prefix": "ä"}, + pd.DataFrame({"ä_a": [1]}, dtype=np.uint8), + ), + ( + {"data": pd.DataFrame({"x": ["a"]}), "prefix_sep": "ä"}, + pd.DataFrame({"xäa": [1]}, dtype=np.uint8), + ), + ], + ) + def test_dataframe_dummies_unicode(self, get_dummies_kwargs, expected): + # GH22084 
pd.get_dummies incorrectly encodes unicode characters + # in dataframe column names + result = get_dummies(**get_dummies_kwargs) + tm.assert_frame_equal(result, expected) + + def test_basic_drop_first(self, sparse): + # GH12402 Add a new parameter `drop_first` to avoid collinearity + # Basic case + s_list = list("abc") + s_series = Series(s_list) + s_series_index = Series(s_list, list("ABC")) + + expected = DataFrame({"b": [0, 1, 0], "c": [0, 0, 1]}, dtype=np.uint8) + + result = get_dummies(s_list, drop_first=True, sparse=sparse) + if sparse: + expected = expected.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + expected.index = list("ABC") + result = get_dummies(s_series_index, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + def test_basic_drop_first_one_level(self, sparse): + # Test the case that categorical variable only has one level. 
+ s_list = list("aaa") + s_series = Series(s_list) + s_series_index = Series(s_list, list("ABC")) + + expected = DataFrame(index=np.arange(3)) + + result = get_dummies(s_list, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + result = get_dummies(s_series, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + expected = DataFrame(index=list("ABC")) + result = get_dummies(s_series_index, drop_first=True, sparse=sparse) + tm.assert_frame_equal(result, expected) + + def test_basic_drop_first_NA(self, sparse): + # Test NA handling together with drop_first + s_NA = ["a", "b", np.nan] + res = get_dummies(s_NA, drop_first=True, sparse=sparse) + exp = DataFrame({"b": [0, 1, 0]}, dtype=np.uint8) + if sparse: + exp = exp.apply(SparseArray, fill_value=0) + + tm.assert_frame_equal(res, exp) + + res_na = get_dummies(s_NA, dummy_na=True, drop_first=True, sparse=sparse) + exp_na = DataFrame({"b": [0, 1, 0], np.nan: [0, 0, 1]}, dtype=np.uint8).reindex( + ["b", np.nan], axis=1 + ) + if sparse: + exp_na = exp_na.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(res_na, exp_na) + + res_just_na = get_dummies( + [np.nan], dummy_na=True, drop_first=True, sparse=sparse + ) + exp_just_na = DataFrame(index=np.arange(1)) + tm.assert_frame_equal(res_just_na, exp_just_na) + + def test_dataframe_dummies_drop_first(self, df, sparse): + df = df[["A", "B"]] + result = get_dummies(df, drop_first=True, sparse=sparse) + expected = DataFrame({"A_b": [0, 1, 0], "B_c": [0, 0, 1]}, dtype=np.uint8) + if sparse: + expected = expected.apply(SparseArray, fill_value=0) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype): + df["cat"] = pd.Categorical(["x", "y", "y"]) + result = get_dummies(df, drop_first=True, sparse=sparse) + expected = DataFrame( + {"C": [1, 2, 3], "A_b": [0, 1, 0], "B_c": [0, 0, 1], "cat_y": [0, 1, 1]} + ) + cols = ["A_b", "B_c", "cat_y"] + expected[cols] = 
expected[cols].astype(np.uint8) + expected = expected[["C", "A_b", "B_c", "cat_y"]] + if sparse: + for col in cols: + expected[col] = SparseArray(expected[col]) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_drop_first_with_na(self, df, sparse): + df.loc[3, :] = [np.nan, np.nan, np.nan] + result = get_dummies( + df, dummy_na=True, drop_first=True, sparse=sparse + ).sort_index(axis=1) + expected = DataFrame( + { + "C": [1, 2, 3, np.nan], + "A_b": [0, 1, 0, 0], + "A_nan": [0, 0, 0, 1], + "B_c": [0, 0, 1, 0], + "B_nan": [0, 0, 0, 1], + } + ) + cols = ["A_b", "A_nan", "B_c", "B_nan"] + expected[cols] = expected[cols].astype(np.uint8) + expected = expected.sort_index(axis=1) + if sparse: + for col in cols: + expected[col] = SparseArray(expected[col]) + + tm.assert_frame_equal(result, expected) + + result = get_dummies(df, dummy_na=False, drop_first=True, sparse=sparse) + expected = expected[["C", "A_b", "B_c"]] + tm.assert_frame_equal(result, expected) + + def test_int_int(self): + data = Series([1, 2, 1]) + result = pd.get_dummies(data) + expected = DataFrame([[1, 0], [0, 1], [1, 0]], columns=[1, 2], dtype=np.uint8) + tm.assert_frame_equal(result, expected) + + data = Series(pd.Categorical(["a", "b", "a"])) + result = pd.get_dummies(data) + expected = DataFrame( + [[1, 0], [0, 1], [1, 0]], columns=pd.Categorical(["a", "b"]), dtype=np.uint8 + ) + tm.assert_frame_equal(result, expected) + + def test_int_df(self, dtype): + data = DataFrame( + { + "A": [1, 2, 1], + "B": pd.Categorical(["a", "b", "a"]), + "C": [1, 2, 1], + "D": [1.0, 2.0, 1.0], + } + ) + columns = ["C", "D", "A_1", "A_2", "B_a", "B_b"] + expected = DataFrame( + [[1, 1.0, 1, 0, 1, 0], [2, 2.0, 0, 1, 0, 1], [1, 1.0, 1, 0, 1, 0]], + columns=columns, + ) + expected[columns[2:]] = expected[columns[2:]].astype(dtype) + result = pd.get_dummies(data, columns=["A", "B"], dtype=dtype) + tm.assert_frame_equal(result, expected) + + def test_dataframe_dummies_preserve_categorical_dtype(self, 
dtype): + # GH13854 + for ordered in [False, True]: + cat = pd.Categorical(list("xy"), categories=list("xyz"), ordered=ordered) + result = get_dummies(cat, dtype=dtype) + + data = np.array([[1, 0, 0], [0, 1, 0]], dtype=self.effective_dtype(dtype)) + cols = pd.CategoricalIndex( + cat.categories, categories=cat.categories, ordered=ordered + ) + expected = DataFrame(data, columns=cols, dtype=self.effective_dtype(dtype)) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("sparse", [True, False]) + def test_get_dummies_dont_sparsify_all_columns(self, sparse): + # GH18914 + df = DataFrame.from_dict( + OrderedDict([("GDP", [1, 2]), ("Nation", ["AB", "CD"])]) + ) + df = get_dummies(df, columns=["Nation"], sparse=sparse) + df2 = df.reindex(columns=["GDP"]) + + tm.assert_frame_equal(df[["GDP"]], df2) + + def test_get_dummies_duplicate_columns(self, df): + # GH20839 + df.columns = ["A", "A", "A"] + result = get_dummies(df).sort_index(axis=1) + + expected = DataFrame( + [[1, 1, 0, 1, 0], [2, 0, 1, 1, 0], [3, 1, 0, 0, 1]], + columns=["A", "A_a", "A_b", "A_b", "A_c"], + dtype=np.uint8, + ).sort_index(axis=1) + + expected = expected.astype({"A": np.int64}) + + tm.assert_frame_equal(result, expected) + + def test_get_dummies_all_sparse(self): + df = pd.DataFrame({"A": [1, 2]}) + result = pd.get_dummies(df, columns=["A"], sparse=True) + dtype = SparseDtype("uint8", 0) + expected = pd.DataFrame( + { + "A_1": SparseArray([1, 0], dtype=dtype), + "A_2": SparseArray([0, 1], dtype=dtype), + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("values", ["baz"]) + def test_get_dummies_with_string_values(self, values): + # issue #28383 + df = pd.DataFrame( + { + "bar": [1, 2, 3, 4, 5, 6], + "foo": ["one", "one", "one", "two", "two", "two"], + "baz": ["A", "B", "C", "A", "B", "C"], + "zoo": ["x", "y", "z", "q", "w", "t"], + } + ) + + msg = "Input must be a list-like for parameter `columns`" + + with pytest.raises(TypeError, match=msg): + 
pd.get_dummies(df, columns=values) + + +class TestCategoricalReshape: + def test_reshaping_multi_index_categorical(self): + + cols = ["ItemA", "ItemB", "ItemC"] + data = {c: tm.makeTimeDataFrame() for c in cols} + df = pd.concat({c: data[c].stack() for c in data}, axis="columns") + df.index.names = ["major", "minor"] + df["str"] = "foo" + + df["category"] = df["str"].astype("category") + result = df["category"].unstack() + + dti = df.index.levels[0] + c = Categorical(["foo"] * len(dti)) + expected = DataFrame( + {"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()}, + columns=Index(list("ABCD"), name="minor"), + index=dti.rename("major"), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_union_categoricals.py b/pandas/tests/reshape/test_union_categoricals.py new file mode 100644 index 00000000..a503173b --- /dev/null +++ b/pandas/tests/reshape/test_union_categoricals.py @@ -0,0 +1,348 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.concat import union_categoricals + +import pandas as pd +from pandas import Categorical, CategoricalIndex, Series +import pandas._testing as tm + + +class TestUnionCategoricals: + def test_union_categorical(self): + # GH 13361 + data = [ + (list("abc"), list("abd"), list("abcabd")), + ([0, 1, 2], [2, 3, 4], [0, 1, 2, 2, 3, 4]), + ([0, 1.2, 2], [2, 3.4, 4], [0, 1.2, 2, 2, 3.4, 4]), + ( + ["b", "b", np.nan, "a"], + ["a", np.nan, "c"], + ["b", "b", np.nan, "a", "a", np.nan, "c"], + ), + ( + pd.date_range("2014-01-01", "2014-01-05"), + pd.date_range("2014-01-06", "2014-01-07"), + pd.date_range("2014-01-01", "2014-01-07"), + ), + ( + pd.date_range("2014-01-01", "2014-01-05", tz="US/Central"), + pd.date_range("2014-01-06", "2014-01-07", tz="US/Central"), + pd.date_range("2014-01-01", "2014-01-07", tz="US/Central"), + ), + ( + pd.period_range("2014-01-01", "2014-01-05"), + pd.period_range("2014-01-06", "2014-01-07"), + pd.period_range("2014-01-01", "2014-01-07"), + ), + ] + + for a, b, 
combined in data: + for box in [Categorical, CategoricalIndex, Series]: + result = union_categoricals([box(Categorical(a)), box(Categorical(b))]) + expected = Categorical(combined) + tm.assert_categorical_equal(result, expected, check_category_order=True) + + # new categories ordered by appearance + s = Categorical(["x", "y", "z"]) + s2 = Categorical(["a", "b", "c"]) + result = union_categoricals([s, s2]) + expected = Categorical( + ["x", "y", "z", "a", "b", "c"], categories=["x", "y", "z", "a", "b", "c"] + ) + tm.assert_categorical_equal(result, expected) + + s = Categorical([0, 1.2, 2], ordered=True) + s2 = Categorical([0, 1.2, 2], ordered=True) + result = union_categoricals([s, s2]) + expected = Categorical([0, 1.2, 2, 0, 1.2, 2], ordered=True) + tm.assert_categorical_equal(result, expected) + + # must exactly match types + s = Categorical([0, 1.2, 2]) + s2 = Categorical([2, 3, 4]) + msg = "dtype of categories must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([s, s2]) + + msg = "No Categoricals to union" + with pytest.raises(ValueError, match=msg): + union_categoricals([]) + + def test_union_categoricals_nan(self): + # GH 13759 + res = union_categoricals( + [pd.Categorical([1, 2, np.nan]), pd.Categorical([3, 2, np.nan])] + ) + exp = Categorical([1, 2, np.nan, 3, 2, np.nan]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals( + [pd.Categorical(["A", "B"]), pd.Categorical(["B", "B", np.nan])] + ) + exp = Categorical(["A", "B", "B", "B", np.nan]) + tm.assert_categorical_equal(res, exp) + + val1 = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-03-01"), pd.NaT] + val2 = [pd.NaT, pd.Timestamp("2011-01-01"), pd.Timestamp("2011-02-01")] + + res = union_categoricals([pd.Categorical(val1), pd.Categorical(val2)]) + exp = Categorical( + val1 + val2, + categories=[ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-03-01"), + pd.Timestamp("2011-02-01"), + ], + ) + tm.assert_categorical_equal(res, exp) + + # all NaN + res = 
union_categoricals( + [ + pd.Categorical(np.array([np.nan, np.nan], dtype=object)), + pd.Categorical(["X"]), + ] + ) + exp = Categorical([np.nan, np.nan, "X"]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals( + [pd.Categorical([np.nan, np.nan]), pd.Categorical([np.nan, np.nan])] + ) + exp = Categorical([np.nan, np.nan, np.nan, np.nan]) + tm.assert_categorical_equal(res, exp) + + def test_union_categoricals_empty(self): + # GH 13759 + res = union_categoricals([pd.Categorical([]), pd.Categorical([])]) + exp = Categorical([]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([Categorical([]), Categorical(["1"])]) + exp = Categorical(["1"]) + tm.assert_categorical_equal(res, exp) + + def test_union_categorical_same_category(self): + # check fastpath + c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) + c2 = Categorical([3, 2, 1, np.nan], categories=[1, 2, 3, 4]) + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, 4, 3, 2, 1, np.nan], categories=[1, 2, 3, 4]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical(["z", "z", "z"], categories=["x", "y", "z"]) + c2 = Categorical(["x", "x", "x"], categories=["x", "y", "z"]) + res = union_categoricals([c1, c2]) + exp = Categorical(["z", "z", "z", "x", "x", "x"], categories=["x", "y", "z"]) + tm.assert_categorical_equal(res, exp) + + def test_union_categorical_same_categories_different_order(self): + # https://github.com/pandas-dev/pandas/issues/19096 + c1 = Categorical(["a", "b", "c"], categories=["a", "b", "c"]) + c2 = Categorical(["a", "b", "c"], categories=["b", "a", "c"]) + result = union_categoricals([c1, c2]) + expected = Categorical( + ["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"] + ) + tm.assert_categorical_equal(result, expected) + + def test_union_categoricals_ordered(self): + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + msg = "Categorical.ordered must be the same" + with 
pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2]) + + res = union_categoricals([c1, c1]) + exp = Categorical([1, 2, 3, 1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2]) + exp = Categorical([1, 2, 3, np.nan, 3, 2], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + msg = "to union ordered Categoricals, all categories must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_ignore_order(self): + # GH 15219 + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], ordered=False) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + msg = "Categorical.ordered must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2], ignore_order=False) + + res = union_categoricals([c1, c1], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = union_categoricals([c1, c1], ignore_order=False) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3, np.nan], ordered=True) + c2 = Categorical([3, 2], categories=[1, 2, 3], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, np.nan, 3, 2]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True) + + res = union_categoricals([c1, c2], ignore_order=True) + exp = Categorical([1, 2, 3, 1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + res = 
union_categoricals([c2, c1], ignore_order=True, sort_categories=True) + exp = Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(res, exp) + + c1 = Categorical([1, 2, 3], ordered=True) + c2 = Categorical([4, 5, 6], ordered=True) + result = union_categoricals([c1, c2], ignore_order=True) + expected = Categorical([1, 2, 3, 4, 5, 6]) + tm.assert_categorical_equal(result, expected) + + msg = "to union ordered Categoricals, all categories must be the same" + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2], ignore_order=False) + + with pytest.raises(TypeError, match=msg): + union_categoricals([c1, c2]) + + def test_union_categoricals_sort(self): + # GH 13846 + c1 = Categorical(["x", "y", "z"]) + c2 = Categorical(["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical( + ["x", "y", "z", "a", "b", "c"], categories=["a", "b", "c", "x", "y", "z"] + ) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(["a", "b"], categories=["b", "a", "c"]) + c2 = Categorical(["b", "c"], categories=["b", "a", "c"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["a", "b"], categories=["c", "a", "b"]) + c2 = Categorical(["b", "c"], categories=["c", "a", "b"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(["a", "b"], categories=["a", "b", "c"]) + c2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["x", np.nan]) + 
c2 = Categorical([np.nan, "b"]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical(["x", np.nan, np.nan, "b"], categories=["b", "x"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([np.nan, np.nan]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=True) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True) + c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True) + with pytest.raises(TypeError): + union_categoricals([c1, c2], sort_categories=True) + + def test_union_categoricals_sort_false(self): + # GH 13846 + c1 = Categorical(["x", "y", "z"]) + c2 = Categorical(["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical( + ["x", "y", "z", "a", "b", "c"], categories=["x", "y", "z", "a", "b", "c"] + ) + tm.assert_categorical_equal(result, expected) + + # fastpath + c1 = Categorical(["a", "b"], categories=["b", "a", "c"]) + c2 = Categorical(["b", "c"], categories=["b", "a", "c"]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(["a", "b", "b", "c"], categories=["b", "a", "c"]) + tm.assert_categorical_equal(result, expected) + + # fastpath - skip resort + c1 = Categorical(["a", "b"], categories=["a", "b", "c"]) + c2 = Categorical(["b", "c"], categories=["a", "b", "c"]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical(["a", "b", "b", "c"], categories=["a", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["x", np.nan]) + c2 = Categorical([np.nan, "b"]) + result = union_categoricals([c1, c2], sort_categories=False) + expected 
= Categorical(["x", np.nan, np.nan, "b"], categories=["x", "b"]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([np.nan]) + c2 = Categorical([np.nan]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([np.nan, np.nan]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical([]) + c2 = Categorical([]) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical([]) + tm.assert_categorical_equal(result, expected) + + c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True) + c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True) + result = union_categoricals([c1, c2], sort_categories=False) + expected = Categorical( + ["b", "a", "a", "c"], categories=["b", "a", "c"], ordered=True + ) + tm.assert_categorical_equal(result, expected) + + def test_union_categorical_unwrap(self): + # GH 14173 + c1 = Categorical(["a", "b"]) + c2 = pd.Series(["b", "c"], dtype="category") + result = union_categoricals([c1, c2]) + expected = Categorical(["a", "b", "b", "c"]) + tm.assert_categorical_equal(result, expected) + + c2 = CategoricalIndex(c2) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + c1 = Series(c1) + result = union_categoricals([c1, c2]) + tm.assert_categorical_equal(result, expected) + + with pytest.raises(TypeError): + union_categoricals([c1, ["a", "b", "c"]]) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py new file mode 100644 index 00000000..cd518dda --- /dev/null +++ b/pandas/tests/reshape/test_util.py @@ -0,0 +1,51 @@ +import numpy as np +import pytest + +from pandas import Index, date_range +import pandas._testing as tm +from pandas.core.reshape.util import cartesian_product + + +class TestCartesianProduct: + def test_simple(self): + x, y = list("ABC"), [1, 22] + result1, result2 = cartesian_product([x, y]) + expected1 = np.array(["A", "A", "B", "B", "C", "C"]) + 
expected2 = np.array([1, 22, 1, 22, 1, 22]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_datetimeindex(self): + # regression test for GitHub issue #6439 + # make sure that the ordering on datetimeindex is consistent + x = date_range("2000-01-01", periods=2) + result1, result2 = [Index(y).day for y in cartesian_product([x, x])] + expected1 = Index([1, 1, 2, 2]) + expected2 = Index([1, 2, 1, 2]) + tm.assert_index_equal(result1, expected1) + tm.assert_index_equal(result2, expected2) + + def test_empty(self): + # product of empty factors + X = [[], [0, 1], []] + Y = [[], [], ["a", "b", "c"]] + for x, y in zip(X, Y): + expected1 = np.array([], dtype=np.asarray(x).dtype) + expected2 = np.array([], dtype=np.asarray(y).dtype) + result1, result2 = cartesian_product([x, y]) + tm.assert_numpy_array_equal(result1, expected1) + tm.assert_numpy_array_equal(result2, expected2) + + # empty product (empty input): + result = cartesian_product([]) + expected = [] + assert result == expected + + @pytest.mark.parametrize( + "X", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]] + ) + def test_invalid_input(self, X): + msg = "Input must be a list-like of list-likes" + + with pytest.raises(TypeError, match=msg): + cartesian_product(X=X) diff --git a/pandas/tests/scalar/__init__.py b/pandas/tests/scalar/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/scalar/interval/__init__.py b/pandas/tests/scalar/interval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/scalar/interval/test_interval.py b/pandas/tests/scalar/interval/test_interval.py new file mode 100644 index 00000000..b51429d0 --- /dev/null +++ b/pandas/tests/scalar/interval/test_interval.py @@ -0,0 +1,259 @@ +import numpy as np +import pytest + +from pandas import Interval, Period, Timedelta, Timestamp +import pandas.core.common as com + + +@pytest.fixture +def interval(): + return 
Interval(0, 1) + + +class TestInterval: + def test_properties(self, interval): + assert interval.closed == "right" + assert interval.left == 0 + assert interval.right == 1 + assert interval.mid == 0.5 + + def test_repr(self, interval): + assert repr(interval) == "Interval(0, 1, closed='right')" + assert str(interval) == "(0, 1]" + + interval_left = Interval(0, 1, closed="left") + assert repr(interval_left) == "Interval(0, 1, closed='left')" + assert str(interval_left) == "[0, 1)" + + def test_contains(self, interval): + assert 0.5 in interval + assert 1 in interval + assert 0 not in interval + + msg = "__contains__ not defined for two intervals" + with pytest.raises(TypeError, match=msg): + interval in interval + + interval_both = Interval(0, 1, closed="both") + assert 0 in interval_both + assert 1 in interval_both + + interval_neither = Interval(0, 1, closed="neither") + assert 0 not in interval_neither + assert 0.5 in interval_neither + assert 1 not in interval_neither + + def test_equal(self): + assert Interval(0, 1) == Interval(0, 1, closed="right") + assert Interval(0, 1) != Interval(0, 1, closed="left") + assert Interval(0, 1) != 0 + + def test_comparison(self): + with pytest.raises(TypeError, match="unorderable types"): + Interval(0, 1) < 2 + + assert Interval(0, 1) < Interval(1, 2) + assert Interval(0, 1) < Interval(0, 2) + assert Interval(0, 1) < Interval(0.5, 1.5) + assert Interval(0, 1) <= Interval(0, 1) + assert Interval(0, 1) > Interval(-1, 2) + assert Interval(0, 1) >= Interval(0, 1) + + def test_hash(self, interval): + # should not raise + hash(interval) + + @pytest.mark.parametrize( + "left, right, expected", + [ + (0, 5, 5), + (-2, 5.5, 7.5), + (10, 10, 0), + (10, np.inf, np.inf), + (-np.inf, -5, np.inf), + (-np.inf, np.inf, np.inf), + (Timedelta("0 days"), Timedelta("5 days"), Timedelta("5 days")), + (Timedelta("10 days"), Timedelta("10 days"), Timedelta("0 days")), + (Timedelta("1H10M"), Timedelta("5H5M"), Timedelta("3H55M")), + (Timedelta("5S"), 
Timedelta("1H"), Timedelta("59M55S")), + ], + ) + def test_length(self, left, right, expected): + # GH 18789 + iv = Interval(left, right) + result = iv.length + assert result == expected + + @pytest.mark.parametrize( + "left, right, expected", + [ + ("2017-01-01", "2017-01-06", "5 days"), + ("2017-01-01", "2017-01-01 12:00:00", "12 hours"), + ("2017-01-01 12:00", "2017-01-01 12:00:00", "0 days"), + ("2017-01-01 12:01", "2017-01-05 17:31:00", "4 days 5 hours 30 min"), + ], + ) + @pytest.mark.parametrize("tz", (None, "UTC", "CET", "US/Eastern")) + def test_length_timestamp(self, tz, left, right, expected): + # GH 18789 + iv = Interval(Timestamp(left, tz=tz), Timestamp(right, tz=tz)) + result = iv.length + expected = Timedelta(expected) + assert result == expected + + @pytest.mark.parametrize( + "left, right", + [ + (0, 1), + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timestamp("2018-01-02")), + ( + Timestamp("2018-01-01", tz="US/Eastern"), + Timestamp("2018-01-02", tz="US/Eastern"), + ), + ], + ) + def test_is_empty(self, left, right, closed): + # GH27219 + # non-empty always return False + iv = Interval(left, right, closed) + assert iv.is_empty is False + + # same endpoint is empty except when closed='both' (contains one point) + iv = Interval(left, left, closed) + result = iv.is_empty + expected = closed != "both" + assert result is expected + + @pytest.mark.parametrize( + "left, right", + [ + ("a", "z"), + (("a", "b"), ("c", "d")), + (list("AB"), list("ab")), + (Interval(0, 1), Interval(1, 2)), + (Period("2018Q1", freq="Q"), Period("2018Q1", freq="Q")), + ], + ) + def test_construct_errors(self, left, right): + # GH 23013 + msg = "Only numeric, Timestamp and Timedelta endpoints are allowed" + with pytest.raises(ValueError, match=msg): + Interval(left, right) + + def test_math_add(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(1, 2, closed=closed) + + result = interval + 1 + assert result == expected + + 
result = 1 + interval + assert result == expected + + result = interval + result += 1 + assert result == expected + + msg = r"unsupported operand type\(s\) for \+" + with pytest.raises(TypeError, match=msg): + interval + interval + + with pytest.raises(TypeError, match=msg): + interval + "foo" + + def test_math_sub(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(-1, 0, closed=closed) + + result = interval - 1 + assert result == expected + + result = interval + result -= 1 + assert result == expected + + msg = r"unsupported operand type\(s\) for -" + with pytest.raises(TypeError, match=msg): + interval - interval + + with pytest.raises(TypeError, match=msg): + interval - "foo" + + def test_math_mult(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(0, 2, closed=closed) + + result = interval * 2 + assert result == expected + + result = 2 * interval + assert result == expected + + result = interval + result *= 2 + assert result == expected + + msg = r"unsupported operand type\(s\) for \*" + with pytest.raises(TypeError, match=msg): + interval * interval + + msg = r"can\'t multiply sequence by non-int" + with pytest.raises(TypeError, match=msg): + interval * "foo" + + def test_math_div(self, closed): + interval = Interval(0, 1, closed=closed) + expected = Interval(0, 0.5, closed=closed) + + result = interval / 2.0 + assert result == expected + + result = interval + result /= 2.0 + assert result == expected + + msg = r"unsupported operand type\(s\) for /" + with pytest.raises(TypeError, match=msg): + interval / interval + + with pytest.raises(TypeError, match=msg): + interval / "foo" + + def test_math_floordiv(self, closed): + interval = Interval(1, 2, closed=closed) + expected = Interval(0, 1, closed=closed) + + result = interval // 2 + assert result == expected + + result = interval + result //= 2 + assert result == expected + + msg = r"unsupported operand type\(s\) for //" + with pytest.raises(TypeError, 
match=msg): + interval // interval + + with pytest.raises(TypeError, match=msg): + interval // "foo" + + def test_constructor_errors(self): + msg = "invalid option for 'closed': foo" + with pytest.raises(ValueError, match=msg): + Interval(0, 1, closed="foo") + + msg = "left side of interval must be <= right side" + with pytest.raises(ValueError, match=msg): + Interval(1, 0) + + @pytest.mark.parametrize( + "tz_left, tz_right", [(None, "UTC"), ("UTC", None), ("UTC", "US/Eastern")] + ) + def test_constructor_errors_tz(self, tz_left, tz_right): + # GH 18538 + left = Timestamp("2017-01-01", tz=tz_left) + right = Timestamp("2017-01-02", tz=tz_right) + error = TypeError if com.any_none(tz_left, tz_right) else ValueError + with pytest.raises(error): + Interval(left, right) diff --git a/pandas/tests/scalar/interval/test_ops.py b/pandas/tests/scalar/interval/test_ops.py new file mode 100644 index 00000000..2d9f0954 --- /dev/null +++ b/pandas/tests/scalar/interval/test_ops.py @@ -0,0 +1,64 @@ +"""Tests for Interval-Interval operations, such as overlaps, contains, etc.""" +import pytest + +from pandas import Interval, Timedelta, Timestamp + + +@pytest.fixture( + params=[ + (Timedelta("0 days"), Timedelta("1 day")), + (Timestamp("2018-01-01"), Timedelta("1 day")), + (0, 1), + ], + ids=lambda x: type(x[0]).__name__, +) +def start_shift(request): + """ + Fixture for generating intervals of types from a start value and a shift + value that can be added to start to generate an endpoint + """ + return request.param + + +class TestOverlaps: + def test_overlaps_self(self, start_shift, closed): + start, shift = start_shift + interval = Interval(start, start + shift, closed) + assert interval.overlaps(interval) + + def test_overlaps_nested(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + 3 * shift, other_closed) + interval2 = Interval(start + shift, start + 2 * shift, closed) + + # nested intervals should always overlap + 
assert interval1.overlaps(interval2) + + def test_overlaps_disjoint(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + shift, other_closed) + interval2 = Interval(start + 2 * shift, start + 3 * shift, closed) + + # disjoint intervals should never overlap + assert not interval1.overlaps(interval2) + + def test_overlaps_endpoint(self, start_shift, closed, other_closed): + start, shift = start_shift + interval1 = Interval(start, start + shift, other_closed) + interval2 = Interval(start + shift, start + 2 * shift, closed) + + # overlap if shared endpoint is closed for both (overlap at a point) + result = interval1.overlaps(interval2) + expected = interval1.closed_right and interval2.closed_left + assert result == expected + + @pytest.mark.parametrize( + "other", + [10, True, "foo", Timedelta("1 day"), Timestamp("2018-01-01")], + ids=lambda x: type(x).__name__, + ) + def test_overlaps_invalid_type(self, other): + interval = Interval(0, 1) + msg = f"`other` must be an Interval, got {type(other).__name__}" + with pytest.raises(TypeError, match=msg): + interval.overlaps(other) diff --git a/pandas/tests/scalar/period/__init__.py b/pandas/tests/scalar/period/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py new file mode 100644 index 00000000..357274e7 --- /dev/null +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -0,0 +1,780 @@ +import pytest + +from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG, _period_code_map +from pandas.errors import OutOfBoundsDatetime + +from pandas import Period, offsets + + +class TestFreqConversion: + """Test frequency conversion of date objects""" + + @pytest.mark.parametrize("freq", ["A", "Q", "M", "W", "B", "D"]) + def test_asfreq_near_zero(self, freq): + # GH#19643, GH#19650 + per = Period("0001-01-01", freq=freq) + tup1 = (per.year, per.month, per.day) + + prev =
per - 1 + assert prev.ordinal == per.ordinal - 1 + tup2 = (prev.year, prev.month, prev.day) + assert tup2 < tup1 + + def test_asfreq_near_zero_weekly(self): + # GH#19834 + per1 = Period("0001-01-01", "D") + 6 + per2 = Period("0001-01-01", "D") - 6 + week1 = per1.asfreq("W") + week2 = per2.asfreq("W") + assert week1 != week2 + assert week1.asfreq("D", "E") >= per1 + assert week2.asfreq("D", "S") <= per2 + + def test_to_timestamp_out_of_bounds(self): + # GH#19643, used to incorrectly give Timestamp in 1754 + per = Period("0001-01-01", freq="B") + with pytest.raises(OutOfBoundsDatetime): + per.to_timestamp() + + def test_asfreq_corner(self): + val = Period(freq="A", year=2007) + result1 = val.asfreq("5t") + result2 = val.asfreq("t") + expected = Period("2007-12-31 23:59", freq="t") + assert result1.ordinal == expected.ordinal + assert result1.freqstr == "5T" + assert result2.ordinal == expected.ordinal + assert result2.freqstr == "T" + + def test_conv_annual(self): + # frequency conversion tests: from Annual Frequency + + ival_A = Period(freq="A", year=2007) + + ival_AJAN = Period(freq="A-JAN", year=2007) + ival_AJUN = Period(freq="A-JUN", year=2007) + ival_ANOV = Period(freq="A-NOV", year=2007) + + ival_A_to_Q_start = Period(freq="Q", year=2007, quarter=1) + ival_A_to_Q_end = Period(freq="Q", year=2007, quarter=4) + ival_A_to_M_start = Period(freq="M", year=2007, month=1) + ival_A_to_M_end = Period(freq="M", year=2007, month=12) + ival_A_to_W_start = Period(freq="W", year=2007, month=1, day=1) + ival_A_to_W_end = Period(freq="W", year=2007, month=12, day=31) + ival_A_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_A_to_B_end = Period(freq="B", year=2007, month=12, day=31) + ival_A_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_A_to_D_end = Period(freq="D", year=2007, month=12, day=31) + ival_A_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_A_to_H_end = Period(freq="H", year=2007, month=12, day=31, hour=23) + 
ival_A_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_A_to_T_end = Period( + freq="Min", year=2007, month=12, day=31, hour=23, minute=59 + ) + ival_A_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_A_to_S_end = Period( + freq="S", year=2007, month=12, day=31, hour=23, minute=59, second=59 + ) + + ival_AJAN_to_D_end = Period(freq="D", year=2007, month=1, day=31) + ival_AJAN_to_D_start = Period(freq="D", year=2006, month=2, day=1) + ival_AJUN_to_D_end = Period(freq="D", year=2007, month=6, day=30) + ival_AJUN_to_D_start = Period(freq="D", year=2006, month=7, day=1) + ival_ANOV_to_D_end = Period(freq="D", year=2007, month=11, day=30) + ival_ANOV_to_D_start = Period(freq="D", year=2006, month=12, day=1) + + assert ival_A.asfreq("Q", "S") == ival_A_to_Q_start + assert ival_A.asfreq("Q", "e") == ival_A_to_Q_end + assert ival_A.asfreq("M", "s") == ival_A_to_M_start + assert ival_A.asfreq("M", "E") == ival_A_to_M_end + assert ival_A.asfreq("W", "S") == ival_A_to_W_start + assert ival_A.asfreq("W", "E") == ival_A_to_W_end + assert ival_A.asfreq("B", "S") == ival_A_to_B_start + assert ival_A.asfreq("B", "E") == ival_A_to_B_end + assert ival_A.asfreq("D", "S") == ival_A_to_D_start + assert ival_A.asfreq("D", "E") == ival_A_to_D_end + assert ival_A.asfreq("H", "S") == ival_A_to_H_start + assert ival_A.asfreq("H", "E") == ival_A_to_H_end + assert ival_A.asfreq("min", "S") == ival_A_to_T_start + assert ival_A.asfreq("min", "E") == ival_A_to_T_end + assert ival_A.asfreq("T", "S") == ival_A_to_T_start + assert ival_A.asfreq("T", "E") == ival_A_to_T_end + assert ival_A.asfreq("S", "S") == ival_A_to_S_start + assert ival_A.asfreq("S", "E") == ival_A_to_S_end + + assert ival_AJAN.asfreq("D", "S") == ival_AJAN_to_D_start + assert ival_AJAN.asfreq("D", "E") == ival_AJAN_to_D_end + + assert ival_AJUN.asfreq("D", "S") == ival_AJUN_to_D_start + assert ival_AJUN.asfreq("D", "E") == 
ival_AJUN_to_D_end + + assert ival_ANOV.asfreq("D", "S") == ival_ANOV_to_D_start + assert ival_ANOV.asfreq("D", "E") == ival_ANOV_to_D_end + + assert ival_A.asfreq("A") == ival_A + + def test_conv_quarterly(self): + # frequency conversion tests: from Quarterly Frequency + + ival_Q = Period(freq="Q", year=2007, quarter=1) + ival_Q_end_of_year = Period(freq="Q", year=2007, quarter=4) + + ival_QEJAN = Period(freq="Q-JAN", year=2007, quarter=1) + ival_QEJUN = Period(freq="Q-JUN", year=2007, quarter=1) + + ival_Q_to_A = Period(freq="A", year=2007) + ival_Q_to_M_start = Period(freq="M", year=2007, month=1) + ival_Q_to_M_end = Period(freq="M", year=2007, month=3) + ival_Q_to_W_start = Period(freq="W", year=2007, month=1, day=1) + ival_Q_to_W_end = Period(freq="W", year=2007, month=3, day=31) + ival_Q_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_Q_to_B_end = Period(freq="B", year=2007, month=3, day=30) + ival_Q_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_Q_to_D_end = Period(freq="D", year=2007, month=3, day=31) + ival_Q_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_Q_to_H_end = Period(freq="H", year=2007, month=3, day=31, hour=23) + ival_Q_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_Q_to_T_end = Period( + freq="Min", year=2007, month=3, day=31, hour=23, minute=59 + ) + ival_Q_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_Q_to_S_end = Period( + freq="S", year=2007, month=3, day=31, hour=23, minute=59, second=59 + ) + + ival_QEJAN_to_D_start = Period(freq="D", year=2006, month=2, day=1) + ival_QEJAN_to_D_end = Period(freq="D", year=2006, month=4, day=30) + + ival_QEJUN_to_D_start = Period(freq="D", year=2006, month=7, day=1) + ival_QEJUN_to_D_end = Period(freq="D", year=2006, month=9, day=30) + + assert ival_Q.asfreq("A") == ival_Q_to_A + assert ival_Q_end_of_year.asfreq("A") == ival_Q_to_A + + assert 
ival_Q.asfreq("M", "S") == ival_Q_to_M_start + assert ival_Q.asfreq("M", "E") == ival_Q_to_M_end + assert ival_Q.asfreq("W", "S") == ival_Q_to_W_start + assert ival_Q.asfreq("W", "E") == ival_Q_to_W_end + assert ival_Q.asfreq("B", "S") == ival_Q_to_B_start + assert ival_Q.asfreq("B", "E") == ival_Q_to_B_end + assert ival_Q.asfreq("D", "S") == ival_Q_to_D_start + assert ival_Q.asfreq("D", "E") == ival_Q_to_D_end + assert ival_Q.asfreq("H", "S") == ival_Q_to_H_start + assert ival_Q.asfreq("H", "E") == ival_Q_to_H_end + assert ival_Q.asfreq("Min", "S") == ival_Q_to_T_start + assert ival_Q.asfreq("Min", "E") == ival_Q_to_T_end + assert ival_Q.asfreq("S", "S") == ival_Q_to_S_start + assert ival_Q.asfreq("S", "E") == ival_Q_to_S_end + + assert ival_QEJAN.asfreq("D", "S") == ival_QEJAN_to_D_start + assert ival_QEJAN.asfreq("D", "E") == ival_QEJAN_to_D_end + assert ival_QEJUN.asfreq("D", "S") == ival_QEJUN_to_D_start + assert ival_QEJUN.asfreq("D", "E") == ival_QEJUN_to_D_end + + assert ival_Q.asfreq("Q") == ival_Q + + def test_conv_monthly(self): + # frequency conversion tests: from Monthly Frequency + + ival_M = Period(freq="M", year=2007, month=1) + ival_M_end_of_year = Period(freq="M", year=2007, month=12) + ival_M_end_of_quarter = Period(freq="M", year=2007, month=3) + ival_M_to_A = Period(freq="A", year=2007) + ival_M_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_M_to_W_start = Period(freq="W", year=2007, month=1, day=1) + ival_M_to_W_end = Period(freq="W", year=2007, month=1, day=31) + ival_M_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_M_to_B_end = Period(freq="B", year=2007, month=1, day=31) + ival_M_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_M_to_D_end = Period(freq="D", year=2007, month=1, day=31) + ival_M_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_M_to_H_end = Period(freq="H", year=2007, month=1, day=31, hour=23) + ival_M_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, 
hour=0, minute=0 + ) + ival_M_to_T_end = Period( + freq="Min", year=2007, month=1, day=31, hour=23, minute=59 + ) + ival_M_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_M_to_S_end = Period( + freq="S", year=2007, month=1, day=31, hour=23, minute=59, second=59 + ) + + assert ival_M.asfreq("A") == ival_M_to_A + assert ival_M_end_of_year.asfreq("A") == ival_M_to_A + assert ival_M.asfreq("Q") == ival_M_to_Q + assert ival_M_end_of_quarter.asfreq("Q") == ival_M_to_Q + + assert ival_M.asfreq("W", "S") == ival_M_to_W_start + assert ival_M.asfreq("W", "E") == ival_M_to_W_end + assert ival_M.asfreq("B", "S") == ival_M_to_B_start + assert ival_M.asfreq("B", "E") == ival_M_to_B_end + assert ival_M.asfreq("D", "S") == ival_M_to_D_start + assert ival_M.asfreq("D", "E") == ival_M_to_D_end + assert ival_M.asfreq("H", "S") == ival_M_to_H_start + assert ival_M.asfreq("H", "E") == ival_M_to_H_end + assert ival_M.asfreq("Min", "S") == ival_M_to_T_start + assert ival_M.asfreq("Min", "E") == ival_M_to_T_end + assert ival_M.asfreq("S", "S") == ival_M_to_S_start + assert ival_M.asfreq("S", "E") == ival_M_to_S_end + + assert ival_M.asfreq("M") == ival_M + + def test_conv_weekly(self): + # frequency conversion tests: from Weekly Frequency + ival_W = Period(freq="W", year=2007, month=1, day=1) + + ival_WSUN = Period(freq="W", year=2007, month=1, day=7) + ival_WSAT = Period(freq="W-SAT", year=2007, month=1, day=6) + ival_WFRI = Period(freq="W-FRI", year=2007, month=1, day=5) + ival_WTHU = Period(freq="W-THU", year=2007, month=1, day=4) + ival_WWED = Period(freq="W-WED", year=2007, month=1, day=3) + ival_WTUE = Period(freq="W-TUE", year=2007, month=1, day=2) + ival_WMON = Period(freq="W-MON", year=2007, month=1, day=1) + + ival_WSUN_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_WSUN_to_D_end = Period(freq="D", year=2007, month=1, day=7) + ival_WSAT_to_D_start = Period(freq="D", year=2006, month=12, day=31) + 
ival_WSAT_to_D_end = Period(freq="D", year=2007, month=1, day=6) + ival_WFRI_to_D_start = Period(freq="D", year=2006, month=12, day=30) + ival_WFRI_to_D_end = Period(freq="D", year=2007, month=1, day=5) + ival_WTHU_to_D_start = Period(freq="D", year=2006, month=12, day=29) + ival_WTHU_to_D_end = Period(freq="D", year=2007, month=1, day=4) + ival_WWED_to_D_start = Period(freq="D", year=2006, month=12, day=28) + ival_WWED_to_D_end = Period(freq="D", year=2007, month=1, day=3) + ival_WTUE_to_D_start = Period(freq="D", year=2006, month=12, day=27) + ival_WTUE_to_D_end = Period(freq="D", year=2007, month=1, day=2) + ival_WMON_to_D_start = Period(freq="D", year=2006, month=12, day=26) + ival_WMON_to_D_end = Period(freq="D", year=2007, month=1, day=1) + + ival_W_end_of_year = Period(freq="W", year=2007, month=12, day=31) + ival_W_end_of_quarter = Period(freq="W", year=2007, month=3, day=31) + ival_W_end_of_month = Period(freq="W", year=2007, month=1, day=31) + ival_W_to_A = Period(freq="A", year=2007) + ival_W_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_W_to_M = Period(freq="M", year=2007, month=1) + + if Period(freq="D", year=2007, month=12, day=31).weekday == 6: + ival_W_to_A_end_of_year = Period(freq="A", year=2007) + else: + ival_W_to_A_end_of_year = Period(freq="A", year=2008) + + if Period(freq="D", year=2007, month=3, day=31).weekday == 6: + ival_W_to_Q_end_of_quarter = Period(freq="Q", year=2007, quarter=1) + else: + ival_W_to_Q_end_of_quarter = Period(freq="Q", year=2007, quarter=2) + + if Period(freq="D", year=2007, month=1, day=31).weekday == 6: + ival_W_to_M_end_of_month = Period(freq="M", year=2007, month=1) + else: + ival_W_to_M_end_of_month = Period(freq="M", year=2007, month=2) + + ival_W_to_B_start = Period(freq="B", year=2007, month=1, day=1) + ival_W_to_B_end = Period(freq="B", year=2007, month=1, day=5) + ival_W_to_D_start = Period(freq="D", year=2007, month=1, day=1) + ival_W_to_D_end = Period(freq="D", year=2007, month=1, day=7) + 
ival_W_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_W_to_H_end = Period(freq="H", year=2007, month=1, day=7, hour=23) + ival_W_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_W_to_T_end = Period( + freq="Min", year=2007, month=1, day=7, hour=23, minute=59 + ) + ival_W_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_W_to_S_end = Period( + freq="S", year=2007, month=1, day=7, hour=23, minute=59, second=59 + ) + + assert ival_W.asfreq("A") == ival_W_to_A + assert ival_W_end_of_year.asfreq("A") == ival_W_to_A_end_of_year + + assert ival_W.asfreq("Q") == ival_W_to_Q + assert ival_W_end_of_quarter.asfreq("Q") == ival_W_to_Q_end_of_quarter + + assert ival_W.asfreq("M") == ival_W_to_M + assert ival_W_end_of_month.asfreq("M") == ival_W_to_M_end_of_month + + assert ival_W.asfreq("B", "S") == ival_W_to_B_start + assert ival_W.asfreq("B", "E") == ival_W_to_B_end + + assert ival_W.asfreq("D", "S") == ival_W_to_D_start + assert ival_W.asfreq("D", "E") == ival_W_to_D_end + + assert ival_WSUN.asfreq("D", "S") == ival_WSUN_to_D_start + assert ival_WSUN.asfreq("D", "E") == ival_WSUN_to_D_end + assert ival_WSAT.asfreq("D", "S") == ival_WSAT_to_D_start + assert ival_WSAT.asfreq("D", "E") == ival_WSAT_to_D_end + assert ival_WFRI.asfreq("D", "S") == ival_WFRI_to_D_start + assert ival_WFRI.asfreq("D", "E") == ival_WFRI_to_D_end + assert ival_WTHU.asfreq("D", "S") == ival_WTHU_to_D_start + assert ival_WTHU.asfreq("D", "E") == ival_WTHU_to_D_end + assert ival_WWED.asfreq("D", "S") == ival_WWED_to_D_start + assert ival_WWED.asfreq("D", "E") == ival_WWED_to_D_end + assert ival_WTUE.asfreq("D", "S") == ival_WTUE_to_D_start + assert ival_WTUE.asfreq("D", "E") == ival_WTUE_to_D_end + assert ival_WMON.asfreq("D", "S") == ival_WMON_to_D_start + assert ival_WMON.asfreq("D", "E") == ival_WMON_to_D_end + + assert ival_W.asfreq("H", "S") == ival_W_to_H_start + assert 
ival_W.asfreq("H", "E") == ival_W_to_H_end + assert ival_W.asfreq("Min", "S") == ival_W_to_T_start + assert ival_W.asfreq("Min", "E") == ival_W_to_T_end + assert ival_W.asfreq("S", "S") == ival_W_to_S_start + assert ival_W.asfreq("S", "E") == ival_W_to_S_end + + assert ival_W.asfreq("W") == ival_W + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + ival_W.asfreq("WK") + + def test_conv_weekly_legacy(self): + # frequency conversion tests: from Weekly Frequency + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + Period(freq="WK", year=2007, month=1, day=1) + + with pytest.raises(ValueError, match=msg): + Period(freq="WK-SAT", year=2007, month=1, day=6) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-FRI", year=2007, month=1, day=5) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-THU", year=2007, month=1, day=4) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-WED", year=2007, month=1, day=3) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-TUE", year=2007, month=1, day=2) + with pytest.raises(ValueError, match=msg): + Period(freq="WK-MON", year=2007, month=1, day=1) + + def test_conv_business(self): + # frequency conversion tests: from Business Frequency" + + ival_B = Period(freq="B", year=2007, month=1, day=1) + ival_B_end_of_year = Period(freq="B", year=2007, month=12, day=31) + ival_B_end_of_quarter = Period(freq="B", year=2007, month=3, day=30) + ival_B_end_of_month = Period(freq="B", year=2007, month=1, day=31) + ival_B_end_of_week = Period(freq="B", year=2007, month=1, day=5) + + ival_B_to_A = Period(freq="A", year=2007) + ival_B_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_B_to_M = Period(freq="M", year=2007, month=1) + ival_B_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_B_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_B_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_B_to_H_end = Period(freq="H", 
year=2007, month=1, day=1, hour=23) + ival_B_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_B_to_T_end = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) + ival_B_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_B_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + + assert ival_B.asfreq("A") == ival_B_to_A + assert ival_B_end_of_year.asfreq("A") == ival_B_to_A + assert ival_B.asfreq("Q") == ival_B_to_Q + assert ival_B_end_of_quarter.asfreq("Q") == ival_B_to_Q + assert ival_B.asfreq("M") == ival_B_to_M + assert ival_B_end_of_month.asfreq("M") == ival_B_to_M + assert ival_B.asfreq("W") == ival_B_to_W + assert ival_B_end_of_week.asfreq("W") == ival_B_to_W + + assert ival_B.asfreq("D") == ival_B_to_D + + assert ival_B.asfreq("H", "S") == ival_B_to_H_start + assert ival_B.asfreq("H", "E") == ival_B_to_H_end + assert ival_B.asfreq("Min", "S") == ival_B_to_T_start + assert ival_B.asfreq("Min", "E") == ival_B_to_T_end + assert ival_B.asfreq("S", "S") == ival_B_to_S_start + assert ival_B.asfreq("S", "E") == ival_B_to_S_end + + assert ival_B.asfreq("B") == ival_B + + def test_conv_daily(self): + # frequency conversion tests: from Daily Frequency + + ival_D = Period(freq="D", year=2007, month=1, day=1) + ival_D_end_of_year = Period(freq="D", year=2007, month=12, day=31) + ival_D_end_of_quarter = Period(freq="D", year=2007, month=3, day=31) + ival_D_end_of_month = Period(freq="D", year=2007, month=1, day=31) + ival_D_end_of_week = Period(freq="D", year=2007, month=1, day=7) + + ival_D_friday = Period(freq="D", year=2007, month=1, day=5) + ival_D_saturday = Period(freq="D", year=2007, month=1, day=6) + ival_D_sunday = Period(freq="D", year=2007, month=1, day=7) + + # TODO: unused?
+ # ival_D_monday = Period(freq='D', year=2007, month=1, day=8) + + ival_B_friday = Period(freq="B", year=2007, month=1, day=5) + ival_B_monday = Period(freq="B", year=2007, month=1, day=8) + + ival_D_to_A = Period(freq="A", year=2007) + + ival_Deoq_to_AJAN = Period(freq="A-JAN", year=2008) + ival_Deoq_to_AJUN = Period(freq="A-JUN", year=2007) + ival_Deoq_to_ADEC = Period(freq="A-DEC", year=2007) + + ival_D_to_QEJAN = Period(freq="Q-JAN", year=2007, quarter=4) + ival_D_to_QEJUN = Period(freq="Q-JUN", year=2007, quarter=3) + ival_D_to_QEDEC = Period(freq="Q-DEC", year=2007, quarter=1) + + ival_D_to_M = Period(freq="M", year=2007, month=1) + ival_D_to_W = Period(freq="W", year=2007, month=1, day=7) + + ival_D_to_H_start = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_D_to_H_end = Period(freq="H", year=2007, month=1, day=1, hour=23) + ival_D_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_D_to_T_end = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) + ival_D_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_D_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + + assert ival_D.asfreq("A") == ival_D_to_A + + assert ival_D_end_of_quarter.asfreq("A-JAN") == ival_Deoq_to_AJAN + assert ival_D_end_of_quarter.asfreq("A-JUN") == ival_Deoq_to_AJUN + assert ival_D_end_of_quarter.asfreq("A-DEC") == ival_Deoq_to_ADEC + + assert ival_D_end_of_year.asfreq("A") == ival_D_to_A + assert ival_D_end_of_quarter.asfreq("Q") == ival_D_to_QEDEC + assert ival_D.asfreq("Q-JAN") == ival_D_to_QEJAN + assert ival_D.asfreq("Q-JUN") == ival_D_to_QEJUN + assert ival_D.asfreq("Q-DEC") == ival_D_to_QEDEC + assert ival_D.asfreq("M") == ival_D_to_M + assert ival_D_end_of_month.asfreq("M") == ival_D_to_M + assert ival_D.asfreq("W") == ival_D_to_W + assert ival_D_end_of_week.asfreq("W") == ival_D_to_W + + assert ival_D_friday.asfreq("B") == 
ival_B_friday + assert ival_D_saturday.asfreq("B", "S") == ival_B_friday + assert ival_D_saturday.asfreq("B", "E") == ival_B_monday + assert ival_D_sunday.asfreq("B", "S") == ival_B_friday + assert ival_D_sunday.asfreq("B", "E") == ival_B_monday + + assert ival_D.asfreq("H", "S") == ival_D_to_H_start + assert ival_D.asfreq("H", "E") == ival_D_to_H_end + assert ival_D.asfreq("Min", "S") == ival_D_to_T_start + assert ival_D.asfreq("Min", "E") == ival_D_to_T_end + assert ival_D.asfreq("S", "S") == ival_D_to_S_start + assert ival_D.asfreq("S", "E") == ival_D_to_S_end + + assert ival_D.asfreq("D") == ival_D + + def test_conv_hourly(self): + # frequency conversion tests: from Hourly Frequency" + + ival_H = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_H_end_of_year = Period(freq="H", year=2007, month=12, day=31, hour=23) + ival_H_end_of_quarter = Period(freq="H", year=2007, month=3, day=31, hour=23) + ival_H_end_of_month = Period(freq="H", year=2007, month=1, day=31, hour=23) + ival_H_end_of_week = Period(freq="H", year=2007, month=1, day=7, hour=23) + ival_H_end_of_day = Period(freq="H", year=2007, month=1, day=1, hour=23) + ival_H_end_of_bus = Period(freq="H", year=2007, month=1, day=1, hour=23) + + ival_H_to_A = Period(freq="A", year=2007) + ival_H_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_H_to_M = Period(freq="M", year=2007, month=1) + ival_H_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_H_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_H_to_B = Period(freq="B", year=2007, month=1, day=1) + + ival_H_to_T_start = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0 + ) + ival_H_to_T_end = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=59 + ) + ival_H_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_H_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=59, second=59 + ) + + assert ival_H.asfreq("A") == ival_H_to_A + assert 
ival_H_end_of_year.asfreq("A") == ival_H_to_A + assert ival_H.asfreq("Q") == ival_H_to_Q + assert ival_H_end_of_quarter.asfreq("Q") == ival_H_to_Q + assert ival_H.asfreq("M") == ival_H_to_M + assert ival_H_end_of_month.asfreq("M") == ival_H_to_M + assert ival_H.asfreq("W") == ival_H_to_W + assert ival_H_end_of_week.asfreq("W") == ival_H_to_W + assert ival_H.asfreq("D") == ival_H_to_D + assert ival_H_end_of_day.asfreq("D") == ival_H_to_D + assert ival_H.asfreq("B") == ival_H_to_B + assert ival_H_end_of_bus.asfreq("B") == ival_H_to_B + + assert ival_H.asfreq("Min", "S") == ival_H_to_T_start + assert ival_H.asfreq("Min", "E") == ival_H_to_T_end + assert ival_H.asfreq("S", "S") == ival_H_to_S_start + assert ival_H.asfreq("S", "E") == ival_H_to_S_end + + assert ival_H.asfreq("H") == ival_H + + def test_conv_minutely(self): + # frequency conversion tests: from Minutely Frequency" + + ival_T = Period(freq="Min", year=2007, month=1, day=1, hour=0, minute=0) + ival_T_end_of_year = Period( + freq="Min", year=2007, month=12, day=31, hour=23, minute=59 + ) + ival_T_end_of_quarter = Period( + freq="Min", year=2007, month=3, day=31, hour=23, minute=59 + ) + ival_T_end_of_month = Period( + freq="Min", year=2007, month=1, day=31, hour=23, minute=59 + ) + ival_T_end_of_week = Period( + freq="Min", year=2007, month=1, day=7, hour=23, minute=59 + ) + ival_T_end_of_day = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) + ival_T_end_of_bus = Period( + freq="Min", year=2007, month=1, day=1, hour=23, minute=59 + ) + ival_T_end_of_hour = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=59 + ) + + ival_T_to_A = Period(freq="A", year=2007) + ival_T_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_T_to_M = Period(freq="M", year=2007, month=1) + ival_T_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_T_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_T_to_B = Period(freq="B", year=2007, month=1, day=1) + ival_T_to_H = 
Period(freq="H", year=2007, month=1, day=1, hour=0) + + ival_T_to_S_start = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + ival_T_to_S_end = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=59 + ) + + assert ival_T.asfreq("A") == ival_T_to_A + assert ival_T_end_of_year.asfreq("A") == ival_T_to_A + assert ival_T.asfreq("Q") == ival_T_to_Q + assert ival_T_end_of_quarter.asfreq("Q") == ival_T_to_Q + assert ival_T.asfreq("M") == ival_T_to_M + assert ival_T_end_of_month.asfreq("M") == ival_T_to_M + assert ival_T.asfreq("W") == ival_T_to_W + assert ival_T_end_of_week.asfreq("W") == ival_T_to_W + assert ival_T.asfreq("D") == ival_T_to_D + assert ival_T_end_of_day.asfreq("D") == ival_T_to_D + assert ival_T.asfreq("B") == ival_T_to_B + assert ival_T_end_of_bus.asfreq("B") == ival_T_to_B + assert ival_T.asfreq("H") == ival_T_to_H + assert ival_T_end_of_hour.asfreq("H") == ival_T_to_H + + assert ival_T.asfreq("S", "S") == ival_T_to_S_start + assert ival_T.asfreq("S", "E") == ival_T_to_S_end + + assert ival_T.asfreq("Min") == ival_T + + def test_conv_secondly(self): + # frequency conversion tests: from Secondly Frequency" + + ival_S = Period(freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=0) + ival_S_end_of_year = Period( + freq="S", year=2007, month=12, day=31, hour=23, minute=59, second=59 + ) + ival_S_end_of_quarter = Period( + freq="S", year=2007, month=3, day=31, hour=23, minute=59, second=59 + ) + ival_S_end_of_month = Period( + freq="S", year=2007, month=1, day=31, hour=23, minute=59, second=59 + ) + ival_S_end_of_week = Period( + freq="S", year=2007, month=1, day=7, hour=23, minute=59, second=59 + ) + ival_S_end_of_day = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + ival_S_end_of_bus = Period( + freq="S", year=2007, month=1, day=1, hour=23, minute=59, second=59 + ) + ival_S_end_of_hour = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=59, second=59 + 
) + ival_S_end_of_minute = Period( + freq="S", year=2007, month=1, day=1, hour=0, minute=0, second=59 + ) + + ival_S_to_A = Period(freq="A", year=2007) + ival_S_to_Q = Period(freq="Q", year=2007, quarter=1) + ival_S_to_M = Period(freq="M", year=2007, month=1) + ival_S_to_W = Period(freq="W", year=2007, month=1, day=7) + ival_S_to_D = Period(freq="D", year=2007, month=1, day=1) + ival_S_to_B = Period(freq="B", year=2007, month=1, day=1) + ival_S_to_H = Period(freq="H", year=2007, month=1, day=1, hour=0) + ival_S_to_T = Period(freq="Min", year=2007, month=1, day=1, hour=0, minute=0) + + assert ival_S.asfreq("A") == ival_S_to_A + assert ival_S_end_of_year.asfreq("A") == ival_S_to_A + assert ival_S.asfreq("Q") == ival_S_to_Q + assert ival_S_end_of_quarter.asfreq("Q") == ival_S_to_Q + assert ival_S.asfreq("M") == ival_S_to_M + assert ival_S_end_of_month.asfreq("M") == ival_S_to_M + assert ival_S.asfreq("W") == ival_S_to_W + assert ival_S_end_of_week.asfreq("W") == ival_S_to_W + assert ival_S.asfreq("D") == ival_S_to_D + assert ival_S_end_of_day.asfreq("D") == ival_S_to_D + assert ival_S.asfreq("B") == ival_S_to_B + assert ival_S_end_of_bus.asfreq("B") == ival_S_to_B + assert ival_S.asfreq("H") == ival_S_to_H + assert ival_S_end_of_hour.asfreq("H") == ival_S_to_H + assert ival_S.asfreq("Min") == ival_S_to_T + assert ival_S_end_of_minute.asfreq("Min") == ival_S_to_T + + assert ival_S.asfreq("S") == ival_S + + def test_asfreq_mult(self): + # normal freq to mult freq + p = Period(freq="A", year=2007) + # ordinal will not change + for freq in ["3A", offsets.YearEnd(3)]: + result = p.asfreq(freq) + expected = Period("2007", freq="3A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + # ordinal will not change + for freq in ["3A", offsets.YearEnd(3)]: + result = p.asfreq(freq, how="S") + expected = Period("2007", freq="3A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert 
result.freq == expected.freq + + # mult freq to normal freq + p = Period(freq="3A", year=2007) + # ordinal will change because how=E is the default + for freq in ["A", offsets.YearEnd()]: + result = p.asfreq(freq) + expected = Period("2009", freq="A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + # ordinal will not change + for freq in ["A", offsets.YearEnd()]: + result = p.asfreq(freq, how="S") + expected = Period("2007", freq="A") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + p = Period(freq="A", year=2007) + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period("2007-12", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq, how="S") + expected = Period("2007-01", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + p = Period(freq="3A", year=2007) + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq) + expected = Period("2009-12", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + for freq in ["2M", offsets.MonthEnd(2)]: + result = p.asfreq(freq, how="S") + expected = Period("2007-01", freq="2M") + + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + def test_asfreq_combined(self): + # normal freq to combined freq + p = Period("2007", freq="H") + + # ordinal will not change + expected = Period("2007", freq="25H") + for freq, how in zip(["1D1H", "1H1D"], ["E", "S"]): + result = p.asfreq(freq, how=how) + assert result == expected + assert result.ordinal == expected.ordinal + assert result.freq == expected.freq + + # combined freq to normal 
freq + p1 = Period(freq="1D1H", year=2007) + p2 = Period(freq="1H1D", year=2007) + + # ordinal will change because how=E is the default + result1 = p1.asfreq("H") + result2 = p2.asfreq("H") + expected = Period("2007-01-02", freq="H") + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq + + # ordinal will not change + result1 = p1.asfreq("H", how="S") + result2 = p2.asfreq("H", how="S") + expected = Period("2007-01-01", freq="H") + assert result1 == expected + assert result1.ordinal == expected.ordinal + assert result1.freq == expected.freq + assert result2 == expected + assert result2.ordinal == expected.ordinal + assert result2.freq == expected.freq + + def test_asfreq_MS(self): + initial = Period("2013") + + assert initial.asfreq(freq="M", how="S") == Period("2013-01", "M") + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + initial.asfreq(freq="MS", how="S") + + with pytest.raises(ValueError, match=msg): + Period("2013-01", "MS") + + assert _period_code_map.get("MS") is None diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py new file mode 100644 index 00000000..6af9c988 --- /dev/null +++ b/pandas/tests/scalar/period/test_period.py @@ -0,0 +1,1567 @@ +from datetime import date, datetime, timedelta +from distutils.version import StrictVersion + +import dateutil +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import iNaT, period as libperiod +from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG +from pandas._libs.tslibs.parsing import DateParseError +from pandas._libs.tslibs.period import IncompatibleFrequency +from pandas._libs.tslibs.timezones import dateutil_gettz, maybe_get_tz +from pandas.compat.numpy import 
np_datetime64_compat + +import pandas as pd +from pandas import NaT, Period, Timedelta, Timestamp, offsets +import pandas._testing as tm + + +class TestPeriodConstruction: + def test_construction(self): + i1 = Period("1/1/2005", freq="M") + i2 = Period("Jan 2005") + + assert i1 == i2 + + i1 = Period("2005", freq="A") + i2 = Period("2005") + i3 = Period("2005", freq="a") + + assert i1 == i2 + assert i1 == i3 + + i4 = Period("2005", freq="M") + i5 = Period("2005", freq="m") + + msg = r"Input has different freq=M from Period\(freq=A-DEC\)" + with pytest.raises(IncompatibleFrequency, match=msg): + i1 != i4 + assert i4 == i5 + + i1 = Period.now("Q") + i2 = Period(datetime.now(), freq="Q") + i3 = Period.now("q") + + assert i1 == i2 + assert i1 == i3 + + i1 = Period("1982", freq="min") + i2 = Period("1982", freq="MIN") + assert i1 == i2 + i2 = Period("1982", freq=("Min", 1)) + assert i1 == i2 + + i1 = Period(year=2005, month=3, day=1, freq="D") + i2 = Period("3/1/2005", freq="D") + assert i1 == i2 + + i3 = Period(year=2005, month=3, day=1, freq="d") + assert i1 == i3 + + i1 = Period("2007-01-01 09:00:00.001") + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq="L") + assert i1 == expected + + expected = Period(np_datetime64_compat("2007-01-01 09:00:00.001Z"), freq="L") + assert i1 == expected + + i1 = Period("2007-01-01 09:00:00.00101") + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq="U") + assert i1 == expected + + expected = Period(np_datetime64_compat("2007-01-01 09:00:00.00101Z"), freq="U") + assert i1 == expected + + msg = "Must supply freq for ordinal value" + with pytest.raises(ValueError, match=msg): + Period(ordinal=200701) + + with pytest.raises(ValueError, match="Invalid frequency: X"): + Period("2007-1-1", freq="X") + + def test_construction_bday(self): + + # Biz day construction, roll forward if non-weekday + i1 = Period("3/10/12", freq="B") + i2 = Period("3/10/12", freq="D") + assert i1 == i2.asfreq("B") + i2 = Period("3/11/12", 
freq="D") + assert i1 == i2.asfreq("B") + i2 = Period("3/12/12", freq="D") + assert i1 == i2.asfreq("B") + + i3 = Period("3/10/12", freq="b") + assert i1 == i3 + + i1 = Period(year=2012, month=3, day=10, freq="B") + i2 = Period("3/12/12", freq="B") + assert i1 == i2 + + def test_construction_quarter(self): + + i1 = Period(year=2005, quarter=1, freq="Q") + i2 = Period("1/1/2005", freq="Q") + assert i1 == i2 + + i1 = Period(year=2005, quarter=3, freq="Q") + i2 = Period("9/1/2005", freq="Q") + assert i1 == i2 + + i1 = Period("2005Q1") + i2 = Period(year=2005, quarter=1, freq="Q") + i3 = Period("2005q1") + assert i1 == i2 + assert i1 == i3 + + i1 = Period("05Q1") + assert i1 == i2 + lower = Period("05q1") + assert i1 == lower + + i1 = Period("1Q2005") + assert i1 == i2 + lower = Period("1q2005") + assert i1 == lower + + i1 = Period("1Q05") + assert i1 == i2 + lower = Period("1q05") + assert i1 == lower + + i1 = Period("4Q1984") + assert i1.year == 1984 + lower = Period("4q1984") + assert i1 == lower + + def test_construction_month(self): + + expected = Period("2007-01", freq="M") + i1 = Period("200701", freq="M") + assert i1 == expected + + i1 = Period("200701", freq="M") + assert i1 == expected + + i1 = Period(200701, freq="M") + assert i1 == expected + + i1 = Period(ordinal=200701, freq="M") + assert i1.year == 18695 + + i1 = Period(datetime(2007, 1, 1), freq="M") + i2 = Period("200701", freq="M") + assert i1 == i2 + + i1 = Period(date(2007, 1, 1), freq="M") + i2 = Period(datetime(2007, 1, 1), freq="M") + i3 = Period(np.datetime64("2007-01-01"), freq="M") + i4 = Period(np_datetime64_compat("2007-01-01 00:00:00Z"), freq="M") + i5 = Period(np_datetime64_compat("2007-01-01 00:00:00.000Z"), freq="M") + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 + + def test_period_constructor_offsets(self): + assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( + "1/1/2005", freq="M" + ) + assert Period("2005", freq=offsets.YearEnd()) == Period("2005", 
freq="A") + assert Period("2005", freq=offsets.MonthEnd()) == Period("2005", freq="M") + assert Period("3/10/12", freq=offsets.BusinessDay()) == Period( + "3/10/12", freq="B" + ) + assert Period("3/10/12", freq=offsets.Day()) == Period("3/10/12", freq="D") + + assert Period( + year=2005, quarter=1, freq=offsets.QuarterEnd(startingMonth=12) + ) == Period(year=2005, quarter=1, freq="Q") + assert Period( + year=2005, quarter=2, freq=offsets.QuarterEnd(startingMonth=12) + ) == Period(year=2005, quarter=2, freq="Q") + + assert Period(year=2005, month=3, day=1, freq=offsets.Day()) == Period( + year=2005, month=3, day=1, freq="D" + ) + assert Period(year=2012, month=3, day=10, freq=offsets.BDay()) == Period( + year=2012, month=3, day=10, freq="B" + ) + + expected = Period("2005-03-01", freq="3D") + assert Period(year=2005, month=3, day=1, freq=offsets.Day(3)) == expected + assert Period(year=2005, month=3, day=1, freq="3D") == expected + + assert Period(year=2012, month=3, day=10, freq=offsets.BDay(3)) == Period( + year=2012, month=3, day=10, freq="3B" + ) + + assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") + + i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) + i2 = Period(ordinal=200701, freq="M") + assert i1 == i2 + assert i1.year == 18695 + assert i2.year == 18695 + + i1 = Period(datetime(2007, 1, 1), freq="M") + i2 = Period("200701", freq="M") + assert i1 == i2 + + i1 = Period(date(2007, 1, 1), freq="M") + i2 = Period(datetime(2007, 1, 1), freq="M") + i3 = Period(np.datetime64("2007-01-01"), freq="M") + i4 = Period(np_datetime64_compat("2007-01-01 00:00:00Z"), freq="M") + i5 = Period(np_datetime64_compat("2007-01-01 00:00:00.000Z"), freq="M") + assert i1 == i2 + assert i1 == i3 + assert i1 == i4 + assert i1 == i5 + + i1 = Period("2007-01-01 09:00:00.001") + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq="L") + assert i1 == expected + + expected = Period(np_datetime64_compat("2007-01-01 09:00:00.001Z"), freq="L") + assert i1 
== expected + + i1 = Period("2007-01-01 09:00:00.00101") + expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq="U") + assert i1 == expected + + expected = Period(np_datetime64_compat("2007-01-01 09:00:00.00101Z"), freq="U") + assert i1 == expected + + def test_invalid_arguments(self): + with pytest.raises(ValueError): + Period(datetime.now()) + with pytest.raises(ValueError): + Period(datetime.now().date()) + + with pytest.raises(ValueError): + Period(1.6, freq="D") + with pytest.raises(ValueError): + Period(ordinal=1.6, freq="D") + with pytest.raises(ValueError): + Period(ordinal=2, value=1, freq="D") + + with pytest.raises(ValueError): + Period(month=1) + + with pytest.raises(ValueError): + Period("-2000", "A") + with pytest.raises(DateParseError): + Period("0", "A") + with pytest.raises(DateParseError): + Period("1/1/-2000", "A") + + def test_constructor_corner(self): + expected = Period("2007-01", freq="2M") + assert Period(year=2007, month=1, freq="2M") == expected + + assert Period(None) is NaT + + p = Period("2007-01-01", freq="D") + + result = Period(p, freq="A") + exp = Period("2007", freq="A") + assert result == exp + + def test_constructor_infer_freq(self): + p = Period("2007-01-01") + assert p.freq == "D" + + p = Period("2007-01-01 07") + assert p.freq == "H" + + p = Period("2007-01-01 07:10") + assert p.freq == "T" + + p = Period("2007-01-01 07:10:15") + assert p.freq == "S" + + p = Period("2007-01-01 07:10:15.123") + assert p.freq == "L" + + p = Period("2007-01-01 07:10:15.123000") + assert p.freq == "L" + + p = Period("2007-01-01 07:10:15.123400") + assert p.freq == "U" + + def test_multiples(self): + result1 = Period("1989", freq="2A") + result2 = Period("1989", freq="A") + assert result1.ordinal == result2.ordinal + assert result1.freqstr == "2A-DEC" + assert result2.freqstr == "A-DEC" + assert result1.freq == offsets.YearEnd(2) + assert result2.freq == offsets.YearEnd() + + assert (result1 + 1).ordinal == result1.ordinal + 2 + assert (1 + 
result1).ordinal == result1.ordinal + 2 + assert (result1 - 1).ordinal == result2.ordinal - 2 + assert (-1 + result1).ordinal == result2.ordinal - 2 + + @pytest.mark.parametrize("month", MONTHS) + def test_period_cons_quarterly(self, month): + # bugs in scikits.timeseries + freq = "Q-{month}".format(month=month) + exp = Period("1989Q3", freq=freq) + assert "1989Q3" in str(exp) + stamp = exp.to_timestamp("D", how="end") + p = Period(stamp, freq=freq) + assert p == exp + + stamp = exp.to_timestamp("3D", how="end") + p = Period(stamp, freq=freq) + assert p == exp + + @pytest.mark.parametrize("month", MONTHS) + def test_period_cons_annual(self, month): + # bugs in scikits.timeseries + freq = "A-{month}".format(month=month) + exp = Period("1989", freq=freq) + stamp = exp.to_timestamp("D", how="end") + timedelta(days=30) + p = Period(stamp, freq=freq) + + assert p == exp + 1 + assert isinstance(p, Period) + + @pytest.mark.parametrize("day", DAYS) + @pytest.mark.parametrize("num", range(10, 17)) + def test_period_cons_weekly(self, num, day): + daystr = "2011-02-{num}".format(num=num) + freq = "W-{day}".format(day=day) + + result = Period(daystr, freq=freq) + expected = Period(daystr, freq="D").asfreq(freq) + assert result == expected + assert isinstance(result, Period) + + def test_period_from_ordinal(self): + p = Period("2011-01", freq="M") + res = Period._from_ordinal(p.ordinal, freq="M") + assert p == res + assert isinstance(res, Period) + + def test_period_cons_nat(self): + p = Period("NaT", freq="M") + assert p is NaT + + p = Period("nat", freq="W-SUN") + assert p is NaT + + p = Period(iNaT, freq="D") + assert p is NaT + + p = Period(iNaT, freq="3D") + assert p is NaT + + p = Period(iNaT, freq="1D1H") + assert p is NaT + + p = Period("NaT") + assert p is NaT + + p = Period(iNaT) + assert p is NaT + + def test_period_cons_mult(self): + p1 = Period("2011-01", freq="3M") + p2 = Period("2011-01", freq="M") + assert p1.ordinal == p2.ordinal + + assert p1.freq == 
offsets.MonthEnd(3) + assert p1.freqstr == "3M" + + assert p2.freq == offsets.MonthEnd() + assert p2.freqstr == "M" + + result = p1 + 1 + assert result.ordinal == (p2 + 3).ordinal + + assert result.freq == p1.freq + assert result.freqstr == "3M" + + result = p1 - 1 + assert result.ordinal == (p2 - 3).ordinal + assert result.freq == p1.freq + assert result.freqstr == "3M" + + msg = "Frequency must be positive, because it represents span: -3M" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="-3M") + + msg = "Frequency must be positive, because it represents span: 0M" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="0M") + + def test_period_cons_combined(self): + p = [ + ( + Period("2011-01", freq="1D1H"), + Period("2011-01", freq="1H1D"), + Period("2011-01", freq="H"), + ), + ( + Period(ordinal=1, freq="1D1H"), + Period(ordinal=1, freq="1H1D"), + Period(ordinal=1, freq="H"), + ), + ] + + for p1, p2, p3 in p: + assert p1.ordinal == p3.ordinal + assert p2.ordinal == p3.ordinal + + assert p1.freq == offsets.Hour(25) + assert p1.freqstr == "25H" + + assert p2.freq == offsets.Hour(25) + assert p2.freqstr == "25H" + + assert p3.freq == offsets.Hour() + assert p3.freqstr == "H" + + result = p1 + 1 + assert result.ordinal == (p3 + 25).ordinal + assert result.freq == p1.freq + assert result.freqstr == "25H" + + result = p2 + 1 + assert result.ordinal == (p3 + 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == "25H" + + result = p1 - 1 + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p1.freq + assert result.freqstr == "25H" + + result = p2 - 1 + assert result.ordinal == (p3 - 25).ordinal + assert result.freq == p2.freq + assert result.freqstr == "25H" + + msg = "Frequency must be positive, because it represents span: -25H" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="-1D1H") + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="-1H1D") + with 
pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq="-1D1H") + with pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq="-1H1D") + + msg = "Frequency must be positive, because it represents span: 0D" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="0D0H") + with pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq="0D0H") + + # You can only combine together day and intraday offsets + msg = "Invalid frequency: 1W1D" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="1W1D") + msg = "Invalid frequency: 1D1W" + with pytest.raises(ValueError, match=msg): + Period("2011-01", freq="1D1W") + + +class TestPeriodMethods: + def test_round_trip(self): + p = Period("2000Q1") + new_p = tm.round_trip_pickle(p) + assert new_p == p + + def test_hash(self): + assert hash(Period("2011-01", freq="M")) == hash(Period("2011-01", freq="M")) + + assert hash(Period("2011-01-01", freq="D")) != hash(Period("2011-01", freq="M")) + + assert hash(Period("2011-01", freq="3M")) != hash(Period("2011-01", freq="2M")) + + assert hash(Period("2011-01", freq="M")) != hash(Period("2011-02", freq="M")) + + # -------------------------------------------------------------- + # to_timestamp + + @pytest.mark.parametrize("tzstr", ["Europe/Brussels", "Asia/Tokyo", "US/Pacific"]) + def test_to_timestamp_tz_arg(self, tzstr): + p = Period("1/1/2005", freq="M").to_timestamp(tz=tzstr) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period("1/1/2005", freq="3H").to_timestamp(tz=tzstr) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period("1/1/2005", freq="A").to_timestamp(freq="A", tz=tzstr) + exp = Timestamp("31/12/2005", tz="UTC").tz_convert(tzstr) + exp_zone = 
pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + p = Period("1/1/2005", freq="A").to_timestamp(freq="3H", tz=tzstr) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + exp_zone = pytz.timezone(tzstr).normalize(p) + + assert p == exp + assert p.tz == exp_zone.tzinfo + assert p.tz == exp.tz + + @pytest.mark.parametrize( + "tzstr", + ["dateutil/Europe/Brussels", "dateutil/Asia/Tokyo", "dateutil/US/Pacific"], + ) + def test_to_timestamp_tz_arg_dateutil(self, tzstr): + tz = maybe_get_tz(tzstr) + p = Period("1/1/2005", freq="M").to_timestamp(tz=tz) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1]) + assert p.tz == exp.tz + + p = Period("1/1/2005", freq="M").to_timestamp(freq="3H", tz=tz) + exp = Timestamp("1/1/2005", tz="UTC").tz_convert(tzstr) + assert p == exp + assert p.tz == dateutil_gettz(tzstr.split("/", 1)[1]) + assert p.tz == exp.tz + + def test_to_timestamp_tz_arg_dateutil_from_string(self): + p = Period("1/1/2005", freq="M").to_timestamp(tz="dateutil/Europe/Brussels") + assert p.tz == dateutil_gettz("Europe/Brussels") + + def test_to_timestamp_mult(self): + p = Period("2011-01", freq="M") + assert p.to_timestamp(how="S") == Timestamp("2011-01-01") + expected = Timestamp("2011-02-01") - Timedelta(1, "ns") + assert p.to_timestamp(how="E") == expected + + p = Period("2011-01", freq="3M") + assert p.to_timestamp(how="S") == Timestamp("2011-01-01") + expected = Timestamp("2011-04-01") - Timedelta(1, "ns") + assert p.to_timestamp(how="E") == expected + + def test_to_timestamp(self): + p = Period("1982", freq="A") + start_ts = p.to_timestamp(how="S") + aliases = ["s", "StarT", "BEGIn"] + for a in aliases: + assert start_ts == p.to_timestamp("D", how=a) + # a freq with a multiple (e.g. "3D") should not affect the result + assert start_ts == p.to_timestamp("3D", how=a) + + end_ts = p.to_timestamp(how="E") + aliases = ["e", "end", 
"FINIsH"] + for a in aliases: + assert end_ts == p.to_timestamp("D", how=a) + assert end_ts == p.to_timestamp("3D", how=a) + + from_lst = ["A", "Q", "M", "W", "B", "D", "H", "Min", "S"] + + def _ex(p): + return Timestamp((p + p.freq).start_time.value - 1) + + for i, fcode in enumerate(from_lst): + p = Period("1982", freq=fcode) + result = p.to_timestamp().to_period(fcode) + assert result == p + + assert p.start_time == p.to_timestamp(how="S") + + assert p.end_time == _ex(p) + + # Frequency other than daily + + p = Period("1985", freq="A") + + result = p.to_timestamp("H", how="end") + expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + assert result == expected + result = p.to_timestamp("3H", how="end") + assert result == expected + + result = p.to_timestamp("T", how="end") + expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + assert result == expected + result = p.to_timestamp("2T", how="end") + assert result == expected + + result = p.to_timestamp(how="end") + expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + assert result == expected + + expected = datetime(1985, 1, 1) + result = p.to_timestamp("H", how="start") + assert result == expected + result = p.to_timestamp("T", how="start") + assert result == expected + result = p.to_timestamp("S", how="start") + assert result == expected + result = p.to_timestamp("3H", how="start") + assert result == expected + result = p.to_timestamp("5S", how="start") + assert result == expected + + # -------------------------------------------------------------- + # Rendering: __repr__, strftime, etc + + def test_repr(self): + p = Period("Jan-2000") + assert "2000-01" in repr(p) + + p = Period("2000-12-15") + assert "2000-12-15" in repr(p) + + def test_repr_nat(self): + p = Period("nat", freq="M") + assert repr(NaT) in repr(p) + + def test_millisecond_repr(self): + p = Period("2000-01-01 12:15:02.123") + + assert repr(p) == "Period('2000-01-01 12:15:02.123', 'L')" + + def test_microsecond_repr(self): + p = 
Period("2000-01-01 12:15:02.123567") + + assert repr(p) == "Period('2000-01-01 12:15:02.123567', 'U')" + + def test_strftime(self): + # GH#3363 + p = Period("2000-1-1 12:34:12", freq="S") + res = p.strftime("%Y-%m-%d %H:%M:%S") + assert res == "2000-01-01 12:34:12" + assert isinstance(res, str) + + +class TestPeriodProperties: + "Test properties such as year, month, weekday, etc...." + + @pytest.mark.parametrize("freq", ["A", "M", "D", "H"]) + def test_is_leap_year(self, freq): + # GH 13727 + p = Period("2000-01-01 00:00:00", freq=freq) + assert p.is_leap_year + assert isinstance(p.is_leap_year, bool) + + p = Period("1999-01-01 00:00:00", freq=freq) + assert not p.is_leap_year + + p = Period("2004-01-01 00:00:00", freq=freq) + assert p.is_leap_year + + p = Period("2100-01-01 00:00:00", freq=freq) + assert not p.is_leap_year + + def test_quarterly_negative_ordinals(self): + p = Period(ordinal=-1, freq="Q-DEC") + assert p.year == 1969 + assert p.quarter == 4 + assert isinstance(p, Period) + + p = Period(ordinal=-2, freq="Q-DEC") + assert p.year == 1969 + assert p.quarter == 3 + assert isinstance(p, Period) + + p = Period(ordinal=-2, freq="M") + assert p.year == 1969 + assert p.month == 11 + assert isinstance(p, Period) + + def test_freq_str(self): + i1 = Period("1982", freq="Min") + assert i1.freq == offsets.Minute() + assert i1.freqstr == "T" + + def test_period_deprecated_freq(self): + cases = { + "M": ["MTH", "MONTH", "MONTHLY", "Mth", "month", "monthly"], + "B": ["BUS", "BUSINESS", "BUSINESSLY", "WEEKDAY", "bus"], + "D": ["DAY", "DLY", "DAILY", "Day", "Dly", "Daily"], + "H": ["HR", "HOUR", "HRLY", "HOURLY", "hr", "Hour", "HRly"], + "T": ["minute", "MINUTE", "MINUTELY", "minutely"], + "S": ["sec", "SEC", "SECOND", "SECONDLY", "second"], + "L": ["MILLISECOND", "MILLISECONDLY", "millisecond"], + "U": ["MICROSECOND", "MICROSECONDLY", "microsecond"], + "N": ["NANOSECOND", "NANOSECONDLY", "nanosecond"], + } + + msg = INVALID_FREQ_ERR_MSG + for exp, freqs in 
cases.items(): + for freq in freqs: + with pytest.raises(ValueError, match=msg): + Period("2016-03-01 09:00", freq=freq) + with pytest.raises(ValueError, match=msg): + Period(ordinal=1, freq=freq) + + # check supported freq-aliases still works + p1 = Period("2016-03-01 09:00", freq=exp) + p2 = Period(ordinal=1, freq=exp) + assert isinstance(p1, Period) + assert isinstance(p2, Period) + + def test_start_time(self): + freq_lst = ["A", "Q", "M", "D", "H", "T", "S"] + xp = datetime(2012, 1, 1) + for f in freq_lst: + p = Period("2012", freq=f) + assert p.start_time == xp + assert Period("2012", freq="B").start_time == datetime(2012, 1, 2) + assert Period("2012", freq="W").start_time == datetime(2011, 12, 26) + + def test_end_time(self): + p = Period("2012", freq="A") + + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + xp = _ex(2013, 1, 1) + assert xp == p.end_time + + p = Period("2012", freq="Q") + xp = _ex(2012, 4, 1) + assert xp == p.end_time + + p = Period("2012", freq="M") + xp = _ex(2012, 2, 1) + assert xp == p.end_time + + p = Period("2012", freq="D") + xp = _ex(2012, 1, 2) + assert xp == p.end_time + + p = Period("2012", freq="H") + xp = _ex(2012, 1, 1, 1) + assert xp == p.end_time + + p = Period("2012", freq="B") + xp = _ex(2012, 1, 3) + assert xp == p.end_time + + p = Period("2012", freq="W") + xp = _ex(2012, 1, 2) + assert xp == p.end_time + + # Test for GH 11738 + p = Period("2012", freq="15D") + xp = _ex(2012, 1, 16) + assert xp == p.end_time + + p = Period("2012", freq="1D1H") + xp = _ex(2012, 1, 2, 1) + assert xp == p.end_time + + p = Period("2012", freq="1H1D") + xp = _ex(2012, 1, 2, 1) + assert xp == p.end_time + + def test_anchor_week_end_time(self): + def _ex(*args): + return Timestamp(Timestamp(datetime(*args)).value - 1) + + p = Period("2013-1-1", "W-SAT") + xp = _ex(2013, 1, 6) + assert p.end_time == xp + + def test_properties_annually(self): + # Test properties on Periods with annually frequency. 
+ a_date = Period(freq="A", year=2007) + assert a_date.year == 2007 + + def test_properties_quarterly(self): + # Test properties on Periods with quarterly frequency. + qedec_date = Period(freq="Q-DEC", year=2007, quarter=1) + qejan_date = Period(freq="Q-JAN", year=2007, quarter=1) + qejun_date = Period(freq="Q-JUN", year=2007, quarter=1) + # + for x in range(3): + for qd in (qedec_date, qejan_date, qejun_date): + assert (qd + x).qyear == 2007 + assert (qd + x).quarter == x + 1 + + def test_properties_monthly(self): + # Test properties on Periods with monthly frequency. + m_date = Period(freq="M", year=2007, month=1) + for x in range(11): + m_ival_x = m_date + x + assert m_ival_x.year == 2007 + if 1 <= x + 1 <= 3: + assert m_ival_x.quarter == 1 + elif 4 <= x + 1 <= 6: + assert m_ival_x.quarter == 2 + elif 7 <= x + 1 <= 9: + assert m_ival_x.quarter == 3 + elif 10 <= x + 1 <= 12: + assert m_ival_x.quarter == 4 + assert m_ival_x.month == x + 1 + + def test_properties_weekly(self): + # Test properties on Periods with weekly frequency. + w_date = Period(freq="W", year=2007, month=1, day=7) + # + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 + assert Period(freq="W", year=2012, month=2, day=1).days_in_month == 29 + + def test_properties_weekly_legacy(self): + # Test properties on Periods with weekly frequency. + w_date = Period(freq="W", year=2007, month=1, day=7) + assert w_date.year == 2007 + assert w_date.quarter == 1 + assert w_date.month == 1 + assert w_date.week == 1 + assert (w_date - 1).week == 52 + assert w_date.days_in_month == 31 + + exp = Period(freq="W", year=2012, month=2, day=1) + assert exp.days_in_month == 29 + + msg = INVALID_FREQ_ERR_MSG + with pytest.raises(ValueError, match=msg): + Period(freq="WK", year=2007, month=1, day=7) + + def test_properties_daily(self): + # Test properties on Periods with daily frequency. 
+ b_date = Period(freq="B", year=2007, month=1, day=1) + # + assert b_date.year == 2007 + assert b_date.quarter == 1 + assert b_date.month == 1 + assert b_date.day == 1 + assert b_date.weekday == 0 + assert b_date.dayofyear == 1 + assert b_date.days_in_month == 31 + assert Period(freq="B", year=2012, month=2, day=1).days_in_month == 29 + + d_date = Period(freq="D", year=2007, month=1, day=1) + + assert d_date.year == 2007 + assert d_date.quarter == 1 + assert d_date.month == 1 + assert d_date.day == 1 + assert d_date.weekday == 0 + assert d_date.dayofyear == 1 + assert d_date.days_in_month == 31 + assert Period(freq="D", year=2012, month=2, day=1).days_in_month == 29 + + def test_properties_hourly(self): + # Test properties on Periods with hourly frequency. + h_date1 = Period(freq="H", year=2007, month=1, day=1, hour=0) + h_date2 = Period(freq="2H", year=2007, month=1, day=1, hour=0) + + for h_date in [h_date1, h_date2]: + assert h_date.year == 2007 + assert h_date.quarter == 1 + assert h_date.month == 1 + assert h_date.day == 1 + assert h_date.weekday == 0 + assert h_date.dayofyear == 1 + assert h_date.hour == 0 + assert h_date.days_in_month == 31 + assert ( + Period(freq="H", year=2012, month=2, day=1, hour=0).days_in_month == 29 + ) + + def test_properties_minutely(self): + # Test properties on Periods with minutely frequency. + t_date = Period(freq="Min", year=2007, month=1, day=1, hour=0, minute=0) + # + assert t_date.quarter == 1 + assert t_date.month == 1 + assert t_date.day == 1 + assert t_date.weekday == 0 + assert t_date.dayofyear == 1 + assert t_date.hour == 0 + assert t_date.minute == 0 + assert t_date.days_in_month == 31 + assert ( + Period(freq="D", year=2012, month=2, day=1, hour=0, minute=0).days_in_month + == 29 + ) + + def test_properties_secondly(self): + # Test properties on Periods with secondly frequency. 
+ s_date = Period( + freq="Min", year=2007, month=1, day=1, hour=0, minute=0, second=0 + ) + # + assert s_date.year == 2007 + assert s_date.quarter == 1 + assert s_date.month == 1 + assert s_date.day == 1 + assert s_date.weekday == 0 + assert s_date.dayofyear == 1 + assert s_date.hour == 0 + assert s_date.minute == 0 + assert s_date.second == 0 + assert s_date.days_in_month == 31 + assert ( + Period( + freq="Min", year=2012, month=2, day=1, hour=0, minute=0, second=0 + ).days_in_month + == 29 + ) + + +class TestPeriodField: + def test_get_period_field_array_raises_on_out_of_range(self): + msg = "Buffer dtype mismatch, expected 'int64_t' but got 'double'" + with pytest.raises(ValueError, match=msg): + libperiod.get_period_field_arr(-1, np.empty(1), 0) + + +class TestComparisons: + def setup_method(self, method): + self.january1 = Period("2000-01", "M") + self.january2 = Period("2000-01", "M") + self.february = Period("2000-02", "M") + self.march = Period("2000-03", "M") + self.day = Period("2012-01-01", "D") + + def test_equal(self): + assert self.january1 == self.january2 + + def test_equal_Raises_Value(self): + with pytest.raises(IncompatibleFrequency): + self.january1 == self.day + + def test_notEqual(self): + assert self.january1 != 1 + assert self.january1 != self.february + + def test_greater(self): + assert self.february > self.january1 + + def test_greater_Raises_Value(self): + with pytest.raises(IncompatibleFrequency): + self.january1 > self.day + + def test_greater_Raises_Type(self): + with pytest.raises(TypeError): + self.january1 > 1 + + def test_greaterEqual(self): + assert self.january1 >= self.january2 + + def test_greaterEqual_Raises_Value(self): + with pytest.raises(IncompatibleFrequency): + self.january1 >= self.day + + with pytest.raises(TypeError): + print(self.january1 >= 1) + + def test_smallerEqual(self): + assert self.january1 <= self.january2 + + def test_smallerEqual_Raises_Value(self): + with pytest.raises(IncompatibleFrequency): + 
self.january1 <= self.day + + def test_smallerEqual_Raises_Type(self): + with pytest.raises(TypeError): + self.january1 <= 1 + + def test_smaller(self): + assert self.january1 < self.february + + def test_smaller_Raises_Value(self): + with pytest.raises(IncompatibleFrequency): + self.january1 < self.day + + def test_smaller_Raises_Type(self): + with pytest.raises(TypeError): + self.january1 < 1 + + def test_sort(self): + periods = [self.march, self.january1, self.february] + correctPeriods = [self.january1, self.february, self.march] + assert sorted(periods) == correctPeriods + + def test_period_nat_comp(self): + p_nat = Period("NaT", freq="D") + p = Period("2011-01-01", freq="D") + + nat = Timestamp("NaT") + t = Timestamp("2011-01-01") + # confirm Period('NaT') behaves identically to Timestamp('NaT') + for left, right in [ + (p_nat, p), + (p, p_nat), + (p_nat, p_nat), + (nat, t), + (t, nat), + (nat, nat), + ]: + assert not left < right + assert not left > right + assert not left == right + assert left != right + assert not left <= right + assert not left >= right + + +class TestArithmetic: + def test_sub_delta(self): + left, right = Period("2011", freq="A"), Period("2007", freq="A") + result = left - right + assert result == 4 * right.freq + + with pytest.raises(IncompatibleFrequency): + left - Period("2007-01", freq="M") + + def test_add_integer(self): + per1 = Period(freq="D", year=2008, month=1, day=1) + per2 = Period(freq="D", year=2008, month=1, day=2) + assert per1 + 1 == per2 + assert 1 + per1 == per2 + + def test_add_sub_nat(self): + # GH#13071 + p = Period("2011-01", freq="M") + assert p + NaT is NaT + assert NaT + p is NaT + assert p - NaT is NaT + assert NaT - p is NaT + + p = Period("NaT", freq="M") + assert p is NaT + assert p + NaT is NaT + assert NaT + p is NaT + assert p - NaT is NaT + assert NaT - p is NaT + + def test_add_invalid(self): + # GH#4731 + per1 = Period(freq="D", year=2008, month=1, day=1) + per2 = Period(freq="D", year=2008, month=1, 
day=2) + + msg = r"unsupported operand type\(s\)" + with pytest.raises(TypeError, match=msg): + per1 + "str" + with pytest.raises(TypeError, match=msg): + "str" + per1 + with pytest.raises(TypeError, match=msg): + per1 + per2 + + boxes = [lambda x: x, lambda x: pd.Series([x]), lambda x: pd.Index([x])] + ids = ["identity", "Series", "Index"] + + @pytest.mark.parametrize("lbox", boxes, ids=ids) + @pytest.mark.parametrize("rbox", boxes, ids=ids) + def test_add_timestamp_raises(self, rbox, lbox): + # GH#17983 + ts = Timestamp("2017") + per = Period("2017", freq="M") + + # We may get a different message depending on which class raises + # the error. + msg = ( + r"cannot add|unsupported operand|" + r"can only operate on a|incompatible type|" + r"ufunc add cannot use operands" + ) + with pytest.raises(TypeError, match=msg): + lbox(ts) + rbox(per) + + with pytest.raises(TypeError, match=msg): + lbox(per) + rbox(ts) + + with pytest.raises(TypeError, match=msg): + lbox(per) + rbox(per) + + def test_sub(self): + per1 = Period("2011-01-01", freq="D") + per2 = Period("2011-01-15", freq="D") + + off = per1.freq + assert per1 - per2 == -14 * off + assert per2 - per1 == 14 * off + + msg = r"Input has different freq=M from Period\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + per1 - Period("2011-02", freq="M") + + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + def test_sub_n_gt_1_ticks(self, tick_classes, n): + # GH 23878 + p1 = Period("19910905", freq=tick_classes(n)) + p2 = Period("19920406", freq=tick_classes(n)) + + expected = Period(str(p2), freq=p2.freq.base) - Period( + str(p1), freq=p1.freq.base + ) + + assert (p2 - p1) == expected + + @pytest.mark.parametrize("normalize", [True, False]) + @pytest.mark.parametrize("n", [1, 2, 3, 4]) + @pytest.mark.parametrize( + "offset, kwd_name", + [ + (offsets.YearEnd, "month"), + (offsets.QuarterEnd, "startingMonth"), + (offsets.MonthEnd, None), + (offsets.Week, "weekday"), + ], + ) + def 
test_sub_n_gt_1_offsets(self, offset, kwd_name, n, normalize): + # GH 23878 + kwds = {kwd_name: 3} if kwd_name is not None else {} + p1_d = "19910905" + p2_d = "19920406" + p1 = Period(p1_d, freq=offset(n, normalize, **kwds)) + p2 = Period(p2_d, freq=offset(n, normalize, **kwds)) + + expected = Period(p2_d, freq=p2.freq.base) - Period(p1_d, freq=p1.freq.base) + + assert (p2 - p1) == expected + + def test_add_offset(self): + # freq is DateOffset + for freq in ["A", "2A", "3A"]: + p = Period("2011", freq=freq) + exp = Period("2013", freq=freq) + assert p + offsets.YearEnd(2) == exp + assert offsets.YearEnd(2) + p == exp + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + with pytest.raises(IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with pytest.raises(IncompatibleFrequency): + o + p + + for freq in ["M", "2M", "3M"]: + p = Period("2011-03", freq=freq) + exp = Period("2011-05", freq=freq) + assert p + offsets.MonthEnd(2) == exp + assert offsets.MonthEnd(2) + p == exp + + exp = Period("2012-03", freq=freq) + assert p + offsets.MonthEnd(12) == exp + assert offsets.MonthEnd(12) + p == exp + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + with pytest.raises(IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with pytest.raises(IncompatibleFrequency): + o + p + + # freq is Tick + for freq in ["D", "2D", "3D"]: + p = Period("2011-04-01", freq=freq) + + exp = Period("2011-04-06", freq=freq) + assert p + offsets.Day(5) == exp + assert offsets.Day(5) + p == exp + + exp = Period("2011-04-02", freq=freq) + assert p + offsets.Hour(24) == exp + assert offsets.Hour(24) + p == exp + + exp = Period("2011-04-03", freq=freq) + assert p + np.timedelta64(2, "D") == exp + with 
pytest.raises(TypeError): + np.timedelta64(2, "D") + p + + exp = Period("2011-04-02", freq=freq) + assert p + np.timedelta64(3600 * 24, "s") == exp + with pytest.raises(TypeError): + np.timedelta64(3600 * 24, "s") + p + + exp = Period("2011-03-30", freq=freq) + assert p + timedelta(-2) == exp + assert timedelta(-2) + p == exp + + exp = Period("2011-04-03", freq=freq) + assert p + timedelta(hours=48) == exp + assert timedelta(hours=48) + p == exp + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(4, "h"), + timedelta(hours=23), + ]: + with pytest.raises(IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with pytest.raises(IncompatibleFrequency): + o + p + + for freq in ["H", "2H", "3H"]: + p = Period("2011-04-01 09:00", freq=freq) + + exp = Period("2011-04-03 09:00", freq=freq) + assert p + offsets.Day(2) == exp + assert offsets.Day(2) + p == exp + + exp = Period("2011-04-01 12:00", freq=freq) + assert p + offsets.Hour(3) == exp + assert offsets.Hour(3) + p == exp + + exp = Period("2011-04-01 12:00", freq=freq) + assert p + np.timedelta64(3, "h") == exp + with pytest.raises(TypeError): + np.timedelta64(3, "h") + p + + exp = Period("2011-04-01 10:00", freq=freq) + assert p + np.timedelta64(3600, "s") == exp + with pytest.raises(TypeError): + np.timedelta64(3600, "s") + p + + exp = Period("2011-04-01 11:00", freq=freq) + assert p + timedelta(minutes=120) == exp + assert timedelta(minutes=120) + p == exp + + exp = Period("2011-04-05 12:00", freq=freq) + assert p + timedelta(days=4, minutes=180) == exp + assert timedelta(days=4, minutes=180) + p == exp + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(3200, "s"), + timedelta(hours=23, minutes=30), + ]: + with pytest.raises(IncompatibleFrequency): + p + o + + if isinstance(o, np.timedelta64): + with pytest.raises(TypeError): + o + p + else: + with 
pytest.raises(IncompatibleFrequency): + o + p + + def test_add_offset_nat(self): + # freq is DateOffset + for freq in ["A", "2A", "3A"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [offsets.YearEnd(2)]: + assert p + o is NaT + assert o + p is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + assert p + o is NaT + assert o + p is NaT + + for freq in ["M", "2M", "3M"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + assert p + o is NaT + assert o + p is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + assert p + o is NaT + assert o + p is NaT + + # freq is Tick + for freq in ["D", "2D", "3D"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [ + offsets.Day(5), + offsets.Hour(24), + np.timedelta64(2, "D"), + np.timedelta64(3600 * 24, "s"), + timedelta(-2), + timedelta(hours=48), + ]: + assert p + o is NaT + assert o + p is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(4, "h"), + timedelta(hours=23), + ]: + assert p + o is NaT + assert o + p is NaT + + for freq in ["H", "2H", "3H"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [ + offsets.Day(2), + offsets.Hour(3), + np.timedelta64(3, "h"), + np.timedelta64(3600, "s"), + timedelta(minutes=120), + timedelta(days=4, minutes=180), + ]: + assert p + o is NaT + assert o + p is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(3200, "s"), + timedelta(hours=23, minutes=30), + ]: + assert p + o is NaT + assert o + p is NaT + + def test_sub_offset(self): + # freq is DateOffset + for freq in ["A", "2A", "3A"]: + p = Period("2011", freq=freq) + assert p - offsets.YearEnd(2) == Period("2009", freq=freq) + + for o in [ + 
offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + with pytest.raises(IncompatibleFrequency): + p - o + + for freq in ["M", "2M", "3M"]: + p = Period("2011-03", freq=freq) + assert p - offsets.MonthEnd(2) == Period("2011-01", freq=freq) + assert p - offsets.MonthEnd(12) == Period("2010-03", freq=freq) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + with pytest.raises(IncompatibleFrequency): + p - o + + # freq is Tick + for freq in ["D", "2D", "3D"]: + p = Period("2011-04-01", freq=freq) + assert p - offsets.Day(5) == Period("2011-03-27", freq=freq) + assert p - offsets.Hour(24) == Period("2011-03-31", freq=freq) + assert p - np.timedelta64(2, "D") == Period("2011-03-30", freq=freq) + assert p - np.timedelta64(3600 * 24, "s") == Period("2011-03-31", freq=freq) + assert p - timedelta(-2) == Period("2011-04-03", freq=freq) + assert p - timedelta(hours=48) == Period("2011-03-30", freq=freq) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(4, "h"), + timedelta(hours=23), + ]: + with pytest.raises(IncompatibleFrequency): + p - o + + for freq in ["H", "2H", "3H"]: + p = Period("2011-04-01 09:00", freq=freq) + assert p - offsets.Day(2) == Period("2011-03-30 09:00", freq=freq) + assert p - offsets.Hour(3) == Period("2011-04-01 06:00", freq=freq) + assert p - np.timedelta64(3, "h") == Period("2011-04-01 06:00", freq=freq) + assert p - np.timedelta64(3600, "s") == Period( + "2011-04-01 08:00", freq=freq + ) + assert p - timedelta(minutes=120) == Period("2011-04-01 07:00", freq=freq) + assert p - timedelta(days=4, minutes=180) == Period( + "2011-03-28 06:00", freq=freq + ) + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(3200, "s"), + timedelta(hours=23, minutes=30), + ]: + with pytest.raises(IncompatibleFrequency): + p 
- o + + def test_sub_offset_nat(self): + # freq is DateOffset + for freq in ["A", "2A", "3A"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [offsets.YearEnd(2)]: + assert p - o is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + assert p - o is NaT + + for freq in ["M", "2M", "3M"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: + assert p - o is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(365, "D"), + timedelta(365), + ]: + assert p - o is NaT + + # freq is Tick + for freq in ["D", "2D", "3D"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [ + offsets.Day(5), + offsets.Hour(24), + np.timedelta64(2, "D"), + np.timedelta64(3600 * 24, "s"), + timedelta(-2), + timedelta(hours=48), + ]: + assert p - o is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(4, "h"), + timedelta(hours=23), + ]: + assert p - o is NaT + + for freq in ["H", "2H", "3H"]: + p = Period("NaT", freq=freq) + assert p is NaT + for o in [ + offsets.Day(2), + offsets.Hour(3), + np.timedelta64(3, "h"), + np.timedelta64(3600, "s"), + timedelta(minutes=120), + timedelta(days=4, minutes=180), + ]: + assert p - o is NaT + + for o in [ + offsets.YearBegin(2), + offsets.MonthBegin(1), + offsets.Minute(), + np.timedelta64(3200, "s"), + timedelta(hours=23, minutes=30), + ]: + assert p - o is NaT + + @pytest.mark.parametrize("freq", ["M", "2M", "3M"]) + def test_nat_ops(self, freq): + p = Period("NaT", freq=freq) + assert p is NaT + assert p + 1 is NaT + assert 1 + p is NaT + assert p - 1 is NaT + assert p - Period("2011-01", freq=freq) is NaT + assert Period("2011-01", freq=freq) - p is NaT + + def test_period_ops_offset(self): + p = Period("2011-04-01", freq="D") + result = p + offsets.Day() + exp = Period("2011-04-02", 
freq="D") + assert result == exp + + result = p - offsets.Day(2) + exp = Period("2011-03-30", freq="D") + assert result == exp + + msg = r"Input cannot be converted to Period\(freq=D\)" + with pytest.raises(IncompatibleFrequency, match=msg): + p + offsets.Hour(2) + + with pytest.raises(IncompatibleFrequency, match=msg): + p - offsets.Hour(2) + + +def test_period_immutable(): + # see gh-17116 + per = Period("2014Q1") + with pytest.raises(AttributeError): + per.ordinal = 14 + + freq = per.freq + with pytest.raises(AttributeError): + per.freq = 2 * freq + + +@pytest.mark.xfail( + StrictVersion(dateutil.__version__.split(".dev")[0]) < StrictVersion("2.7.0"), + reason="Bug in dateutil < 2.7.0 when parsing old dates: Period('0001-01-07', 'D')", + strict=False, +) +def test_small_year_parsing(): + per1 = Period("0001-01-07", "D") + assert per1.year == 1 + assert per1.day == 7 diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py new file mode 100644 index 00000000..07656de2 --- /dev/null +++ b/pandas/tests/scalar/test_na_scalar.py @@ -0,0 +1,294 @@ +import pickle + +import numpy as np +import pytest + +from pandas._libs.missing import NA + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +import pandas._testing as tm + + +def test_singleton(): + assert NA is NA + new_NA = type(NA)() + assert new_NA is NA + + +def test_repr(): + assert repr(NA) == "" + assert str(NA) == "" + + +def test_truthiness(): + with pytest.raises(TypeError): + bool(NA) + + with pytest.raises(TypeError): + not NA + + +def test_hashable(): + assert hash(NA) == hash(NA) + d = {NA: "test"} + assert d[NA] == "test" + + +def test_arithmetic_ops(all_arithmetic_functions): + op = all_arithmetic_functions + + for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]: + if op.__name__ in ("pow", "rpow", "rmod") and isinstance(other, str): + continue + if op.__name__ in ("divmod", "rdivmod"): + assert op(NA, other) is (NA, NA) + else: + if op.__name__ == 
"rpow": + # avoid special case + other += 1 + assert op(NA, other) is NA + + +def test_comparison_ops(): + + for other in [NA, 1, 1.0, "a", np.int64(1), np.nan, np.bool_(True)]: + assert (NA == other) is NA + assert (NA != other) is NA + assert (NA > other) is NA + assert (NA >= other) is NA + assert (NA < other) is NA + assert (NA <= other) is NA + assert (other == NA) is NA + assert (other != NA) is NA + assert (other > NA) is NA + assert (other >= NA) is NA + assert (other < NA) is NA + assert (other <= NA) is NA + + +@pytest.mark.parametrize( + "value", + [ + 0, + 0.0, + -0, + -0.0, + False, + np.bool_(False), + np.int_(0), + np.float_(0), + np.int_(-0), + np.float_(-0), + ], +) +@pytest.mark.parametrize("asarray", [True, False]) +def test_pow_special(value, asarray): + if asarray: + value = np.array([value]) + result = pd.NA ** value + + if asarray: + result = result[0] + else: + # this assertion isn't possible for ndarray. + assert isinstance(result, type(value)) + assert result == 1 + + +@pytest.mark.parametrize( + "value", [1, 1.0, True, np.bool_(True), np.int_(1), np.float_(1)], +) +@pytest.mark.parametrize("asarray", [True, False]) +def test_rpow_special(value, asarray): + if asarray: + value = np.array([value]) + result = value ** pd.NA + + if asarray: + result = result[0] + elif not isinstance(value, (np.float_, np.bool_, np.int_)): + # this assertion isn't possible with asarray=True + assert isinstance(result, type(value)) + + assert result == value + + +@pytest.mark.parametrize( + "value", [-1, -1.0, np.int_(-1), np.float_(-1)], +) +@pytest.mark.parametrize("asarray", [True, False]) +def test_rpow_minus_one(value, asarray): + if asarray: + value = np.array([value]) + result = value ** pd.NA + + if asarray: + result = result[0] + + assert pd.isna(result) + + +def test_unary_ops(): + assert +NA is NA + assert -NA is NA + assert abs(NA) is NA + assert ~NA is NA + + +def test_logical_and(): + + assert NA & True is NA + assert True & NA is NA + assert NA & 
False is False + assert False & NA is False + assert NA & NA is NA + + with pytest.raises(TypeError): + NA & 5 + + +def test_logical_or(): + + assert NA | True is True + assert True | NA is True + assert NA | False is NA + assert False | NA is NA + assert NA | NA is NA + + with pytest.raises(TypeError): + NA | 5 + + +def test_logical_xor(): + + assert NA ^ True is NA + assert True ^ NA is NA + assert NA ^ False is NA + assert False ^ NA is NA + assert NA ^ NA is NA + + with pytest.raises(TypeError): + NA ^ 5 + + +def test_logical_not(): + assert ~NA is NA + + +@pytest.mark.parametrize( + "shape", [(3,), (3, 3), (1, 2, 3)], +) +def test_arithmetic_ndarray(shape, all_arithmetic_functions): + op = all_arithmetic_functions + a = np.zeros(shape) + if op.__name__ == "pow": + a += 5 + result = op(pd.NA, a) + expected = np.full(a.shape, pd.NA, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + +def test_is_scalar(): + assert is_scalar(NA) is True + + +def test_isna(): + assert pd.isna(NA) is True + assert pd.notna(NA) is False + + +def test_series_isna(): + s = pd.Series([1, NA], dtype=object) + expected = pd.Series([False, True]) + tm.assert_series_equal(s.isna(), expected) + + +def test_ufunc(): + assert np.log(pd.NA) is pd.NA + assert np.add(pd.NA, 1) is pd.NA + result = np.divmod(pd.NA, 1) + assert result[0] is pd.NA and result[1] is pd.NA + + result = np.frexp(pd.NA) + assert result[0] is pd.NA and result[1] is pd.NA + + +def test_ufunc_raises(): + with pytest.raises(ValueError, match="ufunc method 'at'"): + np.log.at(pd.NA, 0) + + +def test_binary_input_not_dunder(): + a = np.array([1, 2, 3]) + expected = np.array([pd.NA, pd.NA, pd.NA], dtype=object) + result = np.logaddexp(a, pd.NA) + tm.assert_numpy_array_equal(result, expected) + + result = np.logaddexp(pd.NA, a) + tm.assert_numpy_array_equal(result, expected) + + # all NA, multiple inputs + assert np.logaddexp(pd.NA, pd.NA) is pd.NA + + result = np.modf(pd.NA, pd.NA) + assert len(result) == 2 + 
assert all(x is pd.NA for x in result) + + +def test_divmod_ufunc(): + # binary in, binary out. + a = np.array([1, 2, 3]) + expected = np.array([pd.NA, pd.NA, pd.NA], dtype=object) + + result = np.divmod(a, pd.NA) + assert isinstance(result, tuple) + for arr in result: + tm.assert_numpy_array_equal(arr, expected) + tm.assert_numpy_array_equal(arr, expected) + + result = np.divmod(pd.NA, a) + for arr in result: + tm.assert_numpy_array_equal(arr, expected) + tm.assert_numpy_array_equal(arr, expected) + + +def test_integer_hash_collision_dict(): + # GH 30013 + result = {NA: "foo", hash(NA): "bar"} + + assert result[NA] == "foo" + assert result[hash(NA)] == "bar" + + +def test_integer_hash_collision_set(): + # GH 30013 + result = {NA, hash(NA)} + + assert len(result) == 2 + assert NA in result + assert hash(NA) in result + + +def test_pickle_roundtrip(): + # https://github.com/pandas-dev/pandas/issues/31847 + result = pickle.loads(pickle.dumps(pd.NA)) + assert result is pd.NA + + +def test_pickle_roundtrip_pandas(): + result = tm.round_trip_pickle(pd.NA) + assert result is pd.NA + + +@pytest.mark.parametrize( + "values, dtype", [([1, 2, pd.NA], "Int64"), (["A", "B", pd.NA], "string")] +) +@pytest.mark.parametrize("as_frame", [True, False]) +def test_pickle_roundtrip_containers(as_frame, values, dtype): + s = pd.Series(pd.array(values, dtype=dtype)) + if as_frame: + s = s.to_frame(name="A") + result = tm.round_trip_pickle(s) + tm.assert_equal(result, s) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py new file mode 100644 index 00000000..a537f000 --- /dev/null +++ b/pandas/tests/scalar/test_nat.py @@ -0,0 +1,510 @@ +from datetime import datetime, timedelta +import operator + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import iNaT +import pandas.compat as compat + +from pandas.core.dtypes.common import is_datetime64_any_dtype + +from pandas import ( + DatetimeIndex, + Index, + NaT, + Period, + Series, + 
Timedelta, + TimedeltaIndex, + Timestamp, + isna, +) +import pandas._testing as tm +from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray +from pandas.core.ops import roperator + + +@pytest.mark.parametrize( + "nat,idx", + [ + (Timestamp("NaT"), DatetimeIndex), + (Timedelta("NaT"), TimedeltaIndex), + (Period("NaT", freq="M"), PeriodArray), + ], +) +def test_nat_fields(nat, idx): + + for field in idx._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == "weekday": + continue + + result = getattr(NaT, field) + assert np.isnan(result) + + result = getattr(nat, field) + assert np.isnan(result) + + for field in idx._bool_ops: + + result = getattr(NaT, field) + assert result is False + + result = getattr(nat, field) + assert result is False + + +def test_nat_vector_field_access(): + idx = DatetimeIndex(["1/1/2000", None, None, "1/4/2000"]) + + for field in DatetimeIndex._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == "weekday": + continue + + result = getattr(idx, field) + expected = Index([getattr(x, field) for x in idx]) + tm.assert_index_equal(result, expected) + + ser = Series(idx) + + for field in DatetimeIndex._field_ops: + # weekday is a property of DTI, but a method + # on NaT/Timestamp for compat with datetime + if field == "weekday": + continue + + result = getattr(ser.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + for field in DatetimeIndex._bool_ops: + result = getattr(ser.dt, field) + expected = [getattr(x, field) for x in idx] + tm.assert_series_equal(result, Series(expected)) + + +@pytest.mark.parametrize("klass", [Timestamp, Timedelta, Period]) +@pytest.mark.parametrize("value", [None, np.nan, iNaT, float("nan"), NaT, "NaT", "nat"]) +def test_identity(klass, value): + assert klass(value) is NaT + + +@pytest.mark.parametrize("klass", 
[Timestamp, Timedelta, Period]) +@pytest.mark.parametrize("value", ["", "nat", "NAT", None, np.nan]) +def test_equality(klass, value): + if klass is Period and value == "": + pytest.skip("Period cannot parse empty string") + + assert klass(value).value == iNaT + + +@pytest.mark.parametrize("klass", [Timestamp, Timedelta]) +@pytest.mark.parametrize("method", ["round", "floor", "ceil"]) +@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"]) +def test_round_nat(klass, method, freq): + # see gh-14940 + ts = klass("nat") + + round_method = getattr(ts, method) + assert round_method(freq) is ts + + +@pytest.mark.parametrize( + "method", + [ + "astimezone", + "combine", + "ctime", + "dst", + "fromordinal", + "fromtimestamp", + pytest.param( + "fromisocalendar", + marks=pytest.mark.skipif( + not compat.PY38, + reason="'fromisocalendar' was added in stdlib datetime in python 3.8", + ), + ), + "isocalendar", + "strftime", + "strptime", + "time", + "timestamp", + "timetuple", + "timetz", + "toordinal", + "tzname", + "utcfromtimestamp", + "utcnow", + "utcoffset", + "utctimetuple", + "timestamp", + ], +) +def test_nat_methods_raise(method): + # see gh-9513, gh-17329 + msg = f"NaTType does not support {method}" + + with pytest.raises(ValueError, match=msg): + getattr(NaT, method)() + + +@pytest.mark.parametrize("method", ["weekday", "isoweekday"]) +def test_nat_methods_nan(method): + # see gh-9513, gh-17329 + assert np.isnan(getattr(NaT, method)()) + + +@pytest.mark.parametrize( + "method", ["date", "now", "replace", "today", "tz_convert", "tz_localize"] +) +def test_nat_methods_nat(method): + # see gh-8254, gh-9513, gh-17329 + assert getattr(NaT, method)() is NaT + + +@pytest.mark.parametrize( + "get_nat", [lambda x: NaT, lambda x: Timedelta(x), lambda x: Timestamp(x)] +) +def test_nat_iso_format(get_nat): + # see gh-12300 + assert get_nat("NaT").isoformat() == "NaT" + + +@pytest.mark.parametrize( + "klass,expected", + [ + (Timestamp, ["freqstr", "normalize", 
"to_julian_date", "to_period", "tz"]), + ( + Timedelta, + [ + "components", + "delta", + "is_populated", + "resolution_string", + "to_pytimedelta", + "to_timedelta64", + "view", + ], + ), + ], +) +def test_missing_public_nat_methods(klass, expected): + # see gh-17327 + # + # NaT should have *most* of the Timestamp and Timedelta methods. + # Here, we check which public methods NaT does not have. We + # ignore any missing private methods. + nat_names = dir(NaT) + klass_names = dir(klass) + + missing = [x for x in klass_names if x not in nat_names and not x.startswith("_")] + missing.sort() + + assert missing == expected + + +def _get_overlap_public_nat_methods(klass, as_tuple=False): + """ + Get overlapping public methods between NaT and another class. + + Parameters + ---------- + klass : type + The class to compare with NaT + as_tuple : bool, default False + Whether to return a list of tuples of the form (klass, method). + + Returns + ------- + overlap : list + """ + nat_names = dir(NaT) + klass_names = dir(klass) + + overlap = [ + x + for x in nat_names + if x in klass_names and not x.startswith("_") and callable(getattr(klass, x)) + ] + + # Timestamp takes precedence over Timedelta in terms of overlap. 
+ if klass is Timedelta: + ts_names = dir(Timestamp) + overlap = [x for x in overlap if x not in ts_names] + + if as_tuple: + overlap = [(klass, method) for method in overlap] + + overlap.sort() + return overlap + + +@pytest.mark.parametrize( + "klass,expected", + [ + ( + Timestamp, + [ + "astimezone", + "ceil", + "combine", + "ctime", + "date", + "day_name", + "dst", + "floor", + "fromisocalendar", + "fromisoformat", + "fromordinal", + "fromtimestamp", + "isocalendar", + "isoformat", + "isoweekday", + "month_name", + "now", + "replace", + "round", + "strftime", + "strptime", + "time", + "timestamp", + "timetuple", + "timetz", + "to_datetime64", + "to_numpy", + "to_pydatetime", + "today", + "toordinal", + "tz_convert", + "tz_localize", + "tzname", + "utcfromtimestamp", + "utcnow", + "utcoffset", + "utctimetuple", + "weekday", + ], + ), + (Timedelta, ["total_seconds"]), + ], +) +def test_overlap_public_nat_methods(klass, expected): + # see gh-17327 + # + # NaT should have *most* of the Timestamp and Timedelta methods. + # In case when Timestamp, Timedelta, and NaT are overlap, the overlap + # is considered to be with Timestamp and NaT, not Timedelta. + + # "fromisoformat" was introduced in 3.7 + if klass is Timestamp and not compat.PY37: + expected.remove("fromisoformat") + + # "fromisocalendar" was introduced in 3.8 + if klass is Timestamp and not compat.PY38: + expected.remove("fromisocalendar") + + assert _get_overlap_public_nat_methods(klass) == expected + + +@pytest.mark.parametrize( + "compare", + ( + _get_overlap_public_nat_methods(Timestamp, True) + + _get_overlap_public_nat_methods(Timedelta, True) + ), +) +def test_nat_doc_strings(compare): + # see gh-17327 + # + # The docstrings for overlapping methods should match. 
+ klass, method = compare + klass_doc = getattr(klass, method).__doc__ + + nat_doc = getattr(NaT, method).__doc__ + assert klass_doc == nat_doc + + +_ops = { + "left_plus_right": lambda a, b: a + b, + "right_plus_left": lambda a, b: b + a, + "left_minus_right": lambda a, b: a - b, + "right_minus_left": lambda a, b: b - a, + "left_times_right": lambda a, b: a * b, + "right_times_left": lambda a, b: b * a, + "left_div_right": lambda a, b: a / b, + "right_div_left": lambda a, b: b / a, +} + + +@pytest.mark.parametrize("op_name", list(_ops.keys())) +@pytest.mark.parametrize( + "value,val_type", + [ + (2, "scalar"), + (1.5, "floating"), + (np.nan, "floating"), + ("foo", "str"), + (timedelta(3600), "timedelta"), + (Timedelta("5s"), "timedelta"), + (datetime(2014, 1, 1), "timestamp"), + (Timestamp("2014-01-01"), "timestamp"), + (Timestamp("2014-01-01", tz="UTC"), "timestamp"), + (Timestamp("2014-01-01", tz="US/Eastern"), "timestamp"), + (pytz.timezone("Asia/Tokyo").localize(datetime(2014, 1, 1)), "timestamp"), + ], +) +def test_nat_arithmetic_scalar(op_name, value, val_type): + # see gh-6873 + invalid_ops = { + "scalar": {"right_div_left"}, + "floating": { + "right_div_left", + "left_minus_right", + "right_minus_left", + "left_plus_right", + "right_plus_left", + }, + "str": set(_ops.keys()), + "timedelta": {"left_times_right", "right_times_left"}, + "timestamp": { + "left_times_right", + "right_times_left", + "left_div_right", + "right_div_left", + }, + } + + op = _ops[op_name] + + if op_name in invalid_ops.get(val_type, set()): + if ( + val_type == "timedelta" + and "times" in op_name + and isinstance(value, Timedelta) + ): + msg = "Cannot multiply" + elif val_type == "str": + # un-specific check here because the message comes from str + # and varies by method + msg = ( + "can only concatenate str|" + "unsupported operand type|" + "can't multiply sequence|" + "Can't convert 'NaTType'|" + "must be str, not NaTType" + ) + else: + msg = "unsupported operand type" + + with 
pytest.raises(TypeError, match=msg): + op(NaT, value) + else: + if val_type == "timedelta" and "div" in op_name: + expected = np.nan + else: + expected = NaT + + assert op(NaT, value) is expected + + +@pytest.mark.parametrize( + "val,expected", [(np.nan, NaT), (NaT, np.nan), (np.timedelta64("NaT"), np.nan)] +) +def test_nat_rfloordiv_timedelta(val, expected): + # see gh-#18846 + # + # See also test_timedelta.TestTimedeltaArithmetic.test_floordiv + td = Timedelta(hours=3, minutes=4) + assert td // val is expected + + +@pytest.mark.parametrize( + "op_name", + ["left_plus_right", "right_plus_left", "left_minus_right", "right_minus_left"], +) +@pytest.mark.parametrize( + "value", + [ + DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), + DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), + DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]), + DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], tz="US/Pacific"), + TimedeltaIndex(["1 day", "2 day"], name="x"), + ], +) +def test_nat_arithmetic_index(op_name, value): + # see gh-11718 + exp_name = "x" + exp_data = [NaT] * 2 + + if is_datetime64_any_dtype(value.dtype) and "plus" in op_name: + expected = DatetimeIndex(exp_data, tz=value.tz, name=exp_name) + else: + expected = TimedeltaIndex(exp_data, name=exp_name) + + if not isinstance(value, Index): + expected = expected.array + + op = _ops[op_name] + result = op(NaT, value) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "op_name", + ["left_plus_right", "right_plus_left", "left_minus_right", "right_minus_left"], +) +@pytest.mark.parametrize("box", [TimedeltaIndex, Series, TimedeltaArray._from_sequence]) +def test_nat_arithmetic_td64_vector(op_name, box): + # see gh-19124 + vec = box(["1 day", "2 day"], dtype="timedelta64[ns]") + box_nat = box([NaT, NaT], dtype="timedelta64[ns]") + tm.assert_equal(_ops[op_name](vec, NaT), box_nat) + + +@pytest.mark.parametrize( + "dtype,op,out_dtype", + [ + ("datetime64[ns]", 
operator.add, "datetime64[ns]"), + ("datetime64[ns]", roperator.radd, "datetime64[ns]"), + ("datetime64[ns]", operator.sub, "timedelta64[ns]"), + ("datetime64[ns]", roperator.rsub, "timedelta64[ns]"), + ("timedelta64[ns]", operator.add, "datetime64[ns]"), + ("timedelta64[ns]", roperator.radd, "datetime64[ns]"), + ("timedelta64[ns]", operator.sub, "datetime64[ns]"), + ("timedelta64[ns]", roperator.rsub, "timedelta64[ns]"), + ], +) +def test_nat_arithmetic_ndarray(dtype, op, out_dtype): + other = np.arange(10).astype(dtype) + result = op(NaT, other) + + expected = np.empty(other.shape, dtype=out_dtype) + expected.fill("NaT") + tm.assert_numpy_array_equal(result, expected) + + +def test_nat_pinned_docstrings(): + # see gh-17327 + assert NaT.ctime.__doc__ == datetime.ctime.__doc__ + + +def test_to_numpy_alias(): + # GH 24653: alias .to_numpy() for scalars + expected = NaT.to_datetime64() + result = NaT.to_numpy() + + assert isna(expected) and isna(result) + + +@pytest.mark.parametrize("other", [Timedelta(0), Timestamp(0)]) +def test_nat_comparisons(compare_operators_no_eq_ne, other): + # GH 26039 + assert getattr(NaT, compare_operators_no_eq_ne)(other) is False + assert getattr(other, compare_operators_no_eq_ne)(NaT) is False diff --git a/pandas/tests/scalar/timedelta/__init__.py b/pandas/tests/scalar/timedelta/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py new file mode 100644 index 00000000..555b47c8 --- /dev/null +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -0,0 +1,758 @@ +""" +Tests for scalar Timedelta arithmetic ops +""" +from datetime import datetime, timedelta +import operator + +import numpy as np +import pytest + +import pandas as pd +from pandas import NaT, Timedelta, Timestamp, _is_numpy_dev, offsets +import pandas._testing as tm +from pandas.core import ops + + +class TestTimedeltaAdditionSubtraction: + """ + Tests for 
Timedelta methods: + + __add__, __radd__, + __sub__, __rsub__ + """ + + @pytest.mark.parametrize( + "ten_seconds", + [ + Timedelta(10, unit="s"), + timedelta(seconds=10), + np.timedelta64(10, "s"), + np.timedelta64(10000000000, "ns"), + offsets.Second(10), + ], + ) + def test_td_add_sub_ten_seconds(self, ten_seconds): + # GH#6808 + base = Timestamp("20130101 09:01:12.123456") + expected_add = Timestamp("20130101 09:01:22.123456") + expected_sub = Timestamp("20130101 09:01:02.123456") + + result = base + ten_seconds + assert result == expected_add + + result = base - ten_seconds + assert result == expected_sub + + @pytest.mark.parametrize( + "one_day_ten_secs", + [ + Timedelta("1 day, 00:00:10"), + Timedelta("1 days, 00:00:10"), + timedelta(days=1, seconds=10), + np.timedelta64(1, "D") + np.timedelta64(10, "s"), + offsets.Day() + offsets.Second(10), + ], + ) + def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): + # GH#6808 + base = Timestamp("20130102 09:01:12.123456") + expected_add = Timestamp("20130103 09:01:22.123456") + expected_sub = Timestamp("20130101 09:01:02.123456") + + result = base + one_day_ten_secs + assert result == expected_add + + result = base - one_day_ten_secs + assert result == expected_sub + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_datetimelike_scalar(self, op): + # GH#19738 + td = Timedelta(10, unit="d") + + result = op(td, datetime(2016, 1, 1)) + if op is operator.add: + # datetime + Timedelta does _not_ call Timedelta.__radd__, + # so we get a datetime back instead of a Timestamp + assert isinstance(result, Timestamp) + assert result == Timestamp(2016, 1, 11) + + result = op(td, Timestamp("2018-01-12 18:09")) + assert isinstance(result, Timestamp) + assert result == Timestamp("2018-01-22 18:09") + + result = op(td, np.datetime64("2018-01-12")) + assert isinstance(result, Timestamp) + assert result == Timestamp("2018-01-22") + + result = op(td, NaT) + assert result is NaT + + 
@pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_td(self, op): + td = Timedelta(10, unit="d") + + result = op(td, Timedelta(days=10)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=20) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_pytimedelta(self, op): + td = Timedelta(10, unit="d") + result = op(td, timedelta(days=9)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=19) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_timedelta64(self, op): + td = Timedelta(10, unit="d") + result = op(td, np.timedelta64(-4, "D")) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=6) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_offset(self, op): + td = Timedelta(10, unit="d") + + result = op(td, offsets.Hour(6)) + assert isinstance(result, Timedelta) + assert result == Timedelta(days=10, hours=6) + + def test_td_sub_td(self): + td = Timedelta(10, unit="d") + expected = Timedelta(0, unit="ns") + result = td - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_pytimedelta(self): + td = Timedelta(10, unit="d") + expected = Timedelta(0, unit="ns") + + result = td - td.to_pytimedelta() + assert isinstance(result, Timedelta) + assert result == expected + + result = td.to_pytimedelta() - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_timedelta64(self): + td = Timedelta(10, unit="d") + expected = Timedelta(0, unit="ns") + + result = td - td.to_timedelta64() + assert isinstance(result, Timedelta) + assert result == expected + + result = td.to_timedelta64() - td + assert isinstance(result, Timedelta) + assert result == expected + + def test_td_sub_nat(self): + # In this context pd.NaT is treated as timedelta-like + td = Timedelta(10, unit="d") + result = td - NaT + assert result is NaT + + def 
test_td_sub_td64_nat(self): + td = Timedelta(10, unit="d") + td_nat = np.timedelta64("NaT") + + result = td - td_nat + assert result is NaT + + result = td_nat - td + assert result is NaT + + def test_td_sub_offset(self): + td = Timedelta(10, unit="d") + result = td - offsets.Hour(1) + assert isinstance(result, Timedelta) + assert result == Timedelta(239, unit="h") + + def test_td_add_sub_numeric_raises(self): + td = Timedelta(10, unit="d") + for other in [2, 2.0, np.int64(2), np.float64(2)]: + with pytest.raises(TypeError): + td + other + with pytest.raises(TypeError): + other + td + with pytest.raises(TypeError): + td - other + with pytest.raises(TypeError): + other - td + + def test_td_rsub_nat(self): + td = Timedelta(10, unit="d") + result = NaT - td + assert result is NaT + + result = np.datetime64("NaT") - td + assert result is NaT + + def test_td_rsub_offset(self): + result = offsets.Hour(1) - Timedelta(10, unit="d") + assert isinstance(result, Timedelta) + assert result == Timedelta(-239, unit="h") + + def test_td_sub_timedeltalike_object_dtype_array(self): + # GH#21980 + arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) + exp = np.array([Timestamp("20121231 9:01"), Timestamp("20121229 9:02")]) + res = arr - Timedelta("1D") + tm.assert_numpy_array_equal(res, exp) + + def test_td_sub_mixed_most_timedeltalike_object_dtype_array(self): + # GH#21980 + now = Timestamp.now() + arr = np.array([now, Timedelta("1D"), np.timedelta64(2, "h")]) + exp = np.array( + [ + now - Timedelta("1D"), + Timedelta("0D"), + np.timedelta64(2, "h") - Timedelta("1D"), + ] + ) + res = arr - Timedelta("1D") + tm.assert_numpy_array_equal(res, exp) + + def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self): + # GH#21980 + now = Timestamp.now() + arr = np.array([now, Timedelta("1D"), np.timedelta64(2, "h")]) + with pytest.raises(TypeError): + Timedelta("1D") - arr + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def 
test_td_add_timedeltalike_object_dtype_array(self, op): + # GH#21980 + arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) + exp = np.array([Timestamp("20130102 9:01"), Timestamp("20121231 9:02")]) + res = op(arr, Timedelta("1D")) + tm.assert_numpy_array_equal(res, exp) + + @pytest.mark.parametrize("op", [operator.add, ops.radd]) + def test_td_add_mixed_timedeltalike_object_dtype_array(self, op): + # GH#21980 + now = Timestamp.now() + arr = np.array([now, Timedelta("1D")]) + exp = np.array([now + Timedelta("1D"), Timedelta("2D")]) + res = op(arr, Timedelta("1D")) + tm.assert_numpy_array_equal(res, exp) + + # TODO: moved from index tests following #24365, may need de-duplication + def test_ops_ndarray(self): + td = Timedelta("1 day") + + # timedelta, timedelta + other = pd.to_timedelta(["1 day"]).values + expected = pd.to_timedelta(["2 days"]).values + tm.assert_numpy_array_equal(td + other, expected) + tm.assert_numpy_array_equal(other + td, expected) + msg = r"unsupported operand type\(s\) for \+: 'Timedelta' and 'int'" + with pytest.raises(TypeError, match=msg): + td + np.array([1]) + msg = r"unsupported operand type\(s\) for \+: 'numpy.ndarray' and 'Timedelta'" + with pytest.raises(TypeError, match=msg): + np.array([1]) + td + + expected = pd.to_timedelta(["0 days"]).values + tm.assert_numpy_array_equal(td - other, expected) + tm.assert_numpy_array_equal(-other + td, expected) + msg = r"unsupported operand type\(s\) for -: 'Timedelta' and 'int'" + with pytest.raises(TypeError, match=msg): + td - np.array([1]) + msg = r"unsupported operand type\(s\) for -: 'numpy.ndarray' and 'Timedelta'" + with pytest.raises(TypeError, match=msg): + np.array([1]) - td + + expected = pd.to_timedelta(["2 days"]).values + tm.assert_numpy_array_equal(td * np.array([2]), expected) + tm.assert_numpy_array_equal(np.array([2]) * td, expected) + msg = ( + "ufunc '?multiply'? 
cannot use operands with types" + r" dtype\(' right + + assert not left == right + assert left != right + + def test_ops_notimplemented(self): + class Other: + pass + + other = Other() + + td = Timedelta("1 day") + assert td.__add__(other) is NotImplemented + assert td.__sub__(other) is NotImplemented + assert td.__truediv__(other) is NotImplemented + assert td.__mul__(other) is NotImplemented + assert td.__floordiv__(other) is NotImplemented + + def test_unary_ops(self): + td = Timedelta(10, unit="d") + + # __neg__, __pos__ + assert -td == Timedelta(-10, unit="d") + assert -td == Timedelta("-10d") + assert +td == Timedelta(10, unit="d") + + # __abs__, __abs__(__neg__) + assert abs(td) == td + assert abs(-td) == td + assert abs(-td) == Timedelta("10d") + + +class TestTimedeltaComparison: + def test_compare_tick(self, tick_classes): + cls = tick_classes + + off = cls(4) + td = off.delta + assert isinstance(td, Timedelta) + + assert td == off + assert not td != off + assert td <= off + assert td >= off + assert not td < off + assert not td > off + + assert not td == 2 * off + assert td != 2 * off + assert td <= 2 * off + assert td < 2 * off + assert not td >= 2 * off + assert not td > 2 * off + + def test_comparison_object_array(self): + # analogous to GH#15183 + td = Timedelta("2 days") + other = Timedelta("3 hours") + + arr = np.array([other, td], dtype=object) + res = arr == td + expected = np.array([False, True], dtype=bool) + assert (res == expected).all() + + # 2D case + arr = np.array([[other, td], [td, other]], dtype=object) + res = arr != td + expected = np.array([[True, False], [False, True]], dtype=bool) + assert res.shape == expected.shape + assert (res == expected).all() + + def test_compare_timedelta_ndarray(self): + # GH11835 + periods = [Timedelta("0 days 01:00:00"), Timedelta("0 days 01:00:00")] + arr = np.array(periods) + result = arr[0] > arr + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + 
@pytest.mark.skip(reason="GH#20829 is reverted until after 0.24.0") + def test_compare_custom_object(self): + """ + Make sure non supported operations on Timedelta returns NonImplemented + and yields to other operand (GH#20829). + """ + + class CustomClass: + def __init__(self, cmp_result=None): + self.cmp_result = cmp_result + + def generic_result(self): + if self.cmp_result is None: + return NotImplemented + else: + return self.cmp_result + + def __eq__(self, other): + return self.generic_result() + + def __gt__(self, other): + return self.generic_result() + + t = Timedelta("1s") + + assert not (t == "string") + assert not (t == 1) + assert not (t == CustomClass()) + assert not (t == CustomClass(cmp_result=False)) + + assert t < CustomClass(cmp_result=True) + assert not (t < CustomClass(cmp_result=False)) + + assert t == CustomClass(cmp_result=True) + + @pytest.mark.parametrize("val", ["string", 1]) + def test_compare_unknown_type(self, val): + # GH20829 + t = Timedelta("1s") + with pytest.raises(TypeError): + t >= val + with pytest.raises(TypeError): + t > val + with pytest.raises(TypeError): + t <= val + with pytest.raises(TypeError): + t < val + + +class TestTimedeltas: + @pytest.mark.parametrize( + "unit, value, expected", + [ + ("us", 9.999, 9999), + ("ms", 9.999999, 9999999), + ("s", 9.999999999, 9999999999), + ], + ) + def test_rounding_on_int_unit_construction(self, unit, value, expected): + # GH 12690 + result = Timedelta(value, unit=unit) + assert result.value == expected + result = Timedelta(str(value) + unit) + assert result.value == expected + + def test_total_seconds_scalar(self): + # see gh-10939 + rng = Timedelta("1 days, 10:11:12.100123456") + expt = 1 * 86400 + 10 * 3600 + 11 * 60 + 12 + 100123456.0 / 1e9 + tm.assert_almost_equal(rng.total_seconds(), expt) + + rng = Timedelta(np.nan) + assert np.isnan(rng.total_seconds()) + + def test_conversion(self): + + for td in [Timedelta(10, unit="d"), Timedelta("1 days, 10:11:12.012345")]: + pydt = 
td.to_pytimedelta() + assert td == Timedelta(pydt) + assert td == pydt + assert isinstance(pydt, timedelta) and not isinstance(pydt, Timedelta) + + assert td == np.timedelta64(td.value, "ns") + td64 = td.to_timedelta64() + + assert td64 == np.timedelta64(td.value, "ns") + assert td == td64 + + assert isinstance(td64, np.timedelta64) + + # this is NOT equal and cannot be roundtripped (because of the nanos) + td = Timedelta("1 days, 10:11:12.012345678") + assert td != td.to_pytimedelta() + + def test_freq_conversion(self): + + # truediv + td = Timedelta("1 days 2 hours 3 ns") + result = td / np.timedelta64(1, "D") + assert result == td.value / float(86400 * 1e9) + result = td / np.timedelta64(1, "s") + assert result == td.value / float(1e9) + result = td / np.timedelta64(1, "ns") + assert result == td.value + + # floordiv + td = Timedelta("1 days 2 hours 3 ns") + result = td // np.timedelta64(1, "D") + assert result == 1 + result = td // np.timedelta64(1, "s") + assert result == 93600 + result = td // np.timedelta64(1, "ns") + assert result == td.value + + def test_fields(self): + def check(value): + # that we are int + assert isinstance(value, int) + + # compat to datetime.timedelta + rng = to_timedelta("1 days, 10:11:12") + assert rng.days == 1 + assert rng.seconds == 10 * 3600 + 11 * 60 + 12 + assert rng.microseconds == 0 + assert rng.nanoseconds == 0 + + msg = "'Timedelta' object has no attribute '{}'" + with pytest.raises(AttributeError, match=msg.format("hours")): + rng.hours + with pytest.raises(AttributeError, match=msg.format("minutes")): + rng.minutes + with pytest.raises(AttributeError, match=msg.format("milliseconds")): + rng.milliseconds + + # GH 10050 + check(rng.days) + check(rng.seconds) + check(rng.microseconds) + check(rng.nanoseconds) + + td = Timedelta("-1 days, 10:11:12") + assert abs(td) == Timedelta("13:48:48") + assert str(td) == "-1 days +10:11:12" + assert -td == Timedelta("0 days 13:48:48") + assert -Timedelta("-1 days, 10:11:12").value == 
49728000000000 + assert Timedelta("-1 days, 10:11:12").value == -49728000000000 + + rng = to_timedelta("-1 days, 10:11:12.100123456") + assert rng.days == -1 + assert rng.seconds == 10 * 3600 + 11 * 60 + 12 + assert rng.microseconds == 100 * 1000 + 123 + assert rng.nanoseconds == 456 + msg = "'Timedelta' object has no attribute '{}'" + with pytest.raises(AttributeError, match=msg.format("hours")): + rng.hours + with pytest.raises(AttributeError, match=msg.format("minutes")): + rng.minutes + with pytest.raises(AttributeError, match=msg.format("milliseconds")): + rng.milliseconds + + # components + tup = to_timedelta(-1, "us").components + assert tup.days == -1 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert tup.microseconds == 999 + assert tup.nanoseconds == 0 + + # GH 10050 + check(tup.days) + check(tup.hours) + check(tup.minutes) + check(tup.seconds) + check(tup.milliseconds) + check(tup.microseconds) + check(tup.nanoseconds) + + tup = Timedelta("-1 days 1 us").components + assert tup.days == -2 + assert tup.hours == 23 + assert tup.minutes == 59 + assert tup.seconds == 59 + assert tup.milliseconds == 999 + assert tup.microseconds == 999 + assert tup.nanoseconds == 0 + + def test_iso_conversion(self): + # GH #21877 + expected = Timedelta(1, unit="s") + assert to_timedelta("P0DT0H0M1S") == expected + + def test_nat_converters(self): + result = to_timedelta("nat").to_numpy() + assert result.dtype.kind == "M" + assert result.astype("int64") == iNaT + + result = to_timedelta("nan").to_numpy() + assert result.dtype.kind == "M" + assert result.astype("int64") == iNaT + + @pytest.mark.parametrize( + "units, np_unit", + [ + (["W", "w"], "W"), + (["D", "d", "days", "day", "Days", "Day"], "D"), + ( + ["m", "minute", "min", "minutes", "t", "Minute", "Min", "Minutes", "T"], + "m", + ), + (["s", "seconds", "sec", "second", "S", "Seconds", "Sec", "Second"], "s"), + ( + [ + "ms", + "milliseconds", + 
"millisecond", + "milli", + "millis", + "l", + "MS", + "Milliseconds", + "Millisecond", + "Milli", + "Millis", + "L", + ], + "ms", + ), + ( + [ + "us", + "microseconds", + "microsecond", + "micro", + "micros", + "u", + "US", + "Microseconds", + "Microsecond", + "Micro", + "Micros", + "U", + ], + "us", + ), + ( + [ + "ns", + "nanoseconds", + "nanosecond", + "nano", + "nanos", + "n", + "NS", + "Nanoseconds", + "Nanosecond", + "Nano", + "Nanos", + "N", + ], + "ns", + ), + ], + ) + @pytest.mark.parametrize("wrapper", [np.array, list, pd.Index]) + def test_unit_parser(self, units, np_unit, wrapper): + # validate all units, GH 6855, GH 21762 + for unit in units: + # array-likes + expected = TimedeltaIndex( + [np.timedelta64(i, np_unit) for i in np.arange(5).tolist()] + ) + result = to_timedelta(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(range(5)), unit=unit) + tm.assert_index_equal(result, expected) + + if unit == "M": + # M is treated as minutes in string repr + expected = TimedeltaIndex( + [np.timedelta64(i, "m") for i in np.arange(5).tolist()] + ) + + str_repr = [f"{x}{unit}" for x in np.arange(5)] + result = to_timedelta(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + result = TimedeltaIndex(wrapper(str_repr)) + tm.assert_index_equal(result, expected) + + # scalar + expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]")) + + result = to_timedelta(2, unit=unit) + assert result == expected + result = Timedelta(2, unit=unit) + assert result == expected + + if unit == "M": + expected = Timedelta(np.timedelta64(2, "m").astype("timedelta64[ns]")) + + result = to_timedelta(f"2{unit}") + assert result == expected + result = Timedelta(f"2{unit}") + assert result == expected + + @pytest.mark.parametrize("unit", ["Y", "y", "M"]) + def test_unit_m_y_raises(self, unit): + msg = "Units 'M' and 'Y' are no longer supported" + with pytest.raises(ValueError, match=msg): + Timedelta(10, 
unit) + + with pytest.raises(ValueError, match=msg): + to_timedelta(10, unit) + + with pytest.raises(ValueError, match=msg): + to_timedelta([1, 2], unit) + + def test_numeric_conversions(self): + assert Timedelta(0) == np.timedelta64(0, "ns") + assert Timedelta(10) == np.timedelta64(10, "ns") + assert Timedelta(10, unit="ns") == np.timedelta64(10, "ns") + + assert Timedelta(10, unit="us") == np.timedelta64(10, "us") + assert Timedelta(10, unit="ms") == np.timedelta64(10, "ms") + assert Timedelta(10, unit="s") == np.timedelta64(10, "s") + assert Timedelta(10, unit="d") == np.timedelta64(10, "D") + + def test_timedelta_conversions(self): + assert Timedelta(timedelta(seconds=1)) == np.timedelta64(1, "s").astype( + "m8[ns]" + ) + assert Timedelta(timedelta(microseconds=1)) == np.timedelta64(1, "us").astype( + "m8[ns]" + ) + assert Timedelta(timedelta(days=1)) == np.timedelta64(1, "D").astype("m8[ns]") + + def test_to_numpy_alias(self): + # GH 24653: alias .to_numpy() for scalars + td = Timedelta("10m7s") + assert td.to_timedelta64() == td.to_numpy() + + def test_round(self): + + t1 = Timedelta("1 days 02:34:56.789123456") + t2 = Timedelta("-1 days 02:34:56.789123456") + + for (freq, s1, s2) in [ + ("N", t1, t2), + ( + "U", + Timedelta("1 days 02:34:56.789123000"), + Timedelta("-1 days 02:34:56.789123000"), + ), + ( + "L", + Timedelta("1 days 02:34:56.789000000"), + Timedelta("-1 days 02:34:56.789000000"), + ), + ("S", Timedelta("1 days 02:34:57"), Timedelta("-1 days 02:34:57")), + ("2S", Timedelta("1 days 02:34:56"), Timedelta("-1 days 02:34:56")), + ("5S", Timedelta("1 days 02:34:55"), Timedelta("-1 days 02:34:55")), + ("T", Timedelta("1 days 02:35:00"), Timedelta("-1 days 02:35:00")), + ("12T", Timedelta("1 days 02:36:00"), Timedelta("-1 days 02:36:00")), + ("H", Timedelta("1 days 03:00:00"), Timedelta("-1 days 03:00:00")), + ("d", Timedelta("1 days"), Timedelta("-1 days")), + ]: + r1 = t1.round(freq) + assert r1 == s1 + r2 = t2.round(freq) + assert r2 == s2 + + # 
invalid + for freq, msg in [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ]: + with pytest.raises(ValueError, match=msg): + t1.round(freq) + + t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us") + t2 = -1 * t1 + t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s") + t1c = TimedeltaIndex([1, 1, 1], unit="D") + + # note that negative times round DOWN! so don't give whole numbers + for (freq, s1, s2) in [ + ("N", t1, t2), + ("U", t1, t2), + ( + "L", + t1a, + TimedeltaIndex( + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"], + dtype="timedelta64[ns]", + freq=None, + ), + ), + ( + "S", + t1a, + TimedeltaIndex( + ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"], + dtype="timedelta64[ns]", + freq=None, + ), + ), + ( + "12T", + t1c, + TimedeltaIndex( + ["-1 days", "-1 days", "-1 days"], + dtype="timedelta64[ns]", + freq=None, + ), + ), + ( + "H", + t1c, + TimedeltaIndex( + ["-1 days", "-1 days", "-1 days"], + dtype="timedelta64[ns]", + freq=None, + ), + ), + ("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")), + ]: + + r1 = t1.round(freq) + tm.assert_index_equal(r1, s1) + r2 = t2.round(freq) + tm.assert_index_equal(r2, s2) + + # invalid + for freq, msg in [ + ("Y", " is a non-fixed frequency"), + ("M", " is a non-fixed frequency"), + ("foobar", "Invalid frequency: foobar"), + ]: + with pytest.raises(ValueError, match=msg): + t1.round(freq) + + def test_contains(self): + # Checking for any NaT-like objects + # GH 13603 + td = to_timedelta(range(5), unit="d") + pd.offsets.Hour(1) + for v in [NaT, None, float("nan"), np.nan]: + assert not (v in td) + + td = to_timedelta([NaT]) + for v in [NaT, None, float("nan"), np.nan]: + assert v in td + + def test_identity(self): + + td = Timedelta(10, unit="d") + assert isinstance(td, Timedelta) + assert isinstance(td, timedelta) + + def test_short_format_converters(self): + def conv(v): + return 
v.astype("m8[ns]") + + assert Timedelta("10") == np.timedelta64(10, "ns") + assert Timedelta("10ns") == np.timedelta64(10, "ns") + assert Timedelta("100") == np.timedelta64(100, "ns") + assert Timedelta("100ns") == np.timedelta64(100, "ns") + + assert Timedelta("1000") == np.timedelta64(1000, "ns") + assert Timedelta("1000ns") == np.timedelta64(1000, "ns") + assert Timedelta("1000NS") == np.timedelta64(1000, "ns") + + assert Timedelta("10us") == np.timedelta64(10000, "ns") + assert Timedelta("100us") == np.timedelta64(100000, "ns") + assert Timedelta("1000us") == np.timedelta64(1000000, "ns") + assert Timedelta("1000Us") == np.timedelta64(1000000, "ns") + assert Timedelta("1000uS") == np.timedelta64(1000000, "ns") + + assert Timedelta("1ms") == np.timedelta64(1000000, "ns") + assert Timedelta("10ms") == np.timedelta64(10000000, "ns") + assert Timedelta("100ms") == np.timedelta64(100000000, "ns") + assert Timedelta("1000ms") == np.timedelta64(1000000000, "ns") + + assert Timedelta("-1s") == -np.timedelta64(1000000000, "ns") + assert Timedelta("1s") == np.timedelta64(1000000000, "ns") + assert Timedelta("10s") == np.timedelta64(10000000000, "ns") + assert Timedelta("100s") == np.timedelta64(100000000000, "ns") + assert Timedelta("1000s") == np.timedelta64(1000000000000, "ns") + + assert Timedelta("1d") == conv(np.timedelta64(1, "D")) + assert Timedelta("-1d") == -conv(np.timedelta64(1, "D")) + assert Timedelta("1D") == conv(np.timedelta64(1, "D")) + assert Timedelta("10D") == conv(np.timedelta64(10, "D")) + assert Timedelta("100D") == conv(np.timedelta64(100, "D")) + assert Timedelta("1000D") == conv(np.timedelta64(1000, "D")) + assert Timedelta("10000D") == conv(np.timedelta64(10000, "D")) + + # space + assert Timedelta(" 10000D ") == conv(np.timedelta64(10000, "D")) + assert Timedelta(" - 10000D ") == -conv(np.timedelta64(10000, "D")) + + # invalid + with pytest.raises(ValueError): + Timedelta("1foo") + with pytest.raises(ValueError): + Timedelta("foo") + + def 
test_full_format_converters(self): + def conv(v): + return v.astype("m8[ns]") + + d1 = np.timedelta64(1, "D") + + assert Timedelta("1days") == conv(d1) + assert Timedelta("1days,") == conv(d1) + assert Timedelta("- 1days,") == -conv(d1) + + assert Timedelta("00:00:01") == conv(np.timedelta64(1, "s")) + assert Timedelta("06:00:01") == conv(np.timedelta64(6 * 3600 + 1, "s")) + assert Timedelta("06:00:01.0") == conv(np.timedelta64(6 * 3600 + 1, "s")) + assert Timedelta("06:00:01.01") == conv( + np.timedelta64(1000 * (6 * 3600 + 1) + 10, "ms") + ) + + assert Timedelta("- 1days, 00:00:01") == conv(-d1 + np.timedelta64(1, "s")) + assert Timedelta("1days, 06:00:01") == conv( + d1 + np.timedelta64(6 * 3600 + 1, "s") + ) + assert Timedelta("1days, 06:00:01.01") == conv( + d1 + np.timedelta64(1000 * (6 * 3600 + 1) + 10, "ms") + ) + + # invalid + with pytest.raises(ValueError): + Timedelta("- 1days, 00") + + def test_overflow(self): + # GH 9442 + s = Series(pd.date_range("20130101", periods=100000, freq="H")) + s[0] += Timedelta("1s 1ms") + + # mean + result = (s - s.min()).mean() + expected = Timedelta((TimedeltaIndex((s - s.min())).asi8 / len(s)).sum()) + + # the computation is converted to float so + # might be some loss of precision + assert np.allclose(result.value / 1000, expected.value / 1000) + + # sum + msg = "overflow in timedelta operation" + with pytest.raises(ValueError, match=msg): + (s - s.min()).sum() + s1 = s[0:10000] + with pytest.raises(ValueError, match=msg): + (s1 - s1.min()).sum() + s2 = s[0:1000] + result = (s2 - s2.min()).sum() + + def test_pickle(self): + + v = Timedelta("1 days 10:11:12.0123456") + v_p = tm.round_trip_pickle(v) + assert v == v_p + + def test_timedelta_hash_equality(self): + # GH 11129 + v = Timedelta(1, "D") + td = timedelta(days=1) + assert hash(v) == hash(td) + + d = {td: 2} + assert d[v] == 2 + + tds = timedelta_range("1 second", periods=20) + assert all(hash(td) == hash(td.to_pytimedelta()) for td in tds) + + # python timedeltas 
drop ns resolution + ns_td = Timedelta(1, "ns") + assert hash(ns_td) != hash(ns_td.to_pytimedelta()) + + def test_implementation_limits(self): + min_td = Timedelta(Timedelta.min) + max_td = Timedelta(Timedelta.max) + + # GH 12727 + # timedelta limits correspond to int64 boundaries + assert min_td.value == np.iinfo(np.int64).min + 1 + assert max_td.value == np.iinfo(np.int64).max + + # Beyond lower limit, a NAT before the Overflow + assert (min_td - Timedelta(1, "ns")) is NaT + + with pytest.raises(OverflowError): + min_td - Timedelta(2, "ns") + + with pytest.raises(OverflowError): + max_td + Timedelta(1, "ns") + + # Same tests using the internal nanosecond values + td = Timedelta(min_td.value - 1, "ns") + assert td is NaT + + with pytest.raises(OverflowError): + Timedelta(min_td.value - 2, "ns") + + with pytest.raises(OverflowError): + Timedelta(max_td.value + 1, "ns") + + def test_total_seconds_precision(self): + # GH 19458 + assert Timedelta("30S").total_seconds() == 30.0 + assert Timedelta("0").total_seconds() == 0.0 + assert Timedelta("-2S").total_seconds() == -2.0 + assert Timedelta("5.324S").total_seconds() == 5.324 + assert (Timedelta("30S").total_seconds() - 30.0) < 1e-20 + assert (30.0 - Timedelta("30S").total_seconds()) < 1e-20 + + def test_timedelta_arithmetic(self): + data = Series(["nat", "32 days"], dtype="timedelta64[ns]") + deltas = [timedelta(days=1), Timedelta(1, unit="D")] + for delta in deltas: + result_method = data.add(delta) + result_operator = data + delta + expected = Series(["nat", "33 days"], dtype="timedelta64[ns]") + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + + result_method = data.sub(delta) + result_operator = data - delta + expected = Series(["nat", "31 days"], dtype="timedelta64[ns]") + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + # GH 9396 + result_method = data.div(delta) + result_operator = data / delta + expected = 
Series([np.nan, 32.0], dtype="float64") + tm.assert_series_equal(result_operator, expected) + tm.assert_series_equal(result_method, expected) + + def test_apply_to_timedelta(self): + timedelta_NaT = to_timedelta("NaT") + + list_of_valid_strings = ["00:00:01", "00:00:02"] + a = to_timedelta(list_of_valid_strings) + b = Series(list_of_valid_strings).apply(to_timedelta) + # Can't compare until apply on a Series gives the correct dtype + # assert_series_equal(a, b) + + list_of_strings = ["00:00:01", np.nan, NaT, timedelta_NaT] + + # TODO: unused? + a = to_timedelta(list_of_strings) # noqa + b = Series(list_of_strings).apply(to_timedelta) # noqa + # Can't compare until apply on a Series gives the correct dtype + # assert_series_equal(a, b) + + def test_components(self): + rng = timedelta_range("1 days, 10:11:12", periods=2, freq="s") + rng.components + + # with nat + s = Series(rng) + s[1] = np.nan + + result = s.dt.components + assert not result.iloc[0].isna().all() + assert result.iloc[1].isna().all() + + def test_resolution_string(self): + assert Timedelta(days=1).resolution_string == "D" + assert Timedelta(days=1, hours=6).resolution_string == "H" + assert Timedelta(days=1, minutes=6).resolution_string == "T" + assert Timedelta(days=1, seconds=6).resolution_string == "S" + assert Timedelta(days=1, milliseconds=6).resolution_string == "L" + assert Timedelta(days=1, microseconds=6).resolution_string == "U" + assert Timedelta(days=1, nanoseconds=6).resolution_string == "N" + + def test_resolution_deprecated(self): + # GH#21344 + td = Timedelta(days=4, hours=3) + result = td.resolution + assert result == Timedelta(nanoseconds=1) + + # Check that the attribute is available on the class, mirroring + # the stdlib timedelta behavior + result = Timedelta.resolution + assert result == Timedelta(nanoseconds=1) + + +@pytest.mark.parametrize( + "value, expected", + [ + (Timedelta("10S"), True), + (Timedelta("-10S"), True), + (Timedelta(10, unit="ns"), True), + (Timedelta(0, 
unit="ns"), False), + (Timedelta(-10, unit="ns"), True), + (Timedelta(None), True), + (NaT, True), + ], +) +def test_truthiness(value, expected): + # https://github.com/pandas-dev/pandas/issues/21484 + assert bool(value) is expected diff --git a/pandas/tests/scalar/timestamp/__init__.py b/pandas/tests/scalar/timestamp/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py new file mode 100644 index 00000000..ccd7bf72 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -0,0 +1,228 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas.errors import OutOfBoundsDatetime + +from pandas import Timedelta, Timestamp + +from pandas.tseries import offsets +from pandas.tseries.frequencies import to_offset + + +class TestTimestampArithmetic: + def test_overflow_offset(self): + # no overflow expected + + stamp = Timestamp("2000/1/1") + offset_no_overflow = to_offset("D") * 100 + + expected = Timestamp("2000/04/10") + assert stamp + offset_no_overflow == expected + + assert offset_no_overflow + stamp == expected + + expected = Timestamp("1999/09/23") + assert stamp - offset_no_overflow == expected + + def test_overflow_offset_raises(self): + # xref https://github.com/statsmodels/statsmodels/issues/3374 + # ends up multiplying really large numbers which overflow + + stamp = Timestamp("2017-01-13 00:00:00", freq="D") + offset_overflow = 20169940 * offsets.Day(1) + msg = ( + "the add operation between " + r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " + "will overflow" + ) + + with pytest.raises(OverflowError, match=msg): + stamp + offset_overflow + + with pytest.raises(OverflowError, match=msg): + offset_overflow + stamp + + with pytest.raises(OverflowError, match=msg): + stamp - offset_overflow + + # xref https://github.com/pandas-dev/pandas/issues/14080 + # used to crash, so check for 
proper overflow exception + + stamp = Timestamp("2000/1/1") + offset_overflow = to_offset("D") * 100 ** 25 + + with pytest.raises(OverflowError, match=msg): + stamp + offset_overflow + + with pytest.raises(OverflowError, match=msg): + offset_overflow + stamp + + with pytest.raises(OverflowError, match=msg): + stamp - offset_overflow + + def test_overflow_timestamp_raises(self): + # https://github.com/pandas-dev/pandas/issues/31774 + msg = "Result is too large" + a = Timestamp("2101-01-01 00:00:00") + b = Timestamp("1688-01-01 00:00:00") + + with pytest.raises(OutOfBoundsDatetime, match=msg): + a - b + + # but we're OK for timestamp and datetime.datetime + assert (a - b.to_pydatetime()) == (a.to_pydatetime() - b) + + def test_delta_preserve_nanos(self): + val = Timestamp(1337299200000000123) + result = val + timedelta(1) + assert result.nanosecond == val.nanosecond + + def test_rsub_dtscalars(self, tz_naive_fixture): + # In particular, check that datetime64 - Timestamp works GH#28286 + td = Timedelta(1235345642000) + ts = Timestamp.now(tz_naive_fixture) + other = ts + td + + assert other - ts == td + assert other.to_pydatetime() - ts == td + if tz_naive_fixture is None: + assert other.to_datetime64() - ts == td + else: + with pytest.raises(TypeError, match="subtraction must have"): + other.to_datetime64() - ts + + def test_timestamp_sub_datetime(self): + dt = datetime(2013, 10, 12) + ts = Timestamp(datetime(2013, 10, 13)) + assert (ts - dt).days == 1 + assert (dt - ts).days == -1 + + def test_addition_subtraction_types(self): + # Assert on the types resulting from Timestamp +/- various date/time + # objects + dt = datetime(2014, 3, 4) + td = timedelta(seconds=1) + # build a timestamp with a frequency, since then it supports + # addition/subtraction of integers + ts = Timestamp(dt, freq="D") + + msg = "Addition/subtraction of integers" + with pytest.raises(TypeError, match=msg): + # GH#22535 add/sub with integers is deprecated + ts + 1 + with pytest.raises(TypeError, 
match=msg): + ts - 1 + + # Timestamp + datetime not supported, though subtraction is supported + # and yields timedelta more tests in tseries/base/tests/test_base.py + assert type(ts - dt) == Timedelta + assert type(ts + td) == Timestamp + assert type(ts - td) == Timestamp + + # Timestamp +/- datetime64 not supported, so not tested (could possibly + # assert error raised?) + td64 = np.timedelta64(1, "D") + assert type(ts + td64) == Timestamp + assert type(ts - td64) == Timestamp + + @pytest.mark.parametrize( + "freq, td, td64", + [ + ("S", timedelta(seconds=1), np.timedelta64(1, "s")), + ("min", timedelta(minutes=1), np.timedelta64(1, "m")), + ("H", timedelta(hours=1), np.timedelta64(1, "h")), + ("D", timedelta(days=1), np.timedelta64(1, "D")), + ("W", timedelta(weeks=1), np.timedelta64(1, "W")), + ("M", None, np.timedelta64(1, "M")), + ], + ) + def test_addition_subtraction_preserve_frequency(self, freq, td, td64): + ts = Timestamp("2014-03-05 00:00:00", freq=freq) + original_freq = ts.freq + + assert (ts + 1 * original_freq).freq == original_freq + assert (ts - 1 * original_freq).freq == original_freq + + if td is not None: + # timedelta does not support months as unit + assert (ts + td).freq == original_freq + assert (ts - td).freq == original_freq + + assert (ts + td64).freq == original_freq + assert (ts - td64).freq == original_freq + + @pytest.mark.parametrize( + "td", [Timedelta(hours=3), np.timedelta64(3, "h"), timedelta(hours=3)] + ) + def test_radd_tdscalar(self, td): + # GH#24775 timedelta64+Timestamp should not raise + ts = Timestamp.now() + assert td + ts == ts + td + + @pytest.mark.parametrize( + "other,expected_difference", + [ + (np.timedelta64(-123, "ns"), -123), + (np.timedelta64(1234567898, "ns"), 1234567898), + (np.timedelta64(-123, "us"), -123000), + (np.timedelta64(-123, "ms"), -123000000), + ], + ) + def test_timestamp_add_timedelta64_unit(self, other, expected_difference): + ts = Timestamp(datetime.utcnow()) + result = ts + other + valdiff = 
result.value - ts.value + assert valdiff == expected_difference + + @pytest.mark.parametrize("ts", [Timestamp.now(), Timestamp.now("utc")]) + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + np.array([1, 2], dtype=np.int32), + np.array([3, 4], dtype=np.uint64), + ], + ) + def test_add_int_no_freq_raises(self, ts, other): + msg = "Addition/subtraction of integers and integer-arrays" + with pytest.raises(TypeError, match=msg): + ts + other + with pytest.raises(TypeError, match=msg): + other + ts + + with pytest.raises(TypeError, match=msg): + ts - other + with pytest.raises(TypeError): + other - ts + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("1776-07-04", freq="D"), + Timestamp("1776-07-04", tz="UTC", freq="D"), + ], + ) + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + np.array([1, 2], dtype=np.int32), + np.array([3, 4], dtype=np.uint64), + ], + ) + def test_add_int_with_freq(self, ts, other): + + with pytest.raises(TypeError): + ts + other + with pytest.raises(TypeError): + other + ts + + with pytest.raises(TypeError): + ts - other + + with pytest.raises(TypeError): + other - ts diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py new file mode 100644 index 00000000..fce4fa6e --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -0,0 +1,192 @@ +from datetime import datetime +import operator + +import numpy as np +import pytest + +from pandas import Timestamp + + +class TestTimestampComparison: + def test_comparison_object_array(self): + # GH#15183 + ts = Timestamp("2011-01-03 00:00:00-0500", tz="US/Eastern") + other = Timestamp("2011-01-01 00:00:00-0500", tz="US/Eastern") + naive = Timestamp("2011-01-01 00:00:00") + + arr = np.array([other, ts], dtype=object) + res = arr == ts + expected = np.array([False, True], dtype=bool) + assert (res == expected).all() + + # 2D case + arr = np.array([[other, ts], [ts, other]], dtype=object) + res = arr != ts 
+ expected = np.array([[True, False], [False, True]], dtype=bool) + assert res.shape == expected.shape + assert (res == expected).all() + + # tzaware mismatch + arr = np.array([naive], dtype=object) + with pytest.raises(TypeError): + arr < ts + + def test_comparison(self): + # 5-18-2012 00:00:00.000 + stamp = 1337299200000000000 + + val = Timestamp(stamp) + + assert val == val + assert not val != val + assert not val < val + assert val <= val + assert not val > val + assert val >= val + + other = datetime(2012, 5, 18) + assert val == other + assert not val != other + assert not val < other + assert val <= other + assert not val > other + assert val >= other + + other = Timestamp(stamp + 100) + + assert val != other + assert val != other + assert val < other + assert val <= other + assert other > val + assert other >= val + + def test_compare_invalid(self): + # GH#8058 + val = Timestamp("20130101 12:01:02") + assert not val == "foo" + assert not val == 10.0 + assert not val == 1 + assert not val == [] + assert not val == {"foo": 1} + assert not val == np.float64(1) + assert not val == np.int64(1) + + assert val != "foo" + assert val != 10.0 + assert val != 1 + assert val != [] + assert val != {"foo": 1} + assert val != np.float64(1) + assert val != np.int64(1) + + def test_cant_compare_tz_naive_w_aware(self, utc_fixture): + # see GH#1404 + a = Timestamp("3/12/2012") + b = Timestamp("3/12/2012", tz=utc_fixture) + + with pytest.raises(TypeError): + a == b + with pytest.raises(TypeError): + a != b + with pytest.raises(TypeError): + a < b + with pytest.raises(TypeError): + a <= b + with pytest.raises(TypeError): + a > b + with pytest.raises(TypeError): + a >= b + + with pytest.raises(TypeError): + b == a + with pytest.raises(TypeError): + b != a + with pytest.raises(TypeError): + b < a + with pytest.raises(TypeError): + b <= a + with pytest.raises(TypeError): + b > a + with pytest.raises(TypeError): + b >= a + + assert not a == b.to_pydatetime() + assert not 
a.to_pydatetime() == b + + def test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp("now") + nat = Timestamp("nat") + + ops = {"gt": "lt", "lt": "gt", "ge": "le", "le": "ge", "eq": "eq", "ne": "ne"} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + assert result == expected + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + assert result == expected + + def test_timestamp_compare_with_early_datetime(self): + # e.g. datetime.min + stamp = Timestamp("2012-01-01") + + assert not stamp == datetime.min + assert not stamp == datetime(1600, 1, 1) + assert not stamp == datetime(2700, 1, 1) + assert stamp != datetime.min + assert stamp != datetime(1600, 1, 1) + assert stamp != datetime(2700, 1, 1) + assert stamp > datetime(1600, 1, 1) + assert stamp >= datetime(1600, 1, 1) + assert stamp < datetime(2700, 1, 1) + assert stamp <= datetime(2700, 1, 1) + + def test_compare_zerodim_array(self): + # GH#26916 + ts = Timestamp.now() + dt64 = np.datetime64("2016-01-01", "ns") + arr = np.array(dt64) + assert arr.ndim == 0 + + result = arr < ts + assert result is True + result = arr > ts + assert result is False + + +def test_rich_comparison_with_unsupported_type(): + # Comparisons with unsupported objects should return NotImplemented + # (it previously raised TypeError, see #24011) + + class Inf: + def __lt__(self, o): + return False + + def __le__(self, o): + return isinstance(o, Inf) + + def __gt__(self, o): + return not isinstance(o, Inf) + + def __ge__(self, o): + return True + + def __eq__(self, other) -> bool: + return isinstance(other, Inf) + + inf = Inf() + timestamp = Timestamp("2018-11-30") + + for left, right in [(inf, timestamp), (timestamp, inf)]: + assert left > right or left < right + assert left >= right or left <= right + assert not (left == right) + assert left != 
right diff --git a/pandas/tests/scalar/timestamp/test_rendering.py b/pandas/tests/scalar/timestamp/test_rendering.py new file mode 100644 index 00000000..6b64b230 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_rendering.py @@ -0,0 +1,87 @@ +import pprint + +import pytest +import pytz # noqa # a test below uses pytz but only inside a `eval` call + +from pandas import Timestamp + + +class TestTimestampRendering: + + timezones = ["UTC", "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"] + + @pytest.mark.parametrize("tz", timezones) + @pytest.mark.parametrize("freq", ["D", "M", "S", "N"]) + @pytest.mark.parametrize( + "date", ["2014-03-07", "2014-01-01 09:00", "2014-01-01 00:00:00.000000001"] + ) + def test_repr(self, date, freq, tz): + # avoid to match with timezone name + freq_repr = "'{0}'".format(freq) + if tz.startswith("dateutil"): + tz_repr = tz.replace("dateutil", "") + else: + tz_repr = tz + + date_only = Timestamp(date) + assert date in repr(date_only) + assert tz_repr not in repr(date_only) + assert freq_repr not in repr(date_only) + assert date_only == eval(repr(date_only)) + + date_tz = Timestamp(date, tz=tz) + assert date in repr(date_tz) + assert tz_repr in repr(date_tz) + assert freq_repr not in repr(date_tz) + assert date_tz == eval(repr(date_tz)) + + date_freq = Timestamp(date, freq=freq) + assert date in repr(date_freq) + assert tz_repr not in repr(date_freq) + assert freq_repr in repr(date_freq) + assert date_freq == eval(repr(date_freq)) + + date_tz_freq = Timestamp(date, tz=tz, freq=freq) + assert date in repr(date_tz_freq) + assert tz_repr in repr(date_tz_freq) + assert freq_repr in repr(date_tz_freq) + assert date_tz_freq == eval(repr(date_tz_freq)) + + def test_repr_utcoffset(self): + # This can cause the tz field to be populated, but it's redundant to + # include this information in the date-string. 
+ date_with_utc_offset = Timestamp("2014-03-13 00:00:00-0400", tz=None) + assert "2014-03-13 00:00:00-0400" in repr(date_with_utc_offset) + assert "tzoffset" not in repr(date_with_utc_offset) + assert "pytz.FixedOffset(-240)" in repr(date_with_utc_offset) + expr = repr(date_with_utc_offset).replace( + "'pytz.FixedOffset(-240)'", "pytz.FixedOffset(-240)" + ) + assert date_with_utc_offset == eval(expr) + + def test_timestamp_repr_pre1900(self): + # pre-1900 + stamp = Timestamp("1850-01-01", tz="US/Eastern") + repr(stamp) + + iso8601 = "1850-01-01 01:23:45.012345" + stamp = Timestamp(iso8601, tz="US/Eastern") + result = repr(stamp) + assert iso8601 in result + + def test_pprint(self): + # GH#12622 + nested_obj = {"foo": 1, "bar": [{"w": {"a": Timestamp("2011-01-01")}}] * 10} + result = pprint.pformat(nested_obj, width=50) + expected = r"""{'bar': [{'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}, + {'w': {'a': Timestamp('2011-01-01 00:00:00')}}], + 'foo': 1}""" + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py new file mode 100644 index 00000000..1ce1087c --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -0,0 +1,1087 @@ +""" test the scalar Timestamp """ + +import calendar +from datetime import datetime, timedelta +import locale +import unicodedata + +import dateutil +from dateutil.tz import tzutc +import numpy as np +import pytest +import pytz +from pytz import timezone, utc + +from pandas._libs.tslibs import conversion +from pandas._libs.tslibs.timezones import 
dateutil_gettz as gettz, get_timezone +import pandas.compat as compat +from pandas.compat.numpy import np_datetime64_compat +from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td + +from pandas import NaT, Period, Timedelta, Timestamp +import pandas._testing as tm + +from pandas.tseries import offsets + + +class TestTimestampProperties: + def test_properties_business(self): + ts = Timestamp("2017-10-01", freq="B") + control = Timestamp("2017-10-01") + assert ts.dayofweek == 6 + assert not ts.is_month_start # not a weekday + assert not ts.is_quarter_start # not a weekday + # Control case: non-business is month/qtr start + assert control.is_month_start + assert control.is_quarter_start + + ts = Timestamp("2017-09-30", freq="B") + control = Timestamp("2017-09-30") + assert ts.dayofweek == 5 + assert not ts.is_month_end # not a weekday + assert not ts.is_quarter_end # not a weekday + # Control case: non-business is month/qtr start + assert control.is_month_end + assert control.is_quarter_end + + def test_fields(self): + def check(value, equal): + # that we are int like + assert isinstance(value, int) + assert value == equal + + # GH 10050 + ts = Timestamp("2015-05-10 09:06:03.000100001") + check(ts.year, 2015) + check(ts.month, 5) + check(ts.day, 10) + check(ts.hour, 9) + check(ts.minute, 6) + check(ts.second, 3) + msg = "'Timestamp' object has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): + ts.millisecond + check(ts.microsecond, 100) + check(ts.nanosecond, 1) + check(ts.dayofweek, 6) + check(ts.quarter, 2) + check(ts.dayofyear, 130) + check(ts.week, 19) + check(ts.daysinmonth, 31) + check(ts.daysinmonth, 31) + + # GH 13303 + ts = Timestamp("2014-12-31 23:59:00-05:00", tz="US/Eastern") + check(ts.year, 2014) + check(ts.month, 12) + check(ts.day, 31) + check(ts.hour, 23) + check(ts.minute, 59) + check(ts.second, 0) + msg = "'Timestamp' object has no attribute 'millisecond'" + with 
pytest.raises(AttributeError, match=msg): + ts.millisecond + check(ts.microsecond, 0) + check(ts.nanosecond, 0) + check(ts.dayofweek, 2) + check(ts.quarter, 4) + check(ts.dayofyear, 365) + check(ts.week, 1) + check(ts.daysinmonth, 31) + + ts = Timestamp("2014-01-01 00:00:00+01:00") + starts = ["is_month_start", "is_quarter_start", "is_year_start"] + for start in starts: + assert getattr(ts, start) + ts = Timestamp("2014-12-31 23:59:59+01:00") + ends = ["is_month_end", "is_year_end", "is_quarter_end"] + for end in ends: + assert getattr(ts, end) + + # GH 12806 + @pytest.mark.parametrize( + "data", + [Timestamp("2017-08-28 23:00:00"), Timestamp("2017-08-28 23:00:00", tz="EST")], + ) + @pytest.mark.parametrize( + "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales() + ) + def test_names(self, data, time_locale): + # GH 17354 + # Test .day_name(), .month_name + if time_locale is None: + expected_day = "Monday" + expected_month = "August" + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_day = calendar.day_name[0].capitalize() + expected_month = calendar.month_name[8].capitalize() + + result_day = data.day_name(time_locale) + result_month = data.month_name(time_locale) + + # Work around https://github.com/pandas-dev/pandas/issues/22342 + # different normalizations + expected_day = unicodedata.normalize("NFD", expected_day) + expected_month = unicodedata.normalize("NFD", expected_month) + + result_day = unicodedata.normalize("NFD", result_day) + result_month = unicodedata.normalize("NFD", result_month) + + assert result_day == expected_day + assert result_month == expected_month + + # Test NaT + nan_ts = Timestamp(NaT) + assert np.isnan(nan_ts.day_name(time_locale)) + assert np.isnan(nan_ts.month_name(time_locale)) + + def test_is_leap_year(self, tz_naive_fixture): + tz = tz_naive_fixture + # GH 13727 + dt = Timestamp("2000-01-01 00:00:00", tz=tz) + assert dt.is_leap_year + assert isinstance(dt.is_leap_year, bool) + + dt = 
Timestamp("1999-01-01 00:00:00", tz=tz) + assert not dt.is_leap_year + + dt = Timestamp("2004-01-01 00:00:00", tz=tz) + assert dt.is_leap_year + + dt = Timestamp("2100-01-01 00:00:00", tz=tz) + assert not dt.is_leap_year + + def test_woy_boundary(self): + # make sure weeks at year boundaries are correct + d = datetime(2013, 12, 31) + result = Timestamp(d).week + expected = 1 # ISO standard + assert result == expected + + d = datetime(2008, 12, 28) + result = Timestamp(d).week + expected = 52 # ISO standard + assert result == expected + + d = datetime(2009, 12, 31) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 1) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + d = datetime(2010, 1, 3) + result = Timestamp(d).week + expected = 53 # ISO standard + assert result == expected + + result = np.array( + [ + Timestamp(datetime(*args)).week + for args in [(2000, 1, 1), (2000, 1, 2), (2005, 1, 1), (2005, 1, 2)] + ] + ) + assert (result == [52, 52, 53, 53]).all() + + def test_resolution(self): + # GH#21336, GH#21365 + dt = Timestamp("2100-01-01 00:00:00") + assert dt.resolution == Timedelta(nanoseconds=1) + + # Check that the attribute is available on the class, mirroring + # the stdlib datetime behavior + assert Timestamp.resolution == Timedelta(nanoseconds=1) + + +class TestTimestampConstructors: + def test_constructor(self): + base_str = "2014-07-01 09:00" + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1_404_205_200_000_000_000 + + # confirm base representation is correct + assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected + + tests = [ + (base_str, base_dt, base_expected), + ( + "2014-07-01 10:00", + datetime(2014, 7, 1, 10), + base_expected + 3600 * 1_000_000_000, + ), + ( + "2014-07-01 09:00:00.000008000", + datetime(2014, 7, 1, 9, 0, 0, 8), + base_expected + 8000, + ), + ( + "2014-07-01 09:00:00.000000005", + 
Timestamp("2014-07-01 09:00:00.000000005"), + base_expected + 5, + ), + ] + + timezones = [ + (None, 0), + ("UTC", 0), + (pytz.utc, 0), + ("Asia/Tokyo", 9), + ("US/Eastern", -4), + ("dateutil/US/Pacific", -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5), + ] + + for date_str, date, expected in tests: + for result in [Timestamp(date_str), Timestamp(date)]: + # only with timestring + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # with timezone + for tz, offset in timezones: + for result in [Timestamp(date_str, tz=tz), Timestamp(date, tz=tz)]: + expected_tz = expected - offset * 3600 * 1_000_000_000 + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should preserve tz + result = Timestamp(result) + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should convert to UTC + if tz is not None: + result = Timestamp(result).tz_convert("UTC") + else: + result = Timestamp(result, tz="UTC") + expected_utc = expected - offset * 3600 * 1_000_000_000 + assert result.value == expected_utc + assert conversion.pydt_to_i8(result) == expected_utc + + def test_constructor_with_stringoffset(self): + # GH 7833 + base_str = "2014-07-01 11:00:00+02:00" + base_dt = datetime(2014, 7, 1, 9) + base_expected = 1_404_205_200_000_000_000 + + # confirm base representation is correct + assert calendar.timegm(base_dt.timetuple()) * 1_000_000_000 == base_expected + + tests = [ + (base_str, base_expected), + ("2014-07-01 12:00:00+02:00", base_expected + 3600 * 1_000_000_000), + ("2014-07-01 11:00:00.000008000+02:00", base_expected + 8000), + ("2014-07-01 11:00:00.000000005+02:00", base_expected + 5), + ] + + timezones = [ + (None, 0), + ("UTC", 0), + (pytz.utc, 0), + 
("Asia/Tokyo", 9), + ("US/Eastern", -4), + ("dateutil/US/Pacific", -7), + (pytz.FixedOffset(-180), -3), + (dateutil.tz.tzoffset(None, 18000), 5), + ] + + for date_str, expected in tests: + for result in [Timestamp(date_str)]: + # only with timestring + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # re-creation shouldn't affect to internal value + result = Timestamp(result) + assert result.value == expected + assert conversion.pydt_to_i8(result) == expected + + # with timezone + for tz, offset in timezones: + result = Timestamp(date_str, tz=tz) + expected_tz = expected + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should preserve tz + result = Timestamp(result) + assert result.value == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz + + # should convert to UTC + result = Timestamp(result).tz_convert("UTC") + expected_utc = expected + assert result.value == expected_utc + assert conversion.pydt_to_i8(result) == expected_utc + + # This should be 2013-11-01 05:00 in UTC + # converted to Chicago tz + result = Timestamp("2013-11-01 00:00:00-0500", tz="America/Chicago") + assert result.value == Timestamp("2013-11-01 05:00").value + expected = "Timestamp('2013-11-01 00:00:00-0500', tz='America/Chicago')" # noqa + assert repr(result) == expected + assert result == eval(repr(result)) + + # This should be 2013-11-01 05:00 in UTC + # converted to Tokyo tz (+09:00) + result = Timestamp("2013-11-01 00:00:00-0500", tz="Asia/Tokyo") + assert result.value == Timestamp("2013-11-01 05:00").value + expected = "Timestamp('2013-11-01 14:00:00+0900', tz='Asia/Tokyo')" + assert repr(result) == expected + assert result == eval(repr(result)) + + # GH11708 + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Katmandu + result = Timestamp("2015-11-18 15:45:00+05:45", tz="Asia/Katmandu") + assert result.value == Timestamp("2015-11-18 10:00").value + expected = 
"Timestamp('2015-11-18 15:45:00+0545', tz='Asia/Katmandu')" + assert repr(result) == expected + assert result == eval(repr(result)) + + # This should be 2015-11-18 10:00 in UTC + # converted to Asia/Kolkata + result = Timestamp("2015-11-18 15:30:00+05:30", tz="Asia/Kolkata") + assert result.value == Timestamp("2015-11-18 10:00").value + expected = "Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')" + assert repr(result) == expected + assert result == eval(repr(result)) + + def test_constructor_invalid(self): + with pytest.raises(TypeError, match="Cannot convert input"): + Timestamp(slice(2)) + with pytest.raises(ValueError, match="Cannot convert Period"): + Timestamp(Period("1000-01-01")) + + def test_constructor_invalid_tz(self): + # GH#17690 + with pytest.raises(TypeError, match="must be a datetime.tzinfo"): + Timestamp("2017-10-22", tzinfo="US/Eastern") + + with pytest.raises(ValueError, match="at most one of"): + Timestamp("2017-10-22", tzinfo=utc, tz="UTC") + + with pytest.raises(ValueError, match="Invalid frequency:"): + # GH#5168 + # case where user tries to pass tz as an arg, not kwarg, gets + # interpreted as a `freq` + Timestamp("2012-01-01", "US/Pacific") + + def test_constructor_strptime(self): + # GH25016 + # Test support for Timestamp.strptime + fmt = "%Y%m%d-%H%M%S-%f%z" + ts = "20190129-235348-000001+0000" + with pytest.raises(NotImplementedError): + Timestamp.strptime(ts, fmt) + + def test_constructor_tz_or_tzinfo(self): + # GH#17943, GH#17690, GH#5168 + stamps = [ + Timestamp(year=2017, month=10, day=22, tz="UTC"), + Timestamp(year=2017, month=10, day=22, tzinfo=utc), + Timestamp(year=2017, month=10, day=22, tz=utc), + Timestamp(datetime(2017, 10, 22), tzinfo=utc), + Timestamp(datetime(2017, 10, 22), tz="UTC"), + Timestamp(datetime(2017, 10, 22), tz=utc), + ] + assert all(ts == stamps[0] for ts in stamps) + + def test_constructor_positional(self): + # see gh-10758 + with pytest.raises(TypeError): + Timestamp(2000, 1) + with 
pytest.raises(ValueError): + Timestamp(2000, 0, 1) + with pytest.raises(ValueError): + Timestamp(2000, 13, 1) + with pytest.raises(ValueError): + Timestamp(2000, 1, 0) + with pytest.raises(ValueError): + Timestamp(2000, 1, 32) + + # see gh-11630 + assert repr(Timestamp(2015, 11, 12)) == repr(Timestamp("20151112")) + assert repr(Timestamp(2015, 11, 12, 1, 2, 3, 999999)) == repr( + Timestamp("2015-11-12 01:02:03.999999") + ) + + def test_constructor_keyword(self): + # GH 10758 + with pytest.raises(TypeError): + Timestamp(year=2000, month=1) + with pytest.raises(ValueError): + Timestamp(year=2000, month=0, day=1) + with pytest.raises(ValueError): + Timestamp(year=2000, month=13, day=1) + with pytest.raises(ValueError): + Timestamp(year=2000, month=1, day=0) + with pytest.raises(ValueError): + Timestamp(year=2000, month=1, day=32) + + assert repr(Timestamp(year=2015, month=11, day=12)) == repr( + Timestamp("20151112") + ) + + assert repr( + Timestamp( + year=2015, + month=11, + day=12, + hour=1, + minute=2, + second=3, + microsecond=999999, + ) + ) == repr(Timestamp("2015-11-12 01:02:03.999999")) + + def test_constructor_fromordinal(self): + base = datetime(2000, 1, 1) + + ts = Timestamp.fromordinal(base.toordinal(), freq="D") + assert base == ts + assert ts.freq == "D" + assert base.toordinal() == ts.toordinal() + + ts = Timestamp.fromordinal(base.toordinal(), tz="US/Eastern") + assert Timestamp("2000-01-01", tz="US/Eastern") == ts + assert base.toordinal() == ts.toordinal() + + # GH#3042 + dt = datetime(2011, 4, 16, 0, 0) + ts = Timestamp.fromordinal(dt.toordinal()) + assert ts.to_pydatetime() == dt + + # with a tzinfo + stamp = Timestamp("2011-4-16", tz="US/Eastern") + dt_tz = stamp.to_pydatetime() + ts = Timestamp.fromordinal(dt_tz.toordinal(), tz="US/Eastern") + assert ts.to_pydatetime() == dt_tz + + @pytest.mark.parametrize( + "result", + [ + Timestamp(datetime(2000, 1, 2, 3, 4, 5, 6), nanosecond=1), + Timestamp( + year=2000, + month=1, + day=2, + hour=3, + 
minute=4, + second=5, + microsecond=6, + nanosecond=1, + ), + Timestamp( + year=2000, + month=1, + day=2, + hour=3, + minute=4, + second=5, + microsecond=6, + nanosecond=1, + tz="UTC", + ), + Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, None), + Timestamp(2000, 1, 2, 3, 4, 5, 6, 1, pytz.UTC), + ], + ) + def test_constructor_nanosecond(self, result): + # GH 18898 + expected = Timestamp(datetime(2000, 1, 2, 3, 4, 5, 6), tz=result.tz) + expected = expected + Timedelta(nanoseconds=1) + assert result == expected + + @pytest.mark.parametrize("z", ["Z0", "Z00"]) + def test_constructor_invalid_Z0_isostring(self, z): + # GH 8910 + with pytest.raises(ValueError): + Timestamp("2014-11-02 01:00{}".format(z)) + + @pytest.mark.parametrize( + "arg", + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + ], + ) + def test_invalid_date_kwarg_with_string_input(self, arg): + kwarg = {arg: 1} + with pytest.raises(ValueError): + Timestamp("2010-10-10 12:59:59.999999999", **kwarg) + + def test_out_of_bounds_integer_value(self): + # GH#26651 check that we raise OutOfBoundsDatetime, not OverflowError + with pytest.raises(OutOfBoundsDatetime): + Timestamp(Timestamp.max.value * 2) + with pytest.raises(OutOfBoundsDatetime): + Timestamp(Timestamp.min.value * 2) + + def test_out_of_bounds_value(self): + one_us = np.timedelta64(1).astype("timedelta64[us]") + + # By definition we can't go out of bounds in [ns], so we + # convert the datetime64s to [us] so we can go out of bounds + min_ts_us = np.datetime64(Timestamp.min).astype("M8[us]") + max_ts_us = np.datetime64(Timestamp.max).astype("M8[us]") + + # No error for the min/max datetimes + Timestamp(min_ts_us) + Timestamp(max_ts_us) + + # One us less than the minimum is an error + with pytest.raises(ValueError): + Timestamp(min_ts_us - one_us) + + # One us more than the maximum is an error + with pytest.raises(ValueError): + Timestamp(max_ts_us + one_us) + + def test_out_of_bounds_string(self): + with 
pytest.raises(ValueError): + Timestamp("1676-01-01") + with pytest.raises(ValueError): + Timestamp("2263-01-01") + + def test_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + with pytest.raises(OutOfBoundsDatetime): + Timestamp("2262-04-11 23:47:16.854775808") + + def test_bounds_with_different_units(self): + out_of_bounds_dates = ("1677-09-21", "2262-04-12") + + time_units = ("D", "h", "m", "s", "ms", "us") + + for date_string in out_of_bounds_dates: + for unit in time_units: + dt64 = np.datetime64(date_string, unit) + with pytest.raises(ValueError): + Timestamp(dt64) + + in_bounds_dates = ("1677-09-23", "2262-04-11") + + for date_string in in_bounds_dates: + for unit in time_units: + dt64 = np.datetime64(date_string, unit) + Timestamp(dt64) + + def test_min_valid(self): + # Ensure that Timestamp.min is a valid Timestamp + Timestamp(Timestamp.min) + + def test_max_valid(self): + # Ensure that Timestamp.max is a valid Timestamp + Timestamp(Timestamp.max) + + def test_now(self): + # GH#9000 + ts_from_string = Timestamp("now") + ts_from_method = Timestamp.now() + ts_datetime = datetime.now() + + ts_from_string_tz = Timestamp("now", tz="US/Eastern") + ts_from_method_tz = Timestamp.now(tz="US/Eastern") + + # Check that the delta between the times is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert ( + abs( + ts_from_string_tz.tz_localize(None) + - ts_from_method_tz.tz_localize(None) + ) + < delta + ) + + def test_today(self): + ts_from_string = Timestamp("today") + ts_from_method = Timestamp.today() + ts_datetime = datetime.today() + + ts_from_string_tz = Timestamp("today", tz="US/Eastern") + ts_from_method_tz = Timestamp.today(tz="US/Eastern") + + # Check that the delta between the times 
is less than 1s (arbitrarily + # small) + delta = Timedelta(seconds=1) + assert abs(ts_from_method - ts_from_string) < delta + assert abs(ts_datetime - ts_from_method) < delta + assert abs(ts_from_method_tz - ts_from_string_tz) < delta + assert ( + abs( + ts_from_string_tz.tz_localize(None) + - ts_from_method_tz.tz_localize(None) + ) + < delta + ) + + @pytest.mark.parametrize("tz", [None, pytz.timezone("US/Pacific")]) + def test_disallow_setting_tz(self, tz): + # GH 3746 + ts = Timestamp("2010") + with pytest.raises(AttributeError): + ts.tz = tz + + @pytest.mark.parametrize("offset", ["+0300", "+0200"]) + def test_construct_timestamp_near_dst(self, offset): + # GH 20854 + expected = Timestamp( + "2016-10-30 03:00:00{}".format(offset), tz="Europe/Helsinki" + ) + result = Timestamp(expected).tz_convert("Europe/Helsinki") + assert result == expected + + @pytest.mark.parametrize( + "arg", ["2013/01/01 00:00:00+09:00", "2013-01-01 00:00:00+09:00"] + ) + def test_construct_with_different_string_format(self, arg): + # GH 12064 + result = Timestamp(arg) + expected = Timestamp(datetime(2013, 1, 1), tz=pytz.FixedOffset(540)) + assert result == expected + + def test_construct_timestamp_preserve_original_frequency(self): + # GH 22311 + result = Timestamp(Timestamp("2010-08-08", freq="D")).freq + expected = offsets.Day() + assert result == expected + + def test_constructor_invalid_frequency(self): + # GH 22311 + with pytest.raises(ValueError, match="Invalid frequency:"): + Timestamp("2012-01-01", freq=[]) + + @pytest.mark.parametrize("box", [datetime, Timestamp]) + def test_raise_tz_and_tzinfo_in_datetime_input(self, box): + # GH 23579 + kwargs = {"year": 2018, "month": 1, "day": 1, "tzinfo": utc} + with pytest.raises(ValueError, match="Cannot pass a datetime or Timestamp"): + Timestamp(box(**kwargs), tz="US/Pacific") + with pytest.raises(ValueError, match="Cannot pass a datetime or Timestamp"): + Timestamp(box(**kwargs), tzinfo=pytz.timezone("US/Pacific")) + + def 
test_dont_convert_dateutil_utc_to_pytz_utc(self): + result = Timestamp(datetime(2018, 1, 1), tz=tzutc()) + expected = Timestamp(datetime(2018, 1, 1)).tz_localize(tzutc()) + assert result == expected + + def test_constructor_subclassed_datetime(self): + # GH 25851 + # ensure that subclassed datetime works for + # Timestamp creation + class SubDatetime(datetime): + pass + + data = SubDatetime(2000, 1, 1) + result = Timestamp(data) + expected = Timestamp(2000, 1, 1) + assert result == expected + + @pytest.mark.skipif( + not compat.PY38, + reason="datetime.fromisocalendar was added in Python version 3.8", + ) + def test_constructor_fromisocalendar(self): + # GH 30395 + expected_timestamp = Timestamp("2000-01-03 00:00:00") + expected_stdlib = datetime.fromisocalendar(2000, 1, 1) + result = Timestamp.fromisocalendar(2000, 1, 1) + assert result == expected_timestamp + assert result == expected_stdlib + assert isinstance(result, Timestamp) + + +class TestTimestamp: + def test_tz(self): + tstr = "2014-02-01 09:00" + ts = Timestamp(tstr) + local = ts.tz_localize("Asia/Tokyo") + assert local.hour == 9 + assert local == Timestamp(tstr, tz="Asia/Tokyo") + conv = local.tz_convert("US/Eastern") + assert conv == Timestamp("2014-01-31 19:00", tz="US/Eastern") + assert conv.hour == 19 + + # preserves nanosecond + ts = Timestamp(tstr) + offsets.Nano(5) + local = ts.tz_localize("Asia/Tokyo") + assert local.hour == 9 + assert local.nanosecond == 5 + conv = local.tz_convert("US/Eastern") + assert conv.nanosecond == 5 + assert conv.hour == 19 + + def test_utc_z_designator(self): + assert get_timezone(Timestamp("2014-11-02 01:00Z").tzinfo) is utc + + def test_asm8(self): + np.random.seed(7_960_929) + ns = [Timestamp.min.value, Timestamp.max.value, 1000] + + for n in ns: + assert ( + Timestamp(n).asm8.view("i8") == np.datetime64(n, "ns").view("i8") == n + ) + + assert Timestamp("nat").asm8.view("i8") == np.datetime64("nat", "ns").view("i8") + + def test_class_ops_pytz(self): + def 
compare(x, y): + assert int((Timestamp(x).value - Timestamp(y).value) / 1e9) == 0 + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now("UTC"), datetime.now(timezone("UTC"))) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + compare( + Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time), + ) + compare( + Timestamp.fromtimestamp(current_time), datetime.fromtimestamp(current_time) + ) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare( + Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component), + ) + + def test_class_ops_dateutil(self): + def compare(x, y): + assert ( + int( + np.round(Timestamp(x).value / 1e9) + - np.round(Timestamp(y).value / 1e9) + ) + == 0 + ) + + compare(Timestamp.now(), datetime.now()) + compare(Timestamp.now("UTC"), datetime.now(tzutc())) + compare(Timestamp.utcnow(), datetime.utcnow()) + compare(Timestamp.today(), datetime.today()) + current_time = calendar.timegm(datetime.now().utctimetuple()) + compare( + Timestamp.utcfromtimestamp(current_time), + datetime.utcfromtimestamp(current_time), + ) + compare( + Timestamp.fromtimestamp(current_time), datetime.fromtimestamp(current_time) + ) + + date_component = datetime.utcnow() + time_component = (date_component + timedelta(minutes=10)).time() + compare( + Timestamp.combine(date_component, time_component), + datetime.combine(date_component, time_component), + ) + + def test_basics_nanos(self): + val = np.int64(946_684_800_000_000_000).view("M8[ns]") + stamp = Timestamp(val.view("i8") + 500) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.microsecond == 0 + assert stamp.nanosecond == 500 + + # GH 14415 + val = np.iinfo(np.int64).min + 80_000_000_000_000 + stamp = Timestamp(val) + assert stamp.year == 1677 + 
assert stamp.month == 9 + assert stamp.day == 21 + assert stamp.microsecond == 145224 + assert stamp.nanosecond == 192 + + @pytest.mark.parametrize( + "value, check_kwargs", + [ + [946688461000000000, {}], + [946688461000000000 / 1000, dict(unit="us")], + [946688461000000000 / 1_000_000, dict(unit="ms")], + [946688461000000000 / 1_000_000_000, dict(unit="s")], + [10957, dict(unit="D", h=0)], + [ + (946688461000000000 + 500000) / 1000000000, + dict(unit="s", us=499, ns=964), + ], + [(946688461000000000 + 500000000) / 1000000000, dict(unit="s", us=500000)], + [(946688461000000000 + 500000) / 1000000, dict(unit="ms", us=500)], + [(946688461000000000 + 500000) / 1000, dict(unit="us", us=500)], + [(946688461000000000 + 500000000) / 1000000, dict(unit="ms", us=500000)], + [946688461000000000 / 1000.0 + 5, dict(unit="us", us=5)], + [946688461000000000 / 1000.0 + 5000, dict(unit="us", us=5000)], + [946688461000000000 / 1000000.0 + 0.5, dict(unit="ms", us=500)], + [946688461000000000 / 1000000.0 + 0.005, dict(unit="ms", us=5, ns=5)], + [946688461000000000 / 1000000000.0 + 0.5, dict(unit="s", us=500000)], + [10957 + 0.5, dict(unit="D", h=12)], + ], + ) + def test_unit(self, value, check_kwargs): + def check(value, unit=None, h=1, s=1, us=0, ns=0): + stamp = Timestamp(value, unit=unit) + assert stamp.year == 2000 + assert stamp.month == 1 + assert stamp.day == 1 + assert stamp.hour == h + if unit != "D": + assert stamp.minute == 1 + assert stamp.second == s + assert stamp.microsecond == us + else: + assert stamp.minute == 0 + assert stamp.second == 0 + assert stamp.microsecond == 0 + assert stamp.nanosecond == ns + + check(value, **check_kwargs) + + def test_roundtrip(self): + + # test value to string and back conversions + # further test accessors + base = Timestamp("20140101 00:00:00") + + result = Timestamp(base.value + Timedelta("5ms").value) + assert result == Timestamp(f"{base}.005000") + assert result.microsecond == 5000 + + result = Timestamp(base.value + 
Timedelta("5us").value) + assert result == Timestamp(f"{base}.000005") + assert result.microsecond == 5 + + result = Timestamp(base.value + Timedelta("5ns").value) + assert result == Timestamp(f"{base}.000000005") + assert result.nanosecond == 5 + assert result.microsecond == 0 + + result = Timestamp(base.value + Timedelta("6ms 5us").value) + assert result == Timestamp(f"{base}.006005") + assert result.microsecond == 5 + 6 * 1000 + + result = Timestamp(base.value + Timedelta("200ms 5us").value) + assert result == Timestamp(f"{base}.200005") + assert result.microsecond == 5 + 200 * 1000 + + def test_hash_equivalent(self): + d = {datetime(2011, 1, 1): 5} + stamp = Timestamp(datetime(2011, 1, 1)) + assert d[stamp] == 5 + + def test_tz_conversion_freq(self, tz_naive_fixture): + # GH25241 + t1 = Timestamp("2019-01-01 10:00", freq="H") + assert t1.tz_localize(tz=tz_naive_fixture).freq == t1.freq + t2 = Timestamp("2019-01-02 12:00", tz="UTC", freq="T") + assert t2.tz_convert(tz="UTC").freq == t2.freq + + +class TestTimestampNsOperations: + def test_nanosecond_string_parsing(self): + ts = Timestamp("2013-05-01 07:15:45.123456789") + # GH 7878 + expected_repr = "2013-05-01 07:15:45.123456789" + expected_value = 1_367_392_545_123_456_789 + assert ts.value == expected_value + assert expected_repr in repr(ts) + + ts = Timestamp("2013-05-01 07:15:45.123456789+09:00", tz="Asia/Tokyo") + assert ts.value == expected_value - 9 * 3600 * 1_000_000_000 + assert expected_repr in repr(ts) + + ts = Timestamp("2013-05-01 07:15:45.123456789", tz="UTC") + assert ts.value == expected_value + assert expected_repr in repr(ts) + + ts = Timestamp("2013-05-01 07:15:45.123456789", tz="US/Eastern") + assert ts.value == expected_value + 4 * 3600 * 1_000_000_000 + assert expected_repr in repr(ts) + + # GH 10041 + ts = Timestamp("20130501T071545.123456789") + assert ts.value == expected_value + assert expected_repr in repr(ts) + + def test_nanosecond_timestamp(self): + # GH 7610 + expected = 
1_293_840_000_000_000_005 + t = Timestamp("2011-01-01") + offsets.Nano(5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + t = Timestamp(t) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + t = Timestamp(np_datetime64_compat("2011-01-01 00:00:00.000000005Z")) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000005')" + assert t.value == expected + assert t.nanosecond == 5 + + expected = 1_293_840_000_000_000_010 + t = t + offsets.Nano(5) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + t = Timestamp(t) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + t = Timestamp(np_datetime64_compat("2011-01-01 00:00:00.000000010Z")) + assert repr(t) == "Timestamp('2011-01-01 00:00:00.000000010')" + assert t.value == expected + assert t.nanosecond == 10 + + +class TestTimestampToJulianDate: + def test_compare_1700(self): + r = Timestamp("1700-06-23").to_julian_date() + assert r == 2_342_145.5 + + def test_compare_2000(self): + r = Timestamp("2000-04-12").to_julian_date() + assert r == 2_451_646.5 + + def test_compare_2100(self): + r = Timestamp("2100-08-12").to_julian_date() + assert r == 2_488_292.5 + + def test_compare_hour01(self): + r = Timestamp("2000-08-12T01:00:00").to_julian_date() + assert r == 2_451_768.5416666666666666 + + def test_compare_hour13(self): + r = Timestamp("2000-08-12T13:00:00").to_julian_date() + assert r == 2_451_769.0416666666666666 + + +class TestTimestampConversion: + def test_conversion(self): + # GH#9255 + ts = Timestamp("2000-01-01") + + result = ts.to_pydatetime() + expected = datetime(2000, 1, 1) + assert result == expected + assert type(result) == type(expected) + + result = ts.to_datetime64() + expected = np.datetime64(ts.value, "ns") + 
assert result == expected + assert type(result) == type(expected) + assert result.dtype == expected.dtype + + def test_to_pydatetime_nonzero_nano(self): + ts = Timestamp("2011-01-01 9:00:00.123456789") + + # Warn the user of data loss (nanoseconds). + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + expected = datetime(2011, 1, 1, 9, 0, 0, 123456) + result = ts.to_pydatetime() + assert result == expected + + def test_timestamp_to_datetime(self): + stamp = Timestamp("20090415", tz="US/Eastern", freq="D") + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_timestamp_to_datetime_dateutil(self): + stamp = Timestamp("20090415", tz="dateutil/US/Eastern", freq="D") + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_timestamp_to_datetime_explicit_pytz(self): + stamp = Timestamp("20090415", tz=pytz.timezone("US/Eastern"), freq="D") + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + @td.skip_if_windows_python_3 + def test_timestamp_to_datetime_explicit_dateutil(self): + stamp = Timestamp("20090415", tz=gettz("US/Eastern"), freq="D") + dtval = stamp.to_pydatetime() + assert stamp == dtval + assert stamp.tzinfo == dtval.tzinfo + + def test_to_datetime_bijective(self): + # Ensure that converting to datetime and back only loses precision + # by going from nanoseconds to microseconds. 
+ exp_warning = None if Timestamp.max.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + assert ( + Timestamp(Timestamp.max.to_pydatetime()).value / 1000 + == Timestamp.max.value / 1000 + ) + + exp_warning = None if Timestamp.min.nanosecond == 0 else UserWarning + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + assert ( + Timestamp(Timestamp.min.to_pydatetime()).value / 1000 + == Timestamp.min.value / 1000 + ) + + def test_to_period_tz_warning(self): + # GH#21333 make sure a warning is issued when timezone + # info is lost + ts = Timestamp("2009-04-15 16:17:18", tz="US/Eastern") + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + ts.to_period("D") + + def test_to_numpy_alias(self): + # GH 24653: alias .to_numpy() for scalars + ts = Timestamp(datetime.now()) + assert ts.to_datetime64() == ts.to_numpy() + + +class SubDatetime(datetime): + pass + + +@pytest.mark.parametrize( + "lh,rh", + [ + (SubDatetime(2000, 1, 1), Timedelta(hours=1)), + (Timedelta(hours=1), SubDatetime(2000, 1, 1)), + ], +) +def test_dt_subclass_add_timedelta(lh, rh): + # GH#25851 + # ensure that subclassed datetime works for + # Timedelta operations + result = lh + rh + expected = SubDatetime(2000, 1, 1, 1) + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py new file mode 100644 index 00000000..6537f6cc --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -0,0 +1,418 @@ +""" +Tests for Timestamp timezone-related methods +""" +from datetime import date, datetime, timedelta + +import dateutil +from dateutil.tz import gettz, tzoffset +import pytest +import pytz +from pytz.exceptions import AmbiguousTimeError, NonExistentTimeError + +from pandas._libs.tslibs import timezones +from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td + +from pandas 
import NaT, Timestamp + + +class TestTimestampTZOperations: + # -------------------------------------------------------------- + # Timestamp.tz_localize + + def test_tz_localize_pushes_out_of_bounds(self): + # GH#12677 + # tz_localize that pushes away from the boundary is OK + pac = Timestamp.min.tz_localize("US/Pacific") + assert pac.value > Timestamp.min.value + pac.tz_convert("Asia/Tokyo") # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime): + Timestamp.min.tz_localize("Asia/Tokyo") + + # tz_localize that pushes away from the boundary is OK + tokyo = Timestamp.max.tz_localize("Asia/Tokyo") + assert tokyo.value < Timestamp.max.value + tokyo.tz_convert("US/Pacific") # tz_convert doesn't change value + with pytest.raises(OutOfBoundsDatetime): + Timestamp.max.tz_localize("US/Pacific") + + def test_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + # GH#14402 + ts = Timestamp("2015-11-01 01:00:03") + expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") + expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") + + with pytest.raises(pytz.AmbiguousTimeError): + ts.tz_localize("US/Central") + + result = ts.tz_localize("US/Central", ambiguous=True) + assert result == expected0 + + result = ts.tz_localize("US/Central", ambiguous=False) + assert result == expected1 + + def test_tz_localize_ambiguous(self): + ts = Timestamp("2014-11-02 01:00") + ts_dst = ts.tz_localize("US/Eastern", ambiguous=True) + ts_no_dst = ts.tz_localize("US/Eastern", ambiguous=False) + + assert (ts_no_dst.value - ts_dst.value) / 1e9 == 3600 + with pytest.raises(ValueError): + ts.tz_localize("US/Eastern", ambiguous="infer") + + # GH#8025 + msg = "Cannot localize tz-aware Timestamp, use tz_convert for conversions" + with pytest.raises(TypeError, match=msg): + Timestamp("2011-01-01", tz="US/Eastern").tz_localize("Asia/Tokyo") + + msg = "Cannot convert tz-naive Timestamp, use tz_localize to localize" 
+ with pytest.raises(TypeError, match=msg): + Timestamp("2011-01-01").tz_convert("Asia/Tokyo") + + @pytest.mark.parametrize( + "stamp, tz", + [ + ("2015-03-08 02:00", "US/Eastern"), + ("2015-03-08 02:30", "US/Pacific"), + ("2015-03-29 02:00", "Europe/Paris"), + ("2015-03-29 02:30", "Europe/Belgrade"), + ], + ) + def test_tz_localize_nonexistent(self, stamp, tz): + # GH#13057 + ts = Timestamp(stamp) + with pytest.raises(NonExistentTimeError): + ts.tz_localize(tz) + # GH 22644 + with pytest.raises(NonExistentTimeError): + ts.tz_localize(tz, nonexistent="raise") + assert ts.tz_localize(tz, nonexistent="NaT") is NaT + + def test_tz_localize_ambiguous_raise(self): + # GH#13057 + ts = Timestamp("2015-11-1 01:00") + with pytest.raises(AmbiguousTimeError): + ts.tz_localize("US/Pacific", ambiguous="raise") + + def test_tz_localize_nonexistent_invalid_arg(self): + # GH 22644 + tz = "Europe/Warsaw" + ts = Timestamp("2015-03-29 02:00:00") + with pytest.raises(ValueError): + ts.tz_localize(tz, nonexistent="foo") + + @pytest.mark.parametrize( + "stamp", + [ + "2014-02-01 09:00", + "2014-07-08 09:00", + "2014-11-01 17:00", + "2014-11-05 00:00", + ], + ) + def test_tz_localize_roundtrip(self, stamp, tz_aware_fixture): + tz = tz_aware_fixture + ts = Timestamp(stamp) + localized = ts.tz_localize(tz) + assert localized == Timestamp(stamp, tz=tz) + + with pytest.raises(TypeError): + localized.tz_localize(tz) + + reset = localized.tz_localize(None) + assert reset == ts + assert reset.tzinfo is None + + def test_tz_localize_ambiguous_compat(self): + # validate that pytz and dateutil are compat for dst + # when the transition happens + naive = Timestamp("2013-10-27 01:00:00") + + pytz_zone = "Europe/London" + dateutil_zone = "dateutil/Europe/London" + result_pytz = naive.tz_localize(pytz_zone, ambiguous=0) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=0) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382835600000000000 + + # fixed 
ambiguous behavior + # see gh-14621 + assert result_pytz.to_pydatetime().tzname() == "GMT" + assert result_dateutil.to_pydatetime().tzname() == "BST" + assert str(result_pytz) != str(result_dateutil) + + # 1 hour difference + result_pytz = naive.tz_localize(pytz_zone, ambiguous=1) + result_dateutil = naive.tz_localize(dateutil_zone, ambiguous=1) + assert result_pytz.value == result_dateutil.value + assert result_pytz.value == 1382832000000000000 + + # see gh-14621 + assert str(result_pytz) == str(result_dateutil) + assert ( + result_pytz.to_pydatetime().tzname() + == result_dateutil.to_pydatetime().tzname() + ) + + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + "US/Eastern", + "dateutil/US/Eastern", + ], + ) + def test_timestamp_tz_localize(self, tz): + stamp = Timestamp("3/11/2012 04:00") + + result = stamp.tz_localize(tz) + expected = Timestamp("3/11/2012 04:00", tz=tz) + assert result.hour == expected.hour + assert result == expected + + @pytest.mark.parametrize( + "start_ts, tz, end_ts, shift", + [ + ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:59:59.999999999", + "backward", + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 03:20:00", + timedelta(hours=1), + ], + [ + "2015-03-29 02:20:00", + "Europe/Warsaw", + "2015-03-29 01:20:00", + timedelta(hours=-1), + ], + ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:59:59.999999999", + "backward", + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 03:33:00", + timedelta(hours=1), + ], + [ + "2018-03-11 02:33:00", + "US/Pacific", + "2018-03-11 01:33:00", + timedelta(hours=-1), + ], + ], + ) + @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_timestamp_tz_localize_nonexistent_shift( + self, start_ts, tz, end_ts, shift, tz_type + ): + # GH 8917, 
24466 + tz = tz_type + tz + if isinstance(shift, str): + shift = "shift_" + shift + ts = Timestamp(start_ts) + result = ts.tz_localize(tz, nonexistent=shift) + expected = Timestamp(end_ts).tz_localize(tz) + assert result == expected + + @pytest.mark.parametrize("offset", [-1, 1]) + @pytest.mark.parametrize("tz_type", ["", "dateutil/"]) + def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): + # GH 8917, 24466 + tz = tz_type + "Europe/Warsaw" + ts = Timestamp("2015-03-29 02:20:00") + msg = "The provided timedelta will relocalize on a nonexistent time" + with pytest.raises(ValueError, match=msg): + ts.tz_localize(tz, nonexistent=timedelta(seconds=offset)) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + def test_timestamp_tz_localize_nonexistent_NaT(self, tz): + # GH 8917 + ts = Timestamp("2015-03-29 02:20:00") + result = ts.tz_localize(tz, nonexistent="NaT") + assert result is NaT + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + def test_timestamp_tz_localize_nonexistent_raise(self, tz): + # GH 8917 + ts = Timestamp("2015-03-29 02:20:00") + with pytest.raises(pytz.NonExistentTimeError): + ts.tz_localize(tz, nonexistent="raise") + with pytest.raises(ValueError): + ts.tz_localize(tz, nonexistent="foo") + + # ------------------------------------------------------------------ + # Timestamp.tz_convert + + @pytest.mark.parametrize( + "stamp", + [ + "2014-02-01 09:00", + "2014-07-08 09:00", + "2014-11-01 17:00", + "2014-11-05 00:00", + ], + ) + def test_tz_convert_roundtrip(self, stamp, tz_aware_fixture): + tz = tz_aware_fixture + + ts = Timestamp(stamp, tz="UTC") + converted = ts.tz_convert(tz) + + reset = converted.tz_convert(None) + assert reset == Timestamp(stamp) + assert reset.tzinfo is None + assert reset == converted.tz_convert("UTC").tz_localize(None) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_astimezone(self, tzstr): + # 
astimezone is an alias for tz_convert, so keep it with + # the tz_convert tests + utcdate = Timestamp("3/11/2012 22:00", tz="UTC") + expected = utcdate.tz_convert(tzstr) + result = utcdate.astimezone(tzstr) + assert expected == result + assert isinstance(result, Timestamp) + + @td.skip_if_windows + def test_tz_convert_utc_with_system_utc(self): + + # from system utc to real utc + ts = Timestamp("2001-01-05 11:56", tz=timezones.maybe_get_tz("dateutil/UTC")) + # check that the time hasn't changed. + assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # from system utc to real utc + ts = Timestamp("2001-01-05 11:56", tz=timezones.maybe_get_tz("dateutil/UTC")) + # check that the time hasn't changed. + assert ts == ts.tz_convert(dateutil.tz.tzutc()) + + # ------------------------------------------------------------------ + # Timestamp.__init__ with tz str or tzinfo + + def test_timestamp_constructor_tz_utc(self): + utc_stamp = Timestamp("3/11/2012 05:00", tz="utc") + assert utc_stamp.tzinfo is pytz.utc + assert utc_stamp.hour == 5 + + utc_stamp = Timestamp("3/11/2012 05:00").tz_localize("utc") + assert utc_stamp.hour == 5 + + def test_timestamp_to_datetime_tzoffset(self): + tzinfo = tzoffset(None, 7200) + expected = Timestamp("3/11/2012 04:00", tz=tzinfo) + result = Timestamp(expected.to_pydatetime()) + assert expected == result + + def test_timestamp_constructor_near_dst_boundary(self): + # GH#11481 & GH#15777 + # Naive string timestamps were being localized incorrectly + # with tz_convert_single instead of tz_localize_to_utc + + for tz in ["Europe/Brussels", "Europe/Prague"]: + result = Timestamp("2015-10-25 01:00", tz=tz) + expected = Timestamp("2015-10-25 01:00").tz_localize(tz) + assert result == expected + + with pytest.raises(pytz.AmbiguousTimeError): + Timestamp("2015-10-25 02:00", tz=tz) + + result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 01:00").tz_localize("Europe/Paris") + assert result == expected + + with 
pytest.raises(pytz.NonExistentTimeError): + Timestamp("2017-03-26 02:00", tz="Europe/Paris") + + # GH#11708 + naive = Timestamp("2015-11-18 10:00:00") + result = naive.tz_localize("UTC").tz_convert("Asia/Kolkata") + expected = Timestamp("2015-11-18 15:30:00+0530", tz="Asia/Kolkata") + assert result == expected + + # GH#15823 + result = Timestamp("2017-03-26 00:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 00:00:00+0100", tz="Europe/Paris") + assert result == expected + + result = Timestamp("2017-03-26 01:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 01:00:00+0100", tz="Europe/Paris") + assert result == expected + + with pytest.raises(pytz.NonExistentTimeError): + Timestamp("2017-03-26 02:00", tz="Europe/Paris") + + result = Timestamp("2017-03-26 02:00:00+0100", tz="Europe/Paris") + naive = Timestamp(result.value) + expected = naive.tz_localize("UTC").tz_convert("Europe/Paris") + assert result == expected + + result = Timestamp("2017-03-26 03:00", tz="Europe/Paris") + expected = Timestamp("2017-03-26 03:00:00+0200", tz="Europe/Paris") + assert result == expected + + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + "US/Eastern", + "dateutil/US/Eastern", + ], + ) + def test_timestamp_constructed_by_date_and_tz(self, tz): + # GH#2993, Timestamp cannot be constructed by datetime.date + # and tz correctly + + result = Timestamp(date(2012, 3, 11), tz=tz) + + expected = Timestamp("3/11/2012", tz=tz) + assert result.hour == expected.hour + assert result == expected + + @pytest.mark.parametrize( + "tz", + [ + pytz.timezone("US/Eastern"), + gettz("US/Eastern"), + "US/Eastern", + "dateutil/US/Eastern", + ], + ) + def test_timestamp_add_timedelta_push_over_dst_boundary(self, tz): + # GH#1389 + + # 4 hours before DST transition + stamp = Timestamp("3/10/2012 22:00", tz=tz) + + result = stamp + timedelta(hours=6) + + # spring forward, + "7" hours + expected = Timestamp("3/11/2012 05:00", tz=tz) + + assert 
result == expected + + def test_timestamp_timetz_equivalent_with_datetime_tz(self, tz_naive_fixture): + # GH21358 + tz = timezones.maybe_get_tz(tz_naive_fixture) + + stamp = Timestamp("2018-06-04 10:20:30", tz=tz) + _datetime = datetime(2018, 6, 4, hour=10, minute=20, second=30, tzinfo=tz) + + result = stamp.timetz() + expected = _datetime.timetz() + + assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py new file mode 100644 index 00000000..65066fd0 --- /dev/null +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -0,0 +1,420 @@ +from datetime import datetime + +from dateutil.tz import gettz +import pytest +import pytz +from pytz import utc + +from pandas._libs.tslibs import conversion +from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG +import pandas.util._test_decorators as td + +from pandas import NaT, Timestamp +import pandas._testing as tm + +from pandas.tseries.frequencies import to_offset + + +class TestTimestampUnaryOps: + + # -------------------------------------------------------------- + # Timestamp.round + @pytest.mark.parametrize( + "timestamp, freq, expected", + [ + ("20130101 09:10:11", "D", "20130101"), + ("20130101 19:10:11", "D", "20130102"), + ("20130201 12:00:00", "D", "20130202"), + ("20130104 12:00:00", "D", "20130105"), + ("2000-01-05 05:09:15.13", "D", "2000-01-05 00:00:00"), + ("2000-01-05 05:09:15.13", "H", "2000-01-05 05:00:00"), + ("2000-01-05 05:09:15.13", "S", "2000-01-05 05:09:15"), + ], + ) + def test_round_frequencies(self, timestamp, freq, expected): + dt = Timestamp(timestamp) + result = dt.round(freq) + expected = Timestamp(expected) + assert result == expected + + def test_round_tzaware(self): + dt = Timestamp("20130101 09:10:11", tz="US/Eastern") + result = dt.round("D") + expected = Timestamp("20130101", tz="US/Eastern") + assert result == expected + + dt = Timestamp("20130101 09:10:11", tz="US/Eastern") + result = 
dt.round("s") + assert result == dt + + def test_round_30min(self): + # round + dt = Timestamp("20130104 12:32:00") + result = dt.round("30Min") + expected = Timestamp("20130104 12:30:00") + assert result == expected + + def test_round_subsecond(self): + # GH#14440 & GH#15578 + result = Timestamp("2016-10-17 12:00:00.0015").round("ms") + expected = Timestamp("2016-10-17 12:00:00.002000") + assert result == expected + + result = Timestamp("2016-10-17 12:00:00.00149").round("ms") + expected = Timestamp("2016-10-17 12:00:00.001000") + assert result == expected + + ts = Timestamp("2016-10-17 12:00:00.0015") + for freq in ["us", "ns"]: + assert ts == ts.round(freq) + + result = Timestamp("2016-10-17 12:00:00.001501031").round("10ns") + expected = Timestamp("2016-10-17 12:00:00.001501030") + assert result == expected + + def test_round_nonstandard_freq(self): + with tm.assert_produces_warning(False): + Timestamp("2016-10-17 12:00:00.001501031").round("1010ns") + + def test_round_invalid_arg(self): + stamp = Timestamp("2000-01-05 05:09:15.13") + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + stamp.round("foo") + + @pytest.mark.parametrize( + "test_input, rounder, freq, expected", + [ + ("2117-01-01 00:00:45", "floor", "15s", "2117-01-01 00:00:45"), + ("2117-01-01 00:00:45", "ceil", "15s", "2117-01-01 00:00:45"), + ( + "2117-01-01 00:00:45.000000012", + "floor", + "10ns", + "2117-01-01 00:00:45.000000010", + ), + ( + "1823-01-01 00:00:01.000000012", + "ceil", + "10ns", + "1823-01-01 00:00:01.000000020", + ), + ("1823-01-01 00:00:01", "floor", "1s", "1823-01-01 00:00:01"), + ("1823-01-01 00:00:01", "ceil", "1s", "1823-01-01 00:00:01"), + ("NaT", "floor", "1s", "NaT"), + ("NaT", "ceil", "1s", "NaT"), + ], + ) + def test_ceil_floor_edge(self, test_input, rounder, freq, expected): + dt = Timestamp(test_input) + func = getattr(dt, rounder) + result = func(freq) + + if dt is NaT: + assert result is NaT + else: + expected = Timestamp(expected) + assert result == 
expected + + @pytest.mark.parametrize( + "test_input, freq, expected", + [ + ("2018-01-01 00:02:06", "2s", "2018-01-01 00:02:06"), + ("2018-01-01 00:02:00", "2T", "2018-01-01 00:02:00"), + ("2018-01-01 00:04:00", "4T", "2018-01-01 00:04:00"), + ("2018-01-01 00:15:00", "15T", "2018-01-01 00:15:00"), + ("2018-01-01 00:20:00", "20T", "2018-01-01 00:20:00"), + ("2018-01-01 03:00:00", "3H", "2018-01-01 03:00:00"), + ], + ) + @pytest.mark.parametrize("rounder", ["ceil", "floor", "round"]) + def test_round_minute_freq(self, test_input, freq, expected, rounder): + # Ensure timestamps that shouldn't round dont! + # GH#21262 + + dt = Timestamp(test_input) + expected = Timestamp(expected) + func = getattr(dt, rounder) + result = func(freq) + assert result == expected + + def test_ceil(self): + dt = Timestamp("20130101 09:10:11") + result = dt.ceil("D") + expected = Timestamp("20130102") + assert result == expected + + def test_floor(self): + dt = Timestamp("20130101 09:10:11") + result = dt.floor("D") + expected = Timestamp("20130101") + assert result == expected + + @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) + def test_round_dst_border_ambiguous(self, method): + # GH 18946 round near "fall back" DST + ts = Timestamp("2017-10-29 00:00:00", tz="UTC").tz_convert("Europe/Madrid") + # + result = getattr(ts, method)("H", ambiguous=True) + assert result == ts + + result = getattr(ts, method)("H", ambiguous=False) + expected = Timestamp("2017-10-29 01:00:00", tz="UTC").tz_convert( + "Europe/Madrid" + ) + assert result == expected + + result = getattr(ts, method)("H", ambiguous="NaT") + assert result is NaT + + with pytest.raises(pytz.AmbiguousTimeError): + getattr(ts, method)("H", ambiguous="raise") + + @pytest.mark.parametrize( + "method, ts_str, freq", + [ + ["ceil", "2018-03-11 01:59:00-0600", "5min"], + ["round", "2018-03-11 01:59:00-0600", "5min"], + ["floor", "2018-03-11 03:01:00-0500", "2H"], + ], + ) + def test_round_dst_border_nonexistent(self, method, 
ts_str, freq): + # GH 23324 round near "spring forward" DST + ts = Timestamp(ts_str, tz="America/Chicago") + result = getattr(ts, method)(freq, nonexistent="shift_forward") + expected = Timestamp("2018-03-11 03:00:00", tz="America/Chicago") + assert result == expected + + result = getattr(ts, method)(freq, nonexistent="NaT") + assert result is NaT + + with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): + getattr(ts, method)(freq, nonexistent="raise") + + @pytest.mark.parametrize( + "timestamp", + [ + "2018-01-01 0:0:0.124999360", + "2018-01-01 0:0:0.125000367", + "2018-01-01 0:0:0.125500", + "2018-01-01 0:0:0.126500", + "2018-01-01 12:00:00", + "2019-01-01 12:00:00", + ], + ) + @pytest.mark.parametrize( + "freq", + [ + "2ns", + "3ns", + "4ns", + "5ns", + "6ns", + "7ns", + "250ns", + "500ns", + "750ns", + "1us", + "19us", + "250us", + "500us", + "750us", + "1s", + "2s", + "3s", + "1D", + ], + ) + def test_round_int64(self, timestamp, freq): + """check that all rounding modes are accurate to int64 precision + see GH#22591 + """ + dt = Timestamp(timestamp) + unit = to_offset(freq).nanos + + # test floor + result = dt.floor(freq) + assert result.value % unit == 0, "floor not a {} multiple".format(freq) + assert 0 <= dt.value - result.value < unit, "floor error" + + # test ceil + result = dt.ceil(freq) + assert result.value % unit == 0, "ceil not a {} multiple".format(freq) + assert 0 <= result.value - dt.value < unit, "ceil error" + + # test round + result = dt.round(freq) + assert result.value % unit == 0, "round not a {} multiple".format(freq) + assert abs(result.value - dt.value) <= unit // 2, "round error" + if unit % 2 == 0 and abs(result.value - dt.value) == unit // 2: + # round half to even + assert result.value // unit % 2 == 0, "round half to even error" + + # -------------------------------------------------------------- + # Timestamp.replace + + def test_replace_naive(self): + # GH#14621, GH#7825 + ts = Timestamp("2016-01-01 
09:00:00") + result = ts.replace(hour=0) + expected = Timestamp("2016-01-01 00:00:00") + assert result == expected + + def test_replace_aware(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + ts = Timestamp("2016-01-01 09:00:00", tz=tz) + result = ts.replace(hour=0) + expected = Timestamp("2016-01-01 00:00:00", tz=tz) + assert result == expected + + def test_replace_preserves_nanos(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + result = ts.replace(hour=0) + expected = Timestamp("2016-01-01 00:00:00.000000123", tz=tz) + assert result == expected + + def test_replace_multiple(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + # replacing datetime components with and w/o presence of a timezone + # test all + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + result = ts.replace( + year=2015, + month=2, + day=2, + hour=0, + minute=5, + second=5, + microsecond=5, + nanosecond=5, + ) + expected = Timestamp("2015-02-02 00:05:05.000005005", tz=tz) + assert result == expected + + def test_replace_invalid_kwarg(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + with pytest.raises(TypeError): + ts.replace(foo=5) + + def test_replace_integer_args(self, tz_aware_fixture): + tz = tz_aware_fixture + # GH#14621, GH#7825 + ts = Timestamp("2016-01-01 09:00:00.000000123", tz=tz) + with pytest.raises(ValueError): + ts.replace(hour=0.1) + + def test_replace_tzinfo_equiv_tz_localize_none(self): + # GH#14621, GH#7825 + # assert conversion to naive is the same as replacing tzinfo with None + ts = Timestamp("2013-11-03 01:59:59.999999-0400", tz="US/Eastern") + assert ts.tz_localize(None) == ts.replace(tzinfo=None) + + @td.skip_if_windows + def test_replace_tzinfo(self): + # GH#15683 + dt = datetime(2016, 3, 27, 
1) + tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo + + result_dt = dt.replace(tzinfo=tzinfo) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo) + + # datetime.timestamp() converts in the local timezone + with tm.set_timezone("UTC"): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None) + result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None) + + # datetime.timestamp() converts in the local timezone + with tm.set_timezone("UTC"): + assert result_dt.timestamp() == result_pd.timestamp() + + assert result_dt == result_pd + assert result_dt == result_pd.to_pydatetime() + + @pytest.mark.parametrize( + "tz, normalize", + [ + (pytz.timezone("US/Eastern"), lambda x: x.tzinfo.normalize(x)), + (gettz("US/Eastern"), lambda x: x), + ], + ) + def test_replace_across_dst(self, tz, normalize): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + ts_naive = Timestamp("2017-12-03 16:03:30") + ts_aware = conversion.localize_pydatetime(ts_naive, tz) + + # Preliminary sanity-check + assert ts_aware == normalize(ts_aware) + + # Replace across DST boundary + ts2 = ts_aware.replace(month=6) + + # Check that `replace` preserves hour literal + assert (ts2.hour, ts2.minute) == (ts_aware.hour, ts_aware.minute) + + # Check that post-replace object is appropriately normalized + ts2b = normalize(ts2) + assert ts2 == ts2b + + def test_replace_dst_border(self): + # Gh 7825 + t = Timestamp("2013-11-3", tz="America/Chicago") + result = t.replace(hour=3) + expected = Timestamp("2013-11-3 03:00:00", tz="America/Chicago") + assert result == expected + + @pytest.mark.parametrize("fold", [0, 1]) + @pytest.mark.parametrize("tz", ["dateutil/Europe/London", "Europe/London"]) + def test_replace_dst_fold(self, fold, tz): + # GH 25017 + d = datetime(2019, 
10, 27, 2, 30) + ts = Timestamp(d, tz=tz) + result = ts.replace(hour=1, fold=fold) + expected = Timestamp(datetime(2019, 10, 27, 1, 30)).tz_localize( + tz, ambiguous=not fold + ) + assert result == expected + + # -------------------------------------------------------------- + # Timestamp.normalize + + @pytest.mark.parametrize("arg", ["2013-11-30", "2013-11-30 12:00:00"]) + def test_normalize(self, tz_naive_fixture, arg): + tz = tz_naive_fixture + ts = Timestamp(arg, tz=tz) + result = ts.normalize() + expected = Timestamp("2013-11-30", tz=tz) + assert result == expected + + # -------------------------------------------------------------- + + @td.skip_if_windows + def test_timestamp(self): + # GH#17329 + # tz-naive --> treat it as if it were UTC for purposes of timestamp() + ts = Timestamp.now() + uts = ts.replace(tzinfo=utc) + assert ts.timestamp() == uts.timestamp() + + tsc = Timestamp("2014-10-11 11:00:01.12345678", tz="US/Central") + utsc = tsc.tz_convert("UTC") + + # utsc is a different representation of the same time + assert tsc.timestamp() == utsc.timestamp() + + # datetime.timestamp() converts in the local timezone + with tm.set_timezone("UTC"): + # should agree with datetime.timestamp method + dt = ts.to_pydatetime() + assert dt.timestamp() == ts.timestamp() diff --git a/pandas/tests/series/__init__.py b/pandas/tests/series/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/series/conftest.py b/pandas/tests/series/conftest.py new file mode 100644 index 00000000..ff0b0c71 --- /dev/null +++ b/pandas/tests/series/conftest.py @@ -0,0 +1,33 @@ +import pytest + +import pandas._testing as tm + + +@pytest.fixture +def datetime_series(): + """ + Fixture for Series of floats with DatetimeIndex + """ + s = tm.makeTimeSeries() + s.name = "ts" + return s + + +@pytest.fixture +def string_series(): + """ + Fixture for Series of floats with Index of unique strings + """ + s = tm.makeStringSeries() + s.name = "series" + return s + + 
+@pytest.fixture +def object_series(): + """ + Fixture for Series of dtype object with Index of unique strings + """ + s = tm.makeObjectSeries() + s.name = "objects" + return s diff --git a/pandas/tests/series/indexing/__init__.py b/pandas/tests/series/indexing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/series/indexing/test_alter_index.py b/pandas/tests/series/indexing/test_alter_index.py new file mode 100644 index 00000000..61d76005 --- /dev/null +++ b/pandas/tests/series/indexing/test_alter_index.py @@ -0,0 +1,570 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import Categorical, Series, date_range, isna +import pandas._testing as tm + + +@pytest.mark.parametrize( + "first_slice,second_slice", + [ + [[2, None], [None, -5]], + [[None, 0], [None, -5]], + [[None, -5], [None, 0]], + [[None, 0], [None, 0]], + ], +) +@pytest.mark.parametrize("fill", [None, -1]) +def test_align(datetime_series, first_slice, second_slice, join_type, fill): + a = datetime_series[slice(*first_slice)] + b = datetime_series[slice(*second_slice)] + + aa, ab = a.align(b, join=join_type, fill_value=fill) + + join_index = a.index.join(b.index, how=join_type) + if fill is not None: + diff_a = aa.index.difference(join_index) + diff_b = ab.index.difference(join_index) + if len(diff_a) > 0: + assert (aa.reindex(diff_a) == fill).all() + if len(diff_b) > 0: + assert (ab.reindex(diff_b) == fill).all() + + ea = a.reindex(join_index) + eb = b.reindex(join_index) + + if fill is not None: + ea = ea.fillna(fill) + eb = eb.fillna(fill) + + tm.assert_series_equal(aa, ea) + tm.assert_series_equal(ab, eb) + assert aa.name == "ts" + assert ea.name == "ts" + assert ab.name == "ts" + assert eb.name == "ts" + + +@pytest.mark.parametrize( + "first_slice,second_slice", + [ + [[2, None], [None, -5]], + [[None, 0], [None, -5]], + [[None, -5], [None, 0]], + [[None, 0], [None, 0]], + ], +) 
+@pytest.mark.parametrize("method", ["pad", "bfill"]) +@pytest.mark.parametrize("limit", [None, 1]) +def test_align_fill_method( + datetime_series, first_slice, second_slice, join_type, method, limit +): + a = datetime_series[slice(*first_slice)] + b = datetime_series[slice(*second_slice)] + + aa, ab = a.align(b, join=join_type, method=method, limit=limit) + + join_index = a.index.join(b.index, how=join_type) + ea = a.reindex(join_index) + eb = b.reindex(join_index) + + ea = ea.fillna(method=method, limit=limit) + eb = eb.fillna(method=method, limit=limit) + + tm.assert_series_equal(aa, ea) + tm.assert_series_equal(ab, eb) + + +def test_align_nocopy(datetime_series): + b = datetime_series[:5].copy() + + # do copy + a = datetime_series.copy() + ra, _ = a.align(b, join="left") + ra[:5] = 5 + assert not (a[:5] == 5).any() + + # do not copy + a = datetime_series.copy() + ra, _ = a.align(b, join="left", copy=False) + ra[:5] = 5 + assert (a[:5] == 5).all() + + # do copy + a = datetime_series.copy() + b = datetime_series[:5].copy() + _, rb = a.align(b, join="right") + rb[:3] = 5 + assert not (b[:3] == 5).any() + + # do not copy + a = datetime_series.copy() + b = datetime_series[:5].copy() + _, rb = a.align(b, join="right", copy=False) + rb[:2] = 5 + assert (b[:2] == 5).all() + + +def test_align_same_index(datetime_series): + a, b = datetime_series.align(datetime_series, copy=False) + assert a.index is datetime_series.index + assert b.index is datetime_series.index + + a, b = datetime_series.align(datetime_series, copy=True) + assert a.index is not datetime_series.index + assert b.index is not datetime_series.index + + +def test_align_multiindex(): + # GH 10665 + + midx = pd.MultiIndex.from_product( + [range(2), range(3), range(2)], names=("a", "b", "c") + ) + idx = pd.Index(range(2), name="b") + s1 = pd.Series(np.arange(12, dtype="int64"), index=midx) + s2 = pd.Series(np.arange(2, dtype="int64"), index=idx) + + # these must be the same results (but flipped) + res1l, res1r 
= s1.align(s2, join="left") + res2l, res2r = s2.align(s1, join="right") + + expl = s1 + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = pd.Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + res1l, res1r = s1.align(s2, join="right") + res2l, res2r = s2.align(s1, join="left") + + exp_idx = pd.MultiIndex.from_product( + [range(2), range(2), range(2)], names=("a", "b", "c") + ) + expl = pd.Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx) + tm.assert_series_equal(expl, res1l) + tm.assert_series_equal(expl, res2r) + expr = pd.Series([0, 0, 1, 1] * 2, index=exp_idx) + tm.assert_series_equal(expr, res1r) + tm.assert_series_equal(expr, res2l) + + +@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None]) +def test_align_method(method): + # GH31788 + ser = pd.Series(range(3), index=range(3)) + df = pd.DataFrame(0.0, index=range(3), columns=range(3)) + + result_ser, result_df = ser.align(df, method=method) + tm.assert_series_equal(result_ser, ser) + tm.assert_frame_equal(result_df, df) + + +def test_reindex(datetime_series, string_series): + identity = string_series.reindex(string_series.index) + + # __array_interface__ is not defined for older numpies + # and on some pythons + try: + assert np.may_share_memory(string_series.index, identity.index) + except AttributeError: + pass + + assert identity.index.is_(string_series.index) + assert identity.index.identical(string_series.index) + + subIndex = string_series.index[10:20] + subSeries = string_series.reindex(subIndex) + + for idx, val in subSeries.items(): + assert val == string_series[idx] + + subIndex2 = datetime_series.index[10:20] + subTS = datetime_series.reindex(subIndex2) + + for idx, val in subTS.items(): + assert val == datetime_series[idx] + stuffSeries = datetime_series.reindex(subIndex) + + assert np.isnan(stuffSeries).all() + + # This is extremely important for the Cython code 
to not screw up + nonContigIndex = datetime_series.index[::2] + subNonContig = datetime_series.reindex(nonContigIndex) + for idx, val in subNonContig.items(): + assert val == datetime_series[idx] + + # return a copy the same index here + result = datetime_series.reindex() + assert not (result is datetime_series) + + +def test_reindex_nan(): + ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8]) + + i, j = [np.nan, 1, np.nan, 8, 4, np.nan], [2, 0, 2, 3, 1, 2] + tm.assert_series_equal(ts.reindex(i), ts.iloc[j]) + + ts.index = ts.index.astype("object") + + # reindex coerces index.dtype to float, loc/iloc doesn't + tm.assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) + + +def test_reindex_series_add_nat(): + rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s") + series = Series(rng) + + result = series.reindex(range(15)) + assert np.issubdtype(result.dtype, np.dtype("M8[ns]")) + + mask = result.isna() + assert mask[-5:].all() + assert not mask[:-5].any() + + +def test_reindex_with_datetimes(): + rng = date_range("1/1/2000", periods=20) + ts = Series(np.random.randn(20), index=rng) + + result = ts.reindex(list(ts.index[5:10])) + expected = ts[5:10] + tm.assert_series_equal(result, expected) + + result = ts[list(ts.index[5:10])] + tm.assert_series_equal(result, expected) + + +def test_reindex_corner(datetime_series): + # (don't forget to fix this) I think it's fixed + empty = Series(dtype=object) + empty.reindex(datetime_series.index, method="pad") # it works + + # corner case: pad empty series + reindexed = empty.reindex(datetime_series.index, method="pad") + + # pass non-Index + reindexed = datetime_series.reindex(list(datetime_series.index)) + tm.assert_series_equal(datetime_series, reindexed) + + # bad fill method + ts = datetime_series[::2] + msg = ( + r"Invalid fill method\. Expecting pad \(ffill\), backfill" + r" \(bfill\) or nearest\. 
Got foo" + ) + with pytest.raises(ValueError, match=msg): + ts.reindex(datetime_series.index, method="foo") + + +def test_reindex_pad(): + s = Series(np.arange(10), dtype="int64") + s2 = s[::2] + + reindexed = s2.reindex(s.index, method="pad") + reindexed2 = s2.reindex(s.index, method="ffill") + tm.assert_series_equal(reindexed, reindexed2) + + expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10)) + tm.assert_series_equal(reindexed, expected) + + # GH4604 + s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) + new_index = ["a", "g", "c", "f"] + expected = Series([1, 1, 3, 3], index=new_index) + + # this changes dtype because the ffill happens after + result = s.reindex(new_index).ffill() + tm.assert_series_equal(result, expected.astype("float64")) + + result = s.reindex(new_index).ffill(downcast="infer") + tm.assert_series_equal(result, expected) + + expected = Series([1, 5, 3, 5], index=new_index) + result = s.reindex(new_index, method="ffill") + tm.assert_series_equal(result, expected) + + # inference of new dtype + s = Series([True, False, False, True], index=list("abcd")) + new_index = "agc" + result = s.reindex(list(new_index)).ffill() + expected = Series([True, True, False], index=list(new_index)) + tm.assert_series_equal(result, expected) + + # GH4618 shifted series downcasting + s = Series(False, index=range(0, 5)) + result = s.shift(1).fillna(method="bfill") + expected = Series(False, index=range(0, 5)) + tm.assert_series_equal(result, expected) + + +def test_reindex_nearest(): + s = Series(np.arange(10, dtype="int64")) + target = [0.1, 0.9, 1.5, 2.0] + actual = s.reindex(target, method="nearest") + expected = Series(np.around(target).astype("int64"), target) + tm.assert_series_equal(expected, actual) + + actual = s.reindex_like(actual, method="nearest") + tm.assert_series_equal(expected, actual) + + actual = s.reindex_like(actual, method="nearest", tolerance=1) + tm.assert_series_equal(expected, actual) + actual = 
s.reindex_like(actual, method="nearest", tolerance=[1, 2, 3, 4]) + tm.assert_series_equal(expected, actual) + + actual = s.reindex(target, method="nearest", tolerance=0.2) + expected = Series([0, 1, np.nan, 2], target) + tm.assert_series_equal(expected, actual) + + actual = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3]) + expected = Series([0, np.nan, np.nan, 2], target) + tm.assert_series_equal(expected, actual) + + +def test_reindex_backfill(): + pass + + +def test_reindex_int(datetime_series): + ts = datetime_series[::2] + int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index) + + # this should work fine + reindexed_int = int_ts.reindex(datetime_series.index) + + # if NaNs introduced + assert reindexed_int.dtype == np.float_ + + # NO NaNs introduced + reindexed_int = int_ts.reindex(int_ts.index[::2]) + assert reindexed_int.dtype == np.int_ + + +def test_reindex_bool(datetime_series): + # A series other than float, int, string, or object + ts = datetime_series[::2] + bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) + + # this should work fine + reindexed_bool = bool_ts.reindex(datetime_series.index) + + # if NaNs introduced + assert reindexed_bool.dtype == np.object_ + + # NO NaNs introduced + reindexed_bool = bool_ts.reindex(bool_ts.index[::2]) + assert reindexed_bool.dtype == np.bool_ + + +def test_reindex_bool_pad(datetime_series): + # fail + ts = datetime_series[5:] + bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) + filled_bool = bool_ts.reindex(datetime_series.index, method="pad") + assert isna(filled_bool[:5]).all() + + +def test_reindex_categorical(): + index = date_range("20000101", periods=3) + + # reindexing to an invalid Categorical + s = Series(["a", "b", "c"], dtype="category") + result = s.reindex(index) + expected = Series( + Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"]) + ) + expected.index = index + tm.assert_series_equal(result, expected) + + # partial 
reindexing + expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"])) + expected.index = [1, 2] + result = s.reindex([1, 2]) + tm.assert_series_equal(result, expected) + + expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"])) + expected.index = [2, 3] + result = s.reindex([2, 3]) + tm.assert_series_equal(result, expected) + + +def test_reindex_like(datetime_series): + other = datetime_series[::2] + tm.assert_series_equal( + datetime_series.reindex(other.index), datetime_series.reindex_like(other) + ) + + # GH 7179 + day1 = datetime(2013, 3, 5) + day2 = datetime(2013, 5, 5) + day3 = datetime(2014, 3, 5) + + series1 = Series([5, None, None], [day1, day2, day3]) + series2 = Series([None, None], [day1, day3]) + + result = series1.reindex_like(series2, method="pad") + expected = Series([5, np.nan], index=[day1, day3]) + tm.assert_series_equal(result, expected) + + +def test_reindex_fill_value(): + # ----------------------------------------------------------- + # floats + floats = Series([1.0, 2.0, 3.0]) + result = floats.reindex([1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + result = floats.reindex([1, 2, 3], fill_value=0) + expected = Series([2.0, 3.0, 0], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # ----------------------------------------------------------- + # ints + ints = Series([1, 2, 3]) + + result = ints.reindex([1, 2, 3]) + expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + # don't upcast + result = ints.reindex([1, 2, 3], fill_value=0) + expected = Series([2, 3, 0], index=[1, 2, 3]) + assert issubclass(result.dtype.type, np.integer) + tm.assert_series_equal(result, expected) + + # ----------------------------------------------------------- + # objects + objects = Series([1, 2, 3], dtype=object) + + result = objects.reindex([1, 2, 3]) + expected = Series([2, 3, np.nan], 
index=[1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected) + + result = objects.reindex([1, 2, 3], fill_value="foo") + expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected) + + # ------------------------------------------------------------ + # bools + bools = Series([True, False, True]) + + result = bools.reindex([1, 2, 3]) + expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object) + tm.assert_series_equal(result, expected) + + result = bools.reindex([1, 2, 3], fill_value=False) + expected = Series([False, True, False], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + +def test_reindex_datetimeindexes_tz_naive_and_aware(): + # GH 8306 + idx = date_range("20131101", tz="America/Chicago", periods=7) + newidx = date_range("20131103", periods=10, freq="H") + s = Series(range(7), index=idx) + with pytest.raises(TypeError): + s.reindex(newidx, method="ffill") + + +def test_reindex_empty_series_tz_dtype(): + # GH 20869 + result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1]) + expected = Series([pd.NaT] * 2, dtype="datetime64[ns, UTC]") + tm.assert_equal(result, expected) + + +def test_rename(): + # GH 17407 + s = Series(range(1, 6), index=pd.Index(range(2, 7), name="IntIndex")) + result = s.rename(str) + expected = s.rename(lambda i: str(i)) + tm.assert_series_equal(result, expected) + + assert result.name == expected.name + + +@pytest.mark.parametrize( + "data, index, drop_labels, axis, expected_data, expected_index", + [ + # Unique Index + ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]), + ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]), + ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]), + # GH 5248 Non-Unique Index + ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]), + ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]), + ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]), + ], +) +def 
test_drop_unique_and_non_unique_index( + data, index, axis, drop_labels, expected_data, expected_index +): + + s = Series(data=data, index=index) + result = s.drop(drop_labels, axis=axis) + expected = Series(data=expected_data, index=expected_index) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "data, index, drop_labels, axis, error_type, error_desc", + [ + # single string/tuple-like + (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"), + # bad axis + (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"), + (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"), + ], +) +def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc): + + with pytest.raises(error_type, match=error_desc): + Series(data, index=index).drop(drop_labels, axis=axis) + + +def test_drop_with_ignore_errors(): + # errors='ignore' + s = Series(range(3), index=list("abc")) + result = s.drop("bc", errors="ignore") + tm.assert_series_equal(result, s) + result = s.drop(["a", "d"], errors="ignore") + expected = s.iloc[1:] + tm.assert_series_equal(result, expected) + + # GH 8522 + s = Series([2, 3], index=[True, False]) + assert s.index.is_object() + result = s.drop(True) + expected = Series([3], index=[False]) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]]) +@pytest.mark.parametrize("drop_labels", [[], [1], [3]]) +def test_drop_empty_list(index, drop_labels): + # GH 21494 + expected_index = [i for i in index if i not in drop_labels] + series = pd.Series(index=index, dtype=object).drop(drop_labels) + expected = pd.Series(index=expected_index, dtype=object) + tm.assert_series_equal(series, expected) + + +@pytest.mark.parametrize( + "data, index, drop_labels", + [ + (None, [1, 2, 3], [1, 4]), + (None, [1, 2, 2], [1, 4]), + ([2, 3], [0, 1], [False, True]), + ], +) +def test_drop_non_empty_list(data, index, drop_labels): + # GH 21494 and GH 
16877 + with pytest.raises(KeyError, match="not found in axis"): + dtype = object if data is None else None + pd.Series(data=data, index=index, dtype=dtype).drop(drop_labels) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py new file mode 100644 index 00000000..232c9cbc --- /dev/null +++ b/pandas/tests/series/indexing/test_boolean.py @@ -0,0 +1,627 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +import pandas as pd +from pandas import Index, Series, Timestamp, date_range, isna +import pandas._testing as tm +from pandas.core.indexing import IndexingError + +from pandas.tseries.offsets import BDay + + +def test_getitem_boolean(string_series): + s = string_series + mask = s > s.median() + + # passing list is OK + result = s[list(mask)] + expected = s[mask] + tm.assert_series_equal(result, expected) + tm.assert_index_equal(result.index, s.index[mask]) + + +def test_getitem_boolean_empty(): + s = Series([], dtype=np.int64) + s.index.name = "index_name" + s = s[s.isna()] + assert s.index.name == "index_name" + assert s.dtype == np.int64 + + # GH5877 + # indexing with empty series + s = Series(["A", "B"]) + expected = Series(np.nan, index=["C"], dtype=object) + result = s[Series(["C"], dtype=object)] + tm.assert_series_equal(result, expected) + + s = Series(["A", "B"]) + expected = Series(dtype=object, index=Index([], dtype="int64")) + result = s[Series([], dtype=object)] + tm.assert_series_equal(result, expected) + + # invalid because of the boolean indexer + # that's empty or not-aligned + msg = ( + r"Unalignable boolean Series provided as indexer \(index of" + r" the boolean Series and of the indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + s[Series([], dtype=bool)] + + with pytest.raises(IndexingError, match=msg): + s[Series([True], dtype=bool)] + + +def test_getitem_boolean_object(string_series): + # using column from DataFrame + + s = 
string_series + mask = s > s.median() + omask = mask.astype(object) + + # getitem + result = s[omask] + expected = s[mask] + tm.assert_series_equal(result, expected) + + # setitem + s2 = s.copy() + cop = s.copy() + cop[omask] = 5 + s2[mask] = 5 + tm.assert_series_equal(cop, s2) + + # nans raise exception + omask[5:10] = np.nan + msg = "Cannot mask with non-boolean array containing NA / NaN values" + with pytest.raises(ValueError, match=msg): + s[omask] + with pytest.raises(ValueError, match=msg): + s[omask] = 5 + + +def test_getitem_setitem_boolean_corner(datetime_series): + ts = datetime_series + mask_shifted = ts.shift(1, freq=BDay()) > ts.median() + + # these used to raise...?? + + msg = ( + r"Unalignable boolean Series provided as indexer \(index of" + r" the boolean Series and of the indexed object do not match" + ) + with pytest.raises(IndexingError, match=msg): + ts[mask_shifted] + with pytest.raises(IndexingError, match=msg): + ts[mask_shifted] = 1 + + with pytest.raises(IndexingError, match=msg): + ts.loc[mask_shifted] + with pytest.raises(IndexingError, match=msg): + ts.loc[mask_shifted] = 1 + + +def test_setitem_boolean(string_series): + mask = string_series > string_series.median() + + # similar indexed series + result = string_series.copy() + result[mask] = string_series * 2 + expected = string_series * 2 + tm.assert_series_equal(result[mask], expected[mask]) + + # needs alignment + result = string_series.copy() + result[mask] = (string_series * 2)[0:5] + expected = (string_series * 2)[0:5].reindex_like(string_series) + expected[-mask] = string_series[mask] + tm.assert_series_equal(result[mask], expected[mask]) + + +def test_get_set_boolean_different_order(string_series): + ordered = string_series.sort_values() + + # setting + copy = string_series.copy() + copy[ordered > 0] = 0 + + expected = string_series.copy() + expected[expected > 0] = 0 + + tm.assert_series_equal(copy, expected) + + # getting + sel = string_series[ordered > 0] + exp = 
string_series[string_series > 0] + tm.assert_series_equal(sel, exp) + + +def test_where_unsafe_int(sint_dtype): + s = Series(np.arange(10), dtype=sint_dtype) + mask = s < 5 + + s[mask] = range(2, 7) + expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype=sint_dtype) + + tm.assert_series_equal(s, expected) + + +def test_where_unsafe_float(float_dtype): + s = Series(np.arange(10), dtype=float_dtype) + mask = s < 5 + + s[mask] = range(2, 7) + data = list(range(2, 7)) + list(range(5, 10)) + expected = Series(data, dtype=float_dtype) + + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize( + "dtype,expected_dtype", + [ + (np.int8, np.float64), + (np.int16, np.float64), + (np.int32, np.float64), + (np.int64, np.float64), + (np.float32, np.float32), + (np.float64, np.float64), + ], +) +def test_where_unsafe_upcast(dtype, expected_dtype): + # see gh-9743 + s = Series(np.arange(10), dtype=dtype) + values = [2.5, 3.5, 4.5, 5.5, 6.5] + mask = s < 5 + expected = Series(values + list(range(5, 10)), dtype=expected_dtype) + s[mask] = values + tm.assert_series_equal(s, expected) + + +def test_where_unsafe(): + # see gh-9731 + s = Series(np.arange(10), dtype="int64") + values = [2.5, 3.5, 4.5, 5.5] + + mask = s > 5 + expected = Series(list(range(6)) + values, dtype="float64") + + s[mask] = values + tm.assert_series_equal(s, expected) + + # see gh-3235 + s = Series(np.arange(10), dtype="int64") + mask = s < 5 + s[mask] = range(2, 7) + expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64") + tm.assert_series_equal(s, expected) + assert s.dtype == expected.dtype + + s = Series(np.arange(10), dtype="int64") + mask = s > 5 + s[mask] = [0] * 4 + expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64") + tm.assert_series_equal(s, expected) + + s = Series(np.arange(10)) + mask = s > 5 + + msg = "cannot assign mismatch length to masked array" + with pytest.raises(ValueError, match=msg): + s[mask] = [5, 4, 3, 2, 1] + + with 
pytest.raises(ValueError, match=msg): + s[mask] = [0] * 5 + + # dtype changes + s = Series([1, 2, 3, 4]) + result = s.where(s > 2, np.nan) + expected = Series([np.nan, np.nan, 3, 4]) + tm.assert_series_equal(result, expected) + + # GH 4667 + # setting with None changes dtype + s = Series(range(10)).astype(float) + s[8] = None + result = s[8] + assert isna(result) + + s = Series(range(10)).astype(float) + s[s > 8] = None + result = s[isna(s)] + expected = Series(np.nan, index=[9]) + tm.assert_series_equal(result, expected) + + +def test_where(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.where(cond).dropna() + rs2 = s[cond] + tm.assert_series_equal(rs, rs2) + + rs = s.where(cond, -s) + tm.assert_series_equal(rs, s.abs()) + + rs = s.where(cond) + assert s.shape == rs.shape + assert rs is not s + + # test alignment + cond = Series([True, False, False, True, False], index=s.index) + s2 = -(s.abs()) + + expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) + rs = s2.where(cond[:3]) + tm.assert_series_equal(rs, expected) + + expected = s2.abs() + expected.iloc[0] = s2[0] + rs = s2.where(cond[:3], -s2) + tm.assert_series_equal(rs, expected) + + +def test_where_error(): + s = Series(np.random.randn(5)) + cond = s > 0 + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.where(1) + with pytest.raises(ValueError, match=msg): + s.where(cond[:3].values, -s) + + # GH 2745 + s = Series([1, 2]) + s[[True, False]] = [0, 1] + expected = Series([0, 2]) + tm.assert_series_equal(s, expected) + + # failures + msg = "cannot assign mismatch length to masked array" + with pytest.raises(ValueError, match=msg): + s[[True, False]] = [0, 2, 3] + msg = ( + "NumPy boolean array indexing assignment cannot assign 0 input " + "values to the 1 output values where the mask is true" + ) + with pytest.raises(ValueError, match=msg): + s[[True, False]] = [] + + +@pytest.mark.parametrize("klass", [list, tuple, np.array, Series]) 
+def test_where_array_like(klass): + # see gh-15414 + s = Series([1, 2, 3]) + cond = [False, True, True] + expected = Series([np.nan, 2, 3]) + + result = s.where(klass(cond)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "cond", + [ + [1, 0, 1], + Series([2, 5, 7]), + ["True", "False", "True"], + [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")], + ], +) +def test_where_invalid_input(cond): + # see gh-15414: only boolean arrays accepted + s = Series([1, 2, 3]) + msg = "Boolean array expected for the condition" + + with pytest.raises(ValueError, match=msg): + s.where(cond) + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.where([True]) + + +def test_where_ndframe_align(): + msg = "Array conditional must be same shape as self" + s = Series([1, 2, 3]) + + cond = [True] + with pytest.raises(ValueError, match=msg): + s.where(cond) + + expected = Series([1, np.nan, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + cond = np.array([False, True, False, True]) + with pytest.raises(ValueError, match=msg): + s.where(cond) + + expected = Series([np.nan, 2, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + +def test_where_setitem_invalid(): + # GH 2702 + # make sure correct exceptions are raised on invalid list assignment + + msg = "cannot set using a {} indexer with a different length than the value" + + # slice + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg.format("slice")): + s[0:3] = list(range(27)) + + s[0:3] = list(range(3)) + expected = Series([0, 1, 2]) + tm.assert_series_equal(s.astype(np.int64), expected) + + # slice with step + s = Series(list("abcdef")) + + with pytest.raises(ValueError, match=msg.format("slice")): + s[0:4:2] = list(range(27)) + + s = Series(list("abcdef")) + s[0:4:2] = list(range(2)) + expected = Series([0, "b", 1, "d", "e", "f"]) + tm.assert_series_equal(s, 
expected) + + # neg slices + s = Series(list("abcdef")) + + with pytest.raises(ValueError, match=msg.format("slice")): + s[:-1] = list(range(27)) + + s[-3:-1] = list(range(2)) + expected = Series(["a", "b", "c", 0, 1, "f"]) + tm.assert_series_equal(s, expected) + + # list + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg.format("list-like")): + s[[0, 1, 2]] = list(range(27)) + + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg.format("list-like")): + s[[0, 1, 2]] = list(range(2)) + + # scalar + s = Series(list("abc")) + s[0] = list(range(10)) + expected = Series([list(range(10)), "b", "c"]) + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize("size", range(2, 6)) +@pytest.mark.parametrize( + "mask", [[True, False, False, False, False], [True, False], [False]] +) +@pytest.mark.parametrize( + "item", [2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min] +) +# Test numpy arrays, lists and tuples as the input to be +# broadcast +@pytest.mark.parametrize( + "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)] +) +def test_broadcast(size, mask, item, box): + selection = np.resize(mask, size) + + data = np.arange(size, dtype=float) + + # Construct the expected series by taking the source + # data or item based on the selection + expected = Series( + [item if use_item else data[i] for i, use_item in enumerate(selection)] + ) + + s = Series(data) + s[selection] = box(item) + tm.assert_series_equal(s, expected) + + s = Series(data) + result = s.where(~selection, box(item)) + tm.assert_series_equal(result, expected) + + s = Series(data) + result = s.mask(selection, box(item)) + tm.assert_series_equal(result, expected) + + +def test_where_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + + rs.where(cond, inplace=True) + tm.assert_series_equal(rs.dropna(), s[cond]) + tm.assert_series_equal(rs, s.where(cond)) + + rs = s.copy() + rs.where(cond, -s, inplace=True) + 
tm.assert_series_equal(rs, s.where(cond, -s)) + + +def test_where_dups(): + # GH 4550 + # where crashes with dups in index + s1 = Series(list(range(3))) + s2 = Series(list(range(3))) + comb = pd.concat([s1, s2]) + result = comb.where(comb < 2) + expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + # GH 4548 + # inplace updating not working with dups + comb[comb < 1] = 5 + expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(comb, expected) + + comb[comb < 2] += 10 + expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(comb, expected) + + +def test_where_numeric_with_string(): + # GH 9280 + s = pd.Series([1, 2, 3]) + w = s.where(s > 1, "X") + + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == "object" + + w = s.where(s > 1, ["X", "Y", "Z"]) + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == "object" + + w = s.where(s > 1, np.array(["X", "Y", "Z"])) + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == "object" + + +def test_where_timedelta_coerce(): + s = Series([1, 2], dtype="timedelta64[ns]") + expected = Series([10, 10]) + mask = np.array([False, False]) + + rs = s.where(mask, [10, 10]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10.0) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, 10.0]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, np.nan]) + expected = Series([10, None], dtype="object") + tm.assert_series_equal(rs, expected) + + +def test_where_datetime_conversion(): + s = Series(date_range("20130102", periods=2)) + expected = Series([10, 10]) + mask = 
np.array([False, False]) + + rs = s.where(mask, [10, 10]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10.0) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, 10.0]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, np.nan]) + expected = Series([10, None], dtype="object") + tm.assert_series_equal(rs, expected) + + # GH 15701 + timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"] + s = Series([pd.Timestamp(t) for t in timestamps]) + rs = s.where(Series([False, True])) + expected = Series([pd.NaT, s[1]]) + tm.assert_series_equal(rs, expected) + + +def test_where_dt_tz_values(tz_naive_fixture): + ser1 = pd.Series( + pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture) + ) + ser2 = pd.Series( + pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture) + ) + mask = pd.Series([True, True, False]) + result = ser1.where(mask, ser2) + exp = pd.Series( + pd.DatetimeIndex(["20150101", "20150102", "20160516"], tz=tz_naive_fixture) + ) + tm.assert_series_equal(exp, result) + + +def test_mask(): + # compare with tested results in test_where + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.where(~cond, np.nan) + tm.assert_series_equal(rs, s.mask(cond)) + + rs = s.where(~cond) + rs2 = s.mask(cond) + tm.assert_series_equal(rs, rs2) + + rs = s.where(~cond, -s) + rs2 = s.mask(cond, -s) + tm.assert_series_equal(rs, rs2) + + cond = Series([True, False, False, True, False], index=s.index) + s2 = -(s.abs()) + rs = s2.where(~cond[:3]) + rs2 = s2.mask(cond[:3]) + tm.assert_series_equal(rs, rs2) + + rs = s2.where(~cond[:3], -s2) + rs2 = s2.mask(cond[:3], -s2) + tm.assert_series_equal(rs, rs2) + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.mask(1) + with pytest.raises(ValueError, match=msg): + s.mask(cond[:3].values, -s) + + # dtype 
changes + s = Series([1, 2, 3, 4]) + result = s.mask(s > 2, np.nan) + expected = Series([1, 2, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + # see gh-21891 + s = Series([1, 2]) + res = s.mask([True, False]) + + exp = Series([np.nan, 2]) + tm.assert_series_equal(res, exp) + + +def test_mask_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + rs.mask(cond, inplace=True) + tm.assert_series_equal(rs.dropna(), s[~cond]) + tm.assert_series_equal(rs, s.mask(cond)) + + rs = s.copy() + rs.mask(cond, -s, inplace=True) + tm.assert_series_equal(rs, s.mask(cond, -s)) diff --git a/pandas/tests/series/indexing/test_callable.py b/pandas/tests/series/indexing/test_callable.py new file mode 100644 index 00000000..fe575cf1 --- /dev/null +++ b/pandas/tests/series/indexing/test_callable.py @@ -0,0 +1,33 @@ +import pandas as pd +import pandas._testing as tm + + +def test_getitem_callable(): + # GH 12533 + s = pd.Series(4, index=list("ABCD")) + result = s[lambda x: "A"] + assert result == s.loc["A"] + + result = s[lambda x: ["A", "B"]] + tm.assert_series_equal(result, s.loc[["A", "B"]]) + + result = s[lambda x: [True, False, True, True]] + tm.assert_series_equal(result, s.iloc[[0, 2, 3]]) + + +def test_setitem_callable(): + # GH 12533 + s = pd.Series([1, 2, 3, 4], index=list("ABCD")) + s[lambda x: "A"] = -1 + tm.assert_series_equal(s, pd.Series([-1, 2, 3, 4], index=list("ABCD"))) + + +def test_setitem_other_callable(): + # GH 13299 + inc = lambda x: x + 1 + + s = pd.Series([1, 2, -1, 4]) + s[s < 0] = inc + + expected = pd.Series([1, 2, inc, 4]) + tm.assert_series_equal(s, expected) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py new file mode 100644 index 00000000..15ff5f6b --- /dev/null +++ b/pandas/tests/series/indexing/test_datetime.py @@ -0,0 +1,772 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas._libs import iNaT +import 
pandas._libs.index as _index + +import pandas as pd +from pandas import DataFrame, DatetimeIndex, NaT, Series, Timestamp, date_range +import pandas._testing as tm + + +""" +Also test support for datetime64[ns] in Series / DataFrame +""" + + +def test_fancy_getitem(): + dti = date_range( + freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1) + ) + + s = Series(np.arange(len(dti)), index=dti) + + assert s[48] == 48 + assert s["1/2/2009"] == 48 + assert s["2009-1-2"] == 48 + assert s[datetime(2009, 1, 2)] == 48 + assert s[Timestamp(datetime(2009, 1, 2))] == 48 + with pytest.raises(KeyError, match=r"^'2009-1-3'$"): + s["2009-1-3"] + tm.assert_series_equal( + s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)] + ) + + +def test_fancy_setitem(): + dti = date_range( + freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1) + ) + + s = Series(np.arange(len(dti)), index=dti) + s[48] = -1 + assert s[48] == -1 + s["1/2/2009"] = -2 + assert s[48] == -2 + s["1/2/2009":"2009-06-05"] = -3 + assert (s[48:54] == -3).all() + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"]) +@pytest.mark.parametrize("name", [None, "my_dti"]) +def test_dti_snap(name, tz): + dti = DatetimeIndex( + [ + "1/1/2002", + "1/2/2002", + "1/3/2002", + "1/4/2002", + "1/5/2002", + "1/6/2002", + "1/7/2002", + ], + name=name, + tz=tz, + freq="D", + ) + + result = dti.snap(freq="W-MON") + expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon") + expected = expected.repeat([3, 4]) + tm.assert_index_equal(result, expected) + assert result.tz == expected.tz + + result = dti.snap(freq="B") + + expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b") + expected = expected.repeat([1, 1, 1, 2, 2]) + tm.assert_index_equal(result, expected) + assert result.tz == expected.tz + + +def test_dti_reset_index_round_trip(): + dti = 
date_range(start="1/1/2001", end="6/1/2001", freq="D") + d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti) + d2 = d1.reset_index() + assert d2.dtypes[0] == np.dtype("M8[ns]") + d3 = d2.set_index("index") + tm.assert_frame_equal(d1, d3, check_names=False) + + # #2329 + stamp = datetime(2012, 11, 22) + df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]) + df = df.set_index("Date") + + assert df.index[0] == stamp + assert df.reset_index()["Date"][0] == stamp + + +def test_series_set_value(): + # #1561 + + dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)] + index = DatetimeIndex(dates) + + s = Series(dtype=object)._set_value(dates[0], 1.0) + s2 = s._set_value(dates[1], np.nan) + + expected = Series([1.0, np.nan], index=index) + + tm.assert_series_equal(s2, expected) + + # FIXME: dont leave commented-out + # s = Series(index[:1], index[:1]) + # s2 = s._set_value(dates[1], index[1]) + # assert s2.values.dtype == 'M8[ns]' + + +@pytest.mark.slow +def test_slice_locs_indexerror(): + times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] + s = Series(range(100000), times) + s.loc[datetime(1900, 1, 1) : datetime(2100, 1, 1)] + + +def test_slicing_datetimes(): + # GH 7523 + + # unique + df = DataFrame( + np.arange(4.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]], + ) + result = df.loc[datetime(2001, 1, 1, 10) :] + tm.assert_frame_equal(result, df) + result = df.loc[: datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11) :] + expected = df.iloc[1:] + tm.assert_frame_equal(result, expected) + result = df.loc["20010101 11":] + tm.assert_frame_equal(result, expected) + + # duplicates + df = pd.DataFrame( + np.arange(5.0, dtype="float64"), + index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]], + ) + + result = df.loc[datetime(2001, 1, 
1, 10) :] + tm.assert_frame_equal(result, df) + result = df.loc[: datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)] + tm.assert_frame_equal(result, df) + + result = df.loc[datetime(2001, 1, 1, 11) :] + expected = df.iloc[1:] + tm.assert_frame_equal(result, expected) + result = df.loc["20010101 11":] + tm.assert_frame_equal(result, expected) + + +def test_frame_datetime64_duplicated(): + dates = date_range("2010-07-01", end="2010-08-05") + + tst = DataFrame({"symbol": "AAA", "date": dates}) + result = tst.duplicated(["date", "symbol"]) + assert (-result).all() + + tst = DataFrame({"date": dates}) + result = tst.duplicated() + assert (-result).all() + + +def test_getitem_setitem_datetime_tz_pytz(): + from pytz import timezone as tz + from pandas import date_range + + N = 50 + # testing with timezone, GH #2785 + rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + tm.assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts.copy() + + # comparison dates with datetime MUST be localized! 
+ date = tz("US/Central").localize(datetime(1990, 1, 1, 3)) + result[date] = 0 + result[date] = ts[4] + tm.assert_series_equal(result, ts) + + +def test_getitem_setitem_datetime_tz_dateutil(): + from dateutil.tz import tzutc + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz + + tz = ( + lambda x: tzutc() if x == "UTC" else gettz(x) + ) # handle special case for utc in dateutil + + from pandas import date_range + + N = 50 + + # testing with timezone, GH #2785 + rng = date_range("1/1/1990", periods=N, freq="H", tz="America/New_York") + ts = Series(np.random.randn(N), index=rng) + + # also test Timestamp tz handling, GH #2789 + result = ts.copy() + result["1990-01-01 09:00:00+00:00"] = 0 + result["1990-01-01 09:00:00+00:00"] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts.copy() + result["1990-01-01 03:00:00-06:00"] = 0 + result["1990-01-01 03:00:00-06:00"] = ts[4] + tm.assert_series_equal(result, ts) + + # repeat with datetimes + result = ts.copy() + result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = 0 + result[datetime(1990, 1, 1, 9, tzinfo=tz("UTC"))] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts.copy() + result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = 0 + result[datetime(1990, 1, 1, 3, tzinfo=tz("America/Chicago"))] = ts[4] + tm.assert_series_equal(result, ts) + + +def test_getitem_setitem_datetimeindex(): + N = 50 + # testing with timezone, GH #2785 + rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern") + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04:00:00"] + expected = ts[4] + assert result == expected + + result = ts.copy() + result["1990-01-01 04:00:00"] = 0 + result["1990-01-01 04:00:00"] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0 + result["1990-01-01 
04:00:00":"1990-01-01 07:00:00"] = ts[4:8] + tm.assert_series_equal(result, ts) + + lb = "1990-01-01 04:00:00" + rb = "1990-01-01 07:00:00" + # GH#18435 strings get a pass from tzawareness compat + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + lb = "1990-01-01 04:00:00-0500" + rb = "1990-01-01 07:00:00-0500" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + # repeat all the above with naive datetimes + result = ts[datetime(1990, 1, 1, 4)] + expected = ts[4] + assert result == expected + + result = ts.copy() + result[datetime(1990, 1, 1, 4)] = 0 + result[datetime(1990, 1, 1, 4)] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = 0 + result[datetime(1990, 1, 1, 4) : datetime(1990, 1, 1, 7)] = ts[4:8] + tm.assert_series_equal(result, ts) + + lb = datetime(1990, 1, 1, 4) + rb = datetime(1990, 1, 1, 7) + msg = "Cannot compare tz-naive and tz-aware datetime-like objects" + with pytest.raises(TypeError, match=msg): + # tznaive vs tzaware comparison is invalid + # see GH#18376, GH#18162 + ts[(ts.index >= lb) & (ts.index <= rb)] + + lb = pd.Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo) + rb = pd.Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo) + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts[ts.index[4]] + expected = ts[4] + assert result == expected + + result = ts[ts.index[4:8]] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + tm.assert_series_equal(result, ts) + + # also test partial date slicing + result = ts["1990-01-02"] + expected 
= ts[24:48] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-02"] = 0 + result["1990-01-02"] = ts[24:48] + tm.assert_series_equal(result, ts) + + +def test_getitem_setitem_periodindex(): + from pandas import period_range + + N = 50 + rng = period_range("1/1/1990", periods=N, freq="H") + ts = Series(np.random.randn(N), index=rng) + + result = ts["1990-01-01 04"] + expected = ts[4] + assert result == expected + + result = ts.copy() + result["1990-01-01 04"] = 0 + result["1990-01-01 04"] = ts[4] + tm.assert_series_equal(result, ts) + + result = ts["1990-01-01 04":"1990-01-01 07"] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result["1990-01-01 04":"1990-01-01 07"] = 0 + result["1990-01-01 04":"1990-01-01 07"] = ts[4:8] + tm.assert_series_equal(result, ts) + + lb = "1990-01-01 04" + rb = "1990-01-01 07" + result = ts[(ts.index >= lb) & (ts.index <= rb)] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + # GH 2782 + result = ts[ts.index[4]] + expected = ts[4] + assert result == expected + + result = ts[ts.index[4:8]] + expected = ts[4:8] + tm.assert_series_equal(result, expected) + + result = ts.copy() + result[ts.index[4:8]] = 0 + result[4:8] = ts[4:8] + tm.assert_series_equal(result, ts) + + +# FutureWarning from NumPy. 
+@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") +def test_getitem_median_slice_bug(): + index = date_range("20090415", "20090519", freq="2B") + s = Series(np.random.randn(13), index=index) + + indexer = [slice(6, 7, None)] + result = s[indexer] + expected = s[indexer[0]] + tm.assert_series_equal(result, expected) + + +def test_datetime_indexing(): + from pandas import date_range + + index = date_range("1/1/2000", "1/7/2000") + index = index.repeat(3) + + s = Series(len(index), index=index) + stamp = Timestamp("1/8/2000") + + with pytest.raises(KeyError, match=r"^947289600000000000$"): + s[stamp] + s[stamp] = 0 + assert s[stamp] == 0 + + # not monotonic + s = Series(len(index), index=index) + s = s[::-1] + + with pytest.raises(KeyError, match=r"^947289600000000000$"): + s[stamp] + s[stamp] = 0 + assert s[stamp] == 0 + + +""" +test duplicates in time series +""" + + +@pytest.fixture +def dups(): + dates = [ + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 2), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + ] + + return Series(np.random.randn(len(dates)), index=dates) + + +def test_constructor(dups): + assert isinstance(dups, Series) + assert isinstance(dups.index, DatetimeIndex) + + +def test_is_unique_monotonic(dups): + assert not dups.index.is_unique + + +def test_index_unique(dups): + uniques = dups.index.unique() + expected = DatetimeIndex( + [ + datetime(2000, 1, 2), + datetime(2000, 1, 3), + datetime(2000, 1, 4), + datetime(2000, 1, 5), + ] + ) + assert uniques.dtype == "M8[ns]" # sanity + tm.assert_index_equal(uniques, expected) + assert dups.index.nunique() == 4 + + # #2563 + assert isinstance(uniques, DatetimeIndex) + + dups_local = dups.index.tz_localize("US/Eastern") + dups_local.name = "foo" + result = dups_local.unique() + expected = DatetimeIndex(expected, name="foo") + expected = 
expected.tz_localize("US/Eastern") + assert result.tz is not None + assert result.name == "foo" + tm.assert_index_equal(result, expected) + + # NaT, note this is excluded + arr = [1370745748 + t for t in range(20)] + [iNaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + arr = [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20) + ] + [NaT] + idx = DatetimeIndex(arr * 3) + tm.assert_index_equal(idx.unique(), DatetimeIndex(arr)) + assert idx.nunique() == 20 + assert idx.nunique(dropna=False) == 21 + + +def test_index_dupes_contains(): + d = datetime(2011, 12, 5, 20, 30) + ix = DatetimeIndex([d, d]) + assert d in ix + + +def test_duplicate_dates_indexing(dups): + ts = dups + + uniques = ts.index.unique() + for date in uniques: + result = ts[date] + + mask = ts.index == date + total = (ts.index == date).sum() + expected = ts[mask] + if total > 1: + tm.assert_series_equal(result, expected) + else: + tm.assert_almost_equal(result, expected[0]) + + cp = ts.copy() + cp[date] = 0 + expected = Series(np.where(mask, 0, ts), index=ts.index) + tm.assert_series_equal(cp, expected) + + with pytest.raises(KeyError, match=r"^947116800000000000$"): + ts[datetime(2000, 1, 6)] + + # new index + ts[datetime(2000, 1, 6)] = 0 + assert ts[datetime(2000, 1, 6)] == 0 + + +def test_range_slice(): + idx = DatetimeIndex(["1/1/2000", "1/2/2000", "1/2/2000", "1/3/2000", "1/4/2000"]) + + ts = Series(np.random.randn(len(idx)), index=idx) + + result = ts["1/2/2000":] + expected = ts[1:] + tm.assert_series_equal(result, expected) + + result = ts["1/2/2000":"1/3/2000"] + expected = ts[1:4] + tm.assert_series_equal(result, expected) + + +def test_groupby_average_dup_values(dups): + result = dups.groupby(level=0).mean() + expected = dups.groupby(dups.index).mean() + tm.assert_series_equal(result, expected) + + +def test_indexing_over_size_cutoff(): + import 
datetime + + # #1821 + + old_cutoff = _index._SIZE_CUTOFF + try: + _index._SIZE_CUTOFF = 1000 + + # create large list of non periodic datetime + dates = [] + sec = datetime.timedelta(seconds=1) + half_sec = datetime.timedelta(microseconds=500000) + d = datetime.datetime(2011, 12, 5, 20, 30) + n = 1100 + for i in range(n): + dates.append(d) + dates.append(d + sec) + dates.append(d + sec + half_sec) + dates.append(d + sec + sec + half_sec) + d += 3 * sec + + # duplicate some values in the list + duplicate_positions = np.random.randint(0, len(dates) - 1, 20) + for p in duplicate_positions: + dates[p + 1] = dates[p] + + df = DataFrame( + np.random.randn(len(dates), 4), index=dates, columns=list("ABCD") + ) + + pos = n * 3 + timestamp = df.index[pos] + assert timestamp in df.index + + # it works! + df.loc[timestamp] + assert len(df.loc[[timestamp]]) > 0 + finally: + _index._SIZE_CUTOFF = old_cutoff + + +def test_indexing_over_size_cutoff_period_index(monkeypatch): + # GH 27136 + + monkeypatch.setattr(_index, "_SIZE_CUTOFF", 1000) + + n = 1100 + idx = pd.period_range("1/1/2000", freq="T", periods=n) + assert idx._engine.over_size_threshold + + s = pd.Series(np.random.randn(len(idx)), index=idx) + + pos = n - 1 + timestamp = idx[pos] + assert timestamp in s.index + + # it works! + s[timestamp] + assert len(s.loc[[timestamp]]) > 0 + + +def test_indexing_unordered(): + # GH 2437 + rng = date_range(start="2011-01-01", end="2011-01-15") + ts = Series(np.random.rand(len(rng)), index=rng) + ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]]) + + for t in ts.index: + # TODO: unused? 
+ s = str(t) # noqa + + expected = ts[t] + result = ts2[t] + assert expected == result + + # GH 3448 (ranges) + def compare(slobj): + result = ts2[slobj].copy() + result = result.sort_index() + expected = ts[slobj] + tm.assert_series_equal(result, expected) + + compare(slice("2011-01-01", "2011-01-15")) + compare(slice("2010-12-30", "2011-01-15")) + compare(slice("2011-01-01", "2011-01-16")) + + # partial ranges + compare(slice("2011-01-01", "2011-01-6")) + compare(slice("2011-01-06", "2011-01-8")) + compare(slice("2011-01-06", "2011-01-12")) + + # single values + result = ts2["2011"].sort_index() + expected = ts["2011"] + tm.assert_series_equal(result, expected) + + # diff freq + rng = date_range(datetime(2005, 1, 1), periods=20, freq="M") + ts = Series(np.arange(len(rng)), index=rng) + ts = ts.take(np.random.permutation(20)) + + result = ts["2005"] + for t in result.index: + assert t.year == 2005 + + +def test_indexing(): + idx = date_range("2001-1-1", periods=20, freq="M") + ts = Series(np.random.rand(len(idx)), index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts["2001"] + expected.name = "A" + + df = DataFrame(dict(A=ts)) + result = df["2001"]["A"] + tm.assert_series_equal(expected, result) + + # setting + ts["2001"] = 1 + expected = ts["2001"] + expected.name = "A" + + df.loc["2001", "A"] = 1 + + result = df["2001"]["A"] + tm.assert_series_equal(expected, result) + + # GH3546 (not including times on the last day) + idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H") + ts = Series(range(len(idx)), index=idx) + expected = ts["2013-05"] + tm.assert_series_equal(expected, ts) + + idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S") + ts = Series(range(len(idx)), index=idx) + expected = ts["2013-05"] + tm.assert_series_equal(expected, ts) + + idx = [ + Timestamp("2013-05-31 00:00"), + Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)), + ] + ts = Series(range(len(idx)), 
index=idx) + expected = ts["2013"] + tm.assert_series_equal(expected, ts) + + # GH14826, indexing with a seconds resolution string / datetime object + df = DataFrame( + np.random.rand(5, 5), + columns=["open", "high", "low", "close", "volume"], + index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"), + ) + expected = df.loc[[df.index[2]]] + + # this is a single date, so will raise + with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"): + df["2012-01-02 18:01:02"] + msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)" + with pytest.raises(KeyError, match=msg): + df[df.index[2]] + + +""" +test NaT support +""" + + +def test_set_none_nan(): + series = Series(date_range("1/1/2000", periods=10)) + series[3] = None + assert series[3] is NaT + + series[3:5] = None + assert series[4] is NaT + + series[5] = np.nan + assert series[5] is NaT + + series[5:7] = np.nan + assert series[6] is NaT + + +def test_nat_operations(): + # GH 8617 + s = Series([0, pd.NaT], dtype="m8[ns]") + exp = s[0] + assert s.median() == exp + assert s.min() == exp + assert s.max() == exp + + +@pytest.mark.parametrize("method", ["round", "floor", "ceil"]) +@pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"]) +def test_round_nat(method, freq): + # GH14940 + s = Series([pd.NaT]) + expected = Series(pd.NaT) + round_method = getattr(s.dt, method) + tm.assert_series_equal(round_method(freq), expected) + + +def test_setitem_tuple_with_datetimetz(): + # GH 20441 + arr = date_range("2017", periods=4, tz="US/Eastern") + index = [(0, 1), (0, 2), (0, 3), (0, 4)] + result = Series(arr, index=index) + expected = result.copy() + result[(0, 1)] = np.nan + expected.iloc[0] = np.nan + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_iloc.py b/pandas/tests/series/indexing/test_iloc.py new file mode 100644 index 00000000..f276eb5b --- /dev/null +++ b/pandas/tests/series/indexing/test_iloc.py @@ -0,0 +1,32 @@ 
+import numpy as np + +from pandas import Series +import pandas._testing as tm + + +def test_iloc(): + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + + for i in range(len(s)): + result = s.iloc[i] + exp = s[s.index[i]] + tm.assert_almost_equal(result, exp) + + # pass a slice + result = s.iloc[slice(1, 3)] + expected = s.loc[2:4] + tm.assert_series_equal(result, expected) + + # test slice is a view + result[:] = 0 + assert (s[1:3] == 0).all() + + # list of integers + result = s.iloc[[0, 2, 3, 4, 5]] + expected = s.reindex(s.index[[0, 2, 3, 4, 5]]) + tm.assert_series_equal(result, expected) + + +def test_iloc_nonunique(): + s = Series([0, 1, 2], index=[0, 1, 0]) + assert s.iloc[2] == 2 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py new file mode 100644 index 00000000..4fa70793 --- /dev/null +++ b/pandas/tests/series/indexing/test_indexing.py @@ -0,0 +1,937 @@ +""" test get/set & misc """ + +from datetime import timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_scalar + +import pandas as pd +from pandas import Categorical, DataFrame, MultiIndex, Series, Timedelta, Timestamp +import pandas._testing as tm + +from pandas.tseries.offsets import BDay + + +def test_basic_indexing(): + s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"]) + + msg = "index out of bounds" + with pytest.raises(IndexError, match=msg): + s[5] + msg = "index 5 is out of bounds for axis 0 with size 5" + with pytest.raises(IndexError, match=msg): + s[5] = 0 + + with pytest.raises(KeyError, match=r"^'c'$"): + s["c"] + + s = s.sort_index() + + msg = r"index out of bounds|^5$" + with pytest.raises(IndexError, match=msg): + s[5] + msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$" + with pytest.raises(IndexError, match=msg): + s[5] = 0 + + +def test_basic_getitem_with_labels(datetime_series): + indices = datetime_series.index[[5, 10, 15]] + + result = 
datetime_series[indices] + expected = datetime_series.reindex(indices) + tm.assert_series_equal(result, expected) + + result = datetime_series[indices[0] : indices[2]] + expected = datetime_series.loc[indices[0] : indices[2]] + tm.assert_series_equal(result, expected) + + # integer indexes, be careful + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + inds = [0, 2, 5, 7, 8] + arr_inds = np.array([0, 2, 5, 7, 8]) + with pytest.raises(KeyError, match="with any missing labels"): + s[inds] + + with pytest.raises(KeyError, match="with any missing labels"): + s[arr_inds] + + # GH12089 + # with tz for values + s = Series( + pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"] + ) + expected = Timestamp("2011-01-01", tz="US/Eastern") + result = s.loc["a"] + assert result == expected + result = s.iloc[0] + assert result == expected + result = s["a"] + assert result == expected + + +def test_getitem_setitem_ellipsis(): + s = Series(np.random.randn(10)) + + np.fix(s) + + result = s[...] + tm.assert_series_equal(result, s) + + s[...] 
= 5 + assert (result == 5).all() + + +def test_getitem_get(datetime_series, string_series, object_series): + idx1 = string_series.index[5] + idx2 = object_series.index[5] + + assert string_series[idx1] == string_series.get(idx1) + assert object_series[idx2] == object_series.get(idx2) + + assert string_series[idx1] == string_series[5] + assert object_series[idx2] == object_series[5] + + assert string_series.get(-1) == string_series.get(string_series.index[-1]) + assert string_series[5] == string_series.get(string_series.index[5]) + + # missing + d = datetime_series.index[0] - BDay() + msg = r"Timestamp\('1999-12-31 00:00:00', freq='B'\)" + with pytest.raises(KeyError, match=msg): + datetime_series[d] + + # None + # GH 5652 + s1 = Series(dtype=object) + s2 = Series(dtype=object, index=list("abc")) + for s in [s1, s2]: + result = s.get(None) + assert result is None + + +def test_getitem_fancy(string_series, object_series): + slice1 = string_series[[1, 2, 3]] + slice2 = object_series[[1, 2, 3]] + assert string_series.index[2] == slice1.index[1] + assert object_series.index[2] == slice2.index[1] + assert string_series[2] == slice1[1] + assert object_series[2] == slice2[1] + + +def test_getitem_generator(string_series): + gen = (x > 0 for x in string_series) + result = string_series[gen] + result2 = string_series[iter(string_series > 0)] + expected = string_series[string_series > 0] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + +def test_type_promotion(): + # GH12599 + s = pd.Series(dtype=object) + s["a"] = pd.Timestamp("2016-01-01") + s["b"] = 3.0 + s["c"] = "foo" + expected = Series([pd.Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"]) + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize( + "result_1, duplicate_item, expected_1", + [ + [ + pd.Series({1: 12, 2: [1, 2, 2, 3]}), + pd.Series({1: 313}), + pd.Series({1: 12}, dtype=object), + ], + [ + pd.Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}), + 
pd.Series({1: [1, 2, 3]}), + pd.Series({1: [1, 2, 3]}), + ], + ], +) +def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1): + # GH 17610 + result = result_1.append(duplicate_item) + expected = expected_1.append(duplicate_item) + tm.assert_series_equal(result[1], expected) + assert result[2] == result_1[2] + + +def test_getitem_out_of_bounds(datetime_series): + # don't segfault, GH #495 + msg = "index out of bounds" + with pytest.raises(IndexError, match=msg): + datetime_series[len(datetime_series)] + + # GH #917 + s = Series([], dtype=object) + with pytest.raises(IndexError, match=msg): + s[-1] + + +def test_getitem_setitem_integers(): + # caused bug without test + s = Series([1, 2, 3], ["a", "b", "c"]) + + assert s.iloc[0] == s["a"] + s.iloc[0] = 5 + tm.assert_almost_equal(s["a"], 5) + + +def test_getitem_box_float64(datetime_series): + value = datetime_series[5] + assert isinstance(value, np.float64) + + +@pytest.mark.parametrize( + "arr", + [np.random.randn(10), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")], +) +def test_get(arr): + # GH 21260 + s = Series(arr, index=[2 * i for i in range(len(arr))]) + assert s.get(4) == s.iloc[2] + + result = s.get([4, 6]) + expected = s.iloc[[2, 3]] + tm.assert_series_equal(result, expected) + + result = s.get(slice(2)) + expected = s.iloc[[0, 1]] + tm.assert_series_equal(result, expected) + + assert s.get(-1) is None + assert s.get(s.index.max() + 1) is None + + s = Series(arr[:6], index=list("abcdef")) + assert s.get("c") == s.iloc[2] + + result = s.get(slice("b", "d")) + expected = s.iloc[[1, 2, 3]] + tm.assert_series_equal(result, expected) + + result = s.get("Z") + assert result is None + + assert s.get(4) == s.iloc[4] + assert s.get(-1) == s.iloc[-1] + assert s.get(len(s)) is None + + # GH 21257 + s = pd.Series(arr) + s2 = s[::2] + assert s2.get(1) is None + + +def test_series_box_timestamp(): + rng = pd.date_range("20090415", "20090519", freq="B") + ser = Series(rng) + + assert 
isinstance(ser[5], pd.Timestamp) + + rng = pd.date_range("20090415", "20090519", freq="B") + ser = Series(rng, index=rng) + assert isinstance(ser[5], pd.Timestamp) + + assert isinstance(ser.iat[5], pd.Timestamp) + + +def test_getitem_ambiguous_keyerror(): + s = Series(range(10), index=list(range(0, 20, 2))) + with pytest.raises(KeyError, match=r"^1$"): + s[1] + with pytest.raises(KeyError, match=r"^1$"): + s.loc[1] + + +def test_getitem_unordered_dup(): + obj = Series(range(5), index=["c", "a", "a", "b", "b"]) + assert is_scalar(obj["c"]) + assert obj["c"] == 0 + + +def test_getitem_dups_with_missing(): + # breaks reindex, so need to use .loc internally + # GH 4246 + s = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"]) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[["foo", "bar", "bah", "bam"]] + + with pytest.raises(KeyError, match="with any missing labels"): + s[["foo", "bar", "bah", "bam"]] + + +def test_getitem_dups(): + s = Series(range(5), index=["A", "A", "B", "C", "C"], dtype=np.int64) + expected = Series([3, 4], index=["C", "C"], dtype=np.int64) + result = s["C"] + tm.assert_series_equal(result, expected) + + +def test_setitem_ambiguous_keyerror(): + s = Series(range(10), index=list(range(0, 20, 2))) + + # equivalent of an append + s2 = s.copy() + s2[1] = 5 + expected = s.append(Series([5], index=[1])) + tm.assert_series_equal(s2, expected) + + s2 = s.copy() + s2.loc[1] = 5 + expected = s.append(Series([5], index=[1])) + tm.assert_series_equal(s2, expected) + + +def test_getitem_dataframe(): + rng = list(range(10)) + s = pd.Series(10, index=rng) + df = pd.DataFrame(rng, index=rng) + msg = ( + "Indexing a Series with DataFrame is not supported, " + "use the appropriate DataFrame column" + ) + with pytest.raises(TypeError, match=msg): + s[df > 5] + + +def test_setitem(datetime_series, string_series): + datetime_series[datetime_series.index[5]] = np.NaN + datetime_series[[1, 2, 17]] = np.NaN + datetime_series[6] = np.NaN + assert 
np.isnan(datetime_series[6]) + assert np.isnan(datetime_series[2]) + datetime_series[np.isnan(datetime_series)] = 5 + assert not np.isnan(datetime_series[2]) + + # caught this bug when writing tests + series = Series(tm.makeIntIndex(20).astype(float), index=tm.makeIntIndex(20)) + + series[::2] = 0 + assert (series[::2] == 0).all() + + # set item that's not contained + s = string_series.copy() + s["foobar"] = 1 + + app = Series([1], index=["foobar"], name="series") + expected = string_series.append(app) + tm.assert_series_equal(s, expected) + + # Test for issue #10193 + key = pd.Timestamp("2012-01-01") + series = pd.Series(dtype=object) + series[key] = 47 + expected = pd.Series(47, [key]) + tm.assert_series_equal(series, expected) + + series = pd.Series([], pd.DatetimeIndex([], freq="D"), dtype=object) + series[key] = 47 + expected = pd.Series(47, pd.DatetimeIndex([key], freq="D")) + tm.assert_series_equal(series, expected) + + +def test_setitem_dtypes(): + # change dtypes + # GH 4463 + expected = Series([np.nan, 2, 3]) + + s = Series([1, 2, 3]) + s.iloc[0] = np.nan + tm.assert_series_equal(s, expected) + + s = Series([1, 2, 3]) + s.loc[0] = np.nan + tm.assert_series_equal(s, expected) + + s = Series([1, 2, 3]) + s[0] = np.nan + tm.assert_series_equal(s, expected) + + s = Series([False]) + s.loc[0] = np.nan + tm.assert_series_equal(s, Series([np.nan])) + + s = Series([False, True]) + s.loc[0] = np.nan + tm.assert_series_equal(s, Series([np.nan, 1.0])) + + +def test_set_value(datetime_series, string_series): + idx = datetime_series.index[10] + res = datetime_series._set_value(idx, 0) + assert res is datetime_series + assert datetime_series[idx] == 0 + + # equiv + s = string_series.copy() + res = s._set_value("foobar", 0) + assert res is s + assert res.index[-1] == "foobar" + assert res["foobar"] == 0 + + s = string_series.copy() + s.loc["foobar"] = 0 + assert s.index[-1] == "foobar" + assert s["foobar"] == 0 + + +def test_setslice(datetime_series): + sl = 
datetime_series[5:20] + assert len(sl) == len(sl.index) + assert sl.index.is_unique is True + + +def test_2d_to_1d_assignment_raises(): + x = np.random.randn(2, 2) + y = pd.Series(range(2)) + + msg = ( + r"shape mismatch: value array of shape \(2,2\) could not be" + r" broadcast to indexing result of shape \(2,\)" + ) + with pytest.raises(ValueError, match=msg): + y.loc[range(2)] = x + + msg = r"could not broadcast input array from shape \(2,2\) into shape \(2\)" + with pytest.raises(ValueError, match=msg): + y.loc[:] = x + + +# FutureWarning from NumPy about [slice(None, 5). +@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") +def test_basic_getitem_setitem_corner(datetime_series): + # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] + msg = "Can only tuple-index with a MultiIndex" + with pytest.raises(ValueError, match=msg): + datetime_series[:, 2] + with pytest.raises(ValueError, match=msg): + datetime_series[:, 2] = 2 + + # weird lists. [slice(0, 5)] will work but not two slices + result = datetime_series[[slice(None, 5)]] + expected = datetime_series[:5] + tm.assert_series_equal(result, expected) + + # OK + msg = r"unhashable type(: 'slice')?" 
+ with pytest.raises(TypeError, match=msg): + datetime_series[[5, slice(None, None)]] + with pytest.raises(TypeError, match=msg): + datetime_series[[5, slice(None, None)]] = 2 + + +@pytest.mark.parametrize("tz", ["US/Eastern", "UTC", "Asia/Tokyo"]) +def test_setitem_with_tz(tz): + orig = pd.Series(pd.date_range("2016-01-01", freq="H", periods=3, tz=tz)) + assert orig.dtype == "datetime64[ns, {0}]".format(tz) + + # scalar + s = orig.copy() + s[1] = pd.Timestamp("2011-01-01", tz=tz) + exp = pd.Series( + [ + pd.Timestamp("2016-01-01 00:00", tz=tz), + pd.Timestamp("2011-01-01 00:00", tz=tz), + pd.Timestamp("2016-01-01 02:00", tz=tz), + ] + ) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[1] = pd.Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[1] = pd.Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(s, exp) + + # vector + vals = pd.Series( + [pd.Timestamp("2011-01-01", tz=tz), pd.Timestamp("2012-01-01", tz=tz)], + index=[1, 2], + ) + assert vals.dtype == "datetime64[ns, {0}]".format(tz) + + s[[1, 2]] = vals + exp = pd.Series( + [ + pd.Timestamp("2016-01-01 00:00", tz=tz), + pd.Timestamp("2011-01-01 00:00", tz=tz), + pd.Timestamp("2012-01-01 00:00", tz=tz), + ] + ) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + +def test_setitem_with_tz_dst(): + # GH XXX + tz = "US/Eastern" + orig = pd.Series(pd.date_range("2016-11-06", freq="H", periods=3, tz=tz)) + assert orig.dtype == "datetime64[ns, {0}]".format(tz) + + # scalar + s = orig.copy() + s[1] = pd.Timestamp("2011-01-01", tz=tz) + exp = pd.Series( + [ + pd.Timestamp("2016-11-06 00:00-04:00", tz=tz), + pd.Timestamp("2011-01-01 00:00-05:00", tz=tz), + pd.Timestamp("2016-11-06 01:00-05:00", tz=tz), + ] + ) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[1] = pd.Timestamp("2011-01-01", tz=tz) + 
tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[1] = pd.Timestamp("2011-01-01", tz=tz) + tm.assert_series_equal(s, exp) + + # vector + vals = pd.Series( + [pd.Timestamp("2011-01-01", tz=tz), pd.Timestamp("2012-01-01", tz=tz)], + index=[1, 2], + ) + assert vals.dtype == "datetime64[ns, {0}]".format(tz) + + s[[1, 2]] = vals + exp = pd.Series( + [ + pd.Timestamp("2016-11-06 00:00", tz=tz), + pd.Timestamp("2011-01-01 00:00", tz=tz), + pd.Timestamp("2012-01-01 00:00", tz=tz), + ] + ) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.loc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.iloc[[1, 2]] = vals + tm.assert_series_equal(s, exp) + + +def test_categorical_assigning_ops(): + orig = Series(Categorical(["b", "b"], categories=["a", "b"])) + s = orig.copy() + s[:] = "a" + exp = Series(Categorical(["a", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s[1] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s[s.index > 0] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s[[False, True]] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"])) + tm.assert_series_equal(s, exp) + + s = orig.copy() + s.index = ["x", "y"] + s["y"] = "a" + exp = Series(Categorical(["b", "a"], categories=["a", "b"]), index=["x", "y"]) + tm.assert_series_equal(s, exp) + + # ensure that one can set something to np.nan + s = Series(Categorical([1, 2, 3])) + exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3])) + s[1] = np.nan + tm.assert_series_equal(s, exp) + + +def test_slice(string_series, object_series): + numSlice = string_series[10:20] + numSliceEnd = string_series[-10:] + objSlice = object_series[10:20] + + assert string_series.index[9] not in numSlice.index + assert object_series.index[9] not in objSlice.index + + assert len(numSlice) == 
len(numSlice.index) + assert string_series[numSlice.index[0]] == numSlice[numSlice.index[0]] + + assert numSlice.index[1] == string_series.index[11] + assert tm.equalContents(numSliceEnd, np.array(string_series)[-10:]) + + # Test return view. + sl = string_series[10:20] + sl[:] = 0 + + assert (string_series[10:20] == 0).all() + + +def test_slice_can_reorder_not_uniquely_indexed(): + s = Series(1, index=["a", "a", "b", "b", "c"]) + s[::-1] # it works! + + +def test_loc_setitem(string_series): + inds = string_series.index[[3, 4, 7]] + + result = string_series.copy() + result.loc[inds] = 5 + + expected = string_series.copy() + expected[[3, 4, 7]] = 5 + tm.assert_series_equal(result, expected) + + result.iloc[5:10] = 10 + expected[5:10] = 10 + tm.assert_series_equal(result, expected) + + # set slice with indices + d1, d2 = string_series.index[[5, 15]] + result.loc[d1:d2] = 6 + expected[5:16] = 6 # because it's inclusive + tm.assert_series_equal(result, expected) + + # set index value + string_series.loc[d1] = 4 + string_series.loc[d2] = 6 + assert string_series[d1] == 4 + assert string_series[d2] == 6 + + +def test_setitem_na(): + # these induce dtype changes + expected = Series([np.nan, 3, np.nan, 5, np.nan, 7, np.nan, 9, np.nan]) + s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) + s[::2] = np.nan + tm.assert_series_equal(s, expected) + + # gets coerced to float, right? 
+ expected = Series([np.nan, 1, np.nan, 0]) + s = Series([True, True, False, False]) + s[::2] = np.nan + tm.assert_series_equal(s, expected) + + expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) + s = Series(np.arange(10)) + s[:5] = np.nan + tm.assert_series_equal(s, expected) + + +def test_timedelta_assignment(): + # GH 8209 + s = Series([], dtype=object) + s.loc["B"] = timedelta(1) + tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"])) + + s = s.reindex(s.index.insert(0, "A")) + tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"])) + + result = s.fillna(timedelta(1)) + expected = Series(Timedelta("1 days"), index=["A", "B"]) + tm.assert_series_equal(result, expected) + + s.loc["A"] = timedelta(1) + tm.assert_series_equal(s, expected) + + # GH 14155 + s = Series(10 * [np.timedelta64(10, "m")]) + s.loc[[1, 2, 3]] = np.timedelta64(20, "m") + expected = pd.Series(10 * [np.timedelta64(10, "m")]) + expected.loc[[1, 2, 3]] = pd.Timedelta(np.timedelta64(20, "m")) + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize( + "nat_val,should_cast", + [ + (pd.NaT, True), + (np.timedelta64("NaT", "ns"), False), + (np.datetime64("NaT", "ns"), True), + ], +) +@pytest.mark.parametrize("tz", [None, "UTC"]) +def test_dt64_series_assign_nat(nat_val, should_cast, tz): + # some nat-like values should be cast to datetime64 when inserting + # into a datetime64 series. Others should coerce to object + # and retain their dtypes. 
+ dti = pd.date_range("2016-01-01", periods=3, tz=tz) + base = pd.Series(dti) + expected = pd.Series([pd.NaT] + list(dti[1:]), dtype=dti.dtype) + if not should_cast: + expected = expected.astype(object) + + ser = base.copy(deep=True) + ser[0] = nat_val + tm.assert_series_equal(ser, expected) + + ser = base.copy(deep=True) + ser.loc[0] = nat_val + tm.assert_series_equal(ser, expected) + + ser = base.copy(deep=True) + ser.iloc[0] = nat_val + tm.assert_series_equal(ser, expected) + + +@pytest.mark.parametrize( + "nat_val,should_cast", + [ + (pd.NaT, True), + (np.timedelta64("NaT", "ns"), True), + (np.datetime64("NaT", "ns"), False), + ], +) +def test_td64_series_assign_nat(nat_val, should_cast): + # some nat-like values should be cast to timedelta64 when inserting + # into a timedelta64 series. Others should coerce to object + # and retain their dtypes. + base = pd.Series([0, 1, 2], dtype="m8[ns]") + expected = pd.Series([pd.NaT, 1, 2], dtype="m8[ns]") + if not should_cast: + expected = expected.astype(object) + + ser = base.copy(deep=True) + ser[0] = nat_val + tm.assert_series_equal(ser, expected) + + ser = base.copy(deep=True) + ser.loc[0] = nat_val + tm.assert_series_equal(ser, expected) + + ser = base.copy(deep=True) + ser.iloc[0] = nat_val + tm.assert_series_equal(ser, expected) + + +@pytest.mark.parametrize( + "td", + [ + pd.Timedelta("9 days"), + pd.Timedelta("9 days").to_timedelta64(), + pd.Timedelta("9 days").to_pytimedelta(), + ], +) +def test_append_timedelta_does_not_cast(td): + # GH#22717 inserting a Timedelta should _not_ cast to int64 + expected = pd.Series(["x", td], index=[0, "td"], dtype=object) + + ser = pd.Series(["x"]) + ser["td"] = td + tm.assert_series_equal(ser, expected) + assert isinstance(ser["td"], pd.Timedelta) + + ser = pd.Series(["x"]) + ser.loc["td"] = pd.Timedelta("9 days") + tm.assert_series_equal(ser, expected) + assert isinstance(ser["td"], pd.Timedelta) + + +def test_underlying_data_conversion(): + # GH 4080 + df = DataFrame({c: 
[1, 2, 3] for c in ["a", "b", "c"]}) + df.set_index(["a", "b", "c"], inplace=True) + s = Series([1], index=[(2, 2, 2)]) + df["val"] = 0 + df + df["val"].update(s) + + expected = DataFrame(dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0])) + expected.set_index(["a", "b", "c"], inplace=True) + tm.assert_frame_equal(df, expected) + + # GH 3970 + # these are chained assignments as well + pd.set_option("chained_assignment", None) + df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) + df["cc"] = 0.0 + + ck = [True] * len(df) + + df["bb"].iloc[0] = 0.13 + + # TODO: unused + df_tmp = df.iloc[ck] # noqa + + df["bb"].iloc[0] = 0.15 + assert df["bb"].iloc[0] == 0.15 + pd.set_option("chained_assignment", "raise") + + # GH 3217 + df = DataFrame(dict(a=[1, 3], b=[np.nan, 2])) + df["c"] = np.nan + df["c"].update(pd.Series(["foo"], index=[0])) + + expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=["foo", np.nan])) + tm.assert_frame_equal(df, expected) + + +def test_preserve_refs(datetime_series): + seq = datetime_series[[5, 10, 15]] + seq[1] = np.NaN + assert not np.isnan(datetime_series[10]) + + +def test_cast_on_putmask(): + # GH 2746 + + # need to upcast + s = Series([1, 2], index=[1, 2], dtype="int64") + s[[True, False]] = Series([0], index=[1], dtype="int64") + expected = Series([0, 2], index=[1, 2], dtype="int64") + + tm.assert_series_equal(s, expected) + + +def test_type_promote_putmask(): + # GH8387: test that changing types does not break alignment + ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5) + left, mask = ts.copy(), ts > 0 + right = ts[mask].copy().map(str) + left[mask] = right + tm.assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t)) + + s = Series([0, 1, 2, 0]) + mask = s > 0 + s2 = s[mask].map(str) + s[mask] = s2 + tm.assert_series_equal(s, Series([0, "1", "2", 0])) + + s = Series([0, "foo", "bar", 0]) + mask = Series([False, True, True, False]) + s2 = s[mask] + s[mask] = s2 + tm.assert_series_equal(s, Series([0, 
"foo", "bar", 0])) + + +def test_multilevel_preserve_name(): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + s = Series(np.random.randn(len(index)), index=index, name="sth") + + result = s["foo"] + result2 = s.loc["foo"] + assert result.name == s.name + assert result2.name == s.name + + +def test_setitem_scalar_into_readonly_backing_data(): + # GH14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + for n in range(len(series)): + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[n] = 1 + + assert array[n] == 0 + + +def test_setitem_slice_into_readonly_backing_data(): + # GH14359: test that you cannot mutate a read only buffer + + array = np.zeros(5) + array.flags.writeable = False # make the array immutable + series = Series(array) + + msg = "assignment destination is read-only" + with pytest.raises(ValueError, match=msg): + series[1:3] = 1 + + assert not array.any() + + +""" +miscellaneous methods +""" + + +def test_pop(): + # GH 6600 + df = DataFrame({"A": 0, "B": np.arange(5, dtype="int64"), "C": 0}) + k = df.iloc[4] + + result = k.pop("B") + assert result == 4 + + expected = Series([0, 0], index=["A", "C"], name=4) + tm.assert_series_equal(k, expected) + + +def test_take(): + s = Series([-1, 5, 6, 2, 4]) + + actual = s.take([1, 3, 4]) + expected = Series([5, 2, 4], index=[1, 3, 4]) + tm.assert_series_equal(actual, expected) + + actual = s.take([-1, 3, 4]) + expected = Series([4, 2, 4], index=[4, 3, 4]) + tm.assert_series_equal(actual, expected) + + msg = "index {} is out of bounds for( axis 0 with)? 
size 5" + with pytest.raises(IndexError, match=msg.format(10)): + s.take([1, 10]) + with pytest.raises(IndexError, match=msg.format(5)): + s.take([2, 5]) + + +def test_take_categorical(): + # https://github.com/pandas-dev/pandas/issues/20664 + s = Series(pd.Categorical(["a", "b", "c"])) + result = s.take([-2, -2, 0]) + expected = Series( + pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0] + ) + tm.assert_series_equal(result, expected) + + +def test_head_tail(string_series): + tm.assert_series_equal(string_series.head(), string_series[:5]) + tm.assert_series_equal(string_series.head(0), string_series[0:0]) + tm.assert_series_equal(string_series.tail(), string_series[-5:]) + tm.assert_series_equal(string_series.tail(0), string_series[0:0]) + + +def test_uint_drop(any_int_dtype): + # see GH18311 + # assigning series.loc[0] = 4 changed series.dtype to int + series = pd.Series([1, 2, 3], dtype=any_int_dtype) + series.loc[0] = 4 + expected = pd.Series([4, 2, 3], dtype=any_int_dtype) + tm.assert_series_equal(series, expected) + + +def test_getitem_2d_no_warning(): + # https://github.com/pandas-dev/pandas/issues/30867 + # Don't want to support this long-term, but + # for now ensure that the warning from Index + # doesn't comes through via Series.__getitem__. 
+ series = pd.Series([1, 2, 3], index=[1, 2, 3]) + with tm.assert_produces_warning(None): + series[:, None] diff --git a/pandas/tests/series/indexing/test_loc.py b/pandas/tests/series/indexing/test_loc.py new file mode 100644 index 00000000..7d6b6c78 --- /dev/null +++ b/pandas/tests/series/indexing/test_loc.py @@ -0,0 +1,159 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Series, Timestamp +import pandas._testing as tm + + +@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) +def test_loc_uint64(val, expected): + # see gh-19399 + s = Series({2 ** 63 - 1: 3, 2 ** 63: 4}) + assert s.loc[val] == expected + + +def test_loc_getitem(string_series, datetime_series): + inds = string_series.index[[3, 4, 7]] + tm.assert_series_equal(string_series.loc[inds], string_series.reindex(inds)) + tm.assert_series_equal(string_series.iloc[5::2], string_series[5::2]) + + # slice with indices + d1, d2 = datetime_series.index[[5, 15]] + result = datetime_series.loc[d1:d2] + expected = datetime_series.truncate(d1, d2) + tm.assert_series_equal(result, expected) + + # boolean + mask = string_series > string_series.median() + tm.assert_series_equal(string_series.loc[mask], string_series[mask]) + + # ask for index value + assert datetime_series.loc[d1] == datetime_series[d1] + assert datetime_series.loc[d2] == datetime_series[d2] + + +def test_loc_getitem_not_monotonic(datetime_series): + d1, d2 = datetime_series.index[[5, 15]] + + ts2 = datetime_series[::2][[1, 2, 0]] + + msg = r"Timestamp\('2000-01-10 00:00:00'\)" + with pytest.raises(KeyError, match=msg): + ts2.loc[d1:d2] + with pytest.raises(KeyError, match=msg): + ts2.loc[d1:d2] = 0 + + +def test_loc_getitem_setitem_integer_slice_keyerrors(): + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + + # this is OK + cp = s.copy() + cp.iloc[4:10] = 0 + assert (cp.iloc[4:10] == 0).all() + + # so is this + cp = s.copy() + cp.iloc[3:11] = 0 + assert (cp.iloc[3:11] == 
0).values.all() + + result = s.iloc[2:6] + result2 = s.loc[3:11] + expected = s.reindex([4, 6, 8, 10]) + + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + # non-monotonic, raise KeyError + s2 = s.iloc[list(range(5)) + list(range(9, 4, -1))] + with pytest.raises(KeyError, match=r"^3$"): + s2.loc[3:11] + with pytest.raises(KeyError, match=r"^3$"): + s2.loc[3:11] = 0 + + +def test_loc_getitem_iterator(string_series): + idx = iter(string_series.index[:10]) + result = string_series.loc[idx] + tm.assert_series_equal(result, string_series[:10]) + + +def test_loc_setitem_boolean(string_series): + mask = string_series > string_series.median() + + result = string_series.copy() + result.loc[mask] = 0 + expected = string_series + expected[mask] = 0 + tm.assert_series_equal(result, expected) + + +def test_loc_setitem_corner(string_series): + inds = list(string_series.index[[5, 8, 12]]) + string_series.loc[inds] = 5 + msg = r"\['foo'\] not in index" + with pytest.raises(KeyError, match=msg): + string_series.loc[inds + ["foo"]] = 5 + + +def test_basic_setitem_with_labels(datetime_series): + indices = datetime_series.index[[5, 10, 15]] + + cp = datetime_series.copy() + exp = datetime_series.copy() + cp[indices] = 0 + exp.loc[indices] = 0 + tm.assert_series_equal(cp, exp) + + cp = datetime_series.copy() + exp = datetime_series.copy() + cp[indices[0] : indices[2]] = 0 + exp.loc[indices[0] : indices[2]] = 0 + tm.assert_series_equal(cp, exp) + + # integer indexes, be careful + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + inds = [0, 4, 6] + arr_inds = np.array([0, 4, 6]) + + cp = s.copy() + exp = s.copy() + s[inds] = 0 + s.loc[inds] = 0 + tm.assert_series_equal(cp, exp) + + cp = s.copy() + exp = s.copy() + s[arr_inds] = 0 + s.loc[arr_inds] = 0 + tm.assert_series_equal(cp, exp) + + inds_notfound = [0, 4, 5, 6] + arr_inds_notfound = np.array([0, 4, 5, 6]) + msg = r"\[5\] not contained in the index" + with 
pytest.raises(ValueError, match=msg): + s[inds_notfound] = 0 + with pytest.raises(Exception, match=msg): + s[arr_inds_notfound] = 0 + + # GH12089 + # with tz for values + s = Series( + pd.date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"] + ) + s2 = s.copy() + expected = Timestamp("2011-01-03", tz="US/Eastern") + s2.loc["a"] = expected + result = s2.loc["a"] + assert result == expected + + s2 = s.copy() + s2.iloc[0] = expected + result = s2.iloc[0] + assert result == expected + + s2 = s.copy() + s2["a"] = expected + result = s2["a"] + assert result == expected diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py new file mode 100644 index 00000000..ce0d04ff --- /dev/null +++ b/pandas/tests/series/indexing/test_numeric.py @@ -0,0 +1,313 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series +import pandas._testing as tm + + +def test_get(): + # GH 6383 + s = Series( + np.array( + [ + 43, + 48, + 60, + 48, + 50, + 51, + 50, + 45, + 57, + 48, + 56, + 45, + 51, + 39, + 55, + 43, + 54, + 52, + 51, + 54, + ] + ) + ) + + result = s.get(25, 0) + expected = 0 + assert result == expected + + s = Series( + np.array( + [ + 43, + 48, + 60, + 48, + 50, + 51, + 50, + 45, + 57, + 48, + 56, + 45, + 51, + 39, + 55, + 43, + 54, + 52, + 51, + 54, + ] + ), + index=pd.Float64Index( + [ + 25.0, + 36.0, + 49.0, + 64.0, + 81.0, + 100.0, + 121.0, + 144.0, + 169.0, + 196.0, + 1225.0, + 1296.0, + 1369.0, + 1444.0, + 1521.0, + 1600.0, + 1681.0, + 1764.0, + 1849.0, + 1936.0, + ] + ), + ) + + result = s.get(25, 0) + expected = 43 + assert result == expected + + # GH 7407 + # with a boolean accessor + df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3}) + vc = df.i.value_counts() + result = vc.get(99, default="Missing") + assert result == "Missing" + + vc = df.b.value_counts() + result = vc.get(False, default="Missing") + assert result == 3 + + result = vc.get(True, 
default="Missing") + assert result == "Missing" + + +def test_get_nan(): + # GH 8569 + s = pd.Float64Index(range(10)).to_series() + assert s.get(np.nan) is None + assert s.get(np.nan, default="Missing") == "Missing" + + +def test_get_nan_multiple(): + # GH 8569 + # ensure that fixing "test_get_nan" above hasn't broken get + # with multiple elements + s = pd.Float64Index(range(10)).to_series() + + idx = [2, 30] + assert s.get(idx) is None + + idx = [2, np.nan] + assert s.get(idx) is None + + # GH 17295 - all missing keys + idx = [20, 30] + assert s.get(idx) is None + + idx = [np.nan, np.nan] + assert s.get(idx) is None + + +def test_delitem(): + # GH 5542 + # should delete the item inplace + s = Series(range(5)) + del s[0] + + expected = Series(range(1, 5), index=range(1, 5)) + tm.assert_series_equal(s, expected) + + del s[1] + expected = Series(range(2, 5), index=range(2, 5)) + tm.assert_series_equal(s, expected) + + # empty + s = Series(dtype=object) + + with pytest.raises(KeyError, match=r"^0$"): + del s[0] + + # only 1 left, del, add, del + s = Series(1) + del s[0] + tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64"))) + s[0] = 1 + tm.assert_series_equal(s, Series(1)) + del s[0] + tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="int64"))) + + # Index(dtype=object) + s = Series(1, index=["a"]) + del s["a"] + tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="object"))) + s["a"] = 1 + tm.assert_series_equal(s, Series(1, index=["a"])) + del s["a"] + tm.assert_series_equal(s, Series(dtype="int64", index=Index([], dtype="object"))) + + +def test_slice_float64(): + values = np.arange(10.0, 50.0, 2) + index = Index(values) + + start, end = values[[5, 15]] + + s = Series(np.random.randn(20), index=index) + + result = s[start:end] + expected = s.iloc[5:16] + tm.assert_series_equal(result, expected) + + result = s.loc[start:end] + tm.assert_series_equal(result, expected) + + df = 
DataFrame(np.random.randn(20, 3), index=index) + + result = df[start:end] + expected = df.iloc[5:16] + tm.assert_frame_equal(result, expected) + + result = df.loc[start:end] + tm.assert_frame_equal(result, expected) + + +def test_getitem_negative_out_of_bounds(): + s = Series(tm.rands_array(5, 10), index=tm.rands_array(10, 10)) + + msg = "index out of bounds" + with pytest.raises(IndexError, match=msg): + s[-11] + msg = "index -11 is out of bounds for axis 0 with size 10" + with pytest.raises(IndexError, match=msg): + s[-11] = "foo" + + +def test_getitem_regression(): + s = Series(range(5), index=list(range(5))) + result = s[list(range(5))] + tm.assert_series_equal(result, s) + + +def test_getitem_setitem_slice_bug(): + s = Series(range(10), index=list(range(10))) + result = s[-12:] + tm.assert_series_equal(result, s) + + result = s[-7:] + tm.assert_series_equal(result, s[3:]) + + result = s[:-12] + tm.assert_series_equal(result, s[:0]) + + s = Series(range(10), index=list(range(10))) + s[-12:] = 0 + assert (s == 0).all() + + s[:-12] = 5 + assert (s == 0).all() + + +def test_getitem_setitem_slice_integers(): + s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) + + result = s[:4] + expected = s.reindex([2, 4, 6, 8]) + tm.assert_series_equal(result, expected) + + s[:4] = 0 + assert (s[:4] == 0).all() + assert not (s[4:] == 0).any() + + +def test_setitem_float_labels(): + # note labels are floats + s = Series(["a", "b", "c"], index=[0, 0.5, 1]) + tmp = s.copy() + + s.loc[1] = "zoo" + tmp.iloc[2] = "zoo" + + tm.assert_series_equal(s, tmp) + + +def test_slice_float_get_set(datetime_series): + msg = ( + r"cannot do slice indexing on with these indexers \[{key}\]" + r" of " + ) + with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): + datetime_series[4.0:10.0] + + with pytest.raises(TypeError, match=msg.format(key=r"4\.0")): + datetime_series[4.0:10.0] = 0 + + with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): + datetime_series[4.5:10.0] 
+ with pytest.raises(TypeError, match=msg.format(key=r"4\.5")): + datetime_series[4.5:10.0] = 0 + + +def test_slice_floats2(): + s = Series(np.random.rand(10), index=np.arange(10, 20, dtype=float)) + + assert len(s.loc[12.0:]) == 8 + assert len(s.loc[12.5:]) == 7 + + i = np.arange(10, 20, dtype=float) + i[2] = 12.2 + s.index = i + assert len(s.loc[12.0:]) == 8 + assert len(s.loc[12.5:]) == 7 + + +def test_int_indexing(): + s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) + + with pytest.raises(KeyError, match=r"^5$"): + s[5] + + with pytest.raises(KeyError, match=r"^'c'$"): + s["c"] + + # not monotonic + s = Series(np.random.randn(6), index=[2, 2, 0, 0, 1, 1]) + + with pytest.raises(KeyError, match=r"^5$"): + s[5] + + with pytest.raises(KeyError, match=r"^'c'$"): + s["c"] + + +def test_getitem_int64(datetime_series): + idx = np.int64(5) + assert datetime_series[idx] == datetime_series[5] diff --git a/pandas/tests/series/methods/__init__.py b/pandas/tests/series/methods/__init__.py new file mode 100644 index 00000000..bcb0d30f --- /dev/null +++ b/pandas/tests/series/methods/__init__.py @@ -0,0 +1,7 @@ +""" +Test files dedicated to individual (stand-alone) Series methods + +Ideally these files/tests should correspond 1-to-1 with tests.frame.methods + +These may also present opportunities for sharing/de-duplicating test code. 
+""" diff --git a/pandas/tests/series/methods/test_append.py b/pandas/tests/series/methods/test_append.py new file mode 100644 index 00000000..4d64b5b3 --- /dev/null +++ b/pandas/tests/series/methods/test_append.py @@ -0,0 +1,168 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, DatetimeIndex, Series, date_range +import pandas._testing as tm + + +class TestSeriesAppend: + def test_append(self, datetime_series, string_series, object_series): + appended_series = string_series.append(object_series) + for idx, value in appended_series.items(): + if idx in string_series.index: + assert value == string_series[idx] + elif idx in object_series.index: + assert value == object_series[idx] + else: + raise AssertionError("orphaned index!") + + msg = "Indexes have overlapping values:" + with pytest.raises(ValueError, match=msg): + datetime_series.append(datetime_series, verify_integrity=True) + + def test_append_many(self, datetime_series): + pieces = [datetime_series[:5], datetime_series[5:10], datetime_series[10:]] + + result = pieces[0].append(pieces[1:]) + tm.assert_series_equal(result, datetime_series) + + def test_append_duplicates(self): + # GH 13677 + s1 = pd.Series([1, 2, 3]) + s2 = pd.Series([4, 5, 6]) + exp = pd.Series([1, 2, 3, 4, 5, 6], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(s1.append(s2), exp) + tm.assert_series_equal(pd.concat([s1, s2]), exp) + + # the result must have RangeIndex + exp = pd.Series([1, 2, 3, 4, 5, 6]) + tm.assert_series_equal( + s1.append(s2, ignore_index=True), exp, check_index_type=True + ) + tm.assert_series_equal( + pd.concat([s1, s2], ignore_index=True), exp, check_index_type=True + ) + + msg = "Indexes have overlapping values:" + with pytest.raises(ValueError, match=msg): + s1.append(s2, verify_integrity=True) + with pytest.raises(ValueError, match=msg): + pd.concat([s1, s2], verify_integrity=True) + + def test_append_tuples(self): + # GH 28410 + s = pd.Series([1, 2, 3]) + list_input = 
[s, s] + tuple_input = (s, s) + + expected = s.append(list_input) + result = s.append(tuple_input) + + tm.assert_series_equal(expected, result) + + def test_append_dataframe_regression(self): + # GH 30975 + df = pd.DataFrame({"A": [1, 2]}) + result = df.A.append([df]) + expected = pd.DataFrame( + {0: [1.0, 2.0, None, None], "A": [None, None, 1.0, 2.0]}, index=[0, 1, 0, 1] + ) + + tm.assert_frame_equal(expected, result) + + +class TestSeriesAppendWithDatetimeIndex: + def test_append(self): + rng = date_range("5/8/2012 1:45", periods=10, freq="5T") + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + + result = ts.append(ts) + result_df = df.append(df) + ex_index = DatetimeIndex(np.tile(rng.values, 2)) + tm.assert_index_equal(result.index, ex_index) + tm.assert_index_equal(result_df.index, ex_index) + + appended = rng.append(rng) + tm.assert_index_equal(appended, ex_index) + + appended = rng.append([rng, rng]) + ex_index = DatetimeIndex(np.tile(rng.values, 3)) + tm.assert_index_equal(appended, ex_index) + + # different index names + rng1 = rng.copy() + rng2 = rng.copy() + rng1.name = "foo" + rng2.name = "bar" + assert rng1.append(rng1).name == "foo" + assert rng1.append(rng2).name is None + + def test_append_tz(self): + # see gh-2938 + rng = date_range("5/8/2012 1:45", periods=10, freq="5T", tz="US/Eastern") + rng2 = date_range("5/8/2012 2:35", periods=10, freq="5T", tz="US/Eastern") + rng3 = date_range("5/8/2012 1:45", periods=20, freq="5T", tz="US/Eastern") + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def 
test_append_tz_explicit_pytz(self): + # see gh-2938 + from pytz import timezone as timezone + + rng = date_range( + "5/8/2012 1:45", periods=10, freq="5T", tz=timezone("US/Eastern") + ) + rng2 = date_range( + "5/8/2012 2:35", periods=10, freq="5T", tz=timezone("US/Eastern") + ) + rng3 = date_range( + "5/8/2012 1:45", periods=20, freq="5T", tz=timezone("US/Eastern") + ) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) + + def test_append_tz_dateutil(self): + # see gh-2938 + rng = date_range( + "5/8/2012 1:45", periods=10, freq="5T", tz="dateutil/US/Eastern" + ) + rng2 = date_range( + "5/8/2012 2:35", periods=10, freq="5T", tz="dateutil/US/Eastern" + ) + rng3 = date_range( + "5/8/2012 1:45", periods=20, freq="5T", tz="dateutil/US/Eastern" + ) + ts = Series(np.random.randn(len(rng)), rng) + df = DataFrame(np.random.randn(len(rng), 4), index=rng) + ts2 = Series(np.random.randn(len(rng2)), rng2) + df2 = DataFrame(np.random.randn(len(rng2), 4), index=rng2) + + result = ts.append(ts2) + result_df = df.append(df2) + tm.assert_index_equal(result.index, rng3) + tm.assert_index_equal(result_df.index, rng3) + + appended = rng.append(rng2) + tm.assert_index_equal(appended, rng3) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py new file mode 100644 index 00000000..1fc98ded --- /dev/null +++ b/pandas/tests/series/methods/test_argsort.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest + +from pandas import Series, Timestamp, isna +import pandas._testing as tm + + +class TestSeriesArgsort: + def _check_accum_op(self, name, ser, check_dtype=True): 
+ func = getattr(np, name) + tm.assert_numpy_array_equal( + func(ser).values, func(np.array(ser)), check_dtype=check_dtype, + ) + + # with missing values + ts = ser.copy() + ts[::2] = np.NaN + + result = func(ts)[1::2] + expected = func(np.array(ts.dropna())) + + tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) + + def test_argsort(self, datetime_series): + self._check_accum_op("argsort", datetime_series, check_dtype=False) + argsorted = datetime_series.argsort() + assert issubclass(argsorted.dtype.type, np.integer) + + # GH#2967 (introduced bug in 0.11-dev I think) + s = Series([Timestamp("201301{i:02d}".format(i=i)) for i in range(1, 6)]) + assert s.dtype == "datetime64[ns]" + shifted = s.shift(-1) + assert shifted.dtype == "datetime64[ns]" + assert isna(shifted[4]) + + result = s.argsort() + expected = Series(range(5), dtype="int64") + tm.assert_series_equal(result, expected) + + result = shifted.argsort() + expected = Series(list(range(4)) + [-1], dtype="int64") + tm.assert_series_equal(result, expected) + + def test_argsort_stable(self): + s = Series(np.random.randint(0, 100, size=10000)) + mindexer = s.argsort(kind="mergesort") + qindexer = s.argsort() + + mexpected = np.argsort(s.values, kind="mergesort") + qexpected = np.argsort(s.values, kind="quicksort") + + tm.assert_series_equal(mindexer, Series(mexpected), check_dtype=False) + tm.assert_series_equal(qindexer, Series(qexpected), check_dtype=False) + msg = ( + r"ndarray Expected type ," + r" found instead" + ) + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(qindexer, mindexer) + + def test_argsort_preserve_name(self, datetime_series): + result = datetime_series.argsort() + assert result.name == datetime_series.name diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py new file mode 100644 index 00000000..b121efd2 --- /dev/null +++ b/pandas/tests/series/methods/test_asof.py @@ -0,0 +1,178 @@ +import numpy as 
np +import pytest + +from pandas import Series, Timestamp, date_range, isna, notna, offsets +import pandas._testing as tm + + +class TestSeriesAsof: + def test_basic(self): + + # array or list or dates + N = 50 + rng = date_range("1/1/1990", periods=N, freq="53s") + ts = Series(np.random.randn(N), index=rng) + ts[15:30] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + + result = ts.asof(dates) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + result = ts.asof(list(dates)) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + mask = (result.index >= lb) & (result.index < ub) + rs = result[mask] + assert (rs == ts[lb]).all() + + val = result[result.index[result.index >= ub][0]] + assert ts[ub] == val + + def test_scalar(self): + + N = 30 + rng = date_range("1/1/1990", periods=N, freq="53s") + ts = Series(np.arange(N), index=rng) + ts[5:10] = np.NaN + ts[15:20] = np.NaN + + val1 = ts.asof(ts.index[7]) + val2 = ts.asof(ts.index[19]) + + assert val1 == ts[4] + assert val2 == ts[14] + + # accepts strings + val1 = ts.asof(str(ts.index[7])) + assert val1 == ts[4] + + # in there + result = ts.asof(ts.index[3]) + assert result == ts[3] + + # no as of value + d = ts.index[0] - offsets.BDay() + assert np.isnan(ts.asof(d)) + + def test_with_nan(self): + # basic asof test + rng = date_range("1/1/2000", "1/2/2000", freq="4h") + s = Series(np.arange(len(rng)), index=rng) + r = s.resample("2h").mean() + + result = r.asof(r.index) + expected = Series( + [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0], + index=date_range("1/1/2000", "1/2/2000", freq="2h"), + ) + tm.assert_series_equal(result, expected) + + r.iloc[3:5] = np.nan + result = r.asof(r.index) + expected = Series( + [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0], + index=date_range("1/1/2000", "1/2/2000", freq="2h"), + ) + tm.assert_series_equal(result, expected) + + r.iloc[-3:] = np.nan + result = r.asof(r.index) + expected = Series( + [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 
4, 4.0], + index=date_range("1/1/2000", "1/2/2000", freq="2h"), + ) + tm.assert_series_equal(result, expected) + + def test_periodindex(self): + from pandas import period_range, PeriodIndex + + # array or list or dates + N = 50 + rng = period_range("1/1/1990", periods=N, freq="H") + ts = Series(np.random.randn(N), index=rng) + ts[15:30] = np.nan + dates = date_range("1/1/1990", periods=N * 3, freq="37min") + + result = ts.asof(dates) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + result = ts.asof(list(dates)) + assert notna(result).all() + lb = ts.index[14] + ub = ts.index[30] + + pix = PeriodIndex(result.index.values, freq="H") + mask = (pix >= lb) & (pix < ub) + rs = result[mask] + assert (rs == ts[lb]).all() + + ts[5:10] = np.nan + ts[15:20] = np.nan + + val1 = ts.asof(ts.index[7]) + val2 = ts.asof(ts.index[19]) + + assert val1 == ts[4] + assert val2 == ts[14] + + # accepts strings + val1 = ts.asof(str(ts.index[7])) + assert val1 == ts[4] + + # in there + assert ts.asof(ts.index[3]) == ts[3] + + # no as of value + d = ts.index[0].to_timestamp() - offsets.BDay() + assert isna(ts.asof(d)) + + def test_errors(self): + + s = Series( + [1, 2, 3], + index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")], + ) + + # non-monotonic + assert not s.index.is_monotonic + with pytest.raises(ValueError): + s.asof(s.index[0]) + + # subset with Series + N = 10 + rng = date_range("1/1/1990", periods=N, freq="53s") + s = Series(np.random.randn(N), index=rng) + with pytest.raises(ValueError): + s.asof(s.index[0], subset="foo") + + def test_all_nans(self): + # GH 15713 + # series is all nans + result = Series([np.nan]).asof([0]) + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + # testing non-default indexes + N = 50 + rng = date_range("1/1/1990", periods=N, freq="53s") + + dates = date_range("1/1/1990", periods=N * 3, freq="25s") + result = Series(np.nan, index=rng).asof(dates) + expected = Series(np.nan, 
index=dates) + tm.assert_series_equal(result, expected) + + # testing scalar input + date = date_range("1/1/1990", periods=N * 3, freq="25s")[0] + result = Series(np.nan, index=rng).asof(date) + assert isna(result) + + # test name is propagated + result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5]) + expected = Series(np.nan, index=[4, 5], name="test") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py new file mode 100644 index 00000000..37764d3b --- /dev/null +++ b/pandas/tests/series/methods/test_clip.py @@ -0,0 +1,99 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Series, Timestamp, isna, notna +import pandas._testing as tm + + +class TestSeriesClip: + def test_clip(self, datetime_series): + val = datetime_series.median() + + assert datetime_series.clip(lower=val).min() == val + assert datetime_series.clip(upper=val).max() == val + + result = datetime_series.clip(-0.5, 0.5) + expected = np.clip(datetime_series, -0.5, 0.5) + tm.assert_series_equal(result, expected) + assert isinstance(expected, Series) + + def test_clip_types_and_nulls(self): + + sers = [ + Series([np.nan, 1.0, 2.0, 3.0]), + Series([None, "a", "b", "c"]), + Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")), + ] + + for s in sers: + thresh = s[2] + lower = s.clip(lower=thresh) + upper = s.clip(upper=thresh) + assert lower[notna(lower)].min() == thresh + assert upper[notna(upper)].max() == thresh + assert list(isna(s)) == list(isna(lower)) + assert list(isna(s)) == list(isna(upper)) + + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH#17276 + s = Series([1, 2, 3]) + + tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) + tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3])) + + # GH#19992 + tm.assert_series_equal(s.clip(lower=[0, 4, np.nan]), Series([1, 4, np.nan])) + 
tm.assert_series_equal(s.clip(upper=[1, np.nan, 1]), Series([1, np.nan, 1])) + + def test_clip_against_series(self): + # GH#6966 + + s = Series([1.0, 1.0, 4.0]) + + lower = Series([1.0, 2.0, 3.0]) + upper = Series([1.5, 2.5, 3.5]) + + tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) + tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])]) + def test_clip_against_list_like(self, inplace, upper): + # GH#15390 + original = pd.Series([5, 6, 7]) + result = original.clip(upper=upper, inplace=inplace) + expected = pd.Series([1, 2, 3]) + + if inplace: + result = original + tm.assert_series_equal(result, expected, check_exact=True) + + def test_clip_with_datetimes(self): + # GH#11838 + # naive and tz-aware datetimes + + t = Timestamp("2015-12-01 09:30:30") + s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")]) + result = s.clip(upper=t) + expected = Series( + [Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")] + ) + tm.assert_series_equal(result, expected) + + t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern") + s = Series( + [ + Timestamp("2015-12-01 09:30:00", tz="US/Eastern"), + Timestamp("2015-12-01 09:31:00", tz="US/Eastern"), + ] + ) + result = s.clip(upper=t) + expected = Series( + [ + Timestamp("2015-12-01 09:30:00", tz="US/Eastern"), + Timestamp("2015-12-01 09:30:30", tz="US/Eastern"), + ] + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_count.py b/pandas/tests/series/methods/test_count.py new file mode 100644 index 00000000..1ca48eeb --- /dev/null +++ b/pandas/tests/series/methods/test_count.py @@ -0,0 +1,38 @@ +import numpy as np + +import pandas as pd +from pandas import Categorical, MultiIndex, Series +import pandas._testing as tm + + +class TestSeriesCount: + def test_count(self, datetime_series): + assert 
datetime_series.count() == len(datetime_series) + + datetime_series[::2] = np.NaN + + assert datetime_series.count() == np.isfinite(datetime_series).sum() + + mi = MultiIndex.from_arrays([list("aabbcc"), [1, 2, 2, np.nan, 1, 2]]) + ts = Series(np.arange(len(mi)), index=mi) + + left = ts.count(level=1) + right = Series([2, 3, 1], index=[1, 2, np.nan]) + tm.assert_series_equal(left, right) + + ts.iloc[[0, 3, 5]] = np.nan + tm.assert_series_equal(ts.count(level=1), right - 1) + + # GH#29478 + with pd.option_context("use_inf_as_na", True): + assert pd.Series([pd.Timestamp("1990/1/1")]).count() == 1 + + def test_count_categorical(self): + + ser = Series( + Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + ) + result = ser.count() + assert result == 2 diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py new file mode 100644 index 00000000..1f6033d4 --- /dev/null +++ b/pandas/tests/series/methods/test_cov_corr.py @@ -0,0 +1,158 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import Series, isna +import pandas._testing as tm + + +class TestSeriesCov: + def test_cov(self, datetime_series): + # full overlap + tm.assert_almost_equal( + datetime_series.cov(datetime_series), datetime_series.std() ** 2 + ) + + # partial overlap + tm.assert_almost_equal( + datetime_series[:15].cov(datetime_series[5:]), + datetime_series[5:15].std() ** 2, + ) + + # No overlap + assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) + + # all NA + cp = datetime_series[:10].copy() + cp[:] = np.nan + assert isna(cp.cov(cp)) + + # min_periods + assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) + + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) + assert isna(ts1.cov(ts2, min_periods=12)) + + +class TestSeriesCorr: + @td.skip_if_no_scipy + def 
test_corr(self, datetime_series): + import scipy.stats as stats + + # full overlap + tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) + + # partial overlap + tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) + + assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) + + ts1 = datetime_series[:15].reindex(datetime_series.index) + ts2 = datetime_series[5:].reindex(datetime_series.index) + assert isna(ts1.corr(ts2, min_periods=12)) + + # No overlap + assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) + + # all NA + cp = datetime_series[:10].copy() + cp[:] = np.nan + assert isna(cp.corr(cp)) + + A = tm.makeTimeSeries() + B = tm.makeTimeSeries() + result = A.corr(B) + expected, _ = stats.pearsonr(A, B) + tm.assert_almost_equal(result, expected) + + @td.skip_if_no_scipy + def test_corr_rank(self): + import scipy.stats as stats + + # kendall and spearman + A = tm.makeTimeSeries() + B = tm.makeTimeSeries() + A[-5:] = A[:5] + result = A.corr(B, method="kendall") + expected = stats.kendalltau(A, B)[0] + tm.assert_almost_equal(result, expected) + + result = A.corr(B, method="spearman") + expected = stats.spearmanr(A, B)[0] + tm.assert_almost_equal(result, expected) + + # results from R + A = Series( + [ + -0.89926396, + 0.94209606, + -1.03289164, + -0.95445587, + 0.76910310, + -0.06430576, + -2.09704447, + 0.40660407, + -0.89926396, + 0.94209606, + ] + ) + B = Series( + [ + -1.01270225, + -0.62210117, + -1.56895827, + 0.59592943, + -0.01680292, + 1.17258718, + -1.06009347, + -0.10222060, + -0.89076239, + 0.89372375, + ] + ) + kexp = 0.4319297 + sexp = 0.5853767 + tm.assert_almost_equal(A.corr(B, method="kendall"), kexp) + tm.assert_almost_equal(A.corr(B, method="spearman"), sexp) + + def test_corr_invalid_method(self): + # GH PR #22298 + s1 = pd.Series(np.random.randn(10)) + s2 = pd.Series(np.random.randn(10)) + msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " + with 
pytest.raises(ValueError, match=msg): + s1.corr(s2, method="____") + + def test_corr_callable_method(self, datetime_series): + # simple correlation example + # returns 1 if exact equality, 0 otherwise + my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0 + + # simple example + s1 = Series([1, 2, 3, 4, 5]) + s2 = Series([5, 4, 3, 2, 1]) + expected = 0 + tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected) + + # full overlap + tm.assert_almost_equal( + datetime_series.corr(datetime_series, method=my_corr), 1.0 + ) + + # partial overlap + tm.assert_almost_equal( + datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0 + ) + + # No overlap + assert np.isnan( + datetime_series[::2].corr(datetime_series[1::2], method=my_corr) + ) + + # dataframe example + df = pd.DataFrame([s1, s2]) + expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}]) + tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected) diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py new file mode 100644 index 00000000..b147a04b --- /dev/null +++ b/pandas/tests/series/methods/test_describe.py @@ -0,0 +1,69 @@ +import numpy as np + +from pandas import Series, Timestamp, date_range +import pandas._testing as tm + + +class TestSeriesDescribe: + def test_describe(self): + s = Series([0, 1, 2, 3, 4], name="int_data") + result = s.describe() + expected = Series( + [5, 2, s.std(), 0, 1, 2, 3, 4], + name="int_data", + index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"], + ) + tm.assert_series_equal(result, expected) + + s = Series([True, True, False, False, False], name="bool_data") + result = s.describe() + expected = Series( + [5, 2, False, 3], name="bool_data", index=["count", "unique", "top", "freq"] + ) + tm.assert_series_equal(result, expected) + + s = Series(["a", "a", "b", "c", "d"], name="str_data") + result = s.describe() + expected = Series( + [5, 4, "a", 2], name="str_data", index=["count", 
"unique", "top", "freq"] + ) + tm.assert_series_equal(result, expected) + + def test_describe_empty_object(self): + # https://github.com/pandas-dev/pandas/issues/27183 + s = Series([None, None], dtype=object) + result = s.describe() + expected = Series( + [0, 0, np.nan, np.nan], + dtype=object, + index=["count", "unique", "top", "freq"], + ) + tm.assert_series_equal(result, expected) + + result = s[:0].describe() + tm.assert_series_equal(result, expected) + # ensure NaN, not None + assert np.isnan(result.iloc[2]) + assert np.isnan(result.iloc[3]) + + def test_describe_with_tz(self, tz_naive_fixture): + # GH 21332 + tz = tz_naive_fixture + name = str(tz_naive_fixture) + start = Timestamp(2018, 1, 1) + end = Timestamp(2018, 1, 5) + s = Series(date_range(start, end, tz=tz), name=name) + result = s.describe() + expected = Series( + [ + 5, + 5, + s.value_counts().index[0], + 1, + start.tz_localize(tz), + end.tz_localize(tz), + ], + name=name, + index=["count", "unique", "top", "freq", "first", "last"], + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_diff.py b/pandas/tests/series/methods/test_diff.py new file mode 100644 index 00000000..033f75e9 --- /dev/null +++ b/pandas/tests/series/methods/test_diff.py @@ -0,0 +1,77 @@ +import numpy as np +import pytest + +from pandas import Series, TimedeltaIndex, date_range +import pandas._testing as tm + + +class TestSeriesDiff: + def test_diff_np(self): + pytest.skip("skipping due to Series no longer being an ndarray") + + # no longer works as the return type of np.diff is now nd.array + s = Series(np.arange(5)) + + r = np.diff(s) + tm.assert_series_equal(Series([np.nan, 0, 0, 0, np.nan]), r) + + def test_diff_int(self): + # int dtype + a = 10000000000000000 + b = a + 1 + s = Series([a, b]) + + result = s.diff() + assert result[1] == 1 + + def test_diff_tz(self): + # Combined datetime diff, normal diff and boolean diff test + ts = tm.makeTimeSeries(name="ts") + ts.diff() + + # neg n + 
result = ts.diff(-1) + expected = ts - ts.shift(-1) + tm.assert_series_equal(result, expected) + + # 0 + result = ts.diff(0) + expected = ts - ts + tm.assert_series_equal(result, expected) + + # datetime diff (GH#3100) + s = Series(date_range("20130102", periods=5)) + result = s.diff() + expected = s - s.shift(1) + tm.assert_series_equal(result, expected) + + # timedelta diff + result = result - result.shift(1) # previous result + expected = expected.diff() # previously expected + tm.assert_series_equal(result, expected) + + # with tz + s = Series( + date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo" + ) + result = s.diff() + expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "input,output,diff", + [([False, True, True, False, False], [np.nan, True, False, True, False], 1)], + ) + def test_diff_bool(self, input, output, diff): + # boolean series (test for fixing #17294) + s = Series(input) + result = s.diff() + expected = Series(output) + tm.assert_series_equal(result, expected) + + def test_diff_object_dtype(self): + # object series + s = Series([False, True, 5.0, np.nan, True, False]) + result = s.diff() + expected = s - s.shift(1) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_drop_duplicates.py b/pandas/tests/series/methods/test_drop_duplicates.py new file mode 100644 index 00000000..2d052505 --- /dev/null +++ b/pandas/tests/series/methods/test_drop_duplicates.py @@ -0,0 +1,141 @@ +import numpy as np +import pytest + +from pandas import Categorical, Series +import pandas._testing as tm + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, False, False, True, True, False])), + ("last", Series([False, True, True, False, False, False, False])), + (False, Series([False, True, True, False, True, True, False])), + ], +) +def test_drop_duplicates(any_numpy_dtype, keep, 
expected): + tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype)) + + if tc.dtype == "bool": + pytest.skip("tested separately in test_drop_duplicates_bool") + + tm.assert_series_equal(tc.duplicated(keep=keep), expected) + tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep=keep, inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, True])), + ("last", Series([True, True, False, False])), + (False, Series([True, True, True, True])), + ], +) +def test_drop_duplicates_bool(keep, expected): + tc = Series([True, False, True, False]) + + tm.assert_series_equal(tc.duplicated(keep=keep), expected) + tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep=keep, inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + + +class TestSeriesDropDuplicates: + @pytest.mark.parametrize( + "dtype", + ["int_", "uint", "float_", "unicode_", "timedelta64[h]", "datetime64[D]"], + ) + def test_drop_duplicates_categorical_non_bool(self, dtype, ordered_fixture): + cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype)) + + # Test case 1 + input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype)) + tc1 = Series(Categorical(input1, categories=cat_array, ordered=ordered_fixture)) + if dtype == "datetime64[D]": + # pre-empty flaky xfail, tc1 values are seemingly-random + if not (np.array(tc1) == input1).all(): + pytest.xfail(reason="GH#7996") + + expected = Series([False, False, False, True]) + tm.assert_series_equal(tc1.duplicated(), expected) + tm.assert_series_equal(tc1.drop_duplicates(), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(inplace=True) + tm.assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, False, True, False]) + tm.assert_series_equal(tc1.duplicated(keep="last"), expected) + 
tm.assert_series_equal(tc1.drop_duplicates(keep="last"), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep="last", inplace=True) + tm.assert_series_equal(sc, tc1[~expected]) + + expected = Series([False, False, True, True]) + tm.assert_series_equal(tc1.duplicated(keep=False), expected) + tm.assert_series_equal(tc1.drop_duplicates(keep=False), tc1[~expected]) + sc = tc1.copy() + sc.drop_duplicates(keep=False, inplace=True) + tm.assert_series_equal(sc, tc1[~expected]) + + # Test case 2 + input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype)) + tc2 = Series(Categorical(input2, categories=cat_array, ordered=ordered_fixture)) + if dtype == "datetime64[D]": + # pre-empty flaky xfail, tc2 values are seemingly-random + if not (np.array(tc2) == input2).all(): + pytest.xfail(reason="GH#7996") + + expected = Series([False, False, False, False, True, True, False]) + tm.assert_series_equal(tc2.duplicated(), expected) + tm.assert_series_equal(tc2.drop_duplicates(), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(inplace=True) + tm.assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, False, False, False]) + tm.assert_series_equal(tc2.duplicated(keep="last"), expected) + tm.assert_series_equal(tc2.drop_duplicates(keep="last"), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep="last", inplace=True) + tm.assert_series_equal(sc, tc2[~expected]) + + expected = Series([False, True, True, False, True, True, False]) + tm.assert_series_equal(tc2.duplicated(keep=False), expected) + tm.assert_series_equal(tc2.drop_duplicates(keep=False), tc2[~expected]) + sc = tc2.copy() + sc.drop_duplicates(keep=False, inplace=True) + tm.assert_series_equal(sc, tc2[~expected]) + + def test_drop_duplicates_categorical_bool(self, ordered_fixture): + tc = Series( + Categorical( + [True, False, True, False], + categories=[True, False], + ordered=ordered_fixture, + ) + ) + + expected = Series([False, False, True, True]) + 
tm.assert_series_equal(tc.duplicated(), expected) + tm.assert_series_equal(tc.drop_duplicates(), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, False, False]) + tm.assert_series_equal(tc.duplicated(keep="last"), expected) + tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep="last", inplace=True) + tm.assert_series_equal(sc, tc[~expected]) + + expected = Series([True, True, True, True]) + tm.assert_series_equal(tc.duplicated(keep=False), expected) + tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected]) + sc = tc.copy() + sc.drop_duplicates(keep=False, inplace=True) + tm.assert_series_equal(sc, tc[~expected]) diff --git a/pandas/tests/series/methods/test_duplicated.py b/pandas/tests/series/methods/test_duplicated.py new file mode 100644 index 00000000..5cc29791 --- /dev/null +++ b/pandas/tests/series/methods/test_duplicated.py @@ -0,0 +1,35 @@ +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True], name="name")), + ("last", Series([True, True, False, False, False], name="name")), + (False, Series([True, True, True, False, True], name="name")), + ], +) +def test_duplicated_keep(keep, expected): + ser = Series(["a", "b", "b", "c", "a"], name="name") + + result = ser.duplicated(keep=keep) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "keep, expected", + [ + ("first", Series([False, False, True, False, True])), + ("last", Series([True, True, False, False, False])), + (False, Series([True, True, True, False, True])), + ], +) +def test_duplicated_nan_none(keep, expected): + ser = Series([np.nan, 3, 3, None, np.nan], dtype=object) + + result = ser.duplicated(keep=keep) + tm.assert_series_equal(result, expected) diff --git 
a/pandas/tests/series/methods/test_explode.py b/pandas/tests/series/methods/test_explode.py new file mode 100644 index 00000000..979199e1 --- /dev/null +++ b/pandas/tests/series/methods/test_explode.py @@ -0,0 +1,121 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +def test_basic(): + s = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo") + result = s.explode() + expected = pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object, name="foo" + ) + tm.assert_series_equal(result, expected) + + +def test_mixed_type(): + s = pd.Series( + [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])], name="foo" + ) + result = s.explode() + expected = pd.Series( + [0, 1, 2, np.nan, None, np.nan, "a", "b"], + index=[0, 0, 0, 1, 2, 3, 4, 4], + dtype=object, + name="foo", + ) + tm.assert_series_equal(result, expected) + + +def test_empty(): + s = pd.Series(dtype=object) + result = s.explode() + expected = s.copy() + tm.assert_series_equal(result, expected) + + +def test_nested_lists(): + s = pd.Series([[[1, 2, 3]], [1, 2], 1]) + result = s.explode() + expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2]) + tm.assert_series_equal(result, expected) + + +def test_multi_index(): + s = pd.Series( + [[0, 1, 2], np.nan, [], (3, 4)], + name="foo", + index=pd.MultiIndex.from_product([list("ab"), range(2)], names=["foo", "bar"]), + ) + result = s.explode() + index = pd.MultiIndex.from_tuples( + [("a", 0), ("a", 0), ("a", 0), ("a", 1), ("b", 0), ("b", 1), ("b", 1)], + names=["foo", "bar"], + ) + expected = pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], index=index, dtype=object, name="foo" + ) + tm.assert_series_equal(result, expected) + + +def test_large(): + s = pd.Series([range(256)]).explode() + result = s.explode() + tm.assert_series_equal(result, s) + + +def test_invert_array(): + df = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")}) + + listify = 
df.apply(lambda x: x.array, axis=1) + result = listify.explode() + tm.assert_series_equal(result, df["a"].rename()) + + +@pytest.mark.parametrize( + "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))] +) +def non_object_dtype(s): + result = s.explode() + tm.assert_series_equal(result, s) + + +def test_typical_usecase(): + + df = pd.DataFrame( + [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}], + columns=["var1", "var2"], + ) + exploded = df.var1.str.split(",").explode() + exploded + result = df[["var2"]].join(exploded) + expected = pd.DataFrame( + {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")}, + columns=["var2", "var1"], + index=[0, 0, 0, 1, 1, 1], + ) + tm.assert_frame_equal(result, expected) + + +def test_nested_EA(): + # a nested EA array + s = pd.Series( + [ + pd.date_range("20170101", periods=3, tz="UTC"), + pd.date_range("20170104", periods=3, tz="UTC"), + ] + ) + result = s.explode() + expected = pd.Series( + pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1] + ) + tm.assert_series_equal(result, expected) + + +def test_duplicate_index(): + # GH 28005 + s = pd.Series([[1, 2], [3, 4]], index=[0, 0]) + result = s.explode() + expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py new file mode 100644 index 00000000..ca93e989 --- /dev/null +++ b/pandas/tests/series/methods/test_isin.py @@ -0,0 +1,82 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Series, date_range +import pandas._testing as tm + + +class TestSeriesIsIn: + def test_isin(self): + s = Series(["A", "B", "C", "a", "B", "B", "A", "C"]) + + result = s.isin(["A", "C"]) + expected = Series([True, False, True, False, False, False, True, True]) + tm.assert_series_equal(result, expected) + + # GH#16012 + # This specific issue has to have a series over 
1e6 in len, but the + # comparison array (in_list) must be large enough so that numpy doesn't + # do a manual masking trick that will avoid this issue altogether + s = Series(list("abcdefghijk" * 10 ** 5)) + # If numpy doesn't do the manual comparison/mask, these + # unorderable mixed types are what cause the exception in numpy + in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6 + + assert s.isin(in_list).sum() == 200000 + + def test_isin_with_string_scalar(self): + # GH#4763 + s = Series(["A", "B", "C", "a", "B", "B", "A", "C"]) + msg = ( + r"only list-like objects are allowed to be passed to isin\(\)," + r" you passed a \[str\]" + ) + with pytest.raises(TypeError, match=msg): + s.isin("a") + + s = Series(["aaa", "b", "c"]) + with pytest.raises(TypeError, match=msg): + s.isin("aaa") + + def test_isin_with_i8(self): + # GH#5021 + + expected = Series([True, True, False, False, False]) + expected2 = Series([False, True, False, False, False]) + + # datetime64[ns] + s = Series(date_range("jan-01-2013", "jan-05-2013")) + + result = s.isin(s[0:2]) + tm.assert_series_equal(result, expected) + + result = s.isin(s[0:2].values) + tm.assert_series_equal(result, expected) + + # fails on dtype conversion in the first place + result = s.isin(s[0:2].values.astype("datetime64[D]")) + tm.assert_series_equal(result, expected) + + result = s.isin([s[1]]) + tm.assert_series_equal(result, expected2) + + result = s.isin([np.datetime64(s[1])]) + tm.assert_series_equal(result, expected2) + + result = s.isin(set(s[0:2])) + tm.assert_series_equal(result, expected) + + # timedelta64[ns] + s = Series(pd.to_timedelta(range(5), unit="d")) + result = s.isin(s[0:2]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_isin_empty(self, empty): + # see GH#16991 + s = Series(["a", "b"]) + expected = Series([False, False]) + + result = s.isin(empty) + tm.assert_series_equal(expected, result) 
diff --git a/pandas/tests/series/methods/test_nlargest.py b/pandas/tests/series/methods/test_nlargest.py new file mode 100644 index 00000000..a029965c --- /dev/null +++ b/pandas/tests/series/methods/test_nlargest.py @@ -0,0 +1,213 @@ +""" +Note: for naming purposes, most tests are title with as e.g. "test_nlargest_foo" +but are implicitly also testing nsmallest_foo. +""" +from itertools import product + +import numpy as np +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm + +main_dtypes = [ + "datetime", + "datetimetz", + "timedelta", + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "uint8", + "uint16", + "uint32", + "uint64", +] + + +@pytest.fixture +def s_main_dtypes(): + """ + A DataFrame with many dtypes + + * datetime + * datetimetz + * timedelta + * [u]int{8,16,32,64} + * float{32,64} + + The columns are the name of the dtype. + """ + df = pd.DataFrame( + { + "datetime": pd.to_datetime(["2003", "2002", "2001", "2002", "2005"]), + "datetimetz": pd.to_datetime( + ["2003", "2002", "2001", "2002", "2005"] + ).tz_localize("US/Eastern"), + "timedelta": pd.to_timedelta(["3d", "2d", "1d", "2d", "5d"]), + } + ) + + for dtype in [ + "int8", + "int16", + "int32", + "int64", + "float32", + "float64", + "uint8", + "uint16", + "uint32", + "uint64", + ]: + df[dtype] = Series([3, 2, 1, 2, 5], dtype=dtype) + + return df + + +@pytest.fixture(params=main_dtypes) +def s_main_dtypes_split(request, s_main_dtypes): + """Each series in s_main_dtypes.""" + return s_main_dtypes[request.param] + + +def assert_check_nselect_boundary(vals, dtype, method): + # helper function for 'test_boundary_{dtype}' tests + ser = Series(vals, dtype=dtype) + result = getattr(ser, method)(3) + expected_idxr = [0, 1, 2] if method == "nsmallest" else [3, 2, 1] + expected = ser.loc[expected_idxr] + tm.assert_series_equal(result, expected) + + +class TestSeriesNLargestNSmallest: + @pytest.mark.parametrize( + "r", + [ + Series([3.0, 2, 1, 2, 
"5"], dtype="object"), + Series([3.0, 2, 1, 2, 5], dtype="object"), + # not supported on some archs + # Series([3., 2, 1, 2, 5], dtype='complex256'), + Series([3.0, 2, 1, 2, 5], dtype="complex128"), + Series(list("abcde")), + Series(list("abcde"), dtype="category"), + ], + ) + def test_nlargest_error(self, r): + dt = r.dtype + msg = "Cannot use method 'n(larg|small)est' with dtype {dt}".format(dt=dt) + args = 2, len(r), 0, -1 + methods = r.nlargest, r.nsmallest + for method, arg in product(methods, args): + with pytest.raises(TypeError, match=msg): + method(arg) + + def test_nsmallest_nlargest(self, s_main_dtypes_split): + # float, int, datetime64 (use i8), timedelts64 (same), + # object that are numbers, object that are strings + ser = s_main_dtypes_split + + tm.assert_series_equal(ser.nsmallest(2), ser.iloc[[2, 1]]) + tm.assert_series_equal(ser.nsmallest(2, keep="last"), ser.iloc[[2, 3]]) + + empty = ser.iloc[0:0] + tm.assert_series_equal(ser.nsmallest(0), empty) + tm.assert_series_equal(ser.nsmallest(-1), empty) + tm.assert_series_equal(ser.nlargest(0), empty) + tm.assert_series_equal(ser.nlargest(-1), empty) + + tm.assert_series_equal(ser.nsmallest(len(ser)), ser.sort_values()) + tm.assert_series_equal(ser.nsmallest(len(ser) + 1), ser.sort_values()) + tm.assert_series_equal(ser.nlargest(len(ser)), ser.iloc[[4, 0, 1, 3, 2]]) + tm.assert_series_equal(ser.nlargest(len(ser) + 1), ser.iloc[[4, 0, 1, 3, 2]]) + + def test_nlargest_misc(self): + + ser = Series([3.0, np.nan, 1, 2, 5]) + tm.assert_series_equal(ser.nlargest(), ser.iloc[[4, 0, 3, 2]]) + tm.assert_series_equal(ser.nsmallest(), ser.iloc[[2, 3, 0, 4]]) + + msg = 'keep must be either "first", "last"' + with pytest.raises(ValueError, match=msg): + ser.nsmallest(keep="invalid") + with pytest.raises(ValueError, match=msg): + ser.nlargest(keep="invalid") + + # GH#15297 + ser = Series([1] * 5, index=[1, 2, 3, 4, 5]) + expected_first = Series([1] * 3, index=[1, 2, 3]) + expected_last = Series([1] * 3, index=[5, 4, 
3]) + + result = ser.nsmallest(3) + tm.assert_series_equal(result, expected_first) + + result = ser.nsmallest(3, keep="last") + tm.assert_series_equal(result, expected_last) + + result = ser.nlargest(3) + tm.assert_series_equal(result, expected_first) + + result = ser.nlargest(3, keep="last") + tm.assert_series_equal(result, expected_last) + + @pytest.mark.parametrize("n", range(1, 5)) + def test_nlargest_n(self, n): + + # GH 13412 + ser = Series([1, 4, 3, 2], index=[0, 0, 1, 1]) + result = ser.nlargest(n) + expected = ser.sort_values(ascending=False).head(n) + tm.assert_series_equal(result, expected) + + result = ser.nsmallest(n) + expected = ser.sort_values().head(n) + tm.assert_series_equal(result, expected) + + def test_nlargest_boundary_integer(self, nselect_method, any_int_dtype): + # GH#21426 + dtype_info = np.iinfo(any_int_dtype) + min_val, max_val = dtype_info.min, dtype_info.max + vals = [min_val, min_val + 1, max_val - 1, max_val] + assert_check_nselect_boundary(vals, any_int_dtype, nselect_method) + + def test_nlargest_boundary_float(self, nselect_method, float_dtype): + # GH#21426 + dtype_info = np.finfo(float_dtype) + min_val, max_val = dtype_info.min, dtype_info.max + min_2nd, max_2nd = np.nextafter([min_val, max_val], 0, dtype=float_dtype) + vals = [min_val, min_2nd, max_2nd, max_val] + assert_check_nselect_boundary(vals, float_dtype, nselect_method) + + @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"]) + def test_nlargest_boundary_datetimelike(self, nselect_method, dtype): + # GH#21426 + # use int64 bounds and +1 to min_val since true minimum is NaT + # (include min_val/NaT at end to maintain same expected_idxr) + dtype_info = np.iinfo("int64") + min_val, max_val = dtype_info.min, dtype_info.max + vals = [min_val + 1, min_val + 2, max_val - 1, max_val, min_val] + assert_check_nselect_boundary(vals, dtype, nselect_method) + + def test_nlargest_duplicate_keep_all_ties(self): + # see GH#16818 + ser = Series([10, 9, 8, 7, 7, 7, 
7, 6]) + result = ser.nlargest(4, keep="all") + expected = Series([10, 9, 8, 7, 7, 7, 7]) + tm.assert_series_equal(result, expected) + + result = ser.nsmallest(2, keep="all") + expected = Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data,expected", [([True, False], [True]), ([True, False, True, True], [True])] + ) + def test_nlargest_boolean(self, data, expected): + # GH#26154 : ensure True > False + ser = Series(data) + result = ser.nlargest(1) + expected = Series(expected) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_pct_change.py b/pandas/tests/series/methods/test_pct_change.py new file mode 100644 index 00000000..1efb5789 --- /dev/null +++ b/pandas/tests/series/methods/test_pct_change.py @@ -0,0 +1,79 @@ +import numpy as np +import pytest + +from pandas import Series, date_range +import pandas._testing as tm + + +class TestSeriesPctChange: + def test_pct_change(self, datetime_series): + rs = datetime_series.pct_change(fill_method=None) + tm.assert_series_equal(rs, datetime_series / datetime_series.shift(1) - 1) + + rs = datetime_series.pct_change(2) + filled = datetime_series.fillna(method="pad") + tm.assert_series_equal(rs, filled / filled.shift(2) - 1) + + rs = datetime_series.pct_change(fill_method="bfill", limit=1) + filled = datetime_series.fillna(method="bfill", limit=1) + tm.assert_series_equal(rs, filled / filled.shift(1) - 1) + + rs = datetime_series.pct_change(freq="5D") + filled = datetime_series.fillna(method="pad") + tm.assert_series_equal( + rs, (filled / filled.shift(freq="5D") - 1).reindex_like(filled) + ) + + def test_pct_change_with_duplicate_axis(self): + # GH#28664 + common_idx = date_range("2019-11-14", periods=5, freq="D") + result = Series(range(5), common_idx).pct_change(freq="B") + + # the reason that the expected should be like this is documented at PR 28681 + expected = Series([np.NaN, np.inf, np.NaN, np.NaN, 3.0], 
common_idx) + + tm.assert_series_equal(result, expected) + + def test_pct_change_shift_over_nas(self): + s = Series([1.0, 1.5, np.nan, 2.5, 3.0]) + + chg = s.pct_change() + expected = Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2]) + tm.assert_series_equal(chg, expected) + + @pytest.mark.parametrize( + "freq, periods, fill_method, limit", + [ + ("5B", 5, None, None), + ("3B", 3, None, None), + ("3B", 3, "bfill", None), + ("7B", 7, "pad", 1), + ("7B", 7, "bfill", 3), + ("14B", 14, None, None), + ], + ) + def test_pct_change_periods_freq( + self, freq, periods, fill_method, limit, datetime_series + ): + # GH#7292 + rs_freq = datetime_series.pct_change( + freq=freq, fill_method=fill_method, limit=limit + ) + rs_periods = datetime_series.pct_change( + periods, fill_method=fill_method, limit=limit + ) + tm.assert_series_equal(rs_freq, rs_periods) + + empty_ts = Series(index=datetime_series.index, dtype=object) + rs_freq = empty_ts.pct_change(freq=freq, fill_method=fill_method, limit=limit) + rs_periods = empty_ts.pct_change(periods, fill_method=fill_method, limit=limit) + tm.assert_series_equal(rs_freq, rs_periods) + + +@pytest.mark.parametrize("fill_method", ["pad", "ffill", None]) +def test_pct_change_with_duplicated_indices(fill_method): + # GH30463 + s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3) + result = s.pct_change(fill_method=fill_method) + expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_quantile.py b/pandas/tests/series/methods/test_quantile.py new file mode 100644 index 00000000..79f50afc --- /dev/null +++ b/pandas/tests/series/methods/test_quantile.py @@ -0,0 +1,216 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +import pandas as pd +from pandas import Index, Series +import pandas._testing as tm +from pandas.core.indexes.datetimes import Timestamp + + +class TestSeriesQuantile: + def 
test_quantile(self, datetime_series): + + q = datetime_series.quantile(0.1) + assert q == np.percentile(datetime_series.dropna(), 10) + + q = datetime_series.quantile(0.9) + assert q == np.percentile(datetime_series.dropna(), 90) + + # object dtype + q = Series(datetime_series, dtype=object).quantile(0.9) + assert q == np.percentile(datetime_series.dropna(), 90) + + # datetime64[ns] dtype + dts = datetime_series.index.to_series() + q = dts.quantile(0.2) + assert q == Timestamp("2000-01-10 19:12:00") + + # timedelta64[ns] dtype + tds = dts.diff() + q = tds.quantile(0.25) + assert q == pd.to_timedelta("24:00:00") + + # GH7661 + result = Series([np.timedelta64("NaT")]).sum() + assert result == pd.Timedelta(0) + + msg = "percentiles should all be in the interval \\[0, 1\\]" + for invalid in [-1, 2, [0.5, -1], [0.5, 2]]: + with pytest.raises(ValueError, match=msg): + datetime_series.quantile(invalid) + + def test_quantile_multi(self, datetime_series): + + qs = [0.1, 0.9] + result = datetime_series.quantile(qs) + expected = pd.Series( + [ + np.percentile(datetime_series.dropna(), 10), + np.percentile(datetime_series.dropna(), 90), + ], + index=qs, + name=datetime_series.name, + ) + tm.assert_series_equal(result, expected) + + dts = datetime_series.index.to_series() + dts.name = "xxx" + result = dts.quantile((0.2, 0.2)) + expected = Series( + [Timestamp("2000-01-10 19:12:00"), Timestamp("2000-01-10 19:12:00")], + index=[0.2, 0.2], + name="xxx", + ) + tm.assert_series_equal(result, expected) + + result = datetime_series.quantile([]) + expected = pd.Series( + [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64" + ) + tm.assert_series_equal(result, expected) + + def test_quantile_interpolation(self, datetime_series): + # see gh-10174 + + # interpolation = linear (default case) + q = datetime_series.quantile(0.1, interpolation="linear") + assert q == np.percentile(datetime_series.dropna(), 10) + q1 = datetime_series.quantile(0.1) + assert q1 == 
np.percentile(datetime_series.dropna(), 10) + + # test with and without interpolation keyword + assert q == q1 + + def test_quantile_interpolation_dtype(self): + # GH #10174 + + # interpolation = lower / higher: the result must stay an integer + q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="lower") + assert q == np.percentile(np.array([1, 3, 4]), 50) + assert is_integer(q) + + q = pd.Series([1, 3, 4]).quantile(0.5, interpolation="higher") + assert q == np.percentile(np.array([1, 3, 4]), 50) + assert is_integer(q) + + def test_quantile_nan(self): + + # GH 13098 + s = pd.Series([1, 2, 3, 4, np.nan]) + result = s.quantile(0.5) + expected = 2.5 + assert result == expected + + # all nan/empty + s1 = Series([], dtype=object) + cases = [s1, Series([np.nan, np.nan])] + + for s in cases: + res = s.quantile(0.5) + assert np.isnan(res) + + res = s.quantile([0.5]) + tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5])) + + res = s.quantile([0.2, 0.3]) + tm.assert_series_equal(res, pd.Series([np.nan, np.nan], index=[0.2, 0.3])) + + @pytest.mark.parametrize( + "case", + [ + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + ], + [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), + ], + [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")], + # NaT + [ + pd.Timestamp("2011-01-01"), + pd.Timestamp("2011-01-02"), + pd.Timestamp("2011-01-03"), + pd.NaT, + ], + [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + pd.Timestamp("2011-01-03", tz="US/Eastern"), + pd.NaT, + ], + [ + pd.Timedelta("1 days"), + pd.Timedelta("2 days"), + pd.Timedelta("3 days"), + pd.NaT, + ], + ], + ) + def test_quantile_box(self, case): + s = pd.Series(case, name="XXX") + res = s.quantile(0.5) + assert res == case[1] + + res = s.quantile([0.5]) + exp = pd.Series([case[1]], index=[0.5], name="XXX") + 
tm.assert_series_equal(res, exp) + + def test_datetime_timedelta_quantiles(self): + # covers #9694 + assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5)) + assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5)) + + def test_quantile_nat(self): + res = Series([pd.NaT, pd.NaT]).quantile(0.5) + assert res is pd.NaT + + res = Series([pd.NaT, pd.NaT]).quantile([0.5]) + tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5])) + + @pytest.mark.parametrize( + "values, dtype", + [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")], + ) + def test_quantile_sparse(self, values, dtype): + ser = pd.Series(values, dtype=dtype) + result = ser.quantile([0.5]) + expected = pd.Series(np.asarray(ser)).quantile([0.5]) + tm.assert_series_equal(result, expected) + + def test_quantile_empty(self): + + # floats + s = Series([], dtype="float64") + + res = s.quantile(0.5) + assert np.isnan(res) + + res = s.quantile([0.5]) + exp = Series([np.nan], index=[0.5]) + tm.assert_series_equal(res, exp) + + # int + s = Series([], dtype="int64") + + res = s.quantile(0.5) + assert np.isnan(res) + + res = s.quantile([0.5]) + exp = Series([np.nan], index=[0.5]) + tm.assert_series_equal(res, exp) + + # datetime + s = Series([], dtype="datetime64[ns]") + + res = s.quantile(0.5) + assert res is pd.NaT + + res = s.quantile([0.5]) + exp = Series([pd.NaT], index=[0.5]) + tm.assert_series_equal(res, exp) diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py new file mode 100644 index 00000000..3d4688c8 --- /dev/null +++ b/pandas/tests/series/methods/test_rank.py @@ -0,0 +1,565 @@ +from itertools import chain, product + +import numpy as np +import pytest + +from pandas._libs.algos import Infinity, NegInfinity +from pandas._libs.tslib import iNaT +import pandas.util._test_decorators as td + +from pandas import NaT, Series, Timestamp, date_range +import pandas._testing as tm +from pandas.api.types import CategoricalDtype + + +class 
TestSeriesRank: + s = Series([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]) + + results = { + "average": np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5]), + "min": np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5]), + "max": np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6]), + "first": np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6]), + "dense": np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]), + } + + def test_rank(self, datetime_series): + pytest.importorskip("scipy.stats.special") + rankdata = pytest.importorskip("scipy.stats.rankdata") + + datetime_series[::2] = np.nan + datetime_series[:10][::3] = 4.0 + + ranks = datetime_series.rank() + oranks = datetime_series.astype("O").rank() + + tm.assert_series_equal(ranks, oranks) + + mask = np.isnan(datetime_series) + filled = datetime_series.fillna(np.inf) + + # rankdata returns a ndarray + exp = Series(rankdata(filled), index=filled.index, name="ts") + exp[mask] = np.nan + + tm.assert_series_equal(ranks, exp) + + iseries = Series(np.arange(5).repeat(2)) + + iranks = iseries.rank() + exp = iseries.astype(float).rank() + tm.assert_series_equal(iranks, exp) + iseries = Series(np.arange(5)) + 1.0 + exp = iseries / 5.0 + iranks = iseries.rank(pct=True) + + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(1, 100)) + exp = Series(np.repeat(0.505, 100)) + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries[1] = np.nan + exp = Series(np.repeat(50.0 / 99.0, 100)) + exp[1] = np.nan + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1.0 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.repeat(np.nan, 100)) + exp = iseries.copy() + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series(np.arange(5)) + 1 + iseries[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + 
tm.assert_series_equal(iranks, exp) + + rng = date_range("1/1/1990", periods=5) + iseries = Series(np.arange(5), rng) + 1 + iseries.iloc[4] = np.nan + exp = iseries / 4.0 + iranks = iseries.rank(pct=True) + tm.assert_series_equal(iranks, exp) + + iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) + exp = Series([2, 1, 3, 5, 4, 6.0]) + iranks = iseries.rank() + tm.assert_series_equal(iranks, exp) + + # GH 5968 + iseries = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]") + exp = Series([3, 2, 1, np.nan]) + iranks = iseries.rank() + tm.assert_series_equal(iranks, exp) + + values = np.array( + [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40], + dtype="float64", + ) + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(random_order + 1.0, dtype="float64") + iranks = iseries.rank() + tm.assert_series_equal(iranks, exp) + + def test_rank_categorical(self): + # GH issue #15420 rank incorrectly orders ordered categories + + # Test ascending/descending ranking for ordered categoricals + exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0]) + ordered = Series( + ["first", "second", "third", "fourth", "fifth", "sixth"] + ).astype( + CategoricalDtype( + categories=["first", "second", "third", "fourth", "fifth", "sixth"], + ordered=True, + ) + ) + tm.assert_series_equal(ordered.rank(), exp) + tm.assert_series_equal(ordered.rank(ascending=False), exp_desc) + + # Unordered categoricals should be ranked as objects + unordered = Series( + ["first", "second", "third", "fourth", "fifth", "sixth"] + ).astype( + CategoricalDtype( + categories=["first", "second", "third", "fourth", "fifth", "sixth"], + ordered=False, + ) + ) + exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0]) + res = unordered.rank() + tm.assert_series_equal(res, exp_unordered) + + unordered1 = Series([1, 2, 3, 4, 5, 6]).astype( + CategoricalDtype([1, 2, 3, 4, 5, 6], False) + 
) + exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + res1 = unordered1.rank() + tm.assert_series_equal(res1, exp_unordered1) + + # Test na_option for rank data + na_ser = Series( + ["first", "second", "third", "fourth", "fifth", "sixth", np.NaN] + ).astype( + CategoricalDtype( + ["first", "second", "third", "fourth", "fifth", "sixth", "seventh"], + True, + ) + ) + + exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0]) + exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) + exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.NaN]) + + tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top) + tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot) + tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep) + + # Test na_option for rank data with ascending False + exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0]) + exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0]) + exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.NaN]) + + tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top) + tm.assert_series_equal( + na_ser.rank(na_option="bottom", ascending=False), exp_bot + ) + tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep) + + # Test invalid values for na_option + msg = "na_option must be one of 'keep', 'top', or 'bottom'" + + with pytest.raises(ValueError, match=msg): + na_ser.rank(na_option="bad", ascending=False) + + # invalid type + with pytest.raises(ValueError, match=msg): + na_ser.rank(na_option=True, ascending=False) + + # Test with pct=True + na_ser = Series(["first", "second", "third", "fourth", np.NaN]).astype( + CategoricalDtype(["first", "second", "third", "fourth"], True) + ) + exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2]) + exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0]) + exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.NaN]) + + tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top) + tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), 
exp_bot) + tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep) + + def test_rank_signature(self): + s = Series([0, 1]) + s.rank(method="average") + msg = ( + "No axis named average for object type " + ) + with pytest.raises(ValueError, match=msg): + s.rank("average") + + @pytest.mark.parametrize( + "contents,dtype", + [ + ( + [ + -np.inf, + -50, + -1, + -1e-20, + -1e-25, + -1e-50, + 0, + 1e-40, + 1e-20, + 1e-10, + 2, + 40, + np.inf, + ], + "float64", + ), + ( + [ + -np.inf, + -50, + -1, + -1e-20, + -1e-25, + -1e-45, + 0, + 1e-40, + 1e-20, + 1e-10, + 2, + 40, + np.inf, + ], + "float32", + ), + ([np.iinfo(np.uint8).min, 1, 2, 100, np.iinfo(np.uint8).max], "uint8"), + pytest.param( + [ + np.iinfo(np.int64).min, + -100, + 0, + 1, + 9999, + 100000, + 1e10, + np.iinfo(np.int64).max, + ], + "int64", + marks=pytest.mark.xfail( + reason="iNaT is equivalent to minimum value of dtype" + "int64 pending issue GH#16674" + ), + ), + ([NegInfinity(), "1", "A", "BA", "Ba", "C", Infinity()], "object"), + ], + ) + def test_rank_inf(self, contents, dtype): + dtype_na_map = { + "float64": np.nan, + "float32": np.nan, + "int64": iNaT, + "object": None, + } + # Insert nans at random positions if underlying dtype has missing + # value. Then adjust the expected order by adding nans accordingly + # This is for testing whether rank calculation is affected + # when values are intertwined with nan values. 
+ values = np.array(contents, dtype=dtype) + exp_order = np.array(range(len(values)), dtype="float64") + 1.0 + if dtype in dtype_na_map: + na_value = dtype_na_map[dtype] + nan_indices = np.random.choice(range(len(values)), 5) + values = np.insert(values, nan_indices, na_value) + exp_order = np.insert(exp_order, nan_indices, np.nan) + # shuffle the testing array and expected results in the same way + random_order = np.random.permutation(len(values)) + iseries = Series(values[random_order]) + exp = Series(exp_order[random_order], dtype="float64") + iranks = iseries.rank() + tm.assert_series_equal(iranks, exp) + + def test_rank_tie_methods(self): + s = self.s + + def _check(s, expected, method="average"): + result = s.rank(method=method) + tm.assert_series_equal(result, Series(expected)) + + dtypes = [None, object] + disabled = {(object, "first")} + results = self.results + + for method, dtype in product(results, dtypes): + if (dtype, method) in disabled: + continue + series = s if dtype is None else s.astype(dtype) + _check(series, results[method], method=method) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("ascending", [True, False]) + @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"]) + @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"]) + def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending): + dtypes = [ + ("object", None, Infinity(), NegInfinity()), + ("float64", np.nan, np.inf, -np.inf), + ] + chunk = 3 + disabled = {("object", "first")} + + def _check(s, method, na_option, ascending): + exp_ranks = { + "average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]), + "min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]), + "max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]), + "first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]), + "dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]), + } + ranks = exp_ranks[method] + if na_option == "top": + order = [ranks[1], ranks[0], ranks[2]] + elif na_option == "bottom": + order = [ranks[0], ranks[2], ranks[1]] + 
else: + order = [ranks[0], [np.nan] * chunk, ranks[1]] + expected = order if ascending else order[::-1] + expected = list(chain.from_iterable(expected)) + result = s.rank(method=method, na_option=na_option, ascending=ascending) + tm.assert_series_equal(result, Series(expected, dtype="float64")) + + for dtype, na_value, pos_inf, neg_inf in dtypes: + in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk + iseries = Series(in_arr, dtype=dtype) + if (dtype, method) in disabled: + continue + _check(iseries, method, na_option, ascending) + + def test_rank_desc_mix_nans_infs(self): + # GH 19538 + # check descending ranking when mix nans and infs + iseries = Series([1, np.nan, np.inf, -np.inf, 25]) + result = iseries.rank(ascending=False) + exp = Series([3, np.nan, 1, 4, 2], dtype="float64") + tm.assert_series_equal(result, exp) + + def test_rank_methods_series(self): + pytest.importorskip("scipy.stats.special") + rankdata = pytest.importorskip("scipy.stats.rankdata") + + xs = np.random.randn(9) + xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates + np.random.shuffle(xs) + + index = [chr(ord("a") + i) for i in range(len(xs))] + + for vals in [xs, xs + 1e6, xs * 1e-6]: + ts = Series(vals, index=index) + + for m in ["average", "min", "max", "first", "dense"]: + result = ts.rank(method=m) + sprank = rankdata(vals, m if m != "first" else "ordinal") + expected = Series(sprank, index=index).astype("float64") + tm.assert_series_equal(result, expected) + + def test_rank_dense_method(self): + dtypes = ["O", "f8", "i8"] + in_out = [ + ([1], [1]), + ([2], [1]), + ([0], [1]), + ([2, 2], [1, 1]), + ([1, 2, 3], [1, 2, 3]), + ([4, 2, 1], [3, 2, 1]), + ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), + ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]), + ] + + for ser, exp in in_out: + for dtype in dtypes: + s = Series(ser).astype(dtype) + result = s.rank(method="dense") + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + def 
test_rank_descending(self): + dtypes = ["O", "f8", "i8"] + + for dtype, method in product(dtypes, self.results): + if "i" in dtype: + s = self.s.dropna() + else: + s = self.s.astype(dtype) + + res = s.rank(ascending=False) + expected = (s.max() - s).rank() + tm.assert_series_equal(res, expected) + + if method == "first" and dtype == "O": + continue + + expected = (s.max() - s).rank(method=method) + res2 = s.rank(method=method, ascending=False) + tm.assert_series_equal(res2, expected) + + def test_rank_int(self): + s = self.s.dropna().astype("i8") + + for method, res in self.results.items(): + result = s.rank(method=method) + expected = Series(res).dropna() + expected.index = result.index + tm.assert_series_equal(result, expected) + + def test_rank_object_bug(self): + # GH 13445 + + # smoke tests + Series([np.nan] * 32).astype(object).rank(ascending=True) + Series([np.nan] * 32).astype(object).rank(ascending=False) + + def test_rank_modify_inplace(self): + # GH 18521 + # Check rank does not mutate series + s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT]) + expected = s.copy() + + s.rank() + result = s + tm.assert_series_equal(result, expected) + + +# GH15630, pct should be on 100% basis when method='dense' + + +@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.0, 1.0]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]), + ([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_dense_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="dense", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + 
+@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.0 / 2, 1.0 / 2]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_min_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="min", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.0, 1.0]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_max_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="max", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["O", "f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.5 / 2, 1.5 / 2]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 
4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_average_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="average", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["f8", "i8"]) +@pytest.mark.parametrize( + "ser, exp", + [ + ([1], [1.0]), + ([1, 2], [1.0 / 2, 2.0 / 2]), + ([2, 2], [1.0 / 2, 2.0 / 2.0]), + ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]), + ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]), + ([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]), + ([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]), + ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]), + ], +) +def test_rank_first_pct(dtype, ser, exp): + s = Series(ser).astype(dtype) + result = s.rank(method="first", pct=True) + expected = Series(exp).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +@pytest.mark.single +@pytest.mark.high_memory +def test_pct_max_many_rows(): + # GH 18271 + s = Series(np.arange(2 ** 24 + 1)) + result = s.rank(pct=True).max() + assert result == 1 diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py new file mode 100644 index 00000000..e5ccf166 --- /dev/null +++ b/pandas/tests/series/methods/test_replace.py @@ -0,0 +1,369 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestSeriesReplace: + def test_replace(self, datetime_series): + N = 100 + ser = pd.Series(np.random.randn(N)) + ser[0:4] = np.nan + ser[6:10] = 0 + + # replace list with a single value + ser.replace([np.nan], -1, inplace=True) + + exp = ser.fillna(-1) + tm.assert_series_equal(ser, exp) + + rs = ser.replace(0.0, np.nan) + ser[ser == 0.0] = np.nan + tm.assert_series_equal(rs, ser) + + ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) + ser[:5] = np.nan + 
ser[6:10] = "foo" + ser[20:30] = "bar" + + # replace list with a single value + rs = ser.replace([np.nan, "foo", "bar"], -1) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values + rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) + tm.assert_series_equal(rs, rs2) + + # replace inplace + ser.replace([np.nan, "foo", "bar"], -1, inplace=True) + + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() + + ser = pd.Series([np.nan, 0, np.inf]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT]) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + filled = ser.copy() + filled[4] = 0 + tm.assert_series_equal(ser.replace(np.inf, 0), filled) + + ser = pd.Series(datetime_series.index) + tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0)) + + # malformed + msg = r"Replacement lists must match in length\. Expecting 3 got 2" + with pytest.raises(ValueError, match=msg): + ser.replace([1, 2, 3], [np.nan, 0]) + + # make sure that we aren't just masking a TypeError because bools don't + # implement indexing + with pytest.raises(TypeError, match="Cannot compare types .+"): + ser.replace([1, 2], [np.nan, 0]) + + ser = pd.Series([0, 1, 2, 3, 4]) + result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) + tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0])) + + def test_replace_gh5319(self): + # API change from 0.12? 
+ # GH 5319 + ser = pd.Series([0, np.nan, 2, 3, 4]) + expected = ser.ffill() + result = ser.replace([np.nan]) + tm.assert_series_equal(result, expected) + + ser = pd.Series([0, np.nan, 2, 3, 4]) + expected = ser.ffill() + result = ser.replace(np.nan) + tm.assert_series_equal(result, expected) + # GH 5797 + ser = pd.Series(pd.date_range("20130101", periods=5)) + expected = ser.copy() + expected.loc[2] = pd.Timestamp("20120101") + result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")}) + tm.assert_series_equal(result, expected) + result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101")) + tm.assert_series_equal(result, expected) + + # GH 11792: Test with replacing NaT in a list with tz data + ts = pd.Timestamp("2015/01/01", tz="UTC") + s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")]) + result = s.replace([np.nan, pd.NaT], pd.Timestamp.min) + expected = pd.Series([pd.Timestamp.min, ts], dtype=object) + tm.assert_series_equal(expected, result) + + def test_replace_with_single_list(self): + ser = pd.Series([0, 1, 2, 3, 4]) + result = ser.replace([1, 2, 3]) + tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4])) + + s = ser.copy() + s.replace([1, 2, 3], inplace=True) + tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4])) + + # make sure things don't get corrupted when fillna call fails + s = ser.copy() + msg = ( + r"Invalid fill method\. Expecting pad \(ffill\) or backfill" + r" \(bfill\)\. 
Got crash_cymbal" + ) + with pytest.raises(ValueError, match=msg): + s.replace([1, 2, 3], inplace=True, method="crash_cymbal") + tm.assert_series_equal(s, ser) + + def test_replace_with_empty_list(self): + # GH 21977 + s = pd.Series([[1], [2, 3], [], np.nan, [4]]) + expected = s + result = s.replace([], np.nan) + tm.assert_series_equal(result, expected) + + # GH 19266 + with pytest.raises(ValueError, match="cannot assign mismatch"): + s.replace({np.nan: []}) + with pytest.raises(ValueError, match="cannot assign mismatch"): + s.replace({np.nan: ["dummy", "alt"]}) + + def test_replace_mixed_types(self): + s = pd.Series(np.arange(5), dtype="int64") + + def check_replace(to_rep, val, expected): + sc = s.copy() + r = s.replace(to_rep, val) + sc.replace(to_rep, val, inplace=True) + tm.assert_series_equal(expected, r) + tm.assert_series_equal(expected, sc) + + # MUST upcast to float + e = pd.Series([0.0, 1.0, 2.0, 3.0, 4.0]) + tr, v = [3], [3.0] + check_replace(tr, v, e) + + # MUST upcast to float + e = pd.Series([0, 1, 2, 3.5, 4]) + tr, v = [3], [3.5] + check_replace(tr, v, e) + + # casts to object + e = pd.Series([0, 1, 2, 3.5, "a"]) + tr, v = [3, 4], [3.5, "a"] + check_replace(tr, v, e) + + # again casts to object + e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")]) + tr, v = [3, 4], [3.5, pd.Timestamp("20130101")] + check_replace(tr, v, e) + + # casts to object + e = pd.Series([0, 1, 2, 3.5, True], dtype="object") + tr, v = [3, 4], [3.5, True] + check_replace(tr, v, e) + + # test an object with dates + floats + integers + strings + dr = ( + pd.date_range("1/1/2001", "1/10/2001", freq="D") + .to_series() + .reset_index(drop=True) + ) + result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"]) + expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object) + tm.assert_series_equal(result, expected) + + def test_replace_bool_with_string_no_op(self): + s = pd.Series([True, False, True]) + result = s.replace("fun", "in-the-sun") + 
tm.assert_series_equal(s, result) + + def test_replace_bool_with_string(self): + # nonexistent elements + s = pd.Series([True, False, True]) + result = s.replace(True, "2u") + expected = pd.Series(["2u", False, "2u"]) + tm.assert_series_equal(expected, result) + + def test_replace_bool_with_bool(self): + s = pd.Series([True, False, True]) + result = s.replace(True, False) + expected = pd.Series([False] * len(s)) + tm.assert_series_equal(expected, result) + + def test_replace_with_dict_with_bool_keys(self): + s = pd.Series([True, False, True]) + with pytest.raises(TypeError, match="Cannot compare types .+"): + s.replace({"asdf": "asdb", True: "yes"}) + + def test_replace2(self): + N = 100 + ser = pd.Series(np.fabs(np.random.randn(N)), tm.makeDateIndex(N), dtype=object) + ser[:5] = np.nan + ser[6:10] = "foo" + ser[20:30] = "bar" + + # replace list with a single value + rs = ser.replace([np.nan, "foo", "bar"], -1) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -1).all() + assert (rs[20:30] == -1).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values + rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3}) + + assert (rs[:5] == -1).all() + assert (rs[6:10] == -2).all() + assert (rs[20:30] == -3).all() + assert (pd.isna(ser[:5])).all() + + # replace with different values with 2 lists + rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3]) + tm.assert_series_equal(rs, rs2) + + # replace inplace + ser.replace([np.nan, "foo", "bar"], -1, inplace=True) + assert (ser[:5] == -1).all() + assert (ser[6:10] == -1).all() + assert (ser[20:30] == -1).all() + + def test_replace_with_empty_dictlike(self): + # GH 15289 + s = pd.Series(list("abcd")) + tm.assert_series_equal(s, s.replace(dict())) + + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty_series = pd.Series([]) + tm.assert_series_equal(s, s.replace(empty_series)) + + def test_replace_string_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = 
s.replace("2", np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_replacer_equals_replacement(self): + # GH 20656 + # make sure all replacers are matching against original values + s = pd.Series(["a", "b"]) + expected = pd.Series(["b", "a"]) + result = s.replace({"a": "b", "b": "a"}) + tm.assert_series_equal(expected, result) + + def test_replace_unicode_with_number(self): + # GH 15743 + s = pd.Series([1, 2, 3]) + result = s.replace("2", np.nan) + expected = pd.Series([1, 2, 3]) + tm.assert_series_equal(expected, result) + + def test_replace_mixed_types_with_string(self): + # Testing mixed + s = pd.Series([1, 2, 3, "4", 4, 5]) + result = s.replace([2, "4"], np.nan) + expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) + tm.assert_series_equal(expected, result) + + @pytest.mark.parametrize( + "categorical, numeric", + [ + (pd.Categorical("A", categories=["A", "B"]), [1]), + (pd.Categorical(("A",), categories=["A", "B"]), [1]), + (pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]), + ], + ) + def test_replace_categorical(self, categorical, numeric): + # GH 24971 + # Do not check if dtypes are equal due to a known issue that + # Categorical.replace sometimes coerces to object (GH 23305) + s = pd.Series(categorical) + result = s.replace({"A": 1, "B": 2}) + expected = pd.Series(numeric) + tm.assert_series_equal(expected, result, check_dtype=False) + + def test_replace_categorical_single(self): + # GH 26988 + dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific") + s = pd.Series(dti) + c = s.astype("category") + + expected = c.copy() + expected = expected.cat.add_categories("foo") + expected[2] = "foo" + expected = expected.cat.remove_unused_categories() + assert c[2] != "foo" + + result = c.replace(c[2], "foo") + tm.assert_series_equal(expected, result) + assert c[2] != "foo" # ensure non-inplace call does not alter original + + c.replace(c[2], "foo", inplace=True) + tm.assert_series_equal(expected, c) 
+ + first_value = c[0] + c.replace(c[1], c[0], inplace=True) + assert c[0] == c[1] == first_value # test replacing with existing value + + def test_replace_with_no_overflowerror(self): + # GH 25616 + # casts to object without Exception from OverflowError + s = pd.Series([0, 1, 2, 3, 4]) + result = s.replace([3], ["100000000000000000000"]) + expected = pd.Series([0, 1, 2, "100000000000000000000", 4]) + tm.assert_series_equal(result, expected) + + s = pd.Series([0, "100000000000000000000", "100000000000000000001"]) + result = s.replace(["100000000000000000000"], [1]) + expected = pd.Series([0, 1, "100000000000000000001"]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ser, to_replace, exp", + [ + ([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]), + (["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]), + ], + ) + def test_replace_commutative(self, ser, to_replace, exp): + # GH 16051 + # DataFrame.replace() overwrites when values are non-numeric + + series = pd.Series(ser) + + expected = pd.Series(exp) + result = series.replace(to_replace) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])] + ) + def test_replace_no_cast(self, ser, exp): + # GH 9113 + # BUG: replace int64 dtype with bool coerces to int64 + + series = pd.Series(ser) + result = series.replace(2, True) + expected = pd.Series(exp) + + tm.assert_series_equal(result, expected) + + def test_replace_extension_other(self): + # https://github.com/pandas-dev/pandas/issues/34530 + ser = pd.Series(pd.array([1, 2, 3], dtype="Int64")) + ser.replace("", "") # no exception diff --git a/pandas/tests/series/methods/test_round.py b/pandas/tests/series/methods/test_round.py new file mode 100644 index 00000000..7f0711a0 --- /dev/null +++ b/pandas/tests/series/methods/test_round.py @@ -0,0 +1,46 @@ +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm + + 
+class TestSeriesRound: + def test_round(self, datetime_series): + datetime_series.index.name = "index_name" + result = datetime_series.round(2) + expected = Series( + np.round(datetime_series.values, 2), index=datetime_series.index, name="ts" + ) + tm.assert_series_equal(result, expected) + assert result.name == datetime_series.name + + def test_round_numpy(self): + # See GH#12600 + ser = Series([1.53, 1.36, 0.06]) + out = np.round(ser, decimals=0) + expected = Series([2.0, 1.0, 0.0]) + tm.assert_series_equal(out, expected) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.round(ser, decimals=0, out=ser) + + def test_round_numpy_with_nan(self): + # See GH#14197 + ser = Series([1.53, np.nan, 0.06]) + with tm.assert_produces_warning(None): + result = ser.round() + expected = Series([2.0, np.nan, 0.0]) + tm.assert_series_equal(result, expected) + + def test_round_builtin(self): + ser = Series([1.123, 2.123, 3.123], index=range(3)) + result = round(ser) + expected_rounded0 = Series([1.0, 2.0, 3.0], index=range(3)) + tm.assert_series_equal(result, expected_rounded0) + + decimals = 2 + expected_rounded = Series([1.12, 2.12, 3.12], index=range(3)) + result = round(ser, decimals) + tm.assert_series_equal(result, expected_rounded) diff --git a/pandas/tests/series/methods/test_searchsorted.py b/pandas/tests/series/methods/test_searchsorted.py new file mode 100644 index 00000000..fd6c6f74 --- /dev/null +++ b/pandas/tests/series/methods/test_searchsorted.py @@ -0,0 +1,55 @@ +import numpy as np + +from pandas import Series, Timestamp, date_range +import pandas._testing as tm +from pandas.api.types import is_scalar + + +class TestSeriesSearchSorted: + def test_searchsorted(self): + ser = Series([1, 2, 3]) + + result = ser.searchsorted(1, side="left") + assert is_scalar(result) + assert result == 0 + + result = ser.searchsorted(1, side="right") + assert is_scalar(result) + assert result == 1 + + def 
test_searchsorted_numeric_dtypes_scalar(self): + ser = Series([1, 2, 90, 1000, 3e9]) + res = ser.searchsorted(30) + assert is_scalar(res) + assert res == 2 + + res = ser.searchsorted([30]) + exp = np.array([2], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) + + def test_searchsorted_numeric_dtypes_vector(self): + ser = Series([1, 2, 90, 1000, 3e9]) + res = ser.searchsorted([91, 2e6]) + exp = np.array([3, 4], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) + + def test_searchsorted_datetime64_scalar(self): + ser = Series(date_range("20120101", periods=10, freq="2D")) + val = Timestamp("20120102") + res = ser.searchsorted(val) + assert is_scalar(res) + assert res == 1 + + def test_searchsorted_datetime64_list(self): + ser = Series(date_range("20120101", periods=10, freq="2D")) + vals = [Timestamp("20120102"), Timestamp("20120104")] + res = ser.searchsorted(vals) + exp = np.array([1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) + + def test_searchsorted_sorter(self): + # GH8490 + ser = Series([3, 1, 2]) + res = ser.searchsorted([0, 3], sorter=np.argsort(ser)) + exp = np.array([0, 2], dtype=np.intp) + tm.assert_numpy_array_equal(res, exp) diff --git a/pandas/tests/series/methods/test_shift.py b/pandas/tests/series/methods/test_shift.py new file mode 100644 index 00000000..e8d7f595 --- /dev/null +++ b/pandas/tests/series/methods/test_shift.py @@ -0,0 +1,275 @@ +import numpy as np +import pytest + +from pandas.errors import NullFrequencyError + +import pandas as pd +from pandas import ( + DatetimeIndex, + Index, + NaT, + Series, + TimedeltaIndex, + date_range, + offsets, +) +import pandas._testing as tm + +from pandas.tseries.offsets import BDay + + +class TestShift: + def test_shift(self, datetime_series): + shifted = datetime_series.shift(1) + unshifted = shifted.shift(-1) + + tm.assert_index_equal(shifted.index, datetime_series.index) + tm.assert_index_equal(unshifted.index, datetime_series.index) + tm.assert_numpy_array_equal( + 
unshifted.dropna().values, datetime_series.values[:-1] + ) + + offset = BDay() + shifted = datetime_series.shift(1, freq=offset) + unshifted = shifted.shift(-1, freq=offset) + + tm.assert_series_equal(unshifted, datetime_series) + + unshifted = datetime_series.shift(0, freq=offset) + tm.assert_series_equal(unshifted, datetime_series) + + shifted = datetime_series.shift(1, freq="B") + unshifted = shifted.shift(-1, freq="B") + + tm.assert_series_equal(unshifted, datetime_series) + + # corner case + unshifted = datetime_series.shift(0) + tm.assert_series_equal(unshifted, datetime_series) + + # Shifting with PeriodIndex + ps = tm.makePeriodSeries() + shifted = ps.shift(1) + unshifted = shifted.shift(-1) + tm.assert_index_equal(shifted.index, ps.index) + tm.assert_index_equal(unshifted.index, ps.index) + tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) + + shifted2 = ps.shift(1, "B") + shifted3 = ps.shift(1, BDay()) + tm.assert_series_equal(shifted2, shifted3) + tm.assert_series_equal(ps, shifted2.shift(-1, "B")) + + msg = "Given freq D does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.shift(freq="D") + + # legacy support + shifted4 = ps.shift(1, freq="B") + tm.assert_series_equal(shifted2, shifted4) + + shifted5 = ps.shift(1, freq=BDay()) + tm.assert_series_equal(shifted5, shifted4) + + # 32-bit taking + # GH#8129 + index = date_range("2000-01-01", periods=5) + for dtype in ["int32", "int64"]: + s1 = Series(np.arange(5, dtype=dtype), index=index) + p = s1.iloc[1] + result = s1.shift(periods=p) + expected = Series([np.nan, 0, 1, 2, 3], index=index) + tm.assert_series_equal(result, expected) + + # GH#8260 + # with tz + s = Series( + date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo" + ) + result = s - s.shift() + + exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") + tm.assert_series_equal(result, exp) + + # incompat tz + s2 = Series(date_range("2000-01-01 09:00:00", 
periods=5, tz="CET"), name="foo") + msg = "DatetimeArray subtraction must have the same timezones or no timezones" + with pytest.raises(TypeError, match=msg): + s - s2 + + def test_shift2(self): + ts = Series( + np.random.randn(5), index=date_range("1/1/2000", periods=5, freq="H") + ) + + result = ts.shift(1, freq="5T") + exp_index = ts.index.shift(1, freq="5T") + tm.assert_index_equal(result.index, exp_index) + + # GH#1063, multiple of same base + result = ts.shift(1, freq="4H") + exp_index = ts.index + offsets.Hour(4) + tm.assert_index_equal(result.index, exp_index) + + idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"]) + msg = "Cannot shift with no freq" + with pytest.raises(NullFrequencyError, match=msg): + idx.shift(1) + + def test_shift_fill_value(self): + # GH#24128 + ts = Series( + [1.0, 2.0, 3.0, 4.0, 5.0], index=date_range("1/1/2000", periods=5, freq="H") + ) + + exp = Series( + [0.0, 1.0, 2.0, 3.0, 4.0], index=date_range("1/1/2000", periods=5, freq="H") + ) + # check that fill value works + result = ts.shift(1, fill_value=0.0) + tm.assert_series_equal(result, exp) + + exp = Series( + [0.0, 0.0, 1.0, 2.0, 3.0], index=date_range("1/1/2000", periods=5, freq="H") + ) + result = ts.shift(2, fill_value=0.0) + tm.assert_series_equal(result, exp) + + ts = pd.Series([1, 2, 3]) + res = ts.shift(2, fill_value=0) + assert res.dtype == ts.dtype + + def test_shift_categorical_fill_value(self): + ts = pd.Series(["a", "b", "c", "d"], dtype="category") + res = ts.shift(1, fill_value="a") + expected = pd.Series( + pd.Categorical( + ["a", "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False + ) + ) + tm.assert_equal(res, expected) + + # check for incorrect fill_value + msg = "'fill_value=f' is not present in this Categorical's categories" + with pytest.raises(ValueError, match=msg): + ts.shift(1, fill_value="f") + + def test_shift_dst(self): + # GH#13926 + dates = date_range("2016-11-06", freq="H", periods=10, tz="US/Eastern") + s = Series(dates) + + 
res = s.shift(0) + tm.assert_series_equal(res, s) + assert res.dtype == "datetime64[ns, US/Eastern]" + + res = s.shift(1) + exp_vals = [NaT] + dates.astype(object).values.tolist()[:9] + exp = Series(exp_vals) + tm.assert_series_equal(res, exp) + assert res.dtype == "datetime64[ns, US/Eastern]" + + res = s.shift(-2) + exp_vals = dates.astype(object).values.tolist()[2:] + [NaT, NaT] + exp = Series(exp_vals) + tm.assert_series_equal(res, exp) + assert res.dtype == "datetime64[ns, US/Eastern]" + + for ex in [10, -10, 20, -20]: + res = s.shift(ex) + exp = Series([NaT] * 10, dtype="datetime64[ns, US/Eastern]") + tm.assert_series_equal(res, exp) + assert res.dtype == "datetime64[ns, US/Eastern]" + + def test_tshift(self, datetime_series): + # PeriodIndex + ps = tm.makePeriodSeries() + shifted = ps.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_series_equal(unshifted, ps) + + shifted2 = ps.tshift(freq="B") + tm.assert_series_equal(shifted, shifted2) + + shifted3 = ps.tshift(freq=BDay()) + tm.assert_series_equal(shifted, shifted3) + + msg = "Given freq M does not match PeriodIndex freq B" + with pytest.raises(ValueError, match=msg): + ps.tshift(freq="M") + + # DatetimeIndex + shifted = datetime_series.tshift(1) + unshifted = shifted.tshift(-1) + + tm.assert_series_equal(datetime_series, unshifted) + + shifted2 = datetime_series.tshift(freq=datetime_series.index.freq) + tm.assert_series_equal(shifted, shifted2) + + inferred_ts = Series( + datetime_series.values, Index(np.asarray(datetime_series.index)), name="ts" + ) + shifted = inferred_ts.tshift(1) + unshifted = shifted.tshift(-1) + tm.assert_series_equal(shifted, datetime_series.tshift(1)) + tm.assert_series_equal(unshifted, inferred_ts) + + no_freq = datetime_series[[0, 5, 7]] + msg = "Freq was not given and was not set in the index" + with pytest.raises(ValueError, match=msg): + no_freq.tshift() + + def test_shift_int(self, datetime_series): + ts = datetime_series.astype(int) + shifted = ts.shift(1) + expected 
= ts.astype(float).shift(1) + tm.assert_series_equal(shifted, expected) + + def test_shift_object_non_scalar_fill(self): + # shift requires scalar fill_value except for object dtype + ser = Series(range(3)) + with pytest.raises(ValueError, match="fill_value must be a scalar"): + ser.shift(1, fill_value=[]) + + df = ser.to_frame() + with pytest.raises(ValueError, match="fill_value must be a scalar"): + df.shift(1, fill_value=np.arange(3)) + + obj_ser = ser.astype(object) + result = obj_ser.shift(1, fill_value={}) + assert result[0] == {} + + obj_df = obj_ser.to_frame() + result = obj_df.shift(1, fill_value={}) + assert result.iloc[0, 0] == {} + + def test_shift_categorical(self): + # GH#9416 + s = pd.Series(["a", "b", "c", "d"], dtype="category") + + tm.assert_series_equal(s.iloc[:-1], s.shift(1).shift(-1).dropna()) + + sp1 = s.shift(1) + tm.assert_index_equal(s.index, sp1.index) + assert np.all(sp1.values.codes[:1] == -1) + assert np.all(s.values.codes[:-1] == sp1.values.codes[1:]) + + sn2 = s.shift(-2) + tm.assert_index_equal(s.index, sn2.index) + assert np.all(sn2.values.codes[-2:] == -1) + assert np.all(s.values.codes[2:] == sn2.values.codes[:-2]) + + tm.assert_index_equal(s.values.categories, sp1.values.categories) + tm.assert_index_equal(s.values.categories, sn2.values.categories) + + def test_shift_dt64values_int_fill_deprecated(self): + # GH#31971 + ser = pd.Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")]) + + with tm.assert_produces_warning(FutureWarning): + result = ser.shift(1, fill_value=0) + + expected = pd.Series([pd.Timestamp(0), ser[0]]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_sort_index.py b/pandas/tests/series/methods/test_sort_index.py new file mode 100644 index 00000000..6fa4eeae --- /dev/null +++ b/pandas/tests/series/methods/test_sort_index.py @@ -0,0 +1,168 @@ +import random + +import numpy as np +import pytest + +from pandas import IntervalIndex, MultiIndex, Series +import 
pandas._testing as tm + + +class TestSeriesSortIndex: + def test_sort_index(self, datetime_series): + rindex = list(datetime_series.index) + random.shuffle(rindex) + + random_order = datetime_series.reindex(rindex) + sorted_series = random_order.sort_index() + tm.assert_series_equal(sorted_series, datetime_series) + + # descending + sorted_series = random_order.sort_index(ascending=False) + tm.assert_series_equal( + sorted_series, datetime_series.reindex(datetime_series.index[::-1]) + ) + + # compat on level + sorted_series = random_order.sort_index(level=0) + tm.assert_series_equal(sorted_series, datetime_series) + + # compat on axis + sorted_series = random_order.sort_index(axis=0) + tm.assert_series_equal(sorted_series, datetime_series) + + msg = "No axis named 1 for object type " + with pytest.raises(ValueError, match=msg): + random_order.sort_values(axis=1) + + sorted_series = random_order.sort_index(level=0, axis=0) + tm.assert_series_equal(sorted_series, datetime_series) + + with pytest.raises(ValueError, match=msg): + random_order.sort_index(level=0, axis=1) + + def test_sort_index_inplace(self, datetime_series): + + # For GH#11402 + rindex = list(datetime_series.index) + random.shuffle(rindex) + + # descending + random_order = datetime_series.reindex(rindex) + result = random_order.sort_index(ascending=False, inplace=True) + + assert result is None + tm.assert_series_equal( + random_order, datetime_series.reindex(datetime_series.index[::-1]) + ) + + # ascending + random_order = datetime_series.reindex(rindex) + result = random_order.sort_index(ascending=True, inplace=True) + + assert result is None + tm.assert_series_equal(random_order, datetime_series) + + def test_sort_index_level(self): + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + res = s.sort_index(level="A") + tm.assert_series_equal(backwards, res) + + res = s.sort_index(level=["A", "B"]) + 
tm.assert_series_equal(backwards, res) + + res = s.sort_index(level="A", sort_remaining=False) + tm.assert_series_equal(s, res) + + res = s.sort_index(level=["A", "B"], sort_remaining=False) + tm.assert_series_equal(s, res) + + @pytest.mark.parametrize("level", ["A", 0]) # GH#21052 + def test_sort_index_multiindex(self, level): + + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + # implicit sort_remaining=True + res = s.sort_index(level=level) + tm.assert_series_equal(backwards, res) + + # GH#13496 + # sort has no effect without remaining lvls + res = s.sort_index(level=level, sort_remaining=False) + tm.assert_series_equal(s, res) + + def test_sort_index_kind(self): + # GH#14444 & GH#13589: Add support for sort algo choosing + series = Series(index=[3, 2, 1, 4, 3], dtype=object) + expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object) + + index_sorted_series = series.sort_index(kind="mergesort") + tm.assert_series_equal(expected_series, index_sorted_series) + + index_sorted_series = series.sort_index(kind="quicksort") + tm.assert_series_equal(expected_series, index_sorted_series) + + index_sorted_series = series.sort_index(kind="heapsort") + tm.assert_series_equal(expected_series, index_sorted_series) + + def test_sort_index_na_position(self): + series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object) + expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object) + + index_sorted_series = series.sort_index(na_position="first") + tm.assert_series_equal(expected_series_first, index_sorted_series) + + expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object) + + index_sorted_series = series.sort_index(na_position="last") + tm.assert_series_equal(expected_series_last, index_sorted_series) + + def test_sort_index_intervals(self): + s = Series( + [np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4]) + ) + + result = s.sort_index() 
+ expected = s + tm.assert_series_equal(result, expected) + + result = s.sort_index(ascending=False) + expected = Series( + [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1]) + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_list, sorted_list, ascending, ignore_index, output_index", + [ + ([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]), + ], + ) + def test_sort_index_ignore_index( + self, inplace, original_list, sorted_list, ascending, ignore_index, output_index + ): + # GH 30114 + ser = Series(original_list) + expected = Series(sorted_list, index=output_index) + kwargs = { + "ascending": ascending, + "ignore_index": ignore_index, + "inplace": inplace, + } + + if inplace: + result_ser = ser.copy() + result_ser.sort_index(**kwargs) + else: + result_ser = ser.sort_index(**kwargs) + + tm.assert_series_equal(result_ser, expected) + tm.assert_series_equal(ser, Series(original_list)) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py new file mode 100644 index 00000000..caa2abd6 --- /dev/null +++ b/pandas/tests/series/methods/test_sort_values.py @@ -0,0 +1,183 @@ +import numpy as np +import pytest + +from pandas import Categorical, DataFrame, Series +import pandas._testing as tm + + +class TestSeriesSortValues: + def test_sort_values(self, datetime_series): + + # check indexes are reordered corresponding with the values + ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"]) + expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"]) + result = ser.sort_values() + tm.assert_series_equal(expected, result) + + ts = datetime_series.copy() + ts[:5] = np.NaN + vals = ts.values + + result = ts.sort_values() + assert 
np.isnan(result[-5:]).all() + tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) + + # na_position + result = ts.sort_values(na_position="first") + assert np.isnan(result[:5]).all() + tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) + + # something object-type + ser = Series(["A", "B"], [1, 2]) + # no failure + ser.sort_values() + + # ascending=False + ordered = ts.sort_values(ascending=False) + expected = np.sort(ts.dropna().values)[::-1] + tm.assert_almost_equal(expected, ordered.dropna().values) + ordered = ts.sort_values(ascending=False, na_position="first") + tm.assert_almost_equal(expected, ordered.dropna().values) + + # ascending=[False] should behave the same as ascending=False + ordered = ts.sort_values(ascending=[False]) + expected = ts.sort_values(ascending=False) + tm.assert_series_equal(expected, ordered) + ordered = ts.sort_values(ascending=[False], na_position="first") + expected = ts.sort_values(ascending=False, na_position="first") + tm.assert_series_equal(expected, ordered) + + msg = "ascending must be boolean" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=None) + msg = r"Length of ascending \(0\) must be 1 for Series" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=[]) + msg = r"Length of ascending \(3\) must be 1 for Series" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=[1, 2, 3]) + msg = r"Length of ascending \(2\) must be 1 for Series" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=[False, False]) + msg = "ascending must be boolean" + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending="foobar") + + # inplace=True + ts = datetime_series.copy() + ts.sort_values(ascending=False, inplace=True) + tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False)) + tm.assert_index_equal( + ts.index, datetime_series.sort_values(ascending=False).index + ) + + # GH#5856/5853 + # 
Series.sort_values operating on a view + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0] + + msg = ( + "This Series is a view of some other array, to sort in-place " + "you must create a copy" + ) + with pytest.raises(ValueError, match=msg): + s.sort_values(inplace=True) + + def test_sort_values_categorical(self): + + c = Categorical(["a", "b", "b", "a"], ordered=False) + cat = Series(c.copy()) + + # sort in the categories order + expected = Series( + Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2] + ) + result = cat.sort_values() + tm.assert_series_equal(result, expected) + + cat = Series(Categorical(["a", "c", "b", "d"], ordered=True)) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + cat = Series( + Categorical( + ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True + ) + ) + res = cat.sort_values() + exp = np.array(["a", "b", "c", "d"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + res = cat.sort_values(ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(res.__array__(), exp) + + raw_cat1 = Categorical( + ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False + ) + raw_cat2 = Categorical( + ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True + ) + s = ["a", "b", "c", "d"] + df = DataFrame( + {"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]} + ) + + # Cats must be sorted in a dataframe + res = df.sort_values(by=["string"], ascending=False) + exp = np.array(["d", "c", "b", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp) + assert res["sort"].dtype == "category" + + res = df.sort_values(by=["sort"], ascending=False) + exp = df.sort_values(by=["string"], ascending=True) + tm.assert_series_equal(res["values"], exp["values"]) + assert res["sort"].dtype == 
"category" + assert res["unsort"].dtype == "category" + + # unordered cat, but we allow this + df.sort_values(by=["unsort"], ascending=False) + + # multi-columns sort + # GH#7848 + df = DataFrame( + {"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]} + ) + df["grade"] = Categorical(df["raw_grade"], ordered=True) + df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"]) + + # sorts 'grade' according to the order of the categories + result = df.sort_values(by=["grade"]) + expected = df.iloc[[1, 2, 5, 0, 3, 4]] + tm.assert_frame_equal(result, expected) + + # multi + result = df.sort_values(by=["grade", "id"]) + expected = df.iloc[[2, 1, 5, 4, 3, 0]] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize( + "original_list, sorted_list, ignore_index, output_index", + [ + ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]), + ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]), + ], + ) + def test_sort_values_ignore_index( + self, inplace, original_list, sorted_list, ignore_index, output_index + ): + # GH 30114 + ser = Series(original_list) + expected = Series(sorted_list, index=output_index) + kwargs = {"ignore_index": ignore_index, "inplace": inplace} + + if inplace: + result_ser = ser.copy() + result_ser.sort_values(ascending=False, **kwargs) + else: + result_ser = ser.sort_values(ascending=False, **kwargs) + + tm.assert_series_equal(result_ser, expected) + tm.assert_series_equal(ser, Series(original_list)) diff --git a/pandas/tests/series/methods/test_to_dict.py b/pandas/tests/series/methods/test_to_dict.py new file mode 100644 index 00000000..2fbf3e8d --- /dev/null +++ b/pandas/tests/series/methods/test_to_dict.py @@ -0,0 +1,20 @@ +import collections + +import pytest + +from pandas import Series +import pandas._testing as tm + + +class TestSeriesToDict: + @pytest.mark.parametrize( + "mapping", (dict, collections.defaultdict(list), collections.OrderedDict) + ) + def 
test_to_dict(self, mapping, datetime_series): + # GH#16122 + tm.assert_series_equal( + Series(datetime_series.to_dict(mapping), name="ts"), datetime_series + ) + from_method = Series(datetime_series.to_dict(collections.Counter)) + from_constructor = Series(collections.Counter(datetime_series.items())) + tm.assert_series_equal(from_method, from_constructor) diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py new file mode 100644 index 00000000..d4e2890e --- /dev/null +++ b/pandas/tests/series/methods/test_truncate.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + +from pandas.tseries.offsets import BDay + + +class TestTruncate: + def test_truncate(self, datetime_series): + offset = BDay() + + ts = datetime_series[::3] + + start, end = datetime_series.index[3], datetime_series.index[6] + start_missing, end_missing = datetime_series.index[2], datetime_series.index[7] + + # neither specified + truncated = ts.truncate() + tm.assert_series_equal(truncated, ts) + + # both specified + expected = ts[1:3] + + truncated = ts.truncate(start, end) + tm.assert_series_equal(truncated, expected) + + truncated = ts.truncate(start_missing, end_missing) + tm.assert_series_equal(truncated, expected) + + # start specified + expected = ts[1:] + + truncated = ts.truncate(before=start) + tm.assert_series_equal(truncated, expected) + + truncated = ts.truncate(before=start_missing) + tm.assert_series_equal(truncated, expected) + + # end specified + expected = ts[:3] + + truncated = ts.truncate(after=end) + tm.assert_series_equal(truncated, expected) + + truncated = ts.truncate(after=end_missing) + tm.assert_series_equal(truncated, expected) + + # corner case, empty series returned + truncated = ts.truncate(after=datetime_series.index[0] - offset) + assert len(truncated) == 0 + + truncated = ts.truncate(before=datetime_series.index[-1] + offset) + assert len(truncated) == 0 + + 
msg = "Truncate: 1999-12-31 00:00:00 must be after 2000-02-14 00:00:00" + with pytest.raises(ValueError, match=msg): + ts.truncate( + before=datetime_series.index[-1] + offset, + after=datetime_series.index[0] - offset, + ) + + def test_truncate_nonsortedindex(self): + # GH#17935 + + s = pd.Series(["a", "b", "c", "d", "e"], index=[5, 3, 2, 9, 0]) + msg = "truncate requires a sorted index" + + with pytest.raises(ValueError, match=msg): + s.truncate(before=3, after=9) + + rng = pd.date_range("2011-01-01", "2012-01-01", freq="W") + ts = pd.Series(np.random.randn(len(rng)), index=rng) + msg = "truncate requires a sorted index" + + with pytest.raises(ValueError, match=msg): + ts.sort_values(ascending=False).truncate(before="2011-11", after="2011-12") diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py new file mode 100644 index 00000000..f97362ce --- /dev/null +++ b/pandas/tests/series/methods/test_value_counts.py @@ -0,0 +1,205 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Categorical, CategoricalIndex, Series +import pandas._testing as tm + + +class TestSeriesValueCounts: + def test_value_counts_datetime(self): + # most dtypes are tested in tests/base + values = [ + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 10:00"), + pd.Timestamp("2011-01-01 11:00"), + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 09:00"), + pd.Timestamp("2011-01-01 11:00"), + ] + + exp_idx = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"] + ) + exp = pd.Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = pd.Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + # check DatetimeIndex outputs the same result + idx = pd.DatetimeIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = pd.Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + 
tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_datetime_tz(self): + values = [ + pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"), + pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"), + ] + + exp_idx = pd.DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"], + tz="US/Eastern", + ) + exp = pd.Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = pd.Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + idx = pd.DatetimeIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + exp = pd.Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_period(self): + values = [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02", freq="M"), + pd.Period("2011-03", freq="M"), + pd.Period("2011-01", freq="M"), + pd.Period("2011-01", freq="M"), + pd.Period("2011-03", freq="M"), + ] + + exp_idx = pd.PeriodIndex(["2011-01", "2011-03", "2011-02"], freq="M") + exp = pd.Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = pd.Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + # check DatetimeIndex outputs the same result + idx = pd.PeriodIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = pd.Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_categorical_ordered(self): + # most dtypes are tested in 
tests/base + values = pd.Categorical([1, 2, 3, 1, 1, 3], ordered=True) + + exp_idx = pd.CategoricalIndex([1, 3, 2], categories=[1, 2, 3], ordered=True) + exp = pd.Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = pd.Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + # check CategoricalIndex outputs the same result + idx = pd.CategoricalIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = pd.Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_categorical_not_ordered(self): + values = pd.Categorical([1, 2, 3, 1, 1, 3], ordered=False) + + exp_idx = pd.CategoricalIndex([1, 3, 2], categories=[1, 2, 3], ordered=False) + exp = pd.Series([3, 2, 1], index=exp_idx, name="xxx") + + ser = pd.Series(values, name="xxx") + tm.assert_series_equal(ser.value_counts(), exp) + # check CategoricalIndex outputs the same result + idx = pd.CategoricalIndex(values, name="xxx") + tm.assert_series_equal(idx.value_counts(), exp) + + # normalize + exp = pd.Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) + + def test_value_counts_categorical(self): + # GH#12835 + cats = Categorical(list("abcccb"), categories=list("cabd")) + ser = Series(cats, name="xxx") + res = ser.value_counts(sort=False) + + exp_index = CategoricalIndex(list("cabd"), categories=cats.categories) + exp = Series([3, 1, 2, 0], name="xxx", index=exp_index) + tm.assert_series_equal(res, exp) + + res = ser.value_counts(sort=True) + + exp_index = CategoricalIndex(list("cbad"), categories=cats.categories) + exp = Series([3, 2, 1, 0], name="xxx", index=exp_index) + tm.assert_series_equal(res, exp) + + # check object dtype handles the Series.name as 
the same + # (tested in tests/base) + ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx") + res = ser.value_counts() + exp = Series([3, 2, 1], name="xxx", index=["c", "b", "a"]) + tm.assert_series_equal(res, exp) + + def test_value_counts_categorical_with_nan(self): + # see GH#9443 + + # sanity check + ser = Series(["a", "b", "a"], dtype="category") + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) + + res = ser.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + res = ser.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + # same Series via two different constructions --> same behaviour + series = [ + Series(["a", "b", None, "a", None, None], dtype="category"), + Series( + Categorical(["a", "b", None, "a", None, None], categories=["a", "b"]) + ), + ] + + for ser in series: + # None is a NaN value, so we exclude its count here + exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) + res = ser.value_counts(dropna=True) + tm.assert_series_equal(res, exp) + + # we don't exclude the count of None and sort by counts + exp = Series([3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"])) + res = ser.value_counts(dropna=False) + tm.assert_series_equal(res, exp) + + # When we aren't sorting by counts, and np.nan isn't a + # category, it should be last. 
+ exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) + res = ser.value_counts(dropna=False, sort=False) + tm.assert_series_equal(res, exp) + + @pytest.mark.parametrize( + "ser, dropna, exp", + [ + ( + pd.Series([False, True, True, pd.NA]), + False, + pd.Series([2, 1, 1], index=[True, False, pd.NA]), + ), + ( + pd.Series([False, True, True, pd.NA]), + True, + pd.Series([2, 1], index=[True, False]), + ), + ( + pd.Series(range(3), index=[True, False, np.nan]).index, + False, + pd.Series([1, 1, 1], index=[True, False, pd.NA]), + ), + ], + ) + def test_value_counts_bool_with_nan(self, ser, dropna, exp): + # GH32146 + out = ser.value_counts(dropna=dropna) + tm.assert_series_equal(out, exp) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py new file mode 100644 index 00000000..628c6658 --- /dev/null +++ b/pandas/tests/series/test_alter_axes.py @@ -0,0 +1,352 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import DataFrame, Index, MultiIndex, RangeIndex, Series +import pandas._testing as tm + + +class TestSeriesAlterAxes: + def test_setindex(self, string_series): + # wrong type + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some" + r" kind, None was passed" + ) + with pytest.raises(TypeError, match=msg): + string_series.index = None + + # wrong length + msg = ( + "Length mismatch: Expected axis has 30 elements, " + "new values have 29 elements" + ) + with pytest.raises(ValueError, match=msg): + string_series.index = np.arange(len(string_series) - 1) + + # works + string_series.index = np.arange(len(string_series)) + assert isinstance(string_series.index, Index) + + # Renaming + + def test_rename(self, datetime_series): + ts = datetime_series + renamer = lambda x: x.strftime("%Y%m%d") + renamed = ts.rename(renamer) + assert renamed.index[0] == renamer(ts.index[0]) + + # dict + rename_dict = dict(zip(ts.index, renamed.index)) + renamed2 = ts.rename(rename_dict) 
+ tm.assert_series_equal(renamed, renamed2) + + # partial dict + s = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64") + renamed = s.rename({"b": "foo", "d": "bar"}) + tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"])) + + # index with name + renamer = Series( + np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64" + ) + renamed = renamer.rename({}) + assert renamed.index.name == renamer.index.name + + def test_rename_by_series(self): + s = Series(range(5), name="foo") + renamer = Series({1: 10, 2: 20}) + result = s.rename(renamer) + expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo") + tm.assert_series_equal(result, expected) + + def test_rename_set_name(self): + s = Series(range(4), index=list("abcd")) + for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: + result = s.rename(name) + assert result.name == name + tm.assert_numpy_array_equal(result.index.values, s.index.values) + assert s.name is None + + def test_rename_set_name_inplace(self): + s = Series(range(3), index=list("abc")) + for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]: + s.rename(name, inplace=True) + assert s.name == name + + exp = np.array(["a", "b", "c"], dtype=np.object_) + tm.assert_numpy_array_equal(s.index.values, exp) + + def test_rename_axis_supported(self): + # Supporting axis for compatibility, detailed in GH-18589 + s = Series(range(5)) + s.rename({}, axis=0) + s.rename({}, axis="index") + # TODO: clean up shared index validation + # with pytest.raises(ValueError, match="No axis named 5"): + # s.rename({}, axis=5) + + def test_set_name_attribute(self): + s = Series([1, 2, 3]) + s2 = Series([1, 2, 3], name="bar") + for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]: + s.name = name + assert s.name == name + s2.name = name + assert s2.name == name + + def test_set_name(self): + s = Series([1, 2, 3]) + s2 = s._set_name("foo") + assert s2.name == "foo" + assert s.name is None 
+ assert s is not s2 + + def test_rename_inplace(self, datetime_series): + renamer = lambda x: x.strftime("%Y%m%d") + expected = renamer(datetime_series.index[0]) + + datetime_series.rename(renamer, inplace=True) + assert datetime_series.index[0] == expected + + def test_set_index_makes_timeseries(self): + idx = tm.makeDateIndex(10) + + s = Series(range(10)) + s.index = idx + assert s.index.is_all_dates + + def test_reset_index(self): + df = tm.makeDataFrame()[:5] + ser = df.stack() + ser.index.names = ["hash", "category"] + + ser.name = "value" + df = ser.reset_index() + assert "value" in df + + df = ser.reset_index(name="value2") + assert "value2" in df + + # check inplace + s = ser.reset_index(drop=True) + s2 = ser + s2.reset_index(drop=True, inplace=True) + tm.assert_series_equal(s, s2) + + # level + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + rs = s.reset_index(level=1) + assert len(rs.columns) == 2 + + rs = s.reset_index(level=[0, 2], drop=True) + tm.assert_index_equal(rs.index, Index(index.get_level_values(1))) + assert isinstance(rs, Series) + + def test_reset_index_name(self): + s = Series([1, 2, 3], index=Index(range(3), name="x")) + assert s.reset_index().index.name is None + assert s.reset_index(drop=True).index.name is None + + def test_reset_index_level(self): + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) + + for levels in ["A", "B"], [0, 1]: + # With MultiIndex + s = df.set_index(["A", "B"])["C"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df.set_index("B")) + + result = s.reset_index(level=levels) + tm.assert_frame_equal(result, df) + + result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True) + tm.assert_frame_equal(result, df[["C"]]) + + 
with pytest.raises(KeyError, match="Level E "): + s.reset_index(level=["A", "E"]) + + # With single-level Index + s = df.set_index("A")["B"] + + result = s.reset_index(level=levels[0]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[:1]) + tm.assert_frame_equal(result, df[["A", "B"]]) + + result = s.reset_index(level=levels[0], drop=True) + tm.assert_series_equal(result, df["B"]) + + with pytest.raises(IndexError, match="Too many levels"): + s.reset_index(level=[0, 1, 2]) + + # Check that .reset_index([],drop=True) doesn't fail + result = Series(range(4)).reset_index([], drop=True) + expected = Series(range(4)) + tm.assert_series_equal(result, expected) + + def test_reset_index_range(self): + # GH 12071 + s = Series(range(2), name="A", dtype="int64") + series_result = s.reset_index() + assert isinstance(series_result.index, RangeIndex) + series_expected = DataFrame( + [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2) + ) + tm.assert_frame_equal(series_result, series_expected) + + def test_reorder_levels(self): + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + names=["L0", "L1", "L2"], + ) + s = Series(np.arange(6), index=index) + + # no change, position + result = s.reorder_levels([0, 1, 2]) + tm.assert_series_equal(s, result) + + # no change, labels + result = s.reorder_levels(["L0", "L1", "L2"]) + tm.assert_series_equal(s, result) + + # rotate, position + result = s.reorder_levels([1, 2, 0]) + e_idx = MultiIndex( + levels=[["one", "two", "three"], [0, 1], ["bar"]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], + names=["L1", "L2", "L0"], + ) + expected = Series(np.arange(6), index=e_idx) + tm.assert_series_equal(result, expected) + + def test_rename_axis_mapper(self): + # GH 19978 + mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"]) + s = 
Series(list(range(len(mi))), index=mi) + + result = s.rename_axis(index={"ll": "foo"}) + assert result.index.names == ["foo", "nn"] + + result = s.rename_axis(index=str.upper, axis=0) + assert result.index.names == ["LL", "NN"] + + result = s.rename_axis(index=["foo", "goo"]) + assert result.index.names == ["foo", "goo"] + + with pytest.raises(TypeError, match="unexpected"): + s.rename_axis(columns="wrong") + + def test_rename_axis_inplace(self, datetime_series): + # GH 15704 + expected = datetime_series.rename_axis("foo") + result = datetime_series + no_return = result.rename_axis("foo", inplace=True) + + assert no_return is None + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}]) + def test_rename_axis_none(self, kwargs): + # GH 25034 + index = Index(list("abc"), name="foo") + df = Series([1, 2, 3], index=index) + + result = df.rename_axis(**kwargs) + expected_index = index.rename(None) if kwargs else index + expected = Series([1, 2, 3], index=expected_index) + tm.assert_series_equal(result, expected) + + def test_rename_with_custom_indexer(self): + # GH 27814 + class MyIndexer: + pass + + ix = MyIndexer() + s = Series([1, 2, 3]).rename(ix) + assert s.name is ix + + def test_rename_with_custom_indexer_inplace(self): + # GH 27814 + class MyIndexer: + pass + + ix = MyIndexer() + s = Series([1, 2, 3]) + s.rename(ix, inplace=True) + assert s.name is ix + + def test_set_axis_inplace_axes(self, axis_series): + # GH14636 + ser = Series(np.arange(4), index=[1, 3, 5, 7], dtype="int64") + + expected = ser.copy() + expected.index = list("abcd") + + # inplace=True + # The FutureWarning comes from the fact that we would like to have + # inplace default to False some day + result = ser.copy() + result.set_axis(list("abcd"), axis=axis_series, inplace=True) + tm.assert_series_equal(result, expected) + + def test_set_axis_inplace(self): + # GH14636 + + s = Series(np.arange(4), index=[1, 3, 5, 7], 
dtype="int64") + + expected = s.copy() + expected.index = list("abcd") + + # inplace=False + result = s.set_axis(list("abcd"), axis=0, inplace=False) + tm.assert_series_equal(expected, result) + + # omitting the "axis" parameter + with tm.assert_produces_warning(None): + result = s.set_axis(list("abcd"), inplace=False) + tm.assert_series_equal(result, expected) + + # wrong values for the "axis" parameter + for axis in [2, "foo"]: + with pytest.raises(ValueError, match="No axis named"): + s.set_axis(list("abcd"), axis=axis, inplace=False) + + def test_reset_index_drop_errors(self): + # GH 20925 + + # KeyError raised for series index when passed level name is missing + s = Series(range(4)) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong", drop=True) + with pytest.raises(KeyError, match="does not match index name"): + s.reset_index("wrong") + + # KeyError raised for series when level to be dropped is missing + s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) + with pytest.raises(KeyError, match="not found"): + s.reset_index("wrong", drop=True) + + def test_droplevel(self): + # GH20342 + ser = Series([1, 2, 3, 4]) + ser.index = MultiIndex.from_arrays( + [(1, 2, 3, 4), (5, 6, 7, 8)], names=["a", "b"] + ) + expected = ser.reset_index("b", drop=True) + result = ser.droplevel("b", axis="index") + tm.assert_series_equal(result, expected) + # test that droplevel raises ValueError on axis != 0 + with pytest.raises(ValueError): + ser.droplevel(1, axis="columns") diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py new file mode 100644 index 00000000..c29bd3ea --- /dev/null +++ b/pandas/tests/series/test_analytics.py @@ -0,0 +1,270 @@ +import operator + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + + +class TestSeriesAnalytics: + def 
test_prod_numpy16_bug(self): + s = Series([1.0, 1.0, 1.0], index=range(3)) + result = s.prod() + + assert not isinstance(result, Series) + + def test_dot(self): + a = Series(np.random.randn(4), index=["p", "q", "r", "s"]) + b = DataFrame( + np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"] + ).T + + result = a.dot(b) + expected = Series(np.dot(a.values, b.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + # Check index alignment + b2 = b.reindex(index=reversed(b.index)) + result = a.dot(b) + tm.assert_series_equal(result, expected) + + # Check ndarray argument + result = a.dot(b.values) + assert np.all(result == expected.values) + tm.assert_almost_equal(a.dot(b["2"].values), expected["2"]) + + # Check series argument + tm.assert_almost_equal(a.dot(b["1"]), expected["1"]) + tm.assert_almost_equal(a.dot(b2["1"]), expected["1"]) + + msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)" + # exception raised is of type Exception + with pytest.raises(Exception, match=msg): + a.dot(a.values[:3]) + msg = "matrices are not aligned" + with pytest.raises(ValueError, match=msg): + a.dot(b.T) + + def test_matmul(self): + # matmul test is for GH #10259 + a = Series(np.random.randn(4), index=["p", "q", "r", "s"]) + b = DataFrame( + np.random.randn(3, 4), index=["1", "2", "3"], columns=["p", "q", "r", "s"] + ).T + + # Series @ DataFrame -> Series + result = operator.matmul(a, b) + expected = Series(np.dot(a.values, b.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + # DataFrame @ Series -> Series + result = operator.matmul(b.T, a) + expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + # Series @ Series -> scalar + result = operator.matmul(a, a) + expected = np.dot(a.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH 21530 + # vector (1D np.array) @ Series (__rmatmul__) + result = operator.matmul(a.values, a) + 
expected = np.dot(a.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH 21530 + # vector (1D list) @ Series (__rmatmul__) + result = operator.matmul(a.values.tolist(), a) + expected = np.dot(a.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH 21530 + # matrix (2D np.array) @ Series (__rmatmul__) + result = operator.matmul(b.T.values, a) + expected = np.dot(b.T.values, a.values) + tm.assert_almost_equal(result, expected) + + # GH 21530 + # matrix (2D nested lists) @ Series (__rmatmul__) + result = operator.matmul(b.T.values.tolist(), a) + expected = np.dot(b.T.values, a.values) + tm.assert_almost_equal(result, expected) + + # mixed dtype DataFrame @ Series + a["p"] = int(a.p) + result = operator.matmul(b.T, a) + expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + # different dtypes DataFrame @ Series + a = a.astype(int) + result = operator.matmul(b.T, a) + expected = Series(np.dot(b.T.values, a.T.values), index=["1", "2", "3"]) + tm.assert_series_equal(result, expected) + + msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)" + # exception raised is of type Exception + with pytest.raises(Exception, match=msg): + a.dot(a.values[:3]) + msg = "matrices are not aligned" + with pytest.raises(ValueError, match=msg): + a.dot(b.T) + + def test_ptp(self): + # GH21614 + N = 1000 + arr = np.random.randn(N) + ser = Series(arr) + assert np.ptp(ser) == np.ptp(arr) + + def test_repeat(self): + s = Series(np.random.randn(3), index=["a", "b", "c"]) + + reps = s.repeat(5) + exp = Series(s.values.repeat(5), index=s.index.values.repeat(5)) + tm.assert_series_equal(reps, exp) + + to_rep = [2, 3, 4] + reps = s.repeat(to_rep) + exp = Series(s.values.repeat(to_rep), index=s.index.values.repeat(to_rep)) + tm.assert_series_equal(reps, exp) + + def test_numpy_repeat(self): + s = Series(np.arange(3), name="x") + expected = Series(s.values.repeat(2), name="x", 
index=s.index.values.repeat(2)) + tm.assert_series_equal(np.repeat(s, 2), expected) + + msg = "the 'axis' parameter is not supported" + with pytest.raises(ValueError, match=msg): + np.repeat(s, 2, axis=0) + + def test_is_monotonic(self): + + s = Series(np.random.randint(0, 10, size=1000)) + assert not s.is_monotonic + s = Series(np.arange(1000)) + assert s.is_monotonic is True + assert s.is_monotonic_increasing is True + s = Series(np.arange(1000, 0, -1)) + assert s.is_monotonic_decreasing is True + + s = Series(pd.date_range("20130101", periods=10)) + assert s.is_monotonic is True + assert s.is_monotonic_increasing is True + s = Series(list(reversed(s.tolist()))) + assert s.is_monotonic is False + assert s.is_monotonic_decreasing is True + + def test_unstack(self): + + index = MultiIndex( + levels=[["bar", "foo"], ["one", "three", "two"]], + codes=[[1, 1, 0, 0], [0, 1, 0, 2]], + ) + + s = Series(np.arange(4.0), index=index) + unstacked = s.unstack() + + expected = DataFrame( + [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]], + index=["bar", "foo"], + columns=["one", "three", "two"], + ) + + tm.assert_frame_equal(unstacked, expected) + + unstacked = s.unstack(level=0) + tm.assert_frame_equal(unstacked, expected.T) + + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + exp_index = MultiIndex( + levels=[["one", "two", "three"], [0, 1]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0) + unstacked = s.unstack(0).sort_index() + tm.assert_frame_equal(unstacked, expected) + + # GH5873 + idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) + ts = pd.Series([1, 2], index=idx) + left = ts.unstack() + right = DataFrame( + [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5] + ) + tm.assert_frame_equal(left, right) + + idx = 
pd.MultiIndex.from_arrays( + [ + ["cat", "cat", "cat", "dog", "dog"], + ["a", "a", "b", "a", "b"], + [1, 2, 1, 1, np.nan], + ] + ) + ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) + right = DataFrame( + [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], + columns=["cat", "dog"], + ) + tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)] + right.index = pd.MultiIndex.from_tuples(tpls) + tm.assert_frame_equal(ts.unstack(level=0), right) + + @pytest.mark.parametrize("func", [np.any, np.all]) + @pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())]) + @td.skip_if_np_lt("1.15") + def test_validate_any_all_out_keepdims_raises(self, kwargs, func): + s = pd.Series([1, 2]) + param = list(kwargs)[0] + name = func.__name__ + + msg = ( + r"the '{arg}' parameter is not " + r"supported in the pandas " + r"implementation of {fname}\(\)" + ).format(arg=param, fname=name) + with pytest.raises(ValueError, match=msg): + func(s, **kwargs) + + @td.skip_if_np_lt("1.15") + def test_validate_sum_initial(self): + s = pd.Series([1, 2]) + msg = ( + r"the 'initial' parameter is not " + r"supported in the pandas " + r"implementation of sum\(\)" + ) + with pytest.raises(ValueError, match=msg): + np.sum(s, initial=10) + + def test_validate_median_initial(self): + s = pd.Series([1, 2]) + msg = ( + r"the 'overwrite_input' parameter is not " + r"supported in the pandas " + r"implementation of median\(\)" + ) + with pytest.raises(ValueError, match=msg): + # It seems like np.median doesn't dispatch, so we use the + # method instead of the ufunc. 
+ s.median(overwrite_input=True) + + @td.skip_if_np_lt("1.15") + def test_validate_stat_keepdims(self): + s = pd.Series([1, 2]) + msg = ( + r"the 'keepdims' parameter is not " + r"supported in the pandas " + r"implementation of sum\(\)" + ) + with pytest.raises(ValueError, match=msg): + np.sum(s, keepdims=True) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py new file mode 100644 index 00000000..901d983b --- /dev/null +++ b/pandas/tests/series/test_api.py @@ -0,0 +1,747 @@ +from collections import OrderedDict +import pydoc +import warnings + +import numpy as np +import pytest + +from pandas.util._test_decorators import async_mark + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + Series, + Timedelta, + TimedeltaIndex, + Timestamp, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import PeriodArray + +import pandas.io.formats.printing as printing + + +class TestSeriesMisc: + def test_scalarop_preserve_name(self, datetime_series): + result = datetime_series * 2 + assert result.name == datetime_series.name + + def test_copy_name(self, datetime_series): + result = datetime_series.copy() + assert result.name == datetime_series.name + + def test_copy_index_name_checking(self, datetime_series): + # don't want to be able to modify the index stored elsewhere after + # making a copy + + datetime_series.index.name = None + assert datetime_series.index.name is None + assert datetime_series is datetime_series + + cp = datetime_series.copy() + cp.index.name = "foo" + printing.pprint_thing(datetime_series.index.name) + assert datetime_series.index.name is None + + def test_append_preserve_name(self, datetime_series): + result = datetime_series[:5].append(datetime_series[5:]) + assert result.name == datetime_series.name + + def test_binop_maybe_preserve_name(self, datetime_series): + # names match, preserve + result = datetime_series * 
datetime_series + assert result.name == datetime_series.name + result = datetime_series.mul(datetime_series) + assert result.name == datetime_series.name + + result = datetime_series * datetime_series[:-2] + assert result.name == datetime_series.name + + # names don't match, don't preserve + cp = datetime_series.copy() + cp.name = "something else" + result = datetime_series + cp + assert result.name is None + result = datetime_series.add(cp) + assert result.name is None + + ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"] + ops = ops + ["r" + op for op in ops] + for op in ops: + # names match, preserve + s = datetime_series.copy() + result = getattr(s, op)(s) + assert result.name == datetime_series.name + + # names don't match, don't preserve + cp = datetime_series.copy() + cp.name = "changed" + result = getattr(s, op)(cp) + assert result.name is None + + def test_combine_first_name(self, datetime_series): + result = datetime_series.combine_first(datetime_series[:5]) + assert result.name == datetime_series.name + + def test_getitem_preserve_name(self, datetime_series): + result = datetime_series[datetime_series > 0] + assert result.name == datetime_series.name + + result = datetime_series[[0, 2, 4]] + assert result.name == datetime_series.name + + result = datetime_series[5:10] + assert result.name == datetime_series.name + + def test_pickle_datetimes(self, datetime_series): + unp_ts = self._pickle_roundtrip(datetime_series) + tm.assert_series_equal(unp_ts, datetime_series) + + def test_pickle_strings(self, string_series): + unp_series = self._pickle_roundtrip(string_series) + tm.assert_series_equal(unp_series, string_series) + + def _pickle_roundtrip(self, obj): + + with tm.ensure_clean() as path: + obj.to_pickle(path) + unpickled = pd.read_pickle(path) + return unpickled + + def test_sort_index_name(self, datetime_series): + result = datetime_series.sort_index(ascending=False) + assert result.name == datetime_series.name + + def 
test_constructor_dict(self): + d = {"a": 0.0, "b": 1.0, "c": 2.0} + result = Series(d) + expected = Series(d, index=sorted(d.keys())) + tm.assert_series_equal(result, expected) + + result = Series(d, index=["b", "c", "d", "a"]) + expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) + tm.assert_series_equal(result, expected) + + def test_constructor_subclass_dict(self, dict_subclass): + data = dict_subclass((x, 10.0 * x) for x in range(10)) + series = Series(data) + expected = Series(dict(data.items())) + tm.assert_series_equal(series, expected) + + def test_constructor_ordereddict(self): + # GH3283 + data = OrderedDict( + ("col{i}".format(i=i), np.random.random()) for i in range(12) + ) + + series = Series(data) + expected = Series(list(data.values()), list(data.keys())) + tm.assert_series_equal(series, expected) + + # Test with subclass + class A(OrderedDict): + pass + + series = Series(A(data)) + tm.assert_series_equal(series, expected) + + def test_constructor_dict_multiindex(self): + d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} + _d = sorted(d.items()) + result = Series(d) + expected = Series( + [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d]) + ) + tm.assert_series_equal(result, expected) + + d["z"] = 111.0 + _d.insert(0, ("z", d["z"])) + result = Series(d) + expected = Series( + [x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False) + ) + result = result.reindex(index=expected.index) + tm.assert_series_equal(result, expected) + + def test_constructor_dict_timedelta_index(self): + # GH #12169 : Resample category data with timedelta index + # construct Series from dict as data and TimedeltaIndex as index + # will result NaN in result Series data + expected = Series( + data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s") + ) + + result = Series( + data={ + pd.to_timedelta(0, unit="s"): "A", + pd.to_timedelta(10, unit="s"): "B", + pd.to_timedelta(20, unit="s"): "C", + }, + 
index=pd.to_timedelta([0, 10, 20], unit="s"), + ) + tm.assert_series_equal(result, expected) + + def test_sparse_accessor_updates_on_inplace(self): + s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]") + s.drop([0, 1], inplace=True) + assert s.sparse.density == 1.0 + + def test_tab_completion(self): + # GH 9910 + s = Series(list("abcd")) + # Series of str values should have .str but not .dt/.cat in __dir__ + assert "str" in dir(s) + assert "dt" not in dir(s) + assert "cat" not in dir(s) + + # similarly for .dt + s = Series(date_range("1/1/2015", periods=5)) + assert "dt" in dir(s) + assert "str" not in dir(s) + assert "cat" not in dir(s) + + # Similarly for .cat, but with the twist that str and dt should be + # there if the categories are of that type first cat and str. + s = Series(list("abbcd"), dtype="category") + assert "cat" in dir(s) + assert "str" in dir(s) # as it is a string categorical + assert "dt" not in dir(s) + + # similar to cat and str + s = Series(date_range("1/1/2015", periods=5)).astype("category") + assert "cat" in dir(s) + assert "str" not in dir(s) + assert "dt" in dir(s) # as it is a datetime categorical + + def test_tab_completion_with_categorical(self): + # test the tab completion display + ok_for_cat = [ + "categories", + "codes", + "ordered", + "set_categories", + "add_categories", + "remove_categories", + "rename_categories", + "reorder_categories", + "remove_unused_categories", + "as_ordered", + "as_unordered", + ] + + def get_dir(s): + results = [r for r in s.cat.__dir__() if not r.startswith("_")] + return sorted(set(results)) + + s = Series(list("aabbcde")).astype("category") + results = get_dir(s) + tm.assert_almost_equal(results, sorted(set(ok_for_cat))) + + @pytest.mark.parametrize( + "index", + [ + tm.makeUnicodeIndex(10), + tm.makeStringIndex(10), + tm.makeCategoricalIndex(10), + Index(["foo", "bar", "baz"] * 2), + tm.makeDateIndex(10), + tm.makePeriodIndex(10), + tm.makeTimedeltaIndex(10), + tm.makeIntIndex(10), + 
tm.makeUIntIndex(10), + tm.makeIntIndex(10), + tm.makeFloatIndex(10), + Index([True, False]), + Index(["a{}".format(i) for i in range(101)]), + pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), + pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), + ], + ) + def test_index_tab_completion(self, index): + # dir contains string-like values of the Index. + s = pd.Series(index=index, dtype=object) + dir_s = dir(s) + for i, x in enumerate(s.index.unique(level=0)): + if i < 100: + assert not isinstance(x, str) or not x.isidentifier() or x in dir_s + else: + assert x not in dir_s + + def test_not_hashable(self): + s_empty = Series(dtype=object) + s = Series([1]) + msg = "'Series' objects are mutable, thus they cannot be hashed" + with pytest.raises(TypeError, match=msg): + hash(s_empty) + with pytest.raises(TypeError, match=msg): + hash(s) + + def test_contains(self, datetime_series): + tm.assert_contains_all(datetime_series.index, datetime_series) + + def test_iter_datetimes(self, datetime_series): + for i, val in enumerate(datetime_series): + assert val == datetime_series[i] + + def test_iter_strings(self, string_series): + for i, val in enumerate(string_series): + assert val == string_series[i] + + def test_keys(self, datetime_series): + # HACK: By doing this in two stages, we avoid 2to3 wrapping the call + # to .keys() in a list() + getkeys = datetime_series.keys + assert getkeys() is datetime_series.index + + def test_values(self, datetime_series): + tm.assert_almost_equal( + datetime_series.values, datetime_series, check_dtype=False + ) + + def test_iteritems_datetimes(self, datetime_series): + for idx, val in datetime_series.iteritems(): + assert val == datetime_series[idx] + + def test_iteritems_strings(self, string_series): + for idx, val in string_series.iteritems(): + assert val == string_series[idx] + + # assert is lazy (generators don't define reverse, lists do) + assert not hasattr(string_series.iteritems(), "reverse") + + def test_items_datetimes(self, 
datetime_series): + for idx, val in datetime_series.items(): + assert val == datetime_series[idx] + + def test_items_strings(self, string_series): + for idx, val in string_series.items(): + assert val == string_series[idx] + + # assert is lazy (generators don't define reverse, lists do) + assert not hasattr(string_series.items(), "reverse") + + def test_raise_on_info(self): + s = Series(np.random.randn(10)) + msg = "'Series' object has no attribute 'info'" + with pytest.raises(AttributeError, match=msg): + s.info() + + def test_copy(self): + + for deep in [None, False, True]: + s = Series(np.arange(10), dtype="float64") + + # default deep is True + if deep is None: + s2 = s.copy() + else: + s2 = s.copy(deep=deep) + + s2[::2] = np.NaN + + if deep is None or deep is True: + # Did not modify original Series + assert np.isnan(s2[0]) + assert not np.isnan(s[0]) + else: + # we DID modify the original Series + assert np.isnan(s2[0]) + assert np.isnan(s[0]) + + def test_copy_tzaware(self): + # GH#11794 + # copy of tz-aware + expected = Series([Timestamp("2012/01/01", tz="UTC")]) + expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) + + for deep in [None, False, True]: + + s = Series([Timestamp("2012/01/01", tz="UTC")]) + + if deep is None: + s2 = s.copy() + else: + s2 = s.copy(deep=deep) + + s2[0] = pd.Timestamp("1999/01/01", tz="UTC") + + # default deep is True + if deep is None or deep is True: + # Did not modify original Series + tm.assert_series_equal(s2, expected2) + tm.assert_series_equal(s, expected) + else: + # we DID modify the original Series + tm.assert_series_equal(s2, expected2) + tm.assert_series_equal(s, expected2) + + def test_axis_alias(self): + s = Series([1, 2, np.nan]) + tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) + assert s.dropna().sum("rows") == 3 + assert s._get_axis_number("rows") == 0 + assert s._get_axis_name("rows") == "index" + + def test_class_axis(self): + # https://github.com/pandas-dev/pandas/issues/18147 + # 
no exception and no empty docstring + assert pydoc.getdoc(Series.index) + + def test_numpy_unique(self, datetime_series): + # it works! + np.unique(datetime_series) + + def test_item(self): + s = Series([1]) + result = s.item() + assert result == 1 + assert result == s.iloc[0] + assert isinstance(result, int) # i.e. not np.int64 + + ser = Series([0.5], index=[3]) + result = ser.item() + assert isinstance(result, float) + assert result == 0.5 + + ser = Series([1, 2]) + msg = "can only convert an array of size 1" + with pytest.raises(ValueError, match=msg): + ser.item() + + dti = pd.date_range("2016-01-01", periods=2) + with pytest.raises(ValueError, match=msg): + dti.item() + with pytest.raises(ValueError, match=msg): + Series(dti).item() + + val = dti[:1].item() + assert isinstance(val, Timestamp) + val = Series(dti)[:1].item() + assert isinstance(val, Timestamp) + + tdi = dti - dti + with pytest.raises(ValueError, match=msg): + tdi.item() + with pytest.raises(ValueError, match=msg): + Series(tdi).item() + + val = tdi[:1].item() + assert isinstance(val, Timedelta) + val = Series(tdi)[:1].item() + assert isinstance(val, Timedelta) + + # Case where ser[0] would not work + ser = Series(dti, index=[5, 6]) + val = ser[:1].item() + assert val == dti[0] + + def test_ndarray_compat(self): + + # test numpy compat with Series as sub-class of NDFrame + tsdf = DataFrame( + np.random.randn(1000, 3), + columns=["A", "B", "C"], + index=date_range("1/1/2000", periods=1000), + ) + + def f(x): + return x[x.idxmax()] + + result = tsdf.apply(f) + expected = tsdf.max() + tm.assert_series_equal(result, expected) + + # using an ndarray like function + s = Series(np.random.randn(10)) + result = Series(np.ones_like(s)) + expected = Series(1, index=range(10), dtype="float64") + tm.assert_series_equal(result, expected) + + # ravel + s = Series(np.random.randn(10)) + tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) + + def test_str_accessor_updates_on_inplace(self): + s = 
pd.Series(list("abc")) + s.drop([0], inplace=True) + assert len(s.str.lower()) == 2 + + def test_str_attribute(self): + # GH9068 + methods = ["strip", "rstrip", "lstrip"] + s = Series([" jack", "jill ", " jesse ", "frank"]) + for method in methods: + expected = Series([getattr(str, method)(x) for x in s.values]) + tm.assert_series_equal(getattr(Series.str, method)(s.str), expected) + + # str accessor only valid with string values + s = Series(range(5)) + with pytest.raises(AttributeError, match="only use .str accessor"): + s.str.repeat(2) + + def test_empty_method(self): + s_empty = pd.Series(dtype=object) + assert s_empty.empty + + s2 = pd.Series(index=[1], dtype=object) + for full_series in [pd.Series([1]), s2]: + assert not full_series.empty + + @async_mark() + async def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip("IPython", minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; s = pd.Series()" + await ip.run_code(code) + + # TODO: remove it when Ipython updates + # GH 33567, jedi version raises Deprecation warning in Ipython + import jedi + + if jedi.__version__ < "0.17.0": + warning = tm.assert_produces_warning(None) + else: + warning = tm.assert_produces_warning( + DeprecationWarning, check_stacklevel=False + ) + with warning: + with provisionalcompleter("ignore"): + list(ip.Completer.completions("s.", 1)) + + def test_integer_series_size(self): + # GH 25580 + s = Series(range(9)) + assert s.size == 9 + s = Series(range(9), dtype="Int64") + assert s.size == 9 + + def test_attrs(self): + s = pd.Series([0, 1], name="abc") + assert s.attrs == {} + s.attrs["version"] = 1 + result = s + 1 + assert result.attrs == {"version": 1} + + +class TestCategoricalSeries: + @pytest.mark.parametrize( + "method", + [ + lambda x: x.cat.set_categories([1, 2, 3]), + lambda x: x.cat.reorder_categories([2, 3, 1], ordered=True), + lambda x: 
x.cat.rename_categories([1, 2, 3]), + lambda x: x.cat.remove_unused_categories(), + lambda x: x.cat.remove_categories([2]), + lambda x: x.cat.add_categories([4]), + lambda x: x.cat.as_ordered(), + lambda x: x.cat.as_unordered(), + ], + ) + def test_getname_categorical_accessor(self, method): + # GH 17509 + s = Series([1, 2, 3], name="A").astype("category") + expected = "A" + result = method(s).name + assert result == expected + + def test_cat_accessor(self): + s = Series(Categorical(["a", "b", np.nan, "a"])) + tm.assert_index_equal(s.cat.categories, Index(["a", "b"])) + assert not s.cat.ordered, False + + exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) + s.cat.set_categories(["b", "a"], inplace=True) + tm.assert_categorical_equal(s.values, exp) + + res = s.cat.set_categories(["b", "a"]) + tm.assert_categorical_equal(res.values, exp) + + s[:] = "a" + s = s.cat.remove_unused_categories() + tm.assert_index_equal(s.cat.categories, Index(["a"])) + + def test_cat_accessor_api(self): + # GH 9322 + from pandas.core.arrays.categorical import CategoricalAccessor + + assert Series.cat is CategoricalAccessor + s = Series(list("aabbcde")).astype("category") + assert isinstance(s.cat, CategoricalAccessor) + + invalid = Series([1]) + with pytest.raises(AttributeError, match="only use .cat accessor"): + invalid.cat + assert not hasattr(invalid, "cat") + + def test_cat_accessor_no_new_attributes(self): + # https://github.com/pandas-dev/pandas/issues/10673 + c = Series(list("aabbcde")).astype("category") + with pytest.raises(AttributeError, match="You cannot add any new attribute"): + c.cat.xlabel = "a" + + def test_cat_accessor_updates_on_inplace(self): + s = Series(list("abc")).astype("category") + s.drop(0, inplace=True) + s.cat.remove_unused_categories(inplace=True) + assert len(s.cat.categories) == 2 + + def test_categorical_delegations(self): + + # invalid accessor + msg = r"Can only use \.cat accessor with a 'category' dtype" + with 
pytest.raises(AttributeError, match=msg): + Series([1, 2, 3]).cat + with pytest.raises(AttributeError, match=msg): + Series([1, 2, 3]).cat() + with pytest.raises(AttributeError, match=msg): + Series(["a", "b", "c"]).cat + with pytest.raises(AttributeError, match=msg): + Series(np.arange(5.0)).cat + with pytest.raises(AttributeError, match=msg): + Series([Timestamp("20130101")]).cat + + # Series should delegate calls to '.categories', '.codes', '.ordered' + # and the methods '.set_categories()' 'drop_unused_categories()' to the + # categorical + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = Index(["a", "b", "c"]) + tm.assert_index_equal(s.cat.categories, exp_categories) + s.cat.categories = [1, 2, 3] + exp_categories = Index([1, 2, 3]) + tm.assert_index_equal(s.cat.categories, exp_categories) + + exp_codes = Series([0, 1, 2, 0], dtype="int8") + tm.assert_series_equal(s.cat.codes, exp_codes) + + assert s.cat.ordered + s = s.cat.as_unordered() + assert not s.cat.ordered + s.cat.as_ordered(inplace=True) + assert s.cat.ordered + + # reorder + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = Index(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) + s = s.cat.set_categories(["c", "b", "a"]) + tm.assert_index_equal(s.cat.categories, exp_categories) + tm.assert_numpy_array_equal(s.values.__array__(), exp_values) + tm.assert_numpy_array_equal(s.__array__(), exp_values) + + # remove unused categories + s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"])) + exp_categories = Index(["a", "b"]) + exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_) + s = s.cat.remove_unused_categories() + tm.assert_index_equal(s.cat.categories, exp_categories) + tm.assert_numpy_array_equal(s.values.__array__(), exp_values) + tm.assert_numpy_array_equal(s.__array__(), exp_values) + + # This method is likely to be confused, so test that it raises an error + # on wrong inputs: 
+ msg = "'Series' object has no attribute 'set_categories'" + with pytest.raises(AttributeError, match=msg): + s.set_categories([4, 3, 2, 1]) + + # right: s.cat.set_categories([4,3,2,1]) + + # GH18862 (let Series.cat.rename_categories take callables) + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + result = s.cat.rename_categories(lambda x: x.upper()) + expected = Series( + Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True) + ) + tm.assert_series_equal(result, expected) + + def test_dt_accessor_api_for_categorical(self): + # https://github.com/pandas-dev/pandas/issues/10661 + from pandas.core.indexes.accessors import Properties + + s_dr = Series(date_range("1/1/2015", periods=5, tz="MET")) + c_dr = s_dr.astype("category") + + s_pr = Series(period_range("1/1/2015", freq="D", periods=5)) + c_pr = s_pr.astype("category") + + s_tdr = Series(timedelta_range("1 days", "10 days")) + c_tdr = s_tdr.astype("category") + + # only testing field (like .day) + # and bool (is_month_start) + get_ops = lambda x: x._datetimelike_ops + + test_data = [ + ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr), + ("Period", get_ops(PeriodArray), s_pr, c_pr), + ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr), + ] + + assert isinstance(c_dr.dt, Properties) + + special_func_defs = [ + ("strftime", ("%Y-%m-%d",), {}), + ("tz_convert", ("EST",), {}), + ("round", ("D",), {}), + ("floor", ("D",), {}), + ("ceil", ("D",), {}), + ("asfreq", ("D",), {}), + # FIXME: don't leave commented-out + # ('tz_localize', ("UTC",), {}), + ] + _special_func_names = [f[0] for f in special_func_defs] + + # the series is already localized + _ignore_names = ["tz_localize", "components"] + + for name, attr_names, s, c in test_data: + func_names = [ + f + for f in dir(s.dt) + if not ( + f.startswith("_") + or f in attr_names + or f in _special_func_names + or f in _ignore_names + ) + ] + + func_defs = [(f, (), {}) for f in func_names] + for f_def in special_func_defs: + if 
f_def[0] in dir(s.dt): + func_defs.append(f_def) + + for func, args, kwargs in func_defs: + with warnings.catch_warnings(): + if func == "to_period": + # dropping TZ + warnings.simplefilter("ignore", UserWarning) + res = getattr(c.dt, func)(*args, **kwargs) + exp = getattr(s.dt, func)(*args, **kwargs) + + tm.assert_equal(res, exp) + + for attr in attr_names: + res = getattr(c.dt, attr) + exp = getattr(s.dt, attr) + + if isinstance(res, DataFrame): + tm.assert_frame_equal(res, exp) + elif isinstance(res, Series): + tm.assert_series_equal(res, exp) + else: + tm.assert_almost_equal(res, exp) + + invalid = Series([1, 2, 3]).astype("category") + msg = "Can only use .dt accessor with datetimelike" + + with pytest.raises(AttributeError, match=msg): + invalid.dt + assert not hasattr(invalid, "str") diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py new file mode 100644 index 00000000..a4c55a80 --- /dev/null +++ b/pandas/tests/series/test_apply.py @@ -0,0 +1,789 @@ +from collections import Counter, defaultdict +from itertools import chain + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series, isna +import pandas._testing as tm +from pandas.conftest import _get_cython_table_params +from pandas.core.base import SpecificationError + + +class TestSeriesApply: + def test_apply(self, datetime_series): + with np.errstate(all="ignore"): + tm.assert_series_equal( + datetime_series.apply(np.sqrt), np.sqrt(datetime_series) + ) + + # element-wise apply + import math + + tm.assert_series_equal( + datetime_series.apply(math.exp), np.exp(datetime_series) + ) + + # empty series + s = Series(dtype=object, name="foo", index=pd.Index([], name="bar")) + rs = s.apply(lambda x: x) + tm.assert_series_equal(s, rs) + + # check all metadata (GH 9322) + assert s is not rs + assert s.index is rs.index + assert s.dtype == rs.dtype + assert s.name == rs.name + + # index but no data + s = Series(index=[1, 2, 3], 
dtype=np.float64) + rs = s.apply(lambda x: x) + tm.assert_series_equal(s, rs) + + def test_apply_same_length_inference_bug(self): + s = Series([1, 2]) + f = lambda x: (x, x + 1) + + result = s.apply(f) + expected = s.map(f) + tm.assert_series_equal(result, expected) + + s = Series([1, 2, 3]) + result = s.apply(f) + expected = s.map(f) + tm.assert_series_equal(result, expected) + + def test_apply_dont_convert_dtype(self): + s = Series(np.random.randn(10)) + + f = lambda x: x if x > 0 else np.nan + result = s.apply(f, convert_dtype=False) + assert result.dtype == object + + def test_with_string_args(self, datetime_series): + + for arg in ["sum", "mean", "min", "max", "std"]: + result = datetime_series.apply(arg) + expected = getattr(datetime_series, arg)() + assert result == expected + + def test_apply_args(self): + s = Series(["foo,bar"]) + + result = s.apply(str.split, args=(",",)) + assert result[0] == ["foo", "bar"] + assert isinstance(result[0], list) + + def test_series_map_box_timestamps(self): + # GH#2689, GH#2627 + ser = Series(pd.date_range("1/1/2000", periods=10)) + + def func(x): + return (x.hour, x.day, x.month) + + # it works! + ser.map(func) + ser.apply(func) + + def test_apply_box(self): + # ufunc will not be boxed. 
Same test cases as the test_map_box + vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] + s = pd.Series(vals) + assert s.dtype == "datetime64[ns]" + # boxed value must be Timestamp instance + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"]) + tm.assert_series_equal(res, exp) + + vals = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = pd.Series(vals) + assert s.dtype == "datetime64[ns, US/Eastern]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) + tm.assert_series_equal(res, exp) + + # timedelta + vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] + s = pd.Series(vals) + assert s.dtype == "timedelta64[ns]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") + exp = pd.Series(["Timedelta_1", "Timedelta_2"]) + tm.assert_series_equal(res, exp) + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = pd.Series(vals) + assert s.dtype == "Period[M]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") + exp = pd.Series(["Period_M", "Period_M"]) + tm.assert_series_equal(res, exp) + + def test_apply_datetimetz(self): + values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( + "Asia/Tokyo" + ) + s = pd.Series(values, name="XX") + + result = s.apply(lambda x: x + pd.offsets.Day()) + exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize( + "Asia/Tokyo" + ) + exp = pd.Series(exp_values, name="XX") + tm.assert_series_equal(result, exp) + + # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 + result = s.apply(lambda x: x.hour) + exp = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64) + tm.assert_series_equal(result, exp) + + # not vectorized + def f(x): + if not isinstance(x, pd.Timestamp): + raise ValueError + return 
str(x.tz) + + result = s.map(f) + exp = pd.Series(["Asia/Tokyo"] * 25, name="XX") + tm.assert_series_equal(result, exp) + + def test_apply_dict_depr(self): + + tsdf = pd.DataFrame( + np.random.randn(10, 3), + columns=["A", "B", "C"], + index=pd.date_range("1/1/2000", periods=10), + ) + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + tsdf.A.agg({"foo": ["sum", "mean"]}) + + def test_apply_categorical(self): + values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + ser = pd.Series(values, name="XX", index=list("abcdefg")) + result = ser.apply(lambda x: x.lower()) + + # should be categorical dtype when the number of categories are + # the same + values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True) + exp = pd.Series(values, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + tm.assert_categorical_equal(result.values, exp.values) + + result = ser.apply(lambda x: "A") + exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + assert result.dtype == np.object + + @pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]]) + def test_apply_categorical_with_nan_values(self, series): + # GH 20714 bug fixed in: GH 24275 + s = pd.Series(series, dtype="category") + result = s.apply(lambda x: x.split("-")[0]) + result = result.astype(object) + expected = pd.Series(["1", "1", np.NaN], dtype="category") + expected = expected.astype(object) + tm.assert_series_equal(result, expected) + + def test_apply_empty_integer_series_with_datetime_index(self): + # GH 21245 + s = pd.Series([], index=pd.date_range(start="2018-01-01", periods=0), dtype=int) + result = s.apply(lambda x: x) + tm.assert_series_equal(result, s) + + +class TestSeriesAggregate: + def test_transform(self, string_series): + # transforming functions + + with np.errstate(all="ignore"): + + f_sqrt = np.sqrt(string_series) + f_abs = 
np.abs(string_series) + + # ufunc + result = string_series.transform(np.sqrt) + expected = f_sqrt.copy() + tm.assert_series_equal(result, expected) + + result = string_series.apply(np.sqrt) + tm.assert_series_equal(result, expected) + + # list-like + result = string_series.transform([np.sqrt]) + expected = f_sqrt.to_frame().copy() + expected.columns = ["sqrt"] + tm.assert_frame_equal(result, expected) + + result = string_series.transform([np.sqrt]) + tm.assert_frame_equal(result, expected) + + result = string_series.transform(["sqrt"]) + tm.assert_frame_equal(result, expected) + + # multiple items in list + # these are in the order as if we are applying both functions per + # series and then concatting + expected = pd.concat([f_sqrt, f_abs], axis=1) + expected.columns = ["sqrt", "absolute"] + result = string_series.apply([np.sqrt, np.abs]) + tm.assert_frame_equal(result, expected) + + result = string_series.transform(["sqrt", "abs"]) + expected.columns = ["sqrt", "abs"] + tm.assert_frame_equal(result, expected) + + # dict, provide renaming + expected = pd.concat([f_sqrt, f_abs], axis=1) + expected.columns = ["foo", "bar"] + expected = expected.unstack().rename("series") + + result = string_series.apply({"foo": np.sqrt, "bar": np.abs}) + tm.assert_series_equal(result.reindex_like(expected), expected) + + def test_transform_and_agg_error(self, string_series): + # we are trying to transform with an aggregator + with pytest.raises(ValueError): + string_series.transform(["min", "max"]) + + with pytest.raises(ValueError): + with np.errstate(all="ignore"): + string_series.agg(["sqrt", "max"]) + + with pytest.raises(ValueError): + with np.errstate(all="ignore"): + string_series.transform(["sqrt", "max"]) + + with pytest.raises(ValueError): + with np.errstate(all="ignore"): + string_series.agg({"foo": np.sqrt, "bar": "sum"}) + + def test_demo(self): + # demonstration tests + s = Series(range(6), dtype="int64", name="series") + + result = s.agg(["min", "max"]) + expected = 
Series([0, 5], index=["min", "max"], name="series") + tm.assert_series_equal(result, expected) + + result = s.agg({"foo": "min"}) + expected = Series([0], index=["foo"], name="series") + tm.assert_series_equal(result, expected) + + # nested renaming + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + s.agg({"foo": ["min", "max"]}) + + def test_multiple_aggregators_with_dict_api(self): + + s = Series(range(6), dtype="int64", name="series") + # nested renaming + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]}) + + def test_agg_apply_evaluate_lambdas_the_same(self, string_series): + # test that we are evaluating row-by-row first + # before vectorized evaluation + result = string_series.apply(lambda x: str(x)) + expected = string_series.agg(lambda x: str(x)) + tm.assert_series_equal(result, expected) + + result = string_series.apply(str) + expected = string_series.agg(str) + tm.assert_series_equal(result, expected) + + def test_with_nested_series(self, datetime_series): + # GH 2316 + # .agg with a reducer and a transform, what to do + result = datetime_series.apply( + lambda x: Series([x, x ** 2], index=["x", "x^2"]) + ) + expected = DataFrame({"x": datetime_series, "x^2": datetime_series ** 2}) + tm.assert_frame_equal(result, expected) + + result = datetime_series.agg(lambda x: Series([x, x ** 2], index=["x", "x^2"])) + tm.assert_frame_equal(result, expected) + + def test_replicate_describe(self, string_series): + # this also tests a result set that is all scalars + expected = string_series.describe() + result = string_series.apply( + { + "count": "count", + "mean": "mean", + "std": "std", + "min": "min", + "25%": lambda x: x.quantile(0.25), + "50%": "median", + "75%": lambda x: x.quantile(0.75), + "max": "max", + } + ) + tm.assert_series_equal(result, expected) + + def test_reduce(self, string_series): + # reductions with 
named functions + result = string_series.agg(["sum", "mean"]) + expected = Series( + [string_series.sum(), string_series.mean()], + ["sum", "mean"], + name=string_series.name, + ) + tm.assert_series_equal(result, expected) + + def test_non_callable_aggregates(self): + # test agg using non-callable series attributes + s = Series([1, 2, None]) + + # Calling agg w/ just a string arg same as calling s.arg + result = s.agg("size") + expected = s.size + assert result == expected + + # test when mixed w/ callable reducers + result = s.agg(["size", "count", "mean"]) + expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) + tm.assert_series_equal(result[expected.index], expected) + + @pytest.mark.parametrize( + "series, func, expected", + chain( + _get_cython_table_params( + Series(dtype=np.float64), + [ + ("sum", 0), + ("max", np.nan), + ("min", np.nan), + ("all", True), + ("any", False), + ("mean", np.nan), + ("prod", 1), + ("std", np.nan), + ("var", np.nan), + ("median", np.nan), + ], + ), + _get_cython_table_params( + Series([np.nan, 1, 2, 3]), + [ + ("sum", 6), + ("max", 3), + ("min", 1), + ("all", True), + ("any", True), + ("mean", 2), + ("prod", 6), + ("std", 1), + ("var", 1), + ("median", 2), + ], + ), + _get_cython_table_params( + Series("a b c".split()), + [ + ("sum", "abc"), + ("max", "c"), + ("min", "a"), + ("all", "c"), # see GH12863 + ("any", "a"), + ], + ), + ), + ) + def test_agg_cython_table(self, series, func, expected): + # GH21224 + # test reducing functions in + # pandas.core.base.SelectionMixin._cython_table + result = series.agg(func) + if tm.is_number(expected): + assert np.isclose(result, expected, equal_nan=True) + else: + assert result == expected + + @pytest.mark.parametrize( + "series, func, expected", + chain( + _get_cython_table_params( + Series(dtype=np.float64), + [ + ("cumprod", Series([], Index([]), dtype=np.float64)), + ("cumsum", Series([], Index([]), dtype=np.float64)), + ], + ), + _get_cython_table_params( + Series([np.nan, 1, 2, 
3]), + [ + ("cumprod", Series([np.nan, 1, 2, 6])), + ("cumsum", Series([np.nan, 1, 3, 6])), + ], + ), + _get_cython_table_params( + Series("a b c".split()), [("cumsum", Series(["a", "ab", "abc"]))] + ), + ), + ) + def test_agg_cython_table_transform(self, series, func, expected): + # GH21224 + # test transforming functions in + # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum) + result = series.agg(func) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "series, func, expected", + chain( + _get_cython_table_params( + Series("a b c".split()), + [ + ("mean", TypeError), # mean raises TypeError + ("prod", TypeError), + ("std", TypeError), + ("var", TypeError), + ("median", TypeError), + ("cumprod", TypeError), + ], + ) + ), + ) + def test_agg_cython_table_raises(self, series, func, expected): + # GH21224 + with pytest.raises(expected): + # e.g. Series('a b'.split()).cumprod() will raise + series.agg(func) + + +class TestSeriesMap: + def test_map(self, datetime_series): + index, data = tm.getMixedTypeDict() + + source = Series(data["B"], index=data["C"]) + target = Series(data["C"][:4], index=data["D"][:4]) + + merged = target.map(source) + + for k, v in merged.items(): + assert v == source[target[k]] + + # input could be a dict + merged = target.map(source.to_dict()) + + for k, v in merged.items(): + assert v == source[target[k]] + + # function + result = datetime_series.map(lambda x: x * 2) + tm.assert_series_equal(result, datetime_series * 2) + + # GH 10324 + a = Series([1, 2, 3, 4]) + b = Series(["even", "odd", "even", "odd"], dtype="category") + c = Series(["even", "odd", "even", "odd"]) + + exp = Series(["odd", "even", "odd", np.nan], dtype="category") + tm.assert_series_equal(a.map(b), exp) + exp = Series(["odd", "even", "odd", np.nan]) + tm.assert_series_equal(a.map(c), exp) + + a = Series(["a", "b", "c", "d"]) + b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"])) + c = Series([1, 2, 3, 4], 
index=Index(["b", "c", "d", "e"])) + + exp = Series([np.nan, 1, 2, 3]) + tm.assert_series_equal(a.map(b), exp) + exp = Series([np.nan, 1, 2, 3]) + tm.assert_series_equal(a.map(c), exp) + + a = Series(["a", "b", "c", "d"]) + b = Series( + ["B", "C", "D", "E"], + dtype="category", + index=pd.CategoricalIndex(["b", "c", "d", "e"]), + ) + c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"])) + + exp = Series( + pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"]) + ) + tm.assert_series_equal(a.map(b), exp) + exp = Series([np.nan, "B", "C", "D"]) + tm.assert_series_equal(a.map(c), exp) + + @pytest.mark.parametrize("index", tm.all_index_generator(10)) + def test_map_empty(self, index): + s = Series(index) + result = s.map({}) + + expected = pd.Series(np.nan, index=s.index) + tm.assert_series_equal(result, expected) + + def test_map_compat(self): + # related GH 8024 + s = Series([True, True, False], index=[1, 2, 3]) + result = s.map({True: "foo", False: "bar"}) + expected = Series(["foo", "foo", "bar"], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + def test_map_int(self): + left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4}) + right = Series({1: 11, 2: 22, 3: 33}) + + assert left.dtype == np.float_ + assert issubclass(right.dtype.type, np.integer) + + merged = left.map(right) + assert merged.dtype == np.float_ + assert isna(merged["d"]) + assert not isna(merged["c"]) + + def test_map_type_inference(self): + s = Series(range(3)) + s2 = s.map(lambda x: np.where(x == 0, 0, 1)) + assert issubclass(s2.dtype.type, np.integer) + + def test_map_decimal(self, string_series): + from decimal import Decimal + + result = string_series.map(lambda x: Decimal(str(x))) + assert result.dtype == np.object_ + assert isinstance(result[0], Decimal) + + def test_map_na_exclusion(self): + s = Series([1.5, np.nan, 3, np.nan, 5]) + + result = s.map(lambda x: x * 2, na_action="ignore") + exp = s * 2 + tm.assert_series_equal(result, exp) + + def 
test_map_dict_with_tuple_keys(self): + """ + Due to new MultiIndex-ing behaviour in v0.14.0, + dicts with tuple keys passed to map were being + converted to a multi-index, preventing tuple values + from being mapped properly. + """ + # GH 18496 + df = pd.DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]}) + label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"} + + df["labels"] = df["a"].map(label_mappings) + df["expected_labels"] = pd.Series(["A", "B", "A", "B"], index=df.index) + # All labels should be filled now + tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False) + + def test_map_counter(self): + s = Series(["a", "b", "c"], index=[1, 2, 3]) + counter = Counter() + counter["b"] = 5 + counter["c"] += 1 + result = s.map(counter) + expected = Series([0, 5, 1], index=[1, 2, 3]) + tm.assert_series_equal(result, expected) + + def test_map_defaultdict(self): + s = Series([1, 2, 3], index=["a", "b", "c"]) + default_dict = defaultdict(lambda: "blank") + default_dict[1] = "stuff" + result = s.map(default_dict) + expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"]) + tm.assert_series_equal(result, expected) + + def test_map_dict_na_key(self): + # https://github.com/pandas-dev/pandas/issues/17648 + # Checks that np.nan key is appropriately mapped + s = Series([1, 2, np.nan]) + expected = Series(["a", "b", "c"]) + result = s.map({1: "a", 2: "b", np.nan: "c"}) + tm.assert_series_equal(result, expected) + + def test_map_dict_subclass_with_missing(self): + """ + Test Series.map with a dictionary subclass that defines __missing__, + i.e. sets a default value (GH #15999). 
+ """ + + class DictWithMissing(dict): + def __missing__(self, key): + return "missing" + + s = Series([1, 2, 3]) + dictionary = DictWithMissing({3: "three"}) + result = s.map(dictionary) + expected = Series(["missing", "missing", "three"]) + tm.assert_series_equal(result, expected) + + def test_map_dict_subclass_without_missing(self): + class DictWithoutMissing(dict): + pass + + s = Series([1, 2, 3]) + dictionary = DictWithoutMissing({3: "three"}) + result = s.map(dictionary) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + def test_map_abc_mapping(self, non_mapping_dict_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + s = Series([1, 2, 3]) + not_a_dictionary = non_mapping_dict_subclass({3: "three"}) + result = s.map(not_a_dictionary) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + def test_map_abc_mapping_with_missing(self, non_mapping_dict_subclass): + # https://github.com/pandas-dev/pandas/issues/29733 + # Check collections.abc.Mapping support as mapper for Series.map + class NonDictMappingWithMissing(non_mapping_dict_subclass): + def __missing__(self, key): + return "missing" + + s = Series([1, 2, 3]) + not_a_dictionary = NonDictMappingWithMissing({3: "three"}) + result = s.map(not_a_dictionary) + # __missing__ is a dict concept, not a Mapping concept, + # so it should not change the result! 
+ expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) + + def test_map_box(self): + vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")] + s = pd.Series(vals) + assert s.dtype == "datetime64[ns]" + # boxed value must be Timestamp instance + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"]) + tm.assert_series_equal(res, exp) + + vals = [ + pd.Timestamp("2011-01-01", tz="US/Eastern"), + pd.Timestamp("2011-01-02", tz="US/Eastern"), + ] + s = pd.Series(vals) + assert s.dtype == "datetime64[ns, US/Eastern]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) + tm.assert_series_equal(res, exp) + + # timedelta + vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] + s = pd.Series(vals) + assert s.dtype == "timedelta64[ns]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") + exp = pd.Series(["Timedelta_1", "Timedelta_2"]) + tm.assert_series_equal(res, exp) + + # period + vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] + s = pd.Series(vals) + assert s.dtype == "Period[M]" + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") + exp = pd.Series(["Period_M", "Period_M"]) + tm.assert_series_equal(res, exp) + + def test_map_categorical(self): + values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) + s = pd.Series(values, name="XX", index=list("abcdefg")) + + result = s.map(lambda x: x.lower()) + exp_values = pd.Categorical( + list("abbabcd"), categories=list("dcba"), ordered=True + ) + exp = pd.Series(exp_values, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + tm.assert_categorical_equal(result.values, exp_values) + + result = s.map(lambda x: "A") + exp = pd.Series(["A"] * 7, name="XX", index=list("abcdefg")) + tm.assert_series_equal(result, exp) + assert result.dtype == np.object 
+ + with pytest.raises(NotImplementedError): + s.map(lambda x: x, na_action="ignore") + + def test_map_datetimetz(self): + values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( + "Asia/Tokyo" + ) + s = pd.Series(values, name="XX") + + # keep tz + result = s.map(lambda x: x + pd.offsets.Day()) + exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize( + "Asia/Tokyo" + ) + exp = pd.Series(exp_values, name="XX") + tm.assert_series_equal(result, exp) + + # change dtype + # GH 14506 : Returned dtype changed from int32 to int64 + result = s.map(lambda x: x.hour) + exp = pd.Series(list(range(24)) + [0], name="XX", dtype=np.int64) + tm.assert_series_equal(result, exp) + + with pytest.raises(NotImplementedError): + s.map(lambda x: x, na_action="ignore") + + # not vectorized + def f(x): + if not isinstance(x, pd.Timestamp): + raise ValueError + return str(x.tz) + + result = s.map(f) + exp = pd.Series(["Asia/Tokyo"] * 25, name="XX") + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "vals,mapping,exp", + [ + (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]), + (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3), + (list(range(3)), {0: 42}, [42] + [np.nan] * 3), + ], + ) + def test_map_missing_mixed(self, vals, mapping, exp): + # GH20495 + s = pd.Series(vals + [np.nan]) + result = s.map(mapping) + + tm.assert_series_equal(result, pd.Series(exp)) + + @pytest.mark.parametrize( + "dti,exp", + [ + ( + Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])), + DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"), + ), + ( + tm.makeTimeSeries(nper=30), + DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"), + ), + ], + ) + def test_apply_series_on_date_time_index_aware_series(self, dti, exp): + # GH 25959 + # Calling apply on a localized time series should not cause an error + index = dti.tz_localize("UTC").index + result = pd.Series(index).apply(lambda x: pd.Series([1, 2])) + 
tm.assert_frame_equal(result, exp) + + def test_apply_scaler_on_date_time_index_aware_series(self): + # GH 25959 + # Calling apply on a localized time series should not cause an error + series = tm.makeTimeSeries(nper=30).tz_localize("UTC") + result = pd.Series(series.index).apply(lambda x: 1) + tm.assert_series_equal(result, pd.Series(np.ones(30), dtype="int64")) + + def test_map_float_to_string_precision(self): + # GH 13228 + ser = pd.Series(1 / 3) + result = ser.map(lambda val: str(val)).to_dict() + expected = {0: "0.3333333333333333"} + assert result == expected diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py new file mode 100644 index 00000000..f3ffdc37 --- /dev/null +++ b/pandas/tests/series/test_arithmetic.py @@ -0,0 +1,205 @@ +import operator + +import numpy as np +import pytest + +from pandas._libs.tslibs import IncompatibleFrequency + +import pandas as pd +from pandas import Series +import pandas._testing as tm + + +def _permute(obj): + return obj.take(np.random.permutation(len(obj))) + + +class TestSeriesFlexArithmetic: + @pytest.mark.parametrize( + "ts", + [ + (lambda x: x, lambda x: x * 2, False), + (lambda x: x, lambda x: x[::2], False), + (lambda x: x, lambda x: 5, True), + (lambda x: tm.makeFloatSeries(), lambda x: tm.makeFloatSeries(), True), + ], + ) + @pytest.mark.parametrize( + "opname", ["add", "sub", "mul", "floordiv", "truediv", "pow"] + ) + def test_flex_method_equivalence(self, opname, ts): + # check that Series.{opname} behaves like Series.__{opname}__, + tser = tm.makeTimeSeries().rename("ts") + + series = ts[0](tser) + other = ts[1](tser) + check_reverse = ts[2] + + op = getattr(Series, opname) + alt = getattr(operator, opname) + + result = op(series, other) + expected = alt(series, other) + tm.assert_almost_equal(result, expected) + if check_reverse: + rop = getattr(Series, "r" + opname) + result = rop(series, other) + expected = alt(other, series) + tm.assert_almost_equal(result, expected) + 
+ def test_flex_method_subclass_metadata_preservation(self, all_arithmetic_operators): + # GH 13208 + class MySeries(Series): + _metadata = ["x"] + + @property + def _constructor(self): + return MySeries + + opname = all_arithmetic_operators + op = getattr(Series, opname) + m = MySeries([1, 2, 3], name="test") + m.x = 42 + result = op(m, 1) + assert result.x == 42 + + +class TestSeriesArithmetic: + # Some of these may end up in tests/arithmetic, but are not yet sorted + + def test_add_series_with_period_index(self): + rng = pd.period_range("1/1/2000", "1/1/2010", freq="A") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts + ts[::2] + expected = ts + ts + expected[1::2] = np.nan + tm.assert_series_equal(result, expected) + + result = ts + _permute(ts[::2]) + tm.assert_series_equal(result, expected) + + msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" + with pytest.raises(IncompatibleFrequency, match=msg): + ts + ts.asfreq("D", how="end") + + @pytest.mark.parametrize( + "target_add,input_value,expected_value", + [ + ("!", ["hello", "world"], ["hello!", "world!"]), + ("m", ["hello", "world"], ["hellom", "worldm"]), + ], + ) + def test_string_addition(self, target_add, input_value, expected_value): + # GH28658 - ensure adding 'm' does not raise an error + a = Series(input_value) + + result = a + target_add + expected = Series(expected_value) + tm.assert_series_equal(result, expected) + + +# ------------------------------------------------------------------ +# Comparisons + + +class TestSeriesFlexComparison: + def test_comparison_flex_basic(self): + left = pd.Series(np.random.randn(10)) + right = pd.Series(np.random.randn(10)) + + tm.assert_series_equal(left.eq(right), left == right) + tm.assert_series_equal(left.ne(right), left != right) + tm.assert_series_equal(left.le(right), left < right) + tm.assert_series_equal(left.lt(right), left <= right) + tm.assert_series_equal(left.gt(right), left > right) + 
tm.assert_series_equal(left.ge(right), left >= right) + + # axis + for axis in [0, None, "index"]: + tm.assert_series_equal(left.eq(right, axis=axis), left == right) + tm.assert_series_equal(left.ne(right, axis=axis), left != right) + tm.assert_series_equal(left.le(right, axis=axis), left < right) + tm.assert_series_equal(left.lt(right, axis=axis), left <= right) + tm.assert_series_equal(left.gt(right, axis=axis), left > right) + tm.assert_series_equal(left.ge(right, axis=axis), left >= right) + + # + msg = "No axis named 1 for object type" + for op in ["eq", "ne", "le", "le", "gt", "ge"]: + with pytest.raises(ValueError, match=msg): + getattr(left, op)(right, axis=1) + + +class TestSeriesComparison: + def test_comparison_different_length(self): + a = Series(["a", "b", "c"]) + b = Series(["b", "a"]) + with pytest.raises(ValueError): + a < b + + a = Series([1, 2]) + b = Series([2, 3, 4]) + with pytest.raises(ValueError): + a == b + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_ser_flex_cmp_return_dtypes(self, opname): + # GH#15115 + ser = Series([1, 3, 2], index=range(3)) + const = 2 + result = getattr(ser, opname)(const).dtypes + expected = np.dtype("bool") + assert result == expected + + @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"]) + def test_ser_flex_cmp_return_dtypes_empty(self, opname): + # GH#15115 empty Series case + ser = Series([1, 3, 2], index=range(3)) + empty = ser.iloc[:0] + const = 2 + result = getattr(empty, opname)(const).dtypes + expected = np.dtype("bool") + assert result == expected + + @pytest.mark.parametrize( + "op", + [operator.eq, operator.ne, operator.le, operator.lt, operator.ge, operator.gt], + ) + @pytest.mark.parametrize( + "names", [(None, None, None), ("foo", "bar", None), ("baz", "baz", "baz")] + ) + def test_ser_cmp_result_names(self, names, op): + # datetime64 dtype + dti = pd.date_range("1949-06-07 03:00:00", freq="H", periods=5, name=names[0]) + ser = 
Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # datetime64tz dtype + dti = dti.tz_localize("US/Central") + ser = Series(dti).rename(names[1]) + result = op(ser, dti) + assert result.name == names[2] + + # timedelta64 dtype + tdi = dti - dti.shift(1) + ser = Series(tdi).rename(names[1]) + result = op(ser, tdi) + assert result.name == names[2] + + # interval dtype + if op in [operator.eq, operator.ne]: + # interval dtype comparisons not yet implemented + ii = pd.interval_range(start=0, periods=5, name=names[0]) + ser = Series(ii).rename(names[1]) + result = op(ser, ii) + assert result.name == names[2] + + # categorical + if op in [operator.eq, operator.ne]: + # categorical dtype comparisons raise for inequalities + cidx = tdi.astype("category") + ser = Series(cidx).rename(names[1]) + result = op(ser, cidx) + assert result.name == names[2] diff --git a/pandas/tests/series/test_block_internals.py b/pandas/tests/series/test_block_internals.py new file mode 100644 index 00000000..18e75c3b --- /dev/null +++ b/pandas/tests/series/test_block_internals.py @@ -0,0 +1,39 @@ +import pandas as pd + +# Segregated collection of methods that require the BlockManager internal data +# structure + + +class TestSeriesBlockInternals: + def test_setitem_invalidates_datetime_index_freq(self): + # GH#24096 altering a datetime64tz Series inplace invalidates the + # `freq` attribute on the underlying DatetimeIndex + + dti = pd.date_range("20130101", periods=3, tz="US/Eastern") + ts = dti[1] + ser = pd.Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert dti.freq == "D" + ser.iloc[1] = pd.NaT + assert ser._values.freq is None + + # check that the DatetimeIndex was not altered in place + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert dti[1] == ts + assert dti.freq == "D" + + def test_dt64tz_setitem_does_not_mutate_dti(self): + # GH#21907, 
GH#24096 + dti = pd.date_range("2016-01-01", periods=10, tz="US/Pacific") + ts = dti[0] + ser = pd.Series(dti) + assert ser._values is not dti + assert ser._values._data.base is not dti._data._data.base + assert ser._data.blocks[0].values is not dti + assert ser._data.blocks[0].values._data.base is not dti._data._data.base + + ser[::3] = pd.NaT + assert ser[0] is pd.NaT + assert dti[0] == ts diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py new file mode 100644 index 00000000..239353d3 --- /dev/null +++ b/pandas/tests/series/test_combine_concat.py @@ -0,0 +1,267 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm + + +class TestSeriesCombine: + def test_combine_scalar(self): + # GH 21248 + # Note - combine() with another Series is tested elsewhere because + # it is used when testing operators + s = pd.Series([i * 10 for i in range(5)]) + result = s.combine(3, lambda x, y: x + y) + expected = pd.Series([i * 10 + 3 for i in range(5)]) + tm.assert_series_equal(result, expected) + + result = s.combine(22, lambda x, y: min(x, y)) + expected = pd.Series([min(i * 10, 22) for i in range(5)]) + tm.assert_series_equal(result, expected) + + def test_combine_first(self): + values = tm.makeIntIndex(20).values.astype(float) + series = Series(values, index=tm.makeIntIndex(20)) + + series_copy = series * 2 + series_copy[::2] = np.NaN + + # nothing used from the input + combined = series.combine_first(series_copy) + + tm.assert_series_equal(combined, series) + + # Holes filled from input + combined = series_copy.combine_first(series) + assert np.isfinite(combined).all() + + tm.assert_series_equal(combined[::2], series[::2]) + tm.assert_series_equal(combined[1::2], series_copy[1::2]) + + # mixed types + index = tm.makeStringIndex(20) + floats = Series(tm.randn(20), index=index) + strings = Series(tm.makeStringIndex(10), 
index=index[::2]) + + combined = strings.combine_first(floats) + + tm.assert_series_equal(strings, combined.loc[index[::2]]) + tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]]) + + # corner case + s = Series([1.0, 2, 3], index=[0, 1, 2]) + empty = Series([], index=[], dtype=object) + result = s.combine_first(empty) + s.index = s.index.astype("O") + tm.assert_series_equal(s, result) + + def test_update(self): + s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) + s2 = Series([np.nan, 3.5, np.nan, 5.0]) + s.update(s2) + + expected = Series([1.5, 3.5, 3.0, 5.0, np.nan]) + tm.assert_series_equal(s, expected) + + # GH 3217 + df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) + df["c"] = np.nan + + df["c"].update(Series(["foo"], index=[0])) + expected = DataFrame( + [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] + ) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize( + "other, dtype, expected", + [ + # other is int + ([61, 63], "int32", pd.Series([10, 61, 12], dtype="int32")), + ([61, 63], "int64", pd.Series([10, 61, 12])), + ([61, 63], float, pd.Series([10.0, 61.0, 12.0])), + ([61, 63], object, pd.Series([10, 61, 12], dtype=object)), + # other is float, but can be cast to int + ([61.0, 63.0], "int32", pd.Series([10, 61, 12], dtype="int32")), + ([61.0, 63.0], "int64", pd.Series([10, 61, 12])), + ([61.0, 63.0], float, pd.Series([10.0, 61.0, 12.0])), + ([61.0, 63.0], object, pd.Series([10, 61.0, 12], dtype=object)), + # others is float, cannot be cast to int + ([61.1, 63.1], "int32", pd.Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], "int64", pd.Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], float, pd.Series([10.0, 61.1, 12.0])), + ([61.1, 63.1], object, pd.Series([10, 61.1, 12], dtype=object)), + # other is object, cannot be cast + ([(61,), (63,)], "int32", pd.Series([10, (61,), 12])), + ([(61,), (63,)], "int64", pd.Series([10, (61,), 12])), + ([(61,), (63,)], float, pd.Series([10.0, (61,), 12.0])), + ([(61,), (63,)], object, 
pd.Series([10, (61,), 12])), + ], + ) + def test_update_dtypes(self, other, dtype, expected): + + s = Series([10, 11, 12], dtype=dtype) + other = Series(other, index=[1, 3]) + s.update(other) + + tm.assert_series_equal(s, expected) + + def test_concat_empty_series_dtypes_roundtrips(self): + + # round-tripping with self & like self + dtypes = map(np.dtype, ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]) + + for dtype in dtypes: + assert pd.concat([Series(dtype=dtype)]).dtype == dtype + assert pd.concat([Series(dtype=dtype), Series(dtype=dtype)]).dtype == dtype + + def int_result_type(dtype, dtype2): + typs = {dtype.kind, dtype2.kind} + if not len(typs - {"i", "u", "b"}) and ( + dtype.kind == "i" or dtype2.kind == "i" + ): + return "i" + elif not len(typs - {"u", "b"}) and ( + dtype.kind == "u" or dtype2.kind == "u" + ): + return "u" + return None + + def float_result_type(dtype, dtype2): + typs = {dtype.kind, dtype2.kind} + if not len(typs - {"f", "i", "u"}) and ( + dtype.kind == "f" or dtype2.kind == "f" + ): + return "f" + return None + + def get_result_type(dtype, dtype2): + result = float_result_type(dtype, dtype2) + if result is not None: + return result + result = int_result_type(dtype, dtype2) + if result is not None: + return result + return "O" + + for dtype in dtypes: + for dtype2 in dtypes: + if dtype == dtype2: + continue + + expected = get_result_type(dtype, dtype2) + result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype + assert result.kind == expected + + def test_combine_first_dt_tz_values(self, tz_naive_fixture): + ser1 = pd.Series( + pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture), + name="ser1", + ) + ser2 = pd.Series( + pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture), + index=[2, 3, 4], + name="ser2", + ) + result = ser1.combine_first(ser2) + exp_vals = pd.DatetimeIndex( + ["20150101", "20150102", "20150103", "20160515", "20160516"], + tz=tz_naive_fixture, + ) + exp 
= pd.Series(exp_vals, name="ser1") + tm.assert_series_equal(exp, result) + + def test_concat_empty_series_dtypes(self): + + # booleans + assert ( + pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype + == np.int32 + ) + assert ( + pd.concat([Series(dtype=np.bool_), Series(dtype=np.float32)]).dtype + == np.object_ + ) + + # datetime-like + assert ( + pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.bool)]).dtype + == np.object_ + ) + assert ( + pd.concat([Series(dtype="m8[ns]"), Series(dtype=np.int64)]).dtype + == np.object_ + ) + assert ( + pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.bool)]).dtype + == np.object_ + ) + assert ( + pd.concat([Series(dtype="M8[ns]"), Series(dtype=np.int64)]).dtype + == np.object_ + ) + assert ( + pd.concat( + [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)] + ).dtype + == np.object_ + ) + + # categorical + assert ( + pd.concat([Series(dtype="category"), Series(dtype="category")]).dtype + == "category" + ) + # GH 18515 + assert ( + pd.concat( + [Series(np.array([]), dtype="category"), Series(dtype="float64")] + ).dtype + == "float64" + ) + assert ( + pd.concat([Series(dtype="category"), Series(dtype="object")]).dtype + == "object" + ) + + # sparse + # TODO: move? 
+ result = pd.concat( + [ + Series(dtype="float64").astype("Sparse"), + Series(dtype="float64").astype("Sparse"), + ] + ) + assert result.dtype == "Sparse[float64]" + + result = pd.concat( + [Series(dtype="float64").astype("Sparse"), Series(dtype="float64")] + ) + # TODO: release-note: concat sparse dtype + expected = pd.SparseDtype(np.float64) + assert result.dtype == expected + + result = pd.concat( + [Series(dtype="float64").astype("Sparse"), Series(dtype="object")] + ) + # TODO: release-note: concat sparse dtype + expected = pd.SparseDtype("object") + assert result.dtype == expected + + def test_combine_first_dt64(self): + from pandas.core.tools.datetimes import to_datetime + + s0 = to_datetime(Series(["2010", np.NaN])) + s1 = to_datetime(Series([np.NaN, "2011"])) + rs = s0.combine_first(s1) + xp = to_datetime(Series(["2010", "2011"])) + tm.assert_series_equal(rs, xp) + + s0 = to_datetime(Series(["2010", np.NaN])) + s1 = Series([np.NaN, "2011"]) + rs = s0.combine_first(s1) + xp = Series([datetime(2010, 1, 1), "2011"]) + tm.assert_series_equal(rs, xp) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py new file mode 100644 index 00000000..c38e5708 --- /dev/null +++ b/pandas/tests/series/test_constructors.py @@ -0,0 +1,1406 @@ +from collections import OrderedDict +from datetime import datetime, timedelta + +import numpy as np +import numpy.ma as ma +import pytest + +from pandas._libs import lib +from pandas._libs.tslib import iNaT + +from pandas.core.dtypes.common import is_categorical_dtype, is_datetime64tz_dtype +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + IntervalIndex, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + isna, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import IntervalArray, period_array + + +class TestSeriesConstructors: + @pytest.mark.parametrize( 
+ "constructor,check_index_type", + [ + # NOTE: some overlap with test_constructor_empty but that test does not + # test for None or an empty generator. + # test_constructor_pass_none tests None but only with the index also + # passed. + (lambda: Series(), True), + (lambda: Series(None), True), + (lambda: Series({}), True), + (lambda: Series(()), False), # creates a RangeIndex + (lambda: Series([]), False), # creates a RangeIndex + (lambda: Series((_ for _ in [])), False), # creates a RangeIndex + (lambda: Series(data=None), True), + (lambda: Series(data={}), True), + (lambda: Series(data=()), False), # creates a RangeIndex + (lambda: Series(data=[]), False), # creates a RangeIndex + (lambda: Series(data=(_ for _ in [])), False), # creates a RangeIndex + ], + ) + def test_empty_constructor(self, constructor, check_index_type): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + expected = Series() + result = constructor() + + assert len(result.index) == 0 + tm.assert_series_equal(result, expected, check_index_type=check_index_type) + + def test_invalid_dtype(self): + # GH15520 + msg = "not understood" + invalid_list = [pd.Timestamp, "pd.Timestamp", list] + for dtype in invalid_list: + with pytest.raises(TypeError, match=msg): + Series([], name="time", dtype=dtype) + + def test_invalid_compound_dtype(self): + # GH#13296 + c_dtype = np.dtype([("a", "i8"), ("b", "f4")]) + cdt_arr = np.array([(1, 0.4), (256, -13)], dtype=c_dtype) + + with pytest.raises(ValueError, match="Use DataFrame instead"): + Series(cdt_arr, index=["A", "B"]) + + def test_scalar_conversion(self): + + # Pass in scalar is disabled + scalar = Series(0.5) + assert not isinstance(scalar, float) + + # Coercion + assert float(Series([1.0])) == 1.0 + assert int(Series([1.0])) == 1 + + def test_constructor(self, datetime_series): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty_series = Series() + assert datetime_series.index.is_all_dates + 
+ # Pass in Series + derived = Series(datetime_series) + assert derived.index.is_all_dates + + assert tm.equalContents(derived.index, datetime_series.index) + # Ensure new index is not created + assert id(datetime_series.index) == id(derived.index) + + # Mixed type Series + mixed = Series(["hello", np.NaN], index=[0, 1]) + assert mixed.dtype == np.object_ + assert mixed[1] is np.NaN + + assert not empty_series.index.is_all_dates + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + assert not Series().index.is_all_dates + + # exception raised is of type Exception + with pytest.raises(Exception, match="Data must be 1-dimensional"): + Series(np.random.randn(3, 3), index=np.arange(3)) + + mixed.name = "Series" + rs = Series(mixed).name + xp = "Series" + assert rs == xp + + # raise on MultiIndex GH4187 + m = MultiIndex.from_arrays([[1, 2], [3, 4]]) + msg = "initializing a Series from a MultiIndex is not supported" + with pytest.raises(NotImplementedError, match=msg): + Series(m) + + @pytest.mark.parametrize("input_class", [list, dict, OrderedDict]) + def test_constructor_empty(self, input_class): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty = Series() + empty2 = Series(input_class()) + + # these are Index() and RangeIndex() which don't compare type equal + # but are just .equals + tm.assert_series_equal(empty, empty2, check_index_type=False) + + # With explicit dtype: + empty = Series(dtype="float64") + empty2 = Series(input_class(), dtype="float64") + tm.assert_series_equal(empty, empty2, check_index_type=False) + + # GH 18515 : with dtype=category: + empty = Series(dtype="category") + empty2 = Series(input_class(), dtype="category") + tm.assert_series_equal(empty, empty2, check_index_type=False) + + if input_class is not list: + # With index: + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + empty = Series(index=range(10)) + empty2 = Series(input_class(), 
index=range(10)) + tm.assert_series_equal(empty, empty2) + + # With index and dtype float64: + empty = Series(np.nan, index=range(10)) + empty2 = Series(input_class(), index=range(10), dtype="float64") + tm.assert_series_equal(empty, empty2) + + # GH 19853 : with empty string, index and dtype str + empty = Series("", dtype=str, index=range(3)) + empty2 = Series("", index=range(3)) + tm.assert_series_equal(empty, empty2) + + @pytest.mark.parametrize("input_arg", [np.nan, float("nan")]) + def test_constructor_nan(self, input_arg): + empty = Series(dtype="float64", index=range(10)) + empty2 = Series(input_arg, index=range(10)) + + tm.assert_series_equal(empty, empty2, check_index_type=False) + + @pytest.mark.parametrize( + "dtype", + ["f8", "i8", "M8[ns]", "m8[ns]", "category", "object", "datetime64[ns, UTC]"], + ) + @pytest.mark.parametrize("index", [None, pd.Index([])]) + def test_constructor_dtype_only(self, dtype, index): + # GH-20865 + result = pd.Series(dtype=dtype, index=index) + assert result.dtype == dtype + assert len(result) == 0 + + def test_constructor_no_data_index_order(self): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + result = pd.Series(index=["b", "a", "c"]) + assert result.index.tolist() == ["b", "a", "c"] + + def test_constructor_no_data_string_type(self): + # GH 22477 + result = pd.Series(index=[1], dtype=str) + assert np.isnan(result.iloc[0]) + + @pytest.mark.parametrize("item", ["entry", "ѐ", 13]) + def test_constructor_string_element_string_type(self, item): + # GH 22477 + result = pd.Series(item, index=[1], dtype=str) + assert result.iloc[0] == str(item) + + def test_constructor_dtype_str_na_values(self, string_dtype): + # https://github.com/pandas-dev/pandas/issues/21083 + ser = Series(["x", None], dtype=string_dtype) + result = ser.isna() + expected = Series([False, True]) + tm.assert_series_equal(result, expected) + assert ser.iloc[1] is None + + ser = Series(["x", np.nan], dtype=string_dtype) + assert 
np.isnan(ser.iloc[1]) + + def test_constructor_series(self): + index1 = ["d", "b", "a", "c"] + index2 = sorted(index1) + s1 = Series([4, 7, -5, 3], index=index1) + s2 = Series(s1, index=index2) + + tm.assert_series_equal(s2, s1.sort_index()) + + def test_constructor_iterable(self): + # GH 21987 + class Iter: + def __iter__(self): + for i in range(10): + yield i + + expected = Series(list(range(10)), dtype="int64") + result = Series(Iter(), dtype="int64") + tm.assert_series_equal(result, expected) + + def test_constructor_sequence(self): + # GH 21987 + expected = Series(list(range(10)), dtype="int64") + result = Series(range(10), dtype="int64") + tm.assert_series_equal(result, expected) + + def test_constructor_single_str(self): + # GH 21987 + expected = Series(["abc"]) + result = Series("abc") + tm.assert_series_equal(result, expected) + + def test_constructor_list_like(self): + + # make sure that we are coercing different + # list-likes to standard dtypes and not + # platform specific + expected = Series([1, 2, 3], dtype="int64") + for obj in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype="int64")]: + result = Series(obj, index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["bool", "int32", "int64", "float64"]) + def test_constructor_index_dtype(self, dtype): + # GH 17088 + + s = Series(Index([0, 2, 4]), dtype=dtype) + assert s.dtype == dtype + + @pytest.mark.parametrize( + "input_vals", + [ + ([1, 2]), + (["1", "2"]), + (list(pd.date_range("1/1/2011", periods=2, freq="H"))), + (list(pd.date_range("1/1/2011", periods=2, freq="H", tz="US/Eastern"))), + ([pd.Interval(left=0, right=5)]), + ], + ) + def test_constructor_list_str(self, input_vals, string_dtype): + # GH 16605 + # Ensure that data elements from a list are converted to strings + # when dtype is str, 'str', or 'U' + result = Series(input_vals, dtype=string_dtype) + expected = Series(input_vals).astype(string_dtype) + tm.assert_series_equal(result, expected) + 
+ def test_constructor_list_str_na(self, string_dtype): + result = Series([1.0, 2.0, np.nan], dtype=string_dtype) + expected = Series(["1.0", "2.0", np.nan], dtype=object) + tm.assert_series_equal(result, expected) + assert np.isnan(result[2]) + + def test_constructor_generator(self): + gen = (i for i in range(10)) + + result = Series(gen) + exp = Series(range(10)) + tm.assert_series_equal(result, exp) + + gen = (i for i in range(10)) + result = Series(gen, index=range(10, 20)) + exp.index = range(10, 20) + tm.assert_series_equal(result, exp) + + def test_constructor_map(self): + # GH8909 + m = map(lambda x: x, range(10)) + + result = Series(m) + exp = Series(range(10)) + tm.assert_series_equal(result, exp) + + m = map(lambda x: x, range(10)) + result = Series(m, index=range(10, 20)) + exp.index = range(10, 20) + tm.assert_series_equal(result, exp) + + def test_constructor_categorical(self): + cat = pd.Categorical([0, 1, 2, 0, 1, 2], ["a", "b", "c"], fastpath=True) + res = Series(cat) + tm.assert_categorical_equal(res.values, cat) + + # can cast to a new dtype + result = Series(pd.Categorical([1, 2, 3]), dtype="int64") + expected = pd.Series([1, 2, 3], dtype="int64") + tm.assert_series_equal(result, expected) + + # GH12574 + cat = Series(pd.Categorical([1, 2, 3]), dtype="category") + assert is_categorical_dtype(cat) + assert is_categorical_dtype(cat.dtype) + s = Series([1, 2, 3], dtype="category") + assert is_categorical_dtype(s) + assert is_categorical_dtype(s.dtype) + + def test_constructor_categorical_with_coercion(self): + factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]) + # test basic creation / coercion of categoricals + s = Series(factor, name="A") + assert s.dtype == "category" + assert len(s) == len(factor) + str(s.values) + str(s) + + # in a frame + df = DataFrame({"A": factor}) + result = df["A"] + tm.assert_series_equal(result, s) + result = df.iloc[:, 0] + tm.assert_series_equal(result, s) + assert len(df) == len(factor) + str(df.values) 
+ str(df) + + df = DataFrame({"A": s}) + result = df["A"] + tm.assert_series_equal(result, s) + assert len(df) == len(factor) + str(df.values) + str(df) + + # multiples + df = DataFrame({"A": s, "B": s, "C": 1}) + result1 = df["A"] + result2 = df["B"] + tm.assert_series_equal(result1, s) + tm.assert_series_equal(result2, s, check_names=False) + assert result2.name == "B" + assert len(df) == len(factor) + str(df.values) + str(df) + + # GH8623 + x = DataFrame( + [[1, "John P. Doe"], [2, "Jane Dove"], [1, "John P. Doe"]], + columns=["person_id", "person_name"], + ) + x["person_name"] = Categorical(x.person_name) # doing this breaks transform + + expected = x.iloc[0].person_name + result = x.person_name.iloc[0] + assert result == expected + + result = x.person_name[0] + assert result == expected + + result = x.person_name.loc[0] + assert result == expected + + def test_constructor_categorical_dtype(self): + result = pd.Series( + ["a", "b"], dtype=CategoricalDtype(["a", "b", "c"], ordered=True) + ) + assert is_categorical_dtype(result) is True + tm.assert_index_equal(result.cat.categories, pd.Index(["a", "b", "c"])) + assert result.cat.ordered + + result = pd.Series(["a", "b"], dtype=CategoricalDtype(["b", "a"])) + assert is_categorical_dtype(result) + tm.assert_index_equal(result.cat.categories, pd.Index(["b", "a"])) + assert result.cat.ordered is False + + # GH 19565 - Check broadcasting of scalar with Categorical dtype + result = Series( + "a", index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True) + ) + expected = Series( + ["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True) + ) + tm.assert_series_equal(result, expected, check_categorical=True) + + def test_constructor_categorical_string(self): + # GH 26336: the string 'category' maintains existing CategoricalDtype + cdt = CategoricalDtype(categories=list("dabc"), ordered=True) + expected = Series(list("abcabc"), dtype=cdt) + + # Series(Categorical, dtype='category') keeps existing 
dtype + cat = Categorical(list("abcabc"), dtype=cdt) + result = Series(cat, dtype="category") + tm.assert_series_equal(result, expected) + + # Series(Series[Categorical], dtype='category') keeps existing dtype + result = Series(result, dtype="category") + tm.assert_series_equal(result, expected) + + def test_categorical_sideeffects_free(self): + # Passing a categorical to a Series and then changing values in either + # the series or the categorical should not change the values in the + # other one, IF you specify copy! + cat = Categorical(["a", "b", "c", "a"]) + s = Series(cat, copy=True) + assert s.cat is not cat + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1], dtype=np.int64) + exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) + tm.assert_numpy_array_equal(s.__array__(), exp_s) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + # setting + s[0] = 2 + exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s2) + tm.assert_numpy_array_equal(cat.__array__(), exp_cat) + + # however, copy is False by default + # so this WILL change values + cat = Categorical(["a", "b", "c", "a"]) + s = Series(cat) + assert s.values is cat + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s) + tm.assert_numpy_array_equal(cat.__array__(), exp_s) + + s[0] = 2 + exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) + tm.assert_numpy_array_equal(s.__array__(), exp_s2) + tm.assert_numpy_array_equal(cat.__array__(), exp_s2) + + def test_unordered_compare_equal(self): + left = pd.Series(["a", "b", "c"], dtype=CategoricalDtype(["a", "b"])) + right = pd.Series(pd.Categorical(["a", "b", np.nan], categories=["a", "b"])) + tm.assert_series_equal(left, right) + + def test_constructor_maskedarray(self): + data = ma.masked_all((3,), dtype=float) + result = Series(data) + expected = Series([np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) 
+ + data[0] = 0.0 + data[2] = 2.0 + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series([0.0, np.nan, 2.0], index=index) + tm.assert_series_equal(result, expected) + + data[1] = 1.0 + result = Series(data, index=index) + expected = Series([0.0, 1.0, 2.0], index=index) + tm.assert_series_equal(result, expected) + + data = ma.masked_all((3,), dtype=int) + result = Series(data) + expected = Series([np.nan, np.nan, np.nan], dtype=float) + tm.assert_series_equal(result, expected) + + data[0] = 0 + data[2] = 2 + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series([0, np.nan, 2], index=index, dtype=float) + tm.assert_series_equal(result, expected) + + data[1] = 1 + result = Series(data, index=index) + expected = Series([0, 1, 2], index=index, dtype=int) + tm.assert_series_equal(result, expected) + + data = ma.masked_all((3,), dtype=bool) + result = Series(data) + expected = Series([np.nan, np.nan, np.nan], dtype=object) + tm.assert_series_equal(result, expected) + + data[0] = True + data[2] = False + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series([True, np.nan, False], index=index, dtype=object) + tm.assert_series_equal(result, expected) + + data[1] = True + result = Series(data, index=index) + expected = Series([True, True, False], index=index, dtype=bool) + tm.assert_series_equal(result, expected) + + data = ma.masked_all((3,), dtype="M8[ns]") + result = Series(data) + expected = Series([iNaT, iNaT, iNaT], dtype="M8[ns]") + tm.assert_series_equal(result, expected) + + data[0] = datetime(2001, 1, 1) + data[2] = datetime(2001, 1, 3) + index = ["a", "b", "c"] + result = Series(data, index=index) + expected = Series( + [datetime(2001, 1, 1), iNaT, datetime(2001, 1, 3)], + index=index, + dtype="M8[ns]", + ) + tm.assert_series_equal(result, expected) + + data[1] = datetime(2001, 1, 2) + result = Series(data, index=index) + expected = Series( + [datetime(2001, 1, 1), datetime(2001, 1, 
2), datetime(2001, 1, 3)], + index=index, + dtype="M8[ns]", + ) + tm.assert_series_equal(result, expected) + + def test_constructor_maskedarray_hardened(self): + # Check numpy masked arrays with hard masks -- from GH24574 + data = ma.masked_all((3,), dtype=float).harden_mask() + result = pd.Series(data) + expected = pd.Series([np.nan, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + def test_series_ctor_plus_datetimeindex(self): + rng = date_range("20090415", "20090519", freq="B") + data = {k: 1 for k in rng} + + result = Series(data, index=rng) + assert result.index is rng + + def test_constructor_default_index(self): + s = Series([0, 1, 2]) + tm.assert_index_equal(s.index, pd.Index(np.arange(3))) + + @pytest.mark.parametrize( + "input", + [ + [1, 2, 3], + (1, 2, 3), + list(range(3)), + pd.Categorical(["a", "b", "a"]), + (i for i in range(3)), + map(lambda x: x, range(3)), + ], + ) + def test_constructor_index_mismatch(self, input): + # GH 19342 + # test that construction of a Series with an index of different length + # raises an error + msg = "Length of passed values is 3, index implies 4" + with pytest.raises(ValueError, match=msg): + Series(input, index=np.arange(4)) + + def test_constructor_numpy_scalar(self): + # GH 19342 + # construction with a numpy scalar + # should not raise + result = Series(np.array(100), index=np.arange(4), dtype="int64") + expected = Series(100, index=np.arange(4), dtype="int64") + tm.assert_series_equal(result, expected) + + def test_constructor_broadcast_list(self): + # GH 19342 + # construction with single-element container and index + # should raise + msg = "Length of passed values is 1, index implies 3" + with pytest.raises(ValueError, match=msg): + Series(["foo"], index=["a", "b", "c"]) + + def test_constructor_corner(self): + df = tm.makeTimeDataFrame() + objs = [df, df] + s = Series(objs, index=[0, 1]) + assert isinstance(s, Series) + + def test_constructor_sanitize(self): + s = Series(np.array([1.0, 1.0, 
8.0]), dtype="i8") + assert s.dtype == np.dtype("i8") + + s = Series(np.array([1.0, 1.0, np.nan]), copy=True, dtype="i8") + assert s.dtype == np.dtype("f8") + + def test_constructor_copy(self): + # GH15125 + # test dtype parameter has no side effects on copy=True + for data in [[1.0], np.array([1.0])]: + x = Series(data) + y = pd.Series(x, copy=True, dtype=float) + + # copy=True maintains original data in Series + tm.assert_series_equal(x, y) + + # changes to origin of copy does not affect the copy + x[0] = 2.0 + assert not x.equals(y) + assert x[0] == 2.0 + assert y[0] == 1.0 + + @pytest.mark.parametrize( + "index", + [ + pd.date_range("20170101", periods=3, tz="US/Eastern"), + pd.date_range("20170101", periods=3), + pd.timedelta_range("1 day", periods=3), + pd.period_range("2012Q1", periods=3, freq="Q"), + pd.Index(list("abc")), + pd.Int64Index([1, 2, 3]), + pd.RangeIndex(0, 3), + ], + ids=lambda x: type(x).__name__, + ) + def test_constructor_limit_copies(self, index): + # GH 17449 + # limit copies of input + s = pd.Series(index) + + # we make 1 copy; this is just a smoke test here + assert s._data.blocks[0].values is not index + + def test_constructor_pass_none(self): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + s = Series(None, index=range(5)) + assert s.dtype == np.float64 + + s = Series(None, index=range(5), dtype=object) + assert s.dtype == np.object_ + + # GH 7431 + # inference on the index + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + s = Series(index=np.array([None])) + expected = Series(index=Index([None])) + tm.assert_series_equal(s, expected) + + def test_constructor_pass_nan_nat(self): + # GH 13467 + exp = Series([np.nan, np.nan], dtype=np.float64) + assert exp.dtype == np.float64 + tm.assert_series_equal(Series([np.nan, np.nan]), exp) + tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp) + + exp = Series([pd.NaT, pd.NaT]) + assert exp.dtype == "datetime64[ns]" + 
tm.assert_series_equal(Series([pd.NaT, pd.NaT]), exp) + tm.assert_series_equal(Series(np.array([pd.NaT, pd.NaT])), exp) + + tm.assert_series_equal(Series([pd.NaT, np.nan]), exp) + tm.assert_series_equal(Series(np.array([pd.NaT, np.nan])), exp) + + tm.assert_series_equal(Series([np.nan, pd.NaT]), exp) + tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) + + def test_constructor_cast(self): + msg = "could not convert string to float" + with pytest.raises(ValueError, match=msg): + Series(["a", "b", "c"], dtype=float) + + def test_constructor_unsigned_dtype_overflow(self, uint_dtype): + # see gh-15832 + msg = "Trying to coerce negative values to unsigned integers" + with pytest.raises(OverflowError, match=msg): + Series([-1], dtype=uint_dtype) + + def test_constructor_coerce_float_fail(self, any_int_dtype): + # see gh-15832 + msg = "Trying to coerce float values to integers" + with pytest.raises(ValueError, match=msg): + Series([1, 2, 3.5], dtype=any_int_dtype) + + def test_constructor_coerce_float_valid(self, float_dtype): + s = Series([1, 2, 3.5], dtype=float_dtype) + expected = Series([1, 2, 3.5]).astype(float_dtype) + tm.assert_series_equal(s, expected) + + def test_constructor_dtype_no_cast(self): + # see gh-1572 + s = Series([1, 2, 3]) + s2 = Series(s, dtype=np.int64) + + s2[1] = 5 + assert s[1] == 5 + + def test_constructor_datelike_coercion(self): + + # GH 9477 + # incorrectly inferring on dateimelike looking when object dtype is + # specified + s = Series([Timestamp("20130101"), "NOV"], dtype=object) + assert s.iloc[0] == Timestamp("20130101") + assert s.iloc[1] == "NOV" + assert s.dtype == object + + # the dtype was being reset on the slicing and re-inferred to datetime + # even thought the blocks are mixed + belly = "216 3T19".split() + wing1 = "2T15 4H19".split() + wing2 = "416 4T20".split() + mat = pd.to_datetime("2016-01-22 2019-09-07".split()) + df = pd.DataFrame({"wing1": wing1, "wing2": wing2, "mat": mat}, index=belly) + + result = 
df.loc["3T19"] + assert result.dtype == object + result = df.loc["216"] + assert result.dtype == object + + def test_constructor_datetimes_with_nulls(self): + # gh-15869 + for arr in [ + np.array([None, None, None, None, datetime.now(), None]), + np.array([None, None, datetime.now(), None]), + ]: + result = Series(arr) + assert result.dtype == "M8[ns]" + + def test_constructor_dtype_datetime64(self): + + s = Series(iNaT, dtype="M8[ns]", index=range(5)) + assert isna(s).all() + + # in theory this should be all nulls, but since + # we are not specifying a dtype is ambiguous + s = Series(iNaT, index=range(5)) + assert not isna(s).all() + + s = Series(np.nan, dtype="M8[ns]", index=range(5)) + assert isna(s).all() + + s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype="M8[ns]") + assert isna(s[1]) + assert s.dtype == "M8[ns]" + + s = Series([datetime(2001, 1, 2, 0, 0), np.nan], dtype="M8[ns]") + assert isna(s[1]) + assert s.dtype == "M8[ns]" + + # GH3416 + dates = [ + np.datetime64(datetime(2013, 1, 1)), + np.datetime64(datetime(2013, 1, 2)), + np.datetime64(datetime(2013, 1, 3)), + ] + + s = Series(dates) + assert s.dtype == "M8[ns]" + + s.iloc[0] = np.nan + assert s.dtype == "M8[ns]" + + # GH3414 related + expected = Series( + [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3)], + dtype="datetime64[ns]", + ) + + result = Series(Series(dates).astype(np.int64) / 1000000, dtype="M8[ms]") + tm.assert_series_equal(result, expected) + + result = Series(dates, dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) + + expected = Series( + [pd.NaT, datetime(2013, 1, 2), datetime(2013, 1, 3)], dtype="datetime64[ns]" + ) + result = Series([np.nan] + dates[1:], dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) + + dts = Series(dates, dtype="datetime64[ns]") + + # valid astype + dts.astype("int64") + + # invalid casting + msg = r"cannot astype a datetimelike from \[datetime64\[ns\]\] to \[int32\]" + with pytest.raises(TypeError, 
match=msg): + dts.astype("int32") + + # ints are ok + # we test with np.int64 to get similar results on + # windows / 32-bit platforms + result = Series(dts, dtype=np.int64) + expected = Series(dts.astype(np.int64)) + tm.assert_series_equal(result, expected) + + # invalid dates can be help as object + result = Series([datetime(2, 1, 1)]) + assert result[0] == datetime(2, 1, 1, 0, 0) + + result = Series([datetime(3000, 1, 1)]) + assert result[0] == datetime(3000, 1, 1, 0, 0) + + # don't mix types + result = Series([Timestamp("20130101"), 1], index=["a", "b"]) + assert result["a"] == Timestamp("20130101") + assert result["b"] == 1 + + # GH6529 + # coerce datetime64 non-ns properly + dates = date_range("01-Jan-2015", "01-Dec-2015", freq="M") + values2 = dates.view(np.ndarray).astype("datetime64[ns]") + expected = Series(values2, index=dates) + + for dtype in ["s", "D", "ms", "us", "ns"]: + values1 = dates.view(np.ndarray).astype("M8[{0}]".format(dtype)) + result = Series(values1, dates) + tm.assert_series_equal(result, expected) + + # GH 13876 + # coerce to non-ns to object properly + expected = Series(values2, index=dates, dtype=object) + for dtype in ["s", "D", "ms", "us", "ns"]: + values1 = dates.view(np.ndarray).astype("M8[{0}]".format(dtype)) + result = Series(values1, index=dates, dtype=object) + tm.assert_series_equal(result, expected) + + # leave datetime.date alone + dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object) + series1 = Series(dates2, dates) + tm.assert_numpy_array_equal(series1.values, dates2) + assert series1.dtype == object + + # these will correctly infer a datetime + s = Series([None, pd.NaT, "2013-08-05 15:30:00.000001"]) + assert s.dtype == "datetime64[ns]" + s = Series([np.nan, pd.NaT, "2013-08-05 15:30:00.000001"]) + assert s.dtype == "datetime64[ns]" + s = Series([pd.NaT, None, "2013-08-05 15:30:00.000001"]) + assert s.dtype == "datetime64[ns]" + s = Series([pd.NaT, np.nan, "2013-08-05 15:30:00.000001"]) + assert 
s.dtype == "datetime64[ns]" + + # tz-aware (UTC and other tz's) + # GH 8411 + dr = date_range("20130101", periods=3) + assert Series(dr).iloc[0].tz is None + dr = date_range("20130101", periods=3, tz="UTC") + assert str(Series(dr).iloc[0].tz) == "UTC" + dr = date_range("20130101", periods=3, tz="US/Eastern") + assert str(Series(dr).iloc[0].tz) == "US/Eastern" + + # non-convertible + s = Series([1479596223000, -1479590, pd.NaT]) + assert s.dtype == "object" + assert s[2] is pd.NaT + assert "NaT" in str(s) + + # if we passed a NaT it remains + s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT]) + assert s.dtype == "object" + assert s[2] is pd.NaT + assert "NaT" in str(s) + + # if we passed a nan it remains + s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) + assert s.dtype == "object" + assert s[2] is np.nan + assert "NaN" in str(s) + + def test_constructor_with_datetime_tz(self): + + # 8260 + # support datetime64 with tz + + dr = date_range("20130101", periods=3, tz="US/Eastern") + s = Series(dr) + assert s.dtype.name == "datetime64[ns, US/Eastern]" + assert s.dtype == "datetime64[ns, US/Eastern]" + assert is_datetime64tz_dtype(s.dtype) + assert "datetime64[ns, US/Eastern]" in str(s) + + # export + result = s.values + assert isinstance(result, np.ndarray) + assert result.dtype == "datetime64[ns]" + + exp = pd.DatetimeIndex(result) + exp = exp.tz_localize("UTC").tz_convert(tz=s.dt.tz) + tm.assert_index_equal(dr, exp) + + # indexing + result = s.iloc[0] + assert result == Timestamp( + "2013-01-01 00:00:00-0500", tz="US/Eastern", freq="D" + ) + result = s[0] + assert result == Timestamp( + "2013-01-01 00:00:00-0500", tz="US/Eastern", freq="D" + ) + + result = s[Series([True, True, False], index=s.index)] + tm.assert_series_equal(result, s[0:2]) + + result = s.iloc[0:1] + tm.assert_series_equal(result, Series(dr[0:1])) + + # concat + result = pd.concat([s.iloc[0:1], s.iloc[1:]]) + tm.assert_series_equal(result, s) + + # short str + assert 
"datetime64[ns, US/Eastern]" in str(s) + + # formatting with NaT + result = s.shift() + assert "datetime64[ns, US/Eastern]" in str(result) + assert "NaT" in str(result) + + # long str + t = Series(date_range("20130101", periods=1000, tz="US/Eastern")) + assert "datetime64[ns, US/Eastern]" in str(t) + + result = pd.DatetimeIndex(s, freq="infer") + tm.assert_index_equal(result, dr) + + # inference + s = Series( + [ + pd.Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), + pd.Timestamp("2013-01-02 14:00:00-0800", tz="US/Pacific"), + ] + ) + assert s.dtype == "datetime64[ns, US/Pacific]" + assert lib.infer_dtype(s, skipna=True) == "datetime64" + + s = Series( + [ + pd.Timestamp("2013-01-01 13:00:00-0800", tz="US/Pacific"), + pd.Timestamp("2013-01-02 14:00:00-0800", tz="US/Eastern"), + ] + ) + assert s.dtype == "object" + assert lib.infer_dtype(s, skipna=True) == "datetime" + + # with all NaT + s = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]") + expected = Series(pd.DatetimeIndex(["NaT", "NaT"], tz="US/Eastern")) + tm.assert_series_equal(s, expected) + + @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) + @pytest.mark.parametrize("dtype", ["M8", "m8"]) + @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) + def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit): + # tests all units + # gh-19223 + dtype = "{}[{}]".format(dtype, unit) + arr = np.array([1, 2, 3], dtype=arr_dtype) + s = Series(arr) + result = s.astype(dtype) + expected = Series(arr.astype(dtype)) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("arg", ["2013-01-01 00:00:00", pd.NaT, np.nan, None]) + def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): + # GH 17415: With naive string + result = Series([arg], dtype="datetime64[ns, CET]") + expected = Series(pd.Timestamp(arg)).dt.tz_localize("CET") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("interval_constructor", 
[IntervalIndex, IntervalArray]) + def test_construction_interval(self, interval_constructor): + # construction from interval & array of intervals + intervals = interval_constructor.from_breaks(np.arange(3), closed="right") + result = Series(intervals) + assert result.dtype == "interval[int64]" + tm.assert_index_equal(Index(result.values), Index(intervals)) + + @pytest.mark.parametrize( + "data_constructor", [list, np.array], ids=["list", "ndarray[object]"] + ) + def test_constructor_infer_interval(self, data_constructor): + # GH 23563: consistent closed results in interval dtype + data = [pd.Interval(0, 1), pd.Interval(0, 2), None] + result = pd.Series(data_constructor(data)) + expected = pd.Series(IntervalArray(data)) + assert result.dtype == "interval[float64]" + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data_constructor", [list, np.array], ids=["list", "ndarray[object]"] + ) + def test_constructor_interval_mixed_closed(self, data_constructor): + # GH 23563: mixed closed results in object dtype (not interval dtype) + data = [pd.Interval(0, 1, closed="both"), pd.Interval(0, 2, closed="neither")] + result = Series(data_constructor(data)) + assert result.dtype == object + assert result.tolist() == data + + def test_construction_consistency(self): + + # make sure that we are not re-localizing upon construction + # GH 14928 + s = Series(pd.date_range("20130101", periods=3, tz="US/Eastern")) + + result = Series(s, dtype=s.dtype) + tm.assert_series_equal(result, s) + + result = Series(s.dt.tz_convert("UTC"), dtype=s.dtype) + tm.assert_series_equal(result, s) + + result = Series(s.values, dtype=s.dtype) + tm.assert_series_equal(result, s) + + @pytest.mark.parametrize( + "data_constructor", [list, np.array], ids=["list", "ndarray[object]"] + ) + def test_constructor_infer_period(self, data_constructor): + data = [pd.Period("2000", "D"), pd.Period("2001", "D"), None] + result = pd.Series(data_constructor(data)) + expected = 
pd.Series(period_array(data)) + tm.assert_series_equal(result, expected) + assert result.dtype == "Period[D]" + + def test_constructor_period_incompatible_frequency(self): + data = [pd.Period("2000", "D"), pd.Period("2001", "A")] + result = pd.Series(data) + assert result.dtype == object + assert result.tolist() == data + + def test_constructor_periodindex(self): + # GH7932 + # converting a PeriodIndex when put in a Series + + pi = period_range("20130101", periods=5, freq="D") + s = Series(pi) + assert s.dtype == "Period[D]" + expected = Series(pi.astype(object)) + tm.assert_series_equal(s, expected) + + def test_constructor_dict(self): + d = {"a": 0.0, "b": 1.0, "c": 2.0} + result = Series(d, index=["b", "c", "d", "a"]) + expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) + tm.assert_series_equal(result, expected) + + pidx = tm.makePeriodIndex(100) + d = {pidx[0]: 0, pidx[1]: 1} + result = Series(d, index=pidx) + expected = Series(np.nan, pidx, dtype=np.float64) + expected.iloc[0] = 0 + expected.iloc[1] = 1 + tm.assert_series_equal(result, expected) + + def test_constructor_dict_list_value_explicit_dtype(self): + # GH 18625 + d = {"a": [[2], [3], [4]]} + result = Series(d, index=["a"], dtype="object") + expected = Series(d, index=["a"]) + tm.assert_series_equal(result, expected) + + def test_constructor_dict_order(self): + # GH19018 + # initialization ordering: by insertion order if python>= 3.6, else + # order by value + d = {"b": 1, "a": 0, "c": 2} + result = Series(d) + expected = Series([1, 0, 2], index=list("bac")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("value", [2, np.nan, None, float("nan")]) + def test_constructor_dict_nan_key(self, value): + # GH 18480 + d = {1: "a", value: "b", float("nan"): "c", 4: "d"} + result = Series(d).sort_values() + expected = Series(["a", "b", "c", "d"], index=[1, value, np.nan, 4]) + tm.assert_series_equal(result, expected) + + # MultiIndex: + d = {(1, 1): "a", (2, np.nan): "b", 
(3, value): "c"} + result = Series(d).sort_values() + expected = Series( + ["a", "b", "c"], index=Index([(1, 1), (2, np.nan), (3, value)]) + ) + tm.assert_series_equal(result, expected) + + def test_constructor_dict_datetime64_index(self): + # GH 9456 + + dates_as_str = ["1984-02-19", "1988-11-06", "1989-12-03", "1990-03-15"] + values = [42544017.198965244, 1234565, 40512335.181958228, -1] + + def create_data(constructor): + return dict(zip((constructor(x) for x in dates_as_str), values)) + + data_datetime64 = create_data(np.datetime64) + data_datetime = create_data(lambda x: datetime.strptime(x, "%Y-%m-%d")) + data_Timestamp = create_data(Timestamp) + + expected = Series(values, (Timestamp(x) for x in dates_as_str)) + + result_datetime64 = Series(data_datetime64) + result_datetime = Series(data_datetime) + result_Timestamp = Series(data_Timestamp) + + tm.assert_series_equal(result_datetime64, expected) + tm.assert_series_equal(result_datetime, expected) + tm.assert_series_equal(result_Timestamp, expected) + + def test_constructor_mapping(self, non_mapping_dict_subclass): + # GH 29788 + ndm = non_mapping_dict_subclass({3: "three"}) + result = Series(ndm) + expected = Series(["three"], index=[3]) + + tm.assert_series_equal(result, expected) + + def test_constructor_list_of_tuples(self): + data = [(1, 1), (2, 2), (2, 3)] + s = Series(data) + assert list(s) == data + + def test_constructor_tuple_of_tuples(self): + data = ((1, 1), (2, 2), (2, 3)) + s = Series(data) + assert tuple(s) == data + + def test_constructor_dict_of_tuples(self): + data = {(1, 2): 3, (None, 5): 6} + result = Series(data).sort_values() + expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) + tm.assert_series_equal(result, expected) + + def test_constructor_set(self): + values = {1, 2, 3, 4, 5} + with pytest.raises(TypeError, match="'set' type is unordered"): + Series(values) + values = frozenset(values) + with pytest.raises(TypeError, match="'frozenset' type is 
unordered"): + Series(values) + + # https://github.com/pandas-dev/pandas/issues/22698 + @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning") + def test_fromDict(self): + data = {"a": 0, "b": 1, "c": 2, "d": 3} + + series = Series(data) + tm.assert_is_sorted(series.index) + + data = {"a": 0, "b": "1", "c": "2", "d": datetime.now()} + series = Series(data) + assert series.dtype == np.object_ + + data = {"a": 0, "b": "1", "c": "2", "d": "3"} + series = Series(data) + assert series.dtype == np.object_ + + data = {"a": "0", "b": "1"} + series = Series(data, dtype=float) + assert series.dtype == np.float64 + + def test_fromValue(self, datetime_series): + + nans = Series(np.NaN, index=datetime_series.index, dtype=np.float64) + assert nans.dtype == np.float_ + assert len(nans) == len(datetime_series) + + strings = Series("foo", index=datetime_series.index) + assert strings.dtype == np.object_ + assert len(strings) == len(datetime_series) + + d = datetime.now() + dates = Series(d, index=datetime_series.index) + assert dates.dtype == "M8[ns]" + assert len(dates) == len(datetime_series) + + # GH12336 + # Test construction of categorical series from value + categorical = Series(0, index=datetime_series.index, dtype="category") + expected = Series(0, index=datetime_series.index).astype("category") + assert categorical.dtype == "category" + assert len(categorical) == len(datetime_series) + tm.assert_series_equal(categorical, expected) + + def test_constructor_dtype_timedelta64(self): + + # basic + td = Series([timedelta(days=i) for i in range(3)]) + assert td.dtype == "timedelta64[ns]" + + td = Series([timedelta(days=1)]) + assert td.dtype == "timedelta64[ns]" + + td = Series([timedelta(days=1), timedelta(days=2), np.timedelta64(1, "s")]) + + assert td.dtype == "timedelta64[ns]" + + # mixed with NaT + td = Series([timedelta(days=1), NaT], dtype="m8[ns]") + assert td.dtype == "timedelta64[ns]" + + td = Series([timedelta(days=1), np.nan], dtype="m8[ns]") + 
assert td.dtype == "timedelta64[ns]" + + td = Series([np.timedelta64(300000000), pd.NaT], dtype="m8[ns]") + assert td.dtype == "timedelta64[ns]" + + # improved inference + # GH5689 + td = Series([np.timedelta64(300000000), NaT]) + assert td.dtype == "timedelta64[ns]" + + # because iNaT is int, not coerced to timedelta + td = Series([np.timedelta64(300000000), iNaT]) + assert td.dtype == "object" + + td = Series([np.timedelta64(300000000), np.nan]) + assert td.dtype == "timedelta64[ns]" + + td = Series([pd.NaT, np.timedelta64(300000000)]) + assert td.dtype == "timedelta64[ns]" + + td = Series([np.timedelta64(1, "s")]) + assert td.dtype == "timedelta64[ns]" + + # these are frequency conversion astypes + # for t in ['s', 'D', 'us', 'ms']: + # with pytest.raises(TypeError): + # td.astype('m8[%s]' % t) + + # valid astype + td.astype("int64") + + # invalid casting + msg = r"cannot astype a timedelta from \[timedelta64\[ns\]\] to \[int32\]" + with pytest.raises(TypeError, match=msg): + td.astype("int32") + + # this is an invalid casting + msg = "Could not convert object to NumPy timedelta" + with pytest.raises(ValueError, match=msg): + Series([timedelta(days=1), "foo"], dtype="m8[ns]") + + # leave as object here + td = Series([timedelta(days=i) for i in range(3)] + ["foo"]) + assert td.dtype == "object" + + # these will correctly infer a timedelta + s = Series([None, pd.NaT, "1 Day"]) + assert s.dtype == "timedelta64[ns]" + s = Series([np.nan, pd.NaT, "1 Day"]) + assert s.dtype == "timedelta64[ns]" + s = Series([pd.NaT, None, "1 Day"]) + assert s.dtype == "timedelta64[ns]" + s = Series([pd.NaT, np.nan, "1 Day"]) + assert s.dtype == "timedelta64[ns]" + + # GH 16406 + def test_constructor_mixed_tz(self): + s = Series([Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")]) + expected = Series( + [Timestamp("20130101"), Timestamp("20130101", tz="US/Eastern")], + dtype="object", + ) + tm.assert_series_equal(s, expected) + + def test_NaT_scalar(self): + series = 
Series([0, 1000, 2000, iNaT], dtype="M8[ns]") + + val = series[3] + assert isna(val) + + series[2] = val + assert isna(series[2]) + + def test_NaT_cast(self): + # GH10747 + result = Series([np.nan]).astype("M8[ns]") + expected = Series([NaT]) + tm.assert_series_equal(result, expected) + + def test_constructor_name_hashable(self): + for n in [777, 777.0, "name", datetime(2001, 11, 11), (1,), "\u05D0"]: + for data in [[1, 2, 3], np.ones(3), {"a": 0, "b": 1}]: + s = Series(data, name=n) + assert s.name == n + + def test_constructor_name_unhashable(self): + msg = r"Series\.name must be a hashable type" + for n in [["name_list"], np.ones(2), {1: 2}]: + for data in [["name_list"], np.ones(2), {1: 2}]: + with pytest.raises(TypeError, match=msg): + Series(data, name=n) + + def test_auto_conversion(self): + series = Series(list(date_range("1/1/2000", periods=10))) + assert series.dtype == "M8[ns]" + + def test_convert_non_ns(self): + # convert from a numpy array of non-ns timedelta64 + arr = np.array([1, 2, 3], dtype="timedelta64[s]") + s = Series(arr) + expected = Series(pd.timedelta_range("00:00:01", periods=3, freq="s")) + tm.assert_series_equal(s, expected) + + # convert from a numpy array of non-ns datetime64 + # note that creating a numpy datetime64 is in LOCAL time!!!! 
+ # seems to work for M8[D], but not for M8[s] + + s = Series( + np.array(["2013-01-01", "2013-01-02", "2013-01-03"], dtype="datetime64[D]") + ) + tm.assert_series_equal(s, Series(date_range("20130101", periods=3, freq="D"))) + + # s = Series(np.array(['2013-01-01 00:00:01','2013-01-01 + # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]')) + + # tm.assert_series_equal(s,date_range('20130101 + # 00:00:01',period=3,freq='s')) + + @pytest.mark.parametrize( + "index", + [ + date_range("1/1/2000", periods=10), + timedelta_range("1 day", periods=10), + period_range("2000-Q1", periods=10, freq="Q"), + ], + ids=lambda x: type(x).__name__, + ) + def test_constructor_cant_cast_datetimelike(self, index): + + # floats are not ok + msg = "Cannot cast {}.*? to ".format( + # strip Index to convert PeriodIndex -> Period + # We don't care whether the error message says + # PeriodIndex or PeriodArray + type(index).__name__.rstrip("Index") + ) + with pytest.raises(TypeError, match=msg): + Series(index, dtype=float) + + # ints are ok + # we test with np.int64 to get similar results on + # windows / 32-bit platforms + result = Series(index, dtype=np.int64) + expected = Series(index.astype(np.int64)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "index", + [ + date_range("1/1/2000", periods=10), + timedelta_range("1 day", periods=10), + period_range("2000-Q1", periods=10, freq="Q"), + ], + ids=lambda x: type(x).__name__, + ) + def test_constructor_cast_object(self, index): + s = Series(index, dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) + + s = Series(pd.Index(index, dtype=object), dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) + + s = Series(index.astype(object), dtype=object) + exp = Series(index).astype(object) + tm.assert_series_equal(s, exp) + + @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64]) + def test_constructor_generic_timestamp_no_frequency(self, 
dtype): + # see gh-15524, gh-15987 + msg = "dtype has no unit. Please pass in" + + with pytest.raises(ValueError, match=msg): + Series([], dtype=dtype) + + @pytest.mark.parametrize( + "dtype,msg", + [ + ("m8[ps]", "cannot convert timedeltalike"), + ("M8[ps]", "cannot convert datetimelike"), + ], + ) + def test_constructor_generic_timestamp_bad_frequency(self, dtype, msg): + # see gh-15524, gh-15987 + + with pytest.raises(TypeError, match=msg): + Series([], dtype=dtype) + + @pytest.mark.parametrize("dtype", [None, "uint8", "category"]) + def test_constructor_range_dtype(self, dtype): + # GH 16804 + expected = Series([0, 1, 2, 3, 4], dtype=dtype or "int64") + result = Series(range(5), dtype=dtype) + tm.assert_series_equal(result, expected) + + def test_constructor_tz_mixed_data(self): + # GH 13051 + dt_list = [ + Timestamp("2016-05-01 02:03:37"), + Timestamp("2016-04-30 19:03:37-0700", tz="US/Pacific"), + ] + result = Series(dt_list) + expected = Series(dt_list, dtype=object) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_convert_dtypes.py b/pandas/tests/series/test_convert_dtypes.py new file mode 100644 index 00000000..a41f893e --- /dev/null +++ b/pandas/tests/series/test_convert_dtypes.py @@ -0,0 +1,286 @@ +from itertools import product + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm + + +class TestSeriesConvertDtypes: + # The answerdict has keys that have 4 tuples, corresponding to the arguments + # infer_objects, convert_string, convert_integer, convert_boolean + # This allows all 16 possible combinations to be tested. 
Since common + # combinations expect the same answer, this provides an easy way to list + # all the possibilities + @pytest.mark.parametrize( + "data, maindtype, answerdict", + [ + ( + [1, 2, 3], + np.dtype("int32"), + { + ((True, False), (True, False), (True,), (True, False)): "Int32", + ((True, False), (True, False), (False,), (True, False)): np.dtype( + "int32" + ), + }, + ), + ( + [1, 2, 3], + np.dtype("int64"), + { + ((True, False), (True, False), (True,), (True, False)): "Int64", + ((True, False), (True, False), (False,), (True, False)): np.dtype( + "int64" + ), + }, + ), + ( + ["x", "y", "z"], + np.dtype("O"), + { + ( + (True, False), + (True,), + (True, False), + (True, False), + ): pd.StringDtype(), + ((True, False), (False,), (True, False), (True, False)): np.dtype( + "O" + ), + }, + ), + ( + [True, False, np.nan], + np.dtype("O"), + { + ( + (True, False), + (True, False), + (True, False), + (True,), + ): pd.BooleanDtype(), + ((True, False), (True, False), (True, False), (False,)): np.dtype( + "O" + ), + }, + ), + ( + ["h", "i", np.nan], + np.dtype("O"), + { + ( + (True, False), + (True,), + (True, False), + (True, False), + ): pd.StringDtype(), + ((True, False), (False,), (True, False), (True, False)): np.dtype( + "O" + ), + }, + ), + ( # GH32117 + ["h", "i", 1], + np.dtype("O"), + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): np.dtype("O"), + }, + ), + ( + [10, np.nan, 20], + np.dtype("float"), + { + ((True, False), (True, False), (True,), (True, False)): "Int64", + ((True, False), (True, False), (False,), (True, False)): np.dtype( + "float" + ), + }, + ), + ( + [np.nan, 100.5, 200], + np.dtype("float"), + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): np.dtype("float"), + }, + ), + ( + [3, 4, 5], + "Int8", + {((True, False), (True, False), (True, False), (True, False)): "Int8"}, + ), + ( + [[1, 2], [3, 4], [5]], + None, + { + ( + (True, False), + (True, False), + (True, False), + (True, 
False), + ): np.dtype("O"), + }, + ), + ( + [4, 5, 6], + np.dtype("uint32"), + { + ((True, False), (True, False), (True,), (True, False)): "UInt32", + ((True, False), (True, False), (False,), (True, False)): np.dtype( + "uint32" + ), + }, + ), + ( + [-10, 12, 13], + np.dtype("i1"), + { + ((True, False), (True, False), (True,), (True, False)): "Int8", + ((True, False), (True, False), (False,), (True, False)): np.dtype( + "i1" + ), + }, + ), + ( + [1, 2.0], + object, + { + ((True,), (True, False), (True,), (True, False)): "Int64", + ((True,), (True, False), (False,), (True, False)): np.dtype( + "float" + ), + ((False,), (True, False), (True, False), (True, False)): np.dtype( + "object" + ), + }, + ), + ( + [1, 2.5], + object, + { + ((True,), (True, False), (True, False), (True, False)): np.dtype( + "float" + ), + ((False,), (True, False), (True, False), (True, False)): np.dtype( + "object" + ), + }, + ), + ( + ["a", "b"], + pd.CategoricalDtype(), + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): pd.CategoricalDtype(), + }, + ), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + pd.DatetimeTZDtype(tz="UTC"), + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): pd.DatetimeTZDtype(tz="UTC"), + }, + ), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + "datetime64[ns]", + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): np.dtype("datetime64[ns]"), + }, + ), + ( + pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]), + object, + { + ((True,), (True, False), (True, False), (True, False),): np.dtype( + "datetime64[ns]" + ), + ((False,), (True, False), (True, False), (True, False),): np.dtype( + "O" + ), + }, + ), + ( + pd.period_range("1/1/2011", freq="M", periods=3), + None, + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): pd.PeriodDtype("M"), + }, + ), + ( + pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 
5)]), + None, + { + ( + (True, False), + (True, False), + (True, False), + (True, False), + ): pd.IntervalDtype("int64"), + }, + ), + ], + ) + @pytest.mark.parametrize("params", product(*[(True, False)] * 4)) + def test_convert_dtypes(self, data, maindtype, params, answerdict): + if maindtype is not None: + series = pd.Series(data, dtype=maindtype) + else: + series = pd.Series(data) + answers = {k: a for (kk, a) in answerdict.items() for k in product(*kk)} + + ns = series.convert_dtypes(*params) + expected_dtype = answers[tuple(params)] + expected = pd.Series(series.values, dtype=expected_dtype) + tm.assert_series_equal(ns, expected) + + # Test that it is a copy + copy = series.copy(deep=True) + ns[ns.notna()] = np.nan + + # Make sure original not changed + tm.assert_series_equal(series, copy) + + def test_convert_string_dtype(self): + # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns + # that are already string dtype + df = pd.DataFrame( + {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}, dtype="string" + ) + result = df.convert_dtypes() + tm.assert_frame_equal(df, result) + + def test_convert_bool_dtype(self): + # GH32287 + df = pd.DataFrame({"A": pd.array([True])}) + tm.assert_frame_equal(df, df.convert_dtypes()) diff --git a/pandas/tests/series/test_cumulative.py b/pandas/tests/series/test_cumulative.py new file mode 100644 index 00000000..0cb1c038 --- /dev/null +++ b/pandas/tests/series/test_cumulative.py @@ -0,0 +1,181 @@ +""" +Tests for Series cumulative operations. 
+ +See also +-------- +tests.frame.test_cumulative +""" +from itertools import product + +import numpy as np +import pytest + +import pandas as pd +from pandas import _is_numpy_dev +import pandas._testing as tm + + +def _check_accum_op(name, series, check_dtype=True): + func = getattr(np, name) + tm.assert_numpy_array_equal( + func(series).values, func(np.array(series)), check_dtype=check_dtype, + ) + + # with missing values + ts = series.copy() + ts[::2] = np.NaN + + result = func(ts)[1::2] + expected = func(np.array(ts.dropna())) + + tm.assert_numpy_array_equal(result.values, expected, check_dtype=False) + + +class TestSeriesCumulativeOps: + def test_cumsum(self, datetime_series): + _check_accum_op("cumsum", datetime_series) + + def test_cumprod(self, datetime_series): + _check_accum_op("cumprod", datetime_series) + + @pytest.mark.xfail( + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, + ) + def test_cummin(self, datetime_series): + tm.assert_numpy_array_equal( + datetime_series.cummin().values, + np.minimum.accumulate(np.array(datetime_series)), + ) + ts = datetime_series.copy() + ts[::2] = np.NaN + result = ts.cummin()[1::2] + expected = np.minimum.accumulate(ts.dropna()) + + tm.assert_series_equal(result, expected) + + @pytest.mark.xfail( + _is_numpy_dev, + reason="https://github.com/pandas-dev/pandas/issues/31992", + strict=False, + ) + def test_cummax(self, datetime_series): + tm.assert_numpy_array_equal( + datetime_series.cummax().values, + np.maximum.accumulate(np.array(datetime_series)), + ) + ts = datetime_series.copy() + ts[::2] = np.NaN + result = ts.cummax()[1::2] + expected = np.maximum.accumulate(ts.dropna()) + + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_cummin_datetime64(self, tz): + s = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"] + ).tz_localize(tz) + ) + + expected = pd.Series( + 
pd.to_datetime( + ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-1"] + ).tz_localize(tz) + ) + result = s.cummin(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "2000-1-2", "2000-1-1", "2000-1-1", "2000-1-1"] + ).tz_localize(tz) + ) + result = s.cummin(skipna=False) + tm.assert_series_equal(expected, result) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_cummax_datetime64(self, tz): + s = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "NaT", "2000-1-1", "NaT", "2000-1-3"] + ).tz_localize(tz) + ) + + expected = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "NaT", "2000-1-2", "NaT", "2000-1-3"] + ).tz_localize(tz) + ) + result = s.cummax(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_datetime( + ["NaT", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-2", "2000-1-3"] + ).tz_localize(tz) + ) + result = s.cummax(skipna=False) + tm.assert_series_equal(expected, result) + + def test_cummin_timedelta64(self): + s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "1 min"]) + ) + result = s.cummin(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "2 min", "1 min", "1 min", "1 min"]) + ) + result = s.cummin(skipna=False) + tm.assert_series_equal(expected, result) + + def test_cummax_timedelta64(self): + s = pd.Series(pd.to_timedelta(["NaT", "2 min", "NaT", "1 min", "NaT", "3 min"])) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "NaT", "2 min", "NaT", "3 min"]) + ) + result = s.cummax(skipna=True) + tm.assert_series_equal(expected, result) + + expected = pd.Series( + pd.to_timedelta(["NaT", "2 min", "2 min", "2 min", "2 min", "3 min"]) + ) + result = s.cummax(skipna=False) + tm.assert_series_equal(expected, result) + + def 
test_cummethods_bool(self): + # GH#6270 + + a = pd.Series([False, False, False, True, True, False, False]) + b = ~a + c = pd.Series([False] * len(b)) + d = ~c + methods = { + "cumsum": np.cumsum, + "cumprod": np.cumprod, + "cummin": np.minimum.accumulate, + "cummax": np.maximum.accumulate, + } + args = product((a, b, c, d), methods) + for s, method in args: + expected = pd.Series(methods[method](s.values)) + result = getattr(s, method)() + tm.assert_series_equal(result, expected) + + e = pd.Series([False, True, np.nan, False]) + cse = pd.Series([0, 1, np.nan, 1], dtype=object) + cpe = pd.Series([False, 0, np.nan, 0]) + cmin = pd.Series([False, False, np.nan, False]) + cmax = pd.Series([False, True, np.nan, True]) + expecteds = {"cumsum": cse, "cumprod": cpe, "cummin": cmin, "cummax": cmax} + + for method in methods: + res = getattr(e, method)() + tm.assert_series_equal(res, expecteds[method]) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py new file mode 100644 index 00000000..b8be4ea1 --- /dev/null +++ b/pandas/tests/series/test_datetime_values.py @@ -0,0 +1,689 @@ +import calendar +from datetime import date, datetime, time +import locale +import unicodedata + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs.timezones import maybe_get_tz + +from pandas.core.dtypes.common import is_integer_dtype, is_list_like + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + PeriodIndex, + Series, + TimedeltaIndex, + bdate_range, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.core.arrays import PeriodArray +import pandas.core.common as com + + +class TestSeriesDatetimeValues: + def test_dt_namespace_accessor(self): + + # GH 7207, 11128 + # test .dt namespace accessor + + ok_for_period = PeriodArray._datetimelike_ops + ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"] + ok_for_dt = 
DatetimeIndex._datetimelike_ops + ok_for_dt_methods = [ + "to_period", + "to_pydatetime", + "tz_localize", + "tz_convert", + "normalize", + "strftime", + "round", + "floor", + "ceil", + "day_name", + "month_name", + ] + ok_for_td = TimedeltaIndex._datetimelike_ops + ok_for_td_methods = [ + "components", + "to_pytimedelta", + "total_seconds", + "round", + "floor", + "ceil", + ] + + def get_expected(s, name): + result = getattr(Index(s._values), prop) + if isinstance(result, np.ndarray): + if is_integer_dtype(result): + result = result.astype("int64") + elif not is_list_like(result): + return result + return Series(result, index=s.index, name=s.name) + + def compare(s, name): + a = getattr(s.dt, prop) + b = get_expected(s, prop) + if not (is_list_like(a) and is_list_like(b)): + assert a == b + else: + tm.assert_series_equal(a, b) + + # datetimeindex + cases = [ + Series(date_range("20130101", periods=5), name="xxx"), + Series(date_range("20130101", periods=5, freq="s"), name="xxx"), + Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"), + ] + for s in cases: + for prop in ok_for_dt: + # we test freq below + if prop != "freq": + compare(s, prop) + + for prop in ok_for_dt_methods: + getattr(s.dt, prop) + + result = s.dt.to_pydatetime() + assert isinstance(result, np.ndarray) + assert result.dtype == object + + result = s.dt.tz_localize("US/Eastern") + exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern") + expected = Series(exp_values, index=s.index, name="xxx") + tm.assert_series_equal(result, expected) + + tz_result = result.dt.tz + assert str(tz_result) == "US/Eastern" + freq_result = s.dt.freq + assert freq_result == DatetimeIndex(s.values, freq="infer").freq + + # let's localize, then convert + result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") + exp_values = ( + DatetimeIndex(s.values).tz_localize("UTC").tz_convert("US/Eastern") + ) + expected = Series(exp_values, index=s.index, name="xxx") + tm.assert_series_equal(result, 
expected) + + # datetimeindex with tz + s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") + for prop in ok_for_dt: + + # we test freq below + if prop != "freq": + compare(s, prop) + + for prop in ok_for_dt_methods: + getattr(s.dt, prop) + + result = s.dt.to_pydatetime() + assert isinstance(result, np.ndarray) + assert result.dtype == object + + result = s.dt.tz_convert("CET") + expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx") + tm.assert_series_equal(result, expected) + + tz_result = result.dt.tz + assert str(tz_result) == "CET" + freq_result = s.dt.freq + assert freq_result == DatetimeIndex(s.values, freq="infer").freq + + # timedelta index + cases = [ + Series( + timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx" + ), + Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"), + Series( + timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"), + name="xxx", + ), + ] + for s in cases: + for prop in ok_for_td: + # we test freq below + if prop != "freq": + compare(s, prop) + + for prop in ok_for_td_methods: + getattr(s.dt, prop) + + result = s.dt.components + assert isinstance(result, DataFrame) + tm.assert_index_equal(result.index, s.index) + + result = s.dt.to_pytimedelta() + assert isinstance(result, np.ndarray) + assert result.dtype == object + + result = s.dt.total_seconds() + assert isinstance(result, pd.Series) + assert result.dtype == "float64" + + freq_result = s.dt.freq + assert freq_result == TimedeltaIndex(s.values, freq="infer").freq + + # both + index = date_range("20130101", periods=3, freq="D") + s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx") + exp = Series( + np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx" + ) + tm.assert_series_equal(s.dt.year, exp) + + exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") + tm.assert_series_equal(s.dt.month, exp) + + exp = Series(np.array([0, 1, 
2], dtype="int64"), index=index, name="xxx") + tm.assert_series_equal(s.dt.second, exp) + + exp = pd.Series([s[0]] * 3, index=index, name="xxx") + tm.assert_series_equal(s.dt.normalize(), exp) + + # periodindex + cases = [Series(period_range("20130101", periods=5, freq="D"), name="xxx")] + for s in cases: + for prop in ok_for_period: + # we test freq below + if prop != "freq": + compare(s, prop) + + for prop in ok_for_period_methods: + getattr(s.dt, prop) + + freq_result = s.dt.freq + assert freq_result == PeriodIndex(s.values).freq + + # test limited display api + def get_dir(s): + results = [r for r in s.dt.__dir__() if not r.startswith("_")] + return sorted(set(results)) + + s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") + results = get_dir(s) + tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) + + s = Series( + period_range("20130101", periods=5, freq="D", name="xxx").astype(object) + ) + results = get_dir(s) + tm.assert_almost_equal( + results, sorted(set(ok_for_period + ok_for_period_methods)) + ) + + # 11295 + # ambiguous time error on the conversions + s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") + s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") + results = get_dir(s) + tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) + exp_values = pd.date_range( + "2015-01-01", "2016-01-01", freq="T", tz="UTC" + ).tz_convert("America/Chicago") + expected = Series(exp_values, name="xxx") + tm.assert_series_equal(s, expected) + + # no setting allowed + s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") + with pytest.raises(ValueError, match="modifications"): + s.dt.hour = 5 + + # trying to set a copy + with pd.option_context("chained_assignment", "raise"): + with pytest.raises(com.SettingWithCopyError): + s.dt.hour[0] = 5 + + @pytest.mark.parametrize( + "method, dates", + [ + ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]], + ["floor", 
["2012-01-01", "2012-01-01", "2012-01-01"]], + ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]], + ], + ) + def test_dt_round(self, method, dates): + # round + s = Series( + pd.to_datetime( + ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"] + ), + name="xxx", + ) + result = getattr(s.dt, method)("D") + expected = Series(pd.to_datetime(dates), name="xxx") + tm.assert_series_equal(result, expected) + + def test_dt_round_tz(self): + s = Series( + pd.to_datetime( + ["2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00"] + ), + name="xxx", + ) + result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round("D") + + exp_values = pd.to_datetime( + ["2012-01-01", "2012-01-01", "2012-01-01"] + ).tz_localize("US/Eastern") + expected = Series(exp_values, name="xxx") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) + def test_dt_round_tz_ambiguous(self, method): + # GH 18946 round near "fall back" DST + df1 = pd.DataFrame( + [ + pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True), + pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True), + pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True), + ], + columns=["date"], + ) + df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid") + # infer + result = getattr(df1.date.dt, method)("H", ambiguous="infer") + expected = df1["date"] + tm.assert_series_equal(result, expected) + + # bool-array + result = getattr(df1.date.dt, method)("H", ambiguous=[True, False, False]) + tm.assert_series_equal(result, expected) + + # NaT + result = getattr(df1.date.dt, method)("H", ambiguous="NaT") + expected = df1["date"].copy() + expected.iloc[0:2] = pd.NaT + tm.assert_series_equal(result, expected) + + # raise + with pytest.raises(pytz.AmbiguousTimeError): + getattr(df1.date.dt, method)("H", ambiguous="raise") + + @pytest.mark.parametrize( + "method, ts_str, freq", + [ + ["ceil", "2018-03-11 01:59:00-0600", "5min"], + ["round", "2018-03-11 
01:59:00-0600", "5min"], + ["floor", "2018-03-11 03:01:00-0500", "2H"], + ], + ) + def test_dt_round_tz_nonexistent(self, method, ts_str, freq): + # GH 23324 round near "spring forward" DST + s = Series([pd.Timestamp(ts_str, tz="America/Chicago")]) + result = getattr(s.dt, method)(freq, nonexistent="shift_forward") + expected = Series([pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")]) + tm.assert_series_equal(result, expected) + + result = getattr(s.dt, method)(freq, nonexistent="NaT") + expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) + tm.assert_series_equal(result, expected) + + with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): + getattr(s.dt, method)(freq, nonexistent="raise") + + def test_dt_namespace_accessor_categorical(self): + # GH 19468 + dti = DatetimeIndex(["20171111", "20181212"]).repeat(2) + s = Series(pd.Categorical(dti), name="foo") + result = s.dt.year + expected = Series([2017, 2017, 2018, 2018], name="foo") + tm.assert_series_equal(result, expected) + + def test_dt_tz_localize_categorical(self, tz_aware_fixture): + # GH 27952 + tz = tz_aware_fixture + datetimes = pd.Series( + ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]" + ) + categorical = datetimes.astype("category") + result = categorical.dt.tz_localize(tz) + expected = datetimes.dt.tz_localize(tz) + tm.assert_series_equal(result, expected) + + def test_dt_tz_convert_categorical(self, tz_aware_fixture): + # GH 27952 + tz = tz_aware_fixture + datetimes = pd.Series( + ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]" + ) + categorical = datetimes.astype("category") + result = categorical.dt.tz_convert(tz) + expected = datetimes.dt.tz_convert(tz) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("accessor", ["year", "month", "day"]) + def test_dt_other_accessors_categorical(self, accessor): + # GH 27952 + datetimes = pd.Series( + ["2018-01-01", "2018-01-01", "2019-01-02"], 
dtype="datetime64[ns]" + ) + categorical = datetimes.astype("category") + result = getattr(categorical.dt, accessor) + expected = getattr(datetimes.dt, accessor) + tm.assert_series_equal(result, expected) + + def test_dt_accessor_no_new_attributes(self): + # https://github.com/pandas-dev/pandas/issues/10673 + s = Series(date_range("20130101", periods=5, freq="D")) + with pytest.raises(AttributeError, match="You cannot add any new attribute"): + s.dt.xlabel = "a" + + @pytest.mark.parametrize( + "time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales() + ) + def test_dt_accessor_datetime_name_accessors(self, time_locale): + # Test Monday -> Sunday and January -> December, in that sequence + if time_locale is None: + # If the time_locale is None, day-name and month_name should + # return the english attributes + expected_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + expected_months = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", + ] + else: + with tm.set_locale(time_locale, locale.LC_TIME): + expected_days = calendar.day_name[:] + expected_months = calendar.month_name[1:] + + s = Series(date_range(freq="D", start=datetime(1998, 1, 1), periods=365)) + english_days = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + for day, name, eng_name in zip(range(4, 11), expected_days, english_days): + name = name.capitalize() + assert s.dt.day_name(locale=time_locale)[day] == name + s = s.append(Series([pd.NaT])) + assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1]) + + s = Series(date_range(freq="M", start="2012", end="2013")) + result = s.dt.month_name(locale=time_locale) + expected = Series([month.capitalize() for month in expected_months]) + + # work around https://github.com/pandas-dev/pandas/issues/22342 + result = 
result.str.normalize("NFD") + expected = expected.str.normalize("NFD") + + tm.assert_series_equal(result, expected) + + for s_date, expected in zip(s, expected_months): + result = s_date.month_name(locale=time_locale) + expected = expected.capitalize() + + result = unicodedata.normalize("NFD", result) + expected = unicodedata.normalize("NFD", expected) + + assert result == expected + + s = s.append(Series([pd.NaT])) + assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1]) + + def test_strftime(self): + # GH 10086 + s = Series(date_range("20130101", periods=5)) + result = s.dt.strftime("%Y/%m/%d") + expected = Series( + ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"] + ) + tm.assert_series_equal(result, expected) + + s = Series(date_range("2015-02-03 11:22:33.4567", periods=5)) + result = s.dt.strftime("%Y/%m/%d %H-%M-%S") + expected = Series( + [ + "2015/02/03 11-22-33", + "2015/02/04 11-22-33", + "2015/02/05 11-22-33", + "2015/02/06 11-22-33", + "2015/02/07 11-22-33", + ] + ) + tm.assert_series_equal(result, expected) + + s = Series(period_range("20130101", periods=5)) + result = s.dt.strftime("%Y/%m/%d") + expected = Series( + ["2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"] + ) + tm.assert_series_equal(result, expected) + + s = Series(period_range("2015-02-03 11:22:33.4567", periods=5, freq="s")) + result = s.dt.strftime("%Y/%m/%d %H-%M-%S") + expected = Series( + [ + "2015/02/03 11-22-33", + "2015/02/03 11-22-34", + "2015/02/03 11-22-35", + "2015/02/03 11-22-36", + "2015/02/03 11-22-37", + ] + ) + tm.assert_series_equal(result, expected) + + s = Series(date_range("20130101", periods=5)) + s.iloc[0] = pd.NaT + result = s.dt.strftime("%Y/%m/%d") + expected = Series( + [np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"] + ) + tm.assert_series_equal(result, expected) + + datetime_index = date_range("20150301", periods=5) + result = datetime_index.strftime("%Y/%m/%d") + + expected = Index( + 
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"], + dtype=np.object_, + ) + # dtype may be S10 or U10 depending on python version + tm.assert_index_equal(result, expected) + + period_index = period_range("20150301", periods=5) + result = period_index.strftime("%Y/%m/%d") + expected = Index( + ["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"], + dtype="=U10", + ) + tm.assert_index_equal(result, expected) + + s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) + result = s.dt.strftime("%Y-%m-%d %H:%M:%S") + expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"]) + tm.assert_series_equal(result, expected) + + s = Series(period_range("20130101", periods=4, freq="H")) + result = s.dt.strftime("%Y/%m/%d %H:%M:%S") + expected = Series( + [ + "2013/01/01 00:00:00", + "2013/01/01 01:00:00", + "2013/01/01 02:00:00", + "2013/01/01 03:00:00", + ] + ) + + s = Series(period_range("20130101", periods=4, freq="L")) + result = s.dt.strftime("%Y/%m/%d %H:%M:%S.%l") + expected = Series( + [ + "2013/01/01 00:00:00.000", + "2013/01/01 00:00:00.001", + "2013/01/01 00:00:00.002", + "2013/01/01 00:00:00.003", + ] + ) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "data", + [ + DatetimeIndex(["2019-01-01", pd.NaT]), + PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"), + ], + ) + def test_strftime_nat(self, data): + # GH 29578 + s = Series(data) + result = s.dt.strftime("%Y-%m-%d") + expected = Series(["2019-01-01", np.nan]) + tm.assert_series_equal(result, expected) + + def test_valid_dt_with_missing_values(self): + + from datetime import date, time + + # GH 8689 + s = Series(date_range("20130101", periods=5, freq="D")) + s.iloc[2] = pd.NaT + + for attr in ["microsecond", "nanosecond", "second", "minute", "hour", "day"]: + expected = getattr(s.dt, attr).copy() + expected.iloc[2] = np.nan + result = getattr(s.dt, attr) + tm.assert_series_equal(result, expected) + + result = 
s.dt.date + expected = Series( + [ + date(2013, 1, 1), + date(2013, 1, 2), + np.nan, + date(2013, 1, 4), + date(2013, 1, 5), + ], + dtype="object", + ) + tm.assert_series_equal(result, expected) + + result = s.dt.time + expected = Series([time(0), time(0), np.nan, time(0), time(0)], dtype="object") + tm.assert_series_equal(result, expected) + + def test_dt_accessor_api(self): + # GH 9322 + from pandas.core.indexes.accessors import ( + CombinedDatetimelikeProperties, + DatetimeProperties, + ) + + assert Series.dt is CombinedDatetimelikeProperties + + s = Series(date_range("2000-01-01", periods=3)) + assert isinstance(s.dt, DatetimeProperties) + + @pytest.mark.parametrize( + "ser", [Series(np.arange(5)), Series(list("abcde")), Series(np.random.randn(5))] + ) + def test_dt_accessor_invalid(self, ser): + # GH#9322 check that series with incorrect dtypes don't have attr + with pytest.raises(AttributeError, match="only use .dt accessor"): + ser.dt + assert not hasattr(ser, "dt") + + def test_dt_accessor_updates_on_inplace(self): + s = Series(pd.date_range("2018-01-01", periods=10)) + s[2] = None + s.fillna(pd.Timestamp("2018-01-01"), inplace=True) + result = s.dt.date + assert result[0] == result[2] + + def test_between(self): + s = Series(bdate_range("1/1/2000", periods=20).astype(object)) + s[::2] = np.nan + + result = s[s.between(s[3], s[17])] + expected = s[3:18].dropna() + tm.assert_series_equal(result, expected) + + result = s[s.between(s[3], s[17], inclusive=False)] + expected = s[5:16].dropna() + tm.assert_series_equal(result, expected) + + def test_date_tz(self): + # GH11757 + rng = pd.DatetimeIndex( + ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], + tz="US/Eastern", + ) + s = Series(rng) + expected = Series([date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)]) + tm.assert_series_equal(s.dt.date, expected) + tm.assert_series_equal(s.apply(lambda x: x.date()), expected) + + def test_datetime_understood(self): + # Ensures it doesn't fail to 
create the right series + # reported in issue#16726 + series = pd.Series(pd.date_range("2012-01-01", periods=3)) + offset = pd.offsets.DateOffset(days=6) + result = series - offset + expected = pd.Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"])) + tm.assert_series_equal(result, expected) + + def test_dt_timetz_accessor(self, tz_naive_fixture): + # GH21358 + tz = maybe_get_tz(tz_naive_fixture) + + dtindex = pd.DatetimeIndex( + ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz + ) + s = Series(dtindex) + expected = Series( + [time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz)] + ) + result = s.dt.timetz + tm.assert_series_equal(result, expected) + + def test_setitem_with_string_index(self): + # GH 23451 + x = pd.Series([1, 2, 3], index=["Date", "b", "other"]) + x["Date"] = date.today() + assert x.Date == date.today() + assert x["Date"] == date.today() + + def test_setitem_with_different_tz(self): + # GH#24024 + ser = pd.Series(pd.date_range("2000", periods=2, tz="US/Central")) + ser[0] = pd.Timestamp("2000", tz="US/Eastern") + expected = pd.Series( + [ + pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"), + pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"), + ], + dtype=object, + ) + tm.assert_series_equal(ser, expected) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py new file mode 100644 index 00000000..a57ec2ba --- /dev/null +++ b/pandas/tests/series/test_dtypes.py @@ -0,0 +1,489 @@ +from datetime import datetime, timedelta +from importlib import reload +import string +import sys + +import numpy as np +import pytest + +from pandas._libs.tslibs import iNaT + +from pandas.core.dtypes.dtypes import CategoricalDtype + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + Series, + Timedelta, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestSeriesDtypes: + def test_dt64_series_astype_object(self): + 
dt64ser = Series(date_range("20130101", periods=3)) + result = dt64ser.astype(object) + assert isinstance(result.iloc[0], datetime) + assert result.dtype == np.object_ + + def test_td64_series_astype_object(self): + tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]") + result = tdser.astype(object) + assert isinstance(result.iloc[0], timedelta) + assert result.dtype == np.object_ + + @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"]) + def test_astype(self, dtype): + s = Series(np.random.randn(5), name="foo") + as_typed = s.astype(dtype) + + assert as_typed.dtype == dtype + assert as_typed.name == s.name + + def test_dtype(self, datetime_series): + + assert datetime_series.dtype == np.dtype("float64") + assert datetime_series.dtypes == np.dtype("float64") + + @pytest.mark.parametrize("value", [np.nan, np.inf]) + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + def test_astype_cast_nan_inf_int(self, dtype, value): + # gh-14265: check NaN and inf raise error when converting to int + msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" + s = Series([value]) + + with pytest.raises(ValueError, match=msg): + s.astype(dtype) + + @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) + def test_astype_cast_object_int_fail(self, dtype): + arr = Series(["car", "house", "tree", "1"]) + msg = r"invalid literal for int\(\) with base 10: 'car'" + with pytest.raises(ValueError, match=msg): + arr.astype(dtype) + + def test_astype_cast_object_int(self): + arr = Series(["1", "2", "3", "4"], dtype=object) + result = arr.astype(int) + + tm.assert_series_equal(result, Series(np.arange(1, 5))) + + def test_astype_datetime(self): + s = Series(iNaT, dtype="M8[ns]", index=range(5)) + + s = s.astype("O") + assert s.dtype == np.object_ + + s = Series([datetime(2001, 1, 2, 0, 0)]) + + s = s.astype("O") + assert s.dtype == np.object_ + + s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)]) + + s[1] = np.nan + assert 
s.dtype == "M8[ns]" + + s = s.astype("O") + assert s.dtype == np.object_ + + def test_astype_datetime64tz(self): + s = Series(date_range("20130101", periods=3, tz="US/Eastern")) + + # astype + result = s.astype(object) + expected = Series(s.astype(object), dtype=object) + tm.assert_series_equal(result, expected) + + result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz) + tm.assert_series_equal(result, s) + + # astype - object, preserves on construction + result = Series(s.astype(object)) + expected = s.astype(object) + tm.assert_series_equal(result, expected) + + # astype - datetime64[ns, tz] + result = Series(s.values).astype("datetime64[ns, US/Eastern]") + tm.assert_series_equal(result, s) + + result = Series(s.values).astype(s.dtype) + tm.assert_series_equal(result, s) + + result = s.astype("datetime64[ns, CET]") + expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET")) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [str, np.str_]) + @pytest.mark.parametrize( + "series", + [ + Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), + Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]), + ], + ) + def test_astype_str_map(self, dtype, series): + # see gh-4405 + result = series.astype(dtype) + expected = series.map(str) + tm.assert_series_equal(result, expected) + + def test_astype_str_cast(self): + # see gh-9757 + ts = Series([Timestamp("2010-01-04 00:00:00")]) + s = ts.astype(str) + + expected = Series([str("2010-01-04")]) + tm.assert_series_equal(s, expected) + + ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")]) + s = ts.astype(str) + + expected = Series([str("2010-01-04 00:00:00-05:00")]) + tm.assert_series_equal(s, expected) + + td = Series([Timedelta(1, unit="d")]) + s = td.astype(str) + + expected = Series([str("1 days 00:00:00.000000000")]) + tm.assert_series_equal(s, expected) + + def test_astype_unicode(self): + # see gh-7758: A bit of 
magic is required to set + # default encoding to utf-8 + digits = string.digits + test_series = [ + Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), + Series(["データーサイエンス、お前はもう死んでいる"]), + ] + + former_encoding = None + + if sys.getdefaultencoding() == "utf-8": + test_series.append(Series(["野菜食べないとやばい".encode("utf-8")])) + + for s in test_series: + res = s.astype("unicode") + expec = s.map(str) + tm.assert_series_equal(res, expec) + + # Restore the former encoding + if former_encoding is not None and former_encoding != "utf-8": + reload(sys) + sys.setdefaultencoding(former_encoding) + + @pytest.mark.parametrize("dtype_class", [dict, Series]) + def test_astype_dict_like(self, dtype_class): + # see gh-7271 + s = Series(range(0, 10, 2), name="abc") + + dt1 = dtype_class({"abc": str}) + result = s.astype(dt1) + expected = Series(["0", "2", "4", "6", "8"], name="abc") + tm.assert_series_equal(result, expected) + + dt2 = dtype_class({"abc": "float64"}) + result = s.astype(dt2) + expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc") + tm.assert_series_equal(result, expected) + + dt3 = dtype_class({"abc": str, "def": str}) + msg = ( + "Only the Series name can be used for the key in Series dtype" + r" mappings\." 
+ ) + with pytest.raises(KeyError, match=msg): + s.astype(dt3) + + dt4 = dtype_class({0: str}) + with pytest.raises(KeyError, match=msg): + s.astype(dt4) + + # GH16717 + # if dtypes provided is empty, it should error + if dtype_class is Series: + dt5 = dtype_class({}, dtype=object) + else: + dt5 = dtype_class({}) + + with pytest.raises(KeyError, match=msg): + s.astype(dt5) + + def test_astype_categories_raises(self): + # deprecated 17636, removed in GH-27141 + s = Series(["a", "b", "a"]) + with pytest.raises(TypeError, match="got an unexpected"): + s.astype("category", categories=["a", "b"], ordered=True) + + def test_astype_from_categorical(self): + items = ["a", "b", "c", "a"] + s = Series(items) + exp = Series(Categorical(items)) + res = s.astype("category") + tm.assert_series_equal(res, exp) + + items = [1, 2, 3, 1] + s = Series(items) + exp = Series(Categorical(items)) + res = s.astype("category") + tm.assert_series_equal(res, exp) + + df = DataFrame({"cats": [1, 2, 3, 4, 5, 6], "vals": [1, 2, 3, 4, 5, 6]}) + cats = Categorical([1, 2, 3, 4, 5, 6]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + df = DataFrame( + {"cats": ["a", "b", "b", "a", "a", "d"], "vals": [1, 2, 3, 4, 5, 6]} + ) + cats = Categorical(["a", "b", "b", "a", "a", "d"]) + exp_df = DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") + tm.assert_frame_equal(exp_df, df) + + # with keywords + lst = ["a", "b", "c", "a"] + s = Series(lst) + exp = Series(Categorical(lst, ordered=True)) + res = s.astype(CategoricalDtype(None, ordered=True)) + tm.assert_series_equal(res, exp) + + exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True)) + res = s.astype(CategoricalDtype(list("abcdef"), ordered=True)) + tm.assert_series_equal(res, exp) + + def test_astype_categorical_to_other(self): + + value = np.random.RandomState(0).randint(0, 10000, 100) + 
df = DataFrame({"value": value}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] + cat_labels = Categorical(labels, labels) + + df = df.sort_values(by=["value"], ascending=True) + df["value_group"] = pd.cut( + df.value, range(0, 10500, 500), right=False, labels=cat_labels + ) + + s = df["value_group"] + expected = s + tm.assert_series_equal(s.astype("category"), expected) + tm.assert_series_equal(s.astype(CategoricalDtype()), expected) + msg = r"could not convert string to float|invalid literal for float\(\)" + with pytest.raises(ValueError, match=msg): + s.astype("float64") + + cat = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])) + exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"]) + tm.assert_series_equal(cat.astype("str"), exp) + s2 = Series(Categorical(["1", "2", "3", "4"])) + exp2 = Series([1, 2, 3, 4]).astype(int) + tm.assert_series_equal(s2.astype("int"), exp2) + + # object don't sort correctly, so just compare that we have the same + # values + def cmp(a, b): + tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b))) + + expected = Series(np.array(s.values), name="value_group") + cmp(s.astype("object"), expected) + cmp(s.astype(np.object_), expected) + + # array conversion + tm.assert_almost_equal(np.array(s), np.array(s.values)) + + # valid conversion + for valid in [ + lambda x: x.astype("category"), + lambda x: x.astype(CategoricalDtype()), + lambda x: x.astype("object").astype("category"), + lambda x: x.astype("object").astype(CategoricalDtype()), + ]: + + result = valid(s) + # compare series values + # internal .categories can't be compared because it is sorted + tm.assert_series_equal(result, s, check_categorical=False) + + # invalid conversion (these are NOT a dtype) + msg = ( + r"invalid type for astype" + ) + for invalid in [ + lambda x: x.astype(Categorical), + lambda x: x.astype("object").astype(Categorical), + ]: + with pytest.raises(TypeError, match=msg): + invalid(s) + + 
@pytest.mark.parametrize("name", [None, "foo"]) + @pytest.mark.parametrize("dtype_ordered", [True, False]) + @pytest.mark.parametrize("series_ordered", [True, False]) + def test_astype_categorical_to_categorical( + self, name, dtype_ordered, series_ordered + ): + # GH 10696/18593 + s_data = list("abcaacbab") + s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered) + s = Series(s_data, dtype=s_dtype, name=name) + + # unspecified categories + dtype = CategoricalDtype(ordered=dtype_ordered) + result = s.astype(dtype) + exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered) + expected = Series(s_data, name=name, dtype=exp_dtype) + tm.assert_series_equal(result, expected) + + # different categories + dtype = CategoricalDtype(list("adc"), dtype_ordered) + result = s.astype(dtype) + expected = Series(s_data, name=name, dtype=dtype) + tm.assert_series_equal(result, expected) + + if dtype_ordered is False: + # not specifying ordered, so only test once + expected = s + result = s.astype("category") + tm.assert_series_equal(result, expected) + + def test_astype_bool_missing_to_categorical(self): + # GH-19182 + s = Series([True, False, np.nan]) + assert s.dtypes == np.object_ + + result = s.astype(CategoricalDtype(categories=[True, False])) + expected = Series(Categorical([True, False, np.nan], categories=[True, False])) + tm.assert_series_equal(result, expected) + + def test_astype_categoricaldtype(self): + s = Series(["a", "b", "a"]) + result = s.astype(CategoricalDtype(["a", "b"], ordered=True)) + expected = Series(Categorical(["a", "b", "a"], ordered=True)) + tm.assert_series_equal(result, expected) + + result = s.astype(CategoricalDtype(["a", "b"], ordered=False)) + expected = Series(Categorical(["a", "b", "a"], ordered=False)) + tm.assert_series_equal(result, expected) + + result = s.astype(CategoricalDtype(["a", "b", "c"], ordered=False)) + expected = Series( + Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False) + ) + 
tm.assert_series_equal(result, expected) + tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"])) + + @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64]) + def test_astype_generic_timestamp_no_frequency(self, dtype): + # see gh-15524, gh-15987 + data = [1] + s = Series(data) + + msg = ( + r"The '{dtype}' dtype has no unit\. " + r"Please pass in '{dtype}\[ns\]' instead." + ).format(dtype=dtype.__name__) + with pytest.raises(ValueError, match=msg): + s.astype(dtype) + + @pytest.mark.parametrize("dtype", np.typecodes["All"]) + def test_astype_empty_constructor_equality(self, dtype): + # see gh-15524 + + if dtype not in ( + "S", + "V", # poor support (if any) currently + "M", + "m", # Generic timestamps raise a ValueError. Already tested. + ): + init_empty = Series([], dtype=dtype) + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) + + def test_arg_for_errors_in_astype(self): + # see gh-14878 + s = Series([1, 2, 3]) + + msg = ( + r"Expected value of kwarg 'errors' to be one of \['raise'," + r" 'ignore'\]\. Supplied value is 'False'" + ) + with pytest.raises(ValueError, match=msg): + s.astype(np.float64, errors=False) + + s.astype(np.int8, errors="raise") + + def test_intercept_astype_object(self): + series = Series(date_range("1/1/2000", periods=10)) + + # This test no longer makes sense, as + # Series is by default already M8[ns]. 
+ expected = series.astype("object") + + df = DataFrame({"a": series, "b": np.random.randn(len(series))}) + exp_dtypes = Series( + [np.dtype("datetime64[ns]"), np.dtype("float64")], index=["a", "b"] + ) + tm.assert_series_equal(df.dtypes, exp_dtypes) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + df = DataFrame({"a": series, "b": ["foo"] * len(series)}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + def test_series_to_categorical(self): + # see gh-16524: test conversion of Series to Categorical + series = Series(["a", "b", "c"]) + + result = Series(series, dtype="category") + expected = Series(["a", "b", "c"], dtype="category") + + tm.assert_series_equal(result, expected) + + def test_infer_objects_series(self): + # GH 11221 + actual = Series(np.array([1, 2, 3], dtype="O")).infer_objects() + expected = Series([1, 2, 3]) + tm.assert_series_equal(actual, expected) + + actual = Series(np.array([1, 2, 3, None], dtype="O")).infer_objects() + expected = Series([1.0, 2.0, 3.0, np.nan]) + tm.assert_series_equal(actual, expected) + + # only soft conversions, unconvertable pass thru unchanged + actual = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects() + expected = Series([1, 2, 3, None, "a"]) + + assert actual.dtype == "object" + tm.assert_series_equal(actual, expected) + + @pytest.mark.parametrize( + "data", + [ + pd.period_range("2000", periods=4), + pd.IntervalIndex.from_breaks([1, 2, 3, 4]), + ], + ) + def test_values_compatibility(self, data): + # https://github.com/pandas-dev/pandas/issues/23995 + result = pd.Series(data).values + expected = np.array(data.astype(object)) + tm.assert_numpy_array_equal(result, expected) + + def test_reindex_astype_order_consistency(self): + # GH 17444 + s = Series([1, 2, 3], index=[2, 0, 1]) + new_index = [0, 1, 2] + temp_dtype = "category" + new_dtype = str + s1 = s.reindex(new_index).astype(temp_dtype).astype(new_dtype) + s2 = 
def test_unique():
    """``Series.unique`` with missing values in float, object and categorical data."""
    # GH714 (floats, also dtype=float) and #714 (NAs in object arrays):
    # one repeated value with every other slot set to NaN -> two uniques.
    cases = (
        ([1.2345] * 100, {}),
        ([1.2345] * 100, {"dtype": "f4"}),
        (["foo"] * 100, {"dtype": "O"}),
    )
    for values, kwargs in cases:
        ser = Series(values, **kwargs)
        ser[::2] = np.nan
        assert len(ser.unique()) == 2

    # decision about None: repeated None collapses to a single None
    with_none = Series([1, 2, 3, None, None, None], dtype=object)
    tm.assert_numpy_array_equal(
        with_none.unique(), np.array([1, 2, 3, None], dtype=object)
    )

    # GH 18051
    for raw in ([], [np.nan]):
        uniques = Series(Categorical(raw)).unique()
        tm.assert_categorical_equal(uniques, Categorical(raw), check_dtype=False)
#1807 + Series(Series(["a", "c", "b"]).unique()).sort_values() + + +@pytest.mark.parametrize( + "data, expected", + [ + (np.random.randint(0, 10, size=1000), False), + (np.arange(1000), True), + ([], True), + ([np.nan], True), + (["foo", "bar", np.nan], True), + (["foo", "foo", np.nan], False), + (["foo", "bar", np.nan, np.nan], False), + ], +) +def test_is_unique(data, expected): + # GH11946 / GH25180 + s = create_series_with_explicit_dtype(data, dtype_if_empty=object) + assert s.is_unique is expected + + +def test_is_unique_class_ne(capsys): + # GH 20661 + class Foo: + def __init__(self, val): + self._value = val + + def __ne__(self, other): + raise Exception("NEQ not supported") + + with capsys.disabled(): + li = [Foo(i) for i in range(5)] + s = Series(li, index=list(range(5))) + s.is_unique + captured = capsys.readouterr() + assert len(captured.err) == 0 diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py new file mode 100644 index 00000000..4c817ed2 --- /dev/null +++ b/pandas/tests/series/test_internals.py @@ -0,0 +1,244 @@ +from datetime import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import NaT, Series, Timestamp +import pandas._testing as tm +from pandas.core.internals.blocks import IntBlock + + +class TestSeriesInternals: + + # GH 10265 + def test_convert(self): + # Tests: All to nans, coerce, true + # Test coercion returns correct type + s = Series(["a", "b", "c"]) + results = s._convert(datetime=True, coerce=True) + expected = Series([NaT] * 3) + tm.assert_series_equal(results, expected) + + results = s._convert(numeric=True, coerce=True) + expected = Series([np.nan] * 3) + tm.assert_series_equal(results, expected) + + expected = Series([NaT] * 3, dtype=np.dtype("m8[ns]")) + results = s._convert(timedelta=True, coerce=True) + tm.assert_series_equal(results, expected) + + dt = datetime(2001, 1, 1, 0, 0) + td = dt - datetime(2000, 1, 1, 0, 0) + + # Test coercion with mixed 
types + s = Series(["a", "3.1415", dt, td]) + results = s._convert(datetime=True, coerce=True) + expected = Series([NaT, NaT, dt, NaT]) + tm.assert_series_equal(results, expected) + + results = s._convert(numeric=True, coerce=True) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) + tm.assert_series_equal(results, expected) + + results = s._convert(timedelta=True, coerce=True) + expected = Series([NaT, NaT, NaT, td], dtype=np.dtype("m8[ns]")) + tm.assert_series_equal(results, expected) + + # Test standard conversion returns original + results = s._convert(datetime=True) + tm.assert_series_equal(results, s) + results = s._convert(numeric=True) + expected = Series([np.nan, 3.1415, np.nan, np.nan]) + tm.assert_series_equal(results, expected) + results = s._convert(timedelta=True) + tm.assert_series_equal(results, s) + + # test pass-through and non-conversion when other types selected + s = Series(["1.0", "2.0", "3.0"]) + results = s._convert(datetime=True, numeric=True, timedelta=True) + expected = Series([1.0, 2.0, 3.0]) + tm.assert_series_equal(results, expected) + results = s._convert(True, False, True) + tm.assert_series_equal(results, s) + + s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O") + results = s._convert(datetime=True, numeric=True, timedelta=True) + expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)]) + tm.assert_series_equal(results, expected) + results = s._convert(datetime=False, numeric=True, timedelta=True) + tm.assert_series_equal(results, s) + + td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0) + s = Series([td, td], dtype="O") + results = s._convert(datetime=True, numeric=True, timedelta=True) + expected = Series([td, td]) + tm.assert_series_equal(results, expected) + results = s._convert(True, True, False) + tm.assert_series_equal(results, s) + + s = Series([1.0, 2, 3], index=["a", "b", "c"]) + result = s._convert(numeric=True) + tm.assert_series_equal(result, s) + + # force 
numeric conversion + r = s.copy().astype("O") + r["a"] = "1" + result = r._convert(numeric=True) + tm.assert_series_equal(result, s) + + r = s.copy().astype("O") + r["a"] = "1." + result = r._convert(numeric=True) + tm.assert_series_equal(result, s) + + r = s.copy().astype("O") + r["a"] = "garbled" + result = r._convert(numeric=True) + expected = s.copy() + expected["a"] = np.nan + tm.assert_series_equal(result, expected) + + # GH 4119, not converting a mixed type (e.g.floats and object) + s = Series([1, "na", 3, 4]) + result = s._convert(datetime=True, numeric=True) + expected = Series([1, np.nan, 3, 4]) + tm.assert_series_equal(result, expected) + + s = Series([1, "", 3, 4]) + result = s._convert(datetime=True, numeric=True) + tm.assert_series_equal(result, expected) + + # dates + s = Series( + [ + datetime(2001, 1, 1, 0, 0), + datetime(2001, 1, 2, 0, 0), + datetime(2001, 1, 3, 0, 0), + ] + ) + s2 = Series( + [ + datetime(2001, 1, 1, 0, 0), + datetime(2001, 1, 2, 0, 0), + datetime(2001, 1, 3, 0, 0), + "foo", + 1.0, + 1, + Timestamp("20010104"), + "20010105", + ], + dtype="O", + ) + + result = s._convert(datetime=True) + expected = Series( + [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")], + dtype="M8[ns]", + ) + tm.assert_series_equal(result, expected) + + result = s._convert(datetime=True, coerce=True) + tm.assert_series_equal(result, expected) + + expected = Series( + [ + Timestamp("20010101"), + Timestamp("20010102"), + Timestamp("20010103"), + NaT, + NaT, + NaT, + Timestamp("20010104"), + Timestamp("20010105"), + ], + dtype="M8[ns]", + ) + result = s2._convert(datetime=True, numeric=False, timedelta=False, coerce=True) + tm.assert_series_equal(result, expected) + result = s2._convert(datetime=True, coerce=True) + tm.assert_series_equal(result, expected) + + s = Series(["foo", "bar", 1, 1.0], dtype="O") + result = s._convert(datetime=True, coerce=True) + expected = Series([NaT] * 2 + [Timestamp(1)] * 2) + tm.assert_series_equal(result, 
expected) + + # preserver if non-object + s = Series([1], dtype="float32") + result = s._convert(datetime=True, coerce=True) + tm.assert_series_equal(result, s) + + # r = s.copy() + # r[0] = np.nan + # result = r._convert(convert_dates=True,convert_numeric=False) + # assert result.dtype == 'M8[ns]' + + # dateutil parses some single letters into today's value as a date + expected = Series([NaT]) + for x in "abcdefghijklmnopqrstuvwxyz": + s = Series([x]) + result = s._convert(datetime=True, coerce=True) + tm.assert_series_equal(result, expected) + s = Series([x.upper()]) + result = s._convert(datetime=True, coerce=True) + tm.assert_series_equal(result, expected) + + def test_convert_no_arg_error(self): + s = Series(["1.0", "2"]) + msg = r"At least one of datetime, numeric or timedelta must be True\." + with pytest.raises(ValueError, match=msg): + s._convert() + + def test_convert_preserve_bool(self): + s = Series([1, True, 3, 5], dtype=object) + r = s._convert(datetime=True, numeric=True) + e = Series([1, 1, 3, 5], dtype="i8") + tm.assert_series_equal(r, e) + + def test_convert_preserve_all_bool(self): + s = Series([False, True, False, False], dtype=object) + r = s._convert(datetime=True, numeric=True) + e = Series([False, True, False, False], dtype=bool) + tm.assert_series_equal(r, e) + + def test_constructor_no_pandas_array(self): + ser = pd.Series([1, 2, 3]) + result = pd.Series(ser.array) + tm.assert_series_equal(ser, result) + assert isinstance(result._data.blocks[0], IntBlock) + + def test_astype_no_pandas_dtype(self): + # https://github.com/pandas-dev/pandas/pull/24866 + ser = pd.Series([1, 2], dtype="int64") + # Don't have PandasDtype in the public API, so we use `.array.dtype`, + # which is a PandasDtype. 
def test_hasnans_unchached_for_series():
    """Compare ``hasnans`` on an Index with the Series derived from it (GH#19700).

    The statement order matters: the first ``hasnans`` read is what puts the
    entry into the Index cache, and the Series is modified only after its
    first ``hasnans`` read.
    """
    # GH#19700
    idx = pd.Index([0, 1])
    assert idx.hasnans is False
    # The read above must have left an entry in the Index cache.
    assert "hasnans" in idx._cache
    ser = idx.to_series()
    assert ser.hasnans is False
    # The Series is expected to expose no ``_cache`` attribute at all ...
    assert not hasattr(ser, "_cache")
    # ... so after a NaN is written the property must report the new state.
    ser.iloc[-1] = np.nan
    assert ser.hasnans is True
    # Both classes are expected to carry the same docstring for the property.
    assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__
gh-10483 + datetime_series.to_csv(path, header=True) + ts_h = self.read_csv(path, header=0) + assert ts_h.name == "ts" + + string_series.to_csv(path, header=False) + series = self.read_csv(path) + tm.assert_series_equal(string_series, series, check_names=False) + + assert series.name is None + assert series.index.name is None + + string_series.to_csv(path, header=True) + series_h = self.read_csv(path, header=0) + assert series_h.name == "series" + + with open(path, "w") as outfile: + outfile.write("1998-01-01|1.0\n1999-01-01|2.0") + + series = self.read_csv(path, sep="|") + check_series = Series( + {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0} + ) + tm.assert_series_equal(check_series, series) + + series = self.read_csv(path, sep="|", parse_dates=False) + check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0}) + tm.assert_series_equal(check_series, series) + + def test_to_csv(self, datetime_series): + import io + + with tm.ensure_clean() as path: + datetime_series.to_csv(path, header=False) + + with io.open(path, newline=None) as f: + lines = f.readlines() + assert lines[1] != "\n" + + datetime_series.to_csv(path, index=False, header=False) + arr = np.loadtxt(path) + tm.assert_almost_equal(arr, datetime_series.values) + + def test_to_csv_unicode_index(self): + buf = StringIO() + s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"]) + + s.to_csv(buf, encoding="UTF-8", header=False) + buf.seek(0) + + s2 = self.read_csv(buf, index_col=0, encoding="UTF-8") + tm.assert_series_equal(s, s2) + + def test_to_csv_float_format(self): + + with tm.ensure_clean() as filename: + ser = Series([0.123456, 0.234567, 0.567567]) + ser.to_csv(filename, float_format="%.2f", header=False) + + rs = self.read_csv(filename) + xp = Series([0.12, 0.23, 0.57]) + tm.assert_series_equal(rs, xp) + + def test_to_csv_list_entries(self): + s = Series(["jack and jill", "jesse and frank"]) + + split = s.str.split(r"\s+and\s+") + + buf = StringIO() + split.to_csv(buf, header=False) + + 
def test_to_csv_path_is_none(self): + # GH 8215 + # Series.to_csv() was returning None, inconsistent with + # DataFrame.to_csv() which returned string + s = Series([1, 2, 3]) + csv_str = s.to_csv(path_or_buf=None, header=False) + assert isinstance(csv_str, str) + + @pytest.mark.parametrize( + "s,encoding", + [ + ( + Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"), + None, + ), + # GH 21241, 21118 + (Series(["abc", "def", "ghi"], name="X"), "ascii"), + (Series(["123", "你好", "世界"], name="中文"), "gb2312"), + (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"), + ], + ) + def test_to_csv_compression(self, s, encoding, compression): + + with tm.ensure_clean() as filename: + + s.to_csv(filename, compression=compression, encoding=encoding, header=True) + # test the round trip - to_csv -> read_csv + result = pd.read_csv( + filename, + compression=compression, + encoding=encoding, + index_col=0, + squeeze=True, + ) + tm.assert_series_equal(s, result) + + # test the round trip using file handle - to_csv -> read_csv + f, _handles = get_handle( + filename, "w", compression=compression, encoding=encoding + ) + with f: + s.to_csv(f, encoding=encoding, header=True) + result = pd.read_csv( + filename, + compression=compression, + encoding=encoding, + index_col=0, + squeeze=True, + ) + tm.assert_series_equal(s, result) + + # explicitly ensure file was compressed + with tm.decompress_file(filename, compression) as fh: + text = fh.read().decode(encoding or "utf8") + assert s.name in text + + with tm.decompress_file(filename, compression) as fh: + tm.assert_series_equal( + s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding) + ) + + def test_to_csv_interval_index(self): + # GH 28210 + s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3)) + + with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + s.to_csv(path, header=False) + result = self.read_csv(path, index_col=0, squeeze=True) + + # can't roundtrip 
intervalindex via read_csv so check string repr (GH 23595) + expected = s.copy() + expected.index = expected.index.astype(str) + + tm.assert_series_equal(result, expected) + + +class TestSeriesIO: + def test_to_frame(self, datetime_series): + datetime_series.name = None + rs = datetime_series.to_frame() + xp = pd.DataFrame(datetime_series.values, index=datetime_series.index) + tm.assert_frame_equal(rs, xp) + + datetime_series.name = "testname" + rs = datetime_series.to_frame() + xp = pd.DataFrame( + dict(testname=datetime_series.values), index=datetime_series.index + ) + tm.assert_frame_equal(rs, xp) + + rs = datetime_series.to_frame(name="testdifferent") + xp = pd.DataFrame( + dict(testdifferent=datetime_series.values), index=datetime_series.index + ) + tm.assert_frame_equal(rs, xp) + + def test_timeseries_periodindex(self): + # GH2891 + from pandas import period_range + + prng = period_range("1/1/2011", "1/1/2012", freq="M") + ts = Series(np.random.randn(len(prng)), prng) + new_ts = tm.round_trip_pickle(ts) + assert new_ts.index.freq == "M" + + def test_pickle_preserve_name(self): + for n in [777, 777.0, "name", datetime(2001, 11, 11), (1, 2)]: + unpickled = self._pickle_roundtrip_name(tm.makeTimeSeries(name=n)) + assert unpickled.name == n + + def _pickle_roundtrip_name(self, obj): + + with tm.ensure_clean() as path: + obj.to_pickle(path) + unpickled = pd.read_pickle(path) + return unpickled + + def test_to_frame_expanddim(self): + # GH 9762 + + class SubclassedSeries(Series): + @property + def _constructor_expanddim(self): + return SubclassedFrame + + class SubclassedFrame(DataFrame): + pass + + s = SubclassedSeries([1, 2, 3], name="X") + result = s.to_frame() + assert isinstance(result, SubclassedFrame) + expected = SubclassedFrame({"X": [1, 2, 3]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py new file mode 100644 index 00000000..7b6d9210 --- /dev/null +++ 
b/pandas/tests/series/test_missing.py @@ -0,0 +1,1650 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest +import pytz + +from pandas._libs.tslib import iNaT +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + IntervalIndex, + MultiIndex, + NaT, + Series, + Timedelta, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm + + +def _simple_ts(start, end, freq="D"): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + +class TestSeriesMissingData: + def test_timedelta_fillna(self): + # GH 3371 + s = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130102"), + Timestamp("20130103 9:01:01"), + ] + ) + td = s.diff() + + # reg fillna + result = td.fillna(Timedelta(seconds=0)) + expected = Series( + [ + timedelta(0), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + tm.assert_series_equal(result, expected) + + # interpreted as seconds, deprecated + with pytest.raises(TypeError, match="Passing integers to fillna"): + td.fillna(1) + + result = td.fillna(Timedelta(seconds=1)) + expected = Series( + [ + timedelta(seconds=1), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + tm.assert_series_equal(result, expected) + + result = td.fillna(timedelta(days=1, seconds=1)) + expected = Series( + [ + timedelta(days=1, seconds=1), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + tm.assert_series_equal(result, expected) + + result = td.fillna(np.timedelta64(int(1e9))) + expected = Series( + [ + timedelta(seconds=1), + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1), + ] + ) + tm.assert_series_equal(result, expected) + + result = td.fillna(NaT) + expected = Series( + [ + NaT, + timedelta(0), + timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 
1), + ], + dtype="m8[ns]", + ) + tm.assert_series_equal(result, expected) + + # ffill + td[2] = np.nan + result = td.ffill() + expected = td.fillna(Timedelta(seconds=0)) + expected[0] = np.nan + tm.assert_series_equal(result, expected) + + # bfill + td[2] = np.nan + result = td.bfill() + expected = td.fillna(Timedelta(seconds=0)) + expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1) + tm.assert_series_equal(result, expected) + + def test_datetime64_fillna(self): + + s = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130102"), + Timestamp("20130103 9:01:01"), + ] + ) + s[2] = np.nan + + # reg fillna + result = s.fillna(Timestamp("20130104")) + expected = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130104"), + Timestamp("20130103 9:01:01"), + ] + ) + tm.assert_series_equal(result, expected) + + result = s.fillna(NaT) + expected = s + tm.assert_series_equal(result, expected) + + # ffill + result = s.ffill() + expected = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130103 9:01:01"), + ] + ) + tm.assert_series_equal(result, expected) + + # bfill + result = s.bfill() + expected = Series( + [ + Timestamp("20130101"), + Timestamp("20130101"), + Timestamp("20130103 9:01:01"), + Timestamp("20130103 9:01:01"), + ] + ) + tm.assert_series_equal(result, expected) + + # GH 6587 + # make sure that we are treating as integer when filling + # this also tests inference of a datetime-like with NaT's + s = Series([pd.NaT, pd.NaT, "2013-08-05 15:30:00.000001"]) + expected = Series( + [ + "2013-08-05 15:30:00.000001", + "2013-08-05 15:30:00.000001", + "2013-08-05 15:30:00.000001", + ], + dtype="M8[ns]", + ) + result = s.fillna(method="backfill") + tm.assert_series_equal(result, expected) + + def test_datetime64_tz_fillna(self): + + for tz in ["US/Eastern", "Asia/Tokyo"]: + # DatetimeBlock + s = Series( + [ + Timestamp("2011-01-01 10:00"), + pd.NaT, + 
Timestamp("2011-01-03 10:00"), + pd.NaT, + ] + ) + null_loc = pd.Series([False, True, False, True]) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00")) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-02 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + # check s is not changed + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz)) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna("AAA") + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + "AAA", + Timestamp("2011-01-03 10:00"), + "AAA", + ], + dtype=object, + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + { + 1: pd.Timestamp("2011-01-02 10:00", tz=tz), + 3: pd.Timestamp("2011-01-04 10:00"), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + { + 1: pd.Timestamp("2011-01-02 10:00"), + 3: pd.Timestamp("2011-01-04 10:00"), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00"), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + # DatetimeBlockTZ + idx = pd.DatetimeIndex( + ["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz=tz + ) + s = pd.Series(idx) + assert s.dtype == f"datetime64[ns, {tz}]" + tm.assert_series_equal(pd.isna(s), 
null_loc) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-02 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz)) + idx = pd.DatetimeIndex( + [ + "2011-01-01 10:00", + "2011-01-02 10:00", + "2011-01-03 10:00", + "2011-01-02 10:00", + ], + tz=tz, + ) + expected = Series(idx) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(pd.Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime()) + idx = pd.DatetimeIndex( + [ + "2011-01-01 10:00", + "2011-01-02 10:00", + "2011-01-03 10:00", + "2011-01-02 10:00", + ], + tz=tz, + ) + expected = Series(idx) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna("AAA") + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + "AAA", + Timestamp("2011-01-03 10:00", tz=tz), + "AAA", + ], + dtype=object, + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + { + 1: pd.Timestamp("2011-01-02 10:00", tz=tz), + 3: pd.Timestamp("2011-01-04 10:00"), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-04 10:00"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna( + { + 1: pd.Timestamp("2011-01-02 10:00", tz=tz), + 3: pd.Timestamp("2011-01-04 10:00", tz=tz), + } + ) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2011-01-02 10:00", tz=tz), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2011-01-04 10:00", tz=tz), + ] + ) + tm.assert_series_equal(expected, 
result) + tm.assert_series_equal(pd.isna(s), null_loc) + + # filling with a naive/other zone, coerce to object + result = s.fillna(Timestamp("20130101")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2013-01-01"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2013-01-01"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + result = s.fillna(Timestamp("20130101", tz="US/Pacific")) + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz=tz), + Timestamp("2013-01-01", tz="US/Pacific"), + Timestamp("2011-01-03 10:00", tz=tz), + Timestamp("2013-01-01", tz="US/Pacific"), + ] + ) + tm.assert_series_equal(expected, result) + tm.assert_series_equal(pd.isna(s), null_loc) + + # with timezone + # GH 15855 + df = pd.Series([pd.Timestamp("2012-11-11 00:00:00+01:00"), pd.NaT]) + exp = pd.Series( + [ + pd.Timestamp("2012-11-11 00:00:00+01:00"), + pd.Timestamp("2012-11-11 00:00:00+01:00"), + ] + ) + tm.assert_series_equal(df.fillna(method="pad"), exp) + + df = pd.Series([pd.NaT, pd.Timestamp("2012-11-11 00:00:00+01:00")]) + exp = pd.Series( + [ + pd.Timestamp("2012-11-11 00:00:00+01:00"), + pd.Timestamp("2012-11-11 00:00:00+01:00"), + ] + ) + tm.assert_series_equal(df.fillna(method="bfill"), exp) + + def test_datetime64_non_nano_fillna(self): + # GH#27419 + ser = Series([Timestamp("2010-01-01"), pd.NaT, Timestamp("2000-01-01")]) + val = np.datetime64("1975-04-05", "ms") + + result = ser.fillna(val) + expected = Series( + [Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")] + ) + tm.assert_series_equal(result, expected) + + def test_fillna_consistency(self): + # GH 16402 + # fillna with a tz aware to a tz-naive, should result in object + + s = Series([Timestamp("20130101"), pd.NaT]) + + result = s.fillna(Timestamp("20130101", tz="US/Eastern")) + expected = Series( + [Timestamp("20130101"), Timestamp("2013-01-01", tz="US/Eastern")], + dtype="object", + ) + 
tm.assert_series_equal(result, expected) + + # where (we ignore the errors=) + result = s.where( + [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore" + ) + tm.assert_series_equal(result, expected) + + result = s.where( + [True, False], Timestamp("20130101", tz="US/Eastern"), errors="ignore" + ) + tm.assert_series_equal(result, expected) + + # with a non-datetime + result = s.fillna("foo") + expected = Series([Timestamp("20130101"), "foo"]) + tm.assert_series_equal(result, expected) + + # assignment + s2 = s.copy() + s2[1] = "foo" + tm.assert_series_equal(s2, expected) + + def test_where_sparse(self): + # GH#17198 make sure we dont get an AttributeError for sp_index + ser = pd.Series(pd.arrays.SparseArray([1, 2])) + result = ser.where(ser >= 2, 0) + expected = pd.Series(pd.arrays.SparseArray([0, 2])) + tm.assert_series_equal(result, expected) + + def test_datetime64tz_fillna_round_issue(self): + # GH 14872 + + data = pd.Series( + [pd.NaT, pd.NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)] + ) + + filled = data.fillna(method="bfill") + + expected = pd.Series( + [ + datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc), + datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc), + datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc), + ] + ) + + tm.assert_series_equal(filled, expected) + + def test_fillna_downcast(self): + # GH 15277 + # infer int64 from float64 + s = pd.Series([1.0, np.nan]) + result = s.fillna(0, downcast="infer") + expected = pd.Series([1, 0]) + tm.assert_series_equal(result, expected) + + # infer int64 from float64 when fillna value is a dict + s = pd.Series([1.0, np.nan]) + result = s.fillna({1: 0}, downcast="infer") + expected = pd.Series([1, 0]) + tm.assert_series_equal(result, expected) + + def test_fillna_int(self): + s = Series(np.random.randint(-100, 100, 50)) + s.fillna(method="ffill", inplace=True) + tm.assert_series_equal(s.fillna(method="ffill", inplace=False), s) + + def 
test_fillna_raise(self): + s = Series(np.random.randint(-100, 100, 50)) + msg = '"value" parameter must be a scalar or dict, but you passed a "list"' + with pytest.raises(TypeError, match=msg): + s.fillna([1, 2]) + + msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' + with pytest.raises(TypeError, match=msg): + s.fillna((1, 2)) + + # related GH 9217, make sure limit is an int and greater than 0 + s = Series([1, 2, 3, None]) + msg = ( + r"Cannot specify both 'value' and 'method'\.|" + r"Limit must be greater than 0|" + "Limit must be an integer" + ) + for limit in [-1, 0, 1.0, 2.0]: + for method in ["backfill", "bfill", "pad", "ffill", None]: + with pytest.raises(ValueError, match=msg): + s.fillna(1, limit=limit, method=method) + + def test_categorical_nan_equality(self): + cat = Series(Categorical(["a", "b", "c", np.nan])) + exp = Series([True, True, True, False]) + res = cat == cat + tm.assert_series_equal(res, exp) + + def test_categorical_nan_handling(self): + + # NaNs are represented as -1 in labels + s = Series(Categorical(["a", "b", np.nan, "a"])) + tm.assert_index_equal(s.cat.categories, Index(["a", "b"])) + tm.assert_numpy_array_equal( + s.values.codes, np.array([0, 1, -1, 0], dtype=np.int8) + ) + + @pytest.mark.parametrize( + "fill_value, expected_output", + [ + ("a", ["a", "a", "b", "a", "a"]), + ({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]), + ({1: "a"}, ["a", "a", "b", np.nan, np.nan]), + ({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]), + (Series("a"), ["a", np.nan, "b", np.nan, np.nan]), + (Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]), + (Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]), + (Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]), + ], + ) + def test_fillna_categorical(self, fill_value, expected_output): + # GH 17033 + # Test fillna for a Categorical series + data = ["a", np.nan, "b", np.nan, np.nan] + s = Series(Categorical(data, categories=["a", "b"])) + exp = 
Series(Categorical(expected_output, categories=["a", "b"])) + tm.assert_series_equal(s.fillna(fill_value), exp) + + @pytest.mark.parametrize( + "fill_value, expected_output", + [ + (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]), + (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]), + ( + Series( + Categorical( + ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"] + ) + ), + ["a", "d", "b", "d", "a"], + ), + ], + ) + def test_fillna_categorical_with_new_categories(self, fill_value, expected_output): + # GH 26215 + data = ["a", np.nan, "b", np.nan, np.nan] + s = Series(Categorical(data, categories=["a", "b", "c", "d", "e"])) + exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"])) + tm.assert_series_equal(s.fillna(fill_value), exp) + + def test_fillna_categorical_raise(self): + data = ["a", np.nan, "b", np.nan, np.nan] + s = Series(Categorical(data, categories=["a", "b"])) + + with pytest.raises(ValueError, match="fill value must be in categories"): + s.fillna("d") + + with pytest.raises(ValueError, match="fill value must be in categories"): + s.fillna(Series("d")) + + with pytest.raises(ValueError, match="fill value must be in categories"): + s.fillna({1: "d", 3: "a"}) + + msg = '"value" parameter must be a scalar or dict, but you passed a "list"' + with pytest.raises(TypeError, match=msg): + s.fillna(["a", "b"]) + + msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"' + with pytest.raises(TypeError, match=msg): + s.fillna(("a", "b")) + + msg = ( + '"value" parameter must be a scalar, dict ' + 'or Series, but you passed a "DataFrame"' + ) + with pytest.raises(TypeError, match=msg): + s.fillna(DataFrame({1: ["a"], 3: ["b"]})) + + def test_fillna_nat(self): + series = Series([0, 1, 2, iNaT], dtype="M8[ns]") + + filled = series.fillna(method="pad") + filled2 = series.fillna(value=series.values[2]) + + expected = series.copy() + expected.values[3] = expected.values[2] + + 
tm.assert_series_equal(filled, expected) + tm.assert_series_equal(filled2, expected) + + df = DataFrame({"A": series}) + filled = df.fillna(method="pad") + filled2 = df.fillna(value=series.values[2]) + expected = DataFrame({"A": expected}) + tm.assert_frame_equal(filled, expected) + tm.assert_frame_equal(filled2, expected) + + series = Series([iNaT, 0, 1, 2], dtype="M8[ns]") + + filled = series.fillna(method="bfill") + filled2 = series.fillna(value=series[1]) + + expected = series.copy() + expected[0] = expected[1] + + tm.assert_series_equal(filled, expected) + tm.assert_series_equal(filled2, expected) + + df = DataFrame({"A": series}) + filled = df.fillna(method="bfill") + filled2 = df.fillna(value=series[1]) + expected = DataFrame({"A": expected}) + tm.assert_frame_equal(filled, expected) + tm.assert_frame_equal(filled2, expected) + + def test_isna_for_inf(self): + s = Series(["a", np.inf, np.nan, 1.0]) + with pd.option_context("mode.use_inf_as_na", True): + r = s.isna() + dr = s.dropna() + e = Series([False, True, True, False]) + de = Series(["a", 1.0], index=[0, 3]) + tm.assert_series_equal(r, e) + tm.assert_series_equal(dr, de) + + def test_isnull_for_inf_deprecated(self): + # gh-17115 + s = Series(["a", np.inf, np.nan, 1.0]) + with pd.option_context("mode.use_inf_as_null", True): + r = s.isna() + dr = s.dropna() + + e = Series([False, True, True, False]) + de = Series(["a", 1.0], index=[0, 3]) + tm.assert_series_equal(r, e) + tm.assert_series_equal(dr, de) + + def test_fillna(self, datetime_series): + ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + + tm.assert_series_equal(ts, ts.fillna(method="ffill")) + + ts[2] = np.NaN + + exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index) + tm.assert_series_equal(ts.fillna(method="ffill"), exp) + + exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index) + tm.assert_series_equal(ts.fillna(method="backfill"), exp) + + exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index) + 
tm.assert_series_equal(ts.fillna(value=5), exp) + + msg = "Must specify a fill 'value' or 'method'" + with pytest.raises(ValueError, match=msg): + ts.fillna() + + msg = "Cannot specify both 'value' and 'method'" + with pytest.raises(ValueError, match=msg): + datetime_series.fillna(value=0, method="ffill") + + # GH 5703 + s1 = Series([np.nan]) + s2 = Series([1]) + result = s1.fillna(s2) + expected = Series([1.0]) + tm.assert_series_equal(result, expected) + result = s1.fillna({}) + tm.assert_series_equal(result, s1) + result = s1.fillna(Series((), dtype=object)) + tm.assert_series_equal(result, s1) + result = s2.fillna(s1) + tm.assert_series_equal(result, s2) + result = s1.fillna({0: 1}) + tm.assert_series_equal(result, expected) + result = s1.fillna({1: 1}) + tm.assert_series_equal(result, Series([np.nan])) + result = s1.fillna({0: 1, 1: 1}) + tm.assert_series_equal(result, expected) + result = s1.fillna(Series({0: 1, 1: 1})) + tm.assert_series_equal(result, expected) + result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5])) + tm.assert_series_equal(result, s1) + + s1 = Series([0, 1, 2], list("abc")) + s2 = Series([0, np.nan, 2], list("bac")) + result = s2.fillna(s1) + expected = Series([0, 0, 2.0], list("bac")) + tm.assert_series_equal(result, expected) + + # limit + s = Series(np.nan, index=[0, 1, 2]) + result = s.fillna(999, limit=1) + expected = Series([999, np.nan, np.nan], index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + result = s.fillna(999, limit=2) + expected = Series([999, 999, np.nan], index=[0, 1, 2]) + tm.assert_series_equal(result, expected) + + # GH 9043 + # make sure a string representation of int/float values can be filled + # correctly without raising errors or being converted + vals = ["0", "1.5", "-0.3"] + for val in vals: + s = Series([0, 1, np.nan, np.nan, 4], dtype="float64") + result = s.fillna(val) + expected = Series([0, 1, val, val, 4], dtype="object") + tm.assert_series_equal(result, expected) + + def 
test_fillna_bug(self): + x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) + filled = x.fillna(method="ffill") + expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], x.index) + tm.assert_series_equal(filled, expected) + + filled = x.fillna(method="bfill") + expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], x.index) + tm.assert_series_equal(filled, expected) + + def test_fillna_inplace(self): + x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"]) + y = x.copy() + + y.fillna(value=0, inplace=True) + + expected = x.fillna(value=0) + tm.assert_series_equal(y, expected) + + def test_fillna_invalid_method(self, datetime_series): + try: + datetime_series.fillna(method="ffil") + except ValueError as inst: + assert "ffil" in str(inst) + + def test_ffill(self): + ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + ts[2] = np.NaN + tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill")) + + def test_ffill_mixed_dtypes_without_missing_data(self): + # GH14956 + series = pd.Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1]) + result = series.ffill() + tm.assert_series_equal(series, result) + + def test_bfill(self): + ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5)) + ts[2] = np.NaN + tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill")) + + def test_timedelta64_nan(self): + + td = Series([timedelta(days=i) for i in range(10)]) + + # nan ops on timedeltas + td1 = td.copy() + td1[0] = np.nan + assert isna(td1[0]) + assert td1[0].value == iNaT + td1[0] = td[0] + assert not isna(td1[0]) + + # GH#16674 iNaT is treated as an integer when given by the user + td1[1] = iNaT + assert not isna(td1[1]) + assert td1.dtype == np.object_ + assert td1[1] == iNaT + td1[1] = td[1] + assert not isna(td1[1]) + + td1[2] = NaT + assert isna(td1[2]) + assert td1[2].value == iNaT + td1[2] = td[2] + assert not isna(td1[2]) + + # FIXME: don't leave commented-out + # boolean setting + # this doesn't work, not sure numpy 
even supports it + # result = td[(td>np.timedelta64(timedelta(days=3))) & + # td= -0.5) & (datetime_series <= 0.5) + # tm.assert_series_equal(selector, expected) + + def test_dropna_empty(self): + s = Series([], dtype=object) + + assert len(s.dropna()) == 0 + s.dropna(inplace=True) + assert len(s) == 0 + + # invalid axis + msg = "No axis named 1 for object type " + with pytest.raises(ValueError, match=msg): + s.dropna(axis=1) + + def test_datetime64_tz_dropna(self): + # DatetimeBlock + s = Series( + [ + Timestamp("2011-01-01 10:00"), + pd.NaT, + Timestamp("2011-01-03 10:00"), + pd.NaT, + ] + ) + result = s.dropna() + expected = Series( + [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")], index=[0, 2] + ) + tm.assert_series_equal(result, expected) + + # DatetimeBlockTZ + idx = pd.DatetimeIndex( + ["2011-01-01 10:00", pd.NaT, "2011-01-03 10:00", pd.NaT], tz="Asia/Tokyo" + ) + s = pd.Series(idx) + assert s.dtype == "datetime64[ns, Asia/Tokyo]" + result = s.dropna() + expected = Series( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"), + ], + index=[0, 2], + ) + assert result.dtype == "datetime64[ns, Asia/Tokyo]" + tm.assert_series_equal(result, expected) + + def test_dropna_no_nan(self): + for s in [Series([1, 2, 3], name="x"), Series([False, True, False], name="x")]: + + result = s.dropna() + tm.assert_series_equal(result, s) + assert result is not s + + s2 = s.copy() + s2.dropna(inplace=True) + tm.assert_series_equal(s2, s) + + def test_dropna_intervals(self): + s = Series( + [np.nan, 1, 2, 3], + IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]), + ) + + result = s.dropna() + expected = s.iloc[1:] + tm.assert_series_equal(result, expected) + + def test_valid(self, datetime_series): + ts = datetime_series.copy() + ts[::2] = np.NaN + + result = ts.dropna() + assert len(result) == ts.count() + tm.assert_series_equal(result, ts[1::2]) + tm.assert_series_equal(result, ts[pd.notna(ts)]) + + 
def test_isna(self): + ser = Series([0, 5.4, 3, np.nan, -0.001]) + expected = Series([False, False, False, True, False]) + tm.assert_series_equal(ser.isna(), expected) + + ser = Series(["hi", "", np.nan]) + expected = Series([False, False, True]) + tm.assert_series_equal(ser.isna(), expected) + + def test_notna(self): + ser = Series([0, 5.4, 3, np.nan, -0.001]) + expected = Series([True, True, True, False, True]) + tm.assert_series_equal(ser.notna(), expected) + + ser = Series(["hi", "", np.nan]) + expected = Series([True, True, False]) + tm.assert_series_equal(ser.notna(), expected) + + def test_pad_nan(self): + x = Series( + [np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float + ) + + x.fillna(method="pad", inplace=True) + + expected = Series( + [np.nan, 1.0, 1.0, 3.0, 3.0], ["z", "a", "b", "c", "d"], dtype=float + ) + tm.assert_series_equal(x[1:], expected[1:]) + assert np.isnan(x[0]), np.isnan(expected[0]) + + def test_pad_require_monotonicity(self): + rng = date_range("1/1/2000", "3/1/2000", freq="B") + + # neither monotonic increasing or decreasing + rng2 = rng[[1, 0, 2]] + + msg = "index must be monotonic increasing or decreasing" + with pytest.raises(ValueError, match=msg): + rng2.get_indexer(rng, method="pad") + + def test_dropna_preserve_name(self, datetime_series): + datetime_series[:5] = np.nan + result = datetime_series.dropna() + assert result.name == datetime_series.name + name = datetime_series.name + ts = datetime_series.copy() + ts.dropna(inplace=True) + assert ts.name == name + + def test_fill_value_when_combine_const(self): + # GH12723 + s = Series([0, 1, np.nan, 3, 4, 5]) + + exp = s.fillna(0).add(2) + res = s.add(2, fill_value=0) + tm.assert_series_equal(res, exp) + + def test_series_fillna_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index) + result = result.fillna(method="pad", limit=5) + + expected = s[:2].reindex(index).fillna(method="pad") + 
expected[-3:] = np.nan + tm.assert_series_equal(result, expected) + + result = s[-2:].reindex(index) + result = result.fillna(method="bfill", limit=5) + + expected = s[-2:].reindex(index).fillna(method="backfill") + expected[:3] = np.nan + tm.assert_series_equal(result, expected) + + def test_series_pad_backfill_limit(self): + index = np.arange(10) + s = Series(np.random.randn(10), index=index) + + result = s[:2].reindex(index, method="pad", limit=5) + + expected = s[:2].reindex(index).fillna(method="pad") + expected[-3:] = np.nan + tm.assert_series_equal(result, expected) + + result = s[-2:].reindex(index, method="backfill", limit=5) + + expected = s[-2:].reindex(index).fillna(method="backfill") + expected[:3] = np.nan + tm.assert_series_equal(result, expected) + + +@pytest.fixture( + params=[ + "linear", + "index", + "values", + "nearest", + "slinear", + "zero", + "quadratic", + "cubic", + "barycentric", + "krogh", + "polynomial", + "spline", + "piecewise_polynomial", + "from_derivatives", + "pchip", + "akima", + ] +) +def nontemporal_method(request): + """ Fixture that returns an (method name, required kwargs) pair. + + This fixture does not include method 'time' as a parameterization; that + method requires a Series with a DatetimeIndex, and is generally tested + separately from these non-temporal methods. + """ + method = request.param + kwargs = dict(order=1) if method in ("spline", "polynomial") else dict() + return method, kwargs + + +@pytest.fixture( + params=[ + "linear", + "slinear", + "zero", + "quadratic", + "cubic", + "barycentric", + "krogh", + "polynomial", + "spline", + "piecewise_polynomial", + "from_derivatives", + "pchip", + "akima", + ] +) +def interp_methods_ind(request): + """ Fixture that returns a (method name, required kwargs) pair to + be tested for various Index types. 
+ + This fixture does not include methods - 'time', 'index', 'nearest', + 'values' as a parameterization + """ + method = request.param + kwargs = dict(order=1) if method in ("spline", "polynomial") else dict() + return method, kwargs + + +class TestSeriesInterpolateData: + def test_interpolate(self, datetime_series, string_series): + ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index) + + ts_copy = ts.copy() + ts_copy[5:10] = np.NaN + + linear_interp = ts_copy.interpolate(method="linear") + tm.assert_series_equal(linear_interp, ts) + + ord_ts = Series( + [d.toordinal() for d in datetime_series.index], index=datetime_series.index + ).astype(float) + + ord_ts_copy = ord_ts.copy() + ord_ts_copy[5:10] = np.NaN + + time_interp = ord_ts_copy.interpolate(method="time") + tm.assert_series_equal(time_interp, ord_ts) + + def test_interpolate_time_raises_for_non_timeseries(self): + # When method='time' is used on a non-TimeSeries that contains a null + # value, a ValueError should be raised. 
+ non_ts = Series([0, 1, 2, np.NaN]) + msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex" + with pytest.raises(ValueError, match=msg): + non_ts.interpolate(method="time") + + @td.skip_if_no_scipy + def test_interpolate_pchip(self): + + ser = Series(np.sort(np.random.uniform(size=100))) + + # interpolate at new_index + new_index = ser.index.union( + Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]) + ).astype(float) + interp_s = ser.reindex(new_index).interpolate(method="pchip") + # does not blow up, GH5977 + interp_s[49:51] + + @td.skip_if_no_scipy + def test_interpolate_akima(self): + + ser = Series([10, 11, 12, 13]) + + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = ser.reindex(new_index).interpolate(method="akima") + tm.assert_series_equal(interp_s[1:3], expected) + + @td.skip_if_no_scipy + def test_interpolate_piecewise_polynomial(self): + ser = Series([10, 11, 12, 13]) + + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial") + tm.assert_series_equal(interp_s[1:3], expected) + + @td.skip_if_no_scipy + def test_interpolate_from_derivatives(self): + ser = Series([10, 11, 12, 13]) + + expected = Series( + [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]), + ) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( + float + ) + interp_s = 
ser.reindex(new_index).interpolate(method="from_derivatives") + tm.assert_series_equal(interp_s[1:3], expected) + + @pytest.mark.parametrize( + "kwargs", + [ + {}, + pytest.param( + {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy + ), + ], + ) + def test_interpolate_corners(self, kwargs): + s = Series([np.nan, np.nan]) + tm.assert_series_equal(s.interpolate(**kwargs), s) + + s = Series([], dtype=object).interpolate() + tm.assert_series_equal(s.interpolate(**kwargs), s) + + def test_interpolate_index_values(self): + s = Series(np.nan, index=np.sort(np.random.rand(30))) + s[::3] = np.random.randn(10) + + vals = s.index.values.astype(float) + + result = s.interpolate(method="index") + + expected = s.copy() + bad = isna(expected.values) + good = ~bad + expected = Series( + np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad] + ) + + tm.assert_series_equal(result[bad], expected) + + # 'values' is synonymous with 'index' for the method kwarg + other_result = s.interpolate(method="values") + + tm.assert_series_equal(other_result, result) + tm.assert_series_equal(other_result[bad], expected) + + def test_interpolate_non_ts(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + msg = ( + "time-weighted interpolation only works on Series or DataFrames " + "with a DatetimeIndex" + ) + with pytest.raises(ValueError, match=msg): + s.interpolate(method="time") + + @pytest.mark.parametrize( + "kwargs", + [ + {}, + pytest.param( + {"method": "polynomial", "order": 1}, marks=td.skip_if_no_scipy + ), + ], + ) + def test_nan_interpolate(self, kwargs): + s = Series([0, 1, np.nan, 3]) + result = s.interpolate(**kwargs) + expected = Series([0.0, 1.0, 2.0, 3.0]) + tm.assert_series_equal(result, expected) + + def test_nan_irregular_index(self): + s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) + result = s.interpolate() + expected = Series([1.0, 2.0, 3.0, 4.0], index=[1, 3, 5, 9]) + tm.assert_series_equal(result, expected) + + def 
test_nan_str_index(self): + s = Series([0, 1, 2, np.nan], index=list("abcd")) + result = s.interpolate() + expected = Series([0.0, 1.0, 2.0, 2.0], index=list("abcd")) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_interp_quad(self): + sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4]) + result = sq.interpolate(method="quadratic") + expected = Series([1.0, 4.0, 9.0, 16.0], index=[1, 2, 3, 4]) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_interp_scipy_basic(self): + s = Series([1, 3, np.nan, 12, np.nan, 25]) + # slinear + expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0]) + result = s.interpolate(method="slinear") + tm.assert_series_equal(result, expected) + + result = s.interpolate(method="slinear", downcast="infer") + tm.assert_series_equal(result, expected) + # nearest + expected = Series([1, 3, 3, 12, 12, 25]) + result = s.interpolate(method="nearest") + tm.assert_series_equal(result, expected.astype("float")) + + result = s.interpolate(method="nearest", downcast="infer") + tm.assert_series_equal(result, expected) + # zero + expected = Series([1, 3, 3, 12, 12, 25]) + result = s.interpolate(method="zero") + tm.assert_series_equal(result, expected.astype("float")) + + result = s.interpolate(method="zero", downcast="infer") + tm.assert_series_equal(result, expected) + # quadratic + # GH #15662. 
+ expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0]) + result = s.interpolate(method="quadratic") + tm.assert_series_equal(result, expected) + + result = s.interpolate(method="quadratic", downcast="infer") + tm.assert_series_equal(result, expected) + # cubic + expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0]) + result = s.interpolate(method="cubic") + tm.assert_series_equal(result, expected) + + def test_interp_limit(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) + result = s.interpolate(method="linear", limit=2) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("limit", [-1, 0]) + def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit): + # GH 9217: make sure limit is greater than zero. + s = pd.Series([1, 2, np.nan, 4]) + method, kwargs = nontemporal_method + with pytest.raises(ValueError, match="Limit must be greater than 0"): + s.interpolate(limit=limit, method=method, **kwargs) + + def test_interpolate_invalid_float_limit(self, nontemporal_method): + # GH 9217: make sure limit is an integer. + s = pd.Series([1, 2, np.nan, 4]) + method, kwargs = nontemporal_method + limit = 2.0 + with pytest.raises(ValueError, match="Limit must be an integer"): + s.interpolate(limit=limit, method=method, **kwargs) + + @pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"]) + def test_interp_invalid_method(self, invalid_method): + s = Series([1, 3, np.nan, 12, np.nan, 25]) + + msg = f"method must be one of.* Got '{invalid_method}' instead" + with pytest.raises(ValueError, match=msg): + s.interpolate(method=invalid_method) + + # When an invalid method and invalid limit (such as -1) are + # provided, the error message reflects the invalid method. 
+ with pytest.raises(ValueError, match=msg): + s.interpolate(method=invalid_method, limit=-1) + + def test_interp_limit_forward(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + # Provide 'forward' (the default) explicitly here. + expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0]) + + result = s.interpolate(method="linear", limit=2, limit_direction="forward") + tm.assert_series_equal(result, expected) + + result = s.interpolate(method="linear", limit=2, limit_direction="FORWARD") + tm.assert_series_equal(result, expected) + + def test_interp_unlimited(self): + # these test are for issue #16282 default Limit=None is unlimited + s = Series([np.nan, 1.0, 3.0, np.nan, np.nan, np.nan, 11.0, np.nan]) + expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) + result = s.interpolate(method="linear", limit_direction="both") + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]) + result = s.interpolate(method="linear", limit_direction="forward") + tm.assert_series_equal(result, expected) + + expected = Series([1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, np.nan]) + result = s.interpolate(method="linear", limit_direction="backward") + tm.assert_series_equal(result, expected) + + def test_interp_limit_bad_direction(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + msg = ( + r"Invalid limit_direction: expecting one of \['forward'," + r" 'backward', 'both'\], got 'abc'" + ) + with pytest.raises(ValueError, match=msg): + s.interpolate(method="linear", limit=2, limit_direction="abc") + + # raises an error even if no limit is specified. + with pytest.raises(ValueError, match=msg): + s.interpolate(method="linear", limit_direction="abc") + + # limit_area introduced GH #16284 + def test_interp_limit_area(self): + # These tests are for issue #9218 -- fill NaNs in both directions. 
+ s = Series([np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan]) + + expected = Series([np.nan, np.nan, 3.0, 4.0, 5.0, 6.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="linear", limit_area="inside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, 4.0, np.nan, np.nan, 7.0, np.nan, np.nan] + ) + result = s.interpolate(method="linear", limit_area="inside", limit=1) + + expected = Series([np.nan, np.nan, 3.0, 4.0, np.nan, 6.0, 7.0, np.nan, np.nan]) + result = s.interpolate( + method="linear", limit_area="inside", limit_direction="both", limit=1 + ) + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0]) + result = s.interpolate(method="linear", limit_area="outside") + tm.assert_series_equal(result, expected) + + expected = Series( + [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan] + ) + result = s.interpolate(method="linear", limit_area="outside", limit=1) + + expected = Series([np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan]) + result = s.interpolate( + method="linear", limit_area="outside", limit_direction="both", limit=1 + ) + tm.assert_series_equal(result, expected) + + expected = Series([3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan]) + result = s.interpolate( + method="linear", limit_area="outside", direction="backward" + ) + + # raises an error even if limit type is wrong. + msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc" + with pytest.raises(ValueError, match=msg): + s.interpolate(method="linear", limit_area="abc") + + def test_interp_limit_direction(self): + # These tests are for issue #9218 -- fill NaNs in both directions. 
+ s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0]) + result = s.interpolate(method="linear", limit=2, limit_direction="backward") + tm.assert_series_equal(result, expected) + + expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0]) + result = s.interpolate(method="linear", limit=1, limit_direction="both") + tm.assert_series_equal(result, expected) + + # Check that this works on a longer series of nans. + s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan]) + + expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]) + result = s.interpolate(method="linear", limit=2, limit_direction="both") + tm.assert_series_equal(result, expected) + + expected = Series( + [1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0] + ) + result = s.interpolate(method="linear", limit=1, limit_direction="both") + tm.assert_series_equal(result, expected) + + def test_interp_limit_to_ends(self): + # These test are for issue #10420 -- flow back to beginning. + s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) + + expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan]) + result = s.interpolate(method="linear", limit=2, limit_direction="backward") + tm.assert_series_equal(result, expected) + + expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0]) + result = s.interpolate(method="linear", limit=2, limit_direction="both") + tm.assert_series_equal(result, expected) + + def test_interp_limit_before_ends(self): + # These test are for issue #11115 -- limit ends properly. 
+ s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) + + expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan]) + result = s.interpolate(method="linear", limit=1, limit_direction="forward") + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan]) + result = s.interpolate(method="linear", limit=1, limit_direction="backward") + tm.assert_series_equal(result, expected) + + expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan]) + result = s.interpolate(method="linear", limit=1, limit_direction="both") + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_interp_all_good(self): + s = Series([1, 2, 3]) + result = s.interpolate(method="polynomial", order=1) + tm.assert_series_equal(result, s) + + # non-scipy + result = s.interpolate() + tm.assert_series_equal(result, s) + + @pytest.mark.parametrize( + "check_scipy", [False, pytest.param(True, marks=td.skip_if_no_scipy)] + ) + def test_interp_multiIndex(self, check_scipy): + idx = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")]) + s = Series([1, 2, np.nan], index=idx) + + expected = s.copy() + expected.loc[2] = 2 + result = s.interpolate() + tm.assert_series_equal(result, expected) + + msg = "Only `method=linear` interpolation is supported on MultiIndexes" + if check_scipy: + with pytest.raises(ValueError, match=msg): + s.interpolate(method="polynomial", order=1) + + @td.skip_if_no_scipy + def test_interp_nonmono_raise(self): + s = Series([1, np.nan, 3], index=[0, 2, 1]) + msg = "krogh interpolation requires that the index be monotonic" + with pytest.raises(ValueError, match=msg): + s.interpolate(method="krogh") + + @td.skip_if_no_scipy + @pytest.mark.parametrize("method", ["nearest", "pad"]) + def test_interp_datetime64(self, method, tz_naive_fixture): + df = Series( + [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture) + ) + result = df.interpolate(method=method) + expected = Series( + [1.0, 1.0, 3.0], + 
index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture), + ) + tm.assert_series_equal(result, expected) + + def test_interp_pad_datetime64tz_values(self): + # GH#27628 missing.interpolate_2d should handle datetimetz values + dti = pd.date_range("2015-04-05", periods=3, tz="US/Central") + ser = pd.Series(dti) + ser[1] = pd.NaT + result = ser.interpolate(method="pad") + + expected = pd.Series(dti) + expected[1] = expected[0] + tm.assert_series_equal(result, expected) + + def test_interp_limit_no_nans(self): + # GH 7173 + s = pd.Series([1.0, 2.0, 3.0]) + result = s.interpolate(limit=1) + expected = s + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("method", ["polynomial", "spline"]) + def test_no_order(self, method): + # see GH-10633, GH-24014 + s = Series([0, 1, np.nan, 3]) + msg = "You must specify the order of the spline or polynomial" + with pytest.raises(ValueError, match=msg): + s.interpolate(method=method) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan]) + def test_interpolate_spline_invalid_order(self, order): + s = Series([0, 1, np.nan, 3]) + msg = "order needs to be specified and greater than 0" + with pytest.raises(ValueError, match=msg): + s.interpolate(method="spline", order=order) + + @td.skip_if_no_scipy + def test_spline(self): + s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) + result = s.interpolate(method="spline", order=1) + expected = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) + tm.assert_series_equal(result, expected) + + @td.skip_if_no_scipy + def test_spline_extrapolate(self): + s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) + result3 = s.interpolate(method="spline", order=1, ext=3) + expected3 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0]) + tm.assert_series_equal(result3, expected3) + + result1 = s.interpolate(method="spline", order=1, ext=0) + expected1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]) + tm.assert_series_equal(result1, expected1) + + 
@td.skip_if_no_scipy + def test_spline_smooth(self): + s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) + assert ( + s.interpolate(method="spline", order=3, s=0)[5] + != s.interpolate(method="spline", order=3)[5] + ) + + @td.skip_if_no_scipy + def test_spline_interpolation(self): + s = Series(np.arange(10) ** 2) + s[np.random.randint(0, 9, 3)] = np.nan + result1 = s.interpolate(method="spline", order=1) + expected1 = s.interpolate(method="spline", order=1) + tm.assert_series_equal(result1, expected1) + + def test_interp_timedelta64(self): + # GH 6424 + df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3])) + result = df.interpolate(method="time") + expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3])) + tm.assert_series_equal(result, expected) + + # test for non uniform spacing + df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4])) + result = df.interpolate(method="time") + expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4])) + tm.assert_series_equal(result, expected) + + def test_series_interpolate_method_values(self): + # #1646 + ts = _simple_ts("1/1/2000", "1/20/2000") + ts[::2] = np.nan + + result = ts.interpolate(method="values") + exp = ts.interpolate() + tm.assert_series_equal(result, exp) + + def test_series_interpolate_intraday(self): + # #1698 + index = pd.date_range("1/1/2012", periods=4, freq="12D") + ts = pd.Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(days=1)).sort_values() + + exp = ts.reindex(new_index).interpolate(method="time") + + index = pd.date_range("1/1/2012", periods=4, freq="12H") + ts = pd.Series([0, 12, 24, 36], index) + new_index = index.append(index + pd.DateOffset(hours=1)).sort_values() + result = ts.reindex(new_index).interpolate(method="time") + + tm.assert_numpy_array_equal(result.values, exp.values) + + @pytest.mark.parametrize( + "ind", + [ + ["a", "b", "c", "d"], + pd.period_range(start="2019-01-01", periods=4), + pd.interval_range(start=0, 
end=4), + ], + ) + def test_interp_non_timedelta_index(self, interp_methods_ind, ind): + # gh 21662 + df = pd.DataFrame([0, 1, np.nan, 3], index=ind) + + method, kwargs = interp_methods_ind + if method == "pchip": + pytest.importorskip("scipy") + + if method == "linear": + result = df[0].interpolate(**kwargs) + expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + tm.assert_series_equal(result, expected) + else: + expected_error = ( + "Index column must be numeric or datetime type when " + f"using {method} method other than linear. " + "Try setting a numeric or datetime index column before " + "interpolating." + ) + with pytest.raises(ValueError, match=expected_error): + df[0].interpolate(method=method, **kwargs) + + def test_interpolate_timedelta_index(self, interp_methods_ind): + """ + Tests for non numerical index types - object, period, timedelta + Note that all methods except time, index, nearest and values + are tested here. + """ + # gh 21662 + ind = pd.timedelta_range(start=1, periods=4) + df = pd.DataFrame([0, 1, np.nan, 3], index=ind) + + method, kwargs = interp_methods_ind + if method == "pchip": + pytest.importorskip("scipy") + + if method in {"linear", "pchip"}: + result = df[0].interpolate(method=method, **kwargs) + expected = pd.Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind) + tm.assert_series_equal(result, expected) + else: + pytest.skip( + "This interpolation method is not supported for Timedelta Index yet." 
+ ) + + @pytest.mark.parametrize( + "ascending, expected_values", + [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])], + ) + def test_interpolate_unsorted_index(self, ascending, expected_values): + # GH 21037 + ts = pd.Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1]) + result = ts.sort_index(ascending=ascending).interpolate(method="index") + expected = pd.Series(data=expected_values, index=expected_values, dtype=float) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py new file mode 100644 index 00000000..bdd9f92d --- /dev/null +++ b/pandas/tests/series/test_operators.py @@ -0,0 +1,936 @@ +from datetime import datetime, timedelta +import operator + +import numpy as np +import pytest + +import pandas as pd +from pandas import Categorical, DataFrame, Index, Series, bdate_range, date_range, isna +import pandas._testing as tm +from pandas.core import ops +import pandas.core.nanops as nanops + + +class TestSeriesLogicalOps: + @pytest.mark.parametrize("bool_op", [operator.and_, operator.or_, operator.xor]) + def test_bool_operators_with_nas(self, bool_op): + # boolean &, |, ^ should work with object arrays and propagate NAs + ser = Series(bdate_range("1/1/2000", periods=10), dtype=object) + ser[::2] = np.nan + + mask = ser.isna() + filled = ser.fillna(ser[0]) + + result = bool_op(ser < ser[9], ser > ser[3]) + + expected = bool_op(filled < filled[9], filled > filled[3]) + expected[mask] = False + tm.assert_series_equal(result, expected) + + def test_logical_operators_bool_dtype_with_empty(self): + # GH#9016: support bitwise op for integer types + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_fff = Series([False, False, False], index=index) + s_empty = Series([], dtype=object) + + res = s_tft & s_empty + expected = s_fff + tm.assert_series_equal(res, expected) + + res = s_tft | s_empty + expected = s_tft + tm.assert_series_equal(res, expected) + + 
@pytest.mark.parametrize( + "left, right, op, expected", + [ + ( + [True, False, np.nan], + [True, False, True], + operator.and_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.and_, + [True, False, False], + ), + ( + [True, False, np.nan], + [True, False, True], + operator.or_, + [True, False, False], + ), + ( + [True, False, True], + [True, False, np.nan], + operator.or_, + [True, False, True], + ), + ], + ) + def test_logical_operators_nans(self, left, right, op, expected): + # GH 13896 + result = op(Series(left), Series(right)) + expected = Series(expected) + + tm.assert_series_equal(result, expected) + + def test_logical_operators_int_dtype_with_int_dtype(self): + # GH#9016: support bitwise op for integer types + + # TODO: unused + # s_0101 = Series([0, 1, 0, 1]) + + s_0123 = Series(range(4), dtype="int64") + s_3333 = Series([3] * 4) + s_4444 = Series([4] * 4) + + res = s_0123 & s_3333 + expected = Series(range(4), dtype="int64") + tm.assert_series_equal(res, expected) + + res = s_0123 | s_4444 + expected = Series(range(4, 8), dtype="int64") + tm.assert_series_equal(res, expected) + + s_1111 = Series([1] * 4, dtype="int8") + res = s_0123 & s_1111 + expected = Series([0, 1, 0, 1], dtype="int64") + tm.assert_series_equal(res, expected) + + res = s_0123.astype(np.int16) | s_1111.astype(np.int32) + expected = Series([1, 1, 3, 3], dtype="int32") + tm.assert_series_equal(res, expected) + + def test_logical_operators_int_dtype_with_int_scalar(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + res = s_0123 & 0 + expected = Series([0] * 4) + tm.assert_series_equal(res, expected) + + res = s_0123 & 1 + expected = Series([0, 1, 0, 1]) + tm.assert_series_equal(res, expected) + + def test_logical_operators_int_dtype_with_float(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + with pytest.raises(TypeError): + s_0123 & np.NaN 
+ with pytest.raises(TypeError): + s_0123 & 3.14 + with pytest.raises(TypeError): + s_0123 & [0.1, 4, 3.14, 2] + with pytest.raises(TypeError): + s_0123 & np.array([0.1, 4, 3.14, 2]) + with pytest.raises(TypeError): + s_0123 & Series([0.1, 4, -3.14, 2]) + + def test_logical_operators_int_dtype_with_str(self): + s_1111 = Series([1] * 4, dtype="int8") + + with pytest.raises(TypeError): + s_1111 & "a" + with pytest.raises(TypeError): + s_1111 & ["a", "b", "c", "d"] + + def test_logical_operators_int_dtype_with_bool(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + expected = Series([False] * 4) + + result = s_0123 & False + tm.assert_series_equal(result, expected) + + result = s_0123 & [False] + tm.assert_series_equal(result, expected) + + result = s_0123 & (False,) + tm.assert_series_equal(result, expected) + + result = s_0123 ^ False + expected = Series([False, True, True, True]) + tm.assert_series_equal(result, expected) + + def test_logical_operators_int_dtype_with_object(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + result = s_0123 & Series([False, np.NaN, False, False]) + expected = Series([False] * 4) + tm.assert_series_equal(result, expected) + + s_abNd = Series(["a", "b", np.NaN, "d"]) + with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"): + s_0123 & s_abNd + + def test_logical_operators_bool_dtype_with_int(self): + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_fff = Series([False, False, False], index=index) + + res = s_tft & 0 + expected = s_fff + tm.assert_series_equal(res, expected) + + res = s_tft & 1 + expected = s_tft + tm.assert_series_equal(res, expected) + + def test_logical_ops_bool_dtype_with_ndarray(self): + # make sure we operate on ndarray the same as Series + left = pd.Series([True, True, True, False, True]) + right = [True, False, None, True, np.nan] + + expected = pd.Series([True, 
False, False, False, False]) + result = left & right + tm.assert_series_equal(result, expected) + result = left & np.array(right) + tm.assert_series_equal(result, expected) + result = left & pd.Index(right) + tm.assert_series_equal(result, expected) + result = left & pd.Series(right) + tm.assert_series_equal(result, expected) + + expected = pd.Series([True, True, True, True, True]) + result = left | right + tm.assert_series_equal(result, expected) + result = left | np.array(right) + tm.assert_series_equal(result, expected) + result = left | pd.Index(right) + tm.assert_series_equal(result, expected) + result = left | pd.Series(right) + tm.assert_series_equal(result, expected) + + expected = pd.Series([False, True, True, True, True]) + result = left ^ right + tm.assert_series_equal(result, expected) + result = left ^ np.array(right) + tm.assert_series_equal(result, expected) + result = left ^ pd.Index(right) + tm.assert_series_equal(result, expected) + result = left ^ pd.Series(right) + tm.assert_series_equal(result, expected) + + def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self): + # GH#9016: support bitwise op for integer types + + # with non-matching indexes, logical operators will cast to object + # before operating + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_tft = Series([True, False, True], index=index) + s_tff = Series([True, False, False], index=index) + + s_0123 = Series(range(4), dtype="int64") + + # s_0123 will be all false now because of reindexing like s_tft + expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) + result = s_tft & s_0123 + tm.assert_series_equal(result, expected) + + expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) + result = s_0123 & s_tft + tm.assert_series_equal(result, expected) + + s_a0b1c0 = Series([1], list("b")) + + res = s_tft & s_a0b1c0 + expected = s_tff.reindex(list("abc")) + tm.assert_series_equal(res, expected) + + res = s_tft | s_a0b1c0 + 
expected = s_tft.reindex(list("abc")) + tm.assert_series_equal(res, expected) + + def test_scalar_na_logical_ops_corners(self): + s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) + + with pytest.raises(TypeError): + s & datetime(2005, 1, 1) + + s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) + s[::2] = np.nan + + expected = Series(True, index=s.index) + expected[::2] = False + result = s & list(s) + tm.assert_series_equal(result, expected) + + d = DataFrame({"A": s}) + # TODO: Fix this exception - needs to be fixed! (see GH5035) + # (previously this was a TypeError because series returned + # NotImplemented + + # this is an alignment issue; these are equivalent + # https://github.com/pandas-dev/pandas/issues/5284 + + with pytest.raises(TypeError): + d.__and__(s, axis="columns") + with pytest.raises(TypeError): + d.__and__(s, axis=1) + + with pytest.raises(TypeError): + s & d + with pytest.raises(TypeError): + d & s + + expected = (s & s).to_frame("A") + result = d.__and__(s, axis="index") + tm.assert_frame_equal(result, expected) + + result = d.__and__(s, axis=0) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor]) + def test_logical_ops_with_index(self, op): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + expected = Series([op(ser[n], idx1[n]) for n in range(len(ser))]) + + result = op(ser, idx1) + tm.assert_series_equal(result, expected) + + expected = Series([op(ser[n], idx2[n]) for n in range(len(ser))], dtype=bool) + + result = op(ser, idx2) + tm.assert_series_equal(result, expected) + + def test_reversed_xor_with_index_returns_index(self): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + expected = Index.symmetric_difference(idx1, ser) + result = idx1 ^ ser + tm.assert_index_equal(result, expected) + + 
expected = Index.symmetric_difference(idx2, ser) + result = idx2 ^ ser + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "op", + [ + pytest.param( + ops.rand_, + marks=pytest.mark.xfail( + reason="GH#22092 Index __and__ returns Index intersection", + raises=AssertionError, + strict=True, + ), + ), + pytest.param( + ops.ror_, + marks=pytest.mark.xfail( + reason="GH#22092 Index __or__ returns Index union", + raises=AssertionError, + strict=True, + ), + ), + ], + ) + def test_reversed_logical_op_with_index_returns_series(self, op): + # GH#22092, GH#19792 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + expected = pd.Series(op(idx1.values, ser.values)) + result = op(ser, idx1) + tm.assert_series_equal(result, expected) + + expected = pd.Series(op(idx2.values, ser.values)) + result = op(ser, idx2) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "op, expected", + [ + (ops.rand_, pd.Index([False, True])), + (ops.ror_, pd.Index([False, True])), + (ops.rxor, pd.Index([])), + ], + ) + def test_reverse_ops_with_index(self, op, expected): + # https://github.com/pandas-dev/pandas/pull/23628 + # multi-set Index ops are buggy, so let's avoid duplicates... 
+ ser = Series([True, False]) + idx = Index([False, True]) + result = op(ser, idx) + tm.assert_index_equal(result, expected) + + def test_logical_ops_label_based(self): + # GH#4947 + # logical ops should be label based + + a = Series([True, False, True], list("bca")) + b = Series([False, True, False], list("abc")) + + expected = Series([False, True, False], list("abc")) + result = a & b + tm.assert_series_equal(result, expected) + + expected = Series([True, True, False], list("abc")) + result = a | b + tm.assert_series_equal(result, expected) + + expected = Series([True, False, False], list("abc")) + result = a ^ b + tm.assert_series_equal(result, expected) + + # rhs is bigger + a = Series([True, False, True], list("bca")) + b = Series([False, True, False, True], list("abcd")) + + expected = Series([False, True, False, False], list("abcd")) + result = a & b + tm.assert_series_equal(result, expected) + + expected = Series([True, True, False, False], list("abcd")) + result = a | b + tm.assert_series_equal(result, expected) + + # filling + + # vs empty + empty = Series([], dtype=object) + + result = a & empty.copy() + expected = Series([False, False, False], list("bca")) + tm.assert_series_equal(result, expected) + + result = a | empty.copy() + expected = Series([True, False, True], list("bca")) + tm.assert_series_equal(result, expected) + + # vs non-matching + result = a & Series([1], ["z"]) + expected = Series([False, False, False, False], list("abcz")) + tm.assert_series_equal(result, expected) + + result = a | Series([1], ["z"]) + expected = Series([True, True, False, False], list("abcz")) + tm.assert_series_equal(result, expected) + + # identity + # we would like s[s|e] == s to hold for any e, whether empty or not + for e in [ + empty.copy(), + Series([1], ["z"]), + Series(np.nan, b.index), + Series(np.nan, a.index), + ]: + result = a[a | e] + tm.assert_series_equal(result, a[a]) + + for e in [Series(["z"])]: + result = a[a | e] + tm.assert_series_equal(result, 
a[a]) + + # vs scalars + index = list("bca") + t = Series([True, False, True]) + + for v in [True, 1, 2]: + result = Series([True, False, True], index=index) | v + expected = Series([True, True, True], index=index) + tm.assert_series_equal(result, expected) + + for v in [np.nan, "foo"]: + with pytest.raises(TypeError): + t | v + + for v in [False, 0]: + result = Series([True, False, True], index=index) | v + expected = Series([True, False, True], index=index) + tm.assert_series_equal(result, expected) + + for v in [True, 1]: + result = Series([True, False, True], index=index) & v + expected = Series([True, False, True], index=index) + tm.assert_series_equal(result, expected) + + for v in [False, 0]: + result = Series([True, False, True], index=index) & v + expected = Series([False, False, False], index=index) + tm.assert_series_equal(result, expected) + for v in [np.nan]: + with pytest.raises(TypeError): + t & v + + def test_logical_ops_df_compat(self): + # GH#1134 + s1 = pd.Series([True, False, True], index=list("ABC"), name="x") + s2 = pd.Series([True, True, False], index=list("ABD"), name="x") + + exp = pd.Series([True, False, False, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s1 & s2, exp) + tm.assert_series_equal(s2 & s1, exp) + + # True | np.nan => True + exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s1 | s2, exp_or1) + # np.nan | True => np.nan, filled with False + exp_or = pd.Series([True, True, False, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s2 | s1, exp_or) + + # DataFrame doesn't fill nan with False + tm.assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame()) + tm.assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame()) + + exp = pd.DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD")) + tm.assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame()) + tm.assert_frame_equal(s2.to_frame() | s1.to_frame(), 
exp_or.to_frame()) + + # different length + s3 = pd.Series([True, False, True], index=list("ABC"), name="x") + s4 = pd.Series([True, True, True, True], index=list("ABCD"), name="x") + + exp = pd.Series([True, False, True, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s3 & s4, exp) + tm.assert_series_equal(s4 & s3, exp) + + # np.nan | True => np.nan, filled with False + exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x") + tm.assert_series_equal(s3 | s4, exp_or1) + # True | np.nan => True + exp_or = pd.Series([True, True, True, True], index=list("ABCD"), name="x") + tm.assert_series_equal(s4 | s3, exp_or) + + tm.assert_frame_equal(s3.to_frame() & s4.to_frame(), exp.to_frame()) + tm.assert_frame_equal(s4.to_frame() & s3.to_frame(), exp.to_frame()) + + tm.assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame()) + tm.assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame()) + + +class TestSeriesComparisons: + def test_comparisons(self): + left = np.random.randn(10) + right = np.random.randn(10) + left[:3] = np.nan + + result = nanops.nangt(left, right) + with np.errstate(invalid="ignore"): + expected = (left > right).astype("O") + expected[:3] = np.nan + + tm.assert_almost_equal(result, expected) + + s = Series(["a", "b", "c"]) + s2 = Series([False, True, False]) + + # it works! 
+ exp = Series([False, False, False]) + tm.assert_series_equal(s == s2, exp) + tm.assert_series_equal(s2 == s, exp) + + def test_categorical_comparisons(self): + # GH 8938 + # allow equality comparisons + a = Series(list("abc"), dtype="category") + b = Series(list("abc"), dtype="object") + c = Series(["a", "b", "cc"], dtype="object") + d = Series(list("acb"), dtype="object") + e = Categorical(list("abc")) + f = Categorical(list("acb")) + + # vs scalar + assert not (a == "a").all() + assert ((a != "a") == ~(a == "a")).all() + + assert not ("a" == a).all() + assert (a == "a")[0] + assert ("a" == a)[0] + assert not ("a" != a)[0] + + # vs list-like + assert (a == a).all() + assert not (a != a).all() + + assert (a == list(a)).all() + assert (a == b).all() + assert (b == a).all() + assert ((~(a == b)) == (a != b)).all() + assert ((~(b == a)) == (b != a)).all() + + assert not (a == c).all() + assert not (c == a).all() + assert not (a == d).all() + assert not (d == a).all() + + # vs a cat-like + assert (a == e).all() + assert (e == a).all() + assert not (a == f).all() + assert not (f == a).all() + + assert (~(a == e) == (a != e)).all() + assert (~(e == a) == (e != a)).all() + assert (~(a == f) == (a != f)).all() + assert (~(f == a) == (f != a)).all() + + # non-equality is not comparable + with pytest.raises(TypeError): + a < b + with pytest.raises(TypeError): + b < a + with pytest.raises(TypeError): + a > b + with pytest.raises(TypeError): + b > a + + def test_comparison_tuples(self): + # GH11339 + # comparisons vs tuple + s = Series([(1, 1), (1, 2)]) + + result = s == (1, 2) + expected = Series([False, True]) + tm.assert_series_equal(result, expected) + + result = s != (1, 2) + expected = Series([True, False]) + tm.assert_series_equal(result, expected) + + result = s == (0, 0) + expected = Series([False, False]) + tm.assert_series_equal(result, expected) + + result = s != (0, 0) + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + s = 
Series([(1, 1), (1, 1)]) + + result = s == (1, 1) + expected = Series([True, True]) + tm.assert_series_equal(result, expected) + + result = s != (1, 1) + expected = Series([False, False]) + tm.assert_series_equal(result, expected) + + s = Series([frozenset([1]), frozenset([1, 2])]) + + result = s == frozenset([1]) + expected = Series([True, False]) + tm.assert_series_equal(result, expected) + + def test_comparison_operators_with_nas(self): + ser = Series(bdate_range("1/1/2000", periods=10), dtype=object) + ser[::2] = np.nan + + # test that comparisons work + ops = ["lt", "le", "gt", "ge", "eq", "ne"] + for op in ops: + val = ser[5] + + f = getattr(operator, op) + result = f(ser, val) + + expected = f(ser.dropna(), val).reindex(ser.index) + + if op == "ne": + expected = expected.fillna(True).astype(bool) + else: + expected = expected.fillna(False).astype(bool) + + tm.assert_series_equal(result, expected) + + # FIXME: dont leave commented-out + # fffffffuuuuuuuuuuuu + # result = f(val, s) + # expected = f(val, s.dropna()).reindex(s.index) + # tm.assert_series_equal(result, expected) + + def test_unequal_categorical_comparison_raises_type_error(self): + # unequal comparison should raise for unordered cats + cat = Series(Categorical(list("abc"))) + with pytest.raises(TypeError): + cat > "b" + + cat = Series(Categorical(list("abc"), ordered=False)) + with pytest.raises(TypeError): + cat > "b" + + # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal + cat = Series(Categorical(list("abc"), ordered=True)) + + with pytest.raises(TypeError): + cat < "d" + with pytest.raises(TypeError): + cat > "d" + with pytest.raises(TypeError): + "d" < cat + with pytest.raises(TypeError): + "d" > cat + + tm.assert_series_equal(cat == "d", Series([False, False, False])) + tm.assert_series_equal(cat != "d", Series([True, True, True])) + + def 
test_ne(self): + ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float) + expected = [True, True, False, True, True] + assert tm.equalContents(ts.index != 5, expected) + assert tm.equalContents(~(ts.index == 5), expected) + + def test_comp_ops_df_compat(self): + # GH 1134 + s1 = pd.Series([1, 2, 3], index=list("ABC"), name="x") + s2 = pd.Series([2, 2, 2], index=list("ABD"), name="x") + + s3 = pd.Series([1, 2, 3], index=list("ABC"), name="x") + s4 = pd.Series([2, 2, 2, 2], index=list("ABCD"), name="x") + + for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]: + + msg = "Can only compare identically-labeled Series objects" + with pytest.raises(ValueError, match=msg): + left == right + + with pytest.raises(ValueError, match=msg): + left != right + + with pytest.raises(ValueError, match=msg): + left < right + + msg = "Can only compare identically-labeled DataFrame objects" + with pytest.raises(ValueError, match=msg): + left.to_frame() == right.to_frame() + + with pytest.raises(ValueError, match=msg): + left.to_frame() != right.to_frame() + + with pytest.raises(ValueError, match=msg): + left.to_frame() < right.to_frame() + + def test_compare_series_interval_keyword(self): + # GH 25338 + s = Series(["IntervalA", "IntervalB", "IntervalC"]) + result = s == "IntervalA" + expected = Series([True, False, False]) + tm.assert_series_equal(result, expected) + + +class TestSeriesFlexComparisonOps: + def test_comparison_flex_alignment(self): + left = Series([1, 3, 2], index=list("abc")) + right = Series([2, 2, 2], index=list("bcd")) + + exp = pd.Series([False, False, True, False], index=list("abcd")) + tm.assert_series_equal(left.eq(right), exp) + + exp = pd.Series([True, True, False, True], index=list("abcd")) + tm.assert_series_equal(left.ne(right), exp) + + exp = pd.Series([False, False, True, False], index=list("abcd")) + tm.assert_series_equal(left.le(right), exp) + + exp = pd.Series([False, False, False, False], index=list("abcd")) + 
tm.assert_series_equal(left.lt(right), exp) + + exp = pd.Series([False, True, True, False], index=list("abcd")) + tm.assert_series_equal(left.ge(right), exp) + + exp = pd.Series([False, True, False, False], index=list("abcd")) + tm.assert_series_equal(left.gt(right), exp) + + def test_comparison_flex_alignment_fill(self): + left = Series([1, 3, 2], index=list("abc")) + right = Series([2, 2, 2], index=list("bcd")) + + exp = pd.Series([False, False, True, True], index=list("abcd")) + tm.assert_series_equal(left.eq(right, fill_value=2), exp) + + exp = pd.Series([True, True, False, False], index=list("abcd")) + tm.assert_series_equal(left.ne(right, fill_value=2), exp) + + exp = pd.Series([False, False, True, True], index=list("abcd")) + tm.assert_series_equal(left.le(right, fill_value=0), exp) + + exp = pd.Series([False, False, False, True], index=list("abcd")) + tm.assert_series_equal(left.lt(right, fill_value=0), exp) + + exp = pd.Series([True, True, True, False], index=list("abcd")) + tm.assert_series_equal(left.ge(right, fill_value=0), exp) + + exp = pd.Series([True, True, False, False], index=list("abcd")) + tm.assert_series_equal(left.gt(right, fill_value=0), exp) + + +class TestSeriesOperators: + def test_operators_empty_int_corner(self): + s1 = Series([], [], dtype=np.int32) + s2 = Series({"x": 0.0}) + tm.assert_series_equal(s1 * s2, Series([np.nan], index=["x"])) + + def test_ops_datetimelike_align(self): + # GH 7500 + # datetimelike ops need to align + dt = Series(date_range("2012-1-1", periods=3, freq="D")) + dt.iloc[2] = np.nan + dt2 = dt[::-1] + + expected = Series([timedelta(0), timedelta(0), pd.NaT]) + # name is reset + result = dt2 - dt + tm.assert_series_equal(result, expected) + + expected = Series(expected, name=0) + result = (dt2.to_frame() - dt.to_frame())[0] + tm.assert_series_equal(result, expected) + + def test_operators_corner(self, datetime_series): + empty = Series([], index=Index([]), dtype=np.float64) + + result = datetime_series + empty + 
assert np.isnan(result).all() + + result = empty + empty.copy() + assert len(result) == 0 + + # TODO: this returned NotImplemented earlier, what to do? + # deltas = Series([timedelta(1)] * 5, index=np.arange(5)) + # sub_deltas = deltas[::2] + # deltas5 = deltas * 5 + # deltas = deltas + sub_deltas + + # float + int + int_ts = datetime_series.astype(int)[:-5] + added = datetime_series + int_ts + expected = Series( + datetime_series.values[:-5] + int_ts.values, + index=datetime_series.index[:-5], + name="ts", + ) + tm.assert_series_equal(added[:-5], expected) + + pairings = [(Series.div, operator.truediv, 1), (Series.rdiv, ops.rtruediv, 1)] + for op in ["add", "sub", "mul", "pow", "truediv", "floordiv"]: + fv = 0 + lop = getattr(Series, op) + lequiv = getattr(operator, op) + rop = getattr(Series, "r" + op) + # bind op at definition time... + requiv = lambda x, y, op=op: getattr(operator, op)(y, x) + pairings.append((lop, lequiv, fv)) + pairings.append((rop, requiv, fv)) + + @pytest.mark.parametrize("op, equiv_op, fv", pairings) + def test_operators_combine(self, op, equiv_op, fv): + def _check_fill(meth, op, a, b, fill_value=0): + exp_index = a.index.union(b.index) + a = a.reindex(exp_index) + b = b.reindex(exp_index) + + amask = isna(a) + bmask = isna(b) + + exp_values = [] + for i in range(len(exp_index)): + with np.errstate(all="ignore"): + if amask[i]: + if bmask[i]: + exp_values.append(np.nan) + continue + exp_values.append(op(fill_value, b[i])) + elif bmask[i]: + if amask[i]: + exp_values.append(np.nan) + continue + exp_values.append(op(a[i], fill_value)) + else: + exp_values.append(op(a[i], b[i])) + + result = meth(a, b, fill_value=fill_value) + expected = Series(exp_values, exp_index) + tm.assert_series_equal(result, expected) + + a = Series([np.nan, 1.0, 2.0, 3.0, np.nan], index=np.arange(5)) + b = Series([np.nan, 1, np.nan, 3, np.nan, 4.0], index=np.arange(6)) + + result = op(a, b) + exp = equiv_op(a, b) + tm.assert_series_equal(result, exp) + 
_check_fill(op, equiv_op, a, b, fill_value=fv) + # should accept axis=0 or axis='rows' + op(a, b, axis=0) + + def test_operators_na_handling(self): + from decimal import Decimal + from datetime import date + + s = Series( + [Decimal("1.3"), Decimal("2.3")], index=[date(2012, 1, 1), date(2012, 1, 2)] + ) + + result = s + s.shift(1) + result2 = s.shift(1) + s + assert isna(result[0]) + assert isna(result2[0]) + + def test_op_duplicate_index(self): + # GH14227 + s1 = Series([1, 2], index=[1, 1]) + s2 = Series([10, 10], index=[1, 2]) + result = s1 + s2 + expected = pd.Series([11, 12, np.nan], index=[1, 1, 2]) + tm.assert_series_equal(result, expected) + + def test_divmod(self): + # GH25557 + a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"]) + b = Series([2, np.nan, 1, np.nan], index=["a", "b", "d", "e"]) + + result = a.divmod(b) + expected = divmod(a, b) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + result = a.rdivmod(b) + expected = divmod(b, a) + tm.assert_series_equal(result[0], expected[0]) + tm.assert_series_equal(result[1], expected[1]) + + @pytest.mark.parametrize("index", [None, range(9)]) + def test_series_integer_mod(self, index): + # see gh-24396 + s1 = Series(range(1, 10)) + s2 = Series("foo", index=index) + + msg = "not all arguments converted during string formatting" + + with pytest.raises(TypeError, match=msg): + s2 % s1 + + +class TestSeriesUnaryOps: + # __neg__, __pos__, __inv__ + + def test_neg(self): + ser = tm.makeStringSeries() + ser.name = "series" + tm.assert_series_equal(-ser, -1 * ser) + + def test_invert(self): + ser = tm.makeStringSeries() + ser.name = "series" + tm.assert_series_equal(-(ser < 0), ~(ser < 0)) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py new file mode 100644 index 00000000..03fee389 --- /dev/null +++ b/pandas/tests/series/test_period.py @@ -0,0 +1,170 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas 
import DataFrame, Period, Series, period_range +import pandas._testing as tm +from pandas.core.arrays import PeriodArray + + +class TestSeriesPeriod: + def setup_method(self, method): + self.series = Series(period_range("2000-01-01", periods=10, freq="D")) + + def test_auto_conversion(self): + series = Series(list(period_range("2000-01-01", periods=10, freq="D"))) + assert series.dtype == "Period[D]" + + series = pd.Series( + [pd.Period("2011-01-01", freq="D"), pd.Period("2011-02-01", freq="D")] + ) + assert series.dtype == "Period[D]" + + def test_getitem(self): + assert self.series[1] == pd.Period("2000-01-02", freq="D") + + result = self.series[[2, 4]] + exp = pd.Series( + [pd.Period("2000-01-03", freq="D"), pd.Period("2000-01-05", freq="D")], + index=[2, 4], + dtype="Period[D]", + ) + tm.assert_series_equal(result, exp) + assert result.dtype == "Period[D]" + + def test_isna(self): + # GH 13737 + s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")]) + tm.assert_series_equal(s.isna(), Series([False, True])) + tm.assert_series_equal(s.notna(), Series([True, False])) + + def test_fillna(self): + # GH 13737 + s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")]) + + res = s.fillna(pd.Period("2012-01", freq="M")) + exp = Series([pd.Period("2011-01", freq="M"), pd.Period("2012-01", freq="M")]) + tm.assert_series_equal(res, exp) + assert res.dtype == "Period[M]" + + def test_dropna(self): + # GH 13737 + s = Series([pd.Period("2011-01", freq="M"), pd.Period("NaT", freq="M")]) + tm.assert_series_equal(s.dropna(), Series([pd.Period("2011-01", freq="M")])) + + def test_between(self): + left, right = self.series[[2, 7]] + result = self.series.between(left, right) + expected = (self.series >= left) & (self.series <= right) + tm.assert_series_equal(result, expected) + + # --------------------------------------------------------------------- + # NaT support + + @pytest.mark.xfail(reason="PeriodDtype Series not supported yet") + def 
test_NaT_scalar(self): + series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]") + + val = series[3] + assert pd.isna(val) + + series[2] = val + assert pd.isna(series[2]) + + def test_NaT_cast(self): + result = Series([np.nan]).astype("period[D]") + expected = Series([pd.NaT], dtype="period[D]") + tm.assert_series_equal(result, expected) + + def test_set_none(self): + self.series[3] = None + assert self.series[3] is pd.NaT + + self.series[3:5] = None + assert self.series[4] is pd.NaT + + def test_set_nan(self): + # Do we want to allow this? + self.series[5] = np.nan + assert self.series[5] is pd.NaT + + self.series[5:7] = np.nan + assert self.series[6] is pd.NaT + + def test_intercept_astype_object(self): + expected = self.series.astype("object") + + df = DataFrame({"a": self.series, "b": np.random.randn(len(self.series))}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + df = DataFrame({"a": self.series, "b": ["foo"] * len(self.series)}) + + result = df.values.squeeze() + assert (result[:, 0] == expected.values).all() + + def test_align_series(self, join_type): + rng = period_range("1/1/2000", "1/1/2010", freq="A") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts.align(ts[::2], join=join_type) + + def test_truncate(self): + # GH 17717 + idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series1 = pd.Series([1, 2, 3], index=idx1) + result1 = series1.truncate(after="2017-09-02") + + expected_idx1 = pd.PeriodIndex( + [pd.Period("2017-09-02"), pd.Period("2017-09-02")] + ) + tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) + + idx2 = pd.PeriodIndex( + [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")] + ) + series2 = pd.Series([1, 2, 3], index=idx2) + result2 = series2.sort_index().truncate(after="2017-09-02") + + expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")]) + tm.assert_series_equal(result2, 
pd.Series([2], index=expected_idx2)) + + @pytest.mark.parametrize( + "input_vals", + [ + [Period("2016-01", freq="M"), Period("2016-02", freq="M")], + [Period("2016-01-01", freq="D"), Period("2016-01-02", freq="D")], + [ + Period("2016-01-01 00:00:00", freq="H"), + Period("2016-01-01 01:00:00", freq="H"), + ], + [ + Period("2016-01-01 00:00:00", freq="M"), + Period("2016-01-01 00:01:00", freq="M"), + ], + [ + Period("2016-01-01 00:00:00", freq="S"), + Period("2016-01-01 00:00:01", freq="S"), + ], + ], + ) + def test_end_time_timevalues(self, input_vals): + # GH 17157 + # Check that the time part of the Period is adjusted by end_time + # when using the dt accessor on a Series + input_vals = PeriodArray._from_sequence(np.asarray(input_vals)) + + s = Series(input_vals) + result = s.dt.end_time + expected = s.apply(lambda x: x.end_time) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("input_vals", [("2001"), ("NaT")]) + def test_to_period(self, input_vals): + # GH 21205 + expected = Series([input_vals], dtype="Period[D]") + result = Series([input_vals], dtype="datetime64[ns]").dt.to_period("D") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py new file mode 100644 index 00000000..64a8c456 --- /dev/null +++ b/pandas/tests/series/test_repr.py @@ -0,0 +1,489 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + Categorical, + DataFrame, + Index, + MultiIndex, + Series, + date_range, + option_context, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class TestSeriesRepr: + def test_multilevel_name_print(self): + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + s = Series(range(len(index)), index=index, name="sth") + expected = [ + 
"first second", + "foo one 0", + " two 1", + " three 2", + "bar one 3", + " two 4", + "baz two 5", + " three 6", + "qux one 7", + " two 8", + " three 9", + "Name: sth, dtype: int64", + ] + expected = "\n".join(expected) + assert repr(s) == expected + + def test_name_printing(self): + # Test small Series. + s = Series([0, 1, 2]) + + s.name = "test" + assert "Name: test" in repr(s) + + s.name = None + assert "Name:" not in repr(s) + + # Test big Series (diff code path). + s = Series(range(1000)) + + s.name = "test" + assert "Name: test" in repr(s) + + s.name = None + assert "Name:" not in repr(s) + + s = Series(index=date_range("20010101", "20020101"), name="test", dtype=object) + assert "Name: test" in repr(s) + + def test_repr(self, datetime_series, string_series, object_series): + str(datetime_series) + str(string_series) + str(string_series.astype(int)) + str(object_series) + + str(Series(tm.randn(1000), index=np.arange(1000))) + str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1))) + + # empty + str(Series(dtype=object)) + + # with NaNs + string_series[5:7] = np.NaN + str(string_series) + + # with Nones + ots = datetime_series.astype("O") + ots[::2] = None + repr(ots) + + # various names + for name in [ + "", + 1, + 1.2, + "foo", + "\u03B1\u03B2\u03B3", + "loooooooooooooooooooooooooooooooooooooooooooooooooooong", + ("foo", "bar", "baz"), + (1, 2), + ("foo", 1, 2.3), + ("\u03B1", "\u03B2", "\u03B3"), + ("\u03B1", "bar"), + ]: + string_series.name = name + repr(string_series) + + biggie = Series( + tm.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz") + ) + repr(biggie) + + # 0 as name + ser = Series(np.random.randn(100), name=0) + rep_str = repr(ser) + assert "Name: 0" in rep_str + + # tidy repr + ser = Series(np.random.randn(1001), name=0) + rep_str = repr(ser) + assert "Name: 0" in rep_str + + ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) + assert "\t" not in repr(ser) + assert "\r" not in repr(ser) + assert "a\n" not in 
repr(ser) + + # with empty series (#4651) + s = Series([], dtype=np.int64, name="foo") + assert repr(s) == "Series([], Name: foo, dtype: int64)" + + s = Series([], dtype=np.int64, name=None) + assert repr(s) == "Series([], dtype: int64)" + + def test_tidy_repr(self): + a = Series(["\u05d0"] * 1000) + a.name = "title1" + repr(a) # should not raise exception + + def test_repr_bool_fails(self, capsys): + s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)]) + + # It works (with no Cython exception barf)! + repr(s) + + captured = capsys.readouterr() + assert captured.err == "" + + def test_repr_name_iterable_indexable(self): + s = Series([1, 2, 3], name=np.int64(3)) + + # it works! + repr(s) + + s.name = ("\u05d0",) * 2 + repr(s) + + def test_repr_should_return_str(self): + # https://docs.python.org/3/reference/datamodel.html#object.__repr__ + # ...The return value must be a string object. + + # (str on py2.x, str (unicode) on py3) + + data = [8, 5, 3, 5] + index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"] + df = Series(data, index=index1) + assert type(df.__repr__() == str) # both py2 / 3 + + def test_repr_max_rows(self): + # GH 6863 + with pd.option_context("max_rows", None): + str(Series(range(1001))) # should not raise exception + + def test_unicode_string_with_unicode(self): + df = Series(["\u05d0"], name="\u05d1") + str(df) + + def test_str_to_bytes_raises(self): + # GH 26447 + df = Series(["abc"], name="abc") + msg = "^'str' object cannot be interpreted as an integer$" + with pytest.raises(TypeError, match=msg): + bytes(df) + + def test_timeseries_repr_object_dtype(self): + index = Index( + [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object + ) + ts = Series(np.random.randn(len(index)), index) + repr(ts) + + ts = tm.makeTimeSeries(1000) + assert repr(ts).splitlines()[-1].startswith("Freq:") + + ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)] + repr(ts2).splitlines()[-1] + + def test_latex_repr(self): + result = 
r"""\begin{tabular}{ll} +\toprule +{} & 0 \\ +\midrule +0 & $\alpha$ \\ +1 & b \\ +2 & c \\ +\bottomrule +\end{tabular} +""" + with option_context("display.latex.escape", False, "display.latex.repr", True): + s = Series([r"$\alpha$", "b", "c"]) + assert result == s._repr_latex_() + + assert s._repr_latex_() is None + + def test_index_repr_in_frame_with_nan(self): + # see gh-25061 + i = Index([1, np.nan]) + s = Series([1, 2], index=i) + exp = """1.0 1\nNaN 2\ndtype: int64""" + + assert repr(s) == exp + + +class TestCategoricalRepr: + def test_categorical_repr_unicode(self): + # see gh-21002 + + class County: + name = "San Sebastián" + state = "PR" + + def __repr__(self) -> str: + return self.name + ", " + self.state + + cat = pd.Categorical([County() for _ in range(61)]) + idx = pd.Index(cat) + ser = idx.to_series() + + repr(ser) + str(ser) + + def test_categorical_repr(self): + a = Series(Categorical([1, 2, 3, 4])) + exp = ( + "0 1\n1 2\n2 3\n3 4\n" + + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" + ) + + assert exp == a.__str__() + + a = Series(Categorical(["a", "b"] * 25)) + exp = ( + "0 a\n1 b\n" + + " ..\n" + + "48 a\n49 b\n" + + "Length: 50, dtype: category\nCategories (2, object): [a, b]" + ) + with option_context("display.max_rows", 5): + assert exp == repr(a) + + levs = list("abcdefghijklmnopqrstuvwxyz") + a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) + exp = ( + "0 a\n1 b\n" + "dtype: category\n" + "Categories (26, object): [a < b < c < d ... 
w < x < y < z]" + ) + assert exp == a.__str__() + + def test_categorical_series_repr(self): + s = Series(Categorical([1, 2, 3])) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1, 2, 3]""" + + assert repr(s) == exp + + s = Series(Categorical(np.arange(10))) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" + + assert repr(s) == exp + + def test_categorical_series_repr_ordered(self): + s = Series(Categorical([1, 2, 3], ordered=True)) + exp = """0 1 +1 2 +2 3 +dtype: category +Categories (3, int64): [1 < 2 < 3]""" + + assert repr(s) == exp + + s = Series(Categorical(np.arange(10), ordered=True)) + exp = """0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +dtype: category +Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]""" + + assert repr(s) == exp + + def test_categorical_series_repr_datetime(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, + 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa + + assert repr(s) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, + 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, + 2011-01-01 13:00:00-05:00]""" # noqa + + assert repr(s) == exp + + def test_categorical_series_repr_datetime_ordered(self): + idx = date_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx, 
ordered=True)) + exp = """0 2011-01-01 09:00:00 +1 2011-01-01 10:00:00 +2 2011-01-01 11:00:00 +3 2011-01-01 12:00:00 +4 2011-01-01 13:00:00 +dtype: category +Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + + assert repr(s) == exp + + idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern") + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00:00-05:00 +1 2011-01-01 10:00:00-05:00 +2 2011-01-01 11:00:00-05:00 +3 2011-01-01 12:00:00-05:00 +4 2011-01-01 13:00:00-05:00 +dtype: category +Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < + 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < + 2011-01-01 13:00:00-05:00]""" # noqa + + assert repr(s) == exp + + def test_categorical_series_repr_period(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" # noqa + + assert repr(s) == exp + + idx = period_range("2011-01", freq="M", periods=5) + s = Series(Categorical(idx)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + + assert repr(s) == exp + + def test_categorical_series_repr_period_ordered(self): + idx = period_range("2011-01-01 09:00", freq="H", periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01-01 09:00 +1 2011-01-01 10:00 +2 2011-01-01 11:00 +3 2011-01-01 12:00 +4 2011-01-01 13:00 +dtype: category +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" # noqa + + assert 
repr(s) == exp + + idx = period_range("2011-01", freq="M", periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 2011-01 +1 2011-02 +2 2011-03 +3 2011-04 +4 2011-05 +dtype: category +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + + assert repr(s) == exp + + def test_categorical_series_repr_timedelta(self): + idx = timedelta_range("1 days", periods=5) + s = Series(Categorical(idx)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + + assert repr(s) == exp + + idx = timedelta_range("1 hours", periods=10) + s = Series(Categorical(idx)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, + 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, + 8 days 01:00:00, 9 days 01:00:00]""" # noqa + + assert repr(s) == exp + + def test_categorical_series_repr_timedelta_ordered(self): + idx = timedelta_range("1 days", periods=5) + s = Series(Categorical(idx, ordered=True)) + exp = """0 1 days +1 2 days +2 3 days +3 4 days +4 5 days +dtype: category +Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa + + assert repr(s) == exp + + idx = timedelta_range("1 hours", periods=10) + s = Series(Categorical(idx, ordered=True)) + exp = """0 0 days 01:00:00 +1 1 days 01:00:00 +2 2 days 01:00:00 +3 3 days 01:00:00 +4 4 days 01:00:00 +5 5 days 01:00:00 +6 6 days 01:00:00 +7 7 days 01:00:00 +8 8 days 01:00:00 +9 9 days 01:00:00 +dtype: category +Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < + 3 days 01:00:00 ... 
6 days 01:00:00 < 7 days 01:00:00 < + 8 days 01:00:00 < 9 days 01:00:00]""" # noqa + + assert repr(s) == exp diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py new file mode 100644 index 00000000..73247bbf --- /dev/null +++ b/pandas/tests/series/test_subclass.py @@ -0,0 +1,37 @@ +import pandas._testing as tm + + +class TestSeriesSubclassing: + def test_indexing_sliced(self): + s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd")) + res = s.loc[["a", "b"]] + exp = tm.SubclassedSeries([1, 2], index=list("ab")) + tm.assert_series_equal(res, exp) + + res = s.iloc[[2, 3]] + exp = tm.SubclassedSeries([3, 4], index=list("cd")) + tm.assert_series_equal(res, exp) + + res = s.loc[["a", "b"]] + exp = tm.SubclassedSeries([1, 2], index=list("ab")) + tm.assert_series_equal(res, exp) + + def test_to_frame(self): + s = tm.SubclassedSeries([1, 2, 3, 4], index=list("abcd"), name="xxx") + res = s.to_frame() + exp = tm.SubclassedDataFrame({"xxx": [1, 2, 3, 4]}, index=list("abcd")) + tm.assert_frame_equal(res, exp) + + def test_subclass_unstack(self): + # GH 15564 + s = tm.SubclassedSeries([1, 2, 3, 4], index=[list("aabb"), list("xyxy")]) + + res = s.unstack() + exp = tm.SubclassedDataFrame({"x": [1, 3], "y": [2, 4]}, index=["a", "b"]) + + tm.assert_frame_equal(res, exp) + + def test_subclass_empty_repr(self): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + sub_series = tm.SubclassedSeries() + assert "SubclassedSeries" in repr(sub_series) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py new file mode 100644 index 00000000..459377fb --- /dev/null +++ b/pandas/tests/series/test_timeseries.py @@ -0,0 +1,767 @@ +from datetime import datetime, time, timedelta +from io import StringIO +from itertools import product + +import numpy as np +import pytest + +from pandas._libs.tslib import iNaT +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime +import 
pandas.util._test_decorators as td + +import pandas as pd +from pandas import ( + DataFrame, + DatetimeIndex, + NaT, + Series, + Timestamp, + concat, + date_range, + timedelta_range, + to_datetime, +) +import pandas._testing as tm + +from pandas.tseries.offsets import BDay, BMonthEnd + + +def _simple_ts(start, end, freq="D"): + rng = date_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + +def assert_range_equal(left, right): + assert left.equals(right) + assert left.freq == right.freq + assert left.tz == right.tz + + +class TestTimeSeries: + def test_asfreq(self): + ts = Series( + [0.0, 1.0, 2.0], + index=[ + datetime(2009, 10, 30), + datetime(2009, 11, 30), + datetime(2009, 12, 31), + ], + ) + + daily_ts = ts.asfreq("B") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_series_equal(monthly_ts, ts) + + daily_ts = ts.asfreq("B", method="pad") + monthly_ts = daily_ts.asfreq("BM") + tm.assert_series_equal(monthly_ts, ts) + + daily_ts = ts.asfreq(BDay()) + monthly_ts = daily_ts.asfreq(BMonthEnd()) + tm.assert_series_equal(monthly_ts, ts) + + result = ts[:0].asfreq("M") + assert len(result) == 0 + assert result is not ts + + daily_ts = ts.asfreq("D", fill_value=-1) + result = daily_ts.value_counts().sort_index() + expected = Series([60, 1, 1, 1], index=[-1.0, 2.0, 1.0, 0.0]).sort_index() + tm.assert_series_equal(result, expected) + + def test_asfreq_datetimeindex_empty_series(self): + # GH 14320 + index = pd.DatetimeIndex(["2016-09-29 11:00"]) + expected = Series(index=index, dtype=object).asfreq("H") + result = Series([3], index=index.copy()).asfreq("H") + tm.assert_index_equal(expected.index, result.index) + + def test_autocorr(self, datetime_series): + # Just run the function + corr1 = datetime_series.autocorr() + + # Now run it with the lag parameter + corr2 = datetime_series.autocorr(lag=1) + + # corr() with lag needs Series of at least length 2 + if len(datetime_series) <= 2: + assert np.isnan(corr1) + assert np.isnan(corr2) + 
else: + assert corr1 == corr2 + + # Choose a random lag between 1 and length of Series - 2 + # and compare the result with the Series corr() function + n = 1 + np.random.randint(max(1, len(datetime_series) - 2)) + corr1 = datetime_series.corr(datetime_series.shift(n)) + corr2 = datetime_series.autocorr(lag=n) + + # corr() with lag needs Series of at least length 2 + if len(datetime_series) <= 2: + assert np.isnan(corr1) + assert np.isnan(corr2) + else: + assert corr1 == corr2 + + def test_first_last_valid(self, datetime_series): + ts = datetime_series.copy() + ts[:5] = np.NaN + + index = ts.first_valid_index() + assert index == ts.index[5] + + ts[-5:] = np.NaN + index = ts.last_valid_index() + assert index == ts.index[-6] + + ts[:] = np.nan + assert ts.last_valid_index() is None + assert ts.first_valid_index() is None + + ser = Series([], index=[], dtype=object) + assert ser.last_valid_index() is None + assert ser.first_valid_index() is None + + # GH12800 + empty = Series(dtype=object) + assert empty.last_valid_index() is None + assert empty.first_valid_index() is None + + # GH20499: its preserves freq with holes + ts.index = date_range("20110101", periods=len(ts), freq="B") + ts.iloc[1] = 1 + ts.iloc[-2] = 1 + assert ts.first_valid_index() == ts.index[1] + assert ts.last_valid_index() == ts.index[-2] + assert ts.first_valid_index().freq == ts.index.freq + assert ts.last_valid_index().freq == ts.index.freq + + def test_mpl_compat_hack(self, datetime_series): + + # This is currently failing because the test was relying on + # the DeprecationWarning coming through Index.__getitem__. 
+ # We want to implement a warning specifically for Series.__getitem__ + # at which point this will become a Deprecation/FutureWarning + with tm.assert_produces_warning(None): + # GH#30588 multi-dimensional indexing deprecated + result = datetime_series[:, np.newaxis] + expected = datetime_series.values[:, np.newaxis] + tm.assert_almost_equal(result, expected) + + def test_timeseries_coercion(self): + idx = tm.makeDateIndex(10000) + ser = Series(np.random.randn(len(idx)), idx.astype(object)) + assert ser.index.is_all_dates + assert isinstance(ser.index, DatetimeIndex) + + def test_contiguous_boolean_preserve_freq(self): + rng = date_range("1/1/2000", "3/1/2000", freq="B") + + mask = np.zeros(len(rng), dtype=bool) + mask[10:20] = True + + masked = rng[mask] + expected = rng[10:20] + assert expected.freq is not None + assert_range_equal(masked, expected) + + mask[22] = True + masked = rng[mask] + assert masked.freq is None + + def test_to_datetime_unit(self): + + epoch = 1370745748 + s = Series([epoch + t for t in range(20)]) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + ) + tm.assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)]).astype(float) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + ) + tm.assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + s = Series([epoch + t for t in range(20)] + [iNaT]).astype(float) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + # GH13834 + s = 
Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) + result = to_datetime(s, unit="s") + expected = Series( + [ + Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) + for t in np.arange(0, 2, 0.25) + ] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + s = concat( + [Series([epoch + t for t in range(20)]).astype(float), Series([np.nan])], + ignore_index=True, + ) + result = to_datetime(s, unit="s") + expected = Series( + [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + + [NaT] + ) + tm.assert_series_equal(result, expected) + + result = to_datetime([1, 2, "NaT", pd.NaT, np.nan], unit="D") + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3 + ) + tm.assert_index_equal(result, expected) + + msg = "non convertible value foo with the unit 'D'" + with pytest.raises(ValueError, match=msg): + to_datetime([1, 2, "foo"], unit="D") + msg = "cannot convert input 111111111 with the unit 'D'" + with pytest.raises(OutOfBoundsDatetime, match=msg): + to_datetime([1, 2, 111111111], unit="D") + + # coerce we can process + expected = DatetimeIndex( + [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1 + ) + result = to_datetime([1, 2, "foo"], unit="D", errors="coerce") + tm.assert_index_equal(result, expected) + + result = to_datetime([1, 2, 111111111], unit="D", errors="coerce") + tm.assert_index_equal(result, expected) + + def test_series_ctor_datetime64(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + dates = np.asarray(rng) + + series = Series(dates) + assert np.issubdtype(series.dtype, np.dtype("M8[ns]")) + + def test_series_repr_nat(self): + series = Series([0, 1000, 2000, iNaT], dtype="M8[ns]") + + result = repr(series) + expected = ( + "0 1970-01-01 00:00:00.000000\n" + "1 1970-01-01 00:00:00.000001\n" + "2 1970-01-01 00:00:00.000002\n" + "3 NaT\n" + "dtype: datetime64[ns]" + ) + assert result == expected + + def 
test_asfreq_keep_index_name(self): + # GH #9854 + index_name = "bar" + index = pd.date_range("20130101", periods=20, name=index_name) + df = pd.DataFrame(list(range(20)), columns=["foo"], index=index) + + assert index_name == df.index.name + assert index_name == df.asfreq("10D").index.name + + def test_promote_datetime_date(self): + rng = date_range("1/1/2000", periods=20) + ts = Series(np.random.randn(20), index=rng) + + ts_slice = ts[5:] + ts2 = ts_slice.copy() + ts2.index = [x.date() for x in ts2.index] + + result = ts + ts2 + result2 = ts2 + ts + expected = ts + ts[5:] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + # test asfreq + result = ts2.asfreq("4H", method="ffill") + expected = ts[5:].asfreq("4H", method="ffill") + tm.assert_series_equal(result, expected) + + result = rng.get_indexer(ts2.index) + expected = rng.get_indexer(ts_slice.index) + tm.assert_numpy_array_equal(result, expected) + + def test_asfreq_normalize(self): + rng = date_range("1/1/2000 09:30", periods=20) + norm = date_range("1/1/2000", periods=20) + vals = np.random.randn(20) + ts = Series(vals, index=rng) + + result = ts.asfreq("D", normalize=True) + norm = date_range("1/1/2000", periods=20) + expected = Series(vals, index=norm) + + tm.assert_series_equal(result, expected) + + vals = np.random.randn(20, 3) + ts = DataFrame(vals, index=rng) + + result = ts.asfreq("D", normalize=True) + expected = DataFrame(vals, index=norm) + + tm.assert_frame_equal(result, expected) + + def test_first_subset(self): + ts = _simple_ts("1/1/2000", "1/1/2010", freq="12h") + result = ts.first("10d") + assert len(result) == 20 + + ts = _simple_ts("1/1/2000", "1/1/2010") + result = ts.first("10d") + assert len(result) == 10 + + result = ts.first("3M") + expected = ts[:"3/31/2000"] + tm.assert_series_equal(result, expected) + + result = ts.first("21D") + expected = ts[:21] + tm.assert_series_equal(result, expected) + + result = ts[:0].first("3M") + 
tm.assert_series_equal(result, ts[:0]) + + def test_first_raises(self): + # GH20725 + ser = pd.Series("a b c".split()) + msg = "'first' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): + ser.first("1D") + + def test_last_subset(self): + ts = _simple_ts("1/1/2000", "1/1/2010", freq="12h") + result = ts.last("10d") + assert len(result) == 20 + + ts = _simple_ts("1/1/2000", "1/1/2010") + result = ts.last("10d") + assert len(result) == 10 + + result = ts.last("21D") + expected = ts["12/12/2009":] + tm.assert_series_equal(result, expected) + + result = ts.last("21D") + expected = ts[-21:] + tm.assert_series_equal(result, expected) + + result = ts[:0].last("3M") + tm.assert_series_equal(result, ts[:0]) + + def test_last_raises(self): + # GH20725 + ser = pd.Series("a b c".split()) + msg = "'last' only supports a DatetimeIndex index" + with pytest.raises(TypeError, match=msg): + ser.last("1D") + + def test_format_pre_1900_dates(self): + rng = date_range("1/1/1850", "1/1/1950", freq="A-DEC") + rng.format() + ts = Series(1, index=rng) + repr(ts) + + def test_at_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + rs = ts.at_time(rng[1]) + assert (rs.index.hour == rng[1].hour).all() + assert (rs.index.minute == rng[1].minute).all() + assert (rs.index.second == rng[1].second).all() + + result = ts.at_time("9:30") + expected = ts.at_time(time(9, 30)) + tm.assert_series_equal(result, expected) + + df = DataFrame(np.random.randn(len(rng), 3), index=rng) + + result = ts[time(9, 30)] + result_df = df.loc[time(9, 30)] + expected = ts[(rng.hour == 9) & (rng.minute == 30)] + exp_df = df[(rng.hour == 9) & (rng.minute == 30)] + + # FIXME: dont leave commented-out + # expected.index = date_range('1/1/2000', '1/4/2000') + + tm.assert_series_equal(result, expected) + tm.assert_frame_equal(result_df, exp_df) + + chunk = df.loc["1/4/2000":] + result = chunk.loc[time(9, 30)] + expected = 
result_df[-1:] + tm.assert_frame_equal(result, expected) + + # midnight, everything + rng = date_range("1/1/2000", "1/31/2000") + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.at_time(time(0, 0)) + tm.assert_series_equal(result, ts) + + # time doesn't exist + rng = date_range("1/1/2012", freq="23Min", periods=384) + ts = Series(np.random.randn(len(rng)), rng) + rs = ts.at_time("16:00") + assert len(rs) == 0 + + def test_at_time_raises(self): + # GH20725 + ser = pd.Series("a b c".split()) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): + ser.at_time("00:00") + + def test_between(self): + series = Series(date_range("1/1/2000", periods=10)) + left, right = series[[2, 7]] + + result = series.between(left, right) + expected = (series >= left) & (series <= right) + tm.assert_series_equal(result, expected) + + def test_between_time(self): + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(0, 0) + etime = time(1, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = 13 * 4 + 1 + if not inc_start: + exp_len -= 5 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert t >= stime + else: + assert t > stime + + if inc_end: + assert t <= etime + else: + assert t < etime + + result = ts.between_time("00:00", "01:00") + expected = ts.between_time(stime, etime) + tm.assert_series_equal(result, expected) + + # across midnight + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime = time(22, 0) + etime = time(9, 0) + + close_open = product([True, False], [True, False]) + for inc_start, inc_end in close_open: + filtered = ts.between_time(stime, etime, inc_start, inc_end) + exp_len = (12 * 11 + 1) * 4 + 
1 + if not inc_start: + exp_len -= 4 + if not inc_end: + exp_len -= 4 + + assert len(filtered) == exp_len + for rs in filtered.index: + t = rs.time() + if inc_start: + assert (t >= stime) or (t <= etime) + else: + assert (t > stime) or (t <= etime) + + if inc_end: + assert (t <= etime) or (t >= stime) + else: + assert (t < etime) or (t >= stime) + + def test_between_time_raises(self): + # GH20725 + ser = pd.Series("a b c".split()) + msg = "Index must be DatetimeIndex" + with pytest.raises(TypeError, match=msg): + ser.between_time(start_time="00:00", end_time="12:00") + + def test_between_time_types(self): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time" + with pytest.raises(ValueError, match=msg): + rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + frame = DataFrame({"A": 0}, index=rng) + with pytest.raises(ValueError, match=msg): + frame.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + series = Series(0, index=rng) + with pytest.raises(ValueError, match=msg): + series.between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5)) + + @td.skip_if_has_locale + def test_between_time_formats(self): + # GH11818 + rng = date_range("1/1/2000", "1/5/2000", freq="5min") + ts = DataFrame(np.random.randn(len(rng), 2), index=rng) + + strings = [ + ("2:00", "2:30"), + ("0200", "0230"), + ("2:00am", "2:30am"), + ("0200am", "0230am"), + ("2:00:00", "2:30:00"), + ("020000", "023000"), + ("2:00:00am", "2:30:00am"), + ("020000am", "023000am"), + ] + expected_length = 28 + + for time_string in strings: + assert len(ts.between_time(*time_string)) == expected_length + + def test_between_time_axis(self): + # issue 8839 + rng = date_range("1/1/2000", periods=100, freq="10min") + ts = Series(np.random.randn(len(rng)), index=rng) + stime, etime = ("08:00:00", "09:00:00") + expected_length = 7 + + assert len(ts.between_time(stime, etime)) == 
expected_length + assert len(ts.between_time(stime, etime, axis=0)) == expected_length + msg = "No axis named 1 for object type " + with pytest.raises(ValueError, match=msg): + ts.between_time(stime, etime, axis=1) + + def test_to_period(self): + from pandas.core.indexes.period import period_range + + ts = _simple_ts("1/1/2000", "1/1/2001") + + pts = ts.to_period() + exp = ts.copy() + exp.index = period_range("1/1/2000", "1/1/2001") + tm.assert_series_equal(pts, exp) + + pts = ts.to_period("M") + exp.index = exp.index.asfreq("M") + tm.assert_index_equal(pts.index, exp.index.asfreq("M")) + tm.assert_series_equal(pts, exp) + + # GH 7606 without freq + idx = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"]) + exp_idx = pd.PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], freq="D" + ) + + s = Series(np.random.randn(4), index=idx) + expected = s.copy() + expected.index = exp_idx + tm.assert_series_equal(s.to_period(), expected) + + df = DataFrame(np.random.randn(4, 4), index=idx, columns=idx) + expected = df.copy() + expected.index = exp_idx + tm.assert_frame_equal(df.to_period(), expected) + + expected = df.copy() + expected.columns = exp_idx + tm.assert_frame_equal(df.to_period(axis=1), expected) + + def test_groupby_count_dateparseerror(self): + dr = date_range(start="1/1/2012", freq="5min", periods=10) + + # BAD Example, datetimes first + s = Series(np.arange(10), index=[dr, np.arange(10)]) + grouped = s.groupby(lambda x: x[1] % 2 == 0) + result = grouped.count() + + s = Series(np.arange(10), index=[np.arange(10), dr]) + grouped = s.groupby(lambda x: x[0] % 2 == 0) + expected = grouped.count() + + tm.assert_series_equal(result, expected) + + def test_to_csv_numpy_16_bug(self): + frame = DataFrame({"a": date_range("1/1/2000", periods=10)}) + + buf = StringIO() + frame.to_csv(buf) + + result = buf.getvalue() + assert "2000-01-01" in result + + def test_series_map_box_timedelta(self): + # GH 11349 + s = 
Series(timedelta_range("1 day 1 s", periods=5, freq="h")) + + def f(x): + return x.total_seconds() + + s.map(f) + s.apply(f) + DataFrame(s).applymap(f) + + def test_asfreq_resample_set_correct_freq(self): + # GH5613 + # we test if .asfreq() and .resample() set the correct value for .freq + df = pd.DataFrame( + {"date": ["2012-01-01", "2012-01-02", "2012-01-03"], "col": [1, 2, 3]} + ) + df = df.set_index(pd.to_datetime(df.date)) + + # testing the settings before calling .asfreq() and .resample() + assert df.index.freq is None + assert df.index.inferred_freq == "D" + + # does .asfreq() set .freq correctly? + assert df.asfreq("D").index.freq == "D" + + # does .resample() set .freq correctly? + assert df.resample("D").asfreq().index.freq == "D" + + def test_pickle(self): + + # GH4606 + p = tm.round_trip_pickle(NaT) + assert p is NaT + + idx = pd.to_datetime(["2013-01-01", NaT, "2014-01-06"]) + idx_p = tm.round_trip_pickle(idx) + assert idx_p[0] == idx[0] + assert idx_p[1] is NaT + assert idx_p[2] == idx[2] + + # GH11002 + # don't infer freq + idx = date_range("1750-1-1", "2050-1-1", freq="7D") + idx_p = tm.round_trip_pickle(idx) + tm.assert_index_equal(idx, idx_p) + + @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"]) + def test_setops_preserve_freq(self, tz): + rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz) + + result = rng[:50].union(rng[50:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[30:100]) + assert result.name == rng.name + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].union(rng[60:100]) + assert result.name == rng.name + assert result.freq is None + assert result.tz == rng.tz + + result = rng[:50].intersection(rng[25:75]) + assert result.name == rng.name + assert result.freqstr == "D" + assert result.tz == rng.tz + + nofreq = DatetimeIndex(list(rng[25:75]), name="other") + result = rng[:50].union(nofreq) + 
assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + result = rng[:50].intersection(nofreq) + assert result.name is None + assert result.freq == rng.freq + assert result.tz == rng.tz + + def test_from_M8_structured(self): + dates = [(datetime(2012, 9, 9, 0, 0), datetime(2012, 9, 8, 15, 10))] + arr = np.array(dates, dtype=[("Date", "M8[us]"), ("Forecasting", "M8[us]")]) + df = DataFrame(arr) + + assert df["Date"][0] == dates[0][0] + assert df["Forecasting"][0] == dates[0][1] + + s = Series(arr["Date"]) + assert isinstance(s[0], Timestamp) + assert s[0] == dates[0][0] + + def test_get_level_values_box(self): + from pandas import MultiIndex + + dates = date_range("1/1/2000", periods=4) + levels = [dates, [0, 1]] + codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]] + + index = MultiIndex(levels=levels, codes=codes) + + assert isinstance(index.get_level_values(0)[0], Timestamp) + + def test_view_tz(self): + # GH#24024 + ser = pd.Series(pd.date_range("2000", periods=4, tz="US/Central")) + result = ser.view("i8") + expected = pd.Series( + [ + 946706400000000000, + 946792800000000000, + 946879200000000000, + 946965600000000000, + ] + ) + tm.assert_series_equal(result, expected) + + def test_asarray_tz_naive(self): + # This shouldn't produce a warning. 
+ ser = pd.Series(pd.date_range("2000", periods=2)) + expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]") + result = np.asarray(ser) + + tm.assert_numpy_array_equal(result, expected) + + # optionally, object + result = np.asarray(ser, dtype=object) + + expected = np.array([pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02")]) + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_aware(self): + tz = "US/Central" + ser = pd.Series(pd.date_range("2000", periods=2, tz=tz)) + expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]") + result = np.asarray(ser, dtype="datetime64[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Old behavior with no warning + result = np.asarray(ser, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Future behavior with no warning + expected = np.array( + [pd.Timestamp("2000-01-01", tz=tz), pd.Timestamp("2000-01-02", tz=tz)] + ) + result = np.asarray(ser, dtype=object) + + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py new file mode 100644 index 00000000..a363f927 --- /dev/null +++ b/pandas/tests/series/test_timezones.py @@ -0,0 +1,366 @@ +""" +Tests for Series timezone-related methods +""" +from datetime import datetime + +from dateutil.tz import tzoffset +import numpy as np +import pytest +import pytz + +from pandas._libs.tslibs import conversion, timezones + +from pandas import DatetimeIndex, Index, NaT, Series, Timestamp +import pandas._testing as tm +from pandas.core.indexes.datetimes import date_range + + +class TestSeriesTimezones: + # ----------------------------------------------------------------- + # Series.tz_localize + def test_series_tz_localize(self): + + rng = date_range("1/1/2011", periods=100, freq="H") + ts = Series(1, index=rng) + + result = ts.tz_localize("utc") + assert result.index.tz.zone == "UTC" + + # Can't localize if already tz-aware + rng = 
date_range("1/1/2011", periods=100, freq="H", tz="utc") + ts = Series(1, index=rng) + + with pytest.raises(TypeError, match="Already tz-aware"): + ts.tz_localize("US/Eastern") + + def test_series_tz_localize_ambiguous_bool(self): + # make sure that we are correctly accepting bool values as ambiguous + + # GH#14402 + ts = Timestamp("2015-11-01 01:00:03") + expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central") + expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central") + + ser = Series([ts]) + expected0 = Series([expected0]) + expected1 = Series([expected1]) + + with pytest.raises(pytz.AmbiguousTimeError): + ser.dt.tz_localize("US/Central") + + result = ser.dt.tz_localize("US/Central", ambiguous=True) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=[True]) + tm.assert_series_equal(result, expected0) + + result = ser.dt.tz_localize("US/Central", ambiguous=False) + tm.assert_series_equal(result, expected1) + + result = ser.dt.tz_localize("US/Central", ambiguous=[False]) + tm.assert_series_equal(result, expected1) + + @pytest.mark.parametrize("tz", ["Europe/Warsaw", "dateutil/Europe/Warsaw"]) + @pytest.mark.parametrize( + "method, exp", + [ + ["shift_forward", "2015-03-29 03:00:00"], + ["NaT", NaT], + ["raise", None], + ["foo", "invalid"], + ], + ) + def test_series_tz_localize_nonexistent(self, tz, method, exp): + # GH 8917 + n = 60 + dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") + s = Series(1, dti) + if method == "raise": + with pytest.raises(pytz.NonExistentTimeError): + s.tz_localize(tz, nonexistent=method) + elif exp == "invalid": + with pytest.raises(ValueError): + dti.tz_localize(tz, nonexistent=method) + else: + result = s.tz_localize(tz, nonexistent=method) + expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def 
test_series_tz_localize_empty(self, tzstr): + # GH#2248 + ser = Series(dtype=object) + + ser2 = ser.tz_localize("utc") + assert ser2.index.tz == pytz.utc + + ser2 = ser.tz_localize(tzstr) + timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr)) + + # ----------------------------------------------------------------- + # Series.tz_convert + + def test_series_tz_convert(self): + rng = date_range("1/1/2011", periods=200, freq="D", tz="US/Eastern") + ts = Series(1, index=rng) + + result = ts.tz_convert("Europe/Berlin") + assert result.index.tz.zone == "Europe/Berlin" + + # can't convert tz-naive + rng = date_range("1/1/2011", periods=200, freq="D") + ts = Series(1, index=rng) + + with pytest.raises(TypeError, match="Cannot convert tz-naive"): + ts.tz_convert("US/Eastern") + + def test_series_tz_convert_to_utc(self): + base = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC") + idx1 = base.tz_convert("Asia/Tokyo")[:2] + idx2 = base.tz_convert("US/Eastern")[1:] + + res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) + tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) + + # ----------------------------------------------------------------- + # Series.append + + def test_series_append_aware(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex( + ["2011-01-01 01:00", "2011-01-01 02:00"], tz="US/Eastern" + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="UTC") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="UTC") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = 
DatetimeIndex(["2011-01-01 01:00", "2011-01-01 02:00"], tz="UTC") + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + utc = rng1.tz + assert utc == ts_result.index.tz + + # GH#7795 + # different tz coerces to object dtype, not UTC + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H", tz="US/Eastern") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Central") + ser1 = Series([1], index=rng1) + ser2 = Series([2], index=rng2) + ts_result = ser1.append(ser2) + exp_index = Index( + [ + Timestamp("1/1/2011 01:00", tz="US/Eastern"), + Timestamp("1/1/2011 02:00", tz="US/Central"), + ] + ) + exp = Series([1, 2], index=exp_index) + tm.assert_series_equal(ts_result, exp) + + def test_series_append_aware_naive(self): + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = date_range("1/1/2011 02:00", periods=1, freq="H", tz="US/Eastern") + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index.astype(object)) + assert ts_result.index.equals(expected) + + # mixed + rng1 = date_range("1/1/2011 01:00", periods=1, freq="H") + rng2 = range(100) + ser1 = Series(np.random.randn(len(rng1)), index=rng1) + ser2 = Series(np.random.randn(len(rng2)), index=rng2) + ts_result = ser1.append(ser2) + + expected = ser1.index.astype(object).append(ser2.index) + assert ts_result.index.equals(expected) + + def test_series_append_dst(self): + rng1 = date_range("1/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + rng2 = date_range("8/1/2016 01:00", periods=3, freq="H", tz="US/Eastern") + ser1 = Series([1, 2, 3], index=rng1) + ser2 = Series([10, 11, 12], index=rng2) + ts_result = ser1.append(ser2) + + exp_index = DatetimeIndex( + [ + "2016-01-01 01:00", + "2016-01-01 02:00", + "2016-01-01 03:00", + "2016-08-01 01:00", + "2016-08-01 02:00", + "2016-08-01 03:00", + ], + tz="US/Eastern", + ) 
+ exp = Series([1, 2, 3, 10, 11, 12], index=exp_index) + tm.assert_series_equal(ts_result, exp) + assert ts_result.index.tz == rng1.tz + + # ----------------------------------------------------------------- + + def test_dateutil_tzoffset_support(self): + values = [188.5, 328.25] + tzinfo = tzoffset(None, 7200) + index = [ + datetime(2012, 5, 11, 11, tzinfo=tzinfo), + datetime(2012, 5, 11, 12, tzinfo=tzinfo), + ] + series = Series(data=values, index=index) + + assert series.index.tz == tzinfo + + # it works! #2443 + repr(series.index[0]) + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_tz_aware_asfreq(self, tz): + dr = date_range("2011-12-01", "2012-07-20", freq="D", tz=tz) + + ser = Series(np.random.randn(len(dr)), index=dr) + + # it works! + ser.asfreq("T") + + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_string_index_alias_tz_aware(self, tz): + rng = date_range("1/1/2000", periods=10, tz=tz) + ser = Series(np.random.randn(len(rng)), index=rng) + + result = ser["1/3/2000"] + tm.assert_almost_equal(result, ser[2]) + + # TODO: De-duplicate with test below + def test_series_add_tz_mismatch_converts_to_utc_duplicate(self): + rng = date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern") + ser = Series(np.random.randn(len(rng)), index=rng) + + ts_moscow = ser.tz_convert("Europe/Moscow") + + result = ser + ts_moscow + assert result.index.tz is pytz.utc + + result = ts_moscow + ser + assert result.index.tz is pytz.utc + + def test_series_add_tz_mismatch_converts_to_utc(self): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + perm = np.random.permutation(100)[:90] + ser1 = Series( + np.random.randn(90), index=rng.take(perm).tz_convert("US/Eastern") + ) + + perm = np.random.permutation(100)[:90] + ser2 = Series( + np.random.randn(90), index=rng.take(perm).tz_convert("Europe/Berlin") + ) + + result = ser1 + ser2 + + uts1 = ser1.tz_convert("utc") + uts2 = ser2.tz_convert("utc") + 
expected = uts1 + uts2 + + assert result.index.tz == pytz.UTC + tm.assert_series_equal(result, expected) + + def test_series_add_aware_naive_raises(self): + rng = date_range("1/1/2011", periods=10, freq="H") + ser = Series(np.random.randn(len(rng)), index=rng) + + ser_utc = ser.tz_localize("utc") + + with pytest.raises(Exception): + ser + ser_utc + + with pytest.raises(Exception): + ser_utc + ser + + def test_series_align_aware(self): + idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern") + ser = Series(np.random.randn(len(idx1)), index=idx1) + ser_central = ser.tz_convert("US/Central") + # # different timezones convert to UTC + + new1, new2 = ser.align(ser_central) + assert new1.index.tz == pytz.UTC + assert new2.index.tz == pytz.UTC + + @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) + def test_localized_at_time_between_time(self, tzstr): + from datetime import time + + tz = timezones.maybe_get_tz(tzstr) + + rng = date_range("4/16/2012", "5/1/2012", freq="H") + ts = Series(np.random.randn(len(rng)), index=rng) + + ts_local = ts.tz_localize(tzstr) + + result = ts_local.at_time(time(10, 0)) + expected = ts.at_time(time(10, 0)).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + t1, t2 = time(10, 0), time(11, 0) + result = ts_local.between_time(t1, t2) + expected = ts.between_time(t1, t2).tz_localize(tzstr) + tm.assert_series_equal(result, expected) + assert timezones.tz_compare(result.index.tz, tz) + + @pytest.mark.parametrize("tzstr", ["Europe/Berlin", "dateutil/Europe/Berlin"]) + def test_getitem_pydatetime_tz(self, tzstr): + tz = timezones.maybe_get_tz(tzstr) + + index = date_range( + start="2012-12-24 16:00", end="2012-12-24 18:00", freq="H", tz=tzstr + ) + ts = Series(index=index, data=index.hour) + time_pandas = Timestamp("2012-12-24 17:00", tz=tzstr) + + dt = datetime(2012, 12, 24, 17, 0) + time_datetime = conversion.localize_pydatetime(dt, tz) + assert 
ts[time_pandas] == ts[time_datetime] + + def test_series_truncate_datetimeindex_tz(self): + # GH 9243 + idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific") + s = Series(range(len(idx)), index=idx) + result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) + expected = Series([1, 2, 3], index=idx[1:4]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("copy", [True, False]) + @pytest.mark.parametrize( + "method, tz", [["tz_localize", None], ["tz_convert", "Europe/Berlin"]] + ) + def test_tz_localize_convert_copy_inplace_mutate(self, copy, method, tz): + # GH 6326 + result = Series( + np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz) + ) + getattr(result, method)("UTC", copy=copy) + expected = Series( + np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=tz) + ) + tm.assert_series_equal(result, expected) + + def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): + # GH 25843 + tz = tz_aware_fixture + result = Series([Timestamp("2019", tz=tz)], dtype="datetime64[ns]") + expected = Series([Timestamp("2019")]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py new file mode 100644 index 00000000..ece7f1f2 --- /dev/null +++ b/pandas/tests/series/test_ufunc.py @@ -0,0 +1,304 @@ +from collections import deque +import string + +import numpy as np +import pytest + +import pandas as pd +import pandas._testing as tm +from pandas.arrays import SparseArray + +UNARY_UFUNCS = [np.positive, np.floor, np.exp] +BINARY_UFUNCS = [np.add, np.logaddexp] # dunder op +SPARSE = [True, False] +SPARSE_IDS = ["sparse", "dense"] +SHUFFLE = [True, False] + + +@pytest.fixture +def arrays_for_binary_ufunc(): + """ + A pair of random, length-100 integer-dtype arrays, that are mostly 0. 
+ """ + a1 = np.random.randint(0, 10, 100, dtype="int64") + a2 = np.random.randint(0, 10, 100, dtype="int64") + a1[::3] = 0 + a2[::4] = 0 + return a1, a2 + + +@pytest.mark.parametrize("ufunc", UNARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def test_unary_ufunc(ufunc, sparse): + # Test that ufunc(Series) == Series(ufunc) + array = np.random.randint(0, 10, 10, dtype="int64") + array[::2] = 0 + if sparse: + array = SparseArray(array, dtype=pd.SparseDtype("int64", 0)) + + index = list(string.ascii_letters[:10]) + name = "name" + series = pd.Series(array, index=index, name=name) + + result = ufunc(series) + expected = pd.Series(ufunc(array), index=index, name=name) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"]) +def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc): + # Test that ufunc(Series(a), array) == Series(ufunc(a, b)) + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + name = "name" # op(Series, array) preserves the name. 
+ series = pd.Series(a1, name=name) + other = a2 + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = reversed(array_args) + series_args = reversed(series_args) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"]) +def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc): + # Test that + # * func(Series(a), Series(b)) == Series(ufunc(a, b)) + # * ufunc(Index, Series) dispatches to Series (returns a Series) + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + name = "name" # op(Series, array) preserves the name. + series = pd.Series(a1, name=name) + other = pd.Index(a2, name=name).astype("int64") + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = reversed(array_args) + series_args = reversed(series_args) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"]) +@pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"]) +def test_binary_ufunc_with_series( + flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc +): + # Test that + # * func(Series(a), Series(b)) == Series(ufunc(a, b)) + # with alignment between the indices + a1, a2 = arrays_for_binary_ufunc + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = 
SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + name = "name" # op(Series, array) preserves the name. + series = pd.Series(a1, name=name) + other = pd.Series(a2, name=name) + + idx = np.random.permutation(len(a1)) + + if shuffle: + other = other.take(idx) + if flip: + index = other.align(series)[0].index + else: + index = series.align(other)[0].index + else: + index = series.index + + array_args = (a1, a2) + series_args = (series, other) # ufunc(series, array) + + if flip: + array_args = tuple(reversed(array_args)) + series_args = tuple(reversed(series_args)) # ufunc(array, series) + + expected = pd.Series(ufunc(*array_args), index=index, name=name) + result = ufunc(*series_args) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("flip", [True, False]) +def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc): + # Test that + # * ufunc(Series, scalar) == Series(ufunc(array, scalar)) + # * ufunc(Series, scalar) == ufunc(scalar, Series) + array, _ = arrays_for_binary_ufunc + if sparse: + array = SparseArray(array) + other = 2 + series = pd.Series(array, name="name") + + series_args = (series, other) + array_args = (array, other) + + if flip: + series_args = tuple(reversed(series_args)) + array_args = tuple(reversed(array_args)) + + expected = pd.Series(ufunc(*array_args), name="name") + result = ufunc(*series_args) + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("ufunc", [np.divmod]) # any others? +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("shuffle", SHUFFLE) +@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning") +def test_multiple_ouput_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc): + # Test that + # the same conditions from binary_ufunc_scalar apply to + # ufuncs with multiple outputs. 
+ if sparse and ufunc is np.divmod: + pytest.skip("sparse divmod not implemented.") + + a1, a2 = arrays_for_binary_ufunc + # work around https://github.com/pandas-dev/pandas/issues/26987 + a1[a1 == 0] = 1 + a2[a2 == 0] = 1 + + if sparse: + a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0)) + a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0)) + + s1 = pd.Series(a1) + s2 = pd.Series(a2) + + if shuffle: + # ensure we align before applying the ufunc + s2 = s2.sample(frac=1) + + expected = ufunc(a1, a2) + assert isinstance(expected, tuple) + + result = ufunc(s1, s2) + assert isinstance(result, tuple) + tm.assert_series_equal(result[0], pd.Series(expected[0])) + tm.assert_series_equal(result[1], pd.Series(expected[1])) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +def test_multiple_ouput_ufunc(sparse, arrays_for_binary_ufunc): + # Test that the same conditions from unary input apply to multi-output + # ufuncs + array, _ = arrays_for_binary_ufunc + + if sparse: + array = SparseArray(array) + + series = pd.Series(array, name="name") + result = np.modf(series) + expected = np.modf(array) + + assert isinstance(result, tuple) + assert isinstance(expected, tuple) + + tm.assert_series_equal(result[0], pd.Series(expected[0], name="name")) + tm.assert_series_equal(result[1], pd.Series(expected[1], name="name")) + + +@pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS) +@pytest.mark.parametrize("ufunc", BINARY_UFUNCS) +def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc): + # Drop the names when they differ. 
+ a1, a2 = arrays_for_binary_ufunc + s1 = pd.Series(a1, name="a") + s2 = pd.Series(a2, name="b") + + result = ufunc(s1, s2) + assert result.name is None + + +def test_object_series_ok(): + class Dummy: + def __init__(self, value): + self.value = value + + def __add__(self, other): + return self.value + other.value + + arr = np.array([Dummy(0), Dummy(1)]) + ser = pd.Series(arr) + tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr))) + tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1)))) + + +@pytest.mark.parametrize( + "values", + [ + pd.array([1, 3, 2], dtype="int64"), + pd.array([1, 10, 0], dtype="Sparse[int]"), + pd.to_datetime(["2000", "2010", "2001"]), + pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"), + pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"), + ], +) +def test_reduce(values): + a = pd.Series(values) + assert np.maximum.reduce(a) == values[1] + + +@pytest.mark.parametrize("type_", [list, deque, tuple]) +def test_binary_ufunc_other_types(type_): + a = pd.Series([1, 2, 3], name="name") + b = type_([3, 4, 5]) + + result = np.add(a, b) + expected = pd.Series(np.add(a.to_numpy(), b), name="name") + tm.assert_series_equal(result, expected) + + +def test_object_dtype_ok(): + class Thing: + def __init__(self, value): + self.value = value + + def __add__(self, other): + other = getattr(other, "value", other) + return type(self)(self.value + other) + + def __eq__(self, other) -> bool: + return type(other) is Thing and self.value == other.value + + def __repr__(self) -> str: + return "Thing({})".format(self.value) + + s = pd.Series([Thing(1), Thing(2)]) + result = np.add(s, Thing(1)) + expected = pd.Series([Thing(2), Thing(3)]) + tm.assert_series_equal(result, expected) + + +def test_outer(): + # https://github.com/pandas-dev/pandas/issues/27186 + s = pd.Series([1, 2, 3]) + o = np.array([1, 2, 3]) + + with pytest.raises(NotImplementedError): + np.subtract.outer(s, o) diff --git 
a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py new file mode 100644 index 00000000..c4311f50 --- /dev/null +++ b/pandas/tests/series/test_validate.py @@ -0,0 +1,20 @@ +import pytest + + +class TestSeriesValidate: + """Tests for error handling related to data types of method arguments.""" + + @pytest.mark.parametrize( + "func", + ["reset_index", "_set_name", "sort_values", "sort_index", "rename", "dropna"], + ) + @pytest.mark.parametrize("inplace", [1, "True", [1, 2, 3], 5.0]) + def test_validate_bool_args(self, string_series, func, inplace): + msg = 'For argument "inplace" expected type bool' + kwargs = dict(inplace=inplace) + + if func == "_set_name": + kwargs["name"] = "hello" + + with pytest.raises(ValueError, match=msg): + getattr(string_series, func)(**kwargs) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py new file mode 100644 index 00000000..57ee3bed --- /dev/null +++ b/pandas/tests/test_algos.py @@ -0,0 +1,2295 @@ +from datetime import datetime +from itertools import permutations +import struct + +import numpy as np +from numpy.random import RandomState +import pytest + +from pandas._libs import algos as libalgos, groupby as libgroupby, hashtable as ht +from pandas.compat.numpy import np_array_datetime64_compat +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_complex_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, +) +from pandas.core.dtypes.dtypes import CategoricalDtype as CDT + +import pandas as pd +from pandas import ( + Categorical, + CategoricalIndex, + DatetimeIndex, + Index, + IntervalIndex, + Series, + Timestamp, + compat, +) +import pandas._testing as tm +from pandas.conftest import BYTES_DTYPES, STRING_DTYPES +import pandas.core.algorithms as algos +from pandas.core.arrays import DatetimeArray +import pandas.core.common as com + + +class TestFactorize: + def test_basic(self): + + codes, uniques = algos.factorize(["a", 
"b", "b", "a", "a", "c", "c", "c"]) + tm.assert_numpy_array_equal(uniques, np.array(["a", "b", "c"], dtype=object)) + + codes, uniques = algos.factorize( + ["a", "b", "b", "a", "a", "c", "c", "c"], sort=True + ) + exp = np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array(["a", "b", "c"], dtype=object) + tm.assert_numpy_array_equal(uniques, exp) + + codes, uniques = algos.factorize(list(reversed(range(5)))) + exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([4, 3, 2, 1, 0], dtype=np.int64) + tm.assert_numpy_array_equal(uniques, exp) + + codes, uniques = algos.factorize(list(reversed(range(5))), sort=True) + + exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([0, 1, 2, 3, 4], dtype=np.int64) + tm.assert_numpy_array_equal(uniques, exp) + + codes, uniques = algos.factorize(list(reversed(np.arange(5.0)))) + exp = np.array([0, 1, 2, 3, 4], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([4.0, 3.0, 2.0, 1.0, 0.0], dtype=np.float64) + tm.assert_numpy_array_equal(uniques, exp) + + codes, uniques = algos.factorize(list(reversed(np.arange(5.0))), sort=True) + exp = np.array([4, 3, 2, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = np.array([0.0, 1.0, 2.0, 3.0, 4.0], dtype=np.float64) + tm.assert_numpy_array_equal(uniques, exp) + + def test_mixed(self): + + # doc example reshaping.rst + x = Series(["A", "A", np.nan, "B", 3.14, np.inf]) + codes, uniques = algos.factorize(x) + + exp = np.array([0, 0, -1, 1, 2, 3], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = Index(["A", "B", 3.14, np.inf]) + tm.assert_index_equal(uniques, exp) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([2, 2, -1, 3, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = Index([3.14, np.inf, "A", "B"]) + tm.assert_index_equal(uniques, 
exp) + + def test_datelike(self): + + # M8 + v1 = Timestamp("20130101 09:00:00.00004") + v2 = Timestamp("20130101") + x = Series([v1, v1, v1, v2, v2, v1]) + codes, uniques = algos.factorize(x) + + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = DatetimeIndex([v1, v2]) + tm.assert_index_equal(uniques, exp) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([1, 1, 1, 0, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + exp = DatetimeIndex([v2, v1]) + tm.assert_index_equal(uniques, exp) + + # period + v1 = pd.Period("201302", freq="M") + v2 = pd.Period("201303", freq="M") + x = Series([v1, v1, v1, v2, v2, v1]) + + # periods are not 'sorted' as they are converted back into an index + codes, uniques = algos.factorize(x) + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([0, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, pd.PeriodIndex([v1, v2])) + + # GH 5986 + v1 = pd.to_timedelta("1 day 1 min") + v2 = pd.to_timedelta("1 day") + x = Series([v1, v2, v1, v1, v2, v2, v1]) + codes, uniques = algos.factorize(x) + exp = np.array([0, 1, 0, 0, 1, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, pd.to_timedelta([v1, v2])) + + codes, uniques = algos.factorize(x, sort=True) + exp = np.array([1, 0, 1, 1, 0, 0, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, exp) + tm.assert_index_equal(uniques, pd.to_timedelta([v2, v1])) + + def test_factorize_nan(self): + # nan should map to na_sentinel, not reverse_indexer[na_sentinel] + # rizer.factorize should not raise an exception if na_sentinel indexes + # outside of reverse_indexer + key = np.array([1, 2, 1, np.nan], dtype="O") + rizer = ht.Factorizer(len(key)) + for 
na_sentinel in (-1, 20): + ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel) + expected = np.array([0, 1, 0, na_sentinel], dtype="int32") + assert len(set(key)) == len(set(expected)) + tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) + + # nan still maps to na_sentinel when sort=False + key = np.array([0, np.nan, 1], dtype="O") + na_sentinel = -1 + + # TODO(wesm): unused? + ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel) # noqa + + expected = np.array([2, -1, 0], dtype="int32") + assert len(set(key)) == len(set(expected)) + tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel) + + @pytest.mark.parametrize( + "data, expected_codes, expected_uniques", + [ + ( + [(1, 1), (1, 2), (0, 0), (1, 2), "nonsense"], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), "nonsense"], + ), + ( + [(1, 1), (1, 2), (0, 0), (1, 2), (1, 2, 3)], + [0, 1, 2, 1, 3], + [(1, 1), (1, 2), (0, 0), (1, 2, 3)], + ), + ([(1, 1), (1, 2), (0, 0), (1, 2)], [0, 1, 2, 1], [(1, 1), (1, 2), (0, 0)]), + ], + ) + def test_factorize_tuple_list(self, data, expected_codes, expected_uniques): + # GH9454 + codes, uniques = pd.factorize(data) + + tm.assert_numpy_array_equal(codes, np.array(expected_codes, dtype=np.intp)) + + expected_uniques_array = com.asarray_tuplesafe(expected_uniques, dtype=object) + tm.assert_numpy_array_equal(uniques, expected_uniques_array) + + def test_complex_sorting(self): + # gh 12666 - check no segfault + x17 = np.array([complex(i) for i in range(17)], dtype=object) + + msg = ( + "unorderable types: .* [<>] .*" + "|" # the above case happens for numpy < 1.14 + "'[<>]' not supported between instances of .*" + ) + with pytest.raises(TypeError, match=msg): + algos.factorize(x17[::-1], sort=True) + + def test_float64_factorize(self, writable): + data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0, 2, 1, 0], dtype=np.intp) + expected_uniques = 
np.array([1.0, 1e8, 1e-8], dtype=np.float64) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_uint64_factorize(self, writable): + data = np.array([2 ** 64 - 1, 1, 2 ** 64 - 1], dtype=np.uint64) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0], dtype=np.intp) + expected_uniques = np.array([2 ** 64 - 1, 1], dtype=np.uint64) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_int64_factorize(self, writable): + data = np.array([2 ** 63 - 1, -(2 ** 63), 2 ** 63 - 1], dtype=np.int64) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0], dtype=np.intp) + expected_uniques = np.array([2 ** 63 - 1, -(2 ** 63)], dtype=np.int64) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_string_factorize(self, writable): + data = np.array(["a", "c", "a", "b", "c"], dtype=object) + data.setflags(write=writable) + expected_codes = np.array([0, 1, 0, 2, 1], dtype=np.intp) + expected_uniques = np.array(["a", "c", "b"], dtype=object) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_object_factorize(self, writable): + data = np.array(["a", "c", None, np.nan, "a", "b", pd.NaT, "c"], dtype=object) + data.setflags(write=writable) + expected_codes = np.array([0, 1, -1, -1, 0, 2, -1, 1], dtype=np.intp) + expected_uniques = np.array(["a", "c", "b"], dtype=object) + + codes, uniques = algos.factorize(data) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + def test_deprecate_order(self): + # gh 19727 - check warning is raised for deprecated 
keyword, order. + # Test not valid once order keyword is removed. + data = np.array([2 ** 63, 1, 2 ** 63], dtype=np.uint64) + with pytest.raises(TypeError, match="got an unexpected keyword"): + algos.factorize(data, order=True) + with tm.assert_produces_warning(False): + algos.factorize(data) + + @pytest.mark.parametrize( + "data", + [ + np.array([0, 1, 0], dtype="u8"), + np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), + np.array(["__nan__", "foo", "__nan__"], dtype="object"), + ], + ) + def test_parametrized_factorize_na_value_default(self, data): + # arrays that include the NA default for that type, but isn't used. + codes, uniques = algos.factorize(data) + expected_uniques = data[[0, 1]] + expected_codes = np.array([0, 1, 0], dtype=np.intp) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize( + "data, na_value", + [ + (np.array([0, 1, 0, 2], dtype="u8"), 0), + (np.array([1, 0, 1, 2], dtype="u8"), 1), + (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)), + (np.array([1, -(2 ** 63), 1, 0], dtype="i8"), 1), + (np.array(["a", "", "a", "b"], dtype=object), "a"), + (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), + (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)), + ], + ) + def test_parametrized_factorize_na_value(self, data, na_value): + codes, uniques = algos._factorize_array(data, na_value=na_value) + expected_uniques = data[[1, 3]] + expected_codes = np.array([-1, 0, -1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(codes, expected_codes) + tm.assert_numpy_array_equal(uniques, expected_uniques) + + @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("na_sentinel", [-1, -10, 100]) + @pytest.mark.parametrize( + "data, uniques", + [ + ( + np.array(["b", "a", None, "b"], dtype=object), + np.array(["b", "a"], dtype=object), + ), + ( + pd.array([2, 1, np.nan, 2], dtype="Int64"), + pd.array([2, 1], 
dtype="Int64"), + ), + ], + ids=["numpy_array", "extension_array"], + ) + def test_factorize_na_sentinel(self, sort, na_sentinel, data, uniques): + codes, uniques = algos.factorize(data, sort=sort, na_sentinel=na_sentinel) + if sort: + expected_codes = np.array([1, 0, na_sentinel, 1], dtype=np.intp) + expected_uniques = algos.safe_sort(uniques) + else: + expected_codes = np.array([0, 1, na_sentinel, 0], dtype=np.intp) + expected_uniques = uniques + tm.assert_numpy_array_equal(codes, expected_codes) + if isinstance(data, np.ndarray): + tm.assert_numpy_array_equal(uniques, expected_uniques) + else: + tm.assert_extension_array_equal(uniques, expected_uniques) + + +class TestUnique: + def test_ints(self): + arr = np.random.randint(0, 100, size=50) + + result = algos.unique(arr) + assert isinstance(result, np.ndarray) + + def test_objects(self): + arr = np.random.randint(0, 100, size=50).astype("O") + + result = algos.unique(arr) + assert isinstance(result, np.ndarray) + + def test_object_refcount_bug(self): + lst = ["A", "B", "C", "D", "E"] + for i in range(1000): + len(algos.unique(lst)) + + def test_on_index_object(self): + + mindex = pd.MultiIndex.from_arrays( + [np.arange(5).repeat(5), np.tile(np.arange(5), 5)] + ) + expected = mindex.values + expected.sort() + + mindex = mindex.repeat(2) + + result = pd.unique(mindex) + result.sort() + + tm.assert_almost_equal(result, expected) + + def test_dtype_preservation(self, any_numpy_dtype): + # GH 15442 + if any_numpy_dtype in (BYTES_DTYPES + STRING_DTYPES): + pytest.skip("skip string dtype") + elif is_integer_dtype(any_numpy_dtype): + data = [1, 2, 2] + uniques = [1, 2] + elif is_float_dtype(any_numpy_dtype): + data = [1, 2, 2] + uniques = [1.0, 2.0] + elif is_complex_dtype(any_numpy_dtype): + data = [complex(1, 0), complex(2, 0), complex(2, 0)] + uniques = [complex(1, 0), complex(2, 0)] + elif is_bool_dtype(any_numpy_dtype): + data = [True, True, False] + uniques = [True, False] + elif is_object_dtype(any_numpy_dtype): 
+ data = ["A", "B", "B"] + uniques = ["A", "B"] + else: + # datetime64[ns]/M8[ns]/timedelta64[ns]/m8[ns] tested elsewhere + data = [1, 2, 2] + uniques = [1, 2] + + result = Series(data, dtype=any_numpy_dtype).unique() + expected = np.array(uniques, dtype=any_numpy_dtype) + + tm.assert_numpy_array_equal(result, expected) + + def test_datetime64_dtype_array_returned(self): + # GH 9431 + expected = np_array_datetime64_compat( + [ + "2015-01-03T00:00:00.000000000+0000", + "2015-01-01T00:00:00.000000000+0000", + ], + dtype="M8[ns]", + ) + + dt_index = pd.to_datetime( + [ + "2015-01-03T00:00:00.000000000", + "2015-01-01T00:00:00.000000000", + "2015-01-01T00:00:00.000000000", + ] + ) + result = algos.unique(dt_index) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + s = Series(dt_index) + result = algos.unique(s) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + arr = s.values + result = algos.unique(arr) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + def test_datetime_non_ns(self): + a = np.array(["2000", "2000", "2001"], dtype="datetime64[s]") + result = pd.unique(a) + expected = np.array(["2000", "2001"], dtype="datetime64[ns]") + tm.assert_numpy_array_equal(result, expected) + + def test_timedelta_non_ns(self): + a = np.array(["2000", "2000", "2001"], dtype="timedelta64[s]") + result = pd.unique(a) + expected = np.array([2000000000000, 2001000000000], dtype="timedelta64[ns]") + tm.assert_numpy_array_equal(result, expected) + + def test_timedelta64_dtype_array_returned(self): + # GH 9431 + expected = np.array([31200, 45678, 10000], dtype="m8[ns]") + + td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678]) + result = algos.unique(td_index) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + s = Series(td_index) + result = algos.unique(s) + tm.assert_numpy_array_equal(result, expected) + assert 
result.dtype == expected.dtype + + arr = s.values + result = algos.unique(arr) + tm.assert_numpy_array_equal(result, expected) + assert result.dtype == expected.dtype + + def test_uint64_overflow(self): + s = Series([1, 2, 2 ** 63, 2 ** 63], dtype=np.uint64) + exp = np.array([1, 2, 2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(algos.unique(s), exp) + + def test_nan_in_object_array(self): + duplicated_items = ["a", np.nan, "c", "c"] + result = pd.unique(duplicated_items) + expected = np.array(["a", np.nan, "c"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_categorical(self): + + # we are expecting to return in the order + # of appearance + expected = Categorical(list("bac"), categories=list("bac")) + + # we are expecting to return in the order + # of the categories + expected_o = Categorical(list("bac"), categories=list("abc"), ordered=True) + + # GH 15939 + c = Categorical(list("baabc")) + result = c.unique() + tm.assert_categorical_equal(result, expected) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected) + + c = Categorical(list("baabc"), ordered=True) + result = c.unique() + tm.assert_categorical_equal(result, expected_o) + + result = algos.unique(c) + tm.assert_categorical_equal(result, expected_o) + + # Series of categorical dtype + s = Series(Categorical(list("baabc")), name="foo") + result = s.unique() + tm.assert_categorical_equal(result, expected) + + result = pd.unique(s) + tm.assert_categorical_equal(result, expected) + + # CI -> return CI + ci = CategoricalIndex(Categorical(list("baabc"), categories=list("bac"))) + expected = CategoricalIndex(expected) + result = ci.unique() + tm.assert_index_equal(result, expected) + + result = pd.unique(ci) + tm.assert_index_equal(result, expected) + + def test_datetime64tz_aware(self): + # GH 15939 + + result = Series( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ).unique() + expected = 
DatetimeArray._from_sequence( + np.array([Timestamp("2016-01-01 00:00:00-0500", tz="US/Eastern")]) + ) + tm.assert_extension_array_equal(result, expected) + + result = Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ).unique() + expected = DatetimeIndex( + ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None + ) + tm.assert_index_equal(result, expected) + + result = pd.unique( + Series( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ) + ) + expected = DatetimeArray._from_sequence( + np.array([Timestamp("2016-01-01", tz="US/Eastern")]) + ) + tm.assert_extension_array_equal(result, expected) + + result = pd.unique( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ) + expected = DatetimeIndex( + ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None + ) + tm.assert_index_equal(result, expected) + + def test_order_of_appearance(self): + # 9346 + # light testing of guarantee of order of appearance + # these also are the doc-examples + result = pd.unique(Series([2, 1, 3, 3])) + tm.assert_numpy_array_equal(result, np.array([2, 1, 3], dtype="int64")) + + result = pd.unique(Series([2] + [1] * 5)) + tm.assert_numpy_array_equal(result, np.array([2, 1], dtype="int64")) + + result = pd.unique(Series([Timestamp("20160101"), Timestamp("20160101")])) + expected = np.array(["2016-01-01T00:00:00.000000000"], dtype="datetime64[ns]") + tm.assert_numpy_array_equal(result, expected) + + result = pd.unique( + Index( + [ + Timestamp("20160101", tz="US/Eastern"), + Timestamp("20160101", tz="US/Eastern"), + ] + ) + ) + expected = DatetimeIndex( + ["2016-01-01 00:00:00"], dtype="datetime64[ns, US/Eastern]", freq=None + ) + tm.assert_index_equal(result, expected) + + result = pd.unique(list("aabc")) + expected = np.array(["a", "b", "c"], dtype=object) + tm.assert_numpy_array_equal(result, 
expected) + + result = pd.unique(Series(Categorical(list("aabc")))) + expected = Categorical(list("abc")) + tm.assert_categorical_equal(result, expected) + + @pytest.mark.parametrize( + "arg ,expected", + [ + (("1", "1", "2"), np.array(["1", "2"], dtype=object)), + (("foo",), np.array(["foo"], dtype=object)), + ], + ) + def test_tuple_with_strings(self, arg, expected): + # see GH 17108 + result = pd.unique(arg) + tm.assert_numpy_array_equal(result, expected) + + def test_obj_none_preservation(self): + # GH 20866 + arr = np.array(["foo", None], dtype=object) + result = pd.unique(arr) + expected = np.array(["foo", None], dtype=object) + + tm.assert_numpy_array_equal(result, expected, strict_nan=True) + + def test_signed_zero(self): + # GH 21866 + a = np.array([-0.0, 0.0]) + result = pd.unique(a) + expected = np.array([-0.0]) # 0.0 and -0.0 are equivalent + tm.assert_numpy_array_equal(result, expected) + + def test_different_nans(self): + # GH 21866 + # create different nans from bit-patterns: + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + a = np.array([NAN1, NAN2]) # NAN1 and NAN2 are equivalent + result = pd.unique(a) + expected = np.array([np.nan]) + tm.assert_numpy_array_equal(result, expected) + + def test_first_nan_kept(self): + # GH 22295 + # create different nans from bit-patterns: + bits_for_nan1 = 0xFFF8000000000001 + bits_for_nan2 = 0x7FF8000000000001 + NAN1 = struct.unpack("d", struct.pack("=Q", bits_for_nan1))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + for el_type in [np.float64, np.object]: + a = np.array([NAN1, NAN2], dtype=el_type) + result = pd.unique(a) + assert result.size == 1 + # use bit patterns to identify which nan was kept: + result_nan_bits = struct.unpack("=Q", struct.pack("d", result[0]))[0] + assert result_nan_bits == bits_for_nan1 + 
+ def test_do_not_mangle_na_values(self, unique_nulls_fixture, unique_nulls_fixture2): + # GH 22295 + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values not unique + a = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=np.object) + result = pd.unique(a) + assert result.size == 2 + assert a[0] is unique_nulls_fixture + assert a[1] is unique_nulls_fixture2 + + +class TestIsin: + def test_invalid(self): + + msg = ( + r"only list-like objects are allowed to be passed to isin\(\)," + r" you passed a \[int\]" + ) + with pytest.raises(TypeError, match=msg): + algos.isin(1, 1) + with pytest.raises(TypeError, match=msg): + algos.isin(1, [1]) + with pytest.raises(TypeError, match=msg): + algos.isin([1], 1) + + def test_basic(self): + + result = algos.isin([1, 2], [1]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(np.array([1, 2]), [1]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series([1, 2]), [1]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series([1, 2]), Series([1])) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series([1, 2]), {1}) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(["a", "b"], ["a"]) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series(["a", "b"]), Series(["a"])) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(Series(["a", "b"]), {"a"}) + expected = np.array([True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(["a", "b"], [1]) + expected = np.array([False, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_i8(self): + + arr = 
pd.date_range("20130101", periods=3).values + result = algos.isin(arr, [arr[0]]) + expected = np.array([True, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, arr[0:2]) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, set(arr[0:2])) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + arr = pd.timedelta_range("1 day", periods=3).values + result = algos.isin(arr, [arr[0]]) + expected = np.array([True, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, arr[0:2]) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.isin(arr, set(arr[0:2])) + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + def test_large(self): + + s = pd.date_range("20000101", periods=2000000, freq="s").values + result = algos.isin(s, s[0:2]) + expected = np.zeros(len(s), dtype=bool) + expected[0] = True + expected[1] = True + tm.assert_numpy_array_equal(result, expected) + + def test_categorical_from_codes(self): + # GH 16639 + vals = np.array([0, 1, 2, 0]) + cats = ["a", "b", "c"] + Sd = Series(Categorical(1).from_codes(vals, cats)) + St = Series(Categorical(1).from_codes(np.array([0, 1]), cats)) + expected = np.array([True, True, False, True]) + result = algos.isin(Sd, St) + tm.assert_numpy_array_equal(expected, result) + + def test_same_nan_is_in(self): + # GH 22160 + # nan is special, because from " a is b" doesn't follow "a == b" + # at least, isin() should follow python's "np.nan in [nan] == True" + # casting to -> np.float64 -> another float-object somewhere on + # the way could jeopardize this behavior + comps = [np.nan] # could be cast to float64 + values = [np.nan] + expected = np.array([True]) + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(expected, result) + + def 
test_same_object_is_in(self): + # GH 22160 + # there could be special treatment for nans + # the user however could define a custom class + # with similar behavior, then we at least should + # fall back to Python's usual behavior: "a in [a] == True" + class LikeNan: + def __eq__(self, other) -> bool: + return False + + def __hash__(self): + return 0 + + a, b = LikeNan(), LikeNan() + # same object -> True + tm.assert_numpy_array_equal(algos.isin([a], [a]), np.array([True])) + # different objects -> False + tm.assert_numpy_array_equal(algos.isin([a], [b]), np.array([False])) + + def test_different_nans(self): + # GH 22160 + # all nans are handled as equivalent + + comps = [float("nan")] + values = [float("nan")] + assert comps[0] is not values[0] # different nan-objects + + # as list of python-objects: + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(np.array([True]), result) + + # as object-array: + result = algos.isin( + np.asarray(comps, dtype=np.object), np.asarray(values, dtype=np.object) + ) + tm.assert_numpy_array_equal(np.array([True]), result) + + # as float64-array: + result = algos.isin( + np.asarray(comps, dtype=np.float64), np.asarray(values, dtype=np.float64) + ) + tm.assert_numpy_array_equal(np.array([True]), result) + + def test_no_cast(self): + # GH 22160 + # ensure 42 is not cast to a string + comps = ["ss", 42] + values = ["42"] + expected = np.array([False, False]) + result = algos.isin(comps, values) + tm.assert_numpy_array_equal(expected, result) + + @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])]) + def test_empty(self, empty): + # see gh-16991 + vals = Index(["a", "b"]) + expected = np.array([False, False]) + + result = algos.isin(vals, empty) + tm.assert_numpy_array_equal(expected, result) + + def test_different_nan_objects(self): + # GH 22119 + comps = np.array(["nan", np.nan * 1j, float("nan")], dtype=np.object) + vals = np.array([float("nan")], dtype=np.object) + expected = np.array([False, 
False, True]) + result = algos.isin(comps, vals) + tm.assert_numpy_array_equal(expected, result) + + def test_different_nans_as_float64(self): + # GH 21866 + # create different nans from bit-patterns, + # these nans will land in different buckets in the hash-table + # if no special care is taken + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + + # check that NAN1 and NAN2 are equivalent: + arr = np.array([NAN1, NAN2], dtype=np.float64) + lookup1 = np.array([NAN1], dtype=np.float64) + result = algos.isin(arr, lookup1) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + lookup2 = np.array([NAN2], dtype=np.float64) + result = algos.isin(arr, lookup2) + expected = np.array([True, True]) + tm.assert_numpy_array_equal(result, expected) + + +class TestValueCounts: + def test_value_counts(self): + np.random.seed(1234) + from pandas.core.reshape.tile import cut + + arr = np.random.randn(4) + factor = cut(arr, 4) + + # assert isinstance(factor, n) + result = algos.value_counts(factor) + breaks = [-1.194, -0.535, 0.121, 0.777, 1.433] + index = IntervalIndex.from_breaks(breaks).astype(CDT(ordered=True)) + expected = Series([1, 1, 1, 1], index=index) + tm.assert_series_equal(result.sort_index(), expected.sort_index()) + + def test_value_counts_bins(self): + s = [1, 2, 3, 4] + result = algos.value_counts(s, bins=1) + expected = Series([4], index=IntervalIndex.from_tuples([(0.996, 4.0)])) + tm.assert_series_equal(result, expected) + + result = algos.value_counts(s, bins=2, sort=False) + expected = Series( + [2, 2], index=IntervalIndex.from_tuples([(0.996, 2.5), (2.5, 4.0)]) + ) + tm.assert_series_equal(result, expected) + + def test_value_counts_dtypes(self): + result = algos.value_counts([1, 1.0]) + assert len(result) == 1 + + result = algos.value_counts([1, 1.0], bins=1) + assert len(result) == 1 + + result 
= algos.value_counts(Series([1, 1.0, "1"])) # object + assert len(result) == 2 + + msg = "bins argument only works with numeric data" + with pytest.raises(TypeError, match=msg): + algos.value_counts(["1", 1], bins=1) + + def test_value_counts_nat(self): + td = Series([np.timedelta64(10000), pd.NaT], dtype="timedelta64[ns]") + dt = pd.to_datetime(["NaT", "2014-01-01"]) + + for s in [td, dt]: + vc = algos.value_counts(s) + vc_with_na = algos.value_counts(s, dropna=False) + assert len(vc) == 1 + assert len(vc_with_na) == 2 + + exp_dt = Series({Timestamp("2014-01-01 00:00:00"): 1}) + tm.assert_series_equal(algos.value_counts(dt), exp_dt) + # TODO same for (timedelta) + + def test_value_counts_datetime_outofbounds(self): + # GH 13663 + s = Series( + [ + datetime(3000, 1, 1), + datetime(5000, 1, 1), + datetime(5000, 1, 1), + datetime(6000, 1, 1), + datetime(3000, 1, 1), + datetime(3000, 1, 1), + ] + ) + res = s.value_counts() + + exp_index = Index( + [datetime(3000, 1, 1), datetime(5000, 1, 1), datetime(6000, 1, 1)], + dtype=object, + ) + exp = Series([3, 2, 1], index=exp_index) + tm.assert_series_equal(res, exp) + + # GH 12424 + res = pd.to_datetime(Series(["2362-01-01", np.nan]), errors="ignore") + exp = Series(["2362-01-01", np.nan], dtype=object) + tm.assert_series_equal(res, exp) + + def test_categorical(self): + s = Series(Categorical(list("aaabbc"))) + result = s.value_counts() + expected = Series([3, 2, 1], index=CategoricalIndex(["a", "b", "c"])) + + tm.assert_series_equal(result, expected, check_index_type=True) + + # preserve order? 
+ s = s.cat.as_ordered() + result = s.value_counts() + expected.index = expected.index.as_ordered() + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_categorical_nans(self): + s = Series(Categorical(list("aaaaabbbcc"))) # 4,3,2,1 (nan) + s.iloc[1] = np.nan + result = s.value_counts() + expected = Series( + [4, 3, 2], + index=CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c"]), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + result = s.value_counts(dropna=False) + expected = Series([4, 3, 2, 1], index=CategoricalIndex(["a", "b", "c", np.nan])) + tm.assert_series_equal(result, expected, check_index_type=True) + + # out of order + s = Series( + Categorical(list("aaaaabbbcc"), ordered=True, categories=["b", "a", "c"]) + ) + s.iloc[1] = np.nan + result = s.value_counts() + expected = Series( + [4, 3, 2], + index=CategoricalIndex( + ["a", "b", "c"], categories=["b", "a", "c"], ordered=True + ), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + + result = s.value_counts(dropna=False) + expected = Series( + [4, 3, 2, 1], + index=CategoricalIndex( + ["a", "b", "c", np.nan], categories=["b", "a", "c"], ordered=True + ), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_categorical_zeroes(self): + # keep the `d` category with 0 + s = Series(Categorical(list("bbbaac"), categories=list("abcd"), ordered=True)) + result = s.value_counts() + expected = Series( + [3, 2, 1, 0], + index=Categorical( + ["b", "a", "c", "d"], categories=list("abcd"), ordered=True + ), + ) + tm.assert_series_equal(result, expected, check_index_type=True) + + def test_dropna(self): + # https://github.com/pandas-dev/pandas/issues/9443#issuecomment-73719328 + + tm.assert_series_equal( + Series([True, True, False]).value_counts(dropna=True), + Series([2, 1], index=[True, False]), + ) + tm.assert_series_equal( + Series([True, True, False]).value_counts(dropna=False), + Series([2, 1], 
index=[True, False]), + ) + + tm.assert_series_equal( + Series([True, True, False, None]).value_counts(dropna=True), + Series([2, 1], index=[True, False]), + ) + tm.assert_series_equal( + Series([True, True, False, None]).value_counts(dropna=False), + Series([2, 1, 1], index=[True, False, np.nan]), + ) + tm.assert_series_equal( + Series([10.3, 5.0, 5.0]).value_counts(dropna=True), + Series([2, 1], index=[5.0, 10.3]), + ) + tm.assert_series_equal( + Series([10.3, 5.0, 5.0]).value_counts(dropna=False), + Series([2, 1], index=[5.0, 10.3]), + ) + + tm.assert_series_equal( + Series([10.3, 5.0, 5.0, None]).value_counts(dropna=True), + Series([2, 1], index=[5.0, 10.3]), + ) + + # 32-bit linux has a different ordering + if not compat.is_platform_32bit(): + result = Series([10.3, 5.0, 5.0, None]).value_counts(dropna=False) + expected = Series([2, 1, 1], index=[5.0, 10.3, np.nan]) + tm.assert_series_equal(result, expected) + + def test_value_counts_normalized(self): + # GH12558 + s = Series([1, 2, np.nan, np.nan, np.nan]) + dtypes = (np.float64, np.object, "M8[ns]") + for t in dtypes: + s_typed = s.astype(t) + result = s_typed.value_counts(normalize=True, dropna=False) + expected = Series( + [0.6, 0.2, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=t) + ) + tm.assert_series_equal(result, expected) + + result = s_typed.value_counts(normalize=True, dropna=True) + expected = Series([0.5, 0.5], index=Series([2.0, 1.0], dtype=t)) + tm.assert_series_equal(result, expected) + + def test_value_counts_uint64(self): + arr = np.array([2 ** 63], dtype=np.uint64) + expected = Series([1], index=[2 ** 63]) + result = algos.value_counts(arr) + + tm.assert_series_equal(result, expected) + + arr = np.array([-1, 2 ** 63], dtype=object) + expected = Series([1, 1], index=[-1, 2 ** 63]) + result = algos.value_counts(arr) + + # 32-bit linux has a different ordering + if not compat.is_platform_32bit(): + tm.assert_series_equal(result, expected) + + +class TestDuplicated: + def 
test_duplicated_with_nas(self): + keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object) + + result = algos.duplicated(keys) + expected = np.array([False, False, False, True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep="first") + expected = np.array([False, False, False, True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep="last") + expected = np.array([True, False, True, False, False, False]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep=False) + expected = np.array([True, False, True, True, False, True]) + tm.assert_numpy_array_equal(result, expected) + + keys = np.empty(8, dtype=object) + for i, t in enumerate( + zip([0, 0, np.nan, np.nan] * 2, [0, np.nan, 0, np.nan] * 2) + ): + keys[i] = t + + result = algos.duplicated(keys) + falses = [False] * 4 + trues = [True] * 4 + expected = np.array(falses + trues) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep="last") + expected = np.array(trues + falses) + tm.assert_numpy_array_equal(result, expected) + + result = algos.duplicated(keys, keep=False) + expected = np.array(trues + trues) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( + "case", + [ + np.array([1, 2, 1, 5, 3, 2, 4, 1, 5, 6]), + np.array([1.1, 2.2, 1.1, np.nan, 3.3, 2.2, 4.4, 1.1, np.nan, 6.6]), + np.array( + [ + 1 + 1j, + 2 + 2j, + 1 + 1j, + 5 + 5j, + 3 + 3j, + 2 + 2j, + 4 + 4j, + 1 + 1j, + 5 + 5j, + 6 + 6j, + ] + ), + np.array(["a", "b", "a", "e", "c", "b", "d", "a", "e", "f"], dtype=object), + np.array( + [1, 2 ** 63, 1, 3 ** 5, 10, 2 ** 63, 39, 1, 3 ** 5, 7], dtype=np.uint64 + ), + ], + ) + def test_numeric_object_likes(self, case): + exp_first = np.array( + [False, False, True, False, False, True, False, True, True, False] + ) + exp_last = np.array( + [True, True, True, True, False, False, False, False, False, False] + ) 
+ exp_false = exp_first | exp_last + + res_first = algos.duplicated(case, keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = algos.duplicated(case, keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = algos.duplicated(case, keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # index + for idx in [Index(case), Index(case, dtype="category")]: + res_first = idx.duplicated(keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # series + for s in [Series(case), Series(case, dtype="category")]: + res_first = s.duplicated(keep="first") + tm.assert_series_equal(res_first, Series(exp_first)) + + res_last = s.duplicated(keep="last") + tm.assert_series_equal(res_last, Series(exp_last)) + + res_false = s.duplicated(keep=False) + tm.assert_series_equal(res_false, Series(exp_false)) + + def test_datetime_likes(self): + + dt = [ + "2011-01-01", + "2011-01-02", + "2011-01-01", + "NaT", + "2011-01-03", + "2011-01-02", + "2011-01-04", + "2011-01-01", + "NaT", + "2011-01-06", + ] + td = [ + "1 days", + "2 days", + "1 days", + "NaT", + "3 days", + "2 days", + "4 days", + "1 days", + "NaT", + "6 days", + ] + + cases = [ + np.array([Timestamp(d) for d in dt]), + np.array([Timestamp(d, tz="US/Eastern") for d in dt]), + np.array([pd.Period(d, freq="D") for d in dt]), + np.array([np.datetime64(d) for d in dt]), + np.array([pd.Timedelta(d) for d in td]), + ] + + exp_first = np.array( + [False, False, True, False, False, True, False, True, True, False] + ) + exp_last = np.array( + [True, True, True, True, False, False, False, False, False, False] + ) + exp_false = exp_first | exp_last + + for case in cases: + res_first = algos.duplicated(case, keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last 
= algos.duplicated(case, keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = algos.duplicated(case, keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # index + for idx in [ + Index(case), + Index(case, dtype="category"), + Index(case, dtype=object), + ]: + res_first = idx.duplicated(keep="first") + tm.assert_numpy_array_equal(res_first, exp_first) + + res_last = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(res_last, exp_last) + + res_false = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(res_false, exp_false) + + # series + for s in [ + Series(case), + Series(case, dtype="category"), + Series(case, dtype=object), + ]: + res_first = s.duplicated(keep="first") + tm.assert_series_equal(res_first, Series(exp_first)) + + res_last = s.duplicated(keep="last") + tm.assert_series_equal(res_last, Series(exp_last)) + + res_false = s.duplicated(keep=False) + tm.assert_series_equal(res_false, Series(exp_false)) + + def test_unique_index(self): + cases = [Index([1, 2, 3]), pd.RangeIndex(0, 3)] + for case in cases: + assert case.is_unique is True + tm.assert_numpy_array_equal( + case.duplicated(), np.array([False, False, False]) + ) + + @pytest.mark.parametrize( + "arr, unique", + [ + ( + [(0, 0), (0, 1), (1, 0), (1, 1), (0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 0), (0, 1), (1, 0), (1, 1)], + ), + ( + [("b", "c"), ("a", "b"), ("a", "b"), ("b", "c")], + [("b", "c"), ("a", "b")], + ), + ([("a", 1), ("b", 2), ("a", 3), ("a", 1)], [("a", 1), ("b", 2), ("a", 3)]), + ], + ) + def test_unique_tuples(self, arr, unique): + # https://github.com/pandas-dev/pandas/issues/16519 + expected = np.empty(len(unique), dtype=object) + expected[:] = unique + + result = pd.unique(arr) + tm.assert_numpy_array_equal(result, expected) + + +class GroupVarTestMixin: + def test_group_var_generic_1d(self): + prng = RandomState(1234) + + out = (np.nan * np.ones((5, 1))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * 
prng.rand(15, 1).astype(self.dtype) + labels = np.tile(np.arange(5), (3,)).astype("int64") + + expected_out = ( + np.squeeze(values).reshape((5, 3), order="F").std(axis=1, ddof=1) ** 2 + )[:, np.newaxis] + expected_counts = counts + 3 + + self.algo(out, counts, values, labels) + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_1d_flat_labels(self): + prng = RandomState(1234) + + out = (np.nan * np.ones((1, 1))).astype(self.dtype) + counts = np.zeros(1, dtype="int64") + values = 10 * prng.rand(5, 1).astype(self.dtype) + labels = np.zeros(5, dtype="int64") + + expected_out = np.array([[values.std(ddof=1) ** 2]]) + expected_counts = counts + 5 + + self.algo(out, counts, values, labels) + + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_2d_all_finite(self): + prng = RandomState(1234) + + out = (np.nan * np.ones((5, 2))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(10, 2).astype(self.dtype) + labels = np.tile(np.arange(5), (2,)).astype("int64") + + expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2 + expected_counts = counts + 2 + + self.algo(out, counts, values, labels) + assert np.allclose(out, expected_out, self.rtol) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_generic_2d_some_nan(self): + prng = RandomState(1234) + + out = (np.nan * np.ones((5, 2))).astype(self.dtype) + counts = np.zeros(5, dtype="int64") + values = 10 * prng.rand(10, 2).astype(self.dtype) + values[:, 1] = np.nan + labels = np.tile(np.arange(5), (2,)).astype("int64") + + expected_out = np.vstack( + [ + values[:, 0].reshape(5, 2, order="F").std(ddof=1, axis=1) ** 2, + np.nan * np.ones(5), + ] + ).T.astype(self.dtype) + expected_counts = counts + 2 + + self.algo(out, counts, values, labels) + tm.assert_almost_equal(out, expected_out, 
check_less_precise=6) + tm.assert_numpy_array_equal(counts, expected_counts) + + def test_group_var_constant(self): + # Regression test from GH 10448. + + out = np.array([[np.nan]], dtype=self.dtype) + counts = np.array([0], dtype="int64") + values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype) + labels = np.zeros(3, dtype="int64") + + self.algo(out, counts, values, labels) + + assert counts[0] == 3 + assert out[0, 0] >= 0 + tm.assert_almost_equal(out[0, 0], 0.0) + + +class TestGroupVarFloat64(GroupVarTestMixin): + __test__ = True + + algo = staticmethod(libgroupby.group_var_float64) + dtype = np.float64 + rtol = 1e-5 + + def test_group_var_large_inputs(self): + + prng = RandomState(1234) + + out = np.array([[np.nan]], dtype=self.dtype) + counts = np.array([0], dtype="int64") + values = (prng.rand(10 ** 6) + 10 ** 12).astype(self.dtype) + values.shape = (10 ** 6, 1) + labels = np.zeros(10 ** 6, dtype="int64") + + self.algo(out, counts, values, labels) + + assert counts[0] == 10 ** 6 + tm.assert_almost_equal(out[0, 0], 1.0 / 12, check_less_precise=True) + + +class TestGroupVarFloat32(GroupVarTestMixin): + __test__ = True + + algo = staticmethod(libgroupby.group_var_float32) + dtype = np.float32 + rtol = 1e-2 + + +class TestHashTable: + def test_string_hashtable_set_item_signature(self): + # GH#30419 fix typing in StringHashTable.set_item to prevent segfault + tbl = ht.StringHashTable() + + tbl.set_item("key", 1) + assert tbl.get_item("key") == 1 + + with pytest.raises(TypeError, match="'key' has incorrect type"): + # key arg typed as string, not object + tbl.set_item(4, 6) + with pytest.raises(TypeError, match="'val' has incorrect type"): + tbl.get_item(4) + + def test_lookup_nan(self, writable): + xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3]) + # GH 21688 ensure we can deal with readonly memory views + xs.setflags(write=writable) + m = ht.Float64HashTable() + m.map_locations(xs) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), 
dtype=np.int64)) + + def test_add_signed_zeros(self): + # GH 21866 inconsistent hash-function for float64 + # default hash-function would lead to different hash-buckets + # for 0.0 and -0.0 if there are more than 2^30 hash-buckets + # but this would mean 16GB + N = 4 # 12 * 10**8 would trigger the error, if you have enough memory + m = ht.Float64HashTable(N) + m.set_item(0.0, 0) + m.set_item(-0.0, 0) + assert len(m) == 1 # 0.0 and -0.0 are equivalent + + def test_add_different_nans(self): + # GH 21866 inconsistent hash-function for float64 + # create different nans from bit-patterns: + NAN1 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000000))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", 0x7FF8000000000001))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + # default hash function would lead to different hash-buckets + # for NAN1 and NAN2 even if there are only 4 buckets: + m = ht.Float64HashTable() + m.set_item(NAN1, 0) + m.set_item(NAN2, 0) + assert len(m) == 1 # NAN1 and NAN2 are equivalent + + def test_lookup_overflow(self, writable): + xs = np.array([1, 2, 2 ** 63], dtype=np.uint64) + # GH 21688 ensure we can deal with readonly memory views + xs.setflags(write=writable) + m = ht.UInt64HashTable() + m.map_locations(xs) + tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.int64)) + + def test_get_unique(self): + s = Series([1, 2, 2 ** 63, 2 ** 63], dtype=np.uint64) + exp = np.array([1, 2, 2 ** 63], dtype=np.uint64) + tm.assert_numpy_array_equal(s.unique(), exp) + + @pytest.mark.parametrize("nvals", [0, 10]) # resizing to 0 is special case + @pytest.mark.parametrize( + "htable, uniques, dtype, safely_resizes", + [ + (ht.PyObjectHashTable, ht.ObjectVector, "object", False), + (ht.StringHashTable, ht.ObjectVector, "object", True), + (ht.Float64HashTable, ht.Float64Vector, "float64", False), + (ht.Int64HashTable, ht.Int64Vector, "int64", False), + (ht.UInt64HashTable, ht.UInt64Vector, "uint64", False), + ], + ) + def test_vector_resize( + 
self, writable, htable, uniques, dtype, safely_resizes, nvals + ): + # Test for memory errors after internal vector + # reallocations (GH 7157) + vals = np.array(np.random.randn(1000), dtype=dtype) + + # GH 21688 ensures we can deal with read-only memory views + vals.setflags(write=writable) + + # initialise instances; cannot initialise in parametrization, + # as otherwise external views would be held on the array (which is + # one of the things this test is checking) + htable = htable() + uniques = uniques() + + # get_labels may append to uniques + htable.get_labels(vals[:nvals], uniques, 0, -1) + # to_array() sets an external_view_exists flag on uniques. + tmp = uniques.to_array() + oldshape = tmp.shape + + # subsequent get_labels() calls can no longer append to it + # (except for StringHashTables + ObjectVector) + if safely_resizes: + htable.get_labels(vals, uniques, 0, -1) + else: + with pytest.raises(ValueError, match="external reference.*"): + htable.get_labels(vals, uniques, 0, -1) + + uniques.to_array() # should not raise here + assert tmp.shape == oldshape + + @pytest.mark.parametrize( + "htable, tm_dtype", + [ + (ht.PyObjectHashTable, "String"), + (ht.StringHashTable, "String"), + (ht.Float64HashTable, "Float"), + (ht.Int64HashTable, "Int"), + (ht.UInt64HashTable, "UInt"), + ], + ) + def test_hashtable_unique(self, htable, tm_dtype, writable): + # output of maker has guaranteed unique elements + maker = getattr(tm, "make" + tm_dtype + "Index") + s = Series(maker(1000)) + if htable == ht.Float64HashTable: + # add NaN for float column + s.loc[500] = np.nan + elif htable == ht.PyObjectHashTable: + # use different NaN types for object column + s.loc[500:502] = [np.nan, None, pd.NaT] + + # create duplicated selection + s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True) + s_duplicated.values.setflags(write=writable) + + # drop_duplicates has own cython code (hash_table_func_helper.pxi) + # and is tested separately; keeps first occurrence like 
ht.unique() + expected_unique = s_duplicated.drop_duplicates(keep="first").values + result_unique = htable().unique(s_duplicated.values) + tm.assert_numpy_array_equal(result_unique, expected_unique) + + # test return_inverse=True + # reconstruction can only succeed if the inverse is correct + result_unique, result_inverse = htable().unique( + s_duplicated.values, return_inverse=True + ) + tm.assert_numpy_array_equal(result_unique, expected_unique) + reconstr = result_unique[result_inverse] + tm.assert_numpy_array_equal(reconstr, s_duplicated.values) + + @pytest.mark.parametrize( + "htable, tm_dtype", + [ + (ht.PyObjectHashTable, "String"), + (ht.StringHashTable, "String"), + (ht.Float64HashTable, "Float"), + (ht.Int64HashTable, "Int"), + (ht.UInt64HashTable, "UInt"), + ], + ) + def test_hashtable_factorize(self, htable, tm_dtype, writable): + # output of maker has guaranteed unique elements + maker = getattr(tm, "make" + tm_dtype + "Index") + s = Series(maker(1000)) + if htable == ht.Float64HashTable: + # add NaN for float column + s.loc[500] = np.nan + elif htable == ht.PyObjectHashTable: + # use different NaN types for object column + s.loc[500:502] = [np.nan, None, pd.NaT] + + # create duplicated selection + s_duplicated = s.sample(frac=3, replace=True).reset_index(drop=True) + s_duplicated.values.setflags(write=writable) + na_mask = s_duplicated.isna().values + + result_unique, result_inverse = htable().factorize(s_duplicated.values) + + # drop_duplicates has own cython code (hash_table_func_helper.pxi) + # and is tested separately; keeps first occurrence like ht.factorize() + # since factorize removes all NaNs, we do the same here + expected_unique = s_duplicated.dropna().drop_duplicates().values + tm.assert_numpy_array_equal(result_unique, expected_unique) + + # reconstruction can only succeed if the inverse is correct. 
Since + # factorize removes the NaNs, those have to be excluded here as well + result_reconstruct = result_unique[result_inverse[~na_mask]] + expected_reconstruct = s_duplicated.dropna().values + tm.assert_numpy_array_equal(result_reconstruct, expected_reconstruct) + + @pytest.mark.parametrize( + "hashtable", + [ + ht.PyObjectHashTable, + ht.StringHashTable, + ht.Float64HashTable, + ht.Int64HashTable, + ht.UInt64HashTable, + ], + ) + def test_hashtable_large_sizehint(self, hashtable): + # GH 22729 + size_hint = np.iinfo(np.uint32).max + 1 + tbl = hashtable(size_hint=size_hint) # noqa + + +def test_quantile(): + s = Series(np.random.randn(100)) + + result = algos.quantile(s, [0, 0.25, 0.5, 0.75, 1.0]) + expected = algos.quantile(s.values, [0, 0.25, 0.5, 0.75, 1.0]) + tm.assert_almost_equal(result, expected) + + +def test_unique_label_indices(): + + a = np.random.randint(1, 1 << 10, 1 << 15).astype("i8") + + left = ht.unique_label_indices(a) + right = np.unique(a, return_index=True)[1] + + tm.assert_numpy_array_equal(left, right, check_dtype=False) + + a[np.random.choice(len(a), 10)] = -1 + left = ht.unique_label_indices(a) + right = np.unique(a, return_index=True)[1][1:] + tm.assert_numpy_array_equal(left, right, check_dtype=False) + + +class TestRank: + @td.skip_if_no_scipy + def test_scipy_compat(self): + from scipy.stats import rankdata + + def _check(arr): + mask = ~np.isfinite(arr) + arr = arr.copy() + result = libalgos.rank_1d(arr) + arr[mask] = np.inf + exp = rankdata(arr) + exp[mask] = np.nan + tm.assert_almost_equal(result, exp) + + _check(np.array([np.nan, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 3, np.nan])) + _check(np.array([4.0, np.nan, 5.0, 5.0, 5.0, np.nan, 1, 2, 4.0, np.nan])) + + def test_basic(self): + exp = np.array([1, 2], dtype=np.float64) + + for dtype in np.typecodes["AllInteger"]: + s = Series([1, 100], dtype=dtype) + tm.assert_numpy_array_equal(algos.rank(s), exp) + + def test_uint64_overflow(self): + exp = np.array([1, 2], dtype=np.float64) + 
+ for dtype in [np.float64, np.uint64]: + s = Series([1, 2 ** 63], dtype=dtype) + tm.assert_numpy_array_equal(algos.rank(s), exp) + + def test_too_many_ndims(self): + arr = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]) + msg = "Array with ndim > 2 are not supported" + + with pytest.raises(TypeError, match=msg): + algos.rank(arr) + + @pytest.mark.single + @pytest.mark.high_memory + @pytest.mark.parametrize( + "values", + [np.arange(2 ** 24 + 1), np.arange(2 ** 25 + 2).reshape(2 ** 24 + 1, 2)], + ids=["1d", "2d"], + ) + def test_pct_max_many_rows(self, values): + # GH 18271 + result = algos.rank(values, pct=True).max() + assert result == 1 + + +def test_pad_backfill_object_segfault(): + + old = np.array([], dtype="O") + new = np.array([datetime(2010, 12, 31)], dtype="O") + + result = libalgos.pad["object"](old, new) + expected = np.array([-1], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.pad["object"](new, old) + expected = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.backfill["object"](old, new) + expected = np.array([-1], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + result = libalgos.backfill["object"](new, old) + expected = np.array([], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + +class TestTseriesUtil: + def test_combineFunc(self): + pass + + def test_reindex(self): + pass + + def test_isna(self): + pass + + def test_groupby(self): + pass + + def test_groupby_withnull(self): + pass + + def test_backfill(self): + old = Index([1, 5, 10]) + new = Index(list(range(12))) + + filler = libalgos.backfill["int64_t"](old.values, new.values) + + expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64) + tm.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = Index([1, 4]) + new = Index(list(range(5, 10))) + filler = libalgos.backfill["int64_t"](old.values, new.values) + + expect_filler = 
np.array([-1, -1, -1, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(filler, expect_filler) + + def test_pad(self): + old = Index([1, 5, 10]) + new = Index(list(range(12))) + + filler = libalgos.pad["int64_t"](old.values, new.values) + + expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64) + tm.assert_numpy_array_equal(filler, expect_filler) + + # corner case + old = Index([5, 10]) + new = Index(np.arange(5)) + filler = libalgos.pad["int64_t"](old.values, new.values) + expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(filler, expect_filler) + + +def test_is_lexsorted(): + failure = [ + np.array( + [ + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + dtype="int64", + ), + np.array( + [ + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + 30, + 29, + 28, + 27, + 26, + 25, + 24, + 23, + 22, + 21, + 20, + 19, + 18, + 17, + 16, + 15, + 14, + 13, + 12, + 11, + 
10, + 9, + 8, + 7, + 6, + 5, + 4, + 3, + 2, + 1, + 0, + ], + dtype="int64", + ), + ] + + assert not libalgos.is_lexsorted(failure) + + +def test_groupsort_indexer(): + a = np.random.randint(0, 1000, 100).astype(np.int64) + b = np.random.randint(0, 1000, 100).astype(np.int64) + + result = libalgos.groupsort_indexer(a, 1000)[0] + + # need to use a stable sort + # np.argsort returns int, groupsort_indexer + # always returns int64 + expected = np.argsort(a, kind="mergesort") + expected = expected.astype(np.int64) + + tm.assert_numpy_array_equal(result, expected) + + # compare with lexsort + # np.lexsort returns int, groupsort_indexer + # always returns int64 + key = a * 1000 + b + result = libalgos.groupsort_indexer(key, 1000000)[0] + expected = np.lexsort((b, a)) + expected = expected.astype(np.int64) + + tm.assert_numpy_array_equal(result, expected) + + +def test_infinity_sort(): + # GH 13445 + # numpy's argsort can be unhappy if something is less than + # itself. Instead, let's give our infinities a self-consistent + # ordering, but outside the float extended real line. 
+ + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() + + ref_nums = [NegInf, float("-inf"), -1e100, 0, 1e100, float("inf"), Inf] + + assert all(Inf >= x for x in ref_nums) + assert all(Inf > x or x is Inf for x in ref_nums) + assert Inf >= Inf and Inf == Inf + assert not Inf < Inf and not Inf > Inf + assert libalgos.Infinity() == libalgos.Infinity() + assert not libalgos.Infinity() != libalgos.Infinity() + + assert all(NegInf <= x for x in ref_nums) + assert all(NegInf < x or x is NegInf for x in ref_nums) + assert NegInf <= NegInf and NegInf == NegInf + assert not NegInf < NegInf and not NegInf > NegInf + assert libalgos.NegInfinity() == libalgos.NegInfinity() + assert not libalgos.NegInfinity() != libalgos.NegInfinity() + + for perm in permutations(ref_nums): + assert sorted(perm) == ref_nums + + # smoke tests + np.array([libalgos.Infinity()] * 32).argsort() + np.array([libalgos.NegInfinity()] * 32).argsort() + + +def test_infinity_against_nan(): + Inf = libalgos.Infinity() + NegInf = libalgos.NegInfinity() + + assert not Inf > np.nan + assert not Inf >= np.nan + assert not Inf < np.nan + assert not Inf <= np.nan + assert not Inf == np.nan + assert Inf != np.nan + + assert not NegInf > np.nan + assert not NegInf >= np.nan + assert not NegInf < np.nan + assert not NegInf <= np.nan + assert not NegInf == np.nan + assert NegInf != np.nan + + +def test_ensure_platform_int(): + arr = np.arange(100, dtype=np.intp) + + result = libalgos.ensure_platform_int(arr) + assert result is arr + + +def test_int64_add_overflow(): + # see gh-14068 + msg = "Overflow in int64 addition" + m = np.iinfo(np.int64).max + n = np.iinfo(np.int64).min + + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([m, m]), m) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([m, m]), np.array([m, m])) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([n, n]), n) + with 
pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([n, n]), np.array([n, n])) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr(np.array([m, n]), np.array([n, n])) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), arr_mask=np.array([False, True]) + ) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), b_mask=np.array([False, True]) + ) + with pytest.raises(OverflowError, match=msg): + algos.checked_add_with_arr( + np.array([m, m]), + np.array([m, m]), + arr_mask=np.array([False, True]), + b_mask=np.array([False, True]), + ) + with pytest.raises(OverflowError, match=msg): + with tm.assert_produces_warning(RuntimeWarning): + algos.checked_add_with_arr(np.array([m, m]), np.array([np.nan, m])) + + # Check that the nan boolean arrays override whether or not + # the addition overflows. We don't check the result but just + # the fact that an OverflowError is not raised. 
+ algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), arr_mask=np.array([True, True]) + ) + algos.checked_add_with_arr( + np.array([m, m]), np.array([m, m]), b_mask=np.array([True, True]) + ) + algos.checked_add_with_arr( + np.array([m, m]), + np.array([m, m]), + arr_mask=np.array([True, False]), + b_mask=np.array([False, True]), + ) + + +class TestMode: + def test_no_mode(self): + exp = Series([], dtype=np.float64) + tm.assert_series_equal(algos.mode([]), exp) + + def test_mode_single(self): + # GH 15714 + exp_single = [1] + data_single = [1] + + exp_multi = [1] + data_multi = [1, 1] + + for dt in np.typecodes["AllInteger"] + np.typecodes["Float"]: + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series([1], dtype=np.int) + tm.assert_series_equal(algos.mode([1]), exp) + + exp = Series(["a", "b", "c"], dtype=np.object) + tm.assert_series_equal(algos.mode(["a", "b", "c"]), exp) + + def test_number_mode(self): + exp_single = [1] + data_single = [1] * 5 + [2] * 3 + + exp_multi = [1, 3] + data_multi = [1] * 5 + [2] * 3 + [3] * 5 + + for dt in np.typecodes["AllInteger"] + np.typecodes["Float"]: + s = Series(data_single, dtype=dt) + exp = Series(exp_single, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + s = Series(data_multi, dtype=dt) + exp = Series(exp_multi, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + def test_strobj_mode(self): + exp = ["b"] + data = ["a"] * 2 + ["b"] * 3 + + s = Series(data, dtype="c") + exp = Series(exp, dtype="c") + tm.assert_series_equal(algos.mode(s), exp) + + exp = ["bar"] + data = ["foo"] * 2 + ["bar"] * 3 + + for dt in [str, object]: + s = Series(data, dtype=dt) + exp = Series(exp, dtype=dt) + tm.assert_series_equal(algos.mode(s), exp) + + def test_datelike_mode(self): + exp = Series(["1900-05-03", 
"2011-01-03", "2013-01-02"], dtype="M8[ns]") + s = Series(["2011-01-03", "2013-01-02", "1900-05-03"], dtype="M8[ns]") + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series(["2011-01-03", "2013-01-02"], dtype="M8[ns]") + s = Series( + ["2011-01-03", "2013-01-02", "1900-05-03", "2011-01-03", "2013-01-02"], + dtype="M8[ns]", + ) + tm.assert_series_equal(algos.mode(s), exp) + + def test_timedelta_mode(self): + exp = Series(["-1 days", "0 days", "1 days"], dtype="timedelta64[ns]") + s = Series(["1 days", "-1 days", "0 days"], dtype="timedelta64[ns]") + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]") + s = Series( + ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"], + dtype="timedelta64[ns]", + ) + tm.assert_series_equal(algos.mode(s), exp) + + def test_mixed_dtype(self): + exp = Series(["foo"]) + s = Series([1, "foo", "foo"]) + tm.assert_series_equal(algos.mode(s), exp) + + def test_uint64_overflow(self): + exp = Series([2 ** 63], dtype=np.uint64) + s = Series([1, 2 ** 63, 2 ** 63], dtype=np.uint64) + tm.assert_series_equal(algos.mode(s), exp) + + exp = Series([1, 2 ** 63], dtype=np.uint64) + s = Series([1, 2 ** 63], dtype=np.uint64) + tm.assert_series_equal(algos.mode(s), exp) + + def test_categorical(self): + c = Categorical([1, 2]) + exp = c + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) + + c = Categorical([1, "a", "a"]) + exp = Categorical(["a"], categories=[1, "a"]) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) + + c = Categorical([1, 1, 2, 3, 3]) + exp = Categorical([1, 3], categories=[1, 2, 3]) + tm.assert_categorical_equal(algos.mode(c), exp) + tm.assert_categorical_equal(c.mode(), exp) + + def test_index(self): + idx = Index([1, 2, 3]) + exp = Series([1, 2, 3], dtype=np.int64) + tm.assert_series_equal(algos.mode(idx), exp) + + idx = Index([1, "a", "a"]) + exp = Series(["a"], dtype=object) 
+ tm.assert_series_equal(algos.mode(idx), exp) + + idx = Index([1, 1, 2, 3, 3]) + exp = Series([1, 3], dtype=np.int64) + tm.assert_series_equal(algos.mode(idx), exp) + + exp = Series(["2 min", "1 day"], dtype="timedelta64[ns]") + idx = Index( + ["1 day", "1 day", "-1 day", "-1 day 2 min", "2 min", "2 min"], + dtype="timedelta64[ns]", + ) + tm.assert_series_equal(algos.mode(idx), exp) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py new file mode 100644 index 00000000..a8a0fcea --- /dev/null +++ b/pandas/tests/test_common.py @@ -0,0 +1,131 @@ +import collections +from distutils.version import LooseVersion +from functools import partial +import string + +import numpy as np +import pytest + +import pandas as pd +from pandas import Series, Timestamp +from pandas.core import ops +import pandas.core.common as com + + +def test_get_callable_name(): + getname = com.get_callable_name + + def fn(x): + return x + + lambda_ = lambda x: x # noqa: E731 + part1 = partial(fn) + part2 = partial(part1) + + class somecall: + def __call__(self): + return x # noqa + + assert getname(fn) == "fn" + assert getname(lambda_) + assert getname(part1) == "fn" + assert getname(part2) == "fn" + assert getname(somecall()) == "somecall" + assert getname(1) is None + + +def test_any_none(): + assert com.any_none(1, 2, 3, None) + assert not com.any_none(1, 2, 3, 4) + + +def test_all_not_none(): + assert com.all_not_none(1, 2, 3, 4) + assert not com.all_not_none(1, 2, 3, None) + assert not com.all_not_none(None, None, None, None) + + +def test_random_state(): + import numpy.random as npr + + # Check with seed + state = com.random_state(5) + assert state.uniform() == npr.RandomState(5).uniform() + + # Check with random state object + state2 = npr.RandomState(10) + assert com.random_state(state2).uniform() == npr.RandomState(10).uniform() + + # check with no arg random state + assert com.random_state() is np.random + + # Error for floats or strings + with 
pytest.raises(ValueError): + com.random_state("test") + + with pytest.raises(ValueError): + com.random_state(5.5) + + +@pytest.mark.parametrize( + "left, right, expected", + [ + (Series([1], name="x"), Series([2], name="x"), "x"), + (Series([1], name="x"), Series([2], name="y"), None), + (Series([1]), Series([2], name="x"), None), + (Series([1], name="x"), Series([2]), None), + (Series([1], name="x"), [2], "x"), + ([1], Series([2], name="y"), "y"), + ], +) +def test_maybe_match_name(left, right, expected): + assert ops._maybe_match_name(left, right) == expected + + +def test_dict_compat(): + data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2} + data_unchanged = {1: 2, 3: 4, 5: 6} + expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2} + assert com.dict_compat(data_datetime64) == expected + assert com.dict_compat(expected) == expected + assert com.dict_compat(data_unchanged) == data_unchanged + + +def test_standardize_mapping(): + # No uninitialized defaultdicts + with pytest.raises(TypeError): + com.standardize_mapping(collections.defaultdict) + + # No non-mapping subtypes, instance + with pytest.raises(TypeError): + com.standardize_mapping([]) + + # No non-mapping subtypes, class + with pytest.raises(TypeError): + com.standardize_mapping(list) + + fill = {"bad": "data"} + assert com.standardize_mapping(fill) == dict + + # Convert instance to type + assert com.standardize_mapping({}) == dict + + dd = collections.defaultdict(list) + assert isinstance(com.standardize_mapping(dd), partial) + + +def test_git_version(): + # GH 21295 + git_version = pd.__git_version__ + assert len(git_version) == 40 + assert all(c in string.hexdigits for c in git_version) + + +def test_version_tag(): + version = pd.__version__ + try: + version > LooseVersion("0.0.1") + except TypeError: + raise ValueError( + "No git tags exist, please sync tags between upstream and your repo" + ) diff --git a/pandas/tests/test_compat.py 
b/pandas/tests/test_compat.py new file mode 100644 index 00000000..4ff8b0b3 --- /dev/null +++ b/pandas/tests/test_compat.py @@ -0,0 +1,3 @@ +""" +Testing that functions from compat work as expected +""" diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py new file mode 100644 index 00000000..9f473d1a --- /dev/null +++ b/pandas/tests/test_downstream.py @@ -0,0 +1,180 @@ +""" +Testing that we work in the downstream packages +""" +import importlib +import subprocess +import sys + +import numpy as np # noqa +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Series +import pandas._testing as tm + + +def import_module(name): + # we *only* want to skip if the module is truly not available + # and NOT just an actual import error because of pandas changes + + try: + return importlib.import_module(name) + except ModuleNotFoundError: # noqa + pytest.skip("skipping as {} not available".format(name)) + + +@pytest.fixture +def df(): + return DataFrame({"A": [1, 2, 3]}) + + +def test_dask(df): + + toolz = import_module("toolz") # noqa + dask = import_module("dask") # noqa + + import dask.dataframe as dd + + ddf = dd.from_pandas(df, npartitions=3) + assert ddf.A is not None + assert ddf.compute() is not None + + +@pytest.mark.filterwarnings("ignore:Panel class is removed") +def test_xarray(df): + + xarray = import_module("xarray") # noqa + + assert df.to_xarray() is not None + + +@td.skip_if_no("cftime") +@td.skip_if_no("xarray", "0.10.4") +def test_xarray_cftimeindex_nearest(): + # https://github.com/pydata/xarray/issues/3751 + import cftime + import xarray + + times = xarray.cftime_range("0001", periods=2) + result = times.get_loc(cftime.DatetimeGregorian(2000, 1, 1), method="nearest") + expected = 1 + assert result == expected + + +def test_oo_optimizable(): + # GH 21071 + subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"]) + + +@tm.network +# Cython import warning 
+@pytest.mark.filterwarnings("ignore:can't:ImportWarning") +@pytest.mark.filterwarnings( + # patsy needs to update their imports + "ignore:Using or importing the ABCs from 'collections:DeprecationWarning" +) +def test_statsmodels(): + + statsmodels = import_module("statsmodels") # noqa + import statsmodels.api as sm + import statsmodels.formula.api as smf + + df = sm.datasets.get_rdataset("Guerry", "HistData").data + smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit() + + +# Cython import warning +@pytest.mark.filterwarnings("ignore:can't:ImportWarning") +def test_scikit_learn(df): + + sklearn = import_module("sklearn") # noqa + from sklearn import svm, datasets + + digits = datasets.load_digits() + clf = svm.SVC(gamma=0.001, C=100.0) + clf.fit(digits.data[:-1], digits.target[:-1]) + clf.predict(digits.data[-1:]) + + +# Cython import warning and traitlets +@tm.network +@pytest.mark.filterwarnings("ignore") +def test_seaborn(): + + seaborn = import_module("seaborn") + tips = seaborn.load_dataset("tips") + seaborn.stripplot(x="day", y="total_bill", data=tips) + + +def test_pandas_gbq(df): + + pandas_gbq = import_module("pandas_gbq") # noqa + + +@pytest.mark.xfail(reason="0.7.0 pending") +@tm.network +def test_pandas_datareader(): + + pandas_datareader = import_module("pandas_datareader") # noqa + pandas_datareader.DataReader("F", "quandl", "2017-01-01", "2017-02-01") + + +# importing from pandas, Cython import warning +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +@pytest.mark.skip(reason="Anaconda installation issue - GH32144") +def test_geopandas(): + + geopandas = import_module("geopandas") # noqa + fp = geopandas.datasets.get_path("naturalearth_lowres") + assert geopandas.read_file(fp) is not None + + +def test_geopandas_coordinate_indexer(): + # this test is included to have coverage of one case in the indexing.py + # code that is only kept for compatibility with geopandas, see + # https://github.com/pandas-dev/pandas/issues/27258 
+ # We should be able to remove this after some time when its usage is + # removed in geopandas + from pandas.core.indexing import _NDFrameIndexer + + class _CoordinateIndexer(_NDFrameIndexer): + def _getitem_tuple(self, tup): + obj = self.obj + xs, ys = tup + return obj[xs][ys] + + Series._create_indexer("cx", _CoordinateIndexer) + s = Series(range(5)) + res = s.cx[:, :] + tm.assert_series_equal(s, res) + + +# Cython import warning +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +@pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning") +def test_pyarrow(df): + + pyarrow = import_module("pyarrow") # noqa + table = pyarrow.Table.from_pandas(df) + result = table.to_pandas() + tm.assert_frame_equal(result, df) + + +@pytest.mark.xfail(reason="pandas-wheels-50", strict=False) +def test_missing_required_dependency(): + # GH 23868 + # To ensure proper isolation, we pass these flags + # -S : disable site-packages + # -s : disable user site-packages + # -E : disable PYTHON* env vars, especially PYTHONPATH + # And, that's apparently not enough, so we give up. 
+ # https://github.com/MacPython/pandas-wheels/pull/50 + call = ["python", "-sSE", "-c", "import pandas"] + + with pytest.raises(subprocess.CalledProcessError) as exc: + subprocess.check_output(call, stderr=subprocess.STDOUT) + + output = exc.value.stdout.decode() + for name in ["numpy", "pytz", "dateutil"]: + assert name in output diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py new file mode 100644 index 00000000..fa214244 --- /dev/null +++ b/pandas/tests/test_errors.py @@ -0,0 +1,66 @@ +import pytest + +from pandas.errors import AbstractMethodError + +import pandas as pd # noqa + + +@pytest.mark.parametrize( + "exc", + [ + "UnsupportedFunctionCall", + "UnsortedIndexError", + "OutOfBoundsDatetime", + "ParserError", + "PerformanceWarning", + "DtypeWarning", + "EmptyDataError", + "ParserWarning", + "MergeError", + ], +) +def test_exception_importable(exc): + from pandas import errors + + e = getattr(errors, exc) + assert e is not None + + # check that we can raise on them + with pytest.raises(e): + raise e() + + +def test_catch_oob(): + from pandas import errors + + try: + pd.Timestamp("15000101") + except errors.OutOfBoundsDatetime: + pass + + +class Foo: + @classmethod + def classmethod(cls): + raise AbstractMethodError(cls, methodtype="classmethod") + + @property + def property(self): + raise AbstractMethodError(self, methodtype="property") + + def method(self): + raise AbstractMethodError(self) + + +def test_AbstractMethodError_classmethod(): + xpr = "This classmethod must be defined in the concrete class Foo" + with pytest.raises(AbstractMethodError, match=xpr): + Foo.classmethod() + + xpr = "This property must be defined in the concrete class Foo" + with pytest.raises(AbstractMethodError, match=xpr): + Foo().property + + xpr = "This method must be defined in the concrete class Foo" + with pytest.raises(AbstractMethodError, match=xpr): + Foo().method() diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py new 
file mode 100644 index 00000000..fadab5d8 --- /dev/null +++ b/pandas/tests/test_expressions.py @@ -0,0 +1,384 @@ +import operator +import re + +import numpy as np +from numpy.random import randn +import pytest + +import pandas._testing as tm +from pandas.core.api import DataFrame +from pandas.core.computation import expressions as expr + +_frame = DataFrame(randn(10000, 4), columns=list("ABCD"), dtype="float64") +_frame2 = DataFrame(randn(100, 4), columns=list("ABCD"), dtype="float64") +_mixed = DataFrame( + { + "A": _frame["A"].copy(), + "B": _frame["B"].astype("float32"), + "C": _frame["C"].astype("int64"), + "D": _frame["D"].astype("int32"), + } +) +_mixed2 = DataFrame( + { + "A": _frame2["A"].copy(), + "B": _frame2["B"].astype("float32"), + "C": _frame2["C"].astype("int64"), + "D": _frame2["D"].astype("int32"), + } +) +_integer = DataFrame( + np.random.randint(1, 100, size=(10001, 4)), columns=list("ABCD"), dtype="int64" +) +_integer2 = DataFrame( + np.random.randint(1, 100, size=(101, 4)), columns=list("ABCD"), dtype="int64" +) + + +@pytest.mark.skipif(not expr._USE_NUMEXPR, reason="not using numexpr") +class TestExpressions: + def setup_method(self, method): + + self.frame = _frame.copy() + self.frame2 = _frame2.copy() + self.mixed = _mixed.copy() + self.mixed2 = _mixed2.copy() + self._MIN_ELEMENTS = expr._MIN_ELEMENTS + + def teardown_method(self, method): + expr._MIN_ELEMENTS = self._MIN_ELEMENTS + + def run_arithmetic(self, df, other): + expr._MIN_ELEMENTS = 0 + operations = ["add", "sub", "mul", "mod", "truediv", "floordiv"] + for test_flex in [True, False]: + for arith in operations: + # TODO: share with run_binary + if test_flex: + op = lambda x, y: getattr(x, arith)(y) + op.__name__ = arith + else: + op = getattr(operator, arith) + expr.set_use_numexpr(False) + expected = op(df, other) + expr.set_use_numexpr(True) + + result = op(df, other) + if arith == "truediv": + if expected.ndim == 1: + assert expected.dtype.kind == "f" + else: + assert all(x.kind 
== "f" for x in expected.dtypes.values) + tm.assert_equal(expected, result) + + def run_binary(self, df, other): + """ + tests solely that the result is the same whether or not numexpr is + enabled. Need to test whether the function does the correct thing + elsewhere. + """ + expr._MIN_ELEMENTS = 0 + expr.set_test_mode(True) + operations = ["gt", "lt", "ge", "le", "eq", "ne"] + + for test_flex in [True, False]: + for arith in operations: + if test_flex: + op = lambda x, y: getattr(x, arith)(y) + op.__name__ = arith + else: + op = getattr(operator, arith) + expr.set_use_numexpr(False) + expected = op(df, other) + expr.set_use_numexpr(True) + + expr.get_test_result() + result = op(df, other) + used_numexpr = expr.get_test_result() + assert used_numexpr, "Did not use numexpr as expected." + tm.assert_equal(expected, result) + + def run_frame(self, df, other, run_binary=True): + self.run_arithmetic(df, other) + if run_binary: + expr.set_use_numexpr(False) + binary_comp = other + 1 + expr.set_use_numexpr(True) + self.run_binary(df, binary_comp) + + for i in range(len(df.columns)): + self.run_arithmetic(df.iloc[:, i], other.iloc[:, i]) + # FIXME: dont leave commented-out + # series doesn't uses vec_compare instead of numexpr... + # binary_comp = other.iloc[:, i] + 1 + # self.run_binary(df.iloc[:, i], binary_comp) + + @pytest.mark.parametrize( + "df", + [ + _integer, + _integer2, + # randint to get a case with zeros + _integer * np.random.randint(0, 2, size=np.shape(_integer)), + _frame, + _frame2, + _mixed, + _mixed2, + ], + ) + def test_arithmetic(self, df): + # TODO: FIGURE OUT HOW TO GET RUN_BINARY TO WORK WITH MIXED=... 
+ # can't do arithmetic because comparison methods try to do *entire* + # frame instead of by-column + kinds = {x.kind for x in df.dtypes.values} + should = len(kinds) == 1 + + self.run_frame(df, df, run_binary=should) + + def test_invalid(self): + + # no op + result = expr._can_use_numexpr( + operator.add, None, self.frame, self.frame, "evaluate" + ) + assert not result + + # mixed + result = expr._can_use_numexpr( + operator.add, "+", self.mixed, self.frame, "evaluate" + ) + assert not result + + # min elements + result = expr._can_use_numexpr( + operator.add, "+", self.frame2, self.frame2, "evaluate" + ) + assert not result + + # ok, we only check on first part of expression + result = expr._can_use_numexpr( + operator.add, "+", self.frame, self.frame2, "evaluate" + ) + assert result + + @pytest.mark.parametrize( + "opname,op_str", + [("add", "+"), ("sub", "-"), ("mul", "*"), ("truediv", "/"), ("pow", "**")], + ) + @pytest.mark.parametrize("left,right", [(_frame, _frame2), (_mixed, _mixed2)]) + def test_binary_ops(self, opname, op_str, left, right): + def testit(): + + if opname == "pow": + # TODO: get this working + return + + op = getattr(operator, opname) + + result = expr._can_use_numexpr(op, op_str, left, left, "evaluate") + assert result != left._is_mixed_type + + result = expr.evaluate(op, op_str, left, left, use_numexpr=True) + expected = expr.evaluate(op, op_str, left, left, use_numexpr=False) + + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + else: + tm.assert_numpy_array_equal(result, expected.values) + + result = expr._can_use_numexpr(op, op_str, right, right, "evaluate") + assert not result + + expr.set_use_numexpr(False) + testit() + expr.set_use_numexpr(True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + @pytest.mark.parametrize( + "opname,op_str", + [ + ("gt", ">"), + ("lt", "<"), + ("ge", ">="), + ("le", "<="), + ("eq", "=="), + ("ne", "!="), + ], + ) + 
@pytest.mark.parametrize("left,right", [(_frame, _frame2), (_mixed, _mixed2)]) + def test_comparison_ops(self, opname, op_str, left, right): + def testit(): + f12 = left + 1 + f22 = right + 1 + + op = getattr(operator, opname) + + result = expr._can_use_numexpr(op, op_str, left, f12, "evaluate") + assert result != left._is_mixed_type + + result = expr.evaluate(op, op_str, left, f12, use_numexpr=True) + expected = expr.evaluate(op, op_str, left, f12, use_numexpr=False) + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + else: + tm.assert_numpy_array_equal(result, expected.values) + + result = expr._can_use_numexpr(op, op_str, right, f22, "evaluate") + assert not result + + expr.set_use_numexpr(False) + testit() + expr.set_use_numexpr(True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + @pytest.mark.parametrize("cond", [True, False]) + @pytest.mark.parametrize("df", [_frame, _frame2, _mixed, _mixed2]) + def test_where(self, cond, df): + def testit(): + c = np.empty(df.shape, dtype=np.bool_) + c.fill(cond) + result = expr.where(c, df.values, df.values + 1) + expected = np.where(c, df.values, df.values + 1) + tm.assert_numpy_array_equal(result, expected) + + expr.set_use_numexpr(False) + testit() + expr.set_use_numexpr(True) + expr.set_numexpr_threads(1) + testit() + expr.set_numexpr_threads() + testit() + + @pytest.mark.parametrize( + "op_str,opname", [("/", "truediv"), ("//", "floordiv"), ("**", "pow")] + ) + def test_bool_ops_raise_on_arithmetic(self, op_str, opname): + df = DataFrame({"a": np.random.rand(10) > 0.5, "b": np.random.rand(10) > 0.5}) + + msg = f"operator {repr(op_str)} not implemented for bool dtypes" + f = getattr(operator, opname) + err_msg = re.escape(msg) + + with pytest.raises(NotImplementedError, match=err_msg): + f(df, df) + + with pytest.raises(NotImplementedError, match=err_msg): + f(df.a, df.b) + + with pytest.raises(NotImplementedError, match=err_msg): + f(df.a, True) + + with 
pytest.raises(NotImplementedError, match=err_msg): + f(False, df.a) + + with pytest.raises(NotImplementedError, match=err_msg): + f(False, df) + + with pytest.raises(NotImplementedError, match=err_msg): + f(df, True) + + @pytest.mark.parametrize( + "op_str,opname", [("+", "add"), ("*", "mul"), ("-", "sub")] + ) + def test_bool_ops_warn_on_arithmetic(self, op_str, opname): + n = 10 + df = DataFrame({"a": np.random.rand(n) > 0.5, "b": np.random.rand(n) > 0.5}) + + subs = {"+": "|", "*": "&", "-": "^"} + sub_funcs = {"|": "or_", "&": "and_", "^": "xor"} + + f = getattr(operator, opname) + fe = getattr(operator, sub_funcs[subs[op_str]]) + + if op_str == "-": + # raises TypeError + return + + with tm.use_numexpr(True, min_elements=5): + with tm.assert_produces_warning(check_stacklevel=False): + r = f(df, df) + e = fe(df, df) + tm.assert_frame_equal(r, e) + + with tm.assert_produces_warning(check_stacklevel=False): + r = f(df.a, df.b) + e = fe(df.a, df.b) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(check_stacklevel=False): + r = f(df.a, True) + e = fe(df.a, True) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(check_stacklevel=False): + r = f(False, df.a) + e = fe(False, df.a) + tm.assert_series_equal(r, e) + + with tm.assert_produces_warning(check_stacklevel=False): + r = f(False, df) + e = fe(False, df) + tm.assert_frame_equal(r, e) + + with tm.assert_produces_warning(check_stacklevel=False): + r = f(df, True) + e = fe(df, True) + tm.assert_frame_equal(r, e) + + @pytest.mark.parametrize( + "test_input,expected", + [ + ( + DataFrame( + [[0, 1, 2, "aa"], [0, 1, 2, "aa"]], columns=["a", "b", "c", "dtype"] + ), + DataFrame([[False, False], [False, False]], columns=["a", "dtype"]), + ), + ( + DataFrame( + [[0, 3, 2, "aa"], [0, 4, 2, "aa"], [0, 1, 1, "bb"]], + columns=["a", "b", "c", "dtype"], + ), + DataFrame( + [[False, False], [False, False], [False, False]], + columns=["a", "dtype"], + ), + ), + ], + ) + def 
test_bool_ops_column_name_dtype(self, test_input, expected): + # GH 22383 - .ne fails if columns containing column name 'dtype' + result = test_input.loc[:, ["a", "dtype"]].ne(test_input.loc[:, ["a", "dtype"]]) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "arith", ("add", "sub", "mul", "mod", "truediv", "floordiv") + ) + @pytest.mark.parametrize("axis", (0, 1)) + def test_frame_series_axis(self, axis, arith): + # GH#26736 Dataframe.floordiv(Series, axis=1) fails + if axis == 1 and arith == "floordiv": + pytest.xfail("'floordiv' does not succeed with axis=1 #27636") + + df = self.frame + if axis == 1: + other = self.frame.iloc[0, :] + else: + other = self.frame.iloc[:, 0] + + expr._MIN_ELEMENTS = 0 + + op_func = getattr(df, arith) + + expr.set_use_numexpr(False) + expected = op_func(other, axis=axis) + expr.set_use_numexpr(True) + + result = op_func(other, axis=axis) + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py new file mode 100644 index 00000000..129dc275 --- /dev/null +++ b/pandas/tests/test_join.py @@ -0,0 +1,346 @@ +import numpy as np +import pytest + +from pandas._libs import join as _join + +from pandas import Categorical, DataFrame, Index, merge +import pandas._testing as tm + + +class TestIndexer: + @pytest.mark.parametrize( + "dtype", ["int32", "int64", "float32", "float64", "object"] + ) + def test_outer_join_indexer(self, dtype): + indexer = _join.outer_join_indexer + + left = np.arange(3, dtype=dtype) + right = np.arange(2, 5, dtype=dtype) + empty = np.array([], dtype=dtype) + + result, lindexer, rindexer = indexer(left, right) + assert isinstance(result, np.ndarray) + assert isinstance(lindexer, np.ndarray) + assert isinstance(rindexer, np.ndarray) + tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype)) + exp = np.array([0, 1, 2, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([-1, -1, 0, 1, 2], dtype=np.int64) + 
tm.assert_numpy_array_equal(rindexer, exp) + + result, lindexer, rindexer = indexer(empty, right) + tm.assert_numpy_array_equal(result, right) + exp = np.array([-1, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([0, 1, 2], dtype=np.int64) + tm.assert_numpy_array_equal(rindexer, exp) + + result, lindexer, rindexer = indexer(left, empty) + tm.assert_numpy_array_equal(result, left) + exp = np.array([0, 1, 2], dtype=np.int64) + tm.assert_numpy_array_equal(lindexer, exp) + exp = np.array([-1, -1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(rindexer, exp) + + +def test_left_join_indexer_unique(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([2, 2, 3, 4, 4], dtype=np.int64) + + result = _join.left_join_indexer_unique(b, a) + expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + +def test_left_outer_join_bug(): + left = np.array( + [ + 0, + 1, + 0, + 1, + 1, + 2, + 3, + 1, + 0, + 2, + 1, + 2, + 0, + 1, + 1, + 2, + 3, + 2, + 3, + 2, + 1, + 1, + 3, + 0, + 3, + 2, + 3, + 0, + 0, + 2, + 3, + 2, + 0, + 3, + 1, + 3, + 0, + 1, + 3, + 0, + 0, + 1, + 0, + 3, + 1, + 0, + 1, + 0, + 1, + 1, + 0, + 2, + 2, + 2, + 2, + 2, + 0, + 3, + 1, + 2, + 0, + 0, + 3, + 1, + 3, + 2, + 2, + 0, + 1, + 3, + 0, + 2, + 3, + 2, + 3, + 3, + 2, + 3, + 3, + 1, + 3, + 2, + 0, + 0, + 3, + 1, + 1, + 1, + 0, + 2, + 3, + 3, + 1, + 2, + 0, + 3, + 1, + 2, + 0, + 2, + ], + dtype=np.int64, + ) + + right = np.array([3, 1], dtype=np.int64) + max_groups = 4 + + lidx, ridx = _join.left_outer_join(left, right, max_groups, sort=False) + + exp_lidx = np.arange(len(left), dtype=np.int64) + exp_ridx = -np.ones(len(left), dtype=np.int64) + + exp_ridx[left == 1] = 1 + exp_ridx[left == 3] = 0 + + tm.assert_numpy_array_equal(lidx, exp_lidx) + tm.assert_numpy_array_equal(ridx, exp_ridx) + + +def test_inner_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) 
+ + index, ares, bres = _join.inner_join_indexer(a, b) + + index_exp = np.array([3, 5], dtype=np.int64) + tm.assert_almost_equal(index, index_exp) + + aexp = np.array([2, 4], dtype=np.int64) + bexp = np.array([1, 2], dtype=np.int64) + tm.assert_almost_equal(ares, aexp) + tm.assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = _join.inner_join_indexer(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) + + +def test_outer_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = _join.outer_join_indexer(a, b) + + index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64) + tm.assert_almost_equal(index, index_exp) + + aexp = np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.int64) + bexp = np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.int64) + tm.assert_almost_equal(ares, aexp) + tm.assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = _join.outer_join_indexer(a, b) + tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) + + +def test_left_join_indexer(): + a = np.array([1, 2, 3, 4, 5], dtype=np.int64) + b = np.array([0, 3, 5, 7, 9], dtype=np.int64) + + index, ares, bres = _join.left_join_indexer(a, b) + + tm.assert_almost_equal(index, a) + + aexp = np.array([0, 1, 2, 3, 4], dtype=np.int64) + bexp = np.array([-1, -1, 1, -1, 2], dtype=np.int64) + tm.assert_almost_equal(ares, aexp) + tm.assert_almost_equal(bres, bexp) + + a = np.array([5], dtype=np.int64) + b = np.array([5], dtype=np.int64) + + index, ares, bres = _join.left_join_indexer(a, b) 
+ tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64)) + tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64)) + tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64)) + + +def test_left_join_indexer2(): + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + + res, lidx, ridx = _join.left_join_indexer(idx2.values, idx.values) + + exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) + tm.assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) + tm.assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) + tm.assert_almost_equal(ridx, exp_ridx) + + +def test_outer_join_indexer2(): + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + + res, lidx, ridx = _join.outer_join_indexer(idx2.values, idx.values) + + exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64) + tm.assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64) + tm.assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64) + tm.assert_almost_equal(ridx, exp_ridx) + + +def test_inner_join_indexer2(): + idx = Index([1, 1, 2, 5]) + idx2 = Index([1, 2, 5, 7, 9]) + + res, lidx, ridx = _join.inner_join_indexer(idx2.values, idx.values) + + exp_res = np.array([1, 1, 2, 5], dtype=np.int64) + tm.assert_almost_equal(res, exp_res) + + exp_lidx = np.array([0, 0, 1, 2], dtype=np.int64) + tm.assert_almost_equal(lidx, exp_lidx) + + exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64) + tm.assert_almost_equal(ridx, exp_ridx) + + +def test_merge_join_categorical_multiindex(): + # From issue 16627 + a = { + "Cat1": Categorical(["a", "b", "a", "c", "a", "b"], ["a", "b", "c"]), + "Int1": [0, 1, 0, 1, 0, 0], + } + a = DataFrame(a) + + b = { + "Cat": Categorical(["a", "b", "c", "a", "b", "c"], ["a", "b", "c"]), + "Int": [0, 0, 0, 1, 1, 1], + "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6], + } + b = DataFrame(b).set_index(["Cat", 
"Int"])["Factor"] + + expected = merge( + a, + b.reset_index(), + left_on=["Cat1", "Int1"], + right_on=["Cat", "Int"], + how="left", + ) + result = a.join(b, on=["Cat1", "Int1"]) + expected = expected.drop(["Cat", "Int"], axis=1) + tm.assert_frame_equal(expected, result) + + # Same test, but with ordered categorical + a = { + "Cat1": Categorical( + ["a", "b", "a", "c", "a", "b"], ["b", "a", "c"], ordered=True + ), + "Int1": [0, 1, 0, 1, 0, 0], + } + a = DataFrame(a) + + b = { + "Cat": Categorical( + ["a", "b", "c", "a", "b", "c"], ["b", "a", "c"], ordered=True + ), + "Int": [0, 0, 0, 1, 1, 1], + "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6], + } + b = DataFrame(b).set_index(["Cat", "Int"])["Factor"] + + expected = merge( + a, + b.reset_index(), + left_on=["Cat1", "Int1"], + right_on=["Cat", "Int"], + how="left", + ) + result = a.join(b, on=["Cat1", "Int1"]) + expected = expected.drop(["Cat", "Int"], axis=1) + tm.assert_frame_equal(expected, result) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py new file mode 100644 index 00000000..f839aa19 --- /dev/null +++ b/pandas/tests/test_lib.py @@ -0,0 +1,196 @@ +import numpy as np +import pytest + +from pandas._libs import lib, writers as libwriters + +from pandas import Index +import pandas._testing as tm + + +class TestMisc: + def test_max_len_string_array(self): + + arr = a = np.array(["foo", "b", np.nan], dtype="object") + assert libwriters.max_len_string_array(arr) == 3 + + # unicode + arr = a.astype("U").astype(object) + assert libwriters.max_len_string_array(arr) == 3 + + # bytes for python3 + arr = a.astype("S").astype(object) + assert libwriters.max_len_string_array(arr) == 3 + + # raises + with pytest.raises(TypeError): + libwriters.max_len_string_array(arr.astype("U")) + + def test_fast_unique_multiple_list_gen_sort(self): + keys = [["p", "a"], ["n", "d"], ["a", "s"]] + + gen = (key for key in keys) + expected = np.array(["a", "d", "n", "p", "s"]) + out = lib.fast_unique_multiple_list_gen(gen, 
sort=True) + tm.assert_numpy_array_equal(np.array(out), expected) + + gen = (key for key in keys) + expected = np.array(["p", "a", "n", "d", "s"]) + out = lib.fast_unique_multiple_list_gen(gen, sort=False) + tm.assert_numpy_array_equal(np.array(out), expected) + + +class TestIndexing: + def test_maybe_indices_to_slice_left_edge(self): + target = np.arange(100) + + # slice + indices = np.array([], dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + for end in [1, 2, 5, 20, 99]: + for step in [1, 2, 4]: + indices = np.arange(0, end, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_right_edge(self): + target = np.arange(100) + + # slice + for start in [0, 2, 5, 20, 97, 98]: + for step in [1, 2, 4]: + indices = np.arange(start, 99, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + 
tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + indices = np.array([97, 98, 99, 100], dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + + with pytest.raises(IndexError): + target[indices] + with pytest.raises(IndexError): + target[maybe_slice] + + indices = np.array([100, 99, 98, 97], dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + + with pytest.raises(IndexError): + target[indices] + with pytest.raises(IndexError): + target[maybe_slice] + + for case in [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_indices_to_slice_both_edges(self): + target = np.arange(10) + + # slice + for step in [1, 2, 4, 5, 8, 9]: + indices = np.arange(0, 9, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + 
def test_maybe_indices_to_slice_middle(self): + target = np.arange(100) + + # slice + for start, end in [(2, 10), (5, 25), (65, 97)]: + for step in [1, 2, 4, 20]: + indices = np.arange(start, end, step, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # reverse + indices = indices[::-1] + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + # not slice + for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]: + indices = np.array(case, dtype=np.int64) + maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) + + assert not isinstance(maybe_slice, slice) + tm.assert_numpy_array_equal(maybe_slice, indices) + tm.assert_numpy_array_equal(target[indices], target[maybe_slice]) + + def test_maybe_booleans_to_slice(self): + arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) + result = lib.maybe_booleans_to_slice(arr) + assert result.dtype == np.bool_ + + result = lib.maybe_booleans_to_slice(arr[:0]) + assert result == slice(0, 0) + + def test_get_reverse_indexer(self): + indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64) + result = lib.get_reverse_indexer(indexer, 5) + expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + +def test_cache_readonly_preserve_docstrings(): + # GH18197 + assert Index.hasnans.__doc__ is not None diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py new file mode 100644 index 00000000..5382ad84 --- /dev/null +++ b/pandas/tests/test_multilevel.py @@ -0,0 +1,2473 @@ +import datetime +from io import StringIO +import itertools +from itertools import product + +import numpy as np +from numpy.random import randn +import pytest +import pytz + +from pandas.core.dtypes.common 
import is_float_dtype, is_integer_dtype + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, isna +import pandas._testing as tm + +AGG_FUNCTIONS = [ + "sum", + "prod", + "min", + "max", + "median", + "mean", + "skew", + "mad", + "std", + "var", + "sem", +] + + +class Base: + def setup_method(self, method): + + index = MultiIndex( + levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], + codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=["first", "second"], + ) + self.frame = DataFrame( + np.random.randn(10, 3), + index=index, + columns=Index(["A", "B", "C"], name="exp"), + ) + + self.single_level = MultiIndex( + levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"] + ) + + # create test series object + arrays = [ + ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + ] + tuples = zip(*arrays) + index = MultiIndex.from_tuples(tuples) + s = Series(randn(8), index=index) + s[3] = np.NaN + self.series = s + + self.tdf = tm.makeTimeDataFrame(100) + self.ymd = self.tdf.groupby( + [lambda x: x.year, lambda x: x.month, lambda x: x.day] + ).sum() + + # use Int64Index, to make sure things work + self.ymd.index.set_levels( + [lev.astype("i8") for lev in self.ymd.index.levels], inplace=True + ) + self.ymd.index.set_names(["year", "month", "day"], inplace=True) + + +class TestMultiLevel(Base): + def test_append(self): + a, b = self.frame[:5], self.frame[5:] + + result = a.append(b) + tm.assert_frame_equal(result, self.frame) + + result = a["A"].append(b["A"]) + tm.assert_series_equal(result, self.frame["A"]) + + def test_append_index(self): + idx1 = Index([1.1, 1.2, 1.3]) + idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo") + idx3 = Index(["A", "B", "C"]) + + midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) + midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3]) + + result = 
idx1.append(midx_lv2) + + # see gh-7112 + tz = pytz.timezone("Asia/Tokyo") + expected_tuples = [ + (1.1, tz.localize(datetime.datetime(2011, 1, 1))), + (1.2, tz.localize(datetime.datetime(2011, 1, 2))), + (1.3, tz.localize(datetime.datetime(2011, 1, 3))), + ] + expected = Index([1.1, 1.2, 1.3] + expected_tuples) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(idx1) + expected = Index(expected_tuples + [1.1, 1.2, 1.3]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv2) + expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) + tm.assert_index_equal(result, expected) + + result = midx_lv2.append(midx_lv3) + tm.assert_index_equal(result, expected) + + result = midx_lv3.append(midx_lv2) + expected = Index._simple_new( + np.array( + [ + (1.1, tz.localize(datetime.datetime(2011, 1, 1)), "A"), + (1.2, tz.localize(datetime.datetime(2011, 1, 2)), "B"), + (1.3, tz.localize(datetime.datetime(2011, 1, 3)), "C"), + ] + + expected_tuples, + dtype=object, + ), + None, + ) + tm.assert_index_equal(result, expected) + + def test_dataframe_constructor(self): + multi = DataFrame( + np.random.randn(4, 4), + index=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])], + ) + assert isinstance(multi.index, MultiIndex) + assert not isinstance(multi.columns, MultiIndex) + + multi = DataFrame( + np.random.randn(4, 4), columns=[["a", "a", "b", "b"], ["x", "y", "x", "y"]] + ) + assert isinstance(multi.columns, MultiIndex) + + def test_series_constructor(self): + multi = Series( + 1.0, index=[np.array(["a", "a", "b", "b"]), np.array(["x", "y", "x", "y"])] + ) + assert isinstance(multi.index, MultiIndex) + + multi = Series(1.0, index=[["a", "a", "b", "b"], ["x", "y", "x", "y"]]) + assert isinstance(multi.index, MultiIndex) + + multi = Series(range(4), index=[["a", "a", "b", "b"], ["x", "y", "x", "y"]]) + assert isinstance(multi.index, MultiIndex) + + def test_reindex_level(self): + # axis=0 + month_sums = 
self.ymd.sum(level="month") + result = month_sums.reindex(self.ymd.index, level=1) + expected = self.ymd.groupby(level="month").transform(np.sum) + + tm.assert_frame_equal(result, expected) + + # Series + result = month_sums["A"].reindex(self.ymd.index, level=1) + expected = self.ymd["A"].groupby(level="month").transform(np.sum) + tm.assert_series_equal(result, expected, check_names=False) + + # axis=1 + month_sums = self.ymd.T.sum(axis=1, level="month") + result = month_sums.reindex(columns=self.ymd.index, level=1) + expected = self.ymd.groupby(level="month").transform(np.sum).T + tm.assert_frame_equal(result, expected) + + def test_binops_level(self): + def _check_op(opname): + op = getattr(DataFrame, opname) + month_sums = self.ymd.sum(level="month") + result = op(self.ymd, month_sums, level="month") + + broadcasted = self.ymd.groupby(level="month").transform(np.sum) + expected = op(self.ymd, broadcasted) + tm.assert_frame_equal(result, expected) + + # Series + op = getattr(Series, opname) + result = op(self.ymd["A"], month_sums["A"], level="month") + broadcasted = self.ymd["A"].groupby(level="month").transform(np.sum) + expected = op(self.ymd["A"], broadcasted) + expected.name = "A" + tm.assert_series_equal(result, expected) + + _check_op("sub") + _check_op("add") + _check_op("mul") + _check_op("div") + + def test_pickle(self): + def _test_roundtrip(frame): + unpickled = tm.round_trip_pickle(frame) + tm.assert_frame_equal(frame, unpickled) + + _test_roundtrip(self.frame) + _test_roundtrip(self.frame.T) + _test_roundtrip(self.ymd) + _test_roundtrip(self.ymd.T) + + def test_reindex(self): + expected = self.frame.iloc[[0, 3]] + reindexed = self.frame.loc[[("foo", "one"), ("bar", "one")]] + tm.assert_frame_equal(reindexed, expected) + + def test_reindex_preserve_levels(self): + new_index = self.ymd.index[::10] + chunk = self.ymd.reindex(new_index) + assert chunk.index is new_index + + chunk = self.ymd.loc[new_index] + assert chunk.index is new_index + + ymdT = 
self.ymd.T + chunk = ymdT.reindex(columns=new_index) + assert chunk.columns is new_index + + chunk = ymdT.loc[:, new_index] + assert chunk.columns is new_index + + def test_repr_to_string(self): + repr(self.frame) + repr(self.ymd) + repr(self.frame.T) + repr(self.ymd.T) + + buf = StringIO() + self.frame.to_string(buf=buf) + self.ymd.to_string(buf=buf) + self.frame.T.to_string(buf=buf) + self.ymd.T.to_string(buf=buf) + + def test_repr_name_coincide(self): + index = MultiIndex.from_tuples( + [("a", 0, "foo"), ("b", 1, "bar")], names=["a", "b", "c"] + ) + + df = DataFrame({"value": [0, 1]}, index=index) + + lines = repr(df).split("\n") + assert lines[2].startswith("a 0 foo") + + def test_delevel_infer_dtype(self): + tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1])) + index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"]) + df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index) + deleveled = df.reset_index() + assert is_integer_dtype(deleveled["prm1"]) + assert is_float_dtype(deleveled["prm2"]) + + def test_reset_index_with_drop(self): + deleveled = self.ymd.reset_index(drop=True) + assert len(deleveled.columns) == len(self.ymd.columns) + assert deleveled.index.name == self.ymd.index.name + + deleveled = self.series.reset_index() + assert isinstance(deleveled, DataFrame) + assert len(deleveled.columns) == len(self.series.index.levels) + 1 + assert deleveled.index.name == self.series.index.name + + deleveled = self.series.reset_index(drop=True) + assert isinstance(deleveled, Series) + assert deleveled.index.name == self.series.index.name + + def test_count_level(self): + def _check_counts(frame, axis=0): + index = frame._get_axis(axis) + for i in range(index.nlevels): + result = frame.count(axis=axis, level=i) + expected = frame.groupby(axis=axis, level=i).count() + expected = expected.reindex_like(result).astype("i8") + tm.assert_frame_equal(result, expected) + + self.frame.iloc[1, [1, 2]] = np.nan + self.frame.iloc[7, 
[0, 1]] = np.nan + self.ymd.iloc[1, [1, 2]] = np.nan + self.ymd.iloc[7, [0, 1]] = np.nan + + _check_counts(self.frame) + _check_counts(self.ymd) + _check_counts(self.frame.T, axis=1) + _check_counts(self.ymd.T, axis=1) + + # can't call with level on regular DataFrame + df = tm.makeTimeDataFrame() + with pytest.raises(TypeError, match="hierarchical"): + df.count(level=0) + + self.frame["D"] = "foo" + result = self.frame.count(level=0, numeric_only=True) + tm.assert_index_equal(result.columns, Index(list("ABC"), name="exp")) + + def test_count_level_series(self): + index = MultiIndex( + levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]], + codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]], + ) + + s = Series(np.random.randn(len(index)), index=index) + + result = s.count(level=0) + expected = s.groupby(level=0).count() + tm.assert_series_equal( + result.astype("f8"), expected.reindex(result.index).fillna(0) + ) + + result = s.count(level=1) + expected = s.groupby(level=1).count() + tm.assert_series_equal( + result.astype("f8"), expected.reindex(result.index).fillna(0) + ) + + def test_count_level_corner(self): + s = self.frame["A"][:0] + result = s.count(level=0) + expected = Series(0, index=s.index.levels[0], name="A") + tm.assert_series_equal(result, expected) + + df = self.frame[:0] + result = df.count(level=0) + expected = ( + DataFrame(index=s.index.levels[0].set_names(["first"]), columns=df.columns) + .fillna(0) + .astype(np.int64) + ) + tm.assert_frame_equal(result, expected) + + def test_get_level_number_out_of_bounds(self): + with pytest.raises(IndexError, match="Too many levels"): + self.frame.index._get_level_number(2) + with pytest.raises(IndexError, match="not a valid level number"): + self.frame.index._get_level_number(-3) + + def test_unstack(self): + # just check that it works for now + unstacked = self.ymd.unstack() + unstacked.unstack() + + # test that ints work + self.ymd.astype(int).unstack() + + # test that int32 work + 
self.ymd.astype(np.int32).unstack() + + @pytest.mark.parametrize( + "result_rows,result_columns,index_product,expected_row", + [ + ( + [[1, 1, None, None, 30.0, None], [2, 2, None, None, 30.0, None]], + ["ix1", "ix2", "col1", "col2", "col3", "col4"], + 2, + [None, None, 30.0, None], + ), + ( + [[1, 1, None, None, 30.0], [2, 2, None, None, 30.0]], + ["ix1", "ix2", "col1", "col2", "col3"], + 2, + [None, None, 30.0], + ), + ( + [[1, 1, None, None, 30.0], [2, None, None, None, 30.0]], + ["ix1", "ix2", "col1", "col2", "col3"], + None, + [None, None, 30.0], + ), + ], + ) + def test_unstack_partial( + self, result_rows, result_columns, index_product, expected_row + ): + # check for regressions on this issue: + # https://github.com/pandas-dev/pandas/issues/19351 + # make sure DataFrame.unstack() works when its run on a subset of the DataFrame + # and the Index levels contain values that are not present in the subset + result = pd.DataFrame(result_rows, columns=result_columns).set_index( + ["ix1", "ix2"] + ) + result = result.iloc[1:2].unstack("ix2") + expected = pd.DataFrame( + [expected_row], + columns=pd.MultiIndex.from_product( + [result_columns[2:], [index_product]], names=[None, "ix2"] + ), + index=pd.Index([2], name="ix1"), + ) + tm.assert_frame_equal(result, expected) + + def test_unstack_multiple_no_empty_columns(self): + index = MultiIndex.from_tuples( + [(0, "foo", 0), (0, "bar", 0), (1, "baz", 1), (1, "qux", 1)] + ) + + s = Series(np.random.randn(4), index=index) + + unstacked = s.unstack([1, 2]) + expected = unstacked.dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected) + + def test_stack(self): + # regular roundtrip + unstacked = self.ymd.unstack() + restacked = unstacked.stack() + tm.assert_frame_equal(restacked, self.ymd) + + unlexsorted = self.ymd.sort_index(level=2) + + unstacked = unlexsorted.unstack(2) + restacked = unstacked.stack() + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) + + unlexsorted = unlexsorted[::-1] + 
unstacked = unlexsorted.unstack(1) + restacked = unstacked.stack().swaplevel(1, 2) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) + + unlexsorted = unlexsorted.swaplevel(0, 1) + unstacked = unlexsorted.unstack(0).swaplevel(0, 1, axis=1) + restacked = unstacked.stack(0).swaplevel(1, 2) + tm.assert_frame_equal(restacked.sort_index(level=0), self.ymd) + + # columns unsorted + unstacked = self.ymd.unstack() + unstacked = unstacked.sort_index(axis=1, ascending=False) + restacked = unstacked.stack() + tm.assert_frame_equal(restacked, self.ymd) + + # more than 2 levels in the columns + unstacked = self.ymd.unstack(1).unstack(1) + + result = unstacked.stack(1) + expected = self.ymd.unstack() + tm.assert_frame_equal(result, expected) + + result = unstacked.stack(2) + expected = self.ymd.unstack(1) + tm.assert_frame_equal(result, expected) + + result = unstacked.stack(0) + expected = self.ymd.stack().unstack(1).unstack(1) + tm.assert_frame_equal(result, expected) + + # not all levels present in each echelon + unstacked = self.ymd.unstack(2).loc[:, ::3] + stacked = unstacked.stack().stack() + ymd_stacked = self.ymd.stack() + tm.assert_series_equal(stacked, ymd_stacked.reindex(stacked.index)) + + # stack with negative number + result = self.ymd.unstack(0).stack(-2) + expected = self.ymd.unstack(0).stack(0) + + # GH10417 + def check(left, right): + tm.assert_series_equal(left, right) + assert left.index.is_unique is False + li, ri = left.index, right.index + tm.assert_index_equal(li, ri) + + df = DataFrame( + np.arange(12).reshape(4, 3), + index=list("abab"), + columns=["1st", "2nd", "3rd"], + ) + + mi = MultiIndex( + levels=[["a", "b"], ["1st", "2nd", "3rd"]], + codes=[np.tile(np.arange(2).repeat(3), 2), np.tile(np.arange(3), 4)], + ) + + left, right = df.stack(), Series(np.arange(12), index=mi) + check(left, right) + + df.columns = ["1st", "2nd", "1st"] + mi = MultiIndex( + levels=[["a", "b"], ["1st", "2nd"]], + codes=[np.tile(np.arange(2).repeat(3), 2), 
np.tile([0, 1, 0], 4)], + ) + + left, right = df.stack(), Series(np.arange(12), index=mi) + check(left, right) + + tpls = ("a", 2), ("b", 1), ("a", 1), ("b", 2) + df.index = MultiIndex.from_tuples(tpls) + mi = MultiIndex( + levels=[["a", "b"], [1, 2], ["1st", "2nd"]], + codes=[ + np.tile(np.arange(2).repeat(3), 2), + np.repeat([1, 0, 1], [3, 6, 3]), + np.tile([0, 1, 0], 4), + ], + ) + + left, right = df.stack(), Series(np.arange(12), index=mi) + check(left, right) + + def test_unstack_odd_failure(self): + data = """day,time,smoker,sum,len +Fri,Dinner,No,8.25,3. +Fri,Dinner,Yes,27.03,9 +Fri,Lunch,No,3.0,1 +Fri,Lunch,Yes,13.68,6 +Sat,Dinner,No,139.63,45 +Sat,Dinner,Yes,120.77,42 +Sun,Dinner,No,180.57,57 +Sun,Dinner,Yes,66.82,19 +Thur,Dinner,No,3.0,1 +Thur,Lunch,No,117.32,44 +Thur,Lunch,Yes,51.51,17""" + + df = pd.read_csv(StringIO(data)).set_index(["day", "time", "smoker"]) + + # it works, #2100 + result = df.unstack(2) + + recons = result.stack() + tm.assert_frame_equal(recons, df) + + def test_stack_mixed_dtype(self): + df = self.frame.T + df["foo", "four"] = "foo" + df = df.sort_index(level=1, axis=1) + + stacked = df.stack() + result = df["foo"].stack().sort_index() + tm.assert_series_equal(stacked["foo"], result, check_names=False) + assert result.name is None + assert stacked["bar"].dtype == np.float_ + + def test_unstack_bug(self): + df = DataFrame( + { + "state": ["naive", "naive", "naive", "activ", "activ", "activ"], + "exp": ["a", "b", "b", "b", "a", "a"], + "barcode": [1, 2, 3, 4, 1, 3], + "v": ["hi", "hi", "bye", "bye", "bye", "peace"], + "extra": np.arange(6.0), + } + ) + + result = df.groupby(["state", "exp", "barcode", "v"]).apply(len) + + unstacked = result.unstack() + restacked = unstacked.stack() + tm.assert_series_equal(restacked, result.reindex(restacked.index).astype(float)) + + def test_stack_unstack_preserve_names(self): + unstacked = self.frame.unstack() + assert unstacked.index.name == "first" + assert unstacked.columns.names == ["exp", 
"second"] + + restacked = unstacked.stack() + assert restacked.index.names == self.frame.index.names + + @pytest.mark.parametrize("method", ["stack", "unstack"]) + def test_stack_unstack_wrong_level_name(self, method): + # GH 18303 - wrong level name should raise + + # A DataFrame with flat axes: + df = self.frame.loc["foo"] + + with pytest.raises(KeyError, match="does not match index name"): + getattr(df, method)("mistake") + + if method == "unstack": + # Same on a Series: + s = df.iloc[:, 0] + with pytest.raises(KeyError, match="does not match index name"): + getattr(s, method)("mistake") + + def test_unused_level_raises(self): + # GH 20410 + mi = MultiIndex( + levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]], + codes=[[1, 0], [1, 0]], + ) + df = DataFrame(-1, index=range(3), columns=mi) + + with pytest.raises(KeyError, match="notevenone"): + df["notevenone"] + + def test_unstack_level_name(self): + result = self.frame.unstack("second") + expected = self.frame.unstack(level=1) + tm.assert_frame_equal(result, expected) + + def test_stack_level_name(self): + unstacked = self.frame.unstack("second") + result = unstacked.stack("exp") + expected = self.frame.unstack().stack(0) + tm.assert_frame_equal(result, expected) + + result = self.frame.stack("exp") + expected = self.frame.stack() + tm.assert_series_equal(result, expected) + + def test_stack_unstack_multiple(self): + unstacked = self.ymd.unstack(["year", "month"]) + expected = self.ymd.unstack("year").unstack("month") + tm.assert_frame_equal(unstacked, expected) + assert unstacked.columns.names == expected.columns.names + + # series + s = self.ymd["A"] + s_unstacked = s.unstack(["year", "month"]) + tm.assert_frame_equal(s_unstacked, expected["A"]) + + restacked = unstacked.stack(["year", "month"]) + restacked = restacked.swaplevel(0, 1).swaplevel(1, 2) + restacked = restacked.sort_index(level=0) + + tm.assert_frame_equal(restacked, self.ymd) + assert restacked.index.names == self.ymd.index.names + + # GH 
#451 + unstacked = self.ymd.unstack([1, 2]) + expected = self.ymd.unstack(1).unstack(1).dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected) + + unstacked = self.ymd.unstack([2, 1]) + expected = self.ymd.unstack(2).unstack(1).dropna(axis=1, how="all") + tm.assert_frame_equal(unstacked, expected.loc[:, unstacked.columns]) + + def test_stack_names_and_numbers(self): + unstacked = self.ymd.unstack(["year", "month"]) + + # Can't use mixture of names and numbers to stack + with pytest.raises(ValueError, match="level should contain"): + unstacked.stack([0, "month"]) + + def test_stack_multiple_out_of_bounds(self): + # nlevels == 3 + unstacked = self.ymd.unstack(["year", "month"]) + + with pytest.raises(IndexError, match="Too many levels"): + unstacked.stack([2, 3]) + with pytest.raises(IndexError, match="not a valid level number"): + unstacked.stack([-4, -3]) + + def test_unstack_period_series(self): + # GH 4342 + idx1 = pd.PeriodIndex( + ["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"], + freq="M", + name="period", + ) + idx2 = Index(["A", "B"] * 3, name="str") + value = [1, 2, 3, 4, 5, 6] + + idx = MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex( + ["2013-01", "2013-02", "2013-03"], freq="M", name="period" + ) + expected = DataFrame( + {"A": [1, 3, 5], "B": [2, 4, 6]}, index=e_idx, columns=["A", "B"] + ) + expected.columns.name = "str" + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) + + idx1 = pd.PeriodIndex( + ["2013-01", "2013-01", "2013-02", "2013-02", "2013-03", "2013-03"], + freq="M", + name="period1", + ) + + idx2 = pd.PeriodIndex( + ["2013-12", "2013-11", "2013-10", "2013-09", "2013-08", "2013-07"], + freq="M", + name="period2", + ) + idx = MultiIndex.from_arrays([idx1, idx2]) + s = Series(value, index=idx) + + 
result1 = s.unstack() + result2 = s.unstack(level=1) + result3 = s.unstack(level=0) + + e_idx = pd.PeriodIndex( + ["2013-01", "2013-02", "2013-03"], freq="M", name="period1" + ) + e_cols = pd.PeriodIndex( + ["2013-07", "2013-08", "2013-09", "2013-10", "2013-11", "2013-12"], + freq="M", + name="period2", + ) + expected = DataFrame( + [ + [np.nan, np.nan, np.nan, np.nan, 2, 1], + [np.nan, np.nan, 4, 3, np.nan, np.nan], + [6, 5, np.nan, np.nan, np.nan, np.nan], + ], + index=e_idx, + columns=e_cols, + ) + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected.T) + + def test_unstack_period_frame(self): + # GH 4342 + idx1 = pd.PeriodIndex( + ["2014-01", "2014-02", "2014-02", "2014-02", "2014-01", "2014-01"], + freq="M", + name="period1", + ) + idx2 = pd.PeriodIndex( + ["2013-12", "2013-12", "2014-02", "2013-10", "2013-10", "2014-02"], + freq="M", + name="period2", + ) + value = {"A": [1, 2, 3, 4, 5, 6], "B": [6, 5, 4, 3, 2, 1]} + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame(value, index=idx) + + result1 = df.unstack() + result2 = df.unstack(level=1) + result3 = df.unstack(level=0) + + e_1 = pd.PeriodIndex(["2014-01", "2014-02"], freq="M", name="period1") + e_2 = pd.PeriodIndex( + ["2013-10", "2013-12", "2014-02", "2013-10", "2013-12", "2014-02"], + freq="M", + name="period2", + ) + e_cols = MultiIndex.from_arrays(["A A A B B B".split(), e_2]) + expected = DataFrame( + [[5, 1, 6, 2, 6, 1], [4, 2, 3, 3, 5, 4]], index=e_1, columns=e_cols + ) + + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + + e_1 = pd.PeriodIndex( + ["2014-01", "2014-02", "2014-01", "2014-02"], freq="M", name="period1" + ) + e_2 = pd.PeriodIndex( + ["2013-10", "2013-12", "2014-02"], freq="M", name="period2" + ) + e_cols = MultiIndex.from_arrays(["A A B B".split(), e_1]) + expected = DataFrame( + [[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols + ) + + 
tm.assert_frame_equal(result3, expected) + + def test_stack_multiple_bug(self): + """ bug when some uniques are not present in the data #3170""" + id_col = ([1] * 3) + ([2] * 3) + name = (["a"] * 3) + (["b"] * 3) + date = pd.to_datetime(["2013-01-03", "2013-01-04", "2013-01-05"] * 2) + var1 = np.random.randint(0, 100, 6) + df = DataFrame(dict(ID=id_col, NAME=name, DATE=date, VAR1=var1)) + + multi = df.set_index(["DATE", "ID"]) + multi.columns.name = "Params" + unst = multi.unstack("ID") + down = unst.resample("W-THU").mean() + + rs = down.stack("ID") + xp = unst.loc[:, ["VAR1"]].resample("W-THU").mean().stack("ID") + xp.columns.name = "Params" + tm.assert_frame_equal(rs, xp) + + def test_stack_dropna(self): + # GH #3997 + df = DataFrame({"A": ["a1", "a2"], "B": ["b1", "b2"], "C": [1, 1]}) + df = df.set_index(["A", "B"]) + + stacked = df.unstack().stack(dropna=False) + assert len(stacked) > len(stacked.dropna()) + + stacked = df.unstack().stack(dropna=True) + tm.assert_frame_equal(stacked, stacked.dropna()) + + def test_unstack_multiple_hierarchical(self): + df = DataFrame( + index=[ + [0, 0, 0, 0, 1, 1, 1, 1], + [0, 0, 1, 1, 0, 0, 1, 1], + [0, 1, 0, 1, 0, 1, 0, 1], + ], + columns=[[0, 0, 1, 1], [0, 1, 0, 1]], + ) + + df.index.names = ["a", "b", "c"] + df.columns.names = ["d", "e"] + + # it works! 
+ df.unstack(["b", "c"]) + + def test_groupby_transform(self): + s = self.frame["A"] + grouper = s.index.get_level_values(0) + + grouped = s.groupby(grouper) + + applied = grouped.apply(lambda x: x * 2) + expected = grouped.transform(lambda x: x * 2) + result = applied.reindex(expected.index) + tm.assert_series_equal(result, expected, check_names=False) + + def test_unstack_sparse_keyspace(self): + # memory problems with naive impl #2278 + # Generate Long File & Test Pivot + NUM_ROWS = 1000 + + df = DataFrame( + { + "A": np.random.randint(100, size=NUM_ROWS), + "B": np.random.randint(300, size=NUM_ROWS), + "C": np.random.randint(-7, 7, size=NUM_ROWS), + "D": np.random.randint(-19, 19, size=NUM_ROWS), + "E": np.random.randint(3000, size=NUM_ROWS), + "F": np.random.randn(NUM_ROWS), + } + ) + + idf = df.set_index(["A", "B", "C", "D", "E"]) + + # it works! is sufficient + idf.unstack("E") + + def test_unstack_unobserved_keys(self): + # related to #2278 refactoring + levels = [[0, 1], [0, 1, 2, 3]] + codes = [[0, 0, 1, 1], [0, 2, 0, 2]] + + index = MultiIndex(levels, codes) + + df = DataFrame(np.random.randn(4, 2), index=index) + + result = df.unstack() + assert len(result.columns) == 4 + + recons = result.stack() + tm.assert_frame_equal(recons, df) + + @pytest.mark.slow + def test_unstack_number_of_levels_larger_than_int32(self): + # GH 20601 + df = DataFrame( + np.random.randn(2 ** 16, 2), index=[np.arange(2 ** 16), np.arange(2 ** 16)] + ) + with pytest.raises(ValueError, match="int32 overflow"): + df.unstack() + + def test_stack_order_with_unsorted_levels(self): + # GH 16323 + + def manual_compare_stacked(df, df_stacked, lev0, lev1): + assert all( + df.loc[row, col] == df_stacked.loc[(row, col[lev0]), col[lev1]] + for row in df.index + for col in df.columns + ) + + # deep check for 1-row case + for width in [2, 3]: + levels_poss = itertools.product( + itertools.permutations([0, 1, 2], width), repeat=2 + ) + + for levels in levels_poss: + columns = 
MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + df = DataFrame(columns=columns, data=[range(4)]) + for stack_lev in range(2): + df_stacked = df.stack(stack_lev) + manual_compare_stacked(df, df_stacked, stack_lev, 1 - stack_lev) + + # check multi-row case + mi = MultiIndex( + levels=[["A", "C", "B"], ["B", "A", "C"]], + codes=[np.repeat(range(3), 3), np.tile(range(3), 3)], + ) + df = DataFrame( + columns=mi, index=range(5), data=np.arange(5 * len(mi)).reshape(5, -1) + ) + manual_compare_stacked(df, df.stack(0), 0, 1) + + def test_groupby_corner(self): + midx = MultiIndex( + levels=[["foo"], ["bar"], ["baz"]], + codes=[[0], [0], [0]], + names=["one", "two", "three"], + ) + df = DataFrame([np.random.rand(4)], columns=["a", "b", "c", "d"], index=midx) + # should work + df.groupby(level="three") + + def test_groupby_level_no_obs(self): + # #1697 + midx = MultiIndex.from_tuples( + [ + ("f1", "s1"), + ("f1", "s2"), + ("f2", "s1"), + ("f2", "s2"), + ("f3", "s1"), + ("f3", "s2"), + ] + ) + df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx) + df1 = df.loc(axis=1)[df.columns.map(lambda u: u[0] in ["f2", "f3"])] + + grouped = df1.groupby(axis=1, level=0) + result = grouped.sum() + assert (result.columns == ["f2", "f3"]).all() + + def test_join(self): + a = self.frame.loc[self.frame.index[:5], ["A"]] + b = self.frame.loc[self.frame.index[2:], ["B", "C"]] + + joined = a.join(b, how="outer").reindex(self.frame.index) + expected = self.frame.copy() + expected.values[np.isnan(joined.values)] = np.nan + + assert not np.isnan(joined.values).all() + + # TODO what should join do with names ? 
+ tm.assert_frame_equal(joined, expected, check_names=False) + + def test_swaplevel(self): + swapped = self.frame["A"].swaplevel() + swapped2 = self.frame["A"].swaplevel(0) + swapped3 = self.frame["A"].swaplevel(0, 1) + swapped4 = self.frame["A"].swaplevel("first", "second") + assert not swapped.index.equals(self.frame.index) + tm.assert_series_equal(swapped, swapped2) + tm.assert_series_equal(swapped, swapped3) + tm.assert_series_equal(swapped, swapped4) + + back = swapped.swaplevel() + back2 = swapped.swaplevel(0) + back3 = swapped.swaplevel(0, 1) + back4 = swapped.swaplevel("second", "first") + assert back.index.equals(self.frame.index) + tm.assert_series_equal(back, back2) + tm.assert_series_equal(back, back3) + tm.assert_series_equal(back, back4) + + ft = self.frame.T + swapped = ft.swaplevel("first", "second", axis=1) + exp = self.frame.swaplevel("first", "second").T + tm.assert_frame_equal(swapped, exp) + + def test_reorder_levels(self): + result = self.ymd.reorder_levels(["month", "day", "year"]) + expected = self.ymd.swaplevel(0, 1).swaplevel(1, 2) + tm.assert_frame_equal(result, expected) + + result = self.ymd["A"].reorder_levels(["month", "day", "year"]) + expected = self.ymd["A"].swaplevel(0, 1).swaplevel(1, 2) + tm.assert_series_equal(result, expected) + + result = self.ymd.T.reorder_levels(["month", "day", "year"], axis=1) + expected = self.ymd.T.swaplevel(0, 1, axis=1).swaplevel(1, 2, axis=1) + tm.assert_frame_equal(result, expected) + + with pytest.raises(TypeError, match="hierarchical axis"): + self.ymd.reorder_levels([1, 2], axis=1) + + with pytest.raises(IndexError, match="Too many levels"): + self.ymd.index.reorder_levels([1, 2, 3]) + + def test_insert_index(self): + df = self.ymd[:5].T + df[2000, 1, 10] = df[2000, 1, 7] + assert isinstance(df.columns, MultiIndex) + assert (df[2000, 1, 10] == df[2000, 1, 7]).all() + + def test_alignment(self): + x = Series( + data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)]) + ) + + y 
= Series( + data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)]) + ) + + res = x - y + exp_index = x.index.union(y.index) + exp = x.reindex(exp_index) - y.reindex(exp_index) + tm.assert_series_equal(res, exp) + + # hit non-monotonic code path + res = x[::-1] - y[::-1] + exp_index = x.index.union(y.index) + exp = x.reindex(exp_index) - y.reindex(exp_index) + tm.assert_series_equal(res, exp) + + def test_count(self): + frame = self.frame.copy() + frame.index.names = ["a", "b"] + + result = frame.count(level="b") + expect = self.frame.count(level=1) + tm.assert_frame_equal(result, expect, check_names=False) + + result = frame.count(level="a") + expect = self.frame.count(level=0) + tm.assert_frame_equal(result, expect, check_names=False) + + series = self.series.copy() + series.index.names = ["a", "b"] + + result = series.count(level="b") + expect = self.series.count(level=1).rename_axis("b") + tm.assert_series_equal(result, expect) + + result = series.count(level="a") + expect = self.series.count(level=0).rename_axis("a") + tm.assert_series_equal(result, expect) + + msg = "Level x not found" + with pytest.raises(KeyError, match=msg): + series.count("x") + with pytest.raises(KeyError, match=msg): + frame.count(level="x") + + @pytest.mark.parametrize("op", AGG_FUNCTIONS) + @pytest.mark.parametrize("level", [0, 1]) + @pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("sort", [True, False]) + def test_series_group_min_max(self, op, level, skipna, sort): + # GH 17537 + grouped = self.series.groupby(level=level, sort=sort) + # skipna=True + leftside = grouped.agg(lambda x: getattr(x, op)(skipna=skipna)) + rightside = getattr(self.series, op)(level=level, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level) + tm.assert_series_equal(leftside, rightside) + + @pytest.mark.parametrize("op", AGG_FUNCTIONS) + @pytest.mark.parametrize("level", [0, 1]) + @pytest.mark.parametrize("axis", [0, 1]) + 
@pytest.mark.parametrize("skipna", [True, False]) + @pytest.mark.parametrize("sort", [True, False]) + def test_frame_group_ops(self, op, level, axis, skipna, sort): + # GH 17537 + self.frame.iloc[1, [1, 2]] = np.nan + self.frame.iloc[7, [0, 1]] = np.nan + + level_name = self.frame.index.names[level] + + if axis == 0: + frame = self.frame + else: + frame = self.frame.T + + grouped = frame.groupby(level=level, axis=axis, sort=sort) + + pieces = [] + + def aggf(x): + pieces.append(x) + return getattr(x, op)(skipna=skipna, axis=axis) + + leftside = grouped.agg(aggf) + rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level, axis=axis) + frame = frame.sort_index(level=level, axis=axis) + + # for good measure, groupby detail + level_index = frame._get_axis(axis).levels[level].rename(level_name) + + tm.assert_index_equal(leftside._get_axis(axis), level_index) + tm.assert_index_equal(rightside._get_axis(axis), level_index) + + tm.assert_frame_equal(leftside, rightside) + + def test_stat_op_corner(self): + obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)])) + + result = obj.sum(level=0) + expected = Series([10.0], index=[2]) + tm.assert_series_equal(result, expected) + + def test_frame_any_all_group(self): + df = DataFrame( + {"data": [False, False, True, False, True, False, True]}, + index=[ + ["one", "one", "two", "one", "two", "two", "two"], + [0, 1, 0, 2, 1, 2, 3], + ], + ) + + result = df.any(level=0) + ex = DataFrame({"data": [False, True]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + result = df.all(level=0) + ex = DataFrame({"data": [False, False]}, index=["one", "two"]) + tm.assert_frame_equal(result, ex) + + def test_series_any_timedelta(self): + # GH 17667 + df = DataFrame( + { + "a": Series([0, 0]), + "t": Series([pd.to_timedelta(0, "s"), pd.to_timedelta(1, "ms")]), + } + ) + + result = df.any(axis=0) + expected = Series(data=[False, True], index=["a", "t"]) + 
tm.assert_series_equal(result, expected) + + result = df.any(axis=1) + expected = Series(data=[False, True]) + tm.assert_series_equal(result, expected) + + def test_std_var_pass_ddof(self): + index = MultiIndex.from_arrays( + [np.arange(5).repeat(10), np.tile(np.arange(10), 5)] + ) + df = DataFrame(np.random.randn(len(index), 5), index=index) + + for meth in ["var", "std"]: + ddof = 4 + alt = lambda x: getattr(x, meth)(ddof=ddof) + + result = getattr(df[0], meth)(level=0, ddof=ddof) + expected = df[0].groupby(level=0).agg(alt) + tm.assert_series_equal(result, expected) + + result = getattr(df, meth)(level=0, ddof=ddof) + expected = df.groupby(level=0).agg(alt) + tm.assert_frame_equal(result, expected) + + def test_frame_series_agg_multiple_levels(self): + result = self.ymd.sum(level=["year", "month"]) + expected = self.ymd.groupby(level=["year", "month"]).sum() + tm.assert_frame_equal(result, expected) + + result = self.ymd["A"].sum(level=["year", "month"]) + expected = self.ymd["A"].groupby(level=["year", "month"]).sum() + tm.assert_series_equal(result, expected) + + def test_groupby_multilevel(self): + result = self.ymd.groupby(level=[0, 1]).mean() + + k1 = self.ymd.index.get_level_values(0) + k2 = self.ymd.index.get_level_values(1) + + expected = self.ymd.groupby([k1, k2]).mean() + + # TODO groupby with level_values drops names + tm.assert_frame_equal(result, expected, check_names=False) + assert result.index.names == self.ymd.index.names[:2] + + result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() + tm.assert_frame_equal(result, result2) + + def test_groupby_multilevel_with_transform(self): + pass + + def test_multilevel_consolidate(self): + index = MultiIndex.from_tuples( + [("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")] + ) + df = DataFrame(np.random.randn(4, 4), index=index, columns=index) + df["Totals", ""] = df.sum(1) + df = df._consolidate() + + def test_loc_preserve_names(self): + result = self.ymd.loc[2000] + result2 = 
self.ymd["A"].loc[2000] + assert result.index.names == self.ymd.index.names[1:] + assert result2.index.names == self.ymd.index.names[1:] + + result = self.ymd.loc[2000, 2] + result2 = self.ymd["A"].loc[2000, 2] + assert result.index.name == self.ymd.index.names[2] + assert result2.index.name == self.ymd.index.names[2] + + def test_unstack_preserve_types(self): + # GH #403 + self.ymd["E"] = "foo" + self.ymd["F"] = 2 + + unstacked = self.ymd.unstack("month") + assert unstacked["A", 1].dtype == np.float64 + assert unstacked["E", 1].dtype == np.object_ + assert unstacked["F", 1].dtype == np.float64 + + def test_unstack_group_index_overflow(self): + codes = np.tile(np.arange(500), 2) + level = np.arange(500) + + index = MultiIndex( + levels=[level] * 8 + [[0, 1]], + codes=[codes] * 8 + [np.arange(2).repeat(500)], + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack() + assert result.shape == (500, 2) + + # test roundtrip + stacked = result.stack() + tm.assert_series_equal(s, stacked.reindex(s.index)) + + # put it at beginning + index = MultiIndex( + levels=[[0, 1]] + [level] * 8, + codes=[np.arange(2).repeat(500)] + [codes] * 8, + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack(0) + assert result.shape == (500, 2) + + # put it in middle + index = MultiIndex( + levels=[level] * 4 + [[0, 1]] + [level] * 4, + codes=([codes] * 4 + [np.arange(2).repeat(500)] + [codes] * 4), + ) + + s = Series(np.arange(1000), index=index) + result = s.unstack(4) + assert result.shape == (500, 2) + + def test_pyint_engine(self): + # GH 18519 : when combinations of codes cannot be represented in 64 + # bits, the index underlying the MultiIndex engine works with Python + # integers, rather than uint64. 
+ N = 5 + keys = [ + tuple(l) + for l in [ + [0] * 10 * N, + [1] * 10 * N, + [2] * 10 * N, + [np.nan] * N + [2] * 9 * N, + [0] * N + [2] * 9 * N, + [np.nan] * N + [2] * 8 * N + [0] * N, + ] + ] + # Each level contains 4 elements (including NaN), so it is represented + # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a + # 64 bit engine and truncating the first levels, the fourth and fifth + # keys would collide; if truncating the last levels, the fifth and + # sixth; if rotating bits rather than shifting, the third and fifth. + + for idx in range(len(keys)): + index = MultiIndex.from_tuples(keys) + assert index.get_loc(keys[idx]) == idx + + expected = np.arange(idx + 1, dtype=np.intp) + result = index.get_indexer([keys[i] for i in expected]) + tm.assert_numpy_array_equal(result, expected) + + # With missing key: + idces = range(len(keys)) + expected = np.array([-1] + list(idces), dtype=np.intp) + missing = tuple([0, 1] * 5 * N) + result = index.get_indexer([missing] + [keys[i] for i in idces]) + tm.assert_numpy_array_equal(result, expected) + + def test_to_html(self): + self.ymd.columns.name = "foo" + self.ymd.to_html() + self.ymd.T.to_html() + + def test_level_with_tuples(self): + index = MultiIndex( + levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + ) + + series = Series(np.random.randn(6), index=index) + frame = DataFrame(np.random.randn(6, 4), index=index) + + result = series[("foo", "bar", 0)] + result2 = series.loc[("foo", "bar", 0)] + expected = series[:2] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + with pytest.raises(KeyError, match=r"^\(\('foo', 'bar', 0\), 2\)$"): + series[("foo", "bar", 0), 2] + + result = frame.loc[("foo", "bar", 0)] + result2 = frame.xs(("foo", "bar", 0)) + expected = frame[:2] + expected.index = expected.index.droplevel(0) + 
tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + index = MultiIndex( + levels=[[("foo", "bar"), ("foo", "baz"), ("foo", "qux")], [0, 1]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + ) + + series = Series(np.random.randn(6), index=index) + frame = DataFrame(np.random.randn(6, 4), index=index) + + result = series[("foo", "bar")] + result2 = series.loc[("foo", "bar")] + expected = series[:2] + expected.index = expected.index.droplevel(0) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + result = frame.loc[("foo", "bar")] + result2 = frame.xs(("foo", "bar")) + expected = frame[:2] + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + + def test_mixed_depth_drop(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(randn(4, 6), columns=index) + + result = df.drop("a", axis=1) + expected = df.drop([("a", "", "")], axis=1) + tm.assert_frame_equal(expected, result) + + result = df.drop(["top"], axis=1) + expected = df.drop([("top", "OD", "wx")], axis=1) + expected = expected.drop([("top", "OD", "wy")], axis=1) + tm.assert_frame_equal(expected, result) + + result = df.drop(("top", "OD", "wx"), axis=1) + expected = df.drop([("top", "OD", "wx")], axis=1) + tm.assert_frame_equal(expected, result) + + expected = df.drop([("top", "OD", "wy")], axis=1) + expected = df.drop("top", axis=1) + + result = df.drop("result1", level=1, axis=1) + expected = df.drop( + [("routine1", "result1", ""), ("routine2", "result1", "")], axis=1 + ) + tm.assert_frame_equal(expected, result) + + def test_drop_multiindex_other_level_nan(self): + # GH 12754 + df = ( + DataFrame( + { + "A": ["one", "one", "two", "two"], + "B": [np.nan, 
0.0, 1.0, 2.0], + "C": ["a", "b", "c", "c"], + "D": [1, 2, 3, 4], + } + ) + .set_index(["A", "B", "C"]) + .sort_index() + ) + result = df.drop("c", level="C") + expected = DataFrame( + [2, 1], + columns=["D"], + index=pd.MultiIndex.from_tuples( + [("one", 0.0, "b"), ("one", np.nan, "a")], names=["A", "B", "C"] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_drop_nonunique(self): + df = DataFrame( + [ + ["x-a", "x", "a", 1.5], + ["x-a", "x", "a", 1.2], + ["z-c", "z", "c", 3.1], + ["x-a", "x", "a", 4.1], + ["x-b", "x", "b", 5.1], + ["x-b", "x", "b", 4.1], + ["x-b", "x", "b", 2.2], + ["y-a", "y", "a", 1.2], + ["z-b", "z", "b", 2.1], + ], + columns=["var1", "var2", "var3", "var4"], + ) + + grp_size = df.groupby("var1").size() + drop_idx = grp_size.loc[grp_size == 1] + + idf = df.set_index(["var1", "var2", "var3"]) + + # it works! #2101 + result = idf.drop(drop_idx.index, level=0).reset_index() + expected = df[-df.var1.isin(drop_idx.index)] + + result.index = expected.index + + tm.assert_frame_equal(result, expected) + + def test_mixed_depth_pop(self): + arrays = [ + ["a", "top", "top", "routine1", "routine1", "routine2"], + ["", "OD", "OD", "result1", "result2", "result1"], + ["", "wx", "wy", "", "", ""], + ] + + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(randn(4, 6), columns=index) + + df1 = df.copy() + df2 = df.copy() + result = df1.pop("a") + expected = df2.pop(("a", "", "")) + tm.assert_series_equal(expected, result, check_names=False) + tm.assert_frame_equal(df1, df2) + assert result.name == "a" + + expected = df1["top"] + df1 = df1.drop(["top"], axis=1) + result = df2.pop("top") + tm.assert_frame_equal(expected, result) + tm.assert_frame_equal(df1, df2) + + def test_reindex_level_partial_selection(self): + result = self.frame.reindex(["foo", "qux"], level=0) + expected = self.frame.iloc[[0, 1, 2, 7, 8, 9]] + tm.assert_frame_equal(result, expected) + + result = self.frame.T.reindex(["foo", "qux"], 
axis=1, level=0) + tm.assert_frame_equal(result, expected.T) + + result = self.frame.loc[["foo", "qux"]] + tm.assert_frame_equal(result, expected) + + result = self.frame["A"].loc[["foo", "qux"]] + tm.assert_series_equal(result, expected["A"]) + + result = self.frame.T.loc[:, ["foo", "qux"]] + tm.assert_frame_equal(result, expected.T) + + def test_drop_level(self): + result = self.frame.drop(["bar", "qux"], level="first") + expected = self.frame.iloc[[0, 1, 2, 5, 6]] + tm.assert_frame_equal(result, expected) + + result = self.frame.drop(["two"], level="second") + expected = self.frame.iloc[[0, 2, 3, 6, 7, 9]] + tm.assert_frame_equal(result, expected) + + result = self.frame.T.drop(["bar", "qux"], axis=1, level="first") + expected = self.frame.iloc[[0, 1, 2, 5, 6]].T + tm.assert_frame_equal(result, expected) + + result = self.frame.T.drop(["two"], axis=1, level="second") + expected = self.frame.iloc[[0, 2, 3, 6, 7, 9]].T + tm.assert_frame_equal(result, expected) + + def test_drop_level_nonunique_datetime(self): + # GH 12701 + idx = Index([2, 3, 4, 4, 5], name="id") + idxdt = pd.to_datetime( + [ + "201603231400", + "201603231500", + "201603231600", + "201603231600", + "201603231700", + ] + ) + df = DataFrame(np.arange(10).reshape(5, 2), columns=list("ab"), index=idx) + df["tstamp"] = idxdt + df = df.set_index("tstamp", append=True) + ts = Timestamp("201603231600") + assert df.index.is_unique is False + + result = df.drop(ts, level="tstamp") + expected = df.loc[idx != 4] + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("box", [Series, DataFrame]) + def test_drop_tz_aware_timestamp_across_dst(self, box): + # GH 21761 + start = Timestamp("2017-10-29", tz="Europe/Berlin") + end = Timestamp("2017-10-29 04:00:00", tz="Europe/Berlin") + index = pd.date_range(start, end, freq="15min") + data = box(data=[1] * len(index), index=index) + result = data.drop(start) + expected_start = Timestamp("2017-10-29 00:15:00", tz="Europe/Berlin") + expected_idx = 
pd.date_range(expected_start, end, freq="15min") + expected = box(data=[1] * len(expected_idx), index=expected_idx) + tm.assert_equal(result, expected) + + def test_drop_preserve_names(self): + index = MultiIndex.from_arrays( + [[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3]], names=["one", "two"] + ) + + df = DataFrame(np.random.randn(6, 3), index=index) + + result = df.drop([(0, 2)]) + assert result.index.names == ("one", "two") + + def test_unicode_repr_issues(self): + levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])] + codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] + index = MultiIndex(levels=levels, codes=codes) + + repr(index.levels) + + # NumPy bug + # repr(index.get_level_values(1)) + + def test_unicode_repr_level_names(self): + index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"]) + + s = Series(range(2), index=index) + df = DataFrame(np.random.randn(2, 4), index=index) + repr(s) + repr(df) + + def test_join_segfault(self): + # 1532 + df1 = DataFrame({"a": [1, 1], "b": [1, 2], "x": [1, 2]}) + df2 = DataFrame({"a": [2, 2], "b": [1, 2], "y": [1, 2]}) + df1 = df1.set_index(["a", "b"]) + df2 = df2.set_index(["a", "b"]) + # it works! + for how in ["left", "right", "outer"]: + df1.join(df2, how=how) + + def test_frame_dict_constructor_empty_series(self): + s1 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)]) + ) + s2 = Series( + [1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)]) + ) + s3 = Series(dtype=object) + + # it works! 
+ DataFrame({"foo": s1, "bar": s2, "baz": s3}) + DataFrame.from_dict({"foo": s1, "baz": s3, "bar": s2}) + + @pytest.mark.parametrize("d", [4, "d"]) + def test_empty_frame_groupby_dtypes_consistency(self, d): + # GH 20888 + group_keys = ["a", "b", "c"] + df = DataFrame({"a": [1], "b": [2], "c": [3], "d": [d]}) + + g = df[df.a == 2].groupby(group_keys) + result = g.first().index + expected = MultiIndex( + levels=[[1], [2], [3]], codes=[[], [], []], names=["a", "b", "c"] + ) + + tm.assert_index_equal(result, expected) + + def test_multiindex_na_repr(self): + # only an issue with long columns + df3 = DataFrame( + { + "A" * 30: {("A", "A0006000", "nuit"): "A0006000"}, + "B" * 30: {("A", "A0006000", "nuit"): np.nan}, + "C" * 30: {("A", "A0006000", "nuit"): np.nan}, + "D" * 30: {("A", "A0006000", "nuit"): np.nan}, + "E" * 30: {("A", "A0006000", "nuit"): "A"}, + "F" * 30: {("A", "A0006000", "nuit"): np.nan}, + } + ) + + idf = df3.set_index(["A" * 30, "C" * 30]) + repr(idf) + + def test_assign_index_sequences(self): + # #2200 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( + ["a", "b"] + ) + index = list(df.index) + index[0] = ("faz", "boo") + df.index = index + repr(df) + + # this travels an improper code path + index[0] = ["faz", "boo"] + df.index = index + repr(df) + + def test_tuples_have_na(self): + index = MultiIndex( + levels=[[1, 0], [0, 1, 2, 3]], + codes=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]], + ) + + assert isna(index[4][0]) + assert isna(index.values[4][0]) + + def test_duplicate_groupby_issues(self): + idx_tp = [ + ("600809", "20061231"), + ("600809", "20070331"), + ("600809", "20070630"), + ("600809", "20070331"), + ] + dt = ["demo", "demo", "demo", "demo"] + + idx = MultiIndex.from_tuples(idx_tp, names=["STK_ID", "RPT_Date"]) + s = Series(dt, index=idx) + + result = s.groupby(s.index).first() + assert len(result) == 3 + + def test_duplicate_mi(self): + # GH 4516 + df = DataFrame( + [ + ["foo", "bar", 1.0, 1], + 
["foo", "bar", 2.0, 2], + ["bah", "bam", 3.0, 3], + ["bah", "bam", 4.0, 4], + ["foo", "bar", 5.0, 5], + ["bah", "bam", 6.0, 6], + ], + columns=list("ABCD"), + ) + df = df.set_index(["A", "B"]) + df = df.sort_index(level=0) + expected = DataFrame( + [["foo", "bar", 1.0, 1], ["foo", "bar", 2.0, 2], ["foo", "bar", 5.0, 5]], + columns=list("ABCD"), + ).set_index(["A", "B"]) + result = df.loc[("foo", "bar")] + tm.assert_frame_equal(result, expected) + + def test_duplicated_drop_duplicates(self): + # GH 4060 + idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) + + expected = np.array([False, False, False, True, False, False], dtype=bool) + duplicated = idx.duplicated() + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(), expected) + + expected = np.array([True, False, False, False, False, False]) + duplicated = idx.duplicated(keep="last") + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected) + + expected = np.array([True, False, False, True, False, False]) + duplicated = idx.duplicated(keep=False) + tm.assert_numpy_array_equal(duplicated, expected) + assert duplicated.dtype == bool + expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) + tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) + + def test_multiindex_set_index(self): + # segfault in #3308 + d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]} + df = DataFrame(d) + tuples = [(0, 1), (0, 2), (1, 2)] + df["tuples"] = tuples + + index = MultiIndex.from_tuples(df["tuples"]) + # it works! 
+ df.set_index(index) + + def test_datetimeindex(self): + idx1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, + tz="Asia/Tokyo", + ) + idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern") + idx = MultiIndex.from_arrays([idx1, idx2]) + + expected1 = pd.DatetimeIndex( + ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo" + ) + + tm.assert_index_equal(idx.levels[0], expected1) + tm.assert_index_equal(idx.levels[1], idx2) + + # from datetime combos + # GH 7888 + date1 = datetime.date.today() + date2 = datetime.datetime.today() + date3 = Timestamp.today() + + for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]): + index = MultiIndex.from_product([[d1], [d2]]) + assert isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + + def test_constructor_with_tz(self): + + index = pd.DatetimeIndex( + ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific" + ) + columns = pd.DatetimeIndex( + ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo" + ) + + result = MultiIndex.from_arrays([index, columns]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + result = MultiIndex.from_arrays([Series(index), Series(columns)]) + + assert result.names == ["dt1", "dt2"] + tm.assert_index_equal(result.levels[0], index) + tm.assert_index_equal(result.levels[1], columns) + + def test_set_index_datetime(self): + # GH 3950 + df = DataFrame( + { + "label": ["a", "a", "a", "b", "b", "b"], + "datetime": [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + "value": range(6), + } + ) + df.index = pd.to_datetime(df.pop("datetime"), utc=True) + df.index = df.index.tz_convert("US/Pacific") + + expected = pd.DatetimeIndex( + 
["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + name="datetime", + ) + expected = expected.tz_localize("UTC").tz_convert("US/Pacific") + + df = df.set_index("label", append=True) + tm.assert_index_equal(df.index.levels[0], expected) + tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label")) + assert df.index.names == ["datetime", "label"] + + df = df.swaplevel(0, 1) + tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label")) + tm.assert_index_equal(df.index.levels[1], expected) + assert df.index.names == ["label", "datetime"] + + df = DataFrame(np.random.random(6)) + idx1 = pd.DatetimeIndex( + [ + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + "2011-07-19 07:00:00", + "2011-07-19 08:00:00", + "2011-07-19 09:00:00", + ], + tz="US/Eastern", + ) + idx2 = pd.DatetimeIndex( + [ + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-01 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + "2012-04-02 09:00", + ], + tz="US/Eastern", + ) + idx3 = pd.date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo") + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = pd.DatetimeIndex( + ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"], + tz="US/Eastern", + ) + expected2 = pd.DatetimeIndex( + ["2012-04-01 09:00", "2012-04-02 09:00"], tz="US/Eastern" + ) + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + # GH 7092 + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + + def test_reset_index_datetime(self): + # GH 3950 + for tz in ["UTC", "Asia/Tokyo", "US/Eastern"]: + idx1 = pd.date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1") + idx2 = Index(range(5), 
name="idx2", dtype="int64") + idx = MultiIndex.from_arrays([idx1, idx2]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + + tm.assert_frame_equal(df.reset_index(), expected) + + idx3 = pd.date_range( + "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3" + ) + idx = MultiIndex.from_arrays([idx1, idx2, idx3]) + df = DataFrame( + {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]}, + index=idx, + ) + + expected = DataFrame( + { + "idx1": [ + datetime.datetime(2011, 1, 1), + datetime.datetime(2011, 1, 2), + datetime.datetime(2011, 1, 3), + datetime.datetime(2011, 1, 4), + datetime.datetime(2011, 1, 5), + ], + "idx2": np.arange(5, dtype="int64"), + "idx3": [ + datetime.datetime(2012, 1, 1), + datetime.datetime(2012, 2, 1), + datetime.datetime(2012, 3, 1), + datetime.datetime(2012, 4, 1), + datetime.datetime(2012, 5, 1), + ], + "a": np.arange(5, dtype="int64"), + "b": ["A", "B", "C", "D", "E"], + }, + columns=["idx1", "idx2", "idx3", "a", "b"], + ) + expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz)) + expected["idx3"] = expected["idx3"].apply( + lambda d: Timestamp(d, tz="Europe/Paris") + ) + tm.assert_frame_equal(df.reset_index(), expected) + + # GH 7793 + idx = MultiIndex.from_product( + [["a", "b"], pd.date_range("20130101", periods=3, tz=tz)] + ) + df = DataFrame( + np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx + ) + + expected = DataFrame( + { + "level_0": "a a a b b b".split(), + "level_1": [ + 
datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 2), + datetime.datetime(2013, 1, 3), + ] + * 2, + "a": np.arange(6, dtype="int64"), + }, + columns=["level_0", "level_1", "a"], + ) + expected["level_1"] = expected["level_1"].apply( + lambda d: Timestamp(d, freq="D", tz=tz) + ) + tm.assert_frame_equal(df.reset_index(), expected) + + def test_reset_index_period(self): + # GH 7746 + idx = MultiIndex.from_product( + [pd.period_range("20130101", periods=3, freq="M"), list("abc")], + names=["month", "feature"], + ) + + df = DataFrame( + np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"] + ) + expected = DataFrame( + { + "month": ( + [pd.Period("2013-01", freq="M")] * 3 + + [pd.Period("2013-02", freq="M")] * 3 + + [pd.Period("2013-03", freq="M")] * 3 + ), + "feature": ["a", "b", "c"] * 3, + "a": np.arange(9, dtype="int64"), + }, + columns=["month", "feature", "a"], + ) + tm.assert_frame_equal(df.reset_index(), expected) + + def test_reset_index_multiindex_columns(self): + levels = [["A", ""], ["B", "b"]] + df = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + result = df[["B"]].rename_axis("A").reset_index() + tm.assert_frame_equal(result, df) + + # gh-16120: already existing column + msg = r"cannot insert \('A', ''\), already exists" + with pytest.raises(ValueError, match=msg): + df.rename_axis("A").reset_index() + + # gh-16164: multiindex (tuple) full key + result = df.set_index([("A", "")]).reset_index() + tm.assert_frame_equal(result, df) + + # with additional (unnamed) index level + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")]) + ) + expected = pd.concat([idx_col, df[[("B", "b"), ("A", "")]]], axis=1) + result = df.set_index([("B", "b")], append=True).reset_index() + tm.assert_frame_equal(result, expected) + + # with index name which is a too long tuple... + msg = "Item must have length equal to number of levels." 
+ with pytest.raises(ValueError, match=msg): + df.rename_axis([("C", "c", "i")]).reset_index() + + # or too short... + levels = [["A", "a", ""], ["B", "b", "i"]] + df2 = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels)) + idx_col = DataFrame( + [[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")]) + ) + expected = pd.concat([idx_col, df2], axis=1) + result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii") + tm.assert_frame_equal(result, expected) + + # ... which is incompatible with col_fill=None + with pytest.raises( + ValueError, + match=( + "col_fill=None is incompatible with " + r"incomplete column name \('C', 'c'\)" + ), + ): + df2.rename_axis([("C", "c")]).reset_index(col_fill=None) + + # with col_level != 0 + result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C") + tm.assert_frame_equal(result, expected) + + def test_set_index_period(self): + # GH 6631 + df = DataFrame(np.random.random(6)) + idx1 = pd.period_range("2011-01-01", periods=3, freq="M") + idx1 = idx1.append(idx1) + idx2 = pd.period_range("2013-01-01 09:00", periods=2, freq="H") + idx2 = idx2.append(idx2).append(idx2) + idx3 = pd.period_range("2005", periods=6, freq="A") + + df = df.set_index(idx1) + df = df.set_index(idx2, append=True) + df = df.set_index(idx3, append=True) + + expected1 = pd.period_range("2011-01-01", periods=3, freq="M") + expected2 = pd.period_range("2013-01-01 09:00", periods=2, freq="H") + + tm.assert_index_equal(df.index.levels[0], expected1) + tm.assert_index_equal(df.index.levels[1], expected2) + tm.assert_index_equal(df.index.levels[2], idx3) + + tm.assert_index_equal(df.index.get_level_values(0), idx1) + tm.assert_index_equal(df.index.get_level_values(1), idx2) + tm.assert_index_equal(df.index.get_level_values(2), idx3) + + def test_repeat(self): + # GH 9361 + # fixed by # GH 7891 + m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) + data = ["a", "b", "c", "d"] + m_df = Series(data, index=m_idx) 
+ assert m_df.repeat(3).shape == (3 * len(data),) + + def test_subsets_multiindex_dtype(self): + # GH 20757 + data = [["x", 1]] + columns = [("a", "b", np.nan), ("a", "c", 0.0)] + df = DataFrame(data, columns=pd.MultiIndex.from_tuples(columns)) + expected = df.dtypes.a.b + result = df.a.b.dtypes + tm.assert_series_equal(result, expected) + + +class TestSorted(Base): + """ everything you wanted to test about sorting """ + + def test_sort_index_preserve_levels(self): + result = self.frame.sort_index() + assert result.index.names == self.frame.index.names + + def test_sorting_repr_8017(self): + + np.random.seed(0) + data = np.random.randn(3, 4) + + for gen, extra in [ + ([1.0, 3.0, 2.0, 5.0], 4.0), + ([1, 3, 2, 5], 4), + ( + [ + Timestamp("20130101"), + Timestamp("20130103"), + Timestamp("20130102"), + Timestamp("20130105"), + ], + Timestamp("20130104"), + ), + (["1one", "3one", "2one", "5one"], "4one"), + ]: + columns = MultiIndex.from_tuples([("red", i) for i in gen]) + df = DataFrame(data, index=list("def"), columns=columns) + df2 = pd.concat( + [ + df, + DataFrame( + "world", + index=list("def"), + columns=MultiIndex.from_tuples([("red", extra)]), + ), + ], + axis=1, + ) + + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. 
only 1 header of read + assert str(df2).splitlines()[0].split() == ["red"] + + # GH 8017 + # sorting fails after columns added + + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:, [0, 2, 1, 3]] + tm.assert_frame_equal(result, expected) + + result = df2.sort_index(axis=1) + expected = df2.iloc[:, [0, 2, 1, 4, 3]] + tm.assert_frame_equal(result, expected) + + # setitem then sort + result = df.copy() + result[("red", extra)] = "world" + + result = result.sort_index(axis=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level(self): + df = self.frame.copy() + df.index = np.arange(len(df)) + + # axis=1 + + # series + a_sorted = self.frame["A"].sort_index(level=0) + + # preserve names + assert a_sorted.index.names == self.frame.index.names + + # inplace + rs = self.frame.copy() + rs.sort_index(level=0, inplace=True) + tm.assert_frame_equal(rs, self.frame.sort_index(level=0)) + + def test_sort_index_level_large_cardinality(self): + + # #2684 (int64) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64) + + # it works! + result = df.sort_index(level=0) + assert result.index.lexsort_depth == 3 + + # #2684 (int32) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) + + # it works! 
+ result = df.sort_index(level=0) + assert (result.dtypes.values == df.dtypes.values).all() + assert result.index.lexsort_depth == 3 + + def test_sort_index_level_by_name(self): + self.frame.index.names = ["first", "second"] + result = self.frame.sort_index(level="second") + expected = self.frame.sort_index(level=1) + tm.assert_frame_equal(result, expected) + + def test_sort_index_level_mixed(self): + sorted_before = self.frame.sort_index(level=1) + + df = self.frame.copy() + df["foo"] = "bar" + sorted_after = df.sort_index(level=1) + tm.assert_frame_equal(sorted_before, sorted_after.drop(["foo"], axis=1)) + + dft = self.frame.T + sorted_before = dft.sort_index(level=1, axis=1) + dft["foo", "three"] = "bar" + + sorted_after = dft.sort_index(level=1, axis=1) + tm.assert_frame_equal( + sorted_before.drop([("foo", "three")], axis=1), + sorted_after.drop([("foo", "three")], axis=1), + ) + + def test_is_lexsorted(self): + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + ) + assert index.is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]] + ) + assert not index.is_lexsorted() + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]] + ) + assert not index.is_lexsorted() + assert index.lexsort_depth == 0 + + def test_raise_invalid_sortorder(self): + # Test that the MultiIndex constructor raise when a incorrect sortorder is given + # Issue #28518 + + levels = [[0, 1], [0, 1, 2]] + + # Correct sortorder + MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + + with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"): + MultiIndex( + levels=levels, + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], + sortorder=2, + ) + + with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"): + MultiIndex( + levels=levels, + codes=[[0, 0, 1, 0, 1, 1], [0, 1, 
0, 2, 2, 1]], + sortorder=1, + ) + + def test_lexsort_depth(self): + # Test that lexsort_depth return the correct sortorder + # when it was given to the MultiIndex const. + # Issue #28518 + + levels = [[0, 1], [0, 1, 2]] + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2 + ) + assert index.lexsort_depth == 2 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1 + ) + assert index.lexsort_depth == 1 + + index = MultiIndex( + levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0 + ) + assert index.lexsort_depth == 0 + + def test_sort_index_and_reconstruction(self): + + # 15622 + # lexsortedness should be identical + # across MultiIndex construction methods + + df = DataFrame([[1, 1], [2, 2]], index=list("ab")) + expected = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_tuples( + [(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")] + ), + ) + assert expected.index.is_lexsorted() + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex.from_product([[0.5, 0.8], list("ab")]), + ) + result = result.sort_index() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + result = DataFrame( + [[1, 1], [2, 2], [1, 1], [2, 2]], + index=MultiIndex( + levels=[[0.5, 0.8], ["a", "b"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + result = result.sort_index() + assert result.index.is_lexsorted() + + tm.assert_frame_equal(result, expected) + + concatted = pd.concat([df, df], keys=[0.8, 0.5]) + result = concatted.sort_index() + + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # 14015 + df = DataFrame( + [[1, 2], [6, 7]], + columns=MultiIndex.from_tuples( + [(0, "20160811 12:00:00"), (0, "20160809 12:00:00")], + names=["l1", "Date"], + ), + ) + + df.columns.set_levels( + 
pd.to_datetime(df.columns.levels[1]), level=1, inplace=True + ) + assert not df.columns.is_lexsorted() + assert not df.columns.is_monotonic + result = df.sort_index(axis=1) + assert result.columns.is_lexsorted() + assert result.columns.is_monotonic + result = df.sort_index(axis=1, level=1) + assert result.columns.is_lexsorted() + assert result.columns.is_monotonic + + def test_sort_index_and_reconstruction_doc_example(self): + # doc example + df = DataFrame( + {"value": [1, 2, 3, 4]}, + index=MultiIndex( + levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + assert df.index.is_lexsorted() + assert not df.index.is_monotonic + + # sort it + expected = DataFrame( + {"value": [2, 1, 4, 3]}, + index=MultiIndex( + levels=[["a", "b"], ["aa", "bb"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ), + ) + result = df.sort_index() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + # reconstruct + result = df.sort_index().copy() + result.index = result.index._sort_levels_monotonic() + assert result.index.is_lexsorted() + assert result.index.is_monotonic + + tm.assert_frame_equal(result, expected) + + def test_sort_index_non_existent_label_multiindex(self): + # GH 12261 + df = DataFrame(0, columns=[], index=pd.MultiIndex.from_product([[], []])) + df.loc["b", "2"] = 1 + df.loc["a", "3"] = 1 + result = df.sort_index().index.is_monotonic + assert result is True + + def test_sort_index_reorder_on_ops(self): + # 15687 + df = DataFrame( + np.random.randn(8, 2), + index=MultiIndex.from_product( + [["a", "b"], ["big", "small"], ["red", "blu"]], + names=["letter", "size", "color"], + ), + columns=["near", "far"], + ) + df = df.sort_index() + + def my_func(group): + group.index = ["newz", "newa"] + return group + + result = df.groupby(level=["letter", "size"]).apply(my_func).sort_index() + expected = MultiIndex.from_product( + [["a", "b"], ["big", "small"], ["newa", "newz"]], + names=["letter", 
"size", None], + ) + + tm.assert_index_equal(result.index, expected) + + def test_sort_non_lexsorted(self): + # degenerate case where we sort but don't + # have a satisfying result :< + # GH 15797 + idx = MultiIndex( + [["A", "B", "C"], ["c", "b", "a"]], [[0, 1, 2, 0, 1, 2], [0, 2, 1, 1, 0, 2]] + ) + + df = DataFrame({"col": range(len(idx))}, index=idx, dtype="int64") + assert df.index.is_lexsorted() is False + assert df.index.is_monotonic is False + + sorted = df.sort_index() + assert sorted.index.is_lexsorted() is True + assert sorted.index.is_monotonic is True + + expected = DataFrame( + {"col": [1, 4, 5, 2]}, + index=MultiIndex.from_tuples( + [("B", "a"), ("B", "c"), ("C", "a"), ("C", "b")] + ), + dtype="int64", + ) + result = sorted.loc[pd.IndexSlice["B":"C", "a":"c"], :] + tm.assert_frame_equal(result, expected) + + def test_sort_index_nan(self): + # GH 14784 + # incorrect sorting w.r.t. nans + tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]] + mi = MultiIndex.from_tuples(tuples) + + df = DataFrame(np.arange(16).reshape(4, 4), index=mi, columns=list("ABCD")) + s = Series(np.arange(4), index=mi) + + df2 = DataFrame( + { + "date": pd.to_datetime( + [ + "20121002", + "20121007", + "20130130", + "20130202", + "20130305", + "20121002", + "20121207", + "20130130", + "20130202", + "20130305", + "20130202", + "20130305", + ] + ), + "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + "whole_cost": [ + 1790, + np.nan, + 280, + 259, + np.nan, + 623, + 90, + 312, + np.nan, + 301, + 359, + 801, + ], + "cost": [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12], + } + ).set_index(["date", "user_id"]) + + # sorting frame, default nan position is last + result = df.sort_index() + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position last + result = df.sort_index(na_position="last") + expected = df.iloc[[3, 0, 2, 1], :] + tm.assert_frame_equal(result, expected) + + # sorting frame, nan position first + result = 
df.sort_index(na_position="first") + expected = df.iloc[[1, 2, 3, 0], :] + tm.assert_frame_equal(result, expected) + + # sorting frame with removed rows + result = df2.dropna().sort_index() + expected = df2.sort_index().dropna() + tm.assert_frame_equal(result, expected) + + # sorting series, default nan position is last + result = s.sort_index() + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position last + result = s.sort_index(na_position="last") + expected = s.iloc[[3, 0, 2, 1]] + tm.assert_series_equal(result, expected) + + # sorting series, nan position first + result = s.sort_index(na_position="first") + expected = s.iloc[[1, 2, 3, 0]] + tm.assert_series_equal(result, expected) + + def test_sort_ascending_list(self): + # GH: 16934 + + # Set up a Series with a three level MultiIndex + arrays = [ + ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], + ["one", "two", "one", "two", "one", "two", "one", "two"], + [4, 3, 2, 1, 4, 3, 2, 1], + ] + tuples = zip(*arrays) + mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"]) + s = Series(range(8), index=mi) + + # Sort with boolean ascending + result = s.sort_index(level=["third", "first"], ascending=False) + expected = s.iloc[[4, 0, 5, 1, 6, 2, 7, 3]] + tm.assert_series_equal(result, expected) + + # Sort with list of boolean ascending + result = s.sort_index(level=["third", "first"], ascending=[False, True]) + expected = s.iloc[[0, 4, 1, 5, 2, 6, 3, 7]] + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py new file mode 100644 index 00000000..2c5d028e --- /dev/null +++ b/pandas/tests/test_nanops.py @@ -0,0 +1,1074 @@ +from functools import partial +import operator +import warnings + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas.core.dtypes.common import is_integer_dtype + +import pandas as pd +from pandas import Series, isna +import 
pandas._testing as tm +from pandas.core.arrays import DatetimeArray +import pandas.core.nanops as nanops + +use_bn = nanops._USE_BOTTLENECK +has_c16 = hasattr(np, "complex128") + + +class TestnanopsDataFrame: + def setup_method(self, method): + np.random.seed(11235) + nanops._USE_BOTTLENECK = False + + arr_shape = (11, 7) + + self.arr_float = np.random.randn(*arr_shape) + self.arr_float1 = np.random.randn(*arr_shape) + self.arr_complex = self.arr_float + self.arr_float1 * 1j + self.arr_int = np.random.randint(-10, 10, arr_shape) + self.arr_bool = np.random.randint(0, 2, arr_shape) == 0 + self.arr_str = np.abs(self.arr_float).astype("S") + self.arr_utf = np.abs(self.arr_float).astype("U") + self.arr_date = np.random.randint(0, 20000, arr_shape).astype("M8[ns]") + self.arr_tdelta = np.random.randint(0, 20000, arr_shape).astype("m8[ns]") + + self.arr_nan = np.tile(np.nan, arr_shape) + self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan]) + self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan]) + self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1]) + self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan]) + + self.arr_inf = self.arr_float * np.inf + self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf]) + + self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf]) + self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf]) + self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf]) + self.arr_obj = np.vstack( + [ + self.arr_float.astype("O"), + self.arr_int.astype("O"), + self.arr_bool.astype("O"), + self.arr_complex.astype("O"), + self.arr_str.astype("O"), + self.arr_utf.astype("O"), + self.arr_date.astype("O"), + self.arr_tdelta.astype("O"), + ] + ) + + with np.errstate(invalid="ignore"): + self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j + self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj]) + + self.arr_nan_infj = self.arr_inf * 1j + self.arr_complex_nan_infj 
= np.vstack([self.arr_complex, self.arr_nan_infj]) + + self.arr_float_2d = self.arr_float + self.arr_float1_2d = self.arr_float1 + + self.arr_nan_2d = self.arr_nan + self.arr_float_nan_2d = self.arr_float_nan + self.arr_float1_nan_2d = self.arr_float1_nan + self.arr_nan_float1_2d = self.arr_nan_float1 + + self.arr_float_1d = self.arr_float[:, 0] + self.arr_float1_1d = self.arr_float1[:, 0] + + self.arr_nan_1d = self.arr_nan[:, 0] + self.arr_float_nan_1d = self.arr_float_nan[:, 0] + self.arr_float1_nan_1d = self.arr_float1_nan[:, 0] + self.arr_nan_float1_1d = self.arr_nan_float1[:, 0] + + def teardown_method(self, method): + nanops._USE_BOTTLENECK = use_bn + + def check_results(self, targ, res, axis, check_dtype=True): + res = getattr(res, "asm8", res) + res = getattr(res, "values", res) + + # timedeltas are a beast here + def _coerce_tds(targ, res): + if hasattr(targ, "dtype") and targ.dtype == "m8[ns]": + if len(targ) == 1: + targ = targ[0].item() + res = res.item() + else: + targ = targ.view("i8") + return targ, res + + try: + if ( + axis != 0 + and hasattr(targ, "shape") + and targ.ndim + and targ.shape != res.shape + ): + res = np.split(res, [targ.shape[0]], axis=0)[0] + except (ValueError, IndexError): + targ, res = _coerce_tds(targ, res) + + try: + tm.assert_almost_equal(targ, res, check_dtype=check_dtype) + except AssertionError: + + # handle timedelta dtypes + if hasattr(targ, "dtype") and targ.dtype == "m8[ns]": + targ, res = _coerce_tds(targ, res) + tm.assert_almost_equal(targ, res, check_dtype=check_dtype) + return + + # There are sometimes rounding errors with + # complex and object dtypes. + # If it isn't one of those, re-raise the error. 
+ if not hasattr(res, "dtype") or res.dtype.kind not in ["c", "O"]: + raise + # convert object dtypes to something that can be split into + # real and imaginary parts + if res.dtype.kind == "O": + if targ.dtype.kind != "O": + res = res.astype(targ.dtype) + else: + cast_dtype = "c16" if has_c16 else "f8" + res = res.astype(cast_dtype) + targ = targ.astype(cast_dtype) + # there should never be a case where numpy returns an object + # but nanops doesn't, so make that an exception + elif targ.dtype.kind == "O": + raise + tm.assert_almost_equal(np.real(targ), np.real(res), check_dtype=check_dtype) + tm.assert_almost_equal(np.imag(targ), np.imag(res), check_dtype=check_dtype) + + def check_fun_data( + self, + testfunc, + targfunc, + testarval, + targarval, + check_dtype=True, + empty_targfunc=None, + **kwargs, + ): + for axis in list(range(targarval.ndim)) + [None]: + for skipna in [False, True]: + targartempval = targarval if skipna else testarval + if skipna and empty_targfunc and isna(targartempval).all(): + targ = empty_targfunc(targartempval, axis=axis, **kwargs) + else: + targ = targfunc(targartempval, axis=axis, **kwargs) + + res = testfunc(testarval, axis=axis, skipna=skipna, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if skipna: + res = testfunc(testarval, axis=axis, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if axis is None: + res = testfunc(testarval, skipna=skipna, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + if skipna and axis is None: + res = testfunc(testarval, **kwargs) + self.check_results(targ, res, axis, check_dtype=check_dtype) + + if testarval.ndim <= 1: + return + + # Recurse on lower-dimension + testarval2 = np.take(testarval, 0, axis=-1) + targarval2 = np.take(targarval, 0, axis=-1) + self.check_fun_data( + testfunc, + targfunc, + testarval2, + targarval2, + check_dtype=check_dtype, + empty_targfunc=empty_targfunc, + **kwargs, + ) + + def check_fun(self, 
testfunc, targfunc, testar, empty_targfunc=None, **kwargs): + + targar = testar + if testar.endswith("_nan") and hasattr(self, testar[:-4]): + targar = testar[:-4] + + testarval = getattr(self, testar) + targarval = getattr(self, targar) + self.check_fun_data( + testfunc, + targfunc, + testarval, + targarval, + empty_targfunc=empty_targfunc, + **kwargs, + ) + + def check_funs( + self, + testfunc, + targfunc, + allow_complex=True, + allow_all_nan=True, + allow_date=True, + allow_tdelta=True, + allow_obj=True, + **kwargs, + ): + self.check_fun(testfunc, targfunc, "arr_float", **kwargs) + self.check_fun(testfunc, targfunc, "arr_float_nan", **kwargs) + self.check_fun(testfunc, targfunc, "arr_int", **kwargs) + self.check_fun(testfunc, targfunc, "arr_bool", **kwargs) + objs = [ + self.arr_float.astype("O"), + self.arr_int.astype("O"), + self.arr_bool.astype("O"), + ] + + if allow_all_nan: + self.check_fun(testfunc, targfunc, "arr_nan", **kwargs) + + if allow_complex: + self.check_fun(testfunc, targfunc, "arr_complex", **kwargs) + self.check_fun(testfunc, targfunc, "arr_complex_nan", **kwargs) + if allow_all_nan: + self.check_fun(testfunc, targfunc, "arr_nan_nanj", **kwargs) + objs += [self.arr_complex.astype("O")] + + if allow_date: + targfunc(self.arr_date) + self.check_fun(testfunc, targfunc, "arr_date", **kwargs) + objs += [self.arr_date.astype("O")] + + if allow_tdelta: + try: + targfunc(self.arr_tdelta) + except TypeError: + pass + else: + self.check_fun(testfunc, targfunc, "arr_tdelta", **kwargs) + objs += [self.arr_tdelta.astype("O")] + + if allow_obj: + self.arr_obj = np.vstack(objs) + # some nanops handle object dtypes better than their numpy + # counterparts, so the numpy functions need to be given something + # else + if allow_obj == "convert": + targfunc = partial( + self._badobj_wrap, func=targfunc, allow_complex=allow_complex + ) + self.check_fun(testfunc, targfunc, "arr_obj", **kwargs) + + def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): 
+ if value.dtype.kind == "O": + if allow_complex: + value = value.astype("c16") + else: + value = value.astype("f8") + return func(value, **kwargs) + + @pytest.mark.parametrize( + "nan_op,np_op", [(nanops.nanany, np.any), (nanops.nanall, np.all)] + ) + def test_nan_funcs(self, nan_op, np_op): + # TODO: allow tdelta, doesn't break tests + self.check_funs( + nan_op, np_op, allow_all_nan=False, allow_date=False, allow_tdelta=False + ) + + def test_nansum(self): + self.check_funs( + nanops.nansum, + np.sum, + allow_date=False, + check_dtype=False, + empty_targfunc=np.nansum, + ) + + def test_nanmean(self): + self.check_funs( + nanops.nanmean, + np.mean, + allow_complex=False, # TODO: allow this, doesn't break test + allow_obj=False, + allow_date=False, + ) + + def test_nanmean_overflow(self): + # GH 10155 + # In the previous implementation mean can overflow for int dtypes, it + # is now consistent with numpy + + for a in [2 ** 55, -(2 ** 55), 20150515061816532]: + s = Series(a, index=range(500), dtype=np.int64) + result = s.mean() + np_result = s.values.mean() + assert result == a + assert result == np_result + assert result.dtype == np.float64 + + @pytest.mark.parametrize( + "dtype", + [ + np.int16, + np.int32, + np.int64, + np.float32, + np.float64, + getattr(np, "float128", None), + ], + ) + def test_returned_dtype(self, dtype): + if dtype is None: + # no float128 available + return + + s = Series(range(10), dtype=dtype) + group_a = ["mean", "std", "var", "skew", "kurt"] + group_b = ["min", "max"] + for method in group_a + group_b: + result = getattr(s, method)() + if is_integer_dtype(dtype) and method in group_a: + assert result.dtype == np.float64 + else: + assert result.dtype == dtype + + def test_nanmedian(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_funs( + nanops.nanmedian, + np.median, + allow_complex=False, + allow_date=False, + allow_obj="convert", + ) + + @pytest.mark.parametrize("ddof", 
range(3)) + def test_nanvar(self, ddof): + self.check_funs( + nanops.nanvar, + np.var, + allow_complex=False, + allow_date=False, + allow_obj="convert", + ddof=ddof, + ) + + @pytest.mark.parametrize("ddof", range(3)) + def test_nanstd(self, ddof): + self.check_funs( + nanops.nanstd, + np.std, + allow_complex=False, + allow_date=False, + allow_obj="convert", + ddof=ddof, + ) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("ddof", range(3)) + def test_nansem(self, ddof): + from scipy.stats import sem + + with np.errstate(invalid="ignore"): + self.check_funs( + nanops.nansem, + sem, + allow_complex=False, + allow_date=False, + allow_tdelta=False, + allow_obj="convert", + ddof=ddof, + ) + + @pytest.mark.parametrize( + "nan_op,np_op", [(nanops.nanmin, np.min), (nanops.nanmax, np.max)] + ) + def test_nanops_with_warnings(self, nan_op, np_op): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_funs(nan_op, np_op, allow_obj=False) + + def _argminmax_wrap(self, value, axis=None, func=None): + res = func(value, axis) + nans = np.min(value, axis) + nullnan = isna(nans) + if res.ndim: + res[nullnan] = -1 + elif ( + hasattr(nullnan, "all") + and nullnan.all() + or not hasattr(nullnan, "all") + and nullnan + ): + res = -1 + return res + + def test_nanargmax(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + func = partial(self._argminmax_wrap, func=np.argmax) + self.check_funs(nanops.nanargmax, func, allow_obj=False) + + def test_nanargmin(self): + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + func = partial(self._argminmax_wrap, func=np.argmin) + self.check_funs(nanops.nanargmin, func, allow_obj=False) + + def _skew_kurt_wrap(self, values, axis=None, func=None): + if not isinstance(values.dtype.type, np.floating): + values = values.astype("f8") + result = func(values, axis=axis, bias=False) + # fix for handling cases 
where all elements in an axis are the same + if isinstance(result, np.ndarray): + result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0 + return result + elif np.max(values) == np.min(values): + return 0.0 + return result + + @td.skip_if_no_scipy + def test_nanskew(self): + from scipy.stats import skew + + func = partial(self._skew_kurt_wrap, func=skew) + with np.errstate(invalid="ignore"): + self.check_funs( + nanops.nanskew, + func, + allow_complex=False, + allow_date=False, + allow_tdelta=False, + ) + + @td.skip_if_no_scipy + def test_nankurt(self): + from scipy.stats import kurtosis + + func1 = partial(kurtosis, fisher=True) + func = partial(self._skew_kurt_wrap, func=func1) + with np.errstate(invalid="ignore"): + self.check_funs( + nanops.nankurt, + func, + allow_complex=False, + allow_date=False, + allow_tdelta=False, + ) + + def test_nanprod(self): + self.check_funs( + nanops.nanprod, + np.prod, + allow_date=False, + allow_tdelta=False, + empty_targfunc=np.nanprod, + ) + + def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs): + res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs) + res01 = checkfun( + self.arr_float_2d, + self.arr_float1_2d, + min_periods=len(self.arr_float_2d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ0, res00) + tm.assert_almost_equal(targ0, res01) + + res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs) + res11 = checkfun( + self.arr_float_nan_2d, + self.arr_float1_nan_2d, + min_periods=len(self.arr_float_2d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ1, res10) + tm.assert_almost_equal(targ1, res11) + + targ2 = np.nan + res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs) + res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs) + res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs) + res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs) + res24 = checkfun( + self.arr_float_nan_2d, + self.arr_nan_float1_2d, + 
min_periods=len(self.arr_float_2d) - 1, + **kwargs, + ) + res25 = checkfun( + self.arr_float_2d, + self.arr_float1_2d, + min_periods=len(self.arr_float_2d) + 1, + **kwargs, + ) + tm.assert_almost_equal(targ2, res20) + tm.assert_almost_equal(targ2, res21) + tm.assert_almost_equal(targ2, res22) + tm.assert_almost_equal(targ2, res23) + tm.assert_almost_equal(targ2, res24) + tm.assert_almost_equal(targ2, res25) + + def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs): + res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs) + res01 = checkfun( + self.arr_float_1d, + self.arr_float1_1d, + min_periods=len(self.arr_float_1d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ0, res00) + tm.assert_almost_equal(targ0, res01) + + res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs) + res11 = checkfun( + self.arr_float_nan_1d, + self.arr_float1_nan_1d, + min_periods=len(self.arr_float_1d) - 1, + **kwargs, + ) + tm.assert_almost_equal(targ1, res10) + tm.assert_almost_equal(targ1, res11) + + targ2 = np.nan + res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs) + res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs) + res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs) + res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs) + res24 = checkfun( + self.arr_float_nan_1d, + self.arr_nan_float1_1d, + min_periods=len(self.arr_float_1d) - 1, + **kwargs, + ) + res25 = checkfun( + self.arr_float_1d, + self.arr_float1_1d, + min_periods=len(self.arr_float_1d) + 1, + **kwargs, + ) + tm.assert_almost_equal(targ2, res20) + tm.assert_almost_equal(targ2, res21) + tm.assert_almost_equal(targ2, res22) + tm.assert_almost_equal(targ2, res23) + tm.assert_almost_equal(targ2, res24) + tm.assert_almost_equal(targ2, res25) + + def test_nancorr(self): + targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] + 
self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1) + targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1] + targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson") + + def test_nancorr_pearson(self): + targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.corrcoef(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] + self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="pearson") + targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1] + targ1 = np.corrcoef(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="pearson") + + @td.skip_if_no_scipy + def test_nancorr_kendall(self): + from scipy.stats import kendalltau + + targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0] + targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] + self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="kendall") + targ0 = kendalltau(self.arr_float_1d, self.arr_float1_1d)[0] + targ1 = kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="kendall") + + @td.skip_if_no_scipy + def test_nancorr_spearman(self): + from scipy.stats import spearmanr + + targ0 = spearmanr(self.arr_float_2d, self.arr_float1_2d)[0] + targ1 = spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0] + self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1, method="spearman") + targ0 = spearmanr(self.arr_float_1d, self.arr_float1_1d)[0] + targ1 = spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0] + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="spearman") + + @td.skip_if_no_scipy + def test_invalid_method(self): + targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.corrcoef(self.arr_float_2d.flat, 
self.arr_float1_2d.flat)[0, 1] + msg = "Unkown method 'foo', expected one of 'kendall', 'spearman'" + with pytest.raises(ValueError, match=msg): + self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1, method="foo") + + def test_nancov(self): + targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1] + targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] + self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1) + targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1] + targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] + self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1) + + def check_nancomp(self, checkfun, targ0): + arr_float = self.arr_float + arr_float1 = self.arr_float1 + arr_nan = self.arr_nan + arr_nan_nan = self.arr_nan_nan + arr_float_nan = self.arr_float_nan + arr_float1_nan = self.arr_float1_nan + arr_nan_float1 = self.arr_nan_float1 + + while targ0.ndim: + res0 = checkfun(arr_float, arr_float1) + tm.assert_almost_equal(targ0, res0) + + if targ0.ndim > 1: + targ1 = np.vstack([targ0, arr_nan]) + else: + targ1 = np.hstack([targ0, arr_nan]) + res1 = checkfun(arr_float_nan, arr_float1_nan) + tm.assert_numpy_array_equal(targ1, res1, check_dtype=False) + + targ2 = arr_nan_nan + res2 = checkfun(arr_float_nan, arr_nan_float1) + tm.assert_numpy_array_equal(targ2, res2, check_dtype=False) + + # Lower dimension for next step in the loop + arr_float = np.take(arr_float, 0, axis=-1) + arr_float1 = np.take(arr_float1, 0, axis=-1) + arr_nan = np.take(arr_nan, 0, axis=-1) + arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1) + arr_float_nan = np.take(arr_float_nan, 0, axis=-1) + arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1) + arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1) + targ0 = np.take(targ0, 0, axis=-1) + + @pytest.mark.parametrize( + "op,nanop", + [ + (operator.eq, nanops.naneq), + (operator.ne, nanops.nanne), + (operator.gt, nanops.nangt), + (operator.ge, nanops.nange), + (operator.lt, 
nanops.nanlt), + (operator.le, nanops.nanle), + ], + ) + def test_nan_comparison(self, op, nanop): + targ0 = op(self.arr_float, self.arr_float1) + self.check_nancomp(nanop, targ0) + + def check_bool(self, func, value, correct): + while getattr(value, "ndim", True): + res0 = func(value) + if correct: + assert res0 + else: + assert not res0 + + if not hasattr(value, "ndim"): + break + + # Reduce dimension for next step in the loop + value = np.take(value, 0, axis=-1) + + def test__has_infs(self): + pairs = [ + ("arr_complex", False), + ("arr_int", False), + ("arr_bool", False), + ("arr_str", False), + ("arr_utf", False), + ("arr_complex", False), + ("arr_complex_nan", False), + ("arr_nan_nanj", False), + ("arr_nan_infj", True), + ("arr_complex_nan_infj", True), + ] + pairs_float = [ + ("arr_float", False), + ("arr_nan", False), + ("arr_float_nan", False), + ("arr_nan_nan", False), + ("arr_float_inf", True), + ("arr_inf", True), + ("arr_nan_inf", True), + ("arr_float_nan_inf", True), + ("arr_nan_nan_inf", True), + ] + + for arr, correct in pairs: + val = getattr(self, arr) + self.check_bool(nanops._has_infs, val, correct) + + for arr, correct in pairs_float: + val = getattr(self, arr) + self.check_bool(nanops._has_infs, val, correct) + self.check_bool(nanops._has_infs, val.astype("f4"), correct) + self.check_bool(nanops._has_infs, val.astype("f2"), correct) + + def test__bn_ok_dtype(self): + assert nanops._bn_ok_dtype(self.arr_float.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_complex.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_int.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_bool.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_str.dtype, "test") + assert nanops._bn_ok_dtype(self.arr_utf.dtype, "test") + assert not nanops._bn_ok_dtype(self.arr_date.dtype, "test") + assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, "test") + assert not nanops._bn_ok_dtype(self.arr_obj.dtype, "test") + + +class TestEnsureNumeric: + def 
test_numeric_values(self): + # Test integer + assert nanops._ensure_numeric(1) == 1 + + # Test float + assert nanops._ensure_numeric(1.1) == 1.1 + + # Test complex + assert nanops._ensure_numeric(1 + 2j) == 1 + 2j + + def test_ndarray(self): + # Test numeric ndarray + values = np.array([1, 2, 3]) + assert np.allclose(nanops._ensure_numeric(values), values) + + # Test object ndarray + o_values = values.astype(object) + assert np.allclose(nanops._ensure_numeric(o_values), values) + + # Test convertible string ndarray + s_values = np.array(["1", "2", "3"], dtype=object) + assert np.allclose(nanops._ensure_numeric(s_values), values) + + # Test non-convertible string ndarray + s_values = np.array(["foo", "bar", "baz"], dtype=object) + msg = r"could not convert string to float: '(foo|baz)'" + with pytest.raises(ValueError, match=msg): + nanops._ensure_numeric(s_values) + + def test_convertable_values(self): + assert np.allclose(nanops._ensure_numeric("1"), 1.0) + assert np.allclose(nanops._ensure_numeric("1.1"), 1.1) + assert np.allclose(nanops._ensure_numeric("1+1j"), 1 + 1j) + + def test_non_convertable_values(self): + msg = "Could not convert foo to numeric" + with pytest.raises(TypeError, match=msg): + nanops._ensure_numeric("foo") + + # with the wrong type, python raises TypeError for us + msg = "argument must be a string or a number" + with pytest.raises(TypeError, match=msg): + nanops._ensure_numeric({}) + with pytest.raises(TypeError, match=msg): + nanops._ensure_numeric([]) + + +class TestNanvarFixedValues: + + # xref GH10242 + + def setup_method(self, method): + # Samples from a normal distribution. 
+ self.variance = variance = 3.0 + self.samples = self.prng.normal(scale=variance ** 0.5, size=100000) + + def test_nanvar_all_finite(self): + samples = self.samples + actual_variance = nanops.nanvar(samples) + tm.assert_almost_equal(actual_variance, self.variance, check_less_precise=2) + + def test_nanvar_nans(self): + samples = np.nan * np.ones(2 * self.samples.shape[0]) + samples[::2] = self.samples + + actual_variance = nanops.nanvar(samples, skipna=True) + tm.assert_almost_equal(actual_variance, self.variance, check_less_precise=2) + + actual_variance = nanops.nanvar(samples, skipna=False) + tm.assert_almost_equal(actual_variance, np.nan, check_less_precise=2) + + def test_nanstd_nans(self): + samples = np.nan * np.ones(2 * self.samples.shape[0]) + samples[::2] = self.samples + + actual_std = nanops.nanstd(samples, skipna=True) + tm.assert_almost_equal(actual_std, self.variance ** 0.5, check_less_precise=2) + + actual_std = nanops.nanvar(samples, skipna=False) + tm.assert_almost_equal(actual_std, np.nan, check_less_precise=2) + + def test_nanvar_axis(self): + # Generate some sample data. + samples_norm = self.samples + samples_unif = self.prng.uniform(size=samples_norm.shape[0]) + samples = np.vstack([samples_norm, samples_unif]) + + actual_variance = nanops.nanvar(samples, axis=1) + tm.assert_almost_equal( + actual_variance, np.array([self.variance, 1.0 / 12]), check_less_precise=2 + ) + + def test_nanvar_ddof(self): + n = 5 + samples = self.prng.uniform(size=(10000, n + 1)) + samples[:, -1] = np.nan # Force use of our own algorithm. + + variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean() + variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean() + variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean() + + # The unbiased estimate. + var = 1.0 / 12 + tm.assert_almost_equal(variance_1, var, check_less_precise=2) + + # The underestimated variance. 
+ tm.assert_almost_equal(variance_0, (n - 1.0) / n * var, check_less_precise=2) + + # The overestimated variance. + tm.assert_almost_equal( + variance_2, (n - 1.0) / (n - 2.0) * var, check_less_precise=2 + ) + + def test_ground_truth(self): + # Test against values that were precomputed with Numpy. + samples = np.empty((4, 4)) + samples[:3, :3] = np.array( + [ + [0.97303362, 0.21869576, 0.55560287], + [0.72980153, 0.03109364, 0.99155171], + [0.09317602, 0.60078248, 0.15871292], + ] + ) + samples[3] = samples[:, 3] = np.nan + + # Actual variances along axis=0, 1 for ddof=0, 1, 2 + variance = np.array( + [ + [ + [0.13762259, 0.05619224, 0.11568816], + [0.20643388, 0.08428837, 0.17353224], + [0.41286776, 0.16857673, 0.34706449], + ], + [ + [0.09519783, 0.16435395, 0.05082054], + [0.14279674, 0.24653093, 0.07623082], + [0.28559348, 0.49306186, 0.15246163], + ], + ] + ) + + # Test nanvar. + for axis in range(2): + for ddof in range(3): + var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof) + tm.assert_almost_equal(var[:3], variance[axis, ddof]) + assert np.isnan(var[3]) + + # Test nanstd. + for axis in range(2): + for ddof in range(3): + std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof) + tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5) + assert np.isnan(std[3]) + + def test_nanstd_roundoff(self): + # Regression test for GH 10242 (test data taken from GH 10489). Ensure + # that variance is stable. 
+ data = Series(766897346 * np.ones(10)) + for ddof in range(3): + result = data.std(ddof=ddof) + assert result == 0.0 + + @property + def prng(self): + return np.random.RandomState(1234) + + +class TestNanskewFixedValues: + + # xref GH 11974 + + def setup_method(self, method): + # Test data + skewness value (computed with scipy.stats.skew) + self.samples = np.sin(np.linspace(0, 1, 200)) + self.actual_skew = -0.1875895205961754 + + def test_constant_series(self): + # xref GH 11974 + for val in [3075.2, 3075.3, 3075.5]: + data = val * np.ones(300) + skew = nanops.nanskew(data) + assert skew == 0.0 + + def test_all_finite(self): + alpha, beta = 0.3, 0.1 + left_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nanskew(left_tailed) < 0 + + alpha, beta = 0.1, 0.3 + right_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nanskew(right_tailed) > 0 + + def test_ground_truth(self): + skew = nanops.nanskew(self.samples) + tm.assert_almost_equal(skew, self.actual_skew) + + def test_axis(self): + samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) + skew = nanops.nanskew(samples, axis=1) + tm.assert_almost_equal(skew, np.array([self.actual_skew, np.nan])) + + def test_nans(self): + samples = np.hstack([self.samples, np.nan]) + skew = nanops.nanskew(samples, skipna=False) + assert np.isnan(skew) + + def test_nans_skipna(self): + samples = np.hstack([self.samples, np.nan]) + skew = nanops.nanskew(samples, skipna=True) + tm.assert_almost_equal(skew, self.actual_skew) + + @property + def prng(self): + return np.random.RandomState(1234) + + +class TestNankurtFixedValues: + + # xref GH 11974 + + def setup_method(self, method): + # Test data + kurtosis value (computed with scipy.stats.kurtosis) + self.samples = np.sin(np.linspace(0, 1, 200)) + self.actual_kurt = -1.2058303433799713 + + def test_constant_series(self): + # xref GH 11974 + for val in [3075.2, 3075.3, 3075.5]: + data = val * np.ones(300) + kurt = nanops.nankurt(data) + 
assert kurt == 0.0 + + def test_all_finite(self): + alpha, beta = 0.3, 0.1 + left_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nankurt(left_tailed) < 0 + + alpha, beta = 0.1, 0.3 + right_tailed = self.prng.beta(alpha, beta, size=100) + assert nanops.nankurt(right_tailed) > 0 + + def test_ground_truth(self): + kurt = nanops.nankurt(self.samples) + tm.assert_almost_equal(kurt, self.actual_kurt) + + def test_axis(self): + samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) + kurt = nanops.nankurt(samples, axis=1) + tm.assert_almost_equal(kurt, np.array([self.actual_kurt, np.nan])) + + def test_nans(self): + samples = np.hstack([self.samples, np.nan]) + kurt = nanops.nankurt(samples, skipna=False) + assert np.isnan(kurt) + + def test_nans_skipna(self): + samples = np.hstack([self.samples, np.nan]) + kurt = nanops.nankurt(samples, skipna=True) + tm.assert_almost_equal(kurt, self.actual_kurt) + + @property + def prng(self): + return np.random.RandomState(1234) + + +class TestDatetime64NaNOps: + @pytest.mark.parametrize("tz", [None, "UTC"]) + @pytest.mark.xfail(reason="disabled") + # Enabling mean changes the behavior of DataFrame.mean + # See https://github.com/pandas-dev/pandas/issues/24752 + def test_nanmean(self, tz): + dti = pd.date_range("2016-01-01", periods=3, tz=tz) + expected = dti[1] + + for obj in [dti, DatetimeArray(dti), Series(dti)]: + result = nanops.nanmean(obj) + assert result == expected + + dti2 = dti.insert(1, pd.NaT) + + for obj in [dti2, DatetimeArray(dti2), Series(dti2)]: + result = nanops.nanmean(obj) + assert result == expected + + +def test_use_bottleneck(): + + if nanops._BOTTLENECK_INSTALLED: + + pd.set_option("use_bottleneck", True) + assert pd.get_option("use_bottleneck") + + pd.set_option("use_bottleneck", False) + assert not pd.get_option("use_bottleneck") + + pd.set_option("use_bottleneck", use_bn) + + +@pytest.mark.parametrize( + "numpy_op, expected", + [ + (np.sum, 10), + (np.nansum, 10), + 
(np.mean, 2.5), + (np.nanmean, 2.5), + (np.median, 2.5), + (np.nanmedian, 2.5), + (np.min, 1), + (np.max, 4), + (np.nanmin, 1), + (np.nanmax, 4), + ], +) +def test_numpy_ops(numpy_op, expected): + # GH8383 + result = numpy_op(pd.Series([1, 2, 3, 4])) + assert result == expected + + +@pytest.mark.parametrize( + "operation", + [ + nanops.nanany, + nanops.nanall, + nanops.nansum, + nanops.nanmean, + nanops.nanmedian, + nanops.nanstd, + nanops.nanvar, + nanops.nansem, + nanops.nanargmax, + nanops.nanargmin, + nanops.nanmax, + nanops.nanmin, + nanops.nanskew, + nanops.nankurt, + nanops.nanprod, + ], +) +def test_nanops_independent_of_mask_param(operation): + # GH22764 + s = pd.Series([1, 2, np.nan, 3, np.nan, 4]) + mask = s.isna() + median_expected = operation(s) + median_result = operation(s, mask=mask) + assert median_expected == median_result diff --git a/pandas/tests/test_optional_dependency.py b/pandas/tests/test_optional_dependency.py new file mode 100644 index 00000000..ce527214 --- /dev/null +++ b/pandas/tests/test_optional_dependency.py @@ -0,0 +1,52 @@ +import sys +import types + +import pytest + +from pandas.compat._optional import VERSIONS, import_optional_dependency + +import pandas._testing as tm + + +def test_import_optional(): + match = "Missing .*notapackage.* pip .* conda .* notapackage" + with pytest.raises(ImportError, match=match): + import_optional_dependency("notapackage") + + result = import_optional_dependency("notapackage", raise_on_missing=False) + assert result is None + + +def test_xlrd_version_fallback(): + pytest.importorskip("xlrd") + import_optional_dependency("xlrd") + + +def test_bad_version(): + name = "fakemodule" + module = types.ModuleType(name) + module.__version__ = "0.9.0" + sys.modules[name] = module + VERSIONS[name] = "1.0.0" + + match = "Pandas requires .*1.0.0.* of .fakemodule.*'0.9.0'" + with pytest.raises(ImportError, match=match): + import_optional_dependency("fakemodule") + + with tm.assert_produces_warning(UserWarning): 
+ result = import_optional_dependency("fakemodule", on_version="warn") + assert result is None + + module.__version__ = "1.0.0" # exact match is OK + result = import_optional_dependency("fakemodule") + assert result is module + + +def test_no_version_raises(): + name = "fakemodule" + module = types.ModuleType(name) + sys.modules[name] = module + VERSIONS[name] = "1.0.0" + + with pytest.raises(ImportError, match="Can't determine .* fakemodule"): + import_optional_dependency(name) diff --git a/pandas/tests/test_register_accessor.py b/pandas/tests/test_register_accessor.py new file mode 100644 index 00000000..08a55818 --- /dev/null +++ b/pandas/tests/test_register_accessor.py @@ -0,0 +1,92 @@ +import contextlib + +import pytest + +import pandas as pd +import pandas._testing as tm + + +@contextlib.contextmanager +def ensure_removed(obj, attr): + """Ensure that an attribute added to 'obj' during the test is + removed when we're done""" + try: + yield + finally: + try: + delattr(obj, attr) + except AttributeError: + pass + obj._accessors.discard(attr) + + +class MyAccessor: + def __init__(self, obj): + self.obj = obj + self.item = "item" + + @property + def prop(self): + return self.item + + def method(self): + return self.item + + +@pytest.mark.parametrize( + "obj, registrar", + [ + (pd.Series, pd.api.extensions.register_series_accessor), + (pd.DataFrame, pd.api.extensions.register_dataframe_accessor), + (pd.Index, pd.api.extensions.register_index_accessor), + ], +) +def test_register(obj, registrar): + with ensure_removed(obj, "mine"): + before = set(dir(obj)) + registrar("mine")(MyAccessor) + o = obj([]) if obj is not pd.Series else obj([], dtype=object) + assert o.mine.prop == "item" + after = set(dir(obj)) + assert (before ^ after) == {"mine"} + assert "mine" in obj._accessors + + +def test_accessor_works(): + with ensure_removed(pd.Series, "mine"): + pd.api.extensions.register_series_accessor("mine")(MyAccessor) + + s = pd.Series([1, 2]) + assert s.mine.obj is s + 
+ assert s.mine.prop == "item" + assert s.mine.method() == "item" + + +def test_overwrite_warns(): + # Need to restore mean + mean = pd.Series.mean + try: + with tm.assert_produces_warning(UserWarning) as w: + pd.api.extensions.register_series_accessor("mean")(MyAccessor) + s = pd.Series([1, 2]) + assert s.mean.prop == "item" + msg = str(w[0].message) + assert "mean" in msg + assert "MyAccessor" in msg + assert "Series" in msg + finally: + pd.Series.mean = mean + + +def test_raises_attribute_error(): + + with ensure_removed(pd.Series, "bad"): + + @pd.api.extensions.register_series_accessor("bad") + class Bad: + def __init__(self, data): + raise AttributeError("whoops") + + with pytest.raises(AttributeError, match="whoops"): + pd.Series([], dtype=object).bad diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py new file mode 100644 index 00000000..98297474 --- /dev/null +++ b/pandas/tests/test_sorting.py @@ -0,0 +1,454 @@ +from collections import defaultdict +from datetime import datetime +from itertools import product + +import numpy as np +import pytest + +from pandas import DataFrame, MultiIndex, Series, array, concat, merge +import pandas._testing as tm +from pandas.core.algorithms import safe_sort +import pandas.core.common as com +from pandas.core.sorting import ( + decons_group_index, + get_group_index, + is_int64_overflow_possible, + lexsort_indexer, + nargsort, +) + + +class TestSorting: + @pytest.mark.slow + def test_int64_overflow(self): + + B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) + A = np.arange(2500) + df = DataFrame( + { + "A": A, + "B": B, + "C": A, + "D": B, + "E": A, + "F": B, + "G": A, + "H": B, + "values": np.random.randn(2500), + } + ) + + lg = df.groupby(["A", "B", "C", "D", "E", "F", "G", "H"]) + rg = df.groupby(["H", "G", "F", "E", "D", "C", "B", "A"]) + + left = lg.sum()["values"] + right = rg.sum()["values"] + + exp_index, _ = left.index.sortlevel() + tm.assert_index_equal(left.index, 
exp_index) + + exp_index, _ = right.index.sortlevel(0) + tm.assert_index_equal(right.index, exp_index) + + tups = list(map(tuple, df[["A", "B", "C", "D", "E", "F", "G", "H"]].values)) + tups = com.asarray_tuplesafe(tups) + + expected = df.groupby(tups).sum()["values"] + + for k, v in expected.items(): + assert left[k] == right[k[::-1]] + assert left[k] == v + assert len(left) == len(right) + + def test_int64_overflow_moar(self): + + # GH9096 + values = range(55109) + data = DataFrame.from_dict({"a": values, "b": values, "c": values, "d": values}) + grouped = data.groupby(["a", "b", "c", "d"]) + assert len(grouped) == len(values) + + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) + i = np.random.choice(len(arr), len(arr) * 4) + arr = np.vstack((arr, arr[i])) # add some duplicate rows + + i = np.random.permutation(len(arr)) + arr = arr[i] # shuffle rows + + df = DataFrame(arr, columns=list("abcde")) + df["jim"], df["joe"] = np.random.randn(2, len(df)) * 10 + gr = df.groupby(list("abcde")) + + # verify this is testing what it is supposed to test! 
+ assert is_int64_overflow_possible(gr.grouper.shape) + + # manually compute groupings + jim, joe = defaultdict(list), defaultdict(list) + for key, a, b in zip(map(tuple, arr), df["jim"], df["joe"]): + jim[key].append(a) + joe[key].append(b) + + assert len(gr) == len(jim) + mi = MultiIndex.from_tuples(jim.keys(), names=list("abcde")) + + def aggr(func): + f = lambda a: np.fromiter(map(func, a), dtype="f8") + arr = np.vstack((f(jim.values()), f(joe.values()))).T + res = DataFrame(arr, columns=["jim", "joe"], index=mi) + return res.sort_index() + + tm.assert_frame_equal(gr.mean(), aggr(np.mean)) + tm.assert_frame_equal(gr.median(), aggr(np.median)) + + def test_lexsort_indexer(self): + keys = [[np.nan] * 5 + list(range(100)) + [np.nan] * 5] + # orders=True, na_position='last' + result = lexsort_indexer(keys, orders=True, na_position="last") + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=True, na_position='first' + result = lexsort_indexer(keys, orders=True, na_position="first") + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='last' + result = lexsort_indexer(keys, orders=False, na_position="last") + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + # orders=False, na_position='first' + result = lexsort_indexer(keys, orders=False, na_position="first") + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.intp)) + + def test_nargsort(self): + # np.argsort(items) places NaNs last + items = [np.nan] * 5 + list(range(100)) + [np.nan] * 5 + # np.argsort(items2) may not place NaNs first + items2 = np.array(items, dtype="O") + + # mergesort is the most difficult to get right because we 
want it to be + # stable. + + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm + # because quick and merge sort fall over to insertion sort for small + # arrays.""" + + # mergesort, ascending=True, na_position='last' + result = nargsort(items, kind="mergesort", ascending=True, na_position="last") + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items, kind="mergesort", ascending=True, na_position="first") + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items, kind="mergesort", ascending=False, na_position="last") + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort(items, kind="mergesort", ascending=False, na_position="first") + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='last' + result = nargsort(items2, kind="mergesort", ascending=True, na_position="last") + exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=True, na_position='first' + result = nargsort(items2, kind="mergesort", ascending=True, na_position="first") + exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='last' + result = nargsort(items2, 
kind="mergesort", ascending=False, na_position="last") + exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + # mergesort, ascending=False, na_position='first' + result = nargsort( + items2, kind="mergesort", ascending=False, na_position="first" + ) + exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) + + +class TestMerge: + @pytest.mark.slow + def test_int64_overflow_issues(self): + + # #2690, combinatorial explosion + df1 = DataFrame(np.random.randn(1000, 7), columns=list("ABCDEF") + ["G1"]) + df2 = DataFrame(np.random.randn(1000, 7), columns=list("ABCDEF") + ["G2"]) + + # it works! + result = merge(df1, df2, how="outer") + assert len(result) == 2000 + + low, high, n = -1 << 10, 1 << 10, 1 << 20 + left = DataFrame(np.random.randint(low, high, (n, 7)), columns=list("ABCDEFG")) + left["left"] = left.sum(axis=1) + + # one-2-one match + i = np.random.permutation(len(left)) + right = left.iloc[i].copy() + right.columns = right.columns[:-1].tolist() + ["right"] + right.index = np.arange(len(right)) + right["right"] *= -1 + + out = merge(left, right, how="outer") + assert len(out) == len(left) + tm.assert_series_equal(out["left"], -out["right"], check_names=False) + result = out.iloc[:, :-2].sum(axis=1) + tm.assert_series_equal(out["left"], result, check_names=False) + assert result.name is None + + out.sort_values(out.columns.tolist(), inplace=True) + out.index = np.arange(len(out)) + for how in ["left", "right", "outer", "inner"]: + tm.assert_frame_equal(out, merge(left, right, how=how, sort=True)) + + # check that left merge w/ sort=False maintains left frame order + out = merge(left, right, how="left", sort=False) + tm.assert_frame_equal(left, out[left.columns.tolist()]) + + out = merge(right, left, how="left", sort=False) + tm.assert_frame_equal(right, out[right.columns.tolist()]) + 
+ # one-2-many/none match + n = 1 << 11 + left = DataFrame( + np.random.randint(low, high, (n, 7)).astype("int64"), + columns=list("ABCDEFG"), + ) + + # confirm that this is checking what it is supposed to check + shape = left.apply(Series.nunique).values + assert is_int64_overflow_possible(shape) + + # add duplicates to left frame + left = concat([left, left], ignore_index=True) + + right = DataFrame( + np.random.randint(low, high, (n // 2, 7)).astype("int64"), + columns=list("ABCDEFG"), + ) + + # add duplicates & overlap with left to the right frame + i = np.random.choice(len(left), n) + right = concat([right, right, left.iloc[i]], ignore_index=True) + + left["left"] = np.random.randn(len(left)) + right["right"] = np.random.randn(len(right)) + + # shuffle left & right frames + i = np.random.permutation(len(left)) + left = left.iloc[i].copy() + left.index = np.arange(len(left)) + + i = np.random.permutation(len(right)) + right = right.iloc[i].copy() + right.index = np.arange(len(right)) + + # manually compute outer merge + ldict, rdict = defaultdict(list), defaultdict(list) + + for idx, row in left.set_index(list("ABCDEFG")).iterrows(): + ldict[idx].append(row["left"]) + + for idx, row in right.set_index(list("ABCDEFG")).iterrows(): + rdict[idx].append(row["right"]) + + vals = [] + for k, lval in ldict.items(): + rval = rdict.get(k, [np.nan]) + for lv, rv in product(lval, rval): + vals.append(k + tuple([lv, rv])) + + for k, rval in rdict.items(): + if k not in ldict: + for rv in rval: + vals.append(k + tuple([np.nan, rv])) + + def align(df): + df = df.sort_values(df.columns.tolist()) + df.index = np.arange(len(df)) + return df + + def verify_order(df): + kcols = list("ABCDEFG") + tm.assert_frame_equal( + df[kcols].copy(), df[kcols].sort_values(kcols, kind="mergesort") + ) + + out = DataFrame(vals, columns=list("ABCDEFG") + ["left", "right"]) + out = align(out) + + jmask = { + "left": out["left"].notna(), + "right": out["right"].notna(), + "inner": 
out["left"].notna() & out["right"].notna(), + "outer": np.ones(len(out), dtype="bool"), + } + + for how in "left", "right", "outer", "inner": + mask = jmask[how] + frame = align(out[mask].copy()) + assert mask.all() ^ mask.any() or how == "outer" + + for sort in [False, True]: + res = merge(left, right, how=how, sort=sort) + if sort: + verify_order(res) + + # as in GH9092 dtypes break with outer/right join + tm.assert_frame_equal( + frame, align(res), check_dtype=how not in ("right", "outer") + ) + + +def test_decons(): + def testit(codes_list, shape): + group_index = get_group_index(codes_list, shape, sort=True, xnull=True) + codes_list2 = decons_group_index(group_index, shape) + + for a, b in zip(codes_list, codes_list2): + tm.assert_numpy_array_equal(a, b) + + shape = (4, 5, 6) + codes_list = [ + np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100).astype(np.int64), + np.tile([0, 2, 4, 3, 0, 1, 2, 3], 100).astype(np.int64), + np.tile([5, 1, 0, 2, 3, 0, 5, 4], 100).astype(np.int64), + ] + testit(codes_list, shape) + + shape = (10000, 10000) + codes_list = [ + np.tile(np.arange(10000, dtype=np.int64), 5), + np.tile(np.arange(10000, dtype=np.int64), 5), + ] + testit(codes_list, shape) + + +class TestSafeSort: + def test_basic_sort(self): + values = [3, 1, 2, 0, 4] + result = safe_sort(values) + expected = np.array([0, 1, 2, 3, 4]) + tm.assert_numpy_array_equal(result, expected) + + values = list("baaacb") + result = safe_sort(values) + expected = np.array(list("aaabbc"), dtype="object") + tm.assert_numpy_array_equal(result, expected) + + values = [] + result = safe_sort(values) + expected = np.array([]) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("verify", [True, False]) + def test_codes(self, verify): + values = [3, 1, 2, 0, 4] + expected = np.array([0, 1, 2, 3, 4]) + + codes = [0, 1, 1, 2, 3, 0, -1, 4] + result, result_codes = safe_sort(values, codes, verify=verify) + expected_codes = np.array([3, 1, 1, 2, 0, 3, -1, 4], dtype=np.intp) + 
tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + # na_sentinel + codes = [0, 1, 1, 2, 3, 0, 99, 4] + result, result_codes = safe_sort(values, codes, na_sentinel=99, verify=verify) + expected_codes = np.array([3, 1, 1, 2, 0, 3, 99, 4], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + codes = [] + result, result_codes = safe_sort(values, codes, verify=verify) + expected_codes = np.array([], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + @pytest.mark.parametrize("na_sentinel", [-1, 99]) + def test_codes_out_of_bound(self, na_sentinel): + values = [3, 1, 2, 0, 4] + expected = np.array([0, 1, 2, 3, 4]) + + # out of bound indices + codes = [0, 101, 102, 2, 3, 0, 99, 4] + result, result_codes = safe_sort(values, codes, na_sentinel=na_sentinel) + expected_codes = np.array( + [3, na_sentinel, na_sentinel, 2, 0, 3, na_sentinel, 4], dtype=np.intp + ) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + def test_mixed_integer(self): + values = np.array(["b", 1, 0, "a", 0, "b"], dtype=object) + result = safe_sort(values) + expected = np.array([0, 0, 1, "a", "b", "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + values = np.array(["b", 1, 0, "a"], dtype=object) + codes = [0, 1, 2, 3, 0, -1, 1] + result, result_codes = safe_sort(values, codes) + expected = np.array([0, 1, "a", "b"], dtype=object) + expected_codes = np.array([3, 1, 0, 2, 3, -1, 1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + tm.assert_numpy_array_equal(result_codes, expected_codes) + + def test_mixed_integer_from_list(self): + values = ["b", 1, 0, "a", 0, "b"] + result = safe_sort(values) + expected = np.array([0, 0, 1, "a", "b", "b"], dtype=object) + tm.assert_numpy_array_equal(result, expected) + 
+ def test_unsortable(self): + # GH 13714 + arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object) + msg = ( + "unorderable types: .* [<>] .*" + "|" # the above case happens for numpy < 1.14 + "'[<>]' not supported between instances of .*" + ) + with pytest.raises(TypeError, match=msg): + safe_sort(arr) + + def test_exceptions(self): + with pytest.raises(TypeError, match="Only list-like objects are allowed"): + safe_sort(values=1) + + with pytest.raises(TypeError, match="Only list-like objects or None"): + safe_sort(values=[0, 1, 2], codes=1) + + with pytest.raises(ValueError, match="values should be unique"): + safe_sort(values=[0, 1, 2, 1], codes=[0, 1]) + + def test_extension_array(self): + # a = array([1, 3, np.nan, 2], dtype='Int64') + a = array([1, 3, 2], dtype="Int64") + result = safe_sort(a) + # expected = array([1, 2, 3, np.nan], dtype='Int64') + expected = array([1, 2, 3], dtype="Int64") + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize("verify", [True, False]) + @pytest.mark.parametrize("na_sentinel", [-1, 99]) + def test_extension_array_codes(self, verify, na_sentinel): + a = array([1, 3, 2], dtype="Int64") + result, codes = safe_sort( + a, [0, 1, na_sentinel, 2], na_sentinel=na_sentinel, verify=verify + ) + expected_values = array([1, 2, 3], dtype="Int64") + expected_codes = np.array([0, 2, na_sentinel, 1], dtype=np.intp) + tm.assert_extension_array_equal(result, expected_values) + tm.assert_numpy_array_equal(codes, expected_codes) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py new file mode 100644 index 00000000..76683d2c --- /dev/null +++ b/pandas/tests/test_strings.py @@ -0,0 +1,3606 @@ +from datetime import datetime, timedelta +import re + +import numpy as np +from numpy.random import randint +import pytest + +from pandas._libs import lib + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, notna +import pandas._testing as tm +import 
pandas.core.strings as strings + + +def assert_series_or_index_equal(left, right): + if isinstance(left, Series): + tm.assert_series_equal(left, right) + else: # Index + tm.assert_index_equal(left, right) + + +_any_string_method = [ + ("cat", (), {"sep": ","}), + ("cat", (Series(list("zyx")),), {"sep": ",", "join": "left"}), + ("center", (10,), {}), + ("contains", ("a",), {}), + ("count", ("a",), {}), + ("decode", ("UTF-8",), {}), + ("encode", ("UTF-8",), {}), + ("endswith", ("a",), {}), + ("extract", ("([a-z]*)",), {"expand": False}), + ("extract", ("([a-z]*)",), {"expand": True}), + ("extractall", ("([a-z]*)",), {}), + ("find", ("a",), {}), + ("findall", ("a",), {}), + ("get", (0,), {}), + # because "index" (and "rindex") fail intentionally + # if the string is not found, search only for empty string + ("index", ("",), {}), + ("join", (",",), {}), + ("ljust", (10,), {}), + ("match", ("a",), {}), + ("normalize", ("NFC",), {}), + ("pad", (10,), {}), + ("partition", (" ",), {"expand": False}), + ("partition", (" ",), {"expand": True}), + ("repeat", (3,), {}), + ("replace", ("a", "z"), {}), + ("rfind", ("a",), {}), + ("rindex", ("",), {}), + ("rjust", (10,), {}), + ("rpartition", (" ",), {"expand": False}), + ("rpartition", (" ",), {"expand": True}), + ("slice", (0, 1), {}), + ("slice_replace", (0, 1, "z"), {}), + ("split", (" ",), {"expand": False}), + ("split", (" ",), {"expand": True}), + ("startswith", ("a",), {}), + # translating unicode points of "a" to "d" + ("translate", ({97: 100},), {}), + ("wrap", (2,), {}), + ("zfill", (10,), {}), +] + list( + zip( + [ + # methods without positional arguments: zip with empty tuple and empty dict + "capitalize", + "cat", + "get_dummies", + "isalnum", + "isalpha", + "isdecimal", + "isdigit", + "islower", + "isnumeric", + "isspace", + "istitle", + "isupper", + "len", + "lower", + "lstrip", + "partition", + "rpartition", + "rsplit", + "rstrip", + "slice", + "slice_replace", + "split", + "strip", + "swapcase", + "title", + 
"upper", + "casefold", + ], + [()] * 100, + [{}] * 100, + ) +) +ids, _, _ = zip(*_any_string_method) # use method name as fixture-id + + +# test that the above list captures all methods of StringMethods +missing_methods = { + f for f in dir(strings.StringMethods) if not f.startswith("_") +} - set(ids) +assert not missing_methods + + +@pytest.fixture(params=_any_string_method, ids=ids) +def any_string_method(request): + """ + Fixture for all public methods of `StringMethods` + + This fixture returns a tuple of the method name and sample arguments + necessary to call the method. + + Returns + ------- + method_name : str + The name of the method in `StringMethods` + args : tuple + Sample values for the positional arguments + kwargs : dict + Sample values for the keyword arguments + + Examples + -------- + >>> def test_something(any_string_method): + ... s = pd.Series(['a', 'b', np.nan, 'd']) + ... + ... method_name, args, kwargs = any_string_method + ... method = getattr(s.str, method_name) + ... # will not raise + ... 
method(*args, **kwargs) + """ + return request.param + + +# subset of the full set from pandas/conftest.py +_any_allowed_skipna_inferred_dtype = [ + ("string", ["a", np.nan, "c"]), + ("bytes", [b"a", np.nan, b"c"]), + ("empty", [np.nan, np.nan, np.nan]), + ("empty", []), + ("mixed-integer", ["a", np.nan, 2]), +] +ids, _ = zip(*_any_allowed_skipna_inferred_dtype) # use inferred type as id + + +@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids) +def any_allowed_skipna_inferred_dtype(request): + """ + Fixture for all (inferred) dtypes allowed in StringMethods.__init__ + + The covered (inferred) types are: + * 'string' + * 'empty' + * 'bytes' + * 'mixed' + * 'mixed-integer' + + Returns + ------- + inferred_dtype : str + The string for the inferred dtype from _libs.lib.infer_dtype + values : np.ndarray + An array of object dtype that will be inferred to have + `inferred_dtype` + + Examples + -------- + >>> import pandas._libs.lib as lib + >>> + >>> def test_something(any_allowed_skipna_inferred_dtype): + ... inferred_dtype, values = any_allowed_skipna_inferred_dtype + ... # will pass + ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype + ... + ... # constructor for .str-accessor will also pass + ... 
pd.Series(values).str + """ + inferred_dtype, values = request.param + values = np.array(values, dtype=object) # object dtype to avoid casting + + # correctness of inference tested in tests/dtypes/test_inference.py + return inferred_dtype, values + + +class TestStringMethods: + def test_api(self): + + # GH 6106, GH 9322 + assert Series.str is strings.StringMethods + assert isinstance(Series([""]).str, strings.StringMethods) + + def test_api_mi_raises(self): + # GH 23679 + mi = MultiIndex.from_arrays([["a", "b", "c"]]) + msg = "Can only use .str accessor with Index, not MultiIndex" + with pytest.raises(AttributeError, match=msg): + mi.str + assert not hasattr(mi, "str") + + @pytest.mark.parametrize("dtype", [object, "category"]) + def test_api_per_dtype(self, index_or_series, dtype, any_skipna_inferred_dtype): + # one instance of parametrized fixture + box = index_or_series + inferred_dtype, values = any_skipna_inferred_dtype + + if dtype == "category" and len(values) and values[1] is pd.NA: + pytest.xfail(reason="Categorical does not yet support pd.NA") + + t = box(values, dtype=dtype) # explicit dtype to avoid casting + + # TODO: get rid of these xfails + if dtype == "category" and inferred_dtype in ["period", "interval"]: + pytest.xfail( + reason="Conversion to numpy array fails because " + "the ._values-attribute is not a numpy array for " + "PeriodArray/IntervalArray; see GH 23553" + ) + + types_passing_constructor = [ + "string", + "unicode", + "empty", + "bytes", + "mixed", + "mixed-integer", + ] + if inferred_dtype in types_passing_constructor: + # GH 6106 + assert isinstance(t.str, strings.StringMethods) + else: + # GH 9184, GH 23011, GH 23163 + msg = "Can only use .str accessor with string values.*" + with pytest.raises(AttributeError, match=msg): + t.str + assert not hasattr(t, "str") + + @pytest.mark.parametrize("dtype", [object, "category"]) + def test_api_per_method( + self, + index_or_series, + dtype, + any_allowed_skipna_inferred_dtype, + 
any_string_method, + ): + # this test does not check correctness of the different methods, + # just that the methods work on the specified (inferred) dtypes, + # and raise on all others + box = index_or_series + + # one instance of each parametrized fixture + inferred_dtype, values = any_allowed_skipna_inferred_dtype + method_name, args, kwargs = any_string_method + + # TODO: get rid of these xfails + if ( + method_name in ["partition", "rpartition"] + and box == Index + and inferred_dtype == "empty" + ): + pytest.xfail(reason="Method cannot deal with empty Index") + if ( + method_name == "split" + and box == Index + and values.size == 0 + and kwargs.get("expand", None) is not None + ): + pytest.xfail(reason="Split fails on empty Series when expand=True") + if ( + method_name == "get_dummies" + and box == Index + and inferred_dtype == "empty" + and (dtype == object or values.size == 0) + ): + pytest.xfail(reason="Need to fortify get_dummies corner cases") + + t = box(values, dtype=dtype) # explicit dtype to avoid casting + method = getattr(t.str, method_name) + + bytes_allowed = method_name in ["decode", "get", "len", "slice"] + # as of v0.23.4, all methods except 'cat' are very lenient with the + # allowed data types, just returning NaN for entries that error. + # This could be changed with an 'errors'-kwarg to the `str`-accessor, + # see discussion in GH 13877 + mixed_allowed = method_name not in ["cat"] + + allowed_types = ( + ["string", "unicode", "empty"] + + ["bytes"] * bytes_allowed + + ["mixed", "mixed-integer"] * mixed_allowed + ) + + if inferred_dtype in allowed_types: + # xref GH 23555, GH 23556 + method(*args, **kwargs) # works! + else: + # GH 23011, GH 23163 + msg = ( + f"Cannot use .str.{method_name} with values of " + f"inferred dtype {repr(inferred_dtype)}." 
+ ) + with pytest.raises(TypeError, match=msg): + method(*args, **kwargs) + + def test_api_for_categorical(self, any_string_method): + # https://github.com/pandas-dev/pandas/issues/10661 + s = Series(list("aabb")) + s = s + " " + s + c = s.astype("category") + assert isinstance(c.str, strings.StringMethods) + + method_name, args, kwargs = any_string_method + + result = getattr(c.str, method_name)(*args, **kwargs) + expected = getattr(s.str, method_name)(*args, **kwargs) + + if isinstance(result, DataFrame): + tm.assert_frame_equal(result, expected) + elif isinstance(result, Series): + tm.assert_series_equal(result, expected) + else: + # str.cat(others=None) returns string, for example + assert result == expected + + def test_iter(self): + # GH3638 + strs = "google", "wikimedia", "wikipedia", "wikitravel" + ds = Series(strs) + + with tm.assert_produces_warning(FutureWarning): + for s in ds.str: + # iter must yield a Series + assert isinstance(s, Series) + + # indices of each yielded Series should be equal to the index of + # the original Series + tm.assert_index_equal(s.index, ds.index) + + for el in s: + # each element of the series is either a basestring/str or nan + assert isinstance(el, str) or isna(el) + + # desired behavior is to iterate until everything would be nan on the + # next iter so make sure the last element of the iterator was 'l' in + # this case since 'wikitravel' is the longest string + assert s.dropna().values.item() == "l" + + def test_iter_empty(self): + ds = Series([], dtype=object) + + i, s = 100, 1 + + with tm.assert_produces_warning(FutureWarning): + for i, s in enumerate(ds.str): + pass + + # nothing to iterate over so nothing defined values should remain + # unchanged + assert i == 100 + assert s == 1 + + def test_iter_single_element(self): + ds = Series(["a"]) + + with tm.assert_produces_warning(FutureWarning): + for i, s in enumerate(ds.str): + pass + + assert not i + tm.assert_series_equal(ds, s) + + def 
test_iter_object_try_string(self): + ds = Series([slice(None, randint(10), randint(10, 20)) for _ in range(4)]) + + i, s = 100, "h" + + with tm.assert_produces_warning(FutureWarning): + for i, s in enumerate(ds.str): + pass + + assert i == 100 + assert s == "h" + + @pytest.mark.parametrize("other", [None, Series, Index]) + def test_str_cat_name(self, index_or_series, other): + # GH 21053 + box = index_or_series + values = ["a", "b"] + if other: + other = other(values) + else: + other = values + result = box(values, name="name").str.cat(other, sep=",") + assert result.name == "name" + + def test_str_cat(self, index_or_series): + box = index_or_series + # test_cat above tests "str_cat" from ndarray; + # here testing "str.cat" from Series/Indext to ndarray/list + s = box(["a", "a", "b", "b", "c", np.nan]) + + # single array + result = s.str.cat() + expected = "aabbc" + assert result == expected + + result = s.str.cat(na_rep="-") + expected = "aabbc-" + assert result == expected + + result = s.str.cat(sep="_", na_rep="NA") + expected = "a_a_b_b_c_NA" + assert result == expected + + t = np.array(["a", np.nan, "b", "d", "foo", np.nan], dtype=object) + expected = box(["aa", "a-", "bb", "bd", "cfoo", "--"]) + + # Series/Index with array + result = s.str.cat(t, na_rep="-") + assert_series_or_index_equal(result, expected) + + # Series/Index with list + result = s.str.cat(list(t), na_rep="-") + assert_series_or_index_equal(result, expected) + + # errors for incorrect lengths + rgx = r"If `others` contains arrays or lists \(or other list-likes.*" + z = Series(["1", "2", "3"]) + + with pytest.raises(ValueError, match=rgx): + s.str.cat(z.values) + + with pytest.raises(ValueError, match=rgx): + s.str.cat(list(z)) + + def test_str_cat_raises_intuitive_error(self, index_or_series): + # GH 11334 + box = index_or_series + s = box(["a", "b", "c", "d"]) + message = "Did you mean to supply a `sep` keyword?" 
+ with pytest.raises(ValueError, match=message): + s.str.cat("|") + with pytest.raises(ValueError, match=message): + s.str.cat(" ") + + @pytest.mark.parametrize("sep", ["", None]) + @pytest.mark.parametrize("dtype_target", ["object", "category"]) + @pytest.mark.parametrize("dtype_caller", ["object", "category"]) + def test_str_cat_categorical( + self, index_or_series, dtype_caller, dtype_target, sep + ): + box = index_or_series + + s = Index(["a", "a", "b", "a"], dtype=dtype_caller) + s = s if box == Index else Series(s, index=s) + t = Index(["b", "a", "b", "c"], dtype=dtype_target) + + expected = Index(["ab", "aa", "bb", "ac"]) + expected = expected if box == Index else Series(expected, index=s) + + # Series/Index with unaligned Index -> t.values + result = s.str.cat(t.values, sep=sep) + assert_series_or_index_equal(result, expected) + + # Series/Index with Series having matching Index + t = Series(t.values, index=s) + result = s.str.cat(t, sep=sep) + assert_series_or_index_equal(result, expected) + + # Series/Index with Series.values + result = s.str.cat(t.values, sep=sep) + assert_series_or_index_equal(result, expected) + + # Series/Index with Series having different Index + t = Series(t.values, index=t.values) + expected = Index(["aa", "aa", "aa", "bb", "bb"]) + expected = ( + expected if box == Index else Series(expected, index=expected.str[:1]) + ) + + result = s.str.cat(t, sep=sep) + assert_series_or_index_equal(result, expected) + + # test integer/float dtypes (inferred by constructor) and mixed + @pytest.mark.parametrize( + "data", + [[1, 2, 3], [0.1, 0.2, 0.3], [1, 2, "b"]], + ids=["integers", "floats", "mixed"], + ) + # without dtype=object, np.array would cast [1, 2, 'b'] to ['1', '2', 'b'] + @pytest.mark.parametrize( + "box", + [Series, Index, list, lambda x: np.array(x, dtype=object)], + ids=["Series", "Index", "list", "np.array"], + ) + def test_str_cat_wrong_dtype_raises(self, box, data): + # GH 22722 + s = Series(["a", "b", "c"]) + t = box(data) + 
+ msg = "Concatenation requires list-likes containing only strings.*" + with pytest.raises(TypeError, match=msg): + # need to use outer and na_rep, as otherwise Index would not raise + s.str.cat(t, join="outer", na_rep="-") + + def test_str_cat_mixed_inputs(self, index_or_series): + box = index_or_series + s = Index(["a", "b", "c", "d"]) + s = s if box == Index else Series(s, index=s) + + t = Series(["A", "B", "C", "D"], index=s.values) + d = concat([t, Series(s, index=s)], axis=1) + + expected = Index(["aAa", "bBb", "cCc", "dDd"]) + expected = expected if box == Index else Series(expected.values, index=s.values) + + # Series/Index with DataFrame + result = s.str.cat(d) + assert_series_or_index_equal(result, expected) + + # Series/Index with two-dimensional ndarray + result = s.str.cat(d.values) + assert_series_or_index_equal(result, expected) + + # Series/Index with list of Series + result = s.str.cat([t, s]) + assert_series_or_index_equal(result, expected) + + # Series/Index with mixed list of Series/array + result = s.str.cat([t, s.values]) + assert_series_or_index_equal(result, expected) + + # Series/Index with list of Series; different indexes + t.index = ["b", "c", "d", "a"] + expected = box(["aDa", "bAb", "cBc", "dCd"]) + expected = expected if box == Index else Series(expected.values, index=s.values) + result = s.str.cat([t, s]) + assert_series_or_index_equal(result, expected) + + # Series/Index with mixed list; different index + result = s.str.cat([t, s.values]) + assert_series_or_index_equal(result, expected) + + # Series/Index with DataFrame; different indexes + d.index = ["b", "c", "d", "a"] + expected = box(["aDd", "bAa", "cBb", "dCc"]) + expected = expected if box == Index else Series(expected.values, index=s.values) + result = s.str.cat(d) + assert_series_or_index_equal(result, expected) + + # errors for incorrect lengths + rgx = r"If `others` contains arrays or lists \(or other list-likes.*" + z = Series(["1", "2", "3"]) + e = concat([z, z], axis=1) 
+ + # two-dimensional ndarray + with pytest.raises(ValueError, match=rgx): + s.str.cat(e.values) + + # list of list-likes + with pytest.raises(ValueError, match=rgx): + s.str.cat([z.values, s.values]) + + # mixed list of Series/list-like + with pytest.raises(ValueError, match=rgx): + s.str.cat([z.values, s]) + + # errors for incorrect arguments in list-like + rgx = "others must be Series, Index, DataFrame,.*" + # make sure None/NaN do not crash checks in _get_series_list + u = Series(["a", np.nan, "c", None]) + + # mix of string and Series + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, "u"]) + + # DataFrame in list + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, d]) + + # 2-dim ndarray in list + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, d.values]) + + # nested lists + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, [u, d]]) + + # forbidden input type: set + # GH 23009 + with pytest.raises(TypeError, match=rgx): + s.str.cat(set(u)) + + # forbidden input type: set in list + # GH 23009 + with pytest.raises(TypeError, match=rgx): + s.str.cat([u, set(u)]) + + # other forbidden input type, e.g. 
int + with pytest.raises(TypeError, match=rgx): + s.str.cat(1) + + # nested list-likes + with pytest.raises(TypeError, match=rgx): + s.str.cat(iter([t.values, list(s)])) + + @pytest.mark.parametrize("join", ["left", "outer", "inner", "right"]) + def test_str_cat_align_indexed(self, index_or_series, join): + # https://github.com/pandas-dev/pandas/issues/18657 + box = index_or_series + + s = Series(["a", "b", "c", "d"], index=["a", "b", "c", "d"]) + t = Series(["D", "A", "E", "B"], index=["d", "a", "e", "b"]) + sa, ta = s.align(t, join=join) + # result after manual alignment of inputs + expected = sa.str.cat(ta, na_rep="-") + + if box == Index: + s = Index(s) + sa = Index(sa) + expected = Index(expected) + + result = s.str.cat(t, join=join, na_rep="-") + assert_series_or_index_equal(result, expected) + + @pytest.mark.parametrize("join", ["left", "outer", "inner", "right"]) + def test_str_cat_align_mixed_inputs(self, join): + s = Series(["a", "b", "c", "d"]) + t = Series(["d", "a", "e", "b"], index=[3, 0, 4, 1]) + d = concat([t, t], axis=1) + + expected_outer = Series(["aaa", "bbb", "c--", "ddd", "-ee"]) + expected = expected_outer.loc[s.index.join(t.index, how=join)] + + # list of Series + result = s.str.cat([t, t], join=join, na_rep="-") + tm.assert_series_equal(result, expected) + + # DataFrame + result = s.str.cat(d, join=join, na_rep="-") + tm.assert_series_equal(result, expected) + + # mixed list of indexed/unindexed + u = np.array(["A", "B", "C", "D"]) + expected_outer = Series(["aaA", "bbB", "c-C", "ddD", "-e-"]) + # joint index of rhs [t, u]; u will be forced have index of s + rhs_idx = t.index & s.index if join == "inner" else t.index | s.index + + expected = expected_outer.loc[s.index.join(rhs_idx, how=join)] + result = s.str.cat([t, u], join=join, na_rep="-") + tm.assert_series_equal(result, expected) + + with pytest.raises(TypeError, match="others must be Series,.*"): + # nested lists are forbidden + s.str.cat([t, list(u)], join=join) + + # errors for 
incorrect lengths + rgx = r"If `others` contains arrays or lists \(or other list-likes.*" + z = Series(["1", "2", "3"]).values + + # unindexed object of wrong length + with pytest.raises(ValueError, match=rgx): + s.str.cat(z, join=join) + + # unindexed object of wrong length in list + with pytest.raises(ValueError, match=rgx): + s.str.cat([t, z], join=join) + + index_or_series2 = [Series, Index] # type: ignore + # List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]" + # See GH#29725 + + @pytest.mark.parametrize("other", index_or_series2) + def test_str_cat_all_na(self, index_or_series, other): + # GH 24044 + box = index_or_series + + # check that all NaNs in caller / target work + s = Index(["a", "b", "c", "d"]) + s = s if box == Index else Series(s, index=s) + t = other([np.nan] * 4, dtype=object) + # add index of s for alignment + t = t if other == Index else Series(t, index=s) + + # all-NA target + if box == Series: + expected = Series([np.nan] * 4, index=s.index, dtype=object) + else: # box == Index + expected = Index([np.nan] * 4, dtype=object) + result = s.str.cat(t, join="left") + assert_series_or_index_equal(result, expected) + + # all-NA caller (only for Series) + if other == Series: + expected = Series([np.nan] * 4, dtype=object, index=t.index) + result = t.str.cat(s, join="left") + tm.assert_series_equal(result, expected) + + def test_str_cat_special_cases(self): + s = Series(["a", "b", "c", "d"]) + t = Series(["d", "a", "e", "b"], index=[3, 0, 4, 1]) + + # iterator of elements with different types + expected = Series(["aaa", "bbb", "c-c", "ddd", "-e-"]) + result = s.str.cat(iter([t, s.values]), join="outer", na_rep="-") + tm.assert_series_equal(result, expected) + + # right-align with different indexes in others + expected = Series(["aa-", "d-d"], index=[0, 3]) + result = s.str.cat([t.loc[[0]], t.loc[[3]]], join="right", na_rep="-") + tm.assert_series_equal(result, expected) + + def test_cat_on_filtered_index(self): + df = 
DataFrame( + index=MultiIndex.from_product( + [[2011, 2012], [1, 2, 3]], names=["year", "month"] + ) + ) + + df = df.reset_index() + df = df[df.month > 1] + + str_year = df.year.astype("str") + str_month = df.month.astype("str") + str_both = str_year.str.cat(str_month, sep=" ") + + assert str_both.loc[1] == "2011 2" + + str_multiple = str_year.str.cat([str_month, str_month], sep=" ") + + assert str_multiple.loc[1] == "2011 2 2" + + def test_count(self): + values = np.array( + ["foo", "foofoo", np.nan, "foooofooofommmfoo"], dtype=np.object_ + ) + + result = strings.str_count(values, "f[o]+") + exp = np.array([1, 2, np.nan, 4]) + tm.assert_numpy_array_equal(result, exp) + + result = Series(values).str.count("f[o]+") + exp = Series([1, 2, np.nan, 4]) + assert isinstance(result, Series) + tm.assert_series_equal(result, exp) + + # mixed + mixed = np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=object, + ) + rs = strings.str_count(mixed, "a") + xp = np.array([1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]) + tm.assert_numpy_array_equal(rs, xp) + + rs = Series(mixed).str.count("a") + xp = Series([1, np.nan, 0, np.nan, np.nan, 0, np.nan, np.nan, np.nan]) + assert isinstance(rs, Series) + tm.assert_series_equal(rs, xp) + + def test_contains(self): + values = np.array( + ["foo", np.nan, "fooommm__foo", "mmm_", "foommm[_]+bar"], dtype=np.object_ + ) + pat = "mmm[_]+" + + result = strings.str_contains(values, pat) + expected = np.array([False, np.nan, True, True, False], dtype=np.object_) + tm.assert_numpy_array_equal(result, expected) + + result = strings.str_contains(values, pat, regex=False) + expected = np.array([False, np.nan, False, False, True], dtype=np.object_) + tm.assert_numpy_array_equal(result, expected) + + values = np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=object) + result = strings.str_contains(values, pat) + expected = np.array([False, False, True, True]) + assert result.dtype == np.bool_ + 
tm.assert_numpy_array_equal(result, expected) + + # case insensitive using regex + values = np.array(["Foo", "xYz", "fOOomMm__fOo", "MMM_"], dtype=object) + result = strings.str_contains(values, "FOO|mmm", case=False) + expected = np.array([True, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + # case insensitive without regex + result = strings.str_contains(values, "foo", regex=False, case=False) + expected = np.array([True, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + # mixed + mixed = np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=object, + ) + rs = strings.str_contains(mixed, "o") + xp = np.array( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], + dtype=np.object_, + ) + tm.assert_numpy_array_equal(rs, xp) + + rs = Series(mixed).str.contains("o") + xp = Series( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan] + ) + assert isinstance(rs, Series) + tm.assert_series_equal(rs, xp) + + # unicode + values = np.array(["foo", np.nan, "fooommm__foo", "mmm_"], dtype=np.object_) + pat = "mmm[_]+" + + result = strings.str_contains(values, pat) + expected = np.array([False, np.nan, True, True], dtype=np.object_) + tm.assert_numpy_array_equal(result, expected) + + result = strings.str_contains(values, pat, na=False) + expected = np.array([False, False, True, True]) + tm.assert_numpy_array_equal(result, expected) + + values = np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=np.object_) + result = strings.str_contains(values, pat) + expected = np.array([False, False, True, True]) + assert result.dtype == np.bool_ + tm.assert_numpy_array_equal(result, expected) + + def test_contains_for_object_category(self): + # gh 22158 + + # na for category + values = Series(["a", "b", "c", "a", np.nan], dtype="category") + result = values.str.contains("a", na=True) + expected = Series([True, False, False, True, True]) + tm.assert_series_equal(result, 
expected) + + result = values.str.contains("a", na=False) + expected = Series([True, False, False, True, False]) + tm.assert_series_equal(result, expected) + + # na for objects + values = Series(["a", "b", "c", "a", np.nan]) + result = values.str.contains("a", na=True) + expected = Series([True, False, False, True, True]) + tm.assert_series_equal(result, expected) + + result = values.str.contains("a", na=False) + expected = Series([True, False, False, True, False]) + tm.assert_series_equal(result, expected) + + def test_startswith(self): + values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) + + result = values.str.startswith("foo") + exp = Series([False, np.nan, True, False, False, np.nan, True]) + tm.assert_series_equal(result, exp) + + result = values.str.startswith("foo", na=True) + tm.assert_series_equal(result, exp.fillna(True).astype(bool)) + + # mixed + mixed = np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=np.object_, + ) + rs = strings.str_startswith(mixed, "f") + xp = np.array( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan], + dtype=np.object_, + ) + tm.assert_numpy_array_equal(rs, xp) + + rs = Series(mixed).str.startswith("f") + assert isinstance(rs, Series) + xp = Series( + [False, np.nan, False, np.nan, np.nan, True, np.nan, np.nan, np.nan] + ) + tm.assert_series_equal(rs, xp) + + def test_endswith(self): + values = Series(["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"]) + + result = values.str.endswith("foo") + exp = Series([False, np.nan, False, False, True, np.nan, True]) + tm.assert_series_equal(result, exp) + + result = values.str.endswith("foo", na=False) + tm.assert_series_equal(result, exp.fillna(False).astype(bool)) + + # mixed + mixed = np.array( + ["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0], + dtype=object, + ) + rs = strings.str_endswith(mixed, "f") + xp = np.array( + [False, np.nan, False, np.nan, np.nan, False, np.nan, 
np.nan, np.nan], + dtype=np.object_, + ) + tm.assert_numpy_array_equal(rs, xp) + + rs = Series(mixed).str.endswith("f") + xp = Series( + [False, np.nan, False, np.nan, np.nan, False, np.nan, np.nan, np.nan] + ) + assert isinstance(rs, Series) + tm.assert_series_equal(rs, xp) + + def test_title(self): + values = Series(["FOO", "BAR", np.nan, "Blah", "blurg"]) + + result = values.str.title() + exp = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series( + ["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0] + ) + mixed = mixed.str.title() + exp = Series( + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan] + ) + tm.assert_almost_equal(mixed, exp) + + def test_lower_upper(self): + values = Series(["om", np.nan, "nom", "nom"]) + + result = values.str.upper() + exp = Series(["OM", np.nan, "NOM", "NOM"]) + tm.assert_series_equal(result, exp) + + result = result.str.lower() + tm.assert_series_equal(result, values) + + # mixed + mixed = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) + mixed = mixed.str.upper() + rs = Series(mixed).str.lower() + xp = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) + assert isinstance(rs, Series) + tm.assert_series_equal(rs, xp) + + def test_capitalize(self): + values = Series(["FOO", "BAR", np.nan, "Blah", "blurg"]) + result = values.str.capitalize() + exp = Series(["Foo", "Bar", np.nan, "Blah", "Blurg"]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series( + ["FOO", np.nan, "bar", True, datetime.today(), "blah", None, 1, 2.0] + ) + mixed = mixed.str.capitalize() + exp = Series( + ["Foo", np.nan, "Bar", np.nan, np.nan, "Blah", np.nan, np.nan, np.nan] + ) + tm.assert_almost_equal(mixed, exp) + + def test_swapcase(self): + values = Series(["FOO", "BAR", np.nan, "Blah", "blurg"]) + result = values.str.swapcase() + exp = Series(["foo", "bar", np.nan, "bLAH", "BLURG"]) + 
tm.assert_series_equal(result, exp) + + # mixed + mixed = Series( + ["FOO", np.nan, "bar", True, datetime.today(), "Blah", None, 1, 2.0] + ) + mixed = mixed.str.swapcase() + exp = Series( + ["foo", np.nan, "BAR", np.nan, np.nan, "bLAH", np.nan, np.nan, np.nan] + ) + tm.assert_almost_equal(mixed, exp) + + def test_casemethods(self): + values = ["aaa", "bbb", "CCC", "Dddd", "eEEE"] + s = Series(values) + assert s.str.lower().tolist() == [v.lower() for v in values] + assert s.str.upper().tolist() == [v.upper() for v in values] + assert s.str.title().tolist() == [v.title() for v in values] + assert s.str.capitalize().tolist() == [v.capitalize() for v in values] + assert s.str.swapcase().tolist() == [v.swapcase() for v in values] + + def test_replace(self): + values = Series(["fooBAD__barBAD", np.nan]) + + result = values.str.replace("BAD[_]*", "") + exp = Series(["foobar", np.nan]) + tm.assert_series_equal(result, exp) + + result = values.str.replace("BAD[_]*", "", n=1) + exp = Series(["foobarBAD", np.nan]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series( + ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] + ) + + rs = Series(mixed).str.replace("BAD[_]*", "") + xp = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + # flags + unicode + values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) + exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) + result = values.str.replace(r"(?<=\w),(?=\w)", ", ", flags=re.UNICODE) + tm.assert_series_equal(result, exp) + + # GH 13438 + msg = "repl must be a string or callable" + for klass in (Series, Index): + for repl in (None, 3, {"a": "b"}): + for data in (["a", "b", None], ["a", "b", "c", "ad"]): + values = klass(data) + with pytest.raises(TypeError, match=msg): + values.str.replace("a", repl) + + def test_replace_callable(self): + # GH 15055 + values = Series(["fooBAD__barBAD", np.nan]) + + # test with 
callable + repl = lambda m: m.group(0).swapcase() + result = values.str.replace("[a-z][A-Z]{2}", repl, n=2) + exp = Series(["foObaD__baRbaD", np.nan]) + tm.assert_series_equal(result, exp) + + # test with wrong number of arguments, raising an error + p_err = ( + r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " + r"(?(3)required )positional arguments?" + ) + + repl = lambda: None + with pytest.raises(TypeError, match=p_err): + values.str.replace("a", repl) + + repl = lambda m, x: None + with pytest.raises(TypeError, match=p_err): + values.str.replace("a", repl) + + repl = lambda m, x, y=None: None + with pytest.raises(TypeError, match=p_err): + values.str.replace("a", repl) + + # test regex named groups + values = Series(["Foo Bar Baz", np.nan]) + pat = r"(?P\w+) (?P\w+) (?P\w+)" + repl = lambda m: m.group("middle").swapcase() + result = values.str.replace(pat, repl) + exp = Series(["bAR", np.nan]) + tm.assert_series_equal(result, exp) + + def test_replace_compiled_regex(self): + # GH 15446 + values = Series(["fooBAD__barBAD", np.nan]) + + # test with compiled regex + pat = re.compile(r"BAD[_]*") + result = values.str.replace(pat, "") + exp = Series(["foobar", np.nan]) + tm.assert_series_equal(result, exp) + + result = values.str.replace(pat, "", n=1) + exp = Series(["foobarBAD", np.nan]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series( + ["aBAD", np.nan, "bBAD", True, datetime.today(), "fooBAD", None, 1, 2.0] + ) + + rs = Series(mixed).str.replace(pat, "") + xp = Series(["a", np.nan, "b", np.nan, np.nan, "foo", np.nan, np.nan, np.nan]) + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + # flags + unicode + values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) + exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) + pat = re.compile(r"(?<=\w),(?=\w)", flags=re.UNICODE) + result = values.str.replace(pat, ", ") + tm.assert_series_equal(result, exp) + + # case and flags provided to str.replace will have no effect + # and will produce 
warnings + values = Series(["fooBAD__barBAD__bad", np.nan]) + pat = re.compile(r"BAD[_]*") + + with pytest.raises(ValueError, match="case and flags cannot be"): + result = values.str.replace(pat, "", flags=re.IGNORECASE) + + with pytest.raises(ValueError, match="case and flags cannot be"): + result = values.str.replace(pat, "", case=False) + + with pytest.raises(ValueError, match="case and flags cannot be"): + result = values.str.replace(pat, "", case=True) + + # test with callable + values = Series(["fooBAD__barBAD", np.nan]) + repl = lambda m: m.group(0).swapcase() + pat = re.compile("[a-z][A-Z]{2}") + result = values.str.replace(pat, repl, n=2) + exp = Series(["foObaD__baRbaD", np.nan]) + tm.assert_series_equal(result, exp) + + def test_replace_literal(self): + # GH16808 literal replace (regex=False vs regex=True) + values = Series(["f.o", "foo", np.nan]) + exp = Series(["bao", "bao", np.nan]) + result = values.str.replace("f.", "ba") + tm.assert_series_equal(result, exp) + + exp = Series(["bao", "foo", np.nan]) + result = values.str.replace("f.", "ba", regex=False) + tm.assert_series_equal(result, exp) + + # Cannot do a literal replace if given a callable repl or compiled + # pattern + callable_repl = lambda m: m.group(0).swapcase() + compiled_pat = re.compile("[a-z][A-Z]{2}") + + msg = "Cannot use a callable replacement when regex=False" + with pytest.raises(ValueError, match=msg): + values.str.replace("abc", callable_repl, regex=False) + + msg = "Cannot use a compiled regex as replacement pattern with regex=False" + with pytest.raises(ValueError, match=msg): + values.str.replace(compiled_pat, "", regex=False) + + def test_repeat(self): + values = Series(["a", "b", np.nan, "c", np.nan, "d"]) + + result = values.str.repeat(3) + exp = Series(["aaa", "bbb", np.nan, "ccc", np.nan, "ddd"]) + tm.assert_series_equal(result, exp) + + result = values.str.repeat([1, 2, 3, 4, 5, 6]) + exp = Series(["a", "bb", np.nan, "cccc", np.nan, "dddddd"]) + 
tm.assert_series_equal(result, exp) + + # mixed + mixed = Series(["a", np.nan, "b", True, datetime.today(), "foo", None, 1, 2.0]) + + rs = Series(mixed).str.repeat(3) + xp = Series( + ["aaa", np.nan, "bbb", np.nan, np.nan, "foofoofoo", np.nan, np.nan, np.nan] + ) + assert isinstance(rs, Series) + tm.assert_series_equal(rs, xp) + + def test_repeat_with_null(self): + # GH: 31632 + values = Series(["a", None], dtype="string") + result = values.str.repeat([3, 4]) + exp = Series(["aaa", None], dtype="string") + tm.assert_series_equal(result, exp) + + values = Series(["a", "b"], dtype="string") + result = values.str.repeat([3, None]) + exp = Series(["aaa", None], dtype="string") + tm.assert_series_equal(result, exp) + + def test_match(self): + # New match behavior introduced in 0.13 + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + result = values.str.match(".*(BAD[_]+).*(BAD)") + exp = Series([True, np.nan, False]) + tm.assert_series_equal(result, exp) + + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + result = values.str.match(".*BAD[_]+.*BAD") + exp = Series([True, np.nan, False]) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series( + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] + ) + rs = Series(mixed).str.match(".*(BAD[_]+).*(BAD)") + xp = Series([True, np.nan, True, np.nan, np.nan, False, np.nan, np.nan, np.nan]) + assert isinstance(rs, Series) + tm.assert_series_equal(rs, xp) + + # na GH #6609 + res = Series(["a", 0, np.nan]).str.match("a", na=False) + exp = Series([True, False, False]) + tm.assert_series_equal(exp, res) + res = Series(["a", 0, np.nan]).str.match("a") + exp = Series([True, np.nan, np.nan]) + tm.assert_series_equal(exp, res) + + def test_extract_expand_None(self): + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + with pytest.raises(ValueError, match="expand must be True or False"): + values.str.extract(".*(BAD[_]+).*(BAD)", expand=None) + + def 
test_extract_expand_unspecified(self): + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + result_unspecified = values.str.extract(".*(BAD[_]+).*") + assert isinstance(result_unspecified, DataFrame) + result_true = values.str.extract(".*(BAD[_]+).*", expand=True) + tm.assert_frame_equal(result_unspecified, result_true) + + def test_extract_expand_False(self): + # Contains tests like those in test_match and some others. + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + er = [np.nan, np.nan] # empty row + + result = values.str.extract(".*(BAD[_]+).*(BAD)", expand=False) + exp = DataFrame([["BAD__", "BAD"], er, er]) + tm.assert_frame_equal(result, exp) + + # mixed + mixed = Series( + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] + ) + + rs = Series(mixed).str.extract(".*(BAD[_]+).*(BAD)", expand=False) + exp = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er]) + tm.assert_frame_equal(rs, exp) + + # unicode + values = Series(["fooBAD__barBAD", np.nan, "foo"]) + + result = values.str.extract(".*(BAD[_]+).*(BAD)", expand=False) + exp = DataFrame([["BAD__", "BAD"], er, er]) + tm.assert_frame_equal(result, exp) + + # GH9980 + # Index only works with one regex group since + # multi-group would expand to a frame + idx = Index(["A1", "A2", "A3", "A4", "B5"]) + with pytest.raises(ValueError, match="supported"): + idx.str.extract("([AB])([123])", expand=False) + + # these should work for both Series and Index + for klass in [Series, Index]: + # no groups + s_or_idx = klass(["A1", "B2", "C3"]) + msg = "pattern contains no capture groups" + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("[ABC][123]", expand=False) + + # only non-capturing groups + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("(?:[AB]).*", expand=False) + + # single group renames series/index properly + s_or_idx = klass(["A1", "A2"]) + result = s_or_idx.str.extract(r"(?PA)\d", 
expand=False) + assert result.name == "uno" + + exp = klass(["A", "A"], name="uno") + if klass == Series: + tm.assert_series_equal(result, exp) + else: + tm.assert_index_equal(result, exp) + + s = Series(["A1", "B2", "C3"]) + # one group, no matches + result = s.str.extract("(_)", expand=False) + exp = Series([np.nan, np.nan, np.nan], dtype=object) + tm.assert_series_equal(result, exp) + + # two groups, no matches + result = s.str.extract("(_)(_)", expand=False) + exp = DataFrame( + [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=object + ) + tm.assert_frame_equal(result, exp) + + # one group, some matches + result = s.str.extract("([AB])[123]", expand=False) + exp = Series(["A", "B", np.nan]) + tm.assert_series_equal(result, exp) + + # two groups, some matches + result = s.str.extract("([AB])([123])", expand=False) + exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) + tm.assert_frame_equal(result, exp) + + # one named group + result = s.str.extract("(?P[AB])", expand=False) + exp = Series(["A", "B", np.nan], name="letter") + tm.assert_series_equal(result, exp) + + # two named groups + result = s.str.extract("(?P[AB])(?P[123])", expand=False) + exp = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, np.nan]], columns=["letter", "number"] + ) + tm.assert_frame_equal(result, exp) + + # mix named and unnamed groups + result = s.str.extract("([AB])(?P[123])", expand=False) + exp = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, np.nan]], columns=[0, "number"] + ) + tm.assert_frame_equal(result, exp) + + # one normal group, one non-capturing group + result = s.str.extract("([AB])(?:[123])", expand=False) + exp = Series(["A", "B", np.nan]) + tm.assert_series_equal(result, exp) + + # two normal groups, one non-capturing group + result = Series(["A11", "B22", "C33"]).str.extract( + "([AB])([123])(?:[123])", expand=False + ) + exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) + tm.assert_frame_equal(result, exp) + + # one optional group 
followed by one normal group + result = Series(["A1", "B2", "3"]).str.extract( + "(?P[AB])?(?P[123])", expand=False + ) + exp = DataFrame( + [["A", "1"], ["B", "2"], [np.nan, "3"]], columns=["letter", "number"] + ) + tm.assert_frame_equal(result, exp) + + # one normal group followed by one optional group + result = Series(["A1", "B2", "C"]).str.extract( + "(?P[ABC])(?P[123])?", expand=False + ) + exp = DataFrame( + [["A", "1"], ["B", "2"], ["C", np.nan]], columns=["letter", "number"] + ) + tm.assert_frame_equal(result, exp) + + # GH6348 + # not passing index to the extractor + def check_index(index): + data = ["A1", "B2", "C"] + index = index[: len(data)] + s = Series(data, index=index) + result = s.str.extract(r"(\d)", expand=False) + exp = Series(["1", "2", np.nan], index=index) + tm.assert_series_equal(result, exp) + + result = Series(data, index=index).str.extract( + r"(?P\D)(?P\d)?", expand=False + ) + e_list = [["A", "1"], ["B", "2"], ["C", np.nan]] + exp = DataFrame(e_list, columns=["letter", "number"], index=index) + tm.assert_frame_equal(result, exp) + + i_funs = [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeIntIndex, + tm.makeDateIndex, + tm.makePeriodIndex, + tm.makeRangeIndex, + ] + for index in i_funs: + check_index(index()) + + # single_series_name_is_preserved. + s = Series(["a3", "b3", "c2"], name="bob") + r = s.str.extract(r"(?P[a-z])", expand=False) + e = Series(["a", "b", "c"], name="sue") + tm.assert_series_equal(r, e) + assert r.name == e.name + + def test_extract_expand_True(self): + # Contains tests like those in test_match and some others. 
+ values = Series(["fooBAD__barBAD", np.nan, "foo"]) + er = [np.nan, np.nan] # empty row + + result = values.str.extract(".*(BAD[_]+).*(BAD)", expand=True) + exp = DataFrame([["BAD__", "BAD"], er, er]) + tm.assert_frame_equal(result, exp) + + # mixed + mixed = Series( + [ + "aBAD_BAD", + np.nan, + "BAD_b_BAD", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] + ) + + rs = Series(mixed).str.extract(".*(BAD[_]+).*(BAD)", expand=True) + exp = DataFrame([["BAD_", "BAD"], er, ["BAD_", "BAD"], er, er, er, er, er, er]) + tm.assert_frame_equal(rs, exp) + + # these should work for both Series and Index + for klass in [Series, Index]: + # no groups + s_or_idx = klass(["A1", "B2", "C3"]) + msg = "pattern contains no capture groups" + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("[ABC][123]", expand=True) + + # only non-capturing groups + with pytest.raises(ValueError, match=msg): + s_or_idx.str.extract("(?:[AB]).*", expand=True) + + # single group renames series/index properly + s_or_idx = klass(["A1", "A2"]) + result_df = s_or_idx.str.extract(r"(?PA)\d", expand=True) + assert isinstance(result_df, DataFrame) + result_series = result_df["uno"] + tm.assert_series_equal(result_series, Series(["A", "A"], name="uno")) + + def test_extract_series(self): + # extract should give the same result whether or not the + # series has a name. 
+ for series_name in None, "series_name": + s = Series(["A1", "B2", "C3"], name=series_name) + # one group, no matches + result = s.str.extract("(_)", expand=True) + exp = DataFrame([np.nan, np.nan, np.nan], dtype=object) + tm.assert_frame_equal(result, exp) + + # two groups, no matches + result = s.str.extract("(_)(_)", expand=True) + exp = DataFrame( + [[np.nan, np.nan], [np.nan, np.nan], [np.nan, np.nan]], dtype=object + ) + tm.assert_frame_equal(result, exp) + + # one group, some matches + result = s.str.extract("([AB])[123]", expand=True) + exp = DataFrame(["A", "B", np.nan]) + tm.assert_frame_equal(result, exp) + + # two groups, some matches + result = s.str.extract("([AB])([123])", expand=True) + exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) + tm.assert_frame_equal(result, exp) + + # one named group + result = s.str.extract("(?P[AB])", expand=True) + exp = DataFrame({"letter": ["A", "B", np.nan]}) + tm.assert_frame_equal(result, exp) + + # two named groups + result = s.str.extract("(?P[AB])(?P[123])", expand=True) + e_list = [["A", "1"], ["B", "2"], [np.nan, np.nan]] + exp = DataFrame(e_list, columns=["letter", "number"]) + tm.assert_frame_equal(result, exp) + + # mix named and unnamed groups + result = s.str.extract("([AB])(?P[123])", expand=True) + exp = DataFrame(e_list, columns=[0, "number"]) + tm.assert_frame_equal(result, exp) + + # one normal group, one non-capturing group + result = s.str.extract("([AB])(?:[123])", expand=True) + exp = DataFrame(["A", "B", np.nan]) + tm.assert_frame_equal(result, exp) + + def test_extract_optional_groups(self): + + # two normal groups, one non-capturing group + result = Series(["A11", "B22", "C33"]).str.extract( + "([AB])([123])(?:[123])", expand=True + ) + exp = DataFrame([["A", "1"], ["B", "2"], [np.nan, np.nan]]) + tm.assert_frame_equal(result, exp) + + # one optional group followed by one normal group + result = Series(["A1", "B2", "3"]).str.extract( + "(?P[AB])?(?P[123])", expand=True + ) + e_list 
= [["A", "1"], ["B", "2"], [np.nan, "3"]] + exp = DataFrame(e_list, columns=["letter", "number"]) + tm.assert_frame_equal(result, exp) + + # one normal group followed by one optional group + result = Series(["A1", "B2", "C"]).str.extract( + "(?P[ABC])(?P[123])?", expand=True + ) + e_list = [["A", "1"], ["B", "2"], ["C", np.nan]] + exp = DataFrame(e_list, columns=["letter", "number"]) + tm.assert_frame_equal(result, exp) + + # GH6348 + # not passing index to the extractor + def check_index(index): + data = ["A1", "B2", "C"] + index = index[: len(data)] + result = Series(data, index=index).str.extract(r"(\d)", expand=True) + exp = DataFrame(["1", "2", np.nan], index=index) + tm.assert_frame_equal(result, exp) + + result = Series(data, index=index).str.extract( + r"(?P\D)(?P\d)?", expand=True + ) + e_list = [["A", "1"], ["B", "2"], ["C", np.nan]] + exp = DataFrame(e_list, columns=["letter", "number"], index=index) + tm.assert_frame_equal(result, exp) + + i_funs = [ + tm.makeStringIndex, + tm.makeUnicodeIndex, + tm.makeIntIndex, + tm.makeDateIndex, + tm.makePeriodIndex, + tm.makeRangeIndex, + ] + for index in i_funs: + check_index(index()) + + def test_extract_single_group_returns_frame(self): + # GH11386 extract should always return DataFrame, even when + # there is only one group. Prior to v0.18.0, extract returned + # Series when there was only one group in the regex. 
+ s = Series(["a3", "b3", "c2"], name="series_name") + r = s.str.extract(r"(?P[a-z])", expand=True) + e = DataFrame({"letter": ["a", "b", "c"]}) + tm.assert_frame_equal(r, e) + + def test_extractall(self): + subject_list = [ + "dave@google.com", + "tdhock5@gmail.com", + "maudelaperriere@gmail.com", + "rob@gmail.com some text steve@gmail.com", + "a@b.com some text c@d.com and e@f.com", + np.nan, + "", + ] + expected_tuples = [ + ("dave", "google", "com"), + ("tdhock5", "gmail", "com"), + ("maudelaperriere", "gmail", "com"), + ("rob", "gmail", "com"), + ("steve", "gmail", "com"), + ("a", "b", "com"), + ("c", "d", "com"), + ("e", "f", "com"), + ] + named_pattern = r""" + (?P[a-z0-9]+) + @ + (?P[a-z]+) + \. + (?P[a-z]{2,4}) + """ + expected_columns = ["user", "domain", "tld"] + S = Series(subject_list) + # extractall should return a DataFrame with one row for each + # match, indexed by the subject from which the match came. + expected_index = MultiIndex.from_tuples( + [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)], + names=(None, "match"), + ) + expected_df = DataFrame(expected_tuples, expected_index, expected_columns) + computed_df = S.str.extractall(named_pattern, re.VERBOSE) + tm.assert_frame_equal(computed_df, expected_df) + + # The index of the input Series should be used to construct + # the index of the output DataFrame: + series_index = MultiIndex.from_tuples( + [ + ("single", "Dave"), + ("single", "Toby"), + ("single", "Maude"), + ("multiple", "robAndSteve"), + ("multiple", "abcdef"), + ("none", "missing"), + ("none", "empty"), + ] + ) + Si = Series(subject_list, series_index) + expected_index = MultiIndex.from_tuples( + [ + ("single", "Dave", 0), + ("single", "Toby", 0), + ("single", "Maude", 0), + ("multiple", "robAndSteve", 0), + ("multiple", "robAndSteve", 1), + ("multiple", "abcdef", 0), + ("multiple", "abcdef", 1), + ("multiple", "abcdef", 2), + ], + names=(None, None, "match"), + ) + expected_df = DataFrame(expected_tuples, 
expected_index, expected_columns) + computed_df = Si.str.extractall(named_pattern, re.VERBOSE) + tm.assert_frame_equal(computed_df, expected_df) + + # MultiIndexed subject with names. + Sn = Series(subject_list, series_index) + Sn.index.names = ("matches", "description") + expected_index.names = ("matches", "description", "match") + expected_df = DataFrame(expected_tuples, expected_index, expected_columns) + computed_df = Sn.str.extractall(named_pattern, re.VERBOSE) + tm.assert_frame_equal(computed_df, expected_df) + + # optional groups. + subject_list = ["", "A1", "32"] + named_pattern = "(?P[AB])?(?P[123])" + computed_df = Series(subject_list).str.extractall(named_pattern) + expected_index = MultiIndex.from_tuples( + [(1, 0), (2, 0), (2, 1)], names=(None, "match") + ) + expected_df = DataFrame( + [("A", "1"), (np.nan, "3"), (np.nan, "2")], + expected_index, + columns=["letter", "number"], + ) + tm.assert_frame_equal(computed_df, expected_df) + + # only one of two groups has a name. + pattern = "([AB])?(?P[123])" + computed_df = Series(subject_list).str.extractall(pattern) + expected_df = DataFrame( + [("A", "1"), (np.nan, "3"), (np.nan, "2")], + expected_index, + columns=[0, "number"], + ) + tm.assert_frame_equal(computed_df, expected_df) + + def test_extractall_single_group(self): + # extractall(one named group) returns DataFrame with one named + # column. + s = Series(["a3", "b3", "d4c2"], name="series_name") + r = s.str.extractall(r"(?P[a-z])") + i = MultiIndex.from_tuples( + [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match") + ) + e = DataFrame({"letter": ["a", "b", "d", "c"]}, i) + tm.assert_frame_equal(r, e) + + # extractall(one un-named group) returns DataFrame with one + # un-named column. 
+ r = s.str.extractall(r"([a-z])") + e = DataFrame(["a", "b", "d", "c"], i) + tm.assert_frame_equal(r, e) + + def test_extractall_single_group_with_quantifier(self): + # extractall(one un-named group with quantifier) returns + # DataFrame with one un-named column (GH13382). + s = Series(["ab3", "abc3", "d4cd2"], name="series_name") + r = s.str.extractall(r"([a-z]+)") + i = MultiIndex.from_tuples( + [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match") + ) + e = DataFrame(["ab", "abc", "d", "cd"], i) + tm.assert_frame_equal(r, e) + + @pytest.mark.parametrize( + "data, names", + [ + ([], (None,)), + ([], ("i1",)), + ([], (None, "i2")), + ([], ("i1", "i2")), + (["a3", "b3", "d4c2"], (None,)), + (["a3", "b3", "d4c2"], ("i1", "i2")), + (["a3", "b3", "d4c2"], (None, "i2")), + (["a3", "b3", "d4c2"], ("i1", "i2")), + ], + ) + def test_extractall_no_matches(self, data, names): + # GH19075 extractall with no matches should return a valid MultiIndex + n = len(data) + if len(names) == 1: + i = Index(range(n), name=names[0]) + else: + a = (tuple([i] * (n - 1)) for i in range(n)) + i = MultiIndex.from_tuples(a, names=names) + s = Series(data, name="series_name", index=i, dtype="object") + ei = MultiIndex.from_tuples([], names=(names + ("match",))) + + # one un-named group. + r = s.str.extractall("(z)") + e = DataFrame(columns=[0], index=ei) + tm.assert_frame_equal(r, e) + + # two un-named groups. + r = s.str.extractall("(z)(z)") + e = DataFrame(columns=[0, 1], index=ei) + tm.assert_frame_equal(r, e) + + # one named group. + r = s.str.extractall("(?Pz)") + e = DataFrame(columns=["first"], index=ei) + tm.assert_frame_equal(r, e) + + # two named groups. + r = s.str.extractall("(?Pz)(?Pz)") + e = DataFrame(columns=["first", "second"], index=ei) + tm.assert_frame_equal(r, e) + + # one named, one un-named. 
+ r = s.str.extractall("(z)(?Pz)") + e = DataFrame(columns=[0, "second"], index=ei) + tm.assert_frame_equal(r, e) + + def test_extractall_stringindex(self): + s = Series(["a1a2", "b1", "c1"], name="xxx") + res = s.str.extractall(r"[ab](?P\d)") + exp_idx = MultiIndex.from_tuples( + [(0, 0), (0, 1), (1, 0)], names=[None, "match"] + ) + exp = DataFrame({"digit": ["1", "2", "1"]}, index=exp_idx) + tm.assert_frame_equal(res, exp) + + # index should return the same result as the default index without name + # thus index.name doesn't affect to the result + for idx in [ + Index(["a1a2", "b1", "c1"]), + Index(["a1a2", "b1", "c1"], name="xxx"), + ]: + + res = idx.str.extractall(r"[ab](?P\d)") + tm.assert_frame_equal(res, exp) + + s = Series( + ["a1a2", "b1", "c1"], + name="s_name", + index=Index(["XX", "yy", "zz"], name="idx_name"), + ) + res = s.str.extractall(r"[ab](?P\d)") + exp_idx = MultiIndex.from_tuples( + [("XX", 0), ("XX", 1), ("yy", 0)], names=["idx_name", "match"] + ) + exp = DataFrame({"digit": ["1", "2", "1"]}, index=exp_idx) + tm.assert_frame_equal(res, exp) + + def test_extractall_errors(self): + # Does not make sense to use extractall with a regex that has + # no capture groups. (it returns DataFrame with one column for + # each capture group) + s = Series(["a3", "b3", "d4c2"], name="series_name") + with pytest.raises(ValueError, match="no capture groups"): + s.str.extractall(r"[a-z]") + + def test_extract_index_one_two_groups(self): + s = Series(["a3", "b3", "d4c2"], index=["A3", "B3", "D4"], name="series_name") + r = s.index.str.extract(r"([A-Z])", expand=True) + e = DataFrame(["A", "B", "D"]) + tm.assert_frame_equal(r, e) + + # Prior to v0.18.0, index.str.extract(regex with one group) + # returned Index. With more than one group, extract raised an + # error (GH9980). Now extract always returns DataFrame. 
+ r = s.index.str.extract(r"(?P[A-Z])(?P[0-9])", expand=True) + e_list = [("A", "3"), ("B", "3"), ("D", "4")] + e = DataFrame(e_list, columns=["letter", "digit"]) + tm.assert_frame_equal(r, e) + + def test_extractall_same_as_extract(self): + s = Series(["a3", "b3", "c2"], name="series_name") + + pattern_two_noname = r"([a-z])([0-9])" + extract_two_noname = s.str.extract(pattern_two_noname, expand=True) + has_multi_index = s.str.extractall(pattern_two_noname) + no_multi_index = has_multi_index.xs(0, level="match") + tm.assert_frame_equal(extract_two_noname, no_multi_index) + + pattern_two_named = r"(?P[a-z])(?P[0-9])" + extract_two_named = s.str.extract(pattern_two_named, expand=True) + has_multi_index = s.str.extractall(pattern_two_named) + no_multi_index = has_multi_index.xs(0, level="match") + tm.assert_frame_equal(extract_two_named, no_multi_index) + + pattern_one_named = r"(?P[a-z])" + extract_one_named = s.str.extract(pattern_one_named, expand=True) + has_multi_index = s.str.extractall(pattern_one_named) + no_multi_index = has_multi_index.xs(0, level="match") + tm.assert_frame_equal(extract_one_named, no_multi_index) + + pattern_one_noname = r"([a-z])" + extract_one_noname = s.str.extract(pattern_one_noname, expand=True) + has_multi_index = s.str.extractall(pattern_one_noname) + no_multi_index = has_multi_index.xs(0, level="match") + tm.assert_frame_equal(extract_one_noname, no_multi_index) + + def test_extractall_same_as_extract_subject_index(self): + # same as above tests, but s has an MultiIndex. 
+ i = MultiIndex.from_tuples( + [("A", "first"), ("B", "second"), ("C", "third")], + names=("capital", "ordinal"), + ) + s = Series(["a3", "b3", "c2"], i, name="series_name") + + pattern_two_noname = r"([a-z])([0-9])" + extract_two_noname = s.str.extract(pattern_two_noname, expand=True) + has_match_index = s.str.extractall(pattern_two_noname) + no_match_index = has_match_index.xs(0, level="match") + tm.assert_frame_equal(extract_two_noname, no_match_index) + + pattern_two_named = r"(?P[a-z])(?P[0-9])" + extract_two_named = s.str.extract(pattern_two_named, expand=True) + has_match_index = s.str.extractall(pattern_two_named) + no_match_index = has_match_index.xs(0, level="match") + tm.assert_frame_equal(extract_two_named, no_match_index) + + pattern_one_named = r"(?P[a-z])" + extract_one_named = s.str.extract(pattern_one_named, expand=True) + has_match_index = s.str.extractall(pattern_one_named) + no_match_index = has_match_index.xs(0, level="match") + tm.assert_frame_equal(extract_one_named, no_match_index) + + pattern_one_noname = r"([a-z])" + extract_one_noname = s.str.extract(pattern_one_noname, expand=True) + has_match_index = s.str.extractall(pattern_one_noname) + no_match_index = has_match_index.xs(0, level="match") + tm.assert_frame_equal(extract_one_noname, no_match_index) + + def test_empty_str_methods(self): + empty_str = empty = Series(dtype=object) + empty_int = Series(dtype="int64") + empty_bool = Series(dtype=bool) + empty_bytes = Series(dtype=object) + + # GH7241 + # (extract) on empty series + + tm.assert_series_equal(empty_str, empty.str.cat(empty)) + assert "" == empty.str.cat() + tm.assert_series_equal(empty_str, empty.str.title()) + tm.assert_series_equal(empty_int, empty.str.count("a")) + tm.assert_series_equal(empty_bool, empty.str.contains("a")) + tm.assert_series_equal(empty_bool, empty.str.startswith("a")) + tm.assert_series_equal(empty_bool, empty.str.endswith("a")) + tm.assert_series_equal(empty_str, empty.str.lower()) + 
tm.assert_series_equal(empty_str, empty.str.upper()) + tm.assert_series_equal(empty_str, empty.str.replace("a", "b")) + tm.assert_series_equal(empty_str, empty.str.repeat(3)) + tm.assert_series_equal(empty_bool, empty.str.match("^a")) + tm.assert_frame_equal( + DataFrame(columns=[0], dtype=str), empty.str.extract("()", expand=True) + ) + tm.assert_frame_equal( + DataFrame(columns=[0, 1], dtype=str), empty.str.extract("()()", expand=True) + ) + tm.assert_series_equal(empty_str, empty.str.extract("()", expand=False)) + tm.assert_frame_equal( + DataFrame(columns=[0, 1], dtype=str), + empty.str.extract("()()", expand=False), + ) + tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies()) + tm.assert_series_equal(empty_str, empty_str.str.join("")) + tm.assert_series_equal(empty_int, empty.str.len()) + tm.assert_series_equal(empty_str, empty_str.str.findall("a")) + tm.assert_series_equal(empty_int, empty.str.find("a")) + tm.assert_series_equal(empty_int, empty.str.rfind("a")) + tm.assert_series_equal(empty_str, empty.str.pad(42)) + tm.assert_series_equal(empty_str, empty.str.center(42)) + tm.assert_series_equal(empty_str, empty.str.split("a")) + tm.assert_series_equal(empty_str, empty.str.rsplit("a")) + tm.assert_series_equal(empty_str, empty.str.partition("a", expand=False)) + tm.assert_series_equal(empty_str, empty.str.rpartition("a", expand=False)) + tm.assert_series_equal(empty_str, empty.str.slice(stop=1)) + tm.assert_series_equal(empty_str, empty.str.slice(step=1)) + tm.assert_series_equal(empty_str, empty.str.strip()) + tm.assert_series_equal(empty_str, empty.str.lstrip()) + tm.assert_series_equal(empty_str, empty.str.rstrip()) + tm.assert_series_equal(empty_str, empty.str.wrap(42)) + tm.assert_series_equal(empty_str, empty.str.get(0)) + tm.assert_series_equal(empty_str, empty_bytes.str.decode("ascii")) + tm.assert_series_equal(empty_bytes, empty.str.encode("ascii")) + # ismethods should always return boolean (GH 29624) + 
tm.assert_series_equal(empty_bool, empty.str.isalnum()) + tm.assert_series_equal(empty_bool, empty.str.isalpha()) + tm.assert_series_equal(empty_bool, empty.str.isdigit()) + tm.assert_series_equal(empty_bool, empty.str.isspace()) + tm.assert_series_equal(empty_bool, empty.str.islower()) + tm.assert_series_equal(empty_bool, empty.str.isupper()) + tm.assert_series_equal(empty_bool, empty.str.istitle()) + tm.assert_series_equal(empty_bool, empty.str.isnumeric()) + tm.assert_series_equal(empty_bool, empty.str.isdecimal()) + tm.assert_series_equal(empty_str, empty.str.capitalize()) + tm.assert_series_equal(empty_str, empty.str.swapcase()) + tm.assert_series_equal(empty_str, empty.str.normalize("NFC")) + + table = str.maketrans("a", "b") + tm.assert_series_equal(empty_str, empty.str.translate(table)) + + def test_empty_str_methods_to_frame(self): + empty = Series(dtype=str) + empty_df = DataFrame() + tm.assert_frame_equal(empty_df, empty.str.partition("a")) + tm.assert_frame_equal(empty_df, empty.str.rpartition("a")) + + def test_ismethods(self): + values = ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", " "] + str_s = Series(values) + alnum_e = [True, True, True, True, True, False, True, True, False, False] + alpha_e = [True, True, True, False, False, False, True, False, False, False] + digit_e = [False, False, False, True, False, False, False, True, False, False] + + # TODO: unused + num_e = [ # noqa + False, + False, + False, + True, + False, + False, + False, + True, + False, + False, + ] + + space_e = [False, False, False, False, False, False, False, False, False, True] + lower_e = [False, True, False, False, False, False, False, False, False, False] + upper_e = [True, False, False, False, True, False, True, False, False, False] + title_e = [True, False, True, False, True, False, False, False, False, False] + + tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e)) + tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e)) + 
tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e)) + tm.assert_series_equal(str_s.str.isspace(), Series(space_e)) + tm.assert_series_equal(str_s.str.islower(), Series(lower_e)) + tm.assert_series_equal(str_s.str.isupper(), Series(upper_e)) + tm.assert_series_equal(str_s.str.istitle(), Series(title_e)) + + assert str_s.str.isalnum().tolist() == [v.isalnum() for v in values] + assert str_s.str.isalpha().tolist() == [v.isalpha() for v in values] + assert str_s.str.isdigit().tolist() == [v.isdigit() for v in values] + assert str_s.str.isspace().tolist() == [v.isspace() for v in values] + assert str_s.str.islower().tolist() == [v.islower() for v in values] + assert str_s.str.isupper().tolist() == [v.isupper() for v in values] + assert str_s.str.istitle().tolist() == [v.istitle() for v in values] + + def test_isnumeric(self): + # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER + # 0x2605: ★ not number + # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY + # 0xFF13: 3 Em 3 + values = ["A", "3", "¼", "★", "፸", "3", "four"] + s = Series(values) + numeric_e = [False, True, True, False, True, True, False] + decimal_e = [False, True, False, False, False, True, False] + tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e)) + tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e)) + + unicodes = ["A", "3", "¼", "★", "፸", "3", "four"] + assert s.str.isnumeric().tolist() == [v.isnumeric() for v in unicodes] + assert s.str.isdecimal().tolist() == [v.isdecimal() for v in unicodes] + + values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] + s = Series(values) + numeric_e = [False, np.nan, True, False, np.nan, True, False] + decimal_e = [False, np.nan, False, False, np.nan, True, False] + tm.assert_series_equal(s.str.isnumeric(), Series(numeric_e)) + tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e)) + + def test_get_dummies(self): + s = Series(["a|b", "a|c", np.nan]) + result = s.str.get_dummies("|") + expected = DataFrame([[1, 1, 0], [1, 0, 1], [0, 0, 0]], 
columns=list("abc")) + tm.assert_frame_equal(result, expected) + + s = Series(["a;b", "a", 7]) + result = s.str.get_dummies(";") + expected = DataFrame([[0, 1, 1], [0, 1, 0], [1, 0, 0]], columns=list("7ab")) + tm.assert_frame_equal(result, expected) + + # GH9980, GH8028 + idx = Index(["a|b", "a|c", "b|c"]) + result = idx.str.get_dummies("|") + + expected = MultiIndex.from_tuples( + [(1, 1, 0), (1, 0, 1), (0, 1, 1)], names=("a", "b", "c") + ) + tm.assert_index_equal(result, expected) + + def test_get_dummies_with_name_dummy(self): + # GH 12180 + # Dummies named 'name' should work as expected + s = Series(["a", "b,name", "b"]) + result = s.str.get_dummies(",") + expected = DataFrame( + [[1, 0, 0], [0, 1, 1], [0, 1, 0]], columns=["a", "b", "name"] + ) + tm.assert_frame_equal(result, expected) + + idx = Index(["a|b", "name|c", "b|name"]) + result = idx.str.get_dummies("|") + + expected = MultiIndex.from_tuples( + [(1, 1, 0, 0), (0, 0, 1, 1), (0, 1, 0, 1)], names=("a", "b", "c", "name") + ) + tm.assert_index_equal(result, expected) + + def test_join(self): + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + result = values.str.split("_").str.join("_") + tm.assert_series_equal(values, result) + + # mixed + mixed = Series( + [ + "a_b", + np.nan, + "asdf_cas_asdf", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] + ) + + rs = Series(mixed).str.split("_").str.join("_") + xp = Series( + [ + "a_b", + np.nan, + "asdf_cas_asdf", + np.nan, + np.nan, + "foo", + np.nan, + np.nan, + np.nan, + ] + ) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + def test_len(self): + values = Series(["foo", "fooo", "fooooo", np.nan, "fooooooo"]) + + result = values.str.len() + exp = values.map(lambda x: len(x) if notna(x) else np.nan) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series( + [ + "a_b", + np.nan, + "asdf_cas_asdf", + True, + datetime.today(), + "foo", + None, + 1, + 2.0, + ] + ) + + rs = Series(mixed).str.len() + xp = Series([3, 
np.nan, 13, np.nan, np.nan, 3, np.nan, np.nan, np.nan]) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + def test_findall(self): + values = Series(["fooBAD__barBAD", np.nan, "foo", "BAD"]) + + result = values.str.findall("BAD[_]*") + exp = Series([["BAD__", "BAD"], np.nan, [], ["BAD"]]) + tm.assert_almost_equal(result, exp) + + # mixed + mixed = Series( + [ + "fooBAD__barBAD", + np.nan, + "foo", + True, + datetime.today(), + "BAD", + None, + 1, + 2.0, + ] + ) + + rs = Series(mixed).str.findall("BAD[_]*") + xp = Series( + [ + ["BAD__", "BAD"], + np.nan, + [], + np.nan, + np.nan, + ["BAD"], + np.nan, + np.nan, + np.nan, + ] + ) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + def test_find(self): + values = Series(["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF", "XXXX"]) + result = values.str.find("EF") + tm.assert_series_equal(result, Series([4, 3, 1, 0, -1])) + expected = np.array([v.find("EF") for v in values.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.rfind("EF") + tm.assert_series_equal(result, Series([4, 5, 7, 4, -1])) + expected = np.array([v.rfind("EF") for v in values.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.find("EF", 3) + tm.assert_series_equal(result, Series([4, 3, 7, 4, -1])) + expected = np.array([v.find("EF", 3) for v in values.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.rfind("EF", 3) + tm.assert_series_equal(result, Series([4, 5, 7, 4, -1])) + expected = np.array([v.rfind("EF", 3) for v in values.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.find("EF", 3, 6) + tm.assert_series_equal(result, Series([4, 3, -1, 4, -1])) + expected = np.array([v.find("EF", 3, 6) for v in values.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = 
values.str.rfind("EF", 3, 6) + tm.assert_series_equal(result, Series([4, 3, -1, 4, -1])) + expected = np.array( + [v.rfind("EF", 3, 6) for v in values.values], dtype=np.int64 + ) + tm.assert_numpy_array_equal(result.values, expected) + + with pytest.raises(TypeError, match="expected a string object, not int"): + result = values.str.find(0) + + with pytest.raises(TypeError, match="expected a string object, not int"): + result = values.str.rfind(0) + + def test_find_nan(self): + values = Series(["ABCDEFG", np.nan, "DEFGHIJEF", np.nan, "XXXX"]) + result = values.str.find("EF") + tm.assert_series_equal(result, Series([4, np.nan, 1, np.nan, -1])) + + result = values.str.rfind("EF") + tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1])) + + result = values.str.find("EF", 3) + tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1])) + + result = values.str.rfind("EF", 3) + tm.assert_series_equal(result, Series([4, np.nan, 7, np.nan, -1])) + + result = values.str.find("EF", 3, 6) + tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1])) + + result = values.str.rfind("EF", 3, 6) + tm.assert_series_equal(result, Series([4, np.nan, -1, np.nan, -1])) + + def test_index(self): + def _check(result, expected): + if isinstance(result, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + for klass in [Series, Index]: + s = klass(["ABCDEFG", "BCDEFEF", "DEFGHIJEF", "EFGHEF"]) + + result = s.str.index("EF") + _check(result, klass([4, 3, 1, 0])) + expected = np.array([v.index("EF") for v in s.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = s.str.rindex("EF") + _check(result, klass([4, 5, 7, 4])) + expected = np.array([v.rindex("EF") for v in s.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = s.str.index("EF", 3) + _check(result, klass([4, 3, 7, 4])) + expected = np.array([v.index("EF", 3) for v in s.values], 
dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = s.str.rindex("EF", 3) + _check(result, klass([4, 5, 7, 4])) + expected = np.array([v.rindex("EF", 3) for v in s.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = s.str.index("E", 4, 8) + _check(result, klass([4, 5, 7, 4])) + expected = np.array([v.index("E", 4, 8) for v in s.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + result = s.str.rindex("E", 0, 5) + _check(result, klass([4, 3, 1, 4])) + expected = np.array([v.rindex("E", 0, 5) for v in s.values], dtype=np.int64) + tm.assert_numpy_array_equal(result.values, expected) + + with pytest.raises(ValueError, match="substring not found"): + result = s.str.index("DE") + + msg = "expected a string object, not int" + with pytest.raises(TypeError, match=msg): + result = s.str.index(0) + + # test with nan + s = Series(["abcb", "ab", "bcbe", np.nan]) + result = s.str.index("b") + tm.assert_series_equal(result, Series([1, 1, 0, np.nan])) + result = s.str.rindex("b") + tm.assert_series_equal(result, Series([3, 1, 2, np.nan])) + + def test_pad(self): + values = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"]) + + result = values.str.pad(5, side="left") + exp = Series([" a", " b", np.nan, " c", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + result = values.str.pad(5, side="right") + exp = Series(["a ", "b ", np.nan, "c ", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + result = values.str.pad(5, side="both") + exp = Series([" a ", " b ", np.nan, " c ", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + # mixed + mixed = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0]) + + rs = Series(mixed).str.pad(5, side="left") + xp = Series( + [" a", np.nan, " b", np.nan, np.nan, " ee", np.nan, np.nan, np.nan] + ) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + mixed = Series(["a", np.nan, "b", 
True, datetime.today(), "ee", None, 1, 2.0]) + + rs = Series(mixed).str.pad(5, side="right") + xp = Series( + ["a ", np.nan, "b ", np.nan, np.nan, "ee ", np.nan, np.nan, np.nan] + ) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + mixed = Series(["a", np.nan, "b", True, datetime.today(), "ee", None, 1, 2.0]) + + rs = Series(mixed).str.pad(5, side="both") + xp = Series( + [" a ", np.nan, " b ", np.nan, np.nan, " ee ", np.nan, np.nan, np.nan] + ) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + def test_pad_fillchar(self): + + values = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"]) + + result = values.str.pad(5, side="left", fillchar="X") + exp = Series(["XXXXa", "XXXXb", np.nan, "XXXXc", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + result = values.str.pad(5, side="right", fillchar="X") + exp = Series(["aXXXX", "bXXXX", np.nan, "cXXXX", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + result = values.str.pad(5, side="both", fillchar="X") + exp = Series(["XXaXX", "XXbXX", np.nan, "XXcXX", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + msg = "fillchar must be a character, not str" + with pytest.raises(TypeError, match=msg): + result = values.str.pad(5, fillchar="XY") + + msg = "fillchar must be a character, not int" + with pytest.raises(TypeError, match=msg): + result = values.str.pad(5, fillchar=5) + + @pytest.mark.parametrize("f", ["center", "ljust", "rjust", "zfill", "pad"]) + def test_pad_width(self, f): + # see gh-13598 + s = Series(["1", "22", "a", "bb"]) + msg = "width must be of integer type, not*" + + with pytest.raises(TypeError, match=msg): + getattr(s.str, f)("f") + + def test_translate(self): + def _check(result, expected): + if isinstance(result, Series): + tm.assert_series_equal(result, expected) + else: + tm.assert_index_equal(result, expected) + + for klass in [Series, Index]: + s = klass(["abcdefg", "abcc", "cdddfg", "cdefggg"]) + table = str.maketrans("abc", 
"cde") + result = s.str.translate(table) + expected = klass(["cdedefg", "cdee", "edddfg", "edefggg"]) + _check(result, expected) + + # Series with non-string values + s = Series(["a", "b", "c", 1.2]) + expected = Series(["c", "d", "e", np.nan]) + result = s.str.translate(table) + tm.assert_series_equal(result, expected) + + def test_center_ljust_rjust(self): + values = Series(["a", "b", np.nan, "c", np.nan, "eeeeee"]) + + result = values.str.center(5) + exp = Series([" a ", " b ", np.nan, " c ", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + result = values.str.ljust(5) + exp = Series(["a ", "b ", np.nan, "c ", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + result = values.str.rjust(5) + exp = Series([" a", " b", np.nan, " c", np.nan, "eeeeee"]) + tm.assert_almost_equal(result, exp) + + # mixed + mixed = Series( + ["a", np.nan, "b", True, datetime.today(), "c", "eee", None, 1, 2.0] + ) + + rs = Series(mixed).str.center(5) + xp = Series( + [ + " a ", + np.nan, + " b ", + np.nan, + np.nan, + " c ", + " eee ", + np.nan, + np.nan, + np.nan, + ] + ) + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + rs = Series(mixed).str.ljust(5) + xp = Series( + [ + "a ", + np.nan, + "b ", + np.nan, + np.nan, + "c ", + "eee ", + np.nan, + np.nan, + np.nan, + ] + ) + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + rs = Series(mixed).str.rjust(5) + xp = Series( + [ + " a", + np.nan, + " b", + np.nan, + np.nan, + " c", + " eee", + np.nan, + np.nan, + np.nan, + ] + ) + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + def test_center_ljust_rjust_fillchar(self): + values = Series(["a", "bb", "cccc", "ddddd", "eeeeee"]) + + result = values.str.center(5, fillchar="X") + expected = Series(["XXaXX", "XXbbX", "Xcccc", "ddddd", "eeeeee"]) + tm.assert_series_equal(result, expected) + expected = np.array([v.center(5, "X") for v in values.values], dtype=np.object_) + tm.assert_numpy_array_equal(result.values, 
expected) + + result = values.str.ljust(5, fillchar="X") + expected = Series(["aXXXX", "bbXXX", "ccccX", "ddddd", "eeeeee"]) + tm.assert_series_equal(result, expected) + expected = np.array([v.ljust(5, "X") for v in values.values], dtype=np.object_) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.rjust(5, fillchar="X") + expected = Series(["XXXXa", "XXXbb", "Xcccc", "ddddd", "eeeeee"]) + tm.assert_series_equal(result, expected) + expected = np.array([v.rjust(5, "X") for v in values.values], dtype=np.object_) + tm.assert_numpy_array_equal(result.values, expected) + + # If fillchar is not a charatter, normal str raises TypeError + # 'aaa'.ljust(5, 'XY') + # TypeError: must be char, not str + template = "fillchar must be a character, not {dtype}" + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.center(5, fillchar="XY") + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.ljust(5, fillchar="XY") + + with pytest.raises(TypeError, match=template.format(dtype="str")): + values.str.rjust(5, fillchar="XY") + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.center(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.ljust(5, fillchar=1) + + with pytest.raises(TypeError, match=template.format(dtype="int")): + values.str.rjust(5, fillchar=1) + + def test_zfill(self): + values = Series(["1", "22", "aaa", "333", "45678"]) + + result = values.str.zfill(5) + expected = Series(["00001", "00022", "00aaa", "00333", "45678"]) + tm.assert_series_equal(result, expected) + expected = np.array([v.zfill(5) for v in values.values], dtype=np.object_) + tm.assert_numpy_array_equal(result.values, expected) + + result = values.str.zfill(3) + expected = Series(["001", "022", "aaa", "333", "45678"]) + tm.assert_series_equal(result, expected) + expected = np.array([v.zfill(3) for v in values.values], dtype=np.object_) + 
tm.assert_numpy_array_equal(result.values, expected) + + values = Series(["1", np.nan, "aaa", np.nan, "45678"]) + result = values.str.zfill(5) + expected = Series(["00001", np.nan, "00aaa", np.nan, "45678"]) + tm.assert_series_equal(result, expected) + + def test_split(self): + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + + result = values.str.split("_") + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) + tm.assert_series_equal(result, exp) + + # more than one char + values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h"]) + result = values.str.split("__") + tm.assert_series_equal(result, exp) + + result = values.str.split("__", expand=False) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0]) + result = mixed.str.split("_") + exp = Series( + [ + ["a", "b", "c"], + np.nan, + ["d", "e", "f"], + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ] + ) + assert isinstance(result, Series) + tm.assert_almost_equal(result, exp) + + result = mixed.str.split("_", expand=False) + assert isinstance(result, Series) + tm.assert_almost_equal(result, exp) + + # regex split + values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"]) + result = values.str.split("[,_]") + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) + tm.assert_series_equal(result, exp) + + def test_rsplit(self): + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + result = values.str.rsplit("_") + exp = Series([["a", "b", "c"], ["c", "d", "e"], np.nan, ["f", "g", "h"]]) + tm.assert_series_equal(result, exp) + + # more than one char + values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h"]) + result = values.str.rsplit("__") + tm.assert_series_equal(result, exp) + + result = values.str.rsplit("__", expand=False) + tm.assert_series_equal(result, exp) + + # mixed + mixed = Series(["a_b_c", np.nan, "d_e_f", True, datetime.today(), None, 1, 2.0]) + result = 
mixed.str.rsplit("_") + exp = Series( + [ + ["a", "b", "c"], + np.nan, + ["d", "e", "f"], + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + ] + ) + assert isinstance(result, Series) + tm.assert_almost_equal(result, exp) + + result = mixed.str.rsplit("_", expand=False) + assert isinstance(result, Series) + tm.assert_almost_equal(result, exp) + + # regex split is not supported by rsplit + values = Series(["a,b_c", "c_d,e", np.nan, "f,g,h"]) + result = values.str.rsplit("[,_]") + exp = Series([["a,b_c"], ["c_d,e"], np.nan, ["f,g,h"]]) + tm.assert_series_equal(result, exp) + + # setting max number of splits, make sure it's from reverse + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + result = values.str.rsplit("_", n=1) + exp = Series([["a_b", "c"], ["c_d", "e"], np.nan, ["f_g", "h"]]) + tm.assert_series_equal(result, exp) + + def test_split_blank_string(self): + # expand blank split GH 20067 + values = Series([""], name="test") + result = values.str.split(expand=True) + exp = DataFrame([[]]) # NOTE: this is NOT an empty DataFrame + tm.assert_frame_equal(result, exp) + + values = Series(["a b c", "a b", "", " "], name="test") + result = values.str.split(expand=True) + exp = DataFrame( + [ + ["a", "b", "c"], + ["a", "b", np.nan], + [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan], + ] + ) + tm.assert_frame_equal(result, exp) + + def test_split_noargs(self): + # #1859 + s = Series(["Wes McKinney", "Travis Oliphant"]) + result = s.str.split() + expected = ["Travis", "Oliphant"] + assert result[1] == expected + result = s.str.rsplit() + assert result[1] == expected + + def test_split_maxsplit(self): + # re.split 0, str.split -1 + s = Series(["bd asdf jfg", "kjasdflqw asdfnfk"]) + + result = s.str.split(n=-1) + xp = s.str.split() + tm.assert_series_equal(result, xp) + + result = s.str.split(n=0) + tm.assert_series_equal(result, xp) + + xp = s.str.split("asdf") + result = s.str.split("asdf", n=0) + tm.assert_series_equal(result, xp) + + result = 
s.str.split("asdf", n=-1) + tm.assert_series_equal(result, xp) + + def test_split_no_pat_with_nonzero_n(self): + s = Series(["split once", "split once too!"]) + result = s.str.split(n=1) + expected = Series({0: ["split", "once"], 1: ["split", "once too!"]}) + tm.assert_series_equal(expected, result, check_index_type=False) + + def test_split_to_dataframe(self): + s = Series(["nosplit", "alsonosplit"]) + result = s.str.split("_", expand=True) + exp = DataFrame({0: Series(["nosplit", "alsonosplit"])}) + tm.assert_frame_equal(result, exp) + + s = Series(["some_equal_splits", "with_no_nans"]) + result = s.str.split("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]} + ) + tm.assert_frame_equal(result, exp) + + s = Series(["some_unequal_splits", "one_of_these_things_is_not"]) + result = s.str.split("_", expand=True) + exp = DataFrame( + { + 0: ["some", "one"], + 1: ["unequal", "of"], + 2: ["splits", "these"], + 3: [np.nan, "things"], + 4: [np.nan, "is"], + 5: [np.nan, "not"], + } + ) + tm.assert_frame_equal(result, exp) + + s = Series(["some_splits", "with_index"], index=["preserve", "me"]) + result = s.str.split("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["splits", "index"]}, index=["preserve", "me"] + ) + tm.assert_frame_equal(result, exp) + + with pytest.raises(ValueError, match="expand must be"): + s.str.split("_", expand="not_a_boolean") + + def test_split_to_multiindex_expand(self): + # https://github.com/pandas-dev/pandas/issues/23677 + + idx = Index(["nosplit", "alsonosplit", np.nan]) + result = idx.str.split("_", expand=True) + exp = idx + tm.assert_index_equal(result, exp) + assert result.nlevels == 1 + + idx = Index(["some_equal_splits", "with_no_nans", np.nan, None]) + result = idx.str.split("_", expand=True) + exp = MultiIndex.from_tuples( + [ + ("some", "equal", "splits"), + ("with", "no", "nans"), + [np.nan, np.nan, np.nan], + [None, None, None], + ] + ) + 
tm.assert_index_equal(result, exp) + assert result.nlevels == 3 + + idx = Index(["some_unequal_splits", "one_of_these_things_is_not", np.nan, None]) + result = idx.str.split("_", expand=True) + exp = MultiIndex.from_tuples( + [ + ("some", "unequal", "splits", np.nan, np.nan, np.nan), + ("one", "of", "these", "things", "is", "not"), + (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan), + (None, None, None, None, None, None), + ] + ) + tm.assert_index_equal(result, exp) + assert result.nlevels == 6 + + with pytest.raises(ValueError, match="expand must be"): + idx.str.split("_", expand="not_a_boolean") + + def test_rsplit_to_dataframe_expand(self): + s = Series(["nosplit", "alsonosplit"]) + result = s.str.rsplit("_", expand=True) + exp = DataFrame({0: Series(["nosplit", "alsonosplit"])}) + tm.assert_frame_equal(result, exp) + + s = Series(["some_equal_splits", "with_no_nans"]) + result = s.str.rsplit("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]} + ) + tm.assert_frame_equal(result, exp) + + result = s.str.rsplit("_", expand=True, n=2) + exp = DataFrame( + {0: ["some", "with"], 1: ["equal", "no"], 2: ["splits", "nans"]} + ) + tm.assert_frame_equal(result, exp) + + result = s.str.rsplit("_", expand=True, n=1) + exp = DataFrame({0: ["some_equal", "with_no"], 1: ["splits", "nans"]}) + tm.assert_frame_equal(result, exp) + + s = Series(["some_splits", "with_index"], index=["preserve", "me"]) + result = s.str.rsplit("_", expand=True) + exp = DataFrame( + {0: ["some", "with"], 1: ["splits", "index"]}, index=["preserve", "me"] + ) + tm.assert_frame_equal(result, exp) + + def test_rsplit_to_multiindex_expand(self): + idx = Index(["nosplit", "alsonosplit"]) + result = idx.str.rsplit("_", expand=True) + exp = idx + tm.assert_index_equal(result, exp) + assert result.nlevels == 1 + + idx = Index(["some_equal_splits", "with_no_nans"]) + result = idx.str.rsplit("_", expand=True) + exp = MultiIndex.from_tuples( + [("some", 
"equal", "splits"), ("with", "no", "nans")] + ) + tm.assert_index_equal(result, exp) + assert result.nlevels == 3 + + idx = Index(["some_equal_splits", "with_no_nans"]) + result = idx.str.rsplit("_", expand=True, n=1) + exp = MultiIndex.from_tuples([("some_equal", "splits"), ("with_no", "nans")]) + tm.assert_index_equal(result, exp) + assert result.nlevels == 2 + + def test_split_nan_expand(self): + # gh-18450 + s = Series(["foo,bar,baz", np.nan]) + result = s.str.split(",", expand=True) + exp = DataFrame([["foo", "bar", "baz"], [np.nan, np.nan, np.nan]]) + tm.assert_frame_equal(result, exp) + + # check that these are actually np.nan and not None + # TODO see GH 18463 + # tm.assert_frame_equal does not differentiate + assert all(np.isnan(x) for x in result.iloc[1]) + + def test_split_with_name(self): + # GH 12617 + + # should preserve name + s = Series(["a,b", "c,d"], name="xxx") + res = s.str.split(",") + exp = Series([["a", "b"], ["c", "d"]], name="xxx") + tm.assert_series_equal(res, exp) + + res = s.str.split(",", expand=True) + exp = DataFrame([["a", "b"], ["c", "d"]]) + tm.assert_frame_equal(res, exp) + + idx = Index(["a,b", "c,d"], name="xxx") + res = idx.str.split(",") + exp = Index([["a", "b"], ["c", "d"]], name="xxx") + assert res.nlevels == 1 + tm.assert_index_equal(res, exp) + + res = idx.str.split(",", expand=True) + exp = MultiIndex.from_tuples([("a", "b"), ("c", "d")]) + assert res.nlevels == 2 + tm.assert_index_equal(res, exp) + + def test_partition_series(self): + # https://github.com/pandas-dev/pandas/issues/23558 + + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None]) + + result = values.str.partition("_", expand=False) + exp = Series( + [("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h"), None] + ) + tm.assert_series_equal(result, exp) + + result = values.str.rpartition("_", expand=False) + exp = Series( + [("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h"), None] + ) + tm.assert_series_equal(result, exp) + + # 
more than one char + values = Series(["a__b__c", "c__d__e", np.nan, "f__g__h", None]) + result = values.str.partition("__", expand=False) + exp = Series( + [ + ("a", "__", "b__c"), + ("c", "__", "d__e"), + np.nan, + ("f", "__", "g__h"), + None, + ] + ) + tm.assert_series_equal(result, exp) + + result = values.str.rpartition("__", expand=False) + exp = Series( + [ + ("a__b", "__", "c"), + ("c__d", "__", "e"), + np.nan, + ("f__g", "__", "h"), + None, + ] + ) + tm.assert_series_equal(result, exp) + + # None + values = Series(["a b c", "c d e", np.nan, "f g h", None]) + result = values.str.partition(expand=False) + exp = Series( + [("a", " ", "b c"), ("c", " ", "d e"), np.nan, ("f", " ", "g h"), None] + ) + tm.assert_series_equal(result, exp) + + result = values.str.rpartition(expand=False) + exp = Series( + [("a b", " ", "c"), ("c d", " ", "e"), np.nan, ("f g", " ", "h"), None] + ) + tm.assert_series_equal(result, exp) + + # Not split + values = Series(["abc", "cde", np.nan, "fgh", None]) + result = values.str.partition("_", expand=False) + exp = Series([("abc", "", ""), ("cde", "", ""), np.nan, ("fgh", "", ""), None]) + tm.assert_series_equal(result, exp) + + result = values.str.rpartition("_", expand=False) + exp = Series([("", "", "abc"), ("", "", "cde"), np.nan, ("", "", "fgh"), None]) + tm.assert_series_equal(result, exp) + + # unicode + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + + result = values.str.partition("_", expand=False) + exp = Series([("a", "_", "b_c"), ("c", "_", "d_e"), np.nan, ("f", "_", "g_h")]) + tm.assert_series_equal(result, exp) + + result = values.str.rpartition("_", expand=False) + exp = Series([("a_b", "_", "c"), ("c_d", "_", "e"), np.nan, ("f_g", "_", "h")]) + tm.assert_series_equal(result, exp) + + # compare to standard lib + values = Series(["A_B_C", "B_C_D", "E_F_G", "EFGHEF"]) + result = values.str.partition("_", expand=False).tolist() + assert result == [v.partition("_") for v in values] + result = 
values.str.rpartition("_", expand=False).tolist() + assert result == [v.rpartition("_") for v in values] + + def test_partition_index(self): + # https://github.com/pandas-dev/pandas/issues/23558 + + values = Index(["a_b_c", "c_d_e", "f_g_h", np.nan, None]) + + result = values.str.partition("_", expand=False) + exp = Index( + np.array( + [("a", "_", "b_c"), ("c", "_", "d_e"), ("f", "_", "g_h"), np.nan, None], + dtype=object, + ) + ) + tm.assert_index_equal(result, exp) + assert result.nlevels == 1 + + result = values.str.rpartition("_", expand=False) + exp = Index( + np.array( + [("a_b", "_", "c"), ("c_d", "_", "e"), ("f_g", "_", "h"), np.nan, None], + dtype=object, + ) + ) + tm.assert_index_equal(result, exp) + assert result.nlevels == 1 + + result = values.str.partition("_") + exp = Index( + [ + ("a", "_", "b_c"), + ("c", "_", "d_e"), + ("f", "_", "g_h"), + (np.nan, np.nan, np.nan), + (None, None, None), + ] + ) + tm.assert_index_equal(result, exp) + assert isinstance(result, MultiIndex) + assert result.nlevels == 3 + + result = values.str.rpartition("_") + exp = Index( + [ + ("a_b", "_", "c"), + ("c_d", "_", "e"), + ("f_g", "_", "h"), + (np.nan, np.nan, np.nan), + (None, None, None), + ] + ) + tm.assert_index_equal(result, exp) + assert isinstance(result, MultiIndex) + assert result.nlevels == 3 + + def test_partition_to_dataframe(self): + # https://github.com/pandas-dev/pandas/issues/23558 + + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None]) + result = values.str.partition("_") + exp = DataFrame( + { + 0: ["a", "c", np.nan, "f", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["b_c", "d_e", np.nan, "g_h", None], + } + ) + tm.assert_frame_equal(result, exp) + + result = values.str.rpartition("_") + exp = DataFrame( + { + 0: ["a_b", "c_d", np.nan, "f_g", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["c", "e", np.nan, "h", None], + } + ) + tm.assert_frame_equal(result, exp) + + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h", None]) + result = 
values.str.partition("_", expand=True) + exp = DataFrame( + { + 0: ["a", "c", np.nan, "f", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["b_c", "d_e", np.nan, "g_h", None], + } + ) + tm.assert_frame_equal(result, exp) + + result = values.str.rpartition("_", expand=True) + exp = DataFrame( + { + 0: ["a_b", "c_d", np.nan, "f_g", None], + 1: ["_", "_", np.nan, "_", None], + 2: ["c", "e", np.nan, "h", None], + } + ) + tm.assert_frame_equal(result, exp) + + def test_partition_with_name(self): + # GH 12617 + + s = Series(["a,b", "c,d"], name="xxx") + res = s.str.partition(",") + exp = DataFrame({0: ["a", "c"], 1: [",", ","], 2: ["b", "d"]}) + tm.assert_frame_equal(res, exp) + + # should preserve name + res = s.str.partition(",", expand=False) + exp = Series([("a", ",", "b"), ("c", ",", "d")], name="xxx") + tm.assert_series_equal(res, exp) + + idx = Index(["a,b", "c,d"], name="xxx") + res = idx.str.partition(",") + exp = MultiIndex.from_tuples([("a", ",", "b"), ("c", ",", "d")]) + assert res.nlevels == 3 + tm.assert_index_equal(res, exp) + + # should preserve name + res = idx.str.partition(",", expand=False) + exp = Index(np.array([("a", ",", "b"), ("c", ",", "d")]), name="xxx") + assert res.nlevels == 1 + tm.assert_index_equal(res, exp) + + def test_partition_sep_kwarg(self): + # GH 22676; depr kwarg "pat" in favor of "sep" + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + + expected = values.str.partition(sep="_") + result = values.str.partition("_") + tm.assert_frame_equal(result, expected) + + expected = values.str.rpartition(sep="_") + result = values.str.rpartition("_") + tm.assert_frame_equal(result, expected) + + def test_pipe_failures(self): + # #2119 + s = Series(["A|B|C"]) + + result = s.str.split("|") + exp = Series([["A", "B", "C"]]) + + tm.assert_series_equal(result, exp) + + result = s.str.replace("|", " ") + exp = Series(["A B C"]) + + tm.assert_series_equal(result, exp) + + @pytest.mark.parametrize( + "start, stop, step, expected", + [ + (2, 5, 
None, Series(["foo", "bar", np.nan, "baz"])), + (0, 3, -1, Series(["", "", np.nan, ""])), + (None, None, -1, Series(["owtoofaa", "owtrabaa", np.nan, "xuqzabaa"])), + (3, 10, 2, Series(["oto", "ato", np.nan, "aqx"])), + (3, 0, -1, Series(["ofa", "aba", np.nan, "aba"])), + ], + ) + def test_slice(self, start, stop, step, expected): + values = Series(["aafootwo", "aabartwo", np.nan, "aabazqux"]) + result = values.str.slice(start, stop, step) + tm.assert_series_equal(result, expected) + + # mixed + mixed = Series( + ["aafootwo", np.nan, "aabartwo", True, datetime.today(), None, 1, 2.0] + ) + + rs = Series(mixed).str.slice(2, 5) + xp = Series(["foo", np.nan, "bar", np.nan, np.nan, np.nan, np.nan, np.nan]) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + rs = Series(mixed).str.slice(2, 5, -1) + xp = Series(["oof", np.nan, "rab", np.nan, np.nan, np.nan, np.nan, np.nan]) + + def test_slice_replace(self): + values = Series(["short", "a bit longer", "evenlongerthanthat", "", np.nan]) + + exp = Series(["shrt", "a it longer", "evnlongerthanthat", "", np.nan]) + result = values.str.slice_replace(2, 3) + tm.assert_series_equal(result, exp) + + exp = Series(["shzrt", "a zit longer", "evznlongerthanthat", "z", np.nan]) + result = values.str.slice_replace(2, 3, "z") + tm.assert_series_equal(result, exp) + + exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]) + result = values.str.slice_replace(2, 2, "z") + tm.assert_series_equal(result, exp) + + exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan]) + result = values.str.slice_replace(2, 1, "z") + tm.assert_series_equal(result, exp) + + exp = Series(["shorz", "a bit longez", "evenlongerthanthaz", "z", np.nan]) + result = values.str.slice_replace(-1, None, "z") + tm.assert_series_equal(result, exp) + + exp = Series(["zrt", "zer", "zat", "z", np.nan]) + result = values.str.slice_replace(None, -2, "z") + tm.assert_series_equal(result, exp) + + exp = 
Series(["shortz", "a bit znger", "evenlozerthanthat", "z", np.nan]) + result = values.str.slice_replace(6, 8, "z") + tm.assert_series_equal(result, exp) + + exp = Series(["zrt", "a zit longer", "evenlongzerthanthat", "z", np.nan]) + result = values.str.slice_replace(-10, 3, "z") + tm.assert_series_equal(result, exp) + + def test_strip_lstrip_rstrip(self): + values = Series([" aa ", " bb \n", np.nan, "cc "]) + + result = values.str.strip() + exp = Series(["aa", "bb", np.nan, "cc"]) + tm.assert_series_equal(result, exp) + + result = values.str.lstrip() + exp = Series(["aa ", "bb \n", np.nan, "cc "]) + tm.assert_series_equal(result, exp) + + result = values.str.rstrip() + exp = Series([" aa", " bb", np.nan, "cc"]) + tm.assert_series_equal(result, exp) + + def test_strip_lstrip_rstrip_mixed(self): + # mixed + mixed = Series( + [" aa ", np.nan, " bb \t\n", True, datetime.today(), None, 1, 2.0] + ) + + rs = Series(mixed).str.strip() + xp = Series(["aa", np.nan, "bb", np.nan, np.nan, np.nan, np.nan, np.nan]) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + rs = Series(mixed).str.lstrip() + xp = Series(["aa ", np.nan, "bb \t\n", np.nan, np.nan, np.nan, np.nan, np.nan]) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + rs = Series(mixed).str.rstrip() + xp = Series([" aa", np.nan, " bb", np.nan, np.nan, np.nan, np.nan, np.nan]) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + def test_strip_lstrip_rstrip_args(self): + values = Series(["xxABCxx", "xx BNSD", "LDFJH xx"]) + + rs = values.str.strip("x") + xp = Series(["ABC", " BNSD", "LDFJH "]) + tm.assert_series_equal(rs, xp) + + rs = values.str.lstrip("x") + xp = Series(["ABCxx", " BNSD", "LDFJH xx"]) + tm.assert_series_equal(rs, xp) + + rs = values.str.rstrip("x") + xp = Series(["xxABC", "xx BNSD", "LDFJH "]) + tm.assert_series_equal(rs, xp) + + def test_wrap(self): + # test values are: two words less than width, two words equal to width, + # two words greater 
than width, one word less than width, one word + # equal to width, one word greater than width, multiple tokens with + # trailing whitespace equal to width + values = Series( + [ + "hello world", + "hello world!", + "hello world!!", + "abcdefabcde", + "abcdefabcdef", + "abcdefabcdefa", + "ab ab ab ab ", + "ab ab ab ab a", + "\t", + ] + ) + + # expected values + xp = Series( + [ + "hello world", + "hello world!", + "hello\nworld!!", + "abcdefabcde", + "abcdefabcdef", + "abcdefabcdef\na", + "ab ab ab ab", + "ab ab ab ab\na", + "", + ] + ) + + rs = values.str.wrap(12, break_long_words=True) + tm.assert_series_equal(rs, xp) + + # test with pre and post whitespace (non-unicode), NaN, and non-ascii + # Unicode + values = Series([" pre ", np.nan, "\xac\u20ac\U00008000 abadcafe"]) + xp = Series([" pre", np.nan, "\xac\u20ac\U00008000 ab\nadcafe"]) + rs = values.str.wrap(6) + tm.assert_series_equal(rs, xp) + + def test_get(self): + values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"]) + + result = values.str.split("_").str.get(1) + expected = Series(["b", "d", np.nan, "g"]) + tm.assert_series_equal(result, expected) + + # mixed + mixed = Series(["a_b_c", np.nan, "c_d_e", True, datetime.today(), None, 1, 2.0]) + + rs = Series(mixed).str.split("_").str.get(1) + xp = Series(["b", np.nan, "d", np.nan, np.nan, np.nan, np.nan, np.nan]) + + assert isinstance(rs, Series) + tm.assert_almost_equal(rs, xp) + + # bounds testing + values = Series(["1_2_3_4_5", "6_7_8_9_10", "11_12"]) + + # positive index + result = values.str.split("_").str.get(2) + expected = Series(["3", "8", np.nan]) + tm.assert_series_equal(result, expected) + + # negative index + result = values.str.split("_").str.get(-3) + expected = Series(["3", "8", np.nan]) + tm.assert_series_equal(result, expected) + + def test_get_complex(self): + # GH 20671, getting value not in dict raising `KeyError` + values = Series([(1, 2, 3), [1, 2, 3], {1, 2, 3}, {1: "a", 2: "b", 3: "c"}]) + + result = values.str.get(1) + expected = 
Series([2, 2, np.nan, "a"]) + tm.assert_series_equal(result, expected) + + result = values.str.get(-1) + expected = Series([3, 3, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("to_type", [tuple, list, np.array]) + def test_get_complex_nested(self, to_type): + values = Series([to_type([to_type([1, 2])])]) + + result = values.str.get(0) + expected = Series([to_type([1, 2])]) + tm.assert_series_equal(result, expected) + + result = values.str.get(1) + expected = Series([np.nan]) + tm.assert_series_equal(result, expected) + + def test_contains_moar(self): + # PR #1179 + s = Series(["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"]) + + result = s.str.contains("a") + expected = Series( + [False, False, False, True, True, False, np.nan, False, False, True] + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("a", case=False) + expected = Series( + [True, False, False, True, True, False, np.nan, True, False, True] + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("Aa") + expected = Series( + [False, False, False, True, False, False, np.nan, False, False, False] + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("ba") + expected = Series( + [False, False, False, True, False, False, np.nan, False, False, False] + ) + tm.assert_series_equal(result, expected) + + result = s.str.contains("ba", case=False) + expected = Series( + [False, False, False, True, True, False, np.nan, True, False, False] + ) + tm.assert_series_equal(result, expected) + + def test_contains_nan(self): + # PR #14171 + s = Series([np.nan, np.nan, np.nan], dtype=np.object_) + + result = s.str.contains("foo", na=False) + expected = Series([False, False, False], dtype=np.bool_) + tm.assert_series_equal(result, expected) + + result = s.str.contains("foo", na=True) + expected = Series([True, True, True], dtype=np.bool_) + tm.assert_series_equal(result, expected) + + result = 
s.str.contains("foo", na="foo") + expected = Series(["foo", "foo", "foo"], dtype=np.object_) + tm.assert_series_equal(result, expected) + + result = s.str.contains("foo") + expected = Series([np.nan, np.nan, np.nan], dtype=np.object_) + tm.assert_series_equal(result, expected) + + def test_replace_moar(self): + # PR #1179 + s = Series(["A", "B", "C", "Aaba", "Baca", "", np.nan, "CABA", "dog", "cat"]) + + result = s.str.replace("A", "YYY") + expected = Series( + ["YYY", "B", "C", "YYYaba", "Baca", "", np.nan, "CYYYBYYY", "dog", "cat"] + ) + tm.assert_series_equal(result, expected) + + result = s.str.replace("A", "YYY", case=False) + expected = Series( + [ + "YYY", + "B", + "C", + "YYYYYYbYYY", + "BYYYcYYY", + "", + np.nan, + "CYYYBYYY", + "dog", + "cYYYt", + ] + ) + tm.assert_series_equal(result, expected) + + result = s.str.replace("^.a|dog", "XX-XX ", case=False) + expected = Series( + [ + "A", + "B", + "C", + "XX-XX ba", + "XX-XX ca", + "", + np.nan, + "XX-XX BA", + "XX-XX ", + "XX-XX t", + ] + ) + tm.assert_series_equal(result, expected) + + def test_string_slice_get_syntax(self): + s = Series( + [ + "YYY", + "B", + "C", + "YYYYYYbYYY", + "BYYYcYYY", + np.nan, + "CYYYBYYY", + "dog", + "cYYYt", + ] + ) + + result = s.str[0] + expected = s.str.get(0) + tm.assert_series_equal(result, expected) + + result = s.str[:3] + expected = s.str.slice(stop=3) + tm.assert_series_equal(result, expected) + + result = s.str[2::-1] + expected = s.str.slice(start=2, step=-1) + tm.assert_series_equal(result, expected) + + def test_string_slice_out_of_bounds(self): + s = Series([(1, 2), (1,), (3, 4, 5)]) + + result = s.str[1] + expected = Series([2, np.nan, 4]) + + tm.assert_series_equal(result, expected) + + s = Series(["foo", "b", "ba"]) + result = s.str[1] + expected = Series(["o", np.nan, "a"]) + tm.assert_series_equal(result, expected) + + def test_match_findall_flags(self): + data = { + "Dave": "dave@google.com", + "Steve": "steve@gmail.com", + "Rob": "rob@gmail.com", + "Wes": 
np.nan, + } + data = Series(data) + + pat = r"([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})" + + result = data.str.extract(pat, flags=re.IGNORECASE, expand=True) + assert result.iloc[0].tolist() == ["dave", "google", "com"] + + result = data.str.match(pat, flags=re.IGNORECASE) + assert result[0] + + result = data.str.findall(pat, flags=re.IGNORECASE) + assert result[0][0] == ("dave", "google", "com") + + result = data.str.count(pat, flags=re.IGNORECASE) + assert result[0] == 1 + + with tm.assert_produces_warning(UserWarning): + result = data.str.contains(pat, flags=re.IGNORECASE) + assert result[0] + + def test_encode_decode(self): + base = Series(["a", "b", "a\xe4"]) + series = base.str.encode("utf-8") + + f = lambda x: x.decode("utf-8") + result = series.str.decode("utf-8") + exp = series.map(f) + + tm.assert_series_equal(result, exp) + + def test_encode_decode_errors(self): + encodeBase = Series(["a", "b", "a\x9d"]) + + msg = ( + r"'charmap' codec can't encode character '\\x9d' in position 1:" + " character maps to " + ) + with pytest.raises(UnicodeEncodeError, match=msg): + encodeBase.str.encode("cp1252") + + f = lambda x: x.encode("cp1252", "ignore") + result = encodeBase.str.encode("cp1252", "ignore") + exp = encodeBase.map(f) + tm.assert_series_equal(result, exp) + + decodeBase = Series([b"a", b"b", b"a\x9d"]) + + msg = ( + "'charmap' codec can't decode byte 0x9d in position 1:" + " character maps to " + ) + with pytest.raises(UnicodeDecodeError, match=msg): + decodeBase.str.decode("cp1252") + + f = lambda x: x.decode("cp1252", "ignore") + result = decodeBase.str.decode("cp1252", "ignore") + exp = decodeBase.map(f) + + tm.assert_series_equal(result, exp) + + def test_normalize(self): + values = ["ABC", "ABC", "123", np.nan, "アイエ"] + s = Series(values, index=["a", "b", "c", "d", "e"]) + + normed = ["ABC", "ABC", "123", np.nan, "アイエ"] + expected = Series(normed, index=["a", "b", "c", "d", "e"]) + + result = s.str.normalize("NFKC") + 
tm.assert_series_equal(result, expected) + + expected = Series( + ["ABC", "ABC", "123", np.nan, "アイエ"], index=["a", "b", "c", "d", "e"] + ) + + result = s.str.normalize("NFC") + tm.assert_series_equal(result, expected) + + with pytest.raises(ValueError, match="invalid normalization form"): + s.str.normalize("xxx") + + s = Index(["ABC", "123", "アイエ"]) + expected = Index(["ABC", "123", "アイエ"]) + result = s.str.normalize("NFKC") + tm.assert_index_equal(result, expected) + + def test_index_str_accessor_visibility(self): + from pandas.core.strings import StringMethods + + cases = [ + (["a", "b"], "string"), + (["a", "b", 1], "mixed-integer"), + (["a", "b", 1.3], "mixed"), + (["a", "b", 1.3, 1], "mixed-integer"), + (["aa", datetime(2011, 1, 1)], "mixed"), + ] + for values, tp in cases: + idx = Index(values) + assert isinstance(Series(values).str, StringMethods) + assert isinstance(idx.str, StringMethods) + assert idx.inferred_type == tp + + for values, tp in cases: + idx = Index(values) + assert isinstance(Series(values).str, StringMethods) + assert isinstance(idx.str, StringMethods) + assert idx.inferred_type == tp + + cases = [ + ([1, np.nan], "floating"), + ([datetime(2011, 1, 1)], "datetime64"), + ([timedelta(1)], "timedelta64"), + ] + for values, tp in cases: + idx = Index(values) + message = "Can only use .str accessor with string values" + with pytest.raises(AttributeError, match=message): + Series(values).str + with pytest.raises(AttributeError, match=message): + idx.str + assert idx.inferred_type == tp + + # MultiIndex has mixed dtype, but not allow to use accessor + idx = MultiIndex.from_tuples([("a", "b"), ("a", "b")]) + assert idx.inferred_type == "mixed" + message = "Can only use .str accessor with Index, not MultiIndex" + with pytest.raises(AttributeError, match=message): + idx.str + + def test_str_accessor_no_new_attributes(self): + # https://github.com/pandas-dev/pandas/issues/10673 + s = Series(list("aabbcde")) + with pytest.raises(AttributeError, 
match="You cannot add any new attribute"): + s.str.xlabel = "a" + + def test_method_on_bytes(self): + lhs = Series(np.array(list("abc"), "S1").astype(object)) + rhs = Series(np.array(list("def"), "S1").astype(object)) + with pytest.raises(TypeError, match="Cannot use .str.cat with values of.*"): + lhs.str.cat(rhs) + + def test_casefold(self): + # GH25405 + expected = Series(["ss", np.nan, "case", "ssd"]) + s = Series(["ß", np.nan, "case", "ßd"]) + result = s.str.casefold() + + tm.assert_series_equal(result, expected) + + +def test_string_array(any_string_method): + method_name, args, kwargs = any_string_method + if method_name == "decode": + pytest.skip("decode requires bytes.") + + data = ["a", "bb", np.nan, "ccc"] + a = Series(data, dtype=object) + b = Series(data, dtype="string") + + expected = getattr(a.str, method_name)(*args, **kwargs) + result = getattr(b.str, method_name)(*args, **kwargs) + + if isinstance(expected, Series): + if expected.dtype == "object" and lib.is_string_array( + expected.dropna().values, + ): + assert result.dtype == "string" + result = result.astype(object) + + elif expected.dtype == "object" and lib.is_bool_array( + expected.values, skipna=True + ): + assert result.dtype == "boolean" + result = result.astype(object) + + elif expected.dtype == "float" and expected.isna().any(): + assert result.dtype == "Int64" + result = result.astype("float") + + elif isinstance(expected, DataFrame): + columns = expected.select_dtypes(include="object").columns + assert all(result[columns].dtypes == "string") + result[columns] = result[columns].astype(object) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "method,expected", + [ + ("count", [2, None]), + ("find", [0, None]), + ("index", [0, None]), + ("rindex", [2, None]), + ], +) +def test_string_array_numeric_integer_array(method, expected): + s = Series(["aba", None], dtype="string") + result = getattr(s.str, method)("a") + expected = Series(expected, dtype="Int64") + 
tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "method,expected", + [ + ("isdigit", [False, None, True]), + ("isalpha", [True, None, False]), + ("isalnum", [True, None, True]), + ("isdigit", [False, None, True]), + ], +) +def test_string_array_boolean_array(method, expected): + s = Series(["a", None, "1"], dtype="string") + result = getattr(s.str, method)() + expected = Series(expected, dtype="boolean") + tm.assert_series_equal(result, expected) + + +def test_string_array_extract(): + # https://github.com/pandas-dev/pandas/issues/30969 + # Only expand=False & multiple groups was failing + a = Series(["a1", "b2", "cc"], dtype="string") + b = Series(["a1", "b2", "cc"], dtype="object") + pat = r"(\w)(\d)" + + result = a.str.extract(pat, expand=False) + expected = b.str.extract(pat, expand=False) + assert all(result.dtypes == "string") + + result = result.astype(object) + tm.assert_equal(result, expected) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py new file mode 100644 index 00000000..465296a6 --- /dev/null +++ b/pandas/tests/test_take.py @@ -0,0 +1,461 @@ +from datetime import datetime +import re + +import numpy as np +import pytest + +from pandas._libs.tslib import iNaT + +import pandas._testing as tm +import pandas.core.algorithms as algos + + +@pytest.fixture(params=[True, False]) +def writeable(request): + return request.param + + +# Check that take_nd works both with writeable arrays +# (in which case fast typed memory-views implementation) +# and read-only arrays alike. 
+@pytest.fixture( + params=[ + (np.float64, True), + (np.float32, True), + (np.uint64, False), + (np.uint32, False), + (np.uint16, False), + (np.uint8, False), + (np.int64, False), + (np.int32, False), + (np.int16, False), + (np.int8, False), + (np.object_, True), + (np.bool, False), + ] +) +def dtype_can_hold_na(request): + return request.param + + +@pytest.fixture( + params=[ + (np.int8, np.int16(127), np.int8), + (np.int8, np.int16(128), np.int16), + (np.int32, 1, np.int32), + (np.int32, 2.0, np.float64), + (np.int32, 3.0 + 4.0j, np.complex128), + (np.int32, True, np.object_), + (np.int32, "", np.object_), + (np.float64, 1, np.float64), + (np.float64, 2.0, np.float64), + (np.float64, 3.0 + 4.0j, np.complex128), + (np.float64, True, np.object_), + (np.float64, "", np.object_), + (np.complex128, 1, np.complex128), + (np.complex128, 2.0, np.complex128), + (np.complex128, 3.0 + 4.0j, np.complex128), + (np.complex128, True, np.object_), + (np.complex128, "", np.object_), + (np.bool_, 1, np.object_), + (np.bool_, 2.0, np.object_), + (np.bool_, 3.0 + 4.0j, np.object_), + (np.bool_, True, np.bool_), + (np.bool_, "", np.object_), + ] +) +def dtype_fill_out_dtype(request): + return request.param + + +class TestTake: + # Standard incompatible fill error. 
+ fill_error = re.compile("Incompatible type for fill_value") + + def test_1d_with_out(self, dtype_can_hold_na, writeable): + dtype, can_hold_na = dtype_can_hold_na + + data = np.random.randint(0, 2, 4).astype(dtype) + data.flags.writeable = writeable + + indexer = [2, 1, 0, 1] + out = np.empty(4, dtype=dtype) + algos.take_1d(data, indexer, out=out) + + expected = data.take(indexer) + tm.assert_almost_equal(out, expected) + + indexer = [2, 1, 0, -1] + out = np.empty(4, dtype=dtype) + + if can_hold_na: + algos.take_1d(data, indexer, out=out) + expected = data.take(indexer) + expected[3] = np.nan + tm.assert_almost_equal(out, expected) + else: + with pytest.raises(TypeError, match=self.fill_error): + algos.take_1d(data, indexer, out=out) + + # No Exception otherwise. + data.take(indexer, out=out) + + def test_1d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + data = np.random.randint(0, 2, 4).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_1d(data, indexer, fill_value=fill_value) + assert (result[[0, 1, 2]] == data[[2, 1, 0]]).all() + assert result[3] == fill_value + assert result.dtype == out_dtype + + indexer = [2, 1, 0, 1] + + result = algos.take_1d(data, indexer, fill_value=fill_value) + assert (result[[0, 1, 2, 3]] == data[indexer]).all() + assert result.dtype == dtype + + def test_2d_with_out(self, dtype_can_hold_na, writeable): + dtype, can_hold_na = dtype_can_hold_na + + data = np.random.randint(0, 2, (5, 3)).astype(dtype) + data.flags.writeable = writeable + + indexer = [2, 1, 0, 1] + out0 = np.empty((4, 3), dtype=dtype) + out1 = np.empty((5, 4), dtype=dtype) + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 3), dtype=dtype) + out1 = 
np.empty((5, 4), dtype=dtype) + + if can_hold_na: + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected0[3, :] = np.nan + expected1[:, 3] = np.nan + + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + else: + for i, out in enumerate([out0, out1]): + with pytest.raises(TypeError, match=self.fill_error): + algos.take_nd(data, indexer, out=out, axis=i) + + # No Exception otherwise. + data.take(indexer, out=out, axis=i) + + def test_2d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + data = np.random.randint(0, 2, (5, 3)).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2], :] == data[[2, 1, 0], :]).all() + assert (result[3, :] == fill_value).all() + assert result.dtype == out_dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all() + assert (result[:, 3] == fill_value).all() + assert result.dtype == out_dtype + + indexer = [2, 1, 0, 1] + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2, 3], :] == data[indexer, :]).all() + assert result.dtype == dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2, 3]] == data[:, indexer]).all() + assert result.dtype == dtype + + def test_3d_with_out(self, dtype_can_hold_na): + dtype, can_hold_na = dtype_can_hold_na + + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + indexer = [2, 1, 0, 1] + + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + algos.take_nd(data, indexer, 
out=out2, axis=2) + + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected2 = data.take(indexer, axis=2) + + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + tm.assert_almost_equal(out2, expected2) + + indexer = [2, 1, 0, -1] + out0 = np.empty((4, 4, 3), dtype=dtype) + out1 = np.empty((5, 4, 3), dtype=dtype) + out2 = np.empty((5, 4, 4), dtype=dtype) + + if can_hold_na: + algos.take_nd(data, indexer, out=out0, axis=0) + algos.take_nd(data, indexer, out=out1, axis=1) + algos.take_nd(data, indexer, out=out2, axis=2) + + expected0 = data.take(indexer, axis=0) + expected1 = data.take(indexer, axis=1) + expected2 = data.take(indexer, axis=2) + + expected0[3, :, :] = np.nan + expected1[:, 3, :] = np.nan + expected2[:, :, 3] = np.nan + + tm.assert_almost_equal(out0, expected0) + tm.assert_almost_equal(out1, expected1) + tm.assert_almost_equal(out2, expected2) + else: + for i, out in enumerate([out0, out1, out2]): + with pytest.raises(TypeError, match=self.fill_error): + algos.take_nd(data, indexer, out=out, axis=i) + + # No Exception otherwise. 
+ data.take(indexer, out=out, axis=i) + + def test_3d_fill_nonna(self, dtype_fill_out_dtype): + dtype, fill_value, out_dtype = dtype_fill_out_dtype + + data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype) + indexer = [2, 1, 0, -1] + + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all() + assert (result[3, :, :] == fill_value).all() + assert result.dtype == out_dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all() + assert (result[:, 3, :] == fill_value).all() + assert result.dtype == out_dtype + + result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value) + assert (result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all() + assert (result[:, :, 3] == fill_value).all() + assert result.dtype == out_dtype + + indexer = [2, 1, 0, 1] + result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value) + assert (result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all() + assert result.dtype == dtype + + result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value) + assert (result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all() + assert result.dtype == dtype + + result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value) + assert (result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all() + assert result.dtype == dtype + + def test_1d_other_dtypes(self): + arr = np.random.randn(10).astype(np.float32) + + indexer = [1, 2, 3, -1] + result = algos.take_1d(arr, indexer) + expected = arr.take(indexer) + expected[-1] = np.nan + tm.assert_almost_equal(result, expected) + + def test_2d_other_dtypes(self): + arr = np.random.randn(10, 5).astype(np.float32) + + indexer = [1, 2, 3, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + expected = arr.take(indexer, axis=0) + expected[-1] = np.nan + tm.assert_almost_equal(result, expected) + + # axis=1 + result = 
algos.take_nd(arr, indexer, axis=1) + expected = arr.take(indexer, axis=1) + expected[:, -1] = np.nan + tm.assert_almost_equal(result, expected) + + def test_1d_bool(self): + arr = np.array([0, 1, 0], dtype=bool) + + result = algos.take_1d(arr, [0, 2, 2, 1]) + expected = arr.take([0, 2, 2, 1]) + tm.assert_numpy_array_equal(result, expected) + + result = algos.take_1d(arr, [0, 2, -1]) + assert result.dtype == np.object_ + + def test_2d_bool(self): + arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool) + + result = algos.take_nd(arr, [0, 2, 2, 1]) + expected = arr.take([0, 2, 2, 1], axis=0) + tm.assert_numpy_array_equal(result, expected) + + result = algos.take_nd(arr, [0, 2, 2, 1], axis=1) + expected = arr.take([0, 2, 2, 1], axis=1) + tm.assert_numpy_array_equal(result, expected) + + result = algos.take_nd(arr, [0, 2, -1]) + assert result.dtype == np.object_ + + def test_2d_float32(self): + arr = np.random.randn(4, 3).astype(np.float32) + indexer = [0, 2, -1, 1, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=0, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected[[2, 4], :] = np.nan + tm.assert_almost_equal(result, expected) + + # this now accepts a float32! # test with float64 out buffer + out = np.empty((len(indexer), arr.shape[1]), dtype="float32") + algos.take_nd(arr, indexer, out=out) # it works! 
+ + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=1, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected[:, [2, 4]] = np.nan + tm.assert_almost_equal(result, expected) + + def test_2d_datetime64(self): + # 2005/01/01 - 2006/01/01 + arr = np.random.randint(11045376, 11360736, (5, 3)) * 100000000000 + arr = arr.view(dtype="datetime64[ns]") + indexer = [0, 2, -1, 1, -1] + + # axis=0 + result = algos.take_nd(arr, indexer, axis=0) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=0, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected.view(np.int64)[[2, 4], :] = iNaT + tm.assert_almost_equal(result, expected) + + result = algos.take_nd(arr, indexer, axis=0, fill_value=datetime(2007, 1, 1)) + result2 = np.empty_like(result) + algos.take_nd( + arr, indexer, out=result2, axis=0, fill_value=datetime(2007, 1, 1) + ) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=0) + expected[[2, 4], :] = datetime(2007, 1, 1) + tm.assert_almost_equal(result, expected) + + # axis=1 + result = algos.take_nd(arr, indexer, axis=1) + result2 = np.empty_like(result) + algos.take_nd(arr, indexer, axis=1, out=result2) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected.view(np.int64)[:, [2, 4]] = iNaT + tm.assert_almost_equal(result, expected) + + result = algos.take_nd(arr, indexer, axis=1, fill_value=datetime(2007, 1, 1)) + result2 = np.empty_like(result) + algos.take_nd( + arr, indexer, out=result2, axis=1, fill_value=datetime(2007, 1, 1) + ) + tm.assert_almost_equal(result, result2) + + expected = arr.take(indexer, axis=1) + expected[:, [2, 4]] = datetime(2007, 1, 1) + tm.assert_almost_equal(result, expected) + + def test_take_axis_0(self): + arr = np.arange(12).reshape(4, 3) + result = algos.take(arr, [0, -1]) + 
expected = np.array([[0, 1, 2], [9, 10, 11]]) + tm.assert_numpy_array_equal(result, expected) + + # allow_fill=True + result = algos.take(arr, [0, -1], allow_fill=True, fill_value=0) + expected = np.array([[0, 1, 2], [0, 0, 0]]) + tm.assert_numpy_array_equal(result, expected) + + def test_take_axis_1(self): + arr = np.arange(12).reshape(4, 3) + result = algos.take(arr, [0, -1], axis=1) + expected = np.array([[0, 2], [3, 5], [6, 8], [9, 11]]) + tm.assert_numpy_array_equal(result, expected) + + # allow_fill=True + result = algos.take(arr, [0, -1], axis=1, allow_fill=True, fill_value=0) + expected = np.array([[0, 0], [3, 0], [6, 0], [9, 0]]) + tm.assert_numpy_array_equal(result, expected) + + # GH#26976 make sure we validate along the correct axis + with pytest.raises(IndexError, match="indices are out-of-bounds"): + algos.take(arr, [0, 3], axis=1, allow_fill=True, fill_value=0) + + +class TestExtensionTake: + # The take method found in pd.api.extensions + + def test_bounds_check_large(self): + arr = np.array([1, 2]) + with pytest.raises(IndexError): + algos.take(arr, [2, 3], allow_fill=True) + + with pytest.raises(IndexError): + algos.take(arr, [2, 3], allow_fill=False) + + def test_bounds_check_small(self): + arr = np.array([1, 2, 3], dtype=np.int64) + indexer = [0, -1, -2] + with pytest.raises(ValueError): + algos.take(arr, indexer, allow_fill=True) + + result = algos.take(arr, indexer) + expected = np.array([1, 3, 2], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("allow_fill", [True, False]) + def test_take_empty(self, allow_fill): + arr = np.array([], dtype=np.int64) + # empty take is ok + result = algos.take(arr, [], allow_fill=allow_fill) + tm.assert_numpy_array_equal(arr, result) + + with pytest.raises(IndexError): + algos.take(arr, [0], allow_fill=allow_fill) + + def test_take_na_empty(self): + result = algos.take(np.array([]), [-1, -1], allow_fill=True, fill_value=0.0) + expected = np.array([0.0, 0.0]) + 
tm.assert_numpy_array_equal(result, expected) + + def test_take_coerces_list(self): + arr = [1, 2, 3] + result = algos.take(arr, [0, 0]) + expected = np.array([1, 1]) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/tools/__init__.py b/pandas/tests/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py new file mode 100644 index 00000000..2fd39d5a --- /dev/null +++ b/pandas/tests/tools/test_numeric.py @@ -0,0 +1,629 @@ +import decimal + +import numpy as np +from numpy import iinfo +import pytest + +import pandas as pd +from pandas import DataFrame, Index, Series, to_numeric +import pandas._testing as tm + + +@pytest.fixture(params=[None, "ignore", "raise", "coerce"]) +def errors(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def signed(request): + return request.param + + +@pytest.fixture(params=[lambda x: x, str], ids=["identity", "str"]) +def transform(request): + return request.param + + +@pytest.fixture(params=[47393996303418497800, 100000000000000000000]) +def large_val(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def multiple_elts(request): + return request.param + + +@pytest.fixture( + params=[ + (lambda x: Index(x, name="idx"), tm.assert_index_equal), + (lambda x: Series(x, name="ser"), tm.assert_series_equal), + (lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal), + ] +) +def transform_assert_equal(request): + return request.param + + +@pytest.mark.parametrize( + "input_kwargs,result_kwargs", + [ + (dict(), dict(dtype=np.int64)), + (dict(errors="coerce", downcast="integer"), dict(dtype=np.int8)), + ], +) +def test_empty(input_kwargs, result_kwargs): + # see gh-16302 + ser = Series([], dtype=object) + result = to_numeric(ser, **input_kwargs) + + expected = Series([], **result_kwargs) + tm.assert_series_equal(result, expected) + + 
@pytest.mark.parametrize("last_val", ["7", 7])
def test_series(last_val):
    """A Series of numeric strings (last element str or int) parses to numbers."""
    parsed = to_numeric(Series(["1", "-3.14", last_val]))
    tm.assert_series_equal(parsed, Series([1, -3.14, 7]))


@pytest.mark.parametrize(
    "data",
    [
        [1, 3, 4, 5],
        [1.0, 3.0, 4.0, 5.0],
        # Bool is regarded as numeric.
        [True, False, True, True],
    ],
)
def test_series_numeric(data):
    """An already-numeric Series comes back equal to the input."""
    original = Series(data, index=list("ABCD"), name="EFG")
    tm.assert_series_equal(to_numeric(original), original)


@pytest.mark.parametrize(
    "data,msg",
    [
        ([1, -3.14, "apple"], 'Unable to parse string "apple" at position 2'),
        (
            ["orange", 1, -3.14, "apple"],
            'Unable to parse string "orange" at position 0',
        ),
    ],
)
def test_error(data, msg):
    """With errors="raise", an unparseable string raises ValueError matching msg."""
    unparseable = Series(data)
    with pytest.raises(ValueError, match=msg):
        to_numeric(unparseable, errors="raise")


@pytest.mark.parametrize(
    "errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])]
)
def test_ignore_error(errors, exp_data):
    """Check the result for each non-raising ``errors`` mode on mixed input."""
    outcome = to_numeric(Series([1, -3.14, "apple"]), errors=errors)
    tm.assert_series_equal(outcome, Series(exp_data))


@pytest.mark.parametrize(
    "errors,exp",
    [
        ("raise", 'Unable to parse string "apple" at position 2'),
        ("ignore", [True, False, "apple"]),
        # Coerces to float.
        ("coerce", [1.0, 0.0, np.nan]),
    ],
)
def test_bool_handling(errors, exp):
    """Booleans mixed with a bad string: ``exp`` is an error regex or the data."""
    mixed = Series([True, False, "apple"])

    if not isinstance(exp, str):
        outcome = to_numeric(mixed, errors=errors)
        tm.assert_series_equal(outcome, Series(exp))
        return

    # A string expectation is the message the ValueError must match.
    with pytest.raises(ValueError, match=exp):
        to_numeric(mixed, errors=errors)


def test_list():
    """A plain list of numeric strings yields a numpy array."""
    converted = to_numeric(["1", "-3.14", "7"])
    tm.assert_numpy_array_equal(converted, np.array([1, -3.14, 7]))


@pytest.mark.parametrize(
    "data,arr_kwargs",
    [
        ([1, 3, 4, 5], {"dtype": np.int64}),
        ([1.0, 3.0, 4.0, 5.0], {}),
        # Boolean is regarded as numeric.
        ([True, False, True, True], {}),
    ],
)
def test_list_numeric(data, arr_kwargs):
    """A list of numbers converts to the array built from the same data."""
    converted = to_numeric(data)
    tm.assert_numpy_array_equal(converted, np.array(data, **arr_kwargs))


@pytest.mark.parametrize("kwargs", [{"dtype": "O"}, {}])
def test_numeric(kwargs):
    """Numeric values convert the same whether or not the Series is object dtype."""
    values = [1, -3.14, 7]
    converted = to_numeric(Series(values, **kwargs))
    tm.assert_series_equal(converted, Series(values))


@pytest.mark.parametrize(
    "columns",
    [
        # One column.
        "a",
        # Multiple columns.
        ["a", "b"],
    ],
)
def test_numeric_df_columns(columns):
    """Applying to_numeric over selected DataFrame columns converts them."""
    # see gh-14827
    source = DataFrame(
        {
            "a": [1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), "0.1"],
            "b": [1.0, 2.0, 3.0, 4.0],
        }
    )
    expected = DataFrame({"a": [1.2, 3.14, np.inf, 0.1], "b": [1.0, 2.0, 3.0, 4.0]})

    # Work on a copy so the source frame is left untouched.
    working = source.copy()
    working[columns] = working[columns].apply(to_numeric)

    tm.assert_frame_equal(working, expected)


@pytest.mark.parametrize(
    "data,exp_data",
    [
        (
            [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
            [[3.14, 1.0], 1.6, 0.1],
        ),
        ([np.array([decimal.Decimal(3.14), 1.0]), 0.1], [[3.14, 1.0], 0.1]),
    ],
)
def test_numeric_embedded_arr_likes(data, exp_data):
    """Element-wise to_numeric over a column holding nested lists and arrays."""
    # Test to_numeric with embedded lists and arrays
    frame = DataFrame({"a": data})
    frame["a"] = frame["a"].apply(to_numeric)

    tm.assert_frame_equal(frame, DataFrame({"a": exp_data}))


def test_all_nan():
    """Coercing a Series of unparseable strings gives all-NaN."""
    coerced = to_numeric(Series(["a", "b", "c"]), errors="coerce")
    tm.assert_series_equal(coerced, Series([np.nan, np.nan, np.nan]))


def test_type_check(errors):
    """Passing a DataFrame raises TypeError for every ``errors`` fixture value."""
    # see gh-11776
    frame = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
    # errors=None means "call without the keyword at all".
    kwargs = {} if errors is None else {"errors": errors}

    with pytest.raises(TypeError, match="1-d array"):
        to_numeric(frame, **kwargs)


@pytest.mark.parametrize("val", [1, 1.1, 20001])
def test_scalar(val, signed, transform):
    """A scalar (optionally negated and/or stringified) equals its float value."""
    if signed:
        val = -val
    assert to_numeric(transform(val)) == float(val)


def test_really_large_scalar(large_val, signed, transform, errors):
    """Scalars from the ``large_val`` fixture: raise, or compare to val/float."""
    # see gh-24910
    kwargs = {} if errors is None else {"errors": errors}
    val = transform(-large_val if signed else large_val)
    is_str = isinstance(val, str)

    if is_str and errors in (None, "raise"):
        with pytest.raises(ValueError, match="Integer out of range. at position 0"):
            to_numeric(val, **kwargs)
        return

    # The expectation is float(val) only when a string value is coerced.
    if errors == "coerce" and is_str:
        expected = float(val)
    else:
        expected = val
    tm.assert_almost_equal(to_numeric(val, **kwargs), expected)


def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors):
    """Same as the scalar case but inside a list, optionally with a bad string."""
    # see gh-24910
    kwargs = {} if errors is None else {"errors": errors}
    val = transform(-large_val if signed else large_val)

    extra_elt = "string"
    arr = [val, extra_elt] if multiple_elts else [val]

    is_str = isinstance(val, str)
    coercing = errors == "coerce"

    if errors in (None, "raise") and (is_str or multiple_elts):
        # A string value is matched against the position-0 message;
        # otherwise the extra unparseable element at position 1 is.
        msg = (
            "Integer out of range. at position 0"
            if is_str
            else 'Unable to parse string "string" at position 1'
        )
        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **kwargs)
        return

    result = to_numeric(arr, **kwargs)

    exp_val = float(val) if (coercing and is_str) else val
    expected = [exp_val]

    if not multiple_elts:
        exp_dtype = float if isinstance(exp_val, (int, float)) else object
    elif coercing:
        expected.append(np.nan)
        exp_dtype = float
    else:
        expected.append(extra_elt)
        exp_dtype = object

    tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))


def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors):
    """A large-integer string must still fail after a large int was seen."""
    # see gh-24910
    #
    # Even if we discover that we have to hold float, does not mean
    # we should be lenient on subsequent elements that fail to be integer.
    kwargs = {} if errors is None else {"errors": errors}

    arr = [str(-large_val if signed else large_val)]
    if multiple_elts:
        arr.insert(0, large_val)

    if errors in (None, "raise"):
        # The string sits at position 1 when the int was inserted before it.
        msg = "Integer out of range. at position {index}".format(
            index=int(multiple_elts)
        )
        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **kwargs)
        return

    result = to_numeric(arr, **kwargs)

    if errors == "coerce":
        expected, exp_dtype = [float(i) for i in arr], float
    else:
        expected, exp_dtype = arr, object

    tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))


@pytest.mark.parametrize(
    "errors,checker",
    [
        ("raise", 'Unable to parse string "fail" at position 0'),
        ("ignore", lambda x: x == "fail"),
        ("coerce", lambda x: np.isnan(x)),
    ],
)
def test_scalar_fail(errors, checker):
    """Unparseable scalar: ``checker`` is an error regex or a result predicate."""
    bad_scalar = "fail"

    if not isinstance(checker, str):
        assert checker(to_numeric(bad_scalar, errors=errors))
        return

    with pytest.raises(ValueError, match=checker):
        to_numeric(bad_scalar, errors=errors)


@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, np.nan, 3, np.nan]])
def test_numeric_dtypes(data, transform_assert_equal):
    """Numeric containers built by the fixture's constructor pass through equal."""
    build, check_equal = transform_assert_equal
    container = build(data)

    check_equal(to_numeric(container), container)


@pytest.mark.parametrize(
    "data,exp",
    [
        (["1", "2", "3"], np.array([1, 2, 3], dtype="int64")),
        (["1.5", "2.7", "3.4"], np.array([1.5, 2.7, 3.4])),
    ],
)
def test_str(data, exp, transform_assert_equal):
    """Numeric strings in each container type parse to the expected array."""
    build, check_equal = transform_assert_equal
    result = to_numeric(build(data))

    check_equal(result, build(exp))


def test_datetime_like(tz_naive_fixture, transform_assert_equal):
    """A date_range converts to the values of its ``asi8`` attribute."""
    build, check_equal = transform_assert_equal
    dates = pd.date_range("20130101", periods=3, tz=tz_naive_fixture)

    result = to_numeric(build(dates))
    check_equal(result, build(dates.asi8))


def test_timedelta(transform_assert_equal):
    """A timedelta_range converts to the values of its ``asi8`` attribute."""
    build, check_equal = transform_assert_equal
    deltas = pd.timedelta_range("1 days", periods=3, freq="D")

    result = to_numeric(build(deltas))
    check_equal(result, build(deltas.asi8))


+def test_period(transform_assert_equal): + transform, assert_equal = transform_assert_equal + + idx = pd.period_range("2011-01", periods=3, freq="M", name="") + inp = transform(idx) + + if isinstance(inp, Index): + result = to_numeric(inp) + expected = transform(idx.asi8) + assert_equal(result, expected) + else: + # TODO: PeriodDtype, so support it in to_numeric. + pytest.skip("Missing PeriodDtype support in to_numeric") + + +@pytest.mark.parametrize( + "errors,expected", + [ + ("raise", "Invalid object type at position 0"), + ("ignore", Series([[10.0, 2], 1.0, "apple"])), + ("coerce", Series([np.nan, 1.0, np.nan])), + ], +) +def test_non_hashable(errors, expected): + # see gh-13324 + ser = Series([[10.0, 2], 1.0, "apple"]) + + if isinstance(expected, str): + with pytest.raises(TypeError, match=expected): + to_numeric(ser, errors=errors) + else: + result = to_numeric(ser, errors=errors) + tm.assert_series_equal(result, expected) + + +def test_downcast_invalid_cast(): + # see gh-13352 + data = ["1", 2, 3] + invalid_downcast = "unsigned-integer" + msg = "invalid downcasting method provided" + + with pytest.raises(ValueError, match=msg): + to_numeric(data, downcast=invalid_downcast) + + +def test_errors_invalid_value(): + # see gh-26466 + data = ["1", 2, 3] + invalid_error_value = "invalid" + msg = "invalid error value specified" + + with pytest.raises(ValueError, match=msg): + to_numeric(data, errors=invalid_error_value) + + +@pytest.mark.parametrize( + "data", + [ + ["1", 2, 3], + [1, 2, 3], + np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), + ], +) +@pytest.mark.parametrize( + "kwargs,exp_dtype", + [ + # Basic function tests. + (dict(), np.int64), + (dict(downcast=None), np.int64), + # Support below np.float32 is rare and far between. + (dict(downcast="float"), np.dtype(np.float32).char), + # Basic dtype support. 
+ (dict(downcast="unsigned"), np.dtype(np.typecodes["UnsignedInteger"][0])), + ], +) +def test_downcast_basic(data, kwargs, exp_dtype): + # see gh-13352 + result = to_numeric(data, **kwargs) + expected = np.array([1, 2, 3], dtype=exp_dtype) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("signed_downcast", ["integer", "signed"]) +@pytest.mark.parametrize( + "data", + [ + ["1", 2, 3], + [1, 2, 3], + np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), + ], +) +def test_signed_downcast(data, signed_downcast): + # see gh-13352 + smallest_int_dtype = np.dtype(np.typecodes["Integer"][0]) + expected = np.array([1, 2, 3], dtype=smallest_int_dtype) + + res = to_numeric(data, downcast=signed_downcast) + tm.assert_numpy_array_equal(res, expected) + + +def test_ignore_downcast_invalid_data(): + # If we can't successfully cast the given + # data to a numeric dtype, do not bother + # with the downcast parameter. + data = ["foo", 2, 3] + expected = np.array(data, dtype=object) + + res = to_numeric(data, errors="ignore", downcast="unsigned") + tm.assert_numpy_array_equal(res, expected) + + +def test_ignore_downcast_neg_to_unsigned(): + # Cannot cast to an unsigned integer + # because we have a negative number. + data = ["-1", 2, 3] + expected = np.array([-1, 2, 3], dtype=np.int64) + + res = to_numeric(data, downcast="unsigned") + tm.assert_numpy_array_equal(res, expected) + + +@pytest.mark.parametrize("downcast", ["integer", "signed", "unsigned"]) +@pytest.mark.parametrize( + "data,expected", + [ + (["1.1", 2, 3], np.array([1.1, 2, 3], dtype=np.float64)), + ( + [10000.0, 20000, 3000, 40000.36, 50000, 50000.00], + np.array( + [10000.0, 20000, 3000, 40000.36, 50000, 50000.00], dtype=np.float64 + ), + ), + ], +) +def test_ignore_downcast_cannot_convert_float(data, expected, downcast): + # Cannot cast to an integer (signed or unsigned) + # because we have a float number. 
@pytest.mark.parametrize(
    "downcast,expected_dtype",
    [("integer", np.int16), ("signed", np.int16), ("unsigned", np.uint16)],
)
def test_downcast_not8bit(downcast, expected_dtype):
    # 256 and above fit in neither int8 nor uint8, so the smallest
    # integer dtype chosen by the downcast need not be np.(u)int8.
    converted = to_numeric(["256", 257, 258], downcast=downcast)

    tm.assert_numpy_array_equal(
        converted, np.array([256, 257, 258], dtype=expected_dtype)
    )
@pytest.mark.parametrize(
    "ser,expected",
    [
        (
            pd.Series([0, 9223372036854775808]),
            pd.Series([0, 9223372036854775808], dtype=np.uint64),
        )
    ],
)
def test_downcast_uint64(ser, expected):
    # see gh-14422:
    # BUG: to_numeric did not work with uint64 numbers.
    #
    # 9223372036854775808 is one past the int64 maximum, so the
    # unsigned downcast is expected to yield uint64.
    tm.assert_series_equal(pd.to_numeric(ser, downcast="unsigned"), expected)
def test_freq_code_match(period_code_item):
    # Each fixture value is a (frequency string, code) pair; resolving
    # the string through get_freq must give back the paired code.
    alias, expected_code = period_code_item
    resolved = get_freq(alias)
    assert resolved == expected_code
@pytest.mark.parametrize(
    "args",
    [
        (0.5, "N"),
        # Too much precision in the input can prevent conversion
        # to an integer offset at any resolution.
        (0.3429324798798269273987982, "H"),
    ],
)
def test_cat(args):
    # Strides that cannot be converted to an integer offset must raise.
    with pytest.raises(
        ValueError, match="Could not convert to integer offset at any resolution"
    ):
        _reso.get_stride_from_decimal(*args)
+ (offsets.Week(weekday=1), (get_freq("W-TUE"), 1)), + (offsets.Week(3, weekday=0), (get_freq("W-MON"), 3)), + (offsets.Week(-2, weekday=4), (get_freq("W-FRI"), -2)), + ], +) +def test_get_freq_code(freq_input, expected): + assert get_freq_code(freq_input) == expected + + +def test_get_code_invalid(): + with pytest.raises(ValueError, match="Invalid frequency"): + get_freq_code((5, "baz")) diff --git a/pandas/tests/tseries/frequencies/test_inference.py b/pandas/tests/tseries/frequencies/test_inference.py new file mode 100644 index 00000000..c4660417 --- /dev/null +++ b/pandas/tests/tseries/frequencies/test_inference.py @@ -0,0 +1,535 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas._libs.tslibs.ccalendar import DAYS, MONTHS +from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG +from pandas.compat import is_platform_windows + +from pandas import DatetimeIndex, Index, Series, Timestamp, date_range, period_range +import pandas._testing as tm +from pandas.core.tools.datetimes import to_datetime + +import pandas.tseries.frequencies as frequencies +import pandas.tseries.offsets as offsets + + +def _check_generated_range(start, periods, freq): + """ + Check the range generated from a given start, frequency, and period count. + + Parameters + ---------- + start : str + The start date. + periods : int + The number of periods. + freq : str + The frequency of the range. 
+ """ + freq = freq.upper() + + gen = date_range(start, periods=periods, freq=freq) + index = DatetimeIndex(gen.values) + + if not freq.startswith("Q-"): + assert frequencies.infer_freq(index) == gen.freqstr + else: + inf_freq = frequencies.infer_freq(index) + is_dec_range = inf_freq == "Q-DEC" and gen.freqstr in ( + "Q", + "Q-DEC", + "Q-SEP", + "Q-JUN", + "Q-MAR", + ) + is_nov_range = inf_freq == "Q-NOV" and gen.freqstr in ( + "Q-NOV", + "Q-AUG", + "Q-MAY", + "Q-FEB", + ) + is_oct_range = inf_freq == "Q-OCT" and gen.freqstr in ( + "Q-OCT", + "Q-JUL", + "Q-APR", + "Q-JAN", + ) + assert is_dec_range or is_nov_range or is_oct_range + + +@pytest.fixture( + params=[ + (timedelta(1), "D"), + (timedelta(hours=1), "H"), + (timedelta(minutes=1), "T"), + (timedelta(seconds=1), "S"), + (np.timedelta64(1, "ns"), "N"), + (timedelta(microseconds=1), "U"), + (timedelta(microseconds=1000), "L"), + ] +) +def base_delta_code_pair(request): + return request.param + + +@pytest.fixture(params=[1, 2, 3, 4]) +def count(request): + return request.param + + +@pytest.fixture(params=DAYS) +def day(request): + return request.param + + +@pytest.fixture(params=MONTHS) +def month(request): + return request.param + + +@pytest.fixture(params=[5, 7]) +def periods(request): + return request.param + + +def test_raise_if_period_index(): + index = period_range(start="1/1/1990", periods=20, freq="M") + msg = "Check the `freq` attribute instead of using infer_freq" + + with pytest.raises(TypeError, match=msg): + frequencies.infer_freq(index) + + +def test_raise_if_too_few(): + index = DatetimeIndex(["12/31/1998", "1/3/1999"]) + msg = "Need at least 3 dates to infer frequency" + + with pytest.raises(ValueError, match=msg): + frequencies.infer_freq(index) + + +def test_business_daily(): + index = DatetimeIndex(["01/01/1999", "1/4/1999", "1/5/1999"]) + assert frequencies.infer_freq(index) == "B" + + +def test_business_daily_look_alike(): + # see gh-16624 + # + # Do not infer "B when "weekend" (2-day gap) 
def test_week_of_month_fake():
    # All of these dates fall on the same day of the week and are
    # 4 or 5 weeks apart, so "WOM-4TUE" must not be inferred.
    dates = ["2013-08-27", "2013-10-01", "2013-10-29", "2013-11-26"]
    inferred = frequencies.infer_freq(DatetimeIndex(dates))
    assert inferred != "WOM-4TUE"
@pytest.mark.parametrize("freq", ["M", "BM", "BMS"])
def test_monthly_infer(periods, freq):
    # Forward the parametrized frequency to the helper. With a hard-coded
    # "M" the month-end case ran three times and the business-month
    # aliases ("BM", "BMS") were never exercised.
    _check_generated_range("1/1/2000", periods, freq)
def test_infer_freq_tz_transition_custom():
    # Build a naive 3-hourly range first and attach the timezone
    # afterwards; no frequency is expected from the localized index.
    naive = date_range("2013-11-03", periods=5, freq="3H")
    localized = naive.tz_localize("America/Chicago")
    assert localized.inferred_freq is None
def test_non_datetime_index2():
    # infer_freq applied to the to_pydatetime() output must agree with
    # the frequency inferred by the DatetimeIndex itself.
    index = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
    as_datetimes = index.to_pydatetime()

    assert frequencies.infer_freq(as_datetimes) == index.inferred_freq
def test_series():
    # see gh-6407
    #
    # infer_freq accepts a Series of datetimes, not only an index.
    dates = date_range("20130101", "20130110")
    assert frequencies.infer_freq(Series(dates)) == "D"
def test_ms_vs_capital_ms():
    # The two aliases differ only by case: "ms" must resolve to
    # milliseconds and "MS" to month begin.
    milli, month_begin = (
        frequencies._get_offset(alias) for alias in ("ms", "MS")
    )

    assert milli == offsets.Milli()
    assert month_begin == offsets.MonthBegin()
@pytest.mark.parametrize(
    "freqstr",
    [
        "2h20m",
        "U1",
        "-U",
        "3U1",
        "-2-3U",
        "-2D:3H",
        "1.5.0S",
        "2SMS-15-15",
        "2SMS-15D",
        "100foo",
        # Invalid leading +/- signs.
        "+-1d",
        "-+1h",
        "+1",
        "-7",
        "+d",
        "-m",
        # Invalid shortcut anchors.
        "SM-0",
        "SM-28",
        "SM-29",
        "SM-FOO",
        "BSM",
        "SM--1",
        "SMS-1",
        "SMS-28",
        "SMS-30",
        "SMS-BAR",
        "SMS-BYR",
        "BSMS",
        "SMS--2",
    ],
)
def test_to_offset_invalid(freqstr):
    # see gh-13930
    #
    # The expected message embeds the raw input, and some inputs contain
    # regex special characters, so it is escaped before being used as the
    # `match` pattern.
    expected_message = "Invalid frequency: {freqstr}".format(freqstr=freqstr)
    pattern = re.escape(expected_message)

    with pytest.raises(ValueError, match=pattern):
        frequencies.to_offset(freqstr)
def test_to_offset_pd_timedelta_invalid():
    # see gh-9064
    #
    # A zero-length Timedelta is rejected with an "Invalid frequency"
    # error that embeds the Timedelta's string form.
    zero = Timedelta(microseconds=0)

    with pytest.raises(ValueError, match="Invalid frequency: 0 days 00:00:00"):
        frequencies.to_offset(zero)
def test_calendar_caching():
    # see gh-9552.

    class TestCalendar(AbstractHolidayCalendar):
        def __init__(self, name=None, rules=None):
            super().__init__(name=name, rules=rules)

    # (holiday name, day of January 2015, expected date string)
    cases = [("jan1", 1, "01-Jan-2015"), ("jan2", 2, "02-Jan-2015")]

    # Create both calendars before querying either one.
    calendars = [
        TestCalendar(rules=[Holiday(label, year=2015, month=1, day=day)])
        for label, day, _ in cases
    ]

    # Getting holidays for Jan 1 should not alter results for Jan 2.
    for calendar, (_, _, expected_date) in zip(calendars, cases):
        tm.assert_index_equal(calendar.holidays(), DatetimeIndex([expected_date]))
def test_calendar_2031():
    # See gh-27790
    #
    # Labor Day 2031 is on September 1, and the Saturday before it is
    # August 30. The next working day after that Saturday ought to be
    # Tuesday, September 2.

    class testCalendar(AbstractHolidayCalendar):
        rules = [USLaborDay]

    business_day = offsets.CustomBusinessDay(calendar=testCalendar())
    saturday = to_datetime("2031-08-30")

    # Adding zero business days is how the next working day is obtained.
    rolled = saturday + 0 * business_day
    assert rolled == to_datetime("2031-09-02")
def _check_holiday_results(holiday, start, end, expected):
    """
    Assert that a holiday yields the expected dates, with and without a timezone.

    Parameters
    ----------
    holiday : Holiday
        The holiday to check.
    start : datetime-like
        The start date of range in which to collect dates for a given holiday.
    end : datetime-like
        The end date of range in which to collect dates for a given holiday.
    expected : list
        The list of dates we expect to get.
    """
    naive_dates = list(holiday.dates(start, end))
    assert naive_dates == expected

    # Verify that timezone info is preserved: repeat the query with
    # UTC-localized bounds and compare against UTC-localized expectations.
    utc_start = utc.localize(Timestamp(start))
    utc_end = utc.localize(Timestamp(end))
    aware_dates = list(holiday.dates(utc_start, utc_end))
    assert aware_dates == [utc.localize(dt) for dt in expected]
datetime(2012, 11, 22), + datetime(2013, 11, 28), + datetime(2014, 11, 27), + datetime(2015, 11, 26), + datetime(2016, 11, 24), + datetime(2017, 11, 23), + datetime(2018, 11, 22), + datetime(2019, 11, 28), + datetime(2020, 11, 26), + ], + ), + ], +) +def test_holiday_dates(holiday, start_date, end_date, expected): + _check_holiday_results(holiday, start_date, end_date, expected) + + +@pytest.mark.parametrize( + "holiday,start,expected", + [ + (USMemorialDay, datetime(2015, 7, 1), []), + (USMemorialDay, "2015-05-25", "2015-05-25"), + (USLaborDay, datetime(2015, 7, 1), []), + (USLaborDay, "2015-09-07", "2015-09-07"), + (USColumbusDay, datetime(2015, 7, 1), []), + (USColumbusDay, "2015-10-12", "2015-10-12"), + (USThanksgivingDay, datetime(2015, 7, 1), []), + (USThanksgivingDay, "2015-11-26", "2015-11-26"), + (USMartinLutherKingJr, datetime(2015, 7, 1), []), + (USMartinLutherKingJr, "2015-01-19", "2015-01-19"), + (USPresidentsDay, datetime(2015, 7, 1), []), + (USPresidentsDay, "2015-02-16", "2015-02-16"), + (GoodFriday, datetime(2015, 7, 1), []), + (GoodFriday, "2015-04-03", "2015-04-03"), + (EasterMonday, "2015-04-06", "2015-04-06"), + (EasterMonday, datetime(2015, 7, 1), []), + (EasterMonday, "2015-04-05", []), + ("New Years Day", "2015-01-01", "2015-01-01"), + ("New Years Day", "2010-12-31", "2010-12-31"), + ("New Years Day", datetime(2015, 7, 1), []), + ("New Years Day", "2011-01-01", []), + ("July 4th", "2015-07-03", "2015-07-03"), + ("July 4th", datetime(2015, 7, 1), []), + ("July 4th", "2015-07-04", []), + ("Veterans Day", "2012-11-12", "2012-11-12"), + ("Veterans Day", datetime(2015, 7, 1), []), + ("Veterans Day", "2012-11-11", []), + ("Christmas", "2011-12-26", "2011-12-26"), + ("Christmas", datetime(2015, 7, 1), []), + ("Christmas", "2011-12-25", []), + ], +) +def test_holidays_within_dates(holiday, start, expected): + # see gh-11477 + # + # Fix holiday behavior where holiday.dates returned dates outside + # start/end date, or observed rules could not be 
applied because the + # holiday was not in the original date range (e.g., 7/4/2015 -> 7/3/2015). + if isinstance(holiday, str): + calendar = get_calendar("USFederalHolidayCalendar") + holiday = calendar.rule_from_name(holiday) + + if isinstance(expected, str): + expected = [Timestamp(expected)] + + _check_holiday_results(holiday, start, start, expected) + + +@pytest.mark.parametrize( + "transform", [lambda x: x.strftime("%Y-%m-%d"), lambda x: Timestamp(x)] +) +def test_argument_types(transform): + start_date = datetime(2011, 1, 1) + end_date = datetime(2020, 12, 31) + + holidays = USThanksgivingDay.dates(start_date, end_date) + holidays2 = USThanksgivingDay.dates(transform(start_date), transform(end_date)) + tm.assert_index_equal(holidays, holidays2) + + +@pytest.mark.parametrize( + "name,kwargs", + [ + ("One-Time", dict(year=2012, month=5, day=28)), + ( + "Range", + dict( + month=5, + day=28, + start_date=datetime(2012, 1, 1), + end_date=datetime(2012, 12, 31), + offset=DateOffset(weekday=MO(1)), + ), + ), + ], +) +def test_special_holidays(name, kwargs): + base_date = [datetime(2012, 5, 28)] + holiday = Holiday(name, **kwargs) + + start_date = datetime(2011, 1, 1) + end_date = datetime(2020, 12, 31) + + assert base_date == holiday.dates(start_date, end_date) + + +def test_get_calendar(): + class TestCalendar(AbstractHolidayCalendar): + rules = [] + + calendar = get_calendar("TestCalendar") + assert TestCalendar == type(calendar) + + +def test_factory(): + class_1 = HolidayCalendarFactory( + "MemorialDay", AbstractHolidayCalendar, USMemorialDay + ) + class_2 = HolidayCalendarFactory( + "Thanksgiving", AbstractHolidayCalendar, USThanksgivingDay + ) + class_3 = HolidayCalendarFactory("Combined", class_1, class_2) + + assert len(class_1.rules) == 1 + assert len(class_2.rules) == 1 + assert len(class_3.rules) == 2 + + +def test_both_offset_observance_raises(): + # see gh-10217 + msg = "Cannot use both offset and observance" + with pytest.raises(NotImplementedError, 
match=msg): + Holiday( + "Cyber Monday", + month=11, + day=1, + offset=[DateOffset(weekday=SA(4))], + observance=next_monday, + ) diff --git a/pandas/tests/tseries/holiday/test_observance.py b/pandas/tests/tseries/holiday/test_observance.py new file mode 100644 index 00000000..9ee63d2a --- /dev/null +++ b/pandas/tests/tseries/holiday/test_observance.py @@ -0,0 +1,87 @@ +from datetime import datetime + +import pytest + +from pandas.tseries.holiday import ( + after_nearest_workday, + before_nearest_workday, + nearest_workday, + next_monday, + next_monday_or_tuesday, + next_workday, + previous_friday, + previous_workday, + sunday_to_monday, + weekend_to_monday, +) + +_WEDNESDAY = datetime(2014, 4, 9) +_THURSDAY = datetime(2014, 4, 10) +_FRIDAY = datetime(2014, 4, 11) +_SATURDAY = datetime(2014, 4, 12) +_SUNDAY = datetime(2014, 4, 13) +_MONDAY = datetime(2014, 4, 14) +_TUESDAY = datetime(2014, 4, 15) + + +@pytest.mark.parametrize("day", [_SATURDAY, _SUNDAY]) +def test_next_monday(day): + assert next_monday(day) == _MONDAY + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _MONDAY), (_SUNDAY, _TUESDAY), (_MONDAY, _TUESDAY)] +) +def test_next_monday_or_tuesday(day, expected): + assert next_monday_or_tuesday(day) == expected + + +@pytest.mark.parametrize("day", [_SATURDAY, _SUNDAY]) +def test_previous_friday(day): + assert previous_friday(day) == _FRIDAY + + +def test_sunday_to_monday(): + assert sunday_to_monday(_SUNDAY) == _MONDAY + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _FRIDAY), (_SUNDAY, _MONDAY), (_MONDAY, _MONDAY)] +) +def test_nearest_workday(day, expected): + assert nearest_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _MONDAY), (_SUNDAY, _MONDAY), (_MONDAY, _MONDAY)] +) +def test_weekend_to_monday(day, expected): + assert weekend_to_monday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _MONDAY), (_SUNDAY, _MONDAY), (_MONDAY, _TUESDAY)] +) +def 
test_next_workday(day, expected): + assert next_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _FRIDAY), (_SUNDAY, _FRIDAY), (_TUESDAY, _MONDAY)] +) +def test_previous_workday(day, expected): + assert previous_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _THURSDAY), (_SUNDAY, _FRIDAY), (_TUESDAY, _MONDAY)] +) +def test_before_nearest_workday(day, expected): + assert before_nearest_workday(day) == expected + + +@pytest.mark.parametrize( + "day,expected", [(_SATURDAY, _MONDAY), (_SUNDAY, _TUESDAY), (_FRIDAY, _MONDAY)] +) +def test_after_nearest_workday(day, expected): + assert after_nearest_workday(day) == expected diff --git a/pandas/tests/tseries/offsets/__init__.py b/pandas/tests/tseries/offsets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py new file mode 100644 index 00000000..71953fd0 --- /dev/null +++ b/pandas/tests/tseries/offsets/common.py @@ -0,0 +1,26 @@ +""" +Assertion helpers for offsets tests +""" + + +def assert_offset_equal(offset, base, expected): + actual = offset + base + actual_swapped = base + offset + actual_apply = offset.apply(base) + try: + assert actual == expected + assert actual_swapped == expected + assert actual_apply == expected + except AssertionError: + raise AssertionError( + f"\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})" + f"\nAt Date: {base}" + ) + + +def assert_is_on_offset(offset, date, expected): + actual = offset.is_on_offset(date) + assert actual == expected, ( + f"\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})" + f"\nAt Date: {date}" + ) diff --git a/pandas/tests/tseries/offsets/conftest.py b/pandas/tests/tseries/offsets/conftest.py new file mode 100644 index 00000000..2f6868f5 --- /dev/null +++ b/pandas/tests/tseries/offsets/conftest.py @@ -0,0 +1,25 @@ +import pytest + +import pandas.tseries.offsets as 
offsets + + +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__]) +def offset_types(request): + """ + Fixture for all the datetime offsets available for a time series. + """ + return request.param + + +@pytest.fixture( + params=[ + getattr(offsets, o) + for o in offsets.__all__ + if issubclass(getattr(offsets, o), offsets.MonthOffset) and o != "MonthOffset" + ] +) +def month_classes(request): + """ + Fixture for month based datetime offsets available for a time series. + """ + return request.param diff --git a/pandas/tests/tseries/offsets/data/cday-0.14.1.pickle b/pandas/tests/tseries/offsets/data/cday-0.14.1.pickle new file mode 100644 index 0000000000000000000000000000000000000000..48488099482e4e8823cdc89371c7d567e415419c GIT binary patch literal 492 zcmb8syH5f^5C(7-5svi@KH4kK=4uOVkXU$ug0L~MTW*KkhI=qOyCG+Ci8dJjYCWJa zbbOOcGs&;`=Cc_uWv(QJmcVGBu$a#YEb(VYw#WvKbPhyAS#o6eU)ZehC=lJeFqusC z2Vm<*v=%ExcwAvycuA3JB}>9i5a%vimSJ2N2s4*jEsd!eie)MdM{jrM%9cyu*cHvu zL)3`a_XK$mwFG+{kH@dSb+#*NRDoc_h;7EM?M!&@PmOKX6Ff=0}GsGho|8^Q9332z0k$RsWBR`f?j?oS=~ UZ=n2X;`|q*@h<5kJvR&g0L;>%1poj5 literal 0 HcmV?d00001 diff --git a/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle b/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle new file mode 100644 index 00000000..ce561526 --- /dev/null +++ b/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle @@ -0,0 +1,183 @@ +(dp0 +S'YearBegin' +p1 +ccopy_reg +_reconstructor +p2 +(cpandas.tseries.offsets +YearBegin +p3 +c__builtin__ +object +p4 +Ntp5 +Rp6 +(dp7 +S'normalize' +p8 +I00 +sS'kwds' +p9 +(dp10 +sS'n' +p11 +I1 +sS'_offset' +p12 +cdatetime +timedelta +p13 +(I1 +I0 +I0 +tp14 +Rp15 +sS'month' +p16 +I1 +sS'_use_relativedelta' +p17 +I00 +sbsS'Week' +p18 +g2 +(cpandas.tseries.offsets +Week +p19 +g4 +Ntp20 +Rp21 +(dp22 +g8 +I00 +sS'_inc' +p23 +g13 +(I7 +I0 +I0 +tp24 +Rp25 +sg9 +(dp26 +sS'weekday' +p27 +Nsg11 +I1 +sbsS'MonthBegin' +p28 +g2 +(cpandas.tseries.offsets +MonthBegin +p29 +g4 +Ntp30 +Rp31 +(dp32 +g8 +I00 
+sg12 +g13 +(I1 +I0 +I0 +tp33 +Rp34 +sg17 +I00 +sg9 +(dp35 +sg11 +I1 +sbsS'Day' +p36 +g2 +(cpandas.tseries.offsets +Day +p37 +g4 +Ntp38 +Rp39 +(dp40 +g8 +I00 +sg12 +g13 +(I1 +I0 +I0 +tp41 +Rp42 +sg17 +I00 +sg9 +(dp43 +sg11 +I1 +sbsS'DateOffset' +p44 +g2 +(cpandas.tseries.offsets +DateOffset +p45 +g4 +Ntp46 +Rp47 +(dp48 +g8 +I00 +sg12 +(idateutil.relativedelta +relativedelta +p49 +(dp50 +S'_has_time' +p51 +I0 +sS'hour' +p52 +NsS'seconds' +p53 +I0 +sS'months' +p54 +I0 +sS'year' +p55 +NsS'days' +p56 +I0 +sS'years' +p57 +I1 +sS'hours' +p58 +I0 +sS'second' +p59 +NsS'microsecond' +p60 +Nsg16 +NsS'microseconds' +p61 +I0 +sS'leapdays' +p62 +I0 +sS'minutes' +p63 +I0 +sS'day' +p64 +NsS'minute' +p65 +Nsg27 +Nsbsg17 +I01 +sg9 +(dp66 +g57 +I1 +ssg11 +I1 +sbs. \ No newline at end of file diff --git a/pandas/tests/tseries/offsets/test_fiscal.py b/pandas/tests/tseries/offsets/test_fiscal.py new file mode 100644 index 00000000..56861195 --- /dev/null +++ b/pandas/tests/tseries/offsets/test_fiscal.py @@ -0,0 +1,692 @@ +""" +Tests for Fiscal Year and Fiscal Quarter offset classes +""" +from datetime import datetime + +from dateutil.relativedelta import relativedelta +import pytest + +from pandas._libs.tslibs.frequencies import INVALID_FREQ_ERR_MSG + +from pandas import Timestamp +import pandas._testing as tm + +from pandas.tseries.frequencies import get_offset +from pandas.tseries.offsets import FY5253, FY5253Quarter + +from .common import assert_is_on_offset, assert_offset_equal +from .test_offsets import Base, WeekDay + + +def makeFY5253LastOfMonthQuarter(*args, **kwds): + return FY5253Quarter(*args, variation="last", **kwds) + + +def makeFY5253NearestEndMonthQuarter(*args, **kwds): + return FY5253Quarter(*args, variation="nearest", **kwds) + + +def makeFY5253NearestEndMonth(*args, **kwds): + return FY5253(*args, variation="nearest", **kwds) + + +def makeFY5253LastOfMonth(*args, **kwds): + return FY5253(*args, variation="last", **kwds) + + +def test_get_offset_name(): + assert ( + 
makeFY5253LastOfMonthQuarter( + weekday=1, startingMonth=3, qtr_with_extra_week=4 + ).freqstr + == "REQ-L-MAR-TUE-4" + ) + assert ( + makeFY5253NearestEndMonthQuarter( + weekday=1, startingMonth=3, qtr_with_extra_week=3 + ).freqstr + == "REQ-N-MAR-TUE-3" + ) + + +def test_get_offset(): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + with tm.assert_produces_warning(FutureWarning): + get_offset("gibberish") + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + with tm.assert_produces_warning(FutureWarning): + get_offset("QS-JAN-B") + + pairs = [ + ("RE-N-DEC-MON", makeFY5253NearestEndMonth(weekday=0, startingMonth=12)), + ("RE-L-DEC-TUE", makeFY5253LastOfMonth(weekday=1, startingMonth=12)), + ( + "REQ-L-MAR-TUE-4", + makeFY5253LastOfMonthQuarter( + weekday=1, startingMonth=3, qtr_with_extra_week=4 + ), + ), + ( + "REQ-L-DEC-MON-3", + makeFY5253LastOfMonthQuarter( + weekday=0, startingMonth=12, qtr_with_extra_week=3 + ), + ), + ( + "REQ-N-DEC-MON-3", + makeFY5253NearestEndMonthQuarter( + weekday=0, startingMonth=12, qtr_with_extra_week=3 + ), + ), + ] + + for name, expected in pairs: + with tm.assert_produces_warning(FutureWarning): + offset = get_offset(name) + assert offset == expected, ( + f"Expected {repr(name)} to yield {repr(expected)} " + f"(actual: {repr(offset)})" + ) + + +class TestFY5253LastOfMonth(Base): + offset_lom_sat_aug = makeFY5253LastOfMonth(1, startingMonth=8, weekday=WeekDay.SAT) + offset_lom_sat_sep = makeFY5253LastOfMonth(1, startingMonth=9, weekday=WeekDay.SAT) + + on_offset_cases = [ + # From Wikipedia (see: + # http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar#Last_Saturday_of_the_month_at_fiscal_year_end) + (offset_lom_sat_aug, datetime(2006, 8, 26), True), + (offset_lom_sat_aug, datetime(2007, 8, 25), True), + (offset_lom_sat_aug, datetime(2008, 8, 30), True), + (offset_lom_sat_aug, datetime(2009, 8, 29), True), + (offset_lom_sat_aug, datetime(2010, 8, 28), True), + (offset_lom_sat_aug, datetime(2011, 8, 
27), True), + (offset_lom_sat_aug, datetime(2012, 8, 25), True), + (offset_lom_sat_aug, datetime(2013, 8, 31), True), + (offset_lom_sat_aug, datetime(2014, 8, 30), True), + (offset_lom_sat_aug, datetime(2015, 8, 29), True), + (offset_lom_sat_aug, datetime(2016, 8, 27), True), + (offset_lom_sat_aug, datetime(2017, 8, 26), True), + (offset_lom_sat_aug, datetime(2018, 8, 25), True), + (offset_lom_sat_aug, datetime(2019, 8, 31), True), + (offset_lom_sat_aug, datetime(2006, 8, 27), False), + (offset_lom_sat_aug, datetime(2007, 8, 28), False), + (offset_lom_sat_aug, datetime(2008, 8, 31), False), + (offset_lom_sat_aug, datetime(2009, 8, 30), False), + (offset_lom_sat_aug, datetime(2010, 8, 29), False), + (offset_lom_sat_aug, datetime(2011, 8, 28), False), + (offset_lom_sat_aug, datetime(2006, 8, 25), False), + (offset_lom_sat_aug, datetime(2007, 8, 24), False), + (offset_lom_sat_aug, datetime(2008, 8, 29), False), + (offset_lom_sat_aug, datetime(2009, 8, 28), False), + (offset_lom_sat_aug, datetime(2010, 8, 27), False), + (offset_lom_sat_aug, datetime(2011, 8, 26), False), + (offset_lom_sat_aug, datetime(2019, 8, 30), False), + # From GMCR (see for example: + # http://yahoo.brand.edgar-online.com/Default.aspx? 
+ # companyid=3184&formtypeID=7) + (offset_lom_sat_sep, datetime(2010, 9, 25), True), + (offset_lom_sat_sep, datetime(2011, 9, 24), True), + (offset_lom_sat_sep, datetime(2012, 9, 29), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_apply(self): + offset_lom_aug_sat = makeFY5253LastOfMonth(startingMonth=8, weekday=WeekDay.SAT) + offset_lom_aug_sat_1 = makeFY5253LastOfMonth( + n=1, startingMonth=8, weekday=WeekDay.SAT + ) + + date_seq_lom_aug_sat = [ + datetime(2006, 8, 26), + datetime(2007, 8, 25), + datetime(2008, 8, 30), + datetime(2009, 8, 29), + datetime(2010, 8, 28), + datetime(2011, 8, 27), + datetime(2012, 8, 25), + datetime(2013, 8, 31), + datetime(2014, 8, 30), + datetime(2015, 8, 29), + datetime(2016, 8, 27), + ] + + tests = [ + (offset_lom_aug_sat, date_seq_lom_aug_sat), + (offset_lom_aug_sat_1, date_seq_lom_aug_sat), + (offset_lom_aug_sat, [datetime(2006, 8, 25)] + date_seq_lom_aug_sat), + (offset_lom_aug_sat_1, [datetime(2006, 8, 27)] + date_seq_lom_aug_sat[1:]), + ( + makeFY5253LastOfMonth(n=-1, startingMonth=8, weekday=WeekDay.SAT), + list(reversed(date_seq_lom_aug_sat)), + ), + ] + for test in tests: + offset, data = test + current = data[0] + for datum in data[1:]: + current = current + offset + assert current == datum + + +class TestFY5253NearestEndMonth(Base): + def test_get_year_end(self): + assert makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.SAT + ).get_year_end(datetime(2013, 1, 1)) == datetime(2013, 8, 31) + assert makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.SUN + ).get_year_end(datetime(2013, 1, 1)) == datetime(2013, 9, 1) + assert makeFY5253NearestEndMonth( + startingMonth=8, weekday=WeekDay.FRI + ).get_year_end(datetime(2013, 1, 1)) == datetime(2013, 8, 30) + + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + assert 
offset_n.get_year_end(datetime(2012, 1, 1)) == datetime(2013, 1, 1) + assert offset_n.get_year_end(datetime(2012, 1, 10)) == datetime(2013, 1, 1) + + assert offset_n.get_year_end(datetime(2013, 1, 1)) == datetime(2013, 12, 31) + assert offset_n.get_year_end(datetime(2013, 1, 2)) == datetime(2013, 12, 31) + assert offset_n.get_year_end(datetime(2013, 1, 3)) == datetime(2013, 12, 31) + assert offset_n.get_year_end(datetime(2013, 1, 10)) == datetime(2013, 12, 31) + + JNJ = FY5253(n=1, startingMonth=12, weekday=6, variation="nearest") + assert JNJ.get_year_end(datetime(2006, 1, 1)) == datetime(2006, 12, 31) + + offset_lom_aug_sat = makeFY5253NearestEndMonth( + 1, startingMonth=8, weekday=WeekDay.SAT + ) + offset_lom_aug_thu = makeFY5253NearestEndMonth( + 1, startingMonth=8, weekday=WeekDay.THU + ) + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + + on_offset_cases = [ + # From Wikipedia (see: + # http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar + # #Saturday_nearest_the_end_of_month) + # 2006-09-02 2006 September 2 + # 2007-09-01 2007 September 1 + # 2008-08-30 2008 August 30 (leap year) + # 2009-08-29 2009 August 29 + # 2010-08-28 2010 August 28 + # 2011-09-03 2011 September 3 + # 2012-09-01 2012 September 1 (leap year) + # 2013-08-31 2013 August 31 + # 2014-08-30 2014 August 30 + # 2015-08-29 2015 August 29 + # 2016-09-03 2016 September 3 (leap year) + # 2017-09-02 2017 September 2 + # 2018-09-01 2018 September 1 + # 2019-08-31 2019 August 31 + (offset_lom_aug_sat, datetime(2006, 9, 2), True), + (offset_lom_aug_sat, datetime(2007, 9, 1), True), + (offset_lom_aug_sat, datetime(2008, 8, 30), True), + (offset_lom_aug_sat, datetime(2009, 8, 29), True), + (offset_lom_aug_sat, datetime(2010, 8, 28), True), + (offset_lom_aug_sat, datetime(2011, 9, 3), True), + (offset_lom_aug_sat, datetime(2016, 9, 3), True), + (offset_lom_aug_sat, datetime(2017, 9, 2), True), + (offset_lom_aug_sat, datetime(2018, 9, 1), True), + 
(offset_lom_aug_sat, datetime(2019, 8, 31), True), + (offset_lom_aug_sat, datetime(2006, 8, 27), False), + (offset_lom_aug_sat, datetime(2007, 8, 28), False), + (offset_lom_aug_sat, datetime(2008, 8, 31), False), + (offset_lom_aug_sat, datetime(2009, 8, 30), False), + (offset_lom_aug_sat, datetime(2010, 8, 29), False), + (offset_lom_aug_sat, datetime(2011, 8, 28), False), + (offset_lom_aug_sat, datetime(2006, 8, 25), False), + (offset_lom_aug_sat, datetime(2007, 8, 24), False), + (offset_lom_aug_sat, datetime(2008, 8, 29), False), + (offset_lom_aug_sat, datetime(2009, 8, 28), False), + (offset_lom_aug_sat, datetime(2010, 8, 27), False), + (offset_lom_aug_sat, datetime(2011, 8, 26), False), + (offset_lom_aug_sat, datetime(2019, 8, 30), False), + # From Micron, see: + # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 + (offset_lom_aug_thu, datetime(2012, 8, 30), True), + (offset_lom_aug_thu, datetime(2011, 9, 1), True), + (offset_n, datetime(2012, 12, 31), False), + (offset_n, datetime(2013, 1, 1), True), + (offset_n, datetime(2013, 1, 2), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_apply(self): + date_seq_nem_8_sat = [ + datetime(2006, 9, 2), + datetime(2007, 9, 1), + datetime(2008, 8, 30), + datetime(2009, 8, 29), + datetime(2010, 8, 28), + datetime(2011, 9, 3), + ] + + JNJ = [ + datetime(2005, 1, 2), + datetime(2006, 1, 1), + datetime(2006, 12, 31), + datetime(2007, 12, 30), + datetime(2008, 12, 28), + datetime(2010, 1, 3), + datetime(2011, 1, 2), + datetime(2012, 1, 1), + datetime(2012, 12, 30), + ] + + DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, variation="nearest") + + tests = [ + ( + makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), + date_seq_nem_8_sat, + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), + date_seq_nem_8_sat, + ), + ( + 
makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), + [datetime(2006, 9, 1)] + date_seq_nem_8_sat, + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), + [datetime(2006, 9, 3)] + date_seq_nem_8_sat[1:], + ), + ( + makeFY5253NearestEndMonth(n=-1, startingMonth=8, weekday=WeekDay.SAT), + list(reversed(date_seq_nem_8_sat)), + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), + JNJ, + ), + ( + makeFY5253NearestEndMonth(n=-1, startingMonth=12, weekday=WeekDay.SUN), + list(reversed(JNJ)), + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), + [datetime(2005, 1, 2), datetime(2006, 1, 1)], + ), + ( + makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), + [datetime(2006, 1, 2), datetime(2006, 12, 31)], + ), + (DEC_SAT, [datetime(2013, 1, 15), datetime(2012, 12, 29)]), + ] + for test in tests: + offset, data = test + current = data[0] + for datum in data[1:]: + current = current + offset + assert current == datum + + +class TestFY5253LastOfMonthQuarter(Base): + def test_is_anchored(self): + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ).is_anchored() + assert makeFY5253LastOfMonthQuarter( + weekday=WeekDay.SAT, startingMonth=3, qtr_with_extra_week=4 + ).is_anchored() + assert not makeFY5253LastOfMonthQuarter( + 2, startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ).is_anchored() + + def test_equality(self): + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) == makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) != makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SUN, qtr_with_extra_week=4 + ) + assert makeFY5253LastOfMonthQuarter( + startingMonth=1, weekday=WeekDay.SAT, 
qtr_with_extra_week=4 + ) != makeFY5253LastOfMonthQuarter( + startingMonth=2, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + def test_offset(self): + offset = makeFY5253LastOfMonthQuarter( + 1, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset2 = makeFY5253LastOfMonthQuarter( + 2, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset4 = makeFY5253LastOfMonthQuarter( + 4, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + offset_neg1 = makeFY5253LastOfMonthQuarter( + -1, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset_neg2 = makeFY5253LastOfMonthQuarter( + -2, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + GMCR = [ + datetime(2010, 3, 27), + datetime(2010, 6, 26), + datetime(2010, 9, 25), + datetime(2010, 12, 25), + datetime(2011, 3, 26), + datetime(2011, 6, 25), + datetime(2011, 9, 24), + datetime(2011, 12, 24), + datetime(2012, 3, 24), + datetime(2012, 6, 23), + datetime(2012, 9, 29), + datetime(2012, 12, 29), + datetime(2013, 3, 30), + datetime(2013, 6, 29), + ] + + assert_offset_equal(offset, base=GMCR[0], expected=GMCR[1]) + assert_offset_equal( + offset, base=GMCR[0] + relativedelta(days=-1), expected=GMCR[0] + ) + assert_offset_equal(offset, base=GMCR[1], expected=GMCR[2]) + + assert_offset_equal(offset2, base=GMCR[0], expected=GMCR[2]) + assert_offset_equal(offset4, base=GMCR[0], expected=GMCR[4]) + + assert_offset_equal(offset_neg1, base=GMCR[-1], expected=GMCR[-2]) + assert_offset_equal( + offset_neg1, base=GMCR[-1] + relativedelta(days=+1), expected=GMCR[-1] + ) + assert_offset_equal(offset_neg2, base=GMCR[-1], expected=GMCR[-3]) + + date = GMCR[0] + relativedelta(days=-1) + for expected in GMCR: + assert_offset_equal(offset, date, expected) + date = date + offset + + date = GMCR[-1] + relativedelta(days=+1) + for expected in reversed(GMCR): + assert_offset_equal(offset_neg1, date, expected) + date = date + offset_neg1 + + lomq_aug_sat_4 = 
makeFY5253LastOfMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + lomq_sep_sat_4 = makeFY5253LastOfMonthQuarter( + 1, startingMonth=9, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + on_offset_cases = [ + # From Wikipedia + (lomq_aug_sat_4, datetime(2006, 8, 26), True), + (lomq_aug_sat_4, datetime(2007, 8, 25), True), + (lomq_aug_sat_4, datetime(2008, 8, 30), True), + (lomq_aug_sat_4, datetime(2009, 8, 29), True), + (lomq_aug_sat_4, datetime(2010, 8, 28), True), + (lomq_aug_sat_4, datetime(2011, 8, 27), True), + (lomq_aug_sat_4, datetime(2019, 8, 31), True), + (lomq_aug_sat_4, datetime(2006, 8, 27), False), + (lomq_aug_sat_4, datetime(2007, 8, 28), False), + (lomq_aug_sat_4, datetime(2008, 8, 31), False), + (lomq_aug_sat_4, datetime(2009, 8, 30), False), + (lomq_aug_sat_4, datetime(2010, 8, 29), False), + (lomq_aug_sat_4, datetime(2011, 8, 28), False), + (lomq_aug_sat_4, datetime(2006, 8, 25), False), + (lomq_aug_sat_4, datetime(2007, 8, 24), False), + (lomq_aug_sat_4, datetime(2008, 8, 29), False), + (lomq_aug_sat_4, datetime(2009, 8, 28), False), + (lomq_aug_sat_4, datetime(2010, 8, 27), False), + (lomq_aug_sat_4, datetime(2011, 8, 26), False), + (lomq_aug_sat_4, datetime(2019, 8, 30), False), + # From GMCR + (lomq_sep_sat_4, datetime(2010, 9, 25), True), + (lomq_sep_sat_4, datetime(2011, 9, 24), True), + (lomq_sep_sat_4, datetime(2012, 9, 29), True), + (lomq_sep_sat_4, datetime(2013, 6, 29), True), + (lomq_sep_sat_4, datetime(2012, 6, 23), True), + (lomq_sep_sat_4, datetime(2012, 6, 30), False), + (lomq_sep_sat_4, datetime(2013, 3, 30), True), + (lomq_sep_sat_4, datetime(2012, 3, 24), True), + (lomq_sep_sat_4, datetime(2012, 12, 29), True), + (lomq_sep_sat_4, datetime(2011, 12, 24), True), + # INTC (extra week in Q1) + # See: http://www.intc.com/releasedetail.cfm?ReleaseID=542844 + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2011, 4, 2), + True, + ), + # 
see: http://google.brand.edgar-online.com/?sym=INTC&formtypeID=7 + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2012, 12, 29), + True, + ), + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2011, 12, 31), + True, + ), + ( + makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ), + datetime(2010, 12, 25), + True, + ), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_year_has_extra_week(self): + # End of long Q1 + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2011, 4, 2)) + + # Start of long Q1 + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2010, 12, 26)) + + # End of year before year with long Q1 + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2010, 12, 25)) + + for year in [ + x for x in range(1994, 2011 + 1) if x not in [2011, 2005, 2000, 1994] + ]: + assert not makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(year, 4, 2)) + + # Other long years + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2005, 4, 2)) + + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(2000, 4, 2)) + + assert makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ).year_has_extra_week(datetime(1994, 4, 2)) + + def 
test_get_weeks(self): + sat_dec_1 = makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=1 + ) + sat_dec_4 = makeFY5253LastOfMonthQuarter( + 1, startingMonth=12, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + + assert sat_dec_1.get_weeks(datetime(2011, 4, 2)) == [14, 13, 13, 13] + assert sat_dec_4.get_weeks(datetime(2011, 4, 2)) == [13, 13, 13, 14] + assert sat_dec_1.get_weeks(datetime(2010, 12, 25)) == [13, 13, 13, 13] + + +class TestFY5253NearestEndMonthQuarter(Base): + + offset_nem_sat_aug_4 = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.SAT, qtr_with_extra_week=4 + ) + offset_nem_thu_aug_4 = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4 + ) + offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") + + on_offset_cases = [ + # From Wikipedia + (offset_nem_sat_aug_4, datetime(2006, 9, 2), True), + (offset_nem_sat_aug_4, datetime(2007, 9, 1), True), + (offset_nem_sat_aug_4, datetime(2008, 8, 30), True), + (offset_nem_sat_aug_4, datetime(2009, 8, 29), True), + (offset_nem_sat_aug_4, datetime(2010, 8, 28), True), + (offset_nem_sat_aug_4, datetime(2011, 9, 3), True), + (offset_nem_sat_aug_4, datetime(2016, 9, 3), True), + (offset_nem_sat_aug_4, datetime(2017, 9, 2), True), + (offset_nem_sat_aug_4, datetime(2018, 9, 1), True), + (offset_nem_sat_aug_4, datetime(2019, 8, 31), True), + (offset_nem_sat_aug_4, datetime(2006, 8, 27), False), + (offset_nem_sat_aug_4, datetime(2007, 8, 28), False), + (offset_nem_sat_aug_4, datetime(2008, 8, 31), False), + (offset_nem_sat_aug_4, datetime(2009, 8, 30), False), + (offset_nem_sat_aug_4, datetime(2010, 8, 29), False), + (offset_nem_sat_aug_4, datetime(2011, 8, 28), False), + (offset_nem_sat_aug_4, datetime(2006, 8, 25), False), + (offset_nem_sat_aug_4, datetime(2007, 8, 24), False), + (offset_nem_sat_aug_4, datetime(2008, 8, 29), False), + (offset_nem_sat_aug_4, datetime(2009, 8, 28), 
False), + (offset_nem_sat_aug_4, datetime(2010, 8, 27), False), + (offset_nem_sat_aug_4, datetime(2011, 8, 26), False), + (offset_nem_sat_aug_4, datetime(2019, 8, 30), False), + # From Micron, see: + # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 + (offset_nem_thu_aug_4, datetime(2012, 8, 30), True), + (offset_nem_thu_aug_4, datetime(2011, 9, 1), True), + # See: http://google.brand.edgar-online.com/?sym=MU&formtypeID=13 + (offset_nem_thu_aug_4, datetime(2013, 5, 30), True), + (offset_nem_thu_aug_4, datetime(2013, 2, 28), True), + (offset_nem_thu_aug_4, datetime(2012, 11, 29), True), + (offset_nem_thu_aug_4, datetime(2012, 5, 31), True), + (offset_nem_thu_aug_4, datetime(2007, 3, 1), True), + (offset_nem_thu_aug_4, datetime(1994, 3, 3), True), + (offset_n, datetime(2012, 12, 31), False), + (offset_n, datetime(2013, 1, 1), True), + (offset_n, datetime(2013, 1, 2), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + def test_offset(self): + offset = makeFY5253NearestEndMonthQuarter( + 1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4 + ) + + MU = [ + datetime(2012, 5, 31), + datetime(2012, 8, 30), + datetime(2012, 11, 29), + datetime(2013, 2, 28), + datetime(2013, 5, 30), + ] + + date = MU[0] + relativedelta(days=-1) + for expected in MU: + assert_offset_equal(offset, date, expected) + date = date + offset + + assert_offset_equal(offset, datetime(2012, 5, 31), datetime(2012, 8, 30)) + assert_offset_equal(offset, datetime(2012, 5, 30), datetime(2012, 5, 31)) + + offset2 = FY5253Quarter( + weekday=5, startingMonth=12, variation="last", qtr_with_extra_week=4 + ) + + assert_offset_equal(offset2, datetime(2013, 1, 15), datetime(2013, 3, 30)) + + +def test_bunched_yearends(): + # GH#14774 cases with two fiscal year-ends in the same calendar-year + fy = FY5253(n=1, weekday=5, startingMonth=12, variation="nearest") + dt = 
Timestamp("2004-01-01") + assert fy.rollback(dt) == Timestamp("2002-12-28") + assert (-fy).apply(dt) == Timestamp("2002-12-28") + assert dt - fy == Timestamp("2002-12-28") + + assert fy.rollforward(dt) == Timestamp("2004-01-03") + assert fy.apply(dt) == Timestamp("2004-01-03") + assert fy + dt == Timestamp("2004-01-03") + assert dt + fy == Timestamp("2004-01-03") + + # Same thing, but starting from a Timestamp in the previous year. + dt = Timestamp("2003-12-31") + assert fy.rollback(dt) == Timestamp("2002-12-28") + assert (-fy).apply(dt) == Timestamp("2002-12-28") + assert dt - fy == Timestamp("2002-12-28") + + +def test_fy5253_last_onoffset(): + # GH#18877 dates on the year-end but not normalized to midnight + offset = FY5253(n=-5, startingMonth=5, variation="last", weekday=0) + ts = Timestamp("1984-05-28 06:29:43.955911354+0200", tz="Europe/San_Marino") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253_nearest_onoffset(): + # GH#18877 dates on the year-end but not normalized to midnight + offset = FY5253(n=3, startingMonth=7, variation="nearest", weekday=2) + ts = Timestamp("2032-07-28 00:12:59.035729419+0000", tz="Africa/Dakar") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253qtr_onoffset_nearest(): + # GH#19036 + ts = Timestamp("1985-09-02 23:57:46.232550356-0300", tz="Atlantic/Bermuda") + offset = FY5253Quarter( + n=3, qtr_with_extra_week=1, startingMonth=2, variation="nearest", weekday=0 + ) + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_fy5253qtr_onoffset_last(): + # GH#19036 + offset = FY5253Quarter( + n=-2, qtr_with_extra_week=1, startingMonth=7, variation="last", weekday=2 + ) + ts = Timestamp("2011-01-26 19:03:40.331096129+0200", tz="Africa/Windhoek") + slow = (ts + offset) - offset == ts + fast = offset.is_on_offset(ts) + assert fast == slow diff --git 
a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py new file mode 100644 index 00000000..2f00a58f --- /dev/null +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -0,0 +1,4415 @@ +from datetime import date, datetime, time as dt_time, timedelta +from typing import Dict, List, Optional, Tuple, Type + +import numpy as np +import pytest + +from pandas._libs.tslibs import ( + NaT, + OutOfBoundsDatetime, + Timestamp, + conversion, + timezones, +) +from pandas._libs.tslibs.frequencies import ( + INVALID_FREQ_ERR_MSG, + get_freq_code, + get_freq_str, +) +import pandas._libs.tslibs.offsets as liboffsets +from pandas._libs.tslibs.offsets import ApplyTypeError +import pandas.compat as compat +from pandas.compat.numpy import np_datetime64_compat +from pandas.errors import PerformanceWarning + +import pandas._testing as tm +from pandas.core.indexes.datetimes import DatetimeIndex, date_range +from pandas.core.series import Series + +from pandas.io.pickle import read_pickle +from pandas.tseries.frequencies import _get_offset, _offset_map +from pandas.tseries.holiday import USFederalHolidayCalendar +import pandas.tseries.offsets as offsets +from pandas.tseries.offsets import ( + FY5253, + BaseOffset, + BDay, + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BusinessHour, + BYearBegin, + BYearEnd, + CBMonthBegin, + CBMonthEnd, + CDay, + CustomBusinessDay, + CustomBusinessHour, + CustomBusinessMonthBegin, + CustomBusinessMonthEnd, + DateOffset, + Day, + Easter, + FY5253Quarter, + LastWeekOfMonth, + MonthBegin, + MonthEnd, + Nano, + QuarterBegin, + QuarterEnd, + SemiMonthBegin, + SemiMonthEnd, + Tick, + Week, + WeekOfMonth, + YearBegin, + YearEnd, +) + +from .common import assert_is_on_offset, assert_offset_equal + + +class WeekDay: + # TODO: Remove: This is not used outside of tests + MON = 0 + TUE = 1 + WED = 2 + THU = 3 + FRI = 4 + SAT = 5 + SUN = 6 + + +##### +# DateOffset Tests +##### +_ApplyCases = List[Tuple[BaseOffset, 
Dict[datetime, datetime]]] + + +class Base: + _offset: Optional[Type[DateOffset]] = None + d = Timestamp(datetime(2008, 1, 2)) + + timezones = [ + None, + "UTC", + "Asia/Tokyo", + "US/Eastern", + "dateutil/Asia/Tokyo", + "dateutil/US/Pacific", + ] + + def _get_offset(self, klass, value=1, normalize=False): + # create instance from offset class + if klass is FY5253: + klass = klass( + n=value, + startingMonth=1, + weekday=1, + variation="last", + normalize=normalize, + ) + elif klass is FY5253Quarter: + klass = klass( + n=value, + startingMonth=1, + weekday=1, + qtr_with_extra_week=1, + variation="last", + normalize=normalize, + ) + elif klass is LastWeekOfMonth: + klass = klass(n=value, weekday=5, normalize=normalize) + elif klass is WeekOfMonth: + klass = klass(n=value, week=1, weekday=5, normalize=normalize) + elif klass is Week: + klass = klass(n=value, weekday=5, normalize=normalize) + elif klass is DateOffset: + klass = klass(days=value, normalize=normalize) + else: + klass = klass(value, normalize=normalize) + return klass + + def test_apply_out_of_range(self, tz_naive_fixture): + tz = tz_naive_fixture + if self._offset is None: + return + + # try to create an out-of-bounds result timestamp; if we can't create + # the offset skip + try: + if self._offset in (BusinessHour, CustomBusinessHour): + # Using 10000 in BusinessHour fails in tz check because of DST + # difference + offset = self._get_offset(self._offset, value=100000) + else: + offset = self._get_offset(self._offset, value=10000) + + result = Timestamp("20080101") + offset + assert isinstance(result, datetime) + assert result.tzinfo is None + + # Check tz is preserved + t = Timestamp("20080101", tz=tz) + result = t + offset + assert isinstance(result, datetime) + assert t.tzinfo == result.tzinfo + + except OutOfBoundsDatetime: + pass + except (ValueError, KeyError): + # we are creating an invalid offset + # so ignore + pass + + def test_offsets_compare_equal(self): + # root cause of GH#456: __ne__ was 
not implemented + if self._offset is None: + return + offset1 = self._offset() + offset2 = self._offset() + assert not offset1 != offset2 + assert offset1 == offset2 + + def test_rsub(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + assert self.d - self.offset2 == (-self.offset2).apply(self.d) + + def test_radd(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + assert self.d + self.offset2 == self.offset2 + self.d + + def test_sub(self): + if self._offset is None or not hasattr(self, "offset2"): + # i.e. skip for TestCommon and YQM subclasses that do not have + # offset2 attr + return + off = self.offset2 + msg = "Cannot subtract datetime from offset" + with pytest.raises(TypeError, match=msg): + off - self.d + + assert 2 * off - off == off + assert self.d - self.offset2 == self.d + self._offset(-2) + assert self.d - self.offset2 == self.d - (2 * off - off) + + def testMult1(self): + if self._offset is None or not hasattr(self, "offset1"): + # i.e. 
skip for TestCommon and YQM subclasses that do not have + # offset1 attr + return + assert self.d + 10 * self.offset1 == self.d + self._offset(10) + assert self.d + 5 * self.offset1 == self.d + self._offset(5) + + def testMult2(self): + if self._offset is None: + return + assert self.d + (-5 * self._offset(-10)) == self.d + self._offset(50) + assert self.d + (-3 * self._offset(-2)) == self.d + self._offset(6) + + def test_compare_str(self): + # GH#23524 + # comparing to strings that cannot be cast to DateOffsets should + # not raise for __eq__ or __ne__ + if self._offset is None: + return + off = self._get_offset(self._offset) + + assert not off == "infer" + assert off != "foo" + # Note: inequalities are only implemented for Tick subclasses; + # tests for this are in test_ticks + + +class TestCommon(Base): + # expected value created by Base._get_offset + # are applied to 2011/01/01 09:00 (Saturday) + # used for .apply and .rollforward + expecteds = { + "Day": Timestamp("2011-01-02 09:00:00"), + "DateOffset": Timestamp("2011-01-02 09:00:00"), + "BusinessDay": Timestamp("2011-01-03 09:00:00"), + "CustomBusinessDay": Timestamp("2011-01-03 09:00:00"), + "CustomBusinessMonthEnd": Timestamp("2011-01-31 09:00:00"), + "CustomBusinessMonthBegin": Timestamp("2011-01-03 09:00:00"), + "MonthBegin": Timestamp("2011-02-01 09:00:00"), + "BusinessMonthBegin": Timestamp("2011-01-03 09:00:00"), + "MonthEnd": Timestamp("2011-01-31 09:00:00"), + "SemiMonthEnd": Timestamp("2011-01-15 09:00:00"), + "SemiMonthBegin": Timestamp("2011-01-15 09:00:00"), + "BusinessMonthEnd": Timestamp("2011-01-31 09:00:00"), + "YearBegin": Timestamp("2012-01-01 09:00:00"), + "BYearBegin": Timestamp("2011-01-03 09:00:00"), + "YearEnd": Timestamp("2011-12-31 09:00:00"), + "BYearEnd": Timestamp("2011-12-30 09:00:00"), + "QuarterBegin": Timestamp("2011-03-01 09:00:00"), + "BQuarterBegin": Timestamp("2011-03-01 09:00:00"), + "QuarterEnd": Timestamp("2011-03-31 09:00:00"), + "BQuarterEnd": Timestamp("2011-03-31
09:00:00"), + "BusinessHour": Timestamp("2011-01-03 10:00:00"), + "CustomBusinessHour": Timestamp("2011-01-03 10:00:00"), + "WeekOfMonth": Timestamp("2011-01-08 09:00:00"), + "LastWeekOfMonth": Timestamp("2011-01-29 09:00:00"), + "FY5253Quarter": Timestamp("2011-01-25 09:00:00"), + "FY5253": Timestamp("2011-01-25 09:00:00"), + "Week": Timestamp("2011-01-08 09:00:00"), + "Easter": Timestamp("2011-04-24 09:00:00"), + "Hour": Timestamp("2011-01-01 10:00:00"), + "Minute": Timestamp("2011-01-01 09:01:00"), + "Second": Timestamp("2011-01-01 09:00:01"), + "Milli": Timestamp("2011-01-01 09:00:00.001000"), + "Micro": Timestamp("2011-01-01 09:00:00.000001"), + "Nano": Timestamp(np_datetime64_compat("2011-01-01T09:00:00.000000001Z")), + } + + def test_immutable(self, offset_types): + # GH#21341 check that __setattr__ raises + offset = self._get_offset(offset_types) + with pytest.raises(AttributeError): + offset.normalize = True + with pytest.raises(AttributeError): + offset.n = 91 + + def test_return_type(self, offset_types): + offset = self._get_offset(offset_types) + + # make sure that we are returning a Timestamp + result = Timestamp("20080101") + offset + assert isinstance(result, Timestamp) + + # make sure that we are returning NaT + assert NaT + offset is NaT + assert offset + NaT is NaT + + assert NaT - offset is NaT + assert (-offset).apply(NaT) is NaT + + def test_offset_n(self, offset_types): + offset = self._get_offset(offset_types) + assert offset.n == 1 + + neg_offset = offset * -1 + assert neg_offset.n == -1 + + mul_offset = offset * 3 + assert mul_offset.n == 3 + + def test_offset_timedelta64_arg(self, offset_types): + # check that offset._validate_n raises TypeError on a timedelta64 + # object + off = self._get_offset(offset_types) + + td64 = np.timedelta64(4567, "s") + with pytest.raises(TypeError, match="argument must be an integer"): + type(off)(n=td64, **off.kwds) + + def test_offset_mul_ndarray(self, offset_types): + off = self._get_offset(offset_types) +
+ expected = np.array([[off, off * 2], [off * 3, off * 4]]) + + result = np.array([[1, 2], [3, 4]]) * off + tm.assert_numpy_array_equal(result, expected) + + result = off * np.array([[1, 2], [3, 4]]) + tm.assert_numpy_array_equal(result, expected) + + def test_offset_freqstr(self, offset_types): + offset = self._get_offset(offset_types) + + freqstr = offset.freqstr + if freqstr not in ("<Easter>", "<DateOffset: days=1>", "LWOM-SAT"): + code = _get_offset(freqstr) + assert offset.rule_code == code + + def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False): + + if normalize and issubclass(offset, Tick): + # normalize=True disallowed for Tick subclasses GH#21427 + return + + offset_s = self._get_offset(offset, normalize=normalize) + func = getattr(offset_s, funcname) + + result = func(dt) + assert isinstance(result, Timestamp) + assert result == expected + + result = func(Timestamp(dt)) + assert isinstance(result, Timestamp) + assert result == expected + + # see gh-14101 + exp_warning = None + ts = Timestamp(dt) + Nano(5) + + if ( + type(offset_s).__name__ == "DateOffset" + and (funcname == "apply" or normalize) + and ts.nanosecond > 0 + ): + exp_warning = UserWarning + + # test nanosecond is preserved + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + result = func(ts) + assert isinstance(result, Timestamp) + if normalize is False: + assert result == expected + Nano(5) + else: + assert result == expected + + if isinstance(dt, np.datetime64): + # test tz when input is datetime or Timestamp + return + + for tz in self.timezones: + expected_localize = expected.tz_localize(tz) + tz_obj = timezones.maybe_get_tz(tz) + dt_tz = conversion.localize_pydatetime(dt, tz_obj) + + result = func(dt_tz) + assert isinstance(result, Timestamp) + assert result == expected_localize + + result = func(Timestamp(dt, tz=tz)) + assert isinstance(result, Timestamp) + assert result == expected_localize + + # see gh-14101 + exp_warning = None + ts = Timestamp(dt, tz=tz) +
Nano(5) + + if ( + type(offset_s).__name__ == "DateOffset" + and (funcname == "apply" or normalize) + and ts.nanosecond > 0 + ): + exp_warning = UserWarning + + # test nanosecond is preserved + with tm.assert_produces_warning(exp_warning, check_stacklevel=False): + result = func(ts) + assert isinstance(result, Timestamp) + if normalize is False: + assert result == expected_localize + Nano(5) + else: + assert result == expected_localize + + def test_apply(self, offset_types): + sdt = datetime(2011, 1, 1, 9, 0) + ndt = np_datetime64_compat("2011-01-01 09:00Z") + + for dt in [sdt, ndt]: + expected = self.expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, "apply", dt, expected) + + expected = Timestamp(expected.date()) + self._check_offsetfunc_works( + offset_types, "apply", dt, expected, normalize=True + ) + + def test_rollforward(self, offset_types): + expecteds = self.expecteds.copy() + + # result will not be changed if the target is on the offset + no_changes = [ + "Day", + "MonthBegin", + "SemiMonthBegin", + "YearBegin", + "Week", + "Hour", + "Minute", + "Second", + "Milli", + "Micro", + "Nano", + "DateOffset", + ] + for n in no_changes: + expecteds[n] = Timestamp("2011/01/01 09:00") + + expecteds["BusinessHour"] = Timestamp("2011-01-03 09:00:00") + expecteds["CustomBusinessHour"] = Timestamp("2011-01-03 09:00:00") + + # but be changed when normalize=True + norm_expected = expecteds.copy() + for k in norm_expected: + norm_expected[k] = Timestamp(norm_expected[k].date()) + + normalized = { + "Day": Timestamp("2011-01-02 00:00:00"), + "DateOffset": Timestamp("2011-01-02 00:00:00"), + "MonthBegin": Timestamp("2011-02-01 00:00:00"), + "SemiMonthBegin": Timestamp("2011-01-15 00:00:00"), + "YearBegin": Timestamp("2012-01-01 00:00:00"), + "Week": Timestamp("2011-01-08 00:00:00"), + "Hour": Timestamp("2011-01-01 00:00:00"), + "Minute": Timestamp("2011-01-01 00:00:00"), + "Second": Timestamp("2011-01-01 00:00:00"), + "Milli": 
Timestamp("2011-01-01 00:00:00"), + "Micro": Timestamp("2011-01-01 00:00:00"), + } + norm_expected.update(normalized) + + sdt = datetime(2011, 1, 1, 9, 0) + ndt = np_datetime64_compat("2011-01-01 09:00Z") + + for dt in [sdt, ndt]: + expected = expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, "rollforward", dt, expected) + expected = norm_expected[offset_types.__name__] + self._check_offsetfunc_works( + offset_types, "rollforward", dt, expected, normalize=True + ) + + def test_rollback(self, offset_types): + expecteds = { + "BusinessDay": Timestamp("2010-12-31 09:00:00"), + "CustomBusinessDay": Timestamp("2010-12-31 09:00:00"), + "CustomBusinessMonthEnd": Timestamp("2010-12-31 09:00:00"), + "CustomBusinessMonthBegin": Timestamp("2010-12-01 09:00:00"), + "BusinessMonthBegin": Timestamp("2010-12-01 09:00:00"), + "MonthEnd": Timestamp("2010-12-31 09:00:00"), + "SemiMonthEnd": Timestamp("2010-12-31 09:00:00"), + "BusinessMonthEnd": Timestamp("2010-12-31 09:00:00"), + "BYearBegin": Timestamp("2010-01-01 09:00:00"), + "YearEnd": Timestamp("2010-12-31 09:00:00"), + "BYearEnd": Timestamp("2010-12-31 09:00:00"), + "QuarterBegin": Timestamp("2010-12-01 09:00:00"), + "BQuarterBegin": Timestamp("2010-12-01 09:00:00"), + "QuarterEnd": Timestamp("2010-12-31 09:00:00"), + "BQuarterEnd": Timestamp("2010-12-31 09:00:00"), + "BusinessHour": Timestamp("2010-12-31 17:00:00"), + "CustomBusinessHour": Timestamp("2010-12-31 17:00:00"), + "WeekOfMonth": Timestamp("2010-12-11 09:00:00"), + "LastWeekOfMonth": Timestamp("2010-12-25 09:00:00"), + "FY5253Quarter": Timestamp("2010-10-26 09:00:00"), + "FY5253": Timestamp("2010-01-26 09:00:00"), + "Easter": Timestamp("2010-04-04 09:00:00"), + } + + # result will not be changed if the target is on the offset + for n in [ + "Day", + "MonthBegin", + "SemiMonthBegin", + "YearBegin", + "Week", + "Hour", + "Minute", + "Second", + "Milli", + "Micro", + "Nano", + "DateOffset", + ]: + expecteds[n] = Timestamp("2011/01/01 
09:00") + + # but be changed when normalize=True + norm_expected = expecteds.copy() + for k in norm_expected: + norm_expected[k] = Timestamp(norm_expected[k].date()) + + normalized = { + "Day": Timestamp("2010-12-31 00:00:00"), + "DateOffset": Timestamp("2010-12-31 00:00:00"), + "MonthBegin": Timestamp("2010-12-01 00:00:00"), + "SemiMonthBegin": Timestamp("2010-12-15 00:00:00"), + "YearBegin": Timestamp("2010-01-01 00:00:00"), + "Week": Timestamp("2010-12-25 00:00:00"), + "Hour": Timestamp("2011-01-01 00:00:00"), + "Minute": Timestamp("2011-01-01 00:00:00"), + "Second": Timestamp("2011-01-01 00:00:00"), + "Milli": Timestamp("2011-01-01 00:00:00"), + "Micro": Timestamp("2011-01-01 00:00:00"), + } + norm_expected.update(normalized) + + sdt = datetime(2011, 1, 1, 9, 0) + ndt = np_datetime64_compat("2011-01-01 09:00Z") + + for dt in [sdt, ndt]: + expected = expecteds[offset_types.__name__] + self._check_offsetfunc_works(offset_types, "rollback", dt, expected) + + expected = norm_expected[offset_types.__name__] + self._check_offsetfunc_works( + offset_types, "rollback", dt, expected, normalize=True + ) + + def test_is_on_offset(self, offset_types): + dt = self.expecteds[offset_types.__name__] + offset_s = self._get_offset(offset_types) + assert offset_s.is_on_offset(dt) + + # when normalize=True, is_on_offset checks time is 00:00:00 + if issubclass(offset_types, Tick): + # normalize=True disallowed for Tick subclasses GH#21427 + return + offset_n = self._get_offset(offset_types, normalize=True) + assert not offset_n.is_on_offset(dt) + + if offset_types in (BusinessHour, CustomBusinessHour): + # In default BusinessHour (9:00-17:00), normalized time + # cannot be in business hour range + return + date = datetime(dt.year, dt.month, dt.day) + assert offset_n.is_on_offset(date) + + def test_add(self, offset_types, tz_naive_fixture): + tz = tz_naive_fixture + dt = datetime(2011, 1, 1, 9, 0) + + offset_s = self._get_offset(offset_types) + expected = 
self.expecteds[offset_types.__name__] + + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected + + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize + + # normalize=True, disallowed for Tick subclasses GH#21427 + if issubclass(offset_types, Tick): + return + offset_s = self._get_offset(offset_types, normalize=True) + expected = Timestamp(expected.date()) + + result_dt = dt + offset_s + result_ts = Timestamp(dt) + offset_s + for result in [result_dt, result_ts]: + assert isinstance(result, Timestamp) + assert result == expected + + expected_localize = expected.tz_localize(tz) + result = Timestamp(dt, tz=tz) + offset_s + assert isinstance(result, Timestamp) + assert result == expected_localize + + def test_add_empty_datetimeindex(self, offset_types, tz_naive_fixture): + # GH#12724, GH#30336 + offset_s = self._get_offset(offset_types) + + dti = DatetimeIndex([], tz=tz_naive_fixture) + + warn = None + if isinstance( + offset_s, + ( + Easter, + WeekOfMonth, + LastWeekOfMonth, + CustomBusinessDay, + BusinessHour, + CustomBusinessHour, + CustomBusinessMonthBegin, + CustomBusinessMonthEnd, + FY5253, + FY5253Quarter, + ), + ): + # We don't have an optimized apply_index + warn = PerformanceWarning + + with tm.assert_produces_warning(warn): + result = dti + offset_s + tm.assert_index_equal(result, dti) + with tm.assert_produces_warning(warn): + result = offset_s + dti + tm.assert_index_equal(result, dti) + + dta = dti._data + with tm.assert_produces_warning(warn): + result = dta + offset_s + tm.assert_equal(result, dta) + with tm.assert_produces_warning(warn): + result = offset_s + dta + tm.assert_equal(result, dta) + + def test_pickle_v0_15_2(self, datapath): + offsets = { + "DateOffset": DateOffset(years=1), + "MonthBegin": MonthBegin(1), + 
"Day": Day(1), + "YearBegin": YearBegin(1), + "Week": Week(1), + } + + pickle_path = datapath("tseries", "offsets", "data", "dateoffset_0_15_2.pickle") + # This code was executed once on v0.15.2 to generate the pickle: + # with open(pickle_path, 'wb') as f: pickle.dump(offsets, f) + # + tm.assert_dict_equal(offsets, read_pickle(pickle_path)) + + def test_onOffset_deprecated(self, offset_types): + # GH#30340 use idiomatic naming + off = self._get_offset(offset_types) + + ts = Timestamp.now() + with tm.assert_produces_warning(FutureWarning): + result = off.onOffset(ts) + + expected = off.is_on_offset(ts) + assert result == expected + + def test_isAnchored_deprecated(self, offset_types): + # GH#30340 use idiomatic naming + off = self._get_offset(offset_types) + + with tm.assert_produces_warning(FutureWarning): + result = off.isAnchored() + + expected = off.is_anchored() + assert result == expected + + +class TestDateOffset(Base): + def setup_method(self, method): + self.d = Timestamp(datetime(2008, 1, 2)) + _offset_map.clear() + + def test_repr(self): + repr(DateOffset()) + repr(DateOffset(2)) + repr(2 * DateOffset()) + repr(2 * DateOffset(months=2)) + + def test_mul(self): + assert DateOffset(2) == 2 * DateOffset(1) + assert DateOffset(2) == DateOffset(1) * 2 + + def test_constructor(self): + + assert (self.d + DateOffset(months=2)) == datetime(2008, 3, 2) + assert (self.d - DateOffset(months=2)) == datetime(2007, 11, 2) + + assert (self.d + DateOffset(2)) == datetime(2008, 1, 4) + + assert not DateOffset(2).is_anchored() + assert DateOffset(1).is_anchored() + + d = datetime(2008, 1, 31) + assert (d + DateOffset(months=1)) == datetime(2008, 2, 29) + + def test_copy(self): + assert DateOffset(months=2).copy() == DateOffset(months=2) + + def test_eq(self): + offset1 = DateOffset(days=1) + offset2 = DateOffset(days=365) + + assert offset1 != offset2 + + +class TestBusinessDay(Base): + _offset = BDay + + def setup_method(self, method): + self.d = datetime(2008, 1, 1) + + 
self.offset = BDay() + self.offset1 = self.offset + self.offset2 = BDay(2) + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset) == "<BusinessDay>" + assert repr(self.offset2) == "<2 * BusinessDays>" + + if compat.PY37: + expected = "<BusinessDay: offset=datetime.timedelta(days=1)>" + else: + expected = "<BusinessDay: offset=datetime.timedelta(1)>" + assert repr(self.offset + timedelta(1)) == expected + + def test_with_offset(self): + offset = self.offset + timedelta(hours=2) + + assert (self.d + offset) == datetime(2008, 1, 2, 2) + + def test_eq(self): + assert self.offset2 == self.offset2 + + def test_mul(self): + pass + + def test_hash(self): + assert hash(self.offset2) == hash(self.offset2) + + def test_call(self): + assert self.offset2(self.d) == datetime(2008, 1, 3) + + def testRollback1(self): + assert BDay(10).rollback(self.d) == self.d + + def testRollback2(self): + assert BDay(10).rollback(datetime(2008, 1, 5)) == datetime(2008, 1, 4) + + def testRollforward1(self): + assert BDay(10).rollforward(self.d) == self.d + + def testRollforward2(self): + assert BDay(10).rollforward(datetime(2008, 1, 5)) == datetime(2008, 1, 7) + + def test_roll_date_object(self): + offset = BDay() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 9, 14) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 17) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + def test_is_on_offset(self): + tests = [ + (BDay(), datetime(2008, 1, 1), True), + (BDay(), datetime(2008, 1, 5), False), + ] + + for offset, d, expected in tests: + assert_is_on_offset(offset, d, expected) + + apply_cases: _ApplyCases = [] + apply_cases.append( + ( + BDay(), + { + datetime(2008, 1, 1): datetime(2008, 1, 2), +
datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 8), + }, + ) + ) + + apply_cases.append( + ( + 2 * BDay(), + { + datetime(2008, 1, 1): datetime(2008, 1, 3), + datetime(2008, 1, 4): datetime(2008, 1, 8), + datetime(2008, 1, 5): datetime(2008, 1, 8), + datetime(2008, 1, 6): datetime(2008, 1, 8), + datetime(2008, 1, 7): datetime(2008, 1, 9), + }, + ) + ) + + apply_cases.append( + ( + -BDay(), + { + datetime(2008, 1, 1): datetime(2007, 12, 31), + datetime(2008, 1, 4): datetime(2008, 1, 3), + datetime(2008, 1, 5): datetime(2008, 1, 4), + datetime(2008, 1, 6): datetime(2008, 1, 4), + datetime(2008, 1, 7): datetime(2008, 1, 4), + datetime(2008, 1, 8): datetime(2008, 1, 7), + }, + ) + ) + + apply_cases.append( + ( + -2 * BDay(), + { + datetime(2008, 1, 1): datetime(2007, 12, 28), + datetime(2008, 1, 4): datetime(2008, 1, 2), + datetime(2008, 1, 5): datetime(2008, 1, 3), + datetime(2008, 1, 6): datetime(2008, 1, 3), + datetime(2008, 1, 7): datetime(2008, 1, 3), + datetime(2008, 1, 8): datetime(2008, 1, 4), + datetime(2008, 1, 9): datetime(2008, 1, 7), + }, + ) + ) + + apply_cases.append( + ( + BDay(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 4): datetime(2008, 1, 4), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 7), + }, + ) + ) + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + BDay(10) + assert result == datetime(2012, 11, 6) + + result = dt + BDay(100) - BDay(100) + assert result == dt + + off = BDay() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 12, 23) + assert rs == xp + + st = datetime(2011, 
12, 18) + rs = st + off + xp = datetime(2011, 12, 26) + assert rs == xp + + off = BDay() * 10 + rs = datetime(2014, 1, 5) + off # see #5890 + xp = datetime(2014, 1, 17) + assert rs == xp + + def test_apply_corner(self): + msg = "Only know how to combine business day with datetime or timedelta" + with pytest.raises(ApplyTypeError, match=msg): + BDay().apply(BMonthEnd()) + + +class TestBusinessHour(Base): + _offset = BusinessHour + + def setup_method(self, method): + self.d = datetime(2014, 7, 1, 10, 00) + + self.offset1 = BusinessHour() + self.offset2 = BusinessHour(n=3) + + self.offset3 = BusinessHour(n=-1) + self.offset4 = BusinessHour(n=-4) + + from datetime import time as dt_time + + self.offset5 = BusinessHour(start=dt_time(11, 0), end=dt_time(14, 30)) + self.offset6 = BusinessHour(start="20:00", end="05:00") + self.offset7 = BusinessHour(n=-2, start=dt_time(21, 30), end=dt_time(6, 30)) + self.offset8 = BusinessHour(start=["09:00", "13:00"], end=["12:00", "17:00"]) + self.offset9 = BusinessHour( + n=3, start=["09:00", "22:00"], end=["13:00", "03:00"] + ) + self.offset10 = BusinessHour( + n=-1, start=["23:00", "13:00"], end=["02:00", "17:00"] + ) + + @pytest.mark.parametrize( + "start,end,match", + [ + ( + dt_time(11, 0, 5), + "17:00", + "time data must be specified only with hour and minute", + ), + ("AAA", "17:00", "time data must match '%H:%M' format"), + ("14:00:05", "17:00", "time data must match '%H:%M' format"), + ([], "17:00", "Must include at least 1 start time"), + ("09:00", [], "Must include at least 1 end time"), + ( + ["09:00", "11:00"], + "17:00", + "number of starting time and ending time must be the same", + ), + ( + ["09:00", "11:00"], + ["10:00"], + "number of starting time and ending time must be the same", + ), + ( + ["09:00", "11:00"], + ["12:00", "20:00"], + r"invalid starting and ending time\(s\): opening hours should not " + "touch or overlap with one another", + ), + ( + ["12:00", "20:00"], + ["09:00", "11:00"], + r"invalid starting and 
ending time\(s\): opening hours should not " + "touch or overlap with one another", + ), + ], + ) + def test_constructor_errors(self, start, end, match): + with pytest.raises(ValueError, match=match): + BusinessHour(start=start, end=end) + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset1) == "<BusinessHour: BH=09:00-17:00>" + assert repr(self.offset2) == "<3 * BusinessHours: BH=09:00-17:00>" + assert repr(self.offset3) == "<-1 * BusinessHour: BH=09:00-17:00>" + assert repr(self.offset4) == "<-4 * BusinessHours: BH=09:00-17:00>" + + assert repr(self.offset5) == "<BusinessHour: BH=11:00-14:30>" + assert repr(self.offset6) == "<BusinessHour: BH=20:00-05:00>" + assert repr(self.offset7) == "<-2 * BusinessHours: BH=21:30-06:30>" + assert repr(self.offset8) == "<BusinessHour: BH=09:00-12:00,13:00-17:00>" + assert repr(self.offset9) == "<3 * BusinessHours: BH=09:00-13:00,22:00-03:00>" + assert repr(self.offset10) == "<-1 * BusinessHour: BH=13:00-17:00,23:00-02:00>" + + def test_with_offset(self): + expected = Timestamp("2014-07-01 13:00") + + assert self.d + BusinessHour() * 3 == expected + assert self.d + BusinessHour(n=3) == expected + + @pytest.mark.parametrize( + "offset_name", + ["offset1", "offset2", "offset3", "offset4", "offset8", "offset9", "offset10"], + ) + def test_eq_attribute(self, offset_name): + offset = getattr(self, offset_name) + assert offset == offset + + @pytest.mark.parametrize( + "offset1,offset2", + [ + (BusinessHour(start="09:00"), BusinessHour()), + ( + BusinessHour(start=["23:00", "13:00"], end=["12:00", "17:00"]), + BusinessHour(start=["13:00", "23:00"], end=["17:00", "12:00"]), + ), + ], + ) + def test_eq(self, offset1, offset2): + assert offset1 == offset2 + + @pytest.mark.parametrize( + "offset1,offset2", + [ + (BusinessHour(), BusinessHour(-1)), + (BusinessHour(start="09:00"), BusinessHour(start="09:01")), + ( + BusinessHour(start="09:00", end="17:00"), +
BusinessHour(start="17:00", end="09:01"), + ), + ( + BusinessHour(start=["13:00", "23:00"], end=["18:00", "07:00"]), + BusinessHour(start=["13:00", "23:00"], end=["17:00", "12:00"]), + ), + ], + ) + def test_neq(self, offset1, offset2): + assert offset1 != offset2 + + @pytest.mark.parametrize( + "offset_name", + ["offset1", "offset2", "offset3", "offset4", "offset8", "offset9", "offset10"], + ) + def test_hash(self, offset_name): + offset = getattr(self, offset_name) + assert offset == offset + + def test_call(self): + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 13) + assert self.offset3(self.d) == datetime(2014, 6, 30, 17) + assert self.offset4(self.d) == datetime(2014, 6, 30, 14) + assert self.offset8(self.d) == datetime(2014, 7, 1, 11) + assert self.offset9(self.d) == datetime(2014, 7, 1, 22) + assert self.offset10(self.d) == datetime(2014, 7, 1, 1) + + def test_sub(self): + # we have to override test_sub here because self.offset2 is not + # defined as self._offset(2) + off = self.offset2 + msg = "Cannot subtract datetime from offset" + with pytest.raises(TypeError, match=msg): + off - self.d + assert 2 * off - off == off + + assert self.d - self.offset2 == self.d + self._offset(-3) + + def testRollback1(self): + assert self.offset1.rollback(self.d) == self.d + assert self.offset2.rollback(self.d) == self.d + assert self.offset3.rollback(self.d) == self.d + assert self.offset4.rollback(self.d) == self.d + assert self.offset5.rollback(self.d) == datetime(2014, 6, 30, 14, 30) + assert self.offset6.rollback(self.d) == datetime(2014, 7, 1, 5, 0) + assert self.offset7.rollback(self.d) == datetime(2014, 7, 1, 6, 30) + assert self.offset8.rollback(self.d) == self.d + assert self.offset9.rollback(self.d) == self.d + assert self.offset10.rollback(self.d) == datetime(2014, 7, 1, 2) + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollback(d) == datetime(2014, 6, 30, 17) + assert 
self.offset2.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset3.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset4.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset5.rollback(d) == datetime(2014, 6, 30, 14, 30) + assert self.offset6.rollback(d) == d + assert self.offset7.rollback(d) == d + assert self.offset8.rollback(d) == datetime(2014, 6, 30, 17) + assert self.offset9.rollback(d) == d + assert self.offset10.rollback(d) == d + + assert self._offset(5).rollback(self.d) == self.d + + def testRollback2(self): + assert self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) == datetime( + 2014, 7, 4, 17, 0 + ) + + def testRollforward1(self): + assert self.offset1.rollforward(self.d) == self.d + assert self.offset2.rollforward(self.d) == self.d + assert self.offset3.rollforward(self.d) == self.d + assert self.offset4.rollforward(self.d) == self.d + assert self.offset5.rollforward(self.d) == datetime(2014, 7, 1, 11, 0) + assert self.offset6.rollforward(self.d) == datetime(2014, 7, 1, 20, 0) + assert self.offset7.rollforward(self.d) == datetime(2014, 7, 1, 21, 30) + assert self.offset8.rollforward(self.d) == self.d + assert self.offset9.rollforward(self.d) == self.d + assert self.offset10.rollforward(self.d) == datetime(2014, 7, 1, 13) + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset3.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset4.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset5.rollforward(d) == datetime(2014, 7, 1, 11) + assert self.offset6.rollforward(d) == d + assert self.offset7.rollforward(d) == d + assert self.offset8.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset9.rollforward(d) == d + assert self.offset10.rollforward(d) == d + + assert self._offset(5).rollforward(self.d) == self.d + + def testRollforward2(self): + assert 
self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) == datetime( + 2014, 7, 7, 9 + ) + + def test_roll_date_object(self): + offset = BusinessHour() + + dt = datetime(2014, 7, 6, 15, 0) + + result = offset.rollback(dt) + assert result == datetime(2014, 7, 4, 17) + + result = offset.rollforward(dt) + assert result == datetime(2014, 7, 7, 9) + + normalize_cases = [] + normalize_cases.append( + ( + BusinessHour(normalize=True), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 1, 0): datetime(2014, 7, 1), + datetime(2014, 7, 4, 15): datetime(2014, 7, 4), + datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 7), + datetime(2014, 7, 6, 10): datetime(2014, 7, 7), + }, + ) + ) + + normalize_cases.append( + ( + BusinessHour(-1, normalize=True), + { + datetime(2014, 7, 1, 8): datetime(2014, 6, 30), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1), + datetime(2014, 7, 1, 10): datetime(2014, 6, 30), + datetime(2014, 7, 1, 0): datetime(2014, 6, 30), + datetime(2014, 7, 7, 10): datetime(2014, 7, 4), + datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 4), + datetime(2014, 7, 6, 10): datetime(2014, 7, 4), + }, + ) + ) + + normalize_cases.append( + ( + BusinessHour(1, normalize=True, start="17:00", end="04:00"), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 2, 2): datetime(2014, 7, 2), + datetime(2014, 7, 2, 3): datetime(2014, 7, 2), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5), + datetime(2014, 7, 5, 2): datetime(2014, 7, 5), + datetime(2014, 7, 7, 2): datetime(2014, 7, 7), + 
datetime(2014, 7, 7, 17): datetime(2014, 7, 7), + }, + ) + ) + + @pytest.mark.parametrize("case", normalize_cases) + def test_normalize(self, case): + offset, cases = case + for dt, expected in cases.items(): + assert offset.apply(dt) == expected + + on_offset_cases = [] + on_offset_cases.append( + ( + BusinessHour(), + { + datetime(2014, 7, 1, 9): True, + datetime(2014, 7, 1, 8, 59): False, + datetime(2014, 7, 1, 8): False, + datetime(2014, 7, 1, 17): True, + datetime(2014, 7, 1, 17, 1): False, + datetime(2014, 7, 1, 18): False, + datetime(2014, 7, 5, 9): False, + datetime(2014, 7, 6, 12): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start="10:00", end="15:00"), + { + datetime(2014, 7, 1, 9): False, + datetime(2014, 7, 1, 10): True, + datetime(2014, 7, 1, 15): True, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12): False, + datetime(2014, 7, 6, 12): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start="19:00", end="05:00"), + { + datetime(2014, 7, 1, 9, 0): False, + datetime(2014, 7, 1, 10, 0): False, + datetime(2014, 7, 1, 15): False, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12, 0): False, + datetime(2014, 7, 6, 12, 0): False, + datetime(2014, 7, 1, 19, 0): True, + datetime(2014, 7, 2, 0, 0): True, + datetime(2014, 7, 4, 23): True, + datetime(2014, 7, 5, 1): True, + datetime(2014, 7, 5, 5, 0): True, + datetime(2014, 7, 6, 23, 0): False, + datetime(2014, 7, 7, 3, 0): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start=["09:00", "13:00"], end=["12:00", "17:00"]), + { + datetime(2014, 7, 1, 9): True, + datetime(2014, 7, 1, 8, 59): False, + datetime(2014, 7, 1, 8): False, + datetime(2014, 7, 1, 17): True, + datetime(2014, 7, 1, 17, 1): False, + datetime(2014, 7, 1, 18): False, + datetime(2014, 7, 5, 9): False, + datetime(2014, 7, 6, 12): False, + datetime(2014, 7, 1, 12, 30): False, + }, + ) + ) + + on_offset_cases.append( + ( + BusinessHour(start=["19:00", "23:00"], 
end=["21:00", "05:00"]), + { + datetime(2014, 7, 1, 9, 0): False, + datetime(2014, 7, 1, 10, 0): False, + datetime(2014, 7, 1, 15): False, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12, 0): False, + datetime(2014, 7, 6, 12, 0): False, + datetime(2014, 7, 1, 19, 0): True, + datetime(2014, 7, 2, 0, 0): True, + datetime(2014, 7, 4, 23): True, + datetime(2014, 7, 5, 1): True, + datetime(2014, 7, 5, 5, 0): True, + datetime(2014, 7, 6, 23, 0): False, + datetime(2014, 7, 7, 3, 0): False, + datetime(2014, 7, 4, 22): False, + }, + ) + ) + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, cases = case + for dt, expected in cases.items(): + assert offset.is_on_offset(dt) == expected + + opening_time_cases = [] + # opening time should be affected by sign of n, not by n's value and + # end + opening_time_cases.append( + ( + [ + BusinessHour(), + BusinessHour(n=2), + BusinessHour(n=4), + BusinessHour(end="10:00"), + BusinessHour(n=2, end="4:00"), + BusinessHour(n=4, end="15:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 9), + ), + # if timestamp is on opening time, next opening time is + # as it is + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 3, 9), + datetime(2014, 7, 2, 9), + ), + # 2014-07-05 is saturday + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 6, 10): ( + 
datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 9), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 8, 9), + datetime(2014, 7, 7, 9), + ), + }, + ) + ) + + opening_time_cases.append( + ( + [ + BusinessHour(start="11:15"), + BusinessHour(n=2, start="11:15"), + BusinessHour(n=3, start="11:15"), + BusinessHour(start="11:15", end="10:00"), + BusinessHour(n=2, start="11:15", end="4:00"), + BusinessHour(n=3, start="11:15", end="15:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 11, 15), + datetime(2014, 6, 30, 11, 15), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 11, 15), + ), + datetime(2014, 7, 2, 11, 15): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 2, 11, 15, 1): ( + datetime(2014, 7, 3, 11, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 11, 15), + datetime(2014, 7, 3, 11, 15), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 11, 15), + ), + }, + ) + ) + + opening_time_cases.append( + ( + [ + 
BusinessHour(-1), + BusinessHour(n=-2), + BusinessHour(n=-4), + BusinessHour(n=-1, end="10:00"), + BusinessHour(n=-2, end="4:00"), + BusinessHour(n=-4, end="15:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 1, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 9), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 2, 9), + datetime(2014, 7, 3, 9), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 4, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 7, 9): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 7, 9), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 7, 9), + datetime(2014, 7, 8, 9), + ), + }, + ) + ) + + opening_time_cases.append( + ( + [ + BusinessHour(start="17:00", end="05:00"), + BusinessHour(n=3, start="17:00", end="03:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 17), + datetime(2014, 6, 30, 17), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 4, 17): ( + datetime(2014, 7, 4, 17), + 
datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 3, 17), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 7, 17, 1): ( + datetime(2014, 7, 8, 17), + datetime(2014, 7, 7, 17), + ), + }, + ) + ) + + opening_time_cases.append( + ( + [ + BusinessHour(-1, start="17:00", end="05:00"), + BusinessHour(n=-2, start="17:00", end="03:00"), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 16, 59): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 3, 17), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 17), + ), + datetime(2014, 7, 7, 18): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 8, 17), + ), + }, + ) + ) + + opening_time_cases.append( + ( + [ + BusinessHour(start=["11:15", "15:00"], end=["13:00", "20:00"]), + BusinessHour(n=3, start=["11:15", "15:00"], 
end=["12:00", "20:00"]), + BusinessHour(start=["11:15", "15:00"], end=["13:00", "17:00"]), + BusinessHour(n=2, start=["11:15", "15:00"], end=["12:00", "03:00"]), + BusinessHour(n=3, start=["11:15", "15:00"], end=["13:00", "16:00"]), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 11, 15), + datetime(2014, 6, 30, 15), + ), + datetime(2014, 7, 1, 18): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 10): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 1, 15), + ), + datetime(2014, 7, 2, 11, 15): ( + datetime(2014, 7, 2, 11, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 2, 11, 15, 1): ( + datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 11, 15), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 11, 15), + datetime(2014, 7, 3, 15), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 7, 9, 1): ( + datetime(2014, 7, 7, 11, 15), + datetime(2014, 7, 4, 15), + ), + datetime(2014, 7, 7, 12): ( + datetime(2014, 7, 7, 15), + datetime(2014, 7, 7, 11, 15), + ), + }, + ) + ) + + opening_time_cases.append( + ( + [ + BusinessHour(n=-1, start=["17:00", "08:00"], end=["05:00", "10:00"]), + BusinessHour(n=-2, start=["08:00", "17:00"], end=["10:00", "03:00"]), + ], + { + datetime(2014, 7, 1, 11): ( + datetime(2014, 7, 1, 8), + datetime(2014, 7, 1, 17), + ), + datetime(2014, 7, 1, 
18): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 8), + ), + datetime(2014, 7, 1, 23): ( + datetime(2014, 7, 1, 17), + datetime(2014, 7, 2, 8), + ), + datetime(2014, 7, 2, 8): ( + datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 8), + ), + datetime(2014, 7, 2, 9): ( + datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 2, 16, 59): ( + datetime(2014, 7, 2, 8), + datetime(2014, 7, 2, 17), + ), + datetime(2014, 7, 5, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 4, 10): ( + datetime(2014, 7, 4, 8), + datetime(2014, 7, 4, 17), + ), + datetime(2014, 7, 4, 23): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 6, 10): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 7, 5): ( + datetime(2014, 7, 4, 17), + datetime(2014, 7, 7, 8), + ), + datetime(2014, 7, 7, 18): ( + datetime(2014, 7, 7, 17), + datetime(2014, 7, 8, 8), + ), + }, + ) + ) + + @pytest.mark.parametrize("case", opening_time_cases) + def test_opening_time(self, case): + _offsets, cases = case + for offset in _offsets: + for dt, (exp_next, exp_prev) in cases.items(): + assert offset._next_opening_time(dt) == exp_next + assert offset._prev_opening_time(dt) == exp_prev + + apply_cases = [] + apply_cases.append( + ( + BusinessHour(), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 2, 9, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 12), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 
3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 9, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(4), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 13): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 13), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 12, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 12, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(-1), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 10), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 15), + datetime(2014, 7, 1, 10): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 1, 15, 30, 15), + datetime(2014, 7, 1, 9, 30, 15): datetime(2014, 6, 30, 16, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 5): datetime(2014, 6, 30, 16), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 10), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 16), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 16), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 
16), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 16), + datetime(2014, 7, 7, 9): datetime(2014, 7, 4, 16), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 16, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 16, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(-4), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 30, 15), + datetime(2014, 7, 1, 13): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 11), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 13), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 18): datetime(2014, 7, 4, 13), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 13, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 13, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(start="13:00", end="16:00"), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 13), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 14), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 14), + datetime(2014, 7, 1, 15, 30, 15): datetime(2014, 7, 2, 13, 30, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 14), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 14), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=2, start="13:00", end="16:00"), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 14): datetime(2014, 7, 3, 13), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 15), + datetime(2014, 7, 2, 14, 30): datetime(2014, 7, 3, 
13, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 14, 30): datetime(2014, 7, 7, 13, 30), + datetime(2014, 7, 4, 14, 30, 30): datetime(2014, 7, 7, 13, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=-1, start="13:00", end="16:00"), + { + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 14): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 15): datetime(2014, 7, 2, 14), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 16): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 13, 30, 15): datetime(2014, 7, 1, 15, 30, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 15), + datetime(2014, 7, 7, 11): datetime(2014, 7, 4, 15), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=-3, start="10:00", end="16:00"), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 11), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 13), + datetime(2014, 7, 2, 13): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 13), + datetime(2014, 7, 2, 11, 30): datetime(2014, 7, 1, 14, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 13), + datetime(2014, 7, 4, 10): datetime(2014, 7, 3, 13), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 16): datetime(2014, 7, 4, 13), + datetime(2014, 7, 4, 12, 30): datetime(2014, 7, 3, 15, 30), + datetime(2014, 7, 4, 12, 30, 30): datetime(2014, 7, 3, 15, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(start="19:00", end="05:00"), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 20), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 
19): datetime(2014, 7, 2, 20), + datetime(2014, 7, 2, 4, 30): datetime(2014, 7, 2, 19, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 1), + datetime(2014, 7, 4, 10): datetime(2014, 7, 4, 20), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5, 0), + datetime(2014, 7, 5, 0): datetime(2014, 7, 5, 1), + datetime(2014, 7, 5, 4): datetime(2014, 7, 7, 19), + datetime(2014, 7, 5, 4, 30): datetime(2014, 7, 7, 19, 30), + datetime(2014, 7, 5, 4, 30, 30): datetime(2014, 7, 7, 19, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=-1, start="19:00", end="05:00"), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 4), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 20): datetime(2014, 7, 2, 5), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 19, 30): datetime(2014, 7, 2, 4, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 23), + datetime(2014, 7, 3, 6): datetime(2014, 7, 3, 4), + datetime(2014, 7, 4, 23): datetime(2014, 7, 4, 22), + datetime(2014, 7, 5, 0): datetime(2014, 7, 4, 23), + datetime(2014, 7, 5, 4): datetime(2014, 7, 5, 3), + datetime(2014, 7, 7, 19, 30): datetime(2014, 7, 5, 4, 30), + datetime(2014, 7, 7, 19, 30, 30): datetime(2014, 7, 5, 4, 30, 30), + }, + ) + ) + + # long business hours (see gh-26381) + apply_cases.append( + ( + BusinessHour(n=4, start="00:00", end="23:00"), + { + datetime(2014, 7, 3, 22): datetime(2014, 7, 4, 3), + datetime(2014, 7, 4, 22): datetime(2014, 7, 7, 3), + datetime(2014, 7, 3, 22, 30): datetime(2014, 7, 4, 3, 30), + datetime(2014, 7, 3, 22, 20): datetime(2014, 7, 4, 3, 20), + datetime(2014, 7, 4, 22, 30, 30): datetime(2014, 7, 7, 3, 30, 30), + datetime(2014, 7, 4, 22, 30, 20): datetime(2014, 7, 7, 3, 30, 20), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=-4, start="00:00", end="23:00"), + { + datetime(2014, 7, 4, 3): datetime(2014, 7, 3, 22), 
+ datetime(2014, 7, 7, 3): datetime(2014, 7, 4, 22), + datetime(2014, 7, 4, 3, 30): datetime(2014, 7, 3, 22, 30), + datetime(2014, 7, 4, 3, 20): datetime(2014, 7, 3, 22, 20), + datetime(2014, 7, 7, 3, 30, 30): datetime(2014, 7, 4, 22, 30, 30), + datetime(2014, 7, 7, 3, 30, 20): datetime(2014, 7, 4, 22, 30, 20), + }, + ) + ) + + # multiple business hours + apply_cases.append( + ( + BusinessHour(start=["09:00", "14:00"], end=["12:00", "18:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 17), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 1, 17, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 9), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 14), + # out of business hours + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 15), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 10), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 9), + datetime(2014, 7, 4, 17, 30): datetime(2014, 7, 7, 9, 30), + datetime(2014, 7, 4, 17, 30, 30): datetime(2014, 7, 7, 9, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=4, start=["09:00", "14:00"], end=["12:00", "18:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 17), + datetime(2014, 7, 1, 13): datetime(2014, 7, 2, 9), + datetime(2014, 7, 1, 15): datetime(2014, 7, 2, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 2, 11), + datetime(2014, 7, 1, 17): datetime(2014, 7, 2, 14), + datetime(2014, 7, 2, 11): datetime(2014, 7, 2, 17), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 15), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 15), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 15), + 
datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 15), + datetime(2014, 7, 5, 15): datetime(2014, 7, 7, 15), + datetime(2014, 7, 4, 17): datetime(2014, 7, 7, 14), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7, 11, 30), + datetime(2014, 7, 4, 16, 30, 30): datetime(2014, 7, 7, 11, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=-4, start=["09:00", "14:00"], end=["12:00", "18:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 30, 16), + datetime(2014, 7, 1, 13): datetime(2014, 6, 30, 17), + datetime(2014, 7, 1, 15): datetime(2014, 6, 30, 18), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1, 10), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 11), + datetime(2014, 7, 2, 11): datetime(2014, 7, 1, 16), + datetime(2014, 7, 2, 8): datetime(2014, 7, 1, 12), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 12), + datetime(2014, 7, 2, 23): datetime(2014, 7, 2, 12), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 12), + datetime(2014, 7, 5, 15): datetime(2014, 7, 4, 12), + datetime(2014, 7, 4, 18): datetime(2014, 7, 4, 12), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 4, 14, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 4, 14, 30, 30), + }, + ) + ) + + apply_cases.append( + ( + BusinessHour(n=-1, start=["19:00", "03:00"], end=["01:00", "05:00"]), + { + datetime(2014, 7, 1, 17): datetime(2014, 7, 1, 4), + datetime(2014, 7, 2, 14): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 8): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 13): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 20): datetime(2014, 7, 2, 5), + datetime(2014, 7, 2, 19): datetime(2014, 7, 2, 4), + datetime(2014, 7, 2, 4): datetime(2014, 7, 2, 1), + datetime(2014, 7, 2, 19, 30): datetime(2014, 7, 2, 4, 30), + datetime(2014, 7, 3, 0): datetime(2014, 7, 2, 23), + datetime(2014, 7, 3, 6): datetime(2014, 7, 3, 4), + datetime(2014, 7, 4, 23): datetime(2014, 7, 4, 22), + datetime(2014, 7, 5, 0): datetime(2014, 7, 4, 23), + datetime(2014, 7, 5, 4): datetime(2014, 7, 5, 0), + 
datetime(2014, 7, 7, 3, 30): datetime(2014, 7, 5, 0, 30), + datetime(2014, 7, 7, 19, 30): datetime(2014, 7, 7, 4, 30), + datetime(2014, 7, 7, 19, 30, 30): datetime(2014, 7, 7, 4, 30, 30), + }, + ) + ) + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + apply_large_n_cases = [] + # A week later + apply_large_n_cases.append( + ( + BusinessHour(40), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 8, 11), + datetime(2014, 7, 1, 13): datetime(2014, 7, 8, 13), + datetime(2014, 7, 1, 15): datetime(2014, 7, 8, 15), + datetime(2014, 7, 1, 16): datetime(2014, 7, 8, 16), + datetime(2014, 7, 1, 17): datetime(2014, 7, 9, 9), + datetime(2014, 7, 2, 11): datetime(2014, 7, 9, 11), + datetime(2014, 7, 2, 8): datetime(2014, 7, 9, 9), + datetime(2014, 7, 2, 19): datetime(2014, 7, 10, 9), + datetime(2014, 7, 2, 23): datetime(2014, 7, 10, 9), + datetime(2014, 7, 3, 0): datetime(2014, 7, 10, 9), + datetime(2014, 7, 5, 15): datetime(2014, 7, 14, 9), + datetime(2014, 7, 4, 18): datetime(2014, 7, 14, 9), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 14, 9, 30), + datetime(2014, 7, 7, 9, 30, 30): datetime(2014, 7, 14, 9, 30, 30), + }, + ) + ) + + # 3 days and 1 hour before + apply_large_n_cases.append( + ( + BusinessHour(-25), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 26, 10), + datetime(2014, 7, 1, 13): datetime(2014, 6, 26, 12), + datetime(2014, 7, 1, 9): datetime(2014, 6, 25, 16), + datetime(2014, 7, 1, 10): datetime(2014, 6, 25, 17), + datetime(2014, 7, 3, 11): datetime(2014, 6, 30, 10), + datetime(2014, 7, 3, 8): datetime(2014, 6, 27, 16), + datetime(2014, 7, 3, 19): datetime(2014, 6, 30, 16), + datetime(2014, 7, 3, 23): datetime(2014, 6, 30, 16), + datetime(2014, 7, 4, 9): datetime(2014, 6, 30, 16), + datetime(2014, 7, 5, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 6, 18): datetime(2014, 7, 1, 16), + datetime(2014, 7, 7, 9, 
30): datetime(2014, 7, 1, 16, 30), + datetime(2014, 7, 7, 10, 30, 30): datetime(2014, 7, 2, 9, 30, 30), + }, + ) + ) + + # 5 days and 3 hours later + apply_large_n_cases.append( + ( + BusinessHour(28, start="21:00", end="02:00"), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 9, 0), + datetime(2014, 7, 1, 22): datetime(2014, 7, 9, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 9, 21), + datetime(2014, 7, 2, 2): datetime(2014, 7, 10, 0), + datetime(2014, 7, 3, 21): datetime(2014, 7, 11, 0), + datetime(2014, 7, 4, 1): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 2): datetime(2014, 7, 12, 0), + datetime(2014, 7, 4, 3): datetime(2014, 7, 12, 0), + datetime(2014, 7, 5, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 5, 15): datetime(2014, 7, 15, 0), + datetime(2014, 7, 6, 18): datetime(2014, 7, 15, 0), + datetime(2014, 7, 7, 1): datetime(2014, 7, 15, 0), + datetime(2014, 7, 7, 23, 30): datetime(2014, 7, 15, 21, 30), + }, + ) + ) + + # large n for multiple opening hours (3 days and 1 hour before) + apply_large_n_cases.append( + ( + BusinessHour(n=-25, start=["09:00", "14:00"], end=["12:00", "19:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 6, 26, 10), + datetime(2014, 7, 1, 13): datetime(2014, 6, 26, 11), + datetime(2014, 7, 1, 9): datetime(2014, 6, 25, 18), + datetime(2014, 7, 1, 10): datetime(2014, 6, 25, 19), + datetime(2014, 7, 3, 11): datetime(2014, 6, 30, 10), + datetime(2014, 7, 3, 8): datetime(2014, 6, 27, 18), + datetime(2014, 7, 3, 19): datetime(2014, 6, 30, 18), + datetime(2014, 7, 3, 23): datetime(2014, 6, 30, 18), + datetime(2014, 7, 4, 9): datetime(2014, 6, 30, 18), + datetime(2014, 7, 5, 15): datetime(2014, 7, 1, 18), + datetime(2014, 7, 6, 18): datetime(2014, 7, 1, 18), + datetime(2014, 7, 7, 9, 30): datetime(2014, 7, 1, 18, 30), + datetime(2014, 7, 7, 10, 30, 30): datetime(2014, 7, 2, 9, 30, 30), + }, + ) + ) + + # 5 days and 3 hours later + apply_large_n_cases.append( + ( + BusinessHour(28, start=["21:00", "03:00"], end=["01:00", 
"04:00"]), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 9, 0), + datetime(2014, 7, 1, 22): datetime(2014, 7, 9, 3), + datetime(2014, 7, 1, 23): datetime(2014, 7, 9, 21), + datetime(2014, 7, 2, 2): datetime(2014, 7, 9, 23), + datetime(2014, 7, 3, 21): datetime(2014, 7, 11, 0), + datetime(2014, 7, 4, 1): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 2): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 3): datetime(2014, 7, 11, 23), + datetime(2014, 7, 4, 21): datetime(2014, 7, 12, 0), + datetime(2014, 7, 5, 0): datetime(2014, 7, 14, 22), + datetime(2014, 7, 5, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 5, 15): datetime(2014, 7, 14, 23), + datetime(2014, 7, 6, 18): datetime(2014, 7, 14, 23), + datetime(2014, 7, 7, 1): datetime(2014, 7, 14, 23), + datetime(2014, 7, 7, 23, 30): datetime(2014, 7, 15, 21, 30), + }, + ) + ) + + @pytest.mark.parametrize("case", apply_large_n_cases) + def test_apply_large_n(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_nanoseconds(self): + tests = [] + + tests.append( + ( + BusinessHour(), + { + Timestamp("2014-07-04 15:00") + + Nano(5): Timestamp("2014-07-04 16:00") + + Nano(5), + Timestamp("2014-07-04 16:00") + + Nano(5): Timestamp("2014-07-07 09:00") + + Nano(5), + Timestamp("2014-07-04 16:00") + - Nano(5): Timestamp("2014-07-04 17:00") + - Nano(5), + }, + ) + ) + + tests.append( + ( + BusinessHour(-1), + { + Timestamp("2014-07-04 15:00") + + Nano(5): Timestamp("2014-07-04 14:00") + + Nano(5), + Timestamp("2014-07-04 10:00") + + Nano(5): Timestamp("2014-07-04 09:00") + + Nano(5), + Timestamp("2014-07-04 10:00") + - Nano(5): Timestamp("2014-07-03 17:00") + - Nano(5), + }, + ) + ) + + for offset, cases in tests: + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_datetimeindex(self): + idx1 = date_range(start="2014-07-04 15:00", end="2014-07-08 10:00", freq="BH") + idx2 = 
date_range(start="2014-07-04 15:00", periods=12, freq="BH") + idx3 = date_range(end="2014-07-08 10:00", periods=12, freq="BH") + expected = DatetimeIndex( + [ + "2014-07-04 15:00", + "2014-07-04 16:00", + "2014-07-07 09:00", + "2014-07-07 10:00", + "2014-07-07 11:00", + "2014-07-07 12:00", + "2014-07-07 13:00", + "2014-07-07 14:00", + "2014-07-07 15:00", + "2014-07-07 16:00", + "2014-07-08 09:00", + "2014-07-08 10:00", + ], + freq="BH", + ) + for idx in [idx1, idx2, idx3]: + tm.assert_index_equal(idx, expected) + + idx1 = date_range(start="2014-07-04 15:45", end="2014-07-08 10:45", freq="BH") + idx2 = date_range(start="2014-07-04 15:45", periods=12, freq="BH") + idx3 = date_range(end="2014-07-08 10:45", periods=12, freq="BH") + + expected = DatetimeIndex( + [ + "2014-07-04 15:45", + "2014-07-04 16:45", + "2014-07-07 09:45", + "2014-07-07 10:45", + "2014-07-07 11:45", + "2014-07-07 12:45", + "2014-07-07 13:45", + "2014-07-07 14:45", + "2014-07-07 15:45", + "2014-07-07 16:45", + "2014-07-08 09:45", + "2014-07-08 10:45", + ], + freq="BH", + ) + expected = idx1 + for idx in [idx1, idx2, idx3]: + tm.assert_index_equal(idx, expected) + + +class TestCustomBusinessHour(Base): + _offset = CustomBusinessHour + holidays = ["2014-06-27", datetime(2014, 6, 30), np.datetime64("2014-07-02")] + + def setup_method(self, method): + # 2014 Calendar to check custom holidays + # Sun Mon Tue Wed Thu Fri Sat + # 6/22 23 24 25 26 27 28 + # 29 30 7/1 2 3 4 5 + # 6 7 8 9 10 11 12 + self.d = datetime(2014, 7, 1, 10, 00) + self.offset1 = CustomBusinessHour(weekmask="Tue Wed Thu Fri") + + self.offset2 = CustomBusinessHour(holidays=self.holidays) + + def test_constructor_errors(self): + from datetime import time as dt_time + + with pytest.raises(ValueError): + CustomBusinessHour(start=dt_time(11, 0, 5)) + with pytest.raises(ValueError): + CustomBusinessHour(start="AAA") + with pytest.raises(ValueError): + CustomBusinessHour(start="14:00:05") + + def test_different_normalize_equals(self): + # 
GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset1) == "" + assert repr(self.offset2) == "" + + def test_with_offset(self): + expected = Timestamp("2014-07-01 13:00") + + assert self.d + CustomBusinessHour() * 3 == expected + assert self.d + CustomBusinessHour(n=3) == expected + + def test_eq(self): + for offset in [self.offset1, self.offset2]: + assert offset == offset + + assert CustomBusinessHour() != CustomBusinessHour(-1) + assert CustomBusinessHour(start="09:00") == CustomBusinessHour() + assert CustomBusinessHour(start="09:00") != CustomBusinessHour(start="09:01") + assert CustomBusinessHour(start="09:00", end="17:00") != CustomBusinessHour( + start="17:00", end="09:01" + ) + + assert CustomBusinessHour(weekmask="Tue Wed Thu Fri") != CustomBusinessHour( + weekmask="Mon Tue Wed Thu Fri" + ) + assert CustomBusinessHour(holidays=["2014-06-27"]) != CustomBusinessHour( + holidays=["2014-06-28"] + ) + + def test_sub(self): + # override the Base.test_sub implementation because self.offset2 is + # defined differently in this class than the test expects + pass + + def test_hash(self): + assert hash(self.offset1) == hash(self.offset1) + assert hash(self.offset2) == hash(self.offset2) + + def test_call(self): + assert self.offset1(self.d) == datetime(2014, 7, 1, 11) + assert self.offset2(self.d) == datetime(2014, 7, 1, 11) + + def testRollback1(self): + assert self.offset1.rollback(self.d) == self.d + assert self.offset2.rollback(self.d) == self.d + + d = datetime(2014, 7, 1, 0) + + # 2014/07/01 is Tuesday, 06/30 is Monday(holiday) + assert self.offset1.rollback(d) == datetime(2014, 6, 27, 17) + + # 2014/6/30 and 2014/6/27 are holidays + assert self.offset2.rollback(d) == datetime(2014, 6, 26, 17) + + def testRollback2(self): + assert self._offset(-3).rollback(datetime(2014, 7, 5, 15, 0)) == datetime( + 
2014, 7, 4, 17, 0 + ) + + def testRollforward1(self): + assert self.offset1.rollforward(self.d) == self.d + assert self.offset2.rollforward(self.d) == self.d + + d = datetime(2014, 7, 1, 0) + assert self.offset1.rollforward(d) == datetime(2014, 7, 1, 9) + assert self.offset2.rollforward(d) == datetime(2014, 7, 1, 9) + + def testRollforward2(self): + assert self._offset(-3).rollforward(datetime(2014, 7, 5, 16, 0)) == datetime( + 2014, 7, 7, 9 + ) + + def test_roll_date_object(self): + offset = BusinessHour() + + dt = datetime(2014, 7, 6, 15, 0) + + result = offset.rollback(dt) + assert result == datetime(2014, 7, 4, 17) + + result = offset.rollforward(dt) + assert result == datetime(2014, 7, 7, 9) + + normalize_cases = [] + normalize_cases.append( + ( + CustomBusinessHour(normalize=True, holidays=holidays), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3), + datetime(2014, 7, 1, 16): datetime(2014, 7, 3), + datetime(2014, 7, 1, 23): datetime(2014, 7, 3), + datetime(2014, 7, 1, 0): datetime(2014, 7, 1), + datetime(2014, 7, 4, 15): datetime(2014, 7, 4), + datetime(2014, 7, 4, 15, 59): datetime(2014, 7, 4), + datetime(2014, 7, 4, 16, 30): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 7), + datetime(2014, 7, 6, 10): datetime(2014, 7, 7), + }, + ) + ) + + normalize_cases.append( + ( + CustomBusinessHour(-1, normalize=True, holidays=holidays), + { + datetime(2014, 7, 1, 8): datetime(2014, 6, 26), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 16): datetime(2014, 7, 1), + datetime(2014, 7, 1, 10): datetime(2014, 6, 26), + datetime(2014, 7, 1, 0): datetime(2014, 6, 26), + datetime(2014, 7, 7, 10): datetime(2014, 7, 4), + datetime(2014, 7, 7, 10, 1): datetime(2014, 7, 7), + datetime(2014, 7, 5, 23): datetime(2014, 7, 4), + datetime(2014, 7, 6, 10): datetime(2014, 7, 4), + }, + ) + ) + + normalize_cases.append( + ( + CustomBusinessHour( + 1, normalize=True, start="17:00", 
end="04:00", holidays=holidays + ), + { + datetime(2014, 7, 1, 8): datetime(2014, 7, 1), + datetime(2014, 7, 1, 17): datetime(2014, 7, 1), + datetime(2014, 7, 1, 23): datetime(2014, 7, 2), + datetime(2014, 7, 2, 2): datetime(2014, 7, 2), + datetime(2014, 7, 2, 3): datetime(2014, 7, 3), + datetime(2014, 7, 4, 23): datetime(2014, 7, 5), + datetime(2014, 7, 5, 2): datetime(2014, 7, 5), + datetime(2014, 7, 7, 2): datetime(2014, 7, 7), + datetime(2014, 7, 7, 17): datetime(2014, 7, 7), + }, + ) + ) + + @pytest.mark.parametrize("norm_cases", normalize_cases) + def test_normalize(self, norm_cases): + offset, cases = norm_cases + for dt, expected in cases.items(): + assert offset.apply(dt) == expected + + def test_is_on_offset(self): + tests = [] + + tests.append( + ( + CustomBusinessHour(start="10:00", end="15:00", holidays=self.holidays), + { + datetime(2014, 7, 1, 9): False, + datetime(2014, 7, 1, 10): True, + datetime(2014, 7, 1, 15): True, + datetime(2014, 7, 1, 15, 1): False, + datetime(2014, 7, 5, 12): False, + datetime(2014, 7, 6, 12): False, + }, + ) + ) + + for offset, cases in tests: + for dt, expected in cases.items(): + assert offset.is_on_offset(dt) == expected + + apply_cases = [] + apply_cases.append( + ( + CustomBusinessHour(holidays=holidays), + { + datetime(2014, 7, 1, 11): datetime(2014, 7, 1, 12), + datetime(2014, 7, 1, 13): datetime(2014, 7, 1, 14), + datetime(2014, 7, 1, 15): datetime(2014, 7, 1, 16), + datetime(2014, 7, 1, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 1, 16): datetime(2014, 7, 3, 9), + datetime(2014, 7, 1, 16, 30, 15): datetime(2014, 7, 3, 9, 30, 15), + datetime(2014, 7, 1, 17): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 11): datetime(2014, 7, 3, 10), + # out of business hours + datetime(2014, 7, 2, 8): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 19): datetime(2014, 7, 3, 10), + datetime(2014, 7, 2, 23): datetime(2014, 7, 3, 10), + datetime(2014, 7, 3, 0): datetime(2014, 7, 3, 10), + # saturday + datetime(2014, 7, 5, 
@pytest.mark.parametrize("apply_case", apply_cases)
def test_apply(self, apply_case):
    """Apply one parametrized offset to each base datetime and check the result.

    ``apply_case`` is an ``(offset, mapping)`` pair from ``apply_cases``;
    the mapping goes from a base datetime to the datetime expected after
    applying the offset.
    """
    offset, expectations = apply_case
    for start, wanted in expectations.items():
        assert_offset_equal(offset, start, wanted)
Nano(5), + }, + ) + ) + + @pytest.mark.parametrize("nano_case", nano_cases) + def test_apply_nanoseconds(self, nano_case): + offset, cases = nano_case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + +class TestCustomBusinessDay(Base): + _offset = CDay + + def setup_method(self, method): + self.d = datetime(2008, 1, 1) + self.nd = np_datetime64_compat("2008-01-01 00:00:00Z") + + self.offset = CDay() + self.offset1 = self.offset + self.offset2 = CDay(2) + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset) == "" + assert repr(self.offset2) == "<2 * CustomBusinessDays>" + + if compat.PY37: + expected = "" + else: + expected = "" + assert repr(self.offset + timedelta(1)) == expected + + def test_with_offset(self): + offset = self.offset + timedelta(hours=2) + + assert (self.d + offset) == datetime(2008, 1, 2, 2) + + def test_eq(self): + assert self.offset2 == self.offset2 + + def test_mul(self): + pass + + def test_hash(self): + assert hash(self.offset2) == hash(self.offset2) + + def test_call(self): + assert self.offset2(self.d) == datetime(2008, 1, 3) + assert self.offset2(self.nd) == datetime(2008, 1, 3) + + def testRollback1(self): + assert CDay(10).rollback(self.d) == self.d + + def testRollback2(self): + assert CDay(10).rollback(datetime(2008, 1, 5)) == datetime(2008, 1, 4) + + def testRollforward1(self): + assert CDay(10).rollforward(self.d) == self.d + + def testRollforward2(self): + assert CDay(10).rollforward(datetime(2008, 1, 5)) == datetime(2008, 1, 7) + + def test_roll_date_object(self): + offset = CDay() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 9, 14) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 17) + + offset = 
@pytest.mark.parametrize("case", on_offset_cases)
def test_is_on_offset(self, case):
    """Check one ``(offset, datetime, expected)`` triple from ``on_offset_cases``."""
    offset, when, on_offset = case
    assert_is_on_offset(offset, when, on_offset)
datetime(2008, 1, 7): datetime(2008, 1, 7), + }, + ) + ) + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + CDay(10) + assert result == datetime(2012, 11, 6) + + result = dt + CDay(100) - CDay(100) + assert result == dt + + off = CDay() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 12, 23) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + xp = datetime(2011, 12, 26) + assert rs == xp + + def test_apply_corner(self): + msg = ( + "Only know how to combine trading day " + "with datetime, datetime64 or timedelta" + ) + with pytest.raises(ApplyTypeError, match=msg): + CDay().apply(BMonthEnd()) + + def test_holidays(self): + # Define a TradingDay offset + holidays = ["2012-05-01", datetime(2013, 5, 1), np.datetime64("2014-05-01")] + tday = CDay(holidays=holidays) + for year in range(2012, 2015): + dt = datetime(year, 4, 30) + xp = datetime(year, 5, 2) + rs = dt + tday + assert rs == xp + + def test_weekmask(self): + weekmask_saudi = "Sat Sun Mon Tue Wed" # Thu-Fri Weekend + weekmask_uae = "1111001" # Fri-Sat Weekend + weekmask_egypt = [1, 1, 1, 1, 0, 0, 1] # Fri-Sat Weekend + bday_saudi = CDay(weekmask=weekmask_saudi) + bday_uae = CDay(weekmask=weekmask_uae) + bday_egypt = CDay(weekmask=weekmask_egypt) + dt = datetime(2013, 5, 1) + xp_saudi = datetime(2013, 5, 4) + xp_uae = datetime(2013, 5, 2) + xp_egypt = datetime(2013, 5, 2) + assert xp_saudi == dt + bday_saudi + assert xp_uae == dt + bday_uae + assert xp_egypt == dt + bday_egypt + xp2 = datetime(2013, 5, 5) + assert xp2 == dt + 2 * bday_saudi + assert xp2 == dt + 2 * bday_uae + assert xp2 == dt + 2 * bday_egypt + + def test_weekmask_and_holidays(self): + weekmask_egypt = "Sun Mon Tue Wed Thu" # Fri-Sat Weekend + holidays = ["2012-05-01", datetime(2013, 5, 1), 
class CustomBusinessMonthBase:
    """Fixtures and tests shared by the custom business month test classes.

    Every method relies on ``self._offset``, which is not defined here and
    must be supplied by the concrete class.
    """

    def setup_method(self, method):
        """Create the base date and the n=1 / n=2 offsets used by the tests."""
        self.d = datetime(2008, 1, 1)

        single = self._offset()
        self.offset = single
        self.offset1 = single  # same object as ``self.offset``
        self.offset2 = self._offset(2)

    def test_eq(self):
        """An offset compares equal to itself."""
        assert self.offset2 == self.offset2

    def test_mul(self):
        """Deliberately empty."""
        # presumably overrides an inherited ``test_mul`` -- confirm against Base
        pass

    def test_hash(self):
        """Hashing the same offset twice gives the same value."""
        assert hash(self.offset2) == hash(self.offset2)

    def test_roundtrip_pickle(self):
        """Offsets survive a pickle round trip unchanged."""
        candidates = (self._offset(), self._offset(2), self._offset() * 2)
        for candidate in candidates:
            restored = tm.round_trip_pickle(candidate)
            assert restored == candidate

    def test_copy(self):
        """A copy of an offset with a custom weekmask equals the original."""
        # GH 17452
        original = self._offset(weekmask="Mon Wed Fri")
        assert original == original.copy()
assert repr(self.offset) == "" + assert repr(self.offset2) == "<2 * CustomBusinessMonthEnds>" + + def testCall(self): + assert self.offset2(self.d) == datetime(2008, 2, 29) + + def testRollback1(self): + assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) + + def testRollback2(self): + assert CBMonthEnd(10).rollback(self.d) == datetime(2007, 12, 31) + + def testRollforward1(self): + assert CBMonthEnd(10).rollforward(self.d) == datetime(2008, 1, 31) + + def test_roll_date_object(self): + offset = CBMonthEnd() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 8, 31) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 28) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + on_offset_cases = [ + (CBMonthEnd(), datetime(2008, 1, 31), True), + (CBMonthEnd(), datetime(2008, 1, 1), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, d, expected = case + assert_is_on_offset(offset, d, expected) + + apply_cases: _ApplyCases = [] + apply_cases.append( + ( + CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 2, 7): datetime(2008, 2, 29), + }, + ) + ) + + apply_cases.append( + ( + 2 * CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 2, 7): datetime(2008, 3, 31), + }, + ) + ) + + apply_cases.append( + ( + -CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2007, 12, 31), + datetime(2008, 2, 8): datetime(2008, 1, 31), + }, + ) + ) + + apply_cases.append( + ( + -2 * CBMonthEnd(), + { + datetime(2008, 1, 1): datetime(2007, 11, 30), + datetime(2008, 2, 9): datetime(2007, 12, 31), + }, + ) + ) + + apply_cases.append( + ( + CBMonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 2, 7): datetime(2008, 2, 29), + }, + ) + ) + 
+ @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + CBMonthEnd(10) + assert result == datetime(2013, 7, 31) + + result = dt + CDay(100) - CDay(100) + assert result == dt + + off = CBMonthEnd() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 7, 29) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + xp = datetime(2012, 5, 31) + assert rs == xp + + def test_holidays(self): + # Define a TradingDay offset + holidays = ["2012-01-31", datetime(2012, 2, 28), np.datetime64("2012-02-29")] + bm_offset = CBMonthEnd(holidays=holidays) + dt = datetime(2012, 1, 1) + assert dt + bm_offset == datetime(2012, 1, 30) + assert dt + 2 * bm_offset == datetime(2012, 2, 27) + + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") + def test_datetimeindex(self): + from pandas.tseries.holiday import USFederalHolidayCalendar + + hcal = USFederalHolidayCalendar() + freq = CBMonthEnd(calendar=hcal) + + assert date_range(start="20120101", end="20130101", freq=freq).tolist()[ + 0 + ] == datetime(2012, 1, 31) + + +class TestCustomBusinessMonthBegin(CustomBusinessMonthBase, Base): + _offset = CBMonthBegin + + def test_different_normalize_equals(self): + # GH#21404 changed __eq__ to return False when `normalize` does not match + offset = self._offset() + offset2 = self._offset(normalize=True) + assert offset != offset2 + + def test_repr(self): + assert repr(self.offset) == "" + assert repr(self.offset2) == "<2 * CustomBusinessMonthBegins>" + + def testCall(self): + assert self.offset2(self.d) == datetime(2008, 3, 3) + + def testRollback1(self): + assert CDay(10).rollback(datetime(2007, 12, 31)) == datetime(2007, 12, 31) + + def testRollback2(self): + assert CBMonthBegin(10).rollback(self.d) == datetime(2008, 1, 1) + + def 
testRollforward1(self): + assert CBMonthBegin(10).rollforward(self.d) == datetime(2008, 1, 1) + + def test_roll_date_object(self): + offset = CBMonthBegin() + + dt = date(2012, 9, 15) + + result = offset.rollback(dt) + assert result == datetime(2012, 9, 3) + + result = offset.rollforward(dt) + assert result == datetime(2012, 10, 1) + + offset = offsets.Day() + result = offset.rollback(dt) + assert result == datetime(2012, 9, 15) + + result = offset.rollforward(dt) + assert result == datetime(2012, 9, 15) + + on_offset_cases = [ + (CBMonthBegin(), datetime(2008, 1, 1), True), + (CBMonthBegin(), datetime(2008, 1, 31), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + apply_cases: _ApplyCases = [] + apply_cases.append( + ( + CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 2, 7): datetime(2008, 3, 3), + }, + ) + ) + + apply_cases.append( + ( + 2 * CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 3, 3), + datetime(2008, 2, 7): datetime(2008, 4, 1), + }, + ) + ) + + apply_cases.append( + ( + -CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2007, 12, 3), + datetime(2008, 2, 8): datetime(2008, 2, 1), + }, + ) + ) + + apply_cases.append( + ( + -2 * CBMonthBegin(), + { + datetime(2008, 1, 1): datetime(2007, 11, 1), + datetime(2008, 2, 9): datetime(2008, 1, 1), + }, + ) + ) + + apply_cases.append( + ( + CBMonthBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 7): datetime(2008, 2, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", apply_cases) + def test_apply(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + def test_apply_large_n(self): + dt = datetime(2012, 10, 23) + + result = dt + CBMonthBegin(10) + assert result == datetime(2013, 8, 1) + + result = dt + CDay(100) - CDay(100) + assert 
result == dt + + off = CBMonthBegin() * 6 + rs = datetime(2012, 1, 1) - off + xp = datetime(2011, 7, 1) + assert rs == xp + + st = datetime(2011, 12, 18) + rs = st + off + + xp = datetime(2012, 6, 1) + assert rs == xp + + def test_holidays(self): + # Define a TradingDay offset + holidays = ["2012-02-01", datetime(2012, 2, 2), np.datetime64("2012-03-01")] + bm_offset = CBMonthBegin(holidays=holidays) + dt = datetime(2012, 1, 1) + + assert dt + bm_offset == datetime(2012, 1, 2) + assert dt + 2 * bm_offset == datetime(2012, 2, 3) + + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") + def test_datetimeindex(self): + hcal = USFederalHolidayCalendar() + cbmb = CBMonthBegin(calendar=hcal) + assert date_range(start="20120101", end="20130101", freq=cbmb).tolist()[ + 0 + ] == datetime(2012, 1, 3) + + +class TestWeek(Base): + _offset = Week + d = Timestamp(datetime(2008, 1, 2)) + offset1 = _offset() + offset2 = _offset(2) + + def test_repr(self): + assert repr(Week(weekday=0)) == "" + assert repr(Week(n=-1, weekday=0)) == "<-1 * Week: weekday=0>" + assert repr(Week(n=-2, weekday=0)) == "<-2 * Weeks: weekday=0>" + + def test_corner(self): + with pytest.raises(ValueError): + Week(weekday=7) + + with pytest.raises(ValueError, match="Day must be"): + Week(weekday=-1) + + def test_is_anchored(self): + assert Week(weekday=0).is_anchored() + assert not Week().is_anchored() + assert not Week(2, weekday=2).is_anchored() + assert not Week(2).is_anchored() + + offset_cases = [] + # not business week + offset_cases.append( + ( + Week(), + { + datetime(2008, 1, 1): datetime(2008, 1, 8), + datetime(2008, 1, 4): datetime(2008, 1, 11), + datetime(2008, 1, 5): datetime(2008, 1, 12), + datetime(2008, 1, 6): datetime(2008, 1, 13), + datetime(2008, 1, 7): datetime(2008, 1, 14), + }, + ) + ) + + # Mon + offset_cases.append( + ( + Week(weekday=0), + { + datetime(2007, 12, 31): datetime(2008, 1, 7), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): 
datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 14), + }, + ) + ) + + # n=0 -> roll forward. Mon + offset_cases.append( + ( + Week(0, weekday=0), + { + datetime(2007, 12, 31): datetime(2007, 12, 31), + datetime(2008, 1, 4): datetime(2008, 1, 7), + datetime(2008, 1, 5): datetime(2008, 1, 7), + datetime(2008, 1, 6): datetime(2008, 1, 7), + datetime(2008, 1, 7): datetime(2008, 1, 7), + }, + ) + ) + + # n=0 -> roll forward. Mon + offset_cases.append( + ( + Week(-2, weekday=1), + { + datetime(2010, 4, 6): datetime(2010, 3, 23), + datetime(2010, 4, 8): datetime(2010, 3, 30), + datetime(2010, 4, 5): datetime(2010, 3, 23), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + @pytest.mark.parametrize("weekday", range(7)) + def test_is_on_offset(self, weekday): + offset = Week(weekday=weekday) + + for day in range(1, 8): + date = datetime(2008, 1, day) + + if day % 7 == weekday: + expected = True + else: + expected = False + assert_is_on_offset(offset, date, expected) + + +class TestWeekOfMonth(Base): + _offset = WeekOfMonth + offset1 = _offset() + offset2 = _offset(2) + + def test_constructor(self): + with pytest.raises(ValueError, match="^Week"): + WeekOfMonth(n=1, week=4, weekday=0) + + with pytest.raises(ValueError, match="^Week"): + WeekOfMonth(n=1, week=-1, weekday=0) + + with pytest.raises(ValueError, match="^Day"): + WeekOfMonth(n=1, week=0, weekday=-1) + + with pytest.raises(ValueError, match="^Day"): + WeekOfMonth(n=1, week=0, weekday=-7) + + def test_repr(self): + assert ( + repr(WeekOfMonth(weekday=1, week=2)) == "" + ) + + def test_offset(self): + date1 = datetime(2011, 1, 4) # 1st Tuesday of Month + date2 = datetime(2011, 1, 11) # 2nd Tuesday of Month + date3 = datetime(2011, 1, 18) # 3rd Tuesday of Month + date4 = datetime(2011, 1, 25) # 4th 
Tuesday of Month + + # see for loop for structure + test_cases = [ + (-2, 2, 1, date1, datetime(2010, 11, 16)), + (-2, 2, 1, date2, datetime(2010, 11, 16)), + (-2, 2, 1, date3, datetime(2010, 11, 16)), + (-2, 2, 1, date4, datetime(2010, 12, 21)), + (-1, 2, 1, date1, datetime(2010, 12, 21)), + (-1, 2, 1, date2, datetime(2010, 12, 21)), + (-1, 2, 1, date3, datetime(2010, 12, 21)), + (-1, 2, 1, date4, datetime(2011, 1, 18)), + (0, 0, 1, date1, datetime(2011, 1, 4)), + (0, 0, 1, date2, datetime(2011, 2, 1)), + (0, 0, 1, date3, datetime(2011, 2, 1)), + (0, 0, 1, date4, datetime(2011, 2, 1)), + (0, 1, 1, date1, datetime(2011, 1, 11)), + (0, 1, 1, date2, datetime(2011, 1, 11)), + (0, 1, 1, date3, datetime(2011, 2, 8)), + (0, 1, 1, date4, datetime(2011, 2, 8)), + (0, 0, 1, date1, datetime(2011, 1, 4)), + (0, 1, 1, date2, datetime(2011, 1, 11)), + (0, 2, 1, date3, datetime(2011, 1, 18)), + (0, 3, 1, date4, datetime(2011, 1, 25)), + (1, 0, 0, date1, datetime(2011, 2, 7)), + (1, 0, 0, date2, datetime(2011, 2, 7)), + (1, 0, 0, date3, datetime(2011, 2, 7)), + (1, 0, 0, date4, datetime(2011, 2, 7)), + (1, 0, 1, date1, datetime(2011, 2, 1)), + (1, 0, 1, date2, datetime(2011, 2, 1)), + (1, 0, 1, date3, datetime(2011, 2, 1)), + (1, 0, 1, date4, datetime(2011, 2, 1)), + (1, 0, 2, date1, datetime(2011, 1, 5)), + (1, 0, 2, date2, datetime(2011, 2, 2)), + (1, 0, 2, date3, datetime(2011, 2, 2)), + (1, 0, 2, date4, datetime(2011, 2, 2)), + (1, 2, 1, date1, datetime(2011, 1, 18)), + (1, 2, 1, date2, datetime(2011, 1, 18)), + (1, 2, 1, date3, datetime(2011, 2, 15)), + (1, 2, 1, date4, datetime(2011, 2, 15)), + (2, 2, 1, date1, datetime(2011, 2, 15)), + (2, 2, 1, date2, datetime(2011, 2, 15)), + (2, 2, 1, date3, datetime(2011, 3, 15)), + (2, 2, 1, date4, datetime(2011, 3, 15)), + ] + + for n, week, weekday, dt, expected in test_cases: + offset = WeekOfMonth(n, week=week, weekday=weekday) + assert_offset_equal(offset, dt, expected) + + # try subtracting + result = datetime(2011, 2, 1) - 
WeekOfMonth(week=1, weekday=2) + assert result == datetime(2011, 1, 12) + + result = datetime(2011, 2, 3) - WeekOfMonth(week=0, weekday=2) + assert result == datetime(2011, 2, 2) + + on_offset_cases = [ + (0, 0, datetime(2011, 2, 7), True), + (0, 0, datetime(2011, 2, 6), False), + (0, 0, datetime(2011, 2, 14), False), + (1, 0, datetime(2011, 2, 14), True), + (0, 1, datetime(2011, 2, 1), True), + (0, 1, datetime(2011, 2, 8), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + week, weekday, dt, expected = case + offset = WeekOfMonth(week=week, weekday=weekday) + assert offset.is_on_offset(dt) == expected + + +class TestLastWeekOfMonth(Base): + _offset = LastWeekOfMonth + offset1 = _offset() + offset2 = _offset(2) + + def test_constructor(self): + with pytest.raises(ValueError, match="^N cannot be 0"): + LastWeekOfMonth(n=0, weekday=1) + + with pytest.raises(ValueError, match="^Day"): + LastWeekOfMonth(n=1, weekday=-1) + + with pytest.raises(ValueError, match="^Day"): + LastWeekOfMonth(n=1, weekday=7) + + def test_offset(self): + # Saturday + last_sat = datetime(2013, 8, 31) + next_sat = datetime(2013, 9, 28) + offset_sat = LastWeekOfMonth(n=1, weekday=5) + + one_day_before = last_sat + timedelta(days=-1) + assert one_day_before + offset_sat == last_sat + + one_day_after = last_sat + timedelta(days=+1) + assert one_day_after + offset_sat == next_sat + + # Test On that day + assert last_sat + offset_sat == next_sat + + # Thursday + + offset_thur = LastWeekOfMonth(n=1, weekday=3) + last_thurs = datetime(2013, 1, 31) + next_thurs = datetime(2013, 2, 28) + + one_day_before = last_thurs + timedelta(days=-1) + assert one_day_before + offset_thur == last_thurs + + one_day_after = last_thurs + timedelta(days=+1) + assert one_day_after + offset_thur == next_thurs + + # Test on that day + assert last_thurs + offset_thur == next_thurs + + three_before = last_thurs + timedelta(days=-3) + assert three_before + offset_thur == 
last_thurs + + two_after = last_thurs + timedelta(days=+2) + assert two_after + offset_thur == next_thurs + + offset_sunday = LastWeekOfMonth(n=1, weekday=WeekDay.SUN) + assert datetime(2013, 7, 31) + offset_sunday == datetime(2013, 8, 25) + + on_offset_cases = [ + (WeekDay.SUN, datetime(2013, 1, 27), True), + (WeekDay.SAT, datetime(2013, 3, 30), True), + (WeekDay.MON, datetime(2013, 2, 18), False), # Not the last Mon + (WeekDay.SUN, datetime(2013, 2, 25), False), # Not a SUN + (WeekDay.MON, datetime(2013, 2, 25), True), + (WeekDay.SAT, datetime(2013, 11, 30), True), + (WeekDay.SAT, datetime(2006, 8, 26), True), + (WeekDay.SAT, datetime(2007, 8, 25), True), + (WeekDay.SAT, datetime(2008, 8, 30), True), + (WeekDay.SAT, datetime(2009, 8, 29), True), + (WeekDay.SAT, datetime(2010, 8, 28), True), + (WeekDay.SAT, datetime(2011, 8, 27), True), + (WeekDay.SAT, datetime(2019, 8, 31), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + weekday, dt, expected = case + offset = LastWeekOfMonth(weekday=weekday) + assert offset.is_on_offset(dt) == expected + + +class TestSemiMonthEnd(Base): + _offset = SemiMonthEnd + offset1 = _offset() + offset2 = _offset(2) + + def test_offset_whole_year(self): + dates = ( + datetime(2007, 12, 31), + datetime(2008, 1, 15), + datetime(2008, 1, 31), + datetime(2008, 2, 15), + datetime(2008, 2, 29), + datetime(2008, 3, 15), + datetime(2008, 3, 31), + datetime(2008, 4, 15), + datetime(2008, 4, 30), + datetime(2008, 5, 15), + datetime(2008, 5, 31), + datetime(2008, 6, 15), + datetime(2008, 6, 30), + datetime(2008, 7, 15), + datetime(2008, 7, 31), + datetime(2008, 8, 15), + datetime(2008, 8, 31), + datetime(2008, 9, 15), + datetime(2008, 9, 30), + datetime(2008, 10, 15), + datetime(2008, 10, 31), + datetime(2008, 11, 15), + datetime(2008, 11, 30), + datetime(2008, 12, 15), + datetime(2008, 12, 31), + ) + + for base, exp_date in zip(dates[:-1], dates[1:]): + assert_offset_equal(SemiMonthEnd(), base, 
exp_date) + + # ensure .apply_index works as expected + s = DatetimeIndex(dates[:-1]) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = SemiMonthEnd().apply_index(s) + + exp = DatetimeIndex(dates[1:]) + tm.assert_index_equal(result, exp) + + # ensure generating a range with DatetimeIndex gives same result + result = date_range(start=dates[0], end=dates[-1], freq="SM") + exp = DatetimeIndex(dates) + tm.assert_index_equal(result, exp) + + offset_cases = [] + offset_cases.append( + ( + SemiMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 15): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 15), + datetime(2006, 12, 14): datetime(2006, 12, 15), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 15), + datetime(2007, 1, 1): datetime(2007, 1, 15), + datetime(2006, 12, 1): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(day_of_month=20), + { + datetime(2008, 1, 1): datetime(2008, 1, 20), + datetime(2008, 1, 15): datetime(2008, 1, 20), + datetime(2008, 1, 21): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 20), + datetime(2006, 12, 14): datetime(2006, 12, 20), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 20), + datetime(2007, 1, 1): datetime(2007, 1, 20), + datetime(2006, 12, 1): datetime(2006, 12, 20), + datetime(2006, 12, 15): datetime(2006, 12, 20), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 16): datetime(2008, 1, 31), + datetime(2008, 1, 15): datetime(2008, 1, 15), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): 
datetime(2007, 1, 15), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(0, day_of_month=16), + { + datetime(2008, 1, 1): datetime(2008, 1, 16), + datetime(2008, 1, 16): datetime(2008, 1, 16), + datetime(2008, 1, 15): datetime(2008, 1, 16), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 16), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(2), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2007, 1, 15), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2007, 1, 16): datetime(2007, 2, 15), + datetime(2006, 11, 1): datetime(2006, 11, 30), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 6, 15), + datetime(2008, 12, 31): datetime(2008, 12, 15), + datetime(2006, 12, 29): datetime(2006, 12, 15), + datetime(2006, 12, 30): datetime(2006, 12, 15), + datetime(2007, 1, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(-1, day_of_month=4), + { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2007, 1, 4): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 6, 4), + datetime(2008, 12, 31): datetime(2008, 12, 4), + datetime(2006, 12, 5): datetime(2006, 12, 4), + datetime(2006, 12, 30): datetime(2006, 12, 4), + datetime(2007, 1, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthEnd(-2), + { + datetime(2007, 1, 1): datetime(2006, 12, 15), + datetime(2008, 6, 30): datetime(2008, 5, 31), + datetime(2008, 3, 15): datetime(2008, 2, 15), + datetime(2008, 12, 31): datetime(2008, 11, 30), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 14): datetime(2006, 11, 15), + 
datetime(2007, 1, 1): datetime(2006, 12, 15), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + @pytest.mark.parametrize("case", offset_cases) + def test_apply_index(self, case): + offset, cases = case + s = DatetimeIndex(cases.keys()) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = offset.apply_index(s) + + exp = DatetimeIndex(cases.values()) + tm.assert_index_equal(result, exp) + + on_offset_cases = [ + (datetime(2007, 12, 31), True), + (datetime(2007, 12, 15), True), + (datetime(2007, 12, 14), False), + (datetime(2007, 12, 1), False), + (datetime(2008, 2, 29), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + dt, expected = case + assert_is_on_offset(SemiMonthEnd(), dt, expected) + + @pytest.mark.parametrize("klass", [Series, DatetimeIndex]) + def test_vectorized_offset_addition(self, klass): + s = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = s + SemiMonthEnd() + result2 = SemiMonthEnd() + s + + exp = klass( + [ + Timestamp("2000-01-31 00:15:00", tz="US/Central"), + Timestamp("2000-02-29", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + s = klass( + [ + Timestamp("2000-01-01 00:15:00", tz="US/Central"), + Timestamp("2000-02-01", tz="US/Central"), + ], + name="a", + ) + + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = s + SemiMonthEnd() + result2 = 
SemiMonthEnd() + s + + exp = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + +class TestSemiMonthBegin(Base): + _offset = SemiMonthBegin + offset1 = _offset() + offset2 = _offset(2) + + def test_offset_whole_year(self): + dates = ( + datetime(2007, 12, 15), + datetime(2008, 1, 1), + datetime(2008, 1, 15), + datetime(2008, 2, 1), + datetime(2008, 2, 15), + datetime(2008, 3, 1), + datetime(2008, 3, 15), + datetime(2008, 4, 1), + datetime(2008, 4, 15), + datetime(2008, 5, 1), + datetime(2008, 5, 15), + datetime(2008, 6, 1), + datetime(2008, 6, 15), + datetime(2008, 7, 1), + datetime(2008, 7, 15), + datetime(2008, 8, 1), + datetime(2008, 8, 15), + datetime(2008, 9, 1), + datetime(2008, 9, 15), + datetime(2008, 10, 1), + datetime(2008, 10, 15), + datetime(2008, 11, 1), + datetime(2008, 11, 15), + datetime(2008, 12, 1), + datetime(2008, 12, 15), + ) + + for base, exp_date in zip(dates[:-1], dates[1:]): + assert_offset_equal(SemiMonthBegin(), base, exp_date) + + # ensure .apply_index works as expected + s = DatetimeIndex(dates[:-1]) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = SemiMonthBegin().apply_index(s) + + exp = DatetimeIndex(dates[1:]) + tm.assert_index_equal(result, exp) + + # ensure generating a range with DatetimeIndex gives same result + result = date_range(start=dates[0], end=dates[-1], freq="SMS") + exp = DatetimeIndex(dates) + tm.assert_index_equal(result, exp) + + offset_cases = [] + offset_cases.append( + ( + SemiMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 1, 15), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 14): datetime(2006, 12, 15), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 
1, 1), + datetime(2007, 1, 1): datetime(2007, 1, 15), + datetime(2006, 12, 1): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthBegin(day_of_month=20), + { + datetime(2008, 1, 1): datetime(2008, 1, 20), + datetime(2008, 1, 15): datetime(2008, 1, 20), + datetime(2008, 1, 21): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 14): datetime(2006, 12, 20), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 1): datetime(2007, 1, 20), + datetime(2006, 12, 1): datetime(2006, 12, 20), + datetime(2006, 12, 15): datetime(2006, 12, 20), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 16): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 1, 15), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 2): datetime(2006, 12, 15), + datetime(2007, 1, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthBegin(0, day_of_month=16), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 1, 16): datetime(2008, 1, 16), + datetime(2008, 1, 15): datetime(2008, 1, 16), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2007, 1, 5): datetime(2007, 1, 16), + datetime(2007, 1, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthBegin(2), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 15), + datetime(2006, 12, 1): datetime(2007, 1, 1), + datetime(2006, 12, 29): datetime(2007, 1, 15), + datetime(2006, 12, 15): datetime(2007, 1, 15), + datetime(2007, 1, 1): datetime(2007, 2, 1), + datetime(2007, 1, 16): datetime(2007, 2, 15), + datetime(2006, 11, 1): 
datetime(2006, 12, 1), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 15), + datetime(2008, 6, 30): datetime(2008, 6, 15), + datetime(2008, 6, 14): datetime(2008, 6, 1), + datetime(2008, 12, 31): datetime(2008, 12, 15), + datetime(2006, 12, 29): datetime(2006, 12, 15), + datetime(2006, 12, 15): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 15), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthBegin(-1, day_of_month=4), + { + datetime(2007, 1, 1): datetime(2006, 12, 4), + datetime(2007, 1, 4): datetime(2007, 1, 1), + datetime(2008, 6, 30): datetime(2008, 6, 4), + datetime(2008, 12, 31): datetime(2008, 12, 4), + datetime(2006, 12, 5): datetime(2006, 12, 4), + datetime(2006, 12, 30): datetime(2006, 12, 4), + datetime(2006, 12, 2): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 4), + }, + ) + ) + + offset_cases.append( + ( + SemiMonthBegin(-2), + { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 6, 30): datetime(2008, 6, 1), + datetime(2008, 6, 14): datetime(2008, 5, 15), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 12, 15): datetime(2006, 11, 15), + datetime(2007, 1, 1): datetime(2006, 12, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + @pytest.mark.parametrize("case", offset_cases) + def test_apply_index(self, case): + offset, cases = case + s = DatetimeIndex(cases.keys()) + + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = offset.apply_index(s) + + exp = DatetimeIndex(cases.values()) + tm.assert_index_equal(result, exp) + + on_offset_cases = [ + (datetime(2007, 12, 1), True), + (datetime(2007, 12, 15), True), + 
(datetime(2007, 12, 14), False), + (datetime(2007, 12, 31), False), + (datetime(2008, 2, 15), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + dt, expected = case + assert_is_on_offset(SemiMonthBegin(), dt, expected) + + @pytest.mark.parametrize("klass", [Series, DatetimeIndex]) + def test_vectorized_offset_addition(self, klass): + s = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = s + SemiMonthBegin() + result2 = SemiMonthBegin() + s + + exp = klass( + [ + Timestamp("2000-02-01 00:15:00", tz="US/Central"), + Timestamp("2000-03-01", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + s = klass( + [ + Timestamp("2000-01-01 00:15:00", tz="US/Central"), + Timestamp("2000-02-01", tz="US/Central"), + ], + name="a", + ) + with tm.assert_produces_warning(None): + # GH#22535 check that we don't get a FutureWarning from adding + # an integer array to PeriodIndex + result = s + SemiMonthBegin() + result2 = SemiMonthBegin() + s + + exp = klass( + [ + Timestamp("2000-01-15 00:15:00", tz="US/Central"), + Timestamp("2000-02-15", tz="US/Central"), + ], + name="a", + ) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) + + +def test_Easter(): + assert_offset_equal(Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4)) + assert_offset_equal(Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24)) + assert_offset_equal(Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24)) + + assert_offset_equal(Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24)) + assert_offset_equal(Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8)) + + assert_offset_equal(-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4)) + assert_offset_equal(-Easter(), 
datetime(2010, 4, 5), datetime(2010, 4, 4)) + assert_offset_equal(-Easter(2), datetime(2011, 1, 1), datetime(2009, 4, 12)) + + assert_offset_equal(-Easter(), datetime(2010, 4, 4), datetime(2009, 4, 12)) + assert_offset_equal(-Easter(2), datetime(2010, 4, 4), datetime(2008, 3, 23)) + + +class TestOffsetNames: + def test_get_offset_name(self): + assert BDay().freqstr == "B" + assert BDay(2).freqstr == "2B" + assert BMonthEnd().freqstr == "BM" + assert Week(weekday=0).freqstr == "W-MON" + assert Week(weekday=1).freqstr == "W-TUE" + assert Week(weekday=2).freqstr == "W-WED" + assert Week(weekday=3).freqstr == "W-THU" + assert Week(weekday=4).freqstr == "W-FRI" + + assert LastWeekOfMonth(weekday=WeekDay.SUN).freqstr == "LWOM-SUN" + + +def test_get_offset(): + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + _get_offset("gibberish") + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + _get_offset("QS-JAN-B") + + pairs = [ + ("B", BDay()), + ("b", BDay()), + ("bm", BMonthEnd()), + ("Bm", BMonthEnd()), + ("W-MON", Week(weekday=0)), + ("W-TUE", Week(weekday=1)), + ("W-WED", Week(weekday=2)), + ("W-THU", Week(weekday=3)), + ("W-FRI", Week(weekday=4)), + ] + + for name, expected in pairs: + offset = _get_offset(name) + assert offset == expected, ( + f"Expected {repr(name)} to yield {repr(expected)} " + f"(actual: {repr(offset)})" + ) + + +def test_get_offset_legacy(): + pairs = [("w@Sat", Week(weekday=5))] + for name, expected in pairs: + with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG): + _get_offset(name) + + +class TestOffsetAliases: + def setup_method(self, method): + _offset_map.clear() + + def test_alias_equality(self): + for k, v in _offset_map.items(): + if v is None: + continue + assert k == v.copy() + + def test_rule_code(self): + lst = ["M", "MS", "BM", "BMS", "D", "B", "H", "T", "S", "L", "U"] + for k in lst: + assert k == _get_offset(k).rule_code + # should be cached - this is kind of an internals test... 
+ assert k in _offset_map + assert k == (_get_offset(k) * 3).rule_code + + suffix_lst = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"] + base = "W" + for v in suffix_lst: + alias = "-".join([base, v]) + assert alias == _get_offset(alias).rule_code + assert alias == (_get_offset(alias) * 5).rule_code + + suffix_lst = [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ] + base_lst = ["A", "AS", "BA", "BAS", "Q", "QS", "BQ", "BQS"] + for base in base_lst: + for v in suffix_lst: + alias = "-".join([base, v]) + assert alias == _get_offset(alias).rule_code + assert alias == (_get_offset(alias) * 5).rule_code + + lst = ["M", "D", "B", "H", "T", "S", "L", "U"] + for k in lst: + code, stride = get_freq_code("3" + k) + assert isinstance(code, int) + assert stride == 3 + assert k == get_freq_str(code) + + +def test_dateoffset_misc(): + oset = offsets.DateOffset(months=2, days=4) + # it works + oset.freqstr + + assert not offsets.DateOffset(months=2) == 2 + + +def test_freq_offsets(): + off = BDay(1, offset=timedelta(0, 1800)) + assert off.freqstr == "B+30Min" + + off = BDay(1, offset=timedelta(0, -1800)) + assert off.freqstr == "B-30Min" + + +class TestReprNames: + def test_str_for_named_is_name(self): + # look at all the amazing combinations! 
+ month_prefixes = ["A", "AS", "BA", "BAS", "Q", "BQ", "BQS", "QS"] + names = [ + prefix + "-" + month + for prefix in month_prefixes + for month in [ + "JAN", + "FEB", + "MAR", + "APR", + "MAY", + "JUN", + "JUL", + "AUG", + "SEP", + "OCT", + "NOV", + "DEC", + ] + ] + days = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"] + names += ["W-" + day for day in days] + names += ["WOM-" + week + day for week in ("1", "2", "3", "4") for day in days] + _offset_map.clear() + for name in names: + offset = _get_offset(name) + assert offset.freqstr == name + + +def get_utc_offset_hours(ts): + # take a Timestamp and compute total hours of utc offset + o = ts.utcoffset() + return (o.days * 24 * 3600 + o.seconds) / 3600.0 + + +class TestDST: + """ + test DateOffset additions over Daylight Savings Time + """ + + # one microsecond before the DST transition + ts_pre_fallback = "2013-11-03 01:59:59.999999" + ts_pre_springfwd = "2013-03-10 01:59:59.999999" + + # test both basic names and dateutil timezones + timezone_utc_offsets = { + "US/Eastern": dict(utc_offset_daylight=-4, utc_offset_standard=-5), + "dateutil/US/Pacific": dict(utc_offset_daylight=-7, utc_offset_standard=-8), + } + valid_date_offsets_singular = [ + "weekday", + "day", + "hour", + "minute", + "second", + "microsecond", + ] + valid_date_offsets_plural = [ + "weeks", + "days", + "hours", + "minutes", + "seconds", + "milliseconds", + "microseconds", + ] + + def _test_all_offsets(self, n, **kwds): + valid_offsets = ( + self.valid_date_offsets_plural + if n > 1 + else self.valid_date_offsets_singular + ) + + for name in valid_offsets: + self._test_offset(offset_name=name, offset_n=n, **kwds) + + def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset): + offset = DateOffset(**{offset_name: offset_n}) + + t = tstart + offset + if expected_utc_offset is not None: + assert get_utc_offset_hours(t) == expected_utc_offset + + if offset_name == "weeks": + # dates should match + assert t.date() == 
timedelta(days=7 * offset.kwds["weeks"]) + tstart.date() + # expect the same day of week, hour of day, minute, second, ... + assert ( + t.dayofweek == tstart.dayofweek + and t.hour == tstart.hour + and t.minute == tstart.minute + and t.second == tstart.second + ) + elif offset_name == "days": + # dates should match + assert timedelta(offset.kwds["days"]) + tstart.date() == t.date() + # expect the same hour of day, minute, second, ... + assert ( + t.hour == tstart.hour + and t.minute == tstart.minute + and t.second == tstart.second + ) + elif offset_name in self.valid_date_offsets_singular: + # expect the singular offset value to match between tstart and t + datepart_offset = getattr( + t, offset_name if offset_name != "weekday" else "dayofweek" + ) + assert datepart_offset == offset.kwds[offset_name] + else: + # the offset should be the same as if it was done in UTC + assert t == (tstart.tz_convert("UTC") + offset).tz_convert("US/Pacific") + + def _make_timestamp(self, string, hrs_offset, tz): + if hrs_offset >= 0: + offset_string = f"{hrs_offset:02d}00" + else: + offset_string = f"-{(hrs_offset * -1):02}00" + return Timestamp(string + offset_string).tz_convert(tz) + + def test_springforward_plural(self): + # test moving from standard to daylight savings + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets["utc_offset_standard"] + hrs_post = utc_offsets["utc_offset_daylight"] + self._test_all_offsets( + n=3, + tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), + expected_utc_offset=hrs_post, + ) + + def test_fallback_singular(self): + # in the case of singular offsets, we don't necessarily know which utc + # offset the new Timestamp will wind up in (the tz for 1 month may be + # different from 1 second) so we don't specify an expected_utc_offset + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets["utc_offset_standard"] + self._test_all_offsets( + n=1, + 
tstart=self._make_timestamp(self.ts_pre_fallback, hrs_pre, tz), + expected_utc_offset=None, + ) + + def test_springforward_singular(self): + for tz, utc_offsets in self.timezone_utc_offsets.items(): + hrs_pre = utc_offsets["utc_offset_standard"] + self._test_all_offsets( + n=1, + tstart=self._make_timestamp(self.ts_pre_springfwd, hrs_pre, tz), + expected_utc_offset=None, + ) + + offset_classes = { + MonthBegin: ["11/2/2012", "12/1/2012"], + MonthEnd: ["11/2/2012", "11/30/2012"], + BMonthBegin: ["11/2/2012", "12/3/2012"], + BMonthEnd: ["11/2/2012", "11/30/2012"], + CBMonthBegin: ["11/2/2012", "12/3/2012"], + CBMonthEnd: ["11/2/2012", "11/30/2012"], + SemiMonthBegin: ["11/2/2012", "11/15/2012"], + SemiMonthEnd: ["11/2/2012", "11/15/2012"], + Week: ["11/2/2012", "11/9/2012"], + YearBegin: ["11/2/2012", "1/1/2013"], + YearEnd: ["11/2/2012", "12/31/2012"], + BYearBegin: ["11/2/2012", "1/1/2013"], + BYearEnd: ["11/2/2012", "12/31/2012"], + QuarterBegin: ["11/2/2012", "12/1/2012"], + QuarterEnd: ["11/2/2012", "12/31/2012"], + BQuarterBegin: ["11/2/2012", "12/3/2012"], + BQuarterEnd: ["11/2/2012", "12/31/2012"], + Day: ["11/4/2012", "11/4/2012 23:00"], + }.items() + + @pytest.mark.parametrize("tup", offset_classes) + def test_all_offset_classes(self, tup): + offset, test_values = tup + + first = Timestamp(test_values[0], tz="US/Eastern") + offset() + second = Timestamp(test_values[1], tz="US/Eastern") + assert first == second + + +# --------------------------------------------------------------------- +def test_get_offset_day_error(): + # subclass of _BaseOffset must override _day_opt attribute, or we should + # get a NotImplementedError + + with pytest.raises(NotImplementedError): + DateOffset()._get_offset_day(datetime.now()) + + +def test_valid_default_arguments(offset_types): + # GH#19142 check that calling the constructors without passing + # any keyword arguments produces valid offsets + cls = offset_types + cls() + + +@pytest.mark.parametrize("kwd", 
sorted(liboffsets.relativedelta_kwds)) +def test_valid_month_attributes(kwd, month_classes): + # GH#18226 + cls = month_classes + # check that we cannot create e.g. MonthEnd(weeks=3) + with pytest.raises(TypeError): + cls(**{kwd: 3}) + + +@pytest.mark.parametrize("kwd", sorted(liboffsets.relativedelta_kwds)) +def test_valid_relativedelta_kwargs(kwd): + # Check that all the arguments specified in liboffsets.relativedelta_kwds + # are in fact valid relativedelta keyword args + DateOffset(**{kwd: 1}) + + +@pytest.mark.parametrize("kwd", sorted(liboffsets.relativedelta_kwds)) +def test_valid_tick_attributes(kwd, tick_classes): + # GH#18226 + cls = tick_classes + # check that we cannot create e.g. Hour(weeks=3) + with pytest.raises(TypeError): + cls(**{kwd: 3}) + + +def test_validate_n_error(): + with pytest.raises(TypeError): + DateOffset(n="Doh!") + + with pytest.raises(TypeError): + MonthBegin(n=timedelta(1)) + + with pytest.raises(TypeError): + BDay(n=np.array([1, 2], dtype=np.int64)) + + +def test_require_integers(offset_types): + cls = offset_types + with pytest.raises(ValueError): + cls(n=1.5) + + +def test_tick_normalize_raises(tick_classes): + # check that trying to create a Tick object with normalize=True raises + # GH#21427 + cls = tick_classes + with pytest.raises(ValueError): + cls(n=3, normalize=True) + + +def test_weeks_onoffset(): + # GH#18510 Week with weekday = None, normalize = False should always + # be is_on_offset + offset = Week(n=2, weekday=None) + ts = Timestamp("1862-01-13 09:03:34.873477378+0210", tz="Africa/Lusaka") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + # second timestamp/timezone (NOTE: n is still positive here; a negative n may have been intended) + offset = Week(n=2, weekday=None) + ts = Timestamp("1856-10-24 16:18:36.556360110-0717", tz="Pacific/Easter") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_weekofmonth_onoffset(): + # GH#18864 + # Make sure that nanoseconds don't trip up is_on_offset (and with 
it apply) + offset = WeekOfMonth(n=2, week=2, weekday=0) + ts = Timestamp("1916-05-15 01:14:49.583410462+0422", tz="Asia/Qyzylorda") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + # negative n + offset = WeekOfMonth(n=-3, week=1, weekday=0) + ts = Timestamp("1980-12-08 03:38:52.878321185+0500", tz="Asia/Oral") + fast = offset.is_on_offset(ts) + slow = (ts + offset) - offset == ts + assert fast == slow + + +def test_last_week_of_month_on_offset(): + # GH#19036, GH#18977 _adjust_dst was incorrect for LastWeekOfMonth + offset = LastWeekOfMonth(n=4, weekday=6) + ts = Timestamp("1917-05-27 20:55:27.084284178+0200", tz="Europe/Warsaw") + slow = (ts + offset) - offset == ts + fast = offset.is_on_offset(ts) + assert fast == slow + + # negative n + offset = LastWeekOfMonth(n=-4, weekday=5) + ts = Timestamp("2005-08-27 05:01:42.799392561-0500", tz="America/Rainy_River") + slow = (ts + offset) - offset == ts + fast = offset.is_on_offset(ts) + assert fast == slow + + +def test_week_add_invalid(): + # Week with weekday should raise TypeError and _not_ AttributeError + # when adding invalid offset + offset = Week(weekday=1) + other = Day() + with pytest.raises(TypeError, match="Cannot add"): + offset + other diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py new file mode 100644 index 00000000..716d3ff3 --- /dev/null +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -0,0 +1,140 @@ +""" +Behavioral based tests for offsets and date_range. + +This file is adapted from https://github.com/pandas-dev/pandas/pull/18761 - +which was more ambitious but less idiomatic in its use of Hypothesis. + +You may wish to consult the previous version for inspiration on further +tests, or when trying to pin down the bugs exposed by the tests below. 
+""" +import warnings + +from hypothesis import assume, given, strategies as st +from hypothesis.extra.dateutil import timezones as dateutil_timezones +from hypothesis.extra.pytz import timezones as pytz_timezones +import pytest + +import pandas as pd +from pandas import Timestamp + +from pandas.tseries.offsets import ( + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BYearBegin, + BYearEnd, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + YearBegin, + YearEnd, +) + +# ---------------------------------------------------------------- +# Helpers for generating random data + +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + min_dt = Timestamp(1900, 1, 1).to_pydatetime() + max_dt = Timestamp(1900, 1, 1).to_pydatetime() + +gen_date_range = st.builds( + pd.date_range, + start=st.datetimes( + # TODO: Choose the min/max values more systematically + min_value=Timestamp(1900, 1, 1).to_pydatetime(), + max_value=Timestamp(2100, 1, 1).to_pydatetime(), + ), + periods=st.integers(min_value=2, max_value=100), + freq=st.sampled_from("Y Q M D H T s ms us ns".split()), + tz=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()), +) + +gen_random_datetime = st.datetimes( + min_value=min_dt, + max_value=max_dt, + timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()), +) + +# The strategy for each type is registered in conftest.py, as they don't carry +# enough runtime information (e.g. type hints) to infer how to build them. 
+gen_yqm_offset = st.one_of( + *map( + st.from_type, + [ + MonthBegin, + MonthEnd, + BMonthBegin, + BMonthEnd, + QuarterBegin, + QuarterEnd, + BQuarterBegin, + BQuarterEnd, + YearBegin, + YearEnd, + BYearBegin, + BYearEnd, + ], + ) +) + + +# ---------------------------------------------------------------- +# Offset-specific behaviour tests + + +# Based on CI runs: Always passes on OSX, fails on Linux, sometimes on Windows +@pytest.mark.xfail(strict=False, reason="inconsistent between OSs, Pythons") +@given(gen_random_datetime, gen_yqm_offset) +def test_on_offset_implementations(dt, offset): + assume(not offset.normalize) + # check that the class-specific implementations of is_on_offset match + # the general case definition: + # (dt + offset) - offset == dt + compare = (dt + offset) - offset + assert offset.is_on_offset(dt) == (compare == dt) + + +@pytest.mark.xfail( + reason="res_v2 below is incorrect, needs to use the " + "commented-out version with tz_localize. " + "But with that fix in place, hypothesis then " + "has errors in timezone generation." +) +@given(gen_yqm_offset, gen_date_range) +def test_apply_index_implementations(offset, rng): + # offset.apply_index(dti)[i] should match dti[i] + offset + assume(offset.n != 0) # TODO: test for that case separately + + # rng = pd.date_range(start='1/1/2000', periods=100000, freq='T') + ser = pd.Series(rng) + + res = rng + offset + res_v2 = offset.apply_index(rng) + # res_v2 = offset.apply_index(rng.tz_localize(None)).tz_localize(rng.tz) + assert (res == res_v2).all() + + assert res[0] == rng[0] + offset + assert res[-1] == rng[-1] + offset + res2 = ser + offset + # apply_index is only for indexes, not series, so no res2_v2 + assert res2.iloc[0] == ser.iloc[0] + offset + assert res2.iloc[-1] == ser.iloc[-1] + offset + # TODO: Check randomly assorted entries, not just first/last + + +@pytest.mark.xfail # TODO: reason? 
+@given(gen_yqm_offset) +def test_shift_across_dst(offset): + # GH#18319 check that 1) timezone is correctly normalized and + # 2) that hour is not incorrectly changed by this normalization + # Note that dti includes a transition across DST boundary + dti = pd.date_range( + start="2017-10-30 12:00:00", end="2017-11-06", freq="D", tz="US/Eastern" + ) + assert (dti.hour == 12).all() # we haven't screwed up yet + + res = dti + offset + assert (res.hour == 12).all() diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py new file mode 100644 index 00000000..297e5c31 --- /dev/null +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -0,0 +1,322 @@ +""" +Tests for offsets.Tick and subclasses +""" +from datetime import datetime, timedelta + +from hypothesis import assume, example, given, settings, strategies as st +import numpy as np +import pytest + +from pandas import Timedelta, Timestamp +import pandas._testing as tm + +from pandas.tseries import offsets +from pandas.tseries.offsets import Hour, Micro, Milli, Minute, Nano, Second + +from .common import assert_offset_equal + +# --------------------------------------------------------------------- +# Test Helpers + +tick_classes = [Hour, Minute, Second, Milli, Micro, Nano] + + +# --------------------------------------------------------------------- + + +def test_apply_ticks(): + result = offsets.Hour(3).apply(offsets.Hour(4)) + exp = offsets.Hour(7) + assert result == exp + + +def test_delta_to_tick(): + delta = timedelta(3) + + tick = offsets._delta_to_tick(delta) + assert tick == offsets.Day(3) + + td = Timedelta(nanoseconds=5) + tick = offsets._delta_to_tick(td) + assert tick == Nano(5) + + +@pytest.mark.parametrize("cls", tick_classes) +@settings(deadline=None) # GH 24641 +@example(n=2, m=3) +@example(n=800, m=300) +@example(n=1000, m=5) +@given(n=st.integers(-999, 999), m=st.integers(-999, 999)) +def test_tick_add_sub(cls, n, m): + # For all Tick subclasses and all 
integers n, m, we should have + # tick(n) + tick(m) == tick(n+m) + # tick(n) - tick(m) == tick(n-m) + left = cls(n) + right = cls(m) + expected = cls(n + m) + + assert left + right == expected + assert left.apply(right) == expected + + expected = cls(n - m) + assert left - right == expected + + +@pytest.mark.parametrize("cls", tick_classes) +@settings(deadline=None) +@example(n=2, m=3) +@given(n=st.integers(-999, 999), m=st.integers(-999, 999)) +def test_tick_equality(cls, n, m): + assume(m != n) + # tick == tock iff tick.n == tock.n + left = cls(n) + right = cls(m) + assert left != right + assert not (left == right) + + right = cls(n) + assert left == right + assert not (left != right) + + if n != 0: + assert cls(n) != cls(-n) + + +# --------------------------------------------------------------------- + + +def test_Hour(): + assert_offset_equal(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1)) + assert_offset_equal(Hour(-1), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 2)) + assert_offset_equal(-1 * Hour(), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + + assert Hour(3) + Hour(2) == Hour(5) + assert Hour(3) - Hour(2) == Hour() + + assert Hour(4) != Hour(1) + + +def test_Minute(): + assert_offset_equal(Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) + assert_offset_equal(Minute(-1), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 2)) + assert_offset_equal(-1 * Minute(), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + + assert Minute(3) + Minute(2) == Minute(5) + assert Minute(3) - Minute(2) == Minute() + assert Minute(5) != Minute() + + +def test_Second(): + assert_offset_equal(Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 1)) + assert_offset_equal(Second(-1), datetime(2010, 1, 1, 0, 0, 1), datetime(2010, 1, 1)) + assert_offset_equal( + 2 * Second(), datetime(2010, 1, 
1), datetime(2010, 1, 1, 0, 0, 2) + ) + assert_offset_equal( + -1 * Second(), datetime(2010, 1, 1, 0, 0, 1), datetime(2010, 1, 1) + ) + + assert Second(3) + Second(2) == Second(5) + assert Second(3) - Second(2) == Second() + + +def test_Millisecond(): + assert_offset_equal( + Milli(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 1000) + ) + assert_offset_equal( + Milli(-1), datetime(2010, 1, 1, 0, 0, 0, 1000), datetime(2010, 1, 1) + ) + assert_offset_equal( + Milli(2), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 2000) + ) + assert_offset_equal( + 2 * Milli(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 2000) + ) + assert_offset_equal( + -1 * Milli(), datetime(2010, 1, 1, 0, 0, 0, 1000), datetime(2010, 1, 1) + ) + + assert Milli(3) + Milli(2) == Milli(5) + assert Milli(3) - Milli(2) == Milli() + + +def test_MillisecondTimestampArithmetic(): + assert_offset_equal( + Milli(), Timestamp("2010-01-01"), Timestamp("2010-01-01 00:00:00.001") + ) + assert_offset_equal( + Milli(-1), Timestamp("2010-01-01 00:00:00.001"), Timestamp("2010-01-01") + ) + + +def test_Microsecond(): + assert_offset_equal(Micro(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 1)) + assert_offset_equal( + Micro(-1), datetime(2010, 1, 1, 0, 0, 0, 1), datetime(2010, 1, 1) + ) + + assert_offset_equal( + 2 * Micro(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 0, 2) + ) + assert_offset_equal( + -1 * Micro(), datetime(2010, 1, 1, 0, 0, 0, 1), datetime(2010, 1, 1) + ) + + assert Micro(3) + Micro(2) == Micro(5) + assert Micro(3) - Micro(2) == Micro() + + +def test_NanosecondGeneric(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert timestamp.nanosecond == 0 + + result = timestamp + Nano(10) + assert result.nanosecond == 10 + + reverse_result = Nano(10) + timestamp + assert reverse_result.nanosecond == 10 + + +def test_Nanosecond(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert_offset_equal(Nano(), timestamp, timestamp + np.timedelta64(1, "ns")) + 
assert_offset_equal(Nano(-1), timestamp + np.timedelta64(1, "ns"), timestamp) + assert_offset_equal(2 * Nano(), timestamp, timestamp + np.timedelta64(2, "ns")) + assert_offset_equal(-1 * Nano(), timestamp + np.timedelta64(1, "ns"), timestamp) + + assert Nano(3) + Nano(2) == Nano(5) + assert Nano(3) - Nano(2) == Nano() + + # GH9284 + assert Nano(1) + Nano(10) == Nano(11) + assert Nano(5) + Micro(1) == Nano(1005) + assert Micro(5) + Nano(1) == Nano(5001) + + +@pytest.mark.parametrize( + "kls, expected", + [ + (Hour, Timedelta(hours=5)), + (Minute, Timedelta(hours=2, minutes=3)), + (Second, Timedelta(hours=2, seconds=3)), + (Milli, Timedelta(hours=2, milliseconds=3)), + (Micro, Timedelta(hours=2, microseconds=3)), + (Nano, Timedelta(hours=2, nanoseconds=3)), + ], +) +def test_tick_addition(kls, expected): + offset = kls(3) + result = offset + Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_division(cls): + off = cls(10) + + assert off / cls(5) == 2 + assert off / 2 == cls(5) + assert off / 2.0 == cls(5) + + assert off / off.delta == 1 + assert off / off.delta.to_timedelta64() == 1 + + assert off / Nano(1) == off.delta / Nano(1).delta + + if cls is not Nano: + # A case where we end up with a smaller class + result = off / 1000 + assert isinstance(result, offsets.Tick) + assert not isinstance(result, cls) + assert result.delta == off.delta / 1000 + + if cls._inc < Timedelta(seconds=1): + # Case where we end up with a bigger class + result = off / 0.001 + assert isinstance(result, offsets.Tick) + assert not isinstance(result, cls) + assert result.delta == off.delta / 0.001 + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_rdiv(cls): + off = cls(10) + delta = off.delta + td64 = delta.to_timedelta64() + + with pytest.raises(TypeError): + 2 / off + with pytest.raises(TypeError): + 2.0 / off + + assert (td64 * 2.5) / off == 2.5 + + if cls is not Nano: + # 
skip pytimedelta for Nano since it gets dropped + assert (delta.to_pytimedelta() * 2) / off == 2 + + result = np.array([2 * td64, td64]) / off + expected = np.array([2.0, 1.0]) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("cls1", tick_classes) +@pytest.mark.parametrize("cls2", tick_classes) +def test_tick_zero(cls1, cls2): + assert cls1(0) == cls2(0) + assert cls1(0) + cls2(0) == cls1(0) + + if cls1 is not Nano: + assert cls1(2) + cls2(0) == cls1(2) + + if cls1 is Nano: + assert cls1(2) + Nano(0) == cls1(2) + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_equalities(cls): + assert cls() == cls(1) + + +@pytest.mark.parametrize("cls", tick_classes) +def test_tick_offset(cls): + assert not cls().is_anchored() + + +@pytest.mark.parametrize("cls", tick_classes) +def test_compare_ticks(cls): + three = cls(3) + four = cls(4) + + assert three < cls(4) + assert cls(3) < four + assert four > cls(3) + assert cls(4) > three + assert cls(3) == cls(3) + assert cls(3) != cls(4) + + +@pytest.mark.parametrize("cls", tick_classes) +def test_compare_ticks_to_strs(cls): + # GH#23524 + off = cls(19) + + # These tests should work with any strings, but we particularly are + # interested in "infer" as that comparison is convenient to make in + # Datetime/Timedelta Array/Index constructors + assert not off == "infer" + assert not "foo" == off + + for left, right in [("infer", off), (off, "infer")]: + with pytest.raises(TypeError): + left < right + with pytest.raises(TypeError): + left <= right + with pytest.raises(TypeError): + left > right + with pytest.raises(TypeError): + left >= right diff --git a/pandas/tests/tseries/offsets/test_yqm_offsets.py b/pandas/tests/tseries/offsets/test_yqm_offsets.py new file mode 100644 index 00000000..79a0e0f2 --- /dev/null +++ b/pandas/tests/tseries/offsets/test_yqm_offsets.py @@ -0,0 +1,1464 @@ +""" +Tests for Year, Quarter, and Month-based DateOffset subclasses +""" +from datetime import datetime + 
+import pytest + +import pandas as pd +from pandas import Timestamp + +from pandas.tseries.offsets import ( + BMonthBegin, + BMonthEnd, + BQuarterBegin, + BQuarterEnd, + BYearBegin, + BYearEnd, + MonthBegin, + MonthEnd, + QuarterBegin, + QuarterEnd, + YearBegin, + YearEnd, +) + +from .common import assert_is_on_offset, assert_offset_equal +from .test_offsets import Base + +# -------------------------------------------------------------------- +# Misc + + +def test_quarterly_dont_normalize(): + date = datetime(2012, 3, 31, 5, 30) + + offsets = (QuarterBegin, QuarterEnd, BQuarterEnd, BQuarterBegin) + + for klass in offsets: + result = date + klass() + assert result.time() == date.time() + + +@pytest.mark.parametrize("n", [-2, 1]) +@pytest.mark.parametrize( + "cls", + [ + MonthBegin, + MonthEnd, + BMonthBegin, + BMonthEnd, + QuarterBegin, + QuarterEnd, + BQuarterBegin, + BQuarterEnd, + YearBegin, + YearEnd, + BYearBegin, + BYearEnd, + ], +) +def test_apply_index(cls, n): + offset = cls(n=n) + rng = pd.date_range(start="1/1/2000", periods=100000, freq="T") + ser = pd.Series(rng) + + res = rng + offset + res_v2 = offset.apply_index(rng) + assert (res == res_v2).all() + assert res[0] == rng[0] + offset + assert res[-1] == rng[-1] + offset + res2 = ser + offset + # apply_index is only for indexes, not series, so no res2_v2 + assert res2.iloc[0] == ser.iloc[0] + offset + assert res2.iloc[-1] == ser.iloc[-1] + offset + + +@pytest.mark.parametrize( + "offset", [QuarterBegin(), QuarterEnd(), BQuarterBegin(), BQuarterEnd()] +) +def test_on_offset(offset): + dates = [ + datetime(2016, m, d) + for m in [10, 11, 12] + for d in [1, 2, 3, 28, 29, 30, 31] + if not (m == 11 and d == 31) + ] + for date in dates: + res = offset.is_on_offset(date) + slow_version = date == (date + offset) - offset + assert res == slow_version + + +# -------------------------------------------------------------------- +# Months + + +class TestMonthBegin(Base): + _offset = MonthBegin + + offset_cases = [] 
+ # NOTE: I'm not entirely happy with the logic here for Begin -ss + # see thread 'offset conventions' on the ML + offset_cases.append( + ( + MonthBegin(), + { + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 2, 1): datetime(2008, 3, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 12, 1): datetime(2007, 1, 1), + datetime(2007, 1, 31): datetime(2007, 2, 1), + }, + ) + ) + + offset_cases.append( + ( + MonthBegin(0), + { + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2006, 12, 3): datetime(2007, 1, 1), + datetime(2007, 1, 31): datetime(2007, 2, 1), + }, + ) + ) + + offset_cases.append( + ( + MonthBegin(2), + { + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 1, 31): datetime(2008, 3, 1), + datetime(2006, 12, 31): datetime(2007, 2, 1), + datetime(2007, 12, 28): datetime(2008, 2, 1), + datetime(2007, 1, 1): datetime(2007, 3, 1), + datetime(2006, 11, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + MonthBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 5, 31): datetime(2008, 5, 1), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 1, 2): datetime(2006, 1, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + +class TestMonthEnd(Base): + _offset = MonthEnd + + def test_day_of_month(self): + dt = datetime(2007, 1, 1) + offset = MonthEnd() + + result = dt + offset + assert result == Timestamp(2007, 1, 31) + + result = result + offset + assert result == Timestamp(2007, 2, 28) + + def test_normalize(self): + dt = datetime(2007, 1, 1, 3) + + result = dt + MonthEnd(normalize=True) + expected = dt.replace(hour=0) + MonthEnd() + assert result == expected + + offset_cases = [] + offset_cases.append( + ( + 
MonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2006, 12, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + MonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 31), + datetime(2006, 12, 31): datetime(2006, 12, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + }, + ) + ) + + offset_cases.append( + ( + MonthEnd(2), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 3, 31), + datetime(2006, 12, 29): datetime(2007, 1, 31), + datetime(2006, 12, 31): datetime(2007, 2, 28), + datetime(2007, 1, 1): datetime(2007, 2, 28), + datetime(2006, 11, 1): datetime(2006, 12, 31), + }, + ) + ) + + offset_cases.append( + ( + MonthEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2008, 5, 31), + datetime(2008, 12, 31): datetime(2008, 11, 30), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 30): datetime(2006, 11, 30), + datetime(2007, 1, 1): datetime(2006, 12, 31), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (MonthEnd(), datetime(2007, 12, 31), True), + (MonthEnd(), datetime(2008, 1, 1), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + +class TestBMonthBegin(Base): + _offset = BMonthBegin + + def test_offsets_compare_equal(self): + # root cause of #456 + offset1 = BMonthBegin() + offset2 = BMonthBegin() + assert not offset1 
!= offset2 + + offset_cases = [] + offset_cases.append( + ( + BMonthBegin(), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 9, 1): datetime(2006, 10, 2), + datetime(2007, 1, 1): datetime(2007, 2, 1), + datetime(2006, 12, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + BMonthBegin(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2006, 10, 2): datetime(2006, 10, 2), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2006, 12, 29): datetime(2007, 1, 1), + datetime(2006, 12, 31): datetime(2007, 1, 1), + datetime(2006, 9, 15): datetime(2006, 10, 2), + }, + ) + ) + + offset_cases.append( + ( + BMonthBegin(2), + { + datetime(2008, 1, 1): datetime(2008, 3, 3), + datetime(2008, 1, 15): datetime(2008, 3, 3), + datetime(2006, 12, 29): datetime(2007, 2, 1), + datetime(2006, 12, 31): datetime(2007, 2, 1), + datetime(2007, 1, 1): datetime(2007, 3, 1), + datetime(2006, 11, 1): datetime(2007, 1, 1), + }, + ) + ) + + offset_cases.append( + ( + BMonthBegin(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 1), + datetime(2008, 6, 30): datetime(2008, 6, 2), + datetime(2008, 6, 1): datetime(2008, 5, 1), + datetime(2008, 3, 10): datetime(2008, 3, 3), + datetime(2008, 12, 31): datetime(2008, 12, 1), + datetime(2006, 12, 29): datetime(2006, 12, 1), + datetime(2006, 12, 30): datetime(2006, 12, 1), + datetime(2007, 1, 1): datetime(2006, 12, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (BMonthBegin(), datetime(2007, 12, 31), False), + (BMonthBegin(), datetime(2008, 1, 1), True), + (BMonthBegin(), datetime(2001, 4, 2), True), + (BMonthBegin(), datetime(2008, 3, 3), True), + ] + + 
@pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + +class TestBMonthEnd(Base): + _offset = BMonthEnd + + def test_normalize(self): + dt = datetime(2007, 1, 1, 3) + + result = dt + BMonthEnd(normalize=True) + expected = dt.replace(hour=0) + BMonthEnd() + assert result == expected + + def test_offsets_compare_equal(self): + # root cause of #456 + offset1 = BMonthEnd() + offset2 = BMonthEnd() + assert not offset1 != offset2 + + offset_cases = [] + offset_cases.append( + ( + BMonthEnd(), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2006, 12, 29): datetime(2007, 1, 31), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + datetime(2006, 12, 1): datetime(2006, 12, 29), + }, + ) + ) + + offset_cases.append( + ( + BMonthEnd(0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2006, 12, 29): datetime(2006, 12, 29), + datetime(2006, 12, 31): datetime(2007, 1, 31), + datetime(2007, 1, 1): datetime(2007, 1, 31), + }, + ) + ) + + offset_cases.append( + ( + BMonthEnd(2), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 3, 31), + datetime(2006, 12, 29): datetime(2007, 2, 28), + datetime(2006, 12, 31): datetime(2007, 2, 28), + datetime(2007, 1, 1): datetime(2007, 2, 28), + datetime(2006, 11, 1): datetime(2006, 12, 29), + }, + ) + ) + + offset_cases.append( + ( + BMonthEnd(-1), + { + datetime(2007, 1, 1): datetime(2006, 12, 29), + datetime(2008, 6, 30): datetime(2008, 5, 30), + datetime(2008, 12, 31): datetime(2008, 11, 28), + datetime(2006, 12, 29): datetime(2006, 11, 30), + datetime(2006, 12, 30): datetime(2006, 12, 29), + datetime(2007, 1, 1): datetime(2006, 12, 29), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, 
case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (BMonthEnd(), datetime(2007, 12, 31), True), + (BMonthEnd(), datetime(2008, 1, 1), False), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + +# -------------------------------------------------------------------- +# Quarters + + +class TestQuarterBegin(Base): + def test_repr(self): + expected = "<QuarterBegin: startingMonth=3>" + assert repr(QuarterBegin()) == expected + expected = "<QuarterBegin: startingMonth=3>" + assert repr(QuarterBegin(startingMonth=3)) == expected + expected = "<QuarterBegin: startingMonth=1>" + assert repr(QuarterBegin(startingMonth=1)) == expected + + def test_is_anchored(self): + assert QuarterBegin(startingMonth=1).is_anchored() + assert QuarterBegin().is_anchored() + assert not QuarterBegin(2, startingMonth=1).is_anchored() + + def test_offset_corner_case(self): + # corner + offset = QuarterBegin(n=-1, startingMonth=1) + assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1) + + offset_cases = [] + offset_cases.append( + ( + QuarterBegin(startingMonth=1), + { + datetime(2007, 12, 1): datetime(2008, 1, 1), + datetime(2008, 1, 1): datetime(2008, 4, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 1): datetime(2008, 7, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=2), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 2, 29): datetime(2008, 5, 1), + datetime(2008, 3, 15): datetime(2008, 5, 1), + datetime(2008, 3, 31): datetime(2008, 5, 1), + datetime(2008, 4, 15): datetime(2008, 5, 1), + datetime(2008, 4, 30): 
datetime(2008, 5, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=1, n=0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 12, 1): datetime(2009, 1, 1), + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 30): datetime(2008, 7, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=1, n=-1), + { + datetime(2008, 1, 1): datetime(2007, 10, 1), + datetime(2008, 1, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 1, 1), + datetime(2008, 2, 29): datetime(2008, 1, 1), + datetime(2008, 3, 15): datetime(2008, 1, 1), + datetime(2008, 3, 31): datetime(2008, 1, 1), + datetime(2008, 4, 15): datetime(2008, 4, 1), + datetime(2008, 4, 30): datetime(2008, 4, 1), + datetime(2008, 7, 1): datetime(2008, 4, 1), + }, + ) + ) + + offset_cases.append( + ( + QuarterBegin(startingMonth=1, n=2), + { + datetime(2008, 1, 1): datetime(2008, 7, 1), + datetime(2008, 2, 15): datetime(2008, 7, 1), + datetime(2008, 2, 29): datetime(2008, 7, 1), + datetime(2008, 3, 15): datetime(2008, 7, 1), + datetime(2008, 3, 31): datetime(2008, 7, 1), + datetime(2008, 4, 15): datetime(2008, 10, 1), + datetime(2008, 4, 1): datetime(2008, 10, 1), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + +class TestQuarterEnd(Base): + _offset = QuarterEnd + + def test_repr(self): + expected = "<QuarterEnd: startingMonth=3>" + assert repr(QuarterEnd()) == expected + expected = "<QuarterEnd: startingMonth=3>" + assert repr(QuarterEnd(startingMonth=3)) == expected + expected = "<QuarterEnd: startingMonth=1>" + assert repr(QuarterEnd(startingMonth=1)) == expected + + def test_is_anchored(self): + assert 
QuarterEnd(startingMonth=1).is_anchored() + assert QuarterEnd().is_anchored() + assert not QuarterEnd(2, startingMonth=1).is_anchored() + + def test_offset_corner_case(self): + # corner + offset = QuarterEnd(n=-1, startingMonth=1) + assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 31) + + offset_cases = [] + offset_cases.append( + ( + QuarterEnd(startingMonth=1), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 4, 30), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 7, 31), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=2), + { + datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2008, 2, 15): datetime(2008, 2, 29), + datetime(2008, 2, 29): datetime(2008, 5, 31), + datetime(2008, 3, 15): datetime(2008, 5, 31), + datetime(2008, 3, 31): datetime(2008, 5, 31), + datetime(2008, 4, 15): datetime(2008, 5, 31), + datetime(2008, 4, 30): datetime(2008, 5, 31), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=1, n=0), + { + datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 4, 30), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=1, n=-1), + { + datetime(2008, 1, 1): datetime(2007, 10, 31), + datetime(2008, 1, 31): datetime(2007, 10, 31), + datetime(2008, 2, 15): datetime(2008, 1, 31), + datetime(2008, 2, 29): datetime(2008, 1, 31), + datetime(2008, 3, 15): 
datetime(2008, 1, 31), + datetime(2008, 3, 31): datetime(2008, 1, 31), + datetime(2008, 4, 15): datetime(2008, 1, 31), + datetime(2008, 4, 30): datetime(2008, 1, 31), + datetime(2008, 7, 1): datetime(2008, 4, 30), + }, + ) + ) + + offset_cases.append( + ( + QuarterEnd(startingMonth=1, n=2), + { + datetime(2008, 1, 31): datetime(2008, 7, 31), + datetime(2008, 2, 15): datetime(2008, 7, 31), + datetime(2008, 2, 29): datetime(2008, 7, 31), + datetime(2008, 3, 15): datetime(2008, 7, 31), + datetime(2008, 3, 31): datetime(2008, 7, 31), + datetime(2008, 4, 15): datetime(2008, 7, 31), + datetime(2008, 4, 30): datetime(2008, 10, 31), + }, + ) + ) + + @pytest.mark.parametrize("case", offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in cases.items(): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [ + (QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), False), + (QuarterEnd(1, 
startingMonth=2), datetime(2008, 5, 31), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), True), + ] + + @pytest.mark.parametrize("case", on_offset_cases) + def test_is_on_offset(self, case): + offset, dt, expected = case + assert_is_on_offset(offset, dt, expected) + + +class TestBQuarterBegin(Base): + _offset = BQuarterBegin + + def test_repr(self): + expected = "<BusinessQuarterBegin: startingMonth=3>" + assert repr(BQuarterBegin()) == expected + expected = "<BusinessQuarterBegin: startingMonth=3>" + assert repr(BQuarterBegin(startingMonth=3)) == expected + expected = "<BusinessQuarterBegin: startingMonth=1>" + assert repr(BQuarterBegin(startingMonth=1)) == expected + + def test_is_anchored(self): + assert BQuarterBegin(startingMonth=1).is_anchored() + assert BQuarterBegin().is_anchored() + assert not BQuarterBegin(2, startingMonth=1).is_anchored() + + def test_offset_corner_case(self): + # corner + offset = BQuarterBegin(n=-1, startingMonth=1) + assert datetime(2007, 4, 3) + offset == datetime(2007, 4, 2) + + offset_cases = [] + offset_cases.append( + ( + BQuarterBegin(startingMonth=1), + { + datetime(2008, 1, 1): datetime(2008, 4, 1), + datetime(2008, 1, 31): datetime(2008, 4, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): 
datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2007, 3, 15): datetime(2007, 4, 2), + datetime(2007, 2, 28): datetime(2007, 4, 2), + datetime(2007, 1, 1): datetime(2007, 4, 2), + datetime(2007, 4, 15): datetime(2007, 7, 2), + datetime(2007, 7, 1): datetime(2007, 7, 2), + datetime(2007, 4, 1): datetime(2007, 4, 2), + datetime(2007, 4, 2): datetime(2007, 7, 2), + datetime(2008, 4, 30): datetime(2008, 7, 1), + }, + ) + ) + + offset_cases.append( + ( + BQuarterBegin(startingMonth=2), + { + datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 2, 29): datetime(2008, 5, 1), + datetime(2008, 3, 15): datetime(2008, 5, 1), + datetime(2008, 3, 31): datetime(2008, 5, 1), + datetime(2008, 4, 15): datetime(2008, 5, 1), + datetime(2008, 8, 15): datetime(2008, 11, 3), + datetime(2008, 9, 15): datetime(2008, 11, 3), + datetime(2008, 11, 1): datetime(2008, 11, 3), + datetime(2008, 4, 30): datetime(2008, 5, 1), + }, + ) + ) + + offset_cases.append( + ( + BQuarterBegin(startingMonth=1, n=0), + { + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2007, 12, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 1, 15): datetime(2008, 4, 1), + datetime(2008, 2, 27): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2007, 4, 1): datetime(2007, 4, 2), + datetime(2007, 4, 2): datetime(2007, 4, 2), + datetime(2007, 7, 1): datetime(2007, 7, 2), + datetime(2007, 4, 15): datetime(2007, 7, 2), + datetime(2007, 7, 2): datetime(2007, 7, 2), + }, + ) + ) + + offset_cases.append( + ( + BQuarterBegin(startingMonth=1, n=-1), + { + datetime(2008, 1, 1): datetime(2007, 10, 1), + datetime(2008, 1, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 1, 1), + datetime(2008, 2, 29): datetime(2008, 1, 1), + datetime(2008, 3, 15): datetime(2008, 
class TestBQuarterEnd(Base):
    """Tests for the ``BQuarterEnd`` (business quarter end) offset."""

    _offset = BQuarterEnd

    def test_repr(self):
        # The expected strings had been stripped down to "" (the "<...>" text
        # was removed like markup); restore the real repr of the offset.
        # With no arguments the offset is anchored on startingMonth=3, so the
        # first two expectations are the same string.
        expected = "<BusinessQuarterEnd: startingMonth=3>"
        assert repr(BQuarterEnd()) == expected
        expected = "<BusinessQuarterEnd: startingMonth=3>"
        assert repr(BQuarterEnd(startingMonth=3)) == expected
        expected = "<BusinessQuarterEnd: startingMonth=1>"
        assert repr(BQuarterEnd(startingMonth=1)) == expected

    def test_is_anchored(self):
        # Only offsets with n == 1 count as anchored.
        assert BQuarterEnd(startingMonth=1).is_anchored()
        assert BQuarterEnd().is_anchored()
        assert not BQuarterEnd(2, startingMonth=1).is_anchored()

    def test_offset_corner_case(self):
        # corner: 2010-01-31 is a Sunday, so stepping back one business
        # quarter end lands on Friday 2010-01-29 of the same month.
        offset = BQuarterEnd(n=-1, startingMonth=1)
        assert datetime(2010, 1, 31) + offset == datetime(2010, 1, 29)

    # Each entry is (offset, {base date: expected base + offset}).
    offset_cases = [
        (
            BQuarterEnd(startingMonth=1),
            {
                datetime(2008, 1, 1): datetime(2008, 1, 31),
                datetime(2008, 1, 31): datetime(2008, 4, 30),
                datetime(2008, 2, 15): datetime(2008, 4, 30),
                datetime(2008, 2, 29): datetime(2008, 4, 30),
                datetime(2008, 3, 15): datetime(2008, 4, 30),
                datetime(2008, 3, 31): datetime(2008, 4, 30),
                datetime(2008, 4, 15): datetime(2008, 4, 30),
                datetime(2008, 4, 30): datetime(2008, 7, 31),
            },
        ),
        (
            BQuarterEnd(startingMonth=2),
            {
                datetime(2008, 1, 1): datetime(2008, 2, 29),
                datetime(2008, 1, 31): datetime(2008, 2, 29),
                datetime(2008, 2, 15): datetime(2008, 2, 29),
                datetime(2008, 2, 29): datetime(2008, 5, 30),
                datetime(2008, 3, 15): datetime(2008, 5, 30),
                datetime(2008, 3, 31): datetime(2008, 5, 30),
                datetime(2008, 4, 15): datetime(2008, 5, 30),
                datetime(2008, 4, 30): datetime(2008, 5, 30),
            },
        ),
        (
            BQuarterEnd(startingMonth=1, n=0),
            {
                datetime(2008, 1, 1): datetime(2008, 1, 31),
                datetime(2008, 1, 31): datetime(2008, 1, 31),
                datetime(2008, 2, 15): datetime(2008, 4, 30),
                datetime(2008, 2, 29): datetime(2008, 4, 30),
                datetime(2008, 3, 15): datetime(2008, 4, 30),
                datetime(2008, 3, 31): datetime(2008, 4, 30),
                datetime(2008, 4, 15): datetime(2008, 4, 30),
                datetime(2008, 4, 30): datetime(2008, 4, 30),
            },
        ),
        (
            BQuarterEnd(startingMonth=1, n=-1),
            {
                datetime(2008, 1, 1): datetime(2007, 10, 31),
                datetime(2008, 1, 31): datetime(2007, 10, 31),
                datetime(2008, 2, 15): datetime(2008, 1, 31),
                datetime(2008, 2, 29): datetime(2008, 1, 31),
                datetime(2008, 3, 15): datetime(2008, 1, 31),
                datetime(2008, 3, 31): datetime(2008, 1, 31),
                datetime(2008, 4, 15): datetime(2008, 1, 31),
                datetime(2008, 4, 30): datetime(2008, 1, 31),
            },
        ),
        (
            BQuarterEnd(startingMonth=1, n=2),
            {
                datetime(2008, 1, 31): datetime(2008, 7, 31),
                datetime(2008, 2, 15): datetime(2008, 7, 31),
                datetime(2008, 2, 29): datetime(2008, 7, 31),
                datetime(2008, 3, 15): datetime(2008, 7, 31),
                datetime(2008, 3, 31): datetime(2008, 7, 31),
                datetime(2008, 4, 15): datetime(2008, 7, 31),
                datetime(2008, 4, 30): datetime(2008, 10, 31),
            },
        ),
    ]

    @pytest.mark.parametrize("case", offset_cases)
    def test_offset(self, case):
        # Check every (base -> expected) pair of one offset configuration.
        offset, cases = case
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)

    # Each entry is (offset, date, whether the date lies on the offset).
    on_offset_cases = [
        (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True),
        (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False),
        (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False),
        (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False),
        (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False),
        (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True),
        (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False),
        (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False),
        (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False),
        (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False),
        (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False),
        (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True),
        (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False),
        (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False),
        (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False),
        (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True),
        (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False),
        (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False),
        (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False),
        (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True),
        (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False),
        (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True),
        (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False),
        (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False),
        (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False),
        (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True),
        (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False),
    ]

    @pytest.mark.parametrize("case", on_offset_cases)
    def test_is_on_offset(self, case):
        offset, dt, expected = case
        assert_is_on_offset(offset, dt, expected)
class TestYearEnd(Base):
    """Tests for the ``YearEnd`` offset with its default month."""

    _offset = YearEnd

    def test_misspecified(self):
        # An out-of-range month must be rejected at construction time.
        with pytest.raises(ValueError, match="Month must go from 1 to 12"):
            YearEnd(month=13)

    # Each entry is (offset, {base date: expected base + offset}).
    offset_cases = [
        (
            YearEnd(),
            {
                datetime(2008, 1, 1): datetime(2008, 12, 31),
                datetime(2008, 6, 30): datetime(2008, 12, 31),
                datetime(2008, 12, 31): datetime(2009, 12, 31),
                datetime(2005, 12, 30): datetime(2005, 12, 31),
                datetime(2005, 12, 31): datetime(2006, 12, 31),
            },
        ),
        (
            YearEnd(0),
            {
                datetime(2008, 1, 1): datetime(2008, 12, 31),
                datetime(2008, 6, 30): datetime(2008, 12, 31),
                datetime(2008, 12, 31): datetime(2008, 12, 31),
                datetime(2005, 12, 30): datetime(2005, 12, 31),
            },
        ),
        (
            YearEnd(-1),
            {
                # The original literal listed the 2007-01-01 key twice with
                # the same value; a dict keeps only one entry per key, so the
                # redundant duplicate is dropped (the resulting dict is equal).
                datetime(2007, 1, 1): datetime(2006, 12, 31),
                datetime(2008, 6, 30): datetime(2007, 12, 31),
                datetime(2008, 12, 31): datetime(2007, 12, 31),
                datetime(2006, 12, 29): datetime(2005, 12, 31),
                datetime(2006, 12, 30): datetime(2005, 12, 31),
            },
        ),
        (
            YearEnd(-2),
            {
                datetime(2007, 1, 1): datetime(2005, 12, 31),
                datetime(2008, 6, 30): datetime(2006, 12, 31),
                datetime(2008, 12, 31): datetime(2006, 12, 31),
            },
        ),
    ]

    @pytest.mark.parametrize("case", offset_cases)
    def test_offset(self, case):
        # Check every (base -> expected) pair of one offset configuration.
        offset, cases = case
        for base, expected in cases.items():
            assert_offset_equal(offset, base, expected)

    # Each entry is (offset, date, whether the date lies on the offset).
    on_offset_cases = [
        (YearEnd(), datetime(2007, 12, 31), True),
        (YearEnd(), datetime(2008, 1, 1), False),
        (YearEnd(), datetime(2006, 12, 31), True),
        (YearEnd(), datetime(2006, 12, 29), False),
    ]

    @pytest.mark.parametrize("case", on_offset_cases)
    def test_is_on_offset(self, case):
        offset, dt, expected = case
        assert_is_on_offset(offset, dt, expected)
class TestBYearBegin(Base):
    """Tests for the ``BYearBegin`` (business year begin) offset."""

    _offset = BYearBegin

    def test_misspecified(self):
        # Both business-year offsets must reject month=13 with the same
        # message; BYearBegin is checked first, then BYearEnd.
        for offset_cls in (BYearBegin, BYearEnd):
            with pytest.raises(ValueError, match="Month must go from 1 to 12"):
                offset_cls(month=13)

    # Each entry is (offset, {base date: expected base + offset}).
    offset_cases = [
        (
            BYearBegin(),
            {
                datetime(2008, 1, 1): datetime(2009, 1, 1),
                datetime(2008, 6, 30): datetime(2009, 1, 1),
                datetime(2008, 12, 31): datetime(2009, 1, 1),
                datetime(2011, 1, 1): datetime(2011, 1, 3),
                datetime(2011, 1, 3): datetime(2012, 1, 2),
                datetime(2005, 12, 30): datetime(2006, 1, 2),
                datetime(2005, 12, 31): datetime(2006, 1, 2),
            },
        ),
        (
            BYearBegin(0),
            {
                datetime(2008, 1, 1): datetime(2008, 1, 1),
                datetime(2008, 6, 30): datetime(2009, 1, 1),
                datetime(2008, 12, 31): datetime(2009, 1, 1),
                datetime(2005, 12, 30): datetime(2006, 1, 2),
                datetime(2005, 12, 31): datetime(2006, 1, 2),
            },
        ),
        (
            BYearBegin(-1),
            {
                datetime(2007, 1, 1): datetime(2006, 1, 2),
                datetime(2009, 1, 4): datetime(2009, 1, 1),
                datetime(2009, 1, 1): datetime(2008, 1, 1),
                datetime(2008, 6, 30): datetime(2008, 1, 1),
                datetime(2008, 12, 31): datetime(2008, 1, 1),
                datetime(2006, 12, 29): datetime(2006, 1, 2),
                datetime(2006, 12, 30): datetime(2006, 1, 2),
                datetime(2006, 1, 1): datetime(2005, 1, 3),
            },
        ),
        (
            BYearBegin(-2),
            {
                datetime(2007, 1, 1): datetime(2005, 1, 3),
                datetime(2007, 6, 30): datetime(2006, 1, 2),
                datetime(2008, 12, 31): datetime(2007, 1, 1),
            },
        ),
    ]

    @pytest.mark.parametrize("case", offset_cases)
    def test_offset(self, case):
        # case[0] is the offset under test, case[1] maps start -> target.
        offset = case[0]
        for start, target in case[1].items():
            assert_offset_equal(offset, start, target)
class TestBYearEndLagged(Base):
    """Tests for ``BYearEnd`` anchored on a month other than the default."""

    _offset = BYearEnd

    def test_bad_month_fail(self):
        # Months just outside the valid range: 13 first, then 0.
        for bad_month in (13, 0):
            with pytest.raises(ValueError, match="Month must go from 1 to 12"):
                BYearEnd(month=bad_month)

    # Each entry is (offset, {base date: expected base + offset}).
    offset_cases = [
        (
            BYearEnd(month=6),
            {
                datetime(2008, 1, 1): datetime(2008, 6, 30),
                datetime(2007, 6, 30): datetime(2008, 6, 30),
            },
        ),
        (
            BYearEnd(n=-1, month=6),
            {
                datetime(2008, 1, 1): datetime(2007, 6, 29),
                datetime(2007, 6, 30): datetime(2007, 6, 29),
            },
        ),
    ]

    @pytest.mark.parametrize("case", offset_cases)
    def test_offset(self, case):
        # case[0] is the offset under test, case[1] maps start -> target.
        offset = case[0]
        for start, target in case[1].items():
            assert_offset_equal(offset, start, target)

    def test_roll(self):
        # 2009-11-30 lies between two June business year ends.
        june_year_end = BYearEnd(month=6)
        between = datetime(2009, 11, 30)

        rolled_forward = june_year_end.rollforward(between)
        assert rolled_forward == datetime(2010, 6, 30)
        rolled_back = june_year_end.rollback(between)
        assert rolled_back == datetime(2009, 6, 30)

    # Each entry is (offset, date, whether the date lies on the offset).
    on_offset_cases = [
        (BYearEnd(month=2), datetime(2007, 2, 28), True),
        (BYearEnd(month=6), datetime(2007, 6, 30), False),
    ]

    @pytest.mark.parametrize("case", on_offset_cases)
    def test_is_on_offset(self, case):
        assert_is_on_offset(case[0], case[1], case[2])
@pytest.mark.parametrize(
    "data,expected",
    [
        (
            ["01-01-2013", "01-02-2013"],
            [
                "2013-01-01T00:00:00.000000000-0000",
                "2013-01-02T00:00:00.000000000-0000",
            ],
        ),
        (
            ["Mon Sep 16 2013", "Tue Sep 17 2013"],
            [
                "2013-09-16T00:00:00.000000000-0000",
                "2013-09-17T00:00:00.000000000-0000",
            ],
        ),
    ],
)
def test_parsing_valid_dates(data, expected):
    """Date strings in the parametrized formats parse to the given M8[ns] values."""
    # array_to_datetime returns a pair; only the converted array is checked.
    parsed, _ = tslib.array_to_datetime(np.array(data, dtype=object))

    tm.assert_numpy_array_equal(
        parsed, np_array_datetime64_compat(expected, dtype="M8[ns]")
    )
def test_parsing_non_iso_timezone_offset():
    """A non-ISO string carrying a +0000 offset parses with a zero fixed offset."""
    values = np.array(["01-01-2013T00:00:00.000000000+0000"], dtype=object)

    parsed, parsed_tz = tslib.array_to_datetime(values)

    wanted = np.array([np.datetime64("2013-01-01 00:00:00.000000000")])
    tm.assert_numpy_array_equal(parsed, wanted)
    # Identity (not equality) is asserted on the returned tz object.
    assert parsed_tz is pytz.FixedOffset(0)
@pytest.mark.parametrize(
    "invalid_date",
    [
        date(1000, 1, 1),
        datetime(1000, 1, 1),
        "1000-01-01",
        "Jan 1, 1000",
        np.datetime64("1000-01-01"),
    ],
)
@pytest.mark.parametrize("errors", ["coerce", "raise"])
def test_coerce_outside_ns_bounds(invalid_date, errors):
    """Year-1000 inputs raise under errors="raise" and become NaT under "coerce"."""
    values = np.array([invalid_date], dtype="object")

    if errors != "raise":
        # errors == "coerce": the out-of-bounds value is replaced by iNaT.
        converted, _ = tslib.array_to_datetime(values=values, errors=errors)
        tm.assert_numpy_array_equal(converted, np.array([iNaT], dtype="M8[ns]"))
        return

    with pytest.raises(ValueError, match="Out of bounds nanosecond timestamp"):
        tslib.array_to_datetime(values=values, errors=errors)
def test_to_datetime_barely_out_of_bounds():
    """A timestamp just past the nanosecond bound must raise, not be truncated."""
    # see gh-19382, gh-19529
    #
    # Dropping the nanoseconds of this value would give an in-bounds
    # datetime, so the conversion has to notice the overflow itself.
    just_too_late = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)

    with pytest.raises(
        tslib.OutOfBoundsDatetime,
        match="Out of bounds nanosecond timestamp: 2262-04-11 23:47:16",
    ):
        tslib.array_to_datetime(just_too_late)
def test_get_day_of_year_dt():
    """``get_day_of_year`` agrees with the standard library on a random date."""
    # Random proleptic-Gregorian ordinal in [1, 365 * 4000].
    ordinal = 1 + np.random.randint(365 * 4000)
    when = datetime.fromordinal(ordinal)

    computed = ccalendar.get_day_of_year(when.year, when.month, when.day)

    # tm_yday is the 1-based day of the year, i.e. days since Jan 1 plus one.
    assert computed == when.timetuple().tm_yday
freq=freq) + + _compare_utc_to_local(tz_didx) + _compare_local_to_utc(tz_didx, utc_didx) + + +@pytest.mark.parametrize( + "arr", + [ + pytest.param(np.array([], dtype=np.int64), id="empty"), + pytest.param(np.array([iNaT], dtype=np.int64), id="all_nat"), + ], +) +def test_tz_convert_corner(arr): + result = tzconversion.tz_convert( + arr, timezones.maybe_get_tz("US/Eastern"), timezones.maybe_get_tz("Asia/Tokyo") + ) + tm.assert_numpy_array_equal(result, arr) + + +@pytest.mark.parametrize("copy", [True, False]) +@pytest.mark.parametrize("dtype", ["M8[ns]", "M8[s]"]) +def test_length_zero_copy(dtype, copy): + arr = np.array([], dtype=dtype) + result = conversion.ensure_datetime64ns(arr, copy=copy) + assert result.base is (None if copy else arr) + + +class SubDatetime(datetime): + pass + + +@pytest.mark.parametrize( + "dt, expected", + [ + pytest.param( + Timestamp("2000-01-01"), Timestamp("2000-01-01", tz=UTC), id="timestamp" + ), + pytest.param( + datetime(2000, 1, 1), datetime(2000, 1, 1, tzinfo=UTC), id="datetime" + ), + pytest.param( + SubDatetime(2000, 1, 1), + SubDatetime(2000, 1, 1, tzinfo=UTC), + id="subclassed_datetime", + ), + ], +) +def test_localize_pydatetime_dt_types(dt, expected): + # GH 25851 + # ensure that subclassed datetime works with + # localize_pydatetime + result = conversion.localize_pydatetime(dt, UTC) + assert result == expected diff --git a/pandas/tests/tslibs/test_fields.py b/pandas/tests/tslibs/test_fields.py new file mode 100644 index 00000000..943f4207 --- /dev/null +++ b/pandas/tests/tslibs/test_fields.py @@ -0,0 +1,31 @@ +import numpy as np + +from pandas._libs.tslibs import fields + +import pandas._testing as tm + + +def test_fields_readonly(): + # https://github.com/vaexio/vaex/issues/357 + # fields functions should't raise when we pass read-only data + dtindex = np.arange(5, dtype=np.int64) * 10 ** 9 * 3600 * 24 * 32 + dtindex.flags.writeable = False + + result = fields.get_date_name_field(dtindex, "month_name") + expected = 
np.array( + ["January", "February", "March", "April", "May"], dtype=np.object + ) + tm.assert_numpy_array_equal(result, expected) + + result = fields.get_date_field(dtindex, "Y") + expected = np.array([1970, 1970, 1970, 1970, 1970], dtype=np.int32) + tm.assert_numpy_array_equal(result, expected) + + result = fields.get_start_end_field(dtindex, "is_month_start", None) + expected = np.array([True, False, False, False, False], dtype=np.bool_) + tm.assert_numpy_array_equal(result, expected) + + # treat dtindex as timedeltas for this next one + result = fields.get_timedelta_field(dtindex, "days") + expected = np.arange(5, dtype=np.int32) * 32 + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/tslibs/test_libfrequencies.py b/pandas/tests/tslibs/test_libfrequencies.py new file mode 100644 index 00000000..5810c7e5 --- /dev/null +++ b/pandas/tests/tslibs/test_libfrequencies.py @@ -0,0 +1,104 @@ +import pytest + +from pandas._libs.tslibs.frequencies import ( + INVALID_FREQ_ERR_MSG, + _period_str_to_code, + get_rule_month, + is_subperiod, + is_superperiod, +) + +from pandas.tseries import offsets + + +@pytest.mark.parametrize( + "obj,expected", + [ + ("W", "DEC"), + (offsets.Week(), "DEC"), + ("D", "DEC"), + (offsets.Day(), "DEC"), + ("Q", "DEC"), + (offsets.QuarterEnd(startingMonth=12), "DEC"), + ("Q-JAN", "JAN"), + (offsets.QuarterEnd(startingMonth=1), "JAN"), + ("A-DEC", "DEC"), + ("Y-DEC", "DEC"), + (offsets.YearEnd(), "DEC"), + ("A-MAY", "MAY"), + ("Y-MAY", "MAY"), + (offsets.YearEnd(month=5), "MAY"), + ], +) +def test_get_rule_month(obj, expected): + result = get_rule_month(obj) + assert result == expected + + +@pytest.mark.parametrize( + "obj,expected", + [ + ("A", 1000), + ("A-DEC", 1000), + ("A-JAN", 1001), + ("Y", 1000), + ("Y-DEC", 1000), + ("Y-JAN", 1001), + ("Q", 2000), + ("Q-DEC", 2000), + ("Q-FEB", 2002), + ("W", 4000), + ("W-SUN", 4000), + ("W-FRI", 4005), + ("Min", 8000), + ("ms", 10000), + ("US", 11000), + ("NS", 12000), + ], +) +def 
@pytest.mark.parametrize(
    "p1,p2,expected",
    [
        # Input validation.
        (offsets.MonthEnd(), None, False),
        (offsets.YearEnd(), None, False),
        (None, offsets.YearEnd(), False),
        (None, offsets.MonthEnd(), False),
        (None, None, False),
        (offsets.YearEnd(), offsets.MonthEnd(), True),
        (offsets.Hour(), offsets.Minute(), True),
        (offsets.Second(), offsets.Milli(), True),
        (offsets.Milli(), offsets.Micro(), True),
        (offsets.Micro(), offsets.Nano(), True),
    ],
)
def test_super_sub_symmetry(p1, p2, expected):
    """``is_superperiod(p1, p2)`` and ``is_subperiod(p2, p1)`` give the same answer."""
    # Both results are compared by identity against the expected bool.
    as_super = is_superperiod(p1, p2)
    assert as_super is expected
    as_sub = is_subperiod(p2, p1)
    assert as_sub is expected
"business_start", "business_end"]) +def day_opt(request): + return request.param + + +@pytest.mark.parametrize( + "dt,exp_week_day,exp_last_day", + [ + (datetime(2017, 11, 30), 3, 30), # Business day. + (datetime(1993, 10, 31), 6, 29), # Non-business day. + ], +) +def test_get_last_bday(dt, exp_week_day, exp_last_day): + assert dt.weekday() == exp_week_day + assert liboffsets.get_lastbday(dt.year, dt.month) == exp_last_day + + +@pytest.mark.parametrize( + "dt,exp_week_day,exp_first_day", + [ + (datetime(2017, 4, 1), 5, 3), # Non-weekday. + (datetime(1993, 10, 1), 4, 1), # Business day. + ], +) +def test_get_first_bday(dt, exp_week_day, exp_first_day): + assert dt.weekday() == exp_week_day + assert liboffsets.get_firstbday(dt.year, dt.month) == exp_first_day + + +@pytest.mark.parametrize( + "months,day_opt,expected", + [ + (0, 15, datetime(2017, 11, 15)), + (0, None, datetime(2017, 11, 30)), + (1, "start", datetime(2017, 12, 1)), + (-145, "end", datetime(2005, 10, 31)), + (0, "business_end", datetime(2017, 11, 30)), + (0, "business_start", datetime(2017, 11, 1)), + ], +) +def test_shift_month_dt(months, day_opt, expected): + dt = datetime(2017, 11, 30) + assert liboffsets.shift_month(dt, months, day_opt=day_opt) == expected + + +@pytest.mark.parametrize( + "months,day_opt,expected", + [ + (1, "start", Timestamp("1929-06-01")), + (-3, "end", Timestamp("1929-02-28")), + (25, None, Timestamp("1931-06-5")), + (-1, 31, Timestamp("1929-04-30")), + ], +) +def test_shift_month_ts(months, day_opt, expected): + ts = Timestamp("1929-05-05") + assert liboffsets.shift_month(ts, months, day_opt=day_opt) == expected + + +def test_shift_month_error(): + dt = datetime(2017, 11, 15) + day_opt = "this should raise" + + with pytest.raises(ValueError, match=day_opt): + liboffsets.shift_month(dt, 3, day_opt=day_opt) + + +@pytest.mark.parametrize( + "other,expected", + [ + # Before March 1. + (datetime(2017, 2, 10), {2: 1, -7: -7, 0: 0}), + # After March 1. 
+ (Timestamp("2014-03-15", tz="US/Eastern"), {2: 2, -7: -6, 0: 1}), + ], +) +@pytest.mark.parametrize("n", [2, -7, 0]) +def test_roll_yearday(other, expected, n): + month = 3 + day_opt = "start" # `other` will be compared to March 1. + + assert liboffsets.roll_yearday(other, n, month, day_opt) == expected[n] + + +@pytest.mark.parametrize( + "other,expected", + [ + # Before June 30. + (datetime(1999, 6, 29), {5: 4, -7: -7, 0: 0}), + # After June 30. + (Timestamp(2072, 8, 24, 6, 17, 18), {5: 5, -7: -6, 0: 1}), + ], +) +@pytest.mark.parametrize("n", [5, -7, 0]) +def test_roll_yearday2(other, expected, n): + month = 6 + day_opt = "end" # `other` will be compared to June 30. + + assert liboffsets.roll_yearday(other, n, month, day_opt) == expected[n] + + +def test_get_day_of_month_error(): + # get_day_of_month is not directly exposed. + # We test it via roll_yearday. + dt = datetime(2017, 11, 15) + day_opt = "foo" + + with pytest.raises(ValueError, match=day_opt): + # To hit the raising case we need month == dt.month and n > 0. + liboffsets.roll_yearday(dt, n=3, month=11, day_opt=day_opt) + + +@pytest.mark.parametrize( + "month", + [3, 5], # (other.month % 3) < (month % 3) # (other.month % 3) > (month % 3) +) +@pytest.mark.parametrize("n", [4, -3]) +def test_roll_qtr_day_not_mod_unequal(day_opt, month, n): + expected = {3: {-3: -2, 4: 4}, 5: {-3: -3, 4: 3}} + + other = Timestamp(2072, 10, 1, 6, 17, 18) # Saturday. + assert roll_qtrday(other, n, month, day_opt, modby=3) == expected[month][n] + + +@pytest.mark.parametrize( + "other,month,exp_dict", + [ + # Monday. + (datetime(1999, 5, 31), 2, {-1: {"start": 0, "business_start": 0}}), + # Saturday. + ( + Timestamp(2072, 10, 1, 6, 17, 18), + 4, + {2: {"end": 1, "business_end": 1, "business_start": 1}}, + ), + # First business day. 
+ ( + Timestamp(2072, 10, 3, 6, 17, 18), + 4, + {2: {"end": 1, "business_end": 1}, -1: {"start": 0}}, + ), + ], +) +@pytest.mark.parametrize("n", [2, -1]) +def test_roll_qtr_day_mod_equal(other, month, exp_dict, n, day_opt): + # All cases have (other.month % 3) == (month % 3). + expected = exp_dict.get(n, {}).get(day_opt, n) + assert roll_qtrday(other, n, month, day_opt, modby=3) == expected + + +@pytest.mark.parametrize( + "n,expected", [(42, {29: 42, 1: 42, 31: 41}), (-4, {29: -4, 1: -3, 31: -4})] +) +@pytest.mark.parametrize("compare", [29, 1, 31]) +def test_roll_convention(n, expected, compare): + assert liboffsets.roll_convention(29, n, compare) == expected[compare] diff --git a/pandas/tests/tslibs/test_normalize_date.py b/pandas/tests/tslibs/test_normalize_date.py new file mode 100644 index 00000000..2a41836f --- /dev/null +++ b/pandas/tests/tslibs/test_normalize_date.py @@ -0,0 +1,41 @@ +"""Tests for functions from pandas._libs.tslibs""" + +from datetime import date, datetime + +import pytest + +from pandas._libs import tslibs +from pandas._libs.tslibs.timestamps import Timestamp + + +@pytest.mark.parametrize( + "value,expected", + [ + (date(2012, 9, 7), datetime(2012, 9, 7)), + (datetime(2012, 9, 7, 12), datetime(2012, 9, 7)), + (datetime(2007, 10, 1, 1, 12, 5, 10), datetime(2007, 10, 1)), + ], +) +def test_normalize_date(value, expected): + result = tslibs.normalize_date(value) + assert result == expected + + +class SubDatetime(datetime): + pass + + +@pytest.mark.parametrize( + "dt, expected", + [ + (Timestamp(2000, 1, 1, 1), Timestamp(2000, 1, 1, 0)), + (datetime(2000, 1, 1, 1), datetime(2000, 1, 1, 0)), + (SubDatetime(2000, 1, 1, 1), SubDatetime(2000, 1, 1, 0)), + ], +) +def test_normalize_date_sub_types(dt, expected): + # GH 25851 + # ensure that subclassed datetime works with + # normalize_date + result = tslibs.normalize_date(dt) + assert result == expected diff --git a/pandas/tests/tslibs/test_parse_iso8601.py 
b/pandas/tests/tslibs/test_parse_iso8601.py new file mode 100644 index 00000000..a58f227c --- /dev/null +++ b/pandas/tests/tslibs/test_parse_iso8601.py @@ -0,0 +1,72 @@ +from datetime import datetime + +import pytest + +from pandas._libs import tslib + + +@pytest.mark.parametrize( + "date_str, exp", + [ + ("2011-01-02", datetime(2011, 1, 2)), + ("2011-1-2", datetime(2011, 1, 2)), + ("2011-01", datetime(2011, 1, 1)), + ("2011-1", datetime(2011, 1, 1)), + ("2011 01 02", datetime(2011, 1, 2)), + ("2011.01.02", datetime(2011, 1, 2)), + ("2011/01/02", datetime(2011, 1, 2)), + ("2011\\01\\02", datetime(2011, 1, 2)), + ("2013-01-01 05:30:00", datetime(2013, 1, 1, 5, 30)), + ("2013-1-1 5:30:00", datetime(2013, 1, 1, 5, 30)), + ], +) +def test_parsers_iso8601(date_str, exp): + # see gh-12060 + # + # Test only the ISO parser - flexibility to + # different separators and leading zero's. + actual = tslib._test_parse_iso8601(date_str) + assert actual == exp + + +@pytest.mark.parametrize( + "date_str", + [ + "2011-01/02", + "2011=11=11", + "201401", + "201111", + "200101", + # Mixed separated and unseparated. + "2005-0101", + "200501-01", + "20010101 12:3456", + "20010101 1234:56", + # HHMMSS must have two digits in + # each component if unseparated. 
+ "20010101 1", + "20010101 123", + "20010101 12345", + "20010101 12345Z", + ], +) +def test_parsers_iso8601_invalid(date_str): + msg = 'Error parsing datetime string "{s}"'.format(s=date_str) + + with pytest.raises(ValueError, match=msg): + tslib._test_parse_iso8601(date_str) + + +def test_parsers_iso8601_invalid_offset_invalid(): + date_str = "2001-01-01 12-34-56" + msg = f'Timezone hours offset out of range in datetime string "{date_str}"' + + with pytest.raises(ValueError, match=msg): + tslib._test_parse_iso8601(date_str) + + +def test_parsers_iso8601_leading_space(): + # GH#25895 make sure isoparser doesn't overflow with long input + date_str, expected = ("2013-1-1 5:30:00", datetime(2013, 1, 1, 5, 30)) + actual = tslib._test_parse_iso8601(" " * 200 + date_str) + assert actual == expected diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py new file mode 100644 index 00000000..36f7ada7 --- /dev/null +++ b/pandas/tests/tslibs/test_parsing.py @@ -0,0 +1,227 @@ +""" +Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx +""" +from datetime import datetime + +from dateutil.parser import parse +import numpy as np +import pytest + +from pandas._libs.tslibs import parsing +from pandas._libs.tslibs.parsing import parse_time_string +import pandas.util._test_decorators as td + +import pandas._testing as tm + + +def test_parse_time_string(): + (date, parsed, reso) = parse_time_string("4Q1984") + (date_lower, parsed_lower, reso_lower) = parse_time_string("4q1984") + + assert date == date_lower + assert reso == reso_lower + assert parsed == parsed_lower + + +def test_parse_time_string_invalid_type(): + # Raise on invalid input, don't just return it + with pytest.raises(TypeError): + parse_time_string((4, 5)) + + +@pytest.mark.parametrize( + "dashed,normal", [("1988-Q2", "1988Q2"), ("2Q-1988", "2Q1988")] +) +def test_parse_time_quarter_with_dash(dashed, normal): + # see gh-9688 + (date_dash, parsed_dash, reso_dash) = 
parse_time_string(dashed) + (date, parsed, reso) = parse_time_string(normal) + + assert date_dash == date + assert parsed_dash == parsed + assert reso_dash == reso + + +@pytest.mark.parametrize("dashed", ["-2Q1992", "2-Q1992", "4-4Q1992"]) +def test_parse_time_quarter_with_dash_error(dashed): + msg = "Unknown datetime string format, unable to parse: {dashed}" + + with pytest.raises(parsing.DateParseError, match=msg.format(dashed=dashed)): + parse_time_string(dashed) + + +@pytest.mark.parametrize( + "date_string,expected", + [ + ("123.1234", False), + ("-50000", False), + ("999", False), + ("m", False), + ("T", False), + ("Mon Sep 16, 2013", True), + ("2012-01-01", True), + ("01/01/2012", True), + ("01012012", True), + ("0101", True), + ("1-1", True), + ], +) +def test_does_not_convert_mixed_integer(date_string, expected): + assert parsing._does_string_look_like_datetime(date_string) is expected + + +@pytest.mark.parametrize( + "date_str,kwargs,msg", + [ + ( + "2013Q5", + dict(), + ( + "Incorrect quarterly string is given, " + "quarter must be between 1 and 4: 2013Q5" + ), + ), + # see gh-5418 + ( + "2013Q1", + dict(freq="INVLD-L-DEC-SAT"), + ( + "Unable to retrieve month information " + "from given freq: INVLD-L-DEC-SAT" + ), + ), + ], +) +def test_parsers_quarterly_with_freq_error(date_str, kwargs, msg): + with pytest.raises(parsing.DateParseError, match=msg): + parsing.parse_time_string(date_str, **kwargs) + + +@pytest.mark.parametrize( + "date_str,freq,expected", + [ + ("2013Q2", None, datetime(2013, 4, 1)), + ("2013Q2", "A-APR", datetime(2012, 8, 1)), + ("2013-Q2", "A-DEC", datetime(2013, 4, 1)), + ], +) +def test_parsers_quarterly_with_freq(date_str, freq, expected): + result, _, _ = parsing.parse_time_string(date_str, freq=freq) + assert result == expected + + +@pytest.mark.parametrize( + "date_str", ["2Q 2005", "2Q-200A", "2Q-200", "22Q2005", "2Q200.", "6Q-20"] +) +def test_parsers_quarter_invalid(date_str): + if date_str == "6Q-20": + msg = ( + "Incorrect 
quarterly string is given, quarter " + "must be between 1 and 4: {date_str}" + ) + else: + msg = "Unknown datetime string format, unable to parse: {date_str}" + + with pytest.raises(ValueError, match=msg.format(date_str=date_str)): + parsing.parse_time_string(date_str) + + +@pytest.mark.parametrize( + "date_str,expected", + [("201101", datetime(2011, 1, 1, 0, 0)), ("200005", datetime(2000, 5, 1, 0, 0))], +) +def test_parsers_month_freq(date_str, expected): + result, _, _ = parsing.parse_time_string(date_str, freq="M") + assert result == expected + + +@td.skip_if_not_us_locale +@pytest.mark.parametrize( + "string,fmt", + [ + ("20111230", "%Y%m%d"), + ("2011-12-30", "%Y-%m-%d"), + ("30-12-2011", "%d-%m-%Y"), + ("2011-12-30 00:00:00", "%Y-%m-%d %H:%M:%S"), + ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"), + ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), + ], +) +def test_guess_datetime_format_with_parseable_formats(string, fmt): + result = parsing._guess_datetime_format(string) + assert result == fmt + + +@pytest.mark.parametrize("dayfirst,expected", [(True, "%d/%m/%Y"), (False, "%m/%d/%Y")]) +def test_guess_datetime_format_with_dayfirst(dayfirst, expected): + ambiguous_string = "01/01/2011" + result = parsing._guess_datetime_format(ambiguous_string, dayfirst=dayfirst) + assert result == expected + + +@td.skip_if_has_locale +@pytest.mark.parametrize( + "string,fmt", + [ + ("30/Dec/2011", "%d/%b/%Y"), + ("30/December/2011", "%d/%B/%Y"), + ("30/Dec/2011 00:00:00", "%d/%b/%Y %H:%M:%S"), + ], +) +def test_guess_datetime_format_with_locale_specific_formats(string, fmt): + result = parsing._guess_datetime_format(string) + assert result == fmt + + +@pytest.mark.parametrize( + "invalid_dt", + [ + "2013", + "01/2013", + "12:00:00", + "1/1/1/1", + "this_is_not_a_datetime", + "51a", + 9, + datetime(2011, 1, 1), + ], +) +def test_guess_datetime_format_invalid_inputs(invalid_dt): + # A datetime string must include a year, month and a day for it to be + # guessable, in 
addition to being a string that looks like a datetime. + assert parsing._guess_datetime_format(invalid_dt) is None + + +@pytest.mark.parametrize( + "string,fmt", + [ + ("2011-1-1", "%Y-%m-%d"), + ("1/1/2011", "%m/%d/%Y"), + ("30-1-2011", "%d-%m-%Y"), + ("2011-1-1 0:0:0", "%Y-%m-%d %H:%M:%S"), + ("2011-1-3T00:00:0", "%Y-%m-%dT%H:%M:%S"), + ("2011-1-1 00:00:00", "%Y-%m-%d %H:%M:%S"), + ], +) +def test_guess_datetime_format_no_padding(string, fmt): + # see gh-11142 + result = parsing._guess_datetime_format(string) + assert result == fmt + + +def test_try_parse_dates(): + arr = np.array(["5/1/2000", "6/1/2000", "7/1/2000"], dtype=object) + result = parsing.try_parse_dates(arr, dayfirst=True) + + expected = np.array([parse(d, dayfirst=True) for d in arr]) + tm.assert_numpy_array_equal(result, expected) + + +def test_parse_time_string_check_instance_type_raise_exception(): + # issue 20684 + with pytest.raises(TypeError): + parse_time_string((1, 2, 3)) + + result = parse_time_string("2019") + expected = (datetime(2019, 1, 1), datetime(2019, 1, 1), "year") + assert result == expected diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py new file mode 100644 index 00000000..5497cb65 --- /dev/null +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -0,0 +1,78 @@ +import pytest + +from pandas._libs.tslibs.frequencies import get_freq +from pandas._libs.tslibs.period import period_asfreq, period_ordinal + + +@pytest.mark.parametrize( + "freq1,freq2,expected", + [ + ("D", "H", 24), + ("D", "T", 1440), + ("D", "S", 86400), + ("D", "L", 86400000), + ("D", "U", 86400000000), + ("D", "N", 86400000000000), + ("H", "T", 60), + ("H", "S", 3600), + ("H", "L", 3600000), + ("H", "U", 3600000000), + ("H", "N", 3600000000000), + ("T", "S", 60), + ("T", "L", 60000), + ("T", "U", 60000000), + ("T", "N", 60000000000), + ("S", "L", 1000), + ("S", "U", 1000000), + ("S", "N", 1000000000), + ("L", "U", 1000), + ("L", "N", 1000000), + ("U", "N", 1000), + 
], +) +def test_intra_day_conversion_factors(freq1, freq2, expected): + assert period_asfreq(1, get_freq(freq1), get_freq(freq2), False) == expected + + +@pytest.mark.parametrize( + "freq,expected", [("A", 0), ("M", 0), ("W", 1), ("D", 0), ("B", 0)] +) +def test_period_ordinal_start_values(freq, expected): + # information for Jan. 1, 1970. + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq(freq)) == expected + + +@pytest.mark.parametrize( + "dt,expected", + [ + ((1970, 1, 4, 0, 0, 0, 0, 0), 1), + ((1970, 1, 5, 0, 0, 0, 0, 0), 2), + ((2013, 10, 6, 0, 0, 0, 0, 0), 2284), + ((2013, 10, 7, 0, 0, 0, 0, 0), 2285), + ], +) +def test_period_ordinal_week(dt, expected): + args = dt + (get_freq("W"),) + assert period_ordinal(*args) == expected + + +@pytest.mark.parametrize( + "day,expected", + [ + # Thursday (Oct. 3, 2013). + (3, 11415), + # Friday (Oct. 4, 2013). + (4, 11416), + # Saturday (Oct. 5, 2013). + (5, 11417), + # Sunday (Oct. 6, 2013). + (6, 11417), + # Monday (Oct. 7, 2013). + (7, 11417), + # Tuesday (Oct. 8, 2013). 
+ (8, 11418), + ], +) +def test_period_ordinal_business_day(day, expected): + args = (2013, 10, day, 0, 0, 0, 0, 0, get_freq("B")) + assert period_ordinal(*args) == expected diff --git a/pandas/tests/tslibs/test_timedeltas.py b/pandas/tests/tslibs/test_timedeltas.py new file mode 100644 index 00000000..86d5cc74 --- /dev/null +++ b/pandas/tests/tslibs/test_timedeltas.py @@ -0,0 +1,30 @@ +import numpy as np +import pytest + +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds + +from pandas import Timedelta, offsets + + +@pytest.mark.parametrize( + "obj,expected", + [ + (np.timedelta64(14, "D"), 14 * 24 * 3600 * 1e9), + (Timedelta(minutes=-7), -7 * 60 * 1e9), + (Timedelta(minutes=-7).to_pytimedelta(), -7 * 60 * 1e9), + (offsets.Nano(125), 125), + (1, 1), + (np.int64(2), 2), + (np.int32(3), 3), + ], +) +def test_delta_to_nanoseconds(obj, expected): + result = delta_to_nanoseconds(obj) + assert result == expected + + +def test_delta_to_nanoseconds_error(): + obj = np.array([123456789], dtype="m8[ns]") + + with pytest.raises(TypeError, match=""): + delta_to_nanoseconds(obj) diff --git a/pandas/tests/tslibs/test_timezones.py b/pandas/tests/tslibs/test_timezones.py new file mode 100644 index 00000000..03cc8fcb --- /dev/null +++ b/pandas/tests/tslibs/test_timezones.py @@ -0,0 +1,108 @@ +from datetime import datetime + +import dateutil.tz +import pytest +import pytz + +from pandas._libs.tslibs import conversion, timezones + +from pandas import Timestamp + + +@pytest.mark.parametrize("tz_name", list(pytz.common_timezones)) +def test_cache_keys_are_distinct_for_pytz_vs_dateutil(tz_name): + if tz_name == "UTC": + pytest.skip("UTC: special case in dateutil") + + tz_p = timezones.maybe_get_tz(tz_name) + tz_d = timezones.maybe_get_tz("dateutil/" + tz_name) + + if tz_d is None: + pytest.skip(tz_name + ": dateutil does not know about this one") + + assert timezones._p_tz_cache_key(tz_p) != timezones._p_tz_cache_key(tz_d) + + +def test_tzlocal_repr(): + # see gh-13583 + 
ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()) + assert ts.tz == dateutil.tz.tzlocal() + assert "tz='tzlocal()')" in repr(ts) + + +def test_tzlocal_maybe_get_tz(): + # see gh-13583 + tz = timezones.maybe_get_tz("tzlocal()") + assert tz == dateutil.tz.tzlocal() + + +def test_tzlocal_offset(): + # see gh-13583 + # + # Get offset using normal datetime for test. + ts = Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()) + + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = offset.total_seconds() * 1000000000 + + assert ts.value + offset == Timestamp("2011-01-01").value + + +@pytest.fixture( + params=[ + (pytz.timezone("US/Eastern"), lambda tz, x: tz.localize(x)), + (dateutil.tz.gettz("US/Eastern"), lambda tz, x: x.replace(tzinfo=tz)), + ] +) +def infer_setup(request): + eastern, localize = request.param + + start_naive = datetime(2001, 1, 1) + end_naive = datetime(2009, 1, 1) + + start = localize(eastern, start_naive) + end = localize(eastern, end_naive) + + return eastern, localize, start, end, start_naive, end_naive + + +def test_infer_tz_compat(infer_setup): + eastern, _, start, end, start_naive, end_naive = infer_setup + + assert ( + timezones.infer_tzinfo(start, end) + is conversion.localize_pydatetime(start_naive, eastern).tzinfo + ) + assert ( + timezones.infer_tzinfo(start, None) + is conversion.localize_pydatetime(start_naive, eastern).tzinfo + ) + assert ( + timezones.infer_tzinfo(None, end) + is conversion.localize_pydatetime(end_naive, eastern).tzinfo + ) + + +def test_infer_tz_utc_localize(infer_setup): + _, _, start, end, start_naive, end_naive = infer_setup + utc = pytz.utc + + start = utc.localize(start_naive) + end = utc.localize(end_naive) + + assert timezones.infer_tzinfo(start, end) is utc + + +@pytest.mark.parametrize("ordered", [True, False]) +def test_infer_tz_mismatch(infer_setup, ordered): + eastern, _, _, _, start_naive, end_naive = infer_setup + msg = "Inputs must both have the same timezone" + + utc = pytz.utc + 
start = utc.localize(start_naive) + end = conversion.localize_pydatetime(end_naive, eastern) + + args = (start, end) if ordered else (end, start) + + with pytest.raises(AssertionError, match=msg): + timezones.infer_tzinfo(*args) diff --git a/pandas/tests/util/__init__.py b/pandas/tests/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/util/conftest.py b/pandas/tests/util/conftest.py new file mode 100644 index 00000000..5eff49ab --- /dev/null +++ b/pandas/tests/util/conftest.py @@ -0,0 +1,26 @@ +import pytest + + +@pytest.fixture(params=[True, False]) +def check_dtype(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def check_exact(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def check_index_type(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def check_less_precise(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def check_categorical(request): + return request.param diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py new file mode 100644 index 00000000..b8048891 --- /dev/null +++ b/pandas/tests/util/test_assert_almost_equal.py @@ -0,0 +1,361 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Index, Series, Timestamp +import pandas._testing as tm + + +def _assert_almost_equal_both(a, b, **kwargs): + """ + Check that two objects are approximately equal. + + This check is performed commutatively. + + Parameters + ---------- + a : object + The first object to compare. + b : object + The second object to compare. + kwargs : dict + The arguments passed to `tm.assert_almost_equal`. + """ + tm.assert_almost_equal(a, b, **kwargs) + tm.assert_almost_equal(b, a, **kwargs) + + +def _assert_not_almost_equal(a, b, **kwargs): + """ + Check that two objects are not approximately equal. 
+ + Parameters + ---------- + a : object + The first object to compare. + b : object + The second object to compare. + kwargs : dict + The arguments passed to `tm.assert_almost_equal`. + """ + try: + tm.assert_almost_equal(a, b, **kwargs) + msg = f"{a} and {b} were approximately equal when they shouldn't have been" + pytest.fail(msg=msg) + except AssertionError: + pass + + +def _assert_not_almost_equal_both(a, b, **kwargs): + """ + Check that two objects are not approximately equal. + + This check is performed commutatively. + + Parameters + ---------- + a : object + The first object to compare. + b : object + The second object to compare. + kwargs : dict + The arguments passed to `tm.assert_almost_equal`. + """ + _assert_not_almost_equal(a, b, **kwargs) + _assert_not_almost_equal(b, a, **kwargs) + + +@pytest.mark.parametrize( + "a,b", + [ + (1.1, 1.1), + (1.1, 1.100001), + (np.int16(1), 1.000001), + (np.float64(1.1), 1.1), + (np.uint32(5), 5), + ], +) +def test_assert_almost_equal_numbers(a, b): + _assert_almost_equal_both(a, b) + + +@pytest.mark.parametrize("a,b", [(1.1, 1), (1.1, True), (1, 2), (1.0001, np.int16(1))]) +def test_assert_not_almost_equal_numbers(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize("a,b", [(0, 0), (0, 0.0), (0, np.float64(0)), (0.000001, 0)]) +def test_assert_almost_equal_numbers_with_zeros(a, b): + _assert_almost_equal_both(a, b) + + +@pytest.mark.parametrize("a,b", [(0.001, 0), (1, 0)]) +def test_assert_not_almost_equal_numbers_with_zeros(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize("a,b", [(1, "abc"), (1, [1]), (1, object())]) +def test_assert_not_almost_equal_numbers_with_mixed(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "left_dtype", ["M8[ns]", "m8[ns]", "float64", "int64", "object"] +) +@pytest.mark.parametrize( + "right_dtype", ["M8[ns]", "m8[ns]", "float64", "int64", "object"] +) +def test_assert_almost_equal_edge_case_ndarrays(left_dtype, 
right_dtype): + # Empty compare. + _assert_almost_equal_both( + np.array([], dtype=left_dtype), + np.array([], dtype=right_dtype), + check_dtype=False, + ) + + +def test_assert_almost_equal_dicts(): + _assert_almost_equal_both({"a": 1, "b": 2}, {"a": 1, "b": 2}) + + +@pytest.mark.parametrize( + "a,b", + [ + ({"a": 1, "b": 2}, {"a": 1, "b": 3}), + ({"a": 1, "b": 2}, {"a": 1, "b": 2, "c": 3}), + ({"a": 1}, 1), + ({"a": 1}, "abc"), + ({"a": 1}, [1]), + ], +) +def test_assert_not_almost_equal_dicts(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize("val", [1, 2]) +def test_assert_almost_equal_dict_like_object(val): + dict_val = 1 + real_dict = dict(a=val) + + class DictLikeObj: + def keys(self): + return ("a",) + + def __getitem__(self, item): + if item == "a": + return dict_val + + func = ( + _assert_almost_equal_both if val == dict_val else _assert_not_almost_equal_both + ) + func(real_dict, DictLikeObj(), check_dtype=False) + + +def test_assert_almost_equal_strings(): + _assert_almost_equal_both("abc", "abc") + + +@pytest.mark.parametrize( + "a,b", [("abc", "abcd"), ("abc", "abd"), ("abc", 1), ("abc", [1])] +) +def test_assert_not_almost_equal_strings(a, b): + _assert_not_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "a,b", [([1, 2, 3], [1, 2, 3]), (np.array([1, 2, 3]), np.array([1, 2, 3]))] +) +def test_assert_almost_equal_iterables(a, b): + _assert_almost_equal_both(a, b) + + +@pytest.mark.parametrize( + "a,b", + [ + # Class is different. + (np.array([1, 2, 3]), [1, 2, 3]), + # Dtype is different. + (np.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])), + # Can't compare generators. 
+ (iter([1, 2, 3]), [1, 2, 3]), + ([1, 2, 3], [1, 2, 4]), + ([1, 2, 3], [1, 2, 3, 4]), + ([1, 2, 3], 1), + ], +) +def test_assert_not_almost_equal_iterables(a, b): + _assert_not_almost_equal(a, b) + + +def test_assert_almost_equal_null(): + _assert_almost_equal_both(None, None) + + +@pytest.mark.parametrize("a,b", [(None, np.NaN), (None, 0), (np.NaN, 0)]) +def test_assert_not_almost_equal_null(a, b): + _assert_not_almost_equal(a, b) + + +@pytest.mark.parametrize( + "a,b", + [ + (np.inf, np.inf), + (np.inf, float("inf")), + (np.array([np.inf, np.nan, -np.inf]), np.array([np.inf, np.nan, -np.inf])), + ( + np.array([np.inf, None, -np.inf], dtype=np.object_), + np.array([np.inf, np.nan, -np.inf], dtype=np.object_), + ), + ], +) +def test_assert_almost_equal_inf(a, b): + _assert_almost_equal_both(a, b) + + +def test_assert_not_almost_equal_inf(): + _assert_not_almost_equal_both(np.inf, 0) + + +@pytest.mark.parametrize( + "a,b", + [ + (Index([1.0, 1.1]), Index([1.0, 1.100001])), + (Series([1.0, 1.1]), Series([1.0, 1.100001])), + (np.array([1.1, 2.000001]), np.array([1.1, 2.0])), + (DataFrame({"a": [1.0, 1.1]}), DataFrame({"a": [1.0, 1.100001]})), + ], +) +def test_assert_almost_equal_pandas(a, b): + _assert_almost_equal_both(a, b) + + +def test_assert_almost_equal_object(): + a = [Timestamp("2011-01-01"), Timestamp("2011-01-01")] + b = [Timestamp("2011-01-01"), Timestamp("2011-01-01")] + _assert_almost_equal_both(a, b) + + +def test_assert_almost_equal_value_mismatch(): + msg = "expected 2\\.00000 but got 1\\.00000, with decimal 5" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(1, 2) + + +@pytest.mark.parametrize( + "a,b,klass1,klass2", + [(np.array([1]), 1, "ndarray", "int"), (1, np.array([1]), "int", "ndarray")], +) +def test_assert_almost_equal_class_mismatch(a, b, klass1, klass2): + + msg = f"""numpy array are different + +numpy array classes are different +\\[left\\]: {klass1} +\\[right\\]: {klass2}""" + + with 
pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(a, b) + + +def test_assert_almost_equal_value_mismatch1(): + msg = """numpy array are different + +numpy array values are different \\(66\\.66667 %\\) +\\[left\\]: \\[nan, 2\\.0, 3\\.0\\] +\\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + + +def test_assert_almost_equal_value_mismatch2(): + msg = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([1, 2]), np.array([1, 3])) + + +def test_assert_almost_equal_value_mismatch3(): + msg = """numpy array are different + +numpy array values are different \\(16\\.66667 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal( + np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]]) + ) + + +def test_assert_almost_equal_value_mismatch4(): + msg = """numpy array are different + +numpy array values are different \\(25\\.0 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) + + +def test_assert_almost_equal_shape_mismatch_override(): + msg = """Index are different + +Index shapes are different +\\[left\\]: \\(2L*,\\) +\\[right\\]: \\(3L*,\\)""" + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array([1, 2]), np.array([3, 4, 5]), obj="Index") + + +def test_assert_almost_equal_unicode(): + # see gh-20503 + msg = """numpy array are different + +numpy array values are different \\(33\\.33333 %\\) +\\[left\\]: \\[á, à, ä\\] +\\[right\\]: 
\\[á, à, å\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(np.array(["á", "à", "ä"]), np.array(["á", "à", "å"])) + + +def test_assert_almost_equal_timestamp(): + a = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-01")]) + b = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]) + + msg = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\] +\\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal(a, b) + + +def test_assert_almost_equal_iterable_length_mismatch(): + msg = """Iterable are different + +Iterable length are different +\\[left\\]: 2 +\\[right\\]: 3""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal([1, 2], [3, 4, 5]) + + +def test_assert_almost_equal_iterable_values_mismatch(): + msg = """Iterable are different + +Iterable values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_almost_equal([1, 2], [1, 3]) diff --git a/pandas/tests/util/test_assert_categorical_equal.py b/pandas/tests/util/test_assert_categorical_equal.py new file mode 100644 index 00000000..8957e7a1 --- /dev/null +++ b/pandas/tests/util/test_assert_categorical_equal.py @@ -0,0 +1,90 @@ +import pytest + +from pandas import Categorical +import pandas._testing as tm + + +@pytest.mark.parametrize( + "c", + [Categorical([1, 2, 3, 4]), Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4, 5])], +) +def test_categorical_equal(c): + tm.assert_categorical_equal(c, c) + + +@pytest.mark.parametrize("check_category_order", [True, False]) +def test_categorical_equal_order_mismatch(check_category_order): + c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4]) + c2 = Categorical([1, 2, 3, 4], categories=[4, 3, 2, 1]) + kwargs = 
dict(check_category_order=check_category_order) + + if check_category_order: + msg = """Categorical\\.categories are different + +Categorical\\.categories values are different \\(100\\.0 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2, **kwargs) + else: + tm.assert_categorical_equal(c1, c2, **kwargs) + + +def test_categorical_equal_categories_mismatch(): + msg = """Categorical\\.categories are different + +Categorical\\.categories values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" + + c1 = Categorical([1, 2, 3, 4]) + c2 = Categorical([1, 2, 3, 5]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2) + + +def test_categorical_equal_codes_mismatch(): + categories = [1, 2, 3, 4] + msg = """Categorical\\.codes are different + +Categorical\\.codes values are different \\(50\\.0 %\\) +\\[left\\]: \\[0, 1, 3, 2\\] +\\[right\\]: \\[0, 1, 2, 3\\]""" + + c1 = Categorical([1, 2, 4, 3], categories=categories) + c2 = Categorical([1, 2, 3, 4], categories=categories) + + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2) + + +def test_categorical_equal_ordered_mismatch(): + data = [1, 2, 3, 4] + msg = """Categorical are different + +Attribute "ordered" are different +\\[left\\]: False +\\[right\\]: True""" + + c1 = Categorical(data, ordered=False) + c2 = Categorical(data, ordered=True) + + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2) + + +@pytest.mark.parametrize("obj", ["index", "foo", "pandas"]) +def test_categorical_equal_object_override(obj): + data = [1, 2, 3, 4] + msg = f"""{obj} are different + +Attribute "ordered" are different +\\[left\\]: False +\\[right\\]: True""" + + c1 = 
Categorical(data, ordered=False) + c2 = Categorical(data, ordered=True) + + with pytest.raises(AssertionError, match=msg): + tm.assert_categorical_equal(c1, c2, obj=obj) diff --git a/pandas/tests/util/test_assert_extension_array_equal.py b/pandas/tests/util/test_assert_extension_array_equal.py new file mode 100644 index 00000000..0547323b --- /dev/null +++ b/pandas/tests/util/test_assert_extension_array_equal.py @@ -0,0 +1,107 @@ +import numpy as np +import pytest + +import pandas._testing as tm +from pandas.core.arrays.sparse import SparseArray + + +@pytest.mark.parametrize( + "kwargs", + [ + dict(), # Default is check_exact=False + dict(check_exact=False), + dict(check_exact=True), + ], +) +def test_assert_extension_array_equal_not_exact(kwargs): + # see gh-23709 + arr1 = SparseArray([-0.17387645482451206, 0.3414148016424936]) + arr2 = SparseArray([-0.17387645482451206, 0.3414148016424937]) + + if kwargs.get("check_exact", False): + msg = """\ +ExtensionArray are different + +ExtensionArray values are different \\(50\\.0 %\\) +\\[left\\]: \\[-0\\.17387645482.*, 0\\.341414801642.*\\] +\\[right\\]: \\[-0\\.17387645482.*, 0\\.341414801642.*\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + else: + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + + +@pytest.mark.parametrize( + "check_less_precise", [True, False, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +) +def test_assert_extension_array_equal_less_precise(check_less_precise): + arr1 = SparseArray([0.5, 0.123456]) + arr2 = SparseArray([0.5, 0.123457]) + + kwargs = dict(check_less_precise=check_less_precise) + + if check_less_precise is False or check_less_precise >= 5: + msg = """\ +ExtensionArray are different + +ExtensionArray values are different \\(50\\.0 %\\) +\\[left\\]: \\[0\\.5, 0\\.123456\\] +\\[right\\]: \\[0\\.5, 0\\.123457\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + else: + 
tm.assert_extension_array_equal(arr1, arr2, **kwargs) + + +def test_assert_extension_array_equal_dtype_mismatch(check_dtype): + end = 5 + kwargs = dict(check_dtype=check_dtype) + + arr1 = SparseArray(np.arange(end, dtype="int64")) + arr2 = SparseArray(np.arange(end, dtype="int32")) + + if check_dtype: + msg = """\ +ExtensionArray are different + +Attribute "dtype" are different +\\[left\\]: Sparse\\[int64, 0\\] +\\[right\\]: Sparse\\[int32, 0\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + else: + tm.assert_extension_array_equal(arr1, arr2, **kwargs) + + +def test_assert_extension_array_equal_missing_values(): + arr1 = SparseArray([np.nan, 1, 2, np.nan]) + arr2 = SparseArray([np.nan, 1, 2, 3]) + + msg = """\ +ExtensionArray NA mask are different + +ExtensionArray NA mask values are different \\(25\\.0 %\\) +\\[left\\]: \\[True, False, False, True\\] +\\[right\\]: \\[True, False, False, False\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(arr1, arr2) + + +@pytest.mark.parametrize("side", ["left", "right"]) +def test_assert_extension_array_equal_non_extension_array(side): + numpy_array = np.arange(5) + extension_array = SparseArray(numpy_array) + + msg = f"{side} is not an ExtensionArray" + args = ( + (numpy_array, extension_array) + if side == "left" + else (extension_array, numpy_array) + ) + + with pytest.raises(AssertionError, match=msg): + tm.assert_extension_array_equal(*args) diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py new file mode 100644 index 00000000..23c845f2 --- /dev/null +++ b/pandas/tests/util/test_assert_frame_equal.py @@ -0,0 +1,224 @@ +import pytest + +from pandas import DataFrame +import pandas._testing as tm + + +@pytest.fixture(params=[True, False]) +def by_blocks_fixture(request): + return request.param + + +@pytest.fixture(params=["DataFrame", "Series"]) +def 
obj_fixture(request): + return request.param + + +def _assert_frame_equal_both(a, b, **kwargs): + """ + Check that two DataFrame equal. + + This check is performed commutatively. + + Parameters + ---------- + a : DataFrame + The first DataFrame to compare. + b : DataFrame + The second DataFrame to compare. + kwargs : dict + The arguments passed to `tm.assert_frame_equal`. + """ + tm.assert_frame_equal(a, b, **kwargs) + tm.assert_frame_equal(b, a, **kwargs) + + +def _assert_not_frame_equal(a, b, **kwargs): + """ + Check that two DataFrame are not equal. + + Parameters + ---------- + a : DataFrame + The first DataFrame to compare. + b : DataFrame + The second DataFrame to compare. + kwargs : dict + The arguments passed to `tm.assert_frame_equal`. + """ + try: + tm.assert_frame_equal(a, b, **kwargs) + msg = "The two DataFrames were equal when they shouldn't have been" + + pytest.fail(msg=msg) + except AssertionError: + pass + + +def _assert_not_frame_equal_both(a, b, **kwargs): + """ + Check that two DataFrame are not equal. + + This check is performed commutatively. + + Parameters + ---------- + a : DataFrame + The first DataFrame to compare. + b : DataFrame + The second DataFrame to compare. + kwargs : dict + The arguments passed to `tm.assert_frame_equal`. + """ + _assert_not_frame_equal(a, b, **kwargs) + _assert_not_frame_equal(b, a, **kwargs) + + +@pytest.mark.parametrize("check_like", [True, False]) +def test_frame_equal_row_order_mismatch(check_like, obj_fixture): + df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"]) + df2 = DataFrame({"A": [3, 2, 1], "B": [6, 5, 4]}, index=["c", "b", "a"]) + + if not check_like: # Do not ignore row-column orderings. 
+ msg = f"{obj_fixture}.index are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture) + else: + _assert_frame_equal_both(df1, df2, check_like=check_like, obj=obj_fixture) + + +@pytest.mark.parametrize( + "df1,df2", + [ + (DataFrame({"A": [1, 2, 3]}), DataFrame({"A": [1, 2, 3, 4]})), + (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), DataFrame({"A": [1, 2, 3]})), + ], +) +def test_frame_equal_shape_mismatch(df1, df2, obj_fixture): + msg = f"{obj_fixture} are different" + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, obj=obj_fixture) + + +@pytest.mark.parametrize( + "df1,df2,msg", + [ + # Index + ( + DataFrame.from_records({"a": [1, 2], "c": ["l1", "l2"]}, index=["a"]), + DataFrame.from_records({"a": [1.0, 2.0], "c": ["l1", "l2"]}, index=["a"]), + "DataFrame\\.index are different", + ), + # MultiIndex + ( + DataFrame.from_records( + {"a": [1, 2], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ), + DataFrame.from_records( + {"a": [1.0, 2.0], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ), + "MultiIndex level \\[0\\] are different", + ), + ], +) +def test_frame_equal_index_dtype_mismatch(df1, df2, msg, check_index_type): + kwargs = dict(check_index_type=check_index_type) + + if check_index_type: + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, **kwargs) + else: + tm.assert_frame_equal(df1, df2, **kwargs) + + +def test_empty_dtypes(check_dtype): + columns = ["col1", "col2"] + df1 = DataFrame(columns=columns) + df2 = DataFrame(columns=columns) + + kwargs = dict(check_dtype=check_dtype) + df1["col1"] = df1["col1"].astype("int64") + + if check_dtype: + msg = r"Attributes of DataFrame\..* are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, **kwargs) + else: + tm.assert_frame_equal(df1, df2, **kwargs) + + +def test_frame_equal_index_mismatch(obj_fixture): + msg = 
f"""{obj_fixture}\\.index are different + +{obj_fixture}\\.index values are different \\(33\\.33333 %\\) +\\[left\\]: Index\\(\\['a', 'b', 'c'\\], dtype='object'\\) +\\[right\\]: Index\\(\\['a', 'b', 'd'\\], dtype='object'\\)""" + + df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"]) + df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "d"]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, obj=obj_fixture) + + +def test_frame_equal_columns_mismatch(obj_fixture): + msg = f"""{obj_fixture}\\.columns are different + +{obj_fixture}\\.columns values are different \\(50\\.0 %\\) +\\[left\\]: Index\\(\\['A', 'B'\\], dtype='object'\\) +\\[right\\]: Index\\(\\['A', 'b'\\], dtype='object'\\)""" + + df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"]) + df2 = DataFrame({"A": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, obj=obj_fixture) + + +def test_frame_equal_block_mismatch(by_blocks_fixture, obj_fixture): + obj = obj_fixture + msg = f"""{obj}\\.iloc\\[:, 1\\] \\(column name="B"\\) are different + +{obj}\\.iloc\\[:, 1\\] \\(column name="B"\\) values are different \\(33\\.33333 %\\) +\\[left\\]: \\[4, 5, 6\\] +\\[right\\]: \\[4, 5, 7\\]""" + + df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 7]}) + + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, by_blocks=by_blocks_fixture, obj=obj_fixture) + + +@pytest.mark.parametrize( + "df1,df2,msg", + [ + ( + DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "ë"]}), + DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "e̊"]}), + """{obj}\\.iloc\\[:, 1\\] \\(column name="E"\\) are different + +{obj}\\.iloc\\[:, 1\\] \\(column name="E"\\) values are different \\(33\\.33333 %\\) +\\[left\\]: \\[é, è, ë\\] +\\[right\\]: \\[é, è, e̊\\]""", + ), + ( + DataFrame({"A": ["á", "à", "ä"], 
"E": ["é", "è", "ë"]}), + DataFrame({"A": ["a", "a", "a"], "E": ["e", "e", "e"]}), + """{obj}\\.iloc\\[:, 0\\] \\(column name="A"\\) are different + +{obj}\\.iloc\\[:, 0\\] \\(column name="A"\\) values are different \\(100\\.0 %\\) +\\[left\\]: \\[á, à, ä\\] +\\[right\\]: \\[a, a, a\\]""", + ), + ], +) +def test_frame_equal_unicode(df1, df2, msg, by_blocks_fixture, obj_fixture): + # see gh-20503 + # + # Test ensures that `tm.assert_frame_equals` raises the right exception + # when comparing DataFrames containing differing unicode objects. + msg = msg.format(obj=obj_fixture) + with pytest.raises(AssertionError, match=msg): + tm.assert_frame_equal(df1, df2, by_blocks=by_blocks_fixture, obj=obj_fixture) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py new file mode 100644 index 00000000..bbbeebce --- /dev/null +++ b/pandas/tests/util/test_assert_index_equal.py @@ -0,0 +1,172 @@ +import numpy as np +import pytest + +from pandas import Categorical, Index, MultiIndex, NaT +import pandas._testing as tm + + +def test_index_equal_levels_mismatch(): + msg = """Index are different + +Index levels are different +\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 2, MultiIndex\\(\\[\\('A', 1\\), + \\('A', 2\\), + \\('B', 3\\), + \\('B', 4\\)\\], + \\)""" + + idx1 = Index([1, 2, 3]) + idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, exact=False) + + +def test_index_equal_values_mismatch(check_exact): + msg = """MultiIndex level \\[1\\] are different + +MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + + idx1 = MultiIndex.from_tuples([("A", 2), ("A", 2), ("B", 3), ("B", 4)]) + idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)]) + + with 
pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, check_exact=check_exact) + + +def test_index_equal_length_mismatch(check_exact): + msg = """Index are different + +Index length are different +\\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + + idx1 = Index([1, 2, 3]) + idx2 = Index([1, 2, 3, 4]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, check_exact=check_exact) + + +def test_index_equal_class_mismatch(check_exact): + msg = """Index are different + +Index classes are different +\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)""" + + idx1 = Index([1, 2, 3]) + idx2 = Index([1, 2, 3.0]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, exact=True, check_exact=check_exact) + + +def test_index_equal_values_close(check_exact): + idx1 = Index([1, 2, 3.0]) + idx2 = Index([1, 2, 3.0000000001]) + + if check_exact: + msg = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, check_exact=check_exact) + else: + tm.assert_index_equal(idx1, idx2, check_exact=check_exact) + + +def test_index_equal_values_less_close(check_exact, check_less_precise): + idx1 = Index([1, 2, 3.0]) + idx2 = Index([1, 2, 3.0001]) + kwargs = dict(check_exact=check_exact, check_less_precise=check_less_precise) + + if check_exact or not check_less_precise: + msg = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" + + with 
pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, **kwargs) + else: + tm.assert_index_equal(idx1, idx2, **kwargs) + + +def test_index_equal_values_too_far(check_exact, check_less_precise): + idx1 = Index([1, 2, 3]) + idx2 = Index([1, 2, 4]) + kwargs = dict(check_exact=check_exact, check_less_precise=check_less_precise) + + msg = """Index are different + +Index values are different \\(33\\.33333 %\\) +\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, **kwargs) + + +def test_index_equal_level_values_mismatch(check_exact, check_less_precise): + idx1 = MultiIndex.from_tuples([("A", 2), ("A", 2), ("B", 3), ("B", 4)]) + idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)]) + kwargs = dict(check_exact=check_exact, check_less_precise=check_less_precise) + + msg = """MultiIndex level \\[1\\] are different + +MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) +\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, **kwargs) + + +@pytest.mark.parametrize( + "name1,name2", + [(None, "x"), ("x", "x"), (np.nan, np.nan), (NaT, NaT), (np.nan, NaT)], +) +def test_index_equal_names(name1, name2): + + idx1 = Index([1, 2, 3], name=name1) + idx2 = Index([1, 2, 3], name=name2) + + if name1 == name2 or name1 is name2: + tm.assert_index_equal(idx1, idx2) + else: + name1 = "'x'" if name1 == "x" else name1 + name2 = "'x'" if name2 == "x" else name2 + msg = f"""Index are different + +Attribute "names" are different +\\[left\\]: \\[{name1}\\] +\\[right\\]: \\[{name2}\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2) + + +def test_index_equal_category_mismatch(check_categorical): + msg = 
"""Index are different + +Attribute "dtype" are different +\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], \ +ordered=False\\)""" + + idx1 = Index(Categorical(["a", "b"])) + idx2 = Index(Categorical(["a", "b"], categories=["a", "b", "c"])) + + if check_categorical: + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) + else: + tm.assert_index_equal(idx1, idx2, check_categorical=check_categorical) diff --git a/pandas/tests/util/test_assert_interval_array_equal.py b/pandas/tests/util/test_assert_interval_array_equal.py new file mode 100644 index 00000000..96f2973a --- /dev/null +++ b/pandas/tests/util/test_assert_interval_array_equal.py @@ -0,0 +1,81 @@ +import pytest + +from pandas import interval_range +import pandas._testing as tm + + +@pytest.mark.parametrize( + "kwargs", + [ + dict(start=0, periods=4), + dict(start=1, periods=5), + dict(start=5, end=10, closed="left"), + ], +) +def test_interval_array_equal(kwargs): + arr = interval_range(**kwargs).values + tm.assert_interval_array_equal(arr, arr) + + +def test_interval_array_equal_closed_mismatch(): + kwargs = dict(start=0, periods=5) + arr1 = interval_range(closed="left", **kwargs).values + arr2 = interval_range(closed="right", **kwargs).values + + msg = """\ +IntervalArray are different + +Attribute "closed" are different +\\[left\\]: left +\\[right\\]: right""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_interval_array_equal(arr1, arr2) + + +def test_interval_array_equal_periods_mismatch(): + kwargs = dict(start=0) + arr1 = interval_range(periods=5, **kwargs).values + arr2 = interval_range(periods=6, **kwargs).values + + msg = """\ +IntervalArray.left are different + +IntervalArray.left length are different +\\[left\\]: 5, Int64Index\\(\\[0, 1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: 6, Int64Index\\(\\[0, 1, 2, 3, 4, 5\\], 
dtype='int64'\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_interval_array_equal(arr1, arr2) + + +def test_interval_array_equal_end_mismatch(): + kwargs = dict(start=0, periods=5) + arr1 = interval_range(end=10, **kwargs).values + arr2 = interval_range(end=20, **kwargs).values + + msg = """\ +IntervalArray.left are different + +IntervalArray.left values are different \\(80.0 %\\) +\\[left\\]: Int64Index\\(\\[0, 2, 4, 6, 8\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[0, 4, 8, 12, 16\\], dtype='int64'\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_interval_array_equal(arr1, arr2) + + +def test_interval_array_equal_start_mismatch(): + kwargs = dict(periods=4) + arr1 = interval_range(start=0, **kwargs).values + arr2 = interval_range(start=1, **kwargs).values + + msg = """\ +IntervalArray.left are different + +IntervalArray.left values are different \\(100.0 %\\) +\\[left\\]: Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\) +\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_interval_array_equal(arr1, arr2) diff --git a/pandas/tests/util/test_assert_numpy_array_equal.py b/pandas/tests/util/test_assert_numpy_array_equal.py new file mode 100644 index 00000000..d29ddedd --- /dev/null +++ b/pandas/tests/util/test_assert_numpy_array_equal.py @@ -0,0 +1,213 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Timestamp +import pandas._testing as tm + + +def test_assert_numpy_array_equal_shape_mismatch(): + msg = """numpy array are different + +numpy array shapes are different +\\[left\\]: \\(2L*,\\) +\\[right\\]: \\(3L*,\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) + + +def test_assert_numpy_array_equal_bad_type(): + expected = "Expected type" + + with pytest.raises(AssertionError, match=expected): + tm.assert_numpy_array_equal(1, 2) + + 
+@pytest.mark.parametrize( + "a,b,klass1,klass2", + [(np.array([1]), 1, "ndarray", "int"), (1, np.array([1]), "int", "ndarray")], +) +def test_assert_numpy_array_equal_class_mismatch(a, b, klass1, klass2): + msg = f"""numpy array are different + +numpy array classes are different +\\[left\\]: {klass1} +\\[right\\]: {klass2}""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) + + +def test_assert_numpy_array_equal_value_mismatch1(): + msg = """numpy array are different + +numpy array values are different \\(66\\.66667 %\\) +\\[left\\]: \\[nan, 2\\.0, 3\\.0\\] +\\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + + +def test_assert_numpy_array_equal_value_mismatch2(): + msg = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1, 2\\] +\\[right\\]: \\[1, 3\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) + + +def test_assert_numpy_array_equal_value_mismatch3(): + msg = """numpy array are different + +numpy array values are different \\(16\\.66667 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal( + np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]]) + ) + + +def test_assert_numpy_array_equal_value_mismatch4(): + msg = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[1\\.1, 2\\.000001\\] +\\[right\\]: \\[1\\.1, 2.0\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) + + +def test_assert_numpy_array_equal_value_mismatch5(): + msg = """numpy array are different + +numpy array values are different 
\\(16\\.66667 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal( + np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]]) + ) + + +def test_assert_numpy_array_equal_value_mismatch6(): + msg = """numpy array are different + +numpy array values are different \\(25\\.0 %\\) +\\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] +\\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal( + np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]]) + ) + + +def test_assert_numpy_array_equal_shape_mismatch_override(): + msg = """Index are different + +Index shapes are different +\\[left\\]: \\(2L*,\\) +\\[right\\]: \\(3L*,\\)""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), obj="Index") + + +def test_numpy_array_equal_unicode(): + # see gh-20503 + # + # Test ensures that `tm.assert_numpy_array_equals` raises the right + # exception when comparing np.arrays containing differing unicode objects. 
+ msg = """numpy array are different + +numpy array values are different \\(33\\.33333 %\\) +\\[left\\]: \\[á, à, ä\\] +\\[right\\]: \\[á, à, å\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal( + np.array(["á", "à", "ä"]), np.array(["á", "à", "å"]) + ) + + +def test_numpy_array_equal_object(): + a = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-01")]) + b = np.array([Timestamp("2011-01-01"), Timestamp("2011-01-02")]) + + msg = """numpy array are different + +numpy array values are different \\(50\\.0 %\\) +\\[left\\]: \\[2011-01-01 00:00:00, 2011-01-01 00:00:00\\] +\\[right\\]: \\[2011-01-01 00:00:00, 2011-01-02 00:00:00\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) + + +@pytest.mark.parametrize("other_type", ["same", "copy"]) +@pytest.mark.parametrize("check_same", ["same", "copy"]) +def test_numpy_array_equal_copy_flag(other_type, check_same): + a = np.array([1, 2, 3]) + msg = None + + if other_type == "same": + other = a.view() + else: + other = a.copy() + + if check_same != other_type: + msg = ( + r"array\(\[1, 2, 3\]\) is not array\(\[1, 2, 3\]\)" + if check_same == "same" + else r"array\(\[1, 2, 3\]\) is array\(\[1, 2, 3\]\)" + ) + + if msg is not None: + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, other, check_same=check_same) + else: + tm.assert_numpy_array_equal(a, other, check_same=check_same) + + +def test_numpy_array_equal_contains_na(): + # https://github.com/pandas-dev/pandas/issues/31881 + a = np.array([True, False]) + b = np.array([True, pd.NA], dtype=object) + + msg = """numpy array are different + +numpy array values are different \\(50.0 %\\) +\\[left\\]: \\[True, False\\] +\\[right\\]: \\[True, \\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) + + +def test_numpy_array_equal_identical_na(nulls_fixture): + a = np.array([nulls_fixture], dtype=object) + + 
tm.assert_numpy_array_equal(a, a) + + +def test_numpy_array_equal_different_na(): + a = np.array([np.nan], dtype=object) + b = np.array([pd.NA], dtype=object) + + msg = """numpy array are different + +numpy array values are different \\(100.0 %\\) +\\[left\\]: \\[nan\\] +\\[right\\]: \\[\\]""" + + with pytest.raises(AssertionError, match=msg): + tm.assert_numpy_array_equal(a, b) diff --git a/pandas/tests/util/test_assert_produces_warning.py b/pandas/tests/util/test_assert_produces_warning.py new file mode 100644 index 00000000..87765c90 --- /dev/null +++ b/pandas/tests/util/test_assert_produces_warning.py @@ -0,0 +1,22 @@ +import warnings + +import pytest + +import pandas._testing as tm + + +def f(): + warnings.warn("f1", FutureWarning) + warnings.warn("f2", RuntimeWarning) + + +@pytest.mark.filterwarnings("ignore:f1:FutureWarning") +def test_assert_produces_warning_honors_filter(): + # Raise by default. + msg = r"Caused unexpected warning\(s\)" + with pytest.raises(AssertionError, match=msg): + with tm.assert_produces_warning(RuntimeWarning): + f() + + with tm.assert_produces_warning(RuntimeWarning, raise_on_extra_warnings=False): + f() diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py new file mode 100644 index 00000000..eaf0824f --- /dev/null +++ b/pandas/tests/util/test_assert_series_equal.py @@ -0,0 +1,196 @@ +import pytest + +from pandas import Categorical, DataFrame, Series +import pandas._testing as tm + + +def _assert_series_equal_both(a, b, **kwargs): + """ + Check that two Series equal. + + This check is performed commutatively. + + Parameters + ---------- + a : Series + The first Series to compare. + b : Series + The second Series to compare. + kwargs : dict + The arguments passed to `tm.assert_series_equal`. + """ + tm.assert_series_equal(a, b, **kwargs) + tm.assert_series_equal(b, a, **kwargs) + + +def _assert_not_series_equal(a, b, **kwargs): + """ + Check that two Series are not equal. 
+ + Parameters + ---------- + a : Series + The first Series to compare. + b : Series + The second Series to compare. + kwargs : dict + The arguments passed to `tm.assert_series_equal`. + """ + try: + tm.assert_series_equal(a, b, **kwargs) + msg = "The two Series were equal when they shouldn't have been" + + pytest.fail(msg=msg) + except AssertionError: + pass + + +def _assert_not_series_equal_both(a, b, **kwargs): + """ + Check that two Series are not equal. + + This check is performed commutatively. + + Parameters + ---------- + a : Series + The first Series to compare. + b : Series + The second Series to compare. + kwargs : dict + The arguments passed to `tm.assert_series_equal`. + """ + _assert_not_series_equal(a, b, **kwargs) + _assert_not_series_equal(b, a, **kwargs) + + +@pytest.mark.parametrize("data", [range(3), list("abc"), list("áàä")]) +def test_series_equal(data): + _assert_series_equal_both(Series(data), Series(data)) + + +@pytest.mark.parametrize( + "data1,data2", + [ + (range(3), range(1, 4)), + (list("abc"), list("xyz")), + (list("áàä"), list("éèë")), + (list("áàä"), list(b"aaa")), + (range(3), range(4)), + ], +) +def test_series_not_equal_value_mismatch(data1, data2): + _assert_not_series_equal_both(Series(data1), Series(data2)) + + +@pytest.mark.parametrize( + "kwargs", + [ + dict(dtype="float64"), # dtype mismatch + dict(index=[1, 2, 4]), # index mismatch + dict(name="foo"), # name mismatch + ], +) +def test_series_not_equal_metadata_mismatch(kwargs): + data = range(3) + s1 = Series(data) + + s2 = Series(data, **kwargs) + _assert_not_series_equal_both(s1, s2) + + +@pytest.mark.parametrize("data1,data2", [(0.12345, 0.12346), (0.1235, 0.1236)]) +@pytest.mark.parametrize("dtype", ["float32", "float64"]) +@pytest.mark.parametrize("check_less_precise", [False, True, 0, 1, 2, 3, 10]) +def test_less_precise(data1, data2, dtype, check_less_precise): + s1 = Series([data1], dtype=dtype) + s2 = Series([data2], dtype=dtype) + + kwargs = 
dict(check_less_precise=check_less_precise) + + if (check_less_precise is False or check_less_precise == 10) or ( + (check_less_precise is True or check_less_precise >= 3) + and abs(data1 - data2) >= 0.0001 + ): + msg = "Series values are different" + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, **kwargs) + else: + _assert_series_equal_both(s1, s2, **kwargs) + + +@pytest.mark.parametrize( + "s1,s2,msg", + [ + # Index + ( + Series(["l1", "l2"], index=[1, 2]), + Series(["l1", "l2"], index=[1.0, 2.0]), + "Series\\.index are different", + ), + # MultiIndex + ( + DataFrame.from_records( + {"a": [1, 2], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ).c, + DataFrame.from_records( + {"a": [1.0, 2.0], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"] + ).c, + "MultiIndex level \\[0\\] are different", + ), + ], +) +def test_series_equal_index_dtype(s1, s2, msg, check_index_type): + kwargs = dict(check_index_type=check_index_type) + + if check_index_type: + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, **kwargs) + else: + tm.assert_series_equal(s1, s2, **kwargs) + + +def test_series_equal_length_mismatch(check_less_precise): + msg = """Series are different + +Series length are different +\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) +\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" + + s1 = Series([1, 2, 3]) + s2 = Series([1, 2, 3, 4]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, check_less_precise=check_less_precise) + + +def test_series_equal_values_mismatch(check_less_precise): + msg = """Series are different + +Series values are different \\(33\\.33333 %\\) +\\[left\\]: \\[1, 2, 3\\] +\\[right\\]: \\[1, 2, 4\\]""" + + s1 = Series([1, 2, 3]) + s2 = Series([1, 2, 4]) + + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, check_less_precise=check_less_precise) + + +def 
test_series_equal_categorical_mismatch(check_categorical): + msg = """Attributes of Series are different + +Attribute "dtype" are different +\\[left\\]: CategoricalDtype\\(categories=\\['a', 'b'\\], ordered=False\\) +\\[right\\]: CategoricalDtype\\(categories=\\['a', 'b', 'c'\\], \ +ordered=False\\)""" + + s1 = Series(Categorical(["a", "b"])) + s2 = Series(Categorical(["a", "b"], categories=list("abc"))) + + if check_categorical: + with pytest.raises(AssertionError, match=msg): + tm.assert_series_equal(s1, s2, check_categorical=check_categorical) + else: + _assert_series_equal_both(s1, s2, check_categorical=check_categorical) diff --git a/pandas/tests/util/test_deprecate.py b/pandas/tests/util/test_deprecate.py new file mode 100644 index 00000000..ee4f7e3f --- /dev/null +++ b/pandas/tests/util/test_deprecate.py @@ -0,0 +1,64 @@ +from textwrap import dedent + +import pytest + +from pandas.util._decorators import deprecate + +import pandas._testing as tm + + +def new_func(): + """ + This is the summary. The deprecate directive goes next. + + This is the extended summary. The deprecate directive goes before this. + """ + return "new_func called" + + +def new_func_no_docstring(): + return "new_func_no_docstring called" + + +def new_func_wrong_docstring(): + """Summary should be in the next line.""" + return "new_func_wrong_docstring called" + + +def new_func_with_deprecation(): + """ + This is the summary. The deprecate directive goes next. + + .. deprecated:: 1.0 + Use new_func instead. + + This is the extended summary. The deprecate directive goes before this. 
+ """ + pass + + +def test_deprecate_ok(): + depr_func = deprecate("depr_func", new_func, "1.0", msg="Use new_func instead.") + + with tm.assert_produces_warning(FutureWarning): + result = depr_func() + + assert result == "new_func called" + assert depr_func.__doc__ == dedent(new_func_with_deprecation.__doc__) + + +def test_deprecate_no_docstring(): + depr_func = deprecate( + "depr_func", new_func_no_docstring, "1.0", msg="Use new_func instead." + ) + with tm.assert_produces_warning(FutureWarning): + result = depr_func() + assert result == "new_func_no_docstring called" + + +def test_deprecate_wrong_docstring(): + msg = "deprecate needs a correctly formatted docstring" + with pytest.raises(AssertionError, match=msg): + deprecate( + "depr_func", new_func_wrong_docstring, "1.0", msg="Use new_func instead." + ) diff --git a/pandas/tests/util/test_deprecate_kwarg.py b/pandas/tests/util/test_deprecate_kwarg.py new file mode 100644 index 00000000..b165e9fb --- /dev/null +++ b/pandas/tests/util/test_deprecate_kwarg.py @@ -0,0 +1,90 @@ +import pytest + +from pandas.util._decorators import deprecate_kwarg + +import pandas._testing as tm + + +@deprecate_kwarg("old", "new") +def _f1(new=False): + return new + + +_f2_mappings = {"yes": True, "no": False} + + +@deprecate_kwarg("old", "new", _f2_mappings) +def _f2(new=False): + return new + + +def _f3_mapping(x): + return x + 1 + + +@deprecate_kwarg("old", "new", _f3_mapping) +def _f3(new=0): + return new + + +@pytest.mark.parametrize("key,klass", [("old", FutureWarning), ("new", None)]) +def test_deprecate_kwarg(key, klass): + x = 78 + + with tm.assert_produces_warning(klass): + assert _f1(**{key: x}) == x + + +@pytest.mark.parametrize("key", list(_f2_mappings.keys())) +def test_dict_deprecate_kwarg(key): + with tm.assert_produces_warning(FutureWarning): + assert _f2(old=key) == _f2_mappings[key] + + +@pytest.mark.parametrize("key", ["bogus", 12345, -1.23]) +def test_missing_deprecate_kwarg(key): + with 
tm.assert_produces_warning(FutureWarning): + assert _f2(old=key) == key + + +@pytest.mark.parametrize("x", [1, -1.4, 0]) +def test_callable_deprecate_kwarg(x): + with tm.assert_produces_warning(FutureWarning): + assert _f3(old=x) == _f3_mapping(x) + + +def test_callable_deprecate_kwarg_fail(): + msg = "((can only|cannot) concatenate)|(must be str)|(Can't convert)" + + with pytest.raises(TypeError, match=msg): + _f3(old="hello") + + +def test_bad_deprecate_kwarg(): + msg = "mapping from old to new argument values must be dict or callable!" + + with pytest.raises(TypeError, match=msg): + + @deprecate_kwarg("old", "new", 0) + def f4(new=None): + return new + + +@deprecate_kwarg("old", None) +def _f4(old=True, unchanged=True): + return old, unchanged + + +@pytest.mark.parametrize("key", ["old", "unchanged"]) +def test_deprecate_keyword(key): + x = 9 + + if key == "old": + klass = FutureWarning + expected = (x, True) + else: + klass = None + expected = (True, x) + + with tm.assert_produces_warning(klass): + assert _f4(**{key: x}) == expected diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py new file mode 100644 index 00000000..c856585f --- /dev/null +++ b/pandas/tests/util/test_hashing.py @@ -0,0 +1,383 @@ +import datetime + +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Index, MultiIndex, Series +import pandas._testing as tm +from pandas.core.util.hashing import _hash_scalar, hash_tuple, hash_tuples +from pandas.util import hash_array, hash_pandas_object + + +@pytest.fixture( + params=[ + Series([1, 2, 3] * 3, dtype="int32"), + Series([None, 2.5, 3.5] * 3, dtype="float32"), + Series(["a", "b", "c"] * 3, dtype="category"), + Series(["d", "e", "f"] * 3), + Series([True, False, True] * 3), + Series(pd.date_range("20130101", periods=9)), + Series(pd.date_range("20130101", periods=9, tz="US/Eastern")), + Series(pd.timedelta_range("2000", periods=9)), + ] +) +def series(request): + return 
request.param + + +@pytest.fixture(params=[True, False]) +def index(request): + return request.param + + +def _check_equal(obj, **kwargs): + """ + Check that hashing an objects produces the same value each time. + + Parameters + ---------- + obj : object + The object to hash. + kwargs : kwargs + Keyword arguments to pass to the hashing function. + """ + a = hash_pandas_object(obj, **kwargs) + b = hash_pandas_object(obj, **kwargs) + tm.assert_series_equal(a, b) + + +def _check_not_equal_with_index(obj): + """ + Check the hash of an object with and without its index is not the same. + + Parameters + ---------- + obj : object + The object to hash. + """ + if not isinstance(obj, Index): + a = hash_pandas_object(obj, index=True) + b = hash_pandas_object(obj, index=False) + + if len(obj): + assert not (a == b).all() + + +def test_consistency(): + # Check that our hash doesn't change because of a mistake + # in the actual code; this is the ground truth. + result = hash_pandas_object(Index(["foo", "bar", "baz"])) + expected = Series( + np.array( + [3600424527151052760, 1374399572096150070, 477881037637427054], + dtype="uint64", + ), + index=["foo", "bar", "baz"], + ) + tm.assert_series_equal(result, expected) + + +def test_hash_array(series): + arr = series.values + tm.assert_numpy_array_equal(hash_array(arr), hash_array(arr)) + + +@pytest.mark.parametrize( + "arr2", [np.array([3, 4, "All"]), np.array([3, 4, "All"], dtype=object)] +) +def test_hash_array_mixed(arr2): + result1 = hash_array(np.array(["3", "4", "All"])) + result2 = hash_array(arr2) + + tm.assert_numpy_array_equal(result1, result2) + + +@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) +def test_hash_array_errors(val): + msg = "must pass a ndarray-like" + with pytest.raises(TypeError, match=msg): + hash_array(val) + + +def test_hash_tuples(): + tuples = [(1, "one"), (1, "two"), (2, "one")] + result = hash_tuples(tuples) + + expected = 
hash_pandas_object(MultiIndex.from_tuples(tuples)).values + tm.assert_numpy_array_equal(result, expected) + + result = hash_tuples(tuples[0]) + assert result == expected[0] + + +@pytest.mark.parametrize( + "tup", + [(1, "one"), (1, np.nan), (1.0, pd.NaT, "A"), ("A", pd.Timestamp("2012-01-01"))], +) +def test_hash_tuple(tup): + # Test equivalence between + # hash_tuples and hash_tuple. + result = hash_tuple(tup) + expected = hash_tuples([tup])[0] + + assert result == expected + + +@pytest.mark.parametrize( + "val", + [ + 1, + 1.4, + "A", + b"A", + pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-01", tz="Europe/Brussels"), + datetime.datetime(2012, 1, 1), + pd.Timestamp("2012-01-01", tz="EST").to_pydatetime(), + pd.Timedelta("1 days"), + datetime.timedelta(1), + pd.Period("2012-01-01", freq="D"), + pd.Interval(0, 1), + np.nan, + pd.NaT, + None, + ], +) +def test_hash_scalar(val): + result = _hash_scalar(val) + expected = hash_array(np.array([val], dtype=object), categorize=True) + + assert result[0] == expected[0] + + +@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) +def test_hash_tuples_err(val): + msg = "must be convertible to a list-of-tuples" + with pytest.raises(TypeError, match=msg): + hash_tuples(val) + + +def test_multiindex_unique(): + mi = MultiIndex.from_tuples([(118, 472), (236, 118), (51, 204), (102, 51)]) + assert mi.is_unique is True + + result = hash_pandas_object(mi) + assert result.is_unique is True + + +def test_multiindex_objects(): + mi = MultiIndex( + levels=[["b", "d", "a"], [1, 2, 3]], + codes=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=["col1", "col2"], + ) + recons = mi._sort_levels_monotonic() + + # These are equal. + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # _hashed_values and hash_pandas_object(..., index=False) equivalency. 
+ expected = hash_pandas_object(mi, index=False).values + result = mi._hashed_values + + tm.assert_numpy_array_equal(result, expected) + + expected = hash_pandas_object(recons, index=False).values + result = recons._hashed_values + + tm.assert_numpy_array_equal(result, expected) + + expected = mi._hashed_values + result = recons._hashed_values + + # Values should match, but in different order. + tm.assert_numpy_array_equal(np.sort(result), np.sort(expected)) + + +@pytest.mark.parametrize( + "obj", + [ + Series([1, 2, 3]), + Series([1.0, 1.5, 3.2]), + Series([1.0, 1.5, np.nan]), + Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), + Series(["a", "b", "c"]), + Series(["a", np.nan, "c"]), + Series(["a", None, "c"]), + Series([True, False, True]), + Series(dtype=object), + Index([1, 2, 3]), + Index([True, False, True]), + DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), + DataFrame(), + tm.makeMissingDataframe(), + tm.makeMixedDataFrame(), + tm.makeTimeDataFrame(), + tm.makeTimeSeries(), + tm.makeTimedeltaIndex(), + tm.makePeriodIndex(), + Series(tm.makePeriodIndex()), + Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), + MultiIndex.from_product( + [range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2)] + ), + MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]), + ], +) +def test_hash_pandas_object(obj, index): + _check_equal(obj, index=index) + _check_not_equal_with_index(obj) + + +def test_hash_pandas_object2(series, index): + _check_equal(series, index=index) + _check_not_equal_with_index(series) + + +@pytest.mark.parametrize( + "obj", [Series([], dtype="float64"), Series([], dtype="object"), Index([])] +) +def test_hash_pandas_empty_object(obj, index): + # These are by-definition the same with + # or without the index as the data is empty. 
+ _check_equal(obj, index=index) + + +@pytest.mark.parametrize( + "s1", + [ + Series(["a", "b", "c", "d"]), + Series([1000, 2000, 3000, 4000]), + Series(pd.date_range(0, periods=4)), + ], +) +@pytest.mark.parametrize("categorize", [True, False]) +def test_categorical_consistency(s1, categorize): + # see gh-15143 + # + # Check that categoricals hash consistent with their values, + # not codes. This should work for categoricals of any dtype. + s2 = s1.astype("category").cat.set_categories(s1) + s3 = s2.cat.set_categories(list(reversed(s1))) + + # These should all hash identically. + h1 = hash_pandas_object(s1, categorize=categorize) + h2 = hash_pandas_object(s2, categorize=categorize) + h3 = hash_pandas_object(s3, categorize=categorize) + + tm.assert_series_equal(h1, h2) + tm.assert_series_equal(h1, h3) + + +def test_categorical_with_nan_consistency(): + c = pd.Categorical.from_codes( + [-1, 0, 1, 2, 3, 4], categories=pd.date_range("2012-01-01", periods=5, name="B") + ) + expected = hash_array(c, categorize=False) + + c = pd.Categorical.from_codes([-1, 0], categories=[pd.Timestamp("2012-01-01")]) + result = hash_array(c, categorize=False) + + assert result[0] in expected + assert result[1] in expected + + +@pytest.mark.parametrize("obj", [pd.Timestamp("20130101")]) +def test_pandas_errors(obj): + msg = "Unexpected type for hashing" + with pytest.raises(TypeError, match=msg): + hash_pandas_object(obj) + + +def test_hash_keys(): + # Using different hash keys, should have + # different hashes for the same data. + # + # This only matters for object dtypes. + obj = Series(list("abc")) + + a = hash_pandas_object(obj, hash_key="9876543210123456") + b = hash_pandas_object(obj, hash_key="9876543210123465") + + assert (a != b).all() + + +def test_invalid_key(): + # This only matters for object dtypes. 
+ msg = "key should be a 16-byte string encoded" + + with pytest.raises(ValueError, match=msg): + hash_pandas_object(Series(list("abc")), hash_key="foo") + + +def test_already_encoded(index): + # If already encoded, then ok. + obj = Series(list("abc")).str.encode("utf8") + _check_equal(obj, index=index) + + +def test_alternate_encoding(index): + obj = Series(list("abc")) + _check_equal(obj, index=index, encoding="ascii") + + +@pytest.mark.parametrize("l_exp", range(8)) +@pytest.mark.parametrize("l_add", [0, 1]) +def test_same_len_hash_collisions(l_exp, l_add): + length = 2 ** (l_exp + 8) + l_add + s = tm.rands_array(length, 2) + + result = hash_array(s, "utf8") + assert not result[0] == result[1] + + +def test_hash_collisions(): + # Hash collisions are bad. + # + # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726 + hashes = [ + "Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9", # noqa: E501 + "Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe", # noqa: E501 + ] + + # These should be different. 
+ result1 = hash_array(np.asarray(hashes[0:1], dtype=object), "utf8") + expected1 = np.array([14963968704024874985], dtype=np.uint64) + tm.assert_numpy_array_equal(result1, expected1) + + result2 = hash_array(np.asarray(hashes[1:2], dtype=object), "utf8") + expected2 = np.array([16428432627716348016], dtype=np.uint64) + tm.assert_numpy_array_equal(result2, expected2) + + result = hash_array(np.asarray(hashes, dtype=object), "utf8") + tm.assert_numpy_array_equal(result, np.concatenate([expected1, expected2], axis=0)) + + +def test_hash_with_tuple(): + # GH#28969 array containing a tuple raises on call to arr.astype(str) + # apparently a numpy bug github.com/numpy/numpy/issues/9441 + + df = pd.DataFrame({"data": [tuple("1"), tuple("2")]}) + result = hash_pandas_object(df) + expected = pd.Series([10345501319357378243, 8331063931016360761], dtype=np.uint64) + tm.assert_series_equal(result, expected) + + df2 = pd.DataFrame({"data": [tuple([1]), tuple([2])]}) + result = hash_pandas_object(df2) + expected = pd.Series([9408946347443669104, 3278256261030523334], dtype=np.uint64) + tm.assert_series_equal(result, expected) + + # require that the elements of such tuples are themselves hashable + + df3 = pd.DataFrame({"data": [tuple([1, []]), tuple([2, {}])]}) + with pytest.raises(TypeError, match="unhashable type: 'list'"): + hash_pandas_object(df3) + + +def test_hash_object_none_key(): + # https://github.com/pandas-dev/pandas/issues/30887 + result = pd.util.hash_pandas_object(pd.Series(["a", "b"]), hash_key=None) + expected = pd.Series([4578374827886788867, 17338122309987883691], dtype="uint64") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/util/test_safe_import.py b/pandas/tests/util/test_safe_import.py new file mode 100644 index 00000000..bd07bea9 --- /dev/null +++ b/pandas/tests/util/test_safe_import.py @@ -0,0 +1,39 @@ +import sys +import types + +import pytest + +import pandas.util._test_decorators as td + + +@pytest.mark.parametrize("name", 
["foo", "hello123"]) +def test_safe_import_non_existent(name): + assert not td.safe_import(name) + + +def test_safe_import_exists(): + assert td.safe_import("pandas") + + +@pytest.mark.parametrize("min_version,valid", [("0.0.0", True), ("99.99.99", False)]) +def test_safe_import_versions(min_version, valid): + result = td.safe_import("pandas", min_version=min_version) + result = result if valid else not result + assert result + + +@pytest.mark.parametrize( + "min_version,valid", [(None, False), ("1.0", True), ("2.0", False)] +) +def test_safe_import_dummy(monkeypatch, min_version, valid): + mod_name = "hello123" + + mod = types.ModuleType(mod_name) + mod.__version__ = "1.5" + + if min_version is not None: + monkeypatch.setitem(sys.modules, mod_name, mod) + + result = td.safe_import(mod_name, min_version=min_version) + result = result if valid else not result + assert result diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py new file mode 100644 index 00000000..c9dbcf47 --- /dev/null +++ b/pandas/tests/util/test_util.py @@ -0,0 +1,78 @@ +import os + +import pytest + +import pandas.compat as compat + +import pandas._testing as tm + + +def test_rands(): + r = tm.rands(10) + assert len(r) == 10 + + +def test_rands_array_1d(): + arr = tm.rands_array(5, size=10) + assert arr.shape == (10,) + assert len(arr[0]) == 5 + + +def test_rands_array_2d(): + arr = tm.rands_array(7, size=(10, 10)) + assert arr.shape == (10, 10) + assert len(arr[1, 1]) == 7 + + +def test_numpy_err_state_is_default(): + expected = {"over": "warn", "divide": "warn", "invalid": "warn", "under": "ignore"} + import numpy as np + + # The error state should be unchanged after that import. 
+ assert np.geterr() == expected + + +def test_convert_rows_list_to_csv_str(): + rows_list = ["aaa", "bbb", "ccc"] + ret = tm.convert_rows_list_to_csv_str(rows_list) + + if compat.is_platform_windows(): + expected = "aaa\r\nbbb\r\nccc\r\n" + else: + expected = "aaa\nbbb\nccc\n" + + assert ret == expected + + +def test_create_temp_directory(): + with tm.ensure_clean_dir() as path: + assert os.path.exists(path) + assert os.path.isdir(path) + assert not os.path.exists(path) + + +@pytest.mark.parametrize("strict_data_files", [True, False]) +def test_datapath_missing(datapath): + with pytest.raises(ValueError, match="Could not find file"): + datapath("not_a_file") + + +def test_datapath(datapath): + args = ("io", "data", "csv", "iris.csv") + + result = datapath(*args) + expected = os.path.join(os.path.dirname(os.path.dirname(__file__)), *args) + + assert result == expected + + +def test_rng_context(): + import numpy as np + + expected0 = 1.764052345967664 + expected1 = 1.6243453636632417 + + with tm.RNGContext(0): + with tm.RNGContext(1): + assert np.random.randn() == expected1 + assert np.random.randn() == expected0 diff --git a/pandas/tests/util/test_validate_args.py b/pandas/tests/util/test_validate_args.py new file mode 100644 index 00000000..746d859b --- /dev/null +++ b/pandas/tests/util/test_validate_args.py @@ -0,0 +1,67 @@ +import pytest + +from pandas.util._validators import validate_args + +_fname = "func" + + +def test_bad_min_fname_arg_count(): + msg = "'max_fname_arg_count' must be non-negative" + + with pytest.raises(ValueError, match=msg): + validate_args(_fname, (None,), -1, "foo") + + +def test_bad_arg_length_max_value_single(): + args = (None, None) + compat_args = ("foo",) + + min_fname_arg_count = 0 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(args) + min_fname_arg_count + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"argument \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + 
validate_args(_fname, args, min_fname_arg_count, compat_args) + + +def test_bad_arg_length_max_value_multiple(): + args = (None, None) + compat_args = dict(foo=None) + + min_fname_arg_count = 2 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(args) + min_fname_arg_count + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"arguments \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + validate_args(_fname, args, min_fname_arg_count, compat_args) + + +@pytest.mark.parametrize("i", range(1, 3)) +def test_not_all_defaults(i): + bad_arg = "foo" + msg = ( + f"the '{bad_arg}' parameter is not supported " + fr"in the pandas implementation of {_fname}\(\)" + ) + + compat_args = {"foo": 2, "bar": -1, "baz": 3} + arg_vals = (1, -1, 3) + + with pytest.raises(ValueError, match=msg): + validate_args(_fname, arg_vals[:i], 2, compat_args) + + +def test_validation(): + # No exceptions should be raised. + validate_args(_fname, (None,), 2, dict(out=None)) + + compat_args = {"axis": 1, "out": None} + validate_args(_fname, (1, None), 2, compat_args) diff --git a/pandas/tests/util/test_validate_args_and_kwargs.py b/pandas/tests/util/test_validate_args_and_kwargs.py new file mode 100644 index 00000000..941ba86c --- /dev/null +++ b/pandas/tests/util/test_validate_args_and_kwargs.py @@ -0,0 +1,81 @@ +import pytest + +from pandas.util._validators import validate_args_and_kwargs + +_fname = "func" + + +def test_invalid_total_length_max_length_one(): + compat_args = ("foo",) + kwargs = {"foo": "FOO"} + args = ("FoO", "BaZ") + + min_fname_arg_count = 0 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(kwargs) + len(args) + min_fname_arg_count + + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"argument \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +def 
test_invalid_total_length_max_length_multiple(): + compat_args = ("foo", "bar", "baz") + kwargs = {"foo": "FOO", "bar": "BAR"} + args = ("FoO", "BaZ") + + min_fname_arg_count = 2 + max_length = len(compat_args) + min_fname_arg_count + actual_length = len(kwargs) + len(args) + min_fname_arg_count + + msg = ( + fr"{_fname}\(\) takes at most {max_length} " + fr"arguments \({actual_length} given\)" + ) + + with pytest.raises(TypeError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +@pytest.mark.parametrize("args,kwargs", [((), {"foo": -5, "bar": 2}), ((-5, 2), {})]) +def test_missing_args_or_kwargs(args, kwargs): + bad_arg = "bar" + min_fname_arg_count = 2 + + compat_args = {"foo": -5, bad_arg: 1} + + msg = ( + fr"the '{bad_arg}' parameter is not supported " + fr"in the pandas implementation of {_fname}\(\)" + ) + + with pytest.raises(ValueError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +def test_duplicate_argument(): + min_fname_arg_count = 2 + + compat_args = {"foo": None, "bar": None, "baz": None} + kwargs = {"foo": None, "bar": None} + args = (None,) # duplicate value for "foo" + + msg = fr"{_fname}\(\) got multiple values for keyword argument 'foo'" + + with pytest.raises(TypeError, match=msg): + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) + + +def test_validation(): + # No exceptions should be raised. 
+ compat_args = {"foo": 1, "bar": None, "baz": -2} + kwargs = {"baz": -2} + + args = (1, None) + min_fname_arg_count = 2 + + validate_args_and_kwargs(_fname, args, kwargs, min_fname_arg_count, compat_args) diff --git a/pandas/tests/util/test_validate_kwargs.py b/pandas/tests/util/test_validate_kwargs.py new file mode 100644 index 00000000..a7b6d8f9 --- /dev/null +++ b/pandas/tests/util/test_validate_kwargs.py @@ -0,0 +1,63 @@ +import pytest + +from pandas.util._validators import validate_bool_kwarg, validate_kwargs + +_fname = "func" + + +def test_bad_kwarg(): + good_arg = "f" + bad_arg = good_arg + "o" + + compat_args = {good_arg: "foo", bad_arg + "o": "bar"} + kwargs = {good_arg: "foo", bad_arg: "bar"} + + msg = fr"{_fname}\(\) got an unexpected keyword argument '{bad_arg}'" + + with pytest.raises(TypeError, match=msg): + validate_kwargs(_fname, kwargs, compat_args) + + +@pytest.mark.parametrize("i", range(1, 3)) +def test_not_all_none(i): + bad_arg = "foo" + msg = ( + fr"the '{bad_arg}' parameter is not supported " + fr"in the pandas implementation of {_fname}\(\)" + ) + + compat_args = {"foo": 1, "bar": "s", "baz": None} + + kwarg_keys = ("foo", "bar", "baz") + kwarg_vals = (2, "s", None) + + kwargs = dict(zip(kwarg_keys[:i], kwarg_vals[:i])) + + with pytest.raises(ValueError, match=msg): + validate_kwargs(_fname, kwargs, compat_args) + + +def test_validation(): + # No exceptions should be raised. 
+ compat_args = {"f": None, "b": 1, "ba": "s"} + + kwargs = dict(f=None, b=1) + validate_kwargs(_fname, kwargs, compat_args) + + +@pytest.mark.parametrize("name", ["inplace", "copy"]) +@pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) +def test_validate_bool_kwarg_fail(name, value): + msg = ( + f'For argument "{name}" expected type bool,' + f" received type {type(value).__name__}" + ) + + with pytest.raises(ValueError, match=msg): + validate_bool_kwarg(value, name) + + +@pytest.mark.parametrize("name", ["inplace", "copy"]) +@pytest.mark.parametrize("value", [True, False, None]) +def test_validate_bool_kwarg(name, value): + assert validate_bool_kwarg(value, name) == value diff --git a/pandas/tests/window/__init__.py b/pandas/tests/window/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pandas/tests/window/common.py b/pandas/tests/window/common.py new file mode 100644 index 00000000..6aeada31 --- /dev/null +++ b/pandas/tests/window/common.py @@ -0,0 +1,386 @@ +from datetime import datetime + +import numpy as np +from numpy.random import randn + +from pandas import DataFrame, Series, bdate_range, notna +import pandas._testing as tm + +N, K = 100, 10 + + +class Base: + + _nan_locs = np.arange(20, 40) + _inf_locs = np.array([]) + + def _create_data(self): + arr = randn(N) + arr[self._nan_locs] = np.NaN + + self.arr = arr + self.rng = bdate_range(datetime(2009, 1, 1), periods=N) + self.series = Series(arr.copy(), index=self.rng) + self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) + + +# create the data only once as we are not setting it +def _create_consistency_data(): + def create_series(): + return [ + Series(dtype=object), + Series([np.nan]), + Series([np.nan, np.nan]), + Series([3.0]), + Series([np.nan, 3.0]), + Series([3.0, np.nan]), + Series([1.0, 3.0]), + Series([2.0, 2.0]), + Series([3.0, 1.0]), + Series( + [5.0, 5.0, 5.0, 5.0, np.nan, np.nan, np.nan, 5.0, 5.0, np.nan, np.nan] + ), + Series( + [ + np.nan, 
+ 5.0, + 5.0, + 5.0, + np.nan, + np.nan, + np.nan, + 5.0, + 5.0, + np.nan, + np.nan, + ] + ), + Series( + [ + np.nan, + np.nan, + 5.0, + 5.0, + np.nan, + np.nan, + np.nan, + 5.0, + 5.0, + np.nan, + np.nan, + ] + ), + Series( + [ + np.nan, + 3.0, + np.nan, + 3.0, + 4.0, + 5.0, + 6.0, + np.nan, + np.nan, + 7.0, + 12.0, + 13.0, + 14.0, + 15.0, + ] + ), + Series( + [ + np.nan, + 5.0, + np.nan, + 2.0, + 4.0, + 0.0, + 9.0, + np.nan, + np.nan, + 3.0, + 12.0, + 13.0, + 14.0, + 15.0, + ] + ), + Series( + [ + 2.0, + 3.0, + np.nan, + 3.0, + 4.0, + 5.0, + 6.0, + np.nan, + np.nan, + 7.0, + 12.0, + 13.0, + 14.0, + 15.0, + ] + ), + Series( + [ + 2.0, + 5.0, + np.nan, + 2.0, + 4.0, + 0.0, + 9.0, + np.nan, + np.nan, + 3.0, + 12.0, + 13.0, + 14.0, + 15.0, + ] + ), + Series(range(10)), + Series(range(20, 0, -2)), + ] + + def create_dataframes(): + return [ + DataFrame(), + DataFrame(columns=["a"]), + DataFrame(columns=["a", "a"]), + DataFrame(columns=["a", "b"]), + DataFrame(np.arange(10).reshape((5, 2))), + DataFrame(np.arange(25).reshape((5, 5))), + DataFrame(np.arange(25).reshape((5, 5)), columns=["a", "b", 99, "d", "d"]), + ] + [DataFrame(s) for s in create_series()] + + def is_constant(x): + values = x.values.ravel() + return len(set(values[notna(values)])) == 1 + + def no_nans(x): + return x.notna().all().all() + + # data is a tuple(object, is_constant, no_nans) + data = create_series() + create_dataframes() + + return [(x, is_constant(x), no_nans(x)) for x in data] + + +_consistency_data = _create_consistency_data() + + +class ConsistencyBase(Base): + base_functions = [ + (lambda v: Series(v).count(), None, "count"), + (lambda v: Series(v).max(), None, "max"), + (lambda v: Series(v).min(), None, "min"), + (lambda v: Series(v).sum(), None, "sum"), + (lambda v: Series(v).mean(), None, "mean"), + (lambda v: Series(v).std(), 1, "std"), + (lambda v: Series(v).cov(Series(v)), None, "cov"), + (lambda v: Series(v).corr(Series(v)), None, "corr"), + (lambda v: Series(v).var(), 1, 
"var"), + # restore once GH 8086 is fixed + # lambda v: Series(v).skew(), 3, 'skew'), + # (lambda v: Series(v).kurt(), 4, 'kurt'), + # restore once GH 8084 is fixed + # lambda v: Series(v).quantile(0.3), None, 'quantile'), + (lambda v: Series(v).median(), None, "median"), + (np.nanmax, 1, "max"), + (np.nanmin, 1, "min"), + (np.nansum, 1, "sum"), + (np.nanmean, 1, "mean"), + (lambda v: np.nanstd(v, ddof=1), 1, "std"), + (lambda v: np.nanvar(v, ddof=1), 1, "var"), + (np.nanmedian, 1, "median"), + ] + no_nan_functions = [ + (np.max, None, "max"), + (np.min, None, "min"), + (np.sum, None, "sum"), + (np.mean, None, "mean"), + (lambda v: np.std(v, ddof=1), 1, "std"), + (lambda v: np.var(v, ddof=1), 1, "var"), + (np.median, None, "median"), + ] + + def _create_data(self): + super()._create_data() + self.data = _consistency_data + + def _test_moments_consistency_mock_mean(self, mean, mock_mean): + for (x, is_constant, no_nans) in self.data: + mean_x = mean(x) + # check that correlation of a series with itself is either 1 or NaN + + if mock_mean: + # check that mean equals mock_mean + expected = mock_mean(x) + tm.assert_equal(mean_x, expected.astype("float64")) + + def _test_moments_consistency_is_constant(self, min_periods, count, mean, corr): + for (x, is_constant, no_nans) in self.data: + count_x = count(x) + mean_x = mean(x) + # check that correlation of a series with itself is either 1 or NaN + corr_x_x = corr(x, x) + + if is_constant: + exp = x.max() if isinstance(x, Series) else x.max().max() + + # check mean of constant series + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = exp + tm.assert_equal(mean_x, expected) + + # check correlation of constant series with itself is NaN + expected[:] = np.nan + tm.assert_equal(corr_x_x, expected) + + def _test_moments_consistency_var_debiasing_factors( + self, var_biased=None, var_unbiased=None, var_debiasing_factors=None + ): + for (x, is_constant, no_nans) in self.data: + if var_unbiased and var_biased and 
var_debiasing_factors: + # check variance debiasing factors + var_unbiased_x = var_unbiased(x) + var_biased_x = var_biased(x) + var_debiasing_factors_x = var_debiasing_factors(x) + tm.assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) + + def _test_moments_consistency( + self, + min_periods, + count, + mean, + corr, + var_unbiased=None, + std_unbiased=None, + cov_unbiased=None, + var_biased=None, + std_biased=None, + cov_biased=None, + ): + + for (x, is_constant, no_nans) in self.data: + count_x = count(x) + mean_x = mean(x) + + for (std, var, cov) in [ + (std_biased, var_biased, cov_biased), + (std_unbiased, var_unbiased, cov_unbiased), + ]: + + # check that var(x), std(x), and cov(x) are all >= 0 + var_x = var(x) + std_x = std(x) + assert not (var_x < 0).any().any() + assert not (std_x < 0).any().any() + if cov: + cov_x_x = cov(x, x) + assert not (cov_x_x < 0).any().any() + + # check that var(x) == cov(x, x) + tm.assert_equal(var_x, cov_x_x) + + # check that var(x) == std(x)^2 + tm.assert_equal(var_x, std_x * std_x) + + if var is var_biased: + # check that biased var(x) == mean(x^2) - mean(x)^2 + mean_x2 = mean(x * x) + tm.assert_equal(var_x, mean_x2 - (mean_x * mean_x)) + + if is_constant: + # check that variance of constant series is identically 0 + assert not (var_x > 0).any().any() + expected = x * np.nan + expected[count_x >= max(min_periods, 1)] = 0.0 + if var is var_unbiased: + expected[count_x < 2] = np.nan + tm.assert_equal(var_x, expected) + + if isinstance(x, Series): + for (y, is_constant, no_nans) in self.data: + if not x.isna().equals(y.isna()): + # can only easily test two Series with similar + # structure + continue + + # check that cor(x, y) is symmetric + corr_x_y = corr(x, y) + corr_y_x = corr(y, x) + tm.assert_equal(corr_x_y, corr_y_x) + + if cov: + # check that cov(x, y) is symmetric + cov_x_y = cov(x, y) + cov_y_x = cov(y, x) + tm.assert_equal(cov_x_y, cov_y_x) + + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) 
/ 2 + var_x_plus_y = var(x + y) + var_y = var(y) + tm.assert_equal( + cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y) + ) + + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) + std_y = std(y) + tm.assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) + + if cov is cov_biased: + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) + mean_y = mean(y) + mean_x_times_y = mean(x * y) + tm.assert_equal( + cov_x_y, mean_x_times_y - (mean_x * mean_y) + ) + + def _check_pairwise_moment(self, dispatch, name, **kwargs): + def get_result(obj, obj2=None): + return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2) + + result = get_result(self.frame) + result = result.loc[(slice(None), 1), 5] + result.index = result.index.droplevel(1) + expected = get_result(self.frame[1], self.frame[5]) + tm.assert_series_equal(result, expected, check_names=False) + + +def ew_func(A, B, com, name, **kwargs): + return getattr(A.ewm(com, **kwargs), name)(B) + + +def check_binary_ew(name, A, B): + + result = ew_func(A=A, B=B, com=20, name=name, min_periods=5) + assert np.isnan(result.values[:14]).all() + assert not np.isnan(result.values[14:]).any() + + +def check_binary_ew_min_periods(name, min_periods, A, B): + # GH 7898 + result = ew_func(A, B, 20, name=name, min_periods=min_periods) + # binary functions (ewmcov, ewmcorr) with bias=False require at + # least two values + assert np.isnan(result.values[:11]).all() + assert not np.isnan(result.values[11:]).any() + + # check series of length 0 + empty = Series([], dtype=np.float64) + result = ew_func(empty, empty, 50, name=name, min_periods=min_periods) + tm.assert_series_equal(result, empty) + + # check series of length 1 + result = ew_func( + Series([1.0]), Series([1.0]), 50, name=name, min_periods=min_periods + ) + tm.assert_series_equal(result, Series([np.NaN])) diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py new file mode 100644 index 00000000..fb46ca51 --- /dev/null +++ 
b/pandas/tests/window/conftest.py @@ -0,0 +1,89 @@ +import pytest + +import pandas.util._test_decorators as td + + +@pytest.fixture(params=[True, False]) +def raw(request): + return request.param + + +@pytest.fixture( + params=[ + "triang", + "blackman", + "hamming", + "bartlett", + "bohman", + "blackmanharris", + "nuttall", + "barthann", + ] +) +def win_types(request): + return request.param + + +@pytest.fixture(params=["kaiser", "gaussian", "general_gaussian", "exponential"]) +def win_types_special(request): + return request.param + + +@pytest.fixture( + params=["sum", "mean", "median", "max", "min", "var", "std", "kurt", "skew"] +) +def arithmetic_win_operators(request): + return request.param + + +@pytest.fixture(params=["right", "left", "both", "neither"]) +def closed(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def center(request): + return request.param + + +@pytest.fixture(params=[None, 1]) +def min_periods(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def parallel(request): + """parallel keyword argument for numba.jit""" + return request.param + + +@pytest.fixture(params=[True, False]) +def nogil(request): + """nogil keyword argument for numba.jit""" + return request.param + + +@pytest.fixture(params=[True, False]) +def nopython(request): + """nopython keyword argument for numba.jit""" + return request.param + + +@pytest.fixture( + params=[pytest.param("numba", marks=td.skip_if_no("numba", "0.46.0")), "cython"] +) +def engine(request): + """engine keyword argument for rolling.apply""" + return request.param + + +@pytest.fixture( + params=[ + pytest.param(("numba", True), marks=td.skip_if_no("numba", "0.46.0")), + ("cython", True), + ("cython", False), + ] +) +def engine_and_raw(request): + """engine and raw keyword arguments for rolling.apply""" + return request.param diff --git a/pandas/tests/window/moments/conftest.py b/pandas/tests/window/moments/conftest.py new file mode 100644 index 
00000000..2002f4d0 --- /dev/null +++ b/pandas/tests/window/moments/conftest.py @@ -0,0 +1,20 @@ +import numpy as np +from numpy.random import randn +import pytest + +from pandas import Series + + +@pytest.fixture +def binary_ew_data(): + A = Series(randn(50), index=np.arange(50)) + B = A[2:] + randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN + return A, B + + +@pytest.fixture(params=[0, 1, 2]) +def min_periods(request): + return request.param diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py new file mode 100644 index 00000000..59976125 --- /dev/null +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -0,0 +1,439 @@ +import numpy as np +from numpy.random import randn +import pytest + +import pandas as pd +from pandas import DataFrame, Series, concat +import pandas._testing as tm +from pandas.tests.window.common import ( + Base, + ConsistencyBase, + check_binary_ew, + check_binary_ew_min_periods, + ew_func, +) + + +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestMoments(Base): + def setup_method(self, method): + self._create_data() + + def test_ewma(self): + self._check_ew(name="mean") + + vals = pd.Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() + assert np.abs(result - 1) < 1e-2 + + @pytest.mark.parametrize("adjust", [True, False]) + @pytest.mark.parametrize("ignore_na", [True, False]) + def test_ewma_cases(self, adjust, ignore_na): + # try adjust/ignore_na args matrix + + s = Series([1.0, 2.0, 4.0, 8.0]) + + if adjust: + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + else: + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) + + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + tm.assert_series_equal(result, expected) + + def test_ewma_nan_handling(self): + s = Series([1.0] + [np.nan] * 5 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([1.0] * len(s))) + + s 
= Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) + + # GH 7603 + s0 = Series([np.nan, 1.0, 101.0]) + s1 = Series([1.0, np.nan, 101.0]) + s2 = Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]) + s3 = Series([1.0, np.nan, 101.0, 50.0]) + com = 2.0 + alpha = 1.0 / (1.0 + com) + + def simple_wma(s, w): + return (s.multiply(w).cumsum() / w.cumsum()).fillna(method="ffill") + + for (s, adjust, ignore_na, w) in [ + (s0, True, False, [np.nan, (1.0 - alpha), 1.0]), + (s0, True, True, [np.nan, (1.0 - alpha), 1.0]), + (s0, False, False, [np.nan, (1.0 - alpha), alpha]), + (s0, False, True, [np.nan, (1.0 - alpha), alpha]), + (s1, True, False, [(1.0 - alpha) ** 2, np.nan, 1.0]), + (s1, True, True, [(1.0 - alpha), np.nan, 1.0]), + (s1, False, False, [(1.0 - alpha) ** 2, np.nan, alpha]), + (s1, False, True, [(1.0 - alpha), np.nan, alpha]), + ( + s2, + True, + False, + [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, 1.0, np.nan], + ), + (s2, True, True, [np.nan, (1.0 - alpha), np.nan, np.nan, 1.0, np.nan]), + ( + s2, + False, + False, + [np.nan, (1.0 - alpha) ** 3, np.nan, np.nan, alpha, np.nan], + ), + (s2, False, True, [np.nan, (1.0 - alpha), np.nan, np.nan, alpha, np.nan]), + (s3, True, False, [(1.0 - alpha) ** 3, np.nan, (1.0 - alpha), 1.0]), + (s3, True, True, [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha), 1.0]), + ( + s3, + False, + False, + [ + (1.0 - alpha) ** 3, + np.nan, + (1.0 - alpha) * alpha, + alpha * ((1.0 - alpha) ** 2 + alpha), + ], + ), + ( + s3, + False, + True, + [(1.0 - alpha) ** 2, np.nan, (1.0 - alpha) * alpha, alpha], + ), + ]: + expected = simple_wma(s, Series(w)) + result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean() + + tm.assert_series_equal(result, expected) + if ignore_na is False: + # check that ignore_na defaults to False + result = s.ewm(com=com, adjust=adjust).mean() + tm.assert_series_equal(result, expected) + + def test_ewmvar(self): + 
self._check_ew(name="var") + + def test_ewmvol(self): + self._check_ew(name="vol") + + def test_ewma_span_com_args(self): + A = self.series.ewm(com=9.5).mean() + B = self.series.ewm(span=20).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20) + with pytest.raises(ValueError): + self.series.ewm().mean() + + def test_ewma_halflife_arg(self): + A = self.series.ewm(com=13.932726172912965).mean() + B = self.series.ewm(halflife=10.0).mean() + tm.assert_almost_equal(A, B) + + with pytest.raises(ValueError): + self.series.ewm(span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, halflife=50) + with pytest.raises(ValueError): + self.series.ewm(com=9.5, span=20, halflife=50) + with pytest.raises(ValueError): + self.series.ewm() + + def test_ewm_alpha(self): + # GH 10789 + s = Series(self.arr) + a = s.ewm(alpha=0.61722699889169674).mean() + b = s.ewm(com=0.62014947789973052).mean() + c = s.ewm(span=2.240298955799461).mean() + d = s.ewm(halflife=0.721792864318).mean() + tm.assert_series_equal(a, b) + tm.assert_series_equal(a, c) + tm.assert_series_equal(a, d) + + def test_ewm_alpha_arg(self): + # GH 10789 + s = self.series + with pytest.raises(ValueError): + s.ewm() + with pytest.raises(ValueError): + s.ewm(com=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(span=10.0, alpha=0.5) + with pytest.raises(ValueError): + s.ewm(halflife=10.0, alpha=0.5) + + def test_ewm_domain_checks(self): + # GH 12492 + s = Series(self.arr) + msg = "comass must satisfy: comass >= 0" + with pytest.raises(ValueError, match=msg): + s.ewm(com=-0.1) + s.ewm(com=0.0) + s.ewm(com=0.1) + + msg = "span must satisfy: span >= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(span=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.0) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.9) + s.ewm(span=1.0) + s.ewm(span=1.1) + + msg = "halflife must satisfy: halflife > 0" + with 
pytest.raises(ValueError, match=msg): + s.ewm(halflife=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=0.0) + s.ewm(halflife=0.1) + + msg = "alpha must satisfy: 0 < alpha <= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=0.0) + s.ewm(alpha=0.1) + s.ewm(alpha=1.0) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=1.1) + + @pytest.mark.parametrize("method", ["mean", "vol", "var"]) + def test_ew_empty_series(self, method): + vals = pd.Series([], dtype=np.float64) + + ewm = vals.ewm(3) + result = getattr(ewm, method)() + tm.assert_almost_equal(result, vals) + + def _check_ew(self, name=None, preserve_nan=False): + series_result = getattr(self.series.ewm(com=10), name)() + assert isinstance(series_result, Series) + + frame_result = getattr(self.frame.ewm(com=10), name)() + assert type(frame_result) == DataFrame + + result = getattr(self.series.ewm(com=10), name)() + if preserve_nan: + assert result[self._nan_locs].isna().all() + + @pytest.mark.parametrize("min_periods", [0, 1]) + @pytest.mark.parametrize("name", ["mean", "var", "vol"]) + def test_ew_min_periods(self, min_periods, name): + # excluding NaNs correctly + arr = randn(50) + arr[:10] = np.NaN + arr[-10:] = np.NaN + s = Series(arr) + + # check min_periods + # GH 7898 + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() + + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == "mean": + assert result[:10].isna().all() + assert not result[10:].isna().any() + else: + # ewm.std, ewm.vol, ewm.var (with bias=False) require at least + # two values + assert result[:11].isna().all() + assert not result[11:].isna().any() + + # check series of length 0 + result = getattr( + Series(dtype=object).ewm(com=50, min_periods=min_periods), name + )() + tm.assert_series_equal(result, Series(dtype="float64")) + + # check 
series of length 1 + result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() + if name == "mean": + tm.assert_series_equal(result, Series([1.0])) + else: + # ewm.std, ewm.vol, ewm.var with bias=False require at least + # two values + tm.assert_series_equal(result, Series([np.NaN])) + + # pass in ints + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() + assert result2.dtype == np.float_ + + +class TestEwmMomentsConsistency(ConsistencyBase): + def setup_method(self, method): + self._create_data() + + def test_ewmcov_pairwise(self): + self._check_pairwise_moment("ewm", "cov", span=10, min_periods=5) + + @pytest.mark.parametrize("name", ["cov", "corr"]) + def test_ewm_corr_cov(self, name, min_periods, binary_ew_data): + A, B = binary_ew_data + + check_binary_ew(name=name, A=A, B=B) + check_binary_ew_min_periods(name, min_periods, A, B) + + def test_ewmcorr_pairwise(self): + self._check_pairwise_moment("ewm", "corr", span=10, min_periods=5) + + @pytest.mark.parametrize("name", ["cov", "corr"]) + def test_different_input_array_raise_exception(self, name, binary_ew_data): + + A, _ = binary_ew_data + msg = "Input arrays must be of the same type!"
+ # exception raised is Exception + with pytest.raises(Exception, match=msg): + ew_func(A, randn(50), 20, name=name, min_periods=5) + + @pytest.mark.slow + @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) + @pytest.mark.parametrize("adjust", [True, False]) + @pytest.mark.parametrize("ignore_na", [True, False]) + def test_ewm_consistency(self, min_periods, adjust, ignore_na): + def _weights(s, com, adjust, ignore_na): + if isinstance(s, DataFrame): + if not len(s.columns): + return DataFrame(index=s.index, columns=s.columns) + w = concat( + [ + _weights( + s.iloc[:, i], com=com, adjust=adjust, ignore_na=ignore_na + ) + for i, _ in enumerate(s.columns) + ], + axis=1, + ) + w.index = s.index + w.columns = s.columns + return w + + w = Series(np.nan, index=s.index) + alpha = 1.0 / (1.0 + com) + if ignore_na: + w[s.notna()] = _weights( + s[s.notna()], com=com, adjust=adjust, ignore_na=False + ) + elif adjust: + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + w.iat[i] = pow(1.0 / (1.0 - alpha), i) + else: + sum_wts = 0.0 + prev_i = -1 + for i in range(len(s)): + if s.iat[i] == s.iat[i]: + if prev_i == -1: + w.iat[i] = 1.0 + else: + w.iat[i] = alpha * sum_wts / pow(1.0 - alpha, i - prev_i) + sum_wts += w.iat[i] + prev_i = i + return w + + def _variance_debiasing_factors(s, com, adjust, ignore_na): + weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) + cum_sum = weights.cumsum().fillna(method="ffill") + cum_sum_sq = (weights * weights).cumsum().fillna(method="ffill") + numerator = cum_sum * cum_sum + denominator = numerator - cum_sum_sq + denominator[denominator <= 0.0] = np.nan + return numerator / denominator + + def _ewma(s, com, min_periods, adjust, ignore_na): + weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) + result = ( + s.multiply(weights) + .cumsum() + .divide(weights.cumsum()) + .fillna(method="ffill") + ) + result[ + s.expanding().count() < (max(min_periods, 1) if min_periods else 1) + ] = np.nan + return result + 
+ com = 3.0 + self._test_moments_consistency_mock_mean( + mean=lambda x: x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean(), + mock_mean=lambda x: _ewma( + x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ), + ) + + self._test_moments_consistency_is_constant( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean(), + corr=lambda x, y: x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(y), + ) + + self._test_moments_consistency_var_debiasing_factors( + var_unbiased=lambda x: ( + x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=False) + ), + var_biased=lambda x: ( + x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=True) + ), + var_debiasing_factors=lambda x: ( + _variance_debiasing_factors( + x, com=com, adjust=adjust, ignore_na=ignore_na + ) + ), + ) + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).mean(), + corr=lambda x, y: x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).corr(y), + var_unbiased=lambda x: ( + x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=False) + ), + std_unbiased=lambda x: ( + x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=False) + ), + cov_unbiased=lambda x, y: ( + x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).cov(y, bias=False) + ), + var_biased=lambda x: ( + x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).var(bias=True) + ), + std_biased=lambda x: x.ewm( + 
com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).std(bias=True), + cov_biased=lambda x, y: ( + x.ewm( + com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na + ).cov(y, bias=True) + ), + ) diff --git a/pandas/tests/window/moments/test_moments_expanding.py b/pandas/tests/window/moments/test_moments_expanding.py new file mode 100644 index 00000000..983aa305 --- /dev/null +++ b/pandas/tests/window/moments/test_moments_expanding.py @@ -0,0 +1,409 @@ +import warnings + +import numpy as np +from numpy.random import randn +import pytest + +from pandas import DataFrame, Index, MultiIndex, Series, isna, notna +import pandas._testing as tm +from pandas.tests.window.common import ConsistencyBase + + +class TestExpandingMomentsConsistency(ConsistencyBase): + def setup_method(self, method): + self._create_data() + + def test_expanding_apply_args_kwargs(self, raw): + def mean_w_arg(x, const): + return np.mean(x) + const + + df = DataFrame(np.random.rand(20, 3)) + + expected = df.expanding().apply(np.mean, raw=raw) + 20.0 + + result = df.expanding().apply(mean_w_arg, raw=raw, args=(20,)) + tm.assert_frame_equal(result, expected) + + result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20}) + tm.assert_frame_equal(result, expected) + + def test_expanding_corr(self): + A = self.series.dropna() + B = (A + randn(len(A)))[:-5] + + result = A.expanding().corr(B) + + rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) + + tm.assert_almost_equal(rolling_result, result) + + def test_expanding_count(self): + result = self.series.expanding(min_periods=0).count() + tm.assert_almost_equal( + result, self.series.rolling(window=len(self.series), min_periods=0).count() + ) + + def test_expanding_quantile(self): + result = self.series.expanding().quantile(0.5) + + rolling_result = self.series.rolling( + window=len(self.series), min_periods=1 + ).quantile(0.5) + + tm.assert_almost_equal(result, rolling_result) + + def 
test_expanding_cov(self): + A = self.series + B = (A + randn(len(A)))[:-5] + + result = A.expanding().cov(B) + + rolling_result = A.rolling(window=len(A), min_periods=1).cov(B) + + tm.assert_almost_equal(rolling_result, result) + + def test_expanding_cov_pairwise(self): + result = self.frame.expanding().cov() + + rolling_result = self.frame.rolling( + window=len(self.frame), min_periods=1 + ).cov() + + tm.assert_frame_equal(result, rolling_result) + + def test_expanding_corr_pairwise(self): + result = self.frame.expanding().corr() + + rolling_result = self.frame.rolling( + window=len(self.frame), min_periods=1 + ).corr() + tm.assert_frame_equal(result, rolling_result) + + def test_expanding_cov_diff_index(self): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.expanding().cov(s2) + expected = Series([None, None, 2.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.expanding().cov(s2a) + tm.assert_series_equal(result, expected) + + s1 = Series([7, 8, 10], index=[0, 1, 3]) + s2 = Series([7, 9, 10], index=[0, 2, 3]) + result = s1.expanding().cov(s2) + expected = Series([None, None, None, 4.5]) + tm.assert_series_equal(result, expected) + + def test_expanding_corr_diff_index(self): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.expanding().corr(s2) + expected = Series([None, None, 1.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.expanding().corr(s2a) + tm.assert_series_equal(result, expected) + + s1 = Series([7, 8, 10], index=[0, 1, 3]) + s2 = Series([7, 9, 10], index=[0, 2, 3]) + result = s1.expanding().corr(s2) + expected = Series([None, None, None, 1.0]) + tm.assert_series_equal(result, expected) + + def test_expanding_cov_pairwise_diff_length(self): + # GH 7512 + df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"],
name="foo")) + df1a = DataFrame( + [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo") + ) + df2 = DataFrame( + [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo") + ) + df2a = DataFrame( + [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo") + ) + # TODO: xref gh-15826 + # .loc is not preserving the names + result1 = df1.expanding().cov(df2, pairwise=True).loc[2] + result2 = df1.expanding().cov(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().cov(df2, pairwise=True).loc[2] + result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2] + expected = DataFrame( + [[-3.0, -6.0], [-5.0, -10.0]], + columns=Index(["A", "B"], name="foo"), + index=Index(["X", "Y"], name="foo"), + ) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) + tm.assert_frame_equal(result4, expected) + + def test_expanding_corr_pairwise_diff_length(self): + # GH 7512 + df1 = DataFrame( + [[1, 2], [3, 2], [3, 4]], + columns=["A", "B"], + index=Index(range(3), name="bar"), + ) + df1a = DataFrame( + [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"] + ) + df2 = DataFrame( + [[5, 6], [None, None], [2, 1]], + columns=["X", "Y"], + index=Index(range(3), name="bar"), + ) + df2a = DataFrame( + [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"] + ) + result1 = df1.expanding().corr(df2, pairwise=True).loc[2] + result2 = df1.expanding().corr(df2a, pairwise=True).loc[2] + result3 = df1a.expanding().corr(df2, pairwise=True).loc[2] + result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2] + expected = DataFrame( + [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"]) + ) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) + tm.assert_frame_equal(result4, expected) + + @pytest.mark.parametrize("has_min_periods", [True, False]) + 
@pytest.mark.parametrize( + "func,static_comp", + [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)], + ids=["sum", "mean", "max", "min"], + ) + def test_expanding_func(self, func, static_comp, has_min_periods): + def expanding_func(x, min_periods=1, center=False, axis=0): + exp = x.expanding(min_periods=min_periods, center=center, axis=axis) + return getattr(exp, func)() + + self._check_expanding(expanding_func, static_comp, preserve_nan=False) + self._check_expanding_has_min_periods( + expanding_func, static_comp, has_min_periods + ) + + @pytest.mark.parametrize("has_min_periods", [True, False]) + def test_expanding_apply(self, raw, has_min_periods): + def expanding_mean(x, min_periods=1): + + exp = x.expanding(min_periods=min_periods) + result = exp.apply(lambda x: x.mean(), raw=raw) + return result + + # TODO(jreback), needed to add preserve_nan=False + # here to make this pass + self._check_expanding(expanding_mean, np.mean, preserve_nan=False) + self._check_expanding_has_min_periods(expanding_mean, np.mean, has_min_periods) + + def test_expanding_apply_empty_series(self, raw): + ser = Series([], dtype=np.float64) + tm.assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean(), raw=raw)) + + def test_expanding_apply_min_periods_0(self, raw): + # GH 8080 + s = Series([None, None, None]) + result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw) + expected = Series([1.0, 2.0, 3.0]) + tm.assert_series_equal(result, expected) + + def _check_expanding(self, func, static_comp, preserve_nan=True): + + series_result = func(self.series) + assert isinstance(series_result, Series) + frame_result = func(self.frame) + assert isinstance(frame_result, DataFrame) + + result = func(self.series) + tm.assert_almost_equal(result[10], static_comp(self.series[:11])) + + if preserve_nan: + assert result.iloc[self._nan_locs].isna().all() + + def _check_expanding_has_min_periods(self, func, static_comp, has_min_periods): + ser = Series(randn(50)) 
+ + if has_min_periods: + result = func(ser, min_periods=30) + assert result[:29].isna().all() + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + + # min_periods is working correctly + result = func(ser, min_periods=15) + assert isna(result.iloc[13]) + assert notna(result.iloc[14]) + + ser2 = Series(randn(20)) + result = func(ser2, min_periods=5) + assert isna(result[3]) + assert notna(result[4]) + + # min_periods=0 + result0 = func(ser, min_periods=0) + result1 = func(ser, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = func(ser) + tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) + + @pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().count(), + lambda x: x.expanding(min_periods=5).cov(x, pairwise=False), + lambda x: x.expanding(min_periods=5).corr(x, pairwise=False), + lambda x: x.expanding(min_periods=5).max(), + lambda x: x.expanding(min_periods=5).min(), + lambda x: x.expanding(min_periods=5).sum(), + lambda x: x.expanding(min_periods=5).mean(), + lambda x: x.expanding(min_periods=5).std(), + lambda x: x.expanding(min_periods=5).var(), + lambda x: x.expanding(min_periods=5).skew(), + lambda x: x.expanding(min_periods=5).kurt(), + lambda x: x.expanding(min_periods=5).quantile(0.5), + lambda x: x.expanding(min_periods=5).median(), + lambda x: x.expanding(min_periods=5).apply(sum, raw=False), + lambda x: x.expanding(min_periods=5).apply(sum, raw=True), + ], + ) + def test_moment_functions_zero_length(self, f): + # GH 8056 + s = Series(dtype=np.float64) + s_expected = s + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=["a"]) + df2["a"] = df2["a"].astype("float64") + df2_expected = df2 + + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + + @pytest.mark.parametrize( + "f", + [ + lambda x: 
(x.expanding(min_periods=5).cov(x, pairwise=True)), + lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)), + ], + ) + def test_moment_functions_zero_length_pairwise(self, f): + + df1 = DataFrame() + df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) + df2["a"] = df2["a"].astype("float64") + + df1_expected = DataFrame( + index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) + ) + df2_expected = DataFrame( + index=MultiIndex.from_product( + [df2.index, df2.columns], names=["bar", "foo"] + ), + columns=Index(["a"], name="foo"), + dtype="float64", + ) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + + @pytest.mark.slow + @pytest.mark.parametrize("min_periods", [0, 1, 2, 3, 4]) + def test_expanding_consistency(self, min_periods): + + # suppress warnings about empty slices, as we are deliberately testing + # with empty/0-length Series/DataFrames + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning, + ) + + # test consistency between different expanding_* moments + self._test_moments_consistency_mock_mean( + mean=lambda x: x.expanding(min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding(min_periods=min_periods).sum() + / x.expanding().count(), + ) + + self._test_moments_consistency_is_constant( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding(min_periods=min_periods).mean(), + corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), + ) + + self._test_moments_consistency_var_debiasing_factors( + var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), + var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), + var_debiasing_factors=lambda x: ( + x.expanding().count() + / (x.expanding().count() - 1.0).replace(0.0, np.nan) + ), + ) + 
self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding(min_periods=min_periods).count(), + mean=lambda x: x.expanding(min_periods=min_periods).mean(), + corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov( + y, ddof=0 + ), + ) + + # test consistency between expanding_xyz() and either (a) + # expanding_apply of Series.xyz(), or (b) expanding_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + expanding_f = getattr(x.expanding(min_periods=min_periods), name) + + if ( + require_min_periods + and (min_periods is not None) + and (min_periods < require_min_periods) + ): + continue + + if name == "count": + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding(min_periods=0).apply( + func=f, raw=True + ) + else: + if name in ["cov", "corr"]: + expanding_f_result = expanding_f(pairwise=False) + else: + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=min_periods + ).apply(func=f, raw=True) + + # GH 9422 + if name in ["sum", "prod"]: + tm.assert_equal(expanding_f_result, expanding_apply_f_result) diff --git a/pandas/tests/window/moments/test_moments_rolling.py b/pandas/tests/window/moments/test_moments_rolling.py new file mode 100644 index 00000000..83e4ee25 --- /dev/null +++ b/pandas/tests/window/moments/test_moments_rolling.py @@ -0,0 +1,1529 @@ +import copy 
+from datetime import datetime +import warnings + +import numpy as np +from numpy.random import randn +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Index, Series, isna, notna +import pandas._testing as tm +from pandas.core.window.common import _flex_binary_moment +from pandas.tests.window.common import Base, ConsistencyBase + +import pandas.tseries.offsets as offsets + + +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") +class TestMoments(Base): + def setup_method(self, method): + self._create_data() + + def test_centered_axis_validation(self): + + # ok + Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() + + # bad axis + with pytest.raises(ValueError): + Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() + + # ok ok + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=0).mean() + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=1).mean() + + # bad axis + with pytest.raises(ValueError): + (DataFrame(np.ones((10, 10))).rolling(window=3, center=True, axis=2).mean()) + + def test_rolling_sum(self, raw): + self._check_moment_func( + np.nansum, name="sum", zero_min_periods_equal=False, raw=raw + ) + + def test_rolling_count(self, raw): + counter = lambda x: np.isfinite(x).astype(float).sum() + self._check_moment_func( + counter, name="count", has_min_periods=False, fill_value=0, raw=raw + ) + + def test_rolling_mean(self, raw): + self._check_moment_func(np.mean, name="mean", raw=raw) + + @td.skip_if_no_scipy + def test_cmov_mean(self): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ) + result = Series(vals).rolling(5, center=True).mean() + expected = Series( + [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + ) + tm.assert_series_equal(expected, result) + + @td.skip_if_no_scipy + def test_cmov_window(self): + # 
GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ) + result = Series(vals).rolling(5, win_type="boxcar", center=True).mean() + expected = Series( + [ + np.nan, + np.nan, + 9.962, + 11.27, + 11.564, + 12.516, + 12.818, + 12.952, + np.nan, + np.nan, + ] + ) + tm.assert_series_equal(expected, result) + + @td.skip_if_no_scipy + def test_cmov_window_corner(self): + # GH 8238 + # all nan + vals = pd.Series([np.nan] * 10) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert np.isnan(result).all() + + # empty + vals = pd.Series([], dtype=object) + result = vals.rolling(5, center=True, win_type="boxcar").mean() + assert len(result) == 0 + + # shorter than window + vals = pd.Series(np.random.randn(5)) + result = vals.rolling(10, win_type="boxcar").mean() + assert np.isnan(result).all() + assert len(result) == 5 + + @td.skip_if_no_scipy + @pytest.mark.parametrize( + "f,xp", + [ + ( + "mean", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [9.252, 9.392], + [8.644, 9.906], + [8.87, 10.208], + [6.81, 8.588], + [7.792, 8.644], + [9.05, 7.824], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "std", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [3.789706, 4.068313], + [3.429232, 3.237411], + [3.589269, 3.220810], + [3.405195, 2.380655], + [3.281839, 2.369869], + [3.676846, 1.801799], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "var", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [14.36187, 16.55117], + [11.75963, 10.48083], + [12.88285, 10.37362], + [11.59535, 5.66752], + [10.77047, 5.61628], + [13.51920, 3.24648], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ( + "sum", + [ + [np.nan, np.nan], + [np.nan, np.nan], + [46.26, 46.96], + [43.22, 49.53], + [44.35, 51.04], + [34.05, 42.94], + [38.96, 43.22], + [45.25, 39.12], + [np.nan, np.nan], + [np.nan, np.nan], + ], + ), + ], + ) + def test_cmov_window_frame(self, f, xp): + # Gh 8238 + df = DataFrame( + np.array( + [ + [12.18, 3.64], + 
[10.18, 9.16], + [13.24, 14.61], + [4.51, 8.11], + [6.15, 11.44], + [9.14, 6.21], + [11.31, 10.67], + [2.94, 6.51], + [9.42, 8.39], + [12.44, 7.34], + ] + ) + ) + xp = DataFrame(np.array(xp)) + + roll = df.rolling(5, win_type="boxcar", center=True) + rs = getattr(roll, f)() + + tm.assert_frame_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_na_min_periods(self): + # min_periods + vals = Series(np.random.randn(10)) + vals[4] = np.nan + vals[8] = np.nan + + xp = vals.rolling(5, min_periods=4, center=True).mean() + rs = vals.rolling(5, win_type="boxcar", min_periods=4, center=True).mean() + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_regular(self, win_types): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ) + xps = { + "hamming": [ + np.nan, + np.nan, + 8.71384, + 9.56348, + 12.38009, + 14.03687, + 13.8567, + 11.81473, + np.nan, + np.nan, + ], + "triang": [ + np.nan, + np.nan, + 9.28667, + 10.34667, + 12.00556, + 13.33889, + 13.38, + 12.33667, + np.nan, + np.nan, + ], + "barthann": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + "bohman": [ + np.nan, + np.nan, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 14.65923, + 11.10401, + np.nan, + np.nan, + ], + "blackmanharris": [ + np.nan, + np.nan, + 6.97691, + 9.16438, + 13.05052, + 14.02156, + 15.10512, + 10.74574, + np.nan, + np.nan, + ], + "nuttall": [ + np.nan, + np.nan, + 7.04618, + 9.16786, + 13.02671, + 14.03559, + 15.05657, + 10.78514, + np.nan, + np.nan, + ], + "blackman": [ + np.nan, + np.nan, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 14.57726, + 11.16988, + np.nan, + np.nan, + ], + "bartlett": [ + np.nan, + np.nan, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 14.0825, + 11.5675, + np.nan, + np.nan, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) 
+ + @td.skip_if_no_scipy + def test_cmov_window_regular_linear_range(self, win_types): + # GH 8238 + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = Series(vals).rolling(5, win_type=win_types, center=True).mean() + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_regular_missing_data(self, win_types): + # GH 8238 + vals = np.array( + [6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, 10.63, 14.48] + ) + xps = { + "bartlett": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "blackman": [ + np.nan, + np.nan, + 9.04582, + 11.41536, + 7.73345, + 9.17869, + 12.79607, + 14.20036, + 15.8706, + 13.655, + ], + "barthann": [ + np.nan, + np.nan, + 9.70333, + 10.5225, + 8.4425, + 9.1925, + 12.5575, + 14.3675, + 15.61667, + 13.655, + ], + "bohman": [ + np.nan, + np.nan, + 8.9444, + 11.56327, + 7.61599, + 9.1764, + 12.83559, + 14.17267, + 15.90976, + 13.655, + ], + "hamming": [ + np.nan, + np.nan, + 9.59321, + 10.29694, + 8.71384, + 9.56348, + 12.38009, + 14.20565, + 15.24694, + 13.69758, + ], + "nuttall": [ + np.nan, + np.nan, + 8.47693, + 12.2821, + 7.04618, + 9.16786, + 13.02671, + 14.03673, + 16.08759, + 13.65553, + ], + "triang": [ + np.nan, + np.nan, + 9.33167, + 9.76125, + 9.28667, + 10.34667, + 12.00556, + 13.82125, + 14.49429, + 13.765, + ], + "blackmanharris": [ + np.nan, + np.nan, + 8.42526, + 12.36824, + 6.97691, + 9.16438, + 13.05052, + 14.02175, + 16.1098, + 13.65509, + ], + } + + xp = Series(xps[win_types]) + rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean() + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_special(self, win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "exponential": {"tau": 10}, + } + + vals = np.array( + [6.95, 15.21, 
4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48] + ) + + xps = { + "gaussian": [ + np.nan, + np.nan, + 8.97297, + 9.76077, + 12.24763, + 13.89053, + 13.65671, + 12.01002, + np.nan, + np.nan, + ], + "general_gaussian": [ + np.nan, + np.nan, + 9.85011, + 10.71589, + 11.73161, + 13.08516, + 12.95111, + 12.74577, + np.nan, + np.nan, + ], + "kaiser": [ + np.nan, + np.nan, + 9.86851, + 11.02969, + 11.65161, + 12.75129, + 12.90702, + 12.83757, + np.nan, + np.nan, + ], + "exponential": [ + np.nan, + np.nan, + 9.83364, + 11.10472, + 11.64551, + 12.66138, + 12.92379, + 12.83770, + np.nan, + np.nan, + ], + } + + xp = Series(xps[win_types_special]) + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) + + @td.skip_if_no_scipy + def test_cmov_window_special_linear_range(self, win_types_special): + # GH 8238 + kwds = { + "kaiser": {"beta": 1.0}, + "gaussian": {"std": 1.0}, + "general_gaussian": {"power": 2.0, "width": 2.0}, + "slepian": {"width": 0.5}, + "exponential": {"tau": 10}, + } + + vals = np.array(range(10), dtype=np.float) + xp = vals.copy() + xp[:2] = np.nan + xp[-2:] = np.nan + xp = Series(xp) + + rs = ( + Series(vals) + .rolling(5, win_type=win_types_special, center=True) + .mean(**kwds[win_types_special]) + ) + tm.assert_series_equal(xp, rs) + + def test_rolling_median(self, raw): + self._check_moment_func(np.median, name="median", raw=raw) + + def test_rolling_min(self, raw): + self._check_moment_func(np.min, name="min", raw=raw) + + a = pd.Series([1, 2, 3, 4, 5]) + result = a.rolling(window=100, min_periods=1).min() + expected = pd.Series(np.ones(len(a))) + tm.assert_series_equal(result, expected) + + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min() + + def test_rolling_max(self, raw): + self._check_moment_func(np.max, name="max", raw=raw) + + a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64) + b = a.rolling(window=100, 
min_periods=1).max() + tm.assert_almost_equal(a, b) + + with pytest.raises(ValueError): + pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max() + + @pytest.mark.parametrize("q", [0.0, 0.1, 0.5, 0.9, 1.0]) + def test_rolling_quantile(self, q, raw): + def scoreatpercentile(a, per): + values = np.sort(a, axis=0) + + idx = int(per / 1.0 * (values.shape[0] - 1)) + + if idx == values.shape[0] - 1: + retval = values[-1] + + else: + qlow = float(idx) / float(values.shape[0] - 1) + qhig = float(idx + 1) / float(values.shape[0] - 1) + vlow = values[idx] + vhig = values[idx + 1] + retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow) + + return retval + + def quantile_func(x): + return scoreatpercentile(x, q) + + self._check_moment_func(quantile_func, name="quantile", quantile=q, raw=raw) + + def test_rolling_quantile_np_percentile(self): + # #9413: Tests that rolling window's quantile default behavior + # is analogous to Numpy's percentile + row = 10 + col = 5 + idx = pd.date_range("20100101", periods=row, freq="B") + df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx) + + df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0) + np_percentile = np.percentile(df, [25, 50, 75], axis=0) + + tm.assert_almost_equal(df_quantile.values, np.array(np_percentile)) + + @pytest.mark.parametrize("quantile", [0.0, 0.1, 0.45, 0.5, 1]) + @pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] + ) + @pytest.mark.parametrize( + "data", + [ + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + [8.0, 1.0, 3.0, 4.0, 5.0, 2.0, 6.0, 7.0], + [0.0, np.nan, 0.2, np.nan, 0.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, 0.1, np.nan, 0.3, 0.4, 0.5], + [0.5], + [np.nan, 0.7, 0.6], + ], + ) + def test_rolling_quantile_interpolation_options( + self, quantile, interpolation, data + ): + # Tests that rolling window's quantile behavior is analogous to + # Series' quantile for each interpolation option + s = Series(data) + + q1 = 
s.quantile(quantile, interpolation) + q2 = s.expanding(min_periods=1).quantile(quantile, interpolation).iloc[-1] + + if np.isnan(q1): + assert np.isnan(q2) + else: + assert q1 == q2 + + def test_invalid_quantile_value(self): + data = np.arange(5) + s = Series(data) + + msg = "Interpolation 'invalid' is not supported" + with pytest.raises(ValueError, match=msg): + s.rolling(len(data), min_periods=1).quantile(0.5, interpolation="invalid") + + def test_rolling_quantile_param(self): + ser = Series([0.0, 0.1, 0.5, 0.9, 1.0]) + + with pytest.raises(ValueError): + ser.rolling(3).quantile(-0.1) + + with pytest.raises(ValueError): + ser.rolling(3).quantile(10.0) + + with pytest.raises(TypeError): + ser.rolling(3).quantile("foo") + + def test_rolling_apply(self, raw): + # suppress warnings about empty slices, as we are deliberately testing + # with a 0-length Series + + def f(x): + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning, + ) + return x[np.isfinite(x)].mean() + + self._check_moment_func(np.mean, name="apply", func=f, raw=raw) + + def test_rolling_std(self, raw): + self._check_moment_func(lambda x: np.std(x, ddof=1), name="std", raw=raw) + self._check_moment_func( + lambda x: np.std(x, ddof=0), name="std", ddof=0, raw=raw + ) + + def test_rolling_std_1obs(self): + vals = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0]) + + result = vals.rolling(1, min_periods=1).std() + expected = pd.Series([np.nan] * 5) + tm.assert_series_equal(result, expected) + + result = vals.rolling(1, min_periods=1).std(ddof=0) + expected = pd.Series([0.0] * 5) + tm.assert_series_equal(result, expected) + + result = pd.Series([np.nan, np.nan, 3, 4, 5]).rolling(3, min_periods=2).std() + assert np.isnan(result[2]) + + def test_rolling_std_neg_sqrt(self): + # unit test from Bottleneck + + # Test move_nanstd for neg sqrt. 
+ + a = pd.Series( + [ + 0.0011448196318903589, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + 0.00028718669878572767, + ] + ) + b = a.rolling(window=3).std() + assert np.isfinite(b[2:]).all() + + b = a.ewm(span=3).std() + assert np.isfinite(b[2:]).all() + + def test_rolling_var(self, raw): + self._check_moment_func(lambda x: np.var(x, ddof=1), name="var", raw=raw) + self._check_moment_func( + lambda x: np.var(x, ddof=0), name="var", ddof=0, raw=raw + ) + + @td.skip_if_no_scipy + def test_rolling_skew(self, raw): + from scipy.stats import skew + + self._check_moment_func(lambda x: skew(x, bias=False), name="skew", raw=raw) + + @td.skip_if_no_scipy + def test_rolling_kurt(self, raw): + from scipy.stats import kurtosis + + self._check_moment_func(lambda x: kurtosis(x, bias=False), name="kurt", raw=raw) + + def _check_moment_func( + self, + static_comp, + name, + raw, + has_min_periods=True, + has_center=True, + has_time_rule=True, + fill_value=None, + zero_min_periods_equal=True, + **kwargs, + ): + + # inject raw + if name == "apply": + kwargs = copy.copy(kwargs) + kwargs["raw"] = raw + + def get_result(obj, window, min_periods=None, center=False): + r = obj.rolling(window=window, min_periods=min_periods, center=center) + return getattr(r, name)(**kwargs) + + series_result = get_result(self.series, window=50) + assert isinstance(series_result, Series) + tm.assert_almost_equal(series_result.iloc[-1], static_comp(self.series[-50:])) + + frame_result = get_result(self.frame, window=50) + assert isinstance(frame_result, DataFrame) + tm.assert_series_equal( + frame_result.iloc[-1, :], + self.frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw), + check_names=False, + ) + + # check time_rule works + if has_time_rule: + win = 25 + minp = 10 + series = self.series[::2].resample("B").mean() + frame = self.frame[::2].resample("B").mean() + + if has_min_periods: + series_result = get_result(series, window=win, min_periods=minp) + frame_result 
= get_result(frame, window=win, min_periods=minp) + else: + series_result = get_result(series, window=win, min_periods=0) + frame_result = get_result(frame, window=win, min_periods=0) + + last_date = series_result.index[-1] + prev_date = last_date - 24 * offsets.BDay() + + trunc_series = self.series[::2].truncate(prev_date, last_date) + trunc_frame = self.frame[::2].truncate(prev_date, last_date) + + tm.assert_almost_equal(series_result[-1], static_comp(trunc_series)) + + tm.assert_series_equal( + frame_result.xs(last_date), + trunc_frame.apply(static_comp, raw=raw), + check_names=False, + ) + + # excluding NaNs correctly + obj = Series(randn(50)) + obj[:10] = np.NaN + obj[-10:] = np.NaN + if has_min_periods: + result = get_result(obj, 50, min_periods=30) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # min_periods is working correctly + result = get_result(obj, 20, min_periods=15) + assert isna(result.iloc[23]) + assert not isna(result.iloc[24]) + + assert not isna(result.iloc[-6]) + assert isna(result.iloc[-5]) + + obj2 = Series(randn(20)) + result = get_result(obj2, 10, min_periods=5) + assert isna(result.iloc[3]) + assert notna(result.iloc[4]) + + if zero_min_periods_equal: + # min_periods=0 may be equivalent to min_periods=1 + result0 = get_result(obj, 20, min_periods=0) + result1 = get_result(obj, 20, min_periods=1) + tm.assert_almost_equal(result0, result1) + else: + result = get_result(obj, 50) + tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10])) + + # window larger than series length (#7297) + if has_min_periods: + for minp in (0, len(self.series) - 1, len(self.series)): + result = get_result(self.series, len(self.series) + 1, min_periods=minp) + expected = get_result(self.series, len(self.series), min_periods=minp) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + else: + result = 
get_result(self.series, len(self.series) + 1, min_periods=0) + expected = get_result(self.series, len(self.series), min_periods=0) + nan_mask = isna(result) + tm.assert_series_equal(nan_mask, isna(expected)) + + nan_mask = ~nan_mask + tm.assert_almost_equal(result[nan_mask], expected[nan_mask]) + + # check center=True + if has_center: + if has_min_periods: + result = get_result(obj, 20, min_periods=15, center=True) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=15 + )[9:].reset_index(drop=True) + else: + result = get_result(obj, 20, min_periods=0, center=True) + print(result) + expected = get_result( + pd.concat([obj, Series([np.NaN] * 9)]), 20, min_periods=0 + )[9:].reset_index(drop=True) + + tm.assert_series_equal(result, expected) + + # shifter index + s = ["x{x:d}".format(x=x) for x in range(12)] + + if has_min_periods: + minp = 10 + + series_xp = ( + get_result( + self.series.reindex(list(self.series.index) + s), + window=25, + min_periods=minp, + ) + .shift(-12) + .reindex(self.series.index) + ) + frame_xp = ( + get_result( + self.frame.reindex(list(self.frame.index) + s), + window=25, + min_periods=minp, + ) + .shift(-12) + .reindex(self.frame.index) + ) + + series_rs = get_result( + self.series, window=25, min_periods=minp, center=True + ) + frame_rs = get_result( + self.frame, window=25, min_periods=minp, center=True + ) + + else: + series_xp = ( + get_result( + self.series.reindex(list(self.series.index) + s), + window=25, + min_periods=0, + ) + .shift(-12) + .reindex(self.series.index) + ) + frame_xp = ( + get_result( + self.frame.reindex(list(self.frame.index) + s), + window=25, + min_periods=0, + ) + .shift(-12) + .reindex(self.frame.index) + ) + + series_rs = get_result( + self.series, window=25, min_periods=0, center=True + ) + frame_rs = get_result(self.frame, window=25, min_periods=0, center=True) + + if fill_value is not None: + series_xp = series_xp.fillna(fill_value) + frame_xp = frame_xp.fillna(fill_value) + 
tm.assert_series_equal(series_xp, series_rs) + tm.assert_frame_equal(frame_xp, frame_rs) + + +def _rolling_consistency_cases(): + for window in [1, 2, 3, 10, 20]: + for min_periods in {0, 1, 2, 3, 4, window}: + if min_periods and (min_periods > window): + continue + for center in [False, True]: + yield window, min_periods, center + + +class TestRollingMomentsConsistency(ConsistencyBase): + def setup_method(self, method): + self._create_data() + + @pytest.mark.slow + @pytest.mark.parametrize( + "window,min_periods,center", list(_rolling_consistency_cases()) + ) + def test_rolling_consistency(self, window, min_periods, center): + + # suppress warnings about empty slices, as we are deliberately testing + # with empty/0-length Series/DataFrames + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning, + ) + + # test consistency between different rolling_* moments + self._test_moments_consistency_mock_mean( + mean=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + ), + mock_mean=lambda x: ( + x.rolling(window=window, min_periods=min_periods, center=center) + .sum() + .divide( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ) + ), + ) + + self._test_moments_consistency_is_constant( + min_periods=min_periods, + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ), + mean=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + ), + corr=lambda x, y: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).corr(y) + ), + ) + + self._test_moments_consistency_var_debiasing_factors( + var_unbiased=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).var() + ), + var_biased=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=0) + ), + 
var_debiasing_factors=lambda x: ( + x.rolling(window=window, min_periods=min_periods, center=center) + .count() + .divide( + ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + - 1.0 + ).replace(0.0, np.nan) + ) + ), + ) + + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).count() + ), + mean=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).mean() + ), + corr=lambda x, y: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).corr(y) + ), + var_unbiased=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).var() + ), + std_unbiased=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).std() + ), + cov_unbiased=lambda x, y: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).cov(y) + ), + var_biased=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).var(ddof=0) + ), + std_biased=lambda x: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).std(ddof=0) + ), + cov_biased=lambda x, y: ( + x.rolling( + window=window, min_periods=min_periods, center=center + ).cov(y, ddof=0) + ), + ) + + # test consistency between rolling_xyz() and either (a) + # rolling_apply of Series.xyz(), or (b) rolling_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr( + x.rolling( + window=window, center=center, min_periods=min_periods + ), + name, + ) + + if ( + require_min_periods + and (min_periods is not None) + and (min_periods < require_min_periods) + ): + continue + + if name == "count": + rolling_f_result = rolling_f() + rolling_apply_f_result = 
x.rolling( + window=window, min_periods=min_periods, center=center + ).apply(func=f, raw=True) + else: + if name in ["cov", "corr"]: + rolling_f_result = rolling_f(pairwise=False) + else: + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=min_periods, center=center + ).apply(func=f, raw=True) + + # GH 9422 + if name in ["sum", "prod"]: + tm.assert_equal(rolling_f_result, rolling_apply_f_result) + + # binary moments + def test_rolling_cov(self): + A = self.series + B = A + randn(len(A)) + + result = A.rolling(window=50, min_periods=25).cov(B) + tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) + + def test_rolling_cov_pairwise(self): + self._check_pairwise_moment("rolling", "cov", window=10, min_periods=5) + + def test_rolling_corr(self): + A = self.series + B = A + randn(len(A)) + + result = A.rolling(window=50, min_periods=25).corr(B) + tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1]) + + # test for correct bias correction + a = tm.makeTimeSeries() + b = tm.makeTimeSeries() + a[:5] = np.nan + b[:10] = np.nan + + result = a.rolling(window=len(a), min_periods=1).corr(b) + tm.assert_almost_equal(result[-1], a.corr(b)) + + def test_rolling_corr_pairwise(self): + self._check_pairwise_moment("rolling", "corr", window=10, min_periods=5) + + @pytest.mark.parametrize("window", range(7)) + def test_rolling_corr_with_zero_variance(self, window): + # GH 18430 + s = pd.Series(np.zeros(20)) + other = pd.Series(np.arange(20)) + + assert s.rolling(window=window).corr(other=other).isna().all() + + def test_flex_binary_moment(self): + # GH3155 + # don't blow the stack + msg = ( + "arguments to moment function must be of type " + "np.ndarray/Series/DataFrame" + ) + with pytest.raises(TypeError, match=msg): + _flex_binary_moment(5, 6, None) + + def test_corr_sanity(self): + # GH 3155 + df = DataFrame( + np.array( + [ + [0.87024726, 0.18505595], + [0.64355431, 0.3091617], + [0.92372966, 
0.50552513], + [0.00203756, 0.04520709], + [0.84780328, 0.33394331], + [0.78369152, 0.63919667], + ] + ) + ) + + res = df[0].rolling(5, center=True).corr(df[1]) + assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) + + # and some fuzzing + for _ in range(10): + df = DataFrame(np.random.rand(30, 2)) + res = df[0].rolling(5, center=True).corr(df[1]) + try: + assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res) + except AssertionError: + print(res) + + @pytest.mark.parametrize("method", ["corr", "cov"]) + def test_flex_binary_frame(self, method): + series = self.frame[1] + + res = getattr(series.rolling(window=10), method)(self.frame) + res2 = getattr(self.frame.rolling(window=10), method)(series) + exp = self.frame.apply(lambda x: getattr(series.rolling(window=10), method)(x)) + + tm.assert_frame_equal(res, exp) + tm.assert_frame_equal(res2, exp) + + frame2 = self.frame.copy() + frame2.values[:] = np.random.randn(*frame2.shape) + + res3 = getattr(self.frame.rolling(window=10), method)(frame2) + exp = DataFrame( + { + k: getattr(self.frame[k].rolling(window=10), method)(frame2[k]) + for k in self.frame + } + ) + tm.assert_frame_equal(res3, exp) + + def test_rolling_cov_diff_length(self): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.rolling(window=3, min_periods=2).cov(s2) + expected = Series([None, None, 2.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.rolling(window=3, min_periods=2).cov(s2a) + tm.assert_series_equal(result, expected) + + def test_rolling_corr_diff_length(self): + # GH 7512 + s1 = Series([1, 2, 3], index=[0, 1, 2]) + s2 = Series([1, 3], index=[0, 2]) + result = s1.rolling(window=3, min_periods=2).corr(s2) + expected = Series([None, None, 1.0]) + tm.assert_series_equal(result, expected) + + s2a = Series([1, None, 3], index=[0, 1, 2]) + result = s1.rolling(window=3, min_periods=2).corr(s2a) + tm.assert_series_equal(result, 
expected) + + @pytest.mark.parametrize( + "f", + [ + lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).max(), + lambda x: x.rolling(window=10, min_periods=5).min(), + lambda x: x.rolling(window=10, min_periods=5).sum(), + lambda x: x.rolling(window=10, min_periods=5).mean(), + lambda x: x.rolling(window=10, min_periods=5).std(), + lambda x: x.rolling(window=10, min_periods=5).var(), + lambda x: x.rolling(window=10, min_periods=5).skew(), + lambda x: x.rolling(window=10, min_periods=5).kurt(), + lambda x: x.rolling(window=10, min_periods=5).quantile(quantile=0.5), + lambda x: x.rolling(window=10, min_periods=5).median(), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True), + lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(), + ], + ) + @td.skip_if_no_scipy + def test_rolling_functions_window_non_shrinkage(self, f): + # GH 7764 + s = Series(range(4)) + s_expected = Series(np.nan, index=s.index) + df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=["A", "B"]) + df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) + + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) + + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) + + def test_rolling_functions_window_non_shrinkage_binary(self): + + # corr/cov return a MI DataFrame + df = DataFrame( + [[1, 5], [3, 2], [3, 9], [-1, 0]], + columns=Index(["A", "B"], name="foo"), + index=Index(range(4), name="bar"), + ) + df_expected = DataFrame( + columns=Index(["A", "B"], name="foo"), + index=pd.MultiIndex.from_product( + [df.index, df.columns], names=["bar", "foo"] + ), + dtype="float64", + ) + functions = [ + lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)), + lambda x: (x.rolling(window=10, 
min_periods=5).corr(x, pairwise=True)), + ] + for f in functions: + df_result = f(df) + tm.assert_frame_equal(df_result, df_expected) + + def test_rolling_skew_edge_cases(self): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = d.rolling(window=5).skew() + tm.assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = d.rolling(window=2).skew() + tm.assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 0.177994, 1.548824] + d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824]) + x = d.rolling(window=4).skew() + tm.assert_series_equal(expected, x) + + def test_rolling_kurt_edge_cases(self): + + all_nan = Series([np.NaN] * 5) + + # yields all NaN (0 variance) + d = Series([1] * 5) + x = d.rolling(window=5).kurt() + tm.assert_series_equal(all_nan, x) + + # yields all NaN (window too small) + d = Series(np.random.randn(5)) + x = d.rolling(window=3).kurt() + tm.assert_series_equal(all_nan, x) + + # yields [NaN, NaN, NaN, 1.224307, 2.671499] + d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401]) + expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499]) + x = d.rolling(window=4).kurt() + tm.assert_series_equal(expected, x) + + def test_rolling_skew_eq_value_fperr(self): + # #18804 all rolling skew for all equal values should return Nan + a = Series([1.1] * 15).rolling(window=10).skew() + assert np.isnan(a).all() + + def test_rolling_kurt_eq_value_fperr(self): + # #18804 all rolling kurt for all equal values should return Nan + a = Series([1.1] * 15).rolling(window=10).kurt() + assert np.isnan(a).all() + + def test_rolling_max_gh6297(self): + """Replicate result expected in GH #6297""" + + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 2 datapoints on one of the days + indices.append(datetime(1975, 1, 3, 6, 0)) + series = 
Series(range(1, 7), index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically + series = series.sort_index() + + expected = Series( + [1.0, 2.0, 6.0, 4.0, 5.0], + index=[datetime(1975, 1, i, 0) for i in range(1, 6)], + ) + x = series.resample("D").max().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + def test_rolling_max_resample(self): + + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 3 datapoints on last day (4, 10, and 20) + indices.append(datetime(1975, 1, 5, 1)) + indices.append(datetime(1975, 1, 5, 2)) + series = Series(list(range(0, 5)) + [10, 20], index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically + series = series.sort_index() + + # Default how should be max + expected = Series( + [0.0, 1.0, 2.0, 3.0, 20.0], + index=[datetime(1975, 1, i, 0) for i in range(1, 6)], + ) + x = series.resample("D").max().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + # Now specify median (10.0) + expected = Series( + [0.0, 1.0, 2.0, 3.0, 10.0], + index=[datetime(1975, 1, i, 0) for i in range(1, 6)], + ) + x = series.resample("D").median().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + # Now specify mean (4+10+20)/3 + v = (4.0 + 10.0 + 20.0) / 3.0 + expected = Series( + [0.0, 1.0, 2.0, 3.0, v], + index=[datetime(1975, 1, i, 0) for i in range(1, 6)], + ) + x = series.resample("D").mean().rolling(window=1).max() + tm.assert_series_equal(expected, x) + + def test_rolling_min_resample(self): + + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 3 datapoints on last day (4, 10, and 20) + indices.append(datetime(1975, 1, 5, 1)) + indices.append(datetime(1975, 1, 5, 2)) + series = Series(list(range(0, 5)) + [10, 20], index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically 
+ series = series.sort_index() + + # Default how should be min + expected = Series( + [0.0, 1.0, 2.0, 3.0, 4.0], + index=[datetime(1975, 1, i, 0) for i in range(1, 6)], + ) + r = series.resample("D").min().rolling(window=1) + tm.assert_series_equal(expected, r.min()) + + def test_rolling_median_resample(self): + + indices = [datetime(1975, 1, i) for i in range(1, 6)] + # So that we can have 3 datapoints on last day (4, 10, and 20) + indices.append(datetime(1975, 1, 5, 1)) + indices.append(datetime(1975, 1, 5, 2)) + series = Series(list(range(0, 5)) + [10, 20], index=indices) + # Use floats instead of ints as values + series = series.map(lambda x: float(x)) + # Sort chronologically + series = series.sort_index() + + # Default how should be median + expected = Series( + [0.0, 1.0, 2.0, 3.0, 10], + index=[datetime(1975, 1, i, 0) for i in range(1, 6)], + ) + x = series.resample("D").median().rolling(window=1).median() + tm.assert_series_equal(expected, x) + + def test_rolling_median_memory_error(self): + # GH11722 + n = 20000 + Series(np.random.randn(n)).rolling(window=2, center=False).median() + Series(np.random.randn(n)).rolling(window=2, center=False).median() + + def test_rolling_min_max_numeric_types(self): + + # GH12373 + types_test = [np.dtype("f{}".format(width)) for width in [4, 8]] + types_test.extend( + [ + np.dtype("{}{}".format(sign, width)) + for width in [1, 2, 4, 8] + for sign in "ui" + ] + ) + for data_type in types_test: + # Just testing that these don't throw exceptions and that + # the return type is float64. 
Other tests will cover quantitative + # correctness + result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).max() + assert result.dtypes[0] == np.dtype("f8") + result = DataFrame(np.arange(20, dtype=data_type)).rolling(window=5).min() + assert result.dtypes[0] == np.dtype("f8") + + def test_moment_functions_zero_length(self): + # GH 8056 + s = Series(dtype=np.float64) + s_expected = s + df1 = DataFrame() + df1_expected = df1 + df2 = DataFrame(columns=["a"]) + df2["a"] = df2["a"].astype("float64") + df2_expected = df2 + + functions = [ + lambda x: x.rolling(window=10).count(), + lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).max(), + lambda x: x.rolling(window=10, min_periods=5).min(), + lambda x: x.rolling(window=10, min_periods=5).sum(), + lambda x: x.rolling(window=10, min_periods=5).mean(), + lambda x: x.rolling(window=10, min_periods=5).std(), + lambda x: x.rolling(window=10, min_periods=5).var(), + lambda x: x.rolling(window=10, min_periods=5).skew(), + lambda x: x.rolling(window=10, min_periods=5).kurt(), + lambda x: x.rolling(window=10, min_periods=5).quantile(0.5), + lambda x: x.rolling(window=10, min_periods=5).median(), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=False), + lambda x: x.rolling(window=10, min_periods=5).apply(sum, raw=True), + lambda x: x.rolling(win_type="boxcar", window=10, min_periods=5).mean(), + ] + for f in functions: + try: + s_result = f(s) + tm.assert_series_equal(s_result, s_expected) + + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) + except (ImportError): + + # scipy needed for rolling_window + continue + + def test_moment_functions_zero_length_pairwise(self): + + df1 = DataFrame() + df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], 
name="bar")) + df2["a"] = df2["a"].astype("float64") + + df1_expected = DataFrame( + index=pd.MultiIndex.from_product([df1.index, df1.columns]), + columns=Index([]), + ) + df2_expected = DataFrame( + index=pd.MultiIndex.from_product( + [df2.index, df2.columns], names=["bar", "foo"] + ), + columns=Index(["a"], name="foo"), + dtype="float64", + ) + + functions = [ + lambda x: (x.rolling(window=10, min_periods=5).cov(x, pairwise=True)), + lambda x: (x.rolling(window=10, min_periods=5).corr(x, pairwise=True)), + ] + + for f in functions: + df1_result = f(df1) + tm.assert_frame_equal(df1_result, df1_expected) + + df2_result = f(df2) + tm.assert_frame_equal(df2_result, df2_expected) diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py new file mode 100644 index 00000000..680237db --- /dev/null +++ b/pandas/tests/window/test_api.py @@ -0,0 +1,344 @@ +from collections import OrderedDict + +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Index, Series, Timestamp, concat +import pandas._testing as tm +from pandas.core.base import SpecificationError +from pandas.tests.window.common import Base + + +class TestApi(Base): + def setup_method(self, method): + self._create_data() + + def test_getitem(self): + + r = self.frame.rolling(window=5) + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) + + r = self.frame.rolling(window=5)[1] + assert r._selected_obj.name == self.frame.columns[1] + + # technically this is allowed + r = self.frame.rolling(window=5)[1, 3] + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) + + r = self.frame.rolling(window=5)[[1, 3]] + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns[[1, 3]]) + + def test_select_bad_cols(self): + df = DataFrame([[1, 2]], columns=["A", "B"]) + g = df.rolling(window=5) + with pytest.raises(KeyError, match="Columns not found: 'C'"): + g[["C"]] + with 
pytest.raises(KeyError, match="^[^A]+$"): + # A should not be referenced as a bad column... + # will have to rethink regex if you change message! + g[["A", "C"]] + + def test_attribute_access(self): + + df = DataFrame([[1, 2]], columns=["A", "B"]) + r = df.rolling(window=5) + tm.assert_series_equal(r.A.sum(), r["A"].sum()) + msg = "'Rolling' object has no attribute 'F'" + with pytest.raises(AttributeError, match=msg): + r.F + + def tests_skip_nuisance(self): + + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r[["A", "B"]].sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) + + def test_skip_sum_object_raises(self): + df = DataFrame({"A": range(5), "B": range(5, 10), "C": "foo"}) + r = df.rolling(window=3) + result = r.sum() + expected = DataFrame( + {"A": [np.nan, np.nan, 3, 6, 9], "B": [np.nan, np.nan, 18, 21, 24]}, + columns=list("AB"), + ) + tm.assert_frame_equal(result, expected) + + def test_agg(self): + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + + r = df.rolling(window=3) + a_mean = r["A"].mean() + a_std = r["A"].std() + a_sum = r["A"].sum() + b_mean = r["B"].mean() + b_std = r["B"].std() + + result = r.aggregate([np.mean, np.std]) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + tm.assert_frame_equal(result, expected) + + result = r.aggregate({"A": np.mean, "B": np.std}) + + expected = concat([a_mean, b_std], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + result = r.aggregate({"A": ["mean", "std"]}) + expected = concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")]) + tm.assert_frame_equal(result, expected) + + result = r["A"].aggregate(["mean", "sum"]) + expected = concat([a_mean, a_sum], axis=1) + 
expected.columns = ["mean", "sum"] + tm.assert_frame_equal(result, expected) + + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + # using a dict with renaming + r.aggregate({"A": {"mean": "mean", "sum": "sum"}}) + + with pytest.raises(SpecificationError, match=msg): + r.aggregate( + { + "A": {"mean": "mean", "sum": "sum"}, + "B": {"mean2": "mean", "sum2": "sum"}, + } + ) + + result = r.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]}) + expected = concat([a_mean, a_std, b_mean, b_std], axis=1) + + exp_cols = [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")] + expected.columns = pd.MultiIndex.from_tuples(exp_cols) + tm.assert_frame_equal(result, expected, check_like=True) + + def test_agg_apply(self, raw): + + # passed lambda + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + + r = df.rolling(window=3) + a_sum = r["A"].sum() + + result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) + expected = concat([a_sum, rcustom], axis=1) + tm.assert_frame_equal(result, expected, check_like=True) + + def test_agg_consistency(self): + + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) + + result = r.agg([np.sum, np.mean]).columns + expected = pd.MultiIndex.from_product([list("AB"), ["sum", "mean"]]) + tm.assert_index_equal(result, expected) + + result = r["A"].agg([np.sum, np.mean]).columns + expected = Index(["sum", "mean"]) + tm.assert_index_equal(result, expected) + + result = r.agg({"A": [np.sum, np.mean]}).columns + expected = pd.MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) + tm.assert_index_equal(result, expected) + + def test_agg_nested_dicts(self): + + # API change for disallowing these types of nested dicts + df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) + r = df.rolling(window=3) + + msg = "nested renamer is not supported" + with pytest.raises(SpecificationError, match=msg): + 
r.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}}) + + expected = concat( + [r["A"].mean(), r["A"].std(), r["B"].mean(), r["B"].std()], axis=1 + ) + expected.columns = pd.MultiIndex.from_tuples( + [("ra", "mean"), ("ra", "std"), ("rb", "mean"), ("rb", "std")] + ) + with pytest.raises(SpecificationError, match=msg): + r[["A", "B"]].agg( + {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}} + ) + + with pytest.raises(SpecificationError, match=msg): + r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}) + + def test_count_nonnumeric_types(self): + # GH12541 + cols = [ + "int", + "float", + "string", + "datetime", + "timedelta", + "periods", + "fl_inf", + "fl_nan", + "str_nan", + "dt_nat", + "periods_nat", + ] + + df = DataFrame( + { + "int": [1, 2, 3], + "float": [4.0, 5.0, 6.0], + "string": list("abc"), + "datetime": pd.date_range("20170101", periods=3), + "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), + "periods": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period("2012-03"), + ], + "fl_inf": [1.0, 2.0, np.Inf], + "fl_nan": [1.0, 2.0, np.NaN], + "str_nan": ["aa", "bb", np.NaN], + "dt_nat": [ + Timestamp("20170101"), + Timestamp("20170203"), + Timestamp(None), + ], + "periods_nat": [ + pd.Period("2012-01"), + pd.Period("2012-02"), + pd.Period(None), + ], + }, + columns=cols, + ) + + expected = DataFrame( + { + "int": [1.0, 2.0, 2.0], + "float": [1.0, 2.0, 2.0], + "string": [1.0, 2.0, 2.0], + "datetime": [1.0, 2.0, 2.0], + "timedelta": [1.0, 2.0, 2.0], + "periods": [1.0, 2.0, 2.0], + "fl_inf": [1.0, 2.0, 2.0], + "fl_nan": [1.0, 2.0, 1.0], + "str_nan": [1.0, 2.0, 1.0], + "dt_nat": [1.0, 2.0, 1.0], + "periods_nat": [1.0, 2.0, 1.0], + }, + columns=cols, + ) + + result = df.rolling(window=2, min_periods=0).count() + tm.assert_frame_equal(result, expected) + + result = df.rolling(1, min_periods=0).count() + expected = df.notna().astype(float) + tm.assert_frame_equal(result, expected) + + 
@td.skip_if_no_scipy + @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") + def test_window_with_args(self): + # make sure that we are aggregating window functions correctly with arg + r = Series(np.random.randn(100)).rolling( + window=10, min_periods=1, win_type="gaussian" + ) + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["", ""] + result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=0.01)]) + tm.assert_frame_equal(result, expected) + + def a(x): + return x.mean(std=10) + + def b(x): + return x.mean(std=0.01) + + expected = concat([r.mean(std=10), r.mean(std=0.01)], axis=1) + expected.columns = ["a", "b"] + result = r.aggregate([a, b]) + tm.assert_frame_equal(result, expected) + + def test_preserve_metadata(self): + # GH 10565 + s = Series(np.arange(100), name="foo") + + s2 = s.rolling(30).sum() + s3 = s.rolling(20).sum() + assert s2.name == "foo" + assert s3.name == "foo" + + @pytest.mark.parametrize( + "func,window_size,expected_vals", + [ + ( + "rolling", + 2, + [ + [np.nan, np.nan, np.nan, np.nan], + [15.0, 20.0, 25.0, 20.0], + [25.0, 30.0, 35.0, 30.0], + [np.nan, np.nan, np.nan, np.nan], + [20.0, 30.0, 35.0, 30.0], + [35.0, 40.0, 60.0, 40.0], + [60.0, 80.0, 85.0, 80], + ], + ), + ( + "expanding", + None, + [ + [10.0, 10.0, 20.0, 20.0], + [15.0, 20.0, 25.0, 20.0], + [20.0, 30.0, 30.0, 20.0], + [10.0, 10.0, 30.0, 30.0], + [20.0, 30.0, 35.0, 30.0], + [26.666667, 40.0, 50.0, 30.0], + [40.0, 80.0, 60.0, 30.0], + ], + ), + ], + ) + def test_multiple_agg_funcs(self, func, window_size, expected_vals): + # GH 15072 + df = pd.DataFrame( + [ + ["A", 10, 20], + ["A", 20, 30], + ["A", 30, 40], + ["B", 10, 30], + ["B", 30, 40], + ["B", 40, 80], + ["B", 80, 90], + ], + columns=["stock", "low", "high"], + ) + + f = getattr(df.groupby("stock"), func) + if window_size: + window = f(window_size) + else: + window = f() + + index = pd.MultiIndex.from_tuples( + [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), 
("B", 5), ("B", 6)], + names=["stock", None], + ) + columns = pd.MultiIndex.from_tuples( + [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")] + ) + expected = pd.DataFrame(expected_vals, index=index, columns=columns) + + result = window.agg( + OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"]))) + ) + + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py new file mode 100644 index 00000000..f56227b7 --- /dev/null +++ b/pandas/tests/window/test_apply.py @@ -0,0 +1,165 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range +import pandas._testing as tm + + +@pytest.mark.parametrize("bad_raw", [None, 1, 0]) +def test_rolling_apply_invalid_raw(bad_raw): + with pytest.raises(ValueError, match="raw parameter must be `True` or `False`"): + Series(range(3)).rolling(1).apply(len, raw=bad_raw) + + +def test_rolling_apply_out_of_bounds(engine_and_raw): + # gh-1850 + engine, raw = engine_and_raw + + vals = Series([1, 2, 3, 4]) + + result = vals.rolling(10).apply(np.sum, engine=engine, raw=raw) + assert result.isna().all() + + result = vals.rolling(10, min_periods=1).apply(np.sum, engine=engine, raw=raw) + expected = Series([1, 3, 6, 10], dtype=float) + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize("window", [2, "2s"]) +def test_rolling_apply_with_pandas_objects(window): + # 5071 + df = DataFrame( + {"A": np.random.randn(5), "B": np.random.randint(0, 10, size=5)}, + index=date_range("20130101", periods=5, freq="s"), + ) + + # we have an equal spaced timeseries index + # so simulate removing the first period + def f(x): + if x.index[0] == df.index[0]: + return np.nan + return x.iloc[-1] + + result = df.rolling(window).apply(f, raw=False) + expected = df.iloc[2:].reindex_like(df) + tm.assert_frame_equal(result, expected) + + with 
pytest.raises(AttributeError): + df.rolling(window).apply(f, raw=True) + + +def test_rolling_apply(engine_and_raw): + engine, raw = engine_and_raw + + expected = Series([], dtype="float64") + result = expected.rolling(10).apply(lambda x: x.mean(), engine=engine, raw=raw) + tm.assert_series_equal(result, expected) + + # gh-8080 + s = Series([None, None, None]) + result = s.rolling(2, min_periods=0).apply(lambda x: len(x), engine=engine, raw=raw) + expected = Series([1.0, 2.0, 2.0]) + tm.assert_series_equal(result, expected) + + result = s.rolling(2, min_periods=0).apply(len, engine=engine, raw=raw) + tm.assert_series_equal(result, expected) + + +def test_all_apply(engine_and_raw): + engine, raw = engine_and_raw + + df = ( + DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ).set_index("A") + * 2 + ) + er = df.rolling(window=1) + r = df.rolling(window="1s") + + result = r.apply(lambda x: 1, engine=engine, raw=raw) + expected = er.apply(lambda x: 1, engine=engine, raw=raw) + tm.assert_frame_equal(result, expected) + + +def test_ragged_apply(engine_and_raw): + engine, raw = engine_and_raw + + df = DataFrame({"B": range(5)}) + df.index = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + f = lambda x: 1 + result = df.rolling(window="1s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="5s", min_periods=1).apply(f, engine=engine, raw=raw) + expected = df.copy() + expected["B"] = 1.0 + tm.assert_frame_equal(result, expected) + + +def test_invalid_engine(): + with pytest.raises(ValueError, match="engine must be either 'numba' or 
'cython'"): + Series(range(1)).rolling(1).apply(lambda x: x, engine="foo") + + +def test_invalid_engine_kwargs_cython(): + with pytest.raises(ValueError, match="cython engine does not accept engine_kwargs"): + Series(range(1)).rolling(1).apply( + lambda x: x, engine="cython", engine_kwargs={"nopython": False} + ) + + +def test_invalid_raw_numba(): + with pytest.raises( + ValueError, match="raw must be `True` when using the numba engine" + ): + Series(range(1)).rolling(1).apply(lambda x: x, raw=False, engine="numba") + + +@td.skip_if_no("numba") +def test_invalid_kwargs_nopython(): + with pytest.raises(ValueError, match="numba does not support kwargs with"): + Series(range(1)).rolling(1).apply( + lambda x: x, kwargs={"a": 1}, engine="numba", raw=True + ) + + +@pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]]) +def test_rolling_apply_args_kwargs(args_kwargs): + # GH 33433 + def foo(x, par): + return np.sum(x + par) + + df = DataFrame({"gr": [1, 1], "a": [1, 2]}) + + idx = Index(["gr", "a"]) + expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx) + + result = df.rolling(1).apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) + tm.assert_frame_equal(result, expected) + + result = df.rolling(1).apply(foo, args=(10,)) + + midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None]) + expected = Series([11.0, 12.0], index=midx, name="a") + + gb_rolling = df.groupby("gr")["a"].rolling(1) + + result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_base_indexer.py b/pandas/tests/window/test_base_indexer.py new file mode 100644 index 00000000..606520c6 --- /dev/null +++ b/pandas/tests/window/test_base_indexer.py @@ -0,0 +1,82 @@ +import numpy as np +import pytest + +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.api.indexers import BaseIndexer +from pandas.core.window.indexers import ExpandingIndexer 
+ + +def test_bad_get_window_bounds_signature(): + class BadIndexer(BaseIndexer): + def get_window_bounds(self): + return None + + indexer = BadIndexer() + with pytest.raises(ValueError, match="BadIndexer does not implement"): + Series(range(5)).rolling(indexer) + + +def test_expanding_indexer(): + s = Series(range(10)) + indexer = ExpandingIndexer() + result = s.rolling(indexer).mean() + expected = s.expanding().mean() + tm.assert_series_equal(result, expected) + + +def test_indexer_constructor_arg(): + # Example found in computation.rst + use_expanding = [True, False, True, False, True] + df = DataFrame({"values": range(5)}) + + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if self.use_expanding[i]: + start[i] = 0 + end[i] = i + 1 + else: + start[i] = i + end[i] = i + self.window_size + return start, end + + indexer = CustomIndexer(window_size=1, use_expanding=use_expanding) + result = df.rolling(indexer).sum() + expected = DataFrame({"values": [0.0, 1.0, 3.0, 3.0, 10.0]}) + tm.assert_frame_equal(result, expected) + + +def test_indexer_accepts_rolling_args(): + df = DataFrame({"values": range(5)}) + + class CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + start = np.empty(num_values, dtype=np.int64) + end = np.empty(num_values, dtype=np.int64) + for i in range(num_values): + if center and min_periods == 1 and closed == "both" and i == 2: + start[i] = 0 + end[i] = num_values + else: + start[i] = i + end[i] = i + self.window_size + return start, end + + indexer = CustomIndexer(window_size=1) + result = df.rolling(indexer, center=True, min_periods=1, closed="both").sum() + expected = DataFrame({"values": [0.0, 1.0, 10.0, 3.0, 4.0]}) + tm.assert_frame_equal(result, expected) + + +def test_win_type_not_implemented(): + class 
CustomIndexer(BaseIndexer): + def get_window_bounds(self, num_values, min_periods, center, closed): + return np.array([0, 1]), np.array([1, 2]) + + df = DataFrame({"values": range(2)}) + indexer = CustomIndexer() + with pytest.raises(NotImplementedError, match="BaseIndexer subclasses not"): + df.rolling(indexer, win_type="boxcar") diff --git a/pandas/tests/window/test_dtypes.py b/pandas/tests/window/test_dtypes.py new file mode 100644 index 00000000..b1c9b66a --- /dev/null +++ b/pandas/tests/window/test_dtypes.py @@ -0,0 +1,242 @@ +from itertools import product + +import numpy as np +import pytest + +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.base import DataError + +# gh-12373 : rolling functions error on float32 data +# make sure rolling functions works for different dtypes +# +# NOTE that these are yielded tests and so _create_data +# is explicitly called. +# +# further note that we are only checking rolling for fully dtype +# compliance (though both expanding and ewm inherit) + + +class Dtype: + window = 2 + + funcs = { + "count": lambda v: v.count(), + "max": lambda v: v.max(), + "min": lambda v: v.min(), + "sum": lambda v: v.sum(), + "mean": lambda v: v.mean(), + "std": lambda v: v.std(), + "var": lambda v: v.var(), + "median": lambda v: v.median(), + } + + def get_expects(self): + expects = { + "sr1": { + "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "max": Series([np.nan, 1, 2, 3, 4], dtype="float64"), + "min": Series([np.nan, 0, 1, 2, 3], dtype="float64"), + "sum": Series([np.nan, 1, 3, 5, 7], dtype="float64"), + "mean": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), + "std": Series([np.nan] + [np.sqrt(0.5)] * 4, dtype="float64"), + "var": Series([np.nan, 0.5, 0.5, 0.5, 0.5], dtype="float64"), + "median": Series([np.nan, 0.5, 1.5, 2.5, 3.5], dtype="float64"), + }, + "sr2": { + "count": Series([1, 2, 2, 2, 2], dtype="float64"), + "max": Series([np.nan, 10, 8, 6, 4], dtype="float64"), + "min": 
Series([np.nan, 8, 6, 4, 2], dtype="float64"), + "sum": Series([np.nan, 18, 14, 10, 6], dtype="float64"), + "mean": Series([np.nan, 9, 7, 5, 3], dtype="float64"), + "std": Series([np.nan] + [np.sqrt(2)] * 4, dtype="float64"), + "var": Series([np.nan, 2, 2, 2, 2], dtype="float64"), + "median": Series([np.nan, 9, 7, 5, 3], dtype="float64"), + }, + "sr3": { + "count": Series([1, 2, 2, 1, 1], dtype="float64"), + "max": Series([np.nan, 1, 2, np.nan, np.nan], dtype="float64"), + "min": Series([np.nan, 0, 1, np.nan, np.nan], dtype="float64"), + "sum": Series([np.nan, 1, 3, np.nan, np.nan], dtype="float64"), + "mean": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"), + "std": Series( + [np.nan] + [np.sqrt(0.5)] * 2 + [np.nan] * 2, dtype="float64" + ), + "var": Series([np.nan, 0.5, 0.5, np.nan, np.nan], dtype="float64"), + "median": Series([np.nan, 0.5, 1.5, np.nan, np.nan], dtype="float64"), + }, + "df": { + "count": DataFrame( + {0: Series([1, 2, 2, 2, 2]), 1: Series([1, 2, 2, 2, 2])}, + dtype="float64", + ), + "max": DataFrame( + {0: Series([np.nan, 2, 4, 6, 8]), 1: Series([np.nan, 3, 5, 7, 9])}, + dtype="float64", + ), + "min": DataFrame( + {0: Series([np.nan, 0, 2, 4, 6]), 1: Series([np.nan, 1, 3, 5, 7])}, + dtype="float64", + ), + "sum": DataFrame( + { + 0: Series([np.nan, 2, 6, 10, 14]), + 1: Series([np.nan, 4, 8, 12, 16]), + }, + dtype="float64", + ), + "mean": DataFrame( + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + dtype="float64", + ), + "std": DataFrame( + { + 0: Series([np.nan] + [np.sqrt(2)] * 4), + 1: Series([np.nan] + [np.sqrt(2)] * 4), + }, + dtype="float64", + ), + "var": DataFrame( + {0: Series([np.nan, 2, 2, 2, 2]), 1: Series([np.nan, 2, 2, 2, 2])}, + dtype="float64", + ), + "median": DataFrame( + {0: Series([np.nan, 1, 3, 5, 7]), 1: Series([np.nan, 2, 4, 6, 8])}, + dtype="float64", + ), + }, + } + return expects + + def _create_dtype_data(self, dtype): + sr1 = Series(np.arange(5), dtype=dtype) + sr2 = 
Series(np.arange(10, 0, -2), dtype=dtype) + sr3 = sr1.copy() + sr3[3] = np.NaN + df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) + + data = {"sr1": sr1, "sr2": sr2, "sr3": sr3, "df": df} + + return data + + def _create_data(self): + self.data = self._create_dtype_data(self.dtype) + self.expects = self.get_expects() + + def test_dtypes(self): + self._create_data() + for f_name, d_name in product(self.funcs.keys(), self.data.keys()): + + f = self.funcs[f_name] + d = self.data[d_name] + exp = self.expects[d_name][f_name] + self.check_dtypes(f, f_name, d, d_name, exp) + + def check_dtypes(self, f, f_name, d, d_name, exp): + roll = d.rolling(window=self.window) + result = f(roll) + tm.assert_almost_equal(result, exp) + + +class TestDtype_object(Dtype): + dtype = object + + +class Dtype_integer(Dtype): + pass + + +class TestDtype_int8(Dtype_integer): + dtype = np.int8 + + +class TestDtype_int16(Dtype_integer): + dtype = np.int16 + + +class TestDtype_int32(Dtype_integer): + dtype = np.int32 + + +class TestDtype_int64(Dtype_integer): + dtype = np.int64 + + +class Dtype_uinteger(Dtype): + pass + + +class TestDtype_uint8(Dtype_uinteger): + dtype = np.uint8 + + +class TestDtype_uint16(Dtype_uinteger): + dtype = np.uint16 + + +class TestDtype_uint32(Dtype_uinteger): + dtype = np.uint32 + + +class TestDtype_uint64(Dtype_uinteger): + dtype = np.uint64 + + +class Dtype_float(Dtype): + pass + + +class TestDtype_float16(Dtype_float): + dtype = np.float16 + + +class TestDtype_float32(Dtype_float): + dtype = np.float32 + + +class TestDtype_float64(Dtype_float): + dtype = np.float64 + + +class TestDtype_category(Dtype): + dtype = "category" + include_df = False + + def _create_dtype_data(self, dtype): + sr1 = Series(range(5), dtype=dtype) + sr2 = Series(range(10, 0, -2), dtype=dtype) + + data = {"sr1": sr1, "sr2": sr2} + + return data + + +class DatetimeLike(Dtype): + def check_dtypes(self, f, f_name, d, d_name, exp): + + roll = d.rolling(window=self.window) + if f_name == 
"count": + result = f(roll) + tm.assert_almost_equal(result, exp) + + else: + with pytest.raises(DataError): + f(roll) + + +class TestDtype_timedelta(DatetimeLike): + dtype = np.dtype("m8[ns]") + + +class TestDtype_datetime(DatetimeLike): + dtype = np.dtype("M8[ns]") + + +class TestDtype_datetime64UTC(DatetimeLike): + dtype = "datetime64[ns, UTC]" + + def _create_data(self): + pytest.skip( + "direct creation of extension dtype " + "datetime64[ns, UTC] is not supported ATM" + ) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py new file mode 100644 index 00000000..1683fda5 --- /dev/null +++ b/pandas/tests/window/test_ewm.py @@ -0,0 +1,70 @@ +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall + +from pandas import DataFrame, Series +from pandas.core.window import EWM +from pandas.tests.window.common import Base + + +class TestEWM(Base): + def setup_method(self, method): + self._create_data() + + def test_doc_string(self): + + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.ewm(com=0.5).mean() + + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor(self, which): + o = getattr(self, which) + c = o.ewm + + # valid + c(com=0.5) + c(span=1.5) + c(alpha=0.5) + c(halflife=0.75) + c(com=0.5, span=None) + c(alpha=0.5, com=None) + c(halflife=0.75, alpha=None) + + # not valid: mutually exclusive + with pytest.raises(ValueError): + c(com=0.5, alpha=0.5) + with pytest.raises(ValueError): + c(span=1.5, halflife=0.75) + with pytest.raises(ValueError): + c(alpha=0.5, span=1.5) + + # not valid: com < 0 + with pytest.raises(ValueError): + c(com=-0.5) + + # not valid: span < 1 + with pytest.raises(ValueError): + c(span=0.5) + + # not valid: halflife <= 0 + with pytest.raises(ValueError): + c(halflife=0) + + # not valid: alpha <= 0 or alpha > 1 + for alpha in (-0.5, 1.5): + with pytest.raises(ValueError): + c(alpha=alpha) + + @pytest.mark.parametrize("method", ["std", "mean", "var"]) + def 
test_numpy_compat(self, method): + # see gh-12811 + e = EWM(Series([2, 4, 6]), alpha=0.5) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py new file mode 100644 index 00000000..6b6367fd --- /dev/null +++ b/pandas/tests/window/test_expanding.py @@ -0,0 +1,134 @@ +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.window import Expanding +from pandas.tests.window.common import Base + + +class TestExpanding(Base): + def setup_method(self, method): + self._create_data() + + def test_doc_string(self): + + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.expanding(2).sum() + + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor(self, which): + # GH 12669 + + o = getattr(self, which) + c = o.expanding + + # valid + c(min_periods=1) + c(min_periods=1, center=True) + c(min_periods=1, center=False) + + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError): + c(min_periods=w) + with pytest.raises(ValueError): + c(min_periods=1, center=w) + + @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) + def test_numpy_compat(self, method): + # see gh-12811 + e = Expanding(Series([2, 4, 6]), window=2) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(e, method)(dtype=np.float64) + + @pytest.mark.parametrize( + "expander", + [ + 1, + pytest.param( + "ls", + marks=pytest.mark.xfail( + reason="GH#16425 
expanding with offset not supported" + ), + ), + ], + ) + def test_empty_df_expanding(self, expander): + # GH 15819 Verifies that datetime and integer expanding windows can be + # applied to empty DataFrames + + expected = DataFrame() + result = DataFrame().expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer expanding windows can be applied + # to empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).expanding(expander).sum() + tm.assert_frame_equal(result, expected) + + def test_missing_minp_zero(self): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.expanding(min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.expanding(min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) + def test_iter_raises(self, klass): + # https://github.com/pandas-dev/pandas/issues/11704 + # Iteration over a Window + obj = klass([1, 2, 3, 4]) + with pytest.raises(NotImplementedError): + iter(obj.expanding(2)) + + def test_expanding_axis(self, axis_frame): + # see gh-23372. 
+ df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame( + {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} + ) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) + + result = df.expanding(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_with_min_periods(constructor): + # GH 26996 + result = constructor(range(5)).expanding(min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 4.0, 5.0]) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_expanding_count_default_min_periods_with_null_values(constructor): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0] + + result = constructor(values).expanding().count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected) diff --git a/pandas/tests/window/test_grouper.py b/pandas/tests/window/test_grouper.py new file mode 100644 index 00000000..5b268727 --- /dev/null +++ b/pandas/tests/window/test_grouper.py @@ -0,0 +1,210 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm +from pandas.core.groupby.groupby import get_groupby + + +class TestGrouperGrouping: + def setup_method(self, method): + self.series = Series(np.arange(10)) + self.frame = DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)}) + + def test_mutated(self): + + msg = r"groupby\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + self.frame.groupby("A", foo=1) + + g = self.frame.groupby("A") + assert not g.mutated + g = get_groupby(self.frame, by="A", mutated=True) + assert g.mutated + + def test_getitem(self): + g = self.frame.groupby("A") + 
g_mutated = get_groupby(self.frame, by="A", mutated=True) + + expected = g_mutated.B.apply(lambda x: x.rolling(2).mean()) + + result = g.rolling(2).mean().B + tm.assert_series_equal(result, expected) + + result = g.rolling(2).B.mean() + tm.assert_series_equal(result, expected) + + result = g.B.rolling(2).mean() + tm.assert_series_equal(result, expected) + + result = self.frame.B.groupby(self.frame.A).rolling(2).mean() + tm.assert_series_equal(result, expected) + + def test_getitem_multiple(self): + + # GH 13174 + g = self.frame.groupby("A") + r = g.rolling(2) + g_mutated = get_groupby(self.frame, by="A", mutated=True) + expected = g_mutated.B.apply(lambda x: x.rolling(2).count()) + + result = r.B.count() + tm.assert_series_equal(result, expected) + + result = r.B.count() + tm.assert_series_equal(result, expected) + + def test_rolling(self): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]: + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.rolling(4), f)()) + tm.assert_frame_equal(result, expected) + + for f in ["std", "var"]: + result = getattr(r, f)(ddof=1) + expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1)) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"] + ) + def test_rolling_quantile(self, interpolation): + g = self.frame.groupby("A") + r = g.rolling(window=4) + result = r.quantile(0.4, interpolation=interpolation) + expected = g.apply( + lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation) + ) + tm.assert_frame_equal(result, expected) + + def test_rolling_corr_cov(self): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + for f in ["corr", "cov"]: + result = getattr(r, f)(self.frame) + + def func(x): + return getattr(x.rolling(4), f)(self.frame) + + expected = g.apply(func) + tm.assert_frame_equal(result, expected) + + result = getattr(r.B, 
f)(pairwise=True) + + def func(x): + return getattr(x.B.rolling(4), f)(pairwise=True) + + expected = g.apply(func) + tm.assert_series_equal(result, expected) + + def test_rolling_apply(self, raw): + g = self.frame.groupby("A") + r = g.rolling(window=4) + + # reduction + result = r.apply(lambda x: x.sum(), raw=raw) + expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw)) + tm.assert_frame_equal(result, expected) + + def test_rolling_apply_mutability(self): + # GH 14013 + df = pd.DataFrame({"A": ["foo"] * 3 + ["bar"] * 3, "B": [1] * 6}) + g = df.groupby("A") + + mi = pd.MultiIndex.from_tuples( + [("bar", 3), ("bar", 4), ("bar", 5), ("foo", 0), ("foo", 1), ("foo", 2)] + ) + + mi.names = ["A", None] + # Grouped column should not be a part of the output + expected = pd.DataFrame([np.nan, 2.0, 2.0] * 2, columns=["B"], index=mi) + + result = g.rolling(window=2).sum() + tm.assert_frame_equal(result, expected) + + # Call an arbitrary function on the groupby + g.sum() + + # Make sure nothing has been mutated + result = g.rolling(window=2).sum() + tm.assert_frame_equal(result, expected) + + def test_expanding(self): + g = self.frame.groupby("A") + r = g.expanding() + + for f in ["sum", "mean", "min", "max", "count", "kurt", "skew"]: + + result = getattr(r, f)() + expected = g.apply(lambda x: getattr(x.expanding(), f)()) + tm.assert_frame_equal(result, expected) + + for f in ["std", "var"]: + result = getattr(r, f)(ddof=0) + expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0)) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"] + ) + def test_expanding_quantile(self, interpolation): + g = self.frame.groupby("A") + r = g.expanding() + result = r.quantile(0.4, interpolation=interpolation) + expected = g.apply( + lambda x: x.expanding().quantile(0.4, interpolation=interpolation) + ) + tm.assert_frame_equal(result, expected) + + def 
test_expanding_corr_cov(self): + g = self.frame.groupby("A") + r = g.expanding() + + for f in ["corr", "cov"]: + result = getattr(r, f)(self.frame) + + def func(x): + return getattr(x.expanding(), f)(self.frame) + + expected = g.apply(func) + tm.assert_frame_equal(result, expected) + + result = getattr(r.B, f)(pairwise=True) + + def func(x): + return getattr(x.B.expanding(), f)(pairwise=True) + + expected = g.apply(func) + tm.assert_series_equal(result, expected) + + def test_expanding_apply(self, raw): + g = self.frame.groupby("A") + r = g.expanding() + + # reduction + result = r.apply(lambda x: x.sum(), raw=raw) + expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw)) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("expected_value,raw_value", [[1.0, True], [0.0, False]]) + def test_groupby_rolling(self, expected_value, raw_value): + # GH 31754 + + def foo(x): + return int(isinstance(x, np.ndarray)) + + df = pd.DataFrame({"id": [1, 1, 1], "value": [1, 2, 3]}) + result = df.groupby("id").value.rolling(1).apply(foo, raw=raw_value) + expected = Series( + [expected_value] * 3, + index=pd.MultiIndex.from_tuples( + ((1, 0), (1, 1), (1, 2)), names=["id", None] + ), + name="value", + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py new file mode 100644 index 00000000..cc8aef17 --- /dev/null +++ b/pandas/tests/window/test_numba.py @@ -0,0 +1,74 @@ +import numpy as np +import pytest + +import pandas.util._test_decorators as td + +from pandas import Series +import pandas._testing as tm + + +@td.skip_if_no("numba", "0.46.0") +@pytest.mark.filterwarnings("ignore:\\nThe keyword argument") +# Filter warnings when parallel=True and the function can't be parallelized by Numba +class TestApply: + @pytest.mark.parametrize("jit", [True, False]) + def test_numba_vs_cython(self, jit, nogil, parallel, nopython): + def f(x, *args): + arg_sum = 0 + for arg in args: 
+ arg_sum += arg + return np.mean(x) + arg_sum + + if jit: + import numba + + f = numba.jit(f) + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + args = (2,) + + s = Series(range(10)) + result = s.rolling(2).apply( + f, args=args, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = s.rolling(2).apply(f, engine="cython", args=args, raw=True) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("jit", [True, False]) + def test_cache(self, jit, nogil, parallel, nopython): + # Test that the functions are cached correctly if we switch functions + def func_1(x): + return np.mean(x) + 4 + + def func_2(x): + return np.std(x) * 5 + + if jit: + import numba + + func_1 = numba.jit(func_1) + func_2 = numba.jit(func_2) + + engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} + + roll = Series(range(10)).rolling(2) + result = roll.apply( + func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = roll.apply(func_1, engine="cython", raw=True) + tm.assert_series_equal(result, expected) + + # func_1 should be in the cache now + assert func_1 in roll._numba_func_cache + + result = roll.apply( + func_2, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = roll.apply(func_2, engine="cython", raw=True) + tm.assert_series_equal(result, expected) + # This run should use the cached func_1 + result = roll.apply( + func_1, engine="numba", engine_kwargs=engine_kwargs, raw=True + ) + expected = roll.apply(func_1, engine="cython", raw=True) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py new file mode 100644 index 00000000..bb305e93 --- /dev/null +++ b/pandas/tests/window/test_pairwise.py @@ -0,0 +1,191 @@ +import warnings + +import numpy as np +import pytest + +from pandas import DataFrame, Series, date_range +import pandas._testing as tm +from pandas.core.algorithms import 
safe_sort + + +class TestPairwise: + + # GH 7738 + df1s = [ + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", "C"]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1.0, 0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0.0, 1]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=["C", 1]), + DataFrame([[2.0, 4.0], [1.0, 2.0], [5.0, 2.0], [8.0, 1.0]], columns=[1, 0.0]), + DataFrame([[2, 4.0], [1, 2.0], [5, 2.0], [8, 1.0]], columns=[0, 1.0]), + DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.0]], columns=[1.0, "X"]), + ] + df2 = DataFrame( + [[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1]], + columns=["Y", "Z", "X"], + ) + s = Series([1, 1, 3, 8]) + + def compare(self, result, expected): + + # since we have sorted the results + # we can only compare non-nans + result = result.dropna().values + expected = expected.dropna().values + + tm.assert_numpy_array_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize("f", [lambda x: x.cov(), lambda x: x.corr()]) + def test_no_flex(self, f): + + # DataFrame methods (which do not call _flex_binary_moment()) + + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.columns) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().cov(pairwise=True), + lambda x: x.expanding().corr(pairwise=True), + lambda x: x.rolling(window=3).cov(pairwise=True), + lambda x: x.rolling(window=3).corr(pairwise=True), + lambda x: x.ewm(com=3).cov(pairwise=True), + lambda x: x.ewm(com=3).corr(pairwise=True), + ], + ) + def test_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=True + 
# note that we may construct the 1st level of the MI + # in a non-monotonic way, so compare accordingly + results = [] + for i, df in enumerate(self.df1s): + result = f(df) + tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) + tm.assert_numpy_array_equal( + safe_sort(result.index.levels[1]), safe_sort(df.columns.unique()) + ) + tm.assert_index_equal(result.columns, df.columns) + results.append(df) + + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x: x.expanding().cov(pairwise=False), + lambda x: x.expanding().corr(pairwise=False), + lambda x: x.rolling(window=3).cov(pairwise=False), + lambda x: x.rolling(window=3).corr(pairwise=False), + lambda x: x.ewm(com=3).cov(pairwise=False), + lambda x: x.ewm(com=3).corr(pairwise=False), + ], + ) + def test_no_pairwise_with_self(self, f): + + # DataFrame with itself, pairwise=False + results = [f(df) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y, pairwise=True), + lambda x, y: x.expanding().corr(y, pairwise=True), + lambda x, y: x.rolling(window=3).cov(y, pairwise=True), + lambda x, y: x.rolling(window=3).corr(y, pairwise=True), + lambda x, y: x.ewm(com=3).cov(y, pairwise=True), + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), + ], + ) + def test_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=True + results = [f(df, self.df2) for df in self.df1s] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index.levels[0], df.index, check_names=False) + tm.assert_numpy_array_equal( + safe_sort(result.index.levels[1]), safe_sort(self.df2.columns.unique()) + ) + for i, result in 
enumerate(results): + if i > 0: + self.compare(result, results[0]) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y, pairwise=False), + lambda x, y: x.expanding().corr(y, pairwise=False), + lambda x, y: x.rolling(window=3).cov(y, pairwise=False), + lambda x, y: x.rolling(window=3).corr(y, pairwise=False), + lambda x, y: x.ewm(com=3).cov(y, pairwise=False), + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), + ], + ) + def test_no_pairwise_with_other(self, f): + + # DataFrame with another DataFrame, pairwise=False + results = [ + f(df, self.df2) if df.columns.is_unique else None for df in self.df1s + ] + for (df, result) in zip(self.df1s, results): + if result is not None: + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + # we can have int and str columns + expected_index = df.index.union(self.df2.index) + expected_columns = df.columns.union(self.df2.columns) + tm.assert_index_equal(result.index, expected_index) + tm.assert_index_equal(result.columns, expected_columns) + else: + with pytest.raises(ValueError, match="'arg1' columns are not unique"): + f(df, self.df2) + with pytest.raises(ValueError, match="'arg2' columns are not unique"): + f(self.df2, df) + + @pytest.mark.parametrize( + "f", + [ + lambda x, y: x.expanding().cov(y), + lambda x, y: x.expanding().corr(y), + lambda x, y: x.rolling(window=3).cov(y), + lambda x, y: x.rolling(window=3).corr(y), + lambda x, y: x.ewm(com=3).cov(y), + lambda x, y: x.ewm(com=3).corr(y), + ], + ) + def test_pairwise_with_series(self, f): + + # DataFrame with a Series + results = [f(df, self.s) for df in self.df1s] + [ + f(self.s, df) for df in self.df1s + ] + for (df, result) in zip(self.df1s, results): + tm.assert_index_equal(result.index, df.index) + tm.assert_index_equal(result.columns, df.columns) + for i, result in enumerate(results): + if i > 0: + self.compare(result, results[0]) + + def test_corr_freq_memory_error(self): + # GH 31789 + s = 
Series(range(5), index=date_range("2020", periods=5)) + result = s.rolling("12H").corr(s) + expected = Series([np.nan] * 5, index=date_range("2020", periods=5)) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py new file mode 100644 index 00000000..80a732c6 --- /dev/null +++ b/pandas/tests/window/test_rolling.py @@ -0,0 +1,447 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall +import pandas.util._test_decorators as td + +import pandas as pd +from pandas import DataFrame, Index, Series +import pandas._testing as tm +from pandas.core.window import Rolling +from pandas.tests.window.common import Base + + +class TestRolling(Base): + def setup_method(self, method): + self._create_data() + + def test_doc_string(self): + + df = DataFrame({"B": [0, 1, 2, np.nan, 4]}) + df + df.rolling(2).sum() + df.rolling(2, min_periods=1).sum() + + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor(self, which): + # GH 12669 + + o = getattr(self, which) + c = o.rolling + + # valid + c(window=2) + c(window=2, min_periods=1) + c(window=2, min_periods=1, center=True) + c(window=2, min_periods=1, center=False) + + # GH 13383 + with pytest.raises(ValueError): + c(0) + c(-1) + + # not valid + for w in [2.0, "foo", np.array([2])]: + with pytest.raises(ValueError): + c(window=w) + with pytest.raises(ValueError): + c(window=2, min_periods=w) + with pytest.raises(ValueError): + c(window=2, min_periods=1, center=w) + + @td.skip_if_no_scipy + @pytest.mark.parametrize("which", ["series", "frame"]) + def test_constructor_with_win_type(self, which): + # GH 13383 + o = getattr(self, which) + c = o.rolling + with pytest.raises(ValueError): + c(-1, win_type="boxcar") + + @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3)]) + def test_constructor_with_timedelta_window(self, window): + # GH 
15440 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, + index=pd.date_range("2015-12-24", periods=n, freq="D"), + ) + expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) + + result = df.rolling(window=window).sum() + expected = DataFrame( + {"value": expected_data}, + index=pd.date_range("2015-12-24", periods=n, freq="D"), + ) + tm.assert_frame_equal(result, expected) + expected = df.rolling("3D").sum() + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("window", [timedelta(days=3), pd.Timedelta(days=3), "3D"]) + def test_constructor_timedelta_window_and_minperiods(self, window, raw): + # GH 15305 + n = 10 + df = DataFrame( + {"value": np.arange(n)}, + index=pd.date_range("2017-08-08", periods=n, freq="D"), + ) + expected = DataFrame( + {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, + index=pd.date_range("2017-08-08", periods=n, freq="D"), + ) + result_roll_sum = df.rolling(window=window, min_periods=2).sum() + result_roll_generic = df.rolling(window=window, min_periods=2).apply( + sum, raw=raw + ) + tm.assert_frame_equal(result_roll_sum, expected) + tm.assert_frame_equal(result_roll_generic, expected) + + @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) + def test_numpy_compat(self, method): + # see gh-12811 + r = Rolling(Series([2, 4, 6]), window=2) + + msg = "numpy operations are not valid with window objects" + + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(1, 2, 3) + with pytest.raises(UnsupportedFunctionCall, match=msg): + getattr(r, method)(dtype=np.float64) + + def test_closed(self): + df = DataFrame({"A": [0, 1, 2, 3, 4]}) + # closed only allowed for datetimelike + with pytest.raises(ValueError): + df.rolling(window=3, closed="neither") + + @pytest.mark.parametrize("closed", ["neither", "left"]) + def test_closed_empty(self, closed, arithmetic_win_operators): + # GH 26005 + func_name = arithmetic_win_operators + ser = pd.Series( + 
data=np.arange(5), index=pd.date_range("2000", periods=5, freq="2D") + ) + roll = ser.rolling("1D", closed=closed) + + result = getattr(roll, func_name)() + expected = pd.Series([np.nan] * 5, index=ser.index) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("func", ["min", "max"]) + def test_closed_one_entry(self, func): + # GH24718 + ser = pd.Series(data=[2], index=pd.date_range("2000", periods=1)) + result = getattr(ser.rolling("10D", closed="left"), func)() + tm.assert_series_equal(result, pd.Series([np.nan], index=ser.index)) + + @pytest.mark.parametrize("func", ["min", "max"]) + def test_closed_one_entry_groupby(self, func): + # GH24718 + ser = pd.DataFrame( + data={"A": [1, 1, 2], "B": [3, 2, 1]}, + index=pd.date_range("2000", periods=3), + ) + result = getattr( + ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func + )() + exp_idx = pd.MultiIndex.from_arrays( + arrays=[[1, 1, 2], ser.index], names=("A", None) + ) + expected = pd.Series(data=[np.nan, 3, np.nan], index=exp_idx, name="B") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("input_dtype", ["int", "float"]) + @pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [0.0, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "both", [0.0, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("min", "neither", [np.nan, 0, 0, 1, 2, 3, 4, 5, 6, 7]), + ("min", "left", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, 6]), + ("max", "right", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "both", [0.0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), + ("max", "neither", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ("max", "left", [np.nan, 0, 1, 2, 3, 4, 5, 6, 7, 8]), + ], + ) + def test_closed_min_max_datetime(self, input_dtype, func, closed, expected): + # see gh-21704 + ser = pd.Series( + data=np.arange(10).astype(input_dtype), + index=pd.date_range("2000", periods=10), + ) + + result = getattr(ser.rolling("3D", closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + 
tm.assert_series_equal(result, expected) + + def test_closed_uneven(self): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + + # uneven + ser = ser.drop(index=ser.index[[1, 5]]) + result = ser.rolling("3D", closed="left").min() + expected = pd.Series([np.nan, 0, 0, 2, 3, 4, 6, 6], index=ser.index) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "func,closed,expected", + [ + ("min", "right", [np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "both", [np.nan, 0, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("min", "neither", [np.nan, np.nan, 0, 1, 2, 3, 4, 5, np.nan, np.nan]), + ("min", "left", [np.nan, np.nan, 0, 0, 1, 2, 3, 4, 5, np.nan]), + ("max", "right", [np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan, np.nan]), + ("max", "both", [np.nan, 1, 2, 3, 4, 5, 6, 6, 6, np.nan]), + ("max", "neither", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, np.nan, np.nan]), + ("max", "left", [np.nan, np.nan, 1, 2, 3, 4, 5, 6, 6, np.nan]), + ], + ) + def test_closed_min_max_minp(self, func, closed, expected): + # see gh-21704 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + ser[ser.index[-3:]] = np.nan + result = getattr(ser.rolling("3D", min_periods=2, closed=closed), func)() + expected = pd.Series(expected, index=ser.index) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "closed,expected", + [ + ("right", [0, 0.5, 1, 2, 3, 4, 5, 6, 7, 8]), + ("both", [0, 0.5, 1, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("neither", [np.nan, 0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5]), + ("left", [np.nan, 0, 0.5, 1, 2, 3, 4, 5, 6, 7]), + ], + ) + def test_closed_median_quantile(self, closed, expected): + # GH 26005 + ser = pd.Series(data=np.arange(10), index=pd.date_range("2000", periods=10)) + roll = ser.rolling("3D", closed=closed) + expected = pd.Series(expected, index=ser.index) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.5) + 
tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("roller", ["1s", 1]) + def tests_empty_df_rolling(self, roller): + # GH 15819 Verifies that datetime and integer rolling windows can be + # applied to empty DataFrames + expected = DataFrame() + result = DataFrame().rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + # Verifies that datetime and integer rolling windows can be applied to + # empty DataFrames with datetime index + expected = DataFrame(index=pd.DatetimeIndex([])) + result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum() + tm.assert_frame_equal(result, expected) + + def test_empty_window_median_quantile(self): + # GH 26005 + expected = pd.Series([np.nan, np.nan, np.nan]) + roll = pd.Series(np.arange(3)).rolling(0) + + result = roll.median() + tm.assert_series_equal(result, expected) + + result = roll.quantile(0.1) + tm.assert_series_equal(result, expected) + + def test_missing_minp_zero(self): + # https://github.com/pandas-dev/pandas/pull/18921 + # minp=0 + x = pd.Series([np.nan]) + result = x.rolling(1, min_periods=0).sum() + expected = pd.Series([0.0]) + tm.assert_series_equal(result, expected) + + # minp=1 + result = x.rolling(1, min_periods=1).sum() + expected = pd.Series([np.nan]) + tm.assert_series_equal(result, expected) + + def test_missing_minp_zero_variable(self): + # https://github.com/pandas-dev/pandas/pull/18921 + x = pd.Series( + [np.nan] * 4, + index=pd.DatetimeIndex( + ["2017-01-01", "2017-01-04", "2017-01-06", "2017-01-07"] + ), + ) + result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum() + expected = pd.Series(0.0, index=x.index) + tm.assert_series_equal(result, expected) + + def test_multi_index_names(self): + + # GH 16789, 16825 + cols = pd.MultiIndex.from_product( + [["A", "B"], ["C", "D", "E"]], names=["1", "2"] + ) + df = DataFrame(np.ones((10, 6)), columns=cols) + result = df.rolling(3).cov() + + tm.assert_index_equal(result.columns, df.columns) + assert result.index.names == 
[None, "1", "2"] + + @pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) + def test_iter_raises(self, klass): + # https://github.com/pandas-dev/pandas/issues/11704 + # Iteration over a Window + obj = klass([1, 2, 3, 4]) + with pytest.raises(NotImplementedError): + iter(obj.rolling(2)) + + def test_rolling_axis_sum(self, axis_frame): + # see gh-23372. + df = DataFrame(np.ones((10, 20))) + axis = df._get_axis_number(axis_frame) + + if axis == 0: + expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) + else: + # axis == 1 + expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) + + result = df.rolling(3, axis=axis_frame).sum() + tm.assert_frame_equal(result, expected) + + def test_rolling_axis_count(self, axis_frame): + # see gh-26055 + df = DataFrame({"x": range(3), "y": range(3)}) + + axis = df._get_axis_number(axis_frame) + + if axis in [0, "index"]: + expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) + else: + expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) + + result = df.rolling(2, axis=axis_frame, min_periods=0).count() + tm.assert_frame_equal(result, expected) + + def test_readonly_array(self): + # GH-27766 + arr = np.array([1, 3, np.nan, 3, 5]) + arr.setflags(write=False) + result = pd.Series(arr).rolling(2).mean() + expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) + tm.assert_series_equal(result, expected) + + def test_rolling_datetime(self, axis_frame, tz_naive_fixture): + # GH-28192 + tz = tz_naive_fixture + df = pd.DataFrame( + { + i: [1] * 2 + for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz) + } + ) + if axis_frame in [0, "index"]: + result = df.T.rolling("2D", axis=axis_frame).sum().T + else: + result = df.rolling("2D", axis=axis_frame).sum() + expected = pd.DataFrame( + { + **{ + i: [1.0] * 2 + for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) + }, + **{ + i: [2.0] * 2 + for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) + }, + } + ) + 
tm.assert_frame_equal(result, expected) + + +def test_rolling_window_as_string(): + # see gh-22590 + date_today = datetime.now() + days = pd.date_range(date_today, date_today + timedelta(365), freq="D") + + npr = np.random.RandomState(seed=421) + + data = npr.randint(1, high=100, size=len(days)) + df = DataFrame({"DateCol": days, "metric": data}) + + df.set_index("DateCol", inplace=True) + result = df.rolling(window="21D", min_periods=2, closed="left")["metric"].agg("max") + + expData = ( + [np.nan] * 2 + + [88.0] * 16 + + [97.0] * 9 + + [98.0] + + [99.0] * 21 + + [95.0] * 16 + + [93.0] * 5 + + [89.0] * 5 + + [96.0] * 21 + + [94.0] * 14 + + [90.0] * 13 + + [88.0] * 2 + + [90.0] * 9 + + [96.0] * 21 + + [95.0] * 6 + + [91.0] + + [87.0] * 6 + + [92.0] * 21 + + [83.0] * 2 + + [86.0] * 10 + + [87.0] * 5 + + [98.0] * 21 + + [97.0] * 14 + + [93.0] * 7 + + [87.0] * 4 + + [86.0] * 4 + + [95.0] * 21 + + [85.0] * 14 + + [83.0] * 2 + + [76.0] * 5 + + [81.0] * 2 + + [98.0] * 21 + + [95.0] * 14 + + [91.0] * 7 + + [86.0] + + [93.0] * 3 + + [95.0] * 20 + ) + + expected = Series(expData, index=Index(days, name="DateCol"), name="metric") + tm.assert_series_equal(result, expected) + + +def test_min_periods1(): + # GH#6795 + df = pd.DataFrame([0, 1, 2, 1, 0], columns=["a"]) + result = df["a"].rolling(3, center=True, min_periods=1).max() + expected = pd.Series([1.0, 2.0, 2.0, 2.0, 1.0], name="a") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_count_with_min_periods(constructor): + # GH 26996 + result = constructor(range(5)).rolling(3, min_periods=3).count() + expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0]) + tm.assert_equal(result, expected) + + +@pytest.mark.parametrize("constructor", [Series, DataFrame]) +def test_rolling_count_default_min_periods_with_null_values(constructor): + # GH 26996 + values = [1, 2, 3, np.nan, 4, 5, 6] + expected_counts = [1.0, 2.0, 3.0, 2.0, 2.0, 2.0, 3.0] + + result = 
constructor(values).rolling(3).count() + expected = constructor(expected_counts) + tm.assert_equal(result, expected) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py new file mode 100644 index 00000000..0c5289cd --- /dev/null +++ b/pandas/tests/window/test_timeseries_window.py @@ -0,0 +1,745 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + to_datetime, +) +import pandas._testing as tm + +import pandas.tseries.offsets as offsets + + +class TestRollingTS: + + # rolling time-series friendly + # xref GH13327 + + def setup_method(self, method): + + self.regular = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ).set_index("A") + + self.ragged = DataFrame({"B": range(5)}) + self.ragged.index = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + def test_doc_string(self): + + df = DataFrame( + {"B": [0, 1, 2, np.nan, 4]}, + index=[ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ], + ) + df + df.rolling("2s").sum() + + def test_valid(self): + + df = self.regular + + # not a valid freq + with pytest.raises(ValueError): + df.rolling(window="foobar") + + # not a datetimelike index + with pytest.raises(ValueError): + df.reset_index().rolling(window="foobar") + + # non-fixed freqs + for freq in ["2MS", offsets.MonthBegin(2)]: + with pytest.raises(ValueError): + df.rolling(window=freq) + + for freq in ["1D", offsets.Day(2), "2ms"]: + df.rolling(window=freq) + + # non-integer min_periods + for minp in [1.0, "foo", np.array([1, 2, 3])]: + with pytest.raises(ValueError): + df.rolling(window="1D", min_periods=minp) + + # center is not implemented + with 
pytest.raises(NotImplementedError): + df.rolling(window="1D", center=True) + + def test_on(self): + + df = self.regular + + # not a valid column + with pytest.raises(ValueError): + df.rolling(window="2s", on="foobar") + + # column is valid + df = df.copy() + df["C"] = date_range("20130101", periods=len(df)) + df.rolling(window="2d", on="C").sum() + + # invalid columns + with pytest.raises(ValueError): + df.rolling(window="2d", on="B") + + # ok even though on non-selected + df.rolling(window="2d", on="C").B.sum() + + def test_monotonic_on(self): + + # on/index must be monotonic + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ) + + assert df.A.is_monotonic + df.rolling("2s", on="A").sum() + + df = df.set_index("A") + assert df.index.is_monotonic + df.rolling("2s").sum() + + def test_non_monotonic_on(self): + # GH 19248 + df = DataFrame( + {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)} + ) + df = df.set_index("A") + non_monotonic_index = df.index.to_list() + non_monotonic_index[0] = non_monotonic_index[3] + df.index = non_monotonic_index + + assert not df.index.is_monotonic + + with pytest.raises(ValueError): + df.rolling("2s").sum() + + df = df.reset_index() + with pytest.raises(ValueError): + df.rolling("2s", on="A").sum() + + def test_frame_on(self): + + df = DataFrame( + {"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")} + ) + + df["A"] = [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + + # we are doing simulating using 'on' + expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True) + + result = df.rolling("2s", on="A").B.sum() + tm.assert_series_equal(result, expected) + + # test as a frame + # we should be ignoring the 'on' as an aggregation column + # note that the expected is setting, computing, and resetting + # so the columns need 
to be switched compared + # to the actual result where they are ordered as in the + # original + expected = ( + df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]] + ) + + result = df.rolling("2s", on="A")[["B"]].sum() + tm.assert_frame_equal(result, expected) + + def test_frame_on2(self): + + # using multiple aggregation columns + df = DataFrame( + { + "A": [0, 1, 2, 3, 4], + "B": [0, 1, 2, np.nan, 4], + "C": Index( + [ + Timestamp("20130101 09:00:00"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:05"), + Timestamp("20130101 09:00:06"), + ] + ), + }, + columns=["A", "C", "B"], + ) + + expected1 = DataFrame( + {"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]}, + columns=["A", "C", "B"], + ) + + result = df.rolling("2s", on="C").sum() + expected = expected1 + tm.assert_frame_equal(result, expected) + + expected = Series([0, 1, 3, np.nan, 4], name="B") + result = df.rolling("2s", on="C").B.sum() + tm.assert_series_equal(result, expected) + + expected = expected1[["A", "B", "C"]] + result = df.rolling("2s", on="C")[["A", "B", "C"]].sum() + tm.assert_frame_equal(result, expected) + + def test_basic_regular(self): + + df = self.regular.copy() + + df.index = date_range("20130101", periods=5, freq="D") + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="1D").sum() + tm.assert_frame_equal(result, expected) + + df.index = date_range("20130101", periods=5, freq="2D") + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="2D", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(window=1, min_periods=1).sum() + result = df.rolling(window="2D", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(window=1).sum() + result = df.rolling(window="2D").sum() + tm.assert_frame_equal(result, expected) + + def test_min_periods(self): + + # compare for min_periods + df 
= self.regular + + # these slightly different + expected = df.rolling(2, min_periods=1).sum() + result = df.rolling("2s").sum() + tm.assert_frame_equal(result, expected) + + expected = df.rolling(2, min_periods=1).sum() + result = df.rolling("2s", min_periods=1).sum() + tm.assert_frame_equal(result, expected) + + def test_closed(self): + + # xref GH13965 + + df = DataFrame( + {"A": [1] * 5}, + index=[ + Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:03"), + Timestamp("20130101 09:00:04"), + Timestamp("20130101 09:00:06"), + ], + ) + + # closed must be 'right', 'left', 'both', 'neither' + with pytest.raises(ValueError): + self.regular.rolling(window="2s", closed="blabla") + + expected = df.copy() + expected["A"] = [1.0, 2, 2, 2, 1] + result = df.rolling("2s", closed="right").sum() + tm.assert_frame_equal(result, expected) + + # default should be 'right' + result = df.rolling("2s").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [1.0, 2, 3, 3, 2] + result = df.rolling("2s", closed="both").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 2, 2, 1] + result = df.rolling("2s", closed="left").sum() + tm.assert_frame_equal(result, expected) + + expected = df.copy() + expected["A"] = [np.nan, 1.0, 1, 1, np.nan] + result = df.rolling("2s", closed="neither").sum() + tm.assert_frame_equal(result, expected) + + def test_ragged_sum(self): + + df = self.ragged + result = df.rolling(window="1s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 2, 3, 4] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=1).sum() + expected = df.copy() + expected["B"] = [0.0, 1, 3, 3, 7] + tm.assert_frame_equal(result, expected) + + result = df.rolling(window="2s", min_periods=2).sum() + expected = df.copy() + expected["B"] = [np.nan, np.nan, 3, np.nan, 7] + tm.assert_frame_equal(result, 
def test_ragged_mean(self):
    """Rolling mean of the ``self.ragged`` fixture over offset windows."""
    frame = self.ragged

    # window -> expected content of column "B"
    cases = (
        ("1s", [0.0, 1, 2, 3, 4]),
        ("2s", [0.0, 1, 1.5, 3.0, 3.5]),
    )
    for window, means in cases:
        result = frame.rolling(window=window, min_periods=1).mean()
        expected = frame.copy()
        expected["B"] = means
        tm.assert_frame_equal(result, expected)
def test_ragged_var(self):
    """Rolling variance of the ``self.ragged`` fixture for both ``ddof`` values."""
    frame = self.ragged

    # (window, ddof, expected content of column "B")
    cases = [
        ("1s", 0, [0.0] * 5),
        ("1s", 1, [np.nan] * 5),
        ("3s", 0, [0.0] + [0.25] * 4),
        ("5s", 1, [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]),
    ]
    for window, ddof, variances in cases:
        result = frame.rolling(window=window, min_periods=1).var(ddof=ddof)
        expected = frame.copy()
        expected["B"] = variances
        tm.assert_frame_equal(result, expected)
def test_regular_min(self):
    """
    Rolling ``min`` over offset windows on a regular one-second index.

    Expected values are worked out by hand for windows of 1, 2 and 5
    seconds.
    """
    df = DataFrame(
        {"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
    ).set_index("A")
    # with one-second spacing a "1s" window only ever holds the current row
    result = df.rolling("1s").min()
    expected = df.copy()
    expected["B"] = [0.0, 1, 2, 3, 4]
    tm.assert_frame_equal(result, expected)

    # V-shaped data so the minimum actually moves as the window widens
    df = DataFrame(
        {"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
    ).set_index("A")

    # NOTE: a stale assert_frame_equal used to sit here; it re-compared the
    # result/expected pair of the previous case and verified nothing new.
    result = df.rolling("2s").min()
    expected = df.copy()
    expected["B"] = [5.0, 4, 3, 3, 4]
    tm.assert_frame_equal(result, expected)

    result = df.rolling("5s").min()
    expected = df.copy()
    expected["B"] = [5.0, 4, 3, 3, 3]
    tm.assert_frame_equal(result, expected)
def test_perf_min(self):
    """
    Offset-based and count-based rolling ``min`` must agree on a long,
    regularly spaced series.
    """
    N = 10000

    dfp = DataFrame(
        {"B": np.random.randn(N)}, index=date_range("20130101", periods=N, freq="s")
    )

    # (fixed window size, equivalent offset on a one-second index)
    for periods, offset in ((2, "2s"), (200, "200s")):
        expected = dfp.rolling(periods, min_periods=1).min()
        result = dfp.rolling(offset).min()
        # Compare magnitudes: a signed difference would let an arbitrarily
        # large negative error slip through the "< 0.01" check.
        assert ((result - expected).abs() < 0.01).all().bool()
def test_all(self):
    """Integer window of 1 and a "1s" offset window must give equal results."""
    doubled = self.regular * 2
    by_count = doubled.rolling(window=1)
    by_time = doubled.rolling(window="1s")

    # zero-argument reductions first, then quantile, which needs ``q``
    checks = [
        (name, ())
        for name in (
            "sum",
            "mean",
            "count",
            "median",
            "std",
            "var",
            "kurt",
            "skew",
            "min",
            "max",
        )
    ]
    checks.append(("quantile", (0.5,)))

    for name, args in checks:
        result = getattr(by_time, name)(*args)
        expected = getattr(by_count, name)(*args)
        tm.assert_frame_equal(result, expected)
def test_rolling_cov_offset(self):
    """Offset-window ``cov`` of an hourly series with itself (GH16058)."""
    hours = date_range("2017-01-01", periods=24, freq="1h")
    ser = Series(np.arange(len(hours)), index=hours)
    n = len(hours)

    # (offset window, equivalent fixed window, expected values)
    cases = [
        ("2h", 2, [np.nan] + [0.5] * (n - 1)),
        ("3h", 3, [np.nan, 0.5] + [1.0] * (n - 2)),
    ]
    for offset, periods, values in cases:
        result = ser.rolling(offset).cov()
        expected = Series(values, index=hours)
        tm.assert_series_equal(result, expected)

        # the count-based window with min_periods=1 has to agree as well
        expected2 = ser.rolling(periods, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)
@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestWindow(Base):
    """Constructor and aggregation checks for ``win_type`` rolling windows."""

    def setup_method(self, method):
        # presumably populates ``self.series`` / ``self.frame``, which the
        # tests below fetch through ``getattr(self, which)`` - TODO confirm
        self._create_data()

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("which", ["series", "frame"])
    def test_constructor(self, which):
        """Accept well-formed boxcar arguments, reject malformed ones (GH 12669)."""
        make_roller = getattr(self, which).rolling

        # valid: ``center`` omitted, True and False
        for extra in ({}, {"center": True}, {"center": False}):
            make_roller(win_type="boxcar", window=2, min_periods=1, **extra)

        # not valid as ``min_periods`` nor as ``center``
        for bad in [2.0, "foo", np.array([2])]:
            with pytest.raises(ValueError):
                make_roller(win_type="boxcar", window=2, min_periods=bad)
            with pytest.raises(ValueError):
                make_roller(win_type="boxcar", window=2, min_periods=1, center=bad)

        # not valid as ``win_type``
        for bad_type in ["foobar", 1]:
            with pytest.raises(ValueError):
                make_roller(win_type=bad_type, window=2)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("which", ["series", "frame"])
    def test_constructor_with_win_type(self, which, win_types):
        """Building a roller with the supplied ``win_types`` must not raise (GH 12669)."""
        getattr(self, which).rolling(win_type=win_types, window=2)

    @pytest.mark.parametrize("method", ["sum", "mean"])
    def test_numpy_compat(self, method):
        """numpy-style positional or keyword arguments are refused (see gh-12811)."""
        window = Window(Series([2, 4, 6]), window=[0, 2])
        msg = "numpy operations are not valid with window objects"

        # first positional extras, then a numpy-only keyword
        for args, kwargs in (((1, 2, 3), {}), ((), {"dtype": np.float64})):
            with pytest.raises(UnsupportedFunctionCall, match=msg):
                getattr(window, method)(*args, **kwargs)

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("arg", ["median", "kurt", "skew"])
    def test_agg_function_support(self, arg):
        """``agg`` rejects reductions a weighted window does not provide."""
        frame = pd.DataFrame({"A": np.arange(5)})
        roller = frame.rolling(2, win_type="triang")
        msg = f"'{arg}' is not a valid function for 'Window' object"

        # the bare name, a list of names, and a per-column mapping
        for spec in (arg, [arg], {"A": arg}):
            with pytest.raises(AttributeError, match=msg):
                roller.agg(spec)
def to_offset(freq) -> Optional[DateOffset]:
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None
        A DateOffset is returned unchanged. A tuple holds a rule name and
        a stride, in either order. A timedelta is converted component by
        component. Anything else is parsed as a frequency string.

    Returns
    -------
    DateOffset
        None if freq is None.

    Raises
    ------
    ValueError
        If freq is an invalid frequency

    See Also
    --------
    DateOffset

    Examples
    --------
    >>> to_offset('5min')
    <5 * Minutes>

    >>> to_offset('1D1H')
    <25 * Hours>

    >>> to_offset(('W', 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, 'B'))
    <2 * BusinessDays>

    >>> to_offset(datetime.timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    if isinstance(freq, DateOffset):
        return freq

    if isinstance(freq, tuple):
        name = freq[0]
        stride = freq[1]
        # accept (stride, name) as well as (name, stride)
        if isinstance(stride, str):
            name, stride = stride, name
        # only the first element of the returned pair is kept
        name, _ = libfreqs._base_and_stride(name)
        delta = _get_offset(name) * stride

    elif isinstance(freq, timedelta):
        delta = None
        freq = Timedelta(freq)
        try:
            # add up one offset per non-zero component of the Timedelta
            for name in freq.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freq.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except ValueError:
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    else:
        delta = None
        stride_sign = None
        try:
            splitted = re.split(libfreqs.opattern, freq)
            if splitted[-1] != "" and not splitted[-1].isspace():
                # the last element must be blank
                raise ValueError("last element must be blank")
            # the split result is read in groups of four, of which the first
            # three entries are used: separator, stride and rule name
            for sep, stride, name in zip(
                splitted[0::4], splitted[1::4], splitted[2::4]
            ):
                if sep != "" and not sep.isspace():
                    raise ValueError("separator must be spaces")
                prefix = libfreqs._lite_rule_alias.get(name) or name
                # the sign is taken from the first component only and then
                # applied to every component (see the np.fabs call below)
                if stride_sign is None:
                    stride_sign = -1 if stride.startswith("-") else 1
                # a missing stride means a stride of one
                if not stride:
                    stride = 1
                # NOTE(review): presumably turns a decimal stride into a whole
                # stride of a finer-grained rule - confirm against Resolution
                if prefix in Resolution._reso_str_bump_map.keys():
                    stride, name = Resolution.get_stride_from_decimal(
                        float(stride), prefix
                    )
                stride = int(stride)
                offset = _get_offset(name)
                offset = offset * int(np.fabs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except (ValueError, TypeError):
            # every parsing failure is reported with the same message
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    # nothing was accumulated, e.g. an all-zero timedelta or an empty string
    if delta is None:
        raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    return delta
def __init__(self, index, warn: bool = True):
    """
    Store the index, its integer values and a monotonicity flag.

    Parameters
    ----------
    index : object exposing ``asi8`` and, optionally, ``tz``
    warn : bool, default True
        Kept on the instance as ``self.warn``.

    Raises
    ------
    ValueError
        If ``index`` has fewer than three entries.
    """
    self.index = index

    raw = index.asi8
    # The raw values are implicitly in UTC; for a tz-aware index they are
    # moved to that timezone so they are in local time
    zone = getattr(index, "tz", None)
    self.values = raw if zone is None else tz_convert(raw, UTC, zone)

    self.warn = warn

    if len(index) < 3:
        raise ValueError("Need at least 3 dates to infer frequency")

    self.is_monotonic = (
        index._is_monotonic_increasing or index._is_monotonic_decreasing
    )
def get_freq(self) -> Optional[str]:
    """
    Work out the frequency string that describes the spacing of
    self.values.

    Returns
    -------
    str or None
        None when the index is not monotonic, is not unique, or no rule
        tried here matches.
    """
    if not (self.is_monotonic and self.index._is_unique):
        return None

    # a first delta that is a whole number of days goes to the daily rules
    first_delta = self.deltas[0]
    if _is_multiple(first_delta, _ONE_DAY):
        return self._infer_daily_rule()

    # Business hourly, maybe. 17: one day / 65: one weekend
    if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
        return "BH"

    # Possibly intraday frequency. From here on the original .asi8 values
    # are used, as the modified values will not work around DST
    # transitions. See #8772
    if not self.is_unique_asi8:
        return None

    step = self.deltas_asi8[0]
    # tried from the largest unit down; the first one dividing the step wins
    units = (
        ("H", _ONE_HOUR),
        ("T", _ONE_MINUTE),
        ("S", _ONE_SECOND),
        ("L", _ONE_MILLI),
        ("U", _ONE_MICRO),
    )
    for code, unit in units:
        if _is_multiple(step, unit):
            return _maybe_add_count(code, step / unit)

    # no larger unit divides the step: report it with the "N" alias
    return _maybe_add_count("N", step)
def _get_quarterly_rule(self) -> Optional[str]:
    """
    Return the quarterly alias prefix for the index, if any.

    The month differences must be a single value divisible by three;
    the alias is then chosen from the result of
    ``self.month_position_check()``. None is returned otherwise.
    """
    month_steps = self.mdiffs
    if len(month_steps) > 1 or month_steps[0] % 3 != 0:
        return None

    rule_by_position = {"cs": "QS", "bs": "BQS", "ce": "Q", "be": "BQ"}
    return rule_by_position.get(self.month_position_check())
def _maybe_add_count(base: str, count: float) -> str:
    """
    Prefix ``base`` with ``count`` unless the count is exactly one.

    ``count`` must be a whole number; it is rendered without a decimal
    point.
    """
    if count == 1:
        return base
    assert count == int(count)
    return f"{int(count)}{base}"
def next_monday_or_tuesday(dt):
    """
    Observance rule for the second of two adjacent holidays.

    A Saturday is moved to the following Monday; a Sunday or a Monday is
    moved to the following Tuesday (Monday is already taken by the
    adjacent holiday on the day before). Any other day is returned
    unchanged.
    """
    # weekday() -> number of days to move forward
    shift_by_weekday = {5: 2, 6: 2, 0: 1}
    shift = shift_by_weekday.get(dt.weekday())
    if shift is None:
        return dt
    return dt + timedelta(shift)
def next_workday(dt):
    """
    Return the first weekday (Mon-Fri) strictly after ``dt``; used for
    observances.
    """
    one_day = timedelta(days=1)
    while True:
        dt += one_day
        # Mon-Fri are 0-4
        if dt.weekday() <= 4:
            return dt
July3rd = Holiday('July 3rd', month=7, day=3, + days_of_week=(0, 1, 2, 3)) + """ + if offset is not None and observance is not None: + raise NotImplementedError("Cannot use both offset and observance.") + + self.name = name + self.year = year + self.month = month + self.day = day + self.offset = offset + self.start_date = ( + Timestamp(start_date) if start_date is not None else start_date + ) + self.end_date = Timestamp(end_date) if end_date is not None else end_date + self.observance = observance + assert days_of_week is None or type(days_of_week) == tuple + self.days_of_week = days_of_week + + def __repr__(self) -> str: + info = "" + if self.year is not None: + info += f"year={self.year}, " + info += f"month={self.month}, day={self.day}, " + + if self.offset is not None: + info += f"offset={self.offset}" + + if self.observance is not None: + info += f"observance={self.observance}" + + repr = f"Holiday: {self.name} ({info})" + return repr + + def dates(self, start_date, end_date, return_name=False): + """ + Calculate holidays observed between start date and end date + + Parameters + ---------- + start_date : starting date, datetime-like, optional + end_date : ending date, datetime-like, optional + return_name : bool, optional, default=False + If True, return a series that has dates and holiday names. + False will only return dates. 
+ """ + start_date = Timestamp(start_date) + end_date = Timestamp(end_date) + + filter_start_date = start_date + filter_end_date = end_date + + if self.year is not None: + dt = Timestamp(datetime(self.year, self.month, self.day)) + if return_name: + return Series(self.name, index=[dt]) + else: + return [dt] + + dates = self._reference_dates(start_date, end_date) + holiday_dates = self._apply_rule(dates) + if self.days_of_week is not None: + holiday_dates = holiday_dates[ + np.in1d(holiday_dates.dayofweek, self.days_of_week) + ] + + if self.start_date is not None: + filter_start_date = max( + self.start_date.tz_localize(filter_start_date.tz), filter_start_date + ) + if self.end_date is not None: + filter_end_date = min( + self.end_date.tz_localize(filter_end_date.tz), filter_end_date + ) + holiday_dates = holiday_dates[ + (holiday_dates >= filter_start_date) & (holiday_dates <= filter_end_date) + ] + if return_name: + return Series(self.name, index=holiday_dates) + return holiday_dates + + def _reference_dates(self, start_date, end_date): + """ + Get reference dates for the holiday. + + Return reference dates for the holiday also returning the year + prior to the start_date and year following the end_date. This ensures + that any offsets to be applied will yield the holidays within + the passed in dates. 
+ """ + if self.start_date is not None: + start_date = self.start_date.tz_localize(start_date.tz) + + if self.end_date is not None: + end_date = self.end_date.tz_localize(start_date.tz) + + year_offset = DateOffset(years=1) + reference_start_date = Timestamp( + datetime(start_date.year - 1, self.month, self.day) + ) + + reference_end_date = Timestamp( + datetime(end_date.year + 1, self.month, self.day) + ) + # Don't process unnecessary holidays + dates = date_range( + start=reference_start_date, + end=reference_end_date, + freq=year_offset, + tz=start_date.tz, + ) + + return dates + + def _apply_rule(self, dates): + """ + Apply the given offset/observance to a DatetimeIndex of dates. + + Parameters + ---------- + dates : DatetimeIndex + Dates to apply the given offset/observance rule + + Returns + ------- + Dates with rules applied + """ + if self.observance is not None: + return dates.map(lambda d: self.observance(d)) + + if self.offset is not None: + if not isinstance(self.offset, list): + offsets = [self.offset] + else: + offsets = self.offset + for offset in offsets: + + # if we are adding a non-vectorized value + # ignore the PerformanceWarnings: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PerformanceWarning) + dates += offset + return dates + + +holiday_calendars = {} + + +def register(cls): + try: + name = cls.name + except AttributeError: + name = cls.__name__ + holiday_calendars[name] = cls + + +def get_calendar(name): + """ + Return an instance of a calendar based on its name. 
+ + Parameters + ---------- + name : str + Calendar name to return an instance of + """ + return holiday_calendars[name]() + + +class HolidayCalendarMetaClass(type): + def __new__(cls, clsname, bases, attrs): + calendar_class = super().__new__(cls, clsname, bases, attrs) + register(calendar_class) + return calendar_class + + +class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass): + """ + Abstract interface to create holidays following certain rules. + """ + + rules: List[Holiday] = [] + start_date = Timestamp(datetime(1970, 1, 1)) + end_date = Timestamp(datetime(2200, 12, 31)) + _cache = None + + def __init__(self, name=None, rules=None): + """ + Initializes holiday object with a given set a rules. Normally + classes just have the rules defined within them. + + Parameters + ---------- + name : str + Name of the holiday calendar, defaults to class name + rules : array of Holiday objects + A set of rules used to create the holidays. + """ + super().__init__() + if name is None: + name = type(self).__name__ + self.name = name + + if rules is not None: + self.rules = rules + + def rule_from_name(self, name): + for rule in self.rules: + if rule.name == name: + return rule + + return None + + def holidays(self, start=None, end=None, return_name=False): + """ + Returns a curve with holidays between start_date and end_date + + Parameters + ---------- + start : starting date, datetime-like, optional + end : ending date, datetime-like, optional + return_name : bool, optional + If True, return a series that has dates and holiday names. + False will only return a DatetimeIndex of dates. 
+ + Returns + ------- + DatetimeIndex of holidays + """ + if self.rules is None: + raise Exception( + f"Holiday Calendar {self.name} does not have any rules specified" + ) + + if start is None: + start = AbstractHolidayCalendar.start_date + + if end is None: + end = AbstractHolidayCalendar.end_date + + start = Timestamp(start) + end = Timestamp(end) + + holidays = None + # If we don't have a cache or the dates are outside the prior cache, we + # get them again + if self._cache is None or start < self._cache[0] or end > self._cache[1]: + for rule in self.rules: + rule_holidays = rule.dates(start, end, return_name=True) + + if holidays is None: + holidays = rule_holidays + else: + holidays = holidays.append(rule_holidays) + + self._cache = (start, end, holidays.sort_index()) + + holidays = self._cache[2] + holidays = holidays[start:end] + + if return_name: + return holidays + else: + return holidays.index + + @staticmethod + def merge_class(base, other): + """ + Merge holiday calendars together. The base calendar + will take precedence to other. The merge will be done + based on each holiday's name. + + Parameters + ---------- + base : AbstractHolidayCalendar + instance/subclass or array of Holiday objects + other : AbstractHolidayCalendar + instance/subclass or array of Holiday objects + """ + try: + other = other.rules + except AttributeError: + pass + + if not isinstance(other, list): + other = [other] + other_holidays = {holiday.name: holiday for holiday in other} + + try: + base = base.rules + except AttributeError: + pass + + if not isinstance(base, list): + base = [base] + base_holidays = {holiday.name: holiday for holiday in base} + + other_holidays.update(base_holidays) + return list(other_holidays.values()) + + def merge(self, other, inplace=False): + """ + Merge holiday calendars together. The caller's class + rules take precedence. The merge will be done + based on each holiday's name. 
+ + Parameters + ---------- + other : holiday calendar + inplace : bool (default=False) + If True set rule_table to holidays, else return array of Holidays + """ + holidays = self.merge_class(self, other) + if inplace: + self.rules = holidays + else: + return holidays + + +USMemorialDay = Holiday( + "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1)) +) +USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1))) +USColumbusDay = Holiday( + "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2)) +) +USThanksgivingDay = Holiday( + "Thanksgiving", month=11, day=1, offset=DateOffset(weekday=TH(4)) +) +USMartinLutherKingJr = Holiday( + "Martin Luther King Jr. Day", + start_date=datetime(1986, 1, 1), + month=1, + day=1, + offset=DateOffset(weekday=MO(3)), +) +USPresidentsDay = Holiday( + "Presidents Day", month=2, day=1, offset=DateOffset(weekday=MO(3)) +) +GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)]) + +EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)]) + + +class USFederalHolidayCalendar(AbstractHolidayCalendar): + """ + US Federal Government Holiday Calendar based on rules specified by: + https://www.opm.gov/policy-data-oversight/ + snow-dismissal-procedures/federal-holidays/ + """ + + rules = [ + Holiday("New Years Day", month=1, day=1, observance=nearest_workday), + USMartinLutherKingJr, + USPresidentsDay, + USMemorialDay, + Holiday("July 4th", month=7, day=4, observance=nearest_workday), + USLaborDay, + USColumbusDay, + Holiday("Veterans Day", month=11, day=11, observance=nearest_workday), + USThanksgivingDay, + Holiday("Christmas", month=12, day=25, observance=nearest_workday), + ] + + +def HolidayCalendarFactory(name, base, other, base_class=AbstractHolidayCalendar): + rules = AbstractHolidayCalendar.merge_class(base, other) + calendar_class = type(name, (base_class,), {"rules": rules, "name": name}) + return calendar_class diff --git 
a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py new file mode 100644 index 00000000..8bb98a27 --- /dev/null +++ b/pandas/tseries/offsets.py @@ -0,0 +1,2838 @@ +from datetime import date, datetime, timedelta +import functools +import operator +from typing import Any, Optional +import warnings + +from dateutil.easter import easter +import numpy as np + +from pandas._libs.tslibs import ( + NaT, + OutOfBoundsDatetime, + Period, + Timedelta, + Timestamp, + ccalendar, + conversion, + delta_to_nanoseconds, + frequencies as libfrequencies, + normalize_date, + offsets as liboffsets, + timezones, +) +from pandas._libs.tslibs.offsets import ( + ApplyTypeError, + BaseOffset, + _get_calendar, + _is_normalized, + _to_dt64, + apply_index_wraps, + as_datetime, + roll_yearday, + shift_month, +) +from pandas.errors import AbstractMethodError +from pandas.util._decorators import Appender, Substitution, cache_readonly + +from pandas.core.dtypes.inference import is_list_like + +__all__ = [ + "Day", + "BusinessDay", + "BDay", + "CustomBusinessDay", + "CDay", + "CBMonthEnd", + "CBMonthBegin", + "MonthBegin", + "BMonthBegin", + "MonthEnd", + "BMonthEnd", + "SemiMonthEnd", + "SemiMonthBegin", + "BusinessHour", + "CustomBusinessHour", + "YearBegin", + "BYearBegin", + "YearEnd", + "BYearEnd", + "QuarterBegin", + "BQuarterBegin", + "QuarterEnd", + "BQuarterEnd", + "LastWeekOfMonth", + "FY5253Quarter", + "FY5253", + "Week", + "WeekOfMonth", + "Easter", + "Hour", + "Minute", + "Second", + "Milli", + "Micro", + "Nano", + "DateOffset", +] + +# convert to/from datetime/timestamp to allow invalid Timestamp ranges to +# pass thru + + +def as_timestamp(obj): + if isinstance(obj, Timestamp): + return obj + try: + return Timestamp(obj) + except (OutOfBoundsDatetime): + pass + return obj + + +def apply_wraps(func): + @functools.wraps(func) + def wrapper(self, other): + if other is NaT: + return NaT + elif isinstance(other, (timedelta, Tick, DateOffset)): + # timedelta path + return func(self, 
other) + elif isinstance(other, (np.datetime64, datetime, date)): + other = as_timestamp(other) + + tz = getattr(other, "tzinfo", None) + nano = getattr(other, "nanosecond", 0) + + try: + if self._adjust_dst and isinstance(other, Timestamp): + other = other.tz_localize(None) + + result = func(self, other) + + if self._adjust_dst: + result = conversion.localize_pydatetime(result, tz) + + result = Timestamp(result) + if self.normalize: + result = result.normalize() + + # nanosecond may be deleted depending on offset process + if not self.normalize and nano != 0: + if not isinstance(self, Nano) and result.nanosecond != nano: + if result.tz is not None: + # convert to UTC + value = conversion.tz_convert_single( + result.value, timezones.UTC, result.tz + ) + else: + value = result.value + result = Timestamp(value + nano) + + if tz is not None and result.tzinfo is None: + result = conversion.localize_pydatetime(result, tz) + + except OutOfBoundsDatetime: + result = func(self, as_datetime(other)) + + if self.normalize: + # normalize_date returns normal datetime + result = normalize_date(result) + + if tz is not None and result.tzinfo is None: + result = conversion.localize_pydatetime(result, tz) + + result = Timestamp(result) + + return result + + return wrapper + + +# --------------------------------------------------------------------- +# DateOffset + + +class DateOffset(BaseOffset): + """ + Standard kind of date increment used for a date range. + + Works exactly like relativedelta in terms of the keyword args you + pass in, use of the keyword n is discouraged-- you would be better + off specifying n in the keywords you use, but regardless it is + there for you. n is needed for DateOffset subclasses. + + DateOffset work as follows. Each offset specify a set of dates + that conform to the DateOffset. For example, Bday defines this + set to be the set of dates that are weekdays (M-F). 
To test if a + date is in the set of a DateOffset dateOffset we can use the + is_on_offset method: dateOffset.is_on_offset(date). + + If a date is not on a valid date, the rollback and rollforward + methods can be used to roll the date to the nearest valid date + before/after the date. + + DateOffsets can be created to move dates forward a given number of + valid dates. For example, Bday(2) can be added to a date to move + it two business days forward. If the date does not start on a + valid date, first it is moved to a valid date. Thus pseudo code + is: + + def __add__(date): + date = rollback(date) # does nothing if date is valid + return date + <n number of periods> + + When a date offset is created for a negative number of periods, + the date is first rolled forward. The pseudo code is: + + def __add__(date): + date = rollforward(date) # does nothing if date is valid + return date + <n number of periods> + + Zero presents a problem. Should it roll forward or back? We + arbitrarily have it rollforward: + + date + BDay(0) == BDay.rollforward(date) + + Since 0 is a bit weird, we suggest avoiding its use. + + Parameters + ---------- + n : int, default 1 + The number of time periods the offset represents. + normalize : bool, default False + Whether to round the result of a DateOffset addition down to the + previous midnight. + **kwds + Temporal parameters that add to or replace the offset value. + + Parameters that **add** to the offset (like Timedelta): + + - years + - months + - weeks + - days + - hours + - minutes + - seconds + - microseconds + - nanoseconds + + Parameters that **replace** the offset value: + + - year + - month + - day + - weekday + - hour + - minute + - second + - microsecond + - nanosecond. + + See Also + -------- + dateutil.relativedelta.relativedelta : The relativedelta type is designed + to be applied to an existing datetime and can replace specific components of + that datetime, or represents an interval of time.
+ + Examples + -------- + >>> from pandas.tseries.offsets import DateOffset + >>> ts = pd.Timestamp('2017-01-01 09:10:11') + >>> ts + DateOffset(months=3) + Timestamp('2017-04-01 09:10:11') + + >>> ts = pd.Timestamp('2017-01-01 09:10:11') + >>> ts + DateOffset(months=2) + Timestamp('2017-03-01 09:10:11') + """ + + _params = cache_readonly(BaseOffset._params.fget) + _use_relativedelta = False + _adjust_dst = False + _attributes = frozenset(["n", "normalize"] + list(liboffsets.relativedelta_kwds)) + _deprecations = frozenset(["isAnchored", "onOffset"]) + + # default for prior pickles + normalize = False + + def __init__(self, n=1, normalize=False, **kwds): + BaseOffset.__init__(self, n, normalize) + + off, use_rd = liboffsets._determine_offset(kwds) + object.__setattr__(self, "_offset", off) + object.__setattr__(self, "_use_relativedelta", use_rd) + for key in kwds: + val = kwds[key] + object.__setattr__(self, key, val) + + @apply_wraps + def apply(self, other): + if self._use_relativedelta: + other = as_datetime(other) + + if len(self.kwds) > 0: + tzinfo = getattr(other, "tzinfo", None) + if tzinfo is not None and self._use_relativedelta: + # perform calculation in UTC + other = other.replace(tzinfo=None) + + if self.n > 0: + for i in range(self.n): + other = other + self._offset + else: + for i in range(-self.n): + other = other - self._offset + + if tzinfo is not None and self._use_relativedelta: + # bring tz back from UTC calculation + other = conversion.localize_pydatetime(other, tzinfo) + + return as_timestamp(other) + else: + return other + timedelta(self.n) + + @apply_index_wraps + def apply_index(self, i): + """ + Vectorized apply of DateOffset to DatetimeIndex, + raises NotImplentedError for offsets without a + vectorized implementation. 
+ + Parameters + ---------- + i : DatetimeIndex + + Returns + ------- + y : DatetimeIndex + """ + + if type(self) is not DateOffset: + raise NotImplementedError( + f"DateOffset subclass {type(self).__name__} " + "does not have a vectorized implementation" + ) + kwds = self.kwds + relativedelta_fast = { + "years", + "months", + "weeks", + "days", + "hours", + "minutes", + "seconds", + "microseconds", + } + # relativedelta/_offset path only valid for base DateOffset + if self._use_relativedelta and set(kwds).issubset(relativedelta_fast): + + months = (kwds.get("years", 0) * 12 + kwds.get("months", 0)) * self.n + if months: + shifted = liboffsets.shift_months(i.asi8, months) + i = type(i)(shifted, dtype=i.dtype) + + weeks = (kwds.get("weeks", 0)) * self.n + if weeks: + # integer addition on PeriodIndex is deprecated, + # so we directly use _time_shift instead + asper = i.to_period("W") + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + shifted = asper._time_shift(weeks) + i = shifted.to_timestamp() + i.to_perioddelta("W") + + timedelta_kwds = { + k: v + for k, v in kwds.items() + if k in ["days", "hours", "minutes", "seconds", "microseconds"] + } + if timedelta_kwds: + delta = Timedelta(**timedelta_kwds) + i = i + (self.n * delta) + return i + elif not self._use_relativedelta and hasattr(self, "_offset"): + # timedelta + return i + (self._offset * self.n) + else: + # relativedelta with other keywords + kwd = set(kwds) - relativedelta_fast + raise NotImplementedError( + "DateOffset with relativedelta " + f"keyword(s) {kwd} not able to be " + "applied vectorized" + ) + + def is_anchored(self): + # TODO: Does this make sense for the general case? It would help + # if there were a canonical docstring for what is_anchored means. 
+ return self.n == 1 + + def onOffset(self, dt): + warnings.warn( + "onOffset is a deprecated, use is_on_offset instead", + FutureWarning, + stacklevel=2, + ) + return self.is_on_offset(dt) + + def isAnchored(self): + warnings.warn( + "isAnchored is a deprecated, use is_anchored instead", + FutureWarning, + stacklevel=2, + ) + return self.is_anchored() + + # TODO: Combine this with BusinessMixin version by defining a whitelisted + # set of attributes on each object rather than the existing behavior of + # iterating over internal ``__dict__`` + def _repr_attrs(self): + exclude = {"n", "inc", "normalize"} + attrs = [] + for attr in sorted(self.__dict__): + if attr.startswith("_") or attr == "kwds": + continue + elif attr not in exclude: + value = getattr(self, attr) + attrs.append(f"{attr}={value}") + + out = "" + if attrs: + out += ": " + ", ".join(attrs) + return out + + @property + def name(self): + return self.rule_code + + def rollback(self, dt): + """ + Roll provided date backward to next offset only if not on offset. + + Returns + ------- + TimeStamp + Rolled timestamp if not on offset, otherwise unchanged timestamp. + """ + dt = as_timestamp(dt) + if not self.is_on_offset(dt): + dt = dt - type(self)(1, normalize=self.normalize, **self.kwds) + return dt + + def rollforward(self, dt): + """ + Roll provided date forward to next offset only if not on offset. + + Returns + ------- + TimeStamp + Rolled timestamp if not on offset, otherwise unchanged timestamp. + """ + dt = as_timestamp(dt) + if not self.is_on_offset(dt): + dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) + return dt + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + # XXX, see #1395 + if type(self) == DateOffset or isinstance(self, Tick): + return True + + # Default (slow) method for determining if some date is a member of the + # date range generated by this offset. Subclasses may have this + # re-implemented in a nicer way. 
+ a = dt + b = (dt + self) - self + return a == b + + # way to get around weirdness with rule_code + @property + def _prefix(self): + raise NotImplementedError("Prefix not defined") + + @property + def rule_code(self): + return self._prefix + + @cache_readonly + def freqstr(self): + try: + code = self.rule_code + except NotImplementedError: + return repr(self) + + if self.n != 1: + fstr = f"{self.n}{code}" + else: + fstr = code + + try: + if self._offset: + fstr += self._offset_str() + except AttributeError: + # TODO: standardize `_offset` vs `offset` naming convention + pass + + return fstr + + def _offset_str(self): + return "" + + @property + def nanos(self): + raise ValueError(f"{self} is a non-fixed frequency") + + +class SingleConstructorOffset(DateOffset): + @classmethod + def _from_name(cls, suffix=None): + # default _from_name calls cls with no args + if suffix: + raise ValueError(f"Bad freq suffix {suffix}") + return cls() + + +class _CustomMixin: + """ + Mixin for classes that define and validate calendar, holidays, + and weekdays attributes. + """ + + def __init__(self, weekmask, holidays, calendar): + calendar, holidays = _get_calendar( + weekmask=weekmask, holidays=holidays, calendar=calendar + ) + # Custom offset instances are identified by the + # following two attributes. See DateOffset._params() + # holidays, weekmask + + object.__setattr__(self, "weekmask", weekmask) + object.__setattr__(self, "holidays", holidays) + object.__setattr__(self, "calendar", calendar) + + +class BusinessMixin: + """ + Mixin to business types to provide related functions. + """ + + @property + def offset(self): + """ + Alias for self._offset. 
+ """ + # Alias for backward compat + return self._offset + + def _repr_attrs(self): + if self.offset: + attrs = [f"offset={repr(self.offset)}"] + else: + attrs = None + out = "" + if attrs: + out += ": " + ", ".join(attrs) + return out + + +class BusinessDay(BusinessMixin, SingleConstructorOffset): + """ + DateOffset subclass representing possibly n business days. + """ + + _prefix = "B" + _adjust_dst = True + _attributes = frozenset(["n", "normalize", "offset"]) + + def __init__(self, n=1, normalize=False, offset=timedelta(0)): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + + def _offset_str(self): + def get_str(td): + off_str = "" + if td.days > 0: + off_str += str(td.days) + "D" + if td.seconds > 0: + s = td.seconds + hrs = int(s / 3600) + if hrs != 0: + off_str += str(hrs) + "H" + s -= hrs * 3600 + mts = int(s / 60) + if mts != 0: + off_str += str(mts) + "Min" + s -= mts * 60 + if s != 0: + off_str += str(s) + "s" + if td.microseconds > 0: + off_str += str(td.microseconds) + "us" + return off_str + + if isinstance(self.offset, timedelta): + zero = timedelta(0, 0, 0) + if self.offset >= zero: + off_str = "+" + get_str(self.offset) + else: + off_str = "-" + get_str(-self.offset) + return off_str + else: + return "+" + repr(self.offset) + + @apply_wraps + def apply(self, other): + if isinstance(other, datetime): + n = self.n + wday = other.weekday() + + # avoid slowness below by operating on weeks first + weeks = n // 5 + if n <= 0 and wday > 4: + # roll forward + n += 1 + + n -= 5 * weeks + + # n is always >= 0 at this point + if n == 0 and wday > 4: + # roll back + days = 4 - wday + elif wday > 4: + # roll forward + days = (7 - wday) + (n - 1) + elif wday + n <= 4: + # shift by n days without leaving the current week + days = n + else: + # shift by n days plus 2 to get past the weekend + days = n + 2 + + result = other + timedelta(days=7 * weeks + days) + if self.offset: + result = result + self.offset + return result 
+ + elif isinstance(other, (timedelta, Tick)): + return BDay(self.n, offset=self.offset + other, normalize=self.normalize) + else: + raise ApplyTypeError( + "Only know how to combine business day with datetime or timedelta." + ) + + @apply_index_wraps + def apply_index(self, i): + time = i.to_perioddelta("D") + # to_period rolls forward to next BDay; track and + # reduce n where it does when rolling forward + asper = i.to_period("B") + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + + if self.n > 0: + shifted = (i.to_perioddelta("B") - time).asi8 != 0 + + # Integer-array addition is deprecated, so we use + # _time_shift directly + roll = np.where(shifted, self.n - 1, self.n) + shifted = asper._addsub_int_array(roll, operator.add) + else: + # Integer addition is deprecated, so we use _time_shift directly + roll = self.n + shifted = asper._time_shift(roll) + + result = shifted.to_timestamp() + time + return result + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.weekday() < 5 + + +class BusinessHourMixin(BusinessMixin): + def __init__(self, start="09:00", end="17:00", offset=timedelta(0)): + # must be validated here to equality check + if not is_list_like(start): + start = [start] + if not len(start): + raise ValueError("Must include at least 1 start time") + + if not is_list_like(end): + end = [end] + if not len(end): + raise ValueError("Must include at least 1 end time") + + start = np.array([liboffsets._validate_business_time(x) for x in start]) + end = np.array([liboffsets._validate_business_time(x) for x in end]) + + # Validation of input + if len(start) != len(end): + raise ValueError("number of starting time and ending time must be the same") + num_openings = len(start) + + # sort starting and ending time by starting time + index = np.argsort(start) + + # convert to tuple so that start and end are hashable + start = tuple(start[index]) + end = 
tuple(end[index]) + + total_secs = 0 + for i in range(num_openings): + total_secs += self._get_business_hours_by_sec(start[i], end[i]) + total_secs += self._get_business_hours_by_sec( + end[i], start[(i + 1) % num_openings] + ) + if total_secs != 24 * 60 * 60: + raise ValueError( + "invalid starting and ending time(s): " + "opening hours should not touch or overlap with " + "one another" + ) + + object.__setattr__(self, "start", start) + object.__setattr__(self, "end", end) + object.__setattr__(self, "_offset", offset) + + @cache_readonly + def next_bday(self): + """ + Used for moving to next business day. + """ + if self.n >= 0: + nb_offset = 1 + else: + nb_offset = -1 + if self._prefix.startswith("C"): + # CustomBusinessHour + return CustomBusinessDay( + n=nb_offset, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar, + ) + else: + return BusinessDay(n=nb_offset) + + def _next_opening_time(self, other, sign=1): + """ + If self.n and sign have the same sign, return the earliest opening time + later than or equal to current time. + Otherwise the latest opening time earlier than or equal to current + time. + + Opening time always locates on BusinessDay. + However, closing time may not if business hour extends over midnight. + + Parameters + ---------- + other : datetime + Current time. + sign : int, default 1. + Either 1 or -1. Going forward in time if it has the same sign as + self.n. Going backward in time otherwise. + + Returns + ------- + result : datetime + Next opening time. 
+ """ + earliest_start = self.start[0] + latest_start = self.start[-1] + + if not self.next_bday.is_on_offset(other): + # today is not business day + other = other + sign * self.next_bday + if self.n * sign >= 0: + hour, minute = earliest_start.hour, earliest_start.minute + else: + hour, minute = latest_start.hour, latest_start.minute + else: + if self.n * sign >= 0: + if latest_start < other.time(): + # current time is after latest starting time in today + other = other + sign * self.next_bday + hour, minute = earliest_start.hour, earliest_start.minute + else: + # find earliest starting time no earlier than current time + for st in self.start: + if other.time() <= st: + hour, minute = st.hour, st.minute + break + else: + if other.time() < earliest_start: + # current time is before earliest starting time in today + other = other + sign * self.next_bday + hour, minute = latest_start.hour, latest_start.minute + else: + # find latest starting time no later than current time + for st in reversed(self.start): + if other.time() >= st: + hour, minute = st.hour, st.minute + break + + return datetime(other.year, other.month, other.day, hour, minute) + + def _prev_opening_time(self, other): + """ + If n is positive, return the latest opening time earlier than or equal + to current time. + Otherwise the earliest opening time later than or equal to current + time. + + Parameters + ---------- + other : datetime + Current time. + + Returns + ------- + result : datetime + Previous opening time. + """ + return self._next_opening_time(other, sign=-1) + + def _get_business_hours_by_sec(self, start, end): + """ + Return business hours in a day by seconds. 
+ """ + # create dummy datetime to calculate businesshours in a day + dtstart = datetime(2014, 4, 1, start.hour, start.minute) + day = 1 if start < end else 2 + until = datetime(2014, 4, day, end.hour, end.minute) + return int((until - dtstart).total_seconds()) + + @apply_wraps + def rollback(self, dt): + """ + Roll provided date backward to next offset only if not on offset. + """ + if not self.is_on_offset(dt): + if self.n >= 0: + dt = self._prev_opening_time(dt) + else: + dt = self._next_opening_time(dt) + return self._get_closing_time(dt) + return dt + + @apply_wraps + def rollforward(self, dt): + """ + Roll provided date forward to next offset only if not on offset. + """ + if not self.is_on_offset(dt): + if self.n >= 0: + return self._next_opening_time(dt) + else: + return self._prev_opening_time(dt) + return dt + + def _get_closing_time(self, dt): + """ + Get the closing time of a business hour interval by its opening time. + + Parameters + ---------- + dt : datetime + Opening time of a business hour interval. + + Returns + ------- + result : datetime + Corresponding closing time. 
+ """ + for i, st in enumerate(self.start): + if st.hour == dt.hour and st.minute == dt.minute: + return dt + timedelta( + seconds=self._get_business_hours_by_sec(st, self.end[i]) + ) + assert False + + @apply_wraps + def apply(self, other): + if isinstance(other, datetime): + # used for detecting edge condition + nanosecond = getattr(other, "nanosecond", 0) + # reset timezone and nanosecond + # other may be a Timestamp, thus not use replace + other = datetime( + other.year, + other.month, + other.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + n = self.n + + # adjust other to reduce number of cases to handle + if n >= 0: + if other.time() in self.end or not self._is_on_offset(other): + other = self._next_opening_time(other) + else: + if other.time() in self.start: + # adjustment to move to previous business day + other = other - timedelta(seconds=1) + if not self._is_on_offset(other): + other = self._next_opening_time(other) + other = self._get_closing_time(other) + + # get total business hours by sec in one business day + businesshours = sum( + self._get_business_hours_by_sec(st, en) + for st, en in zip(self.start, self.end) + ) + + bd, r = divmod(abs(n * 60), businesshours // 60) + if n < 0: + bd, r = -bd, -r + + # adjust by business days first + if bd != 0: + if isinstance(self, _CustomMixin): # GH 30593 + skip_bd = CustomBusinessDay( + n=bd, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar, + ) + else: + skip_bd = BusinessDay(n=bd) + # midnight business hour may not on BusinessDay + if not self.next_bday.is_on_offset(other): + prev_open = self._prev_opening_time(other) + remain = other - prev_open + other = prev_open + skip_bd + remain + else: + other = other + skip_bd + + # remaining business hours to adjust + bhour_remain = timedelta(minutes=r) + + if n >= 0: + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = ( + 
self._get_closing_time(self._prev_opening_time(other)) - other + ) + if bhour_remain < bhour: + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._next_opening_time(other + bhour) + else: + while bhour_remain != timedelta(0): + # business hour left in this business time interval + bhour = self._next_opening_time(other) - other + if ( + bhour_remain > bhour + or bhour_remain == bhour + and nanosecond != 0 + ): + # finish adjusting if possible + other += bhour_remain + bhour_remain = timedelta(0) + else: + # go to next business time interval + bhour_remain -= bhour + other = self._get_closing_time( + self._next_opening_time( + other + bhour - timedelta(seconds=1) + ) + ) + + return other + else: + raise ApplyTypeError("Only know how to combine business hour with datetime") + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + + if dt.tzinfo is not None: + dt = datetime( + dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond + ) + # Valid BH can be on the different BusinessDay during midnight + # Distinguish by the time spent from previous opening time + return self._is_on_offset(dt) + + def _is_on_offset(self, dt): + """ + Slight speedups using calculated values. 
+ """ + # if self.normalize and not _is_normalized(dt): + # return False + # Valid BH can be on the different BusinessDay during midnight + # Distinguish by the time spent from previous opening time + if self.n >= 0: + op = self._prev_opening_time(dt) + else: + op = self._next_opening_time(dt) + span = (dt - op).total_seconds() + businesshours = 0 + for i, st in enumerate(self.start): + if op.hour == st.hour and op.minute == st.minute: + businesshours = self._get_business_hours_by_sec(st, self.end[i]) + if span <= businesshours: + return True + else: + return False + + def _repr_attrs(self): + out = super()._repr_attrs() + hours = ",".join( + f'{st.strftime("%H:%M")}-{en.strftime("%H:%M")}' + for st, en in zip(self.start, self.end) + ) + attrs = [f"{self._prefix}={hours}"] + out += ": " + ", ".join(attrs) + return out + + +class BusinessHour(BusinessHourMixin, SingleConstructorOffset): + """ + DateOffset subclass representing possibly n business hours. + """ + + _prefix = "BH" + _anchor = 0 + _attributes = frozenset(["n", "normalize", "start", "end", "offset"]) + + def __init__( + self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) + ): + BaseOffset.__init__(self, n, normalize) + super().__init__(start=start, end=end, offset=offset) + + +class CustomBusinessDay(_CustomMixin, BusinessDay): + """ + DateOffset subclass representing possibly n custom business days, + excluding holidays. + + Parameters + ---------- + n : int, default 1 + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + holidays : list + List/array of dates to exclude from the set of valid business days, + passed to ``numpy.busdaycalendar``. 
+ calendar : pd.HolidayCalendar or np.busdaycalendar + offset : timedelta, default timedelta(0) + """ + + _prefix = "C" + _attributes = frozenset( + ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + offset=timedelta(0), + ): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + + _CustomMixin.__init__(self, weekmask, holidays, calendar) + + @apply_wraps + def apply(self, other): + if self.n <= 0: + roll = "forward" + else: + roll = "backward" + + if isinstance(other, datetime): + date_in = other + np_dt = np.datetime64(date_in.date()) + + np_incr_dt = np.busday_offset( + np_dt, self.n, roll=roll, busdaycal=self.calendar + ) + + dt_date = np_incr_dt.astype(datetime) + result = datetime.combine(dt_date, date_in.time()) + + if self.offset: + result = result + self.offset + return result + + elif isinstance(other, (timedelta, Tick)): + return BDay(self.n, offset=self.offset + other, normalize=self.normalize) + else: + raise ApplyTypeError( + "Only know how to combine trading day with " + "datetime, datetime64 or timedelta." + ) + + def apply_index(self, i): + raise NotImplementedError + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + day64 = _to_dt64(dt, "datetime64[D]") + return np.is_busday(day64, busdaycal=self.calendar) + + +class CustomBusinessHour(_CustomMixin, BusinessHourMixin, SingleConstructorOffset): + """ + DateOffset subclass representing possibly n custom business days. 
+ """ + + _prefix = "CBH" + _anchor = 0 + _attributes = frozenset( + ["n", "normalize", "weekmask", "holidays", "calendar", "start", "end", "offset"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + start="09:00", + end="17:00", + offset=timedelta(0), + ): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + + _CustomMixin.__init__(self, weekmask, holidays, calendar) + BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) + + +# --------------------------------------------------------------------- +# Month-Based Offset Classes + + +class MonthOffset(SingleConstructorOffset): + _adjust_dst = True + _attributes = frozenset(["n", "normalize"]) + + __init__ = BaseOffset.__init__ + + @property + def name(self): + if self.is_anchored: + return self.rule_code + else: + month = ccalendar.MONTH_ALIASES[self.n] + return f"{self.code_rule}-{month}" + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) + + @apply_wraps + def apply(self, other): + compare_day = self._get_offset_day(other) + n = liboffsets.roll_convention(other.day, self.n, compare_day) + return shift_month(other, n, self._day_opt) + + @apply_index_wraps + def apply_index(self, i): + shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) + # TODO: going through __new__ raises on call to _validate_frequency; + # are we passing incorrect freq? + return type(i)._simple_new(shifted, freq=i.freq, dtype=i.dtype) + + +class MonthEnd(MonthOffset): + """ + DateOffset of one month end. + """ + + _prefix = "M" + _day_opt = "end" + + +class MonthBegin(MonthOffset): + """ + DateOffset of one month at beginning. + """ + + _prefix = "MS" + _day_opt = "start" + + +class BusinessMonthEnd(MonthOffset): + """ + DateOffset increments between business EOM dates. 
+ """ + + _prefix = "BM" + _day_opt = "business_end" + + +class BusinessMonthBegin(MonthOffset): + """ + DateOffset of one business month at beginning. + """ + + _prefix = "BMS" + _day_opt = "business_start" + + +class _CustomBusinessMonth(_CustomMixin, BusinessMixin, MonthOffset): + """ + DateOffset subclass representing custom business month(s). + + Increments between %(bound)s of month dates. + + Parameters + ---------- + n : int, default 1 + The number of months represented. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. + weekmask : str, Default 'Mon Tue Wed Thu Fri' + Weekmask of valid business days, passed to ``numpy.busdaycalendar``. + holidays : list + List/array of dates to exclude from the set of valid business days, + passed to ``numpy.busdaycalendar``. + calendar : pd.HolidayCalendar or np.busdaycalendar + Calendar to integrate. + offset : timedelta, default timedelta(0) + Time offset to apply. + """ + + _attributes = frozenset( + ["n", "normalize", "weekmask", "holidays", "calendar", "offset"] + ) + + is_on_offset = DateOffset.is_on_offset # override MonthOffset method + apply_index = DateOffset.apply_index # override MonthOffset method + + def __init__( + self, + n=1, + normalize=False, + weekmask="Mon Tue Wed Thu Fri", + holidays=None, + calendar=None, + offset=timedelta(0), + ): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) + + _CustomMixin.__init__(self, weekmask, holidays, calendar) + + @cache_readonly + def cbday_roll(self): + """ + Define default roll function to be called in apply method. 
+ """ + cbday = CustomBusinessDay(n=self.n, normalize=False, **self.kwds) + + if self._prefix.endswith("S"): + # MonthBegin + roll_func = cbday.rollforward + else: + # MonthEnd + roll_func = cbday.rollback + return roll_func + + @cache_readonly + def m_offset(self): + if self._prefix.endswith("S"): + # MonthBegin + moff = MonthBegin(n=1, normalize=False) + else: + # MonthEnd + moff = MonthEnd(n=1, normalize=False) + return moff + + @cache_readonly + def month_roll(self): + """ + Define default roll function to be called in apply method. + """ + if self._prefix.endswith("S"): + # MonthBegin + roll_func = self.m_offset.rollback + else: + # MonthEnd + roll_func = self.m_offset.rollforward + return roll_func + + @apply_wraps + def apply(self, other): + # First move to month offset + cur_month_offset_date = self.month_roll(other) + + # Find this custom month offset + compare_date = self.cbday_roll(cur_month_offset_date) + n = liboffsets.roll_convention(other.day, self.n, compare_date.day) + + new = cur_month_offset_date + n * self.m_offset + result = self.cbday_roll(new) + return result + + +@Substitution(bound="end") +@Appender(_CustomBusinessMonth.__doc__) +class CustomBusinessMonthEnd(_CustomBusinessMonth): + _prefix = "CBM" + + +@Substitution(bound="beginning") +@Appender(_CustomBusinessMonth.__doc__) +class CustomBusinessMonthBegin(_CustomBusinessMonth): + _prefix = "CBMS" + + +# --------------------------------------------------------------------- +# Semi-Month Based Offset Classes + + +class SemiMonthOffset(DateOffset): + _adjust_dst = True + _default_day_of_month = 15 + _min_day_of_month = 2 + _attributes = frozenset(["n", "normalize", "day_of_month"]) + + def __init__(self, n=1, normalize=False, day_of_month=None): + BaseOffset.__init__(self, n, normalize) + + if day_of_month is None: + object.__setattr__(self, "day_of_month", self._default_day_of_month) + else: + object.__setattr__(self, "day_of_month", int(day_of_month)) + if not self._min_day_of_month <= 
self.day_of_month <= 27: + raise ValueError( + "day_of_month must be " + f"{self._min_day_of_month}<=day_of_month<=27, " + f"got {self.day_of_month}" + ) + + @classmethod + def _from_name(cls, suffix=None): + return cls(day_of_month=suffix) + + @property + def rule_code(self): + suffix = f"-{self.day_of_month}" + return self._prefix + suffix + + @apply_wraps + def apply(self, other): + # shift `other` to self.day_of_month, incrementing `n` if necessary + n = liboffsets.roll_convention(other.day, self.n, self.day_of_month) + + days_in_month = ccalendar.get_days_in_month(other.year, other.month) + + # For SemiMonthBegin on other.day == 1 and + # SemiMonthEnd on other.day == days_in_month, + # shifting `other` to `self.day_of_month` _always_ requires + # incrementing/decrementing `n`, regardless of whether it is + # initially positive. + if type(self) is SemiMonthBegin and (self.n <= 0 and other.day == 1): + n -= 1 + elif type(self) is SemiMonthEnd and (self.n > 0 and other.day == days_in_month): + n += 1 + + return self._apply(n, other) + + def _apply(self, n, other): + """ + Handle specific apply logic for child classes. 
+ """ + raise AbstractMethodError(self) + + @apply_index_wraps + def apply_index(self, i): + # determine how many days away from the 1st of the month we are + dti = i + days_from_start = i.to_perioddelta("M").asi8 + delta = Timedelta(days=self.day_of_month - 1).value + + # get boolean array for each element before the day_of_month + before_day_of_month = days_from_start < delta + + # get boolean array for each element after the day_of_month + after_day_of_month = days_from_start > delta + + # determine the correct n for each date in i + roll = self._get_roll(i, before_day_of_month, after_day_of_month) + + # isolate the time since it will be striped away one the next line + time = i.to_perioddelta("D") + + # apply the correct number of months + + # integer-array addition on PeriodIndex is deprecated, + # so we use _addsub_int_array directly + asper = i.to_period("M") + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + + shifted = asper._addsub_int_array(roll // 2, operator.add) + i = type(dti)(shifted.to_timestamp()) + + # apply the correct day + i = self._apply_index_days(i, roll) + + return i + time + + def _get_roll(self, i, before_day_of_month, after_day_of_month): + """ + Return an array with the correct n for each date in i. + + The roll array is based on the fact that i gets rolled back to + the first day of the month. + """ + raise AbstractMethodError(self) + + def _apply_index_days(self, i, roll): + """ + Apply the correct day for each date in i. + """ + raise AbstractMethodError(self) + + +class SemiMonthEnd(SemiMonthOffset): + """ + Two DateOffset's per month repeating on the last + day of the month and day_of_month. 
+ + Parameters + ---------- + n : int + normalize : bool, default False + day_of_month : int, {1, 3,...,27}, default 15 + """ + + _prefix = "SM" + _min_day_of_month = 1 + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + days_in_month = ccalendar.get_days_in_month(dt.year, dt.month) + return dt.day in (self.day_of_month, days_in_month) + + def _apply(self, n, other): + months = n // 2 + day = 31 if n % 2 else self.day_of_month + return shift_month(other, months, day) + + def _get_roll(self, i, before_day_of_month, after_day_of_month): + n = self.n + is_month_end = i.is_month_end + if n > 0: + roll_end = np.where(is_month_end, 1, 0) + roll_before = np.where(before_day_of_month, n, n + 1) + roll = roll_end + roll_before + elif n == 0: + roll_after = np.where(after_day_of_month, 2, 0) + roll_before = np.where(~after_day_of_month, 1, 0) + roll = roll_before + roll_after + else: + roll = np.where(after_day_of_month, n + 2, n + 1) + return roll + + def _apply_index_days(self, i, roll): + """ + Add days portion of offset to DatetimeIndex i. + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ + nanos = (roll % 2) * Timedelta(days=self.day_of_month).value + i += nanos.astype("timedelta64[ns]") + return i + Timedelta(days=-1) + + +class SemiMonthBegin(SemiMonthOffset): + """ + Two DateOffset's per month repeating on the first + day of the month and day_of_month. 
+ + Parameters + ---------- + n : int + normalize : bool, default False + day_of_month : int, {2, 3,...,27}, default 15 + """ + + _prefix = "SMS" + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.day in (1, self.day_of_month) + + def _apply(self, n, other): + months = n // 2 + n % 2 + day = 1 if n % 2 else self.day_of_month + return shift_month(other, months, day) + + def _get_roll(self, i, before_day_of_month, after_day_of_month): + n = self.n + is_month_start = i.is_month_start + if n > 0: + roll = np.where(before_day_of_month, n, n + 1) + elif n == 0: + roll_start = np.where(is_month_start, 0, 1) + roll_after = np.where(after_day_of_month, 1, 0) + roll = roll_start + roll_after + else: + roll_after = np.where(after_day_of_month, n + 2, n + 1) + roll_start = np.where(is_month_start, -1, 0) + roll = roll_after + roll_start + return roll + + def _apply_index_days(self, i, roll): + """ + Add days portion of offset to DatetimeIndex i. + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ + nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value + return i + nanos.astype("timedelta64[ns]") + + +# --------------------------------------------------------------------- +# Week-Based Offset Classes + + +class Week(DateOffset): + """ + Weekly offset. + + Parameters + ---------- + weekday : int, default None + Always generate specific day of week. 0 for Monday. 
+ """ + + _adjust_dst = True + _inc = timedelta(weeks=1) + _prefix = "W" + _attributes = frozenset(["n", "normalize", "weekday"]) + + def __init__(self, n=1, normalize=False, weekday=None): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) + + if self.weekday is not None: + if self.weekday < 0 or self.weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + + def is_anchored(self): + return self.n == 1 and self.weekday is not None + + @apply_wraps + def apply(self, other): + if self.weekday is None: + return other + self.n * self._inc + + if not isinstance(other, datetime): + raise TypeError( + f"Cannot add {type(other).__name__} to {type(self).__name__}" + ) + + k = self.n + otherDay = other.weekday() + if otherDay != self.weekday: + other = other + timedelta((self.weekday - otherDay) % 7) + if k > 0: + k -= 1 + + return other + timedelta(weeks=k) + + @apply_index_wraps + def apply_index(self, i): + if self.weekday is None: + # integer addition on PeriodIndex is deprecated, + # so we use _time_shift directly + asper = i.to_period("W") + if not isinstance(asper._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + asper = asper._data + + shifted = asper._time_shift(self.n) + return shifted.to_timestamp() + i.to_perioddelta("W") + else: + return self._end_apply_index(i) + + def _end_apply_index(self, dtindex): + """ + Add self to the given DatetimeIndex, specialized for case where + self.weekday is non-null. 
+ + Parameters + ---------- + dtindex : DatetimeIndex + + Returns + ------- + result : DatetimeIndex + """ + off = dtindex.to_perioddelta("D") + + base, mult = libfrequencies.get_freq_code(self.freqstr) + base_period = dtindex.to_period(base) + if not isinstance(base_period._data, np.ndarray): + # unwrap PeriodIndex --> PeriodArray + base_period = base_period._data + + if self.n > 0: + # when adding, dates on end roll to next + normed = dtindex - off + Timedelta(1, "D") - Timedelta(1, "ns") + roll = np.where( + base_period.to_timestamp(how="end") == normed, self.n, self.n - 1 + ) + # integer-array addition on PeriodIndex is deprecated, + # so we use _addsub_int_array directly + shifted = base_period._addsub_int_array(roll, operator.add) + base = shifted.to_timestamp(how="end") + else: + # integer addition on PeriodIndex is deprecated, + # so we use _time_shift directly + roll = self.n + base = base_period._time_shift(roll).to_timestamp(how="end") + + return base + off + Timedelta(1, "ns") - Timedelta(1, "D") + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + elif self.weekday is None: + return True + return dt.weekday() == self.weekday + + @property + def rule_code(self): + suffix = "" + if self.weekday is not None: + weekday = ccalendar.int_to_weekday[self.weekday] + suffix = f"-{weekday}" + return self._prefix + suffix + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + weekday = None + else: + weekday = ccalendar.weekday_to_int[suffix] + return cls(weekday=weekday) + + +class _WeekOfMonthMixin: + """ + Mixin for methods common to WeekOfMonth and LastWeekOfMonth. 
+ """ + + @apply_wraps + def apply(self, other): + compare_day = self._get_offset_day(other) + + months = self.n + if months > 0 and compare_day > other.day: + months -= 1 + elif months <= 0 and compare_day < other.day: + months += 1 + + shifted = shift_month(other, months, "start") + to_day = self._get_offset_day(shifted) + return liboffsets.shift_day(shifted, to_day - shifted.day) + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.day == self._get_offset_day(dt) + + +class WeekOfMonth(_WeekOfMonthMixin, DateOffset): + """ + Describes monthly dates like "the Tuesday of the 2nd week of each month". + + Parameters + ---------- + n : int + week : int {0, 1, 2, 3, ...}, default 0 + A specific integer for the week of the month. + e.g. 0 is 1st week of month, 1 is the 2nd week, etc. + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + """ + + _prefix = "WOM" + _adjust_dst = True + _attributes = frozenset(["n", "normalize", "week", "weekday"]) + + def __init__(self, n=1, normalize=False, week=0, weekday=0): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) + object.__setattr__(self, "week", week) + + if self.weekday < 0 or self.weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + if self.week < 0 or self.week > 3: + raise ValueError(f"Week must be 0<=week<=3, got {self.week}") + + def _get_offset_day(self, other): + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the self.week'th such day in the month. 
+ + Parameters + ---------- + other : datetime + + Returns + ------- + day : int + """ + mstart = datetime(other.year, other.month, 1) + wday = mstart.weekday() + shift_days = (self.weekday - wday) % 7 + return 1 + shift_days + self.week * 7 + + @property + def rule_code(self): + weekday = ccalendar.int_to_weekday.get(self.weekday, "") + return f"{self._prefix}-{self.week + 1}{weekday}" + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") + # TODO: handle n here... + # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) + week = int(suffix[0]) - 1 + weekday = ccalendar.weekday_to_int[suffix[1:]] + return cls(week=week, weekday=weekday) + + +class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): + """ + Describes monthly dates in last week of month like "the last Tuesday of + each month". + + Parameters + ---------- + n : int, default 1 + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + """ + + _prefix = "LWOM" + _adjust_dst = True + _attributes = frozenset(["n", "normalize", "weekday"]) + + def __init__(self, n=1, normalize=False, weekday=0): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) + + if self.n == 0: + raise ValueError("N cannot be 0") + + if self.weekday < 0 or self.weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + + def _get_offset_day(self, other): + """ + Find the day in the same month as other that has the same + weekday as self.weekday and is the last such day in the month. 
+ + Parameters + ---------- + other: datetime + + Returns + ------- + day: int + """ + dim = ccalendar.get_days_in_month(other.year, other.month) + mend = datetime(other.year, other.month, dim) + wday = mend.weekday() + shift_days = (wday - self.weekday) % 7 + return dim - shift_days + + @property + def rule_code(self): + weekday = ccalendar.int_to_weekday.get(self.weekday, "") + return f"{self._prefix}-{weekday}" + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + raise ValueError(f"Prefix {repr(cls._prefix)} requires a suffix.") + # TODO: handle n here... + weekday = ccalendar.weekday_to_int[suffix] + return cls(weekday=weekday) + + +# --------------------------------------------------------------------- +# Quarter-Based Offset Classes + + +class QuarterOffset(DateOffset): + """ + Quarter representation - doesn't call super. + """ + + _default_startingMonth: Optional[int] = None + _from_name_startingMonth: Optional[int] = None + _adjust_dst = True + _attributes = frozenset(["n", "normalize", "startingMonth"]) + # TODO: Consider combining QuarterOffset and YearOffset __init__ at some + # point. 
Also apply_index, is_on_offset, rule_code if + # startingMonth vs month attr names are resolved + + def __init__(self, n=1, normalize=False, startingMonth=None): + BaseOffset.__init__(self, n, normalize) + + if startingMonth is None: + startingMonth = self._default_startingMonth + object.__setattr__(self, "startingMonth", startingMonth) + + def is_anchored(self): + return self.n == 1 and self.startingMonth is not None + + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs["startingMonth"] = ccalendar.MONTH_TO_CAL_NUM[suffix] + else: + if cls._from_name_startingMonth is not None: + kwargs["startingMonth"] = cls._from_name_startingMonth + return cls(**kwargs) + + @property + def rule_code(self): + month = ccalendar.MONTH_ALIASES[self.startingMonth] + return f"{self._prefix}-{month}" + + @apply_wraps + def apply(self, other): + # months_since: find the calendar quarter containing other.month, + # e.g. if other.month == 8, the calendar quarter is [Jul, Aug, Sep]. + # Then find the month in that quarter containing an is_on_offset date for + # self. `months_since` is the number of months to shift other.month + # to get to this on-offset month. + months_since = other.month % 3 - self.startingMonth % 3 + qtrs = liboffsets.roll_qtrday( + other, self.n, self.startingMonth, day_opt=self._day_opt, modby=3 + ) + months = qtrs * 3 - months_since + return shift_month(other, months, self._day_opt) + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + mod_month = (dt.month - self.startingMonth) % 3 + return mod_month == 0 and dt.day == self._get_offset_day(dt) + + @apply_index_wraps + def apply_index(self, dtindex): + shifted = liboffsets.shift_quarters( + dtindex.asi8, self.n, self.startingMonth, self._day_opt + ) + # TODO: going through __new__ raises on call to _validate_frequency; + # are we passing incorrect freq? 
+ return type(dtindex)._simple_new( + shifted, freq=dtindex.freq, dtype=dtindex.dtype + ) + + +class BQuarterEnd(QuarterOffset): + """ + DateOffset increments between business Quarter dates. + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... + """ + + _outputName = "BusinessQuarterEnd" + _default_startingMonth = 3 + _from_name_startingMonth = 12 + _prefix = "BQ" + _day_opt = "business_end" + + +# TODO: This is basically the same as BQuarterEnd +class BQuarterBegin(QuarterOffset): + _outputName = "BusinessQuarterBegin" + # I suspect this is wrong for *all* of them. + _default_startingMonth = 3 + _from_name_startingMonth = 1 + _prefix = "BQS" + _day_opt = "business_start" + + +class QuarterEnd(QuarterOffset): + """ + DateOffset increments between business Quarter dates. + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/31/2007, 6/30/2007, ... + """ + + _outputName = "QuarterEnd" + _default_startingMonth = 3 + _prefix = "Q" + _day_opt = "end" + + +class QuarterBegin(QuarterOffset): + _outputName = "QuarterBegin" + _default_startingMonth = 3 + _from_name_startingMonth = 1 + _prefix = "QS" + _day_opt = "start" + + +# --------------------------------------------------------------------- +# Year-Based Offset Classes + + +class YearOffset(DateOffset): + """ + DateOffset that just needs a month. 
+ """ + + _adjust_dst = True + _attributes = frozenset(["n", "normalize", "month"]) + + def _get_offset_day(self, other): + # override BaseOffset method to use self.month instead of other.month + # TODO: there may be a more performant way to do this + return liboffsets.get_day_of_month( + other.replace(month=self.month), self._day_opt + ) + + @apply_wraps + def apply(self, other): + years = roll_yearday(other, self.n, self.month, self._day_opt) + months = years * 12 + (self.month - other.month) + return shift_month(other, months, self._day_opt) + + @apply_index_wraps + def apply_index(self, dtindex): + shifted = liboffsets.shift_quarters( + dtindex.asi8, self.n, self.month, self._day_opt, modby=12 + ) + # TODO: going through __new__ raises on call to _validate_frequency; + # are we passing incorrect freq? + return type(dtindex)._simple_new( + shifted, freq=dtindex.freq, dtype=dtindex.dtype + ) + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return dt.month == self.month and dt.day == self._get_offset_day(dt) + + def __init__(self, n=1, normalize=False, month=None): + BaseOffset.__init__(self, n, normalize) + + month = month if month is not None else self._default_month + object.__setattr__(self, "month", month) + + if self.month < 1 or self.month > 12: + raise ValueError("Month must go from 1 to 12") + + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs["month"] = ccalendar.MONTH_TO_CAL_NUM[suffix] + return cls(**kwargs) + + @property + def rule_code(self): + month = ccalendar.MONTH_ALIASES[self.month] + return f"{self._prefix}-{month}" + + +class BYearEnd(YearOffset): + """ + DateOffset increments between business EOM dates. + """ + + _outputName = "BusinessYearEnd" + _default_month = 12 + _prefix = "BA" + _day_opt = "business_end" + + +class BYearBegin(YearOffset): + """ + DateOffset increments between business year begin dates. 
+ """ + + _outputName = "BusinessYearBegin" + _default_month = 1 + _prefix = "BAS" + _day_opt = "business_start" + + +class YearEnd(YearOffset): + """ + DateOffset increments between calendar year ends. + """ + + _default_month = 12 + _prefix = "A" + _day_opt = "end" + + +class YearBegin(YearOffset): + """ + DateOffset increments between calendar year begin dates. + """ + + _default_month = 1 + _prefix = "AS" + _day_opt = "start" + + +# --------------------------------------------------------------------- +# Special Offset Classes + + +class FY5253(DateOffset): + """ + Describes 52-53 week fiscal year. This is also known as a 4-4-5 calendar. + + It is used by companies that desire that their + fiscal year always end on the same day of the week. + + It is a method of managing accounting periods. + It is a common calendar structure for some industries, + such as retail, manufacturing and parking industry. + + For more information see: + http://en.wikipedia.org/wiki/4-4-5_calendar + + The year may either: + + - end on the last X day of the Y month. + - end on the last X day closest to the last day of the Y month. + + X is a specific day of the week. + Y is a certain month of the year + + Parameters + ---------- + n : int + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + startingMonth : int {1, 2, ... 12}, default 1 + The month in which the fiscal year ends. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. + + There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. 
+ """ + + _prefix = "RE" + _adjust_dst = True + _attributes = frozenset(["weekday", "startingMonth", "variation"]) + + def __init__( + self, n=1, normalize=False, weekday=0, startingMonth=1, variation="nearest" + ): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "startingMonth", startingMonth) + object.__setattr__(self, "weekday", weekday) + + object.__setattr__(self, "variation", variation) + + if self.n == 0: + raise ValueError("N cannot be 0") + + if self.variation not in ["nearest", "last"]: + raise ValueError(f"{self.variation} is not a valid variation") + + def is_anchored(self): + return ( + self.n == 1 and self.startingMonth is not None and self.weekday is not None + ) + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + dt = datetime(dt.year, dt.month, dt.day) + year_end = self.get_year_end(dt) + + if self.variation == "nearest": + # We have to check the year end of "this" cal year AND the previous + return year_end == dt or self.get_year_end(shift_month(dt, -1, None)) == dt + else: + return year_end == dt + + @apply_wraps + def apply(self, other): + norm = Timestamp(other).normalize() + + n = self.n + prev_year = self.get_year_end(datetime(other.year - 1, self.startingMonth, 1)) + cur_year = self.get_year_end(datetime(other.year, self.startingMonth, 1)) + next_year = self.get_year_end(datetime(other.year + 1, self.startingMonth, 1)) + + prev_year = conversion.localize_pydatetime(prev_year, other.tzinfo) + cur_year = conversion.localize_pydatetime(cur_year, other.tzinfo) + next_year = conversion.localize_pydatetime(next_year, other.tzinfo) + + # Note: next_year.year == other.year + 1, so we will always + # have other < next_year + if norm == prev_year: + n -= 1 + elif norm == cur_year: + pass + elif n > 0: + if norm < prev_year: + n -= 2 + elif prev_year < norm < cur_year: + n -= 1 + elif cur_year < norm < next_year: + pass + else: + if cur_year < norm < next_year: + n += 1 + elif prev_year 
< norm < cur_year: + pass + elif ( + norm.year == prev_year.year + and norm < prev_year + and prev_year - norm <= timedelta(6) + ): + # GH#14774, error when next_year.year == cur_year.year + # e.g. prev_year == datetime(2004, 1, 3), + # other == datetime(2004, 1, 1) + n -= 1 + else: + assert False + + shifted = datetime(other.year + n, self.startingMonth, 1) + result = self.get_year_end(shifted) + result = datetime( + result.year, + result.month, + result.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + return result + + def get_year_end(self, dt): + assert dt.tzinfo is None + + dim = ccalendar.get_days_in_month(dt.year, self.startingMonth) + target_date = datetime(dt.year, self.startingMonth, dim) + wkday_diff = self.weekday - target_date.weekday() + if wkday_diff == 0: + # year_end is the same for "last" and "nearest" cases + return target_date + + if self.variation == "last": + days_forward = (wkday_diff % 7) - 7 + + # days_forward is always negative, so we always end up + # in the same year as dt + return target_date + timedelta(days=days_forward) + else: + # variation == "nearest": + days_forward = wkday_diff % 7 + if days_forward <= 3: + # The upcoming self.weekday is closer than the previous one + return target_date + timedelta(days_forward) + else: + # The previous self.weekday is closer than the upcoming one + return target_date + timedelta(days_forward - 7) + + @property + def rule_code(self): + prefix = self._prefix + suffix = self.get_rule_code_suffix() + return f"{prefix}-{suffix}" + + def _get_suffix_prefix(self): + if self.variation == "nearest": + return "N" + else: + return "L" + + def get_rule_code_suffix(self): + prefix = self._get_suffix_prefix() + month = ccalendar.MONTH_ALIASES[self.startingMonth] + weekday = ccalendar.int_to_weekday[self.weekday] + return f"{prefix}-{month}-{weekday}" + + @classmethod + def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): + if varion_code == "N": + variation = 
"nearest" + elif varion_code == "L": + variation = "last" + else: + raise ValueError(f"Unable to parse varion_code: {varion_code}") + + startingMonth = ccalendar.MONTH_TO_CAL_NUM[startingMonth_code] + weekday = ccalendar.weekday_to_int[weekday_code] + + return { + "weekday": weekday, + "startingMonth": startingMonth, + "variation": variation, + } + + @classmethod + def _from_name(cls, *args): + return cls(**cls._parse_suffix(*args)) + + +class FY5253Quarter(DateOffset): + """ + DateOffset increments between business quarter dates + for 52-53 week fiscal year (also known as a 4-4-5 calendar). + + It is used by companies that desire that their + fiscal year always end on the same day of the week. + + It is a method of managing accounting periods. + It is a common calendar structure for some industries, + such as retail, manufacturing and parking industry. + + For more information see: + http://en.wikipedia.org/wiki/4-4-5_calendar + + The year may either: + + - end on the last X day of the Y month. + - end on the last X day closest to the last day of the Y month. + + X is a specific day of the week. + Y is a certain month of the year + + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... + + Parameters + ---------- + n : int + weekday : int {0, 1, ..., 6}, default 0 + A specific integer for the day of the week. + + - 0 is Monday + - 1 is Tuesday + - 2 is Wednesday + - 3 is Thursday + - 4 is Friday + - 5 is Saturday + - 6 is Sunday. + + startingMonth : int {1, 2, ..., 12}, default 1 + The month in which fiscal years end. + + qtr_with_extra_week : int {1, 2, 3, 4}, default 1 + The quarter number that has the leap or 14 week when needed. + + variation : str, default "nearest" + Method of employing 4-4-5 calendar. 
+ + There are two options: + + - "nearest" means year end is **weekday** closest to last day of month in year. + - "last" means year end is final **weekday** of the final month in fiscal year. + """ + + _prefix = "REQ" + _adjust_dst = True + _attributes = frozenset( + ["weekday", "startingMonth", "qtr_with_extra_week", "variation"] + ) + + def __init__( + self, + n=1, + normalize=False, + weekday=0, + startingMonth=1, + qtr_with_extra_week=1, + variation="nearest", + ): + BaseOffset.__init__(self, n, normalize) + + object.__setattr__(self, "startingMonth", startingMonth) + object.__setattr__(self, "weekday", weekday) + object.__setattr__(self, "qtr_with_extra_week", qtr_with_extra_week) + object.__setattr__(self, "variation", variation) + + if self.n == 0: + raise ValueError("N cannot be 0") + + @cache_readonly + def _offset(self): + return FY5253( + startingMonth=self.startingMonth, + weekday=self.weekday, + variation=self.variation, + ) + + def is_anchored(self): + return self.n == 1 and self._offset.is_anchored() + + def _rollback_to_year(self, other): + """ + Roll `other` back to the most recent date that was on a fiscal year + end. + + Return the date of that year-end, the number of full quarters + elapsed between that year-end and other, and the remaining Timedelta + since the most recent quarter-end. 
+ + Parameters + ---------- + other : datetime or Timestamp + + Returns + ------- + tuple of + prev_year_end : Timestamp giving most recent fiscal year end + num_qtrs : int + tdelta : Timedelta + """ + num_qtrs = 0 + + norm = Timestamp(other).tz_localize(None) + start = self._offset.rollback(norm) + # Note: start <= norm and self._offset.is_on_offset(start) + + if start < norm: + # roll adjustment + qtr_lens = self.get_weeks(norm) + + # check thet qtr_lens is consistent with self._offset addition + end = liboffsets.shift_day(start, days=7 * sum(qtr_lens)) + assert self._offset.is_on_offset(end), (start, end, qtr_lens) + + tdelta = norm - start + for qlen in qtr_lens: + if qlen * 7 <= tdelta.days: + num_qtrs += 1 + tdelta -= Timedelta(days=qlen * 7) + else: + break + else: + tdelta = Timedelta(0) + + # Note: we always have tdelta.value >= 0 + return start, num_qtrs, tdelta + + @apply_wraps + def apply(self, other): + # Note: self.n == 0 is not allowed. + n = self.n + + prev_year_end, num_qtrs, tdelta = self._rollback_to_year(other) + res = prev_year_end + n += num_qtrs + if self.n <= 0 and tdelta.value > 0: + n += 1 + + # Possible speedup by handling years first. + years = n // 4 + if years: + res += self._offset * years + n -= years * 4 + + # Add an extra day to make *sure* we are getting the quarter lengths + # for the upcoming year, not the previous year + qtr_lens = self.get_weeks(res + Timedelta(days=1)) + + # Note: we always have 0 <= n < 4 + weeks = sum(qtr_lens[:n]) + if weeks: + res = liboffsets.shift_day(res, days=weeks * 7) + + return res + + def get_weeks(self, dt): + ret = [13] * 4 + + year_has_extra_week = self.year_has_extra_week(dt) + + if year_has_extra_week: + ret[self.qtr_with_extra_week - 1] = 14 + + return ret + + def year_has_extra_week(self, dt): + # Avoid round-down errors --> normalize to get + # e.g. 
'370D' instead of '360D23H' + norm = Timestamp(dt).normalize().tz_localize(None) + + next_year_end = self._offset.rollforward(norm) + prev_year_end = norm - self._offset + weeks_in_year = (next_year_end - prev_year_end).days / 7 + assert weeks_in_year in [52, 53], weeks_in_year + return weeks_in_year == 53 + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + if self._offset.is_on_offset(dt): + return True + + next_year_end = dt - self._offset + + qtr_lens = self.get_weeks(dt) + + current = next_year_end + for qtr_len in qtr_lens: + current = liboffsets.shift_day(current, days=qtr_len * 7) + if dt == current: + return True + return False + + @property + def rule_code(self): + suffix = self._offset.get_rule_code_suffix() + qtr = self.qtr_with_extra_week + return f"{self._prefix}-{suffix}-{qtr}" + + @classmethod + def _from_name(cls, *args): + return cls( + **dict(FY5253._parse_suffix(*args[:-1]), qtr_with_extra_week=int(args[-1])) + ) + + +class Easter(DateOffset): + """ + DateOffset for the Easter holiday using logic defined in dateutil. + + Right now uses the revised method which is valid in years 1583-4099. + """ + + _adjust_dst = True + _attributes = frozenset(["n", "normalize"]) + + __init__ = BaseOffset.__init__ + + @apply_wraps + def apply(self, other): + current_easter = easter(other.year) + current_easter = datetime( + current_easter.year, current_easter.month, current_easter.day + ) + current_easter = conversion.localize_pydatetime(current_easter, other.tzinfo) + + n = self.n + if n >= 0 and other < current_easter: + n -= 1 + elif n < 0 and other > current_easter: + n += 1 + # TODO: Why does this handle the 0 case the opposite of others? 
+ + # NOTE: easter returns a datetime.date so we have to convert to type of + # other + new = easter(other.year + n) + new = datetime( + new.year, + new.month, + new.day, + other.hour, + other.minute, + other.second, + other.microsecond, + ) + return new + + def is_on_offset(self, dt): + if self.normalize and not _is_normalized(dt): + return False + return date(dt.year, dt.month, dt.day) == easter(dt.year) + + +# --------------------------------------------------------------------- +# Ticks + + +def _tick_comp(op): + assert op not in [operator.eq, operator.ne] + + def f(self, other): + try: + return op(self.delta, other.delta) + except AttributeError: + # comparing with a non-Tick object + raise TypeError( + f"Invalid comparison between {type(self).__name__} " + f"and {type(other).__name__}" + ) + + f.__name__ = f"__{op.__name__}__" + return f + + +class Tick(liboffsets._Tick, SingleConstructorOffset): + _inc = Timedelta(microseconds=1000) + _prefix = "undefined" + _attributes = frozenset(["n", "normalize"]) + + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n, normalize) + if normalize: + raise ValueError( + "Tick offset with `normalize=True` are not allowed." 
+ ) # GH#21427 + + __gt__ = _tick_comp(operator.gt) + __ge__ = _tick_comp(operator.ge) + __lt__ = _tick_comp(operator.lt) + __le__ = _tick_comp(operator.le) + + def __add__(self, other): + if isinstance(other, Tick): + if type(self) == type(other): + return type(self)(self.n + other.n) + else: + return _delta_to_tick(self.delta + other.delta) + elif isinstance(other, Period): + return other + self + try: + return self.apply(other) + except ApplyTypeError: + return NotImplemented + except OverflowError: + raise OverflowError( + f"the add operation between {self} and {other} will overflow" + ) + + def __eq__(self, other: Any) -> bool: + if isinstance(other, str): + from pandas.tseries.frequencies import to_offset + + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return False + + if isinstance(other, Tick): + return self.delta == other.delta + else: + return False + + # This is identical to DateOffset.__hash__, but has to be redefined here + # for Python 3, because we've redefined __eq__. + def __hash__(self): + return hash(self._params) + + def __ne__(self, other): + if isinstance(other, str): + from pandas.tseries.frequencies import to_offset + + try: + # GH#23524 if to_offset fails, we are dealing with an + # incomparable type so == is False and != is True + other = to_offset(other) + except ValueError: + # e.g. "infer" + return True + + if isinstance(other, Tick): + return self.delta != other.delta + else: + return True + + @property + def delta(self): + return self.n * self._inc + + @property + def nanos(self): + return delta_to_nanoseconds(self.delta) + + # TODO: Should Tick have its own apply_index? 
+ def apply(self, other): + # Timestamp can handle tz and nano sec, thus no need to use apply_wraps + if isinstance(other, Timestamp): + + # GH 15126 + # in order to avoid a recursive + # call of __add__ and __radd__ if there is + # an exception, when we call using the + operator, + # we directly call the known method + result = other.__add__(self) + if result is NotImplemented: + raise OverflowError + return result + elif isinstance(other, (datetime, np.datetime64, date)): + return as_timestamp(other) + self + + if isinstance(other, timedelta): + return other + self.delta + elif isinstance(other, type(self)): + return type(self)(self.n + other.n) + + raise ApplyTypeError(f"Unhandled type: {type(other).__name__}") + + def is_anchored(self): + return False + + +def _delta_to_tick(delta): + if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: + # nanoseconds only for pd.Timedelta + if delta.seconds == 0: + return Day(delta.days) + else: + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(seconds / 3600) + elif seconds % 60 == 0: + return Minute(seconds / 60) + else: + return Second(seconds) + else: + nanos = delta_to_nanoseconds(delta) + if nanos % 1000000 == 0: + return Milli(nanos // 1000000) + elif nanos % 1000 == 0: + return Micro(nanos // 1000) + else: # pragma: no cover + return Nano(nanos) + + +class Day(Tick): + _inc = Timedelta(days=1) + _prefix = "D" + + +class Hour(Tick): + _inc = Timedelta(hours=1) + _prefix = "H" + + +class Minute(Tick): + _inc = Timedelta(minutes=1) + _prefix = "T" + + +class Second(Tick): + _inc = Timedelta(seconds=1) + _prefix = "S" + + +class Milli(Tick): + _inc = Timedelta(milliseconds=1) + _prefix = "L" + + +class Micro(Tick): + _inc = Timedelta(microseconds=1) + _prefix = "U" + + +class Nano(Tick): + _inc = Timedelta(nanoseconds=1) + _prefix = "N" + + +BDay = BusinessDay +BMonthEnd = BusinessMonthEnd +BMonthBegin = BusinessMonthBegin +CBMonthEnd = CustomBusinessMonthEnd 
+CBMonthBegin = CustomBusinessMonthBegin +CDay = CustomBusinessDay + +# --------------------------------------------------------------------- + + +def generate_range(start=None, end=None, periods=None, offset=BDay()): + """ + Generates a sequence of dates corresponding to the specified time + offset. Similar to dateutil.rrule except uses pandas DateOffset + objects to represent time increments. + + Parameters + ---------- + start : datetime, (default None) + end : datetime, (default None) + periods : int, (default None) + offset : DateOffset, (default BDay()) + + Notes + ----- + * This method is faster for generating weekdays than dateutil.rrule + * At least two of (start, end, periods) must be specified. + * If both start and end are specified, the returned dates will + satisfy start <= date <= end. + + Returns + ------- + dates : generator object + """ + from pandas.tseries.frequencies import to_offset + + offset = to_offset(offset) + + start = Timestamp(start) + start = start if start is not NaT else None + end = Timestamp(end) + end = end if end is not NaT else None + + if start and not offset.is_on_offset(start): + start = offset.rollforward(start) + + elif end and not offset.is_on_offset(end): + end = offset.rollback(end) + + if periods is None and end < start and offset.n >= 0: + end = None + periods = 0 + + if end is None: + end = start + (periods - 1) * offset + + if start is None: + start = end - (periods - 1) * offset + + cur = start + if offset.n >= 0: + while cur <= end: + yield cur + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur + offset + next_date = offset.apply(cur) + if next_date <= cur: + raise ValueError(f"Offset {offset} did not increment date") + cur = next_date + else: + while cur >= end: + yield cur + + if cur == end: + # GH#24252 avoid overflows by not performing the addition + # in offset.apply unless we have to + break + + # faster than cur 
+ offset + next_date = offset.apply(cur) + if next_date >= cur: + raise ValueError(f"Offset {offset} did not decrement date") + cur = next_date + + +prefix_mapping = { + offset._prefix: offset + for offset in [ + YearBegin, # 'AS' + YearEnd, # 'A' + BYearBegin, # 'BAS' + BYearEnd, # 'BA' + BusinessDay, # 'B' + BusinessMonthBegin, # 'BMS' + BusinessMonthEnd, # 'BM' + BQuarterEnd, # 'BQ' + BQuarterBegin, # 'BQS' + BusinessHour, # 'BH' + CustomBusinessDay, # 'C' + CustomBusinessMonthEnd, # 'CBM' + CustomBusinessMonthBegin, # 'CBMS' + CustomBusinessHour, # 'CBH' + MonthEnd, # 'M' + MonthBegin, # 'MS' + Nano, # 'N' + SemiMonthEnd, # 'SM' + SemiMonthBegin, # 'SMS' + Week, # 'W' + Second, # 'S' + Minute, # 'T' + Micro, # 'U' + QuarterEnd, # 'Q' + QuarterBegin, # 'QS' + Milli, # 'L' + Hour, # 'H' + Day, # 'D' + WeekOfMonth, # 'WOM' + FY5253, + FY5253Quarter, + ] +} diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py new file mode 100644 index 00000000..b5271dbc --- /dev/null +++ b/pandas/util/__init__.py @@ -0,0 +1,30 @@ +from pandas.util._decorators import Appender, Substitution, cache_readonly # noqa + +from pandas import compat +from pandas.core.util.hashing import hash_array, hash_pandas_object # noqa + +# compatibility for import pandas; pandas.util.testing + +if compat.PY37: + + def __getattr__(name): + if name == "testing": + import pandas.util.testing + + return pandas.util.testing + else: + raise AttributeError(f"module 'pandas.util' has no attribute '{name}'") + + +else: + + class _testing: + def __getattr__(self, item): + import pandas.util.testing + + return getattr(pandas.util.testing, item) + + testing = _testing() + + +del compat diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py new file mode 100644 index 00000000..d10d3a1f --- /dev/null +++ b/pandas/util/_decorators.py @@ -0,0 +1,342 @@ +from functools import wraps +import inspect +from textwrap import dedent +from typing import ( + Any, + Callable, + List, + Mapping, + 
Optional, + Tuple, + Type, + TypeVar, + Union, + cast, +) +import warnings + +from pandas._libs.properties import cache_readonly # noqa + +FuncType = Callable[..., Any] +F = TypeVar("F", bound=FuncType) + + +def deprecate( + name: str, + alternative: Callable[..., Any], + version: str, + alt_name: Optional[str] = None, + klass: Optional[Type[Warning]] = None, + stacklevel: int = 2, + msg: Optional[str] = None, +) -> Callable[..., Any]: + """ + Return a new function that emits a deprecation warning on use. + + To use this method for a deprecated function, another function + `alternative` with the same signature must exist. The deprecated + function will emit a deprecation warning, and in the docstring + it will contain the deprecation directive with the provided version + so it can be detected for future removal. + + Parameters + ---------- + name : str + Name of function to deprecate. + alternative : func + Function to use instead. + version : str + Version of pandas in which the method has been deprecated. + alt_name : str, optional + Name to use in preference of alternative.__name__. + klass : Warning, default FutureWarning + stacklevel : int, default 2 + msg : str + The message to display in the warning. + Default is '{name} is deprecated. Use {alt_name} instead.' + """ + + alt_name = alt_name or alternative.__name__ + klass = klass or FutureWarning + warning_msg = msg or f"{name} is deprecated, use {alt_name} instead" + + @wraps(alternative) + def wrapper(*args, **kwargs) -> Callable[..., Any]: + warnings.warn(warning_msg, klass, stacklevel=stacklevel) + return alternative(*args, **kwargs) + + # adding deprecated directive to the docstring + msg = msg or f"Use `{alt_name}` instead." + doc_error_msg = ( + "deprecate needs a correctly formatted docstring in " + "the target function (should have a one liner short " + "summary, and opening quotes should be in their own " + f"line). 
Found:\n{alternative.__doc__}" + ) + + # when python is running in optimized mode (i.e. `-OO`), docstrings are + # removed, so we check that a docstring with correct formatting is used + # but we allow empty docstrings + if alternative.__doc__: + if alternative.__doc__.count("\n") < 3: + raise AssertionError(doc_error_msg) + empty1, summary, empty2, doc = alternative.__doc__.split("\n", 3) + if empty1 or empty2 and not summary: + raise AssertionError(doc_error_msg) + wrapper.__doc__ = dedent( + f""" + {summary.strip()} + + .. deprecated:: {version} + {msg} + + {dedent(doc)}""" + ) + + return wrapper + + +def deprecate_kwarg( + old_arg_name: str, + new_arg_name: Optional[str], + mapping: Optional[Union[Mapping[Any, Any], Callable[[Any], Any]]] = None, + stacklevel: int = 2, +) -> Callable[..., Any]: + """ + Decorator to deprecate a keyword argument of a function. + + Parameters + ---------- + old_arg_name : str + Name of argument in function to deprecate + new_arg_name : str or None + Name of preferred argument in function. Use None to raise warning that + ``old_arg_name`` keyword is deprecated. + mapping : dict or callable + If mapping is present, use it to translate old arguments to + new arguments. A callable must do its own value checking; + values not found in a dict will be forwarded unchanged. + + Examples + -------- + The following deprecates 'cols', using 'columns' instead + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') + ... def f(columns=''): + ... print(columns) + ... + >>> f(columns='should work ok') + should work ok + + >>> f(cols='should raise warning') + FutureWarning: cols is deprecated, use columns instead + warnings.warn(msg, FutureWarning) + should raise warning + + >>> f(cols='should error', columns="can\'t pass do both") + TypeError: Can only specify 'cols' or 'columns', not both + + >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) + ... def f(new=False): + ... print('yes!' if new else 'no!') + ... 
+ >>> f(old='yes') + FutureWarning: old='yes' is deprecated, use new=True instead + warnings.warn(msg, FutureWarning) + yes! + + To raise a warning that a keyword will be removed entirely in the future + + >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None) + ... def f(cols='', another_param=''): + ... print(cols) + ... + >>> f(cols='should raise warning') + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning + >>> f(another_param='should not raise warning') + should not raise warning + + >>> f(cols='should raise warning', another_param='') + FutureWarning: the 'cols' keyword is deprecated and will be removed in a + future version please takes steps to stop use of 'cols' + should raise warning + """ + + if mapping is not None and not hasattr(mapping, "get") and not callable(mapping): + raise TypeError( + "mapping from old to new argument values must be dict or callable!" + ) + + def _deprecate_kwarg(func: F) -> F: + @wraps(func) + def wrapper(*args, **kwargs) -> Callable[..., Any]: + old_arg_value = kwargs.pop(old_arg_name, None) + + if old_arg_value is not None: + if new_arg_name is None: + msg = ( + f"the {repr(old_arg_name)} keyword is deprecated and " + "will be removed in a future version. 
Please take " + f"steps to stop the use of {repr(old_arg_name)}" + ) + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + kwargs[old_arg_name] = old_arg_value + return func(*args, **kwargs) + + elif mapping is not None: + if callable(mapping): + new_arg_value = mapping(old_arg_value) + else: + new_arg_value = mapping.get(old_arg_value, old_arg_value) + msg = ( + f"the {old_arg_name}={repr(old_arg_value)} keyword is " + "deprecated, use " + f"{new_arg_name}={repr(new_arg_value)} instead" + ) + else: + new_arg_value = old_arg_value + msg = ( + f"the {repr(old_arg_name)}' keyword is deprecated, " + f"use {repr(new_arg_name)} instead" + ) + + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + if kwargs.get(new_arg_name) is not None: + msg = ( + f"Can only specify {repr(old_arg_name)} " + f"or {repr(new_arg_name)}, not both" + ) + raise TypeError(msg) + else: + kwargs[new_arg_name] = new_arg_value + return func(*args, **kwargs) + + return cast(F, wrapper) + + return _deprecate_kwarg + + +def rewrite_axis_style_signature( + name: str, extra_params: List[Tuple[str, Any]] +) -> Callable[..., Any]: + def decorate(func: F) -> F: + @wraps(func) + def wrapper(*args, **kwargs) -> Callable[..., Any]: + return func(*args, **kwargs) + + kind = inspect.Parameter.POSITIONAL_OR_KEYWORD + params = [ + inspect.Parameter("self", kind), + inspect.Parameter(name, kind, default=None), + inspect.Parameter("index", kind, default=None), + inspect.Parameter("columns", kind, default=None), + inspect.Parameter("axis", kind, default=None), + ] + + for pname, default in extra_params: + params.append(inspect.Parameter(pname, kind, default=default)) + + sig = inspect.Signature(params) + + # https://github.com/python/typing/issues/598 + func.__signature__ = sig # type: ignore + return cast(F, wrapper) + + return decorate + + +# Substitution and Appender are derived from matplotlib.docstring (1.1.0) +# module http://matplotlib.org/users/license.html + + +class Substitution: + """ + A 
decorator to take a function's docstring and perform string + substitution on it. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter) + + Usage: construct a docstring.Substitution with a sequence or + dictionary suitable for performing substitution; then + decorate a suitable function with the constructed object. e.g. + + sub_author_name = Substitution(author='Jason') + + @sub_author_name + def some_function(x): + "%(author)s wrote this function" + + # note that some_function.__doc__ is now "Jason wrote this function" + + One can also use positional arguments. + + sub_first_last_names = Substitution('Edgar Allen', 'Poe') + + @sub_first_last_names + def some_function(x): + "%s %s wrote the Raven" + """ + + def __init__(self, *args, **kwargs): + if args and kwargs: + raise AssertionError("Only positional or keyword args are allowed") + + self.params = args or kwargs + + def __call__(self, func: F) -> F: + func.__doc__ = func.__doc__ and func.__doc__ % self.params + return func + + def update(self, *args, **kwargs) -> None: + """ + Update self.params with supplied args. + """ + if isinstance(self.params, dict): + self.params.update(*args, **kwargs) + + +class Appender: + """ + A function decorator that will append an addendum to the docstring + of the target function. + + This decorator should be robust even if func.__doc__ is None + (for example, if -OO was passed to the interpreter). + + Usage: construct a docstring.Appender with a string to be joined to + the original docstring. An optional 'join' parameter may be supplied + which will be used to join the docstring and addendum. e.g. 
+ + add_copyright = Appender("Copyright (c) 2009", join='\n') + + @add_copyright + def my_dog(has='fleas'): + "This docstring will have a copyright below" + pass + """ + + addendum: Optional[str] + + def __init__(self, addendum: Optional[str], join: str = "", indents: int = 0): + if indents > 0: + self.addendum = indent(addendum, indents=indents) + else: + self.addendum = addendum + self.join = join + + def __call__(self, func: F) -> F: + func.__doc__ = func.__doc__ if func.__doc__ else "" + self.addendum = self.addendum if self.addendum else "" + docitems = [func.__doc__, self.addendum] + func.__doc__ = dedent(self.join.join(docitems)) + return func + + +def indent(text: Optional[str], indents: int = 1) -> str: + if not text or not isinstance(text, str): + return "" + jointext = "".join(["\n"] + [" "] * indents) + return jointext.join(text.split("\n")) diff --git a/pandas/util/_depr_module.py b/pandas/util/_depr_module.py new file mode 100644 index 00000000..5694ca24 --- /dev/null +++ b/pandas/util/_depr_module.py @@ -0,0 +1,107 @@ +""" +This module houses a utility class for mocking deprecated modules. +It is for internal use only and should not be used beyond this purpose. +""" + +import importlib +from typing import Iterable +import warnings + + +class _DeprecatedModule: + """ + Class for mocking deprecated modules. + + Parameters + ---------- + deprmod : name of module to be deprecated. + deprmodto : name of module as a replacement, optional. + If not given, the __module__ attribute will + be used when needed. + removals : objects or methods in module that will no longer be + accessible once module is removed. 
+ moved : dict, optional + dictionary of function name -> new location for moved + objects + """ + + def __init__(self, deprmod, deprmodto=None, removals=None, moved=None): + self.deprmod = deprmod + self.deprmodto = deprmodto + self.removals = removals + if self.removals is not None: + self.removals = frozenset(self.removals) + self.moved = moved + + # For introspection purposes. + self.self_dir = frozenset(dir(type(self))) + + def __dir__(self) -> Iterable[str]: + deprmodule = self._import_deprmod() + return dir(deprmodule) + + def __repr__(self) -> str: + deprmodule = self._import_deprmod() + return repr(deprmodule) + + __str__ = __repr__ + + def __getattr__(self, name: str): + if name in self.self_dir: + return object.__getattribute__(self, name) + + try: + deprmodule = self._import_deprmod(self.deprmod) + except ImportError: + if self.deprmodto is None: + raise + + # a rename + deprmodule = self._import_deprmod(self.deprmodto) + + obj = getattr(deprmodule, name) + + if self.removals is not None and name in self.removals: + warnings.warn( + f"{self.deprmod}.{name} is deprecated and will be removed in " + "a future version.", + FutureWarning, + stacklevel=2, + ) + elif self.moved is not None and name in self.moved: + warnings.warn( + f"{self.deprmod} is deprecated and will be removed in " + f"a future version.\nYou can access {name} as {self.moved[name]}", + FutureWarning, + stacklevel=2, + ) + else: + deprmodto = self.deprmodto + if deprmodto is False: + warnings.warn( + f"{self.deprmod}.{name} is deprecated and will be removed in " + "a future version.", + FutureWarning, + stacklevel=2, + ) + else: + if deprmodto is None: + deprmodto = obj.__module__ + # The object is actually located in another module. + warnings.warn( + f"{self.deprmod}.{name} is deprecated. 
class TablePlotter:
    """
    Layout some DataFrames in vertical/horizontal layout for explanation.
    Used in merging.rst
    """

    def __init__(
        self,
        cell_width: float = 0.37,
        cell_height: float = 0.25,
        font_size: float = 7.5,
    ):
        self.cell_width = cell_width
        self.cell_height = cell_height
        self.font_size = font_size

    def _shape(self, df: pd.DataFrame) -> Tuple[int, int]:
        """
        Calculate table shape considering index levels.

        Column header levels are counted as extra rows and index levels as
        extra columns, because both are drawn as cells of the table.
        """
        row, col = df.shape
        return row + df.columns.nlevels, col + df.index.nlevels

    def _get_cells(self, left, right, vertical) -> Tuple[int, int]:
        """
        Calculate appropriate figure size based on left and right data.

        Returns
        -------
        tuple of int
            ``(hcells, vcells)``: number of cells needed horizontally and
            vertically.
        """
        left_shapes = [self._shape(frame) for frame in left]
        right_rows, right_cols = self._shape(right)
        if vertical:
            # inputs are stacked on top of each other, result sits beside them
            vcells = max(sum(rows for rows, _ in left_shapes), right_rows)
            hcells = max(cols for _, cols in left_shapes) + right_cols
        else:
            # inputs and result are all placed side by side
            vcells = max([rows for rows, _ in left_shapes] + [right_rows])
            hcells = sum([cols for _, cols in left_shapes] + [right_cols])
        return hcells, vcells

    @staticmethod
    def _normalize_labels(labels, count: int) -> list:
        """
        Return exactly ``count`` titles for the left-hand tables.

        ``None`` yields empty titles, a single string is treated as one
        title, and a too-short sequence is padded with empty titles so that
        no table is silently dropped when labels and tables are zipped.
        """
        if labels is None:
            labels = []
        elif isinstance(labels, str):
            labels = [labels]
        else:
            labels = list(labels)
        labels = labels[:count]
        return labels + [""] * (count - len(labels))

    def plot(self, left, right, labels=None, vertical: bool = True):
        """
        Plot left / right DataFrames in specified layout.

        Parameters
        ----------
        left : list of DataFrames before operation is applied
        right : DataFrame of operation result
        labels : list of str to be drawn as titles of left DataFrames
            If None, the left DataFrames are drawn with empty titles.
        vertical : bool, default True
            If True, use vertical layout. If False, use horizontal layout.

        Returns
        -------
        matplotlib.figure.Figure
        """
        import matplotlib.gridspec as gridspec
        import matplotlib.pyplot as plt

        if not isinstance(left, list):
            left = [left]
        left = [self._conv(frame) for frame in left]
        right = self._conv(right)
        labels = self._normalize_labels(labels, len(left))

        hcells, vcells = self._get_cells(left, right, vertical)

        figsize = self.cell_width * hcells, self.cell_height * vcells
        fig = plt.figure(figsize=figsize)

        if vertical:
            gs = gridspec.GridSpec(len(left), hcells)
            # left
            max_left_cols = max(self._shape(frame)[1] for frame in left)
            max_left_rows = max(self._shape(frame)[0] for frame in left)
            for i, (frame, label) in enumerate(zip(left, labels)):
                ax = fig.add_subplot(gs[i, 0:max_left_cols])
                self._make_table(ax, frame, title=label, height=1.0 / max_left_rows)
            # right
            ax = fig.add_subplot(gs[:, max_left_cols:])
            self._make_table(ax, right, title="Result", height=1.05 / vcells)
            fig.subplots_adjust(top=0.9, bottom=0.05, left=0.05, right=0.95)
        else:
            max_rows = max(self._shape(df)[0] for df in left + [right])
            height = 1.0 / max_rows
            gs = gridspec.GridSpec(1, hcells)
            # left
            i = 0
            for frame, label in zip(left, labels):
                sp = self._shape(frame)
                ax = fig.add_subplot(gs[0, i : i + sp[1]])
                self._make_table(ax, frame, title=label, height=height)
                i += sp[1]
            # right
            ax = fig.add_subplot(gs[0, i:])
            self._make_table(ax, right, title="Result", height=height)
            fig.subplots_adjust(top=0.85, bottom=0.05, left=0.05, right=0.95)

        return fig

    def _conv(self, data):
        """
        Convert each input to a form appropriate for table output.

        A Series becomes a one-column DataFrame (with an empty column name
        if the Series is unnamed) and missing values are shown as "NaN".
        """
        if isinstance(data, pd.Series):
            if data.name is None:
                data = data.to_frame(name="")
            else:
                data = data.to_frame()
        data = data.fillna("NaN")
        return data

    def _insert_index(self, data):
        """
        Return a copy of ``data`` with index (and extra column) levels
        materialised as ordinary cells so they are drawn in the table.
        """
        # insert is destructive
        data = data.copy()
        idx_nlevels = data.index.nlevels
        if idx_nlevels == 1:
            data.insert(0, "Index", data.index)
        else:
            for i in range(idx_nlevels):
                data.insert(i, f"Index{i}", data.index._get_level_values(i))

        col_nlevels = data.columns.nlevels
        if col_nlevels > 1:
            # keep the first column level as the header and push the
            # remaining levels down into the body as leading rows
            col = data.columns._get_level_values(0)
            values = [
                data.columns._get_level_values(i).values for i in range(1, col_nlevels)
            ]
            col_df = pd.DataFrame(values)
            data.columns = col_df.columns
            data = pd.concat([col_df, data])
            data.columns = col
        return data

    def _make_table(self, ax, df, title: str, height: Optional[float] = None):
        """
        Draw ``df`` as a table on ``ax``; header/index cells are shaded grey.

        If ``df`` is None the axes are hidden instead. ``height`` is the
        height given to every cell; by default it is derived from the
        number of rows.
        """
        if df is None:
            ax.set_visible(False)
            return

        import pandas.plotting as plotting

        idx_nlevels = df.index.nlevels
        col_nlevels = df.columns.nlevels
        # must be convert here to get index levels for colorization
        df = self._insert_index(df)
        tb = plotting.table(ax, df, loc=9)
        tb.set_fontsize(self.font_size)

        if height is None:
            height = 1.0 / (len(df) + 1)

        props = tb.properties()
        for (r, c), cell in props["celld"].items():
            if c == -1:
                # row-label column added by plotting.table; index is already
                # inserted as regular cells
                cell.set_visible(False)
            elif r < col_nlevels and c < idx_nlevels:
                # top-left corner where header rows and index columns meet
                cell.set_visible(False)
            elif r < col_nlevels or c < idx_nlevels:
                cell.set_facecolor("#AAAAAA")
            cell.set_height(height)

        ax.set_title(title, size=self.font_size)
        ax.axis("off")
@contextlib.contextmanager
def rewrite_exception(old_name: str, new_name: str):
    """
    Rewrite the message of an exception.

    Any exception raised inside the ``with`` body has ``old_name`` replaced
    by ``new_name`` in its first argument and is then re-raised. Remaining
    arguments are preserved.

    Parameters
    ----------
    old_name : str
        Text to look for in the exception message.
    new_name : str
        Text to substitute for ``old_name``.
    """
    try:
        yield
    except Exception as err:
        # An exception with no arguments, or whose first argument is not text
        # (e.g. ``OSError(errno, msg)``), has no message to rewrite. Re-raise
        # it untouched rather than masking it with IndexError/AttributeError.
        if not err.args or not isinstance(err.args[0], str):
            raise
        msg = err.args[0].replace(old_name, new_name)
        err.args = (msg,) + err.args[1:]
        raise
def show_versions(as_json=False):
    """
    Report system information and the versions of pandas' dependencies.

    Parameters
    ----------
    as_json : bool or str, default False
        * If False, print a human-readable table to stdout.
        * If True, print the information to stdout as JSON.
        * If a str, treat it as a file path and write the JSON to that file.
    """
    sys_info = get_sys_info()
    deps = [
        "pandas",
        # required
        "numpy",
        "pytz",
        "dateutil",
        # install / build,
        "pip",
        "setuptools",
        "Cython",
        # test
        "pytest",
        "hypothesis",
        # docs
        "sphinx",
        # Other, need a min version
        "blosc",
        "feather",
        "xlsxwriter",
        "lxml.etree",
        "html5lib",
        "pymysql",
        "psycopg2",
        "jinja2",
        # Other, not imported.
        "IPython",
        "pandas_datareader",
    ]

    deps.extend(VERSIONS)
    deps_blob = []

    for modname in deps:
        mod = import_optional_dependency(
            modname, raise_on_missing=False, on_version="ignore"
        )
        ver: Optional[str]
        if mod:
            ver = _get_version(mod)
        else:
            ver = None
        deps_blob.append((modname, ver))

    if as_json:
        j = dict(system=dict(sys_info), dependencies=dict(deps_blob))

        if as_json is True:
            # Emit real JSON; printing the dict itself would give a Python
            # repr (single quotes, ``None``) that JSON parsers reject.
            print(json.dumps(j, indent=2))
        else:
            with codecs.open(as_json, "wb", encoding="utf8") as f:
                json.dump(j, f, indent=2)

    else:
        # pad every key to the longest dependency name so values line up
        maxlen = max(len(x) for x in deps)
        tpl = "{{k:<{maxlen}}}: {{stat}}".format(maxlen=maxlen)
        print("\nINSTALLED VERSIONS")
        print("------------------")
        for k, stat in sys_info:
            print(tpl.format(k=k, stat=stat))
        print("")
        for k, stat in deps_blob:
            print(tpl.format(k=k, stat=stat))
def safe_import(mod_name: str, min_version: Optional[str] = None):
    """
    Import a module if it is available and recent enough.

    Parameters
    ----------
    mod_name : str
        Name of the module to be imported
    min_version : str, default None
        Minimum required version of the specified mod_name

    Returns
    -------
    object
        The imported module if successful, or False. False is also returned
        when ``min_version`` is given but the module exposes no version
        attribute to compare against.
    """
    try:
        mod = __import__(mod_name)
    except ImportError:
        return False

    if not min_version:
        return mod

    import sys

    imported = sys.modules[mod_name]
    version = getattr(imported, "__version__", None)
    if version is None:
        # xlrd uses a capitalized attribute name
        version = getattr(imported, "__VERSION__", None)
    if not version:
        # No version information at all: the requirement cannot be verified,
        # so report the module as unavailable instead of raising.
        return False

    from distutils.version import LooseVersion

    if LooseVersion(version) >= LooseVersion(min_version):
        return mod

    return False
def _skip_if_no_mpl():
    """
    Return True if matplotlib is unavailable, otherwise select the
    non-interactive "Agg" backend and return False.
    """
    mod = safe_import("matplotlib")
    if not mod:
        return True
    # The ``warn`` keyword was removed from ``matplotlib.use`` in newer
    # Matplotlib releases, so only the backend name is passed.
    mod.use("Agg")
    return False
def parametrize_fixture_doc(*args):
    """
    Intended for use as a decorator for parametrized fixture,
    this function formats the fixture's docstring in place by replacing
    placeholders {0}, {1} etc with parameters passed as arguments.

    Parameters
    ----------
    args: iterable
        Positional arguments for docstring.

    Returns
    -------
    function
        A decorator that returns the same fixture object with its
        docstring formatted. Fixtures without a docstring are returned
        unchanged.
    """

    def documented_fixture(fixture):
        # __doc__ is None when the fixture has no docstring or when Python
        # runs with -OO; there is nothing to format in that case.
        if fixture.__doc__ is not None:
            fixture.__doc__ = fixture.__doc__.format(*args)
        return fixture

    return documented_fixture
def test(extra_args=None):
    """
    Run the pandas test suite with pytest and exit with its return code.

    Parameters
    ----------
    extra_args : str or list of str, optional
        Arguments passed to pytest in place of the default
        ``--skip-slow --skip-network --skip-db`` flags. The package
        directory is always appended as the final argument.

    Raises
    ------
    ImportError
        If pytest or hypothesis is not installed.
    """
    try:
        import pytest
    except ImportError as err:
        raise ImportError("Need pytest>=5.0.1 to run tests") from err
    try:
        import hypothesis  # noqa
    except ImportError as err:
        raise ImportError("Need hypothesis>=3.58 to run tests") from err
    cmd = ["--skip-slow", "--skip-network", "--skip-db"]
    if extra_args:
        if not isinstance(extra_args, list):
            extra_args = [extra_args]
        # Copy: appending PKG below must not modify the caller's list.
        cmd = list(extra_args)
    cmd += [PKG]
    joined = " ".join(cmd)
    print(f"running: pytest {joined}")
    sys.exit(pytest.main(cmd))
+ + Note that this function is to be called only when it has been + checked that arg_val_dict.keys() is a subset of compat_args + """ + for key in arg_val_dict: + # try checking equality directly with '=' operator, + # as comparison may have been overridden for the left + # hand object + try: + v1 = arg_val_dict[key] + v2 = compat_args[key] + + # check for None-ness otherwise we could end up + # comparing a numpy array vs None + if (v1 is not None and v2 is None) or (v1 is None and v2 is not None): + match = False + else: + match = v1 == v2 + + if not is_bool(match): + raise ValueError("'match' is not a boolean") + + # could not compare them directly, so try comparison + # using the 'is' operator + except ValueError: + match = arg_val_dict[key] is compat_args[key] + + if not match: + raise ValueError( + f"the '{key}' parameter is not supported in " + f"the pandas implementation of {fname}()" + ) + + +def validate_args(fname, args, max_fname_arg_count, compat_args): + """ + Checks whether the length of the `*args` argument passed into a function + has at most `len(compat_args)` arguments and whether or not all of these + elements in `args` are set to their default values. + + Parameters + ---------- + fname : str + The name of the function being passed the `*args` parameter + args : tuple + The `*args` parameter passed into a function + max_fname_arg_count : int + The maximum number of arguments that the function `fname` + can accept, excluding those in `args`. Used for displaying + appropriate error messages. Must be non-negative. + compat_args : dict + A dictionary of keys and their associated default values. + In order to accommodate buggy behaviour in some versions of `numpy`, + where a signature displayed keyword arguments but then passed those + arguments **positionally** internally when calling downstream + implementations, a dict ensures that the original + order of the keyword arguments is enforced. 
+ Raises + ------ + TypeError + If `args` contains more values than there are `compat_args` + ValueError + If `args` contains values that do not correspond to those + of the default values specified in `compat_args` + """ + _check_arg_length(fname, args, max_fname_arg_count, compat_args) + + # We do this so that we can provide a more informative + # error message about the parameters that we are not + # supporting in the pandas implementation of 'fname' + kwargs = dict(zip(compat_args, args)) + _check_for_default_values(fname, kwargs, compat_args) + + +def _check_for_invalid_keys(fname, kwargs, compat_args): + """ + Checks whether 'kwargs' contains any keys that are not + in 'compat_args' and raises a TypeError if there is one. + """ + # set(dict) --> set of the dictionary's keys + diff = set(kwargs) - set(compat_args) + + if diff: + bad_arg = list(diff)[0] + raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'") + + +def validate_kwargs(fname, kwargs, compat_args): + """ + Checks whether parameters passed to the **kwargs argument in a + function `fname` are valid parameters as specified in `*compat_args` + and whether or not they are set to their default values. 
def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_args):
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `**kwargs` parameter
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`. It is not modified.
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError if `args` contains more values than there are
    `compat_args` OR `kwargs` contains keys not in `compat_args`
    ValueError if `args` contains values not at the default value (`None`)
    `kwargs` contains keys in `compat_args` that do not map to the default
    value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.

    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy so the caller's kwargs dict is left untouched.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)
'mapper', 'rename') + {'columns': , 'index': } + """ + # TODO: Change to keyword-only args and remove all this + + out = {} + # Goal: fill 'out' with index/columns-style arguments + # like out = {'index': foo, 'columns': bar} + + # Start by validating for consistency + if "axis" in kwargs and any(x in kwargs for x in data._AXIS_NUMBERS): + msg = "Cannot specify both 'axis' and any of 'index' or 'columns'." + raise TypeError(msg) + + # First fill with explicit values provided by the user... + if arg_name in kwargs: + if args: + msg = f"{method_name} got multiple values for argument '{arg_name}'" + raise TypeError(msg) + + axis = data._get_axis_name(kwargs.get("axis", 0)) + out[axis] = kwargs[arg_name] + + # More user-provided arguments, now from kwargs + for k, v in kwargs.items(): + try: + ax = data._get_axis_name(k) + except ValueError: + pass + else: + out[ax] = v + + # All user-provided kwargs have been handled now. + # Now we supplement with positional arguments, emitting warnings + # when there's ambiguity and raising when there's conflicts + + if len(args) == 0: + pass # It's up to the function to decide if this is valid + elif len(args) == 1: + axis = data._get_axis_name(kwargs.get("axis", 0)) + out[axis] = args[0] + elif len(args) == 2: + if "axis" in kwargs: + # Unambiguously wrong + msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" + raise TypeError(msg) + + msg = ( + "Interpreting call\n\t'.{method_name}(a, b)' as " + "\n\t'.{method_name}(index=a, columns=b)'.\nUse named " + "arguments to remove any ambiguity. In the future, using " + "positional arguments for 'index' or 'columns' will raise " + " a 'TypeError'." + ) + warnings.warn(msg.format(method_name=method_name), FutureWarning, stacklevel=4) + out[data._AXIS_NAMES[0]] = args[0] + out[data._AXIS_NAMES[1]] = args[1] + else: + msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'." 
def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid
    percentile otherwise raises a ValueError.

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
    q_arr = np.asarray(q)
    # One vectorised bounds check covers scalars and arrays of any
    # dimensionality. NaN fails both comparisons and is therefore rejected.
    if not np.all((q_arr >= 0) & (q_arr <= 1)):
        raise ValueError(msg.format(q_arr / 100.0))
    return q_arr
+shift # past argument or value +done + +if [ -z "$FROM" ]; then + FROM=`git tag --sort v:refname | grep -v rc | tail -1` +fi + +if [ -z "$TO" ]; then + TO="" +fi + +START=`git log $FROM.. --simplify-by-decoration --pretty="format:%ai %d" | tail -1 | gawk '{ print $1 }'` +END=`git log $TO.. --simplify-by-decoration --pretty="format:%ai %d" | head -1 | gawk '{ print $1 }'` + +git log $FROM.. --format='%an#%s' | grep -v Merge > commits + +# Include a summary by contributor in release notes: +# cat commits | gawk -F '#' '{ print "- " $1 }' | sort | uniq + +echo "Stats since <$FROM> [$START - $END]" +echo "" + +AUTHORS=`cat commits | gawk -F '#' '{ print $1 }' | sort | uniq | wc -l` +echo "Number of authors: $AUTHORS" + +TCOMMITS=`cat commits | gawk -F '#' '{ print $1 }'| wc -l` +echo "Total commits : $TCOMMITS" + +# Include a summary count of commits included in the release by contributor: +# cat commits | gawk -F '#' '{ print $1 }' | sort | uniq -c | sort -nr + +/bin/rm commits diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..0b8236c4 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,75 @@ +# This file is auto-generated from environment.yml, do not modify. +# See that file for comments about the need/usage of each dependency. 
+ +numpy>=1.15 +python-dateutil>=2.6.1 +pytz +asv +cython>=0.29.13 +black==19.10b0 +cpplint +flake8 +flake8-comprehensions>=3.1.0 +flake8-rst>=0.6.0,<=0.7.0 +isort +mypy==0.730 +pycodestyle +gitpython +gitdb2==2.0.6 +sphinx +numpydoc>=0.9.0 +nbconvert>=5.4.1 +nbsphinx +pandoc +dask +toolz>=0.7.3 +fsspec>=0.5.1 +partd>=0.3.10 +cloudpickle>=0.2.1 +markdown +feedparser +pyyaml +requests +boto3 +botocore>=1.11 +hypothesis>=3.82 +moto +pytest>=5.0.1 +pytest-cov +pytest-xdist>=1.21 +pytest-asyncio +seaborn +statsmodels +ipywidgets +nbformat +notebook>=5.7.5 +pip +blosc +bottleneck>=1.2.1 +ipykernel +ipython>=7.11.1 +jinja2 +matplotlib>=2.2.2 +numexpr>=2.6.8 +scipy>=1.1 +numba>=0.46.0 +beautifulsoup4>=4.6.0 +html5lib +lxml +openpyxl<=3.0.1 +xlrd +xlsxwriter +xlwt +odfpy +fastparquet>=0.3.2 +pyarrow>=0.13.1 +python-snappy +pyqt5>=5.9.2 +tables>=3.4.2 +s3fs +sqlalchemy +xarray +cftime +pyreadstat +tabulate>=0.8.3 +git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master \ No newline at end of file diff --git a/scripts/build_dist.sh b/scripts/build_dist.sh new file mode 100755 index 00000000..c3f849ce --- /dev/null +++ b/scripts/build_dist.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# build the distribution +LAST=`git tag --sort version:refname | grep -v rc | tail -1` + +echo "Building distribution for: $LAST" +git checkout $LAST + +read -p "Ok to continue (y/n)? 
def fetch(version):
    """
    Download every wheel for pandas ``version`` listed on wheels.scipy.org.

    Files are written to a ``dist`` directory under the current working
    directory (created if missing); wheels already present are skipped.

    Parameters
    ----------
    version : str
        pandas version whose wheels should be fetched, e.g. "0.23.0".
    """
    base = "http://wheels.scipy.org"
    tree = html.parse(base)
    root = tree.getroot()

    dest = pathlib.Path("dist")
    dest.mkdir(exist_ok=True)

    # NOTE(review): link texts come from a remote page fetched over plain
    # HTTP and are used as local file names - confirm the index is trusted.
    files = [
        x
        for x in root.xpath("//a/text()")
        if x.startswith(f"pandas-{version}") and not dest.joinpath(x).exists()
    ]

    N = len(files)

    for i, filename in enumerate(files, 1):
        out = str(dest.joinpath(filename))
        # urljoin is documented in urllib.parse; urllib.request only exposes
        # it as an internal re-export.
        link = urllib.parse.urljoin(base, filename)
        urllib.request.urlretrieve(link, out)
        print(f"Downloaded {link} to {out} [{i}/{N}]")
00000000..5e1a169d --- /dev/null +++ b/scripts/find_commits_touching_func.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python +# copyright 2013, y-p @ github +""" +Search the git history for all commits touching a named method + +You need the sh module to run this +WARNING: this script uses git clean -f, running it on a repo with untracked +files will probably erase them. + +Usage:: + $ ./find_commits_touching_func.py (see arguments below) +""" +import argparse +from collections import namedtuple +import logging +import os +import re + +from dateutil.parser import parse + +try: + import sh +except ImportError: + raise ImportError("The 'sh' package is required to run this script.") + + +desc = """ +Find all commits touching a specified function across the codebase. +""".strip() +argparser = argparse.ArgumentParser(description=desc) +argparser.add_argument( + "funcname", + metavar="FUNCNAME", + help="Name of function/method to search for changes on", +) +argparser.add_argument( + "-f", + "--file-masks", + metavar="f_re(,f_re)*", + default=[r"\.py.?$"], + help="comma separated list of regexes to match " + "filenames against\ndefaults all .py? 
files", +) +argparser.add_argument( + "-d", + "--dir-masks", + metavar="d_re(,d_re)*", + default=[], + help="comma separated list of regexes to match base path against", +) +argparser.add_argument( + "-p", + "--path-masks", + metavar="p_re(,p_re)*", + default=[], + help="comma separated list of regexes to match full file path against", +) +argparser.add_argument( + "-y", + "--saw-the-warning", + action="store_true", + default=False, + help="must specify this to run, acknowledge you " + "realize this will erase untracked files", +) +argparser.add_argument( + "--debug-level", + default="CRITICAL", + help="debug level of messages (DEBUG, INFO, etc...)", +) +args = argparser.parse_args() + + +lfmt = logging.Formatter(fmt="%(levelname)-8s %(message)s", datefmt="%m-%d %H:%M:%S") +shh = logging.StreamHandler() +shh.setFormatter(lfmt) +logger = logging.getLogger("findit") +logger.addHandler(shh) + +Hit = namedtuple("Hit", "commit path") +HASH_LEN = 8 + + +def clean_checkout(comm): + h, s, d = get_commit_vitals(comm) + if len(s) > 60: + s = s[:60] + "..." 
+ s = s.split("\n")[0] + logger.info("CO: %s %s" % (comm, s)) + + sh.git("checkout", comm, _tty_out=False) + sh.git("clean", "-f") + + +def get_hits(defname, files=()): + cs = set() + for f in files: + try: + r = sh.git( + "blame", + "-L", + r"/def\s*{start}/,/def/".format(start=defname), + f, + _tty_out=False, + ) + except sh.ErrorReturnCode_128: + logger.debug("no matches in %s" % f) + continue + + lines = r.strip().splitlines()[:-1] + # remove comment lines + lines = [x for x in lines if not re.search(r"^\w+\s*\(.+\)\s*#", x)] + hits = set(map(lambda x: x.split(" ")[0], lines)) + cs.update({Hit(commit=c, path=f) for c in hits}) + + return cs + + +def get_commit_info(c, fmt, sep="\t"): + r = sh.git( + "log", + "--format={}".format(fmt), + "{}^..{}".format(c, c), + "-n", + "1", + _tty_out=False, + ) + return str(r).split(sep) + + +def get_commit_vitals(c, hlen=HASH_LEN): + h, s, d = get_commit_info(c, "%H\t%s\t%ci", "\t") + return h[:hlen], s, parse(d) + + +def file_filter(state, dirname, fnames): + if args.dir_masks and not any(re.search(x, dirname) for x in args.dir_masks): + return + for f in fnames: + p = os.path.abspath(os.path.join(os.path.realpath(dirname), f)) + if any(re.search(x, f) for x in args.file_masks) or any( + re.search(x, p) for x in args.path_masks + ): + if os.path.isfile(p): + state["files"].append(p) + + +def search(defname, head_commit="HEAD"): + HEAD, s = get_commit_vitals("HEAD")[:2] + logger.info("HEAD at %s: %s" % (HEAD, s)) + done_commits = set() + # allhits = set() + files = [] + state = dict(files=files) + os.walk(".", file_filter, state) + # files now holds a list of paths to files + + # seed with hits from q + allhits = set(get_hits(defname, files=files)) + q = {HEAD} + try: + while q: + h = q.pop() + clean_checkout(h) + hits = get_hits(defname, files=files) + for x in hits: + prevc = get_commit_vitals(x.commit + "^")[0] + if prevc not in done_commits: + q.add(prevc) + allhits.update(hits) + done_commits.add(h) + + 
logger.debug("Remaining: %s" % q) + finally: + logger.info("Restoring HEAD to %s" % HEAD) + clean_checkout(HEAD) + return allhits + + +def pprint_hits(hits): + SUBJ_LEN = 50 + PATH_LEN = 20 + hits = list(hits) + max_p = 0 + for hit in hits: + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] + max_p = max(max_p, len(p)) + + if max_p < PATH_LEN: + SUBJ_LEN += PATH_LEN - max_p + PATH_LEN = max_p + + def sorter(i): + h, s, d = get_commit_vitals(hits[i].commit) + return hits[i].path, d + + print( + ("\nThese commits touched the %s method in these files on these dates:\n") + % args.funcname + ) + for i in sorted(range(len(hits)), key=sorter): + hit = hits[i] + h, s, d = get_commit_vitals(hit.commit) + p = hit.path.split(os.path.realpath(os.curdir) + os.path.sep)[-1] + + fmt = "{:%d} {:10} {:<%d} {:<%d}" % (HASH_LEN, SUBJ_LEN, PATH_LEN) + if len(s) > SUBJ_LEN: + s = s[: SUBJ_LEN - 5] + " ..." + print(fmt.format(h[:HASH_LEN], d.isoformat()[:10], s, p[-20:])) + + print("\n") + + +def main(): + if not args.saw_the_warning: + argparser.print_help() + print( + """ +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +WARNING: +this script uses git clean -f, running it on a repo with untracked files. +It's recommended that you make a fresh clone and run from its root directory. +You must specify the -y argument to ignore this warning. +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+""" + ) + return + if isinstance(args.file_masks, str): + args.file_masks = args.file_masks.split(",") + if isinstance(args.path_masks, str): + args.path_masks = args.path_masks.split(",") + if isinstance(args.dir_masks, str): + args.dir_masks = args.dir_masks.split(",") + + logger.setLevel(getattr(logging, args.debug_level)) + + hits = search(args.funcname) + pprint_hits(hits) + + +if __name__ == "__main__": + import sys + + sys.exit(main()) diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py new file mode 100755 index 00000000..53a27e87 --- /dev/null +++ b/scripts/generate_pip_deps_from_conda.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +""" +Convert the conda environment.yml to the pip requirements-dev.txt, +or check that they have the same packages (for the CI) + +Usage: + + Generate `requirements-dev.txt` + $ ./conda_to_pip + + Compare and fail (exit status != 0) if `requirements-dev.txt` has not been + generated with this script: + $ ./conda_to_pip --compare +""" +import argparse +import os +import re +import sys + +import yaml + +EXCLUDE = {"python"} +RENAME = {"pytables": "tables", "pyqt": "pyqt5", "dask-core": "dask"} + + +def conda_package_to_pip(package): + """ + Convert a conda package to its pip equivalent. + + In most cases they are the same, those are the exceptions: + - Packages that should be excluded (in `EXCLUDE`) + - Packages that should be renamed (in `RENAME`) + - A package requiring a specific version, in conda is defined with a single + equal (e.g. ``pandas=1.0``) and in pip with two (e.g. 
``pandas==1.0``) + """ + package = re.sub("(?<=[^<>])=", "==", package).strip() + + for compare in ("<=", ">=", "=="): + if compare not in package: + continue + + pkg, version = package.split(compare) + if pkg in EXCLUDE: + return + + if pkg in RENAME: + return "".join((RENAME[pkg], compare, version)) + + break + + if package in RENAME: + return RENAME[package] + + return package + + +def main(conda_fname, pip_fname, compare=False): + """ + Generate the pip dependencies file from the conda file, or compare that + they are synchronized (``compare=True``). + + Parameters + ---------- + conda_fname : str + Path to the conda file with dependencies (e.g. `environment.yml`). + pip_fname : str + Path to the pip file with dependencies (e.g. `requirements-dev.txt`). + compare : bool, default False + Whether to generate the pip file (``False``) or to compare if the + pip file has been generated with this script and the last version + of the conda file (``True``). + + Returns + ------- + bool + True if the comparison fails, False otherwise + """ + with open(conda_fname) as conda_fd: + deps = yaml.safe_load(conda_fd)["dependencies"] + + pip_deps = [] + for dep in deps: + if isinstance(dep, str): + conda_dep = conda_package_to_pip(dep) + if conda_dep: + pip_deps.append(conda_dep) + elif isinstance(dep, dict) and len(dep) == 1 and "pip" in dep: + pip_deps += dep["pip"] + else: + raise ValueError(f"Unexpected dependency {dep}") + + fname = os.path.split(conda_fname)[1] + header = ( + f"# This file is auto-generated from {fname}, do not modify.\n" + "# See that file for comments about the need/usage of each dependency.\n\n" + ) + pip_content = header + "\n".join(pip_deps) + + if compare: + with open(pip_fname) as pip_fd: + return pip_content != pip_fd.read() + else: + with open(pip_fname, "w") as pip_fd: + pip_fd.write(pip_content) + return False + + +if __name__ == "__main__": + argparser = argparse.ArgumentParser( + description="convert (or compare) conda file to pip" + ) + 
argparser.add_argument( + "--compare", + action="store_true", + help="compare whether the two files are equivalent", + ) + argparser.add_argument( + "--azure", action="store_true", help="show the output in azure-pipelines format" + ) + args = argparser.parse_args() + + repo_path = os.path.dirname(os.path.abspath(os.path.dirname(__file__))) + res = main( + os.path.join(repo_path, "environment.yml"), + os.path.join(repo_path, "requirements-dev.txt"), + compare=args.compare, + ) + if res: + msg = ( + f"`requirements-dev.txt` has to be generated with `{sys.argv[0]}` after " + "`environment.yml` is modified.\n" + ) + if args.azure: + msg = ( + "##vso[task.logissue type=error;" + f"sourcepath=requirements-dev.txt]{msg}" + ) + sys.stderr.write(msg) + sys.exit(res) diff --git a/scripts/list_future_warnings.sh b/scripts/list_future_warnings.sh new file mode 100755 index 00000000..121f4f5a --- /dev/null +++ b/scripts/list_future_warnings.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# Check all future warnings in Python files, and report them with the version +# where the FutureWarning was added. +# +# This is useful to detect features that have been deprecated, and should be +# removed from the code. For example, if a line of code contains: +# +# warning.warn('Method deprecated', FutureWarning, stacklevel=2) +# +# Which is released in Pandas 0.20.0, then it is expected that the method +# is removed before releasing Pandas 0.24.0, including the warning. If it +# is not, this script will list this line, with the version 0.20.0, which +# will make it easy to detect that it had to be removed. +# +# In some cases this script can return false positives, for example in files +# where FutureWarning is used to detect deprecations, or similar. The EXCLUDE +# variable can be used to ignore files that use FutureWarning, but do not +# deprecate functionality. 
+# +# Usage: +# +# $ ./list_future_warnings.sh + +EXCLUDE="^pandas/tests/|" # tests validate that FutureWarnings are raised +EXCLUDE+="^pandas/util/_decorators.py$|" # generic deprecate function that raises warning +EXCLUDE+="^pandas/util/_depr_module.py$|" # generic deprecate module that raises warnings +EXCLUDE+="^pandas._testing.py$|" # contains function to evaluate if warning is raised +EXCLUDE+="^pandas/io/parsers.py$" # implements generic deprecation system in io reading + +BASE_DIR="$(dirname $0)/.." +cd $BASE_DIR +FILES=`grep -RIl "FutureWarning" pandas/* | grep -vE "$EXCLUDE"` +OUTPUT=() +IFS=$'\n' + +for FILE in $FILES; do + FILE_LINES=`git blame -sf $FILE | grep FutureWarning | tr -s " " | cut -d " " -f1,3` + for FILE_LINE in $FILE_LINES; do + TAG=$(git tag --contains $(echo $FILE_LINE | cut -d" " -f1) | head -n1) + OUTPUT_ROW=`printf "%-14s %-16s %s" ${TAG:-"(not released)"} $FILE_LINE $FILE` + OUTPUT+=($OUTPUT_ROW) + done +done + +printf "%s\n" "${OUTPUT[@]}" | sort -V diff --git a/scripts/tests/__init__.py b/scripts/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/tests/conftest.py b/scripts/tests/conftest.py new file mode 100644 index 00000000..496a5195 --- /dev/null +++ b/scripts/tests/conftest.py @@ -0,0 +1,6 @@ +def pytest_addoption(parser): + parser.addoption( + "--strict-data-files", + action="store_true", + help="Unused. For compat with setup.cfg.", + ) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py new file mode 100644 index 00000000..a1bccb1d --- /dev/null +++ b/scripts/tests/test_validate_docstrings.py @@ -0,0 +1,1467 @@ +import functools +import io +import random +import string +import textwrap + +import numpy as np +import pytest +import validate_docstrings + +import pandas as pd + +validate_one = validate_docstrings.validate_one + + +class GoodDocStrings: + """ + Collection of good doc strings. 
+ + This class contains a lot of docstrings that should pass the validation + script without any errors. + """ + + def plot(self, kind, color="blue", **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Parameters + ---------- + kind : str + Kind of matplotlib plot. + color : str, default 'blue' + Color name or rgb code. + **kwargs + These parameters will be passed to the matplotlib plotting + function. + """ + pass + + def swap(self, arr, i, j, *args, **kwargs): + """ + Swap two indicies on an array. + + Parameters + ---------- + arr : list + The list having indexes swapped. + i, j : int + The indexes being swapped. + *args, **kwargs + Extraneous parameters are being permitted. + """ + pass + + def sample(self): + """ + Generate and return a random number. + + The value is sampled from a continuous uniform distribution between + 0 and 1. + + Returns + ------- + float + Random number generated. + """ + return random.random() + + @functools.lru_cache(None) + def decorated_sample(self, max): + """ + Generate and return a random integer between 0 and max. + + Parameters + ---------- + max : int + The maximum value of the random number. + + Returns + ------- + int + Random number generated. + """ + return random.randint(0, max) + + def random_letters(self): + """ + Generate and return a sequence of random letters. + + The length of the returned string is also random, and is also + returned. + + Returns + ------- + length : int + Length of the returned string. + letters : str + String of random letters. + """ + length = random.randint(1, 10) + letters = "".join(random.sample(string.ascii_lowercase, length)) + return length, letters + + def sample_values(self): + """ + Generate an infinite sequence of random numbers. + + The values are sampled from a continuous uniform distribution between + 0 and 1. + + Yields + ------ + float + Random number generated. 
+ """ + while True: + yield random.random() + + def head(self): + """ + Return the first 5 elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Returns + ------- + Series + Subset of the original series with the 5 first values. + + See Also + -------- + Series.tail : Return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n. + """ + return self.iloc[:5] + + def head1(self, n=5): + """ + Return the first elements of the Series. + + This function is mainly useful to preview the values of the + Series without displaying the whole of it. + + Parameters + ---------- + n : int + Number of values to return. + + Returns + ------- + Series + Subset of the original series with the n first values. + + See Also + -------- + tail : Return the last n elements of the Series. + + Examples + -------- + >>> s = pd.Series(['Ant', 'Bear', 'Cow', 'Dog', 'Falcon']) + >>> s.head() + 0 Ant + 1 Bear + 2 Cow + 3 Dog + 4 Falcon + dtype: object + + With the `n` parameter, we can change the number of returned rows: + + >>> s.head(n=3) + 0 Ant + 1 Bear + 2 Cow + dtype: object + """ + return self.iloc[:n] + + def contains(self, pat, case=True, na=np.nan): + """ + Return whether each value contains `pat`. + + In this case, we are illustrating how to use sections, even + if the example is simple enough and does not require them. + + Parameters + ---------- + pat : str + Pattern to check for within each element. + case : bool, default True + Whether check should be done with case sensitivity. + na : object, default np.nan + Fill value for missing data. 
+ + Examples + -------- + >>> s = pd.Series(['Antelope', 'Lion', 'Zebra', np.nan]) + >>> s.str.contains(pat='a') + 0 False + 1 False + 2 True + 3 NaN + dtype: object + + **Case sensitivity** + + With `case_sensitive` set to `False` we can match `a` with both + `a` and `A`: + + >>> s.str.contains(pat='a', case=False) + 0 True + 1 False + 2 True + 3 NaN + dtype: object + + **Missing values** + + We can fill missing values in the output using the `na` parameter: + + >>> s.str.contains(pat='a', na=False) + 0 False + 1 False + 2 True + 3 False + dtype: bool + """ + pass + + def mode(self, axis, numeric_only): + """ + Ensure reST directives don't affect checks for leading periods. + + Parameters + ---------- + axis : str + Sentence ending in period, followed by single directive. + + .. versionchanged:: 0.1.2 + + numeric_only : bool + Sentence ending in period, followed by multiple directives. + + .. versionadded:: 0.1.2 + .. deprecated:: 0.00.0 + A multiline description, + which spans another line. + """ + pass + + def good_imports(self): + """ + Ensure import other than numpy and pandas are fine. + + Examples + -------- + This example does not import pandas or import numpy. + >>> import datetime + >>> datetime.MAXYEAR + 9999 + """ + pass + + def no_returns(self): + """ + Say hello and have no returns. + """ + pass + + def empty_returns(self): + """ + Say hello and always return None. + + Since this function never returns a value, this + docstring doesn't need a return section. + """ + + def say_hello(): + return "Hello World!" + + say_hello() + if True: + return + else: + return None + + def multiple_variables_on_one_line(self, matrix, a, b, i, j): + """ + Swap two values in a matrix. + + Parameters + ---------- + matrix : list of list + A double list that represents a matrix. + a, b : int + The indicies of the first value. + i, j : int + The indicies of the second value. 
+ """ + pass + + +class BadGenericDocStrings: + """Everything here has a bad docstring + """ + + def func(self): + + """Some function. + + With several mistakes in the docstring. + + It has a blank like after the signature `def func():`. + + The text 'Some function' should go in the line after the + opening quotes of the docstring, not in the same line. + + There is a blank line between the docstring and the first line + of code `foo = 1`. + + The closing quotes should be in the next line, not in this one.""" + + foo = 1 + bar = 2 + return foo + bar + + def astype(self, dtype): + """ + Casts Series type. + + Verb in third-person of the present simple, should be infinitive. + """ + pass + + def astype1(self, dtype): + """ + Method to cast Series type. + + Does not start with verb. + """ + pass + + def astype2(self, dtype): + """ + Cast Series type + + Missing dot at the end. + """ + pass + + def astype3(self, dtype): + """ + Cast Series type from its current type to the new type defined in + the parameter dtype. + + Summary is too verbose and doesn't fit in a single line. + """ + pass + + def two_linebreaks_between_sections(self, foo): + """ + Test linebreaks message GL03. + + Note 2 blank lines before parameters section. + + + Parameters + ---------- + foo : str + Description of foo parameter. + """ + pass + + def linebreak_at_end_of_docstring(self, foo): + """ + Test linebreaks message GL03. + + Note extra blank line at end of docstring. + + Parameters + ---------- + foo : str + Description of foo parameter. + + """ + pass + + def plot(self, kind, **kwargs): + """ + Generate a plot. + + Render the data in the Series as a matplotlib plot of the + specified kind. + + Note the blank line between the parameters title and the first + parameter. Also, note that after the name of the parameter `kind` + and before the colon, a space is missing. + + Also, note that the parameter descriptions do not start with a + capital letter, and do not finish with a dot. 
+ + Finally, the `**kwargs` parameter is missing. + + Parameters + ---------- + + kind: str + kind of matplotlib plot + """ + pass + + def method(self, foo=None, bar=None): + """ + A sample DataFrame method. + + Do not import numpy and pandas. + + Try to use meaningful data, when it makes the example easier + to understand. + + Try to avoid positional arguments like in `df.method(1)`. They + can be alright if previously defined with a meaningful name, + like in `present_value(interest_rate)`, but avoid them otherwise. + + When presenting the behavior with different parameters, do not place + all the calls one next to the other. Instead, add a short sentence + explaining what the example shows. + + Examples + -------- + >>> import numpy as np + >>> import pandas as pd + >>> df = pd.DataFrame(np.ones((3, 3)), + ... columns=('a', 'b', 'c')) + >>> df.all(1) + 0 True + 1 True + 2 True + dtype: bool + >>> df.all(bool_only=True) + Series([], dtype: bool) + """ + pass + + def private_classes(self): + """ + This mentions NDFrame, which is not correct. + """ + + def unknown_section(self): + """ + This section has an unknown section title. + + Unknown Section + --------------- + This should raise an error in the validation. + """ + + def sections_in_wrong_order(self): + """ + This docstring has the sections in the wrong order. + + Parameters + ---------- + name : str + This section is in the right position. + + Examples + -------- + >>> print('So far Examples is good, as it goes before Parameters') + So far Examples is good, as it goes before Parameters + + See Also + -------- + function : This should generate an error, as See Also needs to go + before Examples. + """ + + def deprecation_in_wrong_order(self): + """ + This docstring has the deprecation warning in the wrong order. + + This is the extended summary. The correct order should be + summary, deprecation warning, extended summary. + + .. 
deprecated:: 1.0 + This should generate an error as it needs to go before + extended summary. + """ + + def method_wo_docstrings(self): + pass + + def directives_without_two_colons(self, first, second): + """ + Ensure reST directives have trailing colons. + + Parameters + ---------- + first : str + Sentence ending in period, followed by single directive w/o colons. + + .. versionchanged 0.1.2 + + second : bool + Sentence ending in period, followed by multiple directives w/o + colons. + + .. versionadded 0.1.2 + .. deprecated 0.00.0 + + """ + pass + + +class BadSummaries: + def wrong_line(self): + """Exists on the wrong line""" + pass + + def no_punctuation(self): + """ + Has the right line but forgets punctuation + """ + pass + + def no_capitalization(self): + """ + provides a lowercase summary. + """ + pass + + def no_infinitive(self): + """ + Started with a verb that is not infinitive. + """ + + def multi_line(self): + """ + Extends beyond one line + which is not correct. + """ + + def two_paragraph_multi_line(self): + """ + Extends beyond one line + which is not correct. + + Extends beyond one line, which in itself is correct but the + previous short summary should still be an issue. + """ + + +class BadParameters: + """ + Everything here has a problem with its Parameters section. + """ + + def missing_params(self, kind, **kwargs): + """ + Lacks kwargs in Parameters. + + Parameters + ---------- + kind : str + Foo bar baz. + """ + + def bad_colon_spacing(self, kind): + """ + Has bad spacing in the type line. + + Parameters + ---------- + kind: str + Needs a space after kind. + """ + + def no_description_period(self, kind): + """ + Forgets to add a period to the description. + + Parameters + ---------- + kind : str + Doesn't end with a dot + """ + + def no_description_period_with_directive(self, kind): + """ + Forgets to add a period, and also includes a directive. + + Parameters + ---------- + kind : str + Doesn't end with a dot + + .. 
versionadded:: 0.00.0 + """ + + def no_description_period_with_directives(self, kind): + """ + Forgets to add a period, and also includes multiple directives. + + Parameters + ---------- + kind : str + Doesn't end with a dot + + .. versionchanged:: 0.00.0 + .. deprecated:: 0.00.0 + """ + + def parameter_capitalization(self, kind): + """ + Forgets to capitalize the description. + + Parameters + ---------- + kind : str + this is not capitalized. + """ + + def blank_lines(self, kind): + """ + Adds a blank line after the section header. + + Parameters + ---------- + + kind : str + Foo bar baz. + """ + pass + + def integer_parameter(self, kind): + """ + Uses integer instead of int. + + Parameters + ---------- + kind : integer + Foo bar baz. + """ + pass + + def string_parameter(self, kind): + """ + Uses string instead of str. + + Parameters + ---------- + kind : string + Foo bar baz. + """ + pass + + def boolean_parameter(self, kind): + """ + Uses boolean instead of bool. + + Parameters + ---------- + kind : boolean + Foo bar baz. + """ + pass + + def list_incorrect_parameter_type(self, kind): + """ + Uses list of boolean instead of list of bool. + + Parameters + ---------- + kind : list of boolean, integer, float or string + Foo bar baz. + """ + pass + + def bad_parameter_spacing(self, a, b): + """ + The parameters on the same line have an extra space between them. + + Parameters + ---------- + a, b : int + Foo bar baz. + """ + pass + + +class BadReturns: + def return_not_documented(self): + """ + Lacks section for Returns + """ + return "Hello world!" + + def yield_not_documented(self): + """ + Lacks section for Yields + """ + yield "Hello world!" + + def no_type(self): + """ + Returns documented but without type. + + Returns + ------- + Some value. + """ + return "Hello world!" + + def no_description(self): + """ + Provides type but no description. + + Returns + ------- + str + """ + return "Hello world!" 
+ + def no_punctuation(self): + """ + Provides type and description but no period. + + Returns + ------- + str + A nice greeting + """ + return "Hello world!" + + def named_single_return(self): + """ + Provides name but returns only one value. + + Returns + ------- + s : str + A nice greeting. + """ + return "Hello world!" + + def no_capitalization(self): + """ + Forgets capitalization in return values description. + + Returns + ------- + foo : str + The first returned string. + bar : str + the second returned string. + """ + return "Hello", "World!" + + def no_period_multi(self): + """ + Forgets period in return values description. + + Returns + ------- + foo : str + The first returned string + bar : str + The second returned string. + """ + return "Hello", "World!" + + +class BadSeeAlso: + def desc_no_period(self): + """ + Return the first 5 elements of the Series. + + See Also + -------- + Series.tail : Return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n + """ + pass + + def desc_first_letter_lowercase(self): + """ + Return the first 5 elements of the Series. + + See Also + -------- + Series.tail : return the last 5 elements of the Series. + Series.iloc : Return a slice of the elements in the Series, + which can also be used to return the first or last n. + """ + pass + + def prefix_pandas(self): + """ + Have `pandas` prefix in See Also section. + + See Also + -------- + pandas.Series.rename : Alter Series index labels or name. + DataFrame.head : The first `n` rows of the caller object. 
+ """ + pass + + +class BadExamples: + def unused_import(self): + """ + Examples + -------- + >>> import pandas as pdf + >>> df = pd.DataFrame(np.ones((3, 3)), columns=('a', 'b', 'c')) + """ + pass + + def missing_whitespace_around_arithmetic_operator(self): + """ + Examples + -------- + >>> 2+5 + 7 + """ + pass + + def indentation_is_not_a_multiple_of_four(self): + """ + Examples + -------- + >>> if 2 + 5: + ... pass + """ + pass + + def missing_whitespace_after_comma(self): + """ + Examples + -------- + >>> df = pd.DataFrame(np.ones((3,3)),columns=('a','b', 'c')) + """ + pass + + +class TestValidator: + def _import_path(self, klass=None, func=None): + """ + Build the required import path for tests in this module. + + Parameters + ---------- + klass : str + Class name of object in module. + func : str + Function name of object in module. + + Returns + ------- + str + Import path of specified object in this module + """ + base_path = "scripts.tests.test_validate_docstrings" + + if klass: + base_path = ".".join([base_path, klass]) + + if func: + base_path = ".".join([base_path, func]) + + return base_path + + def test_good_class(self, capsys): + errors = validate_one(self._import_path(klass="GoodDocStrings"))["errors"] + assert isinstance(errors, list) + assert not errors + + @pytest.mark.parametrize( + "func", + [ + "plot", + "swap", + "sample", + "decorated_sample", + "random_letters", + "sample_values", + "head", + "head1", + "contains", + "mode", + "good_imports", + "no_returns", + "empty_returns", + "multiple_variables_on_one_line", + ], + ) + def test_good_functions(self, capsys, func): + errors = validate_one(self._import_path(klass="GoodDocStrings", func=func))[ + "errors" + ] + assert isinstance(errors, list) + assert not errors + + def test_bad_class(self, capsys): + errors = validate_one(self._import_path(klass="BadGenericDocStrings"))["errors"] + assert isinstance(errors, list) + assert errors + + @pytest.mark.parametrize( + "func", + [ + "func", + 
"astype", + "astype1", + "astype2", + "astype3", + "plot", + "method", + "private_classes", + "directives_without_two_colons", + ], + ) + def test_bad_generic_functions(self, capsys, func): + errors = validate_one( + self._import_path(klass="BadGenericDocStrings", func=func) # noqa:F821 + )["errors"] + assert isinstance(errors, list) + assert errors + + @pytest.mark.parametrize( + "klass,func,msgs", + [ + # See Also tests + ( + "BadGenericDocStrings", + "private_classes", + ( + "Private classes (NDFrame) should not be mentioned in public " + "docstrings", + ), + ), + ( + "BadGenericDocStrings", + "unknown_section", + ('Found unknown section "Unknown Section".',), + ), + ( + "BadGenericDocStrings", + "sections_in_wrong_order", + ( + "Sections are in the wrong order. Correct order is: Parameters, " + "See Also, Examples", + ), + ), + ( + "BadGenericDocStrings", + "deprecation_in_wrong_order", + ("Deprecation warning should precede extended summary",), + ), + ( + "BadGenericDocStrings", + "directives_without_two_colons", + ( + "reST directives ['versionchanged', 'versionadded', " + "'deprecated'] must be followed by two colons", + ), + ), + ( + "BadSeeAlso", + "desc_no_period", + ('Missing period at end of description for See Also "Series.iloc"',), + ), + ( + "BadSeeAlso", + "desc_first_letter_lowercase", + ('should be capitalized for See Also "Series.tail"',), + ), + # Summary tests + ( + "BadSummaries", + "wrong_line", + ("should start in the line immediately after the opening quotes",), + ), + ("BadSummaries", "no_punctuation", ("Summary does not end with a period",)), + ( + "BadSummaries", + "no_capitalization", + ("Summary does not start with a capital letter",), + ), + ( + "BadSummaries", + "no_capitalization", + ("Summary must start with infinitive verb",), + ), + ("BadSummaries", "multi_line", ("Summary should fit in a single line",)), + ( + "BadSummaries", + "two_paragraph_multi_line", + ("Summary should fit in a single line",), + ), + # Parameters tests + ( 
+ "BadParameters", + "missing_params", + ("Parameters {**kwargs} not documented",), + ), + ( + "BadParameters", + "bad_colon_spacing", + ( + 'Parameter "kind" requires a space before the colon ' + "separating the parameter name and type", + ), + ), + ( + "BadParameters", + "no_description_period", + ('Parameter "kind" description should finish with "."',), + ), + ( + "BadParameters", + "no_description_period_with_directive", + ('Parameter "kind" description should finish with "."',), + ), + ( + "BadParameters", + "parameter_capitalization", + ('Parameter "kind" description should start with a capital letter',), + ), + ( + "BadParameters", + "integer_parameter", + ('Parameter "kind" type should use "int" instead of "integer"',), + ), + ( + "BadParameters", + "string_parameter", + ('Parameter "kind" type should use "str" instead of "string"',), + ), + ( + "BadParameters", + "boolean_parameter", + ('Parameter "kind" type should use "bool" instead of "boolean"',), + ), + ( + "BadParameters", + "list_incorrect_parameter_type", + ('Parameter "kind" type should use "bool" instead of "boolean"',), + ), + ( + "BadParameters", + "list_incorrect_parameter_type", + ('Parameter "kind" type should use "int" instead of "integer"',), + ), + ( + "BadParameters", + "list_incorrect_parameter_type", + ('Parameter "kind" type should use "str" instead of "string"',), + ), + ( + "BadParameters", + "bad_parameter_spacing", + ("Parameters {b} not documented", "Unknown parameters { b}"), + ), + pytest.param( + "BadParameters", + "blank_lines", + ("No error yet?",), + marks=pytest.mark.xfail, + ), + # Returns tests + ("BadReturns", "return_not_documented", ("No Returns section found",)), + ("BadReturns", "yield_not_documented", ("No Yields section found",)), + pytest.param("BadReturns", "no_type", ("foo",), marks=pytest.mark.xfail), + ("BadReturns", "no_description", ("Return value has no description",)), + ( + "BadReturns", + "no_punctuation", + ('Return value description should finish with 
"."',), + ), + ( + "BadReturns", + "named_single_return", + ( + "The first line of the Returns section should contain only the " + "type, unless multiple values are being returned", + ), + ), + ( + "BadReturns", + "no_capitalization", + ("Return value description should start with a capital letter",), + ), + ( + "BadReturns", + "no_period_multi", + ('Return value description should finish with "."',), + ), + # Examples tests + ( + "BadGenericDocStrings", + "method", + ("Do not import numpy, as it is imported automatically",), + ), + ( + "BadGenericDocStrings", + "method", + ("Do not import pandas, as it is imported automatically",), + ), + ( + "BadGenericDocStrings", + "method_wo_docstrings", + ("The object does not have a docstring",), + ), + # See Also tests + ( + "BadSeeAlso", + "prefix_pandas", + ( + "pandas.Series.rename in `See Also` section " + "does not need `pandas` prefix", + ), + ), + # Examples tests + ( + "BadExamples", + "unused_import", + ("flake8 error: F401 'pandas as pdf' imported but unused",), + ), + ( + "BadExamples", + "indentation_is_not_a_multiple_of_four", + ("flake8 error: E111 indentation is not a multiple of four",), + ), + ( + "BadExamples", + "missing_whitespace_around_arithmetic_operator", + ( + "flake8 error: " + "E226 missing whitespace around arithmetic operator", + ), + ), + ( + "BadExamples", + "missing_whitespace_after_comma", + ("flake8 error: E231 missing whitespace after ',' (3 times)",), + ), + ( + "BadGenericDocStrings", + "two_linebreaks_between_sections", + ( + "Double line break found; please use only one blank line to " + "separate sections or paragraphs, and do not leave blank lines " + "at the end of docstrings", + ), + ), + ( + "BadGenericDocStrings", + "linebreak_at_end_of_docstring", + ( + "Double line break found; please use only one blank line to " + "separate sections or paragraphs, and do not leave blank lines " + "at the end of docstrings", + ), + ), + ], + ) + def test_bad_docstrings(self, capsys, klass, 
func, msgs): + result = validate_one(self._import_path(klass=klass, func=func)) + for msg in msgs: + assert msg in " ".join(err[1] for err in result["errors"]) + + def test_validate_all_ignore_deprecated(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, + "validate_one", + lambda func_name: { + "docstring": "docstring1", + "errors": [ + ("ER01", "err desc"), + ("ER02", "err desc"), + ("ER03", "err desc"), + ], + "warnings": [], + "examples_errors": "", + "deprecated": True, + }, + ) + result = validate_docstrings.validate_all(prefix=None, ignore_deprecated=True) + assert len(result) == 0 + + +class TestApiItems: + @property + def api_doc(self): + return io.StringIO( + textwrap.dedent( + """ + .. currentmodule:: itertools + + Itertools + --------- + + Infinite + ~~~~~~~~ + + .. autosummary:: + + cycle + count + + Finite + ~~~~~~ + + .. autosummary:: + + chain + + .. currentmodule:: random + + Random + ------ + + All + ~~~ + + .. autosummary:: + + seed + randint + """ + ) + ) + + @pytest.mark.parametrize( + "idx,name", + [ + (0, "itertools.cycle"), + (1, "itertools.count"), + (2, "itertools.chain"), + (3, "random.seed"), + (4, "random.randint"), + ], + ) + def test_item_name(self, idx, name): + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert result[idx][0] == name + + @pytest.mark.parametrize( + "idx,func", + [(0, "cycle"), (1, "count"), (2, "chain"), (3, "seed"), (4, "randint")], + ) + def test_item_function(self, idx, func): + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert callable(result[idx][1]) + assert result[idx][1].__name__ == func + + @pytest.mark.parametrize( + "idx,section", + [ + (0, "Itertools"), + (1, "Itertools"), + (2, "Itertools"), + (3, "Random"), + (4, "Random"), + ], + ) + def test_item_section(self, idx, section): + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert result[idx][2] == section + + @pytest.mark.parametrize( + "idx,subsection", + [(0, "Infinite"), 
(1, "Infinite"), (2, "Finite"), (3, "All"), (4, "All")], + ) + def test_item_subsection(self, idx, subsection): + result = list(validate_docstrings.get_api_items(self.api_doc)) + assert result[idx][3] == subsection + + +class TestDocstringClass: + @pytest.mark.parametrize( + "name, expected_obj", + [ + ("pandas.isnull", pd.isnull), + ("pandas.DataFrame", pd.DataFrame), + ("pandas.Series.sum", pd.Series.sum), + ], + ) + def test_resolves_class_name(self, name, expected_obj): + d = validate_docstrings.Docstring(name) + assert d.obj is expected_obj + + @pytest.mark.parametrize("invalid_name", ["panda", "panda.DataFrame"]) + def test_raises_for_invalid_module_name(self, invalid_name): + msg = f'No module can be imported from "{invalid_name}"' + with pytest.raises(ImportError, match=msg): + validate_docstrings.Docstring(invalid_name) + + @pytest.mark.parametrize( + "invalid_name", ["pandas.BadClassName", "pandas.Series.bad_method_name"] + ) + def test_raises_for_invalid_attribute_name(self, invalid_name): + name_components = invalid_name.split(".") + obj_name, invalid_attr_name = name_components[-2], name_components[-1] + msg = f"'{obj_name}' has no attribute '{invalid_attr_name}'" + with pytest.raises(AttributeError, match=msg): + validate_docstrings.Docstring(invalid_name) + + @pytest.mark.parametrize( + "name", ["pandas.Series.str.isdecimal", "pandas.Series.str.islower"] + ) + def test_encode_content_write_to_file(self, name): + # GH25466 + docstr = validate_docstrings.Docstring(name).validate_pep8() + # the list of pep8 errors should be empty + assert not list(docstr) + + +class TestMainFunction: + def test_exit_status_for_validate_one(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, + "validate_one", + lambda func_name: { + "docstring": "docstring1", + "errors": [ + ("ER01", "err desc"), + ("ER02", "err desc"), + ("ER03", "err desc"), + ], + "warnings": [], + "examples_errors": "", + }, + ) + exit_status = validate_docstrings.main( + 
func_name="docstring1", + prefix=None, + errors=[], + output_format="default", + ignore_deprecated=False, + ) + assert exit_status == 0 + + def test_exit_status_errors_for_validate_all(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, + "validate_all", + lambda prefix, ignore_deprecated=False: { + "docstring1": { + "errors": [ + ("ER01", "err desc"), + ("ER02", "err desc"), + ("ER03", "err desc"), + ], + "file": "module1.py", + "file_line": 23, + }, + "docstring2": { + "errors": [("ER04", "err desc"), ("ER05", "err desc")], + "file": "module2.py", + "file_line": 925, + }, + }, + ) + exit_status = validate_docstrings.main( + func_name=None, + prefix=None, + errors=[], + output_format="default", + ignore_deprecated=False, + ) + assert exit_status == 5 + + def test_no_exit_status_noerrors_for_validate_all(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, + "validate_all", + lambda prefix, ignore_deprecated=False: { + "docstring1": {"errors": [], "warnings": [("WN01", "warn desc")]}, + "docstring2": {"errors": []}, + }, + ) + exit_status = validate_docstrings.main( + func_name=None, + prefix=None, + errors=[], + output_format="default", + ignore_deprecated=False, + ) + assert exit_status == 0 + + def test_exit_status_for_validate_all_json(self, monkeypatch): + print("EXECUTED") + monkeypatch.setattr( + validate_docstrings, + "validate_all", + lambda prefix, ignore_deprecated=False: { + "docstring1": { + "errors": [ + ("ER01", "err desc"), + ("ER02", "err desc"), + ("ER03", "err desc"), + ] + }, + "docstring2": {"errors": [("ER04", "err desc"), ("ER05", "err desc")]}, + }, + ) + exit_status = validate_docstrings.main( + func_name=None, + prefix=None, + errors=[], + output_format="json", + ignore_deprecated=False, + ) + assert exit_status == 0 + + def test_errors_param_filters_errors(self, monkeypatch): + monkeypatch.setattr( + validate_docstrings, + "validate_all", + lambda prefix, ignore_deprecated=False: { + "Series.foo": { + "errors": 
[ + ("ER01", "err desc"), + ("ER02", "err desc"), + ("ER03", "err desc"), + ], + "file": "series.py", + "file_line": 142, + }, + "DataFrame.bar": { + "errors": [("ER01", "err desc"), ("ER02", "err desc")], + "file": "frame.py", + "file_line": 598, + }, + "Series.foobar": { + "errors": [("ER01", "err desc")], + "file": "series.py", + "file_line": 279, + }, + }, + ) + exit_status = validate_docstrings.main( + func_name=None, + prefix=None, + errors=["ER01"], + output_format="default", + ignore_deprecated=False, + ) + assert exit_status == 3 + + exit_status = validate_docstrings.main( + func_name=None, + prefix=None, + errors=["ER03"], + output_format="default", + ignore_deprecated=False, + ) + assert exit_status == 1 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py new file mode 100755 index 00000000..bcf3fd5d --- /dev/null +++ b/scripts/validate_docstrings.py @@ -0,0 +1,1062 @@ +#!/usr/bin/env python +""" +Analyze docstrings to detect errors. + +If no argument is provided, it does a quick check of docstrings and returns +a csv with all API functions and results of basic checks. + +If a function or method is provided in the form "pandas.function", +"pandas.module.class.method", etc. a list of all errors in the docstring for +the specified function or method. + +Usage:: + $ ./validate_docstrings.py + $ ./validate_docstrings.py pandas.DataFrame.head +""" +import argparse +import ast +import doctest +import functools +import glob +import importlib +import inspect +import json +import os +import pydoc +import re +import sys +import tempfile +import textwrap + +import flake8.main.application + +try: + from io import StringIO +except ImportError: + from cStringIO import StringIO + +# Template backend makes matplotlib to not plot anything. This is useful +# to avoid that plot windows are open from the doctests while running the +# script. Setting here before matplotlib is loaded. 
+# We don't warn for the number of open plots, as none is actually being opened +os.environ["MPLBACKEND"] = "Template" +import matplotlib # noqa: E402 isort:skip + +matplotlib.rc("figure", max_open_warning=10000) + +import numpy # noqa: E402 isort:skip + +BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +sys.path.insert(0, os.path.join(BASE_PATH)) +import pandas # noqa: E402 isort:skip + +sys.path.insert(1, os.path.join(BASE_PATH, "doc", "sphinxext")) +from numpydoc.docscrape import NumpyDocString # noqa: E402 isort:skip +from pandas.io.formats.printing import pprint_thing # noqa: E402 isort:skip + + +PRIVATE_CLASSES = ["NDFrame", "IndexOpsMixin"] +DIRECTIVES = ["versionadded", "versionchanged", "deprecated"] +DIRECTIVE_PATTERN = re.compile(rf"^\s*\.\. ({'|'.join(DIRECTIVES)})(?!::)", re.I | re.M) +ALLOWED_SECTIONS = [ + "Parameters", + "Attributes", + "Methods", + "Returns", + "Yields", + "Other Parameters", + "Raises", + "Warns", + "See Also", + "Notes", + "References", + "Examples", +] +ERROR_MSGS = { + "GL01": "Docstring text (summary) should start in the line immediately " + "after the opening quotes (not in the same line, or leaving a " + "blank line in between)", + "GL02": "Closing quotes should be placed in the line after the last text " + "in the docstring (do not close the quotes in the same line as " + "the text, or leave a blank line between the last text and the " + "quotes)", + "GL03": "Double line break found; please use only one blank line to " + "separate sections or paragraphs, and do not leave blank lines " + "at the end of docstrings", + "GL04": "Private classes ({mentioned_private_classes}) should not be " + "mentioned in public docstrings", + "GL05": 'Tabs found at the start of line "{line_with_tabs}", please use ' + "whitespace only", + "GL06": 'Found unknown section "{section}". Allowed sections are: ' + "{allowed_sections}", + "GL07": "Sections are in the wrong order. 
Correct order is: {correct_sections}", + "GL08": "The object does not have a docstring", + "GL09": "Deprecation warning should precede extended summary", + "GL10": "reST directives {directives} must be followed by two colons", + "SS01": "No summary found (a short summary in a single line should be " + "present at the beginning of the docstring)", + "SS02": "Summary does not start with a capital letter", + "SS03": "Summary does not end with a period", + "SS04": "Summary contains heading whitespaces", + "SS05": "Summary must start with infinitive verb, not third person " + '(e.g. use "Generate" instead of "Generates")', + "SS06": "Summary should fit in a single line", + "ES01": "No extended summary found", + "PR01": "Parameters {missing_params} not documented", + "PR02": "Unknown parameters {unknown_params}", + "PR03": "Wrong parameters order. Actual: {actual_params}. " + "Documented: {documented_params}", + "PR04": 'Parameter "{param_name}" has no type', + "PR05": 'Parameter "{param_name}" type should not finish with "."', + "PR06": 'Parameter "{param_name}" type should use "{right_type}" instead ' + 'of "{wrong_type}"', + "PR07": 'Parameter "{param_name}" has no description', + "PR08": 'Parameter "{param_name}" description should start with a ' + "capital letter", + "PR09": 'Parameter "{param_name}" description should finish with "."', + "PR10": 'Parameter "{param_name}" requires a space before the colon ' + "separating the parameter name and type", + "RT01": "No Returns section found", + "RT02": "The first line of the Returns section should contain only the " + "type, unless multiple values are being returned", + "RT03": "Return value has no description", + "RT04": "Return value description should start with a capital letter", + "RT05": 'Return value description should finish with "."', + "YD01": "No Yields section found", + "SA01": "See Also section not found", + "SA02": "Missing period at end of description for See Also " + '"{reference_name}" reference', + 
"SA03": "Description should be capitalized for See Also " + '"{reference_name}" reference', + "SA04": 'Missing description for See Also "{reference_name}" reference', + "SA05": "{reference_name} in `See Also` section does not need `pandas` " + "prefix, use {right_reference} instead.", + "EX01": "No examples section found", + "EX02": "Examples do not pass tests:\n{doctest_log}", + "EX03": "flake8 error: {error_code} {error_message}{times_happening}", + "EX04": "Do not import {imported_library}, as it is imported " + "automatically for the examples (numpy as np, pandas as pd)", +} + + +def error(code, **kwargs): + """ + Return a tuple with the error code and the message with variables replaced. + + This is syntactic sugar so instead of: + - `('EX02', ERROR_MSGS['EX02'].format(doctest_log=log))` + + We can simply use: + - `error('EX02', doctest_log=log)` + + Parameters + ---------- + code : str + Error code. + **kwargs + Values for the variables in the error messages + + Returns + ------- + code : str + Error code. + message : str + Error message with variables replaced. + """ + return (code, ERROR_MSGS[code].format(**kwargs)) + + +def get_api_items(api_doc_fd): + """ + Yield information about all public API items. + + Parse api.rst file from the documentation, and extract all the functions, + methods, classes, attributes... This should include all pandas public API. + + Parameters + ---------- + api_doc_fd : file descriptor + A file descriptor of the API documentation page, containing the table + of contents with all the public API. + + Yields + ------ + name : str + The name of the object (e.g. 'pandas.Series.str.upper). + func : function + The object itself. In most cases this will be a function or method, + but it can also be classes, properties, cython objects... + section : str + The name of the section in the API page where the object item is + located. + subsection : str + The name of the subsection in the API page where the object item is + located. 
+ """ + current_module = "pandas" + previous_line = current_section = current_subsection = "" + position = None + for line in api_doc_fd: + line = line.strip() + if len(line) == len(previous_line): + if set(line) == set("-"): + current_section = previous_line + continue + if set(line) == set("~"): + current_subsection = previous_line + continue + + if line.startswith(".. currentmodule::"): + current_module = line.replace(".. currentmodule::", "").strip() + continue + + if line == ".. autosummary::": + position = "autosummary" + continue + + if position == "autosummary": + if line == "": + position = "items" + continue + + if position == "items": + if line == "": + position = None + continue + item = line.strip() + func = importlib.import_module(current_module) + for part in item.split("."): + func = getattr(func, part) + + yield ( + ".".join([current_module, item]), + func, + current_section, + current_subsection, + ) + + previous_line = line + + +class Docstring: + def __init__(self, name): + self.name = name + obj = self._load_obj(name) + self.obj = obj + self.code_obj = self._to_original_callable(obj) + self.raw_doc = obj.__doc__ or "" + self.clean_doc = pydoc.getdoc(obj) + self.doc = NumpyDocString(self.clean_doc) + + def __len__(self) -> int: + return len(self.raw_doc) + + @staticmethod + def _load_obj(name): + """ + Import Python object from its name as string. + + Parameters + ---------- + name : str + Object name to import (e.g. pandas.Series.str.upper) + + Returns + ------- + object + Python object that can be a class, method, function... + + Examples + -------- + >>> Docstring._load_obj('pandas.Series') + + """ + for maxsplit in range(1, name.count(".") + 1): + # TODO when py3 only replace by: module, *func_parts = ... 
+ func_name_split = name.rsplit(".", maxsplit) + module = func_name_split[0] + func_parts = func_name_split[1:] + try: + obj = importlib.import_module(module) + except ImportError: + pass + else: + continue + + if "obj" not in locals(): + raise ImportError(f'No module can be imported from "{name}"') + + for part in func_parts: + obj = getattr(obj, part) + return obj + + @staticmethod + def _to_original_callable(obj): + """ + Find the Python object that contains the source code of the object. + + This is useful to find the place in the source code (file and line + number) where a docstring is defined. It does not currently work for + all cases, but it should help find some (properties...). + """ + while True: + if inspect.isfunction(obj) or inspect.isclass(obj): + f = inspect.getfile(obj) + if f.startswith("<") and f.endswith(">"): + return None + return obj + if inspect.ismethod(obj): + obj = obj.__func__ + elif isinstance(obj, functools.partial): + obj = obj.func + elif isinstance(obj, property): + obj = obj.fget + else: + return None + + @property + def type(self): + return type(self.obj).__name__ + + @property + def is_function_or_method(self): + # TODO(py27): remove ismethod + return inspect.isfunction(self.obj) or inspect.ismethod(self.obj) + + @property + def source_file_name(self): + """ + File name where the object is implemented (e.g. pandas/core/frame.py). + """ + try: + fname = inspect.getsourcefile(self.code_obj) + except TypeError: + # In some cases the object is something complex like a cython + # object that can't be easily introspected. An it's better to + # return the source code file of the object as None, than crash + pass + else: + if fname: + fname = os.path.relpath(fname, BASE_PATH) + return fname + + @property + def source_file_def_line(self): + """ + Number of line where the object is defined in its file. 
+ """ + try: + return inspect.getsourcelines(self.code_obj)[-1] + except (OSError, TypeError): + # In some cases the object is something complex like a cython + # object that can't be easily introspected. An it's better to + # return the line number as None, than crash + pass + + @property + def github_url(self): + url = "https://github.com/pandas-dev/pandas/blob/master/" + url += f"{self.source_file_name}#L{self.source_file_def_line}" + return url + + @property + def start_blank_lines(self): + i = None + if self.raw_doc: + for i, row in enumerate(self.raw_doc.split("\n")): + if row.strip(): + break + return i + + @property + def end_blank_lines(self): + i = None + if self.raw_doc: + for i, row in enumerate(reversed(self.raw_doc.split("\n"))): + if row.strip(): + break + return i + + @property + def double_blank_lines(self): + prev = True + for row in self.raw_doc.split("\n"): + if not prev and not row.strip(): + return True + prev = row.strip() + return False + + @property + def section_titles(self): + sections = [] + self.doc._doc.reset() + while not self.doc._doc.eof(): + content = self.doc._read_to_next_section() + if ( + len(content) > 1 + and len(content[0]) == len(content[1]) + and set(content[1]) == {"-"} + ): + sections.append(content[0]) + return sections + + @property + def summary(self): + return " ".join(self.doc["Summary"]) + + @property + def num_summary_lines(self): + return len(self.doc["Summary"]) + + @property + def extended_summary(self): + if not self.doc["Extended Summary"] and len(self.doc["Summary"]) > 1: + return " ".join(self.doc["Summary"]) + return " ".join(self.doc["Extended Summary"]) + + @property + def needs_summary(self): + return not (bool(self.summary) and bool(self.extended_summary)) + + @property + def doc_parameters(self): + parameters = {} + for names, type_, desc in self.doc["Parameters"]: + for name in names.split(", "): + parameters[name] = (type_, "".join(desc)) + return parameters + + @property + def 
signature_parameters(self): + def add_stars(param_name: str, info: inspect.Parameter): + """ + Add stars to *args and **kwargs parameters + """ + if info.kind == inspect.Parameter.VAR_POSITIONAL: + return f"*{param_name}" + elif info.kind == inspect.Parameter.VAR_KEYWORD: + return f"**{param_name}" + else: + return param_name + + if inspect.isclass(self.obj): + if hasattr(self.obj, "_accessors") and ( + self.name.split(".")[-1] in self.obj._accessors + ): + # accessor classes have a signature but don't want to show this + return tuple() + try: + sig = inspect.signature(self.obj) + except (TypeError, ValueError): + # Some objects, mainly in C extensions do not support introspection + # of the signature + return tuple() + + params = tuple( + add_stars(parameter, sig.parameters[parameter]) + for parameter in sig.parameters + ) + if params and params[0] in ("self", "cls"): + return params[1:] + return params + + @property + def parameter_mismatches(self): + errs = [] + signature_params = self.signature_parameters + doc_params = tuple(self.doc_parameters) + missing = set(signature_params) - set(doc_params) + if missing: + errs.append(error("PR01", missing_params=pprint_thing(missing))) + extra = set(doc_params) - set(signature_params) + if extra: + errs.append(error("PR02", unknown_params=pprint_thing(extra))) + if ( + not missing + and not extra + and signature_params != doc_params + and not (not signature_params and not doc_params) + ): + errs.append( + error( + "PR03", actual_params=signature_params, documented_params=doc_params + ) + ) + + return errs + + @property + def correct_parameters(self): + return not bool(self.parameter_mismatches) + + @property + def directives_without_two_colons(self): + return DIRECTIVE_PATTERN.findall(self.raw_doc) + + def parameter_type(self, param): + return self.doc_parameters[param][0] + + def parameter_desc(self, param): + desc = self.doc_parameters[param][1] + # Find and strip out any sphinx directives + for directive in 
DIRECTIVES: + full_directive = f".. {directive}" + if full_directive in desc: + # Only retain any description before the directive + desc = desc[: desc.index(full_directive)] + return desc + + @property + def see_also(self): + result = {} + for funcs, desc in self.doc["See Also"]: + for func, _ in funcs: + result[func] = "".join(desc) + + return result + + @property + def examples(self): + return self.doc["Examples"] + + @property + def returns(self): + return self.doc["Returns"] + + @property + def yields(self): + return self.doc["Yields"] + + @property + def method_source(self): + try: + source = inspect.getsource(self.obj) + except TypeError: + return "" + return textwrap.dedent(source) + + @property + def method_returns_something(self): + """ + Check if the docstrings method can return something. + + Bare returns, returns valued None and returns from nested functions are + disconsidered. + + Returns + ------- + bool + Whether the docstrings method can return something. + """ + + def get_returns_not_on_nested_functions(node): + returns = [node] if isinstance(node, ast.Return) else [] + for child in ast.iter_child_nodes(node): + # Ignore nested functions and its subtrees. + if not isinstance(child, ast.FunctionDef): + child_returns = get_returns_not_on_nested_functions(child) + returns.extend(child_returns) + return returns + + tree = ast.parse(self.method_source).body + if tree: + returns = get_returns_not_on_nested_functions(tree[0]) + return_values = [r.value for r in returns] + # Replace NameConstant nodes valued None for None. + for i, v in enumerate(return_values): + if isinstance(v, ast.NameConstant) and v.value is None: + return_values[i] = None + return any(return_values) + else: + return False + + @property + def first_line_ends_in_dot(self): + if self.doc: + return self.doc.split("\n")[0][-1] == "." + + @property + def deprecated(self): + return ".. 
deprecated:: " in (self.summary + self.extended_summary) + + @property + def mentioned_private_classes(self): + return [klass for klass in PRIVATE_CLASSES if klass in self.raw_doc] + + @property + def examples_errors(self): + flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL + finder = doctest.DocTestFinder() + runner = doctest.DocTestRunner(optionflags=flags) + context = {"np": numpy, "pd": pandas} + error_msgs = "" + for test in finder.find(self.raw_doc, self.name, globs=context): + f = StringIO() + runner.run(test, out=f.write) + error_msgs += f.getvalue() + return error_msgs + + @property + def examples_source_code(self): + lines = doctest.DocTestParser().get_examples(self.raw_doc) + return [line.source for line in lines] + + def validate_pep8(self): + if not self.examples: + return + + # F401 is needed to not generate flake8 errors in examples + # that do not user numpy or pandas + content = "".join( + ( + "import numpy as np # noqa: F401\n", + "import pandas as pd # noqa: F401\n", + *self.examples_source_code, + ) + ) + + application = flake8.main.application.Application() + application.initialize(["--quiet"]) + + with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as file: + file.write(content) + file.flush() + application.run_checks([file.name]) + + # We need this to avoid flake8 printing the names of the files to + # the standard output + application.formatter.write = lambda line, source: None + application.report() + + yield from application.guide.stats.statistics_for("") + + +def get_validation_data(doc): + """ + Validate the docstring. + + Parameters + ---------- + doc : Docstring + A Docstring object with the given function name. + + Returns + ------- + tuple + errors : list of tuple + Errors occurred during validation. + warnings : list of tuple + Warnings occurred during validation. + examples_errs : str + Examples usage displayed along the error, otherwise empty string. 
+ + Notes + ----- + The errors codes are defined as: + - First two characters: Section where the error happens: + * GL: Global (no section, like section ordering errors) + * SS: Short summary + * ES: Extended summary + * PR: Parameters + * RT: Returns + * YD: Yields + * RS: Raises + * WN: Warns + * SA: See Also + * NT: Notes + * RF: References + * EX: Examples + - Last two characters: Numeric error code inside the section + + For example, EX02 is the second codified error in the Examples section + (which in this case is assigned to examples that do not pass the tests). + + The error codes, their corresponding error messages, and the details on how + they are validated, are not documented more than in the source code of this + function. + """ + + errs = [] + wrns = [] + if not doc.raw_doc: + errs.append(error("GL08")) + return errs, wrns, "" + + if doc.start_blank_lines != 1: + errs.append(error("GL01")) + if doc.end_blank_lines != 1: + errs.append(error("GL02")) + if doc.double_blank_lines: + errs.append(error("GL03")) + mentioned_errs = doc.mentioned_private_classes + if mentioned_errs: + errs.append(error("GL04", mentioned_private_classes=", ".join(mentioned_errs))) + for line in doc.raw_doc.splitlines(): + if re.match("^ *\t", line): + errs.append(error("GL05", line_with_tabs=line.lstrip())) + + unexpected_sections = [ + section for section in doc.section_titles if section not in ALLOWED_SECTIONS + ] + for section in unexpected_sections: + errs.append( + error("GL06", section=section, allowed_sections=", ".join(ALLOWED_SECTIONS)) + ) + + correct_order = [ + section for section in ALLOWED_SECTIONS if section in doc.section_titles + ] + if correct_order != doc.section_titles: + errs.append(error("GL07", correct_sections=", ".join(correct_order))) + + if doc.deprecated and not doc.extended_summary.startswith(".. 
deprecated:: "): + errs.append(error("GL09")) + + directives_without_two_colons = doc.directives_without_two_colons + if directives_without_two_colons: + errs.append(error("GL10", directives=directives_without_two_colons)) + + if not doc.summary: + errs.append(error("SS01")) + else: + if not doc.summary[0].isupper(): + errs.append(error("SS02")) + if doc.summary[-1] != ".": + errs.append(error("SS03")) + if doc.summary != doc.summary.lstrip(): + errs.append(error("SS04")) + elif doc.is_function_or_method and doc.summary.split(" ")[0][-1] == "s": + errs.append(error("SS05")) + if doc.num_summary_lines > 1: + errs.append(error("SS06")) + + if not doc.extended_summary: + wrns.append(("ES01", "No extended summary found")) + + # PR01: Parameters not documented + # PR02: Unknown parameters + # PR03: Wrong parameters order + errs += doc.parameter_mismatches + + for param in doc.doc_parameters: + if not param.startswith("*"): # Check can ignore var / kwargs + if not doc.parameter_type(param): + if ":" in param: + errs.append(error("PR10", param_name=param.split(":")[0])) + else: + errs.append(error("PR04", param_name=param)) + else: + if doc.parameter_type(param)[-1] == ".": + errs.append(error("PR05", param_name=param)) + common_type_errors = [ + ("integer", "int"), + ("boolean", "bool"), + ("string", "str"), + ] + for wrong_type, right_type in common_type_errors: + if wrong_type in doc.parameter_type(param): + errs.append( + error( + "PR06", + param_name=param, + right_type=right_type, + wrong_type=wrong_type, + ) + ) + if not doc.parameter_desc(param): + errs.append(error("PR07", param_name=param)) + else: + if not doc.parameter_desc(param)[0].isupper(): + errs.append(error("PR08", param_name=param)) + if doc.parameter_desc(param)[-1] != ".": + errs.append(error("PR09", param_name=param)) + + if doc.is_function_or_method: + if not doc.returns: + if doc.method_returns_something: + errs.append(error("RT01")) + else: + if len(doc.returns) == 1 and doc.returns[0].name: + 
errs.append(error("RT02")) + for name_or_type, type_, desc in doc.returns: + if not desc: + errs.append(error("RT03")) + else: + desc = " ".join(desc) + if not desc[0].isupper(): + errs.append(error("RT04")) + if not desc.endswith("."): + errs.append(error("RT05")) + + if not doc.yields and "yield" in doc.method_source: + errs.append(error("YD01")) + + if not doc.see_also: + wrns.append(error("SA01")) + else: + for rel_name, rel_desc in doc.see_also.items(): + if rel_desc: + if not rel_desc.endswith("."): + errs.append(error("SA02", reference_name=rel_name)) + if not rel_desc[0].isupper(): + errs.append(error("SA03", reference_name=rel_name)) + else: + errs.append(error("SA04", reference_name=rel_name)) + if rel_name.startswith("pandas."): + errs.append( + error( + "SA05", + reference_name=rel_name, + right_reference=rel_name[len("pandas.") :], + ) + ) + + examples_errs = "" + if not doc.examples: + wrns.append(error("EX01")) + else: + examples_errs = doc.examples_errors + if examples_errs: + errs.append(error("EX02", doctest_log=examples_errs)) + for err in doc.validate_pep8(): + errs.append( + error( + "EX03", + error_code=err.error_code, + error_message=err.message, + times_happening=f" ({err.count} times)" if err.count > 1 else "", + ) + ) + examples_source_code = "".join(doc.examples_source_code) + for wrong_import in ("numpy", "pandas"): + if f"import {wrong_import}" in examples_source_code: + errs.append(error("EX04", imported_library=wrong_import)) + return errs, wrns, examples_errs + + +def validate_one(func_name): + """ + Validate the docstring for the given func_name + + Parameters + ---------- + func_name : function + Function whose docstring will be evaluated (e.g. pandas.read_csv). + + Returns + ------- + dict + A dictionary containing all the information obtained from validating + the docstring. 
def validate_all(prefix, ignore_deprecated=False):
    """
    Execute the validation of all docstrings, and return a dict with the
    results.

    Parameters
    ----------
    prefix : str or None
        If provided, only the docstrings that start with this pattern will be
        validated. If None, all docstrings will be validated.
    ignore_deprecated : bool, default False
        If True, deprecated objects are ignored when validating docstrings.

    Returns
    -------
    dict
        A dictionary with an item for every function/method... containing
        all the validation information.
    """
    result = {}
    # Maps (source file, definition line) to the last validated name defined
    # there, so later names sharing that code can report it.
    seen = {}

    # functions from the API docs
    api_doc_fnames = os.path.join(BASE_PATH, "doc", "source", "reference", "*.rst")
    api_items = []
    for api_doc_fname in glob.glob(api_doc_fnames):
        with open(api_doc_fname) as f:
            api_items += list(get_api_items(f))
    for func_name, _func_obj, section, subsection in api_items:
        if prefix and not func_name.startswith(prefix):
            continue
        doc_info = validate_one(func_name)
        if ignore_deprecated and doc_info["deprecated"]:
            continue
        result[func_name] = doc_info

        shared_code_key = doc_info["file"], doc_info["file_line"]
        shared_code = seen.get(shared_code_key, "")
        result[func_name].update(
            {
                "in_api": True,
                "section": section,
                "subsection": subsection,
                "shared_code_with": shared_code,
            }
        )

        seen[shared_code_key] = func_name

    # functions from introspecting Series and DataFrame
    # A set comprehension (rather than ``list(zip(*api_items))[0]``) keeps
    # this working when no API items were found at all.
    api_item_names = {api_item[0] for api_item in api_items}
    for class_ in (pandas.Series, pandas.DataFrame):
        for member in inspect.getmembers(class_):
            func_name = f"pandas.{class_.__name__}.{member[0]}"
            if not member[0].startswith("_") and func_name not in api_item_names:
                if prefix and not func_name.startswith(prefix):
                    continue
                doc_info = validate_one(func_name)
                if ignore_deprecated and doc_info["deprecated"]:
                    continue
                result[func_name] = doc_info
                result[func_name]["in_api"] = False

    return result
if __name__ == "__main__":
    # Command-line entry point: parse the options, run the validation and
    # use the value returned by ``main`` as the process exit status.
    format_opts = "default", "json", "azure"
    func_help = (
        "function or method to validate (e.g. pandas.DataFrame.head) "
        "if not provided, all docstrings are validated and returned "
        "as JSON"
    )
    argparser = argparse.ArgumentParser(description="validate pandas docstrings")
    argparser.add_argument("function", nargs="?", default=None, help=func_help)
    argparser.add_argument(
        "--format",
        default="default",
        choices=format_opts,
        help="format of the output when validating "
        "multiple docstrings (ignored when validating one). "
        f"It can be {str(format_opts)[1:-1]}",
    )
    argparser.add_argument(
        "--prefix",
        default=None,
        help="pattern for the "
        "docstring names, in order to decide which ones "
        'will be validated. A prefix "pandas.Series.str." '
        "will make the script validate all the docstrings "
        "of methods starting by this pattern. It is "
        "ignored if parameter function is provided",
    )
    argparser.add_argument(
        "--errors",
        default=None,
        help="comma separated "
        "list of error codes to validate. By default it "
        "validates all errors (ignored when validating "
        "a single docstring)",
    )
    argparser.add_argument(
        "--ignore_deprecated",
        default=False,
        action="store_true",
        help="if this flag is set, "
        "deprecated objects are ignored when validating "
        "all docstrings",
    )

    args = argparser.parse_args()
    sys.exit(
        main(
            args.function,
            args.prefix,
            # ``--errors`` arrives as one comma separated string
            args.errors.split(",") if args.errors else None,
            args.format,
            args.ignore_deprecated,
        )
    )
def strings_to_concatenate(source_path: str) -> Generator[Tuple[str, int], None, None]:
    """
    Yielding the strings that needs to be concatenated in a given file.

    Two string tokens that directly follow each other (with no newline or
    other token in between) are reported once, at the line where the first
    of the two starts.

    Parameters
    ----------
    source_path : str
        File path pointing to a single file.

    Yields
    ------
    source_path : str
        Source file path.
    line_number : int
        Line number of unconcatenated string.
    """
    # ``tokenize.open`` decodes the file with the encoding declared by its
    # BOM / coding cookie (UTF-8 otherwise) instead of the locale default,
    # so the check does not depend on the machine it runs on.
    with tokenize.open(source_path) as source_file:
        tokens: List[tokenize.TokenInfo] = list(
            tokenize.generate_tokens(source_file.readline)
        )

    for current_token, next_token in zip(tokens, tokens[1:]):
        if current_token.type == next_token.type == token.STRING:
            yield source_path, current_token.start[0]
+ ) + parser.add_argument( + "--format", + "-f", + default="{source_path}:{line_number}:{msg}", + help="Output format of the unconcatenated strings.", + ) + + args = parser.parse_args() + + sys.exit(main(source_path=args.path, output_format=args.format)) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..d0570cee --- /dev/null +++ b/setup.cfg @@ -0,0 +1,340 @@ + +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +VCS = git +style = pep440 +versionfile_source = pandas/_version.py +versionfile_build = pandas/_version.py +tag_prefix = v +parentdir_prefix = pandas- + +[flake8] +max-line-length = 88 +ignore = + E203, # space before : (needed for how black formats slicing) + W503, # line break before binary operator + W504, # line break after binary operator + E402, # module level import not at top of file + E731, # do not assign a lambda expression, use a def + C406, # Unnecessary list literal - rewrite as a dict literal. + C408, # Unnecessary dict call - rewrite as a literal. + C409, # Unnecessary list passed to tuple() - rewrite as a tuple literal. 
+ S001 # found modulo formatter (incorrect picks up mod operations) +exclude = + doc/sphinxext/*.py, + doc/build/*.py, + doc/temp/*.py, + .eggs/*.py, + versioneer.py, + env # exclude asv benchmark environments from linting + +[flake8-rst] +bootstrap = + import numpy as np + import pandas as pd + np # avoiding error when importing again numpy or pandas + pd # (in some cases we want to do it to show users) +ignore = E402, # module level import not at top of file + W503, # line break before binary operator + # Classes/functions in different blocks can generate those errors + E302, # expected 2 blank lines, found 0 + E305, # expected 2 blank lines after class or function definition, found 0 + # We use semicolon at the end to avoid displaying plot objects + E703, # statement ends with a semicolon + E711, # comparison to none should be 'if cond is none:' + +exclude = + doc/source/development/contributing_docstring.rst + +[tool:pytest] +# sync minversion with setup.cfg & install.rst +minversion = 4.0.2 +testpaths = pandas +markers = + single: mark a test as single cpu only + slow: mark a test as slow + network: mark a test as network + db: tests requiring a database (mysql or postgres) + high_memory: mark a test as a high-memory only + clipboard: mark a pd.read_clipboard test +doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL +addopts = --strict-data-files +xfail_strict = True +filterwarnings = + error:Sparse:FutureWarning + error:The SparseArray:FutureWarning +junit_family=xunit2 + +[coverage:run] +branch = False +omit = + */tests/* + pandas/_typing.py + pandas/_version.py +plugins = Cython.Coverage + +[coverage:report] +ignore_errors = False +show_missing = True +omit = + pandas/_version.py +# Regexes for lines to exclude from consideration +exclude_lines = + # Have to re-enable the standard pragma + pragma: no cover + + # Don't complain about missing debug-only code: + def __repr__ + if self\.debug + + # Don't complain if tests don't hit defensive 
assertion code: + raise AssertionError + raise NotImplementedError + AbstractMethodError + + # Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + +[coverage:html] +directory = coverage_html_report + +# To be kept consistent with "Import Formatting" section in contributing.rst +[isort] +known_pre_libs = pandas._config +known_pre_core = pandas._libs,pandas._typing,pandas.util._*,pandas.compat,pandas.errors +known_dtypes = pandas.core.dtypes +known_post_core = pandas.tseries,pandas.io,pandas.plotting +sections = FUTURE,STDLIB,THIRDPARTY,PRE_LIBS,PRE_CORE,DTYPES,FIRSTPARTY,POST_CORE,LOCALFOLDER +known_first_party = pandas +known_third_party = _pytest,announce,dateutil,docutils,flake8,git,hypothesis,jinja2,lxml,matplotlib,numpy,numpydoc,pkg_resources,pyarrow,pytest,pytz,requests,scipy,setuptools,sphinx,sqlalchemy,validate_docstrings,yaml,odf +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +combine_as_imports = True +line_length = 88 +force_sort_within_sections = True +skip_glob = env, +skip = pandas/__init__.py,pandas/core/api.py + +[mypy] +ignore_missing_imports=True +no_implicit_optional=True +check_untyped_defs=True +strict_equality=True + +[mypy-pandas.tests.*] +check_untyped_defs=False + +[mypy-pandas.conftest] +ignore_errors=True + +[mypy-pandas.tests.arithmetic.test_datetime64] +ignore_errors=True + +[mypy-pandas.tests.extension.decimal.test_decimal] +ignore_errors=True + +[mypy-pandas.tests.extension.json.array] +ignore_errors=True + +[mypy-pandas.tests.extension.json.test_json] +ignore_errors=True + +[mypy-pandas.tests.indexes.datetimes.test_tools] +ignore_errors=True + +[mypy-pandas.tests.scalar.period.test_period] +ignore_errors=True + +[mypy-pandas._version] +check_untyped_defs=False + +[mypy-pandas.core.arrays.categorical] +check_untyped_defs=False + +[mypy-pandas.core.arrays.interval] +check_untyped_defs=False + +[mypy-pandas.core.arrays.sparse.array] +check_untyped_defs=False + 
+[mypy-pandas.core.base] +check_untyped_defs=False + +[mypy-pandas.core.computation.expr] +check_untyped_defs=False + +[mypy-pandas.core.computation.expressions] +check_untyped_defs=False + +[mypy-pandas.core.computation.ops] +check_untyped_defs=False + +[mypy-pandas.core.computation.pytables] +check_untyped_defs=False + +[mypy-pandas.core.computation.scope] +check_untyped_defs=False + +[mypy-pandas.core.dtypes.cast] +check_untyped_defs=False + +[mypy-pandas.core.frame] +check_untyped_defs=False + +[mypy-pandas.core.generic] +check_untyped_defs=False + +[mypy-pandas.core.groupby.generic] +check_untyped_defs=False + +[mypy-pandas.core.groupby.grouper] +check_untyped_defs=False + +[mypy-pandas.core.groupby.ops] +check_untyped_defs=False + +[mypy-pandas.core.indexes.base] +check_untyped_defs=False + +[mypy-pandas.core.indexes.datetimelike] +check_untyped_defs=False + +[mypy-pandas.core.indexes.datetimes] +check_untyped_defs=False + +[mypy-pandas.core.indexes.interval] +check_untyped_defs=False + +[mypy-pandas.core.indexes.multi] +check_untyped_defs=False + +[mypy-pandas.core.indexing] +check_untyped_defs=False + +[mypy-pandas.core.internals.blocks] +check_untyped_defs=False + +[mypy-pandas.core.internals.concat] +check_untyped_defs=False + +[mypy-pandas.core.internals.construction] +check_untyped_defs=False + +[mypy-pandas.core.internals.managers] +check_untyped_defs=False + +[mypy-pandas.core.missing] +check_untyped_defs=False + +[mypy-pandas.core.nanops] +check_untyped_defs=False + +[mypy-pandas.core.ops.docstrings] +check_untyped_defs=False + +[mypy-pandas.core.resample] +check_untyped_defs=False + +[mypy-pandas.core.reshape.merge] +check_untyped_defs=False + +[mypy-pandas.core.reshape.reshape] +check_untyped_defs=False + +[mypy-pandas.core.strings] +check_untyped_defs=False + +[mypy-pandas.core.tools.datetimes] +check_untyped_defs=False + +[mypy-pandas.core.window.common] +check_untyped_defs=False + +[mypy-pandas.core.window.ewm] +check_untyped_defs=False + 
+[mypy-pandas.core.window.expanding] +check_untyped_defs=False + +[mypy-pandas.core.window.rolling] +check_untyped_defs=False + +[mypy-pandas.io.clipboard] +check_untyped_defs=False + +[mypy-pandas.io.excel._base] +check_untyped_defs=False + +[mypy-pandas.io.excel._openpyxl] +check_untyped_defs=False + +[mypy-pandas.io.excel._util] +check_untyped_defs=False + +[mypy-pandas.io.excel._xlwt] +check_untyped_defs=False + +[mypy-pandas.io.formats.console] +check_untyped_defs=False + +[mypy-pandas.io.formats.css] +check_untyped_defs=False + +[mypy-pandas.io.formats.excel] +check_untyped_defs=False + +[mypy-pandas.io.formats.format] +check_untyped_defs=False + +[mypy-pandas.io.formats.style] +check_untyped_defs=False + +[mypy-pandas.io.html] +check_untyped_defs=False + +[mypy-pandas.io.json._json] +check_untyped_defs=False + +[mypy-pandas.io.json._table_schema] +check_untyped_defs=False + +[mypy-pandas.io.parsers] +check_untyped_defs=False + +[mypy-pandas.io.pytables] +check_untyped_defs=False + +[mypy-pandas.io.sas.sas_xport] +check_untyped_defs=False + +[mypy-pandas.io.sas.sas7bdat] +check_untyped_defs=False + +[mypy-pandas.io.sas.sasreader] +check_untyped_defs=False + +[mypy-pandas.io.stata] +check_untyped_defs=False + +[mypy-pandas.plotting._matplotlib.converter] +check_untyped_defs=False + +[mypy-pandas.plotting._matplotlib.core] +check_untyped_defs=False + +[mypy-pandas.plotting._matplotlib.misc] +check_untyped_defs=False + +[mypy-pandas.tseries.holiday] +check_untyped_defs=False + +[mypy-pandas.tseries.offsets] +check_untyped_defs=False + +[mypy-pandas._testing] +check_untyped_defs=False diff --git a/setup.py b/setup.py new file mode 100755 index 00000000..e29f6fcd --- /dev/null +++ b/setup.py @@ -0,0 +1,781 @@ +#!/usr/bin/env python + +""" +Parts of this file were taken from the pyzmq project +(https://github.com/zeromq/pyzmq) which have been permitted for use under the +BSD license. 
def is_platform_windows():
    """Return True when ``sys.platform`` is ``"win32"`` or ``"cygwin"``."""
    return sys.platform in ("win32", "cygwin")


def is_platform_mac():
    """Return True when ``sys.platform`` is ``"darwin"``."""
    return sys.platform == "darwin"
class build_ext(_build_ext):
    """
    ``build_ext`` variant that renders the ``.pxi.in`` templates before the
    extensions are built.
    """

    @classmethod
    def render_templates(cls, pxifiles):
        """
        Render every ``.pxi.in`` template into the matching ``.pxi`` file.

        Parameters
        ----------
        pxifiles : iterable of str
            Paths of the templates; each one must end with ``.pxi.in``.
        """
        for template_path in pxifiles:
            # build pxifiles first, template extension must be .pxi.in
            assert template_path.endswith(".pxi.in")
            rendered_path = template_path[: -len(".in")]

            # if .pxi.in is not updated, no need to output .pxi
            output_is_current = os.path.exists(rendered_path) and (
                os.path.getmtime(template_path) < os.path.getmtime(rendered_path)
            )
            if output_is_current:
                continue

            with open(template_path, "r") as template_file:
                template_text = template_file.read()
            rendered_text = tempita.sub(template_text)

            with open(rendered_path, "w") as rendered_file:
                rendered_file.write(rendered_text)

    def build_extensions(self):
        """Render the templates when Cython is in use, then build as usual."""
        # if building from c files, don't need to generate template output
        if cython:
            self.render_templates(_pxifiles)

        super().build_extensions()
multidimensional, +potentially heterogeneous) and time series data both easy and intuitive. It +aims to be the fundamental high-level building block for doing practical, +**real world** data analysis in Python. Additionally, it has the broader goal +of becoming **the most powerful and flexible open source data analysis / +manipulation tool available in any language**. It is already well on its way +toward this goal. + +pandas is well suited for many different kinds of data: + + - Tabular data with heterogeneously-typed columns, as in an SQL table or + Excel spreadsheet + - Ordered and unordered (not necessarily fixed-frequency) time series data. + - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and + column labels + - Any other form of observational / statistical data sets. The data actually + need not be labeled at all to be placed into a pandas data structure + +The two primary data structures of pandas, Series (1-dimensional) and DataFrame +(2-dimensional), handle the vast majority of typical use cases in finance, +statistics, social science, and many areas of engineering. For R users, +DataFrame provides everything that R's ``data.frame`` provides and much +more. pandas is built on top of `NumPy `__ and is +intended to integrate well within a scientific computing environment with many +other 3rd party libraries. + +Here are just a few of the things that pandas does well: + + - Easy handling of **missing data** (represented as NaN) in floating point as + well as non-floating point data + - Size mutability: columns can be **inserted and deleted** from DataFrame and + higher dimensional objects + - Automatic and explicit **data alignment**: objects can be explicitly + aligned to a set of labels, or the user can simply ignore the labels and + let `Series`, `DataFrame`, etc. 
automatically align the data for you in + computations + - Powerful, flexible **group by** functionality to perform + split-apply-combine operations on data sets, for both aggregating and + transforming data + - Make it **easy to convert** ragged, differently-indexed data in other + Python and NumPy data structures into DataFrame objects + - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets + - Intuitive **merging** and **joining** data sets + - Flexible **reshaping** and pivoting of data sets + - **Hierarchical** labeling of axes (possible to have multiple labels per + tick) + - Robust IO tools for loading data from **flat files** (CSV and delimited), + Excel files, databases, and saving / loading data from the ultrafast **HDF5 + format** + - **Time series**-specific functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. + +Many of these principles are here to address the shortcomings frequently +experienced using other languages / scientific research environments. For data +scientists, working with data is typically divided into multiple stages: +munging and cleaning data, analyzing / modeling it, then organizing the results +of the analysis into a form suitable for plotting or tabular display. pandas is +the ideal tool for all of these tasks. 
class CleanCommand(Command):
    """Custom distutils command to clean the .so and .pyc files."""

    user_options = [("all", "a", "")]

    def initialize_options(self):
        """Collect the files and directory trees that ``run`` will remove."""
        self.all = True
        self._clean_me = []
        self._clean_trees = []

        src_dir = pjoin("pandas", "_libs", "src")
        datetime_dir = pjoin("pandas", "_libs", "tslibs", "src", "datetime")
        parser_dir = pjoin(src_dir, "parser")
        ujson_python_dir = pjoin(src_dir, "ujson", "python")
        ujson_lib_dir = pjoin(src_dir, "ujson", "lib")
        util_dir = pjoin("pandas", "util")
        # .c files that are kept even though ".c" is a removable suffix
        self._clean_exclude = [
            pjoin(datetime_dir, "np_datetime.c"),
            pjoin(datetime_dir, "np_datetime_strings.c"),
            pjoin(parser_dir, "tokenizer.c"),
            pjoin(parser_dir, "io.c"),
            pjoin(ujson_python_dir, "ujson.c"),
            pjoin(ujson_python_dir, "objToJSON.c"),
            pjoin(ujson_python_dir, "JSONtoObj.c"),
            pjoin(ujson_lib_dir, "ultrajsonenc.c"),
            pjoin(ujson_lib_dir, "ultrajsondec.c"),
            pjoin(util_dir, "move.c"),
        ]

        removable_suffixes = (
            ".pyc",
            ".so",
            ".o",
            ".pyo",
            ".pyd",
            ".c",
            ".cpp",
            ".orig",
        )
        for root, dirs, files in os.walk("pandas"):
            for name in files:
                path = pjoin(root, name)
                is_excluded = path in self._clean_exclude
                if not is_excluded and os.path.splitext(name)[-1] in removable_suffixes:
                    self._clean_me.append(path)
            self._clean_trees.extend(
                pjoin(root, name) for name in dirs if name == "__pycache__"
            )

        # clean the generated pxi files
        self._clean_me.extend(
            template.replace(".pxi.in", ".pxi") for template in _pxifiles
        )

        self._clean_trees.extend(
            tree for tree in ("build", "dist") if os.path.exists(tree)
        )

    def finalize_options(self):
        pass

    def run(self):
        """Delete the collected files and trees, ignoring ``OSError``."""
        removal_plan = (
            (self._clean_me, os.unlink),
            (self._clean_trees, shutil.rmtree),
        )
        for targets, remove in removal_plan:
            for target in targets:
                try:
                    remove(target)
                except OSError:
                    # best effort: a target that cannot be removed is skipped
                    pass
class CythonCommand(build_ext):
    """
    Custom distutils command subclassed from this file's ``build_ext``
    to compile pyx->c, and stop there. All this does is override the
    C-compile method build_extension() with a no-op.
    """

    def build_extension(self, ext):
        # Intentionally empty: skipping the per-extension build is the whole
        # purpose of this command.
        pass
+ """ + + user_options = [] + + def initialize_options(self): + self.py_modules_dict = {} + + def finalize_options(self): + pass + + def run(self): + pass + + +cmdclass.update({"clean": CleanCommand, "build": build}) + +if cython: + suffix = ".pyx" + cmdclass["build_ext"] = CheckingBuildExt + cmdclass["cython"] = CythonCommand +else: + suffix = ".c" + cmdclass["build_src"] = DummyBuildSrc + cmdclass["build_ext"] = CheckingBuildExt + +# ---------------------------------------------------------------------- +# Preparation of compiler arguments + +debugging_symbols_requested = "--with-debugging-symbols" in sys.argv +if debugging_symbols_requested: + sys.argv.remove("--with-debugging-symbols") + + +if sys.byteorder == "big": + endian_macro = [("__BIG_ENDIAN__", "1")] +else: + endian_macro = [("__LITTLE_ENDIAN__", "1")] + + +if is_platform_windows(): + extra_compile_args = [] + extra_link_args = [] + if debugging_symbols_requested: + extra_compile_args.append("/Z7") + extra_link_args.append("/DEBUG") +else: + # args to ignore warnings + extra_compile_args = [] + extra_link_args = [] + if debugging_symbols_requested: + extra_compile_args.append("-g") + +# Build for at least macOS 10.9 when compiling on a 10.9 system or above, +# overriding CPython distutils behaviour which is to target the version that +# python was built for.
This may be overridden by setting +# MACOSX_DEPLOYMENT_TARGET before calling setup.py +if is_platform_mac(): + if "MACOSX_DEPLOYMENT_TARGET" not in os.environ: + current_system = platform.mac_ver()[0] + python_target = get_config_vars().get( + "MACOSX_DEPLOYMENT_TARGET", current_system + ) + if ( + LooseVersion(python_target) < "10.9" + and LooseVersion(current_system) >= "10.9" + ): + os.environ["MACOSX_DEPLOYMENT_TARGET"] = "10.9" + +# enable coverage by building cython files by setting the environment variable +# "PANDAS_CYTHON_COVERAGE" (with a Truthy value) or by running build_ext +# with `--with-cython-coverage` enabled +linetrace = os.environ.get("PANDAS_CYTHON_COVERAGE", False) +if "--with-cython-coverage" in sys.argv: + linetrace = True + sys.argv.remove("--with-cython-coverage") + +# Note: if not using `cythonize`, coverage can be enabled by +# pinning `ext.cython_directives = directives` to each ext in extensions. +# github.com/cython/cython/wiki/enhancements-compilerdirectives#in-setuppy +directives = {"linetrace": False, "language_level": 3} +macros = [] +if linetrace: + # https://pypkg.com/pypi/pytest-cython/f/tests/example-project/setup.py + directives["linetrace"] = True + macros = [("CYTHON_TRACE", "1"), ("CYTHON_TRACE_NOGIL", "1")] + +# in numpy>=1.16.0, silence build warnings about deprecated API usage +# we can't do anything about these warnings because they stem from +# cython+numpy version mismatches. +macros.append(("NPY_NO_DEPRECATED_API", "0")) + + +# ---------------------------------------------------------------------- +# Specification of Dependencies + +# TODO: Need to check to see if e.g. `linetrace` has changed and possibly +# re-compile. +def maybe_cythonize(extensions, *args, **kwargs): + """ + Render tempita templates before calling cythonize.
This is skipped for + + * clean + * sdist + """ + if "clean" in sys.argv or "sdist" in sys.argv: + # See https://github.com/cython/cython/issues/1495 + return extensions + + elif not cython: + # GH#28836 raise a helpful error message + if _CYTHON_VERSION: + raise RuntimeError( + f"Cannot cythonize with old Cython version ({_CYTHON_VERSION} " + f"installed, needs {min_cython_ver})" + ) + raise RuntimeError("Cannot cythonize without Cython installed.") + + numpy_incl = pkg_resources.resource_filename("numpy", "core/include") + # TODO: Is this really necessary here? + for ext in extensions: + if hasattr(ext, "include_dirs") and numpy_incl not in ext.include_dirs: + ext.include_dirs.append(numpy_incl) + + # reuse any parallel arguments provided for compilation to cythonize + parser = argparse.ArgumentParser() + parser.add_argument("-j", type=int) + parser.add_argument("--parallel", type=int) + parsed, _ = parser.parse_known_args() # reads sys.argv; unknown options ignored + + nthreads = 0 + if parsed.parallel: + nthreads = parsed.parallel + elif parsed.j: + nthreads = parsed.j + + # GH#30356 Cythonize doesn't support parallel on Windows + if is_platform_windows() and nthreads > 0: + print("Parallel build for cythonize ignored on Windows") + nthreads = 0 + + kwargs["nthreads"] = nthreads + build_ext.render_templates(_pxifiles) + return cythonize(extensions, *args, **kwargs) + + +def srcpath(name=None, suffix=".pyx", subdir="src"): + return pjoin("pandas", subdir, name + suffix) # name must be a str + + +lib_depends = ["pandas/_libs/src/parse_helper.h"] + +klib_include = ["pandas/_libs/src/klib"] + +tseries_depends = [ + "pandas/_libs/tslibs/src/datetime/np_datetime.h", + "pandas/_libs/tslibs/src/datetime/np_datetime_strings.h", +] + +ext_data = { + "_libs.algos": { + "pyxfile": "_libs/algos", + "include": klib_include, + "depends": _pxi_dep["algos"], + }, + "_libs.groupby": {"pyxfile": "_libs/groupby"}, + "_libs.hashing": {"pyxfile": "_libs/hashing", "depends": []}, + "_libs.hashtable": { + "pyxfile": "_libs/hashtable", +
"include": klib_include, + "depends": (["pandas/_libs/src/klib/khash_python.h"] + _pxi_dep["hashtable"]), + }, + "_libs.index": { + "pyxfile": "_libs/index", + "include": klib_include, + "depends": _pxi_dep["index"], + }, + "_libs.indexing": {"pyxfile": "_libs/indexing"}, + "_libs.internals": {"pyxfile": "_libs/internals"}, + "_libs.interval": { + "pyxfile": "_libs/interval", + "include": klib_include, + "depends": _pxi_dep["interval"], + }, + "_libs.join": {"pyxfile": "_libs/join", "include": klib_include}, + "_libs.lib": { + "pyxfile": "_libs/lib", + "depends": lib_depends + tseries_depends, + "include": klib_include, # due to tokenizer import + "sources": ["pandas/_libs/src/parser/tokenizer.c"], + }, + "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends}, + "_libs.parsers": { + "pyxfile": "_libs/parsers", + "include": klib_include + ["pandas/_libs/src"], + "depends": [ + "pandas/_libs/src/parser/tokenizer.h", + "pandas/_libs/src/parser/io.h", + ], + "sources": [ + "pandas/_libs/src/parser/tokenizer.c", + "pandas/_libs/src/parser/io.c", + ], + }, + "_libs.reduction": {"pyxfile": "_libs/reduction"}, + "_libs.ops": {"pyxfile": "_libs/ops"}, + "_libs.ops_dispatch": {"pyxfile": "_libs/ops_dispatch"}, + "_libs.properties": {"pyxfile": "_libs/properties"}, + "_libs.reshape": {"pyxfile": "_libs/reshape", "depends": []}, + "_libs.sparse": {"pyxfile": "_libs/sparse", "depends": _pxi_dep["sparse"]}, + "_libs.tslib": {"pyxfile": "_libs/tslib", "depends": tseries_depends}, + "_libs.tslibs.c_timestamp": { + "pyxfile": "_libs/tslibs/c_timestamp", + "depends": tseries_depends, + }, + "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"}, + "_libs.tslibs.conversion": { + "pyxfile": "_libs/tslibs/conversion", + "depends": tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + }, + "_libs.tslibs.fields": { + "pyxfile": "_libs/tslibs/fields", + "depends": tseries_depends, + }, + "_libs.tslibs.frequencies": {"pyxfile": 
"_libs/tslibs/frequencies"}, + "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"}, + "_libs.tslibs.np_datetime": { + "pyxfile": "_libs/tslibs/np_datetime", + "depends": tseries_depends, + "sources": [ + "pandas/_libs/tslibs/src/datetime/np_datetime.c", + "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", + ], + }, + "_libs.tslibs.offsets": { + "pyxfile": "_libs/tslibs/offsets", + "depends": tseries_depends, + }, + "_libs.tslibs.parsing": { + "pyxfile": "_libs/tslibs/parsing", + "include": klib_include, + "depends": ["pandas/_libs/src/parser/tokenizer.h"], + "sources": ["pandas/_libs/src/parser/tokenizer.c"], + }, + "_libs.tslibs.period": { + "pyxfile": "_libs/tslibs/period", + "depends": tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + }, + "_libs.tslibs.resolution": { + "pyxfile": "_libs/tslibs/resolution", + "depends": tseries_depends, + }, + "_libs.tslibs.strptime": { + "pyxfile": "_libs/tslibs/strptime", + "depends": tseries_depends, + }, + "_libs.tslibs.timedeltas": { + "pyxfile": "_libs/tslibs/timedeltas", + "depends": tseries_depends, + }, + "_libs.tslibs.timestamps": { + "pyxfile": "_libs/tslibs/timestamps", + "depends": tseries_depends, + }, + "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"}, + "_libs.tslibs.tzconversion": { + "pyxfile": "_libs/tslibs/tzconversion", + "depends": tseries_depends, + }, + "_libs.testing": {"pyxfile": "_libs/testing"}, + "_libs.window.aggregations": { + "pyxfile": "_libs/window/aggregations", + "language": "c++", + "suffix": ".cpp", + "depends": ["pandas/_libs/src/skiplist.h"], + }, + "_libs.window.indexers": {"pyxfile": "_libs/window/indexers"}, + "_libs.writers": {"pyxfile": "_libs/writers"}, + "io.sas._sas": {"pyxfile": "io/sas/sas"}, +} + +extensions = [] + +for name, data in ext_data.items(): + source_suffix = suffix if suffix == ".pyx" else data.get("suffix", ".c") + + sources = [srcpath(data["pyxfile"], suffix=source_suffix, subdir="")] + + 
sources.extend(data.get("sources", [])) + + include = data.get("include") + + obj = Extension( + f"pandas.{name}", + sources=sources, + depends=data.get("depends", []), + include_dirs=include, + language=data.get("language", "c"), + define_macros=data.get("macros", macros), + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + ) + + extensions.append(obj) + +# ---------------------------------------------------------------------- +# ujson + +if suffix == ".pyx": + # undo dumb setuptools bug clobbering .pyx sources back to .c + for ext in extensions: + if ext.sources[0].endswith((".c", ".cpp")): + root, _ = os.path.splitext(ext.sources[0]) + ext.sources[0] = root + suffix + +ujson_ext = Extension( + "pandas._libs.json", + depends=["pandas/_libs/src/ujson/lib/ultrajson.h"], + sources=( + [ + "pandas/_libs/src/ujson/python/ujson.c", + "pandas/_libs/src/ujson/python/objToJSON.c", + "pandas/_libs/src/ujson/python/JSONtoObj.c", + "pandas/_libs/src/ujson/lib/ultrajsonenc.c", + "pandas/_libs/src/ujson/lib/ultrajsondec.c", + ] + + [ + "pandas/_libs/tslibs/src/datetime/np_datetime.c", + "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", + ] + ), + include_dirs=[ + "pandas/_libs/src/ujson/python", + "pandas/_libs/src/ujson/lib", + "pandas/_libs/src/datetime", + ], + extra_compile_args=(["-D_GNU_SOURCE"] + extra_compile_args), + extra_link_args=extra_link_args, + define_macros=macros, +) + + +extensions.append(ujson_ext) + +# ---------------------------------------------------------------------- + + +# The build cache system does string matching below this point. +# if you change something, be careful. 
+ +setup( + name=DISTNAME, + maintainer=AUTHOR, + version=versioneer.get_version(), + packages=find_packages(include=["pandas", "pandas.*"]), + package_data={"": ["templates/*", "_libs/**/*.dll"]}, + ext_modules=maybe_cythonize(extensions, compiler_directives=directives), + maintainer_email=EMAIL, + description=DESCRIPTION, + license=LICENSE, + cmdclass=cmdclass, + url=URL, + download_url=DOWNLOAD_URL, + project_urls=PROJECT_URLS, + long_description=LONG_DESCRIPTION, + classifiers=CLASSIFIERS, + platforms="any", + python_requires=">=3.6.1", + extras_require={ + "test": [ + # sync with setup.cfg minversion & install.rst + "pytest>=4.0.2", + "pytest-xdist", + "hypothesis>=3.58", + ] + }, + entry_points={ + "pandas_plotting_backends": ["matplotlib = pandas:plotting._matplotlib"] + }, + **setuptools_kwargs, +) diff --git a/test.bat b/test.bat new file mode 100644 index 00000000..e07c84f2 --- /dev/null +++ b/test.bat @@ -0,0 +1,3 @@ +:: test on windows + +pytest --skip-slow --skip-network pandas -n 2 -r sxX --strict %* diff --git a/test.sh b/test.sh new file mode 100755 index 00000000..1255a398 --- /dev/null +++ b/test.sh @@ -0,0 +1,4 @@ +#!/bin/sh +command -v coverage >/dev/null && coverage erase +command -v python-coverage >/dev/null && python-coverage erase +pytest pandas --cov=pandas -r sxX --strict diff --git a/test_fast.bat b/test_fast.bat new file mode 100644 index 00000000..f2c4e9fa --- /dev/null +++ b/test_fast.bat @@ -0,0 +1,3 @@ +:: test on windows +set PYTHONHASHSEED=314159265 +pytest --skip-slow --skip-network --skip-db -m "not single" -n 4 -r sXX --strict pandas diff --git a/test_fast.sh b/test_fast.sh new file mode 100755 index 00000000..0a47f9de --- /dev/null +++ b/test_fast.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') + 
+pytest pandas --skip-slow --skip-network --skip-db -m "not single" -n 4 -r sxX --strict "$@" diff --git a/test_rebuild.sh b/test_rebuild.sh new file mode 100755 index 00000000..65aa1098 --- /dev/null +++ b/test_rebuild.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +python setup.py clean +python setup.py build_ext --inplace +coverage erase +pytest pandas --cov=pandas diff --git a/versioneer.py b/versioneer.py new file mode 100644 index 00000000..8a4710da --- /dev/null +++ b/versioneer.py @@ -0,0 +1,1748 @@ +# Version: 0.15 + +""" +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/warner/python-versioneer +* Brian Warner +* License: Public Domain +* [![Latest Version] +(https://pypip.in/version/versioneer/badge.svg?style=flat) +](https://pypi.org/project/versioneer/) +* [![Build Status] +(https://travis-ci.org/warner/python-versioneer.png?branch=master) +](https://travis-ci.org/warner/python-versioneer) + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. 
+ + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see below) +* run `versioneer install` in your source tree, commit the results + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes).
+ +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +First, decide on values for the following configuration variables: + +* `VCS`: the version control system you use. Currently accepts "git". + +* `style`: the style of version string to be produced. See "Styles" below for + details. Defaults to "pep440", which looks like + `TAG[+DISTANCE.gSHORTHASH[.dirty]]`. + +* `versionfile_source`: + + A project-relative pathname into which the generated version strings should + be written. This is usually a `_version.py` next to your project's main + `__init__.py` file, so it can be imported at runtime. If your project uses + `src/myproject/__init__.py`, this should be `src/myproject/_version.py`. + This file should be checked in to your VCS as usual: the copy created below + by `setup.py setup_versioneer` will include code that parses expanded VCS + keywords in generated tarballs. 
The 'build' and 'sdist' commands will + replace it with a copy that has just the calculated version string. + + This must be set even if your project does not have any modules (and will + therefore never import `_version.py`), since "setup.py sdist"-based trees + still need somewhere to record the pre-calculated version strings. Anywhere + in the source tree should do. If there is an `__init__.py` next to your + `_version.py`, the `setup.py setup_versioneer` command (described below) + will append some `__version__`-setting assignments, if they aren't already + present. + +* `versionfile_build`: + + Like `versionfile_source`, but relative to the build directory instead of + the source directory. These will differ when your setup.py uses + 'package_dir='. If you have `package_dir={'myproject': 'src/myproject'}`, + then you will probably have `versionfile_build='myproject/_version.py'` and + `versionfile_source='src/myproject/_version.py'`. + + If this is set to None, then `setup.py build` will not attempt to rewrite + any `_version.py` in the built tree. If your project does not have any + libraries (e.g. if it only builds a script), then you should use + `versionfile_build = None` and override `distutils.command.build_scripts` + to explicitly insert a copy of `versioneer.get_version()` into your + generated script. + +* `tag_prefix`: + + a string, like 'PROJECTNAME-', which appears at the start of all VCS tags. + If your tags look like 'myproject-1.2.0', then you should use + tag_prefix='myproject-'. If you use unprefixed tags like '1.2.0', this + should be an empty string. + +* `parentdir_prefix`: + + an optional string, frequently the same as tag_prefix, which appears at the + start of all unpacked tarball filenames. If your tarball unpacks into + 'myproject-1.2.0', this should be 'myproject-'. To disable this feature, + just omit the field from your `setup.cfg`. + +This tool provides one script, named `versioneer`.
That script has one mode, +"install", which writes a copy of `versioneer.py` into the current directory +and runs `versioneer.py setup` to finish the installation. + +To versioneer-enable your project: + +* 1: Modify your `setup.cfg`, adding a section named `[versioneer]` and + populating it with the configuration values you decided earlier (note that + the option names are not case-sensitive): + + ```` + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = "" + parentdir_prefix = myproject- + ```` + +* 2: Run `versioneer install`. This will do the following: + + * copy `versioneer.py` into the top of your source tree + * create `_version.py` in the right place (`versionfile_source`) + * modify your `__init__.py` (if one exists next to `_version.py`) to define + `__version__` (by calling a function from `_version.py`) + * modify your `MANIFEST.in` to include both `versioneer.py` and the + generated `_version.py` in sdist tarballs + + `versioneer install` will complain about any problems it finds with your + `setup.py` or `setup.cfg`. Run it multiple times until you have fixed all + the problems. + +* 3: add a `import versioneer` to your setup.py, and add the following + arguments to the setup() call: + + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + +* 4: commit these changes to your VCS. To make sure you won't forget, + `versioneer install` will mark everything it touched for addition using + `git add`. Don't forget to add `setup.py` and `setup.cfg` too. + +## Post-Installation Usage + +Once established, all uses of your tree from a VCS checkout should get the +current version string. All generated tarballs should include an embedded +version string (so users who unpack them will not need a VCS tool installed). 
+ +If you distribute your project through PyPI, then the release process should +boil down to two steps: + +* 1: git tag 1.0 +* 2: python setup.py register sdist upload + +If you distribute it through github (i.e. users use github to generate +tarballs with `git archive`), the process is: + +* 1: git tag 1.0 +* 2: git push; git push --tags + +Versioneer will report "0+untagged.NUMCOMMITS.gHASH" until your tree has at +least one tag in its history. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. 
Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See details.md in the Versioneer source tree for +descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). 
+ +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +### Upgrading to 0.15 + +Starting with this version, Versioneer is configured with a `[versioneer]` +section in your `setup.cfg` file. Earlier versions required the `setup.py` to +set attributes on the `versioneer` module immediately after import. The new +version will refuse to run (raising an exception during import) until you +have provided the necessary `setup.cfg` section. + +In addition, the Versioneer package provides an executable named +`versioneer`, and the installation process is driven by running `versioneer +install`. In 0.14 and earlier, the executable was named +`versioneer-installer` and was run without an argument. + +### Upgrading to 0.14 + +0.14 changes the format of the version string. 0.13 and earlier used +hyphen-separated strings like "0.11-2-g1076c97-dirty". 0.14 and beyond use a +plus-separated "local version" section strings, with dot-separated +components, like "0.11+2.g1076c97". PEP440-strict tools did not like the old +format, but should be ok with the new one. + +### Upgrading from 0.11 to 0.12 + +Nothing special. + +### Upgrading from 0.10 to 0.11 + +You must add a `versioneer.VCS = "git"` to your `setup.py` before re-running +`setup.py setup_versioneer`. This will enable the use of additional +version-control systems (SVN, etc) in the future. + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . 
In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + + +## License + +To make Versioneer easier to embed, all its code is hereby released into the +public domain. The `_version.py` that it creates is also in the public +domain. + +""" + +try: + import configparser +except ImportError: + import ConfigParser as configparser +import errno +import json +import os +import re +import subprocess +import sys + + +class VersioneerConfig(object): + pass + + +def get_root(): + # we require that all commands are run from the project root, i.e. the + # directory that contains setup.py, setup.cfg, and versioneer.py . + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ( + "Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." 
+ ) + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + me = os.path.realpath(os.path.abspath(__file__)) + if os.path.splitext(me)[0] != os.path.splitext(versioneer_py)[0]: + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py) + ) + except NameError: + pass + return root + + +def get_config_from_root(root): + # This might raise EnvironmentError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . 
+ setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() # SafeConfigParser was removed in Python 3.12 + with open(setup_cfg, "r") as f: + parser.read_file(f) # readfp() was removed in Python 3.12 + VCS = parser.get("versioneer", "VCS") # mandatory + + def get(parser, name): + if parser.has_option("versioneer", name): + return parser.get("versioneer", name) + return None + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = get(parser, "style") or "" + cfg.versionfile_source = get(parser, "versionfile_source") + cfg.versionfile_build = get(parser, "versionfile_build") + cfg.tag_prefix = get(parser, "tag_prefix") + cfg.parentdir_prefix = get(parser, "parentdir_prefix") + cfg.verbose = get(parser, "verbose") + return cfg + + +class NotThisMethod(Exception): + pass + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + def decorate(f): + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen( + [c] + args, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None + + stdout = p.communicate()[0].strip().decode() + + if p.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + return None + return stdout + + +LONG_VERSION_PY[ + "git" +] = r""" +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs
download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.15 (https://github.com/warner/python-versioneer) + +import errno +import os +import re +import subprocess +import sys + + +def get_keywords(): + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full} + return keywords + + +class VersioneerConfig: + pass + + +def get_config(): + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + pass + + +LONG_VERSION_PY = {} +HANDLERS = {} + + +def register_vcs_handler(vcs, method): # decorator + def decorate(f): + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): + assert isinstance(commands, list) + p = None + for c in commands: + try: + dispcmd = str([c] + args) + # remember shell=False, so use git.cmd on windows, not just git + p = subprocess.Popen([c] + args, cwd=cwd, stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except EnvironmentError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run 
%%s" %% dispcmd) + print(e) + return None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None + + stdout = p.communicate()[0].strip().decode() + + if p.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + return None + return stdout + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + # Source tarballs conventionally unpack into a directory that includes + # both the project name and a version string. + dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print("guessing rootdir is '%%s', but '%%s' doesn't start with " + "prefix '%%s'" %% (root, dirname, parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None} + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + if not keywords: + raise NotThisMethod("no keywords at all, weird") + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs-tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags"} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + # this runs 'git' from the root of the source tree. This only gets called + # if the git-archive 'subst' keywords were *not* expanded, and + # _version.py hasn't already been rewritten with a short version string, + # meaning we're inside a checked out source tree. + + if not os.path.exists(os.path.join(root, ".git")): + if verbose: + print("no .git in %%s" %% root) + raise NotThisMethod("no .git directory") + + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + # if there is a tag, this yields TAG-NUM-gHEX[-dirty] + # if there are no tags, this yields HEX[-dirty] (no NUM) + describe_out = run_command(GITS, ["describe", "--tags", "--dirty", + "--always", "--long"], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+ git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], + cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + return pieces + + +def plus_or_dot(pieces): + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + # now build up version string, with post-release "local version + # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + # exceptions: + # 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + # TAG[.post.devDISTANCE] . No -dirty + + # exceptions: + # 1: no tags. 0.post.devDISTANCE + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%%d" %% pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that + # .dev0 sorts backwards (a dirty tree will appear "older" than the + # corresponding clean one), but you shouldn't be releasing software with + # -dirty anyways. + + # exceptions: + # 1: no tags. 0.postDISTANCE[.dev0] + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_old(pieces): + # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. + + # exceptions: + # 1: no tags. 
0.postDISTANCE[.dev0] + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty + # --always' + + # exceptions: + # 1: no tags. HEX[-dirty] (note: no 'g' prefix) + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty + # --always -long'. The distance/hash is unconditional. + + # exceptions: + # 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"]} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None} + + +def get_versions(): + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for i in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree"} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version"} +""" + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + f = open(versionfile_abs, "r") + for line in f.readlines(): + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + f.close() + except EnvironmentError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + if not keywords: + raise NotThisMethod("no keywords at all, weird") + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
+ TAG = "tag: " + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r"\d", r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix) :] + if verbose: + print("picking %s" % r) + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): + # this runs 'git' from the root of the source tree. This only gets called + # if the git-archive 'subst' keywords were *not* expanded, and + # _version.py hasn't already been rewritten with a short version string, + # meaning we're inside a checked out source tree. 
+ + if not os.path.exists(os.path.join(root, ".git")): + if verbose: + print("no .git in %s" % root) + raise NotThisMethod("no .git directory") + + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + # if there is a tag, this yields TAG-NUM-gHEX[-dirty] + # if there are no tags, this yields HEX[-dirty] (no NUM) + describe_out = run_command( + GITS, ["describe", "--tags", "--dirty", "--always", "--long"], cwd=root + ) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[: git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) + if not mo: + # unparseable. Maybe git-describe is misbehaving? 
+ pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix) :] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + me = __file__ + if me.endswith(".pyc") or me.endswith(".pyo"): + me = os.path.splitext(me)[0] + ".py" + versioneer_file = os.path.relpath(me) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + f = open(".gitattributes", "r") + for line in f.readlines(): + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + f.close() + except EnvironmentError: + pass + if not present: + f = open(".gitattributes", "a+") + f.write("%s export-subst\n" % versionfile_source) + f.close() + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + # Source tarballs conventionally unpack into a directory that includes + # both the project name and a version string. 
+ dirname = os.path.basename(root) + if not dirname.startswith(parentdir_prefix): + if verbose: + print( + "guessing rootdir is '%s', but '%s' doesn't start with " + "prefix '%s'" % (root, dirname, parentdir_prefix) + ) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + } + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.15) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. + +from warnings import catch_warnings +with catch_warnings(record=True): + import json +import sys + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + try: + with open(filename) as f: + contents = f.read() + except EnvironmentError: + raise NotThisMethod("unable to read _version.py") + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + # now build up version string, with post-release "local version + # identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + # get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + # exceptions: + # 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_pre(pieces): + # TAG[.post.devDISTANCE] . No -dirty + + # exceptions: + # 1: no tags. 0.post.devDISTANCE + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += ".post.dev%d" % pieces["distance"] + else: + # exception #1 + rendered = "0.post.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + # TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that + # .dev0 sorts backwards (a dirty tree will appear "older" than the + # corresponding clean one), but you shouldn't be releasing software with + # -dirty anyways. + + # exceptions: + # 1: no tags. 0.postDISTANCE[.dev0] + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_old(pieces): + # TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. + + # exceptions: + # 1: no tags. 
0.postDISTANCE[.dev0] + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + # TAG[-DISTANCE-gHEX][-dirty], like 'git describe --tags --dirty + # --always' + + # exceptions: + # 1: no tags. HEX[-dirty] (note: no 'g' prefix) + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + # TAG-DISTANCE-gHEX[-dirty], like 'git describe --tags --dirty + # --always --long'. The distance/hash is unconditional. + + # exceptions: + # 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + } + + +class VersioneerBadRootError(Exception): + pass + + +def get_versions(verbose=False): + # returns dict with keys 'version', 'full-revisionid', 'dirty', 'error' + + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. 
This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. + + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + print("unable to compute version") + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + } + + +def get_version(): + return get_versions()["version"] + + +def get_cmdclass(): + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. 
A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to its pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. + # Also see https://github.com/warner/python-versioneer/issues/52 + + cmds = {} + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + if vers["error"]: + print(" error: %s" % vers["error"]) + + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ? 
+ + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + cmds["build_py"] = cmd_build_py + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + # we override different "sdist" commands for both environments + if "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # 
now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = "" + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. 
+ +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +INIT_PY_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + + +def do_setup(): + root = get_root() + try: + cfg = get_config_from_root(root) + except ( + EnvironmentError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: + if isinstance(e, (EnvironmentError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except EnvironmentError: + old = "" + if INIT_PY_SNIPPET not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(INIT_PY_SNIPPET) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. 
+ manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except EnvironmentError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-time keyword + # substitution. + do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). 
Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/web/README.md b/web/README.md new file mode 100644 index 00000000..7396fbd0 --- /dev/null +++ b/web/README.md @@ -0,0 +1,12 @@ +Directory containing the pandas website (hosted at https://pandas.io). + +The website sources are in `web/pandas/`, which also includes a `config.yml` file +containing the settings to build the website. The website is generated with the +command `./pandas_web.py pandas`. See `./pandas_web.py --help` and the header of +the script for more information and options. + +After building the website, to navigate it, you need to access it through +an http server (and not open the local files with the browser, since the links and +the image sources are absolute to where they are served from). The easiest way +to run an http server locally is to run `python -m http.server` from the +`web/build/` directory. diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html new file mode 100644 index 00000000..120058af --- /dev/null +++ b/web/pandas/_templates/layout.html @@ -0,0 +1,108 @@ + + + + + pandas - Python Data Analysis Library + + + + + {% for stylesheet in static.css %} + + {% endfor %} + + + +
    + +
    +
    +
    + {% block body %}{% endblock %} +
    +
    + + + + + + + diff --git a/web/pandas/about/citing.md b/web/pandas/about/citing.md new file mode 100644 index 00000000..5cd31d87 --- /dev/null +++ b/web/pandas/about/citing.md @@ -0,0 +1,123 @@ +# Citing and logo + +## Citing pandas + +If you use _pandas_ for a scientific publication, we would appreciate citations to one of the following papers: + +- [Data structures for statistical computing in python](http://conference.scipy.org/proceedings/scipy2010/pdfs/mckinney.pdf), + McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010. + + @inproceedings{mckinney2010data, + title={Data structures for statistical computing in python}, + author={Wes McKinney}, + booktitle={Proceedings of the 9th Python in Science Conference}, + volume={445}, + pages={51--56}, + year={2010}, + organization={Austin, TX} + } + + +- [pandas: a foundational Python library for data analysis and statistics](https://www.scribd.com/document/71048089/pandas-a-Foundational-Python-Library-for-Data-Analysis-and-Statistics), + McKinney, Python for High Performance and Scientific Computing, Volume 14, 2011. + + @article{mckinney2011pandas, + title={pandas: a foundational Python library for data analysis and statistics}, + author={Wes McKinney}, + journal={Python for High Performance and Scientific Computing}, + volume={14}, + year={2011} + } + +## Brand and logo + +When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence. + +The official logos of _pandas_ are: + +### Primary logo + + + + + + + + +### Secondary logo + + + + + + + + +### Logo mark + + + + + + + + +### Logo usage + +The pandas logo is available in full color and white accent. +The full color logo should only appear against white backgrounds. +The white accent logo should go against contrasting color background. 
+ +When using the logo, please follow the next directives: + +- Primary logo should never be seen under 1 inch in size for printing and 72px for web +- The secondary logo should never be seen under 0.75 inch in size for printing and 55px for web +- Leave enough margin around the logo (leave the height of the logo in the top, bottom and both sides) +- Do not distort the logo by changing its proportions +- Do not place text or other elements on top of the logo + +### Colors + + + + + + + +
    + + + +
    + Blue
    + RGB: R21 G4 B88
    + HEX: #150458 +
    + + + +
    + Yellow
    + RGB: R255 G202 B0
    + HEX: #FFCA00 +
    + + + +
    + Pink
    + RGB: R231 G4 B136
    + HEX: #E70488 +
    diff --git a/web/pandas/about/index.md b/web/pandas/about/index.md new file mode 100644 index 00000000..9a0a3923 --- /dev/null +++ b/web/pandas/about/index.md @@ -0,0 +1,86 @@ +# About pandas + +## History of development + +In 2008, _pandas_ development began at [AQR Capital Management](http://www.aqr.com). +By the end of 2009 it had been [open sourced](http://en.wikipedia.org/wiki/Open_source), +and is actively supported today by a community of like-minded individuals around the world who +contribute their valuable time and energy to help make open source _pandas_ +possible. Thank you to [all of our contributors](team.html). + +Since 2015, _pandas_ is a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects). +This will help ensure the success of development of _pandas_ as a world-class open-source project. + +### Timeline + +- **2008**: Development of _pandas_ started +- **2009**: _pandas_ becomes open source +- **2012**: First edition of _Python for Data Analysis_ is published +- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects) +- **2018**: First in-person core developer sprint + +## Library Highlights + +- A fast and efficient **DataFrame** object for data manipulation with + integrated indexing; + +- Tools for **reading and writing data** between in-memory data structures and + different formats: CSV and text files, Microsoft Excel, SQL databases, and + the fast HDF5 format; + +- Intelligent **data alignment** and integrated handling of **missing data**: + gain automatic label-based alignment in computations and easily manipulate + messy data into an orderly form; + +- Flexible **reshaping** and pivoting of data sets; + +- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting** + of large data sets; + +- Columns can be inserted and deleted from data structures for **size + mutability**; + +- Aggregating or transforming data with a powerful **group by** engine + allowing 
split-apply-combine operations on data sets; + +- High performance **merging and joining** of data sets; + +- **Hierarchical axis indexing** provides an intuitive way of working with + high-dimensional data in a lower-dimensional data structure; + +- **Time series**-functionality: date range generation and frequency + conversion, moving window statistics, date shifting and lagging. + Even create domain-specific time offsets and join time + series without losing data; + +- Highly **optimized for performance**, with critical code paths written in + [Cython](http://www.cython.org/) or C. + +- Python with *pandas* is in use in a wide variety of **academic and + commercial** domains, including Finance, Neuroscience, Economics, + Statistics, Advertising, Web Analytics, and more. + +## Mission + +_pandas_ aims to be the fundamental high-level building block for doing practical, +real world data analysis in Python. +Additionally, it has the broader goal of becoming the most powerful and flexible +open source data analysis / manipulation tool available in any language. + +## Vision + +A world where data analytics and manipulation software is: + +- Accessible to everyone +- Free for users to use and modify +- Flexible +- Powerful +- Easy to use +- Fast + +## Values + +It is at the core of _pandas_ to be respectful and welcoming to everybody: +users, contributors and the broader community, regardless of level of experience, +gender, gender identity and expression, sexual orientation, disability, +personal appearance, body size, race, ethnicity, age, religion, or nationality. diff --git a/web/pandas/about/roadmap.md b/web/pandas/about/roadmap.md new file mode 100644 index 00000000..8a5c2735 --- /dev/null +++ b/web/pandas/about/roadmap.md @@ -0,0 +1,195 @@ +# Roadmap + +This page provides an overview of the major themes in pandas' +development. Each of these items requires a relatively large amount of +effort to implement.
These may be achieved more quickly with dedicated +funding or interest from contributors. + +An item being on the roadmap does not mean that it will *necessarily* +happen, even with unlimited funding. During the implementation period we +may discover issues preventing the adoption of the feature. + +Additionally, an item *not* being on the roadmap does not exclude it +from inclusion in pandas. The roadmap is intended for larger, +fundamental changes to the project that are likely to take months or +years of developer time. Smaller-scoped items will continue to be +tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues). + +See [Roadmap evolution](#roadmap-evolution) for proposing +changes to this document. + +## Extensibility + +Pandas `extending.extension-types` allow +for extending NumPy types with custom data types and array storage. +Pandas uses extension types internally, and provides an interface for +3rd-party libraries to define their own custom data types. + +Many parts of pandas still unintentionally convert data to a NumPy +array. These problems are especially pronounced for nested data. + +We'd like to improve the handling of extension arrays throughout the +library, making their behavior more consistent with the handling of +NumPy arrays. We'll do this by cleaning up pandas' internals and +adding new methods to the extension array interface. + +## String data type + +Currently, pandas stores text data in an `object` -dtype NumPy array. +The current implementation has two primary drawbacks: First, `object` +-dtype is not specific to strings: any Python object can be stored in an +`object` -dtype array, not just strings. Second: this is not efficient. +The NumPy memory model isn't especially well-suited to variable width +text data. + +To solve the first issue, we propose a new extension type for string +data. This will initially be opt-in, with users explicitly requesting +`dtype="string"`. 
The array backing this string dtype may initially be +the current implementation: an `object` -dtype NumPy array of Python +strings. + +To solve the second issue (performance), we'll explore alternative +in-memory array libraries (for example, Apache Arrow). As part of the +work, we may need to implement certain operations expected by pandas +users (for example the algorithm used in, `Series.str.upper`). That work +may be done outside of pandas. + +## Apache Arrow interoperability + +[Apache Arrow](https://arrow.apache.org) is a cross-language development +platform for in-memory data. The Arrow logical types are closely aligned +with typical pandas use cases. + +We'd like to provide better-integrated support for Arrow memory and +data types within pandas. This will let us take advantage of its I/O +capabilities and provide for better interoperability with other +languages and libraries using Arrow. + +## Block manager rewrite + +We'd like to replace pandas current internal data structures (a +collection of 1 or 2-D arrays) with a simpler collection of 1-D arrays. + +Pandas internal data model is quite complex. A DataFrame is made up of +one or more 2-dimensional "blocks", with one or more blocks per dtype. +This collection of 2-D arrays is managed by the BlockManager. + +The primary benefit of the BlockManager is improved performance on +certain operations (construction from a 2D array, binary operations, +reductions across the columns), especially for wide DataFrames. However, +the BlockManager substantially increases the complexity and maintenance +burden of pandas. 
+ +By replacing the BlockManager we hope to achieve + +- Substantially simpler code +- Easier extensibility with new logical types +- Better user control over memory use and layout +- Improved micro-performance +- Option to provide a C / Cython API to pandas' internals + +See [these design +documents](https://dev.pandas.io/pandas2/internal-architecture.html#removal-of-blockmanager-new-dataframe-internals) +for more. + +## Decoupling of indexing and internals + +The code for getting and setting values in pandas' data structures +needs refactoring. In particular, we must clearly separate code that +converts keys (e.g., the argument to `DataFrame.loc`) to positions from +code that uses these positions to get or set values. This is related to +the proposed BlockManager rewrite. Currently, the BlockManager sometimes +uses label-based, rather than position-based, indexing. We propose that +it should only work with positional indexing, and the translation of +keys to positions should be entirely done at a higher level. + +Indexing is a complicated API with many subtleties. This refactor will +require care and attention. More details are discussed at + + +## Numba-accelerated operations + +[Numba](https://numba.pydata.org) is a JIT compiler for Python code. +We'd like to provide ways for users to apply their own Numba-jitted +functions where pandas accepts user-defined functions (for example, +`Series.apply`, +`DataFrame.apply`, +`DataFrame.applymap`, and in groupby and +window contexts). This will improve the performance of +user-defined functions in these operations by staying within compiled +code. + +## Documentation improvements + +We'd like to improve the content, structure, and presentation of the +pandas documentation. Some specific goals include + +- Overhaul the HTML theme with a modern, responsive design + (`15556`) +- Improve the "Getting Started" documentation, designing and writing + learning paths for users with different backgrounds (e.g.
brand new to + programming, familiar with other languages like R, already familiar + with Python). +- Improve the overall organization of the documentation and specific + subsections of the documentation to make navigation and finding + content easier. + +## Package docstring validation + +To improve the quality and consistency of pandas docstrings, we've +developed tooling to check docstrings in a variety of ways. + +contains the checks. + +Like many other projects, pandas uses the +[numpydoc](https://numpydoc.readthedocs.io/en/latest/) style for writing +docstrings. With the collaboration of the numpydoc maintainers, we'd +like to move the checks to a package other than pandas so that other +projects can easily use them as well. + +## Performance monitoring + +Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/) +to monitor for performance regressions. ASV itself is a fabulous tool, +but requires some additional work to be integrated into an open source +project's workflow. + +The [asv-runner](https://github.com/asv-runner) organization, currently +made up of pandas maintainers, provides tools built on top of ASV. We +have a physical machine for running a number of project's benchmarks, +and tools managing the benchmark runs and reporting on results. + +We'd like to fund improvements and maintenance of these tools to + +- Be more stable. Currently, they're maintained on the nights and + weekends when a maintainer has free time. +- Tune the system for benchmarks to improve stability, following + +- Build a GitHub bot to request ASV runs *before* a PR is merged. + Currently, the benchmarks are only run nightly. + +## Roadmap Evolution + +Pandas continues to evolve. The direction is primarily determined by +community interest. Everyone is welcome to review existing items on the +roadmap and to propose a new item. + +Each item on the roadmap should be a short summary of a larger design +proposal. The proposal should include + +1. 
Short summary of the changes, which would be appropriate for + inclusion in the roadmap if accepted. +2. Motivation for the changes. +3. An explanation of why the change is in scope for pandas. +4. Detailed design: Preferably with example-usage (even if not + implemented yet) and API documentation +5. API Change: Any API changes that may result from the proposal. + +That proposal may then be submitted as a GitHub issue, where the pandas +maintainers can review and comment on the design. The [pandas mailing +list](https://mail.python.org/mailman/listinfo/pandas-dev) should be +notified of the proposal. + +When there's agreement that an implementation would be welcome, the +roadmap should be updated to include the summary and a link to the +discussion issue. diff --git a/web/pandas/about/sponsors.md b/web/pandas/about/sponsors.md new file mode 100644 index 00000000..dcc6e367 --- /dev/null +++ b/web/pandas/about/sponsors.md @@ -0,0 +1,41 @@ +# Sponsors + +## NumFOCUS + +![](https://numfocus.org/wp-content/uploads/2018/01/optNumFocus_LRG.png) + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. + +## Tidelift + +_pandas_ is part of the [Tidelift subscription](https://tidelift.com/subscription/pkg/pypi-pandas?utm_source=pypi-pandas&utm_medium=referral&utm_campaign=readme). +You can support pandas by becoming a Tidelift subscriber. + +## Institutional partners + +Institutional Partners are companies and universities that support the project by employing contributors. 
+Current Institutional Partners include: + +
      + {% for company in partners.active if company.employs %} +
    • {{ company.name }} ({{ company.employs }})
    • + {% endfor %} +
    + +## In-kind sponsors + +- [OVH](https://us.ovhcloud.com/): Hosting +- [Indeed](https://opensource.indeedeng.io/): Logo and website design + +## Past institutional partners + + diff --git a/web/pandas/about/team.md b/web/pandas/about/team.md new file mode 100644 index 00000000..8eb2edeb --- /dev/null +++ b/web/pandas/about/team.md @@ -0,0 +1,77 @@ +# Team + +## Contributors + +_pandas_ is made with love by more than [1,500 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors). + +If you want to support pandas development, you can find information in the [donations page](../donate.html). + +## Maintainers + +
    + {% for row in maintainers.people | batch(6, "") %} +
    + {% for person in row %} + {% if person %} +
    + +
    +
    + {% if person.blog %} + + {{ person.name or person.login }} + + {% else %} + {{ person.name or person.login }} + {% endif %} +
    +

    {{ person.login }}

    +
    +
    + {% else %} +
    + {% endif %} + {% endfor %} +
    + {% endfor %} +
    + +## Diversity and Inclusion + +> _pandas_ expressly welcomes and encourages contributions from anyone who faces under-representation, discrimination in the technology industry +> or anyone willing to increase the diversity of our team. +> We have identified visible gaps and obstacles in sustaining diversity and inclusion in the open-source communities and we are proactive in increasing +> the diversity of our team. +> We have a [code of conduct]({base_url}/community/coc.html) to ensure a friendly and welcoming environment. +> Please send an email to [pandas-code-of-conduct-committee](mailto:pandas-coc@googlegroups.com), if you think we can do a +> better job at achieving this goal. + +## Governance + +Wes McKinney is the Benevolent Dictator for Life (BDFL). + +The project governance is available in the [project governance documents](https://github.com/pandas-dev/pandas-governance). + +## Code of conduct committee + +
      + {% for person in maintainers.coc %} +
    • {{ person }}
    • + {% endfor %} +
    + +## NumFOCUS committee + +
      + {% for person in maintainers.numfocus %} +
    • {{ person }}
    • + {% endfor %} +
    + +## Emeritus maintainers + +
      + {% for person in maintainers.emeritus %} +
    • {{ person }}
    • + {% endfor %} +
    diff --git a/web/pandas/community/blog.html b/web/pandas/community/blog.html new file mode 100644 index 00000000..ffe6f97d --- /dev/null +++ b/web/pandas/community/blog.html @@ -0,0 +1,14 @@ +{% extends "layout.html" %} + +{% block body %} + {% for post in blog.posts %} +
    +
    +

    {{ post.title }}

    +
    Source: {{ post.feed }} | Author: {{ post.author }} | Published: {{ post.published.strftime("%b %d, %Y") }}
    +
    {{ post.summary }}
    + Read +
    +
    + {% endfor %} +{% endblock %} diff --git a/web/pandas/community/coc.md b/web/pandas/community/coc.md new file mode 100644 index 00000000..de0e8120 --- /dev/null +++ b/web/pandas/community/coc.md @@ -0,0 +1,63 @@ +# Code of conduct + +As contributors and maintainers of this project, and in the interest of +fostering an open and welcoming community, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating +documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free +experience for everyone, regardless of level of experience, gender, gender +identity and expression, sexual orientation, disability, personal appearance, +body size, race, ethnicity, age, religion, or nationality. + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery +* Personal attacks +* Trolling or insulting/derogatory comments +* Public or private harassment +* Publishing other's private information, such as physical or electronic + addresses, without explicit permission +* Other unethical or unprofessional conduct + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +By adopting this Code of Conduct, project maintainers commit themselves to +fairly and consistently applying these principles to every aspect of managing +this project. Project maintainers who do not follow or enforce the Code of +Conduct may be permanently removed from the project team. + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. 
+ +A working group of community members is committed to promptly addressing any +reported issues. The working group is made up of pandas contributors and users. +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the working group by e-mail (pandas-coc@googlegroups.com). +Messages sent to this e-mail address will not be publicly visible but only to +the working group members. The working group currently includes + +
      + {% for person in maintainers.coc %} +
    • {{ person }}
    • + {% endfor %} +
    + +All complaints will be reviewed and investigated and will result in a response +that is deemed necessary and appropriate to the circumstances. Maintainers are +obligated to maintain confidentiality with regard to the reporter of an +incident. + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 1.3.0, available at +[http://contributor-covenant.org/version/1/3/0/][version], +and the [Swift Code of Conduct][swift]. + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/3/0/ +[swift]: https://swift.org/community/#code-of-conduct + diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md new file mode 100644 index 00000000..a707854c --- /dev/null +++ b/web/pandas/community/ecosystem.md @@ -0,0 +1,365 @@ +# Ecosystem + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. This is +encouraging because it means pandas is not only helping users to handle +their data tasks but also that it provides a better starting point for +developers to build powerful and more focused data tools. The creation +of libraries that complement pandas' functionality also allows pandas +development to remain focused around its original requirements. + +This is a non-exhaustive list of projects that build on pandas in order +to provide tools in the PyData space. For a list of projects that depend +on pandas, see the [libraries.io usage page for +pandas](https://libraries.io/pypi/pandas/usage) or [search pypi for +pandas](https://pypi.org/search/?q=pandas). + +We'd like to make it easier for users to find these projects; if you +know of other substantial projects that you feel should be on this list, +please let us know.
+ +## Statistics and machine learning + +### [Statsmodels](https://www.statsmodels.org/) + +Statsmodels is the prominent Python "statistics and econometrics +library" and it has a long-standing special relationship with pandas. +Statsmodels provides powerful statistics, econometrics, analysis and +modeling functionality that is out of pandas' scope. Statsmodels +leverages pandas objects as the underlying data container for +computation. + +### [sklearn-pandas](https://github.com/paulgb/sklearn-pandas) + +Use pandas DataFrames in your [scikit-learn](https://scikit-learn.org/) +ML pipeline. + +### [Featuretools](https://github.com/featuretools/featuretools/) + +Featuretools is a Python library for automated feature engineering built +on top of pandas. It excels at transforming temporal and relational +datasets into feature matrices for machine learning using reusable +feature engineering "primitives". Users can contribute their own +primitives in Python and share them with the rest of the community. + +## Visualization + +### [Altair](https://altair-viz.github.io/) + +Altair is a declarative statistical visualization library for Python. +With Altair, you can spend more time understanding your data and its +meaning. Altair's API is simple, friendly and consistent and built on +top of the powerful Vega-Lite JSON specification. This elegant +simplicity produces beautiful and effective visualizations with a +minimal amount of code. Altair works with Pandas DataFrames. + +### [Bokeh](https://bokeh.pydata.org) + +Bokeh is a Python interactive visualization library for large datasets +that natively uses the latest web technologies. Its goal is to provide +elegant, concise construction of novel graphics in the style of +Protovis/D3, while delivering high-performance interactivity over large +data to thin clients. 
+ +[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a +high level API for Bokeh that can be loaded as a native Pandas plotting +backend via + +``` +pd.set_option("plotting.backend", "pandas_bokeh") +``` + +It is very similar to the matplotlib plotting backend, but provides +interactive web-based charts and maps. + +### [seaborn](https://seaborn.pydata.org) + +Seaborn is a Python visualization library based on +[matplotlib](https://matplotlib.org). It provides a high-level, +dataset-oriented interface for creating attractive statistical graphics. +The plotting functions in seaborn understand pandas objects and leverage +pandas grouping operations internally to support concise specification +of complex visualizations. Seaborn also goes beyond matplotlib and +pandas with the option to perform statistical estimation while plotting, +aggregating across observations and visualizing the fit of statistical +models to emphasize patterns in a dataset. + +### [plotnine](https://github.com/has2k1/plotnine/) + +Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a +foundational exploratory visualization package for the R language. Based +on ["The Grammar of +Graphics"](https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html) +it provides a powerful, declarative and extremely general way to +generate bespoke plots of any kind of data. +Various implementations to other languages are available. +A good implementation for Python users is [has2k1/plotnine](https://github.com/has2k1/plotnine/). + +### [IPython Vega](https://github.com/vega/ipyvega) + +[IPython Vega](https://github.com/vega/ipyvega) leverages [Vega](https://github.com/vega/vega) to create plots within Jupyter Notebook. + +### [Plotly](https://plot.ly/python) + +[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/) +enables interactive figures and web shareability. Maps, 2D, 3D, and +live-streaming graphs are rendered with WebGL and +[D3.js](https://d3js.org/). 
The library supports plotting directly from +a pandas DataFrame and cloud-based collaboration. Users of [matplotlib, +ggplot for Python, and +Seaborn](https://plot.ly/python/matplotlib-to-plotly-tutorial/) can +convert figures into interactive web-based plots. Plots can be drawn in +[IPython Notebooks](https://plot.ly/ipython-notebooks/), edited with R +or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly +is free for unlimited sharing, and has +[cloud](https://plot.ly/product/plans/), +[offline](https://plot.ly/python/offline/), or +[on-premise](https://plot.ly/product/enterprise/) accounts for private +use. + +### [QtPandas](https://github.com/draperjames/qtpandas) + +Spun off from the main pandas library, the +[qtpandas](https://github.com/draperjames/qtpandas) library enables +DataFrame visualization and manipulation in PyQt4 and PySide +applications. + +## IDE + +### [IPython](https://ipython.org/documentation.html) + +IPython is an interactive command shell and distributed computing +environment. IPython tab completion works with Pandas methods and also +attributes like DataFrame columns. + +### [Jupyter Notebook / Jupyter Lab](https://jupyter.org) + +Jupyter Notebook is a web application for creating Jupyter notebooks. A +Jupyter notebook is a JSON document containing an ordered list of +input/output cells which can contain code, text, mathematics, plots and +rich media. Jupyter notebooks can be converted to a number of open +standard output formats (HTML, HTML presentation slides, LaTeX, PDF, +ReStructuredText, Markdown, Python) through 'Download As' in the web +interface and `jupyter nbconvert` in a shell. + +Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which +are utilized by Jupyter Notebook for displaying (abbreviated) HTML or +LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may +or may not be compatible with non-HTML Jupyter output formats.)
+ +See [Options and Settings](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html) and +[Available Options](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#available-options) +for pandas `display.` settings. + +### [quantopian/qgrid](https://github.com/quantopian/qgrid) + +qgrid is "an interactive grid for sorting and filtering DataFrames in +IPython Notebook" built with SlickGrid. + +### [Spyder](https://www.spyder-ide.org/) + +Spyder is a cross-platform PyQt-based IDE combining the editing, +analysis, debugging and profiling functionality of a software +development tool with the data exploration, interactive execution, deep +inspection and rich visualization capabilities of a scientific +environment like MATLAB or RStudio. + +Its [Variable +Explorer](https://docs.spyder-ide.org/variableexplorer.html) allows +users to view, manipulate and edit pandas `Index`, `Series`, and +`DataFrame` objects like a "spreadsheet", including copying and +modifying values, sorting, displaying a "heatmap", converting data +types and more. Pandas objects can also be renamed, duplicated, new +columns added, copied/pasted to/from the clipboard (as TSV), and +saved/loaded to/from a file. Spyder can also import data from a variety +of plain text and binary files or the clipboard into a new pandas +DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's [Editor](https://docs.spyder-ide.org/editor.html) and [IPython +Console](https://docs.spyder-ide.org/ipythonconsole.html), and Spyder's +[Help pane](https://docs.spyder-ide.org/help.html) can retrieve and +render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. + +## API + +### [pandas-datareader](https://github.com/pydata/pandas-datareader) + +`pandas-datareader` is a remote data access library for pandas +(PyPI:`pandas-datareader`). It is based on functionality that was +located in `pandas.io.data` and `pandas.io.wb` but was split off in +v0.19.
See more in the [pandas-datareader +docs](https://pandas-datareader.readthedocs.io/en/latest/): + +The following data feeds are available: + +- Google Finance +- Tiingo +- Morningstar +- IEX +- Robinhood +- Enigma +- Quandl +- FRED +- Fama/French +- World Bank +- OECD +- Eurostat +- TSP Fund Data +- Nasdaq Trader Symbol Definitions +- Stooq Index Data +- MOEX Data + +### [quandl/Python](https://github.com/quandl/Python) + +Quandl API for Python wraps the Quandl REST API to return Pandas +DataFrames with timeseries indexes. + +### [pydatastream](https://github.com/vfilimonov/pydatastream) + +PyDatastream is a Python interface to the [Thomson Dataworks Enterprise +(DWE/Datastream)](http://dataworks.thomson.com/Dataworks/Enterprise/1.0/) +SOAP API to return indexed Pandas DataFrames with financial data. This +package requires valid credentials for this API (non free). + +### [pandaSDMX](https://pandasdmx.readthedocs.io) + +pandaSDMX is a library to retrieve and acquire statistical data and +metadata disseminated in [SDMX](https://www.sdmx.org) 2.1, an +ISO-standard widely used by institutions such as statistics offices, +central banks, and international organisations. pandaSDMX can expose +datasets and related structural metadata including data flows, +code-lists, and data structure definitions as pandas Series or +MultiIndexed DataFrames. + +### [fredapi](https://github.com/mortada/fredapi) + +fredapi is a Python interface to the [Federal Reserve Economic Data +(FRED)](https://fred.stlouisfed.org/) provided by the Federal Reserve +Bank of St. Louis. It works with both the FRED database and ALFRED +database that contains point-in-time data (i.e. historic data +revisions). fredapi provides a wrapper in Python to the FRED HTTP API, +and also provides several convenient methods for parsing and analyzing +point-in-time data from ALFRED. fredapi makes use of pandas and returns +data in a Series or DataFrame. 
This module requires a FRED API key that +you can obtain for free on the FRED website. + +## Domain specific + +### [Geopandas](https://github.com/kjordahl/geopandas) + +Geopandas extends pandas data objects to include geographic information +which support geometric operations. If your work entails maps and +geographical coordinates, and you love pandas, you should take a close +look at Geopandas. + +### [xarray](https://github.com/pydata/xarray) + +xarray brings the labeled data power of pandas to the physical sciences +by providing N-dimensional variants of the core pandas data structures. +It aims to provide a pandas-like and pandas-compatible toolkit for +analytics on multi-dimensional arrays, rather than the tabular data at +which pandas excels. + +## Out-of-core + +### [Blaze](http://blaze.pydata.org/) + +Blaze provides a standard API for doing computations with various +in-memory and on-disk backends: NumPy, Pandas, SQLAlchemy, MongoDB, +PyTables, PySpark. + +### [Dask](https://dask.readthedocs.io/en/latest/) + +Dask is a flexible parallel computing library for analytics. Dask +provides a familiar `DataFrame` interface for out-of-core, parallel and +distributed computing. + +### [Dask-ML](https://dask-ml.readthedocs.io/en/latest/) + +Dask-ML enables parallel and distributed machine learning using Dask +alongside existing machine learning libraries like Scikit-Learn, +XGBoost, and TensorFlow. + +### [Koalas](https://koalas.readthedocs.io/en/latest/) + +Koalas provides a familiar pandas DataFrame interface on top of Apache +Spark. It enables users to leverage multi-cores on one machine or a +cluster of machines to speed up or scale their DataFrame code. + +### [Odo](http://odo.pydata.org) + +Odo provides a uniform API for moving data between different formats. It +uses pandas' own `read_csv` for CSV IO and leverages many existing +packages such as PyTables, h5py, and pymongo to move data between +non-pandas formats.
Its graph-based approach is also extensible by end users +for custom formats that may be too specific for the core of odo. + +### [Ray](https://ray.readthedocs.io/en/latest/pandas_on_ray.html) + +Pandas on Ray is an early-stage DataFrame library that wraps Pandas and +transparently distributes the data and computation. The user does not +need to know how many cores their system has, nor do they need to +specify how to distribute the data. In fact, users can continue using +their previous Pandas notebooks while experiencing a considerable +speedup from Pandas on Ray, even on a single machine. Only a +modification of the import statement is needed, as we demonstrate below. +Once you've changed your import statement, you're ready to use Pandas on +Ray just like you would Pandas. + +``` +# import pandas as pd +import ray.dataframe as pd +``` + +### [Vaex](https://docs.vaex.io/) + +Increasingly, packages are being built on top of pandas to address +specific needs in data preparation, analysis and visualization. Vaex is +a Python library for Out-of-Core DataFrames (similar to Pandas), to +visualize and explore big tabular datasets. It can calculate statistics +such as mean, sum, count, standard deviation etc., on an N-dimensional +grid up to a billion (10<sup>9</sup>) objects/rows per second. Visualization is +done using histograms, density plots and 3D volume rendering, allowing +interactive exploration of big data. Vaex uses memory mapping, zero +memory copy policy and lazy computations for best performance (no memory +wasted). + +- ``vaex.from_pandas`` +- ``vaex.to_pandas_df`` + +## Data cleaning and validation + +### [pyjanitor](https://github.com/ericmjl/pyjanitor/) + +Pyjanitor provides a clean API for cleaning data, using method chaining. + +### [Engarde](https://engarde.readthedocs.io/en/latest/) + +Engarde is a lightweight library used to explicitly state your +assumptions about your datasets and check that they're *actually* true.
+ +## Extension data types + +Pandas provides an interface for defining +`extension types ` to extend NumPy's type system. The following libraries +implement that interface to provide types not found in NumPy or pandas, +which work well with pandas' data containers. + +### [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) + +Cyberpandas provides an extension type for storing arrays of IP +Addresses. These arrays can be stored inside pandas' Series and +DataFrame. + +## Accessors + +A directory of projects providing +`extension accessors `. This is for users to discover new accessors and for library +authors to coordinate on the namespace. + + | Library | Accessor | Classes | + | ------------------------------------------------------------|----------|-----------------------| + | [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) | `ip` | `Series` | + | [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | diff --git a/web/pandas/config.yml b/web/pandas/config.yml new file mode 100644 index 00000000..e2a95a50 --- /dev/null +++ b/web/pandas/config.yml @@ -0,0 +1,130 @@ +main: + templates_path: _templates + base_template: "layout.html" + ignore: + - _templates/layout.html + - config.yml + - try.md # the binder page will be added later + github_repo_url: pandas-dev/pandas + context_preprocessors: + - pandas_web.Preprocessors.navbar_add_info + - pandas_web.Preprocessors.blog_add_posts + - pandas_web.Preprocessors.maintainers_add_info + - pandas_web.Preprocessors.home_add_releases + markdown_extensions: + - toc + - tables + - fenced_code +static: + logo: /static/img/pandas_white.svg + css: + - /static/css/pandas.css +navbar: + - name: "About us" + target: + - name: "About pandas" + target: /about/index.html + - name: "Project roadmap" + target: /about/roadmap.html + - name: "Team" + target: /about/team.html + - name: "Sponsors" + target: /about/sponsors.html + - name: "Citing and logo" + target: /about/citing.html + - name: 
"Getting started" + target: /getting_started.html + - name: "Documentation" + target: + - name: "User guide" + target: /docs/user_guide/index.html + - name: "API reference" + target: /docs/reference/index.html + - name: "Release notes" + target: /docs/whatsnew/index.html + - name: "Older versions" + target: https://pandas.pydata.org/pandas-docs/version/ + - name: "Community" + target: + - name: "Blog" + target: /community/blog.html + - name: "Ask a question (StackOverflow)" + target: https://stackoverflow.com/questions/tagged/pandas + - name: "Discuss" + target: https://pandas.discourse.group + - name: "Code of conduct" + target: /community/coc.html + - name: "Ecosystem" + target: /community/ecosystem.html + - name: "Contribute" + target: /contribute.html +blog: + num_posts: 8 + feed: + - https://wesmckinney.com/feeds/pandas.atom.xml + - https://tomaugspurger.github.io/feed + - https://jorisvandenbossche.github.io/feeds/all.atom.xml + - https://datapythonista.github.io/blog/feeds/pandas.atom.xml + - https://numfocus.org/tag/pandas/feed/ +maintainers: + active: + - wesm + - jorisvandenbossche + - TomAugspurger + - shoyer + - jreback + - chris-b1 + - sinhrks + - cpcloud + - gfyoung + - toobaz + - WillAyd + - mroeschke + - jschendel + - jbrockmendel + - datapythonista + - simonjayhawkins + - topper-123 + emeritus: + - Wouter Overmeire + - Skipper Seabold + - Jeff Tratner + coc: + - Safia Abdalla + - Tom Augspurger + - Joris Van den Bossche + - Camille Scott + - Nathaniel Smith + numfocus: + - Phillip Cloud + - Stephan Hoyer + - Wes McKinney + - Jeff Reback + - Joris Van den Bossche +partners: + active: + - name: "NumFOCUS" + url: https://numfocus.org/ + logo: /static/img/partners/numfocus.svg + - name: "Anaconda" + url: https://www.anaconda.com/ + logo: /static/img/partners/anaconda.svg + employs: "Tom Augspurger, Brock Mendel" + - name: "Two Sigma" + url: https://www.twosigma.com/ + logo: /static/img/partners/two_sigma.svg + employs: "Phillip Cloud, Jeff Reback" + - 
name: "RStudio" + url: https://www.rstudio.com/ + logo: /static/img/partners/r_studio.svg + employs: "Wes McKinney" + - name: "Ursa Labs" + url: https://ursalabs.org/ + logo: /static/img/partners/ursa_labs.svg + employs: "Wes McKinney, Joris Van den Bossche" + - name: "Tidelift" + url: https://tidelift.com + logo: /static/img/partners/tidelift.svg + past: + - name: "Paris-Saclay Center for Data Science" + url: https://www.datascience-paris-saclay.fr/ diff --git a/web/pandas/contribute.md b/web/pandas/contribute.md new file mode 100644 index 00000000..9f4ebaf9 --- /dev/null +++ b/web/pandas/contribute.md @@ -0,0 +1,55 @@ +# Contribute to pandas + +_pandas_ is and will always be **free**. To make the development sustainable, we need _pandas_ users, corporate +and individual, to support the development by providing their time and money. + +You can find more information about current developers in the [team page](about/team.html), +and about current sponsors in the [sponsors page](about/sponsors.html). + +
    +
    +
    +
    + + + + +

    Corporate support

    +

    + pandas depends on companies and institutions using the software to support its development. Hiring + people to work on pandas, or letting existing employees contribute to the + software. Or sponsoring pandas with funds, so the project can hire people to + progress on the pandas roadmap. +

    +

    More information in the sponsors page

    +
    +
    + + + + +

    Individual contributors

    +

    + pandas is mostly developed by volunteers. All kinds of contributions are welcome, + such as contributions to the code, to the website (including graphical designers), + to the documentation (including translators) and others. There are tasks for all + levels, including beginners. +

    +

    More information in the contributing page

    +
    +
    + + + + +

    Donations

    +

    + Individual donations are appreciated, and are used for things like the project + infrastructure, travel expenses for our volunteer contributors to attend + the in-person sprints, or to give small grants to develop features. +

    +

    Make your donation in the donate page

    +
    +
    +
    +
    diff --git a/web/pandas/donate.md b/web/pandas/donate.md new file mode 100644 index 00000000..69db7e46 --- /dev/null +++ b/web/pandas/donate.md @@ -0,0 +1,14 @@ +# Donate to pandas + +
    +
    + + +_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States. +NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the +health and sustainability of the project. Visit numfocus.org for more information. + +Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible +to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation. diff --git a/web/pandas/getting_started.md b/web/pandas/getting_started.md new file mode 100644 index 00000000..4195cc00 --- /dev/null +++ b/web/pandas/getting_started.md @@ -0,0 +1,51 @@ +# Getting started + +## Installation instructions + +The next steps provide the easiest and recommended way to set up your +environment to use pandas. Other installation options can be found in +the [advanced installation page]({{ base_url}}/docs/getting_started/install.html). + +1. Download [Anaconda](https://www.anaconda.com/distribution/) for your operating system and + the latest Python version, run the installer, and follow the steps. Detailed instructions + on how to install Anaconda can be found in the + [Anaconda documentation](https://docs.anaconda.com/anaconda/install/). + +2. In the Anaconda prompt (or terminal in Linux or macOS), start JupyterLab: + + + +3. In JupyterLab, create a new (Python 3) notebook: + + + +4. In the first cell of the notebook, you can import pandas and check the version with: + + + +5. Now you are ready to use pandas, and you can write your code in the next cells. + +## Tutorials + +You can learn more about pandas in the [tutorials](#), and more about JupyterLab +in the [JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html).
+ +## Books + +The book we recommend to learn pandas is [Python for Data Analysis](https://amzn.to/2KI5JJw), +by [Wes McKinney](https://wesmckinney.com/), creator of pandas. + + + Python for Data Analysis + + +## Videos + + + +## Cheat sheet + +[pandas cheat sheet](https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf) diff --git a/web/pandas/index.html b/web/pandas/index.html new file mode 100644 index 00000000..5aac5da1 --- /dev/null +++ b/web/pandas/index.html @@ -0,0 +1,114 @@ +{% extends "layout.html" %} +{% block body %} +
    +
    +
    +
    +

    pandas

    +

    + pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
    + built on top of the Python programming language. +

    +

    + Install pandas now! +

    +
    + +
    +
    +
    Getting started
    + +
    + +
    +
    Community
    + +
    +
    +
    +
    With the support of:
    +
    + {% for company in partners.active %} +
    + + {{ company.name }} + +
    + {% endfor %} +
    +
    +
    +
    + {% if releases %} +

    Latest version: {{ releases[0].name }}

    + + {% endif %} +

    Follow us

    + +

    Get the book

    +

    + + Python for Data Analysis + +

    + {% if releases[1:5] %} +

    Previous versions

    +
      + {% for release in releases[1:5] %} +
    • + {{ release.name }} ({{ release.published.strftime("%b %d, %Y") }})
      + changelog | + docs | + pdf | + code +
    • + {% endfor %} +
    + {% endif %} + {% if releases[5:] %} +

    + +

    +
      + {% for release in releases[5:] %} +
    • + {{ release.name }} ({{ release.published.strftime("%Y-%m-%d") }})
      + changelog | + docs | + pdf | + code +
    • + {% endfor %} +
    + {% endif %} +
    +
    +
    + +{% endblock %} diff --git a/web/pandas/static/css/pandas.css b/web/pandas/static/css/pandas.css new file mode 100644 index 00000000..b27ec6d4 --- /dev/null +++ b/web/pandas/static/css/pandas.css @@ -0,0 +1,66 @@ +body { + padding-top: 5em; + color: #444; +} +h1 { + font-size: 2.4rem; + font-weight: 700; + color: #130654; +} +h2 { + font-size: 1.45rem; + font-weight: 700; + color: black; +} +h3 { + font-size: 1.3rem; + font-weight: 600; + color: black; +} +a { + color: #130654; +} +code { + white-space: pre; +} +.blue { + color: #150458; +} +.pink { + color: #e70488; +} +.fab { + font-size: 1.2rem; + color: #666; +} +.fab:hover { + color: #130654; +} +a.navbar-brand img { + height: 3rem; +} +div.card { + margin: 0 0 .2em .2em !important; +} +div.card .card-title { + font-weight: 500; + color: #130654; +} +.book { + padding: 0 20%; +} +.bg-dark { + background-color: #130654 !important; +} +.navbar-dark .navbar-nav .nav-link { + color: rgba(255, 255, 255, .9); +} +.navbar-dark .navbar-nav .nav-link:hover { + color: white; +} +table.logo td { + text-align: center; +} +table.logo img { + height: 4rem; +} diff --git a/web/pandas/static/img/favicon.ico b/web/pandas/static/img/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..0af2443dcaa3e814d631a36bdcdb1c4d070bd6ce GIT binary patch literal 1150 zcmZQzU<5(|0R|wcz>vYhz#zuJz@P!dKp~(AL>x%r1r7`hEFtW|e?r(rwxcOz1jzx# z7}3O0xM>UwtU&$$fp{0PL@1kZ1w>rP4OtwWO=bOzo(wANp$rVn$o9uFh;s*kLkit? 
z1_osNgE@uGL2eG_5N;1;7xDmx$?Q-zAuW*KK;j^^=qBLO-^TV#ql5W*SS!OzadPx` zu)L_~V0r$(o#m+;vHBx81QdeVg=Pk`2?unrz0hrEeV)+C@LGyk{R}7n3j=W!5bK9< z2s;DyO$=ohQ72meng8Ytr~dy3;&$ZlCt5$q{!{;BfY^W>{UHAX?V1X7vmdegLGcMn z8=y3T9RHAXB*^ao$sai5A2~fB>qi#H=YM2%l0frB-GcX)r#*ZHW<$pXzU;yd{uLA%8 literal 0 HcmV?d00001 diff --git a/web/pandas/static/img/install/anaconda_prompt.png b/web/pandas/static/img/install/anaconda_prompt.png new file mode 100644 index 0000000000000000000000000000000000000000..7b547e4ebb02a6102ecf615ddddf576dc74ccd15 GIT binary patch literal 1373 zcmeAS@N?(olHy`uVBq!ia0y~yVA=y@|6*YVk{h`?HGq^sfKP}k10#b9bF2%uKq8Bc zoSdPsM`yJOV`zA^eCw@!9nLL_Hq6}KxA)+_-aCJPy;}PB|DUWqmG2oCSYCL#IEGZ* zdV6bswsgA0fsf$}7Q0V-C)y;v<(a<5EiOjKX*{jYYJ;_H*%M>%es_ziw=tk%35>kwN(GMdp|eD+v?`u#*FU$_YYp{-z{xd_vPMdfu$Mq z_b#zrrsVsZJFeoPYQ@9W=@0(uf2{NBy7zda`91S|w-4{yzyJB;B6M#}*47iZzoiHG zYu|el9(QBjrhq{Cd;aR`+V}YT%Rg`1eDB-q%6#+p`IW^J4@JMQ6TDaVpYPtI@4sG0 zZkZkaPf2jk`TUB0`xiI;*Y(BzOo?CZF)zkGEB%?n!@0HQU+?_!);D?Ib9ZND>BO|7 zfuYM&F7`dFTd=1xf8x2t@Tj8+eN|-Mv!uJPv~0IE?>M%*B6eTVp4TFG)2qH7e;)67sCwDgi4T8n z4X?YjcI(e2^KTw?GXlEo*wa2woyS@FRCE7~#}U=`)1)_Z@9v!X(z`i*<<9)r z>%qS_)?bOU==Z*Nr+Sw7<=<;}&OLee^q%dHMT+MqKi_a?#@zTnOM}yo_I;c8G3EYm zbM4BnckiG6aqSy_Wv2cf7X3XRg!X(m(bpAUZ=Zag;xDg+Hz52wSc$R1xzm+dkTEnleepdRpSGcyqyz(#?-~Ktr z9&ApqJiBjut@0s^;@`;qp1b);qoiDsgw-|5aG> za!=^p!$*9!%eVOApd{e)Hh;}t2VQU%U3-wI;wsAwEyAk z{_lI;3N~AaymyZNz{dLC(dgqGm9j}HoZBi=CFXXP&)!~WAr@D?r~dW%gmujCuYIXA zo-#YUHsd&Of|dK8s4n5iKccB*XkbB z-dsFkYxuI8?@pevY47t%UomHXx2a0Z^Qo_Ie9zokETOvbXF&aZshP13?%#Gj{cv*M z^-rhHRI$E4xZBOU&pbKHZ_bU!Ag_2<&izrm_Uro(`&P)9)W2>`QV&>jvMj#({4=q8 zSGR9lD?T^&ey#P#uI&@rzW=K)z7e)&pP}x4W$yK#&ozHP4~khx)FGl15#bOqq*w+f s0<2;%VWh+YR|*qDw`P!8z2z1E3ib2V9y4c40?SJVPgg&ebxsLQ0Dqmv%>V!Z literal 0 HcmV?d00001 diff --git a/web/pandas/static/img/install/jupyterlab_home.png b/web/pandas/static/img/install/jupyterlab_home.png new file mode 100644 index 0000000000000000000000000000000000000000..c62d33a5e0fc605be6d66c4a7be9f31d9baee8bc GIT binary patch literal 
1962 zcmY*aX;f2576yr|C!ium34ulk$V5mBVvYnv8bsrQ2tvTfbBO~AhNmd-LK+t~8(G|w zZZ^fGl{JG9BJfZk5=0F+G65Sqk0dNKexVHcd4p->#NEQ4cUq{ zw>38~Fu>Bb1%(+HEQ0lT4r8R3b>_Ki^uza|JNIs}x5pYd-DUe*1k z%vIBQ4hKMHFgp94I-?}2QIV}z^#UoA*VyC<4x1rALk{=C_+h-kpRThYZ*=bL*pC2c zia?N%+<|ve|K3q^nbJp&cH_ihceofoADYf%M)eH1z8@3MihnH3f8@eW< zI8th{1-mOEK!iWZZ0v3M`{+nuXG^SKINXN@JCMTSt5KJHtqW4uIG0EQz{8;$AMtna zo(sy=f$O&VpdA)u_?x&|Yov$E^atdf?%tu4X;;vE%QwE_D2QL|=f72=V~0GO*QSrb z01)RF?K*hxSNMNEUsHVbK>*c)sw?Lch98SKY>Izo11Zw0ApebzkPLc08>#>xEu{6) zuR80@=N5udEhtw~d3^ktVjYGMa1=0R6VTzkUDm<-JEIizl*G{s#FVff5 z_34XD2pZal+{a^o%iQe2scBivlFxDga&*ML^`I#vMCDb)0W>0wPeU! z(>R?oqDA9JY6Q!{562YfpfQL+epM1=*>op$$PUfE2{e9`^*5yZ7G-&)Tq>IP93g!4 zv&$#Gl8CBm(k4tcDz>H=H(PyR>ysIGawMen&bw#ghp~C7tu!3K!q;a?68)^?(w9`F zHxATULEhNTN^Rkv8}!OKundRzj&?vRUat>)htLrpF!Po&NlPbqQ|t-`NHU&|1~R<6>0Y#?Z3(7T^&; zv%x$Yo1=UL35GEL;q(8P{RM$eeBL_?ax5N!`>gmb$f1Gh1Kc7AXJ-NIf)UgaIH{0| zIsMZ;wz*_aZffGICt)`uM&6*~@s|CM65PA7+X ztF0$F3aRHg!Ew>75q|~Hl|$JCq;8fM)TJ#(D4o=6wl)0o^%CMu+i)`PA{wlaY!69} z9XPH?C)&*D=7sVCF+(t@>35|BsU(y+O z`c^{n4}TsIRh>^A#M9#@fyn2`BmdC|ckefBH*0`EH3@%MYcq{Jj|qo0mG4RO6|d}0 z@8u5}+qGapbcq@A2YlnLqNhB`|EMK8KAJ2Zhjq#^GRshXswmR~5ZspRbVCj@SD=xi z3M}$RoMiz1r~_0SPBjL<7q;WA`CcxlII2}o;?FpwaLN>6qCKfaO?S@r8X|$z`mU*% zk*ZM5*u)}rc>zd)DVI!PR*`x*5fbI8fxcS`zuCbRI{)<@+`FpVJo?41ZB~`yPDUUl y1er|d?4wgb;X(igiMl+x-r3`?cVQ@0XH=pqxn;r(->m<94QN|Jf@*&_nDK8H{P*er literal 0 HcmV?d00001 diff --git a/web/pandas/static/img/install/pandas_import_and_version.png b/web/pandas/static/img/install/pandas_import_and_version.png new file mode 100644 index 0000000000000000000000000000000000000000..64c1303ac495ccf72a7c649401cce26c47c15ace GIT binary patch literal 2252 zcmZ{mc|6Wpy{dDhjCxN;DWyRAKT3#6!c$7O199%SvO7=DJRb0p64C6$g|JF zTAw<-^m@N`3KWT@3E`Bzyd#cqWCog8sr(66Dz64>{Nn8&KY_ihRZ97es|LP`*jD=h zEA8-nu=H%uPN9!@DZ)c!iAx_H%kPz3VwxTGZa}dW^iYqWb9BWL!1a*TciL)^!y#3fsKrS2sQC_&=q+eG< z(9?22qGyJ}=aJC{Crg9O7l;h^oa&wdn^4N)rkwYInz<{Tbd1Z3tP_AKm`uf%R+|w^ 
zoz$xhN({Qy06RzSMthB@ItW~}0$$x6N@D^t%NxT%Ojpd@vWucL2OWs&RuK2>uk|XG zMCXvS^a^Vw(P!h%Y`l@3+WW|0M1ed!h6(6sr&VZ`+^?1x6T^}6#N69XHrJ4^*nI%o zS{7&}Rk%$OyvMrL(~M0A!JQV!S9H%Ta7=; zK$aRD5Q}uHWRcGzVGj1U(~bdO5doI+ph(6 zx9liJ_c&wMNqCuZag9-=0KQ(?fy2hu*@O|Y?xHQuV=j3k#wqYWyl(Xx7_VykN!MKc z>29swK`j%`+nM-yqTO=&;KA02?nTD=-PwlcV^_~BFI4;VBeQtCnZ&`xD-AJ*2|6+G z0B4T`ujB+_8-moJ%?y_zcsp|rgqB8-Yjy|5c$m)uHPUdddMq=?#gO#V=Hx{oq zz-{9)`{H-SK)5wcNgPS76&LSuJmK0co$TXNbMkZ3%yQw-X z=YB+Iz^)-}wnj(ofUwMCRNr^(f-1+pp@WAq&7MkPUyNk>VMAuWnu%`}P8hH@<@RaA z3|K!1qZTm~rhILb4p*#0s?et2m( zGgtyMG`g0TZ|YI^%2nw z#K?lV{FT{z#CRBwftD3G6$s_n_ zP2i|nyh^k(*~M=^=b>f>Z4~>JSo`tRqVHJq(f_M5>b~B5AuMsoLn>kr(HxxAyOu1? zsf6OMzfLK3G8Fi4FmNEB7m~IzKMDcd>FrgvD&Noc;9}PyV97<1f9wQbR9YD8DJ^xy z=-Wm75<5ffMfI(t&9Oh6_*#O$Yw#_^{tCYDwfX<4_K&^)M`!*MFup6v6KhATdP%`I R`QH-(TPsJ)dW)+!{{$*Kl8*oY literal 0 HcmV?d00001 diff --git a/web/pandas/static/img/pandas.svg b/web/pandas/static/img/pandas.svg new file mode 100644 index 00000000..a7af4e4d --- /dev/null +++ b/web/pandas/static/img/pandas.svg @@ -0,0 +1 @@ +Artboard 63 \ No newline at end of file diff --git a/web/pandas/static/img/pandas_mark.svg b/web/pandas/static/img/pandas_mark.svg new file mode 100644 index 00000000..1451f57d --- /dev/null +++ b/web/pandas/static/img/pandas_mark.svg @@ -0,0 +1,111 @@ + + + + + + image/svg+xml + + + + + + + + + Artboard 61 + + + + + + + + + diff --git a/web/pandas/static/img/pandas_mark_white.svg b/web/pandas/static/img/pandas_mark_white.svg new file mode 100644 index 00000000..ae50bf54 --- /dev/null +++ b/web/pandas/static/img/pandas_mark_white.svg @@ -0,0 +1,111 @@ + + + + + + image/svg+xml + + + + + + + + + Artboard 61 copy + + + + + + + + + diff --git a/web/pandas/static/img/pandas_secondary.svg b/web/pandas/static/img/pandas_secondary.svg new file mode 100644 index 00000000..e7440484 --- /dev/null +++ b/web/pandas/static/img/pandas_secondary.svg @@ -0,0 +1 @@ +Artboard 57 \ No newline at end of file diff --git 
a/web/pandas/static/img/pandas_secondary_white.svg b/web/pandas/static/img/pandas_secondary_white.svg new file mode 100644 index 00000000..86bcca57 --- /dev/null +++ b/web/pandas/static/img/pandas_secondary_white.svg @@ -0,0 +1 @@ +Artboard 57 copy \ No newline at end of file diff --git a/web/pandas/static/img/pandas_white.svg b/web/pandas/static/img/pandas_white.svg new file mode 100644 index 00000000..bc7c4165 --- /dev/null +++ b/web/pandas/static/img/pandas_white.svg @@ -0,0 +1 @@ +Artboard 63 copy 2 \ No newline at end of file diff --git a/web/pandas/static/img/partners/anaconda.svg b/web/pandas/static/img/partners/anaconda.svg new file mode 100644 index 00000000..fcddf72e --- /dev/null +++ b/web/pandas/static/img/partners/anaconda.svg @@ -0,0 +1,99 @@ + +image/svg+xml \ No newline at end of file diff --git a/web/pandas/static/img/partners/numfocus.svg b/web/pandas/static/img/partners/numfocus.svg new file mode 100644 index 00000000..fcdd87b4 --- /dev/null +++ b/web/pandas/static/img/partners/numfocus.svg @@ -0,0 +1,60 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/pandas/static/img/partners/r_studio.svg b/web/pandas/static/img/partners/r_studio.svg new file mode 100644 index 00000000..15a1d2a3 --- /dev/null +++ b/web/pandas/static/img/partners/r_studio.svg @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/pandas/static/img/partners/tidelift.svg b/web/pandas/static/img/partners/tidelift.svg new file mode 100644 index 00000000..af12d684 --- /dev/null +++ b/web/pandas/static/img/partners/tidelift.svg @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + diff --git a/web/pandas/static/img/partners/two_sigma.svg b/web/pandas/static/img/partners/two_sigma.svg new file mode 100644 index 00000000..d38df127 --- /dev/null +++ b/web/pandas/static/img/partners/two_sigma.svg @@ -0,0 
+1 @@ +Logo diff --git a/web/pandas/static/img/partners/ursa_labs.svg b/web/pandas/static/img/partners/ursa_labs.svg new file mode 100644 index 00000000..cacc80e3 --- /dev/null +++ b/web/pandas/static/img/partners/ursa_labs.svg @@ -0,0 +1,106 @@ + + + + +HOME 1 Copy 8 +Created with Sketch. + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web/pandas/static/img/pydata_book.gif b/web/pandas/static/img/pydata_book.gif new file mode 100644 index 0000000000000000000000000000000000000000..db05c209704a2ea3053b44e485fcc476591633d4 GIT binary patch literal 5862 zcmV45Zjh3gX ztMIwN%gxR2$I9&C;{5*mEC2ui0G0s6000F3kjY71x;X32yZ>M)hDcc|Xo?~!!Z}nd zJ#<|=grM(y@BhG{a7Zi)2?T(0$!snOg&o0RtHP=_oKYCvdcR<95)g1I4a+hBs+(eO zj5ke7SU4JoMWW%b5cqfr2MIZK0tpKOc?1VK1&@%Cl9QB`mY0}ajAly+3{Grra#y1{ zdIWn1e~fqwNOgN{4M=);0y$y1xfR zEt)+=%c3od_)KCRc8)ShDl*B3#*h9{9`!00z<~h<2nLX8g0PsvV>$+5WT$EnAtW{# zU0{SupU++XoS;;|fM}BdFRdiC((|X&r(Ot@Qxaf>gQ-iF&e3W^qQnIaoi+G6)~nfu zWz(u%%eJlCw{YKzB}+UStlSu{v&@e%I zo9pi5$P0@eXVQU!0azK}PENhLJ~gE*j}caVwR0 zT2CLm=)AJCqp#1O`f}%{b^elGe*}JZ-wL0|H(-GbhDIJR<;>UMgsb&7m4p;#$QvgL zYUp8Q_X+snh?0dk+JYpeIGz(2s_5cjETZ`0i@nJ>ql=fdc;k*$C?Fz_K=!BvQ9%}2 zgO5ZSX=DjDDmmhJP6j#M194Sp<&{`wspXbw{WYByP=zUGKIc5P(3ohZ`9+yoq^AW) za6-~goOEuKkwzYM#6u-LunC7>V5%wTpg;Jz6`)P=kSG{kj2Y4`s&LaNn*7|-5;|_; z5(PADVI*lJb2e&+HJ^SuX+J$w>cc_NXqpi>X&PD_qI=eX15S1{0;)$ypn6E4KRjBh zBSReEfTuy63YDy1{w^Ae6k`>k2M^HfGwG?_Jj)T9%w{_04%+H)P*bYfQ%_7_y4okP zV1V`NEwFkNX1BD;$xsgmA@Yp4&In>hGs`G4%O0)zTE|!cA2Kb$w)lWbAJZ}v4Iv5> zvd6hk(R*g2;R*m2G=9Xyhajx(bj>rI;$rMFgJ6>fzz2Yg>@9m(pvqVzNBc4&k_F5=Ro(??tbfWBF1`J zcMtAH&hFd(Z^!!epJ)C(w7h??6R{468_vVob$3&_(R5>+_t2Red~ED|Z+>X!T|8^= z4$j!qROz(0!}#a?^lo|UK&*~z&im(8 z=MFot&Ls>SsR@P@bUhST@KAMe;VqM^ev830( zSv(_Og;-!CAo#HF<*N~_I95*{A+wv=A`i1@nIOvblfJ3rTbwG3;!ZV%wP^wn2g8}b zRLGD%zz-}kOkvt?fy1-e3Sy=x3#roQl6rB$VLO!5qIj{vZS)EVjZ&5qZ8xjVdBJ)> zkz)Q9-4`sc5zvZ6(8&&iH3=wC;)uH&RvHPVC@MA!Ra#V(9quP7c14jRafITY?vzI{ znUE>e>Q~@Q<|`sZQBP!SgCJ5>M>r-DA^}Svtn!G2atW&qZiHS;zydI5^ns32?1m&s 
zNQ4AXX-{iRl@x8|LVNA1Pt7pGAG4q?j};Do;!71OwiSeT7|o7`1j6J<34}!!VU{;= zR{Ow(jW{;1i$Qo>wAi%^_8_2c*jv=b)vJTB<7q?a-ZW7~04mX>YX8IJ z{T#9fHgMBcH{IGhlu% zFa=jVBF5FJA~jIv1140T_=*n(s#mX2XEtxbh*)ak0iJ5yOLL$trS%S7IT%Y%Vx|o2 z33h3oI;&Doy0rZDkP1;{pu`Z0zN1aFEPX&}-p<7^it?@=^y}(1?r?#;Vb551NLU+5 zdcx?Pb6hJMCp03fxL?s#u08N!uLM}rypC0aJUrK5g=Y_4(TtM~8yzY*{y9WTu@;c7 zEhJ2b`_D5Hs-F zQlZpkX48E8tKYS+cicTz#8z>-u?p3_lVh*Y0EpAV+3I^*kViyEyF{%i6>m^2u-uqg zxa|@(T_M{raj$FC)vC{mAiFFeR6#h4%41}IXh>c4@C=Oh02)-XhWS3Z2Pz%dhApG9 z9-8s8W~gk!b?6tXGH7IpAR?hAE}|6~5i>!=Y*EWyWXv+^&ODrg%85e7mK`UPEmv7r zR*saHZ*pZbdt-;VIt8T09A`RW^0~BRBNqw9Sf=Je!A8(xD1C+gEk+Ijn}J5g{SZw? zdjVP-s>&mydAS9UVN6(eH1wUKj431P`O}K%*%86;Xi%HlizoKmI_1ggR=fJuu#UB? zXH9FK)SA|lDm5SL#A`g6ikQ3ZNTIt3?2)FrwaJ<^8V@Zff8b=xX{Ki*kDyI3o?1-Y z#mNA+J&tV;zyKx44+8WsPCpl_+N!p7O&GDxnyg#4EH1#68NuCg+B$+ffwv-fvJXZW z06GB(xEca}6axhO+u^>lsW=TZWQV%SRAw{DNM@5ZM|{gUxX8M9A`^;3oZ^)B#sLJr z?EpZhAS)pW8OjhGa4SI6yLvRV)4a$FoZ04SHglOLX!HJjySw5z&xt!_d4Q9foZv_o zIMS&y6@mYI90-89CeZD0o;dpz*^c$P4ZilYV}<1dr^?~ezH)9G03$NDvQ98i<(ODG+TdifnNeXfIZ4voQ1nO9 zNe+6%lfmdq_xx~zfAD~p-~a)}b29U8dBXC$`B7D~?tc#PP!&6o{s{cH&3~;th`Yv$uUXM}c(q7j?#> zeE#GU@3%*|7jiW7d;i7)<8Xi3ksU`za^BW|DHnRahjdS800&`!Np)>uAa7`}eX^kd z6BvO{7jWbNeug)2odL{ZR4;4KB$5hxP9NJbVbN=-^X+_ zcyI<_J5zEa&tY=eff5i1btz$g#m9ZyR(Kcqf}YoZG^hZL2PE(zg|guQCm@0U#%(2c zg=;s2-3NNwr*vM}e*?f3PuLnTpaLi$9h2c4-LY*fAOR}?ZWHK(+-7oXXo1C-cy@Sj zrMH1^=Yvv)97W-V-RFZWKt)PX6y1?{{zM@GXsCww*MSjedTV%uaHoA}SAj>!gksom zxv?GES8~vCi-MQ}Uo#Ndfqztia44{c+W~ME2h~L(TZ^&&+k%yAu z0N-bSLO1{^K!Z%^gTHWvMmPY)NRGy60t9)2hIopp7>^bRZu*FbrbmSQ$Br!r3}Dk5 zQTTfZ*KHB_3*&$ibf}1Mhm6a3jD3iL`SF7jxE)r=kZyQ#0LO-$*N_aEjx&jiA(@e& zAr3QWj3*Ip&|!@zv5ZFOa=p0zh>DmUD9DE>Sd2({0!ev)@d$VONR=MxktVo`x5$hL zz>}RJ4qa(-qd^cZDGm^6aODV%Z|4bX35}X4fh+KoRk#A>7;aeEZ90jK#8`?dxQ0q; zcBU7LRa6;hSbu`KZ3<8Z&k>X+pcqZqnE02ACs+b-X?i9=mnPtHb_ta!*pd5ag5H>x z*cg$`Xm-LpDRA~_BYXIJC!hkJKmo(~n?)fGrct6UDV3g(0t70b zQK-?xTwkcVP5>=9j;IKp3#yU}DTL9OlhE;%Li(M3h!R(dqSYCS(h&}t`JT^F 
zo)tQdRG5fi`k})Zr!R?)aCnOnc$c%uk}PSG4!M`xHjaUTh7LNT4QifzIF5yAp~Fa! z-$|h3*o)YSp#Djkqrlma5=oP;D52=-rPbM^Z25La*MuBOh?&uwzLMNP+vOjEM*(hT$An_?{-Io}{>=5{j!Z z5DrvWg|s@Hn)#G^DWQlMlcv{w6Pm1OYK+awdCgdYKA0qwF##wrmhV`TtN5DUn5M#5 z6f_!~1*(R{Sen~+bhh|{fNHAyshhzOq13sCXP}r(QW%8zbO`!y_4k0;I-nxj0-BhH zc}klWij1!rkrxSkz<8JMDQ$3X>48myohqTJe-1h+ma&+ znEt7iZ(AKEF|Hgdij1L^J8HOLtA!eoMdaDi&(Q$ ziHu0wnSS^es*9$+N|238n=J>t6Y6cl3myz0xGP$SWb18~*|1>?n)m0rjPaCQQ4}ry zfrb#evJVK6=bNavalNN2v(>nbzU!>m%7|{Oryx3ra^NE>aeqtDZQbXbPPc~HaU|Bs zkNUg6eAwxm zYu5*ycqG5zh~s*TWH7mis}PF`xHd41=GeXJ;gS)$!^=rq*1xF4F{r5spYHgZ4*-pM5v{m$6U^ zycuhJ^kJ6)mlWbqv|d~vbG(!Y8E}MozlnIp>Vc0%8E}^(oE19A{ZX@UcN_lP=7227 zxE1idw0Xd9yr;;@!r&pN7bpQjTXH8s9dqh^QL7k?Oaq@A$bI-EwYs`aw;gAf$h!d? zNU6oEil65>$&_3lMQd>NhnBQR6{(pV)>y9qr>(|wf4nRQZ`!YO*#{>e zid0*kULp`>oUuQs6;>*(?n}T0f}*A|pUe6eX#1qKOo#)`7yg*$G=QmdK$I2GBeE=pHSNBVy#9xDi_*F=x;*Q( z+-uHE`KFQzxo(QKCF+E2qNe=%#&fKs^vcv&3#X*~8QhtI0y&SBDH zi-K7VD7(0=Y`V6W$QNo#&@4*GR4s1bcyxIzjI8Ub?rWzPt+Z(hygn_{R&Xf;k*irf z143OK6#&;(S|8uri`>bqXL!E_NuUJ@AW;3;P8kEY0nc-A1(z}u;b6m~T?3Il1HpL3 zUF{hwdJ@cGaDLdW1loswO`d~DuXJm}P|c$0yshal5KwBxKSSEiLA7am*Yt7Dr0dDI zG0ah&wZ0LGLcON4%Ax~(seCwysr|O8{nHBJ&^PVbrlBL^{sA1aIi~_0)8P=J!cDbw zo0-Dd)Qz2zm|CJG+S~u!)W99s=Z)UaLDa4xuiJ6I{OzW7`liKU#Vl&Yy`9#=eH(TO zxwabvPN~~m9i(w!r-duQ6C#F~>Kjz}+eKm6zz_qQt=ln8jvvm}$UWNp-LbAp+K}zk zN;|r-U5)D70*$@gnF-Um-P|T_rMXQ7+j`URUD`K*<8+(GL*csyjvQzR*t7iE70bG5 zTI1;rr9%POs?Fi7OWvgY&%o`bQXbA1TjeP&!9~uxSiRgL&el`jqL3@!4j!o@3ez^s zuQuGeP|D>QUZ8HS8}$0TQC$LXN+hT)(rK&Fy-ni&Va|<`3LtJe=V_eU{T+CV~aSc%5$sTj}S z;hUtF+TOGt@ZSBBeR;Sj*qnL0wi2(tE8hO=oI@Y74x=0HuO-T)Uz*#Up6+-{*zzlg zmf6in&WA*P(_ZrMtC^YWONB^I^sbBS6R_!P6b+R~jYQoE|d}q;)U$K9Bg+(e{up z$6amplYfsgd*-Ea`HhSstE|gX`1qWk971XmmdehSKl+7XkE<-qsjvEik%u^r1+NeL zfFb$iP2#s<`;m_#`mL+A%==dE`$7-vqA&cc@r0xx_LqMJB>pui9@4CR&xVO2&f(Af z%p+)c728iFjt&4Y5CDd48j@|;RIxDavoDJUEWyD2z)=$HOmYG@9hBMuh;hL5mF|}5 z=g(s~8j$}S5b%te$|>|1Rh_-gco2ozGNf{h>~Y`M`C$4*zSF9AzW0A%@WnuxNGAL8-TD>Z;>X&p2F>Sd_G>+_x*pjG{8Z^ 
wLMlGPL`6NpMaRcLM94|XFUd;G%*ISkOc^=QP|;D+Qqxn^RMl11*3}3AI}wv&C;$Ke literal 0 HcmV?d00001 diff --git a/web/pandas/try.md b/web/pandas/try.md new file mode 100644 index 00000000..20e11975 --- /dev/null +++ b/web/pandas/try.md @@ -0,0 +1,21 @@ +# Try pandas online + +
    +
    +import pandas
    +fibonacci = pandas.Series([1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144])
    +fibonacci.sum()
    +    
    + + +
    + +## Interactive tutorials + +You can also try _pandas_ on [Binder](https://mybinder.org/) for one of the following topics: + +- Exploratory analysis of US presidents +- Preprocessing the Titanic dataset to train a machine learning model +- Forecasting the stock market + +_(links will be added soon)_ diff --git a/web/pandas_web.py b/web/pandas_web.py new file mode 100644 index 00000000..d515d8a0 --- /dev/null +++ b/web/pandas_web.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python +""" +Simple static site generator for the pandas web. + +pandas_web.py takes a directory as parameter, and copies all the files into the +target directory after converting markdown files into html and rendering both +markdown and html files with a context. The context is obtained by parsing +the file ``config.yml`` in the root of the source directory. + +The file should contain: +``` +main: + templates_path: + base_template: + ignore: + - + github_repo_url: + context_preprocessors: + - + markdown_extensions: + - +``` + +The rest of the items in the file will be added directly to the context. +""" +import argparse +import datetime +import importlib +import operator +import os +import shutil +import sys +import time +import typing + +import feedparser +import markdown +import jinja2 +import requests +import yaml + + +class Preprocessors: + """ + Built-in context preprocessors. + + Context preprocessors are functions that receive the context used to + render the templates, and enrich it with additional information. + + The original context is obtained by parsing ``config.yml``, and + anything else needed must be added with context preprocessors. + """ + + @staticmethod + def navbar_add_info(context): + """ + Items in the main navigation bar can be direct links, or dropdowns with + subitems. This context preprocessor adds a boolean field + ``has_subitems`` that tells which one of them every element is. It + also adds a ``slug`` field to be used as a CSS id.
+ """ + for i, item in enumerate(context["navbar"]): + context["navbar"][i] = dict( + item, + has_subitems=isinstance(item["target"], list), + slug=(item["name"].replace(" ", "-").lower()), + ) + return context + + @staticmethod + def blog_add_posts(context): + """ + Given the blog feed defined in the configuration yaml, this context + preprocessor fetches the posts in the feeds, and returns the relevant + information for them (sorted from newest to oldest). + """ + posts = [] + for feed_url in context["blog"]["feed"]: + feed_data = feedparser.parse(feed_url) + for entry in feed_data.entries: + published = datetime.datetime.fromtimestamp( + time.mktime(entry.published_parsed) + ) + posts.append( + { + "title": entry.title, + "author": entry.author, + "published": published, + "feed": feed_data["feed"]["title"], + "link": entry.link, + "description": entry.description, + "summary": entry.summary, + } + ) + posts.sort(key=operator.itemgetter("published"), reverse=True) + context["blog"]["posts"] = posts[: context["blog"]["num_posts"]] + return context + + @staticmethod + def maintainers_add_info(context): + """ + Given the active maintainers defined in the yaml file, it fetches + the GitHub user information for them. 
+ """ + context["maintainers"]["people"] = [] + for user in context["maintainers"]["active"]: + resp = requests.get(f"https://api.github.com/users/{user}") + if context["ignore_io_errors"] and resp.status_code == 403: + return context + resp.raise_for_status() + context["maintainers"]["people"].append(resp.json()) + return context + + @staticmethod + def home_add_releases(context): + context["releases"] = [] + + github_repo_url = context["main"]["github_repo_url"] + resp = requests.get(f"https://api.github.com/repos/{github_repo_url}/releases") + if context["ignore_io_errors"] and resp.status_code == 403: + return context + resp.raise_for_status() + + for release in resp.json(): + if release["prerelease"]: + continue + published = datetime.datetime.strptime( + release["published_at"], "%Y-%m-%dT%H:%M:%SZ" + ) + context["releases"].append( + { + "name": release["tag_name"].lstrip("v"), + "tag": release["tag_name"], + "published": published, + "url": ( + release["assets"][0]["browser_download_url"] + if release["assets"] + else "" + ), + } + ) + return context + + +def get_callable(obj_as_str: str) -> object: + """ + Get a Python object from its string representation. + + For example, ``sys.stdout.write`` would import the module ``sys`` + and return the ``write`` function. + """ + components = obj_as_str.split(".") + attrs = [] + while components: + try: + obj = importlib.import_module(".".join(components)) + except ImportError: + attrs.insert(0, components.pop()) + else: + break + + if not obj: + raise ImportError(f'Could not import "{obj_as_str}"') + + for attr in attrs: + obj = getattr(obj, attr) + + return obj + + +def get_context(config_fname: str, ignore_io_errors: bool, **kwargs): + """ + Load the config yaml as the base context, and enrich it with the + information added by the context preprocessors defined in the file.
+ """ + with open(config_fname) as f: + context = yaml.safe_load(f) + + context["ignore_io_errors"] = ignore_io_errors + context.update(kwargs) + + preprocessors = ( + get_callable(context_prep) + for context_prep in context["main"]["context_preprocessors"] + ) + for preprocessor in preprocessors: + context = preprocessor(context) + msg = f"{preprocessor.__name__} is missing the return statement" + assert context is not None, msg + + return context + + +def get_source_files(source_path: str) -> typing.Generator[str, None, None]: + """ + Generate the list of files present in the source directory. + """ + for root, dirs, fnames in os.walk(source_path): + root = os.path.relpath(root, source_path) + for fname in fnames: + yield os.path.join(root, fname) + + +def extend_base_template(content: str, base_template: str) -> str: + """ + Wrap document to extend the base template, before it is rendered with + Jinja2. + """ + result = '{% extends "' + base_template + '" %}' + result += "{% block body %}" + result += content + result += "{% endblock %}" + return result + + +def main( + source_path: str, target_path: str, base_url: str, ignore_io_errors: bool +) -> int: + """ + Copy every file in the source directory to the target directory. + + For ``.md`` and ``.html`` files, render them with the context + before copying them. ``.md`` files are transformed to HTML.
+ """ + config_fname = os.path.join(source_path, "config.yml") + + shutil.rmtree(target_path, ignore_errors=True) + os.makedirs(target_path, exist_ok=True) + + sys.stderr.write("Generating context...\n") + context = get_context(config_fname, ignore_io_errors, base_url=base_url) + sys.stderr.write("Context generated\n") + + templates_path = os.path.join(source_path, context["main"]["templates_path"]) + jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_path)) + + for fname in get_source_files(source_path): + if os.path.normpath(fname) in context["main"]["ignore"]: + continue + + sys.stderr.write(f"Processing {fname}\n") + dirname = os.path.dirname(fname) + os.makedirs(os.path.join(target_path, dirname), exist_ok=True) + + extension = os.path.splitext(fname)[-1] + if extension in (".html", ".md"): + with open(os.path.join(source_path, fname)) as f: + content = f.read() + if extension == ".md": + body = markdown.markdown( + content, extensions=context["main"]["markdown_extensions"] + ) + content = extend_base_template(body, context["main"]["base_template"]) + content = jinja_env.from_string(content).render(**context) + fname = os.path.splitext(fname)[0] + ".html" + with open(os.path.join(target_path, fname), "w") as f: + f.write(content) + else: + shutil.copy( + os.path.join(source_path, fname), os.path.join(target_path, dirname) + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Documentation builder.") + parser.add_argument( + "source_path", help="path to the source directory (must contain config.yml)" + ) + parser.add_argument( + "--target-path", default="build", help="directory where to write the output" + ) + parser.add_argument( + "--base-url", default="", help="base url where the website is served from" + ) + parser.add_argument( + "--ignore-io-errors", + action="store_true", + help="do not fail if errors happen when fetching " + "data from http sources, and those fail " + "(mostly useful to allow github 
quota errors " + "when running the script locally)", + ) + args = parser.parse_args() + sys.exit( + main(args.source_path, args.target_path, args.base_url, args.ignore_io_errors) + ) -- 2.30.2
  • |4C z>ul^!!8r!zo+?O`NTlwQLAbk8_Sx?&v2@|t>uf%c1_9aa-N}<{*px*d4s`5P0HunE zYbhhPY7x)6TJR={4t+c)*Y`<#iLzli#KrKc6h~)c{ols{u3}kfiu{fT4$|{2lV%_ zwFSBwl;yd)pwQ-x1JMc^1 zc8|#GQcC>ZMyNf#$igxnPRj2NB5Tgf9lzoGh%96^krEv(20qRVAZ0@7vZ1kW!rksQ z3@IgEcwMxfMKZ~J~E76 z1b!p=ILfzqhmP(~IGmAtQ%qd^FF2B*E!<+trO*Cv<5=6-H?MO|HyHJ)gunf-_o*<6 z{>z_AQDxdpK`8$*KK;wk6>iJh`mB=%lkcCuNS^)eqe_zgd;G{EQ~&>tCy@)E`H$Q6 ze@%T~%6>@7fETPO=)CyvhkyCm+68qY{r!`0tK!ghwUPp#psY{g|4X#IGj2*y3 zaY^@4921oM(azmsA&@g9DPe3(si@t>?WzeRBl+&Hy(uXNQ3gh!6tt?Nc3hJ4>MV9V(K{zH;30 zfaXMfODrU@;#Y9Yefi2|LjUHLI1Q?f5%K|008BjTN)GMMC(3_=Xf-9KL6uC$#Q%})G8U1%zM;<_c% zE+h`X-JvAnP)`;j=)M50NfS)9-3{8sYj_gNsubY-=em$Qqm+O@TYYwSt+8i9ieyed-Y`?1!Ec)+zvZ75v0)jLbx|WKrdcV7d z;n+Ew32eis{n>Q51 z%ZlYy0^(P~_9z*@&Lh+9*jAezb4-z|B);K~UaPJ$>DS?vdi_+1@-_PH8`Q>MQ8E6a z-sZzESwqr(7&|mt97VugK`= zf^`dz(t8^tfMsi4+!Au*;ic;i-c91UvqJlhg5B_?v0u@xJk^Ppn8&sS9eO?rpGXrr z;rsVg!g{DrKhapbv0LJ1l4sQ|u92g!lfL}-)ozUz{c55`{=abq9(Q7X$DR4hjQoCH9%*Z~$~L{Gh;{3WYo;X2xFQ zWSEniOBb(AKHg&ABe{sJHM|t#eQX=A85XlxRvD-Irk;Sxm4FZ%9+V2-_2pX|8XJ>N zs)XpKgf)ma2p03zDk1w^y*zN=u&$=3T{=<4?D+cck)IymHLfuw2mkEG_>DGTY;8t zob+YW+j6qvvX$9FleOOJUP>nGJM)b$A(6q^A`RKP=}KVnF`K!N3rR3>2(?X2;ubYP zJI?6W9lYOSI7*`?hji;QaD8R)rsSLU4pmCI*TB`N>)q7eYo0&u=StA~wAjHK;@SAI z@L2Fv_?5s~oMo4pW0G9B8uM1~fFUH!m7|M2sOfYdfOu6k?IPs!a#5lS+w0Z20O&m9GwcN8~+@*bW<(^29*=V)LsCK}jiay>+?I$RBB z=pxN@Yb~(Ko25>xDiG0*8ZkLo{&17+5t<$h*mbB%9=$DcMrX$-iczVMn^YNJ;Br3i zX=*P5X<(pLQP;1pk69?&X-yrnG~6R())vp2o}tB8KUWt5l|$yut{WOc!ouV-A^oqM z4%GO~2|UoqMvN?(zOLr8Uh*xs*mUxD?6A~0cx`%w4Ew%U(XC`!DJuQ3w||gz+Ylm1 zXCZSb(WK|P92Owmn=X>8&~$K9_09+ws6 ztdN^c&0Izl$g&Cea&TUaJHNv_6lwKoS>o;f{F`)1|9=C^2_vTKdMH6ZlV5P#ju$lTwvZwl-gC;I zvTY{-I(AEfta}k%onNQIzG5gCg1^ELp>Jn&^}rwRULVMp>85+39y*v3e+SKRt!6IT zjBZEq+ph?}1U$_RTl<8yQjQ!$x_e1EqLa-Hoc`7;IzayOnLG@AQr)+QD5Sqg=tM%) z#{t9?HLoaZHG5X=IT?fKr~zYo$ea7pPv6R=kyFYe$Jh8dUdcZ}1X&>nA*r^2xqaIX zhln0*#h7ilvh{=RtI3k+FYir^lv{Zf;@WfCG3`l-L*<6trSB$9@Cu+?4{Flmog@ky zTI3-kl*!lwzH~=DMuT#6$NN{H4BRD~*e|N4PiVN7g1Q{8m(p5mDHbL0G~#Vd4E5T+ 
zSsJQqwAFe=sfA8T@9Id$2ZNW@(T~S_CI`FgeXaWo12??fIvON(PA(n;WU0Sh1u71r z->7)i4zJ$*dOGB6=M_I~_+Ef6crP&+l}>t7eog-K<=rcZWqUBli0NYh`s_94e%bT{ z#{0tM*j;a`Y1iXKb2i`B`J5w)n+pL^pmMzlJ~rSp_&D2}E9F6L^7vzRX|{5|<(P}_ z7k(AuS7nF9xJpmosuIwfZ zoR(S=DZL7g;;GDuwnE>d2j^|Ie;P85u@ao@;j%hum7sue7+kJh3K-H|+=rYa21_H) zE7K$9XJ4Cb2PPX`tKO8JeqQA-qx&ETdc}CfFr%|7=50<>P{2uM0@I)1EPj9W;ssiA zI{0Sf(yE9a)GyQj!TRW3go7LpL+lqz0!3g94Wm?mT?IPwOc&o15FZ{JYpIxf1&R=F zn^mr@@3Krmk4dS#8i=+5NMLs#a)X+77CRCs==7f15sH?^@q$r*tJ$3Q=>?B%O~Bc3 ze$&b8#;;DsUK++;9trV6KWu;YPZ?}pT3^0MYZg5J7Wl|yZU5-{)H)6_r^W2Jj!?wUHqcKz($R2eZk#eP0?`fGh%D4n01dhTLWim4I#d;8sd?;Yx7>#+tM4h`eYBJT_Lq&b{soCnv|g7aRA>oTQRJ3;coA~?oHKHi7MIjo?sgFxre<|;S~V)w zfif=p;{T0=Z~)5p)EyVLH0g>rv9UItq*)8M;$!pdR?Mm)%2kD-S=-CE*fFZTf_(ua zSTtixZ8ppe?hWV zuWRl8k&UBoC+A-vboch)^#iWYsnGYXBdoy`*(M(Hp^p!Q&s*KkS<0ibEbpcBFbW?s z5t_%XKg_4^hgZHQ)aW0pJQac{>2xSukV8b~4~Z4r;-gH4!>|lqPWI>@kvc1;h~KS< zTo)GR8cfk^dDS|2nM@d%sTR<(r3=M0$9W~nb6$JU4g;&AsAJuTQ!#3xpCh-#FZkGC zNJ*Z%?)2`4loh6wR%MslGmde$%}aY%obD%_+a5EX$GN{E@i#O2z2@j+^}P_|ZX16I z8{6s#loZbELm#!k9x)4jm`P23#K@d(Vs|lCGzA^XN13L@_LP6@yRX7N2oGarZ+eOk zr9>TQi^z!xd0)?TeCYae={jpT+S9+#Y=qbc#Ejm3Bdto z7~@k}-6ea-etyI;U;hJs*9JXSghs~4{X_zGW=@|rf82l@>n8VRe{>o{^>!)7AIiN! 
zi)PjY5ofP+#C;@l)z;nBQQA5x9(MLO<*@@l(l6z-ooY2VI})`%7a*acd5bpKHIc5o zt7l(cPIN)bn3BY_261NOS`@dznoNICueDd#3~Lru0T>AG0*(ZJtQRk`cf1uZdt}$S z*ATF~`QF9*{ip4s`E`6M=^Q$CSJg_lgGq0S6k58`LNS631c2af-xvC`c2@4f8cdYxLu5n z4KWdH#D0!}Lk)8Tb0r)9(sv~&_Lko~Q!W`Ot+TDqEw-OSM%nkKGN}XC2HY;XlY^uh zfPUO3oqwSx9)3QYF7&Z`f|zl>+8Z8tq1_w2eFK4vS*|U(iTUFep!KilLFe;!cSf(@sn;eq6B?vYA1*avPi@rdd5}X?jQVn9~*ZGr; zh0b_NC7c;%7j<~M(SGM5Up1IMuVmlX?W(JSVb+eV4t}e zI@Z7(z3w6&Vai15zxpCFYWEjQI7Vc#{3$Bg57l^woeu>5w}|YCt5Ra(ng=45?0)RH zqOEnW@T?_h%5bqX9T;0);dRY(a-G)Nu zPDgh=C(NMhbxEYObRYkn$6tvh@@MeujQEkUGwuHq#0p2au?=?}A5qcK$LJ1YU0#f# zIKW^{hPCy2t&K~XYwb+CvPsf$bF=cn?_`P$TXU8hOXihnAQ~f4}=eQ3qQfKh$bz&W` zkA%V1ia)i%nl{QrPm9$>oU(Ir$=Jrx)buGHF3?LI5JIROg7j{iA*7(XET!c2Enz4B zMuMNRtkrC9ck}yZ+}aOvN9R6Cd(y#-MK3(wIbR+EooCs84w%By-mo7SCerv92RlxE z2c6c^)fIl+H^~9$XfPJZ@aMocQU2IfS>(c(7P16S+$%T!N~Hk7X!p4nHV{5&2r?R#}=+hb%Scq zwa^(9KAcSd4=A)#k55zr8chB3Fz&b}^e3lM3-K~rXSsf<)&r=#yiV@$6?AH`b(gI6 zh>p0wZtlCR<{JnwKh@4_r(8G}$n~OyNv6CCH#XE~I6bJEmCoPVeU+IUcX%Gr?C2EE z;Jmh!m^>&xDaQK8s$d)cT2dyLdXtiW{c?2;1P&&};2{V{SlRHPaNVOWQ25$I1Pn4< ze)*0>;SzjJX38|c0A-UjyFXmK2wAe3cDw*ME z)(Mt0QCpVE3U9&sg^llWT}p^iEs8*i(Fy(4aTB3~nQl^H>CLkT>qg{a8?9lSK46(1 zf^`!M0lhAu8oVWsPwJRgiR;GxP>t3ME1UTPv16LqL45s=k@ z{$d5_&Xu7r7LSdWJbpBlQ#MTAqO%+-_$w`!Ry@D_%XHJf_5B~ds1)dK>0kz8#pYFo(jDo z^>Z~}FR|hOCUMmKJebR^{85?^T93&s4?)^#lwA?0_>c3-YtCWl?tddtc>ZvLiK z3K5ZeNp-@rv0Q!et!i7U#l50sB=#g$CgH?N6iq`d&VrA&x7ti}TrMG=#G{aVGga6@bWG>7gn#6Jt8Y+&@tv z=?n54CBtKUe1^H6ZfMcVWO*{0dKyQ!x+urCvhwHIT_R zI#R}{$}JZi_NHMwn_Is)A0kx#!L`78=@@BwyaKwTKQne|(8_H~2)RP&z96rSIFP=g0-3XYV3wAiuJJvFgV=}AoW>-(c9#*4&I&=&%qY@XyY^}tf!LFeNmXoZtxi7HUO}OCVDrJ=zvfPku z)@8-Z=kh`z3If6JIp#`2ey!XW1|Iq;E{}eGRD7lhc=IJ+pdiD>xfSK1Gl@Ue_gEoc z8uUX(pXWX1tBz+z-{N?Y06F7K$65+gnh2@kpoytvrAe2y9QMU;Pa5Q$Mh2B>t<_!0 zNc%3_rs((G>a%!y!Oc%DDkAYi_H%dC+a2*+dn`;IM^i^W*FLiva1-xCg|-P*zxt?F z`h#@c>&d#`1O7MRT_rHPDIX&_~1)YeubC`#2E1jmq{JZ!U6 zrt_WGSpDkg%j{MYg37q5L{xZOMiqGlL)GpvR#giveL5<$Jn}_cT4px;Lv~_)b|&?2 z%L(|Cq+c0BxO=8Bpt62TrqSB 
zPu?@~+YRtr_Adwc&Us|;M^>CNr3)7Ej(uh8<4QY(f=Eo(1Ad9+*qz--MZi0+c_w7UBvL+PH}SjA^Z0_oKmYiW z7~C&-`0*`%6@4YGgq8ap>b^+~N{X3SM$78rROfZYmG}p{G)~-!!WWZIAHCb#P`eo0xHtrc02rT0hZ0EO0h>KKx zKCiW!e@jK5<=dP>h;or|i#C?2U9{|TU!l31i@ZD7bu1KI`x0=~xP`Z8)nRvpOxpuH zS%cIO+p7q<67h%VG(c;?*#4cwlbsYh5S#f@J}Lu&$xvKyPKj@B!fn|Q zIn5ACKZ1A_yaPvN3Ym78Co1?f{m+AI2$lqsEpUf8x`sWm#OCneK+Vl5Io%Xt-;x~W zFV{U00N3se=dHchO{O^a6y53y29Rw>^6c_Jm6egVzKk#9nzQuAXWc6z14?>avM4pt zpLe&^P{+9*Zm^12mET4aFgc;y#k}gMkK;#N5~yiNf~}1BXf)v$|+nUB5|qv#>An-j3uIptlQrd^Y0j)|cc-Giwt_Hrl%e{;=&+e$c*~CuH4|==i)Y zwde5wxlE|l8{rQ5fT|4K>g@-8Ka;8l7QVxs+a3{oL0oOqF44H00iki*Xa$5~uW1{| z2fO@5SWag`E=QM4y{v`K>R}E=?Mb=g2NHrNzosK#huP~MfTvY`6fo}NoXLi|+HP!D zM933}ayDtlkk_L%aspcV(C5CD$&9DR&Q?+_Yip#8X6xJS&sFM~6_J+AQIxO(#Z}$!DBuq1vU#cYn3Ro{dl8BZz!8 z?f*o@dA0q=DnHqvA|YoaM0JjSA>tp500*|=ekC5(yo zsH#s42FX!t-JP=jb-yCHPj%#l&G%mDVjm+{zH}+V`bP#q^6Y!-&NJ5>vHvF$7V;`3 ztgP7X+dHd$1Kn3jD*(tR>ubec_f*60u&O9FhHcMJPA`y6Z}!`y=_0f2_>j#;f6g$^ zwxFH2BadO6z2xFvMnCS4Z_gpEaIHLZAWiss>{`jpH&oc z@al0q)rnH#bHjd3Xps=Wb+I$tNCU<#*Jv`A?2wDm3V9o&?&BaU8WXciyCB9#XZIpg zAI^~GnqbDtkOP#{{iXr&Uv_Ma7}+MIT>83HcJPlnI*UJ+x_{ZT<&yig$ICFW_`xzl zg>MlKX#3*deJdVrVJH$U6c=|Z=XWt@&hl6k z+Re^haae#W*o4PcA& zS!LHl=3nZXP|eNr*&PoJ6N>oddVl955=;vkiJN>g4BK2TzhC8L8#;q5aFvQnNFa+v z?4Exi*xXu*&*fY|O)cp-yKMYCKZ9bB_g1Auwr<;_hme)(VHJ+2E|yf1_!T zJS9fEhL94nNO66ySt}mD8_fPb%@czc`RNYN?DzT7b)$VAL;b^2f|1$3z;fb#ZHDqy zv7OH8!ttRZKOb*HnXy6w&o#za^}f&7!vNl02V$rd_ty+7FtbTW?`$3k zE4|j(bg#l6J4>~vzTX{##_Yl0=6X~OR+CWcw&}ZsCb3%Y&hqO(GNH9^5p%r*sD7hd z^`NZj#I_Pim@i!K{Zl9-X(F*~df)H#0-Vt+@|LiJGPdGZcy!=J5&eRzf%D+m5Ij>r zBQlikN8R?s^^4SF1IEnxlDH(F3{7R3Ufa{4*|^ACoRnODE7%=Hftzowmt#@)UUpTg zsySy%l|Eq;_vgQ{Id{TB+u20wmYjA6aSpW%~IX<{QQX2_ptK^mEihv7Bo4!}Y=f)KM>0eSN zj@b#S^xA-3N^l|IIkz-=*O`&S?i{M;7kz}2!9?@--rt9sAfCKhn~V2$T1)Vb%oLC+ zz|f{>r;e%ayn@tdm~nk!pEEeal5419ajL_IW&RTd+8iQ-su0djj(uZI94BP6#klgu z0ddh$EOgJs;1)P}Tab#3UXf_$lw$I0{lOL? 
zd7n%war{hP|E}Yb`f=|tat2I6yW2jJc=4W?xaDN|nwN)`fOf#_!UwCAS3G=lpUmVF zUhmK*nztYOLGGee^?5tsyW3PAQEIcGwt)MVy}-Bbr5%~EzxhuT*F0MySM`Cmq)!Ng zO}bicjP@~uo_EV|6pU!|k>SC;#0w)q?k|w&j$mUeb&T)>E7quvapH$W{6LLUko2}X zFb_>Cz=>&PHJa<|A_rl+$o5VZy4!A__` zF*|W6qjQ`jg)WdvXswqYlv-{qxKOL-@dK{$S_m#E82SgZBB813c<&{wx-}@g<@d*q zg)_X@97aq83?94uCR|@xzDFFYh98#D-Bs0>GYqjd*mAdcJ1eO2Se@sPSwZ>E2{LA> zT_N!>Iq2?bnHN%&7jKPZW#2@Nqe)Fld8JW*{vQk*qr=mehgf*xTyPMlciiCRy2hCD zK2CV0JdNr(IKKg7GCDL5m@NOYbUoRwMJ>z1`I^y;=)Njfe&_-BJu$rJdZ5vSs`n6; ztGf`6P*T{|9v{Q@OUat<%(ihzVN(v35VD`~2W}YxPE~=>#eIylwM3VXLcg|uGm~3R zznMu|{H*P$)23l9+#o@gY(v={c}z1=;9tb6H5nttKpwXPo_5ewY5uK)0*roHmThkF zONAM{KF2872Q%4AO0I`W$OE8?RV>D+GJYQfDWYeEBBR5*t74}kh%0g)V0~K;$)7fQzX5&g5)IwP9l7D|GzoAXd*r2#(UqEO>kRKH6 zJR&D(vscUch?D4@ZHn)TM64zjTyCh~&zpGl(z)t^@bDqPwh;4|uNDf59o?&NpS-uz zCHE99*7Th+2E4{Y8Bkl2lz}7aC(VP|uKthr{G5|EUynBfT1A8Adiw@%)I85yJUz1x z0j^Q!=rFde5gDm+2AgyWxxS=UV!)O2zG_r;Me~M1L+s(ETm^uE{O5EW%KSk4a~7y@ zO=3??BgTWakI8`adyl|UVy$p}M}>N~8OM3=>qGeZ?cIVQhBk3LUbkGD>z$f^n;dgp zhHt2ce-rSL%pBESwF10KwV@czZS|?}{cj2MKNJuU;Tfhw4)usHEs?UZYNkWaa;um$ z^VJ<(Y%@(wxTjMkJVX+8r0s_>9(*h0d7!U%{^k6Xj&a-O<7@`ME6$>yP*>CEw>g{9 zum?eV)SauFB_dvoJc(F12wruy;Cgyt?*Zo%z3&$EBjO7U-KU7f<>fKF@8&yp4qbhh z{7=(Lyr}p$7B>Yym!9dk_YDqY-$eKk@`VHJM*afF_O(7}D47O8b3@!kZSM{2g&!)l zSNyp)t+=P5PW^b7b}!O-{Z%>c4k5VfFMplmvqX| z1rBq(k@;Grc}F}rxR&4qC}HbB&tL#jb7(-Gg3CHm|8KM{;sChsq-&du~*;lT`dtO>bD1>f}`)%-lUlEx4kZPijbc3rlRM$OtSL(8ZTXQD4 zMNUYH`HekT`sM7vyM*Xj!hiu-XIJF8SZ?z(dAfNo_Clm&`FjS`n#GUf&K=EGIo=nz z+G$`!riYM=`TLjSNcjtIYk!G2qD7M>s%`cT4}O3>%st`%QD_UeyaTCOcfHaA2bkg6 z)W{3nNZ5d93CiUw>1W4WYFaB{sbn=+$sa2^2mYj!h@U*YSF*D-G?D5l+yP1ueR0^> zMGs^5%AowcF8-%4qn*Oewwk=KHm$&-_@m5^b|H^I>*?$F8%CqVhe|AN7ZGG4qBmRM;YqDMc4 z8?+8u;0I-mM7knJFEq%xa}U0`7q|KtC2id$NT%ZyM!J4I;xO2kJyhb;YB92II*~G_ z4_1|ht@5LY|iTv zdzSgpw@yAXF=XC;!q@Z;-e#p8z(egQWYR?`2(e|m(9l`-dnQrR?!rjXQ zrn2QUuNU5FL9kbc2BxVZcO;$@EiZ`;yZ{gU=D6oiX$g<`8OFoVsck4( z>by`7?Vua)koSfIk{d((mJV;rzV99jfak@Cf7wMxbE2xy`ci^@+K?WyAA)-=1vZ{_ zO|T2!NDsqM 
z(8OQXs%cH!NBT*%Xb^AB`G#!=`aSK}y z^?mZN0XE_N#e}4)Dh^n6Q6!w0qh7Bo*e)z7hfKF;n_lMsM=rdSjaMs?MR5|yNiwJM z7H_m1IuS^oJ<=-Z>?-I~P9`lWEcGrxZ)3kFK2jKHX)fv-TIqUVRM4cI(~(X>s3>mx zDKM8W%pre1QWPd3zuJYC1h6ZAciGkKIU%Y|tT^@#D9*s1GMJ@2X0wL0LrTTnC2Pm; z)cap-|Mm6t*EcKiIRDiq|F=wm0$!%j?%Yhx_-}mvTj2TxUP>W(@Lw(0->@5A*6{0> zT)4L#y(ClU0!1{Nnw|!fQ&0tl z$Il*~H}49W3~WUHvCmqPefn}2t9}^vGW#OK)7+hxm2J@$TQwuvGcz9@`kS3)azkp~ zITE+G!`30_>E(q3?I~NWybyC~I|{kk`zl)(#Qt51>%l>pI;F_+^Uk#TenJf~OC4DU z$IR5%(ONdq(rTL-ZZ@!8DmZ;intZYC?5uHU2!kzBm07GW7KCY|Zel)*L}7Z;3UprK z&}_5f*Z*9dJ!39i1%pL{3M#jx>30;>dR3Ef6;uOh?>Wu=Sola?3Y<0|bV>Qy|4s?7{`WnLPl9p%(+l9A zr8oBFZ>Z*~aZ%00o>pn|1W|xt1_;K^Zj{eM*xb0wMu~Q#h|?Ya+4>bA=9Fmrcty~I z1#mq_18U#U&v|;`o^WI|S<|)1Sh0vOwK)6(X8lLzr$XRhcCwo%iRFCiP#nOJz9{16 z;u#lfy={vmuUN0WpRpNYtMlNMykyZ$h=tqf-^6bOXdXPd$EqqZNgIl&GfVmMkSS zJkUHofw5C<*h_d(dxD>|-=(J-JYsW%~$rE0bb6W8V@8nfP+b>H&l z-6BTL-ORopoG~gu=;y_amYrMiNGU5bv%s3{g4u;6m3f5bufN9Ej%sVFy0GAac z+ar7Q-p~r)vv->d35n8tx51g`zqJX+*a)*t(c&3o-=8JZkm9d09^7`5&D2q11QTC= zS&p?PLKwY{=n%0P`%$~f8*yhktZ6w@gU)@==Qu~yhW`n3@rAMK3092|5Oh|$R00jI zf6Xnw>vWPuMaJR4ITopXef2nW`T5+5=#U}v{4XHNHbD46=azl2w@EcRc+2j_==$Wj z7T>-Z{0)eY@?8KfS#tlZ&HohM>=-rC2dhu3+n{6Ah0t{P_uHpE?92f37>Gc#F$e{7 zq*!L-e@KX%`D&Djj$$dAgY z=;IMoB27r*F^*W>NOzuccnKY;BPP}A*I5ipCQ5jQJk!+))noKU}Xpz*lwbc-Ydx|FeUBqNGt`+}5|6z!5&68R4w-L=2SuN?bkP!{}VxlKxV!FGuxY z`l|nV%BP~g=xzm>zvynAAdRX#qW@cv`0c|Quh^$ub;aviYU*2B)9xo!I;XOsT#!&V z#S?fli&H}*GFr85rEeG_>$qeIh>O+->Ir_$`%wKJZ3h*tDzdW1Y@$wrOA#IRA+T&PPC)us$`Xc8*^jRj8uOC+r0 zax-V8i_j>~m{C_l>Sc?h*ZP|Q!OX=sFH@HOqGNaaAmJ{!8T^2V%W>Zrzxw&L_Q5Gk zw(WU^zXY_jEq1YUdC@nG0%Ccsk#TBeZ&X0B2BVtZ^rgfI5|8o4b&kNC^)A|>uBup< z?o*xcz9K(I?yo^M5$FpVs?2wgG0@pqk)zj-tuJ!}mHjbQnO#ufN|7BBEzQP_Tm`;N zkIl%`W|SXrnzlhLLDy|DDq*Wl-b3b&O<3@N#tU;`NXkVBi`nS9raqu-Z-`tb80(QV}(rAk8lx=TIDU~A~^86Ow& z$OC$`Vp!WrlUkJ8|7q{r3qwsTK{`<9a2+`ph8Mrm`aikuE>#YEt zMvLeJokIaH6&tj45XI}AfCp4V+`lU*5_o<1DZ=TA(B7gqU7Fq}4 z7_7%DP>1_Ka=c3WxVyG$z?k3cTm$&hHLUMe#bISY37$`Xd8+`_H@n 
z?-VQ%SFDSAnoo?&inTP^dg`yf?T1g+DFzn(r{=aW>2{0L^)Dz1?0*Vv6<(@_?!FQx znDV^3*%&jsV$-!PmB=sig=XA(`e@yPafb@$s;0o8<`lUqU?u1p4;|3I+_C@9&xZ{J zzT(o)@Z`_@=DVVui6y`nx*wGaK0Wkh|1pzkZuduyd*#}-623vYNK|vYDR^y8sc8`H zTPTMH);t2!3HDIFL~_}&-@M%ly8dlh=zqiDTiT{Z z-M9qh45ZtsE{%a{A{b_4Ur4*ZgQSJhVHg z=zciezZ#!byn%31xrt-^HulnoUZpMFqjX79#`;Xij< zE*wl(wYHkF85-Y_ENfiM(789%DI2zBfBMk>)`Jy~T(h{Y4t=pX#|SeNE9;k8@nmRk zG(lFN-v3v?dJBX(#~}LfL2S7TM395oCfWiFi-wj6H`i94hTdgu`UK2YaJu=S@+Bch{h<{#$4pl3CsD$t0&MyGwo{%@^d5 zF_2nF{eGX^tQ$Himm{Zn=TFnJnSWe*u<&NuqtDVcM_V!Y^{%Zc*$TSY6FKmmxEppZ zPgiv!CpXJ?LYfYoF3EnDtQYSEw|UIkDzooJMA7SPi;T7Uo$QwAd#2tmqBj{X@{TKG z%fNySbBCAEj?;lvvu9J2r;!;diH0tqo&87BTNf!o1F@{zn3D+gw^4h+2azA5<(Y~g zYs;EAibK-*5@gqyILa|S7c7WBSxqr^cE0YseKMl-XnL;V(}pOS^WC=}Z~ocITD%G9 zWTg!bQpCLgGM0F7XGiatj~|r;D4v&j{_Gdx{DHcFGHD|V)sUkmJAM^RFaPPkuebW% zInNEZb^0y;@!O@`uA$Qn)rQ*{%DcT=qi%ByM!~1jJc249(^4H&qg`HC7`5q%2-j$r3 z?4NtUO^tivt=+;G-S+nOcqgYF@nc||UUA?f$nn}M6-~6i4PqFJ1IzaT|qDZi_N%BWH?cRN*uWo>9*(d;RtE-^ry< zG5HmLX#?OJ7E7%zuY@+*PaCHt60%`d)(Y5!<$tn8XVpAJRw-20o#Ce#)vPz$I@83z_swiHGxj3{`De-1-a3+zs-CNE&i13XHMAkG0#7P^+{28RU zmi_+1%}zw9_HQV$nteahb!WlJ^<&p@2a?@wlM>B@L z;7F(+7O0lR?h-UTk`WVWvrEB6&vN^-)rCB;0hU z;`GgPqxUPiZw4}6L^8bCo`EqMy^9}Z?##^Td|}qH#G2~(ig(tf<>i(>t_qEhyYKJU z^Y=o#3x?;%>(m^;}~c1zGfcn=A;C4YfT=?qUDY!9-{xU1!3XfVi2zqPoUA{-e1 zs{}p&7F{-A1y8Z`CJwJ(lEV`N(feN}QkL3&`O)3g&(k(+06>yi_Ep%MID5M<#wVV@jnX+bZO4XX-uO6Rfn zM%D!8x5V0?jC@GtsILFhEkrBk?JfHGtkU%p8^%c|YnI4o(~h|fg@{M;X3@wSJLd^saz>cnk!z8pyNdaQr$ z7NV}i_^Vc3yBqCdLA_?@O^>T8b*x5@B`zP3s_jWFkDEa6A3i6;G{Ti^`cGkM+FBL# z+uJ+es5mI>qBekDy)rxq+x4Rom;LMpP>K7pW_i(R%r%swk!daZVCM(s`TCe<@A!;~ zkLdGhw$z6YXOqxdU5>c>7HHV%nz%v!@QUP_Ov!Z&!FswjUAm_H_RzI>%(ZABNoYU) z{(G0EZONI+bd@e{Z#~yE+U{(QRZCjO_qNF?H&-p{>7zp0txH2*N5V557|{P@E>Txp z3bthZwLascFp6Wl&`IxP;Rn}ov|NdDGFt5EULfdQvT6%)Zq1i)n6ih$}{+r)%iQSVd;{tY_V$4GHZwR6vZ&v3Z^qo|@?f}0(@bJUKp-X49y8WpFc zMh%)k4}XVLqO5Sbj%iK!uKlx9tvk&hC^ztg!aVYj@qfVD%NKHIGJ%5Vrw%=jp!tI5 z&953Hdc7U(-Nc1uhiby)_in9R4Y53IHu$WHmyiSoD`em4Y|F80zFY7Vqy(fyDLx*L 
zJI}uu6ApL~Z<)kA*;-}w_Ir)*@QHJO9+B@Fe5`AF*7))@<7(!YKWieyW)E~LZShCf z!ka{H0CwD=`pH$A`d0Tcx#65ZHfu#^l3j(&mElvYfCfsg*31bv9R)~GXk z+vc*!EIWrc(f(F!nipTAFBuGqv0Q7ZRJQdY&v9)cRR@@X_(rHkz}*~?zi?stO8CH5 zPhVu;fn*z#h6Cdl__1%SSJ{Azd}@H?=}p0(f$KK!^G@9=9x$sH7T=$6*ZS+s$2M?v zrrgE=c=RK7!T}07%QW4Ixk4a-O^f*#}!Lf45*g zfes;nx*2h)*{7-r8xm+}4;iB&28Cq9MbQ91* z`u7)I_R#mt=#0u{BR%e#(!K?mVW0VJx*Ar7=@;ZLdghFpHYe)`A9ccBZq88Zj2z=0 ze#V>Fb^vszAs2D|R;9Ud`nh)^?vLbY4zc(3=6i2Q!EaakxvgQv6GycS>6p89hwWT- zuI^p@l3x)#ay+8X-m}K$G;|Cs-rjz|d^`Se0>$oPoV>TdFMu}xzJ0d)FvYF?MsZ?z zJ0$uUF+!eEF}X|QT;o+!RNT>*YRRF>x?#6cM$g=me`_juce}@YHbm-Ni&eXK>htfD zAEbMqJkhhrnE6pD%2;?2$JPE)#8vLVPu^G4-IE>~0;CADZr%J#i2c*@wF)@GXXOQ) zk?w`M>l;7I*;<2KYB5k$1_2zbK%91o61(-i{rbX9yA8Kfa9cJ_?l7$B)A{77EZ&dO zSr3iUHB!^}HYqx&o0qf3j@rqdZo;iGZvEJKl!g9WP4!Z1a>}IbGKX!5?e|T&vR?HI zxK$@*B(?c_~@j z0y?Z&a?>yG>kZM>ThQ@ilN(=Ec`pk)Y&VE?e9C7&cYHT=cZW>w-g8Zs5~)XH&mQ(G z(PrbVLn}bn2Kz4=_0wXHso%f8cza&x5v5vLC9h={=~zj{-g97_3LE7B4FOi`o9+-T zgswvFIenAUyB6enF?zj_6~sbMqqf!H8C#<-v8tqk9BTTleDVed1LB2;l|R5JX0O4A zv5d7_v&zfudLsOyT3C&(Pm40F?Jj2}Pdu0lQ$T-ZJDj-Rczd))(BdE4gf zBuoC)Xe)KC>`;Ze=dyMQ2}i=;LEib5$?e}M9cqKY6}-cS$T1lkvRW2u7M>)^-5IC` z5M##qy^)r0vqnZAEJUhhZCqyL?rU&J0#5(as`1v(eQaCTc4ww+lDr~jrPNuyOep3eNWc6Ka%tJ=lcYI z-lvsAG$L$DFO_@o<=h3|>W;G;?2prHkCRtyG^QFqZIPe)C?k`K#wR*ul^v%y{cY>G zZ6w*72?9)f$03!aQpdZZ-`+K}TIQe8;g|TAGaoX#n+fMv-`k_~a z@4G{kFoR@czXDai`d#Z1690lrk8H9Wh;<&bj0}0`h}U~6zvauC;^X0%eLHsXWM{UZ z5t9c4TUMRlD?TjsJjm}BAJ}tPc=6se9CZNwuxXR(89lk>Uwv*}`#d{a;@^Kpf7`Jn z2AwN5>@;y_F6hNwcx3{B@+aZ z-$Q);PR49~5^RALmZZYzO`rz9@3~gyXG%Z6Yb+Njl@DnHABkMVh+mC4l32Q`dh&F;CT(+hWIDHV z(Ny{#1r|+}=czx%YAs$|1h1s+Cz?t-km8y#%mRs_A2~`{V~x-X85A*<&Px2yF}-yLH_4&>&L_>PDG@fR|3Bv9z@;liyp?>xjF z8B<%F6xopG9*S%h2b7S~!RF5Jqo~(DD(URbG2}vh(K$@y96OhSoU@(2;Kx&OG%9U1 z{k5Rvfd5R(d`+Je>l;?RE-X^q%OkB9*Z3`{$|I%Rt&168J?q68yWGSa@boD4wMA|u z=rS^W4y-Lr7{g2+dgq5Em)rPZ%p>@AV~{ZJ+}I42G?b}AcV4&`fPYbGL@8+xnJ1vn zA;tLI+;UPFzt-*mXRhen&ZQ&Xw#VOM03v5G$LKaCN^IH1I}d8uSi69TGlPDfeqloI 
z5ac~>$)!cR?BX|fv=N67)wTH5e8Zs@<3`mmiyPZ5l{2Ooq$#s@dB)WC_O&VtZX1CP zn?EnTaOTYpd{|KN3H9IA_UCVh?X*HvtR6m|N6kruAi}~2vnf197YrnHF5B7S+x2i^ z3)-mIs8QI~Y%F$?@0ncqj5L)FIFgQ-Njn^KC^g@Ta#Us~kZCn@JtT0X$Wgc`b{eh@ z5E0b7T>VEvZ;KPb3x(~Z(2?|Zh6DvRkbdA?J{_PVA$5khd#wC<(!dGi{JGfO!wu<^ zBG*e>kW#yxMMOY$kqvT=F+V#U#F+imj-JOcNT`YYs3y`?dD4VwWG%z7%6_qDtd2rD zLY!}==d7*rV)qyl;o+2%&l7V)tlO<@tS1pu8Dj*E%Qf?~X%bkFD!GRdfJvFJ8H*yZ<;9}nR7w2g9IlDr zK^iPN50*M7e%g_qo6DG+-q;?sz{RB1NYIN>V^xf>nZ^9k5BDb1%)f1Bgt=o}B-tc= zDc)sa&Y3@>mQ!vjv~ihONX_(<7T6m$kPcL6Im-RUhX?(^sLXhZI9X(kWLMqV zfK3)G@chJ4?Ka5v=Hc}>7P~*L4}I~+PsPxyaF>iotS}7WonHSNE4|x3W!0uM?>6;e zD`IL(|8vK^O9y}C^m}a!Hz)32|IsWz-ud(@Sqp`*3t9AGk zxD;!uo_O}RTX!zEXxnL5wX~sad)~eKCDpB; z-G96F$mwIYN9#MWziB?7Q^#yem!QMP3Fm&hwSJv?bbSTw`ETzY3tTez_R{IVZ@0PuH2RlpzsUP9 z+5Y9WpIYc&A^lgkEve7<|C)qlT2byojAz(Z3#Vy*^?=fWeqA@XqHK2 zbV;g;IXT%A#bOI1Gx>!(Ebc~Zn${uMOdmG_Exp*?RN2{%TyuS*kmfC)8!?`js}-HO zvsyf!FHcg|rv^5|XK<4fEqT6(3Vh}$>p@!luWcyFJ|gMLleW~wL;G1M?V6-Mcw4_P zbF}+V$<=pwq_%QS>Ms!a?&_Y6uV$aWF&ips4~i2boF(ZUhhH_A9a#K4l)AH*l{vbz z!Y$D6V<+fW+^pB@u10h*cZcO+-+?62lv|)(nmFQMIZpi=YDbtijG}f+?H3XZIkqSI zrN;ftmZ8y<^5EvGxk7jp$$D{L$bjhry^n7*()Fhba^uxsFni}={}VEIup%yu3)Ag@ z&SraR*1xJY#z=i^9u_bEMWHJ7FOui#$m54Zsgs>O^_RJc9ZkGfbLX5=6X!hqFn4TA z<*py}^Yzt21fNp)1&6!e9$Rp|yzy0xzUZh3VJ43rv4RCj@L@qMJz|?m^~%=RRo#j- zr+H&G?)UhGT*HYs3)i0ERvMa%`iaw6#PudM!$?O|=KkS~t1U%R2zKYv)|Rg@xjS#6 zPCu3>-St{r<1Epac85B{nNs>y*_OeX>wC*tBC&oK{bbCqLnF#IxZI}&uTf>>m&BHq zxsdhfP7l$IM5m)~Vf5%sq4`SAHv4o8r(Cjo`;zM4^KSbimxtpn7{_DDa4MGH|1Amp ztlcrh4VQW-ddr{FEV{i3_6+ceUVi0O%x`OLe?!`aN4oPi`-K*qOM(|=prSlXsBv%* z7dBdW8+Kk999*t8IXk2DTLnU?6VFNoF@}G+?_$8_w|Eo%SH?YP6tKP{fU67ztMl|9XQ8&Wgzhwn?r> zMi^_zKwy`((^NX$eyMqcKNTs)_fiJtKcu7%d1<=du!f(xSR*3(MH5WTrDHZ^ z$#h~cCX|!`7qw2i(S%Zxl1d5nEWrj-Di))d4>8y5A#87O;`Dnj4Ke=P46cej6XTvD)epq8XX=f+tqNoHcDIrx`g}RFxGt~tgiHK(JXN`>&eu-!? 
zb6ku)96Y{5TfeM>Lhux9&uvDSfDNhj9Pubw8=@Z!S{Stii#d#Mnv8GFRSYRlRDWR0 zFL4st4!|&y-syo4_fRemif3;t(Pqi!LY&It`?cm%gU<2OU$qPonx9ovGFxoqtU$hW zBkS5BPdY>crX_dO#R_IUE*`X&w+L{@hu$W8v%Mf(7rB#LDccuo2fV@JEU=hSv%n!O z%#{MUS?d{1=7s7Pj+-r!e!wYRIuqDhpGVjKu{J@}?moz+<|u zPc0gAZCNKm_Am_7*D>NW&3sd5jipm2NSuh+tz(wQ14c(jI=U}{*M4Rw3$}AgRG{;28~Bf1?hh3;5vwH;gEtmCol^<% z2~r<{ife-bv#m>T6I^7*hV_EEen)9AZ1Z3 zJsYc^X4+q{g1@EmPN^WMmt(2H?4cO6w#&&81L`x)rXPPi5dG2z1~p^*iof+$L+BSx zS*=0CAWAWVfXck2b;wu4MOeZp<1T2Pj^U_%b5q=^tL!5z`SRFc!)qhIvLLM?P#glW z_l2~%=K;1hoEg(u#w8n}yXnW5mN#5A)VX-|hS&Oi{s219hX+C@6r?fa3Za{#Wi5*b?qWL;8IQ>EIH@O)meToLO_=*_ zC!ya~5JlzTSBP`Fn-(rAz+jkA!xjRKVqXg5r<8Q~O{xWQ;=NV7QoUjDFntd!gJ+gl zz7uR|TN?ULa%07;#u~5GF)M*iB1S_wu)3p*$o{rBw{^sbHhXXT>Pw1kXqdQ&qZ%5LXSeWVxU!*1)eRI^)=I4~|7kMbb7e<~B+a@&Y#B@%Y8acPd50EI;36S( z`@18w%j+{;0L~2OH|K*sNmNmp6|W**VK`KU0+7sPEZbGaMLdjvf#%j8M5Q&L>|;t| z%`#O)&nzy)l{i;2y7gFDv8pg{l3^kaWl^&v#0J`5Y8agXMHrkX9Z~P4FquOP(C)7v zuN)<})wAj)6Veb?DK0ypbtKHqluGF>DFs&MI9>0(qbcW#zTXfUBS^qCI2!9Bv9WlN zJo3oUo7J|p#oM9@JIMr9IZkn`Jz{SL)y}PI%EFYzd_k4<+p+6fT7^D{WXeHOM`u=erbDqcbZ$h`&O} zcyQE{V-6a_5SbzlEW$?txsIlaN=fpT>_P#2YaYMdvc9;5)8yScA45)gRA+>4jtvlU zB3PXU0vMJze%fH!NaY>?W>)YQ;HS zBDeW5L5#Ba#Q(^2Kb|gQ(bij6TPK3Bw$=%9V9*$twUAoMf5L3Y%yr%_03}Lk^>cJr zbyu!hk>*ejBS?GFY-r{UlU4+RBB79~)})_C`Fi!S>Xld!ppD5iMGi(YpJp)#xAUxKu9l<_Wp>O(V% zpg03bJcdayQ1+BFC8hzsQ(3Iwlm?dlB^TnzI;{~G4sN4_n#eN=Sv;LY)$>0U!_>`H z8Wo>ZDtzIVk}9j#X9OYJ^H`M%gRwDq=RH z4R~s(*Q0DT*MQ;57!L(l8#=2#FhYkvby%R=6{iQG>x1-Nez6B@)i<-Cu^4FUS0ly^ zH&SzxeHloe8EOgWNc|U5&k_u0yrSH|%?e6<(O@F9uet^8{~*AWsJ~T1ZH$o@`EZ=* zMTYG)6Ktng4-HYA0XVEx(Ne)uiF~isRedN(QEF;e3O#-q^(v`|0K(#&KR-`1jzcIW zlT^4QIL^4p=I7D8bG{p4Ss3!A9?&!OP6*ec2@krQ5vWyo0n#d>2_uLBwL=XvlD4eL z7)$tQLz>Ul5fy##TXiq(y;|mw>5|yF>Q~p6-PB+)8wfETsqnfAARG&9MY(`(_{*b)GRVRpCw(%&zR)#M7XmFfA+2@&nl zh?=kkLGRkDE+HDY;cA3%W_VmUk48y*R2R?m5w%G}6i@-cI4R3}HGXf^D|sV!_tm$! 
z_Le1Y2c%8eNA3*qyY^v!&fF(~R#_1)%8rEQ6~fvms+8xLpOWG`^3j2iAp3nPHl{X~ofQEg)EO2*xQ}*>!wf4D?6`B8K_M z9^SEc5A`Obs1Qd(N73a@$)Hq`GM^17yP$}@+7c+eb)WE(^s zLY`MOm0}OgVkmf+(yon`GSg6HLRchl8fCF?RB!lnSJ&JqRlkQ))?!AV?X|FV@~`;Mt9CNn{?FQYhCdM>PCuj z2a8Yv(us$5^*Zw8S7j_G4R!Cp-lja5_%~;+A*4B69$FArLi)kOF|#QgIL%FGe4N?ty)S4^$~1WC`34bx)w)Sql# zky{qeNJYUsAk%_tnX&URowH6pi3s<|7-j<=y6F0C(5@wqUq&)0a`rg)Q}m8N_At(F zO~0*GR1cTNSkQ%ut~yjXlb|%Ik2eA^G6D@eS8C}C9~Q@WoTszqp9BMpXZS>$izf-! z{IN&afN*vcwT80X75Ee6drkkS(Tuz_giUj!qWWEEJis#b#V&LjTAzzE=)&qP#X!1q zfCKN!cZu#KOi%$AE>ug7U-mAK2_e-7P7(|z^9)sByC~IoXhBn!P%rpmRgQXef)LV+ zHVfb-KnwgNGMx1MM0tQRh-Pv;becKnF7u54$O57Hj~8I&<^k8uFi0nnX`syp4iu$I z++Ollf*0Qswj$h`5jh=&rK~2e4&~y|6F|@lF z11)OE!_g@EZLM{1Hb8Y8He|R4WG4Xr!g4A%tDvN0s-MqLn1R$MvI4oil`&-H9^k)O zGO)0W7U1|T1-Z2eAb_~yWqB!!W!;J2-&3+JdPXNMi0Bd(9u}XtSio;agxDjY5c_5_UHsil!VCn#{f_5@8^{L}u%5>6k6P5~Or{y|pzt(&IS<2ws2FSuabe@ID zc!P1+`f($P6|}id23~5ZeHFH)h3XW!bR_hHUuv?-?0QoD+&~a7j?*+?lBs2O>+0s6 zs;U-G=Y+AguZO|j+DT4nvK zWFi94A0l%q!^2#R4MS&!FG*pD?e#IU@>ZpBas+Ey%ynWlGVas*AB$M#%m z8e}o4w_n`T4FX)#2126PI1z9-P|fzjtQIpjU#`s7w{qs(%99T+r0Ici29hvMP%8oA z=mrJX2Mi-5wu^A<^T=%VC6x=6u@`93oAQ9Ob;Cm?fN8@DKxKRWS~Rs&Dwl?*bDiS6 zQ|I)7FIWHzo|NAvPn`Y-BM5`Vr%Jo4u!a02juP>s2lm2e7#T#fnQ#VZ)ZUdgO17PJ zQwf_U9u=uup?^AF16u1ei0x%CK$`WT!yVO%R09o;k%TOFjDR&yjj>R$j4MIPq|JTS z*K22QL~BJ4oetL)@4|R9w*cN*8!H}ux>d)r=t49xkOY*4r}91ZBeKS z?%bAGk%JwHu5afNyJR%VH2g=2`nq4h!#1O-gKAfUs3n%U5e z=pf~!HO(8hB6x|+l5(<~Mc8pGZEielA_9AxUNN{E6~I&C>sw+eK|$6gc4WY_wNKyx zPXm;e8}PyzD2s>|TMGVw%a$b;YFS@vG8}~N|858Y*4FL!rq2-q>N(2#T_|>>2P8(X zNT~M$ip>WB+b8Oio2k911)N2jn?k{++6N-YFq|JV(o$a;Q#VEn1w5;2spp)DsLA^+ zlSYYzP10Dq0?|G&>sC=c5JJiLh2#2mG=%i_EB)XnAGb)McU2jKRTm1?prp@%S^-#d z+7u1ZV6lb>JPq!fOm(dqx%B}!{VopGo=AuRJVUtylD2Ol*p>oCHohg++f;(H#5M(M zU1b7}Dz?W`c@dHzm^~!3#7vqSYT>L`S^uFhoa+YfMF5;DO5NTJONd>ZU3qSee~|;D3D{9D$HT{(-ZWOX;^2@KY<&-Fm==ui zjZn`J5Dh4OsoR%Q3EdCh2SK{oU8Ub;F3}Yv4@jMt5*5)p;|eNYdOieQow3`o6Fl<( z&J{x-3V=85pVC@uz*04=_2CJtfYJXlgV$J*IWSvKt$hw*H#&4&+Bk? 
z2!p*WmFSU@#vl-u<5fVNEaZ-u$(fLwL@8#hK(A^c^8_r7OS5NLI!cH$+z4%t9}J}` zqN9ZjqJ4Ui)Gd%>{L8#>*DDJCU^GG9V9ebVMA@qsQhn~Lw0;B(x7$ym)2d2IW*jok_d9k;~QRXsh zZWR&I3~G*IVF0%DHInpX?9S(W_hR3$X_i6;5CXa+A+xCX-yG6EjD+q8Es}f@j<)rc zqKAp@RS|&JW~zyNi;HU7OgvQd7M3qzN33_LqB=noTy|3l5R&}HXE7d*Zr+l3dCNXG zl`(Rludj9buXxy=Cjdoe;esgFpoI&I9di}vN^%OH1_v8x3#y=#bv3CKx)s4BLN&10 zjMRKSY=$Z1KpNXnEhDUy>|kF~wm$2Z%02i+Zj<+()2|@TDO?Q+ZUVa?oPT?IlNNouaD$n`NUd z<+yA^||fD_5xoYuMpA+{j*8- z!@@XW8c|VaNnc4c{hO_j+9B2jEqP2DTFsUWlNuBOI$X2P4Qn>8UkbBCTM}QYluSDL z0&j4Bmr9jE06i#b2n43Kat#x;mx2}kqE%SQQ*gO2w7MmqN!!Z5L?)VjJ6i_#sHQ3> z&>a`;eTFyA>A9S%F=I<%?nlpB0(Pq8`9e>a;n#gq%xqQZ?a_XqH;h^I| zKBi&pu90~VJ=K&7DxyRF#{s)5*>hxKgm^Dx)Cnf=W)iFMxQbqgZ-4z)Csv2Bd%iLl zhup4BC1)@%Ec8Zb*BAhRx;H!YLAs+O3`-?32?)LK%?>Lw88384d;ErrQ0Gs~0@C>c|A;EhYN*8So= z$Ypjb7k?B;UZ6y-d5AuO6Qj&Zf+tXSGlrXHgIa?;TfWZs_Bxp^OJsF5Yn$Ca3D+*v zxBLmK{K2cwA6&Pk&^ZN@nu;Q~JES=3G2kDggcF4zS9q_A)0FEP)*f*R(c-z^=6)qYLi)aD^byo%Rv^;_SO=IoyJIdo)Uzlh ze<_S|Y~aAExP$w)jAR(Ah~S>+jK-j@X`zD>{HeB(Y9>kvVt-!Wpbsxxqwe8e6Y~rZFEbjLs$;?Svb2jO9OF{t(o|8X{94)2F?=aRp9+@ z-_VU*Tt;P0IeWYt8a`(}a}8UvVC;Q}m&f5iekr9@=VgrW?Rjy(99B=ZKS@~a4)r@E7geXpqysCq`=mtsrzq# z@Yy$mTpIe9$H{H0%$yX=h{j5+6%6r}jlZRSRfM$oSUfGF|R3b*!jQSlH(9 z3$;64-}sKB@F`7-E}9w?>qOL zbL*e$qYrNOX05r#9Py5KypwPx1t~0aQgjG{uwF~QRDmG4Sn%%!Dl+)aZv9#p_=4yp z{`xH{_~VUg90p#a*-O83f*>4&hktN|A_W!@L=C-uDfZSqZD-y~X z-ouG2pYl>JB@B)8!$+EMI(CK?6Xf@BrpN~Rlh^&G+>sn}D4$7uGdB)jpQY4J>5@6$ zo0c=Kra8gHQE6_UL^>m((geHHp;HIL#F2yH!HZb9YzVw(EXv>yK0^Zg|9oq+x~hsD zHE`)fQCpk5s;X*qbd+^$Vy7}g=iYju#Ur;^#`9xx^0V6vOk@Oc2?=FYRj#hu$J}kS zI&doQ-ePkC4=WQEf zo(o%L&3mM8FOAI;5)usV?`~O%F(LohSeW4P4-RQX#s2Sc^imD3h*^+0GBp<$f#3D) zBKa7P-9n^Jv#Y`3N^jxUuj2Oh2ma-q#AI>=aL#&)D#0F^fnvxpDHUOZJJXd2soXY!o%$VTyR#^&gf{VfO<)DfC!6Ene*AF0us+?I z2)es|r5yy1>QH2s;eiZQ_}%+9JA)@XIdSgk?G=L|28VI_QI<+k1u#aD9s>u* zo97#M*Zcc*3(c;(;wmbioLfsuN`x14O52_%$b7YW_l{(2d>ogGie*h!RyK&5_wJa7 z7!!sF_wL<0gsWU_lQLsW3=@2$~Fu%(dzFFiUm(}FSXS=hV8EqKiW=czdK5kLIk;o+gm!mnR%n-@RI`}nk;98*cRHeGv=Vj>R{ 
zuWoK`D(qiQHU$R<^Zv?a>?2;RwNj4}!C^mP@Q)SV-VWm9;z|IEQB@__(DrUX5A2Ac z5HL-$n<~=*YsR4zWXNj;UYW*5C)8te^(Le=<9Wh|a^2S7o0)TS+QW?(9o0zT;{}R1 z7b$g+{?Vrgi}gJe0&d**b){NmP*QQ_Vsco)$cVXM#i{6QAKO9?Cp_ZeO75W zt1P%&k<+cs4GRo$otZ%AsqL9D+gpKARS;o{va;J}PV9n(c-3)4dOInw6Q&~>Oi_aH{+~Y+ztC+V?vJLBFfz)~ zDX5B4EAFi{>W|IHhz_Ol)sVr##_qejz5W;<|HocI4=Y!9*f%3a@Ca2QQ#i_~Kl&l` zRD>Z{9|{u_6R4+MYCf6EiV!TGd&%o@nHXGfLz-GK(hnae8iWR!~-lh@Vgdw2aJ_lT;z&z~As{lzSz;YQ|X!4ko6dWt+U>J<64O zw;8ZSA!9I`>0E3u#kR)BF>gjf^eV?gF;&kjn8kB_690jeUuJ}-*HAJ zHN@L^-)&74!#N|zgCw*w-#k@T6nO4X778Aye7Vh3S{gMlC`kG3+dn5x2{I-;nCEJB z{yryvl|x5>>91OPB1@nBP(5u**xm0XvugSoeZp9cYnVR2|;dd zB+)FLWM=r0!i51cAg}vDR=0LUKb7CyvL+1;%~IN|%S*S``hdw|9yT_%&q7|@j!sTL zM@RklLAt{BcyP^mBe#cz{wjYNG94Cm|Cq9%dAYJ~Ie8vyv1W(_)PoHHx4l0gpDuy$ zT=cuY9rF(R^5yYxBQH@;Vq$u_P!{`@0M4Ju>^y|Vd>dlBDT79#JiFgD-)#%4qim+_ zz&6c~xPok>l93}7!%s13bv-j9tOXJn4jS4r@F0+GX0TR;z|!M+I5@`l7J!d+lSj7w zDz48gh_WF*DeeB;tWxw=kb|RomYvw><~@<_-$Q=mzseAekEs_cM&Qx`Jtt?pnTZLX z4>d6+at5fC@|LN<(K-7>I5^@k0`<>3s)QOqnnZak=?5?E>De@X3A=60-@TZ;-e(pP z5J&|Vfr(mYx}-N(dK{ZJkJtpPOosAotjtB$)NoRgk`6rN*c2|S2DRb@I(FBD)Ko^K zkkL`4P5xncX}zspj}Z|?vRT0xt&P4p&FJdz#56Z#B{LBL_7>uen!hlb1?FeZ(4wNE zF3x7{q?(?88yb40rbdWODQM1PmRAh294&YsR6}gqZ=mcrEO#M!-(7BWE9;CQ)MHcg zr(@^kK>e;={c5G9FLk2;7 zcz`@=7RhIG*OX9NRJNUB1f0hCx3iRF$ZT|1lM7;qHnWt8$Oe56cnN*QWY@HteIO`k@d_s)n{cAzn3kycXMq14L0=rz_jO* zZ~$h1^H+kv3wCOJQ2@6w!)CEpPwAc~SoTcOkYb9#a$74~6BzeDI_*`&(a(N!WKw05;)-7Z+)2VF6-_Rt ztDB*dB4FJrgStufKcPKIEXcPnB_+Rn`2r7cY*&rNIIGQXPEZ{YpgNn`kZ<3lLysvv z{@}iNSA{5>_(Duf433jD>TNXE2Dyia$7c~g;m5SJmB!y6HIKkiv#{Xc2wzZx?>L;# zyBX~-w7O=FWQtIEo$vL2q?O9`9v%SCO^yU(XGl&?E-WhQ#9(RhFUasjzr4KEs4_-V zE7L}Ucztgg;_I`s;d*;}FFJ92{o8MM?KJAFNgmVKdA9%pK0~jfQ~%4Zx^DQDi)Zp!Q?0 zfzBt?^h=x=8aB41m{@=_IR!=3zpBwg(FR8A3`D|!pimqtWJvI2R00(f^QFB#>(}C9 zTmphMB>MFlVd0;FL+@Kc9t zNqRCSAejUE<`fm71q1|?-HO53(Ul8%|)(7wy>QOib$>i+npbm+=sHw9P^3TtwOXG9u+jId)G{R=KP7Fj0 z+~%ZX8q#y|5Og~`l;cB^O-syY=+t|;*_5nv#G9HYveMFF5}MA=oM3I0w)6adHa2pA 
zBPc5?GcYqt+S=O2tg-d6T8!n|ij5kIpMETF!+QasL8yFBbH2iXsU+I16sNLd@+aE*K|ABy!d>9zC?{KyM=kzqH=*`(= z+|rW1;9(EmbhQ}{K$;GJzTsNW)?tl^+zWz-%=L|47@#p`I2fp2^4qn}Hxcnhf5C2w zXvJAG$L()ktJ!SSs2x2pAoKV1N_cI;vQ}jwN!Rl=DIK~qx82DGX-^}7_B8zbqz~vF zgfO_|j-=8Ie-()l_K-S`&T=>e0p|Od{^1Rz( zZz7Xcg;76t!DTaJC`^4N>a?P*3c=hC0jl_w09#++=&B{ve5fE3mf3Hg?~Vb>oOrhW zy7@KLqH~-SA1^J>eA|fZz|6pa3c@vBQj-r4L|aeK%g_@f48oev%mG{>oHiJt;yZ4oh$Zl&5aqkFz3#&dW_lm zazQaE*!1~cJhlCh84eEYf20f4FHTV-l3UDSJ?Dbsku0~KcGgn|Dc#xuU@%YNap;s7*|pBaN&QrY3_P-a4_P|VmlQ9 z0l{sPq>PMAJx*s|pQOf%MBDw|)cc>V1t39!-R#eO9amwu*!Hn(xS~q4w6M6CY}1k# z6${H{(6ezsdXB2RD9~lZb@Q`O6`Z^gJtU!olPnkFu-ezyGS_K83xS+!G!<{mo(#jq zr4gF5xh1l!ZR&4XTU#47=NvNw7!87vkzq7yFRXG&pFcDOZ{CcrZs;W8()7&Q$0=h{ zmX<=0tK0MGE)>ZIweAQ99?E?URaK-YNrYO>DBzO0d3mpK&ePJ!d;0rjB_zBCW>1-N zT3X0)L~dB=6f@?wJ$zKPv}U)-!g&0SY0u_P&v(m!*_K;GY-_u&j`rb(Ub7jf2nO#^ z=(D%eH0KMn1vI!ohxquS(&~Fx*RE6GIqerRYb&6e1FMw)cGRxKd3^(kYkRkTh@Icd z4UNXw{~~!rAP5DI+WO)&76r%c)k8JD3fnI4xGET)cs|`N0yP`;p-!3`1N0pnE4;+S z)2nd29v#3_y%8jIaPUpdGWef&bnS;b?{6O2Uw-+_DvaJ`L^khe+h_mffi`Phdi8!O z)6IHBkPhm?8}W}KNmgy1M?(Ts+9sSx0i1}tC_ka^4Ms)SH{6ns*MNsYrI}xHnd*J@ zs8kJtw3?8u)s9f727La(2=ooTkC{gnp6t1XTU2+b3G-JaIC9{Ne)IqFpXKxb^mmWh z*&jWW5r>o8uALObbp@fCE<2-4;HMG3&O3fy$4MkDmp+?BmCeg@uJ3`SPg`(4g7M ztyIs~R~Y#GKQCJq!rWHN*Wgtb!%z$?QAIpRb%dy`ad%82%h^Huci1?7V zoAn>kdP&I(xeP&)#>U2nxCB+4R!0N?OpqwL!NPLqu>m3WkcMlmrexNPO-;Y#=OeSU z9KdIIZao8K&FK55r>>_0j(=vU#DLupLeo#Cu=@)_S|M(n=uG71wFa}B1sn~wA zA$mg|<$D=}!OB^Wb?G7w@BOjb_$X^&DJ`d|>9=j?%b-qw(9&{panZ1{R*b9xqG%~W zT^EkJ%rh-Bb3gVB?84HIWR5Y=JlzX|n$5iSuF3fsoqS3#__4Qd-|7SSGFJ8;6bA`$ zaro!YpC15BGovE{5C;+x(wGA%#{kGE1B@IWk8eNx@E=8y+qb`&liYc9Pnq1sHqgg* zP!t1SHT!zF+KIyTL=mUV+8YtWhZRrP71xq{Lr7#>S5g3%G5t}IIX}xq4Yrn(o11?} zx&hSq2k6n#qW!ze9dI`8C(4!co&YgmyR^9E6q^ht(tu@v8kLl|uLlqpQjBsS4_C+4 zfNPe#_G~nhCbGf87u>Rh>o3rXq&R%Y#?|gsWTcr`3AK zBL$uZv+mt@JZGNA9JOr>uL>Xg;NouQuf5)hTmt-FlCC;sNd7|v9gG`Jr=KHHm* z0Cn<(PE*au`D;P4c5STf`#WWRQ>#0yojL)j3dxM11&|t?&k28W`2b+6QK1j_z*ha> 
z7@?-4Ll$t|sTf=>N!uH1%D+D;5b166xmE&U2SN%&I5jP;1gUZY2ru07;{)}% z2kca(&$Vk&$$tWeq=CQuQ${r`i5t5OGl<&@yP|{g5G1RMSZAIrJ0R>a6XuCSiq8S7 zq0Pn^jSkLus8Hki^4(VDt#na!js$MEKFwCG+Ij8g{iSMb?%vAJ>0b7Vep^6V-xvAY z6tBmzEF#hrJPPUEm@4Cc2}04M+XXxnjcwrls=nDlEF;a$2}u%S%vK2D&cz@0-zEMF zze2tohlnL@md!Qcyet^cuBpKXe1T^xWl>R4>|i=e_S_rYD9NC&U!OxApeX{03=V)v z=%FtF6eR?K4};L+1XW1zghvYWNi8e*7unv2{%oi*_vqHzJz0aLcDCJxDIW%K}ePdTzbOU$I$@qGMa#`=Ru{Bw*pmnOD#I zt5x-9c+0fzb|NXW+y)l^4dD5%m6nv$XnlPQ@}FnxLK%6ZwnC>-{6iBXFCR5{EfVIn z%EYu%S7VymVF{kVYX5hi%C!Xs#BXzFxZ96xz1%cQ31#xe znB|M!x{`@D4W@qnY|i6RQdRW`%B6cD>dpAmoD<|`H%(!ho@0mM&f_zP%g~9ti(cG) z=iVPJ$P3ePxbi+1J5!JFTt@vH;?4CJhG6XRfk(98|0@UKH8@+Vm{U;!d!Rj{jSLNA z)6yaV%6X`0e0+S&g+eqv^9L~ta4eV7f(KNzWfhF-m`!u_ZDrjeZ3F@XK1lkj9k>ViY%`iU1p6=O z4b7~Jx_uco@)#9fTw>miuv(7J34kUU#dEXED@@Gt``7%~S2IornZiz^Q#S{*+ZV^1 znwz{{4uX5>{G93Qd%X1E0i^-=4iMYUSfpf*&iEU@BTG(FL}X;cy}3q}6u^Z~mTKiG zIGc5Tr+fM|Z}#k{udh#cBOvx?H^5nI-Vs0fae~QgJFdD2vsx*j4oY|8{AV2cq<=!$J&x)u3^?wH9Pi(b*t(e;&C3>dT1Jbo;Wj;>j%wpIRDRu@t^V0ivK zaK>8l6Ir2a%MYa40PWwQdUqI7EIW(ukinLWHWd zhjbK1U&^LI5cAY{!>QZ}u$eh0Y2k$BTQ#-EOic3V=wN%oVqzZp7SA8WcR252hdih^y5@>8z&N)G5EwMb)=P4?DiQ2Zbx{-9_5hdS-g8C(eMb05skLCgT zg%pMCcyRRl3KJd;{e!GBqlTG~opY9z6y+Np5vLaT_C}sZZ4!doQxuD$bNv499~R)l z3XacEa754lV%fHeY!^@kQe6bOFEIwiwjx*`*#tF7%BB)%efWAfgyqn%_Xp-@-7Z4v zI5WDRuon&2W0P6|xRWe*mpoqMdIYVc_npW%4rToW&)kENE>?IV9)~`4Z(w|&MasLP zqE?K>WEmlUCU2Oy%`eacLty@av=ghjtp5IwiL#3r> z{fTM^YS%^@{71F5zbTIEqVMaSpLLIU44=AUxXVVItOs=HqrOY+G!F8Tz1UH~yL0~P z^!@|=hNS-FMW>?Z3GxGx#B3&{jCgldoNlFnh0y#{G!UY3!qfOWEtcT1=gHxyp#5<` zmqQ4%$bly4`FQFBx@^D~ae==XVR>CVk4-|QCrXE4?CbvnN7$$@OXmQt57B?V8Q)q) zeBb0`$?^@O@BIjAOu+71*5OS*SHswo1*x|cRNO3SsL3&A^ZoW&${NA3VyiB- z6?ap9@Iw@`=<0DB*lziSv z$aAG?(Oy~kV+~sz2cR4Q6;P)olMY*%aks)KVY#Pl_+9zD)Du29>K=Q2-^1z2!figK zoe$GlHY(%&4%mOi#*sd<|meWZsr9q3A{{Nu`c%JZYYT=hh9{FvY4lgaQA}ogw(UJ-x%b4MBcuogO4dja6 z=;wP0<<8G!NgD708hO}Bbcg5}1A{1Ne=~=l1~-UoAL4Z2tjYFU-;Q+drEvvvkrErp z3izHYV`1|2Hd4ApV|0vcM1?LH(DqAy8Vos9Q&nBA?5FI`mI#Hyd}smJ+z-Znm7wIt 
zCnh#+sRYO3c8z%3w#!{@^;17t6mTRUrIu|bm=!4!lh0|eoeUNg5p+^~a^r1oD&g0# zQ&4NSC>$IbYP2?x0QrNdjv?TEczMEzCnUM=ZJD7 zWUN&QNBYdE(!^wB5fY);VnuIPMzbZPrD5t_zqU_#o&PFjhF-en{dc16m#tatdAxeg z&#bRs(INZ08xO$JOLvxvOi>%xESttPybQMq%Kt`;swgP%SBP6ue1?Gb>~1^7`Nr*)$Od3pJ7-@gX{x=vCO8KM>tAOr9WbZy&i$<1#2eW0Y|qJ04M z<#yPLJvnQ_veK{@y+orD{&-`gACyiMXDo3^Jlo2SCN}{kTHj0Y&!k-=et<9X(5t9% z;i){nAkYvnZ585FeIJ@15^bH=;y<_`3Y^kmsRQn>Gk-Hp6JVO9<>YqG>#ww~zgMwnkM~z8s!25) zOKJfbW4h6)AYS*~J2X-4Ka~$gfLYCDv=XnBNRj^VkbxbR%aMDuh0zqd-E_QGY%n!lnQLTJP)4{<3AfYN*4$>%v zYr3mXz>TR{Sz%5a!xW=jD*rJn0z=E1&tF1wybv$c3k-ZNJvKj=YDcWk9RJ**5lJdL zKTlcbzdJm2f&At4-Ca8o89X?)wo;_3J_N0}L_C9EslNaM0T30U#S`KnPO7R@G+!t) zCmG${T!KA7Ff=l{*0zASJ`%a_3?_VuTFV45YohD41N^I*M9k@l%$R1&6( zUKx$^9q4y&8@%gCkp5ts2w+k_W9pxjlx&JNyfOhC6deOVzadRV;72;!NvB9f170Kc}9%r{f+=Jsv9PCM!f|aP0!PdAybX}i{{}aHo%^}(VAm-uK{&__cXO|G*vm9 zXVuz!hx)+ms2qDiqh^lEN?6tyh@R}80C39nZx2?0Mx+ppBa-6&IMIdV74k~yOB2}e1MB%>zH=)(Z^;P>x6 zd^mEO9Xz0-uU`W}jDuaZw6v`6HbkSL4QfwEx!Ytr?3g0ziW>=#Pvb0cVb%J5SgY&Z zT{xPo{LT$es79Jb`&l<eCJx)k z@ET44rRF4V?PjW??a=HXuGbLW2bo?)>7zg4qd3Qw3(Q1+kFWod=`mQ zgM`Tk9+ypF*`xSA8X43KFj8Y+V(MLu5s3z~6Yu-$72+enjv0csm4Shw<9Dr61?TUt2v0cYIM}}_ViGSq z%Dq9MmQ#T1#%p-tj}LmThxv%Se66gaoLtXJZv>Eyyo3vmHR!VA4$9<3_AP-Gwwm5& zv!hz~YNAFYHr+IZtaG=Vm%`m&)KP|{?#{2L;_t**EW;U!glI;`LR-1wIB*Zbd(sKc%j3jy%!|P)p13US}m)A{FWAPQGrK~#IHj1N)Sb^ z>ZaR&`VR8k3z&&ZYoVzQ82%!|WNtQXPFU4TisJRI!`9o{o%;NUIF_6s%*!Z{)h3Nx z#&0BfLg8xI1kQNktLk*C7mw@y0uz_f=Ba0VeEbq%2QGS9e3!q<;;y(h^ulWwA@_X^ zwP8l8*9~;oCl}s) zn|Rmr>Tt}X?xSbVUwpvo`;7bBu6y=A!}C^!4$vwOLJi>lKh1+xigu>ZTY}vm-_1y+ zcOpP1>K&)*{-0ucK1#8R=&J~7?Cctfh9|=l^4Hx@vCTpE2NVY}iwHl?uBWLDA8TyM zRQ!3Re^<7>xReIcKL>VZX?GDLUFgTR)%}bIqi+dJqt!*TJVR__o8KvpvXkz%D#5VL=d!^on zzQLo(Ek(?yZ!8H%hb29_1P}|{Ou|K?c;4(z81uXr*=PozfQf{CppnxbpO8?-E84A> z55*|*N-S(@ITYPLANi2lLfE^CJK~mxOLghD9kk1L@YN|=!UD@hbjEDSBr{y>70mzJ zcSF|jba+Azq{7zcXLe~!^&(UKL_1|jD`ZkNR_!G^JX!}ac9`z>EpYNoXX%GVYi%0b zuijs;iWj~~gX|5U${}&};tAJgEcU)!NIZ;Z8QhGjm67t|QbOa{4ua_7;MnRTI`t17 
z+3emRWZfdq5h{IcgR^xdX*8~dpU^z8WI8zX8m6EBo}hou45w9t|DvbXOZy>3cAIS< zSh&P>e|qc|iP0hPMpZf*GdM={KKeC%5>o8gm6>WTYaUXNSUc;HTdw7W zY&mDoix97`mjY(REB?3=h+DYwZoVJ93jvcqBB>b4-jhv<6arxk}MDl zf@@$@2@lQx%BsUu;43p`2kZrA9%G1IT2h8sHF!S%K{`5=mV%KQ5I^KCddSMiWB~-P ztPEE@V@;&-`0-==CI1VXs^+DaVOW4I_AN2e;Yp?n-jATzpl2aF1YH6h@}N;Bxo*OT;TIFN*MU`*+agPW6cP@9?q-%iZX;|B0Qs3%8i; zK*fTxva&ihYZ>fg+m_P0ucmxf!k$6a-z_UDmm6&B^rsF7%{SdO?r)3vFD_ZeVMrZSHi*H@NEHuFB67n0H@JS=K5&Pr#n~ z%j|!`Rn7tQPf85<3@j`U^3&Zc>9iI4hyW+jLw#%BX@p`3^5fHCM8Zj(bd6>yOF8IO z#k-5t1kRx^84SSJrb4PmAwH7<*nfckQu4eU* z$(zi;zWF$KhYeoOacYS{6S!cu5Fma_a{0oDk4_wQwtf5gpINlnTC3_*6VPG3RaNCX ziGy`5dQh;=!Ehd}`wul>s68;R3Io;L4lnj1u=;!|J3%&eG-7OPFzrgVGBqzt{yAp? z*EJYb+Auf$!L5x=RKFv>5>CZw%Eky`Y4&hwUv?CgWHrbGpYt0kusomV2?Lni^KJk; z@81UN0)2XRVc|FCA#+ZC8{I9%g|Zq=CZOed?(*dv-~K~Hgu_Y?T5}DUwwLI&Qv!96 z{qmPSTPeCY8ct5<++wy|&!+ym!HGf^{wI_d1F)1!#*5&PmDXxc?^!lD7H+v{sl5*g zls6EJ5c!F#bhCFAf%M8DK5Y>G8_?tZ`T7M4lb6*=%>tS> z5E{8z`rGF~blAGStMG5Glmf^Tfb{I==B$*A3_9o$(h|p~r=a-wc|nL5sLhkvHNf{3i>X3`&rsD!n~<~R_WbYe`G$)fwO#8e;mYbN zo7-3TOI}w_U=IR_$E$Xnr9TK>B z<|R=U-!sbK-~c8q1n5F(7P9fdfE-SL_f~e&s=gARgD;(xlO~!QL*KT#&NBj2OMN4F zeGt+2=|u@+exL9aXuo_blmhsd`NGQ&rOb`PO@3Mtx;s7s*h2U z21CrK)DI_1z+i-NOZ;1yg^&)sxAJ33#NrN0PkczpcPMy_)FCSjbWX(uKPz;^tqXC2 zBR%H~0}d7aVxwoa`a9tNP;|SVs$AiiMN9Zc-hGD|u1;DR;IflnpxAYK)=COC>?%|= zomG%IkIv z!e_Qihnl@EoGv9{&Gss=Jz}YDSF}a~E`yKCp;HrGSBoPE6A3CQH$O9%*1v>85EmAc z`*|9#^%S7iLaWWEnQ4Xr6P|-Ea!*j=RfiytWf^L7hnaN5UCNI=M>aztIj~6A|&mP#ZCrzTRIxGVv&0hi&B-frHy1Ya~HoTgu~=JeMGCYw#vNd>ZfnB%a>$=9JSrvYl@tXulk? 
zL-xj6dFMWaq@i~wWS_HI+BHS=VigV5t$_cJjd|S?m;6S>eE&va=Yu7L9JkMp|5u%x zBXWE_kk|rov^Y?-{V+ZDFBcO;VY$Yiq)6otl2Sqq))U(2bcZ^M^OE$fYvCcnUc}|P z3jgJ=&uJ(hw8v`2wE~HQYlS5x!Mf!@*!|~^afx=LqtX<0Mu1jn1>qgRWUGLGVyfXv zt~=_;nt!jni>E?uHd5&GGB~L3g8fFH4r}&lF3VgUnk*n$ir)UtdzJw7ZJwbMe%EnR z1RJ>LJ$BsPo=Z3Yf~%0Gdf`mtWWpaG*N+VHF_UU4X&Uu}{zkT#-rKS;K#dy*PJPt{ zmi3>K=zm?K+eKY}zL}!ITUkyYe)`<^$uRT&-$C8;%lcBec0`eeERIX zp${BWkq?(SCZ#3iPo!_0pjehMej4>YGiJ7K>(PoUQ1Jb?GOM|zsH>hP6|#U;jiiRn zaD#|=fsRf4#a$M|{U^=A`2wOXx0^4F4R;-DWaX_EOh&jda$G&CVWy8_fQXn?NIUw^ zH_SY?v82rWa8b!{0vUV~2&~9@&~D;!FueKe20ar<5c~X2_eSg&B_+kJWMCrgwJFI zVMPEx(VpB54b4&s`=2l7fLbnq46C>4dfXl46m2Dm(Klccdx?3^c;r%z#7AGYB7ld4eKRZVppWZS1Gb$o>RnZB>{*KceiD*?GuHH{MT`p~G(T$o?I z_dI>itP(`BhVAQt;9*uqtF{)iF;0iIodM7T@uA*R^}MK5MC1~A@`%tJ#ESYmlu4ju z{oNdyf}V+*8S01HTVl-cOcchx!r&iiXrmS_p%+np^ZSbMPew~f=1A)3zGF$+xmB~e ztU$kB+i1>#wVBkVMk7mxf{MXbkz40EVPJioa|R3_{%kZy|Cxn;+lhK2%uVc`RH_mf z9HiiTM2O0g>?3hm$Cv3P;dHw-a+HA+z?e|3;4JmKO$RynN_t%$IMeFuC*8_ zl3CWPxE?H!W@c^EU*|WuR2KKNhAI{V+7hu;nA8pqs-`m*1 zKwKj1-f)C4S1f>3J9xhgAtIXWDwPh5kdb1Q|C*)}WKM+*sR!H%OGm#bG-9$l2Nc!_yxYecjR2Q+v?jI@$FHi+sY0r}*1Z{b3PkZaREX z&1Gq>HwSeBG$mIw(>88R7kj8Q-S6SQF`Rfr;WAO&5*+8iFVC9fpN-8**srIc<#9=J z)@puqT0!?s|Jt*T^Hq7Ovgg}08@#WVCl%eK3u4T>?=as3e5C>u%kqFJUA%iQ9h6_X z2wRqLR{iGXe|b@heIdEjS8=zfhxWj1*}!bi;^23MjUt!LzOO1@+7gtd&K{`RaQGh8 zJJx9R`s0!Ojqsal`B2$p*NbOp8+u@;7<74!5Fpl(>pn+H=&nVkvE1btt)5UNvp6HQ%bbZF^xg6f@0K7BUt+ilTt{( ztxbRRaYvt&@&R>E1n_`sASj)Ez?YWxbQNvm!=3sM#NM$Ho!t)q9n@A@DAccd1bM?Qwz@4({>ept$U}6*#aCBRWuq(|C91h0q(4K zTT~s2ghAH>)HfT0`df!W>-DLEynKZlRoiy{_s3LAR1~;Uif}8I4YK1XdJf`Y>R`-} z4V1c_ii8m;HiQ5HBVPX;X&Ta-nhQZsCxKXLltMHTR=+(P=F1X7@!er{pD8%=a+u@8 zu%1`6z$c)2DGt|zaF{w&LD*uLGr?~sJ|RRZ-QH*^q{u{P38w>xu|#pIXyGC z%xrJI9C{T7V$0Cuw(sNu@Y)xXcZR~k2YRu{gWcN}EZE{<6rPXWseKC}(Da}j%v{V| zm}xw2Lv5w7(M!n_R8M));8;evpH>_yr+;v~i%E<@7e1suXNe?o2Y0dD4v%$uMx=}! 
zFn>%vEhG!WcS}{@=aNSyA^z{+Z6QG29D?v|9)2&tzfQsJ@gA1ew+Q+Ci$IEogTQH} ze-lU^8hdus7rnNyF-`Q|&taH{9{mwX`Y9y*1>Z??Ue37{6EUv9s z-$$z1*inrB0H@Ug512^(s*H0)n&3!Va8e0S)ln1zt`PLYlmm<&DuDRYcVl<6`X8m5 zJp?sjDRh4aBgyL8p)d@xpjfuEcFKhX>}@?=Ns|D1dGhdCx?p!!upj@aeoK3ppp9-iwEjKCy_Xp-MWKSj^ z1B2G_lZ>O3p>eD<968t)T48;k^w3`ZM^N$pUH>E zZeI`Fv#=xNJagux+#0VqO^!7MIrF5fL=@^ z6^)zOP-E{SY$Y9FP;jO&be;YmMEi52MJDeN*oS^c>rMoiz|Z2ZU-h|DL-XEkgBt!I zDGHe?OQsCjr;mJJ-eG_B70pYgT+FU2D=&T3B#2aRu99%5?XKZ_kKQc8Ax%S$5tj}j zKD_JxqXV1;Kqf>-W=;>)z%)12jbkc z#RdAKv|+pxfAo3ue!|L_odo;>1~7c5&-Oa;UhwtXyT}^>+P{Yh?|A&bCcOBf*0+>E z)Hl^K@00)bd8OsV^UZ2-R`UrR9ly`D^y}ARGBO{=o4C2T0cg#-Fr?LGXoDA_3b|d3 zaw2{;F)`)I(KZ=jCy57da?X>ywYh4{7VFVK6u>cX* z&GHY2i-npB+mY{l4oFmPFG|}g&uHh)$EgJ7KZolH;%x8xp|$#<-mw}94G+-k>%OI~ zB9i-iMuui*0cTtV*J{JNx_i$G*Cb`Pw0jQp-m-ic)QgI!-M?RMwD0sKy53yuFLoIs z#S?Oz`JH)1CZ}VbU6hkkUID>u;QhAIA@wSnwqg^G(OQ))ParA!!W|A?me&dL!>$mE zf_8B*Zs$2aj!%S-rdL0?5K#OqE;NLT`cX05cta4C8h2LCHp02C0Fs|oY6Gj`%O}$V zwJIbnWrR0x`vb!Vs8NpzY35=~LxGwJ0SQLb@OvxBsrHPv6_aOn)Vgxz{Jtd^-N&hZ zW5KWX?UkSJA1)21Qhl6zSc4<(q`K>C#+53sq`=G5mUCWk){!2aS|Ycw3hq?`me6{# zY2nM<@YDb{BWZ^ncB$6Y=8pO8h z!>>!Z?Lp?Z2dEe7ND#!WO8$!Og(k8vGyJ4avwx~#98t5|v*{G7@vjZRu=Xs)D}(m0$w7Iht|YeJ&$S1nTjyTyzS0vvV}IULp4wEB0KxJIHbY~pe{caR(RCZ} zB~zyhdVQdxec+r#r%0kH3MRjPGSM74@L?s0bH--HeOk+eI%|4Oz4ogShZSy$q`l(6 zO8U!{T0MhYNt;cydhn@FGWHdsK!?%zdiKJ=wWnL+RRaRoUziqEt#tLRST2_t|Ge?S z(^kGN|F2i;(vGq=qGo{~f7qz{=IO@q7;MqQBo0o_8=a&so^08K>S?m1u&tM#9A3sA z?w8in3{y`SS&pI0i@90eX1coh-4IWPii` zhn$9?$5&qEdTb^`;y$Jepi$_92`0l(BEvH0(b6UOnmwmq_$#3Xar$>9uSA}~-@+Ld zB92zqPG+tc;HcT1ZT8KOHk&;$7EXA;A|edNm3oC;TvvDD`knr8ftYaZ7Z5#D+Z?Hv zgVGL`8e2qn8V*PG$EY(}VzK&aq6U)OT1Y`XnGDZ5!UPHqM`0-}DV|M}RkCD7kO*!L zsSW+pdRQ&zEG{1XKMKiEJCLpCW1$_M1wD$=8b4aIs)%eNgxEER?xmvib+$F1&$ql;!O(i|zkVU#A0N=zJOUf`=Mv-z2QXBUTg!3FHx0<%CRx7n04C_Y z0%eAz^-5Nc$@1y}JY6UKa93~Y!v_|YGiSJxdu~}%+@_q>*tRak*jaB&cneSX)1R~k zqj$HJF(PQja0vK7#JS^;UoWNi-maw8C*g2Ol)8p09PUlC!at5{J6TA-rP1^wvT+p~ zvi}{g5ta0pP-_Rx?N`8I*y_V#hl%>&;WJvgx)e?iJFfrnG%FL7i>s;A$UFI-wRIO@ 
zKns0hC$$6IVgfM01Tu4aB{)v-QPeDNe9~n#a zcRgGe=1b56OmCp!PB^E=MNp~NQ6F%BFbG`qVo zhpnu*=@t*hD9U3ht7Vllo_rTp3~L+!<2JCQm`F|$%N;N;{(vTns&kWpR0E_)n$;V# zd$Z^E$>DyGXq+x%SObb;{|*OuCncU>4A7`i$VntzAH^JAa z!NKxF)cHzYHp@wE9t|z zbc;WxqD zC5}IqlE|MxfhjvUEOedv#{EJ}xc$4)Ds~8+PET6(fO+ z7v_=)s+`6{UHwGD{7S<#pO+@qxtMazhU#D$V zkjSrt!THgSmi~L|5<5FP3=@96lt%sNdn zHF3@I*FrDP&y|%~pfE&TThn|Td|5f3R4wc3;c+qFgb5M}#R#y#h@NSZVAZ9n_4c*^ zD%xQ-hj{xebZ(nsqd80@kx0fyIddB-?$hrRwO_PDwE1Hgq6m3z6YGa`T(>=_2U{}f zDm*+qz#KYiqWq3IB@+22^Z3g5jF8WtKa2Avbkw^&$ccGa7e6~|H`(mR>Ehx7-YCF& zntdeD>rlPyZ*k|&yP6uN83cb1z@zDu=JhXie?(_Gp6;-AppkRYvmRd_Usm?-jUW5P zNMoI=S)^~@L_v%NSBq|5?rRL~K!&W}C$PeNvCa3eAU{7^(r6DT!Y~~9CfqW87Lz*H z)fKhZ@H;yb9*>vKA_I^pl$r5)UY={9#`3|OB+BPsMrp8Qu=e8(51(KG=S!=}`oqG4 zBuI+&_4Nn^h49Eorkf@vaM9%AKcvNTzbGAHWimQDIv7eyN??ilrqa9!Rn;p$em5p< zZYmxe9HiWo;!4y{{c=AyNfqc^2*4;KBPk}O3Mwi%2gNu;Nk*_Gh(uP1|N0CAxR|+w zS{Y(aepts!uI}nVu)nRScwvE$EAq_=o^E`svyjOULRR<4DKSiaD8K?RGT6YC-y!Xj zlfoPv9P2=8Vs3S+84?DNa0ULKz11HxYhx~D-?Vpk=NNOXw~Lz9_wcARWJMy8%*I1G zpEmbfr~)Fv!NGfgNC**@dMhX(&<mnK_FJ|$&k1r#bCFadFH+dDhj!2hSr1zx?U!X~-Rc0iiS@rhzKF9Q{M=sPks zHPt;Z0Q1Heh{MpY{|qP4aJq2#vbl;dU&pJE+ViZ@;3ff7A6v2hFaz1&$u3!N^;&=anStgBhqe%_SU~`l1-r5+fT3U>SQ_sGiwbY z?`Hl>&-BsmDu#tKc@7#yG=Zmvl{g!|{&Vg)u(bA5>EiC8Rpx~liN7PR%E{GDVuQyA zEGEj9m_LHV6XSXL4bTIqZwt+{MT{b+2dAf#v$HRpZZE;af`I7`TNsEOKrs!NkrcSr z8Vhv1;D!B6D(o`b&Q@aT0a0~zzrQJu_eM=2E49CMLbNge2A*?vKATgbmG&xAB)Ih; z>9A#IcQ{wzUtoGT3@{mYJg?i;JML zrAGG)Nyjsi5Ph{Ch5eVM`e=nSo}YdgoChfyus%W6VvlipHpeWo$#)tJd+4v5WXT{r zbb%utonXamU6~B--O1wS{*q7S41kLh{#ObA-@*>^ d|JHoO78AQ)9l>NV2~N`sUr-MO#%z5D(4 z{`P+NfBQM$@R?!ezOPuFP zgO%~oVm^JL_aiAYGd4)JFl1UVAgb3X*75Be&n{cYNLXE55w$2*KU@--E94XE+>6s~ z>wIH_vEpf0rnNXuSY9FD&nbkktOi?BDwhgIA{yGcRU8UQs%EreI^5Y@mz{X;E`fT~ zCbUz_GcJru7Y^HCOs>L|LZf5kei00lL32e%Mg#ArY5xEAg6!kFcNo!f^ie_Y-@mU8 zy!R!=LNO(&U3I&-t9>*5#FQiji&jwZ)s@7vXV0W_HNO-U4Gb32i$~nFKp3lsw%kf) z?acMdX7lp$5Rj3PSy))QNKE}h-b@!Yzx|7diet9*J;nQUOz?P!A+f1Gt9?1(a#>bH z;o}|ohB_o56re0T)ORefWmWM(D7Gzwx}09 
zA0Gi2MbfYpXHFQ8z(;z7-PSBd?sDT+Vjmx$=(sq-x%&;0PAL-;>J{Jp3cANo>7_6e zbG*idjsQ^sr={qkq9+cE4US5KdCz&n4CUpK$FqdPmX?fK9Y@l*Xig?bTxl?18v+wz zr&tiP)s6sx{dxlq-EzHOA$!b4Q{l>x7*M*v!o0CSiz>i+DTcIY`$IMJNN}Gqf>wr|ho$>5+ z9?M9Vj`RI_+#B+Wt(1{18GSZZ)@qA>3`S;V%-h$mUrRH7d#qZWnL$cc3J0HS)voO5 z_$=ym5U#}7W9Zzzb<{+kjON^V-%#xYpPsU^jK6=|@&mYSNlD3<7FbX>I$1bZ$5klb zz}5E@w%cXro3lM5Mtt-U^7u%;fhHx!jnkcpo&EW$9esWMES`ZaH(_TxyNxV&69a?q z?(XiwmT^_4ou3aE8z=TGg&Gdw)(xla}FkJQqCdo)8}$1?8t-Me>zRa>!K z9cwRMyy)3^RcSN~?r#$Z|K`!}-&W?P^#+co;@|8X90)reDnFK0RoT3JNdP`T#HK6M zw!QK?=H0t*Y_b}0JRR;k z^x&Tv@ZtS?%E}xbOB9kAxg|4hwEc3yWn9U-N$XW$&~!*Qme>)WOqx0@yPwkX!A7(=EfPd0X^@+a-;qZ1PcZ_f9R zyAW~BR@!_7ZuJfAo(Txxc&oljn3|cfIyFG5o~hs-;IBSq}9fEUTnYyJDM&L&@y>~2EoV2Ps+%M$yZFwYi|Bor1ev| zY!-fRxEWIiUYVP>t3K)N2)O089KdRwz)EG?ayvWsKbul?-5w>Q6!#^}$jImkggE>W zVP$27=mjR+@H+nD2Y;B^vZp_gg~}{}3nJ`xKvgpZ)SC7Lo;_PXosd+Nmyd8cCB=G` zP-D^mPP(z@ldyY{;rP#awTY=?KR!|{mZwicj5xiTvU#itgv9loTfmTUsw`QIo~GsR z8-v4ia&l7bb!b8-g+nO>X*O~q5SYu!0j4c6QlQD=&=YrsgN)`{78!}cJrIsD2^RnN z*4Dan&~2t>t{9lgC^%u0M7ak-3PF_1SKtOXmJ z#@5@<|H=kNh>_)<`EK{~{I*>)%KN0G;JxYM{4ZZ1C}B{^o2kUV7a71VUE%{uOG-$! z+wqSPa_S%Hp@mi3HFMh;0nb+JF38}Pxmsh1Y6`3X?rwn7lnIIka~*FkcQ$F~ce{LJSbEC?EUcCun)_oHOloX=zC! 
zv72-g_)&|_9l8~1s0>QKuO)GeoM#Xr2C`{qV0$lx3~)>nV`D@j^66E>M|?MRSqM3)dNUyUH5L)Q53upi(=AK4H@5~kX=#JtGseMsjpu(`Ye3M={5SVy zH{f)8>4zW#8(Sy{CT@Lp(ZYSJQw$>hSuH(8`mUUH7#KKdHy-};;# zP)y$nC5RZftx$bQ=^H{es(?ygU-{?<;ky)fd1bmG9W)uVvxmMztmshXFq(3NE;zHAQi z@ewvKG^9&WhC<%Fd9%XGN*6eigTVOoDf-Stj-ri?P3J9G0s^~w^2J8yMtv^)m>X=G z4dX!Lm=61SPAeN51`ZBv@Yn0>>wp780|VA@RSS!}msGQ8WC#BDet906-#>jKcUWq2 z?Yj5+yFjNsW2le%w&3W9CXj-*-OxO`-@=xkeD~Zp>Xw9vh)z_L0`f(h^|)B_et7dW zBO{D^V2O9|yXMTy41796V`Ee^E`Pyc1;IJ{2V=2TerJv%`Re=8$p{zbV*-de`@2g1 z6V5qOaSHj%A~A9sF73Ky5yMuY%7rq@${48}hLIeG4c__b;V}HVI{_&Wa>1fG&XL6{ zC^6!<2Bau6^;y&$Spu%-((_bCp~o$O=nt}lWj!GJ*$5q{r zoU&I{dF}1X|K`^x2r=X}b=7}(pr)C%9aL9y$5DySsMGT+yw0X~zG6@FXWfT6MEHWMh>CGvc^z zmSO^Bca|{7mk!%baUf>L8~xGI(cq)9#>P}n z)U#~8mM&NPCaiywAv}76njrKF^FjaQz_wNOEE~6$G z7)*};2|6){K{$hAf*C-g5cZU9aQDMtL1+6lwEmP~2!w-!L&)!iW9++^k^WjJgi4?+hC z2ICdGSg_Pi_*&iz+NEEF(TBHAokJh4XBa2@>4ycS$cGKn!vX zxN#(Z_+U95`rROTjI`D-px*n~LQY=ZeD8pZlkYj}uf@gNB@pr=m&3Jy1$|)F7}&4s zOo)q%3%pbV2g}Zm)gPT%nf3{0+Uq8*b7m@P(_lv0wS1?0$Xkub;dn}XcfP#uD{*{RjGUNt}Y(p%pJJ@lt zfsT_C2jKW~4_7ogV{b*&tK-hF&Qp=eUrX)M>`=od{5(|Z3vV(IIRMHU`=-YpsgNIH zQWd3D`u2(p17~O0ir1*kyJq#DcoZJk9pW8IA$N@J#s`dDE-z;^Y~;*Vn8Ug<_j4q@ zk{##6Kq^|T(i!V)ZPtH+Qr|~L=#rKG&)_C;$q=3-)%R?*%YTh%ZE0R$|EIBgSE@LU zfWWV^t>Z-q4|%-jJ|lii?>QtXDT(&2#0TX9k z%8dkJrMi@9eBWw>(kClT{`zHCJ8sJ_L!T_P)M+#Ex=yl-94g|VPYdE9&hzuoi+FEUQ&F7{4k!suz05gF;|y0<-2rflWjzO94N&WYfI`Rwl6vKuz2E;P2aQG&>+U8;_T&a&iy zB6-hql{+6CNz)=2M~W^G^mGAnq)Xv?B4ErJ5ef1+@a7F62?-ooSsBg~NgdJ0~g4u+8ozhSW3)g5BY zqmq9wh%%%69)9L16xbQH`3yT!(p0ch8}l^eRmdMM2SL>g=+wzGGlHAXuZkt5?=fOw zGLd{MZ?W%=0*?q3p-u-Y+&s`cqDKsYH(w7i1eFMB0sAC>*MfdWgW4@AgtXK))h^A0 z;;^@AR(pVmnM9oPpHCpS)e^LoBei);PA-YR=IiMWG<4{Q5!TckLNu`$ zzRNmvl0a$FGA42AVrXgQW;nX&VEm7d9_LN7%l9+Z3dDdp};#K z&%>2?co+hd!9tB?RJGH@XL0Y17-ooGsO@x-nqHkPo`AzbaL~hP&O*DNAlyBJXj@QF zV0QjzcA?3Y0Yr4dzI5v7U$vHZ;3oW=fJMM-&i-|b;1 z*T=)5WCr+)jsDm--;Fki+P3{vM!p_?brc*`irQTdg1$_+RO?;}N9aTR9QY{q>6Zw~ zifeRUk8E-$ZJ^TNT2?*?V%A-K|CKT478WAo;o*fzsB3F~Cxpnw9XnZI-Gw+F58 
z_<7V!V&V-2VFdreXE!$rMnV$R)+VO?>Xqg9k57BbBjAx0fL^fbu775pW}c2%@%~-y zG}TLpkC%6H;{5*ods1>TbX*wxkITvL=jJU&GGY;?6{wO|*!MtkJU)gVc$4=$3_R0bl9$J*fo z_T56OfNY|aFm;~d19G~6pPt&FDs%(-VtwWMTg#?c91n zB|ZoTK`;u6Sa313sXTelThW$CfN1g>8gzwBjI!+i?V*#QIXk0DdLrE|3qoHe^4t|9 zo5j)~w_ZwDVO1SC_8z{w>(Vr3Lk|5c==BQFUXU&j=$yRV8$0l8McjU)fyhPsVYnJV zHvr{&tsfOoEfK)ES?{<#pl}mL-@_El?3TPmx?8jFdKPdQS&e7*$DtaP21;-#E@1j4 zUsumc)Aw?tUbjuOY{#aY=qFgTbRpNgqvE0>6u2oPH-k_QIH&)B#ieEfd3kw(iavo zIVo<}iAb3Z8xN$UvU0#OQ9LpNlLRa$F#wU$AnH^5?&eCHGSjuYe!y#TgSI{7Z5@t~ z1mDDV;fj2@oS32VGT7cuPhZRIxq%=nB~gqTA%O4-w?052!3|bEmU#VmkY@0!PT7sP zy6L`eWe_-@@I91#I8Fq}ITWB+b#EE71+5sQIZmKnSXHthn*DMx9E(}q9UV&=0Vh=(h$H6Ia_37v*YLP zU}MR%>>ALhLsASM&EKNIfu};ykjF2zSPI_9ZH0jwU&lR3&mN-Xx@?)aC5YKY2Jm5< ztq2TDLDpB8{DicOU;Yb@OwISxQuhT-hIC#&xf;%wvaxv-jS5HjdTd;k-C`mhpM%!_ z@8#rz-QUOBzBlj~(%Dxv7uFfnPmS!`^cCXLObUl+gl}UNTS&>Ek-1qQvJux2PU<;EtK`LT2=Y4_uSFkb1zb zkkHIeGYbryAUUWE+HuT77{ekyH6dl-*#Ct4P@UXfX=(;%Wo1eK+s7IgCd7y^Ox&{v z^t$yaZKezk1IR(i3%dDOgd%Y}agvN%J<&JGGE|@Z-r2D{-5$FSEc*5hzgRuH=SQK+ zGBs^NBo^-5Zm~LfZj(l?7cN+mfV(iLn6uVrCzj2hk^*!}skX>n+^`jbC6?CHdl>!( z*VU}=2zx-W1d^}Bd4x*Ww^R^J^VZNb0eEWe_#AFfaiMUGMu+%ijr0K^ zpE!kEbxHh~zIER&-ls2oeOroS<H9WwlF#Dt`?PAB^$&^XDua9QFG1zkdBWI@bYi70?y{gFqm@ z?bKUaTT0KLOUuX{jp8UobDdF8>Z^2Zb5LKrY(-(~#7+tiN6MVn0e6|2nvx0D+gAb{ z!jB(693)zuS0xGx3x%;VzkTD)EY*Zc6>6D}eR>8eH&`u2ndau^y_IiH%qBf>jcx9k z(pPRUD}&B(yOx%}zI$GPW_ricLt}uGoO4ErJ8;-2xXpgNPlj%9XQ!#7gIuVE9R47^!*C4UmKV4{(4agoknhk!5>po^8i!??ii;L_+om%g#m8rK!k~|PvvUL;%H)o9dq3x`m=XrPcN?P zc&=kxh@}A{C&ifebxrp?wAEq=s%2yp7lw)}!V6Ljn`N`o-(!r82B|wq@ zJL@$#WT^sDT&9bveOD*Rk2chjJL)=_H zPw!1432760@^{Rl3JVLts>2zOk56Bs+_8g+9yLB9_&=t+mX<)ttA9j)f^qDa8>*e% ztyOh=d~I#S=ds}U>Ul4>ARYcC^AIstfg-|wz=;X9w2$Ak`!sq;y+&b+hyWvy-GByUqCs7;SDO3Kx} zghY1mF4;mZdPorY-^Lts65!rtO0_XKssLvP(ff3C0BUx|#q{CcXq`@>K9>S(kz0Z$ zsRAT_ACM(DXO9Mi2L)d(x_}YC_88+{smrkwe$IpMPGFTzKoe8);$ix6(UA!+w!epA z25<$m#If1rQSV?IhQPRAnIin*GSKbo3){MzVXjgLl@1tD^PibLYv`B_L@S(H&u!wO zgI_8-o_sG1A7|7N`aV59x;F|}mNjJqEGY0IP_t5PNnlXeW$Mv6n}z*rrf=i5sW?M+ 
zW7NzT?oS7ZN(`JmH>`{?6W(B7{jtkjzDTCSs_|Xvzi_HYRC~K+85qk zn8?O)LrSMCM%9w&94(M~di@Qn*0CCqqk)OCStpge{ckNmRU1OKy3Mqlpr9ZLfI9rt z#B_n*f9N0en`9%Gc8_SDQEL6Zy0#{Ddi(1i0f8{WDgJBt5?J<`OR#KBHn$5Dc|9hq zd=STzxkEkN4ff&kAby)h(-KK|T{4(NFD{>e)!lxa>Ko74KNMVLO0E?QiZ!&%%!7Ny zn81^P+@%~~Y?fl?a4fu_eXcIyKkPz;h}ci@SuN6u82azglhY+k-f2o!&ASXFAu#gr z;7Q*8oJjig=^eb-wX)p~hs=Pu1^?)^e+nF->q{HWNeM{YA7k7rLlB#Mr zV4Ah<$k4xmBm%?;P^Jev0jLzc&Oq_m7}$pFog3Aasqb52|CvWoOt*2@((5OmoZot` zE{vs8_+5@IFKre%xBtxSmZ{9G|K~d|P^$(*+xKN}6-iU5voW*rCiX9zN01#FS3quRn2WFZEhIBU5YDr$i9 zQpNpp%#=~41TBy(Dxn!Mk?Ut!B}zJr0pBxM-(_D|veM^y!@BTZM}p%+hP$?rz{bC- z%5|0Ye;>L_G$XHvUkr5^ht$09`kPC|zC=w6_8Anexyj?fR@*u{7QXO7Kc;(_E9+{m zf!Kk5Ws_7o9DdT^h%Q^sDke83|g@M64oidci6`iu#U+l5} ztHh;2%+-GOe$3bIvtlTjGhcUG@!)7^?M~?O<(K*KppIh^_~L@d<5=|iHN;x&qs7mH zsipvjrupx(V!e(~3*L;RtgOE-hHh@WD?U3^#RKfRVrlzRQTaGDRFJYJ#I<3C80t=%LMpsx~b)~GF7M$#j|+7XG1VoGdIjfZAi zCw%Zo%~jyXZjg;vQj9*DtZ-v*(@7|Cu{#r9YWi2W`R(?zxDIr_a*K42n%34jx30*=3a-^s)9*RC38*PI{a^%~w1 zQ|=_Y8y|9SE;~~k_lX+ZUG%-qJknH_IdmI$wEus{GQk#!=y6O6wcSD_0yz>n!y;Sqd zGjj=z<0jpUI_I_>PK$YeQfu(bKS@9@nG-L{e7N8&H|!nS?9Z()Emr^z0k~o~Yi`Rw^!+*>oE~$Hyt3C(1kz^E*YSFSg^!c< z1fGX@HwWQ9<2CA79YpM$|~!Y`l^(<(AJa7Nh2OV z3F7_0<6ly$b~g-$yQrT(f6hFH!f@BlN#JB@;2+Y-sr{!u-?5^p{YjfV-Yo{bD06O( zu~FF%r~Zw8l6>_n0(Ebr0rG-1C0)ehYb%Z-8nkO70QpoB2f;=X*|)YfIL}EMk%!9lYc+x1W8NVZwn6D6fM_Eb zaZTfl_{rHpFaZMt{buiYsM&gTEJX!qDtc!tkJ4EvLU#C=>EHK+ovk%Mz^-*C z2Hjc0iO#1_pWbT*hV+ zOsOiuD($IH(32UenfQob> zH?S)NMUPEsOr+HEnZyX!a~HlQ$iGDz`zejG-2egG5TebByh8~h-hl`^9W>VTYuL35 zolDV@XYYb-^|ea3wTrfOgg2o(x1l^7P^9YWWQ#|NrakQz)qUlDaCsU6KPDjD41kljq(d!pM|^0D`uH31$m)K~QNt46W!zi?@IL1 zUi6m6GL={-mIjf6;$JJmMSkz-@ghOm(NCtuqjY{BisE9~IxC`uE4@`hCz!|>W>i)ozYW#iz)HF0T6r)v|N{J?oQK~wTb~$sK*I7ll zvQW>`LNWzWB&1HLU>tswf9Hm&x%M?ZJ_$c)1|kpIXF?&l^pU13f>5u7(;Q-RD{oS_ zV1OBb0>|rP1gvq5iYnD)ZKvZ;;3r$2g};1eR&iLUdCbA`8W5FMnshO7+U+_X_XNr8 z%0b0vb4L(?v5ero%|Z!IGO~n7N?IT#92y;ctEi7g#``c!mTvk;v-UkA#0v)s#{^@~lbn4lp=c zgeR&fMDle$tmaSex!jz+0h0qFBj8-3`x7#0bU|7MVkyK0ur|Mc|DG(Xr|qh01V!a8 
z-b8Q3MD;J|jWTtAU1=YPt6gq$GBhI)97wgF%s*R}(l_Q)pJ_n*ARR+`q1VyK(MSsEDC^Z&&9t*C2p#hK2SzP5+NQ1*(CP19}`mdodNJ| z;a)4!i=9>OD6Mbo-kF9RY=| zjXv~D51^E?pDUNPvSJK%blCukD4--aCCS$ioCu7ZlY9)~HWlM4w*-9Os|V6`%+#=m zW>3WP;{D1u-0S&=RqG4QG@8?s+b%gXu6XHwBcw}d1zmcj`|e&>(;j{=0ktgnDfbp= zz?hhF>*{8#jqUC2JF5!6e*Fr=!<#zX?II&18yXsNX{u;$&WLosBv@L&I4ujRwoe^_ z7mzQwnm9jf`l`f^3|^jlm~t<5#k3G-l{hM7hcRP+*+17lWRA~N;O7?##Gt`4HEpay z+Vr@+cRBd{@&5@spHIAo$aurE8gcRV3G2?jAmC}XAFnwyji=gqj|HNsjZsrwVh=F{ zk@q z-m<>8=`L1%FKm;#Wclzfr1CpP!?oeudoKl~zY zJ5@uhi@(sIXiVI55`U(<(LQ<9c-YtorjB~f#*XiQs{0ck{k<8U$_Nl0~4mmjvGk}3F_DR){BfRnN#Jwq>TJ1jp!+P-3MMbXjQKun{?JCq6Q zi3k?QgQNugRX2h`J1kp={XSb)+xZ;sIN~UaI3dD~YFJu=A4oq}t{dXGiyap~ z2>n!IZ!u1|-C*G1{b=$sc})r`8r9z=@z2BeZOhu?CPTnL)OTRJJ%{vsR@-=){rX>)R)po7@nu zex;f#0bwm|3tu()P@n)mCVSW-I|^!Z)JH>)uwCZLqJDn)sg#NduzW)E;r2xdA(D_U z6}@r41_Wj$Z;jC_g8qZeqfC%E|*u>qlXwh}e!)SKTuS_C+{G z`vx5=tkbQdM0HyL`WcLk8QpD484k;ef9gAndbC75X*xOFL2`}#&lH3O%} zgm-NRkYXFMjsy=60R)wG3oA?VSn0yj?@&MFb33*nRUGa>el|7`6A8JG-0ff4Zpzy_HZ(?Oe1RV~?9MqZ3On#Wa%sHf{$o0&AfjxG;B zx#U$M|7)if{GBh8ltLK+m>uamTLT{1-i2Qc_;I;t|!1mI?h zuHzw}4jrOc_3x3=-LjTDR*f8cD?hvn-a5Bg+g4mGRN z&${tRf#e1N!Yk4&%gf?dn#<3WaviMD0GdSHQzCx?v|HeeW%Vqo0>T6NF8}hMpu$ks zK?e!@zrc~WP{g)S-`X2;i5USz2w*YA{DQEwwf?M1D*-J&UD&{*K92j8$(KEY%kOv$ zT?bi?=<%WlNSI_x)4hYwPzZG%XxShAZiJQup0wp?z+~>Qk*qEi`aS0P%>R~o|=BakjDUOGTwWC}WaHaP=M?5zkDP_vlxW}skw zGV1|^?6nKt(Q?}Ie#=7#u{d=5xWSc`KF7n zr67}Vq%rN=s`L@tuwtm;v20UR{J|9Now|w=ty@;FPaNA1xD&YOkoBKwHmHq4As2_M zhbbq>2$k0Z_>`tO|J-TZ@0q?l;CuH!bx{Aj9aQA6hhA`||DC`xW4AOBkN;a84B zuPhLpttWQ<;Omy53M$iALeqPT1%6y!MUL!L0zSp^8H#_2W8{jhWY+LiG#=pbuzoyI zX!6IAi4Fo#@A-d|c5`!cbe$cnY;3(ipW`rBs?7>gH4fHFAgSVn9RBXpTYE)I;H3(;>zO<>$r(i!y+dYUg)UdpjkH~(}Cvm z`>*x0nw6qSQy!?e&UZnl;I>Li)jr0r`N{F9s5p?K>l6BZN(~PUNhw5buGVE{W=^df zm~l%<4jN){AKp7&zSC*8ZM~JKtFODb&*j93K)}Yv4wHBVBA!D#9!-w(Kxqr{(rz8L z5sJ#`l1~ip=(}XkG1`TMfq?<(oL-8o_Jj4%N>T6dhzN6_5~3E5T17#Fh~1w}vjD;j zP=N~h-7hu7nH)sl?!rL1Bsn`98&dN1tBjgjd^HCX6Ea{LB%U@l9^d-y-5^pU@#0 
zIde~y`|?IzQKdI2^-CAkJI#WUK3LRG-D)@zJw6>13hW>|uedNJd7h5u4j={Bi%3Jo z#FMgD+KA`_t4h$812gDPEfw*V0vb9JAuY80 zvaF12)7oOu96L^M%=7kZf%#B$yp&xX(oUuEU6&@#LT5zS*Cd1c_Vk@G4J6!%6cx%#h^J}t$- zeh6YDL3*kh^|xubI2wFB&JBhfuWr5;X8hH2WjQ&BmoC7Z?n#Ix<=IWu2mK|`jU8~} zO|HqVNSH&o*n%%ImnjPISaL2c34_eh6E*RwUB{8qdct5bS@O^g03cDXM61GrjszyB z%sXYnv0hO>BAbXT(wE;1V-YzOzA;CqGXWQk0!o+k%X{LI>v2z6E5e1dpJvn3uHwfy zf7zR(8*o%!E`n*-tfjKhd`yMmm$OLuhYQ|8W=xR=G>~G+Im!j$MIRTzN$YRrK)MMy z$pvFFsT0sAjV83?ef^tghMavJkBuTEXFMzL`UtMr^Xu?==nYAVYXSX_wxvPic<>|^U_h!jl@P;$FOJQMa=RjngZJ%7ine9(> zx;}toxYh=A(}N!EyCyB=exZ>q)css*QAusV*)I=fPz(QFS2G_H>m3$UAn6Yy;uyrFv+J?k0-X=0~N#g{NZ}gt{@BDFbAyWkp}2H%t{;F zjJ|nF+38O}MZyuH^_53TfEOv_Iq#C!qJw{_bFZX&HVNxcg$yY-vjo11(<|B+>;Dx0zp|1h_?_(1Ul}2a13K`^vl;8E_EpG8o0DM%X z8SIUAxC;ToPt73o*WXK1k9O(F=z0)`U!<24ZUW5+yh=G`4_SV0*?TPIM)<>$p~d=~ z0SHBa-B{nDQe7z?hUDDAYD@XkaW`YaND}6qHQ#6^tZgQR z4aKMV|3vQOBcd$8nbb>|@*HFa;%k9E!#41M0Cf0s0py{)NOymWxk=gAW@>_cMS%1TD_Gnc&lqOGc>Ohlo1#rC6n7qT68U}z$u(pq&A8zeCB5(#JdpU< zH`e`ZMV$QxP=f+1qM!fpqS5tYAV+_=iXcDwgOM&NnvljU5&i!74J0^*3WYyh*-VaQ zGJkOZP9=8OP@WS5Er2L!hFW{J+IzRbCv~fK7jWl5Ck9!QDN(&@jE3?Js3hT$#I#x# zz@nz^D8rN1qY$hhZDUA*7yM=p5qtNtEIMO7!!=K@BzScfn58A~pT!H(T6BR@pROu( zyzQr-i7?*L7r|#??g<)~?V*ASOOdcsG$U0X{36Xo^vqN>Ewiz=Om8a$@rda|>9aZt z!jsn4=DkG!-%W$KKC9h!64yL|Tq=#2*pTQL%8*+E`r{+2)S^b#r{b=l?=998bqGix zs{?L5A;H5|GJ$~-0?sR9zXNVx-l-uz803a!nMP4h37F=-pil7Ic&+C=eEF-_+5}O9v-BXmnOWAmk@2 zF@luRDdo5uq+$LVO z|LMy3-{p4I>b)5O|BE@ok3mWlJ3$Mq^~R|7en+89qc@O1wZBh)g=FmIfK#B!g zmJttk={o~TbKryw#(&oVkt=lBW?ZrAoHgM5&&Wva_37^q8XFajDc?|<1>`J7z^-XbAD0IkOlx(;$d2jP_M(acK?^hF#hk#Nw754tvSn-0ye z3Og8+Kxy8Tq?>sc%%s|+vG~v)2o%xL(Up{xAP@j#e>(U9xfE(U7ii17JY9UH2Lj0* z#eY4#BK+=VzVo5J3Ui$cv^1fvydllo$Kiat+};0SeoSb`e{C85j0;21y#tepR)MQI z;T?J40`M0@)dkagnxGbZa^ePhCwi@TvH!S(Cr^Z}q``u!Uyr0hFI~?cd!_DL7;;;$ zeIsQ}UAGMY7pD30B_c$~>wpqy2C_cA>^E)V?bZVl0`+W>h(eV#l>SR{2|G@p7uO3ZDJX8*N#1NAHV+e%)Uvzc>k17AwT(* zuI>rw!uqy)f0xZC*$DcR?|>{S%YEEKgvzwmI(~3u=jphSm$`ZF*vVnwrD$Tx2nBfV 
z#BjWVWzM4U^&_{Q^eIMJr#@!Xx2jI}Z3ED$3DO5H_Tqv9pe;pR@@VDlUKswsoc#X% z#$KM!$6Cooz}WZbxq)hV(9Lv?n2Cah%SFq`$cSUZImkAh#Dv#PG?AkvM_qOu$Bk6} z?~E&2(<{Akb#A~_M}T$?4U&<~ta~z4_IVTV1}K)d3VR?#)IOSmCmS4{<~&k@`vCMg z0A#T3FI01m`@;QnrC*|M?33XH^#8PvNlYYpFb13Gu|MbY)s}j56v=Y#d!JOr>F#jG zVf}yi(1vQ2%4uog1MT+F4;3z-{1T_FzZ2fp)=&QI@4qlH!GS;@e+$FB)S7mbCdERo zEFxut*8bE30@4DslmOcQ%<5Q-{^`?UCu38BI2~PGnc``{kP`lvWRQi6i-a5>3E!q; z9xR7Njo#HK&{GU(`7Ftx6$*I|Hk(SYtrmwX?eOP$nE3870kd8Q;y{`?j>D|$$F=e= z=rEg(=+Aedewl3ZT3{XEU65F~UCqNo|D_s8VOoN(%N*9D(gFfVHF~cLY2G}`}at=B1^6u|r zvH=|!5YQn)aA+hblg^FD{i}3XiL_Z$svUsvQ>)a$#2f{~rmEFT5A@7N(fvMzTfUJI zR@Vt1+?XBq@A+0%AER>7zIGG-mvokC)F3@2h3PJ7YiDP1)Ma6{BgDFTzG8Z>+PoJf zb3&OB|L%Gs=t90G1~coLY<~Y7{s%NQdDhbdWElu6xtxS_27 zIj_YZi#-RHw|Kj%EpxzG7M z&$+G(cMS)y*RZ!g{EuO#mn{m5m#_k(2nUnhU+fl#7KM!A@QX5^ z-`gR{tOfJeVz9}4ey{Z}?T}V6Q|5fY|J+34WCeG*`u%K$O&LEPKYmDDxD&NPX6l@^ z9`*~*;};=R9|PbtE;gkP&VLpb7M7lt=7we3l#ya$4rht|%rMUt9`Dm+^S!n-7mWo9 zp!CEb^?uEk2C9 zRHM5j)n1svV1Uo%?IJ9pqdUy=Sy2&=8wT+)mdau^HZ`5#AT#Z0d{g`0wwz09S@}!5 z5pw+-aXl&;7n~t5@oq9SvdF5IduA^-Dv>$ot!NrsIHBlJY(ia{ibR{_M8Bxg0F4lg zhJ-SN*53;}El-x$_Ntpj1ebq=ThL8n_ID^o#=>G}{Yw&=FhMg@-Te5Q1t8ZVMt|=O zq>oKZ_!__D*XIbd)_ew3Pe?4TB-URyd5$E~*FcJnPK`vTacjMqIuftkHUGMYdFzX~ zQGgmL4r)wM#>PNiSGwj6F0xGiXd==x&D3S%O4a=y);g-!-4p>~4K9vv?PS6Gy57AL zHKIy^ICS&? z9(2%@I04{TBr+L@ln7fm4TYdG`vb{gVF|#*d^%_}QP(zRBcK zjd$I>PR~8RIDXO2yxz^fS^xI5sjNkIUjXXP(=y&=)i_7>_;7~g?2L0=sd|GSFT7B5 zg@X0hlMu)a=nV5*L#I=z)CBMO+|K!eB=r=K-&e;85%|;hE2`p-gi=j=ovmq{I=J}4 zo&5d|tZawf_xnK!;&Bibc})M!%C`5HmzV3s+qawd2eNHzHML0}Cc;%RkEs~v08>wL|J&bS zfGfZYKLZkM1=q8QzGAcXexacJos48}aZ-6*?{Ko^3(zTt!W7d!+m@|w?H$zb(P*@X z6YE!`i=H{x5y6!ZaMRllG8?KZHI)O2eg;r`#nr#>85kJE!u$RRXBe26B&u46xID3^T`y1rE~r{} z4HjeBA*lNHb`$Qk2L{l@sw%Jz$uqBQbbTEmYq85)J^^+lKWmwgN@9l0-A6v^Ge#Lz zWvGgRhycr%x}55)M4?D-M5@PTB((DKzLYouu^g!!n}8Lv9mi2CZyZWND}lQ7TLj}j z9jN_@n4?1s5L5z3sFMtpwYYx165`gnzP&mF{5U0*lxeF*KylE+OGr>k+q|sk + + + + + + + + + image/svg+xml + + + + + + + ? 
+ + diff --git a/doc/source/_static/reshaping_melt.png b/doc/source/_static/reshaping_melt.png new file mode 100644 index 0000000000000000000000000000000000000000..d0c4e77655e60a0062b1d5d01f2c7d9d561c427b GIT binary patch literal 52900 zcmeFYby$>Z`zT5yAfc3igme!r9TEc~pmd|sjdX{IfJhGA-Q6&>Al)U6bTd`doiIA0t5q1MaJ~Zp4 z2CoAt#UycFBPqWYd)+GfnEhD*J!P&peE3_6yi{*W3a1B`p0LKdi~Tx}fj&Vk9i1C{ zH3!%j7TnXq%&xB2cr0-5ezF^AQR(O!6RPRXB8ehCScIbsqq&d(Z>eh~AZA{>+``HQ zKW$L1)~#e0-D%w2vDi0x4hzGHPCPg}3qC^=x1oYFaZHi6eFzupDZz?Ej$0Q<_^3}> z>H&gx){l1hnOHkqH*5Vk7E(7YA*5|YQ89|}E_gVc=ex(>FOVo41Hz?AJ!-9A^QOlK zzLiT6jR3*zBn1oGN@e_q=ILjP5%EQDl<_abkUL`a$fC>#BL~ZawMJ!V~DB!Dwun#2Iy2o4-L>itF z>037UV+NzFiDGXc>f!`x&r={E`KC72Mf0spItD+ZfaiDs=P7CnKFaYMRh8qltwEGo z@h#4(DeV zwvLIWFdCAfgMgAFkaQLl{L=hY*gr?yl?89NgO(8k z={acmQjx}dc-Y{wThM^Mx#O@nW!g_bw8f!0#@S zIm40pda^y3rzksw@B8Rd`cU=RgEp}-9K?2%L(vyuNYto#qVZwy23S)sSi=bM;QYkP zhIHu>dwkK;Ag^hZF?9T_L}969CUCCE!Q!Js&o>Csr5?ThO3xa}_L4a3l*|Gd+IGuEG>^UB zMq`7oja~Ds$rCHI)$RbDILJu!IGrZ^oJ&d|)nn+*#?6NK24n+c!^ocQlF#+2Q0Q%N z+IO4x)NE!?aKCcW!r_K?w?)3qXP1j*HQ04;Vb(8GH!6IfN~Dp%`s1wy#~DsF!i;xg z5NV72`xqPErzG+SIX)F_;Wq5G%-RflkN2?li1rZo=#cS|vHhO4|-mqM?~V9MY}KTcmn@4imimvR)d%G1!(UuN-(Irwco8|NKI_erLZAEK(oeUAoy!2JpON$u1A*w|Rx*z{QO z82cDF*F0}uDOB-}ngb^*fL229J$l*KAFte{u3w1DP)THnFN;AWlwQ~+tugdJWuYlz zD>-x?W(N*vA)n&yAY?BVKOYkT{ zDCD0Xeztq4^N{>W*;mT1v|n4lx_pfxx_Yu6w-JXI`!w!xY)`BedocUAzP3KvzO=rJ zzR6hsSg$ghvW?R7vX#=Y3PJsg)}Ge<{;kgV-su+puFIg{n0{}TPetBEe!DCJ@j8if zpM{cd@Z1Qmo}yCu^ZR3iSv$EyCKKu9Kgeqh7z|V-HYUny&uU9)hihkP!|K**@J=s~)18UoENf}sZ zxl@G$rBW_}UQ-V}V*_4lo_)qwdW7+)0B4&J9LV$q^-D~kNR+4yy$q6!vh>59wjISe z{H6l4$x>=DMKFi_QTj?PUPWY`a=miU7@?_{soEyOrq$*G#}|$?j&+0iu94pOp6uT0 zZchkq%yBpq#O;{^i#2@;DhBW1hjU$!ispT=Y zbw{P>8DFY;o<;fRKT**c#~XJX$LFT0dQxR>rZ-MAE`0T{F)a{e&>`~?+wTmvS&6&?-+@&bIP+BFE#u|6u zcHf0uC}ml=qG`HGvT4+l*wfE5<(m5%<0c1z3n2vg6{;C>G|~X_CUVk4%?CVK!lc(! 
z?TneU22^7VmCwFX_A%{9@p?E*I~yZ8NxWzm&6F@uT~UwJRQ@m~IQ3PV6jO3P&?@+y zgsy|DzN9{Odq}70_o$AWuTEc^lSN~2l7|xLlVc(04Bc#-?i~D6bancmh55?H(Dl`d9Z(D$Y6mmadO)oc@O&pX|yxjP8^7x z`sP#&Pl-)=@E!4G@ip6d*paVxuSBgBksk4#Y?SW&#Pxq3a!$V4tJWjpfak__z46*d zwbk!MeOqQ*w?08|b8yqEIio#dfH!j5g%66SetH-|^6AU<@cz4pDt zJ$$`(JzmP^s)7Pp9{IOilba2r%>zhDE*hrFQ`$7zDz*e}JbOExb|LHag9V8tMMfn9 z1-d0@B}CA-ZfO;&b=B1&6|8rZTkm%|Hf8r>EL4oOiPJd-K%NN(gj28+AP({2z(BJ!G${4I=F-w~~H9KeK0*X)RwaH{v?tTGM*a z8ZUsG^{&D$!Y}o^>5>(ylx&rDM?a6=a9VMa8&Mgnt(OardH{5}%sMPMo1Ax9zre;= z@e8*0Z>sPp(NI?3Q7_je*1^)!y$Qd9?5_LaM@8-F$u|oZeJ}YrAzv6&RCB{~B~<>X zy6lk*C8Syx`R&S!iz~!~A{00?LbxYkf>;W(1I>=;4=uAv;do!5(37eo_kU^CXRzVG zV~r#_7eHd>4*c;VCqCO{KqA4ByXr~|B=b@+A-#XMPwB}?N8zoZ4z@G z_8oQYgw$j^j8PB6N5ZnBJ?Ta%5*UrC2I=lJetuqpry((vU&UrT%||oi-*P@(hxT&2 zb32)Oj+zb67i1L34teA_X*Ca|ww1P+G_!Y0^hS=`w-!z|%%``NkL1s}ju7Z~8V&A= zZa)wx<}0d)Q+xS5UEq}LB$*}@ZP2p>nz=nc4zQ}uGw>E3{>-Od483*fnR}YWJFcHL z=kMDk47|qVj_Nk3*cr++KFFbMNHk|c-CBiz-k34a=2@ARZGt(za6+)@pT_p}h?pd? zM2bgY9KH76zzQN8?yC{pQeM>DaP6z>D*MsYzE-=6R-u>ysW*&Y&Bs5=p;ib=3D4sy z=;0M`oX@;e?6+XiNg9%zlHH0q-DV+9EOyq#Y#?wEYWm=%Tywc7Uuf{jFSFWf0K3S736cMmQe~c0EB)c9jFK#X?TlUg;f~k#7T5ad z8Ee|~JIbG9($rtUAyO<-#~se!qrcAudBw5C@y1k^$C#1BJcse-l`XcQmDwlP4{ftM z;r*G{eZ{7T`0(puj^nYo$*lx!x$lmKr#~M18HYsS;}fpOtUOf^30Einz>&(FD_FHO zU|PmWm@hFvknHB^nf2M(r%^Rk{%IiMYH$4MB_@HIcJXK3-g?jKL&s6kW0PLxZ5Yov ztt_;i<#cA+DRTd4PJAw5W20$dOk-5#Y|#Yf<1Zt$0%i}jbDg=0P1)P_y83!kH-T{_ zc9%Bx^B9!Ur0Fg$LU}Ra&UG|*SeCFnNGnB0H$UjvPBE{LKo*;o0J$hUmpo_cpG-L> zC;Z$jbm^&IX3JnWb?Dy}x|*`fB-1>mhGu@bJt}Y;^U>%HekrQvL*H-AZ*x9y;^9VO zC_8_|slpnLq76QKJO_7!l7Rz(w`1#tpBS|W-qCrA%|~uCw^F#&Yags`G${tMTd6WKWRmECNy0d1?7 z!s0$!X)432ALKW^IYMnQjwU#=;?utuI=;~X-%MGh%;|AOaDRw5)I`@5pO)dDL@LZppgacGvT9H|{Q`S?eTBBQOT8deQTjpl1 zw^`a5-hb$))KpP+{8<)ao|}+5qXml)igt?@`XcBjNYZHEpbL52aM7qoc_p2i65ed< zZI&v7!@$e@2`?XCiiGE}r!Xb6BD*cimN4J*Y5va)YjQiAybI1h&qL929%DWRN4$U# zK~nY`>-NuX2O_WXo{yM~%5rdUP+`>?5kW+}@$Mr5)?` z@`)<~RcQD3hHv@rWXlSqKc5D?h)e$*;ad);;5V4orbJ3piffjOpF0ys+|EOcAD>Y9 
zz0!K){f6l#h;xJUA=@-37MljUkp7v$ZMR}iek*@Zb!d`rWAHa$NJK>vQ;3$&mB1jk zE(Z`q6MNN&2EI)lvU-<(5ZmQ2Y{72*ffVhhPP+`BETJD@*yRANZEwQoeQXX%q+&iHc@ zcr7|8)!AH#37y9}bBykDBd2{K2eSRfQfhwdLRiEg>cXSuIJkJxiqxe=g*525FKx{d z@lxZGxbM^7>vfXQR?^DQs_ie&=|%S>s)qRtz7i<4XXAC_dA|SpM|d_uz5b45imaJ} zbFu;Jb_uWh^hTee$}QjbZB?PcO8pf@DbutwgnOm4qtN}~ z)p5(oj8pZ?z#mFkJ_qX}{=+2vrEJ$E{xmeeOLc_xBRkjJrI2 zcRH@Qd^x-@Pr{*f)tP|r}r zw}p$yBgT)#q8>xWc$hEAR=@O!>WF)IyS!C6e=6v&`ocIJmHmAzSKouBD~_iv$V(^; z59u`-vKY1x-k-}VqboZ-nD`{Kg+xhH7W+AL4Lzc{DJ>|+INSQ2jlp?eE#Ud>KWao= zJd#CX!r0cSrf`%xgFbzf_%V@6J8O18uu!nXnd#j9!Vt;31*0|SMO@n+CAu{0bE*j@ zPg1^)BO1=kvzMJ+=UryKk5(>M?C|^WH1G%`bs3xrxC&JBNEH>PNpr$8rt(k4!Mc-b zAcf}X)Tx8)VYy{3TA}RaHF}H>vfu*52^JnZ$B$>pSok3s1J|sr#r=D=Lh@vELeK^j zY0~*B;i)Du&+6On@T@+YVou_@;*H;?seVvF(&weNzmI(%ZU@r}$}h==%*-_Fv9D?q zgI(Xy-iq2zX!9@_Q-U^2UkPt*%(N9qexO0`Y>v`p2`YLmaJv1H&i(N{-q!|K8 zJ5Rq(-BTmu_Bd+9Jb&^kUb?@aNX3$8cTshh^cadknj_Fio-4HC_Q~^lMP^pX!fTJS z?Kod-@LiBvnzh-yC-HJzsb|XB;Tc!&&M>I$SfPoKgh>1a7IHW2{dkD7 zAO%|*jwThg&2N1Sxp&*%QeLA^69iE7OEtb10+Cef<6=GGJ0B_r0=6AJSE8 z%~0Zo^2x*R$j3+p{*8X4Epr)cInJ^~!8xsZX|~iYp9r?-)0q$HlK^z575-6c`U`peMG0$s+LTO5nQ;?hMy!}#(!g6dXyWS1+m-#P^LUU=NvaYif zdLDL{^kL&7j) zXv{ul2-VB4^@^KL(7q<|1`^G|qPZXEtPkOe zalB7I1_zJl2M3>4NSCuCw9F0dS_^0yW3s@lCM-C&EKhI`czNkxfclp^Bfa6Ww}Jja zlBu$~y}H~hetm08)^`Th?+sa&oUGQi#vpb+K0Xi|2Z)1%1z5pi=VE35&Y8u^j_UU&fA1q^%bEhOP);JA;o zj#X1e`T>PE9Kt_7YM+J81PdC7!NDWpi2nDZ% zss4F<;5K5^&4~ZF(Ea_aFmNR5 zK&^KqOIC@QOCeeCs2x`!vo;a=Hwg@keY6RSB9olLJjCyrdLZXhTFS=fa%AwaCx%%= z5uJcJ_O0#`c57>^?d|miQblhR6P?TPnj9O5NyPnR7<_cOU6NB9*ye}PTW7bfs6Fr( z-tr1Kmhh>IjZ$j>kvBHRt7Jh9GS`hXB!p>K*qqY>O>E4$_+YY-Ve3a^G$W8kxx{p( zDdcT(jTf)YqW8`mbW%A(I-GJOQwG<%6^vQy$vT zMRV+wYVAG$F_yAD3#R;h=l7R}JQJD?@_6D|~RCs=VxZGd9)}N3Q>;D&_DZ<#{KX!e+ zDC4=u--3_R6~z>DvNhgoA+sO#H8@zVT%{3g)A0r0$a81Prv91pyJ90m>!#iolj5{)Xe zD2pGZVvhUsmU|_I+I8hzR!WWq)1shTF`^|Sv7PGbNtwuGLGi%okhP?#F 
zvPE{;z3l;ys3gEPG|zr1Ko}9A^Kog?M89aKCLyXtC31VNAID+%MFI?#O%`4%^dn&JzI~>N>wNfI>PMJFapH9OCVt6u?Gl0`4$mR|dU&-8M4x(XNX(bK2Q{F+7C?iWWgW>*v?8Zg;ZL%I4FtNW>!ohd}g&^ zu|@^03|yX}!|t>S0joB>kB?8b$u58XU;*`S7A^(>ENpTf%i{#3aIpiyF*P?MXCRjS z+r#pV@!>0~{n}-$8gm}IwO6Aws%k@vHakDc9^>IfE?ricjYp{Ix^&cCXM_sd%5(bs z5=(Co8ji%}$Z&S?y-;J}-H-bfYi-`un42ts^O&@q<{T7D zbQ`%Qs}R-ubBm&>e{CL@x2!IB6G!(aG8W%pHAwnu|L%$orD`q+S;A1 z8;z6GqVT%CFh15u;Wo`_A0=ef#vyG0@23gct$ih`QXz+b@#2?9Yl#62yvL@Ak{Ylf zh5v&eVworo@mvpfCW;ckRd$$^4vSAcT;lClR6j~9jCcJuO1lX5#;0dgUAI(#v`9hnW``~l(nl9E{OQ8n%*UV3EDIVCtlur4nB!g zue)0g!0;+1(51MKWN6ZOB6Sh!x&L(64xmGhV%3?3?5s%sIwl1eaMx=A-=t~o*}Z^U z3!8V7)Z3JaVpQRvD*xWz&Z}0US(R@WkM^5RIMN#4dxZ=$rAYTamck$X{Nh69bZb1R zO5nrNr++Nd@uuK#Admj#$yQ#&H#OjQoXhW?MS~3ofUAWdLIdb`y=}#(jCm&4F-}1NJ>fdxa~KtzoEg!9)L{r z(7cQcd*O%9S0R_kJIt4qj5Y}lGLg*>;g$W(!Nr%rHo9nr)xYE{Y!~SyoFc{ZGS9Br z^ZF3%ayDg5!_3^r2Ri(O67tO-I|c=lxGVQf#$%BD@v$$q@J1h}K^KljlY8B9MGh{v z`2-$Rwg3y~WJDGei1O_(u`Ac4n3QBPJe8s+{~fa^Am758@4CgYg$z`gw5kiib8_52 z>?}{U$_)YmwWFQV*_f@X=t~lqtb3((Uhl9?>##k+?YnQHHcTqu7(1EBXV-4!^OWB{ z*Tg)VP6qX07~Z4p+qZr*z`{oZmM6f9Q>|FrNH z7fy)~uwnjaf{9FQztu~WABlA#DkUUJ1Nrv}tqEh<^*^JPqhOOUmz9;-F1Gj}`DhqT zD5Nmu&8b&ODne7X8p9pE&8$1C67q#WSuinQlm)RJ5xW2=-z^g&qC`3*u1(oZx)j`3uqSxX&8| z#HE)1k)$0847cMg0TU7|3l>8{^Wm%{I}xZ9Q=bEuN|1o2&ii^a~)br6mOEMuM8J6?+zvFHwLV;>g1d#0ZwD=&>P*AkYjd#TaB9t`W*0*1VS0KxkfhHeML4cq?Lig35Q12DXKG@|gj~BECUxlWnN+!a zvXlJp-4jI)`UZ0Ke4|@JkOjl|8&G@WK^ulGP%cPl0Eax42~z>Jv*m7Hu+>3xLX4FZc=%md+6MhAt%}O|!jNNE&uiXa`wU0fP+dLXtaBk2v zC~`NO2&=J}qP#lWl^tH3q-h<||3Ma4b?E-ZMVs)ImX~fash#v5S(BQp7gVh+160fE zD4+5$5CZm`266J#sukOH6tJ<|{xsD;7>q7BzT;u0mC#K-40xCB^LB!g8j%8&D*(k8MCuniY2k@TRti@oc;ucurVqOJd_qT!V`}L2jMeN;K}a;dC|H(aS3sO^H{S82REbnAveOoKwOMD5zJ%AgiNBP~qk^W=N-w(Bou%}W}MZh5vP zYw41m+mNI*tLICShYc1cjbb3@6+#Q1p~1lpP^&ZkxYH)bUCrU*=1`H_S}nk>Mks?5_bA@WdswknR|v2x#5_L0JuOlsp!QASt;I*i1Bu~4(Nw0!YHsCXJtf9%uMS)( zkaMdusfDFI+7(W<3YbfR-Q)tV<0=}P^Mjw8^@`+VWUMS&)m$Ch!X_hD29C0Y)zY+1 
z3QIjvOme!r{SlfYR}{~!W;goM*AszKL&wEb^3dYTZ6Tl>Htbn0PiY3@Dn~P>HBz~t*z0M_v$^JwZs`J=lOJ{J-Ii4!RT;v;wgFj$=?lkYU;7?JlwFsO>IK#U}nL!@FYG-`s*JVnb z%Q&IVMqIs5WwXg#6%~rz!coF6M>6#&D_0d0?v!L*mqruz9m0??IZmlB7k#m_tI>ZI zp-UbeWMh1ozRZJBnk}XgKI{x8F-F-NrjXb-pB#Re;&G<2-oniv7928E#FFM3^zlrT zh|}n6B~bdSwwg2RIam=zMnT~R39@R}4$b=J&e`r>Z2~3iM2ipgEOPwBCT5%2304t6 z@gW{gXV4O;=XV=Tz9FVlNVeF@*U@zt3I-wyk*S8gKIYe{-g(~ubv5}YUX+tWz@lk@ zAUt_gW$18UojxGPWCgkiruA^!b#HR?JTDC09FB{9nu&Acu#9rnH?*u9#~&i3w`L|) zk`YU9dpk~NIYC;{YsBT{*3JRPG^N~DahCO$A zVHTasWK5~h8*y`NV10mPjH5wq&AvG|^hjc0Mo3(hRF~3e0y;RARAujDJHUafFvc?1 zg}y~KSAWBJSfA%Hf#GnrkT7~!j`cPkBJA= zWI)6lCN;&njjl9L9?C%@w>l(t>(WCVj;J?N1Y3abOq$C@N3pgiTDFLfzsCF zM1^tn?Oj*wg7|IY#>Q{*(;WG*`h3$Rijjc~vHXycnL-Utw`d~Lx!bp)qIq*kyDi!= zrb8R?_gx5?0+N#IWFzVT_p|Sc=_Qd7_sYa&bM%HOppQyHYff3|WzF;IrWo~-Il6Q? zoH#|{$&2MX0VWdRTu5rvYiQB5>GY+YVw!sD^+ZYjxhZF6)SkJ{t3s3>q&z2CFsMLI z)jXS2BD=cetf$1Dh2^qdGtXw8@~9bVY}m7gvChKiB~m|6v}l?JFiH)j&;8Zyl#mWq z(hMDbkTN$gLg-j?acy&KyxOgbm^iaPo?2q)Hl^2F>D%>f9Xpny9EvYN&ZO`&aHqQr|E!&;;3fnDc z{1T12I^!VhP~pp5m+xxz7zUER_==Y4_>7g)k+TBSZn_;w>wyHJy{f2wWybGy+u$-e z!Gw8IDX;-3d!H<#Y?4}mD$8iPDTl5Pxoh%>l~G)HC5= zhfL@&$0~bw`kycCoJ<%_2Z0f#8%y(xz7?pI@Q6XH54O6>%@Vao2&5I=x!#uLIOFcv;ZmVJ>aAL%64o@J~XQf7GOMN@ao!HUbLhcdsR6Vi^!NZ#{Akf`s z>-C(dyKR1@kWRxPWLCIbg>cDCItdBWd33WB5tj!v3Xj$jtiR~W+yMQzewCd*=9KdL zI?8yePj)5eozNz1Gej1R6s-qlFibW`8v%tLW@9Zm0UYSy=fKE|Ik3CUrL>}*&W z3D;hi9z*+j%4IwRBj>I2g)}*)A(Ejn+3htJ1}ANKC}B?XE;%kMQH~p1SzQ*BpcB`EJ* z#~Ap+Jzrnu!c8fF%hj!5jES6jQiG~V*waJCRF^viWSgb2zVQ5-S)_=JG3T2Cb$7v> z&TgbgY3l6U#-oe=tu5E(uFG^7Pj&8)l&gl08b;l4k4cOj_x$dmXg<)UD(*Ml$Klga zc~66+NHBz(r+e`#F@#Jg-dXjt$TlItE@SH8&vI0iE}JyG`_iJeBzz-I%&cV1%9^3U z^IeP&hCPw&gi9k@(dIbWqi4ofb7VTIs42rP(?Jqi#gCg$bT65rPK3djeH_aTnVqsq zOUsaRJJ+r3*7WMVskxU`+-lIDTX0gvGW!DKtXV*&!pTzax}RMQH{-kYqqD_iIsVVf;`@ ziFf;;WX-#h9z%bUanGoAk^?+{SiSU+ZQ;r#~-y9TuOOg z1;+$pz7W-S|vNi+*=C^3L*~fb+X!@p($17D?nSQ 
zKMa;UOOh!Dp~bvZlyw}6SDciabl?sc%hWsA*)uBc6^g6YqBvd3Gbfzto4w#G_7l(KkgD_1}+>c~9pG4daY(#Mc%Qw+-2snJ?qj#s{<)$P83we*9jX>>&@gw3h^Z(y`Sh9i&T z$hapCHQ`BH<)ECK_^%sl!2A6}t!Nx0in()smF8i>%dhsa+m}9D=n1SBk_y*NGd~yj25RuDh26VFlx=Penc%( z!L5~v_pU3vXiY)rVJURP&|YNclsk2QIK%%U7m;>KrY3`>JFjtP*}7yIW8bha+(2Lw zM9n_INdrp>kyl(*j3HYt+k3l3C>>E4`~6|#MyjvIrfjpnPH?90!5G8%YaTMagq^i- z`Y-zR1&>hqSs$BbV-PMEJyIiuJ)wg}^!AxERYM|>d%Aoo+_cwplbH!tObi#^&6&GY zjQ|ywzB@G1GQP&Z=&qBVvSc;%4HGBVh0h@^lXZ5+N42z56>_Aes#=v?qBSX-J6rjk-x5Z;!+59O3+22S4X+3RyX^?!v)ox>a38-U zIElH$S8To;Hh6WCvAak*QY7>4xGUuS#f{-+G24abt!bV~@Q-|$G_Cw)Ziwc@TS2AC zBtj3XEMdp?K{MAa6QR9t4lT*;+b+H9XXAx;`gb1BtOs3GHP!0piLbjA>c@7h9GfNm zs>?4AI4(?Dx3h!tg}>{MZN1BO;P`M;IsRd%o_w{Mzp=kX;1T;f4)b*bzDKUg^%M5m zW6}-s5ZjQJ-GZ$B?j(nOh^-&v%kb3q1D!t911b0^UclIb9XxYZGy9$egkpr0=4gc; zUMS5}4c#NQ62<#$XtCQvtdyw<7|L+t=!PcqI^ZCYriv--Oy}p(fravD_Lkxe42|x` zo^+|hAb_2!phCrZYLfhj&-vi(`kP?_TugPvmzNeYyHa_+4I3_^h&AnEnYt!p6a1LA zCl`_NC9bDi1EOpnrmP!JqfJ-GhE;G;jpEf%Ek=VP5^+~)Ogy@PokkSM!+&X~W7o#j)g$s)9Jup#v@Jt#!{X(Ah-8zyZ% zVpk&^-BHD4hi~qUQzf%al(0Fs7Nu@|TaB}MJlBMjKy2#3Hy)Es*8io8d{9-PbT$e5 za<;pss58i+b&V@SRTVIf{g9lDRiT* zv)r5eUv3xbH8Q&&^u?uRL z-4k+gh+e)6Z(NY{{gPm-n{x9>sJHU{S?-vu39MJ| zp=nKuzj)EdY2)D58_mKw*Vh?Bny|EX`Y}30q6j9jbr*L+`|;kKk^SBE43k8d7E(g> za627WlZ5&i8IJ5y!_I@-XU49us;+WNs4c^*_x;CJO}!LFU6I*le^UQ#x)Jh8)D^jsslc(3di5bN5SIAiX&&-_?#K#n zO%_0_gKtEGgws{(&8-eX`=f0E90>+YSdr80?i%;a!WF{xK;xb@LMO^k{{(9Ttg+*q_{aPAMnx5X8eJWJ0L zn?()>`c3Kk)JWx;i|^eN+dIDpG(lzwYoB-x+XmIlKE1P^(}N@eBa4;@i5ZQ%0cFjb z_yIvRO}^jN7sQI3BKGf_$v?};cwm{4&aRCscgsb3?$6yYalR!VFvDoK@ z{R^cg%WKH>sTb#{%4H@h@I!dg?_D2ZF`0FqD^v7*HFph(0ye_^4F*7QqYroSX_8Cy zUi8ZLx{Fg%Ipe~qkfU^(=JrZ8s-TF%Q2JLDvLj=-yATFX)OebL1C8d}ouuv{j)y7dGv`5pl;vT(S&*~iC9 zN+p5_Z$3PFOomGvUiL_%`VN;TB*HqzgSyUkHKJ;ee6V~sGiR|YOzH85v)cAgDYAoG z8s+jUbr7SJ^tOR)joZ)NDZQ70oB1sDq{!k0tUy?Y(kp&QD-IxkJZ3JcGa$D+KC%XlpMRo_VILMX0rhSF9odE zcIKiOXap&xUc8{9VV;E&TRfmF>DvcqTN(}zuo=T$T#b%rIrXMz?z$TJMydw 
zy(jn-U8eCU1XL^1CqfhC_$j{Oa6(9%{1qSV?}u7_^%t^hkO@#_n7(;P@1a~y=G!TRk&~aFfzwJH>{XQS#RO+V^P?v> zZZFfXGEA4xi)<}+(KtO?>tzgAVdh!x$K`8=Mn`~i;L!25v(JVY)r#r~4l+%b;QAVV ziEP^pN@tl`xml7Yv^K{X{7FKzUne}j4n-%z|1YnZ?i?;WUhU<&k zfD?ishzOz!LG(oLEf~Gm(GtCj(L0fZM2|9h@4fdLVK5lIcZR5g(d#!!e($^1yVm#T z_xWqCd+)jX?6dbi_n!Ui=P0gz@&`G@-i#g0p^cS(5UWYQyZAWcEJ#Jklr~2ampq9a z_$}Y{%qgDxViHts>d;6j^o*Xjcr#`*f}>(xrD+&XkEn!Swy#kf))y(NuF>HhEpj9w zU!?JNgjcb8{F!@CU% zF+U>_3(ixSuh22?Y-Ww)@*`LVg8j;Qy%H)GnARD~5~x~KmKRKCcE#fjWrz!6f{_>Z8KG$eh!sKubZouG+U zc?++TuH9UzCb}X=CVCVMf;-#e%QRKf3P`OW<<52(+eW)B5StECCP0s@|C%UqcrPRX z4DmO5#&)d=P%NB0b?d!Cf>?NT4(vBCo{6Bq8D2MAx5MP6)t^TCbpn&;cOnJ1XLrWh z^a;r<-7FSCzm5REKM_;5iC%`bdfUxX5=!4SUGiVPUt$HVW}lKCY9u%KeBF)|`rXS3 zkM#B%TqrVtKr;nl^ZMq{Z>L-V=SooK247bw0Z22#@%Z{{hQ(G9`>;qiIDn0S$A@bk zBgU%i0ir^8PQOdFa&~&%0Nh`WVFs`7a>=!CtG8V0nyfKDh;Cuv;~Lb@2rjYgukQ#z zElXwCoX%D_Z6bgI#-Gm1!8po}Kdu@?EjH6;?lJ}`XSB1sIXqG(A5_0DQplbkd(Z3q z)a$I1Bx$Up&4p^H!Dm%q`|JpfDAo2O8OXPS*#zrY|FWYY@6s>0`c>$}15me93WM%} z;n+qN=Cf#ViL>E(r_F8r=PV|jk77n&j#lr4xMh<*Dfg+$n{7Z3Bj_Il-^I*?>LK+5 z_Do#W>hG%F9b6odF#AWW-!Bd)wZJ!~#x<(6y=-O*%KJ-#cH;bNq@i`=F_Vzeed4$r zyC}t?W`-Y+V#p;_yx$k2BrLG-xWD9M9Jrt2(1nB0?^_< z5&&2|w8i{>(t~Ur$}@5Jdhb2vYrn|Wanii#jDH2gIpdibI~m{79&Eu(Rc-TEtF7u( z2^bc#YbWsI&;4=I*3y`CJHNqQZ50rg+5n{7;=qZCEii*8M1C2nmLG==BuA`{Ww}Xu z9lwNNXm~*D8k&5S6K_QBYNhof=#%gCDNR4EZg&{R(k9j+=Me#%k7KUWXRX{Y+I&-{ z_X1#0&JHZ7mffN^FL%RMlt0bgCIrJ0Ic;R1?Cs%U=xSVLo8fzV+hKWExJr=hfhTa? 
z_GYTuZ9?8kCB1og=I+gjvDe1k6r%}9TsA_V1EP`&Hum6Ceky%;w?(esa+kR<(w@K_ zntBVZjb~jMZ(;3wz#3XJUI-~3 z&^_0#70+>ut(r1WK7>6F4P>}bg( zG9{(MudsSye(ng;4ZAWG9U2&KC|h^=PJ#S^VK(LOgx1%bN6`;yxOttiAS9(g^4bgy zy{$t%785&zliWPtsd!eIQM^&Qz1=Kai(O~D^t$^yY^YjS_(_6+-8t9zm04kYflr%H zT1E}@Gp7BQ0j>DmjBEAcx+dtJj;~Uj{gqu|Qe?#U=r1Pc+z-vOxF&NK$CnjK0OD1^ zO$Eiq$sK|79I+xDi|un*(t9-oZrS|leXetbE(3?q&qvYBWiMKta-MUTXbQEy(I~Fn zv8?Wq0VF4=-G019(&qvVAnjKenDWz-nt8Gz6X*1K4tcyC8WaG(Vx>EgY`M@&5rX= z%toUv7++{LT(#}UAtB_7hFYoEw3@)JlgRhXz`ROV?}|39wyf&@{Oc^q%KXUS(u8A< z$-!Ea%!~!G*0728wy&3i1gNik@`uBt`s>L{8h%G2P6nrqd4(H8>lfd}AvhO%xn1%3 znXOIm$P_AeEt4n_8??UO)8l@pWv$q%Wt~`%1=^6;!S!e)GQF+6{hm)y?-Q(7eldB< zV*nVbBQOm=t1W<+&r`m{+j-P~eG64QFOSUBMa28U{QxByM&-Lc<`XhR59}W3muXO6 zUfn8iZA9Q`6iWwE3R2S(h_t|Xee#u-C(FKB>{_qh%;PNW@r>il@xXC$;;0+=`NmD~ zGf>unnh*LLGic(Ea;W2vEy7;RuhNqyiICkA5|Xj1w!RsD6g^pz5a%S`0uV#d2E}n` z%i+_Aviecq3g)E9Ir{zOq%g70cgj>KZJ~^W8Y56e5%w8 zd36)Mp4}A`Kdw|+c)fU5^g*M@OIH&^{9Z!;sv9?3@^_JuU9NeArF=0-t4DC^Les#Y z)5k4b(BgXJgO;T-)jlV`62^Wf*dcBoy@&5jg>|4_3zGhLx6MKtYZ043%(Obr$u*b~ zC=5~Bsz2*Eu4|YrvApv6l3-DGdl-9>=qXsWnyb}XpK8N9!BIM=;AI-SzO#>A0$0-v z;<{8mA2q5uPkgVw<>r|aq2;*MSQ#r!+h85&*=x%y(gd%KD+Zt3spDbRF=%M6V^PSp zJS-b@y=~d;LX1-!M|lHE5Yb;pIopZ8}2E`P*WmD5_fsN;q<~$+E_Z` zDm@)k0+h^5pop73VyN}#2;*`a7SwMS8r%b%vUnNtf5sr7Tm1x54hcnkq|5>US0 zjqz5$16vNfop^2|qY%FiiAYLeCZ97#ypi!5pK&WEuCDBI;gfaaF?ertexgXw-SJO5 zB8%NKvugM5m*YKrqVR%xd`WX#AwhHfTO1vlJ;jH)Iz`t0;@kF%OoBe+UY8yQp|ze? 
zCC>+TnfMKYHUbR>VMZMX0;+P zZU=4|S}VsD&G$ZvK5PAHQSnwXw@vlzhV`gCCo#beD2rbvKnLMVjgIcl%KvhhJgm)N z9bi1lksqUznGfaBw%K|-I1D`Jp0hdaJFT{yz(l^~y8%t>;7P>d(!mZ|S{>nv>{-pA zED%PX-f1?-5~kns`w;@39LM)CS5(tLf;Tk021t)eP^}oDKHs_TJui2>mb9x?&rP!> zH;y!`HjTS}^xRmC^Fc1}jO~*0QRTbPOmG<>kR3OEG$1ubo8qSh@=T5!t=LY#eB90U zQ69NOe!~JEJB|Rl9D5EF?~C%&qH!O5X+@Cah9j8vyau7#M=CBf3|n2*dF9JowiMkY znM>lcyvUBaa8a4xQIL+|J887?R7?JKnMLyP7H-`6Qls*nQLCmuRg08PnJ8^aJvDDm zW%84x6SD@zH&^{5O~W&Ld4Ox>yqn8y@h0A= z?0>ib1iw`uB#i;SDj9n9wx;hI0>3toDo+`_G+Q@=6YnO)kW<>S#5IKS)BuUznMs*1 z#@s*C+O4Ay`$5b4+pyB3UK%1J|M|2qx~U~UNf+^z0J=*ht`IFQ9MGp*s`PWQCMA{A zZ73Yad4aX4tVrlV6e_V@mJHHA|HKG3rQ`4b_iJ{J;`I!ZYBlD*qQTKOvi&YJP=_4I z)A{IP0~16JC~G+yj6RB)o3r+?nDlvqBymR_#7{bYt2H>!9+pX>;&Hs@SSBduu2U>A z%UWkYf+bc$qjny%C06Q6skHj5to^pfuq%W|J-#e$a@B|pJCgN66=Tx~)!m9`kp=HK` z+a3zjaG|txP}pG{6JS&MtgWbT#;RXBv#2CSoerb{(#L6&wf zR#g6+HQ8i16_0d(4A`2Oqg`)~Z5KFzAK7(gS{CH)EzYBnKQw2}S8zFBU+#zr)vxyT zX!dxy#4^o`k>PCkn;)}e5$SC{wFRhC-QPNY%8 z-P+RiILGegCya%1g{(i;67y)FCW)LGZU!DMc+MRq`Xj3%-{p?)Ezc!=iZ={ zGp6%&TfpWZwaOrS7?9#vK@5!NhK6amFX(1JJbYJ!r~>*>F>!e801MaP39Fh@fx_gK zwqrVj>tLYBB6Elc%0Juw`sa*uIw|&(K?YGLb{2YnVFq&PImsma>h%C-V}n|{2{H+M z->THsD_63knuKi$US=y;qpl*mIgk|&TsMIAqv4pSW2c*}QdFQlF+!^ii_HB&QB3GI|2Uwp{_xI@m zJP5aBTg|i0-2mqcVN-a7SD^&L4(VrQO1}9;ytYotvee!n0+N-V5-L%Xkw&*%l+?`u@i$_RmB+BMXW(N4mJ z*`Pczj(05`wk+BWjb76ay-o;e+{YvxdzVf_!fT8_KsY_H>%qG|MM6Ccuso7J;^oqW zU?{#p)a7G7^wZyk#Yr)!J4lUW7op)?%&0|;YL(7u*l5yAsMk`~%+|1}r4&Fctz+VC zZ?L;d-FW>X*Z{%PIPu`^g?+11#%|*zUEA{2nC8LPvY6@%aR6;m6y~K}eU4kL;kHDP zg{a1>risS%oQ5bdoo;G39i-p+E_2h_)c)8`{XXbo25zAEgO$Dwd9XkEv$Xl9qm658 z*}-z|V4#?$eqX?GbDIEjw?slwo>1LDVo%95)l)yE(EONt1IDd) zbVIebIr8ry0#8Uw5svRTBk)V0?Nn8A$#wT~$sQMa_%!k}iUY>{IS9d;BIXnU2$gon6)a z-`>6kdswe@sV{DFryS(nDoiU(K<*U!|m_xlJ9E#%zrTpnC-=!U`q;zTpAc z={!}kev^`()hssPDfR$+{va@B)SR{!x+1FBy(HOHOGns7(SqG>W1MQ-z5lpE7n&e1 z{0hu_*m)WJl2I`tr6ftxbMK}c;e`W_Nl@jB@5v~Q?yFHho?hop3k&{^k>v9xjawx3 zwc6b1kJ7H+zvr3cY5?Sd88+J=)KL|*L9DM=cDH^HW^Rfg)FvQ4TIJCIe9Fj#H@lxq 
zhitv_nxHi_yM``;0xiq7eiMX1D}DOLUSnIHP~|KN8zAJhDBSkJd=rcVX%Fd!a4hc} z9TVj#m)L^l?I(FU7UtU;!*99umM(7!!>g%xKSVGv`49*J5#D+^tvauF{SlcP^)X{2 zeyxCbg&r9LOtZKXVA|&-QiT6doIQy{Kfy*gHT9hOa>^)+2& zAOtiCwXq?aVDk(`rw~P|d4bG_*XevV^g_Swx;^ovfuOJGW?&Z3^}9mS(zm zHS`H)TsmmM2;x5xvN`==RK?vc*S~yB?8BQy=l2G?vp?{9VB|5p8`g!~ffE4t1%bw3 zBk5{H^OCX-wFAa!&E26Lcq$T63;t0@#h$v|wtSRKQ6iL~>UGfR#r@O1TDVgQ^(Cx> z*6trU93I?-3ND3eqTG8gHwXUa&6|cNzD21bi~Ofw!zz|5Uce`Ft23Z?vzXSAyIt760&=Z<8D_kgue_`br@qgqwb3?erySEH zRdjVNCDwA}MST&yIJ{Aw;(lG{Q%Y=PAP?}lUD z4%+DNZef{G7mfL@$NWkeQK4YuRF1^=&2#XlDdZO}?g;3#$qw6obdpT7c7i)#(H#`5 z#c6Ed`f4if#~rDluYU(6uBd%=9J8J=3WJwme>U>_>24M6OO2Ukum%7$^u4xh2u(k( zLs#edJ{$KG`D><&PD^ByC;SRq*3HdQ!9uF#*XF?fh3F{2*IEKtB%|8|UnDCkjlatN zUFnjIezC!>H<)wHJ0GGNN9?whXY9$y9#M5*@np%ZH%_Nj_U$;;RBU#gVks5R4!sx; z?W7LOaoO!CH}R2I&bw@xb4A3i(ysrIj)Cxzhgc9bjUh($_4xxQ0YtNRK5v@Q@Tkpx zLEDXO8gzf3>HMH<>vChq6gG}{AetOtA21l_LG{?`wD)6?T|{ef6jpyJ$}r!R#b}?7 z(z)4g#EVS%r5)stRN_{9h2)f2`R!iDW8gNLMD>@L#4n|dEt|2_`As}@*D;{4nJP2V zIXR+DXY|9*t#{Wc*BcDKNS?cTnWbKBG5!6!GMalIz%Q`=EM;KG~2vgd4?67>c~W~Z}p$pePXG~8v?8o+WR%Tt*@@6yi> zPhDIsDcOgS2h3Lnqk-;tLwhLw-~wS4wFtKp^8g;}iG!9|b__if_6IzElJ7fpf$PS( zNRy;PM$ThtZeSCi;VCF-Zb~Rd9~cNh^D3R{BB#3Qn@SSz8&$@Id+aXBOlwukau<2b zYnIm})<3FstDJJ+H6KS947t39EQq}gQY`Xj$m-jpVw%)01Vz z#L_$e%YyXSGdS?tBhCAr!wlg6QPK3#CtB{-2C~p9TuF z!v@d$_*3+Q z^f9y_^H4~dia4J6zg%;NI1lltA*wf;UwwtNesz0_)Wl*b_9jaAOOV@KMd(icB9C>r$F=tg41JI;#s>s z{RaX9KLY9N8ZPcsr(d_ip7Kd9pE)VlLs>(NsA=uj^RwVaHWryNe$}T#7 zbIjfm6TLDK7BXsFC$75qqQW5_-L4d+; z7_V&S$34vyil-!dMm&%j%vqja2qAu%wV!E|nuC4Z05M_3t0 z`sghGh&TdK%pS)+{|#pfT8ld(UbhqyXg}9FImFxd^{)aXhEWKkK`%%g`QHIG&ytX{ zE^IwtwK|@z)z@z%{{9c9M0<^D%W~PQJN@5fnnAcziLN;im1x!W3t2ikx|(g{YJ-1p zB=ZQA6HGN~u>6&c;PFj^ZA9zUln?0lAt6m6yY@e3z)n=@eZLdhKW`^{D&W1>i2Vx8 zcF&2PSNZqO2WU(5C^Hp>d>8#oVAPk7HahzD7(TrWEgW6{!{C;3z|ivLUoSRMkPOR6 z`n-R9e6-_M!!eSG06HsJ8sH!1CW!J6xtAFMJMlkuFLJbb_q|$bbS}Aihh;BbJ{aFW zs7xQ|kE=3S{$mSlXZ$#9`;GGA4#MR(MQKtZboy76!-pu~4vj#c)?Y%xl>uDC#*dP+ 
zw@#_PN2eaoh>88+9)axt3>t+%d=#VqNU`dlZVEfc;H>bvhGinEp7?)^PAEV+Y6P$W zF+zWZH@6FV7ZNoP$iEzauT$PXb=^Pw&(&5?ly14g3LF1e7-@siVq(;f`@ze7zpIEC zmUea&{sp*B1B{zCH^{8hUq4xX+K94E!Q$)yR!Oaj2DTt2;>ThxI)5x~ zFkM(&6PowtpIcU=U~F6^Uzq=K90fgpw6?Zx@e7s%-(97DCB(@1#c=RHgYv=lgp0;p zpCiD8^s;@$+9j`OX(fQQ)-V4M#l>+L3HfYW9xb_gSvI=b7y*nM5tvc+uG6gad0P?6 zxvF4G>awkfByqCa^zcL>k*;#Ai%Z3-pA5M+WhBR*4F!2!yb(lfTkoWT2r~%c`VJ32 zbC;=7HJ?R)WuJSMgb<>q-)&AwJ_;%_dJ0mfxFKl0f~$0`;dpNI!pN@o5%OFyI*LFL@P2j8qWZq` zCE4K^w?@8eQL5%!+rO%drpIXI2wkW$3vu0BPNdZ&k@XW>jOi{q#*GLJXV2i&yRTp* zNTJi-)s?S?W?8+1_fTNT#W%<2IW+7J9usrl$9A-*Fq#bBN`D8lTWMrT3}n;D4fVZ*gP#!+zC!9&yTxg!o;xqzd~P zi=~~$Y+!>P9T?FZ&{ZtoB`A>#=^s#iY;f^o1f`=GEOLD>wsps9uJ=J6FkuFdgH(^{ zZ=5HL*##r<*B$avIVT3N%Sy18!=jPx!bhhoTxWfk{&Sc?K+VlY^ZoTti*ldXnq^dx zT-0^0cZ=~s*$P7w*BCu3{jM=Kbp;L`EJK_VMUTbLfnrnyxBFcuZ==e#`wmyTEq;E+ zKAzMy`PR{)tIetzh3pBVyDy((E~%${1FtXC)pxX3*95^8nv;+p)Vbz&I2ydp)r*Yk z!cQ8rVjjWU76z((*Tss|nhT!~WbjY=$}66w3$_U_G7)XvGUiy`ymk^fDR+Tz0;3iA zIewhBOvxP>Hsy`-DsMD@-Dij7MpL+##m5_f<)yVg^AwM2l{<|w&-xDkY_m4KKiJpE zF}`ZPwlp{5q~mI7_C6pV|57J4gaZ;2cs0AY-0B-9B6czQ3c6EQY}Q;hhnt{c#BtuS z1R;~y@pbUqJ6$v4IxbUx6vL;5ACPgf#WUMxmC<@=FV3t-`@zG?sT{Rgz&{PM=Rwr3 z%GR^#;ZnZ#SGaPumfvi*5t4*fr_WkEU+yotT_@Z!`;aI{QKVY zyS8c^WN%J5nT<*_Z_YlLFl)T&>#uU2CL|L&Gij>Qb~tJ%ckMV|XS>{K2(XyqSC#)p zcd6Wd4j4@Eo`fc6^3?ebOSQSKCF~wVW9!f|gFp3+gg*7!i4B2YB$Zu<9$|f#V%jeHakX@cQbh8m&t=Bq5wyy`FF=a_VkQ!FBAVX!75LLTzyZ5QUvNdXPMdex+) zXO7i(mip%+sBT}xQHp<*BCoewVS}X?1{jHAi;=Tgsiel6PW4b}43p}0IM;YR30-2+ zFSCuORR7>j1P|>_+-H%D%Gq<4v-R+Q1@(9(#s|!n3a;@LPO1tDKj|CjYfh>{9K@|^ zOK$YXGNrk(Xzzf|rKfF%)=MNe=DQ0F&Q*<~<8!VR(y=5>DyjS z^Vu~YIa37Q*GbFDepYPQgex~RhgPaFYt<%--1ah!9o0iSRQt729?7Vi-T-rP=)tc7 zvZ~VHCVYt{fyRge@(`I=%BQXp>6vJf8ik%?SR*uuS~e zaX>ul=*%VkiCSo9ht#vhpt5Sr&dQoaGo^~3!g8$e?Gp^AIr3dZUy>Q7nzO9?)T?+@jLn%W8nS zh6T;>7CUW>4mh&$LO{^z_T?=KO?P}oPlHOFl|^>&X7BD2;8!O^R5zD>O(Dm>y(RM6 zT|G~Gn45-x`z*ha%Iz2oIej@f{qI5qNkafViRn@eqUUL;xDiT~31`oD zCJ=V|R*P0;K?P^eZSQAvL075k3pq)s+osn^a+5eEyTooJeJI(D?<+JVd?-aP996x} 
z3#NwzMMYSYY9XWV5a-pAFu1`p)bDxmGjM^J9|~)Mm0~P7xBfe(%Wv=;(<;bVgs?Rr zRw-beQLxP_O*<^C+WqGJWl4Y!QQ_?5m?Sn>{~QUU0!2xuy7{Uaif&RRoeu9mqad@i zicPz}QFR}@&Op|b7o5uvd+u6d3)TbBkfaG^+#L;K9x)Cs{s~kr072x==Y#GOeiUJB zOV?a{She?F3m1G6{~48c?}o71ehFst=u4}OzALY1Ak&Vd45NU?vpylOy~@w2E)(5p zw?lo+2XPr>UaiFpT(5s0YsbvYK>Zqx3XG8En9&V!Rmc_j{tSLK@`=}AbMUSfJG=VGkWD+b==%+NoDa()%=*DZ8-@M+oO;#*;|oTfWuB0H zaV8S&2HpI=s!gL@8bi?gsxq~qJaOpKN%m7~m9sNwta&cK%d)){%<2BBZ-as6t19r- zW$AU*O(`ufwm#qOw>LOd0HlW6I)UBBXMLY}>J|NNzMeI{US`zxg;#eJs7t8nK(9@+ z-T%%2MUAx90Fw_qz2{lNd7+`;Ty^x!`c|CjGF_toSx?@(7CvJmXOjbEY1OQf>k`GN zoVZ($!CM(`d;@w6MQy$$px8waD(~{mK1s|ld6~a&aQ-bqSFn23qzO6zhTuHn<=wtb z#%AGHCSy@Xc#RJ#-k2fA#LZzpkH(NGr#hGMlwm$+;L`wKM!Vj2CO1cawl0>l_Kk*i zGaK8>5v=2DqEm}7Vo$!dYrN5e>N~IFes|Uf4%b>|G;-YZqSaj0j%X8XH!IrgG3ARo z@1!_Pm~rs%+@8UIS;_1W58gW=#8!$~mIVUuQj6l_bE<1k z5W^D*d6u(p*vAJAlLWcWIY^VyQIzse)uI`?@2{zw!9Go-@UK6qI>ti;&Qs4*k?qUD zA2Y7ox4huf!6cX17W!`&$}(ld2*^0S1+~JIUv8za+6zv4DIZ-69u?`x+%K#?{kAsF zp=;HEmZC0ubFB0{Bd~{YNe!*Ijp_bZ`TBfa``I^>+F;?RH({#GMuv+M4|5q6kY})! 
z<1aORzMUJtWdwDQz|Cfk8b=)`4ZI3YvJlbTv(?DHzMZv$PjR`e0R_npU|8OOAIBP* z6-NStT7$dtUDN>inzInvvyg|=2Ok;K0zZ^@qgo*rk*zZ#)r z&K3M5=VDY7-n5_{`JClaxStS!v5f9$on-f?n0y(K?O?$FF*4x*MoSN?54@W$wZI-Ni-wGxZh;rmGS>*l@g`Z zQvI5IA)8?odl7Mt{gK+i&SXjaDT-UBbA0>;E$`U4yb)yNL+Pe4CyIKO);H%*P@mVc zy8QcsWyqQu$1UuOP-!YqBMYfR5RFG^XVPJ8_SPE@Ihjn==och=;Z%SIThiwh^^`NT zT#HvfRiD&ElL1Q}X?0J%YldMs3N{iNEn72v8Vo5Yo@xpubdYO~4b{D>>uHJwxY@ed zFRMJ#F6YQk@AI1x4G7AnOu0AQJOrLm-`y>`=N9ZlzJ4rDoONP*jd}T6-p$-{DyM^H z+~$n1s;qp5DR%ec+kl4v09Na5ubpRkfK$z5S~wLqWK^1x*_`~IL~yNe(YkH4&cvm za)i$!?7d3-697p&YkE+t>pq(Q-8$grX3N%>%fIMxZ*Uy4Pab4Zomym~BCU4$REtD^ zV1w@dK|j`E0e(h~|2rn`dX-z#?Bg5rTbBE??Eu-lqF){$SV4b zWdtK25Hqo!z;7_RO7DZh?0aVm4n~};)%dOem;ZvEh6ke;@;e9LvRXzQtD98|sdjBa6 zO%#gR>m`a=y{fvdPTQD(QlL^`|NA~r8y&iOTeUc-HjiOxv?1 z$Gy99!kIw7Murm`tzVD(o_J~-4aezj`n5eoGPZpV+PCc#FZ}WQEo;QD3%yp6ZNJ)s z3C+b?;cwV_=ZmH%0UuJINe@tc5!LZ~#%)4&K5M6Uyr~iHa4qC3W6GK&&BVuozi6`e z-GAMu#K5G(K{Py-8_gwzGcYZfs`|i!Mf1E>B!Fk9zBq>iLX3QFJ-LS6dhzq=%e)1zr`CS87TlfMnY!ZIJNW#yf2-6Bg$zn z{YfHfMJQCHk$O@%VuIymNW*}Axv2OIYO`j>m2oz;b`=W4xi*}K5p`+fju4h{&xzk6 zz?w$=vJzSI{=KuwU0Z@}_C4$Jdyu^|U9#a*W@k%SdIXBBWaZa4qRRSu*)$oUlif#{Bwt^Op?j!^nz3vMX=0gFI)m$3) z<5fLg(J$$%mE3s1@|ZElA)QN0)LB_sJ(ozewP_G3nU3)19kB;#9j2feTi&;2Sm12z zKGm13w<&d}l2$*uETWzIg1;x;h>_}g^i|wfKZnoUgG9~5Wi8*wn|#rKAwnYCe*%qj z^XR5n@6EK0SIA#Z45{s0kR@`?9eL;I`F4LZGzWr9HP*^sjmR^D);gNXl@xHbo;ogQ zPwd}f?tEt!#sM0(29KV$rOgN;1Su~ai+|*uIvPGF$4kzCbx>6gOWA76blF%oZdb8G zVKp98FxG!9)!jd+fODGiRNxBJLA3xb@z2&%>sknKGEM5-@i0%ebvun6hdDDnwZowu zNfoD$OL5Mn?5+z=_f{GG&|1bDXkW+Olbp`}o+t6L#-lVy*1~`5H}pEep~%N2{P^4# z@$kyWBMbxn)+E>RwAj;c5BkxK9hq36buunKaQ<+6dppXPM_vAIQCUgF=eOKTFD#IB z{RvjYbCxS3mL~4RwPjF}#q9_B$lOBT5_>lK$PJ%BQ58SMJz1v3P&Kal1ZEQqKFmi! 
z%tzB7RbS0Hm#7($&DME=0FU?)-;#|^q9T3N&@CiVKEj_Y+(=VNXLo0Flxg^Tmm*52 z`I2`fPdNvw2!txy{f=*cysS#^Ka<|E6KPNnM0=k307FS8(*UgUf~S(R^j9I4qQT%_ zyGq(reFLB*^Ismgc<#S3nN_hYWy4CrL&k-xLhU~QZZ%z-tZDRj7oStQ7&ycUr7yML zU)7#<1^GSy`|ftD+K5T{i+1b*8%f>+jMowmah_{SV5(KL3ZSX-@TT!g$~bUG6+FOT zQ_V#AsouS;qC{@oWBcQ9t_Ih=OZp3+q3Z$UkKuG;HiDjT;2`Z-xFchV)I%J#PD%_E zVrR(ALB|;%e0t8xnUC*cB98$bF%!{_^KS>7V10{0>7nSS( z2=M?TTq+Z(x&PKxEkMrz2`g1!c&hX1?+NOn2lvgQ8Y=1Ge-GL8JBNo!I3p;(CJen} zPYQBCsTTD_Zc@|}H~ABXa{e3!tq~rc9_i#G9v7~dh>&h*|0g@ct4`YL&nsT~e>(E~ z4NVYZ#LoI8SoY`WXqPW}Pr`qq;XHl+RR6&fe$9V6qh?fV)?z+EOyjmSdq^@@4gZt) z^?PizCv@zq95oStyM5S)f!d?u?P?uZtw1LN;P03o{a51G?*~!WMO+?KI{v-xeHAurMQH{~ZmdthoBk@lbMKK*ty3`JyNqDG0>qmZA1LNuGnQC= z^g^0s_}jm`I)7r=JJ`Rm{q?@qVe#irh8f?#U8{6`82qVE+iqH~87#SPLVRGM=B3Ed zD(n}=z`wjw51?M_ka;(valZLNBh`XAlU{8Ys;RVm#PTT^&GpO~{f|ab1~~TU2@W0B z$+ne^*nLwQ*9x`T+XHbLCwZ)a_bqj|;|UWRbamR83zAtk4SIUgWFLj=*^Fk^NQ@%8 z>hI>4GmWXjskm{5hgGg{KeGPU-k$=`Rs3#$IezAF>yJ8dxtJ~@r>b=p!rJ?T+X!w54OQXz8?L=0H zaNaQxA+3on>B~%w!hZM_a||`C8#IqoAGZ%gV`O;~Fw7Ijyl$FZXI1yt7y{9HI6M2tzb~ii=Kj?P z{j>KEY^3>vwf`w77G<$E&lx>zO2U(@^NvVVRmNuj(?et}Wv z$3GkWpEH5Zk5Ta=^12iFpD+Apo6p~1Kg5h-bWYKe|KG;{-P8LL6%Pas(CsAub>^ST zqpar@%1)DiIhZp1*Mt9&;6(^(XxAGFN-_S~#6KrPJ?H<+PUukpc<&6@-47PNq4qM# z*2q)Pt2XZsK)qj5^19yvecSG6H(TfMD$bmY-baWVv|+`PIClOPW3PPg3qynIUqyPc zTCPAN!s=e6?xn4ma6E9JaPKUnuITKk3ZL><3~*++p`n~Rr6@NPiW;d;8>w{z%*+al z#WB!};sMV0J58|eq-B9D#nX1-!+x0E0|U7Mry`Y$0DtlC^UfY4&LgoxaR0;xaa zq4{?ij-Pgvi;Ehpau>Uc+6q2ycIxN(=3Nnw);rnh=^GS5KmHeTP%4j$3q;Loj7&^s zs2!VTs?4zPX~YaQ{Chx);XeA6H8tvJ{a8kaDG?!YpzUlC@Fxte>VucHmM)YR$8pDX z=g5wuaUX+2h2`n0^9_zCsdMQ1vpibZ<@f%~y(bRAzxzH;*gyTg9B@f(L=gv>B?4&4 zocP09JqVoe{V!R5Aq?~_Ktdc8X-6SD%8yh}vI@&D5J>F}kJDEpSc38~>U9 z9~)6n=-g;VPhdC5e-uH%XD?xBXei%AeC;OuRi_()1zX+tVpnL;qq=qVfsp^H8$IDZ zh3@Dq1g{y*z5eVF#9FYlif*EHR`DH`_RQ`Ld~oA-_d0pPQ>txQLsx}rF-oR}aV-oF zi_kFd>(7zAWwk*svqxfg%K}xm5e5|&S>k*>MKyTu zz_CYZA8(Wv;`TGQ9AUc8wEO1cT1|3Klgj^k%Xg#B4R1QsK0DZCazKp-%mFO+4yB{_ zMPK#W8K^=#njH~&bOJ*6u`-jfP1mC 
zjD-jpJ8`Y{jphZ*1L#$};;7H>7j6%VY!Q9@mMcZZFwv3sTlu^%_|A;n9xzAM)zv@_ zoi!^hhCq`=gKO7S`w-{53ycfhBf$I92W<|`?#!57yul|2;|q-e>yjrF(*{%$1Ag_U zucVfb+KwPQ;=Ey?wEvAcaiM|z7%|7Qkar>Y%qFN!16T=be>DG&wn|> z2Jq-k&wWB$L|ko)+>l&PsZ+uG!xOQ_SP;NyctNMSCwl2tnEz$%siu%J^7!Z-;-#5d zu@-ZhAE-ORPWPz`FB)D--T17DcrBi31>mLFS@W=%=HiHxFHLwdx720vVr#;ZrL;-& z*UaP~g%zp*(TDwO!HGADhOU&3hE3mu*zLBKzW|49>BJbRvH6MI{!GJ4>Jpp0zelpr zjlH)SG)vo?hq;`B6p_`8@WYWqA?((%>T$&xLsAJ3}HxUG3^xTi;^%@Lbb z*%sqxTR46Ffq0k|5Z5W14HL7Fz3u=jN=Hkfuv^|Ma9Q|GeVY)Sn z>)Gse@KmM=dF#|*w%vk0(&n~AF*7`5`?_mwLOpE~Rqn2R=F5>Hq}Dbxq2tKJv@+AQ zK&yB5dZ|4nG@$(RYKQ~$=nSbzXxN_JERWO~etvIVbF>>0@N>9VWmc^He*T5%xz`}+ zwPl2HCWoC0YlCAi?d7n;XRVS5&P~;>;UT-|bP(x7;*#uT-1U*_cc=yCl&2m(RcE2u`q36la`2|M(uos9vT(JzpZ1p$?mCk zEWFA78fAh@vJOY@p!J23i(HOtz4R zoe)MXF+{6z_89-5cpjITLVa|ZD2*3QrfF!Z7iVbq%gS23o`jjnZSfuOVJ@3+wiS0c zQAV{2*{uGFP(OR%6QIS)WV`(20v_YVJvU*&8=HPlw;}F>y!EMly+_|xs?<-_PTenU z;NO?qd!17YMCfT(It*^Pmx5uP$ zr^m80CLxt8X>S!&O{2Ds#ZpJ?&lhv&KRUAuj5vdsgKOiS54{`ozfJ!AElzU%rp#q^ z)j;Qx-_;sQ$VS=nxc-dKdZCI?;~=+lwrnMX0YHk?HfuCQ9fik3@z7<+uCo=R_ndxU z0jnoKwb+gFZiX{CEh*^4eEedie*vk0dY{`9KZrU*3zmK==rXJGc7X@)U=7dWdm_&J z+l+!YTg10*Qs~0`8Z5LKP9iUV9DfG%tWNa2xM@Cr0{m`+&I7h|d>Kf?Bzz5@jX2 zR{2~4}^BNH+Jp5!O)jAc7gu%+Cz>6}`;_kC^OG1e_0|0onDb}2$vZuBW9aUk1U zO}VW4_>0mnGtOrx(#8aUGCJR@d>2#t+32+RFL5pXp;0YCc^!MY)5ZGN94~z7$rYD; zD2%S7Mud$*e9ztrm#nDrb8{nXL|Kc(n#d{!jm6RNvzgR+l|v|TZluBap)KyXtjh*@ zD2&D0EM#x+Q%Et2uQ4o;n}0TvOtZ?yl8qKgzPEI{@*yLC#&@U{gQk5+u>iZc6@v>Q zaVN9MK2v+mqY}Ddc{<1kb zzbUfxqOaB3O7Oys%m|ZW5=G5bY^T@3AR0=6IxHp*YF+CcZO2ecHz4uF%vZrni%@_| zcBGSx)CO7Ow{t#Xga4esitGoMT>N%rWQqo#%CpW1pxZFG=(jI{fyNQWszu z?4h3plRHux87=iWiiv{s)*5BZb@-Xn5{33Kflz)A>ylgsJJsvWPct&>0`vwfT%N|p z>f|fCJG1qEUq0+&s+_)g!l{r-Zj6%9?M&eo)p)0gy{Wc;}}?{`mIxUWz{G}NA?zsuf9Ek zJUVr|p^@0%XiI7&r$4~l$<2>V8hK1!Hu(=;O;FQ{K5N&QQ}4r^s%Mle77-VvuxO#& z+sFKB)w9-Xc4He)1sRFRx4tirTWwZ7EF$xYOfEXc%qk_&fdp98WK(GkfskTrT3@?T#a;1T@pK9& zcYW`DVL^RuUbez>duhos(K0OxKPFGQx4!B}`GrjD%IPMY>QJa>ZK}`k+*CzLH$d%a 
z{u)r(#lm&nK0ij=!3cTPI-;fd>RChw>~8fk_t$CKl)%X(3$vL?ww4<_+xKn>X@8JkSuQgHJDJ92rF2YbZ{d^3O~N@3YgnP9a^5(vr$#xLxJW zlw|Z6(hjFs&jMcS--xHYI>))u$=8FKDAwUs@@$>fsSglf2XXu2ohDAyi5iYOj5|B| z1iF`%i3}b6P`xQt)HO4eF7iBtH)z)WcvS9bPbjGo(IW0axPYqGVk@ziBWUG(IrwP{ z?iijWq1u?GqrD%NrMlX}?#qq)8=igx<8}Ar*EcA>PFK(Y$vre`p#h)Fy$J`uE+2Zo zI=JNNyEr#CWnIVe2edoBvg%|MbJ0K)qtytKTP;WbrS_#Nl#&J)K`C z%RS9$p_*TTAgqUeO;RT-y|b|14Fu~oZh<^0_`YIwd~5DxUiwQpN}D7mN7`TD4|w%v zE|U5!B?dC)RKXSK9;s@ZXHKf{R8r>WP)L_887BRAEq*t=B+h4{8%NXdmbY$9F=rrj za#s!a72W~9lN^l^4gm!$^5NMtSbz|H-o!E{5{F7gN=`y#SDW}Y^z_c}QDtX;is1`m z6M7n=5-2C);GLXe)RP}`&G?@`m#l=6U|A@O8_idksy7J>28f5SE=TT9j35I~4e=mr zdBkAKZCTRN=bQ3W^p1p56vI04I`o;@g0T)7XrdUEdesJiAWrzEPc11Yvtkxc~H z?aj{%Tp2dOuXd2K1nuTO?$j@Kgg0>@)meNu*9}+~q5ibWu|`jrl4Ua0_~O)TCd@5n zs#<2-nBK>3^VE6k8A~LRz4<~Xc>b%!J4A%xmc)5`nA@luupyVO?2EX#la0OZL#+0{ z3LCJz4wB=Gs^P1!L+MvdjjNT!&jJ3oA^^Qc8z$%GKb$%i$i7M|&6*8Ioj(i9c-OW% zOs@5Pa!;#wn1DJX@OX{K!G4OSsSZWa>;}f3*>NU7nlyu}(#SV$^O3AVTPvA*7==$l zesqiCpwLF?8K7X#YUEd9;G)F)08}qO;D>XJM=m1EIK*lT>E`Kk6j-Gl6w$Tf%_I$X zDtGxL@rgCNG2Z+bvLkh-l8%$y{xA?}^Nj10$3jgZq1lZvaTYnLo$`mkV~b`38TY`B zoB;wv|5OCo(VpGW?~!ctL=9BH>Wh%&i@Y1R^(MvM^EM+_e3ho$%n>}i*vQG6T`2^=2P zxcQaO%{FH)itj`8;CuO-|G+ z$^F*J^vaWF4lFmVZiGX+l?Wud>*PQXU;Nvh_UF>iUTLjD*|Li%=MjDnEu6@uYL368 z)&8tACc)!&O{_R@0r8lOr(k}m@;r7`WgpR1-{#|OZ4=_6#ze&)}~1PE-O9nSs66j z2%TU4J*)B|iS5-l=**vL_K!pXlUmwriKG1Vs|xzZ(ibMW zx1ADF>WP9VhISK8*Mh;fRUb9{?`YNX25NRTcOghiItn7ejcCvklOL!+q-=@f8*Ydv zO42+|Y0mYOw^|sLJyJ|9DNN7WbKL05L!uS%=bK3Jt5b-w&vWHEQRFbqF++JDU+_YY z+c!5$O>`UY-F=#0Zm@>nFKAv-%m`h3^yszyJHW&dWII#c-a>UGY&bF8K8~(HCEvbo zPAlA2Aimvaii9GB1_I-p4gM6Gw|*LzYzHqAX}>9xT13?4jiL5y$y7G6cE{*{Uh> z4c8v(t!u|>9cq*0qK>1?j_qyhKN8L`E8vL!+-WD<`V#u}5oV9DZ$4S{fnb zDq}ThBG+WmdC%HUx=E+JH0B=S(K&sp7R%l5IJ&L9Hn=0<2VYwe^guaACtmU0ii4}G zL5rb60S+d@bPU>T1&_ukRhy0|R&}!jcj`l%ds@F%yKkSI7B7E6492$sE&GlOt}hEU zE`0!zt~Q1;mV9z6-SwlB>!toM%D)Qi+7xu&z5YdE=g9tYE3r^LlB)JDX4hdqs>0VG zD$1J2?%EdS*1r*+HGGhHihd77#ZFUQ7;7&}XS^Od!a;Y35C63Vi$QXdVG@-TuPg{O*YKFXsaZ|PqQhsex^o21mbh8$4U 
zx<2!(LwVvBYOdZ@v8umR@l{01M0u z)z7oAp}Y7!d04Er z!jG$I%oZlS{8-)xqH_uD3rF4_+)#ce~kH`kRBQ7B`9!5jCl=LX&zxX*bD^ zu)OpRX&v~S48ARE?8GF+W$Qd)dl08=C$wb?Z{D#I6EgjG8Y1YW!O?06oY_1&b3>!9 zg{tmf1@U=5wzKo?W0kDyW90>~AyPv)g8%2aCUs`gifZW&pzxKKZ(L~NJ@uc;&9{j>NpqLdh$)526I z)SB2JfU`%|DHKxthq}-Vp25a5u^X_|=F)O5*(skLAHgh2GWZ%|d(` zLtRQACVB_~;16S2_Ui9nze4r^zzLtLWtw~0li!*l%fx1xh9Ig|wLkIVf%F4qAeszN z21a+;X2kxo&R=RmDjXn61iE*>eIN`d#{&%(YgjR%|9i?`VuA-Tph?g`;-mc0SMQYx zEkocqv^yacP508mpKk&HiLxBdfeZXMj=#Qe&w>Xen@Y~1(_eDTFCFDqlPNqOdG-8n zxd8VnA|#C}yI`VD+UD(i3;IA-3Yym4W58+AXZ42g5cQ50_s}b>p0zt{ndU#Dvj52A z4Q%-1%VVcj!`i@qw;E86`Q6*!|D!YBOEr(6fdaW+J;G4EU+*LG=c`gEpzES7Vy1;2 z^hl^k>X=IzSGx7V>-&(!<~@qsiu)0)SMjrNuBo*|kQ3jKPo&Y^_dztRRu5j#a&!d$ zu6uxGr^|bdJxz$X@Mb^&Ja!m>WK6h8!{sD6QlQy7PSc-hFC&H3$Bpkz){Z{e`&9Ix z$E05Z4HH~mX}drBpN7+j0cw=qoOyNhzptGy{HNCG9}4?NiTyQv0KKS`=H~_4woI!$ zxIjTPK)HR%gD?K?5!YG{97kp1=A-e0A@>Xrp>@O09sU{QzvZib;5dfw&l3LM1^mxF zVBBv+r+Pn#`QANAM?L|%e+`g zHZ~psIkl7_x+E8dLL}kh+9Z-t>rY18Wd^pm2;9>8t|y-5gFwBM`AOKq8`EmW+l0V$@Je7 zgK}vrkeTwOSe^6b%a^$rEM~KQf`OPHfSr2{rbf6tbL0w~@3-}UzhDB4MW z2&_m%?dhra9f==20HAI__f6vS%#WGNo53O`^LGfzpbT2fmJ6i%*L(aW+Nk@5oAA^# zcPt|eOxy_U-scxB=T1<>c0$W}pq+p;LBWPkCd2hotb~BRG~^$F9?5Xq>kpUz)DLqm zp7ILCls)J~jAf~P=V%eYo72&@^avFxZYYM7KMdvyvd~P-RLdS88c)!F{a~!0VZFON z1x$%-U=Y&%4fD!sRfN?2_~4)oNWgiDf#D6fj-UJc`(txCy!!n4bFKh{7xXsZbXilB z+^PkTM`iv3a&W%y@;Hiyv?Hkt_f2KrerE<&?s(0&3|_-#rjHB#LXNUR%|%@&jLQRh z0!g7=Lkez-DLU&XFya0Bg#uA9Q49zD@SA-#KW3g-`wX@Fkm4~ocS-AAJrK)GABbg} zD}mPD{JNG#M=F1&4Fn7k35iZ%N8F;yTfmX3k=3IXK zkubTBMe5W~c}`y};PubnWOsP>+)XSL8ez2jorXQ$z^OGkVgMW43IR97K?k~2Zs`8$ zsba}Ot>xP$Gf8SjifxWv>!7ukV&0)g3lXj8E}<)j^>LIfyuUO8uN$0cO` zt*-=ULmOf*b{eo5D@tm4LFVVyR^<$9fvg9I*B|^^(CK%(T?SOy(cAS=_*8sOU4X9W zQA$S^S~*uDuQI}MImZR6<{^b&rB_xHTyA3*zi22nX-KgqaGK|IFEZa1P|>b0-&y>u zGj+YmLYgZ7!ohBi_gw!-a9pfs1;CYfLU%ry8!xBWR943FUzs=?QV$h!`&w@e7xec< z)mGH(_xou+bz|Vi`Q~_Dv2qUD_}J4bOLssl65ImL9pkSiy|`4@Ybd)ZSvcpxX4e07 zsL%kSZ5mg#+LwE6qN%B`+3}-4mnInm{tt0*3tTW>NVcrA9B#oRqpxXcq9vtkNpXaG 
ze@kHHDi-0|ZBM0T5iWrHmkO=I-`N4%@ugQSsA-75Z+y?y4fZ0Tp8u3&x~p1}m%)*- z*HJ=v=5nd1X)D(oMo7?fe~Tij#LF*G|9u+l_ zD(h3_=xp`J=3}na^^OT@>{tDuKeQT=4#P>?O!z+9(Otxxifyv#CU&refZ)#&_>)C`I zjEaX5Sgx)6#*%*9pT^8mBlP~2Mfc-J?Xg^$T8kymbcT-u*tPYz3#FCm^Z75oAk-bN zyTr0q`*OI0*DI^3g4s)K+Of7yshcA6w-EO*f2Nhp)K?aITZgkM&`0e7#$4Ml?`Df~ zdqqd&it-8z@-^hpsRC1+c8G;zMwc}9zF9z~1w$i!Z7A#`1(|10WIOg4I6qws4mA9Efh7FT~r2DTP{&Tb_z%>(RnG?0=qbCsbmf~cWZi;F<+q&iltrjf%u?$xwJm~2{a z!;E}Va6x?q7!faG2*a}uoSO;DOVRK(OqH=}6F&T!G#i&rEfaeX>Uhx)XESf)5T zhw-^QGJ6r~rf*>o&1@5qCnf>8!1YtgWWj-*DDjraG;rhs{a$_gczI&s^URJcVkTxe zM1?`8WLX6YhC^g$pjZfZ#Fg%&P3q*}ftDV9!XtXRwWM%IYUaC-6(HK2ALo~=SUpeU zyoU)I+`Vj7w zwY6-@s;WAd$3H}~73EgLhB^8=$KTfo3y$Mo(CEBp*e*=%lsJCb5jYPst}WLCT6m%= zX{#Z2>q>WsNOif693?3(iFzi+DlKyXwzLw|mi0ul=v(q}lDeft+uI+P5FI(d@2+c=|fcSf$4Zo=D z{2r_X?**It?7A!q{kw?ox7|&^{>hdIN7wf&1PVxU$wXs*Ac`N4Sz}QMiW8n=&s$tBt>+atkQkdnejIj z%4$?=A)pqYAme547iThwU2wa)4!SwFc-s$ZAc6WmBNUv|$bIKI$BeV?ae z7ODdso}}>6@Fag1!Z`dOYsDw9c)OLi$gGAWYL zdx#}YdXmc?TRn<`!Wy^8!)04UVml3)>~ZO;&Y@1lxN;I_{Y<0COQ-XYeq|<$CN$lA zR-?jm=7Y`)+L+hXtHs&#qFw{VMtOnT+}Ae^J0enpF9uV8pt@4MdFQx_x0>;CJUs4w z=&h@Ggk5|Sb!Jd4#E(k)lIR5Yz} z3n*#lR3^L9zJ^a(vf0ObHW0gCTdWZ*UvL^%8cDx16gB*k@ybn2DoAyoX-H^Rnsx72 zTGH%fePA7c2T@6Y!kYrDa6?18 zodc-Xi}Mq#PMn|dfKHf>9rCCP?*>CTM4U2rk3GV)&NPDlv7n-AZY1Z?_bH?VnjOClqx#%|p-hF(U>-Y9k) z@naohhJ}ohjiv##E!Ixys?dcEFOMYa9O+k_iP|P%;?8n)C83uA5AuqLY{#!wMO#jX z{VA2ozibud2n=V0))Kt;ldAW|Az4pzD&W~HpHn&QY8pU9jGsWj*W(Je<`qWd4e?jE15YK=An9GY{D9wLE?i)D%}}EQ%#= z(Hp9+C=pt;$gdVO+fSYgM0coQxI`@#TvA1lBvUt7kShKF71B5oAM%pq?P1GJSN~$H z*63xiqDgB^p+E6|n%6JD38uQ#lAtUNHb%XW8Ma!o4_@dOFClO zi1di?xM+B#r-a|IQK9BBhK1(E>d2G5Y~%_jk!FJO)u|}0c{dv-ysCTMS(aoWXT4^h zh|eCSt}vf@C!3;lN0YL$F|jk?95BH5!Ab*)Dq%$xW@V2)$y&tSMUQ76UOBg|c?f^Y z3KGg4QXW$OKM0Nq%K~ed%MPySbVRZ5L&11QCB9nFTfUF&g~#R-6R_hVWQ*{JNFeE_ zfz#x)YL9Kmo9W@S_8Q-crcyfLuUWh&WRU2P&VbseL~0|o+OBtJKaDv!P6qcE-bzEs0T!9o*dG7phy12 z!N!FxirV2afml5jme&rTPN7M$Soap^U#9LgcyQ51$U4h$XK$3gg~NfRZs3Pp{HXr) 
z_StGU6QHtD)>>Yya3GFAe7B1(^*zD*c?c~VNJ)(l1D0n0FXMq2RPlVh+=42Vjze)38I_&nQjUD^D>Uy%L;pim>AhhFV$DuBq!?>*m=_%|O* zg#!3k^dk=X-;VeDXRmatJT&0p-b~VIdce)V?f`j2<6D2|?T*a{5UyO4bs_8hBkT9E zJkWloR6uStzWn#+5ONp*Xv?<`Im!M9$Nr3sG$g=w#U3TaJT$cD1D{Kr43*9QZRBtN zxF{b$^59dCA|8A$t^lCLym|JCg!}jO-?8F@hXdEsN2Ar!{k>>+c=$qa-}M*8DAVO`+VMdj!T_-PwO^q3gUBma z0}<|TzBA!|5J@kf3voL-Ki{|W9<8eWM5_yK)ye<9es_K#kVwngA0C`Gnid!$<$JuU z_wVX|`la>1$N_&yDJIZ?(`P9~zw>bZ9!)C@Xs1`La_ocC>H!%MmD^Xt6A$9>N(5*p zzZ%VdA-BK?R5`uT-+B-SY#^0ql#mYd!MFjo%>p!mtJ{i`2XR3BNp$~@hQAPrYVe=v z10qL`WYXt!!9a_HWkEJh&WsQ3J`OuolZ(5%8lNhnXVY}nOFAdFP_%~%=Jt*5CZH8p zE5@B(87{#D?S6$s=sKe*eQy6uU28r7=iF*Vy9k|qnzOA%?a?=uZFd=N4Ua$_oT3}& z0|Thr#441D(+PtEq1-m|+2cdSu?+=)!?8p z#H5`xDOUayvhlGQrqSJ{v3WT*>%~IZX4#tngP9uU!os34y#S}5%q{T9@lPBPFs7Pa zI63ThgSNL#s#>!p5=wP@(VlbKA>Q8JZti54M_AG$*x9j&5?-Vd9{x=~=%xm0hoPh~ zC#R@a{cC^Illm7Ox=v+I%fa5BA>j_+c6&vzh@6>t3u7_f8uAM~C z*Hq7p@~&@ZfgFR?$wJL+U{!TX=`YyOMJn_7{>_mOq5`X@x3_I1?QIk*pY7&Y8<1FS zu{+D&cz1hM3pyerlZgL>L9G~@`L=t+==^4pJ+aXO$+qD9MGEBPJ%XF6&Dvqn# zr31LB>2K}*MTwWIJo|!rclibiCA3C}aIKo_q3Q2}$;Ynh4h^7;P6oHQonGsYaf_Wc znPigxIGz6yO;ZQ#YeGk|>HONR`10}HbIuZ0zea#Sl=JLvu;lkYrmi=-HJWXQuME0Kxr zfbNfr)8>il6pS!K;W*$RDT8ONRB^q#b-A~6R!&770;wWp=GolekSMzy}?SY@zs;L?Relqd z#*x+9*R;>NBY-2bGl$4o_$3!rLp$w=f;z9xd-m#^TEnh~89(h?~tp@PPzt?!}y z;@(fkHrpVmAm5={#TFXH)u(1^ti~VNN^obGpKResZZN!UO(Rh?LDe)u0uEU|It_N zg%JATs9*-{E^E6uZF{$vGKi%>PZSzC{LT#f1`inSx-wKdMsAg5lF~bONC2Ee*3l-@onN??3sBppApz{UJxxJ&(4@>8I5Yox%S<75V)*dq7*#yX(jFYt> z4JuGh*j6znlD}a4LXE{X)iHszcve!fwamKCqtZFj^~#L+whqmQpqjROW>`wl;v&E( z8bqy6)m{iNSgEP$Dax7dGdH}S*z`hsq3{dBn4W|SX{~7(+DKL0sM?&D!$~W1mJ-9I z1tJwMM*8am*HagP-MWAgB|OTK;bP7bDDdNnQ=grdrl?@yiSFow8Oy^fCWGTQ+)b~s zToJmnOk)maFL^9Zhn$A?{xVa=2J)M2`t>h&IU1YggI&yR_EIHXaVbM-OMZ^I(>b_> z+l3allVS!;S>E%;@qJwzfs+Cei8evGthiaCF>j2laQpn8!g?*d`1F0widQ}o9`rm> zaJUTHCCtt8CpbNyL;DF%=f5@8UG(#WOacjyzEs2a@0)xckXFiDoO_KK^9#8$B&It6H-!AU_(D`psZw#`zxjB}{g#%FVNJK_|NftFgZz0hXS*mZ)$?aB5D(;}fGn!;C` 
zo^|t-jnQY8lV%^*oC?5%qe5I)u}N{PmE3L_6^s>vt#Jt^8H{WdBOgEobWUS?xQ)5j%u$kNmF0H02I<|79&PE9jbndd6BOqs} zRMn75DEGRPg95Yus8U(1ay8dJYCh?!7GT&OpUX1wc%zr779I(NM#K{t)~>Gi8E>$p0Jg-RD33XeG<$ud zW>#!yM06tKeb@Y;#!*(eh6+gf}D>pl{&pU3>D!+gKvn-3S4e6Wf~QTM635k)a>t7{FRtp)poZ|jD!lCL-D00DU9TKs$WmBQINHIXgY4=e zW*^QmKWYEA5&#jtbO7os0eeJ>C-mYJk{9dH& z*ODJb3vPp*!=GC#{jde|>t==2afe_C*eS+kjTh9z779W>uSuOn;Z5wQ2r92BpRVmH zrH;JXt1f*+*GvpoAD15Kp@5%aLyNQ9cFC(nxqTr~rKY`YN-AAgwEj3_bh(|VZq<+Q zix?s(rYV@Ur0X*$UxTocBoQ%|lqSlWljU6~cX%i5(AJ;o6ExDPz!IR(IqVSBQ)N6! z1(}zZw^>I77%F8dvdvHu^JGrUG*Lb*O&)ZIBd|G>p>fYljL&=$30FAq5xKeh$2Bkh zLuM*VREvEqPhR7gFIhUdTbF*deeH9{QAE@=H8u3(MlV7qyc&Y(bnC_F$9OVBb9lFF zKwf{`Aq+eCb*>)tr@h9Y4U);Iw8C=kTZz8FNd~_uN=Ks3O5FVaUu;`Xf+%aA#s5*M zYHV$Hry+LK375`4ZQ~;5>(x+eOL+=ou&;PN2~!3B&5iP{HXCvEIL4K(RZ@g902n@T zl>C|SOJ70Y5KiQVCz#swgb@LCcp&;>=tD__LnzSD$;A2M80&H1B!YY2iNnl`K}C4Y ztB2qdyMs$kXg!F-eKH)P4eu5}=EQrm65W9nZqMMgcB$UVoxI2~PY^F9si?u{b;7ve zU>Q;BSgzI!)aV#Hes=QTnANq3Q`(XkovRs}y1fhTa+8Jxef&+$KOBzWkliE}*YjT} zdd|QspHGr65huG<+7IrigQw++UUWQr&g)|cWUY>5)3dgEgoaR9bLUG&p(gvOGo4X# z$1yhKHfJsQ)r$fH%~qkR=Lg5CJW&XKNWKJ&OU$swZ$A5P6T9{LaMTX_f7sZ~G-_nj z_g6fbYi$_!>;6UepJ>3aJ;@DzPP2)pmlof3LOOp`Y}mc{-hL^OtBef%n|* zk~kBkk23ey7IKO`;UJsHlFAoslgr;Rg*gCWJ(LO)k6YLr(@3TRu>ENUc(A zn}7bHZ+Y;L;kAL}g3L(WCC__;lBPR!$H2WIXs9CS7skaiH7{;4qOL9tm>>GVuYUT$ z2eyIz$H(C3^<2WDqP%N4%VHU<@ExmnbLjLXsXb_^w5~GMb4S_D+3HME{y)I%q{{)~ zZg7La^&6Mh*eW@ExHXTIj<53r(O%=9pTqTm?PVg4d-C3{CWbK@FxwoMZx@W+tP>>= zZYk*V?fT03Bt6(e)?P*Mr@H7_kWFHL{83dVn%jrhi3y>a2izrieda@4>x zxzW|dwTldy8ilsg&<2;Rj<<2k-R=l~0Skq)*7!GO?2+{^jcLu+>&UZAZ$$>MR4jg*Qb*?%~;gRf3h-%(J1PpYhNIZrUTaL!tg+gn_Rrq8XY#I(0o z`i+b_U8aQ{n-UYB|17%%c^P&XHFiY7GaC5pK=%(x66ET4<5lxPdMDG{Bi{e101nj@ z5e{-MK%ZnUYZiT&zn-#s%g0n`oGuDnM}Slmj%a{R=btIUZw5`aOS#SdcDUMCr#}KO z5u_x(LbUD(etUV-F=FG=YbS3S{-^Fz{*B;8GT=%~T{u9w8Arq$Hy^> zRI2)F#u+s^{4N$ZmHG!m@+cR#-k+y6;dQ(wp2w#n#pE-Zm;q0&jZBE%Du~y?hc@k6 z33wRuExECr4RK~d#h0(kAjd$jVxhWGK4q1h?~vQ8QP9A<^KQRNJUJsKLXn5 
zv^bz@*jygYQw5f9h#GWcLlZ*J>ED&H)t;wjCT-X&l>N=_0WGCb;UimQcipApNt(h^ zSDC0ZKwz0?gYHwb>9)qZTwd*CVrx8SEAM9MZiWQRl9fZHXA^BNZ=OwG>_@5u%J3qCa{N$obbY&qAxL1$>z}3QM z;S~QYh3?&>?vy_ZeRwx?a6E*{XaJBDvLaFbYZd#`w(OsUf1d54ZHfFF9{=BY5gxK< zo!T#LKm3jCE7_mw3bx^(GVQNUb9nZf?ivCP)A#`B2Y68$u-slS-3RM0oAw51=X(Px z%m-AO4+z;c&Vw%D^LPWWPMX&k2oEBh3iRzQ;(gEEpMx(AAf5khOZZ>9!8{lr(y1dr zepp-BUeWFVC(?} zZ&ru@hT{Gl&r|^Lo|dQlr`XZ`9voi;9vfI@+09SewAWXavQ1JW;`wt9Bh3M$(9<-cAK=pvO2369!OkR~RdX zP>P6SDwyy4-v%1U0 zBqq#@(%jx&MO-Er!=JB>wW)OV%m~!=77>Kukyl}8qo}XMB=Y@gC8^>9+;fle8&V~gr||u&%@7AMD3|y%w0329MNGCeZ`ou$#5D&2%Zi~Ng%`d z=goeHT}X7o@v<{YVj}U<7C?Z&3yY9P_rk(pa~+&?Um=jYeuPx~@lR#EXqjz7}i{oz9e&e4js=R>VDCF1Rm1*187+-s4s=&aoD59`jiht!N5 z=L%%Ez2e9(i5F}m2$Sm3D4v?8B>JzO+uO-7AWTqQ8b%$)-_!V#Jh$4f$PwDc0S6Bf z8X&#N`|*N!EwegsQ)&ZT_eHRu?N#@8-zQ!mDr>j>oV9PFOhaz_#!s%3 zZaWKZvoZGXcP^h0CX+u8;ZQMB%50r@qo87@6O}{#F$5VJiCM|MV8NG+TWEGA8T`Vk zxla?jGxXkAbisxsk(2Ryq``oWY+Oyh6nQ!VrG{(S7LD&loG<0!=PIPYu3`xG{0z-) z_9qh6Se{k$#2W^-Nl!^K!A$~7<7R=8qqrUJW+9u5ALOug0$4|4tbLQ7DMBqT2#svo zhcLpCwuG^E;PtRWb(YED5CSvX8sm93W?aK5$YI%#VSI%hC65b&Ce-A(9qZwxSJ^+_ z3ZJgHHfh)Tl4C}fKg8LtPzOAy*(|DsVjn&2G&Fauq0u6-iCCxm)BR~=*Q(}$=e3I!o!=Mc5VFwXRvDrU+Ain6l zVPfFZN*CGP@O!<7FRmyRs{O3K%^kTk z5Vt+K=s2)PTMatw0Nb;8Tj4x}f4b0)J}Tf^V6vt7l9nmN%SK?5Z!&!^d6-Lrw87*T z_`Lm7;t*mM!P6uuOjP8spHm{P#zJ}^d$JM&#>;ofu-ngIxrCYU$N2f zWFkxkykUjO??9b}Cs+*_^dv_X)#wRqCC4xh;J3%c4Fs)naHBE%53CZI!yc@Xy2FqL z`m!P~lUM$N9sKA~fv!e@{7qyM8~!`eFX6XQ2rr)$2`5Lv8e`7AWsV}ig$WX^9MyXT z-yevY1yZC|LDLPg6~4$MHHYy;3>Tdk<=P=Ym3XQc_KG={^&L^($8nLQto`?1im-*R zRG!h}xFEQ|sRlKDvXod76_CKl?pCZWq<-F^!pc7QMCKcP1*tV+^S66e!ey-OZ`Ag9 zI#~4-ZN8Y1olZxnM4_g_C)w1|mmCs&nLeZMckXsXcR)L6JEqRG*F2st1S0Rlv%2j+ zykxa}juXaC1A`OU_bt|-__bUjv+;p<2cuD?hG{9X8lh$i^Q?h2+c|a}+=72=C`pI> zhXi}>7sT>#1p(FHqU~QdFzV1jpFuI9giv@WEg~KwR?v$eiXaAYZgF056mgOtHebzf zGhV6DSu8V-gl;1j~ZP5+AV4?=If=QV| zT!m~14>0Bp=c##-cr|$$npm4eon4#-oRv?xJqjQ1dAkqMD`i| zvlF^5I@$BeFv>8Ru+A`#umr-J=i5m;Nw|qGlAa~@C)&Oaf890sZIEU#Yw&7tCNVhC 
zuhPD9r{c15qhhj}-{`8dzq5E~uP1q6zJs^-Iy5|C$e-yG*dH8pz{HoVo4WK_ApH*4 zi{R$P6RKd|U@S@I9?poF)K~Hj^4i13!_}#+sWLi?Iubh3I(a%5jav;#jkS$|jiC*8 zP6{WiN9M-}zi@vo{!IF5vNrV-V^g4Wp^tB*r_XGVU_iZZrl-E2<=bIL#_&qVgBlD= zCgm!N;>%GAR+e5WbBYG_qwG~Higy%vS;ATCZna_6ZBM9$6uz;%nlUmmxa1 zgirJEr=EpB13&Yj_>8Cg6zypVHiSSjgyHLxuL&VSal+ECq!FZ5q|o=j?JF(ewUt=T zRJ;^Xl4O%V&faLit&VL}X;KNDB(M;%P~U~ywcTA|`^uKZwr#xJJ3f%ypFdF7=L^C~ zIEiKec~K}Z*}cl3T%m8zB+ptcuz&Y6k1{YjXiek?<_{NY6Y46Z*72IE%&G*(#^Vaq zoUgV06mh}D&sBA&aHm|S@HnY!pVwMh>Q7NmiB3W$3tU6@&lh}bFLIIEk)(5!jATLT z6EY`ZTZ#A?_|hfstlnx1FQ_+b-GCwzyhp}~5rJz0d4igHYDtb&EhHu- zK)Z6`iJSNZ<3*MLrPs6bQ}+t5Po9g%xxZd|=v{ns=kb{G07sWfspeAK;T$?1dXNcZ ztP57R&9{lSP52V|2Ki>(a^9ld6~J-8MIg#Pu|$kV7)IPhOheZ~euF7Ua!d7{K9|Or zYLc#oB8+m7VPAsV$6dUY7f&N` zBdqTcJr><@KkmZZ!rIe?6R^`qQ(mPff-dR$Sa-eIc;{#vjaXKeYg*TDoX=IE!RKCw zhf+_a`ed)7EMq+>Ff#CC&R^=%w6glV?xM?K0x__2Ivx9u1%zsTXLOuC5;+Sjs2-aW zne*W}=E>t}ck*!}+w9wj+W?as^PKKf?El0G=8Cu^+Z|Bv7jnV%;<(*W3{dL~dfW6Z z_gkM4eou0!d#Gb5eyBjG*E`&I@?T06TPE6v5z;&~EmY=osC85w@x9(a_j{Zowwp#uQp>@n<-;X<dW}831&a2d3zg};}amBHt@u4wWk!;t$ ziMoouHtM5ISEyBXR6dyaJaNZv%T8uWWv0GeB{<;&?8|lDuafhbWsmJEEVPZFaJ%5P zYM*jV6^(t3Dm@}yOl`fp=o`?%b`V}%98_PvT@c(|{&QNsG!$Ha_vS{R>Sz6~uv%k%qQ*sC%Ory)9f%^k}f={R(3ezJ9ffxL>S8l>|{RP3AB(huh!jo%4g zlJ3*Tp+}EL<;VNdPLQY2n^BF>K4|{@yar26Y$CsjMSoU|V#&Mbezx5_!0FBDX5l+w zIksGqQzA3!Q{blEKAicjqQkuXb)VQk?38n7=}gOV_P46>;w8^fF=>t&NT|}ENerKpZ1p@zKa?A zP0AfN?ohp^tJHLmL)j5)&u#YV6#RK-Nk>y;Yf-r?$@Y~Ug!Sr0;$Xj!c^Xr!XdK$H zV(<=TDCyW>J^!A{s@9I@U}JCPY}@y(hE0@er5sR`N%Cef-f_W8h0u)XB94-NZa&xL z+-s#FYbM>aQSmvMy@WFe6H#iJyB4p{#~H7QdGF0;eP=6pED@m5nAE*-_bzstLAY1d08! zE|qSkb|4$Kb3vc%NLRr4Oz|HR_e@@D=XLiG@oNAyYFO%+<5k&@PvRPSG&<|k$#ZLeuAaT(I9`MxSKpF8Lh zEo7qkBQ+kipNv41Am#eY0jeA6GCe_g5z(hg`%3+)bxMYcgi1`RPRiR#WFpxex*$0x z4M;;>N<_tQF*I{o^-dJY_j7ajPM9E3eF|<$chXgomo|cyghr62*Mi1e@j}Rg6E`-= z`-lPgjMQKqX!CS)T;n!W`WX4x`FxyK+N=6nSdhIf&^PT~4t6SlS|q9SDFt)ZydmY! 
zNvW4%k`WS25+^_0yW_i;Lj96hleiOVsuC>8E_}yumsPCynrre;Z_yp|dtieZw*y7y z2zl@t6HbybImvAK9XTJ4$L2qt1ery|;o%W%Cv3b>5Q^3ya$w73Eab0U8@8xqCny#h z#!vV1_09Y29?+_mDgPn_esduC>>2}KU8n4`-awOY-7nV(;S=)#70AV#OB$Kx?@VV4 z^KP++$4jD1DLXrDE0dZNs^_cb7XiW20vnRABb_`KZW1$~5WkzSyT)m>8Ezi_xIKfgKko7EQ(dBX%w3 zSR76G`LiXMJERrZ8pG&Twg89gWnVP$m01Lo#hyWhuiMu; z&%E$btMAu(umWzpT6&A#dFHLjkHMlT_Q|*w)nl{pk;aV3!exDs(@#YkOOoB}W9qd$ zy9A+`avcht%{CghrKZ=7MU_`(C41q$b>k`O^V)VxElW;jyXo(H!sVOG@}M!KSCTBA zSQj2=Df|;2S$w$HU%A=%#<;3$8me|54jz8KK~qEVH?olWoQ&unh!2C9CY8?fWh2cU zrnCsQLLKpyp`SnHDV&te?co(HMF9rBA}ntQs=GgO6s)C`tu>-ffKUuhL4dbC>SW-; zD&6xZp;BADD1C1wqG)lk@PrdGgGVKC@u|PENs8ytNAcUi_d(WHiI41Ro?buqgrkb= z>u%{PHk7F>k@|f0N&@=BTY<_EBQ5%b4|RI|mY!pSs_5OJekMmu0Z}Ir?Y@`ufBB%@|(Md#hTj(yPd; z{8Hsq2fa6&lbehhXPk&1n!CP!YZptFm=eeN zko`fwhnS{@Mw&+baD7QXzCTqhDquvGufmy?+v^S2q2g?GK3tR0zIcX=rGk69F*Br` z+k1XzP)YTkr~71XYJlLaWxFoF=Uiq0wEtUeWVrHBH8^9QW`O`&u{hCuIJP-uGm~?s zaUC+NoELDkEfhRfPA4hfZuGqUE`Yn%O>Nt$Yi7#sUFhe5OR``;L9L|gGsCkft@Z1% zm1SZ!?VFzb2M2$c9a!R3_%&gmh&M98Y7u@eg~XGIJNc1#^|d?N76tlu%sy)mp{Mj8 z%Y=PK&2TZ^k#2tN7uFT^@%J!LxO~AMtoGI{`^oDMi5!E-Yd35!IuO^8=+Iwj(dE%W zjy_zyxR=glvkrhZJN(#cyK<}c+hcW1ctzA{1Z??CGeeVgK-tR@knYLuvG0;J{PJru5ep0L`mZ;2h$KDV)7%R? 
zPV2m(GoxhLt&kPm+gbQlBJMzq+S4AV#}o=y`SzsRN~l?)fFhd;($dH7_px7QqzgZ&2n{0Kbk>ii?x>ui zcO#x4lmxd1O>`{fuok$>5QZ0Y>SsB=?D&Mg_bQw57p)wD4V^AyD_S5*4jl-inl6;B zZuX5mq zp5s*+y>QG?2s|u;?{ht{Dc7aiXRnAJ9Q2jeZ@&h9@TlVQ`E$C3#MKI!4w6o z1T7G1H%TmB7J54ceXjNk*u4M`5i(J0QLip0%5?F@=zkaFYIAkwZmeixyqOKWQ z*(P3xZm)U|gIh#{?@q!|XHm%rt_WDjMqcIdX{wf~{MxATqgGM%mEXKIkh5lzT^ib3Saxy?9|TPiK#KhAg>#*I^E2RTTR9NE3$a{G zPrc>pZrXt3VrktW86~)n{jfl&-xoEBm;KJ()D-h1=_GVe{m!ERahz-Aa60rhR7NH^ zfhPgtTX@fXWwco^F|viXg?Ie!q<@d)KtHM7Q!S*#wWLS=X6Ds&&CIjuvNE~y6L51W zL79-2{sYs^!o|g}l}Nvb<*22j1@6kR=G_N#KaWQ6Iy~hLoIl~x>?JcgOc}QS*~jqk zsp9bPvudfT*Ki$6qX%|;+GZF`FqpijlF!>NbKS;As(Qwv~ zljSwCvtc$gw)~3QZND448{O-KKp^b^NA-TJawXGAcy8z|yGkAgH$6*#q^53U8 zTM1BV$SIPG+Buq#b24)Tc^Lv0v5>f_zlZzW>%K}+#9%*|8bO8(Zb!tT0_jj#>CbM zm_v}~H7Eb?^Z#Gp{CCEGU8(usD>=Ac|NF{+ee?H9ewN1t{A)pfcI)>juw8Vj?Q;u=`6-+;PQ;JAQFtzaz&+ZoSY`O%9@Wtz?kH#lu6(eq|sk z#DJ=%(y@$)heodPp6;Xn>9W@$ML^W`P;-whG)AT|%F*p=jTg6#jp9?*DG+|Hn3zd~%!WuQ3I{;A*sa zH@@I=9W3@Zn6J%~2q%JdCr}~$V=fO*k)_}C`4x-G{q_;mj~(X+3nnvVy1UDkMM`&L zdXv+gu_V3Hw?U|CigP9EER7e-UU?doACgE#VHE-p(eR~YWy9j;3g1%|Y1I~+!3t9R zc9bYN*lt-7f!Z@2iLnWpeDy6f10gFdUGm#BgJkK*;ce~Hi4 z*=RSJlr?Sri$ZK)k#>A-+zsWh+nX6W|DjA z77V;!1=GRA==?BK4x9POoMAn*=#W2AD+P#z#7HT>U(r2-V!N*oZQ^2!NA*i;Y7Y{# zBDYK<)(2o_XS;kgu$Cq&O(+2iGl)j6YPi~TD4FpIh*xkzuuLxH4IxsD)7Icq-RqZU zO(`fSC~Bjn0VnGEJ|->~QwEANtEcg6Yip@io(tt3M5TKJ+Ix$Q)%DiLe|BF=9eAa> zM6SPM{3XcWt3nm90%v$>aP{z|R^efh^uyq^MTN<6gcj|EfOA8$x& z*4w5z*&M_qy}t%5etaJ~o)?8rN zy8Xq*3Hlt^^g3Qo8KRe#Tv~iljM{ajvg!PqysrB@xC*kev4A1ZCfQ4w)st{q0swfL z&e@ZSlJafM8HbDITvZ@eV$P))yJjy~Jr-~}_cO99u%`b1iZ&I!UqD8iq+?MKi zyto&JEJ;bDLFY$j7BDz$#rFnWy&{0^!Z!4Ic#_xi)SOm^_QGN;JLrVxtUD)C&{D29 z=ZaQ7H3~2ePM4>GRDWX2Hk`B5kdq_xm)~<~2?ypxMPbwO(bM-QTbn@_8>@_ZFlqI< zI2s167qfd{%`USnr;5jGt<=r%Rct&aUxzP`llk3J@UoZWS5&YzLYp8hcsK`T#RGn` zyJgMLDz4TxD@cA>?bY4IiuTRJ)izoAX0O<9?Z8$g zsMU>0qfC|o71{rTL&ymbceav48&ZiLH+6LovF3-8_GUbbK68^^PCYqvusr4$VNKBc z1eCEFuM1nNn6ybr^O-4NF=Ku5<)J0`_E<WbC?p_VcDYXOb)?_*kxf znb-6& 
zK4v6EOi@KMCaHoxP39Lma?#Pzj=OmYS_{j*4NpK&k?i^^SG_oT)qOduaJhqf?Rr~X z+DME)UU94ixNG^3M$+FC62aNzV(%Yag({-S}#45}rx%D+~og9}7U<~2vl-?4M`X~XUDK%Q(8+cd`ROTPAp z`(`Jno=CC;i-~-m4EDXX;vm4McVm~D-O8)e#w7L}q1(MX+~`Cu%v_=X1V8>6N}XlE``?fhB% z9|SDyi;69~RUkMc@n|E+uV63mCDW`WX_v+}mVeZ`9WIlu3$-p8fVFD*tGI0DH-616 z1cwuI@y-=Ry1BZ~A!Eo9j%7m(V*aKVI@phWsWBV;zu%Y$%V?1@ZINDUlS>BgT!eg+ z)6OWx)j|DAs+SuUciNx|@!8(g$#bgfIkSvB`kzESl)nSOYIR42VV5z0OokmoLPF#XHjPb< z)&YdUW;*!1T+&i~{9vJ8`=-hDfPOo}&o|kTw`hnel0Sw<$VCFk047t#s-$xi6ckl1 z5Vgo8Edf;btI=|Op{un}mQ(?vOI@cS&hau`zA>+>ixofc-TAC+YxiIhyALXj%ZJSF zaALlTG6JimrdkfmNvZxghV8IJ>(R^rbW;A=BDBAcY)m7}nv{fn^V_x@b;#>K_HkNG zOHJg76xovkzi z_qi0S6n!xBYJpgn0$z{+1O`)vs7a6oOq0in=~!7Ufv$7(6g?q6{+0uY;GeQ-wFHI< z)b{l~>7Tc%^K7=>E`w3OO-HR%gTiDWKJGEO8$G=_h$7~bza_+#5ZnDfH{(qujDTc7`o{PtxSZ1P0(|9keIJn$Dt{@*?M|F4}O zWBBtbiT}x=zdhxn{OUKBn2*r(ed1&Xb{n-ian|;e*ubk@N4tN!oxdD0rnZ2|#b6|x01YkLiv{kHCg`=olg$ka9(8|Uxk$6{ zIZc0eA<7F5c!W5+_B*u*G9l<0rO^Fl4v=8v+OGE)Lc+u2zJC{!mX|-*dQ;CfQ?Abf zfTzm+m3E-Ma?NRA>(pmaG=@3e34?gz& zB2hk*hmX>Z_PeL(kpAISd;^Q9Ro}KnlE3@9k=X&D`d5aAhH9*;E$+Ww@HweciB^_)!Lk< z=C*@D2DY<9H2dp+w-cz)lp5>}fnR;6l=GA`K#J}Q7gp-QHJ!#&VVKR8#(~6HnD{W- z?@iLtsg)*1O6CDI>)MZO-H)6{L+^?KUSQG7ZW=xZ08eyUHRhiSTnuyt z7*&pP&4bH+ZBes<3%RmgXt|1Gtb7X)uYD2q0sCFvZTF=NUS(}IPN|`H{g!i7B@fR| z$igQZ@BK7+m`pyZ_}i9=TpKhp{WaiT4FkwDc)ZoijZU>VR;5r;gw%Z%c0!reM!hpq z@VXb3ob$`1gAY(zI|^DY118AcrpW76-mr$cJ5dlhbyoF0M?%fNcsqs7BaAPTDa&qz@${9L|yWuR#q9u1PZxCXI@YemekW&D^=>M?5&Um+wwYxTZ|{E83)c z?$HgD0GKUa9l(mldy_@yMX48oPq6wqXLP8ZK7IO(kdTgR#k<-(bBDa<9 z1|xiJ0|2AM4;SR;^EB_ylJe4}cBa(zYxnY(JVstxxT(rzQB##(ObW%U8JXPYFqO`1 z9j4d0mU|YIg9^aOQ0am4hke+q?SSOt3mfWV7USc z3t#bEEIMs&^ns2SoklcagX=O6W-GhEu~sLg)x(ZjgEb5Jb?=($Gvk=F-weIh>>EXK zcD`K(9GIpn7%Sm;b3hyHKBMOazP*N&t7SrbNiibiQ@JTlwnq}AK{V3N&NVl-R@#u_ z&di8A>&1r3>W8DBUB>_(mTvdG%T=UM*ZvrB(X#pxO{~WNlZ00#`Hc;|(8FytKTsgY zcL9Wj9Z5OZ@BD5!m1hEV%Q+@{Z>mK5hST6AUaefu#smlwD+{)?huNXbRb^n(^`6KC z(O^uTn?(_MsjFamArXx4DiJ|71*(%ta`z 
z7VO90aP$pLoOXN6&1KXfi`P||c5lqvrgGY>jD6hoU|#h!HAgbCE@Gzd<7tcAl1l_Z1ihhix>a>HXw^>Ty8-eL)h4*_PCL zq9;^*JsU(27V7Or>5^R*EL!o3?eRDhT?;Xq_9$^~p&k-sLrwI*lEFgxn@_M`DZ;U| z-&Vb8e3^pcl6vz7?A!;S%erGZv|P|(n;~48F?;@q0K>vUKIeE-JW52W)2s`t5qet7 zBr#2Ibp$(;VUZhk3ihoQ8cBal@-wAs2 zO}}412;`R&^nEQhTY=(SKHVZtwoJJoBJR-EKzr|^Q(*22x;=Hr(sJ~+3WXUYH=0&D z(vMKwN8_*XhACnAXFei#qj6shVViC9!J88A`VkyZ^@-)=0|`WLUXX7E^2Na_QCa-_}r_j-^y`DQa*i}u4*o*FX$_-Lfa}hjr2+rTn_(DLJ@6r zsMi1Pm;b^)DLQ(^??G2~^_9AwM_yMJll894Qy0|DBZ)n$J+p&%MFW1hGfeC`^yY4w z2W-5C-w+}{Tx<5cbln~Q(Kmz7EmyznSpqWio1PXRIdrFyHdC0g+A=KRSHLd~!1HZfOgzd<|g8AK;83dEl?}#U)pD(LRetkpiYJZc}BQ zp1wA}?zU3w_`8!WL(A%j%udJgrxs;{)tl07;v&*V>W+8g#k|_5Me5Ol@Z;WXpC1>- z#80UWQ35%o9qE@d9*QN%`HX`$*_ zCD3)^+GSqMI-vfCivL)sR;9(5cd47J9uZvdOmiCM8Lu>wWbo_yj6u3R%3Pk|ci*_l zZ8Oh7$Yq;qJRLr>j^J@haUKPk!(-83SpRB>rxVa1e`-XVHdtVuL~lW}L#Nm2QtG97 z^L+Vf1>$pd3z?&czt{;;LRfpbWvG{kD7n6!$fG$ut)M#2ly+C6MUvFxyJgzs>)3h6 zd?WZJ?Y+|$Pv09Y=uG8_k^;xN9u30+FOsr03P1|jAVM{HXsX$Y%dIen9(eRQ|wWqnn$!rKmA@&m3OAC`rGz@jiV zsyQS>z9NR#ViklVZALQH=g5IU{6>~ut#s$CX1T0@DJ0o)vhe3w;vidzN)hE!lM4tS z^t1*wgc%AaH^n$AoE7Gk3P4vovU$*evohW0(htki(Ub8!a?7TzwXO|!qXtS7K)LM1 znCQ&s)0CoRhk-fK%yI$mVk46l&gH_8??rfFlXj=pJYdN+JHXPucs$kTTVqv~=FFsI z(>nOdg2BNS3G`#2S!-5-WX?pc&}c?n$ckQXnA0M%)o0MzWQtHr^sN#vQ#$Y3wOwax zsY^wq&^dGoB3qnWjo9or5gVDaZ?Mdtqk@MLvs?y^Sz5B^N%&!N`HANTAeriT?^PUL_eQi($p}P%-xjqoBm-jSQf{tT| zc|{KUo~O(;|00Ols0u)8N!$+EJA-Y!rpi63O(Sr()7o~L+mk2x7vs>%#-w*ME&|Q) zXaX7F;ql@k=hj%Wl7$cl+KnSeg}}Rqq{D!#b&=gB8{jc>B*J3=JfqE`s}+o^16L!9 zG;^|H`?ty_M}uq1-ypVYF=UE+4G_ClimI+j{Z&P8W!rb9$#$7HRvtz7ysNpz}@N(Q8@#;T;QV=sp4e4G-xJ)S4*Oo(km zH%YDPT$=0ppbKbXaSyA3SYOMEigt&%R&X&|&U{bXICl{Qo3`>?nWmv&PXUtP;__Qr zym?&~>f$%?Ln{v>rP`WmlthVnBGY$N+P?1c@6@blHb)xgi_(*RPHC|j4?Tg;9Ru6R zXi!&k^nN=uS!oGTss!^W#7W3&zQLAa0=ONmPa}Wr3Huao9#2I($F}8{1jFhW@w9C) zk&bz&vL5w;XK_G^Yf??mb|BgmJu$xsR-HDfWEKy%Pt4AHbym$Q?GXRki}Y?2FkzxSPM-en9ES()r)1Eip3 zCAe2w2=F=ztqXf!OSYiW@D_ZD)0z5#-1;#{^?vtP1v)-wQ`4{(MsJ;b4xep(vU;LXRK%=zV>N68Ri4?-(_`{4G$KZ@=ib) 
z%3Id<2SzXs$~>o!^i8H_Q%nwm-FQ^#7IH&5TDRFFp{3&A2Kwd{?x{kKZ8djQM?+h#7y;0-Re0IGnDTA(z>@R?VcZbQBGoTJ3L{|Q8}!M6KRt(XmN{U`ju zz?qR}>Ut=Ty!1jUSRaLXmiSm;3R@DavL`9h``b2lsK&+j!Cmw=8vdpedY=Fdt|5fzf?C%MWtPvY_gSX)}`Z>~Y*{+QllDr9;ft)Ut5!!0F)4+eH z{(Uq@0BALBB$9vF)PE>DL^XdDchvoy?e2dFyMR?3jath%{^MS%;cz@UC9!$&(`jk}<`2znv=Swqw_Pa8j<{S2@)868 zLxrp$d@wSqb{5u2W!ULe`uT!6HY#e>p?fkzz%~6oYU;ZC`*$hFstbHEyX_%H1#Rk! z5y6*1a&BYe;a%(uwg(FGz4K={3MS>7taQwG>zFLOmQz{x{>jgjxL?YgVJh}SOnCX4nW<`1Hge&tEUSJ8X9OkPex`WT|m6Ku&IEA&s8dtR3O)St}0tsGlj!~ z@)0$AJT5?uSUT<=N&|ECJWq^bXI$$0vS{={bJT(#^Z+j+jTE(k?wWf*> zt;$=sVy8Td;{8ih`+?;eR*W1axT*V@?a6c$5Vrh3*@~BqaI)QP%i;e{R~P{RYbCalKn$GMXup4Z1UD*Z>N| zO@P~~`?yRY`nmV^>I7`WVZT&k0m5ZYWvdKB8VBU<=wMRMUR27Q?CcFd&WIHc#qYyY z*D+Kz1O!SpmpzqcUcHrp1Qr%7q1%d{wj(V=B|l-NF$1<7+*Te5weaOvXSQGp@pr-f zb<)iyeMhR}TSNFtZ9baYqiM?L??Q-vh=u@6XTD5P*`OhKZ{k04XCfaes97itn-zD16 z+y#Gmmt*4nmwvrFAFMmAxBA{)AK#vg3;=ls+@T0BfU!)(Wy`2l08m-dfDDr+ zW0mO%h*=D;NVx4Yr?Po$7rCzv+aGd$ZqC&jfiiml&|JXrvj8E((_<-9h&1b}1@tep zoK0(=OO_vMwNG{$_svu1cIuY-FG55L*w=HOZKTd5%lts21gE~!9T-><*c#}@s`Eeq z9rVE05lC zXS>w~xTX6$@4|l=I&$n-I3g*z3_)E$CFYG3RT~95T$c7o1#hQ{lx9C!Zco&WAmj)KBCkSsLX1X&m{X|sPnXH zEvvk*9C^GiD?sJML8y7R$z`2YNK1sR9ONUmj^^^}AvwQ1GElRH;Rh2h7x|Xd|Sw96_QiaEt^$X4W@8&#Oo}>e!PQb-r;p=}Q zCH4^gk{G5M#S{GsH`_ZL7TN^QH%LfvDzWz~|8l;jcjRh`kaDc)0$skEvD ztr#hUH{$&S1-r=bJQtVD;XLIml}}mLdL&kM0uBRP< zz&wEHw^@+k8xu_>`Bi@BsVUX z)L3!RL5=aR;(0$n7(=t&$aHFLg`8Kzr4RzGfyi9JVBcxA;mLsFddCgKeBt>AdiWE; zLe78o(O(z_WrwjK`Hk&jq6N5C)e&gBvgn`l0OUT|Y6W&@R~6kR(Lgynk&-(4J?ko{ zbaU2TLvM>LFT+0j2)|KlO>=Rbd;}qTHwul}U?K&PvuP|5hk4Y2!SQ53O?72AslAIf z2H#TnSw~A(j62(Vq9tufXW6{8^P#XRu9JP;X559PpjyOwS;7Z>QT7r0am7N^}9qZVWH+zl(AlLK;T zU!`hKT30RS@oEc|Qn8-2RrtH5XzcamvpT6yW-`b3nidJz4)U=w#7)*;T_dLxhVy0m zg#c;cwtBn&AABMlfImw3P5~hIczME+5Ur+5qk$fgA+3oqpR^wUwXMk%+Qj@AP8oTd zi-vWIO%5B@F^hB>tJEbiXlk1+9`5hJ@og+^mv5)bbRn?^%dO3|;O$W-3;SYSS(ny< z7AY+Po1ojv^+*%I<)$59jR@Y9nj5(HQ73S#j=FF%0F?1h8QNF;c9%ozX!wU0@$1A!#&->1$4MsSA?O(ovpvwjLDVo3 
zk-VMV0WhAMKvi3VPh6%%eZVo}x!Zn!EmN}ys3u}PB0Dvk9y}U?1a=w9f7@DU;Q+O| ztyXV6&25?Pu@%BePSs=OQnxy7i?vNFnAJ`hOp_`X8L>`%xV`+v z;}F7_MwM+rN9QL0>6Xhq?IAXe~fr#!4N%Xe$_-n|Fru2Uu+7V)b&UUf7==KLqUFZ2z*p28#(A$ch>U=|PB%5?Pzi zjjNTc5zs>BSc>QX(E4gYqyzjJ$NKG?X{uXFXAgN3s<^fshv{h7(1kAJ*Oo~D$*P|}7%xl0>v=u2DP;#iEPII;;RsSj1FA;&RAKTj1^W6!_owekLk(BtpTR)^p z@(jcptI*+?bHE9t02>C2JS=xh`<{+T9s|%ut9HtkyJ@rnDb{uTH35)kYlV*g;jDnJ ztS&g-1sfHWWfsYpuKK4O1yqUX$&!8H#*)GhX1PYB3hDfGBLWwxQ$IEU3ycQfW-d>f zyuL4hGPSON`C6vvE&Qejt4s#+c|N26tMvG&3RDgs&-xtoHJ?2P`q9wI1a+L>l8_97 zr3t=GRX!&&&*y@TtdrV9w|=<4v5PMgrTF*NazIOxfmiloqypSy3r{Swu2xyg@w@GY zFJF*%$8v&eIlh4Ukp79ct54u6twe7#@y+yt*1|Ov*9P%U9?)ewKo2ASL>oE&@?Bsn z(LF6yXc5q-041CI-IMbh)%1$P>c#`G&3E7jgZ+3fJUbS@txE*JD6N z_wzqh{i+`vpb^CkFgFi+>}q}LOccD5H{OvtbUWYbp>z{~`sn*^EWeS>Jrt$1dXwjO z1`wA0x^7FoFe-?7tE*$cVx)i{d zzVHVDxM5&j$%qhiKk1Fp_lF}!&=1i?e(oy?WQ`y{f~$Li=)mBK_UlZMz)}=!K)X0O zMXT&N1o}+xpwWt1Ex0?QS%EM3-A9cmNp$TT=W8rHG%?ElNvAw76#*ad*o;#zO&I=8 z8n$~DDVCir-c)n((9Gwmv!RkIrQs0$*=4-EoVh&TNiFOJk5bL{b2?Vw{_yKO?+0|A zxvvI1DWw(!a1AXl;fUYE9WAil_^x2~%Lz^9si29!m89?%+&qTT!%<3xbYNQPv`7Nk zK;NoVV0C2y@gcK0h%@Mt(J}4I(EOD*9j8@*lu}G@TB{g++{k4wAaN=K8T1v1R$;h- z-0MyPXwSDJMa1jFdhCeRSkecyTxJ4&@5&mRg=xzo0Cbr9ssvm8nOBh8i&YsO`CnxkK4PHDHYUZkiEbzh zU^Y**xDMD#T9chUSbfp};sofHjp69&>F)0D!V+S{V^G6Aa4m7M1=J;=omWs!Q=c7^ z`beW?6Vvx6K-+9Oi z;BB7TwZUTeM8S%X?Dmtve6XSo6dFWw+?g82X_!aCFTc}a-?g7u<1W*8WvymT~ z7@GcBbKw1^jAH_$6Ps!WbS8X!PRoNC^W6JE946~4<9g@=8Tjc-0&1R>YYch5H#5I- zPXz%m&-m`lp&TlW* z)6U~T0_7pyE9dn=7L&s#QPr%v&f(qOsieNw!R}V?4yG{9=DcKjKR`UOr8o;5`Am9M zOGwKdtGdHYLJX-e4DUJG;KM={dv%A$k1O^rjkCK$>WUq*qGhK?Ru+oW-UGrooGH;4%b-=* z`rcVgLPADeJzhcqdWjdYH(N7=%qS`{Qrwc;whp)!)r{f>!vnIQ%A5yN2nMKCQ5yn9 znUpDd!X?nQL11BgKpWSa0dkjyOo*9KUc2#*a)XnV1>yEuU)-$5M8ngVvobc*!OFeG zOz${Ui7)~*l;+*&7!W1+CHgRS*ScQ-pMAZ!g8!T97S&a6+@1<+KgmObVm9-d=>WyE zX`TIJ>|n=jI@Z$g?rk!@^ExlhvcZtq9g2~ zb7hA9^qQ5D*o@krP~?Zd(c+D*nT(_#Z477HKakGbvYf1R8ePghwT{*~m_C#jesm;g zn8@Y$q2TvU6loQCPYsSb^G1wW@yq$H_UrEXWD%{)Y|WEFD;e`V+KoxCQ3DGXBBE>% 
zqh>%(^CpqvbYemA&B4%iai?7wnm9KZ)9CT^Dox|aaEWVd$!vkRYg3(jtu&ThojuyS zhWA`*_a2;MFaF)rGffzs(K^$i)QouvR6-tdfNg@Y`?Ma2AzXLLy7>0Ay41}l6$u@3 z+YGm|+Kdudv+qqAp+HB0mZ|6RR~#|Dg{X-JAZMK@ekhExb!?jw&gL1?RRtF^szs^M zjKPPi(7EasnQ%<56N1GCt@maos)#`ek&EjtvE%YH=9>(gUwCTxb~LvPe@~ieaW)Y` z#`#dlPf?K;G!rY_UiPk~09=~|4F`(>phcop`DwYiZ<Pt6=b09%Dn(MAqDuEfw7?JwvoxNS4gJ(&GQEMo*az@fvWw9CGWiQ zycg2k_7l9SsYJ{d7y36uk)Hu|P`F<#AEf^I7Rg(LtCLN#`kSv|Y-6!M3X1D#2ST^% z!vV~zXNwO|X|&}SoIuDqeDb9E_peHr93rg-w=FXF>l>h& zCk;%a(G$^~7Xx}M+j_$CZ&!RMCq&r5nx=qa8CBxu0E%x3h#AK4$aOu%G-lY|gyFjp zTNvx5*Zp-`jA<;|pDt zt)X%^Ifj+p(@v#SW3~drtt35wTwk8xNHtw~m%Y$oDu}J0B6X|>_a9n@P-_c9|NiKN zJ$}8IfAxVYK3u{)?@z`G9-?v}6-*c;%|HEFTvp!OtS!roB^KBWg2Ut~5W;{3aEXu3Jm_DcheV+j z;WI}({ZcrGzD?S+Hr(0p2fKeR{D;3ta{sW#45iXNCdvQ63K%*eu1RkXM`eP4%l~7D zF9^H`|JSpbaZAI)g8uhKE~x+@&+AP4Zx`L)ai6IJck_R}^#3ka{J96AR-lIebSE?O z*S{LZGD0t7*XDyJ-jG|NR8?AJC@%7Z2wDBc>){*SC=SXAY7{1WMT}%E^*8gh!LB zrJtj485h6;ZEBbPS$)s0nNfX1 zRfbq2r4DtYoZ2DbV8SN_*k2A|qoH3QAId#u=(2Z63oHYrPoa*!qsF4aZqX=ys1j<# zar1)vkoY1hibhYQJdmgg%?F15m5G;3{FGqNc_tR1oE53~O)*>CERc^%gcL8%zSqjd zreJ(Vqghmg<^!3m({1I@%;ZKhk6eP#;)TuEZunykts3zxSGY)@@GvxzAQfZDB0UU*TV|==ed?v`-#=^ za(mnqNL1)+t;y}^&g{ChPMj^5h*DS1dEF9=LCHZ5^FKqp#00K|jevM@o!xoO!}4tL zyz-lU#nm#`Wb?T$=B9H|cVZ~%O0z8Snx#Xr&#Ote`h2?hIeYcha@6-i zdsoOBh=rV8ayn#3MVImy>%3yLX?OS?*`mee`27tjl{=V9PEklZRhAvLVATD8I|4t9y7P0b@b_91{QiJ z)zI?IS+qgBZ9(E4WO7mD2YRl$aUbWt^{!wusLb+iPz+f(I%DFUOy1Mwk%NU9#XJkw z?LkNSwXI3T&zlT-iNDD(2x6?!8H@BR4Vr>t9IAd=r&aQX`c+GIig(BWbydaQ#?KTC zEKRvuoY=0vg(EE(?dM~{pafS2y@bt&WpojIvy~#AhxOYkmnVxv3%9*0#i3>(cWcSs z?J}?!GL9U##CQ`lGv=Gi6vY<$8p)m ztxy!5>)1Us*I#kb4MdijJ{%EaI8+2$(%743Z>mdX*H0rU$Qt@0$Gv(|HHc+y zb(y5*$~^2{>bAM2X;ZFbATay9#yEnD;$g)}4B)#vp{;WQnr{@Og&sb4WQFE-W`e~~ zOvb$+6KoV6ifdD%sLbpRR+BC%8f*qKsp;69On-I?ST@myio2sg$-q&5Z=3qX%Itwx38u|XN!1D1tnH35+xss^ zwxuIYD21eDF>wvUropYzEb~GfRi9SfHl0L1Bjxrl3RNa&=McTxbV{}0{prbBVp)He z7?&-^Rx(!OW2GgWdT7?+nozSZD=##r1(#Q{S z!32D7SgndI?u*vrRhRLVdY!;(t0e=Z#Eh7()6onLR|&P!2mdj5w;)VS3v^wNqSk)! 
znb5Eubb&?qEKB|U}>+IK?Aa($({`ke(vebxL?1rASULkgCiC)2o}rNnhW!R*<#~IIW>>&}0-vw?#aC z?*!5s2jl)!UG>V*vRB)ascQ(9t9duWjWgkFMUkLtyW3m;y;5;zGh?(U^31fM@Up@0 z)43-4zx`^7382(n^+CFQ+{lyPZY4w2v&=N@W%kmlv_)i}0yS}hZBMzHQH0fOx$KqH zAh9fWIcdpQ^3lplmnn1ZgQ1TF@q0`bus^@7AUz=V*WU)601!WASAT{YlK-vBG9kJl zI2b38&5#D17~7%O|ARG=bl~o}ED5Nec|d2U*Wvzh#08DeFfY&lQmX@>OncVn?s%NvUvGpEi5CKZaOm9=G`R)# z(!9r$n7G-s+sm|&yPvSJmr*t+Au@h{u6FL|Sfr&<<4_-m^kcVOCd^g&x_c!02}_|g z3wPx~E`}uMYjrKgn>B5-V@(Gd&FX+IyFFqNWdB;P7m2&v_pS@Q+M|S{a#i_(H~fMW z5$M|V)OI`HrJlj|>w#05M$9OjIqds=6%dL1u4cGgGFRg_kN z)}ZFOyYQLTal;wfgGtIE@b&DjgQyZ?dwF(nK}se4f3h9mU9ZvexPeNKku|_V%~Ke} zr5jAxr@3rCi&;5|!{ezFY4Z&5SjB5xiN+z*sMHi;B01mwE-?muCw%X(c1zQNY-g+;7^nsBM6PxHNQG9ZUuK}|HhdAYVS$^J%gK*ZeuaVwDqOvr z_SdP~8;zYo@ICR!!`pYpOKxM?KlSi5f16&*9{$kEOyPi@#3in7=9##oEWdCKHOqUxwd zzvq@H{U#b>Wvf?x={!ID14{bbxwhIgap@q3qu5I4N)%2iS$RNR@oK`K%DJ{Uz2Ud+PQ5?iTB3jygTNl6_RJLf38viXvD&!+7j zEP2{X%6pzYx6$q#gvmfVlI1C_n}#@G#BL(HR8pZxIXPThny7pGljAVV+Gdtwq_)8t zn)Svxpz%PmcU@?D1Vpxiu2VJdG*(E+-COZnlqV6~GSY7V4(v`bkT)jqsUD@!67p1Vp7| zVwcg(ezHrkHohRC{Uy0*I~lY?R8aCO=j7YC0_#{DGrlD_8s&a0v1#L~j`Fp3rK}r` z-!;o5(i|lvqyRf3ddjz0NYT{5%2tgbZ@!N`AwjuKwp%I3286@`6cn)>CQm%jUFE*8 zyXK}=32rJ!5nP-*(m`V|8U^=u%qZtK>C3?_+eE?fsm)|h6F?N#bpm>ppP`60Y;3`? 
z*u$#*CE2xOQ$_sDJY*zP3$;gp|Yj9Juk`0+GQV@06Rq?1HeEt2XkJ+b(mU~Vmz{?bm}rg?vFfm@1+iP{PyBx2yj!;jFXgo5>C=Rz zy+3L_bai{etCi(U7kP>(UQNVO6rXKT;A8_L)-y{h^1_cy8e+9jN}Q0gIvlgtT+sSM z<)@bXlUE#ep=Wh+#6X8{LC;5u5c`{mroF`W>hvQL_!h>KOsBx8ecbc?_s^ub84s36 ziTiBy?jnCTzAn0>DB zCQq{@KY1QnbwX?xj7ej1hpzVoz0D{>U{&O+DAcGbdxEY)iyl)`?0W%Mg>FI&ir;NL zyrfv>Xk>UCJ}ssuHFezefqg-O$tF5U5!)xUQS82#loxM|>0g=Og^gB5ZO7P2p)t(% zJ2Fk+0P7(iz;Q(v(+ezenNc&uO6=AdRnNvA^JFO%IRv1XR~;|Hi*JoG+E!Vy%s1> z#PG_8ugxgTqv$Q#zdcc*6yfe};svd<%>5moo`L)i=|l9ky6D*1qTk!U1BTK{i_Rve#Sr=R4BH6=5fPEB`3{rhFIwi{ z5BPOeYlNy6)7e-_^Jk^5*|2HN2X&*Qd>d8QtKU;~Qj|CaYZ7**%O|h)m(uc%jh1Ab z5pIbUpHZfC4gJeM;Xk)Vu6-z9>Ee&VT*r%Zw!K4}<^ZKt5{`&$rmGKApr9 z$g%0wMbGaD@KbE^FJQVXHoIP3O;w7rCH?TSpPSxJVg+ho%BG&MTG6_=Sf@z=+L_W} zntMaHUL5^tbnSG1-vmwes?gO0vGB^d3QKZJLlb1_YC^U03Dqa&xWC_*GNyolMyFW* zS|SQ@(M@RQi2x_%h1MDpC4V!K`@d3z7fygCJ_Fi-v=jx#385AS!}T4{rz*x`Pb*0B zwXj9Ol~(hd8A22^>5I&~Cr4o;M~QX$1nPo-Sq+Bl%(;1$WFq(U_w86_3zj6aXeoi` zH~c+VvYckK>a@xQBvwq;bY33UHT0L@JTbzssF6q$WLw3s=bt83Q%<_&KKN(Q+uY9O zE6JFR-V%Uu_D!kNF?I>2%uIUN^5l+rGUm0Fax_CNf|`+~N#p6MJ0dpW8nt`!VQh?VF}sA`lnrB}#y|Kbp~kvTl*B)mWp zPv0${27W9@k~~3hWFy)(r|c-glKp0kt(FF)11qO6{;DA^XWr9bNhCZTYPclEg}oz*L;^ zvew$Eg%IW6xBa4^b(x}zFW^H1b1!bSnZ^n2pxF0bXonHs z-w>^@icTQxutd6jaf0EiJTE;ipIjcb^I-FePdSAs9)%$MTG94#7je%R=W*?%T7{1l zYOlc8+}E14gkaql%4Qx-L^vDmlpY=di@%I|?fQ#RGh{^JcfDyxpO;K(5&qSRJ$-UP z!}czR9|HLrLI^ZGWp^|*gJYOzm#bvDzadBvet8TTsii1d>a8!Bu z53W2E8?)A#e%c5yzmx0e?2%R4>qIQB2|fZWhL>BCT+_>~B|Cvm(AsN+pDXdNM7tlzpRAra0u?_=O`4 z#ATm&`A&KmnaHQ>_PN=g&hxK!9V!)SIOsL*uCY_)Yt{tKUEUCy9r?15^|cx8<69$l zUJu29M_L=ig7F>J=GvS_FzA|N6jMg_oiHuW!P(~y?y8M*W4B8Qso6pV_XEUhP-p(a z@~cuROZx%$gz=+jQ4&D}b6q`gxt*XGxMOXGP#z46+gS_gUET3Qa}{Qcqp?jb9Vgjo zx|tx{syCXn&U7}EQ-Kj2CYKReN>cVqi|vZvM2$Qb-YIBbFK!+eAvv9x@aQ`DOoF#; zHNaF$W2&;BrAlPEV{tXf$3jMfBeHiByHC&cs6bj{J!K9Kq;ai;Lvxff^Pro|eeS-} zDNY@m9nu;>kAi6Gm?H@WupV=>DR=yf&ZPC!e|#`o`1*U#R0(4Y19U(}?q_WGcX9q4 zpHFkpvu z2V!Uh3aNuh_V4~vzkMj%kZb@DgYO-hgYUoE*Pp_x&>)~F^t+M9^gmh5pF#egT-xUe z+QrIH0*sgqAQNLzuor& 
zTKbwlU&GfO6X3X9lrP(xHh{@l-mr8H5f)MC~nz}dY zDaS%svab6p_hJ8zgxypQ!e#m(AE)iI{I%R-l5zd>XpVdw0Ezd8;Id@_WN|qG{AWfI z`nORi=pO8vBEi^^;$DoAPB=9^gJ#HMEP;VNtXrXrSt`L>2wTl{(@PiuVS$l2x}G|2 z3$~$iwpOTso&!>I(rVb_9d!%)c$+7TVN0Qq_eIl?J*Z+d0$pH=m7hKglQ^t)M{;+6 z&qK~{wb8qIgLAfHuB)?pUSfi&>_QO{1cbQ-+ zw`!NpQ_YD?c0N2+taMRN+_uraOe@r7myk;dh^!wNjIH3+)l9!*B?gnKn#f;DEZkj# zXq0P9-8rr-xSbB-#iGL{4aNPsEA`Y8Sxma1W-GMz2MX-uo6mLx(HR`#tYYbSIpcnQ zr@8kqKTIFTI-|xk>Qe$rFiQvs~6s;7$;ibJa6Xr@+Kh#drIWqP8>%o7ysc zVW$!Yh`Hs+rIedbeG^5@$p(1yRn@2jZ%MSOE$xncU01{$qZ?iJIgXZtQKc+pE0cL} z$gKp}{53n&{rp^;qKvYXp4x08+#yT2z)kU+CIZG63F+YD#6G>na!Vn% zq5D7y#uzTW=3zHbUHV724rF&vqfKeJo|DH?dd0O`Y3nXi?ovf&Q|EBR>6gq~{RowpDwY#fBC5^*< zy`wAC;H<@nNc$Cj`S{~oGVy`SY|kq{CB`W(&)Xa4x>7Z+b6SvSNmr+1RfCT?W8qXMM+vKQZ{_QJ*0V(0qkt}9K(T|{I- zJG$WSNh3>no4K`gSf!v{ss1NoRz2s54zx z5AKa7aO%KBW)yd+PpYCzLOJZXA^&%X!CStCwx1x)VZ58g2ET?KUh*}+Qw9Jp`?paZi)cC)NtsNF*AC6)Ka-LQw0F6;&eP6JHI9lQUZYjRNx`iiT3RP3Ef-OJiB!+(kTU>b z#p>&=Gg z4}zJROMA%^t>Xa|r>ZKCHUu0=fECz8gfE+bl|K=65KvyutP-c()aB)MKaBj0PPHUu zO^eaGzQa6j`jU+@hufqrgogDb`(@PYRfyb;c@e`j#lY<>E)detw|HFo-$pvayJH6g z%SWfS3r15t)ux@9KfSzmlicqTh*h$a-T1|pO-#t_wM>e8w^~tdxXvh;kEJ5~xKHfE z!&1g8NBO`ZYO(^o69r=hYVzij#SAiWi4;t8XBxzxFZ`?DEePDGT-vU_nl7`ea>$t- zz>6Yoyhxw3q+A~Ead4O7n&%a1cE6Px2GQ(RIvi`_-uE|+EN57VQ=KDCRDzF*H2A$j z9h=Hz>T9Oft_D>T__fZnwfqR58Id!)_lGO8lt;hV8tSSz?|X}b0<*}S=bau>;f{-zt9 zE|B3HXBEvjUR7SCd6mT&Wq-#XtuM$hfSaR}*wTLbdhO&w}em&N)zJsE1YshobFu;5MIG3H=V z0`;kkoPg6|bn1vcU&YUIUGc`rbo-;1#WZqD(-rIS=k25cFm6}wQ&`#A3D+X>c38g= zl)Pav)1M&RVRTcS#zC^ny)Mh$ZZ%If=1$iV?Cop{*-1iNNX;nuS&om6K0l*A9wBWk z1U|uWCWC%UxsMoNMt9T(b=27OH4l={*Whb-Ju`t0sO>j?TyvU9xObI#+On!YQL*>r_ zQ#RVu(#<SG% z7~p4g>srBN)~0B-!AYrbuDURP=2=j>Utbd1E8?Tt)#fMj4vX5!k$V6)jMr4fh&dqz z`B)uU;lXsR+nCRuKktgh)oX81zb74fm@lp>OV`E$AM_b|gKAA5)k4(Cw?-*BF3Sf>L0 zOHUA}g#qbCw_)%*3&L!?gUgn0@vZ^$e}ef$d= zA6{o)=Ck0%E;E+Dd~gzF+K5pl=SH*a_gXdW_rDoJZrq>jAY;3a#sU`_GB#o<2XjB(cP1#dF4vYxJEgt_omcIepqdF$vUNV3%_zi z=Hw)V;OF`0%wk@?ua|n!IBMJz^TXFa{FhTJ 
zVF)2Nyj4i5NljzJGtC_;|PSzGkz-lS>hZ3srQXew+h|Gs0`x z-^E-*?;f9pq#6n3ZyJ+0B@JZ|9F~rC&I*i6&71d zs(*Jbb*N0~)gRMEOg>!il=sJW=AUOafRxPdC7dd-`nza0v4EFO_RlcAX!^`~$>0t; z=%15Nkob_1I)3D_l{w$PK^ad}c|1&k#2l|YGXJaU(fZl&fckgYK^%p8qvO}?t%2Uh z!>sVHWocIg4ryurIASJjM-a9!#Lvc}@~n{a(eaF7GS?IO`(l==9$qiN6~?F)#w%Fg zy&!y|rG?L-8V-7N>aWj%0WVNi*Z#PLHxB|&wT&;44sYv^FQEZruKpTy50>b~0VaO~ z?#6qxLlAsn(ecW$a9W0HXpOQmo$90ak5RYkD~3A9{Jpd|ay0E`P#Ky3_nu+l$jPDL z>_2yIg+$F6KiA;QWHOYBf{KcIb22JFQ*Tjd6^2Zo)`YC(dC9Aipo&nqw50ihBeCf# z>eEW!sv=PS?VKd~TbETHr`?7Su<*wNTROjDLW}fdL_zrDbzfKwv`=H{L{CoukAoVk zzFcSd>;Kv~3P#MhxVV9Us{Rr{*MEkBG5}&vRnUPw9QvhDXHD_q-ZMbE?8_H1zR-|1 zoqOxDzPP7AwkI#x09urHJQMd86fPlbMqbnN`zgt!ieyj8< z3e0o5=a{sr)RzXyJIk6ASmStV;^9Xgwx=f?%K17ht}UIDPCIp9pao$oH)T@Po0{qq z8xvA)v>Fp=a1w>@X3rb-jq`H#nDAOM3-6N9OPREvW%{GOyR;{=*+O^GeOy_9;uYqp znGKME0OI%hv2+F5aC(g{KF^yGZFd_>w2R79s}unKt8(U@4!6S#@#LUP?;=7u;u#N)iY3lz7%L1(|N`&XG)d0>18=(xv6qpLHTXyAa%n z$&7m%>(p2ZhFdE_RCP%5c+=HtW?pmH@iXewg>R|IY(#O~dj8(hiI=<&&yS_EURrYI ze|8uc=u*&F&?Z2ctCw!yarBzQ^3~1`a(+vB;hR6soU17GVI?%UIr2fzS`SbxR$4=0 z$yQ|ffELfO0e8b`0dJ6*d9Y)~eizS6#D-WY?kE=D9B?W)^nKCm#+!z?$yY0G2$snk zBslQnOKRvV;2T0c))^-s&(5dGSWLy7zxWG8gx@=~IrbAwy)0AI&s$sDJ8f_|Vt!$* z()%f)Eq(7jUaaW;9_YnLW=2|%nG*s^OCfW2VdR%!uDzA^K$(%EqVs4$k~4?lRH26R z#f~v4LtIIdl?kdJ@5q*2Ejgu^zCErKIxyn=2>^AvF5z2Ik(JYS@zjdiJu}v!-4Kgu z3Ti(?)Kl{f&QC#@FPz$gE+Id?dSN~0&#bkBT)k4>vBX8$YYr{5G5_+dDfL(j-(Yb} zOs@7CWxXI=WLo1zu)yc#E>gNbjHg40(~29Ai1X3wHZ#AYI+7x1Osl0;JM0J+6ZwQ=KR6-kwgtMQ)dE?ktMR z(~{n0w$itMypUN{=tEmpS&X7XbIx$&)Zu!DvQJiSNnQf3TX^!je&y=({# z8~tS*>tL>W!DoRKy3OZ&Ua8(RY&8almju5~C%8d!)boX~9gZh=j7n!|frKsi z_-Ey$doc;WHs{4yWn)T8a*|(C&kQoYD5|t?N2@R!qxliylXe~t&@@@%& zx|KP;gm7UUJ*GE3hj};>&_;L9?Gw7_8B0mUHBPJ0V$JYk>z?IEMxL_HNQdM~cVvWS(PfqH~4xB0ctD~6e`F3t(W=6C|9BA%=|JH~w; zsHdKGBbV)08k)zwXNUEf{VnY(-{V?D$gfbkHZ^?BFZ`J1-d{F50Nvijr}sVMKjtF3Dwe2X znf>^2oq9hQ>8z`+PH_RTr)5%nA%acou-h3fKd2Gm@>~J62kYU{F`-EkhBlIuIj^Dk z7%)Qh4b@&#jj&5$sX4o!Q*x6y+A>(3bZANq7>GkO(wdS`!Vc#f5De=l#6{d2+XNUW 
z0`8)ViTp-+KJ}1T>8#(@s$HSOs#g>e!}V0Y#kD*)*Li*|r`;Yii&}cyjd0?bHgL?E zt|)GgV7Aa*r}y5JB*E9YBKbU=dch!~`3J4&-50HO+@rH9EV3wAsXTG1GEKq{W=r-{ z;OgjeafIZKP!Qy_OGit9o+ct%bLH*W9{oClHuP(w;L(}c+#It?S{&V<74yIzh^Lv( zdmO+Wqut?M9>I&vc!9tdT~8))s0ZHExlGueL_J+b^IbPG+2r}esydhi&%y+?t^*$< zW4|Z@C9%A7rJESk3qQM^&8^zf$e&Cmb}}59 zniv_vORBN()&dI;W#h(8`)%+}x-M+l6i!IF_UbDtLY!lM;&30L&x9$^y zw%IrP(+{l_t_0rSV-mmVC*TD{roIcJiZHNqYvAG&)I-dnZ@=~Z(RN}t9`$iD} zdFK_{?2H>ev>!2=jPLP4gGNrB;M!JhKyd(u`qZus@+B`=xiZ-PiN1?<&=+;PK zl@HO$8agFM!vl`!izMJQ2y-1TwX2Mt1bUu$Yo>N?lf3E^0ma)in>)Y>=@#N+yagvHf1& zU`|Y2V(H;fsJ71J72cJnX$yFN$HL#PX=fxJE79VO{hB9!6qofY(D<9Z&Gxy!j#MO< zc*4~S?Z<2n4#qQsh2$eF5cT*ZV@)|O@yY4FXsvF?v9%LD5+dOSW5>qM!cr=-?eFRE zW*AhIibgzcyw>PH4{URm$)mAo6t-0%g=~#e-rtX(KPdU08mwoq2>8m=X zlTY1yw7h$x4KLS4U#rb67k4{ik)d0%^(rSu)Pf9Xm27YQLmt4a>D`t!tlTdNWG*BY zuyHm|$=Lbuz+Sk186!e`+R;q<78bsdX{cY+Vx)UZLcUw*o2xvrPRU-`it;%MD5lDp z@G*`=cfod(&;#V~^vKfK-=+Ypo?gd9KZ`y;(sx@pt_JoD?T!4+kw^CKBK$;Z4;gi% zn0MQ%jBPDo;B+CmXR;S>&4NBBQwJM2y27BIv%Y3CU=G5jIr`1yr$+J|)8-6Uw!bKg z#O@h-s`xgX6dOKR`R0TRvtU7vX^r+hlYv1h@no!5*bh^dt46@!9d|OTR7qF;kEZsC zLuQV5r$Kf86Qn;uh(cH}TYyh|5=7z-CKWus0SL$RvA@84t30 ze9}sV{8hTy%F`d+?XZ;DCe#XzQ8#Ym#)?VVNSH*PArdpUpd^%EeO%-;X5{20D_ z(=RC$=Cbd0DMZugM3Fw`G%(0nKs#WFtFY3#caabdeZtCYZU*O8rmNq?`G;yv89x^-#+>N=Qz=JUD*^lsGv z`wfuehrLR8rQVS9iPGMJcl#m1Ka&@lFt<@kf8vL7`Qld|InLuWpCGVeGxNmn4A)t7 zLT%+~VbEN(z)n&zouMZPVn3~ByZLTYWI0~F@^PNTe)DuwmKpPvursQ&CT?$5+V@u8 z9`Tc40ycwZ1cX6Q)Q-Qm{siIVp;mB3Rx9$Drz?E(ew}w(5q{nJ{j;8J-oaV9^q=`3 z-~qSU{8SlR>TjaW4EY^`=^#-Z{X1gy@870-9|Ea-XpMo~p9w1@2n~~-DOOCh9oRZ6 zX$+_nKHf*`HjwCEUWK|o{qws!6V}1xy=dF#YS)u37mdHxZe-Aps$mLO`&%(`@qm3n zu9wCj^Z2k&9}?<&eiM{Gx%pq)1Kzz_3xrk#RK`AkZT`=2V@yEOe0v_xk@Y7#{&$!< zVg;D6Lvl32Ul09La{?}>(DKAr>U7a?BI5BGKU4r7MiB~oZfhR#Fn}~pfD1t&nCr8? 
ziFMGgJ;u`NDpj`_m)rk2aJL;y2Lc2D;5uijES6VR;2azrTwDPkD*Z?e$kKQUYL{aos@J*5t7xI2${73WQvg5L&Kw3*_?p1|&=Gsh=9CJ2tZ4SpW?`4vu@ zH!YUVf!+R9DO3N_HJ0Pt!`tv^1LZgAx%x+Qbq=2<3UP@9Jqx=Q%Z;CV-W*EoEHvSB zn2-I;arg?GfTk`0#;U%#;qvnkga>Jkf3Aaa|4Qvx9totIs@2EHcDhG*sP;ghvnc4t zESPEh7M}T`;Ls9)M8g;f^ZQP3EVecJukD!J&%%8RWZsNVq+mT3W&TV<5?w(#t>-}( z?n?I5?>f5znX# zI1heg8Lu4fY_eJ2g0y0L)mDU8a;!ym6QO07LogfTMb$SmrN7w5@-&_vWACUK#%NIe zyLn(eXNL`PWyQt7@C7svy5sIoC-t^{B>=^}fL%bCVj1Z5rv8v2mPniLaNH4$Q@aFy zGC6qNi$%Y!@m$56%je)HgW4d0C|b@hYJCg;QGX|rr7HwXCMBX%EvL_4RXlY#ZyWBv zLTVkio~3q(wHMiglx;2;0loN~#gd7bafivJqE3|$WvZ8s1=OE=U&761C@YJB&U)&>1A?P@DCg_$l5M8h^}>FhIQUU_P_4wsv*SEW~8rA z@*rWSH38rdHd`ATxq7HCUrvZI<6>a>xEi{xd{B5|&S4eoPI-CIHFsEA!Q>a|pq3%j zmPP5e?UhP_7>*-PATsVY@X^5B?;$atFkUq5Ety@rfn{Q0L6d30MM-PY`k4;GQD3WR zOM9NZlBV*U<*Gptnw^4h(rK<;=fVAozst$XY+Msu^V!8t3cc3eJn=-__gm-mv9m(K zJF|8|Jo#yd8{{82LNswQHJ|M^Dg~{rQwH@}DPBzedpi|EybV70AY;+$1JaUt`W^li z59)+w2F$7UYrW8!l8Ml9L0NJ4Ifs4?Dijf;$P=;5*>swCwhVmL{Yk0ZJ83>w z{8!5TAqe84*>Q0WdtyO-nvrP-%^J&4R$9%=Oi$rp9xUd3=i}&xTDku$444$@@l+5R z6@V)e3b07pCb{xjmb!?&Lbx4%XZ2^yoC4W3)aPTNLVaVLFrm1YWZw;ftzawhUJT@2 zH3(FoA2IaJg$DjgFM3oQGL7hLP0GXiqWQWyt2Ze=!aIQKjlFF?*dT${7nu_oyE^9j z>$@-S(sO0du*RsJ3-ql`KJ%)XNWF*ZE#(fqKfy1DowVWmQJ&W1a4v%BSS@c__&F~R z3EN{>R9J05X1rcVu;5ga%EjDxBWH zziqN^cwE3oHD#0EvhY3);^Abahp%?Rg!rqZrp@mMuM|d4_Yyi4F_!NvrHlQv+IrqjVw9Ax77O z{4p;n`P+$a7QwGh)!4(wZ2=s}(F^YU%bv}X+ZH9{>q5pLWJ=(T2nM8U$W)Q7f8-Vc z3UtD~Z7$_O=~*kuGBM&CzTl&DZW;+|ED-SB)7BM#VO!c|YsZ+Nw{^X6zV2@={XB(; zGEr5fI%!X7mw#rH<(_-azp$uu9mY=PcO+7&85(2`S9MC}!w9ItmtRi8_46QGJfeNW z5jp4|@ss)~#LSbVnyB7eqh^a|-u z0Bx3MEE|J>zW8ZLKtKSW`zg1BS%7((!hr#6=J%1BkQLVfuE zRQJ{oRc&3=urx?ZhlEJCbOEz?fcHfL5>!ym>e41 zR$AbQ+JR+r13e&aZ*SfS_3&~Wa(l3&QH*Z!?!OPl`#EOY;YV;WVm_)scT<7d)Yo8= z@X_HPJO@1H0>Y5N2o%J8?o@Cg9emu}_<+$x7%2a*&@2Sh6lMgOuJx?8bX)0$bUnY9 zyxxJ8uoGiQWg~3PdgI)+%$1M%^kh^lfHzZNggLkyrE0_R zJ;lahToPS+{>Ukw z5waQF+}n)7Q$${*r$Pa;BAEuQ<)*fUc6L8Fz$;3#2zGD-QP+Xrdcl;Pf*qu(5{_3* zWZWos0x4L-`o&#Oczi{QzG)&Gb#x9w#Ks!I*8xrb;5d3djT{)2D>)F=yc@=wM|s)} 
z81G41M<;EE4pFwQbczA}=r{fbAc%|3!=}{}`W%@8Jap1fdIXu|a3~UW-kcqwSt@DV z3l}%~eqCN%G1V3bH>o+!b`R{g;f^Hg1D_~2b$u^k9z7+?;FYA^jaFA`U9^#N3!DPa zwb$)>p@xzZE`v6ZSnRrQIeM$z)^-%&H7AjTwpLACLVMj6_W;=t&`J51C$6| zm&KXB5s(o9pf3Zx7H{={_30Cng5DXj2A+7g#UCU~GWJ_e9JF82c>POFtkM8WO=vU1~Jx=iP9{)Qm|1+6~FL6V#EIj?6`GhM%YTh4p`UaqHZGs(TuzGEE zURWP~clbH$ga<&2L4fImebCYa%}&dFmM zr)J1*SkTSI(y456y8@8qkxbQWT4lF4^ZWi&<~LkyeWS&sJ~ztdm#;t;nji$DF4ZW|z90)UV%|_ycUEx8!!kb(20h z=OJtj{XMaz_UnM#N%q4$1&oLF$gKyIe{9pq>GOeyad{PX9{l}vfJc+=+XULdhN2rIZR1PeP~`8RlIuq z>UaxF-a>`ZIpTx>0_89?;5q0&3L2LMxFc9g&O$LCEo`**)i1J3oeN2-r^YR-UI>)u zR^21dcbTrH_NcT5)E8?HO8KpO?q67HaffKe4UtgLyp`d1GsIDG=YY^&~ID{CVVH$1W7NRcBu`M|Qc-9NlEJj8H52 zCsJQ%uqQ?C4Ay{ESs20$4iFsRAFs2}ZT`IC$y3g(T0x1)p~@LH)YpH(cfCc%6@Ger z+yPYgW`3x=UBl~q_ymJON^YB8qE%zqzyKZZoD*O$1}oZ1u-tw$X)10{KsT!H4cc;P za5HU~67u1fAypT|jVe^Ds{)i64vgo$Sr(yKVthPcaw0clHKW&UFQ(nhA~{0z zEv(;iLyIwE?Np^dwhq(sI#1=%nwurV3t?oTxa15x2M_mff)0An>JGU&iKGP ze3>VnqapWAf>gSI4(8VES8n`qJC3?4-B`jmezr2!wA$l(#26Kf;jhBoF`xNWZXP^n zFLkb9pWAiU^WpR_&oEC6N0*j}-@JKKu>({%4x^Ax{JtB2^1K7E8C&nqbG;ATY*SvW zbAAd$o+O}NGQuS=j@ANg-%=Ac4d_V#DErLV8JobDCBP}e42=C%w-yj+ zOqG5rgg{-l+H3A>$ZI+;B7&)4Ah|QT`Z@Cw=f^Ub2$$KozG<;c8dd7xf}sne2P)}a zAdOj9(npUO9P|{+p@TF3EHt4WCb8-ur2QaP^vB+=ZKX;?uhW4MuZy$p5Wn8++bgT= zX(l$7p2(R@l8;J9G>Tsfk^}DL1gIg0(&V{x&PKD*o3({e4>Xv)QU-d&d#rb3NR~fX_tJ9CW4z6A z(5xp=N6_DKI$1Ch%{Tkmi8>i{GFB=ogPW?X&VR^odN_*~oKD;)`G2NF?KfqXU%70( z`>@8J4APv<9W|dxE|2FbOTkR{&*hfcBM4xB{${3NpGu_(H<)v#v(gjmr>>LS-o%g! 
z_-@7ly@k@spzrdcQaLmcRYuC)XT@%Eb)g;6`e?%glTOc%c#5#AQP8t1lD_9~E+^Bh zgl|{G#{v;rr!|i;b9U}vYpPoUh}YLY9H}ptjQ?a?S7`VcM<}*48q6IRt-6}ZAGhQS zdF#+v<~brIb!5rViUMwyNb>|Ym3pSoV#@a-D}Y<~5ltuCHHiq@>Xp<_(<%fkcN81ddGZgwFQLieIrQUg&uJW;%0tIAkdyKe zQKDMLB_RY=VVzL)Hf|K1ycvGSa=5ao3rb?^(bIqNI^o7Mc%MnS=|HQ6V6c&6Y)aX3 zr-6G$rq!3WLO{%f)&GmS?|XhX!P^(>ku7byH`x@<_sMOok|OU$y7qA|MtG+0THlx` zEkN-XOI|Bbg2@sjAb{0oWZuxI{J<3uKM)NzZ?dj<`0P=18K~h+@#40Qdl^)fyLPIj2ynXNC^?_-YSktnb zKr7>WJ`a$jN^-Vmq_`v>-K-r{tjcVH;Vn7ce6|3@NKti9PiwDs%x$P_Y@*t+@s-1p z2=n3cY)J0m;1?9wT-W}>Pp9O{d@ii3`obTO&I{?^MCjIY@$eV`B>=Z(*-SYXfhLj} zKvl>eY%a=k(ywaxK-*adEycFyq8Cuv?ZKdRK9b?I?ml0Hz~>oD;?1$*CyUSvEwJVU zbM~Tz7-Z8yPfk4bY*|PQN(FZFKa9dvKGhOzEkVbH!^#{pECa81YqjLbOlb;$3w~~16J-?T#RAw>y%w37o+0iNu-f01k!d7_&wXcqZSxMpZe{r!d;wh zYkd{a!HU0F)yaR4UuKlRnvD$At5q*8{1b6`a|xA91E2B5p6#`T-YD+N_mMZ&xr3QG zAB6c5s%mR{qN&BDnY6XEn7r=XUC@Y_#boPk6K)JLa+1^w)ivu63;-U>dVgu*y5*(L zS)#bK_Qf04u(JqQ!g!#OVErt*0tRu?$8)@AffR2Wj0EtE6xu`UjBON_zC(#cdt#tk zvJRw+FlO;tfm?ne_TR?B-c-d~(~@9fyji7zhAe|@E2X(Bs@S{a%xTmEPB4P&R0={)>!pE%UEHUL6B;J`i+f|_j|9ZJd$ zLwT)&T=B(@8ub%z9qmP>6pwA;mgd#VwH8+D zs-zvPFiRf)w@2O@%~Hdn&q3cr3g$1o5A~fyF*MzrHC(*&xGxAkJMLh^?dDm{*zi-7|j=`3*2vNG|CYfL#rA)Rec?HsZs;j z%(7R8YQKZUu4T2U6rIJ#+;5NepaYl7Jqw3iiO{JwYHR%B)9Y2n*stx&Z7rr?r+MlG zIIgt|QfTI;sfX!jUUq7*?Tz9#s_egGx=IU&e>R4QDJnfrB8yz1XIYCxUtO0e9#S#w z`W^LvLkxVg9dz1dyE!7cHK)TvCU1UpS2@H*`6KUx?6%~n<>)EV)LNS?>jx~asV;9D z>Nw)NbIJOA-Lw|NT2JlT7deC1h9LJ-T<+&ScjB4ff|=nJWOB3n`!5C%2a-~56Ua_P zPv8xph3xt-Df#iEVq9$1+9tBmYKrjS(SIx?|n-3Jg%G z6ex)^EX~akOcbgHn1FV6cWHA&2)UlYAY!|J*c>*8`P>q!Pt;rWd5fd|Xv!*!qzw8g zHp7bp>_L0L@bmIs9USF}sah_03vnL1v97^uUdI}_@ZLkdwluZpV3bhPLu4@am})~* z$P1jHju4q+R<~w{C*8=}dss%1g5U45zl1Wxgz6+mLgvG~c%Zj_j-l4DUdJb?G{ACt zr%xS>t>V?=?vY*p)`t)cO*b)^56p(&N?BVWP9SD^fY6p_p4yQk34xYQ@sS}WG*`q{3kC7GiG)#k5-w1MyS$Y zXw{;TCl}dV6%?lyfrFAQV^hr{tCI>R<9g1MA97?dXj2`M`lPia<1Qp(7_Q|VxA`G3 z+}OXBohvi1HTT6I%iFoLZV<4#x4IQHnaxi(n=ux!A4hJ+4CHwVYX`UO?$J!2Y_@!< zU`m0+dm8On-gBqx|8C=eN510h<@ibJ=i}!sLP;X2TW30ZNMWlee+~~ju(-Z} 
zxMy>jpv+P;9l63_-JMpoCXXLWww5*i28qd6(2%? z+Db5pru#GUui?BMfpN?>bt-cnC-i$HTdB6K5iQkH-6R9biMMZntcWVPLBf24wXb!A z)S|S0MGJ~bw912yuT_`Qm21$||LU>d1=_asDnvI+@G#rgsvut+)|~&0WKWpeh7U}i ztBG*oc8iZr^@eY1tbrG|!WkO{K5h))DIP5|tm|##5yanrq=-nMAlU5&pZNFwXKGa8 zTCa@qP?+E@Zh{xLo43J&t&EDc0(m4 zOrWjiC!k5Xnwpv((D8M~X1N?6`_h`m2&rOiP4ACCxYz*)HQPQr$qm>epNzH4 z`fqI+!(u*!kzj61z-a)?A(}RPsq(WeqfWaj!^cY?^#xN4>b*__yvbJT%|+;by;TX1 zf#XN#ZrCUwt+)r!Oy5J%($WsXL;a7r*L2Z{_w!7lg-YqE@+@ty35H6-4o^4VIjhX)2mbN*n5~(@80{o z?S4wGI^9cD@?KlM*hcx3Y6(tNR#z~47Axw)+d zBo!#*-Z?&w>2ZBJvX;_lbA(xva`C1sN|dwZhH<{xMR@kSyVI>!(S`SONW=wyNT^iI zLuh|_1Ju)Mc0PjK3_Ixb*KPf#`?)n@N*Euft#9_JqwrIZ%^`+u78*b@Koqz%|8efm^FktvNW~DZRjv<%Lc|-m z(izaXu<%-TwWZ`UVx1!dgen;AIl8}H&pD`lL6g(To`e1N70(3qu;h&q>fWcZ0H-Vo zU2v;uo%3;7GByTQ=>6^ztfW!cTMH@7REg5}cHi_j+8ohts#@loJ#l1-{M(Bq@QM|) z3R>YQ3MJaJE{9eu<#37kgT$-T2-=923gesYRcV5o6*PKATJ?HHB=bje>1X`~b4zmn ziB*0rf0*BQApv2mv$K=g_0)tDh$=?2#9bE7tF?72Q5KzZCHXl4ODUiVr;B7x<}DOG zm-%_Gd2O9`D|9pvp?!E*1cR=$6AQ>p$-H&X&omsPqf`bYZIdg9;9ds|wgsel1zAlJ zMkS&p=}%lJ+6Uy{z5eaqZe_UA@jobuQo8`dN*!=vG*i06<8T;H5E$lsR4sv7En3o! 
zF5*N#(;V;-ouIak)7j-n>y=_I7@8}IpsXP}je@e&7Iu2x`b8r~;>|-Sth1BZxc42# z-s~=xA?V=GO%Xtc4Blbkayd49puhlUf?r5T$O~_~-4~KEC!gy5$SrlakuUn#*2YqK z8sO5T?}$B9`P97d;$)utXPTRGVlvY6&&*e!t3C+g0|WG@E>eR7BW`PSUw*1VHH!>H zCc0w(I8fZ}8$_%<9IHz^n{Xq#A$&)=VokmJCAgYqYpg!ajAv-0$Fz>1xZ91V}@aul58C z36jr^U0jwN@F&6i69H;QALbd#Rf%ChY=8y4qVwe@^jc@16|kRnbS?Gd z6vR6BxgA}S5A1W`Eb5-V{Y*5dus3!5?QV}uPqFQpk&$=^!SOm2cTJ&xTVojW)qM?9 z{Ov)2AaBaN?O_w46s!c;vRm;xwnHD83jUkRAz!9wYr0R;H^W~QE6p_SZ1fi%Hvzdk zZ~esj63tqfr=iadPq!7Ttd@v?bMoEdr;gnmPV1io`W5XHQe z;gb~@GOX64!fyceIrf!$)NUyG^A)dse~R5yooruXApxYevAQ?vz6%>0;&wI^Ia`h9 z;o^b_Aq8$?WDWj(|Ab`>_b?%Y*?J1laaPJy5CrCu(TwZqq*y93kGpX-JTPO!R!?vl z(2rE96lliPchTO_@m9uat_J9=BpJK&&dkVv2YD$bRmQR%W(g`!p)p7+R5c}s^Pp1K z(@ecp+1O*|h1`JEhtE-PHQQtMv^|a=yqkRy0rG5|-1J=gG}r!&s$}Lmx7b;)Uze7Nwrm)M> zUg73ZBdfC5l8s%qyf)T}hi{9Hy1ICDqFm3nt{qe?s6!#XppM?xxn+)bu3ywy-?qx` z(qi>-W$1k1_;RNY<$3j$vd`t^YvmIz7lH>eK=#DdB!x;DJjZt4HGSpv^^@W|OzLRX zcx4+np6RvPF4~^_nCr!L>AnxLSRFdx0Y&(R>7%-jD?k zNs8B+15f|snEdVn9GJ_h@7d$>`TI}ublL`{@k{>ilAQHUqr@{M4MqN-^l)ED`IRvA zyUPeI{i9dwc4daxXALlag~vUIG>Nfw*&8#0jLbAVTzeBu_}$xCQ->bPD6WgjuE@t{z!@2mM}@H?d=iWKv{|C`t3CEr1GG#k zLRXJ(oWX$)ZRsv1o$9l|U|0VHyE`BAqYi6EMY@Up5!&z~N~7M+`2P&1)d>DZAj>NC^)JfO>&|{nz8+ z!@*EyzE=u3Ck0wU0X0o;LH0htR(Th+kv#^F(1OTwqrb&%Qqgq3GqVbat~4%Su#Vu0 zLPJqgi|yUZE+OG`DZc^}AQI==Gqr!o4ra&UU`X!jOn<@|V=sDTcJ^aG6P>BHEc)ay ztqAc8AXQ}qy_XuW<8>oSJNxLoMho*`)iX;iDIMrR;-V!v(-EyWk?FJZXcJ&YZVWEm_2?Xzv+0YpSLH*@$vB) z+Dj2-eY#J8-J*a$>SG))0V>QhwO>7%%n%#KC8D zdb-oxW9x@cR)LCe7)KW<{1#d>OUooU#ym)oAEk=vezN4p@C>^Ce!noOeproOn2fg6 z-u1PrRbkO{dwhA=0>_?VU|64ks9hFxUHkGhOVapBOLcfQGbP`Kd^_mfZ5_mCfQzJ{ zpvYS)U=bP84l=WjaxCp5h>@kX}cCLxl<5iAODC$fQAVmqvM5Drm5H48EBQypNykY-$tx5 zxp`f4w|Y?_s9YN+OQGx}sZRQPeQ|}RnXvdINg4JXtB+om`sV zB@xAg76?80Zpw|p2U{%H=9pA!Vv9XqUvRegEy(@Pt@MAK4qrZtXQ_l#%^nf@JS{1^ zaS`GNSe363)^#<9K^UfoWVDiYRW7v>MxPDuRN;BuF%dKgXix9m$&WQP^*V!xm*obq z`9?9WzGhA>w_sX$PS&>5O33$EdcMyJz4@;3dw*di4{!CRr%@?ooKlC5x`4V}_ly7( zXK&GU{Nt#etw2hJbQh_iOg9=lYx=AFH`Uh%S+7nCrnDBZWqA%3*Wk1At-nmGRe*3i 
zI@2Ve+F4m%a8#~Ew*g?bY_;^OEeUlWCit{ViB6HSDXjOtmONLc(^dig<;#~ZUeRE; zqmms4oMHmWD`b$@7aA(x>d@(#ZLW<1bILT-0Cr}Dp)&OMs*d$k$^3^z$wc)D-?0F< zz93SD;$)$tLc@61S%a>EovJqAC`M;Ez_<_0O`?vAp3}FKWcY?S{}eukUtlE`4y<8E0jEM7kDe0Heg22Iwt?gKbKr<5M|wC{1fdfJ>NFbZmAbsA1Qwtuk_M!a%ZGm$s4 z_PS0INJG@^RfMbUzY@knDY2~A;(4DY10A#t6P!t58J-KP&QzOJK^rex$|q%}GT*f4 zPPoK@!nQ<{utRh4ydn6(Qs)^Jd|j7Tfa2oN35cNMS=CQbh)+aI_r>W+?WbJNZ?(Jl zJ_iIe`Z9=590Z$fxZYmc+yaLG?UuSy$dgtfYlu-&hp3ji|#-$y}cT2r* zRugw=4?9~9NETbQII!M@bud@o#EmkYmaEfgrvwCD#dHyuCDp!_%QZ4B(r|(#r;%z4 zQiRwkvPKyHCR_ZVqqsWj zPX7uJt)ejvx%+%i0-*{y%!>JSq)10sPZ-xfj4;~T=W1LQshheSrX2R5uG=3lFNrCJ zQtgmMws>ld#0vP?hUTfxA!oKulX#UE7B=I;i=55@D6_ta$Vc2j)1Vt)*W&sn`3_p2 zb+tY7E*frWq%Uqa___dezBhL~SfE}>fzK{w8T;ayWDE`Y!B`*vKQ5PVcZ6Ivtx%&marM?c6NY zSS1OWU&nI0FN?N%HKDa& zqO2qmJKE72dg#HOB+(8p1r4NMFsA^LM~SA8V)}0%3|i*mHug~f=PNo)qSWfOmm?Np ztOw}6nDJsuTMerD)ED&VOZ#T;)L%OxFPdqwDU&2rEWXJ5$yD~bB((tuSjToj`D+5E zZ*`b~Hq-B~yV$3z_L6z&>P0-=6;$rCcXJt*e1`qz;1NE5)4dt`nvsM-6Yc1}%{&ie zJH14ho}O8q3&~x(3mZ`&79(g}vxGxl+=V@oXq3{bML*!8kT4{a) z@KcDnk(Q%yM_Ham`LFEA`XQHwKyhX2-&l^@0L8q_QJvp(QPA{9T$`M!r>tT$<8J(Z zI6s@%S{OM4lz4_qD<5aJU`eG~Fqor@p_ z9d(k7Tg-}B5>;6YsbMl~&`E=uASc-!uUE$#Y;SByTtoNnMt7>uEn$v8kS=+0%5%aH2iMW$8K8i0yiCbA&LBqlA;_*1_ zd;)TGGUZ4=NHNS8`EnE$eJK0T4f)_baipR_w&H8nHAjj+e~SVy3`!ttT=RY~pXe!^ zQgcxwGaas!W!=4?-H?t9ORF3b#cyWaPKvhlrC8k^Zqx%cC^45mzKoV*JT`!+NJ7x^MH zKY{C^j6;gKXT)`6UZdAkRKUAHFQTvxlwRqaQ#CDd)d#QQ^sL&~`%PLKNi&(sciqOL zI5-&0wkC*3#I-0H2y}n)2!SZ!43UZ=-u?V19))b`7vOO+*c?ikZg%5LgoR=;1~r5+R2OWo zj^8g7P%N3VnIkSWi$kF#-#Bp;Oqw(BI8T`SpY)vf?(&V^xzh9W?V^v8@Up|~u~gCM zX^Ii$QFbEHOq2de`q0rx`d&tr%mca@<<*!`&u|`9oVQFd!&j;Y?5^-(;jmBO;n}8p zp#KDn0uT-ez`S_vHjuFCHD0!+@_UuEA_4_Yug^OCsuz^XZ3jh;t+zqcSzCW!JbjICCO1Y8o|Bs-5{+#C{omY2{Nme@LI(?vTGGSgnHE6XO`4a%Jn+gI^=}2 zr^h)7NjM+ckJV|F)$a#X`g+Sr=(5@ko?=NQV)CvZ4UM+ks%!+hB;IZXnX(uSgl~+=k1tovT=zcnc;3w$X^JyqWxHCs zB=$xR0h2D7RjT|ig*M7zgVhsMUj^h`g>+Z?ukH`7QFQzxzE_HV9nPCkAnLvi%*tkR zZPYreFqx@@p>StHFh)uB!(CCXo&P^lIlR(XYuHqZj=FPNhqvj0Kxth(ihpi1G 
z*|wHAb{V~1cWzBDq1I#KqreJea8OH8adG+ke(clTDOAAo`7upb;hs%WQgR@L2M2sZ zFyxYZ=yo#NU#gY~u@84?@OIcgZ!QCB_ToA9X6YXiO;PssvU)A`kOY|OL1|Ymi8p#F zkDq6(Km2VpFlR_C@Uv)FRKOO;ksGV~{kk1v4vp9ffp5z;YVN2^JdYkxYR|X!!Ahi# z|H<)KVH=p~$2n2^@OO`Xi_~Vf-RE(-uLGy+HA8AFo0Bn$uVfc+pCW2<*~@EhqJc%` z#-=F0r}isXA0Tw9DAMEg6z1bA-uHum9lXvH%zX<6lP1=vlxX2yoe-QaDBOfpRaJZ& z?Yuy<219B|#7(G9&3;26VBj6I+Esy-($3RlbGG^=#pCGPPq|-7tsNf+;i<9J*b|Nu zfgJ7~nf9-jd<_ub>gu>8GiPabTE$Bisy9OV$(BJ<^RYQ_2&i7c5Bs8C3ahGViT0-s z8AJLrO5|BE7Q-dnSGrAh<>`gh^+$RtTty%vEmByHT1P_{u!b+ocUub$7S#oA6qQ(t z4+d7+HYJAI{7D2kAnc2lFp~lA%ny)47iG17Hd9gX(luQC@c817Gp666*zB|h{H~hx zhiCNBdLY<#*LZ(bQ~L8c#5$nYD&4&{ME*m~{F1!a8UTS;i*gg;KZF^TE+Fh5ACQ{= zrVbyeH=v;;!1!@<$v)Hz_*Y_rz!e}l$9SQtL;cUkKp4RQXw*}^E^q(y`Tv#~`YUUZSnmhN6f(6(R~|c|}=?;8X1u{H#OFXM)4z{Y$LM-}TM_UxO4P zE|%2!);!x7%*lWAVld@OpCr{6Tr5@?{p-bl_)#YbIQuD){*V7T`#e?P?EgjFJf8h> zHNu)3DSc_NJ@%u*?VSO``6MmFikLAK*e<-~A=RIg1+-_$Hw_5=?-lZfq=uOuV9SR6 zlaKm1Y8w?mT8c0;WBe~!3Z(vH0yk5^clSTDrqYWBvQjeRstg77EDj0^h5-Q{7%39h z_l1JOqp}bYQIrx9Ayss=Gqtcbfr8QujPrP|iX~U$IywTc8BejD8nb~M?KFb#PqAHT ztQbrtEQYQKtEwoh_)X{~%j+-HWQBgvQHG>NnSNxXZU_+2Rommuag)!;5TA~o-o3Mi z%he<*)T`3mzCJ}P2B`O|EXF$IdirL#8v2W{LeCIZpr|4#ZbT*aG<8y*<=%NbTvhXZ z+9BI)+Q=_^)OvhmaBc^U2|x)=BV1mFUA_>pCx-^yY2;}Ia(};@K{ta9P;lN-5K{#4t(V=e2-h-e0yZT#%Id!e46su7bb+3G#gJRO9ptQi7hxE7J1~9@ zNxJ)1aF>m8@UV0J96y;9FPL4`NIA1@;;n+JnQmkb#m8WTqX^V0wgn5`WUNB78;PJ- zRxSNn=-)yfj71h~h!Q#IA)}3kG$iBd2Biqo;V)`Amu!)EZ^d|19#>cP3+yU~p)W6x z+-6td$;a}nS|;Aou}*qQkO*wzS{k?TkN%3=;c5}I$>=79uIESlCCc1C>6s$f`U>C3 zrehc-41P-pZReRjdWh~4DGY2tW_wdS_r{EC*lSW~Rs<-JkfX$DLEwbC9G7FmGpQA} zk9R`n%dX8jbs$pIsEWrp+hq#>M|GP;^$_%57~kHTyVg=_6WN3>)b$2pT)5=zf0;T_y2Kj7WqYf;B~F`|JVt z>f0>?9j|t}@a~2$%QhU2A##;zBO8xh*VZ`+Q7!X#ZH%&zX`yTK#jzoCSJGg0A@S^h z>32Q(UnC!6-X9p)J`Qbm+%6d3XROafVRi?9<)v?Z^b|j?nC>Xsx$k{-L#9yUYxRBZ zmrEmI`}3>LLwlr^z~jy`dnRuy%*U`#S2|I@3OE-SY+rv#%M|2c#Wl${nSPKs&Lx81 zVDJsV>-dy743~wAktB(Vh!DCuCG2V}s2^ibQl!`F$XZPcWqowKu+ZzaQ07DU0R)K{ zvP1L(ONOdkK9VB|zzgZ&LeV!p_I(0F87{zePMHZ*Z8X~8d!BuUb{7gLMSk0f0_#W3 
zO39rGJL&(H87jXMarPPRivN%&DT0V*Z$KL-k z6mbBE8DWXE>I8b|qemsO`fG&m!jtIFy5LWQ-bKQ`d0r%x90_fVI`@t-5*G_9P^9Xo zKJ~MK0K}{qMG93Uy+B)`t4v~ZC{MUBk%^z2JGhAA7>c3PjIqqp1bH9Fg_E)ll)Mz7 z3#nCK(qg*6y1=LfHh;1d2aE8Fqh$9e))Z3Ub*eJ64Lz6nPFqQA4cGGhff;`ZZTmZg zJ+>}d!|QesYQ#6EUx)-Drb1`g6j9gg;=GwYKb3awcSLq#c93>Vov9$)p0D^L9>TJE z>_5C=w#35>Wut_`jOhO!Ygo)8m&j;*=-o+gRHbQJilC0KmBKh{Xw7l$ONg(e|sOf9LkB}9PUzgX0-UJV@?ZnH0B;R}s zva{C;l=G^y5VNK8c!XU7_g+tNO(FWHlgj@jZ^ZoYLIQ!~Q_ClfPsfv!liw%jC(9>U zCM623ijGwxlpo))qUU{~6gBvOSQR=e>n(owPDF}aG)H7zI7U?Eom1Kt&G0J*iZbRh zVuoN>K^y!-l==7b6g==eT0C^k%*`UsF3$YUD(76D?>(P+8bD+rHRo5mQ~)kP!}G(F zd`AB4gsg{5f>#wv7D^fVEz~140sj_nJ835gEAds*%fx|1Tb3}EA4A`VD2K9!ZiZ$O zgA#qK?5lPvud6mHCu{hOZoUnCD<0nKO&*-@6t1+pmt|_1? zq|wev;f(p0`6=uP*2&^((yGbY)GEp*|F?yH-qGHEvmx9;jsBV5h5@GU$DJ7?%bkzv zP)wO*D@=-Se!gaA>LWLQ-Kg;^d&TOt^lR)ap{(_H;CF(b#6G2cLPKmoPzbL4n)u~S zur3xpMjp=8%dnSaFMVEr##X^V!YD!C$CU`C`}+KALa<<*kQB8Ptdy!G^1=55WiWPo ziRDb?8)0P$R{7KHjYh1R*e2Cx)sRVC3tlnGSt|wh(yMu70oj3T!riFdE)*sd)yi$-wbhx`364#t zm54cC>jqxO1r_6|=}loxxlUnoP}JeoSy>uPQA~+U?oSrDh8$cj_}E_M!gs(+#L48L6xx&{wnKLE#JQw2 z_7rAZPO6j!txS4ZhH$M=O+>BFI`x{+beJVZ>UL#fr8%W`^M*F1HUriI^8@oob%*sv zm1ngjH9GY(b)yZw>!f?=`#)DD*LRLI_pg6QEf@W?7}A>e$T%~?bxj~mP}fK;$+4>2 zk52K|sakmMCU!-CmBmly_44xEz0&KG=i+JZ$r}&-tMBgI9#bA=QKgbWlc0yXXP?PBc{AOcVzDC3Uf4(YxCh8-pxPWHJaTs-Uu+%8-ivNpn7Q~{zp@-Et3 zN@Ma#n%dW)WJ7cZ;#@xNlI~`(ZldqHgmOiV)i*R_wN)J^`Q}1(iBQCjgKfj!i|V_0 z8i^U9b%pm@^u%@Fhq{G!qzfgWr~gc$PEU-vrs-$i^=9RnqiQl@T3)JcTfcR_R67c~ z^g2G4#E|Tly@|Ap^?Z$zffIfCMvtpH@N1IYL5%@H7c4LKyB$S;^>2ai zn!o3M?>EBfO%8DnaSXu;;SceW#*&sN=_QHa7jXT4Cu&t>xH~pp7voR$e9(E2V1Rqj zX~0+YTAhzK&!_l-eP*|HqGJR$%|pvVbxxN;SIrT}>+R7&uT%JT^JqzGMVV>ENQr*M ziwgV}L$9nF^``pz@EXQPvb_%n-McbJ2@s}z1KYBM(h6%s+rAc<@-F?Prac#zo#eT? 
z@O|2ueZoM3!4$k?$CRwCuC1H+&iE?zglT`md`_PE$%SNFX>h5=2+XvUJ z!!TYcxA-IRrL-1Yd%Nl%HS5DZ+qXr^EuUHPs&v*N>us2>n0AytlxE8k z9R|0NH&GCyeyVhZIu%Ei!->xm_iVOoB&OtM8r#(Z6Fz`mAbBSxmorNq+c#)P8-ZbV zLG3j@6Z>rM z$YSdC;S4w4-P}I=RR#}bi3^1n$%m@2IMU&Yh-{Nj1jY3Zo|;G#ZusjrBN}^FEXG*; zYhGA-j^NpMAq7ZzzFmcVsv|I(BC`;#Sgr`i;ZyUotuqva6*#p(-A^K--$lXQ&fQI2 zxNnFLXycHh#v}9NK~xi@DYRzfqg0PttDo1PDF{vEH_>P>ieFgr?73fTw+wQ4bGTW6 zCM?I6N^(kMe)<%+>2!=_ey{8_?_lW{9gLlF{#H8Ex|IFBdb}9yIgVr0YdU%)w2#1B z&Ry0Fr6J9k&3i$7o=y?*ViZ)-lDoe&1#DFlDEI*y`qIBy81dWMYws-U_w1|o*B`n> zjeRHOPMdbfS!k-X9OPc?h<4<*czqLCy|<*HEV8wz+Ld7a$`*r0{VH*2K+rsmAyy;~ z=~OXj2Q`FvY^Z^6Pjy9m$8)HuuWGivYpZedMU8S!OtVSyW-<0@!5f8;jHn{^k^wGW z*QH#D@~}07UfNHwIhnnLi+u)y)N*%ylvW%M{&ojn70)yO_=D+I!W)}z&cTHeJC3EJ zW;SoPgC#|YA3l%r(<4E86liNL_asNV4xYwmv!ydT!44Vs6NL|K&nnVY+9HW330d*q zx)+^F?{J)445SoACDi$El>eY9K-aZ0!M{vvr#n?*WJSCNU>*CDc|YSdv_U#;}%Dt@m1L^Uv>)9rJskgXp&d zgy-KBN*L1@(8D}K8pF8*= zAfqZr8k4z`puULBj6*u9j!BIdR*?M(-iL&brb^hA&$W>Qe$X2hIIv-1#v-Y@e?+fC z9gCw3yL<_Tx`)p}kAZe#9)zBruns%WdxgeLVh`RZ-;i3F+Pv>}3ttZPRVUR^6s;8L z6EPQSloS=;h%)!XlP?ut)Vzxkon{=<(~A~R7Ak({*>EnCq`7gJFFc}aD=i@6ual)V zrtToW>&F`5h;%y5nwOm2Q|hXuCviV#n*lapkLGYlHql1Z7MYjgS>!2eR&TC2hq-ur zUV>bTLW6Qlc*vp7oX%3j>cVNnLCDHfUs;n|bx<`>sotPpYf*_(g<0igXRuq@8#R0q zsM20jb@o{%#;PzSb3x}Snm^tvp8qRfARl3ybE|&L%hsDV1F~Dm%#5fGM?cF~bx;uU<#ID0qqTQX=|Y41P?;aa+^z z<-;5Rh8?*2uCVM(n-X4&3 zo_XQ5cK^wGkOEe`dU~t=W#*kp_fXLk+hkm;+NoLCXj4W+;gUg&)2gD4CDCs7DaBfz zU4r0Dh3;$J&2~yRa5GC&QPquE$zE7r{dkJTypA2X73^fTo37LwCf`z?cN9&0Bf;c} zcI9!A!Z+cO#ft^`%Eih%##vL_SiSpr__+EON&SVNk%i>vWH`S794NRn$#m{78)@!P zrA5${8gSI_ef`MJVI*ztj&Gn}7ogxMLi2PYy89tSLR(7OTEpr33r1rW_|7k_rnATPPtj=%)l04#ihi39%g7NItt~{rhVxgum=+F(7!JFys;a zJ*_{A-^)~$NPfQf@-8X+b96v8ltSQW)^`;mib_n&LhQnYV1ll<1lY+bwLP_VJ0Er| zcA40A*pQj$*-)9aSon=DjUW1z2a3P(4Ae)Y1+<0z2#ATUNuvwb@xSFA<#&D~UG9U6Mf?VDG0K>>mglFr%ZU^;WZ1qg9nx zJyGM(h*2_|lbehjr=N)b$z8gTb2a7nJ+lAHaN>mMPt|Wj<9q{apG*p-I0>P3s9MM` z!QtjqKHKyO`kbb2$NVnD$8D8wc<8`n>{6MODr2nWaZ>2Lcm)Bd)W5wC1e3)jV1F;MlO9l6IW5)do 
zF7NrBA!W4(?w+%`sX^R#mK}P0o^zT0M+4vMBEnRLYsxa_DHm{$Dik5qY-VyU zG$FyWDtZ3DwgrR6Drh9+JB;u;?)|yy+|;+7e#}hSNr!wMye0|q70^zCT)e-S(q4y* zEiVzW>fH9`KRWn9?LZT*JX;e2ig+VKv{oTJNqFu|tjUjrD=h9vTd$G3Q2VVt1Tkno zmJ9j(G{ZuXCf@uyAfzYay;ZAq$ePaUa*NOBk(^0X*b4KnMzm*RkHS)_7Q}MU7!b=h@)d|eC z3xnz(dY~EocZJ~7_e;W5=eL|Q9cMc zPV2sk_v(_thz*Sj-ZJNw>(TNfV@% z^Kce7ZdE*Un=Cn8Ql@6}_Haf0km#%hiKu|Ljiiu&!|N01ZbNEO#oG6X?fY4=@aX#x zjVwFMB@n@SQY9$k^5l|z@L-JT`oN{TX~HqHq8(H@^ft#zapzDJosaeQtX!*slpGcfz4 z_rRUOmISp0PIQ8EmHLJVN1aW7LM4Z5L!(FEh7|B3hb9K4h9-oy z1TTfGg-U|V3^{?bfj@$x0xkotqscEYvn;Z}zP9iE7*AEu+C)nsMO@xpp27fff7ReE zEjx8NtxgG_(%{@-cJLTaaa+DseuBSyWp^cbr`{o>zc?w$qb4tQih_`4!PJXC=yd@z zZtFgUn*y7IYm?UqO!5UF(kr28z z)(Mu7?G^8lvR09x`?D~_Sws@t8(b!mQR+NiEwyshlZ{$m3RN|b{N|mZoHc_4_=gP5 zg}6p;1^(N7c8$8NhNjw@MS^w}*U?RZ^0T{Vfk!F)@DZQAFd@0OtNu6JDgn=az*UIq zYqZ|nPaCpdf!7_9Uj+5D9Tx}=fDn^-*zWC3O;OL1&O(MX?mY_N#yOXdr$g>SWMqO8 zxD)n4g%4aeMwcJ(hCA}KAGt|?yGcTvh%jGK0%34Zs z%LTO!9vN;IuC7j&BYYp1BEi2FxT?llb|1}sJ(|kapONjr_~CVv=pGA_}qDbpEf4W?@8TltZkim-1*6VKfwe1ei~*XBmMn| zvlTy?rkoe}r#nn6jLb~`nHzYO?`f1r(Zb!tT2s`*#>CbM z_=W&C3n$<2=l^ea{`TJus^>y^vzk2Od<>e@j4G zxWV@C1(8O6fc`juHQ_+|FGcx52~%YJdqGhCSV-uq`HA7rF#at8pi<=j7pt_o;9BxO zdg{R9ay>}AyIin4YIZqV)H>bhU)MK-{;diBTCBd8Cd%44l3IT@AXn6IG@bu+RPeEH zYdFPZB46$$4i2TsV0_%qpNd6FIiG87!P<`7BWcmuY+x4Ohns?0^Re$fV6UsAy7(g$ z`aepG{0vj8n*W+o?Df;myfIkW=#M!apsVPUAn|##y1IIT6Uk|(-}C_4-#ltLj!)Pca`4Bs3~7))X_JJ>2#E14(da3!ApYg!n}{TSn_jN+%GI(jF3S0m@8dn} zGim;?dfky!ZWV#+l>phAYns0^hxhnaxQqxt&;(yG=zYEU1NOtkdtKV`8p#OC6 zZS%T{&yk2|ma&^DPg?}t9x4|pQ}kyDb{rzMfbKmP7}R?aSU>!b>xTJbM<8+`#a+&Z zk6)jJOp;mge@+~Ee|xnMGF=r7C@Kb(h^I^F*DKsc&kIYB^)OE3<#r#I)5?#3nLIun zV82xlsYHP0`(j}v!e3Ph=Q^F_h=+Z}ROGo$z4E-+>+cH26w~37`pX+u1Od6vGrWZ5 z0AKKzC9kxcqP?m!9ZpsQp54?}u+B0c%MLb?YPjdAiX18R+#RRLNIN^2t8v`<`8p#` z;oD!Es_qv*Fp22M_auiAd6d zUIB+m|3Lpr_ved%S}z-Ksa~gY-6mzzp~Tl6cc&rN%k6roBm&mG9;Tz|V0kXp0(qh_ z9JRDpg_2QZC)jZ-u4Tu#2Hvmtr^{+8^jci!oOTWx;^;Ku%k^5MJ3#m2);KzC<*Pq} zLc;Z?Vi|PadYlb&9yU(H(hbP}!J%@5U#Zdph`-mr%gFqp3&CSIFX6z$X=}*B%yr6; 
zl0(WY2<62KDF+O>l(%ttG6_9i-r!~zlf(JClcTFerL1>wyc5>zJz@NuRE?$Iz(FJ+ zYNqz<>7CWB*MEbqU=?oOAb%s9^d!A?I; zXl>fIqDqt}f9WiM;jo%^PN>k6=#3y@x>|IK@Y0dSsW$E>Ri<#>p5kC`*W41 z>wC0@{oH8C;vZNk7-Yc!rl47_SqU4C)72SgD-E29=yp z$T)V5rIvjk+no0%X_6?So{UR~v<5&m(@O=YnFzr4jQxaud`7Y1ltBq+=fByAsdqVA z4I;rIGD?b$rBWcCt9&n#Dw4u(K?ZnJ1Gv1*Pl5kJHI+{aoT+C_;(OHBWREOqCv~W7npJh)o;2Lh~7g5QjXo^{9E@cqox2n;|`2YH*UVb(H@qb$iR0 z$%@o>JI!qXP4LbzVhTt9aC)KMX2Ce28~QKHJV+tr{IlcAuQSU&5=in$=|G5m&Xs5k z+FMZuK#*Okv1wjkX~3DIfQ|0=gMQWuAFHilq{*yP0OdAcanLvZgKGhrPYyOzVZZbDj>hQa{aphBBC zqW6b9IVrjb5H;bS)BUkJieHfZijZ)lb6E)TK!o6fCXd^Z^|J=6@+R>muVa4RAH`SU=ej+d8R&8GO~^Dfiema57cUl0KT;O~y*NW9Z( zJ(?hGkV#<77H**|D}k?^(f2BwxwHNg)<|i9a4e-nhW6W4lab65WYU7w0%X!iDP6~d zt>T#s7)SZeLT$cH+U8dEa`XuF7)+OFR2-Fm4X+4gO5<@EaE)`h?D+K^$@MO`CO!Um zo?x-cNFqtlx1FILFu&8=9%1wwXO$^3YZ6iMSw)@cHvxIg z&19$I+0sON*ev|GZRXjv>aCN!Zx0*rm(*1Db4J4#Ys?~icB^fjq6Iv=q~>|>4CY%s zoI>?Dm%W`rFKV^99o9Y_p6-P*FeqmF5&bAK68#I-a@7Gf*L{5TWSpdtme522g%R~X z6Y2|hbNFi)3gM&EV~$_(xhpoi9ow1-$n83js;Fq8j5yz|7b@j!^hJ?N(GDbeOzW7A zW-3!rkDIwd$Sf+0YgTxq!Z?0W6grE#OZ-kk!l1wgDmlGXzqiwZF|*8^h{rh+Sf0vs zg@Ij5r>E-}ALyjon!PfX$BP%mX`*n$h0o0xed0RCqyZ! 
zhw?&6 zp-UDrv(far?Wt|?r!aNd6t3Ms#OvDF^4&5)GtHWsrk_UPY!6SxfYQfyYj3I~4?vDf zcAoSax8o&;W`f&l)Y!1!CYlQ4?29sTJD~QogTfS<4DLLa${A=yQqRcJY70bzU+y}l z0nxD|mD{N&U4|s+bUVXWdUrf;o!{+%CF{iG`~cMKb!E%T$VS69)T5#Ayq`^>1YgktzW=h)F^A?8z|w1Q8~SRH(!`0PK2c z1lFL#Wzw(JvrjwixYSN~;?d z9YcI53svrXAI?awLMun!F8zu0lHow&=vdqL+Z|UpVE`1ASJtaGi#UpPx;fo^L$sb` zF`UeC#F*XWxET!uLXV@%<^yi^ajVaCg!}>0;Mxz}Uq4kBxM#@2Y$Tn}Wz<91%?+=(>^s|1 zC*jf@rloks!nTb$M_fm*%5)kA0Knnr>~1M_nb;QNvFQ4OF_NvYe74bVL^7N$TutfH z1SI%Id^p_&*qQcE;O3wClC%)Hc%i@hDRb}#fX=s~?J}8&Fe#Q`zlONZWmpObALyev zl4dOpR{*Xya(cn%AB6?ePnK#;q=`^X&(&JQc!3wbSHAzT$dIyh>18`{5@XkP5$_cf z?l7rk8WYVI0M6ZrNGkx{Ls&-my)LcsxgCuUtV_wCGBDD}LO`$CqQ<|oTuCcvz?(ke zpzW>n)^7WcYA{?%c0Dm2qe_%IR;;w%yDQ zmpeK>1qHRtqy+*9)*quFV4(w*W+Ua%v7PSq!MW&guBN~T$JD!*nAdg4RYNYMw-_0Z zS%}B=U`JpogsFX}hOzM|rtXEk5orz^%=8t zj~+d$Q8*ili^T$lJOW+IG!CI@97UV=x^AJXj+-)TChCEFx6R^sscmfz zWwz}Hy*YSN#B5?pHBTDbi$C4J7W_@{Pi*S~(nWkUS^Mn~W}TF@bV;W2Xr8cV0;AqyLEG(7i^;)k)sYL%Z*`(eO8OmDyx6To z`<=N^!4P!X%vOFAAUR34DtBnW>2YJVDVs1<4rq_%OR4s`y#)Td;tO~;I`91aR<8_X z|D5d;-E-wN1(ryw3w9k0fd7Tm7;vf!TD zxK1oK*k$an5e_CW**)VpjNjuHv20eQ_K!*B^B8v(nTi+lK@OUhqdFTCn%k95>0Fgs za=E*9&}y(v^Rd9yAzCP^Xm;6`22d$l7Zv{xr2MLb63(j#q#=1y!+%Oggb&co9q=c^ zo8$i&1a2DunG-}i@$}ETe?9L{3RJ$xYbk?yf5PT(^93&L!0>q;^X4BP`+HOvs8jK~ zI1^+<{`0k`dAu2b>cQ&$z#lmM`_{*&Lj8*!W5VYrH~o7GgllcT%<-A;vA;v)e@0=S z%4+mwUB=(Z=-*SI|36Cjae%bFNtfWC6O*o30j0eGl8V5;M$x-~v_sh2sfqocN)qav z0fyUg^1c5xO8Nz;*#oY(94-E_Vjw6~0K+~EvN!)46+#Cz`aebgt?mCQ`d@bbKa2j4 zb3Zx%|DfZ)5C||8|Nn%JcC1+-+e0Si)09u;TD@QgD(0Ho2x9&{7eT^+rwWzph+HE| z+TUU#UkVuB==6Vf5`PXv=zbLiehMJ$^z;!`-Iy=PJGmR0`>}F5qgDkeOmjKg|x1&&{##BV7<0N&WIF!2cYz z8uf%meZ{7e2I}VxAbY7_zItLjfmC|#*xRkgSrhNAb)tD>{E`&^IcPs(tzQaq74ra< zCD`1X;J7tJ6CnU8yjy;}&)8`rgDU`HwEdDsIc*qR``xKbe+*T`!BU$}t=bt-dM5xZ z>Xnvr3J60kl}qWVnm=y4O2?vA?E}4*%MbvkMZougTRm#L0anZrD0n;DBV|>+ zP(0-`1Z=!sT7)Eng7)3&V`@#Rop;AiZ?-cA$Ma+k(fZ1?>#7Bj(FoZ4`r{do=amU0 zeK}iCdI&EiTn-Q86dhsC(FCqZv#bx04E+zFb=``?g^FR%6PAF?R4Sfc>m`FkK>vK5 zmCVs%Q`^>*!R0_K&HBU=AtGuqoP)J9$VY@T;6h=r|YUgR(a6= 
z6evTM;a32TNjLjS*vn~m`JH7EI8tSQchcYO5hF8i-1i8MLy8KTNMo$s$LopG1(+gT z$kj1ZI@mDPatAjUJ{mGEyfcs8r!@mOO9QW#kvBd!p8p{1*N@GEtp}>{En)~r_<|sJ%iHlvz|3S_d<6h95`Cv(4o83!>)9>l_B=N)(x^!P^y!o2w~uf( z`-=b*zJ^t#=dn<0VKiFiduO$ux{V&e_1t*na|kZeyuMKPSL_%VI7G=HRHD{!-$@*k z?ewC6d>jVt#s&+knixJ`EZ6$o>rShRw&`SILD1q{>cJJ@VY9b6{;5mP54&@J81oIj z5(Krj=cSs%-1n+C_Mf9k&bSe$~)7%fIb?oY}CrA*|9v_+XTQ$$o z4gmB3UrLT4LnNBb#vKv}CiwumQyD1C#z@#)tU*B6LhKm=3KXzUD3hJ5J%o0(+hfxW zf{%9-uD#v>ozGX(qp9EKS+{8a14X&r+vDbXJ;JC~Xcv#u27A2f69L|i+jyhH`T>XK zO7^mEGJE5g2h{oP-Z1BKt$(uobo9?I7Z%!8lTCnYj_i1V3=XF6$nk)bsD`C%_rNU; zvS~b1^Ol+AU@?H{J>u%dV$h!S8D)R0=y;$795b^;twJ~e-XZ*CF;loMdG`$)g>>w% z#kdUmWtDoRq3V-AYH6oS3I2GOX0kf8!JXEh{gOk7Kz@+4zuMO%F`krd*kS4 zP=n7-Yyvv0T^Ppo^J>|C2jEZkP(s3XzGy;=^X^NN2j(x@O^znn9{s-<| zY{+B-un`XOw#}J6bNPfrjl8!$RmS}O;#V*M0@k4cM>T@QFqfBu;_v5e%RF`n`Xn0 z4lQ8VF_wB>x@XT(Yj5R9x2>hm?|+~P*7hcHzkE4NXaHY;te`_>=bgcXi6@>!0+VX3 zzue~24s@b?be4gQN>qC#?!AnK}}= zrOzS(EKvnNWl=dKX=-#C2$BRX)YAb%K+z7u+fkudez61PbI51}BXA{^$|!t%avR%i0y?MiGFNrUoE6!E3z<^lGgk{1~?wd3v zjTp8b4w~LbDtp+S2Li6EugUrTeh+Ahj|KUd=kApEIb>zy0ALumZ0^XK1AU(l+SoGO zzY9|Vboa|SGm}m~=*t%2Ai5orWk^u%nS`gbe38+tvu-xLJWo=n*;lZddVpM7A61ba zLLEI(Oo0(n-M;0)F&ntT#IpJE;r7t$!1vr+e*9SMa1GyUyt(#H-+3f%-mR=8fA)%! 
zVZRvYEl5Sc8yCHJVE;AqnsdHPv$uCIQT|>@r=^V2c?=Y10C?MyrpA8&TC!sm6hE^ZH1b_9hBKk)>T>3-p6lZGb0R=&N3 zn#3U>x;1IWpsXN`Se%c}$NbiMJ}y&jzxs|BB>yM`bVK!0U)a}js5}du6;6|IaKO2c zkU`OOk==n%d2yG8%kxK{mb*-0 z-SYBYdMXc`#!Cc{qflR@-<=LJT`J)(ol>}@z~@5T`~|8-dnbxR!lS09*RCn2q)HD^ z2;Y7QznJRJIcF;tB;7HS5kyL^%^Q>QBub9P?6gd~=yf0}x&4mB^krB25DRx3jEqvf zu;wLTe()^yg=gyehhsDTN}$n;d~hpq7(y@o58_RsQfPEd3*hvT_<#Yk`EP_ zT}cj!4j&~B$%01E!c{*U*c{8cfWp2FItJ$RD~sJX&B5~mX*arxKn`M7awFN#{qS0RKq)iVYpPp z>bxdb5M+L3O0{c7=Q7$ao2|+Xq_1@YC~ZIxQOeu>A8bJg!JB#QU&8+U@em zv6_w_6}B6@PHO}!VF9g`-iq0%&}6bL?p6dq?2V{)D|~>I+fiNRyu1Tz&mHUj`vlE; z+x;k~7@QOO6Qv>4jA*VQoF=C72z7bc+s1}j7_N36?~I&PkF(n-I?C?*;Qay#>1@$B zp-xOP2L5YR%)~gBR}1Fs9(~z-adDY}{LH)IlA1otQ*Z0fVEavyf^G$k3ooSrX|@;P zydmit0pqchX*TKs8=v$c2yq)g!z>@4&9sZldA`1K)HaP} zg3h67kr90zoQwg43jsx*-EiED!18Bq%=JzU+C`Oi^0^W%#HSEd9%<(b2Km|~TRV^Y zLU~ysM0!70M5b0uwxhx2B{>Q!N9Uw^$K)HI^N@P^y@((4hiV4tQoZ?mjT0M`9MgU( z$5rr;1ev?bq{+1$KXt~{9q*C%?k7h-KBQ%5P?-&tfzr=e%y@9z`yyXovgJh-1v6Dn zm}&2s=^ReVQK3y58VEj@OJ=W%&^l2^1LtQD+Q{8+#3)$1Y2GjNv+c6U0Z1|HS(a|( z!hg0~El{JQqjkZSqz;ZV$_ThX#$onFk3sKKg)#M(iM=7oN}Ha)+r~9%&J|5^U~=WB zh`?g$xNaUCKmE|*tnKph$zc5+UnG(|%Nb$|KFq;PeRNBSC%F9r6sO@`I`4hx^(XY02VgOdj-O!~iY1-cDN#yRM$CPQnk!9QVIPpi!7}+Mb;Sa?#_%Z( zFgx|m{jz=8u;e*NY}TyS%(=g=EW7fa#Ar=7r9CBnmU^Okd!zsqs)b#x8i!a$j8=ObY$vAb?Y(f5d3cKWDvQ||sdTo{8mEN~WO&ycIXXKaSjpY1R&YLK2UPFKcW)O}JCQ$CQfo z7n7rI@A^Zv!dYv&dOYsr-t|^8Rl-)Ay9_^%|0I)Lt7Q~R{VAz#KDmQHE8rlf^sc@;fk^uo0V6Rsw9bf#0|34SwPo2QI1ksiiOU@4{sYX;pqujQOTpnw| zRMF=#l>T>T!2dPS1Hiiq*8cZdIS>ud z1Nh(5|KdFVr<4CL*-0dr$x1Bmubx_G1uUiP%}B+bh{^Afas5nsOzC zcLIdAF!6L&bB5f;@V&czT=^MqDh8vz)%B$o2-Yy`Dnp*TZehu!luVOU+A9zaZp23? zlc2?5tLATs_P0@=lCPk_#6?`32U#jUhaH#8+$C4qb!C+QGc^>g+;F!Hbb`mZ(yX{0l7;xtz$3iDr0Mj_I8fDEKZSN~Tx5gIm{G4deTGg_aW z`|lh6wYZJmtJRY6^wA6&>cOGRY&a5030kAc{4( z#+2o_NnIBq&`h^h>YfSfX$csHy}G^4_2Mtv^Vl_R)aAiuB*_1)+LxljKE0>v*jpT{! 
zby^WSMv4N?^r*@<+&2nIi^`eW0x4#L+q@KawYy{srIi~D2@;pXe8vqLzOB(dg6*fV z2M?f*Plw!0O)^{2WGF~dV^)Y#4<+vo9>6MBAzNcu*!UDW9g7r!``FALhzmCU*Q1b* zw|%7*p!jFJsFGFH-MOw4$;yw-p2}6Gfp`f{#>W^*mp8I=?XE!^Luo3-dU|5EJ7D>* z2`+Fa@>^$Sql4#6rv3FC3v&A__%vGOpK5oPvvH_m68wha>P_?#GTNq zqaN^?mz&ie=ys?`YtS;J9qr>82?EE|BHMwew7Wk%_m{(wCc${+6UAa`eB|SI-tO{I zE|domsuCwUb%htvd%FnJ`07~boVIHPZQS{Xyu@(T3a95`IGf`=!M?ve{=C^#$+x=?qU8K8wD%_v6<|8>QYY;0#a=^6^D-D!rZe`CzU6Vb(FK$t}ADW+B(oiy!^3V zQJ+=M=uB;d?q*mjh%C9&+ONsI@|z@pL83{S%8Y9RLP^+_{8z0E+O=Ab^jbk}I|oo!R*o807BO_O@t zVLI(iplI2?m6p=amcd66HqXJ1d0|=wy^)BqzG%XOWc8?PrbG7}#4bdj?^_2hoyjbV zmWNpXGAC5DY|40-SjM>h(*e3uVp2Z7d$` zOxFNDB)wEki7xxbge3yc0%GB_;G$OZIR#2ZhlxXv4J!J-8nHgX++c9J4fzSva$vz;Zm(=B5=p zn;|5b*z+FRVmzx#ZD05(EW zoW%du%X4TV@*<1t8)7lf*x z4Zc?P63;sP)Qb;);SwEwQ@T`HY_6gCFA2P9?SJ0=Tm+7J+RBwrgc8z>`_KLI`S5V( zE83p^s=<7MT!uD|ZN0`a1ldP9S9R!yVU4&G2NwNTu+WlY(|WAVc3^S!$&W)v_f?kQ zRxA^ zT5m9I{RKSEbDaupY+Nc516$$V@o8v@L-{*n0TEHcT0T(76G8T&UzTqbve21s`|+j< zD0?_hx8;~h*v5Q0ySe&{FUTN}UCt;I!7s7&I7XaL*og`Q36&bf!Y8JgJ2Ok*2`G_@ zfBnJCo&R!1YVL9HeUmep?xie0sm28ZQxoF$IUjvy0+WPpJUXT||AV}s#~N6|%jb1!!^{w~8Dsi2|O;fC_cG;MfFiswj~YGG`$)CiMC zhugD$nj>VUf2xdd)(0}0+-(IOG~qjK;-sw11iPl?x{Srk$;}69RD=PU3_f|MC_n!C zh!)1Lq|Q(HEFtLv1{=DXLpG&(ZJn_ZV&@z;`MVv6mn?pHAdF}J$y&UyP1rwU2`hrPFq zYU|tHMhiuX6e(6H1&X`77k4P`P_($amjb1@yR^7d+@Vn1iU!x<9wY?I8`}QPIsbFV zeebyP@qWw5*x6ZouDPZ?^O<{HdRKceTjRK)3a_)W4rgl@LC|j$$e+%+JAL*=Yo^Q= zz{Q(!HLuWG4jp;eCfHlG&qeeB&~x-jWJ)CG&1##l zQeUUgJ5*ez55Yo~768d9{EYtKCX1yR=B9vqP$QEO4FYO~>a1U7KBIbxkz|eexJB)L zd7S{y;fpM@4@1Qz&O>>4Ty*Af$Z!agN-!QnU2Q%k0=vHPIYwO4f&NTWbJhSlj!I}+ zGm}MX?)Dfnp<#P&rRYur8_P){H+hm?Ys;G|PNrl^80o?j&X2@=teela3^=p{kPjk) z98wC6anN0+B|*xzYw<=6?&NID69>L!4;O40d{zDG<9eXiXQVi3t6@(LyG&O+Y*UR}zGdS`xZ@h&eNY>!xkCUF_Ib-(Z(b$D9@+ z(RG68j6d||`D%q7yfk30b?TMDcrm&N-Wx>g?F1H{1UDF+TKp?hKelI+HT(<{3;q;u!b{vr}fJ$XVjlc0zVg%pY$mlX#zK*z0 zIZ?6C&WGOjnIKu0F64e`69F6WH66t6b$kCblsbb2kqTS8PKhn=TI=^YXnZi*dW7vI 
zEG;XvYsE6pk{0f#2ODiRNy3|O?OVtBf8-S<2#e)!F)=9wdW*WETa*zlS$+P;FeO)#i&ZK-a_lgp=TU8x8PbylD#893f8ZrsmlHp)xBLlsB(NqRQT*1zfotRX^jA!nlaElo>OgbK4jucnUuk6E6#0(E1ia zO(7mcj+pOWt2@a1vHTg?8beSQv@^fTjFuqi2~O(nL~s4U^U9&sU#gTbY07Uc>1LR# zVJ(HNC94^--kJFv-PT~hM1n-!<#kQk=4OURR(spOx+#B9!HOvQ;DoZleE5YId|Nbt z-VD;bg3+uAtU|4LMR^7J?Mgp#ym+1oL)W*ARJ6b7W+jA#T0h&)(}8Z;s35_(YmaKw3+wc2pd~=dPs&6X@H6x(lE_Th*5YHk<2#9SA@FBR zhWPLZ!)vXJ-W*{D&_NFQ0)Kgl8%`WCpZ@UO@2>X8n;~StVv6eJcQDu%C6j9teBqUt zPBeqvT-9nbf4(|cXj8sDU#($wDHZ3)Gfh}2JNR5%+N-N<`_(1HjFa@4lw~< z7VAlbfV;2kY2_F=g}y0+m2)D@1Rl^P%)t`Ge>d(2ic;gh^!p(Xy-_%fBT0EMKBjx2 zdJ=uXi?{Qc%F+JPthqvITpL!(t?$CLp^=TblJ@f>8UmA_F1MK&Hc;E)EE^?kOECYl zZ;va5?H~&**D+L?`zN>mCw%98%)I&H(EexU-4y}r`X#{S{-0&JXOxPt1l39j_+RAO zQ*u~c4lzXW?>+yiPs!rI;)`^g=D+&$cO3|AFvf@X+xIwsD}mQlUGbKs!`=S9W(WBM z_l@z>=t1aMyq*&I>Ns&Pq716|0?b6I*dcV5BZd@tojRF&xXZ@^AK_ua$Kc}0c$|jK6?H1hOOF@o zNSg**>Hl6Ed)8xgz~Xh|8|QL&^#Z3C*@j3W8hY%oH+Lki0|K~{X8wRij2uHX@bz*W}|5#yL?i1noT=qZ*5{4%3EW`OUmJe}XNpF3oY zbafykua$D6n~<-;I{8x$AAK*8Tm3Ejtw@+67Gx+m%k1PT%KnRx9$05fSNkBI$LM0a0zVdey2J69C>b{03K+ zkj!!|yRGz~8X5qpO)OI#o*(gffk16&RWK4S30(d`9&pFxJb9{|E~}D;(4lTl2vU#+ zpDA~;N_JB)w*okZ#cG0w81wcm@Lxe&#^@e2vPM)hhPK6^_d>P$6|gkXd@sBex`bi< zF>n<%a~=|d+f*8kYvUQQ3ySM3TKJx<*C)gj720A!JAXeFU?-Z^sY+4u3!>x0l+iCt zm7?@IvHe`w2{~y@(%O+ur$$cZgB=Q;-r{^_$!e#sOYC!67=lQzxagK-Be24ZRv49n zhh{k(9R(elgJQ%zIQZlBd{|yZD{#tCnygV%hWOnyetT=4h;nEda2x)demqBjffVz| z^6AhdUg_q8lH=%|!;AbjXjBLCehNo!iO@n_=L^yZcZ-%ICUE#woQH!^}qQ zTmwjDMAe$5?^X?{(az8qxz502?UgXD=dO2WK|4cnnd`V736M_POEKf%-);~Q8JXC=$nANEU83FI@tig*s&_cCF#>x% zY&jtQ>NpMw1oacsUy`^ z8ck8%Vr^M~@M~fjIdxGz@}ef;jc;al$C1K7)g~LSaI3+kdgXlhb5hC8>cm2rr?MK7 zegBV(GK8n$VUBbvO&|#}$u_~;`q^T%3}?P|TqPHhVZ_=)RXdoD+0xHD=0X**=P&yO z|JPZxl5XtCQvjT_18U@A|D<4qjx0xg|K~3bRXuFJ0nyJ03TK+ zId8}rdpV!y9P1dk;L|gC4Nu3q+yYv|<+xAGpAN|vVGK?rjrH7pv;5>{Y4#>mbSwS& z5=Sd@`hAvH{(y6<5zPkVKr6+x1f(UK_GQL|IAv}~qkhDeomw)Tt&*evAxGdgk}VIo zr}t{8%RXxB`ps0Gl$?53?ZE*}Rd^aN-3`~aP~1|pzNF{2e4PqAS*!w_f3!UH%lY4) 
z@uU1&0?PwUMx$*4^2rGWtc~$mWds_iY(ld)iDE&nlKiXl*|i~fMspXGX*QazCN?zf z?4ZX{8U|!A8gJ0wHxo*)^I@cWAC%dAKCrCZPECs8rGk;_6dT;agZ-M+%M5A{4gw3G zCz}mtw~5f%aM@E;@UPMZjHJ_=uK2z6&ps_dxv3rcDzs?V+!AA{(GtFCz-yD;_ia3H zdb&7zz|W9qohGC5d%9&XVPDIFAYP&2b1nY|NiwF#Q3LXdL%*YpXEH$$w1?O?J<_RR zTBOehZY*czQrbIm(lUz@^aa<`CDkqMD_n0K={!sWp}9Tq7=coY+|Ujb>B@jsg)9A*lFl9BuwAfE7=lJ7k*b zoil)NLdMCS!&6m1{6eHeosrzG3~v$o`_*gqAd2~|X9^iK&JQ1LDtu@xZAQQuc|MGb zmYRJ_!$j~_6tQhNNt4@|iGSIVt0=as-{rIYoSFnLffUYee%XVE9y|kiVim&Sd>L6gb_-CaAWvxM4~V}#VTl0KU279>{l&}`%WuD^V`dx z(gfA+j17Khc2@SkIvnW_gr1P!%Z$2HFAL6%>i*OtoD49dYc~GVR zL2?Jp&fl6cLM|-4f3m_^8u^G4t@#AuF+h_nkug)UTHS9S%rgHnho3yM%GNTZXMStD zFN5RmDYn%j*i>=raob*Mfo1v3gP{qIdDNVX9!W*9~;k24L(ajh!CTPjFc zcivNPGGjyhzl4-HP;KX146@l`UK7rB=1f_RA2oBOcES=71)qCDw{@Zc@P`1-Jf)nH@r-n1;oOfyo6t;WMJAiQxj?kuH`(j|N3rN;D(i1GnYF1LgioVCbnWKjokQdJHn#^)QCQ@Abt|7-tW}D$*Yz`SSG#~JlH9)S z?Aknw(2~c{_4>|?bCSZRx^04=mV_c8#bS~=xrGJR`6xcuT#;`82+aLx6E>f zMcAh-Cx}f}U*t;`ijDxu{f1)Gb9>$II-y{1#j$LDO91rVG>Y-=eH<>c&VV1uBlkDx z0&6Z{EmsgaSq7S;@GCGsFWhsI`&-HgG(tfbIjrKk(TbPz6I=FlGpeNKLN?qojB$9Y zN6V5{`5i*PlXSOTP!at~WX`_U20X8s@JhZ1Y%MS2HRU#l5VJsDQZnnJk$(}TrMn%e zN|8`xo1*G&lzWe3-eMK?2zc53KLxz@{_g-@iHtfmb5EQ{K06(=XNA-URMu7`JryA$ zdtTMKU_Wc~OH-D{kK42EIc@zCGp1|&%J>Q$;um0wpB`A?4|ix%ac!elp51x62y{7z z;tX-ViM&;AjM$X;6Sq;4W3e(Ng;*2By0qr$jep@}r1cu677nG7zha!cn7U~=DAoP+ zzk$HUF6mXLUBiSrx@Yb+p60%&(67jJw+y_`tB8KDrAwkrENqqXD$Tg!6AFC&j7kEM z-SAEYmYvS@#jgWQnFNfeqZ;?1dtk`8T~`Anhe@?g;hhe@PujA_rV!7-z4%G}Fsve% z0M2o(4?WgzRAUa49@t2eKkP_0&>N_Q5rtCpJX)1P!Tttyh^{{pL$R}u@udt% zK$qtYHnw_GSq(-JG>6UL4Exsfx+D#wbbfHVED6b3pn?5rS%u)Nb)Y_L!e#8EMnz(! 
zLZ*me{gOrK-urBH`o+dl>On4lnww&eA5C1Lx>Uc+zP1d_7Hw=#bgf_u@Y8c5Xq!WU z8RnoxqsFnC66);GiP`gXa2w=Y&UA{u*gy%uWWW;4FU;Zv+KLOcdzGr$r#G59SbC-F zR$h?ciVlp)QEDtB7&mg|9^Tf_%m3K#T|v#YeCrB1Hz#ccT>2`tw7B_MPIT~m&=^Nb zT2O7VJYRYF*1TW=&or{05nHz#DQP`LAM>?H^=57><7=FK%Gc*Q?e8@vDytuhf@FCJ z+_nq0Bm(RT*@UO5)8+4&@=AYr{q=`DL4kV@>8f#UTh9G_bz^861?smZ@mxh^8mgr4 zoE;zDO+0bA_o>9M=0!%l{*8J8mS&&v@p020l#jsGNF!np{7K( z^>n{J?y+(efb}c;rBO_pF{uD(uE&qc<5>Seg=rsFfO|`Q`UjN+e;cX8Q7y?#`&82~ z@QX%LP7(^i<>Am%Wjek6#>Bn@cwXZGl`Tns zxU(yKb_a~7p-AMq@7Y_nuhDQol&N@YTWByFU=eU0apAzDf>cLprptsW5827`GhYsftNBb5Kr%mqc4I9#5Nho2cvEJ|n9n^r%NWHtM{y8wQ{R ziIq`05IxXemdP|6&L5ufGaQXQZ;drtS7vN0bf`H#zDRE5Ag(s{u-{y zkC-l5>@57wXikNt!cO&Em0ID`h0@E)z*-%iIPnSPZTSOwvl~#hw|~vj{Te{EPUiEH ztGFGX<7uM))D2VGU5`jwAN8&tz3Hm#{{|6j$#zR?y-u2y<6t=aACTB#&AnT@`}u-u zvBaB2ddOAJ$sq9Es{9^mfmsF0F()2}!Mmzbk0c;F4ydlRwqhjbI>YyNOd?I-eTUh2 z+^8U6@-j@NHX~gQ<(yGY(SYR8ESNWob?2BLET0{_eF$$N zC9X6Kxfbo(f%myOPT2`rC#yfQ#inFrAtbW$~DYp zb_03yF)_aU_11dDwOt|~5AVDK!ld;g@_f-z-^qc);pE}oZ{zi@ppdlLVKijZ2ICvh z)AVHX9fp_W;{6!OLv9T7%-?R%WwMeq^e#(NjhzR0SYUjB@%fX{u>3_+1H0U!A2~QG z?oDOw=r^8$t-Al80ccnIuz;Tm&}9-*)Y78Kbw%xHwZ!4`qF0!8h|GRAHBS(E0*A+)(q59k z%iwMedUt`IJJ!KHmChgGw1Ng;B4g^~N~{(l0Ny2uvMKK^y-;P|MquNwtl7Gjdg0G4 z275J?LS{V+$5Wxo^B~kKQnkkWCw!*ravKaJQMURJtPe4D(z19F0=? 
ze*&o`Z&CVvk{Q<1O?`41xchQM6;EDvPp;swmLW^<=3(7|LP%?QxJrp=-_RW&R#3C= z|9s{3y)jFrSij!+cy>Zs!B|1;NzfX0Q#;`PhYQ=boPXw7g6(pLIMUS#4I&!uvr?N) z6PSv1BH*WmUuUgWFK~maS`g*!7V1i=&E&Yt(m7VM!ip&`s7imd!bo zI8!a^5r-*-{7tusTWvXkAd*I|3*EULBbIog89CXlFTHKYm<>^BmA5w4%c7N!rbd*E z6*2m?$eYI$@lFtc?n?fMp;(bsdxd>IcZQc^YT)GPEDW-r{LV~Ju;Oh@j5jeOyck)vBZl6N7#sUts-`G zvcHR&RkIA9Ij+j130FvcZ*j{b-rVU0Sgj(fKH!0Z6+u9b1WKb0eNaD3aK6RmPakBY zD?DjHb}swEM=IEIVWA&$;IQE@0S}4^AX)^Ns8oXFQ}i(`lqA`*C}gY{bC(T#R(N!;js@P> zBzG`kE-#W#AtuVn$(s`eRMTQS%JxZ6BI>7*tQ4LVFQb-K_u7j?f16C58Z}+!{x^Z5 z+gRE#@c}`#PP_IToODp zS$j(U+!J0eef7|^e|&H-;r(PyPpd_;)=5Po1HtoTYmJWq<$`a`(*xRQ|4e@ks>9}W zzDPwU;T9ul>5Qqf85ZB3luA;?dd3{|u1DfRQZ69qo1_@!#!u9QwI1`ABK;AKKlLJ|D4JR+a6EE7PlkvcJHOWJjo3_ z6XZvX4>s)#_HR7~xfp-`&GQ*>G(-AE7``{r;I zj~&ZZ`#0oJq>)}kc}da{AxW|QWo*V292I~Q3_IPMc=#YX!}pT@7KWBKS$0LKd#LuI z>H}(;FMJ-)7lT%xCS`W*0T`R^9`T~}b0QnONO@Kw8_gKda_g4%RQSmt2L02M7bR?e z@XH6LSl9s0=Bn|m$-TZSe}jn$f|gsi`;Xt9@2WGMJT!tKi~AQNO{iv*95!U*m^Xwj z=V~}3IO-N7LYpwa{}C(I4k8ah8{|+HVxB3}c>8!VV3G`kHD{p@EJ_vpK0zPf=DLsW zvw!F^RfVy)jhl!oe;2{X5>R2)y2)4L=D*e0th|dUOJHcZ{Up{_r`Ez&l2_o*B1;}S zy{O2-{wRAaaS&_-+c3^ON$j@bE1Dfv(BBU91OJ@l z6CKYF^X%JBZb|y0I(6mo!|E1h-QqdFmAGEMmg-DMio8P}UMGF$M&;$foxNBu? 
zC4CajMTPaE>MOk#F)1@@c#&fs&=G??YG?gRhmw6#W7vs8lD`hLLxe27JQiXQKe5V~ zTz>$T_xx#IqE;JuapY2gOP9&R%glJk*#eN`j4CV@YJ%uH ze;QVw?jn$Oe>&(|vHq^oUY)kE3z#FJl!eXbGIp8HDzxoCJ88H!cjltnxMu@8ILXL& z8!nyfP<;M~4woz*a?Q6w%@%TgDa+>j!1n+tPrS{ff-S#{jl#E0WgH zqH8D$so1q+Q#%ovXgDZ?g3m43;3nT6g}fJ$OX$Nh-vD4Xjk|k{Q%@dMj++CaqbcB( zrsZ~_RD$aUwC&d6`rp8Oewk>cKHt_v*l~&`Q+mOH7at*f$BXN&Ru;3-8pMX{;$m9J z@g|_aJ9xXs?_R_ixJE*!-PCm9duQGnusK9k<2Z}Zx&&e77wp7k!`tgQKor}RH#N>#2dNp0 z;UlbY=5t=>4`l;c2Ju?>%?Zraj%WCgwIdQIj1O*hco4sdZQjLTx2Le)jTN2B-yH{J z28ePg^j4$7h6ZI+aljp%Ll*N@bcSaW+I6k?njR z08iZ&w*I40A^2+B@jVRfGwwU&a$obb9$vrBN4e|x5P|OH*0Ka}h-`bfvYl_cbezPX z)apXxwDPTQ6EaD~QbD?_0K%J~2wqGgPd3<)f@f2NvzU37&H_1Yx6(&>*+Dh7dad2Z zt2KQRY+CWPZC;l~W7JkleIsP1lLQnt{JaEk=RKG#6ecvf9gq_49<}`aeNtkTuAVb+h)M6k4KblbSuMN<@5t%a z-8Oy;&A(=GVZ}Q}5d6c#*&t;FG^Ev;I;gZTcjQ77K)0i0nT?`!C|Lx`DPWLvH-xlw z9!Y7vZ>;lZmzMbKD*b}N%n9gxXPoBgtqA?){3)sjqE#=m5yIOMfMv(t>~ ze5XsqCs@luLw#cGU5HtaNQ9kn)8OIMbn&I)mETSw*OdX(NtXti(|-FTrgU!c$wMIx zC6>&yuVJ{}DDASydy^7a!y4F<5?kO}(zVC7`2C^h#D#+2 z;nyuqzaQ2j({DIyY85K_4$Ja+Jely(BB!87&yHI7ww~L+{*LC?Gl9KnlAniqQIQNq zYlk{$D~hdon#2CZEBSeER+}%+<1&(;H)Fs-QFTy&%8z}3M(Q=Z#eq+!$UDBCg5qF@11da7p4e_^x=$po%K6Rr!%X< zte^T8D7FjExH$c8>mhd22;S zTxDflHjq8w)-q;vX=~KkiZ(VfHeq*yBb0`n67K8)EGEd| z{Y1CGy4GUZ$4U#KUTfiE1q|rxcb&dLcbPVFTA|?=lanm0)S)p&3 zL3ffIG;fj0Zz#ih2fngYF)oKny*Y&Y)tY)E4Bq=1TF#N9SA@ULb+{%K^6cVZ8U6CW zn}9)p$p8$cIw2y)*Fl%=5wItn!^9NvWYKTZ{~W3pr_uN+(6`bVBEUlSh8s1&cD4A6 zE&jsu%HtO3&V8E|BD-qB3~BbF+d)+{z~Ng4wx)k%bBjiVlDx}+x@6dXqJ-QxaDTfMREcX<9f8%pXLXO)1p z#;{z~+Hv)LJ?6`9qf=|*FS)G`Pdk}6r|lu-=GR}+ZWuo=fhtjH%aGm0GH5i*Qp!9G z-0mi$dqaJ1o)hYCecE+ItV_df1Y=;X&ClJ1sj@+0b7BG4T>!jMeVJMDThf!@94r zX;&rhjGs5eA`j(t?d^=4lz};0V#l*I0sMO^IoGBWbM|7wJJ_4n85Cnc_6G>NbskM6;WbnEoI zlNj-(uTil=se}`jYx_r-ol2(DY|w0QSJ}H)`UIwz6pbBWhrkbJeVKH#`4~#fyS!ja zHi=|Y2$j-E@A(Ex?5jkzleh%dgQG2LJz;I=P*0CKCUE?l2M7)i3;!;R0{YeKVe`__ z-ARL^U-8wF{^*rEm8xY&z>d@p4UP=o8`WZ99AVs2tV0d?$HMOFS_3Rwtd5k>LJ07Ve z8tCeF?!$*Rn;v#xI9uxIf}6(eKIb#h(S-+@FA?u{_Hv36Nr96T;1N%}h#f_>fbwjv 
zN{+jIp1zr4nI0~Z%=5zC^$Fk3uyucWX9`91i}-_gL(?VCPuDz`-Pdz$dVguB?U0^b z9u&WHoUN2x&s*Y_CS&a)$Pz$W3o03AJ!!`Nu&4`O{gS!3@wLCRubc+?$IQ%Nru>I( z{ZS}^UagmdxG3F>pSWAQrL!JVyH5Xb@v5ut;zT2V^RTGfy zTqvCRo3ziTVwf`#VY_HWDr~nI3J?gSiISw0B{Q89QvTw3Z^3wJo|r>w&s^;kW=~F%W_ouTf4Xz0`|%PU>fSN~P*t3Ba$~pbLx>tRpKV|`ye`%No?7^@Tgj{~Z~g;z zny-;EFW(th3(d}>mVd}FwY4v*`;&ll*-A>r)}}w|TuWd|BjySY)L~dgUaG}YYEmBE zWu|C_;yBb7Ptk=SK+82{Vkc;mM56N@_$@Qri&uC--cjNzEM(djDhtw8(b%Dcq{pwH z4*)7A?@i$#CZ#WfLN9Ghrg)~Xa$=*0-^n^+!Cn)N8`HrXF>icCX9e>yFmC6xp==1e zWXytMtfxuJn_P4vC_VhKrxIraPPCs&_jTYaD!$L_DGG@2ATMb@{;U4+XGiZ&X{x$m zj>Ex3u&nGMGozg#@gQ*ABReFc)&AM}D9A+wv0OtG!y}8+tK^1a;Jb+7iv!g&k9pn2 zp=oa~FK?4RoEWhyRrYs*@ccp6hAXiw@>Dcz^GW&u)s9r!TsJs~Z-MW;&7|+MEPSVz z;XLre`BRTWXGpqo-2ZpXEp-^&nbd(laG3X zuUpa#z_tby$+#1Wub62z<~!wgv>TmRQO9>J)Vmag5msNyQ2_E5+U`unD^EHfusHf% zKlzT4zmXaV7?pryAc&119wr2L=ujtjzZbRIs(`&u1jq_#G|cD^tZ}8^&V7PND+-57 z3peU9IB3}0gLd09TJ)xpN}rw6rfY!D(bYY~@?JC4uU|Nm%;Jk(kvH>}nGpNn`ges! z{8)~LW$+2&^@n3zeeChKV}L68z{sJR&FS~UvUpDqS?>WH?s*FRNG~Sb&*?g8ntFHm4TL-BLaR~&4?bscu=vY~Ya5qK*^2@uo^;h||($C%UCt?Yu%rAVE0pb>Q38d)o ze$vYkhCMXo``xf9^iQg@UxMg!A{cK}?xi!(EF!ov8MzJbJC8y{O6@Sm_fy%yer)a6 zz7nAaq4da{HWU`0$_yD_Y|Or968ou8r%pd9`RMC~O}R1H>_$>~JUL+J}tf z&N#M$p-lXa2ER7gm&D;v)$0QFne(NdKfEdXmZc;7Tf^om14j{(8WttmIDT(vu?)=H z7ak}%8CiN;Y}ciV-b~cvGejUJDH`{jISjeOyXZ+6k47)x(}jhoXFmxZ8miq+`YX$H zeC_z&HWmP zhZXyA=>D|^L|T=4`0)-D-}(m6*Tj;5cg7UC;x=nrG(j!?z2}a~z0d9(M&X)TTaA~B zukc*p7#Nd;k~HUN%U}E!_3tNqf~bH!FaobS710^57DPeg24@L&5>?2gD`1OhhDXMf zHUV91%|YZ-nngjMc|D%MS;Dpe6+4BTZw#NlPFo%%BU1LY33s zo{T*(F!CcfhDkgn8ZMbmMZ6%9uQ+p~LrJ{RQb0pdM7O&MX|Y+TtRsO-z*^7k-nQP^ zwQ(kS%kQ_qMI})pcOCRW)&8O?Mr0zSyo}yVY4n=LWAX=VSJ77k0|R~ahf81DQyF3@M~U*D71Lz#!kPjPoy#yGgHV zJpR@|l&)%`Q(Dq6+}z=Q58;ZmEwEi6`-cmQL$yFGJ93gc&Cgebf2`kcfdwKVh=mcT zvkPN?R-W`b2E2Z`P_*{4k&yw5ASaq88-kPp5X2y`yv?i$eOq82P^3-tpTaJcSi6A4 zv*^WebS9&ler;E?EdQCHcKg-*yak{Bolh@o@w8#a_rCwsHCpP^qo{CiARHe>Iq^da z@RjilSTx(S(s}-yNs(h%8oW`(To@3jFp|pMH4vwgCwt z9BKG)F%5 
zu&{{r)Czmn;<|tB${%6OwTrqpM3V_fUubT_q0=*g8JzCSaN7iz9aj^pB2lB1W%}K| zR}QcES)LsqY&!K8x6~=)tdNj}8{`9KYXV z)WQeK&BgyPk%iklB( zgfo!`2of%zwVf|y&8nrzm7?qflu~{$4E^_2k0&~i2^A)rUd=6`|1uH9lyFE%NeH9E ze~;N;n}k~}3H!cbuC$HjKUW63D2cKw+?yUn_aH@9UP;usTf^*zoLx*47=`nhZDE5}8~m6*gW4=xTMy5-%dRWUSw$K~f6`Zm+#4AF_AT?P zQF=dKXW)!*6xV%3s=K1MHGrUuh27f~Zx3M__N5Ni#wEe}oXSLk^YU<{l(P{%FqIE) zt%UrwAHEMU1WORA7gF}UKPsv!@xv6lZSanX7L0(Xg;LOyFR2{&a?pGa08rmOZ1Ruj z)gZ12yP@|ty-ydN`>M6JrXqL}mq##Bpq8+K+|>UKUN3+@=pLEH_$1o$bZPh%qb@gE z-oK36EPal~_Rq$+tNBe62siV_2RF^?1ol5PmpES0EzuPvo zV=@W3v|28}BtqD;@O8s_C!g{_F~M4lxN!OM35_Lz#^zK26orrT>P|%ZSCf@ZQ&Tg= zor3`@UCf|*(OVq1m96t4$I}!hYa=Kdv^x`XHg|l$hZQOf-_7T z>3laZI4a$5SoL-FGx&5Rz{3RitHI<-Yuq{Urh9tYHbGn z>C6{D8KtqGkycL?ghC+y*puAA(PuCVlvY@t{>p0Arn;s2FiOy8BX=P=;fNFMsI;r3G6khf#>i={3O^+pf89Z;Fmevy&%^Q02zg1U|`cV=k?4Ly?ds>5R2n~x|uke(4jZ$KH#k0)~cBP z*F6hFuXm9DWmt_F;L+*r3aA+Ngn=nQ8y!MNm#V_bmviK}^>_Q^XV#n#bKl{zxvawp zjWT*>6baOims>Qq65tAif1%q?$&MWEhXFs8&9!^snPFNu9z@f3O^evj4WByTs>^zq zze+i~Jo^c&4TXA*#{^n(Y2{_y9LP`-_nue(2=bS@h5Yb^BV>@*>u)#ualB*m=!wki%AL}k#+uw9r zE3J*Pkl>1S7D;8D*!BhQ&l5qx`rg27!YY%qHmfFAX7t)Vw@{<|z9u3(tERqSmla$g z8{#axCv5L2{txvOfCbG%^T#j}C{b-TF#@b-2S?5X|50qS4Am!l+XJ!@()SNBWM&%& zvwH7lBjGj3u>3SewBArJjE@5he0zPm);_|$*HEFjZ2~e>K?7aS2IMCw0Tsvdqr&{M z*A?v_4$fV|;g^x3#Z<~L_O(ZPT|T!H!z}1`Fjh~4Z?Dvm2GlWR@>gro zMhO7+9;qUG+Ct&n@^@w2z<4SHnQSkKbOraw7%4p!og{yBNTWa0&lW5|hejDl&KI^y~Zk^tp>s z&Vd~WvozI@w}R61H97{oF3UgJX?3X&1ge&;(mraV{t1A(B!V$oNF0}F4;_l5XDjNB~H zXFEUn0nQhyQw;{J*Ohdqbeyl+CQ$Ai)q&f-hWq5i6f=*vQXI;+mS7c*|@jU_3(=4!rKpIS})_r!=RCCKvms9(k z>mxry);~x=p$Yc~1EJH31$)SV>h!@m*(6I&SHThKt7wAdVv@PR)G79_)G?^^g-rV?K^svR`EDh&t7@26{ZJmrUQ9EyR*?%|FX1TkqFt-*HNqaj+ z)_EDg4<5PT^aX)T>2u2ous&Ly6KwvlT*0mF0ld|}plai|T`@6t?yo?+5I6GqOvg$T zVYIk@?+WoaMBHe)09Txi@DjQ66G=xKh;1gbSlX^OuU68| zZ5s7Ap>=0jJ0|#yF@EwXA)``yZTEH)%i{wD%)4*%Z7RiWg6&D1YF#wayRjPBky*i zQr&s1t=IR}3$B0nf=b>l+H39b^Yd*E^H_)@m4T>Im`&*o)-bU<^_%jCd9M2I!t;Js z9YeX{Oq~6FTk%r8mG`z-rRNcqvZCL`5FagaAv@cn&6_r_-%@yb-Z(e4HX8$fahtQK 
z2V26DKjXabLw|j%4?*#KTjQJDMEs7gJ?biSjpkk2751M|QE`-y#ipx_$S;@t^9=-} z+Zg{>dv6_;Rrj?E({c29g2m&g+`j+&4Y*s>F|t+oZ7fnbV`g7m(RR*8L%@JtjO2 z#9za)EUP`FmcZUrc7dVEuZEmZ(WYDr7x5Jh&3tjmg(E*nTlt~GklR1dqu7u^WZ#x` zRk~X-F(zi&hcE8I;bG6?C(0#6u~*RfOvwYgC}haSjjpv_Bi?Yszm*|(6kg@@dX0v0 zab?3W{)G$@4?b%?7ZDxFLe{OhuC$4YWvtL26iQ9@l5CbDS)J1Zf#B$W4U}#e!reXa?(LxoJ$~uC`cj+E&O&xUKh+vWjG4R^GKtu?kMvwc%NM_S zY_Twz_l17FqohN_Tolh(F4T|tO=TN@k_3!uz5q8_d^J%Q+au%o#)wjZ_f(*GZ_0E2h!!C!QqrDJ(L!$VX+Kt05hwU+3eKwY} zeWoo=BlyPG=c1Wwlp!hF!f%$3!Y=H$!_!cH@&oO=#CY?!1)iuUNleVtc`6q4D?Q_ZjdwXhP+PX|=@xWyzV8I4SPx@v3OA%^KGB2Z6`cNDJ9<~s$? z%pIG}U%YvjoZE$jolx`c_h3tzeJTUn-!-BrlcwC@U8rbin2qySUC>0}JN&Tjy8(V) z;3fSVb{LxgEAhyuvlvb9QQx3c>j(F4?M^@yWf!%cG~zBBX6MFvI;!yNI98*KmN)4b zP5=)r2qY0iq6BA08M@Kn=&KL)6O^7R6lZ8F>M&;@S_xTGXQjKHwB9|_@(N7Q=_?W=-37e8rG>?AU69ib zch)$ZlT$@FH|J$-pYA|E-ea0+tkTLSzHaNR<5de{>Zp1z=CAe?lN~l}uz0_4zPEI5 zG5~c{pVOASFu72el$}#CyL-SYnaZ$3)Wc=7C3U&uVMClc^olUCsgADfVA$&AzgdETab$=H z@X2GtRGNM68%kS+uQXl_$Jg-NKc2BV00o?7sJz2dSWkncTlfINDLZcxS=ACCCMnI4 zB}VKtm3;t0(l5UQWVWtt>SApyW|kudTU}s~izeUSY&M%x!PBV%EE@mXanoUBi9Iw8 zR-zRnBchv0Dbpk#OPbflSNhCbX3Qnnl$qS=$A_mVKyPqJD*tYy12`)?2S8-kE@?^S zdCM1npnw#GJ&;}~b<1ANn~0rTrrxEc{#|yev%1>M~+!mI9^E2_LWBLNud@y zn!{Coro|bw@z{z&j7^MQi&y`oX<#0|DZ$VugBj&+^ugzgFm&vAM1LW(e^cVmiA|Be9sVIgA}kuh0qTE!z)>lT}OWHIDcHnZr2KH{v`GObgY z9QmxnTxm)_D8dG5Bck?RC(ykNsAYjO$+n@mOQMv#P~OP=|hRKK>RvpXi!+2a$em_ z5UW4jf>Gu+axdo9OGp>sDk77Dw0e@ShZai&UxWS&V=j&XV5538yT?=TI5u7sfIbuC zJ^%mY3kw`E|6z83f-DqWm}2!Q8AmVql)26UXAWg=jM#YfN_wKi8GpAdnRjvn(|p7C1mRgJ}%WAJ!);| z}u01JU3)0Ugb=4M8eZ3##Lk<@TAsQ3&g*r(~OKujcH`%G5 zsa+>dwp^BMP|SY&|L+@)9psEmxL$mnDrmZD9)xvvAtm$O>$P%}Fdp(olt zcEK)$=~&R%5h6xsfYKTMTS!KH@kP$rR*|&E;p;s4z+`+jDM* zne{Jk2{z{(=}e)VXezt7St}Su%5-{&lGz=>=;P?pEi+#-Sv9cVsWN;_aMLTv&Zv)j zlg(ZA9DhvxU?*5urmKYGhSM2biu)R|I57QaIKVZ*1}wbz;pwQ@D8MZ<~Q={ zS-34;Akk0py+6?aem%jOv?5xGm*?2vx;L*g_&c%(g`@YIhme5wa({;m`a>_4&dUA- z$vjv=Gwfl9T}&T38tz&w2*L$|nVANDxQs#w^$>wz20P9W=^OkJtM1`7*Wz|z4;fM) 
z)R&Dn;(vp@ka#MUSHDpn%=>Pdy?k$37=MdY!?C|m-V<11GJY(} zs!g2g9f#i#Z-X42Yx6IR$M%TvgzNx}SGhR@f9q+6#c_M09}Gx_>U=#2KG zH;c>JTS4p>H*6hvKc7tbC7^h~6wlr4Z!rlmr+N*!EsW(N7O$-l+HSdZONI#<_G3Bp z7udW(Nv@Wg2D#L+UF;giNbNppwBvUFi6GmxeIg9#tVH_@-geAWg*2ER;a1-yf zQ6VGyZTnoQ97}SiLwxl;Km(}4U!d@b_Nh*^YvK8D6Tc*Yf5%lKa@f6evJ`l09!U4Nz zNLOSWiK1VV2*L-**_p&I>-V#qe079;=-z)_Oe}|4cJg*HN4+JXD&gfUsK(fK{Am$c3K8i}S-_W#bXm!IIkr0I z3rz0SH3MQER-CT67SsbTE1AobCU@xyL4sw&SA?KrO@7VmZy!S8I~fzjM!NuQXeJ1f zU;}yT#O)4E-u!RScC|2hQ?tzA#p&Z0{oGo z(OOVTE*ioP%aSbTi9fFizgG5;W|4pMdnk;nwhzv~L%UD#Gt5KYo~-&h2q_-bL;782 zsfAAVcI)Y#kk{L%Yz+4`9deMMjAehS)z;8Ro^d+^84Lh$-sF^%VY-$GYl5`(jE-DF#3FMc-B zT1vo}DCmli^w(&+c5-S@DhXrmHR~f_!jsPy#1+bTLTwf_*Y%&GrstB|uPal$!bK?G z^-G5F%`EO&ZBKZ#NwSrp>WF3MdhB{id^i0Gr#{rjXPJ%ZPWvgd0tZ)PX0)2?=85T) z=D>EM^9kIkxrX1a-?x`vER4E?oKN?DsEk|Se!P%Kj0qyERe+7*(c)aIcL8P_)!??Lrm03_p5-e-WPQs3lxFX{GGxAj8ZGb z(x2nE#+<*f-p^e7-(=1PUaw;{i=Dx`GY5N~OwO$tk%{Ze!AOl6HWmGjE@C>s_Nx7*EFe-l2*9-Le z-%|OnLW3M|lhwcL_p38uA4ccrS*=Sm;d85TlGXihWmWnHP6jDsSWKP_>gu!m8&?~q zvmze68!J10WEh@4ySfRHhhH|}x%nmLIVKNVq+S9)cHhHyrNn+OsiMY^lYMD$PB}{sO7!D%R}Fu1X=toYcUk0hp=?jCyiT?NDk~;(zoQz^zR&YrTrve=t7UuwUDRyRm8aak$=5-gavx{e zR=F@g*1ZY^TZdICzq-wDrZA z1B<)UxSJV(K2@z&%o`1X620D%lW|H-jiIo`Rw1UDgB*axShE%%E)NC`2T$FDt#wXZ zL&XP(t5SD%^WRe)JQsx4zb}kuG=S=Sq4i(#=t!$WYkla9dj+Vj0Qg4|WJ~b4V2>t= ze+86PUy&MS{3m4Q1;ht{AnVIlkH-%_&Qbdlp!bN1j}Lle86Kx7`dnN3Uajyzr#u2l zZxH}}OX%v7K7Q~K=#z_h^BIUx&ItdR*26obPpwn_^A?jiysEYkpHPeQldPUi&TykJ z*-i{+B>W5qJ3Mkg(>%a`U0WkWSu$+ee4Mry853hS@{@>}!LkGOgAr`V^xGw<#};C*(b1^vD+7bw_qw4i=dH0afI3mw zuBF)}x05yrdGfcI#pf5qqhbbZ$W-5S^c{MDud1SBbmnQ7G=7;U&)zf2T6=XMGd(X63lG_nS?yEKSVAvC|{%0d84SICX z;*4F{`%O|?XFxcFWNQa`23mSySn`QyZtWF+$OP{JxXRrhu!z3)jLp44uJy+RLH>L+ z-;2Az*w`C#FS)PJ(jxfvhHmYkLX)cfLz3Qny&Z>+I*b>0z-E$J)8O`kgY`nG=m{M4 z>Th|y=EuCwc!=MBdnjDFGcL6=50uV+$bA}R@rozi@6uWLN}@4W#&5ru39~V0XXj&8 zlcr$2>+jGFg7z@PF=bj zZ1Y^qy0h|WS7H9jm&$ZVX|8R;SXeBcrV40fP-tw;K(1a&$gvDaO1kFK=}s^I<<;U0 z{l|G|+d=HOZN!R3{O*}LBO{wOLI0^>(dRA8bEUv@%uHOB4qNUBv=Nau-ykBSjmWZT 
zj(E%T`fmrR=WJb}^)-WC@51& zmbIdHc(Vtni2k%KWc}kcbKinr+r>OBlK}vYkV2H<=UD;iyi3{ZqinS9cM^*2I)8T+ zeIKGMKnf)2I=juE7x*o^kjZF0<>Ce89vUCt3dx(dQPGtwyf{@vSC;P>KOFPQa2k3x zdVHE2_^YT{{BkR)>Vi}U`k%dV0!V~OOAHKJ2Q*m0MYKbtM-K}^Itqr*^}1l3$8$*x z-7S!mSZCulo75)qlOn*g3K}f6tdk^l4V8Fak*Rm?4itO z0arE&g+unxXaw6oq|VJnU?4zaY%|n=)Nf{&gAcRmD~Cv`!E3*Enl_@#iq@9`q?T!a zNi9n$z|%N%DTi@3dLnoB+k?Z$$k=V90yOJYbPhW(`we&MgBRnFbY)Y)Mhkbs8`Wng z#qJapuj-DwxfnGS*0eY<{8g4X^h9q-oe#7i1=1<%Ck0%s2a|O3o_h)^U}jyt6PqX! zrl@c*CEor9xzxE-a516L{yDWMy6cu55*IN_O*|I8dtjjJAWCL^vbiiPWL(_kicXPH zqBHmpr|Hv+@w+u7$bnKIxXu}=ERGl;LYSeGKsjK z4$PFYO%u5aIJP~XiXnwIDh``o_=LXk?x)tsJ|Oa@rnu`5p$lk5S5~&&l6U^NjyTA7 zlzkIRtte+;b6$|07!vcBFQi%k(y%iY+~*Yyq{JGW#2Y03b(?Z~C{Fy%!P#{oEmZY= z>qo6N$QkpwmoMWmgF=IOo4yg|VmzaEB~STyc+g%1{U41OY(C>OTZF6!Eoe^Qh|&G& zvXG%2l>baSA3b+<1rby(Z)@wQxTV@#MUw!;SPOXT^MdN>@T#0YPWs*(N@qcA=M~C& z?x=Ipu=5R!lA2K)8R(Tu`o!HFag~=_Qj03QMN3O!hHB>iko|#*5ZNh4>Lnv1tXT70 z2(d6-o?v)`Jwof3hWgf7Bn)2Kw3kDVSp{zaaOF|cJ_XyRnm+P}4>LctO%bv#Z4ID}AYz-AVe*&%K+sH8)mo@%0`r>kUy& z-I^+@W~nikH2d^UK-#&>&VQWlfDS&SRuc9>%VJjCt&_nfKn238=@vB+W}>ZrB^